From 3dc8a90982ecf3c5e2e2d9aceb331232ba7e4c4b Mon Sep 17 00:00:00 2001
From: Tao Wei <tweicc@gmail.com>
Date: Mon, 23 Apr 2018 17:24:13 +0800
Subject: [PATCH 0001/1085] change to reference variable

---
 .../examples/android/jni/object_tracking/keypoint_detector.cc   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/examples/android/jni/object_tracking/keypoint_detector.cc b/tensorflow/examples/android/jni/object_tracking/keypoint_detector.cc
index eb431328a7..567f1098b9 100644
--- a/tensorflow/examples/android/jni/object_tracking/keypoint_detector.cc
+++ b/tensorflow/examples/android/jni/object_tracking/keypoint_detector.cc
@@ -311,7 +311,7 @@ int KeypointDetector::AddExtraCandidatesForBoxes(
           return num_keypoints_added;
         }
 
-        Keypoint curr_keypoint = keypoints[num_keypoints_added++];
+        Keypoint &curr_keypoint = keypoints[num_keypoints_added++];
         curr_keypoint.pos_ = Point2f(
             box.left_ + box.GetWidth() * (i + 0.5f) / kNumToAddAsCandidates,
             box.top_ + box.GetHeight() * (j + 0.5f) / kNumToAddAsCandidates);
-- 
GitLab


From 071e6175dcc130b4c623e849a380d6434289eb66 Mon Sep 17 00:00:00 2001
From: Erik Smistad <ersmistad@gmail.com>
Date: Thu, 24 May 2018 15:47:00 +0200
Subject: [PATCH 0002/1085] Added the -Thost=x64 flag to cmake build
 instructions

---
 tensorflow/contrib/cmake/README.md | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 0b79f718d4..5c203b777c 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -106,17 +106,6 @@ Step-by-step Windows build
 
 1. Install the prerequisites detailed above, and set up your environment.
 
-   * The following commands assume that you are using the Windows Command
-     Prompt (`cmd.exe`). You will need to set up your environment to use the
-     appropriate toolchain, i.e. the 64-bit tools. (Some of the binary targets
-     we will build are too large for the 32-bit tools, and they will fail with
-     out-of-memory errors.) The typical command to do set up your
-     environment is:
-
-     ```
-     D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat"
-     ```
-
    * When building with GPU support after installing the CUDNN zip file from NVidia, append its
      bin directory to your PATH environment variable.
      In case TensorFlow fails to find the CUDA dll's during initialization, check your PATH environment variable.
@@ -168,7 +157,7 @@ Step-by-step Windows build
    and must be the last character on each line.
 
    ```
-   D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^
+   D:\...\build> cmake .. -A x64 -Thost=x64 -DCMAKE_BUILD_TYPE=Release ^
    More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^
    More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^
    More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib
@@ -197,6 +186,10 @@ Step-by-step Windows build
    not currently supported, because it relies on a `Debug` library for
    Python (`python35d.lib`) that is not distributed by default.
 
+   The `-Thost=x64` flag will ensure that the 64 bit compiler and linker
+   is used when building. Without this flag, MSBuild will use the 32 bit
+   toolchain which is prone to compile errors such as "compiler out of heap space".
+
    There are various options that can be specified when generating the
    solution and project files:
 
@@ -263,6 +256,11 @@ Step-by-step Windows build
 
 4. Invoke MSBuild to build TensorFlow.
 
+   Set up the path to find MSbuild:
+   ```
+   D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat"
+   ```
+
    To build the C++ example program, which will be created as a `.exe`
    executable in the subdirectory `.\Release`:
 
-- 
GitLab


From 6890731b2693f6b71dedaca6b2eaf8b488226836 Mon Sep 17 00:00:00 2001
From: Erik Smistad <ersmistad@gmail.com>
Date: Thu, 24 May 2018 15:47:22 +0200
Subject: [PATCH 0003/1085] increase minimum cmake version required to 3.8

---
 tensorflow/contrib/cmake/CMakeLists.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 0708d6b7b9..225c5e6227 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -1,5 +1,9 @@
 # Minimum CMake required
-cmake_minimum_required(VERSION 3.5)
+if(WIN32)
+  cmake_minimum_required(VERSION 3.8)
+else()
+  cmake_minimum_required(VERSION 3.5)
+endif()
 
 # Project
 project(tensorflow C CXX)
-- 
GitLab


From ff94f56d0fd8b4b3043c79cc7ad1f939b587e44c Mon Sep 17 00:00:00 2001
From: Yuxin Wu <ppwwyyxxc@gmail.com>
Date: Wed, 13 Jun 2018 06:41:22 -0700
Subject: [PATCH 0004/1085] Update non_max_suppression_op.cc

---
 tensorflow/core/kernels/non_max_suppression_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/non_max_suppression_op.cc b/tensorflow/core/kernels/non_max_suppression_op.cc
index 23fdfe944a..4537ccb14b 100644
--- a/tensorflow/core/kernels/non_max_suppression_op.cc
+++ b/tensorflow/core/kernels/non_max_suppression_op.cc
@@ -131,7 +131,7 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& boxes,
     // therefore we iterate through the previously selected boxes backwards
     // in order to see if `next_candidate` should be suppressed.
     bool should_select = true;
-    for (int j = selected.size() - 1; j >= 0; --j) {
+    for (int j = static_cast<int>(selected.size()) - 1; j >= 0; --j) {
       iou = IOU(boxes_data, next_candidate.box_index, selected[j]);
       if (iou == 0.0) continue;
       if (iou > iou_threshold) should_select = false;
-- 
GitLab


From 667b9995267ff3e2592dca26d7a5748437ebb820 Mon Sep 17 00:00:00 2001
From: maxpumperla <max.pumperla@googlemail.com>
Date: Thu, 14 Jun 2018 12:14:29 +0200
Subject: [PATCH 0005/1085] More documentation for keras adagrad and adadelta

---
 tensorflow/python/keras/optimizers.py | 60 +++++++++++++++++----------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/tensorflow/python/keras/optimizers.py b/tensorflow/python/keras/optimizers.py
index f58aeaea1a..629972fd8e 100644
--- a/tensorflow/python/keras/optimizers.py
+++ b/tensorflow/python/keras/optimizers.py
@@ -314,17 +314,24 @@ class RMSprop(Optimizer):
 
 @tf_export('keras.optimizers.Adagrad')
 class Adagrad(Optimizer):
-  """Adagrad optimizer.
+    """Adagrad optimizer.
 
-  It is recommended to leave the parameters of this optimizer
-  at their default values.
+    Adagrad is an optimizer with parameter-specific learning rates,
+    which are adapted relative to how frequently a parameter gets
+    updated during training. The more updates a parameter receives,
+    the smaller the updates.
 
-  Arguments:
-      lr: float >= 0. Learning rate.
-      epsilon: float >= 0. If `None`, defaults to `K.epsilon()`.
-      decay: float >= 0. Learning rate decay over each update.
+    It is recommended to leave the parameters of this optimizer
+    at their default values.
 
-  """
+    # Arguments
+        lr: float >= 0. Initial learning rate.
+        epsilon: float >= 0. If `None`, defaults to `K.epsilon()`.
+        decay: float >= 0. Learning rate decay over each update.
+
+    # References
+        - [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
+    """
 
   def __init__(self, lr=0.01, epsilon=None, decay=0., **kwargs):
     super(Adagrad, self).__init__(**kwargs)
@@ -374,19 +381,30 @@ class Adagrad(Optimizer):
 
 @tf_export('keras.optimizers.Adadelta')
 class Adadelta(Optimizer):
-  """Adadelta optimizer.
-
-  It is recommended to leave the parameters of this optimizer
-  at their default values.
-
-  Arguments:
-      lr: float >= 0. Learning rate.
-          It is recommended to leave it at the default value.
-      rho: float >= 0.
-      epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
-      decay: float >= 0. Learning rate decay over each update.
-
-  """
+    """Adadelta optimizer.
+
+    Adadelta is a more robust extension of Adagrad
+    that adapts learning rates based on a moving window of gradient updates,
+    instead of accumulating all past gradients. This way, Adadelta continues
+    learning even when many updates have been done. Compared to Adagrad, in the
+    original version of Adadelta you don't have to set an initial learning
+    rate. In this version, initial learning rate and decay factor can
+    be set, as in most other Keras optimizers.
+
+    It is recommended to leave the parameters of this optimizer
+    at their default values.
+
+    # Arguments
+        lr: float >= 0. Initial learning rate, defaults to 1.
+            It is recommended to leave it at the default value.
+        rho: float >= 0. Adadelta decay factor, corresponding to fraction of
+            gradient to keep at each time step.
+        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
+        decay: float >= 0. Initial learning rate decay.
+
+    # References
+        - [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
+    """
 
   def __init__(self, lr=1.0, rho=0.95, epsilon=None, decay=0., **kwargs):
     super(Adadelta, self).__init__(**kwargs)
-- 
GitLab


From 2e436951bb63a0294848b6f6d3746e449a305ad1 Mon Sep 17 00:00:00 2001
From: Stefan Dyulgerov <stefan.dyulgerov@gmail.com>
Date: Tue, 17 Jul 2018 22:37:19 +0300
Subject: [PATCH 0006/1085] version_info.cc generated only once

version_info.cc in the cmake files is generated every time when we build tensorflow and this forces rebuild of the whole project, since it is in the core library.
added make.bat for windows, which does the same as make.sh to be executed easily from a build machine. the default now is visual studio 17
---
 tensorflow/contrib/cmake/make.bat             | 38 +++++++++++++++++++
 .../contrib/cmake/tf_core_framework.cmake     | 23 +++++++----
 2 files changed, 53 insertions(+), 8 deletions(-)
 create mode 100644 tensorflow/contrib/cmake/make.bat

diff --git a/tensorflow/contrib/cmake/make.bat b/tensorflow/contrib/cmake/make.bat
new file mode 100644
index 0000000000..d52b24e01d
--- /dev/null
+++ b/tensorflow/contrib/cmake/make.bat
@@ -0,0 +1,38 @@
+%echo off
+
+cd /d %~dp0
+
+if exist _build rd /s /q _build
+
+mkdir _build
+chdir _build
+
+
+rem cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install
+
+CALL :NORMALIZEPATH "..\..\..\.."
+SET SOURCE_DIR=%RETVAL%
+
+echo %SOURCE_DIR%
+
+SET SOURCE_DIR=F:\frameworks\tensorflow\
+
+CALL :NORMALIZEPATH "../../../tools/git/gen_git_source.py"
+SET SOURCE_PYTHON_SCRIPT=%RETVAL%
+
+CALL :NORMALIZEPATH "../../../core/util/version_info.cc"
+SET SOURCE_VERSION_CC=%RETVAL%
+
+python %SOURCE_PYTHON_SCRIPT% --raw_generate %SOURCE_VERSION_CC% --source_dir %SOURCE_DIR% --git_tag_override=
+
+cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install
+
+EXIT /B
+
+:NORMALIZEPATH
+  SET RETVAL=%~dpfn1
+  EXIT /B
+
+
+
+                                                                              
\ No newline at end of file
diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index 067c299a71..7e806685b8 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -258,14 +258,21 @@ add_dependencies(tf_core_lib ${tensorflow_EXTERNAL_DEPENDENCIES} tf_protos_cc)
 # force_rebuild always runs forcing ${VERSION_INFO_CC} target to run
 # ${VERSION_INFO_CC} would cache, but it depends on a phony never produced
 # target.
-set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc)
-add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC})
-add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo)
-add_custom_command(OUTPUT
-    ${VERSION_INFO_CC}
-    COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py
-    ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE}
-    DEPENDS __force_rebuild)
+# This code forces rebuild every time, not needed as version from git is fetched only once
+# move to make.bat which mimicks make.sh
+
+if (NOT WIN32)
+
+  set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc)
+  add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC})
+  add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo)
+  add_custom_command(OUTPUT
+      ${VERSION_INFO_CC}
+      COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py
+      ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE}
+      DEPENDS __force_rebuild)
+endif()
+
 set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc)
 
 ########################################################
-- 
GitLab


From 097d891caff16fd5fe47f4655650e2bbb7aa659e Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 30 Jul 2018 20:50:03 +0000
Subject: [PATCH 0007/1085] Enable int64 support for MatMul

This fix tries to address the issue raised in 21241 where
there were no int64 support for MatMul. This fix adds
int64 support for MatMul.

This fix fixes 21241.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/matmul_op.cc | 1 +
 tensorflow/core/ops/math_ops.cc      | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc
index 80376c61aa..4317054f91 100644
--- a/tensorflow/core/kernels/matmul_op.cc
+++ b/tensorflow/core/kernels/matmul_op.cc
@@ -598,6 +598,7 @@ TF_CALL_half(REGISTER_CPU);
 TF_CALL_bfloat16(REGISTER_CPU);
 
 TF_CALL_int32(REGISTER_CPU);
+TF_CALL_int64(REGISTER_CPU);
 TF_CALL_complex64(REGISTER_CPU);
 TF_CALL_complex128(REGISTER_CPU);
 #endif
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 1667c398f4..a0e920fb80 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -743,7 +743,9 @@ REGISTER_OP("MatMul")
     .Output("product: T")
     .Attr("transpose_a: bool = false")
     .Attr("transpose_b: bool = false")
-    .Attr("T: {bfloat16, half, float, double, int32, complex64, complex128}")
+    .Attr(
+        "T: {bfloat16, half, float, double, int32, int64, complex64, "
+        "complex128}")
     .SetShapeFn(shape_inference::MatMulShape);
 
 REGISTER_OP("SparseMatMul")
-- 
GitLab


From 16a50fcbacb8e46f6c4560a6e58ed26f5fd2d133 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 30 Jul 2018 20:53:06 +0000
Subject: [PATCH 0008/1085] Enable int64 test cases for MatMul

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/matmul_op_test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py
index b167278984..36ffdc08ed 100644
--- a/tensorflow/python/kernel_tests/matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/matmul_op_test.py
@@ -102,7 +102,7 @@ class MatMulGradientTest(test_lib.TestCase):
 def _GetMatMulGradientTest(a_np_, b_np_, use_static_shape_, **kwargs_):
 
   def Test(self):
-    if not use_static_shape_ or a_np_.dtype in (np.int32, np.float16):
+    if not use_static_shape_ or a_np_.dtype in (np.int32, np.int64, np.float16):
       self.skipTest("Skipping infeasible gradient test.")
 
     # Transpose and possibly conjugate a_np_ and b_np_ according to the
@@ -214,9 +214,9 @@ if __name__ == "__main__":
   sizes = [1, 3, 5]
   trans_options = [[False, False], [True, False], [False, True]]
   for use_static_shape in [False, True]:
-    for dtype in (np.int32, np.float16, np.float32, np.float64, np.complex64,
-                  np.complex128):
-      if not use_static_shape and dtype == np.int32:
+    for dtype in (np.int32, np.int64, np.float16, np.float32, np.float64,
+                  np.complex64, np.complex128):
+      if not use_static_shape and dtype == np.int32 and dtype == np.int64:
         # TODO(rmlarsen): Re-enable this test when we have fixed the underlying
         # bug in Windows (b/35935459).
         continue
-- 
GitLab


From 29eaaf0ffd96002bac06b2e41fbcc246bc5023f6 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 30 Jul 2018 20:54:29 +0000
Subject: [PATCH 0009/1085] Also enables int64 support for BatchMatMul,

as test cases uses BatchMatMul as well

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/batch_matmul_op_real.cc | 1 +
 tensorflow/core/ops/math_ops.cc                 | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc
index fe259c1634..3f560023a3 100644
--- a/tensorflow/core/kernels/batch_matmul_op_real.cc
+++ b/tensorflow/core/kernels/batch_matmul_op_real.cc
@@ -27,6 +27,7 @@ TF_CALL_double(REGISTER_BATCH_MATMUL_CPU);
 #endif
 TF_CALL_half(REGISTER_BATCH_MATMUL_CPU);
 TF_CALL_int32(REGISTER_BATCH_MATMUL_CPU);
+TF_CALL_int64(REGISTER_BATCH_MATMUL_CPU);
 
 #if GOOGLE_CUDA
 TF_CALL_float(REGISTER_BATCH_MATMUL_GPU);
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index a0e920fb80..5151b75650 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -65,7 +65,9 @@ REGISTER_OP("BatchMatMul")
     .Input("x: T")
     .Input("y: T")
     .Output("output: T")
-    .Attr("T: {bfloat16, half, float, double, int32, complex64, complex128}")
+    .Attr(
+        "T: {bfloat16, half, float, double, int32, int64, complex64, "
+        "complex128}")
     .Attr("adj_x: bool = false")
     .Attr("adj_y: bool = false")
     .SetShapeFn([](InferenceContext* c) {
-- 
GitLab


From 6a0bffab81dc5d39a18fbc52d6c71509e016985d Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 31 Jul 2018 20:14:42 +0000
Subject: [PATCH 0010/1085] Fix bug in test case of matmul

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/matmul_op_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py
index 36ffdc08ed..493b3cf9d4 100644
--- a/tensorflow/python/kernel_tests/matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/matmul_op_test.py
@@ -216,7 +216,7 @@ if __name__ == "__main__":
   for use_static_shape in [False, True]:
     for dtype in (np.int32, np.int64, np.float16, np.float32, np.float64,
                   np.complex64, np.complex128):
-      if not use_static_shape and dtype == np.int32 and dtype == np.int64:
+      if not use_static_shape and (dtype == np.int32 or dtype == np.int64):
         # TODO(rmlarsen): Re-enable this test when we have fixed the underlying
         # bug in Windows (b/35935459).
         continue
-- 
GitLab


From dc93bc7127548da89f0e6d7a0e22986f6a5780d9 Mon Sep 17 00:00:00 2001
From: silent567 <silent56@sjtu.edu.cn>
Date: Tue, 7 Aug 2018 22:28:23 +0800
Subject: [PATCH 0011/1085] Change the docs of the convolution function

The ranks of input and filter should be N+2 instead of N.
Because the function computes N-D convolution, the input's rank should be N+2 with additional batch and input_channel dimensions, and the filter's rank should be N+2 with additional input_channel and output_channel dimensions.
---
 tensorflow/python/ops/nn_ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 5cdb7726a7..f8cf044e9a 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -711,12 +711,12 @@ def convolution(
   It is required that 1 <= N <= 3.
 
   Args:
-    input: An N-D `Tensor` of type `T`, of shape
+    input: An (N+2)-D `Tensor` of type `T`, of shape
       `[batch_size] + input_spatial_shape + [in_channels]` if data_format does
       not start with "NC" (default), or
       `[batch_size, in_channels] + input_spatial_shape` if data_format starts
       with "NC".
-    filter: An N-D `Tensor` with the same type as `input` and shape
+    filter: An (N+2)-D `Tensor` with the same type as `input` and shape
       `spatial_filter_shape + [in_channels, out_channels]`.
     padding: A string, either `"VALID"` or `"SAME"`. The padding algorithm.
     strides: Optional.  Sequence of N ints >= 1.  Specifies the output stride.
-- 
GitLab


From 29f596cf21f0332c1e2ece8798fdd9fefd2ba947 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Jun 2018 14:04:59 +0000
Subject: [PATCH 0012/1085] Improve the shape function of Bincount

There was not a lot of restriction in shape function
of Bincount and the output shape was unknown.
It is actually possible to get a better shape output
if `size` input is known.
This fix adds enhancement to the shape function of
Bincount.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/math_ops.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 1667c398f4..7d0f29368b 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1416,6 +1416,10 @@ REGISTER_OP("Bincount")
     .Attr("T: {int32, int64, float32, float64}")
     .Output("bins: T")
     .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      // The input `size` must be a scalar.
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+
       c->set_output(0, c->UnknownShapeOfRank(1));
       return Status::OK();
     });
-- 
GitLab


From 740c58b6fa5b6e1c85f688fbda322da0231aa169 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 4 Jun 2018 14:44:44 +0000
Subject: [PATCH 0013/1085] Return `[size]` shape if size is known for
 Bincount.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/math_ops.cc | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 7d0f29368b..b57385f63b 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1420,7 +1420,19 @@ REGISTER_OP("Bincount")
       // The input `size` must be a scalar.
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
 
-      c->set_output(0, c->UnknownShapeOfRank(1));
+      const Tensor* size_tensor = c->input_tensor(1);
+      if (size_tensor == nullptr) {
+        // Return unknown shape if size is not known.
+        c->set_output(0, c->UnknownShapeOfRank(1));
+        return Status::OK();
+      }
+
+      // Return `[size]` shape if size is known.
+      int32 size_val = size_tensor->scalar<int32>()();
+      if (size_val < 0) {
+        return errors::InvalidArgument("size (", size_val, ") must be non-negative");
+      }
+      c->set_output(0, c->MakeShape({size_val}));
       return Status::OK();
     });
 
-- 
GitLab


From e6981fc2225a529427391e98f492eee7bb865988 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 11 Aug 2018 18:39:13 +0000
Subject: [PATCH 0014/1085] Add additional test cases for Bincount Shape
 function, and fix clang-format issue

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/ops/math_ops.cc               |  3 ++-
 tensorflow/core/ops/math_ops_test.cc          | 12 ++++++++++++
 .../python/kernel_tests/bincount_op_test.py   | 19 +++++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index b57385f63b..0ba4a9a005 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -1430,7 +1430,8 @@ REGISTER_OP("Bincount")
       // Return `[size]` shape if size is known.
       int32 size_val = size_tensor->scalar<int32>()();
       if (size_val < 0) {
-        return errors::InvalidArgument("size (", size_val, ") must be non-negative");
+        return errors::InvalidArgument("size (", size_val,
+                                       ") must be non-negative");
       }
       c->set_output(0, c->MakeShape({size_val}));
       return Status::OK();
diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc
index 23f1538912..7bf7c476f4 100644
--- a/tensorflow/core/ops/math_ops_test.cc
+++ b/tensorflow/core/ops/math_ops_test.cc
@@ -558,4 +558,16 @@ TEST(MathOpsTest, QuantizedAdd_ShapeFn) {
   INFER_ERROR("must be rank 0", op, "?;?;?;?;[3];?");
   INFER_ERROR("must be rank 0", op, "?;?;?;?;?;[4]");
 }
+
+TEST(MathOpsTest, Bincount_ShapeFn) {
+  ShapeInferenceTestOp op("Bincount");
+
+  // size should be scalar.
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;[1];?");
+
+  INFER_OK(op, "?;?;?", "[?]");
+  INFER_OK(op, "?;[];?", "[?]");
+  INFER_OK(op, "[?];[];?", "[?]");
+  INFER_OK(op, "[?];[];[?]", "[?]");
+}
 }  // end namespace tensorflow
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 2767df127e..15d9de56db 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -22,6 +22,8 @@ import numpy as np
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import googletest
 
@@ -97,6 +99,23 @@ class BincountTest(test_util.TensorFlowTestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         math_ops.bincount([1, 2, 3, -1, 6, 8]).eval()
 
+  def test_shape_function(self):
+    # size must be scalar.
+    with self.assertRaisesRegexp(
+        ValueError, "Shape must be rank 0 but is rank 1 for 'Bincount'"):
+      gen_math_ops.bincount([1, 2, 3, -1, 6, 8], [1], [])
+    # size must be positive.
+    with self.assertRaisesRegexp(
+        ValueError, "must be non-negative"):
+      gen_math_ops.bincount([1, 2, 3, -1, 6, 8], -5, [])
+    # if size is a constant then the shape is known.
+    v1 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], 5, [])
+    self.assertAllEqual(v1.get_shape().as_list(), [5])
+    # if size is a placeholder then the shape is unknown.
+    s = array_ops.placeholder(dtype=dtypes.int32)
+    v2 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], s, [])
+    self.assertAllEqual(v2.get_shape().as_list(), [None])
+
 
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From cea293f00cd8710e439937b048745c14ac52b3ff Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 15 May 2018 22:40:27 +0000
Subject: [PATCH 0015/1085] Fix tf.matching_files issue with access denied
 subdirectory.

This fix tries to address the issue raised in 19274 where
tf.matching_files will return an error if no permission for
subdirectories (Even if the subdirectories does not match).

This fix addresses the issue by ignore the case for `permisson denied`
during the directory traversal.

This fix fixes 19274.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/platform/file_system_helper.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/platform/file_system_helper.cc b/tensorflow/core/platform/file_system_helper.cc
index 0ba0e6304f..1e72e9e41e 100644
--- a/tensorflow/core/platform/file_system_helper.cc
+++ b/tensorflow/core/platform/file_system_helper.cc
@@ -82,7 +82,10 @@ Status GetMatchingPaths(FileSystem* fs, Env* env, const string& pattern,
     dir_q.pop_front();
     std::vector<string> children;
     Status s = fs->GetChildren(current_dir, &children);
-    ret.Update(s);
+    // We will ignore permission denied error, and update status otherwise.
+    if (s.code() != tensorflow::error::PERMISSION_DENIED) {
+      ret.Update(s);
+    }
     if (children.empty()) continue;
     // This IsDirectory call can be expensive for some FS. Parallelizing it.
     children_dir_status.resize(children.size());
-- 
GitLab


From be275e00c9134ef3e7cce1b0806f62aec28b6945 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 11 Aug 2018 21:53:02 +0000
Subject: [PATCH 0016/1085] Update matching_files and bail in case
 PERMISSION_DENIED is encountered,

update based on review comment.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/platform/file_system_helper.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/platform/file_system_helper.cc b/tensorflow/core/platform/file_system_helper.cc
index 1e72e9e41e..a26a224e14 100644
--- a/tensorflow/core/platform/file_system_helper.cc
+++ b/tensorflow/core/platform/file_system_helper.cc
@@ -82,10 +82,11 @@ Status GetMatchingPaths(FileSystem* fs, Env* env, const string& pattern,
     dir_q.pop_front();
     std::vector<string> children;
     Status s = fs->GetChildren(current_dir, &children);
-    // We will ignore permission denied error, and update status otherwise.
-    if (s.code() != tensorflow::error::PERMISSION_DENIED) {
-      ret.Update(s);
+    // In case PERMISSION_DENIED is encountered, we bail here.
+    if (s.code() == tensorflow::error::PERMISSION_DENIED) {
+      continue;
     }
+    ret.Update(s);
     if (children.empty()) continue;
     // This IsDirectory call can be expensive for some FS. Parallelizing it.
     children_dir_status.resize(children.size());
-- 
GitLab


From c1731614e10e5b2a8e77f1ee4565b3185541483c Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 11 Aug 2018 21:53:56 +0000
Subject: [PATCH 0017/1085] Add test case for matching_files when
 PERMISSION_DENIED is encountered in subdir

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/lib/io/file_io_test.py | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py
index c21eb93103..1d247aa8ba 100644
--- a/tensorflow/python/lib/io/file_io_test.py
+++ b/tensorflow/python/lib/io/file_io_test.py
@@ -23,7 +23,9 @@ import os.path
 
 from tensorflow.python.framework import errors
 from tensorflow.python.lib.io import file_io
+from tensorflow.python.ops import gen_io_ops
 from tensorflow.python.platform import test
+from tensorflow.python.util import compat
 
 
 class FileIoTest(test.TestCase):
@@ -582,5 +584,31 @@ class FileIoTest(test.TestCase):
     self.assertTrue(crc1 != crc2)
     self.assertEqual(crc2, crc3)
 
+  def testMatchingFilesPermission(self):
+    # Test case for GitHub issue 19274.
+    # Create top level directory test_dir.
+    dir_path = os.path.join(self._base_dir, "test_dir")
+    file_io.create_dir(dir_path)
+    # Create second level directories `noread` and `any`.
+    noread_path = os.path.join(dir_path, "noread")
+    file_io.create_dir(noread_path)
+    any_path = os.path.join(dir_path, "any")
+    file_io.create_dir(any_path)
+    files = ["file1.txt", "file2.txt", "file3.txt"]
+    for name in files:
+      file_path = os.path.join(any_path, name)
+      file_io.FileIO(file_path, mode="w").write("testing")
+    file_path = os.path.join(noread_path, "file4.txt")
+    file_io.FileIO(file_path, mode="w").write("testing")
+    # Change noread to noread access.
+    os.chmod(noread_path, 0)
+    expected_match = [compat.as_bytes(dir_path)]
+    with self.test_session() as sess:
+      self.assertItemsEqual(
+          gen_io_ops.matching_files(dir_path).eval(), expected_match)
+    # Change noread back so that it could be cleaned during tearDown.
+    os.chmod(noread_path, 0o777)
+
+
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 537bd0d8237d77c789c1b7633d8ba4b68007f52e Mon Sep 17 00:00:00 2001
From: Andy Craze <accraze@gmail.com>
Date: Sun, 12 Aug 2018 14:40:04 -0700
Subject: [PATCH 0018/1085] Update Nesterov implementation docs

 Clarification that this is a modified version of the algorithm which is only correct under certain conditions

Fixes #19899
---
 tensorflow/python/training/momentum.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/python/training/momentum.py b/tensorflow/python/training/momentum.py
index cb3ec6f053..34c74cda4e 100644
--- a/tensorflow/python/training/momentum.py
+++ b/tensorflow/python/training/momentum.py
@@ -59,6 +59,10 @@ class MomentumOptimizer(optimizer.Optimizer):
         This implementation always computes gradients at the value of the
         variable(s) passed to the optimizer. Using Nesterov Momentum makes the
         variable(s) track the values called `theta_t + mu*v_t` in the paper.
+        This implementation is an approximation of the original formula, valid 
+        for high values of momentum. It will compute the "adjusted gradient" in NAG 
+        by assuming that the new gradient will be estimated by the current 
+        average gradient plus the product of momentum and the change in the average gradient.
 
     @compatibility(eager)
     When eager execution is enabled, `learning_rate` and `momentum` can each be
-- 
GitLab


From 9c21d22957a2d1e1f52464b923bf1bf5cfc29d5e Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Mon, 13 Aug 2018 18:02:23 -0700
Subject: [PATCH 0019/1085] Ran clang_format tool

---
 tensorflow/core/graph/mkl_layout_pass.cc      | 236 +++++++++++++++++-
 .../core/graph/mkl_tfconversion_pass.cc       |  18 +-
 tensorflow/core/kernels/mkl_tfconv_op.h       |   1 +
 3 files changed, 241 insertions(+), 14 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index c22e0a3872..c0ff6e10c1 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -2432,8 +2432,37 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
     csinfo_.mkl_conv2d_grad_filter_with_bias =
         "_MklConv2DBackpropFilterWithBias";
+// Temporarily don't convert quantized operators into MKL versions for now.
+// TODO(Intel-tf) Once all the relevant PRs have been merged then remove
+// the ifdef.
+#ifdef INTEL_MKL_QUANTIZED
+    csinfo_.quantized_avg_pool = "QuantizedAvgPool";
+    csinfo_.quantized_concatv2 = "QuantizedConcatV2";
+    csinfo_.quantized_conv2d = "QuantizedConv2D";
+    csinfo_.quantized_conv2d_with_requantize = "QuantizedConv2DAndRequantize";
+    csinfo_.quantized_conv2d_with_bias = "QuantizedConv2DWithBias";
+    csinfo_.quantized_conv2d_with_bias_and_requantize =
+        "QuantizedConv2DWithBiasAndRequantize";
+    csinfo_.quantized_conv2d_and_relu = "QuantizedConv2DAndRelu";
+    csinfo_.quantized_conv2d_and_relu_and_requantize =
+        "QuantizedConv2DAndReluAndRequantize";
+    csinfo_.quantized_conv2d_with_bias_and_relu =
+        "QuantizedConv2DWithBiasAndRelu";
+    csinfo_.quantized_conv2d_with_bias_and_relu_and_requantize =
+        "QuantizedConv2DWithBiasAndReluAndRequantize";
+    csinfo_.quantized_max_pool = "QuantizedMaxPool";
+    csinfo_.quantized_conv2d_with_bias_sum_and_relu =
+        "QuantizedConv2DWithBiasSumAndRelu";
+    csinfo_.quantized_conv2d_with_bias_sum_and_relu_and_requantize =
+        "QuantizedConv2DWithBiasSumAndReluAndRequantize";
+    csinfo_.quant_conv2d_with_bias_signed_sum_and_relu_and_requantize =
+        "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize";
+#endif
     csinfo_.relu = "Relu";
     csinfo_.relu_grad = "ReluGrad";
+#ifdef INTEL_MKL_QUANTIZED
+    csinfo_.requantize = "Requantize";
+#endif
     csinfo_.tanh = "Tanh";
     csinfo_.tanh_grad = "TanhGrad";
     csinfo_.reshape = "Reshape";
@@ -2508,11 +2537,73 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     rinfo_.push_back({csinfo_.mul,
                       mkl_op_registry::GetMklOpName(csinfo_.mul),
                       CopyAttrsDataType, AlwaysRewrite});
+#ifdef INTEL_MKL_QUANTIZED
+    rinfo_.push_back({csinfo_.quantized_avg_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.quantized_avg_pool),
+                      CopyAttrsQuantizedPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_concatv2,
+                      mkl_op_registry::GetMklOpName(csinfo_.quantized_concatv2),
+                      CopyAttrsConcatV2, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_conv2d,
+                      mkl_op_registry::GetMklOpName(csinfo_.quantized_conv2d),
+                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_conv2d_with_requantize,
+                      mkl_op_registry::GetMklOpName(
+                          csinfo_.quantized_conv2d_with_requantize),
+                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.quantized_conv2d_with_bias,
+         mkl_op_registry::GetMklOpName(csinfo_.quantized_conv2d_with_bias),
+         CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_conv2d_with_bias_and_requantize,
+                      mkl_op_registry::GetMklOpName(
+                          csinfo_.quantized_conv2d_with_bias_and_requantize),
+                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.quantized_conv2d_and_relu,
+         mkl_op_registry::GetMklOpName(csinfo_.quantized_conv2d_and_relu),
+         CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_conv2d_and_relu_and_requantize,
+                      mkl_op_registry::GetMklOpName(
+                          csinfo_.quantized_conv2d_and_relu_and_requantize),
+                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_conv2d_with_bias_and_relu,
+                      mkl_op_registry::GetMklOpName(
+                          csinfo_.quantized_conv2d_with_bias_and_relu),
+                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.quantized_conv2d_with_bias_and_relu_and_requantize,
+         mkl_op_registry::GetMklOpName(
+             csinfo_.quantized_conv2d_with_bias_and_relu_and_requantize),
+         CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_max_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.quantized_max_pool),
+                      CopyAttrsQuantizedPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_conv2d_with_bias_sum_and_relu,
+                      mkl_op_registry::GetMklOpName(
+                          csinfo_.quantized_conv2d_with_bias_sum_and_relu),
+                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.quantized_conv2d_with_bias_sum_and_relu_and_requantize,
+         mkl_op_registry::GetMklOpName(
+             csinfo_.quantized_conv2d_with_bias_sum_and_relu_and_requantize),
+         CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.quant_conv2d_with_bias_signed_sum_and_relu_and_requantize,
+         mkl_op_registry::GetMklOpName(
+             csinfo_.quant_conv2d_with_bias_signed_sum_and_relu_and_requantize),
+         CopyAttrsQuantizedConv2D, AlwaysRewrite});
+#endif
     rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu),
                       CopyAttrsDataType, AlwaysRewrite});
     rinfo_.push_back({csinfo_.relu_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
                       CopyAttrsDataType, AlwaysRewrite});
+#ifdef INTEL_MKL_QUANTIZED
+    rinfo_.push_back({csinfo_.requantize,
+                      mkl_op_registry::GetMklOpName(csinfo_.requantize),
+                      CopyAttrsRequantize, AlwaysRewrite});
+#endif
     /*
     rinfo_.push_back({csinfo_.tanh,
                       mkl_op_registry::GetMklOpName(csinfo_.tanh),
@@ -2629,8 +2720,23 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     string mkl_conv2d_grad_filter_with_bias;
     string mkl_conv2d_with_bias;
     string mul;
+    string quantized_avg_pool;
+    string quantized_conv2d;
+    string quantized_conv2d_with_requantize;
+    string quantized_conv2d_with_bias;
+    string quantized_conv2d_with_bias_and_requantize;
+    string quantized_conv2d_and_relu;
+    string quantized_conv2d_and_relu_and_requantize;
+    string quantized_conv2d_with_bias_and_relu;
+    string quantized_conv2d_with_bias_and_relu_and_requantize;
+    string quantized_concatv2;
+    string quantized_max_pool;
+    string quantized_conv2d_with_bias_sum_and_relu;
+    string quantized_conv2d_with_bias_sum_and_relu_and_requantize;
+    string quant_conv2d_with_bias_signed_sum_and_relu_and_requantize;
     string relu;
     string relu_grad;
+    string requantize;
     string tanh;
     string tanh_grad;
     string reshape;
@@ -2833,6 +2939,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   //
   // @return RewriteInfo* for the applicable rewrite rule
   const RewriteInfo* CheckForNodeRewrite(const Node* n) const;
+  const RewriteInfo* CheckForQuantizedNodeRewrite(const Node* n) const;
 
   // Default rewrite rule to be used in scenario 1 for rewrite.
   // @return - true (since we want to always rewrite)
@@ -3091,7 +3198,11 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsQuantizedPooling(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsQuantizedConv2D(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsQuantizedConcat(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsRequantize(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
 
   // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
@@ -3411,8 +3522,27 @@ Status MklLayoutRewritePass::SetUpInputs(
   // We add workspace edge only for MaxPool, LRN and BatchNorm.
   std::vector<NodeBuilder::NodeOut> workspace_tensors;
   bool are_workspace_tensors_available = false;
-  AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors,
-                           &are_workspace_tensors_available);
+
+  // Avoid workspace check for QuantizedConv2D and the fused
+  // Ops as they don't have attribute: "T".
+  std::vector<string> quant_ops {
+      "QuantizedConv2D",
+      "QuantizedConv2DWithBias",
+      "QuantizedConv2DAndRelu",
+      "QuantizedConv2DWithBiasAndRelu",
+      "QuantizedConv2DWithBiasSumAndRelu",
+      "QuantizedConv2DAndRequantize",
+      "QuantizedConv2DWithBiasAndRequantize",
+      "QuantizedConv2DAndReluAndRequantize",
+      "QuantizedConv2DWithBiasAndReluAndRequantize",
+      "QuantizedConv2DWithBiasSumAndReluAndRequantize",
+      "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"};
+  bool should_check_workspace =
+      std::find(std::begin(quant_ops), std::end(quant_ops),
+                old_node->type_string()) == std::end(quant_ops);
+  if (should_check_workspace)
+    AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors,
+                             &are_workspace_tensors_available);
 
   int new_node_input_slots = 0;
   if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
@@ -3685,6 +3815,69 @@ void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node,
   nb->Attr("T", T);
 }
 
+void MklLayoutRewritePass::CopyAttrsQuantizedPooling(const Node* orig_node,
+                                                     NodeBuilder* nb) {
+  DataType T;
+  string data_format;
+  string padding;
+  std::vector<int32> ksize, strides;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "ksize", &ksize));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("ksize", ksize);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+}
+
+void MklLayoutRewritePass::CopyAttrsQuantizedConv2D(const Node* orig_node,
+                                                    NodeBuilder* nb) {
+  DataType Tinput, Tfilter, out_type;
+  string padding;
+  string data_format("NHWC");
+  std::vector<int32> strides, dilations;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tinput", &Tinput));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tfilter", &Tfilter));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "out_type", &out_type));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "dilations", &dilations));
+
+  // Add attributes to new node.
+  nb->Attr("Tinput", Tinput);
+  nb->Attr("Tfilter", Tfilter);
+  nb->Attr("out_type", out_type);
+  nb->Attr("padding", padding);
+  nb->Attr("strides", strides);
+  nb->Attr("dilations", dilations);
+  nb->Attr("T", out_type);  // added "T" for facilitating MklToTf conversion.
+  nb->Attr("data_format", data_format);
+  // Requantization attr Tbias
+  DataType Tbias;
+  Status bias_status = GetNodeAttr(orig_node->def(), "Tbias", &Tbias);
+  if (bias_status.ToString() == "OK") nb->Attr("Tbias", Tbias);
+}
+
+void MklLayoutRewritePass::CopyAttrsRequantize(const Node* orig_node,
+                                               NodeBuilder* nb) {
+  DataType Tinput, out_type;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tinput", &Tinput));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "out_type", &out_type));
+
+  // Add attributes to new node.
+  nb->Attr("Tinput", Tinput);
+  nb->Attr("out_type", out_type);
+}
+
 void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node,
                                             NodeBuilder* nb) {
   DataType T;
@@ -4145,9 +4338,16 @@ Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
   }
 
   ri->copy_attrs(const_cast<const Node*>(orig_node), &nb);
-  // Set the Mkl layer label for this op.
-  nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
 
+  // Set the Mkl layer label for this op.
+  if (DataTypeIsQuantized(orig_node->input_type(0)) ||
+      DataTypeIsQuantized(orig_node->output_type(0))) {
+#ifdef INTEL_MKL_QUANTIZED
+    nb.Attr("_kernel", mkl_op_registry::kMklQuantizedOpLabel);
+#endif
+  } else {
+    nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
+  }
   // Finalize graph and get new node.
   Node* new_node = nullptr;
   TF_CHECK_OK(nb.Finalize(&**g, &new_node));
@@ -4193,10 +4393,38 @@ Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
   return Status::OK();
 }
 
+// TODO(mdfaijul): Is there any other elegent way to check for quantized ops
+// having attributes other than "T"?
+// Current implementation reflects only QuantizedConv2D and its fused Ops.
+const MklLayoutRewritePass::RewriteInfo*
+MklLayoutRewritePass::CheckForQuantizedNodeRewrite(const Node* n) const {
+#ifdef INTEL_MKL_QUANTIZED
+  DataType Tinput, Tfilter;
+  if (!(GetNodeAttr(n->def(), "Tinput", &Tinput).ok() &&
+        GetNodeAttr(n->def(), "Tfilter", &Tfilter).ok())) {
+    return nullptr;
+  }
+  if (mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(n->type_string()),
+                               Tinput, Tfilter)) {
+    for (auto ri = rinfo_.cbegin(); ri != rinfo_.cend(); ++ri) {
+      if (n->type_string().compare(ri->name) == 0 && ri->rewrite_rule(n)) {
+        return &*ri;
+      }
+    }
+  }
+#endif
+  return nullptr;
+}
+
 const MklLayoutRewritePass::RewriteInfo*
 MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
   CHECK_NOTNULL(n);
 
+  // QuntizedOps may have attributes other than "T", so decoupled the check
+  // with a function, CheckForQuantizedNodeRewrite(const Node*).
+  const RewriteInfo* ri = CheckForQuantizedNodeRewrite(n);
+  if (ri != nullptr) return ri;
+
   // First check if node along with its type is supported by MKL layer.
   // We do not want to rewrite an op into Mkl op if types are not supported.
   // E.g., MklRelu does not support INT32. So we cannot rewrite Relu to
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index aa39af637f..1530593fc9 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -146,22 +146,20 @@ Status MklToTfConversionPass::InsertConversionNodeOnEdge(
   CHECK_NOTNULL(dst);
 
   Node* conversion_node = nullptr;
-  DataType src_datatype = DT_INVALID;
-  DataType dst_datatype = DT_INVALID;
+  DataType src_datatype = src->output_type(e->src_output());
+  DataType dst_datatype = dst->input_type(e->dst_input());
   string data_format;
 
-  TF_CHECK_OK(GetNodeAttr(src->def(), "T", &src_datatype));
-  bool dst_dtype_found =
-      GetNodeAttr(dst->def(), "T", &dst_datatype) == Status::OK();
   // We compare source and destination datatypes only when both are found.
-  if (dst_dtype_found && (src_datatype != dst_datatype)) {
-    string err_msg = "T attribute of " + src->name() + " and " + dst->name() +
-                     " do not match. Will not insert" +
-                     " MklToTf node in such case.";
+  if (src_datatype != dst_datatype) {
+    string err_msg = "T attribute of " + src->name() + ":" +
+                     std::to_string(e->src_output()) + " and " + dst->name() +
+                     ":" + std::to_string(e->dst_input()) +
+                     " do not"
+                     " match. Will not insert MklToTf node in such case.";
     return Status(error::Code::INVALID_ARGUMENT, err_msg.c_str());
   }
 
-  // Build the conversion node and specify src as input.
   TF_CHECK_OK(
       NodeBuilder((*g)->NewName("Mkl2Tf"), "_MklToTf")
           .Input(src, e->src_output())
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h
index f4f0035f26..1e156fa531 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.h
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -201,6 +201,7 @@ class MklToTfOp : public OpKernel {
                           MklToTfOp<CPUDevice, T>);
 
 TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+TF_CALL_QUANTIZED_TYPES(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
 #endif  // INTEL_MKL
-- 
GitLab


From 9d45b84c12c8c9fb7a928adb9affaa91b35c7e2b Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Mon, 13 Aug 2018 18:04:38 -0700
Subject: [PATCH 0020/1085] Ran clang-format tool

---
 tensorflow/tools/graph_transforms/BUILD       |   1 +
 .../fuse_quantized_convolution.cc             | 220 +++++++++++
 .../tools/quantization/quantize_graph.py      | 349 +++++++++++++++++-
 3 files changed, 557 insertions(+), 13 deletions(-)
 create mode 100644 tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc

diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD
index 1ad1895269..eb1ed1f2ca 100644
--- a/tensorflow/tools/graph_transforms/BUILD
+++ b/tensorflow/tools/graph_transforms/BUILD
@@ -97,6 +97,7 @@ cc_library(
         "fold_old_batch_norms.cc",
         "freeze_requantization_ranges.cc",
         "fuse_convolutions.cc",
+        "fuse_quantized_convolution.cc",
         "insert_logging.cc",
         "obfuscate_names.cc",
         "quantize_nodes.cc",
diff --git a/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc b/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc
new file mode 100644
index 0000000000..2128bcd978
--- /dev/null
+++ b/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc
@@ -0,0 +1,220 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifdef INTEL_MKL
+#include <algorithm>
+
+#include "tensorflow/core/common_runtime/constant_folding.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/graph/subgraph.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/tools/graph_transforms/fold_constants_lib.h"
+#include "tensorflow/tools/graph_transforms/transform_utils.h"
+
+namespace tensorflow {
+namespace graph_transforms {
+
+Status FuseQuantizedConvolutionAndRequantize(
+    const GraphDef& input_graph_def, const TransformFuncContext& context,
+    GraphDef* output_graph_def) {
+  std::map<string, const NodeDef*> node_map;
+  MapNamesToNodes(input_graph_def, &node_map);
+  GraphDef replaced_graph_def;
+  TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes(
+      input_graph_def,  // clang-format off
+
+      {"Requantize",
+        {
+          {"QuantizedConv2D|QuantizedConv2DWithBias|QuantizedConv2DWithRelu|"
+            "QuantizedConv2DWithBiasAndRelu|QuantizedConv2DWithBiasSumAndRelu"},
+          {"QuantizedConv2D|QuantizedConv2DWithBias|QuantizedConv2DWithRelu|"
+           "QuantizedConv2DWithBiasAndRelu|QuantizedConv2DWithBiasSumAndRelu"},
+          {"QuantizedConv2D|QuantizedConv2DWithBias|QuantizedConv2DWithRelu|"
+           "QuantizedConv2DWithBiasAndRelu|QuantizedConv2DWithBiasSumAndRelu"},
+          {"Const"},
+          {"Const"}
+        }
+      },  // clang-format on */
+      [&node_map](const NodeMatch& match, const std::set<string>& input_nodes,
+         const std::set<string>& output_nodes,
+         std::vector<NodeDef>* new_nodes) {
+        // TODO(mdfaijul/sheng): Current implementation assumed all
+        // requantization cases have bias. Index of inputs need to be updated
+        // for non-bias cases.
+
+        // Find all the nodes we expect in the subgraph.
+        const NodeDef& requantize_node = match.node;
+        CHECK_EQ("Requantize", requantize_node.op());
+        const NodeDef& quantized_conv2D_node = match.inputs[0].node;
+        const NodeDef& const_requantize_range_min_node = match.inputs[3].node;
+        CHECK_EQ("Const", const_requantize_range_min_node.op());
+        const NodeDef& const_requantize_range_max_node = match.inputs[4].node;
+        CHECK_EQ("Const", const_requantize_range_max_node.op());
+
+        string quantized_conv2D_op_name = quantized_conv2D_node.op();
+        // Set up the new fused version of the convolution op.
+        NodeDef fused_conv;
+        fused_conv.set_op(quantized_conv2D_op_name + "AndRequantize");
+        fused_conv.set_name(match.node.name());
+        int n_input = quantized_conv2D_node.input_size();
+        if (quantized_conv2D_op_name.compare(
+                "QuantizedConv2DWithBiasSumAndRelu") == 0)
+          n_input -= 1;  // -1 since summand is moved after frozen min-max
+
+        for (int i=0; i < n_input; i++)
+          AddNodeInput(quantized_conv2D_node.input(i), &fused_conv);
+
+        AddNodeInput(const_requantize_range_min_node.name(), &fused_conv);
+        AddNodeInput(const_requantize_range_max_node.name(), &fused_conv);
+
+        // Add additional inputs to
+        // QuantizedConv2DWithBiasSumAndReluAndRequantize
+        if (quantized_conv2D_op_name.compare(
+              "QuantizedConv2DWithBiasSumAndRelu") == 0) {
+          const NodeDef *in_requantize = node_map[node_map[
+              quantized_conv2D_node.input(n_input)]->input(0)];
+          string summand(in_requantize->name());
+          string min_summand(in_requantize->name() + ":1");
+          string max_summand(in_requantize->name() + ":2");
+          AddNodeInput(summand, &fused_conv);
+          AddNodeInput(min_summand, &fused_conv);
+          AddNodeInput(max_summand, &fused_conv);
+
+          // Signed version QuantizedConv2DWithBiasSumAndReluAndRequantize
+          // if Relu does not follow the convolution operation
+          std::vector<string> signed_ops = {
+              "QuantizedConv2DWithBias",
+              "QuantizedConv2D"
+              };
+          bool is_signed_summand =
+              std::find(signed_ops.begin(), signed_ops.end(),
+              node_map[in_requantize->input(0)]->op()) != signed_ops.end();
+          if (is_signed_summand) {
+            fused_conv.set_op(
+                "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize");
+            SetNodeAttr("Tsummand", DT_QINT8, &fused_conv);
+          } else {
+            SetNodeAttr("Tsummand", DT_QUINT8, &fused_conv);
+          }
+        }
+        CopyNodeAttr(quantized_conv2D_node, "Tinput", "Tinput", &fused_conv);
+        CopyNodeAttr(quantized_conv2D_node, "Tfilter", "Tfilter", &fused_conv);
+        CopyNodeAttr(quantized_conv2D_node, "strides", "strides", &fused_conv);
+        CopyNodeAttr(quantized_conv2D_node, "padding", "padding", &fused_conv);
+
+        // Copy dilation attribute if exsit in the orginal node
+        if (HasNodeAttr(quantized_conv2D_node, "dilations"))
+          CopyNodeAttr(quantized_conv2D_node, "dilations",
+                       "dilations", &fused_conv);
+        if (quantized_conv2D_op_name.compare("QuantizedConv2D") == 0 ||
+           quantized_conv2D_op_name.compare("QuantizedConv2DWithBias") == 0)
+          SetNodeAttr("out_type", DT_QINT8, &fused_conv);
+        else
+          SetNodeAttr("out_type", DT_QUINT8, &fused_conv);
+        new_nodes->push_back(fused_conv);
+        new_nodes->push_back(const_requantize_range_min_node);
+        new_nodes->push_back(const_requantize_range_max_node);
+
+        return Status::OK();
+      },
+      {}, &replaced_graph_def));
+
+  // Convert bias float -> int32 on replaced_graph_def
+  std::vector<string> fused_requantized_bias_ops = {
+      "QuantizedConv2DWithBiasAndRequantize",
+      "QuantizedConv2DWithBiasAndReluAndRequantize",
+      "QuantizedConv2DWithBiasSumAndReluAndRequantize",
+      "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"
+      };
+  node_map.clear();
+  MapNamesToNodes(replaced_graph_def, &node_map);
+  for (auto& node_pair : node_map) {
+    const NodeDef *node = node_pair.second;
+    bool is_fused_requantized_conv_op =
+        std::find(fused_requantized_bias_ops.begin(),
+                  fused_requantized_bias_ops.end(),
+                  node->op()) != fused_requantized_bias_ops.end();
+    if (is_fused_requantized_conv_op) {
+      // If the op is not fed by Another Requantize op,
+      // then we coonvert bias as Int32
+      string input_op = node_map[NodeNameFromInput(node->input(0))]->op();
+      if (str_util::StartsWith(input_op, "QuantizedConv2D") &&
+          str_util::EndsWith(input_op, "AndRequantize")) {
+        NodeDef *bias_node = const_cast<NodeDef*>(node_map[NodeNameFromInput(
+            node->input(2))]);
+        const NodeDef *min_input_node = node_map[NodeNameFromInput(
+            node_map[node->input(0)]->input(7))];
+        const NodeDef *max_input_node = node_map[NodeNameFromInput(
+            node_map[node->input(0)]->input(8))];
+        const NodeDef *min_filter_node = node_map[NodeNameFromInput(
+            node->input(5))];
+        const NodeDef *max_filter_node = node_map[NodeNameFromInput(
+            node->input(6))];
+        const float min_input =
+            GetNodeTensorAttr(*min_input_node, "value").flat<float>()(0);
+        const float max_input =
+            GetNodeTensorAttr(*max_input_node, "value").flat<float>()(0);
+        const float min_filter =
+            GetNodeTensorAttr(*min_filter_node, "value").flat<float>()(0);
+        const float max_filter =
+            GetNodeTensorAttr(*max_filter_node, "value").flat<float>()(0);
+
+        TensorProto float_tensor_proto = bias_node->attr().at("value").tensor();
+        Tensor float_tensor;
+        CHECK(float_tensor.FromProto(float_tensor_proto));
+        CHECK_EQ(float_tensor.dtype(), DT_FLOAT);
+        float *p_bias_float = float_tensor.flat<float>().data();
+
+        Tensor int32_tensor = Tensor(DT_QINT32, float_tensor.shape());
+        qint32 *p_bias_int32 = int32_tensor.flat<qint32>().data();
+
+        float bias_scale = 255.0 * 127.0 /
+            (std::max(std::abs(max_input), std::abs(min_input)) *
+            std::max(std::abs(max_filter), std::abs(min_filter)));
+        int64 nelems = float_tensor.NumElements();
+        for (int64 n = 0; n < nelems; n++)
+          p_bias_int32[n] = (int32_t) (p_bias_float[n] * bias_scale);
+
+        bias_node->clear_attr();
+        AttrValue attr_type;
+        attr_type.set_type(int32_tensor.dtype());
+        bias_node->mutable_attr()->insert({"dtype", attr_type});
+
+        AttrValue attr_tensor;
+        TensorProto* t = attr_tensor.mutable_tensor();
+        int32_tensor.AsProtoTensorContent(t);
+        bias_node->mutable_attr()->insert({"value", attr_tensor});
+        SetNodeAttr("Tbias", DT_QINT32, const_cast<NodeDef*>(node));
+      } else {
+        SetNodeAttr("Tbias", DT_FLOAT, const_cast<NodeDef*>(node));
+      }
+    }
+  }
+  *output_graph_def = replaced_graph_def;
+  return Status::OK();
+}
+
+REGISTER_GRAPH_TRANSFORM("fuse_quantized_conv_and_requantize",
+                         FuseQuantizedConvolutionAndRequantize);
+
+}  // namespace graph_transforms
+}  // namespace tensorflow
+#endif // INTEL_MKL
diff --git a/tensorflow/tools/quantization/quantize_graph.py b/tensorflow/tools/quantization/quantize_graph.py
index 3acb532263..14b572c15f 100644
--- a/tensorflow/tools/quantization/quantize_graph.py
+++ b/tensorflow/tools/quantization/quantize_graph.py
@@ -21,6 +21,7 @@ bazel build tensorflow/tools/quantization:quantize_graph \
 --output_node_names="softmax2" --print_nodes --output=/tmp/quantized_graph.pb \
 --mode=eightbit --logtostderr
 
+To quantize for Intel CPU, add --intel_cpu_eightbitize=True.
 """
 
 from __future__ import absolute_import
@@ -46,6 +47,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import app
 from tensorflow.python.platform import flags as flags_lib
 from tensorflow.python.platform import gfile
+from google.protobuf import text_format
 
 flags = flags_lib
 FLAGS = flags.FLAGS
@@ -87,7 +89,14 @@ flags.DEFINE_float(
     "information. Note: this should be considered a coarse tool just good "
     "enough for experimentation purposes, since graphs quantized in this way "
     "would be very inaccurate.")
-
+flags.DEFINE_boolean("input_binary", True,
+                     """Input graph binary or text.""")
+flags.DEFINE_boolean("output_binary", True,
+                     """Output graph binary or text.""")
+flags.DEFINE_boolean(
+    "intel_cpu_eightbitize", False,
+    "If true eightbitized graph will include fused quantized"
+    "nodes in the output_graph for Intel CPU.")
 
 def print_input_nodes(current_node, nodes_map, indent, already_visited):
   print(" " * indent + current_node.op + ":" + current_node.name)
@@ -297,6 +306,8 @@ def quantize_weight_eightbit(input_node, quantization_mode):
         dtypes.quint8,
         mode=quantization_mode)
     quint8_tensor = quantize_op[0].eval()
+    min_value = quantize_op[1].eval()
+    max_value = quantize_op[2].eval()
   shape = tensor_util.TensorShapeProtoToList(input_node.attr["value"]
                                              .tensor.tensor_shape)
   quint8_const_node = create_constant_node(
@@ -309,6 +320,57 @@ def quantize_weight_eightbit(input_node, quantization_mode):
   set_attr_string(dequantize_node, "mode", quantization_mode)
   return [quint8_const_node, min_node, max_node, dequantize_node]
 
+# TODO(intel-tf): Current Intel-CPU quantized Conv2D and Matmul supports only
+# signed scaled mode of weight quantization.
+def intel_cpu_quantize_weight_eightbit(input_node, quantization_mode="SCALED"):
+  """Returns replacement of constant weight node.
+
+  This function creates (i) a quantized constant node, (ii) a float min node
+  (iii) a float max node, and (iv) a dequantize node."""
+  base_name = input_node.name + "_"
+  qint8_const_name = base_name + "qint8_const"
+  min_name = base_name + "min"
+  max_name = base_name + "max"
+  float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
+  min_value = np.min(float_tensor.flatten())
+  max_value = np.max(float_tensor.flatten())
+  # Same processing of min-max as in quantize_weight_eightbit function.
+  if min_value > 0.0:
+    min_value = 0.0
+  if min_value == max_value:
+    if abs(min_value) < 0.000001:
+      max_value = min_value + 1.0
+    elif min_value > 0:
+      max_value = 2 * min_value
+    else:
+      max_value = min_value / 2.0
+
+  sess = session.Session()
+  with sess.as_default():
+    quantize_op = array_ops.quantize_v2(
+        float_tensor,
+        min_value,
+        max_value,
+        dtypes.qint8,
+        mode=quantization_mode,
+        round_mode="HALF_TO_EVEN")
+    qint8_tensor = quantize_op[0].eval()
+    # Updated min-max values should be passed to the next feeding node.
+    min_value = quantize_op[1].eval()
+    max_value = quantize_op[2].eval()
+  shape = tensor_util.TensorShapeProtoToList(input_node.attr["value"]
+                                             .tensor.tensor_shape)
+  qint8_const_node = create_constant_node(
+      qint8_const_name, qint8_tensor,
+      dtypes.qint8,
+      shape=shape)
+  min_node = create_constant_node(min_name, min_value, dtypes.float32)
+  max_node = create_constant_node(max_name, max_value, dtypes.float32)
+  dequantize_node = create_node("Dequantize", input_node.name,
+                                [qint8_const_name, min_name, max_name])
+  set_attr_dtype(dequantize_node, "T", dtypes.qint8)
+  set_attr_string(dequantize_node, "mode", b'SCALED')
+  return [qint8_const_node, min_node, max_node, dequantize_node]
 
 EightbitizeRecursionState = collections.namedtuple(
     "EightbitizeRecursionState",
@@ -322,7 +384,8 @@ class GraphRewriter(object):
                input_graph,
                mode,
                quantized_input_range,
-               fallback_quantization_range=None):
+               fallback_quantization_range=None,
+               intel_cpu_eightbitize=False):
     """Sets up the class to rewrite a float graph.
 
     Args:
@@ -344,6 +407,7 @@ class GraphRewriter(object):
     self.nodes_map = self.create_nodes_map(input_graph)
     self.output_graph = None
     self.mode = mode
+    self.intel_cpu_eightbitize = intel_cpu_eightbitize
     self.final_node_renames = {}
     if quantized_input_range:
       self.input_range = (quantized_input_range[0], quantized_input_range[1])
@@ -417,8 +481,22 @@ class GraphRewriter(object):
 
       self.state = EightbitizeRecursionState(
           already_visited={}, output_node_stack=[], merged_with_fake_quant={})
-      for output_node in output_nodes:
-        self.eightbitize_nodes_recursively(output_node)
+
+      if self.intel_cpu_eightbitize:
+        # TODO(intel-tf): Enables fused quantized node for intel cpu.
+        for output_node in output_nodes:
+          # Intiailize output_node_stack with output node.
+          # Each element in the stack is a mutable list containing
+          # [parent_node, index_to_parent, quantization_flag, fusion_flag].
+          # In case of root node, make self as parent.
+          self.state.output_node_stack.append(
+              [output_node, None, False, False])
+          self.intel_cpu_eightbitize_nodes_recursively(output_node)
+          self.state.output_node_stack.pop()
+      else:
+        for output_node in output_nodes:
+          self.eightbitize_nodes_recursively(output_node)
+
       self.state = None
       if self.input_range:
         self.add_output_graph_node(
@@ -653,6 +731,200 @@ class GraphRewriter(object):
           (self.state.output_node_stack[-1][0], current_node.name,
            current_node.op))
 
+  # TODO(intel-tf): Quantized Conv2D could be fused with few other succeeding
+  # ops. Current support is for BiasAdd and Relu. Future implementation will
+  # include:
+  # (i)   Conv2D + {BiasAdd} + Relu + Add + Relu
+  # (ii)  Conv2D + {BiasAdd} + Relu + Add
+  # (ii)  Conv2D + {BiasAdd} + Add + Relu
+  # (iii) Conv2D + {BiasAdd} + Add
+  def intel_cpu_eightbitize_conv_node(self, original_node, bias_node=None,
+                                      bias_add_name=None, add_node_name=None,
+                                      relu_node_name=None):
+    """Replaces a Conv2D node with the eight bit equivalent sub-graph."""
+    all_input_names = self.add_eightbit_prologue_nodes(original_node)
+
+    if bias_node and add_node_name and relu_node_name:
+      new_node = node_def_pb2.NodeDef()
+      new_node.CopyFrom(bias_node)
+      self.add_output_graph_node(new_node)
+      all_input_names = all_input_names[:2] + [bias_node.name] + \
+          all_input_names[2:] + [add_node_name]
+      quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
+      quantized_conv_node = create_node("QuantizedConv2DWithBiasSumAndRelu",
+                                        quantized_conv_name, all_input_names)
+    elif bias_node and (not add_node_name) and relu_node_name:
+      new_node = node_def_pb2.NodeDef()
+      new_node.CopyFrom(bias_node)
+      self.add_output_graph_node(new_node)
+      all_input_names = all_input_names[:2] + [bias_node.name] + \
+          all_input_names[2:]
+      quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
+      quantized_conv_node = create_node("QuantizedConv2DWithBiasAndRelu",
+                                        quantized_conv_name, all_input_names)
+    elif bias_node and bias_add_name  and \
+        (not add_node_name) and (not relu_node_name):
+      new_node = node_def_pb2.NodeDef()
+      new_node.CopyFrom(bias_node)
+      self.add_output_graph_node(new_node)
+      all_input_names = all_input_names[:2] + [bias_node.name] + \
+          all_input_names[2:]
+      quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
+      quantized_conv_node = create_node("QuantizedConv2DWithBias",
+                                        quantized_conv_name, all_input_names)
+    else:
+      quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
+      quantized_conv_node = create_node("QuantizedConv2D", quantized_conv_name,
+                                        all_input_names)
+    copy_attr(quantized_conv_node, "strides", original_node.attr["strides"])
+    copy_attr(quantized_conv_node, "padding", original_node.attr["padding"])
+    copy_attr(quantized_conv_node, "dilations", original_node.attr["dilations"])
+    set_attr_dtype(quantized_conv_node, "Tinput", dtypes.quint8)
+    set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8)
+    set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32)
+    self.add_output_graph_node(quantized_conv_node)
+    quantize_down_name = self.add_quantize_down_nodes(original_node,
+                                                      quantized_conv_name)
+    if bias_node and relu_node_name:
+      self.add_dequantize_result_node(quantize_down_name, relu_node_name)
+    elif bias_node and bias_add_name and \
+        (not add_node_name) and (not relu_node_name):
+      self.add_dequantize_result_node(quantize_down_name, bias_add_name)
+    else:
+      self.add_dequantize_result_node(quantize_down_name, original_node.name)
+
+  # TODO(intel-tf): To check whether Conv2D is fed by relu directly or via
+  # pooling ops. This is required as intel cpu requires input tensor for Conv2D
+  # to be non-negative.
+  def intel_cpu_find_relu_recursively(self, current_node):
+    """Helper function to check if Conv2D is fed by Relu."""
+    if current_node.op == "Relu":
+      return True
+    else:
+      first_input_node_name = node_name_from_input(current_node.input[0])
+      input_node = self.nodes_map[first_input_node_name]
+      if input_node.op in ("ConcatV2", "MaxPool", "AvgPool", "Relu"):
+        return self.intel_cpu_find_relu_recursively(input_node)
+      else:
+        return False
+
+  # TODO(intel-tf): We leave the output graph partially quantized for
+  # intel cpu. Current quantization support is for Conv2D and its fusion.
+  # More quantized operations will be included as more implementations are
+  # completed.
+  def intel_cpu_eightbitize_nodes_recursively(self, current_node):
+    """The entry point for transforming a graph into full eight bit."""
+    if current_node.name in self.state.already_visited:
+      if (self.should_merge_with_fake_quant_node() or
+          current_node.name in self.state.merged_with_fake_quant):
+        raise ValueError("Unsupported graph structure: output of node %s "
+                         "is processed by a FakeQuant* node and should have "
+                         "no other outputs.", current_node.name)
+      return
+
+    self.state.already_visited[current_node.name] = True
+    quantize_input, should_quantize_conv, \
+        fuse_with_conv = (False, False, False)
+
+    if current_node.op == "Conv2D":
+      should_quantize_conv = self.intel_cpu_find_relu_recursively(current_node)
+
+    inputs = list(enumerate(current_node.input))
+    if current_node.op == "AddN":
+      inputs = reversed(inputs)  # pylint: disable=redefined-variable-type
+
+    for i, input_node_name in inputs:
+      input_node_name = node_name_from_input(input_node_name)
+      input_node = self.nodes_map[input_node_name]
+
+      if should_quantize_conv and i == 1 and input_node.op == "Const":
+        quantize_input = True
+
+      self.state.output_node_stack.append([current_node, i, quantize_input,
+                                           fuse_with_conv])
+      self.intel_cpu_eightbitize_nodes_recursively(input_node)
+      self.state.output_node_stack.pop()
+
+    if current_node.op == "Conv2D" and should_quantize_conv and quantize_input:
+      # match pattern for fusion with bias and relu
+      grand_parent, parent = self.state.output_node_stack[-2:]
+      if parent[0].op == "BiasAdd" and grand_parent[0].op == "Relu":
+        self.state.output_node_stack[-2][3] = True # BiasAdd to be fused
+        self.state.output_node_stack[-3][3] = True # Relu to be fused
+        bias_node_name = node_name_from_input(parent[0].input[1])
+        bias_node = self.nodes_map[bias_node_name]
+        self.intel_cpu_eightbitize_conv_node(current_node, bias_node, None,
+                                             None, grand_parent[0].name)
+      elif parent[0].op == "BiasAdd" and grand_parent[0].op == "AddN":
+        grand_grand_parent = self.state.output_node_stack[-3]
+        if grand_grand_parent[0].op == "Relu" \
+            and (not self.state.output_node_stack[-3][3]) \
+            and (not self.state.output_node_stack[-4][3]):
+          self.state.output_node_stack[-2][3] = True # BiasAdd to be fused
+          self.state.output_node_stack[-3][3] = True # AddN to be fused
+          self.state.output_node_stack[-4][3] = True # Relu to be fused
+          bias_node_name = node_name_from_input(parent[0].input[1])
+          bias_node = self.nodes_map[bias_node_name]
+          add_node_name = node_name_from_input(grand_parent[0].input[0])
+          self.intel_cpu_eightbitize_conv_node(current_node, bias_node, None,
+                                               add_node_name,
+                                               grand_grand_parent[0].name)
+        elif not self.state.output_node_stack[-2][3]: # Fuse BiasAdd then
+          self.state.output_node_stack[-2][3] = True # BiasAdd to be fused
+          bias_node_name = node_name_from_input(parent[0].input[1])
+          bias_node = self.nodes_map[bias_node_name]
+          self.intel_cpu_eightbitize_conv_node(current_node, bias_node,
+                                               parent[0].name)
+        else:
+          self.intel_cpu_eightbitize_conv_node(current_node)
+      elif parent[0].op == "BiasAdd" and \
+           (not self.state.output_node_stack[-2][3]):
+        self.state.output_node_stack[-2][3] = True # BiasAdd to be fused
+        bias_node_name = node_name_from_input(parent[0].input[1])
+        bias_node = self.nodes_map[bias_node_name]
+        self.intel_cpu_eightbitize_conv_node(current_node, bias_node,
+                                             parent[0].name)
+      else:
+        self.intel_cpu_eightbitize_conv_node(current_node)
+    elif current_node.op == "BiasAdd" and \
+         self.state.output_node_stack[-1][3]:
+      pass # This op is already processed by fused quantization
+    elif current_node.op == "Relu" and \
+         self.state.output_node_stack[-1][3]:
+      pass # This op is already processed by fused quantization
+    elif current_node.op == "AddN" and \
+         self.state.output_node_stack[-1][3]:
+      pass # AddN op is already processed by fused quatization
+    elif current_node.op == "MaxPool" or current_node.op == "AvgPool":
+      self.eightbitize_single_input_tensor_node(current_node,
+                                                self.add_pool_function)
+    elif (current_node.op == "ConcatV2" and
+          dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32):
+      self.eightbitize_concatv2_node(current_node)
+    elif current_node.op == "Const":
+      parent = self.state.output_node_stack[-1]
+      if parent[0].op == "Conv2D" and parent[2]:
+        for n in intel_cpu_quantize_weight_eightbit(current_node, b"SCALED"):
+          self.add_output_graph_node(n)
+      elif parent[0].op == "BiasAdd" and \
+           self.state.output_node_stack[-2][3]:
+        pass # This constant is already process by fused quantization
+      else:
+        new_node = node_def_pb2.NodeDef()
+        new_node.CopyFrom(current_node)
+        self.add_output_graph_node(new_node)
+    else:
+      new_node = node_def_pb2.NodeDef()
+      new_node.CopyFrom(current_node)
+      self.add_output_graph_node(new_node)
+
+    if (self.should_merge_with_fake_quant_node() and
+        current_node.name not in self.state.merged_with_fake_quant):
+      raise ValueError(
+          "FakeQuant* node %s failed to merge with node %s of type %s" %
+          (self.state.output_node_stack[-1][0], current_node.name,
+           current_node.op))
+
   def add_eightbit_prologue_nodes(self, original_node):
     """Adds input conversion nodes to handle quantizing the underlying node."""
     namespace_prefix = original_node.name + "_eightbit"
@@ -712,7 +984,11 @@ class GraphRewriter(object):
         "QuantizeV2", quantize_input_name,
         [original_input_name, min_input_name, max_input_name])
     set_attr_dtype(quantize_input_node, "T", dtypes.quint8)
-    set_attr_string(quantize_input_node, "mode", b"MIN_FIRST")
+    set_attr_string(quantize_input_node, "mode",
+                    b"SCALED" if self.intel_cpu_eightbitize else  b"MIN_FIRST")
+    set_attr_string(quantize_input_node, "round_mode",
+                    b"HALF_TO_EVEN" if self.intel_cpu_eightbitize
+                    else  b"HALF_AWAY_FROM_ZERO")
     self.add_output_graph_node(quantize_input_node)
     min_output_name = quantize_input_name + ":1"
     max_output_name = quantize_input_name + ":2"
@@ -965,6 +1241,44 @@ class GraphRewriter(object):
     self.add_output_graph_node(quantized_concat_node)
     self.add_dequantize_result_node(quantized_concat_name, original_node.name)
 
+  def eightbitize_concatv2_node(self, original_node):
+    """
+    Args:
+      original_node: Float node to be converted.
+
+    Returns:
+      Subgraph representing the quantized version of the original node.
+
+    """
+    namespace_prefix = original_node.name + "_eightbit"
+    quantized_concat_name = namespace_prefix + "_quantized_concatv2"
+    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
+        namespace_prefix)
+    num_input = len(original_node.input)
+    shape_input_name = original_node.input[num_input-1]
+    original_inputs = original_node.input[0:num_input-1]
+    input_names = []
+    min_names = []
+    max_names = []
+    for original_input_name in original_inputs:
+      quantize_input_name, min_input_name, max_input_name = (
+          self.eightbitize_input_to_node(namespace_prefix, original_input_name,
+                                         reshape_dims_name,
+                                         reduction_dims_name))
+      input_names.append(quantize_input_name)
+      min_names.append(min_input_name)
+      max_names.append(max_input_name)
+    all_input_names = input_names
+    all_input_names.append(shape_input_name)
+    all_input_names.extend(min_names)
+    all_input_names.extend(max_names)
+    quantized_concat_node = create_node("QuantizedConcatV2",
+                                        quantized_concat_name, all_input_names)
+    set_attr_int(quantized_concat_node, "N", len(original_inputs))
+    set_attr_dtype(quantized_concat_node, "T", dtypes.quint8)
+    self.add_output_graph_node(quantized_concat_node)
+    self.add_dequantize_result_node(quantized_concat_name, original_node.name)
+
   def eightbitize_placeholder_node(self, current_node):
     """Replaces a placeholder node with a quint8 placeholder node+dequantize."""
     name = current_node.name
@@ -1249,7 +1563,6 @@ class GraphRewriter(object):
     self.input_graph = new_input_graph
     self.nodes_map = self.create_nodes_map(self.input_graph)
 
-
 def main(unused_args):
   if not gfile.Exists(FLAGS.input):
     print("Input graph file '" + FLAGS.input + "' does not exist!")
@@ -1264,9 +1577,14 @@ def main(unused_args):
     return -1
 
   tf_graph = graph_pb2.GraphDef()
-  with gfile.Open(FLAGS.input, "rb") as f:
+  # TODO(intel-tf): Enabling user to work with both binary and text format.
+  mode = "rb" if FLAGS.input_binary else "r"
+  with gfile.Open(FLAGS.input, mode) as f:
     data = f.read()
-    tf_graph.ParseFromString(data)
+    if FLAGS.input_binary:
+      tf_graph.ParseFromString(data)
+    else:
+      text_format.Merge(data, tf_graph)
 
   graph = ops.Graph()
   with graph.as_default():
@@ -1287,16 +1605,21 @@ def main(unused_args):
         FLAGS.quantized_fallback_min, FLAGS.quantized_fallback_max
     ]
 
-  rewriter = GraphRewriter(tf_graph, FLAGS.mode, quantized_input_range,
-                           fallback_quantization_range)
+  rewriter = GraphRewriter(tf_graph, FLAGS.mode,
+                           quantized_input_range, fallback_quantization_range,
+                           FLAGS.intel_cpu_eightbitize)
 
   output_graph = rewriter.rewrite(FLAGS.output_node_names.split(","))
 
-  f = gfile.FastGFile(FLAGS.output, "wb")
-  f.write(output_graph.SerializeToString())
+  # TODO(intel-tf): Enabling user to work with both binary and text format.
+  mode = "wb" if FLAGS.output_binary else "w"
+  f = gfile.FastGFile(FLAGS.output, mode)
+  if FLAGS.output_binary:
+    f.write(output_graph.SerializeToString())
+  else:
+    f.write(str(output_graph))
 
   return 0
 
-
 if __name__ == "__main__":
   app.run()
-- 
GitLab


From aa25cc078c9b55e5ca3e0f59df43e169bfee8f3c Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Thu, 16 Aug 2018 19:04:37 +0800
Subject: [PATCH 0021/1085] Add LeakyRelu C++ Op and its gradient
 implementation.

LeakyRelu, defined as 'y = { x (x>=0) or alpha*x (x<0) }', was computed
by combined Ops 'max(x, alpha*x)' in current codes. Hence its gradient
calculation for back propagation would contain a serial of element-wise
Ops. This looks really unnecessary for such a simple op and it could be
done within just one Op with less memory accesses.
---
 tensorflow/cc/gradients/nn_grad.cc            |  13 ++
 tensorflow/cc/gradients/nn_grad_test.cc       |  13 ++
 tensorflow/core/kernels/relu_op.cc            | 153 +++++++++++-------
 tensorflow/core/kernels/relu_op.h             |  59 +++++++
 tensorflow/core/kernels/relu_op_functor.h     |  31 ++++
 tensorflow/core/kernels/relu_op_gpu.cu.cc     |  18 ++-
 tensorflow/core/ops/nn_ops.cc                 |  15 ++
 tensorflow/core/ops/ops.pbtxt                 |  68 ++++++++
 tensorflow/python/eager/pywrap_tfe_src.cc     |   2 +
 .../python/kernel_tests/relu_op_test.py       | 113 +++++++++++++
 tensorflow/python/ops/nn_grad.py              |  15 ++
 tensorflow/python/ops/nn_ops.py               |   3 +-
 12 files changed, 432 insertions(+), 71 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc
index 588e96cb19..0fc23d0bf7 100644
--- a/tensorflow/cc/gradients/nn_grad.cc
+++ b/tensorflow/cc/gradients/nn_grad.cc
@@ -143,6 +143,19 @@ Status Relu6GradHelper(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("Relu6", Relu6GradHelper);
 
+Status LeakyReluGradHelper(const Scope& scope, const Operation& op,
+                           const std::vector<Output>& grad_inputs,
+                           std::vector<Output>* grad_outputs) {
+  float alpha;
+  TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha));
+  internal::LeakyReluGrad::Attrs attrs;
+  attrs.Alpha(alpha);
+  auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs);
+  grad_outputs->push_back(dx);
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper);
+
 Status EluGradHelper(const Scope& scope, const Operation& op,
                      const std::vector<Output>& grad_inputs,
                      std::vector<Output>* grad_outputs) {
diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index aa72cf7ba2..5ebece7b6e 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -41,6 +41,7 @@ using ops::MaxPoolV2;
 using ops::Placeholder;
 using ops::Relu;
 using ops::Relu6;
+using ops::LeakyRelu;
 using ops::Selu;
 using ops::Softmax;
 using ops::Softplus;
@@ -160,6 +161,18 @@ TEST_F(NNGradTest, Relu6Grad) {
   RunTest(x, x_init_value, y, shape);
 }
 
+TEST_F(NNGradTest, LeakyReluGrad) {
+  TensorShape shape({5, 2});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+  auto y = LeakyRelu(scope_, x);
+  // Avoid input values where Leaky ReLU gradient is not well defined (around
+  // zero).
+  Tensor x_init_value = test::AsTensor<float>(
+      {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f},
+      {5, 2});
+  RunTest(x, x_init_value, y, shape);
+}
+
 TEST_F(NNGradTest, EluGrad) {
   TensorShape shape({5, 2});
   auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc
index d52358737f..c4f2ef5632 100644
--- a/tensorflow/core/kernels/relu_op.cc
+++ b/tensorflow/core/kernels/relu_op.cc
@@ -33,19 +33,25 @@ typedef Eigen::GpuDevice GPUDevice;
 typedef Eigen::SyclDevice SYCLDevice;
 #endif  // TENSORFLOW_USE_SYCL
 
-#define REGISTER_RELU_KERNELS(type)                                   \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"),      \
-      ReluOp<CPUDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"),  \
-      ReluGradOp<CPUDevice, type>);                                   \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
-      Relu6Op<CPUDevice, type>);                                      \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
-      Relu6GradOp<CPUDevice, type>)
+#define REGISTER_RELU_KERNELS(type)                                       \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"),          \
+      ReluOp<CPUDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"),      \
+      ReluGradOp<CPUDevice, type>);                                       \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"),         \
+      Relu6Op<CPUDevice, type>);                                          \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
+      Relu6GradOp<CPUDevice, type>)                                       \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
+      LeakyReluOp<CPUDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
+      LeakyReluGradOp<CPUDevice, type>);
 
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS);
 #undef REGISTER_RELU_KERNELS
@@ -99,6 +105,19 @@ namespace functor {
   extern template struct Relu6Grad<GPUDevice, T>;                              \
                                                                                \
   template <>                                                                  \
+  void LeakyRelu<GPUDevice, T>::operator()(                                    \
+      const GPUDevice& d, typename TTypes<T>::ConstTensor features,            \
+      T alpha, typename TTypes<T>::Tensor activations);                        \
+  extern template struct LeakyRelu<GPUDevice, T>;                              \
+                                                                               \
+  template <>                                                                  \
+  void LeakyReluGrad<GPUDevice, T>::operator()(                                \
+      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,           \
+      typename TTypes<T>::ConstTensor features,                                \
+      T alpha, typename TTypes<T>::Tensor backprops);                          \
+  extern template struct LeakyReluGrad<GPUDevice, T>;                          \
+                                                                               \
+  template <>                                                                  \
   void Elu<GPUDevice, T>::operator()(const GPUDevice& d,                       \
                                      typename TTypes<T>::ConstTensor features, \
                                      typename TTypes<T>::Tensor activations);  \
@@ -128,30 +147,36 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
 }  // namespace functor
 
 // Registration of the GPU implementations.
-#define REGISTER_GPU_KERNELS(type)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
-      ReluOp<GPUDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),  \
-      ReluGradOp<GPUDevice, type>);                                   \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
-      Relu6Op<GPUDevice, type>);                                      \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
-      Relu6GradOp<GPUDevice, type>);                                  \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"),       \
-      EluOp<GPUDevice, type>);                                        \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),   \
-      EluGradOp<GPUDevice, type>);                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
-      SeluOp<GPUDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),  \
+#define REGISTER_GPU_KERNELS(type)                                        \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"),          \
+      ReluOp<GPUDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
+      ReluGradOp<GPUDevice, type>);                                       \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"),         \
+      Relu6Op<GPUDevice, type>);                                          \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
+      Relu6GradOp<GPUDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
+      LeakyReluOp<GPUDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
+      LeakyReluGradOp<GPUDevice, type>);                                  \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"),           \
+      EluOp<GPUDevice, type>);                                            \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),       \
+      EluGradOp<GPUDevice, type>);                                        \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"),          \
+      SeluOp<GPUDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
       SeluGradOp<GPUDevice, type>)
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
@@ -161,30 +186,36 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
 
 #ifdef TENSORFLOW_USE_SYCL
 // Registration of the GPU implementations.
-#define REGISTER_SYCL_KERNELS(type)                                    \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Relu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
-      ReluOp<SYCLDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),  \
-      ReluGradOp<SYCLDevice, type>);                                   \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Relu6").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
-      Relu6Op<SYCLDevice, type>);                                      \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      Relu6GradOp<SYCLDevice, type>);                                  \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Elu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),       \
-      EluOp<SYCLDevice, type>);                                        \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),   \
-      EluGradOp<SYCLDevice, type>);                                    \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("Selu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
-      SeluOp<SYCLDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),  \
+#define REGISTER_SYCL_KERNELS(type)                                        \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Relu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),          \
+      ReluOp<SYCLDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
+      ReluGradOp<SYCLDevice, type>);                                       \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Relu6").Device(DEVICE_SYCL).TypeConstraint<type>("T"),         \
+      Relu6Op<SYCLDevice, type>);                                          \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
+      Relu6GradOp<SYCLDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("LeakyRelu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
+      LeakyReluOp<SYCLDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("LeakyReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
+      LeakyReluGradOp<SYCLDevice, type>);                                  \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Elu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),           \
+      EluOp<SYCLDevice, type>);                                            \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),       \
+      EluGradOp<SYCLDevice, type>);                                        \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("Selu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),          \
+      SeluOp<SYCLDevice, type>);                                           \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
       SeluGradOp<SYCLDevice, type>)
 
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS);
diff --git a/tensorflow/core/kernels/relu_op.h b/tensorflow/core/kernels/relu_op.h
index e712b02bd7..c55190065c 100644
--- a/tensorflow/core/kernels/relu_op.h
+++ b/tensorflow/core/kernels/relu_op.h
@@ -131,6 +131,65 @@ void Relu6GradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
           output->flat<T>());
 }
 
+template <typename Device, typename T>
+class LeakyReluOp : public UnaryElementWiseOp<T, LeakyReluOp<Device, T>> {
+ public:
+  explicit LeakyReluOp(OpKernelConstruction* context)
+      : UnaryElementWiseOp<T, LeakyReluOp<Device, T>>(context) {
+    float alpha_tmp;
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp));
+    alpha_ = T(alpha_tmp);
+  }
+
+  void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
+    functor::LeakyRelu<Device, T> functor;
+    functor(context->eigen_device<Device>(), input.flat<T>(),
+            alpha_, output->flat<T>());
+  }
+
+ private:
+  T alpha_;
+};
+
+template <typename Device, typename T>
+class LeakyReluGradOp
+    : public BinaryElementWiseOp<T, LeakyReluGradOp<Device, T>> {
+ public:
+  explicit LeakyReluGradOp(OpKernelConstruction* context)
+      : BinaryElementWiseOp<T, LeakyReluGradOp<Device, T>>(context) {
+    float alpha_tmp;
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp));
+    alpha_ = T(alpha_tmp);
+  }
+
+  void OperateNoTemplate(OpKernelContext* context, const Tensor& g,
+                         const Tensor& a, T alpha, Tensor* output);
+
+  // INPUTS:
+  //   g (gradients): backpropagated gradients
+  //   a (inputs): either the inputs that were passed to LeakyReluOp(), or its
+  //               outputs (using either one yields the same result here).
+  // OUTPUT:
+  //   gradients to backprop
+  template <int NDIMS>
+  void Operate(OpKernelContext* context, const Tensor& g, const Tensor& a,
+               Tensor* output) {
+    OperateNoTemplate(context, g, a, alpha_, output);
+  }
+
+ private:
+  T alpha_;
+};
+
+template <typename Device, typename T>
+void LeakyReluGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
+    const Tensor& g, const Tensor& a, T alpha, Tensor* output) {
+  if (!ReluHelpers::ValidateSameSize(context, g, a)) return;
+  functor::LeakyReluGrad<Device, T> functor;
+  functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(), alpha,
+          output->flat<T>());
+};
+
 template <typename Device, typename T>
 class EluOp : public UnaryElementWiseOp<T, EluOp<Device, T>> {
  public:
diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h
index 3bc5ba8a50..7f0951451d 100644
--- a/tensorflow/core/kernels/relu_op_functor.h
+++ b/tensorflow/core/kernels/relu_op_functor.h
@@ -91,6 +91,37 @@ struct Relu6Grad {
   }
 };
 
+
+// Functor used by LeakyReluOp to do the computations.
+template <typename Device, typename T>
+struct LeakyRelu {
+  // Computes LeakyRelu activation.
+  //
+  // features: any shape.
+  // activations: same shape as "features".
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor features,
+                  T alpha, typename TTypes<T>::Tensor activations) {
+    activations.device(d) = features.cwiseMax(features * alpha);
+  }
+};
+
+// Functor used by LeakyReluGradOp to do the computations.
+template <typename Device, typename T>
+struct LeakyReluGrad {
+  // Computes LeakyReluGrad backprops.
+  //
+  // gradients: gradients backpropagated to the LeakyRelu op.
+  // features: either the inputs that were passed to the LeakyRelu or, or its
+  //           outputs (using either one yields the same result here).
+  // backprops: gradients to backpropagate to the LeakyRelu inputs.
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
+                  typename TTypes<T>::ConstTensor features, T alpha,
+                  typename TTypes<T>::Tensor backprops) {
+    backprops.device(d) =
+        (features > static_cast<T>(0)).select(gradients, gradients * alpha);
+  }
+};
+
 // Functor used by EluOp to do the computations.
 template <typename Device, typename T>
 struct Elu {
diff --git a/tensorflow/core/kernels/relu_op_gpu.cu.cc b/tensorflow/core/kernels/relu_op_gpu.cu.cc
index 089ca8ed27..4452f4dcc9 100644
--- a/tensorflow/core/kernels/relu_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/relu_op_gpu.cu.cc
@@ -114,14 +114,16 @@ struct ReluGrad<Device, Eigen::half> {
 }  // namespace functor
 
 // Definition of the GPU implementations declared in relu_op.cc.
-#define DEFINE_GPU_KERNELS(T)                       \
-  template struct functor::Relu<GPUDevice, T>;      \
-  template struct functor::ReluGrad<GPUDevice, T>;  \
-  template struct functor::Relu6<GPUDevice, T>;     \
-  template struct functor::Relu6Grad<GPUDevice, T>; \
-  template struct functor::Elu<GPUDevice, T>;       \
-  template struct functor::EluGrad<GPUDevice, T>;   \
-  template struct functor::Selu<GPUDevice, T>;      \
+#define DEFINE_GPU_KERNELS(T)                           \
+  template struct functor::Relu<GPUDevice, T>;          \
+  template struct functor::ReluGrad<GPUDevice, T>;      \
+  template struct functor::Relu6<GPUDevice, T>;         \
+  template struct functor::Relu6Grad<GPUDevice, T>;     \
+  template struct functor::LeakyRelu<GPUDevice, T>;     \
+  template struct functor::LeakyReluGrad<GPUDevice, T>; \
+  template struct functor::Elu<GPUDevice, T>;           \
+  template struct functor::EluGrad<GPUDevice, T>;       \
+  template struct functor::Selu<GPUDevice, T>;          \
   template struct functor::SeluGrad<GPUDevice, T>;
 
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index e0f25fb4ef..023f988f80 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -983,6 +983,21 @@ REGISTER_OP("Relu6Grad")
     .Attr("T: realnumbertype")
     .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
 
+REGISTER_OP("LeakyRelu")
+    .Input("features: T")
+    .Output("activations: T")
+    .Attr("alpha: float = 0.2")
+    .Attr("T: {half, float, double} = DT_FLOAT")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("LeakyReluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Output("backprops: T")
+    .Attr("alpha: float = 0.2")
+    .Attr("T: {half, float, double} = DT_FLOAT")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
+
 REGISTER_OP("Elu")
     .Input("features: T")
     .Output("activations: T")
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index f2595279e0..837e91bc23 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -13604,6 +13604,74 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "LeakyRelu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 0.2
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LeakykReluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 0.2
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
 op {
   name: "LearnedUnigramCandidateSampler"
   input_arg {
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 2d54555cd3..9b3b5fd7aa 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) {
           "SoftplusGrad",
           "Softsign",
           "ReluGrad",
+          "LeakyReluGrad",
           "Conv2D",
           "DepthwiseConv2dNative",
           "Dilation2D",
@@ -1799,6 +1800,7 @@ bool OpDoesntRequireInput(const string& op_name) {
           "BiasAdd",
           "Relu",
           "Relu6",
+          "LeakyRelu",
           "Elu",
           "Selu",
           "SparseSoftmaxCrossEntropyWithLogits",
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 25e947f09e..ccb3a231bb 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -252,6 +252,119 @@ class Relu6Test(test.TestCase):
     self.assertLess(err, 1e-10)
 
 
+class LeakyReluTest(test.TestCase):
+
+  def _npLeakyRelu(self, np_features, alpha=0.1):
+    return np.maximum(np_features, alpha * np_features)
+
+  def testNpLeakyRelu(self):
+    self.assertAllClose(
+        np.array([[-0.09, 0.7, -0.05, 0.3, -0.01],
+                  [0.1, -0.03, 0.5, -0.07, 0.9]]),
+        self._npLeakyRelu(
+            np.array([[-0.9, 0.7, -0.5, 0.3, -0.1], [0.1, -0.3, 0.5, -0.7, 0.9]
+                     ]), alpha=0.1))
+
+  def _testLeakyRelu(self, np_features, alpha, use_gpu=False):
+    np_leaky_relu = self._npLeakyRelu(np_features, alpha)
+    with self.test_session(use_gpu=use_gpu):
+      leaky_relu = nn_ops.leaky_relu(np_features, alpha)
+      tf_leaky_relu = leaky_relu.eval()
+    self.assertAllClose(np_leaky_relu, tf_leaky_relu)
+    self.assertShapeEqual(np_leaky_relu, leaky_relu)
+
+  def testNumbers(self):
+    for t in [np.int32, np.int64, np.float16, np.float32, np.float64]:
+      self._testLeakyRelu(
+          np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
+          alpha=0.2, use_gpu=False)
+      if t in [np.float16, np.float32, np.float64]:
+        self._testLeakyRelu(
+            np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
+            alpha=0.1, use_gpu=True)
+
+  # The gradient test for ReLU is a bit tricky as the derivative is not well
+  # defined at around zero and we want to avoid that in terms of input values.
+  def testGradientFloat32(self):
+    with self.test_session():
+      x = constant_op.constant(
+          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+          shape=[2, 5],
+          name="x")
+      y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
+      x_init = np.asarray(
+          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+          dtype=np.float32,
+          order="F")
+      err = gradient_checker.compute_gradient_error(
+          x, [2, 5], y, [2, 5], x_init_value=x_init)
+    print("leaky_relu (float32) gradient err = ", err)
+    self.assertLess(err, 1e-4)
+
+  def testGradientFloat64(self):
+    with self.test_session():
+      x = constant_op.constant(
+          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+          shape=[2, 5],
+          dtype=dtypes.float64,
+          name="x")
+      y = nn_ops.leaky_relu(x, alpha=0.2, name="leaky_relu")
+      x_init = np.asarray(
+          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+          dtype=np.float64,
+          order="F")
+      err = gradient_checker.compute_gradient_error(
+          x, [2, 5], y, [2, 5], x_init_value=x_init)
+    print("leaky_relu (float64) gradient err = ", err)
+    self.assertLess(err, 1e-10)
+
+  def testGradGradFloat32(self):
+    with self.test_session():
+      x = constant_op.constant(
+          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+          shape=[2, 5],
+          name="x")
+      y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
+      z = gradients_impl.gradients(y, x)
+      x_init = np.asarray(
+          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+          dtype=np.float32,
+          order="F")
+      err = gradient_checker.compute_gradient_error(
+          x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+    print("leaky_relu (float32) gradient of gradient err = ", err)
+    self.assertLess(err, 1e-4)
+
+  def testGradGradFloat64(self):
+    with self.test_session():
+      x = constant_op.constant(
+          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+          shape=[2, 5],
+          dtype=dtypes.float64,
+          name="x")
+      y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
+      z = gradients_impl.gradients(y, x)
+      x_init = np.asarray(
+          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+          dtype=np.float64,
+          order="F")
+      err = gradient_checker.compute_gradient_error(
+          x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+    print("leaky_relu (float64) gradient of gradient err = ", err)
+    self.assertLess(err, 1e-10)
+
+  def testGradientScalar(self):
+    with self.test_session() as sess:
+      x = variables.Variable(-100.)
+      y = nn_ops.leaky_relu(x, 0.05)
+      loss = y**2
+      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.2)
+      train_op = optimizer.minimize(loss)
+      sess.run(variables.global_variables_initializer())
+      sess.run(train_op)
+      self.assertAllClose(x.eval(), -99.9)
+
+
 class EluTest(test.TestCase):
 
   def _npElu(self, np_features):
diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py
index df23ac55ce..c2dd58bdf0 100644
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@@ -390,6 +390,21 @@ def _Relu6GradGrad(op, grad):
           array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype))
 
 
+@ops.RegisterGradient("LeakyRelu")
+def _LeakyReluGrad(op, grad):
+  x = op.inputs[0]
+  alpha = op.get_attr("alpha")
+  return gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha)
+
+
+@ops.RegisterGradient("LeakyReluGrad")
+def _LeakyReluGradGrad(op, grad):
+  x = op.inputs[1]
+  alpha = op.get_attr("alpha")
+  return (gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha),
+          array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype))
+
+
 @ops.RegisterGradient("Elu")
 def _EluGrad(op, grad):
   return gen_nn_ops.elu_grad(grad, op.outputs[0])
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 6fd1273687..31b8f3945d 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1601,8 +1601,7 @@ def leaky_relu(features, alpha=0.2, name=None):
     features = ops.convert_to_tensor(features, name="features")
     if features.dtype.is_integer:
       features = math_ops.to_float(features)
-    alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha")
-    return math_ops.maximum(alpha * features, features, name=name)
+    return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name)
 
 
 def _flatten_outer_dims(logits):
-- 
GitLab


From bc5d68b95a8ca1410905d532d1c356dd5c76cd30 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 21 Aug 2018 20:56:48 +0000
Subject: [PATCH 0022/1085] Update test case to use file_io.get_matching_files

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/lib/io/file_io_test.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py
index 1d247aa8ba..d17c02fe9e 100644
--- a/tensorflow/python/lib/io/file_io_test.py
+++ b/tensorflow/python/lib/io/file_io_test.py
@@ -23,7 +23,6 @@ import os.path
 
 from tensorflow.python.framework import errors
 from tensorflow.python.lib.io import file_io
-from tensorflow.python.ops import gen_io_ops
 from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
@@ -585,7 +584,6 @@ class FileIoTest(test.TestCase):
     self.assertEqual(crc2, crc3)
 
   def testMatchingFilesPermission(self):
-    # Test case for GitHub issue 19274.
     # Create top level directory test_dir.
     dir_path = os.path.join(self._base_dir, "test_dir")
     file_io.create_dir(dir_path)
@@ -602,10 +600,11 @@ class FileIoTest(test.TestCase):
     file_io.FileIO(file_path, mode="w").write("testing")
     # Change noread to noread access.
     os.chmod(noread_path, 0)
-    expected_match = [compat.as_bytes(dir_path)]
-    with self.test_session() as sess:
-      self.assertItemsEqual(
-          gen_io_ops.matching_files(dir_path).eval(), expected_match)
+    expected_match = [
+        compat.as_bytes(os.path.join(any_path, name)) for name in files]
+    self.assertItemsEqual(
+        file_io.get_matching_files(os.path.join(dir_path, "*", "file*.txt")),
+        expected_match)
     # Change noread back so that it could be cleaned during tearDown.
     os.chmod(noread_path, 0o777)
 
-- 
GitLab


From f7a1522354be98af329f3a1b4047a7b3429bce9c Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 21 Aug 2018 23:53:55 +0000
Subject: [PATCH 0023/1085] Fix python 3 issues casued by compat.as_bytes

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/lib/io/file_io_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py
index d17c02fe9e..d130fae34e 100644
--- a/tensorflow/python/lib/io/file_io_test.py
+++ b/tensorflow/python/lib/io/file_io_test.py
@@ -601,7 +601,7 @@ class FileIoTest(test.TestCase):
     # Change noread to noread access.
     os.chmod(noread_path, 0)
     expected_match = [
-        compat.as_bytes(os.path.join(any_path, name)) for name in files]
+        os.path.join(any_path, name) for name in files]
     self.assertItemsEqual(
         file_io.get_matching_files(os.path.join(dir_path, "*", "file*.txt")),
         expected_match)
-- 
GitLab


From f4df6cb3aebc64a8e9c2c2d2dc06fa039e188566 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 22 Aug 2018 13:24:05 +0000
Subject: [PATCH 0024/1085] Remove unused compat import

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/lib/io/file_io_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py
index d130fae34e..33cea08045 100644
--- a/tensorflow/python/lib/io/file_io_test.py
+++ b/tensorflow/python/lib/io/file_io_test.py
@@ -24,7 +24,6 @@ import os.path
 from tensorflow.python.framework import errors
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.platform import test
-from tensorflow.python.util import compat
 
 
 class FileIoTest(test.TestCase):
-- 
GitLab


From cb5c61a3e11a37fb39a246aaf8ed6d02dd9ae9ab Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Fri, 24 Aug 2018 11:51:34 +0800
Subject: [PATCH 0025/1085] Refine LeakyRelu codes and update APIs.

---
 .../api_def/base_api/api_def_LeakyRelu.pbtxt  |  4 ++++
 .../base_api/api_def_LeakyReluGrad.pbtxt      | 24 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  2 +-
 tensorflow/python/eager/pywrap_tfe_src.cc     |  2 +-
 4 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt

diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
new file mode 100644
index 0000000000..4a61889f54
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "LeakyRelu"
+  summary: "Computes rectified linear: `max(features, features * alpha)`."
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt
new file mode 100644
index 0000000000..e427526602
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt
@@ -0,0 +1,24 @@
+op {
+  graph_op_name: "LeakyReluGrad"
+  visibility: HIDDEN
+  in_arg {
+    name: "gradients"
+    description: <<END
+The backpropagated gradients to the corresponding LeakyRelu operation.
+END
+  }
+  in_arg {
+    name: "features"
+    description: <<END
+The features passed as input to the corresponding LeakyRelu operation,
+OR the outputs of that operation (both work equivalently).
+END
+  }
+  out_arg {
+    name: "backprops"
+    description: <<END
+`gradients * (features > 0) + alpha * gradients * (featurs <= 0)`.
+END
+  }
+  summary: "Computes rectified linear gradients for a LeakyRelu operation."
+}
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 837e91bc23..7693c2d485 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -13637,7 +13637,7 @@ op {
   }
 }
 op {
-  name: "LeakykReluGrad"
+  name: "LeakyReluGrad"
   input_arg {
     name: "gradients"
     type_attr: "T"
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 9b3b5fd7aa..18fafd0de1 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) {
           "SoftplusGrad",
           "Softsign",
           "ReluGrad",
+          "LeakyRelu",
           "LeakyReluGrad",
           "Conv2D",
           "DepthwiseConv2dNative",
@@ -1800,7 +1801,6 @@ bool OpDoesntRequireInput(const string& op_name) {
           "BiasAdd",
           "Relu",
           "Relu6",
-          "LeakyRelu",
           "Elu",
           "Selu",
           "SparseSoftmaxCrossEntropyWithLogits",
-- 
GitLab


From 877358f68fcfd3ca06fdec87007e0cc90502f202 Mon Sep 17 00:00:00 2001
From: David Norman <davidn@graphcore.ai>
Date: Fri, 24 Aug 2018 17:13:53 +0100
Subject: [PATCH 0026/1085] Allow for disabling these tests via manifest

---
 tensorflow/compiler/xla/tests/while_test.cc | 46 ++++++++++-----------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc
index 1bdf1867b9..e6c69a5a86 100644
--- a/tensorflow/compiler/xla/tests/while_test.cc
+++ b/tensorflow/compiler/xla/tests/while_test.cc
@@ -48,7 +48,7 @@ class WhileTest : public ClientLibraryTestBase {};
 // while (result < 5) {
 //   result = result + 1;
 // }
-TEST_F(WhileTest, WhileWithScalarS32Result) {
+XLA_TEST_F(WhileTest, WhileWithScalarS32Result) {
   auto result_shape = ShapeUtil::MakeShape(S32, {});
 
   // Create a computation for the condition: repeat for 5 iterations.
@@ -84,7 +84,7 @@ TEST_F(WhileTest, WhileWithScalarS32Result) {
 // while (result < 5) {
 //   result = result + 1;
 // }
-TEST_F(WhileTest, WhileWithScalarS64Result) {
+XLA_TEST_F(WhileTest, WhileWithScalarS64Result) {
   auto result_shape = ShapeUtil::MakeShape(S64, {});
 
   // Create a computation for the condition: repeat for 5 iterations.
@@ -114,7 +114,7 @@ TEST_F(WhileTest, WhileWithScalarS64Result) {
   ComputeAndCompareR0<int64>(&builder, 5, {});
 }
 
-TEST_F(WhileTest, WhileWithScalarResultNonConstInit) {
+XLA_TEST_F(WhileTest, WhileWithScalarResultNonConstInit) {
   auto result_shape = ShapeUtil::MakeShape(S32, {});
   auto orig_shape = ShapeUtil::MakeShape(S32, {2});
 
@@ -147,7 +147,7 @@ TEST_F(WhileTest, WhileWithScalarResultNonConstInit) {
   ComputeAndCompareR0<int32>(&builder, 5, {});
 }
 
-TEST_F(WhileTest, WhileWithPredicateResult) {
+XLA_TEST_F(WhileTest, WhileWithPredicateResult) {
   auto result_shape = ShapeUtil::MakeShape(PRED, {});
 
   // Create a computation for the condition: run until condition is true.
@@ -184,7 +184,7 @@ TEST_F(WhileTest, WhileWithPredicateResult) {
 // while (result.sum() < 15.5f) {
 //   result = result + vector<float>(0);
 // }
-TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) {
+XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) {
   Shape result_shape = ShapeUtil::MakeShape(F32, {0});
 
   // Create a computation for the reduction.
@@ -238,7 +238,7 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) {
 // while (result.sum() < 15.5f) {
 //   result = result + vector<float>(8, 0.125f);
 // }
-TEST_F(WhileTest, WhileWithVectorResult) {
+XLA_TEST_F(WhileTest, WhileWithVectorResult) {
   Shape result_shape = ShapeUtil::MakeShape(F32, {8});
 
   // Create a computation for the reduction.
@@ -298,7 +298,7 @@ TEST_F(WhileTest, WhileWithVectorResult) {
 //   result = result + vector<float>(8, 0.125f);
 // }
 // tuple = tuple { while }
-TEST_F(WhileTest, WhileWithVectorResultIntoTuple) {
+XLA_TEST_F(WhileTest, WhileWithVectorResultIntoTuple) {
   Shape result_shape = ShapeUtil::MakeShape(F32, {8});
 
   // Create a computation for the reduction.
@@ -353,7 +353,7 @@ TEST_F(WhileTest, WhileWithVectorResultIntoTuple) {
   ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001));
 }
 
-TEST_F(WhileTest, WhileWithPermutationAndTupleResult) {
+XLA_TEST_F(WhileTest, WhileWithPermutationAndTupleResult) {
   std::vector<Shape> shape_elements = {
       ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}),
       ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})};
@@ -408,7 +408,7 @@ TEST_F(WhileTest, WhileWithPermutationAndTupleResult) {
   ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001));
 }
 
-TEST_F(WhileTest, WhileWithPermutationAndVectorResult) {
+XLA_TEST_F(WhileTest, WhileWithPermutationAndVectorResult) {
   std::vector<Shape> shape_elements = {
       ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}),
       ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})};
@@ -466,7 +466,7 @@ TEST_F(WhileTest, WhileWithPermutationAndVectorResult) {
 //   get<0>(result) = get<0>(result) + 1;
 //   get<1>(result) = get<1>(result) + vector<float>(10, 1.0f);
 // }
-TEST_F(WhileTest, WhileWithTupleResult) {
+XLA_TEST_F(WhileTest, WhileWithTupleResult) {
   std::vector<Shape> shape_elements = {ShapeUtil::MakeShape(S32, {}),
                                        ShapeUtil::MakeShape(F32, {10})};
   Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements);
@@ -516,7 +516,7 @@ TEST_F(WhileTest, WhileWithTupleResult) {
   ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0.0001));
 }
 
-TEST_F(WhileTest, WhileWithPredicateTupleResult) {
+XLA_TEST_F(WhileTest, WhileWithPredicateTupleResult) {
   std::vector<Shape> shape_elements = {ShapeUtil::MakeShape(S32, {}),
                                        ShapeUtil::MakeShape(PRED, {})};
   Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements);
@@ -562,7 +562,7 @@ TEST_F(WhileTest, WhileWithPredicateTupleResult) {
   ComputeAndCompareTuple(&builder, *expected, {}, ErrorSpec(0));
 }
 
-TEST_F(WhileTest, WhileWithTupleConstantScalarResult) {
+XLA_TEST_F(WhileTest, WhileWithTupleConstantScalarResult) {
   std::vector<Shape> shape_elements = {ShapeUtil::MakeShape(S32, {}),
                                        ShapeUtil::MakeShape(S32, {})};
   Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements);
@@ -622,7 +622,7 @@ TEST_F(WhileTest, WhileWithTupleConstantScalarResult) {
 //        get<1>(w1) = get<1>(w1) + vector<float>(10, 1.0f);
 //      }
 // result = get<1>(w0) + get<1>(w1)
-TEST_F(WhileTest, TwoWhileWithTupleResult) {
+XLA_TEST_F(WhileTest, TwoWhileWithTupleResult) {
   std::vector<Shape> shape_elements = {ShapeUtil::MakeShape(S32, {}),
                                        ShapeUtil::MakeShape(F32, {10})};
   Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements);
@@ -701,7 +701,7 @@ TEST_F(WhileTest, TwoWhileWithTupleResult) {
 }
 
 // Test while nodes that share the while body computation.
-TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) {
+XLA_TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) {
   std::vector<Shape> shape_elements = {ShapeUtil::MakeShape(S32, {}),
                                        ShapeUtil::MakeShape(F32, {10})};
   Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements);
@@ -768,7 +768,7 @@ TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) {
 
 // Test while nodes that share the while body computation.
 // TODO(b/37245345): Fails on GPU backend.
-TEST_F(WhileTest, DISABLED_ON_GPU(WhileLoopsWithSharedBodyAndInit)) {
+XLA_TEST_F(WhileTest, DISABLED_ON_GPU(WhileLoopsWithSharedBodyAndInit)) {
   std::vector<Shape> shape_elements = {ShapeUtil::MakeShape(S32, {}),
                                        ShapeUtil::MakeShape(F32, {10})};
   Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements);
@@ -907,7 +907,7 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) {
 // Per backend the values generated can be different as the different backends
 // use different random number generators.
 // TODO(b/32240857): Extend test to verify outputs.
-TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) {
+XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) {
   auto v6s32 = ShapeUtil::MakeShape(S32, {6});
 
   // Create a computation for the condition: repeat for count iterations.
@@ -953,7 +953,7 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) {
   }
 }
 
-TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) {
+XLA_TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) {
   auto element_shape = ShapeUtil::MakeShape(F32, {2});
 
   XlaBuilder outer("outer");
@@ -985,7 +985,7 @@ TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) {
                          ErrorSpec(1e-6));
 }
 
-TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) {
+XLA_TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) {
   auto element_shape = ShapeUtil::MakeShape(F32, {2});
 
   XlaBuilder outer("outer");
@@ -1010,7 +1010,7 @@ TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) {
                              ErrorSpec(1e-6));
 }
 
-TEST_F(WhileTest, WhileThatTurnsScalarParameterToTupleElement) {
+XLA_TEST_F(WhileTest, WhileThatTurnsScalarParameterToTupleElement) {
   auto element_shape = ShapeUtil::MakeShape(F32, {});
 
   XlaBuilder outer("outer");
@@ -1044,7 +1044,7 @@ TEST_F(WhileTest, WhileThatTurnsScalarParameterToTupleElement) {
 //   result[0] = result[0] + 1;
 //   result[1] = result[1] + 1;
 // }
-TEST_F(WhileTest, WhileWithMixedTupleElements) {
+XLA_TEST_F(WhileTest, WhileWithMixedTupleElements) {
   auto result_shape = ShapeUtil::MakeTupleShape(
       {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(S32, {})});
 
@@ -1152,7 +1152,7 @@ XLA_TEST_F(WhileTest, NestedWhileWithScalarResult) {
 // while (f(result).get<0>()) {
 //   result = result + 1;
 // }
-TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) {
+XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) {
   auto result_shape = ShapeUtil::MakeShape(S32, {});
 
   // Create a computation for the condition: repeat for 5 iterations.
@@ -1192,7 +1192,7 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) {
   ComputeAndCompareR0<int32>(&builder, 5, {});
 }
 
-TEST_F(WhileTest, WhileWithLoopInvariantOperation) {
+XLA_TEST_F(WhileTest, WhileWithLoopInvariantOperation) {
   auto matrix_shape = ShapeUtil::MakeShape(F32, {2, 2});
   auto scalar_s32 = ShapeUtil::MakeShape(S32, {});
   auto while_shape = ShapeUtil::MakeTupleShape(
@@ -1236,7 +1236,7 @@ TEST_F(WhileTest, WhileWithLoopInvariantOperation) {
       {param_value.get()}, ErrorSpec(4e-5));
 }
 
-TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileInfeedCondition)) {
+XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileInfeedCondition)) {
   auto while_shape = ShapeUtil::MakeShape(S32, {});
 
   XlaComputation condition;
-- 
GitLab


From 7a54c15804f7bb0d0c40fea5c84b1f4acee58bac Mon Sep 17 00:00:00 2001
From: Stefan Dyulgerov <stefan.dyulgerov@gmail.com>
Date: Sat, 25 Aug 2018 13:18:11 +0300
Subject: [PATCH 0027/1085] upgraded protobuf to v.3.6.1

---
 tensorflow/contrib/cmake/external/protobuf.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
index f56fb35a0f..56a57a2340 100644
--- a/tensorflow/contrib/cmake/external/protobuf.cmake
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -16,7 +16,7 @@ include (ExternalProject)
 
 set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
 set(PROTOBUF_URL https://github.com/google/protobuf.git)
-set(PROTOBUF_TAG v3.6.0)
+set(PROTOBUF_TAG v3.6.1)
 
 if(WIN32)
   if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
-- 
GitLab


From e93a9f9ccfd9c7a2419bf3fc1d7866765bbcfce3 Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Tue, 28 Aug 2018 18:55:51 -0700
Subject: [PATCH 0028/1085] Update GPU occupancy checking to utilize CUDA's
 occupancy calculator functions

-Replace references to the UnqueryableDeviceParams struct with calls to CUDA's built-in occupancy calculation functions
-Update calls to the occupancy checking functions with the new changes
-Changes should provide more long-term reliability and will remove the need to manually update hardcoded data values for new GPU architectures
---
 .../xla/service/gpu/partition_assignment.cc   |   9 +-
 .../stream_executor/cuda/cuda_gpu_executor.cc | 192 ++----------------
 .../stream_executor/device_description.cc     |  98 +++------
 .../stream_executor/device_description.h      |  73 ++-----
 4 files changed, 61 insertions(+), 311 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
index cf9f102d31..375f68a159 100644
--- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
@@ -62,13 +62,8 @@ LaunchDimensions CalculateLaunchDimensions(
   //
   //   <num threads per block> * <max blocks per core> = <max threads per core>
 
-  auto threads_per_core = device_desc.threads_per_core_limit();
-  auto blocks_per_core = device_desc.blocks_per_core_limit();
-  int64 threads_per_block;
-  if (threads_per_core != 0 && blocks_per_core != 0) {
-    threads_per_block = device_desc.threads_per_core_limit() /
-                        device_desc.blocks_per_core_limit();
-  } else {
+  int64 threads_per_block = device_desc.threads_per_block_limit();
+  if (threads_per_block == 0) {
     static std::atomic<int64> log_count{0};
     if (log_count.fetch_add(1) < 8) {
       LOG(WARNING) << "Attempting to calculate launch dimensions for GPU "
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index e30f50ea2a..39b0696c93 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -467,33 +467,26 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
     return;
   }
 
+  int block_size = thread_dims.x * thread_dims.y * thread_dims.z;
+
   const DeviceDescription &device_description =
       kernel.parent()->GetDeviceDescription();
 
-  uint64 blocks_per_sm = CalculateOccupancy(
-      device_description, regs_per_thread, smem_per_block, thread_dims);
-  VLOG(2) << "Resident blocks per SM is " << blocks_per_sm;
+  const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel);
+  CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue();
 
-  // To increase occupancy, there must be a sufficient number of blocks
-  // available to spread across the sm's at this new improved occupancy level.
-  int multiprocessor_count = device_description.core_count();
-  int block_count = block_dims.x * block_dims.y * block_dims.z;
-  int available_blocks_per_sm =
-      port::MathUtil::CeilOfRatio(block_count, multiprocessor_count);
-  if (available_blocks_per_sm <= static_cast<int64>(blocks_per_sm)) {
-    VLOG(2) << "Occupancy is limited by number of blocks available per sm.";
-    return;
-  }
+  int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread,
+                                         smem_per_block, thread_dims, cufunc);
+  VLOG(2) << "Resident blocks per SM is " << blocks_per_sm;
 
-  uint64 improved_regs_per_thread = CalculateRegisterLimitForTargetOccupancy(
-      device_description, smem_per_block, thread_dims, blocks_per_sm + 1);
-  if (improved_regs_per_thread != 0) {
-    VLOG(2) << "Reducing register usage from " << regs_per_thread
-            << " to " << improved_regs_per_thread
-            << " could increase resident blocks per SM by one.";
-  } else {
-    VLOG(2) << "Resident blocks per SM cannot be increased by reducing "
-        "register usage.";
+  int suggested_threads =
+      CompareOccupancy(&blocks_per_sm, device_description, regs_per_thread,
+                       smem_per_block, thread_dims, cufunc);
+  if (suggested_threads != 0) {
+    VLOG(2) << "The cuda occupancy calculator reccommends using "
+            << suggested_threads
+            << " threads per block to acheive an occupancy of " << blocks_per_sm
+            << " blocks per SM.";
   }
 }
 
@@ -980,144 +973,6 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) {
 #endif
 }
 
-// Set of compute capability specific device parameters that cannot be
-// queried from the driver API.  These values instead are baked into a
-// lookup table indexed by compute capability version.
-struct UnqueryableDeviceParams {
-  int cc_major;
-  int cc_minor;
-  uint64 blocks_per_core_limit;
-  uint64 registers_per_core_limit;
-  uint64 registers_per_thread_limit;
-  uint64 warp_alloc_granularity;
-  uint64 register_alloc_granularity;
-  uint64 shared_memory_alloc_granularity;
-};
-
-// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
-// https://developer.download.nvidia.com/compute/cuda/CUDA_Occupancy_calculator.xls
-static const UnqueryableDeviceParams kAllUnqueryableDeviceParams[] = {
-    {
-        2, 0,       // compute capability (2.0)
-        8,          // blocks_per_core_limit
-        32 * 1024,  // registers_per_core_limit
-        63,         // registers_per_thread_limit
-        2,          // warp_alloc_granularity
-        64,         // register_alloc_granularity
-        128,        // shared_memory_alloc_granularity
-    },
-    {
-        2, 1,       // compute capability (2.1)
-        8,          // blocks_per_core_limit
-        32 * 1024,  // registers_per_core_limit
-        63,         // registers_per_thread_limit
-        2,          // warp_alloc_granularity
-        64,         // register_alloc_granularity
-        128,        // shared_memory_alloc_granularity
-    },
-    {
-        3, 0,       // compute capability (3.0)
-        16,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        63,         // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        3, 2,       // compute capability (3.2)
-        16,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        3, 5,       // compute capability (3.5)
-        16,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        3, 7,        // compute capability (3.7)
-        16,          // blocks_per_core_limit
-        128 * 1024,  // registers_per_core_limit
-        255,         // registers_per_thread_limit
-        4,           // warp_alloc_granularity
-        256,         // register_alloc_granularity
-        256,         // shared_memory_alloc_granularity
-    },
-    {
-        5, 0,       // compute capability (5.0)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        5, 2,       // compute capability (5.2)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        5, 3,       // compute capability (5.3)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        6, 0,       // compute capability (6.0)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        2,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        6, 1,       // compute capability (6.1)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    {
-        6, 2,       // compute capability (6.2)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        4,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-    // TODO(jlebar): Confirm the alloc granularity values for sm_70.  These are
-    // not published in the spreadsheet linked above.  Currently we guess that
-    // they're the same as sm_60.
-    {
-        7, 0,       // compute capability (7.0)
-        32,         // blocks_per_core_limit
-        64 * 1024,  // registers_per_core_limit
-        255,        // registers_per_thread_limit
-        2,          // warp_alloc_granularity
-        256,        // register_alloc_granularity
-        256,        // shared_memory_alloc_granularity
-    },
-};
 
 DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
   internal::DeviceDescriptionBuilder builder;
@@ -1193,19 +1048,6 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
     builder.set_name(device_name);
   }
 
-  for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) {
-    const auto &params = kAllUnqueryableDeviceParams[i];
-    if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) {
-      builder.set_blocks_per_core_limit(params.blocks_per_core_limit);
-      builder.set_registers_per_core_limit(params.registers_per_core_limit);
-      builder.set_registers_per_thread_limit(params.registers_per_thread_limit);
-      builder.set_warp_alloc_granularity(params.warp_alloc_granularity);
-      builder.set_register_alloc_granularity(params.register_alloc_granularity);
-      builder.set_shared_memory_alloc_granularity(
-          params.shared_memory_alloc_granularity);
-    }
-  }
-
   builder.set_platform_version(
       port::StrCat("Compute Capability ", cc_major_, ".", cc_minor_));
 
@@ -1227,6 +1069,10 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
       CUDADriver::GetMaxRegistersPerBlock(device_).ValueOrDie());
   builder.set_threads_per_warp(
       CUDADriver::GetThreadsPerWarp(device_).ValueOrDie());
+  builder.set_registers_per_core_limit(
+      CUDADriver::GetDeviceAttribute(
+          CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, device_)
+          .ValueOrDie());
 
   auto built = builder.Build();
   return built.release();
diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc
index 8ca0677f8a..df52ce6cce 100644
--- a/tensorflow/stream_executor/device_description.cc
+++ b/tensorflow/stream_executor/device_description.cc
@@ -37,16 +37,11 @@ DeviceDescription::DeviceDescription()
                         kUninitializedUint64),
       block_dim_limit_(kUninitializedUint64, kUninitializedUint64,
                        kUninitializedUint64),
-      blocks_per_core_limit_(kUninitializedUint64),
       threads_per_core_limit_(kUninitializedUint64),
       threads_per_block_limit_(kUninitializedUint64),
       threads_per_warp_(kUninitializedUint64),
       registers_per_core_limit_(kUninitializedUint64),
       registers_per_block_limit_(kUninitializedUint64),
-      registers_per_thread_limit_(kUninitializedUint64),
-      warp_alloc_granularity_(1),
-      register_alloc_granularity_(1),
-      shared_memory_alloc_granularity_(1),
       device_address_bits_(kUninitializedUint64),
       device_memory_size_(kUninitializedUint64),
       memory_bandwidth_(kUninitializedUint64),
@@ -162,75 +157,36 @@ static uint64 RoundDown(uint64 value, uint64 n) {
   return port::MathUtil::FloorOfRatio(value, n) * n;
 }
 
-uint64 CalculateOccupancy(const DeviceDescription &device_description,
-                          uint64 registers_per_thread,
-                          uint64 shared_memory_per_block,
-                          const ThreadDim &thread_dims) {
-  // Don't try to compute occupancy if necessary values are not initialized.
-  uint64 required_fields[] =  { device_description.registers_per_thread_limit(),
-                                device_description.threads_per_warp(),
-                                device_description.warp_alloc_granularity(),
-                                device_description.register_alloc_granularity(),
-                                device_description.registers_per_block_limit(),
-                                device_description.shared_memory_per_core(),
-                                device_description.blocks_per_core_limit() };
-  for (auto value : required_fields) {
-    if (value == kUninitializedUint64) {
-      return 0;
-    }
-  }
-
-  if (registers_per_thread > device_description.registers_per_thread_limit()) {
-    return 0;
-  }
-
-  uint64 warps_per_block =
-      port::MathUtil::CeilOfRatio(thread_dims.x * thread_dims.y * thread_dims.z,
-                                  device_description.threads_per_warp());
-
-  // Warp resources are allocated at a particular granularity.  This value is
-  // the effective number of warps for resource allocation purposes.
-  uint64 alloc_warps_per_block =
-      RoundUp(warps_per_block, device_description.warp_alloc_granularity());
-
-  uint64 alloc_regs_per_warp =
-      RoundUp(device_description.threads_per_warp() * registers_per_thread,
-              device_description.register_alloc_granularity());
-  uint64 regs_per_block = alloc_warps_per_block * alloc_regs_per_warp;
-  uint64 reg_limit =
-      device_description.registers_per_block_limit() / regs_per_block;
-
-  uint64 alloc_smem_per_block = RoundUp(
-      shared_memory_per_block,
-      device_description.shared_memory_alloc_granularity());
-  uint64 smem_limit = alloc_smem_per_block > 0 ?
-      device_description.shared_memory_per_core() / alloc_smem_per_block :
-      device_description.blocks_per_core_limit();
-
-  uint64 thread_limit = device_description.threads_per_core_limit()
-      / (warps_per_block  * device_description.threads_per_warp());
-
-  return std::min({ device_description.blocks_per_core_limit(),
-          reg_limit, smem_limit, thread_limit });
+int CalculateOccupancy(const DeviceDescription& device_description,
+                       uint64 registers_per_thread,
+                       uint64 shared_memory_per_block,
+                       const ThreadDim& thread_dims, CUfunction func) {
+  int suggested_blocks = 0;
+  int suggested_threads = 0;
+  CUresult err =
+      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
+                                       func, NULL, shared_memory_per_block, 0);
+  CHECK_EQ(err, CUDA_SUCCESS);
+  return suggested_blocks;
 }
 
-uint64 CalculateRegisterLimitForTargetOccupancy(
-    const DeviceDescription &device_description, uint64 shared_memory_per_block,
-    const ThreadDim &thread_dims, uint64 target_blocks_per_core) {
-  // Linear search from maximum number of registers down until the target
-  // blocks per SM is found.
-  // TODO(meheff): Compute this using a closed form solution.
-  int reg_step = device_description.register_alloc_granularity() /
-      device_description.threads_per_warp();
-  for (int r = device_description.registers_per_thread_limit(); r > 0;
-       r = RoundDown(r - 1, reg_step)) {
-    uint64 occupancy = CalculateOccupancy(
-        device_description, r, shared_memory_per_block, thread_dims);
-    if (occupancy >= target_blocks_per_core) {
-      return r;
-    }
+int CompareOccupancy(int* initial_blocks,
+                     const DeviceDescription& device_description,
+                     uint64 registers_per_thread,
+                     uint64 shared_memory_per_block,
+                     const ThreadDim& thread_dims, CUfunction func) {
+  int suggested_blocks = 0;
+  int suggested_threads = 0;
+  CUresult err =
+      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
+                                       func, NULL, shared_memory_per_block, 0);
+  CHECK_EQ(err, CUDA_SUCCESS);
+  if (suggested_blocks > *initial_blocks) {
+    *initial_blocks = suggested_blocks;
+    return suggested_threads;
+  } else {
+    return 0;
   }
-  return 0;
 }
 
 }  // namespace stream_executor
diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h
index 7f99d81ef3..d335b9b875 100644
--- a/tensorflow/stream_executor/device_description.h
+++ b/tensorflow/stream_executor/device_description.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include <memory>
 #include "tensorflow/stream_executor/platform/port.h"
 
+#include "tensorflow/stream_executor/cuda/cuda_driver.h"
 #include "tensorflow/stream_executor/launch_dim.h"
 #include "tensorflow/stream_executor/platform/port.h"
 
@@ -79,10 +80,6 @@ class DeviceDescription {
   // legitimate kernel launch request.
   const BlockDim &block_dim_limit() const { return block_dim_limit_; }
 
-  // Returns the limit on the number of simultaneously resident blocks
-  // on a multiprocessor.
-  uint64 blocks_per_core_limit() const { return blocks_per_core_limit_; }
-
   // Returns the limit on the total number of threads that can be launched in a
   // single block; i.e. the limit on x * y * z dimensions of a ThreadDim.
   // This limit affects what constitutes a legitimate kernel launch request.
@@ -110,27 +107,6 @@ class DeviceDescription {
     return registers_per_block_limit_;
   }
 
-  // Returns the limit on the total number of registers that can be
-  // allocated to a thread.
-  const uint64 &registers_per_thread_limit() const {
-    return registers_per_thread_limit_;
-  }
-
-  // Returns the granularity at which warps are allocated resources.
-  const uint64 &warp_alloc_granularity() const {
-    return warp_alloc_granularity_;
-  }
-
-  // Returns the granularity at which registers are allocated to warps.
-  const uint64 &register_alloc_granularity() const {
-    return register_alloc_granularity_;
-  }
-
-  // Returns the granularity at which shared memory is allocated to warps.
-  const uint64 &shared_memory_alloc_granularity() const {
-    return shared_memory_alloc_granularity_;
-  }
-
   // Returns the number of address bits available to kernel code running on the
   // platform. This affects things like the maximum allocation size and perhaps
   // types used in kernel code such as size_t.
@@ -200,19 +176,12 @@ class DeviceDescription {
   ThreadDim thread_dim_limit_;
   BlockDim block_dim_limit_;
 
-  uint64 blocks_per_core_limit_;
-
   uint64 threads_per_core_limit_;
   uint64 threads_per_block_limit_;
   uint64 threads_per_warp_;
 
   uint64 registers_per_core_limit_;
   uint64 registers_per_block_limit_;
-  uint64 registers_per_thread_limit_;
-
-  uint64 warp_alloc_granularity_;
-  uint64 register_alloc_granularity_;
-  uint64 shared_memory_alloc_granularity_;
 
   uint64 device_address_bits_;
   uint64 device_memory_size_;
@@ -270,10 +239,6 @@ class DeviceDescriptionBuilder {
     device_description_->block_dim_limit_ = value;
   }
 
-  void set_blocks_per_core_limit(uint64 value) {
-    device_description_->blocks_per_core_limit_ = value;
-  }
-
   void set_threads_per_core_limit(uint64 value) {
     device_description_->threads_per_core_limit_ = value;
   }
@@ -290,19 +255,6 @@ class DeviceDescriptionBuilder {
   void set_registers_per_block_limit(uint64 value) {
     device_description_->registers_per_block_limit_ = value;
   }
-  void set_registers_per_thread_limit(uint64 value) {
-    device_description_->registers_per_thread_limit_ = value;
-  }
-
-  void set_warp_alloc_granularity(uint64 value) {
-    device_description_->warp_alloc_granularity_ = value;
-  }
-  void set_register_alloc_granularity(uint64 value) {
-    device_description_->register_alloc_granularity_ = value;
-  }
-  void set_shared_memory_alloc_granularity(uint64 value) {
-    device_description_->shared_memory_alloc_granularity_ = value;
-  }
 
   void set_device_address_bits(uint64 value) {
     device_description_->device_address_bits_ = value;
@@ -375,17 +327,18 @@ void CalculateDimensionality(const DeviceDescription &device_description,
 // Compute and return maximum blocks per core (occupancy) based on the
 // device description, some kernel characteristics and the number of threads per
 // block.  If unable to compute occupancy, zero is returned.
-uint64 CalculateOccupancy(const DeviceDescription &device_description,
-                          uint64 registers_per_thread,
-                          uint64 shared_memory_per_block,
-                          const ThreadDim &thread_dims);
-
-// Compute and return the maximum number of registers per thread which
-// achieves the target occupancy.  If the target is not possible then
-// zero is returned.
-uint64 CalculateRegisterLimitForTargetOccupancy(
-    const DeviceDescription &device_description, uint64 shared_memory_per_block,
-    const ThreadDim &thread_dims, uint64 target_blocks_per_core);
+int CalculateOccupancy(const DeviceDescription& device_description,
+                       uint64 registers_per_thread,
+                       uint64 shared_memory_per_block,
+                       const ThreadDim& thread_dims, CUfunction func);
+
+// Compute and return the suggested thread count to acheive ideal occupancy.
+// If the provided thread dimensions match this number, zero is returned.
+int CompareOccupancy(int* initial_blocks,
+                     const DeviceDescription& device_description,
+                     uint64 registers_per_thread,
+                     uint64 shared_memory_per_block,
+                     const ThreadDim& thread_dims, CUfunction func);
 
 }  // namespace stream_executor
 
-- 
GitLab


From 4e72dd865a3fc83baa69f6b7c08720a1b546a464 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Wed, 29 Aug 2018 17:05:43 +0800
Subject: [PATCH 0029/1085] Refine LeakyRelu codes.

1. Add C++ gradient of gradient definition of LeakyReLu and revalant UT.
2. Using forward compatibility layer for python code changes.
---
 tensorflow/cc/gradients/nn_grad.cc            | 18 ++++-
 tensorflow/cc/gradients/nn_grad_test.cc       | 16 +++++
 .../python/kernel_tests/relu_op_test.py       | 70 ++++++++++---------
 tensorflow/python/ops/nn_ops.py               |  5 +-
 4 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc
index 0fc23d0bf7..2a32a2ed6f 100644
--- a/tensorflow/cc/gradients/nn_grad.cc
+++ b/tensorflow/cc/gradients/nn_grad.cc
@@ -149,13 +149,27 @@ Status LeakyReluGradHelper(const Scope& scope, const Operation& op,
   float alpha;
   TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha));
   internal::LeakyReluGrad::Attrs attrs;
-  attrs.Alpha(alpha);
-  auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs);
+  auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0),
+                                    attrs.Alpha(alpha));
   grad_outputs->push_back(dx);
   return scope.status();
 }
 REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper);
 
+Status LeakyReluGradGradHelper(const Scope& scope, const Operation& op,
+                               const std::vector<Output>& grad_inputs,
+                               std::vector<Output>* grad_outputs) {
+  float alpha;
+  TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha));
+  internal::LeakyReluGrad::Attrs attrs;
+  auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(1),
+                                    attrs.Alpha(alpha));
+  grad_outputs->push_back(dx);
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("LeakyReluGrad", LeakyReluGradGradHelper);
+
 Status EluGradHelper(const Scope& scope, const Operation& op,
                      const std::vector<Output>& grad_inputs,
                      std::vector<Output>* grad_outputs) {
diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index 5ebece7b6e..bf0db1f59d 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include "tensorflow/cc/framework/gradient_checker.h"
 #include "tensorflow/cc/framework/testutil.h"
 #include "tensorflow/cc/gradients/grad_testutil.h"
+#include "tensorflow/cc/ops/nn_ops_internal.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -173,6 +174,21 @@ TEST_F(NNGradTest, LeakyReluGrad) {
   RunTest(x, x_init_value, y, shape);
 }
 
+TEST_F(NNGradTest, LeakyReluGradGrad) {
+  TensorShape shape({5, 2});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+  // Avoid input values where Leaky ReLU gradient is not well defined (around
+  // zero).
+  Tensor x_init_value = test::AsTensor<float>(
+      {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f},
+      {5, 2});
+  Tensor features = test::AsTensor<float>(
+      {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f},
+      {5, 2});
+  auto y = ops::internal::LeakyReluGrad(scope_, x, features);
+  RunTest(x, x_init_value, y, shape);
+}
+
 TEST_F(NNGradTest, EluGrad) {
   TensorShape shape({5, 2});
   auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index ccb3a231bb..7066f28883 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.python.compat import compat
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
@@ -283,8 +284,9 @@ class LeakyReluTest(test.TestCase):
             np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t),
             alpha=0.1, use_gpu=True)
 
-  # The gradient test for ReLU is a bit tricky as the derivative is not well
-  # defined at around zero and we want to avoid that in terms of input values.
+  # The gradient test for Leaky ReLU is a bit tricky as the derivative is not
+  # well defined at around zero and we want to avoid that in terms of input
+  # values.
   def testGradientFloat32(self):
     with self.test_session():
       x = constant_op.constant(
@@ -319,39 +321,41 @@ class LeakyReluTest(test.TestCase):
     self.assertLess(err, 1e-10)
 
   def testGradGradFloat32(self):
-    with self.test_session():
-      x = constant_op.constant(
-          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-          shape=[2, 5],
-          name="x")
-      y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
-      z = gradients_impl.gradients(y, x)
-      x_init = np.asarray(
-          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
-          dtype=np.float32,
-          order="F")
-      err = gradient_checker.compute_gradient_error(
-          x, [2, 5], z[0], [2, 5], x_init_value=x_init)
-    print("leaky_relu (float32) gradient of gradient err = ", err)
-    self.assertLess(err, 1e-4)
+    with compat.forward_compatibility_horizon(2018, 10, 2):
+      with self.test_session():
+	x = constant_op.constant(
+	    [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+	    shape=[2, 5],
+	    name="x")
+	y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
+	z = gradients_impl.gradients(y, x)
+	x_init = np.asarray(
+	    [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+	    dtype=np.float32,
+	    order="F")
+	err = gradient_checker.compute_gradient_error(
+	    x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+      print("leaky_relu (float32) gradient of gradient err = ", err)
+      self.assertLess(err, 1e-4)
 
   def testGradGradFloat64(self):
-    with self.test_session():
-      x = constant_op.constant(
-          [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-          shape=[2, 5],
-          dtype=dtypes.float64,
-          name="x")
-      y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
-      z = gradients_impl.gradients(y, x)
-      x_init = np.asarray(
-          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
-          dtype=np.float64,
-          order="F")
-      err = gradient_checker.compute_gradient_error(
-          x, [2, 5], z[0], [2, 5], x_init_value=x_init)
-    print("leaky_relu (float64) gradient of gradient err = ", err)
-    self.assertLess(err, 1e-10)
+    with compat.forward_compatibility_horizon(2018, 10, 2):
+      with self.test_session():
+	x = constant_op.constant(
+	    [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+	    shape=[2, 5],
+	    dtype=dtypes.float64,
+	    name="x")
+	y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
+	z = gradients_impl.gradients(y, x)
+	x_init = np.asarray(
+	    [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+	    dtype=np.float64,
+	    order="F")
+	err = gradient_checker.compute_gradient_error(
+	    x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+      print("leaky_relu (float64) gradient of gradient err = ", err)
+      self.assertLess(err, 1e-10)
 
   def testGradientScalar(self):
     with self.test_session() as sess:
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 31b8f3945d..52ea202636 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1601,7 +1601,10 @@ def leaky_relu(features, alpha=0.2, name=None):
     features = ops.convert_to_tensor(features, name="features")
     if features.dtype.is_integer:
       features = math_ops.to_float(features)
-    return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name)
+    if compat.forward_compatible(2018, 10, 1):
+      return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name)
+    alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha")
+    return math_ops.maximum(alpha * features, features, name=name)
 
 
 def _flatten_outer_dims(logits):
-- 
GitLab


From 2586eb3bfeeef3af357e438ae5aff92d2bac12a5 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Mon, 3 Sep 2018 11:48:35 +0800
Subject: [PATCH 0030/1085] Code fix against ci_build error results.

---
 tensorflow/cc/gradients/nn_grad_test.cc       |  3 +-
 tensorflow/core/kernels/relu_op.cc            |  8 +--
 tensorflow/core/kernels/relu_op.h             |  8 +--
 tensorflow/core/kernels/relu_op_functor.h     |  1 -
 .../python/kernel_tests/relu_op_test.py       | 50 +++++++++----------
 .../tools/api/golden/v1/tensorflow.pbtxt      |  4 ++
 6 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index bf0db1f59d..d8c2a1a0fc 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -180,8 +180,7 @@ TEST_F(NNGradTest, LeakyReluGradGrad) {
   // Avoid input values where Leaky ReLU gradient is not well defined (around
   // zero).
   Tensor x_init_value = test::AsTensor<float>(
-      {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f},
-      {5, 2});
+      {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, {5, 2});
   Tensor features = test::AsTensor<float>(
       {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f},
       {5, 2});
diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc
index c4f2ef5632..cafa49cbb6 100644
--- a/tensorflow/core/kernels/relu_op.cc
+++ b/tensorflow/core/kernels/relu_op.cc
@@ -106,15 +106,15 @@ namespace functor {
                                                                                \
   template <>                                                                  \
   void LeakyRelu<GPUDevice, T>::operator()(                                    \
-      const GPUDevice& d, typename TTypes<T>::ConstTensor features,            \
-      T alpha, typename TTypes<T>::Tensor activations);                        \
+      const GPUDevice& d, typename TTypes<T>::ConstTensor features, T alpha,   \
+      typename TTypes<T>::Tensor activations);                                 \
   extern template struct LeakyRelu<GPUDevice, T>;                              \
                                                                                \
   template <>                                                                  \
   void LeakyReluGrad<GPUDevice, T>::operator()(                                \
       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,           \
-      typename TTypes<T>::ConstTensor features,                                \
-      T alpha, typename TTypes<T>::Tensor backprops);                          \
+      typename TTypes<T>::ConstTensor features, T alpha,                       \
+      typename TTypes<T>::Tensor backprops);                                   \
   extern template struct LeakyReluGrad<GPUDevice, T>;                          \
                                                                                \
   template <>                                                                  \
diff --git a/tensorflow/core/kernels/relu_op.h b/tensorflow/core/kernels/relu_op.h
index c55190065c..fa79ab03ae 100644
--- a/tensorflow/core/kernels/relu_op.h
+++ b/tensorflow/core/kernels/relu_op.h
@@ -143,8 +143,8 @@ class LeakyReluOp : public UnaryElementWiseOp<T, LeakyReluOp<Device, T>> {
 
   void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
     functor::LeakyRelu<Device, T> functor;
-    functor(context->eigen_device<Device>(), input.flat<T>(),
-            alpha_, output->flat<T>());
+    functor(context->eigen_device<Device>(), input.flat<T>(), alpha_,
+            output->flat<T>());
   }
 
  private:
@@ -183,7 +183,9 @@ class LeakyReluGradOp
 
 template <typename Device, typename T>
 void LeakyReluGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
-    const Tensor& g, const Tensor& a, T alpha, Tensor* output) {
+                                                   const Tensor& g,
+                                                   const Tensor& a, T alpha,
+                                                   Tensor* output) {
   if (!ReluHelpers::ValidateSameSize(context, g, a)) return;
   functor::LeakyReluGrad<Device, T> functor;
   functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(), alpha,
diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h
index 7f0951451d..548d5a277d 100644
--- a/tensorflow/core/kernels/relu_op_functor.h
+++ b/tensorflow/core/kernels/relu_op_functor.h
@@ -91,7 +91,6 @@ struct Relu6Grad {
   }
 };
 
-
 // Functor used by LeakyReluOp to do the computations.
 template <typename Device, typename T>
 struct LeakyRelu {
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 7066f28883..3e24b8a2c4 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -323,37 +323,37 @@ class LeakyReluTest(test.TestCase):
   def testGradGradFloat32(self):
     with compat.forward_compatibility_horizon(2018, 10, 2):
       with self.test_session():
-	x = constant_op.constant(
-	    [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-	    shape=[2, 5],
-	    name="x")
-	y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
-	z = gradients_impl.gradients(y, x)
-	x_init = np.asarray(
-	    [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
-	    dtype=np.float32,
-	    order="F")
-	err = gradient_checker.compute_gradient_error(
-	    x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+        x = constant_op.constant(
+            [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+            shape=[2, 5],
+            name="x")
+        y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu")
+        z = gradients_impl.gradients(y, x)
+        x_init = np.asarray(
+            [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+            dtype=np.float32,
+            order="F")
+        err = gradient_checker.compute_gradient_error(
+            x, [2, 5], z[0], [2, 5], x_init_value=x_init)
       print("leaky_relu (float32) gradient of gradient err = ", err)
       self.assertLess(err, 1e-4)
 
   def testGradGradFloat64(self):
     with compat.forward_compatibility_horizon(2018, 10, 2):
       with self.test_session():
-	x = constant_op.constant(
-	    [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-	    shape=[2, 5],
-	    dtype=dtypes.float64,
-	    name="x")
-	y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
-	z = gradients_impl.gradients(y, x)
-	x_init = np.asarray(
-	    [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
-	    dtype=np.float64,
-	    order="F")
-	err = gradient_checker.compute_gradient_error(
-	    x, [2, 5], z[0], [2, 5], x_init_value=x_init)
+        x = constant_op.constant(
+            [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+            shape=[2, 5],
+            dtype=dtypes.float64,
+            name="x")
+        y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu")
+        z = gradients_impl.gradients(y, x)
+        x_init = np.asarray(
+            [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+            dtype=np.float64,
+            order="F")
+        err = gradient_checker.compute_gradient_error(
+            x, [2, 5], z[0], [2, 5], x_init_value=x_init)
       print("leaky_relu (float64) gradient of gradient err = ", err)
       self.assertLess(err, 1e-10)
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 4de662fe33..9e8d320f06 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1324,6 +1324,10 @@ tf_module {
     name: "lbeta"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "leaky_relu"
+    argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], "
+  }
   member_method {
     name: "less"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-- 
GitLab


From d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Mon, 3 Sep 2018 12:10:51 +0800
Subject: [PATCH 0031/1085] Add XLA support for LeakyReluOp.

Code contributed by: Meng Chen <mc119496@alibaba-inc.com>
---
 tensorflow/compiler/tests/binary_ops_test.py  |  7 ++++
 tensorflow/compiler/tests/unary_ops_test.py   |  5 +++
 tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 +++++++++++++++++++
 3 files changed, 54 insertions(+)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 0aafda7fb4..8941dd4e27 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -178,6 +178,13 @@ class BinaryOpsTest(xla_test.XLATestCase):
               [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype),
           expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype))
 
+      self._testBinary(
+          gen_nn_ops._leaky_relu_grad,
+          np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype),
+          np.array(
+              [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype),
+          expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype))
+
       self._testBinary(
           gen_nn_ops.softmax_cross_entropy_with_logits,
           np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype),
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index 73adb0d243..91f876fa23 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -361,6 +361,11 @@ class UnaryOpsTest(xla_test.XLATestCase):
           np.array([[-0.05, 6.05, 5]], dtype=dtype),
           expected=np.array([[0, 6, 5]], dtype=dtype))
 
+      self._assertOpOutputMatchesExpected(
+          nn_ops.leaky_relu,
+          np.array([[-1.0, 1.0]], dtype=dtype),
+          expected=np.array([[-0.2, 1.0]], dtype=dtype))
+
       self._assertOpOutputMatchesExpected(
           nn_ops.softmax,
           np.array([1, 2, 3, 4], dtype=dtype),
diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
index d35777ccb1..ec14735884 100644
--- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
@@ -50,6 +50,24 @@ class Relu6Op : public XlaOpKernel {
   }
 };
 
+
+class LeakyReluOp : public XlaOpKernel {
+ public:
+  explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
+  }
+  // Compute the max of the input x and alpha*x.
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* builder = ctx->builder();
+    auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0),
+                                          static_cast<double>(alpha_));
+    ctx->SetOutput(0,
+        xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0)));
+  }
+ private:
+  float alpha_;
+};
+
 class ReluGradOp : public XlaOpKernel {
  public:
   explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
@@ -84,10 +102,34 @@ class Relu6GradOp : public XlaOpKernel {
   }
 };
 
+class LeakyReluGradOp : public XlaOpKernel {
+ public:
+  explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
+  }
+  // Return the lhs (incoming gradient) if the rhs (input feature) > 0,
+  // otherwise return the alpha * lhs.
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* b = ctx->builder();
+    const TensorShape shape = ctx->InputShape(0);
+    const auto zero =
+        xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes());
+    const auto pred = xla::Gt(ctx->Input(1), zero);
+    auto alpha = XlaHelpers::FloatLiteral(b, input_type(0),
+                                          static_cast<double>(alpha_));
+    ctx->SetOutput(0,
+        xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0))));
+  }
+ private:
+  float alpha_;
+};
+
 REGISTER_XLA_OP(Name("Relu"), ReluOp);
 REGISTER_XLA_OP(Name("Relu6"), Relu6Op);
+REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp);
 REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp);
 REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp);
+REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp);
 
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From fa20b59b920233d35bb8da3fbc3c234c369a8291 Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Tue, 4 Sep 2018 14:20:40 -0700
Subject: [PATCH 0032/1085] Move CUDA-specific occupancy calculation into
 proper file

-Maintain functionality, just move CalculateOccupancy() and CompareOccupancy() methods from device_description to cuda_gpu_executor
-Remove CUDA requirement in general class device_description
---
 .../stream_executor/cuda/cuda_gpu_executor.cc | 37 +++++++++++++++++++
 .../stream_executor/cuda/cuda_gpu_executor.h  | 11 ++++++
 .../stream_executor/device_description.cc     | 32 ----------------
 .../stream_executor/device_description.h      | 17 ---------
 4 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 39b0696c93..458c0e3030 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -490,6 +490,43 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
   }
 }
 
+// Compute and return maximum blocks per core (occupancy) based on the
+// device description, some kernel characteristics and the number of threads per
+// block.  If unable to compute occupancy, zero is returned.
+int CalculateOccupancy(const DeviceDescription& device_description,
+                       uint64 registers_per_thread,
+                       uint64 shared_memory_per_block,
+                       const ThreadDim& thread_dims, CUfunction func) {
+  int suggested_blocks = 0;
+  int suggested_threads = 0;
+  CUresult err =
+      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
+                                       func, NULL, shared_memory_per_block, 0);
+  CHECK_EQ(err, CUDA_SUCCESS);
+  return suggested_blocks;
+}
+
+// Compute and return the suggested thread count to acheive ideal occupancy.
+// If the provided thread dimensions match this number, zero is returned.
+int CompareOccupancy(int* initial_blocks,
+                     const DeviceDescription& device_description,
+                     uint64 registers_per_thread,
+                     uint64 shared_memory_per_block,
+                     const ThreadDim& thread_dims, CUfunction func) {
+  int suggested_blocks = 0;
+  int suggested_threads = 0;
+  CUresult err =
+      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
+                                       func, NULL, shared_memory_per_block, 0);
+  CHECK_EQ(err, CUDA_SUCCESS);
+  if (suggested_blocks > *initial_blocks) {
+    *initial_blocks = suggested_blocks;
+    return suggested_threads;
+  } else {
+    return 0;
+  }
+}
+
 void *CUDAExecutor::Allocate(uint64 size) {
   return CUDADriver::DeviceAllocate(context_, size);
 }
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 8a954d5461..e8ebbc3220 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -70,6 +70,17 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
               const BlockDim &block_dims, const KernelBase &k,
               const KernelArgsArrayBase &args) override;
 
+  int CalculateOccupancy(const DeviceDescription& device_description,
+                       uint64 registers_per_thread,
+                       uint64 shared_memory_per_block,
+                       const ThreadDim& thread_dims, CUfunction func);
+
+  int CompareOccupancy(int* initial_blocks,
+                     const DeviceDescription& device_description,
+                     uint64 registers_per_thread,
+                     uint64 shared_memory_per_block,
+                     const ThreadDim& thread_dims, CUfunction func);
+
   void *Allocate(uint64 size) override;
 
   void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,
diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc
index df52ce6cce..726c4adf74 100644
--- a/tensorflow/stream_executor/device_description.cc
+++ b/tensorflow/stream_executor/device_description.cc
@@ -157,36 +157,4 @@ static uint64 RoundDown(uint64 value, uint64 n) {
   return port::MathUtil::FloorOfRatio(value, n) * n;
 }
 
-int CalculateOccupancy(const DeviceDescription& device_description,
-                       uint64 registers_per_thread,
-                       uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func) {
-  int suggested_blocks = 0;
-  int suggested_threads = 0;
-  CUresult err =
-      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
-                                       func, NULL, shared_memory_per_block, 0);
-  CHECK_EQ(err, CUDA_SUCCESS);
-  return suggested_blocks;
-}
-
-int CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func) {
-  int suggested_blocks = 0;
-  int suggested_threads = 0;
-  CUresult err =
-      cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads,
-                                       func, NULL, shared_memory_per_block, 0);
-  CHECK_EQ(err, CUDA_SUCCESS);
-  if (suggested_blocks > *initial_blocks) {
-    *initial_blocks = suggested_blocks;
-    return suggested_threads;
-  } else {
-    return 0;
-  }
-}
-
 }  // namespace stream_executor
diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h
index d335b9b875..b15ce31216 100644
--- a/tensorflow/stream_executor/device_description.h
+++ b/tensorflow/stream_executor/device_description.h
@@ -24,7 +24,6 @@ limitations under the License.
 #include <memory>
 #include "tensorflow/stream_executor/platform/port.h"
 
-#include "tensorflow/stream_executor/cuda/cuda_driver.h"
 #include "tensorflow/stream_executor/launch_dim.h"
 #include "tensorflow/stream_executor/platform/port.h"
 
@@ -324,22 +323,6 @@ void CalculateDimensionality(const DeviceDescription &device_description,
                              uint64 element_count, uint64 *threads_per_block,
                              uint64 *block_count);
 
-// Compute and return maximum blocks per core (occupancy) based on the
-// device description, some kernel characteristics and the number of threads per
-// block.  If unable to compute occupancy, zero is returned.
-int CalculateOccupancy(const DeviceDescription& device_description,
-                       uint64 registers_per_thread,
-                       uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func);
-
-// Compute and return the suggested thread count to acheive ideal occupancy.
-// If the provided thread dimensions match this number, zero is returned.
-int CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func);
-
 }  // namespace stream_executor
 
 #endif  // TENSORFLOW_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_
-- 
GitLab


From cd6597b8fcd82b51ddb47a297972a1614c2a5d78 Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Tue, 4 Sep 2018 16:17:40 -0700
Subject: [PATCH 0033/1085] Fixed transition typo

---
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 458c0e3030..a961e9a6c4 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -493,7 +493,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
 // Compute and return maximum blocks per core (occupancy) based on the
 // device description, some kernel characteristics and the number of threads per
 // block.  If unable to compute occupancy, zero is returned.
-int CalculateOccupancy(const DeviceDescription& device_description,
+int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description,
                        uint64 registers_per_thread,
                        uint64 shared_memory_per_block,
                        const ThreadDim& thread_dims, CUfunction func) {
@@ -508,7 +508,7 @@ int CalculateOccupancy(const DeviceDescription& device_description,
 
 // Compute and return the suggested thread count to acheive ideal occupancy.
 // If the provided thread dimensions match this number, zero is returned.
-int CompareOccupancy(int* initial_blocks,
+int CUDAExecutor::CompareOccupancy(int* initial_blocks,
                      const DeviceDescription& device_description,
                      uint64 registers_per_thread,
                      uint64 shared_memory_per_block,
-- 
GitLab


From 475b7715f16ad0f94fa9986a0eefc1b2cf2044bd Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Tue, 4 Sep 2018 16:31:01 -0700
Subject: [PATCH 0034/1085] Recommended typo fix

---
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index a961e9a6c4..ce2f1ce3ae 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -483,7 +483,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
       CompareOccupancy(&blocks_per_sm, device_description, regs_per_thread,
                        smem_per_block, thread_dims, cufunc);
   if (suggested_threads != 0) {
-    VLOG(2) << "The cuda occupancy calculator reccommends using "
+    VLOG(2) << "The cuda occupancy calculator recommends using "
             << suggested_threads
             << " threads per block to acheive an occupancy of " << blocks_per_sm
             << " blocks per SM.";
-- 
GitLab


From a95281ce1b449d8f92a3799ff9c1dbf661b70bc4 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Wed, 5 Sep 2018 09:02:40 +0800
Subject: [PATCH 0035/1085] Avoid golden API file changing.

---
 tensorflow/cc/gradients/nn_grad_test.cc                  | 3 +--
 tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt | 1 +
 tensorflow/tools/api/golden/v1/tensorflow.pbtxt          | 4 ----
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index d8c2a1a0fc..f5a09e09dc 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -42,7 +42,6 @@ using ops::MaxPoolV2;
 using ops::Placeholder;
 using ops::Relu;
 using ops::Relu6;
-using ops::LeakyRelu;
 using ops::Selu;
 using ops::Softmax;
 using ops::Softplus;
@@ -165,7 +164,7 @@ TEST_F(NNGradTest, Relu6Grad) {
 TEST_F(NNGradTest, LeakyReluGrad) {
   TensorShape shape({5, 2});
   auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
-  auto y = LeakyRelu(scope_, x);
+  auto y = ops::internal::LeakyRelu(scope_, x);
   // Avoid input values where Leaky ReLU gradient is not well defined (around
   // zero).
   Tensor x_init_value = test::AsTensor<float>(
diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
index 4a61889f54..280148e032 100644
--- a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt
@@ -1,4 +1,5 @@
 op {
   graph_op_name: "LeakyRelu"
+  visibility: HIDDEN
   summary: "Computes rectified linear: `max(features, features * alpha)`."
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 9e8d320f06..4de662fe33 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -1324,10 +1324,6 @@ tf_module {
     name: "lbeta"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "leaky_relu"
-    argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], "
-  }
   member_method {
     name: "less"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-- 
GitLab


From d0574f6b25ab01052e093ab92612520a7e4ada8d Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Thu, 6 Sep 2018 08:22:37 -0700
Subject: [PATCH 0036/1085] Fixed clang formatting

---
 .../stream_executor/cuda/cuda_gpu_executor.cc   | 17 +++++++++--------
 .../stream_executor/cuda/cuda_gpu_executor.h    | 12 ++++++------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index ce2f1ce3ae..ef84d01a94 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -493,10 +493,10 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
 // Compute and return maximum blocks per core (occupancy) based on the
 // device description, some kernel characteristics and the number of threads per
 // block.  If unable to compute occupancy, zero is returned.
-int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description,
-                       uint64 registers_per_thread,
-                       uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func) {
+int CUDAExecutor::CalculateOccupancy(
+    const DeviceDescription& device_description, uint64 registers_per_thread,
+    uint64 shared_memory_per_block, const ThreadDim& thread_dims,
+    CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
   CUresult err =
@@ -509,10 +509,11 @@ int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description
 // Compute and return the suggested thread count to acheive ideal occupancy.
 // If the provided thread dimensions match this number, zero is returned.
 int CUDAExecutor::CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func) {
+                                   const DeviceDescription& device_description,
+                                   uint64 registers_per_thread,
+                                   uint64 shared_memory_per_block,
+                                   const ThreadDim& thread_dims,
+                                   CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
   CUresult err =
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index e8ebbc3220..1481dcc19a 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -71,16 +71,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
               const KernelArgsArrayBase &args) override;
 
   int CalculateOccupancy(const DeviceDescription& device_description,
+                         uint64 registers_per_thread,
+                         uint64 shared_memory_per_block,
+                         const ThreadDim& thread_dims, CUfunction func);
+
+  int CompareOccupancy(int* initial_blocks,
+                       const DeviceDescription& device_description,
                        uint64 registers_per_thread,
                        uint64 shared_memory_per_block,
                        const ThreadDim& thread_dims, CUfunction func);
 
-  int CompareOccupancy(int* initial_blocks,
-                     const DeviceDescription& device_description,
-                     uint64 registers_per_thread,
-                     uint64 shared_memory_per_block,
-                     const ThreadDim& thread_dims, CUfunction func);
-
   void *Allocate(uint64 size) override;
 
   void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes,
-- 
GitLab


From e3654a3cb4e26c26409aeeb9e127e3addcb14cee Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 19:20:11 +0000
Subject: [PATCH 0037/1085] Add float16 support on GPU for
 tf.contrib.image.transform

This fix tries to address the issue raised in 22115 where
there were no float16 support on GPU for tf.contrib.image.transform.

This fix fixes 22115.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/image/kernels/image_ops.cc        | 2 ++
 tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tensorflow/contrib/image/kernels/image_ops.cc b/tensorflow/contrib/image/kernels/image_ops.cc
index 370a8caf6a..788bf04b28 100644
--- a/tensorflow/contrib/image/kernels/image_ops.cc
+++ b/tensorflow/contrib/image/kernels/image_ops.cc
@@ -156,6 +156,7 @@ namespace functor {
 TF_CALL_uint8(DECLARE_FUNCTOR);
 TF_CALL_int32(DECLARE_FUNCTOR);
 TF_CALL_int64(DECLARE_FUNCTOR);
+TF_CALL_half(DECLARE_FUNCTOR);
 TF_CALL_float(DECLARE_FUNCTOR);
 TF_CALL_double(DECLARE_FUNCTOR);
 
@@ -175,6 +176,7 @@ TF_CALL_double(DECLARE_FUNCTOR);
 TF_CALL_uint8(REGISTER);
 TF_CALL_int32(REGISTER);
 TF_CALL_int64(REGISTER);
+TF_CALL_half(REGISTER);
 TF_CALL_float(REGISTER);
 TF_CALL_double(REGISTER);
 
diff --git a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc
index 8743a5ff72..36b9a236a6 100644
--- a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc
+++ b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc
@@ -32,6 +32,7 @@ typedef Eigen::GpuDevice GPUDevice;
 template class FillProjectiveTransform<GPUDevice, uint8>;
 template class FillProjectiveTransform<GPUDevice, int32>;
 template class FillProjectiveTransform<GPUDevice, int64>;
+template class FillProjectiveTransform<GPUDevice, Eigen::half>;
 template class FillProjectiveTransform<GPUDevice, float>;
 template class FillProjectiveTransform<GPUDevice, double>;
 
-- 
GitLab


From 7d7e8a725aeede4b724f7376d22df2c7f2ebdcf9 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 19:22:39 +0000
Subject: [PATCH 0038/1085] Add test case for float16 support on GPU for
 tf.contrib.image.transform

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../contrib/image/python/kernel_tests/image_ops_test.py    | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
index 376c0751ee..ef1f79bb94 100644
--- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
+++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
@@ -272,6 +272,13 @@ class ImageOpsTest(test_util.TensorFlowTestCase):
     with self.cached_session():
       self.assertAllEqual([[[[1], [0]], [[0], [1]]]], result.eval())
 
+  def test_transform_data_types(self):
+    for dtype in _DTYPES:
+      image = constant_op.constant([[1, 2], [3, 4]], dtype=dtype)
+      value = image_ops.transform(image, [1] * 8)
+      with self.test_session(use_gpu=True):
+        self.assertAllEqual(value.eval(), np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype()))
+
 
 class BipartiteMatchTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 04e20965487c36f43ba5c773b547b23e39478a5c Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 19:25:22 +0000
Subject: [PATCH 0039/1085] Pylint fix

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../contrib/image/python/kernel_tests/image_ops_test.py       | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
index ef1f79bb94..4997c31a7f 100644
--- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
+++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py
@@ -277,7 +277,9 @@ class ImageOpsTest(test_util.TensorFlowTestCase):
       image = constant_op.constant([[1, 2], [3, 4]], dtype=dtype)
       value = image_ops.transform(image, [1] * 8)
       with self.test_session(use_gpu=True):
-        self.assertAllEqual(value.eval(), np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype()))
+        self.assertAllEqual(
+            value.eval(),
+            np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype()))
 
 
 class BipartiteMatchTest(test_util.TensorFlowTestCase):
-- 
GitLab


From 6a5090b086bc9d665eb9e65f05eb94cdb58baaa2 Mon Sep 17 00:00:00 2001
From: Matt Conley <mconley@nvidia.com>
Date: Thu, 6 Sep 2018 13:09:12 -0700
Subject: [PATCH 0040/1085] Fully fixed clang errors

---
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 12 ++++++------
 tensorflow/stream_executor/cuda/cuda_gpu_executor.h  | 10 +++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index ef84d01a94..9d5bcc7f77 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -472,7 +472,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
   const DeviceDescription &device_description =
       kernel.parent()->GetDeviceDescription();
 
-  const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel);
+  const CUDAKernel *cuda_kernel = AsCUDAKernel(&kernel);
   CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue();
 
   int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread,
@@ -494,8 +494,8 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel,
 // device description, some kernel characteristics and the number of threads per
 // block.  If unable to compute occupancy, zero is returned.
 int CUDAExecutor::CalculateOccupancy(
-    const DeviceDescription& device_description, uint64 registers_per_thread,
-    uint64 shared_memory_per_block, const ThreadDim& thread_dims,
+    const DeviceDescription &device_description, uint64 registers_per_thread,
+    uint64 shared_memory_per_block, const ThreadDim &thread_dims,
     CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
@@ -508,11 +508,11 @@ int CUDAExecutor::CalculateOccupancy(
 
 // Compute and return the suggested thread count to acheive ideal occupancy.
 // If the provided thread dimensions match this number, zero is returned.
-int CUDAExecutor::CompareOccupancy(int* initial_blocks,
-                                   const DeviceDescription& device_description,
+int CUDAExecutor::CompareOccupancy(int *initial_blocks,
+                                   const DeviceDescription &device_description,
                                    uint64 registers_per_thread,
                                    uint64 shared_memory_per_block,
-                                   const ThreadDim& thread_dims,
+                                   const ThreadDim &thread_dims,
                                    CUfunction func) {
   int suggested_blocks = 0;
   int suggested_threads = 0;
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
index 1481dcc19a..53b2a29ae7 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h
@@ -70,16 +70,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface {
               const BlockDim &block_dims, const KernelBase &k,
               const KernelArgsArrayBase &args) override;
 
-  int CalculateOccupancy(const DeviceDescription& device_description,
+  int CalculateOccupancy(const DeviceDescription &device_description,
                          uint64 registers_per_thread,
                          uint64 shared_memory_per_block,
-                         const ThreadDim& thread_dims, CUfunction func);
+                         const ThreadDim &thread_dims, CUfunction func);
 
-  int CompareOccupancy(int* initial_blocks,
-                       const DeviceDescription& device_description,
+  int CompareOccupancy(int *initial_blocks,
+                       const DeviceDescription &device_description,
                        uint64 registers_per_thread,
                        uint64 shared_memory_per_block,
-                       const ThreadDim& thread_dims, CUfunction func);
+                       const ThreadDim &thread_dims, CUfunction func);
 
   void *Allocate(uint64 size) override;
 
-- 
GitLab


From e25cf78285fef5234380ee26fef9090a939e91f5 Mon Sep 17 00:00:00 2001
From: Richard Yu <yohan.richard.yu@gmail.com>
Date: Thu, 6 Sep 2018 17:05:08 -0700
Subject: [PATCH 0041/1085] Ensure all ValueErrors are raised

---
 tensorflow/contrib/quantize/python/fold_batch_norms.py | 2 +-
 tensorflow/python/keras/layers/embeddings.py           | 8 ++++----
 tensorflow/python/ops/nn_ops.py                        | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index d9f179bee4..d882b79892 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -628,7 +628,7 @@ def _GetBatchNormParams(graph, context, has_scaling):
   bn_decay_var_tensor = _FindMatchingTensor(graph, op_suffix_bn_decay_var,
                                             context)
   if batch_mean_tensor is None and moving_mean_tensor is None:
-    ValueError('Error folding unfused batch norms')
+    raise ValueError('Error folding unfused batch norms')
   if has_scaling:
     gamma_tensor = _FindMatchingTensor(graph, op_suffix_gamma, context)
 
diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py
index 629a9ec9a1..a0b9393812 100644
--- a/tensorflow/python/keras/layers/embeddings.py
+++ b/tensorflow/python/keras/layers/embeddings.py
@@ -142,13 +142,13 @@ class Embedding(Layer):
       else:
         in_lens = [self.input_length]
       if len(in_lens) != len(input_shape) - 1:
-        ValueError('"input_length" is %s, but received input has shape %s' %
-                   (str(self.input_length), str(input_shape)))
+        raise ValueError('"input_length" is %s, but received input has shape %s' %
+                         (str(self.input_length), str(input_shape)))
       else:
         for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])):
           if s1 is not None and s2 is not None and s1 != s2:
-            ValueError('"input_length" is %s, but received input has shape %s' %
-                       (str(self.input_length), str(input_shape)))
+            raise ValueError('"input_length" is %s, but received input has shape %s' %
+                             (str(self.input_length), str(input_shape)))
           elif s1 is None:
             in_lens[i] = s2
       return (input_shape[0],) + tuple(in_lens) + (self.output_dim,)
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index ef9afd9e8e..17e10995f2 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -427,8 +427,8 @@ class _WithSpaceToBatch(object):
     try:
       input_shape.with_rank_at_least(expected_input_rank)
     except ValueError:
-      ValueError("input tensor must have rank %d at least" %
-                 (expected_input_rank))
+      raise ValueError("input tensor must have rank %d at least" %
+                       (expected_input_rank))
 
     const_rate = tensor_util.constant_value(dilation_rate)
     rate_or_const_rate = dilation_rate
@@ -818,12 +818,12 @@ class Convolution(object):
     try:
       input_shape.with_rank(num_spatial_dims + 2)
     except ValueError:
-      ValueError("input tensor must have rank %d" % (num_spatial_dims + 2))
+      raise ValueError("input tensor must have rank %d" % (num_spatial_dims + 2))
 
     try:
       filter_shape.with_rank(num_spatial_dims + 2)
     except ValueError:
-      ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2))
+      raise ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2))
 
     if data_format is None or not data_format.startswith("NC"):
       input_channels_dim = input_shape[num_spatial_dims + 1]
-- 
GitLab


From 864e290d1776895d7877777b8368ca8bc6fc22a3 Mon Sep 17 00:00:00 2001
From: Edvard Fagerholm <edvard.fagerholm@gmail.com>
Date: Wed, 29 Aug 2018 11:56:35 +0300
Subject: [PATCH 0042/1085] Make tf.transpose emit simpler graph when possible

If not given an explicit 'perm' parameter, tf.transpose currently
emits a graph that dynamically calculates it from the rank of the
input tensor. This is completely unnecessary when the rank of the
input can be statically determined at graph construction time.

Modify tf.transpose to emit 'perm' as a single Const node whenever
possible.
---
 tensorflow/python/ops/array_ops.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 7bf3869ddf..9597839301 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1409,8 +1409,13 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
         gen_array_ops.conjugate_transpose
         if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose)
     if perm is None:
-      rank = gen_array_ops.rank(a)
-      perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
+      a = ops.convert_to_tensor(a, name="a")
+      if not a.get_shape().ndims:
+        rank = gen_array_ops.rank(a)
+        perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
+      else:
+        rank = a.get_shape().ndims
+        perm = (rank - 1) - np.arange(rank)
       ret = transpose_fn(a, perm, name=name)
       # NOTE(mrry): Setting the shape explicitly because
       #   reverse is not handled by the shape function.
-- 
GitLab


From 90cf7fb7786c8a9c135ef73482856b082e80f61a Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Tue, 11 Sep 2018 12:48:30 +0800
Subject: [PATCH 0043/1085] Fix lint errors and typos.

---
 tensorflow/compiler/tests/binary_ops_test.py  |  9 +++++----
 tensorflow/compiler/tf2xla/kernels/relu_op.cc | 14 +++++++-------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index 8941dd4e27..069e83d083 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -179,11 +179,12 @@ class BinaryOpsTest(xla_test.XLATestCase):
           expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype))
 
       self._testBinary(
-          gen_nn_ops._leaky_relu_grad,
+          gen_nn_ops.leaky_relu_grad,
           np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype),
-          np.array(
-              [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype),
-          expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype))
+          np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+                   dtype=dtype),
+          expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10],
+                            dtype=dtype))
 
       self._testBinary(
           gen_nn_ops.softmax_cross_entropy_with_logits,
diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
index ec14735884..8d65e0339c 100644
--- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
@@ -50,7 +50,6 @@ class Relu6Op : public XlaOpKernel {
   }
 };
 
-
 class LeakyReluOp : public XlaOpKernel {
  public:
   explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
@@ -61,9 +60,9 @@ class LeakyReluOp : public XlaOpKernel {
     xla::XlaBuilder* builder = ctx->builder();
     auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0),
                                           static_cast<double>(alpha_));
-    ctx->SetOutput(0,
-        xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0)));
+    ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0)));
   }
+
  private:
   float alpha_;
 };
@@ -115,11 +114,12 @@ class LeakyReluGradOp : public XlaOpKernel {
     const auto zero =
         xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes());
     const auto pred = xla::Gt(ctx->Input(1), zero);
-    auto alpha = XlaHelpers::FloatLiteral(b, input_type(0),
-                                          static_cast<double>(alpha_));
-    ctx->SetOutput(0,
-        xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0))));
+    auto alpha =
+        XlaHelpers::FloatLiteral(b, input_type(0), static_cast<double>(alpha_));
+    ctx->SetOutput(
+        0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0))));
   }
+
  private:
   float alpha_;
 };
-- 
GitLab


From 8530167f68673fa756565c0394bbe2dcdc39db05 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Fri, 24 Aug 2018 16:52:07 +0300
Subject: [PATCH 0044/1085] Add IgniteDataset that allows to work with Apache
 Ignite.

---
 configure.py                                  |   2 +
 tensorflow/BUILD                              |   6 +
 tensorflow/contrib/BUILD                      |  15 +
 tensorflow/contrib/cmake/python_modules.txt   |   2 +
 tensorflow/contrib/ignite/BUILD               | 136 ++++
 tensorflow/contrib/ignite/README.md           | 167 ++++
 tensorflow/contrib/ignite/__init__.py         |  42 +
 .../kernels/ignite_binary_object_parser.cc    | 304 +++++++
 .../kernels/ignite_binary_object_parser.h     |  54 ++
 .../contrib/ignite/kernels/ignite_client.cc   |  55 ++
 .../contrib/ignite/kernels/ignite_client.h    |  40 +
 .../contrib/ignite/kernels/ignite_dataset.cc  | 123 +++
 .../contrib/ignite/kernels/ignite_dataset.h   |  65 ++
 .../ignite/kernels/ignite_dataset_iterator.cc | 447 ++++++++++
 .../ignite/kernels/ignite_dataset_iterator.h  |  87 ++
 .../ignite/kernels/ignite_dataset_ops.cc      | 145 ++++
 .../ignite/kernels/ignite_plain_client.h      |  43 +
 .../kernels/ignite_plain_client_unix.cc       | 132 +++
 .../kernels/ignite_plain_client_windows.cc    | 143 ++++
 .../ignite/kernels/ignite_ssl_wrapper.cc      | 149 ++++
 .../ignite/kernels/ignite_ssl_wrapper.h       |  49 ++
 tensorflow/contrib/ignite/ops/dataset_ops.cc  |  64 ++
 .../ignite/python/ops/ignite_dataset_ops.py   | 763 ++++++++++++++++++
 .../ignite/python/ops/ignite_op_loader.py     |  25 +
 .../ignite/python/tests/bin/start-plain.sh    |  24 +
 .../ignite/python/tests/bin/start-ssl-auth.sh |  28 +
 .../ignite/python/tests/bin/start-ssl.sh      |  26 +
 .../tests/config/ignite-config-plain.xml      |  39 +
 .../tests/config/ignite-config-ssl-auth.xml   |  59 ++
 .../python/tests/config/ignite-config-ssl.xml |  59 ++
 .../python/tests/ignite_dataset_test.py       |  77 ++
 .../ignite/python/tests/keystore/client.jks   | Bin 0 -> 3232 bytes
 .../ignite/python/tests/keystore/client.pem   |  69 ++
 .../ignite/python/tests/keystore/server.jks   | Bin 0 -> 3230 bytes
 .../ignite/python/tests/keystore/trust.jks    | Bin 0 -> 2432 bytes
 .../contrib/ignite/python/tests/sql/init.sql  |  20 +
 .../ignite/python/tests/start_ignite.sh       |  30 +
 .../ignite/python/tests/stop_ignite.sh        |  19 +
 38 files changed, 3508 insertions(+)
 create mode 100644 tensorflow/contrib/ignite/BUILD
 create mode 100644 tensorflow/contrib/ignite/README.md
 create mode 100644 tensorflow/contrib/ignite/__init__.py
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client.h
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
 create mode 100644 tensorflow/contrib/ignite/ops/dataset_ops.cc
 create mode 100644 tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
 create mode 100644 tensorflow/contrib/ignite/python/ops/ignite_op_loader.py
 create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-plain.sh
 create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh
 create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh
 create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml
 create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml
 create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml
 create mode 100644 tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
 create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/client.jks
 create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/client.pem
 create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/server.jks
 create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/trust.jks
 create mode 100644 tensorflow/contrib/ignite/python/tests/sql/init.sql
 create mode 100755 tensorflow/contrib/ignite/python/tests/start_ignite.sh
 create mode 100755 tensorflow/contrib/ignite/python/tests/stop_ignite.sh

diff --git a/configure.py b/configure.py
index 361bd4764d..8f1957e870 100644
--- a/configure.py
+++ b/configure.py
@@ -1502,6 +1502,8 @@ def main():
                 'with_aws_support', True, 'aws')
   set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform',
                 'with_kafka_support', True, 'kafka')
+  set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite',
+                'with_ignite_support', True, 'ignite')
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
                 False, 'xla')
   set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support',
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 386e0096ff..6c29c78793 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -248,6 +248,12 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "with_ignite_support",
+    define_values = {"with_ignite_support": "true"},
+    visibility = ["//visibility:public"],
+)
+
 # Crosses between platforms and file system libraries not supported on those
 # platforms due to limitations in nested select() statements.
 config_setting(
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 798f499870..f055e643d0 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -118,6 +118,11 @@ py_library(
             "//tensorflow/contrib/kafka",
         ],
         "//conditions:default": [],
+    }) + select({
+        "//tensorflow:with_ignite_support": [
+            "//tensorflow/contrib/ignite",
+        ],
+        "//conditions:default": [],
     }) + select({
         "//tensorflow:with_aws_support_windows_override": [],
         "//tensorflow:with_aws_support": [
@@ -160,6 +165,11 @@ cc_library(
             "//tensorflow/contrib/kafka:dataset_kernels",
         ],
         "//conditions:default": [],
+    }) + select({
+        "//tensorflow:with_ignite_support": [
+            "//tensorflow/contrib/ignite:dataset_kernels",
+        ],
+        "//conditions:default": [],
     }) + select({
         "//tensorflow:with_aws_support_windows_override": [],
         "//tensorflow:with_aws_support": [
@@ -197,6 +207,11 @@ cc_library(
             "//tensorflow/contrib/kafka:dataset_ops_op_lib",
         ],
         "//conditions:default": [],
+    }) + select({
+        "//tensorflow:with_ignite_support": [
+            "//tensorflow/contrib/ignite:dataset_ops_op_lib",
+        ],
+        "//conditions:default": [],
     }) + select({
         "//tensorflow:with_aws_support_windows_override": [],
         "//tensorflow:with_aws_support": [
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index fb871acae9..56755e817a 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -207,6 +207,8 @@ tensorflow/contrib/integrate/python
 tensorflow/contrib/integrate/python/ops
 tensorflow/contrib/kafka/python
 tensorflow/contrib/kafka/python/ops
+tensorflow/contrib/ignite/python
+tensorflow/contrib/ignite/python/ops
 tensorflow/contrib/keras
 tensorflow/contrib/keras/api
 tensorflow/contrib/keras/api/keras
diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
new file mode 100644
index 0000000000..9f6c666893
--- /dev/null
+++ b/tensorflow/contrib/ignite/BUILD
@@ -0,0 +1,136 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_gen_op_wrapper_py",
+    "tf_kernel_library",
+    "tf_custom_op_library",
+    "tf_custom_op_py_library",
+    "tf_gen_op_libs",
+    "tf_py_test",
+    "if_not_windows",
+    "if_windows",
+)
+
+py_library(
+    name = "ignite",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":dataset_ops",
+    ],
+)
+
+tf_custom_op_library(
+    name = "_dataset_ops.so",
+    srcs = ["ops/dataset_ops.cc"],
+    deps = [":dataset_kernels"],
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["dataset_ops"],
+)
+
+cc_library(
+    name = "dataset_kernels",
+    srcs = [
+        "kernels/ignite_dataset_ops.cc",
+        "kernels/ignite_client.h",
+        "kernels/ignite_client.cc",
+        "kernels/ignite_plain_client.h",
+        "kernels/ignite_ssl_wrapper.h",
+        "kernels/ignite_ssl_wrapper.cc",
+        "kernels/ignite_binary_object_parser.h",
+        "kernels/ignite_binary_object_parser.cc",
+        "kernels/ignite_dataset.h",
+        "kernels/ignite_dataset.cc",
+        "kernels/ignite_dataset_iterator.h",
+        "kernels/ignite_dataset_iterator.cc",
+    ] + if_not_windows([
+        "kernels/ignite_plain_client_unix.cc",
+    ]) + if_windows([
+        "kernels/ignite_plain_client_windows.cc",
+    ]),
+    deps = [
+        "//tensorflow/core:framework_headers_lib",
+        "//third_party/eigen3",
+        "@boringssl//:ssl",
+        "@protobuf_archive//:protobuf_headers",
+    ],
+    alwayslink = 1,
+)
+
+py_library(
+    name = "dataset_ops",
+    srcs = [
+        "python/ops/ignite_dataset_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":ignite_op_loader",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
+
+tf_gen_op_wrapper_py(
+    name = "gen_dataset_ops",
+    out = "python/ops/gen_dataset_ops.py",
+    deps = ["//tensorflow/contrib/ignite:dataset_ops_op_lib"],
+)
+
+tf_kernel_library(
+    name = "dataset_ops_kernels",
+    deps = [
+        ":dataset_kernels",
+        "//tensorflow/core:framework",
+    ],
+    alwayslink = 1,
+)
+
+tf_custom_op_py_library(
+    name = "ignite_op_loader",
+    srcs = ["python/ops/ignite_op_loader.py"],
+    dso = ["//tensorflow/contrib/ignite:_dataset_ops.so"],
+    kernels = [
+        ":dataset_ops_kernels",
+        "//tensorflow/contrib/ignite:dataset_ops_op_lib",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":gen_dataset_ops",
+        "//tensorflow/contrib/util:util_py",
+        "//tensorflow/python:platform",
+    ],
+)
+
+# The Apache Ignite servers have to setup before the test and tear down
+# after the test manually. The docker engine has to be installed.
+#
+# To setup Apache Ignite servers:
+# $ bash ./python/tests/start_ignite.sh
+#
+# To tear down Apache Ignite servers:
+# $ bash ./python/tests/stop_ignite.sh
+tf_py_test(
+    name = "ignite_dataset_test",
+    srcs = ["python/tests/ignite_dataset_test.py"],
+    additional_deps = [
+        ":ignite",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform_test",
+    ],
+    tags = [
+        "manual",
+        "no_windows",
+        "notap",
+    ],
+)
diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md
new file mode 100644
index 0000000000..9054344e94
--- /dev/null
+++ b/tensorflow/contrib/ignite/README.md
@@ -0,0 +1,167 @@
+### Ignite Dataset
+# Ignite Dataset
+
+- [Overview](#overview)
+- [Features](#features)
+  * [Distributed In-Memory Datasource](#distributed-in-memory-datasource)
+  * [Structured Objects](#structured-objects)
+  * [Distributed Training](#distributed-training)
+  * [SSL Connection](#ssl-connection)
+  * [Windows Support](#windows-support)
+- [Try it out](#try-it-out)
+- [Limitations](#limitations)
+
+## Overview
+
+[Apache Ignite](https://ignite.apache.org/) is a memory-centric distributed database, caching, and processing platform for
+transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a datasource for neural network training, inference and all other computations supported by TensorFlow. 
+
+## Features
+
+Ignite Dataset provides a set of features that makes it possible to use it in a wide range of cases. The most important and interesting features are described below.
+
+### Distributed In-Memory Datasource
+[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that allows to avoid limitations of hard drive and provide high reading speed and ability to store and operate with as much data as you need in distributed cluster. Using of Ignite Dataset makes it possible to utilize all these advantages. 
+- If you have a **gigabyte** of data you can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations. At the same time, you can store your data in Apache Ignite on the same machine and use it as a datasource for TensorFlow and thus avoid these limitations.
+- If you have a **terabyte** of data you probably still can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations again. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow and thus avoid these limitations.
+- If you have a **petabyte** of data you can't keep it on a single machine. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow.
+
+It's  important that Apache Ignite is not just a step of ETL pipeline between database or data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. Choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, in the same time, an ability to use this data for neural network training and inference.
+
+```bash
+$ apache-ignite-fabric/bin/ignite.sh
+$ apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://localhost:10800/"
+
+jdbc:ignite:thin://localhost/> CREATE TABLE KITTEN_CACHE (ID LONG PRIMARY KEY, NAME VARCHAR);
+jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (1, 'WARM KITTY');
+jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (2, 'SOFT KITTY');
+jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL OF FUR');
+```
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset(cache_name="SQL_PUBLIC_KITTEN_CACHE")
+>>> iterator = dataset.make_one_shot_iterator()
+>>> next_obj = iterator.get_next()
+>>>
+>>> with tf.Session() as sess:
+>>>   for _ in range(3):
+>>>     print(sess.run(next_obj))
+
+{'key': 1, 'val': {'NAME': b'WARM KITTY'}}
+{'key': 2, 'val': {'NAME': b'SOFT KITTY'}}
+{'key': 3, 'val': {'NAME': b'LITTLE BALL OF FUR'}}
+```
+
+### Structured Objects
+[Apache Ignite](https://ignite.apache.org/) allows to store any objects you would like to store. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects.
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset(cache_name="IMAGES")
+>>> iterator = dataset.make_one_shot_iterator()
+>>> next_obj = iterator.get_next()
+>>>
+>>> with tf.Session() as sess:
+>>>   print(sess.run(next_obj))
+
+{
+    'key': 'kitten.png', 
+    'val': {
+        'metadata': {
+            'file_name': b'kitten.png',
+            'label': b'little ball of fur',
+            width: 800, 
+            height: 600
+        }, 
+        'pixels': [0, 0, 0, 0, ..., 0]
+    }
+}
+```
+ Neural network training and other computations require transformations that can be done as part of  [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset.
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset(cache_name="IMAGES").map(lambda obj: obj['val']['pixels'])
+>>> iterator = dataset.make_one_shot_iterator()
+>>> next_obj = iterator.get_next()
+>>>
+>>> with tf.Session() as sess:
+>>>   print(sess.run(next_obj))
+
+[0, 0, 0, 0, ..., 0]
+```
+
+### Distributed Training
+
+TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is an ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. 
+
+<a href="https://www.codecogs.com/eqnedit.php?latex=\nabla[\sum_1^n(y&space;-&space;\hat{y})^2]&space;=&space;\nabla[\sum_1^{n_1}(y&space;-&space;\hat{y})^2]&space;&plus;&space;\nabla[\sum_{n_1}^{n_2}(y&space;-&space;\hat{y})^2]&space;&plus;&space;...&space;&plus;&space;\nabla[\sum_{n_{k-1}}^n(y&space;-&space;\hat{y})^2]" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\nabla[\sum_1^n(y&space;-&space;\hat{y})^2]&space;=&space;\nabla[\sum_1^{n_1}(y&space;-&space;\hat{y})^2]&space;&plus;&space;\nabla[\sum_{n_1}^{n_2}(y&space;-&space;\hat{y})^2]&space;&plus;&space;...&space;&plus;&space;\nabla[\sum_{n_{k-1}}^n(y&space;-&space;\hat{y})^2]" title="\nabla[\sum_1^n(y - \hat{y})^2] = \nabla[\sum_1^{n_1}(y - \hat{y})^2] + \nabla[\sum_{n_1}^{n_2}(y - \hat{y})^2] + ... + \nabla[\sum_{n_{k-1}}^n(y - \hat{y})^2]" /></a>
+
+Utilizing this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottleneck.
+
+Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL) we can specify the number of partitions the data will be partitioned on. If, for example, Apache Ignite cluster consists of 10 machines and we creates cache with 10 partitions then every machine will maintain approximately one data partition.
+
+Ignite Dataset allows to utilize these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that might be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting correstondent environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach we are able to assign specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset.
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset("IMAGES")
+>>>
+>>> # Compute gradients locally on every worker node.
+>>> gradients = []    
+>>> for i in range(5):
+>>>     with tf.device("/job:WORKER/task:%d" % i):
+>>>         device_iterator = dataset.make_one_shot_iterator()
+>>>         device_next_obj = device_iterator.get_next()
+>>>         gradient = compute_gradient(device_next_obj)
+>>>         gradients.append(gradient)        
+>>>        
+>>> # Aggregate them on master node.
+>>> result_gradient = tf.reduce_sum(gradients)
+>>>
+>>> with tf.Session("grpc://localhost:10000") as sess:
+>>>     print(sess.run(result_gradient))
+```
+
+High-level TensorFlow API for [distributed training](https://www.tensorflow.org/api_docs/python/tf/contrib/distribute/DistributionStrategy) is supported as well. 
+
+### SSL Connection
+
+Your data should not be accessible without any control. Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information please see [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation.
+
+```python
+>>> import tensorflow as tf
+>>> from tensorflow.contrib.ignite import IgniteDataset
+>>> 
+>>> dataset = IgniteDataset(cache_name="IMAGES", certfile="client.pem", cert_password="password", username="ignite", password="ignite")
+>>> ...
+```
+
+### Windows Support
+
+Ignite Dataset is fully compatible with Windows, so you can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems.
+
+## Try it out
+
+The simplest way to try Ignite Dataset out is to run [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and then interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine:
+
+```
+docker run -it -p 10800:10800 dmitrievanthony/ignite-with-mnist
+```
+
+After that you will be able to work with it following way:
+
+![ignite-dataset-mnist](https://s3.amazonaws.com/helloworld23423423ew23/ignite-dataset-mnist.png "Ignite Dataset Mnist")
+
+## Limitations
+
+Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures.
\ No newline at end of file
diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py
new file mode 100644
index 0000000000..468920a557
--- /dev/null
+++ b/tensorflow/contrib/ignite/__init__.py
@@ -0,0 +1,42 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Apache Ignite is a memory-centric distributed database, caching, and
+   processing platform for transactional, analytical, and streaming workloads,
+   delivering in-memory speeds at petabyte scale. This contrib package
+   contains an integration between Apache Ignite and TensorFlow. The
+   integration is based on tf.data from TensorFlow side and Binary Client
+   Protocol from Apache Ignite side. It allows to use Apache Ignite as a
+   datasource for neural network training, inference and all other
+   computations supported by TensorFlow. Ignite Dataset is based on Apache
+   Ignite Binary Client Protocol:
+   https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
+
+@@IgniteDataset
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops \
+import IgniteDataset
+
+from tensorflow.python.util.all_util import remove_undocumented
+
+_allowed_symbols = [
+    "IgniteDataset",
+]
+
+remove_undocumented(__name__)
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
new file mode 100644
index 0000000000..bf0ef8766e
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
@@ -0,0 +1,304 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_binary_object_parser.h"
+
+namespace ignite {
+
+tensorflow::Status BinaryObjectParser::Parse(
+    uint8_t*& ptr, std::vector<tensorflow::Tensor>& out_tensors,
+    std::vector<int32_t>& types) {
+  uint8_t object_type_id = *ptr;
+  ptr += 1;
+
+  switch (object_type_id) {
+    case BYTE: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_UINT8, {});
+      tensor.scalar<tensorflow::uint8>()() = *((uint8_t*)ptr);
+      ptr += 1;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case SHORT: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT16, {});
+      tensor.scalar<tensorflow::int16>()() = *((int16_t*)ptr);
+      ptr += 2;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case INT: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT32, {});
+      tensor.scalar<tensorflow::int32>()() = *((int32_t*)ptr);
+      ptr += 4;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case LONG: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT64, {});
+      tensor.scalar<tensorflow::int64>()() = *((int64_t*)ptr);
+      ptr += 8;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case FLOAT: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_FLOAT, {});
+      tensor.scalar<float>()() = *((float*)ptr);
+      ptr += 4;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case DOUBLE: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_DOUBLE, {});
+      tensor.scalar<double>()() = *((double*)ptr);
+      ptr += 8;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case UCHAR: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_UINT16, {});
+      tensor.scalar<tensorflow::uint16>()() = *((uint16_t*)ptr);
+      ptr += 2;
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case BOOL: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_BOOL, {});
+      tensor.scalar<bool>()() = *((bool*)ptr);
+      ptr += 1;
+      out_tensors.emplace_back(std::move(tensor));
+
+      break;
+    }
+    case STRING: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_STRING, {});
+      tensor.scalar<std::string>()() = std::string((char*)ptr, length);
+      ptr += length;
+      out_tensors.emplace_back(std::move(tensor));
+
+      break;
+    }
+    case DATE: {
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT64, {});
+      tensor.scalar<tensorflow::int64>()() = *((int64_t*)ptr);
+      ptr += 8;
+      out_tensors.emplace_back(std::move(tensor));
+
+      break;
+    }
+    case BYTE_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_UINT8,
+                                tensorflow::TensorShape({length}));
+
+      uint8_t* arr = (uint8_t*)ptr;
+      ptr += length;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::uint8>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case SHORT_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT16,
+                                tensorflow::TensorShape({length}));
+
+      int16_t* arr = (int16_t*)ptr;
+      ptr += length * 2;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::int16>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case INT_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT32,
+                                tensorflow::TensorShape({length}));
+
+      int32_t* arr = (int32_t*)ptr;
+      ptr += length * 4;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::int32>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case LONG_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT64,
+                                tensorflow::TensorShape({length}));
+
+      int64_t* arr = (int64_t*)ptr;
+      ptr += length * 8;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::int64>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case FLOAT_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_FLOAT,
+                                tensorflow::TensorShape({length}));
+
+      float* arr = (float*)ptr;
+      ptr += 4 * length;
+
+      std::copy_n(arr, length, tensor.flat<float>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case DOUBLE_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_DOUBLE,
+                                tensorflow::TensorShape({length}));
+
+      double* arr = (double*)ptr;
+      ptr += 8 * length;
+
+      std::copy_n(arr, length, tensor.flat<double>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case UCHAR_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_UINT16,
+                                tensorflow::TensorShape({length}));
+
+      uint16_t* arr = (uint16_t*)ptr;
+      ptr += length * 2;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::uint16>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case BOOL_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_BOOL,
+                                tensorflow::TensorShape({length}));
+
+      bool* arr = (bool*)ptr;
+      ptr += length;
+
+      std::copy_n(arr, length, tensor.flat<bool>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case STRING_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_STRING,
+                                tensorflow::TensorShape({length}));
+
+      for (int32_t i = 0; i < length; i++) {
+        int32_t str_length = *((int32_t*)ptr);
+        ptr += 4;
+        const int8_t* str = (const int8_t*)ptr;
+        ptr += str_length;
+        tensor.vec<std::string>()(i) = std::string((char*)str, str_length);
+      }
+
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case DATE_ARR: {
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
+                                tensorflow::DT_INT64,
+                                tensorflow::TensorShape({length}));
+      int64_t* arr = (int64_t*)ptr;
+      ptr += length * 8;
+
+      std::copy_n(arr, length, tensor.flat<tensorflow::int64>().data());
+      out_tensors.emplace_back(std::move(tensor));
+      break;
+    }
+    case WRAPPED_OBJ: {
+      int32_t byte_arr_size = *((int32_t*)ptr);
+      ptr += 4;
+
+      tensorflow::Status status = Parse(ptr, out_tensors, types);
+      if (!status.ok()) return status;
+
+      int32_t offset = *((int32_t*)ptr);
+      ptr += 4;
+
+      break;
+    }
+    case COMPLEX_OBJ: {
+      uint8_t version = *ptr;
+      ptr += 1;
+      int16_t flags = *((int16_t*)ptr);  // USER_TYPE = 1, HAS_SCHEMA = 2
+      ptr += 2;
+      int32_t type_id = *((int32_t*)ptr);
+      ptr += 4;
+      int32_t hash_code = *((int32_t*)ptr);
+      ptr += 4;
+      int32_t length = *((int32_t*)ptr);
+      ptr += 4;
+      int32_t schema_id = *((int32_t*)ptr);
+      ptr += 4;
+      int32_t schema_offset = *((int32_t*)ptr);
+      ptr += 4;
+
+      uint8_t* end = ptr + schema_offset - 24;
+      int32_t i = 0;
+      while (ptr < end) {
+        i++;
+        tensorflow::Status status = Parse(ptr, out_tensors, types);
+        if (!status.ok()) return status;
+      }
+
+      ptr += (length - schema_offset);
+
+      break;
+    }
+    default: {
+      return tensorflow::errors::Internal("Unknowd binary type (type id ",
+                                          (int)object_type_id, ")");
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
new file mode 100644
index 0000000000..1e845cbc56
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
@@ -0,0 +1,54 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <vector>
+#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace ignite {
+
+class BinaryObjectParser {
+ public:
+  tensorflow::Status Parse(uint8_t*& ptr,
+                           std::vector<tensorflow::Tensor>& out_tensors,
+                           std::vector<int32_t>& types);
+};
+
+enum ObjectType {
+  BYTE = 1,
+  SHORT = 2,
+  INT = 3,
+  LONG = 4,
+  FLOAT = 5,
+  DOUBLE = 6,
+  UCHAR = 7,
+  BOOL = 8,
+  STRING = 9,
+  DATE = 11,
+  BYTE_ARR = 12,
+  SHORT_ARR = 13,
+  INT_ARR = 14,
+  LONG_ARR = 15,
+  FLOAT_ARR = 16,
+  DOUBLE_ARR = 17,
+  UCHAR_ARR = 18,
+  BOOL_ARR = 19,
+  STRING_ARR = 20,
+  DATE_ARR = 22,
+  WRAPPED_OBJ = 27,
+  COMPLEX_OBJ = 103
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.cc b/tensorflow/contrib/ignite/kernels/ignite_client.cc
new file mode 100644
index 0000000000..5a8eddb944
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_client.cc
@@ -0,0 +1,55 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef IGNITE_CLIENT_H
+#define IGNITE_CLIENT_H
+#include "ignite_client.h"
+#endif
+
+namespace ignite {
+
+tensorflow::Status Client::ReadByte(uint8_t& data) {
+  return ReadData((uint8_t*)&data, 1);
+}
+
+tensorflow::Status Client::ReadShort(int16_t& data) {
+  return ReadData((uint8_t*)&data, 2);
+}
+
+tensorflow::Status Client::ReadInt(int32_t& data) {
+  return ReadData((uint8_t*)&data, 4);
+}
+
+tensorflow::Status Client::ReadLong(int64_t& data) {
+  return ReadData((uint8_t*)&data, 8);
+}
+
+tensorflow::Status Client::WriteByte(uint8_t data) {
+  return WriteData((uint8_t*)&data, 1);
+}
+
+tensorflow::Status Client::WriteShort(int16_t data) {
+  return WriteData((uint8_t*)&data, 2);
+}
+
+tensorflow::Status Client::WriteInt(int32_t data) {
+  return WriteData((uint8_t*)&data, 4);
+}
+
+tensorflow::Status Client::WriteLong(int64_t data) {
+  return WriteData((uint8_t*)&data, 8);
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h
new file mode 100644
index 0000000000..64e28d75f0
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_client.h
@@ -0,0 +1,40 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/lib/core/status.h"
+
+namespace ignite {
+
+class Client {
+ public:
+  virtual tensorflow::Status Connect() = 0;
+  virtual tensorflow::Status Disconnect() = 0;
+  virtual bool IsConnected() = 0;
+  virtual int GetSocketDescriptor() = 0;
+
+  virtual tensorflow::Status ReadByte(uint8_t& data);
+  virtual tensorflow::Status ReadShort(int16_t& data);
+  virtual tensorflow::Status ReadInt(int32_t& data);
+  virtual tensorflow::Status ReadLong(int64_t& data);
+  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length) = 0;
+
+  virtual tensorflow::Status WriteByte(uint8_t data);
+  virtual tensorflow::Status WriteShort(int16_t data);
+  virtual tensorflow::Status WriteInt(int32_t data);
+  virtual tensorflow::Status WriteLong(int64_t data);
+  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length) = 0;
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
new file mode 100644
index 0000000000..a9bf26955b
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
@@ -0,0 +1,123 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_dataset_iterator.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace ignite {
+
+IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx,
+                             std::string cache_name, std::string host,
+                             tensorflow::int32 port, bool local,
+                             tensorflow::int32 part,
+                             tensorflow::int32 page_size, std::string username,
+                             std::string password, std::string certfile,
+                             std::string keyfile, std::string cert_password,
+                             std::vector<tensorflow::int32> schema,
+                             std::vector<tensorflow::int32> permutation)
+    : DatasetBase(tensorflow::DatasetContext(ctx)),
+      cache_name(cache_name),
+      host(host),
+      port(port),
+      local(local),
+      part(part),
+      page_size(page_size),
+      username(username),
+      password(password),
+      certfile(certfile),
+      keyfile(keyfile),
+      cert_password(cert_password),
+      schema(schema),
+      permutation(permutation) {
+  SchemaToTypes();
+  SchemaToShapes();
+
+  LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name
+            << "', host='" << host << "', port=" << port << ", local=" << local
+            << ", part=" << part << ", page_size=" << page_size
+            << ", username='" << username << "', certfile='" << certfile
+            << "', keyfile='" << keyfile + "']";
+}
+
+IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; }
+
+std::unique_ptr<tensorflow::IteratorBase> IgniteDataset::MakeIteratorInternal(
+    const tensorflow::string& prefix) const {
+  return std::unique_ptr<tensorflow::IteratorBase>(new IgniteDatasetIterator(
+      {this, tensorflow::strings::StrCat(prefix, "::Ignite")}, this->host,
+      this->port, this->cache_name, this->local, this->part, this->page_size,
+      this->username, this->password, this->certfile, this->keyfile,
+      this->cert_password, this->schema, this->permutation));
+}
+
+const tensorflow::DataTypeVector& IgniteDataset::output_dtypes() const {
+  return dtypes;
+}
+
+const std::vector<tensorflow::PartialTensorShape>&
+IgniteDataset::output_shapes() const {
+  return shapes;
+}
+
+tensorflow::string IgniteDataset::DebugString() const {
+  return "IgniteDatasetOp::Dataset";
+}
+
+tensorflow::Status IgniteDataset::AsGraphDefInternal(
+    tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b,
+    tensorflow::Node** output) const {
+  return tensorflow::errors::Unimplemented(
+      "IgniteDataset does not support 'AsGraphDefInternal'");
+}
+
+void IgniteDataset::SchemaToTypes() {
+  for (auto e : schema) {
+    if (e == BYTE || e == BYTE_ARR) {
+      dtypes.push_back(tensorflow::DT_UINT8);
+    } else if (e == SHORT || e == SHORT_ARR) {
+      dtypes.push_back(tensorflow::DT_INT16);
+    } else if (e == INT || e == INT_ARR) {
+      dtypes.push_back(tensorflow::DT_INT32);
+    } else if (e == LONG || e == LONG_ARR) {
+      dtypes.push_back(tensorflow::DT_INT64);
+    } else if (e == FLOAT || e == FLOAT_ARR) {
+      dtypes.push_back(tensorflow::DT_FLOAT);
+    } else if (e == DOUBLE || e == DOUBLE_ARR) {
+      dtypes.push_back(tensorflow::DT_DOUBLE);
+    } else if (e == UCHAR || e == UCHAR_ARR) {
+      dtypes.push_back(tensorflow::DT_UINT8);
+    } else if (e == BOOL || e == BOOL_ARR) {
+      dtypes.push_back(tensorflow::DT_BOOL);
+    } else if (e == STRING || e == STRING_ARR) {
+      dtypes.push_back(tensorflow::DT_STRING);
+    } else {
+      LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
+    }
+  }
+}
+
+void IgniteDataset::SchemaToShapes() {
+  for (auto e : schema) {
+    if (e >= 1 && e < 10) {
+      shapes.push_back(tensorflow::PartialTensorShape({}));
+    } else if (e >= 12 && e < 21) {
+      shapes.push_back(tensorflow::PartialTensorShape({-1}));
+    } else {
+      LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
+    }
+  }
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
new file mode 100644
index 0000000000..2120dfd342
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
@@ -0,0 +1,65 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/dataset.h"
+
+namespace ignite {
+
+class IgniteDataset : public tensorflow::DatasetBase {
+ public:
+  IgniteDataset(tensorflow::OpKernelContext* ctx, std::string cache_name,
+                std::string host, tensorflow::int32 port, bool local,
+                tensorflow::int32 part, tensorflow::int32 page_size,
+                std::string username, std::string password,
+                std::string certfile, std::string keyfile,
+                std::string cert_password,
+                std::vector<tensorflow::int32> schema,
+                std::vector<tensorflow::int32> permutation);
+  ~IgniteDataset();
+  std::unique_ptr<tensorflow::IteratorBase> MakeIteratorInternal(
+      const tensorflow::string& prefix) const override;
+  const tensorflow::DataTypeVector& output_dtypes() const override;
+  const std::vector<tensorflow::PartialTensorShape>& output_shapes()
+      const override;
+  tensorflow::string DebugString() const override;
+
+ protected:
+  tensorflow::Status AsGraphDefInternal(
+      tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b,
+      tensorflow::Node** output) const override;
+
+ private:
+  const std::string cache_name;
+  const std::string host;
+  const tensorflow::int32 port;
+  const bool local;
+  const tensorflow::int32 part;
+  const tensorflow::int32 page_size;
+  const std::string username;
+  const std::string password;
+  const std::string certfile;
+  const std::string keyfile;
+  const std::string cert_password;
+  const std::vector<tensorflow::int32> schema;
+  const std::vector<tensorflow::int32> permutation;
+
+  tensorflow::DataTypeVector dtypes;
+  std::vector<tensorflow::PartialTensorShape> shapes;
+
+  void SchemaToTypes();
+  void SchemaToShapes();
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
new file mode 100644
index 0000000000..03cc3c1291
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
@@ -0,0 +1,447 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_dataset_iterator.h"
+
+#include "ignite_plain_client.h"
+#include "ignite_ssl_wrapper.h"
+#include "tensorflow/core/platform/logging.h"
+
+#include <time.h>
+#include <chrono>
+
+namespace ignite {
+
+#define CHECK_STATUS(status) \
+  if (!status.ok()) return status;
+
+IgniteDatasetIterator::IgniteDatasetIterator(
+    const Params& params, std::string host, tensorflow::int32 port,
+    std::string cache_name, bool local, tensorflow::int32 part,
+    tensorflow::int32 page_size, std::string username, std::string password,
+    std::string certfile, std::string keyfile, std::string cert_password,
+    std::vector<tensorflow::int32> schema,
+    std::vector<tensorflow::int32> permutation)
+    : tensorflow::DatasetIterator<IgniteDataset>(params),
+      cache_name(cache_name),
+      local(local),
+      part(part),
+      page_size(page_size),
+      username(username),
+      password(password),
+      schema(schema),
+      permutation(permutation),
+      remainder(-1),
+      cursor_id(-1),
+      last_page(false) {
+  Client* p_client = new PlainClient(host, port);
+
+  if (certfile.empty())
+    client = std::unique_ptr<Client>(p_client);
+  else
+    client = std::unique_ptr<Client>(new SslWrapper(
+        std::unique_ptr<Client>(p_client), certfile, keyfile, cert_password));
+
+  LOG(INFO) << "Ignite Dataset Iterator created";
+}
+
+IgniteDatasetIterator::~IgniteDatasetIterator() {
+  tensorflow::Status status = CloseConnection();
+  if (!status.ok()) LOG(ERROR) << status.ToString();
+
+  LOG(INFO) << "Ignite Dataset Iterator destroyed";
+}
+
+tensorflow::Status IgniteDatasetIterator::EstablishConnection() {
+  if (!client->IsConnected()) {
+    tensorflow::Status status = client->Connect();
+    if (!status.ok()) return status;
+
+    status = Handshake();
+    if (!status.ok()) {
+      tensorflow::Status disconnect_status = client->Disconnect();
+      if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString();
+
+      return status;
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::CloseConnection() {
+  if (cursor_id != -1 && !last_page) {
+    tensorflow::Status conn_status = EstablishConnection();
+    if (!conn_status.ok()) return conn_status;
+
+    CHECK_STATUS(client->WriteInt(18));  // Message length
+    CHECK_STATUS(
+        client->WriteShort(close_connection_opcode));  // Operation code
+    CHECK_STATUS(client->WriteLong(0));                // Request ID
+    CHECK_STATUS(client->WriteLong(cursor_id));        // Resource ID
+
+    int32_t res_len;
+    CHECK_STATUS(client->ReadInt(res_len));
+    if (res_len < 12)
+      return tensorflow::errors::Internal(
+          "Close Resource Response is corrupted");
+
+    int64_t req_id;
+    CHECK_STATUS(client->ReadLong(req_id));
+    int32_t status;
+    CHECK_STATUS(client->ReadInt(status));
+    if (status != 0) {
+      uint8_t err_msg_header;
+      CHECK_STATUS(client->ReadByte(err_msg_header));
+      if (err_msg_header == string_val) {
+        int32_t err_msg_length;
+        CHECK_STATUS(client->ReadInt(err_msg_length));
+        uint8_t* err_msg_c = new uint8_t[err_msg_length];
+        CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+        std::string err_msg((char*)err_msg_c, err_msg_length);
+        delete[] err_msg_c;
+
+        return tensorflow::errors::Internal("Close Resource Error [status=",
+                                            status, ", message=", err_msg, "]");
+      }
+      return tensorflow::errors::Internal("Close Resource Error [status=",
+                                          status, "]");
+    }
+
+    LOG(INFO) << "Query Cursor " << cursor_id << " is closed";
+
+    cursor_id = -1;
+
+    return client->Disconnect();
+  } else {
+    LOG(INFO) << "Query Cursor " << cursor_id << " is already closed";
+  }
+
+  return client->IsConnected() ? client->Disconnect()
+                               : tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::GetNextInternal(
+    tensorflow::IteratorContext* ctx,
+    std::vector<tensorflow::Tensor>* out_tensors, bool* end_of_sequence) {
+  if (remainder == 0 && last_page) {
+    LOG(INFO) << "Query Cursor " << cursor_id << " is closed";
+
+    cursor_id = -1;
+    *end_of_sequence = true;
+    return tensorflow::Status::OK();
+  } else {
+    tensorflow::Status status = EstablishConnection();
+    if (!status.ok()) return status;
+
+    if (remainder == -1 || remainder == 0) {
+      tensorflow::Status status =
+          remainder == -1 ? ScanQuery() : LoadNextPage();
+      if (!status.ok()) return status;
+    }
+
+    uint8_t* initial_ptr = ptr;
+    std::vector<int32_t> types;
+    std::vector<tensorflow::Tensor> tensors;
+
+    status = parser.Parse(ptr, tensors, types);  // Parse key
+    if (!status.ok()) return status;
+
+    status = parser.Parse(ptr, tensors, types);  // Parse val
+    if (!status.ok()) return status;
+
+    remainder -= (ptr - initial_ptr);
+
+    out_tensors->resize(tensors.size());
+    for (int32_t i = 0; i < tensors.size(); i++)
+      (*out_tensors)[permutation[i]] = std::move(tensors[i]);
+
+    *end_of_sequence = false;
+    return tensorflow::Status::OK();
+  }
+
+  *end_of_sequence = true;
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::SaveInternal(
+    tensorflow::IteratorStateWriter* writer) {
+  return tensorflow::errors::Unimplemented(
+      "Iterator for IgniteDataset does not support 'SaveInternal'");
+}
+
+tensorflow::Status IgniteDatasetIterator::RestoreInternal(
+    tensorflow::IteratorContext* ctx, tensorflow::IteratorStateReader* reader) {
+  return tensorflow::errors::Unimplemented(
+      "Iterator for IgniteDataset does not support 'RestoreInternal')");
+}
+
+tensorflow::Status IgniteDatasetIterator::Handshake() {
+  int32_t msg_len = 8;
+
+  if (username.empty())
+    msg_len += 1;
+  else
+    msg_len += 5 + username.length();
+
+  if (password.empty())
+    msg_len += 1;
+  else
+    msg_len += 5 + password.length();
+
+  CHECK_STATUS(client->WriteInt(msg_len));
+  CHECK_STATUS(client->WriteByte(1));
+  CHECK_STATUS(client->WriteShort(protocol_major_version));
+  CHECK_STATUS(client->WriteShort(protocol_minor_version));
+  CHECK_STATUS(client->WriteShort(protocol_patch_version));
+  CHECK_STATUS(client->WriteByte(2));
+  if (username.empty()) {
+    CHECK_STATUS(client->WriteByte(null_val));
+  } else {
+    CHECK_STATUS(client->WriteByte(string_val));
+    CHECK_STATUS(client->WriteInt(username.length()));
+    CHECK_STATUS(
+        client->WriteData((uint8_t*)username.c_str(), username.length()));
+  }
+
+  if (password.empty()) {
+    CHECK_STATUS(client->WriteByte(null_val));
+  } else {
+    CHECK_STATUS(client->WriteByte(string_val));
+    CHECK_STATUS(client->WriteInt(password.length()));
+    CHECK_STATUS(
+        client->WriteData((uint8_t*)password.c_str(), password.length()));
+  }
+
+  int32_t handshake_res_len;
+  CHECK_STATUS(client->ReadInt(handshake_res_len));
+  uint8_t handshake_res;
+  CHECK_STATUS(client->ReadByte(handshake_res));
+
+  LOG(INFO) << "Handshake length " << handshake_res_len << ", res "
+            << (int16_t)handshake_res;
+
+  if (handshake_res != 1) {
+    int16_t serv_ver_major;
+    CHECK_STATUS(client->ReadShort(serv_ver_major));
+    int16_t serv_ver_minor;
+    CHECK_STATUS(client->ReadShort(serv_ver_minor));
+    int16_t serv_ver_patch;
+    CHECK_STATUS(client->ReadShort(serv_ver_patch));
+    uint8_t header;
+    CHECK_STATUS(client->ReadByte(header));
+
+    if (header == string_val) {
+      int32_t length;
+      CHECK_STATUS(client->ReadInt(length));
+      uint8_t* err_msg_c = new uint8_t[length];
+      CHECK_STATUS(client->ReadData(err_msg_c, length));
+      std::string err_msg((char*)err_msg_c, length);
+      delete[] err_msg_c;
+
+      return tensorflow::errors::Internal(
+          "Handshake Error [result=", handshake_res, ", version=",
+          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch,
+          ", message='", err_msg, "']");
+    } else if (header == null_val) {
+      return tensorflow::errors::Internal(
+          "Handshake Error [result=", handshake_res, ", version=",
+          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]");
+    } else {
+      return tensorflow::errors::Internal(
+          "Handshake Error [result=", handshake_res, ", version=",
+          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]");
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::ScanQuery() {
+  CHECK_STATUS(client->WriteInt(25));                        // Message length
+  CHECK_STATUS(client->WriteShort(scan_query_opcode));       // Operation code
+  CHECK_STATUS(client->WriteLong(0));                        // Request ID
+  CHECK_STATUS(client->WriteInt(JavaHashCode(cache_name)));  // Cache name
+  CHECK_STATUS(client->WriteByte(0));                        // Flags
+  CHECK_STATUS(client->WriteByte(null_val));                 // Filter object
+  CHECK_STATUS(client->WriteInt(page_size));                 // Cursor page size
+  CHECK_STATUS(client->WriteInt(part));    // Partition to query
+  CHECK_STATUS(client->WriteByte(local));  // Local flag
+
+  int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
+                           std::chrono::system_clock::now().time_since_epoch())
+                           .count();
+
+  int32_t res_len;
+  CHECK_STATUS(client->ReadInt(res_len));
+
+  int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
+                          std::chrono::system_clock::now().time_since_epoch())
+                          .count();
+
+  LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms";
+
+  if (res_len < 12)
+    return tensorflow::errors::Internal("Scan Query Response is corrupted");
+
+  int64_t req_id;
+  CHECK_STATUS(client->ReadLong(req_id));
+
+  int32_t status;
+  CHECK_STATUS(client->ReadInt(status));
+
+  if (status != 0) {
+    uint8_t err_msg_header;
+    CHECK_STATUS(client->ReadByte(err_msg_header));
+
+    if (err_msg_header == string_val) {
+      int32_t err_msg_length;
+      CHECK_STATUS(client->ReadInt(err_msg_length));
+
+      uint8_t* err_msg_c = new uint8_t[err_msg_length];
+      CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+      std::string err_msg((char*)err_msg_c, err_msg_length);
+      delete[] err_msg_c;
+
+      return tensorflow::errors::Internal("Scan Query Error [status=", status,
+                                          ", message=", err_msg, "]");
+    }
+    return tensorflow::errors::Internal("Scan Query Error [status=", status,
+                                        "]");
+  }
+
+  CHECK_STATUS(client->ReadLong(cursor_id));
+
+  LOG(INFO) << "Query Cursor " << cursor_id << " is opened";
+
+  int32_t row_cnt;
+  CHECK_STATUS(client->ReadInt(row_cnt));
+
+  remainder = res_len - 25;
+  page = std::unique_ptr<uint8_t>(new uint8_t[remainder]);
+  ptr = page.get();
+
+  int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
+                      std::chrono::system_clock::now().time_since_epoch())
+                      .count();
+
+  CHECK_STATUS(client->ReadData(ptr, remainder));
+
+  int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
+                     std::chrono::system_clock::now().time_since_epoch())
+                     .count();
+  ;
+
+  double size_in_mb = 1.0 * remainder / 1024 / 1024;
+  double time_in_s = 1.0 * (stop - start) / 1000;
+  LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
+            << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
+
+  uint8_t last_page_b;
+  CHECK_STATUS(client->ReadByte(last_page_b));
+
+  last_page = !last_page_b;
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status IgniteDatasetIterator::LoadNextPage() {
+  CHECK_STATUS(client->WriteInt(18));                       // Message length
+  CHECK_STATUS(client->WriteShort(load_next_page_opcode));  // Operation code
+  CHECK_STATUS(client->WriteLong(0));                       // Request ID
+  CHECK_STATUS(client->WriteLong(cursor_id));               // Cursor ID
+
+  int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
+                           std::chrono::system_clock::now().time_since_epoch())
+                           .count();
+
+  int32_t res_len;
+  CHECK_STATUS(client->ReadInt(res_len));
+
+  int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
+                          std::chrono::system_clock::now().time_since_epoch())
+                          .count();
+
+  LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms";
+
+  if (res_len < 12)
+    return tensorflow::errors::Internal("Load Next Page Response is corrupted");
+
+  int64_t req_id;
+  CHECK_STATUS(client->ReadLong(req_id));
+
+  int32_t status;
+  CHECK_STATUS(client->ReadInt(status));
+
+  if (status != 0) {
+    uint8_t err_msg_header;
+    CHECK_STATUS(client->ReadByte(err_msg_header));
+
+    if (err_msg_header == string_val) {
+      int32_t err_msg_length;
+      CHECK_STATUS(client->ReadInt(err_msg_length));
+
+      uint8_t* err_msg_c = new uint8_t[err_msg_length];
+      CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+      std::string err_msg((char*)err_msg_c, err_msg_length);
+      delete[] err_msg_c;
+
+      return tensorflow::errors::Internal("Load Next Page Error [status=",
+                                          status, ", message=", err_msg, "]");
+    }
+    return tensorflow::errors::Internal("Load Next Page Error [status=", status,
+                                        "]");
+  }
+
+  int32_t row_cnt;
+  CHECK_STATUS(client->ReadInt(row_cnt));
+
+  remainder = res_len - 17;
+  page = std::unique_ptr<uint8_t>(new uint8_t[remainder]);
+  ptr = page.get();
+
+  int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
+                      std::chrono::system_clock::now().time_since_epoch())
+                      .count();
+
+  CHECK_STATUS(client->ReadData(ptr, remainder));
+
+  int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
+                     std::chrono::system_clock::now().time_since_epoch())
+                     .count();
+  ;
+
+  double size_in_mb = 1.0 * remainder / 1024 / 1024;
+  double time_in_s = 1.0 * (stop - start) / 1000;
+  LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
+            << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
+
+  uint8_t last_page_b;
+  CHECK_STATUS(client->ReadByte(last_page_b));
+
+  last_page = !last_page_b;
+
+  return tensorflow::Status::OK();
+}
+
+int32_t IgniteDatasetIterator::JavaHashCode(std::string str) {
+  int32_t h = 0;
+  for (char& c : str) {
+    h = 31 * h + c;
+  }
+  return h;
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
new file mode 100644
index 0000000000..d1df4527f9
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
@@ -0,0 +1,87 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_binary_object_parser.h"
+#include "ignite_dataset.h"
+
+#ifndef IGNITE_CLIENT_H
+#define IGNITE_CLIENT_H
+#include "ignite_client.h"
+#endif
+
+namespace ignite {
+
+class IgniteDatasetIterator
+    : public tensorflow::DatasetIterator<IgniteDataset> {
+ public:
+  IgniteDatasetIterator(const Params& params, std::string host,
+                        tensorflow::int32 port, std::string cache_name,
+                        bool local, tensorflow::int32 part,
+                        tensorflow::int32 page_size, std::string username,
+                        std::string password, std::string certfile,
+                        std::string keyfile, std::string cert_password,
+                        std::vector<tensorflow::int32> schema,
+                        std::vector<tensorflow::int32> permutation);
+  ~IgniteDatasetIterator();
+  tensorflow::Status GetNextInternal(
+      tensorflow::IteratorContext* ctx,
+      std::vector<tensorflow::Tensor>* out_tensors,
+      bool* end_of_sequence) override;
+
+ protected:
+  tensorflow::Status SaveInternal(
+      tensorflow::IteratorStateWriter* writer) override;
+  tensorflow::Status RestoreInternal(
+      tensorflow::IteratorContext* ctx,
+      tensorflow::IteratorStateReader* reader) override;
+
+ private:
+  std::unique_ptr<Client> client;
+  BinaryObjectParser parser;
+
+  const std::string cache_name;
+  const bool local;
+  const tensorflow::int32 part;
+  const tensorflow::int32 page_size;
+  const std::string username;
+  const std::string password;
+  const std::vector<tensorflow::int32> schema;
+  const std::vector<tensorflow::int32> permutation;
+
+  int32_t remainder;
+  int64_t cursor_id;
+  bool last_page;
+
+  std::unique_ptr<uint8_t> page;
+  uint8_t* ptr;
+
+  tensorflow::Status EstablishConnection();
+  tensorflow::Status CloseConnection();
+  tensorflow::Status Handshake();
+  tensorflow::Status ScanQuery();
+  tensorflow::Status LoadNextPage();
+  int32_t JavaHashCode(std::string str);
+};
+
+constexpr uint8_t null_val = 101;
+constexpr uint8_t string_val = 9;
+constexpr uint8_t protocol_major_version = 1;
+constexpr uint8_t protocol_minor_version = 1;
+constexpr uint8_t protocol_patch_version = 0;
+constexpr int16_t scan_query_opcode = 2000;
+constexpr int16_t load_next_page_opcode = 2001;
+constexpr int16_t close_connection_opcode = 0;
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
new file mode 100644
index 0000000000..543b5e4afc
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -0,0 +1,145 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_dataset.h"
+#include <stdlib.h>
+#include "tensorflow/core/framework/dataset.h"
+
+namespace tensorflow {
+
+class IgniteDatasetOp : public DatasetOpKernel {
+ public:
+  using DatasetOpKernel::DatasetOpKernel;
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override {
+    std::string cache_name = "";
+    std::string host = "";
+    int32 port = -1;
+    bool local = false;
+    int32 part = -1;
+    int32 page_size = -1;
+    std::string username = "";
+    std::string password = "";
+    std::string certfile = "";
+    std::string keyfile = "";
+    std::string cert_password = "";
+
+    const char* env_cache_name = std::getenv("IGNITE_DATASET_CACHE_NAME");
+    const char* env_host = std::getenv("IGNITE_DATASET_HOST");
+    const char* env_port = std::getenv("IGNITE_DATASET_PORT");
+    const char* env_local = std::getenv("IGNITE_DATASET_LOCAL");
+    const char* env_part = std::getenv("IGNITE_DATASET_PART");
+    const char* env_page_size = std::getenv("IGNITE_DATASET_PAGE_SIZE");
+    const char* env_username = std::getenv("IGNITE_DATASET_USERNAME");
+    const char* env_password = std::getenv("IGNITE_DATASET_PASSWORD");
+    const char* env_certfile = std::getenv("IGNITE_DATASET_CERTFILE");
+    const char* env_keyfile = std::getenv("IGNITE_DATASET_KEYFILE");
+    const char* env_cert_password = std::getenv("IGNITE_DATASET_CERT_PASSWORD");
+
+    if (env_cache_name)
+      cache_name = std::string(env_cache_name);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "cache_name",
+                                                           &cache_name));
+
+    if (env_host)
+      host = std::string(env_host);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "host", &host));
+
+    if (env_port)
+      port = atoi(env_port);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<int32>(ctx, "port", &port));
+
+    if (env_local)
+      local = true;
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<bool>(ctx, "local", &local));
+
+    if (env_part)
+      part = atoi(env_part);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<int32>(ctx, "part", &part));
+
+    if (env_page_size)
+      page_size = atoi(env_page_size);
+    else
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<int32>(ctx, "page_size", &page_size));
+
+    if (env_username)
+      username = std::string(env_username);
+    else
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<std::string>(ctx, "username", &username));
+
+    if (env_password)
+      password = std::string(env_password);
+    else
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<std::string>(ctx, "password", &password));
+
+    if (env_certfile)
+      certfile = std::string(env_certfile);
+    else
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<std::string>(ctx, "certfile", &certfile));
+
+    if (env_keyfile)
+      keyfile = std::string(env_keyfile);
+    else
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<std::string>(ctx, "keyfile", &keyfile));
+
+    if (env_cert_password)
+      cert_password = std::string(env_cert_password);
+    else
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "cert_password",
+                                                           &cert_password));
+
+    const Tensor* schema_tensor;
+    OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor));
+    OP_REQUIRES(ctx, schema_tensor->dims() == 1,
+                errors::InvalidArgument("`schema` must be a vector."));
+
+    std::vector<int32> schema;
+    schema.reserve(schema_tensor->NumElements());
+    for (int i = 0; i < schema_tensor->NumElements(); i++) {
+      schema.push_back(schema_tensor->flat<int32>()(i));
+    }
+
+    const Tensor* permutation_tensor;
+    OP_REQUIRES_OK(ctx, ctx->input("permutation", &permutation_tensor));
+    OP_REQUIRES(ctx, schema_tensor->dims() == 1,
+                errors::InvalidArgument("`permutation` must be a vector."));
+
+    std::vector<int32> permutation;
+    permutation.reserve(permutation_tensor->NumElements());
+    for (int i = 0; i < permutation_tensor->NumElements(); i++) {
+      permutation.push_back(permutation_tensor->flat<int32>()(i));
+    }
+
+    *output = new ignite::IgniteDataset(
+        ctx, cache_name, host, port, local, part, page_size, username, password,
+        certfile, keyfile, cert_password, std::move(schema),
+        std::move(permutation));
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("IgniteDataset").Device(DEVICE_CPU),
+                        IgniteDatasetOp);
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
new file mode 100644
index 0000000000..5491af68d6
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef IGNITE_CLIENT_H
+#define IGNITE_CLIENT_H
+#include "ignite_client.h"
+#endif
+
+#include <string>
+
+namespace ignite {
+
+class PlainClient : public Client {
+ public:
+  PlainClient(std::string host, int port);
+  ~PlainClient();
+
+  virtual tensorflow::Status Connect();
+  virtual tensorflow::Status Disconnect();
+  virtual bool IsConnected();
+  virtual int GetSocketDescriptor();
+  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length);
+  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length);
+
+ private:
+  std::string host;
+  int port;
+  int sock;
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
new file mode 100644
index 0000000000..dbfa4f8786
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
@@ -0,0 +1,132 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_plain_client.h"
+
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/socket.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <map>
+
+#include <iostream>
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace ignite {
+
+PlainClient::PlainClient(std::string host, int port)
+    : host(host), port(port), sock(-1) {}
+
+PlainClient::~PlainClient() {
+  if (IsConnected()) {
+    tensorflow::Status status = Disconnect();
+    if (!status.ok()) LOG(WARNING) << status.ToString();
+  }
+}
+
+tensorflow::Status PlainClient::Connect() {
+  if (sock == -1) {
+    sock = socket(AF_INET, SOCK_STREAM, 0);
+    if (sock == -1)
+      return tensorflow::errors::Internal("Failed to create socket");
+  }
+
+  sockaddr_in server;
+
+  server.sin_addr.s_addr = inet_addr(host.c_str());
+  if (server.sin_addr.s_addr == -1) {
+    hostent* he;
+    in_addr** addr_list;
+
+    if ((he = gethostbyname(host.c_str())) == NULL)
+      return tensorflow::errors::Internal("Failed to resolve hostname \"", host,
+                                          "\"");
+
+    addr_list = (in_addr**)he->h_addr_list;
+    if (addr_list[0] != NULL) server.sin_addr = *addr_list[0];
+  }
+
+  server.sin_family = AF_INET;
+  server.sin_port = htons(port);
+
+  if (connect(sock, (sockaddr*)&server, sizeof(server)) < 0)
+    return tensorflow::errors::Internal("Failed to connect to \"", host, ":",
+                                        port, "\"");
+
+  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established";
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status PlainClient::Disconnect() {
+  int close_res = close(sock);
+  sock = -1;
+
+  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" is closed";
+
+  return close_res == 0 ? tensorflow::Status::OK()
+                        : tensorflow::errors::Internal(
+                              "Failed to correctly close connection");
+}
+
+bool PlainClient::IsConnected() { return sock != -1; }
+
+int PlainClient::GetSocketDescriptor() { return sock; }
+
+tensorflow::Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
+  int recieved = 0;
+
+  while (recieved < length) {
+    int res = recv(sock, buf, length - recieved, 0);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while reading from socket: ", res, ", ",
+          std::string(strerror(errno)));
+
+    if (res == 0)
+      return tensorflow::errors::Internal("Server closed connection");
+
+    recieved += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
+  int sent = 0;
+
+  while (sent < length) {
+    int res = send(sock, buf, length - sent, 0);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while writing into socket: ", res, ", ",
+          std::string(strerror(errno)));
+
+    sent += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
new file mode 100644
index 0000000000..f78c9b3627
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -0,0 +1,143 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_plain_client.h"
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <winsock2.h>
+#include <ws2tcpip.h>
+
+#pragma comment(lib, "Ws2_32.lib")
+#pragma comment(lib, "Mswsock.lib")
+#pragma comment(lib, "AdvApi32.lib")
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace ignite {
+
+PlainClient::PlainClient(std::string host, int port)
+    : host(host), port(port), sock(INVALID_SOCKET) {}
+
+PlainClient::~PlainClient() {
+  if (IsConnected()) {
+    tensorflow::Status status = Disconnect();
+    if (!status.ok()) LOG(WARNING) << status.ToString();
+  }
+}
+
+tensorflow::Status PlainClient::Connect() {
+  WSADATA wsaData;
+  addrinfo *result = NULL, *ptr = NULL, hints;
+
+  int res = WSAStartup(MAKEWORD(2, 2), &wsaData);
+  if (res != 0)
+    return tensorflow::errors::Internal("WSAStartup failed with error: ", res);
+
+  ZeroMemory(&hints, sizeof(hints));
+  hints.ai_family = AF_UNSPEC;
+  hints.ai_socktype = SOCK_STREAM;
+  hints.ai_protocol = IPPROTO_TCP;
+
+  res =
+      getaddrinfo(host.c_str(), std::to_string(port).c_str(), &hints, &result);
+  if (res != 0)
+    return tensorflow::errors::Internal("Getaddrinfo failed with error: ", res);
+
+  for (ptr = result; ptr != NULL; ptr = ptr->ai_next) {
+    sock = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
+    if (sock == INVALID_SOCKET) {
+      WSACleanup();
+      return tensorflow::errors::Internal("Socket failed with error: ",
+                                          WSAGetLastError());
+    }
+
+    res = connect(sock, ptr->ai_addr, (int)ptr->ai_addrlen);
+    if (res == SOCKET_ERROR) {
+      closesocket(sock);
+      sock = INVALID_SOCKET;
+      continue;
+    }
+
+    break;
+  }
+
+  freeaddrinfo(result);
+
+  if (sock == INVALID_SOCKET) {
+    WSACleanup();
+    return tensorflow::errors::Internal("Unable to connect to server");
+  }
+
+  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established";
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status PlainClient::Disconnect() {
+  int res = shutdown(sock, SD_SEND);
+  closesocket(sock);
+  WSACleanup();
+
+  if (res == SOCKET_ERROR)
+    return tensorflow::errors::Internal("Shutdown failed with error: ",
+                                        WSAGetLastError());
+  else
+    return tensorflow::Status::OK();
+}
+
+bool PlainClient::IsConnected() { return sock != INVALID_SOCKET; }
+
+int PlainClient::GetSocketDescriptor() { return sock; }
+
+tensorflow::Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
+  int recieved = 0;
+
+  while (recieved < length) {
+    int res = recv(sock, buf, length - recieved, 0);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while reading from socket: ", res);
+
+    if (res == 0)
+      return tensorflow::errors::Internal("Server closed connection");
+
+    recieved += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
+  int sent = 0;
+
+  while (sent < length) {
+    int res = send(sock, buf, length - sent, 0);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while writing into socket: ", res);
+
+    sent += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
new file mode 100644
index 0000000000..a1101b91f3
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
@@ -0,0 +1,149 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "ignite_ssl_wrapper.h"
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/logging.h"
+
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+
+namespace ignite {
+
+static int PasswordCb(char *buf, int size, int rwflag, void *password) {
+  strncpy(buf, (char *)(password), size);
+  buf[size - 1] = '\0';
+  return (strlen(buf));
+}
+
+SslWrapper::SslWrapper(std::shared_ptr<Client> client, std::string certfile,
+                       std::string keyfile, std::string cert_password)
+    : client(client),
+      certfile(certfile),
+      keyfile(keyfile),
+      cert_password(cert_password),
+      ctx(NULL) {}
+
+SslWrapper::~SslWrapper() {
+  if (IsConnected()) {
+    tensorflow::Status status = Disconnect();
+    if (!status.ok()) LOG(WARNING) << status.ToString();
+  }
+
+  if (ctx != NULL) {
+    SSL_CTX_free(ctx);
+    ctx = NULL;
+  }
+}
+
+tensorflow::Status SslWrapper::InitSslContext() {
+  OpenSSL_add_all_algorithms();
+  SSL_load_error_strings();
+
+  ctx = SSL_CTX_new(SSLv23_method());
+  if (ctx == NULL)
+    return tensorflow::errors::Internal("Couldn't create SSL context");
+
+  SSL_CTX_set_default_passwd_cb(ctx, PasswordCb);
+  SSL_CTX_set_default_passwd_cb_userdata(ctx, (void *)cert_password.c_str());
+
+  if (SSL_CTX_use_certificate_chain_file(ctx, certfile.c_str()) != 1)
+    return tensorflow::errors::Internal(
+        "Couldn't load cetificate chain (file '", certfile, "')");
+
+  std::string private_key_file = keyfile.empty() ? certfile : keyfile;
+  if (SSL_CTX_use_PrivateKey_file(ctx, private_key_file.c_str(),
+                                  SSL_FILETYPE_PEM) != 1)
+    return tensorflow::errors::Internal("Couldn't load private key (file '",
+                                        private_key_file, "')");
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status SslWrapper::Connect() {
+  tensorflow::Status status;
+
+  if (ctx == NULL) {
+    status = InitSslContext();
+    if (!status.ok()) return status;
+  }
+
+  ssl = SSL_new(ctx);
+  if (ssl == NULL)
+    return tensorflow::errors::Internal("Failed to establish SSL connection");
+
+  status = client->Connect();
+  if (!status.ok()) return status;
+
+  SSL_set_fd(ssl, client->GetSocketDescriptor());
+  if (SSL_connect(ssl) != 1)
+    return tensorflow::errors::Internal("Failed to establish SSL connection");
+
+  LOG(INFO) << "SSL connection established";
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status SslWrapper::Disconnect() {
+  SSL_free(ssl);
+
+  LOG(INFO) << "SSL connection closed";
+
+  return client->Disconnect();
+}
+
+bool SslWrapper::IsConnected() { return client->IsConnected(); }
+
+int SslWrapper::GetSocketDescriptor() { return client->GetSocketDescriptor(); }
+
+tensorflow::Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
+  int recieved = 0;
+
+  while (recieved < length) {
+    int res = SSL_read(ssl, buf, length - recieved);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while reading from SSL socket: ", res);
+
+    if (res == 0)
+      return tensorflow::errors::Internal("Server closed SSL connection");
+
+    recieved += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status SslWrapper::WriteData(uint8_t *buf, int32_t length) {
+  int sent = 0;
+
+  while (sent < length) {
+    int res = SSL_write(ssl, buf, length - sent);
+
+    if (res < 0)
+      return tensorflow::errors::Internal(
+          "Error occured while writing into socket: ", res);
+
+    sent += res;
+    buf += res;
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
new file mode 100644
index 0000000000..e0c2a242dc
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef IGNITE_CLIENT_H
+#define IGNITE_CLIENT_H
+#include "ignite_client.h"
+#endif
+
+#include <openssl/ssl.h>
+#include <string>
+
+namespace ignite {
+
+class SslWrapper : public Client {
+ public:
+  SslWrapper(std::shared_ptr<Client> client, std::string certfile,
+             std::string keyfile, std::string cert_password);
+  ~SslWrapper();
+
+  virtual tensorflow::Status Connect();
+  virtual tensorflow::Status Disconnect();
+  virtual bool IsConnected();
+  virtual int GetSocketDescriptor();
+  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length);
+  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length);
+
+ private:
+  std::shared_ptr<Client> client;
+  std::string certfile;
+  std::string keyfile;
+  std::string cert_password;
+  SSL_CTX* ctx;
+  SSL* ssl;
+  tensorflow::Status InitSslContext();
+};
+
+}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc
new file mode 100644
index 0000000000..17494d1cfd
--- /dev/null
+++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc
@@ -0,0 +1,64 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+REGISTER_OP("IgniteDataset")
+  .Input("cache_name: string")
+  .Input("host: string")
+  .Input("port: int32")
+  .Input("local: bool")
+  .Input("part: int32")
+  .Input("page_size: int32")
+  .Input("username: string")
+  .Input("password: string")
+  .Input("certfile: string")
+  .Input("keyfile: string")
+  .Input("cert_password: string")
+  .Input("schema: int32")
+  .Input("permutation: int32")
+  .Output("handle: variant")
+  .SetIsStateful()
+  .SetShapeFn(shape_inference::ScalarShape)
+  .Doc(R"doc(
+Apache Ignite is a memory-centric distributed database, caching, and processing
+platform for transactional, analytical, and streaming workloads, delivering 
+in-memory speeds at petabyte scale. This contrib package contains an 
+integration between Apache Ignite and TensorFlow. The integration is based on 
+tf.data from TensorFlow side and Binary Client Protocol from Apache Ignite side. 
+It allows to use Apache Ignite as a datasource for neural network training, 
+inference and all other computations supported by TensorFlow. Ignite Dataset
+is based on Apache Ignite Binary Client Protocol.
+
+cache_name: Ignite Cache Name.
+host: Ignite Thin Client Host.
+port: Ignite Thin Client Port.
+local: Local flag that defines that data should be fetched from local host only.
+part: Partition data should be fetched from.
+page_size: Page size for Ignite Thin Client.
+username: Username to authenticate via Ignite Thin Client.
+password: Password to authenticate via Ignite Thin Client.
+certfile: SSL certificate to establish SSL connection.
+keyfile: Private key file to establish SSL connection.
+cert_password: SSL certificate password to establish SSL connection.
+schema: Internal structure that defines schema of cache objects.
+permutation: Internal structure that defines permutation of cache objects.
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
new file mode 100644
index 0000000000..6fa073957a
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -0,0 +1,763 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Ignite Dataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import socket
+import struct
+import ssl
+import abc
+
+from tensorflow.contrib.ignite.python.ops import ignite_op_loader  # pylint: disable=unused-import
+from tensorflow.contrib.ignite.python.ops import gen_dataset_ops
+from tensorflow.python.data.ops.dataset_ops import Dataset
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+
+class Readable():
+  """Readable abstract class that exposes methods to do reading-related
+     operations.
+  """
+
+  @abc.abstractmethod
+  def __init__(self):
+    pass
+
+  def read_byte(self):
+    """Reads and returnes byte."""
+    return self.__read("b", 1)
+
+  def read_short(self):
+    """Reads and returns short (2 bytes, little-endian)."""
+    return self.__read("h", 2)
+
+  def read_int(self):
+    """Reads and returns int (4 bytes, little-endian)."""
+    return self.__read("i", 4)
+
+  def read_long(self):
+    """Reads and returns long (8 bytes, little-endian)."""
+    return self.__read("q", 8)
+
+  def skip(self, length):
+    """Skips the specified number of bytes."""
+    self.read_data(length)
+
+  @abc.abstractmethod
+  def read_data(self, length):
+    """Reads the specified number of bytes and returns them as a buffer."""
+    return None
+
+  def __read(self, data_type, length):
+    """Reads, unpacks and returns specified type (little-endian)."""
+    buffer = self.read_data(length)
+    return struct.unpack("<" + data_type, buffer)[0]
+
+class DataBuffer(Readable):
+  """DataBuffer class that exposes methods to read data from a byte buffer."""
+
+  def __init__(self, buffer):
+    """Constructs a new instance of DataBuffer based on the specified byte
+       buffer.
+
+    Args:
+      buffer: Buffer to be read.
+    """
+    Readable.__init__(self)
+    self.buffer = buffer
+    self.ptr = 0
+
+  def read_data(self, length):
+    """Reads the specified number of bytes and returns them as a buffer."""
+    data_buffer = self.buffer[self.ptr:][:length]
+    self.ptr += length
+    return data_buffer
+
+class TcpClient(Readable):
+  """TcpClient class that exposes methods to read data from a socket."""
+
+  def __init__(self, host, port, certfile=None, keyfile=None, password=None):
+    """Constructs a new instance of TcpClient based on the specified host
+       and port.
+
+    Args:
+      host: Host to be connected.
+      port: Port to be connected.
+      certfile: File in PEM format containing the certificate as well as any
+        number of CA certificates needed to establish the certificate’s
+        authenticity.
+      keyfile: File containing the private key (otherwise the private key
+        will be taken from certfile as well).
+      password: Password to be used if the private key is encrypted and a
+        password is necessary.
+    """
+    Readable.__init__(self)
+    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+
+    if certfile is not None:
+      context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+      context.load_cert_chain(certfile, keyfile, password)
+      self.sock = context.wrap_socket(self.sock)
+    else:
+      if keyfile is not None:
+        raise Exception("SSL is disabled, keyfile must not be specified \
+          (to enable SSL specify certfile)")
+      if password is not None:
+        raise Exception("SSL is disabled, password must not be specified \
+          (to enable SSL specify certfile)")
+
+    self.host = host
+    self.port = port
+
+  def __enter__(self):
+    """Connects to host and port specified in the constructor."""
+    self.sock.connect((self.host, self.port))
+    return self
+
+  def __exit__(self, t, v, traceback):
+    """Disconnects the socket."""
+    self.sock.close()
+
+  def write_byte(self, v):
+    """Writes the specified byte."""
+    self.__write(v, "b")
+
+  def write_short(self, v):
+    """Writes the specified short (2 bytes, little-endian)."""
+    self.__write(v, "h")
+
+  def write_int(self, v):
+    """Writes the specified short (4 bytes, little-endian)."""
+    self.__write(v, "i")
+
+  def write_long(self, v):
+    """Writes the specified int (8 bytes, little-endian)."""
+    self.__write(v, "q")
+
+  def write_string(self, v):
+    """Writes the specified string."""
+    self.sock.sendall(v.encode("UTF-8"))
+
+  def read_data(self, length):
+    """Reads the specified number of bytes and returns them as a buffer."""
+    data_buffer = None
+    rem = length
+    while rem > 0:
+      buf = self.sock.recv(rem)
+      rem = rem - len(buf)
+      if data_buffer is None:
+        data_buffer = buf
+      else:
+        data_buffer += buf
+    return data_buffer
+
+  def __write(self, value, data_type):
+    """Packs and writes data using the specified type (little-endian)."""
+    data_buffer = struct.pack("<" + data_type, value)
+    self.sock.sendall(data_buffer)
+
+class BinaryType():
+  """BinaryType class that encapsulated type id, type name and fields."""
+
+  def __init__(self, type_id, type_name, fields):
+    """Constructs a new instance of BinaryType."""
+    self.type_id = type_id
+    self.type_name = type_name
+    self.fields = fields
+
+class BinaryField():
+  """BinaryField class that encapsulated field name, type id and field id."""
+
+  def __init__(self, field_name, type_id, field_id):
+    """Constructs a new instance of BinaryField."""
+    self.field_name = field_name
+    self.type_id = type_id
+    self.field_id = field_id
+
+# Binary types defined in Apache Ignite Thin client and supported by
+# TensorFlow on Apache Ignite, see
+# https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
+types = {
+    1: (dtypes.uint8, False),
+    2: (dtypes.int16, False),
+    3: (dtypes.int32, False),
+    4: (dtypes.int64, False),
+    5: (dtypes.float32, False),
+    6: (dtypes.float64, False),
+    7: (dtypes.uint16, False),
+    8: (dtypes.bool, False),
+    9: (dtypes.string, False),
+    12: (dtypes.uint8, True),
+    13: (dtypes.int16, True),
+    14: (dtypes.int32, True),
+    15: (dtypes.int64, True),
+    16: (dtypes.float32, True),
+    17: (dtypes.float64, True),
+    18: (dtypes.uint16, True),
+    19: (dtypes.bool, True),
+    20: (dtypes.string, True)
+}
+
+class TypeTreeNode():
+  """TypeTreeNode class exposes methods to format object tree structure
+     data.
+  """
+  def __init__(self, name, type_id, fields=None, permutation=None):
+    """Constructs a new instance of TypeTreeNode.
+
+    Args:
+      name: Name of the object tree node.
+      type_id: Type id of the object tree node.
+      fields: List of fields (children of the object tree node).
+      permutation: Permutation that should be applied to order object children.
+    """
+    self.name = name
+    self.type_id = type_id
+    self.fields = fields
+    self.permutation = permutation
+
+  def to_output_classes(self):
+    """Formats the tree object the way required in 'output_classes' property of
+       dataset.
+    """
+    if self.fields is None:
+      return ops.Tensor
+    output_classes = {}
+    for field in self.fields:
+      output_classes[field.name] = field.to_output_classes()
+    return output_classes
+
+  def to_output_shapes(self):
+    """Formats the tree object the way required in 'output_shapes' property of
+       dataset.
+    """
+    if self.fields is None:
+      object_type = types[self.type_id]
+      if object_type is not None:
+        is_array = object_type[1]
+        if is_array:
+          return tensor_shape.TensorShape([None])
+        return tensor_shape.TensorShape([])
+      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
+    output_shapes = {}
+    for field in self.fields:
+      output_shapes[field.name] = field.to_output_shapes()
+    return output_shapes
+
+  def to_output_types(self):
+    """Formats the tree object the way required in 'output_types' property of
+       dataset.
+    """
+    if self.fields is None:
+      object_type = types[self.type_id]
+      if object_type is not None:
+        return object_type[0]
+      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
+    else:
+      output_types = {}
+      for field in self.fields:
+        output_types[field.name] = field.to_output_types()
+      return output_types
+
+  def to_flat(self):
+    """Returns a list of leaf node types."""
+    return self.to_flat_rec([])
+
+  def to_permutation(self):
+    """Returns a permutation that should be applied to order object leafs."""
+    correct_order_dict = {}
+    self.traversal_rec(correct_order_dict, 0)
+    object_order = []
+    self.traversal_permutation_rec(object_order)
+    return [correct_order_dict[o] for o in object_order]
+
+  def to_flat_rec(self, flat):
+    """Formats a list of leaf node types."""
+    flat.append(self.type_id)
+    if self.fields is not None:
+      for field in self.fields:
+        field.to_flat_rec(flat)
+    return flat
+
+  def traversal_permutation_rec(self, permutation):
+    """Collects nodes in accordance with permutation."""
+    if self.fields is None:
+      permutation.append(self)
+    else:
+      for idx in self.permutation:
+        field = self.fields[idx]
+        field.traversal_permutation_rec(permutation)
+
+  def traversal_rec(self, d, i):
+    """Collects nodes in pre-order traversal."""
+    if self.fields is None:
+      d[self] = i
+      i += 1
+    else:
+      for field in self.fields:
+        i = field.traversal_rec(d, i)
+    return i
+
+class IgniteClient(TcpClient):
+  """IgniteClient class exposes methods to work with Apache Ignite using Thin
+     client. This client works with assumption that all object in the cache
+     have the same structure (homogeneous objects) and the cache contains at
+     least one object.
+  """
+  def __init__(self, host, port, username=None, password=None, certfile=None,\
+    keyfile=None, cert_password=None):
+    """Constructs a new instance of IgniteClient.
+
+    Args:
+      host: Apache Ignite Thin client host to be connected.
+      port: Apache Ignite Thin client port to be connected.
+      username: Apache Ignite Thin Client authentication username.
+      password: Apache Ignite Thin Client authentication password.
+      certfile: File in PEM format containing the certificate as well as
+        any number of CA certificates needed to establish the certificate’s
+        authenticity.
+      keyfile: File containing the private key (otherwise the private key
+        will be taken from certfile as well).
+      cert_password: Password to be used if the private key is encrypted and a
+        password is necessary.
+    """
+    TcpClient.__init__(self, host, port, certfile, keyfile, cert_password)
+    self.username = username
+    self.password = password
+
+  def handshake(self):
+    """Makes a handshake required to be made after connect before any other
+       calls.
+    """
+    msg_len = 8
+
+    if self.username is None:
+      msg_len += 1
+    else:
+      msg_len += 5 + len(self.username)
+
+    if self.password is None:
+      msg_len += 1
+    else:
+      msg_len += 5 + len(self.password)
+
+    self.write_int(msg_len)   # Message length
+    self.write_byte(1)        # Handshake operation
+    self.write_short(1)       # Version (1.1.0)
+    self.write_short(1)
+    self.write_short(0)
+    self.write_byte(2)        # Thin client
+
+    if self.username is None: # Username
+      self.write_byte(101)
+    else:
+      self.write_byte(9)
+      self.write_int(len(self.username))
+      self.write_string(self.username)
+
+    if self.password is None: # Password
+      self.write_byte(101)
+    else:
+      self.write_byte(9)
+      self.write_int(len(self.password))
+      self.write_string(self.password)
+
+    self.read_int()           # Result length
+    res = self.read_byte()
+
+    if res != 1:
+      serv_ver_major = self.read_short()
+      serv_ver_minor = self.read_short()
+      serv_ver_patch = self.read_short()
+      err_msg = self.__parse_string()
+      if err_msg is None:
+        raise Exception("Handshake Error [result=%d, version=%d.%d.%d]" \
+            % (res, serv_ver_major, serv_ver_minor, serv_ver_patch))
+      else:
+        raise Exception("Handshake Error [result=%d, version=%d.%d.%d, \
+            message='%s']" % (
+                res,
+                serv_ver_major,
+                serv_ver_minor,
+                serv_ver_patch,
+                err_msg
+            ))
+
+  def get_cache_type(self, cache_name):
+    """Collects type information about objects stored in the specified
+       cache.
+    """
+    cache_name_hash = self.__java_hash_code(cache_name)
+    self.write_int(25)        # Message length
+    self.write_short(2000)      # Operation code
+    self.write_long(0)        # Request ID
+    self.write_int(cache_name_hash) # Cache name
+    self.write_byte(0)        # Flags
+    self.write_byte(101)      # Filter (NULL)
+    self.write_int(1)         # Cursor page size
+    self.write_int(-1)        # Partition to query
+    self.write_byte(0)        # Local flag
+
+    result_length = self.read_int()
+    self.read_long()          # Request id
+    status = self.read_int()
+
+    if status != 0:
+      err_msg = self.__parse_string()
+      if err_msg is None:
+        raise Exception("Scan Query Error [status=%s]" % status)
+      else:
+        raise Exception("Scan Query Error [status=%s, message='%s']" \
+            % (status, err_msg))
+
+    self.read_long()          # Cursor id
+    row_count = self.read_int()
+
+    if row_count == 0:
+      raise Exception("Scan Query returned empty result, so it's \
+        impossible to derive the cache type")
+
+    payload = DataBuffer(self.read_data(result_length - 25))
+
+    self.read_byte()          # Next page
+
+    res = TypeTreeNode("root", 0, [
+        self.__collect_types("key", payload),
+        self.__collect_types("val", payload)
+    ], [0, 1])
+
+    return res
+
+  def __java_hash_code(self, s):
+    """Computes hash code of the specified string using Java code."""
+    h = 0
+    for c in s:
+      h = (31 * h + ord(c)) & 0xFFFFFFFF
+    return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000
+
+  def __collect_types(self, field_name, data):
+    """Extracts type information from the specified object."""
+    type_id = data.read_byte()
+
+    # Byte scalar.
+    if type_id == 1:
+      data.skip(1)
+      return TypeTreeNode(field_name, type_id)
+
+    # Short scalar.
+    if type_id == 2:
+      data.skip(2)
+      return TypeTreeNode(field_name, type_id)
+
+    # Integer scalar.
+    if type_id == 3:
+      data.skip(4)
+      return TypeTreeNode(field_name, type_id)
+
+    # Long scalar.
+    if type_id == 4:
+      data.skip(8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Float scalar.
+    if type_id == 5:
+      data.skip(4)
+      return TypeTreeNode(field_name, type_id)
+
+    # Double scalar.
+    if type_id == 6:
+      data.skip(8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Char scalar.
+    if type_id == 7:
+      data.skip(2)
+      return TypeTreeNode(field_name, type_id)
+
+    # Bool scalar.
+    if type_id == 8:
+      data.skip(1)
+      return TypeTreeNode(field_name, type_id)
+
+    # String scalar.
+    if type_id == 9:
+      length = data.read_int()
+      data.skip(length)
+      return TypeTreeNode(field_name, type_id)
+
+    # UUID scalar.
+    if type_id == 10:
+      data.skip(16)
+      return TypeTreeNode(field_name, type_id)
+
+    # Date scalar.
+    if type_id == 11:
+      data.skip(8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Byte array.
+    if type_id == 12:
+      length = data.read_int()
+      data.skip(length)
+      return TypeTreeNode(field_name, type_id)
+
+    # Short array.
+    if type_id == 13:
+      length = data.read_int()
+      data.skip(length * 2)
+      return TypeTreeNode(field_name, type_id)
+
+    # Integer array.
+    if type_id == 14:
+      length = data.read_int()
+      data.skip(length * 4)
+      return TypeTreeNode(field_name, type_id)
+
+    # Long array.
+    if type_id == 15:
+      length = data.read_int()
+      data.skip(length * 8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Float array.
+    if type_id == 16:
+      length = data.read_int()
+      data.skip(length * 4)
+      return TypeTreeNode(field_name, type_id)
+
+    # Double array.
+    if type_id == 17:
+      length = data.read_int()
+      data.skip(length * 8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Char array.
+    if type_id == 18:
+      length = data.read_int()
+      data.skip(length * 2)
+      return TypeTreeNode(field_name, type_id)
+
+    # Bool array.
+    if type_id == 19:
+      length = data.read_int()
+      data.skip(length)
+      return TypeTreeNode(field_name, type_id)
+
+    # String array.
+    if type_id == 20:
+      length = data.read_int()
+      for _ in range(length):
+        header = data.read_byte()
+        if header == 9:
+          str_length = data.read_int()
+          data.skip(str_length)
+        elif header == 101:
+          pass
+        else:
+          raise Exception("Unknown binary type when expected string \
+            [type_id=%d]" % header)
+      return TypeTreeNode(field_name, type_id)
+
+    # UUID array.
+    if type_id == 21:
+      length = data.read_int()
+      data.skip(length * 16) # TODO: support NULL values.
+      return TypeTreeNode(field_name, type_id)
+
+    # Date array.
+    if type_id == 22:
+      length = data.read_int()
+      data.skip(length * 8)
+      return TypeTreeNode(field_name, type_id)
+
+    # Wrapped Binary Object.
+    if type_id == 27:
+      length = data.read_int()
+      inner_data = data.read_data(length)
+      data.read_int()   # Offset
+      return self.__collect_types(field_name, DataBuffer(inner_data))
+
+    # Complex Object.
+    if type_id == 103:
+      data.read_byte()  # Object version
+      data.read_short() # Object flags
+      obj_type_id = data.read_int()
+      data.read_int()   # Object hash code
+      obj_length = data.read_int()
+      data.read_int()   # Object schema id
+      obj_schema_offset = data.read_int()
+
+      obj_type = self.__get_type(obj_type_id)
+      children = []
+
+      for obj_field in obj_type.fields:
+        child = self.__collect_types(obj_field.field_name, data)
+        children.append(child)
+
+      children_sorted = sorted(children, key=lambda child: child.name)
+      permutation = [children_sorted.index(child) for child in children]
+      children = children_sorted
+
+      data.skip(obj_length - obj_schema_offset)
+
+      return TypeTreeNode(field_name, type_id, children, permutation)
+
+    raise Exception("Unknown binary type [type_id=%d]" % type_id)
+
+  def __get_type(self, type_id):
+    """Queries Apache Ignite information about type by type id."""
+    self.write_int(14)      # Message length
+    self.write_short(3002)  # Operation code
+    self.write_long(0)      # Request ID
+    self.write_int(type_id) # Type ID
+
+    self.read_int()         # Result length
+    self.read_long()        # Request id
+    status = self.read_int()
+
+    if status != 0:
+      err_msg = self.__parse_string()
+      if err_msg is None:
+        raise Exception("Get Binary Type Error [status=%d, message='%s']" \
+            % (status, err_msg))
+      else:
+        raise Exception("Get Binary Type Error [status=%d]" % status)
+
+    binary_type_exists = self.read_byte()
+
+    if binary_type_exists == 0:
+      raise Exception("Binary type not found [type_id=%d] " % type_id)
+
+    binary_type_id = self.read_int()
+    binary_type_name = self.__parse_string()
+    self.__parse_string()   # Affinity field name
+
+    fields = []
+    for _ in range(self.read_int()):
+      field_name = self.__parse_string()
+      field_type_id = self.read_int()
+      field_id = self.read_int()
+
+      field = BinaryField(field_name, field_type_id, field_id)
+      fields.append(field)
+
+    is_enum = self.read_byte()
+    if is_enum == 1:
+      raise Exception("Enum fields are not supported yet")
+
+    schema_cnt = self.read_int()
+    for _ in range(schema_cnt):
+      self.read_int()       # Schema id
+      field_cnt = self.read_int()
+      self.skip(field_cnt * 4)
+
+    return BinaryType(binary_type_id, binary_type_name, fields)
+
+  def __parse_string(self):
+    """Parses string."""
+    header = self.read_byte()
+    if header == 9:
+      length = self.read_int()
+      return self.read_data(length).decode("utf-8")
+    if header == 101:
+      return None
+    raise Exception("Unknown binary type when expected string [type_id=%d]" \
+        % header)
+
+class IgniteDataset(Dataset):
+  """Apache Ignite is a memory-centric distributed database, caching, and
+     processing platform for transactional, analytical, and streaming workloads,
+     delivering in-memory speeds at petabyte scale. This contrib package
+     contains an integration between Apache Ignite and TensorFlow. The
+     integration is based on tf.data from TensorFlow side and Binary Client
+     Protocol from Apache Ignite side. It allows to use Apache Ignite as a
+     datasource for neural network training, inference and all other
+     computations supported by TensorFlow. Ignite Dataset is based on Apache
+     Ignite Binary Client Protocol.
+  """
+
+  def __init__(self, cache_name, host="localhost", port=10800, local=False,\
+    part=-1, page_size=100, username=None, password=None, certfile=None,\
+    keyfile=None, cert_password=None):
+    """Create a IgniteDataset.
+
+    Args:
+      cache_name: Cache name to be used as datasource.
+      host: Apache Ignite Thin Client host to be connected.
+      port: Apache Ignite Thin Client port to be connected.
+      local: Local flag that defines to query only local data.
+      part: Number of partitions to be queried.
+      page_size: Apache Ignite Thin Client page size.
+      username: Apache Ignite Thin Client authentication username.
+      password: Apache Ignite Thin Client authentication password.
+      certfile: File in PEM format containing the certificate as well as
+        any number of CA certificates needed to establish the certificate’s
+        authenticity.
+      keyfile: File containing the private key (otherwise the private key
+        will be taken from certfile as well).
+      cert_password: Password to be used if the private key is encrypted and a
+        password is necessary.
+    """
+    super(IgniteDataset, self).__init__()
+
+    with IgniteClient(host, port, username, password, certfile, keyfile,\
+        cert_password) as client:
+      client.handshake()
+      self.cache_type = client.get_cache_type(cache_name)
+
+    self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string,\
+        name="cache_name")
+    self.host = ops.convert_to_tensor(host, dtype=dtypes.string, name="host")
+    self.port = ops.convert_to_tensor(port, dtype=dtypes.int32, name="port")
+    self.local = ops.convert_to_tensor(local, dtype=dtypes.bool, name="local")
+    self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part")
+    self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,\
+        name="page_size")
+    self.username = ops.convert_to_tensor("" if username is None else username,\
+        dtype=dtypes.string, name="username")
+    self.password = ops.convert_to_tensor("" if password is None else password,\
+        dtype=dtypes.string, name="password")
+    self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,\
+        dtype=dtypes.string, name="certfile")
+    self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,\
+        dtype=dtypes.string, name="keyfile")
+    self.cert_password = ops.convert_to_tensor("" if cert_password is None\
+        else cert_password, dtype=dtypes.string, name="cert_password")
+    self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),\
+        dtype=dtypes.int32, name="schema")
+    self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),\
+        dtype=dtypes.int32, name="permutation")
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,\
+        self.port, self.local, self.part, self.page_size, self.username,\
+        self.password, self.certfile, self.keyfile, self.cert_password,\
+        self.schema, self.permutation)
+
+  @property
+  def output_classes(self):
+    return self.cache_type.to_output_classes()
+
+  @property
+  def output_shapes(self):
+    return self.cache_type.to_output_shapes()
+
+  @property
+  def output_types(self):
+    return self.cache_type.to_output_types()
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py b/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py
new file mode 100644
index 0000000000..8115bda85b
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py
@@ -0,0 +1,25 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Python helper for loading Ignite ops and kernels."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.util import loader
+from tensorflow.python.platform import resource_loader
+
+_dataset_ops = loader.load_op_library(
+    resource_loader.get_path_to_datafile("../../_dataset_ops.so"))
diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh b/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh
new file mode 100755
index 0000000000..f4607ce8ad
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-plain.xml & 
+sleep 5 # Wait Apache Ignite to be started
+
+./apache-ignite-fabric/bin/sqlline.sh \
+-u "jdbc:ignite:thin://127.0.0.1/" \
+--run=/data/sql/init.sql
+
+tail -f nohup.out
diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh b/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh
new file mode 100755
index 0000000000..dde1162816
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-ssl-auth.xml & 
+sleep 5 # Wait Apache Ignite to be started
+
+./apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://127.0.0.1/?\
+sslMode=require&\
+sslClientCertificateKeyStoreUrl=/data/keystore/client.jks&\
+sslClientCertificateKeyStorePassword=123456&\
+sslTrustAll=true&\
+username=ignite&\
+password=ignite" --run=/data/sql/init.sql
+
+tail -f nohup.out
diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh b/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh
new file mode 100755
index 0000000000..58b40b2738
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-ssl.xml & 
+sleep 5 # Wait Apache Ignite to be started
+
+./apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://127.0.0.1/?\
+sslMode=require&\
+sslClientCertificateKeyStoreUrl=/data/keystore/client.jks&\
+sslClientCertificateKeyStorePassword=123456&\
+sslTrustAll=true" --run=/data/sql/init.sql --verbose=true
+
+tail -f nohup.out
diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml
new file mode 100644
index 0000000000..d900174a8a
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans
+       http://www.springframework.org/schema/beans/spring-beans.xsd
+       http://www.springframework.org/schema/util
+       http://www.springframework.org/schema/util/spring-util.xsd">  
+
+  <bean class="org.apache.ignite.configuration.IgniteConfiguration">
+    <property name="discoverySpi">
+      <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+        <property name="ipFinder">
+          <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
+            <property name="addresses">
+              <list>
+                <value>127.0.0.1</value>
+              </list>
+            </property>
+          </bean>
+        </property>
+      </bean>
+    </property>
+  </bean>
+
+</beans>
diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml
new file mode 100644
index 0000000000..8e001b28ab
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans
+       http://www.springframework.org/schema/beans/spring-beans.xsd
+       http://www.springframework.org/schema/util
+       http://www.springframework.org/schema/util/spring-util.xsd">  
+
+  <bean id="client-connector-configuration" 
+        class="org.apache.ignite.configuration.ClientConnectorConfiguration">
+    <property name="sslClientAuth" value="true" />
+    <property name="sslEnabled" value="true" />
+    <property name="useIgniteSslContextFactory" value="true" />
+  </bean>
+
+  <bean id="ssl-context-factory" 
+        class="org.apache.ignite.ssl.SslContextFactory">
+    <property name="keyStoreFilePath" value="/data/keystore/server.jks"/>
+    <property name="keyStorePassword" value="123456"/>
+    <property name="trustStoreFilePath" value="/data/keystore/trust.jks"/>
+    <property name="trustStorePassword" value="123456"/>
+  </bean>
+
+  <bean id="ignite-configuration" 
+        class="org.apache.ignite.configuration.IgniteConfiguration">
+    <property name="clientConnectorConfiguration" 
+              ref="client-connector-configuration" />
+    <property name="sslContextFactory" ref="ssl-context-factory" />
+    <property name="discoverySpi">
+      <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+        <property name="ipFinder">
+          <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
+            <property name="addresses">
+              <list>
+                <value>127.0.0.1</value>
+              </list>
+            </property>
+          </bean>
+        </property>
+      </bean>
+    </property>
+  </bean>
+
+</beans>
diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml
new file mode 100644
index 0000000000..42d480c114
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
+       xmlns:util="http://www.springframework.org/schema/util"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans
+       http://www.springframework.org/schema/beans/spring-beans.xsd
+       http://www.springframework.org/schema/util
+       http://www.springframework.org/schema/util/spring-util.xsd">  
+
+  <bean id="client-connector-configuration" 
+        class="org.apache.ignite.configuration.ClientConnectorConfiguration">
+    <property name="sslClientAuth" value="false" />
+    <property name="sslEnabled" value="true" />
+    <property name="useIgniteSslContextFactory" value="true" />
+  </bean>
+
+  <bean id="ssl-context-factory" 
+        class="org.apache.ignite.ssl.SslContextFactory">
+    <property name="keyStoreFilePath" value="/data/keystore/server.jks"/>
+    <property name="keyStorePassword" value="123456"/>
+    <property name="trustStoreFilePath" value="/data/keystore/trust.jks"/>
+    <property name="trustStorePassword" value="123456"/>
+  </bean>
+
+  <bean id="ignite-configuration" 
+        class="org.apache.ignite.configuration.IgniteConfiguration">
+    <property name="clientConnectorConfiguration" 
+              ref="client-connector-configuration" />
+    <property name="sslContextFactory" ref="ssl-context-factory" />
+    <property name="discoverySpi">
+      <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
+        <property name="ipFinder">
+          <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
+            <property name="addresses">
+              <list>
+                <value>127.0.0.1</value>
+              </list>
+            </property>
+          </bean>
+        </property>
+      </bean>
+    </property>
+  </bean>
+
+</beans>
diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
new file mode 100644
index 0000000000..933e62b804
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
@@ -0,0 +1,77 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License.  You may obtain a copy of
+# the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+# License for the specific language governing permissions and limitations under
+# the License.
+# ==============================================================================
+"""Tests for IgniteDataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tensorflow as tf
+from tensorflow.contrib.ignite import IgniteDataset
+from tensorflow.python.framework import errors
+from tensorflow.python.platform import test
+
+class IgniteDatasetTest(test.TestCase):
+  """The Apache Ignite servers have to setup before the test and tear down
+     after the test manually. The docker engine has to be installed.
+
+     To setup Apache Ignite servers:
+     $ bash start_ignite.sh
+
+     To tear down Apache Ignite servers:
+     $ bash stop_ignite.sh
+  """
+
+  def test_ignite_dataset_with_plain_client(self):
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300)
+    self.__check_dataset(ds)
+
+  def test_ignite_dataset_with_ssl_client(self):
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,\
+      certfile=os.path.dirname(os.path.realpath(__file__)) +\
+      "/keystore/client.pem", cert_password="123456")
+    self.__check_dataset(ds)
+
+  def test_ignite_dataset_with_ssl_client_and_auth(self):
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,\
+      certfile=os.path.dirname(os.path.realpath(__file__)) +\
+      "/keystore/client.pem", cert_password="123456",\
+      username="ignite", password="ignite")
+    self.__check_dataset(ds)
+
+  def __check_dataset(self, dataset):
+    """Checks that dataset provids correct data.
+    """
+    self.assertEquals(tf.int64, dataset.output_types['key'])
+    self.assertEquals(tf.string, dataset.output_types['val']['NAME'])
+    self.assertEquals(tf.int64, dataset.output_types['val']['VAL'])
+
+    it = dataset.make_one_shot_iterator()
+    ne = it.get_next()
+
+    with tf.Session() as sess:
+      rows = [sess.run(ne), sess.run(ne), sess.run(ne)]
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(ne)
+
+    self.assertEquals({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\
+      rows[0])
+    self.assertEquals({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\
+      rows[1])
+    self.assertEquals({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\
+      rows[2])
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/ignite/python/tests/keystore/client.jks b/tensorflow/contrib/ignite/python/tests/keystore/client.jks
new file mode 100644
index 0000000000000000000000000000000000000000..1875c71b605253603eb63e446da8f07cd84f64a0
GIT binary patch
literal 3232
zcmezO_TO6u1_mZ5W@KPX&dE&8D`8+@G{5kwMTdcbX^%k@(+&eZHZE;8MixdbCP79<
zRtA<PrX_ZfJ6RuW=xI&a{Juu@TNk6n?t`_bmfw-w$HcMcfVITRQcFv&@Iarl*<JD%
zPl<oY&|+<wSv`NR+56Xv&-#Qm88yu?ROmRuI&VG0@+&M4a;zqusMk3raOz(P!;3eo
zjH?daoM820?wU85i#gkRrpXoO2P+juJhy9}c<_JjPF|^4j=Os5H<N^OHP<^b+<TrO
zwRUkf_l|#jkAE#)_;yFzeCeZp%l2N`X`66{`N+%rn`75l1h4%R^ZE7*{$BfMZQ%v8
zyNaet-|9Gb!SpMGnb?B`Q$l@r#{Otm{<9)K{_6KG1H%B{XQq$VDA*iN%sN~8z^6b{
zAiw6ZiC<!HLc;k~>Yq8p|C_4X`71ir_3D+J-^_h0{`v;Jq^I7?=cj)By2Gv~;-!V;
zrk^*tG^Q9QMw#R_-+6sBwsZISYYa0FeehS&sNA)_h$$$<F3ePXLu>xItuMbc#jm^U
z)EsGM7v48t=yRQpg+|uW%p2uNTMy;l<&ooQ-L|Fm+-LC}Ql7ti^n{IXo#DN-aQ4pg
z#|pK-#~2r{+<*SvmVdL<Pdc=QA8HWRh;?&4;!`}&bP0RxPZxE@ACiydU3=CV{}b+F
zQSjFCsonj(D*sag+pkG%Q^St4+~Ys~_r&F`%kEbgNZe0)x_tBTZ<Tkd=BGW+<xx`D
zQ+4RMy8B(7^Y1o17kFZ}ynr!GU15&qw~IeF?-aXh8@0$~(kqjdZ_fnWd~;!zI3HK~
zJhj5@EP@WVop(1z{{Jee!dP%g{giCLCclr5A9;Q8iT-TzCeW~~?a`^zZFL+c4kWzq
zU;8-hcCPNpzJnh=uKwCK)1#2*?3%?z&1ZuJkEiy^#N2M-@L}($nk@S&;YnWm8MeEN
zm#1t$Gc#bqrK9GT(=@hJ*0;~N{f7DL(oXj^A-|uT&RRC@VTAngkk*Ud;B?Izp=WAf
z$-uyL*Pw~%ra=>v(*kBDMkXdk6$4&2POUbNw(q=*jI68-2IYp_2Apinp)72|OhKWB
zJO*4K4u>#va6l4_#}45M8t{YUxP&>}i!xK(6EpK*3b=)tT~f<nCNT>$B_|rn8OXqG
z;$#%#Pfm15FUm|wPt45IOU}<VkQ3)MG%_$VurxF@FflZb66ZBW<{A{jjKk(If==N#
z;03vrSs0qT4V4TOkewq8$@HjBYMhT8I*hCg%#FPa292Fejg1VK*kz9?F+b=R5I$u(
zscP2kvk&h_^!FZo{-b#Fms5A&i7O?&F%rEc6sEhjMEtM9skgI#p1tvl$*JARsg~8A
z^Y9*hOP>1uEr#pj7x=Nuzv6iE!us=ylaaxU{}zcI(6gN!b!PABw|X+mmI_84U%E|>
z{mQW=_S;Qr`2HyF@K#y!w@;mknUR5UakW7uIJRVkS(pqM40PBy6WTl&+kQAP^0LT_
z$@&+h<^>1)D7dHQr4}WYq^2l1rxulDre!84mZTcUvN4Cs^0A1qh&cZ>+QiDUB3!Cv
z*QCz@@``+0T3;B*gQS&NBn-qFM7I9ms7T4H+0xmqwdiX68%NFUt%1m4!wL$UMuzTN
zKavj~N?{J}lzRGR$J_m%x8=zk+ue9hbM_;R=>eq;wo5iLG6bg=|6T5UzizMm4#|@0
zC3nU6D?6K?{L1|Lkcsc;orbHcP1{x#s@#Ya5UGy8w0-x%FE!^&Uhc18xbL`FJu7bh
zEc2IVEfii)u)od6#&v3~%l?{W-yQ1i>pxA|_=W+N&Y67;nwUKenwU00(m5x?%60vB
zcixWkLv!^YNbd$_rly8Qu=H*M<qn*~@TK~#cdK}mXP-E?-DF{X(U%zE_cH6lWA2`N
z=2ddUd2LwT;*&SI);KU!RlloSeL3^0TIZ>S!M-c39-n=bx7Ci%v0sW&eC`$Zt&A6J
zx2%mi{^N~-V|3b??)9@L{4(U4*Vx*;m6w(6+2y~S{15JDb~|jS)~V&%s_R*>%7H6j
zac`Q_$Ni90-+0HM@g`F8#hN-BS3=Tn<3fYRdDv6%!s;cJ0~XFrNNJJ><Wyl6Rs&{6
z#{Xz3l)15yp>5CIRj);LinkxSv+h!V3q#BbzJF1B6oSQ$a90?dH&#Df;JoYLpG4o4
z_vH`lZk%qiR`UI=vL~V2U#z=$!t$%x!4s7Yn#sG}#XXJ0Jr*}aeBxPNc>0L`(;ur`
z7niRTx7FTo)X_L*nZ@%!-U~AR;=(>@))7-*?Qk|v-SSgPPEf}ZT!%0vCqmjp&4Hu&
pj0zRrXg*^OWubk;VkqUaBW_g*^E|kHw{Q7<!r*Cx;ya&dW&r&#dOrXF

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/ignite/python/tests/keystore/client.pem b/tensorflow/contrib/ignite/python/tests/keystore/client.pem
new file mode 100644
index 0000000000..a71a87e0bb
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/keystore/client.pem
@@ -0,0 +1,69 @@
+Bag Attributes
+    friendlyName: client
+    localKeyID: 54 69 6D 65 20 31 33 33 39 32 33 39 38 35 39 34 34 36 
+Key Attributes: <No Attributes>
+-----BEGIN RSA PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: DES-EDE3-CBC,CE61EDD98349D0C7
+
+Kzl16sj8R7YUXPCEZCqCrY4LSAjiKCRFNOagEehvN9Jpswcz4JbatoFmvVvOCgBF
+7kkeCaALhfM5a+46uynZ1sOOFUOn8fUFgguN3lLInWfm6vTuXDPslg0/tRNI0YqW
+ujfxyzrm1/k4RX0oLzRE1jZr69VZsBmZndkz9nkz3anWKLE7X/VIFV6U/N6YNPch
+BG1Fxpt/HtM9p3B5wNDSjCVaeNP1ROKe3APLRY6k+SppTuntHV5q9Ni82r1l3ahU
+zf2QvocSy9MLh+bGusJGHyJJAGuwPHm6ytPwbXGHn5xe4HPIno28j9kN7EL1ZoUs
+q0PhipAkFrGIM4zg6nAwVdzY5iGySDQ3fWpz2MkrKMDRftBwA3o/M321NBUW9/2X
+l+XmjXcJd0dEOslGxveb6UXLL2YvYszjQXRR4dCV/40bMJL3umRhVSay0NteoXfY
+82rQchm2NHKOiDfB4RpD8JJtVQeDSMXc9TH5y2Ua7FZND60JXtFpdnfCVfVZuBJm
+yBafyIsXR7EQzLG4z28Dvp4fs42A3JkF+e9Aq6Y6MmYA1wsvIKKT9HKEifqKmbgG
+4E9WOZn5IWi4ZJ44VAwN/uBGrLm//3OjByeB9y8vszNbyY8dQ8x5XqnF/IzIvgqc
+uKA8xuLAkTFmgRGQ/lmMDR+iMhet5dCtg9Orb9tYVL55JAb/OfsCX0LTJ3Y2RmIx
+CaFpkUP7KKYD+69ajnFCxvfGnGxyBkf+JeuDYIZVFklVT9SUtL9RJh26jUdvHt2A
+LQerBl8UCkVbPxsxYjdawvxuBNTD6tSRykM8zwtWcvIubp+gxE7png==
+-----END RSA PRIVATE KEY-----
+Bag Attributes
+    friendlyName: 1.2.840.113549.1.9.1=#1613636c69656e7440677269646761696e2e636f6d,CN=client,OU=Dev,O=GridGain,ST=SPb,C=RU
+    localKeyID: 54 69 6D 65 20 31 33 33 39 32 33 39 38 35 39 34 34 36 
+subject=/C=RU/ST=SPb/O=GridGain/OU=Dev/CN=client/emailAddress=client@gridgain.com
+issuer=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com
+-----BEGIN CERTIFICATE-----
+MIIC2TCCAkKgAwIBAgIBJDANBgkqhkiG9w0BAQUFADB3MQswCQYDVQQGEwJSVTEM
+MAoGA1UECBMDU1BiMQwwCgYDVQQHEwNTUGIxETAPBgNVBAoTCEdyaWRHYWluMQww
+CgYDVQQLEwNEZXYxCzAJBgNVBAMTAmNhMR4wHAYJKoZIhvcNAQkBFg9jYUBncmlk
+Z2Fpbi5jb20wHhcNMTIwNjA5MTEwNDE3WhcNMzIwNjA5MTEwNDE3WjBxMQswCQYD
+VQQGEwJSVTEMMAoGA1UECBMDU1BiMREwDwYDVQQKEwhHcmlkR2FpbjEMMAoGA1UE
+CxMDRGV2MQ8wDQYDVQQDEwZjbGllbnQxIjAgBgkqhkiG9w0BCQEWE2NsaWVudEBn
+cmlkZ2Fpbi5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANIHHcYiA+CP
+EBPKNZJ6mtvN4d9Yj43B5/hzs/TK3e4XImLsMhXaElYtrXQX/SDK7Zv5zdj6AkKH
+QkJ9BT8Jw7wvOQx/v4Qxrl+gTgcf6gjk6DvzqMlZUwH+ohbALj2TWsy9y+0uHKal
+EVrHpbYeB9TGpD+3NHwO/CG4SySk/Y4nAgMBAAGjezB5MAkGA1UdEwQCMAAwLAYJ
+YIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1Ud
+DgQWBBRD/TKyBQyoVxqEupLzUB8hDrSF6DAfBgNVHSMEGDAWgBS1+Ah4ZG58tImL
+KqLVX+xBKbeFUTANBgkqhkiG9w0BAQUFAAOBgQCL2vhjwcJkA1OJGuXsuO2/87Zu
+HMa7gc4pm+Iol1B1gD2ksQEAU2dz/adD3369H7gZdHuk3RYPeYmD5Ppp9eECDsXc
+gNWrNYaqcSTYWRAUe1/St7vB9HzPdOm/eADfQaMnal6fmjfpzTgg65A/2w4GCsqt
+RL98pvdAft8v5WSx7A==
+-----END CERTIFICATE-----
+Bag Attributes
+    friendlyName: 1.2.840.113549.1.9.1=#160f636140677269646761696e2e636f6d,CN=ca,OU=Dev,O=GridGain,L=SPb,ST=SPb,C=RU
+subject=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com
+issuer=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com
+-----BEGIN CERTIFICATE-----
+MIIDSTCCArKgAwIBAgIJAKmuj925215OMA0GCSqGSIb3DQEBBQUAMHcxCzAJBgNV
+BAYTAlJVMQwwCgYDVQQIEwNTUGIxDDAKBgNVBAcTA1NQYjERMA8GA1UEChMIR3Jp
+ZEdhaW4xDDAKBgNVBAsTA0RldjELMAkGA1UEAxMCY2ExHjAcBgkqhkiG9w0BCQEW
+D2NhQGdyaWRnYWluLmNvbTAeFw0xMjA2MDkwNjU1MTJaFw0zMjA2MDQwNjU1MTJa
+MHcxCzAJBgNVBAYTAlJVMQwwCgYDVQQIEwNTUGIxDDAKBgNVBAcTA1NQYjERMA8G
+A1UEChMIR3JpZEdhaW4xDDAKBgNVBAsTA0RldjELMAkGA1UEAxMCY2ExHjAcBgkq
+hkiG9w0BCQEWD2NhQGdyaWRnYWluLmNvbTCBnzANBgkqhkiG9w0BAQEFAAOBjQAw
+gYkCgYEAtd16DCObyM63NKF/cvRcE+8cr1dc3c7mSnTEQ61WfqPJ2QqsQAB6e+5+
+q9Np1SaJyqFTTag6483ibrU+DkGPGgEXndRHtQHQPbStWsf47DBBW2bMi6+bkPox
+Cp6BhYO1DQUG5tP9CQ/g32mLQLB7LH0KtS1JcKpAClCjjWZC8b8CAwEAAaOB3DCB
+2TAdBgNVHQ4EFgQUtfgIeGRufLSJiyqi1V/sQSm3hVEwgakGA1UdIwSBoTCBnoAU
+tfgIeGRufLSJiyqi1V/sQSm3hVGhe6R5MHcxCzAJBgNVBAYTAlJVMQwwCgYDVQQI
+EwNTUGIxDDAKBgNVBAcTA1NQYjERMA8GA1UEChMIR3JpZEdhaW4xDDAKBgNVBAsT
+A0RldjELMAkGA1UEAxMCY2ExHjAcBgkqhkiG9w0BCQEWD2NhQGdyaWRnYWluLmNv
+bYIJAKmuj925215OMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEAhrzd
+qusVLHO3wtyu0o+EAFyoDv5avCBTFsQLeDDPMyfDcEO6wfxhTanfH8C7gZc0rRnv
+2nbkVbfortHIOfU2wch5gClju0cXSTIXSKOAWPIMp3HLxC/l+KpFo3epFz0rsMVB
+M1ymOOdRDdAcTxcTTGY7WJXquEM3ZbT5Gh4RLDk=
+-----END CERTIFICATE-----
diff --git a/tensorflow/contrib/ignite/python/tests/keystore/server.jks b/tensorflow/contrib/ignite/python/tests/keystore/server.jks
new file mode 100644
index 0000000000000000000000000000000000000000..006ececc31118aa18ddb6e4ec27d002e5e11646c
GIT binary patch
literal 3230
zcmezO_TO6u1_mZLW=c+EU|=-A@M=~y18anysevT}1GBF|6SJp56Vs*z%uI|-Oq>iW
z*Y)4sc{|R}fR~L^tIebBJ1-+6D=ULRxgoa!CmVAp3!5-gP^ck~0T+nFA<P^ckObqg
zLwJG){2)0lVGj4A%oO*;%siL^ZeeDZ)H0Y!%)(5`iH33pGH{zX8O8XM6CKivGE>qM
zGxPM4^K%X4#CZ*k49pBH4a`hU4UMA2d5u9_6DW7!9M(7=IgA)t8JHV;84MabnHn1z
zw%)DcQJ#I`+;)?N^+jJ|gx|}o50ANf?wMD~5$CmGb&F5l<XYpvP*we|ZuRBNt7@I6
z76$vSuzGy<QQlTNKF5A3M)A2<+_y4bu-&pY>iCa029D8bXS&zVp76_%YhGh(^HyF~
zwr7|Ba`Hd8pV{rOp<1VwYpbqj!72x?fW^IOP9OI(F*7nSE^fSI(0J28mW?@7mXAe@
zMP%y_j*67Lnk}8(T8pm6zj4&u-Wq7oxDq6<%+k2fpmAOURy7N&msAc|I5#1sNgj|>
zg;`h)m>C)WBZodKDD)c{+V<RC^;%S?c>AF{>n`=TFvP6j`xmuGAz17PcZI=uWA(!Y
z&bto&N%UQLU;e=E#_1+&CEwpFdlI_+#kz|pEWer^JW<)8nY`Ow+|x+hV{t>oC!Xbn
zr;q4A{jth*arsJdTkQ=;9gSm_Sv(Kqy&&T+F6@(L9WnLQ4rlY!EkC8?1a&MK7#J8C
z*osq&%2JCUd2Mzj0|V18gC?eJ27GK>+H8z0j9N^BjEt-dEKN*{JR;+brdQtCd04*1
z$YblrpVOXB<ym^M`G-LLJTCnxao6PMarN9H70d42Tl!<QJCo;uPp413*vKWxv@}ug
zogeFuTkqM-(ghhc6e`~BpT5^PYKigDv`*d2hEaDO^v=vQ%@*VoD?4J!;h4Io;ZSk#
z`P6w6S$vxB%J+)t=jxQac%*&o!GpcM-!tDgl<AfmaL6cRa+m+|&6~Q>Dssj0qL-e!
z_3tE^3-va%w|`$L;}Nj<>_c{`U$cx$gv{s4>?kxoKIdBIo;jx!9jbh5SLQifJJrzs
zKg>i-)9GBrhpt-!C&V3HuCAQCd+ibRlH+zhVzLkahd3JDTd_wa_k`B9sMrWek?13{
z*dBacz4IgAeb)@<DTjZ#B{%+0+ITE!Q-Ppme*`C2&?~-`MzL)V0<4=(raczZSdsln
zVe`sX^Gzq*^Y&Z6c3UUDGrQ~lJ4sH%6H8t%)lPXj^-QN<z#3!cz5fo}bt$%=wIrqP
ze~swA+9M3_Ss%@r>M~3AX3(WI_H1kwhYD0)`sg3qynpK@nTnN(CM+8|HvZ!6J^p;z
z70HDklrAs3a-g*4a!r-uh2Qu8U1U*M%*r<}VcnA4fROmk&r_Y<Ot0}u+~R6_-0f;I
zQT@#Vt>szlR%axuOEMR(yjB<ff1Pkedcd4l7Gibz?_1wxE_$2N!|6L=s_4G3D;H%r
zeYhV^m$!Q!H8&}A-lhwm4)Sjf?o(RDp0@s1<Z*)~f!CPMJvuqH#BR&WkDiOYH*Z^O
z`m6X<%&SKA!c2!%f&2?Z-p~J%(AH8Z@#mJ@mcFWAXTELN^S3`$>1N0!)447OT5hin
z`e@h^I(x}V?cc3Y48HPiKl(P^C`fr6_w|pjbFf;;&fi9lD<_^(-g^I5=a%BLPyRf3
zdU0>w&2)9mhk?Jp?O3_5|F@rk_~K+{aP0xBEtu{aG%?*YXkv1L)E11YXnAnJ3JF9l
zVQ6SzVg#=xETLS3LRh7Pt@<KZVeuRAf@(BoVQAhqR5DONuC#<9Ssu0G!dE|B`@3^R
z#`A<V;j`wxcVfPOP*qy#_}k^?ua`X%*DCzZp0eu3ybtwSW|sR@z1*D&Ueyb_v>$#x
zU$5)brMsWC-|mr^9a-qdEyI5}#;Jbot6!hlq^dr@w9ngL^i9Q-xAje)zPeoZQTZ3^
zI{SX>N-Unl-@TrBs?fRG_WGF4H4fa*%CAD|hiZdLaGfqI%)(^AV4%asnb79J*!IJT
zk(WhYOxC|3H7_{WN5MTcFSRJKBsE3BIkl)HGc7YYu_P6#T4`B)<&VueNfCvrDJrI6
z;+q!D*D*4X2Nl)IED{D{*y|Lul*8QE$guv)rbS0jOy~Khd5i0YgcQr&WH!FHLBEbS
zt>f>NJd)IQh2=oPhQ#@NHyz)uGMdNwXx?Y07a!}Fce*DBynQM9JI6`FqT^^1yQE9p
z&0|vfQ<(D9Zo~)i9e#GAtb#Qsng8dHFBKsVC+4kv*LYIX@L62#V&#`f_RBX&U0c1a
z>(PbZn?_qjRA{c@OZB6zBIZyQ>NJaoQmbfAcG9xQoh!B}_ieqmB!8{QbKdug03J1V
AGXMYp

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/ignite/python/tests/keystore/trust.jks b/tensorflow/contrib/ignite/python/tests/keystore/trust.jks
new file mode 100644
index 0000000000000000000000000000000000000000..a00f1251af72982ddcd42c0274fc7b16e35dbc4c
GIT binary patch
literal 2432
zcmezO_TO6u1_mYu1_ov@&6J$Tz`$sJ;nlo346G4)rUsS_49va;P0XGKO-!2>Ff%bS
zF>x}iT-Seh=j}K@170>xtu~Lg@4SqRtgH+M<%Zk_oNUaYENsF|L7|2`23#NxhcI(+
zKoX3{4&ezJ@Pp*IggM-cGE>|WGxJ~yxP_TrQp;c_F$*&#CmPBb$iQvlWEA61PIO2w
z%1lX5%*@kE&d)WF6X!KFGB7i+G%zzYH8hG6=QRd#O`zO?b6Dei<S=4nWngaXWiV*$
zWNK_=*m}2$M|t*%bK6Z8))#$=5q>YTK0M~`xo2J_N1WG&)h#}GlWUCwLsj*=y49C6
zuc~#PS{UrR!s_waM|oTA_#FGC7{%vaao@^#!FJ2qsN+B07&u0!o#|dbd%`b6u6d2E
z&0Bd{*`8hg%gO)XerC7BhH9N!uC2PB1*;sm0v7kCIepyE#LURRxVZ6-LE}vWSvKZS
zSw0pq7Llz#I4V-|YPNKCYc0AO|He^sduyOU<4TacGE3t^gT{FcSk)}7UQ#(=;oO9j
zCV4<k6=q>IU}j|ej~x1}pwMq*Xxnpl)oW3m;_Zj-th?0T!Vt59?_bm&g<!EG+!Y4r
zjnxkqIPW_6C((E1efa~s8>gGBm3)7z>`CbM7waybu>5Lv@I+;UX7X-#aZe+0kHrlU
zpLmuRo<5@g^v5dK#pNr-ZM8QXbu^AyX7N0b_kxVSxUf%}b;Q(HJDkl^xBQfn6V$N;
z=QFnCoXpg`5=dT~Ujff+Om_{Mm~I+0F*!l<8lwuD4+boV5qZqe(7?pd9G1r{p<IJP
zSeC_>`3Yusegj@mdB7|T4H-iv0|n$PFANDV)NGG0cU@waJ*LF`pkF}vl<B0ZS+~zV
zydTlud+_;>;>}-9-F+vnl=Q|(^p;SV?%ERZzY3?`&i;A!#xEwPb|<G=R(sCFd-N@N
z>i4%8u8Uvb$1eYh<H-x_&nr$w1~dL!Bz8d0c5>92y{F&m$t+tc7<GK<HaYey$ClV{
zH>u(KqqxIcWy#+@bx7{2HmC%bh_b>gOa=@FI&7Q?Z61tmKb#nOS>(lJ{R>j_f`fe&
z+*9*XixNvxQxu$2i%K%nGLsWaQjs#H^IxM)tUN2irCN4P`Wzsy$hW2Sg@HUMp)0dU
z7>Hrdk!UH0xv`O<`__--gNIU>gFB_3zS;41|L1LaGRJl|p3|KDNMm|HX@l*Ojf@Pz
z>BWDSJKwL{E5Adsq<YC+G5*TV<|n^0zdmH*J9?+#>T1)rRfQ@yA_YXM<1cOBeeg@o
z`I49WD;VxOE>_Qqn?K9^<yi}b*Awh-^RaQATI;gEX4!X#y8HT1Q#QVVr2XR5qO#N?
zNZMcEg*)x5qWNHuq<tfJ{Q^$=Mn+LoOZ$+J!AScMVdB&NwZA)8WIRt;6FzJ1dne}m
z2UVq&j=x=Q{(9LXajnAd>?x~m%==KUWoEfg)yv(f;8ne#OZ(yH^Yyw;UAp^O`|Tcy
z*^z~Q+%o)!W1Q;OzWVi<O{(hiOZ&Y2Mc-6Rd0XG)>8s0iAC-TxuCwpAuEgR={N3xB
zrwW~`ZLg2%T;ss~to$lf(|*h1D}QX(Ns1^`O;Iro6W_FGzK#(&X@C8fO^c46n9lQ0
z^A^_)2`QGl$!vUYgMJ-vTF2ijc_gXr3d@0n4T<ylZaThQWi*fV(Y()0FFw{U?{rTN
zc>7ZHcaD>UMaR)3c1f4Go5!T|r!eKI-G~q3JN)cKSp{oOGXKvXUn)W#PRv{TuJNR%
v;j_5f#mX;}?3Ztly0&^-*P{!+H%&8h)e~82@&Dp^^P<ERetXZ{Q569I9;OV&

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/ignite/python/tests/sql/init.sql b/tensorflow/contrib/ignite/python/tests/sql/init.sql
new file mode 100644
index 0000000000..5a192aef17
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/sql/init.sql
@@ -0,0 +1,20 @@
+-- Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS, 
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- ==============================================================================
+
+CREATE TABLE TEST_CACHE (ID LONG PRIMARY KEY, NAME VARCHAR, VAL LONG);
+
+INSERT INTO TEST_CACHE VALUES (1, 'TEST1', 42);
+INSERT INTO TEST_CACHE VALUES (2, 'TEST2', 43);
+INSERT INTO TEST_CACHE VALUES (3, 'TEST3', 44);
diff --git a/tensorflow/contrib/ignite/python/tests/start_ignite.sh b/tensorflow/contrib/ignite/python/tests/start_ignite.sh
new file mode 100755
index 0000000000..fbcf656afd
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/start_ignite.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+IGNITE_VERSION=2.6.0
+SCRIPT_PATH="$( cd "$(dirname "$0")" ; pwd -P )"
+
+# Start Apache Ignite with plain client listener.
+docker run -itd --name ignite-plain -p 42300:10800 \
+-v ${SCRIPT_PATH}:/data apacheignite/ignite:${IGNITE_VERSION} /data/bin/start-plain.sh
+
+# Start Apache Ignite with SSL client listener.
+docker run -itd --name ignite-ssl -p 42301:10800 \
+-v ${SCRIPT_PATH}:/data apacheignite/ignite:${IGNITE_VERSION} /data/bin/start-ssl.sh
+
+# Start Apache Ignite with SSL client listener with auth.
+docker run -itd --name ignite-ssl-auth -p 42302:10800 \
+-v ${SCRIPT_PATH}:/data apacheignite/ignite:${IGNITE_VERSION} /data/bin/start-ssl-auth.sh
diff --git a/tensorflow/contrib/ignite/python/tests/stop_ignite.sh b/tensorflow/contrib/ignite/python/tests/stop_ignite.sh
new file mode 100755
index 0000000000..8f03dbd1ed
--- /dev/null
+++ b/tensorflow/contrib/ignite/python/tests/stop_ignite.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+docker rm -f ignite-plain
+docker rm -f ignite-ssl
+docker rm -f ignite-ssl-auth
-- 
GitLab


From 28b0608a8536c287b4084449e36fd42b6f4aed5b Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Fri, 24 Aug 2018 18:15:57 +0300
Subject: [PATCH 0045/1085] Remove duplicated header from README.md.

---
 tensorflow/contrib/ignite/README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md
index 9054344e94..f2596fc572 100644
--- a/tensorflow/contrib/ignite/README.md
+++ b/tensorflow/contrib/ignite/README.md
@@ -1,4 +1,3 @@
-### Ignite Dataset
 # Ignite Dataset
 
 - [Overview](#overview)
@@ -164,4 +163,4 @@ After that you will be able to work with it following way:
 
 ## Limitations
 
-Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures.
\ No newline at end of file
+Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures.
-- 
GitLab


From 241c1740ee26b57b7a5fe8f72b9d34f4515af760 Mon Sep 17 00:00:00 2001
From: dmitrievanthony <dmitrievanthony@gmail.com>
Date: Sun, 26 Aug 2018 16:03:04 +0000
Subject: [PATCH 0046/1085] Update after review: change 'ignite' namespace to
 'tensorflow', rename variables to satisty code style, use pointers instead of
 references.

---
 tensorflow/contrib/ignite/BUILD               |   1 -
 tensorflow/contrib/ignite/__init__.py         |   4 +-
 .../kernels/ignite_binary_object_parser.cc    | 322 +++++++---------
 .../kernels/ignite_binary_object_parser.h     |   9 +-
 .../contrib/ignite/kernels/ignite_client.cc   |  55 ---
 .../contrib/ignite/kernels/ignite_client.h    |  45 ++-
 .../contrib/ignite/kernels/ignite_dataset.cc  | 105 +++--
 .../contrib/ignite/kernels/ignite_dataset.h   |  65 ++--
 .../ignite/kernels/ignite_dataset_iterator.cc | 358 +++++++++---------
 .../ignite/kernels/ignite_dataset_iterator.h  |  80 ++--
 .../ignite/kernels/ignite_dataset_ops.cc      |  10 +-
 .../ignite/kernels/ignite_plain_client.h      |  21 +-
 .../kernels/ignite_plain_client_unix.cc       |  78 ++--
 .../kernels/ignite_plain_client_windows.cc    |  77 ++--
 .../ignite/kernels/ignite_ssl_wrapper.cc      | 107 +++---
 .../ignite/kernels/ignite_ssl_wrapper.h       |  30 +-
 16 files changed, 619 insertions(+), 748 deletions(-)
 delete mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.cc

diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
index 9f6c666893..b7d40a99f7 100644
--- a/tensorflow/contrib/ignite/BUILD
+++ b/tensorflow/contrib/ignite/BUILD
@@ -40,7 +40,6 @@ cc_library(
     srcs = [
         "kernels/ignite_dataset_ops.cc",
         "kernels/ignite_client.h",
-        "kernels/ignite_client.cc",
         "kernels/ignite_plain_client.h",
         "kernels/ignite_ssl_wrapper.h",
         "kernels/ignite_ssl_wrapper.cc",
diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py
index 468920a557..b78829d0f4 100644
--- a/tensorflow/contrib/ignite/__init__.py
+++ b/tensorflow/contrib/ignite/__init__.py
@@ -30,9 +30,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops \
-import IgniteDataset
-
+from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops import IgniteDataset
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
index bf0ef8766e..9bf4480d2d 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
@@ -15,290 +15,258 @@ limitations under the License.
 
 #include "ignite_binary_object_parser.h"
 
-namespace ignite {
+namespace tensorflow {
 
-tensorflow::Status BinaryObjectParser::Parse(
-    uint8_t*& ptr, std::vector<tensorflow::Tensor>& out_tensors,
-    std::vector<int32_t>& types) {
-  uint8_t object_type_id = *ptr;
-  ptr += 1;
+Status BinaryObjectParser::Parse(uint8_t** ptr,
+                                 std::vector<Tensor>* out_tensors,
+                                 std::vector<int32_t>* types) {
+  uint8_t object_type_id = **ptr;
+  *ptr += 1;
 
   switch (object_type_id) {
     case BYTE: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_UINT8, {});
-      tensor.scalar<tensorflow::uint8>()() = *((uint8_t*)ptr);
-      ptr += 1;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_UINT8, {});
+      tensor.scalar<uint8>()() = *((uint8_t*)*ptr);
+      *ptr += 1;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case SHORT: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT16, {});
-      tensor.scalar<tensorflow::int16>()() = *((int16_t*)ptr);
-      ptr += 2;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_INT16, {});
+      tensor.scalar<int16>()() = *((int16_t*)*ptr);
+      *ptr += 2;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case INT: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT32, {});
-      tensor.scalar<tensorflow::int32>()() = *((int32_t*)ptr);
-      ptr += 4;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_INT32, {});
+      tensor.scalar<int32>()() = *((int32_t*)*ptr);
+      *ptr += 4;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case LONG: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT64, {});
-      tensor.scalar<tensorflow::int64>()() = *((int64_t*)ptr);
-      ptr += 8;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_INT64, {});
+      tensor.scalar<int64>()() = *((int64_t*)*ptr);
+      *ptr += 8;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case FLOAT: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_FLOAT, {});
-      tensor.scalar<float>()() = *((float*)ptr);
-      ptr += 4;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_FLOAT, {});
+      tensor.scalar<float>()() = *((float*)*ptr);
+      *ptr += 4;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case DOUBLE: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_DOUBLE, {});
-      tensor.scalar<double>()() = *((double*)ptr);
-      ptr += 8;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_DOUBLE, {});
+      tensor.scalar<double>()() = *((double*)*ptr);
+      *ptr += 8;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case UCHAR: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_UINT16, {});
-      tensor.scalar<tensorflow::uint16>()() = *((uint16_t*)ptr);
-      ptr += 2;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_UINT16, {});
+      tensor.scalar<uint16>()() = *((uint16_t*)*ptr);
+      *ptr += 2;
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case BOOL: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_BOOL, {});
-      tensor.scalar<bool>()() = *((bool*)ptr);
-      ptr += 1;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_BOOL, {});
+      tensor.scalar<bool>()() = *((bool*)*ptr);
+      *ptr += 1;
+      out_tensors->push_back(std::move(tensor));
 
       break;
     }
     case STRING: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_STRING, {});
-      tensor.scalar<std::string>()() = std::string((char*)ptr, length);
-      ptr += length;
-      out_tensors.emplace_back(std::move(tensor));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_STRING, {});
+      tensor.scalar<std::string>()() = std::string((char*)*ptr, length);
+      *ptr += length;
+      out_tensors->push_back(std::move(tensor));
 
       break;
     }
     case DATE: {
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT64, {});
-      tensor.scalar<tensorflow::int64>()() = *((int64_t*)ptr);
-      ptr += 8;
-      out_tensors.emplace_back(std::move(tensor));
+      Tensor tensor(cpu_allocator(), DT_INT64, {});
+      tensor.scalar<int64>()() = *((int64_t*)*ptr);
+      *ptr += 8;
+      out_tensors->push_back(std::move(tensor));
 
       break;
     }
     case BYTE_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_UINT8,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_UINT8, TensorShape({length}));
 
-      uint8_t* arr = (uint8_t*)ptr;
-      ptr += length;
+      uint8_t* arr = (uint8_t*)*ptr;
+      *ptr += length;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::uint8>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<uint8>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case SHORT_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT16,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_INT16, TensorShape({length}));
 
-      int16_t* arr = (int16_t*)ptr;
-      ptr += length * 2;
+      int16_t* arr = (int16_t*)*ptr;
+      *ptr += length * 2;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::int16>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<int16>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case INT_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT32,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_INT32, TensorShape({length}));
 
-      int32_t* arr = (int32_t*)ptr;
-      ptr += length * 4;
+      int32_t* arr = (int32_t*)*ptr;
+      *ptr += length * 4;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::int32>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<int32>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case LONG_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT64,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length}));
 
-      int64_t* arr = (int64_t*)ptr;
-      ptr += length * 8;
+      int64_t* arr = (int64_t*)*ptr;
+      *ptr += length * 8;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::int64>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<int64>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case FLOAT_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_FLOAT,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_FLOAT, TensorShape({length}));
 
-      float* arr = (float*)ptr;
-      ptr += 4 * length;
+      float* arr = (float*)*ptr;
+      *ptr += 4 * length;
 
       std::copy_n(arr, length, tensor.flat<float>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case DOUBLE_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_DOUBLE,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_DOUBLE, TensorShape({length}));
 
-      double* arr = (double*)ptr;
-      ptr += 8 * length;
+      double* arr = (double*)*ptr;
+      *ptr += 8 * length;
 
       std::copy_n(arr, length, tensor.flat<double>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case UCHAR_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_UINT16,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_UINT16, TensorShape({length}));
 
-      uint16_t* arr = (uint16_t*)ptr;
-      ptr += length * 2;
+      uint16_t* arr = (uint16_t*)*ptr;
+      *ptr += length * 2;
 
-      std::copy_n(arr, length, tensor.flat<tensorflow::uint16>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      std::copy_n(arr, length, tensor.flat<uint16>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case BOOL_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_BOOL,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_BOOL, TensorShape({length}));
 
-      bool* arr = (bool*)ptr;
-      ptr += length;
+      bool* arr = (bool*)*ptr;
+      *ptr += length;
 
       std::copy_n(arr, length, tensor.flat<bool>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case STRING_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_STRING,
-                                tensorflow::TensorShape({length}));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_STRING, TensorShape({length}));
 
       for (int32_t i = 0; i < length; i++) {
-        int32_t str_length = *((int32_t*)ptr);
-        ptr += 4;
-        const int8_t* str = (const int8_t*)ptr;
-        ptr += str_length;
+        int32_t str_length = *((int32_t*)*ptr);
+        *ptr += 4;
+        const int8_t* str = (const int8_t*)*ptr;
+        *ptr += str_length;
         tensor.vec<std::string>()(i) = std::string((char*)str, str_length);
       }
 
-      out_tensors.emplace_back(std::move(tensor));
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case DATE_ARR: {
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      tensorflow::Tensor tensor(tensorflow::cpu_allocator(),
-                                tensorflow::DT_INT64,
-                                tensorflow::TensorShape({length}));
-      int64_t* arr = (int64_t*)ptr;
-      ptr += length * 8;
-
-      std::copy_n(arr, length, tensor.flat<tensorflow::int64>().data());
-      out_tensors.emplace_back(std::move(tensor));
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length}));
+      int64_t* arr = (int64_t*)*ptr;
+      *ptr += length * 8;
+
+      std::copy_n(arr, length, tensor.flat<int64>().data());
+      out_tensors->push_back(std::move(tensor));
       break;
     }
     case WRAPPED_OBJ: {
-      int32_t byte_arr_size = *((int32_t*)ptr);
-      ptr += 4;
+      int32_t byte_arr_size = *((int32_t*)*ptr);
+      *ptr += 4;
 
-      tensorflow::Status status = Parse(ptr, out_tensors, types);
-      if (!status.ok()) return status;
+      TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types));
 
-      int32_t offset = *((int32_t*)ptr);
-      ptr += 4;
+      int32_t offset = *((int32_t*)*ptr);
+      *ptr += 4;
 
       break;
     }
     case COMPLEX_OBJ: {
-      uint8_t version = *ptr;
-      ptr += 1;
-      int16_t flags = *((int16_t*)ptr);  // USER_TYPE = 1, HAS_SCHEMA = 2
-      ptr += 2;
-      int32_t type_id = *((int32_t*)ptr);
-      ptr += 4;
-      int32_t hash_code = *((int32_t*)ptr);
-      ptr += 4;
-      int32_t length = *((int32_t*)ptr);
-      ptr += 4;
-      int32_t schema_id = *((int32_t*)ptr);
-      ptr += 4;
-      int32_t schema_offset = *((int32_t*)ptr);
-      ptr += 4;
-
-      uint8_t* end = ptr + schema_offset - 24;
+      uint8_t version = **ptr;
+      *ptr += 1;
+      int16_t flags = *((int16_t*)*ptr);  // USER_TYPE = 1, HAS_SCHEMA = 2
+      *ptr += 2;
+      int32_t type_id = *((int32_t*)*ptr);
+      *ptr += 4;
+      int32_t hash_code = *((int32_t*)*ptr);
+      *ptr += 4;
+      int32_t length = *((int32_t*)*ptr);
+      *ptr += 4;
+      int32_t schema_id = *((int32_t*)*ptr);
+      *ptr += 4;
+      int32_t schema_offset = *((int32_t*)*ptr);
+      *ptr += 4;
+
+      uint8_t* end = *ptr + schema_offset - 24;
       int32_t i = 0;
-      while (ptr < end) {
+      while (*ptr < end) {
         i++;
-        tensorflow::Status status = Parse(ptr, out_tensors, types);
-        if (!status.ok()) return status;
+        TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types));
       }
 
-      ptr += (length - schema_offset);
+      *ptr += (length - schema_offset);
 
       break;
     }
     default: {
-      return tensorflow::errors::Internal("Unknowd binary type (type id ",
-                                          (int)object_type_id, ")");
+      return errors::Internal("Unknowd binary type (type id ",
+                              (int)object_type_id, ")");
     }
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
index 1e845cbc56..9accbd796f 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
@@ -17,13 +17,12 @@ limitations under the License.
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/lib/core/status.h"
 
-namespace ignite {
+namespace tensorflow {
 
 class BinaryObjectParser {
  public:
-  tensorflow::Status Parse(uint8_t*& ptr,
-                           std::vector<tensorflow::Tensor>& out_tensors,
-                           std::vector<int32_t>& types);
+  Status Parse(uint8_t** ptr, std::vector<Tensor>* out_tensors,
+               std::vector<int32_t>* types);
 };
 
 enum ObjectType {
@@ -51,4 +50,4 @@ enum ObjectType {
   COMPLEX_OBJ = 103
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.cc b/tensorflow/contrib/ignite/kernels/ignite_client.cc
deleted file mode 100644
index 5a8eddb944..0000000000
--- a/tensorflow/contrib/ignite/kernels/ignite_client.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef IGNITE_CLIENT_H
-#define IGNITE_CLIENT_H
-#include "ignite_client.h"
-#endif
-
-namespace ignite {
-
-tensorflow::Status Client::ReadByte(uint8_t& data) {
-  return ReadData((uint8_t*)&data, 1);
-}
-
-tensorflow::Status Client::ReadShort(int16_t& data) {
-  return ReadData((uint8_t*)&data, 2);
-}
-
-tensorflow::Status Client::ReadInt(int32_t& data) {
-  return ReadData((uint8_t*)&data, 4);
-}
-
-tensorflow::Status Client::ReadLong(int64_t& data) {
-  return ReadData((uint8_t*)&data, 8);
-}
-
-tensorflow::Status Client::WriteByte(uint8_t data) {
-  return WriteData((uint8_t*)&data, 1);
-}
-
-tensorflow::Status Client::WriteShort(int16_t data) {
-  return WriteData((uint8_t*)&data, 2);
-}
-
-tensorflow::Status Client::WriteInt(int32_t data) {
-  return WriteData((uint8_t*)&data, 4);
-}
-
-tensorflow::Status Client::WriteLong(int64_t data) {
-  return WriteData((uint8_t*)&data, 8);
-}
-
-}  // namespace ignite
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h
index 64e28d75f0..944b3fe184 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_client.h
@@ -13,28 +13,43 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
+
 #include "tensorflow/core/lib/core/status.h"
 
-namespace ignite {
+namespace tensorflow {
 
 class Client {
  public:
-  virtual tensorflow::Status Connect() = 0;
-  virtual tensorflow::Status Disconnect() = 0;
+  virtual Status Connect() = 0;
+  virtual Status Disconnect() = 0;
   virtual bool IsConnected() = 0;
   virtual int GetSocketDescriptor() = 0;
+  virtual Status ReadData(uint8_t* buf, int32_t length) = 0;
+  virtual Status WriteData(uint8_t* buf, int32_t length) = 0;
+
+  inline Status ReadByte(uint8_t* data) { return ReadData(data, 1); }
+
+  inline Status ReadShort(int16_t* data) { return ReadData((uint8_t*)data, 2); }
+
+  inline Status ReadInt(int32_t* data) { return ReadData((uint8_t*)data, 4); }
+
+  inline Status ReadLong(int64_t* data) { return ReadData((uint8_t*)data, 8); }
 
-  virtual tensorflow::Status ReadByte(uint8_t& data);
-  virtual tensorflow::Status ReadShort(int16_t& data);
-  virtual tensorflow::Status ReadInt(int32_t& data);
-  virtual tensorflow::Status ReadLong(int64_t& data);
-  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length) = 0;
-
-  virtual tensorflow::Status WriteByte(uint8_t data);
-  virtual tensorflow::Status WriteShort(int16_t data);
-  virtual tensorflow::Status WriteInt(int32_t data);
-  virtual tensorflow::Status WriteLong(int64_t data);
-  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length) = 0;
+  inline Status WriteByte(uint8_t data) { return WriteData(&data, 1); }
+
+  inline Status WriteShort(int16_t data) {
+    return WriteData((uint8_t*)&data, 2);
+  }
+
+  inline Status WriteInt(int32_t data) { return WriteData((uint8_t*)&data, 4); }
+
+  inline Status WriteLong(int64_t data) {
+    return WriteData((uint8_t*)&data, 8);
+  }
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
+
+#endif
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
index a9bf26955b..f25f8a5b18 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
@@ -16,31 +16,29 @@ limitations under the License.
 #include "ignite_dataset_iterator.h"
 #include "tensorflow/core/platform/logging.h"
 
-namespace ignite {
+namespace tensorflow {
 
-IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx,
-                             std::string cache_name, std::string host,
-                             tensorflow::int32 port, bool local,
-                             tensorflow::int32 part,
-                             tensorflow::int32 page_size, std::string username,
+IgniteDataset::IgniteDataset(OpKernelContext* ctx, std::string cache_name,
+                             std::string host, int32 port, bool local,
+                             int32 part, int32 page_size, std::string username,
                              std::string password, std::string certfile,
                              std::string keyfile, std::string cert_password,
-                             std::vector<tensorflow::int32> schema,
-                             std::vector<tensorflow::int32> permutation)
-    : DatasetBase(tensorflow::DatasetContext(ctx)),
-      cache_name(cache_name),
-      host(host),
-      port(port),
-      local(local),
-      part(part),
-      page_size(page_size),
-      username(username),
-      password(password),
-      certfile(certfile),
-      keyfile(keyfile),
-      cert_password(cert_password),
-      schema(schema),
-      permutation(permutation) {
+                             std::vector<int32> schema,
+                             std::vector<int32> permutation)
+    : DatasetBase(DatasetContext(ctx)),
+      cache_name_(cache_name),
+      host_(host),
+      port_(port),
+      local_(local),
+      part_(part),
+      page_size_(page_size),
+      username_(username),
+      password_(password),
+      certfile_(certfile),
+      keyfile_(keyfile),
+      cert_password_(cert_password),
+      schema_(schema),
+      permutation_(permutation) {
   SchemaToTypes();
   SchemaToShapes();
 
@@ -53,55 +51,50 @@ IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx,
 
 IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; }
 
-std::unique_ptr<tensorflow::IteratorBase> IgniteDataset::MakeIteratorInternal(
-    const tensorflow::string& prefix) const {
-  return std::unique_ptr<tensorflow::IteratorBase>(new IgniteDatasetIterator(
-      {this, tensorflow::strings::StrCat(prefix, "::Ignite")}, this->host,
-      this->port, this->cache_name, this->local, this->part, this->page_size,
-      this->username, this->password, this->certfile, this->keyfile,
-      this->cert_password, this->schema, this->permutation));
+std::unique_ptr<IteratorBase> IgniteDataset::MakeIteratorInternal(
+    const string& prefix) const {
+  return std::unique_ptr<IteratorBase>(new IgniteDatasetIterator(
+      {this, strings::StrCat(prefix, "::Ignite")}, this->host_, this->port_,
+      this->cache_name_, this->local_, this->part_, this->page_size_,
+      this->username_, this->password_, this->certfile_, this->keyfile_,
+      this->cert_password_, this->schema_, this->permutation_));
 }
 
-const tensorflow::DataTypeVector& IgniteDataset::output_dtypes() const {
-  return dtypes;
-}
+const DataTypeVector& IgniteDataset::output_dtypes() const { return dtypes_; }
 
-const std::vector<tensorflow::PartialTensorShape>&
-IgniteDataset::output_shapes() const {
-  return shapes;
+const std::vector<PartialTensorShape>& IgniteDataset::output_shapes() const {
+  return shapes_;
 }
 
-tensorflow::string IgniteDataset::DebugString() const {
-  return "IgniteDatasetOp::Dataset";
-}
+string IgniteDataset::DebugString() const { return "IgniteDatasetOp::Dataset"; }
 
-tensorflow::Status IgniteDataset::AsGraphDefInternal(
-    tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b,
-    tensorflow::Node** output) const {
-  return tensorflow::errors::Unimplemented(
+Status IgniteDataset::AsGraphDefInternal(SerializationContext* ctx,
+                                         DatasetGraphDefBuilder* b,
+                                         Node** output) const {
+  return errors::Unimplemented(
       "IgniteDataset does not support 'AsGraphDefInternal'");
 }
 
 void IgniteDataset::SchemaToTypes() {
-  for (auto e : schema) {
+  for (auto e : schema_) {
     if (e == BYTE || e == BYTE_ARR) {
-      dtypes.push_back(tensorflow::DT_UINT8);
+      dtypes_.push_back(DT_UINT8);
     } else if (e == SHORT || e == SHORT_ARR) {
-      dtypes.push_back(tensorflow::DT_INT16);
+      dtypes_.push_back(DT_INT16);
     } else if (e == INT || e == INT_ARR) {
-      dtypes.push_back(tensorflow::DT_INT32);
+      dtypes_.push_back(DT_INT32);
     } else if (e == LONG || e == LONG_ARR) {
-      dtypes.push_back(tensorflow::DT_INT64);
+      dtypes_.push_back(DT_INT64);
     } else if (e == FLOAT || e == FLOAT_ARR) {
-      dtypes.push_back(tensorflow::DT_FLOAT);
+      dtypes_.push_back(DT_FLOAT);
     } else if (e == DOUBLE || e == DOUBLE_ARR) {
-      dtypes.push_back(tensorflow::DT_DOUBLE);
+      dtypes_.push_back(DT_DOUBLE);
     } else if (e == UCHAR || e == UCHAR_ARR) {
-      dtypes.push_back(tensorflow::DT_UINT8);
+      dtypes_.push_back(DT_UINT8);
     } else if (e == BOOL || e == BOOL_ARR) {
-      dtypes.push_back(tensorflow::DT_BOOL);
+      dtypes_.push_back(DT_BOOL);
     } else if (e == STRING || e == STRING_ARR) {
-      dtypes.push_back(tensorflow::DT_STRING);
+      dtypes_.push_back(DT_STRING);
     } else {
       LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
     }
@@ -109,15 +102,15 @@ void IgniteDataset::SchemaToTypes() {
 }
 
 void IgniteDataset::SchemaToShapes() {
-  for (auto e : schema) {
+  for (auto e : schema_) {
     if (e >= 1 && e < 10) {
-      shapes.push_back(tensorflow::PartialTensorShape({}));
+      shapes_.push_back(PartialTensorShape({}));
     } else if (e >= 12 && e < 21) {
-      shapes.push_back(tensorflow::PartialTensorShape({-1}));
+      shapes_.push_back(PartialTensorShape({-1}));
     } else {
       LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
     }
   }
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
index 2120dfd342..d3fec5910b 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
@@ -15,51 +15,48 @@ limitations under the License.
 
 #include "tensorflow/core/framework/dataset.h"
 
-namespace ignite {
+namespace tensorflow {
 
-class IgniteDataset : public tensorflow::DatasetBase {
+class IgniteDataset : public DatasetBase {
  public:
-  IgniteDataset(tensorflow::OpKernelContext* ctx, std::string cache_name,
-                std::string host, tensorflow::int32 port, bool local,
-                tensorflow::int32 part, tensorflow::int32 page_size,
+  IgniteDataset(OpKernelContext* ctx, std::string cache_name, std::string host,
+                int32 port, bool local, int32 part, int32 page_size,
                 std::string username, std::string password,
                 std::string certfile, std::string keyfile,
-                std::string cert_password,
-                std::vector<tensorflow::int32> schema,
-                std::vector<tensorflow::int32> permutation);
+                std::string cert_password, std::vector<int32> schema,
+                std::vector<int32> permutation);
   ~IgniteDataset();
-  std::unique_ptr<tensorflow::IteratorBase> MakeIteratorInternal(
-      const tensorflow::string& prefix) const override;
-  const tensorflow::DataTypeVector& output_dtypes() const override;
-  const std::vector<tensorflow::PartialTensorShape>& output_shapes()
-      const override;
-  tensorflow::string DebugString() const override;
+  std::unique_ptr<IteratorBase> MakeIteratorInternal(
+      const string& prefix) const override;
+  const DataTypeVector& output_dtypes() const override;
+  const std::vector<PartialTensorShape>& output_shapes() const override;
+  string DebugString() const override;
 
  protected:
-  tensorflow::Status AsGraphDefInternal(
-      tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b,
-      tensorflow::Node** output) const override;
+  Status AsGraphDefInternal(SerializationContext* ctx,
+                            DatasetGraphDefBuilder* b,
+                            Node** output) const override;
 
  private:
-  const std::string cache_name;
-  const std::string host;
-  const tensorflow::int32 port;
-  const bool local;
-  const tensorflow::int32 part;
-  const tensorflow::int32 page_size;
-  const std::string username;
-  const std::string password;
-  const std::string certfile;
-  const std::string keyfile;
-  const std::string cert_password;
-  const std::vector<tensorflow::int32> schema;
-  const std::vector<tensorflow::int32> permutation;
-
-  tensorflow::DataTypeVector dtypes;
-  std::vector<tensorflow::PartialTensorShape> shapes;
+  const std::string cache_name_;
+  const std::string host_;
+  const int32 port_;
+  const bool local_;
+  const int32 part_;
+  const int32 page_size_;
+  const std::string username_;
+  const std::string password_;
+  const std::string certfile_;
+  const std::string keyfile_;
+  const std::string cert_password_;
+  const std::vector<int32> schema_;
+  const std::vector<int32> permutation_;
+
+  DataTypeVector dtypes_;
+  std::vector<PartialTensorShape> shapes_;
 
   void SchemaToTypes();
   void SchemaToShapes();
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
index 03cc3c1291..1774585ecd 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
@@ -22,270 +22,262 @@ limitations under the License.
 #include <time.h>
 #include <chrono>
 
-namespace ignite {
-
-#define CHECK_STATUS(status) \
-  if (!status.ok()) return status;
+namespace tensorflow {
 
 IgniteDatasetIterator::IgniteDatasetIterator(
-    const Params& params, std::string host, tensorflow::int32 port,
-    std::string cache_name, bool local, tensorflow::int32 part,
-    tensorflow::int32 page_size, std::string username, std::string password,
-    std::string certfile, std::string keyfile, std::string cert_password,
-    std::vector<tensorflow::int32> schema,
-    std::vector<tensorflow::int32> permutation)
-    : tensorflow::DatasetIterator<IgniteDataset>(params),
-      cache_name(cache_name),
-      local(local),
-      part(part),
-      page_size(page_size),
-      username(username),
-      password(password),
-      schema(schema),
-      permutation(permutation),
-      remainder(-1),
-      cursor_id(-1),
-      last_page(false) {
+    const Params& params, std::string host, int32 port, std::string cache_name,
+    bool local, int32 part, int32 page_size, std::string username,
+    std::string password, std::string certfile, std::string keyfile,
+    std::string cert_password, std::vector<int32> schema,
+    std::vector<int32> permutation)
+    : DatasetIterator<IgniteDataset>(params),
+      cache_name_(cache_name),
+      local_(local),
+      part_(part),
+      page_size_(page_size),
+      username_(username),
+      password_(password),
+      schema_(schema),
+      permutation_(permutation),
+      remainder_(-1),
+      cursor_id_(-1),
+      last_page_(false) {
   Client* p_client = new PlainClient(host, port);
 
   if (certfile.empty())
-    client = std::unique_ptr<Client>(p_client);
+    client_ = std::unique_ptr<Client>(p_client);
   else
-    client = std::unique_ptr<Client>(new SslWrapper(
+    client_ = std::unique_ptr<Client>(new SslWrapper(
         std::unique_ptr<Client>(p_client), certfile, keyfile, cert_password));
 
   LOG(INFO) << "Ignite Dataset Iterator created";
 }
 
 IgniteDatasetIterator::~IgniteDatasetIterator() {
-  tensorflow::Status status = CloseConnection();
+  Status status = CloseConnection();
   if (!status.ok()) LOG(ERROR) << status.ToString();
 
   LOG(INFO) << "Ignite Dataset Iterator destroyed";
 }
 
-tensorflow::Status IgniteDatasetIterator::EstablishConnection() {
-  if (!client->IsConnected()) {
-    tensorflow::Status status = client->Connect();
+Status IgniteDatasetIterator::EstablishConnection() {
+  if (!client_->IsConnected()) {
+    Status status = client_->Connect();
     if (!status.ok()) return status;
 
     status = Handshake();
     if (!status.ok()) {
-      tensorflow::Status disconnect_status = client->Disconnect();
+      Status disconnect_status = client_->Disconnect();
       if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString();
 
       return status;
     }
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::CloseConnection() {
-  if (cursor_id != -1 && !last_page) {
-    tensorflow::Status conn_status = EstablishConnection();
+Status IgniteDatasetIterator::CloseConnection() {
+  if (cursor_id_ != -1 && !last_page_) {
+    Status conn_status = EstablishConnection();
     if (!conn_status.ok()) return conn_status;
 
-    CHECK_STATUS(client->WriteInt(18));  // Message length
-    CHECK_STATUS(
-        client->WriteShort(close_connection_opcode));  // Operation code
-    CHECK_STATUS(client->WriteLong(0));                // Request ID
-    CHECK_STATUS(client->WriteLong(cursor_id));        // Resource ID
+    TF_RETURN_IF_ERROR(client_->WriteInt(18));  // Message length
+    TF_RETURN_IF_ERROR(
+        client_->WriteShort(close_connection_opcode));   // Operation code
+    TF_RETURN_IF_ERROR(client_->WriteLong(0));           // Request ID
+    TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_));  // Resource ID
 
     int32_t res_len;
-    CHECK_STATUS(client->ReadInt(res_len));
+    TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
     if (res_len < 12)
-      return tensorflow::errors::Internal(
-          "Close Resource Response is corrupted");
+      return errors::Internal("Close Resource Response is corrupted");
 
     int64_t req_id;
-    CHECK_STATUS(client->ReadLong(req_id));
+    TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
     int32_t status;
-    CHECK_STATUS(client->ReadInt(status));
+    TF_RETURN_IF_ERROR(client_->ReadInt(&status));
     if (status != 0) {
       uint8_t err_msg_header;
-      CHECK_STATUS(client->ReadByte(err_msg_header));
+      TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
       if (err_msg_header == string_val) {
         int32_t err_msg_length;
-        CHECK_STATUS(client->ReadInt(err_msg_length));
+        TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
         uint8_t* err_msg_c = new uint8_t[err_msg_length];
-        CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+        TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
         std::string err_msg((char*)err_msg_c, err_msg_length);
         delete[] err_msg_c;
 
-        return tensorflow::errors::Internal("Close Resource Error [status=",
-                                            status, ", message=", err_msg, "]");
+        return errors::Internal("Close Resource Error [status=", status,
+                                ", message=", err_msg, "]");
       }
-      return tensorflow::errors::Internal("Close Resource Error [status=",
-                                          status, "]");
+      return errors::Internal("Close Resource Error [status=", status, "]");
     }
 
-    LOG(INFO) << "Query Cursor " << cursor_id << " is closed";
+    LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed";
 
-    cursor_id = -1;
+    cursor_id_ = -1;
 
-    return client->Disconnect();
+    return client_->Disconnect();
   } else {
-    LOG(INFO) << "Query Cursor " << cursor_id << " is already closed";
+    LOG(INFO) << "Query Cursor " << cursor_id_ << " is already closed";
   }
 
-  return client->IsConnected() ? client->Disconnect()
-                               : tensorflow::Status::OK();
+  return client_->IsConnected() ? client_->Disconnect() : Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::GetNextInternal(
-    tensorflow::IteratorContext* ctx,
-    std::vector<tensorflow::Tensor>* out_tensors, bool* end_of_sequence) {
-  if (remainder == 0 && last_page) {
-    LOG(INFO) << "Query Cursor " << cursor_id << " is closed";
+Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx,
+                                              std::vector<Tensor>* out_tensors,
+                                              bool* end_of_sequence) {
+  if (remainder_ == 0 && last_page_) {
+    LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed";
 
-    cursor_id = -1;
+    cursor_id_ = -1;
     *end_of_sequence = true;
-    return tensorflow::Status::OK();
+    return Status::OK();
   } else {
-    tensorflow::Status status = EstablishConnection();
+    Status status = EstablishConnection();
     if (!status.ok()) return status;
 
-    if (remainder == -1 || remainder == 0) {
-      tensorflow::Status status =
-          remainder == -1 ? ScanQuery() : LoadNextPage();
+    if (remainder_ == -1 || remainder_ == 0) {
+      Status status = remainder_ == -1 ? ScanQuery() : LoadNextPage();
       if (!status.ok()) return status;
     }
 
-    uint8_t* initial_ptr = ptr;
+    uint8_t* initial_ptr = ptr_;
     std::vector<int32_t> types;
-    std::vector<tensorflow::Tensor> tensors;
+    std::vector<Tensor> tensors;
 
-    status = parser.Parse(ptr, tensors, types);  // Parse key
+    status = parser_.Parse(&ptr_, &tensors, &types);  // Parse key
     if (!status.ok()) return status;
 
-    status = parser.Parse(ptr, tensors, types);  // Parse val
+    status = parser_.Parse(&ptr_, &tensors, &types);  // Parse val
     if (!status.ok()) return status;
 
-    remainder -= (ptr - initial_ptr);
+    remainder_ -= (ptr_ - initial_ptr);
 
     out_tensors->resize(tensors.size());
     for (int32_t i = 0; i < tensors.size(); i++)
-      (*out_tensors)[permutation[i]] = std::move(tensors[i]);
+      (*out_tensors)[permutation_[i]] = std::move(tensors[i]);
 
     *end_of_sequence = false;
-    return tensorflow::Status::OK();
+    return Status::OK();
   }
 
   *end_of_sequence = true;
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::SaveInternal(
-    tensorflow::IteratorStateWriter* writer) {
-  return tensorflow::errors::Unimplemented(
+Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) {
+  return errors::Unimplemented(
       "Iterator for IgniteDataset does not support 'SaveInternal'");
 }
 
-tensorflow::Status IgniteDatasetIterator::RestoreInternal(
-    tensorflow::IteratorContext* ctx, tensorflow::IteratorStateReader* reader) {
-  return tensorflow::errors::Unimplemented(
+Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx,
+                                              IteratorStateReader* reader) {
+  return errors::Unimplemented(
       "Iterator for IgniteDataset does not support 'RestoreInternal')");
 }
 
-tensorflow::Status IgniteDatasetIterator::Handshake() {
+Status IgniteDatasetIterator::Handshake() {
   int32_t msg_len = 8;
 
-  if (username.empty())
+  if (username_.empty())
     msg_len += 1;
   else
-    msg_len += 5 + username.length();
+    msg_len += 5 + username_.length();
 
-  if (password.empty())
+  if (password_.empty())
     msg_len += 1;
   else
-    msg_len += 5 + password.length();
-
-  CHECK_STATUS(client->WriteInt(msg_len));
-  CHECK_STATUS(client->WriteByte(1));
-  CHECK_STATUS(client->WriteShort(protocol_major_version));
-  CHECK_STATUS(client->WriteShort(protocol_minor_version));
-  CHECK_STATUS(client->WriteShort(protocol_patch_version));
-  CHECK_STATUS(client->WriteByte(2));
-  if (username.empty()) {
-    CHECK_STATUS(client->WriteByte(null_val));
+    msg_len += 5 + password_.length();
+
+  TF_RETURN_IF_ERROR(client_->WriteInt(msg_len));
+  TF_RETURN_IF_ERROR(client_->WriteByte(1));
+  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_major_version));
+  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_minor_version));
+  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_patch_version));
+  TF_RETURN_IF_ERROR(client_->WriteByte(2));
+  if (username_.empty()) {
+    TF_RETURN_IF_ERROR(client_->WriteByte(null_val));
   } else {
-    CHECK_STATUS(client->WriteByte(string_val));
-    CHECK_STATUS(client->WriteInt(username.length()));
-    CHECK_STATUS(
-        client->WriteData((uint8_t*)username.c_str(), username.length()));
+    TF_RETURN_IF_ERROR(client_->WriteByte(string_val));
+    TF_RETURN_IF_ERROR(client_->WriteInt(username_.length()));
+    TF_RETURN_IF_ERROR(
+        client_->WriteData((uint8_t*)username_.c_str(), username_.length()));
   }
 
-  if (password.empty()) {
-    CHECK_STATUS(client->WriteByte(null_val));
+  if (password_.empty()) {
+    TF_RETURN_IF_ERROR(client_->WriteByte(null_val));
   } else {
-    CHECK_STATUS(client->WriteByte(string_val));
-    CHECK_STATUS(client->WriteInt(password.length()));
-    CHECK_STATUS(
-        client->WriteData((uint8_t*)password.c_str(), password.length()));
+    TF_RETURN_IF_ERROR(client_->WriteByte(string_val));
+    TF_RETURN_IF_ERROR(client_->WriteInt(password_.length()));
+    TF_RETURN_IF_ERROR(
+        client_->WriteData((uint8_t*)password_.c_str(), password_.length()));
   }
 
   int32_t handshake_res_len;
-  CHECK_STATUS(client->ReadInt(handshake_res_len));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&handshake_res_len));
   uint8_t handshake_res;
-  CHECK_STATUS(client->ReadByte(handshake_res));
+  TF_RETURN_IF_ERROR(client_->ReadByte(&handshake_res));
 
   LOG(INFO) << "Handshake length " << handshake_res_len << ", res "
             << (int16_t)handshake_res;
 
   if (handshake_res != 1) {
     int16_t serv_ver_major;
-    CHECK_STATUS(client->ReadShort(serv_ver_major));
+    TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_major));
     int16_t serv_ver_minor;
-    CHECK_STATUS(client->ReadShort(serv_ver_minor));
+    TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_minor));
     int16_t serv_ver_patch;
-    CHECK_STATUS(client->ReadShort(serv_ver_patch));
+    TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_patch));
     uint8_t header;
-    CHECK_STATUS(client->ReadByte(header));
+    TF_RETURN_IF_ERROR(client_->ReadByte(&header));
 
     if (header == string_val) {
       int32_t length;
-      CHECK_STATUS(client->ReadInt(length));
+      TF_RETURN_IF_ERROR(client_->ReadInt(&length));
       uint8_t* err_msg_c = new uint8_t[length];
-      CHECK_STATUS(client->ReadData(err_msg_c, length));
+      TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, length));
       std::string err_msg((char*)err_msg_c, length);
       delete[] err_msg_c;
 
-      return tensorflow::errors::Internal(
-          "Handshake Error [result=", handshake_res, ", version=",
-          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch,
-          ", message='", err_msg, "']");
+      return errors::Internal("Handshake Error [result=", handshake_res,
+                              ", version=", serv_ver_major, ".", serv_ver_minor,
+                              ".", serv_ver_patch, ", message='", err_msg,
+                              "']");
     } else if (header == null_val) {
-      return tensorflow::errors::Internal(
-          "Handshake Error [result=", handshake_res, ", version=",
-          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]");
+      return errors::Internal("Handshake Error [result=", handshake_res,
+                              ", version=", serv_ver_major, ".", serv_ver_minor,
+                              ".", serv_ver_patch, "]");
     } else {
-      return tensorflow::errors::Internal(
-          "Handshake Error [result=", handshake_res, ", version=",
-          serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]");
+      return errors::Internal("Handshake Error [result=", handshake_res,
+                              ", version=", serv_ver_major, ".", serv_ver_minor,
+                              ".", serv_ver_patch, "]");
     }
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::ScanQuery() {
-  CHECK_STATUS(client->WriteInt(25));                        // Message length
-  CHECK_STATUS(client->WriteShort(scan_query_opcode));       // Operation code
-  CHECK_STATUS(client->WriteLong(0));                        // Request ID
-  CHECK_STATUS(client->WriteInt(JavaHashCode(cache_name)));  // Cache name
-  CHECK_STATUS(client->WriteByte(0));                        // Flags
-  CHECK_STATUS(client->WriteByte(null_val));                 // Filter object
-  CHECK_STATUS(client->WriteInt(page_size));                 // Cursor page size
-  CHECK_STATUS(client->WriteInt(part));    // Partition to query
-  CHECK_STATUS(client->WriteByte(local));  // Local flag
+Status IgniteDatasetIterator::ScanQuery() {
+  TF_RETURN_IF_ERROR(client_->WriteInt(25));                   // Message length
+  TF_RETURN_IF_ERROR(client_->WriteShort(scan_query_opcode));  // Operation code
+  TF_RETURN_IF_ERROR(client_->WriteLong(0));                   // Request ID
+  TF_RETURN_IF_ERROR(
+      client_->WriteInt(JavaHashCode(cache_name_)));  // Cache name
+  TF_RETURN_IF_ERROR(client_->WriteByte(0));          // Flags
+  TF_RETURN_IF_ERROR(client_->WriteByte(null_val));   // Filter object
+  TF_RETURN_IF_ERROR(client_->WriteInt(page_size_));  // Cursor page size
+  TF_RETURN_IF_ERROR(client_->WriteInt(part_));       // part_ition to query
+  TF_RETURN_IF_ERROR(client_->WriteByte(local_));     // local_ flag
 
   int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
                            std::chrono::system_clock::now().time_since_epoch())
                            .count();
 
   int32_t res_len;
-  CHECK_STATUS(client->ReadInt(res_len));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
 
   int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
                           std::chrono::system_clock::now().time_since_epoch())
@@ -293,82 +285,81 @@ tensorflow::Status IgniteDatasetIterator::ScanQuery() {
 
   LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms";
 
-  if (res_len < 12)
-    return tensorflow::errors::Internal("Scan Query Response is corrupted");
+  if (res_len < 12) return errors::Internal("Scan Query Response is corrupted");
 
   int64_t req_id;
-  CHECK_STATUS(client->ReadLong(req_id));
+  TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
 
   int32_t status;
-  CHECK_STATUS(client->ReadInt(status));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&status));
 
   if (status != 0) {
     uint8_t err_msg_header;
-    CHECK_STATUS(client->ReadByte(err_msg_header));
+    TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
 
     if (err_msg_header == string_val) {
       int32_t err_msg_length;
-      CHECK_STATUS(client->ReadInt(err_msg_length));
+      TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
 
       uint8_t* err_msg_c = new uint8_t[err_msg_length];
-      CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+      TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
       std::string err_msg((char*)err_msg_c, err_msg_length);
       delete[] err_msg_c;
 
-      return tensorflow::errors::Internal("Scan Query Error [status=", status,
-                                          ", message=", err_msg, "]");
+      return errors::Internal("Scan Query Error [status=", status, ", message=",
+                              err_msg, "]");
     }
-    return tensorflow::errors::Internal("Scan Query Error [status=", status,
-                                        "]");
+    return errors::Internal("Scan Query Error [status=", status, "]");
   }
 
-  CHECK_STATUS(client->ReadLong(cursor_id));
+  TF_RETURN_IF_ERROR(client_->ReadLong(&cursor_id_));
 
-  LOG(INFO) << "Query Cursor " << cursor_id << " is opened";
+  LOG(INFO) << "Query Cursor " << cursor_id_ << " is opened";
 
   int32_t row_cnt;
-  CHECK_STATUS(client->ReadInt(row_cnt));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt));
 
-  remainder = res_len - 25;
-  page = std::unique_ptr<uint8_t>(new uint8_t[remainder]);
-  ptr = page.get();
+  remainder_ = res_len - 25;
+  page_ = std::unique_ptr<uint8_t>(new uint8_t[remainder_]);
+  ptr_ = page_.get();
 
   int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
                       std::chrono::system_clock::now().time_since_epoch())
                       .count();
 
-  CHECK_STATUS(client->ReadData(ptr, remainder));
+  TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_));
 
   int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
                      std::chrono::system_clock::now().time_since_epoch())
                      .count();
   ;
 
-  double size_in_mb = 1.0 * remainder / 1024 / 1024;
+  double size_in_mb = 1.0 * remainder_ / 1024 / 1024;
   double time_in_s = 1.0 * (stop - start) / 1000;
   LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
             << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
 
   uint8_t last_page_b;
-  CHECK_STATUS(client->ReadByte(last_page_b));
+  TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b));
 
-  last_page = !last_page_b;
+  last_page_ = !last_page_b;
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status IgniteDatasetIterator::LoadNextPage() {
-  CHECK_STATUS(client->WriteInt(18));                       // Message length
-  CHECK_STATUS(client->WriteShort(load_next_page_opcode));  // Operation code
-  CHECK_STATUS(client->WriteLong(0));                       // Request ID
-  CHECK_STATUS(client->WriteLong(cursor_id));               // Cursor ID
+Status IgniteDatasetIterator::LoadNextPage() {
+  TF_RETURN_IF_ERROR(client_->WriteInt(18));  // Message length
+  TF_RETURN_IF_ERROR(
+      client_->WriteShort(load_next_page_opcode));     // Operation code
+  TF_RETURN_IF_ERROR(client_->WriteLong(0));           // Request ID
+  TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_));  // Cursor ID
 
   int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
                            std::chrono::system_clock::now().time_since_epoch())
                            .count();
 
   int32_t res_len;
-  CHECK_STATUS(client->ReadInt(res_len));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
 
   int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
                           std::chrono::system_clock::now().time_since_epoch())
@@ -377,66 +368,65 @@ tensorflow::Status IgniteDatasetIterator::LoadNextPage() {
   LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms";
 
   if (res_len < 12)
-    return tensorflow::errors::Internal("Load Next Page Response is corrupted");
+    return errors::Internal("Load Next Page Response is corrupted");
 
   int64_t req_id;
-  CHECK_STATUS(client->ReadLong(req_id));
+  TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
 
   int32_t status;
-  CHECK_STATUS(client->ReadInt(status));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&status));
 
   if (status != 0) {
     uint8_t err_msg_header;
-    CHECK_STATUS(client->ReadByte(err_msg_header));
+    TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
 
     if (err_msg_header == string_val) {
       int32_t err_msg_length;
-      CHECK_STATUS(client->ReadInt(err_msg_length));
+      TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
 
       uint8_t* err_msg_c = new uint8_t[err_msg_length];
-      CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length));
+      TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
       std::string err_msg((char*)err_msg_c, err_msg_length);
       delete[] err_msg_c;
 
-      return tensorflow::errors::Internal("Load Next Page Error [status=",
-                                          status, ", message=", err_msg, "]");
+      return errors::Internal("Load Next Page Error [status=", status,
+                              ", message=", err_msg, "]");
     }
-    return tensorflow::errors::Internal("Load Next Page Error [status=", status,
-                                        "]");
+    return errors::Internal("Load Next Page Error [status=", status, "]");
   }
 
   int32_t row_cnt;
-  CHECK_STATUS(client->ReadInt(row_cnt));
+  TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt));
 
-  remainder = res_len - 17;
-  page = std::unique_ptr<uint8_t>(new uint8_t[remainder]);
-  ptr = page.get();
+  remainder_ = res_len - 17;
+  page_ = std::unique_ptr<uint8_t>(new uint8_t[remainder_]);
+  ptr_ = page_.get();
 
   int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
                       std::chrono::system_clock::now().time_since_epoch())
                       .count();
 
-  CHECK_STATUS(client->ReadData(ptr, remainder));
+  TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_));
 
   int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
                      std::chrono::system_clock::now().time_since_epoch())
                      .count();
   ;
 
-  double size_in_mb = 1.0 * remainder / 1024 / 1024;
+  double size_in_mb = 1.0 * remainder_ / 1024 / 1024;
   double time_in_s = 1.0 * (stop - start) / 1000;
   LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
             << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
 
   uint8_t last_page_b;
-  CHECK_STATUS(client->ReadByte(last_page_b));
+  TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b));
 
-  last_page = !last_page_b;
+  last_page_ = !last_page_b;
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-int32_t IgniteDatasetIterator::JavaHashCode(std::string str) {
+int32_t IgniteDatasetIterator::JavaHashCode(std::string str) const {
   int32_t h = 0;
   for (char& c : str) {
     h = 31 * h + c;
@@ -444,4 +434,4 @@ int32_t IgniteDatasetIterator::JavaHashCode(std::string str) {
   return h;
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
index d1df4527f9..5858dbfcb9 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
@@ -14,65 +14,55 @@ limitations under the License.
 ==============================================================================*/
 
 #include "ignite_binary_object_parser.h"
-#include "ignite_dataset.h"
-
-#ifndef IGNITE_CLIENT_H
-#define IGNITE_CLIENT_H
 #include "ignite_client.h"
-#endif
+#include "ignite_dataset.h"
 
-namespace ignite {
+namespace tensorflow {
 
-class IgniteDatasetIterator
-    : public tensorflow::DatasetIterator<IgniteDataset> {
+class IgniteDatasetIterator : public DatasetIterator<IgniteDataset> {
  public:
-  IgniteDatasetIterator(const Params& params, std::string host,
-                        tensorflow::int32 port, std::string cache_name,
-                        bool local, tensorflow::int32 part,
-                        tensorflow::int32 page_size, std::string username,
+  IgniteDatasetIterator(const Params& params, std::string host, int32 port,
+                        std::string cache_name, bool local, int32 part,
+                        int32 page_size, std::string username,
                         std::string password, std::string certfile,
                         std::string keyfile, std::string cert_password,
-                        std::vector<tensorflow::int32> schema,
-                        std::vector<tensorflow::int32> permutation);
+                        std::vector<int32> schema,
+                        std::vector<int32> permutation);
   ~IgniteDatasetIterator();
-  tensorflow::Status GetNextInternal(
-      tensorflow::IteratorContext* ctx,
-      std::vector<tensorflow::Tensor>* out_tensors,
-      bool* end_of_sequence) override;
+  Status GetNextInternal(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
+                         bool* end_of_sequence) override;
 
  protected:
-  tensorflow::Status SaveInternal(
-      tensorflow::IteratorStateWriter* writer) override;
-  tensorflow::Status RestoreInternal(
-      tensorflow::IteratorContext* ctx,
-      tensorflow::IteratorStateReader* reader) override;
+  Status SaveInternal(IteratorStateWriter* writer) override;
+  Status RestoreInternal(IteratorContext* ctx,
+                         IteratorStateReader* reader) override;
 
  private:
-  std::unique_ptr<Client> client;
-  BinaryObjectParser parser;
+  std::unique_ptr<Client> client_;
+  BinaryObjectParser parser_;
 
-  const std::string cache_name;
-  const bool local;
-  const tensorflow::int32 part;
-  const tensorflow::int32 page_size;
-  const std::string username;
-  const std::string password;
-  const std::vector<tensorflow::int32> schema;
-  const std::vector<tensorflow::int32> permutation;
+  const std::string cache_name_;
+  const bool local_;
+  const int32 part_;
+  const int32 page_size_;
+  const std::string username_;
+  const std::string password_;
+  const std::vector<int32> schema_;
+  const std::vector<int32> permutation_;
 
-  int32_t remainder;
-  int64_t cursor_id;
-  bool last_page;
+  int32_t remainder_;
+  int64_t cursor_id_;
+  bool last_page_;
 
-  std::unique_ptr<uint8_t> page;
-  uint8_t* ptr;
+  std::unique_ptr<uint8_t> page_;
+  uint8_t* ptr_;
 
-  tensorflow::Status EstablishConnection();
-  tensorflow::Status CloseConnection();
-  tensorflow::Status Handshake();
-  tensorflow::Status ScanQuery();
-  tensorflow::Status LoadNextPage();
-  int32_t JavaHashCode(std::string str);
+  Status EstablishConnection();
+  Status CloseConnection();
+  Status Handshake();
+  Status ScanQuery();
+  Status LoadNextPage();
+  int32_t JavaHashCode(std::string str) const;
 };
 
 constexpr uint8_t null_val = 101;
@@ -84,4 +74,4 @@ constexpr int16_t scan_query_opcode = 2000;
 constexpr int16_t load_next_page_opcode = 2001;
 constexpr int16_t close_connection_opcode = 0;
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index 543b5e4afc..89eecf9c14 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
+namespace {
 
 class IgniteDatasetOp : public DatasetOpKernel {
  public:
@@ -132,14 +133,15 @@ class IgniteDatasetOp : public DatasetOpKernel {
       permutation.push_back(permutation_tensor->flat<int32>()(i));
     }
 
-    *output = new ignite::IgniteDataset(
-        ctx, cache_name, host, port, local, part, page_size, username, password,
-        certfile, keyfile, cert_password, std::move(schema),
-        std::move(permutation));
+    *output =
+        new IgniteDataset(ctx, cache_name, host, port, local, part, page_size,
+                          username, password, certfile, keyfile, cert_password,
+                          std::move(schema), std::move(permutation));
   }
 };
 
 REGISTER_KERNEL_BUILDER(Name("IgniteDataset").Device(DEVICE_CPU),
                         IgniteDatasetOp);
 
+}  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
index 5491af68d6..6f417a3cb5 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
@@ -13,31 +13,28 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef IGNITE_CLIENT_H
-#define IGNITE_CLIENT_H
 #include "ignite_client.h"
-#endif
 
 #include <string>
 
-namespace ignite {
+namespace tensorflow {
 
 class PlainClient : public Client {
  public:
   PlainClient(std::string host, int port);
   ~PlainClient();
 
-  virtual tensorflow::Status Connect();
-  virtual tensorflow::Status Disconnect();
+  virtual Status Connect();
+  virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length);
-  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length);
+  virtual Status ReadData(uint8_t* buf, int32_t length);
+  virtual Status WriteData(uint8_t* buf, int32_t length);
 
  private:
-  std::string host;
-  int port;
-  int sock;
+  const std::string host_;
+  const int port_;
+  int sock_;
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
index dbfa4f8786..a4c58a9563 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
@@ -29,104 +29,98 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 
-namespace ignite {
+namespace tensorflow {
 
 PlainClient::PlainClient(std::string host, int port)
-    : host(host), port(port), sock(-1) {}
+    : host_(host), port_(port), sock_(-1) {}
 
 PlainClient::~PlainClient() {
   if (IsConnected()) {
-    tensorflow::Status status = Disconnect();
+    Status status = Disconnect();
     if (!status.ok()) LOG(WARNING) << status.ToString();
   }
 }
 
-tensorflow::Status PlainClient::Connect() {
-  if (sock == -1) {
-    sock = socket(AF_INET, SOCK_STREAM, 0);
-    if (sock == -1)
-      return tensorflow::errors::Internal("Failed to create socket");
+Status PlainClient::Connect() {
+  if (sock_ == -1) {
+    sock_ = socket(AF_INET, SOCK_STREAM, 0);
+    if (sock_ == -1) return errors::Internal("Failed to create socket");
   }
 
   sockaddr_in server;
 
-  server.sin_addr.s_addr = inet_addr(host.c_str());
+  server.sin_addr.s_addr = inet_addr(host_.c_str());
   if (server.sin_addr.s_addr == -1) {
     hostent* he;
     in_addr** addr_list;
 
-    if ((he = gethostbyname(host.c_str())) == NULL)
-      return tensorflow::errors::Internal("Failed to resolve hostname \"", host,
-                                          "\"");
+    if ((he = gethostbyname(host_.c_str())) == NULL)
+      return errors::Internal("Failed to resolve hostname \"", host_, "\"");
 
     addr_list = (in_addr**)he->h_addr_list;
     if (addr_list[0] != NULL) server.sin_addr = *addr_list[0];
   }
 
   server.sin_family = AF_INET;
-  server.sin_port = htons(port);
+  server.sin_port = htons(port_);
 
-  if (connect(sock, (sockaddr*)&server, sizeof(server)) < 0)
-    return tensorflow::errors::Internal("Failed to connect to \"", host, ":",
-                                        port, "\"");
+  if (connect(sock_, (sockaddr*)&server, sizeof(server)) < 0)
+    return errors::Internal("Failed to connect to \"", host_, ":", port_, "\"");
 
-  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established";
+  LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established";
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status PlainClient::Disconnect() {
-  int close_res = close(sock);
-  sock = -1;
+Status PlainClient::Disconnect() {
+  int close_res = close(sock_);
+  sock_ = -1;
 
-  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" is closed";
+  LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" is closed";
 
-  return close_res == 0 ? tensorflow::Status::OK()
-                        : tensorflow::errors::Internal(
-                              "Failed to correctly close connection");
+  return close_res == 0
+             ? Status::OK()
+             : errors::Internal("Failed to correctly close connection");
 }
 
-bool PlainClient::IsConnected() { return sock != -1; }
+bool PlainClient::IsConnected() { return sock_ != -1; }
 
-int PlainClient::GetSocketDescriptor() { return sock; }
+int PlainClient::GetSocketDescriptor() { return sock_; }
 
-tensorflow::Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
+Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = recv(sock, buf, length - recieved, 0);
+    int res = recv(sock_, buf, length - recieved, 0);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while reading from socket: ", res, ", ",
-          std::string(strerror(errno)));
+      return errors::Internal("Error occured while reading from socket: ", res,
+                              ", ", std::string(strerror(errno)));
 
-    if (res == 0)
-      return tensorflow::errors::Internal("Server closed connection");
+    if (res == 0) return errors::Internal("Server closed connection");
 
     recieved += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
+Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = send(sock, buf, length - sent, 0);
+    int res = send(sock_, buf, length - sent, 0);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while writing into socket: ", res, ", ",
-          std::string(strerror(errno)));
+      return errors::Internal("Error occured while writing into socket: ", res,
+                              ", ", std::string(strerror(errno)));
 
     sent += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index f78c9b3627..7ba037f2d2 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -27,48 +27,45 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 
-namespace ignite {
+namespace tensorflow {
 
 PlainClient::PlainClient(std::string host, int port)
-    : host(host), port(port), sock(INVALID_SOCKET) {}
+    : host_(host), port_(port), sock_(INVALID_SOCKET) {}
 
 PlainClient::~PlainClient() {
   if (IsConnected()) {
-    tensorflow::Status status = Disconnect();
+    Status status = Disconnect();
     if (!status.ok()) LOG(WARNING) << status.ToString();
   }
 }
 
-tensorflow::Status PlainClient::Connect() {
+Status PlainClient::Connect() {
   WSADATA wsaData;
   addrinfo *result = NULL, *ptr = NULL, hints;
 
   int res = WSAStartup(MAKEWORD(2, 2), &wsaData);
-  if (res != 0)
-    return tensorflow::errors::Internal("WSAStartup failed with error: ", res);
+  if (res != 0) return errors::Internal("WSAStartup failed with error: ", res);
 
   ZeroMemory(&hints, sizeof(hints));
   hints.ai_family = AF_UNSPEC;
   hints.ai_socktype = SOCK_STREAM;
   hints.ai_protocol = IPPROTO_TCP;
 
-  res =
-      getaddrinfo(host.c_str(), std::to_string(port).c_str(), &hints, &result);
-  if (res != 0)
-    return tensorflow::errors::Internal("Getaddrinfo failed with error: ", res);
+  res = getaddrinfo(host_.c_str(), std::to_string(port_).c_str(), &hints,
+                    &result);
+  if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res);
 
   for (ptr = result; ptr != NULL; ptr = ptr->ai_next) {
-    sock = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
-    if (sock == INVALID_SOCKET) {
+    sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
+    if (sock_ == INVALID_SOCKET) {
       WSACleanup();
-      return tensorflow::errors::Internal("Socket failed with error: ",
-                                          WSAGetLastError());
+      return errors::Internal("Socket failed with error: ", WSAGetLastError());
     }
 
-    res = connect(sock, ptr->ai_addr, (int)ptr->ai_addrlen);
+    res = connect(sock_, ptr->ai_addr, (int)ptr->ai_addrlen);
     if (res == SOCKET_ERROR) {
-      closesocket(sock);
-      sock = INVALID_SOCKET;
+      closesocket(sock_);
+      sock_ = INVALID_SOCKET;
       continue;
     }
 
@@ -77,67 +74,63 @@ tensorflow::Status PlainClient::Connect() {
 
   freeaddrinfo(result);
 
-  if (sock == INVALID_SOCKET) {
+  if (sock_ == INVALID_SOCKET) {
     WSACleanup();
-    return tensorflow::errors::Internal("Unable to connect to server");
+    return errors::Internal("Unable to connect to server");
   }
 
-  LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established";
+  LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established";
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status PlainClient::Disconnect() {
-  int res = shutdown(sock, SD_SEND);
-  closesocket(sock);
+Status PlainClient::Disconnect() {
+  int res = shutdown(sock_, SD_SEND);
+  closesocket(sock_);
   WSACleanup();
 
   if (res == SOCKET_ERROR)
-    return tensorflow::errors::Internal("Shutdown failed with error: ",
-                                        WSAGetLastError());
+    return errors::Internal("Shutdown failed with error: ", WSAGetLastError());
   else
-    return tensorflow::Status::OK();
+    return Status::OK();
 }
 
-bool PlainClient::IsConnected() { return sock != INVALID_SOCKET; }
+bool PlainClient::IsConnected() { return sock_ != INVALID_SOCKET; }
 
-int PlainClient::GetSocketDescriptor() { return sock; }
+int PlainClient::GetSocketDescriptor() { return sock_; }
 
-tensorflow::Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
+Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = recv(sock, buf, length - recieved, 0);
+    int res = recv(sock_, buf, length - recieved, 0);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while reading from socket: ", res);
+      return errors::Internal("Error occured while reading from socket: ", res);
 
-    if (res == 0)
-      return tensorflow::errors::Internal("Server closed connection");
+    if (res == 0) return errors::Internal("Server closed connection");
 
     recieved += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
+Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = send(sock, buf, length - sent, 0);
+    int res = send(sock_, buf, length - sent, 0);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while writing into socket: ", res);
+      return errors::Internal("Error occured while writing into socket: ", res);
 
     sent += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
index a1101b91f3..a2bc6b9609 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
@@ -21,7 +21,7 @@ limitations under the License.
 #include <openssl/err.h>
 #include <openssl/ssl.h>
 
-namespace ignite {
+namespace tensorflow {
 
 static int PasswordCb(char *buf, int size, int rwflag, void *password) {
   strncpy(buf, (char *)(password), size);
@@ -31,119 +31,112 @@ static int PasswordCb(char *buf, int size, int rwflag, void *password) {
 
 SslWrapper::SslWrapper(std::shared_ptr<Client> client, std::string certfile,
                        std::string keyfile, std::string cert_password)
-    : client(client),
-      certfile(certfile),
-      keyfile(keyfile),
-      cert_password(cert_password),
-      ctx(NULL) {}
+    : client_(client),
+      certfile_(certfile),
+      keyfile_(keyfile),
+      cert_password_(cert_password),
+      ctx_(NULL) {}
 
 SslWrapper::~SslWrapper() {
   if (IsConnected()) {
-    tensorflow::Status status = Disconnect();
+    Status status = Disconnect();
     if (!status.ok()) LOG(WARNING) << status.ToString();
   }
 
-  if (ctx != NULL) {
-    SSL_CTX_free(ctx);
-    ctx = NULL;
+  if (ctx_ != NULL) {
+    SSL_CTX_free(ctx_);
+    ctx_ = NULL;
   }
 }
 
-tensorflow::Status SslWrapper::InitSslContext() {
+Status SslWrapper::InitSslContext() {
   OpenSSL_add_all_algorithms();
   SSL_load_error_strings();
 
-  ctx = SSL_CTX_new(SSLv23_method());
-  if (ctx == NULL)
-    return tensorflow::errors::Internal("Couldn't create SSL context");
+  ctx_ = SSL_CTX_new(SSLv23_method());
+  if (ctx_ == NULL) return errors::Internal("Couldn't create SSL context");
 
-  SSL_CTX_set_default_passwd_cb(ctx, PasswordCb);
-  SSL_CTX_set_default_passwd_cb_userdata(ctx, (void *)cert_password.c_str());
+  SSL_CTX_set_default_passwd_cb(ctx_, PasswordCb);
+  SSL_CTX_set_default_passwd_cb_userdata(ctx_, (void *)cert_password_.c_str());
 
-  if (SSL_CTX_use_certificate_chain_file(ctx, certfile.c_str()) != 1)
-    return tensorflow::errors::Internal(
-        "Couldn't load cetificate chain (file '", certfile, "')");
+  if (SSL_CTX_use_certificate_chain_file(ctx_, certfile_.c_str()) != 1)
+    return errors::Internal("Couldn't load cetificate chain (file '", certfile_,
+                            "')");
 
-  std::string private_key_file = keyfile.empty() ? certfile : keyfile;
-  if (SSL_CTX_use_PrivateKey_file(ctx, private_key_file.c_str(),
+  std::string private_key_file = keyfile_.empty() ? certfile_ : keyfile_;
+  if (SSL_CTX_use_PrivateKey_file(ctx_, private_key_file.c_str(),
                                   SSL_FILETYPE_PEM) != 1)
-    return tensorflow::errors::Internal("Couldn't load private key (file '",
-                                        private_key_file, "')");
+    return errors::Internal("Couldn't load private key (file '",
+                            private_key_file, "')");
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status SslWrapper::Connect() {
-  tensorflow::Status status;
-
-  if (ctx == NULL) {
-    status = InitSslContext();
-    if (!status.ok()) return status;
+Status SslWrapper::Connect() {
+  if (ctx_ == NULL) {
+    TF_RETURN_IF_ERROR(InitSslContext());
   }
 
-  ssl = SSL_new(ctx);
-  if (ssl == NULL)
-    return tensorflow::errors::Internal("Failed to establish SSL connection");
+  ssl_ = SSL_new(ctx_);
+  if (ssl_ == NULL)
+    return errors::Internal("Failed to establish SSL connection");
 
-  status = client->Connect();
-  if (!status.ok()) return status;
+  TF_RETURN_IF_ERROR(client_->Connect());
 
-  SSL_set_fd(ssl, client->GetSocketDescriptor());
-  if (SSL_connect(ssl) != 1)
-    return tensorflow::errors::Internal("Failed to establish SSL connection");
+  SSL_set_fd(ssl_, client_->GetSocketDescriptor());
+  if (SSL_connect(ssl_) != 1)
+    return errors::Internal("Failed to establish SSL connection");
 
   LOG(INFO) << "SSL connection established";
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status SslWrapper::Disconnect() {
-  SSL_free(ssl);
+Status SslWrapper::Disconnect() {
+  SSL_free(ssl_);
 
   LOG(INFO) << "SSL connection closed";
 
-  return client->Disconnect();
+  return client_->Disconnect();
 }
 
-bool SslWrapper::IsConnected() { return client->IsConnected(); }
+bool SslWrapper::IsConnected() { return client_->IsConnected(); }
 
-int SslWrapper::GetSocketDescriptor() { return client->GetSocketDescriptor(); }
+int SslWrapper::GetSocketDescriptor() { return client_->GetSocketDescriptor(); }
 
-tensorflow::Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
+Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = SSL_read(ssl, buf, length - recieved);
+    int res = SSL_read(ssl_, buf, length - recieved);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while reading from SSL socket: ", res);
+      return errors::Internal("Error occured while reading from SSL socket: ",
+                              res);
 
-    if (res == 0)
-      return tensorflow::errors::Internal("Server closed SSL connection");
+    if (res == 0) return errors::Internal("Server closed SSL connection");
 
     recieved += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-tensorflow::Status SslWrapper::WriteData(uint8_t *buf, int32_t length) {
+Status SslWrapper::WriteData(uint8_t *buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = SSL_write(ssl, buf, length - sent);
+    int res = SSL_write(ssl_, buf, length - sent);
 
     if (res < 0)
-      return tensorflow::errors::Internal(
-          "Error occured while writing into socket: ", res);
+      return errors::Internal("Error occured while writing into socket: ", res);
 
     sent += res;
     buf += res;
   }
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 
-}  // namespace ignite
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
index e0c2a242dc..bbba6cc181 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
@@ -13,15 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef IGNITE_CLIENT_H
-#define IGNITE_CLIENT_H
 #include "ignite_client.h"
-#endif
 
 #include <openssl/ssl.h>
 #include <string>
 
-namespace ignite {
+namespace tensorflow {
 
 class SslWrapper : public Client {
  public:
@@ -29,21 +26,22 @@ class SslWrapper : public Client {
              std::string keyfile, std::string cert_password);
   ~SslWrapper();
 
-  virtual tensorflow::Status Connect();
-  virtual tensorflow::Status Disconnect();
+  virtual Status Connect();
+  virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length);
-  virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length);
+  virtual Status ReadData(uint8_t* buf, int32_t length);
+  virtual Status WriteData(uint8_t* buf, int32_t length);
 
  private:
-  std::shared_ptr<Client> client;
-  std::string certfile;
-  std::string keyfile;
-  std::string cert_password;
-  SSL_CTX* ctx;
-  SSL* ssl;
-  tensorflow::Status InitSslContext();
+  std::shared_ptr<Client> client_;
+  std::string certfile_;
+  std::string keyfile_;
+  std::string cert_password_;
+  SSL_CTX* ctx_;
+  SSL* ssl_;
+
+  Status InitSslContext();
 };
 
-}  // namespace ignite
+}  // namespace tensorflow
-- 
GitLab


From 1408a1563e73e69f68c1eb6f34a0976c7c950ad9 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 28 Aug 2018 11:32:57 +0300
Subject: [PATCH 0047/1085] Update README.md.

---
 tensorflow/contrib/ignite/README.md | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md
index f2596fc572..8fec4066c4 100644
--- a/tensorflow/contrib/ignite/README.md
+++ b/tensorflow/contrib/ignite/README.md
@@ -13,19 +13,20 @@
 ## Overview
 
 [Apache Ignite](https://ignite.apache.org/) is a memory-centric distributed database, caching, and processing platform for
-transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a datasource for neural network training, inference and all other computations supported by TensorFlow. 
+transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a data source for neural network training, inference and all other computations supported by TensorFlow. 
 
 ## Features
 
-Ignite Dataset provides a set of features that makes it possible to use it in a wide range of cases. The most important and interesting features are described below.
+Ignite Dataset provides features that that you can use in a wide range of cases. The most important and interesting features are described below.
 
 ### Distributed In-Memory Datasource
-[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that allows to avoid limitations of hard drive and provide high reading speed and ability to store and operate with as much data as you need in distributed cluster. Using of Ignite Dataset makes it possible to utilize all these advantages. 
+[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that provides fast data access. It allows you to avoid limitations of hard drive and and store and operate with as much data as you need in distributed cluster. You can utilize
+these benefits of Apache Ignite by using Ignite Dataset. Moreover, Ignite Dataset can be used for the following use-cases:
 - If you have a **gigabyte** of data you can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations. At the same time, you can store your data in Apache Ignite on the same machine and use it as a datasource for TensorFlow and thus avoid these limitations.
 - If you have a **terabyte** of data you probably still can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations again. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow and thus avoid these limitations.
 - If you have a **petabyte** of data you can't keep it on a single machine. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow.
 
-It's  important that Apache Ignite is not just a step of ETL pipeline between database or data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. Choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, in the same time, an ability to use this data for neural network training and inference.
+Note that Apache Ignite is not just a step of ETL pipeline between a database or a data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. By choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, at the same time, an ability to use this data for neural network training and inference.
 
 ```bash
 $ apache-ignite-fabric/bin/ignite.sh
@@ -55,7 +56,7 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL
 ```
 
 ### Structured Objects
-[Apache Ignite](https://ignite.apache.org/) allows to store any objects you would like to store. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects.
+[Apache Ignite](https://ignite.apache.org/) allows to store any type of objects. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects.
 
 ```python
 >>> import tensorflow as tf
@@ -81,7 +82,7 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL
     }
 }
 ```
- Neural network training and other computations require transformations that can be done as part of  [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset.
+ Neural network training and other computations require transformations that can be done as part of [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset.
 
 ```python
 >>> import tensorflow as tf
@@ -99,15 +100,15 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL
 
 ### Distributed Training
 
-TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is an ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. 
+TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is the ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. 
 
 <a href="https://www.codecogs.com/eqnedit.php?latex=\nabla[\sum_1^n(y&space;-&space;\hat{y})^2]&space;=&space;\nabla[\sum_1^{n_1}(y&space;-&space;\hat{y})^2]&space;&plus;&space;\nabla[\sum_{n_1}^{n_2}(y&space;-&space;\hat{y})^2]&space;&plus;&space;...&space;&plus;&space;\nabla[\sum_{n_{k-1}}^n(y&space;-&space;\hat{y})^2]" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\nabla[\sum_1^n(y&space;-&space;\hat{y})^2]&space;=&space;\nabla[\sum_1^{n_1}(y&space;-&space;\hat{y})^2]&space;&plus;&space;\nabla[\sum_{n_1}^{n_2}(y&space;-&space;\hat{y})^2]&space;&plus;&space;...&space;&plus;&space;\nabla[\sum_{n_{k-1}}^n(y&space;-&space;\hat{y})^2]" title="\nabla[\sum_1^n(y - \hat{y})^2] = \nabla[\sum_1^{n_1}(y - \hat{y})^2] + \nabla[\sum_{n_1}^{n_2}(y - \hat{y})^2] + ... + \nabla[\sum_{n_{k-1}}^n(y - \hat{y})^2]" /></a>
 
-Utilizing this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottleneck.
+Using this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottlenecks.
 
-Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL) we can specify the number of partitions the data will be partitioned on. If, for example, Apache Ignite cluster consists of 10 machines and we creates cache with 10 partitions then every machine will maintain approximately one data partition.
+Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL), we can specify the number of partitions the data will be partitioned on. For example, if an Apache Ignite cluster consists of 10 machines and we create cache with 10 partitions, then every machine will maintain approximately one data partition.
 
-Ignite Dataset allows to utilize these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that might be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting correstondent environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach we are able to assign specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset.
+Ignite Dataset allows using these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that can be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting correstondent environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach, we can assign a specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset.
 
 ```python
 >>> import tensorflow as tf
@@ -135,7 +136,7 @@ High-level TensorFlow API for [distributed training](https://www.tensorflow.org/
 
 ### SSL Connection
 
-Your data should not be accessible without any control. Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information please see [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation.
+Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information, please refer to the [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation.
 
 ```python
 >>> import tensorflow as tf
@@ -147,11 +148,11 @@ Your data should not be accessible without any control. Apache Ignite allows to
 
 ### Windows Support
 
-Ignite Dataset is fully compatible with Windows, so you can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems.
+Ignite Dataset is fully compatible with Windows. You can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems.
 
 ## Try it out
 
-The simplest way to try Ignite Dataset out is to run [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and then interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine:
+The simplest way to try Ignite Dataset is to run a [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and after start interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine:
 
 ```
 docker run -it -p 10800:10800 dmitrievanthony/ignite-with-mnist
@@ -163,4 +164,4 @@ After that you will be able to work with it following way:
 
 ## Limitations
 
-Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures.
+Presently, Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of an object structure.
-- 
GitLab


From 92019765d7b7db99d0235268d00f349b7a53d1a9 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Wed, 5 Sep 2018 14:47:20 +0000
Subject: [PATCH 0048/1085] Fix pylint checks, fix VS compilation issue.

---
 .../contrib/ignite/kernels/ignite_plain_client_windows.cc | 4 ++--
 .../contrib/ignite/python/ops/ignite_dataset_ops.py       | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index 7ba037f2d2..e1e2ee3b20 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -103,7 +103,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = recv(sock_, buf, length - recieved, 0);
+    int res = recv(sock_, (char*)buf, length - recieved, 0);
 
     if (res < 0)
       return errors::Internal("Error occured while reading from socket: ", res);
@@ -121,7 +121,7 @@ Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = send(sock_, buf, length - sent, 0);
+    int res = send(sock_, (char*)buf, length - sent, 0);
 
     if (res < 0)
       return errors::Internal("Error occured while writing into socket: ", res);
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
index 6fa073957a..60003ca3b7 100644
--- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -66,13 +66,13 @@ class Readable():
 
   def __read(self, data_type, length):
     """Reads, unpacks and returns specified type (little-endian)."""
-    buffer = self.read_data(length)
-    return struct.unpack("<" + data_type, buffer)[0]
+    data_buffer = self.read_data(length)
+    return struct.unpack("<" + data_type, data_buffer)[0]
 
 class DataBuffer(Readable):
   """DataBuffer class that exposes methods to read data from a byte buffer."""
 
-  def __init__(self, buffer):
+  def __init__(self, data_buffer):
     """Constructs a new instance of DataBuffer based on the specified byte
        buffer.
 
@@ -80,7 +80,7 @@ class DataBuffer(Readable):
       buffer: Buffer to be read.
     """
     Readable.__init__(self)
-    self.buffer = buffer
+    self.buffer = data_buffer
     self.ptr = 0
 
   def read_data(self, length):
-- 
GitLab


From 0b6654bc223f4f3807209043dc34ccb07b55474e Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 11 Sep 2018 09:50:47 +0000
Subject: [PATCH 0049/1085] Fix code style.

---
 .../ignite/kernels/ignite_dataset_ops.cc      |  2 +-
 .../kernels/ignite_plain_client_windows.cc    |  4 +--
 tensorflow/contrib/ignite/ops/dataset_ops.cc  | 34 +++++++++----------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index 89eecf9c14..d03404a460 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_dataset.h"
 #include <stdlib.h>
+#include "ignite_dataset.h"
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index e1e2ee3b20..8182fde6d9 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -103,7 +103,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
-    int res = recv(sock_, (char*)buf, length - recieved, 0);
+    int res = recv(sock_, (char *)buf, length - recieved, 0);
 
     if (res < 0)
       return errors::Internal("Error occured while reading from socket: ", res);
@@ -121,7 +121,7 @@ Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
   int sent = 0;
 
   while (sent < length) {
-    int res = send(sock_, (char*)buf, length - sent, 0);
+    int res = send(sock_, (char *)buf, length - sent, 0);
 
     if (res < 0)
       return errors::Internal("Error occured while writing into socket: ", res);
diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc
index 17494d1cfd..fb16b290b1 100644
--- a/tensorflow/contrib/ignite/ops/dataset_ops.cc
+++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc
@@ -20,23 +20,23 @@ limitations under the License.
 namespace tensorflow {
 
 REGISTER_OP("IgniteDataset")
-  .Input("cache_name: string")
-  .Input("host: string")
-  .Input("port: int32")
-  .Input("local: bool")
-  .Input("part: int32")
-  .Input("page_size: int32")
-  .Input("username: string")
-  .Input("password: string")
-  .Input("certfile: string")
-  .Input("keyfile: string")
-  .Input("cert_password: string")
-  .Input("schema: int32")
-  .Input("permutation: int32")
-  .Output("handle: variant")
-  .SetIsStateful()
-  .SetShapeFn(shape_inference::ScalarShape)
-  .Doc(R"doc(
+    .Input("cache_name: string")
+    .Input("host: string")
+    .Input("port: int32")
+    .Input("local: bool")
+    .Input("part: int32")
+    .Input("page_size: int32")
+    .Input("username: string")
+    .Input("password: string")
+    .Input("certfile: string")
+    .Input("keyfile: string")
+    .Input("cert_password: string")
+    .Input("schema: int32")
+    .Input("permutation: int32")
+    .Output("handle: variant")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
 Apache Ignite is a memory-centric distributed database, caching, and processing
 platform for transactional, analytical, and streaming workloads, delivering 
 in-memory speeds at petabyte scale. This contrib package contains an 
-- 
GitLab


From 5e9a9547f907599f6954fc5e28b7a78acf3b54eb Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Wed, 12 Sep 2018 11:02:12 +0800
Subject: [PATCH 0050/1085] Revert "Add XLA support for LeakyReluOp."

This reverts commit d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74.

Since bfloat16 was not supported by LeakyRelu, but it should be
supported in XLA Ops.
---
 tensorflow/compiler/tests/binary_ops_test.py  |  8 ----
 tensorflow/compiler/tests/unary_ops_test.py   |  5 ---
 tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 -------------------
 3 files changed, 55 deletions(-)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index c478ff4eea..17280e445b 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -178,14 +178,6 @@ class BinaryOpsTest(xla_test.XLATestCase):
               [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype),
           expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype))
 
-      self._testBinary(
-          gen_nn_ops.leaky_relu_grad,
-          np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype),
-          np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
-                   dtype=dtype),
-          expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10],
-                            dtype=dtype))
-
       self._testBinary(
           gen_nn_ops.softmax_cross_entropy_with_logits,
           np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype),
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index dd29ef34ce..5b0e57f83f 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -361,11 +361,6 @@ class UnaryOpsTest(xla_test.XLATestCase):
           np.array([[-0.05, 6.05, 5]], dtype=dtype),
           expected=np.array([[0, 6, 5]], dtype=dtype))
 
-      self._assertOpOutputMatchesExpected(
-          nn_ops.leaky_relu,
-          np.array([[-1.0, 1.0]], dtype=dtype),
-          expected=np.array([[-0.2, 1.0]], dtype=dtype))
-
       self._assertOpOutputMatchesExpected(
           nn_ops.softmax,
           np.array([1, 2, 3, 4], dtype=dtype),
diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
index 8d65e0339c..d35777ccb1 100644
--- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc
@@ -50,23 +50,6 @@ class Relu6Op : public XlaOpKernel {
   }
 };
 
-class LeakyReluOp : public XlaOpKernel {
- public:
-  explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
-  }
-  // Compute the max of the input x and alpha*x.
-  void Compile(XlaOpKernelContext* ctx) override {
-    xla::XlaBuilder* builder = ctx->builder();
-    auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0),
-                                          static_cast<double>(alpha_));
-    ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0)));
-  }
-
- private:
-  float alpha_;
-};
-
 class ReluGradOp : public XlaOpKernel {
  public:
   explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
@@ -101,35 +84,10 @@ class Relu6GradOp : public XlaOpKernel {
   }
 };
 
-class LeakyReluGradOp : public XlaOpKernel {
- public:
-  explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_));
-  }
-  // Return the lhs (incoming gradient) if the rhs (input feature) > 0,
-  // otherwise return the alpha * lhs.
-  void Compile(XlaOpKernelContext* ctx) override {
-    xla::XlaBuilder* b = ctx->builder();
-    const TensorShape shape = ctx->InputShape(0);
-    const auto zero =
-        xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes());
-    const auto pred = xla::Gt(ctx->Input(1), zero);
-    auto alpha =
-        XlaHelpers::FloatLiteral(b, input_type(0), static_cast<double>(alpha_));
-    ctx->SetOutput(
-        0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0))));
-  }
-
- private:
-  float alpha_;
-};
-
 REGISTER_XLA_OP(Name("Relu"), ReluOp);
 REGISTER_XLA_OP(Name("Relu6"), Relu6Op);
-REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp);
 REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp);
 REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp);
-REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp);
 
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From 9ec9c8b24cca5f1e746fef8cd351b3cae6d5a740 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Wed, 12 Sep 2018 20:42:01 +0300
Subject: [PATCH 0051/1085] Fixes after second review.

---
 tensorflow/contrib/ignite/BUILD               |   1 +
 tensorflow/contrib/ignite/__init__.py         |  22 +-
 .../kernels/ignite_binary_object_parser.cc    | 404 ++++++++++--------
 .../kernels/ignite_binary_object_parser.h     |  36 +-
 .../contrib/ignite/kernels/ignite_client.h    |  55 ++-
 .../contrib/ignite/kernels/ignite_dataset.cc  |  99 ++---
 .../contrib/ignite/kernels/ignite_dataset.h   |  37 +-
 .../ignite/kernels/ignite_dataset_iterator.cc | 383 ++++++++---------
 .../ignite/kernels/ignite_dataset_iterator.h  |  74 ++--
 .../ignite/kernels/ignite_dataset_ops.cc      | 123 ++++--
 .../ignite/kernels/ignite_plain_client.h      |  15 +-
 .../kernels/ignite_plain_client_unix.cc       |  14 +-
 .../kernels/ignite_plain_client_windows.cc    |  17 +-
 .../ignite/kernels/ignite_ssl_wrapper.cc      |  34 +-
 .../ignite/kernels/ignite_ssl_wrapper.h       |  26 +-
 tensorflow/contrib/ignite/ops/dataset_ops.cc  |   2 +
 .../ignite/python/ops/ignite_dataset_ops.py   | 176 ++++----
 17 files changed, 848 insertions(+), 670 deletions(-)

diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
index b7d40a99f7..2f598b4aed 100644
--- a/tensorflow/contrib/ignite/BUILD
+++ b/tensorflow/contrib/ignite/BUILD
@@ -40,6 +40,7 @@ cc_library(
     srcs = [
         "kernels/ignite_dataset_ops.cc",
         "kernels/ignite_client.h",
+        "kernels/ignite_byte_swapper.h",
         "kernels/ignite_plain_client.h",
         "kernels/ignite_ssl_wrapper.h",
         "kernels/ignite_ssl_wrapper.cc",
diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py
index b78829d0f4..f42947696f 100644
--- a/tensorflow/contrib/ignite/__init__.py
+++ b/tensorflow/contrib/ignite/__init__.py
@@ -12,16 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Apache Ignite is a memory-centric distributed database, caching, and
-   processing platform for transactional, analytical, and streaming workloads,
-   delivering in-memory speeds at petabyte scale. This contrib package
-   contains an integration between Apache Ignite and TensorFlow. The
-   integration is based on tf.data from TensorFlow side and Binary Client
-   Protocol from Apache Ignite side. It allows to use Apache Ignite as a
-   datasource for neural network training, inference and all other
-   computations supported by TensorFlow. Ignite Dataset is based on Apache
-   Ignite Binary Client Protocol:
-   https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
+"""IgniteDataset that allows to get data from Apache Ignite.
+
+Apache Ignite is a memory-centric distributed database, caching, and
+processing platform for transactional, analytical, and streaming workloads,
+delivering in-memory speeds at petabyte scale. This contrib package
+contains an integration between Apache Ignite and TensorFlow. The
+integration is based on tf.data from TensorFlow side and Binary Client
+Protocol from Apache Ignite side. It allows to use Apache Ignite as a
+datasource for neural network training, inference and all other
+computations supported by TensorFlow. Ignite Dataset is based on Apache
+Ignite Binary Client Protocol:
+https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
 
 @@IgniteDataset
 """
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
index 9bf4480d2d..2c8a7d44b0 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc
@@ -13,242 +13,171 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_binary_object_parser.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
 
+BinaryObjectParser::BinaryObjectParser() : byte_swapper_(ByteSwapper(false)) {}
+
 Status BinaryObjectParser::Parse(uint8_t** ptr,
                                  std::vector<Tensor>* out_tensors,
-                                 std::vector<int32_t>* types) {
-  uint8_t object_type_id = **ptr;
-  *ptr += 1;
+                                 std::vector<int32_t>* types) const {
+  uint8_t object_type_id = ParseByte(ptr);
+
+  // Skip non-leaf nodes.
+  if (object_type_id != WRAPPED_OBJ && object_type_id != COMPLEX_OBJ)
+    types->push_back(object_type_id);
 
   switch (object_type_id) {
     case BYTE: {
-      Tensor tensor(cpu_allocator(), DT_UINT8, {});
-      tensor.scalar<uint8>()() = *((uint8_t*)*ptr);
-      *ptr += 1;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_UINT8, TensorShape({}));
+      out_tensors->back().scalar<uint8>()() = ParseByte(ptr);
       break;
     }
     case SHORT: {
-      Tensor tensor(cpu_allocator(), DT_INT16, {});
-      tensor.scalar<int16>()() = *((int16_t*)*ptr);
-      *ptr += 2;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_INT16, TensorShape({}));
+      out_tensors->back().scalar<int16>()() = ParseShort(ptr);
+      break;
+    }
+    case USHORT: {
+      out_tensors->emplace_back(cpu_allocator(), DT_UINT16, TensorShape({}));
+      out_tensors->back().scalar<uint16>()() = ParseUnsignedShort(ptr);
       break;
     }
     case INT: {
-      Tensor tensor(cpu_allocator(), DT_INT32, {});
-      tensor.scalar<int32>()() = *((int32_t*)*ptr);
-      *ptr += 4;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_INT32, TensorShape({}));
+      out_tensors->back().scalar<int32>()() = ParseInt(ptr);
       break;
     }
     case LONG: {
-      Tensor tensor(cpu_allocator(), DT_INT64, {});
-      tensor.scalar<int64>()() = *((int64_t*)*ptr);
-      *ptr += 8;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({}));
+      out_tensors->back().scalar<int64>()() = ParseLong(ptr);
       break;
     }
     case FLOAT: {
-      Tensor tensor(cpu_allocator(), DT_FLOAT, {});
-      tensor.scalar<float>()() = *((float*)*ptr);
-      *ptr += 4;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_FLOAT, TensorShape({}));
+      out_tensors->back().scalar<float>()() = ParseFloat(ptr);
       break;
     }
     case DOUBLE: {
-      Tensor tensor(cpu_allocator(), DT_DOUBLE, {});
-      tensor.scalar<double>()() = *((double*)*ptr);
-      *ptr += 8;
-      out_tensors->push_back(std::move(tensor));
-      break;
-    }
-    case UCHAR: {
-      Tensor tensor(cpu_allocator(), DT_UINT16, {});
-      tensor.scalar<uint16>()() = *((uint16_t*)*ptr);
-      *ptr += 2;
-      out_tensors->push_back(std::move(tensor));
+      out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE, TensorShape({}));
+      out_tensors->back().scalar<double>()() = ParseDouble(ptr);
       break;
     }
     case BOOL: {
-      Tensor tensor(cpu_allocator(), DT_BOOL, {});
-      tensor.scalar<bool>()() = *((bool*)*ptr);
-      *ptr += 1;
-      out_tensors->push_back(std::move(tensor));
-
+      out_tensors->emplace_back(cpu_allocator(), DT_BOOL, TensorShape({}));
+      out_tensors->back().scalar<bool>()() = ParseBool(ptr);
       break;
     }
     case STRING: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_STRING, {});
-      tensor.scalar<std::string>()() = std::string((char*)*ptr, length);
-      *ptr += length;
-      out_tensors->push_back(std::move(tensor));
-
+      out_tensors->emplace_back(cpu_allocator(), DT_STRING, TensorShape({}));
+      out_tensors->back().scalar<string>()() = ParseString(ptr);
       break;
     }
     case DATE: {
-      Tensor tensor(cpu_allocator(), DT_INT64, {});
-      tensor.scalar<int64>()() = *((int64_t*)*ptr);
-      *ptr += 8;
-      out_tensors->push_back(std::move(tensor));
-
+      out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({}));
+      out_tensors->back().scalar<int64>()() = ParseLong(ptr);
       break;
     }
     case BYTE_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_UINT8, TensorShape({length}));
-
-      uint8_t* arr = (uint8_t*)*ptr;
-      *ptr += length;
-
-      std::copy_n(arr, length, tensor.flat<uint8>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      uint8_t* arr = ParseByteArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_UINT8,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<uint8>().data());
       break;
     }
     case SHORT_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_INT16, TensorShape({length}));
-
-      int16_t* arr = (int16_t*)*ptr;
-      *ptr += length * 2;
-
-      std::copy_n(arr, length, tensor.flat<int16>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      int16_t* arr = ParseShortArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_INT16,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<int16>().data());
+      break;
+    }
+    case USHORT_ARR: {
+      int32_t length = ParseInt(ptr);
+      uint16_t* arr = ParseUnsignedShortArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_UINT16,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<uint16>().data());
       break;
     }
     case INT_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_INT32, TensorShape({length}));
-
-      int32_t* arr = (int32_t*)*ptr;
-      *ptr += length * 4;
-
-      std::copy_n(arr, length, tensor.flat<int32>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      int32_t* arr = ParseIntArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_INT32,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<int32>().data());
       break;
     }
     case LONG_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length}));
-
-      int64_t* arr = (int64_t*)*ptr;
-      *ptr += length * 8;
-
-      std::copy_n(arr, length, tensor.flat<int64>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      int64_t* arr = ParseLongArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_INT64,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<int64>().data());
       break;
     }
     case FLOAT_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_FLOAT, TensorShape({length}));
-
-      float* arr = (float*)*ptr;
-      *ptr += 4 * length;
-
-      std::copy_n(arr, length, tensor.flat<float>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      float* arr = ParseFloatArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_FLOAT,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<float>().data());
       break;
     }
     case DOUBLE_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_DOUBLE, TensorShape({length}));
-
-      double* arr = (double*)*ptr;
-      *ptr += 8 * length;
-
-      std::copy_n(arr, length, tensor.flat<double>().data());
-      out_tensors->push_back(std::move(tensor));
-      break;
-    }
-    case UCHAR_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_UINT16, TensorShape({length}));
-
-      uint16_t* arr = (uint16_t*)*ptr;
-      *ptr += length * 2;
-
-      std::copy_n(arr, length, tensor.flat<uint16>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      double* arr = ParseDoubleArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<double>().data());
       break;
     }
     case BOOL_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_BOOL, TensorShape({length}));
-
-      bool* arr = (bool*)*ptr;
-      *ptr += length;
-
-      std::copy_n(arr, length, tensor.flat<bool>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      bool* arr = ParseBoolArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_BOOL,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<bool>().data());
       break;
     }
     case STRING_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_STRING, TensorShape({length}));
-
-      for (int32_t i = 0; i < length; i++) {
-        int32_t str_length = *((int32_t*)*ptr);
-        *ptr += 4;
-        const int8_t* str = (const int8_t*)*ptr;
-        *ptr += str_length;
-        tensor.vec<std::string>()(i) = std::string((char*)str, str_length);
-      }
-
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      out_tensors->emplace_back(cpu_allocator(), DT_STRING,
+                                TensorShape({length}));
+      for (int32_t i = 0; i < length; i++)
+        out_tensors->back().vec<string>()(i) = ParseString(ptr);
       break;
     }
     case DATE_ARR: {
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length}));
-      int64_t* arr = (int64_t*)*ptr;
-      *ptr += length * 8;
-
-      std::copy_n(arr, length, tensor.flat<int64>().data());
-      out_tensors->push_back(std::move(tensor));
+      int32_t length = ParseInt(ptr);
+      int64_t* arr = ParseLongArr(ptr, length);
+      out_tensors->emplace_back(cpu_allocator(), DT_INT64,
+                                TensorShape({length}));
+      std::copy_n(arr, length, out_tensors->back().flat<int64>().data());
       break;
     }
     case WRAPPED_OBJ: {
-      int32_t byte_arr_size = *((int32_t*)*ptr);
-      *ptr += 4;
-
+      int32_t byte_arr_size = ParseInt(ptr);
       TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types));
-
-      int32_t offset = *((int32_t*)*ptr);
-      *ptr += 4;
+      int32_t offset = ParseInt(ptr);
 
       break;
     }
     case COMPLEX_OBJ: {
-      uint8_t version = **ptr;
-      *ptr += 1;
-      int16_t flags = *((int16_t*)*ptr);  // USER_TYPE = 1, HAS_SCHEMA = 2
-      *ptr += 2;
-      int32_t type_id = *((int32_t*)*ptr);
-      *ptr += 4;
-      int32_t hash_code = *((int32_t*)*ptr);
-      *ptr += 4;
-      int32_t length = *((int32_t*)*ptr);
-      *ptr += 4;
-      int32_t schema_id = *((int32_t*)*ptr);
-      *ptr += 4;
-      int32_t schema_offset = *((int32_t*)*ptr);
-      *ptr += 4;
-
+      uint8_t version = ParseByte(ptr);
+      int16_t flags = ParseShort(ptr);
+      int32_t type_id = ParseInt(ptr);
+      int32_t hash_code = ParseInt(ptr);
+      int32_t length = ParseInt(ptr);
+      int32_t schema_id = ParseInt(ptr);
+      int32_t schema_offset = ParseInt(ptr);
+
+      // 24 is size of header just read.
       uint8_t* end = *ptr + schema_offset - 24;
       int32_t i = 0;
       while (*ptr < end) {
@@ -261,12 +190,145 @@ Status BinaryObjectParser::Parse(uint8_t** ptr,
       break;
     }
     default: {
-      return errors::Internal("Unknowd binary type (type id ",
-                              (int)object_type_id, ")");
+      return errors::Unknown("Unknowd binary type (type id ",
+                             (int)object_type_id, ")");
     }
   }
 
   return Status::OK();
 }
 
+uint8_t BinaryObjectParser::ParseByte(uint8_t** ptr) const {
+  uint8_t res = **ptr;
+  *ptr += 1;
+
+  return res;
+}
+
+int16_t BinaryObjectParser::ParseShort(uint8_t** ptr) const {
+  int16_t* res = *reinterpret_cast<int16_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt16(res);
+  *ptr += 2;
+
+  return *res;
+}
+
+uint16_t BinaryObjectParser::ParseUnsignedShort(uint8_t** ptr) const {
+  uint16_t* res = *reinterpret_cast<uint16_t**>(ptr);
+  byte_swapper_.SwapIfRequiredUnsignedInt16(res);
+  *ptr += 2;
+
+  return *res;
+}
+
+int32_t BinaryObjectParser::ParseInt(uint8_t** ptr) const {
+  int32_t* res = *reinterpret_cast<int32_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt32(res);
+  *ptr += 4;
+
+  return *res;
+}
+
+int64_t BinaryObjectParser::ParseLong(uint8_t** ptr) const {
+  int64_t* res = *reinterpret_cast<int64_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt64(res);
+  *ptr += 8;
+
+  return *res;
+}
+
+float BinaryObjectParser::ParseFloat(uint8_t** ptr) const {
+  float* res = *reinterpret_cast<float**>(ptr);
+  byte_swapper_.SwapIfRequiredFloat(res);
+  *ptr += 4;
+
+  return *res;
+}
+
+double BinaryObjectParser::ParseDouble(uint8_t** ptr) const {
+  double* res = *reinterpret_cast<double**>(ptr);
+  byte_swapper_.SwapIfRequiredDouble(res);
+  *ptr += 8;
+
+  return *res;
+}
+
+bool BinaryObjectParser::ParseBool(uint8_t** ptr) const {
+  bool res = **reinterpret_cast<bool**>(ptr);
+  *ptr += 1;
+
+  return res;
+}
+
+string BinaryObjectParser::ParseString(uint8_t** ptr) const {
+  int32_t length = ParseInt(ptr);
+  string res(*reinterpret_cast<char**>(ptr), length);
+  *ptr += length;
+
+  return res;
+}
+
+uint8_t* BinaryObjectParser::ParseByteArr(uint8_t** ptr, int length) const {
+  uint8_t* res = *reinterpret_cast<uint8_t**>(ptr);
+  *ptr += length;
+
+  return res;
+}
+
+int16_t* BinaryObjectParser::ParseShortArr(uint8_t** ptr, int length) const {
+  int16_t* res = *reinterpret_cast<int16_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt16Arr(res, length);
+  *ptr += length * 2;
+
+  return res;
+}
+
+uint16_t* BinaryObjectParser::ParseUnsignedShortArr(uint8_t** ptr,
+                                                    int length) const {
+  uint16_t* res = *reinterpret_cast<uint16_t**>(ptr);
+  byte_swapper_.SwapIfRequiredUnsignedInt16Arr(res, length);
+  *ptr += length * 2;
+
+  return res;
+}
+
+int32_t* BinaryObjectParser::ParseIntArr(uint8_t** ptr, int length) const {
+  int32_t* res = *reinterpret_cast<int32_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt32Arr(res, length);
+  *ptr += length * 4;
+
+  return res;
+}
+
+int64_t* BinaryObjectParser::ParseLongArr(uint8_t** ptr, int length) const {
+  int64_t* res = *reinterpret_cast<int64_t**>(ptr);
+  byte_swapper_.SwapIfRequiredInt64Arr(res, length);
+  *ptr += length * 8;
+
+  return res;
+}
+
+float* BinaryObjectParser::ParseFloatArr(uint8_t** ptr, int length) const {
+  float* res = *reinterpret_cast<float**>(ptr);
+  byte_swapper_.SwapIfRequiredFloatArr(res, length);
+  *ptr += length * 4;
+
+  return res;
+}
+
+double* BinaryObjectParser::ParseDoubleArr(uint8_t** ptr, int length) const {
+  double* res = *reinterpret_cast<double**>(ptr);
+  byte_swapper_.SwapIfRequiredDoubleArr(res, length);
+  *ptr += length * 8;
+
+  return res;
+}
+
+bool* BinaryObjectParser::ParseBoolArr(uint8_t** ptr, int length) const {
+  bool* res = *reinterpret_cast<bool**>(ptr);
+  *ptr += length;
+
+  return res;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
index 9accbd796f..eb1f856643 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h
@@ -13,16 +13,42 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_
+
 #include <vector>
-#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h"
+#include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 
 class BinaryObjectParser {
  public:
+  BinaryObjectParser();
   Status Parse(uint8_t** ptr, std::vector<Tensor>* out_tensors,
-               std::vector<int32_t>* types);
+               std::vector<int32_t>* types) const;
+
+ private:
+  uint8_t ParseByte(uint8_t** ptr) const;
+  int16_t ParseShort(uint8_t** ptr) const;
+  uint16_t ParseUnsignedShort(uint8_t** ptr) const;
+  int32_t ParseInt(uint8_t** ptr) const;
+  int64_t ParseLong(uint8_t** ptr) const;
+  float ParseFloat(uint8_t** ptr) const;
+  double ParseDouble(uint8_t** ptr) const;
+  bool ParseBool(uint8_t** ptr) const;
+  string ParseString(uint8_t** ptr) const;
+  uint8_t* ParseByteArr(uint8_t** ptr, int length) const;
+  int16_t* ParseShortArr(uint8_t** ptr, int length) const;
+  uint16_t* ParseUnsignedShortArr(uint8_t** ptr, int length) const;
+  int32_t* ParseIntArr(uint8_t** ptr, int length) const;
+  int64_t* ParseLongArr(uint8_t** ptr, int length) const;
+  float* ParseFloatArr(uint8_t** ptr, int length) const;
+  double* ParseDoubleArr(uint8_t** ptr, int length) const;
+  bool* ParseBoolArr(uint8_t** ptr, int length) const;
+
+  const ByteSwapper byte_swapper_;
 };
 
 enum ObjectType {
@@ -32,7 +58,7 @@ enum ObjectType {
   LONG = 4,
   FLOAT = 5,
   DOUBLE = 6,
-  UCHAR = 7,
+  USHORT = 7,
   BOOL = 8,
   STRING = 9,
   DATE = 11,
@@ -42,7 +68,7 @@ enum ObjectType {
   LONG_ARR = 15,
   FLOAT_ARR = 16,
   DOUBLE_ARR = 17,
-  UCHAR_ARR = 18,
+  USHORT_ARR = 18,
   BOOL_ARR = 19,
   STRING_ARR = 20,
   DATE_ARR = 22,
@@ -51,3 +77,5 @@ enum ObjectType {
 };
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h
index 944b3fe184..508b6e4a60 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_client.h
@@ -16,40 +16,69 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
 #define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
 
+#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 
 class Client {
  public:
+  Client(bool big_endian) : byte_swapper_(ByteSwapper(big_endian)){};
   virtual Status Connect() = 0;
   virtual Status Disconnect() = 0;
   virtual bool IsConnected() = 0;
   virtual int GetSocketDescriptor() = 0;
-  virtual Status ReadData(uint8_t* buf, int32_t length) = 0;
-  virtual Status WriteData(uint8_t* buf, int32_t length) = 0;
+  virtual Status ReadData(uint8_t *buf, const int32_t length) = 0;
+  virtual Status WriteData(const uint8_t *buf, const int32_t length) = 0;
 
-  inline Status ReadByte(uint8_t* data) { return ReadData(data, 1); }
+  inline Status ReadByte(uint8_t *data) { return ReadData(data, 1); }
 
-  inline Status ReadShort(int16_t* data) { return ReadData((uint8_t*)data, 2); }
+  inline Status ReadShort(int16_t *data) {
+    TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 2));
+    byte_swapper_.SwapIfRequiredInt16(data);
 
-  inline Status ReadInt(int32_t* data) { return ReadData((uint8_t*)data, 4); }
+    return Status::OK();
+  }
+
+  inline Status ReadInt(int32_t *data) {
+    TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 4));
+    byte_swapper_.SwapIfRequiredInt32(data);
+
+    return Status::OK();
+  }
 
-  inline Status ReadLong(int64_t* data) { return ReadData((uint8_t*)data, 8); }
+  inline Status ReadLong(int64_t *data) {
+    TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 8));
+    byte_swapper_.SwapIfRequiredInt64(data);
 
-  inline Status WriteByte(uint8_t data) { return WriteData(&data, 1); }
+    return Status::OK();
+  }
+
+  inline Status WriteByte(const uint8_t data) { return WriteData(&data, 1); }
 
-  inline Status WriteShort(int16_t data) {
-    return WriteData((uint8_t*)&data, 2);
+  inline Status WriteShort(const int16_t data) {
+    int16_t tmp = data;
+    byte_swapper_.SwapIfRequiredInt16(&tmp);
+    return WriteData((uint8_t *)&tmp, 2);
   }
 
-  inline Status WriteInt(int32_t data) { return WriteData((uint8_t*)&data, 4); }
+  inline Status WriteInt(const int32_t data) {
+    int32_t tmp = data;
+    byte_swapper_.SwapIfRequiredInt32(&tmp);
+    return WriteData((uint8_t *)&tmp, 4);
+  }
 
-  inline Status WriteLong(int64_t data) {
-    return WriteData((uint8_t*)&data, 8);
+  inline Status WriteLong(const int64_t data) {
+    int64_t tmp = data;
+    byte_swapper_.SwapIfRequiredInt64(&tmp);
+    return WriteData((uint8_t *)&tmp, 8);
   }
+
+ private:
+  const ByteSwapper byte_swapper_;
 };
 
 }  // namespace tensorflow
 
-#endif
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
index f25f8a5b18..c4a7d3c513 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc
@@ -13,40 +13,41 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_dataset_iterator.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
 
-IgniteDataset::IgniteDataset(OpKernelContext* ctx, std::string cache_name,
-                             std::string host, int32 port, bool local,
-                             int32 part, int32 page_size, std::string username,
-                             std::string password, std::string certfile,
-                             std::string keyfile, std::string cert_password,
-                             std::vector<int32> schema,
-                             std::vector<int32> permutation)
+IgniteDataset::IgniteDataset(OpKernelContext* ctx, string cache_name,
+                             string host, int32 port, bool local, int32 part,
+                             int32 page_size, string username, string password,
+                             string certfile, string keyfile,
+                             string cert_password, std::vector<int32> schema,
+                             std::vector<int32> permutation,
+                             DataTypeVector dtypes,
+                             std::vector<PartialTensorShape> shapes)
     : DatasetBase(DatasetContext(ctx)),
-      cache_name_(cache_name),
-      host_(host),
+      cache_name_(std::move(cache_name)),
+      host_(std::move(host)),
       port_(port),
       local_(local),
       part_(part),
       page_size_(page_size),
-      username_(username),
-      password_(password),
-      certfile_(certfile),
-      keyfile_(keyfile),
-      cert_password_(cert_password),
-      schema_(schema),
-      permutation_(permutation) {
-  SchemaToTypes();
-  SchemaToShapes();
-
-  LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name
-            << "', host='" << host << "', port=" << port << ", local=" << local
-            << ", part=" << part << ", page_size=" << page_size
-            << ", username='" << username << "', certfile='" << certfile
-            << "', keyfile='" << keyfile + "']";
+      username_(std::move(username)),
+      password_(std::move(password)),
+      certfile_(std::move(certfile)),
+      keyfile_(std::move(keyfile)),
+      cert_password_(std::move(cert_password)),
+      schema_(std::move(schema)),
+      permutation_(std::move(permutation)),
+      dtypes_(dtypes),
+      shapes_(shapes) {
+  LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name_
+            << "', host='" << host_ << "', port=" << port_
+            << ", local=" << local_ << ", part=" << part_
+            << ", page_size=" << page_size_ << ", username='" << username_
+            << "', certfile='" << certfile_ << "', keyfile='"
+            << keyfile_ + "']";
 }
 
 IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; }
@@ -54,10 +55,12 @@ IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; }
 std::unique_ptr<IteratorBase> IgniteDataset::MakeIteratorInternal(
     const string& prefix) const {
   return std::unique_ptr<IteratorBase>(new IgniteDatasetIterator(
-      {this, strings::StrCat(prefix, "::Ignite")}, this->host_, this->port_,
-      this->cache_name_, this->local_, this->part_, this->page_size_,
-      this->username_, this->password_, this->certfile_, this->keyfile_,
-      this->cert_password_, this->schema_, this->permutation_));
+      {this, strings::StrCat(prefix, "::Ignite")}, std::move(this->host_),
+      this->port_, std::move(this->cache_name_), this->local_, this->part_,
+      this->page_size_, std::move(this->username_), std::move(this->password_),
+      std::move(this->certfile_), std::move(this->keyfile_),
+      std::move(this->cert_password_), std::move(this->schema_),
+      std::move(this->permutation_)));
 }
 
 const DataTypeVector& IgniteDataset::output_dtypes() const { return dtypes_; }
@@ -75,42 +78,4 @@ Status IgniteDataset::AsGraphDefInternal(SerializationContext* ctx,
       "IgniteDataset does not support 'AsGraphDefInternal'");
 }
 
-void IgniteDataset::SchemaToTypes() {
-  for (auto e : schema_) {
-    if (e == BYTE || e == BYTE_ARR) {
-      dtypes_.push_back(DT_UINT8);
-    } else if (e == SHORT || e == SHORT_ARR) {
-      dtypes_.push_back(DT_INT16);
-    } else if (e == INT || e == INT_ARR) {
-      dtypes_.push_back(DT_INT32);
-    } else if (e == LONG || e == LONG_ARR) {
-      dtypes_.push_back(DT_INT64);
-    } else if (e == FLOAT || e == FLOAT_ARR) {
-      dtypes_.push_back(DT_FLOAT);
-    } else if (e == DOUBLE || e == DOUBLE_ARR) {
-      dtypes_.push_back(DT_DOUBLE);
-    } else if (e == UCHAR || e == UCHAR_ARR) {
-      dtypes_.push_back(DT_UINT8);
-    } else if (e == BOOL || e == BOOL_ARR) {
-      dtypes_.push_back(DT_BOOL);
-    } else if (e == STRING || e == STRING_ARR) {
-      dtypes_.push_back(DT_STRING);
-    } else {
-      LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
-    }
-  }
-}
-
-void IgniteDataset::SchemaToShapes() {
-  for (auto e : schema_) {
-    if (e >= 1 && e < 10) {
-      shapes_.push_back(PartialTensorShape({}));
-    } else if (e >= 12 && e < 21) {
-      shapes_.push_back(PartialTensorShape({-1}));
-    } else {
-      LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]";
-    }
-  }
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
index d3fec5910b..66bfdf2e2a 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h
@@ -13,18 +13,21 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_
+
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
 
 class IgniteDataset : public DatasetBase {
  public:
-  IgniteDataset(OpKernelContext* ctx, std::string cache_name, std::string host,
+  IgniteDataset(OpKernelContext* ctx, string cache_name, string host,
                 int32 port, bool local, int32 part, int32 page_size,
-                std::string username, std::string password,
-                std::string certfile, std::string keyfile,
-                std::string cert_password, std::vector<int32> schema,
-                std::vector<int32> permutation);
+                string username, string password, string certfile,
+                string keyfile, string cert_password, std::vector<int32> schema,
+                std::vector<int32> permutation, DataTypeVector dtypes,
+                std::vector<PartialTensorShape> shapes);
   ~IgniteDataset();
   std::unique_ptr<IteratorBase> MakeIteratorInternal(
       const string& prefix) const override;
@@ -38,25 +41,23 @@ class IgniteDataset : public DatasetBase {
                             Node** output) const override;
 
  private:
-  const std::string cache_name_;
-  const std::string host_;
+  const string cache_name_;
+  const string host_;
   const int32 port_;
   const bool local_;
   const int32 part_;
   const int32 page_size_;
-  const std::string username_;
-  const std::string password_;
-  const std::string certfile_;
-  const std::string keyfile_;
-  const std::string cert_password_;
+  const string username_;
+  const string password_;
+  const string certfile_;
+  const string keyfile_;
+  const string cert_password_;
   const std::vector<int32> schema_;
   const std::vector<int32> permutation_;
-
-  DataTypeVector dtypes_;
-  std::vector<PartialTensorShape> shapes_;
-
-  void SchemaToTypes();
-  void SchemaToShapes();
+  const DataTypeVector dtypes_;
+  const std::vector<PartialTensorShape> shapes_;
 };
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
index 1774585ecd..f68ded5a3a 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc
@@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_dataset_iterator.h"
-
-#include "ignite_plain_client.h"
-#include "ignite_ssl_wrapper.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/logging.h"
 
 #include <time.h>
@@ -25,30 +25,31 @@ limitations under the License.
 namespace tensorflow {
 
 IgniteDatasetIterator::IgniteDatasetIterator(
-    const Params& params, std::string host, int32 port, std::string cache_name,
-    bool local, int32 part, int32 page_size, std::string username,
-    std::string password, std::string certfile, std::string keyfile,
-    std::string cert_password, std::vector<int32> schema,
-    std::vector<int32> permutation)
+    const Params& params, string host, int32 port, string cache_name,
+    bool local, int32 part, int32 page_size, string username, string password,
+    string certfile, string keyfile, string cert_password,
+    std::vector<int32> schema, std::vector<int32> permutation)
     : DatasetIterator<IgniteDataset>(params),
-      cache_name_(cache_name),
+      cache_name_(std::move(cache_name)),
       local_(local),
       part_(part),
       page_size_(page_size),
-      username_(username),
-      password_(password),
-      schema_(schema),
-      permutation_(permutation),
+      username_(std::move(username)),
+      password_(std::move(password)),
+      schema_(std::move(schema)),
+      permutation_(std::move(permutation)),
       remainder_(-1),
       cursor_id_(-1),
-      last_page_(false) {
-  Client* p_client = new PlainClient(host, port);
+      last_page_(false),
+      valid_state_(true) {
+  Client* p_client = new PlainClient(std::move(host), port, false);
 
   if (certfile.empty())
     client_ = std::unique_ptr<Client>(p_client);
   else
-    client_ = std::unique_ptr<Client>(new SslWrapper(
-        std::unique_ptr<Client>(p_client), certfile, keyfile, cert_password));
+    client_ = std::unique_ptr<Client>(
+        new SslWrapper(std::unique_ptr<Client>(p_client), std::move(certfile),
+                       std::move(keyfile), std::move(cert_password), false));
 
   LOG(INFO) << "Ignite Dataset Iterator created";
 }
@@ -60,12 +61,80 @@ IgniteDatasetIterator::~IgniteDatasetIterator() {
   LOG(INFO) << "Ignite Dataset Iterator destroyed";
 }
 
+Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx,
+                                              std::vector<Tensor>* out_tensors,
+                                              bool* end_of_sequence) {
+  mutex_lock l(mutex_);
+
+  if (valid_state_) {
+    Status status =
+        GetNextInternalWithValidState(ctx, out_tensors, end_of_sequence);
+
+    if (!status.ok()) valid_state_ = false;
+
+    return status;
+  }
+
+  return errors::Unknown("Iterator is invalid");
+}
+
+Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) {
+  return errors::Unimplemented(
+      "Iterator for IgniteDataset does not support 'SaveInternal'");
+}
+
+Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx,
+                                              IteratorStateReader* reader) {
+  return errors::Unimplemented(
+      "Iterator for IgniteDataset does not support 'RestoreInternal')");
+}
+
+Status IgniteDatasetIterator::GetNextInternalWithValidState(
+    IteratorContext* ctx, std::vector<Tensor>* out_tensors,
+    bool* end_of_sequence) {
+  if (remainder_ == 0 && last_page_) {
+    cursor_id_ = -1;
+    *end_of_sequence = true;
+
+    return Status::OK();
+  } else {
+    TF_RETURN_IF_ERROR(EstablishConnection());
+
+    if (remainder_ == -1) {
+      TF_RETURN_IF_ERROR(ScanQuery());
+    } else if (remainder_ == 0) {
+      TF_RETURN_IF_ERROR(LoadNextPage());
+    }
+
+    uint8_t* initial_ptr = ptr_;
+    std::vector<Tensor> tensors;
+    std::vector<int32_t> types;
+
+    TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types));  // Parse key
+    TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types));  // Parse val
+
+    remainder_ -= (ptr_ - initial_ptr);
+
+    TF_RETURN_IF_ERROR(CheckTypes(types));
+
+    for (size_t i = 0; i < tensors.size(); i++)
+      out_tensors->push_back(tensors[permutation_[i]]);
+
+    *end_of_sequence = false;
+
+    return Status::OK();
+  }
+
+  *end_of_sequence = true;
+
+  return Status::OK();
+}
+
 Status IgniteDatasetIterator::EstablishConnection() {
   if (!client_->IsConnected()) {
-    Status status = client_->Connect();
-    if (!status.ok()) return status;
+    TF_RETURN_IF_ERROR(client_->Connect());
 
-    status = Handshake();
+    Status status = Handshake();
     if (!status.ok()) {
       Status disconnect_status = client_->Disconnect();
       if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString();
@@ -79,19 +148,17 @@ Status IgniteDatasetIterator::EstablishConnection() {
 
 Status IgniteDatasetIterator::CloseConnection() {
   if (cursor_id_ != -1 && !last_page_) {
-    Status conn_status = EstablishConnection();
-    if (!conn_status.ok()) return conn_status;
+    TF_RETURN_IF_ERROR(EstablishConnection());
 
-    TF_RETURN_IF_ERROR(client_->WriteInt(18));  // Message length
-    TF_RETURN_IF_ERROR(
-        client_->WriteShort(close_connection_opcode));   // Operation code
+    TF_RETURN_IF_ERROR(client_->WriteInt(kCloseConnectionReqLength));
+    TF_RETURN_IF_ERROR(client_->WriteShort(kCloseConnectionOpcode));
     TF_RETURN_IF_ERROR(client_->WriteLong(0));           // Request ID
     TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_));  // Resource ID
 
     int32_t res_len;
     TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
-    if (res_len < 12)
-      return errors::Internal("Close Resource Response is corrupted");
+    if (res_len < kMinResLength)
+      return errors::Unknown("Close Resource Response is corrupted");
 
     int64_t req_id;
     TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
@@ -100,22 +167,21 @@ Status IgniteDatasetIterator::CloseConnection() {
     if (status != 0) {
       uint8_t err_msg_header;
       TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
-      if (err_msg_header == string_val) {
+      if (err_msg_header == kStringVal) {
         int32_t err_msg_length;
         TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
+
         uint8_t* err_msg_c = new uint8_t[err_msg_length];
+        auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; });
         TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
-        std::string err_msg((char*)err_msg_c, err_msg_length);
-        delete[] err_msg_c;
+        string err_msg(reinterpret_cast<char*>(err_msg_c), err_msg_length);
 
-        return errors::Internal("Close Resource Error [status=", status,
-                                ", message=", err_msg, "]");
+        return errors::Unknown("Close Resource Error [status=", status,
+                               ", message=", err_msg, "]");
       }
-      return errors::Internal("Close Resource Error [status=", status, "]");
+      return errors::Unknown("Close Resource Error [status=", status, "]");
     }
 
-    LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed";
-
     cursor_id_ = -1;
 
     return client_->Disconnect();
@@ -126,94 +192,43 @@ Status IgniteDatasetIterator::CloseConnection() {
   return client_->IsConnected() ? client_->Disconnect() : Status::OK();
 }
 
-Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx,
-                                              std::vector<Tensor>* out_tensors,
-                                              bool* end_of_sequence) {
-  if (remainder_ == 0 && last_page_) {
-    LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed";
-
-    cursor_id_ = -1;
-    *end_of_sequence = true;
-    return Status::OK();
-  } else {
-    Status status = EstablishConnection();
-    if (!status.ok()) return status;
-
-    if (remainder_ == -1 || remainder_ == 0) {
-      Status status = remainder_ == -1 ? ScanQuery() : LoadNextPage();
-      if (!status.ok()) return status;
-    }
-
-    uint8_t* initial_ptr = ptr_;
-    std::vector<int32_t> types;
-    std::vector<Tensor> tensors;
-
-    status = parser_.Parse(&ptr_, &tensors, &types);  // Parse key
-    if (!status.ok()) return status;
-
-    status = parser_.Parse(&ptr_, &tensors, &types);  // Parse val
-    if (!status.ok()) return status;
-
-    remainder_ -= (ptr_ - initial_ptr);
-
-    out_tensors->resize(tensors.size());
-    for (int32_t i = 0; i < tensors.size(); i++)
-      (*out_tensors)[permutation_[i]] = std::move(tensors[i]);
-
-    *end_of_sequence = false;
-    return Status::OK();
-  }
-
-  *end_of_sequence = true;
-  return Status::OK();
-}
-
-Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) {
-  return errors::Unimplemented(
-      "Iterator for IgniteDataset does not support 'SaveInternal'");
-}
-
-Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx,
-                                              IteratorStateReader* reader) {
-  return errors::Unimplemented(
-      "Iterator for IgniteDataset does not support 'RestoreInternal')");
-}
-
 Status IgniteDatasetIterator::Handshake() {
-  int32_t msg_len = 8;
+  int32_t msg_len = kHandshakeReqDefaultLength;
 
   if (username_.empty())
     msg_len += 1;
   else
-    msg_len += 5 + username_.length();
+    msg_len += 5 + username_.length();  // 1 byte header, 4 bytes length.
 
   if (password_.empty())
     msg_len += 1;
   else
-    msg_len += 5 + password_.length();
+    msg_len += 5 + password_.length();  // 1 byte header, 4 bytes length.
 
   TF_RETURN_IF_ERROR(client_->WriteInt(msg_len));
   TF_RETURN_IF_ERROR(client_->WriteByte(1));
-  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_major_version));
-  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_minor_version));
-  TF_RETURN_IF_ERROR(client_->WriteShort(protocol_patch_version));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMajorVersion));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMinorVersion));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolPatchVersion));
   TF_RETURN_IF_ERROR(client_->WriteByte(2));
   if (username_.empty()) {
-    TF_RETURN_IF_ERROR(client_->WriteByte(null_val));
+    TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal));
   } else {
-    TF_RETURN_IF_ERROR(client_->WriteByte(string_val));
+    TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal));
     TF_RETURN_IF_ERROR(client_->WriteInt(username_.length()));
     TF_RETURN_IF_ERROR(
-        client_->WriteData((uint8_t*)username_.c_str(), username_.length()));
+        client_->WriteData(reinterpret_cast<const uint8_t*>(username_.c_str()),
+                           username_.length()));
   }
 
   if (password_.empty()) {
-    TF_RETURN_IF_ERROR(client_->WriteByte(null_val));
+    TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal));
   } else {
-    TF_RETURN_IF_ERROR(client_->WriteByte(string_val));
+    TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal));
     TF_RETURN_IF_ERROR(client_->WriteInt(password_.length()));
     TF_RETURN_IF_ERROR(
-        client_->WriteData((uint8_t*)password_.c_str(), password_.length()));
+        client_->WriteData(reinterpret_cast<const uint8_t*>(password_.c_str()),
+                           password_.length()));
   }
 
   int32_t handshake_res_len;
@@ -221,9 +236,6 @@ Status IgniteDatasetIterator::Handshake() {
   uint8_t handshake_res;
   TF_RETURN_IF_ERROR(client_->ReadByte(&handshake_res));
 
-  LOG(INFO) << "Handshake length " << handshake_res_len << ", res "
-            << (int16_t)handshake_res;
-
   if (handshake_res != 1) {
     int16_t serv_ver_major;
     TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_major));
@@ -234,26 +246,26 @@ Status IgniteDatasetIterator::Handshake() {
     uint8_t header;
     TF_RETURN_IF_ERROR(client_->ReadByte(&header));
 
-    if (header == string_val) {
+    if (header == kStringVal) {
       int32_t length;
       TF_RETURN_IF_ERROR(client_->ReadInt(&length));
+
       uint8_t* err_msg_c = new uint8_t[length];
+      auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; });
       TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, length));
-      std::string err_msg((char*)err_msg_c, length);
-      delete[] err_msg_c;
-
-      return errors::Internal("Handshake Error [result=", handshake_res,
-                              ", version=", serv_ver_major, ".", serv_ver_minor,
-                              ".", serv_ver_patch, ", message='", err_msg,
-                              "']");
-    } else if (header == null_val) {
-      return errors::Internal("Handshake Error [result=", handshake_res,
-                              ", version=", serv_ver_major, ".", serv_ver_minor,
-                              ".", serv_ver_patch, "]");
+      string err_msg(reinterpret_cast<char*>(err_msg_c), length);
+
+      return errors::Unknown("Handshake Error [result=", handshake_res,
+                             ", version=", serv_ver_major, ".", serv_ver_minor,
+                             ".", serv_ver_patch, ", message='", err_msg, "']");
+    } else if (header == kNullVal) {
+      return errors::Unknown("Handshake Error [result=", handshake_res,
+                             ", version=", serv_ver_major, ".", serv_ver_minor,
+                             ".", serv_ver_patch, "]");
     } else {
-      return errors::Internal("Handshake Error [result=", handshake_res,
-                              ", version=", serv_ver_major, ".", serv_ver_minor,
-                              ".", serv_ver_patch, "]");
+      return errors::Unknown("Handshake Error [result=", handshake_res,
+                             ", version=", serv_ver_major, ".", serv_ver_minor,
+                             ".", serv_ver_patch, "]");
     }
   }
 
@@ -261,31 +273,26 @@ Status IgniteDatasetIterator::Handshake() {
 }
 
 Status IgniteDatasetIterator::ScanQuery() {
-  TF_RETURN_IF_ERROR(client_->WriteInt(25));                   // Message length
-  TF_RETURN_IF_ERROR(client_->WriteShort(scan_query_opcode));  // Operation code
-  TF_RETURN_IF_ERROR(client_->WriteLong(0));                   // Request ID
+  TF_RETURN_IF_ERROR(client_->WriteInt(kScanQueryReqLength));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kScanQueryOpcode));
+  TF_RETURN_IF_ERROR(client_->WriteLong(0));  // Request ID
   TF_RETURN_IF_ERROR(
       client_->WriteInt(JavaHashCode(cache_name_)));  // Cache name
   TF_RETURN_IF_ERROR(client_->WriteByte(0));          // Flags
-  TF_RETURN_IF_ERROR(client_->WriteByte(null_val));   // Filter object
+  TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal));   // Filter object
   TF_RETURN_IF_ERROR(client_->WriteInt(page_size_));  // Cursor page size
   TF_RETURN_IF_ERROR(client_->WriteInt(part_));       // part_ition to query
   TF_RETURN_IF_ERROR(client_->WriteByte(local_));     // local_ flag
 
-  int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
-                           std::chrono::system_clock::now().time_since_epoch())
-                           .count();
-
+  uint64 wait_start = Env::Default()->NowMicros();
   int32_t res_len;
   TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
+  int64_t wait_stop = Env::Default()->NowMicros();
 
-  int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
-                          std::chrono::system_clock::now().time_since_epoch())
-                          .count();
+  LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) / 1000 << " ms";
 
-  LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms";
-
-  if (res_len < 12) return errors::Internal("Scan Query Response is corrupted");
+  if (res_len < kMinResLength)
+    return errors::Unknown("Scan Query Response is corrupted");
 
   int64_t req_id;
   TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
@@ -297,78 +304,47 @@ Status IgniteDatasetIterator::ScanQuery() {
     uint8_t err_msg_header;
     TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
 
-    if (err_msg_header == string_val) {
+    if (err_msg_header == kStringVal) {
       int32_t err_msg_length;
       TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
 
       uint8_t* err_msg_c = new uint8_t[err_msg_length];
+      auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; });
       TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
-      std::string err_msg((char*)err_msg_c, err_msg_length);
-      delete[] err_msg_c;
+      string err_msg(reinterpret_cast<char*>(err_msg_c), err_msg_length);
 
-      return errors::Internal("Scan Query Error [status=", status, ", message=",
-                              err_msg, "]");
+      return errors::Unknown("Scan Query Error [status=", status, ", message=",
+                             err_msg, "]");
     }
-    return errors::Internal("Scan Query Error [status=", status, "]");
+    return errors::Unknown("Scan Query Error [status=", status, "]");
   }
 
   TF_RETURN_IF_ERROR(client_->ReadLong(&cursor_id_));
 
-  LOG(INFO) << "Query Cursor " << cursor_id_ << " is opened";
-
   int32_t row_cnt;
   TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt));
 
-  remainder_ = res_len - 25;
-  page_ = std::unique_ptr<uint8_t>(new uint8_t[remainder_]);
-  ptr_ = page_.get();
-
-  int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
-                      std::chrono::system_clock::now().time_since_epoch())
-                      .count();
-
-  TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_));
-
-  int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
-                     std::chrono::system_clock::now().time_since_epoch())
-                     .count();
-  ;
-
-  double size_in_mb = 1.0 * remainder_ / 1024 / 1024;
-  double time_in_s = 1.0 * (stop - start) / 1000;
-  LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
-            << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
-
-  uint8_t last_page_b;
-  TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b));
-
-  last_page_ = !last_page_b;
+  int32_t page_size = res_len - kScanQueryResHeaderLength;
 
-  return Status::OK();
+  return ReceivePage(page_size);
 }
 
 Status IgniteDatasetIterator::LoadNextPage() {
-  TF_RETURN_IF_ERROR(client_->WriteInt(18));  // Message length
-  TF_RETURN_IF_ERROR(
-      client_->WriteShort(load_next_page_opcode));     // Operation code
+  TF_RETURN_IF_ERROR(client_->WriteInt(kLoadNextPageReqLength));
+  TF_RETURN_IF_ERROR(client_->WriteShort(kLoadNextPageOpcode));
   TF_RETURN_IF_ERROR(client_->WriteLong(0));           // Request ID
   TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_));  // Cursor ID
 
-  int64_t wait_start = std::chrono::duration_cast<std::chrono::milliseconds>(
-                           std::chrono::system_clock::now().time_since_epoch())
-                           .count();
-
+  uint64 wait_start = Env::Default()->NowMicros();
   int32_t res_len;
   TF_RETURN_IF_ERROR(client_->ReadInt(&res_len));
+  uint64 wait_stop = Env::Default()->NowMicros();
 
-  int64_t wait_stop = std::chrono::duration_cast<std::chrono::milliseconds>(
-                          std::chrono::system_clock::now().time_since_epoch())
-                          .count();
+  LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) / 1000
+            << " ms";
 
-  LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms";
-
-  if (res_len < 12)
-    return errors::Internal("Load Next Page Response is corrupted");
+  if (res_len < kMinResLength)
+    return errors::Unknown("Load Next Page Response is corrupted");
 
   int64_t req_id;
   TF_RETURN_IF_ERROR(client_->ReadLong(&req_id));
@@ -380,41 +356,40 @@ Status IgniteDatasetIterator::LoadNextPage() {
     uint8_t err_msg_header;
     TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header));
 
-    if (err_msg_header == string_val) {
+    if (err_msg_header == kStringVal) {
       int32_t err_msg_length;
       TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length));
 
       uint8_t* err_msg_c = new uint8_t[err_msg_length];
+      auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; });
       TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length));
-      std::string err_msg((char*)err_msg_c, err_msg_length);
-      delete[] err_msg_c;
+      string err_msg(reinterpret_cast<char*>(err_msg_c), err_msg_length);
 
-      return errors::Internal("Load Next Page Error [status=", status,
-                              ", message=", err_msg, "]");
+      return errors::Unknown("Load Next Page Error [status=", status,
+                             ", message=", err_msg, "]");
     }
-    return errors::Internal("Load Next Page Error [status=", status, "]");
+    return errors::Unknown("Load Next Page Error [status=", status, "]");
   }
 
   int32_t row_cnt;
   TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt));
 
-  remainder_ = res_len - 17;
+  int32_t page_size = res_len - kLoadNextPageResHeaderLength;
+
+  return ReceivePage(page_size);
+}
+
+Status IgniteDatasetIterator::ReceivePage(int32_t page_size) {
+  remainder_ = page_size;
   page_ = std::unique_ptr<uint8_t>(new uint8_t[remainder_]);
   ptr_ = page_.get();
 
-  int64_t start = std::chrono::duration_cast<std::chrono::milliseconds>(
-                      std::chrono::system_clock::now().time_since_epoch())
-                      .count();
-
+  uint64 start = Env::Default()->NowMicros();
   TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_));
-
-  int64_t stop = std::chrono::duration_cast<std::chrono::milliseconds>(
-                     std::chrono::system_clock::now().time_since_epoch())
-                     .count();
-  ;
+  uint64 stop = Env::Default()->NowMicros();
 
   double size_in_mb = 1.0 * remainder_ / 1024 / 1024;
-  double time_in_s = 1.0 * (stop - start) / 1000;
+  double time_in_s = 1.0 * (stop - start) / 1000 / 1000;
   LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000
             << " ms download speed " << size_in_mb / time_in_s << " Mb/sec";
 
@@ -426,7 +401,19 @@ Status IgniteDatasetIterator::LoadNextPage() {
   return Status::OK();
 }
 
-int32_t IgniteDatasetIterator::JavaHashCode(std::string str) const {
+Status IgniteDatasetIterator::CheckTypes(const std::vector<int32_t>& types) {
+  if (schema_.size() != types.size())
+    return errors::Unknown("Object has unexpected schema");
+
+  for (size_t i = 0; i < schema_.size(); i++) {
+    if (schema_[i] != types[permutation_[i]])
+      return errors::Unknown("Object has unexpected schema");
+  }
+
+  return Status::OK();
+}
+
+int32_t IgniteDatasetIterator::JavaHashCode(string str) const {
   int32_t h = 0;
   for (char& c : str) {
     h = 31 * h + c;
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
index 5858dbfcb9..c499e2c9cc 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h
@@ -13,19 +13,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_binary_object_parser.h"
-#include "ignite_client.h"
-#include "ignite_dataset.h"
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_
+
+#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_client.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h"
+#include "tensorflow/core/platform/mutex.h"
 
 namespace tensorflow {
 
 class IgniteDatasetIterator : public DatasetIterator<IgniteDataset> {
  public:
-  IgniteDatasetIterator(const Params& params, std::string host, int32 port,
-                        std::string cache_name, bool local, int32 part,
-                        int32 page_size, std::string username,
-                        std::string password, std::string certfile,
-                        std::string keyfile, std::string cert_password,
+  IgniteDatasetIterator(const Params& params, string host, int32 port,
+                        string cache_name, bool local, int32 part,
+                        int32 page_size, string username, string password,
+                        string certfile, string keyfile, string cert_password,
                         std::vector<int32> schema,
                         std::vector<int32> permutation);
   ~IgniteDatasetIterator();
@@ -38,15 +41,28 @@ class IgniteDatasetIterator : public DatasetIterator<IgniteDataset> {
                          IteratorStateReader* reader) override;
 
  private:
+  Status GetNextInternalWithValidState(IteratorContext* ctx,
+                                       std::vector<Tensor>* out_tensors,
+                                       bool* end_of_sequence);
+
+  Status EstablishConnection();
+  Status CloseConnection();
+  Status Handshake();
+  Status ScanQuery();
+  Status LoadNextPage();
+  Status ReceivePage(int32_t page_size);
+  Status CheckTypes(const std::vector<int32_t>& types);
+  int32_t JavaHashCode(string str) const;
+
   std::unique_ptr<Client> client_;
   BinaryObjectParser parser_;
 
-  const std::string cache_name_;
+  const string cache_name_;
   const bool local_;
   const int32 part_;
   const int32 page_size_;
-  const std::string username_;
-  const std::string password_;
+  const string username_;
+  const string password_;
   const std::vector<int32> schema_;
   const std::vector<int32> permutation_;
 
@@ -54,24 +70,30 @@ class IgniteDatasetIterator : public DatasetIterator<IgniteDataset> {
   int64_t cursor_id_;
   bool last_page_;
 
+  bool valid_state_;
+
+  mutex mutex_;
+
   std::unique_ptr<uint8_t> page_;
   uint8_t* ptr_;
-
-  Status EstablishConnection();
-  Status CloseConnection();
-  Status Handshake();
-  Status ScanQuery();
-  Status LoadNextPage();
-  int32_t JavaHashCode(std::string str) const;
 };
 
-constexpr uint8_t null_val = 101;
-constexpr uint8_t string_val = 9;
-constexpr uint8_t protocol_major_version = 1;
-constexpr uint8_t protocol_minor_version = 1;
-constexpr uint8_t protocol_patch_version = 0;
-constexpr int16_t scan_query_opcode = 2000;
-constexpr int16_t load_next_page_opcode = 2001;
-constexpr int16_t close_connection_opcode = 0;
+constexpr uint8_t kNullVal = 101;
+constexpr uint8_t kStringVal = 9;
+constexpr uint8_t kProtocolMajorVersion = 1;
+constexpr uint8_t kProtocolMinorVersion = 1;
+constexpr uint8_t kProtocolPatchVersion = 0;
+constexpr int16_t kScanQueryOpcode = 2000;
+constexpr int16_t kLoadNextPageOpcode = 2001;
+constexpr int16_t kCloseConnectionOpcode = 0;
+constexpr int32_t kScanQueryReqLength = 25;
+constexpr int32_t kScanQueryResHeaderLength = 25;
+constexpr int32_t kLoadNextPageReqLength = 18;
+constexpr int32_t kLoadNextPageResHeaderLength = 17;
+constexpr int32_t kCloseConnectionReqLength = 18;
+constexpr int32_t kHandshakeReqDefaultLength = 8;
+constexpr int32_t kMinResLength = 12;
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index d03404a460..eeb29ef30b 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -13,29 +13,73 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h"
 #include <stdlib.h>
-#include "ignite_dataset.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h"
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
 namespace {
 
+Status SchemaToTypes(const std::vector<int32>& schema, DataTypeVector* dtypes) {
+  for (auto e : schema) {
+    if (e == BYTE || e == BYTE_ARR) {
+      dtypes->push_back(DT_UINT8);
+    } else if (e == SHORT || e == SHORT_ARR) {
+      dtypes->push_back(DT_INT16);
+    } else if (e == INT || e == INT_ARR) {
+      dtypes->push_back(DT_INT32);
+    } else if (e == LONG || e == LONG_ARR) {
+      dtypes->push_back(DT_INT64);
+    } else if (e == FLOAT || e == FLOAT_ARR) {
+      dtypes->push_back(DT_FLOAT);
+    } else if (e == DOUBLE || e == DOUBLE_ARR) {
+      dtypes->push_back(DT_DOUBLE);
+    } else if (e == USHORT || e == USHORT_ARR) {
+      dtypes->push_back(DT_UINT8);
+    } else if (e == BOOL || e == BOOL_ARR) {
+      dtypes->push_back(DT_BOOL);
+    } else if (e == STRING || e == STRING_ARR) {
+      dtypes->push_back(DT_STRING);
+    } else {
+      return errors::Unknown("Unexpected type in schema [type_id=", e, "]");
+    }
+  }
+
+  return Status::OK();
+}
+
+Status SchemaToShapes(const std::vector<int32>& schema,
+                      std::vector<PartialTensorShape>* shapes) {
+  for (auto e : schema) {
+    if (e >= 1 && e < 10) {
+      shapes->push_back(PartialTensorShape({}));
+    } else if (e >= 12 && e < 21) {
+      shapes->push_back(PartialTensorShape({-1}));
+    } else {
+      return errors::Unknown("Unexpected type in schema [type_id=", e, "]");
+    }
+  }
+
+  return Status::OK();
+}
+
 class IgniteDatasetOp : public DatasetOpKernel {
  public:
   using DatasetOpKernel::DatasetOpKernel;
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override {
-    std::string cache_name = "";
-    std::string host = "";
+    string cache_name = "";
+    string host = "";
     int32 port = -1;
     bool local = false;
     int32 part = -1;
     int32 page_size = -1;
-    std::string username = "";
-    std::string password = "";
-    std::string certfile = "";
-    std::string keyfile = "";
-    std::string cert_password = "";
+    string username = "";
+    string password = "";
+    string certfile = "";
+    string keyfile = "";
+    string cert_password = "";
 
     const char* env_cache_name = std::getenv("IGNITE_DATASET_CACHE_NAME");
     const char* env_host = std::getenv("IGNITE_DATASET_HOST");
@@ -50,15 +94,15 @@ class IgniteDatasetOp : public DatasetOpKernel {
     const char* env_cert_password = std::getenv("IGNITE_DATASET_CERT_PASSWORD");
 
     if (env_cache_name)
-      cache_name = std::string(env_cache_name);
+      cache_name = string(env_cache_name);
     else
-      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "cache_name",
-                                                           &cache_name));
+      OP_REQUIRES_OK(
+          ctx, ParseScalarArgument<string>(ctx, "cache_name", &cache_name));
 
     if (env_host)
-      host = std::string(env_host);
+      host = string(env_host);
     else
-      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "host", &host));
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<string>(ctx, "host", &host));
 
     if (env_port)
       port = atoi(env_port);
@@ -82,34 +126,34 @@ class IgniteDatasetOp : public DatasetOpKernel {
                      ParseScalarArgument<int32>(ctx, "page_size", &page_size));
 
     if (env_username)
-      username = std::string(env_username);
+      username = string(env_username);
     else
-      OP_REQUIRES_OK(
-          ctx, ParseScalarArgument<std::string>(ctx, "username", &username));
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<string>(ctx, "username", &username));
 
     if (env_password)
-      password = std::string(env_password);
+      password = string(env_password);
     else
-      OP_REQUIRES_OK(
-          ctx, ParseScalarArgument<std::string>(ctx, "password", &password));
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<string>(ctx, "password", &password));
 
     if (env_certfile)
-      certfile = std::string(env_certfile);
+      certfile = string(env_certfile);
     else
-      OP_REQUIRES_OK(
-          ctx, ParseScalarArgument<std::string>(ctx, "certfile", &certfile));
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<string>(ctx, "certfile", &certfile));
 
     if (env_keyfile)
-      keyfile = std::string(env_keyfile);
+      keyfile = string(env_keyfile);
     else
-      OP_REQUIRES_OK(
-          ctx, ParseScalarArgument<std::string>(ctx, "keyfile", &keyfile));
+      OP_REQUIRES_OK(ctx,
+                     ParseScalarArgument<string>(ctx, "keyfile", &keyfile));
 
     if (env_cert_password)
-      cert_password = std::string(env_cert_password);
+      cert_password = string(env_cert_password);
     else
-      OP_REQUIRES_OK(ctx, ParseScalarArgument<std::string>(ctx, "cert_password",
-                                                           &cert_password));
+      OP_REQUIRES_OK(ctx, ParseScalarArgument<string>(ctx, "cert_password",
+                                                      &cert_password));
 
     const Tensor* schema_tensor;
     OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor));
@@ -124,19 +168,28 @@ class IgniteDatasetOp : public DatasetOpKernel {
 
     const Tensor* permutation_tensor;
     OP_REQUIRES_OK(ctx, ctx->input("permutation", &permutation_tensor));
-    OP_REQUIRES(ctx, schema_tensor->dims() == 1,
+    OP_REQUIRES(ctx, permutation_tensor->dims() == 1,
                 errors::InvalidArgument("`permutation` must be a vector."));
 
     std::vector<int32> permutation;
-    permutation.reserve(permutation_tensor->NumElements());
+    permutation.resize(permutation_tensor->NumElements());
     for (int i = 0; i < permutation_tensor->NumElements(); i++) {
-      permutation.push_back(permutation_tensor->flat<int32>()(i));
+      // Inversed permutation.
+      permutation[permutation_tensor->flat<int32>()(i)] = i;
     }
 
-    *output =
-        new IgniteDataset(ctx, cache_name, host, port, local, part, page_size,
-                          username, password, certfile, keyfile, cert_password,
-                          std::move(schema), std::move(permutation));
+    DataTypeVector dtypes;
+    std::vector<PartialTensorShape> shapes;
+
+    OP_REQUIRES_OK(ctx, SchemaToTypes(schema, &dtypes));
+    OP_REQUIRES_OK(ctx, SchemaToShapes(schema, &shapes));
+
+    *output = new IgniteDataset(
+        ctx, std::move(cache_name), std::move(host), port, local, part,
+        page_size, std::move(username), std::move(password),
+        std::move(certfile), std::move(keyfile), std::move(cert_password),
+        std::move(schema), std::move(permutation), std::move(dtypes),
+        std::move(shapes));
   }
 };
 
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
index 6f417a3cb5..750ebe605a 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
@@ -13,28 +13,31 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_client.h"
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_
 
-#include <string>
+#include "tensorflow/contrib/ignite/kernels/ignite_client.h"
 
 namespace tensorflow {
 
 class PlainClient : public Client {
  public:
-  PlainClient(std::string host, int port);
+  PlainClient(string host, int port, bool big_endian);
   ~PlainClient();
 
   virtual Status Connect();
   virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual Status ReadData(uint8_t* buf, int32_t length);
-  virtual Status WriteData(uint8_t* buf, int32_t length);
+  virtual Status ReadData(uint8_t* buf, const int32_t length);
+  virtual Status WriteData(const uint8_t* buf, const int32_t length);
 
  private:
-  const std::string host_;
+  const string host_;
   const int port_;
   int sock_;
 };
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
index a4c58a9563..e16c92307d 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_plain_client.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h"
 
 #include <arpa/inet.h>
 #include <netdb.h>
@@ -31,8 +31,8 @@ limitations under the License.
 
 namespace tensorflow {
 
-PlainClient::PlainClient(std::string host, int port)
-    : host_(host), port_(port), sock_(-1) {}
+PlainClient::PlainClient(string host, int port, bool big_endian)
+    : Client(big_endian), host_(std::move(host)), port_(port), sock_(-1) {}
 
 PlainClient::~PlainClient() {
   if (IsConnected()) {
@@ -87,7 +87,7 @@ bool PlainClient::IsConnected() { return sock_ != -1; }
 
 int PlainClient::GetSocketDescriptor() { return sock_; }
 
-Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
+Status PlainClient::ReadData(uint8_t* buf, const int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
@@ -95,7 +95,7 @@ Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
 
     if (res < 0)
       return errors::Internal("Error occured while reading from socket: ", res,
-                              ", ", std::string(strerror(errno)));
+                              ", ", string(strerror(errno)));
 
     if (res == 0) return errors::Internal("Server closed connection");
 
@@ -106,7 +106,7 @@ Status PlainClient::ReadData(uint8_t* buf, int32_t length) {
   return Status::OK();
 }
 
-Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
+Status PlainClient::WriteData(const uint8_t* buf, const int32_t length) {
   int sent = 0;
 
   while (sent < length) {
@@ -114,7 +114,7 @@ Status PlainClient::WriteData(uint8_t* buf, int32_t length) {
 
     if (res < 0)
       return errors::Internal("Error occured while writing into socket: ", res,
-                              ", ", std::string(strerror(errno)));
+                              ", ", string(strerror(errno)));
 
     sent += res;
     buf += res;
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index 8182fde6d9..9cd08a7779 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_plain_client.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h"
 
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
@@ -29,8 +29,11 @@ limitations under the License.
 
 namespace tensorflow {
 
-PlainClient::PlainClient(std::string host, int port)
-    : host_(host), port_(port), sock_(INVALID_SOCKET) {}
+PlainClient::PlainClient(string host, int port, bool big_endian)
+    : Client(big_endian),
+      host_(std::move(host)),
+      port_(port),
+      sock_(INVALID_SOCKET) {}
 
 PlainClient::~PlainClient() {
   if (IsConnected()) {
@@ -55,6 +58,8 @@ Status PlainClient::Connect() {
                     &result);
   if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res);
 
+  auto clean = gtl::MakeCleanup([result] { reeaddrinfo(result); });
+
   for (ptr = result; ptr != NULL; ptr = ptr->ai_next) {
     sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
     if (sock_ == INVALID_SOCKET) {
@@ -72,8 +77,6 @@ Status PlainClient::Connect() {
     break;
   }
 
-  freeaddrinfo(result);
-
   if (sock_ == INVALID_SOCKET) {
     WSACleanup();
     return errors::Internal("Unable to connect to server");
@@ -99,7 +102,7 @@ bool PlainClient::IsConnected() { return sock_ != INVALID_SOCKET; }
 
 int PlainClient::GetSocketDescriptor() { return sock_; }
 
-Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
+Status PlainClient::ReadData(uint8_t *buf, const int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
@@ -117,7 +120,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) {
   return Status::OK();
 }
 
-Status PlainClient::WriteData(uint8_t *buf, int32_t length) {
+Status PlainClient::WriteData(const uint8_t *buf, const int32_t length) {
   int sent = 0;
 
   while (sent < length) {
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
index a2bc6b9609..28db509eaa 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_ssl_wrapper.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h"
 
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
@@ -29,13 +29,15 @@ static int PasswordCb(char *buf, int size, int rwflag, void *password) {
   return (strlen(buf));
 }
 
-SslWrapper::SslWrapper(std::shared_ptr<Client> client, std::string certfile,
-                       std::string keyfile, std::string cert_password)
-    : client_(client),
-      certfile_(certfile),
-      keyfile_(keyfile),
-      cert_password_(cert_password),
-      ctx_(NULL) {}
+SslWrapper::SslWrapper(std::shared_ptr<Client> client, string certfile,
+                       string keyfile, string cert_password, bool big_endian)
+    : Client(big_endian),
+      client_(client),
+      certfile_(std::move(certfile)),
+      keyfile_(std::move(keyfile)),
+      cert_password_(std::move(cert_password)),
+      ctx_(nullptr),
+      ssl_(nullptr) {}
 
 SslWrapper::~SslWrapper() {
   if (IsConnected()) {
@@ -43,9 +45,14 @@ SslWrapper::~SslWrapper() {
     if (!status.ok()) LOG(WARNING) << status.ToString();
   }
 
-  if (ctx_ != NULL) {
+  if (ctx_ != nullptr) {
     SSL_CTX_free(ctx_);
-    ctx_ = NULL;
+    ctx_ = nullptr;
+  }
+
+  if (ssl_ != nullptr) {
+    SSL_free(ssl_);
+    ssl_ = nullptr;
   }
 }
 
@@ -63,7 +70,7 @@ Status SslWrapper::InitSslContext() {
     return errors::Internal("Couldn't load cetificate chain (file '", certfile_,
                             "')");
 
-  std::string private_key_file = keyfile_.empty() ? certfile_ : keyfile_;
+  string private_key_file = keyfile_.empty() ? certfile_ : keyfile_;
   if (SSL_CTX_use_PrivateKey_file(ctx_, private_key_file.c_str(),
                                   SSL_FILETYPE_PEM) != 1)
     return errors::Internal("Couldn't load private key (file '",
@@ -94,6 +101,7 @@ Status SslWrapper::Connect() {
 
 Status SslWrapper::Disconnect() {
   SSL_free(ssl_);
+  ssl_ = nullptr;
 
   LOG(INFO) << "SSL connection closed";
 
@@ -104,7 +112,7 @@ bool SslWrapper::IsConnected() { return client_->IsConnected(); }
 
 int SslWrapper::GetSocketDescriptor() { return client_->GetSocketDescriptor(); }
 
-Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
+Status SslWrapper::ReadData(uint8_t *buf, const int32_t length) {
   int recieved = 0;
 
   while (recieved < length) {
@@ -123,7 +131,7 @@ Status SslWrapper::ReadData(uint8_t *buf, int32_t length) {
   return Status::OK();
 }
 
-Status SslWrapper::WriteData(uint8_t *buf, int32_t length) {
+Status SslWrapper::WriteData(const uint8_t *buf, const int32_t length) {
   int sent = 0;
 
   while (sent < length) {
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
index bbba6cc181..d59ce91aba 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
@@ -13,35 +13,39 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "ignite_client.h"
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_
+
+#include "tensorflow/contrib/ignite/kernels/ignite_client.h"
 
 #include <openssl/ssl.h>
-#include <string>
 
 namespace tensorflow {
 
 class SslWrapper : public Client {
  public:
-  SslWrapper(std::shared_ptr<Client> client, std::string certfile,
-             std::string keyfile, std::string cert_password);
+  SslWrapper(std::shared_ptr<Client> client, string certfile, string keyfile,
+             string cert_password, bool big_endian);
   ~SslWrapper();
 
   virtual Status Connect();
   virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual Status ReadData(uint8_t* buf, int32_t length);
-  virtual Status WriteData(uint8_t* buf, int32_t length);
+  virtual Status ReadData(uint8_t* buf, const int32_t length);
+  virtual Status WriteData(const uint8_t* buf, const int32_t length);
 
  private:
+  Status InitSslContext();
+
   std::shared_ptr<Client> client_;
-  std::string certfile_;
-  std::string keyfile_;
-  std::string cert_password_;
+  string certfile_;
+  string keyfile_;
+  string cert_password_;
   SSL_CTX* ctx_;
   SSL* ssl_;
-
-  Status InitSslContext();
 };
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_
\ No newline at end of file
diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc
index fb16b290b1..7d18df11aa 100644
--- a/tensorflow/contrib/ignite/ops/dataset_ops.cc
+++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc
@@ -37,6 +37,8 @@ REGISTER_OP("IgniteDataset")
     .SetIsStateful()
     .SetShapeFn(shape_inference::ScalarShape)
     .Doc(R"doc(
+IgniteDataset that allows to get data from Apache Ignite.
+
 Apache Ignite is a memory-centric distributed database, caching, and processing
 platform for transactional, analytical, and streaming workloads, delivering 
 in-memory speeds at petabyte scale. This contrib package contains an 
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
index 60003ca3b7..c0e24b1c69 100644
--- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -41,19 +41,19 @@ class Readable():
 
   def read_byte(self):
     """Reads and returnes byte."""
-    return self.__read("b", 1)
+    return self._read("b", 1)
 
   def read_short(self):
     """Reads and returns short (2 bytes, little-endian)."""
-    return self.__read("h", 2)
+    return self._read("h", 2)
 
   def read_int(self):
     """Reads and returns int (4 bytes, little-endian)."""
-    return self.__read("i", 4)
+    return self._read("i", 4)
 
   def read_long(self):
     """Reads and returns long (8 bytes, little-endian)."""
-    return self.__read("q", 8)
+    return self._read("q", 8)
 
   def skip(self, length):
     """Skips the specified number of bytes."""
@@ -64,7 +64,7 @@ class Readable():
     """Reads the specified number of bytes and returns them as a buffer."""
     return None
 
-  def __read(self, data_type, length):
+  def _read(self, data_type, length):
     """Reads, unpacks and returns specified type (little-endian)."""
     data_buffer = self.read_data(length)
     return struct.unpack("<" + data_type, data_buffer)[0]
@@ -116,10 +116,10 @@ class TcpClient(Readable):
       self.sock = context.wrap_socket(self.sock)
     else:
       if keyfile is not None:
-        raise Exception("SSL is disabled, keyfile must not be specified \
+        raise RuntimeError("SSL is disabled, keyfile must not be specified \
           (to enable SSL specify certfile)")
       if password is not None:
-        raise Exception("SSL is disabled, password must not be specified \
+        raise RuntimeError("SSL is disabled, password must not be specified \
           (to enable SSL specify certfile)")
 
     self.host = host
@@ -136,19 +136,19 @@ class TcpClient(Readable):
 
   def write_byte(self, v):
     """Writes the specified byte."""
-    self.__write(v, "b")
+    self._write(v, "b")
 
   def write_short(self, v):
     """Writes the specified short (2 bytes, little-endian)."""
-    self.__write(v, "h")
+    self._write(v, "h")
 
   def write_int(self, v):
     """Writes the specified short (4 bytes, little-endian)."""
-    self.__write(v, "i")
+    self._write(v, "i")
 
   def write_long(self, v):
     """Writes the specified int (8 bytes, little-endian)."""
-    self.__write(v, "q")
+    self._write(v, "q")
 
   def write_string(self, v):
     """Writes the specified string."""
@@ -167,7 +167,7 @@ class TcpClient(Readable):
         data_buffer += buf
     return data_buffer
 
-  def __write(self, value, data_type):
+  def _write(self, value, data_type):
     """Packs and writes data using the specified type (little-endian)."""
     data_buffer = struct.pack("<" + data_type, value)
     self.sock.sendall(data_buffer)
@@ -193,6 +193,7 @@ class BinaryField():
 # Binary types defined in Apache Ignite Thin client and supported by
 # TensorFlow on Apache Ignite, see
 # https://apacheignite.readme.io/v2.6/docs/binary-client-protocol.
+# True means that type is a vector, False means type is scalar.
 types = {
     1: (dtypes.uint8, False),
     2: (dtypes.int16, False),
@@ -248,13 +249,13 @@ class TypeTreeNode():
        dataset.
     """
     if self.fields is None:
-      object_type = types[self.type_id]
-      if object_type is not None:
+      if self.type_id in types:
+        object_type = types[self.type_id]
         is_array = object_type[1]
         if is_array:
           return tensor_shape.TensorShape([None])
         return tensor_shape.TensorShape([])
-      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
+      raise ValueError("Unsupported type [type_id=%d]" % self.type_id)
     output_shapes = {}
     for field in self.fields:
       output_shapes[field.name] = field.to_output_shapes()
@@ -265,10 +266,10 @@ class TypeTreeNode():
        dataset.
     """
     if self.fields is None:
-      object_type = types[self.type_id]
-      if object_type is not None:
+      if self.type_id in types:
+        object_type = types[self.type_id]
         return object_type[0]
-      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
+      raise ValueError("Unsupported type [type_id=%d]" % self.type_id)
     else:
       output_types = {}
       for field in self.fields:
@@ -276,11 +277,11 @@ class TypeTreeNode():
       return output_types
 
   def to_flat(self):
-    """Returns a list of leaf node types."""
+    """Returns a list of node types."""
     return self.to_flat_rec([])
 
   def to_permutation(self):
-    """Returns a permutation that should be applied to order object leafs."""
+    """Returns a permutation that should be applied to order object leaves."""
     correct_order_dict = {}
     self.traversal_rec(correct_order_dict, 0)
     object_order = []
@@ -288,9 +289,10 @@ class TypeTreeNode():
     return [correct_order_dict[o] for o in object_order]
 
   def to_flat_rec(self, flat):
-    """Formats a list of leaf node types."""
-    flat.append(self.type_id)
-    if self.fields is not None:
+    """Formats a list of leaf node types in pre-order."""
+    if self.fields is None:
+      flat.append(self.type_id)
+    else:
       for field in self.fields:
         field.to_flat_rec(flat)
     return flat
@@ -320,8 +322,8 @@ class IgniteClient(TcpClient):
      have the same structure (homogeneous objects) and the cache contains at
      least one object.
   """
-  def __init__(self, host, port, username=None, password=None, certfile=None,\
-    keyfile=None, cert_password=None):
+  def __init__(self, host, port, username=None, password=None, certfile=None,
+               keyfile=None, cert_password=None):
     """Constructs a new instance of IgniteClient.
 
     Args:
@@ -385,12 +387,13 @@ class IgniteClient(TcpClient):
       serv_ver_major = self.read_short()
       serv_ver_minor = self.read_short()
       serv_ver_patch = self.read_short()
-      err_msg = self.__parse_string()
+      err_msg = self._parse_string()
       if err_msg is None:
-        raise Exception("Handshake Error [result=%d, version=%d.%d.%d]" \
-            % (res, serv_ver_major, serv_ver_minor, serv_ver_patch))
+        raise RuntimeError("Handshake Error [result=%d, version=%d.%d.%d]"
+                           % (res, serv_ver_major, serv_ver_minor,
+                              serv_ver_patch))
       else:
-        raise Exception("Handshake Error [result=%d, version=%d.%d.%d, \
+        raise RuntimeError("Handshake Error [result=%d, version=%d.%d.%d, \
             message='%s']" % (
                 res,
                 serv_ver_major,
@@ -403,7 +406,7 @@ class IgniteClient(TcpClient):
     """Collects type information about objects stored in the specified
        cache.
     """
-    cache_name_hash = self.__java_hash_code(cache_name)
+    cache_name_hash = self._java_hash_code(cache_name)
     self.write_int(25)        # Message length
     self.write_short(2000)      # Operation code
     self.write_long(0)        # Request ID
@@ -419,18 +422,18 @@ class IgniteClient(TcpClient):
     status = self.read_int()
 
     if status != 0:
-      err_msg = self.__parse_string()
+      err_msg = self._parse_string()
       if err_msg is None:
-        raise Exception("Scan Query Error [status=%s]" % status)
+        raise RuntimeError("Scan Query Error [status=%s]" % status)
       else:
-        raise Exception("Scan Query Error [status=%s, message='%s']" \
-            % (status, err_msg))
+        raise RuntimeError("Scan Query Error [status=%s, message='%s']"
+                           % (status, err_msg))
 
     self.read_long()          # Cursor id
     row_count = self.read_int()
 
     if row_count == 0:
-      raise Exception("Scan Query returned empty result, so it's \
+      raise RuntimeError("Scan Query returned empty result, so it's \
         impossible to derive the cache type")
 
     payload = DataBuffer(self.read_data(result_length - 25))
@@ -438,20 +441,20 @@ class IgniteClient(TcpClient):
     self.read_byte()          # Next page
 
     res = TypeTreeNode("root", 0, [
-        self.__collect_types("key", payload),
-        self.__collect_types("val", payload)
+        self._collect_types("key", payload),
+        self._collect_types("val", payload)
     ], [0, 1])
 
     return res
 
-  def __java_hash_code(self, s):
+  def _java_hash_code(self, s):
     """Computes hash code of the specified string using Java code."""
     h = 0
     for c in s:
       h = (31 * h + ord(c)) & 0xFFFFFFFF
     return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000
 
-  def __collect_types(self, field_name, data):
+  def _collect_types(self, field_name, data):
     """Extracts type information from the specified object."""
     type_id = data.read_byte()
 
@@ -570,7 +573,7 @@ class IgniteClient(TcpClient):
         elif header == 101:
           pass
         else:
-          raise Exception("Unknown binary type when expected string \
+          raise RuntimeError("Unknown binary type when expected string \
             [type_id=%d]" % header)
       return TypeTreeNode(field_name, type_id)
 
@@ -591,7 +594,7 @@ class IgniteClient(TcpClient):
       length = data.read_int()
       inner_data = data.read_data(length)
       data.read_int()   # Offset
-      return self.__collect_types(field_name, DataBuffer(inner_data))
+      return self._collect_types(field_name, DataBuffer(inner_data))
 
     # Complex Object.
     if type_id == 103:
@@ -603,11 +606,11 @@ class IgniteClient(TcpClient):
       data.read_int()   # Object schema id
       obj_schema_offset = data.read_int()
 
-      obj_type = self.__get_type(obj_type_id)
+      obj_type = self._get_type(obj_type_id)
       children = []
 
       for obj_field in obj_type.fields:
-        child = self.__collect_types(obj_field.field_name, data)
+        child = self._collect_types(obj_field.field_name, data)
         children.append(child)
 
       children_sorted = sorted(children, key=lambda child: child.name)
@@ -618,9 +621,9 @@ class IgniteClient(TcpClient):
 
       return TypeTreeNode(field_name, type_id, children, permutation)
 
-    raise Exception("Unknown binary type [type_id=%d]" % type_id)
+    raise RuntimeError("Unknown binary type [type_id=%d]" % type_id)
 
-  def __get_type(self, type_id):
+  def _get_type(self, type_id):
     """Queries Apache Ignite information about type by type id."""
     self.write_int(14)      # Message length
     self.write_short(3002)  # Operation code
@@ -632,25 +635,25 @@ class IgniteClient(TcpClient):
     status = self.read_int()
 
     if status != 0:
-      err_msg = self.__parse_string()
+      err_msg = self._parse_string()
       if err_msg is None:
-        raise Exception("Get Binary Type Error [status=%d, message='%s']" \
-            % (status, err_msg))
+        raise RuntimeError("Get Binary Type Error [status=%d, message='%s']"
+                           % (status, err_msg))
       else:
-        raise Exception("Get Binary Type Error [status=%d]" % status)
+        raise RuntimeError("Get Binary Type Error [status=%d]" % status)
 
     binary_type_exists = self.read_byte()
 
     if binary_type_exists == 0:
-      raise Exception("Binary type not found [type_id=%d] " % type_id)
+      raise RuntimeError("Binary type not found [type_id=%d] " % type_id)
 
     binary_type_id = self.read_int()
-    binary_type_name = self.__parse_string()
-    self.__parse_string()   # Affinity field name
+    binary_type_name = self._parse_string()
+    self._parse_string()   # Affinity field name
 
     fields = []
     for _ in range(self.read_int()):
-      field_name = self.__parse_string()
+      field_name = self._parse_string()
       field_type_id = self.read_int()
       field_id = self.read_int()
 
@@ -659,7 +662,7 @@ class IgniteClient(TcpClient):
 
     is_enum = self.read_byte()
     if is_enum == 1:
-      raise Exception("Enum fields are not supported yet")
+      raise RuntimeError("Enum fields are not supported yet")
 
     schema_cnt = self.read_int()
     for _ in range(schema_cnt):
@@ -669,7 +672,7 @@ class IgniteClient(TcpClient):
 
     return BinaryType(binary_type_id, binary_type_name, fields)
 
-  def __parse_string(self):
+  def _parse_string(self):
     """Parses string."""
     header = self.read_byte()
     if header == 9:
@@ -677,8 +680,8 @@ class IgniteClient(TcpClient):
       return self.read_data(length).decode("utf-8")
     if header == 101:
       return None
-    raise Exception("Unknown binary type when expected string [type_id=%d]" \
-        % header)
+    raise RuntimeError("Unknown binary type when expected string [type_id=%d]"
+                       % header)
 
 class IgniteDataset(Dataset):
   """Apache Ignite is a memory-centric distributed database, caching, and
@@ -692,9 +695,9 @@ class IgniteDataset(Dataset):
      Ignite Binary Client Protocol.
   """
 
-  def __init__(self, cache_name, host="localhost", port=10800, local=False,\
-    part=-1, page_size=100, username=None, password=None, certfile=None,\
-    keyfile=None, cert_password=None):
+  def __init__(self, cache_name, host="localhost", port=10800, local=False,
+               part=-1, page_size=100, username=None, password=None,
+               certfile=None, keyfile=None, cert_password=None):
     """Create a IgniteDataset.
 
     Args:
@@ -716,39 +719,44 @@ class IgniteDataset(Dataset):
     """
     super(IgniteDataset, self).__init__()
 
-    with IgniteClient(host, port, username, password, certfile, keyfile,\
-        cert_password) as client:
+    with IgniteClient(host, port, username, password, certfile, keyfile,
+                      cert_password) as client:
       client.handshake()
       self.cache_type = client.get_cache_type(cache_name)
 
-    self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string,\
-        name="cache_name")
+    self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string,
+                                            name="cache_name")
     self.host = ops.convert_to_tensor(host, dtype=dtypes.string, name="host")
     self.port = ops.convert_to_tensor(port, dtype=dtypes.int32, name="port")
     self.local = ops.convert_to_tensor(local, dtype=dtypes.bool, name="local")
     self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part")
-    self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,\
-        name="page_size")
-    self.username = ops.convert_to_tensor("" if username is None else username,\
-        dtype=dtypes.string, name="username")
-    self.password = ops.convert_to_tensor("" if password is None else password,\
-        dtype=dtypes.string, name="password")
-    self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,\
-        dtype=dtypes.string, name="certfile")
-    self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,\
-        dtype=dtypes.string, name="keyfile")
-    self.cert_password = ops.convert_to_tensor("" if cert_password is None\
-        else cert_password, dtype=dtypes.string, name="cert_password")
-    self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),\
-        dtype=dtypes.int32, name="schema")
-    self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),\
-        dtype=dtypes.int32, name="permutation")
+    self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,
+                                           name="page_size")
+    self.username = ops.convert_to_tensor("" if username is None else username,
+                                          dtype=dtypes.string, name="username")
+    self.password = ops.convert_to_tensor("" if password is None else password,
+                                          dtype=dtypes.string, name="password")
+    self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,
+                                          dtype=dtypes.string, name="certfile")
+    self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,
+                                         dtype=dtypes.string, name="keyfile")
+    self.cert_password = ops.convert_to_tensor("" if cert_password is None
+                                               else cert_password,
+                                               dtype=dtypes.string,
+                                               name="cert_password")
+    self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),
+                                        dtype=dtypes.int32, name="schema")
+    self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),
+                                             dtype=dtypes.int32,
+                                             name="permutation")
 
   def _as_variant_tensor(self):
-    return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,\
-        self.port, self.local, self.part, self.page_size, self.username,\
-        self.password, self.certfile, self.keyfile, self.cert_password,\
-        self.schema, self.permutation)
+    return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,
+                                          self.port, self.local, self.part,
+                                          self.page_size, self.username,
+                                          self.password, self.certfile,
+                                          self.keyfile, self.cert_password,
+                                          self.schema, self.permutation)
 
   @property
   def output_classes(self):
-- 
GitLab


From 172d199db9bab808723d24ea586322f1b2d80413 Mon Sep 17 00:00:00 2001
From: leondgarse <leondgarse@aol.com>
Date: Thu, 13 Sep 2018 16:12:18 +0800
Subject: [PATCH 0052/1085] Use the last char instead of the first in
 prediction

---
 .../examples/generative_examples/text_generation.ipynb    | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb
index e0d5e494d4..07dbfd3630 100644
--- a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb
+++ b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb
@@ -598,19 +598,13 @@
         "# empty string to store our results\n",
         "text_generated = ''\n",
         "\n",
-        "# low temperatures results in more predictable text.\n",
-        "# higher temperatures results in more surprising text\n",
-        "# experiment to find the best setting\n",
-        "temperature = 1.0\n",
-        "\n",
         "# hidden state shape == (batch_size, number of rnn units); here batch size == 1\n",
         "hidden = [tf.zeros((1, units))]\n",
         "for i in range(num_generate):\n",
         "    predictions, hidden = model(input_eval, hidden)\n",
         "\n",
         "    # using a multinomial distribution to predict the word returned by the model\n",
-        "    predictions = predictions / temperature\n",
-        "    predicted_id = tf.argmax(predictions[0]).numpy()\n",
+        "    predicted_id = tf.argmax(predictions[-1]).numpy()\n",
         "    \n",
         "    # We pass the predicted word as the next input to the model\n",
         "    # along with the previous hidden state\n",
-- 
GitLab


From ce9b23070638094022036656e5d1fbf3e23b74c6 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 11:24:37 +0300
Subject: [PATCH 0053/1085] Add forgotten ignite_byte_swapper.h

---
 .../ignite/kernels/ignite_byte_swapper.h      | 129 ++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
new file mode 100644
index 0000000000..986bedcf69
--- /dev/null
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -0,0 +1,129 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_
+#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_
+
+#include <stdint.h>
+
+namespace tensorflow {
+
+class ByteSwapper {
+ public:
+  ByteSwapper(bool big_endian) {
+    int x = 1;
+    bool is_little_endian = (*(char *)&x == 1);
+    swap_ = big_endian == is_little_endian;
+  }
+
+  inline void SwapIfRequiredInt16(int16_t *x) const {
+    if (swap_) {
+      Swap16(x);
+    }
+  }
+
+  inline void SwapIfRequiredUnsignedInt16(uint16_t *x) const {
+    if (swap_) {
+      Swap16(reinterpret_cast<int16_t*>(x));
+    }
+  }
+
+  inline void SwapIfRequiredInt32(int32_t *x) const {
+    if (swap_) {
+      Swap32(x);
+    }
+  }
+
+  inline void SwapIfRequiredFloat(float *x) const {
+    if (swap_) {
+      Swap32(reinterpret_cast<int32_t*>(x));
+    }
+  }
+
+  inline void SwapIfRequiredInt64(int64_t *x) const {
+    if (swap_) {
+      Swap64(x);
+    }
+  }
+
+  inline void SwapIfRequiredDouble(double *x) const {
+    if (swap_) {
+      Swap64(reinterpret_cast<int64_t*>(x));
+    }
+  }
+
+  inline void SwapIfRequiredInt16Arr(int16_t *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) Swap16(&x[i]);
+    }
+  }
+
+  inline void SwapIfRequiredUnsignedInt16Arr(uint16_t *x,
+                                             int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) 
+        Swap16(reinterpret_cast<int16_t*>(&x[i]));
+    }
+  }
+
+  inline void SwapIfRequiredInt32Arr(int32_t *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) Swap32(&x[i]);
+    }
+  }
+
+  inline void SwapIfRequiredFloatArr(float *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) 
+        Swap32(reinterpret_cast<int32_t*>(&x[i]));
+    }
+  }
+
+  inline void SwapIfRequiredInt64Arr(int64_t *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) Swap64(&x[i]);
+    }
+  }
+
+  inline void SwapIfRequiredDoubleArr(double *x, int32_t length) const {
+    if (swap_) {
+      for (int32_t i = 0; i < length; i++) 
+        Swap64(reinterpret_cast<int64_t*>(&x[i]));
+    }
+  }
+
+ private:
+  inline void Swap16(int16_t *x) const {
+    *x = ((*x & 0xFF) << 8) | ((*x >> 8) & 0xFF);
+  }
+
+  inline void Swap32(int32_t *x) const {
+    *x = ((*x & 0xFF) << 24) | (((*x >> 8) & 0xFF) << 16) |
+         (((*x >> 16) & 0xFF) << 8) | ((*x >> 24) & 0xFF);
+  }
+
+  inline void Swap64(int64_t *x) const {
+    *x = ((*x & 0xFF) << 56) | (((*x >> 8) & 0xFF) << 48) |
+         (((*x >> 16) & 0xFF) << 40) | (((*x >> 24) & 0xFF) << 32) |
+         (((*x >> 32) & 0xFF) << 24) | (((*x >> 40) & 0xFF) << 16) |
+         (((*x >> 48) & 0xFF) << 8) | ((*x >> 56) & 0xFF);
+  }
+
+  bool swap_;
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_
-- 
GitLab


From d797e99a043e01609583a37c04e1e509d126e1a0 Mon Sep 17 00:00:00 2001
From: dmitrievanthony <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 09:42:16 +0000
Subject: [PATCH 0054/1085] Fix windows build.

---
 .../contrib/ignite/kernels/ignite_plain_client_windows.cc      | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index 9cd08a7779..17f2bf45d1 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #pragma comment(lib, "Mswsock.lib")
 #pragma comment(lib, "AdvApi32.lib")
 
+#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -58,7 +59,7 @@ Status PlainClient::Connect() {
                     &result);
   if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res);
 
-  auto clean = gtl::MakeCleanup([result] { reeaddrinfo(result); });
+  auto clean = gtl::MakeCleanup([result] { freeaddrinfo(result); });
 
   for (ptr = result; ptr != NULL; ptr = ptr->ai_next) {
     sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol);
-- 
GitLab


From c8b60b894b91cfdb4176176d7dcf328d2b40b41f Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 16:34:59 +0300
Subject: [PATCH 0055/1085] Fix code style.

---
 .../ignite/kernels/ignite_byte_swapper.h       | 18 +++++++++---------
 .../ignite/kernels/ignite_dataset_ops.cc       |  2 +-
 .../kernels/ignite_plain_client_windows.cc     |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
index 986bedcf69..5b42de4c5a 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -36,7 +36,7 @@ class ByteSwapper {
 
   inline void SwapIfRequiredUnsignedInt16(uint16_t *x) const {
     if (swap_) {
-      Swap16(reinterpret_cast<int16_t*>(x));
+      Swap16(reinterpret_cast<int16_t *>(x));
     }
   }
 
@@ -48,7 +48,7 @@ class ByteSwapper {
 
   inline void SwapIfRequiredFloat(float *x) const {
     if (swap_) {
-      Swap32(reinterpret_cast<int32_t*>(x));
+      Swap32(reinterpret_cast<int32_t *>(x));
     }
   }
 
@@ -60,7 +60,7 @@ class ByteSwapper {
 
   inline void SwapIfRequiredDouble(double *x) const {
     if (swap_) {
-      Swap64(reinterpret_cast<int64_t*>(x));
+      Swap64(reinterpret_cast<int64_t *>(x));
     }
   }
 
@@ -73,8 +73,8 @@ class ByteSwapper {
   inline void SwapIfRequiredUnsignedInt16Arr(uint16_t *x,
                                              int32_t length) const {
     if (swap_) {
-      for (int32_t i = 0; i < length; i++) 
-        Swap16(reinterpret_cast<int16_t*>(&x[i]));
+      for (int32_t i = 0; i < length; i++)
+        Swap16(reinterpret_cast<int16_t *>(&x[i]));
     }
   }
 
@@ -86,8 +86,8 @@ class ByteSwapper {
 
   inline void SwapIfRequiredFloatArr(float *x, int32_t length) const {
     if (swap_) {
-      for (int32_t i = 0; i < length; i++) 
-        Swap32(reinterpret_cast<int32_t*>(&x[i]));
+      for (int32_t i = 0; i < length; i++)
+        Swap32(reinterpret_cast<int32_t *>(&x[i]));
     }
   }
 
@@ -99,8 +99,8 @@ class ByteSwapper {
 
   inline void SwapIfRequiredDoubleArr(double *x, int32_t length) const {
     if (swap_) {
-      for (int32_t i = 0; i < length; i++) 
-        Swap64(reinterpret_cast<int64_t*>(&x[i]));
+      for (int32_t i = 0; i < length; i++)
+        Swap64(reinterpret_cast<int64_t *>(&x[i]));
     }
   }
 
diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index eeb29ef30b..e48fce4ed2 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h"
 #include <stdlib.h>
 #include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h"
+#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h"
 #include "tensorflow/core/framework/dataset.h"
 
 namespace tensorflow {
diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
index 17f2bf45d1..43d6108c34 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc
@@ -24,8 +24,8 @@ limitations under the License.
 #pragma comment(lib, "Mswsock.lib")
 #pragma comment(lib, "AdvApi32.lib")
 
-#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
-- 
GitLab


From c513c04aed8790c78c46b78f90ec848555498ce4 Mon Sep 17 00:00:00 2001
From: dmitrievanthony <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 15:13:54 +0000
Subject: [PATCH 0056/1085] Add -DWIN32_LEAN_AND_MEAN option into BUILD.

---
 tensorflow/contrib/ignite/BUILD | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
index 2f598b4aed..1adc6c6ccc 100644
--- a/tensorflow/contrib/ignite/BUILD
+++ b/tensorflow/contrib/ignite/BUILD
@@ -61,6 +61,9 @@ cc_library(
         "@boringssl//:ssl",
         "@protobuf_archive//:protobuf_headers",
     ],
+    copts = if_windows([
+        "-DWIN32_LEAN_AND_MEAN",
+    ]),
     alwayslink = 1,
 )
 
-- 
GitLab


From f54856b1448bed24534189e4aa2ebb9d0b4f5b9a Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Thu, 13 Sep 2018 18:13:47 +0000
Subject: [PATCH 0057/1085] Apply buildifier changes.

---
 tensorflow/contrib/ignite/BUILD | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
index 1adc6c6ccc..9393b702d1 100644
--- a/tensorflow/contrib/ignite/BUILD
+++ b/tensorflow/contrib/ignite/BUILD
@@ -6,14 +6,14 @@ exports_files(["LICENSE"])
 
 load(
     "//tensorflow:tensorflow.bzl",
-    "tf_gen_op_wrapper_py",
-    "tf_kernel_library",
+    "if_not_windows",
+    "if_windows",
     "tf_custom_op_library",
     "tf_custom_op_py_library",
     "tf_gen_op_libs",
+    "tf_gen_op_wrapper_py",
+    "tf_kernel_library",
     "tf_py_test",
-    "if_not_windows",
-    "if_windows",
 )
 
 py_library(
@@ -55,15 +55,15 @@ cc_library(
     ]) + if_windows([
         "kernels/ignite_plain_client_windows.cc",
     ]),
+    copts = if_windows([
+        "-DWIN32_LEAN_AND_MEAN",
+    ]),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
         "@boringssl//:ssl",
         "@protobuf_archive//:protobuf_headers",
     ],
-    copts = if_windows([
-        "-DWIN32_LEAN_AND_MEAN",
-    ]),
     alwayslink = 1,
 )
 
-- 
GitLab


From 1557c36e6552138ba3aacb8a56fcd082c76ed606 Mon Sep 17 00:00:00 2001
From: Rasmi Elasmar <rasmi@google.com>
Date: Thu, 13 Sep 2018 16:45:31 -0400
Subject: [PATCH 0058/1085] Updated docs to point to tfp instead of tf.contrib

---
 tensorflow/contrib/bayesflow/__init__.py                     | 2 ++
 tensorflow/contrib/bayesflow/python/ops/monte_carlo.py       | 5 ++++-
 tensorflow/contrib/distributions/__init__.py                 | 2 ++
 .../contrib/distributions/python/ops/bijectors/__init__.py   | 2 ++
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py
index 41a8c920fc..493046b399 100644
--- a/tensorflow/contrib/bayesflow/__init__.py
+++ b/tensorflow/contrib/bayesflow/__init__.py
@@ -14,6 +14,8 @@
 # ==============================================================================
 """Ops for representing Bayesian computation.
 
+Use [tfp](/probability/api_docs/python/tfp) instead.
+
 ## This package provides classes for Bayesian computation with TensorFlow.
 """
 from __future__ import absolute_import
diff --git a/tensorflow/contrib/bayesflow/python/ops/monte_carlo.py b/tensorflow/contrib/bayesflow/python/ops/monte_carlo.py
index 68fa415eea..28a829d87d 100644
--- a/tensorflow/contrib/bayesflow/python/ops/monte_carlo.py
+++ b/tensorflow/contrib/bayesflow/python/ops/monte_carlo.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Monte Carlo integration and helpers."""
+"""Monte Carlo integration and helpers.
+
+Use [tfp.monte_carlo](/probability/api_docs/python/tfp/monte_carlo) instead.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 5cec93c4df..92bb058e17 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 # ==============================================================================
 """Classes representing statistical distributions and ops for working with them.
+
+Use [tfp.distributions](/probability/api_docs/python/tfp/distributions) instead.
 """
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py
index e141f8b5c6..3b17de9b8a 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py
@@ -14,6 +14,8 @@
 # ==============================================================================
 """Bijector Ops.
 
+Use [tfp.bijectors](/probability/api_docs/python/tfp/bijectors) instead.
+
 @@AbsoluteValue
 @@Affine
 @@AffineLinearOperator
-- 
GitLab


From 8b13622dec3caaa26fec0b48079198721da5c1e7 Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Sat, 15 Sep 2018 21:02:47 -0700
Subject: [PATCH 0059/1085] Raise warning once only if trainable_weights and
 _collected_trainable_weights are inconsistent

---
 tensorflow/python/keras/engine/training.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index dc464c02b6..ed51b2069e 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -676,11 +676,10 @@ class Model(Network):
       return
 
     if len(self.trainable_weights) != len(self._collected_trainable_weights):
-      logging.warning(
-          UserWarning(
-              'Discrepancy between trainable weights and collected trainable'
-              ' weights, did you set `model.trainable` without calling'
-              ' `model.compile` after ?'))
+      logging.log_first_n(logging.WARN,
+                          'Discrepancy between trainable weights and collected trainable'
+                          ' weights, did you set `model.trainable` without calling'
+                          ' `model.compile` after ?', 1)
 
   def _make_train_function(self):
     if not hasattr(self, 'train_function'):
-- 
GitLab


From 41adb4d6c787f23f4555a692e54f62b0101a7a6f Mon Sep 17 00:00:00 2001
From: leondgarse <leondgarse@aol.com>
Date: Sun, 16 Sep 2018 16:25:42 +0800
Subject: [PATCH 0060/1085] Replace multinomial in comment by argmax

Replace 'a multinomial distribution' in comment by 'argmax', and also in the text introduction.
---
 .../python/examples/generative_examples/text_generation.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb
index 07dbfd3630..ad481175fe 100644
--- a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb
+++ b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb
@@ -567,7 +567,7 @@
         "\n",
         "* We get predictions using the start_string and the hidden state\n",
         "\n",
-        "* Then we use a multinomial distribution to calculate the index of the predicted word. **We use this predicted word as our next input to the model**\n",
+        "* Then we use argmax to calculate the index of the predicted word. **We use this predicted word as our next input to the model**\n",
         "\n",
         "* **The hidden state returned by the model is fed back into the model so that it now has more context rather than just one word.** After we predict the next word, the modified hidden states are again fed back into the model, which is how it learns as it gets more context from the previously predicted words.\n",
         "\n",
@@ -603,7 +603,7 @@
         "for i in range(num_generate):\n",
         "    predictions, hidden = model(input_eval, hidden)\n",
         "\n",
-        "    # using a multinomial distribution to predict the word returned by the model\n",
+        "    # using argmax to predict the word returned by the model\n",
         "    predicted_id = tf.argmax(predictions[-1]).numpy()\n",
         "    \n",
         "    # We pass the predicted word as the next input to the model\n",
-- 
GitLab


From 14b5eb7d0295060204bf56e041da4c84c44d8cd5 Mon Sep 17 00:00:00 2001
From: leondgarse <leondgarse@aol.com>
Date: Sun, 16 Sep 2018 16:34:32 +0800
Subject: [PATCH 0061/1085] Remove a temperature line

Remove a temperature line in Next Steps part
---
 .../python/examples/generative_examples/text_generation.ipynb    | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb
index ad481175fe..bda9e77085 100644
--- a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb
+++ b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb
@@ -626,7 +626,6 @@
         "\n",
         "* Change the start string to a different character, or the start of a sentence.\n",
         "* Experiment with training on a different, or with different parameters. [Project  Gutenberg](http://www.gutenberg.org/ebooks/100), for example, contains a large collection of books.\n",
-        "* Experiment with the temperature parameter.\n",
         "* Add another RNN layer.\n"
       ]
     },
-- 
GitLab


From 6ce5f3e461aad6cc8c53c661d1679d651502f4d3 Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Sun, 16 Sep 2018 18:42:53 -0700
Subject: [PATCH 0062/1085] Fix line too long

---
 tensorflow/python/keras/engine/training.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index ed51b2069e..be03c096bf 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -677,9 +677,9 @@ class Model(Network):
 
     if len(self.trainable_weights) != len(self._collected_trainable_weights):
       logging.log_first_n(logging.WARN,
-                          'Discrepancy between trainable weights and collected trainable'
-                          ' weights, did you set `model.trainable` without calling'
-                          ' `model.compile` after ?', 1)
+                          'Discrepancy between trainable weights and collected'
+                          ' trainable weights, did you set `model.trainable`'
+                          ' without calling `model.compile` after ?', 1)
 
   def _make_train_function(self):
     if not hasattr(self, 'train_function'):
-- 
GitLab


From fa80a920f2a3bc00522fe95fc9a07a28d67fc055 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Mon, 17 Sep 2018 12:50:18 +0300
Subject: [PATCH 0063/1085] Add 'override' specifier to ReadData, WriteData.

---
 tensorflow/contrib/ignite/kernels/ignite_plain_client.h | 4 ++--
 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
index 750ebe605a..d12d56fdc1 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h
@@ -29,8 +29,8 @@ class PlainClient : public Client {
   virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual Status ReadData(uint8_t* buf, const int32_t length);
-  virtual Status WriteData(const uint8_t* buf, const int32_t length);
+  virtual Status ReadData(uint8_t* buf, const int32_t length) override;
+  virtual Status WriteData(const uint8_t* buf, const int32_t length) override;
 
  private:
   const string host_;
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
index d59ce91aba..372156a757 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h
@@ -32,8 +32,8 @@ class SslWrapper : public Client {
   virtual Status Disconnect();
   virtual bool IsConnected();
   virtual int GetSocketDescriptor();
-  virtual Status ReadData(uint8_t* buf, const int32_t length);
-  virtual Status WriteData(const uint8_t* buf, const int32_t length);
+  virtual Status ReadData(uint8_t* buf, const int32_t length) override;
+  virtual Status WriteData(const uint8_t* buf, const int32_t length) override;
 
  private:
   Status InitSslContext();
-- 
GitLab


From 50762768ef9d7915ade5cf485d26ffb96753df71 Mon Sep 17 00:00:00 2001
From: Yifei Feng <1192265+yifeif@users.noreply.github.com>
Date: Tue, 18 Sep 2018 00:18:57 -0700
Subject: [PATCH 0064/1085] Update CONTRIBUTING.md to reflect PR merge process

---
 CONTRIBUTING.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f598999f35..3f62e3f645 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -31,7 +31,8 @@ Follow either of the two links above to access the appropriate CLA and instructi
 If you have improvements to TensorFlow, send us your pull requests! For those
 just getting started, Github has a [howto](https://help.github.com/articles/using-pull-requests/).
 
-TensorFlow team members will be assigned to review your pull requests. Once the pull requests are approved and pass continuous integration checks, we will merge the pull requests.
+TensorFlow team members will be assigned to review your pull requests. Once the pull requests are approved and pass continuous integration checks, a TensorFlow team member will apply `ready to pull` to your change. This means we are working on getting your pull request submitted to our internal repository. After the change has been submitted internally, your pull request will be merged automatically on GitHub.
+
 For some pull requests, we will apply the patch for each pull request to our internal version control system first, and export the change out as a new commit later, at which point the original pull request will be closed. The commits in the pull request will be squashed into a single commit with the pull request creator as the author. These pull requests will be labeled as pending merge internally.
 
 If you want to contribute but you're not sure where to start, take a look at the
-- 
GitLab


From a8fe42cdfc7341655f61414ce02ddd9d016165ed Mon Sep 17 00:00:00 2001
From: Yifei Feng <1192265+yifeif@users.noreply.github.com>
Date: Tue, 18 Sep 2018 00:19:38 -0700
Subject: [PATCH 0065/1085] Update CONTRIBUTING.md

---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3f62e3f645..05e970e8cc 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -31,7 +31,7 @@ Follow either of the two links above to access the appropriate CLA and instructi
 If you have improvements to TensorFlow, send us your pull requests! For those
 just getting started, Github has a [howto](https://help.github.com/articles/using-pull-requests/).
 
-TensorFlow team members will be assigned to review your pull requests. Once the pull requests are approved and pass continuous integration checks, a TensorFlow team member will apply `ready to pull` to your change. This means we are working on getting your pull request submitted to our internal repository. After the change has been submitted internally, your pull request will be merged automatically on GitHub.
+TensorFlow team members will be assigned to review your pull requests. Once the pull requests are approved and pass continuous integration checks, a TensorFlow team member will apply `ready to pull` label to your change. This means we are working on getting your pull request submitted to our internal repository. After the change has been submitted internally, your pull request will be merged automatically on GitHub.
 
 For some pull requests, we will apply the patch for each pull request to our internal version control system first, and export the change out as a new commit later, at which point the original pull request will be closed. The commits in the pull request will be squashed into a single commit with the pull request creator as the author. These pull requests will be labeled as pending merge internally.
 
-- 
GitLab


From d28a29e2c3fbc19ab6207e9075e2c3ccfdc415ed Mon Sep 17 00:00:00 2001
From: Max Pumperla <max.pumperla@googlemail.com>
Date: Tue, 18 Sep 2018 10:16:14 +0200
Subject: [PATCH 0066/1085] indentation fix

---
 tensorflow/python/keras/optimizers.py | 76 +++++++++++++--------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/tensorflow/python/keras/optimizers.py b/tensorflow/python/keras/optimizers.py
index 629972fd8e..68081e2485 100644
--- a/tensorflow/python/keras/optimizers.py
+++ b/tensorflow/python/keras/optimizers.py
@@ -314,24 +314,24 @@ class RMSprop(Optimizer):
 
 @tf_export('keras.optimizers.Adagrad')
 class Adagrad(Optimizer):
-    """Adagrad optimizer.
+  """Adagrad optimizer.
 
-    Adagrad is an optimizer with parameter-specific learning rates,
-    which are adapted relative to how frequently a parameter gets
-    updated during training. The more updates a parameter receives,
-    the smaller the updates.
+  Adagrad is an optimizer with parameter-specific learning rates,
+  which are adapted relative to how frequently a parameter gets
+  updated during training. The more updates a parameter receives,
+  the smaller the updates.
 
-    It is recommended to leave the parameters of this optimizer
-    at their default values.
+  It is recommended to leave the parameters of this optimizer
+  at their default values.
 
-    # Arguments
-        lr: float >= 0. Initial learning rate.
-        epsilon: float >= 0. If `None`, defaults to `K.epsilon()`.
-        decay: float >= 0. Learning rate decay over each update.
+  # Arguments
+      lr: float >= 0. Initial learning rate.
+      epsilon: float >= 0. If `None`, defaults to `K.epsilon()`.
+      decay: float >= 0. Learning rate decay over each update.
 
-    # References
-        - [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
-    """
+  # References
+      - [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
+  """
 
   def __init__(self, lr=0.01, epsilon=None, decay=0., **kwargs):
     super(Adagrad, self).__init__(**kwargs)
@@ -381,30 +381,30 @@ class Adagrad(Optimizer):
 
 @tf_export('keras.optimizers.Adadelta')
 class Adadelta(Optimizer):
-    """Adadelta optimizer.
-
-    Adadelta is a more robust extension of Adagrad
-    that adapts learning rates based on a moving window of gradient updates,
-    instead of accumulating all past gradients. This way, Adadelta continues
-    learning even when many updates have been done. Compared to Adagrad, in the
-    original version of Adadelta you don't have to set an initial learning
-    rate. In this version, initial learning rate and decay factor can
-    be set, as in most other Keras optimizers.
-
-    It is recommended to leave the parameters of this optimizer
-    at their default values.
-
-    # Arguments
-        lr: float >= 0. Initial learning rate, defaults to 1.
-            It is recommended to leave it at the default value.
-        rho: float >= 0. Adadelta decay factor, corresponding to fraction of
-            gradient to keep at each time step.
-        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
-        decay: float >= 0. Initial learning rate decay.
-
-    # References
-        - [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
-    """
+  """Adadelta optimizer.
+
+  Adadelta is a more robust extension of Adagrad
+  that adapts learning rates based on a moving window of gradient updates,
+  instead of accumulating all past gradients. This way, Adadelta continues
+  learning even when many updates have been done. Compared to Adagrad, in the
+  original version of Adadelta you don't have to set an initial learning
+  rate. In this version, initial learning rate and decay factor can
+  be set, as in most other Keras optimizers.
+
+  It is recommended to leave the parameters of this optimizer
+  at their default values.
+
+  # Arguments
+      lr: float >= 0. Initial learning rate, defaults to 1.
+          It is recommended to leave it at the default value.
+      rho: float >= 0. Adadelta decay factor, corresponding to fraction of
+          gradient to keep at each time step.
+      epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
+      decay: float >= 0. Initial learning rate decay.
+
+  # References
+      - [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
+  """
 
   def __init__(self, lr=1.0, rho=0.95, epsilon=None, decay=0., **kwargs):
     super(Adadelta, self).__init__(**kwargs)
-- 
GitLab


From 6d67ba41f566e963e2c061ca7df63edad89e1fca Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 18 Sep 2018 18:56:55 +0300
Subject: [PATCH 0067/1085] Work out the endianness statically.

---
 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
index 5b42de4c5a..484cc4d6f5 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -20,12 +20,12 @@ limitations under the License.
 
 namespace tensorflow {
 
+constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__;
+
 class ByteSwapper {
  public:
   ByteSwapper(bool big_endian) {
-    int x = 1;
-    bool is_little_endian = (*(char *)&x == 1);
-    swap_ = big_endian == is_little_endian;
+    swap_ = big_endian == kLittleEndian;
   }
 
   inline void SwapIfRequiredInt16(int16_t *x) const {
-- 
GitLab


From 14e9345a88b08f5d2a12f3f441b1d82c041d7ea3 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 18 Sep 2018 18:23:52 +0000
Subject: [PATCH 0068/1085] Avoid saving sensitive information in graph.

---
 .../ignite/kernels/ignite_dataset_ops.cc      | 30 ++-------
 tensorflow/contrib/ignite/ops/dataset_ops.cc  | 10 ---
 .../ignite/python/ops/ignite_dataset_ops.py   | 18 +----
 .../python/tests/ignite_dataset_test.py       | 66 ++++++++++++++-----
 4 files changed, 56 insertions(+), 68 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
index e48fce4ed2..bdaed72387 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
+++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc
@@ -125,35 +125,15 @@ class IgniteDatasetOp : public DatasetOpKernel {
       OP_REQUIRES_OK(ctx,
                      ParseScalarArgument<int32>(ctx, "page_size", &page_size));
 
-    if (env_username)
-      username = string(env_username);
-    else
-      OP_REQUIRES_OK(ctx,
-                     ParseScalarArgument<string>(ctx, "username", &username));
+    if (env_username) username = string(env_username);
 
-    if (env_password)
-      password = string(env_password);
-    else
-      OP_REQUIRES_OK(ctx,
-                     ParseScalarArgument<string>(ctx, "password", &password));
+    if (env_password) password = string(env_password);
 
-    if (env_certfile)
-      certfile = string(env_certfile);
-    else
-      OP_REQUIRES_OK(ctx,
-                     ParseScalarArgument<string>(ctx, "certfile", &certfile));
+    if (env_certfile) certfile = string(env_certfile);
 
-    if (env_keyfile)
-      keyfile = string(env_keyfile);
-    else
-      OP_REQUIRES_OK(ctx,
-                     ParseScalarArgument<string>(ctx, "keyfile", &keyfile));
+    if (env_keyfile) keyfile = string(env_keyfile);
 
-    if (env_cert_password)
-      cert_password = string(env_cert_password);
-    else
-      OP_REQUIRES_OK(ctx, ParseScalarArgument<string>(ctx, "cert_password",
-                                                      &cert_password));
+    if (env_cert_password) cert_password = string(env_cert_password);
 
     const Tensor* schema_tensor;
     OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor));
diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc
index 7d18df11aa..3d6fbe00e6 100644
--- a/tensorflow/contrib/ignite/ops/dataset_ops.cc
+++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc
@@ -26,11 +26,6 @@ REGISTER_OP("IgniteDataset")
     .Input("local: bool")
     .Input("part: int32")
     .Input("page_size: int32")
-    .Input("username: string")
-    .Input("password: string")
-    .Input("certfile: string")
-    .Input("keyfile: string")
-    .Input("cert_password: string")
     .Input("schema: int32")
     .Input("permutation: int32")
     .Output("handle: variant")
@@ -54,11 +49,6 @@ port: Ignite Thin Client Port.
 local: Local flag that defines that data should be fetched from local host only.
 part: Partition data should be fetched from.
 page_size: Page size for Ignite Thin Client.
-username: Username to authenticate via Ignite Thin Client.
-password: Password to authenticate via Ignite Thin Client.
-certfile: SSL certificate to establish SSL connection.
-keyfile: Private key file to establish SSL connection.
-cert_password: SSL certificate password to establish SSL connection.
 schema: Internal structure that defines schema of cache objects.
 permutation: Internal structure that defines permutation of cache objects.
 )doc");
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
index c0e24b1c69..7fc9e1fdd1 100644
--- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -732,18 +732,6 @@ class IgniteDataset(Dataset):
     self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part")
     self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,
                                            name="page_size")
-    self.username = ops.convert_to_tensor("" if username is None else username,
-                                          dtype=dtypes.string, name="username")
-    self.password = ops.convert_to_tensor("" if password is None else password,
-                                          dtype=dtypes.string, name="password")
-    self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,
-                                          dtype=dtypes.string, name="certfile")
-    self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,
-                                         dtype=dtypes.string, name="keyfile")
-    self.cert_password = ops.convert_to_tensor("" if cert_password is None
-                                               else cert_password,
-                                               dtype=dtypes.string,
-                                               name="cert_password")
     self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),
                                         dtype=dtypes.int32, name="schema")
     self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),
@@ -753,10 +741,8 @@ class IgniteDataset(Dataset):
   def _as_variant_tensor(self):
     return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,
                                           self.port, self.local, self.part,
-                                          self.page_size, self.username,
-                                          self.password, self.certfile,
-                                          self.keyfile, self.cert_password,
-                                          self.schema, self.permutation)
+                                          self.page_size, self.schema,
+                                          self.permutation)
 
   @property
   def output_classes(self):
diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
index 933e62b804..5d74617690 100644
--- a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
+++ b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
@@ -35,28 +35,60 @@ class IgniteDatasetTest(test.TestCase):
   """
 
   def test_ignite_dataset_with_plain_client(self):
+    """Test Ignite Dataset with plain client.
+    """
+    self._clear_env()
     ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300)
-    self.__check_dataset(ds)
+    self._check_dataset(ds)
 
   def test_ignite_dataset_with_ssl_client(self):
-    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,\
-      certfile=os.path.dirname(os.path.realpath(__file__)) +\
-      "/keystore/client.pem", cert_password="123456")
-    self.__check_dataset(ds)
+    """Test Ignite Dataset with ssl client.
+    """
+    self._clear_env()
+    os.environ["IGNITE_DATASET_CERTFILE"] = os.path.dirname(
+        os.path.realpath(__file__)) + "/keystore/client.pem"
+    os.environ["IGNITE_DATASET_CERT_PASSWORD"] = "123456"
+
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,
+                       certfile=os.environ["IGNITE_DATASET_CERTFILE"],
+                       cert_password=os.environ["IGNITE_DATASET_CERT_PASSWORD"])
+    self._check_dataset(ds)
 
   def test_ignite_dataset_with_ssl_client_and_auth(self):
-    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,\
-      certfile=os.path.dirname(os.path.realpath(__file__)) +\
-      "/keystore/client.pem", cert_password="123456",\
-      username="ignite", password="ignite")
-    self.__check_dataset(ds)
+    """Test Ignite Dataset with ssl client and authentication.
+    """
+    self._clear_env()
+    os.environ['IGNITE_DATASET_USERNAME'] = "ignite"
+    os.environ['IGNITE_DATASET_PASSWORD'] = "ignite"
+    os.environ['IGNITE_DATASET_CERTFILE'] = os.path.dirname(
+        os.path.realpath(__file__)) + "/keystore/client.pem"
+    os.environ['IGNITE_DATASET_CERT_PASSWORD'] = "123456"
+
+    ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,
+                       certfile=os.environ['IGNITE_DATASET_CERTFILE'],
+                       cert_password=os.environ['IGNITE_DATASET_CERT_PASSWORD'],
+                       username=os.environ['IGNITE_DATASET_USERNAME'],
+                       password=os.environ['IGNITE_DATASET_PASSWORD'])
+    self._check_dataset(ds)
+
+  def _clear_env(self):
+    """Clears environment variables used by Ignite Dataset.
+    """
+    if 'IGNITE_DATASET_USERNAME' in os.environ:
+      del os.environ['IGNITE_DATASET_USERNAME']
+    if 'IGNITE_DATASET_PASSWORD' in os.environ:
+      del os.environ['IGNITE_DATASET_PASSWORD']
+    if 'IGNITE_DATASET_CERTFILE' in os.environ:
+      del os.environ['IGNITE_DATASET_CERTFILE']
+    if 'IGNITE_DATASET_CERT_PASSWORD' in os.environ:
+      del os.environ['IGNITE_DATASET_CERT_PASSWORD']
 
-  def __check_dataset(self, dataset):
+  def _check_dataset(self, dataset):
     """Checks that dataset provids correct data.
     """
-    self.assertEquals(tf.int64, dataset.output_types['key'])
-    self.assertEquals(tf.string, dataset.output_types['val']['NAME'])
-    self.assertEquals(tf.int64, dataset.output_types['val']['VAL'])
+    self.assertEqual(tf.int64, dataset.output_types['key'])
+    self.assertEqual(tf.string, dataset.output_types['val']['NAME'])
+    self.assertEqual(tf.int64, dataset.output_types['val']['VAL'])
 
     it = dataset.make_one_shot_iterator()
     ne = it.get_next()
@@ -66,11 +98,11 @@ class IgniteDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(ne)
 
-    self.assertEquals({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\
+    self.assertEqual({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\
       rows[0])
-    self.assertEquals({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\
+    self.assertEqual({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\
       rows[1])
-    self.assertEquals({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\
+    self.assertEqual({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\
       rows[2])
 
 if __name__ == "__main__":
-- 
GitLab


From 1e821cd9a02b59a90a8b983759cf74eded16265f Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Wed, 19 Sep 2018 11:06:40 -0700
Subject: [PATCH 0069/1085] Fix bug in metrics sparse_categorical_accuracy and
 sparse_top_k_categorical_accuracy

---
 tensorflow/python/keras/metrics.py      | 15 ++++++++------
 tensorflow/python/keras/metrics_test.py | 26 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index e64241e5cf..2fd3244800 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -635,7 +635,9 @@ def categorical_accuracy(y_true, y_pred):
 
 @tf_export('keras.metrics.sparse_categorical_accuracy')
 def sparse_categorical_accuracy(y_true, y_pred):
-  y_true = math_ops.reduce_max(y_true, axis=-1)
+  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
+  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
+    y_true = array_ops.squeeze(y_true, [-1])
   y_pred = math_ops.argmax(y_pred, axis=-1)
 
   # If the expected labels are float, we need to cast the int returned by
@@ -654,11 +656,12 @@ def top_k_categorical_accuracy(y_true, y_pred, k=5):
 
 @tf_export('keras.metrics.sparse_top_k_categorical_accuracy')
 def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
-  return K.mean(
-      nn.in_top_k(y_pred,
-                  math_ops.cast(math_ops.reduce_max(y_true, axis=-1), 'int32'),
-                  k),
-      axis=-1)
+  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
+  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
+    y_true = array_ops.squeeze(y_true, [-1])
+
+  return K.mean(nn.in_top_k(y_pred, math_ops.cast(y_true, 'int32'), k),
+                axis=-1)
 
 # Aliases
 
diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py
index 4195ea18ad..43ac5b7ead 100644
--- a/tensorflow/python/keras/metrics_test.py
+++ b/tensorflow/python/keras/metrics_test.py
@@ -54,6 +54,18 @@ class KerasMetricsTest(test.TestCase):
       y_pred = K.variable(np.random.random((6, 7)))
       self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6,))
 
+      # Test correctness if the shape of y_true is (num_samples,)
+      y_true = K.variable([1., 0., 0., 0.])
+      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
+      print(K.eval(metric(y_true, y_pred)))
+      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])
+
+      # Test correctness if the shape of y_true is (num_samples, 1)
+      y_true = K.variable([[1.], [0.], [0.], [0.]])
+      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
+      print(K.eval(metric(y_true, y_pred)))
+      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])
+
   def test_sparse_categorical_accuracy_float(self):
     with self.cached_session():
       metric = metrics.sparse_categorical_accuracy
@@ -79,6 +91,7 @@ class KerasMetricsTest(test.TestCase):
 
   def test_sparse_top_k_categorical_accuracy(self):
     with self.cached_session():
+      # Test correctness if the shape of y_true is (num_samples, 1)
       y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
       y_true = K.variable(np.array([[1], [0]]))
       result = K.eval(
@@ -91,6 +104,19 @@ class KerasMetricsTest(test.TestCase):
           metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
       self.assertEqual(result, 0.)
 
+      # Test correctness if the shape of y_true is (num_samples,)
+      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
+      y_true = K.variable(np.array([1, 0]))
+      result = K.eval(
+        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
+      self.assertEqual(result, 1)
+      result = K.eval(
+        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
+      self.assertEqual(result, 0.5)
+      result = K.eval(
+        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
+      self.assertEqual(result, 0.)
+
   def test_top_k_categorical_accuracy(self):
     with self.cached_session():
       y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
-- 
GitLab


From 78e205d35b31aa49e8dac357d827900a165f0a21 Mon Sep 17 00:00:00 2001
From: Erik Smistad <ersmistad@gmail.com>
Date: Thu, 20 Sep 2018 15:56:34 +0200
Subject: [PATCH 0070/1085] Added warning message if cmake version is below 3.8
 or host toolset is not set to x64 on windows

---
 tensorflow/contrib/cmake/CMakeLists.txt | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 225c5e6227..a7a66472df 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -1,8 +1,14 @@
 # Minimum CMake required
+cmake_minimum_required(VERSION 3.5)
+
 if(WIN32)
-  cmake_minimum_required(VERSION 3.8)
-else()
-  cmake_minimum_required(VERSION 3.5)
+	if(${CMAKE_VERSION} VERSION_LESS "3.8")
+		message(WARNING "Your current cmake version is ${CMAKE_VERSION} which does not support setting the toolset architecture to x64. This may cause \"compiler out of heap space\" errors when building. Consider upgrading your cmake to > 3.8 and using the flag -Thost=x64 when running cmake.")
+	else()
+		if(NOT CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE OR NOT "${CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE}" STREQUAL "x64")
+			message(WARNING "Your current cmake generator is set to use 32 bit toolset architecture. This may cause \"compiler out of heap space\" errors when building. Consider using the flag -Thost=x64 when running cmake.")
+		endif()
+	endif()
 endif()
 
 # Project
-- 
GitLab


From 039ddaa6c0af4be4291383564db5a964d0035c1d Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Thu, 20 Sep 2018 15:49:40 -0700
Subject: [PATCH 0071/1085] Fix bad indentation

---
 tensorflow/python/keras/metrics_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py
index 43ac5b7ead..5f5565d4d5 100644
--- a/tensorflow/python/keras/metrics_test.py
+++ b/tensorflow/python/keras/metrics_test.py
@@ -108,13 +108,13 @@ class KerasMetricsTest(test.TestCase):
       y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
       y_true = K.variable(np.array([1, 0]))
       result = K.eval(
-        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
       self.assertEqual(result, 1)
       result = K.eval(
-        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
       self.assertEqual(result, 0.5)
       result = K.eval(
-        metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
       self.assertEqual(result, 0.)
 
   def test_top_k_categorical_accuracy(self):
-- 
GitLab


From 6ee2153dea6e094fd1a9667ac3bfabbce368a70d Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Fri, 20 Jul 2018 14:38:00 -0700
Subject: [PATCH 0072/1085] Numpy ndarray should be serialized as Python list

---
 tensorflow/python/keras/engine/saving_test.py | 22 +++++++++++++++++++
 tensorflow/python/util/serialization.py       |  2 +-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 02d99d5d69..6ff54a94ca 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -679,6 +679,28 @@ class TestWholeModelSaving(test.TestCase):
       os.remove(fname)
 
 
+  def test_saving_constant_initializer_with_numpy(self):
+      if h5py is None:
+        self.skipTest('h5py required to run this test')
+
+      with self.test_session():
+        model = keras.models.Sequential()
+        model.add(
+            keras.layers.Dense(
+                2,
+                input_shape=(3,),
+                kernel_initializer=keras.initializers.Constant(np.ones((3, 2)))
+            )
+        )
+        model.add(keras.layers.Dense(3))
+        model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
+        fd, fname = tempfile.mkstemp('.h5')
+        keras.models.save_model(model, fname)
+        model = keras.models.load_model(fname)
+        os.close(fd)
+        os.remove(fname)
+
+
 class SubclassedModel(training.Model):
 
   def __init__(self):
diff --git a/tensorflow/python/util/serialization.py b/tensorflow/python/util/serialization.py
index faf5164faa..cff864c030 100644
--- a/tensorflow/python/util/serialization.py
+++ b/tensorflow/python/util/serialization.py
@@ -43,7 +43,7 @@ def get_json_type(obj):
   # if obj is any numpy type
   if type(obj).__module__ == np.__name__:
     if isinstance(obj, np.ndarray):
-      return {'type': type(obj), 'value': obj.tolist()}
+      return obj.tolist()
     else:
       return obj.item()
 
-- 
GitLab


From d6f93dfd0bb784a3d99105f8f5989ff08f33119e Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Wed, 15 Aug 2018 17:31:35 -0700
Subject: [PATCH 0073/1085] Fix bad indentation

---
 tensorflow/python/keras/engine/saving_test.py | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 6ff54a94ca..4be370cc13 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -680,25 +680,25 @@ class TestWholeModelSaving(test.TestCase):
 
 
   def test_saving_constant_initializer_with_numpy(self):
-      if h5py is None:
-        self.skipTest('h5py required to run this test')
+    if h5py is None:
+      self.skipTest('h5py required to run this test')
 
-      with self.test_session():
-        model = keras.models.Sequential()
-        model.add(
-            keras.layers.Dense(
-                2,
-                input_shape=(3,),
-                kernel_initializer=keras.initializers.Constant(np.ones((3, 2)))
-            )
-        )
-        model.add(keras.layers.Dense(3))
-        model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
-        fd, fname = tempfile.mkstemp('.h5')
-        keras.models.save_model(model, fname)
-        model = keras.models.load_model(fname)
-        os.close(fd)
-        os.remove(fname)
+    with self.test_session():
+      model = keras.models.Sequential()
+      model.add(
+          keras.layers.Dense(
+              2,
+              input_shape=(3,),
+              kernel_initializer=keras.initializers.Constant(np.ones((3, 2)))
+          )
+      )
+      model.add(keras.layers.Dense(3))
+      model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
+      fd, fname = tempfile.mkstemp('.h5')
+      keras.models.save_model(model, fname)
+      model = keras.models.load_model(fname)
+      os.close(fd)
+      os.remove(fname)
 
 
 class SubclassedModel(training.Model):
-- 
GitLab


From 510c117752a681e80e26cf8cf3614e82d59d1e53 Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Fri, 21 Sep 2018 11:32:20 -0700
Subject: [PATCH 0074/1085] Change test_session to cached_session

---
 tensorflow/python/keras/engine/saving_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 4be370cc13..8d179aea87 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -683,7 +683,7 @@ class TestWholeModelSaving(test.TestCase):
     if h5py is None:
       self.skipTest('h5py required to run this test')
 
-    with self.test_session():
+    with self.cached_session():
       model = keras.models.Sequential()
       model.add(
           keras.layers.Dense(
-- 
GitLab


From 268bf6b118646c8e93162d591263bca907c7db28 Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Fri, 21 Sep 2018 11:39:29 -0700
Subject: [PATCH 0075/1085] Removing dead code. With the addition of mkl slice
 using MKL DNN this code will not longer be executed

---
 tensorflow/core/kernels/slice_op.cc | 198 ----------------------------
 1 file changed, 198 deletions(-)

diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index 77594479cb..83377ffab5 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -228,190 +228,6 @@ class SliceOp : public OpKernel {
   }
 };
 
-#ifdef INTEL_MKL
-template <typename Device, typename T>
-class MklSliceOp : public OpKernel {
- public:
-  explicit MklSliceOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    TensorShape output_shape;
-    gtl::InlinedVector<int64, 4> begin;
-    gtl::InlinedVector<int64, 4> size;
-    Tensor* result = nullptr;
-    bool done = false;
-    SharedSliceCommonCases<T>(context, &output_shape, &begin, &size, &result,
-                              &done);
-    if (!context->status().ok() || done == true) return;
-
-    const Tensor& input = context->input(0);
-    const int input_dims = input.dims();
-
-    if (output_shape.num_elements() > 0) {
-      if (std::is_same<Device, CPUDevice>::value && input_dims == 2 &&
-          DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())) {
-        auto input = context->input(0).tensor<T, 2>();
-        auto output = result->tensor<T, 2>();
-        // TODO(agarwal): Consider multi-threading this loop for cases where
-        // size[0] is very large.
-        for (int i = 0; i < size[0]; ++i) {
-          const int64 row = begin[0] + i;
-          if (i + 1 < size[0]) {
-            port::prefetch<port::PREFETCH_HINT_T0>(&output(i + 1, 0));
-            port::prefetch<port::PREFETCH_HINT_T0>(&input(row + 1, begin[1]));
-          }
-          memcpy(&output(i, 0), &input(row, begin[1]), size[1] * sizeof(T));
-        }
-        return;
-      }
-#define HANDLE_DIM(NDIM)                            \
-  if (input_dims == NDIM) {                         \
-    HandleCase<NDIM>(context, begin, size, result); \
-    return;                                         \
-  }
-
-      HANDLE_DIM(1);
-      HANDLE_DIM(2);
-      HANDLE_DIM(3);
-      HANDLE_DIM(4);
-      HANDLE_DIM(5);
-      HANDLE_DIM(6);
-      HANDLE_DIM(7);
-
-#undef HANDLE_DIM
-
-      OP_REQUIRES(
-          context, false,
-          errors::Unimplemented("SliceOp : Unhandled input dimensions"));
-    }
-  }
-
- private:
-  // Helper function for DoesSliceShapeDifferInOnly1D. Checks if the following
-  // criteria matches for slice_dim: if indices for slice are 0 in all dims
-  // except slice_dim and if sizes of all the dimensions of the slice are same
-  // as the sizes of all the dimensions of the input except slice_dim, then
-  // returns True. Otherwise, returns False.
-  bool DoesSliceShapeDifferInOnly1DHelper(const TensorShape& input_shape,
-                                          const gtl::ArraySlice<int64>& begin,
-                                          const gtl::ArraySlice<int64>& size,
-                                          int slice_dim) {
-    for (int dim = 0; dim < 4; dim++) {
-      if (dim != slice_dim &&
-          (begin[dim] != 0 || size[dim] != input_shape.dim_size(dim))) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  // Is 'input' tensor being sliced over a single dimension out of 4?
-  //
-  // This check is applicable in the context of Slice of a 4-D tensor in
-  // NHWC or NCHW format over channel dimension.
-  //
-  // If indices for slice are 0 in all dims except one dimension and if sizes of
-  // all dimensions of slice are same as sizes of all dimensions of inputs
-  // except that dimension, then we are slicing over a single dimension.
-  //
-  // Returns True if Slicing over a single dimension, and sets slice_dim
-  // to the number of the dimension that satisfies criteria.
-  bool DoesSliceShapeDifferInOnly1D(const TensorShape& input_shape,
-                                    const gtl::ArraySlice<int64>& begin,
-                                    const gtl::ArraySlice<int64>& size,
-                                    int* slice_dim) {
-    for (int dim = 0; dim < 4; dim++) {
-      if (DoesSliceShapeDifferInOnly1DHelper(input_shape, begin, size, dim)) {
-        *slice_dim = dim;
-        return true;
-      }
-    }
-    return false;
-  }
-
-  template <int NDIM>
-  void HandleCase(OpKernelContext* context, const gtl::ArraySlice<int64>& begin,
-                  const gtl::ArraySlice<int64>& size, Tensor* result) {
-    int slice_dim = -1;
-    TensorShape in_shape = context->input(0).shape();
-    // Special case for handling 4-D tensor slice when shape of the slice
-    // differs from the input tensor in only 1 out of 4 dimensions.
-    // This case arises in the context of Slice of 4-D tensor in NHWC or NCHW
-    // format over channel dimension.
-    if (NDIM == 4 &&
-        DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) {
-      size_t in_strides[4] = {
-          (size_t)in_shape.dim_size(1) * in_shape.dim_size(2) *
-              in_shape.dim_size(3),
-          (size_t)in_shape.dim_size(2) * in_shape.dim_size(3),
-          (size_t)in_shape.dim_size(3), (size_t)1};
-
-      size_t out_strides[4] = {(size_t)size[1] * size[2] * size[3],
-                               (size_t)size[2] * size[3], (size_t)size[3],
-                               (size_t)1};
-
-      T* in_buf = const_cast<T*>(
-          const_cast<const T*>(context->input(0).flat<T>().data()));
-      T* op_buf = result->flat<T>().data();
-
-      if (slice_dim == 1) {
-        /* data format = NCHW */
-
-#pragma omp parallel for
-        for (ssize_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) {
-          T* ip = in_buf + (d0 * in_strides[0]);
-          T* op = op_buf + ((d0 - begin[0]) * out_strides[0]);
-#pragma omp parallel for
-          for (ssize_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) {
-            T* ip1 = ip + (d1 * in_strides[1]);
-            T* op1 = op + ((d1 - begin[1]) * out_strides[1]);
-            // For NCHW, H and W will be contiguous. So we can copy
-            // both with one memcpy.
-            memcpy(static_cast<void*>(op1), static_cast<void*>(ip1),
-                   sizeof(T) * in_strides[1]);
-          }
-        }
-        return;
-      } else if (slice_dim == 3) {
-        /* data_format = NHWC */
-
-#pragma omp parallel for
-        for (ssize_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) {
-          T* ip = in_buf + (d0 * in_strides[0]);
-          T* op = op_buf + ((d0 - begin[0]) * out_strides[0]);
-#pragma omp parallel for
-          for (ssize_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) {
-            T* ip1 = ip + (d1 * in_strides[1]);
-            T* op1 = op + ((d1 - begin[1]) * out_strides[1]);
-#pragma omp parallel for
-            for (ssize_t d2 = begin[2]; d2 < begin[2] + size[2]; d2++) {
-              T* ip2 = ip1 + (d2 * in_strides[2]);
-              T* ip3 = ip2 + begin[3];
-              T* op2 = op1 + ((d2 - begin[2]) * out_strides[2]);
-              T* op3 = op2;
-              memcpy(static_cast<void*>(op3), static_cast<void*>(ip3),
-                     sizeof(T) * size[3]);
-            }
-          }
-        }
-        return;
-      }
-      // slice_dim is not 1 or 3, then we fallback to Eigen implementation.
-    }
-
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
-    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;
-    for (int i = 0; i < NDIM; ++i) {
-      indices[i] = begin[i];
-      sizes[i] = size[i];
-    }
-
-    functor::Slice<Device, T, NDIM>()(
-        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
-        context->input(0).tensor<T, NDIM>(), indices, sizes);
-  }
-};
-#endif
 
 // Forward declarations of the functor specializations for declared in the
 // sharded source files.
@@ -440,7 +256,6 @@ TF_CALL_ALL_TYPES(DECLARE_FOR_N);
 #undef DECLARE_CPU_SPEC
 }  // namespace functor
 
-#ifndef INTEL_MKL
 #define REGISTER_SLICE(type)                             \
   REGISTER_KERNEL_BUILDER(Name("Slice")                  \
                               .Device(DEVICE_CPU)        \
@@ -452,19 +267,6 @@ TF_CALL_ALL_TYPES(DECLARE_FOR_N);
 TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
 TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
 #undef REGISTER_SLICE
-#else
-#define REGISTER_SLICE(type)                             \
-  REGISTER_KERNEL_BUILDER(Name("Slice")                  \
-                              .Device(DEVICE_CPU)        \
-                              .TypeConstraint<type>("T") \
-                              .HostMemory("begin")       \
-                              .HostMemory("size"),       \
-                          MklSliceOp<CPUDevice, type>)
-
-TF_CALL_POD_STRING_TYPES(REGISTER_SLICE);
-TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE);
-#undef REGISTER_SLICE
-#endif  // INTEL_MKL
 
 #if GOOGLE_CUDA
 // Forward declarations of the functor specializations for GPU.
-- 
GitLab


From 457ef66c2d4985000aa1d1a9bc643f66bbddd46d Mon Sep 17 00:00:00 2001
From: Martin Wicke <577277+martinwicke@users.noreply.github.com>
Date: Fri, 21 Sep 2018 12:58:32 -0700
Subject: [PATCH 0076/1085] Fix long lines

---
 tensorflow/python/keras/layers/embeddings.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py
index a0b9393812..76e551a7ce 100644
--- a/tensorflow/python/keras/layers/embeddings.py
+++ b/tensorflow/python/keras/layers/embeddings.py
@@ -142,12 +142,14 @@ class Embedding(Layer):
       else:
         in_lens = [self.input_length]
       if len(in_lens) != len(input_shape) - 1:
-        raise ValueError('"input_length" is %s, but received input has shape %s' %
+        raise ValueError('"input_length" is %s, '
+                         'but received input has shape %s' %
                          (str(self.input_length), str(input_shape)))
       else:
         for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])):
           if s1 is not None and s2 is not None and s1 != s2:
-            raise ValueError('"input_length" is %s, but received input has shape %s' %
+            raise ValueError('"input_length" is %s, '
+                             'but received input has shape %s' %
                              (str(self.input_length), str(input_shape)))
           elif s1 is None:
             in_lens[i] = s2
-- 
GitLab


From 282d6e7c384c83f9b6bf43b7b37eb606ccc64d06 Mon Sep 17 00:00:00 2001
From: Martin Wicke <577277+martinwicke@users.noreply.github.com>
Date: Fri, 21 Sep 2018 12:59:15 -0700
Subject: [PATCH 0077/1085] Fix long lines

---
 tensorflow/python/ops/nn_ops.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 17e10995f2..a68422c315 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -818,12 +818,14 @@ class Convolution(object):
     try:
       input_shape.with_rank(num_spatial_dims + 2)
     except ValueError:
-      raise ValueError("input tensor must have rank %d" % (num_spatial_dims + 2))
+      raise ValueError("input tensor must have rank %d" % 
+                       (num_spatial_dims + 2))
 
     try:
       filter_shape.with_rank(num_spatial_dims + 2)
     except ValueError:
-      raise ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2))
+      raise ValueError("filter tensor must have rank %d" % 
+                       (num_spatial_dims + 2))
 
     if data_format is None or not data_format.startswith("NC"):
       input_channels_dim = input_shape[num_spatial_dims + 1]
-- 
GitLab


From 6dd7a09211cc74d11ff1554624b527c432020cbc Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Sun, 23 Sep 2018 20:33:19 +0800
Subject: [PATCH 0078/1085] Enable partitioned variable assignments

---
 .../python/kernel_tests/variables_test.py     | 43 ++++++++++++++++-
 tensorflow/python/ops/variables.py            | 47 +++++++++++++++++--
 2 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 2e7975667c..687784c8b7 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -673,7 +673,7 @@ class PartitionedVariableTest(test.TestCase):
         v0._set_save_slice_info(
             variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
         v1._set_save_slice_info(
-            variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
+            variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1]))
         partitions = [2]
 
         variables.PartitionedVariable(
@@ -696,6 +696,47 @@ class PartitionedVariableTest(test.TestCase):
             variable_list=[v0],
             partitions=partitions)
 
+  def testPartitionedVariableAssignments(self):
+    with ops.Graph().as_default(), self.cached_session() as sess:
+      v0 = variables.Variable(initial_value=[0.0])
+      v1 = variables.Variable(initial_value=[1.0])
+      v0._set_save_slice_info(
+          variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
+      v1._set_save_slice_info(
+          variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
+      partitions = [2]
+
+      # Pass variable_list as [v1, v0] to ensure they are properly
+      # re-sorted to [v0, v1] based on their slice info offsets.
+      partitioned_variable = variables.PartitionedVariable(
+          name="two_vars",
+          shape=[2],
+          dtype=v0.dtype,
+          variable_list=[v0, v1],
+          partitions=partitions)
+      
+      deltas_a = constant_op.constant([1.0, 2.0])
+      deltas_b = constant_op.constant([3.0, 4.0])
+      ones = array_ops.ones([2])
+      plus_delta = partitioned_variable.assign_add(deltas_a)
+      minus_delta = partitioned_variable.assign_sub(deltas_b)
+      assign_ones = partitioned_variable.assign(ones)
+      variables.global_variables_initializer().run()
+
+      self.assertEqual([1.0], plus_delta[0].eval())
+      self.assertEqual([1.0], v0.eval())
+      self.assertEqual([3.0], plus_delta[1].eval())
+      self.assertEqual([3.0], v1.eval())
+      
+      self.assertEqual([-2.0], minus_delta[0].eval())
+      self.assertEqual([-2.0], v0.eval())
+      self.assertEqual([-1.0], minus_delta[1].eval())
+      self.assertEqual([-1.0], v1.eval())
+ 
+      self.assertEqual([1.0], assign_ones[0].eval())
+      self.assertEqual([1.0], v0.eval())
+      self.assertEqual([1.0], assign_ones[1].eval())
+      self.assertEqual([1.0], v1.eval())
 
 class VariableContainerTest(test.TestCase):
 
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 7a46157739..2d6a767fed 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -2395,11 +2395,50 @@ class PartitionedVariable(object):
   def _get_partitions(self):
     return self._partitions
 
-  def assign(self, value, use_locking=False):
-    _ = value, use_locking
-    raise NotImplementedError(
-        "assign() has not been implemented for PartitionedVariable.")
+  def _apply_assign_fn(self,
+                       assign_fn,
+                       value):
+    partition_axes = self._partition_axes()
+    if len(partition_axes) > 1:
+      raise NotImplementedError(
+          "Cannot concatenate along more than one dimension: %s.  "
+          "Multi-axis partition assign_fn is not supported" % str(partition_axes))
+    partition_ix = partition_axes[0]
+    size_splits_list = [
+        var.shape[partition_ix].value for var in self._variable_list]
+    value_list = array_ops.split(
+        value, size_splits_list, axis=partition_ix)
+    op_list = [
+        assign_fn(var, value_list[idx], idx) \
+        for idx, var in enumerate(self._variable_list)]
+    return op_list
 
+  def assign(self, value, use_locking=False, name=None, read_value=True):
+    assign_fn = lambda var, r_value, idx: var.assign(
+        r_value, use_locking=use_locking,
+        name="%s_%d" % (name, idx), read_value=read_value)
+    assign_list = self._apply_assign_fn(assign_fn, value)
+    if read_value:
+      return assign_list
+    return [assign.op for assign in assign_list]
+
+  def assign_add(self, value, use_locking=False, name=None, read_value=True):
+    assign_fn = lambda var, r_value, idx: var.assign_add(
+        r_value, use_locking=use_locking,
+        name="%s_%d" % (name, idx), read_value=read_value)
+    assign_list = self._apply_assign_fn(assign_fn, value)
+    if read_value:
+      return assign_list
+    return [assign.op for assign in assign_list]
+
+  def assign_sub(self, value, use_locking=False, name=None, read_value=True):
+    assign_fn = lambda var, r_value, idx: var.assign_sub(
+        r_value, use_locking=use_locking,
+        name="%s_%d" % (name, idx), read_value=read_value)
+    assign_list = self._apply_assign_fn(assign_fn, value)
+    if read_value:
+      return assign_list
+    return [assign.op for assign in assign_list]
 
 @tf_export("global_variables")
 def global_variables(scope=None):
-- 
GitLab


From a4eecdb369ecdae3b7fe7c1415d7b3b55bcc7b9e Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 23 Sep 2018 17:14:53 +0000
Subject: [PATCH 0079/1085] Fix GPU build issue on python 3

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/image/kernels/image_ops.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/tensorflow/contrib/image/kernels/image_ops.h b/tensorflow/contrib/image/kernels/image_ops.h
index 6b63eed130..7fac774d07 100644
--- a/tensorflow/contrib/image/kernels/image_ops.h
+++ b/tensorflow/contrib/image/kernels/image_ops.h
@@ -71,14 +71,7 @@ class ProjectiveGenerator {
         (transform[3] * output_x + transform[4] * output_y + transform[5]) /
         projection;
 
-    // TODO(ringwalt): Add a fill value input.
-#if (defined __CUDA_ARCH__) && (CUDART_VERSION < 8000)
-    // On CUDA versions previous to 8.0, only __shared__ variables
-    // could be declared as static in the device code.
     const T fill_value = T(0);
-#else
-    static const T fill_value = T(0);
-#endif
     switch (interpolation_) {
       case INTERPOLATION_NEAREST:
         // Switch the order of x and y again for indexing into the image.
-- 
GitLab


From 8f4ded5884684f40b4912d95c717b185340996b8 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Mon, 24 Sep 2018 11:07:21 +0300
Subject: [PATCH 0080/1085] Fix clang styles.

---
 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
index 484cc4d6f5..6753c67701 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -24,9 +24,7 @@ constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__;
 
 class ByteSwapper {
  public:
-  ByteSwapper(bool big_endian) {
-    swap_ = big_endian == kLittleEndian;
-  }
+  ByteSwapper(bool big_endian) { swap_ = big_endian == kLittleEndian; }
 
   inline void SwapIfRequiredInt16(int16_t *x) const {
     if (swap_) {
-- 
GitLab


From 90c68770467701a23d23a85c5d769f6f4fa39f0f Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Mon, 24 Sep 2018 12:14:45 +0300
Subject: [PATCH 0081/1085] Fix byte-order issue.

---
 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
index 6753c67701..46df3e39dc 100644
--- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
+++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h
@@ -17,14 +17,13 @@ limitations under the License.
 #define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_
 
 #include <stdint.h>
+#include "tensorflow/core/platform/byte_order.h"
 
 namespace tensorflow {
 
-constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__;
-
 class ByteSwapper {
  public:
-  ByteSwapper(bool big_endian) { swap_ = big_endian == kLittleEndian; }
+  ByteSwapper(bool big_endian) { swap_ = big_endian == port::kLittleEndian; }
 
   inline void SwapIfRequiredInt16(int16_t *x) const {
     if (swap_) {
-- 
GitLab


From ef2c1190d6dc7ec8bca911d03ca2c67b8692a293 Mon Sep 17 00:00:00 2001
From: "Dougal J. Sutherland" <dougal@gmail.com>
Date: Mon, 23 Jul 2018 19:22:57 +0100
Subject: [PATCH 0082/1085] add KID implementation

---
 tensorflow/contrib/gan/README.md              |   2 +-
 .../eval/python/classifier_metrics_impl.py    | 380 +++++++++++++++++-
 .../eval/python/classifier_metrics_test.py    |  97 +++++
 3 files changed, 476 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/gan/README.md b/tensorflow/contrib/gan/README.md
index 4ead66ca13..6a4ead86fe 100644
--- a/tensorflow/contrib/gan/README.md
+++ b/tensorflow/contrib/gan/README.md
@@ -47,7 +47,7 @@ Easily experiment with already-implemented and well-tested losses and penalties,
 such as the Wasserstein loss, gradient penalty, mutual information penalty, etc
 
 * [evaluation](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/eval/python/):
-Use `Inception Score` or `Frechet Distance` with a pretrained Inception
+Use `Inception Score`, `Frechet Distance`, or `Kernel Distance` with a pretrained Inception
 network to evaluate your unconditional generative model. You can also use
 your own pretrained classifier for more specific performance numbers, or use
 other methods for evaluating conditional generative models.
diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
index d914f54945..7dc60df474 100644
--- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
+++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py
@@ -14,8 +14,8 @@
 # ==============================================================================
 """Model evaluation tools for TFGAN.
 
-These methods come from https://arxiv.org/abs/1606.03498 and
-https://arxiv.org/abs/1706.08500.
+These methods come from https://arxiv.org/abs/1606.03498,
+https://arxiv.org/abs/1706.08500, and https://arxiv.org/abs/1801.01401.
 
 NOTE: This implementation uses the same weights as in
 https://github.com/openai/improved-gan/blob/master/inception_score/model.py,
@@ -40,6 +40,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import importer
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import image_ops
 from tensorflow.python.ops import linalg_ops
@@ -64,6 +65,12 @@ __all__ = [
     'frechet_classifier_distance_from_activations',
     'mean_only_frechet_classifier_distance_from_activations',
     'diagonal_only_frechet_classifier_distance_from_activations',
+    'kernel_inception_distance',
+    'kernel_inception_distance_and_std',
+    'kernel_classifier_distance',
+    'kernel_classifier_distance_and_std',
+    'kernel_classifier_distance_from_activations',
+    'kernel_classifier_distance_and_std_from_activations',
     'INCEPTION_DEFAULT_IMAGE_SIZE',
 ]
 
@@ -734,3 +741,372 @@ frechet_inception_distance = functools.partial(
     frechet_classifier_distance,
     classifier_fn=functools.partial(
         run_inception, output_tensor=INCEPTION_FINAL_POOL))
+
+
+def kernel_classifier_distance(real_images,
+                               generated_images,
+                               classifier_fn,
+                               num_classifier_batches=1,
+                               max_block_size=1024,
+                               dtype=None):
+  """Kernel "classifier" distance for evaluating a generative model.
+
+  This is based on the Kernel Inception distance, but for an arbitrary
+  embedding.
+
+  This technique is described in detail in https://arxiv.org/abs/1801.01401.
+  Given two distributions P and Q of activations, this function calculates
+
+      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
+        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
+
+  where k is the polynomial kernel
+
+      k(x, y) = ( x^T y / dimension + 1 )^3.
+
+  This captures how different the distributions of real and generated images'
+  visual features are. Like the Frechet distance (and unlike the Inception
+  score), this is a true distance and incorporates information about the
+  target images. Unlike the Frechet score, this function computes an
+  *unbiased* and asymptotically normal estimator, which makes comparing
+  estimates across models much more intuitive.
+
+  The estimator used takes time quadratic in max_block_size. Larger values of
+  max_block_size will decrease the variance of the estimator but increase the
+  computational cost. This differs slightly from the estimator used by the
+  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
+
+  NOTE: the blocking code assumes that real_activations and
+  generated_activations are both in random order. If either is sorted in a
+  meaningful order, the estimator will behave poorly.
+
+  NOTE: This function consumes images, computes their activations, and then
+  computes the classifier score. If you would like to precompute many
+  activations for real and generated images for large batches, or to compute
+  multiple scores based on the same images, please use
+  kernel_clasifier_distance_from_activations(), which this method also uses.
+
+  Args:
+    real_images: Real images to use to compute Kernel Inception distance.
+    generated_images: Generated images to use to compute Kernel Inception
+      distance.
+    classifier_fn: A function that takes images and produces activations
+      based on a classifier.
+    num_classifier_batches: Number of batches to split images in to in order to
+      efficiently run them through the classifier network.
+    max_estimator_block_size: integer, default 1024. The distance estimator
+      splits samples into blocks for computational efficiency. Larger values
+      are more computationally expensive but decrease the variance of the
+      distance estimate.
+    dtype: if not None, coerce activations to this dtype before computations.
+
+  Returns:
+   The Kernel Inception Distance. A floating-point scalar of the same type
+   as the output of the activations.
+  """
+  return kernel_classifier_distance_and_std(
+      real_images, generated_images, classifier_fn,
+      num_classifier_batches=num_classifier_batches,
+      max_block_size=max_block_size,
+      dtype=dtype)[0]
+
+
+kernel_inception_distance = functools.partial(
+    kernel_classifier_distance,
+    classifier_fn=functools.partial(
+        run_inception, output_tensor=INCEPTION_FINAL_POOL))
+
+
+def kernel_classifier_distance_and_std(real_images,
+                                       generated_images,
+                                       classifier_fn,
+                                       num_classifier_batches=1,
+                                       max_block_size=1024,
+                                       dtype=None):
+  """Kernel "classifier" distance for evaluating a generative model.
+
+  This is based on the Kernel Inception distance, but for an arbitrary
+  embedding. Also returns an estimate of the standard error of the distance
+  estimator.
+
+  This technique is described in detail in https://arxiv.org/abs/1801.01401.
+  Given two distributions P and Q of activations, this function calculates
+
+      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
+        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
+
+  where k is the polynomial kernel
+
+      k(x, y) = ( x^T y / dimension + 1 )^3.
+
+  This captures how different the distributions of real and generated images'
+  visual features are. Like the Frechet distance (and unlike the Inception
+  score), this is a true distance and incorporates information about the
+  target images. Unlike the Frechet score, this function computes an
+  *unbiased* and asymptotically normal estimator, which makes comparing
+  estimates across models much more intuitive.
+
+  The estimator used takes time quadratic in max_block_size. Larger values of
+  max_block_size will decrease the variance of the estimator but increase the
+  computational cost. This differs slightly from the estimator used by the
+  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
+
+  NOTE: the blocking code assumes that real_activations and
+  generated_activations are both in random order. If either is sorted in a
+  meaningful order, the estimator will behave poorly.
+
+  NOTE: This function consumes images, computes their activations, and then
+  computes the classifier score. If you would like to precompute many
+  activations for real and generated images for large batches, or to compute
+  multiple scores based on the same images, please use
+  kernel_clasifier_distance_from_activations(), which this method also uses.
+
+  Args:
+    real_images: Real images to use to compute Kernel Inception distance.
+    generated_images: Generated images to use to compute Kernel Inception
+      distance.
+    classifier_fn: A function that takes images and produces activations
+      based on a classifier.
+    num_classifier_batches: Number of batches to split images in to in order to
+      efficiently run them through the classifier network.
+    max_estimator_block_size: integer, default 1024. The distance estimator
+      splits samples into blocks for computational efficiency. Larger values
+      are more computationally expensive but decrease the variance of the
+      distance estimate. Having a smaller block size also gives a better
+      estimate of the standard error.
+    dtype: if not None, coerce activations to this dtype before computations.
+
+  Returns:
+   The Kernel Inception Distance. A floating-point scalar of the same type
+     as the output of the activations.
+   An estimate of the standard error of the distance estimator (a scalar of
+     the same type).
+  """
+  real_images_list = array_ops.split(
+      real_images, num_or_size_splits=num_classifier_batches)
+  generated_images_list = array_ops.split(
+      generated_images, num_or_size_splits=num_classifier_batches)
+
+  real_imgs = array_ops.stack(real_images_list)
+  generated_imgs = array_ops.stack(generated_images_list)
+
+  # Compute the activations using the memory-efficient `map_fn`.
+  def compute_activations(elems):
+    return functional_ops.map_fn(fn=classifier_fn,
+                                 elems=elems,
+                                 parallel_iterations=1,
+                                 back_prop=False,
+                                 swap_memory=True,
+                                 name='RunClassifier')
+
+  real_a = compute_activations(real_imgs)
+  gen_a = compute_activations(generated_imgs)
+
+  # Ensure the activations have the right shapes.
+  real_a = array_ops.concat(array_ops.unstack(real_a), 0)
+  gen_a = array_ops.concat(array_ops.unstack(gen_a), 0)
+
+  return kernel_classifier_distance_and_std_from_activations(
+      real_a, gen_a, max_block_size=max_block_size)
+
+
+kernel_inception_distance_and_std = functools.partial(
+    kernel_classifier_distance_and_std,
+    classifier_fn=functools.partial(
+        run_inception, output_tensor=INCEPTION_FINAL_POOL))
+
+
+def kernel_classifier_distance_from_activations(real_activations,
+                                                generated_activations,
+                                                max_block_size=1024,
+                                                dtype=None):
+  '''Kernel "classifier" distance for evaluating a generative model.
+
+  This methods computes the kernel classifier distance from activations of
+  real images and generated images. This can be used independently of the
+  kernel_classifier_distance() method, especially in the case of using large
+  batches during evaluation where we would like to precompute all of the
+  activations before computing the classifier distance, or if we want to
+  compute multiple metrics based on the same images.
+
+  This technique is described in detail in https://arxiv.org/abs/1801.01401.
+  Given two distributions P and Q of activations, this function calculates
+
+      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
+        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
+
+  where k is the polynomial kernel
+
+      k(x, y) = ( x^T y / dimension + 1 )^3.
+
+  This captures how different the distributions of real and generated images'
+  visual features are. Like the Frechet distance (and unlike the Inception
+  score), this is a true distance and incorporates information about the
+  target images. Unlike the Frechet score, this function computes an
+  *unbiased* and asymptotically normal estimator, which makes comparing
+  estimates across models much more intuitive.
+
+  The estimator used takes time quadratic in max_block_size. Larger values of
+  max_block_size will decrease the variance of the estimator but increase the
+  computational cost. This differs slightly from the estimator used by the
+  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
+
+  NOTE: the blocking code assumes that real_activations and
+  generated_activations are both in random order. If either is sorted in a
+  meaningful order, the estimator will behave poorly.
+
+  Args:
+    real_activations: 2D Tensor containing activations of real data. Shape is
+      [batch_size, activation_size].
+    generated_activations: 2D Tensor containing activations of generated data.
+      Shape is [batch_size, activation_size].
+    max_block_size: integer, default 1024. The distance estimator
+      splits samples into blocks for computational efficiency. Larger values
+      are more computationally expensive but decrease the variance of the
+      distance estimate.
+    dtype: if not None, coerce activations to this dtype before computations.
+
+  Returns:
+   The Kernel Inception Distance. A floating-point scalar of the same type
+   as the output of the activations.
+  '''
+  return kernel_classifier_distance_and_std_from_activations(
+      real_activations, generated_activations,
+      max_block_size=max_block_size)[0]
+
+
+def kernel_classifier_distance_and_std_from_activations(real_activations,
+                                                        generated_activations,
+                                                        max_block_size=1024,
+                                                        dtype=None):
+  '''Kernel "classifier" distance for evaluating a generative model.
+
+  This methods computes the kernel classifier distance from activations of
+  real images and generated images. This can be used independently of the
+  kernel_classifier_distance() method, especially in the case of using large
+  batches during evaluation where we would like to precompute all of the
+  activations before computing the classifier distance, or if we want to
+  compute multiple metrics based on the same images. It also returns a rough
+  estimate of the standard error of the estimator.
+
+  This technique is described in detail in https://arxiv.org/abs/1801.01401.
+  Given two distributions P and Q of activations, this function calculates
+
+      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
+        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
+
+  where k is the polynomial kernel
+
+      k(x, y) = ( x^T y / dimension + 1 )^3.
+
+  This captures how different the distributions of real and generated images'
+  visual features are. Like the Frechet distance (and unlike the Inception
+  score), this is a true distance and incorporates information about the
+  target images. Unlike the Frechet score, this function computes an
+  *unbiased* and asymptotically normal estimator, which makes comparing
+  estimates across models much more intuitive.
+
+  The estimator used takes time quadratic in max_block_size. Larger values of
+  max_block_size will decrease the variance of the estimator but increase the
+  computational cost. This differs slightly from the estimator used by the
+  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
+  The estimate of the standard error will also be more reliable when there are
+  more blocks, i.e. when max_block_size is smaller.
+
+  NOTE: the blocking code assumes that real_activations and
+  generated_activations are both in random order. If either is sorted in a
+  meaningful order, the estimator will behave poorly.
+
+  Args:
+    real_activations: 2D Tensor containing activations of real data. Shape is
+      [batch_size, activation_size].
+    generated_activations: 2D Tensor containing activations of generated data.
+      Shape is [batch_size, activation_size].
+    max_block_size: integer, default 1024. The distance estimator
+      splits samples into blocks for computational efficiency. Larger values
+      are more computationally expensive but decrease the variance of the
+      distance estimate. Having a smaller block size also gives a better
+      estimate of the standard error.
+    dtype: if not None, coerce activations to this dtype before computations.
+
+  Returns:
+   The Kernel Inception Distance. A floating-point scalar of the same type
+     as the output of the activations.
+   An estimate of the standard error of the distance estimator (a scalar of
+     the same type).
+  '''
+
+  real_activations.shape.assert_has_rank(2)
+  generated_activations.shape.assert_has_rank(2)
+  real_activations.shape[1].assert_is_compatible_with(
+      generated_activations.shape[1])
+
+  if dtype is None:
+    dtype = real_activations.dtype
+    assert generated_activations.dtype == dtype
+  else:
+    real_activations = math_ops.cast(real_activations, dtype)
+    generated_activations = math_ops.cast(generated_activations, dtype)
+
+  # Figure out how to split the activations into blocks of approximately
+  # equal size, with none larger than max_block_size.
+  n_r = array_ops.shape(real_activations)[0]
+  n_g = array_ops.shape(generated_activations)[0]
+
+  n_bigger = math_ops.maximum(n_r, n_g)
+  n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size))
+
+  v_r = n_r // n_blocks
+  v_g = n_g // n_blocks
+
+  n_plusone_r = n_r - v_r * n_blocks
+  n_plusone_g = n_g - v_g * n_blocks
+
+  sizes_r = array_ops.concat([
+      array_ops.fill([n_blocks - n_plusone_r], v_r),
+      array_ops.fill([n_plusone_r], v_r + 1),
+  ], 0)
+  sizes_g = array_ops.concat([
+      array_ops.fill([n_blocks - n_plusone_g], v_g),
+      array_ops.fill([n_plusone_g], v_g + 1),
+  ], 0)
+
+  zero = array_ops.zeros([1], dtype=dtypes.int32)
+  inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0)
+  inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0)
+
+  dim = math_ops.cast(real_activations.shape[1], dtype)
+
+  def compute_kid_block(i):
+    "Compute the ith block of the KID estimate."
+    r_s = inds_r[i]
+    r_e = inds_r[i + 1]
+    r = real_activations[r_s:r_e]
+    m = math_ops.cast(r_e - r_s, dtype)
+
+    g_s = inds_g[i]
+    g_e = inds_g[i + 1]
+    g = generated_activations[g_s:g_e]
+    n = math_ops.cast(g_e - g_s, dtype)
+
+    k_rr = (math_ops.matmul(r, r, transpose_b=True) / dim + 1) ** 3
+    k_rg = (math_ops.matmul(r, g, transpose_b=True) / dim + 1) ** 3
+    k_gg = (math_ops.matmul(g, g, transpose_b=True) / dim + 1) ** 3
+    return (
+        -2 * math_ops.reduce_mean(k_rg)
+        + (math_ops.reduce_sum(k_rr) - math_ops.trace(k_rr)) / (m * (m - 1))
+        + (math_ops.reduce_sum(k_gg) - math_ops.trace(k_gg)) / (n * (n - 1)))
+
+  ests = functional_ops.map_fn(
+      compute_kid_block, math_ops.range(n_blocks), dtype=dtype, back_prop=False)
+
+  mn = math_ops.reduce_mean(ests)
+
+  # nn_impl.moments doesn't use the Bessel correction, which we want here
+  n_blocks_ = math_ops.cast(n_blocks, dtype)
+  var = control_flow_ops.cond(
+      math_ops.less_equal(n_blocks, 1),
+      lambda: array_ops.constant(float("nan"), dtype=dtype),
+      lambda: math_ops.reduce_sum(math_ops.square(ests - mn)) / (n_blocks_ - 1))
+
+  return mn, math_ops.sqrt(var / n_blocks_)
diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
index 4fb8d58bc9..b6042bdb72 100644
--- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
+++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py
@@ -86,6 +86,43 @@ def _expected_fid(real_imgs, gen_imgs):
 def _expected_trace_sqrt_product(sigma, sigma_v):
   return np.trace(scp_linalg.sqrtm(np.dot(sigma, sigma_v)))
 
+
+def _expected_kid_and_std(real_imgs, gen_imgs, max_block_size=1024):
+  n_r, dim = real_imgs.shape
+  n_g = gen_imgs.shape[0]
+
+  n_blocks = int(np.ceil(max(n_r, n_g) / max_block_size))
+
+  sizes_r = np.full(n_blocks, n_r // n_blocks)
+  to_patch = n_r - n_blocks * (n_r // n_blocks)
+  if to_patch > 0:
+    sizes_r[-to_patch:] += 1
+  inds_r = np.r_[0, np.cumsum(sizes_r)]
+  assert inds_r[-1] == n_r
+
+  sizes_g = np.full(n_blocks, n_g // n_blocks)
+  to_patch = n_g - n_blocks * (n_g // n_blocks)
+  if to_patch > 0:
+    sizes_g[-to_patch:] += 1
+  inds_g = np.r_[0, np.cumsum(sizes_g)]
+  assert inds_g[-1] == n_g
+
+  ests = []
+  for i in range(n_blocks):
+    r = real_imgs[inds_r[i]:inds_r[i + 1]]
+    g = gen_imgs[inds_g[i]:inds_g[i + 1]]
+
+    k_rr = (np.dot(r, r.T) / dim + 1) ** 3
+    k_rg = (np.dot(r, g.T) / dim + 1) ** 3
+    k_gg = (np.dot(g, g.T) / dim + 1) ** 3
+    ests.append(
+        -2 * k_rg.mean()
+        + k_rr[np.triu_indices_from(k_rr, k=1)].mean()
+        + k_gg[np.triu_indices_from(k_gg, k=1)].mean())
+
+  var = np.var(ests, ddof=1) if len(ests) > 1 else np.nan
+  return np.mean(ests), np.sqrt(var / len(ests))
+
 # A dummy GraphDef string with the minimum number of Ops.
 graphdef_string = """
 node {
@@ -272,6 +309,18 @@ class ClassifierMetricsTest(test.TestCase, parameterized.TestCase):
     # Check that none of the model variables are trainable.
     self.assertListEqual([], variables.trainable_variables())
 
+  def test_kernel_inception_distance_graph(self):
+    """Test `frechet_inception_distance` graph construction."""
+    img = array_ops.ones([7, 299, 299, 3])
+    distance = _run_with_mock(
+        classifier_metrics.kernel_inception_distance, img, img)
+
+    self.assertTrue(isinstance(distance, ops.Tensor))
+    distance.shape.assert_has_rank(0)
+
+    # Check that none of the model variables are trainable.
+    self.assertListEqual([], variables.trainable_variables())
+
   def test_run_inception_multicall(self):
     """Test that `run_inception` can be called multiple times."""
     for batch_size in (7, 3, 2):
@@ -411,6 +460,54 @@ class ClassifierMetricsTest(test.TestCase, parameterized.TestCase):
     # Check that the FIDs increase monotonically.
     self.assertTrue(all(fid_a < fid_b for fid_a, fid_b in zip(fids, fids[1:])))
 
+  def test_kernel_classifier_distance_value(self):
+    """Test that `kernel_classifier_distance` gives the correct value."""
+    np.random.seed(0)
+
+    test_pool_real_a = np.float32(np.random.randn(512, 256))
+    test_pool_gen_a = np.float32(np.random.randn(512, 256) * 1.1 + .05)
+
+    kid_op = _run_with_mock(
+        classifier_metrics.kernel_classifier_distance_and_std,
+        test_pool_real_a,
+        test_pool_gen_a,
+        classifier_fn=lambda x: x,
+        max_block_size=600)
+
+    with self.test_session() as sess:
+      actual_kid, actual_std = sess.run(kid_op)
+
+    expected_kid, expected_std = _expected_kid_and_std(
+        test_pool_real_a, test_pool_gen_a)
+
+    self.assertAllClose(expected_kid, actual_kid, 0.001)
+    self.assertAllClose(expected_std, actual_std, 0.001)
+
+  def test_kernel_classifier_distance_block_sizes(self):
+    """Test that `kernel_classifier_distance` works with unusual max_block_size
+    values.."""
+    np.random.seed(0)
+
+    test_pool_real_a = np.float32(np.random.randn(512, 256))
+    test_pool_gen_a = np.float32(np.random.randn(768, 256) * 1.1 + .05)
+
+    max_block_size = array_ops.placeholder(dtypes.int32, shape=())
+    kid_op = _run_with_mock(
+        classifier_metrics.kernel_classifier_distance_and_std_from_activations,
+        array_ops.constant(test_pool_real_a),
+        array_ops.constant(test_pool_gen_a),
+        max_block_size=max_block_size)
+
+    for block_size in [50, 512, 1000]:
+      with self.test_session() as sess:
+        actual_kid, actual_std = sess.run(kid_op, {max_block_size: block_size})
+
+      expected_kid, expected_std = _expected_kid_and_std(
+          test_pool_real_a, test_pool_gen_a, max_block_size=block_size)
+
+      self.assertAllClose(expected_kid, actual_kid, 0.001)
+      self.assertAllClose(expected_std, actual_std, 0.001)
+
   def test_trace_sqrt_product_value(self):
     """Test that `trace_sqrt_product` gives the correct value."""
     np.random.seed(0)
-- 
GitLab


From f0886f7269de900d226455d4831722f6fc94a71b Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Tue, 25 Sep 2018 09:59:17 +0800
Subject: [PATCH 0083/1085] Fix build dependencies in tensorflow/cc/BUILD.

---
 tensorflow/cc/BUILD                            | 1 +
 tensorflow/python/kernel_tests/relu_op_test.py | 4 ++--
 tensorflow/python/ops/nn_ops.py                | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index f56521dac0..e99d15f85d 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -410,6 +410,7 @@ tf_cc_test(
     srcs = ["gradients/nn_grad_test.cc"],
     deps = [
         ":cc_ops",
+        ":cc_ops_internal",
         ":grad_op_registry",
         ":grad_testutil",
         ":gradient_checker",
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 86d9c90e83..d97a1613b9 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -351,7 +351,7 @@ class LeakyReluTest(test.TestCase):
     self.assertLess(err, 1e-10)
 
   def testGradGradFloat32(self):
-    with compat.forward_compatibility_horizon(2018, 10, 2):
+    with compat.forward_compatibility_horizon(2018, 11, 2):
       with self.test_session():
         x = constant_op.constant(
             [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
@@ -369,7 +369,7 @@ class LeakyReluTest(test.TestCase):
       self.assertLess(err, 1e-4)
 
   def testGradGradFloat64(self):
-    with compat.forward_compatibility_horizon(2018, 10, 2):
+    with compat.forward_compatibility_horizon(2018, 11, 2):
       with self.test_session():
         x = constant_op.constant(
             [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index d646245ce3..2861f40586 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1601,7 +1601,7 @@ def leaky_relu(features, alpha=0.2, name=None):
     features = ops.convert_to_tensor(features, name="features")
     if features.dtype.is_integer:
       features = math_ops.to_float(features)
-    if compat.forward_compatible(2018, 10, 1):
+    if compat.forward_compatible(2018, 11, 1):
       return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name)
     alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha")
     return math_ops.maximum(alpha * features, features, name=name)
-- 
GitLab


From c12a90e45c5f94b80289f4278f81be4a0348fa19 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Tue, 25 Sep 2018 13:51:36 +0800
Subject: [PATCH 0084/1085] fix pylint

---
 tensorflow/python/ops/variables.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 2d6a767fed..d058478d58 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -2402,7 +2402,8 @@ class PartitionedVariable(object):
     if len(partition_axes) > 1:
       raise NotImplementedError(
           "Cannot concatenate along more than one dimension: %s.  "
-          "Multi-axis partition assign_fn is not supported" % str(partition_axes))
+          "Multi-axis partition assign_fn is not supported "
+          % str(partition_axes))
     partition_ix = partition_axes[0]
     size_splits_list = [
         var.shape[partition_ix].value for var in self._variable_list]
-- 
GitLab


From 3d60d636de59449a8448cbcbcd71af82e2871538 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Tue, 25 Sep 2018 13:53:36 +0800
Subject: [PATCH 0085/1085] fix back variabe name

---
 tensorflow/python/kernel_tests/variables_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 687784c8b7..0b101529fe 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -673,7 +673,7 @@ class PartitionedVariableTest(test.TestCase):
         v0._set_save_slice_info(
             variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
         v1._set_save_slice_info(
-            variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1]))
+            variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
         partitions = [2]
 
         variables.PartitionedVariable(
-- 
GitLab


From 21d4e8bb30a1753a81edd4912881d95b47ae3d1c Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Tue, 25 Sep 2018 15:50:10 +0800
Subject: [PATCH 0086/1085] remove warning lines

---
 tensorflow/python/ops/variables.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index d058478d58..69f63bc8e6 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -2401,7 +2401,6 @@ class PartitionedVariable(object):
     partition_axes = self._partition_axes()
     if len(partition_axes) > 1:
       raise NotImplementedError(
-          "Cannot concatenate along more than one dimension: %s.  "
           "Multi-axis partition assign_fn is not supported "
           % str(partition_axes))
     partition_ix = partition_axes[0]
-- 
GitLab


From 7630e9df4804a01f5dd0ab20d4c0bcfb58e45432 Mon Sep 17 00:00:00 2001
From: Richard Yu <yohan.richard.yu@gmail.com>
Date: Tue, 25 Sep 2018 15:50:13 -0700
Subject: [PATCH 0087/1085] Fixing error

---
 tensorflow/contrib/quantize/python/fold_batch_norms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index d882b79892..d9f179bee4 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -628,7 +628,7 @@ def _GetBatchNormParams(graph, context, has_scaling):
   bn_decay_var_tensor = _FindMatchingTensor(graph, op_suffix_bn_decay_var,
                                             context)
   if batch_mean_tensor is None and moving_mean_tensor is None:
-    raise ValueError('Error folding unfused batch norms')
+    ValueError('Error folding unfused batch norms')
   if has_scaling:
     gamma_tensor = _FindMatchingTensor(graph, op_suffix_gamma, context)
 
-- 
GitLab


From f55e5ef27b3ccf1b75932e219f7358976dbf56c2 Mon Sep 17 00:00:00 2001
From: IMBurbank <bassmanburbank@gmail.com>
Date: Tue, 25 Sep 2018 18:39:11 -0600
Subject: [PATCH 0088/1085] Update to use python 2-3 compatible function
 tf_inspect.getfullargspec.

---
 .../python/losses/python/tuple_losses_impl.py |   2 +-
 .../labeled_tensor/python/ops/_typecheck.py   |   2 +-
 .../layers/python/layers/rev_block_lib.py     |   3 +-
 .../python/learn/estimators/estimator.py      |   4 +-
 .../learn/python/learn/estimators/head.py     |   2 +-
 .../learn/python/learn/experiment_test.py     |   2 +-
 .../learn/python/learn/export_strategy.py     |   2 +-
 .../contrib/learn/python/learn/metric_spec.py |   2 +-
 .../contrib/learn/python/learn/monitors.py    |   2 +-
 .../contrib/tpu/python/tpu/tpu_function.py    |   2 +-
 tensorflow/python/framework/errors_impl.py    |   2 +-
 tensorflow/python/framework/function.py       |   6 +-
 tensorflow/python/keras/backend_test.py       |   2 +-
 tensorflow/python/keras/testing_utils.py      |   2 +-
 .../kernel_tests/variable_scope_test.py       |   4 +-
 tensorflow/python/ops/variable_scope.py       |   4 +-
 tensorflow/python/util/tf_contextlib_test.py  |   2 +-
 tensorflow/python/util/tf_inspect.py          |   7 +-
 tensorflow/python/util/tf_inspect_test.py     | 249 +++++++++++++++++-
 .../api/lib/python_object_to_proto_visitor.py |   2 +-
 20 files changed, 267 insertions(+), 36 deletions(-)

diff --git a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
index 221c70c38b..00a83e5e55 100644
--- a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
+++ b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
@@ -101,7 +101,7 @@ def _args_to_gan_model(loss_fn):
   """
   # Match arguments in `loss_fn` to elements of `namedtuple`.
   # TODO(joelshor): Properly handle `varargs` and `keywords`.
-  argspec = tf_inspect.getargspec(loss_fn)
+  argspec = tf_inspect.getfullargspec(loss_fn)
   defaults = argspec.defaults or []
 
   required_args = set(argspec.args[:-len(defaults)])
diff --git a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
index 80fa17ec1f..0e23039847 100644
--- a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
+++ b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
@@ -230,7 +230,7 @@ def accepts(*types):
 
   def check_accepts(f):
     """Check the types."""
-    spec = tf_inspect.getargspec(f)
+    spec = tf_inspect.getfullargspec(f)
 
     num_function_arguments = len(spec.args)
     if len(types) != num_function_arguments:
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
index 06da32072f..55979cc391 100644
--- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
@@ -576,7 +576,8 @@ def _recomputing_grad_fn(compute_fn,
 
 def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False):
   """See recompute_grad."""
-  has_is_recompute_kwarg = "is_recomputing" in tf_inspect.getargspec(fn).args
+  has_is_recompute_kwarg = (
+      "is_recomputing" in tf_inspect.getfullargspec(fn).args)
   for arg in args:
     if not isinstance(arg, framework_ops.Tensor):
       raise ValueError("All inputs to function must be Tensors")
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index c1de42782e..b88923bca2 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -199,11 +199,11 @@ def _model_fn_args(fn):
   if hasattr(fn, 'func') and hasattr(fn, 'keywords') and hasattr(fn, 'args'):
     # Handle functools.partial and similar objects.
     return tuple([
-        arg for arg in tf_inspect.getargspec(fn.func).args[len(fn.args):]
+        arg for arg in tf_inspect.getfullargspec(fn.func).args[len(fn.args):]
         if arg not in set(fn.keywords.keys())
     ])
   # Handle function.
-  return tuple(tf_inspect.getargspec(fn).args)
+  return tuple(tf_inspect.getfullargspec(fn).args)
 
 
 def _get_replica_device_setter(config):
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index c6f79e00d5..63dd08316b 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -1861,7 +1861,7 @@ def _get_arguments(func):
   _, func = tf_decorator.unwrap(func)
   if hasattr(func, "__code__"):
     # Regular function.
-    return tf_inspect.getargspec(func)
+    return tf_inspect.getfullargspec(func)
   elif hasattr(func, "func"):
     # Partial function.
     return _get_arguments(func.func)
diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py
index fb16c94c29..6926696fb6 100644
--- a/tensorflow/contrib/learn/python/learn/experiment_test.py
+++ b/tensorflow/contrib/learn/python/learn/experiment_test.py
@@ -126,7 +126,7 @@ class TestBaseEstimator(object):
 
 def _check_method_supports_args(method, kwargs):
   """Checks that the given method supports the given args."""
-  supported_args = tuple(tf_inspect.getargspec(method).args)
+  supported_args = tuple(tf_inspect.getfullargspec(method).args)
   for kwarg in kwargs:
     if kwarg not in supported_args:
       raise ValueError(
diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py
index 075cab536e..0d6e0cdc18 100644
--- a/tensorflow/contrib/learn/python/learn/export_strategy.py
+++ b/tensorflow/contrib/learn/python/learn/export_strategy.py
@@ -96,7 +96,7 @@ class ExportStrategy(
     """
     # don't break existing export_fns that don't accept checkpoint_path and
     # eval_result
-    export_fn_args = tf_inspect.getargspec(self.export_fn).args
+    export_fn_args = tf_inspect.getfullargspec(self.export_fn).args
     kwargs = {}
     if 'checkpoint_path' in export_fn_args:
       kwargs['checkpoint_path'] = checkpoint_path
diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py
index 97220365d5..604d6d46b4 100644
--- a/tensorflow/contrib/learn/python/learn/metric_spec.py
+++ b/tensorflow/contrib/learn/python/learn/metric_spec.py
@@ -51,7 +51,7 @@ def _args(fn):
     return tuple(
         [arg for arg in _args(fn.func) if arg not in set(fn.keywords.keys())])
   # Handle function.
-  return tuple(tf_inspect.getargspec(fn).args)
+  return tuple(tf_inspect.getfullargspec(fn).args)
 
 
 _CANONICAL_LABELS_ARG = 'labels'
diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py
index 3d691d4340..5f61e0264f 100644
--- a/tensorflow/contrib/learn/python/learn/monitors.py
+++ b/tensorflow/contrib/learn/python/learn/monitors.py
@@ -1303,7 +1303,7 @@ class RunHookAdapterForMonitors(session_run_hook.SessionRunHook):
   def end(self, session):
     self._last_step = None
     for m in self._monitors:
-      if "session" in tf_inspect.getargspec(m.end).args:
+      if "session" in tf_inspect.getfullargspec(m.end).args:
         m.end(session=session)
       else:
         m.end()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_function.py b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
index 0c7a38dbbb..9c4bd1c4d1 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_function.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
@@ -80,7 +80,7 @@ def check_function_argument_count(func, input_arity, infeed_queue):
   number_of_arguments_needed = input_arity
   if infeed_queue is not None:
     number_of_arguments_needed += infeed_queue.number_of_tuple_elements
-  arg_spec = tf_inspect.getargspec(func)
+  arg_spec = tf_inspect.getfullargspec(func)
   number_of_args = len(arg_spec.args)
   if arg_spec.defaults is None:
     number_of_defaults = 0
diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py
index 5af71f2cfb..c373e75a74 100644
--- a/tensorflow/python/framework/errors_impl.py
+++ b/tensorflow/python/framework/errors_impl.py
@@ -55,7 +55,7 @@ class OpError(Exception):
 
   def __reduce__(self):
     # Allow the subclasses to accept less arguments in their __init__.
-    init_argspec = tf_inspect.getargspec(self.__class__.__init__)
+    init_argspec = tf_inspect.getfullargspec(self.__class__.__init__)
     args = tuple(getattr(self, arg) for arg in init_argspec.args[1:])
     return self.__class__, args
 
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index f287289bd0..3db6f683c9 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -132,9 +132,9 @@ class Defun(object):
       raise ValueError("func %s must be callable" % func)
 
     # Func should not use kwargs and defaults.
-    argspec = tf_inspect.getargspec(func)
-    if argspec.keywords or argspec.defaults:
-      raise ValueError("Functions with argument defaults or keyword "
+    argspec = tf_inspect.getfullargspec(func)
+    if argspec.varkw or argspec.defaults:
+      raise ValueError("Functions with argument defaults or varkw "
                        "arguments are not supported.")
 
     # Computes how many arguments 'func' has.
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index ab71589940..31191d0d35 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -452,7 +452,7 @@ class BackendLinearAlgebraTest(test.TestCase):
         compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5),
                                          keras_kwargs={'axis': -1},
                                          np_kwargs={'axis': -1})
-        if 'keepdims' in tf_inspect.getargspec(keras_op).args:
+        if 'keepdims' in tf_inspect.getfullargspec(keras_op).args:
           compare_single_input_op_to_numpy(keras_op, np_op,
                                            input_shape=(4, 7, 5),
                                            keras_kwargs={'axis': 1,
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index 501b50ba5f..1afaba5653 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -102,7 +102,7 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
   layer.set_weights(weights)
 
   # test and instantiation from weights
-  if 'weights' in tf_inspect.getargspec(layer_cls.__init__):
+  if 'weights' in tf_inspect.getfullargspec(layer_cls.__init__):
     kwargs['weights'] = weights
     layer = layer_cls(**kwargs)
 
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 401e1ae102..1d0b72b17a 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -998,8 +998,8 @@ class VariableScopeTest(test.TestCase):
 
   def testSignatureGetVarVsGetLocalVar(self):
     """get_{local,}variable() must take the same list of args."""
-    arg_names = tf_inspect.getargspec(variable_scope.get_variable)[0]
-    local_arg_names = tf_inspect.getargspec(
+    arg_names = tf_inspect.getfullargspec(variable_scope.get_variable)[0]
+    local_arg_names = tf_inspect.getfullargspec(
         variable_scope.get_local_variable)[0]
     self.assertEqual(arg_names, local_arg_names)
 
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index a43676cd70..3cc1eb916d 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -892,14 +892,14 @@ class _VariableStore(object):
         if shape and shape.is_fully_defined():
           init_val = lambda: initializer(  # pylint: disable=g-long-lambda
               shape.as_list(), dtype=dtype, partition_info=partition_info)
-        elif not tf_inspect.getargspec(initializer).args:
+        elif not tf_inspect.getfullargspec(initializer).args:
           init_val = initializer
         else:
           raise ValueError("You can only pass an initializer function that "
                            "expects no arguments to its callable when the "
                            "shape is not fully defined. The given initializer "
                            "function expects the following args %s" %
-                           tf_inspect.getargspec(initializer).args)
+                           tf_inspect.getfullargspec(initializer).args)
         variable_dtype = dtype.base_dtype
 
     # Create the variable.
diff --git a/tensorflow/python/util/tf_contextlib_test.py b/tensorflow/python/util/tf_contextlib_test.py
index 4a5bf388a6..1e921b5ea3 100644
--- a/tensorflow/python/util/tf_contextlib_test.py
+++ b/tensorflow/python/util/tf_contextlib_test.py
@@ -83,7 +83,7 @@ class TfContextlibTest(test.TestCase):
     self.assertFalse(isinstance(target, tf_decorator.TFDecorator))
 
   def testGetArgSpecReturnsWrappedArgSpec(self):
-    argspec = tf_inspect.getargspec(test_params_and_defaults)
+    argspec = tf_inspect.getfullargspec(test_params_and_defaults)
     self.assertEqual(['a', 'b', 'c', 'd'], argspec.args)
     self.assertEqual((2, True, 'hello'), argspec.defaults)
 
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index 967c872c2a..234850ac3f 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -43,7 +43,12 @@ def currentframe():
 
 
 def getargspec(obj):
-  """TFDecorator-aware replacement for inspect.getargspec.
+  """TFDecorator-aware replacement for `inspect.getargspec`.
+
+  This should not be called from other modules. It is deprecated in python3.
+
+  Use `getfullargspec`. It is a TFDecorator-aware replacement for 
+  `inspect.getfullargspec` compatible with both python2 and python3.
 
   Args:
     obj: A function, partial function, or callable object, possibly
diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py
index d3b7e4b969..55f88f8fc6 100644
--- a/tensorflow/python/util/tf_inspect_test.py
+++ b/tensorflow/python/util/tf_inspect_test.py
@@ -122,18 +122,6 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getargspec(partial_func))
 
-  def testGetFullArgsSpecForPartial(self):
-
-    def func(a, b):
-      del a, b
-
-    partial_function = functools.partial(func, 1)
-    argspec = tf_inspect.FullArgSpec(
-        args=['b'], varargs=None, varkw=None, defaults=None,
-        kwonlyargs=[], kwonlydefaults=None, annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_function))
-
   def testGetArgSpecOnPartialInvalidArgspec(self):
     """Tests getargspec on partial function that doesn't have valid argspec."""
 
@@ -303,6 +291,243 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getargspec(NewClass))
 
+  def testGetFullArgSpecOnDecoratorsThatDontProvideFullArgSpec(self):
+    argspec = tf_inspect.getfullargspec(
+        test_decorated_function_with_defaults)
+    self.assertEqual(['a', 'b', 'c'], argspec.args)
+    self.assertEqual((2, 'Hello'), argspec.defaults)
+
+  def testGetFullArgSpecOnDecoratorThatChangesFullArgSpec(self):
+    argspec = tf_inspect.FullArgSpec(
+        args=['a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    decorator = tf_decorator.TFDecorator('', test_undecorated_function, '',
+                                         argspec)
+    self.assertEqual(argspec, tf_inspect.getfullargspec(decorator))
+
+  def testGetFullArgSpecIgnoresDecoratorsThatDontProvideFullArgSpec(self):
+    argspec = tf_inspect.FullArgSpec(
+        args=['a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    inner_decorator = tf_decorator.TFDecorator('', test_undecorated_function,
+                                               '', argspec)
+    outer_decorator = tf_decorator.TFDecorator('', inner_decorator)
+    self.assertEqual(argspec, tf_inspect.getfullargspec(outer_decorator))
+
+  def testGetFullArgSpecReturnsOutermostDecoratorThatChangesFullArgSpec(self):
+    outer_argspec = tf_inspect.FullArgSpec(
+        args=['a'], varargs=None, varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+    inner_argspec = tf_inspect.FullArgSpec(
+        args=['b'], varargs=None, varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    inner_decorator = tf_decorator.TFDecorator('', test_undecorated_function,
+                                               '', inner_argspec)
+    outer_decorator = tf_decorator.TFDecorator('', inner_decorator, '',
+                                               outer_argspec)
+    self.assertEqual(outer_argspec,
+                     tf_inspect.getfullargspec(outer_decorator))
+
+  def testGetFullArgsSpecForPartial(self):
+
+    def func(a, b):
+      del a, b
+
+    partial_function = functools.partial(func, 1)
+    argspec = tf_inspect.FullArgSpec(
+        args=['b'], varargs=None, varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_function))
+
+  def testGetFullArgSpecOnPartialInvalidFullArgSpec(self):
+    """Tests getfullargspec.
+
+    Tests on partial function that doesn't have valid fullargspec.
+    """
+
+    def func(m, n, l, k=4):
+      return 2 * m + l + n * k
+
+    partial_func = functools.partial(func, n=7)
+
+    exception_message = (r"Some arguments \['l'\] do not have default value, "
+                         "but they are positioned after those with default "
+                         "values. This can not be expressed with ArgSpec.")
+    with self.assertRaisesRegexp(ValueError, exception_message):
+      tf_inspect.getfullargspec(partial_func)
+
+  def testGetFullArgSpecOnPartialValidFullArgSpec(self):
+    """Tests getfullargspec on partial function with valid fullargspec."""
+
+    def func(m, n, l, k=4):
+      return 2 * m + l + n * k
+
+    partial_func = functools.partial(func, n=7, l=2)
+    argspec = tf_inspect.FullArgSpec(
+        args=['m', 'n', 'l', 'k'],
+        varargs=None,
+        varkw=None,
+        defaults=(7, 2, 4),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialNoArgumentsLeft(self):
+    """Tests getfullargspec on partial function that prunes all arguments."""
+
+    def func(m, n):
+      return 2 * m + n
+
+    partial_func = functools.partial(func, 7, 10)
+    argspec = tf_inspect.FullArgSpec(
+        args=[], varargs=None, varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialKeywordArgument(self):
+    """Tests getfullargspec on partial function that prunes some arguments."""
+
+    def func(m, n):
+      return 2 * m + n
+
+    partial_func = functools.partial(func, n=7)
+    argspec = tf_inspect.FullArgSpec(
+        args=['m', 'n'], varargs=None, varkw=None, defaults=(7,),
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialKeywordArgumentWithDefaultValue(self):
+    """Tests getfullargspec.
+    
+    Tests on partial function that prunes argument by keyword.
+    """
+
+    def func(m=1, n=2):
+      return 2 * m + n
+
+    partial_func = functools.partial(func, n=7)
+    argspec = tf_inspect.FullArgSpec(
+        args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7),
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialWithVarargs(self):
+    """Tests getfullargspec on partial function with variable arguments."""
+
+    def func(m, *arg):
+      return m + len(arg)
+
+    partial_func = functools.partial(func, 7, 8)
+    argspec = tf_inspect.FullArgSpec(
+        args=[], varargs='arg', varkw=None, defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialWithVarkwargs(self):
+    """Tests getfullargspec.
+
+    Tests on partial function with variable keyword arguments.
+    """
+
+    def func(m, n, **kwarg):
+      return m * n + len(kwarg)
+
+    partial_func = functools.partial(func, 7)
+    argspec = tf_inspect.FullArgSpec(
+        args=['n'], varargs=None, varkw='kwarg', defaults=None,
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnPartialWithDecorator(self):
+    """Tests getfullargspec on decorated partial function."""
+
+    @test_decorator('decorator')
+    def func(m=1, n=2):
+      return 2 * m + n
+
+    partial_func = functools.partial(func, n=7)
+    argspec = tf_inspect.FullArgSpec(
+        args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7),
+        kwonlyargs=[], kwonlydefaults=None, annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
+
+  def testGetFullArgSpecOnCallableObject(self):
+
+    class Callable(object):
+
+      def __call__(self, a, b=1, c='hello'):
+        pass
+
+    argspec = tf_inspect.FullArgSpec(
+        args=['self', 'a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    test_obj = Callable()
+    self.assertEqual(argspec, tf_inspect.getfullargspec(test_obj))
+
+  def testGetFullArgSpecOnInitClass(self):
+
+    class InitClass(object):
+
+      def __init__(self, a, b=1, c='hello'):
+        pass
+
+    argspec = tf_inspect.FullArgSpec(
+        args=['self', 'a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(InitClass))
+
+  def testGetFullArgSpecOnNewClass(self):
+
+    class NewClass(object):
+
+      def __new__(cls, a, b=1, c='hello'):
+        pass
+
+    argspec = tf_inspect.FullArgSpec(
+        args=['cls', 'a', 'b', 'c'],
+        varargs=None,
+        varkw=None,
+        defaults=(1, 'hello'),
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+
+    self.assertEqual(argspec, tf_inspect.getfullargspec(NewClass))
+
   def testGetDoc(self):
     self.assertEqual('Test Decorated Function With Defaults Docstring.',
                      tf_inspect.getdoc(test_decorated_function_with_defaults))
diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
index 3a48cf683c..2a40caf720 100644
--- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
+++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
@@ -47,7 +47,7 @@ def _SanitizedArgSpec(obj):
     string, a string representation of the argspec.
   """
   output_string = ''
-  unsanitized_arg_spec = tf_inspect.getargspec(obj)
+  unsanitized_arg_spec = tf_inspect.getfullargspec(obj)
 
   for clean_attr in ('args', 'varargs', 'keywords'):
     output_string += '%s=%s, ' % (clean_attr,
-- 
GitLab


From 7c2341501a583ca625c976f118090e495cdcbe07 Mon Sep 17 00:00:00 2001
From: Jason Furmanek <furmanek@us.ibm.com>
Date: Wed, 26 Sep 2018 04:44:12 +0000
Subject: [PATCH 0089/1085] Find NCCL2 debians in Tensorflow configure

---
 configure.py                        | 136 +++++++++++++++++++---------
 third_party/nccl/nccl_configure.bzl |  14 ++-
 third_party/nccl/system.BUILD.tpl   |   4 +-
 3 files changed, 105 insertions(+), 49 deletions(-)

diff --git a/configure.py b/configure.py
index f0b9fada5e..9fd2dc2630 100644
--- a/configure.py
+++ b/configure.py
@@ -54,6 +54,12 @@ _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc'
 _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
 _TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE')
 
+NCCL_LIB_PATHS = [
+  "lib64/",
+  "lib/powerpc64le-linux-gnu/",
+  "lib/x86_64-linux-gnu/",
+  ""
+]
 
 class UserInputError(Exception):
   pass
@@ -1085,7 +1091,7 @@ def set_tf_tensorrt_install_path(environ_cp):
 
 
 def set_tf_nccl_install_path(environ_cp):
-  """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION.
+  """Set NCCL_INSTALL_PATH, NCCL_HDR_PATH and TF_NCCL_VERSION.
 
   Args:
     environ_cp: copy of the os.environ.
@@ -1111,46 +1117,98 @@ def set_tf_nccl_install_path(environ_cp):
     if tf_nccl_version == '1':
       break  # No need to get install path, NCCL 1 is a GitHub repo.
 
-    # TODO(csigg): Look with ldconfig first if we can find the library in paths
+    # Look with ldconfig first if we can find the library in paths
     # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding
     # include directory. This is where the NCCL .deb packages install them.
-    # Then ask the user if we should use that. Instead of a single
-    # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to
-    # nccl_configure.bzl
-    default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH')
-    ask_nccl_path = (r'Please specify the location where NCCL %s library is '
+
+    # First check to see if NCCL is in the ldconfig.
+    # If its found, use that location.
+    if is_linux():
+      ldconfig_bin = which('ldconfig') or '/sbin/ldconfig'
+      nccl2_path_from_ldconfig = run_shell([ldconfig_bin, '-p'])
+      nccl2_path_from_ldconfig = re.search('.*libnccl.so .* => (.*)',
+                                           nccl2_path_from_ldconfig)
+    if nccl2_path_from_ldconfig:
+      nccl2_path_from_ldconfig = nccl2_path_from_ldconfig.group(1)
+      if os.path.exists('%s.%s' % (nccl2_path_from_ldconfig, tf_nccl_version)):
+        nccl_install_path = os.path.dirname(nccl2_path_from_ldconfig)
+        print('NCCL libraries found in ' + nccl2_path_from_ldconfig)
+        
+        # Check if this is the main system lib location
+        if re.search('.*linux-gnu', nccl_install_path):
+          trunc_nccl_install_path = "/usr"
+          print("This looks like a system path.")
+        else:
+          trunc_nccl_install_path = nccl_install_path + "/.."
+  
+        # Look for header
+        nccl_hdr_path = trunc_nccl_install_path + "/include"
+        print("Assuming NCCL header path is " + nccl_hdr_path)
+        if os.path.exists(nccl_hdr_path + "/nccl.h"):
+          # Set NCCL_INSTALL_PATH
+          environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path
+          write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path)
+
+          # Set NCCL_HDR_PATH
+          environ_cp['NCCL_HDR_PATH'] = nccl_hdr_path
+          write_action_env_to_bazelrc('NCCL_HDR_PATH', nccl_hdr_path)
+          break
+        else:
+          print('The header for NCCL2 cannot be found. Please install the libnccl-dev package.')
+      else:
+          print('NCCL2 is listed by ldconfig but the library is not found. ' 
+                'Your ldconfig is out of date. Please run sudo ldconfig.')
+    else:
+      # NCCL is not found in ldconfig. Ask the user for the location.
+      default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH')
+      ask_nccl_path = (r'Please specify the location where NCCL %s library is '
                      'installed. Refer to README.md for more details. [Default '
                      'is %s]:') % (tf_nccl_version, default_nccl_path)
-    nccl_install_path = get_from_env_or_user_or_default(
+      nccl_install_path = get_from_env_or_user_or_default(
         environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path)
 
-    # Result returned from "read" will be used unexpanded. That make "~"
-    # unusable. Going through one more level of expansion to handle that.
-    nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path))
-    if is_windows() or is_cygwin():
-      nccl_install_path = cygpath(nccl_install_path)
-
-    if is_windows():
-      nccl_lib_path = 'lib/x64/nccl.lib'
-    elif is_linux():
-      nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version
-    elif is_macos():
-      nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version
-
-    nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path)
-    nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h')
-    if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path):
-      # Set NCCL_INSTALL_PATH
-      environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path
-      write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path)
-      break
-
-    # Reset and Retry
-    print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the '
-          'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path,
+      # Result returned from "read" will be used unexpanded. That make "~"
+      # unusable. Going through one more level of expansion to handle that.
+      nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path))
+      if is_windows() or is_cygwin():
+        nccl_install_path = cygpath(nccl_install_path)
+
+      if is_windows():
+        nccl_lib_path = 'lib/x64/nccl.lib'
+      elif is_linux():
+        nccl_lib_filename = 'libnccl.so.%s' % tf_nccl_version
+        nccl_lpath = '%s/lib/%s' % (nccl_install_path, nccl_lib_filename)
+        if not os.path.exists(nccl_lpath):
+          for relative_path in NCCL_LIB_PATHS:
+            path = '%s/%s%s' % (nccl_install_path, relative_path, nccl_lib_filename)
+            if os.path.exists(path):
+              print("NCCL found at " + path)
+              nccl_lib_path = path
+              break
+        else:
+          nccl_lib_path = nccl_lpath
+      elif is_macos():
+        nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version
+
+      nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path)
+      nccl_hdr_path = os.path.join(os.path.dirname(nccl_lib_path), '../include/nccl.h')
+      print("Assuming NCCL header path is "+nccl_hdr_path)
+      if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path):
+        # Set NCCL_INSTALL_PATH
+        environ_cp['NCCL_INSTALL_PATH'] = os.path.dirname(nccl_lib_path)
+        write_action_env_to_bazelrc('NCCL_INSTALL_PATH', os.path.dirname(nccl_lib_path))
+
+        # Set NCCL_HDR_PATH
+        environ_cp['NCCL_HDR_PATH'] = os.path.dirname(nccl_hdr_path)
+        write_action_env_to_bazelrc('NCCL_HDR_PATH', os.path.dirname(nccl_hdr_path))
+        break
+
+      # Reset and Retry
+      print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the '
+            'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path,
                                               nccl_hdr_path))
 
-    environ_cp['TF_NCCL_VERSION'] = ''
+      environ_cp['TF_NCCL_VERSION'] = ''
   else:
     raise UserInputError('Invalid TF_NCCL setting was provided %d '
                          'times in a row. Assuming to be a scripting mistake.' %
@@ -1401,20 +1459,10 @@ def set_grpc_build_flags():
 
 def set_system_libs_flag(environ_cp):
   syslibs = environ_cp.get('TF_SYSTEM_LIBS', '')
+  syslibs = ','.join(sorted(syslibs.split(',')))
   if syslibs and syslibs != '':
-    if ',' in syslibs:
-      syslibs = ','.join(sorted(syslibs.split(',')))
-    else:
-      syslibs = ','.join(sorted(syslibs.split()))
     write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs)
 
-  if 'PREFIX' in environ_cp:
-    write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX'])
-  if 'LIBDIR' in environ_cp:
-    write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
-  if 'INCLUDEDIR' in environ_cp:
-    write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
-
 
 def set_windows_build_flags(environ_cp):
   """Set Windows specific build options."""
diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index ce9447096e..0713b36724 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -5,6 +5,7 @@
 
   * `TF_NCCL_VERSION`: The NCCL version.
   * `NCCL_INSTALL_PATH`: The installation path of the NCCL library.
+  * `NCCL_HDR_PATH`: The installation path of the NCCL header files.
 """
 
 load(
@@ -15,6 +16,7 @@ load(
 )
 
 _NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_NCCL_HDR_PATH = "NCCL_HDR_PATH"
 _TF_NCCL_VERSION = "TF_NCCL_VERSION"
 _TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO"
 
@@ -68,7 +70,7 @@ def _find_nccl_header(repository_ctx, nccl_install_path):
   return header_path
 
 
-def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version):
+def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version):
   """Checks whether the header file matches the specified version of NCCL.
 
   Args:
@@ -79,7 +81,9 @@ def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version):
   Returns:
     A string containing the library version of NCCL.
   """
-  header_path = _find_nccl_header(repository_ctx, nccl_install_path)
+  header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
+  if not header_path.exists:
+    header_path = _find_nccl_header(repository_ctx, nccl_install_path)
   header_dir = str(header_path.realpath.dirname)
   major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
                                    _DEFINE_NCCL_MAJOR)
@@ -109,6 +113,7 @@ def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version):
   """
   lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path,
                                                            nccl_version))
+
   if not lib_path.exists:
     auto_configure_fail("Cannot find NCCL library %s" % str(lib_path))
   return lib_path
@@ -138,10 +143,12 @@ def _nccl_configure_impl(repository_ctx):
   else:
     # Create target for locally installed NCCL.
     nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
-    _check_nccl_version(repository_ctx, nccl_install_path, nccl_version)
+    nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
+    _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
     repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, {
         "%{version}": nccl_version,
         "%{install_path}": nccl_install_path,
+        "%{hdr_path}": nccl_hdr_path,
     })
 
 
@@ -149,6 +156,7 @@ nccl_configure = repository_rule(
     implementation=_nccl_configure_impl,
     environ=[
         _NCCL_INSTALL_PATH,
+        _NCCL_HDR_PATH,
         _TF_NCCL_VERSION,
     ],
 )
diff --git a/third_party/nccl/system.BUILD.tpl b/third_party/nccl/system.BUILD.tpl
index 7ca835dedf..a07f54955f 100644
--- a/third_party/nccl/system.BUILD.tpl
+++ b/third_party/nccl/system.BUILD.tpl
@@ -20,7 +20,7 @@ genrule(
     "libnccl.so.%{version}",
     "nccl.h",
   ],
-  cmd = """cp "%{install_path}/include/nccl.h" "$(@D)/nccl.h" &&
-           cp "%{install_path}/lib/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
+  cmd = """cp "%{hdr_path}/nccl.h" "$(@D)/nccl.h" &&
+           cp "%{install_path}/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
 )
 
-- 
GitLab


From 96eec07af06f4dfc75cee57b74ba4b5347619634 Mon Sep 17 00:00:00 2001
From: Cao Zongyan <zongyan.cao@alibaba-inc.com>
Date: Wed, 26 Sep 2018 13:04:46 +0800
Subject: [PATCH 0090/1085] Re-add compat module for leaky_relu implementation.

---
 tensorflow/python/ops/nn_ops.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 3f64f0af9a..78e000e458 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -22,6 +22,7 @@ import numbers
 
 import numpy as np
 
+from tensorflow.python.compat import compat
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_util
-- 
GitLab


From d59678448469ca134875e062f7f8d6d77942af4e Mon Sep 17 00:00:00 2001
From: Jason Furmanek <furmanek@us.ibm.com>
Date: Wed, 26 Sep 2018 05:19:10 +0000
Subject: [PATCH 0091/1085] fix unintential removal of set_system_libs_flag

---
 configure.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 9fd2dc2630..3791ead3ed 100644
--- a/configure.py
+++ b/configure.py
@@ -1459,10 +1459,20 @@ def set_grpc_build_flags():
 
 def set_system_libs_flag(environ_cp):
   syslibs = environ_cp.get('TF_SYSTEM_LIBS', '')
-  syslibs = ','.join(sorted(syslibs.split(',')))
   if syslibs and syslibs != '':
+    if ',' in syslibs:
+      syslibs = ','.join(sorted(syslibs.split(',')))
+    else:
+      syslibs = ','.join(sorted(syslibs.split()))
     write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs)
 
+  if 'PREFIX' in environ_cp:
+    write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX'])
+  if 'LIBDIR' in environ_cp:
+    write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
+  if 'INCLUDEDIR' in environ_cp:
+write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
+
 
 def set_windows_build_flags(environ_cp):
   """Set Windows specific build options."""
-- 
GitLab


From 1668d28ca3558f3bc4fcf94752799712211f219e Mon Sep 17 00:00:00 2001
From: Jason Furmanek <furmanek@us.ibm.com>
Date: Wed, 26 Sep 2018 05:22:04 +0000
Subject: [PATCH 0092/1085] fix in last line of set_system_lib_flag

---
 configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 3791ead3ed..b1ab55b657 100644
--- a/configure.py
+++ b/configure.py
@@ -1471,7 +1471,7 @@ def set_system_libs_flag(environ_cp):
   if 'LIBDIR' in environ_cp:
     write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
   if 'INCLUDEDIR' in environ_cp:
-write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
+    write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
 
 
 def set_windows_build_flags(environ_cp):
-- 
GitLab


From 09bf8eb99cd76c506dcd2a0e8c8e893f7f3916b1 Mon Sep 17 00:00:00 2001
From: Jason Furmanek <furmanek@us.ibm.com>
Date: Wed, 26 Sep 2018 05:26:54 +0000
Subject: [PATCH 0093/1085] white space removal

---
 third_party/nccl/nccl_configure.bzl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index 0713b36724..d78fe8f3aa 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -113,7 +113,6 @@ def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version):
   """
   lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path,
                                                            nccl_version))
-
   if not lib_path.exists:
     auto_configure_fail("Cannot find NCCL library %s" % str(lib_path))
   return lib_path
-- 
GitLab


From d970edea764a5a0937135b7d45061b1d31af6d0a Mon Sep 17 00:00:00 2001
From: Yicheng Fan <thunderfyc@gmail.com>
Date: Wed, 26 Sep 2018 20:18:31 +0800
Subject: [PATCH 0094/1085] Use scatter ops to calculate sparse tensor, which
 gives 40x speed-up on my job

---
 tensorflow/python/training/adam.py | 81 ++++++++++++++++++++++--------
 1 file changed, 59 insertions(+), 22 deletions(-)

diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py
index 704ad6d3fe..4ace03376d 100644
--- a/tensorflow/python/training/adam.py
+++ b/tensorflow/python/training/adam.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -174,7 +175,51 @@ class AdamOptimizer(optimizer.Optimizer):
         math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
         grad, use_locking=self._use_locking)
 
-  def _apply_sparse_shared(self, grad, var, indices, scatter_add):
+  class ScatterOpWrapper(object):
+    """Wraps necessary scatter ops for sparse tensors."""
+
+    def __init__(self, use_locking=False):
+      self._use_locking = use_locking
+
+    def add(self, sparse_tensor, index, delta):
+      return state_ops.scatter_add(sparse_tensor, index, delta,
+                                   use_locking = self._use_locking)
+
+    def sub(self, sparse_tensor, index, delta):
+      return state_ops.scatter_sub(sparse_tensor, index, delta,
+                                   use_locking = self._use_locking)
+
+    def update(self, sparse_tensor, index, value):
+      return state_ops.scatter_update(sparse_tensor, index, value,
+                                      use_locking = self._use_locking)
+
+
+  class ResourceScatterOpWrapper(ScatterOpWrapper):
+    """Wraps necessay scatter ops for sparse resource variables."""
+
+    def __init__(self, use_locking=False):
+      super(AdamOptimizer.ResourceScatterOpWrapper, self).__init__(use_locking)
+
+    def add(self, sparse_tensor, index, delta):
+      with ops.control_dependencies(
+          [resource_variable_ops.resource_scatter_add(
+              sparse_tensor.handle, index, delta)]):
+        return sparse_tensor.value()
+
+    def sub(self, sparse_tensor, index, delta):
+      with ops.control_dependencies(
+          [resource_variable_ops.resource_scatter_sub(
+              sparse_tensor.handle, index, delta)]):
+        return sparse_tensor.value()
+
+    def update(self, sparse_tensor, index, value):
+      with ops.control_dependencies(
+          [resource_variable_ops.resource_scatter_update(
+              sparse_tensor.handle, index, value)]):
+        return sparse_tensor.value()
+
+
+  def _apply_sparse_shared(self, grad, var, indices, scatter_op_wrapper):
     beta1_power, beta2_power = self._get_beta_accumulators()
     beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
     beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
@@ -186,37 +231,29 @@ class AdamOptimizer(optimizer.Optimizer):
     # m_t = beta1 * m + (1 - beta1) * g_t
     m = self.get_slot(var, "m")
     m_scaled_g_values = grad * (1 - beta1_t)
-    m_t = state_ops.assign(m, m * beta1_t,
-                           use_locking=self._use_locking)
-    with ops.control_dependencies([m_t]):
-      m_t = scatter_add(m, indices, m_scaled_g_values)
+    m_gathered = array_ops.gather(m, indices)
+    m_t_gathered = m_gathered * beta1_t + m_scaled_g_values
+    m_t = scatter_op_wrapper.update(m, indices, m_t_gathered)
     # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
     v = self.get_slot(var, "v")
     v_scaled_g_values = (grad * grad) * (1 - beta2_t)
-    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
-    with ops.control_dependencies([v_t]):
-      v_t = scatter_add(v, indices, v_scaled_g_values)
-    v_sqrt = math_ops.sqrt(v_t)
-    var_update = state_ops.assign_sub(var,
-                                      lr * m_t / (v_sqrt + epsilon_t),
-                                      use_locking=self._use_locking)
+    v_gathered = array_ops.gather(v, indices)
+    v_t_gathered = v_gathered * beta2_t + v_scaled_g_values
+    v_t = scatter_op_wrapper.update(v, indices, v_t_gathered)
+
+    v_sqrt_gathered = math_ops.sqrt(v_t_gathered)
+    var_update = scatter_op_wrapper.sub(
+        var, indices, lr * m_t_gathered / (v_sqrt_gathered + epsilon_t))
     return control_flow_ops.group(*[var_update, m_t, v_t])
 
+
   def _apply_sparse(self, grad, var):
     return self._apply_sparse_shared(
-        grad.values, var, grad.indices,
-        lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
-            x, i, v, use_locking=self._use_locking))
-
-  def _resource_scatter_add(self, x, i, v):
-    with ops.control_dependencies(
-        [resource_variable_ops.resource_scatter_add(
-            x.handle, i, v)]):
-      return x.value()
+        grad.values, var, grad.indices, AdamOptimizer.ScatterOpWrapper(self._use_locking))
 
   def _resource_apply_sparse(self, grad, var, indices):
     return self._apply_sparse_shared(
-        grad, var, indices, self._resource_scatter_add)
+       grad, var, indices, AdamOptimizer.ResourceScatterOpWrapper(self._use_locking))
 
   def _finish(self, update_ops, name_scope):
     # Update the power accumulators.
-- 
GitLab


From b60c3747594fe7a879378d7b69b60eb9a0c02ca9 Mon Sep 17 00:00:00 2001
From: Yicheng Fan <thunderfyc@gmail.com>
Date: Wed, 26 Sep 2018 20:30:48 +0800
Subject: [PATCH 0095/1085] Fix lint

---
 tensorflow/python/training/adam.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py
index 4ace03376d..d462dc576f 100644
--- a/tensorflow/python/training/adam.py
+++ b/tensorflow/python/training/adam.py
@@ -183,15 +183,15 @@ class AdamOptimizer(optimizer.Optimizer):
 
     def add(self, sparse_tensor, index, delta):
       return state_ops.scatter_add(sparse_tensor, index, delta,
-                                   use_locking = self._use_locking)
+                                   use_locking=self._use_locking)
 
     def sub(self, sparse_tensor, index, delta):
       return state_ops.scatter_sub(sparse_tensor, index, delta,
-                                   use_locking = self._use_locking)
+                                   use_locking=self._use_locking)
 
     def update(self, sparse_tensor, index, value):
       return state_ops.scatter_update(sparse_tensor, index, value,
-                                      use_locking = self._use_locking)
+                                      use_locking=self._use_locking)
 
 
   class ResourceScatterOpWrapper(ScatterOpWrapper):
@@ -246,14 +246,15 @@ class AdamOptimizer(optimizer.Optimizer):
         var, indices, lr * m_t_gathered / (v_sqrt_gathered + epsilon_t))
     return control_flow_ops.group(*[var_update, m_t, v_t])
 
-
   def _apply_sparse(self, grad, var):
     return self._apply_sparse_shared(
-        grad.values, var, grad.indices, AdamOptimizer.ScatterOpWrapper(self._use_locking))
+        grad.values, var, grad.indices,
+        AdamOptimizer.ScatterOpWrapper(self._use_locking))
 
   def _resource_apply_sparse(self, grad, var, indices):
     return self._apply_sparse_shared(
-       grad, var, indices, AdamOptimizer.ResourceScatterOpWrapper(self._use_locking))
+        grad, var, indices,
+        AdamOptimizer.ResourceScatterOpWrapper(self._use_locking))
 
   def _finish(self, update_ops, name_scope):
     # Update the power accumulators.
-- 
GitLab


From fa76895ad577246a8ab241e668765cad651558fb Mon Sep 17 00:00:00 2001
From: Isaac Burbank <bassmanburbank@gmail.com>
Date: Wed, 26 Sep 2018 11:20:44 -0600
Subject: [PATCH 0096/1085] Update python_object_to_proto_visitor.py

Changed test key for FullArgSpec to check for `varkw`, replacing the old ArgSpec key `keywords`
---
 tensorflow/tools/api/lib/python_object_to_proto_visitor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
index 2a40caf720..a8e69fda4f 100644
--- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
+++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
@@ -49,7 +49,7 @@ def _SanitizedArgSpec(obj):
   output_string = ''
   unsanitized_arg_spec = tf_inspect.getfullargspec(obj)
 
-  for clean_attr in ('args', 'varargs', 'keywords'):
+  for clean_attr in ('args', 'varargs', 'varkw'):
     output_string += '%s=%s, ' % (clean_attr,
                                   getattr(unsanitized_arg_spec, clean_attr))
 
-- 
GitLab


From 5bbcdb8a58efd97b0f73927218d5896da67f5203 Mon Sep 17 00:00:00 2001
From: Isaac Burbank <bassmanburbank@gmail.com>
Date: Wed, 26 Sep 2018 11:34:38 -0600
Subject: [PATCH 0097/1085] Update tf_inspect_test.py

Remove subsection of added tests that were problematic.
---
 tensorflow/python/util/tf_inspect_test.py | 78 -----------------------
 1 file changed, 78 deletions(-)

diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py
index 55f88f8fc6..ba9430c756 100644
--- a/tensorflow/python/util/tf_inspect_test.py
+++ b/tensorflow/python/util/tf_inspect_test.py
@@ -353,41 +353,6 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getfullargspec(partial_function))
 
-  def testGetFullArgSpecOnPartialInvalidFullArgSpec(self):
-    """Tests getfullargspec.
-
-    Tests on partial function that doesn't have valid fullargspec.
-    """
-
-    def func(m, n, l, k=4):
-      return 2 * m + l + n * k
-
-    partial_func = functools.partial(func, n=7)
-
-    exception_message = (r"Some arguments \['l'\] do not have default value, "
-                         "but they are positioned after those with default "
-                         "values. This can not be expressed with ArgSpec.")
-    with self.assertRaisesRegexp(ValueError, exception_message):
-      tf_inspect.getfullargspec(partial_func)
-
-  def testGetFullArgSpecOnPartialValidFullArgSpec(self):
-    """Tests getfullargspec on partial function with valid fullargspec."""
-
-    def func(m, n, l, k=4):
-      return 2 * m + l + n * k
-
-    partial_func = functools.partial(func, n=7, l=2)
-    argspec = tf_inspect.FullArgSpec(
-        args=['m', 'n', 'l', 'k'],
-        varargs=None,
-        varkw=None,
-        defaults=(7, 2, 4),
-        kwonlyargs=[],
-        kwonlydefaults=None,
-        annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
-
   def testGetFullArgSpecOnPartialNoArgumentsLeft(self):
     """Tests getfullargspec on partial function that prunes all arguments."""
 
@@ -401,35 +366,6 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
 
-  def testGetFullArgSpecOnPartialKeywordArgument(self):
-    """Tests getfullargspec on partial function that prunes some arguments."""
-
-    def func(m, n):
-      return 2 * m + n
-
-    partial_func = functools.partial(func, n=7)
-    argspec = tf_inspect.FullArgSpec(
-        args=['m', 'n'], varargs=None, varkw=None, defaults=(7,),
-        kwonlyargs=[], kwonlydefaults=None, annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
-
-  def testGetFullArgSpecOnPartialKeywordArgumentWithDefaultValue(self):
-    """Tests getfullargspec.
-    
-    Tests on partial function that prunes argument by keyword.
-    """
-
-    def func(m=1, n=2):
-      return 2 * m + n
-
-    partial_func = functools.partial(func, n=7)
-    argspec = tf_inspect.FullArgSpec(
-        args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7),
-        kwonlyargs=[], kwonlydefaults=None, annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
-
   def testGetFullArgSpecOnPartialWithVarargs(self):
     """Tests getfullargspec on partial function with variable arguments."""
 
@@ -459,20 +395,6 @@ class TfInspectTest(test.TestCase):
 
     self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
 
-  def testGetFullArgSpecOnPartialWithDecorator(self):
-    """Tests getfullargspec on decorated partial function."""
-
-    @test_decorator('decorator')
-    def func(m=1, n=2):
-      return 2 * m + n
-
-    partial_func = functools.partial(func, n=7)
-    argspec = tf_inspect.FullArgSpec(
-        args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7),
-        kwonlyargs=[], kwonlydefaults=None, annotations={})
-
-    self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func))
-
   def testGetFullArgSpecOnCallableObject(self):
 
     class Callable(object):
-- 
GitLab


From 3c01644ed3fad31ac1b09afe31e655bd8892f02b Mon Sep 17 00:00:00 2001
From: William Irons <wdirons@us.ibm.com>
Date: Wed, 26 Sep 2018 14:36:45 -0400
Subject: [PATCH 0098/1085] Artifact links for ppc64le GPU builds.

whl files for tensorflow_gpu..._ppc6le are now hosted on the OSU
Jenkins build server. Nightly builds and Stable Release builds
are provided. I didn't include the version number so we won't need
to update the readme file for every new release
---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 57efb876c9..4f57aca99f 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,8 @@ The TensorFlow project strives to abide by generally accepted best practices in
 | ---             | ---    | ---       |
 | **IBM s390x**       | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA |
 | **IBM ppc64le CPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA |
-| **IBM ppc64le GPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/) | TBA |
+| **IBM ppc64le GPU** Nightly | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) |
+| **IBM ppc64le GPU** Stable Release | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | [Release](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) |
 | **Linux CPU with Intel® MKL-DNN** Nightly | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) |
 | **Linux CPU with Intel® MKL-DNN** Python 2.7<br> **Linux CPU with Intel® MKL-DNN** Python 3.5<br>  **Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild)|[1.10.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp27-cp27mu-linux_x86_64.whl)<br>[1.10.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp35-cp35m-linux_x86_64.whl)<br>[1.10.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp36-cp36m-linux_x86_64.whl) |
 
-- 
GitLab


From df26537ea43493a086f279a5ae8262dc79d03f20 Mon Sep 17 00:00:00 2001
From: Yicheng Fan <thunderfyc@gmail.com>
Date: Thu, 27 Sep 2018 11:05:11 +0800
Subject: [PATCH 0099/1085] Fix tests failure in contrib, and hide inner class
 from public API

---
 .../contrib/opt/python/training/adamax.py     | 22 +++++--------------
 .../opt/python/training/nadam_optimizer.py    |  6 ++---
 tensorflow/python/training/adam.py            | 12 +++++-----
 3 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py
index 686bac0d84..9c954ca298 100644
--- a/tensorflow/contrib/opt/python/training/adamax.py
+++ b/tensorflow/contrib/opt/python/training/adamax.py
@@ -134,8 +134,7 @@ class AdaMaxOptimizer(adam.AdamOptimizer):
         math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
         grad, use_locking=self._use_locking)
 
-  def _apply_sparse_shared(self, grad, var, indices,
-                           scatter_add, scatter_update):
+  def _apply_sparse_shared(self, grad, var, indices, scatter_op_wrapper):
     beta1_power = self._get_beta_accumulators()
     beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
     lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
@@ -147,38 +146,29 @@ class AdaMaxOptimizer(adam.AdamOptimizer):
     m_slice = array_ops.gather(m, indices)
     m_t_slice = m_slice * beta1_t + grad * (1 - beta1_t)
     with ops.control_dependencies([m_t_slice]):
-      m_t = scatter_update(m, indices, m_t_slice)
+      m_t = scatter_op_wrapper.update(m, indices, m_t_slice)
     # u_t = max(beta2 * u, abs(g_t))
     v = self.get_slot(var, "v")
     v_slice = array_ops.gather(v, indices)
     v_t_slice = math_ops.maximum(v_slice * beta2_t, math_ops.abs(grad))
     with ops.control_dependencies([v_t_slice]):
-      v_t = scatter_update(v, indices, v_t_slice)
+      v_t = scatter_op_wrapper.update(v, indices, v_t_slice)
     # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t
     var_slice = -lr_t / (1 - beta1_power) * (m_t_slice /
                                              (v_t_slice + epsilon_t))
     with ops.control_dependencies([var_slice]):
-      var_update = scatter_add(var, indices, var_slice)
+      var_update = scatter_op_wrapper.add(var, indices, var_slice)
     return control_flow_ops.group(*[var_update, m_t, v_t])
 
   def _apply_sparse(self, grad, var):
     return self._apply_sparse_shared(
         grad.values, var, grad.indices,
-        lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
-            x, i, v, use_locking=self._use_locking),
-        lambda x, i, v: state_ops.scatter_update(  # pylint: disable=g-long-lambda
-            x, i, v, use_locking=self._use_locking))
-
-  def _resource_scatter_update(self, x, i, v):
-    with ops.control_dependencies(
-        [resource_variable_ops.resource_scatter_update(
-            x.handle, i, v)]):
-      return x.value()
+        adam.AdamOptimizer._ScatterOpWrapper(self._use_locking))
 
   def _resource_apply_sparse(self, grad, var, indices):
     return self._apply_sparse_shared(
         grad, var, indices,
-        self._resource_scatter_add, self._resource_scatter_update)
+        adam.AdamOptimizer._ResourceScatterOpWrapper())
 
   def _finish(self, update_ops, name_scope):
     # Update the power accumulators.
diff --git a/tensorflow/contrib/opt/python/training/nadam_optimizer.py b/tensorflow/contrib/opt/python/training/nadam_optimizer.py
index 44a8890cb1..fc08d5827d 100644
--- a/tensorflow/contrib/opt/python/training/nadam_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/nadam_optimizer.py
@@ -67,7 +67,7 @@ class NadamOptimizer(adam.AdamOptimizer):
         use_locking=self._use_locking,
         use_nesterov=True)
 
-  def _apply_sparse_shared(self, grad, var, indices, scatter_add):
+  def _apply_sparse_shared(self, grad, var, indices, scatter_op_wrapper):
     beta1_power, beta2_power = self._get_beta_accumulators()
     beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
     beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
@@ -81,7 +81,7 @@ class NadamOptimizer(adam.AdamOptimizer):
     m_scaled_g_values = grad * (1 - beta1_t)
     m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
     with ops.control_dependencies([m_t]):
-      m_t = scatter_add(m, indices, m_scaled_g_values)
+      m_t = scatter_op_wrapper.add(m, indices, m_scaled_g_values)
       # m_bar = (1 - beta1) * g_t + beta1 * m_t
       m_bar = m_scaled_g_values + beta1_t * m_t
     # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
@@ -89,7 +89,7 @@ class NadamOptimizer(adam.AdamOptimizer):
     v_scaled_g_values = (grad * grad) * (1 - beta2_t)
     v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
     with ops.control_dependencies([v_t]):
-      v_t = scatter_add(v, indices, v_scaled_g_values)
+      v_t = scatter_op_wrapper.add(v, indices, v_scaled_g_values)
     v_sqrt = math_ops.sqrt(v_t)
     var_update = state_ops.assign_sub(
         var, lr * m_bar / (v_sqrt + epsilon_t), use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py
index d462dc576f..05ab859274 100644
--- a/tensorflow/python/training/adam.py
+++ b/tensorflow/python/training/adam.py
@@ -175,7 +175,7 @@ class AdamOptimizer(optimizer.Optimizer):
         math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
         grad, use_locking=self._use_locking)
 
-  class ScatterOpWrapper(object):
+  class _ScatterOpWrapper(object):
     """Wraps necessary scatter ops for sparse tensors."""
 
     def __init__(self, use_locking=False):
@@ -194,11 +194,11 @@ class AdamOptimizer(optimizer.Optimizer):
                                       use_locking=self._use_locking)
 
 
-  class ResourceScatterOpWrapper(ScatterOpWrapper):
+  class _ResourceScatterOpWrapper(_ScatterOpWrapper):
     """Wraps necessay scatter ops for sparse resource variables."""
 
-    def __init__(self, use_locking=False):
-      super(AdamOptimizer.ResourceScatterOpWrapper, self).__init__(use_locking)
+    def __init__(self):
+      pass
 
     def add(self, sparse_tensor, index, delta):
       with ops.control_dependencies(
@@ -249,12 +249,12 @@ class AdamOptimizer(optimizer.Optimizer):
   def _apply_sparse(self, grad, var):
     return self._apply_sparse_shared(
         grad.values, var, grad.indices,
-        AdamOptimizer.ScatterOpWrapper(self._use_locking))
+        AdamOptimizer._ScatterOpWrapper(self._use_locking))
 
   def _resource_apply_sparse(self, grad, var, indices):
     return self._apply_sparse_shared(
         grad, var, indices,
-        AdamOptimizer.ResourceScatterOpWrapper(self._use_locking))
+        AdamOptimizer._ResourceScatterOpWrapper())
 
   def _finish(self, update_ops, name_scope):
     # Update the power accumulators.
-- 
GitLab


From 4c0c4bb3fe0d68833cf7888e1c164b20d9bfcea0 Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <s.lebedev@criteo.com>
Date: Thu, 27 Sep 2018 16:52:51 +0200
Subject: [PATCH 0100/1085] Added chief to the default device_filters for
 /job:ps

This behaviour matches the description in the
_get_default_session_config_distributed docstring, and restores the
symmetry of the default device_filters.
---
 tensorflow/python/estimator/run_config.py      | 2 +-
 tensorflow/python/estimator/run_config_test.py | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index 3773810a04..1995f50733 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -566,7 +566,7 @@ class RunConfig(object):
     elif self._task_type == TaskType.WORKER:
       device_filters = ['/job:ps', '/job:worker/task:%d' % self._task_id]
     elif self._task_type == TaskType.PS:
-      device_filters = ['/job:ps', '/job:worker', '/job:master']
+      device_filters = ['/job:ps', '/job:worker', '/job:chief', '/job:master']
     else:
       # If the task_type is `EVALUATOR` or something other than the ones in
       # TaskType then don't set any device filters.
diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py
index 06df7cb9dd..313bf62c05 100644
--- a/tensorflow/python/estimator/run_config_test.py
+++ b/tensorflow/python/estimator/run_config_test.py
@@ -1196,8 +1196,9 @@ class RunConfigSessionConfigTest(test.TestCase):
         }
     }
     run_config = _create_run_config_with_cluster_spec(tf_config)
-    self._assert_equal_session_config(run_config.session_config,
-                                      ['/job:ps', '/job:worker', '/job:master'])
+    self._assert_equal_session_config(
+      run_config.session_config,
+      ['/job:ps', '/job:worker', '/job:chief', '/job:master'])
 
   def test_evaluator_session_config(self):
     tf_config = {
-- 
GitLab


From 33023751e01b90fcc461dcf8deb41505fa1b62b6 Mon Sep 17 00:00:00 2001
From: Yicheng Fan <thunderfyc@gmail.com>
Date: Fri, 28 Sep 2018 11:19:24 +0800
Subject: [PATCH 0101/1085] use variable.scatter_add/sub/update

---
 .../contrib/opt/python/training/adamax.py     | 19 +++---
 .../opt/python/training/nadam_optimizer.py    |  8 ++-
 tensorflow/python/training/adam.py            | 62 +++----------------
 3 files changed, 21 insertions(+), 68 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py
index 9c954ca298..debb6d99af 100644
--- a/tensorflow/contrib/opt/python/training/adamax.py
+++ b/tensorflow/contrib/opt/python/training/adamax.py
@@ -134,7 +134,7 @@ class AdaMaxOptimizer(adam.AdamOptimizer):
         math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
         grad, use_locking=self._use_locking)
 
-  def _apply_sparse_shared(self, grad, var, indices, scatter_op_wrapper):
+  def _apply_sparse_shared(self, grad, var, indices):
     beta1_power = self._get_beta_accumulators()
     beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
     lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
@@ -146,29 +146,28 @@ class AdaMaxOptimizer(adam.AdamOptimizer):
     m_slice = array_ops.gather(m, indices)
     m_t_slice = m_slice * beta1_t + grad * (1 - beta1_t)
     with ops.control_dependencies([m_t_slice]):
-      m_t = scatter_op_wrapper.update(m, indices, m_t_slice)
+      m_t = m.scatter_update(ops.IndexedSlices(m_t_slice, indices),
+                             use_locking=self._use_locking)
     # u_t = max(beta2 * u, abs(g_t))
     v = self.get_slot(var, "v")
     v_slice = array_ops.gather(v, indices)
     v_t_slice = math_ops.maximum(v_slice * beta2_t, math_ops.abs(grad))
     with ops.control_dependencies([v_t_slice]):
-      v_t = scatter_op_wrapper.update(v, indices, v_t_slice)
+      v_t = v.scatter_update(ops.IndexedSlices(v_t_slice, indices),
+                             use_locking=self._use_locking)
     # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t
     var_slice = -lr_t / (1 - beta1_power) * (m_t_slice /
                                              (v_t_slice + epsilon_t))
     with ops.control_dependencies([var_slice]):
-      var_update = scatter_op_wrapper.add(var, indices, var_slice)
+      var_update = var.scatter_add(ops.IndexedSlices(var_slice, indices),
+                                   use_locking=self._use_locking)
     return control_flow_ops.group(*[var_update, m_t, v_t])
 
   def _apply_sparse(self, grad, var):
-    return self._apply_sparse_shared(
-        grad.values, var, grad.indices,
-        adam.AdamOptimizer._ScatterOpWrapper(self._use_locking))
+    return self._apply_sparse_shared(grad.values, var, grad.indices)
 
   def _resource_apply_sparse(self, grad, var, indices):
-    return self._apply_sparse_shared(
-        grad, var, indices,
-        adam.AdamOptimizer._ResourceScatterOpWrapper())
+    return self._apply_sparse_shared(grad, var, indices)
 
   def _finish(self, update_ops, name_scope):
     # Update the power accumulators.
diff --git a/tensorflow/contrib/opt/python/training/nadam_optimizer.py b/tensorflow/contrib/opt/python/training/nadam_optimizer.py
index fc08d5827d..208909d2da 100644
--- a/tensorflow/contrib/opt/python/training/nadam_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/nadam_optimizer.py
@@ -67,7 +67,7 @@ class NadamOptimizer(adam.AdamOptimizer):
         use_locking=self._use_locking,
         use_nesterov=True)
 
-  def _apply_sparse_shared(self, grad, var, indices, scatter_op_wrapper):
+  def _apply_sparse_shared(self, grad, var, indices):
     beta1_power, beta2_power = self._get_beta_accumulators()
     beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
     beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
@@ -81,7 +81,8 @@ class NadamOptimizer(adam.AdamOptimizer):
     m_scaled_g_values = grad * (1 - beta1_t)
     m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
     with ops.control_dependencies([m_t]):
-      m_t = scatter_op_wrapper.add(m, indices, m_scaled_g_values)
+      m_t = m.scatter_add(ops.IndexedSlices(m_scaled_g_values, indices),
+                          use_locking=self._use_locking)
       # m_bar = (1 - beta1) * g_t + beta1 * m_t
       m_bar = m_scaled_g_values + beta1_t * m_t
     # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
@@ -89,7 +90,8 @@ class NadamOptimizer(adam.AdamOptimizer):
     v_scaled_g_values = (grad * grad) * (1 - beta2_t)
     v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
     with ops.control_dependencies([v_t]):
-      v_t = scatter_op_wrapper.add(v, indices, v_scaled_g_values)
+      v_t = v.scatter_add(ops.IndexedSlices(v_scaled_g_values, indices),
+                          use_locking=self._use_locking)
     v_sqrt = math_ops.sqrt(v_t)
     var_update = state_ops.assign_sub(
         var, lr * m_bar / (v_sqrt + epsilon_t), use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py
index 05ab859274..4b31fac6c3 100644
--- a/tensorflow/python/training/adam.py
+++ b/tensorflow/python/training/adam.py
@@ -175,51 +175,7 @@ class AdamOptimizer(optimizer.Optimizer):
         math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
         grad, use_locking=self._use_locking)
 
-  class _ScatterOpWrapper(object):
-    """Wraps necessary scatter ops for sparse tensors."""
-
-    def __init__(self, use_locking=False):
-      self._use_locking = use_locking
-
-    def add(self, sparse_tensor, index, delta):
-      return state_ops.scatter_add(sparse_tensor, index, delta,
-                                   use_locking=self._use_locking)
-
-    def sub(self, sparse_tensor, index, delta):
-      return state_ops.scatter_sub(sparse_tensor, index, delta,
-                                   use_locking=self._use_locking)
-
-    def update(self, sparse_tensor, index, value):
-      return state_ops.scatter_update(sparse_tensor, index, value,
-                                      use_locking=self._use_locking)
-
-
-  class _ResourceScatterOpWrapper(_ScatterOpWrapper):
-    """Wraps necessay scatter ops for sparse resource variables."""
-
-    def __init__(self):
-      pass
-
-    def add(self, sparse_tensor, index, delta):
-      with ops.control_dependencies(
-          [resource_variable_ops.resource_scatter_add(
-              sparse_tensor.handle, index, delta)]):
-        return sparse_tensor.value()
-
-    def sub(self, sparse_tensor, index, delta):
-      with ops.control_dependencies(
-          [resource_variable_ops.resource_scatter_sub(
-              sparse_tensor.handle, index, delta)]):
-        return sparse_tensor.value()
-
-    def update(self, sparse_tensor, index, value):
-      with ops.control_dependencies(
-          [resource_variable_ops.resource_scatter_update(
-              sparse_tensor.handle, index, value)]):
-        return sparse_tensor.value()
-
-
-  def _apply_sparse_shared(self, grad, var, indices, scatter_op_wrapper):
+  def _apply_sparse_shared(self, grad, var, indices):
     beta1_power, beta2_power = self._get_beta_accumulators()
     beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
     beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
@@ -233,28 +189,24 @@ class AdamOptimizer(optimizer.Optimizer):
     m_scaled_g_values = grad * (1 - beta1_t)
     m_gathered = array_ops.gather(m, indices)
     m_t_gathered = m_gathered * beta1_t + m_scaled_g_values
-    m_t = scatter_op_wrapper.update(m, indices, m_t_gathered)
+    m_t = m.scatter_update(ops.IndexedSlices(m_t_gathered, indices))
     # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
     v = self.get_slot(var, "v")
     v_scaled_g_values = (grad * grad) * (1 - beta2_t)
     v_gathered = array_ops.gather(v, indices)
     v_t_gathered = v_gathered * beta2_t + v_scaled_g_values
-    v_t = scatter_op_wrapper.update(v, indices, v_t_gathered)
+    v_t = v.scatter_update(ops.IndexedSlices(v_t_gathered, indices))
 
     v_sqrt_gathered = math_ops.sqrt(v_t_gathered)
-    var_update = scatter_op_wrapper.sub(
-        var, indices, lr * m_t_gathered / (v_sqrt_gathered + epsilon_t))
+    var_update = var.scatter_sub(ops.IndexedSlices(
+        lr * m_t_gathered / (v_sqrt_gathered + epsilon_t), indices))
     return control_flow_ops.group(*[var_update, m_t, v_t])
 
   def _apply_sparse(self, grad, var):
-    return self._apply_sparse_shared(
-        grad.values, var, grad.indices,
-        AdamOptimizer._ScatterOpWrapper(self._use_locking))
+    return self._apply_sparse_shared(grad.values, var, grad.indices)
 
   def _resource_apply_sparse(self, grad, var, indices):
-    return self._apply_sparse_shared(
-        grad, var, indices,
-        AdamOptimizer._ResourceScatterOpWrapper())
+    return self._apply_sparse_shared(grad, var, indices)
 
   def _finish(self, update_ops, name_scope):
     # Update the power accumulators.
-- 
GitLab


From efe17306442aa91192df953ae537d3f9b824dae6 Mon Sep 17 00:00:00 2001
From: IMBurbank <bassmanburbank@gmail.com>
Date: Thu, 27 Sep 2018 22:21:47 -0600
Subject: [PATCH 0102/1085] Updated python3 tf_inspect.getargspec calls to use
 getfullargspec and repackage the return values into the getargspec struct.

---
 .../python/losses/python/tuple_losses_impl.py |  2 +-
 .../labeled_tensor/python/ops/_typecheck.py   |  2 +-
 .../layers/python/layers/rev_block_lib.py     |  3 +-
 .../python/learn/estimators/estimator.py      |  4 +-
 .../learn/python/learn/estimators/head.py     |  2 +-
 .../learn/python/learn/experiment_test.py     |  2 +-
 .../learn/python/learn/export_strategy.py     |  2 +-
 .../contrib/learn/python/learn/metric_spec.py |  2 +-
 .../contrib/learn/python/learn/monitors.py    |  2 +-
 .../contrib/tpu/python/tpu/tpu_function.py    |  2 +-
 tensorflow/python/framework/errors_impl.py    |  2 +-
 tensorflow/python/framework/function.py       |  6 +-
 tensorflow/python/keras/backend_test.py       |  2 +-
 tensorflow/python/keras/testing_utils.py      |  2 +-
 .../kernel_tests/variable_scope_test.py       |  4 +-
 tensorflow/python/ops/variable_scope.py       |  4 +-
 tensorflow/python/util/tf_contextlib_test.py  |  2 +-
 tensorflow/python/util/tf_inspect.py          | 89 ++++++++++++-------
 .../api/lib/python_object_to_proto_visitor.py |  4 +-
 19 files changed, 79 insertions(+), 59 deletions(-)

diff --git a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
index 00a83e5e55..221c70c38b 100644
--- a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
+++ b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py
@@ -101,7 +101,7 @@ def _args_to_gan_model(loss_fn):
   """
   # Match arguments in `loss_fn` to elements of `namedtuple`.
   # TODO(joelshor): Properly handle `varargs` and `keywords`.
-  argspec = tf_inspect.getfullargspec(loss_fn)
+  argspec = tf_inspect.getargspec(loss_fn)
   defaults = argspec.defaults or []
 
   required_args = set(argspec.args[:-len(defaults)])
diff --git a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
index 0e23039847..80fa17ec1f 100644
--- a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
+++ b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py
@@ -230,7 +230,7 @@ def accepts(*types):
 
   def check_accepts(f):
     """Check the types."""
-    spec = tf_inspect.getfullargspec(f)
+    spec = tf_inspect.getargspec(f)
 
     num_function_arguments = len(spec.args)
     if len(types) != num_function_arguments:
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
index 55979cc391..06da32072f 100644
--- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py
+++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py
@@ -576,8 +576,7 @@ def _recomputing_grad_fn(compute_fn,
 
 def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False):
   """See recompute_grad."""
-  has_is_recompute_kwarg = (
-      "is_recomputing" in tf_inspect.getfullargspec(fn).args)
+  has_is_recompute_kwarg = "is_recomputing" in tf_inspect.getargspec(fn).args
   for arg in args:
     if not isinstance(arg, framework_ops.Tensor):
       raise ValueError("All inputs to function must be Tensors")
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index b88923bca2..c1de42782e 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -199,11 +199,11 @@ def _model_fn_args(fn):
   if hasattr(fn, 'func') and hasattr(fn, 'keywords') and hasattr(fn, 'args'):
     # Handle functools.partial and similar objects.
     return tuple([
-        arg for arg in tf_inspect.getfullargspec(fn.func).args[len(fn.args):]
+        arg for arg in tf_inspect.getargspec(fn.func).args[len(fn.args):]
         if arg not in set(fn.keywords.keys())
     ])
   # Handle function.
-  return tuple(tf_inspect.getfullargspec(fn).args)
+  return tuple(tf_inspect.getargspec(fn).args)
 
 
 def _get_replica_device_setter(config):
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index 63dd08316b..c6f79e00d5 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -1861,7 +1861,7 @@ def _get_arguments(func):
   _, func = tf_decorator.unwrap(func)
   if hasattr(func, "__code__"):
     # Regular function.
-    return tf_inspect.getfullargspec(func)
+    return tf_inspect.getargspec(func)
   elif hasattr(func, "func"):
     # Partial function.
     return _get_arguments(func.func)
diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py
index 6926696fb6..fb16c94c29 100644
--- a/tensorflow/contrib/learn/python/learn/experiment_test.py
+++ b/tensorflow/contrib/learn/python/learn/experiment_test.py
@@ -126,7 +126,7 @@ class TestBaseEstimator(object):
 
 def _check_method_supports_args(method, kwargs):
   """Checks that the given method supports the given args."""
-  supported_args = tuple(tf_inspect.getfullargspec(method).args)
+  supported_args = tuple(tf_inspect.getargspec(method).args)
   for kwarg in kwargs:
     if kwarg not in supported_args:
       raise ValueError(
diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py
index 0d6e0cdc18..075cab536e 100644
--- a/tensorflow/contrib/learn/python/learn/export_strategy.py
+++ b/tensorflow/contrib/learn/python/learn/export_strategy.py
@@ -96,7 +96,7 @@ class ExportStrategy(
     """
     # don't break existing export_fns that don't accept checkpoint_path and
     # eval_result
-    export_fn_args = tf_inspect.getfullargspec(self.export_fn).args
+    export_fn_args = tf_inspect.getargspec(self.export_fn).args
     kwargs = {}
     if 'checkpoint_path' in export_fn_args:
       kwargs['checkpoint_path'] = checkpoint_path
diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py
index 604d6d46b4..97220365d5 100644
--- a/tensorflow/contrib/learn/python/learn/metric_spec.py
+++ b/tensorflow/contrib/learn/python/learn/metric_spec.py
@@ -51,7 +51,7 @@ def _args(fn):
     return tuple(
         [arg for arg in _args(fn.func) if arg not in set(fn.keywords.keys())])
   # Handle function.
-  return tuple(tf_inspect.getfullargspec(fn).args)
+  return tuple(tf_inspect.getargspec(fn).args)
 
 
 _CANONICAL_LABELS_ARG = 'labels'
diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py
index 5f61e0264f..3d691d4340 100644
--- a/tensorflow/contrib/learn/python/learn/monitors.py
+++ b/tensorflow/contrib/learn/python/learn/monitors.py
@@ -1303,7 +1303,7 @@ class RunHookAdapterForMonitors(session_run_hook.SessionRunHook):
   def end(self, session):
     self._last_step = None
     for m in self._monitors:
-      if "session" in tf_inspect.getfullargspec(m.end).args:
+      if "session" in tf_inspect.getargspec(m.end).args:
         m.end(session=session)
       else:
         m.end()
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_function.py b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
index 9c4bd1c4d1..0c7a38dbbb 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_function.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_function.py
@@ -80,7 +80,7 @@ def check_function_argument_count(func, input_arity, infeed_queue):
   number_of_arguments_needed = input_arity
   if infeed_queue is not None:
     number_of_arguments_needed += infeed_queue.number_of_tuple_elements
-  arg_spec = tf_inspect.getfullargspec(func)
+  arg_spec = tf_inspect.getargspec(func)
   number_of_args = len(arg_spec.args)
   if arg_spec.defaults is None:
     number_of_defaults = 0
diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py
index c373e75a74..5af71f2cfb 100644
--- a/tensorflow/python/framework/errors_impl.py
+++ b/tensorflow/python/framework/errors_impl.py
@@ -55,7 +55,7 @@ class OpError(Exception):
 
   def __reduce__(self):
     # Allow the subclasses to accept less arguments in their __init__.
-    init_argspec = tf_inspect.getfullargspec(self.__class__.__init__)
+    init_argspec = tf_inspect.getargspec(self.__class__.__init__)
     args = tuple(getattr(self, arg) for arg in init_argspec.args[1:])
     return self.__class__, args
 
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 3db6f683c9..225208944e 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -132,9 +132,9 @@ class Defun(object):
       raise ValueError("func %s must be callable" % func)
 
     # Func should not use kwargs and defaults.
-    argspec = tf_inspect.getfullargspec(func)
-    if argspec.varkw or argspec.defaults:
-      raise ValueError("Functions with argument defaults or varkw "
+    argspec = tf_inspect.getargspec(func)
+    if argspec.keywords or argspec.defaults:
+      raise ValueError("Functions with argument defaults or keywords "
                        "arguments are not supported.")
 
     # Computes how many arguments 'func' has.
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index 31191d0d35..ab71589940 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -452,7 +452,7 @@ class BackendLinearAlgebraTest(test.TestCase):
         compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5),
                                          keras_kwargs={'axis': -1},
                                          np_kwargs={'axis': -1})
-        if 'keepdims' in tf_inspect.getfullargspec(keras_op).args:
+        if 'keepdims' in tf_inspect.getargspec(keras_op).args:
           compare_single_input_op_to_numpy(keras_op, np_op,
                                            input_shape=(4, 7, 5),
                                            keras_kwargs={'axis': 1,
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index 1afaba5653..501b50ba5f 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -102,7 +102,7 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
   layer.set_weights(weights)
 
   # test and instantiation from weights
-  if 'weights' in tf_inspect.getfullargspec(layer_cls.__init__):
+  if 'weights' in tf_inspect.getargspec(layer_cls.__init__):
     kwargs['weights'] = weights
     layer = layer_cls(**kwargs)
 
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 1d0b72b17a..401e1ae102 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -998,8 +998,8 @@ class VariableScopeTest(test.TestCase):
 
   def testSignatureGetVarVsGetLocalVar(self):
     """get_{local,}variable() must take the same list of args."""
-    arg_names = tf_inspect.getfullargspec(variable_scope.get_variable)[0]
-    local_arg_names = tf_inspect.getfullargspec(
+    arg_names = tf_inspect.getargspec(variable_scope.get_variable)[0]
+    local_arg_names = tf_inspect.getargspec(
         variable_scope.get_local_variable)[0]
     self.assertEqual(arg_names, local_arg_names)
 
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 3cc1eb916d..a43676cd70 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -892,14 +892,14 @@ class _VariableStore(object):
         if shape and shape.is_fully_defined():
           init_val = lambda: initializer(  # pylint: disable=g-long-lambda
               shape.as_list(), dtype=dtype, partition_info=partition_info)
-        elif not tf_inspect.getfullargspec(initializer).args:
+        elif not tf_inspect.getargspec(initializer).args:
           init_val = initializer
         else:
           raise ValueError("You can only pass an initializer function that "
                            "expects no arguments to its callable when the "
                            "shape is not fully defined. The given initializer "
                            "function expects the following args %s" %
-                           tf_inspect.getfullargspec(initializer).args)
+                           tf_inspect.getargspec(initializer).args)
         variable_dtype = dtype.base_dtype
 
     # Create the variable.
diff --git a/tensorflow/python/util/tf_contextlib_test.py b/tensorflow/python/util/tf_contextlib_test.py
index 1e921b5ea3..4a5bf388a6 100644
--- a/tensorflow/python/util/tf_contextlib_test.py
+++ b/tensorflow/python/util/tf_contextlib_test.py
@@ -83,7 +83,7 @@ class TfContextlibTest(test.TestCase):
     self.assertFalse(isinstance(target, tf_decorator.TFDecorator))
 
   def testGetArgSpecReturnsWrappedArgSpec(self):
-    argspec = tf_inspect.getfullargspec(test_params_and_defaults)
+    argspec = tf_inspect.getargspec(test_params_and_defaults)
     self.assertEqual(['a', 'b', 'c', 'd'], argspec.args)
     self.assertEqual((2, True, 'hello'), argspec.defaults)
 
diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py
index 234850ac3f..3cd6c515b9 100644
--- a/tensorflow/python/util/tf_inspect.py
+++ b/tensorflow/python/util/tf_inspect.py
@@ -36,6 +36,53 @@ else:
       'annotations'
   ])
 
+if hasattr(_inspect, 'getfullargspec'):
+  _getfullargspec = _inspect.getfullargspec  # pylint: disable=invalid-name
+  
+  def _getargspec(target):
+    """A python3 version of getargspec.
+
+    Calls `getfullargspec` and assigns args, varargs, 
+    varkw, and defaults to a python 2/3 compatible `ArgSpec`.
+
+    The parameter name 'varkw' is changed to 'keywords' to fit the 
+    `ArgSpec` struct.
+
+    Args:
+      target: the target object to inspect.
+    Returns:
+      An ArgSpec with args, varargs, keywords, and defaults parameters
+      from FullArgSpec.
+    """
+    fullargspecs = getfullargspec(target)
+    argspecs = ArgSpec(
+        args=fullargspecs.args,
+        varargs=fullargspecs.varargs,
+        keywords=fullargspecs.varkw,
+        defaults=fullargspecs.defaults)
+    return argspecs
+else:
+  _getargspec = _inspect.getargspec
+
+  def _getfullargspec(target):
+    """A python2 version of getfullargspec.
+
+    Args:
+      target: the target object to inspect.
+    Returns:
+      A FullArgSpec with empty kwonlyargs, kwonlydefaults and annotations.
+    """
+    argspecs = getargspec(target)
+    fullargspecs = FullArgSpec(
+        args=argspecs.args,
+        varargs=argspecs.varargs,
+        varkw=argspecs.keywords,
+        defaults=argspecs.defaults,
+        kwonlyargs=[],
+        kwonlydefaults=None,
+        annotations={})
+    return fullargspecs
+
 
 def currentframe():
   """TFDecorator-aware replacement for inspect.currentframe."""
@@ -45,10 +92,8 @@ def currentframe():
 def getargspec(obj):
   """TFDecorator-aware replacement for `inspect.getargspec`.
 
-  This should not be called from other modules. It is deprecated in python3.
-
-  Use `getfullargspec`. It is a TFDecorator-aware replacement for 
-  `inspect.getfullargspec` compatible with both python2 and python3.
+  Note: `getfullargspec` is recommended as the python 2/3 compatible 
+  replacement for this function.
 
   Args:
     obj: A function, partial function, or callable object, possibly
@@ -56,8 +101,8 @@ def getargspec(obj):
 
   Returns:
     The `ArgSpec` that describes the signature of the outermost decorator that
-    changes the callable's signature. If the callable is not decorated,
-    `inspect.getargspec()` will be called directly on the object.
+    changes the callable's signature, or the `ArgSpec` that describes 
+    the object if not decorated.
 
   Raises:
     ValueError: When callable's signature can not be expressed with
@@ -77,24 +122,24 @@ def getargspec(obj):
 
   try:
     # Python3 will handle most callables here (not partial).
-    return _inspect.getargspec(target)
+    return _getargspec(target)
   except TypeError:
     pass
 
   if isinstance(target, type):
     try:
-      return _inspect.getargspec(target.__init__)
+      return _getargspec(target.__init__)
     except TypeError:
       pass
 
     try:
-      return _inspect.getargspec(target.__new__)
+      return _getargspec(target.__new__)
     except TypeError:
       pass
 
   # The `type(target)` ensures that if a class is received we don't return
   # the signature of it's __call__ method.
-  return _inspect.getargspec(type(target).__call__)
+  return _getargspec(type(target).__call__)
 
 
 def _get_argspec_for_partial(obj):
@@ -177,30 +222,6 @@ def _get_argspec_for_partial(obj):
   return ArgSpec(args, varargs, keywords, tuple(all_defaults[first_default:]))
 
 
-if hasattr(_inspect, 'getfullargspec'):
-  _getfullargspec = _inspect.getfullargspec
-else:
-
-  def _getfullargspec(target):
-    """A python2 version of getfullargspec.
-
-    Args:
-      target: the target object to inspect.
-    Returns:
-      A FullArgSpec with empty kwonlyargs, kwonlydefaults and annotations.
-    """
-    argspecs = getargspec(target)
-    fullargspecs = FullArgSpec(
-        args=argspecs.args,
-        varargs=argspecs.varargs,
-        varkw=argspecs.keywords,
-        defaults=argspecs.defaults,
-        kwonlyargs=[],
-        kwonlydefaults=None,
-        annotations={})
-    return fullargspecs
-
-
 def getfullargspec(obj):
   """TFDecorator-aware replacement for `inspect.getfullargspec`.
 
diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
index a8e69fda4f..3a48cf683c 100644
--- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
+++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
@@ -47,9 +47,9 @@ def _SanitizedArgSpec(obj):
     string, a string representation of the argspec.
   """
   output_string = ''
-  unsanitized_arg_spec = tf_inspect.getfullargspec(obj)
+  unsanitized_arg_spec = tf_inspect.getargspec(obj)
 
-  for clean_attr in ('args', 'varargs', 'varkw'):
+  for clean_attr in ('args', 'varargs', 'keywords'):
     output_string += '%s=%s, ' % (clean_attr,
                                   getattr(unsanitized_arg_spec, clean_attr))
 
-- 
GitLab


From 307a095da517a7382f66e14273464c85296425aa Mon Sep 17 00:00:00 2001
From: Rin Arakaki <rnarkkx@gmail.com>
Date: Fri, 28 Sep 2018 04:59:16 +0000
Subject: [PATCH 0103/1085] Modify docs to conform to Python syntax

---
 tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt | 2 +-
 tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt | 2 +-
 tensorflow/go/op/wrappers.go                              | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt b/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt
index 40c00ef58f..cd4cc5c906 100644
--- a/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt
@@ -21,7 +21,7 @@ used to convert the float values to their quantized equivalents.
 In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
 
 ```
-if T == qint8, in[i] += (range(T) + 1)/ 2.0
+if T == qint8: in[i] += (range(T) + 1)/ 2.0
 out[i] = min_range + (in[i]* (max_range - min_range) / range(T))
 ```
 here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt
index 37ac10dddb..b7311153f4 100644
--- a/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_QuantizeV2.pbtxt
@@ -42,7 +42,7 @@ In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
 
 ```
 out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
-if T == qint8, out[i] -= (range(T) + 1) / 2.0
+if T == qint8: out[i] -= (range(T) + 1) / 2.0
 ```
 
 here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 2f297d5161..9ec651777d 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -19500,7 +19500,7 @@ func QuantizeV2RoundMode(value string) QuantizeV2Attr {
 //
 // ```
 // out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
-// if T == qint8, out[i] -= (range(T) + 1) / 2.0
+// if T == qint8: out[i] -= (range(T) + 1) / 2.0
 // ```
 //
 // here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
@@ -23823,7 +23823,7 @@ func DequantizeMode(value string) DequantizeAttr {
 // In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
 //
 // ```
-// if T == qint8, in[i] += (range(T) + 1)/ 2.0
+// if T == qint8: in[i] += (range(T) + 1)/ 2.0
 // out[i] = min_range + (in[i]* (max_range - min_range) / range(T))
 // ```
 // here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
-- 
GitLab


From 27489419e8d8870163f5173f77ea56aa118689d8 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Thu, 27 Sep 2018 23:46:02 -0700
Subject: [PATCH 0104/1085] Update the relative file pathes in the comments of
 tf.data kernel files

---
 tensorflow/core/kernels/data/batch_dataset_op.cc                | 2 +-
 tensorflow/core/kernels/data/cache_dataset_ops.cc               | 2 +-
 tensorflow/core/kernels/data/concatenate_dataset_op.cc          | 2 +-
 tensorflow/core/kernels/data/dataset_ops.cc                     | 2 +-
 .../core/kernels/data/dense_to_sparse_batch_dataset_op.cc       | 2 +-
 tensorflow/core/kernels/data/filter_by_component_dataset_op.cc  | 2 +-
 tensorflow/core/kernels/data/filter_dataset_op.cc               | 2 +-
 tensorflow/core/kernels/data/flat_map_dataset_op.cc             | 2 +-
 tensorflow/core/kernels/data/generator_dataset_op.cc            | 2 +-
 tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc     | 2 +-
 tensorflow/core/kernels/data/group_by_window_dataset_op.cc      | 2 +-
 tensorflow/core/kernels/data/interleave_dataset_op.cc           | 2 +-
 tensorflow/core/kernels/data/iterator_ops.cc                    | 2 +-
 tensorflow/core/kernels/data/map_and_batch_dataset_op.cc        | 2 +-
 tensorflow/core/kernels/data/map_dataset_op.cc                  | 2 +-
 tensorflow/core/kernels/data/optimize_dataset_op.cc             | 2 +-
 tensorflow/core/kernels/data/padded_batch_dataset_op.cc         | 2 +-
 tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc  | 2 +-
 tensorflow/core/kernels/data/parallel_map_dataset_op.cc         | 2 +-
 tensorflow/core/kernels/data/parse_example_dataset_op.cc        | 2 +-
 tensorflow/core/kernels/data/prefetch_dataset_op.cc             | 2 +-
 tensorflow/core/kernels/data/random_dataset_op.cc               | 2 +-
 tensorflow/core/kernels/data/range_dataset_op.cc                | 2 +-
 tensorflow/core/kernels/data/reader_dataset_ops.cc              | 2 +-
 tensorflow/core/kernels/data/repeat_dataset_op.cc               | 2 +-
 tensorflow/core/kernels/data/scan_dataset_op.cc                 | 2 +-
 tensorflow/core/kernels/data/shuffle_dataset_op.cc              | 2 +-
 tensorflow/core/kernels/data/skip_dataset_op.cc                 | 2 +-
 tensorflow/core/kernels/data/slide_dataset_op.cc                | 2 +-
 tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc  | 2 +-
 tensorflow/core/kernels/data/sql_dataset_ops.cc                 | 2 +-
 tensorflow/core/kernels/data/take_dataset_op.cc                 | 2 +-
 tensorflow/core/kernels/data/tensor_dataset_op.cc               | 2 +-
 tensorflow/core/kernels/data/tensor_slice_dataset_op.cc         | 2 +-
 tensorflow/core/kernels/data/unbatch_dataset_op.cc              | 2 +-
 tensorflow/core/kernels/data/window_dataset_op.cc               | 2 +-
 tensorflow/core/kernels/data/zip_dataset_op.cc                  | 2 +-
 37 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/tensorflow/core/kernels/data/batch_dataset_op.cc b/tensorflow/core/kernels/data/batch_dataset_op.cc
index d1db1d7bec..023cf79966 100644
--- a/tensorflow/core/kernels/data/batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/batch_dataset_op.cc
@@ -21,7 +21,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class BatchDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc
index 34c6c86538..d86d96b9fc 100644
--- a/tensorflow/core/kernels/data/cache_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc
@@ -23,7 +23,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level description of
+// See documentation in ../../ops/dataset_ops.cc for a high-level description of
 // the following op.
 
 class CacheDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/concatenate_dataset_op.cc b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
index a04f150e71..46df039530 100644
--- a/tensorflow/core/kernels/data/concatenate_dataset_op.cc
+++ b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
@@ -20,7 +20,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class ConcatenateDatasetOp : public BinaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/dataset_ops.cc b/tensorflow/core/kernels/data/dataset_ops.cc
index bd1ccd5b5d..c689a119c3 100644
--- a/tensorflow/core/kernels/data/dataset_ops.cc
+++ b/tensorflow/core/kernels/data/dataset_ops.cc
@@ -21,7 +21,7 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 class DatasetToGraphOp : public OpKernel {
  public:
diff --git a/tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc b/tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc
index 237511a07d..45678aa84f 100644
--- a/tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/dense_to_sparse_batch_dataset_op.cc
@@ -21,7 +21,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/filter_by_component_dataset_op.cc b/tensorflow/core/kernels/data/filter_by_component_dataset_op.cc
index a7e3a56727..d09904a0eb 100644
--- a/tensorflow/core/kernels/data/filter_by_component_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_by_component_dataset_op.cc
@@ -24,7 +24,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 // TODO(prazek): Filter already has a logic of filtering by the given tensor,
 // but it must return both components.  We could introduce kernel like
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index 00884314a9..a35f9a021c 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -26,7 +26,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class FilterDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
index 2fada22a21..5d8565e745 100644
--- a/tensorflow/core/kernels/data/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
@@ -24,7 +24,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class FlatMapDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/generator_dataset_op.cc b/tensorflow/core/kernels/data/generator_dataset_op.cc
index b4367d5a11..5de2e2871d 100644
--- a/tensorflow/core/kernels/data/generator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/generator_dataset_op.cc
@@ -25,7 +25,7 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class GeneratorDatasetOp::Dataset : public DatasetBase {
diff --git a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
index e7244ee208..87600d7873 100644
--- a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc
@@ -25,7 +25,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
  public:
diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
index 14aefe5d54..7363664982 100644
--- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc
@@ -26,7 +26,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
  public:
diff --git a/tensorflow/core/kernels/data/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc
index 0aa802b874..83c1f7b719 100644
--- a/tensorflow/core/kernels/data/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc
@@ -24,7 +24,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class InterleaveDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 7a833668ac..50b72f46c2 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -39,7 +39,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following ops.
 
 const char kIteratorVariantTypeName[] = "tensorflow::Iterator";
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 2bbf4af664..b48a2c3eca 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -34,7 +34,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index f112e1dc43..1122d7918c 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -23,7 +23,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class MapDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc
index d5b725eac9..58d68d9de0 100644
--- a/tensorflow/core/kernels/data/optimize_dataset_op.cc
+++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc
@@ -36,7 +36,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 class OptimizeDatasetOp : public UnaryDatasetOpKernel {
  public:
diff --git a/tensorflow/core/kernels/data/padded_batch_dataset_op.cc b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
index 7b01c3b4e0..d0943a583e 100644
--- a/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc
@@ -22,7 +22,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class PaddedBatchDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 2e6e0465f7..e180e510b7 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -31,7 +31,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 6abe6c8338..c4e2fecc6e 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -27,7 +27,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index c28c06da62..0d77dfe24e 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -23,7 +23,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index 754ed772db..e1d42a9a6b 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -26,7 +26,7 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class PrefetchDatasetOp::Dataset : public DatasetBase {
diff --git a/tensorflow/core/kernels/data/random_dataset_op.cc b/tensorflow/core/kernels/data/random_dataset_op.cc
index 044a791a3f..bcd26ab389 100644
--- a/tensorflow/core/kernels/data/random_dataset_op.cc
+++ b/tensorflow/core/kernels/data/random_dataset_op.cc
@@ -24,7 +24,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class RandomDatasetOp : public DatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/range_dataset_op.cc b/tensorflow/core/kernels/data/range_dataset_op.cc
index 89fbaae369..0c0cb5ddc1 100644
--- a/tensorflow/core/kernels/data/range_dataset_op.cc
+++ b/tensorflow/core/kernels/data/range_dataset_op.cc
@@ -20,7 +20,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class RangeDatasetOp : public DatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/reader_dataset_ops.cc b/tensorflow/core/kernels/data/reader_dataset_ops.cc
index c474cb4773..df4fbfc69a 100644
--- a/tensorflow/core/kernels/data/reader_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/reader_dataset_ops.cc
@@ -26,7 +26,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following ops.
 
 class TextLineDatasetOp : public DatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/repeat_dataset_op.cc b/tensorflow/core/kernels/data/repeat_dataset_op.cc
index 94e96635ab..e43dc1f6d8 100644
--- a/tensorflow/core/kernels/data/repeat_dataset_op.cc
+++ b/tensorflow/core/kernels/data/repeat_dataset_op.cc
@@ -20,7 +20,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class RepeatDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc
index 2a911aa368..c49a265b51 100644
--- a/tensorflow/core/kernels/data/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/data/scan_dataset_op.cc
@@ -26,7 +26,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class ScanDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 66466d6a36..038d9cb9bd 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -30,7 +30,7 @@ namespace {
 
 const int64 kLogIntervalMicros = 10 * 1000000;  // 10 seconds.
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/skip_dataset_op.cc b/tensorflow/core/kernels/data/skip_dataset_op.cc
index b8c7fb15f4..bfaa632a74 100644
--- a/tensorflow/core/kernels/data/skip_dataset_op.cc
+++ b/tensorflow/core/kernels/data/skip_dataset_op.cc
@@ -20,7 +20,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class SkipDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/slide_dataset_op.cc b/tensorflow/core/kernels/data/slide_dataset_op.cc
index 1e73cfc753..2be7fd7410 100644
--- a/tensorflow/core/kernels/data/slide_dataset_op.cc
+++ b/tensorflow/core/kernels/data/slide_dataset_op.cc
@@ -26,7 +26,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class SlideDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc b/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc
index 85b1e50695..ccb125f3c3 100644
--- a/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc
+++ b/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc
@@ -24,7 +24,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 template <typename T>
diff --git a/tensorflow/core/kernels/data/sql_dataset_ops.cc b/tensorflow/core/kernels/data/sql_dataset_ops.cc
index 6bbe459332..a50a041f5d 100644
--- a/tensorflow/core/kernels/data/sql_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/sql_dataset_ops.cc
@@ -27,7 +27,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following ops.
 
 class SqlDatasetOp : public DatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/take_dataset_op.cc b/tensorflow/core/kernels/data/take_dataset_op.cc
index e5cdfdd732..e8570b68c9 100644
--- a/tensorflow/core/kernels/data/take_dataset_op.cc
+++ b/tensorflow/core/kernels/data/take_dataset_op.cc
@@ -20,7 +20,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class TakeDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/tensor_dataset_op.cc b/tensorflow/core/kernels/data/tensor_dataset_op.cc
index ca4ea25b89..ee0fb0069a 100644
--- a/tensorflow/core/kernels/data/tensor_dataset_op.cc
+++ b/tensorflow/core/kernels/data/tensor_dataset_op.cc
@@ -21,7 +21,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class TensorDatasetOp : public DatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc b/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc
index 7dc64b0a75..fe2f5ea536 100644
--- a/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc
+++ b/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc
@@ -22,7 +22,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class TensorSliceDatasetOp : public DatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/unbatch_dataset_op.cc b/tensorflow/core/kernels/data/unbatch_dataset_op.cc
index 81c432b938..9d0abc21ef 100644
--- a/tensorflow/core/kernels/data/unbatch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/unbatch_dataset_op.cc
@@ -21,7 +21,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class UnbatchDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/window_dataset_op.cc b/tensorflow/core/kernels/data/window_dataset_op.cc
index ac44623ce2..16698c0b1a 100644
--- a/tensorflow/core/kernels/data/window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/window_dataset_op.cc
@@ -22,7 +22,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class WindowDatasetOp : public UnaryDatasetOpKernel {
diff --git a/tensorflow/core/kernels/data/zip_dataset_op.cc b/tensorflow/core/kernels/data/zip_dataset_op.cc
index 61a2078f46..4186cb4ecd 100644
--- a/tensorflow/core/kernels/data/zip_dataset_op.cc
+++ b/tensorflow/core/kernels/data/zip_dataset_op.cc
@@ -20,7 +20,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class ZipDatasetOp : public DatasetOpKernel {
-- 
GitLab


From d0690d46466bf0393ad65544d1e8c55e948df133 Mon Sep 17 00:00:00 2001
From: EFanZh <efanzh@gmail.com>
Date: Fri, 28 Sep 2018 15:20:26 +0800
Subject: [PATCH 0105/1085] Fix some documentation errors

---
 tensorflow/contrib/distribute/python/mirrored_strategy.py | 5 +++--
 tensorflow/python/keras/engine/training.py                | 2 +-
 tensorflow/python/training/distribute.py                  | 6 +++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 504f45a695..c0861da567 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -318,12 +318,13 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
   [TensorFlow's documentation](https://www.tensorflow.org/deploy/distributed).
   The distribution strategy inherits these concepts as well and in addition to
   that we also clarify several more concepts:
-    * **In-graph replication**: the `client` creates a single `tf.Graph` that
+
+  * **In-graph replication**: the `client` creates a single `tf.Graph` that
     specifies tasks for devices on all workers. The `client` then creates a
     client session which will talk to the `master` service of a `worker`. Then
     the `master` will partition the graph and distribute the work to all
     participating workers.
-    * **Worker**: A `worker` is a TensorFlow `task` that usually maps to one
+  * **Worker**: A `worker` is a TensorFlow `task` that usually maps to one
     physical machine. We will have multiple `worker`s with different `task`
     index. They all do similar things except for one worker checkpointing model
     variables, writing summaries, etc. in addition to its ordinary work.
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 5091cac836..1bd8422658 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -2356,6 +2356,6 @@ class DistributedCallbackModel(Model):
     # Whitelisted atttributes of the model that can be accessed by the user
     # during a callback.
     if item not in ['_setattr_tracking']:
-      logging.warning('You are accessing attribute ' + item + 'of the '
+      logging.warning('You are accessing attribute ' + item + ' of the '
                       'DistributedCallbackModel that may not have been set '
                       'correctly.')
diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py
index 419a9ec12b..fd4704285c 100644
--- a/tensorflow/python/training/distribute.py
+++ b/tensorflow/python/training/distribute.py
@@ -631,7 +631,7 @@ class DistributionStrategy(object):
 
     Args:
       fn: function to run using this distribution strategy. The function must
-        have the following signature: def fn(context, *inputs).
+        have the following signature: `def fn(context, *inputs)`.
         `context` is an instance of `MultiStepContext` that will be passed when
         `fn` is run. `context` can be used to specify the outputs to be returned
         from `fn` by calling `context.set_last_step_output`. It can also be used
@@ -797,9 +797,9 @@ class DistributionStrategy(object):
     return merged(results)
     ```
 
-    Otherwise this returns `fn(var, *args, **kwargs)` colocated with `var`.'
+    Otherwise this returns `fn(var, *args, **kwargs)` colocated with `var`.
 
-    Neither *args nor **kwargs may contain per-device values.
+    Neither `*args` nor `**kwargs` may contain per-device values.
     If they contain mirrored values, they will be unwrapped before
     calling `fn`.
 
-- 
GitLab


From 8eb27871583d9fc61e046493acaa0df2839bc1c7 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Fri, 28 Sep 2018 18:51:34 +0800
Subject: [PATCH 0106/1085] remove slash

---
 tensorflow/python/ops/variables.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 69f63bc8e6..262cd61e5a 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -2401,7 +2401,8 @@ class PartitionedVariable(object):
     partition_axes = self._partition_axes()
     if len(partition_axes) > 1:
       raise NotImplementedError(
-          "Multi-axis partition assign_fn is not supported "
+          "Cannot do assign action along more than one dimension: %s.  "
+          "Multi-axis partition assign action is not supported "
           % str(partition_axes))
     partition_ix = partition_axes[0]
     size_splits_list = [
@@ -2409,7 +2410,7 @@ class PartitionedVariable(object):
     value_list = array_ops.split(
         value, size_splits_list, axis=partition_ix)
     op_list = [
-        assign_fn(var, value_list[idx], idx) \
+        assign_fn(var, value_list[idx], idx)
         for idx, var in enumerate(self._variable_list)]
     return op_list
 
-- 
GitLab


From a74a3217f7ff2dbee2fb618aa658cf666861545c Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Sat, 4 Aug 2018 14:13:00 +0800
Subject: [PATCH 0107/1085] Move bazel.rc to workspace root to support
 bazel-0.18.0

Bazel 0.18.0 will contain a change for which rc files it accepts.
https://github.com/bazelbuild/bazel/commit/ec83598cb6ee4136166bb562a24dc5dfa58921db
https://github.com/bazelbuild/bazel/issues/4502

Old bazel used to read %workspace%/tools/bazel.rc. New bazel will not
read that and instead will only read %workspace%/.bazelrc.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tools/bazel.rc => .bazelrc | 4 +++-
 .gitignore                 | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)
 rename tools/bazel.rc => .bazelrc (98%)

diff --git a/tools/bazel.rc b/.bazelrc
similarity index 98%
rename from tools/bazel.rc
rename to .bazelrc
index 3734fab715..9f09fdff97 100644
--- a/tools/bazel.rc
+++ b/.bazelrc
@@ -29,7 +29,7 @@ build:mkl -c opt
 
 # This config option is used to enable MKL-DNN open source library only,
 # without depending on MKL binary version.
-build:mkl_open_source_only --define=build_with_mkl_dnn_only=true 
+build:mkl_open_source_only --define=build_with_mkl_dnn_only=true
 build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true
 
 build:download_clang --crosstool_top=@local_config_download_clang//:toolchain
@@ -84,3 +84,5 @@ build:dynamic_kernels --define=dynamic_loaded_kernels=true
 build --define=PREFIX=/usr
 build --define=LIBDIR=$(PREFIX)/lib
 build --define=INCLUDEDIR=$(PREFIX)/include
+
+# Do not commit the tf_configure.bazelrc line
diff --git a/.gitignore b/.gitignore
index 1ef4c297ee..cb65f447d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 .DS_Store
 .ipynb_checkpoints
 node_modules
-/.bazelrc
 /.tf_configure.bazelrc
 /bazel-*
 /bazel_pip
-- 
GitLab


From d3f6b72bc7356d5c94289e32426dc482b8ededf0 Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Sat, 4 Aug 2018 14:28:02 +0800
Subject: [PATCH 0108/1085] configure: use workspace-relative path to
 tf_configure_bazelrc

/.bazelrc is not gitignored anymore so this should help in case the
import line is accidentally committed. Bazel 0.18.0 will support a new
'try-import' statement that should be used once 0.18.0 has been out long
enough.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 configure.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/configure.py b/configure.py
index 55fce8b93b..129d9c5fe7 100644
--- a/configure.py
+++ b/configure.py
@@ -257,11 +257,7 @@ def reset_tf_configure_bazelrc(workspace_path):
       if _TF_BAZELRC_FILENAME in l:
         continue
       f.write('%s\n' % l)
-    if is_windows():
-      tf_bazelrc_path = _TF_BAZELRC.replace('\\', '/')
-    else:
-      tf_bazelrc_path = _TF_BAZELRC
-    f.write('import %s\n' % tf_bazelrc_path)
+    f.write('import %%workspace%%/%s\n' % _TF_BAZELRC_FILENAME)
 
 
 def cleanup_makefile():
-- 
GitLab


From b5feceb9058e06eac3de86ec45c44f5637054855 Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Tue, 25 Sep 2018 00:42:42 -0700
Subject: [PATCH 0109/1085] Added the feature to disable MKL support of
 TensorFlow by environmental variable TF_DISABLE_MKL=1

---
 .../core/common_runtime/mkl_cpu_allocator.h   | 54 +++++++++++++------
 .../core/common_runtime/process_util.cc       |  5 ++
 .../core/common_runtime/threadpool_device.cc  |  4 ++
 tensorflow/core/graph/mkl_layout_pass.cc      |  5 ++
 .../core/graph/mkl_tfconversion_pass.cc       |  5 ++
 tensorflow/core/util/util.cc                  | 20 +++++++
 tensorflow/core/util/util.h                   |  5 ++
 7 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 429b19599b..516138d28d 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mem.h"
+#include "tensorflow/core/util/util.h"
 #include "tensorflow/core/platform/numa.h"
 
 #ifndef INTEL_MKL_DNN_ONLY
@@ -163,6 +164,12 @@ class MklCPUAllocator : public Allocator {
   }
 
   Status Initialize() {
+    if (DisableMKL()) {
+        VLOG(1) << "TF-MKL: Disabling pool allocator";
+        tf_disable_pool_allocator_flag_ = true;
+        return Status::OK();
+    }
+
     VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
 
     // Set upper bound on memory allocation to physical RAM available on the
@@ -217,6 +224,10 @@ class MklCPUAllocator : public Allocator {
   inline string Name() override { return kName; }
 
   inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
+    if (tf_disable_pool_allocator_flag_) {
+      return port::AlignedMalloc(num_bytes, alignment);
+    }
+
     // If the allocation size is less than threshold, call small allocator,
     // otherwise call large-size allocator (BFC). We found that BFC allocator
     // does not deliver good performance for small allocations when
@@ -227,6 +238,10 @@ class MklCPUAllocator : public Allocator {
   }
 
   inline void DeallocateRaw(void* ptr) override {
+    if (tf_disable_pool_allocator_flag_) {
+      port::AlignedFree(ptr);
+      return;
+    }
     // Check if ptr is for "small" allocation. If it is, then call Free
     // directly. Otherwise, call BFC to handle free.
     if (small_size_allocator_->IsSmallSizeAllocation(ptr)) {
@@ -237,26 +252,30 @@ class MklCPUAllocator : public Allocator {
   }
 
   void GetStats(AllocatorStats* stats) override {
-    AllocatorStats l_stats, s_stats;
-    small_size_allocator_->GetStats(&s_stats);
-    large_size_allocator_->GetStats(&l_stats);
-
-    // Combine statistics from small-size and large-size allocator.
-    stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
-    stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
-    stats->max_bytes_in_use =
-        l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
-
-    // Since small-size allocations go to MklSmallSizeAllocator,
-    // max_alloc_size from large_size_allocator would be the maximum
-    // size allocated by MklCPUAllocator.
-    stats->max_alloc_size = l_stats.max_alloc_size;
-    stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
+    if (!tf_disable_pool_allocator_flag_) {
+      AllocatorStats l_stats, s_stats;
+      small_size_allocator_->GetStats(&s_stats);
+      large_size_allocator_->GetStats(&l_stats);
+
+      // Combine statistics from small-size and large-size allocator.
+      stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
+      stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
+      stats->max_bytes_in_use =
+          l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
+
+      // Since small-size allocations go to MklSmallSizeAllocator,
+      // max_alloc_size from large_size_allocator would be the maximum
+      // size allocated by MklCPUAllocator.
+      stats->max_alloc_size = l_stats.max_alloc_size;
+      stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
+    }
   }
 
   void ClearStats() override {
-    small_size_allocator_->ClearStats();
-    large_size_allocator_->ClearStats();
+    if (!tf_disable_pool_allocator_flag_) {
+      small_size_allocator_->ClearStats();
+      large_size_allocator_->ClearStats();
+    }
   }
 
  private:
@@ -295,6 +314,7 @@ class MklCPUAllocator : public Allocator {
   // The alignment that we need for the allocations
   static constexpr const size_t kAlignment = 64;
 
+  bool tf_disable_pool_allocator_flag_ = false;
   Allocator* large_size_allocator_;              // owned by this class
   MklSmallSizeAllocator* small_size_allocator_;  // owned by this class.
 
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index a5d31b75c7..60fa601907 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/util.h"
 
 namespace tensorflow {
 
@@ -56,6 +57,10 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op != 0) return inter_op;
 #ifdef INTEL_MKL
+  // Early return if MKL is disabled
+  if (DisableMKL())
+    return port::NumSchedulableCPUs();
+
   // MKL library executes ops in parallel using OMP threads
   // Set inter_op conservatively to avoid thread oversubscription that could
   // lead to severe perf degradations and OMP resource exhaustion
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 8587d1783a..29c01d7f72 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/util/util.h"
 
 #ifdef INTEL_MKL
 #ifdef _OPENMP
@@ -49,6 +50,9 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       allocator_(allocator),
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
+  // Eearly return when MKL is disabled
+  if (DisableMKL())
+    return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 06d3fefef1..7394b1cddf 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -38,6 +38,7 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_layout_pass.h"
@@ -4511,6 +4512,10 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of a graph
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index 8c5ffd71a3..6804ab84ce 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/util/util.h"
 
 #include "tensorflow/core/graph/mkl_graph_util.h"
 #include "tensorflow/core/graph/mkl_tfconversion_pass.h"
@@ -424,6 +425,10 @@ Status MklToTfConversionPass::Run(const GraphOptimizationPassOptions& options) {
   if (options.graph == nullptr && options.partition_graphs == nullptr) {
     return Status::OK();
   }
+  if (DisableMKL()) {
+    VLOG(2) << "TF-MKL: Disabling MKL";
+    return Status::OK();
+  }
 
   auto process_graph = [&](std::unique_ptr<Graph>* g) {
     // Get the ownership of graph
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 1e5a9c5712..44d5becb9c 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -120,4 +120,24 @@ string SliceDebugString(const TensorShape& shape, const int64 flat) {
   return result;
 }
 
+#ifdef INTEL_MKL
+bool DisableMKL() {
+  enum MklStatus {
+    MKL_DEFAULT = 0,
+    MKL_ON = 1,
+    MKL_OFF = 2
+  };
+  static MklStatus status = MKL_DEFAULT;
+  if (status == MKL_DEFAULT) {
+    char* tf_disable_mkl = getenv("TF_DISABLE_MKL");
+    if ((tf_disable_mkl != NULL) && (std::stoi(tf_disable_mkl) == 1)) {
+      VLOG(2) << "TF-MKL: Disabling MKL";
+      status = MKL_OFF;
+    } else {
+      status = MKL_ON;
+    }
+  }
+  return status == MKL_OFF ? true : false;
+}
+#endif
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index 93dfd51ab5..ba90ad52c2 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -56,6 +56,11 @@ string PrintMemory(const char* ptr, size_t n);
 // "tensor", "tensor[i]", "tensor[i, j]", etc.
 string SliceDebugString(const TensorShape& shape, const int64 flat);
 
+// disable MKL in runtime
+#ifdef INTEL_MKL
+bool DisableMKL();
+#endif
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_UTIL_UTIL_H_
-- 
GitLab


From 921f8347bd463347653389f65803326528609691 Mon Sep 17 00:00:00 2001
From: franklin5 <franklin5@users.noreply.github.com>
Date: Fri, 28 Sep 2018 15:12:46 -0700
Subject: [PATCH 0110/1085] attention score shape

from equation 4, the attention score should not be a rank-3 tensor object.
---
 .../python/examples/nmt_with_attention/nmt_with_attention.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb
index 560fc8c5a2..6d463cc5c4 100644
--- a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb
+++ b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb
@@ -352,7 +352,7 @@
         "And the pseudo-code:\n",
         "\n",
         "* `score = FC(tanh(FC(EO) + FC(H)))`\n",
-        "* `attention weights = softmax(score, axis = 1)`. Softmax by default is applied on the last axis but here we want to apply it on the *1st axis*, since the shape of score is *(batch_size, max_length, hidden_size)*. `Max_length` is the length of our input. Since we are trying to assign a weight to each input, softmax should be applied on that axis.\n",
+        "* `attention weights = softmax(score, axis = 1)`. Softmax by default is applied on the last axis but here we want to apply it on the *1st axis*, since the shape of score is *(batch_size, max_length, 1)*. `Max_length` is the length of our input. Since we are trying to assign a weight to each input, softmax should be applied on that axis.\n",
         "* `context vector = sum(attention weights * EO, axis = 1)`. Same reason as above for choosing axis as 1.\n",
         "* `embedding output` = The input to the decoder X is passed through an embedding layer.\n",
         "* `merged vector = concat(embedding output, context vector)`\n",
-- 
GitLab


From ba2d36ef87cfb6cbbf06abd998edbd1ad047a741 Mon Sep 17 00:00:00 2001
From: franklin5 <franklin5@users.noreply.github.com>
Date: Fri, 28 Sep 2018 16:10:26 -0700
Subject: [PATCH 0111/1085] updating code to reflect on

pseudo code and equations
---
 .../examples/nmt_with_attention/nmt_with_attention.ipynb  | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb
index 6d463cc5c4..480777d948 100644
--- a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb
+++ b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb
@@ -446,12 +446,12 @@
         "        # we are doing this to perform addition to calculate the score\n",
         "        hidden_with_time_axis = tf.expand_dims(hidden, 1)\n",
         "        \n",
-        "        # score shape == (batch_size, max_length, hidden_size)\n",
-        "        score = tf.nn.tanh(self.W1(enc_output) + self.W2(hidden_with_time_axis))\n",
+        "        # score shape == (batch_size, max_length, 1)\n",
+        "        # we get 1 at the last axis because we are applying tanh(FC(EO) + FC(H)) to self.V\n",
+        "        score = self.V(tf.nn.tanh(self.W1(enc_output) + self.W2(hidden_with_time_axis)))\n",
         "        \n",
         "        # attention_weights shape == (batch_size, max_length, 1)\n",
-        "        # we get 1 at the last axis because we are applying score to self.V\n",
-        "        attention_weights = tf.nn.softmax(self.V(score), axis=1)\n",
+        "        attention_weights = tf.nn.softmax(score, axis=1)\n",
         "        \n",
         "        # context_vector shape after sum == (batch_size, hidden_size)\n",
         "        context_vector = attention_weights * enc_output\n",
-- 
GitLab


From e4fea9419ac387ddcb9c932abaa8e92fb045e29f Mon Sep 17 00:00:00 2001
From: knightXun <badgangkiller@gmail.com>
Date: Sat, 29 Sep 2018 00:42:23 +0800
Subject: [PATCH 0112/1085] print error information, when the os is not
 supported

---
 tensorflow/go/test.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/go/test.sh b/tensorflow/go/test.sh
index 6083608f22..47c3a68379 100755
--- a/tensorflow/go/test.sh
+++ b/tensorflow/go/test.sh
@@ -63,6 +63,9 @@ then
   else
     export DYLD_LIBRARY_PATH="${PWD}/tensorflow:${DYLD_LIBRARY_PATH}"
   fi
+else 
+  echo "Only support Linux/Darwin, System $OS is not supported"
+  exit 1
 fi
 
 # Document the Go version and run tests
-- 
GitLab


From d936d819752916d3122f02def571ecac9e995029 Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Fri, 28 Sep 2018 19:49:23 -0700
Subject: [PATCH 0113/1085] Lower the MKLCpuAllocator priority so that it can
 use default allocator when MKL is disabled, and with some  minor changes

---
 .../core/common_runtime/mkl_cpu_allocator.h   | 54 ++++++-------------
 .../core/common_runtime/process_util.cc       | 37 ++++++-------
 .../core/common_runtime/threadpool_device.cc  |  4 +-
 3 files changed, 36 insertions(+), 59 deletions(-)

diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 516138d28d..429b19599b 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/mem.h"
-#include "tensorflow/core/util/util.h"
 #include "tensorflow/core/platform/numa.h"
 
 #ifndef INTEL_MKL_DNN_ONLY
@@ -164,12 +163,6 @@ class MklCPUAllocator : public Allocator {
   }
 
   Status Initialize() {
-    if (DisableMKL()) {
-        VLOG(1) << "TF-MKL: Disabling pool allocator";
-        tf_disable_pool_allocator_flag_ = true;
-        return Status::OK();
-    }
-
     VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
 
     // Set upper bound on memory allocation to physical RAM available on the
@@ -224,10 +217,6 @@ class MklCPUAllocator : public Allocator {
   inline string Name() override { return kName; }
 
   inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
-    if (tf_disable_pool_allocator_flag_) {
-      return port::AlignedMalloc(num_bytes, alignment);
-    }
-
     // If the allocation size is less than threshold, call small allocator,
     // otherwise call large-size allocator (BFC). We found that BFC allocator
     // does not deliver good performance for small allocations when
@@ -238,10 +227,6 @@ class MklCPUAllocator : public Allocator {
   }
 
   inline void DeallocateRaw(void* ptr) override {
-    if (tf_disable_pool_allocator_flag_) {
-      port::AlignedFree(ptr);
-      return;
-    }
     // Check if ptr is for "small" allocation. If it is, then call Free
     // directly. Otherwise, call BFC to handle free.
     if (small_size_allocator_->IsSmallSizeAllocation(ptr)) {
@@ -252,30 +237,26 @@ class MklCPUAllocator : public Allocator {
   }
 
   void GetStats(AllocatorStats* stats) override {
-    if (!tf_disable_pool_allocator_flag_) {
-      AllocatorStats l_stats, s_stats;
-      small_size_allocator_->GetStats(&s_stats);
-      large_size_allocator_->GetStats(&l_stats);
-
-      // Combine statistics from small-size and large-size allocator.
-      stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
-      stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
-      stats->max_bytes_in_use =
-          l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
-
-      // Since small-size allocations go to MklSmallSizeAllocator,
-      // max_alloc_size from large_size_allocator would be the maximum
-      // size allocated by MklCPUAllocator.
-      stats->max_alloc_size = l_stats.max_alloc_size;
-      stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
-    }
+    AllocatorStats l_stats, s_stats;
+    small_size_allocator_->GetStats(&s_stats);
+    large_size_allocator_->GetStats(&l_stats);
+
+    // Combine statistics from small-size and large-size allocator.
+    stats->num_allocs = l_stats.num_allocs + s_stats.num_allocs;
+    stats->bytes_in_use = l_stats.bytes_in_use + s_stats.bytes_in_use;
+    stats->max_bytes_in_use =
+        l_stats.max_bytes_in_use + s_stats.max_bytes_in_use;
+
+    // Since small-size allocations go to MklSmallSizeAllocator,
+    // max_alloc_size from large_size_allocator would be the maximum
+    // size allocated by MklCPUAllocator.
+    stats->max_alloc_size = l_stats.max_alloc_size;
+    stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit);
   }
 
   void ClearStats() override {
-    if (!tf_disable_pool_allocator_flag_) {
-      small_size_allocator_->ClearStats();
-      large_size_allocator_->ClearStats();
-    }
+    small_size_allocator_->ClearStats();
+    large_size_allocator_->ClearStats();
   }
 
  private:
@@ -314,7 +295,6 @@ class MklCPUAllocator : public Allocator {
   // The alignment that we need for the allocations
   static constexpr const size_t kAlignment = 64;
 
-  bool tf_disable_pool_allocator_flag_ = false;
   Allocator* large_size_allocator_;              // owned by this class
   MklSmallSizeAllocator* small_size_allocator_;  // owned by this class.
 
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index 60fa601907..b3064a4c08 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -57,28 +57,25 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
   const int32 inter_op = options.config.inter_op_parallelism_threads();
   if (inter_op != 0) return inter_op;
 #ifdef INTEL_MKL
-  // Early return if MKL is disabled
-  if (DisableMKL())
-    return port::NumSchedulableCPUs();
-
-  // MKL library executes ops in parallel using OMP threads
-  // Set inter_op conservatively to avoid thread oversubscription that could
-  // lead to severe perf degradations and OMP resource exhaustion
-  int mkl_intra_op = 1;
-#ifdef _OPENMP
-  mkl_intra_op = omp_get_max_threads();
-#endif  // _OPENMP
-  CHECK_GE(mkl_intra_op, 1);
-  const int32 mkl_inter_op = std::max(
-      (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
-  VLOG(0) << "Creating new thread pool with default inter op setting: "
-          << mkl_inter_op
-          << ". Tune using inter_op_parallelism_threads for best performance.";
-  return mkl_inter_op;
-#else
+  if (!DisableMKL()) {
+    // MKL library executes ops in parallel using OMP threads
+    // Set inter_op conservatively to avoid thread oversubscription that could
+    // lead to severe perf degradations and OMP resource exhaustion
+    int mkl_intra_op = 1;
+  #ifdef _OPENMP
+    mkl_intra_op = omp_get_max_threads();
+  #endif  // _OPENMP
+    CHECK_GE(mkl_intra_op, 1);
+    const int32 mkl_inter_op = std::max(
+        (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
+    VLOG(0) << "Creating new thread pool with default inter op setting: "
+            << mkl_inter_op
+            << ". Tune using inter_op_parallelism_threads for best performance.";
+    return mkl_inter_op;
+  }
+#endif  // INTEL_MKL
   // Default to using the number of cores available in the process.
   return port::NumSchedulableCPUs();
-#endif  // INTEL_MKL
 }
 
 thread::ThreadPool* NewThreadPoolFromSessionOptions(
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 29c01d7f72..f188016610 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -50,7 +50,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       allocator_(allocator),
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
-  // Eearly return when MKL is disabled
+  // Early return when MKL is disabled
   if (DisableMKL())
     return;
 #ifdef _OPENMP
@@ -118,7 +118,7 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
 };
 
 #ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", 200, MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200), MklCPUAllocatorFactory);
 #endif  // ENABLE_MKL
 
 }  // namespace
-- 
GitLab


From 04865e685f9d8ca8723e8f4e4aee3da41f902ae6 Mon Sep 17 00:00:00 2001
From: lanhin <lanhin1@gmail.com>
Date: Sat, 29 Sep 2018 20:48:07 +0800
Subject: [PATCH 0114/1085] Comment fix: cudnn output tensor data_format
 convert.

---
 tensorflow/core/kernels/conv_ops.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index 78856c4a99..4e60d9aada 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -851,7 +851,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
         ") filter shape(", filter.shape().DebugString(), ")"));
   }
 
-  // Convert the output tensor back from NHWC to NCHW.
+  // Convert the output tensor back from NCHW to NHWC.
   if (data_format == FORMAT_NHWC) {
     functor::NCHWToNHWC<GPUDevice, T, 4>()(
         ctx->eigen_device<GPUDevice>(),
-- 
GitLab


From f16111286b19f4145df63b73c45be1645bde8737 Mon Sep 17 00:00:00 2001
From: Bairen Yi <byi@connect.ust.hk>
Date: Sat, 29 Sep 2018 22:13:09 +0800
Subject: [PATCH 0115/1085] Added log entries for copying unpinned memory RDMA

Currently there are large number of tensors managed
by non-visitable memory allocators in CPU-only PS.
GPU workers seem less prone to this problem.

Copying large sized tensor buffers may introduce
non-trivial overhead. Should probably fix this.

Signed-off-by: Bairen Yi <byi@connect.ust.hk>
---
 tensorflow/contrib/gdr/gdr_memory_manager.cc | 156 +++++++++++--------
 1 file changed, 93 insertions(+), 63 deletions(-)

diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc
index bb06f1c41c..3549cedb70 100644
--- a/tensorflow/contrib/gdr/gdr_memory_manager.cc
+++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include <fstream>
 #include <list>
 #include <map>
-#include <set>
 
 #include <fcntl.h>
 #include <rdma/rdma_cma.h>
@@ -30,19 +29,17 @@ limitations under the License.
 #include <sys/epoll.h>
 
 #include "tensorflow/contrib/gdr/gdr.pb.h"
-#include "tensorflow/core/common_runtime/bfc_allocator.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
-#include "tensorflow/core/common_runtime/pool_allocator.h"
 #include "tensorflow/core/common_runtime/process_state.h"
 #if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #endif  // GOOGLE_CUDA
-#include "tensorflow/core/framework/allocator_registry.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/numa.h"
 
 namespace tensorflow {
 
@@ -70,14 +67,11 @@ bool IsGDRAvailable() {
 int TryToReadNumaNode(ibv_device* device) {
 #if defined(__APPLE__)
   LOG(INFO) << "OS X does not support NUMA - returning NUMA node 0";
-  return 0;
+  return port::kNUMANoAffinity;
 #elif defined(PLATFORM_WINDOWS)
   // Windows support for NUMA is not currently implemented. Return node 0.
-  return 0;
+  return port::kNUMANoAffinity;
 #else
-  VLOG(2) << "Trying to read NUMA node for device: " << device->name;
-  static const int kUnknownNumaNode = -1;
-
   auto filename = string(device->ibdev_path) + "/device/numa_node";
 
   std::ifstream ifs(filename.c_str());
@@ -91,12 +85,12 @@ int TryToReadNumaNode(ibv_device* device) {
                 << value
                 << "), but there must be at least one NUMA node"
                    ", so returning NUMA node zero";
-      return 0;
+      return port::kNUMANoAffinity;
     }
     LOG(INFO) << "NUMA node for device: " << device->name << " is " << value;
     return value;
   }
-  return kUnknownNumaNode;
+  return port::kNUMANoAffinity;
 #endif
 }
 
@@ -138,8 +132,6 @@ class GdrMemoryManager : public RemoteMemoryManager {
       Device* device, DeviceContext* device_context, bool on_host,
       StatusCallback done) override;
 
-  static void RegMemVisitors();
-
  protected:
   Status CreateEndpoint(const string& host, const string& port,
                         RdmaEndpointPtr& endpoint);
@@ -150,7 +142,8 @@ class GdrMemoryManager : public RemoteMemoryManager {
 
   ibv_mr* FindMemoryRegion(void* addr, size_t length);
 
-  void InsertMemoryRegion(void* addr, size_t length);
+  void InsertMemoryRegion(void* addr, size_t length,
+                          const std::string& allocator_name);
 
   void EvictMemoryRegion(void* addr, size_t length);
 
@@ -160,6 +153,7 @@ class GdrMemoryManager : public RemoteMemoryManager {
   RdmaEndpointPtr listening_;
   std::atomic<bool> stopped_;
   int epfd_;
+  int numa_node_;
 
   // Server side endpoints
   // Accessed sequentially in Run() so not protected by lock
@@ -190,46 +184,10 @@ GdrMemoryManager::GdrMemoryManager(const string& host, const string& port)
       port_(port),
       listening_(nullptr, EndpointDeleter),
       stopped_(true),
-      next_key_(0) {
-  static std::once_flag flag;
-  std::call_once(flag, []() { RegMemVisitors(); });
-}
+      next_key_(0) {}
 
 GdrMemoryManager::~GdrMemoryManager() { close(epfd_); }
 
-/*static*/ void GdrMemoryManager::RegMemVisitors() {
-  SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node,
-                                           size_t num_bytes) {
-    GdrMemoryManager::Singleton().InsertMemoryRegion(
-        ptr, num_bytes, strings::StrCat("CPU:", numa_node));
-  };
-  SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node,
-                                          size_t num_bytes) {
-    GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes);
-  };
-  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
-  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
-
-#if GOOGLE_CUDA
-  if (IsGDRAvailable()) {
-    int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
-
-    // Note we don't free allocated GPU memory so there is no free visitor
-    SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
-                                                  size_t num_bytes) {
-      RdmaMemoryMgr::Singleton().InsertMemoryRegion(
-          ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
-    };
-    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
-                                                     cuda_alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
-                                                          alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
-    LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
-  }
-#endif  // GOOGLE_CUDA
-}
-
 Status GdrMemoryManager::Init() {
   epfd_ = epoll_create1(0);
   if (epfd_ == -1) {
@@ -289,6 +247,42 @@ Status GdrMemoryManager::Init() {
                                "cannot add server to epoll");
   }
 
+  numa_node_ = TryToReadNumaNode(listening_->verbs->device);
+
+  SubAllocator::Visitor alloc_visitor = [this](void* ptr, int numa_node,
+                                               size_t num_bytes) {
+    VLOG(2) << "Registering RDMA capable memory region on numa_node "
+            << numa_node;
+    InsertMemoryRegion(ptr, num_bytes, strings::StrCat("CPU:", numa_node));
+  };
+  SubAllocator::Visitor free_visitor = [this](void* ptr, int numa_node,
+                                              size_t num_bytes) {
+    VLOG(2) << "De-registering RDMA capable memory region on numa_node "
+            << numa_node;
+    EvictMemoryRegion(ptr, num_bytes);
+  };
+  ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor);
+  ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
+  LOG(INFO) << "Instrumenting CPU allocator(s)";
+
+#if GOOGLE_CUDA
+  if (IsGDRAvailable()) {
+    int bus_id = numa_node_ + 1;
+
+    SubAllocator::Visitor cuda_alloc_visitor = [this](void* ptr, int gpu_id,
+                                                      size_t num_bytes) {
+      VLOG(2) << "Registering RDMA capable memory region on GPU " << gpu_id;
+      InsertMemoryRegion(ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
+    };
+    GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
+                                                     cuda_alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
+                                                          alloc_visitor);
+    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
+    LOG(INFO) << "Instrumenting GPU allocator(s) with bus_id " << bus_id;
+  }
+#endif  // GOOGLE_CUDA
+
   return Status::OK();
 }
 
@@ -405,7 +399,7 @@ void GdrMemoryManager::TransportOptionsFromTensor(
   ibv_mr* mr = FindMemoryRegion(addr, length);
 
 #if GOOGLE_CUDA
-  if (!on_host) {
+  if (device->tensorflow_gpu_device_info() && !on_host) {
     Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0);
     Tensor* host_copy = new Tensor(alloc, tensor.dtype(), tensor.shape());
     GPUUtil::CopyGPUTensorToCPU(
@@ -456,11 +450,27 @@ void GdrMemoryManager::TransportOptionsFromTensor(
 #endif
 
   if (mr == nullptr) {
-    done(errors::Unavailable("Cannot find pinned memory region"));
-    return;
+    Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_);
+    Tensor host_copy(alloc, tensor.dtype(), tensor.shape());
+
+    std::memcpy(DMAHelper::buffer(&host_copy)->data(), buffer->data(), length);
+    VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer";
+
+    buffer = DMAHelper::buffer(&host_copy);
+    addr = buffer->data();
+    length = buffer->size();
+
+    mr = FindMemoryRegion(addr, length);
+    if (mr == nullptr) {
+      done(errors::Unavailable("Cannot find pinned memory region"));
+      return;
+    }
+
+    buffer->Ref();
+  } else {
+    buffer->Ref();
   }
 
-  buffer->Ref();
   TensorKey tensor_key = next_key_++;
   {
     mutex_lock l(server_mu_);
@@ -470,7 +480,7 @@ void GdrMemoryManager::TransportOptionsFromTensor(
   uint64_t checksum = 0;
   if (VLOG_IS_ON(2)) {
 #ifdef GOOGLE_CUDA
-    if (!on_host) {
+    if (device->tensorflow_gpu_device_info() && !on_host) {
       checksum = GPUUtil::Checksum(device, device_context, tensor);
     } else {
       checksum = GPUUtil::Checksum(tensor);
@@ -508,7 +518,8 @@ void GdrMemoryManager::TensorFromTransportOptions(
   Tensor host_copy;
 #if GOOGLE_CUDA
   if (mr == nullptr && !on_host) {
-    Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0);
+    Allocator* alloc =
+        GPUProcessState::singleton()->GetCUDAHostAllocator(numa_node_);
     host_copy = Tensor(alloc, tensor->dtype(), tensor->shape());
     buffer = DMAHelper::buffer(&host_copy);
     addr = buffer->data();
@@ -518,8 +529,18 @@ void GdrMemoryManager::TensorFromTransportOptions(
 #endif  // GOOGLE_CUDA
 
   if (mr == nullptr) {
-    done(errors::Unavailable("Cannot find pinned memory region"));
-    return;
+    Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_);
+    host_copy = Tensor(alloc, tensor->dtype(), tensor->shape());
+
+    buffer = DMAHelper::buffer(&host_copy);
+    addr = buffer->data();
+    length = buffer->size();
+
+    mr = FindMemoryRegion(addr, length);
+    if (mr == nullptr) {
+      done(errors::Unavailable("Cannot find pinned memory region"));
+      return;
+    }
   }
 
   decltype(clients_)::iterator iter;
@@ -568,7 +589,8 @@ void GdrMemoryManager::TensorFromTransportOptions(
   }
 
 #if GOOGLE_CUDA
-  if (host_copy.NumElements() > 0) {
+  if (device->tensorflow_gpu_device_info() && !on_host &&
+      host_copy.NumElements() > 0) {
     uint64_t checksum = 0;
     if (VLOG_IS_ON(2)) {
       checksum = GPUUtil::Checksum(host_copy);
@@ -598,6 +620,12 @@ void GdrMemoryManager::TensorFromTransportOptions(
   }
 #endif  // GOOGLE_CUDA
 
+  if ((on_host || !device->tensorflow_gpu_device_info()) &&
+      host_copy.NumElements() > 0) {
+    std::memcpy(DMAHelper::buffer(tensor)->data(), addr, length);
+    VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer";
+  }
+
   uint64_t end = Env::Default()->NowMicros();
 
   VLOG(2) << "RDMA from remote memory region " << remote_mr.rkey()
@@ -607,7 +635,7 @@ void GdrMemoryManager::TensorFromTransportOptions(
   uint64_t checksum = 0;
   if (VLOG_IS_ON(2)) {
 #ifdef GOOGLE_CUDA
-    if (device->tensorflow_gpu_device_info() && (!on_host)) {
+    if (device->tensorflow_gpu_device_info() && !on_host) {
       checksum = GPUUtil::Checksum(device, device_context, *tensor);
     } else {
       checksum = GPUUtil::Checksum(*tensor);
@@ -668,7 +696,8 @@ ibv_mr* GdrMemoryManager::FindMemoryRegion(void* addr, size_t length) {
   }
 }
 
-void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) {
+void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length,
+                                          const std::string& allocator_name) {
   if (length == 0) return;
   ibv_mr* mr = rdma_reg_read(listening_.get(), addr, length);
   if (mr != nullptr) {
@@ -676,7 +705,8 @@ void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) {
     auto iter = std::upper_bound(mrs_.begin(), mrs_.end(), addr, &Comparator);
     mrs_.insert(iter, {mr, &MRDeleter});
   } else {
-    LOG(WARNING) << "Cannot register memory region";
+    LOG(WARNING) << "Cannot register memory region allocated by "
+                 << allocator_name;
   }
 }
 
-- 
GitLab


From eb6c1bdcbf6093888f2b443fdb49f836f3352316 Mon Sep 17 00:00:00 2001
From: Joe Yearsley <josephelliotyearsley@gmail.com>
Date: Tue, 13 Mar 2018 07:23:18 +0000
Subject: [PATCH 0116/1085] Update core.py

Added `data_format` to flatten to allow changing of it during inference time.
---
 tensorflow/python/layers/core.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index 9879e5020f..5f89e3c0c3 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -268,7 +268,14 @@ def dropout(inputs,
 @tf_export('layers.Flatten')
 class Flatten(keras_layers.Flatten, base.Layer):
   """Flattens an input tensor while preserving the batch axis (axis 0).
-
+  
+  Arguments:
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, ..., channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, ...)`.
+      
   Examples:
 
   ```
@@ -285,11 +292,16 @@ class Flatten(keras_layers.Flatten, base.Layer):
 
 
 @tf_export('layers.flatten')
-def flatten(inputs, name=None):
+def flatten(inputs, data_format='channels_last', name=None):
   """Flattens an input tensor while preserving the batch axis (axis 0).
 
   Arguments:
     inputs: Tensor input.
+    data_format: A string, one of `channels_last` (default) or `channels_first`.
+      The ordering of the dimensions in the inputs.
+      `channels_last` corresponds to inputs with shape
+      `(batch, height, width, channels)` while `channels_first` corresponds to
+      inputs with shape `(batch, channels, height, width)`.
     name: The name of the layer (string).
 
   Returns:
@@ -307,7 +319,7 @@ def flatten(inputs, name=None):
     # now `y` has shape `(None, None)`
   ```
   """
-  layer = Flatten(name=name)
+  layer = Flatten(data_format=data_format, name=name)
   return layer.apply(inputs)
 
 
-- 
GitLab


From dd928d5ae31dd0484e5e4a96c6322adecc4e511b Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sun, 18 Mar 2018 19:24:10 +0000
Subject: [PATCH 0117/1085] Added Flatten Test

---
 tensorflow/python/layers/core_test.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index d26f3f4789..0d019897aa 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -476,6 +476,22 @@ class FlattenTest(test.TestCase):
     shape = core_layers.Flatten().compute_output_shape((None, 3, None))
     self.assertEqual(shape.as_list(), [None, None])
 
+  def testDataFormat(self):
+    np_input_channels_last = np.arange(3, 7).reshape([1, 2, 3, 2])
+
+    with self.test_session() as sess:
+      x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
+      y = core_layers.Flatten(data_format='channels_last')(x)
+      np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last})
+
+      x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
+      y = core_layers.Flatten(data_format='channels_first')(x)
+      np_input_channels_first = np.transpose(np_input_channels_last,
+                                             [0, 3, 1, 2])
+      np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first})
+
+      self.assertEqual(np_output_cl, np_output_cf)
+
   def testFunctionalFlatten(self):
     x = array_ops.placeholder(shape=(None, 2, 3), dtype='float32')
     y = core_layers.flatten(x, name='flatten')
-- 
GitLab


From 579aecd2de1f0582858f83e3c8da2a8dbb57993b Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sun, 18 Mar 2018 20:08:59 +0000
Subject: [PATCH 0118/1085] added dtype to test

---
 tensorflow/python/layers/core_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 0d019897aa..31f3a4e0b0 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -477,7 +477,7 @@ class FlattenTest(test.TestCase):
     self.assertEqual(shape.as_list(), [None, None])
 
   def testDataFormat(self):
-    np_input_channels_last = np.arange(3, 7).reshape([1, 2, 3, 2])
+    np_input_channels_last = np.arange(12, dtype='float32').reshape([1, 2, 3, 2])
 
     with self.test_session() as sess:
       x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
-- 
GitLab


From 76964f315f7c52d63ce6578d87278a96c7394ece Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sun, 18 Mar 2018 22:01:21 +0000
Subject: [PATCH 0119/1085] pylint compliance

---
 tensorflow/python/layers/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index 5f89e3c0c3..5919fa543e 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -268,14 +268,14 @@ def dropout(inputs,
 @tf_export('layers.Flatten')
 class Flatten(keras_layers.Flatten, base.Layer):
   """Flattens an input tensor while preserving the batch axis (axis 0).
-  
+
   Arguments:
     data_format: A string, one of `channels_last` (default) or `channels_first`.
       The ordering of the dimensions in the inputs.
       `channels_last` corresponds to inputs with shape
       `(batch, ..., channels)` while `channels_first` corresponds to
       inputs with shape `(batch, channels, ...)`.
-      
+
   Examples:
 
   ```
-- 
GitLab


From 110baa57112a95c2644896ce6ff75894e1ae61c7 Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sun, 18 Mar 2018 23:10:55 +0000
Subject: [PATCH 0120/1085] Extended to N-dims

---
 tensorflow/python/layers/core_test.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 31f3a4e0b0..d5b8a0ff65 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -476,15 +476,31 @@ class FlattenTest(test.TestCase):
     shape = core_layers.Flatten().compute_output_shape((None, 3, None))
     self.assertEqual(shape.as_list(), [None, None])
 
-  def testDataFormat(self):
-    np_input_channels_last = np.arange(12, dtype='float32').reshape([1, 2, 3, 2])
+  def testDataFormat5d(self):
+    np_input_channels_last = np.arange(120, dtype='float32').reshape([1, 5, 4, 3, 2])
 
     with self.test_session() as sess:
-      x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
+      x = array_ops.placeholder(shape=(1, 5, 4, 3, 2), dtype='float32')
       y = core_layers.Flatten(data_format='channels_last')(x)
       np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last})
 
-      x = array_ops.placeholder(shape=(1, 2, 3, 2), dtype='float32')
+      x = array_ops.placeholder(shape=(1, 2, 5, 4, 3), dtype='float32')
+      y = core_layers.Flatten(data_format='channels_first')(x)
+      np_input_channels_first = np.transpose(np_input_channels_last,
+                                             [0, 4, 1, 2, 3])
+      np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first})
+
+      self.assertEqual(np_output_cl, np_output_cf)
+
+  def testDataFormat4d(self):
+    np_input_channels_last = np.arange(24, dtype='float32').reshape([1, 4, 3, 2])
+
+    with self.test_session() as sess:
+      x = array_ops.placeholder(shape=(1, 4, 3, 2), dtype='float32')
+      y = core_layers.Flatten(data_format='channels_last')(x)
+      np_output_cl = sess.run(y, feed_dict={x: np_input_channels_last})
+
+      x = array_ops.placeholder(shape=(1, 2, 4, 3), dtype='float32')
       y = core_layers.Flatten(data_format='channels_first')(x)
       np_input_channels_first = np.transpose(np_input_channels_last,
                                              [0, 3, 1, 2])
-- 
GitLab


From 4de591a03a9bd49a05d67fe48f9358dbdac51561 Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Sat, 30 Jun 2018 08:14:40 +0100
Subject: [PATCH 0121/1085] Fixed Pylint Issues

---
 tensorflow/python/layers/core_test.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index d5b8a0ff65..8ad0e8c4ba 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -477,7 +477,8 @@ class FlattenTest(test.TestCase):
     self.assertEqual(shape.as_list(), [None, None])
 
   def testDataFormat5d(self):
-    np_input_channels_last = np.arange(120, dtype='float32').reshape([1, 5, 4, 3, 2])
+    np_input_channels_last = np.arange(120, dtype='float32').reshape(
+        [1, 5, 4, 3, 2])
 
     with self.test_session() as sess:
       x = array_ops.placeholder(shape=(1, 5, 4, 3, 2), dtype='float32')
@@ -493,7 +494,8 @@ class FlattenTest(test.TestCase):
       self.assertEqual(np_output_cl, np_output_cf)
 
   def testDataFormat4d(self):
-    np_input_channels_last = np.arange(24, dtype='float32').reshape([1, 4, 3, 2])
+    np_input_channels_last = np.arange(24, dtype='float32').reshape(
+        [1, 4, 3, 2])
 
     with self.test_session() as sess:
       x = array_ops.placeholder(shape=(1, 4, 3, 2), dtype='float32')
-- 
GitLab


From 46fc7a9530e9c8f6bf909de8df8c97e4b38a99a5 Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Mon, 23 Jul 2018 23:06:48 +0100
Subject: [PATCH 0122/1085] Fixed Tests

---
 tensorflow/python/layers/core_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 8ad0e8c4ba..22ed75dda7 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -491,7 +491,7 @@ class FlattenTest(test.TestCase):
                                              [0, 4, 1, 2, 3])
       np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first})
 
-      self.assertEqual(np_output_cl, np_output_cf)
+      self.assertAllEqual(np_output_cl, np_output_cf)
 
   def testDataFormat4d(self):
     np_input_channels_last = np.arange(24, dtype='float32').reshape(
@@ -508,7 +508,7 @@ class FlattenTest(test.TestCase):
                                              [0, 3, 1, 2])
       np_output_cf = sess.run(y, feed_dict={x: np_input_channels_first})
 
-      self.assertEqual(np_output_cl, np_output_cf)
+      self.assertAllEqual(np_output_cl, np_output_cf)
 
   def testFunctionalFlatten(self):
     x = array_ops.placeholder(shape=(None, 2, 3), dtype='float32')
-- 
GitLab


From da930ea7fd16c903346ff36f5f57548dbea98bdc Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Tue, 21 Aug 2018 08:17:29 +0100
Subject: [PATCH 0123/1085] Updated golden

---
 tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 509ceff9df..e65ffeb12e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -832,10 +832,6 @@ tf_module {
     name: "broadcast_static_shape"
     argspec: "args=[\'shape_x\', \'shape_y\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "broadcast_to"
-    argspec: "args=[\'input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "case"
     argspec: "args=[\'pred_fn_pairs\', \'default\', \'exclusive\', \'strict\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'False\', \'case\'], "
-- 
GitLab


From 459accb2b7bdea542415f3a744cbe9e348f847d6 Mon Sep 17 00:00:00 2001
From: josephyearsley <joggino23@gmail.com>
Date: Tue, 21 Aug 2018 21:02:13 +0100
Subject: [PATCH 0124/1085] Updated layers

---
 tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
index df74c32e1f..5d9ea2e5a3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "
   }
   member_method {
     name: "max_pooling1d"
-- 
GitLab


From a58135a6a9637db0908c88f39df22b69bafaec3d Mon Sep 17 00:00:00 2001
From: Joe Yearsley <josephelliotyearsley@gmail.com>
Date: Sat, 25 Aug 2018 16:04:34 +0100
Subject: [PATCH 0125/1085] Updated protobuf

---
 tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index e65ffeb12e..509ceff9df 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -832,6 +832,10 @@ tf_module {
     name: "broadcast_static_shape"
     argspec: "args=[\'shape_x\', \'shape_y\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "broadcast_to"
+    argspec: "args=[\'input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "case"
     argspec: "args=[\'pred_fn_pairs\', \'default\', \'exclusive\', \'strict\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'False\', \'case\'], "
-- 
GitLab


From 8e87c649fc290c758c4240bf202de0c7f0f3a4ad Mon Sep 17 00:00:00 2001
From: Joe Yearsley <josephelliotyearsley@gmail.com>
Date: Sat, 29 Sep 2018 17:38:44 +0100
Subject: [PATCH 0126/1085] Updated v2

---
 tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
index df74c32e1f..5fd6ba1192 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
@@ -122,8 +122,8 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
+    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "  
+}
   member_method {
     name: "max_pooling1d"
     argspec: "args=[\'inputs\', \'pool_size\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'valid\', \'channels_last\', \'None\'], "
-- 
GitLab


From 32059ed204ecbee7828057d23a1c1daf561c87fd Mon Sep 17 00:00:00 2001
From: Joe Yearsley <josephelliotyearsley@gmail.com>
Date: Sat, 29 Sep 2018 17:42:52 +0100
Subject: [PATCH 0127/1085] Update tensorflow.layers.pbtxt

---
 tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
index 5fd6ba1192..5d9ea2e5a3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
@@ -122,8 +122,8 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "  
-}
+    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "
+  }
   member_method {
     name: "max_pooling1d"
     argspec: "args=[\'inputs\', \'pool_size\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'valid\', \'channels_last\', \'None\'], "
-- 
GitLab


From 914f68bfc0d7629496cd5ef6a6104efc94b6eecc Mon Sep 17 00:00:00 2001
From: Balint Cristian <cristian.balint@gmail.com>
Date: Sat, 29 Sep 2018 20:13:26 +0300
Subject: [PATCH 0128/1085] Add abseil_cpp cmake dependence.

---
 tensorflow/contrib/cmake/CMakeLists.txt       |   6 +-
 .../contrib/cmake/external/abseil_cpp.cmake   | 100 ++++++++++++++++++
 .../contrib/cmake/modules/FindAbseilCpp.cmake |  72 +++++++++++++
 3 files changed, 177 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/contrib/cmake/external/abseil_cpp.cmake
 create mode 100644 tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index c6d6f04168..dc0d2569c5 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -90,10 +90,12 @@ if (NOT WIN32)
 
   # Options for linking other libraries
   option(systemlib_ZLIB "Use the system installed library as shared objects instead of downloading ZLIB and statically linking to it: ZLIB" OFF)
+  option(systemlib_ABSEIL_CPP "Use the system installed library as shared objects instead of downloading ABSEIL_CPP and statically linking to it: ABSEIL_CPP" OFF)
 
   option(systemlib_ALL "Turn on every possible systemlib_* options" OFF)
   if (systemlib_ALL)
     set (systemlib_ZLIB ON)
+    set (systemlib_ABSEIL_CPP ON)
   endif (systemlib_ALL)
 endif()
 
@@ -115,7 +117,7 @@ function(SHOW_VARIABLES)
 endfunction()
 
 # External dependencies
-set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/external)
+set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/external ${PROJECT_SOURCE_DIR}/modules)
 
 # Location where external projects will be downloaded
 set (DOWNLOAD_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/downloads"
@@ -240,6 +242,7 @@ include(re2)
 include(cub)
 include(sqlite)
 include(double_conversion)
+include(abseil_cpp)
 if (tensorflow_BUILD_CC_TESTS)
   include(googletest)
 endif()
@@ -248,6 +251,7 @@ add_definitions(${ADD_CFLAGS})
 link_directories(${ADD_LINK_DIRECTORY})
 
 set(tensorflow_EXTERNAL_LIBRARIES
+    ${tensorflow_EXTERNAL_LIBRARIES}
     ${gif_STATIC_LIBRARIES}
     ${png_STATIC_LIBRARIES}
     ${jpeg_STATIC_LIBRARIES}
diff --git a/tensorflow/contrib/cmake/external/abseil_cpp.cmake b/tensorflow/contrib/cmake/external/abseil_cpp.cmake
new file mode 100644
index 0000000000..c6c5021f60
--- /dev/null
+++ b/tensorflow/contrib/cmake/external/abseil_cpp.cmake
@@ -0,0 +1,100 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+if (systemlib_ABSEIL_CPP)
+
+  find_package(AbseilCpp REQUIRED
+               absl_base
+               absl_spinlock_wait
+               absl_dynamic_annotations
+               absl_malloc_internal
+               absl_throw_delegate
+               absl_strings
+               str_format_internal
+               absl_bad_optional_access)
+
+  include_directories(${ABSEIL_CPP_INCLUDE_DIR})
+  list(APPEND tensorflow_EXTERNAL_LIBRARIES ${ABSEIL_CPP_LIBRARIES})
+
+  message(STATUS "  abseil_cpp includes: ${ABSEIL_CPP_INCLUDE_DIR}")
+  message(STATUS "  abseil_cpp libraries: ${ABSEIL_CPP_LIBRARIES}")
+
+  add_custom_target(abseil_cpp_build)
+  list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp_build)
+
+else (systemlib_ABSEIL_CPP)
+
+  include (ExternalProject)
+
+  set(abseil_cpp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build)
+  set(abseil_cpp_URL https://github.com/abseil/abseil-cpp/archive/e01d95528ea2137a4a27a88d1f57c6cb260aafed.tar.gz)
+  set(abseil_cpp_HASH SHA256=84043ed402d2a2a6ba4cdddb7e85118b1158fd81fe4ac3a14adc343d054c1e2e)
+  set(abseil_cpp_BUILD ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build)
+
+  if(WIN32)
+    if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
+      set(abseil_cpp_STATIC_LIBRARIES
+          ${abseil_cpp_BUILD}/absl/base/Release/absl_base.lib
+          ${abseil_cpp_BUILD}/absl/base/Release/absl_spinlock_wait.lib
+          ${abseil_cpp_BUILD}/absl/base/Release/absl_dynamic_annotations.lib
+          ${abseil_cpp_BUILD}/absl/base/Release/absl_malloc_internal.lib
+          ${abseil_cpp_BUILD}/absl/base/Release/absl_throw_delegate.lib
+          ${abseil_cpp_BUILD}/absl/strings/Release/absl_strings.lib
+          ${abseil_cpp_BUILD}/absl/strings/Release/str_format_internal.lib
+          ${abseil_cpp_BUILD}/absl/types/Release/absl_bad_optional_access.lib)
+    else()
+      set(abseil_cpp_STATIC_LIBRARIES
+          ${abseil_cpp_BUILD}/absl/base/absl_base.lib
+          ${abseil_cpp_BUILD}/absl/base/absl_spinlock_wait.lib
+          ${abseil_cpp_BUILD}/absl/base/absl_dynamic_annotations.lib
+          ${abseil_cpp_BUILD}/absl/base/absl_malloc_internal.lib
+          ${abseil_cpp_BUILD}/absl/base/absl_throw_delegate.lib
+          ${abseil_cpp_BUILD}/absl/strings/absl_strings.lib
+          ${abseil_cpp_BUILD}/absl/strings/str_format_internal.lib
+          ${abseil_cpp_BUILD}/absl/types/absl_bad_optional_access.lib)
+    endif()
+  else()
+    set(abseil_cpp_STATIC_LIBRARIES
+        ${abseil_cpp_BUILD}/absl/base/libabsl_base.a
+        ${abseil_cpp_BUILD}/absl/base/libabsl_spinlock_wait.a
+        ${abseil_cpp_BUILD}/absl/base/libabsl_dynamic_annotations.a
+        ${abseil_cpp_BUILD}/absl/base/libabsl_malloc_internal.a
+        ${abseil_cpp_BUILD}/absl/base/libabsl_throw_delegate.a
+        ${abseil_cpp_BUILD}/absl/strings/libabsl_strings.a
+        ${abseil_cpp_BUILD}/absl/strings/libstr_format_internal.a
+        ${abseil_cpp_BUILD}/absl/types/libabsl_bad_optional_access.a)
+  endif()
+
+  ExternalProject_Add(abseil_cpp_build
+      PREFIX abseil_cpp
+      URL ${abseil_cpp_URL}
+      URL_HASH ${abseil_cpp_HASH}
+      DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
+      BUILD_IN_SOURCE 1
+      BUILD_BYPRODUCTS ${abseil_cpp_STATIC_LIBRARIES}
+      BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release
+      COMMAND ${CMAKE_COMMAND} --build . --config Release
+      INSTALL_COMMAND ""
+      CMAKE_CACHE_ARGS
+          -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE}
+          -DCMAKE_BUILD_TYPE:STRING=Release
+          -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+  )
+
+  include_directories(${abseil_cpp_INCLUDE_DIR})
+  list(APPEND tensorflow_EXTERNAL_LIBRARIES ${abseil_cpp_STATIC_LIBRARIES})
+
+  list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp_build)
+
+endif (systemlib_ABSEIL_CPP)
diff --git a/tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake b/tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake
new file mode 100644
index 0000000000..d4f8bb1bec
--- /dev/null
+++ b/tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake
@@ -0,0 +1,72 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+find_path(ABSEIL_CPP_INCLUDE_DIR absl/base/config.h
+  HINTS "${ABSEIL_CPP_INCLUDE_DIR_HINTS}"
+  PATHS "$ENV{PROGRAMFILES}"
+        "$ENV{PROGRAMW6432}"
+  PATH_SUFFIXES "")
+
+if(EXISTS "${ABSEIL_CPP_INCLUDE_DIR}" AND NOT "${ABSEIL_CPP_INCLUDE_DIR}" STREQUAL "")
+
+  if(NOT AbseilCpp_FIND_COMPONENTS)
+    # search all libraries if no COMPONENTS was requested
+    set(AbseilCpp_FIND_COMPONENTS
+        "absl_algorithm;absl_any;absl_bad_any_cast"
+        "absl_bad_optional_access;absl_base absl_container;absl_debugging"
+        "absl_dynamic_annotations;absl_examine_stack;absl_failure_signal_handler"
+        "absl_int128;absl_leak_check;absl_malloc_internal;absl_memory;absl_meta"
+        "absl_numeric;absl_optional;absl_span;absl_spinlock_wait;absl_stack_consumption"
+        "absl_stacktrace;absl_str_format;absl_strings;absl_symbolize;absl_synchronization"
+        "absl_throw_delegate;absl_time;absl_utility;str_format_extension_internal"
+        "str_format_internal;test_instance_tracker_lib")
+  endif()
+
+  foreach(LIBNAME ${AbseilCpp_FIND_COMPONENTS})
+
+    unset(ABSEIL_CPP_LIBRARY CACHE)
+
+    find_library(ABSEIL_CPP_LIBRARY
+                 NAMES ${LIBNAME}
+                 HINTS ${ABSEIL_CPP_LIBRARIES_DIR_HINTS})
+
+    if(ABSEIL_CPP_LIBRARY)
+      list(APPEND ABSEIL_CPP_LIBRARIES ${ABSEIL_CPP_LIBRARY})
+    else()
+      message(FATAL_ERROR "\n"
+        "abseil_cpp library \"${LIBNAME}\" not found in system path.\n"
+        "Please provide locations using: -DABSEIL_CPP_LIBRARIES_DIR_HINTS:STRING=\"PATH\"\n")
+    endif()
+
+  endforeach()
+
+  unset(LIBNAME CACHE)
+  unset(ABSEIL_CPP_LIBRARY CACHE)
+
+  set(ABSEIL_CPP_FOUND TRUE)
+  message(STATUS "Found abseil_cpp libraries")
+
+  set(ABSEIL_CPP_INCLUDE_DIR "${ABSEIL_CPP_INCLUDE_DIR}" CACHE PATH "" FORCE)
+  mark_as_advanced(ABSEIL_CPP_INCLUDE_DIR)
+
+  set(ABSEIL_CPP_LIBRARIES "${ABSEIL_CPP_LIBRARIES}" CACHE PATH "" FORCE)
+  mark_as_advanced(ABSEIL_CPP_LIBRARIES)
+
+else()
+
+  message(FATAL_ERROR "\n"
+    "abseil_cpp headers not found in system path.\n"
+    "Please provide locations using: -DABSEIL_CPP_INCLUDE_DIR_HINTS:STRING=\"PATH\"\n")
+
+endif()
-- 
GitLab


From 70a395f9795a48c21bc35cdf1dc44778f73a7bba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 11:58:55 -0700
Subject: [PATCH 0129/1085] Automated rollback of commit
 d78595d333c9b5c8a0705ba6852c08b107d6c462

PiperOrigin-RevId: 215073584
---
 tensorflow/python/data/kernel_tests/BUILD     |  1 -
 tensorflow/tensorflow.bzl                     | 39 ++++++++-----------
 .../tools/pip_package/pip_smoke_test.py       |  2 +-
 3 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 99d7f70513..cadfe7f9e0 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -318,7 +318,6 @@ cuda_py_test(
         "//tensorflow/python:framework_test_lib",
     ],
     tags = [
-        "no_oss",  # TODO(b/116813115): Investigate timeout and re-enable.
         "no_windows_gpu",
     ],
 )
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index dead44c57e..cad5de1b0c 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1798,29 +1798,22 @@ def cuda_py_test(
         flaky = 0,
         xla_enabled = False,
         grpc_enabled = False):
-    if main == None:
-        main = name + ".py"
-    for config in ["cpu", "gpu"]:
-        test_name = name
-        test_tags = tags
-        if config == "gpu":
-            test_name += "_gpu"
-            test_tags = test_tags + tf_cuda_tests_tags()
-        tf_py_test(
-            name = test_name,
-            size = size,
-            srcs = srcs,
-            data = data,
-            main = main,
-            args = args,
-            tags = test_tags,
-            shard_count = shard_count,
-            additional_deps = additional_deps,
-            kernels = kernels,
-            flaky = flaky,
-            xla_enabled = xla_enabled,
-            grpc_enabled = grpc_enabled,
-        )
+    test_tags = tags + tf_cuda_tests_tags()
+    tf_py_test(
+        name = name,
+        size = size,
+        srcs = srcs,
+        data = data,
+        main = main,
+        args = args,
+        tags = test_tags,
+        shard_count = shard_count,
+        additional_deps = additional_deps,
+        kernels = kernels,
+        flaky = flaky,
+        xla_enabled = xla_enabled,
+        grpc_enabled = grpc_enabled,
+    )
 
 register_extension_info(
     extension_name = "cuda_py_test",
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index e7f9628fa6..c6ef82ccdc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -142,7 +142,7 @@ def main():
 
   missing_dependencies = []
   # File extensions and endings to ignore
-  ignore_extensions = ["_test", "_test.py", "_test_gpu", "_test_gpu.py"]
+  ignore_extensions = ["_test", "_test.py"]
 
   ignored_files = 0
   blacklisted_files = len(BLACKLIST)
-- 
GitLab


From 639d0dd8c1ba8d2956ccb59604c157de7ba0a7f2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 12:00:53 -0700
Subject: [PATCH 0130/1085] Cleanup

PiperOrigin-RevId: 215073641
---
 tensorflow/core/BUILD          | 3 ---
 tensorflow/core/profiler/BUILD | 1 -
 2 files changed, 4 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 7da4b9fbd0..57819cec70 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -239,7 +239,6 @@ tf_proto_library(
     srcs = [],
     cc_api_version = 2,
     default_header = True,
-    java_api_version = 2,
     js_api_version = 2,
     protodeps = [
         ":protos_all_proto",
@@ -2385,7 +2384,6 @@ tf_proto_library(
     srcs = ERROR_CODES_PROTO_SRCS,
     cc_api_version = 2,
     default_header = True,
-    java_api_version = 2,
     js_api_version = 2,
     provide_cc_alias = True,
 )
@@ -2406,7 +2404,6 @@ tf_proto_library(
     srcs = COMMON_PROTO_SRCS + ADDITIONAL_CORE_PROTO_SRCS,
     cc_api_version = 2,
     default_header = True,
-    java_api_version = 2,
     js_api_version = 2,
     protodeps = [
         ":error_codes_proto",
diff --git a/tensorflow/core/profiler/BUILD b/tensorflow/core/profiler/BUILD
index af034bdd7d..2bf371276e 100644
--- a/tensorflow/core/profiler/BUILD
+++ b/tensorflow/core/profiler/BUILD
@@ -40,7 +40,6 @@ tf_proto_library(
     name = "protos_all",
     srcs = glob(["**/*.proto"]),
     cc_api_version = 2,
-    java_api_version = 2,
     protodeps = tf_additional_all_protos(),
     visibility = ["//visibility:public"],
 )
-- 
GitLab


From 4cf1b45b2e9188086bcb7d12654cd3e130e9b823 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 14:13:01 -0700
Subject: [PATCH 0131/1085] Disable PinToHostOptimizer for NoOp.

PiperOrigin-RevId: 215079134
---
 .../core/grappler/optimizers/pin_to_host_optimizer.cc     | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 2190d38937..89eb76046e 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -169,7 +169,13 @@ bool IsTPUGraphDef(const GraphDef& def) {
 }
 
 // All the nodes that should be blacklisted and not swapped.
-bool IsBlacklisted(const NodeDef& node) { return IsCollective(node); }
+bool IsBlacklisted(const NodeDef& node) {
+  return
+      // Collective ops should not be swapped.
+      IsCollective(node) ||
+      // NoOp breaks perf regression tests (probably due to group dependencies).
+      IsNoOp(node);
+}
 }  // end namespace internal
 
 Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
-- 
GitLab


From 2538e68a69e585696175bd972cae119e06bde294 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 29 Sep 2018 16:13:51 -0700
Subject: [PATCH 0132/1085] Remove workaround for symlinked headers.

PiperOrigin-RevId: 215083669
---
 third_party/gpus/cuda_configure.bzl | 33 +++++++++--------------------
 third_party/py/python_configure.bzl |  4 ++--
 2 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index f5fdd3a75e..69f4599c16 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -1107,8 +1107,8 @@ def symlink_genrule_for_dir(
             # $(@D) will include the full path to the file.
             dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i]
 
-            # On Windows, symlink is not supported, so we just copy all the files.
-            cmd = "cp -f" if _is_windows(repository_ctx) else "ln -s"
+            # Copy the headers to create a sandboxable setup.
+            cmd = "cp -f"
             command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
             outs.append('        "' + dest_dir + dest_files[i] + '",')
     genrule = _genrule(
@@ -1334,27 +1334,14 @@ def _create_local_cuda_repository(repository_ctx):
         cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
         cuda_defines["%{host_compiler_warnings}"] = ""
 
-        # TODO(klimek): We currently need to inject "/" as builtin directory path
-        # to disable bazel's dependency checks.
-        # The problem is that:
-        # - the python rules symlink the python headers into the bazel root
-        # - the rules use 'includes' in the BUILD file to redirect includes of the
-        #   python headers through those paths
-        # - bazel currently uses -isystem for include paths specified via 'includes'
-        # - gcc follows symlinks when resolving files via -isystem paths, and puts
-        #   the resolved paths into the .d file, which makes the dependency check
-        #   fail for bazel
-        # There are multiple possible ways to solve this:
-        # 1. make bazel not use -isystem for paths specified via 'includes'
-        # 2. cp the headers instead of symlinking them
-        #
-        # Once this is fixed, the right builtin directory path is:
-        # (host_compiler_includes +
-        #    "\n  cxx_builtin_include_directory: \"%s\"" % cuda_include_path)
-        # The cuda directory needs to be passed, as there is currently no rule
-        # providing the cuda headers in the same way the python headers are
-        # provided.
-        cuda_defines["%{host_compiler_includes}"] = "\n  cxx_builtin_include_directory: \"/\""
+        # nvcc has the system include paths built in and will automatically
+        # search them; we cannot work around that, so we add the relevant cuda
+        # system paths to the allowed compiler specific include paths.
+        cuda_defines["%{host_compiler_includes}"] = (
+            host_compiler_includes + "\n" +
+            _cuda_include_path(repository_ctx, cuda_config) +
+            "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
+            "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir)
         nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" %
                                             (
                                                 cuda_config.cuda_toolkit_path,
diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl
index 3c7e5c8469..53264630a1 100644
--- a/third_party/py/python_configure.bzl
+++ b/third_party/py/python_configure.bzl
@@ -130,8 +130,8 @@ def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name,
       # If we have only one file to link we do not want to use the dest_dir, as
       # $(@D) will include the full path to the file.
       dest = '$(@D)/' + dest_dir + dest_files[i] if len(dest_files) != 1 else '$(@D)/' + dest_files[i]
-      # On Windows, symlink is not supported, so we just copy all the files.
-      cmd = 'cp -f' if _is_windows(repository_ctx) else 'ln -s'
+      # Copy the headers to create a sandboxable setup.
+      cmd = 'cp -f'
       command.append(cmd + ' "%s" "%s"' % (src_files[i] , dest))
       outs.append('        "' + dest_dir + dest_files[i] + '",')
   genrule = _genrule(src_dir, genrule_name, " && ".join(command),
-- 
GitLab


From e0da6256cd116d17057374594f2fc191cf201f42 Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Sat, 29 Sep 2018 23:29:28 -0700
Subject: [PATCH 0133/1085] Fixed format errors reported by clang-format

---
 tensorflow/core/common_runtime/process_util.cc      | 11 ++++++-----
 tensorflow/core/common_runtime/threadpool_device.cc |  6 +++---
 tensorflow/core/util/util.cc                        |  8 ++------
 tensorflow/core/util/util.h                         |  2 +-
 4 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index b3064a4c08..4570496637 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -62,15 +62,16 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
     // Set inter_op conservatively to avoid thread oversubscription that could
     // lead to severe perf degradations and OMP resource exhaustion
     int mkl_intra_op = 1;
-  #ifdef _OPENMP
+#ifdef _OPENMP
     mkl_intra_op = omp_get_max_threads();
-  #endif  // _OPENMP
+#endif  // _OPENMP
     CHECK_GE(mkl_intra_op, 1);
     const int32 mkl_inter_op = std::max(
         (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
-    VLOG(0) << "Creating new thread pool with default inter op setting: "
-            << mkl_inter_op
-            << ". Tune using inter_op_parallelism_threads for best performance.";
+    VLOG(0)
+        << "Creating new thread pool with default inter op setting: "
+        << mkl_inter_op
+        << ". Tune using inter_op_parallelism_threads for best performance.";
     return mkl_inter_op;
   }
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index f188016610..6404d8bc6a 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -51,8 +51,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
   // Early return when MKL is disabled
-  if (DisableMKL())
-    return;
+  if (DisableMKL()) return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
@@ -118,7 +117,8 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
 };
 
 #ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200), MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200),
+                       MklCPUAllocatorFactory);
 #endif  // ENABLE_MKL
 
 }  // namespace
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 44d5becb9c..489999d1e8 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -122,11 +122,7 @@ string SliceDebugString(const TensorShape& shape, const int64 flat) {
 
 #ifdef INTEL_MKL
 bool DisableMKL() {
-  enum MklStatus {
-    MKL_DEFAULT = 0,
-    MKL_ON = 1,
-    MKL_OFF = 2
-  };
+  enum MklStatus { MKL_DEFAULT = 0, MKL_ON = 1, MKL_OFF = 2 };
   static MklStatus status = MKL_DEFAULT;
   if (status == MKL_DEFAULT) {
     char* tf_disable_mkl = getenv("TF_DISABLE_MKL");
@@ -139,5 +135,5 @@ bool DisableMKL() {
   }
   return status == MKL_OFF ? true : false;
 }
-#endif
+#endif  // INTEL_MKL
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index ba90ad52c2..4aa47aa48a 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -59,7 +59,7 @@ string SliceDebugString(const TensorShape& shape, const int64 flat);
 // disable MKL in runtime
 #ifdef INTEL_MKL
 bool DisableMKL();
-#endif
+#endif  // INTEL_MKL
 
 }  // namespace tensorflow
 
-- 
GitLab


From 2b456a2b5dc6b5bb092b3986a400acb77b21a30f Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Sun, 30 Sep 2018 01:12:34 -0700
Subject: [PATCH 0134/1085] Added some minor format changes

---
 tensorflow/core/common_runtime/process_util.cc      | 6 +++---
 tensorflow/core/common_runtime/threadpool_device.cc | 6 +++---
 tensorflow/core/util/util.cc                        | 2 +-
 tensorflow/core/util/util.h                         | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index b3064a4c08..c75d8a8ce6 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -62,15 +62,15 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
     // Set inter_op conservatively to avoid thread oversubscription that could
     // lead to severe perf degradations and OMP resource exhaustion
     int mkl_intra_op = 1;
-  #ifdef _OPENMP
+#ifdef _OPENMP
     mkl_intra_op = omp_get_max_threads();
-  #endif  // _OPENMP
+#endif  // _OPENMP
     CHECK_GE(mkl_intra_op, 1);
     const int32 mkl_inter_op = std::max(
         (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
     VLOG(0) << "Creating new thread pool with default inter op setting: "
             << mkl_inter_op
-            << ". Tune using inter_op_parallelism_threads for best performance.";
+            << ".Tune using inter_op_parallelism_threads for best performance.";
     return mkl_inter_op;
   }
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index f188016610..6404d8bc6a 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -51,8 +51,7 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
       scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {
 #ifdef INTEL_MKL
   // Early return when MKL is disabled
-  if (DisableMKL())
-    return;
+  if (DisableMKL()) return;
 #ifdef _OPENMP
   const char* user_omp_threads = getenv("OMP_NUM_THREADS");
   if (user_omp_threads == nullptr) {
@@ -118,7 +117,8 @@ class MklCPUAllocatorFactory : public AllocatorFactory {
 };
 
 #ifdef ENABLE_MKL
-REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200), MklCPUAllocatorFactory);
+REGISTER_MEM_ALLOCATOR("MklCPUAllocator", (DisableMKL() ? 50 : 200),
+                       MklCPUAllocatorFactory);
 #endif  // ENABLE_MKL
 
 }  // namespace
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 44d5becb9c..6e78777dd9 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -139,5 +139,5 @@ bool DisableMKL() {
   }
   return status == MKL_OFF ? true : false;
 }
-#endif
+#endif  // INTEL_MKL
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index ba90ad52c2..4aa47aa48a 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -59,7 +59,7 @@ string SliceDebugString(const TensorShape& shape, const int64 flat);
 // disable MKL in runtime
 #ifdef INTEL_MKL
 bool DisableMKL();
-#endif
+#endif  // INTEL_MKL
 
 }  // namespace tensorflow
 
-- 
GitLab


From a00fe72261cf6fe4a00467139e401de14c16224c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 30 Sep 2018 02:00:58 -0700
Subject: [PATCH 0135/1085] compat: Update forward compatibility horizon to
 2018-09-30

PiperOrigin-RevId: 215109054
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 24a795c787..1f7cfe48b3 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 29)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 30)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 4ecce5aa64587afe1cd07ee4c92bbb5ce2cf85df Mon Sep 17 00:00:00 2001
From: Amit Patankar <amitpatankar@google.com>
Date: Sun, 30 Sep 2018 06:52:22 -0700
Subject: [PATCH 0136/1085] Removing the setuptools upper limit.

PiperOrigin-RevId: 215120867
---
 tensorflow/tools/pip_package/setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index b95e1f5c87..a9d8b0cff5 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -56,7 +56,6 @@ REQUIRED_PACKAGES = [
     'numpy >= 1.13.3',
     'six >= 1.10.0',
     'protobuf >= 3.6.0',
-    'setuptools <= 39.1.0',
     'tensorboard >= 1.11.0, < 1.12.0',
     'termcolor >= 1.1.0',
 ]
-- 
GitLab


From 5fa4e1ac928b0512b28e955c588c5a7eab2ea046 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 30 Sep 2018 11:57:45 -0700
Subject: [PATCH 0137/1085] Parallel_for: fix converters for some ops that
 don't support broadcasting.

PiperOrigin-RevId: 215133508
---
 tensorflow/python/ops/parallel_for/pfor.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/ops/parallel_for/pfor.py b/tensorflow/python/ops/parallel_for/pfor.py
index e0f6d51881..83cbe64ff2 100644
--- a/tensorflow/python/ops/parallel_for/pfor.py
+++ b/tensorflow/python/ops/parallel_for/pfor.py
@@ -1987,14 +1987,12 @@ def _convert_cast(pfor_input):
 @RegisterPForWithArgs("Pow", math_ops.pow)
 @RegisterPForWithArgs("RealDiv", math_ops.divide)
 @RegisterPForWithArgs("Real", math_ops.real)
-@RegisterPForWithArgs("ReciprocalGrad", math_ops.reciprocal_grad)
 @RegisterPForWithArgs("Reciprocal", math_ops.reciprocal)
 @RegisterPForWithArgs("Relu6", nn_ops.relu6)
 @RegisterPForWithArgs("Relu", nn_ops.relu)
 @RegisterPForWithArgs("RightShift", bitwise_ops.right_shift)
 @RegisterPForWithArgs("Rint", math_ops.rint)
 @RegisterPForWithArgs("Round", math_ops.round)
-@RegisterPForWithArgs("RsqrtGrad", math_ops.rsqrt_grad)
 @RegisterPForWithArgs("Rsqrt", math_ops.rsqrt)
 @RegisterPForWithArgs("Selu", nn_ops.selu)
 @RegisterPForWithArgs("Sigmoid", math_ops.sigmoid)
@@ -2003,7 +2001,6 @@ def _convert_cast(pfor_input):
 @RegisterPForWithArgs("Sin", math_ops.sin)
 @RegisterPForWithArgs("Softplus", nn_ops.softplus)
 @RegisterPForWithArgs("Softsign", nn_ops.softsign)
-@RegisterPForWithArgs("SqrtGrad", math_ops.sqrt_grad)
 @RegisterPForWithArgs("Sqrt", math_ops.sqrt)
 @RegisterPForWithArgs("SquaredDifference", math_ops.squared_difference)
 @RegisterPForWithArgs("Square", math_ops.square)
@@ -2095,6 +2092,9 @@ def _convert_biasaddgrad(pfor_input):
 @RegisterPForWithArgs("SoftplusGrad")
 @RegisterPForWithArgs("SoftsignGrad")
 @RegisterPForWithArgs("TanhGrad")
+@RegisterPForWithArgs("SqrtGrad")
+@RegisterPForWithArgs("RsqrtGrad")
+@RegisterPForWithArgs("ReciprocalGrad")
 def _convert_grads(pfor_input, op_type, *args, **kw_args):
   del args
   del kw_args
-- 
GitLab


From 76c4853b50f201b4a809ac66746c798e049b294c Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Sun, 30 Sep 2018 20:03:29 -0700
Subject: [PATCH 0138/1085] Bump the version of protobuf TF pip package depends
 on.

Fixes #21719

PiperOrigin-RevId: 215154273
---
 tensorflow/tools/pip_package/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index a9d8b0cff5..88c9c20d36 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -55,7 +55,7 @@ REQUIRED_PACKAGES = [
     'keras_preprocessing >= 1.0.3',
     'numpy >= 1.13.3',
     'six >= 1.10.0',
-    'protobuf >= 3.6.0',
+    'protobuf >= 3.6.1',
     'tensorboard >= 1.11.0, < 1.12.0',
     'termcolor >= 1.1.0',
 ]
-- 
GitLab


From b797bfb750504e03a38a988c44e3c52e902e87c4 Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Sun, 30 Sep 2018 22:34:28 -0700
Subject: [PATCH 0139/1085] [HloOrdering] Make parameter always defined before
 other instructions.

- Make parameter always defined before other instructions.
- Add extra indentations to the predecessor field in ToString() method to make it clear.

PiperOrigin-RevId: 215162840
---
 .../compiler/xla/service/hlo_ordering.cc      | 10 +++++++---
 .../compiler/xla/service/hlo_ordering_test.cc | 20 +++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_ordering.cc b/tensorflow/compiler/xla/service/hlo_ordering.cc
index f1dc08bafa..23d41d91d6 100644
--- a/tensorflow/compiler/xla/service/hlo_ordering.cc
+++ b/tensorflow/compiler/xla/service/hlo_ordering.cc
@@ -92,14 +92,18 @@ bool HloOrdering::ExecutesBefore(const HloInstruction* a,
 }
 
 bool HloOrdering::IsDefinedBefore(const HloValue& a, const HloValue& b) const {
-  // If 'b' is an entry param then 'a' cannot be defined before 'b' because 'b'
-  // is live into the module.
+  // Entry parameter should always be defined before other instructions.
   const HloModule* module = b.defining_instruction()->parent()->parent();
   if (b.defining_instruction()->parent() == module->entry_computation() &&
       b.defining_instruction()->opcode() == HloOpcode::kParameter) {
     return false;
   }
 
+  if (a.defining_instruction()->parent() == module->entry_computation() &&
+      a.defining_instruction()->opcode() == HloOpcode::kParameter) {
+    return true;
+  }
+
   // Phi values require special handling. Because XLA does not have a phi
   // instruction, the definition instruction of the phis values are
   // placeholders: either the subcomputation parameter (body or condition) or
@@ -316,7 +320,7 @@ string PredecessorHloOrdering::ToStringHelper(const string& name) const {
       for (auto predecessor : all) {
         if (predecessors_.at(computation)
                 ->IsReachable(predecessor, instruction)) {
-          pieces.push_back(absl::StrFormat("  %s", predecessor->name()));
+          pieces.push_back(absl::StrFormat("    %s", predecessor->name()));
         }
       }
     }
diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc
index 00970bcda3..b045adc964 100644
--- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc
@@ -174,6 +174,26 @@ TEST_F(HloOrderingTest, InstructionsInWhileComputations) {
   EXPECT_FALSE(ordering.ExecutesBefore(body_param, cond_param));
 }
 
+TEST_F(HloOrderingTest, ParametersDefinedBeforeOthers) {
+  // Entry parameter should always be defined before other instruction.
+  auto module = CreateNewModule();
+  const Shape scalar_shape = ShapeUtil::MakeShape(xla::F32, {});
+  auto builder = HloComputation::Builder(TestName());
+  auto constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(1.0)));
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "param"));
+  module->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(auto dataflow,
+                          HloDataflowAnalysis::Run(*module, /*ssa_form=*/true));
+
+  DependencyHloOrdering ordering(module.get());
+  EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(param),
+                                       dataflow->GetValueDefinedAt(constant)));
+  EXPECT_TRUE(!ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(constant),
+                                        dataflow->GetValueDefinedAt(param)));
+}
+
 TEST_F(HloOrderingTest, ValuesInWhileComputations) {
   // Tests the ordering of values (defined by dataflow analysis) in the body and
   // condition of a while instruction. HLO code:
-- 
GitLab


From 03c5f9cdce62f6711b91fe81505e3c085e54a771 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 02:03:50 -0700
Subject: [PATCH 0140/1085] compat: Update forward compatibility horizon to
 2018-10-01

PiperOrigin-RevId: 215179315
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 1f7cfe48b3..bea5aa990f 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 30)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 1)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 0fd21d8c34e15bc3013e93014d101b672e1f3687 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 02:41:01 -0700
Subject: [PATCH 0141/1085] [TF:XLA] Teach deadness analysis more of
 distributive property.

PiperOrigin-RevId: 215183847
---
 tensorflow/compiler/jit/deadness_analysis.cc  | 107 ++++++++++++++----
 .../compiler/jit/deadness_analysis_test.cc    |  31 ++++-
 2 files changed, 112 insertions(+), 26 deletions(-)

diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index 9128b48da3..25e2e9a7af 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/jit/deadness_analysis.h"
+#include "absl/algorithm/container.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/jit/deadness_analysis_internal.h"
 #include "tensorflow/core/graph/algorithm.h"
@@ -383,6 +384,8 @@ class PredicateFactory {
   }
 
   Predicate* MakeAndOrImpl(absl::Span<Predicate* const> operands, bool is_and);
+  Predicate* MakeInternedAndOr(std::vector<Predicate*> simplified_ops,
+                               Predicate::Kind pred_kind);
 
   // Predicate instances are interned, meaning that there is only a single
   // instance of a Predicate object with a given content.  This makes checking
@@ -429,11 +432,40 @@ class PredicateFactory {
       interned_symbol_instances_;
 };
 
+Predicate* PredicateFactory::MakeInternedAndOr(
+    std::vector<Predicate*> simplified_ops, Predicate::Kind pred_kind) {
+  std::stable_sort(
+      simplified_ops.begin(), simplified_ops.end(),
+      [](Predicate* a, Predicate* b) { return a->hash() < b->hash(); });
+
+  auto it = interned_and_or_instances_.find({pred_kind, simplified_ops});
+  if (it != interned_and_or_instances_.end()) {
+    return it->second.get();
+  }
+
+  simplified_ops.shrink_to_fit();
+  // NB!  Because we'll use a non-owning reference to simplified_ops in the
+  // key for interned_and_or_instances_ we need to be careful to std::move()
+  // it all the way through.
+  absl::Span<Predicate* const> operands_slice = simplified_ops;
+  std::unique_ptr<Predicate> new_pred =
+      pred_kind == Predicate::Kind::kAnd
+          ? Make<AndPredicate>(std::move(simplified_ops))
+          : Make<OrPredicate>(std::move(simplified_ops));
+
+  Predicate* new_pred_ptr = new_pred.get();
+  interned_and_or_instances_.emplace(
+      SignatureForAndOr(pred_kind, operands_slice), std::move(new_pred));
+  return new_pred_ptr;
+}
+
 // Common code to create AndPredicate or OrPredicate instances.
 Predicate* PredicateFactory::MakeAndOrImpl(
     absl::Span<Predicate* const> operands, bool is_and) {
   Predicate::Kind pred_kind =
       is_and ? Predicate::Kind::kAnd : Predicate::Kind::kOr;
+  Predicate::Kind other_pred_kind =
+      is_and ? Predicate::Kind::kOr : Predicate::Kind::kAnd;
   gtl::FlatSet<Predicate*> simplified_ops_set;
   std::vector<Predicate*> simplified_ops;
   for (Predicate* op : operands) {
@@ -472,30 +504,63 @@ Predicate* PredicateFactory::MakeAndOrImpl(
     }
   }
 
-  std::stable_sort(
-      simplified_ops.begin(), simplified_ops.end(),
-      [](Predicate* a, Predicate* b) { return a->hash() < b->hash(); });
+  // If all ops contain the same subop, then factor it out thanks to the
+  // distributive property. Such as:
+  // - (A & B) | (A & C) | (A & D) => A & (B | C | D)
+  // - (A | B) & (A | C) & (A | D) => A | (B & C & D)
+  //
+  // First find any predicates contained in all subops.
+  std::vector<Predicate*> common_inner_operands;
+  gtl::FlatSet<Predicate*> common_inner_operands_set;
+  for (Predicate* op : simplified_ops) {
+    if (op->kind() != other_pred_kind) {
+      common_inner_operands.clear();
+      break;
+    }
 
-  auto it = interned_and_or_instances_.find({pred_kind, simplified_ops});
-  if (it == interned_and_or_instances_.end()) {
-    simplified_ops.shrink_to_fit();
-    // NB!  Because we'll use a non-owning reference to simplified_ops in the
-    // key for interned_and_or_instances_ we need to be careful to std::move()
-    // it all the way through.
-    absl::Span<Predicate* const> operands_slice = simplified_ops;
-    std::unique_ptr<Predicate> new_pred =
-        is_and ? Make<AndPredicate>(std::move(simplified_ops))
-               : Make<OrPredicate>(std::move(simplified_ops));
+    if (common_inner_operands.empty()) {
+      common_inner_operands.insert(common_inner_operands.end(),
+                                   op->GetOperands().begin(),
+                                   op->GetOperands().end());
+    } else {
+      std::vector<Predicate*> sub_ops_intersection;
+      common_inner_operands.clear();
+      absl::c_copy_if(op->GetOperands(),
+                      std::back_inserter(common_inner_operands),
+                      [&](Predicate* sub_op) {
+                        return common_inner_operands_set.count(sub_op) == 1;
+                      });
+    }
+    if (common_inner_operands.empty()) break;
+    common_inner_operands_set.clear();
+    common_inner_operands_set.insert(common_inner_operands.begin(),
+                                     common_inner_operands.end());
+  }
 
-    Predicate* new_pred_ptr = new_pred.get();
-    CHECK(interned_and_or_instances_
-              .emplace(SignatureForAndOr(pred_kind, operands_slice),
-                       std::move(new_pred))
-              .second);
-    return new_pred_ptr;
-  } else {
-    return it->second.get();
+  if (common_inner_operands.empty()) {
+    return MakeInternedAndOr(std::move(simplified_ops), pred_kind);
   }
+
+  // For all predicates that can be factored out, remove them and recreate the
+  // subops.
+  std::vector<Predicate*> factored_ops;
+  for (Predicate* op : simplified_ops) {
+    std::vector<Predicate*> new_sub_op_ops;
+    absl::c_copy_if(op->GetOperands(), std::back_inserter(new_sub_op_ops),
+                    [&](Predicate* sub_op) {
+                      return std::find(common_inner_operands.begin(),
+                                       common_inner_operands.end(),
+                                       sub_op) == common_inner_operands.end();
+                    });
+    factored_ops.push_back(MakeAndOrImpl(new_sub_op_ops, !is_and));
+  }
+
+  Predicate* new_inner_op = MakeAndOrImpl(factored_ops, is_and);
+  std::vector<Predicate*> outer_ops;
+  outer_ops.push_back(new_inner_op);
+  outer_ops.insert(outer_ops.end(), common_inner_operands.begin(),
+                   common_inner_operands.end());
+  return MakeAndOrImpl(outer_ops, !is_and);
 }
 
 class DeadnessAnalysisImpl : public DeadnessAnalysis {
diff --git a/tensorflow/compiler/jit/deadness_analysis_test.cc b/tensorflow/compiler/jit/deadness_analysis_test.cc
index 28a56044d5..617e31488c 100644
--- a/tensorflow/compiler/jit/deadness_analysis_test.cc
+++ b/tensorflow/compiler/jit/deadness_analysis_test.cc
@@ -384,10 +384,31 @@ TEST(DeadnessAnalysisTest, OrOfAnd) {
   EXPECT_FALSE(result->HasInputsWithMismatchingDeadness(*add2.node()));
 }
 
-TEST(DeadnessAnalysisTest, NEGATIVE_AndOrDistributive) {
-  // This demonstrates one of the weaknesses in the current approach -- since we
-  // only do some basic simplifications we can't see that "(A|B)&C" ==
-  // "(A&C)|(B&C)".
+TEST(DeadnessAnalysisTest, AndOrDistributiveSimplified) {
+  // (*A | (~*A & ((~*B & ~*A) | (~*A & *B)))) == #true
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  ops::Switch sw_0 = CreateSwitch(root, "A");
+  ops::Switch sw_1 = CreateSwitch(root, "B");
+  Output add0 =
+      ops::Add(root.WithOpName("and0"), sw_0.output_false, sw_1.output_true);
+  Output add1 =
+      ops::Add(root.WithOpName("and1"), sw_0.output_false, sw_1.output_false);
+  ops::Merge or2(root.WithOpName("or2"), {add0, add1});
+  Output add3 =
+      ops::Add(root.WithOpName("and3"), or2.output, sw_0.output_false);
+  ops::Merge or4(root.WithOpName("or4"), {add3, sw_0.output_true});
+
+  std::unique_ptr<DeadnessAnalysis> result;
+  TF_ASSERT_OK(AnalyzeDeadness(root.graph(), &result));
+
+  PredicateMapTy predicate_map;
+  TF_ASSERT_OK(ComputePredicates(*root.graph(), &predicate_map));
+  EXPECT_EQ(predicate_map[ControlOutputFor(or4.output)], "#true");
+}
+
+TEST(DeadnessAnalysisTest, AndOrDistributive) {
+  // (A|B)&C == (A&C)|(B&C)
   Scope root = Scope::NewRootScope().ExitOnError();
 
   ops::Switch sw_0 = CreateSwitch(root, "0");
@@ -408,7 +429,7 @@ TEST(DeadnessAnalysisTest, NEGATIVE_AndOrDistributive) {
   std::unique_ptr<DeadnessAnalysis> result;
   TF_ASSERT_OK(AnalyzeDeadness(root.graph(), &result));
 
-  EXPECT_TRUE(result->HasInputsWithMismatchingDeadness(*add2.node()));
+  EXPECT_FALSE(result->HasInputsWithMismatchingDeadness(*add3.node()));
 }
 
 TEST(DeadnessAnalysisTest, Ternary) {
-- 
GitLab


From c1c63c936c4bc51b401b82fbe54ed1945f49a314 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 03:27:05 -0700
Subject: [PATCH 0142/1085] Moves the creation of regularizer ops in
 get_variable out of surrounding context.

This resembles the behaviour for initializer ops.

PiperOrigin-RevId: 215187942
---
 tensorflow/python/ops/variable_scope.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index af5c7d4050..5032ca79f9 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -939,7 +939,8 @@ class _VariableStore(object):
     if regularizer:
       with ops.colocate_with(v):
         with ops.name_scope(name + "/Regularizer/"):
-          loss = regularizer(v)
+          with ops.init_scope():
+            loss = regularizer(v)
         if loss is not None:
           if context.executing_eagerly():
             v_name = "v_%s" % type(v)
-- 
GitLab


From 9a169bf3ba840af8ab3caae7ea1c69c682be3ab7 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Mon, 1 Oct 2018 03:34:35 -0700
Subject: [PATCH 0143/1085] Add allowed optimizations to GrapplerItem.

(1) Skip UnaryOpComposition rewrite if the optimized graph needs to have a gradient registered for all nodes.

PiperOrigin-RevId: 215188461
---
 tensorflow/core/grappler/grappler_item.cc     |   1 +
 tensorflow/core/grappler/grappler_item.h      |   9 ++
 tensorflow/core/grappler/op_types.cc          |   4 +
 tensorflow/core/grappler/op_types.h           |   1 +
 tensorflow/core/grappler/optimizers/BUILD     |   2 +
 .../optimizers/arithmetic_optimizer.cc        |   4 +
 .../grappler/optimizers/meta_optimizer.cc     |  19 +++
 .../optimizers/meta_optimizer_test.cc         | 126 ++++++++++++++++++
 8 files changed, 166 insertions(+)

diff --git a/tensorflow/core/grappler/grappler_item.cc b/tensorflow/core/grappler/grappler_item.cc
index bbc0fedd22..2c490f3966 100644
--- a/tensorflow/core/grappler/grappler_item.cc
+++ b/tensorflow/core/grappler/grappler_item.cc
@@ -38,6 +38,7 @@ GrapplerItem::GrapplerItem(const GrapplerItem& other, GraphDef* graph_def) {
   restore_op = other.restore_op;
   save_restore_loc_tensor = other.save_restore_loc_tensor;
   queue_runners = other.queue_runners;
+  allowed_optimizations = other.allowed_optimizations;
   graph.Swap(graph_def);
 }
 
diff --git a/tensorflow/core/grappler/grappler_item.h b/tensorflow/core/grappler/grappler_item.h
index 939e5fa046..a0748abfe6 100644
--- a/tensorflow/core/grappler/grappler_item.h
+++ b/tensorflow/core/grappler/grappler_item.h
@@ -77,6 +77,15 @@ struct GrapplerItem {
   // Return a set of node names that must be preserved. This includes feed and
   // fetch nodes, keep_ops, init_ops.
   std::unordered_set<string> NodesToPreserve() const;
+
+  // Restrict types of optimizations that are allowed for this GrapplerItem.
+  struct AllowedOptimizations {
+    // Is it allowed to add nodes to the graph that do not have registered
+    // gradient function.
+    bool non_differentiable_rewrites = true;
+  };
+
+  AllowedOptimizations allowed_optimizations;
 };
 
 // Return the transitive fanin of a set of terminal nodes.
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 3521669b63..9f0d9dbf28 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -425,6 +425,10 @@ bool IsSwitch(const NodeDef& node) {
   return op == "Switch" || op == "RefSwitch";
 }
 
+bool IsSymbolicGradient(const NodeDef& node) {
+  return node.op() == "SymbolicGradient";
+}
+
 bool IsTanhGrad(const NodeDef& node) { return node.op() == "TanhGrad"; }
 
 bool IsTile(const NodeDef& node) { return node.op() == "Tile"; }
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 25ab6b65ac..7f86a5f295 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -149,6 +149,7 @@ bool IsStridedSliceGrad(const NodeDef& node);
 bool IsSub(const NodeDef& node);
 bool IsSum(const NodeDef& node);
 bool IsSwitch(const NodeDef& node);
+bool IsSymbolicGradient(const NodeDef& node);
 bool IsTanhGrad(const NodeDef& node);
 bool IsTile(const NodeDef& node);
 bool IsTranspose(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 960d1addb3..c708f84948 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -525,6 +525,7 @@ cc_library(
         "//tensorflow/core:core_cpu_base",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler/utils:colocation",
@@ -541,6 +542,7 @@ tf_cuda_cc_test(
         ":custom_graph_optimizer_registry",
         ":meta_optimizer",
         "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
         "//tensorflow/core:test",
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 3388ee8035..7d5014ee0a 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -3249,6 +3249,10 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
   optimized_graph_ = &optimized_item.graph;
   node_map_.reset(new NodeMap(optimized_graph_));
 
+  // Disable restricted graph rewrites.
+  options_.unary_ops_composition &=
+      item.allowed_optimizations.non_differentiable_rewrites;
+
   if (options_.dedup_computations) {
     DedupComputations();
   }
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 406c1b60ce..a5f851fb1a 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -37,6 +37,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/functions.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
@@ -413,6 +414,15 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   FunctionLibraryDefinition flib(OpRegistry::Global(),
                                  optimized_graph->library());
 
+  // Find functions for which we might need to compute a gradient at runtime.
+  gtl::FlatSet<string> differentiable_functions;
+  for (const NodeDef& node : optimized_graph->node()) {
+    if (IsSymbolicGradient(node)) {
+      const auto* f_attr = gtl::FindOrNull(node.attr(), "f");
+      if (f_attr) differentiable_functions.insert(f_attr->func().name());
+    }
+  }
+
   // Optimize each function only once.
   std::unordered_set<string> optimized_funcs;
   bool optimize_function_library = true;
@@ -428,6 +438,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
 
       // Skip parametrized functions (function type or body is defined only at
       // function call time by caller node attributes).
+      // They should be specialized to their instantiation type parameters by
+      // the function optimizer, before we can optimize function body.
       if (IsParametrized(func)) continue;
 
       VLOG(3) << "Optimize function: function=" << func_name;
@@ -442,6 +454,13 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       TF_RETURN_IF_ERROR(MakeGrapplerFunctionItem(
           func, flib, item.graph.versions().producer(), &func_item));
 
+      // If we need to compute the gradient of optimized function at runtime, we
+      // can't perform non-differentiable rewrites.
+      if (differentiable_functions.find(func_name) !=
+          differentiable_functions.end()) {
+        func_item.allowed_optimizations.non_differentiable_rewrites = false;
+      }
+
       // Optimize function body graph.
       GraphDef optimized_func_graph;
       TF_RETURN_IF_ERROR(
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
index c477c4d4b1..3f3f43382f 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/grappler_test.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -82,6 +83,48 @@ class TestOptimizerWithParams : public TestOptimizer {
 
 REGISTER_GRAPH_OPTIMIZER(TestOptimizerWithParams);
 
+// Record various properties of the GrapplerItems passed for optimization.
+class GrapplerItemPropertiesAccumulator : public CustomGraphOptimizer {
+ public:
+  static void SetAllowedOptimizations(
+      gtl::FlatMap<string, GrapplerItem::AllowedOptimizations>*
+          allowed_optimizations) {
+    allowed_optimizations_ = allowed_optimizations;
+  }
+  static void ResetAllowedOptimizations() { allowed_optimizations_ = nullptr; }
+
+  GrapplerItemPropertiesAccumulator() {}
+  string name() const override {
+    return "grappler_item_properties_accumulator";
+  }
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* optimized_graph) override {
+    *optimized_graph = item.graph;
+    if (allowed_optimizations_) {
+      allowed_optimizations_->insert({item.id, item.allowed_optimizations});
+    }
+    return Status::OK();
+  }
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimized_graph, double result) override {}
+
+ private:
+  static gtl::FlatMap<string, GrapplerItem::AllowedOptimizations>*
+      allowed_optimizations_;
+};
+
+gtl::FlatMap<string, GrapplerItem::AllowedOptimizations>*
+    GrapplerItemPropertiesAccumulator::allowed_optimizations_;
+
+REGISTER_GRAPH_OPTIMIZER(GrapplerItemPropertiesAccumulator);
+
 class MetaOptimizerTest : public GrapplerTest {};
 
 TEST_F(MetaOptimizerTest, RunsCustomOptimizer) {
@@ -335,6 +378,89 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibrary) {
   test::ExpectTensorEqual<int>(tensors_expected[1], tensors[1]);
 }
 
+TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryWithRestrictions) {
+  using test::function::NDef;
+  using FDH = FunctionDefHelper;
+
+  // We will record what type of optimizations meta optimizer allows for each
+  // GrapplerItem (main graph and graphs for each function).
+  gtl::FlatMap<string, GrapplerItem::AllowedOptimizations>
+      allowed_optimizations;
+  GrapplerItemPropertiesAccumulator::SetAllowedOptimizations(
+      &allowed_optimizations);
+
+  // Just record properties of optimized Grappler items.
+  RewriterConfig rewriter_config;
+  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO);
+  rewriter_config.add_optimizers("GrapplerItemPropertiesAccumulator");
+  rewriter_config.set_min_graph_nodes(-1);
+
+  MetaOptimizer optimizer(nullptr, rewriter_config);
+
+  // Define simple function library with two identical mul functions.
+  FunctionDef mul_func_1 = FunctionDefHelper::Create(
+      "MyMul1", {"x:float", "y:float"}, {"z:float"}, {},
+      {{{"mul"}, "Mul", {"x", "y"}, {}}},
+      /* Mapping between function returns and function node outputs. */
+      {{"z", "mul:z:0"}});
+
+  FunctionDef mul_func_2 = FunctionDefHelper::Create(
+      "MyMul2", {"x:float", "y:float"}, {"z:float"}, {},
+      {{{"mul"}, "Mul", {"x", "y"}, {}}},
+      /* Mapping between function returns and function node outputs. */
+      {{"z", "mul:z:0"}});
+
+  // Tensorflow graph:
+  //
+  //   x0 = tf.Placeholder(tf.float);
+  //   x1 = tf.Placeholder(tf.float);
+  //   dy = tf.Placeholder(tf.float);
+  //
+  //   mul_1 = MyMul1(x0, x1);
+  //   mul_2 = MyMul2(x0, x1);
+  //   dx = SymbolicGradient({x0, x1, dy}, f=MyMul2)
+  GrapplerItem item;
+  item.id = "main";
+  item.graph = test::function::GDef(
+      {NDef("x0", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+       NDef("x1", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+       NDef("dy", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+       // Calls into function library
+       NDef("mul_1", "MyMul1", {"x0", "x1"}, {}, kDevice),
+       NDef("mul_2", "MyMul2", {"x0", "x1"}, {}, kDevice),
+       // Symbolic gradient of a MyMul2
+       NDef("dx", "SymbolicGradient", {"x0", "x1", "dy"},
+            {{"f", FDH::FunctionRef("MyMul2", {})},
+             {"Tin", DataTypeSlice{DT_FLOAT}},
+             {"Tout", DataTypeSlice{DT_FLOAT, DT_FLOAT}}},
+            kDevice)},
+      // FunctionLib
+      {mul_func_1, mul_func_2});
+  item.fetch = {"mul_1", "mul_2", "dx"};
+
+  GraphDef output;
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  // Our custom optimizer must be called for the main graph and for the two
+  // functions.
+  ASSERT_EQ(allowed_optimizations.size(), 3);
+
+  auto allowed_optimizations_main =
+      gtl::FindOrNull(allowed_optimizations, "main");
+  ASSERT_NE(allowed_optimizations_main, nullptr);
+  EXPECT_TRUE(allowed_optimizations_main->non_differentiable_rewrites);
+
+  auto allowed_optimizations_my_mul_1 =
+      gtl::FindOrNull(allowed_optimizations, "MyMul1");
+  ASSERT_NE(allowed_optimizations_my_mul_1, nullptr);
+  EXPECT_TRUE(allowed_optimizations_my_mul_1->non_differentiable_rewrites);
+
+  auto allowed_optimizations_my_mul_2 =
+      gtl::FindOrNull(allowed_optimizations, "MyMul2");
+  ASSERT_NE(allowed_optimizations_my_mul_2, nullptr);
+  EXPECT_FALSE(allowed_optimizations_my_mul_2->non_differentiable_rewrites);
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From b73c5f80926de3b724a92a57cf0bc49aa7de37bd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 05:50:51 -0700
Subject: [PATCH 0144/1085] Automated rollback of commit
 3f4423fad57694bc8d7adc427d65e5a18c8592b2

PiperOrigin-RevId: 215200418
---
 .../contrib/tpu/ops/tpu_embedding_ops.cc      | 42 ++++++++++++++++---
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index 6b0730b40c..5c27d59f82 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -103,10 +103,19 @@ Status RegisterPerTableLoadOpsForAlgorithmBody(
       arg->set_type(DT_FLOAT);
     }
   }
+  {
+    auto* table_id_attr = op_def->add_attr();
+    table_id_attr->set_name("table_id");
+    table_id_attr->set_type("int");
+    table_id_attr->set_has_minimum(true);
+    table_id_attr->set_minimum(-1);
+    table_id_attr->mutable_default_value()->set_i(-1);
+  }
   {
     auto* table_name_attr = op_def->add_attr();
     table_name_attr->set_name("table_name");
     table_name_attr->set_type("string");
+    table_name_attr->mutable_default_value()->set_s("");
   }
   {
     auto* num_shards_attr = op_def->add_attr();
@@ -138,9 +147,11 @@ parameters that are loaded from a checkpoint before a training loop is
 executed.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto.
+  EmbeddingLayerConfiguration proto (overrides table_id).
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
+table_id: Index of this table in the EmbeddingLayerConfiguration proto
+  (deprecated).
 )doc",
                                           parameter_descriptions.c_str()));
   op_def->set_is_commutative(false);
@@ -149,10 +160,14 @@ shard_id: Identifier of shard for this operation.
   auto shape_inference_function =
       [state_variable_specs,
        is_debug_op](shape_inference::InferenceContext* c) -> Status {
+    int table_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
     string table_name;
     TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
-    if (table_name.empty()) {
-      return errors::InvalidArgument("table_name attribute must be set");
+    // Exactly one must be non-default.
+    if ((table_id >= 0) == (!table_name.empty())) {
+      return errors::InvalidArgument(
+          "exactly one of table_id or table_name must be non-default");
     }
     int num_shards;
     TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
@@ -225,10 +240,19 @@ Status RegisterPerTableRetrieveOpsForAlgorithmBody(
       arg->set_type(DT_FLOAT);
     }
   }
+  {
+    auto* table_id_attr = op_def->add_attr();
+    table_id_attr->set_name("table_id");
+    table_id_attr->set_type("int");
+    table_id_attr->set_has_minimum(true);
+    table_id_attr->set_minimum(-1);
+    table_id_attr->mutable_default_value()->set_i(-1);
+  }
   {
     auto* table_name_attr = op_def->add_attr();
     table_name_attr->set_name("table_name");
     table_name_attr->set_type("string");
+    table_name_attr->mutable_default_value()->set_s("");
   }
   {
     auto* num_shards_attr = op_def->add_attr();
@@ -259,9 +283,11 @@ the correct embedding table configuration. For example, this op is
 used to retrieve updated parameters before saving a checkpoint.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto.
+  EmbeddingLayerConfiguration proto (overrides table_id).
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
+table_id: Index of this table in the EmbeddingLayerConfiguration proto
+  (deprecated).
 )doc",
                                           parameter_descriptions.c_str()));
   op_def->set_is_commutative(false);
@@ -270,10 +296,14 @@ shard_id: Identifier of shard for this operation.
   auto shape_inference_function =
       [state_variable_specs,
        is_debug_op](shape_inference::InferenceContext* c) -> Status {
+    int table_id;
+    TF_RETURN_IF_ERROR(c->GetAttr("table_id", &table_id));
     string table_name;
     TF_RETURN_IF_ERROR(c->GetAttr("table_name", &table_name));
-    if (table_name.empty()) {
-      return errors::InvalidArgument("table_name must be non-empty");
+    // Exactly one must be non-default.
+    if ((table_id >= 0) == (!table_name.empty())) {
+      return errors::InvalidArgument(
+          "exactly one of table_id or table_name must be non-default");
     }
     int num_shards;
     TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards));
-- 
GitLab


From 7c5eb354a6b5b2d5a2e27d8ce3dc4861cb51153c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 07:15:23 -0700
Subject: [PATCH 0145/1085] In TensorFlow configure, write the
 .tf_configure.bazelrc into the --workspace path if provided.

This allows repositories that depend on TensorFlow to execute
'bazel run @org_tensorflow//:configure -- --workspace $(pwd)'
to configure TensorFlow.
END_PUBLIC

Before this change, the .tf_configure.bazelrc ended up in the bazel exec root, and 'bazel clean' would undo the configuration.

PiperOrigin-RevId: 215209207
---
 configure.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/configure.py b/configure.py
index 0a3b9a7894..796c6231e8 100644
--- a/configure.py
+++ b/configure.py
@@ -48,10 +48,9 @@ _SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15, 16]
 
 _DEFAULT_PROMPT_ASK_ATTEMPTS = 10
 
-_TF_WORKSPACE_ROOT = os.path.abspath(os.path.dirname(__file__))
 _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc'
-_TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
-_TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE')
+_TF_WORKSPACE_ROOT = ''
+_TF_BAZELRC = ''
 
 if platform.machine() == 'ppc64le':
   _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/powerpc64le-linux-gnu/'
@@ -243,10 +242,10 @@ def setup_python(environ_cp):
     f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path)
 
 
-def reset_tf_configure_bazelrc(workspace_path):
+def reset_tf_configure_bazelrc():
   """Reset file that contains customized config settings."""
   open(_TF_BAZELRC, 'w').close()
-  bazelrc_path = os.path.join(workspace_path, '.bazelrc')
+  bazelrc_path = os.path.join(_TF_WORKSPACE_ROOT, '.bazelrc')
 
   data = []
   if os.path.exists(bazelrc_path):
@@ -1469,21 +1468,27 @@ def config_info_line(name, help_text):
 
 
 def main():
+  global _TF_WORKSPACE_ROOT
+  global _TF_BAZELRC
+
   parser = argparse.ArgumentParser()
   parser.add_argument(
       '--workspace',
       type=str,
-      default=_TF_WORKSPACE_ROOT,
+      default=os.path.abspath(os.path.dirname(__file__)),
       help='The absolute path to your active Bazel workspace.')
   args = parser.parse_args()
 
+  _TF_WORKSPACE_ROOT = args.workspace
+  _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
+
   # Make a copy of os.environ to be clear when functions and getting and setting
   # environment variables.
   environ_cp = dict(os.environ)
 
   check_bazel_version('0.15.0')
 
-  reset_tf_configure_bazelrc(args.workspace)
+  reset_tf_configure_bazelrc()
   cleanup_makefile()
   setup_python(environ_cp)
 
-- 
GitLab


From 9a2f872acd0c38d74d60e4f67701241aa1a26419 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 08:21:58 -0700
Subject: [PATCH 0146/1085] Move from deprecated self.test_session() to
 self.cached_session() or self.session().

* Move from self.test_session(graph=ops.Graph(), ...) to self.session(...) (semantically equivalent).
* Move from self.test_session() to self.cached_session(config=self.config) when run_in_graph_and_eager_modes(config=config) is set to be consistent between eager and non eager modes.

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about:
* the fact that the session may be reused.
* the session is not closed even when doing a "with self.test_session()" statement.

PiperOrigin-RevId: 215216964
---
 tensorflow/contrib/distribute/python/values_test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py
index ae3e134333..121d2fbb3f 100644
--- a/tensorflow/contrib/distribute/python/values_test.py
+++ b/tensorflow/contrib/distribute/python/values_test.py
@@ -641,7 +641,7 @@ class MirroredVariableTest(test.TestCase):
     if context.num_gpus() < 1 and context.executing_eagerly():
       self.skipTest("A GPU is not available for this test in eager mode.")
 
-    with self.test_session() as sess:
+    with self.cached_session(config=self.config) as sess:
       v, devices, mirrored = _make_mirrored()
 
       # Overwrite the initial values.
@@ -744,7 +744,7 @@ class MirroredVariableTest(test.TestCase):
     if context.num_gpus() < 1 or context.executing_eagerly():
       self.skipTest("A GPU is not available for this test or it's eager mode.")
 
-    with self.test_session(
+    with self.session(
         graph=ops.Graph()) as sess, mirrored_strategy.MirroredStrategy(
             ["/device:GPU:0"]).scope():
       with ops.device("/device:GPU:0"):
@@ -827,7 +827,7 @@ class TowerLocalVariableTest(test.TestCase):
     if context.num_gpus() < 1 and context.executing_eagerly():
       self.skipTest("A GPU is not available for this test in eager mode.")
 
-    with self.test_session() as sess:
+    with self.cached_session(config=self.config) as sess:
       v, tower_local = _make_tower_local(variable_scope.VariableAggregation.SUM)
 
       # Overwrite the initial values.
@@ -850,7 +850,7 @@ class TowerLocalVariableTest(test.TestCase):
     if context.num_gpus() < 1 and context.executing_eagerly():
       self.skipTest("A GPU is not available for this test in eager mode.")
 
-    with self.test_session() as sess:
+    with self.cached_session(config=self.config) as sess:
       v, tower_local = _make_tower_local(
           variable_scope.VariableAggregation.MEAN)
 
-- 
GitLab


From e285dea8d9626b832f34d65159639f294c2d6881 Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Mon, 1 Oct 2018 09:23:48 -0700
Subject: [PATCH 0147/1085] Update documentation. - Use absolute links instead
 of relative links. Relative links break when published on website. - Correct
 NNAPI abbreviation.

PiperOrigin-RevId: 215225415
---
 tensorflow/contrib/lite/g3doc/performance.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/g3doc/performance.md b/tensorflow/contrib/lite/g3doc/performance.md
index 0ae9400068..6b7943caf8 100644
--- a/tensorflow/contrib/lite/g3doc/performance.md
+++ b/tensorflow/contrib/lite/g3doc/performance.md
@@ -7,12 +7,12 @@ Mobile and embedded devices have limited computational resources and it is impor
 Some models may be too large to run on embedded devices. Instead of large models it is better to use a slightly less precise but smaller model for embedded devices. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices.
 
 You can retrain the listed models on your own dataset by using transfer learning. Check out our transfer learning tutorial for
-[image classification] (https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and
+[image classification](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and
  [object detection](https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193).
 
 
 ## Profile your model
-Before starting any optimization, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](../tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time.
+Before starting any optimization, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time.
 
 ## Profile and optimize operators in the graph
 If a particular operator appears frequently in the model and based on profiling you find the operator consuming the most amount of time, you can look into optimizing the operator.
@@ -22,7 +22,7 @@ If a particular operator appears frequently in the model and based on profiling
 If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. Fully quantized models can be remarkably power efficient as well.
 
 ## Tweak the number of threads
-Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](../interpreter.h) threads.
+Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](https://github.com/tensorflow/tensorflow/blob/1084594657a5d139102ac794f84d1427a710e39a/tensorflow/contrib/lite/interpreter.h#L337) threads.
 
 ## Eliminate redundant copies
 Tensorflow Lite is optimized to reduce redundant copies. The APIs allow user to [mmap a model file](https://github.com/tensorflow/tensorflow/blob/9982fd6c8831cbd2f58954f79ea71f26660393bc/tensorflow/contrib/lite/model.h#L152) and avoid copies. If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151).
@@ -31,8 +31,8 @@ Tensorflow Lite is optimized to reduce redundant copies. The APIs allow user to
 Platform specific tools like [Android profiler](https://developer.android.com/studio/profile/android-profiler) and [Instruments](https://help.apple.com/instruments/mac/current/) provide a wealth of profiling information that can be used to debug your app. Sometimes the performance bug may be not in the model but in parts of application code that interact with the model. Make sure to familiarize yourself with platform specific profiling tools and best practices for your platform.
 
 ## Use hardware accelerators available on the device
-Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/) on Android.
-You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable NNAPI call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance.
+Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [Neural Networks API](https://developer.android.com/ndk/guides/neuralnetworks/) on Android.
+You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable Neural Networks API call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance.
 
 ## Need more help
 The Tensorflow team is happy to help diagnose and address specific performance issues you may be facing. Please file a bug on [github](https://github.com/tensorflow/tensorflow/issues) with details of the issue.
-- 
GitLab


From 03a18ca576410d49e8f0692464e35e900a54f59f Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Mon, 1 Oct 2018 10:01:20 -0700
Subject: [PATCH 0148/1085] Remove outdated integration test in preparation for
 update of keras_preprocessing.

PiperOrigin-RevId: 215231309
---
 .../python/keras/preprocessing/image_test.py  | 37 -------------------
 1 file changed, 37 deletions(-)

diff --git a/tensorflow/python/keras/preprocessing/image_test.py b/tensorflow/python/keras/preprocessing/image_test.py
index 362cbc1dc9..4abaadfcd3 100644
--- a/tensorflow/python/keras/preprocessing/image_test.py
+++ b/tensorflow/python/keras/preprocessing/image_test.py
@@ -94,43 +94,6 @@ class TestImage(test.TestCase):
         self.assertEqual(x.shape[1:], images.shape[1:])
         break
 
-  def test_image_data_generator_with_validation_split(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    for test_images in _generate_test_images():
-      img_list = []
-      for im in test_images:
-        img_list.append(keras.preprocessing.image.img_to_array(im)[None, ...])
-
-      images = np.vstack(img_list)
-      generator = keras.preprocessing.image.ImageDataGenerator(
-          validation_split=0.5)
-      seq = generator.flow(
-          images,
-          np.arange(images.shape[0]),
-          shuffle=False,
-          batch_size=3,
-          subset='validation')
-      _, y = seq[0]
-      self.assertEqual(list(y), [0, 1, 2])
-      seq = generator.flow(
-          images,
-          np.arange(images.shape[0]),
-          shuffle=False,
-          batch_size=3,
-          subset='training')
-      _, y2 = seq[0]
-      self.assertEqual(list(y2), [4, 5, 6])
-
-      with self.assertRaises(ValueError):
-        generator.flow(
-            images,
-            np.arange(images.shape[0]),
-            shuffle=False,
-            batch_size=3,
-            subset='foo')
-
   def test_image_data_generator_with_split_value_error(self):
     with self.assertRaises(ValueError):
       keras.preprocessing.image.ImageDataGenerator(validation_split=5)
-- 
GitLab


From a5fc8b064884b926ade9f7973dc096c0677a14e0 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Mon, 1 Oct 2018 10:35:02 -0700
Subject: [PATCH 0149/1085] Name fusion parameters simply "param_X". Where "X"
 is the parameter number. Previously, fusion parameter names including the
 name of the original instruction which produced the value which was
 confusing.

PiperOrigin-RevId: 215238171
---
 .../compiler/xla/service/hlo_computation.cc   | 36 +++----------------
 .../compiler/xla/service/hlo_instructions.cc  |  3 +-
 2 files changed, 6 insertions(+), 33 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 0e5920af7a..4613d6762e 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -122,30 +122,6 @@ HloInstruction* HloComputation::AddParameter(
   return instructions_.back().get();
 }
 
-namespace {
-
-// Returns the new name for a fusion parameter when we change its number.
-//
-// Fusion parameters are named foo.param_1, bar.param_2, etc. We are
-// renumbering the parameters, so replace the final number in the name with
-// the updated value.
-string RenameFusionParameter(const string& original_name, int64 new_param_no) {
-  const string param_underscore = ".param_";
-  size_t index = original_name.rfind(param_underscore);
-  if (index == string::npos) {
-    return original_name;
-  }
-  string after_param = original_name.substr(index + param_underscore.size());
-  int64 numeric_suffix;
-  if (absl::SimpleAtoi(after_param, &numeric_suffix)) {
-    return StrCat(original_name.substr(0, index + param_underscore.size()),
-                  new_param_no);
-  }
-  return original_name;
-}
-
-}  // namespace
-
 Status HloComputation::RemoveParameter(int64 param_no) {
   CHECK_GE(param_no, 0);
   CHECK_LT(param_no, param_instructions_.size());
@@ -158,11 +134,9 @@ Status HloComputation::RemoveParameter(int64 param_no) {
 
   while (param_no < param_instructions_.size()) {
     param_instruction = param_instructions_[param_no];
-    string param_name =
-        RenameFusionParameter(param_instruction->name(), param_no);
     HloInstruction* new_instr =
         AddInstructionInternal(HloInstruction::CreateParameter(
-            param_no, param_instruction->shape(), param_name));
+            param_no, param_instruction->shape(), StrCat("param_", param_no)));
     TF_RETURN_IF_ERROR(param_instruction->ReplaceAllUsesWith(new_instr));
     param_instructions_[param_no] = new_instr;
     TF_RETURN_IF_ERROR(RemoveInstruction(param_instruction));
@@ -186,11 +160,9 @@ Status HloComputation::RemoveUnusedParameters() {
 
     if (removed > 0) {
       const int64 param_no = i - removed;
-      string param_name =
-          RenameFusionParameter(param_instruction->name(), param_no);
-      HloInstruction* new_instr =
-          AddInstructionInternal(HloInstruction::CreateParameter(
-              param_no, param_instruction->shape(), param_name));
+      HloInstruction* new_instr = AddInstructionInternal(
+          HloInstruction::CreateParameter(param_no, param_instruction->shape(),
+                                          StrCat("param_", param_no)));
       TF_RETURN_IF_ERROR(param_instruction->ReplaceAllUsesWith(new_instr));
       param_instructions_[param_no] = new_instr;
       TF_RETURN_IF_ERROR(RemoveInstruction(param_instruction));
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index cd71bc3323..ad45a82941 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -1042,7 +1042,8 @@ HloInstruction* HloFusionInstruction::AddFusionOperand(
   const int64 param_no = operand_count();
   // Name the parameter after the instruction it represents in the outer
   // (non-fusion) computation.
-  string param_name = StrCat(new_operand->name(), ".param_", param_no);
+  // string param_name = StrCat(new_operand->name(), ".param_", param_no);
+  string param_name = StrCat("param_", param_no);
   HloInstruction* fused_parameter =
       fused_instructions_computation()->AddParameter(
           HloInstruction::CreateParameter(param_no, new_operand->shape(),
-- 
GitLab


From a6478312ef296ba9684931135851e9c7bb460444 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 1 Oct 2018 10:36:07 -0700
Subject: [PATCH 0150/1085] Replace the tf.name_scope call with an internal
 context manager that can contain additional boilerplate later on.
 Unfortunately it could not be extended to include the error handling.

PiperOrigin-RevId: 215238369
---
 tensorflow/python/autograph/converters/BUILD  |  6 +--
 .../{name_scopes.py => function_scopes.py}    | 32 ++++++++-------
 ...scopes_test.py => function_scopes_test.py} | 40 +++++++++----------
 tensorflow/python/autograph/core/BUILD        | 12 ++++++
 .../autograph/core/converter_testing.py       |  2 +
 .../autograph/core/function_wrapping.py       | 30 ++++++++++++++
 .../autograph/core/function_wrapping_test.py  | 34 ++++++++++++++++
 .../python/autograph/impl/conversion.py       |  6 ++-
 8 files changed, 122 insertions(+), 40 deletions(-)
 rename tensorflow/python/autograph/converters/{name_scopes.py => function_scopes.py} (72%)
 rename tensorflow/python/autograph/converters/{name_scopes_test.py => function_scopes_test.py} (71%)
 create mode 100644 tensorflow/python/autograph/core/function_wrapping.py
 create mode 100644 tensorflow/python/autograph/core/function_wrapping_test.py

diff --git a/tensorflow/python/autograph/converters/BUILD b/tensorflow/python/autograph/converters/BUILD
index 7b029de8ed..f06dc78f0e 100644
--- a/tensorflow/python/autograph/converters/BUILD
+++ b/tensorflow/python/autograph/converters/BUILD
@@ -27,10 +27,10 @@ py_library(
         "decorators.py",
         "directives.py",
         "error_handlers.py",
+        "function_scopes.py",
         "list_comprehensions.py",
         "lists.py",
         "logical_expressions.py",
-        "name_scopes.py",
         "return_statements.py",
         "side_effect_guards.py",
         "slices.py",
@@ -157,8 +157,8 @@ py_test(
 )
 
 py_test(
-    name = "name_scopes_test",
-    srcs = ["name_scopes_test.py"],
+    name = "function_scopes_test",
+    srcs = ["function_scopes_test.py"],
     deps = [
         ":converters",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/autograph/converters/name_scopes.py b/tensorflow/python/autograph/converters/function_scopes.py
similarity index 72%
rename from tensorflow/python/autograph/converters/name_scopes.py
rename to tensorflow/python/autograph/converters/function_scopes.py
index a9c55ccff0..284b5b3519 100644
--- a/tensorflow/python/autograph/converters/name_scopes.py
+++ b/tensorflow/python/autograph/converters/function_scopes.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Wraps a function body with a `name_scope` of the function name."""
+"""Wraps the body of a converted function with auxiliary constructs."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -24,8 +24,8 @@ from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.pyct import templates
 
 
-class FunctionNameScopeTransformer(converter.Base):
-  """Wrap a function body with a `name_scope` of the function name."""
+class FunctionBodyTransformer(converter.Base):
+  """Wraps function bodies around autograph-specific boilerplate."""
 
   def _name_for_current_scope(self):
     innermost = self.enclosing_entities[-1]
@@ -49,26 +49,28 @@ class FunctionNameScopeTransformer(converter.Base):
   def visit_FunctionDef(self, node):
     node = self.generic_visit(node)
 
-    unscoped_body = []
-    scoped_body = node.body
-    if scoped_body:
-      first = scoped_body[0]
-      if isinstance(first, gast.Expr) and isinstance(first.value, gast.Str):
-        # Skip any docstring.
-        unscoped_body = scoped_body[:1]
-        scoped_body = scoped_body[1:]
+    final_body = []
+    indented_body = node.body
+    if node.body:
+      first_statement = node.body[0]
+      # Skip the docstring, if any.
+      if (isinstance(first_statement, gast.Expr) and
+          isinstance(first_statement.value, gast.Str)):
+        indented_body = indented_body[1:]
+        final_body.append(first_statement)
 
     template = """
-      with tf.name_scope(scope_name):
+      with ag__.function_scope(scope_name):
         body
     """
     scoped_body = templates.replace(
         template,
         scope_name=gast.Str(self._name_for_current_scope()),
-        body=scoped_body)
-    node.body = unscoped_body + scoped_body
+        body=indented_body)
+    final_body.extend(scoped_body)
+    node.body = final_body
     return node
 
 
 def transform(node, ctx):
-  return FunctionNameScopeTransformer(ctx).visit(node)
+  return FunctionBodyTransformer(ctx).visit(node)
diff --git a/tensorflow/python/autograph/converters/name_scopes_test.py b/tensorflow/python/autograph/converters/function_scopes_test.py
similarity index 71%
rename from tensorflow/python/autograph/converters/name_scopes_test.py
rename to tensorflow/python/autograph/converters/function_scopes_test.py
index 73933c1c4f..e5ce03a109 100644
--- a/tensorflow/python/autograph/converters/name_scopes_test.py
+++ b/tensorflow/python/autograph/converters/function_scopes_test.py
@@ -12,51 +12,51 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for for_canonicalization module."""
+"""Tests for function_scopes module."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.autograph.converters import name_scopes
+from tensorflow.python.autograph.converters import function_scopes
 from tensorflow.python.autograph.core import converter_testing
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 
 
-class FunctionNameScopeTransformer(converter_testing.TestCase):
+class FunctionBodyTransformerTest(converter_testing.TestCase):
 
   def test_basic(self):
 
     def test_fn(l):
-      """This should stay here."""
+      """Docstring."""
       a = 1
       l += a
       return l
 
-    with self.converted(test_fn, name_scopes, {}, ops.name_scope) as result:
+    with self.converted(test_fn, function_scopes, {}) as result:
       result_op = result.test_fn(constant_op.constant(1))
       self.assertIn('test_fn/', result_op.op.name)
-      self.assertEqual('This should stay here.', result.test_fn.__doc__)
+      self.assertEqual('Docstring.', result.test_fn.__doc__)
 
-  def test_long_docstring(self):
+  def test_multiline_docstring(self):
 
-    def test_fn(l):
-      """Multi-line docstring.
+    tf = None
+
+    def test_fn():
+      """First sentence.
 
-      Args:
-        l: A thing.
-      Returns:
-        l
+      Second sentence.
       """
-      return l + 1
+      return tf.constant(1)
 
-    with self.converted(test_fn, name_scopes, {}, ops.name_scope) as result:
-      result_op = result.test_fn(constant_op.constant(1))
+    with self.converted(test_fn, function_scopes, {},
+                        constant_op.constant) as result:
+      result_op = result.test_fn()
       self.assertIn('test_fn/', result_op.op.name)
-      self.assertIn('Multi-line docstring.', result.test_fn.__doc__)
-      self.assertIn('Returns:', result.test_fn.__doc__)
+      self.assertIn('First sentence.', result.test_fn.__doc__)
+      self.assertIn('Second sentence.', result.test_fn.__doc__)
 
   def test_nested_functions(self):
 
@@ -68,7 +68,7 @@ class FunctionNameScopeTransformer(converter_testing.TestCase):
       l += 1
       return l, inner_fn(l)
 
-    with self.converted(test_fn, name_scopes, {}, ops.name_scope) as result:
+    with self.converted(test_fn, function_scopes, {}, ops.name_scope) as result:
       first, second = result.test_fn(constant_op.constant(1))
       self.assertIn('test_fn/', first.op.name)
       self.assertNotIn('inner_fn', first.op.name)
@@ -88,7 +88,7 @@ class FunctionNameScopeTransformer(converter_testing.TestCase):
 
     ns = {'TestClass': TestClass}
     node, ctx = self.prepare(TestClass, ns, owner_type=TestClass)
-    node = name_scopes.transform(node, ctx)
+    node = function_scopes.transform(node, ctx)
 
     with self.compiled(node, {}, ops.name_scope) as result:
       first, second = result.TestClass().test_fn(constant_op.constant(1))
diff --git a/tensorflow/python/autograph/core/BUILD b/tensorflow/python/autograph/core/BUILD
index 85fecf084d..843e381f31 100644
--- a/tensorflow/python/autograph/core/BUILD
+++ b/tensorflow/python/autograph/core/BUILD
@@ -20,11 +20,13 @@ py_library(
         "config.py",
         "converter.py",
         "errors.py",
+        "function_wrapping.py",
         "naming.py",
     ],
     srcs_version = "PY2AND3",
     visibility = ["//tensorflow:__subpackages__"],
     deps = [
+        "//tensorflow/python:framework_ops",
         "//tensorflow/python/autograph/pyct",
         "//tensorflow/python/autograph/pyct/static_analysis",
         "//tensorflow/python/autograph/utils",
@@ -46,6 +48,16 @@ py_test(
     ],
 )
 
+py_test(
+    name = "function_wrapping_test",
+    srcs = ["function_wrapping_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":core",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 py_test(
     name = "naming_test",
     srcs = ["naming_test.py"],
diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py
index 7ce1b7c4c5..dc2d419d34 100644
--- a/tensorflow/python/autograph/core/converter_testing.py
+++ b/tensorflow/python/autograph/core/converter_testing.py
@@ -29,6 +29,7 @@ from tensorflow.python.autograph import utils
 from tensorflow.python.autograph.core import config
 from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.core import errors
+from tensorflow.python.autograph.core import function_wrapping
 from tensorflow.python.autograph.pyct import compiler
 from tensorflow.python.autograph.pyct import parser
 from tensorflow.python.autograph.pyct import pretty_printer
@@ -112,6 +113,7 @@ class TestCase(test.TestCase):
       fake_ag.__dict__['utils'] = utils
       fake_ag.__dict__['rewrite_graph_construction_error'] = (
           errors.rewrite_graph_construction_error)
+      fake_ag.__dict__['function_scope'] = function_wrapping.function_scope
       result.__dict__['ag__'] = fake_ag
       for k, v in namespace.items():
         result.__dict__[k] = v
diff --git a/tensorflow/python/autograph/core/function_wrapping.py b/tensorflow/python/autograph/core/function_wrapping.py
new file mode 100644
index 0000000000..21b66eff02
--- /dev/null
+++ b/tensorflow/python/autograph/core/function_wrapping.py
@@ -0,0 +1,30 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Support for wrapping converted functions bodies with auxiliary logic."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+
+from tensorflow.python.framework import ops
+
+
+@contextlib.contextmanager
+def function_scope(function_name):
+  """Returns a context manager for the converted body of a function."""
+  with ops.name_scope(function_name):
+    yield
diff --git a/tensorflow/python/autograph/core/function_wrapping_test.py b/tensorflow/python/autograph/core/function_wrapping_test.py
new file mode 100644
index 0000000000..5e217055c7
--- /dev/null
+++ b/tensorflow/python/autograph/core/function_wrapping_test.py
@@ -0,0 +1,34 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for function_wrapping module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.autograph.core import function_wrapping
+from tensorflow.python.framework import constant_op
+from tensorflow.python.platform import test
+
+
+class FunctionWrappingTest(test.TestCase):
+
+  def test_function_scope_name(self):
+    with function_wrapping.function_scope('test_name'):
+      t = constant_op.constant(1)
+    self.assertIn('test_name', t.name)
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index a0d13c82a8..52abd40626 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -34,15 +34,16 @@ from tensorflow.python.autograph.converters import control_flow
 from tensorflow.python.autograph.converters import decorators
 from tensorflow.python.autograph.converters import directives
 from tensorflow.python.autograph.converters import error_handlers
+from tensorflow.python.autograph.converters import function_scopes
 from tensorflow.python.autograph.converters import lists
 from tensorflow.python.autograph.converters import logical_expressions
-from tensorflow.python.autograph.converters import name_scopes
 from tensorflow.python.autograph.converters import return_statements
 from tensorflow.python.autograph.converters import side_effect_guards
 from tensorflow.python.autograph.converters import slices
 from tensorflow.python.autograph.core import config
 from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.core import errors
+from tensorflow.python.autograph.core import function_wrapping
 from tensorflow.python.autograph.pyct import ast_util
 from tensorflow.python.autograph.pyct import inspect_utils
 from tensorflow.python.autograph.pyct import origin_info
@@ -257,6 +258,7 @@ def _add_self_references(namespace, autograph_module):
     ag_internal.converted_call = autograph_module.converted_call
     ag_internal.ConversionOptions = autograph_module.ConversionOptions
     ag_internal.utils = utils
+    ag_internal.function_scope = function_wrapping.function_scope
     ag_internal.rewrite_graph_construction_error = (
         errors.rewrite_graph_construction_error)
     # TODO(mdan): Add safeguards against name clashes.
@@ -346,7 +348,7 @@ def node_to_graph(node, context, rewrite_errors=True):
   node = converter.apply_(node, context, conditional_expressions)
   node = converter.apply_(node, context, logical_expressions)
   node = converter.apply_(node, context, side_effect_guards)
-  node = converter.apply_(node, context, name_scopes)
+  node = converter.apply_(node, context, function_scopes)
   if rewrite_errors:
     node = converter.apply_(node, context, error_handlers)
   return node
-- 
GitLab


From 57a831d20929e71279d164905fed93e1f518ee37 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 10:41:58 -0700
Subject: [PATCH 0151/1085] Bugfix: When a subgraph is encapsulated and
 replaced by XlaLaunch op, the requested device placement of the XlaLaunch op
 must be derived from the subgraph. PiperOrigin-RevId: 215239672

---
 tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc    | 6 ++++++
 .../compiler/jit/encapsulate_xla_computations_pass.cc    | 2 ++
 .../jit/encapsulate_xla_computations_pass_test.cc        | 9 ++++++---
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index e0632ff7e4..15faf31077 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -748,6 +748,12 @@ Node* Encapsulator::Subgraph::MakeNodeImage(const Graph* graph_in, Node* node) {
     graph_->set_versions(graph_in->versions());
   }
 
+  // TODO(b/116981129): Enhance how the device for the encapsulated subgraph is
+  // determined. In case of hard placement, ensure all the encapsulated nodes
+  // have the same requested device, which in turn will be the requested device
+  // for the entire encapsulated subgraph. In case of soft placement, use a
+  // deterministic approach to fill in the requested device. Handle co-location
+  // constraints similarly if they exist.
   if (device_.empty()) {
     device_ = node->assigned_device_name().empty()
                   ? node->requested_device()
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
index 97ef8cd3cb..755c364c62 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -297,7 +297,9 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
 
     // Target the XLA CPU/GPU backends.
     VLOG(2) << "Replacing with XlaLaunch";
+    VLOG(2) << "Device is " << launch->requested_device();
     def.set_op("XlaLaunch");
+    def.set_device(launch->requested_device());
     AddNodeAttr("Tconstants", DataTypeVector{}, &def);
     AddNodeAttr("Targs", arg_types, &def);
     AddNodeAttr("Nresources", num_variables, &def);
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
index f643fb0cfe..479038ac8e 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
@@ -55,6 +55,7 @@ static std::unique_ptr<Graph> MakeOuterGraph(
           .Input(u.node()->name(), 0, DT_RESOURCE)
           .Input(v.node()->name(), 0, DT_RESOURCE)
           .Input(w.node()->name(), 0, DT_RESOURCE)
+          .Device("/gpu:0")
           .Attr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0")
           .Attr("_variable_start_index", 4)
           .Finalize(&def));
@@ -107,10 +108,11 @@ static std::unique_ptr<Graph> MakeBodyGraph() {
 
   auto add_attrs = [](Node* node) {
     node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0");
+    node->set_requested_device("/gpu:0");
   };
 
   auto b_identity = ops::Identity(scope.WithOpName("B_identity"), arg1);
-
+  add_attrs(b_identity.node());
   auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), arg4, DT_FLOAT);
   add_attrs(read_u.node());
   auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), arg5, DT_FLOAT);
@@ -215,6 +217,7 @@ TEST(EncapsulateXlaComputations, Encapsulate) {
 
     auto add_attrs = [](Node* node) {
       node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0");
+      node->set_requested_device("/gpu:0");
     };
 
     auto b_identity = ops::Identity(scope.WithOpName("B_identity"), b);
@@ -317,8 +320,8 @@ TEST(EncapsulateXlaComputations, BuildXlaLaunchOp) {
   NameAttrList function;
   function.set_name("launch0");
   auto launch = ops::XlaLaunch(
-      scope.WithOpName("launch0"), std::initializer_list<Input>{},
-      std::initializer_list<Input>{a, b, c, d},
+      scope.WithOpName("launch0").WithDevice("/gpu:0"),
+      std::initializer_list<Input>{}, std::initializer_list<Input>{a, b, c, d},
       std::initializer_list<Input>{u, v, w},
       DataTypeVector{DT_FLOAT, DT_INT32, DT_FLOAT, DT_FLOAT}, function);
 
-- 
GitLab


From ec2b5f889fb3eb677f7b8198cbd8d505b2779fa7 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 1 Oct 2018 10:42:14 -0700
Subject: [PATCH 0152/1085] Automated rollback of commit
 5f822d694af6e4aa57fe8a426032a91dc61e30d6

PiperOrigin-RevId: 215239710
---
 tensorflow/contrib/factorization/BUILD           |  9 +--------
 .../contrib/factorization/python/ops/gmm_ops.py  | 14 +++++++-------
 .../factorization/python/ops/wals_test.py        | 16 ++++++++--------
 tensorflow/contrib/opt/BUILD                     |  5 -----
 .../contrib/timeseries/python/timeseries/BUILD   |  7 +------
 5 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD
index 510f292508..e344d7a23b 100644
--- a/tensorflow/contrib/factorization/BUILD
+++ b/tensorflow/contrib/factorization/BUILD
@@ -154,8 +154,6 @@ tf_py_test(
     ],
     tags = [
         "no_pip",  # b/38283730
-        "noasan",  # b/116875897
-        "nomsan",
         "notsan",  # Flaky: b/30756419
     ],
 )
@@ -179,11 +177,7 @@ tf_py_test(
         "//tensorflow/python:random_seed",
         "//tensorflow/python:variables",
     ],
-    tags = [
-        "noasan",  # b/116875897
-        "nomsan",
-        "notsan",  # b/62863147
-    ],
+    tags = ["notsan"],  # b/62863147
 )
 
 py_library(
@@ -282,7 +276,6 @@ tf_py_test(
         "manual",
         "noasan",  # times out b/63678675
         "nomsan",
-        "notsan",  # b/116875897
     ],
 )
 
diff --git a/tensorflow/contrib/factorization/python/ops/gmm_ops.py b/tensorflow/contrib/factorization/python/ops/gmm_ops.py
index e076631bc1..d365ad1117 100644
--- a/tensorflow/contrib/factorization/python/ops/gmm_ops.py
+++ b/tensorflow/contrib/factorization/python/ops/gmm_ops.py
@@ -154,10 +154,10 @@ class GmmAlgorithm(object):
   def _create_variables(self):
     """Initializes GMM algorithm."""
     init_value = array_ops.constant([], dtype=dtypes.float32)
-    self._means = variables.Variable(init_value,
-                                     name=self.CLUSTERS_VARIABLE,
-                                     validate_shape=False)
-    self._covs = variables.Variable(
+    self._means = variables.VariableV1(init_value,
+                                       name=self.CLUSTERS_VARIABLE,
+                                       validate_shape=False)
+    self._covs = variables.VariableV1(
         init_value, name=self.CLUSTERS_COVS_VARIABLE, validate_shape=False)
     # Mixture weights, representing the probability that a randomly
     # selected unobservable data (in EM terms) was generated by component k.
@@ -165,9 +165,9 @@ class GmmAlgorithm(object):
         array_ops.tile([1.0 / self._num_classes], [self._num_classes]),
         name=self.CLUSTERS_WEIGHT,
         validate_shape=False)
-    self._cluster_centers_initialized = variables.Variable(False,
-                                                           dtype=dtypes.bool,
-                                                           name='initialized')
+    self._cluster_centers_initialized = variables.VariableV1(False,
+                                                             dtype=dtypes.bool,
+                                                             name='initialized')
 
   def _initialize_variables(self, data, initial_means=None):
     """Initializes variables.
diff --git a/tensorflow/contrib/factorization/python/ops/wals_test.py b/tensorflow/contrib/factorization/python/ops/wals_test.py
index 9bdbd05015..75d577f429 100644
--- a/tensorflow/contrib/factorization/python/ops/wals_test.py
+++ b/tensorflow/contrib/factorization/python/ops/wals_test.py
@@ -420,13 +420,13 @@ class WALSMatrixFactorizationUnsupportedTest(test.TestCase):
 class SweepHookTest(test.TestCase):
 
   def test_sweeps(self):
-    is_row_sweep_var = variables.Variable(True)
-    is_sweep_done_var = variables.Variable(False)
-    init_done = variables.Variable(False)
-    row_prep_done = variables.Variable(False)
-    col_prep_done = variables.Variable(False)
-    row_train_done = variables.Variable(False)
-    col_train_done = variables.Variable(False)
+    is_row_sweep_var = variables.VariableV1(True)
+    is_sweep_done_var = variables.VariableV1(False)
+    init_done = variables.VariableV1(False)
+    row_prep_done = variables.VariableV1(False)
+    col_prep_done = variables.VariableV1(False)
+    row_train_done = variables.VariableV1(False)
+    col_train_done = variables.VariableV1(False)
 
     init_op = state_ops.assign(init_done, True)
     row_prep_op = state_ops.assign(row_prep_done, True)
@@ -486,7 +486,7 @@ class StopAtSweepHookTest(test.TestCase):
 
   def test_stop(self):
     hook = wals_lib._StopAtSweepHook(last_sweep=10)
-    completed_sweeps = variables.Variable(
+    completed_sweeps = variables.VariableV1(
         8, name=wals_lib.WALSMatrixFactorization.COMPLETED_SWEEPS)
     train_op = state_ops.assign_add(completed_sweeps, 1)
     hook.begin()
diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index 6a67c6295d..f4ac70eb1a 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -377,11 +377,6 @@ py_test(
     size = "large",
     srcs = ["python/training/shampoo_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "noasan",  # b/116875897
-        "nomsan",
-        "notsan",
-    ],
     deps = [
         ":opt_py",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD
index cb1f707028..c230919168 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/BUILD
+++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD
@@ -159,12 +159,7 @@ py_test(
     ],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = [
-        "no_pip_gpu",  # b/63391119
-        "noasan",  # b/116875897
-        "nomsan",
-        "notsan",
-    ],
+    tags = ["no_pip_gpu"],  # b/63391119
     deps = [
         ":estimators",
         ":feature_keys",
-- 
GitLab


From ce1cdd52eda4b40ff8fb8c09bc178210883b3773 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 1 Oct 2018 10:57:32 -0700
Subject: [PATCH 0153/1085] Make GCS filesystem/metadata lookup retries
 configurable

PiperOrigin-RevId: 215243030
---
 .../cloud/compute_engine_metadata_client.cc   |   15 +-
 .../cloud/compute_engine_metadata_client.h    |   10 +-
 .../compute_engine_metadata_client_test.cc    |    6 +-
 .../compute_engine_zone_provider_test.cc      |    8 +-
 .../core/platform/cloud/gcs_file_system.cc    |   25 +-
 .../core/platform/cloud/gcs_file_system.h     |    7 +-
 .../platform/cloud/gcs_file_system_test.cc    | 1286 +++++++++--------
 .../cloud/google_auth_provider_test.cc        |   20 +-
 .../platform/cloud/retrying_file_system.h     |   67 +-
 .../cloud/retrying_file_system_test.cc        |  102 +-
 .../core/platform/cloud/retrying_utils.cc     |   35 +-
 .../core/platform/cloud/retrying_utils.h      |   29 +-
 .../platform/cloud/retrying_utils_test.cc     |   32 +-
 13 files changed, 849 insertions(+), 793 deletions(-)

diff --git a/tensorflow/core/platform/cloud/compute_engine_metadata_client.cc b/tensorflow/core/platform/cloud/compute_engine_metadata_client.cc
index f41b83ac34..affb68ebbb 100644
--- a/tensorflow/core/platform/cloud/compute_engine_metadata_client.cc
+++ b/tensorflow/core/platform/cloud/compute_engine_metadata_client.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <utility>
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
-#include "tensorflow/core/platform/cloud/retrying_utils.h"
 
 namespace tensorflow {
 
@@ -25,21 +24,14 @@ namespace {
 
 // The URL to retrieve metadata when running in Google Compute Engine.
 constexpr char kGceMetadataBaseUrl[] = "http://metadata/computeMetadata/v1/";
-// The default initial delay between retries with exponential backoff.
-constexpr int kInitialRetryDelayUsec = 500000;  // 0.5 sec
 
 }  // namespace
 
-ComputeEngineMetadataClient::ComputeEngineMetadataClient(
-    std::shared_ptr<HttpRequest::Factory> http_request_factory)
-    : ComputeEngineMetadataClient(std::move(http_request_factory),
-                                  kInitialRetryDelayUsec) {}
-
 ComputeEngineMetadataClient::ComputeEngineMetadataClient(
     std::shared_ptr<HttpRequest::Factory> http_request_factory,
-    int64 initial_retry_delay_usec)
+    const RetryConfig& config)
     : http_request_factory_(std::move(http_request_factory)),
-      initial_retry_delay_usec_(initial_retry_delay_usec) {}
+      retry_config_(config) {}
 
 Status ComputeEngineMetadataClient::GetMetadata(
     const string& path, std::vector<char>* response_buffer) {
@@ -52,8 +44,7 @@ Status ComputeEngineMetadataClient::GetMetadata(
     return Status::OK();
   };
 
-  return RetryingUtils::CallWithRetries(get_metadata_from_gce,
-                                        initial_retry_delay_usec_);
+  return RetryingUtils::CallWithRetries(get_metadata_from_gce, retry_config_);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/compute_engine_metadata_client.h b/tensorflow/core/platform/cloud/compute_engine_metadata_client.h
index 534ccf30b2..7f060327da 100644
--- a/tensorflow/core/platform/cloud/compute_engine_metadata_client.h
+++ b/tensorflow/core/platform/cloud/compute_engine_metadata_client.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/cloud/http_request.h"
+#include "tensorflow/core/platform/cloud/retrying_utils.h"
 
 namespace tensorflow {
 
@@ -31,10 +32,11 @@ namespace tensorflow {
 class ComputeEngineMetadataClient {
  public:
   explicit ComputeEngineMetadataClient(
-      std::shared_ptr<HttpRequest::Factory> http_request_factory);
-  ComputeEngineMetadataClient(
       std::shared_ptr<HttpRequest::Factory> http_request_factory,
-      int64 initial_retry_delay_usec);
+      const RetryConfig& config = RetryConfig(
+          10000,  /* init_delay_time_us = 1 ms */
+          1000000 /* max_delay_time_us = 1 s */
+          ));
   virtual ~ComputeEngineMetadataClient() {}
 
   /// \brief Get the metadata value for a given attribute of the metadata
@@ -54,7 +56,7 @@ class ComputeEngineMetadataClient {
 
  private:
   std::shared_ptr<HttpRequest::Factory> http_request_factory_;
-  const int64 initial_retry_delay_usec_;
+  const RetryConfig retry_config_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(ComputeEngineMetadataClient);
 };
diff --git a/tensorflow/core/platform/cloud/compute_engine_metadata_client_test.cc b/tensorflow/core/platform/cloud/compute_engine_metadata_client_test.cc
index 4c41ccaa0e..e891b4a5e9 100644
--- a/tensorflow/core/platform/cloud/compute_engine_metadata_client_test.cc
+++ b/tensorflow/core/platform/cloud/compute_engine_metadata_client_test.cc
@@ -30,7 +30,8 @@ TEST(ComputeEngineMetadataClientTest, GetMetadata) {
 
   std::shared_ptr<HttpRequest::Factory> http_factory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  ComputeEngineMetadataClient client(http_factory, 0);
+  ComputeEngineMetadataClient client(http_factory,
+                                     RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<char> result;
   TF_EXPECT_OK(
@@ -56,7 +57,8 @@ TEST(ComputeEngineMetadataClientTest, RetryOnFailure) {
 
   std::shared_ptr<HttpRequest::Factory> http_factory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  ComputeEngineMetadataClient client(http_factory, 0);
+  ComputeEngineMetadataClient client(http_factory,
+                                     RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<char> result;
   TF_EXPECT_OK(
diff --git a/tensorflow/core/platform/cloud/compute_engine_zone_provider_test.cc b/tensorflow/core/platform/cloud/compute_engine_zone_provider_test.cc
index f7477eca23..476e4f9c1f 100644
--- a/tensorflow/core/platform/cloud/compute_engine_zone_provider_test.cc
+++ b/tensorflow/core/platform/cloud/compute_engine_zone_provider_test.cc
@@ -34,8 +34,8 @@ TEST_F(ComputeEngineZoneProviderTest, GetZone) {
 
   auto httpRequestFactory = std::make_shared<FakeHttpRequestFactory>(&requests);
 
-  auto metadata_client =
-      std::make_shared<ComputeEngineMetadataClient>(httpRequestFactory, 0);
+  auto metadata_client = std::make_shared<ComputeEngineMetadataClient>(
+      httpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
 
   ComputeEngineZoneProvider provider(metadata_client);
 
@@ -55,8 +55,8 @@ TEST_F(ComputeEngineZoneProviderTest, InvalidZoneString) {
 
   auto httpRequestFactory = std::make_shared<FakeHttpRequestFactory>(&requests);
 
-  auto metadata_client =
-      std::make_shared<ComputeEngineMetadataClient>(httpRequestFactory, 0);
+  auto metadata_client = std::make_shared<ComputeEngineMetadataClient>(
+      httpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
 
   ComputeEngineZoneProvider provider(metadata_client);
 
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 83ea8539ed..c61b68aeeb 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -333,14 +333,14 @@ class GcsWritableFile : public WritableFile {
                   GcsFileSystem* filesystem,
                   GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
-                  int64 initial_retry_delay_usec)
+                  RetryConfig retry_config)
       : bucket_(bucket),
         object_(object),
         filesystem_(filesystem),
         timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
-        initial_retry_delay_usec_(initial_retry_delay_usec) {
+        retry_config_(retry_config) {
     // TODO: to make it safer, outfile_ should be constructed from an FD
     if (GetTmpFilename(&tmp_content_filename_).ok()) {
       outfile_.open(tmp_content_filename_,
@@ -357,14 +357,14 @@ class GcsWritableFile : public WritableFile {
                   GcsFileSystem* filesystem, const string& tmp_content_filename,
                   GcsFileSystem::TimeoutConfig* timeouts,
                   std::function<void()> file_cache_erase,
-                  int64 initial_retry_delay_usec)
+                  RetryConfig retry_config)
       : bucket_(bucket),
         object_(object),
         filesystem_(filesystem),
         timeouts_(timeouts),
         file_cache_erase_(std::move(file_cache_erase)),
         sync_needed_(true),
-        initial_retry_delay_usec_(initial_retry_delay_usec) {
+        retry_config_(retry_config) {
     tmp_content_filename_ = tmp_content_filename;
     outfile_.open(tmp_content_filename_,
                   std::ofstream::binary | std::ofstream::app);
@@ -441,7 +441,7 @@ class GcsWritableFile : public WritableFile {
           first_attempt = false;
           return UploadToSession(session_uri, already_uploaded);
         },
-        initial_retry_delay_usec_);
+        retry_config_);
     if (upload_status.code() == errors::Code::NOT_FOUND) {
       // GCS docs recommend retrying the whole upload. We're relying on the
       // RetryingFileSystem to retry the Sync() call.
@@ -586,7 +586,7 @@ class GcsWritableFile : public WritableFile {
   GcsFileSystem::TimeoutConfig* timeouts_;
   std::function<void()> file_cache_erase_;
   bool sync_needed_;  // whether there is buffered data that needs to be synced
-  int64 initial_retry_delay_usec_;
+  RetryConfig retry_config_;
 };
 
 class GcsReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
@@ -791,7 +791,7 @@ GcsFileSystem::GcsFileSystem(
     std::unique_ptr<ZoneProvider> zone_provider, size_t block_size,
     size_t max_bytes, uint64 max_staleness, uint64 stat_cache_max_age,
     size_t stat_cache_max_entries, uint64 matching_paths_cache_max_age,
-    size_t matching_paths_cache_max_entries, int64 initial_retry_delay_usec,
+    size_t matching_paths_cache_max_entries, RetryConfig retry_config,
     TimeoutConfig timeouts, const std::unordered_set<string>& allowed_locations,
     std::pair<const string, const string>* additional_header)
     : auth_provider_(std::move(auth_provider)),
@@ -806,7 +806,7 @@ GcsFileSystem::GcsFileSystem(
           kCacheNeverExpire, kBucketLocationCacheMaxEntries)),
       allowed_locations_(allowed_locations),
       timeouts_(timeouts),
-      initial_retry_delay_usec_(initial_retry_delay_usec),
+      retry_config_(retry_config),
       additional_header_(additional_header) {}
 
 Status GcsFileSystem::NewRandomAccessFile(
@@ -941,7 +941,7 @@ Status GcsFileSystem::NewWritableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(bucket, object, this, &timeouts_,
                                     [this, fname]() { ClearFileCaches(fname); },
-                                    initial_retry_delay_usec_));
+                                    retry_config_));
   return Status::OK();
 }
 
@@ -981,7 +981,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname,
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object));
   result->reset(new GcsWritableFile(
       bucket, object, this, old_content_filename, &timeouts_,
-      [this, fname]() { ClearFileCaches(fname); }, initial_retry_delay_usec_));
+      [this, fname]() { ClearFileCaches(fname); }, retry_config_));
   return Status::OK();
 }
 
@@ -1534,7 +1534,7 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) {
   // on the server side, we can't just retry the whole RenameFile operation
   // because the source object is already gone.
   return RetryingUtils::DeleteWithRetries(
-      [this, &src]() { return DeleteFile(src); }, initial_retry_delay_usec_);
+      [this, &src]() { return DeleteFile(src); }, retry_config_);
 }
 
 Status GcsFileSystem::IsDirectory(const string& fname) {
@@ -1590,8 +1590,7 @@ Status GcsFileSystem::DeleteRecursively(const string& dirname,
     // and therefore RetryingFileSystem won't pay attention to the failures,
     // we need to make sure these failures are properly retried.
     const auto& delete_file_status = RetryingUtils::DeleteWithRetries(
-        [this, &full_path]() { return DeleteFile(full_path); },
-        initial_retry_delay_usec_);
+        [this, &full_path]() { return DeleteFile(full_path); }, retry_config_);
     if (!delete_file_status.ok()) {
       if (IsDirectory(full_path).ok()) {
         // The object is a directory marker.
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h
index 71db707687..d0840a3046 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.h
+++ b/tensorflow/core/platform/cloud/gcs_file_system.h
@@ -93,7 +93,7 @@ class GcsFileSystem : public FileSystem {
                 uint64 stat_cache_max_age, size_t stat_cache_max_entries,
                 uint64 matching_paths_cache_max_age,
                 size_t matching_paths_cache_max_entries,
-                int64 initial_retry_delay_usec, TimeoutConfig timeouts,
+                RetryConfig retry_config, TimeoutConfig timeouts,
                 const std::unordered_set<string>& allowed_locations,
                 std::pair<const string, const string>* additional_header);
 
@@ -332,7 +332,7 @@ class GcsFileSystem : public FileSystem {
   GcsStatsInterface* stats_ = nullptr;  // Not owned.
 
   /// The initial delay for exponential backoffs when retrying failed calls.
-  const int64 initial_retry_delay_usec_ = 1000000L;
+  RetryConfig retry_config_;
 
   // Additional header material to be transmitted with all GCS requests
   std::unique_ptr<std::pair<const string, const string>> additional_header_;
@@ -344,7 +344,8 @@ class GcsFileSystem : public FileSystem {
 class RetryingGcsFileSystem : public RetryingFileSystem<GcsFileSystem> {
  public:
   RetryingGcsFileSystem()
-      : RetryingFileSystem(std::unique_ptr<GcsFileSystem>(new GcsFileSystem)) {}
+      : RetryingFileSystem(std::unique_ptr<GcsFileSystem>(new GcsFileSystem),
+                           RetryConfig(100000 /* init_delay_time_us */)) {}
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 14376ad339..702802b185 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -24,6 +24,8 @@ namespace tensorflow {
 namespace {
 
 static GcsFileSystem::TimeoutConfig kTestTimeoutConfig(5, 1, 10, 20, 30);
+static RetryConfig kTestRetryConfig(0 /* init_delay_time_us */);
+
 // Default (empty) constraint config
 static std::unordered_set<string>* kAllowedLocationsDefault =
     new std::unordered_set<string>();
@@ -62,16 +64,16 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
            "Range: 6-11\n"
            "Timeouts: 5 1 20\n",
            "6789")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -108,9 +110,9 @@ TEST(GcsFileSystemTest,
                    0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig,
-                   *kAllowedLocationsAuto, nullptr /* gcs additional header */);
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsAuto,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -150,9 +152,9 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithLocationConstraintCaching) {
                    0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig,
-                   *kAllowedLocationsAuto, nullptr /* gcs additional header */);
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsAuto,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
 
@@ -191,9 +193,9 @@ TEST(GcsFileSystemTest,
                    0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
                    0 /* stat cache max age */, 0 /* stat cache max entries */,
                    0 /* matching paths cache max age */,
-                   0 /* matching paths cache max entries */,
-                   0 /* initial retry delay */, kTestTimeoutConfig,
-                   *kAllowedLocationsAuto, nullptr /* gcs additional header */);
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsAuto,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   EXPECT_EQ(tensorflow::errors::FailedPrecondition(
@@ -216,16 +218,16 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_DifferentN) {
            "Range: 3-12\n"
            "Timeouts: 5 1 20\n",
            "3456789")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
@@ -283,7 +285,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 9 /* block size */,
       18 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -372,7 +374,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_Flush) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 9 /* block size */,
       18 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -414,17 +416,17 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
                            "Range: 8-15\n"
                            "Timeouts: 5 1 20\n",
                            "89abcdef")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 8 /* block size */,
-      16 /* max bytes */, 3600 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   8 /* block size */, 16 /* max bytes */,
+                   3600 /* max staleness */, 3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
   char scratch[100];
   StringPiece result;
   // There should only be two HTTP requests issued to GCS even though we iterate
@@ -492,7 +494,7 @@ TEST(GcsFileSystemTest,
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 9 /* block size */,
       18 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -513,17 +515,17 @@ TEST(GcsFileSystemTest,
 
 TEST(GcsFileSystemTest, NewRandomAccessFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
-      0 /* read ahead bytes */, 0 /* max bytes */, 0 /* max staleness */,
-      0 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* read ahead bytes */, 0 /* max bytes */,
+                   0 /* max staleness */, 0 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<RandomAccessFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -547,16 +549,16 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_InconsistentRead) {
            "012")});
 
   // Set stat_cache_max_age to 1000s so that StatCache could work.
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 1e3 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   1e3 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   // Stat the file first so that the file stats are cached.
   FileStatistics stat;
@@ -621,7 +623,7 @@ TEST(GcsFileSystemTest, NewWritableFile) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 8 /* block size */,
       8 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -703,16 +705,16 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) {
                            "Timeouts: 5 1 30\n"
                            "Put body: t2\n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -773,17 +775,17 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) {
            "Range: 0-7\n"
            "Timeouts: 5 1 20\n",
            "01234567")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 8 /* block size */,
-      8 /* max bytes */, 3600 /* max staleness */,
-      3600 /* stat cache max age */, 0 /* stat cache max entries */,
-      0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   8 /* block size */, 8 /* max bytes */,
+                   3600 /* max staleness */, 3600 /* stat cache max age */,
+                   0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
   // Pull the file's first block into the cache. This will trigger the first
   // HTTP request to GCS.
   std::unique_ptr<RandomAccessFile> rfile;
@@ -867,9 +869,9 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 2 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+      0 /* matching paths cache max entries */,
+      RetryConfig(2 /* .init_delay_time_us */), kTestTimeoutConfig,
+      *kAllowedLocationsDefault, nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -918,16 +920,16 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
                            "Timeouts: 5 1 30\n"
                            "Put body: content1,content2\n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file));
@@ -948,16 +950,16 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) {
 
 TEST(GcsFileSystemTest, NewWritableFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1013,7 +1015,7 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 32 /* block size */,
       32 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1041,16 +1043,16 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
 
 TEST(GcsFileSystemTest, NewAppendableFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<WritableFile> file;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1075,16 +1077,16 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
                            "Range: 0-",
                            content.size() - 1, "\n", "Timeouts: 5 1 20\n"),
            content)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile(
@@ -1096,16 +1098,16 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
 
 TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1120,16 +1122,16 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt"));
 }
@@ -1150,16 +1152,16 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) {
            "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder"));
 }
@@ -1176,16 +1178,16 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{\"size\": \"100\"}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.FileExists("gs://bucket1"));
   TF_EXPECT_OK(fs.FileExists("gs://bucket1/"));
@@ -1206,16 +1208,16 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{\"items\": []}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(errors::Code::NOT_FOUND,
             fs.FileExists("gs://bucket/path/file1.txt").code());
@@ -1233,19 +1235,19 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
-  EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
-            fs.FileExists("gs://bucket2/").code());
-  EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
+  EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
+            fs.FileExists("gs://bucket2/").code());
+  EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.FileExists("gs://bucket2").code());
 }
 
@@ -1279,7 +1281,7 @@ TEST(GcsFileSystemTest, FileExists_StatCache) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1306,7 +1308,7 @@ TEST(GcsFileSystemTest, FileExists_DirectoryMark) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1322,16 +1324,16 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{\"prefixes\": [\"path/subpath/\"]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -1350,16 +1352,16 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
       "\"prefixes\": [\"path/subpath/\"]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -1379,16 +1381,16 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) {
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }],"
       "\"prefixes\": [\"path/subpath/\"]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -1407,16 +1409,16 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/file3.txt\" }],"
       "\"prefixes\": [\"path/subpath/\"]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -1432,16 +1434,16 @@ TEST(GcsFileSystemTest, GetChildren_Root) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket-a-b-c", &children));
@@ -1457,16 +1459,16 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children));
@@ -1498,16 +1500,16 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) {
            "  { \"name\": \"path/file4.txt\" },"
            "  { \"name\": \"path/file5.txt\" }]}")});
 
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children));
@@ -1525,16 +1527,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) {
       "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/subpath/file2.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(
@@ -1553,16 +1555,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", &result));
@@ -1582,16 +1584,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file2.txt", &result));
@@ -1608,16 +1610,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) {
       "{\"items\": [ "
       "  { \"name\": \"path/\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", &result));
@@ -1634,16 +1636,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
       "  { \"name\": \"path/file1.txt\" },"
       "  { \"name\": \"path/subpath/file2.txt\" },"
       "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file3.txt", &result));
@@ -1652,16 +1654,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) {
 
 TEST(GcsFileSystemTest, GetMatchingPaths_OnlyWildcard) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::vector<string> result;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -1686,16 +1688,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) {
            "  { \"name\": \"path/file1.txt\" },"
            "  { \"name\": \"path/subpath/file2.txt\" },"
            "  { \"name\": \"path/file3.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 3600 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   3600 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   // Repeated calls to fs.GetMatchingPaths on these patterns should not lead to
   // any additional HTTP requests to GCS.
@@ -1729,16 +1731,16 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache_Flush) {
            "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"path/subpath/file2.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 3600 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   3600 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   // This loop should trigger the first HTTP request to GCS.
   for (int i = 0; i < 10; i++) {
@@ -1800,7 +1802,7 @@ TEST(GcsFileSystemTest, DeleteFile) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 16 /* block size */,
       16 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1821,16 +1823,16 @@ TEST(GcsFileSystemTest, DeleteFile) {
 
 TEST(GcsFileSystemTest, DeleteFile_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
             fs.DeleteFile("gs://bucket/").code());
@@ -1871,7 +1873,7 @@ TEST(GcsFileSystemTest, DeleteFile_StatCacheRemoved) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 16 /* block size */,
       16 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -1894,16 +1896,16 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1923,16 +1925,16 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) {
                            "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/"));
 }
@@ -1943,16 +1945,16 @@ TEST(GcsFileSystemTest, DeleteDir_BucketOnly) {
       "name%2CnextPageToken&maxResults=2\nAuth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.DeleteDir("gs://bucket"));
 }
@@ -1965,16 +1967,16 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
       "Timeouts: 5 1 10\n",
       "{\"items\": [ "
       "  { \"name\": \"path/file1.txt\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.DeleteDir("gs://bucket/path/").code());
@@ -1988,16 +1990,16 @@ TEST(GcsFileSystemTest, GetFileSize) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   uint64 size;
   TF_EXPECT_OK(fs.GetFileSize("gs://bucket/file.txt", &size));
@@ -2006,16 +2008,16 @@ TEST(GcsFileSystemTest, GetFileSize) {
 
 TEST(GcsFileSystemTest, GetFileSize_NoObjectName) {
   std::vector<HttpRequest*> requests;
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   uint64 size;
   EXPECT_EQ(errors::Code::INVALID_ARGUMENT,
@@ -2092,16 +2094,16 @@ TEST(GcsFileSystemTest, RenameFile_Folder) {
            "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.RenameFile("gs://bucket/path1", "gs://bucket/path2/"));
 }
@@ -2191,7 +2193,7 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 16 /* block size */,
       64 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
   // Do an initial read of the source and destination files to load their
@@ -2272,7 +2274,7 @@ TEST(GcsFileSystemTest, RenameFile_Object_FlushTargetStatCache) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
   // Do an initial stat of the destination file to load their contents into the
@@ -2332,16 +2334,16 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) {
            "Timeouts: 5 1 10\n"
            "Delete: yes\n",
            "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(
       fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt"));
@@ -2374,16 +2376,16 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) {
            "Post: yes\n"
            "Timeouts: 5 1 10\n",
            "{\"done\": false}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(
       errors::Code::UNIMPLEMENTED,
@@ -2399,16 +2401,16 @@ TEST(GcsFileSystemTest, Stat_Object) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat));
@@ -2433,16 +2435,16 @@ TEST(GcsFileSystemTest, Stat_Folder) {
            "Timeouts: 5 1 10\n",
            "{\"items\": [ "
            "  { \"name\": \"subfolder/\" }]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", &stat));
@@ -2466,16 +2468,16 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/path", &stat).code());
@@ -2487,16 +2489,16 @@ TEST(GcsFileSystemTest, Stat_Bucket) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/", &stat));
@@ -2511,16 +2513,16 @@ TEST(GcsFileSystemTest, Stat_BucketNotFound) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/", &stat).code());
@@ -2556,7 +2558,7 @@ TEST(GcsFileSystemTest, Stat_Cache) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
 
@@ -2598,7 +2600,7 @@ TEST(GcsFileSystemTest, Stat_Cache_Flush) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 3600 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       nullptr /* gcs additional header */);
   // There should be a single HTTP request to GCS for fs.Stat in this loop.
@@ -2628,16 +2630,16 @@ TEST(GcsFileSystemTest, Stat_FilenameEndingWithSlash) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"5\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("gs://bucket/dir/", &stat));
@@ -2660,16 +2662,16 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(error::Code::NOT_FOUND,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -2691,16 +2693,16 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) {
            "Timeouts: 5 1 10\n",
            strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                            "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(error::Code::FAILED_PRECONDITION,
             fs.IsDirectory("gs://bucket/file.txt").code());
@@ -2722,16 +2724,16 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{\"items\": [{\"name\": \"subfolder/\"}]}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder/"));
@@ -2749,16 +2751,16 @@ TEST(GcsFileSystemTest, IsDirectory_Bucket) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket"));
   TF_EXPECT_OK(fs.IsDirectory("gs://bucket/"));
@@ -2770,16 +2772,16 @@ TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
       "Auth Token: fake_token\n"
       "Timeouts: 5 1 10\n",
       "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   EXPECT_EQ(error::Code::NOT_FOUND, fs.IsDirectory("gs://bucket/").code());
 }
@@ -2812,16 +2814,16 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
                            "Timeouts: 5 1 30\n"
                            "Put body: \n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath/"));
@@ -2839,16 +2841,16 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/"));
   TF_EXPECT_OK(fs.CreateDir("gs://bucket"));
@@ -2911,16 +2913,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) {
                            "Timeouts: 5 1 10\n"
                            "Delete: yes\n",
                            "")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -3004,16 +3006,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) {
            "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
 
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   int64 undeleted_files, undeleted_dirs;
   TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files,
@@ -3039,16 +3041,16 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) {
            "Auth Token: fake_token\n"
            "Timeouts: 5 1 10\n",
            "", errors::NotFound("404"), 404)});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay*/,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   int64 undeleted_files, undeleted_dirs;
   EXPECT_EQ(error::Code::NOT_FOUND,
@@ -3130,7 +3132,7 @@ TEST(GcsFileSystemTest, AdditionalRequestHeaderTest) {
       std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
       0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
       0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
+      0 /* matching paths cache max entries */, kTestRetryConfig,
       kTestTimeoutConfig, *kAllowedLocationsDefault,
       add_header /* gcs additional header */);
 
@@ -3199,16 +3201,16 @@ TEST(GcsFileSystemTest, CreateHttpRequest) {
                            "Auth Token: fake_token\n"
                            "Header Hello: world\n",
                            "{}")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   std::unique_ptr<HttpRequest> request;
   TF_EXPECT_OK(fs.CreateHttpRequest(&request));
@@ -3262,16 +3264,16 @@ TEST(GcsFileSystemTest, Stat_StatsRecording) {
       "Timeouts: 5 1 10\n",
       strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
                       "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TestGcsStats stats;
   fs.SetStats(&stats);
@@ -3289,16 +3291,16 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_StatsRecording) {
       "Range: 0-5\n"
       "Timeouts: 5 1 20\n",
       "012345")});
-  GcsFileSystem fs(
-      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
-      std::unique_ptr<HttpRequest::Factory>(
-          new FakeHttpRequestFactory(&requests)),
-      std::unique_ptr<ZoneProvider>(new FakeZoneProvider), 0 /* block size */,
-      0 /* max bytes */, 0 /* max staleness */, 0 /* stat cache max age */,
-      0 /* stat cache max entries */, 0 /* matching paths cache max age */,
-      0 /* matching paths cache max entries */, 0 /* initial retry delay */,
-      kTestTimeoutConfig, *kAllowedLocationsDefault,
-      nullptr /* gcs additional header */);
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   std::unique_ptr<ZoneProvider>(new FakeZoneProvider),
+                   0 /* block size */, 0 /* max bytes */, 0 /* max staleness */,
+                   0 /* stat cache max age */, 0 /* stat cache max entries */,
+                   0 /* matching paths cache max age */,
+                   0 /* matching paths cache max entries */, kTestRetryConfig,
+                   kTestTimeoutConfig, *kAllowedLocationsDefault,
+                   nullptr /* gcs additional header */);
 
   TestGcsStats stats;
   fs.SetStats(&stats);
diff --git a/tensorflow/core/platform/cloud/google_auth_provider_test.cc b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
index 07b88a880f..ec31c5ee8c 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider_test.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
@@ -93,8 +93,8 @@ TEST_F(GoogleAuthProviderTest, EnvironmentVariable_Caching) {
 
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
   oauth_client->return_token = "fake-token";
@@ -129,8 +129,8 @@ TEST_F(GoogleAuthProviderTest, GCloudRefreshToken) {
   FakeEnv env;
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
 
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
@@ -178,8 +178,8 @@ TEST_F(GoogleAuthProviderTest, RunningOnGCE) {
   FakeEnv env;
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
 
@@ -206,8 +206,8 @@ TEST_F(GoogleAuthProviderTest, OverrideForTesting) {
   FakeEnv env;
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&empty_requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
 
@@ -228,8 +228,8 @@ TEST_F(GoogleAuthProviderTest, NothingAvailable) {
   FakeEnv env;
   std::shared_ptr<HttpRequest::Factory> fakeHttpRequestFactory =
       std::make_shared<FakeHttpRequestFactory>(&requests);
-  auto metadataClient =
-      std::make_shared<ComputeEngineMetadataClient>(fakeHttpRequestFactory, 0);
+  auto metadataClient = std::make_shared<ComputeEngineMetadataClient>(
+      fakeHttpRequestFactory, RetryConfig(0 /* init_delay_time_us */));
   GoogleAuthProvider provider(std::unique_ptr<OAuthClient>(oauth_client),
                               metadataClient, &env);
 
diff --git a/tensorflow/core/platform/cloud/retrying_file_system.h b/tensorflow/core/platform/cloud/retrying_file_system.h
index 941ab7ad65..5ce6670dc7 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system.h
+++ b/tensorflow/core/platform/cloud/retrying_file_system.h
@@ -34,9 +34,9 @@ template <typename Underlying>
 class RetryingFileSystem : public FileSystem {
  public:
   RetryingFileSystem(std::unique_ptr<Underlying> base_file_system,
-                     int64 delay_microseconds = 1000000)
+                     const RetryConfig& retry_config)
       : base_file_system_(std::move(base_file_system)),
-        initial_delay_microseconds_(delay_microseconds) {}
+        retry_config_(retry_config) {}
 
   Status NewRandomAccessFile(
       const string& filename,
@@ -55,7 +55,7 @@ class RetryingFileSystem : public FileSystem {
   Status FileExists(const string& fname) override {
     return RetryingUtils::CallWithRetries(
         [this, &fname]() { return base_file_system_->FileExists(fname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status GetChildren(const string& dir, std::vector<string>* result) override {
@@ -63,7 +63,7 @@ class RetryingFileSystem : public FileSystem {
         [this, &dir, result]() {
           return base_file_system_->GetChildren(dir, result);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status GetMatchingPaths(const string& pattern,
@@ -72,31 +72,31 @@ class RetryingFileSystem : public FileSystem {
         [this, &pattern, result]() {
           return base_file_system_->GetMatchingPaths(pattern, result);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status Stat(const string& fname, FileStatistics* stat) override {
     return RetryingUtils::CallWithRetries(
         [this, &fname, stat]() { return base_file_system_->Stat(fname, stat); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status DeleteFile(const string& fname) override {
     return RetryingUtils::DeleteWithRetries(
         [this, &fname]() { return base_file_system_->DeleteFile(fname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status CreateDir(const string& dirname) override {
     return RetryingUtils::CallWithRetries(
         [this, &dirname]() { return base_file_system_->CreateDir(dirname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status DeleteDir(const string& dirname) override {
     return RetryingUtils::DeleteWithRetries(
         [this, &dirname]() { return base_file_system_->DeleteDir(dirname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status GetFileSize(const string& fname, uint64* file_size) override {
@@ -104,7 +104,7 @@ class RetryingFileSystem : public FileSystem {
         [this, &fname, file_size]() {
           return base_file_system_->GetFileSize(fname, file_size);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status RenameFile(const string& src, const string& target) override {
@@ -112,13 +112,13 @@ class RetryingFileSystem : public FileSystem {
         [this, &src, &target]() {
           return base_file_system_->RenameFile(src, target);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status IsDirectory(const string& dirname) override {
     return RetryingUtils::CallWithRetries(
         [this, &dirname]() { return base_file_system_->IsDirectory(dirname); },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   Status DeleteRecursively(const string& dirname, int64* undeleted_files,
@@ -128,7 +128,7 @@ class RetryingFileSystem : public FileSystem {
           return base_file_system_->DeleteRecursively(dirname, undeleted_files,
                                                       undeleted_dirs);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
   void FlushCaches() override { base_file_system_->FlushCaches(); }
@@ -137,7 +137,7 @@ class RetryingFileSystem : public FileSystem {
 
  private:
   std::unique_ptr<Underlying> base_file_system_;
-  const int64 initial_delay_microseconds_;
+  const RetryConfig retry_config_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(RetryingFileSystem);
 };
@@ -147,9 +147,8 @@ namespace retrying_internals {
 class RetryingRandomAccessFile : public RandomAccessFile {
  public:
   RetryingRandomAccessFile(std::unique_ptr<RandomAccessFile> base_file,
-                           int64 delay_microseconds)
-      : base_file_(std::move(base_file)),
-        initial_delay_microseconds_(delay_microseconds) {}
+                           const RetryConfig& retry_config)
+      : base_file_(std::move(base_file)), retry_config_(retry_config) {}
 
   Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
@@ -157,20 +156,19 @@ class RetryingRandomAccessFile : public RandomAccessFile {
         [this, offset, n, result, scratch]() {
           return base_file_->Read(offset, n, result, scratch);
         },
-        initial_delay_microseconds_);
+        retry_config_);
   }
 
  private:
   std::unique_ptr<RandomAccessFile> base_file_;
-  const int64 initial_delay_microseconds_;
+  const RetryConfig retry_config_;
 };
 
 class RetryingWritableFile : public WritableFile {
  public:
   RetryingWritableFile(std::unique_ptr<WritableFile> base_file,
-                       int64 delay_microseconds)
-      : base_file_(std::move(base_file)),
-        initial_delay_microseconds_(delay_microseconds) {}
+                       const RetryConfig& retry_config)
+      : base_file_(std::move(base_file)), retry_config_(retry_config) {}
 
   ~RetryingWritableFile() override {
     // Makes sure the retrying version of Close() is called in the destructor.
@@ -179,25 +177,24 @@ class RetryingWritableFile : public WritableFile {
 
   Status Append(StringPiece data) override {
     return RetryingUtils::CallWithRetries(
-        [this, &data]() { return base_file_->Append(data); },
-        initial_delay_microseconds_);
+        [this, &data]() { return base_file_->Append(data); }, retry_config_);
   }
   Status Close() override {
     return RetryingUtils::CallWithRetries(
-        [this]() { return base_file_->Close(); }, initial_delay_microseconds_);
+        [this]() { return base_file_->Close(); }, retry_config_);
   }
   Status Flush() override {
     return RetryingUtils::CallWithRetries(
-        [this]() { return base_file_->Flush(); }, initial_delay_microseconds_);
+        [this]() { return base_file_->Flush(); }, retry_config_);
   }
   Status Sync() override {
     return RetryingUtils::CallWithRetries(
-        [this]() { return base_file_->Sync(); }, initial_delay_microseconds_);
+        [this]() { return base_file_->Sync(); }, retry_config_);
   }
 
  private:
   std::unique_ptr<WritableFile> base_file_;
-  const int64 initial_delay_microseconds_;
+  const RetryConfig retry_config_;
 };
 
 }  // namespace retrying_internals
@@ -210,9 +207,9 @@ Status RetryingFileSystem<Underlying>::NewRandomAccessFile(
       [this, &filename, &base_file]() {
         return base_file_system_->NewRandomAccessFile(filename, &base_file);
       },
-      initial_delay_microseconds_));
+      retry_config_));
   result->reset(new retrying_internals::RetryingRandomAccessFile(
-      std::move(base_file), initial_delay_microseconds_));
+      std::move(base_file), retry_config_));
   return Status::OK();
 }
 
@@ -224,9 +221,9 @@ Status RetryingFileSystem<Underlying>::NewWritableFile(
       [this, &filename, &base_file]() {
         return base_file_system_->NewWritableFile(filename, &base_file);
       },
-      initial_delay_microseconds_));
+      retry_config_));
   result->reset(new retrying_internals::RetryingWritableFile(
-      std::move(base_file), initial_delay_microseconds_));
+      std::move(base_file), retry_config_));
   return Status::OK();
 }
 
@@ -238,9 +235,9 @@ Status RetryingFileSystem<Underlying>::NewAppendableFile(
       [this, &filename, &base_file]() {
         return base_file_system_->NewAppendableFile(filename, &base_file);
       },
-      initial_delay_microseconds_));
+      retry_config_));
   result->reset(new retrying_internals::RetryingWritableFile(
-      std::move(base_file), initial_delay_microseconds_));
+      std::move(base_file), retry_config_));
   return Status::OK();
 }
 
@@ -252,7 +249,7 @@ Status RetryingFileSystem<Underlying>::NewReadOnlyMemoryRegionFromFile(
         return base_file_system_->NewReadOnlyMemoryRegionFromFile(filename,
                                                                   result);
       },
-      initial_delay_microseconds_);
+      retry_config_);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/retrying_file_system_test.cc b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
index 5910fef1d2..868eea096c 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
@@ -184,7 +184,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_ImmediateSuccess) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->random_access_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped random access file.
   std::unique_ptr<RandomAccessFile> random_access_file;
@@ -211,7 +212,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_SuccessWith3rdTry) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->random_access_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped random access file.
   std::unique_ptr<RandomAccessFile> random_access_file;
@@ -235,7 +237,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_AllRetriesFailed) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->random_access_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped random access file.
   std::unique_ptr<RandomAccessFile> random_access_file;
@@ -265,7 +268,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_NoRetriesForSomeErrors) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->random_access_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped random access file.
   std::unique_ptr<RandomAccessFile> random_access_file;
@@ -291,7 +295,8 @@ TEST(RetryingFileSystemTest, NewWritableFile_ImmediateSuccess) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped writable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -317,7 +322,8 @@ TEST(RetryingFileSystemTest, NewWritableFile_SuccessWith3rdTry) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped writable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -343,7 +349,8 @@ TEST(RetryingFileSystemTest, NewWritableFile_SuccessWith3rdTry_ViaDestructor) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped writable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -368,7 +375,8 @@ TEST(RetryingFileSystemTest, NewAppendableFile_SuccessWith3rdTry) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped appendable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -391,7 +399,8 @@ TEST(RetryingFileSystemTest, NewWritableFile_AllRetriesFailed) {
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
   base_fs->writable_file_to_return = std::move(base_file);
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   // Retrieve the wrapped writable file.
   std::unique_ptr<WritableFile> writable_file;
@@ -412,7 +421,8 @@ TEST(RetryingFileSystemTest,
        std::make_tuple("NewReadOnlyMemoryRegionFromFile", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::unique_ptr<ReadOnlyMemoryRegion> result;
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile("filename.txt", &result));
@@ -423,7 +433,8 @@ TEST(RetryingFileSystemTest, NewReadOnlyMemoryRegionFromFile_AllRetriesFailed) {
       CreateRetriableErrors("NewReadOnlyMemoryRegionFromFile", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::unique_ptr<ReadOnlyMemoryRegion> result;
   const auto& status =
@@ -440,7 +451,8 @@ TEST(RetryingFileSystemTest, GetChildren_SuccessWith2ndTry) {
        std::make_tuple("GetChildren", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetChildren("gs://path", &result));
@@ -450,7 +462,8 @@ TEST(RetryingFileSystemTest, GetChildren_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("GetChildren", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.GetChildren("gs://path", &result);
@@ -466,7 +479,8 @@ TEST(RetryingFileSystemTest, GetMatchingPaths_SuccessWith2ndTry) {
        std::make_tuple("GetMatchingPaths", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.GetMatchingPaths("gs://path/dir", &result));
@@ -477,7 +491,8 @@ TEST(RetryingFileSystemTest, GetMatchingPaths_AllRetriesFailed) {
       CreateRetriableErrors("GetMatchingPaths", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.GetMatchingPaths("gs://path/dir", &result);
@@ -492,7 +507,8 @@ TEST(RetryingFileSystemTest, DeleteFile_SuccessWith2ndTry) {
        std::make_tuple("DeleteFile", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.DeleteFile("gs://path/file.txt"));
@@ -502,7 +518,8 @@ TEST(RetryingFileSystemTest, DeleteFile_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("DeleteFile", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.DeleteFile("gs://path/file.txt");
@@ -517,7 +534,8 @@ TEST(RetryingFileSystemTest, CreateDir_SuccessWith2ndTry) {
        std::make_tuple("CreateDir", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.CreateDir("gs://path/newdir"));
@@ -527,7 +545,8 @@ TEST(RetryingFileSystemTest, CreateDir_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("CreateDir", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.CreateDir("gs://path/newdir");
@@ -542,7 +561,8 @@ TEST(RetryingFileSystemTest, DeleteDir_SuccessWith2ndTry) {
        std::make_tuple("DeleteDir", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   TF_EXPECT_OK(fs.DeleteDir("gs://path/dir"));
@@ -552,7 +572,8 @@ TEST(RetryingFileSystemTest, DeleteDir_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("DeleteDir", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   std::vector<string> result;
   const auto& status = fs.DeleteDir("gs://path/dir");
@@ -568,7 +589,8 @@ TEST(RetryingFileSystemTest, GetFileSize_SuccessWith2ndTry) {
        std::make_tuple("GetFileSize", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   uint64 size;
   TF_EXPECT_OK(fs.GetFileSize("gs://path/file.txt", &size));
@@ -578,7 +600,8 @@ TEST(RetryingFileSystemTest, GetFileSize_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("GetFileSize", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   uint64 size;
   const auto& status = fs.GetFileSize("gs://path/file.txt", &size);
@@ -593,7 +616,8 @@ TEST(RetryingFileSystemTest, RenameFile_SuccessWith2ndTry) {
        std::make_tuple("RenameFile", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   TF_EXPECT_OK(fs.RenameFile("old_name", "new_name"));
 }
@@ -602,7 +626,8 @@ TEST(RetryingFileSystemTest, RenameFile_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("RenameFile", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   const auto& status = fs.RenameFile("old_name", "new_name");
   EXPECT_TRUE(
@@ -616,7 +641,8 @@ TEST(RetryingFileSystemTest, Stat_SuccessWith2ndTry) {
        std::make_tuple("Stat", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   FileStatistics stat;
   TF_EXPECT_OK(fs.Stat("file_name", &stat));
@@ -626,7 +652,8 @@ TEST(RetryingFileSystemTest, Stat_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("Stat", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   FileStatistics stat;
   const auto& status = fs.Stat("file_name", &stat);
@@ -639,7 +666,8 @@ TEST(RetryingFileSystemTest, FileExists_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("FileExists", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   const auto& status = fs.FileExists("file_name");
   EXPECT_TRUE(
@@ -653,7 +681,8 @@ TEST(RetryingFileSystemTest, FileExists_SuccessWith2ndTry) {
        std::make_tuple("FileExists", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   TF_EXPECT_OK(fs.FileExists("gs://path/dir"));
 }
@@ -665,7 +694,8 @@ TEST(RetryingFileSystemTest, IsDirectory_SuccessWith2ndTry) {
        std::make_tuple("IsDirectory", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   TF_EXPECT_OK(fs.IsDirectory("gs://path/dir"));
 }
@@ -674,7 +704,8 @@ TEST(RetryingFileSystemTest, IsDirectory_AllRetriesFailed) {
   ExpectedCalls expected_fs_calls = CreateRetriableErrors("IsDirectory", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
 
   const auto& status = fs.IsDirectory("gs://path/dir");
   EXPECT_TRUE(
@@ -689,7 +720,8 @@ TEST(RetryingFileSystemTest, DeleteRecursively_SuccessWith2ndTry) {
        std::make_tuple("DeleteRecursively", Status::OK())});
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
   int64 undeleted_files, undeleted_dirs;
 
   TF_EXPECT_OK(
@@ -701,7 +733,8 @@ TEST(RetryingFileSystemTest, DeleteRecursively_AllRetriesFailed) {
       CreateRetriableErrors("DeleteRecursively", 11);
   std::unique_ptr<MockFileSystem> base_fs(
       new MockFileSystem(expected_fs_calls));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
   int64 undeleted_files, undeleted_dirs;
 
   const auto& status =
@@ -715,7 +748,8 @@ TEST(RetryingFileSystemTest, FlushCaches) {
   ExpectedCalls none;
   bool flushed = false;
   std::unique_ptr<MockFileSystem> base_fs(new MockFileSystem(none, &flushed));
-  RetryingFileSystem<MockFileSystem> fs(std::move(base_fs), 0);
+  RetryingFileSystem<MockFileSystem> fs(
+      std::move(base_fs), RetryConfig(0 /* init_delay_time_us */));
   fs.FlushCaches();
   EXPECT_TRUE(flushed);
 }
diff --git a/tensorflow/core/platform/cloud/retrying_utils.cc b/tensorflow/core/platform/cloud/retrying_utils.cc
index d2df422024..cb0aecdd35 100644
--- a/tensorflow/core/platform/cloud/retrying_utils.cc
+++ b/tensorflow/core/platform/cloud/retrying_utils.cc
@@ -23,11 +23,6 @@ namespace tensorflow {
 
 namespace {
 
-// In case of failure, every call will be retried kMaxRetries times.
-constexpr int kMaxRetries = 10;
-// Maximum backoff time in microseconds.
-constexpr int64 kMaximumBackoffMicroseconds = 32000000;  // 32 seconds.
-
 bool IsRetriable(error::Code code) {
   switch (code) {
     case error::UNAVAILABLE:
@@ -43,40 +38,41 @@ bool IsRetriable(error::Code code) {
 }  // namespace
 
 Status RetryingUtils::CallWithRetries(const std::function<Status()>& f,
-                                      const int64 initial_delay_microseconds) {
-  return CallWithRetries(f, initial_delay_microseconds, [](int64 micros) {
-    return Env::Default()->SleepForMicroseconds(micros);
-  });
+                                      const RetryConfig& config) {
+  return CallWithRetries(
+      f,
+      [](int64 micros) { return Env::Default()->SleepForMicroseconds(micros); },
+      config);
 }
 
 Status RetryingUtils::CallWithRetries(
-    const std::function<Status()>& f, const int64 initial_delay_microseconds,
-    const std::function<void(int64)>& sleep_usec) {
+    const std::function<Status()>& f,
+    const std::function<void(int64)>& sleep_usec, const RetryConfig& config) {
   int retries = 0;
   while (true) {
     auto status = f();
     if (!IsRetriable(status.code())) {
       return status;
     }
-    if (retries >= kMaxRetries) {
+    if (retries >= config.max_retries) {
       // Return AbortedError, so that it doesn't get retried again somewhere
       // at a higher level.
       return Status(
           error::ABORTED,
           strings::StrCat(
-              "All ", kMaxRetries,
+              "All ", config.max_retries,
               " retry attempts failed. The last failure: ", status.ToString()));
     }
     int64 delay_micros = 0;
-    if (initial_delay_microseconds > 0) {
+    if (config.init_delay_time_us > 0) {
       const int64 random_micros = random::New64() % 1000000;
-      delay_micros = std::min(initial_delay_microseconds << retries,
-                              kMaximumBackoffMicroseconds) +
+      delay_micros = std::min(config.init_delay_time_us << retries,
+                              config.max_delay_time_us) +
                      random_micros;
     }
     LOG(INFO) << "The operation failed and will be automatically retried in "
               << (delay_micros / 1000000.0) << " seconds (attempt "
-              << (retries + 1) << " out of " << kMaxRetries
+              << (retries + 1) << " out of " << config.max_retries
               << "), caused by: " << status.ToString();
     sleep_usec(delay_micros);
     retries++;
@@ -84,8 +80,7 @@ Status RetryingUtils::CallWithRetries(
 }
 
 Status RetryingUtils::DeleteWithRetries(
-    const std::function<Status()>& delete_func,
-    const int64 initial_delay_microseconds) {
+    const std::function<Status()>& delete_func, const RetryConfig& config) {
   bool is_retried = false;
   return RetryingUtils::CallWithRetries(
       [delete_func, &is_retried]() {
@@ -96,7 +91,7 @@ Status RetryingUtils::DeleteWithRetries(
         is_retried = true;
         return status;
       },
-      initial_delay_microseconds);
+      config);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/retrying_utils.h b/tensorflow/core/platform/cloud/retrying_utils.h
index 546b8d1c4a..1a7ce1b122 100644
--- a/tensorflow/core/platform/cloud/retrying_utils.h
+++ b/tensorflow/core/platform/cloud/retrying_utils.h
@@ -21,6 +21,26 @@ limitations under the License.
 
 namespace tensorflow {
 
+// Default time before reporting failure: ~100 seconds.
+struct RetryConfig {
+  RetryConfig(int64 init_delay_time_us = 100 * 1000,
+              int64 max_delay_time_us = 32 * 1000 * 1000,
+              int max_retries = 10) {
+    this->init_delay_time_us = init_delay_time_us;
+    this->max_delay_time_us = max_delay_time_us;
+    this->max_retries = max_retries;
+  }
+
+  // In case of failure, every call will be retried max_retries times.
+  int max_retries;
+
+  // Initial backoff time
+  int64 init_delay_time_us;
+
+  // Maximum backoff time in microseconds.
+  int64 max_delay_time_us;
+};
+
 class RetryingUtils {
  public:
   /// \brief Retries the function in case of failure with exponential backoff.
@@ -31,18 +51,19 @@ class RetryingUtils {
   /// retries.
   /// If all retries failed, returns the last error status.
   static Status CallWithRetries(const std::function<Status()>& f,
-                                const int64 initial_delay_microseconds);
+                                const RetryConfig& config);
+
   /// sleep_usec is a function that sleeps for the given number of microseconds.
   static Status CallWithRetries(const std::function<Status()>& f,
-                                const int64 initial_delay_microseconds,
-                                const std::function<void(int64)>& sleep_usec);
+                                const std::function<void(int64)>& sleep_usec,
+                                const RetryConfig& config);
   /// \brief A retrying wrapper for a function that deletes a resource.
   ///
   /// The function takes care of the scenario when a delete operation
   /// returns a failure but succeeds under the hood: if a retry returns
   /// NOT_FOUND, the whole operation is considered a success.
   static Status DeleteWithRetries(const std::function<Status()>& delete_func,
-                                  const int64 initial_delay_microseconds);
+                                  const RetryConfig& config);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/cloud/retrying_utils_test.cc b/tensorflow/core/platform/cloud/retrying_utils_test.cc
index 1b6527618a..75fe8a98f4 100644
--- a/tensorflow/core/platform/cloud/retrying_utils_test.cc
+++ b/tensorflow/core/platform/cloud/retrying_utils_test.cc
@@ -30,7 +30,8 @@ TEST(RetryingUtilsTest, CallWithRetries_RetryDelays) {
   };
   std::function<Status()> f = []() { return errors::Unavailable("Failed."); };
 
-  const auto& status = RetryingUtils::CallWithRetries(f, 500000L, sleep);
+  const auto& status = RetryingUtils::CallWithRetries(
+      f, sleep, RetryConfig(500000 /* init_delay_time_us */));
   EXPECT_EQ(errors::Code::ABORTED, status.code());
   EXPECT_TRUE(str_util::StrContains(
       status.error_message(),
@@ -60,8 +61,10 @@ TEST(RetryingUtilsTest, CallWithRetries_NotFoundIsNotRetried) {
     results.erase(results.begin());
     return result;
   };
-  EXPECT_EQ(errors::Code::NOT_FOUND,
-            RetryingUtils::CallWithRetries(f, 0).code());
+  EXPECT_EQ(
+      errors::Code::NOT_FOUND,
+      RetryingUtils::CallWithRetries(f, RetryConfig(0 /* init_delay_time_us */))
+          .code());
 }
 
 TEST(RetryingUtilsTest, CallWithRetries_ImmediateSuccess) {
@@ -74,7 +77,8 @@ TEST(RetryingUtilsTest, CallWithRetries_ImmediateSuccess) {
     results.erase(results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::CallWithRetries(f, 1.0, sleep));
+  TF_EXPECT_OK(RetryingUtils::CallWithRetries(
+      f, sleep, RetryConfig(1L /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, CallWithRetries_EventualSuccess) {
@@ -86,7 +90,8 @@ TEST(RetryingUtilsTest, CallWithRetries_EventualSuccess) {
     results.erase(results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::CallWithRetries(f, 0));
+  TF_EXPECT_OK(RetryingUtils::CallWithRetries(
+      f, RetryConfig(0 /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_ImmediateSuccess) {
@@ -96,7 +101,8 @@ TEST(RetryingUtilsTest, DeleteWithRetries_ImmediateSuccess) {
     delete_results.erase(delete_results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(delete_func, 0));
+  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(
+      delete_func, RetryConfig(0 /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_EventualSuccess) {
@@ -106,7 +112,8 @@ TEST(RetryingUtilsTest, DeleteWithRetries_EventualSuccess) {
     delete_results.erase(delete_results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(delete_func, 0));
+  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(
+      delete_func, RetryConfig(0 /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_PermissionDeniedNotRetried) {
@@ -118,7 +125,9 @@ TEST(RetryingUtilsTest, DeleteWithRetries_PermissionDeniedNotRetried) {
     return result;
   };
   EXPECT_EQ(errors::Code::PERMISSION_DENIED,
-            RetryingUtils::DeleteWithRetries(delete_func, 0).code());
+            RetryingUtils::DeleteWithRetries(
+                delete_func, RetryConfig(0 /* init_delay_time_us */))
+                .code());
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_SuccessThroughFileNotFound) {
@@ -129,7 +138,8 @@ TEST(RetryingUtilsTest, DeleteWithRetries_SuccessThroughFileNotFound) {
     delete_results.erase(delete_results.begin());
     return result;
   };
-  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(delete_func, 0));
+  TF_EXPECT_OK(RetryingUtils::DeleteWithRetries(
+      delete_func, RetryConfig(0 /* init_delay_time_us */)));
 }
 
 TEST(RetryingUtilsTest, DeleteWithRetries_FirstNotFoundReturnedAsIs) {
@@ -140,7 +150,9 @@ TEST(RetryingUtilsTest, DeleteWithRetries_FirstNotFoundReturnedAsIs) {
     return result;
   };
   EXPECT_EQ(error::NOT_FOUND,
-            RetryingUtils::DeleteWithRetries(delete_func, 0).code());
+            RetryingUtils::DeleteWithRetries(
+                delete_func, RetryConfig(0 /* init_delay_time_us */))
+                .code());
 }
 
 }  // namespace
-- 
GitLab


From 84a051e7d0cd1406c1bb846efc677c8aa3fc896e Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 1 Oct 2018 11:12:03 -0700
Subject: [PATCH 0154/1085] Fix typo.

PiperOrigin-RevId: 215246174
---
 tensorflow/python/autograph/CONTRIBUTING.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/python/autograph/CONTRIBUTING.md b/tensorflow/python/autograph/CONTRIBUTING.md
index 1ded5ba5f6..f3587a4384 100644
--- a/tensorflow/python/autograph/CONTRIBUTING.md
+++ b/tensorflow/python/autograph/CONTRIBUTING.md
@@ -9,8 +9,6 @@ In preparation for TF 2.0, we moved the code base of AutoGraph from
 does not impact functionality, and AutoGraph will remain accessible under
 `tensorflow.contrib.autograph` until `tensorflow.contrib` is retired.
 
-When 
-
 ## TensorFlow Code of Conduct
 Please review and follow the [TensorFlow Code of Conduct](../../CODE_OF_CONDUCT.md).
 
-- 
GitLab


From 2bbf05148ad94928c1c828d40e479afdf34e2ef8 Mon Sep 17 00:00:00 2001
From: Christopher Olston <olston@google.com>
Date: Mon, 1 Oct 2018 11:24:41 -0700
Subject: [PATCH 0155/1085] Automated rollback of commit
 6a787235b95dd3040fc5ff7fb7104585e746c66a

PiperOrigin-RevId: 215248737
---
 tensorflow/core/kernels/batching_util/BUILD | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD
index 039b0db144..0d53240330 100644
--- a/tensorflow/core/kernels/batching_util/BUILD
+++ b/tensorflow/core/kernels/batching_util/BUILD
@@ -12,11 +12,6 @@ cc_library(
     name = "periodic_function_dynamic",
     srcs = ["periodic_function.cc"],
     hdrs = ["periodic_function.h"],
-    visibility = [
-        "//learning/serving:__subpackages__",
-        "//tensorflow:internal",
-        "//tensorflow_serving:__subpackages__",
-    ],
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:protos_all_cc",
@@ -25,11 +20,6 @@ cc_library(
 
 cc_library(
     name = "periodic_function",
-    visibility = [
-        "//learning/serving:__subpackages__",
-        "//tensorflow:internal",
-        "//tensorflow_serving:__subpackages__",
-    ],
     deps = [
         ":periodic_function_dynamic",
         "//tensorflow/core:lib",
@@ -198,11 +188,6 @@ cc_library(
     testonly = 1,
     srcs = ["fake_clock_env.cc"],
     hdrs = ["fake_clock_env.h"],
-    visibility = [
-        "//learning/serving:__subpackages__",
-        "//tensorflow:internal",
-        "//tensorflow_serving:__subpackages__",
-    ],
     deps = [
         "//tensorflow/core:lib",
         "//tensorflow/core:tensorflow",
-- 
GitLab


From a9b01e8a31a02188bc81349c103f136095f322ac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 11:26:02 -0700
Subject: [PATCH 0156/1085] internal change only

PiperOrigin-RevId: 215248985
---
 tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
index 8e6e9aa0cd..1c5ea2d997 100644
--- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
@@ -237,7 +237,8 @@ void StartMonitoring(const tensorflow::string& service_addr, int duration_ms,
     MonitorResponse response;
     TF_QCHECK_OK(FromGrpcStatus(stub->Monitor(&context, request, &response)));
 
-    std::cout << "Xprof Monitoring Results (Sample " << query + 1 << "):\n\n"
+    std::cout << "Cloud TPU Monitoring Results (Sample " << query + 1
+              << "):\n\n"
               << response.data() << std::flush;
   }
 }
-- 
GitLab


From f0f301f05fb1f1965c966ef57cc390e48d966f12 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Mon, 1 Oct 2018 11:29:30 -0700
Subject: [PATCH 0157/1085] Add deprecation notice for BasicRNNCell, which will
 be replaced by keras.SimpleRNNCell.

PiperOrigin-RevId: 215249611
---
 tensorflow/python/kernel_tests/rnn_test.py    |  39 ++++
 tensorflow/python/ops/rnn_cell_impl.py        |   4 +-
 ...orflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt | 202 ------------------
 .../golden/v2/tensorflow.nn.rnn_cell.pbtxt    |   4 -
 4 files changed, 42 insertions(+), 207 deletions(-)
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt

diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index 05ad9f6336..2f6963f6b8 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -535,6 +535,45 @@ class RNNTest(test.TestCase):
     self.assertAllClose(tf_out, k_out)
     self.assertAllClose(tf_state, k_state)
 
+  def testSimpleRNNCellAndBasicRNNCellComparison(self):
+    input_shape = 10
+    output_shape = 5
+    timestep = 4
+    batch = 20
+    (x_train, _), _ = testing_utils.get_test_data(
+        train_samples=batch,
+        test_samples=0,
+        input_shape=(timestep, input_shape),
+        num_classes=output_shape)
+    fix_weights_generator = keras.layers.SimpleRNNCell(output_shape)
+    fix_weights_generator.build((None, input_shape))
+    # The SimpleRNNCell contains 3 weights: kernel, recurrent_kernel, and bias
+    # The BasicRNNCell contains 2 weight: kernel and bias, where kernel is
+    # zipped [kernel, recurrent_kernel] in SimpleRNNCell.
+    keras_weights = fix_weights_generator.get_weights()
+    kernel, recurrent_kernel, bias = keras_weights
+    tf_weights = [np.concatenate((kernel, recurrent_kernel)), bias]
+
+    with self.test_session(graph=ops_lib.Graph()) as sess:
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape))
+      cell = keras.layers.SimpleRNNCell(output_shape)
+      k_out, k_state = rnn.dynamic_rnn(
+          cell, inputs, dtype=dtypes.float32)
+      cell.set_weights(keras_weights)
+      [k_out, k_state] = sess.run([k_out, k_state], {inputs: x_train})
+    with self.test_session(graph=ops_lib.Graph()) as sess:
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape))
+      cell = rnn_cell_impl.BasicRNNCell(output_shape)
+      tf_out, tf_state = rnn.dynamic_rnn(
+          cell, inputs, dtype=dtypes.float32)
+      cell.set_weights(tf_weights)
+      [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train})
+
+    self.assertAllClose(tf_out, k_out)
+    self.assertAllClose(tf_state, k_state)
+
   def testBasicLSTMCellInterchangeWithLSTMCell(self):
     with self.session(graph=ops_lib.Graph()) as sess:
       basic_cell = rnn_cell_impl.BasicLSTMCell(1)
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index c2751e529a..dd4f3d7a99 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -370,7 +370,7 @@ class LayerRNNCell(RNNCell):
                                      *args, **kwargs)
 
 
-@tf_export("nn.rnn_cell.BasicRNNCell")
+@tf_export(v1=["nn.rnn_cell.BasicRNNCell"])
 class BasicRNNCell(LayerRNNCell):
   """The most basic RNN cell.
 
@@ -393,6 +393,8 @@ class BasicRNNCell(LayerRNNCell):
       `trainable` etc when constructing the cell from configs of get_config().
   """
 
+  @deprecated(None, "This class is equivalent as tf.keras.layers.SimpleRNNCell,"
+                    " and will be replaced by that in Tensorflow 2.0.")
   def __init__(self,
                num_units,
                activation=None,
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
deleted file mode 100644
index a4483fefa2..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
+++ /dev/null
@@ -1,202 +0,0 @@
-path: "tensorflow.nn.rnn_cell.BasicRNNCell"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.BasicRNNCell\'>"
-  is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.LayerRNNCell\'>"
-  is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
-  is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "graph"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_size"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "scope_name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "state_size"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'num_units\', \'activation\', \'reuse\', \'name\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_initial_state"
-    argspec: "args=[\'self\', \'inputs\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "zero_state"
-    argspec: "args=[\'self\', \'batch_size\', \'dtype\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt
index 64697e8a02..24767e250f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt
@@ -4,10 +4,6 @@ tf_module {
     name: "BasicLSTMCell"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "BasicRNNCell"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "DeviceWrapper"
     mtype: "<type \'type\'>"
-- 
GitLab


From 7cabc6be4e32dfb7f42c7f5e33549984bfdb68a3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 11:44:17 -0700
Subject: [PATCH 0158/1085] Allow zero number of inputs in XRT execute
 operation.

PiperOrigin-RevId: 215252408
---
 tensorflow/compiler/xrt/ops/xrt_execute_op.cc |  2 +-
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 41 +++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xrt/ops/xrt_execute_op.cc b/tensorflow/compiler/xrt/ops/xrt_execute_op.cc
index fda4c31298..40ec1b0ba9 100644
--- a/tensorflow/compiler/xrt/ops/xrt_execute_op.cc
+++ b/tensorflow/compiler/xrt/ops/xrt_execute_op.cc
@@ -21,7 +21,7 @@ limitations under the License.
 namespace tensorflow {
 
 REGISTER_OP("XRTExecute")
-    .Attr("Ninputs: int")
+    .Attr("Ninputs: int >= 0")
     .Input("computation_handle: int64")
     .Input("execution_config: string")
     .Input("input_handles: Ninputs * int64")
diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc
index 2952feb16a..f590fbf0d9 100644
--- a/tensorflow/compiler/xrt/tests/raw_api_test.cc
+++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc
@@ -108,6 +108,14 @@ bool CompareLiteralToLiteralProto(const xla::Literal& a,
   return equal;
 }
 
+xla::XlaComputation OnePlusTwo() {
+  xla::XlaBuilder builder("OnePlusTwo");
+  auto c0 = xla::ConstantR0(&builder, 1.0f);
+  auto c1 = xla::ConstantR0(&builder, 2.0f);
+  xla::Add(c0, c1);
+  return builder.Build().ValueOrDie();
+}
+
 xla::XlaComputation AddAndScale() {
   xla::XlaBuilder builder("AddAndScale");
   auto p0 = xla::Parameter(&builder, 0,
@@ -346,6 +354,39 @@ TEST(RawApiTest, CompileAndExecute) {
   EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
 }
 
+TEST(RawApiTest, CompileAndExecuteZeroArg) {
+  xrt::XLAComputation c;
+  auto config = c.mutable_config();
+  auto shapes = config->mutable_program_shape();
+  *shapes->mutable_result() = xla::ShapeUtil::MakeShape(xla::F32, {});
+
+  xrt::XRTExecutionConfig e;
+  e.set_release_input_handles(true);
+  e.set_release_compilation_handle(true);
+  StoreComputationSnapshot(OnePlusTwo(), c.mutable_hlo_snapshot());
+
+  Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
+  auto e_config =
+      ops::Const(root.WithDevice("/device:CPU:0"), e.SerializeAsString());
+  auto computation =
+      ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
+  auto c_handle = ops::XRTCompile(root, computation);
+  auto result = ops::XRTExecute(root, c_handle, e_config,
+                                std::initializer_list<Input>({}));
+  auto read_back = ops::XRTReadLiteralAndRelease(root, result);
+  TF_ASSERT_OK(root.status());
+
+  ClientSession session(root);
+  std::vector<Tensor> outputs;
+  TF_EXPECT_OK(session.Run({read_back}, &outputs));
+
+  xla::LiteralProto response;
+  EXPECT_TRUE(response.ParseFromString(outputs[0].scalar<string>()()));
+
+  auto expected = xla::LiteralUtil::CreateR0<float>(3.0f);
+  EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
+}
+
 TEST(RawApiTest, CompileAndExecuteReturnTuple) {
   xrt::XLAAllocation p0;
   p0.set_device_ordinal(0);
-- 
GitLab


From f1fd53748b99532b2572b8909efcd4f5c06ce28d Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 1 Oct 2018 11:53:27 -0700
Subject: [PATCH 0159/1085] Updating function and class tf_export decorators
 for endpoints according to https://github.com/tensorflow/community/pull/16.
 In addition to the changes in the doc, I made the following updates (these
 changes make sense to me and I didn't notice them when compiling the doc): *
 deprecate saved_model.builder.SavedModelBuilder - replaced with
 saved_model.SavedModelBuilder * deprecate python_io.tf_record_iterator -
 replaced with io.tf_record_iterator * deprecate python_io.TFRecordWriter -
 replaced with io.TFRecordWriter * move reduce_join to tf.string

PiperOrigin-RevId: 215253944
---
 tensorflow/python/framework/dtypes.py         |   4 +-
 tensorflow/python/framework/errors_impl.py    |   6 +-
 tensorflow/python/framework/graph_io.py       |   2 +-
 tensorflow/python/framework/importer.py       |   2 +-
 tensorflow/python/framework/random_seed.py    |   6 +-
 tensorflow/python/framework/sparse_tensor.py  |   2 +-
 tensorflow/python/lib/io/tf_record.py         |  13 +-
 tensorflow/python/ops/array_ops.py            |  44 ++--
 .../python/ops/candidate_sampling_ops.py      |   8 +-
 tensorflow/python/ops/check_ops.py            |  63 ++++--
 tensorflow/python/ops/clip_ops.py             |   8 +-
 tensorflow/python/ops/confusion_matrix.py     |   4 +-
 tensorflow/python/ops/control_flow_ops.py     |   2 +-
 tensorflow/python/ops/data_flow_ops.py        |  17 +-
 tensorflow/python/ops/init_ops.py             |   5 +
 tensorflow/python/ops/linalg_ops.py           |  15 +-
 tensorflow/python/ops/lookup_ops.py           |   2 +-
 tensorflow/python/ops/manip_ops.py            |   4 +-
 tensorflow/python/ops/math_ops.py             | 145 ++++++++------
 tensorflow/python/ops/nn_impl.py              |   6 +-
 tensorflow/python/ops/nn_ops.py               |   8 +-
 tensorflow/python/ops/numerics.py             |   4 +-
 tensorflow/python/ops/parsing_ops.py          |  18 +-
 tensorflow/python/ops/random_ops.py           |  19 +-
 tensorflow/python/ops/sparse_ops.py           | 107 ++++++----
 tensorflow/python/ops/special_math_ops.py     |   4 +-
 tensorflow/python/ops/string_ops.py           |   7 +-
 tensorflow/python/saved_model/builder_impl.py |   7 +-
 tensorflow/python/saved_model/loader_impl.py  |   8 +-
 tensorflow/python/saved_model/main_op_impl.py |   5 +-
 .../saved_model/signature_def_utils_impl.py   |  27 ++-
 tensorflow/python/saved_model/utils_impl.py   |  10 +-
 .../tools/api/generator/api_init_files.bzl    |   1 +
 .../tools/api/generator/api_init_files_v1.bzl |   1 +
 tensorflow/python/training/input.py           |   3 +-
 .../api/golden/v1/tensorflow.debugging.pbtxt  |  96 +++++++++
 .../golden/v1/tensorflow.dtypes.-d-type.pbtxt |  77 +++++++
 .../api/golden/v1/tensorflow.dtypes.pbtxt     |  20 ++
 .../api/golden/v1/tensorflow.graph_util.pbtxt |   4 +
 .../api/golden/v1/tensorflow.image.pbtxt      |   4 +
 .../golden/v1/tensorflow.initializers.pbtxt   |   4 +
 .../v1/tensorflow.io.-fixed-len-feature.pbtxt |  27 +++
 ...rflow.io.-fixed-len-sequence-feature.pbtxt |  31 +++
 ...tensorflow.io.-padding-f-i-f-o-queue.pbtxt |  66 ++++++
 .../v1/tensorflow.io.-priority-queue.pbtxt    |  66 ++++++
 .../golden/v1/tensorflow.io.-queue-base.pbtxt |  65 ++++++
 .../tensorflow.io.-random-shuffle-queue.pbtxt |  66 ++++++
 .../v1/tensorflow.io.-sparse-feature.pbtxt    |  35 ++++
 ...flow.io.-t-f-record-compression-type.pbtxt |  20 ++
 .../tensorflow.io.-t-f-record-options.pbtxt   |  17 ++
 .../v1/tensorflow.io.-t-f-record-writer.pbtxt |  21 ++
 .../v1/tensorflow.io.-var-len-feature.pbtxt   |  19 ++
 .../tools/api/golden/v1/tensorflow.io.pbtxt   |  84 ++++++++
 .../api/golden/v1/tensorflow.linalg.pbtxt     |  12 ++
 .../tools/api/golden/v1/tensorflow.math.pbtxt | 188 ++++++++++++++++++
 .../tools/api/golden/v1/tensorflow.nn.pbtxt   |  12 ++
 .../tools/api/golden/v1/tensorflow.pbtxt      |   8 +
 .../golden/v1/tensorflow.quantization.pbtxt   |   4 +
 .../api/golden/v1/tensorflow.random.pbtxt     |  47 +++++
 .../v1/tensorflow.saved_model.-builder.pbtxt  |  21 ++
 .../golden/v1/tensorflow.saved_model.pbtxt    |  44 ++++
 ...arse.-sparse-conditional-accumulator.pbtxt |  46 +++++
 .../v1/tensorflow.sparse.-sparse-tensor.pbtxt |  54 +++++
 .../api/golden/v1/tensorflow.sparse.pbtxt     | 112 +++++++++++
 .../api/golden/v1/tensorflow.strings.pbtxt    |   4 +
 .../api/golden/v1/tensorflow.train.pbtxt      |   4 +
 .../api/golden/v2/tensorflow.debugging.pbtxt  |  96 +++++++++
 .../golden/v2/tensorflow.dtypes.-d-type.pbtxt |  77 +++++++
 .../api/golden/v2/tensorflow.dtypes.pbtxt     |  20 ++
 .../api/golden/v2/tensorflow.graph_util.pbtxt |   4 +
 .../api/golden/v2/tensorflow.image.pbtxt      |   4 +
 .../golden/v2/tensorflow.initializers.pbtxt   |   4 +
 .../v2/tensorflow.io.-fixed-len-feature.pbtxt |  27 +++
 ...rflow.io.-fixed-len-sequence-feature.pbtxt |  31 +++
 ...tensorflow.io.-padding-f-i-f-o-queue.pbtxt |  66 ++++++
 .../v2/tensorflow.io.-priority-queue.pbtxt    |  66 ++++++
 .../golden/v2/tensorflow.io.-queue-base.pbtxt |  65 ++++++
 .../tensorflow.io.-random-shuffle-queue.pbtxt |  66 ++++++
 .../v2/tensorflow.io.-sparse-feature.pbtxt    |  35 ++++
 ...flow.io.-t-f-record-compression-type.pbtxt |  20 ++
 .../tensorflow.io.-t-f-record-options.pbtxt   |  17 ++
 .../v2/tensorflow.io.-t-f-record-writer.pbtxt |  21 ++
 .../v2/tensorflow.io.-var-len-feature.pbtxt   |  19 ++
 .../tools/api/golden/v2/tensorflow.io.pbtxt   |  84 ++++++++
 .../api/golden/v2/tensorflow.linalg.pbtxt     |  12 ++
 .../tools/api/golden/v2/tensorflow.math.pbtxt | 188 ++++++++++++++++++
 .../tools/api/golden/v2/tensorflow.nn.pbtxt   |  12 ++
 .../tools/api/golden/v2/tensorflow.pbtxt      |   8 +
 .../golden/v2/tensorflow.quantization.pbtxt   |   4 +
 .../api/golden/v2/tensorflow.random.pbtxt     |  47 +++++
 .../v2/tensorflow.saved_model.-builder.pbtxt  |  21 ++
 .../golden/v2/tensorflow.saved_model.pbtxt    |  44 ++++
 ...arse.-sparse-conditional-accumulator.pbtxt |  46 +++++
 .../v2/tensorflow.sparse.-sparse-tensor.pbtxt |  54 +++++
 .../api/golden/v2/tensorflow.sparse.pbtxt     | 112 +++++++++++
 .../api/golden/v2/tensorflow.strings.pbtxt    |   4 +
 .../api/golden/v2/tensorflow.train.pbtxt      |   4 +
 97 files changed, 2926 insertions(+), 217 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.dtypes.-d-type.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-sequence-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-priority-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-queue-base.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-random-shuffle-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-sparse-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-compression-type.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-options.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-writer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.io.-var-len-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.saved_model.-builder.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.dtypes.-d-type.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-sequence-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-sparse-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-compression-type.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-options.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-writer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.io.-var-len-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.-builder.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt

diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py
index c3f70df7d8..64d3b42d89 100644
--- a/tensorflow/python/framework/dtypes.py
+++ b/tensorflow/python/framework/dtypes.py
@@ -26,7 +26,7 @@ from tensorflow.python.util.tf_export import tf_export
 _np_bfloat16 = pywrap_tensorflow.TF_bfloat16_type()
 
 
-@tf_export("DType")
+@tf_export("dtypes.DType", "DType")
 class DType(object):
   """Represents the type of the elements in a `Tensor`.
 
@@ -658,7 +658,7 @@ _PYTHON_TO_TF = {
 }
 
 
-@tf_export("as_dtype")
+@tf_export("dtypes.as_dtype", "as_dtype")
 def as_dtype(type_value):
   """Converts the given `type_value` to a `DType`.
 
diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py
index 5af71f2cfb..8b303fa8a9 100644
--- a/tensorflow/python/framework/errors_impl.py
+++ b/tensorflow/python/framework/errors_impl.py
@@ -25,11 +25,13 @@ from tensorflow.core.lib.core import error_codes_pb2
 from tensorflow.python import pywrap_tensorflow as c_api
 from tensorflow.python.framework import c_api_util
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("OpError", "errors.OpError")
+@tf_export("errors.OpError", "OpError")
+@deprecation.deprecated_endpoints("OpError")
 class OpError(Exception):
   """A generic error that is raised when TensorFlow execution fails.
 
@@ -72,7 +74,7 @@ class OpError(Exception):
     or `Recv` op, there will be no corresponding
     `tf.Operation`
     object.  In that case, this will return `None`, and you should
-    instead use the `tf.OpError.node_def` to
+    instead use the `tf.errors.OpError.node_def` to
     discover information about the op.
 
     Returns:
diff --git a/tensorflow/python/framework/graph_io.py b/tensorflow/python/framework/graph_io.py
index be30b16f5f..47e1344eae 100644
--- a/tensorflow/python/framework/graph_io.py
+++ b/tensorflow/python/framework/graph_io.py
@@ -27,7 +27,7 @@ from tensorflow.python.lib.io import file_io
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('train.write_graph')
+@tf_export('io.write_graph', 'train.write_graph')
 def write_graph(graph_or_graph_def, logdir, name, as_text=True):
   """Writes a graph proto to a file.
 
diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index e48e67c8a1..c6595918ae 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -329,7 +329,7 @@ def _SetDefaultAttrValues(node_def, op_def):
         node_def.attr[key].CopyFrom(attr_def.default_value)
 
 
-@tf_export('import_graph_def')
+@tf_export('graph_util.import_graph_def', 'import_graph_def')
 @deprecated_args(None, 'Please file an issue at '
                  'https://github.com/tensorflow/tensorflow/issues if you depend'
                  ' on this feature.', 'op_dict')
diff --git a/tensorflow/python/framework/random_seed.py b/tensorflow/python/framework/random_seed.py
index 2f9504889a..6f9f347a99 100644
--- a/tensorflow/python/framework/random_seed.py
+++ b/tensorflow/python/framework/random_seed.py
@@ -22,6 +22,7 @@ from __future__ import print_function
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -33,7 +34,8 @@ def _truncate_seed(seed):
   return seed % _MAXINT32  # Truncate to fit into 32-bit integer
 
 
-@tf_export('get_seed')
+@tf_export('random.get_seed', 'get_seed')
+@deprecation.deprecated_endpoints('get_seed')
 def get_seed(op_seed):
   """Returns the local seeds an operation should use given an op-specific seed.
 
@@ -80,7 +82,7 @@ def get_seed(op_seed):
   return seeds
 
 
-@tf_export('set_random_seed')
+@tf_export('random.set_random_seed', 'set_random_seed')
 def set_random_seed(seed):
   """Sets the graph-level random seed.
 
diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py
index d1bdd9b80a..41ef2e11d1 100644
--- a/tensorflow/python/framework/sparse_tensor.py
+++ b/tensorflow/python/framework/sparse_tensor.py
@@ -33,7 +33,7 @@ _override_helper = ops._override_helper
 # pylint: enable=protected-access
 
 
-@tf_export("SparseTensor")
+@tf_export("sparse.SparseTensor", "SparseTensor")
 class SparseTensor(_TensorLike):
   """Represents a sparse tensor.
 
diff --git a/tensorflow/python/lib/io/tf_record.py b/tensorflow/python/lib/io/tf_record.py
index cce71a2bab..9ab683d96a 100644
--- a/tensorflow/python/lib/io/tf_record.py
+++ b/tensorflow/python/lib/io/tf_record.py
@@ -22,10 +22,12 @@ from __future__ import print_function
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.framework import errors
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("python_io.TFRecordCompressionType")
+@tf_export("io.TFRecordCompressionType", "python_io.TFRecordCompressionType")
+@deprecation.deprecated_endpoints("python_io.TFRecordCompressionType")
 class TFRecordCompressionType(object):
   """The type of compression for the record."""
   NONE = 0
@@ -33,7 +35,8 @@ class TFRecordCompressionType(object):
   GZIP = 2
 
 
-@tf_export("python_io.TFRecordOptions")
+@tf_export("io.TFRecordOptions", "python_io.TFRecordOptions")
+@deprecation.deprecated_endpoints("python_io.TFRecordOptions")
 class TFRecordOptions(object):
   """Options used for manipulating TFRecord files."""
   compression_type_map = {
@@ -143,7 +146,8 @@ class TFRecordOptions(object):
     return options
 
 
-@tf_export("python_io.tf_record_iterator")
+@tf_export("io.tf_record_iterator", "python_io.tf_record_iterator")
+@deprecation.deprecated_endpoints("python_io.tf_record_iterator")
 def tf_record_iterator(path, options=None):
   """An iterator that read the records from a TFRecords file.
 
@@ -175,7 +179,8 @@ def tf_record_iterator(path, options=None):
     reader.Close()
 
 
-@tf_export("python_io.TFRecordWriter")
+@tf_export("io.TFRecordWriter", "python_io.TFRecordWriter")
+@deprecation.deprecated_endpoints("python_io.TFRecordWriter")
 class TFRecordWriter(object):
   """A class to write records to a TFRecords file.
 
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index a7f57e94e3..9f5149d5ac 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1204,7 +1204,8 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
     return _apply_mask_1d(tensor, mask, axis)
 
 
-@tf_export("sparse_mask")
+@tf_export("sparse.mask", "sparse_mask")
+@deprecation.deprecated_endpoints("sparse_mask")
 def sparse_mask(a, mask_indices, name=None):
   """Masks elements of `IndexedSlices`.
 
@@ -1226,7 +1227,7 @@ def sparse_mask(a, mask_indices, name=None):
   # `b` will be the subset of `a` slices at its second and third indices, so
   # we want to mask its first and last indices (which are at absolute
   # indices 12, 45)
-  b = tf.sparse_mask(a, [12, 45])
+  b = tf.sparse.mask(a, [12, 45])
 
   b.indices  # [26, 37]
   tf.shape(b.values)  # [2, 10]
@@ -1382,7 +1383,7 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
                     [10, 11, 12]]])
 
   # Take the transpose of the matrices in dimension-0
-  # (this common operation has a shorthand `matrix_transpose`)
+  # (this common operation has a shorthand `linalg.transpose`)
   tf.transpose(x, perm=[0, 2, 1])  # [[[1,  4],
                                    #   [2,  5],
                                    #   [3,  6]],
@@ -1421,7 +1422,8 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
 
 
 # pylint: disable=invalid-name
-@tf_export("matrix_transpose", "linalg.transpose")
+@tf_export("linalg.transpose", "matrix_transpose")
+@deprecation.deprecated_endpoints("matrix_transpose")
 def matrix_transpose(a, name="matrix_transpose", conjugate=False):
   """Transposes last two dimensions of tensor `a`.
 
@@ -1429,19 +1431,19 @@ def matrix_transpose(a, name="matrix_transpose", conjugate=False):
 
   ```python
   x = tf.constant([[1, 2, 3], [4, 5, 6]])
-  tf.matrix_transpose(x)  # [[1, 4],
+  tf.linalg.transpose(x)  # [[1, 4],
                           #  [2, 5],
                           #  [3, 6]]
 
   x = tf.constant([[1 + 1j, 2 + 2j, 3 + 3j],
                    [4 + 4j, 5 + 5j, 6 + 6j]])
-  tf.matrix_transpose(x, conjugate=True)  # [[1 - 1j, 4 - 4j],
+  tf.linalg.transpose(x, conjugate=True)  # [[1 - 1j, 4 - 4j],
                                           #  [2 - 2j, 5 - 5j],
                                           #  [3 - 3j, 6 - 6j]]
 
   # Matrix with two batch dimensions.
   # x.shape is [1, 2, 3, 4]
-  # tf.matrix_transpose(x) is shape [1, 2, 4, 3]
+  # tf.linalg.transpose(x) is shape [1, 2, 4, 3]
   ```
 
   Note that `tf.matmul` provides kwargs allowing for transpose of arguments.
@@ -1452,14 +1454,14 @@ def matrix_transpose(a, name="matrix_transpose", conjugate=False):
   tf.matmul(matrix, b, transpose_b=True)
 
   # Inefficient!
-  tf.matmul(matrix, tf.matrix_transpose(b))
+  tf.matmul(matrix, tf.linalg.transpose(b))
   ```
 
   @compatibility(numpy)
   In `numpy` transposes are memory-efficient constant time operations as they
   simply return a new view of the same data with adjusted `strides`.
 
-  TensorFlow does not support strides, `matrix_transposes` return a new tensor
+  TensorFlow does not support strides, `linalg.transposes` return a new tensor
   with the items permuted.
   @end_compatibility
 
@@ -1467,7 +1469,7 @@ def matrix_transpose(a, name="matrix_transpose", conjugate=False):
     a: A `Tensor` with `rank >= 2`.
     name: A name for the operation (optional).
     conjugate: Optional bool. Setting it to `True` is mathematically equivalent
-      to tf.conj(tf.matrix_transpose(input)).
+      to tf.conj(tf.linalg.transpose(input)).
 
   Returns:
     A transposed batch matrix `Tensor`.
@@ -1756,7 +1758,8 @@ def _normalize_sparse_shape(shape, name):
   return (ops.convert_to_tensor(shape, dtype=dtypes.int64, name=name), rank)
 
 
-@tf_export("sparse_placeholder")
+@tf_export("sparse.placeholder", "sparse_placeholder")
+@deprecation.deprecated_endpoints("sparse_placeholder")
 def sparse_placeholder(dtype, shape=None, name=None):
   """Inserts a placeholder for a sparse tensor that will be always fed.
 
@@ -1767,8 +1770,8 @@ def sparse_placeholder(dtype, shape=None, name=None):
   For example:
 
   ```python
-  x = tf.sparse_placeholder(tf.float32)
-  y = tf.sparse_reduce_sum(x)
+  x = tf.sparse.placeholder(tf.float32)
+  y = tf.sparse.reduce_sum(x)
 
   with tf.Session() as sess:
     print(sess.run(y))  # ERROR: will fail because x was not fed.
@@ -2250,7 +2253,8 @@ def required_space_to_batch_paddings(input_shape,
     return result_paddings, result_crops
 
 
-@tf_export("space_to_batch")
+@tf_export("nn.space_to_batch", "space_to_batch")
+@deprecation.deprecated_endpoints("space_to_batch")
 def space_to_batch(input, paddings, block_size, name=None):  # pylint: disable=redefined-builtin
   result = space_to_batch_nd(
       input,
@@ -2264,7 +2268,8 @@ def space_to_batch(input, paddings, block_size, name=None):  # pylint: disable=r
 space_to_batch.__doc__ = gen_array_ops.space_to_batch.__doc__
 
 
-@tf_export("space_to_depth")
+@tf_export("nn.space_to_depth", "space_to_depth")
+@deprecation.deprecated_endpoints("space_to_depth")
 def space_to_depth(input, block_size, name=None, data_format="NHWC"):  # pylint: disable=redefined-builtin
   return gen_array_ops.space_to_depth(input, block_size, data_format, name=name)
 
@@ -2272,7 +2277,8 @@ def space_to_depth(input, block_size, name=None, data_format="NHWC"):  # pylint:
 space_to_depth.__doc__ = gen_array_ops.space_to_depth.__doc__
 
 
-@tf_export("depth_to_space")
+@tf_export("nn.depth_to_space", "depth_to_space")
+@deprecation.deprecated_endpoints("depth_to_space")
 def depth_to_space(input, block_size, name=None, data_format="NHWC"):  # pylint: disable=redefined-builtin
   return gen_array_ops.depth_to_space(input, block_size, data_format, name=name)
 
@@ -2747,7 +2753,8 @@ def batch_gather(params, indices, name=None):
 @tf_export("quantize_v2")
 @deprecation.deprecated(
     "2017-10-25",
-    "`tf.quantize_v2` is deprecated, please use `tf.quantize` instead.")
+    "`tf.quantize_v2` is deprecated, please use `tf.quantization.quantize` "
+    "instead.")  # pylint: disable=missing-docstring
 def quantize_v2(input,  # pylint: disable=redefined-builtin
                 min_range,
                 max_range,
@@ -2769,7 +2776,8 @@ quantize_v2.__doc__ = """Please use `tf.quantize` instead."""
 
 # We want to expose tf.quantize instead of tf.quantize_v2; we can deprecate
 # tf.quantize_v2 in next version of TensorFlow.
-@tf_export("quantize")
+@tf_export("quantization.quantize", "quantize")
+@deprecation.deprecated_endpoints("quantize")
 def quantize(input,  # pylint: disable=redefined-builtin
              min_range,
              max_range,
diff --git a/tensorflow/python/ops/candidate_sampling_ops.py b/tensorflow/python/ops/candidate_sampling_ops.py
index 9ea1ea9c92..98dde995c9 100644
--- a/tensorflow/python/ops/candidate_sampling_ops.py
+++ b/tensorflow/python/ops/candidate_sampling_ops.py
@@ -23,10 +23,12 @@ from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops  # pylint: disable=unused-import
 from tensorflow.python.ops import gen_candidate_sampling_ops
 from tensorflow.python.ops import math_ops  # pylint: disable=unused-import
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('nn.uniform_candidate_sampler')
+@tf_export('random.uniform_candidate_sampler', 'nn.uniform_candidate_sampler')
+@deprecation.deprecated_endpoints('nn.uniform_candidate_sampler')
 def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
                               range_max, seed=None, name=None):
   """Samples a set of classes using a uniform base distribution.
@@ -82,7 +84,9 @@ def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
       seed2=seed2, name=name)
 
 
-@tf_export('nn.log_uniform_candidate_sampler')
+@tf_export('random.log_uniform_candidate_sampler',
+           'nn.log_uniform_candidate_sampler')
+@deprecation.deprecated_endpoints('nn.log_uniform_candidate_sampler')
 def log_uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
                                   range_max, seed=None, name=None):
   """Samples a set of classes using a log-uniform (Zipfian) base distribution.
diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py
index c3cf6e61f2..d607f1d9fb 100644
--- a/tensorflow/python/ops/check_ops.py
+++ b/tensorflow/python/ops/check_ops.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 NUMERIC_TYPES = frozenset(
@@ -91,7 +92,8 @@ def _shape_and_dtype_str(tensor):
   return 'shape=%s dtype=%s' % (tensor.shape, tensor.dtype.name)
 
 
-@tf_export('assert_proper_iterable')
+@tf_export('debugging.assert_proper_iterable', 'assert_proper_iterable')
+@deprecation.deprecated_endpoints('assert_proper_iterable')
 def assert_proper_iterable(values):
   """Static assert that values is a "proper" iterable.
 
@@ -119,7 +121,8 @@ def assert_proper_iterable(values):
         'Expected argument "values" to be iterable.  Found: %s' % type(values))
 
 
-@tf_export('assert_negative')
+@tf_export('debugging.assert_negative', 'assert_negative')
+@deprecation.deprecated_endpoints('assert_negative')
 def assert_negative(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x < 0` holds element-wise.
 
@@ -160,7 +163,8 @@ def assert_negative(x, data=None, summarize=None, message=None, name=None):
     return assert_less(x, zero, data=data, summarize=summarize)
 
 
-@tf_export('assert_positive')
+@tf_export('debugging.assert_positive', 'assert_positive')
+@deprecation.deprecated_endpoints('assert_positive')
 def assert_positive(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x > 0` holds element-wise.
 
@@ -200,7 +204,8 @@ def assert_positive(x, data=None, summarize=None, message=None, name=None):
     return assert_less(zero, x, data=data, summarize=summarize)
 
 
-@tf_export('assert_non_negative')
+@tf_export('debugging.assert_non_negative', 'assert_non_negative')
+@deprecation.deprecated_endpoints('assert_non_negative')
 def assert_non_negative(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x >= 0` holds element-wise.
 
@@ -242,7 +247,8 @@ def assert_non_negative(x, data=None, summarize=None, message=None, name=None):
     return assert_less_equal(zero, x, data=data, summarize=summarize)
 
 
-@tf_export('assert_non_positive')
+@tf_export('debugging.assert_non_positive', 'assert_non_positive')
+@deprecation.deprecated_endpoints('assert_non_positive')
 def assert_non_positive(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x <= 0` holds element-wise.
 
@@ -284,7 +290,7 @@ def assert_non_positive(x, data=None, summarize=None, message=None, name=None):
     return assert_less_equal(x, zero, data=data, summarize=summarize)
 
 
-@tf_export('assert_equal')
+@tf_export('debugging.assert_equal', 'assert_equal')
 def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x == y` holds element-wise.
 
@@ -384,7 +390,8 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_none_equal')
+@tf_export('debugging.assert_none_equal', 'assert_none_equal')
+@deprecation.deprecated_endpoints('assert_none_equal')
 def assert_none_equal(
     x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x != y` holds for all elements.
@@ -435,7 +442,8 @@ def assert_none_equal(
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_near')
+@tf_export('debugging.assert_near', 'assert_near')
+@deprecation.deprecated_endpoints('assert_near')
 def assert_near(
     x, y, rtol=None, atol=None, data=None, summarize=None, message=None,
     name=None):
@@ -513,7 +521,7 @@ def assert_near(
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_less')
+@tf_export('debugging.assert_less', 'assert_less')
 def assert_less(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x < y` holds element-wise.
 
@@ -561,7 +569,8 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_less_equal')
+@tf_export('debugging.assert_less_equal', 'assert_less_equal')
+@deprecation.deprecated_endpoints('assert_less_equal')
 def assert_less_equal(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x <= y` holds element-wise.
 
@@ -609,7 +618,7 @@ def assert_less_equal(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_greater')
+@tf_export('debugging.assert_greater', 'assert_greater')
 def assert_greater(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x > y` holds element-wise.
 
@@ -657,7 +666,8 @@ def assert_greater(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_greater_equal')
+@tf_export('debugging.assert_greater_equal', 'assert_greater_equal')
+@deprecation.deprecated_endpoints('assert_greater_equal')
 def assert_greater_equal(x, y, data=None, summarize=None, message=None,
                          name=None):
   """Assert the condition `x >= y` holds element-wise.
@@ -755,7 +765,7 @@ def _assert_rank_condition(
   return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_rank')
+@tf_export('debugging.assert_rank', 'assert_rank')
 def assert_rank(x, rank, data=None, summarize=None, message=None, name=None):
   """Assert `x` has rank equal to `rank`.
 
@@ -817,7 +827,8 @@ def assert_rank(x, rank, data=None, summarize=None, message=None, name=None):
   return assert_op
 
 
-@tf_export('assert_rank_at_least')
+@tf_export('debugging.assert_rank_at_least', 'assert_rank_at_least')
+@deprecation.deprecated_endpoints('assert_rank_at_least')
 def assert_rank_at_least(
     x, rank, data=None, summarize=None, message=None, name=None):
   """Assert `x` has rank equal to `rank` or higher.
@@ -948,7 +959,8 @@ def _assert_ranks_condition(
   return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('assert_rank_in')
+@tf_export('debugging.assert_rank_in', 'assert_rank_in')
+@deprecation.deprecated_endpoints('assert_rank_in')
 def assert_rank_in(
     x, ranks, data=None, summarize=None, message=None, name=None):
   """Assert `x` has rank in `ranks`.
@@ -1010,7 +1022,8 @@ def assert_rank_in(
   return assert_op
 
 
-@tf_export('assert_integer')
+@tf_export('debugging.assert_integer', 'assert_integer')
+@deprecation.deprecated_endpoints('assert_integer')
 def assert_integer(x, message=None, name=None):
   """Assert that `x` is of integer dtype.
 
@@ -1048,7 +1061,8 @@ def assert_integer(x, message=None, name=None):
     return control_flow_ops.no_op('statically_determined_was_integer')
 
 
-@tf_export('assert_type')
+@tf_export('debugging.assert_type', 'assert_type')
+@deprecation.deprecated_endpoints('assert_type')
 def assert_type(tensor, tf_type, message=None, name=None):
   """Statically asserts that the given `Tensor` is of the specified type.
 
@@ -1095,12 +1109,14 @@ def _get_diff_for_monotonic_comparison(x):
   return control_flow_ops.cond(is_shorter_than_two, short_result, diff)
 
 
-@tf_export('is_numeric_tensor')
+@tf_export('debugging.is_numeric_tensor', 'is_numeric_tensor')
+@deprecation.deprecated_endpoints('is_numeric_tensor')
 def is_numeric_tensor(tensor):
   return isinstance(tensor, ops.Tensor) and tensor.dtype in NUMERIC_TYPES
 
 
-@tf_export('is_non_decreasing')
+@tf_export('debugging.is_non_decreasing', 'is_non_decreasing')
+@deprecation.deprecated_endpoints('is_non_decreasing')
 def is_non_decreasing(x, name=None):
   """Returns `True` if `x` is non-decreasing.
 
@@ -1127,7 +1143,8 @@ def is_non_decreasing(x, name=None):
     return math_ops.reduce_all(math_ops.less_equal(zero, diff))
 
 
-@tf_export('is_strictly_increasing')
+@tf_export('debugging.is_strictly_increasing', 'is_strictly_increasing')
+@deprecation.deprecated_endpoints('is_strictly_increasing')
 def is_strictly_increasing(x, name=None):
   """Returns `True` if `x` is strictly increasing.
 
@@ -1202,7 +1219,8 @@ def _assert_same_base_type(items, expected_type=None):
     return expected_type
 
 
-@tf_export('assert_same_float_dtype')
+@tf_export('debugging.assert_same_float_dtype', 'assert_same_float_dtype')
+@deprecation.deprecated_endpoints('assert_same_float_dtype')
 def assert_same_float_dtype(tensors=None, dtype=None):
   """Validate and return float type based on `tensors` and `dtype`.
 
@@ -1231,7 +1249,8 @@ def assert_same_float_dtype(tensors=None, dtype=None):
   return dtype
 
 
-@tf_export('assert_scalar')
+@tf_export('debugging.assert_scalar', 'assert_scalar')
+@deprecation.deprecated_endpoints('assert_scalar')
 def assert_scalar(tensor, name=None):
   with ops.name_scope(name, 'assert_scalar', [tensor]) as name_scope:
     tensor = ops.convert_to_tensor(tensor, name=name_scope)
diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py
index 29468431b3..45516068f4 100644
--- a/tensorflow/python/ops/clip_ops.py
+++ b/tensorflow/python/ops/clip_ops.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import numerics
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -76,8 +77,8 @@ def clip_by_value(t, clip_value_min, clip_value_max,
 
   return t_max
   # TODO(scottzhu): switch to use new implmentation in 2 weeks.
-    # return gen_math_ops.clip_by_value(
-    #     t, clip_value_min, clip_value_max, name=name)
+  # return gen_math_ops.clip_by_value(
+  #     t, clip_value_min, clip_value_max, name=name)
 
 
 # TODO(scottzhu): switch to use new implmentation in 2 weeks.
@@ -159,7 +160,8 @@ def clip_by_norm(t, clip_norm, axes=None, name=None):
   return tclip
 
 
-@tf_export("global_norm")
+@tf_export("linalg.global_norm", "global_norm")
+@deprecation.deprecated_endpoints("global_norm")
 def global_norm(t_list, name=None):
   """Computes the global norm of multiple tensors.
 
diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py
index c09154129f..8259142456 100644
--- a/tensorflow/python/ops/confusion_matrix.py
+++ b/tensorflow/python/ops/confusion_matrix.py
@@ -26,6 +26,7 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -89,7 +90,8 @@ def remove_squeezable_dimensions(
     return labels, predictions
 
 
-@tf_export('confusion_matrix')
+@tf_export('train.confusion_matrix', 'confusion_matrix')
+@deprecation.deprecated_endpoints('confusion_matrix')
 def confusion_matrix(labels, predictions, num_classes=None, dtype=dtypes.int32,
                      name=None, weights=None):
   """Computes the confusion matrix from predictions and labels.
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 9d7d31df22..8ad71fe00c 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -106,7 +106,7 @@ def _summarize_eager(tensor, summarize=None):
 
 # Assert and Print are special symbols in python, so we must
 # use an upper-case version of them.
-@tf_export("Assert")
+@tf_export("debugging.Assert", "Assert")
 @tf_should_use.should_use_result
 def Assert(condition, data, summarize=None, name=None):
   """Asserts that the given condition is true.
diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py
index 69c0fcbbee..97b6f3bd9c 100644
--- a/tensorflow/python/ops/data_flow_ops.py
+++ b/tensorflow/python/ops/data_flow_ops.py
@@ -39,6 +39,7 @@ from tensorflow.python.ops import resource_variable_ops
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_data_flow_ops import *
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 # pylint: enable=wildcard-import
@@ -112,7 +113,8 @@ def _shape_common(s1, s2):
 
 
 # pylint: disable=protected-access
-@tf_export("QueueBase")
+@tf_export("io.QueueBase", "QueueBase")
+@deprecation.deprecated_endpoints("QueueBase")
 class QueueBase(object):
   """Base class for queue implementations.
 
@@ -604,7 +606,8 @@ def _shared_name(shared_name):
   return shared_name
 
 
-@tf_export("RandomShuffleQueue")
+@tf_export("io.RandomShuffleQueue", "RandomShuffleQueue")
+@deprecation.deprecated_endpoints("RandomShuffleQueue")
 class RandomShuffleQueue(QueueBase):
   """A queue implementation that dequeues elements in a random order.
 
@@ -746,7 +749,8 @@ class FIFOQueue(QueueBase):
     super(FIFOQueue, self).__init__(dtypes, shapes, names, queue_ref)
 
 
-@tf_export("PaddingFIFOQueue")
+@tf_export("io.PaddingFIFOQueue", "PaddingFIFOQueue")
+@deprecation.deprecated_endpoints("PaddingFIFOQueue")
 class PaddingFIFOQueue(QueueBase):
   """A FIFOQueue that supports batching variable-sized tensors by padding.
 
@@ -820,7 +824,8 @@ class PaddingFIFOQueue(QueueBase):
     super(PaddingFIFOQueue, self).__init__(dtypes, shapes, names, queue_ref)
 
 
-@tf_export("PriorityQueue")
+@tf_export("io.PriorityQueue", "PriorityQueue")
+@deprecation.deprecated_endpoints("PriorityQueue")
 class PriorityQueue(QueueBase):
   """A queue implementation that dequeues elements in prioritized order.
 
@@ -1300,7 +1305,9 @@ class ConditionalAccumulator(ConditionalAccumulatorBase):
     return out
 
 
-@tf_export("SparseConditionalAccumulator")
+@tf_export("sparse.SparseConditionalAccumulator",
+           "SparseConditionalAccumulator")
+@deprecation.deprecated_endpoints("SparseConditionalAccumulator")
 class SparseConditionalAccumulator(ConditionalAccumulatorBase):
   """A conditional accumulator for aggregating sparse gradients.
 
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index fff3d9b930..65bb77b474 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -43,6 +43,7 @@ from tensorflow.python.ops import gen_linalg_ops
 from tensorflow.python.ops import linalg_ops_impl
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.deprecation import deprecated
 from tensorflow.python.util.deprecation import  deprecated_arg_values
 from tensorflow.python.util.tf_export import tf_export
@@ -341,6 +342,7 @@ class TruncatedNormal(Initializer):
 
 @tf_export("initializers.uniform_unit_scaling",
            "uniform_unit_scaling_initializer")
+@deprecation.deprecated_endpoints("uniform_unit_scaling_initializer")
 class UniformUnitScaling(Initializer):
   """Initializer that generates tensors without scaling variance.
 
@@ -401,6 +403,7 @@ class UniformUnitScaling(Initializer):
 
 @tf_export("keras.initializers.VarianceScaling",
            "initializers.variance_scaling", "variance_scaling_initializer")
+@deprecation.deprecated_endpoints("variance_scaling_initializer")
 class VarianceScaling(Initializer):
   """Initializer capable of adapting its scale to the shape of weights tensors.
 
@@ -494,6 +497,7 @@ class VarianceScaling(Initializer):
 
 @tf_export("keras.initializers.Orthogonal", "initializers.orthogonal",
            "orthogonal_initializer", "keras.initializers.orthogonal")
+@deprecation.deprecated_endpoints("orthogonal_initializer")
 class Orthogonal(Initializer):
   """Initializer that generates an orthogonal matrix.
 
@@ -1149,6 +1153,7 @@ class GlorotUniform(VarianceScaling):
 
 @tf_export("glorot_normal_initializer", "keras.initializers.glorot_normal",
            "initializers.glorot_normal")
+@deprecation.deprecated_endpoints("glorot_normal_initializer")
 class GlorotNormal(VarianceScaling):
   """The Glorot normal initializer, also called Xavier normal initializer.
 
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index f4a93560be..bf4354fa73 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -80,6 +80,7 @@ def _RegularizedGramianCholesky(matrix, l2_regularizer, first_kind):
 
 
 @tf_export('cholesky_solve', 'linalg.cholesky_solve')
+@deprecation.deprecated_endpoints('cholesky_solve')
 def cholesky_solve(chol, rhs, name=None):
   """Solves systems of linear eqns `A X = RHS`, given Cholesky factorizations.
 
@@ -167,7 +168,8 @@ def eye(num_rows,
                              name=name)
 
 
-@tf_export('matrix_solve_ls', 'linalg.lstsq')
+@tf_export('linalg.lstsq', 'matrix_solve_ls')
+@deprecation.deprecated_endpoints('matrix_solve_ls')
 def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
   r"""Solves one or more linear least-squares problems.
 
@@ -220,7 +222,7 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
       squares sense.
 
   Raises:
-    NotImplementedError: matrix_solve_ls is currently disabled for complex128
+    NotImplementedError: linalg.lstsq is currently disabled for complex128
     and l2_regularizer != 0 due to poor accuracy.
   """
 
@@ -303,7 +305,8 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
         matrix, rhs, l2_regularizer, fast=fast, name=name)
 
 
-@tf_export('self_adjoint_eig', 'linalg.eigh')
+@tf_export('linalg.eigh', 'self_adjoint_eig')
+@deprecation.deprecated_endpoints('self_adjoint_eig')
 def self_adjoint_eig(tensor, name=None):
   """Computes the eigen decomposition of a batch of self-adjoint matrices.
 
@@ -325,12 +328,13 @@ def self_adjoint_eig(tensor, name=None):
   return e, v
 
 
-@tf_export('self_adjoint_eigvals', 'linalg.eigvalsh')
+@tf_export('linalg.eigvalsh', 'self_adjoint_eigvals')
+@deprecation.deprecated_endpoints('self_adjoint_eigvals')
 def self_adjoint_eigvals(tensor, name=None):
   """Computes the eigenvalues of one or more self-adjoint matrices.
 
   Note: If your program backpropagates through this function, you should replace
-  it with a call to tf.self_adjoint_eig (possibly ignoring the second output) to
+  it with a call to tf.linalg.eigvalsh (possibly ignoring the second output) to
   avoid computing the eigen decomposition twice. This is because the
   eigenvectors are used to compute the gradient w.r.t. the eigenvalues. See
   _SelfAdjointEigV2Grad in linalg_grad.py.
@@ -348,6 +352,7 @@ def self_adjoint_eigvals(tensor, name=None):
 
 
 @tf_export('svd', 'linalg.svd')
+@deprecation.deprecated_endpoints('svd')
 def svd(tensor, full_matrices=False, compute_uv=True, name=None):
   r"""Computes the singular value decompositions of one or more matrices.
 
diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index 5443699ddd..cffaa983d4 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -59,7 +59,7 @@ def initialize_all_tables(name="init_all_tables"):
   return tables_initializer(name)
 
 
-@tf_export("tables_initializer")
+@tf_export("initializers.tables_initializer", "tables_initializer")
 def tables_initializer(name="init_all_tables"):
   """Returns an Op that initializes all tables of the default graph.
 
diff --git a/tensorflow/python/ops/manip_ops.py b/tensorflow/python/ops/manip_ops.py
index 6633565a64..d9d0728287 100644
--- a/tensorflow/python/ops/manip_ops.py
+++ b/tensorflow/python/ops/manip_ops.py
@@ -19,11 +19,13 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.ops import gen_manip_ops as _gen_manip_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
 # pylint: disable=protected-access
-@tf_export('manip.roll')
+@tf_export('roll', 'manip.roll')
+@deprecation.deprecated_endpoints('manip.roll')
 def roll(input, shift, axis):  # pylint: disable=redefined-builtin
   return _gen_manip_ops.roll(input, shift, axis)
 
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index f57abf6704..83b8b5a3a4 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -70,7 +70,7 @@ def _set_doc(doc):
 
 
 # pylint: disable=redefined-builtin
-@tf_export("argmax")
+@tf_export("math.argmax", "argmax")
 @deprecation.deprecated_args(None, "Use the `axis` argument instead",
                              "dimension")
 @_set_doc(
@@ -88,7 +88,7 @@ def argmax(input,
   return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type)
 
 
-@tf_export("argmin")
+@tf_export("math.argmin", "argmin")
 @deprecation.deprecated_args(None, "Use the `axis` argument instead",
                              "dimension")
 @_set_doc(
@@ -111,7 +111,7 @@ def argmin(input,
 
 # pylint: disable=anomalous-backslash-in-string,protected-access
 # pylint: disable=g-docstring-has-escape
-@tf_export("abs")
+@tf_export("math.abs", "abs")
 def abs(x, name=None):  # pylint: disable=redefined-builtin
   r"""Computes the absolute value of a tensor.
 
@@ -186,7 +186,7 @@ class DivideDelegateWithName(object):
     return _div_python2(self.x, y, self.name)
 
 
-@tf_export("divide")
+@tf_export("math.divide", "divide")
 def divide(x, y, name=None):
   """Computes Python style division of `x` by `y`."""
 
@@ -198,7 +198,7 @@ def divide(x, y, name=None):
     return x / y
 
 
-@tf_export("multiply")
+@tf_export("math.multiply", "multiply")
 def multiply(x, y, name=None):
   return gen_math_ops.mul(x, y, name)
 
@@ -218,7 +218,7 @@ _mul.__doc__ = (
     gen_math_ops.mul.__doc__ + ("" if _mul.__doc__ is None else _mul.__doc__))
 
 
-@tf_export("subtract")
+@tf_export("math.subtract", "subtract")
 def subtract(x, y, name=None):
   return gen_math_ops.sub(x, y, name)
 
@@ -239,7 +239,7 @@ _sub.__doc__ = (
 
 
 # pylint: disable=g-docstring-has-escape
-@tf_export("negative")
+@tf_export("math.negative", "negative")
 def negative(x, name=None):
   """Computes numerical negative value element-wise.
 
@@ -288,7 +288,7 @@ def _neg(x, name=None):
 # pylint: enable=g-docstring-has-escape
 
 
-@tf_export("sign")
+@tf_export("math.sign", "sign")
 def sign(x, name=None):
   """Returns an element-wise indication of the sign of a number.
 
@@ -319,7 +319,7 @@ def sign(x, name=None):
       return gen_math_ops.sign(x, name=name)
 
 
-@tf_export("square")
+@tf_export("math.square", "square")
 def square(x, name=None):
   r"""Computes square of x element-wise.
 
@@ -342,7 +342,7 @@ def square(x, name=None):
       return gen_math_ops.square(x, name=name)
 
 
-@tf_export("sqrt")
+@tf_export("math.sqrt", "sqrt")
 def sqrt(x, name=None):
   r"""Computes square root of x element-wise.
 
@@ -365,7 +365,8 @@ def sqrt(x, name=None):
       return gen_math_ops.sqrt(x, name=name)
 
 
-@tf_export("erf")
+@tf_export("math.erf", "erf")
+@deprecation.deprecated_endpoints("erf")
 def erf(x, name=None):
   """Computes the Gauss error function of `x` element-wise.
 
@@ -386,7 +387,7 @@ def erf(x, name=None):
       return gen_math_ops.erf(x, name=name)
 
 
-@tf_export("scalar_mul")
+@tf_export("math.scalar_mul", "scalar_mul")
 def scalar_mul(scalar, x):
   """Multiplies a scalar times a `Tensor` or `IndexedSlices` object.
 
@@ -416,7 +417,7 @@ def scalar_mul(scalar, x):
     raise ValueError("Only scalar multiply works, got shape %s" % shape)
 
 
-@tf_export("pow")
+@tf_export("math.pow", "pow")
 def pow(x, y, name=None):  # pylint: disable=redefined-builtin
   r"""Computes the power of one value to another.
 
@@ -444,7 +445,7 @@ def pow(x, y, name=None):  # pylint: disable=redefined-builtin
 
 
 # pylint: disable=redefined-builtin,redefined-outer-name
-@tf_export("complex")
+@tf_export("dtypes.complex", "complex")
 def complex(real, imag, name=None):
   r"""Converts two real numbers to a complex number.
 
@@ -486,7 +487,8 @@ def complex(real, imag, name=None):
     return gen_math_ops._complex(real, imag, Tout=Tout, name=name)
 
 
-@tf_export("real")
+@tf_export("math.real", "real")
+@deprecation.deprecated_endpoints("real")
 def real(input, name=None):
   r"""Returns the real part of a complex (or real) tensor.
 
@@ -517,7 +519,8 @@ def real(input, name=None):
       return input
 
 
-@tf_export("imag")
+@tf_export("math.imag", "imag")
+@deprecation.deprecated_endpoints("imag")
 def imag(input, name=None):
   r"""Returns the imaginary part of a complex (or real) tensor.
 
@@ -547,7 +550,8 @@ def imag(input, name=None):
       return array_ops.zeros_like(input)
 
 
-@tf_export("angle")
+@tf_export("math.angle", "angle")
+@deprecation.deprecated_endpoints("angle")
 def angle(input, name=None):
   r"""Returns the element-wise argument of a complex (or real) tensor.
 
@@ -586,7 +590,7 @@ def angle(input, name=None):
 # pylint: enable=redefined-outer-name,redefined-builtin
 
 
-@tf_export("round")
+@tf_export("math.round", "round")
 def round(x, name=None):  # pylint: disable=redefined-builtin
   """Rounds the values of a tensor to the nearest integer, element-wise.
 
@@ -613,7 +617,7 @@ def round(x, name=None):  # pylint: disable=redefined-builtin
     return gen_math_ops.round(x, name=name)
 
 
-@tf_export("cast")
+@tf_export("dtypes.cast", "cast")
 def cast(x, dtype, name=None):
   """Casts a tensor to a new type.
 
@@ -676,7 +680,7 @@ def cast(x, dtype, name=None):
     return x
 
 
-@tf_export("saturate_cast")
+@tf_export("dtypes.saturate_cast", "saturate_cast")
 def saturate_cast(value, dtype, name=None):
   """Performs a safe saturating cast of `value` to `dtype`.
 
@@ -995,7 +999,7 @@ def _div_python2(x, y, name=None):
       return gen_math_ops.floor_div(x, y, name=name)
 
 
-@tf_export("truediv")
+@tf_export("math.truediv", "truediv")
 def truediv(x, y, name=None):
   """Divides x / y elementwise (using Python 3 division operator semantics).
 
@@ -1006,7 +1010,7 @@ def truediv(x, y, name=None):
   arguments are cast to floating types first.   This op is generated by normal
   `x / y` division in Python 3 and in Python 2.7 with
   `from __future__ import division`.  If you want integer division that rounds
-  down, use `x // y` or `tf.floordiv`.
+  down, use `x // y` or `tf.math.floordiv`.
 
   `x` and `y` must have the same numeric type.  If the inputs are floating
   point, the output will have the same type.  If the inputs are integral, the
@@ -1078,7 +1082,8 @@ mod = gen_math_ops.floor_mod
 
 # TODO(aselle): Deprecate this once all internal functionality uses
 # tf.truncatediv
-@tf_export("floordiv")
+@tf_export("math.floordiv", "floordiv")
+@deprecation.deprecated_endpoints("floordiv")
 def floordiv(x, y, name=None):
   """Divides `x / y` elementwise, rounding toward the most negative integer.
 
@@ -1151,7 +1156,8 @@ _OverrideBinaryOperatorHelper(gen_math_ops.floor_mod, "mod")
 _OverrideBinaryOperatorHelper(pow, "pow")
 
 
-@tf_export("logical_xor")
+@tf_export("math.logical_xor", "logical_xor")
+@deprecation.deprecated_endpoints("logical_xor")
 def logical_xor(x, y, name="LogicalXor"):
   """x ^ y = (x | y) & ~(x & y)."""
   # TODO(alemi) Make this a cwise op if people end up relying on it.
@@ -1277,7 +1283,7 @@ def _may_reduce_to_scalar(keepdims, axis, reduction_indices, output):
   return output
 
 
-@tf_export("reduce_sum")
+@tf_export("math.reduce_sum", "reduce_sum")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_sum(input_tensor,
@@ -1339,7 +1345,7 @@ def reduce_sum(input_tensor,
                                    name=name))
 
 
-@tf_export("count_nonzero")
+@tf_export("math.count_nonzero", "count_nonzero")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def count_nonzero(input_tensor,
@@ -1417,7 +1423,7 @@ def count_nonzero(input_tensor,
         dtype=dtype)
 
 
-@tf_export("reduce_mean")
+@tf_export("math.reduce_mean", "reduce_mean")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_mean(input_tensor,
@@ -1489,7 +1495,7 @@ def reduce_mean(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_prod")
+@tf_export("math.reduce_prod", "reduce_prod")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_prod(input_tensor,
@@ -1539,7 +1545,7 @@ def reduce_prod(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_min")
+@tf_export("math.reduce_min", "reduce_min")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_min(input_tensor,
@@ -1588,7 +1594,7 @@ def reduce_min(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_max")
+@tf_export("math.reduce_max", "reduce_max")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_max(input_tensor,
@@ -1637,7 +1643,7 @@ def reduce_max(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_all")
+@tf_export("math.reduce_all", "reduce_all")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_all(input_tensor,
@@ -1695,7 +1701,7 @@ def reduce_all(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_any")
+@tf_export("math.reduce_any", "reduce_any")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_any(input_tensor,
@@ -1753,7 +1759,7 @@ def reduce_any(input_tensor,
                                    name=name))
 
 
-@tf_export("reduce_logsumexp")
+@tf_export("math.reduce_logsumexp", "reduce_logsumexp")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def reduce_logsumexp(input_tensor,
@@ -1827,7 +1833,8 @@ def reduce_logsumexp(input_tensor,
     return _may_reduce_to_scalar(keepdims, axis, reduction_indices, result)
 
 
-@tf_export("trace", "linalg.trace")
+@tf_export("linalg.trace", "trace")
+@deprecation.deprecated_endpoints("trace")
 def trace(x, name=None):
   """Compute the trace of a tensor `x`.
 
@@ -1841,12 +1848,12 @@ def trace(x, name=None):
 
   ```python
   x = tf.constant([[1, 2], [3, 4]])
-  tf.trace(x)  # 5
+  tf.linalg.trace(x)  # 5
 
   x = tf.constant([[1, 2, 3],
                    [4, 5, 6],
                    [7, 8, 9]])
-  tf.trace(x)  # 15
+  tf.linalg.trace(x)  # 15
 
   x = tf.constant([[[1, 2, 3],
                     [4, 5, 6],
@@ -1854,7 +1861,7 @@ def trace(x, name=None):
                    [[-1, -2, -3],
                     [-4, -5, -6],
                     [-7, -8, -9]]])
-  tf.trace(x)  # [15, -15]
+  tf.linalg.trace(x)  # [15, -15]
   ```
 
   Args:
@@ -1869,7 +1876,7 @@ def trace(x, name=None):
     return reduce_sum(array_ops.matrix_diag_part(x), [-1], name=name)
 
 
-@tf_export("matmul")
+@tf_export("linalg.matmul", "matmul")
 def matmul(a,
            b,
            transpose_a=False,
@@ -2131,7 +2138,7 @@ def _as_indexed_slices_list(inputs, optimize=True):
   return casted_outputs
 
 
-@tf_export("add_n")
+@tf_export("math.add_n", "add_n")
 def add_n(inputs, name=None):
   """Adds all input tensors element-wise.
 
@@ -2166,14 +2173,15 @@ def add_n(inputs, name=None):
   return gen_math_ops.add_n(inputs, name=name)
 
 
-@tf_export("accumulate_n")
+@tf_export("math.accumulate_n", "accumulate_n")
+@deprecation.deprecated_endpoints("accumulate_n")
 def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
   """Returns the element-wise sum of a list of tensors.
 
   Optionally, pass `shape` and `tensor_dtype` for shape and type checking,
   otherwise, these are inferred.
 
-  `tf.accumulate_n` performs the same operation as `tf.add_n`, but does not
+  `tf.math.accumulate_n` performs the same operation as `tf.add_n`, but does not
   wait for all of its inputs to be ready before beginning to sum. This can
   save memory if inputs are ready at different times, since minimum temporary
   storage is proportional to the output size rather than the inputs size.
@@ -2185,10 +2193,10 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
   ```python
   a = tf.constant([[1, 2], [3, 4]])
   b = tf.constant([[5, 0], [0, 6]])
-  tf.accumulate_n([a, b, a])  # [[7, 4], [6, 14]]
+  tf.math.accumulate_n([a, b, a])  # [[7, 4], [6, 14]]
 
   # Explicitly pass shape and type
-  tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32)
+  tf.math.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32)
                                                                  # [[7,  4],
                                                                  #  [6, 14]]
   ```
@@ -2252,7 +2260,7 @@ def _accumulate_n_grad(op, grad):
   return [grad] * len(op.inputs)
 
 
-@tf_export("nn.sigmoid", "sigmoid")
+@tf_export("math.sigmoid", "nn.sigmoid", "sigmoid")
 def sigmoid(x, name=None):
   """Computes sigmoid of `x` element-wise.
 
@@ -2275,7 +2283,8 @@ def sigmoid(x, name=None):
     return gen_math_ops.sigmoid(x, name=name)
 
 
-@tf_export("log_sigmoid")
+@tf_export("math.log_sigmoid", "log_sigmoid")
+@deprecation.deprecated_endpoints("log_sigmoid")
 def log_sigmoid(x, name=None):
   """Computes log sigmoid of `x` element-wise.
 
@@ -2294,7 +2303,7 @@ def log_sigmoid(x, name=None):
     return gen_math_ops.neg(gen_nn_ops.softplus(-x), name=name)
 
 
-@tf_export("nn.tanh", "tanh")
+@tf_export("math.tanh", "nn.tanh", "tanh")
 def tanh(x, name=None):
   """Computes hyperbolic tangent of `x` element-wise.
 
@@ -2315,7 +2324,8 @@ def tanh(x, name=None):
       return gen_math_ops.tanh(x, name=name)
 
 
-@tf_export("bincount")
+@tf_export("math.bincount", "bincount")
+@deprecation.deprecated_endpoints("bincount")
 def bincount(arr,
              weights=None,
              minlength=None,
@@ -2362,7 +2372,7 @@ def bincount(arr,
   return gen_math_ops.bincount(arr, output_size, weights)
 
 
-@tf_export("cumsum")
+@tf_export("math.cumsum", "cumsum")
 def cumsum(x, axis=0, exclusive=False, reverse=False, name=None):
   """Compute the cumulative sum of the tensor `x` along `axis`.
 
@@ -2414,7 +2424,8 @@ def cumsum(x, axis=0, exclusive=False, reverse=False, name=None):
         x, axis, exclusive=exclusive, reverse=reverse, name=name)
 
 
-@tf_export("cumprod")
+@tf_export("math.cumprod", "cumprod")
+@deprecation.deprecated_endpoints("cumprod")
 def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
   """Compute the cumulative product of the tensor `x` along `axis`.
 
@@ -2422,7 +2433,7 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
   first element of the input is identical to the first element of the output:
 
   ```python
-  tf.cumprod([a, b, c])  # [a, a * b, a * b * c]
+  tf.math.cumprod([a, b, c])  # [a, a * b, a * b * c]
   ```
 
   By setting the `exclusive` kwarg to `True`, an exclusive cumprod is
@@ -2430,21 +2441,21 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
   instead:
 
   ```python
-  tf.cumprod([a, b, c], exclusive=True)  # [1, a, a * b]
+  tf.math.cumprod([a, b, c], exclusive=True)  # [1, a, a * b]
   ```
 
   By setting the `reverse` kwarg to `True`, the cumprod is performed in the
   opposite direction:
 
   ```python
-  tf.cumprod([a, b, c], reverse=True)  # [a * b * c, b * c, c]
+  tf.math.cumprod([a, b, c], reverse=True)  # [a * b * c, b * c, c]
   ```
 
   This is more efficient than using separate `tf.reverse` ops.
   The `reverse` and `exclusive` kwargs can also be combined:
 
   ```python
-  tf.cumprod([a, b, c], exclusive=True, reverse=True)  # [b * c, c, 1]
+  tf.math.cumprod([a, b, c], exclusive=True, reverse=True)  # [b * c, c, 1]
   ```
 
   Args:
@@ -2466,7 +2477,8 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
         x, axis, exclusive=exclusive, reverse=reverse, name=name)
 
 
-@tf_export("conj")
+@tf_export("math.conj", "conj")
+@deprecation.deprecated_endpoints("conj")
 def conj(x, name=None):
   r"""Returns the complex conjugate of a complex number.
 
@@ -2480,7 +2492,7 @@ def conj(x, name=None):
   For example:
 
       # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-      tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
+      tf.math.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
 
   If `x` is real, it is returned unchanged.
 
@@ -2566,7 +2578,8 @@ def _unsorted_segment_N(data, segment_ids, num_segments):
   return gen_math_ops.maximum(N, 1)
 
 
-@tf_export("unsorted_segment_mean")
+@tf_export("math.unsorted_segment_mean", "unsorted_segment_mean")
+@deprecation.deprecated_endpoints("unsorted_segment_mean")
 def unsorted_segment_mean(data, segment_ids, num_segments, name=None):
   r"""Computes the mean along segments of a tensor.
 
@@ -2608,7 +2621,8 @@ def unsorted_segment_mean(data, segment_ids, num_segments, name=None):
     return summed / N
 
 
-@tf_export("unsorted_segment_sqrt_n")
+@tf_export("math.unsorted_segment_sqrt_n", "unsorted_segment_sqrt_n")
+@deprecation.deprecated_endpoints("unsorted_segment_sqrt_n")
 def unsorted_segment_sqrt_n(data, segment_ids, num_segments, name=None):
   r"""Computes the sum along segments of a tensor divided by the sqrt(N).
 
@@ -2653,7 +2667,8 @@ def unsorted_segment_sqrt_n(data, segment_ids, num_segments, name=None):
     return summed / gen_math_ops.sqrt(N)
 
 
-@tf_export("sparse_segment_sum")
+@tf_export("sparse.segment_sum", "sparse_segment_sum")
+@deprecation.deprecated_endpoints("sparse_segment_sum")
 def sparse_segment_sum(data, indices, segment_ids, name=None,
                        num_segments=None):
   r"""Computes the sum along sparse segments of a tensor.
@@ -2674,16 +2689,16 @@ def sparse_segment_sum(data, indices, segment_ids, name=None,
   c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
 
   # Select two rows, one segment.
-  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
+  tf.sparse.segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0]))
   # => [[0 0 0 0]]
 
   # Select two rows, two segment.
-  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
+  tf.sparse.segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1]))
   # => [[ 1  2  3  4]
   #     [-1 -2 -3 -4]]
 
   # With missing segment ids.
-  tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 2]),
+  tf.sparse.segment_sum(c, tf.constant([0, 1]), tf.constant([0, 2]),
                         num_segments=4)
   # => [[ 1  2  3  4]
   #     [ 0  0  0  0]
@@ -2691,7 +2706,7 @@ def sparse_segment_sum(data, indices, segment_ids, name=None,
   #     [ 0  0  0  0]]
 
   # Select all rows, two segments.
-  tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
+  tf.sparse.segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1]))
   # => [[0 0 0 0]
   #     [5 6 7 8]]
 
@@ -2726,7 +2741,8 @@ def sparse_segment_sum(data, indices, segment_ids, name=None,
         data=data, indices=indices, segment_ids=segment_ids, name=name)
 
 
-@tf_export("sparse_segment_mean")
+@tf_export("sparse.segment_mean", "sparse_segment_mean")
+@deprecation.deprecated_endpoints("sparse_segment_mean")
 def sparse_segment_mean(data,
                         indices,
                         segment_ids,
@@ -2771,7 +2787,8 @@ def sparse_segment_mean(data,
         data=data, indices=indices, segment_ids=segment_ids, name=name)
 
 
-@tf_export("sparse_segment_sqrt_n")
+@tf_export("sparse.segment_sqrt_n", "sparse_segment_sqrt_n")
+@deprecation.deprecated_endpoints("sparse_segment_sqrt_n")
 def sparse_segment_sqrt_n(data,
                           indices,
                           segment_ids,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 2a1919e66f..453848fc00 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -328,7 +328,7 @@ def swish(features):
   return features * math_ops.sigmoid(features)
 
 
-@tf_export("nn.l2_normalize")
+@tf_export("math.l2_normalize", "linalg.l2_normalize", "nn.l2_normalize")
 @deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
   """Normalizes along dimension `axis` using an L2 norm.
@@ -360,7 +360,7 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
     return math_ops.multiply(x, x_inv_norm, name=name)
 
 
-@tf_export("nn.zero_fraction")
+@tf_export("math.zero_fraction", "nn.zero_fraction")
 def zero_fraction(value, name=None):
   """Returns the fraction of zeros in `value`.
 
@@ -689,7 +689,7 @@ def moments(
     # Compute true mean while keeping the dims for proper broadcasting.
     mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean")
     # sample variance, not unbiased variance
-    # Note: stop_gradient does not change the gradient that gets 
+    # Note: stop_gradient does not change the gradient that gets
     #       backpropagated to the mean from the variance calculation,
     #       because that gradient is zero
     variance = math_ops.reduce_mean(
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 9ef177e97b..fd71e7cc39 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1692,7 +1692,7 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   return output
 
 
-@tf_export("nn.softmax")
+@tf_export("nn.softmax", "math.softmax")
 @deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def softmax(logits, axis=None, name=None, dim=None):
   """Computes softmax activations.
@@ -1722,7 +1722,7 @@ def softmax(logits, axis=None, name=None, dim=None):
   return _softmax(logits, gen_nn_ops.softmax, axis, name)
 
 
-@tf_export("nn.log_softmax")
+@tf_export("nn.log_softmax", "math.log_softmax")
 @deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def log_softmax(logits, axis=None, name=None, dim=None):
   """Computes log softmax activations.
@@ -2329,7 +2329,7 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):  # pylint: di
     return ret
 
 
-@tf_export("nn.top_k")
+@tf_export("math.top_k", "nn.top_k")
 def top_k(input, k=1, sorted=True, name=None):  # pylint: disable=redefined-builtin
   """Finds values and indices of the `k` largest entries for the last dimension.
 
@@ -2644,7 +2644,7 @@ def erosion2d(value, kernel, strides, rates, padding, name=None):
             name=name))
 
 
-@tf_export("nn.in_top_k")
+@tf_export("math.in_top_k", "nn.in_top_k")
 def in_top_k(predictions, targets, k, name=None):
   r"""Says whether the targets are in the top `K` predictions.
 
diff --git a/tensorflow/python/ops/numerics.py b/tensorflow/python/ops/numerics.py
index 8fcbd7d834..002e87b411 100644
--- a/tensorflow/python/ops/numerics.py
+++ b/tensorflow/python/ops/numerics.py
@@ -24,10 +24,12 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("verify_tensor_all_finite")
+@tf_export("debugging.assert_all_finite", "verify_tensor_all_finite")
+@deprecation.deprecated_endpoints("verify_tensor_all_finite")
 def verify_tensor_all_finite(t, msg, name=None):
   """Assert that the tensor does not contain any NaN's or Inf's.
 
diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index b3e03a0135..ff50fe0d09 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops.gen_parsing_ops import *
 # pylint: enable=wildcard-import,undefined-variable
 from tensorflow.python.platform import tf_logging
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -45,7 +46,7 @@ ops.NotDifferentiable("SerializeTensor")
 ops.NotDifferentiable("StringToNumber")
 
 
-@tf_export("VarLenFeature")
+@tf_export("io.VarLenFeature", "VarLenFeature")
 class VarLenFeature(collections.namedtuple("VarLenFeature", ["dtype"])):
   """Configuration for parsing a variable-length input feature.
 
@@ -55,7 +56,7 @@ class VarLenFeature(collections.namedtuple("VarLenFeature", ["dtype"])):
   pass
 
 
-@tf_export("SparseFeature")
+@tf_export("io.SparseFeature", "SparseFeature")
 class SparseFeature(
     collections.namedtuple(
         "SparseFeature",
@@ -130,7 +131,7 @@ class SparseFeature(
         cls, index_key, value_key, dtype, size, already_sorted)
 
 
-@tf_export("FixedLenFeature")
+@tf_export("io.FixedLenFeature", "FixedLenFeature")
 class FixedLenFeature(collections.namedtuple(
     "FixedLenFeature", ["shape", "dtype", "default_value"])):
   """Configuration for parsing a fixed-length input feature.
@@ -150,7 +151,7 @@ class FixedLenFeature(collections.namedtuple(
         cls, shape, dtype, default_value)
 
 
-@tf_export("FixedLenSequenceFeature")
+@tf_export("io.FixedLenSequenceFeature", "FixedLenSequenceFeature")
 class FixedLenSequenceFeature(collections.namedtuple(
     "FixedLenSequenceFeature",
     ["shape", "dtype", "allow_missing", "default_value"])):
@@ -360,7 +361,7 @@ def _prepend_none_dimension(features):
     return features
 
 
-@tf_export("parse_example")
+@tf_export("io.parse_example", "parse_example")
 def parse_example(serialized, features, name=None, example_names=None):
   # pylint: disable=line-too-long
   """Parses `Example` protos into a `dict` of tensors.
@@ -761,7 +762,7 @@ def _process_raw_parameters(names, dense_defaults, sparse_keys, sparse_types,
           dense_shapes_as_proto, dense_shapes)
 
 
-@tf_export("parse_single_example")
+@tf_export("io.parse_single_example", "parse_single_example")
 def parse_single_example(serialized, features, name=None, example_names=None):
   """Parses a single `Example` proto.
 
@@ -1244,7 +1245,7 @@ def _parse_sequence_example_raw(serialized,
 
 # TODO(sundberg): rewrite this method to call the batch version, which is more
 # efficient especially for large inputs.
-@tf_export("parse_single_sequence_example")
+@tf_export("io.parse_single_sequence_example", "parse_single_sequence_example")
 def parse_single_sequence_example(
     serialized, context_features=None, sequence_features=None,
     example_name=None, name=None):
@@ -1564,7 +1565,8 @@ def _parse_single_sequence_example_raw(serialized,
 
 
 # Swap `name` and `na_value` for backward compatibility.
-@tf_export("decode_csv")
+@tf_export("io.decode_csv", "decode_csv")
+@deprecation.deprecated_endpoints("decode_csv")
 def decode_csv(records,
                record_defaults,
                field_delim=",",
diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py
index 4baf506385..c2eb9dfc5d 100644
--- a/tensorflow/python/ops/random_ops.py
+++ b/tensorflow/python/ops/random_ops.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import math_ops
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_random_ops import *
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 # pylint: enable=wildcard-import
@@ -43,7 +44,7 @@ def _ShapeTensor(shape):
   return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
 
 
-@tf_export("random_normal")
+@tf_export("random.normal", "random_normal")
 def random_normal(shape,
                   mean=0.0,
                   stddev=1.0,
@@ -136,7 +137,7 @@ def parameterized_truncated_normal(shape,
     return rnd
 
 
-@tf_export("truncated_normal")
+@tf_export("random.truncated_normal", "truncated_normal")
 def truncated_normal(shape,
                      mean=0.0,
                      stddev=1.0,
@@ -181,7 +182,7 @@ ops.NotDifferentiable("ParameterizedTruncatedNormal")
 ops.NotDifferentiable("TruncatedNormal")
 
 
-@tf_export("random_uniform")
+@tf_export("random.uniform", "random_uniform")
 def random_uniform(shape,
                    minval=0,
                    maxval=None,
@@ -246,7 +247,7 @@ def random_uniform(shape,
 ops.NotDifferentiable("RandomUniform")
 
 
-@tf_export("random_shuffle")
+@tf_export("random.shuffle", "random_shuffle")
 def random_shuffle(value, seed=None, name=None):
   """Randomly shuffles a tensor along its first dimension.
 
@@ -277,7 +278,7 @@ def random_shuffle(value, seed=None, name=None):
       value, seed=seed1, seed2=seed2, name=name)
 
 
-@tf_export("random_crop")
+@tf_export("image.random_crop", "random_crop")
 def random_crop(value, size, seed=None, name=None):
   """Randomly crops a tensor to a given size.
 
@@ -320,7 +321,7 @@ def random_crop(value, size, seed=None, name=None):
     return array_ops.slice(value, offset, size, name=name)
 
 
-@tf_export("multinomial")
+@tf_export("random.multinomial", "multinomial")
 def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None):
   """Draws samples from a multinomial distribution.
 
@@ -356,7 +357,8 @@ def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None):
 ops.NotDifferentiable("Multinomial")
 
 
-@tf_export("random_gamma")
+@tf_export("random.gamma", "random_gamma")
+@deprecation.deprecated_endpoints("random_gamma")
 def random_gamma(shape,
                  alpha,
                  beta=None,
@@ -439,7 +441,8 @@ def random_gamma(shape,
             shape, alpha_broadcast, seed=seed1, seed2=seed2) / beta)
 
 
-@tf_export("random_poisson")
+@tf_export("random.poisson", "random_poisson")
+@deprecation.deprecated_endpoints("random_poisson")
 def random_poisson(lam, shape, dtype=dtypes.float32, seed=None, name=None):
   """Draws `shape` samples from each of the given Poisson distribution(s).
 
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index 400a42a3c0..7e3dbdbad4 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -185,7 +185,8 @@ def sparse_eye(num_rows,
 
 
 # pylint: disable=protected-access
-@tf_export("sparse_concat")
+@tf_export("sparse.concat", "sparse_concat")
+@deprecation.deprecated_endpoints("sparse_concat")
 @deprecation.deprecated_args(
     None, "concat_dim is deprecated, use axis instead", "concat_dim")
 def sparse_concat(axis,
@@ -317,7 +318,8 @@ def sparse_concat(axis,
   return sparse_tensor.SparseTensor(output_ind, output_val, output_shape)
 
 
-@tf_export("sparse_add")
+@tf_export("sparse.add", "sparse_add")
+@deprecation.deprecated_endpoints("sparse_add")
 def sparse_add(a, b, thresh=0):
   """Adds two tensors, at least one of each is a `SparseTensor`.
 
@@ -557,7 +559,8 @@ def sparse_dense_cwise_add(sp_t, dense_t):
   return sparse_tensor.SparseTensor(sp_t.indices, result, sp_t.dense_shape)
 
 
-@tf_export("sparse_reorder")
+@tf_export("sparse.reorder", "sparse_reorder")
+@deprecation.deprecated_endpoints("sparse_reorder")
 def sparse_reorder(sp_input, name=None):
   """Reorders a `SparseTensor` into the canonical, row-major ordering.
 
@@ -607,7 +610,8 @@ def sparse_reorder(sp_input, name=None):
   return sparse_tensor.SparseTensor(reordered_ind, reordered_val, dense_shape)
 
 
-@tf_export("sparse_reshape")
+@tf_export("sparse.reshape", "sparse_reshape")
+@deprecation.deprecated_endpoints("sparse_reshape")
 def sparse_reshape(sp_input, shape, name=None):
   """Reshapes a `SparseTensor` to represent values in a new dense shape.
 
@@ -700,7 +704,8 @@ class KeywordRequired(object):
     return "KeywordRequired()"
 
 
-@tf_export("sparse_split")
+@tf_export("sparse.split", "sparse_split")
+@deprecation.deprecated_endpoints("sparse_split")
 @deprecation.deprecated_args(
     None, "split_dim is deprecated, use axis instead", "split_dim")
 def sparse_split(keyword_required=KeywordRequired(),
@@ -773,7 +778,8 @@ def sparse_split(keyword_required=KeywordRequired(),
   return sparse_tensors
 
 
-@tf_export("sparse_slice")
+@tf_export("sparse.slice", "sparse_slice")
+@deprecation.deprecated_endpoints("sparse_slice")
 def sparse_slice(sp_input, start, size, name=None):
   """Slice a `SparseTensor` based on the `start` and `size.
 
@@ -785,11 +791,11 @@ def sparse_slice(sp_input, start, size, name=None):
 
   Graphically the output tensors are:
 
-      sparse_slice([0, 0], [2, 4]) = shape = [2, 4]
+      sparse.slice([0, 0], [2, 4]) = shape = [2, 4]
       [    a  ]
       [b c    ]
 
-      sparse_slice([0, 4], [2, 3]) = shape = [2, 3]
+      sparse.slice([0, 4], [2, 3]) = shape = [2, 3]
       [ d e  ]
       [      ]
 
@@ -823,6 +829,9 @@ def sparse_slice(sp_input, start, size, name=None):
 
 
 @tf_export("sparse_to_dense")
+@deprecation.deprecated(
+    None,
+    "Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.")
 def sparse_to_dense(sparse_indices,
                     output_shape,
                     sparse_values,
@@ -878,7 +887,8 @@ def sparse_to_dense(sparse_indices,
       name=name)
 
 
-@tf_export("sparse_reduce_max")
+@tf_export("sparse.reduce_max", "sparse_reduce_max")
+@deprecation.deprecated_endpoints("sparse_reduce_max")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def sparse_reduce_max(sp_input, axis=None, keepdims=None,
@@ -912,16 +922,16 @@ def sparse_reduce_max(sp_input, axis=None, keepdims=None,
   # 'x' represents [[1, ?, 2]
   #                 [?, 3, ?]]
   # where ? is implicitly-zero.
-  tf.sparse_reduce_max(x) ==> 3
-  tf.sparse_reduce_max(x, 0) ==> [1, 3, 2]
-  tf.sparse_reduce_max(x, 1) ==> [2, 3]  # Can also use -1 as the axis.
-  tf.sparse_reduce_max(x, 1, keepdims=True) ==> [[2], [3]]
-  tf.sparse_reduce_max(x, [0, 1]) ==> 3
+  tf.sparse.reduce_max(x) ==> 3
+  tf.sparse.reduce_max(x, 0) ==> [1, 3, 2]
+  tf.sparse.reduce_max(x, 1) ==> [2, 3]  # Can also use -1 as the axis.
+  tf.sparse.reduce_max(x, 1, keepdims=True) ==> [[2], [3]]
+  tf.sparse.reduce_max(x, [0, 1]) ==> 3
 
   # 'y' represents [[-7, ?]
   #                 [ 4, 3]
   #                 [ ?, ?]
-  tf.sparse_reduce_max(x, 1) ==> [-7, 4, 0]
+  tf.sparse.reduce_max(x, 1) ==> [-7, 4, 0]
   ```
 
   Args:
@@ -945,7 +955,8 @@ def sparse_reduce_max(sp_input, axis=None, keepdims=None,
       math_ops._ReductionDims(sp_input, axis, reduction_axes), keepdims)
 
 
-@tf_export("sparse_reduce_max_sparse")
+@tf_export("sparse.reduce_max_sparse", "sparse_reduce_max_sparse")
+@deprecation.deprecated_endpoints("sparse_reduce_max_sparse")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def sparse_reduce_max_sparse(sp_input,
@@ -995,7 +1006,8 @@ def sparse_reduce_max_sparse(sp_input,
   return sparse_tensor.SparseTensor(output_ind, output_val, output_shape)
 
 
-@tf_export("sparse_reduce_sum")
+@tf_export("sparse.reduce_sum", "sparse_reduce_sum")
+@deprecation.deprecated_endpoints("sparse_reduce_sum")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def sparse_reduce_sum(sp_input, axis=None, keepdims=None,
@@ -1021,11 +1033,11 @@ def sparse_reduce_sum(sp_input, axis=None, keepdims=None,
   # 'x' represents [[1, ?, 1]
   #                 [?, 1, ?]]
   # where ? is implicitly-zero.
-  tf.sparse_reduce_sum(x) ==> 3
-  tf.sparse_reduce_sum(x, 0) ==> [1, 1, 1]
-  tf.sparse_reduce_sum(x, 1) ==> [2, 1]  # Can also use -1 as the axis.
-  tf.sparse_reduce_sum(x, 1, keepdims=True) ==> [[2], [1]]
-  tf.sparse_reduce_sum(x, [0, 1]) ==> 3
+  tf.sparse.reduce_sum(x) ==> 3
+  tf.sparse.reduce_sum(x, 0) ==> [1, 1, 1]
+  tf.sparse.reduce_sum(x, 1) ==> [2, 1]  # Can also use -1 as the axis.
+  tf.sparse.reduce_sum(x, 1, keepdims=True) ==> [[2], [1]]
+  tf.sparse.reduce_sum(x, [0, 1]) ==> 3
   ```
 
   Args:
@@ -1049,7 +1061,8 @@ def sparse_reduce_sum(sp_input, axis=None, keepdims=None,
       math_ops._ReductionDims(sp_input, axis, reduction_axes), keepdims)
 
 
-@tf_export("sparse_reduce_sum_sparse")
+@tf_export("sparse.reduce_sum_sparse", "sparse_reduce_sum_sparse")
+@deprecation.deprecated_endpoints("sparse_reduce_sum_sparse")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
 def sparse_reduce_sum_sparse(sp_input,
@@ -1099,7 +1112,8 @@ def sparse_reduce_sum_sparse(sp_input,
   return sparse_tensor.SparseTensor(output_ind, output_val, output_shape)
 
 
-@tf_export("sparse_tensor_to_dense")
+@tf_export("sparse.to_dense", "sparse_tensor_to_dense")
+@deprecation.deprecated_endpoints("sparse_tensor_to_dense")
 def sparse_tensor_to_dense(sp_input,
                            default_value=0,
                            validate_indices=True,
@@ -1151,7 +1165,8 @@ def sparse_tensor_to_dense(sp_input,
       name=name)
 
 
-@tf_export("sparse_to_indicator")
+@tf_export("sparse.to_indicator", "sparse_to_indicator")
+@deprecation.deprecated_endpoints("sparse_to_indicator")
 def sparse_to_indicator(sp_input, vocab_size, name=None):
   """Converts a `SparseTensor` of ids into a dense bool indicator tensor.
 
@@ -1214,7 +1229,8 @@ def sparse_to_indicator(sp_input, vocab_size, name=None):
         sp_new, default_value=False, validate_indices=False, name=name)
 
 
-@tf_export("sparse_merge")
+@tf_export("sparse.merge", "sparse_merge")
+@deprecation.deprecated_endpoints("sparse_merge")
 def sparse_merge(sp_ids, sp_values, vocab_size, name=None,
                  already_sorted=False):
   """Combines a batch of feature ids and values into a single `SparseTensor`.
@@ -1358,7 +1374,8 @@ def sparse_merge(sp_ids, sp_values, vocab_size, name=None,
         sorted_result.indices, sorted_result.values, new_shape)
 
 
-@tf_export("sparse_retain")
+@tf_export("sparse.retain", "sparse_retain")
+@deprecation.deprecated_endpoints("sparse_retain")
 def sparse_retain(sp_input, to_retain):
   """Retains specified non-empty values within a `SparseTensor`.
 
@@ -1402,7 +1419,8 @@ def sparse_retain(sp_input, to_retain):
                                     array_ops.identity(sp_input.dense_shape))
 
 
-@tf_export("sparse_reset_shape")
+@tf_export("sparse.reset_shape", "sparse_reset_shape")
+@deprecation.deprecated_endpoints("sparse_reset_shape")
 def sparse_reset_shape(sp_input, new_shape=None):
   """Resets the shape of a `SparseTensor` with indices and values unchanged.
 
@@ -1503,7 +1521,8 @@ def sparse_reset_shape(sp_input, new_shape=None):
   return sparse_tensor.SparseTensor(in_indices, in_values, output_shape_tensor)
 
 
-@tf_export("sparse_fill_empty_rows")
+@tf_export("sparse.fill_empty_rows", "sparse_fill_empty_rows")
+@deprecation.deprecated_endpoints("sparse_fill_empty_rows")
 def sparse_fill_empty_rows(sp_input, default_value, name=None):
   """Fills empty rows in the input 2-D `SparseTensor` with a default value.
 
@@ -1567,7 +1586,8 @@ def sparse_fill_empty_rows(sp_input, default_value, name=None):
         dense_shape=sp_input.dense_shape), empty_row_indicator)
 
 
-@tf_export("serialize_sparse")
+@tf_export("io.serialize_sparse", "serialize_sparse")
+@deprecation.deprecated_endpoints("serialize_sparse")
 def serialize_sparse(sp_input, name=None, out_type=dtypes.string):
   """Serialize a `SparseTensor` into a 3-vector (1-D `Tensor`) object.
 
@@ -1593,7 +1613,8 @@ def serialize_sparse(sp_input, name=None, out_type=dtypes.string):
       out_type=out_type)
 
 
-@tf_export("serialize_many_sparse")
+@tf_export("io.serialize_many_sparse", "serialize_many_sparse")
+@deprecation.deprecated_endpoints("serialize_many_sparse")
 def serialize_many_sparse(sp_input, name=None, out_type=dtypes.string):
   """Serialize `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor`.
 
@@ -1694,7 +1715,8 @@ def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None):
   return sparse_tensor.SparseTensor(output_indices, output_values, output_shape)
 
 
-@tf_export("deserialize_many_sparse")
+@tf_export("io.deserialize_many_sparse", "deserialize_many_sparse")
+@deprecation.deprecated_endpoints("deserialize_many_sparse")
 def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None):
   """Deserialize and concatenate `SparseTensors` from a serialized minibatch.
 
@@ -1712,7 +1734,7 @@ def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None):
 
   The input `SparseTensor` objects' indices are assumed ordered in
   standard lexicographic order.  If this is not the case, after this
-  step run `sparse_reorder` to restore index ordering.
+  step run `sparse.reorder` to restore index ordering.
 
   For example, if the serialized input is a `[2, 3]` matrix representing two
   original `SparseTensor` objects:
@@ -1764,7 +1786,8 @@ def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None):
   return sparse_tensor.SparseTensor(output_indices, output_values, output_shape)
 
 
-@tf_export("sparse_tensor_dense_matmul")
+@tf_export("sparse.matmul", "sparse_tensor_dense_matmul")
+@deprecation.deprecated_endpoints("sparse_tensor_dense_matmul")
 def sparse_tensor_dense_matmul(sp_a,
                                b,
                                adjoint_a=False,
@@ -1777,7 +1800,7 @@ def sparse_tensor_dense_matmul(sp_a,
   following input format is recommended for optimal behavior:
 
   * If `adjoint_a == false`: `A` should be sorted in lexicographically
-    increasing order.  Use `sparse_reorder` if you're not sure.
+    increasing order.  Use `sparse.reorder` if you're not sure.
   * If `adjoint_a == true`: `A` should be sorted in order of increasing
     dimension 1 (i.e., "column major" order instead of "row major" order).
 
@@ -1981,7 +2004,8 @@ def sparse_tensor_dense_matmul(sp_a,
         adjoint_b=adjoint_b)
 
 
-@tf_export("sparse_softmax")
+@tf_export("sparse.softmax", "sparse_softmax")
+@deprecation.deprecated_endpoints("sparse_softmax")
 def sparse_softmax(sp_input, name=None):
   """Applies softmax to a batched N-D `SparseTensor`.
 
@@ -2036,7 +2060,8 @@ def sparse_softmax(sp_input, name=None):
                                       sp_input.dense_shape)
 
 
-@tf_export("sparse_maximum")
+@tf_export("sparse.maximum", "sparse_maximum")
+@deprecation.deprecated_endpoints("sparse_maximum")
 def sparse_maximum(sp_a, sp_b, name=None):
   """Returns the element-wise max of two SparseTensors.
 
@@ -2073,7 +2098,8 @@ def sparse_maximum(sp_a, sp_b, name=None):
   return sparse_tensor.SparseTensor(out_indices, out_values, sp_a.dense_shape)
 
 
-@tf_export("sparse_minimum")
+@tf_export("sparse.minimum", "sparse_minimum")
+@deprecation.deprecated_endpoints("sparse_minimum")
 def sparse_minimum(sp_a, sp_b, name=None):
   """Returns the element-wise min of two SparseTensors.
 
@@ -2110,7 +2136,8 @@ def sparse_minimum(sp_a, sp_b, name=None):
   return sparse_tensor.SparseTensor(out_indices, out_values, sp_a.dense_shape)
 
 
-@tf_export("sparse_transpose")
+@tf_export("sparse.transpose", "sparse_transpose")
+@deprecation.deprecated_endpoints("sparse_transpose")
 def sparse_transpose(sp_input, perm=None, name=None):
   """Transposes a `SparseTensor`
 
@@ -2259,7 +2286,7 @@ def _take_many_sparse_from_tensors_map(sparse_map_op,
 
   The input `SparseTensor` objects' indices are assumed ordered in
   standard lexicographic order.  If this is not the case, after this
-  step run `sparse_reorder` to restore index ordering.
+  step run `sparse.reorder` to restore index ordering.
 
   For example, if the serialized input is a `[2, 3]` matrix representing two
   original `SparseTensor` objects:
diff --git a/tensorflow/python/ops/special_math_ops.py b/tensorflow/python/ops/special_math_ops.py
index 9a10abfcf7..cfab943896 100644
--- a/tensorflow/python/ops/special_math_ops.py
+++ b/tensorflow/python/ops/special_math_ops.py
@@ -29,11 +29,13 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
 # TODO(b/27419586) Change docstring for required dtype of x once int allowed
-@tf_export('lbeta')
+@tf_export('math.lbeta', 'lbeta')
+@deprecation.deprecated_endpoints('lbeta')
 def lbeta(x, name=None):
   r"""Computes \\(ln(|Beta(x)|)\\), reducing along the last dimension.
 
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index 046a48d192..e83c08f643 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -310,8 +310,9 @@ def _reduce_join_reduction_dims(x, axis, reduction_indices):
     return math_ops.range(array_ops.rank(x) - 1, -1, -1)
 
 
-@tf_export("reduce_join")
-def reduce_join(inputs, axis=None,
+@tf_export("strings.reduce_join", "reduce_join")
+@deprecation.deprecated_endpoints("reduce_join")
+def reduce_join(inputs, axis=None,  # pylint: disable=missing-docstring
                 keep_dims=False,
                 separator="",
                 name=None,
@@ -329,6 +330,8 @@ def reduce_join(inputs, axis=None,
 
 reduce_join.__doc__ = deprecation.rewrite_argument_docstring(
     gen_string_ops.reduce_join.__doc__, "reduction_indices", "axis")
+reduce_join.__doc__ = reduce_join.__doc__.replace("tf.reduce_join(",
+                                                  "tf.strings.reduce_join(")
 
 
 # This wrapper provides backwards compatibility for code that predates the
diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py
index 8e7f123a85..8bf057f69d 100644
--- a/tensorflow/python/saved_model/builder_impl.py
+++ b/tensorflow/python/saved_model/builder_impl.py
@@ -36,10 +36,13 @@ from tensorflow.python.saved_model import utils_impl as saved_model_utils
 from tensorflow.python.training import saver as tf_saver
 from tensorflow.python.util import compat
 from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util.deprecation import deprecated_endpoints
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("saved_model.builder.SavedModelBuilder")
+@tf_export("saved_model.Builder",
+           "saved_model.builder.SavedModelBuilder")
+@deprecated_endpoints("saved_model.builder.SavedModelBuilder")
 class SavedModelBuilder(object):
   """Builds the `SavedModel` protocol buffer and saves variables and assets.
 
@@ -61,7 +64,7 @@ class SavedModelBuilder(object):
   Typical usage for the `SavedModelBuilder`:
   ```python
   ...
-  builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
+  builder = tf.saved_model.Builder(export_dir)
 
   with tf.Session(graph=tf.Graph()) as sess:
     ...
diff --git a/tensorflow/python/saved_model/loader_impl.py b/tensorflow/python/saved_model/loader_impl.py
index e8536108e8..895644a030 100644
--- a/tensorflow/python/saved_model/loader_impl.py
+++ b/tensorflow/python/saved_model/loader_impl.py
@@ -34,6 +34,7 @@ from tensorflow.python.saved_model import constants
 from tensorflow.python.saved_model import utils_impl as saved_model_utils
 from tensorflow.python.training import saver as tf_saver
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -144,7 +145,10 @@ def _get_main_op_tensor(
   return main_op_tensor
 
 
-@tf_export("saved_model.loader.maybe_saved_model_directory")
+@tf_export("saved_model.maybe_saved_model_directory",
+           "saved_model.loader.maybe_saved_model_directory")
+@deprecation.deprecated_endpoints(
+    "saved_model.loader.maybe_saved_model_directory")
 def maybe_saved_model_directory(export_dir):
   """Checks whether the provided export directory could contain a SavedModel.
 
@@ -165,7 +169,7 @@ def maybe_saved_model_directory(export_dir):
   return file_io.file_exists(txt_path) or file_io.file_exists(pb_path)
 
 
-@tf_export("saved_model.loader.load")
+@tf_export("saved_model.load", "saved_model.loader.load")
 def load(sess, tags, export_dir, import_scope=None, **saver_kwargs):
   """Loads the model from a SavedModel as specified by tags.
 
diff --git a/tensorflow/python/saved_model/main_op_impl.py b/tensorflow/python/saved_model/main_op_impl.py
index 631ee63729..ad4511b28e 100644
--- a/tensorflow/python/saved_model/main_op_impl.py
+++ b/tensorflow/python/saved_model/main_op_impl.py
@@ -22,6 +22,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -42,7 +43,9 @@ def main_op():
 
 
 # TODO(sukritiramesh): Integrate with Saver for complete restore functionality.
-@tf_export('saved_model.main_op.main_op_with_restore')
+@tf_export('saved_model.main_op_with_restore',
+           'saved_model.main_op.main_op_with_restore')
+@deprecation.deprecated_endpoints('saved_model.main_op.main_op_with_restore')
 def main_op_with_restore(restore_op_name):
   """Returns a main op to init variables, tables and restore the graph.
 
diff --git a/tensorflow/python/saved_model/signature_def_utils_impl.py b/tensorflow/python/saved_model/signature_def_utils_impl.py
index 37f927f381..a1034416e9 100644
--- a/tensorflow/python/saved_model/signature_def_utils_impl.py
+++ b/tensorflow/python/saved_model/signature_def_utils_impl.py
@@ -24,10 +24,14 @@ from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.python.framework import ops
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.saved_model import utils
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('saved_model.signature_def_utils.build_signature_def')
+@tf_export('saved_model.build_signature_def',
+           'saved_model.signature_def_utils.build_signature_def')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.build_signature_def')
 def build_signature_def(inputs=None, outputs=None, method_name=None):
   """Utility function to build a SignatureDef protocol buffer.
 
@@ -53,7 +57,10 @@ def build_signature_def(inputs=None, outputs=None, method_name=None):
   return signature_def
 
 
-@tf_export('saved_model.signature_def_utils.regression_signature_def')
+@tf_export('saved_model.regression_signature_def',
+           'saved_model.signature_def_utils.regression_signature_def')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.regression_signature_def')
 def regression_signature_def(examples, predictions):
   """Creates regression signature from given examples and predictions.
 
@@ -95,7 +102,10 @@ def regression_signature_def(examples, predictions):
   return signature_def
 
 
-@tf_export('saved_model.signature_def_utils.classification_signature_def')
+@tf_export('saved_model.classification_signature_def',
+           'saved_model.signature_def_utils.classification_signature_def')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.classification_signature_def')
 def classification_signature_def(examples, classes, scores):
   """Creates classification signature from given examples and predictions.
 
@@ -148,7 +158,10 @@ def classification_signature_def(examples, classes, scores):
   return signature_def
 
 
-@tf_export('saved_model.signature_def_utils.predict_signature_def')
+@tf_export('saved_model.predict_signature_def',
+           'saved_model.signature_def_utils.predict_signature_def')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.predict_signature_def')
 def predict_signature_def(inputs, outputs):
   """Creates prediction signature from given inputs and outputs.
 
@@ -239,7 +252,10 @@ def _supervised_signature_def(
   return signature_def
 
 
-@tf_export('saved_model.signature_def_utils.is_valid_signature')
+@tf_export('saved_model.is_valid_signature',
+           'saved_model.signature_def_utils.is_valid_signature')
+@deprecation.deprecated_endpoints(
+    'saved_model.signature_def_utils.is_valid_signature')
 def is_valid_signature(signature_def):
   """Determine whether a SignatureDef can be served by TensorFlow Serving."""
   if signature_def is None:
@@ -313,4 +329,3 @@ def _is_valid_classification_signature(signature_def):
     return False
 
   return True
-
diff --git a/tensorflow/python/saved_model/utils_impl.py b/tensorflow/python/saved_model/utils_impl.py
index 06d09325c8..0bba7b6fac 100644
--- a/tensorflow/python/saved_model/utils_impl.py
+++ b/tensorflow/python/saved_model/utils_impl.py
@@ -27,13 +27,16 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.saved_model import constants
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
 # TensorInfo helpers.
 
 
-@tf_export("saved_model.utils.build_tensor_info")
+@tf_export("saved_model.build_tensor_info",
+           "saved_model.utils.build_tensor_info")
+@deprecation.deprecated_endpoints("saved_model.utils.build_tensor_info")
 def build_tensor_info(tensor):
   """Utility function to build TensorInfo proto.
 
@@ -57,7 +60,10 @@ def build_tensor_info(tensor):
   return tensor_info
 
 
-@tf_export("saved_model.utils.get_tensor_from_tensor_info")
+@tf_export("saved_model.get_tensor_from_tensor_info",
+           "saved_model.utils.get_tensor_from_tensor_info")
+@deprecation.deprecated_endpoints(
+    "saved_model.utils.get_tensor_from_tensor_info")
 def get_tensor_from_tensor_info(tensor_info, graph=None, import_scope=None):
   """Returns the Tensor or SparseTensor described by a TensorInfo proto.
 
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 92446e2f8f..5ce5410e0b 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -69,6 +69,7 @@ TENSORFLOW_API_INIT_FILES = [
     "profiler/__init__.py",
     "python_io/__init__.py",
     "quantization/__init__.py",
+    "random/__init__.py",
     "resource_loader/__init__.py",
     "strings/__init__.py",
     "saved_model/__init__.py",
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index bc2f3516d1..587eb232f5 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -69,6 +69,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "profiler/__init__.py",
     "python_io/__init__.py",
     "quantization/__init__.py",
+    "random/__init__.py",
     "resource_loader/__init__.py",
     "strings/__init__.py",
     "saved_model/__init__.py",
diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py
index 9d9db70890..eb131ac9f7 100644
--- a/tensorflow/python/training/input.py
+++ b/tensorflow/python/training/input.py
@@ -56,7 +56,8 @@ _restore_sparse = sparse_ops._take_many_sparse_from_tensors_map
 # pylint: enable=protected-access
 
 
-@tf_export("train.match_filenames_once")
+@tf_export("io.match_filenames_once", "train.match_filenames_once")
+@deprecation.deprecated_endpoints("train.match_filenames_once")
 def match_filenames_once(pattern, name=None):
   """Save the list of files matching pattern, so it is only computed once.
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.debugging.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.debugging.pbtxt
index d9efe97821..ab6287f8cd 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.debugging.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.debugging.pbtxt
@@ -1,5 +1,89 @@
 path: "tensorflow.debugging"
 tf_module {
+  member_method {
+    name: "Assert"
+    argspec: "args=[\'condition\', \'data\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_all_finite"
+    argspec: "args=[\'t\', \'msg\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "assert_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_greater"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_greater_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_integer"
+    argspec: "args=[\'x\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_less"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_less_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_near"
+    argspec: "args=[\'x\', \'y\', \'rtol\', \'atol\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_negative"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_non_negative"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_non_positive"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_none_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_positive"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_proper_iterable"
+    argspec: "args=[\'values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "assert_rank"
+    argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_rank_at_least"
+    argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_rank_in"
+    argspec: "args=[\'x\', \'ranks\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_same_float_dtype"
+    argspec: "args=[\'tensors\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_scalar"
+    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "assert_type"
+    argspec: "args=[\'tensor\', \'tf_type\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "check_numerics"
     argspec: "args=[\'tensor\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,4 +100,16 @@ tf_module {
     name: "is_nan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "is_non_decreasing"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "is_numeric_tensor"
+    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_strictly_increasing"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.-d-type.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.-d-type.pbtxt
new file mode 100644
index 0000000000..423eca32a2
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.-d-type.pbtxt
@@ -0,0 +1,77 @@
+path: "tensorflow.dtypes.DType"
+tf_class {
+  is_instance: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "as_datatype_enum"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "as_numpy_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "base_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_bool"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_complex"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_floating"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_integer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_numpy_compatible"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_quantized"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_unsigned"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "limits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "max"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "min"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "real_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "size"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'type_enum\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
index 98e1feed00..ea23feca84 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
@@ -1,7 +1,27 @@
 path: "tensorflow.dtypes"
 tf_module {
+  member {
+    name: "DType"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "as_dtype"
+    argspec: "args=[\'type_value\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "as_string"
     argspec: "args=[\'input\', \'precision\', \'scientific\', \'shortest\', \'width\', \'fill\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'False\', \'False\', \'-1\', \'\', \'None\'], "
   }
+  member_method {
+    name: "cast"
+    argspec: "args=[\'x\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "complex"
+    argspec: "args=[\'real\', \'imag\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "saturate_cast"
+    argspec: "args=[\'value\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.graph_util.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.graph_util.pbtxt
index eeabf845dc..162ee76ee7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.graph_util.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.graph_util.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "extract_sub_graph"
     argspec: "args=[\'graph_def\', \'dest_nodes\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "import_graph_def"
+    argspec: "args=[\'graph_def\', \'input_map\', \'return_elements\', \'name\', \'op_dict\', \'producer_op_list\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "must_run_on_cpu"
     argspec: "args=[\'node\', \'pin_variables_on_cpu\'], varargs=None, keywords=None, defaults=[\'False\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.image.pbtxt
index 5c46dc5ee7..0a231f1b65 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.image.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.image.pbtxt
@@ -148,6 +148,10 @@ tf_module {
     name: "random_contrast"
     argspec: "args=[\'image\', \'lower\', \'upper\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "random_crop"
+    argspec: "args=[\'value\', \'size\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "random_flip_left_right"
     argspec: "args=[\'image\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.initializers.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.initializers.pbtxt
index d499c67d89..19ca62122e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.initializers.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.initializers.pbtxt
@@ -72,6 +72,10 @@ tf_module {
     name: "local_variables"
     argspec: "args=[], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "tables_initializer"
+    argspec: "args=[\'name\'], varargs=None, keywords=None, defaults=[\'init_all_tables\'], "
+  }
   member_method {
     name: "variables"
     argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'init\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-feature.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-feature.pbtxt
new file mode 100644
index 0000000000..cd0e51c8c7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-feature.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.io.FixedLenFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "default_value"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-sequence-feature.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-sequence-feature.pbtxt
new file mode 100644
index 0000000000..8a38f25fdf
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-fixed-len-sequence-feature.pbtxt
@@ -0,0 +1,31 @@
+path: "tensorflow.io.FixedLenSequenceFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenSequenceFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenSequenceFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "allow_missing"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "default_value"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-padding-f-i-f-o-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
new file mode 100644
index 0000000000..85306fdcac
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.PaddingFIFOQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PaddingFIFOQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'dtypes\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'padding_fifo_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-priority-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-priority-queue.pbtxt
new file mode 100644
index 0000000000..02d8037b34
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-priority-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.PriorityQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PriorityQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'types\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'priority_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-queue-base.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-queue-base.pbtxt
new file mode 100644
index 0000000000..a30481a0ea
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-queue-base.pbtxt
@@ -0,0 +1,65 @@
+path: "tensorflow.io.QueueBase"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtypes\', \'shapes\', \'names\', \'queue_ref\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-random-shuffle-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-random-shuffle-queue.pbtxt
new file mode 100644
index 0000000000..82cbf9884f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-random-shuffle-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.RandomShuffleQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.RandomShuffleQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'min_after_dequeue\', \'dtypes\', \'shapes\', \'names\', \'seed\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'random_shuffle_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-sparse-feature.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-sparse-feature.pbtxt
new file mode 100644
index 0000000000..216947b4ed
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-sparse-feature.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.io.SparseFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.SparseFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.SparseFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "already_sorted"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "index_key"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "size"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "value_key"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-compression-type.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-compression-type.pbtxt
new file mode 100644
index 0000000000..b598f73d7e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-compression-type.pbtxt
@@ -0,0 +1,20 @@
+path: "tensorflow.io.TFRecordCompressionType"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordCompressionType\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "GZIP"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "NONE"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "ZLIB"
+    mtype: "<type \'int\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-options.pbtxt
new file mode 100644
index 0000000000..bfbf37ccf4
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-options.pbtxt
@@ -0,0 +1,17 @@
+path: "tensorflow.io.TFRecordOptions"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordOptions\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "compression_type_map"
+    mtype: "<type \'dict\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'compression_type\', \'flush_mode\', \'input_buffer_size\', \'output_buffer_size\', \'window_bits\', \'compression_level\', \'compression_method\', \'mem_level\', \'compression_strategy\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "get_compression_type_string"
+    argspec: "args=[\'cls\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-writer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-writer.pbtxt
new file mode 100644
index 0000000000..6fd443f6d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-t-f-record-writer.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.io.TFRecordWriter"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordWriter\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flush"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "write"
+    argspec: "args=[\'self\', \'record\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.-var-len-feature.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.-var-len-feature.pbtxt
new file mode 100644
index 0000000000..fd835dbfbb
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.-var-len-feature.pbtxt
@@ -0,0 +1,19 @@
+path: "tensorflow.io.VarLenFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.VarLenFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.VarLenFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt
index 8938cf217b..dccf136788 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt
@@ -1,5 +1,49 @@
 path: "tensorflow.io"
 tf_module {
+  member {
+    name: "FixedLenFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "FixedLenSequenceFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PaddingFIFOQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PriorityQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "QueueBase"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomShuffleQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordCompressionType"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordOptions"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordWriter"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "VarLenFeature"
+    mtype: "<type \'type\'>"
+  }
   member_method {
     name: "decode_base64"
     argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -8,6 +52,10 @@ tf_module {
     name: "decode_compressed"
     argspec: "args=[\'bytes\', \'compression_type\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'None\'], "
   }
+  member_method {
+    name: "decode_csv"
+    argspec: "args=[\'records\', \'record_defaults\', \'field_delim\', \'use_quote_delim\', \'name\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\',\', \'True\', \'None\', \'\', \'None\'], "
+  }
   member_method {
     name: "decode_json_example"
     argspec: "args=[\'json_examples\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,18 +64,38 @@ tf_module {
     name: "decode_raw"
     argspec: "args=[\'bytes\', \'out_type\', \'little_endian\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
   }
+  member_method {
+    name: "deserialize_many_sparse"
+    argspec: "args=[\'serialized_sparse\', \'dtype\', \'rank\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "encode_base64"
     argspec: "args=[\'input\', \'pad\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
+  member_method {
+    name: "match_filenames_once"
+    argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "matching_files"
     argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "parse_example"
+    argspec: "args=[\'serialized\', \'features\', \'name\', \'example_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "parse_sequence_example"
     argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_names\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "parse_single_example"
+    argspec: "args=[\'serialized\', \'features\', \'name\', \'example_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "parse_single_sequence_example"
+    argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "parse_tensor"
     argspec: "args=[\'serialized\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -36,8 +104,24 @@ tf_module {
     name: "read_file"
     argspec: "args=[\'filename\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "serialize_many_sparse"
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "serialize_sparse"
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "tf_record_iterator"
+    argspec: "args=[\'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "write_file"
     argspec: "args=[\'filename\', \'contents\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "write_graph"
+    argspec: "args=[\'graph_or_graph_def\', \'logdir\', \'name\', \'as_text\'], varargs=None, keywords=None, defaults=[\'True\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt
index d979116887..6ac95d96da 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.linalg.pbtxt
@@ -108,10 +108,18 @@ tf_module {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "global_norm"
+    argspec: "args=[\'t_list\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "inv"
     argspec: "args=[\'input\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
+  member_method {
+    name: "l2_normalize"
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
+  }
   member_method {
     name: "logdet"
     argspec: "args=[\'matrix\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -124,6 +132,10 @@ tf_module {
     name: "lstsq"
     argspec: "args=[\'matrix\', \'rhs\', \'l2_regularizer\', \'fast\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'True\', \'None\'], "
   }
+  member_method {
+    name: "matmul"
+    argspec: "args=[\'a\', \'b\', \'transpose_a\', \'transpose_b\', \'adjoint_a\', \'adjoint_b\', \'a_is_sparse\', \'b_is_sparse\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'False\', \'None\'], "
+  }
   member_method {
     name: "norm"
     argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
index 72856466ec..459b9e3684 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.math.pbtxt
@@ -1,5 +1,13 @@
 path: "tensorflow.math"
 tf_module {
+  member_method {
+    name: "abs"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "accumulate_n"
+    argspec: "args=[\'inputs\', \'shape\', \'tensor_dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "acos"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -12,6 +20,22 @@ tf_module {
     name: "add"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "add_n"
+    argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "angle"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "argmax"
+    argspec: "args=[\'input\', \'axis\', \'name\', \'dimension\', \'output_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int64\'>\"], "
+  }
+  member_method {
+    name: "argmin"
+    argspec: "args=[\'input\', \'axis\', \'name\', \'dimension\', \'output_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int64\'>\"], "
+  }
   member_method {
     name: "asin"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -52,10 +76,18 @@ tf_module {
     name: "betainc"
     argspec: "args=[\'a\', \'b\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "bincount"
+    argspec: "args=[\'arr\', \'weights\', \'minlength\', \'maxlength\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int32\'>\"], "
+  }
   member_method {
     name: "ceil"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "conj"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "cos"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -64,14 +96,34 @@ tf_module {
     name: "cosh"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "count_nonzero"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'int64\'>\", \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "cumprod"
+    argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "cumsum"
+    argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
+  }
   member_method {
     name: "digamma"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "divide"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "equal"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "erf"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "erfc"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -88,6 +140,10 @@ tf_module {
     name: "floor"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "floordiv"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "greater"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -104,10 +160,26 @@ tf_module {
     name: "igammac"
     argspec: "args=[\'a\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "imag"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "in_top_k"
+    argspec: "args=[\'predictions\', \'targets\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "invert_permutation"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "l2_normalize"
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "lbeta"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "less"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -128,6 +200,14 @@ tf_module {
     name: "log1p"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "log_sigmoid"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "log_softmax"
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "logical_and"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -140,6 +220,10 @@ tf_module {
     name: "logical_or"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "logical_xor"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'LogicalXor\'], "
+  }
   member_method {
     name: "maximum"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -148,6 +232,14 @@ tf_module {
     name: "minimum"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "multiply"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "negative"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "not_equal"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -160,18 +252,66 @@ tf_module {
     name: "polyval"
     argspec: "args=[\'coeffs\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "pow"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "real"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "reciprocal"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "reduce_all"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_any"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_logsumexp"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_mean"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_min"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_prod"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "rint"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "round"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "rsqrt"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "scalar_mul"
+    argspec: "args=[\'scalar\', \'x\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -192,6 +332,14 @@ tf_module {
     name: "segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "sigmoid"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "sign"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "sin"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -200,6 +348,10 @@ tf_module {
     name: "sinh"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "softmax"
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "softplus"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -208,18 +360,46 @@ tf_module {
     name: "softsign"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "sqrt"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "square"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "squared_difference"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "subtract"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "tan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "tanh"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "top_k"
+    argspec: "args=[\'input\', \'k\', \'sorted\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'True\', \'None\'], "
+  }
+  member_method {
+    name: "truediv"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unsorted_segment_mean"
+    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_min"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -228,6 +408,10 @@ tf_module {
     name: "unsorted_segment_prod"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unsorted_segment_sqrt_n"
+    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -240,6 +424,10 @@ tf_module {
     name: "xlogy"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "zero_fraction"
+    argspec: "args=[\'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "zeta"
     argspec: "args=[\'x\', \'q\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
index d9e5b0d0fc..9b28ce5746 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
@@ -100,6 +100,10 @@ tf_module {
     name: "ctc_loss"
     argspec: "args=[\'labels\', \'inputs\', \'sequence_length\', \'preprocess_collapse_repeated\', \'ctc_merge_repeated\', \'ignore_longer_outputs_than_inputs\', \'time_major\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'False\', \'True\'], "
   }
+  member_method {
+    name: "depth_to_space"
+    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+  }
   member_method {
     name: "depthwise_conv2d"
     argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'rate\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
@@ -304,6 +308,14 @@ tf_module {
     name: "softsign"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "space_to_batch"
+    argspec: "args=[\'input\', \'paddings\', \'block_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "space_to_depth"
+    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+  }
   member_method {
     name: "sparse_softmax_cross_entropy_with_logits"
     argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 509ceff9df..a268529c1f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -496,6 +496,10 @@ tf_module {
     name: "quint8"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "random"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "random_normal_initializer"
     mtype: "<type \'type\'>"
@@ -1744,6 +1748,10 @@ tf_module {
     name: "rint"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "roll"
+    argspec: "args=[\'input\', \'shift\', \'axis\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "round"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
index 6d865efed0..77c92aeb0d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
@@ -28,6 +28,10 @@ tf_module {
     name: "fake_quant_with_min_max_vars_per_channel_gradient"
     argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'num_bits\', \'narrow_range\', \'name\'], varargs=None, keywords=None, defaults=[\'8\', \'False\', \'None\'], "
   }
+  member_method {
+    name: "quantize"
+    argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'round_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'HALF_AWAY_FROM_ZERO\', \'None\'], "
+  }
   member_method {
     name: "quantized_concat"
     argspec: "args=[\'concat_dim\', \'values\', \'input_mins\', \'input_maxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
new file mode 100644
index 0000000000..a568dd4cd8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
@@ -0,0 +1,47 @@
+path: "tensorflow.random"
+tf_module {
+  member_method {
+    name: "gamma"
+    argspec: "args=[\'shape\', \'alpha\', \'beta\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "get_seed"
+    argspec: "args=[\'op_seed\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "log_uniform_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "multinomial"
+    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\', \'output_dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "normal"
+    argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "poisson"
+    argspec: "args=[\'lam\', \'shape\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_random_seed"
+    argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'value\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "truncated_normal"
+    argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "uniform"
+    argspec: "args=[\'shape\', \'minval\', \'maxval\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "uniform_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.-builder.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.-builder.pbtxt
new file mode 100644
index 0000000000..67457de070
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.-builder.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.saved_model.Builder"
+tf_class {
+  is_instance: "<class \'tensorflow.python.saved_model.builder_impl.SavedModelBuilder\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'export_dir\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "add_meta_graph"
+    argspec: "args=[\'self\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "add_meta_graph_and_variables"
+    argspec: "args=[\'self\', \'sess\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "save"
+    argspec: "args=[\'self\', \'as_text\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
index e1a0385092..3f4965fc69 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
@@ -1,5 +1,9 @@
 path: "tensorflow.saved_model"
 tf_module {
+  member {
+    name: "Builder"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "builder"
     mtype: "<type \'module\'>"
@@ -32,6 +36,46 @@ tf_module {
     name: "utils"
     mtype: "<type \'module\'>"
   }
+  member_method {
+    name: "build_signature_def"
+    argspec: "args=[\'inputs\', \'outputs\', \'method_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "build_tensor_info"
+    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "classification_signature_def"
+    argspec: "args=[\'examples\', \'classes\', \'scores\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_tensor_from_tensor_info"
+    argspec: "args=[\'tensor_info\', \'graph\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "is_valid_signature"
+    argspec: "args=[\'signature_def\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "load"
+    argspec: "args=[\'sess\', \'tags\', \'export_dir\', \'import_scope\'], varargs=None, keywords=saver_kwargs, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "main_op_with_restore"
+    argspec: "args=[\'restore_op_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "maybe_saved_model_directory"
+    argspec: "args=[\'export_dir\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "predict_signature_def"
+    argspec: "args=[\'inputs\', \'outputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "regression_signature_def"
+    argspec: "args=[\'examples\', \'predictions\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "simple_save"
     argspec: "args=[\'session\', \'export_dir\', \'inputs\', \'outputs\', \'legacy_init_op\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
new file mode 100644
index 0000000000..cd97716c9d
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
@@ -0,0 +1,46 @@
+path: "tensorflow.sparse.SparseConditionalAccumulator"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.SparseConditionalAccumulator\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.ConditionalAccumulatorBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "accumulator_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtype\', \'shape\', \'shared_name\', \'name\', \'reduction_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'sparse_conditional_accumulator\', \'MEAN\'], "
+  }
+  member_method {
+    name: "apply_grad"
+    argspec: "args=[\'self\', \'grad_indices\', \'grad_values\', \'grad_shape\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], "
+  }
+  member_method {
+    name: "apply_indexed_slices_grad"
+    argspec: "args=[\'self\', \'grad\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\'], "
+  }
+  member_method {
+    name: "num_accumulated"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "set_global_step"
+    argspec: "args=[\'self\', \'new_global_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "take_grad"
+    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "take_indexed_slices_grad"
+    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt
new file mode 100644
index 0000000000..02e59a63e1
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt
@@ -0,0 +1,54 @@
+path: "tensorflow.sparse.SparseTensor"
+tf_class {
+  is_instance: "<class \'tensorflow.python.framework.sparse_tensor.SparseTensor\'>"
+  is_instance: "<class \'tensorflow.python.framework.ops._TensorLike\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dense_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "graph"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "indices"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "op"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "values"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'indices\', \'values\', \'dense_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "consumers"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "eval"
+    argspec: "args=[\'self\', \'feed_dict\', \'session\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'cls\', \'sparse_tensor_value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_shape"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt
index ba9e651b34..32bd8d5f8e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt
@@ -1,5 +1,21 @@
 path: "tensorflow.sparse"
 tf_module {
+  member {
+    name: "SparseConditionalAccumulator"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseTensor"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "add"
+    argspec: "args=[\'a\', \'b\', \'thresh\'], varargs=None, keywords=None, defaults=[\'0\'], "
+  }
+  member_method {
+    name: "concat"
+    argspec: "args=[\'axis\', \'sp_inputs\', \'name\', \'expand_nonconcat_dim\', \'concat_dim\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
   member_method {
     name: "cross"
     argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,4 +32,100 @@ tf_module {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "fill_empty_rows"
+    argspec: "args=[\'sp_input\', \'default_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "mask"
+    argspec: "args=[\'a\', \'mask_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "matmul"
+    argspec: "args=[\'sp_a\', \'b\', \'adjoint_a\', \'adjoint_b\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "maximum"
+    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "merge"
+    argspec: "args=[\'sp_ids\', \'sp_values\', \'vocab_size\', \'name\', \'already_sorted\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "minimum"
+    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "placeholder"
+    argspec: "args=[\'dtype\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max_sparse"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum_sparse"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reorder"
+    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "reset_shape"
+    argspec: "args=[\'sp_input\', \'new_shape\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "reshape"
+    argspec: "args=[\'sp_input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "retain"
+    argspec: "args=[\'sp_input\', \'to_retain\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "segment_mean"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "segment_sqrt_n"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "segment_sum"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "slice"
+    argspec: "args=[\'sp_input\', \'start\', \'size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "softmax"
+    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "split"
+    argspec: "args=[\'keyword_required\', \'sp_input\', \'num_split\', \'axis\', \'name\', \'split_dim\'], varargs=None, keywords=None, defaults=[\'KeywordRequired()\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "to_dense"
+    argspec: "args=[\'sp_input\', \'default_value\', \'validate_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'True\', \'None\'], "
+  }
+  member_method {
+    name: "to_indicator"
+    argspec: "args=[\'sp_input\', \'vocab_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "transpose"
+    argspec: "args=[\'sp_input\', \'perm\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
index 312e94b41d..ebdaf57231 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "length"
     argspec: "args=[\'input\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
+  member_method {
+    name: "reduce_join"
+    argspec: "args=[\'inputs\', \'axis\', \'keep_dims\', \'separator\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'\', \'None\', \'None\'], "
+  }
   member_method {
     name: "regex_full_match"
     argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt
index 9f35395284..45c81fdd3b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt
@@ -272,6 +272,10 @@ tf_module {
     name: "checkpoint_exists"
     argspec: "args=[\'checkpoint_prefix\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "confusion_matrix"
+    argspec: "args=[\'labels\', \'predictions\', \'num_classes\', \'dtype\', \'name\', \'weights\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'int32\'>\", \'None\', \'None\'], "
+  }
   member_method {
     name: "cosine_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.debugging.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.debugging.pbtxt
index d9efe97821..ab6287f8cd 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.debugging.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.debugging.pbtxt
@@ -1,5 +1,89 @@
 path: "tensorflow.debugging"
 tf_module {
+  member_method {
+    name: "Assert"
+    argspec: "args=[\'condition\', \'data\', \'summarize\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_all_finite"
+    argspec: "args=[\'t\', \'msg\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "assert_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_greater"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_greater_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_integer"
+    argspec: "args=[\'x\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_less"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_less_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_near"
+    argspec: "args=[\'x\', \'y\', \'rtol\', \'atol\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_negative"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_non_negative"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_non_positive"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_none_equal"
+    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_positive"
+    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_proper_iterable"
+    argspec: "args=[\'values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "assert_rank"
+    argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_rank_at_least"
+    argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_rank_in"
+    argspec: "args=[\'x\', \'ranks\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_same_float_dtype"
+    argspec: "args=[\'tensors\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "assert_scalar"
+    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "assert_type"
+    argspec: "args=[\'tensor\', \'tf_type\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "check_numerics"
     argspec: "args=[\'tensor\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,4 +100,16 @@ tf_module {
     name: "is_nan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "is_non_decreasing"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "is_numeric_tensor"
+    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_strictly_increasing"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.-d-type.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.-d-type.pbtxt
new file mode 100644
index 0000000000..423eca32a2
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.-d-type.pbtxt
@@ -0,0 +1,77 @@
+path: "tensorflow.dtypes.DType"
+tf_class {
+  is_instance: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "as_datatype_enum"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "as_numpy_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "base_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_bool"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_complex"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_floating"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_integer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_numpy_compatible"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_quantized"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "is_unsigned"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "limits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "max"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "min"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "real_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "size"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'type_enum\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
index 98e1feed00..ea23feca84 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
@@ -1,7 +1,27 @@
 path: "tensorflow.dtypes"
 tf_module {
+  member {
+    name: "DType"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "as_dtype"
+    argspec: "args=[\'type_value\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "as_string"
     argspec: "args=[\'input\', \'precision\', \'scientific\', \'shortest\', \'width\', \'fill\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'False\', \'False\', \'-1\', \'\', \'None\'], "
   }
+  member_method {
+    name: "cast"
+    argspec: "args=[\'x\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "complex"
+    argspec: "args=[\'real\', \'imag\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "saturate_cast"
+    argspec: "args=[\'value\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.graph_util.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.graph_util.pbtxt
index eeabf845dc..162ee76ee7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.graph_util.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.graph_util.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "extract_sub_graph"
     argspec: "args=[\'graph_def\', \'dest_nodes\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "import_graph_def"
+    argspec: "args=[\'graph_def\', \'input_map\', \'return_elements\', \'name\', \'op_dict\', \'producer_op_list\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "must_run_on_cpu"
     argspec: "args=[\'node\', \'pin_variables_on_cpu\'], varargs=None, keywords=None, defaults=[\'False\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt
index 5c46dc5ee7..0a231f1b65 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt
@@ -148,6 +148,10 @@ tf_module {
     name: "random_contrast"
     argspec: "args=[\'image\', \'lower\', \'upper\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "random_crop"
+    argspec: "args=[\'value\', \'size\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "random_flip_left_right"
     argspec: "args=[\'image\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt
index e3c63fe737..d49181714f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt
@@ -64,4 +64,8 @@ tf_module {
     name: "lecun_uniform"
     argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "tables_initializer"
+    argspec: "args=[\'name\'], varargs=None, keywords=None, defaults=[\'init_all_tables\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-feature.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-feature.pbtxt
new file mode 100644
index 0000000000..cd0e51c8c7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-feature.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.io.FixedLenFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "default_value"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-sequence-feature.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-sequence-feature.pbtxt
new file mode 100644
index 0000000000..8a38f25fdf
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-fixed-len-sequence-feature.pbtxt
@@ -0,0 +1,31 @@
+path: "tensorflow.io.FixedLenSequenceFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenSequenceFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.FixedLenSequenceFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "allow_missing"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "default_value"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
new file mode 100644
index 0000000000..85306fdcac
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.PaddingFIFOQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PaddingFIFOQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'dtypes\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'padding_fifo_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt
new file mode 100644
index 0000000000..02d8037b34
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.PriorityQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PriorityQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'types\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'priority_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt
new file mode 100644
index 0000000000..a30481a0ea
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt
@@ -0,0 +1,65 @@
+path: "tensorflow.io.QueueBase"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtypes\', \'shapes\', \'names\', \'queue_ref\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt
new file mode 100644
index 0000000000..82cbf9884f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.io.RandomShuffleQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.RandomShuffleQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'min_after_dequeue\', \'dtypes\', \'shapes\', \'names\', \'seed\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'random_shuffle_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-sparse-feature.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-sparse-feature.pbtxt
new file mode 100644
index 0000000000..216947b4ed
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-sparse-feature.pbtxt
@@ -0,0 +1,35 @@
+path: "tensorflow.io.SparseFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.SparseFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.SparseFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "already_sorted"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "index_key"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "size"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "value_key"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-compression-type.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-compression-type.pbtxt
new file mode 100644
index 0000000000..b598f73d7e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-compression-type.pbtxt
@@ -0,0 +1,20 @@
+path: "tensorflow.io.TFRecordCompressionType"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordCompressionType\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "GZIP"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "NONE"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "ZLIB"
+    mtype: "<type \'int\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-options.pbtxt
new file mode 100644
index 0000000000..bfbf37ccf4
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-options.pbtxt
@@ -0,0 +1,17 @@
+path: "tensorflow.io.TFRecordOptions"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordOptions\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "compression_type_map"
+    mtype: "<type \'dict\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'compression_type\', \'flush_mode\', \'input_buffer_size\', \'output_buffer_size\', \'window_bits\', \'compression_level\', \'compression_method\', \'mem_level\', \'compression_strategy\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "get_compression_type_string"
+    argspec: "args=[\'cls\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-writer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-writer.pbtxt
new file mode 100644
index 0000000000..6fd443f6d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-t-f-record-writer.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.io.TFRecordWriter"
+tf_class {
+  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordWriter\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flush"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "write"
+    argspec: "args=[\'self\', \'record\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-var-len-feature.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.-var-len-feature.pbtxt
new file mode 100644
index 0000000000..fd835dbfbb
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.-var-len-feature.pbtxt
@@ -0,0 +1,19 @@
+path: "tensorflow.io.VarLenFeature"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.VarLenFeature\'>"
+  is_instance: "<class \'tensorflow.python.ops.parsing_ops.VarLenFeature\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
index 8938cf217b..dccf136788 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
@@ -1,5 +1,49 @@
 path: "tensorflow.io"
 tf_module {
+  member {
+    name: "FixedLenFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "FixedLenSequenceFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PaddingFIFOQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PriorityQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "QueueBase"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomShuffleQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseFeature"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordCompressionType"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordOptions"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordWriter"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "VarLenFeature"
+    mtype: "<type \'type\'>"
+  }
   member_method {
     name: "decode_base64"
     argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -8,6 +52,10 @@ tf_module {
     name: "decode_compressed"
     argspec: "args=[\'bytes\', \'compression_type\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'None\'], "
   }
+  member_method {
+    name: "decode_csv"
+    argspec: "args=[\'records\', \'record_defaults\', \'field_delim\', \'use_quote_delim\', \'name\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\',\', \'True\', \'None\', \'\', \'None\'], "
+  }
   member_method {
     name: "decode_json_example"
     argspec: "args=[\'json_examples\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,18 +64,38 @@ tf_module {
     name: "decode_raw"
     argspec: "args=[\'bytes\', \'out_type\', \'little_endian\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
   }
+  member_method {
+    name: "deserialize_many_sparse"
+    argspec: "args=[\'serialized_sparse\', \'dtype\', \'rank\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "encode_base64"
     argspec: "args=[\'input\', \'pad\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
+  member_method {
+    name: "match_filenames_once"
+    argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "matching_files"
     argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "parse_example"
+    argspec: "args=[\'serialized\', \'features\', \'name\', \'example_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "parse_sequence_example"
     argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_names\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "parse_single_example"
+    argspec: "args=[\'serialized\', \'features\', \'name\', \'example_names\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "parse_single_sequence_example"
+    argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "parse_tensor"
     argspec: "args=[\'serialized\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -36,8 +104,24 @@ tf_module {
     name: "read_file"
     argspec: "args=[\'filename\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "serialize_many_sparse"
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "serialize_sparse"
+    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
+  }
+  member_method {
+    name: "tf_record_iterator"
+    argspec: "args=[\'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "write_file"
     argspec: "args=[\'filename\', \'contents\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "write_graph"
+    argspec: "args=[\'graph_or_graph_def\', \'logdir\', \'name\', \'as_text\'], varargs=None, keywords=None, defaults=[\'True\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt
index d979116887..6ac95d96da 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.linalg.pbtxt
@@ -108,10 +108,18 @@ tf_module {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "global_norm"
+    argspec: "args=[\'t_list\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "inv"
     argspec: "args=[\'input\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
+  member_method {
+    name: "l2_normalize"
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
+  }
   member_method {
     name: "logdet"
     argspec: "args=[\'matrix\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -124,6 +132,10 @@ tf_module {
     name: "lstsq"
     argspec: "args=[\'matrix\', \'rhs\', \'l2_regularizer\', \'fast\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'True\', \'None\'], "
   }
+  member_method {
+    name: "matmul"
+    argspec: "args=[\'a\', \'b\', \'transpose_a\', \'transpose_b\', \'adjoint_a\', \'adjoint_b\', \'a_is_sparse\', \'b_is_sparse\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'False\', \'None\'], "
+  }
   member_method {
     name: "norm"
     argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keepdims\', \'name\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
index 72856466ec..459b9e3684 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
@@ -1,5 +1,13 @@
 path: "tensorflow.math"
 tf_module {
+  member_method {
+    name: "abs"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "accumulate_n"
+    argspec: "args=[\'inputs\', \'shape\', \'tensor_dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "acos"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -12,6 +20,22 @@ tf_module {
     name: "add"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "add_n"
+    argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "angle"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "argmax"
+    argspec: "args=[\'input\', \'axis\', \'name\', \'dimension\', \'output_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int64\'>\"], "
+  }
+  member_method {
+    name: "argmin"
+    argspec: "args=[\'input\', \'axis\', \'name\', \'dimension\', \'output_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int64\'>\"], "
+  }
   member_method {
     name: "asin"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -52,10 +76,18 @@ tf_module {
     name: "betainc"
     argspec: "args=[\'a\', \'b\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "bincount"
+    argspec: "args=[\'arr\', \'weights\', \'minlength\', \'maxlength\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int32\'>\"], "
+  }
   member_method {
     name: "ceil"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "conj"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "cos"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -64,14 +96,34 @@ tf_module {
     name: "cosh"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "count_nonzero"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'dtype\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"<dtype: \'int64\'>\", \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "cumprod"
+    argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "cumsum"
+    argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
+  }
   member_method {
     name: "digamma"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "divide"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "equal"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "erf"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "erfc"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -88,6 +140,10 @@ tf_module {
     name: "floor"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "floordiv"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "greater"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -104,10 +160,26 @@ tf_module {
     name: "igammac"
     argspec: "args=[\'a\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "imag"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "in_top_k"
+    argspec: "args=[\'predictions\', \'targets\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "invert_permutation"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "l2_normalize"
+    argspec: "args=[\'x\', \'axis\', \'epsilon\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1e-12\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "lbeta"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "less"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -128,6 +200,14 @@ tf_module {
     name: "log1p"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "log_sigmoid"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "log_softmax"
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "logical_and"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -140,6 +220,10 @@ tf_module {
     name: "logical_or"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "logical_xor"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'LogicalXor\'], "
+  }
   member_method {
     name: "maximum"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -148,6 +232,14 @@ tf_module {
     name: "minimum"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "multiply"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "negative"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "not_equal"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -160,18 +252,66 @@ tf_module {
     name: "polyval"
     argspec: "args=[\'coeffs\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "pow"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "real"
+    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "reciprocal"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "reduce_all"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_any"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_logsumexp"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_mean"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_min"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_prod"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum"
+    argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "rint"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "round"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "rsqrt"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "scalar_mul"
+    argspec: "args=[\'scalar\', \'x\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -192,6 +332,14 @@ tf_module {
     name: "segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "sigmoid"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "sign"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "sin"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -200,6 +348,10 @@ tf_module {
     name: "sinh"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "softmax"
+    argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
   member_method {
     name: "softplus"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -208,18 +360,46 @@ tf_module {
     name: "softsign"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "sqrt"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "square"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "squared_difference"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "subtract"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "tan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "tanh"
+    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "top_k"
+    argspec: "args=[\'input\', \'k\', \'sorted\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'True\', \'None\'], "
+  }
+  member_method {
+    name: "truediv"
+    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unsorted_segment_mean"
+    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_min"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -228,6 +408,10 @@ tf_module {
     name: "unsorted_segment_prod"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unsorted_segment_sqrt_n"
+    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "unsorted_segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -240,6 +424,10 @@ tf_module {
     name: "xlogy"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "zero_fraction"
+    argspec: "args=[\'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "zeta"
     argspec: "args=[\'x\', \'q\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
index d9e5b0d0fc..9b28ce5746 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
@@ -100,6 +100,10 @@ tf_module {
     name: "ctc_loss"
     argspec: "args=[\'labels\', \'inputs\', \'sequence_length\', \'preprocess_collapse_repeated\', \'ctc_merge_repeated\', \'ignore_longer_outputs_than_inputs\', \'time_major\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'False\', \'True\'], "
   }
+  member_method {
+    name: "depth_to_space"
+    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+  }
   member_method {
     name: "depthwise_conv2d"
     argspec: "args=[\'input\', \'filter\', \'strides\', \'padding\', \'rate\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
@@ -304,6 +308,14 @@ tf_module {
     name: "softsign"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "space_to_batch"
+    argspec: "args=[\'input\', \'paddings\', \'block_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "space_to_depth"
+    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+  }
   member_method {
     name: "sparse_softmax_cross_entropy_with_logits"
     argspec: "args=[\'_sentinel\', \'labels\', \'logits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index d2dc8bc85f..5b3ea75bce 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -456,6 +456,10 @@ tf_module {
     name: "quint8"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "random"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "random_normal_initializer"
     mtype: "<type \'type\'>"
@@ -1608,6 +1612,10 @@ tf_module {
     name: "rint"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "roll"
+    argspec: "args=[\'input\', \'shift\', \'axis\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "round"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
index 6d865efed0..77c92aeb0d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
@@ -28,6 +28,10 @@ tf_module {
     name: "fake_quant_with_min_max_vars_per_channel_gradient"
     argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'num_bits\', \'narrow_range\', \'name\'], varargs=None, keywords=None, defaults=[\'8\', \'False\', \'None\'], "
   }
+  member_method {
+    name: "quantize"
+    argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'round_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'HALF_AWAY_FROM_ZERO\', \'None\'], "
+  }
   member_method {
     name: "quantized_concat"
     argspec: "args=[\'concat_dim\', \'values\', \'input_mins\', \'input_maxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
new file mode 100644
index 0000000000..a568dd4cd8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
@@ -0,0 +1,47 @@
+path: "tensorflow.random"
+tf_module {
+  member_method {
+    name: "gamma"
+    argspec: "args=[\'shape\', \'alpha\', \'beta\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "get_seed"
+    argspec: "args=[\'op_seed\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "log_uniform_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "multinomial"
+    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\', \'output_dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "normal"
+    argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "poisson"
+    argspec: "args=[\'lam\', \'shape\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "set_random_seed"
+    argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'value\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "truncated_normal"
+    argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "uniform"
+    argspec: "args=[\'shape\', \'minval\', \'maxval\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
+  }
+  member_method {
+    name: "uniform_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.-builder.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.-builder.pbtxt
new file mode 100644
index 0000000000..67457de070
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.-builder.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.saved_model.Builder"
+tf_class {
+  is_instance: "<class \'tensorflow.python.saved_model.builder_impl.SavedModelBuilder\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'export_dir\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "add_meta_graph"
+    argspec: "args=[\'self\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "add_meta_graph_and_variables"
+    argspec: "args=[\'self\', \'sess\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "save"
+    argspec: "args=[\'self\', \'as_text\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
index e1a0385092..3f4965fc69 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
@@ -1,5 +1,9 @@
 path: "tensorflow.saved_model"
 tf_module {
+  member {
+    name: "Builder"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "builder"
     mtype: "<type \'module\'>"
@@ -32,6 +36,46 @@ tf_module {
     name: "utils"
     mtype: "<type \'module\'>"
   }
+  member_method {
+    name: "build_signature_def"
+    argspec: "args=[\'inputs\', \'outputs\', \'method_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "build_tensor_info"
+    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "classification_signature_def"
+    argspec: "args=[\'examples\', \'classes\', \'scores\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_tensor_from_tensor_info"
+    argspec: "args=[\'tensor_info\', \'graph\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "is_valid_signature"
+    argspec: "args=[\'signature_def\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "load"
+    argspec: "args=[\'sess\', \'tags\', \'export_dir\', \'import_scope\'], varargs=None, keywords=saver_kwargs, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "main_op_with_restore"
+    argspec: "args=[\'restore_op_name\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "maybe_saved_model_directory"
+    argspec: "args=[\'export_dir\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "predict_signature_def"
+    argspec: "args=[\'inputs\', \'outputs\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "regression_signature_def"
+    argspec: "args=[\'examples\', \'predictions\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "simple_save"
     argspec: "args=[\'session\', \'export_dir\', \'inputs\', \'outputs\', \'legacy_init_op\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
new file mode 100644
index 0000000000..cd97716c9d
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-conditional-accumulator.pbtxt
@@ -0,0 +1,46 @@
+path: "tensorflow.sparse.SparseConditionalAccumulator"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.SparseConditionalAccumulator\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.ConditionalAccumulatorBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "accumulator_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtype\', \'shape\', \'shared_name\', \'name\', \'reduction_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'sparse_conditional_accumulator\', \'MEAN\'], "
+  }
+  member_method {
+    name: "apply_grad"
+    argspec: "args=[\'self\', \'grad_indices\', \'grad_values\', \'grad_shape\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], "
+  }
+  member_method {
+    name: "apply_indexed_slices_grad"
+    argspec: "args=[\'self\', \'grad\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\'], "
+  }
+  member_method {
+    name: "num_accumulated"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "set_global_step"
+    argspec: "args=[\'self\', \'new_global_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "take_grad"
+    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "take_indexed_slices_grad"
+    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt
new file mode 100644
index 0000000000..02e59a63e1
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt
@@ -0,0 +1,54 @@
+path: "tensorflow.sparse.SparseTensor"
+tf_class {
+  is_instance: "<class \'tensorflow.python.framework.sparse_tensor.SparseTensor\'>"
+  is_instance: "<class \'tensorflow.python.framework.ops._TensorLike\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dense_shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "graph"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "indices"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "op"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "values"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'indices\', \'values\', \'dense_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "consumers"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "eval"
+    argspec: "args=[\'self\', \'feed_dict\', \'session\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'cls\', \'sparse_tensor_value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_shape"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt
index ba9e651b34..32bd8d5f8e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt
@@ -1,5 +1,21 @@
 path: "tensorflow.sparse"
 tf_module {
+  member {
+    name: "SparseConditionalAccumulator"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SparseTensor"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "add"
+    argspec: "args=[\'a\', \'b\', \'thresh\'], varargs=None, keywords=None, defaults=[\'0\'], "
+  }
+  member_method {
+    name: "concat"
+    argspec: "args=[\'axis\', \'sp_inputs\', \'name\', \'expand_nonconcat_dim\', \'concat_dim\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
   member_method {
     name: "cross"
     argspec: "args=[\'inputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -16,4 +32,100 @@ tf_module {
     name: "eye"
     argspec: "args=[\'num_rows\', \'num_columns\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "fill_empty_rows"
+    argspec: "args=[\'sp_input\', \'default_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "mask"
+    argspec: "args=[\'a\', \'mask_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "matmul"
+    argspec: "args=[\'sp_a\', \'b\', \'adjoint_a\', \'adjoint_b\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "maximum"
+    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "merge"
+    argspec: "args=[\'sp_ids\', \'sp_values\', \'vocab_size\', \'name\', \'already_sorted\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "minimum"
+    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "placeholder"
+    argspec: "args=[\'dtype\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_max_sparse"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reduce_sum_sparse"
+    argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "reorder"
+    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "reset_shape"
+    argspec: "args=[\'sp_input\', \'new_shape\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "reshape"
+    argspec: "args=[\'sp_input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "retain"
+    argspec: "args=[\'sp_input\', \'to_retain\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "segment_mean"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "segment_sqrt_n"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "segment_sum"
+    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "slice"
+    argspec: "args=[\'sp_input\', \'start\', \'size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "softmax"
+    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "split"
+    argspec: "args=[\'keyword_required\', \'sp_input\', \'num_split\', \'axis\', \'name\', \'split_dim\'], varargs=None, keywords=None, defaults=[\'KeywordRequired()\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "to_dense"
+    argspec: "args=[\'sp_input\', \'default_value\', \'validate_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'True\', \'None\'], "
+  }
+  member_method {
+    name: "to_indicator"
+    argspec: "args=[\'sp_input\', \'vocab_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "transpose"
+    argspec: "args=[\'sp_input\', \'perm\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
index 312e94b41d..ebdaf57231 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "length"
     argspec: "args=[\'input\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
+  member_method {
+    name: "reduce_join"
+    argspec: "args=[\'inputs\', \'axis\', \'keep_dims\', \'separator\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'\', \'None\', \'None\'], "
+  }
   member_method {
     name: "regex_full_match"
     argspec: "args=[\'input\', \'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
index cb6da5088b..7e980fe44d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
@@ -252,6 +252,10 @@ tf_module {
     name: "checkpoint_exists"
     argspec: "args=[\'checkpoint_prefix\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "confusion_matrix"
+    argspec: "args=[\'labels\', \'predictions\', \'num_classes\', \'dtype\', \'name\', \'weights\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'int32\'>\", \'None\', \'None\'], "
+  }
   member_method {
     name: "cosine_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
-- 
GitLab


From 694367b574dcaf5ac90f3e42b8dee8fa51ca9f38 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 11:58:17 -0700
Subject: [PATCH 0160/1085] Automated rollback of commit
 cb98ceba9cff8c10ee3c7e89dc8925c88b28118e

PiperOrigin-RevId: 215254762
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 4 ++--
 tensorflow/core/protobuf/rewriter_config.proto        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index a5f851fb1a..c3d70a1fdf 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -139,7 +139,7 @@ Status MetaOptimizer::InitializeOptimizers(
   if (cfg_.remapping() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
   }
-  if (cfg_.pin_to_host_optimization() != RewriterConfig::OFF) {
+  if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) {
     optimizers->push_back(MakeUnique<PinToHostOptimizer>());
   }
   if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
@@ -527,7 +527,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
          cfg.scoped_allocator_optimization() == RewriterConfig::ON ||
-         cfg.pin_to_host_optimization() != RewriterConfig::OFF ||
+         cfg.pin_to_host_optimization() == RewriterConfig::ON ||
          !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }
 
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 8e0448d536..8c31468ff5 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -75,7 +75,7 @@ message RewriterConfig {
   // Try to allocate some independent Op outputs contiguously in order to
   // merge or eliminate downstream Ops (off by default).
   Toggle scoped_allocator_optimization = 15;
-  // Force small ops onto the CPU (default is ON).
+  // Force small ops onto the CPU (default is OFF).
   Toggle pin_to_host_optimization = 18;
   // Disable the entire meta optimizer (off by default).
   bool disable_meta_optimizer = 19;
-- 
GitLab


From c4b3ce081b8abfae5560814ec445f0169cb4c368 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Mon, 1 Oct 2018 12:03:53 -0700
Subject: [PATCH 0161/1085] Add new attributes for the defun forward/backward
 functions.

PiperOrigin-RevId: 215255826
---
 tensorflow/python/eager/function.py      | 39 ++++++++++++++++++------
 tensorflow/python/eager/function_test.py | 15 +++++++++
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index dd3e1a3723..60a4f018cd 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 
 import collections
 import functools
+import re
 import sys
 import threading
 import weakref
@@ -61,9 +62,15 @@ cond_v2_impl._function = sys.modules[__name__]  # pylint: disable=protected-acce
 # This is to avoid a circular dependency with gradients_impl
 gradients_impl._function = sys.modules[__name__]  # pylint: disable=protected-access
 
+FORWARD_FUNCTION_ATTRIBUTE_NAME = "forward_function_name"
+BACKWARD_FUNCTION_ATTRIBUTE_NAME = "backward_function_name"
 
 # TODO(scottzhu): Update this to allow arbitrary attribute names in future.
-WHITELIST_FUNCTION_ATTRIBUTE_PREFIX = "experimental_"
+WHITELIST_FUNCTION_ATTRIBUTE_REGEX = [
+    "experimental_.*",
+    FORWARD_FUNCTION_ATTRIBUTE_NAME,
+    BACKWARD_FUNCTION_ATTRIBUTE_NAME
+]
 
 
 def _create_substitute_placeholder(value, name=None, dtype=None):
@@ -140,10 +147,11 @@ def _parse_func_attrs(attributes):
   """
   attrs = {}
   for key, value in attributes.items():
-    if not key.startswith(WHITELIST_FUNCTION_ATTRIBUTE_PREFIX):
+    if not any([re.match(reg, key)
+                for reg in WHITELIST_FUNCTION_ATTRIBUTE_REGEX]):
       raise ValueError("Attribute name is not whitelisted. "
                        "Whitelisted: prefix %s, got: %s" %
-                       (WHITELIST_FUNCTION_ATTRIBUTE_PREFIX, key))
+                       (WHITELIST_FUNCTION_ATTRIBUTE_REGEX, key))
 
     if isinstance(value, attr_value_pb2.AttrValue):
       attrs[key] = value
@@ -154,7 +162,7 @@ def _parse_func_attrs(attributes):
       attrs[key] = attr_value_pb2.AttrValue(i=value)
     elif isinstance(value, float):
       attrs[key] = attr_value_pb2.AttrValue(f=value)
-    elif isinstance(value, str):
+    elif isinstance(value, (str, bytes)):
       attrs[key] = attr_value_pb2.AttrValue(s=compat.as_bytes(value))
     else:
       raise ValueError("Unsupported attribute type for %s with type %s" %
@@ -705,6 +713,7 @@ class Function(object):
   def _construct_backprop_function(self):
     """Constructs the backprop function object for this function."""
     backwards_graph = FuncGraph(_backward_name(self._func_graph.name))
+    forward_function_name = _forward_name(self._func_graph.name)
     with backwards_graph.as_default():
       gradients_wrt_outputs = [
           graph_placeholder(x.dtype, x.shape) for x in self._func_graph.outputs
@@ -715,11 +724,11 @@ class Function(object):
           grad_ys=gradients_wrt_outputs,
           src_graph=self._func_graph)
 
-    self._forward_function = _EagerDefinedFunction(
-        _forward_name(
-            self._func_graph.name), self._func_graph, self._func_graph.inputs,
-        self._func_graph.outputs + list(backwards_graph.captures.keys()),
-        self._attrs)
+    backwards_graph_captures = list(backwards_graph.captures.keys())
+
+    backward_function_attr = _parse_func_attrs(
+        {FORWARD_FUNCTION_ATTRIBUTE_NAME: forward_function_name})
+    backward_function_attr.update(self._attrs)
 
     # The ordering of `backwards_graph.inputs` is important: inputs of
     # `self._backward_graph_function` correspond to outputs of
@@ -732,7 +741,17 @@ class Function(object):
         grad for grad in _flatten(gradients_wrt_inputs) if grad is not None)
     backwards_graph.structured_outputs = gradients_wrt_inputs
     self._backward_graph_function = Function(
-        backwards_graph, attrs=self._attrs)
+        backwards_graph, attrs=backward_function_attr)
+
+    forward_function_attr = _parse_func_attrs({
+        BACKWARD_FUNCTION_ATTRIBUTE_NAME:
+            self._backward_graph_function._inference_function.name})  # pylint: disable=protected-access
+    forward_function_attr.update(self._attrs)
+
+    self._forward_function = _EagerDefinedFunction(
+        forward_function_name, self._func_graph, self._func_graph.inputs,
+        self._func_graph.outputs + backwards_graph_captures,
+        forward_function_attr)
 
   def _backprop_call(self, args):
     """Calls the forward function and records the result on a tape.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 34a2648e26..afe3ba9893 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1687,6 +1687,21 @@ class FunctionTest(test.TestCase):
           self.assertRegexpMatches(captured_function_names[i],
                                    expected_func_name_regex[i])
 
+        # Check the forward and backward function has the correct attributes.
+        self.assertEquals(
+            functions[1].definition.attr['backward_function_name'].s,
+            functions[2].name)
+        self.assertEquals(
+            functions[2].definition.attr['forward_function_name'].s,
+            functions[1].name)
+
+        self.assertEquals(
+            functions[4].definition.attr['backward_function_name'].s,
+            functions[5].name)
+        self.assertEquals(
+            functions[5].definition.attr['forward_function_name'].s,
+            functions[4].name)
+
         sq = defun_matmul(t, t)
         double = add(t, t)
         self.assertAllEqual(sq.eval().reshape(-1), [7, 10, 15, 22])
-- 
GitLab


From f0c219d095f38f7ce6febfb68d4f84d64aa1829a Mon Sep 17 00:00:00 2001
From: Youlong Cheng <ylc@google.com>
Date: Mon, 1 Oct 2018 12:28:32 -0700
Subject: [PATCH 0162/1085]  Expose tpu_host_placement_function().

PiperOrigin-RevId: 215259803
---
 tensorflow/contrib/tpu/python/tpu/tpu_context.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
index 7cfb6c38fa..da6bdf67d6 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py
@@ -154,6 +154,20 @@ class TPUContext(object):
     # as far as model is replicated to all cores in the system.
     return self._internal_ctx.device_for_replica(replica_id)
 
+  @property
+  def tpu_host_placement_function(self):
+    """Returns the TPU host place function.
+
+    The place function takes host_id as the input and returns the TF device
+    for the correspoding host.
+    """
+
+    def _placement_function(host_id):
+      """Return the host device given host_id."""
+      return self._internal_ctx.tpu_host_placement_function(host_id=host_id)
+
+    return _placement_function
+
 
 class _InternalTPUContext(object):
   """A context holds immutable states of TPU computation.
-- 
GitLab


From 5c8c48df7fd4ccbe4a9dec035fdec6b02a5d6016 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 12:54:56 -0700
Subject: [PATCH 0163/1085] Internal build specification change

PiperOrigin-RevId: 215263951
---
 tensorflow/core/BUILD | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 57819cec70..0aae29d10c 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -271,6 +271,12 @@ proto_library(
     visibility = ["//visibility:public"],
 )
 
+java_proto_library(
+    name = "example_java_proto",
+    visibility = ["//visibility:public"],
+    deps = [":example_protos"],
+)
+
 closure_proto_library(
     name = "example_protos_closure",
     visibility = ["//visibility:public"],
-- 
GitLab


From 3648cb0198690d551ea5c8eefcf706c8fa67f4f0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 13:07:12 -0700
Subject: [PATCH 0164/1085] Add option to initialize the TPU system.

PiperOrigin-RevId: 215266241
---
 tensorflow/python/tools/saved_model_cli.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index 3dbccd1409..2fcb0fa029 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -267,7 +267,8 @@ def scan_meta_graph_def(meta_graph_def):
 
 def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key,
                                    input_tensor_key_feed_dict, outdir,
-                                   overwrite_flag, worker=None, tf_debug=False):
+                                   overwrite_flag, worker=None, init_tpu=False,
+                                   tf_debug=False):
   """Runs SavedModel and fetch all outputs.
 
   Runs the input dictionary through the MetaGraphDef within a SavedModel
@@ -287,6 +288,8 @@ def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key,
         the same name exists.
     worker: If provided, the session will be run on the worker.  Valid worker
         specification is a bns or gRPC path.
+    init_tpu: If true, the TPU system will be initialized after the session
+        is created.
     tf_debug: A boolean flag to use TensorFlow Debugger (TFDBG) to observe the
         intermediate Tensor values and runtime GraphDefs while running the
         SavedModel.
@@ -328,6 +331,12 @@ def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key,
   ]
 
   with session.Session(worker, graph=ops_lib.Graph()) as sess:
+    if init_tpu:
+      print('Initializing TPU System ...')
+      # This is needed for freshly started worker, or if the job
+      # restarts after a preemption.
+      sess.run(tf.contrib.tpu.initialize_system())
+
     loader.load(sess, tag_set.split(','), saved_model_dir)
 
     if tf_debug:
@@ -632,7 +641,7 @@ def run(args):
   run_saved_model_with_feed_dict(args.dir, args.tag_set, args.signature_def,
                                  tensor_key_feed_dict, args.outdir,
                                  args.overwrite, worker=args.worker,
-                                 tf_debug=args.tf_debug)
+                                 init_tpu=args.init_tpu, tf_debug=args.tf_debug)
 
 
 def scan(args):
@@ -775,6 +784,12 @@ def create_parser():
       default=None,
       help='if specified, a Session will be run on the worker. '
            'Valid worker specification is a bns or gRPC path.')
+  parser_run.add_argument(
+      '--init_tpu',
+      action='store_true',
+      default=None,
+      help='if specified, tpu.initialize_system will be called on the Session. '
+           'This option should be only used if the worker is a TPU job.')
   parser_run.set_defaults(func=run)
 
   # scan command
-- 
GitLab


From 3c6e6885f32e7638ece306dad3a5081b06137bdc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 13:08:10 -0700
Subject: [PATCH 0165/1085] Check in and refactor the OVIC detector
 benchmarker.

PiperOrigin-RevId: 215266415
---
 tensorflow/contrib/lite/java/ovic/BUILD       |  61 +++++-
 .../contrib/lite/java/ovic/demo/app/BUILD     |   5 +-
 .../demo/app/OvicBenchmarkerActivity.java     |  77 +++++---
 .../demo/app/res/layout/activity_main.xml     |  27 ++-
 .../java/ovic/demo/app/res/values/strings.xml |   3 +-
 .../java/org/tensorflow/ovic/BoundingBox.java |  68 +++++++
 .../org/tensorflow/ovic/OvicBenchmarker.java  | 152 ++++++---------
 ...ult.java => OvicClassificationResult.java} |  12 +-
 .../org/tensorflow/ovic/OvicClassifier.java   |  10 +-
 .../ovic/OvicClassifierBenchmarker.java       | 142 ++++++++++++++
 .../tensorflow/ovic/OvicDetectionResult.java  |  91 +++++++++
 .../org/tensorflow/ovic/OvicDetector.java     | 184 ++++++++++++++++++
 .../ovic/OvicDetectorBenchmarker.java         | 160 +++++++++++++++
 .../org/tensorflow/ovic/OvicValidator.java    |   2 +-
 .../tensorflow/ovic/OvicClassifierTest.java   |   6 +-
 .../org/tensorflow/ovic/OvicDetectorTest.java | 149 ++++++++++++++
 .../contrib/lite/java/ovic/src/testdata/BUILD |   5 +-
 .../java/ovic/src/testdata/coco_labels.txt    |  91 +++++++++
 18 files changed, 1101 insertions(+), 144 deletions(-)
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/BoundingBox.java
 rename tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/{OvicSingleImageResult.java => OvicClassificationResult.java} (83%)
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectionResult.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetector.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicDetectorTest.java
 create mode 100644 tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt

diff --git a/tensorflow/contrib/lite/java/ovic/BUILD b/tensorflow/contrib/lite/java/ovic/BUILD
index bb0be04ca2..ea9b9ed4b6 100644
--- a/tensorflow/contrib/lite/java/ovic/BUILD
+++ b/tensorflow/contrib/lite/java/ovic/BUILD
@@ -9,6 +9,7 @@ licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow/java:build_defs.bzl", "JAVACOPTS")
 
+# Build targets for OVIC classification.
 java_test(
     name = "OvicClassifierTest",
     size = "medium",
@@ -45,8 +46,9 @@ android_library(
     name = "ovicbenchmarkerlib",
     srcs = [
         "src/main/java/org/tensorflow/ovic/OvicBenchmarker.java",
+        "src/main/java/org/tensorflow/ovic/OvicClassificationResult.java",
         "src/main/java/org/tensorflow/ovic/OvicClassifier.java",
-        "src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java",
+        "src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java",
     ],
     manifest = "//tensorflow/contrib/lite/java:AndroidManifest.xml",
     tags = ["no_oss"],
@@ -60,8 +62,8 @@ android_library(
 java_library(
     name = "ovicbenchmarkerlib_java",
     srcs = [
+        "src/main/java/org/tensorflow/ovic/OvicClassificationResult.java",
         "src/main/java/org/tensorflow/ovic/OvicClassifier.java",
-        "src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java",
     ],
     javacopts = JAVACOPTS,
     tags = ["no_oss"],
@@ -73,3 +75,58 @@ java_library(
         "@org_checkerframework_qual",
     ],
 )
+
+# Build targets for OVIC detection.
+java_test(
+    name = "OvicDetectorTest",
+    size = "medium",
+    srcs = ["src/test/java/org/tensorflow/ovic/OvicDetectorTest.java"],
+    data = [
+        "//tensorflow/contrib/lite/java/ovic/src/testdata:coco_labels.txt",
+        "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata",
+        "@tflite_mobilenet_ssd_quant//:detect.tflite",
+    ],
+    javacopts = JAVACOPTS,
+    tags = ["no_oss"],
+    test_class = "org.tensorflow.ovic.OvicDetectorTest",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/contrib/lite/java/ovic:ovicdetectionbenchmarkerlib_java",
+        "@com_google_truth",
+        "@junit",
+    ],
+)
+
+android_library(
+    name = "ovicdetectionbenchmarkerlib",
+    srcs = [
+        "src/main/java/org/tensorflow/ovic/BoundingBox.java",
+        "src/main/java/org/tensorflow/ovic/OvicBenchmarker.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetectionResult.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetector.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java",
+    ],
+    manifest = "//tensorflow/contrib/lite/java:AndroidManifest.xml",
+    deps = [
+        "//tensorflow/contrib/lite/java:tensorflowlite",
+        "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper",
+        "@org_checkerframework_qual",
+    ],
+)
+
+java_library(
+    name = "ovicdetectionbenchmarkerlib_java",
+    srcs = [
+        "src/main/java/org/tensorflow/ovic/BoundingBox.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetectionResult.java",
+        "src/main/java/org/tensorflow/ovic/OvicDetector.java",
+    ],
+    javacopts = JAVACOPTS,
+    deps = [
+        "//tensorflow/contrib/lite/java:libtensorflowlite_jni.so",
+        "//tensorflow/contrib/lite/java:tensorflowlite_java",
+        "//tensorflow/contrib/lite/java/src/main/native",
+        "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:testhelper",
+        "@org_checkerframework_qual",
+    ],
+)
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
index 058240aada..f567358ea3 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD
@@ -10,8 +10,10 @@ android_binary(
     ],
     aapt_version = "aapt",
     assets = [
-        "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata",
+        "//tensorflow/contrib/lite/java/ovic/src/testdata:coco_labels.txt",
         "//tensorflow/contrib/lite/java/ovic/src/testdata:labels.txt",
+        "//tensorflow/contrib/lite/java/ovic/src/testdata:ovic_testdata",
+        "@tflite_mobilenet_ssd_quant//:detect.tflite",
     ],
     assets_dir = "",
     custom_package = "ovic.demo.app",
@@ -25,6 +27,7 @@ android_binary(
     deps = [
         "//tensorflow/contrib/lite/java:tensorflowlite",
         "//tensorflow/contrib/lite/java/ovic:ovicbenchmarkerlib",
+        "//tensorflow/contrib/lite/java/ovic:ovicdetectionbenchmarkerlib",
         "@androidsdk//com.android.support:support-v13-25.2.0",
         "@androidsdk//com.android.support:support-v4-25.2.0",
     ],
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java
index 4adf94aeb6..48c29ecebe 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java
@@ -35,19 +35,18 @@ import java.nio.MappedByteBuffer;
 import java.nio.channels.FileChannel;
 import java.text.DecimalFormat;
 import org.tensorflow.ovic.OvicBenchmarker;
-import org.tensorflow.ovic.OvicSingleImageResult;
-
+import org.tensorflow.ovic.OvicClassifierBenchmarker;
+import org.tensorflow.ovic.OvicDetectorBenchmarker;
 
 /** Class that benchmark image classifier models. */
 public class OvicBenchmarkerActivity extends Activity {
   /** Tag for the {@link Log}. */
   private static final String TAG = "OvicBenchmarkerActivity";
 
-  /** Name of the label file stored in Assets. */
-  private static final String LABEL_PATH = "labels.txt";
-
-  private static final String TEST_IMAGE_PATH = "test_image_224.jpg";
-  private static final String MODEL_PATH = "float_model.lite";
+  /** Name of the task-dependent data files stored in Assets. */
+  private static String labelPath = null;
+  private static String testImagePath = null;
+  private static String modelPath = null;
   /**
    * Each bottom press will launch a benchmarking experiment. The experiment stops when either the
    * total native latency reaches WALL_TIME or the number of iterations reaches MAX_ITERATIONS,
@@ -66,8 +65,6 @@ public class OvicBenchmarkerActivity extends Activity {
   private MappedByteBuffer model = null;
   private InputStream labelInputStream = null;
   private OvicBenchmarker benchmarker;
-  /** Inference result of each iteration. */
-  OvicSingleImageResult iterResult = null;
 
   private TextView textView = null;
   // private Button startButton = null;
@@ -83,21 +80,31 @@ public class OvicBenchmarkerActivity extends Activity {
   }
 
   private Bitmap loadTestBitmap() throws IOException {
-    InputStream imageStream = getAssets().open(TEST_IMAGE_PATH);
+    InputStream imageStream = getAssets().open(testImagePath);
     return BitmapFactory.decodeStream(imageStream);
   }
 
-  public void initializeTest() throws IOException {
+  public void initializeTest(boolean benchmarkClassification) throws IOException {
     Log.i(TAG, "Initializing benchmarker.");
-    benchmarker = new OvicBenchmarker(WALL_TIME);
+    if (benchmarkClassification) {
+      benchmarker = new OvicClassifierBenchmarker(WALL_TIME);
+      labelPath = "labels.txt";
+      testImagePath = "test_image_224.jpg";
+      modelPath = "quantized_model.lite";
+    } else {  // Benchmarking detection.
+      benchmarker = new OvicDetectorBenchmarker(WALL_TIME);
+      labelPath = "coco_labels.txt";
+      testImagePath = "test_image_224.jpg";
+      modelPath = "detect.tflite";
+    }
     AssetManager am = getAssets();
-    AssetFileDescriptor fileDescriptor = am.openFd(MODEL_PATH);
+    AssetFileDescriptor fileDescriptor = am.openFd(modelPath);
     FileInputStream modelInputStream = new FileInputStream(fileDescriptor.getFileDescriptor());
     FileChannel fileChannel = modelInputStream.getChannel();
     long startOffset = fileDescriptor.getStartOffset();
     long declaredLength = fileDescriptor.getDeclaredLength();
     model = fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
-    labelInputStream = am.open(LABEL_PATH);
+    labelInputStream = am.open(labelPath);
   }
 
   public Boolean doTestIteration() throws IOException, InterruptedException {
@@ -117,24 +124,44 @@ public class OvicBenchmarkerActivity extends Activity {
     Log.i(TAG, "Going to do test iter.");
     // Start testing.
     Bitmap testImageBitmap = loadTestBitmap();
-    iterResult = benchmarker.doTestIteration(testImageBitmap);
-    testImageBitmap.recycle();
-    if (iterResult == null) {
+    try {
+      if (!benchmarker.processBitmap(testImageBitmap)) {
+        throw new RuntimeException("Failed to run test.");
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    } finally {
+      testImageBitmap.recycle();
+    }
+    String iterResultString = benchmarker.getLastResultString();
+    if (iterResultString == null) {
       throw new RuntimeException("Inference failed to produce a result.");
     }
-    Log.i(TAG, iterResult.toString());
+    Log.i(TAG, iterResultString);
     return true;
   }
 
-  public void startPressed(View view) throws IOException {
-    Log.i(TAG, "Start pressed");
+  public void detectPressed(View view) throws IOException {
+    benchmarkSession(false);
+  }
+  public void classifyPressed(View view) throws IOException {
+    benchmarkSession(true);
+  }
+
+  private void benchmarkSession(boolean benchmarkClassification) throws IOException {
     try {
-      initializeTest();
+      initializeTest(benchmarkClassification);
     } catch (IOException e) {
       Log.e(TAG, "Can't initialize benchmarker.", e);
       throw e;
     }
     String displayText = "";
+    if (benchmarkClassification) {
+      displayText = "Classification benchmark: ";
+    } else {
+      displayText = "Detection benchmark: ";
+    }
     try {
       setProcessorAffinity(BIG_CORE_MASK);
     } catch (IOException e) {
@@ -144,7 +171,6 @@ public class OvicBenchmarkerActivity extends Activity {
     Log.i(TAG, "Successfully initialized benchmarker.");
     int testIter = 0;
     Boolean iterSuccess = false;
-    double totalLatency = 0.0f;
     while (testIter < MAX_ITERATIONS) {
       try {
         iterSuccess = doTestIteration();
@@ -153,23 +179,22 @@ public class OvicBenchmarkerActivity extends Activity {
         throw e;
       } catch (InterruptedException e) {
         Log.e(TAG, "Interrupted at iteration " + testIter);
+        displayText += e.getMessage() + "\n";
       }
       if (!iterSuccess) {
         break;
       }
       testIter++;
-      totalLatency += (double) iterResult.latency;
     }
-    ;
     Log.i(TAG, "Benchmarking finished");
 
     if (textView != null) {
       if (testIter > 0) {
         textView.setText(
             displayText
-                + MODEL_PATH
+                + modelPath
                 + ": Average latency="
-                + df2.format(totalLatency / testIter)
+                + df2.format(benchmarker.getTotalRunTime() / testIter)
                 + "ms after "
                 + testIter
                 + " runs.");
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml b/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml
index e9d83bae54..1bce60ff7d 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/res/layout/activity_main.xml
@@ -30,14 +30,14 @@
     android:layout_height="wrap_content"
     android:text="@string/initial_status_msg"
     android:id="@+id/textView"
-    android:layout_above="@+id/button_start"
+    android:layout_above="@+id/button_clf_start"
     android:layout_alignParentTop="true"/>
 
   <Button
     android:layout_width="wrap_content"
     android:layout_height="wrap_content"
-    android:text="@string/start_label"
-    android:id="@id/button_start"
+    android:text="@string/start_clf_label"
+    android:id="@id/button_clf_start"
     android:layout_alignParentBottom="true"
     android:layout_alignParentLeft="true"
     android:background="@drawable/start_button_color"
@@ -49,6 +49,25 @@
     android:textColor="#ffffff"
     android:enabled="true"
     style="?android:attr/buttonBarButtonStyle"
-    android:onClick="startPressed"/>
+    android:onClick="classifyPressed"/>
+
+  <Button
+    android:layout_width="wrap_content"
+    android:layout_height="wrap_content"
+    android:text="@string/start_det_label"
+    android:id="@+id/button_det_start"
+    android:layout_alignParentBottom="true"
+    android:layout_alignParentRight="true"
+    android:layout_toRightOf="@id/button_clf_start"
+    android:background="@drawable/start_button_color"
+    android:padding="10dp"
+    android:layout_marginRight="100dp"
+    android:layout_marginLeft="30dp"
+    android:layout_marginTop="10dp"
+    android:foreground="#000000"
+    android:textColor="#ffffff"
+    android:enabled="true"
+    style="?android:attr/buttonBarButtonStyle"
+    android:onClick="detectPressed"/>
 
 </RelativeLayout>
diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/res/values/strings.xml b/tensorflow/contrib/lite/java/ovic/demo/app/res/values/strings.xml
index d26beb1d27..53525908d3 100644
--- a/tensorflow/contrib/lite/java/ovic/demo/app/res/values/strings.xml
+++ b/tensorflow/contrib/lite/java/ovic/demo/app/res/values/strings.xml
@@ -17,6 +17,7 @@
 <resources>
     <string name="app_name" translatable="false">Benchmarker</string>
 
-    <string name="start_label" translatable="false">Start</string>
+    <string name="start_clf_label" translatable="false">Clf</string>
+    <string name="start_det_label" translatable="false">Det</string>
     <string name="initial_status_msg" translatable="false"> Press start to run the benchmarks.</string>
 </resources>
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/BoundingBox.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/BoundingBox.java
new file mode 100644
index 0000000000..9bf7d005d2
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/BoundingBox.java
@@ -0,0 +1,68 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+/** Class for holding a detection bounding box with category and confidence. */
+public class BoundingBox {
+  // Upper left point.
+  public float x1;
+  public float y1;
+
+  // Lower right point.
+  public float x2;
+  public float y2;
+
+  // The area of the box
+  public float area;
+
+  // The object category
+  public int category;
+
+  // The confidence of the detection
+  public float score;
+
+  public BoundingBox(float x1, float y1, float x2, float y2, int category, float score) {
+    this.x1 = x1;
+    this.y1 = y1;
+    this.x2 = x2;
+    this.y2 = y2;
+    this.category = category;
+    this.score = score;
+    // -1 stands for area not initialized
+    this.area = -1;
+  }
+
+  // The intersection area of two bounding boxes
+  public float intersect(BoundingBox bbx) {
+    return Math.max(0, Math.min(x2, bbx.x2) - Math.max(x1, bbx.x1))
+        * Math.max(0, Math.min(y2, bbx.y2) - Math.max(y1, bbx.y1));
+  }
+
+  // The union area of two bounding boxes
+  public float union(BoundingBox bbx) {
+    return bbx.getArea() + this.getArea() - this.intersect(bbx);
+  }
+
+  public float getArea() {
+    if (area < 0) {
+      area = (x2 - x1) * (y2 - y1);
+    }
+    return area;
+  }
+
+  public float computeIoU(BoundingBox bbx) {
+    return (float) (this.intersect(bbx) * 1.0 / this.union(bbx));
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java
index 4cda258bee..15d9511f50 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java
@@ -20,11 +20,10 @@ import android.util.Log;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
 import java.nio.MappedByteBuffer;
 
 /**
- * Class that benchmarks image classifier models.
+ * Base class that benchmarks image models.
  *
  * <p>===================== General workflow =======================
  *
@@ -33,37 +32,40 @@ import java.nio.MappedByteBuffer;
  * benchmarker.getReadyToTest(labelInputStream, model);
  * while (!benchmarker.shouldStop()) {
  *   Bitmap bitmap = ...
- *   benchmarker.doTestIteration(bitmap);
+ *   imgId = ...
+ *   benchmarker.processBitmap(bitmap, imgId);
  * }
  * }</pre>
  */
-public class OvicBenchmarker {
+public abstract class OvicBenchmarker {
   /** Tag for the {@link Log}. */
   private static final String TAG = "OvicBenchmarker";
 
-  /** Evaluation transformation parameters. */
-  private static final float CENTRAL_FRACTION = 0.875f;
-
   /** Dimensions of inputs. */
-  private static final int DIM_BATCH_SIZE = 1;
-  private static final int DIM_PIXEL_SIZE = 3;
-  private int imgHeight = 224;
-  private int imgWidth = 224;
+  protected static final int DIM_BATCH_SIZE = 1;
+  protected static final int DIM_PIXEL_SIZE = 3;
+  protected int imgHeight = 224;
+  protected int imgWidth = 224;
+
+  /** Preprocess parameters (only used when input is float). */
+  protected static final float IMAGE_MEAN = 127.5f;
+  protected static final float IMAGE_STD = 127.5f;
+
+  /** Whether input is float or quantized. */
+  protected Boolean quantizedInput = null;
 
   /* Preallocated buffers for storing image data in. */
-  private int[] intValues = null;
+  protected int[] intValues = null;
 
   /** A ByteBuffer to hold image data, to be feed into classifier as inputs. */
-  private ByteBuffer imgData = null;
-
-  private OvicClassifier classifier;
+  protected ByteBuffer imgData = null;
 
   /** Total runtime in ms. */
-  private double totalRuntime = 0.0;
+  protected double totalRuntime = 0.0;
   /** Total allowed runtime in ms. */
-  private double wallTime = 20000 * 30.0;
-
-  private Boolean benchmarkStarted = null;
+  protected double wallTime = 20000 * 30.0;
+  /** Record whether benchmark has started (used to skip the first image). */
+  protected boolean benchmarkStarted = false;
 
   /**
    * Initializes an {@link OvicBenchmarker}
@@ -76,6 +78,11 @@ public class OvicBenchmarker {
     this.wallTime = wallTime;
   }
 
+  /** Return the cumulative latency of all runs so far. */
+  public double getTotalRunTime() {
+    return totalRuntime;
+  }
+
   /** Check whether the benchmarker should stop. */
   public Boolean shouldStop() {
     if (totalRuntime >= wallTime) {
@@ -90,105 +97,62 @@ public class OvicBenchmarker {
     return false;
   }
 
-  /** Check whether the benchmarker is ready to start classifying images. */
-  public Boolean readyToTest() {
-    return (classifier != null);
-  }
+  /** Abstract class for checking whether the benchmarker is ready to start processing images */
+  public abstract boolean readyToTest();
 
   /**
-   * Getting the benchmarker ready for classifying images.
+   * Abstract class for getting the benchmarker ready.
    *
    * @param labelInputStream: an {@link InputStream} specifying where the list of labels should be
    *     read from.
    * @param model: a {@link MappedByteBuffer} model to benchmark.
    */
-  public void getReadyToTest(InputStream labelInputStream, MappedByteBuffer model) {
-    try {
-      Log.i(TAG, "Creating classifier.");
-      classifier = new OvicClassifier(labelInputStream, model);
-      int [] inputDims = classifier.getInputDims();
-      imgHeight = inputDims[1];
-      imgWidth = inputDims[2];
-      // Only accept QUANTIZED_UINT8 input.
-      imgData = ByteBuffer.allocateDirect(DIM_BATCH_SIZE * imgHeight * imgWidth * DIM_PIXEL_SIZE);
-      imgData.order(ByteOrder.nativeOrder());
-      intValues = new int[imgHeight * imgWidth];
-    } catch (Exception e) {
-        Log.e(TAG, e.getMessage());
-        Log.e(TAG, "Failed to initialize ImageNet classifier for the benchmarker.");
-    }
-  }
-
-  /** Return how many classes are predicted per image. */
-  public int getNumPredictions() {
-    return classifier.getNumPredictions();
-  }
+  public abstract void getReadyToTest(InputStream labelInputStream, MappedByteBuffer model);
 
   /**
    * Perform test on a single bitmap image.
    *
-   * @param bitmap: a {@link Bitmap} image to classify.
+   * @param bitmap: a {@link Bitmap} image to process.
+   * @param imageId: an ID uniquely representing the image.
    */
-  public OvicSingleImageResult doTestIteration(Bitmap bitmap)
-      throws IOException, InterruptedException {
-    if (shouldStop() || !readyToTest()) {
-      return null;
-    }
-    OvicSingleImageResult iterResult = null;
-    try {
-      Log.i(TAG, "Converting bitmap.");
-      convertBitmapToInput(bitmap);
-      Log.i(TAG, "Classifying image.");
-      iterResult = classifier.classifyByteBuffer(imgData);
-    } catch (RuntimeException e) {
-      Log.e(TAG, e.getMessage());
-      Log.e(TAG, "Failed to classify image.");
-    }
-    if (iterResult == null || iterResult.latency == null) {
-      throw new RuntimeException("Classification result or timing is invalid.");
-    }
-    Log.d(TAG, "Native inference latency: " + iterResult.latency);
-    Log.i(TAG, iterResult.toString());
+  public abstract boolean processBitmap(Bitmap bitmap, int imageId)
+      throws IOException, InterruptedException;
 
-    if (!benchmarkStarted) {  // Skip the first image to discount warming-up time.
-      benchmarkStarted = true;
-    } else {
-      totalRuntime += (double) iterResult.latency;
-    }
-    return iterResult;
+  /** Perform test on a single bitmap image without an image ID. */
+  public boolean processBitmap(Bitmap bitmap) throws IOException, InterruptedException {
+    return processBitmap(bitmap, /* imageId = */ 0);
   }
 
+  /** Returns the last inference results as string. */
+  public abstract String getLastResultString();
+
   /**
-   * Writes Image data into a {@link ByteBuffer}.
-   *
-   * @param bitmap: a {@link Bitmap} source image.
-   */
-  private void convertBitmapToInput(Bitmap bitmap) throws RuntimeException {
-    if (imgData == null) {
+   * Loads input buffer from intValues into ByteBuffer for the interpreter.
+   * Input buffer must be loaded in intValues and output will be placed in imgData.
+  */
+  protected void loadsInputToByteBuffer() {
+    if (imgData == null || intValues == null || quantizedInput == null) {
       throw new RuntimeException("Benchmarker is not yet ready to test.");
     }
-    imgData.rewind();
-    // Perform transformations corresponding to evaluation mode.
-    float width = (float) bitmap.getWidth();
-    float height = (float) bitmap.getHeight();
-    int stWidth = Math.round((width - width * CENTRAL_FRACTION) / 2);
-    int stHeight = Math.round((height - height * CENTRAL_FRACTION) / 2);
-    int newWidth = Math.round(width - stWidth * 2);
-    int newHeight = Math.round(height - stHeight * 2);
-    bitmap = Bitmap.createBitmap(bitmap, stWidth, stHeight, newWidth, newHeight);
-    bitmap = Bitmap.createScaledBitmap(bitmap, imgWidth, imgHeight, true);
-    bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight());
-
     // Convert the image to ByteBuffer.
+    imgData.rewind();
     int pixel = 0;
     long startTime = SystemClock.uptimeMillis();
 
     for (int i = 0; i < imgHeight; ++i) {
       for (int j = 0; j < imgWidth; ++j) {
-        final int val = intValues[pixel++];
-        imgData.put((byte) ((val >> 16) & 0xFF));
-        imgData.put((byte) ((val >> 8) & 0xFF));
-        imgData.put((byte) (val & 0xFF));
+        final int pixelValue = intValues[pixel++];
+        if (quantizedInput) {
+          // Quantized model
+          imgData.put((byte) ((pixelValue >> 16) & 0xFF));
+          imgData.put((byte) ((pixelValue >> 8) & 0xFF));
+          imgData.put((byte) (pixelValue & 0xFF));
+        } else {
+          // Float model
+          imgData.putFloat((((pixelValue >> 16) & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+          imgData.putFloat((((pixelValue >> 8) & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+          imgData.putFloat(((pixelValue & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+        }
       }
     }
     long endTime = SystemClock.uptimeMillis();
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassificationResult.java
similarity index 83%
rename from tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java
rename to tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassificationResult.java
index 4af9a65c2f..5ab804e6ee 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassificationResult.java
@@ -1,4 +1,4 @@
-/*Copyright 2018 Google LLC
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,17 +17,17 @@ package org.tensorflow.ovic;
 import java.util.ArrayList;
 
 /** Result class for inference run on a single image. */
-public class OvicSingleImageResult {
+public class OvicClassificationResult {
 
   /** Top K classes and probabilities. */
-  public ArrayList<String> topKClasses;
-  public ArrayList<Float> topKProbs;
-  public ArrayList<Integer> topKIndices;
+  public final ArrayList<String> topKClasses;
+  public final ArrayList<Float> topKProbs;
+  public final ArrayList<Integer> topKIndices;
 
   /** Latency (ms). */
   public Long latency;
 
-  OvicSingleImageResult() {
+  OvicClassificationResult() {
     topKClasses = new ArrayList<>();
     topKProbs = new ArrayList<>();
     topKIndices = new ArrayList<>();
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
index fd610b054f..d8a54c1f3b 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java
@@ -31,7 +31,7 @@ import java.util.PriorityQueue;
 import org.tensorflow.lite.Interpreter;
 import org.tensorflow.lite.TestHelper;
 
-/** Benchmark ImageNet Classifier with Tensorflow Lite. */
+/** Class for running ImageNet classification with a TfLite model. */
 public class OvicClassifier {
 
   /** Tag for the {@link Log}. */
@@ -106,7 +106,7 @@ public class OvicClassifier {
 
   /** Classifies a {@link ByteBuffer} image. */
   // @throws RuntimeException if model is uninitialized.
-  public OvicSingleImageResult classifyByteBuffer(ByteBuffer imgData) {
+  public OvicClassificationResult classifyByteBuffer(ByteBuffer imgData) {
     if (tflite == null) {
       throw new RuntimeException(TAG + ": ImageNet classifier has not been initialized; Failed.");
     }
@@ -122,7 +122,7 @@ public class OvicClassifier {
         labelProbArray[0][i] = (inferenceOutputArray[0][i] & 0xff) / 255.0f;
       }
     }
-    OvicSingleImageResult iterResult = computeTopKLabels();
+    OvicClassificationResult iterResult = computeTopKLabels();
     iterResult.latency = getLastNativeInferenceLatencyMilliseconds();
     return iterResult;
   }
@@ -174,7 +174,7 @@ public class OvicClassifier {
   }
 
   /** Computes top-K labels. */
-  private OvicSingleImageResult computeTopKLabels() {
+  private OvicClassificationResult computeTopKLabels() {
     if (labelList == null) {
       throw new RuntimeException("Label file has not been loaded.");
     }
@@ -184,7 +184,7 @@ public class OvicClassifier {
         sortedLabels.poll();
       }
     }
-    OvicSingleImageResult singleImageResult = new OvicSingleImageResult();
+    OvicClassificationResult singleImageResult = new OvicClassificationResult();
     if (sortedLabels.size() != RESULTS_TO_SHOW) {
       throw new RuntimeException(
           "Number of returned labels does not match requirement: "
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java
new file mode 100644
index 0000000000..0cdd0f7bec
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifierBenchmarker.java
@@ -0,0 +1,142 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import android.graphics.Bitmap;
+import android.util.Log;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+
+/** Class that benchmarks image classifier models. */
+public final class OvicClassifierBenchmarker extends OvicBenchmarker {
+  /** Tag for the {@link Log}. */
+  private static final String TAG = "OvicClassifierBenchmarker";
+
+  /** ImageNet preprocessing parameters. */
+  private static final float CENTRAL_FRACTION = 0.875f;
+  private OvicClassifier classifier;
+  private OvicClassificationResult iterResult = null;
+
+  public OvicClassifierBenchmarker(double wallTime) {
+    super(wallTime);
+  }
+
+  /** Test if the classifier is ready for benchmarking. */
+  @Override
+  public boolean readyToTest() {
+    return (classifier != null);
+  }
+
+  /**
+   * Getting the benchmarker ready for classifying images.
+   *
+   * @param labelInputStream: an {@link InputStream} specifying where the list of labels should be
+   *     read from.
+   * @param model: a {@link MappedByteBuffer} model to benchmark.
+   */
+  @Override
+   public void getReadyToTest(InputStream labelInputStream, MappedByteBuffer model) {
+    try {
+      Log.i(TAG, "Creating classifier.");
+      classifier = new OvicClassifier(labelInputStream, model);
+      int [] inputDims = classifier.getInputDims();
+      imgHeight = inputDims[1];
+      imgWidth = inputDims[2];
+      quantizedInput = true;
+      // Only accept QUANTIZED_UINT8 input.
+      imgData = ByteBuffer.allocateDirect(DIM_BATCH_SIZE * imgHeight * imgWidth * DIM_PIXEL_SIZE);
+      imgData.order(ByteOrder.nativeOrder());
+      intValues = new int[imgHeight * imgWidth];
+    } catch (Exception e) {
+        Log.e(TAG, e.getMessage());
+        Log.e(TAG, "Failed to initialize ImageNet classifier for the benchmarker.");
+    }
+  }
+
+  /**
+   * Perform classification on a single bitmap image.
+   *
+   * @param bitmap: a {@link Bitmap} image to process.
+   * @param imageId: an ID uniquely representing the image.
+   */
+  @Override
+  public boolean processBitmap(Bitmap bitmap, int imageId)
+      throws IOException, InterruptedException {
+    if (shouldStop() || !readyToTest()) {
+      return false;
+    }
+    try {
+      Log.i(TAG, "Converting bitmap.");
+      convertBitmapToInput(bitmap);
+      Log.i(TAG, "Classifying image: " + imageId);
+      iterResult = classifier.classifyByteBuffer(imgData);
+    } catch (RuntimeException e) {
+      Log.e(TAG, e.getMessage());
+      Log.e(TAG, "Failed to classify image.");
+    }
+    if (iterResult == null || iterResult.latency == null) {
+      throw new RuntimeException("Classification result or timing is invalid.");
+    }
+    Log.d(TAG, "Native inference latency: " + iterResult.latency);
+    Log.i(TAG, iterResult.toString());
+
+    if (!benchmarkStarted) {  // Skip the first image to discount warming-up time.
+      benchmarkStarted = true;
+    } else {
+      totalRuntime += ((double) iterResult.latency);
+    }
+    return true;
+  }
+
+  /** Return how many classes are predicted per image. */
+  public int getNumPredictions() {
+    return classifier.getNumPredictions();
+  }
+
+  public OvicClassificationResult getLastClassificationResult() {
+    return iterResult;
+  }
+
+  @Override
+  public String getLastResultString() {
+    if (iterResult == null) {
+      return null;
+    } else {
+      return iterResult.toString();
+    }
+  }
+
+  /**
+   * Preprocess bitmap according to ImageNet protocol then writes result into a {@link ByteBuffer}.
+   *
+   * @param bitmap: a {@link Bitmap} source image.
+   */
+  private void convertBitmapToInput(Bitmap bitmap) {
+    // Perform transformations corresponding to evaluation mode.
+    float width = (float) bitmap.getWidth();
+    float height = (float) bitmap.getHeight();
+    int stWidth = Math.round((width - width * CENTRAL_FRACTION) / 2);
+    int stHeight = Math.round((height - height * CENTRAL_FRACTION) / 2);
+    int newWidth = Math.round(width - stWidth * 2);
+    int newHeight = Math.round(height - stHeight * 2);
+    bitmap = Bitmap.createBitmap(bitmap, stWidth, stHeight, newWidth, newHeight);
+    bitmap = Bitmap.createScaledBitmap(bitmap, imgWidth, imgHeight, true);
+    bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight());
+    loadsInputToByteBuffer();
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectionResult.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectionResult.java
new file mode 100644
index 0000000000..cf2902a5cb
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectionResult.java
@@ -0,0 +1,91 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import java.util.ArrayList;
+
+/** Result class for inference run on a single image. */
+public class OvicDetectionResult {
+
+  // Top K classes and probabilities.
+  public final ArrayList<BoundingBox> detections;
+  // Latency (ms).
+  public Long latency = -1L;
+  // id of the image.
+  public int id = -1;
+  // Number of valid detections (separately maintained, maybe different from detections.size()).
+  public int count = 0;
+
+  // Create OvicDetectionResult object with pre-filled capacity. Note that detections.size() will
+  // be equal to capacity after this call.
+  OvicDetectionResult(int capacity) {
+    detections = new ArrayList<BoundingBox>(capacity);
+    for (int i = 0; i < capacity; i++) {
+      detections.add(new BoundingBox(-1.0f, -1.0f, -1.0f, -1.0f, -1, -1.0f));
+    }
+  }
+
+  public void resetTo(Long latency, int id) {
+    count = 0;
+    this.latency = latency;
+    this.id = id;
+  }
+
+  public void addBox(float x1, float y1, float x2, float y2, int category, float score) {
+    detections.get(count).x1 = x1;
+    detections.get(count).y1 = y1;
+    detections.get(count).x2 = x2;
+    detections.get(count).y2 = y2;
+    detections.get(count).category = category;
+    detections.get(count).score = score;
+    count += 1;
+  }
+
+  public void scaleUp(double scaleFactorWidth, double scaleFactorHeight) {
+    for (BoundingBox box : detections) {
+      box.x1 = (float) (box.x1 * scaleFactorWidth);
+      box.y1 = (float) (box.y1 * scaleFactorHeight);
+      box.x2 = (float) (box.x2 * scaleFactorWidth);
+      box.y2 = (float) (box.y2 * scaleFactorHeight);
+    }
+  }
+
+  @Override
+  public String toString() {
+    String textToShow = latency + "ms";
+    int k = 0;
+    for (BoundingBox box : detections) {
+      textToShow +=
+          "\nPrediction ["
+              + k
+              + "] = Class "
+              + box.category
+              + " ("
+              + box.x1
+              + ", "
+              + box.y1
+              + ", "
+              + box.x2
+              + ", "
+              + box.y2
+              + ") : "
+              + box.score;
+      k++;
+    }
+
+
+    return textToShow;
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetector.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetector.java
new file mode 100644
index 0000000000..56836a79e5
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetector.java
@@ -0,0 +1,184 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.tensorflow.lite.Interpreter;
+import org.tensorflow.lite.TestHelper;
+
+/** Class for running COCO detection with a TfLite model. */
+public class OvicDetector implements AutoCloseable {
+
+  /** Tag for the {@link Log}. */
+  private static final String TAG = "OvicDetector";
+
+  /** An instance of the driver class to run model inference with Tensorflow Lite. */
+  private Interpreter tflite;
+
+  /** Labels corresponding to the output of the vision model. */
+  private final List<String> labelList;
+
+  /** Define the output format. */
+  private final Boolean inputIsFloat;
+
+  /** Number of detections per image. 10 for demo, 100 for the actual competition. */
+  private static final int NUM_RESULTS = 10;
+
+  /** The output arrays for the mobilenet SSD. */
+  private float[][][] outputLocations;
+  private float[][] outputClasses;
+  private float[][] outputScores;
+  private float[] numDetections;
+  private Map<Integer, Object> outputMap;
+
+  /** Input resolution. */
+  private final int[] inputDims;
+
+  /** Final result. */
+  public OvicDetectionResult result = null;
+
+  OvicDetector(InputStream labelInputStream, MappedByteBuffer model) throws IOException {
+    // Load the label list.
+    labelList = loadLabelList(labelInputStream);
+
+    // Create the TfLite interpreter.
+    tflite = new Interpreter(model, new Interpreter.Options().setNumThreads(1));
+    inputDims = TestHelper.getInputDims(tflite, 0);
+    inputIsFloat = TestHelper.getInputDataType(tflite, 0).equals("float");
+    if (inputDims.length != 4) {
+      throw new RuntimeException("The model's input dimensions must be 4 (BWHC).");
+    }
+    if (inputDims[0] != 1) {
+      throw new RuntimeException(
+          "The model must have a batch size of 1, got " + inputDims[0] + " instead.");
+    }
+    if (inputDims[3] != 3) {
+      throw new RuntimeException(
+          "The model must have three color channels, got " + inputDims[3] + " instead.");
+    }
+    // Check the resolution.
+    int minSide = Math.min(inputDims[1], inputDims[2]);
+    int maxSide = Math.max(inputDims[1], inputDims[2]);
+    if (minSide <= 0 || maxSide > 1000) {
+      throw new RuntimeException("The model's resolution must be between (0, 1000].");
+    }
+
+    // Initialize the input array and result arrays. The input images are stored in a list of
+    // Object. Since this function anaylzed one image per time, there is only 1 item.
+    // The output is fomulated as a map of int -> Object. The output arrays are added to the map.
+    outputLocations = new float[1][NUM_RESULTS][4];
+    outputClasses = new float[1][NUM_RESULTS];
+    outputScores = new float[1][NUM_RESULTS];
+    numDetections = new float[1];
+    outputMap = new HashMap<>();
+    outputMap.put(0, outputLocations);
+    outputMap.put(1, outputClasses);
+    outputMap.put(2, outputScores);
+    outputMap.put(3, numDetections);
+    // Preallocate the result. This will be where inference result is stored after each
+    // detectByteBuffer call.
+    result = new OvicDetectionResult(NUM_RESULTS);
+  }
+
+  public Boolean quantizedInput() {
+    return !inputIsFloat;
+  }
+
+  /** Reads label list from Assets. */
+  private static List<String> loadLabelList(InputStream labelInputStream) throws IOException {
+    List<String> labelList = new ArrayList<>();
+    try (BufferedReader reader =
+        new BufferedReader(new InputStreamReader(labelInputStream, StandardCharsets.UTF_8))) {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        labelList.add(line);
+      }
+    }
+    return labelList;
+  }
+
+  /**
+   * The interface to run the detection. This method currently only support float mobilenet_ssd
+   * model. The quantized models will be added in the future.
+   *
+   * @param imgData The image buffer in ByteBuffer format.
+   * @return boolean indicator of whether detection was a success. If success, the detection results
+   *  is available in the result member variable.
+   *     See OvicDetectionResult.java for details.
+   */
+  boolean detectByteBuffer(ByteBuffer imgData, int imageId) {
+    if (tflite == null) {
+      throw new RuntimeException(TAG + ": Detector has not been initialized; Failed.");
+    }
+    if (inputIsFloat == null) {
+      throw new RuntimeException(TAG + ": Detector input type has not been resolved.");
+    }
+
+    Object[] inputArray = {imgData};
+    tflite.runForMultipleInputsOutputs(inputArray, outputMap);
+
+    Long latency = getLastNativeInferenceLatencyMilliseconds();
+
+    // Update the results.
+    result.resetTo(latency, imageId);
+    for (int i = 0; i < NUM_RESULTS; i++) {
+      result.addBox(outputLocations[0][i][1] * inputDims[1],
+              outputLocations[0][i][0] * inputDims[1],
+              outputLocations[0][i][3] * inputDims[2],
+              outputLocations[0][i][2] * inputDims[2],
+              Math.round(outputClasses[0][i] + 1 /* Label offset */),
+              outputScores[0][i]);
+    }
+    return true;  // Marks that the result is available.
+  }
+
+  /*
+   * Get native inference latency of last image detection run.
+   *  @throws RuntimeException if model is uninitialized.
+   *  @return The inference latency in millisecond.
+   */
+  public Long getLastNativeInferenceLatencyMilliseconds() {
+    if (tflite == null) {
+      throw new RuntimeException(TAG + ": ImageNet classifier has not been initialized; Failed.");
+    }
+    Long latency = tflite.getLastNativeInferenceDurationNanoseconds();
+    return (latency == null) ? null : (Long) (latency / 1000000);
+  }
+
+  public int[] getInputDims() {
+    return inputDims;
+  }
+
+  public List<String> getLabels() {
+    return labelList;
+  }
+
+  /** Closes tflite to release resources. */
+  @Override
+  public void close() {
+    tflite.close();
+    tflite = null;
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java
new file mode 100644
index 0000000000..1a4e193ff2
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicDetectorBenchmarker.java
@@ -0,0 +1,160 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import android.graphics.Bitmap;
+import android.util.Log;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+
+/**
+ * Class that benchmarks object detection models.
+ */
+public final class OvicDetectorBenchmarker extends OvicBenchmarker {
+  /** Tag for the {@link Log}. */
+  private static final String TAG = "OvicDetectorBenchmarker";
+
+  public double scaleFactorWidth = 1.0f;
+  public double scaleFactorHeight = 1.0f;
+  private Bitmap scaledBitmap = null;  // Preallocate bitmap for scaling.
+
+  private OvicDetector detector;
+
+  /**
+   * Initializes an {@link OvicDetectionBenchmarker}
+   *
+   * @param wallTime: a double number specifying the total amount of time to benchmark.
+   */
+  public OvicDetectorBenchmarker(double wallTime) {
+    super(wallTime);
+  }
+
+  /** Check to see if the detector is ready to test. */
+  @Override
+  public boolean readyToTest() {
+    return (detector != null);
+  }
+
+  /**
+   * Getting the benchmarker ready for detecting images.
+   *
+   * @param labelInputStream: an {@link InputStream} specifying where the list of labels should be
+   *     read from.
+   * @param model: a {@link MappedByteBuffer} model to benchmark.
+   */
+  @Override
+  public void getReadyToTest(InputStream labelInputStream, MappedByteBuffer model) {
+    try {
+      Log.i(TAG, "Creating detector.");
+      detector = new OvicDetector(labelInputStream, model);
+      quantizedInput = detector.quantizedInput();
+      int[] inputDims = detector.getInputDims();
+      imgHeight = inputDims[1];
+      imgWidth = inputDims[2];
+      if (quantizedInput) {
+        imgData = ByteBuffer.allocateDirect(DIM_BATCH_SIZE * imgHeight * imgWidth * DIM_PIXEL_SIZE);
+      } else {
+        imgData =
+            ByteBuffer.allocateDirect(DIM_BATCH_SIZE * imgHeight * imgWidth * DIM_PIXEL_SIZE * 4);
+      }
+      imgData.order(ByteOrder.nativeOrder());
+      intValues = new int[imgHeight * imgWidth];
+      benchmarkStarted = false;
+    } catch (Exception e) {
+      Log.e(TAG, e.getMessage());
+      Log.e(TAG, "Failed to initialize COCO detector for the benchmarker.", e);
+    }
+  }
+
+  /**
+   * Perform detection on a single ByteBuffer {@link ByteBuffer} image. The image must have the
+   * same dimension that the model expects.
+   *
+   * @param image: a {@link ByteBuffer} image to process.
+   * @param imageId: an ID uniquely representing the image.
+   */
+  public boolean processBuffer(ByteBuffer image, int imageId) {
+    if (!readyToTest()) {
+      return false;
+    }
+    try {
+      if (!detector.detectByteBuffer(image, imageId)) {
+        return false;
+      }
+    } catch (RuntimeException e) {
+      Log.e(TAG, e.getMessage());
+      return false;
+    }
+
+    if (!benchmarkStarted) { // Skip the first image to discount warming-up time.
+      benchmarkStarted = true;
+    } else {
+      totalRuntime += ((double) detector.result.latency);
+    }
+    return true;  // Indicating that result is ready.
+  }
+
+  /**
+   * Perform detection on a single bitmap image.
+   *
+   * @param bitmap: a {@link Bitmap} image to process.
+   * @param imageId: an ID uniquely representing the image.
+   */
+  @Override
+  public boolean processBitmap(Bitmap bitmap, int imageId)
+      throws IOException, InterruptedException {
+    if (shouldStop() || !readyToTest()) {
+      return false;
+    }
+    convertBitmapToInput(bitmap);  // Scale bitmap if needed, store result in imgData.
+    if (!processBuffer(imgData, imageId)) {
+      return false;
+    }
+    // Scale results back to original image coordinates.
+    detector.result.scaleUp(scaleFactorWidth, scaleFactorHeight);
+    return true;  // Indicating that result is ready.
+  }
+
+  public OvicDetectionResult getLastDetectionResult() {
+    return detector.result;
+  }
+
+  @Override
+  public String getLastResultString() {
+    if (detector.result == null) {
+      return null;
+    }
+    return detector.result.toString();
+  }
+
+  /**
+   * Preprocess bitmap image into {@link ByteBuffer} format for the detector.
+   *
+   * @param bitmap: a {@link Bitmap} source image.
+   */
+  private void convertBitmapToInput(Bitmap bitmap) {
+    int originalWidth = bitmap.getWidth();
+    int originalHeight = bitmap.getHeight();
+    scaledBitmap = Bitmap.createScaledBitmap(bitmap, imgWidth, imgHeight, true);
+    scaleFactorWidth = originalWidth * 1.0 / imgWidth;
+    scaleFactorHeight = originalHeight * 1.0 / imgHeight;
+    scaledBitmap.getPixels(intValues, 0, imgWidth, 0, 0, imgWidth, imgHeight);
+    scaledBitmap.recycle();
+    loadsInputToByteBuffer();
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicValidator.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicValidator.java
index a504ec74a9..baa14baf92 100644
--- a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicValidator.java
+++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicValidator.java
@@ -51,7 +51,7 @@ public class OvicValidator {
       MappedByteBuffer model = loadModelFile(modelFile);
       OvicClassifier classifier = new OvicClassifier(labelsInputStream, model);
       ByteBuffer imgData = createByteBufferForClassifier(classifier);
-      OvicSingleImageResult testResult = classifier.classifyByteBuffer(imgData);
+      OvicClassificationResult testResult = classifier.classifyByteBuffer(imgData);
       if (testResult.topKClasses.isEmpty()) {
         throw new RuntimeException("Failed to return top K predictions.");
       }
diff --git a/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java
index 1587c3c56f..99e874ca78 100644
--- a/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java
+++ b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java
@@ -1,4 +1,4 @@
-/*Copyright 2018 Google LLC
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ public final class OvicClassifierTest {
   private MappedByteBuffer lowResModel = null;
   private ByteBuffer testImage = null;
   private ByteBuffer lowResTestImage = null;
-  private OvicSingleImageResult testResult = null;
+  private OvicClassificationResult testResult = null;
   private static final String LABELS_PATH =
       "tensorflow/contrib/lite/java/ovic/src/testdata/labels.txt";
   private static final String QUANTIZED_MODEL_PATH =
@@ -147,7 +147,7 @@ public final class OvicClassifierTest {
     return imgData;
   }
 
-  private static void assertCorrectTopK(OvicSingleImageResult testResult) {
+  private static void assertCorrectTopK(OvicClassificationResult testResult) {
     assertThat(testResult.topKClasses.size() > 0).isTrue();
     Boolean topKAccurate = false;
     // Assert that the correct class is in the top K.
diff --git a/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicDetectorTest.java b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicDetectorTest.java
new file mode 100644
index 0000000000..4681e26052
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicDetectorTest.java
@@ -0,0 +1,149 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+package org.tensorflow.ovic;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.awt.Graphics2D;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import javax.imageio.ImageIO;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Unit test for {@link org.tensorflow.ovic.OvicDetector}. */
+@RunWith(JUnit4.class)
+public final class OvicDetectorTest {
+  private OvicDetector detector = null;
+  private InputStream labelsInputStream = null;
+  private MappedByteBuffer model = null;
+  private ByteBuffer testImage = null;
+
+  private static final float IMAGE_MEAN = 128f;
+  private static final float IMAGE_STD = 128f;
+
+  private Boolean quantizedInput = null;
+  private static final String LABELS_PATH =
+      "tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt";
+  private static final String MODEL_PATH =
+      "external/tflite_mobilenet_ssd_quant/detect.tflite";
+  private static final String TEST_IMAGE_PATH =
+      "external/tflite_ovic_testdata/test_image_224.jpg";
+  private static final int GROUNDTRUTH = 1 /* Person */;
+
+  @Before
+  public void setUp() {
+    try {
+      // load models.
+      model = loadModelFile(MODEL_PATH);
+
+      // Load label files;
+      File labelsfile = new File(LABELS_PATH);
+      labelsInputStream = new FileInputStream(labelsfile);
+
+      // Create detector.
+      detector = new OvicDetector(labelsInputStream, model);
+      quantizedInput = detector.quantizedInput();
+
+      // Load test image and convert into byte buffer.
+      File imageFile = new File(TEST_IMAGE_PATH);
+      BufferedImage rawimg = ImageIO.read(imageFile);
+      int[] inputDims = detector.getInputDims();
+      BufferedImage img = new BufferedImage(inputDims[1], inputDims[2], rawimg.getType());
+      Graphics2D g = img.createGraphics();
+      g.drawImage(rawimg, 0, 0, inputDims[1], inputDims[2], null);
+      g.dispose();
+      testImage = toByteBuffer(img);
+    } catch (IOException e) {
+      System.out.println(e.getMessage());
+    }
+
+    System.out.println("Successfully setup");
+  }
+
+  private static MappedByteBuffer loadModelFile(String modelFilePath) throws IOException {
+    File modelfile = new File(modelFilePath);
+    FileInputStream inputStream = new FileInputStream(modelfile);
+    FileChannel fileChannel = inputStream.getChannel();
+    long startOffset = 0L;
+    long declaredLength = fileChannel.size();
+    return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
+  }
+
+  private ByteBuffer toByteBuffer(BufferedImage image) {
+    ByteBuffer imgData;
+    if (quantizedInput) {
+      imgData = ByteBuffer.allocateDirect(image.getHeight() * image.getWidth() * 3);
+    } else {
+      imgData = ByteBuffer.allocateDirect(image.getHeight() * image.getWidth() * 12);
+    }
+    imgData.order(ByteOrder.nativeOrder());
+    for (int y = 0; y < image.getHeight(); y++) {
+      for (int x = 0; x < image.getWidth(); x++) {
+        int pixelValue = image.getRGB(x, y);
+        if (quantizedInput) {
+          // Quantized model
+          imgData.put((byte) ((pixelValue >> 16) & 0xFF));
+          imgData.put((byte) ((pixelValue >> 8) & 0xFF));
+          imgData.put((byte) (pixelValue & 0xFF));
+        } else {
+          // Float model
+          imgData.putFloat((((pixelValue >> 16) & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+          imgData.putFloat((((pixelValue >> 8) & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+          imgData.putFloat(((pixelValue & 0xFF) - IMAGE_MEAN) / IMAGE_STD);
+        }
+      }
+    }
+    return imgData;
+  }
+
+  @Test
+  public void ovicDetector_detectSuccess() throws Exception {
+    assertThat(detector.detectByteBuffer(testImage, 1)).isTrue();
+    assertThat(detector.result != null).isTrue();
+  }
+
+  @Test
+  public void ovicDetector_simpleBatchTest() throws Exception {
+    final int numRepeats = 5;
+    for (int i = 0; i < numRepeats; i++) {
+      assertThat(detector.detectByteBuffer(testImage, 1)).isTrue();
+      OvicDetectionResult result = detector.result;
+      Boolean detectWithinTop5 = false;
+      for (int j = 0; j < Math.min(5, result.count); j++) {
+        if (result.detections.get(j).category == GROUNDTRUTH) {
+          detectWithinTop5 = true;
+          break;
+        }
+      }
+      if (!detectWithinTop5) {
+        System.out.println("---------------- Image " + i + " ---------------------");
+        System.out.println("Expect category " + GROUNDTRUTH);
+        System.out.println("Detection results: ");
+        System.out.println(result.toString());
+      }
+      assertThat(detectWithinTop5).isTrue();
+    }
+  }
+}
diff --git a/tensorflow/contrib/lite/java/ovic/src/testdata/BUILD b/tensorflow/contrib/lite/java/ovic/src/testdata/BUILD
index 1021ea30dd..051aa2204e 100644
--- a/tensorflow/contrib/lite/java/ovic/src/testdata/BUILD
+++ b/tensorflow/contrib/lite/java/ovic/src/testdata/BUILD
@@ -14,6 +14,9 @@ filegroup(
 )
 
 exports_files(
-    ["labels.txt"],
+    [
+        "labels.txt",
+        "coco_labels.txt",
+    ],
     visibility = ["//visibility:public"],
 )
diff --git a/tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt b/tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt
new file mode 100644
index 0000000000..d91f535b1a
--- /dev/null
+++ b/tensorflow/contrib/lite/java/ovic/src/testdata/coco_labels.txt
@@ -0,0 +1,91 @@
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+empty
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+empty
+backpack
+umbrella
+empty
+empty
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+empty
+wine glasses
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+empty
+dining table
+empty
+empty
+toilet
+empty
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+empty
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
+empty
-- 
GitLab


From 1630584951975479dee852cf6f7603fe6819fde1 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 1 Oct 2018 13:28:17 -0700
Subject: [PATCH 0166/1085] Fixes possible out-of-bounds access by strided
 slice.

PiperOrigin-RevId: 215269882
---
 tensorflow/core/kernels/strided_slice_op.cc      | 2 +-
 tensorflow/python/kernel_tests/array_ops_test.py | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index f0575de4d9..3e8a4c5b72 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -149,7 +149,7 @@ class StridedSliceOp : public OpKernel {
       // NDIM and T
       if (is_simple_slice && std::is_same<Device, CPUDevice>::value &&
           input_dims == 2 && processing_shape.dims() == 2 &&
-          final_shape.dims() == 2) {
+          final_shape.dims() == 2 && new_axis_mask == 0) {
         MemCpyFunctor<T> functor;
         if (functor.Copy(input, begin, end, result)) {
           return;
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index c5547b19be..dcc594789e 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -615,6 +615,14 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker[:, 0]
       _ = checker[:, :, 0]
 
+  def testBothNewAxisAndShrink(self):
+    with self.test_session(use_gpu=True):
+      ones = array_ops.placeholder(shape=[2, 2], dtype=dtypes.int16)
+      self.assertAllEqual(
+          ones[array_ops.newaxis, :, 0].eval(
+              feed_dict={ones: [[1, 1], [1, 1]]}),
+          [[1, 1]])
+
   def testTensorIndexing(self):
     with self.test_session(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
-- 
GitLab


From c86f5941359526b91d85daf844e94ff5d39b2d6c Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 1 Oct 2018 13:40:30 -0700
Subject: [PATCH 0167/1085] Make cond_v2 If op lowering work in a defun +
 eager.

Prior to this change, the lowering pass assumed that the If op
functions would be available in the If op's graph. If the If op is
defined in a defun and then called via eager execution, the functions
will be in the eager context, but not in the defun's graph. This
change makes the lowering pass correctly use the function library
passed in by the caller via GraphOptimizationPassOptions.

PiperOrigin-RevId: 215271990
---
 tensorflow/core/common_runtime/lower_if_op.cc | 43 ++++++++++++-------
 tensorflow/core/common_runtime/lower_if_op.h  |  5 ++-
 .../core/common_runtime/lower_if_op_test.cc   |  4 +-
 .../kernel_tests/control_flow_ops_py_test.py  | 22 ++++++++++
 4 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/tensorflow/core/common_runtime/lower_if_op.cc b/tensorflow/core/common_runtime/lower_if_op.cc
index dfce7c23e7..a02084f223 100644
--- a/tensorflow/core/common_runtime/lower_if_op.cc
+++ b/tensorflow/core/common_runtime/lower_if_op.cc
@@ -38,11 +38,12 @@ class CondBuilder {
  public:
   enum Branch { kElseBranch = 0, kThenBranch = 1 };
 
-  // Create a CondBuilder to create the lowering of If op.  that has then and
+  // Create a CondBuilder to create the lowered form of `if_op` with then and
   // else functions named `then_fn_name` and `else_fn_name` respectively in the
-  // given graph.
+  // `graph`. The functions should be available in `flib`.
   CondBuilder(Node* if_op, const string& then_fn_name,
-              const string& else_fn_name, Graph* graph);
+              const string& else_fn_name, const FunctionLibraryDefinition& flib,
+              Graph* graph);
 
   // Constructs the basic conditional control flow using switch and merge nodes.
   Status CreatePivotNodes();
@@ -89,6 +90,7 @@ class CondBuilder {
   Node* then_call_node_;
   Node* else_call_node_;
   Graph* graph_;
+  const FunctionLibraryDefinition& flib_;
   string name_;
 
   NodeBuilder then_call_builder_;
@@ -96,9 +98,11 @@ class CondBuilder {
 };
 
 CondBuilder::CondBuilder(Node* if_op, const string& then_fn_name,
-                         const string& else_fn_name, Graph* graph)
+                         const string& else_fn_name,
+                         const FunctionLibraryDefinition& flib, Graph* graph)
     : if_op_(if_op),
       graph_(graph),
+      flib_(flib),
       name_(if_op->name()),
       then_call_builder_(NewName("then"), then_fn_name, graph->op_registry()),
       else_call_builder_(NewName("else"), else_fn_name, graph->op_registry()) {
@@ -193,15 +197,15 @@ Status CondBuilder::AddOutputs() {
   return Status::OK();
 }
 
-Status InlineCallInGraph(Node* n, Graph* g) {
-  const auto& lib = g->flib_def();
-  const FunctionDef* fdef = lib.Find(n->type_string());
+Status InlineCallInGraph(Node* n, const FunctionLibraryDefinition& flib,
+                         Graph* g) {
+  const FunctionDef* fdef = flib.Find(n->type_string());
   CHECK(fdef != nullptr);
   FunctionBody* fbody;
   TF_RETURN_IF_ERROR(
-      FunctionDefToBodyHelper(*fdef, n->attrs(), &lib,
-                              [&lib](const string& op, const OpDef** sig) {
-                                return lib.LookUpOpDef(op, sig);
+      FunctionDefToBodyHelper(*fdef, n->attrs(), &flib,
+                              [&flib](const string& op, const OpDef** sig) {
+                                return flib.LookUpOpDef(op, sig);
                               },
                               &fbody));
   // TODO(jpienaar): Improve this interface to make the need to delete it
@@ -219,8 +223,8 @@ Status CondBuilder::BuildLoweredIfOutput() {
 }
 
 Status CondBuilder::InlineCallNodes() {
-  TF_RETURN_IF_ERROR(InlineCallInGraph(then_call_node_, graph_));
-  TF_RETURN_IF_ERROR(InlineCallInGraph(else_call_node_, graph_));
+  TF_RETURN_IF_ERROR(InlineCallInGraph(then_call_node_, flib_, graph_));
+  TF_RETURN_IF_ERROR(InlineCallInGraph(else_call_node_, flib_, graph_));
   return Status::OK();
 }
 
@@ -240,6 +244,12 @@ Status LowerIfOpPass::Run(const GraphOptimizationPassOptions& options) {
     return errors::Internal("Lowering If op requires a graph to be available.");
   }
 
+  FunctionLibraryDefinition* flib = options.flib_def;
+  if (flib == nullptr) {
+    return errors::Internal(
+        "Lowering If op requires a FunctionLibraryDefinition to be available.");
+  }
+
   // Match all the nodes that need to be rewritten.
   gtl::InlinedVector<Node*, 2> matches;
   for (Node* n : g->op_nodes()) {
@@ -251,12 +261,14 @@ Status LowerIfOpPass::Run(const GraphOptimizationPassOptions& options) {
     }
   }
   for (Node* n : matches) {
-    TF_RETURN_IF_ERROR(RewriteNode(n, g));
+    TF_RETURN_IF_ERROR(RewriteNode(n, *flib, g));
   }
   return Status::OK();
 }
 
-Status LowerIfOpPass::RewriteNode(Node* n, Graph* g) {
+Status LowerIfOpPass::RewriteNode(Node* n,
+                                  const FunctionLibraryDefinition& flib,
+                                  Graph* g) {
   const AttrValue* then_attr = n->attrs().Find("then_branch");
   if (then_attr == nullptr) {
     return errors::InvalidArgument("Then branch function missing");
@@ -266,7 +278,8 @@ Status LowerIfOpPass::RewriteNode(Node* n, Graph* g) {
     return errors::InvalidArgument("Else branch function missing");
   }
 
-  CondBuilder cb(n, then_attr->func().name(), else_attr->func().name(), g);
+  CondBuilder cb(n, then_attr->func().name(), else_attr->func().name(), flib,
+                 g);
   TF_RETURN_IF_ERROR(cb.CreatePivotNodes());
   TF_RETURN_IF_ERROR(cb.AddInputs());
   TF_RETURN_IF_ERROR(cb.AddOutputs());
diff --git a/tensorflow/core/common_runtime/lower_if_op.h b/tensorflow/core/common_runtime/lower_if_op.h
index a9ef39ae5c..5ab1123e3f 100644
--- a/tensorflow/core/common_runtime/lower_if_op.h
+++ b/tensorflow/core/common_runtime/lower_if_op.h
@@ -29,8 +29,9 @@ class LowerIfOpPass : public GraphOptimizationPass {
   Status Run(const GraphOptimizationPassOptions& options) override;
 
  private:
-  // Rewrite the given If node `n` in graph `g` to use the switch-merge form.
-  Status RewriteNode(Node* n, Graph* g);
+  // Rewrite the given If node `n` in graph `g` to use the switch-merge
+  // form. `flib` should contain the branch functions referenced by `n`.
+  Status RewriteNode(Node* n, const FunctionLibraryDefinition& flib, Graph* g);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/lower_if_op_test.cc b/tensorflow/core/common_runtime/lower_if_op_test.cc
index 319a617b32..044a355d06 100644
--- a/tensorflow/core/common_runtime/lower_if_op_test.cc
+++ b/tensorflow/core/common_runtime/lower_if_op_test.cc
@@ -36,9 +36,7 @@ namespace tensorflow {
 namespace {
 
 Status Rewrite(std::unique_ptr<Graph>* graph) {
-  FunctionDefLibrary flib;
-  FunctionLibraryDefinition flib_def((*graph)->op_registry(), flib);
-
+  FunctionLibraryDefinition flib_def((*graph)->flib_def());
   GraphOptimizationPassOptions opt_options;
   opt_options.graph = graph;
   opt_options.flib_def = &flib_def;
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index d91a848e01..ae61be614e 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -31,6 +31,7 @@ from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import device_lib
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function as eager_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -3414,6 +3415,27 @@ class EagerTest(test.TestCase):
       self.assertAllEqual(r.numpy(), 10)
       self.assertFalse(isinstance(r, list))
 
+  def testCondInDefun(self):
+    if "GPU" in [d.device_type for d in device_lib.list_local_devices()]:
+      return unittest.skip("b/113346829 (gpu failure)")
+
+    with context.eager_mode():
+
+      @eager_function.defun
+      def foo(pred):
+        # TODO(b/111124878): this only needs to output one element.
+        fn1 = lambda: (constant_op.constant(10), constant_op.constant(100))
+        fn2 = lambda: (constant_op.constant(20), constant_op.constant(200))
+        return control_flow_ops.cond(constant_op.constant(pred), fn1, fn2)
+
+      r = foo(True)
+      self.assertAllEqual(r[0].numpy(), 10)
+      self.assertNotIsInstance(r, list)
+
+      r = foo(False)
+      self.assertAllEqual(r[0].numpy(), 20)
+      self.assertFalse(isinstance(r, list))
+
   def testWhileLoop(self):
     with context.eager_mode():
       tensor = constant_op.constant([1, 2, 3, 4, 5])
-- 
GitLab


From 44acd839c57494860666c799afd24360f1df3bed Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 13:42:40 -0700
Subject: [PATCH 0168/1085] Fix reported cuDNN default version during
 configuration.

PiperOrigin-RevId: 215272308
---
 configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 2de2365ff3..57d9574d1f 100644
--- a/configure.py
+++ b/configure.py
@@ -884,7 +884,7 @@ def set_tf_cudnn_version(environ_cp):
   """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION."""
   ask_cudnn_version = (
       'Please specify the cuDNN version you want to use. '
-      '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION
+      '[Leave empty to default to cuDNN %s]: ') % _DEFAULT_CUDNN_VERSION
 
   for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
     tf_cudnn_version = get_from_env_or_user_or_default(
-- 
GitLab


From 3039a4694e22674b502257ae34b0a5b614a631f3 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Mon, 1 Oct 2018 13:43:49 -0700
Subject: [PATCH 0169/1085] [XLA] Migrate from gtl::FlatMap to
 absl::flat_hash_map

PiperOrigin-RevId: 215272497
---
 tensorflow/compiler/jit/BUILD                 |  5 +++
 tensorflow/compiler/jit/deadness_analysis.cc  | 22 ++++++------
 .../compiler/jit/deadness_analysis_internal.h |  4 +--
 tensorflow/compiler/jit/kernels/BUILD         |  1 +
 tensorflow/compiler/jit/kernels/xla_ops.cc    |  3 +-
 .../jit/mark_for_compilation_pass_test.cc     | 11 +++---
 .../jit/resource_operation_safety_analysis.cc |  1 -
 .../compiler/jit/xla_compilation_cache.h      |  6 ++--
 tensorflow/compiler/tf2xla/BUILD              |  2 ++
 .../tf2xla/resource_operation_table.cc        | 14 ++++----
 .../tf2xla/resource_operation_table_test.cc   |  3 +-
 tensorflow/compiler/xla/client/BUILD          |  1 +
 tensorflow/compiler/xla/client/xla_builder.h  |  4 +--
 tensorflow/compiler/xla/service/BUILD         | 33 +++++++++++++++++
 .../compiler/xla/service/allocation_tracker.h |  5 +--
 .../xla/service/batchnorm_expander.cc         |  1 -
 .../xla/service/bfloat16_propagation.h        |  6 ++--
 .../compiler/xla/service/buffer_assignment.cc | 30 ++++++++--------
 .../compiler/xla/service/buffer_assignment.h  | 23 ++++++------
 .../compiler/xla/service/buffer_liveness.h    |  1 -
 tensorflow/compiler/xla/service/call_graph.h  |  6 ++--
 .../compiler/xla/service/copy_insertion.cc    | 11 +++---
 tensorflow/compiler/xla/service/cpu/BUILD     |  4 +++
 .../xla/service/cpu/cpu_layout_assignment.cc  |  3 +-
 .../compiler/xla/service/cpu/cpu_runtime.cc   |  4 +--
 .../compiler/xla/service/cpu/ir_emitter.cc    |  4 +--
 .../compiler/xla/service/cpu/ir_emitter.h     | 10 +++---
 .../service/cpu/target_machine_features.cc    |  1 +
 .../xla/service/cpu/target_machine_features.h |  5 ++-
 tensorflow/compiler/xla/service/defuser.cc    |  3 +-
 .../compiler/xla/service/dfs_hlo_visitor.h    |  1 -
 tensorflow/compiler/xla/service/gpu/BUILD     |  2 ++
 .../xla/service/gpu/gpu_executable.cc         |  3 +-
 .../compiler/xla/service/gpu/gpu_executable.h |  4 +--
 .../xla/service/gpu/stream_assignment.h       |  4 +--
 .../compiler/xla/service/heap_simulator.cc    | 17 +++++----
 .../compiler/xla/service/heap_simulator.h     | 21 ++++++-----
 .../xla/service/heap_simulator_test.cc        |  4 +--
 .../xla/service/hlo_alias_analysis.cc         |  7 ++--
 .../compiler/xla/service/hlo_alias_analysis.h |  3 +-
 .../compiler/xla/service/hlo_clone_context.h  | 12 +++----
 .../compiler/xla/service/hlo_computation.cc   | 11 +++---
 .../compiler/xla/service/hlo_computation.h    | 10 +++---
 .../compiler/xla/service/hlo_domain_map.cc    |  5 +--
 .../compiler/xla/service/hlo_domain_map.h     |  9 +++--
 .../compiler/xla/service/hlo_instruction.cc   | 13 +++----
 .../compiler/xla/service/hlo_instruction.h    |  8 ++---
 .../compiler/xla/service/hlo_instructions.cc  |  6 ++--
 .../xla/service/hlo_memory_scheduler.cc       | 35 +++++++++----------
 .../xla/service/hlo_memory_scheduler.h        | 11 +++---
 .../xla/service/hlo_memory_scheduler_test.cc  |  5 +--
 tensorflow/compiler/xla/service/hlo_module.cc |  5 +--
 .../xla/service/hlo_module_group_metadata.h   | 14 ++++----
 .../xla/service/hlo_module_group_util.h       |  4 +--
 tensorflow/compiler/xla/service/hlo_opcode.cc |  4 +--
 .../compiler/xla/service/hlo_ordering.h       |  8 ++---
 .../compiler/xla/service/hlo_pass_pipeline.cc |  3 +-
 .../compiler/xla/service/hlo_reachability.h   |  4 +--
 .../xla/service/hlo_rematerialization.cc      | 11 +++---
 .../xla/service/hlo_rematerialization.h       |  4 +--
 .../compiler/xla/service/hlo_schedule.cc      | 19 +++++-----
 .../compiler/xla/service/hlo_schedule.h       |  6 ++--
 .../compiler/xla/service/hlo_verifier.cc      |  6 ++--
 .../xla/service/indexed_array_analysis.cc     |  3 +-
 .../xla/service/indexed_array_analysis.h      |  4 +--
 .../xla/service/instruction_fusion.cc         |  8 ++---
 .../compiler/xla/service/instruction_fusion.h |  5 +--
 .../compiler/xla/service/layout_assignment.h  |  6 ++--
 tensorflow/compiler/xla/service/llvm_ir/BUILD |  1 +
 .../xla/service/llvm_ir/alias_analysis.h      | 10 +++---
 .../xla/service/multi_output_fusion.cc        |  2 +-
 .../xla/service/multi_output_fusion.h         |  3 +-
 .../compiler/xla/service/name_uniquer.h       |  4 +--
 .../xla/service/reduce_precision_insertion.h  |  1 -
 .../xla/service/tuple_points_to_analysis.h    |  1 -
 .../service/while_loop_constant_sinking.cc    |  1 -
 .../while_loop_invariant_code_motion.cc       |  8 ++---
 .../xla/service/while_loop_simplifier.cc      |  6 ++--
 tensorflow/compiler/xla/tests/BUILD           |  1 +
 .../xla/tests/xla_hlo_profile_test.cc         |  8 ++---
 80 files changed, 319 insertions(+), 259 deletions(-)

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 5bf4af1014..29b60d1dbe 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -258,6 +258,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:variable_ops",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -323,6 +324,7 @@ cc_library(
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
@@ -400,6 +402,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:bounds_check",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
@@ -471,6 +474,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -509,6 +513,7 @@ tf_cc_test(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/grappler/optimizers/data:graph_utils",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index 25e2e9a7af..e63d4b7792 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/deadness_analysis.h"
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/jit/deadness_analysis_internal.h"
 #include "tensorflow/core/graph/algorithm.h"
@@ -420,15 +421,15 @@ class PredicateFactory {
     }
   };
 
-  gtl::FlatMap<SignatureForAndOr, std::unique_ptr<Predicate>,
-               HashSignatureForAndOr>
+  absl::flat_hash_map<SignatureForAndOr, std::unique_ptr<Predicate>,
+                      HashSignatureForAndOr>
       interned_and_or_instances_;
-  gtl::FlatMap<SignatureForNot, std::unique_ptr<Predicate>>
+  absl::flat_hash_map<SignatureForNot, std::unique_ptr<Predicate>>
       interned_not_instances_;
-  gtl::FlatMap<SignatureForAndRec, std::unique_ptr<Predicate>>
+  absl::flat_hash_map<SignatureForAndRec, std::unique_ptr<Predicate>>
       interned_and_rec_instances_;
-  gtl::FlatMap<SignatureForSymbol, std::unique_ptr<Predicate>,
-               HashSignatureForSymbol>
+  absl::flat_hash_map<SignatureForSymbol, std::unique_ptr<Predicate>,
+                      HashSignatureForSymbol>
       interned_symbol_instances_;
 };
 
@@ -572,7 +573,8 @@ class DeadnessAnalysisImpl : public DeadnessAnalysis {
   Status PopulateWithReversePostOrder(absl::Span<Node* const> rpo);
   bool HasInputsWithMismatchingDeadness(const Node& node) override;
   void Print() const override;
-  gtl::FlatMap<TensorId, string, TensorId::Hasher> PredicateMapAsString() const;
+  absl::flat_hash_map<TensorId, string, TensorId::Hasher> PredicateMapAsString()
+      const;
 
  private:
   enum class EdgeKind { kDataAndControl, kDataOnly, kControlOnly };
@@ -614,7 +616,7 @@ class DeadnessAnalysisImpl : public DeadnessAnalysis {
   Status HandleNode(Node* n, std::vector<bool>* should_revisit);
 
   const Graph& graph_;
-  gtl::FlatMap<TensorId, Predicate*, TensorId::Hasher> predicate_map_;
+  absl::flat_hash_map<TensorId, Predicate*, TensorId::Hasher> predicate_map_;
   PredicateFactory predicate_factory_;
   bool vlog_;
 };
@@ -977,9 +979,9 @@ DeadnessAnalysis::~DeadnessAnalysis() {}
   return Status::OK();
 }
 
-gtl::FlatMap<TensorId, string, TensorId::Hasher>
+absl::flat_hash_map<TensorId, string, TensorId::Hasher>
 DeadnessAnalysisImpl::PredicateMapAsString() const {
-  gtl::FlatMap<TensorId, string, TensorId::Hasher> result;
+  absl::flat_hash_map<TensorId, string, TensorId::Hasher> result;
   std::vector<TensorId> tensor_ids;
   for (const auto& kv_pair : predicate_map_) {
     CHECK(result.insert({kv_pair.first, kv_pair.second->ToString()}).second);
diff --git a/tensorflow/compiler/jit/deadness_analysis_internal.h b/tensorflow/compiler/jit/deadness_analysis_internal.h
index 3df2679c62..354782374a 100644
--- a/tensorflow/compiler/jit/deadness_analysis_internal.h
+++ b/tensorflow/compiler/jit/deadness_analysis_internal.h
@@ -16,15 +16,15 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_JIT_DEADNESS_ANALYSIS_INTERNAL_H_
 #define TENSORFLOW_COMPILER_JIT_DEADNESS_ANALYSIS_INTERNAL_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace tensorflow {
 namespace deadness_analysis_internal {
 
 // Returns a map describing the predicate each Tensor was mapped to.  For
 // testing purposes only.
-using PredicateMapTy = gtl::FlatMap<TensorId, string, TensorId::Hasher>;
+using PredicateMapTy = absl::flat_hash_map<TensorId, string, TensorId::Hasher>;
 Status ComputePredicates(const Graph& graph, PredicateMapTy* out_predicate_map);
 
 // Returns a map describing the predicate each Tensor was mapped to.  For
diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD
index 0839f1cb3d..26cb3af9d6 100644
--- a/tensorflow/compiler/jit/kernels/BUILD
+++ b/tensorflow/compiler/jit/kernels/BUILD
@@ -26,6 +26,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/core/kernels:variable_ops",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
     alwayslink = 1,
diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc
index a85006eb03..cfd27a6510 100644
--- a/tensorflow/compiler/jit/kernels/xla_ops.cc
+++ b/tensorflow/compiler/jit/kernels/xla_ops.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/kernels/xla_ops.h"
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
@@ -163,7 +164,7 @@ class XlaExecutableClosureStore {
  private:
   mutex mutex_;
   int64 key_counter_ GUARDED_BY(mutex_);
-  gtl::FlatMap<KeyT, XlaExecutableClosure> closures_ GUARDED_BY(mutex_);
+  absl::flat_hash_map<KeyT, XlaExecutableClosure> closures_ GUARDED_BY(mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(XlaExecutableClosureStore);
 };
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
index 4f9145b479..2a80c745e3 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.h"
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/match.h"
 #include "tensorflow/cc/framework/ops.h"
@@ -61,10 +62,10 @@ std::unordered_map<string, string> GetClusters(const Graph& graph) {
   return ids;
 }
 
-gtl::FlatMap<string, std::vector<string>> GetClusterSets(
+absl::flat_hash_map<string, std::vector<string>> GetClusterSets(
     const Graph& g, std::vector<string>* cluster_names = nullptr) {
   CHECK(cluster_names == nullptr || cluster_names->empty());
-  gtl::FlatMap<string, std::vector<string>> cluster_sets;
+  absl::flat_hash_map<string, std::vector<string>> cluster_sets;
   for (const auto& p : GetClusters(g)) {
     cluster_sets[p.second].push_back(p.first);
   }
@@ -566,7 +567,7 @@ TEST(XlaCompilationTest, ResourcesClusteringAllowed) {
   std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
   TF_EXPECT_OK(root.ToGraph(graph.get()));
   TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph));
-  gtl::FlatMap<string, std::vector<string>> cluster_sets =
+  absl::flat_hash_map<string, std::vector<string>> cluster_sets =
       GetClusterSets(*graph);
   ASSERT_EQ(cluster_sets.size(), 1);
   std::vector<string> expected_clustered_nodes = {"AssignmentW", "ReadR",
@@ -586,7 +587,7 @@ TEST(XlaCompilationTest, ResourcesClusteringDisallowed) {
   std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
   TF_EXPECT_OK(root.ToGraph(graph.get()));
   TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph));
-  gtl::FlatMap<string, std::vector<string>> cluster_sets =
+  absl::flat_hash_map<string, std::vector<string>> cluster_sets =
       GetClusterSets(*graph);
   ASSERT_EQ(cluster_sets.size(), 1);
   std::vector<string> expected_clustered_nodes = {"AssignmentW",
@@ -616,7 +617,7 @@ TEST(XlaCompilationTest, ChainOfOps) {
   TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph));
 
   std::vector<string> cluster_names;
-  gtl::FlatMap<string, std::vector<string>> cluster_sets =
+  absl::flat_hash_map<string, std::vector<string>> cluster_sets =
       GetClusterSets(*graph, &cluster_names);
 
   ASSERT_EQ(cluster_sets.size(), 2);
diff --git a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
index 56e35c0059..657bb409db 100644
--- a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
+++ b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
@@ -89,7 +89,6 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/util/ptr_util.h"
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h
index 10ad87e38c..17c0321c1e 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.h
+++ b/tensorflow/compiler/jit/xla_compilation_cache.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_JIT_XLA_COMPILATION_CACHE_H_
 #define TENSORFLOW_COMPILER_JIT_XLA_COMPILATION_CACHE_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/tf2xla/xla_compiler.h"
 #include "tensorflow/compiler/tf2xla/xla_context.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
@@ -24,7 +25,6 @@ limitations under the License.
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/threadpool.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 
@@ -152,7 +152,7 @@ class XlaCompilationCache : public ResourceBase {
   };
 
   mutex compile_cache_mu_;
-  gtl::FlatMap<Signature, std::unique_ptr<Entry>, Signature::Hash> cache_
+  absl::flat_hash_map<Signature, std::unique_ptr<Entry>, Signature::Hash> cache_
       GUARDED_BY(compile_cache_mu_);
 
   struct CompileStats {
@@ -165,7 +165,7 @@ class XlaCompilationCache : public ResourceBase {
   mutex compile_stats_mu_;
 
   // Maps cluster names to compilation statistics for said cluster.
-  gtl::FlatMap<string, CompileStats> compile_stats_
+  absl::flat_hash_map<string, CompileStats> compile_stats_
       GUARDED_BY(compile_stats_mu_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(XlaCompilationCache);
diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index ba1e3b2b4f..3f631f91ec 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -635,6 +635,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:ops",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -649,6 +650,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/compiler/tf2xla/resource_operation_table.cc b/tensorflow/compiler/tf2xla/resource_operation_table.cc
index 20f2ce2919..72b240996f 100644
--- a/tensorflow/compiler/tf2xla/resource_operation_table.cc
+++ b/tensorflow/compiler/tf2xla/resource_operation_table.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/tf2xla/resource_operation_table.h"
 #include "absl/algorithm/container.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
+#include "absl/container/flat_hash_map.h"
 
 namespace tensorflow {
 /*static*/ absl::string_view XlaResourceOpInfo::XlaResourceOpKindToString(
@@ -30,9 +30,9 @@ namespace tensorflow {
   }
 }
 
-static gtl::FlatMap<absl::string_view, XlaResourceOpInfo>*
+static absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>*
 CreateResourceOpInfoMap() {
-  auto* result = new gtl::FlatMap<absl::string_view, XlaResourceOpInfo>;
+  auto* result = new absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>;
 
   auto add = [&](absl::string_view op, XlaResourceOpKind op_kind,
                  XlaResourceKind resource_kind) {
@@ -103,15 +103,15 @@ CreateResourceOpInfoMap() {
   return result;
 }
 
-static const gtl::FlatMap<absl::string_view, XlaResourceOpInfo>&
+static const absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>&
 GetStaticResourceOpInfoMap() {
-  static gtl::FlatMap<absl::string_view, XlaResourceOpInfo>* op_info_map =
-      CreateResourceOpInfoMap();
+  static absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>*
+      op_info_map = CreateResourceOpInfoMap();
   return *op_info_map;
 }
 
 const XlaResourceOpInfo* GetResourceOpInfoForOp(absl::string_view op) {
-  const gtl::FlatMap<absl::string_view, XlaResourceOpInfo>& op_infos =
+  const absl::flat_hash_map<absl::string_view, XlaResourceOpInfo>& op_infos =
       GetStaticResourceOpInfoMap();
   auto it = op_infos.find(op);
   return it == op_infos.end() ? nullptr : &it->second;
diff --git a/tensorflow/compiler/tf2xla/resource_operation_table_test.cc b/tensorflow/compiler/tf2xla/resource_operation_table_test.cc
index a85ef040a7..956f597301 100644
--- a/tensorflow/compiler/tf2xla/resource_operation_table_test.cc
+++ b/tensorflow/compiler/tf2xla/resource_operation_table_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/resource_operation_table.h"
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -33,7 +34,7 @@ bool HasResourceInputOrOutput(const OpDef& op_def) {
 }
 
 TEST(ResourceOperationTableTest, HaveAllResourceOps) {
-  gtl::FlatMap<string, bool> known_resource_ops;
+  absl::flat_hash_map<string, bool> known_resource_ops;
   for (absl::string_view known_resource_op :
        resource_op_table_internal::GetKnownResourceOps()) {
     ASSERT_TRUE(
diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD
index f825f67b44..1191cff109 100644
--- a/tensorflow/compiler/xla/client/BUILD
+++ b/tensorflow/compiler/xla/client/BUILD
@@ -220,6 +220,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:shape_inference",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 1da6ddd318..b7295e8a53 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include <type_traits>
 #include <utility>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/client/padding.h"
@@ -34,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stacktrace.h"
@@ -1027,7 +1027,7 @@ class XlaBuilder {
 
   // A map from XlaOp::Handle to the index in the instructions_ vector where the
   // instruction is held.
-  tensorflow::gtl::FlatMap<int64, int64> handle_to_index_;
+  absl::flat_hash_map<int64, int64> handle_to_index_;
 
   // The embedded computations used by this computation. Each computation was
   // the entry computation of some XlaComputation, the key is the unique id of
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index e800cf470c..8da6364786 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -146,6 +146,7 @@ cc_library(
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -250,6 +251,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -333,6 +335,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -395,6 +398,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/types:span",
     ],
 )
@@ -485,6 +489,7 @@ cc_library(
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -903,6 +908,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
@@ -952,6 +958,7 @@ cc_library(
     deps = [
         "//tensorflow/compiler/xla:types",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -987,6 +994,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
     ],
@@ -1034,6 +1042,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -1087,6 +1096,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
     ],
@@ -1125,6 +1135,7 @@ cc_library(
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -1146,6 +1157,7 @@ tf_cc_test(
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -1196,6 +1208,7 @@ cc_library(
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/types:optional",
     ],
@@ -1216,6 +1229,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
@@ -1260,6 +1274,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -1280,6 +1295,7 @@ tf_cc_test(
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:test",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -1304,6 +1320,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -1330,6 +1347,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -1385,6 +1403,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/types:optional",
         "@com_google_absl//absl/types:span",
     ],
@@ -1640,6 +1659,7 @@ cc_library(
         ":while_loop_analysis",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
     ],
@@ -1671,6 +1691,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -2203,6 +2224,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
     ],
@@ -2263,6 +2285,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -2319,6 +2342,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2345,6 +2369,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2416,6 +2441,7 @@ cc_library(
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
@@ -2460,6 +2486,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2588,6 +2615,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2701,6 +2729,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -3147,6 +3176,7 @@ cc_library(
         ":hlo_pass_pipeline",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
@@ -3269,6 +3299,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -3298,6 +3329,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -3354,6 +3386,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h
index a7d8927cf7..af227fe4da 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.h
+++ b/tensorflow/compiler/xla/service/allocation_tracker.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/backend.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -110,7 +111,7 @@ class AllocationTracker {
 
   // A map from device memory opaque value to allocation. One such map is
   // maintained per device ordinal.
-  using AllocationMap = tensorflow::gtl::FlatMap<const void*, Allocation>;
+  using AllocationMap = absl::flat_hash_map<const void*, Allocation>;
 
   tensorflow::mutex mutex_;
 
@@ -146,7 +147,7 @@ class AllocationTracker {
   // non-owning "view" into a tuple's sub-buffers.  The sub-buffers are then
   // free'd when both the view *and* the original tuple are Unregistered.  This
   // refcounting is managed in opaque_to_allocation_map_.
-  tensorflow::gtl::FlatMap<int64, std::vector<std::unique_ptr<ShapedBuffer>>>
+  absl::flat_hash_map<int64, std::vector<std::unique_ptr<ShapedBuffer>>>
       handle_to_shaped_buffers_ GUARDED_BY(mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(AllocationTracker);
diff --git a/tensorflow/compiler/xla/service/batchnorm_expander.cc b/tensorflow/compiler/xla/service/batchnorm_expander.cc
index 30d33e0d35..f70f6ddfec 100644
--- a/tensorflow/compiler/xla/service/batchnorm_expander.cc
+++ b/tensorflow/compiler/xla/service/batchnorm_expander.cc
@@ -35,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h
index 6a62439f88..c74326f631 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.h
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/bfloat16_support.h"
 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -186,7 +187,7 @@ class BFloat16Propagation : public HloModulePass {
 
   // Mapping from each HloComputation to the number of callers to it in the
   // module. Populated at the beginning of this pass.
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> caller_counts_;
+  absl::flat_hash_map<const HloComputation*, int64> caller_counts_;
 
   // We first store the potential F32-to-BF16 changes to changes_to_bf16_, which
   // are subject to further adjustment, then finally applied to the HLOs. This
@@ -195,8 +196,7 @@ class BFloat16Propagation : public HloModulePass {
   //
   // For each HloInstruction, changes_to_bf16_ stores the affected buffers in
   // the output as a map from in-place pointers to subshapes to shape indices.
-  tensorflow::gtl::FlatMap<HloInstruction*,
-                           tensorflow::gtl::FlatMap<Shape*, ShapeIndex>>
+  absl::flat_hash_map<HloInstruction*, absl::flat_hash_map<Shape*, ShapeIndex>>
       changes_to_bf16_;
 
   // Whether the last processed HLO module has been changed by this pass.
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 34a7be0e9c..3efa0b1dad 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <ostream>
 #include <utility>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -41,9 +42,9 @@ limitations under the License.
 namespace xla {
 namespace {
 
+using absl::flat_hash_map;
 using absl::StrAppend;
 using absl::StrAppendFormat;
-using ::tensorflow::gtl::FlatMap;
 using ::tensorflow::gtl::FlatSet;
 using ::tensorflow::strings::HumanReadableNumBytes;
 
@@ -519,7 +520,8 @@ void BufferAssignment::AddAssignment(BufferAllocation* allocation,
 // BufferAllocation.
 void BufferAssignment::CombineTempAllocations() {
   VLOG(1) << "CombineTempAllocations()";
-  FlatMap<LogicalBuffer::Color, BufferAllocation, LogicalBuffer::Color::Hasher>
+  flat_hash_map<LogicalBuffer::Color, BufferAllocation,
+                LogicalBuffer::Color::Hasher>
       combined_allocation_map;
 
   // Move all temp allocations into a single run at the end of the allocations
@@ -582,7 +584,8 @@ void BufferAssignment::CombineTempAllocations() {
   }
 
   // Update allocation indices to their new positions.
-  allocation_index_for_buffer_.clear_no_resize();
+  allocation_index_for_buffer_.erase(allocation_index_for_buffer_.begin(),
+                                     allocation_index_for_buffer_.end());
   for (size_t index = 0; index < allocations_.size(); ++index) {
     BufferAllocation* allocation = &allocations_[index];
     allocation->set_index(index);
@@ -814,7 +817,7 @@ Status BufferAssigner::AssignBuffersForComputation(
     const HloComputation* computation, bool is_thread_local,
     const FlatSet<const LogicalBuffer*>& colocated_buffers,
     const FlatSet<BufferAllocation::Index>& colocated_allocations,
-    FlatMap<const HloComputation*, FlatSet<const LogicalBuffer*>>*
+    flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>*
         buffers_to_assign_sequentially,
     BufferAssignment* assignment) {
   // Buffers are sorted and assigned to BufferAllocations in decreasing order of
@@ -833,7 +836,7 @@ Status BufferAssigner::AssignBuffersForComputation(
 
   // Generate a post order sort of instructions for sorting of the
   // LogicalBuffers.
-  FlatMap<const HloInstruction*, int> post_order_position;
+  flat_hash_map<const HloInstruction*, int> post_order_position;
   int position = 0;
   for (auto* instruction : computation->MakeInstructionPostOrder()) {
     post_order_position.emplace(instruction, position);
@@ -1043,12 +1046,12 @@ Status BufferAssigner::AssignBuffersForComputation(
   return Status::OK();
 }
 
-FlatMap<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
-        LogicalBuffer::Color::Hasher>
+flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+              LogicalBuffer::Color::Hasher>
 BufferAssigner::SplitBuffersByColor(
     const FlatSet<const LogicalBuffer*>& buffers) {
-  FlatMap<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
-          LogicalBuffer::Color::Hasher>
+  flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+                LogicalBuffer::Color::Hasher>
       color_map;
   for (auto buffer : buffers) {
     color_map[buffer->color()].insert(buffer);
@@ -1057,7 +1060,7 @@ BufferAssigner::SplitBuffersByColor(
 }
 
 Status BufferAssigner::AssignBuffersWithSequentialOrdering(
-    const FlatMap<const HloComputation*, FlatSet<const LogicalBuffer*>>&
+    const flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>&
         buffers_to_assign_sequentially,
     bool run_whole_module_heap_simulation, BufferAssignment* assignment) {
   // Run the sequence of instructions through the heap simulator.  The heuristic
@@ -1155,9 +1158,8 @@ std::vector<const LogicalBuffer*> ComputePeakMemoryLogicalBuffers(
     const BufferAllocation& allocation, const HeapSimulatorTrace& heap_trace) {
   // Create a map from LogicalBuffer::Id to LogicalBuffer* for the logical
   // buffers in this allocation.
-  tensorflow::gtl::FlatMap<LogicalBuffer::Id, const LogicalBuffer*>
-      id_to_buffer;
-  tensorflow::gtl::FlatMap<const LogicalBuffer*, int64> buffer_sizes;
+  absl::flat_hash_map<LogicalBuffer::Id, const LogicalBuffer*> id_to_buffer;
+  absl::flat_hash_map<const LogicalBuffer*, int64> buffer_sizes;
   for (const auto& pair : allocation.assigned_buffers()) {
     const LogicalBuffer* buffer = pair.first;
     const BufferAllocation::OffsetSize& offset_size = pair.second;
@@ -1679,7 +1681,7 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
 
   // First assign buffers for global computatations. Temporary buffers for
   // sequential computations are collected in 'buffers_to_assign_sequentially'.
-  FlatMap<const HloComputation*, FlatSet<const LogicalBuffer*>>
+  flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>
       buffers_to_assign_sequentially;
   for (auto* computation : global_computations) {
     TF_RETURN_IF_ERROR(AssignBuffersForComputation(
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h
index 24ba7c16f5..9ba40617a3 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.h
+++ b/tensorflow/compiler/xla/service/buffer_assignment.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
@@ -33,7 +34,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -148,7 +148,7 @@ class BufferAllocation {
 
   // Access to the logical buffers assigned to this allocation, and their
   // associated logical offsets and sizes.
-  const tensorflow::gtl::FlatMap<const LogicalBuffer*, OffsetSize>&
+  const absl::flat_hash_map<const LogicalBuffer*, OffsetSize>&
   assigned_buffers() const {
     return assigned_buffers_;
   }
@@ -323,7 +323,7 @@ class BufferAllocation {
 
   // Mapping from the set of buffers assigned to this allocation to their
   // logical offsets and sizes.
-  tensorflow::gtl::FlatMap<const LogicalBuffer*, OffsetSize> assigned_buffers_;
+  absl::flat_hash_map<const LogicalBuffer*, OffsetSize> assigned_buffers_;
 
   int64 fragmentation_bytes_ = 0;
   std::vector<HeapSimulatorTrace> heap_traces_;
@@ -500,7 +500,7 @@ class BufferAssignment {
   int64 temp_allocation_total_size_ = 0;
 
   // Maps Buffers to the index of the BufferAllocation which holds the buffer.
-  tensorflow::gtl::FlatMap<const LogicalBuffer*, BufferAllocation::Index>
+  absl::flat_hash_map<const LogicalBuffer*, BufferAllocation::Index>
       allocation_index_for_buffer_;
 
   const HloModule* module_;
@@ -557,8 +557,8 @@ class BufferAssigner {
       const tensorflow::gtl::FlatSet<const LogicalBuffer*>& colocated_buffers,
       const tensorflow::gtl::FlatSet<BufferAllocation::Index>&
           colocated_allocations,
-      tensorflow::gtl::FlatMap<const HloComputation*,
-                               tensorflow::gtl::FlatSet<const LogicalBuffer*>>*
+      absl::flat_hash_map<const HloComputation*,
+                          tensorflow::gtl::FlatSet<const LogicalBuffer*>>*
           buffers_to_assign_sequentially,
       BufferAssignment* assignment);
 
@@ -568,9 +568,8 @@ class BufferAssigner {
   // 'run_whole_module_heap_simulation' is true, the heap simulation will be run
   // assuming all global computations are sequentially ordered.
   Status AssignBuffersWithSequentialOrdering(
-      const tensorflow::gtl::FlatMap<
-          const HloComputation*,
-          tensorflow::gtl::FlatSet<const LogicalBuffer*>>&
+      const absl::flat_hash_map<const HloComputation*,
+                                tensorflow::gtl::FlatSet<const LogicalBuffer*>>&
           buffers_to_assign_sequentially,
       bool run_whole_module_heap_simulation, BufferAssignment* assignment);
 
@@ -624,9 +623,9 @@ class BufferAssigner {
 
   // Split a set of buffers into several sets, each of which contains buffers
   // colored with the same color.
-  tensorflow::gtl::FlatMap<LogicalBuffer::Color,
-                           tensorflow::gtl::FlatSet<const LogicalBuffer*>,
-                           LogicalBuffer::Color::Hasher>
+  absl::flat_hash_map<LogicalBuffer::Color,
+                      tensorflow::gtl::FlatSet<const LogicalBuffer*>,
+                      LogicalBuffer::Color::Hasher>
   SplitBuffersByColor(
       const tensorflow::gtl::FlatSet<const LogicalBuffer*>& buffers);
 
diff --git a/tensorflow/compiler/xla/service/buffer_liveness.h b/tensorflow/compiler/xla/service/buffer_liveness.h
index cdd3cf4032..2911bbcfbf 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness.h
+++ b/tensorflow/compiler/xla/service/buffer_liveness.h
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h
index 3af2ab5edf..0c2e9b99db 100644
--- a/tensorflow/compiler/xla/service/call_graph.h
+++ b/tensorflow/compiler/xla/service/call_graph.h
@@ -20,10 +20,10 @@ limitations under the License.
 
 #include <ostream>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -157,7 +157,7 @@ class CallGraphNode {
 
   // The map from instruction to index in callsites_ for looking up the callsite
   // (if any) associated with a particular instruction in this computation.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int64> callsite_instructions_;
+  absl::flat_hash_map<const HloInstruction*, int64> callsite_instructions_;
 
   // The call sites in other computations which call this computation.
   std::vector<CallSite> caller_callsites_;
@@ -267,7 +267,7 @@ class CallGraph {
 
   // Map from HLO computation to the index of the corresponding call graph node
   // in nodes_.
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> node_indices_;
+  absl::flat_hash_map<const HloComputation*, int64> node_indices_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index b65dfef9c9..7f78412924 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_alias_analysis.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -432,7 +432,7 @@ class CopyRemover {
       // Construct a list for each HLO buffer in the alias analysis. Maintain a
       // map from HloValue to the respective list element representing that
       // value. The map is used to construct the copy info map below.
-      tensorflow::gtl::FlatMap<const HloValue*, ValueNode*> value_to_node;
+      absl::flat_hash_map<const HloValue*, ValueNode*> value_to_node;
       for (const HloBuffer& buffer : alias_analysis.buffers()) {
         // Verify values contained in the buffer are strictly ordered. This
         // should always be the case after adding copies to eliminate
@@ -480,7 +480,7 @@ class CopyRemover {
     // respective ValueNode representing that value.
     void AddValueList(
         absl::Span<const HloValue* const> values,
-        tensorflow::gtl::FlatMap<const HloValue*, ValueNode*>* value_to_node) {
+        absl::flat_hash_map<const HloValue*, ValueNode*>* value_to_node) {
       ValueNode* tail = nullptr;
       ValueNode* head = nullptr;
       for (const HloValue* value : values) {
@@ -516,8 +516,7 @@ class CopyRemover {
     // respective ValueNode.
     void CreateCopyMap(
         const HloModule& module,
-        const tensorflow::gtl::FlatMap<const HloValue*, ValueNode*>&
-            value_to_node) {
+        const absl::flat_hash_map<const HloValue*, ValueNode*>& value_to_node) {
       for (HloComputation* computation : module.computations()) {
         for (HloInstruction* instruction : computation->instructions()) {
           // Add copies with unambiguous source values to the map. Copies with
@@ -916,7 +915,7 @@ class CopyRemover {
       ValueNode* src = nullptr;
       ValueNode* dest = nullptr;
     };
-    tensorflow::gtl::FlatMap<const HloInstruction*, CopyNodes> copy_map_;
+    absl::flat_hash_map<const HloInstruction*, CopyNodes> copy_map_;
   };
 
   HloModule* module_;
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index b7103118ac..6a83909a3b 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -290,6 +290,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:span",
@@ -309,6 +310,7 @@ cc_library(
     deps = [
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@llvm//:analysis",
         "@llvm//:target",
     ],
@@ -471,6 +473,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
         "//tensorflow/core:lib",
         "//tensorflow/stream_executor",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/synchronization",
         "@com_google_absl//absl/types:span",
     ],
@@ -762,6 +765,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:computation_layout",
         "//tensorflow/compiler/xla/service:layout_assignment",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
index bfecbd6e01..c291bf2d1b 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <numeric>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/cpu/dot_op_emitter.h"
 #include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h"
@@ -38,7 +39,7 @@ using absl::nullopt;
 using absl::optional;
 
 using ShouldMakeOperandColMajorCache =
-    tensorflow::gtl::FlatMap<const HloInstruction*, bool>;
+    absl::flat_hash_map<const HloInstruction*, bool>;
 }  // namespace
 
 static bool ShouldMakeAllUsersColMajor(const HloInstruction* instruction) {
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
index 20cf855735..a9febe891b 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <functional>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/synchronization/mutex.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 #include "tensorflow/core/platform/dynamic_annotations.h"
@@ -30,8 +31,7 @@ namespace cpu {
 namespace runtime {
 
 XfeedManager* GetXfeedManager(int device_ordinal) {
-  static tensorflow::gtl::FlatMap<int, XfeedManager*>* managers =
-      new tensorflow::gtl::FlatMap<int, XfeedManager*>();
+  static auto* managers = new absl::flat_hash_map<int, XfeedManager*>();
   static absl::Mutex* mutex = new absl::Mutex();
 
   absl::MutexLock lock(mutex);
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index c3e8020783..953a75c35f 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
@@ -67,7 +68,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/core/lib/core/bits.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -1398,7 +1398,7 @@ static bool ReductionPreservesLayout(const HloInstruction& reduce) {
   //
   // So if we reduce f32[A,B,C,D] on dimensions 1 and 2, this map contains
   // [0->0, 3->1].
-  gtl::FlatMap<int64, int64> unreduced_dim_map;
+  absl::flat_hash_map<int64, int64> unreduced_dim_map;
 
   gtl::FlatSet<int64> reduced_dims(reduce.dimensions().begin(),
                                    reduce.dimensions().end());
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index daafef4eb3..586f27b104 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "llvm/ADT/Triple.h"
@@ -47,7 +48,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -427,7 +427,7 @@ class IrEmitter : public DfsHloVisitorWithDefault,
   // Maps the buffer allocation slices for the parameters to the computation
   // being compiled to their parameter numbers.  Only relevant for thread local
   // computations.
-  tensorflow::gtl::FlatMap<BufferAllocation::Index, int64>
+  absl::flat_hash_map<BufferAllocation::Index, int64>
       computation_parameter_allocations_;
 
   // Maps HLO instructions to their index into the profile counter array.
@@ -567,11 +567,11 @@ class IrEmitter : public DfsHloVisitorWithDefault,
     }
   };
 
-  tensorflow::gtl::FlatMap<const Literal*, llvm::Constant*,
-                           LiteralPtrHashFunctor, LiteralPtrEqualityFunctor>
+  absl::flat_hash_map<const Literal*, llvm::Constant*, LiteralPtrHashFunctor,
+                      LiteralPtrEqualityFunctor>
       emitted_literals_;
 
-  tensorflow::gtl::FlatMap<BufferAllocation::Index, llvm::Constant*>
+  absl::flat_hash_map<BufferAllocation::Index, llvm::Constant*>
       constant_buffer_to_global_;
 
   std::vector<const HloComputation*> thread_local_computations_;
diff --git a/tensorflow/compiler/xla/service/cpu/target_machine_features.cc b/tensorflow/compiler/xla/service/cpu/target_machine_features.cc
index a0cd8ee2d2..5cdac203af 100644
--- a/tensorflow/compiler/xla/service/cpu/target_machine_features.cc
+++ b/tensorflow/compiler/xla/service/cpu/target_machine_features.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/cpu/target_machine_features.h"
+#include "tensorflow/core/platform/logging.h"
 
 namespace xla {
 namespace cpu {
diff --git a/tensorflow/compiler/xla/service/cpu/target_machine_features.h b/tensorflow/compiler/xla/service/cpu/target_machine_features.h
index 8b00ae9e47..a383b4a4a0 100644
--- a/tensorflow/compiler/xla/service/cpu/target_machine_features.h
+++ b/tensorflow/compiler/xla/service/cpu/target_machine_features.h
@@ -16,10 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 namespace cpu {
@@ -97,8 +97,7 @@ class LLVMTargetMachineFeatures : public TargetMachineFeatures {
   // This is mutated from within `GetTargetTransformInfoFor` which is
   // semantically a getter (and thus `const`); and is therefore declared
   // mutable.  Making this mutable is okay because it has cache semantics.
-  mutable tensorflow::gtl::FlatMap<const llvm::Function*,
-                                   llvm::TargetTransformInfo>
+  mutable absl::flat_hash_map<const llvm::Function*, llvm::TargetTransformInfo>
       target_transform_info_cache_;
   llvm::TargetMachine* target_machine_;
 };
diff --git a/tensorflow/compiler/xla/service/defuser.cc b/tensorflow/compiler/xla/service/defuser.cc
index d124f74d19..661539cccb 100644
--- a/tensorflow/compiler/xla/service/defuser.cc
+++ b/tensorflow/compiler/xla/service/defuser.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -48,7 +49,7 @@ Status Defuse(HloInstruction* fusion_instruction) {
       fusion_instruction->fused_instructions_computation();
 
   // A map from fused instruction to its defused clone.
-  tensorflow::gtl::FlatMap<const HloInstruction*, HloInstruction*>
+  absl::flat_hash_map<const HloInstruction*, HloInstruction*>
       defused_instructions;
   // Initialize map to contain the fusion instruction parameters mapping
   // to the operands of the fusion instruction.
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index 5761573791..68d01d75a2 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 51968d13d4..e65d3fa332 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -91,6 +91,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_reachability",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -357,6 +358,7 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:cufft_plugin",
         "//tensorflow/core/platform/default/build_config:stream_executor_cuda",  # build_cleaner: keep
         "//tensorflow/stream_executor",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index 31a9f9b1be..5742632782 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
@@ -197,7 +198,7 @@ GpuExecutable::ResolveConstantGlobals(se::StreamExecutor* executor) {
   }
   module_spec.AddCudaPtxInMemory(ptx().c_str());
 
-  tensorflow::gtl::FlatMap<int64, se::DeviceMemoryBase> globals;
+  absl::flat_hash_map<int64, se::DeviceMemoryBase> globals;
   se::ModuleHandle module_handle;
   executor->LoadModule(module_spec, &module_handle);
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
index 38b0f8f15b..0e276282e4 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <memory>
 #include <string>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "absl/types/span.h"
@@ -35,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
 
@@ -101,7 +101,7 @@ class GpuExecutable : public Executable {
   const PointsToSet& GetRootPointsToSet() const;
 
   using BufferAllocToDeviceMemoryMap =
-      tensorflow::gtl::FlatMap<BufferAllocation::Index, se::DeviceMemoryBase>;
+      absl::flat_hash_map<BufferAllocation::Index, se::DeviceMemoryBase>;
 
   // Loads the PTX or CUBIN for this executable into `executor` and resolves the
   // globals corresponding to constant buffers.  Returns a map mapping buffer
diff --git a/tensorflow/compiler/xla/service/gpu/stream_assignment.h b/tensorflow/compiler/xla/service/gpu/stream_assignment.h
index c2df83aaa4..52d38b6f20 100644
--- a/tensorflow/compiler/xla/service/gpu/stream_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/stream_assignment.h
@@ -16,9 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_ASSIGNMENT_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_ASSIGNMENT_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 namespace gpu {
@@ -34,7 +34,7 @@ class StreamAssignment {
 
  private:
   int stream_count_ = 1;  // At least the main stream.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int> hlo_to_stream_number_;
+  absl::flat_hash_map<const HloInstruction*, int> hlo_to_stream_number_;
 };
 
 // Assigns GPU streams to instructions in `module`.
diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index 2bd04259c0..147776c8c4 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -18,13 +18,14 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/util.h"
 
 namespace xla {
 
-using tensorflow::gtl::FlatMap;
+using absl::flat_hash_map;
 using tensorflow::gtl::FlatSet;
 
 /*static*/
@@ -56,7 +57,7 @@ StatusOr<int64> HeapSimulator::MinimumMemoryForComputation(
     const HloComputation& computation, const HloInstructionSequence& sequence,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+    const absl::flat_hash_map<const HloComputation*, int64>*
         memory_by_computation) {
   TF_ASSIGN_OR_RETURN(
       HeapSimulator::Result result,
@@ -88,7 +89,7 @@ StatusOr<HeapSimulator::Result> HeapSimulator::Run(
     const HloInstructionSequence& instruction_sequence,
     const TuplePointsToAnalysis& points_to_analysis,
     const BufferValue::SizeFunction& size_fn, const Options& options,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+    const absl::flat_hash_map<const HloComputation*, int64>*
         memory_by_computation) {
   HeapSimulator heap(std::move(algorithm), size_fn, options,
                      /*schedule=*/nullptr, memory_by_computation);
@@ -115,8 +116,10 @@ Status HeapSimulator::RunComputation(
   // 'used_buffers' is the reverse map - it tracks which buffers were used by an
   // instruction, so that we can remove the instructions from a buffer's live
   // set after they are visited.
-  FlatMap<const BufferValue*, FlatSet<const HloInstruction*>> live_buffers;
-  FlatMap<const HloInstruction*, FlatSet<const BufferValue*>> used_buffers;
+  flat_hash_map<const BufferValue*, FlatSet<const HloInstruction*>>
+      live_buffers;
+  flat_hash_map<const HloInstruction*, FlatSet<const BufferValue*>>
+      used_buffers;
   auto add_user_to_buffer = [this, &live_buffers, &used_buffers](
                                 const HloInstruction* user,
                                 const BufferValue* buffer) {
@@ -345,7 +348,7 @@ HeapSimulator::HeapSimulator(
     std::unique_ptr<HeapAlgorithm> algorithm,
     const BufferValue::SizeFunction& size_fn, const Options& options,
     const HloSchedule* schedule,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+    const absl::flat_hash_map<const HloComputation*, int64>*
         memory_by_computation)
     : no_fragmentation_stats_(absl::make_unique<NoFragmentationStatsHeap>()),
       algorithm_(std::move(algorithm)),
@@ -536,7 +539,7 @@ void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size,
 
 void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
     const HloInstruction* instruction,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   // We only count the memory usage of the largest subcomputation, instead of
   // adding them all, because subcomputations won't execute in parallel.
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index 7d6dcc0dc9..a5bb3f81f7 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
 #include "tensorflow/compiler/xla/service/buffer_value_containers.h"
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
@@ -30,7 +31,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -58,7 +58,7 @@ class HeapSimulator {
   // Result represents the result of the heap simulation.
   struct Result {
     // The assignment of buffers to chunks.
-    tensorflow::gtl::FlatMap<const BufferValue*, Chunk> chunk_map;
+    absl::flat_hash_map<const BufferValue*, Chunk> chunk_map;
 
     // The total size in bytes of the heap, containing all assigned chunks.
     int64 heap_size = 0;
@@ -100,7 +100,7 @@ class HeapSimulator {
       const HloComputation& computation, const HloInstructionSequence& sequence,
       const TuplePointsToAnalysis& points_to_analysis,
       const LogicalBuffer::SizeFunction& size_function,
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+      const absl::flat_hash_map<const HloComputation*, int64>*
           memory_by_computation = nullptr);
 
   // Run the heap simulation with the given algorithm, assuming the given
@@ -130,7 +130,7 @@ class HeapSimulator {
       const TuplePointsToAnalysis& points_to_analysis,
       const BufferValue::SizeFunction& size_fn,
       const Options& options = Options(),
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+      const absl::flat_hash_map<const HloComputation*, int64>*
           memory_by_computation = nullptr);
 
  private:
@@ -140,7 +140,7 @@ class HeapSimulator {
   HeapSimulator(std::unique_ptr<HeapAlgorithm> algorithm,
                 const BufferValue::SizeFunction& size_fn,
                 const Options& options, const HloSchedule* schedule = nullptr,
-                const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+                const absl::flat_hash_map<const HloComputation*, int64>*
                     memory_by_computation = nullptr);
   ~HeapSimulator();
 
@@ -172,7 +172,7 @@ class HeapSimulator {
   // handle subcomputations. It would be good to unify the handling of
   // subcomputations, but it's not clear how.
   const HloSchedule* schedule_;
-  const tensorflow::gtl::FlatMap<const HloComputation*, int64>*
+  const absl::flat_hash_map<const HloComputation*, int64>*
       memory_by_computation_;
 
   // In addition to Alloc and Free, the heap simulator exposes a concept of
@@ -193,7 +193,7 @@ class HeapSimulator {
     const BufferValue* canonical = nullptr;
     int64 refcount = 0;
   };
-  tensorflow::gtl::FlatMap<const BufferValue*, std::shared_ptr<SharedGroup>>
+  absl::flat_hash_map<const BufferValue*, std::shared_ptr<SharedGroup>>
       shared_buffers_;
 
   // Hold some sets for error-checking the sequence of Alloc and Free calls.
@@ -235,7 +235,7 @@ class HeapAlgorithm {
   // analysis, it's not worth making major changes to HeapSimulator now.
   virtual void AccountForSubcomputationMemory(
       const HloInstruction* instruction,
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+      const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) {}
 
   // Free de-allocates a previously allocated buffer.
@@ -262,7 +262,7 @@ class NoFragmentationStatsHeap : public HeapAlgorithm {
 
   void AccountForSubcomputationMemory(
       const HloInstruction* instruction,
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+      const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) override;
 
   void Free(const BufferValue* buffer, int64 size) override;
@@ -382,8 +382,7 @@ class GlobalDecreasingSizeBestFitHeap : public HeapAlgorithm {
     // Free time of the buffer.
     int64 end;
   };
-  tensorflow::gtl::FlatMap<const BufferValue*, BufferInterval>
-      buffer_intervals_;
+  absl::flat_hash_map<const BufferValue*, BufferInterval> buffer_intervals_;
 };
 
 // A heap algorithm that chooses the best results from other algorithms added to
diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc
index 191fbf8194..ea0bced923 100644
--- a/tensorflow/compiler/xla/service/heap_simulator_test.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 namespace {
@@ -174,7 +174,7 @@ class HeapSimulatorTracker {
 
     // Construct the module sequence grouped by computation.
     HloSchedule schedule(module_.get());
-    tensorflow::gtl::FlatMap<const HloInstruction*, int> reverse_position;
+    absl::flat_hash_map<const HloInstruction*, int> reverse_position;
     for (int i = 0; i < full_module_sequence.size(); ++i) {
       const HloInstruction* instruction = full_module_sequence[i];
       schedule.GetOrCreateSequence(instruction->parent())
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index 0986da65cb..b6e1f52cf5 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -290,13 +291,11 @@ class BufferValueMap {
   const HloDataflowAnalysis& dataflow_;
 
   // A map containing the set of values contained in each buffer.
-  tensorflow::gtl::FlatMap<BufferNumber,
-                           tensorflow::gtl::FlatSet<const HloValue*>>
+  absl::flat_hash_map<BufferNumber, tensorflow::gtl::FlatSet<const HloValue*>>
       buffers_;
 
   // A map indicating which buffer each value is contained in.
-  tensorflow::gtl::FlatMap<const HloValue*, BufferNumber>
-      value_to_buffer_number_;
+  absl::flat_hash_map<const HloValue*, BufferNumber> value_to_buffer_number_;
 
   // The buffer number of the next buffer to be created.
   BufferNumber next_buffer_number_ = 0;
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.h b/tensorflow/compiler/xla/service/hlo_alias_analysis.h
index e345804537..372f99ff01 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/hlo_buffer.h"
 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
@@ -110,7 +111,7 @@ class HloAliasAnalysis {
   std::unique_ptr<HloDataflowAnalysis> dataflow_analysis_;
 
   // A map indicating which buffer a value is contained in.
-  tensorflow::gtl::FlatMap<const HloValue*, HloBuffer*> value_to_buffer_;
+  absl::flat_hash_map<const HloValue*, HloBuffer*> value_to_buffer_;
 
   // A lazily constructed vector containing all HloBuffers sorted by
   // HloBuffer::Id.
diff --git a/tensorflow/compiler/xla/service/hlo_clone_context.h b/tensorflow/compiler/xla/service/hlo_clone_context.h
index 658643b427..24910ca07b 100644
--- a/tensorflow/compiler/xla/service/hlo_clone_context.h
+++ b/tensorflow/compiler/xla/service/hlo_clone_context.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/map_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -73,12 +73,12 @@ class HloCloneContext {
     return FindOrDie(computations_, old_computation);
   }
 
-  const tensorflow::gtl::FlatMap<const HloInstruction*, HloInstruction*>&
+  const absl::flat_hash_map<const HloInstruction*, HloInstruction*>&
   cloned_instructions() const {
     return instructions_;
   }
 
-  const tensorflow::gtl::FlatMap<const HloComputation*, HloComputation*>&
+  const absl::flat_hash_map<const HloComputation*, HloComputation*>&
   cloned_computations() const {
     return computations_;
   }
@@ -86,10 +86,8 @@ class HloCloneContext {
  private:
   HloModule* module_;
   string suffix_;
-  tensorflow::gtl::FlatMap<const HloInstruction*, HloInstruction*>
-      instructions_;
-  tensorflow::gtl::FlatMap<const HloComputation*, HloComputation*>
-      computations_;
+  absl::flat_hash_map<const HloInstruction*, HloInstruction*> instructions_;
+  absl::flat_hash_map<const HloComputation*, HloComputation*> computations_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 4613d6762e..257dd5876f 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <sstream>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
@@ -297,7 +298,7 @@ void ComputeComputationPostOrder(
 void HloComputation::ComputeInstructionPostOrder(
     const HloComputation::ChannelDependencyMap& channel_dependency_map,
     std::vector<HloInstruction*>* post_order, HloInstruction* root,
-    tensorflow::gtl::FlatMap<HloInstruction*, VisitState>* visited) const {
+    absl::flat_hash_map<HloInstruction*, VisitState>* visited) const {
   std::vector<HloInstruction*> dfs_stack;
   dfs_stack.push_back(root);
   while (!dfs_stack.empty()) {
@@ -394,7 +395,7 @@ std::vector<HloInstruction*> HloComputation::MakeInstructionPostOrder() const {
   std::vector<HloInstruction*> post_order;
   post_order.reserve(instruction_count());
   std::vector<HloInstruction*> trace_instructions;
-  tensorflow::gtl::FlatMap<HloInstruction*, VisitState> visited;
+  absl::flat_hash_map<HloInstruction*, VisitState> visited;
   for (auto& instruction : instructions_) {
     if (instruction->opcode() == HloOpcode::kTrace) {
       // Trace instructions aren't handled by the DFS visitor. Add trace
@@ -505,9 +506,9 @@ HloComputationProto HloComputation::ToProto() const {
 /* static */ StatusOr<std::unique_ptr<HloComputation>>
 HloComputation::CreateFromProto(
     const HloComputationProto& proto,
-    const tensorflow::gtl::FlatMap<int64, HloComputation*>& computation_map) {
-  tensorflow::gtl::FlatMap<int64, HloInstruction*> instruction_map;
-  tensorflow::gtl::FlatMap<HloInstruction*, int64> to_proto_id;
+    const absl::flat_hash_map<int64, HloComputation*>& computation_map) {
+  absl::flat_hash_map<int64, HloInstruction*> instruction_map;
+  absl::flat_hash_map<HloInstruction*, int64> to_proto_id;
   std::vector<std::unique_ptr<HloInstruction>> instructions;
   int64 parameter_count = 0;
   for (const HloInstructionProto& instruction_proto : proto.instructions()) {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index 936a53bd7e..af929ac009 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/iterator_util.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -40,7 +41,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -188,7 +188,7 @@ class HloComputation {
   //     calls.
   static StatusOr<std::unique_ptr<HloComputation>> CreateFromProto(
       const HloComputationProto& proto,
-      const tensorflow::gtl::FlatMap<int64, HloComputation*>& computation_map);
+      const absl::flat_hash_map<int64, HloComputation*>& computation_map);
 
   // Gets the instructions in this computation.
   //
@@ -414,14 +414,14 @@ class HloComputation {
   // cross-replica-sum the union of the dependencies for all participating
   // instructions.
   using ChannelDependencyMap =
-      tensorflow::gtl::FlatMap<int64, absl::InlinedVector<HloInstruction*, 1>>;
+      absl::flat_hash_map<int64, absl::InlinedVector<HloInstruction*, 1>>;
   ChannelDependencyMap ComputeChannelDependencies() const;
 
   enum VisitState { kVisiting, kVisited };
   void ComputeInstructionPostOrder(
       const HloComputation::ChannelDependencyMap& channel_dependency_map,
       std::vector<HloInstruction*>* post_order, HloInstruction* root,
-      tensorflow::gtl::FlatMap<HloInstruction*, VisitState>* visited) const;
+      absl::flat_hash_map<HloInstruction*, VisitState>* visited) const;
 
   string name_;
   int64 unique_id_;
@@ -439,7 +439,7 @@ class HloComputation {
   // instruction pointer to location in the list for fast lookup.
   using InstructionList = std::list<std::unique_ptr<HloInstruction>>;
   InstructionList instructions_;
-  tensorflow::gtl::FlatMap<const HloInstruction*, InstructionList::iterator>
+  absl::flat_hash_map<const HloInstruction*, InstructionList::iterator>
       instruction_iterators_;
 
   std::vector<HloInstruction*> param_instructions_;
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc
index 113fd18eae..159c39d557 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <algorithm>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -106,8 +107,8 @@ Status HloDomainMap::PopulateDomainMetadataMap() {
   auto equal = [](const DomainMetadata* a, const DomainMetadata* b) {
     return a->Matches(*b);
   };
-  tensorflow::gtl::FlatMap<const DomainMetadata*, int64, decltype(hash),
-                           decltype(equal)>
+  absl::flat_hash_map<const DomainMetadata*, int64, decltype(hash),
+                      decltype(equal)>
       domain_metadata(1024, hash, equal);
 
   for (auto& domain : instruction_domains_) {
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h b/tensorflow/compiler/xla/service/hlo_domain_map.h
index 56b557d7ce..8584bc021d 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.h
@@ -19,13 +19,13 @@ limitations under the License.
 #include <memory>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_domain_metadata.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -77,8 +77,7 @@ class HloDomainMap {
  private:
   // Map used for representing instruction ordering, i.e.
   // order_map[a] < order_map[b] means a must be ordered before b.
-  using InstructionOrderMap =
-      tensorflow::gtl::FlatMap<const HloInstruction*, int64>;
+  using InstructionOrderMap = absl::flat_hash_map<const HloInstruction*, int64>;
 
   HloDomainMap(string domain_kind) : domain_kind_(std::move(domain_kind)) {}
 
@@ -120,8 +119,8 @@ class HloDomainMap {
 
   string domain_kind_;
   std::vector<std::unique_ptr<DomainMetadata::Domain>> instruction_domains_;
-  tensorflow::gtl::FlatMap<HloInstruction*, int64> instruction_to_domain_;
-  tensorflow::gtl::FlatMap<HloInstruction*, int64> domain_metadata_id_;
+  absl::flat_hash_map<HloInstruction*, int64> instruction_to_domain_;
+  absl::flat_hash_map<HloInstruction*, int64> domain_metadata_id_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 23787dbc8a..5d5c9c7e58 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/ascii.h"
@@ -43,7 +44,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/human_readable_json.h"
@@ -59,8 +59,8 @@ using absl::StrJoin;
 /* static */
 StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     const HloInstructionProto& proto,
-    const tensorflow::gtl::FlatMap<int64, HloInstruction*>& instruction_map,
-    const tensorflow::gtl::FlatMap<int64, HloComputation*>& computation_map) {
+    const absl::flat_hash_map<int64, HloInstruction*>& instruction_map,
+    const absl::flat_hash_map<int64, HloComputation*>& computation_map) {
   TF_RET_CHECK(!proto.opcode().empty());
   TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(proto.opcode()));
   TF_RET_CHECK(proto.has_shape());
@@ -266,7 +266,8 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
           << "Expect 1 called computation for fusion instruction but sees "
           << proto.called_computation_ids_size();
       const int64 fusion_id = proto.called_computation_ids(0);
-      auto* fused_computation = FindPtrOrNull(computation_map, fusion_id);
+      auto* fused_computation =
+          tensorflow::gtl::FindPtrOrNull(computation_map, fusion_id);
       TF_RET_CHECK(fused_computation != nullptr)
           << "No fusion computation with id " << fusion_id;
       instruction = CreateFusion(proto.shape(), fusion_kind, all_operands(),
@@ -2661,14 +2662,14 @@ class HloInstruction::FusionReusesParamElements {
   // the value of this parameter, which would save stack space but not allow us
   // to finish early if we find a reuse.
   static UseKind Compute(int64 i, const HloInstruction& hlo) {
-    tensorflow::gtl::FlatMap<const HloInstruction*, UseKind> memoization_cache;
+    absl::flat_hash_map<const HloInstruction*, UseKind> memoization_cache;
     return ComputeInternal(i, hlo, &memoization_cache);
   }
 
  private:
   static UseKind ComputeInternal(
       int64 i, const HloInstruction& hlo,
-      tensorflow::gtl::FlatMap<const HloInstruction*, UseKind>* cache) {
+      absl::flat_hash_map<const HloInstruction*, UseKind>* cache) {
     if (auto hlo_param = DynCast<HloParameterInstruction>(&hlo)) {
       if (hlo_param->parameter_number() == i) {
         return UseKind::kUse;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 009bd3bab3..1bfdc88abc 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -32,6 +32,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
@@ -50,7 +51,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/iterator_range.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -247,7 +247,7 @@ class CanonicalNameMap {
 
  private:
   int64 index;
-  tensorflow::gtl::FlatMap<string, string> canonical_name_map;
+  absl::flat_hash_map<string, string> canonical_name_map;
 };
 
 // HLO instructions are the atomic unit of the high-level compiler's IR.
@@ -350,8 +350,8 @@ class HloInstruction {
   //     calls.
   static StatusOr<std::unique_ptr<HloInstruction>> CreateFromProto(
       const HloInstructionProto& proto,
-      const tensorflow::gtl::FlatMap<int64, HloInstruction*>& instruction_map,
-      const tensorflow::gtl::FlatMap<int64, HloComputation*>& computation_map);
+      const absl::flat_hash_map<int64, HloInstruction*>& instruction_map,
+      const absl::flat_hash_map<int64, HloComputation*>& computation_map);
 
   // Creates a parameter-retrieving instruction.
   static std::unique_ptr<HloInstruction> CreateParameter(int64 parameter_number,
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index ad45a82941..1bc168c8b7 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <deque>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/escaping.h"
 #include "absl/strings/str_cat.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/window_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 namespace {
@@ -1099,7 +1099,7 @@ void HloFusionInstruction::MergeFusionInstructionIntoMultiOutput(
   // Note that we add the unfused instructions to this->parent_ computation.
   // This is necessary because the unique_id needs for an instruction and
   // it's only added when inserting to the computation.
-  tensorflow::gtl::FlatMap<HloInstruction*, HloInstruction*> old_to_new;
+  absl::flat_hash_map<HloInstruction*, HloInstruction*> old_to_new;
   std::vector<HloInstruction*> unfused_instructions;
   auto computation_to_merge =
       instruction_to_merge->fused_instructions_computation();
@@ -1392,7 +1392,7 @@ std::unique_ptr<HloInstruction> HloFusionInstruction::CloneWithNewOperandsImpl(
 }
 
 Status HloFusionInstruction::DeduplicateFusionOperands() {
-  tensorflow::gtl::FlatMap<const HloInstruction*, int> operand_indices;
+  absl::flat_hash_map<const HloInstruction*, int> operand_indices;
   std::vector<int> operands_to_remove;
   for (int i = 0; i < operand_count(); ++i) {
     auto emplace_result = operand_indices.emplace(operand(i), i);
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index 6a4e766788..1c2b2868fd 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
@@ -74,7 +75,7 @@ class ListScheduler {
       const HloComputation& computation,
       const TuplePointsToAnalysis& points_to_analysis,
       const LogicalBuffer::SizeFunction& size_function,
-      const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+      const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) {
     ListScheduler scheduler(computation, points_to_analysis, size_function,
                             memory_by_computation);
@@ -99,7 +100,7 @@ class ListScheduler {
   ListScheduler(const HloComputation& computation,
                 const TuplePointsToAnalysis& points_to_analysis,
                 const LogicalBuffer::SizeFunction& size_function,
-                const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+                const absl::flat_hash_map<const HloComputation*, int64>&
                     memory_by_computation)
       : computation_(computation),
         points_to_analysis_(points_to_analysis),
@@ -234,8 +235,7 @@ class ListScheduler {
 
     // Populate the ready list with instructions which have no operands or
     // control predecessors.
-    tensorflow::gtl::FlatMap<const HloInstruction*, int64>
-        unscheduled_pred_count;
+    absl::flat_hash_map<const HloInstruction*, int64> unscheduled_pred_count;
     for (auto* instruction : computation_.instructions()) {
       // TODO(b/34466113): Replace this and above with successors() or
       // predecessors() when these methods are added to HloInstruction.
@@ -251,8 +251,8 @@ class ListScheduler {
     std::multimap<Priority, ReadyListEntry> ready_queue;
 
     // Map of ready instructions to their iterators in ready_queue.
-    tensorflow::gtl::FlatMap<const HloInstruction*,
-                             std::multimap<Priority, ReadyListEntry>::iterator>
+    absl::flat_hash_map<const HloInstruction*,
+                        std::multimap<Priority, ReadyListEntry>::iterator>
         ready_instructions;
 
     auto add_to_ready_queue = [&](HloInstruction* inst) {
@@ -347,12 +347,11 @@ class ListScheduler {
   // Computations are analyzed in post-order. When scheduling an instruction
   // that includes subcomputations, such as a while loop, we use this map to
   // look up the memory needed by subcomputations.
-  const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+  const absl::flat_hash_map<const HloComputation*, int64>&
       memory_by_computation_;
 
   // A map containing the LogicalBuffers that each instruction uses.
-  tensorflow::gtl::FlatMap<const HloInstruction*,
-                           std::vector<const LogicalBuffer*>>
+  absl::flat_hash_map<const HloInstruction*, std::vector<const LogicalBuffer*>>
       buffer_uses_;
 
   // A map containing the count of unscheduled HLOs which using a particular
@@ -379,7 +378,7 @@ StatusOr<HloInstructionSequence> ScheduleComputationHelper(
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
     const MemorySchedulerAlgorithm& algorithm,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   VLOG(2) << "Computation: " << computation.name();
   if (algorithm) {
@@ -396,13 +395,13 @@ StatusOr<HloInstructionSequence> DFSMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   // These variables are a hack to prevent overflows.
   int64 cumulative_total_size = 0;
   int64 total_hlos = computation.parent()->instruction_count();
-  tensorflow::gtl::FlatMap<const HloInstruction*, int64> extra_users;
-  tensorflow::gtl::FlatMap<const HloInstruction*, int64> total_sizes;
+  absl::flat_hash_map<const HloInstruction*, int64> extra_users;
+  absl::flat_hash_map<const HloInstruction*, int64> total_sizes;
   for (const HloInstruction* hlo : computation.MakeInstructionPostOrder()) {
     if (ListScheduler::IgnoreInstruction(*hlo)) {
       extra_users[hlo] = 0;
@@ -467,7 +466,7 @@ StatusOr<HloInstructionSequence> ListMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   return ListScheduler::Run(computation, points_to_analysis, size_function,
                             memory_by_computation);
@@ -477,7 +476,7 @@ StatusOr<HloInstructionSequence> PostOrderMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   return HloInstructionSequence(computation.MakeInstructionPostOrder());
 }
@@ -486,7 +485,7 @@ StatusOr<HloInstructionSequence> DefaultMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   // We try a few schedulers and choose whichever returns a lower min-memory,
   // not accounting for fragmentation.
@@ -549,7 +548,7 @@ StatusOr<HloSchedule> ScheduleModule(
   HloSchedule schedule(&module);
   TF_ASSIGN_OR_RETURN(std::unique_ptr<TuplePointsToAnalysis> points_to_analysis,
                       TuplePointsToAnalysis::Run(&module));
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> memory_by_computation;
+  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
   for (const auto* computation : module.MakeComputationPostOrder()) {
     if (!computation->IsFusionComputation()) {
       TF_ASSIGN_OR_RETURN(HloInstructionSequence computation_sequence,
@@ -577,7 +576,7 @@ StatusOr<HloInstructionSequence> ScheduleComputation(
   CHECK(!computation.IsFusionComputation());
   TF_ASSIGN_OR_RETURN(std::unique_ptr<TuplePointsToAnalysis> points_to_analysis,
                       TuplePointsToAnalysis::Run(computation.parent()));
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> empty_map;
+  absl::flat_hash_map<const HloComputation*, int64> empty_map;
   return ScheduleComputationHelper(computation, *points_to_analysis,
                                    size_function, nullptr, empty_map);
 }
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h
index 9964c6fdd7..a4c1d3db81 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.h
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_ordering.h"
@@ -37,7 +38,7 @@ namespace xla {
 typedef std::function<StatusOr<HloInstructionSequence>(
     const HloComputation&, const TuplePointsToAnalysis&,
     const LogicalBuffer::SizeFunction&,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&)>
+    const absl::flat_hash_map<const HloComputation*, int64>&)>
     MemorySchedulerAlgorithm;
 
 // List scheduler
@@ -45,7 +46,7 @@ StatusOr<HloInstructionSequence> ListMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation);
 
 // DFS-order scheduler
@@ -53,7 +54,7 @@ StatusOr<HloInstructionSequence> DFSMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation);
 
 // Naive Post Order scheduler
@@ -61,7 +62,7 @@ StatusOr<HloInstructionSequence> PostOrderMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation);
 
 // The default scheduling algorithm. Runs both the list scheduler
@@ -71,7 +72,7 @@ StatusOr<HloInstructionSequence> DefaultMemoryScheduler(
     const HloComputation& computation,
     const TuplePointsToAnalysis& points_to_analysis,
     const LogicalBuffer::SizeFunction& size_function,
-    const tensorflow::gtl::FlatMap<const HloComputation*, int64>&
+    const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation);
 
 // Returns an HloSchedule which seeks to minimize the memory required for
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
index 1b9e9bfc77..5a9fccc7dd 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_dce.h"
@@ -247,7 +248,7 @@ TEST_F(HloSchedulingTest, ListAccountsForSubcomputations) {
   EXPECT_TRUE(ordering.ExecutesBefore(bcast, add));
   EXPECT_TRUE(ordering.ExecutesBefore(transpose, add));
 
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> memory_by_computation;
+  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
   memory_by_computation[cond_computation] = 17;
   memory_by_computation[body_computation] = 16;
   std::unique_ptr<TuplePointsToAnalysis> points_to_analysis =
@@ -409,7 +410,7 @@ TEST_F(HloSchedulingTest, HeapSimulatorAccountsForSubcomputations) {
   EXPECT_EQ(module->entry_computation()->instruction_count(),
             schedule.sequence(module->entry_computation()).size());
 
-  tensorflow::gtl::FlatMap<const HloComputation*, int64> memory_by_computation;
+  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
   memory_by_computation[cond_computation] = 17;
   memory_by_computation[body_computation] = 16;
   std::unique_ptr<TuplePointsToAnalysis> points_to_analysis =
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index b3949f3a6d..9359e9a8be 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -285,8 +286,8 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
       << ShapeUtil::HumanStringWithLayout(expected_program_shape.result())
       << ", actual: " << ShapeUtil::HumanStringWithLayout(result_shape);
 
-  tensorflow::gtl::FlatMap<int64, HloComputation*> computation_map;
-  tensorflow::gtl::FlatMap<HloComputation*, int64> to_proto_id;
+  absl::flat_hash_map<int64, HloComputation*> computation_map;
+  absl::flat_hash_map<HloComputation*, int64> to_proto_id;
   std::vector<std::unique_ptr<HloComputation>> computations;
   HloComputation* entry = nullptr;
   for (const HloComputationProto& computation_proto : proto.computations()) {
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
index 278d94cdd3..0311b73207 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -30,7 +31,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -250,25 +250,25 @@ class HloModuleGroupMetadata {
   std::vector<std::unique_ptr<std::vector<HloInstruction*>>> companion_sets_;
 
   // Map from each companion while instruction to the index into companion_set_.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int64> companion_set_index_;
+  absl::flat_hash_map<const HloInstruction*, int64> companion_set_index_;
 
   // Map from computation to the instruction using it (a kWhile, kConditional).
-  tensorflow::gtl::FlatMap<const HloComputation*, TrackedInstruction>
+  absl::flat_hash_map<const HloComputation*, TrackedInstruction>
       tracked_instructions_;
 
   // Maps tracked instructions (kWhile, kConditional, kCall, ...) to the set of
   // communicating instructions within the proper called computation(s).
-  tensorflow::gtl::FlatMap<HloInstruction*, std::vector<HloInstruction*>>
+  absl::flat_hash_map<HloInstruction*, std::vector<HloInstruction*>>
       tracked_instructions_comms_;
 
   // All channels in the module.
   std::vector<Channel> channels_;
 
   // Map from channel ids to the index in channels_.
-  tensorflow::gtl::FlatMap<int64, int64> channel_id_map_;
+  absl::flat_hash_map<int64, int64> channel_id_map_;
 
   // Map from all-reduce ids to the all reduce instructions.
-  tensorflow::gtl::FlatMap<int64, std::vector<HloInstruction*>> all_reduce_map_;
+  absl::flat_hash_map<int64, std::vector<HloInstruction*>> all_reduce_map_;
 
   // The maximum channel id used in the module group.
   int64 max_channel_id_ = -1;
@@ -276,7 +276,7 @@ class HloModuleGroupMetadata {
   // The modules that this metadata was built from.
   const std::vector<HloModule*>& modules_;
 
-  tensorflow::gtl::FlatMap<HloModule*, std::unique_ptr<TuplePointsToAnalysis>>
+  absl::flat_hash_map<HloModule*, std::unique_ptr<TuplePointsToAnalysis>>
       points_to_analyses_;
 };
 
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.h b/tensorflow/compiler/xla/service/hlo_module_group_util.h
index 309c23045d..f21b44bcd9 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_util.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group_util.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <memory>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -87,7 +87,7 @@ class HloModuleGroupUtil {
   // * visit_state: map from each instruction to its visit state.
   // * visit_function: function called when each instruction group.
   // * root: the root instruction of the traversal.
-  using VisitStates = tensorflow::gtl::FlatMap<HloInstruction*, VisitState>;
+  using VisitStates = absl::flat_hash_map<HloInstruction*, VisitState>;
   Status VisitTopologicalOrder(VisitStates* visit_state,
                                const VisitFunction& visit_function,
                                HloInstruction* root);
diff --git a/tensorflow/compiler/xla/service/hlo_opcode.cc b/tensorflow/compiler/xla/service/hlo_opcode.cc
index 2d4e38589f..4551a1c2e2 100644
--- a/tensorflow/compiler/xla/service/hlo_opcode.cc
+++ b/tensorflow/compiler/xla/service/hlo_opcode.cc
@@ -14,9 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -31,7 +31,7 @@ string HloOpcodeString(HloOpcode opcode) {
 }
 
 StatusOr<HloOpcode> StringToHloOpcode(const string& opcode_name) {
-  static auto* opcode_map = new tensorflow::gtl::FlatMap<string, HloOpcode>({
+  static auto* opcode_map = new absl::flat_hash_map<string, HloOpcode>({
 #define STRING_TO_OPCODE_ENTRY(enum_name, opcode_name, ...) \
   {opcode_name, HloOpcode::enum_name},
       HLO_OPCODE_LIST(STRING_TO_OPCODE_ENTRY)
diff --git a/tensorflow/compiler/xla/service/hlo_ordering.h b/tensorflow/compiler/xla/service/hlo_ordering.h
index b0361c3f02..66313492eb 100644
--- a/tensorflow/compiler/xla/service/hlo_ordering.h
+++ b/tensorflow/compiler/xla/service/hlo_ordering.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/hlo_value.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -120,8 +120,8 @@ class PredecessorHloOrdering : public HloOrdering {
   // predecessors. An instruction is an element of its own predecessor set.
   //
   // Subclasses should fill this in to define the desired ordering.
-  tensorflow::gtl::FlatMap<const HloComputation*,
-                           std::unique_ptr<HloReachabilityMap>>
+  absl::flat_hash_map<const HloComputation*,
+                      std::unique_ptr<HloReachabilityMap>>
       predecessors_;
 };
 
@@ -204,7 +204,7 @@ class SequentialHloOrdering : public HloOrdering {
   // this map so more than one instruction may have the same position
   // value. This is not a problem because ExecutesBefore also verifies
   // instructions are in the same computation.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int> order_position_;
+  absl::flat_hash_map<const HloInstruction*, int> order_position_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
index 8c2f928ca1..59fd01cb58 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <functional>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
@@ -98,7 +99,7 @@ void HloPassPipeline::MaybeDumpHlo(const HloModule& module,
   if (!proto_dump_path.empty()) {
     static tensorflow::mutex mu(tensorflow::LINKER_INITIALIZED);
     static auto* const module_id_to_pass_number =
-        new tensorflow::gtl::FlatMap<int64, int64>();
+        new absl::flat_hash_map<int64, int64>();
 
     tensorflow::mutex_lock lock(mu);
     const int64 pass_number = (*module_id_to_pass_number)[module.unique_id()]++;
diff --git a/tensorflow/compiler/xla/service/hlo_reachability.h b/tensorflow/compiler/xla/service/hlo_reachability.h
index b66a2aa4bd..5a5f01f8fd 100644
--- a/tensorflow/compiler/xla/service/hlo_reachability.h
+++ b/tensorflow/compiler/xla/service/hlo_reachability.h
@@ -19,11 +19,11 @@ limitations under the License.
 #include <list>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -154,7 +154,7 @@ class HloReachabilityMap {
 
   // Dense assignment from HloInstruction* to number. These numbers index
   // into the bit_vectors_ vector and into the bits within a BitVector.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int> indices_;
+  absl::flat_hash_map<const HloInstruction*, int> indices_;
 
   // Bitvectors holding the reachability to each instruction. The bit vector for
   // instruction X includes ones for each instruction which X is reachable from.
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index a438671936..abdd9a9212 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <set>
 #include <string>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -75,7 +76,7 @@ bool IsRematerializable(const HloInstruction* instruction) {
 // cache before, and eventually calling the IsRematerializable() API.
 bool CanBeRematerialized(
     const HloInstruction* instruction,
-    tensorflow::gtl::FlatMap<const HloInstruction*, bool>* remat_able) {
+    absl::flat_hash_map<const HloInstruction*, bool>* remat_able) {
   auto it = remat_able->find(instruction);
   if (it != remat_able->end()) {
     return it->second;
@@ -268,7 +269,7 @@ class InstructionList {
   Item* first_;
 
   // Item for each instruction.
-  tensorflow::gtl::FlatMap<const HloInstruction*, Item*> item_map_;
+  absl::flat_hash_map<const HloInstruction*, Item*> item_map_;
 };
 
 // Return the items which use the given LogicalBuffer. Sets
@@ -503,7 +504,7 @@ MemoryUsageTracker::MemoryUsageTracker(
   PointsToSet::BufferSet live_out_set =
       points_to_analysis.GetPointsToSet(computation_->root_instruction())
           .CreateFlattenedSet();
-  tensorflow::gtl::FlatMap<const LogicalBuffer*, BufferId>
+  absl::flat_hash_map<const LogicalBuffer*, BufferId>
       logical_buffer_to_buffer_id;
 
   for (auto* item = instruction_list_.first(); item != nullptr;
@@ -854,7 +855,7 @@ int64 RematerializationCost(const HloInstruction* instruction,
 Item* PickRematerializationCandidate(
     const MemoryUsageTracker& memory_tracker,
     const InstructionList& instruction_list, int64 memory_limit_bytes,
-    tensorflow::gtl::FlatMap<const HloInstruction*, bool>* remat_able) {
+    absl::flat_hash_map<const HloInstruction*, bool>* remat_able) {
   Item* best_item = nullptr;
   int64 best_cost = 0;
 
@@ -983,7 +984,7 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
   tensorflow::gtl::FlatSet<const HloInstruction*> remat_move_instructions;
 
   // The map from instructions to their rematerializable status.
-  tensorflow::gtl::FlatMap<const HloInstruction*, bool> remat_able;
+  absl::flat_hash_map<const HloInstruction*, bool> remat_able;
 
   // The peak memory of the computation at any point in the instruction
   // sequence.
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h
index 7330d73c09..5a02e3a8bb 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.h
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h
@@ -15,6 +15,7 @@
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_REMATERIALIZATION_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_REMATERIALIZATION_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -115,8 +116,7 @@ class HloRematerialization : public HloModulePass {
   // computations called from sequential context
   // (CallContext::kSequential). These values are updated as rematerialization
   // occurs.
-  tensorflow::gtl::FlatMap<const HloComputation*, int64>
-      computation_peak_memory_;
+  absl::flat_hash_map<const HloComputation*, int64> computation_peak_memory_;
 
   std::unique_ptr<TuplePointsToAnalysis> points_to_analysis_;
 
diff --git a/tensorflow/compiler/xla/service/hlo_schedule.cc b/tensorflow/compiler/xla/service/hlo_schedule.cc
index 3fc5dbeb02..7c5c98f04e 100644
--- a/tensorflow/compiler/xla/service/hlo_schedule.cc
+++ b/tensorflow/compiler/xla/service/hlo_schedule.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <queue>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -30,7 +31,7 @@ namespace xla {
 
 /* static */ StatusOr<HloSchedule> HloSchedule::CreateFromProto(
     const HloModule* module, const HloScheduleProto& proto) {
-  tensorflow::gtl::FlatMap<int64, const HloComputation*> id_to_computation;
+  absl::flat_hash_map<int64, const HloComputation*> id_to_computation;
   for (const HloComputation* computation : module->computations()) {
     id_to_computation[computation->unique_id()] = computation;
   }
@@ -44,7 +45,7 @@ namespace xla {
         << "No computation exists in HLO module with id " << computation_id;
     const HloComputation* computation = comp_it->second;
 
-    tensorflow::gtl::FlatMap<int64, const HloInstruction*> id_to_instruction;
+    absl::flat_hash_map<int64, const HloInstruction*> id_to_instruction;
     for (const HloInstruction* instruction : computation->instructions()) {
       id_to_instruction[instruction->unique_id()] = instruction;
     }
@@ -112,7 +113,7 @@ Status HloSchedule::UpdateComputationSchedule(
     const HloComputation* computation) {
   // Map from unique ID to HloInstruction pointer for instructions in the
   // computation.
-  tensorflow::gtl::FlatMap<int, const HloInstruction*> id_to_instruction;
+  absl::flat_hash_map<int, const HloInstruction*> id_to_instruction;
   for (const HloInstruction* instruction : computation->instructions()) {
     InsertOrDie(&id_to_instruction, instruction->unique_id(), instruction);
   }
@@ -126,15 +127,13 @@ Status HloSchedule::UpdateComputationSchedule(
   // Map from HloInstruction X to newly added instructions (instruction is in
   // computation, but not in schedule) which use X. If an instruction is not in
   // the map, then it has no users which are newly added instructions.
-  tensorflow::gtl::FlatMap<const HloInstruction*,
-                           std::vector<const HloInstruction*>>
+  absl::flat_hash_map<const HloInstruction*, std::vector<const HloInstruction*>>
       new_instruction_uses;
 
   // For each newly added instruction, this is the count of the instruction's
   // operands that have not yet been scheduled. When this value reaches zero,
   // then the instruction may be placed in the schedule.
-  tensorflow::gtl::FlatMap<const HloInstruction*, int>
-      unscheduled_operand_count;
+  absl::flat_hash_map<const HloInstruction*, int> unscheduled_operand_count;
 
   // Create a worklist of newly added instructions which are ready to be added
   // to the schedule. Initialize worklist with those that have zero operands.
@@ -217,9 +216,9 @@ Status HloSchedule::Update() {
     }
     for (auto it = sequences_.begin(); it != sequences_.end();) {
       if (nonfusion_computations_ids.count(it->first) == 0) {
-        it = sequences_.erase(it);
+        sequences_.erase(it++);
       } else {
-        it++;
+        ++it;
       }
     }
   }
@@ -254,7 +253,7 @@ Status HloSchedule::Verify() const {
   // For each computation verify the set of instructions is the same and that
   // each dependency and control edge is honored.
   for (const HloComputation* computation : nonfusion_computations) {
-    tensorflow::gtl::FlatMap<const HloInstruction*, int> instruction_position;
+    absl::flat_hash_map<const HloInstruction*, int> instruction_position;
     int pos = 0;
     for (const HloInstruction* instruction :
          sequence(computation).instructions()) {
diff --git a/tensorflow/compiler/xla/service/hlo_schedule.h b/tensorflow/compiler/xla/service/hlo_schedule.h
index 270fe6039f..0a714101ee 100644
--- a/tensorflow/compiler/xla/service/hlo_schedule.h
+++ b/tensorflow/compiler/xla/service/hlo_schedule.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -103,8 +104,7 @@ class HloSchedule {
 
   // Returns a map from HloComputation unique ID to instruction sequence. The
   // map contains all sequences in the schedule.
-  const tensorflow::gtl::FlatMap<int64, HloInstructionSequence>& sequences()
-      const {
+  const absl::flat_hash_map<int64, HloInstructionSequence>& sequences() const {
     return sequences_;
   }
 
@@ -148,7 +148,7 @@ class HloSchedule {
   // A map from computation unique ID to instruction sequence. Unique IDs are
   // used rather than HloComputation pointers because HLO pointers are not
   // unique across HLO transformations because pointers may be recycled.
-  tensorflow::gtl::FlatMap<int64, HloInstructionSequence> sequences_;
+  absl::flat_hash_map<int64, HloInstructionSequence> sequences_;
 };
 
 std::ostream& operator<<(std::ostream& out, const HloSchedule& schedule);
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 6eb6658904..a7727824fe 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <set>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
@@ -23,7 +24,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -993,7 +993,7 @@ Status CheckSameIsHostTransfer(const HloInstruction* instr1,
 
 // Checks various invariants of send and recv instructions.
 Status VerifySendsAndRecvs(const HloModule& module) {
-  tensorflow::gtl::FlatMap<int64, const HloInstruction*> host_channels;
+  absl::flat_hash_map<int64, const HloInstruction*> host_channels;
   // Host send/recv instructions must have their own unique channel.
   auto check_unique_host_channel = [&](const HloInstruction* instruction) {
     const HloSendRecvInstruction* sendrecv =
@@ -1061,7 +1061,7 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
   TF_RETURN_IF_ERROR(VerifyHloStructure(module));
   TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module));
 
-  tensorflow::gtl::FlatMap<string, const HloInstruction*> instructions;
+  absl::flat_hash_map<string, const HloInstruction*> instructions;
 
   for (auto* computation : module->computations()) {
     for (const auto& instruction : computation->instructions()) {
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
index 06f0e1ed25..7ee789276d 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/indexed_array_analysis.h"
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -95,7 +96,7 @@ Status IndexedArrayAnalysis::TraverseAndPopulateCache(
   absl::InlinedVector<const HloInstruction*, 4> stack;
 
   enum DfsState { kDiscovered, kVisited };
-  gtl::FlatMap<const HloInstruction*, DfsState> dfs_state_map;
+  absl::flat_hash_map<const HloInstruction*, DfsState> dfs_state_map;
 
   stack.push_back(root);
   InsertOrDie(&dfs_state_map, root, kDiscovered);
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.h b/tensorflow/compiler/xla/service/indexed_array_analysis.h
index 3e238f97a0..e5aa67fd85 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.h
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.h
@@ -18,10 +18,10 @@ limitations under the License.
 
 #include <type_traits>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace xla {
@@ -360,7 +360,7 @@ class IndexedArrayAnalysis {
 
   std::vector<std::unique_ptr<Array>> owned_tensors_;
   std::vector<Literal> owned_literals_;
-  tensorflow::gtl::FlatMap<const HloInstruction*, Array*> cache_;
+  absl::flat_hash_map<const HloInstruction*, Array*> cache_;
 };
 
 // A pass that prints all non-trivial results returned by IndexedArrayAnalysis.
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index e884122fcb..5a99c40df4 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -22,11 +22,11 @@ limitations under the License.
 #include <vector>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -189,7 +189,7 @@ bool InstructionFusion::EffectivelyAtMostUnary(HloInstruction* hlo) {
 bool InstructionFusion::CanFuseOnAllPaths(
     HloInstruction* producer, HloInstruction* consumer,
     const HloInstructionSet& do_not_fuse,
-    tensorflow::gtl::FlatMap<std::pair<HloInstruction*, HloInstruction*>, bool>*
+    absl::flat_hash_map<std::pair<HloInstruction*, HloInstruction*>, bool>*
         result_cache) {
   if (consumer == producer) {
     return true;
@@ -241,7 +241,7 @@ InstructionFusion::ComputeGloballyUnfusible(
   // fusing operations that require duplication later depending on
   // is_expensive_().
   HloInstructionSet do_not_duplicate;
-  tensorflow::gtl::FlatMap<std::pair<HloInstruction*, HloInstruction*>, bool>
+  absl::flat_hash_map<std::pair<HloInstruction*, HloInstruction*>, bool>
       can_fuse_on_all_paths_result_cache;
   for (HloInstruction* consumer : post_order) {
     for (HloInstruction* producer : consumer->operands()) {
@@ -430,7 +430,7 @@ class ReversePostOrderFusionQueue : public FusionQueue {
 
  private:
   std::vector<HloInstruction*> post_order_;
-  tensorflow::gtl::FlatMap<HloInstruction*, int> post_order_index_;
+  absl::flat_hash_map<HloInstruction*, int> post_order_index_;
 };
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h
index c1ec3b18a1..da2032f6c7 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/instruction_fusion.h
@@ -1,3 +1,4 @@
+#include "absl/container/flat_hash_map.h"
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -158,8 +159,8 @@ class InstructionFusion : public HloModulePass {
   bool CanFuseOnAllPaths(
       HloInstruction* producer, HloInstruction* consumer,
       const HloInstructionSet& do_not_fuse,
-      tensorflow::gtl::FlatMap<std::pair<HloInstruction*, HloInstruction*>,
-                               bool>* result_cache);
+      absl::flat_hash_map<std::pair<HloInstruction*, HloInstruction*>, bool>*
+          result_cache);
 
   // Computes the set of nodes that we do not want to fuse into any of their
   // consumers based on a global analysis of the HLO graph.
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index e29c199c42..1591256fad 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -38,7 +39,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -228,8 +228,8 @@ class LayoutConstraints {
   // Array-shaped buffers which have not yet been constrained.
   std::set<LogicalBuffer::Id> unconstrained_buffer_ids_;
 
-  mutable tensorflow::gtl::FlatMap<const HloInstruction*,
-                                   std::unique_ptr<PointsToSet::BufferSet>>
+  mutable absl::flat_hash_map<const HloInstruction*,
+                              std::unique_ptr<PointsToSet::BufferSet>>
       buffer_sets_cache_;
 
   HloComputation* computation_;
diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD
index 540bbb7c7a..3934d2e493 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/BUILD
+++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -38,6 +38,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:logical_buffer",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
         "@llvm//:core",
     ],
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
index 8d9fa99d82..88cde2d3d9 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
@@ -16,13 +16,13 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_ALIAS_ANALYSIS_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_ALIAS_ANALYSIS_H_
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_cat.h"
 #include "llvm/IR/Module.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
@@ -77,14 +77,14 @@ class AliasAnalysis {
   // A map from a buffer slice to metadata corresponding to its alias.scope
   // metadata.  The index kParameterAliasSet is used to hold aliasing
   // information for parameters.
-  tensorflow::gtl::FlatMap<BufferAllocation::Slice, llvm::MDNode*,
-                           BufferAllocation::Slice::Hasher>
+  absl::flat_hash_map<BufferAllocation::Slice, llvm::MDNode*,
+                      BufferAllocation::Slice::Hasher>
       alias_scope_metadata_;
 
   // A map from a buffer slice to metadata corresponding to its noalias
   // metadata.
-  tensorflow::gtl::FlatMap<BufferAllocation::Slice, llvm::MDNode*,
-                           BufferAllocation::Slice::Hasher>
+  absl::flat_hash_map<BufferAllocation::Slice, llvm::MDNode*,
+                      BufferAllocation::Slice::Hasher>
       noalias_metadata_;
 };
 
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc
index b9ec31c497..95b1c20663 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.h b/tensorflow/compiler/xla/service/multi_output_fusion.h
index 0344626b26..9508ab2ed1 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.h
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <queue>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
@@ -126,7 +127,7 @@ class MultiOutputFusion : public HloModulePass {
   std::vector<FusionCandidate> candidates_;
 
   // A map that maps an instruction to the index_.
-  tensorflow::gtl::FlatMap<HloInstruction*, int> candidates_index_;
+  absl::flat_hash_map<HloInstruction*, int> candidates_index_;
 
   // The reachability map of current computation.
   std::unique_ptr<HloReachabilityMap> reachability_;
diff --git a/tensorflow/compiler/xla/service/name_uniquer.h b/tensorflow/compiler/xla/service/name_uniquer.h
index 6dd89c240f..1ac60f1cf4 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.h
+++ b/tensorflow/compiler/xla/service/name_uniquer.h
@@ -18,9 +18,9 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 
@@ -78,7 +78,7 @@ class NameUniquer {
 
   // Map from name prefix to the generator data structure which tracks used
   // identifiers and generates new ones.
-  tensorflow::gtl::FlatMap<string, SequentialIdGenerator> generated_names_;
+  absl::flat_hash_map<string, SequentialIdGenerator> generated_names_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(NameUniquer);
 };
diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.h b/tensorflow/compiler/xla/service/reduce_precision_insertion.h
index 4bb22428f3..0b4e82e8d6 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion.h
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.h
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
index a9e8a51e09..78392d3bb2 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
@@ -36,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/compactptrset.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc
index 56145822be..067cfcc17d 100644
--- a/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc
+++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/service/while_util.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
index e8fe33e626..2590473c77 100644
--- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
+++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
@@ -15,17 +15,17 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h"
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/service/tuple_util.h"
 #include "tensorflow/compiler/xla/service/while_util.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
+using absl::flat_hash_map;
 using absl::InlinedVector;
-using tensorflow::gtl::FlatMap;
 using tensorflow::gtl::FlatSet;
 
 // Copies `to_hoist` to the computation containing `while_instr`, hoisting its
@@ -34,7 +34,7 @@ using tensorflow::gtl::FlatSet;
 // function hoists the operands in `unhoisted_invariant_instructions` and moves
 // them into `hoisted_instructions`.
 static void CreateLoopInvariantCopy(
-    FlatMap<HloInstruction*, HloInstruction*>* hoisted_instructions,
+    flat_hash_map<HloInstruction*, HloInstruction*>* hoisted_instructions,
     FlatSet<HloInstruction*>* unhoisted_invariant_instructions,
     HloInstruction* while_instr, HloInstruction* to_hoist) {
   HloComputation* parent_of_while = while_instr->parent();
@@ -147,7 +147,7 @@ WhileLoopInvariantCodeMotion::TryHoistingInvariantInstructionsFromWhileBody(
 
   // Maps instructions in the while body to instructions hoisted outside the
   // while that compute the same value.
-  FlatMap<HloInstruction*, HloInstruction*> hoisted_instructions;
+  flat_hash_map<HloInstruction*, HloInstruction*> hoisted_instructions;
 
   // Contains instructions that can be legally hoisted, but were deemed to be
   // unprofitable to be hoisted alone by NotWorthHoistingIndividually.  When we
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index 9a74f22395..07de8492ba 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -14,12 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
 #include "tensorflow/compiler/xla/service/while_loop_analysis.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 
 namespace xla {
 
@@ -181,7 +181,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
                                           used_tuple_indices.end());
   std::sort(new_to_old_tuple_idx.begin(), new_to_old_tuple_idx.end());
 
-  tensorflow::gtl::FlatMap<int64, int64> old_to_new_tuple_idx;
+  absl::flat_hash_map<int64, int64> old_to_new_tuple_idx;
   for (int64 new_idx = 0; new_idx < new_to_old_tuple_idx.size(); ++new_idx) {
     int64 old_idx = new_to_old_tuple_idx[new_idx];
     old_to_new_tuple_idx[old_idx] = new_idx;
@@ -405,7 +405,7 @@ static StatusOr<bool> TryPropagateConstant(HloInstruction* while_op) {
   // build a map from the tuple element index to the constant value. Limit this
   // to scalar constant values because propagating array constants can regress
   // performance by forcing us to copy constants.
-  tensorflow::gtl::FlatMap<int, const HloInstruction*> index_to_constant;
+  absl::flat_hash_map<int, const HloInstruction*> index_to_constant;
   for (int i = 0; i < root_operands.size(); i++) {
     HloInstruction* instr = root_operands[i];
     if (instr->opcode() == HloOpcode::kGetTupleElement &&
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index f474ecb18c..06b6330321 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -422,6 +422,7 @@ xla_test(
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core:test",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
index db5a824de0..a6e70eb6ca 100644
--- a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
+++ b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
@@ -32,7 +33,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/regexp.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -83,7 +83,7 @@ struct ParsedProfileOutputLine {
 
 Status ParseOneProfileOutputLine(
     const string& line, bool expect_hlo,
-    gtl::FlatMap<string, ParsedProfileOutputLine>* parsed_results,
+    absl::flat_hash_map<string, ParsedProfileOutputLine>* parsed_results,
     absl::Span<const absl::string_view> opcodes_to_ignore = {}) {
   string separator = "[^:]*:: +";
   string match_percentage = R"(\d+\.\d*% +\d+Σ)";
@@ -208,7 +208,7 @@ XLA_TEST_F(HloProfileTest, ProfileSingleComputation) {
   std::vector<string> profile_output_lines =
       absl::StrSplit(profile_output, '\n');
 
-  gtl::FlatMap<string, ParsedProfileOutputLine> parsed_profile_lines;
+  absl::flat_hash_map<string, ParsedProfileOutputLine> parsed_profile_lines;
 
   TF_ASSERT_OK(ParseOneProfileOutputLine(
       profile_output_lines[1], /*expect_hlo=*/false, &parsed_profile_lines));
@@ -314,7 +314,7 @@ XLA_TEST_F(HloProfileTest, ProfileWhileComputation) {
 
   ASSERT_NE(while_body_profile_end, profile_output_lines.end());
 
-  gtl::FlatMap<string, ParsedProfileOutputLine> parsed_profile_lines;
+  absl::flat_hash_map<string, ParsedProfileOutputLine> parsed_profile_lines;
 
   for (auto while_body_profile_i = while_body_profile_start + 1;
        while_body_profile_i != while_body_profile_end; while_body_profile_i++) {
-- 
GitLab


From ec900f15e352e4b203b1f0678f7d2ff042df57d5 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 1 Oct 2018 13:46:31 -0700
Subject: [PATCH 0170/1085] Minor speed improvements to defun.

- EncodeArg in C instead of python.
- Also caches parsed device specs, and device spec hashes
- Adds a common way to register python types in C.
- Fastpath canonicalize function inputs when no kwargs are passed
- Set the func name attr directly instead of creating an op to wrap it.
- Rewrite IsAttrsHelper without caching

Before:
entry {
  name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
  iters: 30000
  wall_time: 101.803263028
  extras {
    key: "examples_per_sec"
    value {
      double_value: 9822.86785562
    }
  }
}

After:
entry {
  name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
  iters: 30000
  wall_time: 47.2899993261
  extras {
    key: "examples_per_sec"
    value {
      double_value: 21146.1199884
    }
  }
}
PiperOrigin-RevId: 215272962
---
 tensorflow/c/eager/c_api.cc                  |   8 +
 tensorflow/c/eager/c_api.h                   |   3 +
 tensorflow/python/eager/BUILD                |   1 +
 tensorflow/python/eager/function.py          | 100 +++------
 tensorflow/python/eager/function_test.py     |  26 ++-
 tensorflow/python/eager/pywrap_tfe.h         |   4 +
 tensorflow/python/eager/pywrap_tfe_src.cc    | 223 ++++++++++++++++++-
 tensorflow/python/framework/device.py        |  12 +-
 tensorflow/python/framework/sparse_tensor.py |   2 +-
 tensorflow/python/pywrap_tfe.i               |   1 +
 tensorflow/python/util/nest.py               |   4 +-
 tensorflow/python/util/util.cc               | 223 +++++++++++--------
 tensorflow/python/util/util.h                |  34 ++-
 tensorflow/python/util/util.i                |  10 +-
 14 files changed, 462 insertions(+), 189 deletions(-)

diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 0bf3d9542b..3554ec0bf3 100755
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -578,6 +578,14 @@ void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name,
   op->operation.MutableAttrs()->Set(attr_name, attr_value);
 }
 
+void TFE_OpSetAttrFunctionName(TFE_Op* op, const char* attr_name,
+                               const char* data, size_t length) {
+  tensorflow::AttrValue attr_value;
+  tensorflow::NameAttrList* func = attr_value.mutable_func();
+  func->set_name(data, length);
+  op->operation.MutableAttrs()->Set(attr_name, attr_value);
+}
+
 void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor,
                          TF_Status* status) {
   tensorflow::Tensor t;
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index 6323f8a053..b2454d8722 100755
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -313,6 +313,9 @@ TF_CAPI_EXPORT extern void TFE_OpSetAttrFunction(TFE_Op* op,
                                                  const char* attr_name,
                                                  const TFE_Op* value);
 
+TF_CAPI_EXPORT void TFE_OpSetAttrFunctionName(TFE_Op* op, const char* attr_name,
+                                              const char* data, size_t length);
+
 TF_CAPI_EXPORT extern void TFE_OpSetAttrTensor(TFE_Op* op,
                                                const char* attr_name,
                                                TF_Tensor* tensor,
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index d3d997e6df..d0c1a93118 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -37,6 +37,7 @@ cc_library(
         "//tensorflow/python:safe_ptr",
         "//third_party/py/numpy:headers",
         "//third_party/python_runtime:headers",
+        "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:variant",
     ],
 )
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 60a4f018cd..3b6f288fb9 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1005,52 +1005,8 @@ def func_graph_from_py_func(name,
   return func_graph
 
 
-_TensorType = collections.namedtuple("_TensorType", ["dtype", "shape"])
-
-
-def _encode_arg(arg):
-  """A canonical representation for this argument, for use in a cache key."""
-
-  # `defun` uses dtypes and shapes instead of `Tensors` as cache keys. Dtypes
-  # are used because TensorFlow graphs are not parametric w.r.t. dtypes. Shapes
-  # are used for both performance reasons, as much TensorFlow code specializes
-  # on known shapes to produce slimmer graphs, and correctness, as some
-  # high-level APIs require shapes to be fully-known.
-  #
-  # TODO(akshayka): Add support for sparse tensors.
-  #
-  # pylint: disable=protected-access
-  if isinstance(arg, ops.Tensor):
-    return _TensorType(arg.dtype, arg._shape_tuple())
-  elif isinstance(arg, ops.IndexedSlices):
-    if arg.dense_shape is not None:
-      return tuple([
-          _TensorType(arg.values.dtype, arg.values._shape_tuple()),
-          _TensorType(arg.indices.dtype, arg.indices._shape_tuple()),
-          _TensorType(arg.dense_shape.dtype, arg.dense_shape._shape_tuple()),
-      ])
-    else:
-      return tuple([
-          _TensorType(arg.values.dtype, arg.values._shape_tuple()),
-          _TensorType(arg.indices.dtype, arg.indices._shape_tuple()),
-      ])
-  # pylint: enable=protected-access
-  elif isinstance(arg, (list, tuple)):
-    return tuple([_encode_arg(elem) for elem in arg])
-  elif isinstance(arg, dict):
-    return tuple(
-        (_encode_arg(key), _encode_arg(arg[key])) for key in sorted(arg))
-  else:
-    try:
-      # If possible, keep only a weak reference to Python objects. Weak
-      # references hash to the same value as the original object.
-      # TODO(allenl): Clean up dead functions and their cache keys if the cache
-      # gets large. Right now creating objects with a defunned method, calling
-      # the method, and losing a reference to the object in a loop will leak
-      # memory here.
-      return weakref.ref(arg)
-    except TypeError:
-      return arg
+pywrap_tensorflow.RegisterType("Tensor", ops.Tensor)
+pywrap_tensorflow.RegisterType("IndexedSlices", ops.IndexedSlices)
 
 
 def _deterministic_dict_values(dictionary):
@@ -1120,6 +1076,8 @@ class PolymorphicFunction(object):
         offset + index: default
         for index, default in enumerate(fullargspec.defaults or [])
     }
+    self._default_values = fullargspec.defaults
+    self._default_values_start_index = offset
     if input_signature is None:
       self._input_signature = None
     else:
@@ -1180,7 +1138,7 @@ class PolymorphicFunction(object):
     """Computes the cache key given inputs and execution context."""
     if self._input_signature is None:
       inputs = (args, kwargs) if kwargs else args
-      cache_key = tuple(_encode_arg(arg) for arg in inputs)
+      cache_key = pywrap_tensorflow.TFE_Py_EncodeArg(inputs)
     else:
       del args, kwargs
       cache_key = self._flat_input_signature
@@ -1203,7 +1161,7 @@ class PolymorphicFunction(object):
     colocation_stack = (() if executing_eagerly else
                         tuple(default_graph._colocation_stack.peek_objs()))  # pylint: disable=protected-access
 
-    return cache_key + (execution_context, device_functions, colocation_stack)
+    return (cache_key, execution_context, device_functions, colocation_stack)
 
   def _canonicalize_function_inputs(self, *args, **kwargs):
     """Canonicalizes `args` and `kwargs`.
@@ -1231,26 +1189,32 @@ class PolymorphicFunction(object):
     # Maps from index of arg to its corresponding value, according to `args`
     # and `kwargs`; seeded with the default values for the named args that
     # aren't in `args`.
-    arg_indices_to_values = {
-        index: default
-        for index, default in six.iteritems(self._arg_indices_to_default_values)
-        if index >= len(args)
-    }
-    consumed_args = []
-    for arg, value in six.iteritems(kwargs):
-      index = self._args_to_indices.get(arg, None)
-      if index is not None:
-        arg_indices_to_values[index] = value
-        consumed_args.append(arg)
-      elif self._input_signature is not None:
-        raise ValueError("Cannot define a TensorFlow function from a Python "
-                         "function with keyword arguments when "
-                         "input_signature is provided.")
-    for arg in consumed_args:
-      # After this loop, `kwargs` will only contain true keyword arguments, as
-      # opposed to named arguments called in a keyword-like fashion.
-      kwargs.pop(arg)
-    inputs = args + _deterministic_dict_values(arg_indices_to_values)
+    if not kwargs:
+      if self._default_values:
+        inputs = args + self._default_values[len(args) -
+                                             self._default_values_start_index:]
+      else:
+        inputs = args
+    else:
+      arg_indices_to_values = {
+          index: default for index, default in six.iteritems(
+              self._arg_indices_to_default_values) if index >= len(args)
+      }
+      consumed_args = []
+      for arg, value in six.iteritems(kwargs):
+        index = self._args_to_indices.get(arg, None)
+        if index is not None:
+          arg_indices_to_values[index] = value
+          consumed_args.append(arg)
+        elif self._input_signature is not None:
+          raise ValueError("Cannot define a TensorFlow function from a Python "
+                           "function with keyword arguments when "
+                           "input_signature is provided.")
+      for arg in consumed_args:
+        # After this loop, `kwargs` will only contain true keyword arguments, as
+        # opposed to named arguments called in a keyword-like fashion.
+        kwargs.pop(arg)
+      inputs = args + _deterministic_dict_values(arg_indices_to_values)
     flat_inputs = nest.flatten(inputs)
 
     # Check for NumPy arrays in arguments and convert them to Tensors.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index afe3ba9893..9ce367a837 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1237,6 +1237,24 @@ class FunctionTest(test.TestCase):
     x = constant_op.constant([1.0, 2.0])
     self.assertAllEqual([2., 4.], self.evaluate(defined(x)))
 
+  def testCacheObjectHashCollisions(self):
+
+    class Foo(object):
+
+      def __hash__(self):
+        return 42
+
+    def func(foo):
+      del foo
+      return
+
+    defined = function.defun(func)
+    defined(Foo())
+    self.assertEqual(len(defined._function_cache), 1)
+
+    defined(Foo())
+    self.assertEqual(len(defined._function_cache), 2)
+
   def testPythonFunctionWithDefaultArgs(self):
 
     def func(foo, bar=1, baz=2):
@@ -1250,20 +1268,20 @@ class FunctionTest(test.TestCase):
 
     def cache_keys():
       """Sanitizes cache keys of non-input metadata."""
-      return tuple(key[:3] for key in defined._function_cache)
+      return tuple(key[0] for key in defined._function_cache)
 
     # `True` corresponds to the fact that we're executing eagerly
-    self.assertIn((0, 1, 20), cache_keys())
+    self.assertIn(('tRRR', (0, 1, 20)), cache_keys())
 
     defined(1)  # bar=1, baz=2
-    self.assertIn((1, 1, 2), cache_keys())
+    self.assertIn(('tRRR', (1, 1, 2)), cache_keys())
 
     # This matches the previous call.
     defined(foo=1)
     self.assertEqual(len(defined._function_cache), 2)
 
     defined(1, 2, 3)
-    self.assertIn((1, 2, 3), cache_keys())
+    self.assertIn(('tRRR', (1, 2, 3)), cache_keys())
 
     # This matches the previous call.
     defined(1, bar=2, baz=3)
diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h
index f1b4042ec9..decd635b58 100755
--- a/tensorflow/python/eager/pywrap_tfe.h
+++ b/tensorflow/python/eager/pywrap_tfe.h
@@ -224,4 +224,8 @@ PyObject* TFE_Py_TensorShapeSlice(PyObject* tensors, int slice_dim);
 // The shape is represented as a Python tuple of integers.
 PyObject* TFE_Py_TensorShapeOnDevice(PyObject* tensor);
 
+// Encodes the object as a tuple that is meant to be used as part of the key
+// for the defun function cache.
+PyObject* TFE_Py_EncodeArg(PyObject*);
+
 #endif  // TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 196e20e4d7..4b9f7f4100 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/python/eager/pywrap_tfe.h"
 
+#include "absl/strings/str_cat.h"
 #include "absl/types/variant.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_api_internal.h"
@@ -567,11 +568,8 @@ bool SetOpAttrScalar(
         return false;
       }
     }
-    TFE_Op* func = TFE_NewOp(
-        ctx, string(func_name.data(), func_name.size()).c_str(), status);
-    if (TF_GetCode(status) != TF_OK) return false;
-    TFE_OpSetAttrFunction(op, key, func);
-    TFE_DeleteOp(func);
+    TF_SetStatus(status, TF_OK, "");
+    TFE_OpSetAttrFunctionName(op, key, func_name.data(), func_name.size());
   } else {
     TF_SetStatus(
         status, TF_UNIMPLEMENTED,
@@ -2748,3 +2746,218 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs,
 
   return RecordGradient(op_name, inputs, attrs, results, name);
 }
+
+namespace {
+
+tensorflow::int64 GetPyNoneHash() {
+  tensorflow::int64 py_none_hash = PyObject_Hash(Py_None);
+  return py_none_hash;
+}
+
+struct EncodeResult {
+  string str;
+  std::vector<PyObject*> objects;
+
+  PyObject* ToPyTuple() {
+    PyObject* result = PyTuple_New(2);
+
+    PyTuple_SET_ITEM(result, 0, GetPythonObjectFromString(str.c_str()));
+
+    if (objects.empty()) {
+      Py_INCREF(Py_None);
+      PyTuple_SET_ITEM(result, 1, Py_None);
+    } else {
+      PyObject* objects_tuple = PyTuple_New(objects.size());
+
+      for (int i = 0; i < objects.size(); i++) {
+        PyTuple_SET_ITEM(objects_tuple, i, objects[i]);
+      }
+
+      PyTuple_SET_ITEM(result, 1, objects_tuple);
+    }
+
+    return result;
+  }
+};
+
+tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
+  if (EagerTensor_CheckExact(arg)) {
+    TFE_TensorHandle* t = EagerTensor_Handle(arg);
+    tensorflow::TensorShape tensor_shape;
+    TF_RETURN_IF_ERROR(t->handle->Shape(&tensor_shape));
+    absl::StrAppend(&result->str, t->handle->dtype);
+
+    for (tensorflow::int64 dim_size : tensor_shape.dim_sizes()) {
+      absl::StrAppend(&result->str, dim_size);
+    }
+
+    return tensorflow::Status::OK();
+  }
+
+  tensorflow::Safe_PyObjectPtr dtype_object(
+      PyObject_GetAttrString(arg, "dtype"));
+
+  if (dtype_object == nullptr) {
+    return tensorflow::errors::InvalidArgument(
+        "ops.Tensor object doesn't have dtype() attr.");
+  }
+
+  tensorflow::Safe_PyObjectPtr dtype_enum(
+      PyObject_GetAttrString(dtype_object.get(), "_type_enum"));
+
+  if (dtype_enum == nullptr) {
+    return tensorflow::errors::InvalidArgument(
+        "ops.Tensor's dtype object doesn't have _type_enum() attr.");
+  }
+
+  tensorflow::DataType dtype =
+      static_cast<tensorflow::DataType>(MakeInt(dtype_enum.get()));
+
+  absl::StrAppend(&result->str, dtype);
+  static char _shape_tuple[] = "_shape_tuple";
+  tensorflow::Safe_PyObjectPtr shape_tuple(
+      PyObject_CallMethod(arg, _shape_tuple, nullptr));
+
+  if (shape_tuple == nullptr) {
+    return tensorflow::errors::InvalidArgument(
+        "ops.Tensor object doesn't have _shape_tuple() method.");
+  }
+
+  if (shape_tuple.get() == Py_None) {
+    // Unknown shape, encode that directly.
+    absl::StrAppend(&result->str, GetPyNoneHash());
+    return tensorflow::Status::OK();
+  }
+
+  tensorflow::Safe_PyObjectPtr shape_seq(PySequence_Fast(
+      shape_tuple.get(), "shape_tuple didn't return a sequence"));
+
+  int len = PySequence_Fast_GET_SIZE(shape_seq.get());
+  for (int i = 0; i < len; ++i) {
+    PyObject* item = PySequence_Fast_GET_ITEM(shape_seq.get(), i);
+    if (item == Py_None) {
+      absl::StrAppend(&result->str, GetPyNoneHash());
+    } else {
+      absl::StrAppend(&result->str, MakeInt(item));
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+const char kTensor[] = "T";
+const char kIndexedSlices[] = "I";
+const char kList[] = "L";
+const char kTuple[] = "t";
+const char kDict[] = "D";
+const char kRaw[] = "R";
+
+tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, EncodeResult* result);
+
+// This function doesn't set the type of sequence before
+tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type,
+                                         EncodeResult* result) {
+  tensorflow::Safe_PyObjectPtr arg_seq(
+      PySequence_Fast(arg, "unable to create seq from list/tuple"));
+
+  absl::StrAppend(&result->str, type);
+  int len = PySequence_Fast_GET_SIZE(arg_seq.get());
+  for (int i = 0; i < len; ++i) {
+    PyObject* item = PySequence_Fast_GET_ITEM(arg_seq.get(), i);
+    if (item == Py_None) {
+      absl::StrAppend(&result->str, GetPyNoneHash());
+    } else {
+      TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(item, result));
+    }
+  }
+
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, EncodeResult* result) {
+  if (tensorflow::swig::IsTensor(arg)) {
+    absl::StrAppend(&result->str, kTensor);
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(arg, result));
+  } else if (tensorflow::swig::IsIndexedSlices(arg)) {
+    absl::StrAppend(&result->str, kIndexedSlices);
+    tensorflow::Safe_PyObjectPtr values(PyObject_GetAttrString(arg, "values"));
+    if (values == nullptr) {
+      PyErr_Clear();
+      return tensorflow::errors::InvalidArgument(
+          "IndexedSlices does not have a values attr");
+    }
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(values.get(), result));
+
+    tensorflow::Safe_PyObjectPtr indices(
+        PyObject_GetAttrString(arg, "indices"));
+    if (indices == nullptr) {
+      PyErr_Clear();
+      return tensorflow::errors::InvalidArgument(
+          "IndexedSlices does not have a indices attr");
+    }
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(indices.get(), result));
+
+    tensorflow::Safe_PyObjectPtr dense_shape(
+        PyObject_GetAttrString(arg, "dense_shape"));
+    if (dense_shape == nullptr) {
+      PyErr_Clear();
+      return tensorflow::errors::InvalidArgument(
+          "IndexedSlices does not have a dense_shape attr");
+    }
+    if (dense_shape.get() != Py_None) {
+      TF_RETURN_IF_ERROR(TFE_Py_EncodeTensor(dense_shape.get(), result));
+    }
+  } else if (PyList_Check(arg)) {
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kList, result));
+  } else if (PyTuple_Check(arg)) {
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kTuple, result));
+  } else if (PyDict_Check(arg)) {
+    tensorflow::Safe_PyObjectPtr keys(PyDict_Keys(arg));
+    if (PyList_Sort(keys.get()) == -1) {
+      return tensorflow::errors::Internal("Unable to sort keys");
+    }
+
+    absl::StrAppend(&result->str, kDict);
+    int len = PyList_Size(keys.get());
+
+    for (int i = 0; i < len; i++) {
+      PyObject* key = PyList_GetItem(keys.get(), i);
+      TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(key, result));
+      PyObject* value = PyDict_GetItem(arg, key);
+      TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(value, result));
+    }
+  } else {
+    PyObject* object = PyWeakref_NewRef(arg, nullptr);
+
+    if (object == nullptr) {
+      PyErr_Clear();
+
+      object = arg;
+      Py_INCREF(object);
+    }
+
+    absl::StrAppend(&result->str, kRaw);
+    result->objects.push_back(object);
+  }
+
+  return tensorflow::Status::OK();
+}
+
+}  // namespace
+
+// `defun` uses dtypes and shapes instead of `Tensors` as cache keys. Dtypes
+// are used because TensorFlow graphs are not parametric w.r.t. dtypes. Shapes
+// are used for both performance reasons, as much TensorFlow code specializes
+// on known shapes to produce slimmer graphs, and correctness, as some
+// high-level APIs require shapes to be fully-known.
+//
+// TODO(nareshmodi): Add support for sparse tensors.
+PyObject* TFE_Py_EncodeArg(PyObject* arg) {
+  EncodeResult result;
+  const auto status = TFE_Py_EncodeArgHelper(arg, &result);
+  if (MaybeRaiseExceptionFromStatus(status, nullptr)) {
+    return nullptr;
+  }
+
+  return result.ToPyTuple();
+}
diff --git a/tensorflow/python/framework/device.py b/tensorflow/python/framework/device.py
index 06c653097a..7f6e0a75a5 100644
--- a/tensorflow/python/framework/device.py
+++ b/tensorflow/python/framework/device.py
@@ -87,6 +87,7 @@ class DeviceSpec(object):
     else:
       self.device_type = device_type
     self.device_index = device_index
+    self._hash = hash(self.to_string())
 
   def _clear(self):
     self._job = None
@@ -234,7 +235,7 @@ class DeviceSpec(object):
     return self.to_string() == other.to_string()
 
   def __hash__(self):
-    return hash(self.to_string())
+    return self._hash
 
 
 def check_valid(spec):
@@ -266,6 +267,7 @@ def canonical_name(device):
 # possible to compare the device function stacks belonging to different
 # graphs in a meaningful way.
 _cached_device_functions = {}
+_cached_device_specs = {}
 _cache_lock = threading.Lock()
 
 
@@ -297,7 +299,13 @@ def merge_device(spec):
   """
   with _cache_lock:
     if not isinstance(spec, DeviceSpec):
-      spec = DeviceSpec.from_string(spec or "")
+      cached_device_spec = _cached_device_specs.get(spec, None)
+      if cached_device_spec is None:
+        device_spec = DeviceSpec.from_string(spec or "")
+        _cached_device_specs[spec] = device_spec
+        spec = device_spec
+      else:
+        spec = cached_device_spec
     cached_function = _cached_device_functions.get(spec, None)
     if cached_function is not None:
       return cached_function
diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py
index 41ef2e11d1..440e3a0968 100644
--- a/tensorflow/python/framework/sparse_tensor.py
+++ b/tensorflow/python/framework/sparse_tensor.py
@@ -245,7 +245,7 @@ class SparseTensor(_TensorLike):
 SparseTensorValue = collections.namedtuple(
     "SparseTensorValue", ["indices", "values", "dense_shape"])
 tf_export("SparseTensorValue")(SparseTensorValue)
-pywrap_tensorflow.RegisterSparseTensorValueClass(SparseTensorValue)
+pywrap_tensorflow.RegisterType("SparseTensorValue", SparseTensorValue)
 
 
 @tf_export("convert_to_tensor_or_sparse_tensor")
diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index c411a58b70..61e0abbfcb 100755
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -67,6 +67,7 @@ limitations under the License.
 %rename("%s") TFE_ContextStartStep;
 %rename("%s") TFE_ContextEndStep;
 %rename("%s") TFE_Py_RegisterVSpace;
+%rename("%s") TFE_Py_EncodeArg;
 
 %{
 #include "tensorflow/python/eager/pywrap_tfe.h"
diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index 758cba7487..d67dbde304 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -819,5 +819,5 @@ def flatten_with_joined_string_paths(structure, separator="/"):
   return list(zip(flat_string_paths, flatten(structure)))
 
 
-_pywrap_tensorflow.RegisterSequenceClass(_collections.Sequence)
-_pywrap_tensorflow.RegisterMappingClass(_collections.Mapping)
+_pywrap_tensorflow.RegisterType("Mapping", _collections.Mapping)
+_pywrap_tensorflow.RegisterType("Sequence", _collections.Sequence)
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index 38b8491c66..7b3e618e84 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -29,14 +29,51 @@ limitations under the License.
 namespace tensorflow {
 namespace swig {
 
-namespace {
+std::unordered_map<string, PyObject*>* PythonTypesMap() {
+  static auto* m = new std::unordered_map<string, PyObject*>();
+  return m;
+}
+
+PyObject* GetRegisteredType(const string& key) {
+  auto* m = PythonTypesMap();
+  auto it = m->find(key);
+  if (it == m->end()) return nullptr;
+  return it->second;
+}
+
+PyObject* RegisterType(PyObject* type_name, PyObject* type) {
+  if (!PyType_Check(type)) {
+    PyErr_SetString(PyExc_TypeError,
+                    tensorflow::strings::StrCat("Expecting a type, got ",
+                                                Py_TYPE(type)->tp_name)
+                        .c_str());
+    return nullptr;
+  }
 
-// Type object for collections.Sequence. This is set by RegisterSequenceClass.
-PyObject* CollectionsSequenceType = nullptr;
-// Type object for collections.Mapping, set by RegisterMappingClass.
-PyObject* CollectionsMappingType = nullptr;
-PyTypeObject* SparseTensorValueType = nullptr;
+  string key;
+  if (PyBytes_Check(type_name)) {
+    key = PyBytes_AsString(type_name);
+  }
+#if PY_MAJOR_VERSION >= 3
+  if (PyUnicode_Check(type_name)) {
+    key = PyUnicode_AsUTF8(type_name);
+  }
+#endif
 
+  if (PythonTypesMap()->find(key) != PythonTypesMap()->end()) {
+    PyErr_SetString(PyExc_TypeError, tensorflow::strings::StrCat(
+                                         "Type already registered for ", key)
+                                         .c_str());
+    return nullptr;
+  }
+
+  Py_INCREF(type);
+  PythonTypesMap()->emplace(key, type);
+
+  Py_RETURN_NONE;
+}
+
+namespace {
 const int kMaxItemsInCache = 1024;
 
 bool WarnedThatSetIsNotSequence = false;
@@ -177,46 +214,82 @@ class CachedTypeCheck {
 // Returns -1 if an error occurred.
 int IsMappingHelper(PyObject* o) {
   static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
-    return PyObject_IsInstance(to_check, CollectionsMappingType);
+    PyObject* collections_mapping_type = GetRegisteredType("Mapping");
+    if (TF_PREDICT_FALSE(collections_mapping_type == nullptr)) {
+      PyErr_SetString(PyExc_RuntimeError,
+                      tensorflow::strings::StrCat(
+                          "collections.Mapping type has not been set. "
+                          "Please register the type with the identifier "
+                          "\"Mapping\" using RegisterType.")
+                          .c_str());
+      return -1;
+    }
+    return PyObject_IsInstance(to_check, collections_mapping_type);
   });
   if (PyDict_Check(o)) return true;
-  if (TF_PREDICT_FALSE(CollectionsMappingType == nullptr)) {
-    PyErr_SetString(
-        PyExc_RuntimeError,
-        tensorflow::strings::StrCat(
-            "collections.Mapping type has not been set. "
-            "Please call RegisterMappingClass before using this module")
-            .c_str());
-    return -1;
-  }
   return check_cache->CachedLookup(o);
 }
 
 // Returns 1 if `o` is an instance of attrs-decorated class.
 // Returns 0 otherwise.
 int IsAttrsHelper(PyObject* o) {
-  Safe_PyObjectPtr cls(PyObject_GetAttrString(o, "__class__"));
-  if (cls) {
-    return PyObject_HasAttrString(cls.get(), "__attrs_attrs__");
-  } else {
+  static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
+    Safe_PyObjectPtr cls(PyObject_GetAttrString(to_check, "__class__"));
+    if (cls) {
+      return PyObject_HasAttrString(cls.get(), "__attrs_attrs__");
+    }
+
     // PyObject_GetAttrString returns null on error
     PyErr_Clear();
     return 0;
-  }
+  });
+  return check_cache->CachedLookup(o);
 }
 
-// Returns 1 if `o` is considered a sequence for the purposes of Flatten().
+// Returns 1 if `o` is an object of type IndexedSlices.
 // Returns 0 otherwise.
 // Returns -1 if an error occurred.
-int IsSequenceHelper(PyObject* o) {
+int IsIndexedSlicesHelper(PyObject* o) {
   static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
-    int is_instance = PyObject_IsInstance(to_check, CollectionsSequenceType);
-
-    // Don't cache a failed is_instance check.
-    if (is_instance == -1) return -1;
+    PyObject* indexed_slices_type = GetRegisteredType("IndexedSlices");
+    if (TF_PREDICT_FALSE(indexed_slices_type == nullptr)) {
+      PyErr_SetString(PyExc_RuntimeError,
+                      tensorflow::strings::StrCat(
+                          "IndexedSlices type has not been set. "
+                          "Please register the type with the identifier "
+                          "\"IndexedSlices\" using RegisterType.")
+                          .c_str());
+      return -1;
+    }
+    return PyObject_IsInstance(to_check, indexed_slices_type);
+  });
+  return check_cache->CachedLookup(o);
+}
 
-    return static_cast<int>(is_instance != 0 && !IsString(to_check));
+// Returns 1 if `o` is a Tensor.
+// Returns 0 otherwise.
+// Returns -1 if an error occurred.
+int IsTensorHelper(PyObject* o) {
+  static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
+    PyObject* tensor_type = GetRegisteredType("Tensor");
+    if (TF_PREDICT_FALSE(tensor_type == nullptr)) {
+      PyErr_SetString(PyExc_RuntimeError,
+                      tensorflow::strings::StrCat(
+                          "Tensor type has not been set. "
+                          "Please register the type with the identifier "
+                          "\"Tensor\" using RegisterType.")
+                          .c_str());
+      return -1;
+    }
+    return PyObject_IsInstance(to_check, tensor_type);
   });
+  return check_cache->CachedLookup(o);
+}
+
+// Returns 1 if `o` is considered a sequence for the purposes of Flatten().
+// Returns 0 otherwise.
+// Returns -1 if an error occurred.
+int IsSequenceHelper(PyObject* o) {
   // We treat dicts and other mappings as special cases of sequences.
   if (IsMappingHelper(o)) return true;
   if (IsAttrsHelper(o)) return true;
@@ -226,15 +299,24 @@ int IsSequenceHelper(PyObject* o) {
                     "so consider avoiding using them.";
     WarnedThatSetIsNotSequence = true;
   }
-  if (TF_PREDICT_FALSE(CollectionsSequenceType == nullptr)) {
-    PyErr_SetString(
-        PyExc_RuntimeError,
-        tensorflow::strings::StrCat(
-            "collections.Sequence type has not been set. "
-            "Please call RegisterSequenceClass before using this module")
-            .c_str());
-    return -1;
-  }
+  static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) {
+    PyObject* collections_sequence_type = GetRegisteredType("Sequence");
+    if (TF_PREDICT_FALSE(collections_sequence_type == nullptr)) {
+      PyErr_SetString(PyExc_RuntimeError,
+                      tensorflow::strings::StrCat(
+                          "collections.Sequence type has not been set. "
+                          "Please register the type with the identifier "
+                          "\"Sequence\" using RegisterType.")
+                          .c_str());
+      return -1;
+    }
+    int is_instance = PyObject_IsInstance(to_check, collections_sequence_type);
+
+    // Don't cache a failed is_instance check.
+    if (is_instance == -1) return -1;
+
+    return static_cast<int>(is_instance != 0 && !IsString(to_check));
+  });
   return check_cache->CachedLookup(o);
 }
 
@@ -401,11 +483,13 @@ class AttrsValueIterator : public ValueIterator {
 };
 
 bool IsSparseTensorValueType(PyObject* o) {
-  if (TF_PREDICT_FALSE(SparseTensorValueType == nullptr)) {
+  PyObject* sparse_tensor_value_type = GetRegisteredType("SparseTensorValue");
+  if (TF_PREDICT_FALSE(sparse_tensor_value_type == nullptr)) {
     return false;
   }
 
-  return PyObject_TypeCheck(o, SparseTensorValueType) == 1;
+  return PyObject_TypeCheck(
+             o, reinterpret_cast<PyTypeObject*>(sparse_tensor_value_type)) == 1;
 }
 
 int IsSequenceForDataHelper(PyObject* o) {
@@ -647,49 +731,11 @@ bool AssertSameStructureHelper(
 
 }  // namespace
 
-void RegisterSequenceClass(PyObject* sequence_class) {
-  if (!PyType_Check(sequence_class)) {
-    PyErr_SetString(
-        PyExc_TypeError,
-        tensorflow::strings::StrCat(
-            "Expecting a class definition for `collections.Sequence`. Got ",
-            Py_TYPE(sequence_class)->tp_name)
-            .c_str());
-    return;
-  }
-  CollectionsSequenceType = sequence_class;
-}
-
-void RegisterMappingClass(PyObject* mapping_class) {
-  if (!PyType_Check(mapping_class)) {
-    PyErr_SetString(
-        PyExc_TypeError,
-        tensorflow::strings::StrCat(
-            "Expecting a class definition for `collections.Mapping`. Got ",
-            Py_TYPE(mapping_class)->tp_name)
-            .c_str());
-    return;
-  }
-  CollectionsMappingType = mapping_class;
-}
-
-void RegisterSparseTensorValueClass(PyObject* sparse_tensor_value_class) {
-  if (!PyType_Check(sparse_tensor_value_class)) {
-    PyErr_SetString(
-        PyExc_TypeError,
-        tensorflow::strings::StrCat(
-            "Expecting a class definition for `SparseTensorValue`. Got ",
-            Py_TYPE(sparse_tensor_value_class)->tp_name)
-            .c_str());
-    return;
-  }
-  SparseTensorValueType =
-      reinterpret_cast<PyTypeObject*>(sparse_tensor_value_class);
-}
-
 bool IsSequence(PyObject* o) { return IsSequenceHelper(o) == 1; }
 bool IsMapping(PyObject* o) { return IsMappingHelper(o) == 1; }
 bool IsAttrs(PyObject* o) { return IsAttrsHelper(o) == 1; }
+bool IsTensor(PyObject* o) { return IsTensorHelper(o) == 1; }
+bool IsIndexedSlices(PyObject* o) { return IsIndexedSlicesHelper(o) == 1; }
 
 PyObject* Flatten(PyObject* nested) {
   PyObject* list = PyList_New(0);
@@ -737,13 +783,15 @@ PyObject* IsNamedtuple(PyObject* o, bool strict) {
     }
   }
 
-  if (TF_PREDICT_FALSE(CollectionsSequenceType == nullptr)) {
-    PyErr_SetString(
-        PyExc_RuntimeError,
-        tensorflow::strings::StrCat(
-            "collections.Sequence type has not been set. "
-            "Please call RegisterSequenceClass before using this module")
-            .c_str());
+  PyObject* collections_sequence_type = GetRegisteredType("Sequence");
+
+  if (TF_PREDICT_FALSE(collections_sequence_type == nullptr)) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    tensorflow::strings::StrCat(
+                        "collections.Sequence type has not been set. "
+                        "Please register the type with the identifier "
+                        "\"Sequence\" using RegisterType.")
+                        .c_str());
     return nullptr;
   }
 
@@ -755,7 +803,8 @@ PyObject* IsNamedtuple(PyObject* o, bool strict) {
   }
 
   Safe_PyObjectPtr fields = make_safe(PyObject_GetAttrString(o, "_fields"));
-  int is_instance = PyObject_IsInstance(fields.get(), CollectionsSequenceType);
+  int is_instance =
+      PyObject_IsInstance(fields.get(), collections_sequence_type);
   if (is_instance == 0) {
     Py_RETURN_FALSE;
   } else if (is_instance == -1) {
diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h
index 01f85ea1dc..f37cd527d8 100644
--- a/tensorflow/python/util/util.h
+++ b/tensorflow/python/util/util.h
@@ -65,6 +65,24 @@ bool IsMapping(PyObject* o);
 //   True if the object is an instance of an attr.s decorated class.
 bool IsAttrs(PyObject* o);
 
+// Returns a true if its input is an ops.Tensor.
+//
+// Args:
+//   seq: the input to be checked.
+//
+// Returns:
+//   True if the object is a tensor.
+bool IsTensor(PyObject* o);
+
+// Returns a true if its input is an ops.IndexesSlices.
+//
+// Args:
+//   seq: the input to be checked.
+//
+// Returns:
+//   True if the object is an ops.IndexedSlices.
+bool IsIndexedSlices(PyObject* o);
+
 // Implements the same interface as tensorflow.util.nest._same_namedtuples
 // Returns Py_True iff the two namedtuples have the same name and fields.
 // Raises RuntimeError if `o1` or `o2` don't look like namedtuples (don't have
@@ -130,18 +148,6 @@ PyObject* AssertSameStructure(PyObject* o1, PyObject* o2, bool check_types);
 //   TypeError: The nest is or contains a dict with non-sortable keys.
 PyObject* Flatten(PyObject* nested);
 
-// RegisterSequenceClass is used to pass PyTypeObject for collections.Sequence
-// (which is defined in python) into the C++ world.
-// Alternative approach could be to import the collections modules and retrieve
-// the type from the module. This approach also requires some trigger from
-// Python so that we know that Python interpreter had been initialzied.
-void RegisterSequenceClass(PyObject* sequence_class);
-// Like RegisterSequenceClass, but for collections.Mapping.
-void RegisterMappingClass(PyObject* mapping_class);
-// Similar to the above functions, except for the
-// sparse_tensor.SparseTensorValue class.
-void RegisterSparseTensorValueClass(PyObject* sparse_tensor_value_class);
-
 // The tensorflow.python.data package has its own nest utility that follows very
 // slightly different semantics for its functions than the tensorflow.python
 // nest utility. Returns a true if its input is a collections.Sequence (except
@@ -167,6 +173,10 @@ PyObject* FlattenForData(PyObject* nested);
 PyObject* AssertSameStructureForData(PyObject* o1, PyObject* o2,
                                      bool check_types);
 
+// RegisterType is used to pass PyTypeObject (which is defined in python) for an
+// arbitrary identifier `type_name` into C++.
+PyObject* RegisterType(PyObject* type_name, PyObject* type);
+
 }  // namespace swig
 }  // namespace tensorflow
 
diff --git a/tensorflow/python/util/util.i b/tensorflow/python/util/util.i
index 32a6e684fa..3c0ec87fa4 100644
--- a/tensorflow/python/util/util.i
+++ b/tensorflow/python/util/util.i
@@ -28,14 +28,8 @@ limitations under the License.
 // for functions in this module because they use python methods that need GIL.
 // TODO(iga): Find a way not to leak such definitions across files.
 
-%unignore tensorflow::swig::RegisterSequenceClass;
-%noexception tensorflow::swig::RegisterSequenceClass;
-
-%unignore tensorflow::swig::RegisterMappingClass;
-%noexception tensorflow::swig::RegisterMappingClass;
-
-%unignore tensorflow::swig::RegisterSparseTensorValueClass;
-%noexception tensorflow::swig::RegisterSparseTensorValueClass;
+%unignore tensorflow::swig::RegisterType;
+%noexception tensorflow::swig::RegisterType;
 
 %feature("docstring") tensorflow::swig::IsSequence
 """Returns a true if its input is a collections.Sequence (except strings).
-- 
GitLab


From 3aa8b781b342c36302bd500737ab4ce9b2b87a45 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 1 Oct 2018 14:07:17 -0700
Subject: [PATCH 0171/1085] Disable async remote tests

PiperOrigin-RevId: 215276816
---
 tensorflow/contrib/eager/python/remote_test.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/eager/python/remote_test.py b/tensorflow/contrib/eager/python/remote_test.py
index ba6fe9701d..7aa4b598b8 100644
--- a/tensorflow/contrib/eager/python/remote_test.py
+++ b/tensorflow/contrib/eager/python/remote_test.py
@@ -47,8 +47,9 @@ def run_sync_and_async(f):
 
   @functools.wraps(f)
   def decorator(self, *args, **kwargs):
-    with context.execution_mode(context.ASYNC):
-      f(self, *args, **kwargs)
+    # TODO(b/117110239): Re-enable.
+    # with context.execution_mode(context.ASYNC):
+    #   f(self, *args, **kwargs)
 
     with context.execution_mode(context.SYNC):
       f(self, *args, **kwargs)
-- 
GitLab


From d7edbeb8dcc81a9cabc922ae46f549fe6b498eb9 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Mon, 1 Oct 2018 14:09:23 -0700
Subject: [PATCH 0172/1085] Update keras_applications to 1.0.6 and
 keras_preprocessing to 1.0.5. This removes the transitive keras and scipy
 dependencies in TensorFlow.

PiperOrigin-RevId: 215277190
---
 tensorflow/tools/ci_build/Dockerfile.cmake                | 4 ++--
 tensorflow/tools/ci_build/install/install_pip_packages.sh | 8 ++++----
 .../ci_build/install/install_python3.5_pip_packages.sh    | 4 ++--
 .../ci_build/install/install_python3.6_pip_packages.sh    | 4 ++--
 tensorflow/tools/docker/Dockerfile                        | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel                  | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel-gpu              | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel-mkl              | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel-mkl-horovod      | 4 ++--
 tensorflow/tools/docker/Dockerfile.gpu                    | 4 ++--
 tensorflow/tools/docker/Dockerfile.mkl                    | 4 ++--
 tensorflow/tools/docker/Dockerfile.mkl-horovod            | 4 ++--
 tensorflow/tools/pip_package/setup.py                     | 4 ++--
 13 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake
index b7450c83de..ef0024fdb4 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cmake
+++ b/tensorflow/tools/ci_build/Dockerfile.cmake
@@ -28,8 +28,8 @@ RUN pip install --upgrade astor
 RUN pip install --upgrade gast
 RUN pip install --upgrade numpy
 RUN pip install --upgrade termcolor
-RUN pip install keras_applications==1.0.5
-RUN pip install keras_preprocessing==1.0.3
+RUN pip install --upgrade keras_applications
+RUN pip install --upgrade keras_preprocessing
 
 # Install golang
 RUN apt-get install -t xenial-backports -y golang-1.9
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 4ced96f90b..b90f3f3b97 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -115,10 +115,10 @@ pip2 install --upgrade setuptools==39.1.0
 pip3 install --upgrade setuptools==39.1.0
 
 # Keras
-pip2 install keras_applications==1.0.5 --no-deps
-pip3 install keras_applications==1.0.5 --no-deps
-pip2 install keras_preprocessing==1.0.3 --no-deps
-pip3 install keras_preprocessing==1.0.3 --no-deps
+pip2 install keras_applications==1.0.6 --no-deps
+pip3 install keras_applications==1.0.6 --no-deps
+pip2 install keras_preprocessing==1.0.5 --no-deps
+pip3 install keras_preprocessing==1.0.5 --no-deps
 pip2 install --upgrade h5py==2.8.0
 pip3 install --upgrade h5py==2.8.0
 
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index 37e6b51f66..61d4fe3fe8 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -85,8 +85,8 @@ pip3.5 install --upgrade termcolor
 pip3.5 install --upgrade setuptools==39.1.0
 
 # Keras
-pip3.5 install keras_applications==1.0.5
-pip3.5 install keras_preprocessing==1.0.3
+pip3.5 install keras_applications==1.0.6
+pip3.5 install keras_preprocessing==1.0.5
 pip3.5 install --upgrade h5py==2.8.0
 
 # Install last working version of setuptools.
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index 7520ff74cb..8949af8a88 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -102,7 +102,7 @@ pip3 install --upgrade setuptools==39.1.0
 pip3 install --upgrade h5py==2.8.0
 
 # Keras
-pip3 install keras_applications==1.0.5
-pip3 install keras_preprocessing==1.0.3
+pip3 install keras_applications==1.0.6
+pip3 install keras_preprocessing==1.0.5
 
 # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh)
diff --git a/tensorflow/tools/docker/Dockerfile b/tensorflow/tools/docker/Dockerfile
index b5a6c05193..205128ad58 100644
--- a/tensorflow/tools/docker/Dockerfile
+++ b/tensorflow/tools/docker/Dockerfile
@@ -29,8 +29,8 @@ RUN pip --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         numpy \
         pandas \
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index c741e8ad0c..6f8e91fccf 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -33,8 +33,8 @@ RUN pip --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         mock \
         numpy \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index f544725af4..69a117fda6 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -55,8 +55,8 @@ RUN pip --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         mock \
         numpy \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
index db7c701289..e433e9ebb2 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -52,8 +52,8 @@ RUN ${PIP} --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         mock \
         numpy \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
index 987b582d10..48f2400569 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
@@ -45,8 +45,8 @@ RUN ${PIP} --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         mock \
         numpy \
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index 781bf9e851..7dc92a888b 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -42,8 +42,8 @@ RUN pip --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         numpy \
         pandas \
diff --git a/tensorflow/tools/docker/Dockerfile.mkl b/tensorflow/tools/docker/Dockerfile.mkl
index 641c9e3b16..ac41cffe4b 100755
--- a/tensorflow/tools/docker/Dockerfile.mkl
+++ b/tensorflow/tools/docker/Dockerfile.mkl
@@ -38,8 +38,8 @@ RUN ${PIP} --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         numpy \
         pandas \
diff --git a/tensorflow/tools/docker/Dockerfile.mkl-horovod b/tensorflow/tools/docker/Dockerfile.mkl-horovod
index 2b11679f54..4daf4fefff 100755
--- a/tensorflow/tools/docker/Dockerfile.mkl-horovod
+++ b/tensorflow/tools/docker/Dockerfile.mkl-horovod
@@ -38,8 +38,8 @@ RUN ${PIP} --no-cache-dir install \
         h5py \
         ipykernel \
         jupyter \
-        keras_applications==1.0.5 \
-        keras_preprocessing==1.0.3 \
+        keras_applications \
+        keras_preprocessing \
         matplotlib \
         numpy \
         pandas \
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 88c9c20d36..d864a7a039 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -51,8 +51,8 @@ REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
     'astor >= 0.6.0',
     'gast >= 0.2.0',
-    'keras_applications >= 1.0.5',
-    'keras_preprocessing >= 1.0.3',
+    'keras_applications >= 1.0.6',
+    'keras_preprocessing >= 1.0.5',
     'numpy >= 1.13.3',
     'six >= 1.10.0',
     'protobuf >= 3.6.1',
-- 
GitLab


From 094e1953b7df0bbb9bd4d0e3329b3b4611edf984 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 1 Oct 2018 14:14:32 -0700
Subject: [PATCH 0173/1085] Fix benchmark regression.

PiperOrigin-RevId: 215278033
---
 tensorflow/python/ops/conv2d_benchmark.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/conv2d_benchmark.py b/tensorflow/python/ops/conv2d_benchmark.py
index 28111c2730..f40488afbe 100644
--- a/tensorflow/python/ops/conv2d_benchmark.py
+++ b/tensorflow/python/ops/conv2d_benchmark.py
@@ -63,9 +63,9 @@ def build_graph(device, dtype, data_format, input_shape, filter_shape, strides,
     An array of tensors to run()
   """
   with ops.device("/%s:0" % device):
-    inp = variables.Variable(
+    inp = variables.VariableV1(
         random_ops.truncated_normal(input_shape, dtype=dtype))
-    filt = variables.Variable(
+    filt = variables.VariableV1(
         random_ops.truncated_normal(filter_shape, dtype=dtype))
 
     outputs = []
-- 
GitLab


From 5e3c2255b7f90146a895cd20267de699fbb15c27 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Mon, 1 Oct 2018 14:38:57 -0700
Subject: [PATCH 0174/1085] internal change

PiperOrigin-RevId: 215282721
---
 tensorflow/docs_src/BUILD                     |   14 -
 tensorflow/docs_src/__init__.py               |    0
 .../performance/xla/operation_semantics.md    | 2426 +++++++++++++++++
 tensorflow/tools/docs/BUILD                   |    1 -
 tensorflow/tools/docs/build_docs_test.py      |    6 +-
 5 files changed, 2430 insertions(+), 17 deletions(-)
 delete mode 100644 tensorflow/docs_src/BUILD
 delete mode 100644 tensorflow/docs_src/__init__.py
 create mode 100644 tensorflow/docs_src/performance/xla/operation_semantics.md

diff --git a/tensorflow/docs_src/BUILD b/tensorflow/docs_src/BUILD
deleted file mode 100644
index 34bf7b6a11..0000000000
--- a/tensorflow/docs_src/BUILD
+++ /dev/null
@@ -1,14 +0,0 @@
-# Files used to generate TensorFlow docs.
-
-licenses(["notice"])  # Apache 2.0
-
-package(
-    default_visibility = ["//tensorflow:internal"],
-)
-
-exports_files(["LICENSE"])
-
-filegroup(
-    name = "docs_src",
-    data = glob(["**/*.md"]),
-)
diff --git a/tensorflow/docs_src/__init__.py b/tensorflow/docs_src/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
new file mode 100644
index 0000000000..96d269bec4
--- /dev/null
+++ b/tensorflow/docs_src/performance/xla/operation_semantics.md
@@ -0,0 +1,2426 @@
+# Operation Semantics
+
+The following describes the semantics of operations defined in the
+[`XlaBuilder`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+interface. Typically, these operations map one-to-one to operations defined in
+the RPC interface in
+[`xla_data.proto`](https://www.tensorflow.org/code/tensorflow/compiler/xla/xla_data.proto).
+
+A note on nomenclature: the generalized data type XLA deals with is an
+N-dimensional array holding elements of some uniform type (such as 32-bit
+float). Throughout the documentation, *array* is used to denote an
+arbitrary-dimensional array. For convenience, special cases have more specific
+and familiar names; for example a *vector* is a 1-dimensional array and a
+*matrix* is a 2-dimensional array.
+
+## AllToAll
+
+See also
+[`XlaBuilder::AllToAll`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Alltoall is a collective operation that sends data from all cores to all cores.
+It has two phases:
+
+1.  the scatter phase. On each core, the operand is split into `split_count`
+    number of blocks along the `split_dimensions`, and the blocks are scattered
+    to all cores, e.g., the ith block is send to the ith core.
+2.  the gather phase. Each core concatenates the received blocks along the
+    `concat_dimension`.
+
+The participating cores can be configured by:
+
+-   `replica_groups`: each ReplicaGroup contains a list of replica id. If empty,
+    all replicas belong to one group in the order of 0 - (n-1). Alltoall will be
+    applied within subgroups in the specified order. For example, replica
+    groups = {{1,2,3},{4,5,0}} means, an Alltoall will be applied within replica
+    1, 2, 3, and in the gather phase, the received blocks will be concatenated
+    in the order of 1, 2, 3; another Alltoall will be applied within replica 4,
+    5, 0, and the concatenation order is 4, 5, 0.
+
+Prerequisites:
+
+-   The dimension size of the operand on the split_dimension is divisible by
+    split_count.
+-   The operand's shape is not tuple.
+
+<b> `AllToAll(operand, split_dimension, concat_dimension, split_count,
+replica_groups)` </b>
+
+
+| Arguments          | Type                  | Semantics                       |
+| ------------------ | --------------------- | ------------------------------- |
+| `operand`          | `XlaOp`               | n dimensional input array       |
+| `split_dimension`  | `int64`               | A value in the interval `[0,    |
+:                    :                       : n)` that names the dimension    :
+:                    :                       : along which the operand is      :
+:                    :                       : split                           :
+| `concat_dimension` | `int64`               | a value in the interval `[0,    |
+:                    :                       : n)` that names the dimension    :
+:                    :                       : along which the split blocks    :
+:                    :                       : are concatenated                :
+| `split_count`      | `int64`               | the number of cores that        |
+:                    :                       : participate this operation. If  :
+:                    :                       : `replica_groups` is empty, this :
+:                    :                       : should be the number of         :
+:                    :                       : replicas; otherwise, this       :
+:                    :                       : should be equal to the number   :
+:                    :                       : of replicas in each group.      :
+| `replica_groups`   | `ReplicaGroup` vector | each group contains a list of   |
+:                    :                       : replica id.                     :
+
+Below shows an example of Alltoall.
+
+```
+XlaBuilder b("alltoall");
+auto x = Parameter(&b, 0, ShapeUtil::MakeShape(F32, {4, 16}), "x");
+AllToAll(x, /*split_dimension=*/1, /*concat_dimension=*/0, /*split_count=*/4);
+```
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/xla/ops_alltoall.png">
+</div>
+
+In this example, there are 4 cores participating the Alltoall. On each core, the
+operand is split into 4 parts along dimension 0, so each part has shape
+f32[4,4]. The 4 parts are scattered to all cores. Then each core concatenates
+the received parts along dimension 1, in the order or core 0-4. So the output on
+each core has shape f32[16,4].
+
+## BatchNormGrad
+
+See also
+[`XlaBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+Calculates gradients of batch norm.
+
+<b> `BatchNormGrad(operand, scale, mean, variance, grad_output, epsilon, feature_index)` </b>
+
+| Arguments       | Type                    | Semantics                        |
+| --------------- | ----------------------- | -------------------------------- |
+| `operand`       | `XlaOp`                 | n dimensional array to be        |
+:                 :                         : normalized (x)                   :
+| `scale`         | `XlaOp`                 | 1 dimensional array              |
+:                 :                         : (\\(\gamma\\))                   :
+| `mean`          | `XlaOp`                 | 1 dimensional array (\\(\mu\\))  |
+| `variance`      | `XlaOp`                 | 1 dimensional array              |
+:                 :                         : (\\(\sigma^2\\))                 :
+| `grad_output`   | `XlaOp`                 | Gradients passed to              |
+:                 :                         : `BatchNormTraining`              :
+:                 :                         : (\\( \nabla y\\))                :
+| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
+| `feature_index` | `int64`                 | Index to feature dimension in    |
+:                 :                         : `operand`                        :
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the gradients with
+respect to `operand`, `offset` and `scale` across all the other dimensions. The
+`feature_index` must be a valid index for the feature dimension in `operand`.
+
+The three gradients are defined by the following formulas (assuming a
+4-dimensional tensor as `operand` and with feature dimension index \\(l\\),
+batch size `m` and spatial sizes `w` and `h`):
+
+\\[ \begin{split} c_l&=
+\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h
+\left( \nabla y_{ijkl} \frac{x_{ijkl} - \mu_l}{\sigma^2_l+\epsilon} \right)
+\\\\
+\nabla x_{ijkl} &= \frac{\gamma_{l}}{\sqrt{\sigma^2_{l}+\epsilon}}
+\left( \nabla y_{ijkl} - \mathrm{mean}(\nabla y) - c_l (x_{ijkl} - \mu_{l})
+\right)
+\\\\
+\nabla \gamma_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \left( \nabla y_{ijkl}
+\frac{x_{ijkl} - \mu_l}{\sqrt{\sigma^2_{l}+\epsilon}} \right)
+\\\\\
+\nabla \beta_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl}
+\end{split} \\]
+
+The inputs `mean` and `variance` represent moments value
+across batch and spatial dimensions.
+
+The output type is a tuple of three handles:
+
+| Outputs        | Type                    | Semantics                         |
+| -------------  | ----------------------- | --------------------------------- |
+| `grad_operand` | `XlaOp`                 | gradient with respect to input    |
+:                :                         : `operand` (\\( \nabla x\\))       :
+| `grad_scale`   | `XlaOp`                 | gradient with respect to input    |
+:                :                         : `scale` (\\( \nabla \gamma\\))    :
+| `grad_offset`  | `XlaOp`                 | gradient with respect to input    |
+:                :                         : `offset`(\\( \nabla \beta\\))     :
+
+## BatchNormInference
+
+See also
+[`XlaBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+Normalizes an array across batch and spatial dimensions.
+
+<b> `BatchNormInference(operand, scale, offset, mean, variance, epsilon, feature_index)` </b>
+
+Arguments       | Type    | Semantics
+--------------- | ------- | ---------------------------------------
+`operand`       | `XlaOp` | n dimensional array to be normalized
+`scale`         | `XlaOp` | 1 dimensional array
+`offset`        | `XlaOp` | 1 dimensional array
+`mean`          | `XlaOp` | 1 dimensional array
+`variance`      | `XlaOp` | 1 dimensional array
+`epsilon`       | `float` | Epsilon value
+`feature_index` | `int64` | Index to feature dimension in `operand`
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the mean and variance
+across all the other dimensions and uses the mean and variance to normalize each
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
+
+`BatchNormInference`  is equivalent to calling `BatchNormTraining` without
+computing `mean` and `variance` for each batch. It uses the input `mean` and
+`variance` instead as estimated values. The purpose of this op is to reduce
+latency in inference, hence the name `BatchNormInference`.
+
+The output is an n-dimensional, normalized array with the same shape as input
+`operand`.
+
+## BatchNormTraining
+
+See also
+[`XlaBuilder::BatchNormTraining`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and [`the original batch normalization paper`](https://arxiv.org/abs/1502.03167)
+for a detailed description of the algorithm.
+
+Normalizes an array across batch and spatial dimensions.
+
+<b> `BatchNormTraining(operand, scale, offset, epsilon, feature_index)` </b>
+
+Arguments       | Type    | Semantics
+--------------- | ------- | ----------------------------------------
+`operand`       | `XlaOp` | n dimensional array to be normalized (x)
+`scale`         | `XlaOp` | 1 dimensional array (\\(\gamma\\))
+`offset`        | `XlaOp` | 1 dimensional array (\\(\beta\\))
+`epsilon`       | `float` | Epsilon value (\\(\epsilon\\))
+`feature_index` | `int64` | Index to feature dimension in `operand`
+
+For each feature in the feature dimension (`feature_index` is the index for the
+feature dimension in `operand`), the operation calculates the mean and variance
+across all the other dimensions and uses the mean and variance to normalize each
+element in `operand`. The `feature_index` must be a valid index for the feature
+dimension in `operand`.
+
+The algorithm goes as follows for each batch in `operand` \\(x\\) that
+contains `m` elements with `w` and `h` as the size of spatial dimensions
+(assuming `operand` is an 4 dimensional array):
+
+- Calculates batch mean \\(\mu_l\\) for each feature `l` in feature dimension:
+\\(\mu_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h x_{ijkl}\\)
+
+- Calculates batch variance \\(\sigma^2_l\\):
+\\(\sigma^2_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (x_{ijkl} - \mu_l)^2\\)
+
+- Normalizes, scales and shifts:
+\\(y_{ijkl}=\frac{\gamma_l(x_{ijkl}-\mu_l)}{\sqrt[2]{\sigma^2_l+\epsilon}}+\beta_l\\)
+
+The epsilon value, usually a small number, is added to avoid divide-by-zero errors.
+
+The output type is a tuple of three `XlaOp`s:
+
+| Outputs      | Type                    | Semantics                            |
+| ------------ | ----------------------- | -------------------------------------|
+| `output`     | `XlaOp`                 | n dimensional array with the same    |
+:              :                         : shape as input `operand` (y)         :
+| `batch_mean` | `XlaOp`                 | 1 dimensional array (\\(\mu\\))      |
+| `batch_var`  | `XlaOp`                 | 1 dimensional array (\\(\sigma^2\\)) |
+
+The `batch_mean` and `batch_var` are moments calculated across the batch and
+spatial dimensions using the formulas above.
+
+## BitcastConvertType
+
+See also
+[`XlaBuilder::BitcastConvertType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Similar to a `tf.bitcast` in TensorFlow, performs an element-wise bitcast
+operation from a data shape to a target shape. The dimensions must match, and
+the conversion is an element-wise one; e.g. `s32` elements become `f32` elements
+via bitcast routine. Bitcast is implemented as a low-level cast, so machines
+with different floating-point representations will give different results.
+
+<b> `BitcastConvertType(operand, new_element_type)` </b>
+
+Arguments          | Type            | Semantics
+------------------ | --------------- | ---------------------------
+`operand`          | `XlaOp`         | array of type T with dims D
+`new_element_type` | `PrimitiveType` | type U
+
+The dimensions of the operand and the target shape must match. The bit-width of
+the source and destination element types must be equal. The source
+and destination element types must not be tuples.
+
+## Broadcast
+
+See also
+[`XlaBuilder::Broadcast`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Adds dimensions to an array by duplicating the data in the array.
+
+<b> `Broadcast(operand, broadcast_sizes)` </b>
+
+Arguments         | Type                | Semantics
+----------------- | ------------------- | -------------------------------
+`operand`         | `XlaOp`             | The array to duplicate
+`broadcast_sizes` | `ArraySlice<int64>` | The sizes of the new dimensions
+
+The new dimensions are inserted on the left, i.e. if `broadcast_sizes` has
+values `{a0, ..., aN}` and the operand shape has dimensions `{b0, ..., bM}` then
+the shape of the output has dimensions `{a0, ..., aN, b0, ..., bM}`.
+
+The new dimensions index into copies of the operand, i.e.
+
+```
+output[i0, ..., iN, j0, ..., jM] = operand[j0, ..., jM]
+```
+
+For example, if `operand` is a scalar `f32` with value `2.0f`, and
+`broadcast_sizes` is `{2, 3}`, then the result will be an array with shape
+`f32[2, 3]` and all the values in the result will be `2.0f`.
+
+## Call
+
+See also
+[`XlaBuilder::Call`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Invokes a computation with the given arguments.
+
+<b> `Call(computation, args...)` </b>
+
+| Arguments     | Type                   | Semantics                           |
+| ------------- | ---------------------- | ----------------------------------- |
+| `computation` | `XlaComputation`       | computation of type `T_0, T_1, ..., |
+:               :                        : T_N -> S` with N parameters of      :
+:               :                        : arbitrary type                      :
+| `args`        | sequence of N `XlaOp`s | N arguments of arbitrary type       |
+
+The arity and types of the `args` must match the parameters of the
+`computation`. It is allowed to have no `args`.
+
+## Clamp
+
+See also
+[`XlaBuilder::Clamp`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Clamps an operand to within the range between a minimum and maximum value.
+
+<b> `Clamp(min, operand, max)` </b>
+
+Arguments | Type    | Semantics
+--------- | ------- | ---------------
+`min`     | `XlaOp` | array of type T
+`operand` | `XlaOp` | array of type T
+`max`     | `XlaOp` | array of type T
+
+Given an operand and minimum and maximum values, returns the operand if it is in
+the range between the minimum and maximum, else returns the minimum value if the
+operand is below this range or the maximum value if the operand is above this
+range.  That is, `clamp(a, x, b) =  min(max(a, x), b)`.
+
+All three arrays must be the same shape. Alternatively, as a restricted form of
+[broadcasting](broadcasting.md), `min` and/or `max` can be a scalar of type `T`.
+
+Example with scalar `min` and `max`:
+
+```
+let operand: s32[3] = {-1, 5, 9};
+let min: s32 = 0;
+let max: s32 = 6;
+==>
+Clamp(min, operand, max) = s32[3]{0, 5, 6};
+```
+
+## Collapse
+
+See also
+[`XlaBuilder::Collapse`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and the `tf.reshape` operation.
+
+Collapses dimensions of an array into one dimension.
+
+<b> `Collapse(operand, dimensions)` </b>
+
+Arguments    | Type           | Semantics
+------------ | -------------- | -----------------------------------------------
+`operand`    | `XlaOp`        | array of type T
+`dimensions` | `int64` vector | in-order, consecutive subset of T's dimensions.
+
+Collapse replaces the given subset of the operand's dimensions by a single
+dimension. The input arguments are an arbitrary array of type T and a
+compile-time-constant vector of dimension indices. The dimension indices must be
+an in-order (low to high dimension numbers), consecutive subset of T's
+dimensions. Thus, {0, 1, 2}, {0, 1}, or {1, 2} are all valid dimension sets, but
+{1, 0} or {0, 2} are not. They are replaced by a single new dimension, in the
+same position in the dimension sequence as those they replace, with the new
+dimension size equal to the product of original dimension sizes. The lowest
+dimension number in `dimensions` is the slowest varying dimension (most major)
+in the loop nest which collapses these dimension, and the highest dimension
+number is fastest varying (most minor). See the `tf.reshape` operator
+if more general collapse ordering is needed.
+
+For example, let v be an array of 24 elements:
+
+```
+let v = f32[4x2x3] {{{10, 11, 12},  {15, 16, 17}},
+                    {{20, 21, 22},  {25, 26, 27}},
+                    {{30, 31, 32},  {35, 36, 37}},
+                    {{40, 41, 42},  {45, 46, 47}}};
+
+// Collapse to a single dimension, leaving one dimension.
+let v012 = Collapse(v, {0,1,2});
+then v012 == f32[24] {10, 11, 12, 15, 16, 17,
+                      20, 21, 22, 25, 26, 27,
+                      30, 31, 32, 35, 36, 37,
+                      40, 41, 42, 45, 46, 47};
+
+// Collapse the two lower dimensions, leaving two dimensions.
+let v01 = Collapse(v, {0,1});
+then v01 == f32[4x6] {{10, 11, 12, 15, 16, 17},
+                      {20, 21, 22, 25, 26, 27},
+                      {30, 31, 32, 35, 36, 37},
+                      {40, 41, 42, 45, 46, 47}};
+
+// Collapse the two higher dimensions, leaving two dimensions.
+let v12 = Collapse(v, {1,2});
+then v12 == f32[8x3] {{10, 11, 12},
+                      {15, 16, 17},
+                      {20, 21, 22},
+                      {25, 26, 27},
+                      {30, 31, 32},
+                      {35, 36, 37},
+                      {40, 41, 42},
+                      {45, 46, 47}};
+
+```
+
+## Concatenate
+
+See also
+[`XlaBuilder::ConcatInDim`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Concatenate composes an array from multiple array operands. The array is of the
+same rank as each of the input array operands (which must be of the same rank as
+each other) and contains the arguments in the order that they were specified.
+
+<b> `Concatenate(operands..., dimension)` </b>
+
+| Arguments   | Type                  | Semantics                              |
+| ----------- | --------------------- | -------------------------------------- |
+| `operands`  | sequence of N `XlaOp` | N arrays of type T with dimensions     |
+:             :                       : [L0, L1, ...]. Requires N >= 1.        :
+| `dimension` | `int64`               | A value in the interval `[0, N)` that  |
+:             :                       : names the dimension to be concatenated :
+:             :                       : between the `operands`.                :
+
+With the exception of `dimension` all dimensions must be the same. This is
+because XLA does not support "ragged" arrays. Also note that rank-0 values
+cannot be concatenated (as it's impossible to name the dimension along which the
+concatenation occurs).
+
+1-dimensional example:
+
+```
+Concat({{2, 3}, {4, 5}, {6, 7}}, 0)
+>>> {2, 3, 4, 5, 6, 7}
+```
+
+2-dimensional example:
+
+```
+let a = {
+  {1, 2},
+  {3, 4},
+  {5, 6},
+};
+let b = {
+  {7, 8},
+};
+Concat({a, b}, 0)
+>>> {
+  {1, 2},
+  {3, 4},
+  {5, 6},
+  {7, 8},
+}
+```
+
+Diagram:
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="https://www.tensorflow.org/images/ops_concatenate.png">
+</div>
+
+## Conditional
+
+See also
+[`XlaBuilder::Conditional`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Conditional(pred, true_operand, true_computation, false_operand,
+false_computation)` </b>
+
+Arguments           | Type             | Semantics
+------------------- | ---------------- | ---------------------------------
+`pred`              | `XlaOp`          | Scalar of type `PRED`
+`true_operand`      | `XlaOp`          | Argument of type `T_0`
+`true_computation`  | `XlaComputation` | XlaComputation of type `T_0 -> S`
+`false_operand`     | `XlaOp`          | Argument of type `T_1`
+`false_computation` | `XlaComputation` | XlaComputation of type `T_1 -> S`
+
+Executes `true_computation` if `pred` is `true`, `false_computation` if `pred`
+is `false`, and returns the result.
+
+The `true_computation` must take in a single argument of type `T_0` and will be
+invoked with `true_operand` which must be of the same type. The
+`false_computation` must take in a single argument of type `T_1` and will be
+invoked with `false_operand` which must be of the same type. The type of the
+returned value of `true_computation` and `false_computation` must be the same.
+
+Note that only one of `true_computation` and `false_computation` will be
+executed depending on the value of `pred`.
+
+## Conv (convolution)
+
+See also
+[`XlaBuilder::Conv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+As ConvWithGeneralPadding, but the padding is specified in a short-hand way as
+either SAME or VALID. SAME padding pads the input (`lhs`) with zeroes so that
+the output has the same shape as the input when not taking striding into
+account. VALID padding simply means no padding.
+
+## ConvWithGeneralPadding (convolution)
+
+See also
+[`XlaBuilder::ConvWithGeneralPadding`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Computes a convolution of the kind used in neural networks. Here, a convolution
+can be thought of as a n-dimensional window moving across a n-dimensional base
+area and a computation is performed for each possible position of the window.
+
+| Arguments             | Type                 | Semantics                     |
+| --------------------- | -------------------- | ----------------------------- |
+| `lhs`                 | `XlaOp`              | rank n+2 array of inputs      |
+| `rhs`                 | `XlaOp`              | rank n+2 array of kernel      |
+:                       :                      : weights                       :
+| `window_strides`      | `ArraySlice<int64>`  | n-d array of kernel strides   |
+| `padding`             | `ArraySlice<         | n-d array of (low, high)      |
+:                       : pair<int64, int64>>` : padding                       :
+| `lhs_dilation`        | `ArraySlice<int64>`  | n-d lhs dilation factor array |
+| `rhs_dilation`        | `ArraySlice<int64>`  | n-d rhs dilation factor array |
+| `feature_group_count` | int64                | the number of feature groups  |
+
+Let n be the number of spatial dimensions. The `lhs` argument is a rank n+2
+array describing the base area. This is called the input, even though of course
+the rhs is also an input. In a neural network, these are the input activations.
+The n+2 dimensions are, in this order:
+
+*   `batch`: Each coordinate in this dimension represents an independent input
+    for which convolution is carried out.
+*   `z/depth/features`: Each (y,x) position in the base area has a vector
+    associated to it, which goes into this dimension.
+*   `spatial_dims`: Describes the `n` spatial dimensions that define the base
+    area that the window moves across.
+
+The `rhs` argument is a rank n+2 array describing the convolutional
+filter/kernel/window. The dimensions are, in this order:
+
+*   `output-z`: The `z` dimension of the output.
+*   `input-z`: The size of this dimension times `feature_group_count` should
+    equal the size of the `z` dimension in lhs.
+*   `spatial_dims`: Describes the `n` spatial dimensions that define the n-d
+    window that moves across the base area.
+
+The `window_strides` argument specifies the stride of the convolutional window
+in the spatial dimensions. For example, if the stride in the first spatial
+dimension is 3, then the window can only be placed at coordinates where the
+first spatial index is divisible by 3.
+
+The `padding` argument specifies the amount of zero padding to be applied to the
+base area. The amount of padding can be negative -- the absolute value of
+negative padding indicates the number of elements to remove from the specified
+dimension before doing the convolution. `padding[0]` specifies the padding for
+dimension `y` and `padding[1]` specifies the padding for dimension `x`. Each
+pair has the low padding as the first element and the high padding as the second
+element. The low padding is applied in the direction of lower indices while the
+high padding is applied in the direction of higher indices. For example, if
+`padding[1]` is `(2,3)` then there will be a padding by 2 zeroes on the left and
+by 3 zeroes on the right in the second spatial dimension. Using padding is
+equivalent to inserting those same zero values into the input (`lhs`) before
+doing the convolution.
+
+The `lhs_dilation` and `rhs_dilation` arguments specify the dilation factor to
+be applied to the lhs and rhs, respectively, in each spatial dimension. If the
+dilation factor in a spatial dimension is d, then d-1 holes are implicitly
+placed between each of the entries in that dimension, increasing the size of the
+array. The holes are filled with a no-op value, which for convolution means
+zeroes.
+
+Dilation of the rhs is also called atrous convolution. For more details, see
+`tf.nn.atrous_conv2d`. Dilation of the lhs is also called transposed
+convolution. For more details, see `tf.nn.conv2d_transpose`.
+
+The `feature_group_count` argument (default value 1) can be used for grouped
+convolutions. `feature_group_count` needs to be a divisor of both the input and
+the output feature dimension. If `feature_group_count` is greater than 1, it
+means that conceptually the input and output feature dimension and the `rhs`
+output feature dimension are split evenly into `feature_group_count` many
+groups, each group consisting of a consecutive subsequence of features. The
+input feature dimension of `rhs` needs to be equal to the `lhs` input feature
+dimension divided by `feature_group_count` (so it already has the size of a
+group of input features). The i-th groups are used together to compute
+`feature_group_count` many separate convolutions. The results of these
+convolutions are concatenated together in the output feature dimension.
+
+For depthwise convolution the `feature_group_count` argument would be set to the
+input feature dimension, and the filter would be reshaped from
+`[filter_height, filter_width, in_channels, channel_multiplier]` to
+`[filter_height, filter_width, 1, in_channels * channel_multiplier]`. For more
+details, see `tf.nn.depthwise_conv2d`.
+
+The output shape has these dimensions, in this order:
+
+*   `batch`: Same size as `batch` on the input (`lhs`).
+*   `z`: Same size as `output-z` on the kernel (`rhs`).
+*   `spatial_dims`: One value for each valid placement of the convolutional
+    window.
+
+The valid placements of the convolutional window are determined by the strides
+and the size of the base area after padding.
+
+To describe what a convolution does, consider a 2d convolution, and pick some
+fixed `batch`, `z`, `y`, `x` coordinates in the output. Then `(y,x)` is a
+position of a corner of the window within the base area (e.g. the upper left
+corner, depending on how you interpret the spatial dimensions). We now have a 2d
+window, taken from the base area, where each 2d point is associated to a 1d
+vector, so we get a 3d box. From the convolutional kernel, since we fixed the
+output coordinate `z`, we also have a 3d box. The two boxes have the same
+dimensions, so we can take the sum of the element-wise products between the two
+boxes (similar to a dot product). That is the output value.
+
+Note that if `output-z` is e.g., 5, then each position of the window produces 5
+values in the output into the `z` dimension of the output. These values differ
+in what part of the convolutional kernel is used - there is a separate 3d box of
+values used for each `output-z` coordinate. So you could think of it as 5
+separate convolutions with a different filter for each of them.
+
+Here is pseudo-code for a 2d convolution with padding and striding:
+
+```
+for (b, oz, oy, ox) {  // output coordinates
+  value = 0;
+  for (iz, ky, kx) {  // kernel coordinates and input z
+    iy = oy*stride_y + ky - pad_low_y;
+    ix = ox*stride_x + kx - pad_low_x;
+    if ((iy, ix) inside the base area considered without padding) {
+      value += input(b, iz, iy, ix) * kernel(oz, iz, ky, kx);
+    }
+  }
+  output(b, oz, oy, ox) = value;
+}
+```
+
+## ConvertElementType
+
+See also
+[`XlaBuilder::ConvertElementType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Similar to an element-wise `static_cast` in C++, performs an element-wise
+conversion operation from a data shape to a target shape. The dimensions must
+match, and the conversion is an element-wise one; e.g. `s32` elements become
+`f32` elements via an `s32`-to-`f32` conversion routine.
+
+<b> `ConvertElementType(operand, new_element_type)` </b>
+
+Arguments          | Type            | Semantics
+------------------ | --------------- | ---------------------------
+`operand`          | `XlaOp`         | array of type T with dims D
+`new_element_type` | `PrimitiveType` | type U
+
+The dimensions of the operand and the target shape must match. The source and
+destination element types must not be tuples.
+
+A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float
+conversion routine such as round-to-nearest-even.
+
+> Note: The precise float-to-int and visa-versa conversions are currently
+> unspecified, but may become additional arguments to the convert operation in
+> the future.  Not all possible conversions have been implemented for all
+>targets.
+
+```
+let a: s32[3] = {0, 1, 2};
+let b: f32[3] = convert(a, f32);
+then b == f32[3]{0.0, 1.0, 2.0}
+```
+
+## CrossReplicaSum
+
+See also
+[`XlaBuilder::CrossReplicaSum`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Computes a sum across replicas.
+
+<b> `CrossReplicaSum(operand)` </b>
+
+Arguments | Type    | Semantics
+--------- | ------- | -----------------------------
+`operand` | `XlaOp` | Array to sum across replicas.
+| `replica_group_ids`    | `int64` vector | Group ID for each replica.      |
+
+The output shape is the same as the input shape. For example, if there are two
+replicas and the operand has the value `(1.0, 2.5)` and `(3.0, 5.25)`
+respectively on the two replicas, then the output value from this op will be
+`(4.0, 7.75)` on both replicas.
+
+`replica_group_ids` identifies the group ID of each replica. The group ID must
+either be empty (all replicas belong to a single group), or contain the same
+number of elements as the number of replicas. For example, if
+`replica_group_ids` = {0, 1, 2, 3, 0, 1, 2, 3} has eight replicas, there are
+four subgroups of replica IDs: {0, 4}, {1, 5}, {2, 6}, and {3, 7}. The size of
+each subgroup *must* be identical, so, for example, using:
+`replica_group_ids` = {0, 1, 2, 0} for four replicas is invalid.
+
+Computing the result of CrossReplicaSum requires having one input from each
+replica, so if one replica executes a CrossReplicaSum node more times than
+another, then the former replica will wait forever. Since the replicas are all
+running the same program, there are not a lot of ways for that to happen, but it
+is possible when a while loop's condition depends on data from infeed and the
+data that is infed causes the while loop to iterate more times on one replica
+than another.
+
+## CustomCall
+
+See also
+[`XlaBuilder::CustomCall`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Call a user-provided function within a computation.
+
+<b> `CustomCall(target_name, args..., shape)` </b>
+
+| Arguments     | Type                   | Semantics                         |
+| ------------- | ---------------------- | --------------------------------- |
+| `target_name` | `string`               | Name of the function. A call      |
+:               :                        : instruction will be emitted which :
+:               :                        : targets this symbol name.         :
+| `args`        | sequence of N `XlaOp`s | N arguments of arbitrary type,    |
+:               :                        : which will be passed to the       :
+:               :                        : function.                         :
+| `shape`       | `Shape`                | Output shape of the function      |
+
+The function signature is the same, regardless of the arity or type of args:
+
+```
+extern "C" void target_name(void* out, void** in);
+```
+
+For example, if CustomCall is used as follows:
+
+```
+let x = f32[2] {1,2};
+let y = f32[2x3] {{10, 20, 30}, {40, 50, 60}};
+
+CustomCall("myfunc", {x, y}, f32[3x3])
+```
+
+Here is an example of an implementation of `myfunc`:
+
+```
+extern "C" void myfunc(void* out, void** in) {
+  float (&x)[2] = *static_cast<float(*)[2]>(in[0]);
+  float (&y)[2][3] = *static_cast<float(*)[2][3]>(in[1]);
+  EXPECT_EQ(1, x[0]);
+  EXPECT_EQ(2, x[1]);
+  EXPECT_EQ(10, y[0][0]);
+  EXPECT_EQ(20, y[0][1]);
+  EXPECT_EQ(30, y[0][2]);
+  EXPECT_EQ(40, y[1][0]);
+  EXPECT_EQ(50, y[1][1]);
+  EXPECT_EQ(60, y[1][2]);
+  float (&z)[3][3] = *static_cast<float(*)[3][3]>(out);
+  z[0][0] = x[1] + y[1][0];
+  // ...
+}
+```
+
+The user-provided function must not have side-effects and its execution must be
+idempotent.
+
+> Note: The opaque nature of the user-provided function restricts optimization
+> opportunities for the compiler. Try to express your computation in terms of
+> native XLA ops whenever possible; only use CustomCall as a last resort.
+
+## Dot
+
+See also
+[`XlaBuilder::Dot`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Dot(lhs, rhs)` </b>
+
+Arguments | Type    | Semantics
+--------- | ------- | ---------------
+`lhs`     | `XlaOp` | array of type T
+`rhs`     | `XlaOp` | array of type T
+
+The exact semantics of this operation depend on the ranks of the operands:
+
+| Input                   | Output                | Semantics               |
+| ----------------------- | --------------------- | ----------------------- |
+| vector [n] `dot` vector | scalar                | vector dot product      |
+: [n]                     :                       :                         :
+| matrix [m x k] `dot`    | vector [m]            | matrix-vector           |
+: vector [k]              :                       : multiplication          :
+| matrix [m x k] `dot`    | matrix [m x n]        | matrix-matrix           |
+: matrix [k x n]          :                       : multiplication          :
+
+The operation performs sum of products over the last dimension of `lhs` and the
+one-before-last dimension of `rhs`. These are the "contracted" dimensions. The
+contracted dimensions of `lhs` and `rhs` must be of the same size. In practice,
+it can be used to perform dot products between vectors, vector/matrix
+multiplications or matrix/matrix multiplications.
+
+## DotGeneral
+
+See also
+[`XlaBuilder::DotGeneral`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `DotGeneral(lhs, rhs, dimension_numbers)` </b>
+
+Arguments           | Type                  | Semantics
+------------------- | --------------------- | ---------------
+`lhs`               | `XlaOp`               | array of type T
+`rhs`               | `XlaOp`               | array of type T
+`dimension_numbers` | `DotDimensionNumbers` | array of type T
+
+As Dot, but allows contracting and batch dimension numbers to be specified for
+both the 'lhs' and 'rhs'.
+
+| DotDimensionNumbers Fields | Type                    | Semantics
+| --------- | ----------------------- | ---------------
+| 'lhs_contracting_dimensions' | repeated int64 | 'lhs' contracting dimension numbers |
+| 'rhs_contracting_dimensions' | repeated int64 | 'rhs' contracting dimension numbers |
+| 'lhs_batch_dimensions' | repeated int64 | 'lhs' batch dimension numbers |
+| 'rhs_batch_dimensions' | repeated int64 | 'rhs' batch dimension numbers |
+
+DotGeneral performs the sum of products over contracting dimensions specified
+in 'dimension_numbers'.
+
+Associated contracting dimension numbers from the 'lhs' and 'rhs' do not need
+to be the same, but must be listed in the same order in both
+'lhs/rhs_contracting_dimensions' arrays and have the same dimension sizes.
+There must be exactly one contracting dimension on both 'lhs' and 'rhs'.
+
+Example with contracting dimension numbers:
+
+```
+lhs = { {1.0, 2.0, 3.0},
+        {4.0, 5.0, 6.0} }
+
+rhs = { {1.0, 1.0, 1.0},
+        {2.0, 2.0, 2.0} }
+
+DotDimensionNumbers dnums;
+dnums.add_lhs_contracting_dimensions(1);
+dnums.add_rhs_contracting_dimensions(1);
+
+DotGeneral(lhs, rhs, dnums) -> { {6.0, 12.0},
+                                 {15.0, 30.0} }
+```
+
+Associated batch dimension numbers from the 'lhs' and 'rhs' must have the same
+dimension number, must be listed in the same order in both arrays, must
+have the same dimension sizes, and must be ordered before contracting and
+non-contracting/non-batch dimension numbers.
+
+Example with batch dimension numbers (batch size 2, 2x2 matrices):
+
+```
+lhs = { { {1.0, 2.0},
+          {3.0, 4.0} },
+        { {5.0, 6.0},
+          {7.0, 8.0} } }
+
+rhs = { { {1.0, 0.0},
+          {0.0, 1.0} },
+        { {1.0, 0.0},
+          {0.0, 1.0} } }
+
+DotDimensionNumbers dnums;
+dnums.add_lhs_contracting_dimensions(2);
+dnums.add_rhs_contracting_dimensions(1);
+dnums.add_lhs_batch_dimensions(0);
+dnums.add_rhs_batch_dimensions(0);
+
+DotGeneral(lhs, rhs, dnums) -> { { {1.0, 2.0},
+                                   {3.0, 4.0} },
+                                 { {5.0, 6.0},
+                                   {7.0, 8.0} } }
+```
+
+| Input                               | Output            | Semantics        |
+| ----------------------------------- | ----------------- | ---------------- |
+| [b0, m, k] `dot` [b0, k, n]         | [b0, m, n]        |  batch matmul    |
+| [b0, b1, m, k] `dot` [b0, b1, k, n] | [b0, b1, m, n]    |  batch matmul    |
+
+It follows that the resulting dimension number starts with the batch dimension,
+then the 'lhs' non-contracting/non-batch dimension, and finally the 'rhs'
+non-contracting/non-batch dimension.
+
+## DynamicSlice
+
+See also
+[`XlaBuilder::DynamicSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+DynamicSlice extracts a sub-array from the input array at dynamic
+`start_indices`. The size of the slice in each dimension is passed in
+`size_indices`, which specify the end point of exclusive slice intervals in each
+dimension: [start, start + size). The shape of `start_indices` must be rank ==
+1, with dimension size equal to the rank of `operand`.
+
+<b> `DynamicSlice(operand, start_indices, size_indices)` </b>
+
+| Arguments       | Type                | Semantics                           |
+| --------------- | ------------------- | ----------------------------------- |
+| `operand`       | `XlaOp`             | N dimensional array of type T       |
+| `start_indices` | `XlaOp`             | Rank 1 array of N integers          |
+:                 :                     : containing the starting indices of  :
+:                 :                     : the slice for each dimension. Value :
+:                 :                     : must be greater than or equal to    :
+:                 :                     : zero.                               :
+| `size_indices`  | `ArraySlice<int64>` | List of N integers containing the   |
+:                 :                     : slice size for each dimension. Each :
+:                 :                     : value must be strictly greater than :
+:                 :                     : zero, and start + size must be less :
+:                 :                     : than or equal to the size of the    :
+:                 :                     : dimension to avoid wrapping modulo  :
+:                 :                     : dimension size.                     :
+
+The effective slice indices are computed by applying the following
+transformation for each index `i` in `[1, N)` before performing the slice:
+
+```
+start_indices[i] = clamp(start_indices[i], 0, operand.dimension_size[i] - size_indices[i])
+```
+
+This ensures that the extracted slice is always in-bounds with respect to the
+operand array. If the slice is in-bounds before the transformation is applied,
+the transformation has no effect.
+
+1-dimensional example:
+
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+let s = {2}
+
+DynamicSlice(a, s, {2}) produces:
+  {2.0, 3.0}
+```
+
+2-dimensional example:
+
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+let s = {2, 1}
+
+DynamicSlice(b, s, {2, 2}) produces:
+  { { 7.0,  8.0},
+    {10.0, 11.0} }
+```
+## DynamicUpdateSlice
+
+See also
+[`XlaBuilder::DynamicUpdateSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+DynamicUpdateSlice generates a result which is the value of the input array
+`operand`, with a slice `update` overwritten at `start_indices`.
+The shape of `update` determines the shape of the sub-array of the result which
+is updated.
+The shape of `start_indices` must be rank == 1, with dimension size equal to
+the rank of `operand`.
+
+<b> `DynamicUpdateSlice(operand, update, start_indices)` </b>
+
+| Arguments       | Type    | Semantics                                        |
+| --------------- | ------- | ------------------------------------------------ |
+| `operand`       | `XlaOp` | N dimensional array of type T                    |
+| `update`        | `XlaOp` | N dimensional array of type T containing the     |
+:                 :         : slice update. Each dimension of update shape     :
+:                 :         : must be strictly greater than zero, and start +  :
+:                 :         : update must be less than or equal to the operand :
+:                 :         : size for each dimension to avoid generating      :
+:                 :         : out-of-bounds update indices.                    :
+| `start_indices` | `XlaOp` | Rank 1 array of N integers containing the        |
+:                 :         : starting indices of the slice for each           :
+:                 :         : dimension. Value must be greater than or equal   :
+:                 :         : to zero.                                         :
+
+The effective slice indices are computed by applying the following
+transformation for each index `i` in `[1, N)` before performing the slice:
+
+```
+start_indices[i] = clamp(start_indices[i], 0, operand.dimension_size[i] - update.dimension_size[i])
+```
+
+This ensures that the updated slice is always in-bounds with respect to the
+operand array. If the slice is in-bounds before the transformation is applied,
+the transformation has no effect.
+
+1-dimensional example:
+
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+let u = {5.0, 6.0}
+let s = {2}
+
+DynamicUpdateSlice(a, u, s) produces:
+  {0.0, 1.0, 5.0, 6.0, 4.0}
+```
+
+2-dimensional example:
+
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+let u =
+ { {12.0,  13.0},
+   {14.0,  15.0},
+   {16.0,  17.0} }
+
+let s = {1, 1}
+
+DynamicUpdateSlice(b, u, s) produces:
+ { {0.0,  1.0,  2.0},
+   {3.0, 12.0, 13.0},
+   {6.0, 14.0, 15.0},
+   {9.0, 16.0, 17.0} }
+```
+
+## Element-wise binary arithmetic operations
+
+See also
+[`XlaBuilder::Add`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+A set of element-wise binary arithmetic operations is supported.
+
+<b> `Op(lhs, rhs)` </b>
+
+Where `Op` is one of `Add` (addition), `Sub` (subtraction), `Mul`
+(multiplication), `Div` (division), `Rem` (remainder), `Max` (maximum), `Min`
+(minimum), `LogicalAnd` (logical AND), or `LogicalOr` (logical OR).
+
+Arguments | Type    | Semantics
+--------- | ------- | ----------------------------------------
+`lhs`     | `XlaOp` | left-hand-side operand: array of type T
+`rhs`     | `XlaOp` | right-hand-side operand: array of type T
+
+The arguments' shapes have to be either similar or compatible. See the
+[broadcasting](../../performance/xla/broadcasting.md) documentation about what it means for shapes to
+be compatible. The result of an operation has a shape which is the result of
+broadcasting the two input arrays. In this variant, operations between arrays of
+different ranks are *not* supported, unless one of the operands is a scalar.
+
+When `Op` is `Rem`, the sign of the result is taken from the dividend, and the
+absolute value of the result is always less than the divisor's absolute value.
+
+Integer division overflow (signed/unsigned division/remainder by zero or signed
+divison/remainder of `INT_SMIN` with `-1`) produces an implementation defined
+value.
+
+An alternative variant with different-rank broadcasting support exists for these
+operations:
+
+<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
+
+Where `Op` is the same as above. This variant of the operation should be used
+for arithmetic operations between arrays of different ranks (such as adding a
+matrix to a vector).
+
+The additional `broadcast_dimensions` operand is a slice of integers used to
+expand the rank of the lower-rank operand up to the rank of the higher-rank
+operand. `broadcast_dimensions` maps the dimensions of the lower-rank shape to
+the dimensions of the higher-rank shape. The unmapped dimensions of the expanded
+shape are filled with dimensions of size one. Degenerate-dimension broadcasting
+then broadcasts the shapes along these degenerate dimensions to equalize the
+shapes of both operands. The semantics are described in detail on the
+[broadcasting page](../../performance/xla/broadcasting.md).
+
+## Element-wise comparison operations
+
+See also
+[`XlaBuilder::Eq`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+A set of standard element-wise binary comparison operations is supported. Note
+that standard IEEE 754 floating-point comparison semantics apply when comparing
+floating-point types.
+
+<b> `Op(lhs, rhs)` </b>
+
+Where `Op` is one of `Eq` (equal-to), `Ne` (not equal-to), `Ge`
+(greater-or-equal-than), `Gt` (greater-than), `Le` (less-or-equal-than), `Lt`
+(less-than).
+
+Arguments | Type    | Semantics
+--------- | ------- | ----------------------------------------
+`lhs`     | `XlaOp` | left-hand-side operand: array of type T
+`rhs`     | `XlaOp` | right-hand-side operand: array of type T
+
+The arguments' shapes have to be either similar or compatible. See the
+[broadcasting](../../performance/xla/broadcasting.md) documentation about what it means for shapes to
+be compatible. The result of an operation has a shape which is the result of
+broadcasting the two input arrays with the element type `PRED`. In this variant,
+operations between arrays of different ranks are *not* supported, unless one of
+the operands is a scalar.
+
+An alternative variant with different-rank broadcasting support exists for these
+operations:
+
+<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
+
+Where `Op` is the same as above. This variant of the operation should be used
+for comparison operations between arrays of different ranks (such as adding a
+matrix to a vector).
+
+The additional `broadcast_dimensions` operand is a slice of integers specifying
+the dimensions to use for broadcasting the operands. The semantics are described
+in detail on the [broadcasting page](../../performance/xla/broadcasting.md).
+
+## Element-wise unary functions
+
+XlaBuilder supports these element-wise unary functions:
+
+<b>`Abs(operand)`</b> Element-wise abs `x -> |x|`.
+
+<b>`Ceil(operand)`</b> Element-wise ceil `x -> ⌈x⌉`.
+
+<b>`Cos(operand)`</b> Element-wise cosine `x -> cos(x)`.
+
+<b>`Exp(operand)`</b> Element-wise natural exponential `x -> e^x`.
+
+<b>`Floor(operand)`</b> Element-wise floor `x -> ⌊x⌋`.
+
+<b>`IsFinite(operand)`</b> Tests whether each element of `operand` is finite,
+i.e., is not positive or negative infinity, and is not `NaN`. Returns an array
+of `PRED` values with the same shape as the input, where each element is `true`
+if and only if the corresponding input element is finite.
+
+<b>`Log(operand)`</b> Element-wise natural logarithm `x -> ln(x)`.
+
+<b>`LogicalNot(operand)`</b> Element-wise logical not `x -> !(x)`.
+
+<b>`Neg(operand)`</b> Element-wise negation `x -> -x`.
+
+<b>`Sign(operand)`</b> Element-wise sign operation `x -> sgn(x)` where
+
+$$\text{sgn}(x) = \begin{cases} -1 & x < 0\\ 0 & x = 0\\ 1 & x > 0 \end{cases}$$
+
+using the comparison operator of the element type of `operand`.
+
+<b>`Tanh(operand)`</b> Element-wise hyperbolic tangent `x -> tanh(x)`.
+
+
+Arguments | Type    | Semantics
+--------- | ------- | ---------------------------
+`operand` | `XlaOp` | The operand to the function
+
+The function is applied to each element in the `operand` array, resulting in an
+array with the same shape. It is allowed for `operand` to be a scalar (rank 0).
+
+## Gather
+
+The XLA gather operation stitches together several slices (each slice at a
+potentially different runtime offset) of an input array.
+
+### General Semantics
+
+See also
+[`XlaBuilder::Gather`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+For a more intuitive description, see the "Informal Description" section below.
+
+<b> `gather(operand, start_indices, offset_dims, collapsed_slice_dims, slice_sizes, start_index_map)` </b>
+
+|Arguments         | Type                    | Semantics                       |
+|----------------- | ----------------------- | --------------------------------|
+|`operand`         | `XlaOp`                 | The array we’re gathering       |
+:                  :                         : from.                           :
+|`start_indices`   | `XlaOp`                 | Array containing the starting  |
+:                  :                         : indices of the slices we gather.:
+|`index_vector_dim` | `int64`                | The dimension in                |
+:                  :                         : `start_indices` that "contains" :
+:                  :                         : the starting indices.  See      :
+:                  :                         : below for a detailed            :
+:                  :                         : description.                    :
+|`offset_dims`     | `ArraySlice<int64>`     | The set of dimensions in  the   :
+:                  :                         : output shape that offset into a :
+:                  :                         : array sliced from operand.     :
+|`slice_sizes`     | `ArraySlice<int64>`      | `slice_sizes[i]` is the bounds |
+:                  :                          : for the slice on dimension `i`.:
+|`collapsed_slice_dims` | `ArraySlice<int64>` | The set of dimensions in each  :
+|                  :                          | slice that are collapsed away. :
+|                  :                          | These dimensions must have size:
+|                  :                          | 1.                             |
+|`start_index_map` | `ArraySlice<int64>`      | A map that describes how to map|
+:                  :                          : indices in `start_indices` to  :
+:                  :                          : to legal indices into operand. :
+
+For convenience, we label dimensions in the output array not in `offset_dims`
+as `batch_dims`.
+
+The output is an array of rank `batch_dims.size` + `operand.rank` -
+`collapsed_slice_dims`.size.
+
+If `index_vector_dim` is equal to `start_indices.rank` we implicitly consider
+`start_indices` to have a trailing `1` dimension (i.e. if `start_indices` was of
+shape `[6,7]` and `index_vector_dim` is `2` then we implicitly consider the
+shape of `start_indices` to be `[6,7,1]`).
+
+The bounds for the output array along dimension `i` is computed as follows:
+
+  1. If `i` is present in `batch_dims` (i.e. is equal to `batch_dims[k]` for
+     some `k`) then we pick the corresponding dimension bounds out of
+     `start_indices.shape`, skipping `index_vector_dim` (i.e. pick
+     `start_indices.shape.dims`[`k`] if `k` < `index_vector_dim` and
+     `start_indices.shape.dims`[`k`+`1`] otherwise).
+
+  2. If `i` is present in `offset_dims` (i.e. equal to `offset_dims`[`k`] for
+     some `k`) then we pick the corresponding bound out of `slice_sizes` after
+     accounting for `collapsed_slice_dims` (i.e. we pick
+     `adjusted_slice_sizes`[`k`] where `adjusted_slice_sizes` is `slice_sizes`
+     with the bounds at indices `collapsed_slice_dims` removed).
+
+Formally, the operand index `In` corresponding to an output index `Out` is
+computed as follows:
+
+  1. Let `G` = { `Out`[`k`] for `k` in `batch_dims` }.  Use `G` to slice out
+     vector `S` such that `S`[`i`] = `start_indices`[Combine(`G`, `i`)] where
+     Combine(A, b) inserts b at position `index_vector_dim` into A.  Note that
+     this is well defined even if `G` is empty -- if `G` is empty then `S` =
+     `start_indices`.
+
+  2. Create a starting index, `S`<sub>`in`</sub>, into `operand` using `S` by
+     scattering `S` using `start_index_map`.  More precisely:
+       1. `S`<sub>`in`</sub>[`start_index_map`[`k`]] = `S`[`k`] if `k` <
+          `start_index_map.size`.
+       2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
+
+  3. Create an index `O`<sub>`in`</sub> into `operand` by scattering the indices
+     at the offset dimensions in `Out` according to the `collapsed_slice_dims`
+     set.  More precisely:
+       1. `O`<sub>`in`</sub>[`expand_offset_dims`(`k`)] =
+          `Out`[`offset_dims`[`k`]] if `k` < `offset_dims.size`
+          (`expand_offset_dims` is defined below).
+       2. `O`<sub>`in`</sub>[`_`] = `0` otherwise.
+  4. `In` is `O`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
+     addition.
+
+`expand_offset_dims` is the monotonic function with domain [`0`, `offset.size`)
+and range [`0`, `operand.rank`) \ `collapsed_slice_dims`.  So if, e.g.,
+`offset.size` is `4`, `operand.rank` is `6` and `collapsed_slice_dims` is {`0`,
+`2`} then `expand_offset_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}.
+
+### Informal Description and Examples
+
+Informally, every index `Out` in the output array corresponds to an element `E`
+in the operand array, computed as follows:
+
+  - We use the batch dimensions in `Out` to look up a starting index from
+    `start_indices`.
+
+  - We use `start_index_map` to map the starting index (which may have size less
+    than operand.rank) to a "full" starting index into operand.
+
+  - We dynamic-slice out a slice with size `slice_sizes` using the full starting
+    index.
+
+  - We reshape the slice by collapsing the `collapsed_slice_dims` dimensions.
+    Since all collapsed slice dimensions have to have bound 1 this reshape is
+    always legal.
+
+  - We use the offset dimensions in `Out` to index into this slice to get the
+    input element, `E`, corresponding to output index `Out`.
+
+`index_vector_dim` is set to `start_indices.rank` - `1` in all of the
+examples that follow.  More interesting values for `index_vector_dim` does not
+change the operation fundamentally, but makes the visual representation more
+cumbersome.
+
+To get an intuition on how all of the above fits together, let's look at an
+example that gathers 5 slices of shape `[8,6]` from a `[16,11]` array.  The
+position of a slice into the `[16,11]` array can be represented as an index
+vector of shape `S64[2]`, so the set of 5 positions can be represented as a
+`S64[5,2]` array.
+
+The behavior of the gather operation can then be depicted as an index
+transformation that takes [`G`,`O`<sub>`0`</sub>,`O`<sub>`1`</sub>], an index in
+the output shape, and maps it to an element in the input array in the following
+way:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/ops_xla_gather_0.svg">
+</div>
+
+We first select an (`X`,`Y`) vector from the gather indices array using `G`.
+The element in the output array at index
+[`G`,`O`<sub>`0`</sub>,`O`<sub>`1`</sub>] is then the element in the input
+array at index [`X`+`O`<sub>`0`</sub>,`Y`+`O`<sub>`1`</sub>].
+
+`slice_sizes` is `[8,6]`, which decides the range of W<sub>`0`</sub> and
+W<sub>`1`</sub>, and this in turn decides the bounds of the slice.
+
+This gather operation acts as a batch dynamic slice with `G` as the batch
+dimension.
+
+The gather indices may be multidimensional.  For instance, a more general
+version of the example above using a "gather indices" array of shape `[4,5,2]`
+would translate indices like this:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/ops_xla_gather_1.svg">
+</div>
+
+Again, this acts as a batch dynamic slice `G`<sub>`0`</sub> and
+`G`<sub>`1`</sub> as the batch dimensions.  The slice size is still `[8,6]`.
+
+The gather operation in XLA generalizes the informal semantics outlined above in
+the following ways:
+
+ 1. We can configure which dimensions in the output shape are the offset
+    dimensions (dimensions containing `O`<sub>`0`</sub>, `O`<sub>`1`</sub> in
+    the last example).  The output batch dimensions (dimensions containing
+    `G`<sub>`0`</sub>, `G`<sub>`1`</sub> in the last example) are defined to be
+    the output dimensions that are not offset dimensions.
+
+ 2. The number of output offset dimensions explicitly present in the output
+    shape may be smaller than the input rank.  These "missing" dimensions, which
+    are listed explicitly as `collapsed_slice_dims`, must have a slice size of
+    `1`.  Since they have a slice size of `1` the only valid index for them is
+    `0` and eliding them does not introduce ambiguity.
+
+ 3. The slice extracted from the "Gather Indices" array ((`X`, `Y`) in the last
+    example) may have fewer elements than the input array rank, and an explicit
+    mapping dictates how the index should be expanded to have the same rank as
+    the input.
+
+As a final example, we use (2) and (3) to implement `tf.gather_nd`:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/ops_xla_gather_2.svg">
+</div>
+
+`G`<sub>`0`</sub> and `G`<sub>`1`</sub> are used to slice out a starting index
+from the gather indices array as usual, except the starting index has only one
+element, `X`.  Similarly, there is only one output offset index with the value
+`O`<sub>`0`</sub>.  However, before being used as indices into the input array,
+these are expanded in accordance to "Gather Index Mapping" (`start_index_map` in
+the formal description) and "Offset Mapping" (`expand_offset_dims` in the formal
+description) into [`0`,`O`<sub>`0`</sub>] and [`X`,`0`] respectively, adding up
+to [`X`,`O`<sub>`0`</sub>].  In other words, the output index
+[`G`<sub>`0`</sub>,`G`<sub>`1`</sub>,`O`<sub>`0`</sub>] maps to the input index
+[`GatherIndices`[`G`<sub>`0`</sub>,`G`<sub>`1`</sub>,`0`],`X`] which gives us
+the semantics for `tf.gather_nd`.
+
+`slice_sizes` for this case is `[1,11]`.  Intuitively this means that every
+index `X` in the gather indices array picks an entire row and the result is the
+concatenation of all these rows.
+
+## GetTupleElement
+
+See also
+[`XlaBuilder::GetTupleElement`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Indexes into a tuple with a compile-time-constant value.
+
+The value must be a compile-time-constant so that shape inference can determine
+the type of the resulting value.
+
+This is analogous to `std::get<int N>(t)` in C++. Conceptually:
+
+```
+let v: f32[10] = f32[10]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+let s: s32 = 5;
+let t: (f32[10], s32) = tuple(v, s);
+let element_1: s32 = gettupleelement(t, 1);  // Inferred shape matches s32.
+```
+
+See also `tf.tuple`.
+
+## Infeed
+
+See also
+[`XlaBuilder::Infeed`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Infeed(shape)` </b>
+
+| Argument | Type    | Semantics                                             |
+| -------- | ------- | ----------------------------------------------------- |
+| `shape`  | `Shape` | Shape of the data read from the Infeed interface. The |
+:          :         : layout field of the shape must be set to match the    :
+:          :         : layout of the data sent to the device; otherwise its  :
+:          :         : behavior is undefined.                                :
+
+Reads a single data item from the implicit Infeed streaming interface of the
+device, interpreting the data as the given shape and its layout, and returns a
+`XlaOp` of the data. Multiple Infeed operations are allowed in a
+computation, but there must be a total order among the Infeed operations. For
+example, two Infeeds in the code below have a total order since there is a
+dependency between the while loops.
+
+```
+result1 = while (condition, init = init_value) {
+  Infeed(shape)
+}
+
+result2 = while (condition, init = result1) {
+  Infeed(shape)
+}
+```
+
+Nested tuple shapes are not supported. For an empty tuple shape, the Infeed
+operation is effectively a no-op and proceeds without reading any data from the
+Infeed of the device.
+
+> Note: We plan to allow multiple Infeed operations without a total order, in
+> which case the compiler will provide information about how the Infeed
+> operations are serialized in the compiled program.
+
+## Iota
+
+<b> `Iota()` </b>
+
+Builds a constant literal on device rather than a potentially large host
+transfer.  Creates a rank 1 tensor of values starting at zero and incrementing
+by one.
+
+Arguments          | Type            | Semantics
+------------------ | --------------- | ---------------------------
+`type`             | `PrimitiveType` | type U
+`size`             | `int64`         | The number of elements in the tensor.
+
+## Map
+
+See also
+[`XlaBuilder::Map`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Map(operands..., computation)` </b>
+
+| Arguments         | Type                   | Semantics                      |
+| ----------------- | ---------------------- | ------------------------------ |
+| `operands`        | sequence of N `XlaOp`s | N arrays of types T_0..T_{N-1} |
+| `computation`     | `XlaComputation`       | computation of type `T_0, T_1, |
+:                   :                        : ..., T_{N + M -1} -> S` with N :
+:                   :                        : parameters of type T and M of  :
+:                   :                        : arbitrary type                 :
+| `dimensions`      | `int64` array          | array of map dimensions        |
+
+Applies a scalar function over the given `operands` arrays, producing an array
+of the same dimensions where each element is the result of the mapped function
+applied to the corresponding elements in the input arrays.
+
+The mapped function is an arbitrary computation with the restriction that it has
+N inputs of scalar type `T` and a single output with type `S`. The output has
+the same dimensions as the operands except that the element type T is replaced
+with S.
+
+For example: `Map(op1, op2, op3, computation, par1)` maps `elem_out <-
+computation(elem1, elem2, elem3, par1)` at each (multi-dimensional) index in the
+input arrays to produce the output array.
+
+## Pad
+
+See also
+[`XlaBuilder::Pad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Pad(operand, padding_value, padding_config)` </b>
+
+| Arguments        | Type            | Semantics                               |
+| ---------------- | --------------- | --------------------------------------- |
+| `operand`        | `XlaOp`         | array of type `T`                       |
+| `padding_value`  | `XlaOp`         | scalar of type `T` to fill in the added |
+:                  :                 : padding                                 :
+| `padding_config` | `PaddingConfig` | padding amount on both edges (low,      |
+:                  :                 : high) and between the elements of each  :
+:                  :                 : dimension                               :
+
+Expands the given `operand` array by padding around the array as well as between
+the elements of the array with the given `padding_value`. `padding_config`
+specifies the amount of edge padding and the interior padding for each
+dimension.
+
+`PaddingConfig` is a repeated field of `PaddingConfigDimension`, which contains
+three fields for each dimension: `edge_padding_low`, `edge_padding_high`, and
+`interior_padding`. `edge_padding_low` and `edge_padding_high` specify the
+amount of padding added at the low-end (next to index 0) and the high-end (next
+to the highest index) of each dimension respectively. The amount of edge padding
+can be negative -- the absolute value of negative padding indicates the number
+of elements to remove from the specified dimension. `interior_padding` specifies
+the amount of padding added between any two elements in each dimension. Interior
+padding occurs logically before edge padding, so in the case of negative edge
+padding elements are removed from the interior-padded operand. This operation is
+a no-op if the edge padding pairs are all (0, 0) and the interior padding values
+are all 0. The figure below shows examples of different `edge_padding` and
+`interior_padding` values for a two-dimensional array.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="https://www.tensorflow.org/images/ops_pad.png">
+</div>
+
+## Recv
+
+See also
+[`XlaBuilder::Recv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Recv(shape, channel_handle)` </b>
+
+| Arguments        | Type            | Semantics                            |
+| ---------------- | --------------- | ------------------------------------ |
+| `shape`          | `Shape`         | shape of the data to receive         |
+| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair |
+
+Receives data of the given shape from a `Send` instruction in another
+computation that shares the same channel handle. Returns a
+XlaOp for the received data.
+
+The client API of `Recv` operation represents synchronous communication.
+However, the instruction is internally decomposed into 2 HLO instructions
+(`Recv` and `RecvDone`) to enable asynchronous data transfers. See also
+[`HloInstruction::CreateRecv` and `HloInstruction::CreateRecvDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
+
+<b>`Recv(const Shape& shape, int64 channel_id)`</b>
+
+Allocates resources required to receive data from a `Send` instruction with the
+same channel_id. Returns a context for the allocated resources, which is used
+by a following `RecvDone` instruction to wait for the completion of the data
+transfer. The context is a tuple of {receive buffer (shape), request identifier
+(U32)} and it can only be used by a `RecvDone` instruction.
+
+<b> `RecvDone(HloInstruction context)` </b>
+
+Given a context created by a `Recv` instruction, waits for the data transfer to
+complete and returns the received data.
+
+## Reduce
+
+See also
+[`XlaBuilder::Reduce`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Applies a reduction function to one or more arrays in parallel.
+
+<b> `Reduce(operands..., init_values..., computation, dimensions)` </b>
+
+Arguments     | Type                  | Semantics
+------------- | --------------------- | ---------------------------------------
+`operands`    | Sequence of N `XlaOp` | N arrays of types `T_0, ..., T_N`.
+`init_values` | Sequence of N `XlaOp` | N scalars of types `T_0, ..., T_N`.
+`computation` | `XlaComputation`      | computation of type
+              :                       : `T_0, ..., T_N, T_0, ..., T_N -> Collate(T_0, ..., T_N)`
+`dimensions`  | `int64` array         | unordered array of dimensions to reduce
+
+Where:
+* N is required to be greater or equal to 1.
+* All input arrays must have the same dimensions.
+* If `N = 1`, `Collate(T)` is `T`.
+* If `N > 1`, `Collate(T_0, ..., T_N)` is a tuple of `N` elements of type `T`.
+
+The output of the op is `Collate(Q_0, ..., Q_N)` where `Q_i` is an array of type
+`T_i`, the dimensions of which are described below.
+
+This operation reduces one or more dimensions of each input array into scalars.
+The rank of each returned array is `rank(operand) - len(dimensions)`.
+`init_value` is the initial value used for every reduction and may be inserted
+anywhere during computation by the back-end. In most cases, `init_value` is an
+identity of the reduction function (for example, 0 for addition). The applied
+`computation` is always passed the `init_value` on the left-hand side.
+
+The evaluation order of the reduction function is arbitrary and may be
+non-deterministic. Therefore, the reduction function should not be overly
+sensitive to reassociation.
+
+Some reduction functions like addition are not strictly associative for floats.
+However, if the range of the data is limited, floating-point addition is close
+enough to being associative for most practical uses. It is possible to conceive
+of some completely non-associative reductions, however, and these will produce
+incorrect or unpredictable results in XLA reductions.
+
+As an example, when reducing across one dimension in a single 1D array with
+values [10, 11, 12, 13], with reduction function `f` (this is `computation`)
+then that could be computed as
+
+`f(10, f(11, f(12, f(init_value, 13)))`
+
+but there are also many other possibilities, e.g.
+
+`f(init_value, f(f(10, f(init_value, 11)), f(f(init_value, 12), f(init_value, 13))))`
+
+The following is a rough pseudo-code example of how reduction could be
+implemented, using summation as the reduction computation with an initial value
+of 0.
+
+```python
+result_shape <- remove all dims in dimensions from operand_shape
+
+# Iterate over all elements in result_shape. The number of r's here is equal
+# to the rank of the result
+for r0 in range(result_shape[0]), r1 in range(result_shape[1]), ...:
+  # Initialize this result element
+  result[r0, r1...] <- 0
+
+  # Iterate over all the reduction dimensions
+  for d0 in range(dimensions[0]), d1 in range(dimensions[1]), ...:
+    # Increment the result element with the value of the operand's element.
+    # The index of the operand's element is constructed from all ri's and di's
+    # in the right order (by construction ri's and di's together index over the
+    # whole operand shape).
+    result[r0, r1...] += operand[ri... di]
+```
+
+Here's an example of reducing a 2D array (matrix). The shape has rank 2,
+dimension 0 of size 2 and dimension 1 of size 3:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:35%" src="https://www.tensorflow.org/images/ops_2d_matrix.png">
+</div>
+
+Results of reducing dimensions 0 or 1 with an "add" function:
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_from_2d_matrix.png">
+</div>
+
+Note that both reduction results are 1D arrays. The diagram shows one as column
+and another as row just for visual convenience.
+
+For a more complex example, here is a 3D array. Its rank is 3, dimension 0 of
+size 4, dimension 1 of size 2 and dimension 2 of size 3. For simplicity, the
+values 1 to 6 are replicated across dimension 0.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_from_3d_matrix.png">
+</div>
+
+Similarly to the 2D example, we can reduce just one dimension. If we reduce
+dimension 0, for example, we get a rank-2 array where all values across
+dimension 0 were folded into a scalar:
+
+```text
+|  4   8  12 |
+| 16  20  24 |
+```
+
+If we reduce dimension 2, we also get a rank-2 array where all values across
+dimension 2 were folded into a scalar:
+
+```text
+| 6  15 |
+| 6  15 |
+| 6  15 |
+| 6  15 |
+```
+
+Note that the relative order between the remaining dimensions in the input is
+preserved in the output, but some dimensions may get assigned new numbers (since
+the rank changes).
+
+We can also reduce multiple dimensions. Add-reducing dimensions 0 and 1 produces
+the 1D array `| 20 28 36 |`.
+
+Reducing the 3D array over all its dimensions produces the scalar `84`.
+
+When `N > 1`, reduce function application is slightly more complex, as it is
+applied simultaneously to all inputs. For example, consider the following
+reduction function, which can be used to compute the max and the argmax of a
+a 1-D tensor in parallel:
+
+```
+f: (Float, Int, Float, Int) -> Float, Int
+f(max, argmax, value, index):
+  if value >= argmax:
+    return (value, index)
+  else:
+    return (max, argmax)
+```
+
+For 1-D Input arrays `V = Float[N], K = Int[N]`, and init values
+`I_V = Float, I_K =  Int`, the result `f_(N-1)` of reducing across the only
+input dimension is equivalent to the following recursive application:
+```
+f_0 = f(I_V, I_K, V_0, K_0)
+f_1 = f(f_0.first, f_0.second, V_1, K_1)
+...
+f_(N-1) = f(f_(N-2).first, f_(N-2).second, V_(N-1), K_(N-1))
+```
+
+Applying this reduction to an array of values, and an array of sequential
+indices (i.e. iota), will co-iterate over the arrays, and return a tuple
+containing the maximal value and the matching index.
+
+## ReducePrecision
+
+See also
+[`XlaBuilder::ReducePrecision`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Models the effect of converting floating-point values to a lower-precision
+format (such as IEEE-FP16) and back to the original format.  The number of
+exponent and mantissa bits in the lower-precision format can be specified
+arbitrarily, although all bit sizes may not be supported on all hardware
+implementations.
+
+<b> `ReducePrecision(operand, mantissa_bits, exponent_bits)` </b>
+
+Arguments       | Type    | Semantics
+--------------- | ------- | -------------------------------------------------
+`operand`       | `XlaOp` | array of floating-point type `T`.
+`exponent_bits` | `int32` | number of exponent bits in lower-precision format
+`mantissa_bits` | `int32` | number of mantissa bits in lower-precision format
+
+The result is an array of type `T`.  The input values are rounded to the nearest
+value representable with the given number of mantissa bits (using "ties to even"
+semantics), and any values that exceed the range specified by the number of
+exponent bits are clamped to positive or negative infinity.  `NaN` values are
+retained, although they may be converted to canonical `NaN` values.
+
+The lower-precision format must have at least one exponent bit (in order to
+distinguish a zero value from an infinity, since both have a zero mantissa), and
+must have a non-negative number of mantissa bits.  The number of exponent or
+mantissa bits may exceed the corresponding value for type `T`; the corresponding
+portion of the conversion is then simply a no-op.
+
+## ReduceWindow
+
+See also
+[`XlaBuilder::ReduceWindow`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Applies a reduction function to all elements in each window of the input
+multi-dimensional array, producing an output multi-dimensional array with the
+same number of elements as the number of valid positions of the window. A
+pooling layer can be expressed as a `ReduceWindow`. Similar to
+[`Reduce`](#reduce), the applied `computation` is always passed the `init_value`
+on the left-hand side.
+
+<b> `ReduceWindow(operand, init_value, computation, window_dimensions,
+window_strides, padding)` </b>
+
+| Arguments           | Type                | Semantics                        |
+| ------------------- | ------------------- | -------------------------------- |
+| `operand`           | `XlaOp`             | N dimensional array containing   |
+:                     :                     : elements of type T. This is the  :
+:                     :                     : base area on which the window is :
+:                     :                     : placed.                          :
+| `init_value`        | `XlaOp`             | Starting value for the           |
+:                     :                     : reduction. See [Reduce](#reduce) :
+:                     :                     : for details.                     :
+| `computation`       | `XlaComputation`    | Reduction function of type `T, T |
+:                     :                     : -> T`, to apply to all elements  :
+:                     :                     : in each window                   :
+| `window_dimensions` | `ArraySlice<int64>` | array of integers for window     |
+:                     :                     : dimension values                 :
+| `window_strides`    | `ArraySlice<int64>` | array of integers for window     |
+:                     :                     : stride values                    :
+| `padding`           | `Padding`           | padding type for window          |
+:                     :                     : (Padding\:\:kSame or             :
+:                     :                     : Padding\:\:kValid)               :
+
+Below code and figure shows an example of using `ReduceWindow`. Input is a
+matrix of size [4x6] and both window_dimensions and window_stride_dimensions are
+[2x3].
+
+```
+// Create a computation for the reduction (maximum).
+XlaComputation max;
+{
+  XlaBuilder builder(client_, "max");
+  auto y = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "y");
+  auto x = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "x");
+  builder.Max(y, x);
+  max = builder.Build().ConsumeValueOrDie();
+}
+
+// Create a ReduceWindow computation with the max reduction computation.
+XlaBuilder builder(client_, "reduce_window_2x3");
+auto shape = ShapeUtil::MakeShape(F32, {4, 6});
+auto input = builder.Parameter(0, shape, "input");
+builder.ReduceWindow(
+    input, *max,
+    /*init_val=*/builder.ConstantLiteral(LiteralUtil::MinValue(F32)),
+    /*window_dimensions=*/{2, 3},
+    /*window_stride_dimensions=*/{2, 3},
+    Padding::kValid);
+```
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_window.png">
+</div>
+
+Stride of 1 in a dimension specifies that the position of a window in the
+dimension is 1 element away from its adjacent window. In order to specify that
+no windows overlap with each other, window_stride_dimensions should be equal to
+window_dimensions. The figure below illustrates the use of two different stride
+values. Padding is applied to each dimension of the input and the calculations
+are the same as though the input came in with the dimensions it has after
+padding.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:75%" src="https://www.tensorflow.org/images/ops_reduce_window_stride.png">
+</div>
+
+The evaluation order of the reduction function is arbitrary and may be
+non-deterministic. Therefore, the reduction function should not be overly
+sensitive to reassociation. See the discussion about associativity in the
+context of [`Reduce`](#reduce) for more details.
+
+## Reshape
+
+See also
+[`XlaBuilder::Reshape`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
+and the [`Collapse`](#collapse) operation.
+
+Reshapes the dimensions of an array into a new configuration.
+
+<b> `Reshape(operand, new_sizes)` </b>
+<b> `Reshape(operand, dimensions, new_sizes)` </b>
+
+Arguments    | Type           | Semantics
+------------ | -------------- | ---------------------------------------
+`operand`    | `XlaOp`        | array of type T
+`dimensions` | `int64` vector | order in which dimensions are collapsed
+`new_sizes`  | `int64` vector | vector of sizes of new dimensions
+
+Conceptually, reshape first flattens an array into a one-dimensional vector of
+data values, and then refines this vector into a new shape. The input arguments
+are an arbitrary array of type T, a compile-time-constant vector of dimension
+indices, and a compile-time-constant vector of dimension sizes for the result.
+The values in the `dimension` vector, if given, must be a permutation of all of
+T's dimensions; the default if not given is `{0, ..., rank - 1}`. The order of
+the dimensions in `dimensions` is from slowest-varying dimension (most major) to
+fastest-varying dimension (most minor) in the loop nest which collapses the
+input array into a single dimension. The `new_sizes` vector determines the size
+of the output array. The value at index 0 in `new_sizes` is the size of
+dimension 0, the value at index 1 is the size of dimension 1, and so on. The
+product of the `new_size` dimensions must equal the product of the operand's
+dimension sizes. When refining the collapsed array into the multidimensional
+array defined by `new_sizes`, the dimensions in `new_sizes` are ordered from
+slowest varying (most major) and to fastest varying (most minor).
+
+For example, let v be an array of 24 elements:
+
+```
+let v = f32[4x2x3] {{{10, 11, 12}, {15, 16, 17}},
+                    {{20, 21, 22}, {25, 26, 27}},
+                    {{30, 31, 32}, {35, 36, 37}},
+                    {{40, 41, 42}, {45, 46, 47}}};
+
+In-order collapse:
+let v012_24 = Reshape(v, {0,1,2}, {24});
+then v012_24 == f32[24] {10, 11, 12, 15, 16, 17, 20, 21, 22, 25, 26, 27,
+                         30, 31, 32, 35, 36, 37, 40, 41, 42, 45, 46, 47};
+
+let v012_83 = Reshape(v, {0,1,2}, {8,3});
+then v012_83 == f32[8x3] {{10, 11, 12}, {15, 16, 17},
+                          {20, 21, 22}, {25, 26, 27},
+                          {30, 31, 32}, {35, 36, 37},
+                          {40, 41, 42}, {45, 46, 47}};
+
+Out-of-order collapse:
+let v021_24 = Reshape(v, {1,2,0}, {24});
+then v012_24 == f32[24]  {10, 20, 30, 40, 11, 21, 31, 41, 12, 22, 32, 42,
+                          15, 25, 35, 45, 16, 26, 36, 46, 17, 27, 37, 47};
+
+let v021_83 = Reshape(v, {1,2,0}, {8,3});
+then v021_83 == f32[8x3] {{10, 20, 30}, {40, 11, 21},
+                          {31, 41, 12}, {22, 32, 42},
+                          {15, 25, 35}, {45, 16, 26},
+                          {36, 46, 17}, {27, 37, 47}};
+
+
+let v021_262 = Reshape(v, {1,2,0}, {2,6,2});
+then v021_262 == f32[2x6x2] {{{10, 20}, {30, 40},
+                              {11, 21}, {31, 41},
+                              {12, 22}, {32, 42}},
+                             {{15, 25}, {35, 45},
+                              {16, 26}, {36, 46},
+                              {17, 27}, {37, 47}}};
+```
+
+As a special case, reshape can transform a single-element array to a scalar and
+vice versa. For example,
+
+```
+Reshape(f32[1x1] {{5}}, {0,1}, {}) == 5;
+Reshape(5, {}, {1,1}) == f32[1x1] {{5}};
+```
+
+## Rev (reverse)
+
+See also
+[`XlaBuilder::Rev`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b>`Rev(operand, dimensions)`</b>
+
+Arguments    | Type                | Semantics
+------------ | ------------------- | ---------------------
+`operand`    | `XlaOp`             | array of type T
+`dimensions` | `ArraySlice<int64>` | dimensions to reverse
+
+Reverses the order of elements in the `operand` array along the specified
+`dimensions`, generating an output array of the same shape. Each element of the
+operand array at a multidimensional index is stored into the output array at a
+transformed index. The multidimensional index is transformed by reversing the
+index in each dimension to be reversed (i.e., if a dimension of size N is one of
+the reversing dimensions, its index i is transformed into N - 1 - i).
+
+One use for the `Rev` operation is to reverse the convolution weight array along
+the two window dimensions during the gradient computation in neural networks.
+
+## RngNormal
+
+See also
+[`XlaBuilder::RngNormal`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Constructs an output of a given shape with random numbers generated following
+the $$N(\mu, \sigma)$$ normal distribution. The parameters $$\mu$$ and
+$$\sigma$$, and output shape have to have a floating point elemental type. The
+parameters furthermore have to be scalar valued.
+
+<b>`RngNormal(mu, sigma, shape)`</b>
+
+| Arguments | Type    | Semantics                                           |
+| --------- | ------- | --------------------------------------------------- |
+| `mu`      | `XlaOp` | Scalar of type T specifying mean of generated       |
+:           :         : numbers                                   :
+| `sigma`   | `XlaOp` | Scalar of type T specifying standard deviation of   |
+:           :         : generated numbers                                   :
+| `shape`   | `Shape` | Output shape of type T                              |
+
+## RngUniform
+
+See also
+[`XlaBuilder::RngUniform`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Constructs an output of a given shape with random numbers generated following
+the uniform distribution over the interval $$[a,b)$$. The parameters and output
+element type have to be a boolean type, an integral type or a floating point
+types, and the types have to be consistent. The CPU and GPU backends currently
+only support F64, F32, F16, BF16, S64, U64, S32 and U32. Furthermore, the
+parameters need to be scalar valued. If $$b <= a$$ the result is
+implementation-defined.
+
+<b>`RngUniform(a, b, shape)`</b>
+
+| Arguments | Type                    | Semantics                         |
+| --------- | ----------------------- | --------------------------------- |
+| `a`       | `XlaOp`                 | Scalar of type T specifying lower |
+:           :                         : limit of interval                 :
+| `b`       | `XlaOp`                 | Scalar of type T specifying upper |
+:           :                         : limit of interval                 :
+| `shape`   | `Shape`                 | Output shape of type T            |
+
+## Scatter
+
+The XLA scatter operation generates a result which is the value of the input
+tensor `operand`, with several slices (at indices specified by
+`scatter_indices`) updated with the values in `updates` using
+`update_computation`.
+
+See also
+[`XlaBuilder::Scatter`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `scatter(operand, scatter_indices, updates, update_computation, index_vector_dim, update_window_dims, inserted_window_dims, scatter_dims_to_operand_dims)` </b>
+
+|Arguments         | Type                   | Semantics                        |
+|------------------|------------------------|----------------------------------|
+|`operand`         | `XlaOp`                | Tensor to be scattered into.     |
+|`scatter_indices` | `XlaOp`                | Tensor containing the starting   |
+:                  :                        : indices of the slices that must  :
+:                  :                        : be scattered to.                 :
+|`updates`         | `XlaOp`                | Tensor containing the values that|
+:                  :                        : must be used for scattering.     :
+|`update_computation`| `XlaComputation`     | Computation to be used for       |
+:                  :                        : combining the existing values in :
+:                  :                        : the input tensor and the updates :
+:                  :                        : during scatter. This computation :
+:                  :                        : should be of type `T, T -> T`.   :
+|`index_vector_dim`| `int64`                | The dimension in                 |
+:                  :                        : `scatter_indices` that contains  :
+:                  :                        : the starting indices.            :
+|`update_window_dims`| `ArraySlice<int64>`  | The set of dimensions in         |
+:                  :                        : `updates` shape that are _window :
+:                  :                        : dimensions_.                     :
+|`inserted_window_dims`| `ArraySlice<int64>`| The set of _window dimensions_   |
+:                  :                        : that must be inserted into       :
+:                  :                        : `updates` shape.                 :
+|`scatter_dims_to_operand_dims`| `ArraySlice<int64>`  | A dimensions map from  |
+:                  :                        : the scatter indices to the       :
+:                  :                        : operand index space. This array  :
+:                  :                        : is interpreted as mapping `i` to :
+:                  :                        : `scatter_dims_to_operand_dims[i]`:
+:                  :                        : . It has to be one-to-one and    :
+:                  :                        : total.                           :
+
+If `index_vector_dim` is equal to `scatter_indices.rank` we implicitly consider
+`scatter_indices` to have a trailing `1` dimension.
+
+We define `update_scatter_dims` of type `ArraySlice<int64>` as the set of
+dimensions in `updates` shape that are not in `update_window_dims`, in ascending
+order.
+
+The arguments of scatter should follow these constraints:
+
+  - `updates` tensor must be of rank `update_window_dims.size +
+  scatter_indices.rank - 1`.
+
+  - Bounds of dimension `i` in `updates` must conform to the following:
+      - If `i` is present in `update_window_dims` (i.e. equal to
+        `update_window_dims`[`k`] for some `k`), then the bound of dimension
+        `i` in `updates` must not exceed the corresponding bound of `operand`
+        after accounting for the `inserted_window_dims` (i.e.
+        `adjusted_window_bounds`[`k`], where `adjusted_window_bounds` contains
+        the bounds of `operand` with the bounds at indices
+        `inserted_window_dims` removed).
+      - If `i` is present in `update_scatter_dims` (i.e. equal to
+        `update_scatter_dims`[`k`] for some `k`), then the bound of dimension
+        `i` in `updates` must be equal to the corresponding bound of
+        `scatter_indices`, skipping `index_vector_dim` (i.e.
+        `scatter_indices.shape.dims`[`k`], if `k` < `index_vector_dim` and
+        `scatter_indices.shape.dims`[`k+1`] otherwise).
+
+  - `update_window_dims` must be in ascending order, not have any repeating
+    dimension numbers, and be in the range `[0, updates.rank)`.
+
+  - `inserted_window_dims` must be in ascending order, not have any
+    repeating dimension numbers, and be in the range `[0, operand.rank)`.
+
+  - `scatter_dims_to_operand_dims.size` must be equal to
+    `scatter_indices`[`index_vector_dim`], and its values must be in the range
+    `[0, operand.rank)`.
+
+For a given index `U` in the `updates` tensor, the corresponding index `I` in
+the `operand` tensor into which this update has to be applied is computed as
+follows:
+
+  1. Let `G` = { `U`[`k`] for `k` in `update_scatter_dims` }. Use `G` to look up
+     an index vector `S` in the `scatter_indices` tensor such that `S`[`i`] =
+     `scatter_indices`[Combine(`G`, `i`)] where Combine(A, b) inserts b at
+     positions `index_vector_dim` into A.
+  2. Create an index `S`<sub>`in`</sub> into `operand` using `S` by scattering
+     `S` using the `scatter_dims_to_operand_dims` map. More formally:
+       1. `S`<sub>`in`</sub>[`scatter_dims_to_operand_dims`[`k`]] = `S`[`k`] if
+          `k` < `scatter_dims_to_operand_dims.size`.
+       2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
+  3. Create an index `W`<sub>`in`</sub> into `operand` by scattering the indices
+     at `update_window_dims` in `U` according to `inserted_window_dims`.
+     More formally:
+       1. `W`<sub>`in`</sub>[`window_dims_to_operand_dims`(`k`)] = `U`[`k`] if
+          `k` < `update_window_dims.size`, where `window_dims_to_operand_dims`
+          is the monotonic function with domain [`0`, `update_window_dims.size`)
+          and range [`0`, `operand.rank`) \\ `inserted_window_dims`. (For
+          example, if `update_window_dims.size` is `4`, `operand.rank` is `6`,
+          and `inserted_window_dims` is {`0`, `2`} then
+          `window_dims_to_operand_dims` is {`0`→`1`, `1`→`3`, `2`→`4`,
+          `3`→`5`}).
+       2. `W`<sub>`in`</sub>[`_`] = `0` otherwise.
+  4. `I` is `W`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
+     addition.
+
+In summary, the scatter operation can be defined as follows.
+
+   - Initialize `output` with `operand`, i.e. for all indices `O` in the
+     `operand` tensor:\
+       `output`[`O`] = `operand`[`O`]
+   - For every index `U` in the `updates` tensor and the corresponding index `O`
+     in the `operand` tensor:\
+       `output`[`O`] = `update_computation`(`output`[`O`], `updates`[`U`])
+
+The order in which updates are applied is non-deterministic. So, when multiple
+indices in `updates` refer to the same index in `operand`, the corresponding
+value in `output` will be non-deterministic.
+
+Note that the first parameter that is passed into the `update_computation` will
+always be the current value from the `output` tensor and the second parameter
+will always be the value from the `updates` tensor. This is important
+specifically for cases when the `update_computation` is _not commutative_.
+
+Informally, the scatter op can be viewed as an _inverse_ of the gather op, i.e.
+the scatter op updates the elements in the input that are extracted by the
+corresponding gather op.
+
+For a detailed informal description and examples, refer to the
+"Informal Description" section under `Gather`.
+
+## Select
+
+See also
+[`XlaBuilder::Select`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Constructs an output array from elements of two input arrays, based on the
+values of a predicate array.
+
+<b> `Select(pred, on_true, on_false)` </b>
+
+Arguments  | Type    | Semantics
+---------- | ------- | ------------------
+`pred`     | `XlaOp` | array of type PRED
+`on_true`  | `XlaOp` | array of type T
+`on_false` | `XlaOp` | array of type T
+
+The arrays `on_true` and `on_false` must have the same shape. This is also the
+shape of the output array. The array `pred` must have the same dimensionality as
+`on_true` and `on_false`, with the `PRED` element type.
+
+For each element `P` of `pred`, the corresponding element of the output array is
+taken from `on_true` if the value of `P` is `true`, and from `on_false` if the
+value of `P` is `false`. As a restricted form of [broadcasting]
+(broadcasting.md), `pred` can be a scalar of type `PRED`. In this case, the
+output array is taken wholly from `on_true` if `pred` is `true`, and from
+`on_false` if `pred` is `false`.
+
+Example with non-scalar `pred`:
+
+```
+let pred: PRED[4] = {true, false, false, true};
+let v1: s32[4] = {1, 2, 3, 4};
+let v2: s32[4] = {100, 200, 300, 400};
+==>
+Select(pred, v1, v2) = s32[4]{1, 200, 300, 4};
+```
+
+Example with scalar `pred`:
+
+```
+let pred: PRED = true;
+let v1: s32[4] = {1, 2, 3, 4};
+let v2: s32[4] = {100, 200, 300, 400};
+==>
+Select(pred, v1, v2) = s32[4]{1, 2, 3, 4};
+```
+
+Selections between tuples are supported. Tuples are considered to be scalar
+types for this purpose. If `on_true` and `on_false` are tuples (which must have
+the same shape!) then `pred` has to be a scalar of type `PRED`.
+
+## SelectAndScatter
+
+See also
+[`XlaBuilder::SelectAndScatter`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+This operation can be considered as a composite operation that first computes
+`ReduceWindow` on the `operand` array to select an element from each window, and
+then scatters the `source` array to the indices of the selected elements to
+construct an output array with the same shape as the operand array. The binary
+`select` function is used to select an element from each window by applying it
+across each window, and it is called with the property that the first
+parameter's index vector is lexicographically less than the second parameter's
+index vector. The `select` function returns `true` if the first parameter is
+selected and returns `false` if the second parameter is selected, and the
+function must hold transitivity (i.e., if `select(a, b)` and `select(b, c)` are
+`true`, then `select(a, c)` is also `true`) so that the selected element does
+not depend on the order of the elements traversed for a given window.
+
+The function `scatter` is applied at each selected index in the output array. It
+takes two scalar parameters:
+
+1.  Current value at the selected index in the output array
+2.  The scatter value from `source` that applies to the selected index
+
+It combines the two parameters and returns a scalar value that's used to update
+the value at the selected index in the output array. Initially, all indices of
+the output array are set to `init_value`.
+
+The output array has the same shape as the `operand` array and the `source`
+array must have the same shape as the result of applying a `ReduceWindow`
+operation on the `operand` array. `SelectAndScatter` can be used to
+backpropagate the gradient values for a pooling layer in a neural network.
+
+<b>`SelectAndScatter(operand, select, window_dimensions, window_strides,
+padding, source, init_value, scatter)`</b>
+
+| Arguments           | Type                | Semantics                        |
+| ------------------- | ------------------- | -------------------------------- |
+| `operand`           | `XlaOp`             | array of type T over which the   |
+:                     :                     : windows slide                    :
+| `select`            | `XlaComputation`    | binary computation of type `T, T |
+:                     :                     : -> PRED`, to apply to all        :
+:                     :                     : elements in each window; returns :
+:                     :                     : `true` if the first parameter is :
+:                     :                     : selected and returns `false` if  :
+:                     :                     : the second parameter is selected :
+| `window_dimensions` | `ArraySlice<int64>` | array of integers for window     |
+:                     :                     : dimension values                 :
+| `window_strides`    | `ArraySlice<int64>` | array of integers for window     |
+:                     :                     : stride values                    :
+| `padding`           | `Padding`           | padding type for window          |
+:                     :                     : (Padding\:\:kSame or             :
+:                     :                     : Padding\:\:kValid)               :
+| `source`            | `XlaOp`             | array of type T with the values  |
+:                     :                     : to scatter                       :
+| `init_value`        | `XlaOp`             | scalar value of type T for the   |
+:                     :                     : initial value of the output      :
+:                     :                     : array                            :
+| `scatter`           | `XlaComputation`    | binary computation of type `T, T |
+:                     :                     : -> T`, to apply each scatter     :
+:                     :                     : source element with its          :
+:                     :                     : destination element              :
+
+The figure below shows examples of using `SelectAndScatter`, with the `select`
+function computing the maximal value among its parameters. Note that when the
+windows overlap, as in the figure (2) below, an index of the `operand` array may
+be selected multiple times by different windows. In the figure, the element of
+value 9 is selected by both of the top windows (blue and red) and the binary
+addition `scatter` function produces the output element of value 8 (2 + 6).
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%"
+    src="https://www.tensorflow.org/images/ops_scatter_to_selected_window_element.png">
+</div>
+
+The evaluation order of the `scatter` function is arbitrary and may be
+non-deterministic. Therefore, the `scatter` function should not be overly
+sensitive to reassociation. See the discussion about associativity in the
+context of [`Reduce`](#reduce) for more details.
+
+## Send
+
+See also
+[`XlaBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `Send(operand, channel_handle)` </b>
+
+Arguments        | Type            | Semantics
+---------------- | --------------- | -----------------------------------------
+`operand`        | `XlaOp`         | data to send (array of type T)
+`channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair
+
+Sends the given operand data to a `Recv` instruction in another computation
+that shares the same channel handle. Does not return any data.
+
+Similar to the `Recv` operation, the client API of `Send` operation represents
+synchronous communication, and is internally decomposed into 2 HLO instructions
+(`Send` and `SendDone`) to enable asynchronous data transfers. See also
+[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
+
+<b>`Send(HloInstruction operand, int64 channel_id)`</b>
+
+Initiates an asynchronous transfer of the operand to the resources allocated by
+the `Recv` instruction with the same channel id. Returns a context, which is
+used by a following `SendDone` instruction to wait for the completion of the
+data transfer. The context is a tuple of {operand (shape), request identifier
+(U32)} and it can only be used by a `SendDone` instruction.
+
+<b> `SendDone(HloInstruction context)` </b>
+
+Given a context created by a `Send` instruction, waits for the data transfer to
+complete.  The instruction does not return any data.
+
+<b> Scheduling of channel instructions </b>
+
+The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`,
+`Send`, `SendDone`) is as below.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:70%" src="../../images/send_recv_order.png">
+</div>
+
+* `Recv` happens before `Send`
+* `Send` happens before `RecvDone`
+* `Recv` happens before `RecvDone`
+* `Send` happens before `SendDone`
+
+When the backend compilers generate a linear schedule for each computation that
+communicates via channel instructions, there must not be cycles across the
+computations. For example, below schedules lead to deadlocks.
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="../../images/send_recv_schedule.png">
+</div>
+
+## Slice
+
+See also
+[`XlaBuilder::Slice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+Slicing extracts a sub-array from the input array. The sub-array is of the same
+rank as the input and contains the values inside a bounding box within the input
+array where the dimensions and indices of the bounding box are given as
+arguments to the slice operation.
+
+<b> `Slice(operand, start_indices, limit_indices)` </b>
+
+| Arguments       | Type                | Semantics                            |
+| --------------- | ------------------- | ------------------------------------ |
+| `operand`       | `XlaOp`             | N dimensional array of type T        |
+| `start_indices` | `ArraySlice<int64>` | List of N integers containing the    |
+:                 :                     : starting indices of the slice for    :
+:                 :                     : each dimension. Values must be       :
+:                 :                     : greater than or equal to zero.       :
+| `limit_indices` | `ArraySlice<int64>` | List of N integers containing the    |
+:                 :                     : ending indices (exclusive) for the   :
+:                 :                     : slice for each dimension. Each value :
+:                 :                     : must be greater than or equal to the :
+:                 :                     : respective `start_indices` value for :
+:                 :                     : the dimension and less than or equal :
+:                 :                     : to the size of the dimension.        :
+
+1-dimensional example:
+
+```
+let a = {0.0, 1.0, 2.0, 3.0, 4.0}
+Slice(a, {2}, {4}) produces:
+  {2.0, 3.0}
+```
+
+2-dimensional example:
+
+```
+let b =
+ { {0.0,  1.0,  2.0},
+   {3.0,  4.0,  5.0},
+   {6.0,  7.0,  8.0},
+   {9.0, 10.0, 11.0} }
+
+Slice(b, {2, 1}, {4, 3}) produces:
+  { { 7.0,  8.0},
+    {10.0, 11.0} }
+```
+
+## Sort
+
+See also
+[`XlaBuilder::Sort`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+There are two versions of the Sort instruction: a single-operand and a
+two-operand version.
+
+<b>`Sort(operand)`</b>
+
+Arguments   | Type    | Semantics
+----------- | ------- | --------------------
+`operand`   | `XlaOp` | The operand to sort.
+`dimension` | `int64` | The dimension along which to sort.
+
+Sorts the elements in the operand in ascending order along the provided
+dimension. For example, for a rank-2 (matrix) operand, a `dimension` value of 0
+will sort each column independently, and a `dimension` value of 1 will sort each
+row independently. If the operand's elements have floating point type, and the
+operand contains NaN elements, the order of elements in the output is
+implementation-defined.
+
+<b>`Sort(key, value)`</b>
+
+Sorts both the key and the value operands. The keys are sorted as in the
+single-operand version. The values are sorted according to the order of their
+corresponding keys. For example, if the inputs are `keys = [3, 1]` and
+`values = [42, 50]`, then the output of the sort is the tuple 
+`{[1, 3], [50, 42]}`.
+
+The sort is not guaranteed to be stable, that is, if the keys array contains
+duplicates, the order of their corresponding values may not be preserved.
+
+Arguments   | Type    | Semantics
+----------- | ------- | -------------------
+`keys`      | `XlaOp` | The sort keys.
+`values`    | `XlaOp` | The values to sort.
+`dimension` | `int64` | The dimension along which to sort.
+
+The `keys` and `values` must have the same dimensions, but may have different
+element types.
+
+## Transpose
+
+See also the `tf.reshape` operation.
+
+<b>`Transpose(operand)`</b>
+
+Arguments     | Type                | Semantics
+------------- | ------------------- | ------------------------------
+`operand`     | `XlaOp`             | The operand to transpose.
+`permutation` | `ArraySlice<int64>` | How to permute the dimensions.
+
+
+Permutes the operand dimensions with the given permutation, so
+`∀ i . 0 ≤ i < rank ⇒ input_dimensions[permutation[i]] = output_dimensions[i]`.
+
+This is the same as Reshape(operand, permutation,
+                            Permute(permutation, operand.shape.dimensions)).
+
+## Tuple
+
+See also
+[`XlaBuilder::Tuple`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+A tuple containing a variable number of data handles, each of which has its own
+shape.
+
+This is analogous to `std::tuple` in C++. Conceptually:
+
+```
+let v: f32[10] = f32[10]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+let s: s32 = 5;
+let t: (f32[10], s32) = tuple(v, s);
+```
+
+Tuples can be deconstructed (accessed) via the [`GetTupleElement`]
+(#gettupleelement) operation.
+
+## While
+
+See also
+[`XlaBuilder::While`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
+
+<b> `While(condition, body, init)` </b>
+
+| Arguments   | Type             | Semantics                                |
+| ----------- | ---------------- | ---------------------------------------- |
+| `condition` | `XlaComputation` | XlaComputation of type `T -> PRED` which |
+:             :                  : defines the termination condition of the :
+:             :                  : loop.                                    :
+| `body`      | `XlaComputation` | XlaComputation of type `T -> T` which    |
+:             :                  : defines the body of the loop.            :
+| `init`      | `T`              | Initial value for the parameter of       |
+:             :                  : `condition` and `body`.                  :
+
+Sequentially executes the `body` until the `condition` fails. This is similar to
+a typical while loop in many other languages except for the differences and
+restrictions listed below.
+
+*   A `While` node returns a value of type `T`, which is the result from the
+    last execution of the `body`.
+*   The shape of the type `T` is statically determined and must be the same
+    across all iterations.
+
+The T parameters of the computations are initialized with the `init` value in
+the first iteration and are automatically updated to the new result from `body`
+in each subsequent iteration.
+
+One main use case of the `While` node is to implement the repeated execution of
+training in neural networks. Simplified pseudocode is shown below with a graph
+that represents the computation. The code can be found in
+[`while_test.cc`](https://www.tensorflow.org/code/tensorflow/compiler/xla/tests/while_test.cc).
+The type `T` in this example is a `Tuple` consisting of an `int32` for the
+iteration count and a `vector[10]` for the accumulator. For 1000 iterations, the
+loop keeps adding a constant vector to the accumulator.
+
+```
+// Pseudocode for the computation.
+init = {0, zero_vector[10]} // Tuple of int32 and float[10].
+result = init;
+while (result(0) < 1000) {
+  iteration = result(0) + 1;
+  new_vector = result(1) + constant_vector[10];
+  result = {iteration, new_vector};
+}
+```
+
+<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
+  <img style="width:100%" src="https://www.tensorflow.org/images/ops_while.png">
+</div>
diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD
index 2a858b4fd6..1a53f24177 100644
--- a/tensorflow/tools/docs/BUILD
+++ b/tensorflow/tools/docs/BUILD
@@ -127,7 +127,6 @@ py_test(
     name = "build_docs_test",
     size = "small",
     srcs = ["build_docs_test.py"],
-    data = ["//tensorflow/docs_src"],
     srcs_version = "PY2AND3",
     tags = [
         # No reason to run sanitizers or fastbuild for this test.
diff --git a/tensorflow/tools/docs/build_docs_test.py b/tensorflow/tools/docs/build_docs_test.py
index 0cbf8b478f..4d3bedda2d 100644
--- a/tensorflow/tools/docs/build_docs_test.py
+++ b/tensorflow/tools/docs/build_docs_test.py
@@ -30,9 +30,11 @@ from tensorflow.tools.docs import generate_lib
 
 class Flags(object):
   resource_root = resource_loader.get_root_dir_with_all_resources()
-  src_dir = os.path.join(resource_root, 'tensorflow/docs_src')
+  src_dir = os.path.join(googletest.GetTempDir(), 'input')
+  os.mkdir(src_dir)
   base_dir = os.path.join(resource_root, 'tensorflow/')
-  output_dir = googletest.GetTempDir()
+  output_dir = os.path.join(googletest.GetTempDir(), 'output')
+  os.mkdir(output_dir)
 
 
 class BuildDocsTest(googletest.TestCase):
-- 
GitLab


From df7221d84988e5f7c1cc2775d8f5f44ffdd5918b Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 1 Oct 2018 14:39:31 -0700
Subject: [PATCH 0175/1085] Drop external control dependencies in tfe.defun.

They shouldn't help given the automatic control dependencies, and are tricky
to capture in the general case.

PiperOrigin-RevId: 215282837
---
 tensorflow/python/eager/function.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 3b6f288fb9..f261d92d64 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -269,6 +269,15 @@ class FuncGraph(ops.Graph):
   def variables(self, var_list):
     self._weak_variables = [weakref.ref(v) for v in var_list]
 
+  def control_dependencies(self, control_inputs):
+    # Drop control dependencies to outside of the graph. TODO(b/117109273)
+    # unclear how to capture an op, not a tensor.
+    if not control_inputs:
+      return super(FuncGraph, self).control_dependencies(control_inputs)
+    return super(FuncGraph, self).control_dependencies(
+        [c for c in control_inputs
+         if getattr(c, "graph", None) is self])
+
   def create_op(
       self,
       op_type,
-- 
GitLab


From 9084e999b3caf65833f9651c72bc09eb3094eba5 Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <jhseu@google.com>
Date: Mon, 1 Oct 2018 15:08:25 -0700
Subject: [PATCH 0176/1085] Don't run initialize ops if it's empty. Fixes a bug
 when using the profiler.

PiperOrigin-RevId: 215287936
---
 tensorflow/python/training/session_manager.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/training/session_manager.py b/tensorflow/python/training/session_manager.py
index 5e4749f306..cd313c2ce0 100644
--- a/tensorflow/python/training/session_manager.py
+++ b/tensorflow/python/training/session_manager.py
@@ -184,9 +184,11 @@ class SessionManager(object):
     self._target = master
     sess = session.Session(self._target, graph=self._graph, config=config)
     # TODO(jhseu): Delete once tpu.initialize_system() goes away.
-    sess.run(
+    initialize_ops = (
         distribution_strategy_context.get_distribution_strategy().initialize()
     )
+    if initialize_ops:
+      sess.run(initialize_ops)
 
     if checkpoint_dir and checkpoint_filename_with_path:
       raise ValueError("Can not provide both checkpoint_dir and "
-- 
GitLab


From c7237e6070dbf4acd1ade5a40dc676418cbd889b Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 1 Oct 2018 15:10:19 -0700
Subject: [PATCH 0177/1085] Don't generate backward function and delete when
 its not necessary

PiperOrigin-RevId: 215288224
---
 tensorflow/c/eager/tape.h                 | 7 +++----
 tensorflow/python/eager/pywrap_tfe_src.cc | 3 +--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 41b5b8ff36..5ba55a203f 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -130,7 +130,7 @@ class GradientTape {
       const string& op_type, std::vector<TapeTensor>& output_tensors,
       gtl::ArraySlice<int64> input_tensor_id,
       gtl::ArraySlice<tensorflow::DataType> input_dtypes,
-      BackwardFunction* backward_function,
+      const std::function<BackwardFunction*()>& backward_function_getter,
       const std::function<void(BackwardFunction*)>& backward_function_deleter);
 
   void DeleteTrace(int64 tensor_id);
@@ -206,10 +206,9 @@ void GradientTape<Gradient, BackwardFunction, TapeTensor>::RecordOperation(
     const string& op_type, std::vector<TapeTensor>& output_tensors,
     gtl::ArraySlice<int64> input_tensor_id,
     gtl::ArraySlice<tensorflow::DataType> input_dtypes,
-    BackwardFunction* backward_function,
+    const std::function<BackwardFunction*()>& backward_function_getter,
     const std::function<void(BackwardFunction*)>& backward_function_deleter) {
   if (!ShouldRecord(input_tensor_id, input_dtypes)) {
-    backward_function_deleter(backward_function);
     return;
   }
   std::vector<int64> ids;
@@ -229,7 +228,7 @@ void GradientTape<Gradient, BackwardFunction, TapeTensor>::RecordOperation(
     tensors.push_back(o);
   }
   op_tape_[op_id] = OpTapeEntry<BackwardFunction, TapeTensor>{
-      op_type, std::move(tensors), ids, backward_function,
+      op_type, std::move(tensors), std::move(ids), backward_function_getter(),
       backward_function_deleter};
 }
 
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 4b9f7f4100..ae1e12f9c3 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1567,9 +1567,8 @@ void TapeSetRecordOperation(
   }
 
   for (TFE_Py_Tape* tape : SafeTapeSet()) {
-    auto* function = backward_function_getter();
     tape->tape->RecordOperation(op_type_str, output_info, input_ids,
-                                input_dtypes, function,
+                                input_dtypes, backward_function_getter,
                                 backward_function_killer);
   }
 }
-- 
GitLab


From cca204f12a5838f0ffdd4a80c27d451cf61d3636 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 15:25:27 -0700
Subject: [PATCH 0178/1085] Added option (off by default) to enable a
 higher-performance variant of the Adam optimizer's variable update formula.

PiperOrigin-RevId: 215290881
---
 tensorflow/contrib/tpu/proto/optimization_parameters.proto | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index a43f45554f..8529b48c15 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -62,7 +62,10 @@ message FtrlParameters {
 // (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/AdamOptimizer). If
 // use_non_lazy_adam is enabled, use_gradient_accumulation is also required in
 // order to get correct results; a warning will be printed otherwise (which may
-// change to an error in the future).
+// change to an error in the future). If use_max_with_epsilon is set, the Adam
+// variable update formula will be changed from m / (sqrt(v) + epsilon) to
+// m / max(sqrt(v), abs(epsilon)); this option improves the performance of TPU
+// training and is not expected to harm model quality.
 message AdamParameters {
   float beta1 = 3;
   float beta2 = 4;
@@ -70,6 +73,7 @@ message AdamParameters {
   float initial_m = 6;
   float initial_v = 7;
   bool use_non_lazy_adam = 8;
+  bool use_max_with_epsilon = 9;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
-- 
GitLab


From 52574f95279d8cd5ec22cfc24668b9586e41367a Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Mon, 1 Oct 2018 15:26:59 -0700
Subject: [PATCH 0179/1085] Remove jemalloc build files and dead configuration
 options.

PiperOrigin-RevId: 215291195
---
 configure.py                                  |   2 -
 tensorflow/BUILD                              |  39 --
 tensorflow/contrib/cmake/CMakeLists.txt       |  11 -
 .../contrib/cmake/external/jemalloc.cmake     |  50 ---
 .../core/platform/default/build_config.bzl    |  20 +-
 tensorflow/core/platform/posix/port.cc        |  36 +-
 tensorflow/core/platform/windows/port.cc      |  51 +--
 tensorflow/tools/lib_package/BUILD            |  16 -
 tensorflow/tools/pip_package/BUILD            |   8 -
 tensorflow/workspace.bzl                      |  12 -
 third_party/jemalloc.BUILD                    | 356 ------------------
 third_party/systemlibs/jemalloc.BUILD         |  30 --
 third_party/systemlibs/syslibs_configure.bzl  |   1 -
 tools/bazel.rc                                |   1 -
 14 files changed, 11 insertions(+), 622 deletions(-)
 delete mode 100644 tensorflow/contrib/cmake/external/jemalloc.cmake
 delete mode 100644 third_party/jemalloc.BUILD
 delete mode 100644 third_party/systemlibs/jemalloc.BUILD

diff --git a/configure.py b/configure.py
index 57d9574d1f..0efa11aa41 100644
--- a/configure.py
+++ b/configure.py
@@ -1493,7 +1493,6 @@ def main():
   setup_python(environ_cp)
 
   if is_windows():
-    environ_cp['TF_NEED_JEMALLOC'] = '0'
     environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
     environ_cp['TF_NEED_COMPUTECPP'] = '0'
     environ_cp['TF_NEED_OPENCL'] = '0'
@@ -1507,7 +1506,6 @@ def main():
     environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0'
 
   if is_macos():
-    environ_cp['TF_NEED_JEMALLOC'] = '0'
     environ_cp['TF_NEED_TENSORRT'] = '0'
     environ_cp['TF_ENABLE_XLA'] = '0'
 
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 4876b51a6f..9b62a50452 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -203,21 +203,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-# TODO(jhseu): Enable on other platforms other than Linux.
-config_setting(
-    name = "with_jemalloc_linux_x86_64",
-    define_values = {"with_jemalloc": "true"},
-    values = {"cpu": "k8"},
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_jemalloc_linux_ppc64le",
-    define_values = {"with_jemalloc": "true"},
-    values = {"cpu": "ppc"},
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "with_default_optimizations",
     define_values = {"with_default_optimizations": "true"},
@@ -265,30 +250,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-config_setting(
-    name = "with_jemalloc_linux_x86_64_dynamic",
-    define_values = {
-        "with_jemalloc": "true",
-        "framework_shared_object": "true",
-    },
-    values = {
-        "cpu": "k8",
-    },
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "with_jemalloc_linux_ppc64le_dynamic",
-    define_values = {
-        "with_jemalloc": "true",
-        "framework_shared_object": "true",
-    },
-    values = {
-        "cpu": "ppc",
-    },
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "using_cuda_clang",
     define_values = {
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index c6d6f04168..f675c135f4 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -30,7 +30,6 @@ endif()
 
 option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
 option(tensorflow_ENABLE_HDFS_SUPPORT "Enable HDFS support" OFF)
-option(tensorflow_ENABLE_JEMALLOC_SUPPORT "Enable jemalloc support" OFF)
 option(tensorflow_BUILD_CC_EXAMPLE "Build the C++ tutorial example" ON)
 option(tensorflow_BUILD_PYTHON_BINDINGS "Build the Python bindings" ON)
 option(tensorflow_BUILD_ALL_KERNELS "Build all OpKernels" ON)
@@ -218,10 +217,6 @@ if (tensorflow_WIN_CPU_SIMD_OPTIONS)
   endif()
 endif()
 
-if (tensorflow_ENABLE_JEMALLOC_SUPPORT)
-  add_definitions(-DTENSORFLOW_USE_JEMALLOC -DJEMALLOC_EXPORT=)
-endif()
-
 # External dependencies
 include(zlib)
 include(gif)
@@ -329,12 +324,6 @@ if(tensorflow_ENABLE_GRPC_SUPPORT)
     list(APPEND tensorflow_EXTERNAL_DEPENDENCIES boringssl)
   endif()
 endif()
-if(tensorflow_ENABLE_JEMALLOC_SUPPORT)
-  include(jemalloc)
-  list(APPEND tensorflow_EXTERNAL_LIBRARIES ${jemalloc_STATIC_LIBRARIES})
-  list(APPEND tensorflow_EXTERNAL_DEPENDENCIES jemalloc)
-  include_directories(${jemalloc_INCLUDE_DIRS})
-endif()
 if(tensorflow_ENABLE_SNAPPY_SUPPORT)
   include(snappy)
   list(APPEND tensorflow_EXTERNAL_LIBRARIES ${snappy_STATIC_LIBRARIES})
diff --git a/tensorflow/contrib/cmake/external/jemalloc.cmake b/tensorflow/contrib/cmake/external/jemalloc.cmake
deleted file mode 100644
index afadcc007d..0000000000
--- a/tensorflow/contrib/cmake/external/jemalloc.cmake
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-include (ExternalProject)
-
-set(jemalloc_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/jemalloc/src/jemalloc/include)
-set(jemalloc_URL https://mirror.bazel.build/github.com/jemalloc/jemalloc-cmake/archive/jemalloc-cmake.4.3.1.tar.gz)
-set(jemalloc_HASH SHA256=f9be9a05fe906deb5c1c8ca818071a7d2e27d66fd87f5ba9a7bf3750bcedeaf0)
-set(jemalloc_BUILD ${CMAKE_CURRENT_BINARY_DIR}/jemalloc/src/jemalloc)
-
-if (WIN32)
-    set(jemalloc_INCLUDE_DIRS
-        ${jemalloc_INCLUDE_DIRS} 
-        ${CMAKE_CURRENT_BINARY_DIR}/jemalloc/src/jemalloc/include/msvc_compat
-    )
-    if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
-        set(jemalloc_STATIC_LIBRARIES ${jemalloc_BUILD}/Release/jemalloc.lib)
-    else()
-        set(jemalloc_STATIC_LIBRARIES ${jemalloc_BUILD}/jemalloc.lib)
-    endif()
-else()
-    set(jemalloc_STATIC_LIBRARIES ${jemalloc_BUILD}/Release/jemalloc.a)
-endif()
-
-ExternalProject_Add(jemalloc
-    PREFIX jemalloc
-    URL ${jemalloc_URL}
-    URL_HASH ${jemalloc_HASH}
-    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
-    BUILD_IN_SOURCE 1
-    BUILD_BYPRODUCTS ${jemalloc_STATIC_LIBRARIES}
-    BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release --target jemalloc
-    INSTALL_COMMAND ${CMAKE_COMMAND} -E echo "Skipping install step."
-    CMAKE_CACHE_ARGS
-        -DCMAKE_BUILD_TYPE:STRING=Release
-        -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-        -Dwith-jemalloc-prefix:STRING=jemalloc_
-        -Dwithout-export:BOOL=ON
-)
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 3b14757945..d884c1aa7c 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -615,11 +615,7 @@ def tf_kernel_tests_linkstatic():
 
 def tf_additional_lib_defines():
     """Additional defines needed to build TF libraries."""
-    return select({
-        "//tensorflow:with_jemalloc_linux_x86_64": ["TENSORFLOW_USE_JEMALLOC"],
-        "//tensorflow:with_jemalloc_linux_ppc64le": ["TENSORFLOW_USE_JEMALLOC"],
-        "//conditions:default": [],
-    })
+    return []
 
 def tf_additional_lib_deps():
     """Additional dependencies needed to build TF libraries."""
@@ -631,13 +627,7 @@ def tf_additional_lib_deps():
     ] + if_static(
         ["@nsync//:nsync_cpp"],
         ["@nsync//:nsync_headers"],
-    ) + select({
-        "//tensorflow:with_jemalloc_linux_x86_64_dynamic": ["@jemalloc//:jemalloc_headers"],
-        "//tensorflow:with_jemalloc_linux_ppc64le_dynamic": ["@jemalloc//:jemalloc_headers"],
-        "//tensorflow:with_jemalloc_linux_x86_64": ["@jemalloc//:jemalloc_impl"],
-        "//tensorflow:with_jemalloc_linux_ppc64le": ["@jemalloc//:jemalloc_impl"],
-        "//conditions:default": [],
-    })
+    )
 
 def tf_additional_core_deps():
     return select({
@@ -725,11 +715,7 @@ def tf_additional_binary_deps():
             "//tensorflow/stream_executor:cuda_platform",
             "//tensorflow/core/platform/default/build_config:cuda",
         ],
-    ) + select({
-        "//tensorflow:with_jemalloc_linux_x86_64": ["@jemalloc//:jemalloc_impl"],
-        "//tensorflow:with_jemalloc_linux_ppc64le": ["@jemalloc//:jemalloc_impl"],
-        "//conditions:default": [],
-    }) + [
+    ) + [
         # TODO(allenl): Split these out into their own shared objects (they are
         # here because they are shared between contrib/ op shared objects and
         # core).
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index b46b9927cd..acdd7798ea 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -13,10 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifdef TENSORFLOW_USE_JEMALLOC
-#include "jemalloc/jemalloc.h"
-#endif
-
 #include "absl/base/internal/sysinfo.h"
 
 #include "tensorflow/core/platform/cpu_info.h"
@@ -101,11 +97,7 @@ void* AlignedMalloc(size_t size, int minimum_alignment) {
   // memory aligned to at least the size of a pointer.
   const int required_alignment = sizeof(void*);
   if (minimum_alignment < required_alignment) return Malloc(size);
-#ifdef TENSORFLOW_USE_JEMALLOC
-  int err = jemalloc_posix_memalign(&ptr, minimum_alignment, size);
-#else
   int err = posix_memalign(&ptr, minimum_alignment, size);
-#endif
   if (err != 0) {
     return nullptr;
   } else {
@@ -116,29 +108,11 @@ void* AlignedMalloc(size_t size, int minimum_alignment) {
 
 void AlignedFree(void* aligned_memory) { Free(aligned_memory); }
 
-void* Malloc(size_t size) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_malloc(size);
-#else
-  return malloc(size);
-#endif
-}
+void* Malloc(size_t size) { return malloc(size); }
 
-void* Realloc(void* ptr, size_t size) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_realloc(ptr, size);
-#else
-  return realloc(ptr, size);
-#endif
-}
+void* Realloc(void* ptr, size_t size) { return realloc(ptr, size); }
 
-void Free(void* ptr) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  jemalloc_free(ptr);
-#else
-  free(ptr);
-#endif
-}
+void Free(void* ptr) { free(ptr); }
 
 void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
   return AlignedMalloc(size, minimum_alignment);
@@ -146,9 +120,7 @@ void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
 
 void NUMAFree(void* ptr, size_t size) { Free(ptr); }
 
-int NUMAGetMemAffinity(const void* addr) {
-  return kNUMANoAffinity;
-}
+int NUMAGetMemAffinity(const void* addr) { return kNUMANoAffinity; }
 
 void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
   // No-op.
diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc
index 5375f56372..911ea1902f 100644
--- a/tensorflow/core/platform/windows/port.cc
+++ b/tensorflow/core/platform/windows/port.cc
@@ -13,10 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifdef TENSORFLOW_USE_JEMALLOC
-#include "jemalloc/jemalloc.h"
-#endif
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -70,55 +66,16 @@ void NUMASetThreadNodeAffinity(int node) {}
 int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }
 
 void* AlignedMalloc(size_t size, int minimum_alignment) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  void* ptr = NULL;
-  // posix_memalign requires that the requested alignment be at least
-  // sizeof(void*). In this case, fall back on malloc which should return
-  // memory aligned to at least the size of a pointer.
-  const int required_alignment = sizeof(void*);
-  if (minimum_alignment < required_alignment) return Malloc(size);
-  int err = jemalloc_posix_memalign(&ptr, minimum_alignment, size);
-  if (err != 0) {
-    return NULL;
-  } else {
-    return ptr;
-  }
-#else
   return _aligned_malloc(size, minimum_alignment);
-#endif
 }
 
-void AlignedFree(void* aligned_memory) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  jemalloc_free(aligned_memory);
-#else
-  _aligned_free(aligned_memory);
-#endif
-}
+void AlignedFree(void* aligned_memory) { _aligned_free(aligned_memory); }
 
-void* Malloc(size_t size) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_malloc(size);
-#else
-  return malloc(size);
-#endif
-}
+void* Malloc(size_t size) { return malloc(size); }
 
-void* Realloc(void* ptr, size_t size) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_realloc(ptr, size);
-#else
-  return realloc(ptr, size);
-#endif
-}
+void* Realloc(void* ptr, size_t size) { return realloc(ptr, size); }
 
-void Free(void* ptr) {
-#ifdef TENSORFLOW_USE_JEMALLOC
-  return jemalloc_free(ptr);
-#else
-  return free(ptr);
-#endif
-}
+void Free(void* ptr) { return free(ptr); }
 
 void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
   return AlignedMalloc(size, minimum_alignment);
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index b9f4902639..85514b8629 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -137,14 +137,6 @@ genrule(
         "@snappy//:COPYING",
         "@zlib_archive//:zlib.h",
     ] + select({
-        "//tensorflow:with_jemalloc_linux_x86_64": [
-            "@jemalloc//:COPYING",
-        ],
-        "//tensorflow:with_jemalloc_linux_ppc64le": [
-            "@jemalloc//:COPYING",
-        ],
-        "//conditions:default": [],
-    }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
         ],
@@ -202,14 +194,6 @@ genrule(
         "@snappy//:COPYING",
         "@zlib_archive//:zlib.h",
     ] + select({
-        "//tensorflow:with_jemalloc_linux_x86_64": [
-            "@jemalloc//:COPYING",
-        ],
-        "//tensorflow:with_jemalloc_linux_ppc64le": [
-            "@jemalloc//:COPYING",
-        ],
-        "//conditions:default": [],
-    }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
         ],
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index c621812535..3a1c4a45d4 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -167,14 +167,6 @@ filegroup(
         "@zlib_archive//:zlib.h",
         "@org_python_pypi_backports_weakref//:LICENSE",
     ] + select({
-        "//tensorflow:with_jemalloc_linux_x86_64": [
-            "@jemalloc//:COPYING",
-        ],
-        "//tensorflow:with_jemalloc_linux_ppc64le": [
-            "@jemalloc//:COPYING",
-        ],
-        "//conditions:default": [],
-    }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
         ],
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 9b4b698874..bcc89ef729 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -642,18 +642,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         testonly_ = True,
     )
 
-    tf_http_archive(
-        name = "jemalloc",
-        build_file = clean_dep("//third_party:jemalloc.BUILD"),
-        sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8",
-        strip_prefix = "jemalloc-4.4.0",
-        system_build_file = clean_dep("//third_party/systemlibs:jemalloc.BUILD"),
-        urls = [
-            "https://mirror.bazel.build/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
-            "https://github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz",
-        ],
-    )
-
     java_import_external(
         name = "com_google_testing_compile",
         jar_sha256 = "edc180fdcd9f740240da1a7a45673f46f59c5578d8cd3fbc912161f74b5aebb8",
diff --git a/third_party/jemalloc.BUILD b/third_party/jemalloc.BUILD
deleted file mode 100644
index 1b0829b8fe..0000000000
--- a/third_party/jemalloc.BUILD
+++ /dev/null
@@ -1,356 +0,0 @@
-# Description:
-# jemalloc - a general-purpose scalable concurrent malloc implementation
-
-licenses(["notice"])  # BSD
-
-exports_files(["COPYING"])
-
-load("@org_tensorflow//third_party:common.bzl", "template_rule")
-
-cc_library(
-    name = "jemalloc_headers",
-    hdrs = ["include/jemalloc/jemalloc.h"],
-    includes = ["include"],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "jemalloc_impl",
-    srcs = [
-        "src/arena.c",
-        "src/atomic.c",
-        "src/base.c",
-        "src/bitmap.c",
-        "src/chunk.c",
-        "src/chunk_dss.c",
-        "src/chunk_mmap.c",
-        "src/ckh.c",
-        "src/ctl.c",
-        "src/extent.c",
-        "src/hash.c",
-        "src/huge.c",
-        "src/jemalloc.c",
-        "src/mb.c",
-        "src/mutex.c",
-        "src/nstime.c",
-        "src/pages.c",
-        "src/prng.c",
-        "src/prof.c",
-        "src/quarantine.c",
-        "src/rtree.c",
-        "src/spin.c",
-        "src/stats.c",
-        "src/tcache.c",
-        "src/tsd.c",
-        "src/util.c",
-        "src/witness.c",
-    ],
-    hdrs = [
-        "include/jemalloc/internal/arena.h",
-        "include/jemalloc/internal/assert.h",
-        "include/jemalloc/internal/atomic.h",
-        "include/jemalloc/internal/base.h",
-        "include/jemalloc/internal/bitmap.h",
-        "include/jemalloc/internal/chunk.h",
-        "include/jemalloc/internal/chunk_dss.h",
-        "include/jemalloc/internal/chunk_mmap.h",
-        "include/jemalloc/internal/ckh.h",
-        "include/jemalloc/internal/ctl.h",
-        "include/jemalloc/internal/extent.h",
-        "include/jemalloc/internal/hash.h",
-        "include/jemalloc/internal/huge.h",
-        "include/jemalloc/internal/jemalloc_internal.h",
-        "include/jemalloc/internal/jemalloc_internal_decls.h",
-        "include/jemalloc/internal/jemalloc_internal_defs.h",
-        "include/jemalloc/internal/jemalloc_internal_macros.h",
-        "include/jemalloc/internal/mb.h",
-        "include/jemalloc/internal/mutex.h",
-        "include/jemalloc/internal/nstime.h",
-        "include/jemalloc/internal/pages.h",
-        "include/jemalloc/internal/ph.h",
-        "include/jemalloc/internal/private_namespace.h",
-        "include/jemalloc/internal/prng.h",
-        "include/jemalloc/internal/prof.h",
-        "include/jemalloc/internal/ql.h",
-        "include/jemalloc/internal/qr.h",
-        "include/jemalloc/internal/quarantine.h",
-        "include/jemalloc/internal/rb.h",
-        "include/jemalloc/internal/rtree.h",
-        "include/jemalloc/internal/size_classes.h",
-        "include/jemalloc/internal/smoothstep.h",
-        "include/jemalloc/internal/spin.h",
-        "include/jemalloc/internal/stats.h",
-        "include/jemalloc/internal/tcache.h",
-        "include/jemalloc/internal/ticker.h",
-        "include/jemalloc/internal/tsd.h",
-        "include/jemalloc/internal/util.h",
-        "include/jemalloc/internal/valgrind.h",
-        "include/jemalloc/internal/witness.h",
-    ],
-    # Same flags that jemalloc uses to build.
-    copts = [
-        "-O3",
-        "-funroll-loops",
-        "-D_GNU_SOURCE",
-        "-D_REENTRANT",
-    ],
-    includes = ["include"],
-    # pthread_atfork() is called for PPC.
-    linkopts = select({
-        "@org_tensorflow//tensorflow:linux_ppc64le": [
-            "-lpthread",
-        ],
-        "@org_tensorflow//tensorflow:linux_x86_64": [
-            "-lpthread",
-        ],
-        "//conditions:default": [
-        ],
-    }),
-    visibility = ["//visibility:public"],
-    deps = [":jemalloc_headers"],
-)
-
-sh_binary(
-    name = "jemalloc_sh",
-    srcs = ["include/jemalloc/jemalloc.sh"],
-)
-
-genrule(
-    name = "jemalloc_h",
-    srcs = [
-        ":jemalloc_defs_h",
-        ":jemalloc_macros_h",
-        ":jemalloc_mangle_h",
-        ":jemalloc_protos_h",
-        ":jemalloc_rename_h",
-        ":jemalloc_typedefs_h",
-    ],
-    outs = ["include/jemalloc/jemalloc.h"],
-    cmd = "$(location :jemalloc_sh) $$(dirname $(location :jemalloc_defs_h))/../../ >$@",
-    tools = [":jemalloc_sh"],
-)
-
-# Add to this list if you want to export more symbols from jemalloc.
-genrule(
-    name = "public_symbols_txt",
-    outs = ["include/jemalloc/internal/public_symbols.txt"],
-    cmd = "\n".join([
-        "cat <<'EOF' > $@",
-        "free:jemalloc_free",
-        "malloc:jemalloc_malloc",
-        "posix_memalign:jemalloc_posix_memalign",
-        "realloc:jemalloc_realloc",
-        "EOF",
-    ]),
-)
-
-sh_binary(
-    name = "jemalloc_mangle_sh",
-    srcs = ["include/jemalloc/jemalloc_mangle.sh"],
-)
-
-genrule(
-    name = "jemalloc_mangle_h",
-    srcs = [":public_symbols_txt"],
-    outs = ["include/jemalloc/jemalloc_mangle.h"],
-    cmd = "$(location :jemalloc_mangle_sh) $(location :public_symbols_txt) je_ >$@",
-    tools = [":jemalloc_mangle_sh"],
-)
-
-sh_binary(
-    name = "jemalloc_rename_sh",
-    srcs = ["include/jemalloc/jemalloc_rename.sh"],
-)
-
-genrule(
-    name = "jemalloc_rename_h",
-    srcs = [":public_symbols_txt"],
-    outs = ["include/jemalloc/jemalloc_rename.h"],
-    cmd = "$(location :jemalloc_rename_sh) $(location :public_symbols_txt) >$@",
-    tools = [":jemalloc_rename_sh"],
-)
-
-sh_binary(
-    name = "private_namespace_sh",
-    srcs = ["include/jemalloc/internal/private_namespace.sh"],
-)
-
-genrule(
-    name = "private_namespace_h",
-    srcs = ["include/jemalloc/internal/private_symbols.txt"],
-    outs = ["include/jemalloc/internal/private_namespace.h"],
-    cmd = "$(location :private_namespace_sh) $(location include/jemalloc/internal/private_symbols.txt) >$@",
-    tools = [":private_namespace_sh"],
-)
-
-sh_binary(
-    name = "public_namespace_sh",
-    srcs = ["include/jemalloc/internal/public_namespace.sh"],
-)
-
-genrule(
-    name = "public_namespace_h",
-    srcs = [":public_symbols_txt"],
-    outs = ["include/jemalloc/internal/public_namespace.h"],
-    cmd = "$(location :public_namespace_sh) $(location :public_symbols_txt) >$@",
-    tools = [":public_namespace_sh"],
-)
-
-sh_binary(
-    name = "size_classes_sh",
-    srcs = ["include/jemalloc/internal/size_classes.sh"],
-)
-
-# Size classes for Linux x86_64 and ppc64le. Update if adding builds for other
-# architectures. See size_classes.sh for details on the arguments.
-# For default case, kept the arguments same as that of  x86_64 for now.
-genrule(
-    name = "size_classes_h",
-    outs = ["include/jemalloc/internal/size_classes.h"],
-    cmd = select({
-        "@org_tensorflow//tensorflow:linux_ppc64le": "$(location :size_classes_sh) \"3 4\" 3 16 2 >$@",
-        "@org_tensorflow//tensorflow:linux_x86_64": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
-        "//conditions:default": "$(location :size_classes_sh) \"3 4\" 3 12 2 >$@",
-    }),
-    tools = [":size_classes_sh"],
-)
-
-template_rule(
-    name = "jemalloc_internal_h",
-    src = "include/jemalloc/internal/jemalloc_internal.h.in",
-    out = "include/jemalloc/internal/jemalloc_internal.h",
-    substitutions = {
-        "@private_namespace@": "je_",
-        "@install_suffix@": "",
-    },
-)
-
-template_rule(
-    name = "jemalloc_internal_defs_h",
-    src = "include/jemalloc/internal/jemalloc_internal_defs.h.in",
-    out = "include/jemalloc/internal/jemalloc_internal_defs.h",
-    substitutions = {
-        "#undef JEMALLOC_PREFIX": "#define JEMALLOC_PREFIX \"jemalloc_\"",
-        "#undef JEMALLOC_CPREFIX": "#define JEMALLOC_CPREFIX \"JEMALLOC_\"",
-        "#undef JEMALLOC_PRIVATE_NAMESPACE": "#define JEMALLOC_PRIVATE_NAMESPACE je_",
-        "#undef CPU_SPINWAIT": "\n".join([
-            "#if defined(__powerpc64__) || defined(__powerpc__)",
-            "#define CPU_SPINWAIT __asm__ volatile(\"or 27,27,27\")",
-            "#else",
-            "#define CPU_SPINWAIT __asm__ volatile(\"pause\")",
-            "#endif",
-        ]),
-        "#undef JEMALLOC_HAVE_BUILTIN_CLZ": "#define JEMALLOC_HAVE_BUILTIN_CLZ",
-        "#undef JEMALLOC_USE_SYSCALL": "#define JEMALLOC_USE_SYSCALL",
-        "#undef JEMALLOC_HAVE_SECURE_GETENV": "#define JEMALLOC_HAVE_SECURE_GETENV",
-        "#undef JEMALLOC_HAVE_PTHREAD_ATFORK": "#define JEMALLOC_HAVE_PTHREAD_ATFORK",
-        "#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE": "#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1",
-        # Newline required because of substitution conflicts.
-        "#undef JEMALLOC_HAVE_CLOCK_MONOTONIC\n": "#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1\n",
-        "#undef JEMALLOC_THREADED_INIT": "#define JEMALLOC_THREADED_INIT",
-        "#undef JEMALLOC_TLS_MODEL": "#define JEMALLOC_TLS_MODEL __attribute__((tls_model(\"initial-exec\")))",
-        "#undef JEMALLOC_CC_SILENCE": "#define JEMALLOC_CC_SILENCE",
-        "#undef JEMALLOC_STATS": "#define JEMALLOC_STATS",
-        "#undef JEMALLOC_TCACHE": "#define JEMALLOC_TCACHE",
-        "#undef JEMALLOC_DSS": "#define JEMALLOC_DSS",
-        "#undef JEMALLOC_FILL": "#define JEMALLOC_FILL",
-        "#undef LG_TINY_MIN": "#define LG_TINY_MIN 3",
-        "#undef LG_PAGE": "\n".join([
-            "#if defined(__powerpc64__) || defined(__powerpc__)",
-            "#define LG_PAGE 16",
-            "#else",
-            "#define LG_PAGE 12",
-            "#endif",
-        ]),
-        "#undef JEMALLOC_MAPS_COALESCE": "#define JEMALLOC_MAPS_COALESCE",
-        "#undef JEMALLOC_TLS": "#define JEMALLOC_TLS",
-        "#undef JEMALLOC_INTERNAL_UNREACHABLE": "#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable",
-        "#undef JEMALLOC_INTERNAL_FFSLL": "#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll",
-        # Newline required because of substitution conflicts.
-        "#undef JEMALLOC_INTERNAL_FFSL\n": "#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl\n",
-        "#undef JEMALLOC_INTERNAL_FFS\n": "#define JEMALLOC_INTERNAL_FFS __builtin_ffs\n",
-        "#undef JEMALLOC_CACHE_OBLIVIOUS": "#define JEMALLOC_CACHE_OBLIVIOUS",
-        "#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY": "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY",
-        "#undef JEMALLOC_HAVE_MADVISE": "#define JEMALLOC_HAVE_MADVISE",
-        "#undef JEMALLOC_PURGE_MADVISE_DONTNEED": "#define JEMALLOC_PURGE_MADVISE_DONTNEED",
-        "#undef JEMALLOC_THP": "#define JEMALLOC_THP",
-        "#undef JEMALLOC_HAS_ALLOCA_H": "#define JEMALLOC_HAS_ALLOCA_H 1",
-        # Newline required because of substitution conflicts.
-        "#undef LG_SIZEOF_INT\n": "#define LG_SIZEOF_INT 2\n",
-        "#undef LG_SIZEOF_LONG\n": "#define LG_SIZEOF_LONG 3\n",
-        "#undef LG_SIZEOF_LONG_LONG": "#define LG_SIZEOF_LONG_LONG 3",
-        "#undef LG_SIZEOF_INTMAX_T": "#define LG_SIZEOF_INTMAX_T 3",
-        "#undef JEMALLOC_GLIBC_MALLOC_HOOK": "#define JEMALLOC_GLIBC_MALLOC_HOOK",
-        "#undef JEMALLOC_GLIBC_MEMALIGN_HOOK": "#define JEMALLOC_GLIBC_MEMALIGN_HOOK",
-        "#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP": "#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP",
-        "#undef JEMALLOC_CONFIG_MALLOC_CONF": "#define JEMALLOC_CONFIG_MALLOC_CONF \"\"",
-    },
-)
-
-template_rule(
-    name = "jemalloc_defs_h",
-    src = "include/jemalloc/jemalloc_defs.h.in",
-    out = "include/jemalloc/jemalloc_defs.h",
-    substitutions = {
-        "#undef JEMALLOC_HAVE_ATTR": "#define JEMALLOC_HAVE_ATTR",
-        "#undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE": "#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE",
-        "#undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF": "#define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF",
-        "#undef JEMALLOC_HAVE_ATTR_FORMAT_PRINTF": "#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF",
-        "#undef JEMALLOC_OVERRIDE_MEMALIGN": "#define JEMALLOC_OVERRIDE_MEMALIGN",
-        "#undef JEMALLOC_OVERRIDE_VALLOC": "#define JEMALLOC_OVERRIDE_VALLOC",
-        "#undef JEMALLOC_USABLE_SIZE_CONST": "#define JEMALLOC_USABLE_SIZE_CONST",
-        "#undef JEMALLOC_USE_CXX_THROW": "#define JEMALLOC_USE_CXX_THROW",
-        "#undef LG_SIZEOF_PTR": "#define LG_SIZEOF_PTR 3",
-    },
-)
-
-template_rule(
-    name = "jemalloc_macros_h",
-    src = "include/jemalloc/jemalloc_macros.h.in",
-    out = "include/jemalloc/jemalloc_macros.h",
-    substitutions = {
-        "@jemalloc_version@": "0.0.0",
-        "@jemalloc_version_major@": "0",
-        "@jemalloc_version_minor@": "0",
-        "@jemalloc_version_bugfix@": "0",
-        "@jemalloc_version_nrev@": "0",
-        "@jemalloc_version_gid@": "0000000000000000000000000000000000000000",
-    },
-)
-
-template_rule(
-    name = "jemalloc_protos_h",
-    src = "include/jemalloc/jemalloc_protos.h.in",
-    out = "include/jemalloc/jemalloc_protos.h",
-    substitutions = {
-        "@aligned_alloc": "aligned_alloc",
-        "@calloc": "calloc",
-        "@cbopaque": "cbopaque",
-        "@dallocx": "dallocx",
-        "@free": "free",
-        "@je": "je",
-        "@mallctl": "mallctl",
-        "@mallctlnametomib": "mallctlnametomib",
-        "@mallctlbymib": "mallctlbymib",
-        "@malloc_stats_print": "malloc_stats_print",
-        "@malloc_usable_size": "malloc_usable_size",
-        "@malloc": "malloc",
-        "@mallocx": "mallocx",
-        "@memalign": "memalign",
-        "@nallocx": "nallocx",
-        "@posix_memalign": "posix_memalign",
-        "@rallocx": "rallocx",
-        "@realloc": "realloc",
-        "@sallocx": "sallocx",
-        "@sdallocx": "sdallocx",
-        "@valloc": "valloc",
-        "@xallocx": "xallocx",
-    },
-)
-
-template_rule(
-    name = "jemalloc_typedefs_h",
-    src = "include/jemalloc/jemalloc_typedefs.h.in",
-    out = "include/jemalloc/jemalloc_typedefs.h",
-    substitutions = {},
-)
diff --git a/third_party/systemlibs/jemalloc.BUILD b/third_party/systemlibs/jemalloc.BUILD
deleted file mode 100644
index 6a48d582ba..0000000000
--- a/third_party/systemlibs/jemalloc.BUILD
+++ /dev/null
@@ -1,30 +0,0 @@
-licenses(["notice"])  # BSD
-
-filegroup(
-    name = "COPYING",
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "jemalloc_headers",
-    defines = [
-        "jemalloc_posix_memalign=posix_memalign",
-        "jemalloc_malloc=malloc",
-        "jemalloc_realloc=realloc",
-        "jemalloc_free=free",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "jemalloc_impl",
-    linkopts = ["-ljemalloc"],
-    defines = [
-        "jemalloc_posix_memalign=posix_memalign",
-        "jemalloc_malloc=malloc",
-        "jemalloc_realloc=realloc",
-        "jemalloc_free=free",
-    ],
-    visibility = ["//visibility:public"],
-    deps = [":jemalloc_headers"],
-)
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index 8b0ab39eaf..b03d3380d7 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -23,7 +23,6 @@ VALID_LIBS = [
     "gast_archive",
     "gif_archive",
     "grpc",
-    "jemalloc",
     "jpeg",
     "jsoncpp_git",
     "lmdb",
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 0cd148ed87..3734fab715 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -73,7 +73,6 @@ build --define=grpc_no_ares=true
 build --spawn_strategy=standalone
 build --genrule_strategy=standalone
 build -c opt
-build --define=with_jemalloc=false
 
 # Other build flags.
 build --define=grpc_no_ares=true
-- 
GitLab


From 55d96e8ea93407da156c156702a38fd8b5d06b2a Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Mon, 1 Oct 2018 15:34:08 -0700
Subject: [PATCH 0180/1085] Fix Android builds when using
 --define=with_tflite_flex

PiperOrigin-RevId: 215292521
---
 tensorflow/contrib/lite/delegates/flex/BUILD | 6 +++---
 tensorflow/core/common_runtime/eager/BUILD   | 7 ++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD
index bf5d91899c..9dd38958e5 100644
--- a/tensorflow/contrib/lite/delegates/flex/BUILD
+++ b/tensorflow/contrib/lite/delegates/flex/BUILD
@@ -20,7 +20,7 @@ cc_library(
         "//tensorflow/contrib/lite:kernel_api",
     ] + select({
         "//tensorflow:android": [
-            "//tensorflow/core:android_tensorflow_lib_lite_no_runtime",
+            "//tensorflow/core:android_tensorflow_lib_lite",
         ],
         "//conditions:default": [
             "//tensorflow/core:framework",
@@ -60,7 +60,7 @@ cc_library(
         "//tensorflow/contrib/lite:util",
     ] + select({
         "//tensorflow:android": [
-            "//tensorflow/core:android_tensorflow_lib_lite_no_runtime",
+            "//tensorflow/core:android_tensorflow_lib_lite",
         ],
         "//conditions:default": [
             "//tensorflow/core:lib",
@@ -178,7 +178,7 @@ cc_library(
         "//tensorflow/contrib/lite:kernel_api",
     ] + select({
         "//tensorflow:android": [
-            "//tensorflow/core:android_tensorflow_lib_lite_no_runtime",
+            "//tensorflow/core:android_tensorflow_lib",
         ],
         "//conditions:default": [
             "//tensorflow/core:lib",
diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD
index be5f3bae3a..7b74c67c85 100644
--- a/tensorflow/core/common_runtime/eager/BUILD
+++ b/tensorflow/core/common_runtime/eager/BUILD
@@ -147,10 +147,11 @@ tf_cuda_library(
         "kernel_and_device.h",
     ],
     visibility = ["//tensorflow:internal"],
-    deps = select({
+    deps = [
+        "@farmhash_archive//:farmhash",
+    ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib_lite",
-            "//util/hash:farmhash_fingerprint",
         ],
         "//conditions:default": [
             "//tensorflow/core:core_cpu_lib",
@@ -219,13 +220,13 @@ tf_cuda_library(
     visibility = ["//tensorflow:internal"],
     deps = [
         ":kernel_and_device",
+        "@farmhash_archive//:farmhash",
         # Only the TF_AttrType enum is required, so pull in just the C headers.
         # TODO(b/113535673): Break this dependency and avoid the C header completely.
         "//tensorflow/c:c_api_headers",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib_lite",
-            "//util/hash:farmhash_fingerprint",
         ],
         "//conditions:default": [
             "//tensorflow/core:core_cpu",
-- 
GitLab


From dc4ac1b84c9c74655f04254779516f9968a5c385 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 1 Oct 2018 15:41:29 -0700
Subject: [PATCH 0181/1085] Clean up the build_xla_ops to use the generated C++
 TF op wrappers.

This cleanup will make the future CL implementing lazy compilation simpler.

Includes some supporting changes:

 - Teach NewInternalScope to create a scope that doesn't do shape inference.  We
   need this because we don't have a ShapeRefiner that has been run over the
   entire graph available in the build_xla_ops pass.

 - Add a WithAssignedDevice modifier to tensorflow::Scope.

 - Make cc_op_gen write out an Operation field for nodes which may not
   necessarily have any outputs.  We already did this in most cases, but we
   weren't doing it for nodes that have possibly-empty list outputs.

 - Minor change renaming ops/xla_jit_op.cc to ops/xla_jit_ops.cc, now that we
   have more than one XLA JIT op.

PiperOrigin-RevId: 215293817
---
 tensorflow/cc/framework/cc_op_gen.cc          |  10 +-
 tensorflow/cc/framework/scope.cc              |  33 +++-
 tensorflow/cc/framework/scope.h               |   4 +
 tensorflow/cc/framework/scope_internal.h      |   5 +
 tensorflow/compiler/jit/BUILD                 |   4 +
 tensorflow/compiler/jit/build_xla_ops_pass.cc | 180 ++++++++----------
 .../compiler/jit/build_xla_ops_pass_test.cc   |  32 +++-
 .../encapsulate_xla_computations_pass_test.cc |   2 +-
 tensorflow/compiler/tf2xla/cc/BUILD           |   7 +-
 tensorflow/core/graph/node_builder.cc         |   7 +
 tensorflow/core/graph/node_builder.h          |   4 +
 11 files changed, 174 insertions(+), 114 deletions(-)

diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index a32d1b1eb5..39593370d1 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -853,11 +853,7 @@ void OpInfo::WriteClassDecl(WritableFile* h) const {
     }
   }
 
-  strings::StrAppend(&class_decl, "\n");
-
-  if (output_types.empty()) {
-    strings::StrAppend(&class_decl, "  Operation operation;\n");
-  }
+  strings::StrAppend(&class_decl, "\n  Operation operation;\n");
   for (int i = 0; i < output_types.size(); ++i) {
     strings::StrAppend(&class_decl, "  ", output_types[i], " ", output_names[i],
                        ";\n");
@@ -878,9 +874,11 @@ void OpInfo::GetOutput(string* out) const {
   string return_on_error =
       strings::StrCat("if (!", scope_str, ".ok()) return;");
 
+  strings::StrAppend(out, "  this->operation = Operation(ret);\n");
+
   // No outputs.
   if (graph_op_def.output_arg_size() == 0) {
-    strings::StrAppend(out, "  this->operation = Operation(ret);\n  return;\n");
+    strings::StrAppend(out, "  return;\n");
     return;
   }
   if (graph_op_def.output_arg_size() == 1) {
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
index 7f6ac4cae7..6abc9e268e 100644
--- a/tensorflow/cc/framework/scope.cc
+++ b/tensorflow/cc/framework/scope.cc
@@ -62,7 +62,7 @@ Scope::Impl::Impl(const std::shared_ptr<Graph>& graph,
       refiner_(refiner),
       scope_used_(nullptr),
       colocation_constraints_(),
-      disable_shape_inference_(false) {}
+      disable_shape_inference_(refiner_ == nullptr) {}
 
 Scope Scope::NewRootScope() {
   Graph* graph = new Graph(OpRegistry::Global());
@@ -94,6 +94,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ScopeName, const string& name,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -110,6 +111,7 @@ Scope::Impl::Impl(const Scope& other, Tags::OpName, const string& name,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -132,6 +134,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ControlDeps,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -163,6 +166,7 @@ Scope::Impl::Impl(const Scope& other, Tags::SingleUseScope,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -178,6 +182,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ExitOnError)
       exit_on_error_(true),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -194,6 +199,7 @@ Scope::Impl::Impl(const Scope& other, Tags::KernelLabel,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(kernel_label),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(other.impl()->colocation_constraints_),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
@@ -210,12 +216,30 @@ Scope::Impl::Impl(const Scope& other, Tags::Colocate,
       exit_on_error_(other.impl()->exit_on_error_),
       kernel_label_(other.impl()->kernel_label_),
       device_(other.impl()->device_),
+      assigned_device_(other.impl()->assigned_device_),
       colocation_constraints_(
           clear_colocations
               ? std::unordered_set<string>()
               : other.impl()->GetColocationConstraints(colocate_with_op)),
       disable_shape_inference_(other.impl()->disable_shape_inference_) {}
 
+Scope::Impl::Impl(const Scope& other, Tags::AssignedDevice,
+                  const string& assigned_device)
+    : graph_(other.impl()->graph_),
+      status_(other.impl()->status_),
+      name_map_(other.impl()->name_map_),
+      refiner_(other.impl()->refiner_),
+      scope_used_(other.impl()->scope_used_),
+      control_deps_(other.impl()->control_deps_),
+      name_(other.impl()->name_),
+      op_name_(other.impl()->op_name_),
+      exit_on_error_(other.impl()->exit_on_error_),
+      kernel_label_(other.impl()->kernel_label_),
+      device_(other.impl()->device_),
+      assigned_device_(assigned_device),
+      colocation_constraints_(other.impl()->colocation_constraints_),
+      disable_shape_inference_(other.impl()->disable_shape_inference_) {}
+
 std::unordered_set<string> Scope::Impl::GetColocationConstraints(
     const Operation& colocate_with_op) const {
   std::unordered_set<string> current_constraints(colocation_constraints_);
@@ -299,6 +323,9 @@ void Scope::UpdateBuilder(NodeBuilder* builder) const {
   if (!impl()->device_.empty()) {
     builder->Device(impl()->device_);
   }
+  if (!impl()->assigned_device_.empty()) {
+    builder->AssignedDevice(impl()->assigned_device_);
+  }
 }
 
 string Scope::Impl::GetUniqueName(const string& prefix,
@@ -394,6 +421,10 @@ Scope Scope::WithDevice(const string& device) const {
   return Scope(new Impl(*this, Impl::Tags::Device(), device));
 }
 
+Scope Scope::WithAssignedDevice(const string& assigned_device) const {
+  return Scope(new Impl(*this, Impl::Tags::AssignedDevice(), assigned_device));
+}
+
 Scope Scope::ColocateWith(const Operation& op) const {
   return Scope(new Impl(*this, Impl::Tags::Colocate(), op,
                         /* clear_colocations */ false));
diff --git a/tensorflow/cc/framework/scope.h b/tensorflow/cc/framework/scope.h
index 30c32bd44b..e307d8989b 100644
--- a/tensorflow/cc/framework/scope.h
+++ b/tensorflow/cc/framework/scope.h
@@ -133,6 +133,10 @@ class Scope {
   /// the device field set to 'device'.
   Scope WithDevice(const string& device) const;
 
+  /// Returns a new scope.  All ops created within the returned scope will have
+  /// their assigned device set to `assigned_device`.
+  Scope WithAssignedDevice(const string& assigned_device) const;
+
   /// Return a new scope. All ops created within the returned scope will be
   /// co-located on the device where op is placed.
   /// NOTE: This function is intended to be use internal libraries only for
diff --git a/tensorflow/cc/framework/scope_internal.h b/tensorflow/cc/framework/scope_internal.h
index 58adaef2e9..514e02e841 100644
--- a/tensorflow/cc/framework/scope_internal.h
+++ b/tensorflow/cc/framework/scope_internal.h
@@ -26,6 +26,8 @@ class ShapeRefiner;
 // graph, status, name_map, and refiner.
 // This is intended to enable the C API (which are used by other language
 // bindings) to create a Scope and access C++ functionality (i.e. gradients).
+//
+// Shape inference is disabled if `refiner` is nullptr.
 Scope NewInternalScope(Graph* graph, Status* status, ShapeRefiner* refiner);
 
 class Scope::Impl {
@@ -58,6 +60,7 @@ class Scope::Impl {
     enum class ExitOnError;
     enum class KernelLabel;
     enum class Colocate;
+    enum class AssignedDevice;
   };
 
   Impl(Graph* graph, Status* status, NameMap* name_map, ShapeRefiner* refiner,
@@ -74,6 +77,7 @@ class Scope::Impl {
   Impl(const Scope& other, Tags::KernelLabel, const string& kernel_label);
   Impl(const Scope& other, Tags::Colocate, const Operation& colocate_with_op,
        bool clear_colocations);
+  Impl(const Scope& other, Tags::AssignedDevice, const string& assigned_device);
 
   std::unordered_set<string> GetColocationConstraints(
       const Operation& colocate_with_op) const;
@@ -107,6 +111,7 @@ class Scope::Impl {
   const bool exit_on_error_ = false;
   const string kernel_label_ = "";
   const string device_ = "";
+  const string assigned_device_ = "";
   const std::unordered_set<string> colocation_constraints_;
 
   // If true, Scope::DoShapeInference() always returns Status:OK().
diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 29b60d1dbe..f20270931f 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -385,12 +385,16 @@ cc_library(
         ":shape_inference_helpers",
         ":union_find",
         ":xla_cluster_util",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:ops",
+        "//tensorflow/cc:scope_internal",
         "//tensorflow/compiler/jit/graphcycles",
         "//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags",
         "//tensorflow/compiler/jit/ops:xla_ops",
         "//tensorflow/compiler/tf2xla:dump_graph",
         "//tensorflow/compiler/tf2xla:resource_operation_table",
         "//tensorflow/compiler/tf2xla:xla_compiler",
+        "//tensorflow/compiler/tf2xla/cc:xla_jit_ops",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:core_cpu",
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc
index 9e3fd93cda..5974696b77 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc
@@ -14,8 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/jit/build_xla_ops_pass.h"
+#include "absl/algorithm/container.h"
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope_internal.h"
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_ops.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/common_runtime/function.h"
@@ -31,132 +35,108 @@ limitations under the License.
 #include "tensorflow/core/public/version.h"
 
 namespace tensorflow {
-
-static Status BuildXlaCompileNode(
-    const string& nodename, const string& function_name,
-    const AttrValueMap& function_attr, const string& device_name,
-    const DataTypeVector& constant_dtypes, int num_resources,
-    const DataTypeVector& arg_dtypes, Graph* graph, Node** node) {
-  NodeDef def;
-  def.set_name(graph->NewName(nodename));
-  def.set_op("_XlaCompile");
-  def.set_device(device_name);
-  AddNodeAttr("Tconstants", constant_dtypes, &def);
-  AddNodeAttr("Targs", arg_dtypes, &def);
-  AddNodeAttr("Nresources", num_resources, &def);
-  NameAttrList function;
-  function.set_name(function_name);
-  *function.mutable_attr() = function_attr;
-  AddNodeAttr("function", function, &def);
-
-  Status status;
-  *node = graph->AddNode(def, &status);
-  return status;
+namespace {
+void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) {
+  std::vector<const Edge*> out_edges(old_node->out_edges().begin(),
+                                     old_node->out_edges().end());
+  for (const Edge* edge : out_edges) {
+    // TODO(sanjoy): This does not update NodeDef inputs.  To be able to update
+    // NodeDef inputs we first need to fix encapsulate_subgraphs_pass to fix up
+    // the NodeDef inputs to the function call nodes.
+    g->AddEdge(new_node, edge->src_output(), edge->dst(), edge->dst_input());
+    g->RemoveEdge(edge);
+  }
 }
 
-static Status BuildXlaRunNode(const string& nodename, const string& device_name,
-                              const DataTypeVector& arg_dtypes,
-                              const DataTypeVector& result_dtypes, Graph* graph,
-                              Node** node) {
-  NodeDef def;
-  def.set_name(graph->NewName(nodename));
-  def.set_op("_XlaRun");
-  def.set_device(device_name);
-  AddNodeAttr("Targs", arg_dtypes, &def);
-  AddNodeAttr("Tresults", result_dtypes, &def);
+struct XlaClusterInfo {
+  std::vector<Output> constant_inputs;
+  std::vector<Output> non_constant_inputs;
+  std::vector<Output> resource_inputs;
+  NameAttrList function;
+};
 
-  Status status;
-  *node = graph->AddNode(def, &status);
-  return status;
+Output IncomingEdgeAsOutput(const Edge* e) {
+  return Output(e->src(), e->src_output());
 }
 
-static Status GetXlaAttrs(Node* node, int* num_constant_args,
-                          int* num_resource_args, DataTypeVector* const_dtypes,
-                          DataTypeVector* arg_dtypes) {
+Status GetXlaClusterInfo(Node* n, XlaClusterInfo* result) {
+  int num_constant_inputs, num_resource_inputs;
   TF_RETURN_IF_ERROR(
-      GetNodeAttr(node->attrs(), kXlaNumConstantArgsAttr, num_constant_args));
+      GetNodeAttr(n->attrs(), kXlaNumConstantArgsAttr, &num_constant_inputs));
   TF_RETURN_IF_ERROR(
-      GetNodeAttr(node->attrs(), kXlaNumResourceArgsAttr, num_resource_args));
+      GetNodeAttr(n->attrs(), kXlaNumResourceArgsAttr, &num_resource_inputs));
 
-  if (*num_constant_args < 0 || *num_resource_args < 0 ||
-      *num_constant_args + *num_resource_args > node->num_inputs()) {
+  if (num_constant_inputs < 0 || num_resource_inputs < 0 ||
+      num_constant_inputs + num_resource_inputs > n->num_inputs()) {
     return errors::InvalidArgument(
         "Invalid number of constant/resource arguments to XLA kernel.");
   }
 
-  const int num_nonconst_args =
-      node->num_inputs() - *num_constant_args - *num_resource_args;
-
-  const DataTypeVector& input_types = node->input_types();
-  std::copy(input_types.begin(), input_types.begin() + *num_constant_args,
-            std::back_inserter(*const_dtypes));
-  std::copy(input_types.begin() + *num_constant_args,
-            input_types.begin() + *num_constant_args + num_nonconst_args,
-            std::back_inserter(*arg_dtypes));
-  return Status::OK();
-}
-
-static void CopyIncomingEdges(Graph* g, Node* old_node, Node* new_node,
-                              int prefix_to_ignore) {
-  for (const Edge* edge : old_node->in_edges()) {
-    if (edge->IsControlEdge()) {
-      g->AddControlEdge(edge->src(), new_node);
-    } else if (edge->dst_input() >= prefix_to_ignore) {
-      g->AddEdge(edge->src(), edge->src_output(), new_node,
-                 edge->dst_input() - prefix_to_ignore);
-    }
-  }
-}
+  int num_non_constant_inputs =
+      n->num_inputs() - num_constant_inputs - num_resource_inputs;
 
-static void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) {
-  std::vector<const Edge*> out_edges(old_node->out_edges().begin(),
-                                     old_node->out_edges().end());
-  for (const Edge* edge : out_edges) {
-    // TODO(sanjoy): This does not update NodeDef inputs.
-    g->AddEdge(new_node, edge->src_output(), edge->dst(), edge->dst_input());
-    g->RemoveEdge(edge);
-  }
-}
+  std::vector<const Edge*> input_edges_vector;
+  TF_RETURN_IF_ERROR(n->input_edges(&input_edges_vector));
+  absl::Span<const Edge*> input_edges(input_edges_vector);
 
-static Status ReplaceNodeWithXlaCompileAndRun(Graph* g, Node* n) {
-  int num_constant_args, num_resource_args;
-  DataTypeVector const_dtypes;
-  DataTypeVector arg_dtypes;
+  absl::c_transform(input_edges.subspan(0, num_constant_inputs),
+                    std::back_inserter(result->constant_inputs),
+                    IncomingEdgeAsOutput);
 
-  TF_RETURN_IF_ERROR(GetXlaAttrs(n, &num_constant_args, &num_resource_args,
-                                 &const_dtypes, &arg_dtypes));
+  absl::c_transform(
+      input_edges.subspan(num_constant_inputs, num_non_constant_inputs),
+      std::back_inserter(result->non_constant_inputs), IncomingEdgeAsOutput);
 
-  Node *compile_node, *run_node;
+  absl::c_transform(
+      input_edges.subspan(num_constant_inputs + num_non_constant_inputs,
+                          num_resource_inputs),
+      std::back_inserter(result->resource_inputs), IncomingEdgeAsOutput);
 
-  TF_RETURN_IF_ERROR(BuildXlaCompileNode(
-      n->name(), n->type_string(), n->def().attr(), n->requested_device(),
-      const_dtypes, num_resource_args, arg_dtypes, g, &compile_node));
+  result->function.set_name(n->type_string());
+  *result->function.mutable_attr() = n->def().attr();
+  return Status::OK();
+}
 
-  DataTypeVector arg_dtypes_with_resources = arg_dtypes;
-  for (int i = 0; i < num_resource_args; i++) {
-    arg_dtypes_with_resources.push_back(DT_RESOURCE);
+Status CopyIncomingControlEdges(Graph* g, Node* from, Node* to) {
+  for (const Edge* e : from->in_edges()) {
+    if (e->IsControlEdge()) {
+      g->AddControlEdge(e->src(), to);
+    }
   }
 
-  TF_RETURN_IF_ERROR(BuildXlaRunNode(n->name(), n->requested_device(),
-                                     arg_dtypes_with_resources,
-                                     n->output_types(), g, &run_node));
-
-  compile_node->set_assigned_device_name(n->assigned_device_name());
-  run_node->set_assigned_device_name(n->assigned_device_name());
+  return Status::OK();
+}
 
-  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/compile_node,
-                    /*prefix_to_ignore=*/0);
-  CopyIncomingEdges(g, /*old_node=*/n, /*new_node=*/run_node,
-                    /*prefix_to_ignore=*/num_constant_args);
+Status ReplaceNodeWithXlaCompileAndXlaRun(Graph* g, Node* n) {
+  Status status;
+  Scope root = NewInternalScope(g, &status, /*refiner=*/nullptr)
+                   .NewSubScope(n->name())
+                   .WithDevice(n->requested_device())
+                   .WithAssignedDevice(n->assigned_device_name());
+
+  XlaClusterInfo cluster_info;
+  TF_RETURN_IF_ERROR(GetXlaClusterInfo(n, &cluster_info));
+
+  ops::_XlaCompile xla_compile(root.WithOpName("xla_compile"),
+                               /*constants=*/cluster_info.constant_inputs,
+                               /*args=*/cluster_info.non_constant_inputs,
+                               /*resources=*/cluster_info.resource_inputs,
+                               cluster_info.function);
+  TF_RETURN_IF_ERROR(
+      CopyIncomingControlEdges(g, /*from=*/n, /*to=*/xla_compile.key.node()));
 
-  // The compilation_key output.
-  g->AddEdge(compile_node, 0, run_node, n->num_inputs() - num_constant_args);
+  std::vector<Output> xla_run_args = cluster_info.non_constant_inputs;
+  absl::c_copy(cluster_info.resource_inputs, std::back_inserter(xla_run_args));
+  ops::_XlaRun xla_run(root.WithOpName("xla_run"), xla_run_args,
+                       xla_compile.key, n->output_types());
 
-  MoveOutgoingEdges(g, /*old_node=*/n, /*new_node=*/run_node);
+  MoveOutgoingEdges(g, /*old_node=*/n,
+                    /*new_node=*/xla_run.operation.node());
   g->RemoveNode(n);
 
   return Status::OK();
 }
+}  // namespace
 
 Status BuildXlaOpsPass::Run(const GraphOptimizationPassOptions& options) {
   Graph* graph = options.graph->get();
@@ -170,7 +150,7 @@ Status BuildXlaOpsPass::Run(const GraphOptimizationPassOptions& options) {
     // Only compile nodes that are marked for compilation by the
     // compilation-marking pass (via 'attr_name').
     if (IsXlaCompiledKernel(*n)) {
-      TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndRun(graph, n));
+      TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndXlaRun(graph, n));
     }
   }
 
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
index b7cb4506b9..9d56db7b6b 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
@@ -56,18 +56,26 @@ Status BuildXlaOps(const Scope& s, std::unique_ptr<Graph>* result) {
 }
 
 Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name,
-                             const string& node_name, Node** result) {
+                             const string& node_name, int num_constant_args,
+                             int num_resource_args, Node** result) {
   NodeDef call_node;
   call_node.set_name(node_name);
   call_node.set_op(callee_name);
   AddNodeAttr(kXlaCompiledKernelAttr, true, &call_node);
-  AddNodeAttr(kXlaNumConstantArgsAttr, 0, &call_node);
-  AddNodeAttr(kXlaNumResourceArgsAttr, 0, &call_node);
+  AddNodeAttr(kXlaNumConstantArgsAttr, num_constant_args, &call_node);
+  AddNodeAttr(kXlaNumResourceArgsAttr, num_resource_args, &call_node);
   Status s;
   *result = graph->AddNode(call_node, &s);
   return s;
 }
 
+Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name,
+                             const string& node_name, Node** result) {
+  return MakeXlaCompiledKernel(graph, callee_name, node_name,
+                               /*num_constant_args=*/0, /*num_resource_args=*/0,
+                               result);
+}
+
 Node* MakeWrite(const Scope& scope, const string& id) {
   Output var_handle =
       ops::VarHandleOp(scope.WithOpName("Var" + id), DT_FLOAT, TensorShape({}));
@@ -108,5 +116,23 @@ TEST(BuildXlaOps, ControlDepsPreserved) {
   EXPECT_THAT(write_op_new, NodeWith(CtrlDeps(NodeWith(Op("_XlaRun")))));
 }
 
+TEST(BuildXlaOps, CleanFailureOnBogusAttr) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  FunctionDefLibrary flib_def =
+      CreateFunctionDefLibWithConstFunction("cluster_0");
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def));
+  Node* call;
+  TF_ASSERT_OK(
+      MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", 100, 100, &call));
+  Node* write_op = MakeWrite(root, "write");
+  root.graph()->AddControlEdge(call, write_op);
+
+  std::unique_ptr<Graph> graph;
+  Status failure_status = BuildXlaOps(root, &graph);
+  ASSERT_FALSE(failure_status.ok());
+  EXPECT_EQ(failure_status.code(), error::INVALID_ARGUMENT);
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
index 479038ac8e..22531a4ace 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
@@ -19,7 +19,7 @@ limitations under the License.
 #include "tensorflow/cc/ops/resource_variable_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
-#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_op.h"
+#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_ops.h"
 #include "tensorflow/compiler/tf2xla/test_util.h"
 #include "tensorflow/core/framework/graph_to_functiondef.h"
 #include "tensorflow/core/graph/graph_constructor.h"
diff --git a/tensorflow/compiler/tf2xla/cc/BUILD b/tensorflow/compiler/tf2xla/cc/BUILD
index ea8d1b3d14..adcdb6c8f7 100644
--- a/tensorflow/compiler/tf2xla/cc/BUILD
+++ b/tensorflow/compiler/tf2xla/cc/BUILD
@@ -30,14 +30,15 @@ cc_library(
 
 tf_gen_op_wrapper_cc(
     name = "xla_jit_op_gen",
-    out_ops_file = "ops/xla_jit_op",
+    include_internal_ops = 1,
+    out_ops_file = "ops/xla_jit_ops",
     deps = ["//tensorflow/compiler/jit/ops:xla_ops"],
 )
 
 cc_library(
     name = "xla_jit_ops",
-    srcs = ["ops/xla_jit_op.cc"],
-    hdrs = ["ops/xla_jit_op.h"],
+    srcs = ["ops/xla_jit_ops.cc"],
+    hdrs = ["ops/xla_jit_ops.h"],
     deps = [
         "//tensorflow/cc:const_op",
         "//tensorflow/cc:ops",
diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc
index a446e0d136..d92874909f 100644
--- a/tensorflow/core/graph/node_builder.cc
+++ b/tensorflow/core/graph/node_builder.cc
@@ -99,6 +99,11 @@ NodeBuilder& NodeBuilder::Device(StringPiece device_spec) {
   return *this;
 }
 
+NodeBuilder& NodeBuilder::AssignedDevice(StringPiece device) {
+  assigned_device_ = string(device);
+  return *this;
+}
+
 Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const {
   // In case of error, set *created_node to nullptr.
   if (created_node != nullptr) *created_node = nullptr;
@@ -115,6 +120,8 @@ Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const {
   Node* node = graph->AddNode(node_def, &status);
   if (!status.ok()) return status;
 
+  node->set_assigned_device_name(assigned_device_);
+
   for (size_t i = 0; i < inputs_.size(); ++i) {
     if (inputs_[i].node != nullptr) {  // Skip back edges.
       graph->AddEdge(inputs_[i].node, inputs_[i].index, node, i);
diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h
index 4727ee7b56..d576985a23 100644
--- a/tensorflow/core/graph/node_builder.h
+++ b/tensorflow/core/graph/node_builder.h
@@ -100,6 +100,9 @@ class NodeBuilder {
   // "assigned device" in the Node).
   NodeBuilder& Device(StringPiece device_spec);
 
+  // Sets the device name in the "assigned device" field in tensorflow::Node.
+  NodeBuilder& AssignedDevice(StringPiece device);
+
   // Set the value of an attr.  attr_name must match the name of one of
   // attrs defined by the Op, and value must have the corresponding type
   // (see SetAttrValue() in ../framework/attr_value_util.h for legal
@@ -141,6 +144,7 @@ class NodeBuilder {
   std::vector<NodeOut> inputs_;
   std::vector<Node*> control_inputs_;
   std::vector<string> errors_;
+  string assigned_device_;
 };
 
 // IMPLEMENTATION -------------------------------------------------------------
-- 
GitLab


From 28a5ce4cf8702a6605e13a99c861ec6f2cd75929 Mon Sep 17 00:00:00 2001
From: Tayo Oguntebi <tayo@google.com>
Date: Mon, 1 Oct 2018 15:47:52 -0700
Subject: [PATCH 0182/1085]   Improve error message in transpose shape
 inference.

PiperOrigin-RevId: 215294817
---
 tensorflow/compiler/xla/service/shape_inference.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 7194b2cafd..6ccea9d2b5 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -2380,7 +2380,9 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
       !std::is_permutation(dimensions.begin(), dimensions.end(),
                            indices.begin())) {
     return InvalidArgument(
-        "Transpose dimensions not a permutation of the operand dimensions.");
+        "Transpose dimensions [%s] are not a permutation of the operand "
+        "dimensions (operand shape is %s).",
+        StrJoin(dimensions, ","), ShapeUtil::HumanString(operand));
   }
 
   // Permute(dimensions,input) computes output[dimensions[i]]=input[i]. However,
-- 
GitLab


From 6509437545f8fc973b39489c285811ea8cc8b15a Mon Sep 17 00:00:00 2001
From: Zhenyu Tan <tanzheny@google.com>
Date: Mon, 1 Oct 2018 15:52:16 -0700
Subject: [PATCH 0183/1085] If keras_model_path is google storage url, provide
 util to download model remotely.

PiperOrigin-RevId: 215295504
---
 tensorflow/python/estimator/keras.py      | 48 ++++++++++++++++++++---
 tensorflow/python/estimator/keras_test.py |  6 ---
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py
index 7546771ed3..5d5ed81fbb 100644
--- a/tensorflow/python/estimator/keras.py
+++ b/tensorflow/python/estimator/keras.py
@@ -368,6 +368,44 @@ def _save_first_checkpoint(keras_model, custom_objects, config):
   return latest_path
 
 
+def _get_file_from_google_storage(keras_model_path, model_dir):
+  """Get file from google storage and download to local file.
+
+  Args:
+    keras_model_path: a google storage path for compiled keras model.
+    model_dir: the directory from estimator config.
+
+  Returns:
+    The path where keras model is saved.
+
+  Raises:
+    ValueError: if storage object name does not end with .h5.
+  """
+  try:
+    from google.cloud import storage  # pylint:disable=g-import-not-at-top
+  except ImportError:
+    raise TypeError('Could not save model to Google cloud storage; please '
+                    'install `google-cloud-storage` via '
+                    '`pip install google-cloud-storage`.')
+  storage_client = storage.Client()
+  path, blob_name = os.path.split(keras_model_path)
+  _, bucket_name = os.path.split(path)
+  keras_model_dir = os.path.join(model_dir, 'keras')
+  if not gfile.Exists(keras_model_dir):
+    gfile.MakeDirs(keras_model_dir)
+  file_name = os.path.join(keras_model_dir, 'keras_model.h5')
+  try:
+    blob = storage_client.get_bucket(bucket_name).blob(blob_name)
+    blob.download_to_filename(file_name)
+  except:
+    raise ValueError('Failed to download keras model, please check '
+                     'environment variable GOOGLE_APPLICATION_CREDENTIALS '
+                     'and model path storage.googleapis.com/{bucket}/{object}.')
+  logging.info('Saving model to {}'.format(file_name))
+  del storage_client
+  return file_name
+
+
 def model_to_estimator(keras_model=None,
                        keras_model_path=None,
                        custom_objects=None,
@@ -407,12 +445,13 @@ def model_to_estimator(keras_model=None,
         'Please specity either `keras_model` or `keras_model_path`, '
         'but not both.')
 
+  config = estimator_lib.maybe_overwrite_model_dir_and_session_config(
+      config, model_dir)
   if not keras_model:
     if keras_model_path.startswith(
         'gs://') or 'storage.googleapis.com' in keras_model_path:
-      raise ValueError(
-          '%s is not a local path. Please copy the model locally first.' %
-          keras_model_path)
+      keras_model_path = _get_file_from_google_storage(keras_model_path,
+                                                       config.model_dir)
     logging.info('Loading models from %s', keras_model_path)
     keras_model = models.load_model(keras_model_path)
   else:
@@ -425,9 +464,6 @@ def model_to_estimator(keras_model=None,
         'Please compile the model with `model.compile()` '
         'before calling `model_to_estimator()`.')
 
-  config = estimator_lib.maybe_overwrite_model_dir_and_session_config(config,
-                                                                      model_dir)
-
   keras_model_fn = _create_keras_model_fn(keras_model, custom_objects)
   if _any_weight_initialized(keras_model):
     # Warn if config passed to estimator tries to update GPUOptions. If a
diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py
index 288f9b8906..4e285fa25a 100644
--- a/tensorflow/python/estimator/keras_test.py
+++ b/tensorflow/python/estimator/keras_test.py
@@ -581,12 +581,6 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
       with self.assertRaisesRegexp(ValueError, 'compiled'):
         keras_lib.model_to_estimator(keras_model=keras_model)
 
-    with self.cached_session():
-      keras_model = simple_sequential_model()
-      with self.assertRaisesRegexp(ValueError, 'not a local path'):
-        keras_lib.model_to_estimator(
-            keras_model_path='gs://bucket/object')
-
   def test_invalid_ionames_error(self):
     (x_train, y_train), (_, _) = testing_utils.get_test_data(
         train_samples=_TRAIN_SIZE,
-- 
GitLab


From 8559bc2c4c7616c5da8b4f7a3e1405c549a6068d Mon Sep 17 00:00:00 2001
From: "Joshua V. Dillon" <jvdillon@google.com>
Date: Mon, 1 Oct 2018 15:58:21 -0700
Subject: [PATCH 0184/1085] Add email comment explicitly authorizing
 distributions/special_math.py be released under Apache 2.0.

PiperOrigin-RevId: 215296386
---
 .../python/ops/distributions/special_math.py  | 61 ++++++++++++++++++-
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/distributions/special_math.py b/tensorflow/python/ops/distributions/special_math.py
index 31b7a36fd3..ccc667cae3 100644
--- a/tensorflow/python/ops/distributions/special_math.py
+++ b/tensorflow/python/ops/distributions/special_math.py
@@ -12,6 +12,62 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+
+# Functions "ndtr" and "ndtri" are derived from calculations made in:
+# https://root.cern.ch/doc/v608/SpecFuncCephesInv_8cxx_source.html
+# In the following email exchange, the author gives his consent to redistribute
+# derived works under an Apache 2.0 license.
+#
+# From: Stephen Moshier <steve@moshier.net>
+# Date: Sat, Jun 9, 2018 at 2:36 PM
+# Subject: Re: Licensing cephes under Apache (BSD-like) license.
+# To: rif <rif@google.com>
+#
+#
+#
+# Hello Rif,
+#
+# Yes, Google may distribute Cephes files under the Apache 2 license.
+#
+# If clarification is needed, I do not favor BSD over other free licenses.
+# I would agree that Apache 2 seems to cover the concern you mentioned
+# about sublicensees.
+#
+# Best wishes for good luck with your projects!
+# Steve Moshier
+#
+#
+#
+# On Thu, 31 May 2018, rif wrote:
+#
+# > Hello Steve.
+# > My name is Rif. I work on machine learning software at Google.
+# >
+# > Your cephes software continues to be incredibly useful and widely used. I
+# > was wondering whether it would be permissible for us to use the Cephes code
+# > under the Apache 2.0 license, which is extremely similar in permissions to
+# > the BSD license (Wikipedia comparisons). This would be quite helpful to us
+# > in terms of avoiding multiple licenses on software.
+# >
+# > I'm sorry to bother you with this (I can imagine you're sick of hearing
+# > about this by now), but I want to be absolutely clear we're on the level and
+# > not misusing your important software. In former conversation with Eugene
+# > Brevdo (ebrevdo@google.com), you wrote "If your licensing is similar to BSD,
+# > the formal way that has been handled is simply to add a statement to the
+# > effect that you are incorporating the Cephes software by permission of the
+# > author." I wanted to confirm that (a) we could use the Apache license, (b)
+# > that we don't need to (and probably you don't want to) keep getting
+# > contacted about individual uses, because your intent is generally to allow
+# > this software to be reused under "BSD-like" license, and (c) you're OK
+# > letting incorporators decide whether a license is sufficiently BSD-like?
+# >
+# > Best,
+# >
+# > rif
+# >
+# >
+# >
+
 """Special Math Ops."""
 
 from __future__ import absolute_import
@@ -135,7 +191,7 @@ def _ndtri(p):
 
   # Constants used in piece-wise rational approximations. Taken from the cephes
   # library:
-  # https://github.com/scipy/scipy/blob/master/scipy/special/cephes/ndtri.c
+  # https://root.cern.ch/doc/v608/SpecFuncCephesInv_8cxx_source.html
   p0 = list(reversed([-5.99633501014107895267E1,
                       9.80010754185999661536E1,
                       -5.66762857469070293439E1,
@@ -305,7 +361,8 @@ def log_ndtr(x, series_order=3, name="log_ndtr"):
     else:
       raise TypeError("x.dtype=%s is not supported." % x.dtype)
 
-    # The basic idea here was ported from py/scipy/special/cephes/ndtr.c.
+    # The basic idea here was ported from:
+    #   https://root.cern.ch/doc/v608/SpecFuncCephesInv_8cxx_source.html
     # We copy the main idea, with a few changes
     # * For x >> 1, and X ~ Normal(0, 1),
     #     Log[P[X < x]] = Log[1 - P[X < -x]] approx -P[X < -x],
-- 
GitLab


From 55f561e6740d61b3665594babce4be72ad955bc6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 16:07:09 -0700
Subject: [PATCH 0185/1085] Small tweaks to comments and documentation strings.

PiperOrigin-RevId: 215297961
---
 tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index 5c27d59f82..ef2f8dd36d 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -46,7 +46,7 @@ namespace tensorflow {
 // 5. TPUEmbeddingActivations, when used with appropriate Python libraries,
 //    enables the automatic differentiation of models that use embeddings.
 // 6. TPUEmbeddingSendGradients takes a list of Tensors (of the same shapes
-//    as those returned by TPUEmbeddingReceivActivations) containing gradients
+//    as those returned by TPUEmbeddingReceiveActivations) containing gradients
 //    to use in updating the embedding tables.
 // 7. Before saving a checkpoint, use the TPUEmbeddingRetrieve Op to update
 //    the Graph's embedding table Variables from the updated tables in the
@@ -147,7 +147,7 @@ parameters that are loaded from a checkpoint before a training loop is
 executed.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto (overrides table_id).
+  TPUEmbeddingConfiguration proto (overrides table_id).
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
 table_id: Index of this table in the EmbeddingLayerConfiguration proto
@@ -283,7 +283,7 @@ the correct embedding table configuration. For example, this op is
 used to retrieve updated parameters before saving a checkpoint.
 %s
 table_name: Name of this table; must match a name in the
-  EmbeddingLayerConfiguration proto (overrides table_id).
+  TPUEmbeddingConfiguration proto (overrides table_id).
 num_shards: Number of shards into which the embedding tables are divided.
 shard_id: Identifier of shard for this operation.
 table_id: Index of this table in the EmbeddingLayerConfiguration proto
-- 
GitLab


From 24333d8e55bdd995089e93122750340bf8d1ddba Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 1 Oct 2018 16:09:45 -0700
Subject: [PATCH 0186/1085] [TF/XLA] Optimize
 `Encapsulator::GetFunctionNameAttr()`.

The previous version was hitting a very slow path in `GetNodeAttr()`, which is expensive when the named attr is not found. This change inlines the logic of finding the two relevant attrs inside `GetFunctionNameAttr()` and avoids constructing a status object with a serialized `NodeDef` when the attr can't be found.

PiperOrigin-RevId: 215298411
---
 .../jit/encapsulate_subgraphs_pass.cc         | 43 ++++++++++---------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index 15faf31077..d165341f21 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -1363,28 +1363,31 @@ void Encapsulator::Subgraph::GetOutsideCompilationSubgraphNames(
 
 Status Encapsulator::GetFunctionNameAttr(
     Node const* node, string* attr, string* outside_compilation_attr) const {
-  Status s = GetNodeAttr(node->attrs(), group_attribute_, attr);
-  if (s.code() == error::Code::NOT_FOUND) {
-    // Return empty attr if there's no group_attribute.
-    attr->clear();
-  } else {
-    TF_RETURN_IF_ERROR(s);
-  }
-  bool has_group_attr = s.ok();
-  s = GetNodeAttr(node->attrs(), outside_compilation_attribute_,
-                  outside_compilation_attr);
-  if (s.code() == error::Code::NOT_FOUND) {
-    // Return empty attr if there's no outside_compilation attribute.
-    outside_compilation_attr->clear();
-  } else {
-    TF_RETURN_IF_ERROR(s);
-    if (!has_group_attr) {
-      return errors::InvalidArgument(
-          "Node ", node->name(), " has ", outside_compilation_attribute_,
-          " attribute but no ", group_attribute_, " attribute.");
+  AttrSlice attrs = node->attrs();
+  attr->clear();
+  outside_compilation_attr->clear();
+  bool found_group_attribute = false;
+  bool found_outside_compilation_attribute = false;
+  for (const auto& node_attr : attrs) {
+    if (node_attr.first == group_attribute_) {
+      TF_RETURN_IF_ERROR(AttrValueHasType(node_attr.second, "string"));
+      *attr = node_attr.second.s();
+      found_group_attribute = true;
+    } else if (node_attr.first == outside_compilation_attribute_) {
+      TF_RETURN_IF_ERROR(AttrValueHasType(node_attr.second, "string"));
+      *outside_compilation_attr = node_attr.second.s();
+      found_outside_compilation_attribute = true;
     }
+    if (found_group_attribute && found_outside_compilation_attribute) break;
+  }
+
+  if (found_outside_compilation_attribute && !found_group_attribute) {
+    return errors::InvalidArgument(
+        "Node ", node->name(), " has ", outside_compilation_attribute_,
+        " attribute but no ", group_attribute_, " attribute.");
+  } else {
+    return Status::OK();
   }
-  return Status::OK();
 }
 
 bool IsInSubgraph(const string& func_id, const string& outside_compilation_id) {
-- 
GitLab


From 49bbfec04b729960999ef054e3acab719631b101 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 16:16:43 -0700
Subject: [PATCH 0187/1085] Override implementation of log survival for
 Exponential distribution to better handle small values.

PiperOrigin-RevId: 215299532
---
 .../distributions/exponential_test.py            | 16 ++++++++++++++++
 .../python/ops/distributions/exponential.py      |  3 +++
 2 files changed, 19 insertions(+)

diff --git a/tensorflow/python/kernel_tests/distributions/exponential_test.py b/tensorflow/python/kernel_tests/distributions/exponential_test.py
index 27d1291912..367f8bb0f1 100644
--- a/tensorflow/python/kernel_tests/distributions/exponential_test.py
+++ b/tensorflow/python/kernel_tests/distributions/exponential_test.py
@@ -81,6 +81,22 @@ class ExponentialTest(test.TestCase):
     expected_cdf = stats.expon.cdf(x, scale=1 / lam_v)
     self.assertAllClose(self.evaluate(cdf), expected_cdf)
 
+  def testExponentialLogSurvival(self):
+    batch_size = 7
+    lam = constant_op.constant([2.0] * batch_size)
+    lam_v = 2.0
+    x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0, 10.0], dtype=np.float32)
+
+    exponential = exponential_lib.Exponential(rate=lam)
+
+    log_survival = exponential.log_survival_function(x)
+    self.assertEqual(log_survival.get_shape(), (7,))
+
+    if not stats:
+      return
+    expected_log_survival = stats.expon.logsf(x, scale=1 / lam_v)
+    self.assertAllClose(self.evaluate(log_survival), expected_log_survival)
+
   def testExponentialMean(self):
     lam_v = np.array([1.0, 4.0, 2.5])
     exponential = exponential_lib.Exponential(rate=lam_v)
diff --git a/tensorflow/python/ops/distributions/exponential.py b/tensorflow/python/ops/distributions/exponential.py
index 4325a14449..02129b5e2a 100644
--- a/tensorflow/python/ops/distributions/exponential.py
+++ b/tensorflow/python/ops/distributions/exponential.py
@@ -114,6 +114,9 @@ class Exponential(gamma.Gamma):
   def rate(self):
     return self._rate
 
+  def _log_survival_function(self, value):
+    return self._log_prob(value) - math_ops.log(self._rate)
+
   def _sample_n(self, n, seed=None):
     shape = array_ops.concat([[n], array_ops.shape(self._rate)], 0)
     # Uniform variates must be sampled from the open-interval `(0, 1)` rather
-- 
GitLab


From bb1f9e1a57c8bc18325b3c86298be96e6647a0a3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 16:31:13 -0700
Subject: [PATCH 0188/1085] Change semantics of DistributionStrategy.update()
 to make sure the output depends on the updates across all mirrors. Before
 this change, update() would return a Mirrored value that where each component
 was an update to a single mirror. This caused a problem since for reading
 purposes other DistributionStrategy methods would consider it okay to read
 any single component, and so if you for example did something like
 session.run(strategy.update(...)) it would only perform the update on one
 replica. The fix is to have the output be a Mirrored value that is actually
 the identity operation returning the output on that device, but that has a
 control dependency making sure that the update actually happens on all the
 replicas. This fix was already present in MirroredVariable._assign_func, this
 CL moves the fix into update() and generalizes it to multiple return values.

To disable this new grouping behavior, you may now pass
"grouped=False" to update(). For example, some callers (like Optimizer)
are performing a lot of updates and they prefer to group all of them
together at once for performance reasons.  In this case, we still want
to make sure the caller executes the update on all replicas, so we
return an unwrapped value instead of a Mirrored value. This has the
happy side effect of removing a bunch of unwrap calls in client code,
since unwrapping was the only safe way to use the Mirrored value we
used to return.

PiperOrigin-RevId: 215301909
---
 .../collective_all_reduce_strategy_test.py    |  3 +-
 .../distribute/python/mirrored_strategy.py    | 12 +++--
 .../python/mirrored_strategy_multigpu_test.py |  2 +-
 .../distribute/python/one_device_strategy.py  | 17 +++++--
 .../python/parameter_server_strategy.py       | 22 ++++++--
 .../python/parameter_server_strategy_test.py  |  3 +-
 .../distribute/python/strategy_test_lib.py    |  6 ++-
 .../contrib/distribute/python/tpu_strategy.py | 36 ++++++++-----
 .../contrib/distribute/python/values.py       | 36 ++++++++-----
 .../contrib/optimizer_v2/optimizer_v2.py      | 32 +++++-------
 tensorflow/python/training/distribute.py      | 51 +++++++++++--------
 .../training/distribution_strategy_context.py |  2 +
 tensorflow/python/training/optimizer.py       | 10 ++--
 13 files changed, 144 insertions(+), 88 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
index 33ffbf6abe..6796a23d46 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
@@ -128,7 +128,8 @@ class CollectiveAllReduceStrategyTestBase(
             # TODO(yuefengz): support non-Mirrored variable as destinations.
             g = d.reduce(
                 variable_scope.VariableAggregation.SUM, g, destinations=v)
-            with ops.control_dependencies(d.unwrap(d.update(v, update, g))):
+            with ops.control_dependencies(
+                d.update(v, update, g, grouped=False)):
               after_list.append(d.read_var(v))
         return before_list, after_list
 
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 4d7516063c..6bd380a22d 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -627,9 +627,11 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
     return self._get_cross_tower_ops().batch_reduce(aggregation,
                                                     value_destination_pairs)
 
-  def _update(self, var, fn, *args, **kwargs):
+  def _update(self, var, options, fn, *args, **kwargs):
     # TODO(josh11b): In eager mode, use one thread per device.
     assert isinstance(var, values.DistributedVariable)
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     updates = {}
     for d, v in var._index.items():  # pylint: disable=protected-access
       name = "update_%d" % self._device_index.get(d)
@@ -638,10 +640,12 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
         updates[d] = fn(v,
                         *values.select_device_mirrored(d, args),
                         **values.select_device_mirrored(d, kwargs))
-    return values.regroup(updates, values.Mirrored)
+    return values.update_regroup(self, updates, should_group)
 
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
     assert isinstance(colocate_with, list)
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     # TODO(josh11b): In eager mode, use one thread per device.
     updates = {}
     for d in colocate_with:
@@ -649,7 +653,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
       with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name):
         updates[d] = fn(*values.select_device_mirrored(d, args),
                         **values.select_device_mirrored(d, kwargs))
-    return values.regroup(updates, values.Mirrored)
+    return values.update_regroup(self, updates, should_group)
 
   def read_var(self, tower_local_var):
     """Read the aggregate value of a tower-local variable."""
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index f51e543624..eeac528329 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -826,7 +826,7 @@ class MirroredStrategyVariableCreationTest(test.TestCase):
 
       with dist.scope():
         ret_v_sum = dist.call_for_each_tower(model_fn, run_concurrently=False)
-        update_ops = dist.unwrap(dist.update(ret_v_sum, update, 5.0))
+        update_ops = dist.update(ret_v_sum, update, 5.0, grouped=False)
 
         # Initialize variables.
         self.evaluate(variables.global_variables_initializer())
diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py
index 23b220f64b..f525919048 100644
--- a/tensorflow/contrib/distribute/python/one_device_strategy.py
+++ b/tensorflow/contrib/distribute/python/one_device_strategy.py
@@ -141,14 +141,21 @@ class OneDeviceStrategy(distribute_lib.DistributionStrategy):
       else:
         assert False
 
-  def _update(self, var, fn, *args, **kwargs):
-    with ops.device(self._device), distribute_lib.UpdateContext(self._device):
-      return fn(var, *args, **kwargs)
+  def _update(self, var, options, fn, *args, **kwargs):
+    # The implementations of _update() and _update_non_slot() are identical
+    # except _update() passes `var` as the first argument to `fn()`.
+    return self._update_non_slot(var, options, fn, var, *args, **kwargs)
 
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
     del colocate_with
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     with ops.device(self._device), distribute_lib.UpdateContext(self._device):
-      return fn(*args, **kwargs)
+      result = fn(*args, **kwargs)
+      if should_group:
+        return result
+      else:
+        return nest.map_structure(self._unwrap, result)
 
   def read_var(self, tower_local_var):
     """Read the aggregate value of a tower-local variable."""
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy.py b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
index 1125d027f6..6ddd91507b 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
@@ -343,21 +343,33 @@ class ParameterServerStrategy(distribute_lib.DistributionStrategy):
 
     return nest.map_structure(_select_fn, structured)
 
-  def _update(self, var, fn, *args, **kwargs):
+  def _update(self, var, options, fn, *args, **kwargs):
     if isinstance(var, values.AggregatingVariable):
       var = var.get()
     if not isinstance(var, resource_variable_ops.ResourceVariable):
       raise ValueError(
           "You can not update `var` %r. It must be a Variable." % var)
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     with ops.colocate_with(var), distribute_lib.UpdateContext(var.device):
-      return fn(var, *self._select_single_value(args),
-                **self._select_single_value(kwargs))
+      result = fn(var, *self._select_single_value(args),
+                  **self._select_single_value(kwargs))
+      if should_group:
+        return result
+      else:
+        return nest.map_structure(self._unwrap, result)
 
   # TODO(yuefengz): does it need to call _select_single_value?
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     with ops.device(
         colocate_with.device), distribute_lib.UpdateContext(colocate_with):
-      return fn(*args, **kwargs)
+      result = fn(*args, **kwargs)
+      if should_group:
+        return result
+      else:
+        return nest.map_structure(self._unwrap, result)
 
   def _unwrap(self, val):
     if isinstance(val, values.DistributedValues):
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
index 12789e0bc9..353d11a583 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
@@ -395,7 +395,8 @@ class ParameterServerStrategyTestBase(
             # TODO(yuefengz): support non-Mirrored variable as destinations.
             g = d.reduce(
                 variable_scope.VariableAggregation.SUM, g, destinations=v)
-            with ops.control_dependencies(d.unwrap(d.update(v, update, g))):
+            with ops.control_dependencies(
+                d.update(v, update, g, grouped=False)):
               after_list.append(d.read_var(v))
         return before_list, after_list
 
diff --git a/tensorflow/contrib/distribute/python/strategy_test_lib.py b/tensorflow/contrib/distribute/python/strategy_test_lib.py
index 5d498fb629..fd280f5754 100644
--- a/tensorflow/contrib/distribute/python/strategy_test_lib.py
+++ b/tensorflow/contrib/distribute/python/strategy_test_lib.py
@@ -115,7 +115,8 @@ class DistributionTestBase(test.TestCase):
           with ops.control_dependencies([fetched]):
             g = d.reduce(
                 variable_scope.VariableAggregation.SUM, g, destinations=v)
-            with ops.control_dependencies(d.unwrap(d.update(v, update, g))):
+            with ops.control_dependencies(d.update(
+                v, update, g, grouped=False)):
               after_list.append(d.read_var(v))
         return before_list, after_list
 
@@ -169,7 +170,8 @@ class DistributionTestBase(test.TestCase):
           with ops.control_dependencies([fetched]):
             g = d.reduce(
                 variable_scope.VariableAggregation.SUM, g, destinations=v)
-            with ops.control_dependencies(d.unwrap(d.update(v, update, g))):
+            with ops.control_dependencies(d.update(
+                v, update, g, grouped=False)):
               after_list.append(d.read_var(v))
         return before_list, after_list
 
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index 1b555482d3..c3c7df3cd8 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -297,6 +297,7 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
       # For outputs that have already been aggregated, take the first value
       # from the list as each value should be the same. Else return the full
       # list of values.
+      # TODO(josh11b): If aggregation is NONE, we should return a PerDevice value.
       if aggregation is not variables_lib.VariableAggregation.NONE:
         # TODO(priyag): Should this return the element or a list with 1 element
         last_step_tensor_outputs_dict[name] = output[0]
@@ -398,11 +399,16 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
       return output * (1. / len(value))
     return output
 
-  def _update(self, var, fn, *args, **kwargs):
-    # TODO(jhseu): Consider supporting grouped==False.
+  def _update(self, var, options, fn, *args, **kwargs):
     assert isinstance(var, values.TPUMirroredVariable)
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
+
     if values._enclosing_tpu_context() is not None:  # pylint: disable=protected-access
-      return fn(var, *args, **kwargs)
+      if should_group:
+        return fn(var, *args, **kwargs)
+      else:
+        return [fn(var, *args, **kwargs)]
 
     # Otherwise, we revert to MirroredStrategy behavior and update each variable
     # directly.
@@ -414,23 +420,25 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
         updates[d] = fn(v,
                         *values.select_device_mirrored(d, args),
                         **values.select_device_mirrored(d, kwargs))
+    return values.update_regroup(self, updates, should_group)
 
-    # Make a single control dependency to keep the variables mirrored. If one
-    # assignment is fetched, then run all assignments.
-    sorted_keys = sorted(updates.keys())
-    update_tuple = control_flow_ops.tuple([updates[d] for d in sorted_keys])
-    for i, d in enumerate(sorted_keys):
-      updates[d] = update_tuple[i]
-    return values.regroup(updates, values.Mirrored)
+  # TODO(josh11b): Need to implement _update_non_slot()!
 
   def read_var(self, var):
     assert isinstance(var, values.TPUMirroredVariable)
     return var.read_value()
 
-  def _unwrap(self, value):
-    if isinstance(value, list):
-      return value
-    return [value]
+  def _unwrap(self, val):
+    if isinstance(val, values.DistributedValues):
+      # Return in a deterministic order.
+      return [val.get(device=d) for d in sorted(val.devices)]
+    elif isinstance(val, list):
+      # TODO(josh11b): We need to remove this case; per device values should
+      # be represented using a PerDevice wrapper instead of a list with
+      # one entry per device.
+      return val
+    return [val]
+
 
   @property
   def num_towers(self):
diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index c18faeb67d..18ceba42c2 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -366,18 +366,7 @@ class MirroredVariable(DistributedVariable, Mirrored,
       # We are calling assign on the mirrored variable in cross tower context,
       # use update to update the variable.
       strategy = distribution_strategy_context.get_distribution_strategy()
-      updates = strategy.update(self, f, *args, **kwargs)
-      grouped = strategy.group(updates)
-      if isinstance(updates, DistributedValues) and updates.is_tensor_like:
-        # Make sure we run all updates. Without this, something like
-        # session.run(mirrored_var.assign*(...)) may only update one tower.
-        index = {}
-        for d in updates.devices:
-          with ops.device(d), ops.control_dependencies([grouped]):
-            index[d] = array_ops.identity(updates.get(d))
-        return Mirrored(index)
-      else:
-        return grouped
+      return strategy.update(self, f, *args, **kwargs)
     else:
       _assert_tower_context()
       # We are calling an assign function on the mirrored variable in tower
@@ -1049,6 +1038,29 @@ def select_device_mirrored(device, structured):
   return nest.map_structure(_get_mirrored, structured)
 
 
+def update_regroup(strategy, updates, should_group):
+  """Regroup for an update, with dependencies to ensure all updates execute."""
+  regrouped = regroup(updates, Mirrored)
+  if not should_group:
+    return nest.map_structure(strategy.unwrap, regrouped)
+  grouped_flat = []
+  for u in nest.flatten(regrouped):
+    if isinstance(u, DistributedValues):
+      g = strategy.group(u)
+      if u.is_tensor_like:
+        # Make sure we run all updates. Without this, something like
+        # session.run(strategy.update(...)) may only update one tower.
+        index = {}
+        for d in u.devices:
+          with ops.device(d), ops.control_dependencies([g]):
+            index[d] = array_ops.identity(u.get(d))
+        g = Mirrored(index)
+    else:
+      g = u
+    grouped_flat.append(g)
+  return nest.pack_sequence_as(regrouped, grouped_flat)
+
+
 class PerDeviceDataIterator(object):
   """An iterator (like `tf.data.Iterator`) into a `PerDeviceDataset`."""
 
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
index 6af59dcfbf..53e27c08c4 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
@@ -30,7 +30,6 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.training import distribute as distribute_lib
@@ -965,8 +964,7 @@ class OptimizerV2(optimizer_v1.Optimizer):
       # Use the processors to update the variables.
       update_ops = []
       for grad, var in grads_and_vars:
-        update_ops.extend(distribution.unwrap(distribution.update(
-            var, update, grad)))
+        update_ops.extend(distribution.update(var, update, grad, grouped=False))
 
       # Give the child class a chance to do something after applying
       # gradients
@@ -978,26 +976,24 @@ class OptimizerV2(optimizer_v1.Optimizer):
 
       update_ops = control_flow_ops.group(update_ops)
       with ops.control_dependencies([update_ops]):
-        finish_updates = distribution.update_non_slot(non_slot_devices, finish)
-      if finish_updates is None:
-        finish_updates = update_ops
+        finish_updates = distribution.update_non_slot(
+            non_slot_devices, finish, grouped=False)
+      # We said grouped=False, which means finish_updates is always a list.
+      # It will be [None] when finish() returns None.
+      if finish_updates == [None]:
+        finish_updates = [update_ops]
 
       # Update `global_step` (if any).
       if global_step is None:
         apply_updates = distribution.group(finish_updates, name=name)
       else:
-        with ops.control_dependencies(distribution.unwrap(finish_updates)):
-
-          def update_global_step(global_step):
-            if isinstance(global_step, resource_variable_ops.ResourceVariable):
-              return global_step.assign_add(
-                  ops.convert_to_tensor(1, dtype=global_step.dtype),
-                  read_value=False)
-            else:
-              return state_ops.assign_add(global_step, 1)
-
-          apply_updates = distribution.group(
-              distribution.update(global_step, update_global_step), name=name)
+        with ops.control_dependencies(finish_updates):
+
+          def update_global_step(global_step, name):
+            return global_step.assign_add(1, read_value=False, name=name)
+
+          apply_updates = distribution.update(
+              global_step, update_global_step, name)
 
       # Add the training op to the TRAIN_OP graph collection in graph mode.
       if not eager_execution:
diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py
index 419a9ec12b..a92a1bdee7 100644
--- a/tensorflow/python/training/distribute.py
+++ b/tensorflow/python/training/distribute.py
@@ -26,7 +26,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops.losses import losses_impl
 from tensorflow.python.platform import tf_logging
@@ -807,15 +806,22 @@ class DistributionStrategy(object):
       var: Variable, possibly mirrored to multiple devices, to operate on.
       fn: Function to call. Should take the variable as the first argument.
       *args: Additional positional arguments to pass to `fn()`.
-      **kwargs: Keyword arguments to pass to `fn()`.
+      **kwargs: Keyword arguments to pass to `fn()`. If "grouped=False" is
+        specified, the return value will be unwrapped.
 
     Returns:
-      Merged return value of `fn` across all towers.
+      By default, the merged return value of `fn` across all towers.  The merged
+      result has dependencies to make sure that if it is evaluated at all, the
+      side effects (updates) will happen on every tower. If instead
+      "grouped=False" is specified, this function will return a nest of lists
+      where each list has an element per tower, and the caller is responsible
+      for ensuring all elements are executed.
     """
     _require_cross_tower_context(self)
-    return self._update(var, fn, *args, **kwargs)
+    options = {"grouped": kwargs.pop("grouped", True)}
+    return self._update(var, options, fn, *args, **kwargs)
 
-  def _update(self, var, fn, *args, **kwargs):
+  def _update(self, var, options, fn, *args, **kwargs):
     raise NotImplementedError("must be implemented in descendants")
 
   def update_non_slot(self, colocate_with, fn, *args, **kwargs):
@@ -825,15 +831,18 @@ class DistributionStrategy(object):
       colocate_with: The return value of `non_slot_devices()`.
       fn: Function to execute.
       *args: Positional arguments to pass to `fn()`.
-      **kwargs: Keyword arguments to pass to `fn()`.
+      **kwargs: Keyword arguments to pass to `fn()`. If "grouped=False" is
+        specified, the return value will be unwrapped and the caller is
+        responsible for ensuring all elements are executed.
 
     Returns:
       Return value of `fn`, possibly merged across devices.
     """
     _require_cross_tower_context(self)
-    return self._update_non_slot(colocate_with, fn, *args, **kwargs)
+    options = {"grouped": kwargs.pop("grouped", True)}
+    return self._update_non_slot(colocate_with, options, fn, *args, **kwargs)
 
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
     raise NotImplementedError("must be implemented in descendants")
 
   def unwrap(self, value):
@@ -1134,17 +1143,22 @@ class _DefaultDistributionStrategy(DistributionStrategy):
     del aggregation, destinations
     return value
 
-  def _update(self, var, fn, *args, **kwargs):
-    # TODO(josh11b): Figure out what we should be passing to UpdateContext()
-    # once that value is used for something.
-    with ops.colocate_with(var), UpdateContext(var):
-      return fn(var, *args, **kwargs)
+  def _update(self, var, options, fn, *args, **kwargs):
+    # The implementations of _update() and _update_non_slot() are identical
+    # except _update() passes `var` as the first argument to `fn()`.
+    return self._update_non_slot(var, options, fn, var, *args, **kwargs)
 
-  def _update_non_slot(self, colocate_with, fn, *args, **kwargs):
+  def _update_non_slot(self, colocate_with, options, fn, *args, **kwargs):
+    should_group = options.pop("grouped")
+    assert not options  # Validate that we are processing all of the options.
     # TODO(josh11b): Figure out what we should be passing to UpdateContext()
     # once that value is used for something.
     with ops.colocate_with(colocate_with), UpdateContext(colocate_with):
-      return fn(*args, **kwargs)
+      result = fn(*args, **kwargs)
+      if should_group:
+        return result
+      else:
+        return nest.map_structure(self._unwrap, result)
 
   def read_var(self, tower_local_var):
     return array_ops.identity(tower_local_var)
@@ -1193,13 +1207,10 @@ class _DefaultDistributionStrategy(DistributionStrategy):
 def increment_var(v, amount=1):
   """`v += amount`, distributed-aware version."""
   def update(vu):
-    if isinstance(vu, resource_variable_ops.ResourceVariable):
-      return vu.assign_add(amount, read_value=False)
-    else:
-      return state_ops.assign_add(vu, amount)
+    return vu.assign_add(amount, read_value=False)
 
   def merge_fn(dist, vm):
-    return dist.group(dist.update(vm, update))
+    return dist.update(vm, update)
 
   tower_context = distribution_strategy_context.get_tower_context()
   return tower_context.merge_call(merge_fn, v)
diff --git a/tensorflow/python/training/distribution_strategy_context.py b/tensorflow/python/training/distribution_strategy_context.py
index 998b5c35ce..ce580a406f 100644
--- a/tensorflow/python/training/distribution_strategy_context.py
+++ b/tensorflow/python/training/distribution_strategy_context.py
@@ -89,6 +89,7 @@ def get_tower_context():
   """Returns the current TowerContext or None if in a cross-tower context.
 
   Note that execution:
+
   1. starts in the default (single-tower) tower context (this function
      will return the default TowerContext object);
   2. switches to cross-tower context (in which case this will return
@@ -121,6 +122,7 @@ def get_cross_tower_context():
   """Returns the current DistributionStrategy if in a cross-tower context.
 
   Note that execution:
+
   1. starts in the default (single-tower) tower context;
   2. switches to cross-tower context when entering a
      `with DistributionStrategy.scope():` block;
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index 30b0ed20c8..47034919e1 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -692,7 +692,7 @@ class Optimizer(
       update_ops = [
           op
           for grad, var in grads_and_vars
-          for op in distribution.unwrap(distribution.update(var, update, grad))
+          for op in distribution.update(var, update, grad, grouped=False)
       ]
 
       def finish(self, update_ops):
@@ -700,13 +700,13 @@ class Optimizer(
 
       non_slot_devices = distribution.non_slot_devices(var_list)
       finish_updates = distribution.update_non_slot(
-          non_slot_devices, finish, self, update_ops)
+          non_slot_devices, finish, self, update_ops, grouped=False)
       if global_step is None:
         apply_updates = distribution.group(finish_updates, name=name)
       else:
-        with ops.control_dependencies(distribution.unwrap(finish_updates)):
-          apply_updates = distribution.group(distribution.update(
-              global_step, state_ops.assign_add, 1, name=name))
+        with ops.control_dependencies(finish_updates):
+          apply_updates = distribution.update(
+              global_step, state_ops.assign_add, 1, name=name)
 
       if not context.executing_eagerly():
         if isinstance(apply_updates, ops.Tensor):
-- 
GitLab


From b72265dc002e712fc3d0f33434f13c7a36a484b2 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 1 Oct 2018 16:45:11 -0700
Subject: [PATCH 0189/1085] [tf.data] Deprecate `tf.contrib.data` and introduce
 `tf.data.experimental` to replace it.

This change prepares `tf.data` for TensorFlow 2.0, where `tf.contrib` will no longer exist. It retains the pre-existing endpoints in `tf.contrib.data` with deprecation warnings.

Note there are some exceptions to the move:

* Deprecated symbols in `tf.contrib.data` have not been moved to `tf.data.experimental`, because replacements already exist.
* `tf.contrib.data.LMDBDataset` has not been moved, because we plan to move it to a SIG-maintained repository.
* `tf.contrib.data.assert_element_shape()` has not yet been moved, because it depends on functionality in `tf.contrib`, and it will move in a later change.
* `tf.contrib.data.AUTOTUNE` has not yet been moved, because we have not yet determined how to `tf_export()` a Python integer.
* The stats-related API endpoints have not yet appeared in a released version of TensorFlow, so these are moved to `tf.data.experimental` without retaining an endpoint in `tf.contrib.data`.

In addition, this change includes some build rule and ApiDef refactoring:
* Some of the "//third_party/tensorflow/python:training" dependencies had to be split in order to avoid a circular dependency.
* The `tf.contrib.stateless` ops now have a private core library for the generated wrappers (and accordingly are hidden in their ApiDef) so that `tf.data.experimental.sample_from_datasets()` can depend on them.

PiperOrigin-RevId: 215304249
---
 tensorflow/contrib/bigtable/README.md         |   4 +-
 .../bigtable/python/ops/bigtable_api.py       |   4 +-
 tensorflow/contrib/cmake/python_modules.txt   |   1 -
 tensorflow/contrib/data/README.md             |  18 +-
 tensorflow/contrib/data/__init__.py           |  11 +-
 .../contrib/data/python/kernel_tests/BUILD    | 560 +----------
 .../kernel_tests/assert_element_shape_test.py | 226 +++++
 .../kernel_tests/reduce_dataset_test.py       |  62 ++
 .../kernel_tests/window_dataset_op_test.py    | 527 ----------
 tensorflow/contrib/data/python/ops/BUILD      | 170 +---
 .../contrib/data/python/ops/batching.py       | 549 +----------
 tensorflow/contrib/data/python/ops/counter.py |  13 +-
 .../contrib/data/python/ops/enumerate_ops.py  |  15 +-
 .../contrib/data/python/ops/error_ops.py      |  37 +-
 .../data/python/ops/get_single_element.py     |  29 +-
 .../contrib/data/python/ops/grouping.py       | 441 +--------
 .../contrib/data/python/ops/interleave_ops.py | 149 +--
 .../contrib/data/python/ops/iterator_ops.py   | 167 +---
 .../contrib/data/python/ops/parsing_ops.py    | 107 +--
 .../data/python/ops/prefetching_ops.py        | 486 +---------
 .../contrib/data/python/ops/random_ops.py     |  34 +-
 tensorflow/contrib/data/python/ops/readers.py | 674 +------------
 .../contrib/data/python/ops/resampling.py     | 260 +----
 .../contrib/data/python/ops/scan_ops.py       | 137 +--
 .../contrib/data/python/ops/shuffle_ops.py    |  56 +-
 .../contrib/data/python/ops/threadpool.py     |  88 +-
 tensorflow/contrib/data/python/ops/unique.py  |  43 +-
 tensorflow/contrib/data/python/ops/writers.py |  40 +-
 .../distribute/python/prefetching_ops_v2.py   |   2 +-
 tensorflow/contrib/eager/python/datasets.py   |   4 +-
 .../contrib/eager/python/datasets_test.py     |   6 +-
 .../python/examples/revnet/imagenet_input.py  |  12 +-
 .../estimator/python/estimator/rnn_test.py    |   2 +-
 tensorflow/contrib/lookup/lookup_ops_test.py  |   2 +-
 tensorflow/contrib/stateless/BUILD            |   8 +-
 tensorflow/contrib/stateless/__init__.py      |   5 +-
 tensorflow/contrib/tpu/python/tpu/datasets.py |   4 +-
 tensorflow/contrib/tpu/tpu_estimator.md       |   2 +-
 tensorflow/contrib/training/BUILD             |   2 +-
 .../training/tensor_queue_dataset_test.py     |   2 +-
 .../api_def_StatelessMultinomial.pbtxt        |   4 +
 .../api_def_StatelessRandomNormal.pbtxt       |   4 +
 .../api_def_StatelessRandomUniform.pbtxt      |   4 +
 .../api_def_StatelessTruncatedNormal.pbtxt    |   4 +
 .../examples/get_started/regression/test.py   |   2 +-
 tensorflow/python/BUILD                       |  34 +
 tensorflow/python/data/BUILD                  |   1 +
 tensorflow/python/data/__init__.py            |   1 +
 tensorflow/python/data/experimental/BUILD     |  16 +
 .../python/data/experimental/__init__.py      | 109 +++
 .../data/experimental/kernel_tests/BUILD      | 569 +++++++++++
 .../kernel_tests/batch_dataset_op_test.py     | 317 +-----
 .../kernel_tests/bucketing_test.py            |   2 +-
 .../kernel_tests/csv_dataset_op_test.py       |   4 +-
 .../dataset_constructor_op_test.py            |   2 +-
 .../dataset_serialization_test_base.py        |   2 +-
 .../directed_interleave_dataset_test.py       |   4 +-
 .../kernel_tests/filter_dataset_op_test.py    |   2 +-
 .../kernel_tests/get_single_element_test.py   |  30 +-
 .../kernel_tests/indexed_dataset_ops_test.py  |   2 +-
 .../interleave_dataset_op_test.py             |   2 +-
 .../kernel_tests/iterator_ops_test.py         |   2 +-
 .../kernel_tests/map_dataset_op_test.py       |   6 +-
 .../kernel_tests/map_defun_op_test.py         |   2 +-
 .../kernel_tests/optimization/BUILD           |  30 +-
 .../assert_next_dataset_op_test.py            |   2 +-
 .../optimization/hoist_random_uniform_test.py |   2 +-
 .../optimization/latency_all_edges_test.py    |   6 +-
 .../map_and_filter_fusion_test.py             |   2 +-
 .../optimization/map_parallelization_test.py  |   2 +-
 .../optimization/map_vectorization_test.py    |   2 +-
 .../optimization/model_dataset_op_test.py     |   4 +-
 .../optimization/noop_elimination_test.py     |   2 +-
 .../optimization/optimize_dataset_op_test.py  |   2 +-
 .../kernel_tests/parsing_ops_test.py          |   3 +-
 .../kernel_tests/prefetching_ops_test.py      |   2 +-
 .../kernel_tests/range_dataset_op_test.py     |   4 +-
 .../kernel_tests/reader_dataset_ops_test.py   |   4 +-
 .../reader_dataset_ops_test_base.py           |   2 +-
 .../kernel_tests/resample_test.py             |   2 +-
 .../kernel_tests/scan_dataset_op_test.py      |   2 +-
 .../kernel_tests/serialization/BUILD          |  46 +-
 .../batch_dataset_serialization_test.py       |   4 +-
 .../cache_dataset_serialization_test.py       |   2 +-
 .../concatenate_dataset_serialization_test.py |   2 +-
 .../csv_dataset_serialization_test.py         |   4 +-
 .../dataset_constructor_serialization_test.py |   2 +-
 .../dataset_serialization_test_base.py        | 692 ++++++++++++++
 .../filter_dataset_serialization_test.py      |   2 +-
 ...ength_record_dataset_serialization_test.py |   4 +-
 .../flat_map_dataset_serialization_test.py    |   2 +-
 .../group_by_reducer_serialization_test.py    |   4 +-
 .../group_by_window_serialization_test.py     |   4 +-
 .../ignore_errors_serialization_test.py       |   4 +-
 .../interleave_dataset_serialization_test.py  |   2 +-
 ...ap_and_batch_dataset_serialization_test.py |   4 +-
 .../map_dataset_serialization_test.py         |   2 +-
 .../optimize_dataset_serialization_test.py    |   4 +-
 ...padded_batch_dataset_serialization_test.py |   2 +-
 ...l_interleave_dataset_serialization_test.py |   4 +-
 ...parallel_map_dataset_serialization_test.py |   4 +-
 ...arse_example_dataset_serialization_test.py |   4 +-
 .../prefetch_dataset_serialization_test.py    |   2 +-
 .../range_dataset_serialization_test.py       |   2 +-
 ...sample_from_datasets_serialization_test.py |   4 +-
 .../scan_dataset_serialization_test.py        |   4 +-
 .../sequence_dataset_serialization_test.py    |   2 +-
 .../serialization_integration_test.py         |   2 +-
 ...e_and_repeat_dataset_serialization_test.py |   4 +-
 .../shuffle_dataset_serialization_test.py     |   4 +-
 .../sql_dataset_serialization_test.py         |   6 +-
 .../stats_dataset_serialization_test.py       |   4 +-
 .../textline_dataset_serialization_test.py    |   4 +-
 .../tf_record_dataset_serialization_test.py   |   4 +-
 .../unbatch_dataset_serialization_test.py     |   4 +-
 .../unique_dataset_serialization_test.py      |   4 +-
 .../zip_dataset_serialization_test.py         |   2 +-
 .../serialization_integration_test.py         |  85 ++
 .../kernel_tests/shuffle_dataset_op_test.py   |   2 +-
 .../kernel_tests/sql_dataset_op_test.py       |   2 +-
 .../kernel_tests/sql_dataset_op_test_base.py  |   2 +-
 .../kernel_tests/stats_dataset_ops_test.py    |   4 +-
 .../kernel_tests/stats_dataset_test_base.py   |   0
 .../threadpool_dataset_ops_test.py            |   4 +-
 .../kernel_tests/unique_dataset_op_test.py    |   2 +-
 .../kernel_tests/writer_ops_test.py           |   2 +-
 tensorflow/python/data/experimental/ops/BUILD | 377 ++++++++
 .../python/data/experimental/ops/batching.py  | 669 +++++++++++++
 .../python/data/experimental/ops/counter.py   |  55 ++
 .../data/experimental/ops/enumerate_ops.py    |  60 ++
 .../python/data/experimental/ops/error_ops.py |  78 ++
 .../experimental/ops/get_single_element.py    |  72 ++
 .../python/data/experimental/ops/grouping.py  | 551 +++++++++++
 .../experimental}/ops/indexed_dataset_ops.py  |   0
 .../data/experimental/ops/interleave_ops.py   | 262 +++++
 .../data/experimental/ops/iterator_ops.py     | 268 ++++++
 .../data/experimental}/ops/map_defun.py       |   0
 .../data/experimental}/ops/optimization.py    |   0
 .../data/experimental/ops/parsing_ops.py      | 152 +++
 .../data/experimental/ops/prefetching_ops.py  | 531 ++++++++++
 .../data/experimental/ops/random_ops.py       |  54 ++
 .../python/data/experimental/ops/readers.py   | 904 ++++++++++++++++++
 .../data/experimental/ops/resampling.py       | 296 ++++++
 .../python/data/experimental/ops/scan_ops.py  | 177 ++++
 .../data/experimental/ops/shuffle_ops.py      | 102 ++
 .../data/experimental}/ops/stats_ops.py       |  14 +-
 .../data/experimental/ops/threadpool.py       | 104 ++
 .../python/data/experimental/ops/unique.py    |  79 ++
 .../python/data/experimental/ops/writers.py   |  60 ++
 tensorflow/python/data/ops/dataset_ops.py     |   4 +-
 tensorflow/python/data/ops/optional_ops.py    |   4 +-
 tensorflow/python/data/ops/readers.py         |   4 +-
 .../debug/examples/debug_tflearn_iris.py      |  14 +-
 .../tools/api/generator/api_init_files.bzl    |   1 +
 .../tools/api/generator/api_init_files_v1.bzl |   1 +
 ...ntal.-checkpoint-input-pipeline-hook.pbtxt |  30 +
 ...erimental.-csv-dataset.__metaclass__.pbtxt |  14 +
 ...rflow.data.experimental.-csv-dataset.pbtxt | 127 +++
 ...nsorflow.data.experimental.-optional.pbtxt |  28 +
 ...mental.-random-dataset.__metaclass__.pbtxt |  14 +
 ...ow.data.experimental.-random-dataset.pbtxt | 127 +++
 ...ensorflow.data.experimental.-reducer.pbtxt |  21 +
 ...erimental.-sql-dataset.__metaclass__.pbtxt |  14 +
 ...rflow.data.experimental.-sql-dataset.pbtxt | 127 +++
 ....data.experimental.-stats-aggregator.pbtxt |  13 +
 ...data.experimental.-t-f-record-writer.pbtxt |  13 +
 .../v1/tensorflow.data.experimental.pbtxt     | 139 +++
 .../tools/api/golden/v1/tensorflow.data.pbtxt |   4 +
 ...ntal.-checkpoint-input-pipeline-hook.pbtxt |  30 +
 ...erimental.-csv-dataset.__metaclass__.pbtxt |  14 +
 ...rflow.data.experimental.-csv-dataset.pbtxt | 127 +++
 ...nsorflow.data.experimental.-optional.pbtxt |  28 +
 ...mental.-random-dataset.__metaclass__.pbtxt |  14 +
 ...ow.data.experimental.-random-dataset.pbtxt | 127 +++
 ...ensorflow.data.experimental.-reducer.pbtxt |  21 +
 ...erimental.-sql-dataset.__metaclass__.pbtxt |  14 +
 ...rflow.data.experimental.-sql-dataset.pbtxt | 127 +++
 ....data.experimental.-stats-aggregator.pbtxt |  13 +
 ...data.experimental.-t-f-record-writer.pbtxt |  13 +
 .../v2/tensorflow.data.experimental.pbtxt     | 139 +++
 .../tools/api/golden/v2/tensorflow.data.pbtxt |   4 +
 tensorflow/tools/pip_package/BUILD            |   4 +-
 182 files changed, 8389 insertions(+), 4960 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py
 delete mode 100644 tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
 create mode 100644 tensorflow/core/api_def/python_api/api_def_StatelessMultinomial.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_StatelessRandomNormal.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_StatelessRandomUniform.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_StatelessTruncatedNormal.pbtxt
 create mode 100644 tensorflow/python/data/experimental/BUILD
 create mode 100644 tensorflow/python/data/experimental/__init__.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/BUILD
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/batch_dataset_op_test.py (67%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/bucketing_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/csv_dataset_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/dataset_constructor_op_test.py (97%)
 rename tensorflow/{contrib/data/python/kernel_tests/serialization => python/data/experimental/kernel_tests}/dataset_serialization_test_base.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/directed_interleave_dataset_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/filter_dataset_op_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/get_single_element_test.py (76%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/indexed_dataset_ops_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/interleave_dataset_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/iterator_ops_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/map_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/map_defun_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/BUILD (81%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/assert_next_dataset_op_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/hoist_random_uniform_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/latency_all_edges_test.py (91%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/map_and_filter_fusion_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/map_parallelization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/map_vectorization_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/model_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/noop_elimination_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/optimization/optimize_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/parsing_ops_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/prefetching_ops_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/range_dataset_op_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/reader_dataset_ops_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/reader_dataset_ops_test_base.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/resample_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/scan_dataset_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/BUILD (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/batch_dataset_serialization_test.py (94%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/cache_dataset_serialization_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/concatenate_dataset_serialization_test.py (94%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/csv_dataset_serialization_test.py (93%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/dataset_constructor_serialization_test.py (97%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/filter_dataset_serialization_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/flat_map_dataset_serialization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/group_by_reducer_serialization_test.py (93%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/group_by_window_serialization_test.py (93%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/ignore_errors_serialization_test.py (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/interleave_dataset_serialization_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py (94%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/map_dataset_serialization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/optimize_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/padded_batch_dataset_serialization_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/parallel_map_dataset_serialization_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/parse_example_dataset_serialization_test.py (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/prefetch_dataset_serialization_test.py (93%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/range_dataset_serialization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/sample_from_datasets_serialization_test.py (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/scan_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/sequence_dataset_serialization_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/serialization_integration_test.py (97%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/shuffle_dataset_serialization_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/sql_dataset_serialization_test.py (88%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/stats_dataset_serialization_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/textline_dataset_serialization_test.py (90%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/tf_record_dataset_serialization_test.py (95%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/unbatch_dataset_serialization_test.py (91%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/unique_dataset_serialization_test.py (89%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/zip_dataset_serialization_test.py (94%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/shuffle_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/sql_dataset_op_test.py (99%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/sql_dataset_op_test_base.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/stats_dataset_ops_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/stats_dataset_test_base.py (100%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/threadpool_dataset_ops_test.py (96%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/unique_dataset_op_test.py (98%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/writer_ops_test.py (98%)
 create mode 100644 tensorflow/python/data/experimental/ops/BUILD
 create mode 100644 tensorflow/python/data/experimental/ops/batching.py
 create mode 100644 tensorflow/python/data/experimental/ops/counter.py
 create mode 100644 tensorflow/python/data/experimental/ops/enumerate_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/error_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/get_single_element.py
 create mode 100644 tensorflow/python/data/experimental/ops/grouping.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/ops/indexed_dataset_ops.py (100%)
 create mode 100644 tensorflow/python/data/experimental/ops/interleave_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/iterator_ops.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/ops/map_defun.py (100%)
 rename tensorflow/{contrib/data/python => python/data/experimental}/ops/optimization.py (100%)
 create mode 100644 tensorflow/python/data/experimental/ops/parsing_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/prefetching_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/random_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/readers.py
 create mode 100644 tensorflow/python/data/experimental/ops/resampling.py
 create mode 100644 tensorflow/python/data/experimental/ops/scan_ops.py
 create mode 100644 tensorflow/python/data/experimental/ops/shuffle_ops.py
 rename tensorflow/{contrib/data/python => python/data/experimental}/ops/stats_ops.py (92%)
 create mode 100644 tensorflow/python/data/experimental/ops/threadpool.py
 create mode 100644 tensorflow/python/data/experimental/ops/unique.py
 create mode 100644 tensorflow/python/data/experimental/ops/writers.py
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-reducer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-stats-aggregator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-t-f-record-writer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-reducer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-stats-aggregator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-t-f-record-writer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt

diff --git a/tensorflow/contrib/bigtable/README.md b/tensorflow/contrib/bigtable/README.md
index f33eaf7e3d..2c44abed5e 100644
--- a/tensorflow/contrib/bigtable/README.md
+++ b/tensorflow/contrib/bigtable/README.md
@@ -203,7 +203,7 @@ def interleave_fn(index):
   start = tf.string_join(['training_data_', start_idx_str])
   end = tf.string_join(['training_data_', end_idx_str])
   return table.scan_range(start_idx, end_idx, columns=columns)
-ds = ds.apply(tf.contrib.data.parallel_interleave(
+ds = ds.apply(tf.data.experimental.parallel_interleave(
     interleave_fn, cycle_length=NUM_PARALLEL_READS, prefetch_input_elements=1))
 ```
 
@@ -249,7 +249,7 @@ def make_row_key_dataset():
    - ...
    - fake-data-23498103
   """
-  counter_dataset = tf.contrib.data.Counter()
+  counter_dataset = tf.data.experimental.Counter()
   width = 8
   row_key_prefix = 'fake-data-'
   ds = counter_dataset.map(lambda index: tf.as_string(index,
diff --git a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
index cf56822ff4..7c87b0daeb 100644
--- a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
+++ b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
@@ -31,8 +31,8 @@ from six import iteritems
 from six import string_types
 
 from tensorflow.contrib.bigtable.ops import gen_bigtable_ops
-from tensorflow.contrib.data.python.ops import interleave_ops
 from tensorflow.contrib.util import loader
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import dtypes
@@ -228,7 +228,7 @@ class BigtableTable(object):
     """Retrieves a sampling of row keys from the Bigtable table.
 
     This dataset is most often used in conjunction with
-    `tf.contrib.data.parallel_interleave` to construct a set of ranges for
+    `tf.data.experimental.parallel_interleave` to construct a set of ranges for
     scanning in parallel.
 
     Returns:
diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt
index 9b80eb559f..6e72670142 100644
--- a/tensorflow/contrib/cmake/python_modules.txt
+++ b/tensorflow/contrib/cmake/python_modules.txt
@@ -134,7 +134,6 @@ tensorflow/contrib/cudnn_rnn/python/ops
 tensorflow/contrib/data
 tensorflow/contrib/data/python
 tensorflow/contrib/data/python/kernel_tests
-tensorflow/contrib/data/python/kernel_tests/serialization
 tensorflow/contrib/data/python/ops
 tensorflow/contrib/decision_trees
 tensorflow/contrib/decision_trees/proto
diff --git a/tensorflow/contrib/data/README.md b/tensorflow/contrib/data/README.md
index 848782e8d8..90be7a66ca 100644
--- a/tensorflow/contrib/data/README.md
+++ b/tensorflow/contrib/data/README.md
@@ -1,10 +1,12 @@
 `tf.contrib.data` API
 =====================
 
-NOTE: The `tf.contrib.data` module has been deprecated. Use `tf.data` instead.
-We are continuing to support existing code using the `tf.contrib.data` APIs in
-the current version of TensorFlow, but will eventually remove support. The
-`tf.data` APIs are subject to backwards compatibility guarantees.
+NOTE: The `tf.contrib.data` module has been deprecated. Use `tf.data` instead,
+or `tf.data.experimental` for the experimental transformations previously hosted
+in this module. We are continuing to support existing code using the
+`tf.contrib.data` APIs in the current version of TensorFlow, but will eventually
+remove support. The non-experimental `tf.data` APIs are subject to backwards
+compatibility guarantees.
 
 Porting your code to `tf.data`
 ------------------------------
@@ -25,13 +27,13 @@ instead apply them using `Dataset.apply()` transformation. The full list of
 changes is as follows:
 
 * `dataset.dense_to_sparse_batch(...)` is now
-  `dataset.apply(tf.contrib.data.dense_to_sparse_batch(...)`.
+  `dataset.apply(tf.data.experimental.dense_to_sparse_batch(...)`.
 * `dataset.enumerate(...)` is now
-  `dataset.apply(tf.contrib.data.enumerate_dataset(...))`.
+  `dataset.apply(tf.data.experimental.enumerate_dataset(...))`.
 * `dataset.group_by_window(...)` is now
-  `dataset.apply(tf.contrib.data.group_by_window(...))`.
+  `dataset.apply(tf.data.experimental.group_by_window(...))`.
 * `dataset.ignore_errors()` is now
-  `dataset.apply(tf.contrib.data.ignore_errors())`.
+  `dataset.apply(tf.data.experimental.ignore_errors())`.
 * `dataset.unbatch()` is now `dataset.apply(tf.contrib.data.unbatch())`.
 
 The `Dataset.make_dataset_resource()` and `Iterator.dispose_op()` methods have
diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index 3cb51279c3..c3d3e981fa 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -96,10 +96,6 @@ from tensorflow.contrib.data.python.ops.interleave_ops import sample_from_datase
 from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave
 from tensorflow.contrib.data.python.ops.iterator_ops import CheckpointInputPipelineHook
 from tensorflow.contrib.data.python.ops.iterator_ops import make_saveable_from_iterator
-
-# Optimization constant that can be used to enable auto-tuning.
-from tensorflow.contrib.data.python.ops.optimization import AUTOTUNE
-
 from tensorflow.contrib.data.python.ops.parsing_ops import parse_example_dataset
 from tensorflow.contrib.data.python.ops.prefetching_ops import copy_to_device
 from tensorflow.contrib.data.python.ops.prefetching_ops import prefetch_to_device
@@ -114,11 +110,12 @@ from tensorflow.contrib.data.python.ops.resampling import rejection_resample
 from tensorflow.contrib.data.python.ops.scan_ops import scan
 from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat
 from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch
-from tensorflow.contrib.data.python.ops.stats_ops import latency_stats
-from tensorflow.contrib.data.python.ops.stats_ops import set_stats_aggregator
-from tensorflow.contrib.data.python.ops.stats_ops import StatsAggregator
 from tensorflow.contrib.data.python.ops.unique import unique
 from tensorflow.contrib.data.python.ops.writers import TFRecordWriter
+
+# Optimization constant that can be used to enable auto-tuning.
+from tensorflow.python.data.experimental.ops.optimization import AUTOTUNE
+
 from tensorflow.python.data.ops.iterator_ops import get_next_as_optional
 from tensorflow.python.data.ops.optional_ops import Optional
 # pylint: enable=unused-import
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 33784afa3f..42f538b4ba 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -8,51 +8,17 @@ load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 load("//tensorflow:tensorflow.bzl", "py_test")
 
 py_test(
-    name = "batch_dataset_op_test",
-    size = "medium",
-    srcs = ["batch_dataset_op_test.py"],
+    name = "assert_element_shape_test",
+    srcs = ["assert_element_shape_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",  # (b/79552534)
-        "no_pip",
-    ],
     deps = [
         "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
         "//tensorflow/python:script_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-py_test(
-    name = "bucketing_test",
-    size = "medium",
-    srcs = ["bucketing_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:grouping",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python/data/kernel_tests:test_base",
@@ -61,147 +27,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "csv_dataset_op_test",
-    size = "medium",
-    srcs = ["csv_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:error_ops",
-        "//tensorflow/contrib/data/python/ops:readers",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:session",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/python/eager:context",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "dataset_constructor_op_test",
-    size = "medium",
-    srcs = ["dataset_constructor_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "manual",
-        "nomac",  # b/62040583
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-    ],
-)
-
-py_test(
-    name = "directed_interleave_dataset_test",
-    size = "medium",
-    srcs = ["directed_interleave_dataset_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:random_seed",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "get_single_element_test",
-    size = "small",
-    srcs = ["get_single_element_test.py"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:get_single_element",
-        "//tensorflow/contrib/data/python/ops:grouping",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-py_test(
-    name = "indexed_dataset_ops_test",
-    srcs = ["indexed_dataset_ops_test.py"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:indexed_dataset_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "interleave_dataset_op_test",
-    size = "medium",
-    srcs = ["interleave_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "notap",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "@six_archive//:six",
-    ],
-)
-
-py_test(
-    name = "iterator_ops_test",
-    size = "small",
-    srcs = ["iterator_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/estimator:estimator_py",
-    ],
-)
-
 py_test(
     name = "lmdb_dataset_op_test",
     size = "medium",
@@ -229,252 +54,18 @@ py_test(
 )
 
 py_test(
-    name = "map_dataset_op_test",
-    size = "medium",
-    srcs = ["map_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "noasan",  # times out
-        "optonly",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/contrib/data/python/ops:error_ops",
-        "//tensorflow/contrib/data/python/ops:optimization",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "filter_dataset_op_test",
-    size = "medium",
-    srcs = ["filter_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "map_defun_op_test",
+    name = "reduce_dataset_test",
     size = "small",
-    srcs = ["map_defun_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    srcs = ["reduce_dataset_test.py"],
     deps = [
-        "//tensorflow/contrib/data/python/ops:map_defun",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:data_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:function",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:session",
-        "//tensorflow/python/data/kernel_tests:test_base",
-    ],
-)
-
-py_test(
-    name = "parsing_ops_test",
-    size = "small",
-    srcs = ["parsing_ops_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:parsing_ops",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//third_party/py/numpy",
-    ],
-)
-
-cuda_py_test(
-    name = "prefetching_ops_test",
-    size = "small",
-    srcs = ["prefetching_ops_test.py"],
-    additional_deps = [
-        "//tensorflow/contrib/data/python/ops:prefetching_ops",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python/compat:compat",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-    tags = ["no_windows_gpu"],
-)
-
-py_test(
-    name = "range_dataset_op_test",
-    size = "small",
-    srcs = ["range_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/data/python/ops:counter",
-        "//tensorflow/contrib/data/python/ops:enumerate_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-py_library(
-    name = "reader_dataset_ops_test_base",
-    testonly = 1,
-    srcs = [
-        "reader_dataset_ops_test_base.py",
-    ],
-    srcs_version = "PY2AND3",
-    visibility = [
-        "//tensorflow/contrib/data/python/kernel_tests:__pkg__",
-        "//tensorflow/contrib/data/python/kernel_tests/serialization:__pkg__",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:readers",
-        "//tensorflow/core:protos_all_py",
+        "//tensorflow/contrib/data/python/ops:get_single_element",
+        "//tensorflow/contrib/data/python/ops:grouping",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:lib",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/ops:readers",
-    ],
-)
-
-py_test(
-    name = "reader_dataset_ops_test",
-    size = "medium",
-    srcs = ["reader_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        ":reader_dataset_ops_test_base",
-        "//tensorflow/contrib/data/python/ops:readers",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/python/data/util:nest",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "resample_test",
-    size = "medium",
-    srcs = ["resample_test.py"],
-    shard_count = 2,
-    srcs_version = "PY2AND3",
-    tags = [
-        "noasan",
-        "optonly",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:resampling",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:util",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
-        "@six_archive//:six",
-    ],
-)
-
-py_test(
-    name = "scan_dataset_op_test",
-    size = "small",
-    srcs = ["scan_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:scan_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/eager:context",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "shuffle_dataset_op_test",
-    size = "medium",
-    srcs = ["shuffle_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "optonly",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:shuffle_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
     ],
 )
 
@@ -496,142 +87,3 @@ py_test(
         "@absl_py//absl/testing:parameterized",
     ],
 )
-
-py_library(
-    name = "sql_dataset_op_test_base",
-    srcs = ["sql_dataset_op_test_base.py"],
-    srcs_version = "PY2AND3",
-    visibility = [
-        "//tensorflow/contrib/data/python/kernel_tests:__pkg__",
-        "//tensorflow/contrib/data/python/kernel_tests/serialization:__pkg__",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:readers",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "@org_sqlite//:python",
-    ],
-)
-
-py_test(
-    name = "sql_dataset_op_test",
-    size = "small",
-    srcs = ["sql_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        ":sql_dataset_op_test_base",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-    ],
-)
-
-py_test(
-    name = "stats_dataset_ops_test",
-    size = "medium",
-    srcs = ["stats_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        ":reader_dataset_ops_test_base",
-        ":stats_dataset_test_base",
-        "//tensorflow/contrib/data/python/ops:stats_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "stats_dataset_test_base",
-    srcs = ["stats_dataset_test_base.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/data/kernel_tests:test_base",
-    ],
-)
-
-py_test(
-    name = "threadpool_dataset_ops_test",
-    size = "small",
-    srcs = ["threadpool_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:threadpool",
-        "//tensorflow/contrib/data/python/ops:unique",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-py_test(
-    name = "unique_dataset_op_test",
-    size = "small",
-    srcs = ["unique_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:unique",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-py_test(
-    name = "window_dataset_op_test",
-    size = "medium",
-    srcs = ["window_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-    ],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/contrib/data/python/ops:grouping",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-py_test(
-    name = "writer_ops_test",
-    size = "small",
-    srcs = ["writer_ops_test.py"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:writers",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:lib",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:readers",
-    ],
-)
diff --git a/tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py b/tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py
new file mode 100644
index 0000000000..0456463a19
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/assert_element_shape_test.py
@@ -0,0 +1,226 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import script_ops
+from tensorflow.python.platform import test
+
+
+class AssertElementShapeTest(test_base.DatasetTestBase):
+
+  def test_assert_element_shape(self):
+
+    def create_dataset(_):
+      return (array_ops.ones(2, dtype=dtypes.float32),
+              array_ops.zeros((3, 4), dtype=dtypes.int32))
+
+    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
+    expected_shapes = (tensor_shape.TensorShape(2),
+                       tensor_shape.TensorShape((3, 4)))
+    self.assertEqual(expected_shapes, dataset.output_shapes)
+
+    result = dataset.apply(batching.assert_element_shape(expected_shapes))
+    self.assertEqual(expected_shapes, result.output_shapes)
+
+    iterator = result.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for _ in range(5):
+        sess.run(get_next)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def test_assert_wrong_element_shape(self):
+
+    def create_dataset(_):
+      return (array_ops.ones(2, dtype=dtypes.float32),
+              array_ops.zeros((3, 4), dtype=dtypes.int32))
+
+    dataset = dataset_ops.Dataset.range(3).map(create_dataset)
+    wrong_shapes = (tensor_shape.TensorShape(2),
+                    tensor_shape.TensorShape((3, 10)))
+    with self.assertRaises(ValueError):
+      dataset.apply(batching.assert_element_shape(wrong_shapes))
+
+  def test_assert_element_shape_on_unknown_shape_dataset(self):
+
+    def create_unknown_shape_dataset(x):
+      return script_ops.py_func(
+          lambda _: (  # pylint: disable=g-long-lambda
+              np.ones(2, dtype=np.float32),
+              np.zeros((3, 4), dtype=np.int32)),
+          [x],
+          [dtypes.float32, dtypes.int32])
+
+    dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset)
+    unknown_shapes = (tensor_shape.TensorShape(None),
+                      tensor_shape.TensorShape(None))
+    self.assertEqual(unknown_shapes, dataset.output_shapes)
+
+    expected_shapes = (tensor_shape.TensorShape(2),
+                       tensor_shape.TensorShape((3, 4)))
+    result = dataset.apply(batching.assert_element_shape(expected_shapes))
+    self.assertEqual(expected_shapes, result.output_shapes)
+
+    iterator = result.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for _ in range(5):
+        sess.run(get_next)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def test_assert_wrong_element_shape_on_unknown_shape_dataset(self):
+
+    def create_unknown_shape_dataset(x):
+      return script_ops.py_func(
+          lambda _: (  # pylint: disable=g-long-lambda
+              np.ones(2, dtype=np.float32),
+              np.zeros((3, 4), dtype=np.int32)),
+          [x],
+          [dtypes.float32, dtypes.int32])
+
+    dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset)
+    unknown_shapes = (tensor_shape.TensorShape(None),
+                      tensor_shape.TensorShape(None))
+    self.assertEqual(unknown_shapes, dataset.output_shapes)
+
+    wrong_shapes = (tensor_shape.TensorShape(2),
+                    tensor_shape.TensorShape((3, 10)))
+    iterator = (
+        dataset.apply(batching.assert_element_shape(wrong_shapes))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(get_next)
+
+  def test_assert_partial_element_shape(self):
+
+    def create_dataset(_):
+      return (array_ops.ones(2, dtype=dtypes.float32),
+              array_ops.zeros((3, 4), dtype=dtypes.int32))
+
+    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
+    partial_expected_shape = (
+        tensor_shape.TensorShape(None),  # Unknown shape
+        tensor_shape.TensorShape((None, 4)))  # Partial shape
+    result = dataset.apply(
+        batching.assert_element_shape(partial_expected_shape))
+    # Partial shapes are merged with actual shapes:
+    actual_shapes = (tensor_shape.TensorShape(2),
+                     tensor_shape.TensorShape((3, 4)))
+    self.assertEqual(actual_shapes, result.output_shapes)
+
+    iterator = result.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for _ in range(5):
+        sess.run(get_next)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def test_assert_wrong_partial_element_shape(self):
+
+    def create_dataset(_):
+      return (array_ops.ones(2, dtype=dtypes.float32),
+              array_ops.zeros((3, 4), dtype=dtypes.int32))
+
+    dataset = dataset_ops.Dataset.range(3).map(create_dataset)
+    wrong_shapes = (tensor_shape.TensorShape(2),
+                    tensor_shape.TensorShape((None, 10)))
+    with self.assertRaises(ValueError):
+      dataset.apply(batching.assert_element_shape(wrong_shapes))
+
+  def test_assert_partial_element_shape_on_unknown_shape_dataset(self):
+
+    def create_unknown_shape_dataset(x):
+      return script_ops.py_func(
+          lambda _: (  # pylint: disable=g-long-lambda
+              np.ones(2, dtype=np.float32),
+              np.zeros((3, 4), dtype=np.int32)),
+          [x],
+          [dtypes.float32, dtypes.int32])
+
+    dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset)
+    unknown_shapes = (tensor_shape.TensorShape(None),
+                      tensor_shape.TensorShape(None))
+    self.assertEqual(unknown_shapes, dataset.output_shapes)
+
+    expected_shapes = (tensor_shape.TensorShape(2),
+                       tensor_shape.TensorShape((None, 4)))
+    result = dataset.apply(batching.assert_element_shape(expected_shapes))
+    self.assertEqual(expected_shapes, result.output_shapes)
+
+    iterator = result.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for _ in range(5):
+        sess.run(get_next)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def test_assert_wrong_partial_element_shape_on_unknown_shape_dataset(self):
+
+    def create_unknown_shape_dataset(x):
+      return script_ops.py_func(
+          lambda _: (  # pylint: disable=g-long-lambda
+              np.ones(2, dtype=np.float32),
+              np.zeros((3, 4), dtype=np.int32)),
+          [x],
+          [dtypes.float32, dtypes.int32])
+
+    dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset)
+    unknown_shapes = (tensor_shape.TensorShape(None),
+                      tensor_shape.TensorShape(None))
+    self.assertEqual(unknown_shapes, dataset.output_shapes)
+
+    wrong_shapes = (tensor_shape.TensorShape(2),
+                    tensor_shape.TensorShape((None, 10)))
+    iterator = (
+        dataset.apply(batching.assert_element_shape(wrong_shapes))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py b/tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py
new file mode 100644
index 0000000000..e7281d5318
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/reduce_dataset_test.py
@@ -0,0 +1,62 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.contrib.data.python.ops import get_single_element
+from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class ReduceDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ("SumZero", 0),
+      ("SumOne", 1),
+      ("SumFive", 5),
+      ("SumTen", 10),
+  )
+  def testReduceDataset(self, stop):
+    def init_fn(_):
+      return np.int64(0)
+
+    def reduce_fn(state, value):
+      return state + value
+
+    def finalize_fn(state):
+      return state
+
+    sum_reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
+
+    stop_t = array_ops.placeholder(dtypes.int64, shape=[])
+    dataset = dataset_ops.Dataset.range(stop_t)
+    element = get_single_element.reduce_dataset(dataset, sum_reducer)
+
+    with self.cached_session() as sess:
+      value = sess.run(element, feed_dict={stop_t: stop})
+      self.assertEqual(stop * (stop - 1) / 2, value)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
deleted file mode 100644
index 79134c7bc6..0000000000
--- a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py
+++ /dev/null
@@ -1,527 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for the experimental input pipeline ops."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import grouping
-from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.platform import test
-
-
-class WindowDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
-
-  def _structuredDataset(self, structure, shape, dtype):
-    if structure is None:
-      return dataset_ops.Dataset.from_tensors(
-          array_ops.zeros(shape, dtype=dtype))
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self._structuredDataset(substructure, shape, dtype)
-              for substructure in structure
-          ]))
-
-  def _structuredElement(self, structure, shape, dtype):
-    if structure is None:
-      return array_ops.zeros(shape, dtype=dtype)
-    else:
-      return tuple([
-          self._structuredElement(substructure, shape, dtype)
-          for substructure in structure
-      ])
-
-  def _assertEqual(self, xs, ys):
-    self.assertEqual(type(xs), type(ys))
-    if isinstance(xs, tuple) and isinstance(ys, tuple):
-      self.assertEqual(len(xs), len(ys))
-      for x, y in zip(xs, ys):
-        self._assertEqual(x, y)
-    elif isinstance(xs, np.ndarray) and isinstance(ys, np.ndarray):
-      self.assertAllEqual(xs, ys)
-    else:
-      self.assertEqual(xs, ys)
-
-  @parameterized.named_parameters(
-      ("1", None, np.int32([]), dtypes.bool),
-      ("2", None, np.int32([]), dtypes.int32),
-      ("3", None, np.int32([]), dtypes.float32),
-      ("4", None, np.int32([]), dtypes.string),
-      ("5", None, np.int32([2]), dtypes.int32),
-      ("6", None, np.int32([2, 2]), dtypes.int32),
-      ("7", (None, None, None), np.int32([]), dtypes.int32),
-      ("8", (None, (None, None)), np.int32([]), dtypes.int32),
-  )
-  def testWindowDatasetFlatMap(self, structure, shape, dtype):
-    """Tests windowing by chaining it with flat map.
-
-    Args:
-      structure: the input structure
-      shape: the input shape
-      dtype: the input data type
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return args[0]
-      return dataset_ops.Dataset.zip(
-          tuple([fn(*arg) if isinstance(arg, tuple) else arg for arg in args]))
-
-    dataset = self._structuredDataset(structure, shape, dtype).repeat(5).apply(
-        grouping.window_dataset(5)).flat_map(fn)
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected = sess.run(self._structuredElement(structure, shape, dtype))
-      for _ in range(5):
-        actual = sess.run(get_next)
-        self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", None, np.int32([]), dtypes.bool),
-      ("2", None, np.int32([]), dtypes.int32),
-      ("3", None, np.int32([]), dtypes.float32),
-      ("4", None, np.int32([]), dtypes.string),
-      ("5", None, np.int32([2]), dtypes.int32),
-      ("6", None, np.int32([2, 2]), dtypes.int32),
-      ("7", (None, None, None), np.int32([]), dtypes.int32),
-      ("8", (None, (None, None)), np.int32([]), dtypes.int32),
-  )
-  def testWindowDatasetBatchDense(self, structure, shape, dtype):
-    """Tests batching of dense tensor windows.
-
-    Args:
-      structure: the input structure
-      shape: the input shape
-      dtype: the input data type
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return batching.batch_window(args[0])
-
-      return tuple([
-          fn(*arg) if isinstance(arg, tuple) else batching.batch_window(arg)
-          for arg in args
-      ])
-
-    dataset = self._structuredDataset(structure, shape, dtype).repeat(5).apply(
-        grouping.window_dataset(5)).apply(grouping._map_x_dataset(fn))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected = sess.run(
-          self._structuredElement(structure, np.concatenate(
-              ([5], shape), axis=0), dtype))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int32([])),
-      ("2", np.int32([1])),
-      ("3", np.int32([1, 2, 3])),
-  )
-  def testWindowDatasetBatchDenseDynamicShape(self, shape):
-    """Tests batching of dynamically shaped dense tensor windows.
-
-    Args:
-      shape: the input shape
-    """
-
-    shape_t = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensors(
-        array_ops.zeros(shape_t)).repeat(5).apply(
-            grouping.window_dataset(5)).apply(
-                grouping._map_x_dataset(batching.batch_window))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op, {shape_t: shape})
-      expected = sess.run(
-          self._structuredElement(None, np.concatenate(([5], shape), axis=0),
-                                  dtypes.int32))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  def _make_dense_to_sparse_fn(self, is_scalar):
-
-    def dense_to_sparse_scalar(tensor):
-      indices = [[]]
-      values = array_ops.expand_dims(tensor, 0)
-      shape = []
-      return sparse_tensor.SparseTensorValue(indices, values, shape)
-
-    def dense_to_sparse_non_scalar(tensor):
-      indices = array_ops.where(array_ops.ones_like(tensor, dtype=dtypes.bool))
-      values = array_ops.gather_nd(tensor, indices)
-      shape = array_ops.shape(tensor, out_type=dtypes.int64)
-      return sparse_tensor.SparseTensorValue(indices, values, shape)
-
-    if is_scalar:
-      return dense_to_sparse_scalar
-    return dense_to_sparse_non_scalar
-
-  def _structuredSparseDataset(self, structure, shape, dtype):
-    dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0)  # pylint: disable=g-explicit-length-test
-    if structure is None:
-      return dataset_ops.Dataset.from_tensors(
-          dense_to_sparse(array_ops.zeros(shape, dtype=dtype)))
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self._structuredSparseDataset(substructure, shape, dtype)
-              for substructure in structure
-          ]))
-
-  def _structuredSparseElement(self, structure, shape, dtype):
-    dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0)  # pylint: disable=g-explicit-length-test
-    if structure is None:
-      return dense_to_sparse(array_ops.zeros(shape, dtype=dtype))
-    else:
-      return tuple([
-          self._structuredSparseElement(substructure, shape, dtype)
-          for substructure in structure
-      ])
-
-  @parameterized.named_parameters(
-      ("1", None, np.int32([]), dtypes.bool),
-      ("2", None, np.int32([]), dtypes.int32),
-      ("3", None, np.int32([]), dtypes.float32),
-      ("4", None, np.int32([]), dtypes.string),
-      ("5", None, np.int32([2]), dtypes.int32),
-      ("6", None, np.int32([2, 2]), dtypes.int32),
-      ("7", (None, None, None), np.int32([]), dtypes.int32),
-      ("8", (None, (None, None)), np.int32([]), dtypes.int32),
-  )
-  def testWindowDatasetBatchSparse(self, structure, shape, dtype):
-    """Tests batching of sparse tensor windows.
-
-    Args:
-      structure: the input structure
-      shape: the input shape
-      dtype: the input data type
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return batching.batch_window(args[0])
-
-      return tuple([
-          fn(*arg) if isinstance(arg, tuple) else batching.batch_window(arg)
-          for arg in args
-      ])
-
-    dataset = self._structuredSparseDataset(
-        structure, shape, dtype).repeat(5).apply(
-            grouping.window_dataset(5)).apply(grouping._map_x_dataset(fn))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected = sess.run(
-          self._structuredSparseElement(structure,
-                                        np.concatenate(([5], shape), axis=0),
-                                        dtype))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int32([])),
-      ("2", np.int32([1])),
-      ("3", np.int32([1, 2, 3])),
-  )
-  def testWindowDatasetBatchSparseDynamicShape(self, shape):
-    """Tests batching of dynamically shaped sparse tensor windows.
-
-    Args:
-      shape: the input shape
-    """
-
-    shape_t = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensors(array_ops.zeros(shape_t)).map(
-        self._make_dense_to_sparse_fn(len(shape) == 0)).repeat(5).apply(  # pylint: disable=g-explicit-length-test
-            grouping.window_dataset(5)).apply(
-                grouping._map_x_dataset(batching.batch_window))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op, {shape_t: shape})
-      expected = sess.run(
-          self._structuredSparseElement(None,
-                                        np.concatenate(([5], shape), axis=0),
-                                        dtypes.int32))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  def _structuredRaggedDataset(self, structure, shapes, dtype):
-
-    if structure is None:
-      return dataset_ops.Dataset.from_tensor_slices(shapes).map(
-          lambda shape: array_ops.zeros(shape, dtype=dtype))
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self._structuredRaggedDataset(substructure, shapes, dtype)
-              for substructure in structure
-          ]))
-
-  @parameterized.named_parameters(
-      ("1", None, np.int32([[1], [2], [3]]), dtypes.bool, [-1]),
-      ("2", None, np.int32([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("3", None, np.int32([[1], [2], [3]]), dtypes.float32, [-1]),
-      ("4", None, np.int32([[1], [2], [3]]), dtypes.string, [-1]),
-      ("5", None, np.int32([[1, 3], [2, 2], [3, 1]]), dtypes.int32, [-1, -1]),
-      ("6", None, np.int32([[3, 1, 3], [1, 3, 1]]), dtypes.int32, [-1, -1, -1]),
-      ("7", (None, None, None), np.int32([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("8", (None,
-             (None, None)), np.int32([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("9", None, np.int32([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("10", None, np.int32([[1], [2], [3]]), dtypes.int32, np.int32([10])),
-  )
-  def testWindowDatasetPaddedBatchDense(self, structure, shapes, dtype,
-                                        padded_shape):
-    """Tests padded batching of dense tensor windows.
-
-    Args:
-      structure: the input structure
-      shapes: the input shapes
-      dtype: the input data type
-      padded_shape: the shape to pad the output to
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return batching.padded_batch_window(args[0], padded_shape)
-
-      return tuple([
-          fn(*arg) if isinstance(arg, tuple) else batching.padded_batch_window(
-              arg, padded_shape) for arg in args
-      ])
-
-    dataset = self._structuredRaggedDataset(structure, shapes, dtype).apply(
-        grouping.window_dataset(len(shapes))).apply(
-            grouping._map_x_dataset(fn))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected_shape = np.maximum(np.amax(shapes, axis=0), padded_shape)
-      expected = sess.run(
-          self._structuredElement(
-              structure,
-              np.concatenate((np.int32([len(shapes)]), expected_shape)), dtype))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int32([[1], [2], [3]]), [-1]),
-      ("2", np.int32([[1, 3], [2, 2], [3, 1]]), [-1, -1]),
-      ("3", np.int32([[3, 1, 3], [1, 3, 1]]), [-1, -1, -1]),
-  )
-  def testWindowDatasetPaddedBatchDenseDynamicShape(self, shapes, padded_shape):
-    """Tests padded batching of dynamically shaped dense tensor windows.
-
-    Args:
-      shapes: the input shapes
-      padded_shape: the shape to pad the output to
-    """
-
-    shapes_t = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensor_slices(shapes_t).map(
-        lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).apply(
-            grouping.window_dataset(len(shapes))).apply(
-                grouping._map_x_dataset(
-                    lambda x: batching.padded_batch_window(x, padded_shape)))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op, {shapes_t: shapes})
-      expected_shape = np.maximum(np.amax(shapes, axis=0), padded_shape)
-      expected = sess.run(
-          self._structuredElement(
-              None, np.concatenate((np.int32([len(shapes)]), expected_shape)),
-              dtypes.int32))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int32([[1]]), np.int32([0])),
-      ("2", np.int32([[10], [20]]), np.int32([15])),
-  )
-  def testWindowDatasetPaddedBatchDenseInvalid(self, shapes, padded_shape):
-    """Tests invalid padded batching of dense tensor windows.
-
-    Args:
-      shapes: the input shapes
-      padded_shape: the shape to pad the output to
-    """
-
-    dataset = dataset_ops.Dataset.from_tensor_slices(shapes).map(
-        lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).apply(
-            grouping.window_dataset(len(shapes))).apply(
-                grouping._map_x_dataset(
-                    lambda x: batching.padded_batch_window(x, padded_shape)))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-  def _structuredRaggedSparseDataset(self, structure, shapes, dtype):
-
-    def map_fn(shape):
-      dense_to_sparse = self._make_dense_to_sparse_fn(False)
-      return dense_to_sparse(array_ops.zeros(shape, dtype=dtype))
-
-    if structure is None:
-      return dataset_ops.Dataset.from_tensor_slices(shapes).map(map_fn)
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self._structuredRaggedSparseDataset(substructure, shapes, dtype)
-              for substructure in structure
-          ]))
-
-  def _structuredRaggedSparseElement(self, structure, shapes, dtype,
-                                     padded_shape):
-    if structure is None:
-      dense_shape = np.maximum(np.amax(shapes, axis=0), padded_shape)
-      values = []
-      for shape in shapes:
-        dense_to_sparse = self._make_dense_to_sparse_fn(len(shape) == 0)  # pylint: disable=g-explicit-length-test
-        sparse = dense_to_sparse(array_ops.zeros(shape, dtype=dtype))
-        padded_sparse = sparse_tensor.SparseTensor(sparse.indices,
-                                                   sparse.values, dense_shape)
-        reshaped_sparse = sparse_ops.sparse_reshape(
-            padded_sparse,
-            array_ops.concat([np.array([1], dtype=np.int64), dense_shape], 0))
-        values.append(reshaped_sparse)
-      return sparse_ops.sparse_concat(0, values)
-    else:
-      return tuple([
-          self._structuredRaggedSparseElement(substructure, shapes, dtype,
-                                              padded_shape)
-          for substructure in structure
-      ])
-
-  @parameterized.named_parameters(
-      ("1", None, np.int64([[1], [2], [3]]), dtypes.bool, [-1]),
-      ("2", None, np.int64([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("3", None, np.int64([[1], [2], [3]]), dtypes.float32, [-1]),
-      ("4", None, np.int64([[1], [2], [3]]), dtypes.string, [-1]),
-      ("5", None, np.int64([[1, 3], [2, 2], [3, 1]]), dtypes.int32, [-1, -1]),
-      ("6", None, np.int64([[1, 3, 1], [3, 1, 3]]), dtypes.int32, [-1, -1, -1]),
-      ("7", (None, None, None), np.int64([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("8", (None,
-             (None, None)), np.int64([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("9", None, np.int64([[1], [2], [3]]), dtypes.int32, [-1]),
-      ("10", None, np.int64([[1], [2], [3]]), dtypes.int32, np.int64([10])),
-  )
-  def testWindowDatasetPaddedBatchSparse(self, structure, shapes, dtype,
-                                         padded_shape):
-    """Tests padded batching of sparse tensor windows.
-
-    Args:
-      structure: the input structure
-      shapes: the input shapes
-      dtype: the input data type
-      padded_shape: the shape to pad the output to
-    """
-
-    def fn(*args):
-      if len(args) == 1 and not isinstance(args[0], tuple):
-        return batching.padded_batch_window(args[0], padded_shape)
-
-      return tuple([
-          fn(*arg) if isinstance(arg, tuple) else batching.padded_batch_window(
-              arg, padded_shape) for arg in args
-      ])
-
-    dataset = self._structuredRaggedSparseDataset(
-        structure, shapes, dtype).apply(grouping.window_dataset(
-            len(shapes))).apply(grouping._map_x_dataset(fn))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      expected = sess.run(
-          self._structuredRaggedSparseElement(structure, shapes, dtype,
-                                              padded_shape))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int64([[1], [2], [3]]), [-1]),
-      ("2", np.int64([[1, 3], [2, 2], [3, 1]]), [-1, -1]),
-      ("3", np.int64([[3, 1, 3], [1, 3, 1]]), [-1, -1, -1]),
-  )
-  def testWindowDatasetPaddedBatchSparseDynamicShape(self, shapes,
-                                                     padded_shape):
-    """Tests padded batching of dynamically shaped sparse tensor windows.
-
-    Args:
-      shapes: the input shapes
-      padded_shape: the shape to pad the output to
-    """
-
-    shapes_t = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensor_slices(shapes_t).map(
-        lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).map(
-            self._make_dense_to_sparse_fn(False)
-        ).apply(grouping.window_dataset(len(shapes))).apply(
-            grouping._map_x_dataset(
-                lambda x: batching.padded_batch_window(x, padded_shape)))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op, {shapes_t: shapes})
-      expected = sess.run(
-          self._structuredRaggedSparseElement(None, shapes, dtypes.int32,
-                                              padded_shape))
-      actual = sess.run(get_next)
-      self._assertEqual(expected, actual)
-
-  @parameterized.named_parameters(
-      ("1", np.int64([[1]]), [0]),
-      ("2", np.int64([[10], [20]]), [15]),
-  )
-  def testWindowDatasetPaddedBatchSparseInvalid(self, shapes, padded_shape):
-    """Tests invalid padded batching of sparse tensor windows.
-
-    Args:
-      shapes: the input shapes
-      padded_shape: the shape to pad the output to
-    """
-
-    dataset = dataset_ops.Dataset.from_tensor_slices(shapes).map(
-        lambda shape: array_ops.zeros(shape, dtype=dtypes.int32)).map(
-            self._make_dense_to_sparse_fn(False)
-        ).apply(grouping.window_dataset(len(shapes))).apply(
-            grouping._map_x_dataset(
-                lambda x: batching.padded_batch_window(x, padded_shape)))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index 5cd1ed542b..34dc2379d0 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -16,10 +16,7 @@ py_library(
     srcs = ["counter.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":scan_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/experimental/ops:counter",
     ],
 )
 
@@ -28,12 +25,7 @@ py_library(
     srcs = ["get_single_element.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":grouping",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-        "//third_party/py/numpy",
+        "//tensorflow/python/data/experimental/ops:get_single_element",
     ],
 )
 
@@ -44,10 +36,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
     ],
 )
 
@@ -58,15 +47,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:random_seed",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:random_ops",
     ],
 )
 
@@ -79,7 +60,6 @@ py_library(
     deps = [
         ":batching",
         ":interleave_ops",
-        ":optimization",
         ":parsing_ops",
         ":shuffle_ops",
         "//tensorflow/python:constant_op",
@@ -91,6 +71,7 @@ py_library(
         "//tensorflow/python:platform",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:readers",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:readers",
         "//tensorflow/python/data/util:convert",
@@ -106,7 +87,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/experimental/ops:shuffle_ops",
     ],
 )
 
@@ -125,6 +106,7 @@ py_library(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:tensor_util",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/util:convert",
         "//tensorflow/python/data/util:nest",
@@ -138,8 +120,7 @@ py_library(
     srcs = ["enumerate_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/experimental/ops:enumerate_ops",
     ],
 )
 
@@ -148,10 +129,7 @@ py_library(
     srcs = ["error_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:error_ops",
     ],
 )
 
@@ -160,16 +138,7 @@ py_library(
     srcs = ["grouping.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:function",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:grouping",
     ],
 )
 
@@ -178,30 +147,7 @@ py_library(
     srcs = ["interleave_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":random_ops",
-        "//tensorflow/contrib/stateless",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-    ],
-)
-
-py_library(
-    name = "optimization",
-    srcs = ["optimization.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
     ],
 )
 
@@ -210,25 +156,7 @@ py_library(
     srcs = ["parsing_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-    ],
-)
-
-py_library(
-    name = "map_defun",
-    srcs = ["map_defun.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:parsing_ops",
     ],
 )
 
@@ -237,18 +165,7 @@ py_library(
     srcs = ["resampling.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":batching",
-        ":interleave_ops",
-        ":scan_ops",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:logging_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
+        "//tensorflow/python/data/experimental/ops:resampling",
     ],
 )
 
@@ -257,12 +174,7 @@ py_library(
     srcs = ["scan_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:function",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:scan_ops",
     ],
 )
 
@@ -281,32 +193,12 @@ py_library(
     ],
 )
 
-py_library(
-    name = "stats_ops",
-    srcs = ["stats_ops.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-    ],
-)
-
 py_library(
     name = "threadpool",
     srcs = ["threadpool.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-        "//tensorflow/python/eager:context",
+        "//tensorflow/python/data/experimental/ops:threadpool",
     ],
 )
 
@@ -317,11 +209,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:unique",
     ],
 )
 
@@ -332,20 +220,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
-py_library(
-    name = "indexed_dataset_ops",
-    srcs = ["indexed_dataset_ops.py"],
-    deps = [
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:writers",
     ],
 )
 
@@ -353,11 +228,7 @@ py_library(
     name = "prefetching_ops",
     srcs = ["prefetching_ops.py"],
     deps = [
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/data/experimental/ops:prefetching_ops",
     ],
 )
 
@@ -370,17 +241,14 @@ py_library(
         ":error_ops",
         ":get_single_element",
         ":grouping",
-        ":indexed_dataset_ops",
         ":interleave_ops",
-        ":map_defun",
-        ":optimization",
         ":prefetching_ops",
+        ":random_ops",
         ":readers",
         ":resampling",
         ":scan_ops",
         ":shuffle_ops",
         ":sliding",
-        ":stats_ops",
         ":threadpool",
         ":unique",
         ":writers",
diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py
index 7a0f221284..8c60459ca8 100644
--- a/tensorflow/contrib/data/python/ops/batching.py
+++ b/tensorflow/contrib/data/python/ops/batching.py
@@ -17,134 +17,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
-from tensorflow.contrib.data.python.ops import get_single_element
-from tensorflow.contrib.data.python.ops import grouping
 from tensorflow.contrib.framework import with_shape
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import convert
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import sparse_ops
 from tensorflow.python.util import deprecation
 
 
-def batch_window(dataset):
-  """Batches a window of tensors.
-
-  Args:
-    dataset: the input dataset.
-
-  Returns:
-    A `Tensor` representing the batch of the entire input dataset.
-  """
-  if isinstance(dataset.output_classes, tuple):
-    raise TypeError("Input dataset expected to have a single component")
-  if dataset.output_classes is ops.Tensor:
-    return _batch_dense_window(dataset)
-  elif dataset.output_classes is sparse_tensor.SparseTensor:
-    return _batch_sparse_window(dataset)
-  else:
-    raise TypeError("Unsupported dataset type: %s" % dataset.output_classes)
-
-
-def _batch_dense_window(dataset):
-  """Batches a window of dense tensors."""
-
-  def key_fn(_):
-    return np.int64(0)
-
-  def shape_init_fn(_):
-    return array_ops.shape(first_element)
-
-  def shape_reduce_fn(state, value):
-    check_ops.assert_equal(state, array_ops.shape(value))
-    return state
-
-  def finalize_fn(state):
-    return state
-
-  if dataset.output_shapes.is_fully_defined():
-    shape = dataset.output_shapes
-  else:
-    first_element = get_single_element.get_single_element(dataset.take(1))
-    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
-                                     finalize_fn)
-    shape = get_single_element.get_single_element(
-        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))
-
-  def batch_init_fn(_):
-    batch_shape = array_ops.concat([[0], shape], 0)
-    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)
-
-  def batch_reduce_fn(state, value):
-    return array_ops.concat([state, [value]], 0)
-
-  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
-  return get_single_element.get_single_element(
-      dataset.apply(grouping.group_by_reducer(key_fn, batch_reducer)))
-
-
-def _batch_sparse_window(dataset):
-  """Batches a window of sparse tensors."""
-
-  def key_fn(_):
-    return np.int64(0)
-
-  def shape_init_fn(_):
-    return first_element.dense_shape
-
-  def shape_reduce_fn(state, value):
-    check_ops.assert_equal(state, value.dense_shape)
-    return state
-
-  def finalize_fn(state):
-    return state
-
-  if dataset.output_shapes.is_fully_defined():
-    shape = dataset.output_shapes
-  else:
-    first_element = get_single_element.get_single_element(dataset.take(1))
-    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
-                                     finalize_fn)
-    shape = get_single_element.get_single_element(
-        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))
-
-  def batch_init_fn(_):
-    indices_shape = array_ops.concat([[0], [array_ops.size(shape) + 1]], 0)
-    return sparse_tensor.SparseTensor(
-        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
-        values=constant_op.constant([], shape=[0], dtype=dataset.output_types),
-        dense_shape=array_ops.concat(
-            [np.array([0], dtype=np.int64),
-             math_ops.cast(shape, dtypes.int64)], 0))
-
-  def batch_reduce_fn(state, value):
-    return sparse_ops.sparse_concat(0, [state, value])
-
-  def reshape_fn(value):
-    return sparse_ops.sparse_reshape(
-        value,
-        array_ops.concat([np.array([1], dtype=np.int64), value.dense_shape], 0))
-
-  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
-  return get_single_element.get_single_element(
-      dataset.map(reshape_fn).apply(
-          grouping.group_by_reducer(key_fn, batch_reducer)))
-
-
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.dense_to_sparse_batch(...)`.")
 def dense_to_sparse_batch(batch_size, row_shape):
   """A transformation that batches ragged elements into `tf.SparseTensor`s.
 
@@ -187,201 +67,10 @@ def dense_to_sparse_batch(batch_size, row_shape):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    return _DenseToSparseBatchDataset(dataset, batch_size, row_shape)
-
-  return _apply_fn
-
-
-def padded_batch_window(dataset, padded_shape, padding_value=None):
-  """Batches a window of tensors with padding.
-
-  Args:
-    dataset: the input dataset.
-    padded_shape: (Optional.) `tf.TensorShape` or `tf.int64` vector tensor-like
-      object representing the shape to which the input elements should be padded
-      prior to batching. Any unknown dimensions (e.g. `tf.Dimension(None)` in a
-      `tf.TensorShape` or `-1` in a tensor-like object) will be padded to the
-      maximum size of that dimension in each batch.
-    padding_value: (Optional.) A scalar-shaped `tf.Tensor`, representing the
-      padding value to use. Defaults are `0` for numeric types and the empty
-      string for string types. If `dataset` contains `tf.SparseTensor`, this
-      value is ignored.
-
-  Returns:
-    A `Tensor` representing the batch of the entire input dataset.
-
-  Raises:
-    ValueError: if invalid arguments are provided.
-  """
-  if not issubclass(dataset.output_classes,
-                    (ops.Tensor, sparse_tensor.SparseTensor)):
-    raise TypeError("Input dataset expected to have a single tensor component")
-  if issubclass(dataset.output_classes, (ops.Tensor)):
-    return _padded_batch_dense_window(dataset, padded_shape, padding_value)
-  elif issubclass(dataset.output_classes, (sparse_tensor.SparseTensor)):
-    if padding_value is not None:
-      raise ValueError("Padding value not allowed for sparse tensors")
-    return _padded_batch_sparse_window(dataset, padded_shape)
-  else:
-    raise TypeError("Unsupported dataset type: %s" % dataset.output_classes)
-
-
-def _padded_batch_dense_window(dataset, padded_shape, padding_value=None):
-  """Batches a window of dense tensors with padding."""
-
-  padded_shape = math_ops.cast(
-      convert.partial_shape_to_tensor(padded_shape), dtypes.int32)
-
-  def key_fn(_):
-    return np.int64(0)
-
-  def max_init_fn(_):
-    return padded_shape
-
-  def max_reduce_fn(state, value):
-    """Computes the maximum shape to pad to."""
-    condition = math_ops.reduce_all(
-        math_ops.logical_or(
-            math_ops.less_equal(array_ops.shape(value), padded_shape),
-            math_ops.equal(padded_shape, -1)))
-    assert_op = control_flow_ops.Assert(condition, [
-        "Actual shape greater than padded shape: ",
-        array_ops.shape(value), padded_shape
-    ])
-    with ops.control_dependencies([assert_op]):
-      return math_ops.maximum(state, array_ops.shape(value))
-
-  def finalize_fn(state):
-    return state
-
-  # Compute the padded shape.
-  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
-  padded_shape = get_single_element.get_single_element(
-      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))
-
-  if padding_value is None:
-    if dataset.output_types == dtypes.string:
-      padding_value = ""
-    elif dataset.output_types == dtypes.bool:
-      padding_value = False
-    elif dataset.output_types == dtypes.variant:
-      raise TypeError("Unable to create padding for field of type 'variant'")
-    else:
-      padding_value = 0
-
-  def batch_init_fn(_):
-    batch_shape = array_ops.concat(
-        [np.array([0], dtype=np.int32), padded_shape], 0)
-    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)
-
-  def batch_reduce_fn(state, value):
-    return array_ops.concat([state, [value]], 0)
-
-  def pad_fn(value):
-    shape = array_ops.shape(value)
-    left = array_ops.zeros_like(shape)
-    right = padded_shape - shape
-    return array_ops.pad(
-        value, array_ops.stack([left, right], 1), constant_values=padding_value)
-
-  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
-  return get_single_element.get_single_element(
-      dataset.map(pad_fn).apply(
-          grouping.group_by_reducer(key_fn, batch_reducer)))
-
-
-def _padded_batch_sparse_window(dataset, padded_shape):
-  """Batches a window of sparse tensors with padding."""
-
-  def key_fn(_):
-    return np.int64(0)
-
-  def max_init_fn(_):
-    return convert.partial_shape_to_tensor(padded_shape)
-
-  def max_reduce_fn(state, value):
-    """Computes the maximum shape to pad to."""
-    condition = math_ops.reduce_all(
-        math_ops.logical_or(
-            math_ops.less_equal(value.dense_shape, padded_shape),
-            math_ops.equal(padded_shape, -1)))
-    assert_op = control_flow_ops.Assert(condition, [
-        "Actual shape greater than padded shape: ", value.dense_shape,
-        padded_shape
-    ])
-    with ops.control_dependencies([assert_op]):
-      return math_ops.maximum(state, value.dense_shape)
-
-  def finalize_fn(state):
-    return state
-
-  # Compute the padded shape.
-  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
-  padded_shape = get_single_element.get_single_element(
-      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))
-
-  def batch_init_fn(_):
-    indices_shape = array_ops.concat([[0], [array_ops.size(padded_shape) + 1]],
-                                     0)
-    return sparse_tensor.SparseTensor(
-        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
-        values=constant_op.constant([], shape=[0], dtype=dataset.output_types),
-        dense_shape=array_ops.concat(
-            [np.array([0], dtype=np.int64), padded_shape], 0))
-
-  def batch_reduce_fn(state, value):
-    padded_value = sparse_tensor.SparseTensor(
-        indices=value.indices, values=value.values, dense_shape=padded_shape)
-    reshaped_value = sparse_ops.sparse_reshape(
-        padded_value,
-        array_ops.concat(
-            [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
-    return sparse_ops.sparse_concat(0, [state, reshaped_value])
-
-  reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
-  return get_single_element.get_single_element(
-      dataset.apply(grouping.group_by_reducer(key_fn, reducer)))
-
-
-class _UnbatchDataset(dataset_ops.UnaryDataset):
-  """A dataset that splits the elements of its input into multiple elements."""
-
-  def __init__(self, input_dataset):
-    """See `unbatch()` for more details."""
-    super(_UnbatchDataset, self).__init__(input_dataset)
-    flat_shapes = nest.flatten(input_dataset.output_shapes)
-    if any(s.ndims == 0 for s in flat_shapes):
-      raise ValueError("Cannot unbatch an input with scalar components.")
-    known_batch_dim = tensor_shape.Dimension(None)
-    for s in flat_shapes:
-      try:
-        known_batch_dim = known_batch_dim.merge_with(s[0])
-      except ValueError:
-        raise ValueError("Cannot unbatch an input whose components have "
-                         "different batch sizes.")
-    self._input_dataset = input_dataset
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.unbatch_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return nest.map_structure(lambda s: s[1:],
-                              self._input_dataset.output_shapes)
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
+  return batching.dense_to_sparse_batch(batch_size, row_shape)
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.unbatch()`.")
 def unbatch():
   """Splits elements of a dataset into multiple elements on the batch dimension.
 
@@ -403,39 +92,7 @@ def unbatch():
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    if not sparse.any_sparse(dataset.output_classes):
-      return _UnbatchDataset(dataset)
-
-    # NOTE(mrry): We must ensure that any SparseTensors in `dataset`
-    # are normalized to the rank-1 dense representation, so that the
-    # sparse-oblivious unbatching logic will slice them
-    # appropriately. This leads to a somewhat inefficient re-encoding step
-    # for all SparseTensor components.
-    # TODO(mrry): Consider optimizing this in future
-    # if it turns out to be a bottleneck.
-    def normalize(arg, *rest):
-      if rest:
-        return sparse.serialize_many_sparse_tensors((arg,) + rest)
-      else:
-        return sparse.serialize_many_sparse_tensors(arg)
-
-    normalized_dataset = dataset.map(normalize)
-
-    # NOTE(mrry): Our `map()` has lost information about the sparseness
-    # of any SparseTensor components, so re-apply the structure of the
-    # original dataset.
-    restructured_dataset = _RestructuredDataset(
-        normalized_dataset,
-        dataset.output_types,
-        dataset.output_shapes,
-        dataset.output_classes,
-        allow_unsafe_cast=True)
-    return _UnbatchDataset(restructured_dataset)
-
-  return _apply_fn
+  return batching.unbatch()
 
 
 @deprecation.deprecated(
@@ -514,135 +171,8 @@ def padded_batch_and_drop_remainder(batch_size,
   return _apply_fn
 
 
-class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that batches ragged dense elements into `tf.SparseTensor`s."""
-
-  def __init__(self, input_dataset, batch_size, row_shape):
-    """See `Dataset.dense_to_sparse_batch()` for more details."""
-    super(_DenseToSparseBatchDataset, self).__init__(input_dataset)
-    if not isinstance(input_dataset.output_types, dtypes.DType):
-      raise TypeError("DenseToSparseDataset requires an input whose elements "
-                      "have a single component, whereas the input has %r." %
-                      input_dataset.output_types)
-    self._input_dataset = input_dataset
-    self._batch_size = batch_size
-    self._row_shape = row_shape
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.dense_to_sparse_batch_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._batch_size,
-        row_shape=convert.partial_shape_to_tensor(self._row_shape),
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return sparse_tensor.SparseTensor
-
-  @property
-  def output_shapes(self):
-    return tensor_shape.vector(None).concatenate(self._row_shape)
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-
-class _RestructuredDataset(dataset_ops.UnaryDataset):
-  """An internal helper for changing the structure and shape of a dataset."""
-
-  def __init__(self,
-               dataset,
-               output_types,
-               output_shapes=None,
-               output_classes=None,
-               allow_unsafe_cast=False):
-    """Creates a new dataset with the given output types and shapes.
-
-    The given `dataset` must have a structure that is convertible:
-    * `dataset.output_types` must be the same as `output_types` module nesting.
-    * Each shape in `dataset.output_shapes` must be compatible with each shape
-      in `output_shapes` (if given).
-
-    Note: This helper permits "unsafe casts" for shapes, equivalent to using
-    `tf.Tensor.set_shape()` where domain-specific knowledge is available.
-
-    Args:
-      dataset: A `Dataset` object.
-      output_types: A nested structure of `tf.DType` objects.
-      output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects.
-        If omitted, the shapes will be inherited from `dataset`.
-      output_classes: (Optional.) A nested structure of class types.
-        If omitted, the class types will be inherited from `dataset`.
-      allow_unsafe_cast: (Optional.) If `True`, the caller may switch the
-        reported output types and shapes of the restructured dataset, e.g. to
-        switch a sparse tensor represented as `tf.variant` to its user-visible
-        type and shape.
-
-    Raises:
-      ValueError: If either `output_types` or `output_shapes` is not compatible
-        with the structure of `dataset`.
-    """
-    super(_RestructuredDataset, self).__init__(dataset)
-    self._input_dataset = dataset
-
-    if not allow_unsafe_cast:
-      # Validate that the types are compatible.
-      output_types = nest.map_structure(dtypes.as_dtype, output_types)
-      flat_original_types = nest.flatten(dataset.output_types)
-      flat_new_types = nest.flatten(output_types)
-      if flat_original_types != flat_new_types:
-        raise ValueError(
-            "Dataset with output types %r cannot be restructured to have "
-            "output types %r" % (dataset.output_types, output_types))
-
-    self._output_types = output_types
-
-    if output_shapes is None:
-      # Inherit shapes from the original `dataset`.
-      self._output_shapes = nest.pack_sequence_as(output_types,
-                                                  nest.flatten(
-                                                      dataset.output_shapes))
-    else:
-      if not allow_unsafe_cast:
-        # Validate that the shapes are compatible.
-        nest.assert_same_structure(output_types, output_shapes)
-        flat_original_shapes = nest.flatten(dataset.output_shapes)
-        flat_new_shapes = nest.flatten_up_to(output_types, output_shapes)
-
-        for original_shape, new_shape in zip(flat_original_shapes,
-                                             flat_new_shapes):
-          if not original_shape.is_compatible_with(new_shape):
-            raise ValueError(
-                "Dataset with output shapes %r cannot be restructured to have "
-                "incompatible output shapes %r" % (dataset.output_shapes,
-                                                   output_shapes))
-      self._output_shapes = nest.map_structure_up_to(
-          output_types, tensor_shape.as_shape, output_shapes)
-    if output_classes is None:
-      # Inherit class types from the original `dataset`.
-      self._output_classes = nest.pack_sequence_as(output_types,
-                                                   nest.flatten(
-                                                       dataset.output_classes))
-    else:
-      self._output_classes = output_classes
-
-  def _as_variant_tensor(self):
-    return self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-
+# TODO(b/116817045): Move this to `tf.data.experimental` when the `with_shape()`
+# function is available in the core.
 def assert_element_shape(expected_shapes):
   """Assert the shape of this `Dataset`.
 
@@ -687,7 +217,8 @@ def assert_element_shape(expected_shapes):
   def _apply_fn(dataset):
     output_shapes = _merge_output_shapes(dataset.output_shapes,
                                          expected_shapes)
-    return _RestructuredDataset(
+    # pylint: disable=protected-access
+    return batching._RestructuredDataset(
         dataset.map(_check_shape),
         dataset.output_types,
         output_shapes=output_shapes,
@@ -696,49 +227,7 @@ def assert_element_shape(expected_shapes):
   return _apply_fn
 
 
-class _MapAndBatchDataset(dataset_ops.MapDataset):
-  """A `Dataset` that maps a function over a batch of elements."""
-
-  def __init__(self, input_dataset, map_func, batch_size, num_parallel_calls,
-               drop_remainder):
-    """See `Dataset.map()` for details."""
-    super(_MapAndBatchDataset, self).__init__(input_dataset, map_func)
-    self._batch_size_t = ops.convert_to_tensor(
-        batch_size, dtype=dtypes.int64, name="batch_size")
-    self._num_parallel_calls_t = ops.convert_to_tensor(
-        num_parallel_calls, dtype=dtypes.int64, name="num_parallel_calls")
-    self._drop_remainder_t = ops.convert_to_tensor(
-        drop_remainder, dtype=dtypes.bool, name="drop_remainder")
-
-    self._batch_size = batch_size
-    self._drop_remainder = drop_remainder
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    input_resource = self._input_dataset._as_variant_tensor()
-    return gen_dataset_ops.map_and_batch_dataset_v2(
-        input_resource,
-        self._map_func.captured_inputs,
-        f=self._map_func,
-        batch_size=self._batch_size_t,
-        num_parallel_calls=self._num_parallel_calls_t,
-        drop_remainder=self._drop_remainder_t,
-        **dataset_ops.flat_structure(self))
-    # pylint: enable=protected-access
-
-  @property
-  def output_shapes(self):
-    dim = self._batch_size if self._drop_remainder else None
-    return nest.pack_sequence_as(self._output_shapes, [
-        tensor_shape.vector(dim).concatenate(s)
-        for s in nest.flatten(self._output_shapes)
-    ])
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-
+@deprecation.deprecated(None, "Use `tf.data.experimental.map_and_batch(...)`.")
 def map_and_batch(map_func,
                   batch_size,
                   num_parallel_batches=None,
@@ -779,17 +268,5 @@ def map_and_batch(map_func,
     ValueError: If both `num_parallel_batches` and `num_parallel_calls` are
       specified.
   """
-
-  if num_parallel_batches is None and num_parallel_calls is None:
-    num_parallel_calls = batch_size
-  elif num_parallel_batches is not None and num_parallel_calls is None:
-    num_parallel_calls = batch_size * num_parallel_batches
-  elif num_parallel_batches is not None and num_parallel_calls is not None:
-    raise ValueError("The `num_parallel_batches` and `num_parallel_calls` "
-                     "arguments are mutually exclusive.")
-
-  def _apply_fn(dataset):
-    return _MapAndBatchDataset(dataset, map_func, batch_size,
-                               num_parallel_calls, drop_remainder)
-
-  return _apply_fn
+  return batching.map_and_batch(map_func, batch_size, num_parallel_batches,
+                                drop_remainder, num_parallel_calls)
diff --git a/tensorflow/contrib/data/python/ops/counter.py b/tensorflow/contrib/data/python/ops/counter.py
index 6ef65f9624..4ff5bf3e39 100644
--- a/tensorflow/contrib/data/python/ops/counter.py
+++ b/tensorflow/contrib/data/python/ops/counter.py
@@ -17,13 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import scan_ops
-
-from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.experimental.ops import counter
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.Counter(...)`.")
 def Counter(start=0, step=1, dtype=dtypes.int64):
   """Creates a `Dataset` that counts from `start` in steps of size `step`.
 
@@ -46,8 +45,4 @@ def Counter(start=0, step=1, dtype=dtypes.int64):
   Returns:
     A `Dataset` of scalar `dtype` elements.
   """
-  with ops.name_scope("counter"):
-    start = ops.convert_to_tensor(start, dtype=dtype, name="start")
-    step = ops.convert_to_tensor(step, dtype=dtype, name="step")
-    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
-        scan_ops.scan(start, lambda state, _: (state + step, state)))
+  return counter.Counter(start, step, dtype)
diff --git a/tensorflow/contrib/data/python/ops/enumerate_ops.py b/tensorflow/contrib/data/python/ops/enumerate_ops.py
index 490281e0d2..a21da4d3ec 100644
--- a/tensorflow/contrib/data/python/ops/enumerate_ops.py
+++ b/tensorflow/contrib/data/python/ops/enumerate_ops.py
@@ -17,12 +17,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
+from tensorflow.python.data.experimental.ops import enumerate_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.enumerate_dataset(...)`.")
 def enumerate_dataset(start=0):
   """A transformation that enumerate the elements of a dataset.
 
@@ -49,10 +50,4 @@ def enumerate_dataset(start=0):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    max_value = np.iinfo(dtypes.int64.as_numpy_dtype).max
-    return dataset_ops.Dataset.zip((dataset_ops.Dataset.range(start, max_value),
-                                    dataset))
-
-  return _apply_fn
+  return enumerate_ops.enumerate_dataset(start)
diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py
index f962e623ee..0559a2e09c 100644
--- a/tensorflow/contrib/data/python/ops/error_ops.py
+++ b/tensorflow/contrib/data/python/ops/error_ops.py
@@ -17,10 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.data.experimental.ops import error_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.ignore_errors()`.")
 def ignore_errors():
   """Creates a `Dataset` from another `Dataset` and silently ignores any errors.
 
@@ -43,34 +44,4 @@ def ignore_errors():
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    return _IgnoreErrorsDataset(dataset)
-
-  return _apply_fn
-
-
-class _IgnoreErrorsDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that silently ignores errors when computing its input."""
-
-  def __init__(self, input_dataset):
-    """See `Dataset.ignore_errors()` for details."""
-    super(_IgnoreErrorsDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_ignore_errors_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
+  return error_ops.ignore_errors()
diff --git a/tensorflow/contrib/data/python/ops/get_single_element.py b/tensorflow/contrib/data/python/ops/get_single_element.py
index a6713b017a..58ad9eea90 100644
--- a/tensorflow/contrib/data/python/ops/get_single_element.py
+++ b/tensorflow/contrib/data/python/ops/get_single_element.py
@@ -19,13 +19,13 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.ops import get_single_element as experimental_get_single_element
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.get_single_element(...)`.")
 def get_single_element(dataset):
   """Returns the single element in `dataset` as a nested structure of tensors.
 
@@ -61,18 +61,10 @@ def get_single_element(dataset):
     InvalidArgumentError (at runtime): if `dataset` does not contain exactly
       one element.
   """
-  if not isinstance(dataset, dataset_ops.Dataset):
-    raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
-
-  nested_ret = nest.pack_sequence_as(
-      dataset.output_types, gen_dataset_ops.dataset_to_single_element(
-          dataset._as_variant_tensor(),  # pylint: disable=protected-access
-          **dataset_ops.flat_structure(dataset)))
-  return sparse.deserialize_sparse_tensors(
-      nested_ret, dataset.output_types, dataset.output_shapes,
-      dataset.output_classes)
+  return experimental_get_single_element.get_single_element(dataset)
 
 
+@deprecation.deprecated(None, "Use `tf.data.Dataset.reduce(...)`.")
 def reduce_dataset(dataset, reducer):
   """Returns the result of reducing the `dataset` using `reducer`.
 
@@ -90,11 +82,4 @@ def reduce_dataset(dataset, reducer):
   if not isinstance(dataset, dataset_ops.Dataset):
     raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
 
-  # The sentinel dataset is used in case the reduced dataset is empty.
-  sentinel_dataset = dataset_ops.Dataset.from_tensors(
-      reducer.finalize_func(reducer.init_func(np.int64(0))))
-  reduced_dataset = dataset.apply(
-      grouping.group_by_reducer(lambda x: np.int64(0), reducer))
-
-  return get_single_element(
-      reduced_dataset.concatenate(sentinel_dataset).take(1))
+  return dataset.reduce(reducer.init_func(np.int64(0)), reducer.reduce_func)
diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py
index 7cae33beb3..a99dc2f29a 100644
--- a/tensorflow/contrib/data/python/ops/grouping.py
+++ b/tensorflow/contrib/data/python/ops/grouping.py
@@ -17,20 +17,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.ops import math_ops
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.group_by_reducer(...)`.")
 def group_by_reducer(key_func, reducer):
   """A transformation that groups elements and performs a reduction.
 
@@ -52,14 +45,11 @@ def group_by_reducer(key_func, reducer):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _GroupByReducerDataset(dataset, key_func, reducer)
-
-  return _apply_fn
+  return grouping.group_by_reducer(key_func, reducer)
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.group_by_window(...)`.")
 def group_by_window(key_func,
                     reduce_func,
                     window_size=None,
@@ -98,27 +88,12 @@ def group_by_window(key_func,
     ValueError: if neither or both of {`window_size`, `window_size_func`} are
       passed.
   """
-  if (window_size is not None and window_size_func or
-      not (window_size is not None or window_size_func)):
-    raise ValueError("Must pass either window_size or window_size_func.")
-
-  if window_size is not None:
-
-    def constant_window_func(unused_key):
-      return ops.convert_to_tensor(window_size, dtype=dtypes.int64)
-
-    window_size_func = constant_window_func
-
-  assert window_size_func is not None
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _GroupByWindowDataset(dataset, key_func, reduce_func,
-                                 window_size_func)
-
-  return _apply_fn
+  return grouping.group_by_window(key_func, reduce_func, window_size,
+                                  window_size_func)
 
 
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.bucket_by_sequence_length(...)`.")
 def bucket_by_sequence_length(element_length_func,
                               bucket_boundaries,
                               bucket_batch_sizes,
@@ -163,342 +138,12 @@ def bucket_by_sequence_length(element_length_func,
   Raises:
     ValueError: if `len(bucket_batch_sizes) != len(bucket_boundaries) + 1`.
   """
-  with ops.name_scope("bucket_by_seq_length"):
-    if len(bucket_batch_sizes) != (len(bucket_boundaries) + 1):
-      raise ValueError(
-          "len(bucket_batch_sizes) must equal len(bucket_boundaries) + 1")
-
-    batch_sizes = constant_op.constant(bucket_batch_sizes, dtype=dtypes.int64)
-
-    def element_to_bucket_id(*args):
-      """Return int64 id of the length bucket for this element."""
-      seq_length = element_length_func(*args)
-
-      boundaries = list(bucket_boundaries)
-      buckets_min = [np.iinfo(np.int32).min] + boundaries
-      buckets_max = boundaries + [np.iinfo(np.int32).max]
-      conditions_c = math_ops.logical_and(
-          math_ops.less_equal(buckets_min, seq_length),
-          math_ops.less(seq_length, buckets_max))
-      bucket_id = math_ops.reduce_min(array_ops.where(conditions_c))
-
-      return bucket_id
-
-    def window_size_fn(bucket_id):
-      # The window size is set to the batch size for this bucket
-      window_size = batch_sizes[bucket_id]
-      return window_size
-
-    def make_padded_shapes(shapes, none_filler=None):
-      padded = []
-      for shape in nest.flatten(shapes):
-        shape = tensor_shape.TensorShape(shape)
-        shape = [
-            none_filler if d.value is None else d
-            for d in shape
-        ]
-        padded.append(shape)
-      return nest.pack_sequence_as(shapes, padded)
-
-    def batching_fn(bucket_id, grouped_dataset):
-      """Batch elements in dataset."""
-      batch_size = window_size_fn(bucket_id)
-      if no_padding:
-        return grouped_dataset.batch(batch_size)
-      none_filler = None
-      if pad_to_bucket_boundary:
-        err_msg = ("When pad_to_bucket_boundary=True, elements must have "
-                   "length < max(bucket_boundaries).")
-        check = check_ops.assert_less(
-            bucket_id,
-            constant_op.constant(len(bucket_batch_sizes) - 1,
-                                 dtype=dtypes.int64),
-            message=err_msg)
-        with ops.control_dependencies([check]):
-          boundaries = constant_op.constant(bucket_boundaries,
-                                            dtype=dtypes.int64)
-          bucket_boundary = boundaries[bucket_id]
-          none_filler = bucket_boundary - 1
-      shapes = make_padded_shapes(
-          padded_shapes or grouped_dataset.output_shapes,
-          none_filler=none_filler)
-      return grouped_dataset.padded_batch(batch_size, shapes, padding_values)
-
-    def _apply_fn(dataset):
-      return dataset.apply(
-          group_by_window(element_to_bucket_id, batching_fn,
-                          window_size_func=window_size_fn))
-
-    return _apply_fn
-
-
-def _map_x_dataset(map_func):
-  """A transformation that maps `map_func` across its input.
-
-  This transformation is similar to `tf.data.Dataset.map`, but in addition to
-  supporting dense and sparse tensor inputs, it also supports dataset inputs.
-
-  Args:
-    map_func: A function mapping a nested structure of tensors and/or datasets
-      (having shapes and types defined by `self.output_shapes` and
-     `self.output_types`) to another nested structure of tensors and/or
-     datasets.
-
-  Returns:
-    Dataset: A `Dataset`.
-  """
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _MapXDataset(dataset, map_func)
-
-  return _apply_fn
-
-
-# TODO(b/115382007) Remove this once canned reducers move to core.
-def window_dataset(window_size):
-  """A transformation that creates window datasets from the input dataset.
-
-  The resulting datasets will contain `window_size` elements (or
-  `N % window_size` for the last dataset if `window_size` does not divide the
-  number of input elements `N` evenly).
-
-  Args:
-    window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
-      consecutive elements of the input dataset to combine into a window.
-
-  Returns:
-    Dataset: A `Dataset`.
-  """
-
-  def _apply_fn(dataset):
-    return dataset_ops.WindowDataset(
-        dataset,
-        size=window_size,
-        shift=window_size,
-        stride=1,
-        drop_remainder=False)
-
-  return _apply_fn
-
-
-class _GroupByReducerDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that groups its input and performs a reduction."""
-
-  def __init__(self, input_dataset, key_func, reducer):
-    """See `group_by_reducer()` for details."""
-    super(_GroupByReducerDataset, self).__init__(input_dataset)
+  return grouping.bucket_by_sequence_length(
+      element_length_func, bucket_boundaries, bucket_batch_sizes, padded_shapes,
+      padding_values, pad_to_bucket_boundary, no_padding)
 
-    self._input_dataset = input_dataset
 
-    self._make_key_func(key_func, input_dataset)
-    self._make_init_func(reducer.init_func)
-    self._make_reduce_func(reducer.reduce_func, input_dataset)
-    self._make_finalize_func(reducer.finalize_func)
-
-  def _make_key_func(self, key_func, input_dataset):
-    """Make wrapping Defun for key_func."""
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        key_func, "tf.contrib.data.group_by_reducer()", input_dataset)
-    if not (
-        wrapped_func.output_types == dtypes.int64 and
-        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
-      raise ValueError(
-          "`key_func` must return a single tf.int64 tensor. "
-          "Got type=%s and shape=%s"
-          % (wrapped_func.output_types, wrapped_func.output_shapes))
-    self._key_func = wrapped_func.function
-
-  def _make_init_func(self, init_func):
-    """Make wrapping Defun for init_func."""
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        init_func, "tf.contrib.data.group_by_reducer()",
-        input_classes=ops.Tensor, input_shapes=tensor_shape.scalar(),
-        input_types=dtypes.int64)
-    self._init_func = wrapped_func.function
-    self._state_classes = wrapped_func.output_classes
-    self._state_shapes = wrapped_func.output_shapes
-    self._state_types = wrapped_func.output_types
-
-  def _make_reduce_func(self, reduce_func, input_dataset):
-    """Make wrapping Defun for reduce_func."""
-
-    # Iteratively rerun the reduce function until reaching a fixed point on
-    # `self._state_shapes`.
-    need_to_rerun = True
-    while need_to_rerun:
-
-      wrapped_func = dataset_ops.StructuredFunctionWrapper(
-          reduce_func, "tf.contrib.data.group_by_reducer()",
-          input_classes=(self._state_classes, input_dataset.output_classes),
-          input_shapes=(self._state_shapes, input_dataset.output_shapes),
-          input_types=(self._state_types, input_dataset.output_types),
-          add_to_graph=False)
-
-      # Extract and validate class information from the returned values.
-      for new_state_class, state_class in zip(
-          nest.flatten(wrapped_func.output_classes),
-          nest.flatten(self._state_classes)):
-        if not issubclass(new_state_class, state_class):
-          raise TypeError(
-              "The element classes for the new state must match the initial "
-              "state. Expected %s; got %s." %
-              (self._state_classes, wrapped_func.output_classes))
-
-      # Extract and validate type information from the returned values.
-      for new_state_type, state_type in zip(
-          nest.flatten(wrapped_func.output_types),
-          nest.flatten(self._state_types)):
-        if new_state_type != state_type:
-          raise TypeError(
-              "The element types for the new state must match the initial "
-              "state. Expected %s; got %s." %
-              (self._state_types, wrapped_func.output_types))
-
-      # Extract shape information from the returned values.
-      flat_state_shapes = nest.flatten(self._state_shapes)
-      flat_new_state_shapes = nest.flatten(wrapped_func.output_shapes)
-      weakened_state_shapes = [
-          original.most_specific_compatible_shape(new)
-          for original, new in zip(flat_state_shapes, flat_new_state_shapes)
-      ]
-
-      need_to_rerun = False
-      for original_shape, weakened_shape in zip(flat_state_shapes,
-                                                weakened_state_shapes):
-        if original_shape.ndims is not None and (
-            weakened_shape.ndims is None or
-            original_shape.as_list() != weakened_shape.as_list()):
-          need_to_rerun = True
-          break
-
-      if need_to_rerun:
-        self._state_shapes = nest.pack_sequence_as(self._state_shapes,
-                                                   weakened_state_shapes)
-
-    self._reduce_func = wrapped_func.function
-    self._reduce_func.add_to_graph(ops.get_default_graph())
-
-  def _make_finalize_func(self, finalize_func):
-    """Make wrapping Defun for finalize_func."""
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        finalize_func, "tf.contrib.data.group_by_reducer()",
-        input_classes=self._state_classes, input_shapes=self._state_shapes,
-        input_types=self._state_types)
-    self._finalize_func = wrapped_func.function
-    self._output_classes = wrapped_func.output_classes
-    self._output_shapes = wrapped_func.output_shapes
-    self._output_types = wrapped_func.output_types
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.group_by_reducer_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._key_func.captured_inputs,
-        self._init_func.captured_inputs,
-        self._reduce_func.captured_inputs,
-        self._finalize_func.captured_inputs,
-        key_func=self._key_func,
-        init_func=self._init_func,
-        reduce_func=self._reduce_func,
-        finalize_func=self._finalize_func,
-        **dataset_ops.flat_structure(self))
-
-
-class _GroupByWindowDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that groups its input and performs a windowed reduction."""
-
-  def __init__(self, input_dataset, key_func, reduce_func, window_size_func):
-    """See `group_by_window()` for details."""
-    super(_GroupByWindowDataset, self).__init__(input_dataset)
-
-    self._input_dataset = input_dataset
-
-    self._make_key_func(key_func, input_dataset)
-    self._make_reduce_func(reduce_func, input_dataset)
-    self._make_window_size_func(window_size_func)
-
-  def _make_window_size_func(self, window_size_func):
-    """Make wrapping Defun for window_size_func."""
-    def window_size_func_wrapper(key):
-      return ops.convert_to_tensor(window_size_func(key), dtype=dtypes.int64)
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        window_size_func_wrapper, "tf.contrib.data.group_by_window()",
-        input_classes=ops.Tensor, input_shapes=tensor_shape.scalar(),
-        input_types=dtypes.int64)
-    if not (
-        wrapped_func.output_types == dtypes.int64 and
-        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
-      raise ValueError(
-          "`window_size_func` must return a single tf.int64 scalar tensor.")
-    self._window_size_func = wrapped_func.function
-
-  def _make_key_func(self, key_func, input_dataset):
-    """Make wrapping Defun for key_func."""
-    def key_func_wrapper(*args):
-      return ops.convert_to_tensor(key_func(*args), dtype=dtypes.int64)
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        key_func_wrapper, "tf.contrib.data.group_by_window()", input_dataset)
-    if not (
-        wrapped_func.output_types == dtypes.int64 and
-        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
-      raise ValueError(
-          "`key_func` must return a single tf.int64 scalar tensor.")
-    self._key_func = wrapped_func.function
-
-  def _make_reduce_func(self, reduce_func, input_dataset):
-    """Make wrapping Defun for reduce_func."""
-    nested_dataset = dataset_ops._NestedDatasetComponent(input_dataset)  # pylint: disable=protected-access
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        reduce_func, "tf.contrib.data.reduce_by_window()",
-        input_classes=(ops.Tensor, nested_dataset),
-        input_shapes=(tensor_shape.scalar(), nested_dataset),
-        input_types=(dtypes.int64, nested_dataset),
-        experimental_nested_dataset_support=True)
-    if not isinstance(
-        wrapped_func.output_classes, dataset_ops._NestedDatasetComponent):  # pylint: disable=protected-access
-      raise TypeError("`reduce_func` must return a `Dataset` object.")
-    self._output_classes = wrapped_func.output_classes.output_classes
-    self._output_types = wrapped_func.output_types.output_types
-    self._output_shapes = wrapped_func.output_shapes.output_shapes
-    self._reduce_func = wrapped_func.function
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.group_by_window_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._key_func.captured_inputs,
-        self._reduce_func.captured_inputs,
-        self._window_size_func.captured_inputs,
-        key_func=self._key_func,
-        reduce_func=self._reduce_func,
-        window_size_func=self._window_size_func,
-        **dataset_ops.flat_structure(self))
-
-
-class Reducer(object):
+class Reducer(grouping.Reducer):
   """A reducer is used for reducing a set of elements.
 
   A reducer is represented as a tuple of the three functions:
@@ -507,58 +152,6 @@ class Reducer(object):
     3) finalization function: state => result
   """
 
+  @deprecation.deprecated(None, "Use `tf.data.experimental.Reducer(...)`.")
   def __init__(self, init_func, reduce_func, finalize_func):
-    self._init_func = init_func
-    self._reduce_func = reduce_func
-    self._finalize_func = finalize_func
-
-  @property
-  def init_func(self):
-    return self._init_func
-
-  @property
-  def reduce_func(self):
-    return self._reduce_func
-
-  @property
-  def finalize_func(self):
-    return self._finalize_func
-
-
-class _MapXDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that maps a function over elements in its input."""
-
-  def __init__(self, input_dataset, map_func):
-    """See `map_x_dataset()` for details."""
-    super(_MapXDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-
-    wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        map_func,
-        "tf.contrib.data.map_x_dataset()",
-        input_dataset,
-        experimental_nested_dataset_support=True)
-    self._output_classes = wrapped_func.output_classes
-    self._output_shapes = wrapped_func.output_shapes
-    self._output_types = wrapped_func.output_types
-    self._map_func = wrapped_func.function
-
-  def _as_variant_tensor(self):
-    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
-    return gen_dataset_ops.map_dataset(
-        input_t,
-        self._map_func.captured_inputs,
-        f=self._map_func,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
+    super(Reducer, self).__init__(init_func, reduce_func, finalize_func)
diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py
index 1ee9db1aa8..f50da4d429 100644
--- a/tensorflow/contrib/data/python/ops/interleave_ops.py
+++ b/tensorflow/contrib/data/python/ops/interleave_ops.py
@@ -17,20 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib import stateless
-from tensorflow.contrib.data.python.ops import random_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.ops import readers
-from tensorflow.python.data.util import nest
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_experimental_dataset_ops
-from tensorflow.python.ops import math_ops
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.parallel_interleave(...)`.")
 def parallel_interleave(map_func,
                         cycle_length,
                         block_length=1,
@@ -80,12 +72,9 @@ def parallel_interleave(map_func,
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    return readers.ParallelInterleaveDataset(
-        dataset, map_func, cycle_length, block_length, sloppy,
-        buffer_output_elements, prefetch_input_elements)
-
-  return _apply_fn
+  return interleave_ops.parallel_interleave(
+      map_func, cycle_length, block_length, sloppy, buffer_output_elements,
+      prefetch_input_elements)
 
 
 @deprecation.deprecated(
@@ -139,63 +128,12 @@ def sloppy_interleave(map_func, cycle_length, block_length=1):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    return readers.ParallelInterleaveDataset(
-        dataset,
-        map_func,
-        cycle_length,
-        block_length,
-        sloppy=True,
-        buffer_output_elements=None,
-        prefetch_input_elements=None)
-
-  return _apply_fn
-
-
-class _DirectedInterleaveDataset(dataset_ops.Dataset):
-  """A substitute for `Dataset.interleave()` on a fixed list of datasets."""
-
-  def __init__(self, selector_input, data_inputs):
-    self._selector_input = selector_input
-    self._data_inputs = list(data_inputs)
-
-    for data_input in data_inputs[1:]:
-      if (data_input.output_types != data_inputs[0].output_types or
-          data_input.output_classes != data_inputs[0].output_classes):
-        raise TypeError("All datasets must have the same type and class.")
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    return (
-        gen_experimental_dataset_ops.experimental_directed_interleave_dataset(
-            self._selector_input._as_variant_tensor(), [
-                data_input._as_variant_tensor()
-                for data_input in self._data_inputs
-            ], **dataset_ops.flat_structure(self)))
-    # pylint: enable=protected-access
-
-  def _inputs(self):
-    return [self._selector_input] + self._data_inputs
-
-  @property
-  def output_classes(self):
-    return self._data_inputs[0].output_classes
-
-  @property
-  def output_shapes(self):
-    ret = self._data_inputs[0].output_shapes
-    for data_input in self._data_inputs[1:]:
-      ret = nest.pack_sequence_as(ret, [
-          ts1.most_specific_compatible_shape(ts2) for (ts1, ts2) in zip(
-              nest.flatten(ret), nest.flatten(data_input.output_shapes))
-      ])
-    return ret
-
-  @property
-  def output_types(self):
-    return self._data_inputs[0].output_types
+  return interleave_ops.parallel_interleave(
+      map_func, cycle_length, block_length, sloppy=True)
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.sample_from_datasets(...)`.")
 def sample_from_datasets(datasets, weights=None, seed=None):
   """Samples elements at random from the datasets in `datasets`.
 
@@ -219,64 +157,11 @@ def sample_from_datasets(datasets, weights=None, seed=None):
     ValueError: If the `weights` argument is specified and does not match the
       length of the `datasets` element.
   """
-  num_datasets = len(datasets)
-  if not isinstance(weights, dataset_ops.Dataset):
-    if weights is None:
-      # Select inputs with uniform probability.
-      logits = [[1.0] * num_datasets]
-
-    else:
-      # Use the given `weights` as the probability of choosing the respective
-      # input.
-      weights = ops.convert_to_tensor(weights, name="weights")
-      if weights.dtype not in (dtypes.float32, dtypes.float64):
-        raise TypeError("`weights` must be convertible to a tensor of "
-                        "`tf.float32` or `tf.float64` elements.")
-      if not weights.shape.is_compatible_with([num_datasets]):
-        raise ValueError(
-            "`weights` must be a vector of length `len(datasets)`.")
-
-      # The `stateless_multinomial()` op expects log-probabilities, as opposed
-      # to weights.
-      logits = array_ops.expand_dims(math_ops.log(weights, name="logits"), 0)
-
-    # NOTE(mrry): We only specialize when `weights` is not a `Dataset`. When it
-    # is a `Dataset`, it is possible that evaluating it has a side effect the
-    # user depends on.
-    if len(datasets) == 1:
-      return datasets[0]
-
-    def select_dataset_constant_logits(seed):
-      return array_ops.squeeze(
-          stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1])
-
-    selector_input = dataset_ops.MapDataset(
-        random_ops.RandomDataset(seed).batch(2),
-        select_dataset_constant_logits,
-        use_inter_op_parallelism=False)
-
-  else:
-    # Use each element of the given `weights` dataset as the probability of
-    # choosing the respective input.
-
-    # The `stateless_multinomial()` op expects log-probabilities, as opposed to
-    # weights.
-    logits_ds = weights.map(lambda *p: math_ops.log(p, name="logits"))
-
-    def select_dataset_varying_logits(logits, seed):
-      return array_ops.squeeze(
-          stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1])
-
-    logits_and_seeds = dataset_ops.Dataset.zip(
-        (logits_ds, random_ops.RandomDataset(seed).batch(2)))
-    selector_input = dataset_ops.MapDataset(
-        logits_and_seeds,
-        select_dataset_varying_logits,
-        use_inter_op_parallelism=False)
-
-  return _DirectedInterleaveDataset(selector_input, datasets)
+  return interleave_ops.sample_from_datasets(datasets, weights, seed)
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.choose_from_datasets(...)`.")
 def choose_from_datasets(datasets, choice_dataset):
   """Creates a dataset that deterministically chooses elements from `datasets`.
 
@@ -312,10 +197,4 @@ def choose_from_datasets(datasets, choice_dataset):
     TypeError: If the `datasets` or `choice_dataset` arguments have the wrong
       type.
   """
-  if not (choice_dataset.output_types == dtypes.int64
-          and choice_dataset.output_shapes.is_compatible_with(
-              tensor_shape.scalar())
-          and choice_dataset.output_classes == ops.Tensor):
-    raise TypeError("`choice_dataset` must be a dataset of scalar "
-                    "`tf.int64` tensors.")
-  return _DirectedInterleaveDataset(choice_dataset, datasets)
+  return interleave_ops.choose_from_datasets(datasets, choice_dataset)
diff --git a/tensorflow/contrib/data/python/ops/iterator_ops.py b/tensorflow/contrib/data/python/ops/iterator_ops.py
index 18515e21ed..48c325c86f 100644
--- a/tensorflow/contrib/data/python/ops/iterator_ops.py
+++ b/tensorflow/contrib/data/python/ops/iterator_ops.py
@@ -16,15 +16,13 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.training import session_run_hook
 
+from tensorflow.python.data.experimental.ops import iterator_ops
+from tensorflow.python.util import deprecation
 
+
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.make_saveable_from_iterator(...)`.")
 def make_saveable_from_iterator(iterator):
   """Returns a SaveableObject for saving/restore iterator state using Saver.
 
@@ -60,27 +58,10 @@ def make_saveable_from_iterator(iterator):
   Note: Not all iterators support checkpointing yet. Attempting to save the
   state of an unsupported iterator will throw an error.
   """
-  return _Saveable(iterator._iterator_resource)  # pylint: disable=protected-access
-
-
-class _Saveable(saver_lib.BaseSaverBuilder.SaveableObject):
-  """SaveableObject for saving/restoring iterator state."""
+  return iterator_ops.make_saveable_from_iterator(iterator)
 
-  def __init__(self, iterator_resource):
-    serialized_iterator = gen_dataset_ops.serialize_iterator(iterator_resource)
-    specs = [
-        saver_lib.BaseSaverBuilder.SaveSpec(serialized_iterator, "",
-                                            iterator_resource.name + "-state")
-    ]
-    super(_Saveable, self).__init__(iterator_resource, specs,
-                                    iterator_resource.name)
 
-  def restore(self, restored_tensors, unused_restored_shapes):
-    with ops.colocate_with(self.op):
-      return gen_dataset_ops.deserialize_iterator(self.op, restored_tensors[0])
-
-
-class CheckpointInputPipelineHook(session_run_hook.SessionRunHook):
+class CheckpointInputPipelineHook(iterator_ops.CheckpointInputPipelineHook):
   """Checkpoints input pipeline state every N steps or seconds.
 
   This hook saves the state of the iterators in the `Graph` so that when
@@ -125,135 +106,7 @@ class CheckpointInputPipelineHook(session_run_hook.SessionRunHook):
   collector when building the eval graph.
   """
 
+  @deprecation.deprecated(
+      None, "Use `tf.data.experimental.CheckpointInputPipelineHook(...)`.")
   def __init__(self, estimator):
-    """Initializes a `CheckpointInputPipelineHook`.
-
-    Args:
-      estimator: Estimator.
-
-    Raises:
-      ValueError: One of `save_steps` or `save_secs` should be set.
-      ValueError: At most one of saver or scaffold should be set.
-    """
-    # `checkpoint_basename` is "input.ckpt" for non-distributed pipelines or
-    # of the form "input_<task_type>_<task_id>.ckpt" for distributed pipelines.
-    # Note: The default `checkpoint_basename` used by `CheckpointSaverHook` is
-    # "model.ckpt". We intentionally choose the input pipeline checkpoint prefix
-    # to be different to avoid conflicts with the model checkpoint.
-
-    # pylint: disable=protected-access
-    checkpoint_prefix = "input"
-    if estimator._config.num_worker_replicas > 1:
-      # Distributed setting.
-      suffix = "_{}_{}".format(estimator._config.task_type,
-                               estimator._config.task_id)
-      checkpoint_prefix += suffix
-    # pylint: enable=protected-access
-
-    # We use a composition paradigm instead of inheriting from
-    # `CheckpointSaverHook` because `Estimator` does an `isinstance` check
-    # to check whether a `CheckpointSaverHook` is already present in the list
-    # of hooks and if not, adds one. Inheriting from `CheckpointSaverHook`
-    # would thwart this behavior. This hook checkpoints *only the iterators*
-    # and not the graph variables.
-    self._checkpoint_saver_hook = basic_session_run_hooks.CheckpointSaverHook(
-        estimator.model_dir,
-        save_secs=estimator._config.save_checkpoints_secs,  # pylint: disable=protected-access
-        save_steps=estimator._config.save_checkpoints_steps,  # pylint: disable=protected-access
-        checkpoint_basename=checkpoint_prefix + ".ckpt")
-
-    # Name for the protocol buffer file that will contain the list of most
-    # recent checkpoints stored as a `CheckpointState` protocol buffer.
-    # This file, kept in the same directory as the checkpoint files, is
-    # automatically managed by the `Saver` to keep track of recent checkpoints.
-    # The default name used by the `Saver` for this file is "checkpoint". Here
-    # we use the name "checkpoint_<checkpoint_prefix>" so that in case the
-    # `checkpoint_dir` is the same as the model checkpoint directory, there are
-    # no conflicts during restore.
-    self._latest_filename = "checkpoint_" + checkpoint_prefix
-    self._first_run = True
-
-  def begin(self):
-    # Build a Saver that saves all iterators in the `GLOBAL_ITERATORS`
-    # collection if no `Saver` or `Scaffold` is provided.
-    # pylint: disable=protected-access
-    if (self._checkpoint_saver_hook._saver is None and
-        self._checkpoint_saver_hook._scaffold is None):
-      iterators = ops.get_collection(iterator_ops.GLOBAL_ITERATORS)
-      saveables = [_Saveable(i) for i in iterators]
-      self._checkpoint_saver_hook._saver = _CustomSaver(saveables,
-                                                        self._latest_filename)
-    # pylint: enable=protected-access
-    self._checkpoint_saver_hook.begin()
-
-  def _restore_or_save_initial_ckpt(self, session):
-    # Ideally this should be run in after_create_session but is not for the
-    # following reason:
-    # Currently there is no way of enforcing an order of running the
-    # `SessionRunHooks`. Hence it is possible that the `_DatasetInitializerHook`
-    # is run *after* this hook. That is troublesome because
-    # 1. If a checkpoint exists and this hook restores it, the initializer hook
-    #    will override it.
-    # 2. If no checkpoint exists, this hook will try to save an initialized
-    #    iterator which will result in an exception.
-    #
-    # As a temporary fix we enter the following implicit contract between this
-    # hook and the _DatasetInitializerHook.
-    # 1. The _DatasetInitializerHook initializes the iterator in the call to
-    #    after_create_session.
-    # 2. This hook saves the iterator on the first call to `before_run()`, which
-    #    is guaranteed to happen after `after_create_session()` of all hooks
-    #    have been run.
-
-    # Check if there is an existing checkpoint. If so, restore from it.
-    # pylint: disable=protected-access
-    latest_checkpoint_path = checkpoint_management.latest_checkpoint(
-        self._checkpoint_saver_hook._checkpoint_dir,
-        latest_filename=self._latest_filename)
-    if latest_checkpoint_path:
-      self._checkpoint_saver_hook._get_saver().restore(session,
-                                                       latest_checkpoint_path)
-    else:
-      # The checkpoint saved here is the state at step "global_step".
-      # Note: We do not save the GraphDef or MetaGraphDef here.
-      global_step = session.run(self._checkpoint_saver_hook._global_step_tensor)
-      self._checkpoint_saver_hook._save(session, global_step)
-      self._checkpoint_saver_hook._timer.update_last_triggered_step(global_step)
-    # pylint: enable=protected-access
-
-  def before_run(self, run_context):
-    if self._first_run:
-      self._restore_or_save_initial_ckpt(run_context.session)
-      self._first_run = False
-    return self._checkpoint_saver_hook.before_run(run_context)
-
-  def after_run(self, run_context, run_values):
-    self._checkpoint_saver_hook.after_run(run_context, run_values)
-
-  def end(self, session):
-    self._checkpoint_saver_hook.end(session)
-
-
-class _CustomSaver(saver_lib.Saver):
-  """`Saver` with a different default `latest_filename`.
-
-  This is used in the `CheckpointInputPipelineHook` to avoid conflicts with
-  the model ckpt saved by the `CheckpointSaverHook`.
-  """
-
-  def __init__(self, var_list, latest_filename):
-    super(_CustomSaver, self).__init__(var_list)
-    self._latest_filename = latest_filename
-
-  def save(self,
-           sess,
-           save_path,
-           global_step=None,
-           latest_filename=None,
-           meta_graph_suffix="meta",
-           write_meta_graph=True,
-           write_state=True,
-           strip_default_attrs=False):
-    return super(_CustomSaver, self).save(
-        sess, save_path, global_step, latest_filename or self._latest_filename,
-        meta_graph_suffix, write_meta_graph, write_state, strip_default_attrs)
+    super(CheckpointInputPipelineHook, self).__init__(estimator)
diff --git a/tensorflow/contrib/data/python/ops/parsing_ops.py b/tensorflow/contrib/data/python/ops/parsing_ops.py
index cfbba701b0..3aeee9d8e4 100644
--- a/tensorflow/contrib/data/python/ops/parsing_ops.py
+++ b/tensorflow/contrib/data/python/ops/parsing_ops.py
@@ -17,92 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.ops import parsing_ops
+from tensorflow.python.data.experimental.ops import parsing_ops
+from tensorflow.python.util import deprecation
 
 
-class _ParseExampleDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that parses `example` dataset into a `dict` dataset."""
-
-  def __init__(self, input_dataset, features, num_parallel_calls):
-    super(_ParseExampleDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    if not all(types == dtypes.string
-               for types in nest.flatten(input_dataset.output_types)):
-      raise TypeError("Input dataset should be a dataset of vectors of strings")
-    self._num_parallel_calls = num_parallel_calls
-    # pylint: disable=protected-access
-    self._features = parsing_ops._prepend_none_dimension(features)
-    # sparse_keys and dense_keys come back sorted here.
-    (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
-     dense_shapes) = parsing_ops._features_to_raw_params(
-         self._features, [
-             parsing_ops.VarLenFeature, parsing_ops.SparseFeature,
-             parsing_ops.FixedLenFeature, parsing_ops.FixedLenSequenceFeature
-         ])
-    # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature.
-    (_, dense_defaults_vec, sparse_keys, sparse_types, dense_keys, dense_shapes,
-     dense_shape_as_shape) = parsing_ops._process_raw_parameters(
-         None, dense_defaults, sparse_keys, sparse_types, dense_keys,
-         dense_types, dense_shapes)
-    # pylint: enable=protected-access
-    self._sparse_keys = sparse_keys
-    self._sparse_types = sparse_types
-    self._dense_keys = dense_keys
-    self._dense_defaults = dense_defaults_vec
-    self._dense_shapes = dense_shapes
-    self._dense_types = dense_types
-    dense_output_shapes = [
-        self._input_dataset.output_shapes.concatenate(shape)
-        for shape in dense_shape_as_shape
-    ]
-    sparse_output_shapes = [
-        self._input_dataset.output_shapes.concatenate([None])
-        for _ in range(len(sparse_keys))
-    ]
-
-    self._output_shapes = dict(
-        zip(self._dense_keys + self._sparse_keys,
-            dense_output_shapes + sparse_output_shapes))
-    self._output_types = dict(
-        zip(self._dense_keys + self._sparse_keys,
-            self._dense_types + self._sparse_types))
-    self._output_classes = dict(
-        zip(self._dense_keys + self._sparse_keys,
-            [ops.Tensor for _ in range(len(self._dense_defaults))] +
-            [sparse_tensor.SparseTensor for _ in range(len(self._sparse_keys))
-            ]))
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.parse_example_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._num_parallel_calls,
-        self._dense_defaults,
-        self._sparse_keys,
-        self._dense_keys,
-        self._sparse_types,
-        self._dense_shapes,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-
-# TODO(b/111553342): add arguments names and example names as well.
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.parse_example_dataset(...)`.")
 def parse_example_dataset(features, num_parallel_calls=1):
   """A transformation that parses `Example` protos into a `dict` of tensors.
 
@@ -130,21 +50,4 @@ def parse_example_dataset(features, num_parallel_calls=1):
   Raises:
     ValueError: if features argument is None.
   """
-  if features is None:
-    raise ValueError("Missing: features was %s." % features)
-
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    out_dataset = _ParseExampleDataset(dataset, features, num_parallel_calls)
-    if any([
-        isinstance(feature, parsing_ops.SparseFeature)
-        for _, feature in features.items()
-    ]):
-      # pylint: disable=protected-access
-      # pylint: disable=g-long-lambda
-      out_dataset = out_dataset.map(
-          lambda x: parsing_ops._construct_sparse_tensors_for_sparse_features(
-              features, x), num_parallel_calls=num_parallel_calls)
-    return out_dataset
-
-  return _apply_fn
+  return parsing_ops.parse_example_dataset(features, num_parallel_calls)
diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py
index 46f82e453a..adfb390cd9 100644
--- a/tensorflow/contrib/data/python/ops/prefetching_ops.py
+++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py
@@ -17,321 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import warnings
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.eager import context
-from tensorflow.python.framework import device as framework_device
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import function
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import functional_ops
-from tensorflow.python.ops import gen_dataset_ops
-from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
-from tensorflow.python.ops import resource_variable_ops
-
-
-def function_buffering_resource(string_arg,
-                                target_device,
-                                f,
-                                buffer_size,
-                                output_types,
-                                container="",
-                                shared_name=None,
-                                name=None):
-  """Creates a FunctionBufferingResource.
-
-  A FunctionBufferingResource fills up a buffer by calling a function `f` on
-  `target_device`. `f` should take in only a single string argument as input.
-
-  Args:
-    string_arg: The single string argument to the function.
-    target_device: The device to run `f` on.
-    f: The function to be executed.
-    buffer_size: Size of the buffer to be populated.
-    output_types: The output types generated by the function.
-    container: (Optional) string. Defaults to "".
-    shared_name: (Optional) string.
-    name: (Optional) string to name the op.
-
-  Returns:
-    Handle to a FunctionBufferingResource.
-  """
-  if shared_name is None:
-    shared_name = ""
-  return ged_ops.experimental_function_buffering_resource(
-      string_arg=string_arg,
-      target_device=target_device,
-      shared_name=shared_name,
-      f=f,
-      buffer_size=buffer_size,
-      container=container,
-      name=name,
-      output_types=output_types)
-
-
-def function_buffering_resource_get_next(function_buffer_resource,
-                                         output_types,
-                                         name=None):
-  return ged_ops.experimental_function_buffering_resource_get_next(
-      function_buffer_resource=function_buffer_resource,
-      output_types=output_types,
-      name=name)
-
-
-def function_buffering_resource_reset(function_buffer_resource, name=None):
-  return ged_ops.experimental_function_buffering_resource_reset(
-      function_buffer_resource=function_buffer_resource, name=name)
-
-
-# pylint: disable=protected-access
-class _PrefetchToDeviceIterator(object):
-  """A replacement for `tf.data.Iterator` that prefetches to another device.
-
-  Args:
-    input_dataset: The input dataset
-    one_shot: If true, we make a one shot iterator that's already initialized.
-    device: A fully specified device string where we want to prefetch to
-    buffer_size: Size of the prefetching buffer.
-    shared_name: (Optional.) If non-empty, the returned iterator will be
-        shared under the given name across multiple sessions that share the
-        same devices (e.g. when using a remote server).
-
-  Returns:
-    An Iterator type object.
-  """
-
-  def __init__(self,
-               input_dataset,
-               one_shot,
-               device,
-               buffer_size,
-               shared_name=None):
-    self._input_dataset = input_dataset
-    self._get_next_call_count = 0
-    self._one_shot = one_shot
-    if shared_name is None:
-      shared_name = ""
-
-    if self._one_shot:
-      self._input_iterator = input_dataset.make_one_shot_iterator()
-    else:
-      self._input_iterator = iterator_ops.Iterator.from_structure(
-          self._input_dataset.output_types, self._input_dataset.output_shapes,
-          shared_name, self._input_dataset.output_classes)
-    input_iterator_handle = self._input_iterator.string_handle()
-
-    @function.Defun(dtypes.string)
-    def _prefetch_fn(handle):
-      """Prefetches one element from `input_iterator`."""
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          handle, self._input_iterator.output_types,
-          self._input_iterator.output_shapes,
-          self._input_iterator.output_classes)
-      ret = remote_iterator.get_next()
-      return nest.flatten(sparse.serialize_sparse_tensors(ret))
-
-    iterator_device = ged_ops.experimental_iterator_get_device(
-        self._input_iterator._iterator_resource)
-
-    with ops.device(device):
-      self._buffering_resource = function_buffering_resource(
-          f=_prefetch_fn,
-          target_device=iterator_device,
-          string_arg=input_iterator_handle,
-          buffer_size=buffer_size,
-          shared_name=shared_name,
-          output_types=nest.flatten(
-              sparse.as_dense_types(self._input_dataset.output_types,
-                                    self._input_dataset.output_classes)))
-
-    if not self._one_shot:
-      reset_op = function_buffering_resource_reset(self._buffering_resource)
-      with ops.control_dependencies([reset_op]):
-        self._initializer = self._input_iterator.make_initializer(
-            self._input_dataset)
-
-  def get_next(self, name=None):
-    """See `tf.data.Iterator.get_next`."""
-    self._get_next_call_count += 1
-    if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD:
-      warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE)
-
-    flat_ret = ged_ops.experimental_function_buffering_resource_get_next(
-        self._buffering_resource,
-        output_types=nest.flatten(
-            sparse.as_dense_types(self.output_types, self.output_classes)),
-        name=name)
-
-    ret = sparse.deserialize_sparse_tensors(
-        nest.pack_sequence_as(self.output_types, flat_ret),
-        self.output_types, self.output_shapes, self.output_classes)
-
-    for tensor, shape in zip(
-        nest.flatten(ret), nest.flatten(self.output_shapes)):
-      if isinstance(tensor, ops.Tensor):
-        tensor.set_shape(shape)
-
-    return ret
-
-  @property
-  def initializer(self):
-    if self._one_shot:
-      raise NotImplementedError("Can't initialize a one_shot_iterator")
-    return self._initializer
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-
-class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator):
-  """A replacement for `tf.data.Iterator` that prefetches to another device.
-
-  Args:
-    input_dataset: The input dataset
-    one_shot: If true, we make a one shot iterator that's already initialized.
-    device: A fully specified device string where we want to prefetch to
-    buffer_size: Size of the prefetching buffer.
-    shared_name: (Optional.) If non-empty, the returned iterator will be
-        shared under the given name across multiple sessions that share the
-        same devices (e.g. when using a remote server).
-
-  Returns:
-    An Iterator type object.
-  """
-
-  def __init__(self,
-               input_dataset,
-               device,
-               buffer_size):
-    with ops.device("/device:CPU:0"):
-      super(_PrefetchToDeviceEagerIterator, self).__init__(input_dataset)
-      input_iterator_handle = gen_dataset_ops.iterator_to_string_handle(
-          self._resource)
-
-    self._device = device
-
-    @function.Defun(dtypes.string)
-    def _prefetch_fn(handle):
-      """Prefetches one element from `input_iterator`."""
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          handle, self.output_types, self.output_shapes, self.output_classes)
-      ret = remote_iterator.get_next()
-      return nest.flatten(sparse.serialize_sparse_tensors(ret))
-
-    _prefetch_fn.add_to_graph(None)
-
-    with ops.device(device):
-      self._buffering_resource = function_buffering_resource(
-          f=_prefetch_fn,
-          output_types=self._flat_output_types,
-          target_device=ged_ops.experimental_iterator_get_device(
-              self._resource),
-          string_arg=input_iterator_handle,
-          buffer_size=buffer_size,
-          shared_name=iterator_ops._generate_shared_name(
-              "function_buffer_resource"))
-
-  def _next_internal(self):
-    """Returns a nested structure of `tf.Tensor`s containing the next element.
-    """
-    # This runs in sync mode as iterators use an error status to communicate
-    # that there is no more data to iterate over.
-    # TODO(b/77291417): Fix
-    with context.execution_mode(context.SYNC):
-      with ops.device(self._device):
-        ret = ged_ops.experimental_function_buffering_resource_get_next(
-            function_buffer_resource=self._buffering_resource,
-            output_types=self._flat_output_types)
-      return sparse.deserialize_sparse_tensors(
-          nest.pack_sequence_as(self._output_types, ret), self._output_types,
-          self._output_shapes, self._output_classes)
-# pylint: enable=protected-access
-
-
-class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` whose iterator prefetches elements to another device."""
-
-  def __init__(self, input_dataset, device, buffer_size):
-    super(_PrefetchToDeviceDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._device = device
-    self._buffer_size = buffer_size if buffer_size is not None else 1
-
-  # The static analysis cannot tell that the eager iterator's superclass has
-  # a `next()` method.
-  # pylint: disable=non-iterator-returned
-  def __iter__(self):
-    """Creates an `Iterator` for enumerating the elements of this dataset.
-
-    The returned iterator implements the Python iterator protocol and therefore
-    can only be used in eager mode.
-
-    Returns:
-      An `Iterator` over the elements of this dataset.
-
-    Raises:
-      RuntimeError: If eager execution is enabled.
-    """
-    if context.executing_eagerly():
-      return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device,
-                                            self._buffer_size)
-    else:
-      raise RuntimeError("dataset.__iter__() is only supported when eager "
-                         "execution is enabled.")
-  # pylint: enable=non-iterator-returned
-
-  def make_one_shot_iterator(self):
-    if context.executing_eagerly():
-      return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device,
-                                            self._buffer_size)
-    else:
-      return _PrefetchToDeviceIterator(self._input_dataset, one_shot=True,
-                                       device=self._device,
-                                       buffer_size=self._buffer_size)
-
-  def make_initializable_iterator(self, shared_name=None):
-    return _PrefetchToDeviceIterator(
-        self._input_dataset,
-        one_shot=False,
-        device=self._device,
-        buffer_size=self._buffer_size,
-        shared_name=shared_name)
-
-  def _as_variant_tensor(self):
-    # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset
-    # transformation methods is called.
-    # TODO(mrry): Investigate support for chaining further transformations after
-    # the prefetch, including GPU support.
-    raise NotImplementedError("`prefetch_to_device()` must be the last "
-                              "transformation in a dataset pipeline.")
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
+from tensorflow.python.data.experimental.ops import prefetching_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.prefetch_to_device(...)`.")
 def prefetch_to_device(device, buffer_size=None):
   """A transformation that prefetches dataset values to the given `device`.
 
@@ -347,12 +38,10 @@ def prefetch_to_device(device, buffer_size=None):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    return _PrefetchToDeviceDataset(dataset, device, buffer_size)
-
-  return _apply_fn
+  return prefetching_ops.prefetch_to_device(device, buffer_size)
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.copy_to_device(...)`.")
 def copy_to_device(target_device, source_device="/cpu:0"):
   """A transformation that copies dataset elements to the given `target_device`.
 
@@ -364,165 +53,4 @@ def copy_to_device(target_device, source_device="/cpu:0"):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    return _CopyToDeviceDataset(
-        dataset, target_device=target_device, source_device=source_device)
-
-  return _apply_fn
-
-
-# TODO(rohanj): Use the _input_hostmem attr on the RemoteCall ops to indicate
-# all inputs to the Op are in host memory, thereby avoiding some unnecessary
-# Sends and Recvs.
-class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that copies elements to another device."""
-
-  def __init__(self, input_dataset, target_device, source_device="/cpu:0"):
-    """Constructs a _CopyToDeviceDataset.
-
-    Args:
-      input_dataset: `Dataset` to be copied
-      target_device: The name of the device to which elements would be copied.
-      source_device: Device where input_dataset would be placed.
-    """
-    super(_CopyToDeviceDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._target_device = target_device
-    spec = framework_device.DeviceSpec().from_string(self._target_device)
-    self._is_gpu_target = (spec.device_type == "GPU")
-    self._source_device_string = source_device
-    self._source_device = ops.convert_to_tensor(source_device)
-
-    self._flat_output_shapes = nest.flatten(
-        sparse.as_dense_shapes(self._input_dataset.output_shapes,
-                               self._input_dataset.output_classes))
-    self._flat_output_types = nest.flatten(
-        sparse.as_dense_types(self._input_dataset.output_types,
-                              self._input_dataset.output_classes))
-
-    @function.Defun()
-    def _init_func():
-      """Creates an iterator for the input dataset.
-
-      Returns:
-        A `string` tensor that encapsulates the iterator created.
-      """
-      # pylint: disable=protected-access
-      ds_variant = self._input_dataset._as_variant_tensor()
-      resource = gen_dataset_ops.anonymous_iterator(
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
-      with ops.control_dependencies(
-          [gen_dataset_ops.make_iterator(ds_variant, resource)]):
-        return gen_dataset_ops.iterator_to_string_handle(resource)
-
-    @function.Defun()
-    def _remote_init_func():
-      return functional_ops.remote_call(
-          target=self._source_device,
-          args=_init_func.captured_inputs,
-          Tout=[dtypes.string],
-          f=_init_func)
-
-    self._init_func = _remote_init_func
-    self._init_captured_args = _remote_init_func.captured_inputs
-
-    @function.Defun(dtypes.string)
-    def _next_func(string_handle):
-      """Calls get_next for created iterator.
-
-      Args:
-        string_handle: An iterator string handle created by _init_func
-      Returns:
-        The elements generated from `input_dataset`
-      """
-      with ops.device(self._source_device_string):
-        iterator = iterator_ops.Iterator.from_string_handle(
-            string_handle, self.output_types, self.output_shapes,
-            self.output_classes)
-      ret = iterator.get_next()
-      return nest.flatten(sparse.serialize_sparse_tensors(ret))
-
-    @function.Defun(dtypes.string)
-    def _remote_next_func(string_handle):
-      return functional_ops.remote_call(
-          target=self._source_device,
-          args=[string_handle] + _next_func.captured_inputs,
-          Tout=self._flat_output_types,
-          f=_next_func)
-
-    self._next_func = _remote_next_func
-    self._next_captured_args = _remote_next_func.captured_inputs
-
-    @function.Defun(dtypes.string)
-    def _finalize_func(string_handle):
-      """Destroys the iterator resource created.
-
-      Args:
-        string_handle: An iterator string handle created by _init_func
-      Returns:
-        Tensor constant 0
-      """
-      iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
-          string_handle,
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
-      with ops.control_dependencies([
-          resource_variable_ops.destroy_resource_op(
-              iterator_resource, ignore_lookup_error=True)]):
-        return array_ops.constant(0, dtypes.int64)
-
-    @function.Defun(dtypes.string)
-    def _remote_finalize_func(string_handle):
-      return functional_ops.remote_call(
-          target=self._source_device,
-          args=[string_handle] + _finalize_func.captured_inputs,
-          Tout=[dtypes.int64],
-          f=_finalize_func)
-
-    self._finalize_func = _remote_finalize_func
-    self._finalize_captured_args = _remote_finalize_func.captured_inputs
-
-    g = ops.get_default_graph()
-    _remote_init_func.add_to_graph(g)
-    _remote_next_func.add_to_graph(g)
-    _remote_finalize_func.add_to_graph(g)
-    # pylint: enable=protected-scope
-
-  # The one_shot_iterator implementation needs a 0 arg _make_dataset function
-  # that thereby captures all the inputs required to create the dataset. Since
-  # there are strings that are inputs to the GeneratorDataset which can't be
-  # placed on a GPU, this fails for the GPU case. Therefore, disabling it for
-  # GPU
-  def make_one_shot_iterator(self):
-    if self._is_gpu_target:
-      raise ValueError("Cannot create a one shot iterator when using "
-                       "`tf.contrib.data.copy_to_device()` on GPU. Please use "
-                       "`Dataset.make_initializable_iterator()` instead.")
-    else:
-      return super(_CopyToDeviceDataset, self).make_one_shot_iterator()
-
-  def _as_variant_tensor(self):
-    with ops.device(self._target_device):
-      return gen_dataset_ops.generator_dataset(
-          self._init_captured_args,
-          self._next_captured_args,
-          self._finalize_captured_args,
-          init_func=self._init_func,
-          next_func=self._next_func,
-          finalize_func=self._finalize_func,
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
+  return prefetching_ops.copy_to_device(target_device, source_device)
diff --git a/tensorflow/contrib/data/python/ops/random_ops.py b/tensorflow/contrib/data/python/ops/random_ops.py
index 344a0763c8..2c95125636 100644
--- a/tensorflow/contrib/data/python/ops/random_ops.py
+++ b/tensorflow/contrib/data/python/ops/random_ops.py
@@ -17,36 +17,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import random_seed
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.data.experimental.ops import random_ops
+from tensorflow.python.util import deprecation
 
 
-class RandomDataset(dataset_ops.DatasetSource):
+class RandomDataset(random_ops.RandomDataset):
   """A `Dataset` of pseudorandom values."""
 
+  @deprecation.deprecated(
+      None, "Use `tf.data.experimental.RandomDataset(...)`.")
   def __init__(self, seed=None):
-    """A `Dataset` of pseudorandom values."""
-    super(RandomDataset, self).__init__()
-    self._seed, self._seed2 = random_seed.get_seed(seed)
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.random_dataset(
-        seed=self._seed,
-        seed2=self._seed2,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return ops.Tensor
-
-  @property
-  def output_shapes(self):
-    return tensor_shape.scalar()
-
-  @property
-  def output_types(self):
-    return dtypes.int64
+    super(RandomDataset, self).__init__(seed)
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index 360971e200..4601376dff 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -17,295 +17,20 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import csv
-
-import numpy as np
-
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import interleave_ops
-from tensorflow.contrib.data.python.ops import optimization
-from tensorflow.contrib.data.python.ops import parsing_ops
-from tensorflow.contrib.data.python.ops import shuffle_ops
+from tensorflow.python.data.experimental.ops import optimization
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers as core_readers
-from tensorflow.python.data.util import convert
 from tensorflow.python.data.util import nest
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.lib.io import file_io
-from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import gen_experimental_dataset_ops
-from tensorflow.python.platform import gfile
 from tensorflow.python.util import deprecation
 
-_ACCEPTABLE_CSV_TYPES = (dtypes.float32, dtypes.float64, dtypes.int32,
-                         dtypes.int64, dtypes.string)
-
-
-def _is_valid_int32(str_val):
-  try:
-    # Checks equality to prevent int32 overflow
-    return dtypes.int32.as_numpy_dtype(str_val) == dtypes.int64.as_numpy_dtype(
-        str_val)
-  except (ValueError, OverflowError):
-    return False
-
-
-def _is_valid_int64(str_val):
-  try:
-    dtypes.int64.as_numpy_dtype(str_val)
-    return True
-  except (ValueError, OverflowError):
-    return False
-
-
-def _is_valid_float(str_val, float_dtype):
-  try:
-    return float_dtype.as_numpy_dtype(str_val) < np.inf
-  except ValueError:
-    return False
-
-
-def _infer_type(str_val, na_value, prev_type):
-  """Given a string, infers its tensor type.
-
-  Infers the type of a value by picking the least 'permissive' type possible,
-  while still allowing the previous type inference for this column to be valid.
-
-  Args:
-    str_val: String value to infer the type of.
-    na_value: Additional string to recognize as a NA/NaN CSV value.
-    prev_type: Type previously inferred based on values of this column that
-      we've seen up till now.
-  Returns:
-    Inferred dtype.
-  """
-  if str_val in ("", na_value):
-    # If the field is null, it gives no extra information about its type
-    return prev_type
-
-  type_list = [
-      dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64, dtypes.string
-  ]  # list of types to try, ordered from least permissive to most
-
-  type_functions = [
-      _is_valid_int32,
-      _is_valid_int64,
-      lambda str_val: _is_valid_float(str_val, dtypes.float32),
-      lambda str_val: _is_valid_float(str_val, dtypes.float64),
-      lambda str_val: True,
-  ]  # Corresponding list of validation functions
-
-  for i in range(len(type_list)):
-    validation_fn = type_functions[i]
-    if validation_fn(str_val) and (prev_type is None or
-                                   prev_type in type_list[:i + 1]):
-      return type_list[i]
-
-
-def _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header):
-  """Generator that yields rows of CSV file(s) in order."""
-  for fn in filenames:
-    with file_io.FileIO(fn, "r") as f:
-      rdr = csv.reader(
-          f,
-          delimiter=field_delim,
-          quoting=csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE)
-      if header:
-        next(rdr)  # Skip header lines
-
-      for csv_row in rdr:
-        if len(csv_row) != num_cols:
-          raise ValueError(
-              "Problem inferring types: CSV row has different number of fields "
-              "than expected.")
-        yield csv_row
-
-
-def _infer_column_defaults(filenames, num_cols, field_delim, use_quote_delim,
-                           na_value, header, num_rows_for_inference,
-                           select_columns):
-  """Infers column types from the first N valid CSV records of files."""
-  if select_columns is None:
-    select_columns = range(num_cols)
-  inferred_types = [None] * len(select_columns)
-
-  for i, csv_row in enumerate(
-      _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header)):
-    if num_rows_for_inference is not None and i >= num_rows_for_inference:
-      break
-
-    for j, col_index in enumerate(select_columns):
-      inferred_types[j] = _infer_type(csv_row[col_index], na_value,
-                                      inferred_types[j])
-
-  # Replace None's with a default type
-  inferred_types = [t or dtypes.string for t in inferred_types]
-  # Default to 0 or '' for null values
-  return [
-      constant_op.constant([0 if t is not dtypes.string else ""], dtype=t)
-      for t in inferred_types
-  ]
-
-
-def _infer_column_names(filenames, field_delim, use_quote_delim):
-  """Infers column names from first rows of files."""
-  csv_kwargs = {
-      "delimiter": field_delim,
-      "quoting": csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE
-  }
-  with file_io.FileIO(filenames[0], "r") as f:
-    try:
-      column_names = next(csv.reader(f, **csv_kwargs))
-    except StopIteration:
-      raise ValueError(("Received StopIteration when reading the header line "
-                        "of %s.  Empty file?") % filenames[0])
-
-  for name in filenames[1:]:
-    with file_io.FileIO(name, "r") as f:
-      try:
-        if next(csv.reader(f, **csv_kwargs)) != column_names:
-          raise ValueError(
-              "Files have different column names in the header row.")
-      except StopIteration:
-        raise ValueError(("Received StopIteration when reading the header line "
-                          "of %s.  Empty file?") % filenames[0])
-  return column_names
-
-
-def _get_sorted_col_indices(select_columns, column_names):
-  """Transforms select_columns argument into sorted column indices."""
-  names_to_indices = {n: i for i, n in enumerate(column_names)}
-  num_cols = len(column_names)
-  for i, v in enumerate(select_columns):
-    if isinstance(v, int):
-      if v < 0 or v >= num_cols:
-        raise ValueError(
-            "Column index %d specified in select_columns out of valid range." %
-            v)
-      continue
-    if v not in names_to_indices:
-      raise ValueError(
-          "Value '%s' specified in select_columns not a valid column index or "
-          "name." % v)
-    select_columns[i] = names_to_indices[v]
-
-  # Sort and ensure there are no duplicates
-  result = sorted(set(select_columns))
-  if len(result) != len(select_columns):
-    raise ValueError("select_columns contains duplicate columns")
-  return result
-
-
-def _maybe_shuffle_and_repeat(
-    dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed):
-  """Optionally shuffle and repeat dataset, as requested."""
-  if num_epochs != 1 and shuffle:
-    # Use shuffle_and_repeat for perf
-    return dataset.apply(
-        shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs,
-                                       shuffle_seed))
-  elif shuffle:
-    return dataset.shuffle(shuffle_buffer_size, shuffle_seed)
-  elif num_epochs != 1:
-    return dataset.repeat(num_epochs)
-  return dataset
-
-
-def make_tf_record_dataset(file_pattern,
-                           batch_size,
-                           parser_fn=None,
-                           num_epochs=None,
-                           shuffle=True,
-                           shuffle_buffer_size=None,
-                           shuffle_seed=None,
-                           prefetch_buffer_size=optimization.AUTOTUNE,
-                           num_parallel_reads=None,
-                           num_parallel_parser_calls=None,
-                           drop_final_batch=False):
-  """Reads and optionally parses TFRecord files into a dataset.
-
-  Provides common functionality such as batching, optional parsing, shuffling,
-  and performant defaults.
-
-  Args:
-    file_pattern: List of files or patterns of TFRecord file paths.
-      See `tf.gfile.Glob` for pattern rules.
-    batch_size: An int representing the number of records to combine
-      in a single batch.
-    parser_fn: (Optional.) A function accepting string input to parse
-      and process the record contents. This function must map records
-      to components of a fixed shape, so they may be batched. By
-      default, uses the record contents unmodified.
-    num_epochs: (Optional.) An int specifying the number of times this
-      dataset is repeated.  If None (the default), cycles through the
-      dataset forever.
-    shuffle: (Optional.) A bool that indicates whether the input
-      should be shuffled. Defaults to `True`.
-    shuffle_buffer_size: (Optional.) Buffer size to use for
-      shuffling. A large buffer size ensures better shuffling, but
-      increases memory usage and startup time.
-    shuffle_seed: (Optional.) Randomization seed to use for shuffling.
-    prefetch_buffer_size: (Optional.) An int specifying the number of
-      feature batches to prefetch for performance improvement.
-      Defaults to auto-tune. Set to 0 to disable prefetching.
-    num_parallel_reads: (Optional.) Number of threads used to read
-      records from files. By default or if set to a value >1, the
-      results will be interleaved.
-    num_parallel_parser_calls: (Optional.) Number of parallel
-      records to parse in parallel. Defaults to an automatic selection.
-    drop_final_batch: (Optional.) Whether the last batch should be
-      dropped in case its size is smaller than `batch_size`; the
-      default behavior is not to drop the smaller batch.
-
-  Returns:
-    A dataset, where each element matches the output of `parser_fn`
-    except it will have an additional leading `batch-size` dimension,
-    or a `batch_size`-length 1-D tensor of strings if `parser_fn` is
-    unspecified.
-  """
-  files = dataset_ops.Dataset.list_files(
-      file_pattern, shuffle=shuffle, seed=shuffle_seed)
-
-  if num_parallel_reads is None:
-    # Note: We considered auto-tuning this value, but there is a concern
-    # that this affects the mixing of records from different files, which
-    # could affect training convergence/accuracy, so we are defaulting to
-    # a constant for now.
-    num_parallel_reads = 24
-  dataset = core_readers.TFRecordDataset(
-      files, num_parallel_reads=num_parallel_reads)
-
-  if shuffle_buffer_size is None:
-    # TODO(josh11b): Auto-tune this value when not specified
-    shuffle_buffer_size = 10000
-  dataset = _maybe_shuffle_and_repeat(
-      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
-
-  # NOTE(mrry): We set `drop_final_batch=True` when `num_epochs is None` to
-  # improve the shape inference, because it makes the batch dimension static.
-  # It is safe to do this because in that case we are repeating the input
-  # indefinitely, and all batches will be full-sized.
-  drop_final_batch = drop_final_batch or num_epochs is None
-
-  if parser_fn is None:
-    dataset = dataset.batch(batch_size, drop_remainder=drop_final_batch)
-  else:
-    # TODO(josh11b): if num_parallel_parser_calls is None, use some function
-    # of num cores instead of map_and_batch's default behavior of one batch.
-    dataset = dataset.apply(batching.map_and_batch(
-        parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls,
-        drop_remainder=drop_final_batch))
-
-  if prefetch_buffer_size == 0:
-    return dataset
-  else:
-    return dataset.prefetch(buffer_size=prefetch_buffer_size)
-
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.make_csv_dataset(...)`.")
 def make_csv_dataset(
     file_pattern,
     batch_size,
@@ -387,7 +112,6 @@ def make_csv_dataset(
     prefetch_buffer_size: An int specifying the number of feature
       batches to prefetch for performance improvement. Recommended value is the
       number of batches consumed per training step. Defaults to auto-tune.
-
     num_parallel_reads: Number of threads used to read CSV records from files.
       If >1, the results will be interleaved.
     sloppy: If `True`, reading performance will be improved at
@@ -411,106 +135,18 @@ def make_csv_dataset(
   Raises:
     ValueError: If any of the arguments is malformed.
   """
-  # Create dataset of all matching filenames
-  filenames = _get_file_names(file_pattern, False)
-  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
-  if shuffle:
-    dataset = dataset.shuffle(len(filenames), shuffle_seed)
-
-  # Clean arguments; figure out column names and defaults
+  return readers.make_csv_dataset(
+      file_pattern, batch_size, column_names, column_defaults, label_name,
+      select_columns, field_delim, use_quote_delim, na_value, header,
+      num_epochs, shuffle, shuffle_buffer_size, shuffle_seed,
+      prefetch_buffer_size, num_parallel_reads, sloppy, num_rows_for_inference,
+      compression_type)
 
-  if column_names is None:
-    if not header:
-      raise ValueError("Cannot infer column names without a header line.")
-    # If column names are not provided, infer from the header lines
-    column_names = _infer_column_names(filenames, field_delim, use_quote_delim)
-  if len(column_names) != len(set(column_names)):
-    raise ValueError("Cannot have duplicate column names.")
 
-  if select_columns is not None:
-    select_columns = _get_sorted_col_indices(select_columns, column_names)
-
-  if column_defaults is not None:
-    column_defaults = [
-        constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x
-        for x in column_defaults
-    ]
-  else:
-    # If column defaults are not provided, infer from records at graph
-    # construction time
-    column_defaults = _infer_column_defaults(
-        filenames, len(column_names), field_delim, use_quote_delim, na_value,
-        header, num_rows_for_inference, select_columns)
-
-  if select_columns is not None and len(column_defaults) != len(select_columns):
-    raise ValueError(
-        "If specified, column_defaults and select_columns must have same "
-        "length."
-    )
-  if select_columns is not None and len(column_names) > len(select_columns):
-    # Pick the relevant subset of column names
-    column_names = [column_names[i] for i in select_columns]
-
-  if label_name is not None and label_name not in column_names:
-    raise ValueError("`label_name` provided must be one of the columns.")
-
-  def filename_to_dataset(filename):
-    return CsvDataset(
-        filename,
-        record_defaults=column_defaults,
-        field_delim=field_delim,
-        use_quote_delim=use_quote_delim,
-        na_value=na_value,
-        select_cols=select_columns,
-        header=header,
-        compression_type=compression_type,
-    )
-
-  def map_fn(*columns):
-    """Organizes columns into a features dictionary.
-
-    Args:
-      *columns: list of `Tensor`s corresponding to one csv record.
-    Returns:
-      An OrderedDict of feature names to values for that particular record. If
-      label_name is provided, extracts the label feature to be returned as the
-      second element of the tuple.
-    """
-    features = collections.OrderedDict(zip(column_names, columns))
-    if label_name is not None:
-      label = features.pop(label_name)
-      return features, label
-    return features
-
-  # Read files sequentially (if num_parallel_reads=1) or in parallel
-  dataset = dataset.apply(
-      interleave_ops.parallel_interleave(
-          filename_to_dataset, cycle_length=num_parallel_reads, sloppy=sloppy))
-
-  dataset = _maybe_shuffle_and_repeat(
-      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
-
-  # Apply batch before map for perf, because map has high overhead relative
-  # to the size of the computation in each map.
-  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
-  # improve the shape inference, because it makes the batch dimension static.
-  # It is safe to do this because in that case we are repeating the input
-  # indefinitely, and all batches will be full-sized.
-  dataset = dataset.batch(batch_size=batch_size,
-                          drop_remainder=num_epochs is None)
-  dataset = dataset_ops.MapDataset(
-      dataset, map_fn, use_inter_op_parallelism=False)
-  dataset = dataset.prefetch(prefetch_buffer_size)
-
-  return dataset
-
-
-_DEFAULT_READER_BUFFER_SIZE_BYTES = 4 * 1024 * 1024  # 4 MB
-
-
-class CsvDataset(dataset_ops.DatasetSource):
+class CsvDataset(readers.CsvDataset):
   """A Dataset comprising lines from one or more CSV files."""
 
+  @deprecation.deprecated(None, "Use `tf.data.experimental.CsvDataset(...)`.")
   def __init__(self,
                filenames,
                record_defaults,
@@ -521,140 +157,13 @@ class CsvDataset(dataset_ops.DatasetSource):
                use_quote_delim=True,
                na_value="",
                select_cols=None):
-    """Creates a `CsvDataset` by reading and decoding CSV files.
-
-    The elements of this dataset correspond to records from the file(s).
-    RFC 4180 format is expected for CSV files
-    (https://tools.ietf.org/html/rfc4180)
-    Note that we allow leading and trailing spaces with int or float field.
-
-
-    For example, suppose we have a file 'my_file0.csv' with four CSV columns of
-    different data types:
-    ```
-    abcdefg,4.28E10,5.55E6,12
-    hijklmn,-5.3E14,,2
-    ```
-
-    We can construct a CsvDataset from it as follows:
-    ```python
-    dataset = tf.contrib.data.CsvDataset(
-      "my_file*.csv",
-      [tf.float32,  # Required field, use dtype or empty tensor
-       tf.constant([0.0], dtype=tf.float32),  # Optional field, default to 0.0
-       tf.int32,  # Required field, use dtype or empty tensor
-       ],
-      select_cols=[1,2,3]  # Only parse last three columns
-    )
-    ```
-
-    The expected output of its iterations is:
-    ```python
-    next_element = dataset.make_one_shot_iterator().get_next()
-    with tf.Session() as sess:
-      while True:
-        try:
-          print(sess.run(next_element))
-        except tf.errors.OutOfRangeError:
-          break
-
-    >> (4.28e10, 5.55e6, 12)
-    >> (-5.3e14, 0.0, 2)
-    ```
-
-    Args:
-      filenames: A `tf.string` tensor containing one or more filenames.
-      record_defaults: A list of default values for the CSV fields. Each item in
-        the list is either a valid CSV `DType` (float32, float64, int32, int64,
-        string), or a `Tensor` object with one of the above types. One per
-        column of CSV data, with either a scalar `Tensor` default value for the
-        column if it is optional, or `DType` or empty `Tensor` if required. If
-        both this and `select_columns` are specified, these must have the same
-        lengths, and `column_defaults` is assumed to be sorted in order of
-        increasing column index.
-      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
-        `""` (no compression), `"ZLIB"`, or `"GZIP"`. Defaults to no
-        compression.
-      buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes
-        to buffer while reading files. Defaults to 4MB.
-      header: (Optional.) A `tf.bool` scalar indicating whether the CSV file(s)
-        have header line(s) that should be skipped when parsing. Defaults to
-        `False`.
-      field_delim: (Optional.) A `tf.string` scalar containing the delimiter
-        character that separates fields in a record. Defaults to `","`.
-      use_quote_delim: (Optional.) A `tf.bool` scalar. If `False`, treats
-        double quotation marks as regular characters inside of string fields
-        (ignoring RFC 4180, Section 2, Bullet 5). Defaults to `True`.
-      na_value: (Optional.) A `tf.string` scalar indicating a value that will
-        be treated as NA/NaN.
-      select_cols: (Optional.) A sorted list of column indices to select from
-        the input data. If specified, only this subset of columns will be
-        parsed. Defaults to parsing all columns.
-    """
-    super(CsvDataset, self).__init__()
-    self._filenames = ops.convert_to_tensor(
-        filenames, dtype=dtypes.string, name="filenames")
-    self._compression_type = convert.optional_param_to_tensor(
-        "compression_type",
-        compression_type,
-        argument_default="",
-        argument_dtype=dtypes.string)
-    record_defaults = [
-        constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x
-        for x in record_defaults
-    ]
-    self._record_defaults = ops.convert_n_to_tensor(
-        record_defaults, name="record_defaults")
-    self._buffer_size = convert.optional_param_to_tensor(
-        "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
-    self._header = ops.convert_to_tensor(
-        header, dtype=dtypes.bool, name="header")
-    self._field_delim = ops.convert_to_tensor(
-        field_delim, dtype=dtypes.string, name="field_delim")
-    self._use_quote_delim = ops.convert_to_tensor(
-        use_quote_delim, dtype=dtypes.bool, name="use_quote_delim")
-    self._na_value = ops.convert_to_tensor(
-        na_value, dtype=dtypes.string, name="na_value")
-    self._select_cols = convert.optional_param_to_tensor(
-        "select_cols",
-        select_cols,
-        argument_default=[],
-        argument_dtype=dtypes.int64,
-    )
-    self._output_shapes = tuple(
-        tensor_shape.scalar() for _ in range(len(record_defaults)))
-    self._output_types = tuple(d.dtype for d in self._record_defaults)
-    self._output_classes = tuple(
-        ops.Tensor for _ in range(len(record_defaults)))
-
-  def _as_variant_tensor(self):
-    # Constructs graph node for the dataset op.
-    return gen_experimental_dataset_ops.experimental_csv_dataset(
-        filenames=self._filenames,
-        record_defaults=self._record_defaults,
-        buffer_size=self._buffer_size,
-        header=self._header,
-        output_shapes=self._output_shapes,
-        field_delim=self._field_delim,
-        use_quote_delim=self._use_quote_delim,
-        na_value=self._na_value,
-        select_cols=self._select_cols,
-        compression_type=self._compression_type,
-    )
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_classes(self):
-    return self._output_classes
+    super(CsvDataset, self).__init__(
+        filenames, record_defaults, compression_type, buffer_size, header,
+        field_delim, use_quote_delim, na_value, select_cols)
 
 
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.make_batched_features_dataset(...)`.")
 def make_batched_features_dataset(file_pattern,
                                   batch_size,
                                   features,
@@ -759,57 +268,15 @@ def make_batched_features_dataset(file_pattern,
   Raises:
     ValueError: If `label_key` is not one of the `features` keys.
   """
-  # Create dataset of all matching filenames
-  filenames = _get_file_names(file_pattern, False)
-  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
-  if shuffle:
-    dataset = dataset.shuffle(len(filenames), shuffle_seed)
-
-  # Read `Example` records from files as tensor objects.
-  if reader_args is None:
-    reader_args = []
+  return readers.make_batched_features_dataset(
+      file_pattern, batch_size, features, reader, label_key, reader_args,
+      num_epochs, shuffle, shuffle_buffer_size, shuffle_seed,
+      prefetch_buffer_size, reader_num_threads, parser_num_threads,
+      sloppy_ordering, drop_final_batch)
 
-  # Read files sequentially (if reader_num_threads=1) or in parallel
-  dataset = dataset.apply(
-      interleave_ops.parallel_interleave(
-          lambda filename: reader(filename, *reader_args),
-          cycle_length=reader_num_threads,
-          sloppy=sloppy_ordering))
 
-  # Extract values if the `Example` tensors are stored as key-value tuples.
-  if dataset.output_types == (dtypes.string, dtypes.string):
-    dataset = dataset_ops.MapDataset(
-        dataset, lambda _, v: v, use_inter_op_parallelism=False)
-
-  # Apply dataset repeat and shuffle transformations.
-  dataset = _maybe_shuffle_and_repeat(
-      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
-
-  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
-  # improve the shape inference, because it makes the batch dimension static.
-  # It is safe to do this because in that case we are repeating the input
-  # indefinitely, and all batches will be full-sized.
-  dataset = dataset.batch(
-      batch_size, drop_remainder=drop_final_batch or num_epochs is None)
-
-  # Parse `Example` tensors to a dictionary of `Feature` tensors.
-  dataset = dataset.apply(
-      parsing_ops.parse_example_dataset(
-          features, num_parallel_calls=parser_num_threads))
-
-  if label_key:
-    if label_key not in features:
-      raise ValueError(
-          "The `label_key` provided (%r) must be one of the `features` keys." %
-          label_key)
-    dataset = dataset.map(lambda x: (x, x.pop(label_key)))
-
-  dataset = dataset.prefetch(prefetch_buffer_size)
-  return dataset
-
-
-@deprecation.deprecated(None,
-                        "Use `tf.contrib.data.make_batched_features_dataset`")
+@deprecation.deprecated(
+    None, "Use `tf.data.experimental.make_batched_features_dataset(...)`")
 def read_batch_features(file_pattern,
                         batch_size,
                         features,
@@ -879,7 +346,7 @@ def read_batch_features(file_pattern,
   Returns:
     A dict from keys in features to `Tensor` or `SparseTensor` objects.
   """
-  dataset = make_batched_features_dataset(
+  dataset = readers.make_batched_features_dataset(
       file_pattern,
       batch_size,
       features,
@@ -893,96 +360,13 @@ def read_batch_features(file_pattern,
   return outputs
 
 
-def _get_file_names(file_pattern, shuffle):
-  """Parse list of file names from pattern, optionally shuffled.
-
-  Args:
-    file_pattern: File glob pattern, or list of glob patterns.
-    shuffle: Whether to shuffle the order of file names.
-
-  Returns:
-    List of file names matching `file_pattern`.
-
-  Raises:
-    ValueError: If `file_pattern` is empty, or pattern matches no files.
-  """
-  if isinstance(file_pattern, list):
-    if not file_pattern:
-      raise ValueError("File pattern is empty.")
-    file_names = []
-    for entry in file_pattern:
-      file_names.extend(gfile.Glob(entry))
-  else:
-    file_names = list(gfile.Glob(file_pattern))
-
-  if not file_names:
-    raise ValueError("No files match %s." % file_pattern)
-
-  # Sort files so it will be deterministic for unit tests.
-  if not shuffle:
-    file_names = sorted(file_names)
-  return file_names
-
-
-class SqlDataset(dataset_ops.DatasetSource):
+class SqlDataset(readers.SqlDataset):
   """A `Dataset` consisting of the results from a SQL query."""
 
+  @deprecation.deprecated(None, "Use `tf.data.experimental.SqlDataset(...)`.")
   def __init__(self, driver_name, data_source_name, query, output_types):
-    """Creates a `SqlDataset`.
-
-    `SqlDataset` allows a user to read data from the result set of a SQL query.
-    For example:
-
-    ```python
-    dataset = tf.contrib.data.SqlDataset("sqlite", "/foo/bar.sqlite3",
-                                         "SELECT name, age FROM people",
-                                         (tf.string, tf.int32))
-    iterator = dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-    # Prints the rows of the result set of the above query.
-    while True:
-      try:
-        print(sess.run(next_element))
-      except tf.errors.OutOfRangeError:
-        break
-    ```
-
-    Args:
-      driver_name: A 0-D `tf.string` tensor containing the database type.
-        Currently, the only supported value is 'sqlite'.
-      data_source_name: A 0-D `tf.string` tensor containing a connection string
-        to connect to the database.
-      query: A 0-D `tf.string` tensor containing the SQL query to execute.
-      output_types: A tuple of `tf.DType` objects representing the types of the
-        columns returned by `query`.
-    """
-    super(SqlDataset, self).__init__()
-    self._driver_name = ops.convert_to_tensor(
-        driver_name, dtype=dtypes.string, name="driver_name")
-    self._data_source_name = ops.convert_to_tensor(
-        data_source_name, dtype=dtypes.string, name="data_source_name")
-    self._query = ops.convert_to_tensor(
-        query, dtype=dtypes.string, name="query")
-    self._output_types = output_types
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.sql_dataset(self._driver_name,
-                                       self._data_source_name, self._query,
-                                       nest.flatten(self.output_types),
-                                       nest.flatten(self.output_shapes))
-
-  @property
-  def output_classes(self):
-    return nest.map_structure(lambda _: ops.Tensor, self._output_types)
-
-  @property
-  def output_shapes(self):
-    return nest.map_structure(lambda _: tensor_shape.TensorShape([]),
-                              self._output_types)
-
-  @property
-  def output_types(self):
-    return self._output_types
+    super(SqlDataset, self).__init__(
+        driver_name, data_source_name, query, output_types)
 
 
 class LMDBDataset(dataset_ops.DatasetSource):
diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py
index 75642f143e..29d77528d9 100644
--- a/tensorflow/contrib/data/python/ops/resampling.py
+++ b/tensorflow/contrib/data/python/ops/resampling.py
@@ -17,22 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import interleave_ops
-from tensorflow.contrib.data.python.ops import scan_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import logging_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
+from tensorflow.python.data.experimental.ops import resampling
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.rejection_resample(...)`.")
 def rejection_resample(class_func, target_dist, initial_dist=None, seed=None):
   """A transformation that resamples a dataset to achieve a target distribution.
 
@@ -52,243 +42,5 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    """Function from `Dataset` to `Dataset` that applies the transformation."""
-    target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
-    class_values_ds = dataset.map(class_func)
-
-    # Get initial distribution.
-    if initial_dist is not None:
-      initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")
-      acceptance_dist, prob_of_original = (
-          _calculate_acceptance_probs_with_mixing(initial_dist_t,
-                                                  target_dist_t))
-      initial_dist_ds = dataset_ops.Dataset.from_tensors(
-          initial_dist_t).repeat()
-      acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
-          acceptance_dist).repeat()
-      prob_of_original_ds = dataset_ops.Dataset.from_tensors(
-          prob_of_original).repeat()
-    else:
-      initial_dist_ds = _estimate_initial_dist_ds(
-          target_dist_t, class_values_ds)
-      acceptance_and_original_prob_ds = initial_dist_ds.map(
-          lambda initial: _calculate_acceptance_probs_with_mixing(
-              initial, target_dist_t))
-      acceptance_dist_ds = acceptance_and_original_prob_ds.map(
-          lambda accept_prob, _: accept_prob)
-      prob_of_original_ds = acceptance_and_original_prob_ds.map(
-          lambda _, prob_original: prob_original)
-    filtered_ds = _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds,
-                             class_values_ds, seed)
-    # Prefetch filtered dataset for speed.
-    filtered_ds = filtered_ds.prefetch(3)
-
-    prob_original_static = _get_prob_original_static(
-        initial_dist_t, target_dist_t) if initial_dist is not None else None
-    if prob_original_static == 1:
-      return dataset_ops.Dataset.zip((class_values_ds, dataset))
-    elif prob_original_static == 0:
-      return filtered_ds
-    else:
-      return interleave_ops.sample_from_datasets(
-          [dataset_ops.Dataset.zip((class_values_ds, dataset)), filtered_ds],
-          weights=prob_of_original_ds.map(lambda prob: [(prob, 1.0 - prob)]),
-          seed=seed)
-
-  return _apply_fn
-
-
-def _get_prob_original_static(initial_dist_t, target_dist_t):
-  """Returns the static probability of sampling from the original.
-
-  `tensor_util.constant_value(prob_of_original)` returns `None` if it encounters
-  an Op that it isn't defined for. We have some custom logic to avoid this.
-
-  Args:
-    initial_dist_t: A tensor of the initial distribution.
-    target_dist_t: A tensor of the target distribution.
-
-  Returns:
-    The probability of sampling from the original distribution as a constant,
-    if it is a constant, or `None`.
-  """
-  init_static = tensor_util.constant_value(initial_dist_t)
-  target_static = tensor_util.constant_value(target_dist_t)
-
-  if init_static is None or target_static is None:
-    return None
-  else:
-    return np.min(target_static / init_static)
-
-
-def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds,
-               seed):
-  """Filters a dataset based on per-class acceptance probabilities.
-
-  Args:
-    dataset: The dataset to be filtered.
-    acceptance_dist_ds: A dataset of acceptance probabilities.
-    initial_dist_ds: A dataset of the initial probability distribution, given or
-        estimated.
-    class_values_ds: A dataset of the corresponding classes.
-    seed: (Optional.) Python integer seed for the resampler.
-
-  Returns:
-    A dataset of (class value, data) after filtering.
-  """
-  def maybe_warn_on_large_rejection(accept_dist, initial_dist):
-    proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist)
-    return control_flow_ops.cond(
-        math_ops.less(proportion_rejected, .5),
-        lambda: accept_dist,
-        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
-            accept_dist, [proportion_rejected, initial_dist, accept_dist],
-            message="Proportion of examples rejected by sampler is high: ",
-            summarize=100,
-            first_n=10))
-
-  acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
-                                                 initial_dist_ds))
-                        .map(maybe_warn_on_large_rejection))
-
-  def _gather_and_copy(class_val, acceptance_prob, data):
-    return class_val, array_ops.gather(acceptance_prob, class_val), data
-
-  current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip(
-      (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy)
-  filtered_ds = (
-      current_probabilities_and_class_and_data_ds
-      .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
-  return filtered_ds.map(lambda class_value, _, data: (class_value, data))
-
-
-def _estimate_initial_dist_ds(
-    target_dist_t, class_values_ds, dist_estimation_batch_size=32,
-    smoothing_constant=10):
-  num_classes = (target_dist_t.shape[0].value or
-                 array_ops.shape(target_dist_t)[0])
-  initial_examples_per_class_seen = array_ops.fill(
-      [num_classes], np.int64(smoothing_constant))
-
-  def update_estimate_and_tile(num_examples_per_class_seen, c):
-    updated_examples_per_class_seen, dist = _estimate_data_distribution(
-        c, num_examples_per_class_seen)
-    tiled_dist = array_ops.tile(
-        array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
-    return updated_examples_per_class_seen, tiled_dist
-
-  initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
-                     .apply(scan_ops.scan(initial_examples_per_class_seen,
-                                          update_estimate_and_tile))
-                     .apply(batching.unbatch()))
-
-  return initial_dist_ds
-
-
-def _get_target_to_initial_ratio(initial_probs, target_probs):
-  # Add tiny to initial_probs to avoid divide by zero.
-  denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny)
-  return target_probs / denom
-
-
-def _estimate_data_distribution(c, num_examples_per_class_seen):
-  """Estimate data distribution as labels are seen.
-
-  Args:
-    c: The class labels.  Type `int32`, shape `[batch_size]`.
-    num_examples_per_class_seen: Type `int64`, shape `[num_classes]`,
-      containing counts.
-
-  Returns:
-    num_examples_per_lass_seen: Updated counts.  Type `int64`, shape
-      `[num_classes]`.
-    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
-  """
-  num_classes = num_examples_per_class_seen.get_shape()[0].value
-  # Update the class-count based on what labels are seen in batch.
-  num_examples_per_class_seen = math_ops.add(
-      num_examples_per_class_seen, math_ops.reduce_sum(
-          array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
-  init_prob_estimate = math_ops.truediv(
-      num_examples_per_class_seen,
-      math_ops.reduce_sum(num_examples_per_class_seen))
-  dist = math_ops.cast(init_prob_estimate, dtypes.float32)
-  return num_examples_per_class_seen, dist
-
-
-def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs):
-  """Calculates the acceptance probabilities and mixing ratio.
-
-  In this case, we assume that we can *either* sample from the original data
-  distribution with probability `m`, or sample from a reshaped distribution
-  that comes from rejection sampling on the original distribution. This
-  rejection sampling is done on a per-class basis, with `a_i` representing the
-  probability of accepting data from class `i`.
-
-  This method is based on solving the following analysis for the reshaped
-  distribution:
-
-  Let F be the probability of a rejection (on any example).
-  Let p_i be the proportion of examples in the data in class i (init_probs)
-  Let a_i is the rate the rejection sampler should *accept* class i
-  Let t_i is the target proportion in the minibatches for class i (target_probs)
-
-  ```
-  F = sum_i(p_i * (1-a_i))
-    = 1 - sum_i(p_i * a_i)     using sum_i(p_i) = 1
-  ```
-
-  An example with class `i` will be accepted if `k` rejections occur, then an
-  example with class `i` is seen by the rejector, and it is accepted. This can
-  be written as follows:
-
-  ```
-  t_i = sum_k=0^inf(F^k * p_i * a_i)
-      = p_i * a_j / (1 - F)    using geometric series identity, since 0 <= F < 1
-      = p_i * a_i / sum_j(p_j * a_j)        using F from above
-  ```
-
-  Note that the following constraints hold:
-  ```
-  0 <= p_i <= 1, sum_i(p_i) = 1
-  0 <= a_i <= 1
-  0 <= t_i <= 1, sum_i(t_i) = 1
-  ```
-
-  A solution for a_i in terms of the other variables is the following:
-    ```a_i = (t_i / p_i) / max_i[t_i / p_i]```
-
-  If we try to minimize the amount of data rejected, we get the following:
-
-  M_max = max_i [ t_i / p_i ]
-  M_min = min_i [ t_i / p_i ]
-
-  The desired probability of accepting data if it comes from class `i`:
-
-  a_i = (t_i/p_i - m) / (M_max - m)
-
-  The desired probability of pulling a data element from the original dataset,
-  rather than the filtered one:
-
-  m = M_min
-
-  Args:
-    initial_probs: A Tensor of the initial probability distribution, given or
-      estimated.
-    target_probs: A Tensor of the corresponding classes.
-
-  Returns:
-    (A 1D Tensor with the per-class acceptance probabilities, the desired
-    probability of pull from the original distribution.)
-  """
-  ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs)
-  max_ratio = math_ops.reduce_max(ratio_l)
-  min_ratio = math_ops.reduce_min(ratio_l)
-
-  # Target prob to sample from original distribution.
-  m = min_ratio
-
-  # TODO(joelshor): Simplify fraction, if possible.
-  a_i = (ratio_l - m) / (max_ratio - m)
-  return a_i, m
+  return resampling.rejection_resample(class_func, target_dist, initial_dist,
+                                       seed)
diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py
index c52582cd35..0ca9fddb23 100644
--- a/tensorflow/contrib/data/python/ops/scan_ops.py
+++ b/tensorflow/contrib/data/python/ops/scan_ops.py
@@ -17,137 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import gen_dataset_ops
-
-
-class _ScanDataset(dataset_ops.UnaryDataset):
-  """A dataset that scans a function across its input."""
-
-  def __init__(self, input_dataset, initial_state, scan_func):
-    """See `scan()` for details."""
-    super(_ScanDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-
-    with ops.name_scope("initial_state"):
-      # Convert any `SparseTensorValue`s to `SparseTensor`s and all other
-      # values to tensors.
-      self._initial_state = nest.pack_sequence_as(initial_state, [
-          sparse_tensor.SparseTensor.from_value(t)
-          if sparse_tensor.is_sparse(t) else ops.convert_to_tensor(
-              t, name="component_%d" % i)
-          for i, t in enumerate(nest.flatten(initial_state))
-      ])
-
-    # Compute initial values for the state classes, shapes and types based on
-    # the initial state. The shapes may be refined by running `tf_scan_func` one
-    # or more times below.
-    self._state_classes = sparse.get_classes(self._initial_state)
-    self._state_shapes = nest.pack_sequence_as(
-        self._initial_state,
-        [t.get_shape() for t in nest.flatten(self._initial_state)])
-    self._state_types = nest.pack_sequence_as(
-        self._initial_state,
-        [t.dtype for t in nest.flatten(self._initial_state)])
-
-    # Will be populated by calling `tf_scan_func`.
-    self._output_classes = None
-    self._output_shapes = None
-    self._output_types = None
-
-    # Iteratively rerun the scan function until reaching a fixed point on
-    # `self._state_shapes`.
-    need_to_rerun = True
-    while need_to_rerun:
-
-      wrapped_func = dataset_ops.StructuredFunctionWrapper(
-          scan_func, "tf.contrib.data.scan()",
-          input_classes=(self._state_classes, input_dataset.output_classes),
-          input_shapes=(self._state_shapes, input_dataset.output_shapes),
-          input_types=(self._state_types, input_dataset.output_types),
-          add_to_graph=False)
-      if not (
-          isinstance(wrapped_func.output_types, collections.Sequence) and
-          len(wrapped_func.output_types) == 2):
-        raise TypeError("The scan function must return a pair comprising the "
-                        "new state and the output value.")
-
-      new_state_classes, self._output_classes = wrapped_func.output_classes
-
-      # Extract and validate class information from the returned values.
-      for new_state_class, state_class in zip(
-          nest.flatten(new_state_classes),
-          nest.flatten(self._state_classes)):
-        if not issubclass(new_state_class, state_class):
-          raise TypeError(
-              "The element classes for the new state must match the initial "
-              "state. Expected %s; got %s." %
-              (self._state_classes, new_state_classes))
-
-      # Extract and validate type information from the returned values.
-      new_state_types, self._output_types = wrapped_func.output_types
-      for new_state_type, state_type in zip(
-          nest.flatten(new_state_types), nest.flatten(self._state_types)):
-        if new_state_type != state_type:
-          raise TypeError(
-              "The element types for the new state must match the initial "
-              "state. Expected %s; got %s." %
-              (self._state_types, new_state_types))
-
-      # Extract shape information from the returned values.
-      new_state_shapes, self._output_shapes = wrapped_func.output_shapes
-
-      flat_state_shapes = nest.flatten(self._state_shapes)
-      flat_new_state_shapes = nest.flatten(new_state_shapes)
-      weakened_state_shapes = [
-          original.most_specific_compatible_shape(new)
-          for original, new in zip(flat_state_shapes, flat_new_state_shapes)
-      ]
-
-      need_to_rerun = False
-      for original_shape, weakened_shape in zip(flat_state_shapes,
-                                                weakened_state_shapes):
-        if original_shape.ndims is not None and (
-            weakened_shape.ndims is None or
-            original_shape.as_list() != weakened_shape.as_list()):
-          need_to_rerun = True
-          break
-
-      if need_to_rerun:
-        self._state_shapes = nest.pack_sequence_as(self._state_shapes,
-                                                   weakened_state_shapes)
-
-    self._scan_func = wrapped_func.function
-    self._scan_func.add_to_graph(ops.get_default_graph())
-
-  def _as_variant_tensor(self):
-    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
-    return gen_dataset_ops.scan_dataset(
-        input_t,
-        nest.flatten(sparse.serialize_sparse_tensors(self._initial_state)),
-        self._scan_func.captured_inputs,
-        f=self._scan_func,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._output_classes
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
+from tensorflow.python.data.experimental.ops import scan_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.scan(...)`.")
 def scan(initial_state, scan_func):
   """A transformation that scans a function across an input dataset.
 
@@ -168,7 +42,4 @@ def scan(initial_state, scan_func):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-  def _apply_fn(dataset):
-    return _ScanDataset(dataset, initial_state, scan_func)
-
-  return _apply_fn
+  return scan_ops.scan(initial_state, scan_func)
diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py
index 985d1d87d0..329b34fdfe 100644
--- a/tensorflow/contrib/data/python/ops/shuffle_ops.py
+++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py
@@ -17,54 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import random_seed
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import gen_dataset_ops
-
-
-class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that fuses `shuffle` and `repeat`."""
-
-  def __init__(self, input_dataset, buffer_size, count=None, seed=None):
-    super(_ShuffleAndRepeatDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._buffer_size = ops.convert_to_tensor(
-        buffer_size, dtype=dtypes.int64, name="buffer_size")
-    if count is None:
-      self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")
-    else:
-      self._count = ops.convert_to_tensor(
-          count, dtype=dtypes.int64, name="count")
-    self._seed, self._seed2 = random_seed.get_seed(seed)
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    input_resource = self._input_dataset._as_variant_tensor()
-    return gen_dataset_ops.shuffle_and_repeat_dataset(
-        input_resource,
-        buffer_size=self._buffer_size,
-        count=self._count,
-        seed=self._seed,
-        seed2=self._seed2,
-        **dataset_ops.flat_structure(self))
-    # pylint: enable=protected-access
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
+from tensorflow.python.data.experimental.ops import shuffle_ops
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None,
+                        "Use `tf.data.experimental.shuffle_and_repeat(...)`.")
 def shuffle_and_repeat(buffer_size, count=None, seed=None):
   """Shuffles and repeats a Dataset returning a new permutation for each epoch.
 
@@ -93,8 +51,4 @@ def shuffle_and_repeat(buffer_size, count=None, seed=None):
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):  # pylint: disable=missing-docstring
-    return _ShuffleAndRepeatDataset(dataset, buffer_size, count, seed)
-
-  return _apply_fn
+  return shuffle_ops.shuffle_and_repeat(buffer_size, count, seed)
diff --git a/tensorflow/contrib/data/python/ops/threadpool.py b/tensorflow/contrib/data/python/ops/threadpool.py
index f73c3fd9cb..20cceb4647 100644
--- a/tensorflow/contrib/data/python/ops/threadpool.py
+++ b/tensorflow/contrib/data/python/ops/threadpool.py
@@ -17,88 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import threading
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.eager import context
-from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
-from tensorflow.python.ops import resource_variable_ops
-
-_uid_counter = 0
-_uid_lock = threading.Lock()
-
-
-def _generate_shared_name(prefix):
-  with _uid_lock:
-    global _uid_counter
-    uid = _uid_counter
-    _uid_counter += 1
-  return "{}{}".format(prefix, uid)
-
-
-# TODO(b/73383364): Properly export in the `tf.contrib.data` API when stable
-# or make private / remove.
-class PrivateThreadPool(object):
-  """A stateful resource that represents a private thread pool."""
-
-  def __init__(self, num_threads, display_name=None,
-               max_intra_op_parallelism=1):
-    """Creates a `PrivateThreadPool` with the given number of threads."""
-    if context.executing_eagerly():
-      shared_name = _generate_shared_name("privatethreadpool")
-      self._resource = ged_ops.experimental_thread_pool_handle(
-          num_threads=num_threads,
-          max_intra_op_parallelism=max_intra_op_parallelism,
-          display_name=display_name,
-          shared_name=shared_name)
-      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
-          handle=self._resource, handle_device=context.context().device_name)
-    else:
-      self._resource = ged_ops.experimental_thread_pool_handle(
-          num_threads=num_threads,
-          max_intra_op_parallelism=max_intra_op_parallelism,
-          display_name=display_name)
-
-
-class _ThreadPoolDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that acts as an identity, and sets a custom threadpool."""
-
-  def __init__(self, input_dataset, thread_pool):
-    super(_ThreadPoolDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._thread_pool = thread_pool
-
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_thread_pool_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._thread_pool._resource,  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-
-# TODO(b/73383364): Properly export in the `tf.contrib.data` API when stable
-# or make private / remove.
-def override_threadpool(dataset, thread_pool):
-  """Returns a new dataset that uses the given thread pool for its operations.
-
-  Args:
-    dataset: A `tf.data.Dataset` object.
-    thread_pool: A `PrivateThreadPool` object.
-
-  Returns:
-    A dataset containing the same values as `dataset`, but which uses
-    `thread_pool` to compute any of its parallel operations (such as
-    `tf.data.Dataset.map`).
-  """
-  return _ThreadPoolDataset(dataset, thread_pool)
+# pylint: disable=unused-import
+from tensorflow.python.data.experimental.ops.threadpool import override_threadpool
+from tensorflow.python.data.experimental.ops.threadpool import PrivateThreadPool
diff --git a/tensorflow/contrib/data/python/ops/unique.py b/tensorflow/contrib/data/python/ops/unique.py
index ed363a7090..909d06c677 100644
--- a/tensorflow/contrib/data/python/ops/unique.py
+++ b/tensorflow/contrib/data/python/ops/unique.py
@@ -17,11 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.data.experimental.ops import unique as experimental_unique
+from tensorflow.python.util import deprecation
 
 
+@deprecation.deprecated(None, "Use `tf.data.experimental.unique()`.")
 def unique():
   """Creates a `Dataset` from another `Dataset`, discarding duplicates.
 
@@ -39,39 +39,4 @@ def unique():
     A `Dataset` transformation function, which can be passed to
     `tf.data.Dataset.apply`.
   """
-
-  def _apply_fn(dataset):
-    return _UniqueDataset(dataset)
-
-  return _apply_fn
-
-
-class _UniqueDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` contains the unique elements from its input."""
-
-  def __init__(self, input_dataset):
-    """See `unique()` for details."""
-    super(_UniqueDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    if input_dataset.output_types not in (dtypes.int32, dtypes.int64,
-                                          dtypes.string):
-      raise TypeError(
-          "`tf.contrib.data.unique()` only supports inputs with a single "
-          "`tf.int32`, `tf.int64`, or `tf.string` component.")
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_unique_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
+  return experimental_unique.unique()
diff --git a/tensorflow/contrib/data/python/ops/writers.py b/tensorflow/contrib/data/python/ops/writers.py
index c455fdcba6..42fb69bf07 100644
--- a/tensorflow/contrib/data/python/ops/writers.py
+++ b/tensorflow/contrib/data/python/ops/writers.py
@@ -17,42 +17,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import convert
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.data.experimental.ops import writers
+from tensorflow.python.util import deprecation
 
 
-class TFRecordWriter(object):
+class TFRecordWriter(writers.TFRecordWriter):
   """Writes data to a TFRecord file."""
 
+  @deprecation.deprecated(
+      None, "Use `tf.data.experimental.TFRecordWriter(...)`.")
   def __init__(self, filename, compression_type=None):
-    self._filename = ops.convert_to_tensor(
-        filename, dtypes.string, name="filename")
-    self._compression_type = convert.optional_param_to_tensor(
-        "compression_type",
-        compression_type,
-        argument_default="",
-        argument_dtype=dtypes.string)
-
-  def write(self, dataset):
-    """Returns a `tf.Operation` to write a dataset to a file.
-
-    Args:
-      dataset: a `tf.data.Dataset` whose elements are to be written to a file
-
-    Returns:
-      A `tf.Operation` that, when run, writes contents of `dataset` to a file.
-    """
-    if not isinstance(dataset, dataset_ops.Dataset):
-      raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
-    if (dataset.output_types != dtypes.string or
-        dataset.output_shapes != tensor_shape.scalar()):
-      raise TypeError(
-          "`dataset` must produce scalar `DT_STRING` tensors whereas it "
-          "produces shape {0} and types {1}".format(dataset.output_shapes,
-                                                    dataset.output_types))
-    return gen_dataset_ops.dataset_to_tf_record(
-        dataset._as_variant_tensor(), self._filename, self._compression_type)  # pylint: disable=protected-access
+    super(TFRecordWriter, self).__init__(filename, compression_type)
diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
index 8d949943b7..d48aa9c89b 100644
--- a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
+++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import warnings
 
-from tensorflow.contrib.data.python.ops import prefetching_ops
+from tensorflow.python.data.experimental.ops import prefetching_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.util import nest as data_nest
diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py
index 135095a979..3aed121233 100644
--- a/tensorflow/contrib/eager/python/datasets.py
+++ b/tensorflow/contrib/eager/python/datasets.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import prefetching_ops
+from tensorflow.python.data.experimental.ops import prefetching_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
@@ -54,7 +54,7 @@ class Iterator(iterator_ops.EagerIterator):
     """
     if isinstance(dataset, prefetching_ops._PrefetchToDeviceDataset):  # pylint: disable=protected-access
       raise TypeError(
-          "`tf.contrib.data.prefetch_to_device()` is not compatible with "
+          "`tf.data.experimental.prefetch_to_device()` is not compatible with "
           "`tf.contrib.eager.Iterator`. Use `for ... in dataset:` to iterate "
           "over the dataset instead.")
 
diff --git a/tensorflow/contrib/eager/python/datasets_test.py b/tensorflow/contrib/eager/python/datasets_test.py
index a753d77580..6a508fc6ba 100644
--- a/tensorflow/contrib/eager/python/datasets_test.py
+++ b/tensorflow/contrib/eager/python/datasets_test.py
@@ -24,11 +24,11 @@ import time
 import numpy as np
 
 from tensorflow.contrib import lookup
-from tensorflow.contrib.data.python.ops import prefetching_ops
-from tensorflow.contrib.data.python.ops import threadpool
-from tensorflow.contrib.data.python.ops import unique
 from tensorflow.contrib.eager.python import datasets
 from tensorflow.python.data import Dataset
+from tensorflow.python.data.experimental.ops import prefetching_ops
+from tensorflow.python.data.experimental.ops import threadpool
+from tensorflow.python.data.experimental.ops import unique
 from tensorflow.python.eager import test
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py b/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py
index 34a9984b0e..d85188de03 100644
--- a/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py
+++ b/tensorflow/contrib/eager/python/examples/revnet/imagenet_input.py
@@ -169,11 +169,11 @@ class ImageNetInput(object):
 
     # Read the data from disk in parallel
     dataset = dataset.apply(
-        tf.contrib.data.parallel_interleave(
+        tf.data.experimental.parallel_interleave(
             fetch_dataset, cycle_length=self.num_parallel_calls, sloppy=True))
     if self.cache:
       dataset = dataset.cache().apply(
-          tf.contrib.data.shuffle_and_repeat(1024 * 16))
+          tf.data.experimental.shuffle_and_repeat(1024 * 16))
     else:
       dataset = dataset.shuffle(1024)
 
@@ -188,9 +188,11 @@ class ImageNetInput(object):
     # batch size. As long as this validation is done with consistent batch size,
     # exactly the same images will be used.
     dataset = dataset.apply(
-        tf.contrib.data.map_and_batch(
-            self.dataset_parser, batch_size=batch_size,
-            num_parallel_batches=self.num_cores, drop_remainder=True))
+        tf.data.experimental.map_and_batch(
+            self.dataset_parser,
+            batch_size=batch_size,
+            num_parallel_batches=self.num_cores,
+            drop_remainder=True))
 
     # Transpose for performance on TPU
     if self.transpose_input:
diff --git a/tensorflow/contrib/estimator/python/estimator/rnn_test.py b/tensorflow/contrib/estimator/python/estimator/rnn_test.py
index 1aebed348d..89506ee661 100644
--- a/tensorflow/contrib/estimator/python/estimator/rnn_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/rnn_test.py
@@ -25,12 +25,12 @@ import tempfile
 import numpy as np
 import six
 
-from tensorflow.contrib.data.python.ops import readers
 from tensorflow.contrib.estimator.python.estimator import head as head_lib
 from tensorflow.contrib.estimator.python.estimator import rnn
 from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.estimator import model_fn
 from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.canned import parsing_utils
diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py
index 89b538d1ba..9e9345e875 100644
--- a/tensorflow/contrib/lookup/lookup_ops_test.py
+++ b/tensorflow/contrib/lookup/lookup_ops_test.py
@@ -23,8 +23,8 @@ import numpy as np
 import six
 
 from tensorflow.contrib import lookup
-from tensorflow.contrib.data.python.ops import counter
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import counter
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/stateless/BUILD b/tensorflow/contrib/stateless/BUILD
index dcbef2881d..a217397c1a 100644
--- a/tensorflow/contrib/stateless/BUILD
+++ b/tensorflow/contrib/stateless/BUILD
@@ -9,19 +9,13 @@ exports_files(["LICENSE"])
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
 
-tf_gen_op_wrapper_py(
-    name = "stateless_random_ops",
-    out = "gen_stateless_random_ops.py",  # cmake chokes without this
-    deps = ["//tensorflow/core:stateless_random_ops_op_lib"],
-)
-
 py_library(
     name = "stateless",
     srcs = ["__init__.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":stateless_random_ops",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:stateless_random_ops_gen",
         "//tensorflow/python:util",
     ],
 )
diff --git a/tensorflow/contrib/stateless/__init__.py b/tensorflow/contrib/stateless/__init__.py
index 0cca40f071..fe23fe0dd8 100644
--- a/tensorflow/contrib/stateless/__init__.py
+++ b/tensorflow/contrib/stateless/__init__.py
@@ -32,10 +32,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.framework import ops
+
 # pylint: disable=wildcard-import
-from tensorflow.contrib.stateless.gen_stateless_random_ops import *
+from tensorflow.python.ops.gen_stateless_random_ops import *
 
-from tensorflow.python.framework import ops
 from tensorflow.python.util.all_util import remove_undocumented
 
 ops.NotDifferentiable("StatelessMultinomial")
diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py
index d879170b68..c694e9c1bc 100644
--- a/tensorflow/contrib/tpu/python/tpu/datasets.py
+++ b/tensorflow/contrib/tpu/python/tpu/datasets.py
@@ -18,8 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import readers
diff --git a/tensorflow/contrib/tpu/tpu_estimator.md b/tensorflow/contrib/tpu/tpu_estimator.md
index 639e708169..b6514e19dc 100644
--- a/tensorflow/contrib/tpu/tpu_estimator.md
+++ b/tensorflow/contrib/tpu/tpu_estimator.md
@@ -87,7 +87,7 @@ handle training:
           label = tf.cast(features["label"], tf.int32)
           return image, label
 
-        dataset = tf.contrib.data.TFRecordDataset(
+        dataset = tf.data.TFRecordDataset(
             filename, buffer_size=FLAGS.dataset_reader_buffer_size)
         dataset = dataset.map(parser).cache().repeat().batch(batch_size)
         images, labels = dataset.make_one_shot_iterator().get_next()
diff --git a/tensorflow/contrib/training/BUILD b/tensorflow/contrib/training/BUILD
index b565ebd073..00295f57f6 100644
--- a/tensorflow/contrib/training/BUILD
+++ b/tensorflow/contrib/training/BUILD
@@ -295,7 +295,6 @@ py_test(
     tags = ["notsan"],
     deps = [
         ":training_py",
-        "//tensorflow/contrib/data/python/kernel_tests/serialization:dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:gradients",
@@ -305,6 +304,7 @@ py_test(
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
         "//tensorflow/python/data",
+        "//tensorflow/python/data/experimental/kernel_tests/serialization:dataset_serialization_test_base",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/contrib/training/python/training/tensor_queue_dataset_test.py b/tensorflow/contrib/training/python/training/tensor_queue_dataset_test.py
index d9b0511a98..c1657fec7b 100644
--- a/tensorflow/contrib/training/python/training/tensor_queue_dataset_test.py
+++ b/tensorflow/contrib/training/python/training/tensor_queue_dataset_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.contrib.training.python.training import tensor_queue_dataset as tqd
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
diff --git a/tensorflow/core/api_def/python_api/api_def_StatelessMultinomial.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatelessMultinomial.pbtxt
new file mode 100644
index 0000000000..d3c70190dd
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StatelessMultinomial.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatelessMultinomial"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StatelessRandomNormal.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatelessRandomNormal.pbtxt
new file mode 100644
index 0000000000..e294325fb8
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StatelessRandomNormal.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatelessRandomNormal"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StatelessRandomUniform.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatelessRandomUniform.pbtxt
new file mode 100644
index 0000000000..95d414c54a
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StatelessRandomUniform.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatelessRandomUniform"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_StatelessTruncatedNormal.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatelessTruncatedNormal.pbtxt
new file mode 100644
index 0000000000..c72bdda94a
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_StatelessTruncatedNormal.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "StatelessTruncatedNormal"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/examples/get_started/regression/test.py b/tensorflow/examples/get_started/regression/test.py
index 0b1477ad96..bb4db6700b 100644
--- a/tensorflow/examples/get_started/regression/test.py
+++ b/tensorflow/examples/get_started/regression/test.py
@@ -29,7 +29,7 @@ import tensorflow.examples.get_started.regression.imports85 as imports85
 sys.modules["imports85"] = imports85
 
 # pylint: disable=g-bad-import-order,g-import-not-at-top
-import tensorflow.contrib.data as data
+import tensorflow.data as data
 
 import tensorflow.examples.get_started.regression.dnn_regression as dnn_regression
 import tensorflow.examples.get_started.regression.linear_regression as linear_regression
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 9275ad767e..fe81254ef7 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1739,6 +1739,14 @@ tf_gen_op_wrapper_private_py(
     ],
 )
 
+tf_gen_op_wrapper_private_py(
+    name = "stateless_random_ops_gen",
+    visibility = [
+        "//tensorflow/contrib/stateless:__pkg__",
+        "//tensorflow/python/data/experimental/ops:__pkg__",
+    ],
+)
+
 tf_gen_op_wrapper_private_py(
     name = "list_ops_gen",
 )
@@ -3302,9 +3310,11 @@ py_library(
             "training/checkpointable/**/*.py",
             # The following targets have their own build rules (same name as the
             # file):
+            "training/basic_session_run_hooks.py",
             "training/checkpoint_management.py",
             "training/saveable_object.py",
             "training/saver.py",
+            "training/session_run_hook.py",
             "training/training_util.py",
         ],
     ),
@@ -3312,6 +3322,7 @@ py_library(
     deps = [
         ":array_ops",
         ":array_ops_gen",
+        ":basic_session_run_hooks",
         ":checkpoint_management",
         ":checkpoint_ops_gen",
         ":client",
@@ -3336,6 +3347,7 @@ py_library(
         ":saver",
         ":sdca_ops",
         ":session",
+        ":session_run_hook",
         ":sparse_ops",
         ":sparse_tensor",
         ":state_ops",
@@ -3379,6 +3391,28 @@ py_library(
     ],
 )
 
+py_library(
+    name = "session_run_hook",
+    srcs = ["training/session_run_hook.py"],
+    srcs_version = "PY2AND3",
+    deps = [":util"],
+)
+
+py_library(
+    name = "basic_session_run_hooks",
+    srcs = ["training/basic_session_run_hooks.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":client",
+        ":framework",
+        ":platform",
+        ":protos_all_py",
+        ":session_run_hook",
+        ":training_util",
+        ":util",
+    ],
+)
+
 py_library(
     name = "saver",
     srcs = ["training/saver.py"],
diff --git a/tensorflow/python/data/BUILD b/tensorflow/python/data/BUILD
index 138141f4fc..e32eeecbb8 100644
--- a/tensorflow/python/data/BUILD
+++ b/tensorflow/python/data/BUILD
@@ -10,6 +10,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
         "//tensorflow/python/data/ops:multi_device_iterator_ops",
diff --git a/tensorflow/python/data/__init__.py b/tensorflow/python/data/__init__.py
index f8b561205e..7536ba668a 100644
--- a/tensorflow/python/data/__init__.py
+++ b/tensorflow/python/data/__init__.py
@@ -22,6 +22,7 @@ from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=unused-import
+from tensorflow.python.data import experimental
 from tensorflow.python.data.ops.dataset_ops import Dataset
 from tensorflow.python.data.ops.iterator_ops import Iterator
 from tensorflow.python.data.ops.readers import FixedLengthRecordDataset
diff --git a/tensorflow/python/data/experimental/BUILD b/tensorflow/python/data/experimental/BUILD
new file mode 100644
index 0000000000..84e761d376
--- /dev/null
+++ b/tensorflow/python/data/experimental/BUILD
@@ -0,0 +1,16 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+py_library(
+    name = "experimental",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:dataset_ops",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
+    ],
+)
diff --git a/tensorflow/python/data/experimental/__init__.py b/tensorflow/python/data/experimental/__init__.py
new file mode 100644
index 0000000000..2ac159d38a
--- /dev/null
+++ b/tensorflow/python/data/experimental/__init__.py
@@ -0,0 +1,109 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental API for building input pipelines.
+
+This module contains experimental `Dataset` sources and transformations that can
+be used in conjunction with the `tf.data.Dataset` API. Note that the
+`tf.data.experimental` API is not subject to the same backwards compatibility
+guarantees as `tf.data`, but we will provide deprecation advice in advance of
+removing existing functionality.
+
+See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
+
+@@Counter
+@@CheckpointInputPipelineHook
+@@CsvDataset
+@@Optional
+@@RandomDataset
+@@Reducer
+@@SqlDataset
+@@TFRecordWriter
+
+@@bucket_by_sequence_length
+@@choose_from_datasets
+@@copy_to_device
+@@dense_to_sparse_batch
+@@enumerate_dataset
+@@get_next_as_optional
+@@get_single_element
+@@group_by_reducer
+@@group_by_window
+@@ignore_errors
+@@latency_stats
+@@make_batched_features_dataset
+@@make_csv_dataset
+@@make_saveable_from_iterator
+@@map_and_batch
+@@parallel_interleave
+@@parse_example_dataset
+@@prefetch_to_device
+@@rejection_resample
+@@sample_from_datasets
+@@scan
+@@set_stats_aggregator
+@@shuffle_and_repeat
+@@StatsAggregator
+@@unbatch
+@@unique
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=unused-import
+
+from tensorflow.python.data.experimental.ops.batching import dense_to_sparse_batch
+from tensorflow.python.data.experimental.ops.batching import map_and_batch
+from tensorflow.python.data.experimental.ops.batching import unbatch
+from tensorflow.python.data.experimental.ops.counter import Counter
+from tensorflow.python.data.experimental.ops.enumerate_ops import enumerate_dataset
+from tensorflow.python.data.experimental.ops.error_ops import ignore_errors
+from tensorflow.python.data.experimental.ops.get_single_element import get_single_element
+from tensorflow.python.data.experimental.ops.grouping import bucket_by_sequence_length
+from tensorflow.python.data.experimental.ops.grouping import group_by_reducer
+from tensorflow.python.data.experimental.ops.grouping import group_by_window
+from tensorflow.python.data.experimental.ops.grouping import Reducer
+from tensorflow.python.data.experimental.ops.interleave_ops import choose_from_datasets
+from tensorflow.python.data.experimental.ops.interleave_ops import parallel_interleave
+from tensorflow.python.data.experimental.ops.interleave_ops import sample_from_datasets
+from tensorflow.python.data.experimental.ops.iterator_ops import CheckpointInputPipelineHook
+from tensorflow.python.data.experimental.ops.iterator_ops import make_saveable_from_iterator
+
+# Optimization constant that can be used to enable auto-tuning.
+from tensorflow.python.data.experimental.ops.optimization import AUTOTUNE
+
+from tensorflow.python.data.experimental.ops.parsing_ops import parse_example_dataset
+from tensorflow.python.data.experimental.ops.prefetching_ops import copy_to_device
+from tensorflow.python.data.experimental.ops.prefetching_ops import prefetch_to_device
+from tensorflow.python.data.experimental.ops.random_ops import RandomDataset
+from tensorflow.python.data.experimental.ops.readers import CsvDataset
+from tensorflow.python.data.experimental.ops.readers import make_batched_features_dataset
+from tensorflow.python.data.experimental.ops.readers import make_csv_dataset
+from tensorflow.python.data.experimental.ops.readers import SqlDataset
+from tensorflow.python.data.experimental.ops.resampling import rejection_resample
+from tensorflow.python.data.experimental.ops.scan_ops import scan
+from tensorflow.python.data.experimental.ops.shuffle_ops import shuffle_and_repeat
+from tensorflow.python.data.experimental.ops.stats_ops import latency_stats
+from tensorflow.python.data.experimental.ops.stats_ops import set_stats_aggregator
+from tensorflow.python.data.experimental.ops.stats_ops import StatsAggregator
+from tensorflow.python.data.experimental.ops.unique import unique
+from tensorflow.python.data.experimental.ops.writers import TFRecordWriter
+from tensorflow.python.data.ops.iterator_ops import get_next_as_optional
+from tensorflow.python.data.ops.optional_ops import Optional
+# pylint: enable=unused-import
+
+from tensorflow.python.util.all_util import remove_undocumented
+remove_undocumented(__name__)
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
new file mode 100644
index 0000000000..a46c30ed2e
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -0,0 +1,569 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+py_test(
+    name = "batch_dataset_op_test",
+    size = "medium",
+    srcs = ["batch_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",  # (b/79552534)
+        "no_pip",
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "bucketing_test",
+    size = "medium",
+    srcs = ["bucketing_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:grouping",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "csv_dataset_op_test",
+    size = "medium",
+    srcs = ["csv_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:session",
+        "//tensorflow/python/data/experimental/ops:error_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/eager:context",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "dataset_constructor_op_test",
+    size = "medium",
+    srcs = ["dataset_constructor_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "manual",
+        "nomac",  # b/62040583
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
+
+py_test(
+    name = "directed_interleave_dataset_test",
+    size = "medium",
+    srcs = ["directed_interleave_dataset_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:random_seed",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "get_single_element_test",
+    size = "small",
+    srcs = ["get_single_element_test.py"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:get_single_element",
+        "//tensorflow/python/data/experimental/ops:grouping",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "indexed_dataset_ops_test",
+    srcs = ["indexed_dataset_ops_test.py"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/experimental/ops:indexed_dataset_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "interleave_dataset_op_test",
+    size = "medium",
+    srcs = ["interleave_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "notap",
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "iterator_ops_test",
+    size = "small",
+    srcs = ["iterator_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/estimator:estimator_py",
+    ],
+)
+
+py_test(
+    name = "map_dataset_op_test",
+    size = "medium",
+    srcs = ["map_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "noasan",  # times out
+        "optonly",
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:error_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "filter_dataset_op_test",
+    size = "medium",
+    srcs = ["filter_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "map_defun_op_test",
+    size = "small",
+    srcs = ["map_defun_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:check_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:data_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:function",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python/data/experimental/ops:map_defun",
+        "//tensorflow/python/data/kernel_tests:test_base",
+    ],
+)
+
+py_test(
+    name = "parsing_ops_test",
+    size = "small",
+    srcs = ["parsing_ops_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:parsing_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "prefetching_ops_test",
+    size = "small",
+    srcs = ["prefetching_ops_test.py"],
+    additional_deps = [
+        "//tensorflow/python/data/experimental/ops:prefetching_ops",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python/compat:compat",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+    tags = ["no_windows_gpu"],
+)
+
+py_test(
+    name = "range_dataset_op_test",
+    size = "small",
+    srcs = ["range_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:counter",
+        "//tensorflow/python/data/experimental/ops:enumerate_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "reader_dataset_ops_test_base",
+    testonly = 1,
+    srcs = [
+        "reader_dataset_ops_test_base.py",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = [
+        "//tensorflow/python/data/experimental/kernel_tests:__pkg__",
+        "//tensorflow/python/data/experimental/kernel_tests/serialization:__pkg__",
+    ],
+    deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:readers",
+    ],
+)
+
+py_test(
+    name = "reader_dataset_ops_test",
+    size = "medium",
+    srcs = ["reader_dataset_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":reader_dataset_ops_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "resample_test",
+    size = "medium",
+    srcs = ["resample_test.py"],
+    shard_count = 2,
+    srcs_version = "PY2AND3",
+    tags = [
+        "noasan",
+        "optonly",
+    ],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:resampling",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "scan_dataset_op_test",
+    size = "small",
+    srcs = ["scan_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:scan_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/eager:context",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_test(
+    name = "shuffle_dataset_op_test",
+    size = "medium",
+    srcs = ["shuffle_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "optonly",
+    ],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/experimental/ops:shuffle_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "sql_dataset_op_test_base",
+    srcs = ["sql_dataset_op_test_base.py"],
+    srcs_version = "PY2AND3",
+    visibility = [
+        "//tensorflow/python/data/experimental/kernel_tests:__pkg__",
+        "//tensorflow/python/data/experimental/kernel_tests/serialization:__pkg__",
+    ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "@org_sqlite//:python",
+    ],
+)
+
+py_test(
+    name = "sql_dataset_op_test",
+    size = "small",
+    srcs = ["sql_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":sql_dataset_op_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+    ],
+)
+
+py_test(
+    name = "stats_dataset_ops_test",
+    size = "medium",
+    srcs = ["stats_dataset_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":reader_dataset_ops_test_base",
+        ":stats_dataset_test_base",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/experimental/ops:stats_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "stats_dataset_test_base",
+    srcs = ["stats_dataset_test_base.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/kernel_tests:test_base",
+    ],
+)
+
+py_test(
+    name = "threadpool_dataset_ops_test",
+    size = "small",
+    srcs = ["threadpool_dataset_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python/data/experimental/ops:threadpool",
+        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "unique_dataset_op_test",
+    size = "small",
+    srcs = ["unique_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_test(
+    name = "writer_ops_test",
+    size = "small",
+    srcs = ["writer_ops_test.py"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:writers",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:readers",
+    ],
+)
diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
similarity index 67%
rename from tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
index fed7de5f2b..8703b2810e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
@@ -23,8 +23,8 @@ import time
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import batching
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -32,7 +32,6 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
@@ -43,7 +42,6 @@ from tensorflow.python.util import compat
 
 class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
-
   def testDenseToSparseBatchDataset(self):
     components = np.random.randint(12, size=(100,)).astype(np.int32)
     iterator = (
@@ -302,128 +300,6 @@ class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(next_element)
 
-  def testBatchAndDropRemainder(self):
-    components = (np.arange(7),
-                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
-                  np.array(37.0) * np.arange(7))
-
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            batching.batch_and_drop_remainder(batch_size))
-        .make_initializable_iterator())
-
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for test_batch_size in [1, 3, 7, 10]:
-        sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size})
-        num_batches = 7 // test_batch_size
-        for i in range(num_batches):
-          result = sess.run(next_element)
-          for component, result_component in zip(components, result):
-            for j in range(test_batch_size):
-              self.assertAllEqual(component[(i * test_batch_size + j)],
-                                  result_component[j])
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(next_element)
-
-  def testBatchAndDropRemainderSparse(self):
-
-    def _sparse(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=[[0]], values=(i * [1]), dense_shape=[1])
-
-    iterator = dataset_ops.Dataset.range(12).map(_sparse).apply(
-        batching.batch_and_drop_remainder(5)).make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(2):
-        actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensorValue(
-            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
-            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
-            dense_shape=[5, 1])
-        self.assertTrue(sparse_tensor.is_sparse(actual))
-        self.assertSparseValuesEqual(actual, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testPaddedBatchAndDropRemainder(self):
-    els = []
-    for length in [3, 6, 9, 4, 12, 10, 2]:
-      els.append((np.array(length), np.arange(length) + 1,
-                  np.array(length * 2)))
-
-    dataset = dataset_ops.Dataset.from_tensors(els[0])
-    for el in els[1:]:
-      dataset = dataset.concatenate(dataset_ops.Dataset.from_tensors(el))
-
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-    iterator = (
-        dataset.apply(
-            batching.padded_batch_and_drop_remainder(
-                batch_size, ([], [None], []))).make_initializable_iterator())
-
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for test_batch_size in [1, 3, 7, 10]:
-        sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size})
-        num_batches = 7 // test_batch_size
-        for i in range(num_batches):
-          result = sess.run(next_element)
-          for component_idx, result_component in enumerate(result):
-            for j in range(test_batch_size):
-              data_idx = i * test_batch_size + j
-              comp = result_component[j]
-              unpadded = comp[comp > 0]
-              if np.isscalar(comp):
-                # The boolean mask indexing above adds a dim back. Rm it.
-                unpadded = unpadded[0]
-              self.assertAllEqual(els[data_idx][component_idx], unpadded)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(next_element)
-
-  def testPaddedBatchAndDropRemainderSparseError(self):
-
-    def _map_fn(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i
-
-    with self.assertRaises(TypeError):
-      _ = dataset_ops.Dataset.range(10).map(_map_fn).apply(
-          batching.padded_batch_and_drop_remainder(5))
-
-  def testBatchAndDropRemainderShapeInference(self):
-    components = (array_ops.placeholder(dtypes.int32),
-                  (array_ops.placeholder(dtypes.int32, shape=[None]),
-                   array_ops.placeholder(dtypes.int32, shape=[20, 30])))
-
-    # Test with a statically known batch size.
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            batching.batch_and_drop_remainder(128)))
-
-    self.assertIs(None, dataset.output_shapes[0].ndims)
-    self.assertEqual([128], dataset.output_shapes[1][0].as_list())
-    self.assertEqual([128, 30], dataset.output_shapes[1][1].as_list())
-
-    # Test with a dynamic batch size: the static shape will be unknown, because
-    # `batch_size` is a placeholder.
-    batch_size = array_ops.placeholder(dtypes.int64)
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            batching.batch_and_drop_remainder(batch_size)))
-
-    self.assertIs(None, dataset.output_shapes[0].ndims)
-    self.assertEqual([None], dataset.output_shapes[1][0].as_list())
-    self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list())
-
   @parameterized.named_parameters(
       ("Default", None, None),
       ("SequentialCalls", 1, None),
@@ -720,197 +596,6 @@ class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
         self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
 
 
-class RestructuredDatasetTest(test_base.DatasetTestBase):
-
-  def test_assert_element_shape(self):
-
-    def create_dataset(_):
-      return (array_ops.ones(2, dtype=dtypes.float32),
-              array_ops.zeros((3, 4), dtype=dtypes.int32))
-
-    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
-    expected_shapes = (tensor_shape.TensorShape(2),
-                       tensor_shape.TensorShape((3, 4)))
-    self.assertEqual(expected_shapes, dataset.output_shapes)
-
-    result = dataset.apply(batching.assert_element_shape(expected_shapes))
-    self.assertEqual(expected_shapes, result.output_shapes)
-
-    iterator = result.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(5):
-        sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def test_assert_wrong_element_shape(self):
-
-    def create_dataset(_):
-      return (array_ops.ones(2, dtype=dtypes.float32),
-              array_ops.zeros((3, 4), dtype=dtypes.int32))
-
-    dataset = dataset_ops.Dataset.range(3).map(create_dataset)
-    wrong_shapes = (tensor_shape.TensorShape(2),
-                    tensor_shape.TensorShape((3, 10)))
-    with self.assertRaises(ValueError):
-      dataset.apply(batching.assert_element_shape(wrong_shapes))
-
-  def test_assert_element_shape_on_unknown_shape_dataset(self):
-
-    def create_unknown_shape_dataset(x):
-      return script_ops.py_func(
-          lambda _: (  # pylint: disable=g-long-lambda
-              np.ones(2, dtype=np.float32),
-              np.zeros((3, 4), dtype=np.int32)),
-          [x],
-          [dtypes.float32, dtypes.int32])
-
-    dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset)
-    unknown_shapes = (tensor_shape.TensorShape(None),
-                      tensor_shape.TensorShape(None))
-    self.assertEqual(unknown_shapes, dataset.output_shapes)
-
-    expected_shapes = (tensor_shape.TensorShape(2),
-                       tensor_shape.TensorShape((3, 4)))
-    result = dataset.apply(batching.assert_element_shape(expected_shapes))
-    self.assertEqual(expected_shapes, result.output_shapes)
-
-    iterator = result.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(5):
-        sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def test_assert_wrong_element_shape_on_unknown_shape_dataset(self):
-
-    def create_unknown_shape_dataset(x):
-      return script_ops.py_func(
-          lambda _: (  # pylint: disable=g-long-lambda
-              np.ones(2, dtype=np.float32),
-              np.zeros((3, 4), dtype=np.int32)),
-          [x],
-          [dtypes.float32, dtypes.int32])
-
-    dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset)
-    unknown_shapes = (tensor_shape.TensorShape(None),
-                      tensor_shape.TensorShape(None))
-    self.assertEqual(unknown_shapes, dataset.output_shapes)
-
-    wrong_shapes = (tensor_shape.TensorShape(2),
-                    tensor_shape.TensorShape((3, 10)))
-    iterator = (
-        dataset.apply(batching.assert_element_shape(wrong_shapes))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-  def test_assert_partial_element_shape(self):
-
-    def create_dataset(_):
-      return (array_ops.ones(2, dtype=dtypes.float32),
-              array_ops.zeros((3, 4), dtype=dtypes.int32))
-
-    dataset = dataset_ops.Dataset.range(5).map(create_dataset)
-    partial_expected_shape = (tensor_shape.TensorShape(None),       # Unknown shape
-                              tensor_shape.TensorShape((None, 4)))  # Partial shape
-    result = dataset.apply(
-        batching.assert_element_shape(partial_expected_shape))
-    # Partial shapes are merged with actual shapes:
-    actual_shapes = (tensor_shape.TensorShape(2),
-                     tensor_shape.TensorShape((3, 4)))
-    self.assertEqual(actual_shapes, result.output_shapes)
-
-    iterator = result.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(5):
-        sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def test_assert_wrong_partial_element_shape(self):
-
-    def create_dataset(_):
-      return (array_ops.ones(2, dtype=dtypes.float32),
-              array_ops.zeros((3, 4), dtype=dtypes.int32))
-
-    dataset = dataset_ops.Dataset.range(3).map(create_dataset)
-    wrong_shapes = (tensor_shape.TensorShape(2),
-                    tensor_shape.TensorShape((None, 10)))
-    with self.assertRaises(ValueError):
-      dataset.apply(batching.assert_element_shape(wrong_shapes))
-
-  def test_assert_partial_element_shape_on_unknown_shape_dataset(self):
-
-    def create_unknown_shape_dataset(x):
-      return script_ops.py_func(
-          lambda _: (  # pylint: disable=g-long-lambda
-              np.ones(2, dtype=np.float32),
-              np.zeros((3, 4), dtype=np.int32)),
-          [x],
-          [dtypes.float32, dtypes.int32])
-
-    dataset = dataset_ops.Dataset.range(5).map(create_unknown_shape_dataset)
-    unknown_shapes = (tensor_shape.TensorShape(None),
-                      tensor_shape.TensorShape(None))
-    self.assertEqual(unknown_shapes, dataset.output_shapes)
-
-    expected_shapes = (tensor_shape.TensorShape(2),
-                       tensor_shape.TensorShape((None, 4)))
-    result = dataset.apply(batching.assert_element_shape(expected_shapes))
-    self.assertEqual(expected_shapes, result.output_shapes)
-
-    iterator = result.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(5):
-        sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def test_assert_wrong_partial_element_shape_on_unknown_shape_dataset(self):
-
-    def create_unknown_shape_dataset(x):
-      return script_ops.py_func(
-          lambda _: (  # pylint: disable=g-long-lambda
-              np.ones(2, dtype=np.float32),
-              np.zeros((3, 4), dtype=np.int32)),
-          [x],
-          [dtypes.float32, dtypes.int32])
-
-    dataset = dataset_ops.Dataset.range(3).map(create_unknown_shape_dataset)
-    unknown_shapes = (tensor_shape.TensorShape(None),
-                      tensor_shape.TensorShape(None))
-    self.assertEqual(unknown_shapes, dataset.output_shapes)
-
-    wrong_shapes = (tensor_shape.TensorShape(2),
-                    tensor_shape.TensorShape((None, 10)))
-    iterator = (
-        dataset.apply(batching.assert_element_shape(wrong_shapes))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-
 class UnbatchDatasetBenchmark(test.Benchmark):
 
   def benchmarkNativeUnbatch(self):
diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
rename to tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
index ae401f786c..153a03989b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
@@ -21,7 +21,7 @@ import random
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.ops import grouping
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
index 5b3c512b64..4ee1779710 100644
--- a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
@@ -27,9 +27,9 @@ import zlib
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import error_ops
-from tensorflow.contrib.data.python.ops import readers
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import error_ops
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.eager import context
diff --git a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
index 722e87e555..3fc7157bc5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/dataset_serialization_test_base.py b/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/dataset_serialization_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
index 595cecef4d..7f435b8239 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/dataset_serialization_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
@@ -22,7 +22,7 @@ import os
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py
rename to tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
index bc10c21472..796a692c56 100644
--- a/tensorflow/contrib/data/python/kernel_tests/directed_interleave_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
@@ -84,7 +84,7 @@ class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
 
     # Use chi-squared test to assert that the observed distribution matches the
     # expected distribution. Based on the implementation in
-    # "tensorflow/python/kernel_tests/multinomial_op_test.py".
+    # "third_party/tensorflow/python/kernel_tests/multinomial_op_test.py".
     for probs in [[.85, .05, .1], rand_probs, [1.]]:
       probs = np.asarray(probs)
       classes = len(probs)
diff --git a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
index 6d01bf585c..c6ee88c676 100644
--- a/tensorflow/contrib/data/python/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
@@ -21,8 +21,8 @@ import time
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import math_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py b/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
similarity index 76%
rename from tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py
rename to tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
index cc22ea1df7..8c07afbac5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/get_single_element_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
@@ -18,10 +18,8 @@ from __future__ import division
 from __future__ import print_function
 
 from absl.testing import parameterized
-import numpy as np
 
-from tensorflow.contrib.data.python.ops import get_single_element
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.ops import get_single_element
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
@@ -69,32 +67,6 @@ class GetSingleElementTest(test_base.DatasetTestBase, parameterized.TestCase):
         with self.assertRaisesRegexp(error, error_msg):
           sess.run(element, feed_dict={skip_t: skip, take_t: take})
 
-  @parameterized.named_parameters(
-      ("SumZero", 0),
-      ("SumOne", 1),
-      ("SumFive", 5),
-      ("SumTen", 10),
-  )
-  def testReduceDataset(self, stop):
-    def init_fn(_):
-      return np.int64(0)
-
-    def reduce_fn(state, value):
-      return state + value
-
-    def finalize_fn(state):
-      return state
-
-    sum_reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
-
-    stop_t = array_ops.placeholder(dtypes.int64, shape=[])
-    dataset = dataset_ops.Dataset.range(stop_t)
-    element = get_single_element.reduce_dataset(dataset, sum_reducer)
-
-    with self.cached_session() as sess:
-      value = sess.run(element, feed_dict={stop_t: stop})
-      self.assertEqual(stop * (stop - 1) / 2, value)
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
index d4d3d4adb2..c93a8353ce 100644
--- a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import unittest
 
-from tensorflow.contrib.data.python.ops import indexed_dataset_ops
+from tensorflow.python.data.experimental.ops import indexed_dataset_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
index 28bd670ab5..560902caad 100644
--- a/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
@@ -24,7 +24,7 @@ import time
 
 from six.moves import zip_longest
 
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py
index 58a1d7c93b..94393d6d4b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import iterator_ops
+from tensorflow.python.data.experimental.ops import iterator_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.estimator import estimator
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
index 385c4ef6ea..2f0bd1456b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
@@ -24,11 +24,11 @@ import time
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import error_ops
-from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import error_ops
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
index 751e6d5b30..612ee332c4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import time
 
-from tensorflow.contrib.data.python.ops import map_defun
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import map_defun
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
similarity index 81%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
rename to tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
index d7b5edcd9a..68f73bddb5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
@@ -12,9 +12,9 @@ py_test(
     srcs = ["assert_next_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
@@ -26,12 +26,12 @@ py_test(
     srcs = ["hoist_random_uniform_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
@@ -44,11 +44,11 @@ py_test(
     srcs = ["latency_all_edges_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/kernel_tests:stats_dataset_test_base",
-        "//tensorflow/contrib/data/python/ops:optimization",
-        "//tensorflow/contrib/data/python/ops:stats_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/kernel_tests:stats_dataset_test_base",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/experimental/ops:stats_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -59,7 +59,6 @@ py_test(
     srcs = ["map_vectorization_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:check_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -68,6 +67,7 @@ py_test(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:session",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
@@ -81,12 +81,12 @@ py_test(
     srcs = ["map_and_filter_fusion_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
@@ -99,12 +99,12 @@ py_test(
     srcs = ["map_parallelization_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "@absl_py//absl/testing:parameterized",
@@ -120,11 +120,11 @@ py_test(
         "optonly",
     ],
     deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
@@ -137,11 +137,11 @@ py_test(
     srcs = ["noop_elimination_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:batching",
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
@@ -154,9 +154,9 @@ py_test(
     srcs = ["optimize_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
index fe1b5280ba..45b77b5c20 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
index b43efb5c7c..3cd9753665 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/hoist_random_uniform_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
similarity index 91%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
index e4f18222fd..45623876ae 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
@@ -17,9 +17,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import stats_dataset_test_base
-from tensorflow.contrib.data.python.ops import optimization
-from tensorflow.contrib.data.python.ops import stats_ops
+from tensorflow.python.data.experimental.kernel_tests import stats_dataset_test_base
+from tensorflow.python.data.experimental.ops import optimization
+from tensorflow.python.data.experimental.ops import stats_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
index e9e3fc81e5..a439635716 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
index f7907eb890..334d8e3778 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index a5ea85f454..d47492753e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -22,8 +22,8 @@ import time
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
index 33c250ab2a..a9f2ce8c03 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
@@ -21,8 +21,8 @@ import time
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import batching
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import math_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
index b9e60cfa4e..092e0ff62a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/noop_elimination_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
index 04f499f8c5..eb661796c0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
index 66ccaceea5..13f924b656 100644
--- a/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
@@ -22,9 +22,9 @@ import copy
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import parsing_ops as contrib_parsing_ops
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.experimental.ops import parsing_ops as contrib_parsing_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
@@ -846,6 +846,5 @@ class ParseExampleTest(test_base.DatasetTestBase):
                       "allow_missing to be True."))
 
 
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
index 7a6a7a709a..7d7b842c17 100644
--- a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
@@ -19,9 +19,9 @@ from __future__ import print_function
 
 import threading
 
-from tensorflow.contrib.data.python.ops import prefetching_ops
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.compat import compat
+from tensorflow.python.data.experimental.ops import prefetching_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
index 2e901587f4..22412c3965 100644
--- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import counter
-from tensorflow.contrib.data.python.ops import enumerate_ops
+from tensorflow.python.data.experimental.ops import counter
+from tensorflow.python.data.experimental.ops import enumerate_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
index 66ed547b6d..a02f4bd14f 100644
--- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
@@ -23,8 +23,8 @@ import zlib
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.data.util import nest
diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
index f443b5501b..b6ab80d132 100644
--- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
@@ -22,9 +22,9 @@ import gzip
 import os
 import zlib
 
-from tensorflow.contrib.data.python.ops import readers
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import readers as core_readers
diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/python/data/experimental/kernel_tests/resample_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/resample_test.py
rename to tensorflow/python/data/experimental/kernel_tests/resample_test.py
index 32474bd411..775648c943 100644
--- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/resample_test.py
@@ -23,7 +23,7 @@ from absl.testing import parameterized
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
-from tensorflow.contrib.data.python.ops import resampling
+from tensorflow.python.data.experimental.ops import resampling
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
index bdf80eae4e..78ec80de23 100644
--- a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
@@ -21,7 +21,7 @@ import itertools
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import scan_ops
+from tensorflow.python.data.experimental.ops import scan_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/BUILD
rename to tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index aa89674c6e..20c02a5366 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -13,7 +13,6 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
@@ -24,6 +23,7 @@ py_library(
         "//tensorflow/python:training",
         "//tensorflow/python:util",
         "//tensorflow/python:variables",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
         "//tensorflow/python/data/ops:iterator_ops",
         "//third_party/py/numpy",
     ],
@@ -37,10 +37,10 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -81,9 +81,9 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:readers",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
     ],
 )
 
@@ -126,8 +126,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python/data/ops:readers",
     ],
 )
@@ -160,8 +160,8 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:grouping",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -174,8 +174,8 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:grouping",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -189,9 +189,9 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:error_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:error_ops",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -222,9 +222,9 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -258,8 +258,8 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:optimization",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -288,10 +288,10 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -326,8 +326,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/kernel_tests:reader_dataset_ops_test_base",
     ],
 )
 
@@ -370,8 +370,8 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:interleave_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -384,8 +384,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:scan_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:scan_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -411,10 +411,10 @@ py_test(
     srcs_version = "PY2AND3",
     tags = ["no_pip"],
     deps = [
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:training",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -427,8 +427,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:shuffle_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:shuffle_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -441,10 +441,10 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:iterator_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:training",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -457,11 +457,11 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:sql_dataset_op_test_base",
-        "//tensorflow/contrib/data/python/ops:readers",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/experimental/kernel_tests:sql_dataset_op_test_base",
+        "//tensorflow/python/data/experimental/ops:readers",
     ],
 )
 
@@ -473,10 +473,10 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:stats_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/experimental/ops:stats_ops",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -490,8 +490,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python/data/ops:readers",
     ],
 )
@@ -505,8 +505,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/kernel_tests:reader_dataset_ops_test_base",
         "//tensorflow/python/data/ops:readers",
     ],
 )
@@ -519,8 +519,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
@@ -534,8 +534,8 @@ py_test(
     tags = ["no_pip"],
     deps = [
         ":dataset_serialization_test_base",
-        "//tensorflow/contrib/data/python/ops:unique",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:unique",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/batch_dataset_serialization_test.py
similarity index 94%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/batch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/batch_dataset_serialization_test.py
index af87d8b608..d72a6df14c 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/batch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/batch_dataset_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/cache_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/cache_dataset_serialization_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/cache_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/cache_dataset_serialization_test.py
index 1b6059ccbc..2bcf77f5d8 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/cache_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/cache_dataset_serialization_test.py
@@ -21,7 +21,7 @@ import os
 
 from absl.testing import parameterized
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/concatenate_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/concatenate_dataset_serialization_test.py
similarity index 94%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/concatenate_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/concatenate_dataset_serialization_test.py
index 96f13d75a3..c075dff8cb 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/concatenate_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/concatenate_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/csv_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/csv_dataset_serialization_test.py
similarity index 93%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/csv_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/csv_dataset_serialization_test.py
index 247f2046ea..d4983492e7 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/csv_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/csv_dataset_serialization_test.py
@@ -20,8 +20,8 @@ from __future__ import print_function
 import gzip
 import os
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.platform import test
 
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/dataset_constructor_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_constructor_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/dataset_constructor_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/dataset_constructor_serialization_test.py
index 2139b5c33d..41a095fb1a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/dataset_constructor_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_constructor_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.platform import test
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py
new file mode 100644
index 0000000000..7f435b8239
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py
@@ -0,0 +1,692 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Base class for testing serializable datasets."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.util import nest
+
+
+def remove_variants(get_next_op):
+  # TODO(b/72408568): Remove this once session.run can get
+  # variant tensors.
+  """Remove variants from a nest structure, so sess.run will execute."""
+
+  def _remove_variant(x):
+    if isinstance(x, ops.Tensor) and x.dtype == dtypes.variant:
+      return ()
+    else:
+      return x
+
+  return nest.map_structure(_remove_variant, get_next_op)
+
+
+class DatasetSerializationTestBase(test.TestCase):
+  """Base class for testing serializable datasets."""
+
+  def tearDown(self):
+    self._delete_ckpt()
+
+  # TODO(b/72657739): Remove sparse_tensor argument, which is to test the
+  # (deprecated) saveable `SparseTensorSliceDataset`, once the API
+  # `from_sparse_tensor_slices()`and related tests are deleted.
+  def run_core_tests(self, ds_fn1, ds_fn2, num_outputs, sparse_tensors=False):
+    """Runs the core tests.
+
+    Args:
+      ds_fn1: 0-argument function that returns a Dataset.
+      ds_fn2: 0-argument function that returns a Dataset different from
+        ds_fn1. If None, verify_restore_in_modified_graph test is not run.
+      num_outputs: Total number of outputs expected from this Dataset.
+      sparse_tensors: Whether dataset is built from SparseTensor(s).
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.verify_unused_iterator(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_fully_used_iterator(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_exhausted_iterator(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_init_before_restore(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_multiple_breaks(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_reset_restored_iterator(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    self.verify_restore_in_empty_graph(
+        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+    if ds_fn2:
+      self.verify_restore_in_modified_graph(
+          ds_fn1, ds_fn2, num_outputs, sparse_tensors=sparse_tensors)
+
+  def verify_unused_iterator(self,
+                             ds_fn,
+                             num_outputs,
+                             sparse_tensors=False,
+                             verify_exhausted=True):
+    """Verifies that saving and restoring an unused iterator works.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.verify_run_with_breaks(
+        ds_fn, [0],
+        num_outputs,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+  def verify_fully_used_iterator(self, ds_fn, num_outputs,
+                                 sparse_tensors=False):
+    """Verifies that saving and restoring a fully used iterator works.
+
+    Note that this only checks saving and restoring an iterator from which
+    `num_outputs` items have been produced but does not check for an
+    exhausted iterator, i.e., one from which an OutOfRange error has been
+    returned.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      sparse_tensors: See `run_core_tests`.
+
+    Raises:
+      AssertionError if test fails.
+    """
+    self.verify_run_with_breaks(
+        ds_fn, [num_outputs], num_outputs, sparse_tensors=sparse_tensors)
+
+  def verify_exhausted_iterator(self, ds_fn, num_outputs, sparse_tensors=False):
+    """Verifies that saving and restoring an exhausted iterator works.
+
+    An exhausted iterator is one which has returned an OutOfRange error.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      sparse_tensors: See `run_core_tests`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.gen_outputs(
+        ds_fn, [],
+        num_outputs,
+        verify_exhausted=True,
+        sparse_tensors=sparse_tensors)
+    actual = self.gen_outputs(
+        ds_fn, [],
+        0,
+        ckpt_saved=True,
+        verify_exhausted=True,
+        sparse_tensors=sparse_tensors)
+    self.assertEqual(len(actual), 0)
+
+  def verify_init_before_restore(self,
+                                 ds_fn,
+                                 num_outputs,
+                                 sparse_tensors=False,
+                                 verify_exhausted=True):
+    """Verifies that restoring into an already initialized iterator works.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.verify_run_with_breaks(
+        ds_fn,
+        self.gen_break_points(num_outputs),
+        num_outputs,
+        init_before_restore=True,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+  def verify_multiple_breaks(self,
+                             ds_fn,
+                             num_outputs,
+                             num_breaks=10,
+                             sparse_tensors=False,
+                             verify_exhausted=True):
+    """Attempts to save/restore at multiple break points.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      num_breaks: The number of break points. These are uniformly spread in
+        [0, num_outputs] both inclusive.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    self.verify_run_with_breaks(
+        ds_fn,
+        self.gen_break_points(num_outputs, num_breaks),
+        num_outputs,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+  def verify_reset_restored_iterator(self,
+                                     ds_fn,
+                                     num_outputs,
+                                     break_point=None,
+                                     sparse_tensors=False,
+                                     verify_exhausted=True):
+    """Attempts to re-initialize a restored iterator.
+
+    This is useful when restoring a training checkpoint during validation.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    break_point = num_outputs // 2 if not break_point else break_point
+
+    # Collect ground truth containing all outputs.
+    expected = self.gen_outputs(
+        ds_fn, [],
+        num_outputs,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    # Skip some items and save checkpoint.
+    self.gen_outputs(
+        ds_fn, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+
+    actual = []
+    # Restore from checkpoint and then run init_op.
+    with ops.Graph().as_default() as g:
+      saver = self._import_meta_graph()
+      init_op, get_next_op = self._get_iterator_ops_from_collection(
+          ds_fn, sparse_tensors=sparse_tensors)
+      get_next_op = remove_variants(get_next_op)
+      with self.session(graph=g) as sess:
+        self._restore(saver, sess)
+        self._initialize(init_op, sess)
+        for _ in range(num_outputs):
+          actual.append(sess.run(get_next_op))
+        if verify_exhausted:
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(get_next_op)
+    self.match(expected, actual)
+
+  def verify_restore_in_modified_graph(self,
+                                       ds_fn1,
+                                       ds_fn2,
+                                       num_outputs,
+                                       break_point=None,
+                                       sparse_tensors=False,
+                                       verify_exhausted=True):
+    """Attempts to restore an iterator in a modified graph.
+
+    Builds an input pipeline using ds_fn1, runs it for `break_point` steps
+    and saves a checkpoint. Then builds a new graph using ds_fn2, restores
+    the checkpoint from ds_fn1 and verifies that the restore is successful.
+
+    Args:
+      ds_fn1: See `run_core_tests`.
+      ds_fn2: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    break_point = num_outputs // 2 if not break_point else break_point
+
+    # Skip `break_point` items and store the remaining produced from ds_fn1
+    # in `expected`.
+    self.gen_outputs(
+        ds_fn1, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+    expected = self.gen_outputs(
+        ds_fn1, [],
+        num_outputs - break_point,
+        ckpt_saved=True,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    # Generate `break_point` items from ds_fn1 and save checkpoint.
+    self.gen_outputs(
+        ds_fn1, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+
+    actual = []
+    # Build graph for ds_fn2 but load checkpoint for ds_fn1.
+    with ops.Graph().as_default() as g:
+      _, get_next_op, saver = self._build_graph(
+          ds_fn2, sparse_tensors=sparse_tensors)
+      get_next_op = remove_variants(get_next_op)
+      with self.session(graph=g) as sess:
+        self._restore(saver, sess)
+        for _ in range(num_outputs - break_point):
+          actual.append(sess.run(get_next_op))
+        if verify_exhausted:
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(get_next_op)
+
+    self.match(expected, actual)
+
+  def verify_restore_in_empty_graph(self,
+                                    ds_fn,
+                                    num_outputs,
+                                    break_point=None,
+                                    sparse_tensors=False,
+                                    verify_exhausted=True):
+    """Attempts to restore an iterator in an empty graph.
+
+    Builds an input pipeline using ds_fn, runs it for `break_point` steps
+    and saves a checkpoint. Then builds a new empty graph, restores
+    the checkpoint from ds_fn and verifies that the restore is successful.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    break_point = num_outputs // 2 if not break_point else break_point
+
+    # Skip `break_point` items and store the remaining produced from ds_fn
+    # in `expected`.
+    self.gen_outputs(
+        ds_fn, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+    expected = self.gen_outputs(
+        ds_fn, [],
+        num_outputs - break_point,
+        ckpt_saved=True,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    # Generate `break_point` items from ds_fn and save checkpoint.
+    self.gen_outputs(
+        ds_fn, [],
+        break_point,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=False)
+
+    actual = []
+    # Build an empty graph but load checkpoint for ds_fn.
+    with ops.Graph().as_default() as g:
+      get_next_op, saver = self._build_empty_graph(
+          ds_fn, sparse_tensors=sparse_tensors)
+      get_next_op = remove_variants(get_next_op)
+      with self.session(graph=g) as sess:
+        self._restore(saver, sess)
+        for _ in range(num_outputs - break_point):
+          actual.append(sess.run(get_next_op))
+        if verify_exhausted:
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(get_next_op)
+
+    self.match(expected, actual)
+
+  def verify_error_on_save(self,
+                           ds_fn,
+                           num_outputs,
+                           error,
+                           break_point=None,
+                           sparse_tensors=False):
+    """Attempts to save a non-saveable iterator.
+
+    Args:
+      ds_fn: See `run_core_tests`.
+      num_outputs: See `run_core_tests`.
+      error: Declared error when trying to save iterator.
+      break_point: Break point. Optional. Defaults to num_outputs/2.
+      sparse_tensors: See `run_core_tests`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+
+    break_point = num_outputs // 2 if not break_point else break_point
+    with ops.Graph().as_default() as g:
+      init_op, get_next_op, saver = self._build_graph(
+          ds_fn, sparse_tensors=sparse_tensors)
+      get_next_op = remove_variants(get_next_op)
+      with self.session(graph=g) as sess:
+        self._initialize(init_op, sess)
+        for _ in range(break_point):
+          sess.run(get_next_op)
+        with self.assertRaises(error):
+          self._save(sess, saver)
+
+  def verify_run_with_breaks(self,
+                             ds_fn,
+                             break_points,
+                             num_outputs,
+                             init_before_restore=False,
+                             sparse_tensors=False,
+                             verify_exhausted=True):
+    """Verifies that ds_fn() produces the same outputs with and without breaks.
+
+    1. Builds a Dataset using `ds_fn` and produces `num_outputs` items from it
+       *without* stopping at break points.
+    2. Builds a Dataset using `ds_fn` and produces `num_outputs` items from it
+       with stopping at break points.
+
+    Deep matches outputs from 1 and 2.
+
+    Args:
+      ds_fn: See `gen_outputs`.
+      break_points: See `gen_outputs`.
+      num_outputs: See `gen_outputs`.
+      init_before_restore: See `gen_outputs`.
+      sparse_tensors: See `run_core_tests`.
+      verify_exhausted: See `gen_outputs`.
+
+    Raises:
+      AssertionError if any test fails.
+    """
+    expected = self.gen_outputs(
+        ds_fn, [],
+        num_outputs,
+        init_before_restore=init_before_restore,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    actual = self.gen_outputs(
+        ds_fn,
+        break_points,
+        num_outputs,
+        init_before_restore=init_before_restore,
+        sparse_tensors=sparse_tensors,
+        verify_exhausted=verify_exhausted)
+
+    self.match(expected, actual)
+
+  def gen_outputs(self,
+                  ds_fn,
+                  break_points,
+                  num_outputs,
+                  ckpt_saved=False,
+                  init_before_restore=False,
+                  sparse_tensors=False,
+                  verify_exhausted=True,
+                  save_checkpoint_at_end=True):
+    """Generates elements from input dataset while stopping at break points.
+
+    Produces `num_outputs` outputs and saves the state of the iterator in the
+    Saver checkpoint.
+
+    Args:
+      ds_fn: 0-argument function that returns the dataset.
+      break_points: A list of integers. For each `break_point` in
+        `break_points`, we produce outputs till `break_point` number of items
+        have been produced and then checkpoint the state. The current graph
+        and session are destroyed and a new graph and session are used to
+        produce outputs till next checkpoint or till `num_outputs` elements
+        have been produced. `break_point` must be <= `num_outputs`.
+      num_outputs: The total number of outputs to produce from the iterator.
+      ckpt_saved: Whether a checkpoint already exists. If False, we build the
+        graph from ds_fn.
+      init_before_restore: Whether init should be called before saver.restore.
+        This is just so that we can verify that restoring an already initialized
+        iterator works.
+      sparse_tensors:  Whether dataset is built from SparseTensor(s).
+      verify_exhausted: Whether to verify that the iterator has been exhausted
+        after producing `num_outputs` elements.
+      save_checkpoint_at_end: Whether to save a checkpoint after producing all
+        outputs. If False, checkpoints are saved each break point but not at the
+        end. Note that checkpoints overwrite each other so there is always only
+        a single checkpoint available. Defaults to True.
+
+    Returns:
+      A list of `num_outputs` items.
+    """
+    outputs = []
+
+    def get_ops():
+      if ckpt_saved:
+        saver = self._import_meta_graph()
+        init_op, get_next_op = self._get_iterator_ops_from_collection(
+            ds_fn, sparse_tensors=sparse_tensors)
+      else:
+        init_op, get_next_op, saver = self._build_graph(
+            ds_fn, sparse_tensors=sparse_tensors)
+      return init_op, get_next_op, saver
+
+    for i in range(len(break_points) + 1):
+      with ops.Graph().as_default() as g:
+        init_op, get_next_op, saver = get_ops()
+        get_next_op = remove_variants(get_next_op)
+        with self.session(graph=g) as sess:
+          if ckpt_saved:
+            if init_before_restore:
+              self._initialize(init_op, sess)
+            self._restore(saver, sess)
+          else:
+            self._initialize(init_op, sess)
+          start = break_points[i - 1] if i > 0 else 0
+          end = break_points[i] if i < len(break_points) else num_outputs
+          num_iters = end - start
+          for _ in range(num_iters):
+            outputs.append(sess.run(get_next_op))
+          if i == len(break_points) and verify_exhausted:
+            with self.assertRaises(errors.OutOfRangeError):
+              sess.run(get_next_op)
+          if save_checkpoint_at_end or i < len(break_points):
+            self._save(sess, saver)
+            ckpt_saved = True
+
+    return outputs
+
+  def match(self, expected, actual):
+    """Matches nested structures.
+
+    Recursively matches shape and values of `expected` and `actual`.
+    Handles scalars, numpy arrays and other python sequence containers
+    e.g. list, dict.
+
+    Args:
+      expected: Nested structure 1.
+      actual: Nested structure 2.
+
+    Raises:
+      AssertionError if matching fails.
+    """
+    if isinstance(expected, np.ndarray):
+      expected = expected.tolist()
+    if isinstance(actual, np.ndarray):
+      actual = actual.tolist()
+    self.assertEqual(type(expected), type(actual))
+
+    if nest.is_sequence(expected):
+      self.assertEqual(len(expected), len(actual))
+      if isinstance(expected, dict):
+        for key1, key2 in zip(sorted(expected), sorted(actual)):
+          self.assertEqual(key1, key2)
+          self.match(expected[key1], actual[key2])
+      else:
+        for item1, item2 in zip(expected, actual):
+          self.match(item1, item2)
+    else:
+      self.assertEqual(expected, actual)
+
+  def does_not_match(self, expected, actual):
+    with self.assertRaises(AssertionError):
+      self.match(expected, actual)
+
+  def gen_break_points(self, num_outputs, num_samples=10):
+    """Generates `num_samples` breaks points in [0, num_outputs]."""
+    return np.linspace(0, num_outputs, num_samples, dtype=int)
+
+  def _build_graph(self, ds_fn, sparse_tensors=False):
+    iterator = ds_fn().make_initializable_iterator()
+
+    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
+    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
+    init_op = iterator.initializer
+    if sparse_tensors:
+      get_next = sparse_tensor.SparseTensor(*iterator.get_next())
+    else:
+      get_next = iterator.get_next()
+    self._add_iterator_ops_to_collection(init_op, get_next, ds_fn,
+                                         sparse_tensors)
+    saver = saver_lib.Saver(allow_empty=True)
+    return init_op, get_next, saver
+
+  def _build_empty_graph(self, ds_fn, sparse_tensors=False):
+    iterator = iterator_ops.Iterator.from_structure(
+        self._get_output_types(ds_fn),
+        output_shapes=self._get_output_shapes(ds_fn),
+        output_classes=self._get_output_classes(ds_fn))
+    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
+    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
+    if sparse_tensors:
+      get_next = sparse_tensor.SparseTensor(*iterator.get_next())
+    else:
+      get_next = iterator.get_next()
+    saver = saver_lib.Saver(allow_empty=True)
+    return get_next, saver
+
+  def _add_iterator_ops_to_collection(self,
+                                      init_op,
+                                      get_next,
+                                      ds_fn,
+                                      sparse_tensors=False):
+    ops.add_to_collection("iterator_ops", init_op)
+    # `get_next` may be a tuple e.g. in TensorSliceDataset. Since Collections
+    # do not support tuples we flatten the tensors and restore the shape in
+    # `_get_iterator_ops_from_collection`.
+    if sparse_tensors:  # specific for deprecated `from_sparse_tensor_slices`.
+      ops.add_to_collection("iterator_ops", get_next.indices)
+      ops.add_to_collection("iterator_ops", get_next.values)
+      ops.add_to_collection("iterator_ops", get_next.dense_shape)
+      return
+
+    get_next_list = nest.flatten(get_next)
+    for i, output_class in enumerate(
+        nest.flatten(self._get_output_classes(ds_fn))):
+      if output_class is sparse_tensor.SparseTensor:
+        ops.add_to_collection("iterator_ops", get_next_list[i].indices)
+        ops.add_to_collection("iterator_ops", get_next_list[i].values)
+        ops.add_to_collection("iterator_ops", get_next_list[i].dense_shape)
+      else:
+        ops.add_to_collection("iterator_ops", get_next_list[i])
+
+  def _get_iterator_ops_from_collection(self, ds_fn, sparse_tensors=False):
+    all_ops = ops.get_collection("iterator_ops")
+    if sparse_tensors:  # specific for deprecated `from_sparse_tensor_slices`.
+      init_op, indices, values, dense_shape = all_ops
+      return init_op, sparse_tensor.SparseTensor(indices, values, dense_shape)
+    get_next_list = []
+    i = 1
+    for output_class in nest.flatten(self._get_output_classes(ds_fn)):
+      if output_class is sparse_tensor.SparseTensor:
+        indices, values, dense_shape = all_ops[i:i + 3]
+        i += 3
+        get_next_list.append(
+            sparse_tensor.SparseTensor(indices, values, dense_shape))
+      else:
+        get_next_list.append(all_ops[i])
+        i += 1
+    return all_ops[0], nest.pack_sequence_as(
+        self._get_output_types(ds_fn), get_next_list)
+
+  def _get_output_types(self, ds_fn):
+    with ops.Graph().as_default():
+      return ds_fn().output_types
+
+  def _get_output_shapes(self, ds_fn):
+    with ops.Graph().as_default():
+      return ds_fn().output_shapes
+
+  def _get_output_classes(self, ds_fn):
+    with ops.Graph().as_default():
+      return ds_fn().output_classes
+
+  def _ckpt_path(self):
+    return os.path.join(self.get_temp_dir(), "iterator")
+
+  def _latest_ckpt(self):
+    return checkpoint_management.latest_checkpoint(self.get_temp_dir())
+
+  def _save(self, sess, saver):
+    saver.save(sess, self._ckpt_path())
+
+  def _restore(self, saver, sess):
+    sess.run(lookup_ops.tables_initializer())
+    saver.restore(sess, self._latest_ckpt())
+
+  def _initialize(self, init_op, sess):
+    sess.run(variables.global_variables_initializer())
+    sess.run(lookup_ops.tables_initializer())
+    sess.run(init_op)
+
+  def _import_meta_graph(self):
+    meta_file_path = self._ckpt_path() + ".meta"
+    return saver_lib.import_meta_graph(meta_file_path)
+
+  def _delete_ckpt(self):
+    # Remove all checkpoint files.
+    prefix = self._ckpt_path()
+    pattern = prefix + "*"
+    files = gfile.Glob(pattern)
+    map(gfile.Remove, files)
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/filter_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/filter_dataset_serialization_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/filter_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/filter_dataset_serialization_test.py
index 7c170078a1..225f6cbac0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/filter_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/filter_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import math_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
index 34392d88d4..70caf3e0d5 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/fixed_length_record_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/flat_map_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/flat_map_dataset_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/flat_map_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/flat_map_dataset_serialization_test.py
index 16051ffd3f..c30534a9e9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/flat_map_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/flat_map_dataset_serialization_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/group_by_reducer_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/group_by_reducer_serialization_test.py
similarity index 93%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/group_by_reducer_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/group_by_reducer_serialization_test.py
index 571e0899bb..169c8845d0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/group_by_reducer_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/group_by_reducer_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import grouping
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/group_by_window_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/group_by_window_serialization_test.py
similarity index 93%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/group_by_window_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/group_by_window_serialization_test.py
index f86af4084e..e5bc76288e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/group_by_window_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/group_by_window_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import grouping
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import grouping
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/ignore_errors_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/ignore_errors_serialization_test.py
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/ignore_errors_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/ignore_errors_serialization_test.py
index 65ae9923b8..df1f43129a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/ignore_errors_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/ignore_errors_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import error_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import error_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/interleave_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/interleave_dataset_serialization_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/interleave_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/interleave_dataset_serialization_test.py
index 243f6405a1..0c1d40ce39 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/interleave_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/interleave_dataset_serialization_test.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import sparse_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
similarity index 94%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
index c9cd211328..166ffa99ca 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import math
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/map_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/map_dataset_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/map_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/map_dataset_serialization_test.py
index ab783e5cce..b93156a96c 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/map_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/map_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/optimize_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/optimize_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py
index d5c03495e3..ed4a1da596 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/optimize_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import optimization
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/padded_batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
index 9ac42a461a..6f72b24673 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/padded_batch_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import string_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
index 1f8a584df9..b8f38e8a28 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/parallel_interleave_dataset_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import sparse_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/parallel_map_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
index 3fb7605be1..a0bdd4fa59 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/parallel_map_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -65,7 +65,7 @@ class ParallelMapDatasetSerializationTest(
     for ds_fn in [self._build_ds, self._build_ds_with_prefetch]:
       self.run_core_tests(
           ds_fn,
-          lambda: ds_fn(multiplier=15.0),
+          lambda: ds_fn(multiplier=15.0),  # pylint: disable=cell-var-from-loop
           self._num_outputs)
 
   def testSaveStatefulFunction(self):
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/parse_example_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/parse_example_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
index d3fa84e74c..a0dd6960b0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/parse_example_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.platform import test
 
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/prefetch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/prefetch_dataset_serialization_test.py
similarity index 93%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/prefetch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/prefetch_dataset_serialization_test.py
index c802402461..00d74c0025 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/prefetch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/prefetch_dataset_serialization_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/range_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/range_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py
index 6341190847..ef99d01c73 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/range_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/sample_from_datasets_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/sample_from_datasets_serialization_test.py
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/sample_from_datasets_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/sample_from_datasets_serialization_test.py
index fdb35ea624..c23c1ecdfb 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/sample_from_datasets_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/sample_from_datasets_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import interleave_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/scan_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/scan_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/scan_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/scan_dataset_serialization_test.py
index af9ef48c0f..5f50160619 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/scan_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/scan_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import scan_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import scan_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/sequence_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/sequence_dataset_serialization_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/sequence_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/sequence_dataset_serialization_test.py
index 2afebca0f5..fe99a3d3d9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/sequence_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/sequence_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/serialization_integration_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py
similarity index 97%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/serialization_integration_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py
index 6aac50ecd9..88d5c896c9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/serialization_integration_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
index f199ec835e..f847ac19f9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_and_repeat_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import shuffle_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import shuffle_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py
index a59fa94d66..a04f1ddafc 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/shuffle_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/sql_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
similarity index 88%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/sql_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
index 93b26ed58a..b179770ce3 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/sql_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
@@ -19,9 +19,9 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.data.python.kernel_tests import sql_dataset_op_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.experimental.kernel_tests import sql_dataset_op_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/stats_dataset_serialization_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/stats_dataset_serialization_test.py
index a10f85263a..ef7061b190 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/stats_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/stats_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import stats_ops
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import stats_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/textline_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/textline_dataset_serialization_test.py
similarity index 90%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/textline_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/textline_dataset_serialization_test.py
index 2483787f44..c87a7443a7 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/textline_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/textline_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/tf_record_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/tf_record_dataset_serialization_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/tf_record_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/tf_record_dataset_serialization_test.py
index 55a6257a27..f0dcc131d4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/tf_record_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/tf_record_dataset_serialization_test.py
@@ -21,8 +21,8 @@ import gzip
 import os
 import zlib
 
-from tensorflow.contrib.data.python.kernel_tests import reader_dataset_ops_test_base
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/unbatch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/unbatch_dataset_serialization_test.py
similarity index 91%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/unbatch_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/unbatch_dataset_serialization_test.py
index b2a5a8a20d..528598dfe4 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/unbatch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/unbatch_dataset_serialization_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import batching
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/unique_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/unique_dataset_serialization_test.py
similarity index 89%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/unique_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/unique_dataset_serialization_test.py
index 22f15b8846..e2862af4d6 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/unique_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/unique_dataset_serialization_test.py
@@ -17,8 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
-from tensorflow.contrib.data.python.ops import unique
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import unique
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/zip_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/zip_dataset_serialization_test.py
similarity index 94%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/zip_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/zip_dataset_serialization_test.py
index 340a6ff72e..4ea6131c22 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/zip_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/zip_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
new file mode 100644
index 0000000000..88d5c896c9
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
@@ -0,0 +1,85 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Integration test for dataset serialization."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import test
+from tensorflow.python.training import saver as saver_lib
+
+
+class SerializationIntegrationTest(test.TestCase):
+
+  def _build_input_pipeline(self, name, num_outputs):
+    with ops.name_scope(name):
+      ds = dataset_ops.Dataset.range(num_outputs).shuffle(
+          10, reshuffle_each_iteration=False).prefetch(10)
+      iterator = ds.make_initializable_iterator()
+      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
+      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
+      return iterator.initializer, iterator.get_next()
+
+  def _build_graph(self, num_pipelines, num_outputs):
+    init_ops = []
+    get_next_ops = []
+    for i in range(num_pipelines):
+      name = "input_pipeline_%d" % i
+      init_op, get_next_op = self._build_input_pipeline(name, num_outputs)
+      init_ops.append(init_op)
+      get_next_ops.append(get_next_op)
+    saver = saver_lib.Saver()
+    return init_ops, get_next_ops, saver
+
+  def _ckpt_path(self):
+    return os.path.join(self.get_temp_dir(), "iterator")
+
+  def testConcurrentSaves(self):
+    num_pipelines = 100
+    num_outputs = 100
+    break_point = 10
+    all_outputs = [[] for _ in range(num_pipelines)]
+    with ops.Graph().as_default() as g:
+      init_ops, get_next_ops, saver = self._build_graph(num_pipelines,
+                                                        num_outputs)
+      with self.session(graph=g) as sess:
+        sess.run(init_ops)
+        for _ in range(break_point):
+          output = sess.run(get_next_ops)
+          for i in range(num_pipelines):
+            all_outputs[i].append(output[i])
+        saver.save(sess, self._ckpt_path())
+
+    with ops.Graph().as_default() as g:
+      init_ops, get_next_ops, saver = self._build_graph(num_pipelines,
+                                                        num_outputs)
+      with self.session(graph=g) as sess:
+        saver.restore(sess, self._ckpt_path())
+        for _ in range(num_outputs - break_point):
+          output = sess.run(get_next_ops)
+          for i in range(num_pipelines):
+            all_outputs[i].append(output[i])
+
+    for output in all_outputs:
+      self.assertSequenceEqual(sorted(output), range(num_outputs))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
index c97002a255..50895b5945 100644
--- a/tensorflow/contrib/data/python/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import shuffle_ops
+from tensorflow.python.data.experimental.ops import shuffle_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
diff --git a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
similarity index 99%
rename from tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
index 52823d3fca..301f75488a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.kernel_tests import sql_dataset_op_test_base
+from tensorflow.python.data.experimental.kernel_tests import sql_dataset_op_test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
diff --git a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
index 319a2ea263..a135c357f0 100644
--- a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
@@ -23,7 +23,7 @@ import os
 
 import sqlite3
 
-from tensorflow.contrib.data.python.ops import readers
+from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index be8ae5e955..6761fbd16b 100644
--- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -19,8 +19,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.data.python.kernel_tests import stats_dataset_test_base
-from tensorflow.contrib.data.python.ops import stats_ops
+from tensorflow.python.data.experimental.kernel_tests import stats_dataset_test_base
+from tensorflow.python.data.experimental.ops import stats_ops
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_test_base.py
similarity index 100%
rename from tensorflow/contrib/data/python/kernel_tests/stats_dataset_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/stats_dataset_test_base.py
diff --git a/tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
similarity index 96%
rename from tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
index 08de3a9143..4432dcb05a 100644
--- a/tensorflow/contrib/data/python/kernel_tests/threadpool_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
@@ -22,8 +22,8 @@ import threading
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.contrib.data.python.ops import threadpool
-from tensorflow.contrib.data.python.ops import unique
+from tensorflow.python.data.experimental.ops import threadpool
+from tensorflow.python.data.experimental.ops import unique
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
index 8856ce5afb..b5a0b20f3f 100644
--- a/tensorflow/contrib/data/python/kernel_tests/unique_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import unique
+from tensorflow.python.data.experimental.ops import unique
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
diff --git a/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
similarity index 98%
rename from tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
index fca546a570..25a2e63ba1 100644
--- a/tensorflow/contrib/data/python/kernel_tests/writer_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.contrib.data.python.ops import writers
+from tensorflow.python.data.experimental.ops import writers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
diff --git a/tensorflow/python/data/experimental/ops/BUILD b/tensorflow/python/data/experimental/ops/BUILD
new file mode 100644
index 0000000000..915d399f1b
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/BUILD
@@ -0,0 +1,377 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_gen_op_wrapper_py",
+    "tf_kernel_library",
+)
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
+
+py_library(
+    name = "counter",
+    srcs = ["counter.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":scan_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "get_single_element",
+    srcs = ["get_single_element.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "iterator_ops",
+    srcs = [
+        "iterator_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:basic_session_run_hooks",
+        "//tensorflow/python:checkpoint_management",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:saver",
+        "//tensorflow/python:session_run_hook",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:optional_ops",
+    ],
+)
+
+py_library(
+    name = "random_ops",
+    srcs = [
+        "random_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:random_seed",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "readers",
+    srcs = [
+        "readers.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":batching",
+        ":interleave_ops",
+        ":optimization",
+        ":parsing_ops",
+        ":shuffle_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/data/util:convert",
+        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "shuffle_ops",
+    srcs = [
+        "shuffle_ops.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "batching",
+    srcs = ["batching.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":get_single_element",
+        ":grouping",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:tensor_util",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:convert",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "enumerate_ops",
+    srcs = ["enumerate_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "error_ops",
+    srcs = ["error_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "grouping",
+    srcs = ["grouping.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:check_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:function",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "interleave_ops",
+    srcs = ["interleave_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":random_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:stateless_random_ops_gen",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "optimization",
+    srcs = ["optimization.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "parsing_ops",
+    srcs = ["parsing_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
+
+py_library(
+    name = "map_defun",
+    srcs = ["map_defun.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:tensor_shape",
+    ],
+)
+
+py_library(
+    name = "resampling",
+    srcs = ["resampling.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":batching",
+        ":interleave_ops",
+        ":scan_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:logging_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "scan_ops",
+    srcs = ["scan_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:function",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "stats_ops",
+    srcs = ["stats_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "threadpool",
+    srcs = ["threadpool.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+        "//tensorflow/python/eager:context",
+    ],
+)
+
+py_library(
+    name = "unique",
+    srcs = [
+        "unique.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "writers",
+    srcs = [
+        "writers.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_library(
+    name = "indexed_dataset_ops",
+    srcs = ["indexed_dataset_ops.py"],
+    deps = [
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "prefetching_ops",
+    srcs = ["prefetching_ops.py"],
+    deps = [
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+        "//tensorflow/python/data/util:sparse",
+    ],
+)
+
+py_library(
+    name = "dataset_ops",
+    deps = [
+        ":batching",
+        ":counter",
+        ":enumerate_ops",
+        ":error_ops",
+        ":get_single_element",
+        ":grouping",
+        ":indexed_dataset_ops",
+        ":interleave_ops",
+        ":map_defun",
+        ":optimization",
+        ":prefetching_ops",
+        ":readers",
+        ":resampling",
+        ":scan_ops",
+        ":shuffle_ops",
+        ":stats_ops",
+        ":threadpool",
+        ":unique",
+        ":writers",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
+    ],
+)
diff --git a/tensorflow/python/data/experimental/ops/batching.py b/tensorflow/python/data/experimental/ops/batching.py
new file mode 100644
index 0000000000..d42af9e7e9
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/batching.py
@@ -0,0 +1,669 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Batching dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import get_single_element
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import convert
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+def batch_window(dataset):
+  """Batches a window of tensors.
+
+  Args:
+    dataset: the input dataset.
+
+  Returns:
+    A `Tensor` representing the batch of the entire input dataset.
+  """
+  if isinstance(dataset.output_classes, tuple):
+    raise TypeError("Input dataset expected to have a single component")
+  if dataset.output_classes is ops.Tensor:
+    return _batch_dense_window(dataset)
+  elif dataset.output_classes is sparse_tensor.SparseTensor:
+    return _batch_sparse_window(dataset)
+  else:
+    raise TypeError("Unsupported dataset type: %s" % dataset.output_classes)
+
+
+def _batch_dense_window(dataset):
+  """Batches a window of dense tensors."""
+
+  def key_fn(_):
+    return np.int64(0)
+
+  def shape_init_fn(_):
+    return array_ops.shape(first_element)
+
+  def shape_reduce_fn(state, value):
+    check_ops.assert_equal(state, array_ops.shape(value))
+    return state
+
+  def finalize_fn(state):
+    return state
+
+  if dataset.output_shapes.is_fully_defined():
+    shape = dataset.output_shapes
+  else:
+    first_element = get_single_element.get_single_element(dataset.take(1))
+    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
+                                     finalize_fn)
+    shape = get_single_element.get_single_element(
+        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))
+
+  def batch_init_fn(_):
+    batch_shape = array_ops.concat([[0], shape], 0)
+    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)
+
+  def batch_reduce_fn(state, value):
+    return array_ops.concat([state, [value]], 0)
+
+  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
+  return get_single_element.get_single_element(
+      dataset.apply(grouping.group_by_reducer(key_fn, batch_reducer)))
+
+
+def _batch_sparse_window(dataset):
+  """Batches a window of sparse tensors."""
+
+  def key_fn(_):
+    return np.int64(0)
+
+  def shape_init_fn(_):
+    return first_element.dense_shape
+
+  def shape_reduce_fn(state, value):
+    check_ops.assert_equal(state, value.dense_shape)
+    return state
+
+  def finalize_fn(state):
+    return state
+
+  if dataset.output_shapes.is_fully_defined():
+    shape = dataset.output_shapes
+  else:
+    first_element = get_single_element.get_single_element(dataset.take(1))
+    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
+                                     finalize_fn)
+    shape = get_single_element.get_single_element(
+        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))
+
+  def batch_init_fn(_):
+    indices_shape = array_ops.concat([[0], [array_ops.size(shape) + 1]], 0)
+    return sparse_tensor.SparseTensor(
+        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
+        values=constant_op.constant([], shape=[0], dtype=dataset.output_types),
+        dense_shape=array_ops.concat(
+            [np.array([0], dtype=np.int64),
+             math_ops.cast(shape, dtypes.int64)], 0))
+
+  def batch_reduce_fn(state, value):
+    return sparse_ops.sparse_concat(0, [state, value])
+
+  def reshape_fn(value):
+    return sparse_ops.sparse_reshape(
+        value,
+        array_ops.concat([np.array([1], dtype=np.int64), value.dense_shape], 0))
+
+  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
+  return get_single_element.get_single_element(
+      dataset.map(reshape_fn).apply(
+          grouping.group_by_reducer(key_fn, batch_reducer)))
+
+
+@tf_export("data.experimental.dense_to_sparse_batch")
+def dense_to_sparse_batch(batch_size, row_shape):
+  """A transformation that batches ragged elements into `tf.SparseTensor`s.
+
+  Like `Dataset.padded_batch()`, this transformation combines multiple
+  consecutive elements of the dataset, which might have different
+  shapes, into a single element. The resulting element has three
+  components (`indices`, `values`, and `dense_shape`), which
+  comprise a `tf.SparseTensor` that represents the same data. The
+  `row_shape` represents the dense shape of each row in the
+  resulting `tf.SparseTensor`, to which the effective batch size is
+  prepended. For example:
+
+  ```python
+  # NOTE: The following examples use `{ ... }` to represent the
+  # contents of a dataset.
+  a = { ['a', 'b', 'c'], ['a', 'b'], ['a', 'b', 'c', 'd'] }
+
+  a.apply(tf.data.experimental.dense_to_sparse_batch(
+      batch_size=2, row_shape=[6])) ==
+  {
+      ([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],  # indices
+       ['a', 'b', 'c', 'a', 'b'],                 # values
+       [2, 6]),                                   # dense_shape
+      ([[0, 0], [0, 1], [0, 2], [0, 3]],
+       ['a', 'b', 'c', 'd'],
+       [1, 6])
+  }
+  ```
+
+  Args:
+    batch_size: A `tf.int64` scalar `tf.Tensor`, representing the
+      number of consecutive elements of this dataset to combine in a
+      single batch.
+    row_shape: A `tf.TensorShape` or `tf.int64` vector tensor-like
+      object representing the equivalent dense shape of a row in the
+      resulting `tf.SparseTensor`. Each element of this dataset must
+      have the same rank as `row_shape`, and must have size less
+      than or equal to `row_shape` in each dimension.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _DenseToSparseBatchDataset(dataset, batch_size, row_shape)
+
+  return _apply_fn
+
+
+def padded_batch_window(dataset, padded_shape, padding_value=None):
+  """Batches a window of tensors with padding.
+
+  Args:
+    dataset: the input dataset.
+    padded_shape: (Optional.) `tf.TensorShape` or `tf.int64` vector tensor-like
+      object representing the shape to which the input elements should be padded
+      prior to batching. Any unknown dimensions (e.g. `tf.Dimension(None)` in a
+      `tf.TensorShape` or `-1` in a tensor-like object) will be padded to the
+      maximum size of that dimension in each batch.
+    padding_value: (Optional.) A scalar-shaped `tf.Tensor`, representing the
+      padding value to use. Defaults are `0` for numeric types and the empty
+      string for string types. If `dataset` contains `tf.SparseTensor`, this
+      value is ignored.
+
+  Returns:
+    A `Tensor` representing the batch of the entire input dataset.
+
+  Raises:
+    ValueError: if invalid arguments are provided.
+  """
+  if not issubclass(dataset.output_classes,
+                    (ops.Tensor, sparse_tensor.SparseTensor)):
+    raise TypeError("Input dataset expected to have a single tensor component")
+  if issubclass(dataset.output_classes, (ops.Tensor)):
+    return _padded_batch_dense_window(dataset, padded_shape, padding_value)
+  elif issubclass(dataset.output_classes, (sparse_tensor.SparseTensor)):
+    if padding_value is not None:
+      raise ValueError("Padding value not allowed for sparse tensors")
+    return _padded_batch_sparse_window(dataset, padded_shape)
+  else:
+    raise TypeError("Unsupported dataset type: %s" % dataset.output_classes)
+
+
+def _padded_batch_dense_window(dataset, padded_shape, padding_value=None):
+  """Batches a window of dense tensors with padding."""
+
+  padded_shape = math_ops.cast(
+      convert.partial_shape_to_tensor(padded_shape), dtypes.int32)
+
+  def key_fn(_):
+    return np.int64(0)
+
+  def max_init_fn(_):
+    return padded_shape
+
+  def max_reduce_fn(state, value):
+    """Computes the maximum shape to pad to."""
+    condition = math_ops.reduce_all(
+        math_ops.logical_or(
+            math_ops.less_equal(array_ops.shape(value), padded_shape),
+            math_ops.equal(padded_shape, -1)))
+    assert_op = control_flow_ops.Assert(condition, [
+        "Actual shape greater than padded shape: ",
+        array_ops.shape(value), padded_shape
+    ])
+    with ops.control_dependencies([assert_op]):
+      return math_ops.maximum(state, array_ops.shape(value))
+
+  def finalize_fn(state):
+    return state
+
+  # Compute the padded shape.
+  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
+  padded_shape = get_single_element.get_single_element(
+      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))
+
+  if padding_value is None:
+    if dataset.output_types == dtypes.string:
+      padding_value = ""
+    elif dataset.output_types == dtypes.bool:
+      padding_value = False
+    elif dataset.output_types == dtypes.variant:
+      raise TypeError("Unable to create padding for field of type 'variant'")
+    else:
+      padding_value = 0
+
+  def batch_init_fn(_):
+    batch_shape = array_ops.concat(
+        [np.array([0], dtype=np.int32), padded_shape], 0)
+    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)
+
+  def batch_reduce_fn(state, value):
+    return array_ops.concat([state, [value]], 0)
+
+  def pad_fn(value):
+    shape = array_ops.shape(value)
+    left = array_ops.zeros_like(shape)
+    right = padded_shape - shape
+    return array_ops.pad(
+        value, array_ops.stack([left, right], 1), constant_values=padding_value)
+
+  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
+  return get_single_element.get_single_element(
+      dataset.map(pad_fn).apply(
+          grouping.group_by_reducer(key_fn, batch_reducer)))
+
+
+def _padded_batch_sparse_window(dataset, padded_shape):
+  """Batches a window of sparse tensors with padding."""
+
+  def key_fn(_):
+    return np.int64(0)
+
+  def max_init_fn(_):
+    return convert.partial_shape_to_tensor(padded_shape)
+
+  def max_reduce_fn(state, value):
+    """Computes the maximum shape to pad to."""
+    condition = math_ops.reduce_all(
+        math_ops.logical_or(
+            math_ops.less_equal(value.dense_shape, padded_shape),
+            math_ops.equal(padded_shape, -1)))
+    assert_op = control_flow_ops.Assert(condition, [
+        "Actual shape greater than padded shape: ", value.dense_shape,
+        padded_shape
+    ])
+    with ops.control_dependencies([assert_op]):
+      return math_ops.maximum(state, value.dense_shape)
+
+  def finalize_fn(state):
+    return state
+
+  # Compute the padded shape.
+  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
+  padded_shape = get_single_element.get_single_element(
+      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))
+
+  def batch_init_fn(_):
+    indices_shape = array_ops.concat([[0], [array_ops.size(padded_shape) + 1]],
+                                     0)
+    return sparse_tensor.SparseTensor(
+        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
+        values=constant_op.constant([], shape=[0], dtype=dataset.output_types),
+        dense_shape=array_ops.concat(
+            [np.array([0], dtype=np.int64), padded_shape], 0))
+
+  def batch_reduce_fn(state, value):
+    padded_value = sparse_tensor.SparseTensor(
+        indices=value.indices, values=value.values, dense_shape=padded_shape)
+    reshaped_value = sparse_ops.sparse_reshape(
+        padded_value,
+        array_ops.concat(
+            [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
+    return sparse_ops.sparse_concat(0, [state, reshaped_value])
+
+  reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
+  return get_single_element.get_single_element(
+      dataset.apply(grouping.group_by_reducer(key_fn, reducer)))
+
+
+class _UnbatchDataset(dataset_ops.UnaryDataset):
+  """A dataset that splits the elements of its input into multiple elements."""
+
+  def __init__(self, input_dataset):
+    """See `unbatch()` for more details."""
+    super(_UnbatchDataset, self).__init__(input_dataset)
+    flat_shapes = nest.flatten(input_dataset.output_shapes)
+    if any(s.ndims == 0 for s in flat_shapes):
+      raise ValueError("Cannot unbatch an input with scalar components.")
+    known_batch_dim = tensor_shape.Dimension(None)
+    for s in flat_shapes:
+      try:
+        known_batch_dim = known_batch_dim.merge_with(s[0])
+      except ValueError:
+        raise ValueError("Cannot unbatch an input whose components have "
+                         "different batch sizes.")
+    self._input_dataset = input_dataset
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.unbatch_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return nest.map_structure(lambda s: s[1:],
+                              self._input_dataset.output_shapes)
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+@tf_export("data.experimental.unbatch")
+def unbatch():
+  """Splits elements of a dataset into multiple elements on the batch dimension.
+
+  For example, if elements of the dataset are shaped `[B, a0, a1, ...]`,
+  where `B` may vary for each input element, then for each element in the
+  dataset, the unbatched dataset will contain `B` consecutive elements
+  of shape `[a0, a1, ...]`.
+
+  ```python
+  # NOTE: The following example uses `{ ... }` to represent the contents
+  # of a dataset.
+  a = { ['a', 'b', 'c'], ['a', 'b'], ['a', 'b', 'c', 'd'] }
+
+  a.apply(tf.data.experimental.unbatch()) == {
+      'a', 'b', 'c', 'a', 'b', 'a', 'b', 'c', 'd'}
+  ```
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    if not sparse.any_sparse(dataset.output_classes):
+      return _UnbatchDataset(dataset)
+
+    # NOTE(mrry): We must ensure that any SparseTensors in `dataset`
+    # are normalized to the rank-1 dense representation, so that the
+    # sparse-oblivious unbatching logic will slice them
+    # appropriately. This leads to a somewhat inefficient re-encoding step
+    # for all SparseTensor components.
+    # TODO(mrry): Consider optimizing this in future
+    # if it turns out to be a bottleneck.
+    def normalize(arg, *rest):
+      if rest:
+        return sparse.serialize_many_sparse_tensors((arg,) + rest)
+      else:
+        return sparse.serialize_many_sparse_tensors(arg)
+
+    normalized_dataset = dataset.map(normalize)
+
+    # NOTE(mrry): Our `map()` has lost information about the sparseness
+    # of any SparseTensor components, so re-apply the structure of the
+    # original dataset.
+    restructured_dataset = _RestructuredDataset(
+        normalized_dataset,
+        dataset.output_types,
+        dataset.output_shapes,
+        dataset.output_classes,
+        allow_unsafe_cast=True)
+    return _UnbatchDataset(restructured_dataset)
+
+  return _apply_fn
+
+
+class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that batches ragged dense elements into `tf.SparseTensor`s."""
+
+  def __init__(self, input_dataset, batch_size, row_shape):
+    """See `Dataset.dense_to_sparse_batch()` for more details."""
+    super(_DenseToSparseBatchDataset, self).__init__(input_dataset)
+    if not isinstance(input_dataset.output_types, dtypes.DType):
+      raise TypeError("DenseToSparseDataset requires an input whose elements "
+                      "have a single component, whereas the input has %r." %
+                      input_dataset.output_types)
+    self._input_dataset = input_dataset
+    self._batch_size = batch_size
+    self._row_shape = row_shape
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.dense_to_sparse_batch_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._batch_size,
+        row_shape=convert.partial_shape_to_tensor(self._row_shape),
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return sparse_tensor.SparseTensor
+
+  @property
+  def output_shapes(self):
+    return tensor_shape.vector(None).concatenate(self._row_shape)
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+class _RestructuredDataset(dataset_ops.UnaryDataset):
+  """An internal helper for changing the structure and shape of a dataset."""
+
+  def __init__(self,
+               dataset,
+               output_types,
+               output_shapes=None,
+               output_classes=None,
+               allow_unsafe_cast=False):
+    """Creates a new dataset with the given output types and shapes.
+
+    The given `dataset` must have a structure that is convertible:
+    * `dataset.output_types` must be the same as `output_types` module nesting.
+    * Each shape in `dataset.output_shapes` must be compatible with each shape
+      in `output_shapes` (if given).
+
+    Note: This helper permits "unsafe casts" for shapes, equivalent to using
+    `tf.Tensor.set_shape()` where domain-specific knowledge is available.
+
+    Args:
+      dataset: A `Dataset` object.
+      output_types: A nested structure of `tf.DType` objects.
+      output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects.
+        If omitted, the shapes will be inherited from `dataset`.
+      output_classes: (Optional.) A nested structure of class types.
+        If omitted, the class types will be inherited from `dataset`.
+      allow_unsafe_cast: (Optional.) If `True`, the caller may switch the
+        reported output types and shapes of the restructured dataset, e.g. to
+        switch a sparse tensor represented as `tf.variant` to its user-visible
+        type and shape.
+
+    Raises:
+      ValueError: If either `output_types` or `output_shapes` is not compatible
+        with the structure of `dataset`.
+    """
+    super(_RestructuredDataset, self).__init__(dataset)
+    self._input_dataset = dataset
+
+    if not allow_unsafe_cast:
+      # Validate that the types are compatible.
+      output_types = nest.map_structure(dtypes.as_dtype, output_types)
+      flat_original_types = nest.flatten(dataset.output_types)
+      flat_new_types = nest.flatten(output_types)
+      if flat_original_types != flat_new_types:
+        raise ValueError(
+            "Dataset with output types %r cannot be restructured to have "
+            "output types %r" % (dataset.output_types, output_types))
+
+    self._output_types = output_types
+
+    if output_shapes is None:
+      # Inherit shapes from the original `dataset`.
+      self._output_shapes = nest.pack_sequence_as(output_types,
+                                                  nest.flatten(
+                                                      dataset.output_shapes))
+    else:
+      if not allow_unsafe_cast:
+        # Validate that the shapes are compatible.
+        nest.assert_same_structure(output_types, output_shapes)
+        flat_original_shapes = nest.flatten(dataset.output_shapes)
+        flat_new_shapes = nest.flatten_up_to(output_types, output_shapes)
+
+        for original_shape, new_shape in zip(flat_original_shapes,
+                                             flat_new_shapes):
+          if not original_shape.is_compatible_with(new_shape):
+            raise ValueError(
+                "Dataset with output shapes %r cannot be restructured to have "
+                "incompatible output shapes %r" % (dataset.output_shapes,
+                                                   output_shapes))
+      self._output_shapes = nest.map_structure_up_to(
+          output_types, tensor_shape.as_shape, output_shapes)
+    if output_classes is None:
+      # Inherit class types from the original `dataset`.
+      self._output_classes = nest.pack_sequence_as(output_types,
+                                                   nest.flatten(
+                                                       dataset.output_classes))
+    else:
+      self._output_classes = output_classes
+
+  def _as_variant_tensor(self):
+    return self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+
+class _MapAndBatchDataset(dataset_ops.MapDataset):
+  """A `Dataset` that maps a function over a batch of elements."""
+
+  def __init__(self, input_dataset, map_func, batch_size, num_parallel_calls,
+               drop_remainder):
+    """See `Dataset.map()` for details."""
+    super(_MapAndBatchDataset, self).__init__(input_dataset, map_func)
+    self._batch_size_t = ops.convert_to_tensor(
+        batch_size, dtype=dtypes.int64, name="batch_size")
+    self._num_parallel_calls_t = ops.convert_to_tensor(
+        num_parallel_calls, dtype=dtypes.int64, name="num_parallel_calls")
+    self._drop_remainder_t = ops.convert_to_tensor(
+        drop_remainder, dtype=dtypes.bool, name="drop_remainder")
+
+    self._batch_size = batch_size
+    self._drop_remainder = drop_remainder
+
+  def _as_variant_tensor(self):
+    # pylint: disable=protected-access
+    input_resource = self._input_dataset._as_variant_tensor()
+    return gen_dataset_ops.map_and_batch_dataset_v2(
+        input_resource,
+        self._map_func.captured_inputs,
+        f=self._map_func,
+        batch_size=self._batch_size_t,
+        num_parallel_calls=self._num_parallel_calls_t,
+        drop_remainder=self._drop_remainder_t,
+        **dataset_ops.flat_structure(self))
+    # pylint: enable=protected-access
+
+  @property
+  def output_shapes(self):
+    dim = self._batch_size if self._drop_remainder else None
+    return nest.pack_sequence_as(self._output_shapes, [
+        tensor_shape.vector(dim).concatenate(s)
+        for s in nest.flatten(self._output_shapes)
+    ])
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+
+@tf_export("data.experimental.map_and_batch")
+def map_and_batch(map_func,
+                  batch_size,
+                  num_parallel_batches=None,
+                  drop_remainder=False,
+                  num_parallel_calls=None):
+  """Fused implementation of `map` and `batch`.
+
+  Maps `map_func` across `batch_size` consecutive elements of this dataset
+  and then combines them into a batch. Functionally, it is equivalent to `map`
+  followed by `batch`. However, by fusing the two transformations together, the
+  implementation can be more efficient. Surfacing this transformation in the API
+  is temporary. Once automatic input pipeline optimization is implemented,
+  the fusing of `map` and `batch` will happen automatically and this API will be
+  deprecated.
+
+  Args:
+    map_func: A function mapping a nested structure of tensors to another
+      nested structure of tensors.
+    batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
+      consecutive elements of this dataset to combine in a single batch.
+    num_parallel_batches: (Optional.) A `tf.int64` scalar `tf.Tensor`,
+      representing the number of batches to create in parallel. On one hand,
+      higher values can help mitigate the effect of stragglers. On the other
+      hand, higher values can increase contention if CPU is scarce.
+    drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing
+      whether the last batch should be dropped in case its size is smaller than
+      desired; the default behavior is not to drop the smaller batch.
+    num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
+        representing the number of elements to process in parallel. If not
+        specified, `batch_size * num_parallel_batches` elements will be
+        processed in parallel.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+
+  Raises:
+    ValueError: If both `num_parallel_batches` and `num_parallel_calls` are
+      specified.
+  """
+
+  if num_parallel_batches is None and num_parallel_calls is None:
+    num_parallel_calls = batch_size
+  elif num_parallel_batches is not None and num_parallel_calls is None:
+    num_parallel_calls = batch_size * num_parallel_batches
+  elif num_parallel_batches is not None and num_parallel_calls is not None:
+    raise ValueError("The `num_parallel_batches` and `num_parallel_calls` "
+                     "arguments are mutually exclusive.")
+
+  def _apply_fn(dataset):
+    return _MapAndBatchDataset(dataset, map_func, batch_size,
+                               num_parallel_calls, drop_remainder)
+
+  return _apply_fn
diff --git a/tensorflow/python/data/experimental/ops/counter.py b/tensorflow/python/data/experimental/ops/counter.py
new file mode 100644
index 0000000000..42200eaef9
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/counter.py
@@ -0,0 +1,55 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The Counter Dataset."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.ops import scan_ops
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.Counter")
+def Counter(start=0, step=1, dtype=dtypes.int64):
+  """Creates a `Dataset` that counts from `start` in steps of size `step`.
+
+  For example:
+
+  ```python
+  Dataset.count() == [0, 1, 2, ...)
+  Dataset.count(2) == [2, 3, ...)
+  Dataset.count(2, 5) == [2, 7, 12, ...)
+  Dataset.count(0, -1) == [0, -1, -2, ...)
+  Dataset.count(10, -1) == [10, 9, ...)
+  ```
+
+  Args:
+    start: (Optional.) The starting value for the counter. Defaults to 0.
+    step: (Optional.) The step size for the counter. Defaults to 1.
+    dtype: (Optional.) The data type for counter elements. Defaults to
+      `tf.int64`.
+
+  Returns:
+    A `Dataset` of scalar `dtype` elements.
+  """
+  with ops.name_scope("counter"):
+    start = ops.convert_to_tensor(start, dtype=dtype, name="start")
+    step = ops.convert_to_tensor(step, dtype=dtype, name="step")
+    return dataset_ops.Dataset.from_tensors(0).repeat(None).apply(
+        scan_ops.scan(start, lambda state, _: (state + step, state)))
diff --git a/tensorflow/python/data/experimental/ops/enumerate_ops.py b/tensorflow/python/data/experimental/ops/enumerate_ops.py
new file mode 100644
index 0000000000..a1af98f552
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/enumerate_ops.py
@@ -0,0 +1,60 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Enumerate dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.enumerate_dataset")
+def enumerate_dataset(start=0):
+  """A transformation that enumerate the elements of a dataset.
+
+  It is Similar to python's `enumerate`.
+  For example:
+
+  ```python
+  # NOTE: The following examples use `{ ... }` to represent the
+  # contents of a dataset.
+  a = { 1, 2, 3 }
+  b = { (7, 8), (9, 10) }
+
+  # The nested structure of the `datasets` argument determines the
+  # structure of elements in the resulting dataset.
+  a.apply(tf.data.experimental.enumerate(start=5)) == { (5, 1), (6, 2), (7, 3) }
+  b.apply(tf.data.experimental.enumerate()) == { (0, (7, 8)), (1, (9, 10)) }
+  ```
+
+  Args:
+    start: A `tf.int64` scalar `tf.Tensor`, representing the start
+      value for enumeration.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    max_value = np.iinfo(dtypes.int64.as_numpy_dtype).max
+    return dataset_ops.Dataset.zip((dataset_ops.Dataset.range(start, max_value),
+                                    dataset))
+
+  return _apply_fn
diff --git a/tensorflow/python/data/experimental/ops/error_ops.py b/tensorflow/python/data/experimental/ops/error_ops.py
new file mode 100644
index 0000000000..82e274b70c
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/error_ops.py
@@ -0,0 +1,78 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Ignore_errors dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.ignore_errors")
+def ignore_errors():
+  """Creates a `Dataset` from another `Dataset` and silently ignores any errors.
+
+  Use this transformation to produce a dataset that contains the same elements
+  as the input, but silently drops any elements that caused an error. For
+  example:
+
+  ```python
+  dataset = tf.data.Dataset.from_tensor_slices([1., 2., 0., 4.])
+
+  # Computing `tf.check_numerics(1. / 0.)` will raise an InvalidArgumentError.
+  dataset = dataset.map(lambda x: tf.check_numerics(1. / x, "error"))
+
+  # Using `ignore_errors()` will drop the element that causes an error.
+  dataset =
+      dataset.apply(tf.data.experimental.ignore_errors())  # ==> {1., 0.5, 0.2}
+  ```
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _IgnoreErrorsDataset(dataset)
+
+  return _apply_fn
+
+
+class _IgnoreErrorsDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that silently ignores errors when computing its input."""
+
+  def __init__(self, input_dataset):
+    """See `Dataset.ignore_errors()` for details."""
+    super(_IgnoreErrorsDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+
+  def _as_variant_tensor(self):
+    return gen_experimental_dataset_ops.experimental_ignore_errors_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
diff --git a/tensorflow/python/data/experimental/ops/get_single_element.py b/tensorflow/python/data/experimental/ops/get_single_element.py
new file mode 100644
index 0000000000..132526166c
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/get_single_element.py
@@ -0,0 +1,72 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python wrappers for Datasets and Iterators."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.get_single_element")
+def get_single_element(dataset):
+  """Returns the single element in `dataset` as a nested structure of tensors.
+
+  This function enables you to use a `tf.data.Dataset` in a stateless
+  "tensor-in tensor-out" expression, without creating a `tf.data.Iterator`.
+  This can be useful when your preprocessing transformations are expressed
+  as a `Dataset`, and you want to use the transformation at serving time.
+  For example:
+
+  ```python
+  input_batch = tf.placeholder(tf.string, shape=[BATCH_SIZE])
+
+  def preprocessing_fn(input_str):
+    # ...
+    return image, label
+
+  dataset = (tf.data.Dataset.from_tensor_slices(input_batch)
+             .map(preprocessing_fn, num_parallel_calls=BATCH_SIZE)
+             .batch(BATCH_SIZE))
+
+  image_batch, label_batch = tf.data.experimental.get_single_element(dataset)
+  ```
+
+  Args:
+    dataset: A `tf.data.Dataset` object containing a single element.
+
+  Returns:
+    A nested structure of `tf.Tensor` objects, corresponding to the single
+    element of `dataset`.
+
+  Raises:
+    TypeError: if `dataset` is not a `tf.data.Dataset` object.
+    InvalidArgumentError (at runtime): if `dataset` does not contain exactly
+      one element.
+  """
+  if not isinstance(dataset, dataset_ops.Dataset):
+    raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
+
+  nested_ret = nest.pack_sequence_as(
+      dataset.output_types, gen_dataset_ops.dataset_to_single_element(
+          dataset._as_variant_tensor(),  # pylint: disable=protected-access
+          **dataset_ops.flat_structure(dataset)))
+  return sparse.deserialize_sparse_tensors(
+      nested_ret, dataset.output_types, dataset.output_shapes,
+      dataset.output_classes)
diff --git a/tensorflow/python/data/experimental/ops/grouping.py b/tensorflow/python/data/experimental/ops/grouping.py
new file mode 100644
index 0000000000..18ba583220
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/grouping.py
@@ -0,0 +1,551 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Grouping dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.group_by_reducer")
+def group_by_reducer(key_func, reducer):
+  """A transformation that groups elements and performs a reduction.
+
+  This transformation maps element of a dataset to a key using `key_func` and
+  groups the elements by key. The `reducer` is used to process each group; its
+  `init_func` is used to initialize state for each group when it is created, the
+  `reduce_func` is used to update the state every time an element is mapped to
+  the matching group, and the `finalize_func` is used to map the final state to
+  an output value.
+
+  Args:
+    key_func: A function mapping a nested structure of tensors
+      (having shapes and types defined by `self.output_shapes` and
+      `self.output_types`) to a scalar `tf.int64` tensor.
+    reducer: An instance of `Reducer`, which captures the reduction logic using
+      the `init_func`, `reduce_func`, and `finalize_func` functions.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    return _GroupByReducerDataset(dataset, key_func, reducer)
+
+  return _apply_fn
+
+
+@tf_export("data.experimental.group_by_window")
+def group_by_window(key_func,
+                    reduce_func,
+                    window_size=None,
+                    window_size_func=None):
+  """A transformation that groups windows of elements by key and reduces them.
+
+  This transformation maps each consecutive element in a dataset to a key
+  using `key_func` and groups the elements by key. It then applies
+  `reduce_func` to at most `window_size_func(key)` elements matching the same
+  key. All except the final window for each key will contain
+  `window_size_func(key)` elements; the final window may be smaller.
+
+  You may provide either a constant `window_size` or a window size determined by
+  the key through `window_size_func`.
+
+  Args:
+    key_func: A function mapping a nested structure of tensors
+      (having shapes and types defined by `self.output_shapes` and
+      `self.output_types`) to a scalar `tf.int64` tensor.
+    reduce_func: A function mapping a key and a dataset of up to `window_size`
+      consecutive elements matching that key to another dataset.
+    window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
+      consecutive elements matching the same key to combine in a single
+      batch, which will be passed to `reduce_func`. Mutually exclusive with
+      `window_size_func`.
+    window_size_func: A function mapping a key to a `tf.int64` scalar
+      `tf.Tensor`, representing the number of consecutive elements matching
+      the same key to combine in a single batch, which will be passed to
+      `reduce_func`. Mutually exclusive with `window_size`.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+
+  Raises:
+    ValueError: if neither or both of {`window_size`, `window_size_func`} are
+      passed.
+  """
+  if (window_size is not None and window_size_func or
+      not (window_size is not None or window_size_func)):
+    raise ValueError("Must pass either window_size or window_size_func.")
+
+  if window_size is not None:
+
+    def constant_window_func(unused_key):
+      return ops.convert_to_tensor(window_size, dtype=dtypes.int64)
+
+    window_size_func = constant_window_func
+
+  assert window_size_func is not None
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    return _GroupByWindowDataset(dataset, key_func, reduce_func,
+                                 window_size_func)
+
+  return _apply_fn
+
+
+@tf_export("data.experimental.bucket_by_sequence_length")
+def bucket_by_sequence_length(element_length_func,
+                              bucket_boundaries,
+                              bucket_batch_sizes,
+                              padded_shapes=None,
+                              padding_values=None,
+                              pad_to_bucket_boundary=False,
+                              no_padding=False):
+  """A transformation that buckets elements in a `Dataset` by length.
+
+  Elements of the `Dataset` are grouped together by length and then are padded
+  and batched.
+
+  This is useful for sequence tasks in which the elements have variable length.
+  Grouping together elements that have similar lengths reduces the total
+  fraction of padding in a batch which increases training step efficiency.
+
+  Args:
+    element_length_func: function from element in `Dataset` to `tf.int32`,
+      determines the length of the element, which will determine the bucket it
+      goes into.
+    bucket_boundaries: `list<int>`, upper length boundaries of the buckets.
+    bucket_batch_sizes: `list<int>`, batch size per bucket. Length should be
+      `len(bucket_boundaries) + 1`.
+    padded_shapes: Nested structure of `tf.TensorShape` to pass to
+      `tf.data.Dataset.padded_batch`. If not provided, will use
+      `dataset.output_shapes`, which will result in variable length dimensions
+      being padded out to the maximum length in each batch.
+    padding_values: Values to pad with, passed to
+      `tf.data.Dataset.padded_batch`. Defaults to padding with 0.
+    pad_to_bucket_boundary: bool, if `False`, will pad dimensions with unknown
+      size to maximum length in batch. If `True`, will pad dimensions with
+      unknown size to bucket boundary minus 1 (i.e., the maximum length in each
+      bucket), and caller must ensure that the source `Dataset` does not contain
+      any elements with length longer than `max(bucket_boundaries)`.
+    no_padding: `bool`, indicates whether to pad the batch features (features
+      need to be either of type `tf.SparseTensor` or of same shape).
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+
+  Raises:
+    ValueError: if `len(bucket_batch_sizes) != len(bucket_boundaries) + 1`.
+  """
+  with ops.name_scope("bucket_by_seq_length"):
+    if len(bucket_batch_sizes) != (len(bucket_boundaries) + 1):
+      raise ValueError(
+          "len(bucket_batch_sizes) must equal len(bucket_boundaries) + 1")
+
+    batch_sizes = constant_op.constant(bucket_batch_sizes, dtype=dtypes.int64)
+
+    def element_to_bucket_id(*args):
+      """Return int64 id of the length bucket for this element."""
+      seq_length = element_length_func(*args)
+
+      boundaries = list(bucket_boundaries)
+      buckets_min = [np.iinfo(np.int32).min] + boundaries
+      buckets_max = boundaries + [np.iinfo(np.int32).max]
+      conditions_c = math_ops.logical_and(
+          math_ops.less_equal(buckets_min, seq_length),
+          math_ops.less(seq_length, buckets_max))
+      bucket_id = math_ops.reduce_min(array_ops.where(conditions_c))
+
+      return bucket_id
+
+    def window_size_fn(bucket_id):
+      # The window size is set to the batch size for this bucket
+      window_size = batch_sizes[bucket_id]
+      return window_size
+
+    def make_padded_shapes(shapes, none_filler=None):
+      padded = []
+      for shape in nest.flatten(shapes):
+        shape = tensor_shape.TensorShape(shape)
+        shape = [
+            none_filler if d.value is None else d
+            for d in shape
+        ]
+        padded.append(shape)
+      return nest.pack_sequence_as(shapes, padded)
+
+    def batching_fn(bucket_id, grouped_dataset):
+      """Batch elements in dataset."""
+      batch_size = window_size_fn(bucket_id)
+      if no_padding:
+        return grouped_dataset.batch(batch_size)
+      none_filler = None
+      if pad_to_bucket_boundary:
+        err_msg = ("When pad_to_bucket_boundary=True, elements must have "
+                   "length < max(bucket_boundaries).")
+        check = check_ops.assert_less(
+            bucket_id,
+            constant_op.constant(len(bucket_batch_sizes) - 1,
+                                 dtype=dtypes.int64),
+            message=err_msg)
+        with ops.control_dependencies([check]):
+          boundaries = constant_op.constant(bucket_boundaries,
+                                            dtype=dtypes.int64)
+          bucket_boundary = boundaries[bucket_id]
+          none_filler = bucket_boundary - 1
+      shapes = make_padded_shapes(
+          padded_shapes or grouped_dataset.output_shapes,
+          none_filler=none_filler)
+      return grouped_dataset.padded_batch(batch_size, shapes, padding_values)
+
+    def _apply_fn(dataset):
+      return dataset.apply(
+          group_by_window(element_to_bucket_id, batching_fn,
+                          window_size_func=window_size_fn))
+
+    return _apply_fn
+
+
+def _map_x_dataset(map_func):
+  """A transformation that maps `map_func` across its input.
+
+  This transformation is similar to `tf.data.Dataset.map`, but in addition to
+  supporting dense and sparse tensor inputs, it also supports dataset inputs.
+
+  Args:
+    map_func: A function mapping a nested structure of tensors and/or datasets
+      (having shapes and types defined by `self.output_shapes` and
+     `self.output_types`) to another nested structure of tensors and/or
+     datasets.
+
+  Returns:
+    Dataset: A `Dataset`.
+  """
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    return _MapXDataset(dataset, map_func)
+
+  return _apply_fn
+
+
+class _GroupByReducerDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that groups its input and performs a reduction."""
+
+  def __init__(self, input_dataset, key_func, reducer):
+    """See `group_by_reducer()` for details."""
+    super(_GroupByReducerDataset, self).__init__(input_dataset)
+
+    self._input_dataset = input_dataset
+
+    self._make_key_func(key_func, input_dataset)
+    self._make_init_func(reducer.init_func)
+    self._make_reduce_func(reducer.reduce_func, input_dataset)
+    self._make_finalize_func(reducer.finalize_func)
+
+  def _make_key_func(self, key_func, input_dataset):
+    """Make wrapping Defun for key_func."""
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        key_func, "tf.data.experimental.group_by_reducer()", input_dataset)
+    if not (
+        wrapped_func.output_types == dtypes.int64 and
+        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
+      raise ValueError(
+          "`key_func` must return a single tf.int64 tensor. "
+          "Got type=%s and shape=%s"
+          % (wrapped_func.output_types, wrapped_func.output_shapes))
+    self._key_func = wrapped_func.function
+
+  def _make_init_func(self, init_func):
+    """Make wrapping Defun for init_func."""
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        init_func,
+        "tf.data.experimental.group_by_reducer()",
+        input_classes=ops.Tensor,
+        input_shapes=tensor_shape.scalar(),
+        input_types=dtypes.int64)
+    self._init_func = wrapped_func.function
+    self._state_classes = wrapped_func.output_classes
+    self._state_shapes = wrapped_func.output_shapes
+    self._state_types = wrapped_func.output_types
+
+  def _make_reduce_func(self, reduce_func, input_dataset):
+    """Make wrapping Defun for reduce_func."""
+
+    # Iteratively rerun the reduce function until reaching a fixed point on
+    # `self._state_shapes`.
+    need_to_rerun = True
+    while need_to_rerun:
+
+      wrapped_func = dataset_ops.StructuredFunctionWrapper(
+          reduce_func,
+          "tf.data.experimental.group_by_reducer()",
+          input_classes=(self._state_classes, input_dataset.output_classes),
+          input_shapes=(self._state_shapes, input_dataset.output_shapes),
+          input_types=(self._state_types, input_dataset.output_types),
+          add_to_graph=False)
+
+      # Extract and validate class information from the returned values.
+      for new_state_class, state_class in zip(
+          nest.flatten(wrapped_func.output_classes),
+          nest.flatten(self._state_classes)):
+        if not issubclass(new_state_class, state_class):
+          raise TypeError(
+              "The element classes for the new state must match the initial "
+              "state. Expected %s; got %s." %
+              (self._state_classes, wrapped_func.output_classes))
+
+      # Extract and validate type information from the returned values.
+      for new_state_type, state_type in zip(
+          nest.flatten(wrapped_func.output_types),
+          nest.flatten(self._state_types)):
+        if new_state_type != state_type:
+          raise TypeError(
+              "The element types for the new state must match the initial "
+              "state. Expected %s; got %s." %
+              (self._state_types, wrapped_func.output_types))
+
+      # Extract shape information from the returned values.
+      flat_state_shapes = nest.flatten(self._state_shapes)
+      flat_new_state_shapes = nest.flatten(wrapped_func.output_shapes)
+      weakened_state_shapes = [
+          original.most_specific_compatible_shape(new)
+          for original, new in zip(flat_state_shapes, flat_new_state_shapes)
+      ]
+
+      need_to_rerun = False
+      for original_shape, weakened_shape in zip(flat_state_shapes,
+                                                weakened_state_shapes):
+        if original_shape.ndims is not None and (
+            weakened_shape.ndims is None or
+            original_shape.as_list() != weakened_shape.as_list()):
+          need_to_rerun = True
+          break
+
+      if need_to_rerun:
+        self._state_shapes = nest.pack_sequence_as(self._state_shapes,
+                                                   weakened_state_shapes)
+
+    self._reduce_func = wrapped_func.function
+    self._reduce_func.add_to_graph(ops.get_default_graph())
+
+  def _make_finalize_func(self, finalize_func):
+    """Make wrapping Defun for finalize_func."""
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        finalize_func,
+        "tf.data.experimental.group_by_reducer()",
+        input_classes=self._state_classes,
+        input_shapes=self._state_shapes,
+        input_types=self._state_types)
+    self._finalize_func = wrapped_func.function
+    self._output_classes = wrapped_func.output_classes
+    self._output_shapes = wrapped_func.output_shapes
+    self._output_types = wrapped_func.output_types
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.group_by_reducer_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._key_func.captured_inputs,
+        self._init_func.captured_inputs,
+        self._reduce_func.captured_inputs,
+        self._finalize_func.captured_inputs,
+        key_func=self._key_func,
+        init_func=self._init_func,
+        reduce_func=self._reduce_func,
+        finalize_func=self._finalize_func,
+        **dataset_ops.flat_structure(self))
+
+
+class _GroupByWindowDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that groups its input and performs a windowed reduction."""
+
+  def __init__(self, input_dataset, key_func, reduce_func, window_size_func):
+    """See `group_by_window()` for details."""
+    super(_GroupByWindowDataset, self).__init__(input_dataset)
+
+    self._input_dataset = input_dataset
+
+    self._make_key_func(key_func, input_dataset)
+    self._make_reduce_func(reduce_func, input_dataset)
+    self._make_window_size_func(window_size_func)
+
+  def _make_window_size_func(self, window_size_func):
+    """Make wrapping Defun for window_size_func."""
+    def window_size_func_wrapper(key):
+      return ops.convert_to_tensor(window_size_func(key), dtype=dtypes.int64)
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        window_size_func_wrapper,
+        "tf.data.experimental.group_by_window()",
+        input_classes=ops.Tensor,
+        input_shapes=tensor_shape.scalar(),
+        input_types=dtypes.int64)
+    if not (
+        wrapped_func.output_types == dtypes.int64 and
+        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
+      raise ValueError(
+          "`window_size_func` must return a single tf.int64 scalar tensor.")
+    self._window_size_func = wrapped_func.function
+
+  def _make_key_func(self, key_func, input_dataset):
+    """Make wrapping Defun for key_func."""
+    def key_func_wrapper(*args):
+      return ops.convert_to_tensor(key_func(*args), dtype=dtypes.int64)
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        key_func_wrapper, "tf.data.experimental.group_by_window()",
+        input_dataset)
+    if not (
+        wrapped_func.output_types == dtypes.int64 and
+        wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
+      raise ValueError(
+          "`key_func` must return a single tf.int64 scalar tensor.")
+    self._key_func = wrapped_func.function
+
+  def _make_reduce_func(self, reduce_func, input_dataset):
+    """Make wrapping Defun for reduce_func."""
+    nested_dataset = dataset_ops._NestedDatasetComponent(input_dataset)  # pylint: disable=protected-access
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        reduce_func,
+        "tf.data.experimental.reduce_by_window()",
+        input_classes=(ops.Tensor, nested_dataset),
+        input_shapes=(tensor_shape.scalar(), nested_dataset),
+        input_types=(dtypes.int64, nested_dataset),
+        experimental_nested_dataset_support=True)
+    if not isinstance(
+        wrapped_func.output_classes, dataset_ops._NestedDatasetComponent):  # pylint: disable=protected-access
+      raise TypeError("`reduce_func` must return a `Dataset` object.")
+    self._output_classes = wrapped_func.output_classes.output_classes
+    self._output_types = wrapped_func.output_types.output_types
+    self._output_shapes = wrapped_func.output_shapes.output_shapes
+    self._reduce_func = wrapped_func.function
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.group_by_window_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._key_func.captured_inputs,
+        self._reduce_func.captured_inputs,
+        self._window_size_func.captured_inputs,
+        key_func=self._key_func,
+        reduce_func=self._reduce_func,
+        window_size_func=self._window_size_func,
+        **dataset_ops.flat_structure(self))
+
+
+@tf_export("data.experimental.Reducer")
+class Reducer(object):
+  """A reducer is used for reducing a set of elements.
+
+  A reducer is represented as a tuple of the three functions:
+    1) initialization function: key => initial state
+    2) reduce function: (old state, input) => new state
+    3) finalization function: state => result
+  """
+
+  def __init__(self, init_func, reduce_func, finalize_func):
+    self._init_func = init_func
+    self._reduce_func = reduce_func
+    self._finalize_func = finalize_func
+
+  @property
+  def init_func(self):
+    return self._init_func
+
+  @property
+  def reduce_func(self):
+    return self._reduce_func
+
+  @property
+  def finalize_func(self):
+    return self._finalize_func
+
+
+class _MapXDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that maps a function over elements in its input."""
+
+  def __init__(self, input_dataset, map_func):
+    """See `map_x_dataset()` for details."""
+    super(_MapXDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+
+    wrapped_func = dataset_ops.StructuredFunctionWrapper(
+        map_func,
+        "tf.data.experimental.map_x_dataset()",
+        input_dataset,
+        experimental_nested_dataset_support=True)
+    self._output_classes = wrapped_func.output_classes
+    self._output_shapes = wrapped_func.output_shapes
+    self._output_types = wrapped_func.output_types
+    self._map_func = wrapped_func.function
+
+  def _as_variant_tensor(self):
+    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+    return gen_dataset_ops.map_dataset(
+        input_t,
+        self._map_func.captured_inputs,
+        f=self._map_func,
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
diff --git a/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py b/tensorflow/python/data/experimental/ops/indexed_dataset_ops.py
similarity index 100%
rename from tensorflow/contrib/data/python/ops/indexed_dataset_ops.py
rename to tensorflow/python/data/experimental/ops/indexed_dataset_ops.py
diff --git a/tensorflow/python/data/experimental/ops/interleave_ops.py b/tensorflow/python/data/experimental/ops/interleave_ops.py
new file mode 100644
index 0000000000..a3c094859e
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/interleave_ops.py
@@ -0,0 +1,262 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Non-deterministic dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.ops import random_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import readers
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.ops import gen_stateless_random_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.parallel_interleave")
+def parallel_interleave(map_func,
+                        cycle_length,
+                        block_length=1,
+                        sloppy=False,
+                        buffer_output_elements=None,
+                        prefetch_input_elements=None):
+  """A parallel version of the `Dataset.interleave()` transformation.
+
+  `parallel_interleave()` maps `map_func` across its input to produce nested
+  datasets, and outputs their elements interleaved. Unlike
+  `tf.data.Dataset.interleave`, it gets elements from `cycle_length` nested
+  datasets in parallel, which increases the throughput, especially in the
+  presence of stragglers. Furthermore, the `sloppy` argument can be used to
+  improve performance, by relaxing the requirement that the outputs are produced
+  in a deterministic order, and allowing the implementation to skip over nested
+  datasets whose elements are not readily available when requested.
+
+  Example usage:
+
+  ```python
+  # Preprocess 4 files concurrently.
+  filenames = tf.data.Dataset.list_files("/path/to/data/train*.tfrecords")
+  dataset = filenames.apply(
+      tf.data.experimental.parallel_interleave(
+          lambda filename: tf.data.TFRecordDataset(filename),
+          cycle_length=4))
+  ```
+
+  WARNING: If `sloppy` is `True`, the order of produced elements is not
+  deterministic.
+
+  Args:
+    map_func: A function mapping a nested structure of tensors to a `Dataset`.
+    cycle_length: The number of input `Dataset`s to interleave from in parallel.
+    block_length: The number of consecutive elements to pull from an input
+      `Dataset` before advancing to the next input `Dataset`.
+    sloppy: If false, elements are produced in deterministic order. Otherwise,
+      the implementation is allowed, for the sake of expediency, to produce
+      elements in a non-deterministic order.
+    buffer_output_elements: The number of elements each iterator being
+      interleaved should buffer (similar to the `.prefetch()` transformation for
+      each interleaved iterator).
+    prefetch_input_elements: The number of input elements to transform to
+      iterators before they are needed for interleaving.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+  def _apply_fn(dataset):
+    return readers.ParallelInterleaveDataset(
+        dataset, map_func, cycle_length, block_length, sloppy,
+        buffer_output_elements, prefetch_input_elements)
+
+  return _apply_fn
+
+
+class _DirectedInterleaveDataset(dataset_ops.Dataset):
+  """A substitute for `Dataset.interleave()` on a fixed list of datasets."""
+
+  def __init__(self, selector_input, data_inputs):
+    self._selector_input = selector_input
+    self._data_inputs = list(data_inputs)
+
+    for data_input in data_inputs[1:]:
+      if (data_input.output_types != data_inputs[0].output_types or
+          data_input.output_classes != data_inputs[0].output_classes):
+        raise TypeError("All datasets must have the same type and class.")
+
+  def _as_variant_tensor(self):
+    # pylint: disable=protected-access
+    return (
+        gen_experimental_dataset_ops.experimental_directed_interleave_dataset(
+            self._selector_input._as_variant_tensor(), [
+                data_input._as_variant_tensor()
+                for data_input in self._data_inputs
+            ], **dataset_ops.flat_structure(self)))
+    # pylint: enable=protected-access
+
+  def _inputs(self):
+    return [self._selector_input] + self._data_inputs
+
+  @property
+  def output_classes(self):
+    return self._data_inputs[0].output_classes
+
+  @property
+  def output_shapes(self):
+    ret = self._data_inputs[0].output_shapes
+    for data_input in self._data_inputs[1:]:
+      ret = nest.pack_sequence_as(ret, [
+          ts1.most_specific_compatible_shape(ts2) for (ts1, ts2) in zip(
+              nest.flatten(ret), nest.flatten(data_input.output_shapes))
+      ])
+    return ret
+
+  @property
+  def output_types(self):
+    return self._data_inputs[0].output_types
+
+
+@tf_export("data.experimental.sample_from_datasets")
+def sample_from_datasets(datasets, weights=None, seed=None):
+  """Samples elements at random from the datasets in `datasets`.
+
+  Args:
+    datasets: A list of `tf.data.Dataset` objects with compatible structure.
+    weights: (Optional.) A list of `len(datasets)` floating-point values where
+      `weights[i]` represents the probability with which an element should be
+      sampled from `datasets[i]`, or a `tf.data.Dataset` object where each
+      element is such a list. Defaults to a uniform distribution across
+      `datasets`.
+    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      random seed that will be used to create the distribution. See
+      `tf.set_random_seed` for behavior.
+
+  Returns:
+    A dataset that interleaves elements from `datasets` at random, according to
+    `weights` if provided, otherwise with uniform probability.
+
+  Raises:
+    TypeError: If the `datasets` or `weights` arguments have the wrong type.
+    ValueError: If the `weights` argument is specified and does not match the
+      length of the `datasets` element.
+  """
+  num_datasets = len(datasets)
+  if not isinstance(weights, dataset_ops.Dataset):
+    if weights is None:
+      # Select inputs with uniform probability.
+      logits = [[1.0] * num_datasets]
+
+    else:
+      # Use the given `weights` as the probability of choosing the respective
+      # input.
+      weights = ops.convert_to_tensor(weights, name="weights")
+      if weights.dtype not in (dtypes.float32, dtypes.float64):
+        raise TypeError("`weights` must be convertible to a tensor of "
+                        "`tf.float32` or `tf.float64` elements.")
+      if not weights.shape.is_compatible_with([num_datasets]):
+        raise ValueError(
+            "`weights` must be a vector of length `len(datasets)`.")
+
+      # The `stateless_multinomial()` op expects log-probabilities, as opposed
+      # to weights.
+      logits = array_ops.expand_dims(math_ops.log(weights, name="logits"), 0)
+
+    # NOTE(mrry): We only specialize when `weights` is not a `Dataset`. When it
+    # is a `Dataset`, it is possible that evaluating it has a side effect the
+    # user depends on.
+    if len(datasets) == 1:
+      return datasets[0]
+
+    def select_dataset_constant_logits(seed):
+      return array_ops.squeeze(
+          gen_stateless_random_ops.stateless_multinomial(logits, 1, seed=seed),
+          axis=[0, 1])
+
+    selector_input = dataset_ops.MapDataset(
+        random_ops.RandomDataset(seed).batch(2),
+        select_dataset_constant_logits,
+        use_inter_op_parallelism=False)
+
+  else:
+    # Use each element of the given `weights` dataset as the probability of
+    # choosing the respective input.
+
+    # The `stateless_multinomial()` op expects log-probabilities, as opposed to
+    # weights.
+    logits_ds = weights.map(lambda *p: math_ops.log(p, name="logits"))
+
+    def select_dataset_varying_logits(logits, seed):
+      return array_ops.squeeze(
+          gen_stateless_random_ops.stateless_multinomial(logits, 1, seed=seed),
+          axis=[0, 1])
+
+    logits_and_seeds = dataset_ops.Dataset.zip(
+        (logits_ds, random_ops.RandomDataset(seed).batch(2)))
+    selector_input = dataset_ops.MapDataset(
+        logits_and_seeds,
+        select_dataset_varying_logits,
+        use_inter_op_parallelism=False)
+
+  return _DirectedInterleaveDataset(selector_input, datasets)
+
+
+@tf_export("data.experimental.choose_from_datasets")
+def choose_from_datasets(datasets, choice_dataset):
+  """Creates a dataset that deterministically chooses elements from `datasets`.
+
+  For example, given the following datasets:
+
+  ```python
+  datasets = [tf.data.Dataset.from_tensors("foo").repeat(),
+              tf.data.Dataset.from_tensors("bar").repeat(),
+              tf.data.Dataset.from_tensors("baz").repeat()]
+
+  # Define a dataset containing `[0, 1, 2, 0, 1, 2, 0, 1, 2]`.
+  choice_dataset = tf.data.Dataset.range(3).repeat(3)
+
+  result = tf.data.experimental.choose_from_datasets(datasets, choice_dataset)
+  ```
+
+  The elements of `result` will be:
+
+  ```
+  "foo", "bar", "baz", "foo", "bar", "baz", "foo", "bar", "baz"
+  ```
+
+  Args:
+    datasets: A list of `tf.data.Dataset` objects with compatible structure.
+    choice_dataset: A `tf.data.Dataset` of scalar `tf.int64` tensors between
+      `0` and `len(datasets) - 1`.
+
+  Returns:
+    A dataset that interleaves elements from `datasets` according to the values
+    of `choice_dataset`.
+
+  Raises:
+    TypeError: If the `datasets` or `choice_dataset` arguments have the wrong
+      type.
+  """
+  if not (choice_dataset.output_types == dtypes.int64
+          and choice_dataset.output_shapes.is_compatible_with(
+              tensor_shape.scalar())
+          and choice_dataset.output_classes == ops.Tensor):
+    raise TypeError("`choice_dataset` must be a dataset of scalar "
+                    "`tf.int64` tensors.")
+  return _DirectedInterleaveDataset(choice_dataset, datasets)
diff --git a/tensorflow/python/data/experimental/ops/iterator_ops.py b/tensorflow/python/data/experimental/ops/iterator_ops.py
new file mode 100644
index 0000000000..72d7d58f06
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/iterator_ops.py
@@ -0,0 +1,268 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Iterator ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.data.ops import optional_ops
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.make_saveable_from_iterator")
+def make_saveable_from_iterator(iterator):
+  """Returns a SaveableObject for saving/restore iterator state using Saver.
+
+  Args:
+    iterator: Iterator.
+
+  For example:
+
+  ```python
+  with tf.Graph().as_default():
+    ds = tf.data.Dataset.range(10)
+    iterator = ds.make_initializable_iterator()
+    # Build the iterator SaveableObject.
+    saveable_obj = tf.data.experimental.make_saveable_from_iterator(iterator)
+    # Add the SaveableObject to the SAVEABLE_OBJECTS collection so
+    # it can be automatically saved using Saver.
+    tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable_obj)
+    saver = tf.train.Saver()
+
+    while continue_training:
+      ... Perform training ...
+      if should_save_checkpoint:
+        saver.save()
+  ```
+
+  Note: When restoring the iterator, the existing iterator state is completely
+  discarded. This means that any changes you may have made to the Dataset
+  graph will be discarded as well! This includes the new Dataset graph
+  that you may have built during validation. So, while running validation,
+  make sure to run the initializer for the validation input pipeline after
+  restoring the checkpoint.
+
+  Note: Not all iterators support checkpointing yet. Attempting to save the
+  state of an unsupported iterator will throw an error.
+  """
+  return _Saveable(iterator._iterator_resource)  # pylint: disable=protected-access
+
+
+class _Saveable(saver_lib.BaseSaverBuilder.SaveableObject):
+  """SaveableObject for saving/restoring iterator state."""
+
+  def __init__(self, iterator_resource):
+    serialized_iterator = gen_dataset_ops.serialize_iterator(iterator_resource)
+    specs = [
+        saver_lib.BaseSaverBuilder.SaveSpec(serialized_iterator, "",
+                                            iterator_resource.name + "-state")
+    ]
+    super(_Saveable, self).__init__(iterator_resource, specs,
+                                    iterator_resource.name)
+
+  def restore(self, restored_tensors, unused_restored_shapes):
+    with ops.colocate_with(self.op):
+      return gen_dataset_ops.deserialize_iterator(self.op, restored_tensors[0])
+
+
+@tf_export("data.experimental.CheckpointInputPipelineHook")
+class CheckpointInputPipelineHook(session_run_hook.SessionRunHook):
+  """Checkpoints input pipeline state every N steps or seconds.
+
+  This hook saves the state of the iterators in the `Graph` so that when
+  training is resumed the input pipeline continues from where it left off.
+  This could potentially avoid overfitting in certain pipelines where the
+  number of training steps per eval are small compared to the dataset
+  size or if the training pipeline is pre-empted.
+
+  Differences from `CheckpointSaverHook`:
+  1. Saves only the input pipelines in the "iterators" collection and not the
+     global variables or other saveable objects.
+  2. Does not write the `GraphDef` and `MetaGraphDef` to the summary.
+
+  Example of checkpointing the training pipeline:
+
+  ```python
+  est = tf.estimator.Estimator(model_fn)
+  while True:
+    est.train(
+        train_input_fn,
+        hooks=[tf.data.experimental.CheckpointInputPipelineHook(est)],
+        steps=train_steps_per_eval)
+    # Note: We do not pass the hook here.
+    metrics = est.evaluate(eval_input_fn)
+    if should_stop_the_training(metrics):
+      break
+  ```
+
+  This hook should be used if the input pipeline state needs to be saved
+  separate from the model checkpoint. Doing so may be useful for a few reasons:
+  1. The input pipeline checkpoint may be large, if there are large shuffle
+     or prefetch buffers for instance, and may bloat the checkpoint size.
+  2. If the input pipeline is shared between training and validation, restoring
+     the checkpoint during validation may override the validation input
+     pipeline.
+
+  For saving the input pipeline checkpoint alongside the model weights use
+  `tf.data.experimental.make_saveable_from_iterator` directly to create a
+  `SaveableObject` and add to the `SAVEABLE_OBJECTS` collection. Note, however,
+  that you will need to be careful not to restore the training iterator during
+  eval. You can do that by not adding the iterator to the SAVEABLE_OBJECTS
+  collector when building the eval graph.
+  """
+
+  def __init__(self, estimator):
+    """Initializes a `CheckpointInputPipelineHook`.
+
+    Args:
+      estimator: Estimator.
+
+    Raises:
+      ValueError: One of `save_steps` or `save_secs` should be set.
+      ValueError: At most one of saver or scaffold should be set.
+    """
+    # `checkpoint_basename` is "input.ckpt" for non-distributed pipelines or
+    # of the form "input_<task_type>_<task_id>.ckpt" for distributed pipelines.
+    # Note: The default `checkpoint_basename` used by `CheckpointSaverHook` is
+    # "model.ckpt". We intentionally choose the input pipeline checkpoint prefix
+    # to be different to avoid conflicts with the model checkpoint.
+
+    # pylint: disable=protected-access
+    checkpoint_prefix = "input"
+    if estimator._config.num_worker_replicas > 1:
+      # Distributed setting.
+      suffix = "_{}_{}".format(estimator._config.task_type,
+                               estimator._config.task_id)
+      checkpoint_prefix += suffix
+    # pylint: enable=protected-access
+
+    # We use a composition paradigm instead of inheriting from
+    # `CheckpointSaverHook` because `Estimator` does an `isinstance` check
+    # to check whether a `CheckpointSaverHook` is already present in the list
+    # of hooks and if not, adds one. Inheriting from `CheckpointSaverHook`
+    # would thwart this behavior. This hook checkpoints *only the iterators*
+    # and not the graph variables.
+    self._checkpoint_saver_hook = basic_session_run_hooks.CheckpointSaverHook(
+        estimator.model_dir,
+        save_secs=estimator._config.save_checkpoints_secs,  # pylint: disable=protected-access
+        save_steps=estimator._config.save_checkpoints_steps,  # pylint: disable=protected-access
+        checkpoint_basename=checkpoint_prefix + ".ckpt")
+
+    # Name for the protocol buffer file that will contain the list of most
+    # recent checkpoints stored as a `CheckpointState` protocol buffer.
+    # This file, kept in the same directory as the checkpoint files, is
+    # automatically managed by the `Saver` to keep track of recent checkpoints.
+    # The default name used by the `Saver` for this file is "checkpoint". Here
+    # we use the name "checkpoint_<checkpoint_prefix>" so that in case the
+    # `checkpoint_dir` is the same as the model checkpoint directory, there are
+    # no conflicts during restore.
+    self._latest_filename = "checkpoint_" + checkpoint_prefix
+    self._first_run = True
+
+  def begin(self):
+    # Build a Saver that saves all iterators in the `GLOBAL_ITERATORS`
+    # collection if no `Saver` or `Scaffold` is provided.
+    # pylint: disable=protected-access
+    if (self._checkpoint_saver_hook._saver is None and
+        self._checkpoint_saver_hook._scaffold is None):
+      iterators = ops.get_collection(iterator_ops.GLOBAL_ITERATORS)
+      saveables = [_Saveable(i) for i in iterators]
+      self._checkpoint_saver_hook._saver = _CustomSaver(saveables,
+                                                        self._latest_filename)
+    # pylint: enable=protected-access
+    self._checkpoint_saver_hook.begin()
+
+  def _restore_or_save_initial_ckpt(self, session):
+    # Ideally this should be run in after_create_session but is not for the
+    # following reason:
+    # Currently there is no way of enforcing an order of running the
+    # `SessionRunHooks`. Hence it is possible that the `_DatasetInitializerHook`
+    # is run *after* this hook. That is troublesome because
+    # 1. If a checkpoint exists and this hook restores it, the initializer hook
+    #    will override it.
+    # 2. If no checkpoint exists, this hook will try to save an initialized
+    #    iterator which will result in an exception.
+    #
+    # As a temporary fix we enter the following implicit contract between this
+    # hook and the _DatasetInitializerHook.
+    # 1. The _DatasetInitializerHook initializes the iterator in the call to
+    #    after_create_session.
+    # 2. This hook saves the iterator on the first call to `before_run()`, which
+    #    is guaranteed to happen after `after_create_session()` of all hooks
+    #    have been run.
+
+    # Check if there is an existing checkpoint. If so, restore from it.
+    # pylint: disable=protected-access
+    latest_checkpoint_path = checkpoint_management.latest_checkpoint(
+        self._checkpoint_saver_hook._checkpoint_dir,
+        latest_filename=self._latest_filename)
+    if latest_checkpoint_path:
+      self._checkpoint_saver_hook._get_saver().restore(session,
+                                                       latest_checkpoint_path)
+    else:
+      # The checkpoint saved here is the state at step "global_step".
+      # Note: We do not save the GraphDef or MetaGraphDef here.
+      global_step = session.run(self._checkpoint_saver_hook._global_step_tensor)
+      self._checkpoint_saver_hook._save(session, global_step)
+      self._checkpoint_saver_hook._timer.update_last_triggered_step(global_step)
+    # pylint: enable=protected-access
+
+  def before_run(self, run_context):
+    if self._first_run:
+      self._restore_or_save_initial_ckpt(run_context.session)
+      self._first_run = False
+    return self._checkpoint_saver_hook.before_run(run_context)
+
+  def after_run(self, run_context, run_values):
+    self._checkpoint_saver_hook.after_run(run_context, run_values)
+
+  def end(self, session):
+    self._checkpoint_saver_hook.end(session)
+
+
+class _CustomSaver(saver_lib.Saver):
+  """`Saver` with a different default `latest_filename`.
+
+  This is used in the `CheckpointInputPipelineHook` to avoid conflicts with
+  the model ckpt saved by the `CheckpointSaverHook`.
+  """
+
+  def __init__(self, var_list, latest_filename):
+    super(_CustomSaver, self).__init__(var_list)
+    self._latest_filename = latest_filename
+
+  def save(self,
+           sess,
+           save_path,
+           global_step=None,
+           latest_filename=None,
+           meta_graph_suffix="meta",
+           write_meta_graph=True,
+           write_state=True,
+           strip_default_attrs=False):
+    return super(_CustomSaver, self).save(
+        sess, save_path, global_step, latest_filename or self._latest_filename,
+        meta_graph_suffix, write_meta_graph, write_state, strip_default_attrs)
+
+
+tf_export("data.experimental.Optional")(optional_ops.Optional)
+tf_export("data.experimental.get_next_as_optional")(
+    iterator_ops.get_next_as_optional)
diff --git a/tensorflow/contrib/data/python/ops/map_defun.py b/tensorflow/python/data/experimental/ops/map_defun.py
similarity index 100%
rename from tensorflow/contrib/data/python/ops/map_defun.py
rename to tensorflow/python/data/experimental/ops/map_defun.py
diff --git a/tensorflow/contrib/data/python/ops/optimization.py b/tensorflow/python/data/experimental/ops/optimization.py
similarity index 100%
rename from tensorflow/contrib/data/python/ops/optimization.py
rename to tensorflow/python/data/experimental/ops/optimization.py
diff --git a/tensorflow/python/data/experimental/ops/parsing_ops.py b/tensorflow/python/data/experimental/ops/parsing_ops.py
new file mode 100644
index 0000000000..6615b9022a
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/parsing_ops.py
@@ -0,0 +1,152 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental `dataset` API for parsing example."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+class _ParseExampleDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that parses `example` dataset into a `dict` dataset."""
+
+  def __init__(self, input_dataset, features, num_parallel_calls):
+    super(_ParseExampleDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    if not all(types == dtypes.string
+               for types in nest.flatten(input_dataset.output_types)):
+      raise TypeError("Input dataset should be a dataset of vectors of strings")
+    self._num_parallel_calls = num_parallel_calls
+    # pylint: disable=protected-access
+    self._features = parsing_ops._prepend_none_dimension(features)
+    # sparse_keys and dense_keys come back sorted here.
+    (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
+     dense_shapes) = parsing_ops._features_to_raw_params(
+         self._features, [
+             parsing_ops.VarLenFeature, parsing_ops.SparseFeature,
+             parsing_ops.FixedLenFeature, parsing_ops.FixedLenSequenceFeature
+         ])
+    # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature.
+    (_, dense_defaults_vec, sparse_keys, sparse_types, dense_keys, dense_shapes,
+     dense_shape_as_shape) = parsing_ops._process_raw_parameters(
+         None, dense_defaults, sparse_keys, sparse_types, dense_keys,
+         dense_types, dense_shapes)
+    # pylint: enable=protected-access
+    self._sparse_keys = sparse_keys
+    self._sparse_types = sparse_types
+    self._dense_keys = dense_keys
+    self._dense_defaults = dense_defaults_vec
+    self._dense_shapes = dense_shapes
+    self._dense_types = dense_types
+    dense_output_shapes = [
+        self._input_dataset.output_shapes.concatenate(shape)
+        for shape in dense_shape_as_shape
+    ]
+    sparse_output_shapes = [
+        self._input_dataset.output_shapes.concatenate([None])
+        for _ in range(len(sparse_keys))
+    ]
+
+    self._output_shapes = dict(
+        zip(self._dense_keys + self._sparse_keys,
+            dense_output_shapes + sparse_output_shapes))
+    self._output_types = dict(
+        zip(self._dense_keys + self._sparse_keys,
+            self._dense_types + self._sparse_types))
+    self._output_classes = dict(
+        zip(self._dense_keys + self._sparse_keys,
+            [ops.Tensor for _ in range(len(self._dense_defaults))] +
+            [sparse_tensor.SparseTensor for _ in range(len(self._sparse_keys))
+            ]))
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.parse_example_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._num_parallel_calls,
+        self._dense_defaults,
+        self._sparse_keys,
+        self._dense_keys,
+        self._sparse_types,
+        self._dense_shapes,
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+
+# TODO(b/111553342): add arguments names and example names as well.
+@tf_export("data.experimental.parse_example_dataset")
+def parse_example_dataset(features, num_parallel_calls=1):
+  """A transformation that parses `Example` protos into a `dict` of tensors.
+
+  Parses a number of serialized `Example` protos given in `serialized`. We refer
+  to `serialized` as a batch with `batch_size` many entries of individual
+  `Example` protos.
+
+  This op parses serialized examples into a dictionary mapping keys to `Tensor`
+  and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`,
+  `SparseFeature`, and `FixedLenFeature` objects. Each `VarLenFeature`
+  and `SparseFeature` is mapped to a `SparseTensor`, and each
+  `FixedLenFeature` is mapped to a `Tensor`. See `tf.parse_example` for more
+  details about feature dictionaries.
+
+  Args:
+   features: A `dict` mapping feature keys to `FixedLenFeature`,
+     `VarLenFeature`, and `SparseFeature` values.
+   num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
+      representing the number of parsing processes to call in parallel.
+
+  Returns:
+    A dataset transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+
+  Raises:
+    ValueError: if features argument is None.
+  """
+  if features is None:
+    raise ValueError("Missing: features was %s." % features)
+
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    out_dataset = _ParseExampleDataset(dataset, features, num_parallel_calls)
+    if any([
+        isinstance(feature, parsing_ops.SparseFeature)
+        for _, feature in features.items()
+    ]):
+      # pylint: disable=protected-access
+      # pylint: disable=g-long-lambda
+      out_dataset = out_dataset.map(
+          lambda x: parsing_ops._construct_sparse_tensors_for_sparse_features(
+              features, x), num_parallel_calls=num_parallel_calls)
+    return out_dataset
+
+  return _apply_fn
diff --git a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py
new file mode 100644
index 0000000000..48d7136f95
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py
@@ -0,0 +1,531 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python wrapper for prefetching_ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import warnings
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.eager import context
+from tensorflow.python.framework import device as framework_device
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import functional_ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+def function_buffering_resource(string_arg,
+                                target_device,
+                                f,
+                                buffer_size,
+                                output_types,
+                                container="",
+                                shared_name=None,
+                                name=None):
+  """Creates a FunctionBufferingResource.
+
+  A FunctionBufferingResource fills up a buffer by calling a function `f` on
+  `target_device`. `f` should take in only a single string argument as input.
+
+  Args:
+    string_arg: The single string argument to the function.
+    target_device: The device to run `f` on.
+    f: The function to be executed.
+    buffer_size: Size of the buffer to be populated.
+    output_types: The output types generated by the function.
+    container: (Optional) string. Defaults to "".
+    shared_name: (Optional) string.
+    name: (Optional) string to name the op.
+
+  Returns:
+    Handle to a FunctionBufferingResource.
+  """
+  if shared_name is None:
+    shared_name = ""
+  return ged_ops.experimental_function_buffering_resource(
+      string_arg=string_arg,
+      target_device=target_device,
+      shared_name=shared_name,
+      f=f,
+      buffer_size=buffer_size,
+      container=container,
+      name=name,
+      output_types=output_types)
+
+
+def function_buffering_resource_get_next(function_buffer_resource,
+                                         output_types,
+                                         name=None):
+  return ged_ops.experimental_function_buffering_resource_get_next(
+      function_buffer_resource=function_buffer_resource,
+      output_types=output_types,
+      name=name)
+
+
+def function_buffering_resource_reset(function_buffer_resource, name=None):
+  return ged_ops.experimental_function_buffering_resource_reset(
+      function_buffer_resource=function_buffer_resource, name=name)
+
+
+# pylint: disable=protected-access
+class _PrefetchToDeviceIterator(object):
+  """A replacement for `tf.data.Iterator` that prefetches to another device.
+
+  Args:
+    input_dataset: The input dataset
+    one_shot: If true, we make a one shot iterator that's already initialized.
+    device: A fully specified device string where we want to prefetch to
+    buffer_size: Size of the prefetching buffer.
+    shared_name: (Optional.) If non-empty, the returned iterator will be
+        shared under the given name across multiple sessions that share the
+        same devices (e.g. when using a remote server).
+
+  Returns:
+    An Iterator type object.
+  """
+
+  def __init__(self,
+               input_dataset,
+               one_shot,
+               device,
+               buffer_size,
+               shared_name=None):
+    self._input_dataset = input_dataset
+    self._get_next_call_count = 0
+    self._one_shot = one_shot
+    if shared_name is None:
+      shared_name = ""
+
+    if self._one_shot:
+      self._input_iterator = input_dataset.make_one_shot_iterator()
+    else:
+      self._input_iterator = iterator_ops.Iterator.from_structure(
+          self._input_dataset.output_types, self._input_dataset.output_shapes,
+          shared_name, self._input_dataset.output_classes)
+    input_iterator_handle = self._input_iterator.string_handle()
+
+    @function.Defun(dtypes.string)
+    def _prefetch_fn(handle):
+      """Prefetches one element from `input_iterator`."""
+      remote_iterator = iterator_ops.Iterator.from_string_handle(
+          handle, self._input_iterator.output_types,
+          self._input_iterator.output_shapes,
+          self._input_iterator.output_classes)
+      ret = remote_iterator.get_next()
+      return nest.flatten(sparse.serialize_sparse_tensors(ret))
+
+    iterator_device = ged_ops.experimental_iterator_get_device(
+        self._input_iterator._iterator_resource)
+
+    with ops.device(device):
+      self._buffering_resource = function_buffering_resource(
+          f=_prefetch_fn,
+          target_device=iterator_device,
+          string_arg=input_iterator_handle,
+          buffer_size=buffer_size,
+          shared_name=shared_name,
+          output_types=nest.flatten(
+              sparse.as_dense_types(self._input_dataset.output_types,
+                                    self._input_dataset.output_classes)))
+
+    if not self._one_shot:
+      reset_op = function_buffering_resource_reset(self._buffering_resource)
+      with ops.control_dependencies([reset_op]):
+        self._initializer = self._input_iterator.make_initializer(
+            self._input_dataset)
+
+  def get_next(self, name=None):
+    """See `tf.data.Iterator.get_next`."""
+    self._get_next_call_count += 1
+    if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD:
+      warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE)
+
+    flat_ret = ged_ops.experimental_function_buffering_resource_get_next(
+        self._buffering_resource,
+        output_types=nest.flatten(
+            sparse.as_dense_types(self.output_types, self.output_classes)),
+        name=name)
+
+    ret = sparse.deserialize_sparse_tensors(
+        nest.pack_sequence_as(self.output_types, flat_ret),
+        self.output_types, self.output_shapes, self.output_classes)
+
+    for tensor, shape in zip(
+        nest.flatten(ret), nest.flatten(self.output_shapes)):
+      if isinstance(tensor, ops.Tensor):
+        tensor.set_shape(shape)
+
+    return ret
+
+  @property
+  def initializer(self):
+    if self._one_shot:
+      raise NotImplementedError("Can't initialize a one_shot_iterator")
+    return self._initializer
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator):
+  """A replacement for `tf.data.Iterator` that prefetches to another device.
+
+  Args:
+    input_dataset: The input dataset
+    one_shot: If true, we make a one shot iterator that's already initialized.
+    device: A fully specified device string where we want to prefetch to
+    buffer_size: Size of the prefetching buffer.
+    shared_name: (Optional.) If non-empty, the returned iterator will be
+        shared under the given name across multiple sessions that share the
+        same devices (e.g. when using a remote server).
+
+  Returns:
+    An Iterator type object.
+  """
+
+  def __init__(self,
+               input_dataset,
+               device,
+               buffer_size):
+    with ops.device("/device:CPU:0"):
+      super(_PrefetchToDeviceEagerIterator, self).__init__(input_dataset)
+      input_iterator_handle = gen_dataset_ops.iterator_to_string_handle(
+          self._resource)
+
+    self._device = device
+
+    @function.Defun(dtypes.string)
+    def _prefetch_fn(handle):
+      """Prefetches one element from `input_iterator`."""
+      remote_iterator = iterator_ops.Iterator.from_string_handle(
+          handle, self.output_types, self.output_shapes, self.output_classes)
+      ret = remote_iterator.get_next()
+      return nest.flatten(sparse.serialize_sparse_tensors(ret))
+
+    _prefetch_fn.add_to_graph(None)
+
+    with ops.device(device):
+      self._buffering_resource = function_buffering_resource(
+          f=_prefetch_fn,
+          output_types=self._flat_output_types,
+          target_device=ged_ops.experimental_iterator_get_device(
+              self._resource),
+          string_arg=input_iterator_handle,
+          buffer_size=buffer_size,
+          shared_name=iterator_ops._generate_shared_name(
+              "function_buffer_resource"))
+
+  def _next_internal(self):
+    """Returns a nested structure of `tf.Tensor`s containing the next element.
+    """
+    # This runs in sync mode as iterators use an error status to communicate
+    # that there is no more data to iterate over.
+    # TODO(b/77291417): Fix
+    with context.execution_mode(context.SYNC):
+      with ops.device(self._device):
+        ret = ged_ops.experimental_function_buffering_resource_get_next(
+            function_buffer_resource=self._buffering_resource,
+            output_types=self._flat_output_types)
+      return sparse.deserialize_sparse_tensors(
+          nest.pack_sequence_as(self._output_types, ret), self._output_types,
+          self._output_shapes, self._output_classes)
+# pylint: enable=protected-access
+
+
+class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` whose iterator prefetches elements to another device."""
+
+  def __init__(self, input_dataset, device, buffer_size):
+    super(_PrefetchToDeviceDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._device = device
+    self._buffer_size = buffer_size if buffer_size is not None else 1
+
+  # The static analysis cannot tell that the eager iterator's superclass has
+  # a `next()` method.
+  # pylint: disable=non-iterator-returned
+  def __iter__(self):
+    """Creates an `Iterator` for enumerating the elements of this dataset.
+
+    The returned iterator implements the Python iterator protocol and therefore
+    can only be used in eager mode.
+
+    Returns:
+      An `Iterator` over the elements of this dataset.
+
+    Raises:
+      RuntimeError: If eager execution is enabled.
+    """
+    if context.executing_eagerly():
+      return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device,
+                                            self._buffer_size)
+    else:
+      raise RuntimeError("dataset.__iter__() is only supported when eager "
+                         "execution is enabled.")
+  # pylint: enable=non-iterator-returned
+
+  def make_one_shot_iterator(self):
+    if context.executing_eagerly():
+      return _PrefetchToDeviceEagerIterator(self._input_dataset, self._device,
+                                            self._buffer_size)
+    else:
+      return _PrefetchToDeviceIterator(self._input_dataset, one_shot=True,
+                                       device=self._device,
+                                       buffer_size=self._buffer_size)
+
+  def make_initializable_iterator(self, shared_name=None):
+    return _PrefetchToDeviceIterator(
+        self._input_dataset,
+        one_shot=False,
+        device=self._device,
+        buffer_size=self._buffer_size,
+        shared_name=shared_name)
+
+  def _as_variant_tensor(self):
+    # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset
+    # transformation methods is called.
+    # TODO(mrry): Investigate support for chaining further transformations after
+    # the prefetch, including GPU support.
+    raise NotImplementedError("`prefetch_to_device()` must be the last "
+                              "transformation in a dataset pipeline.")
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+
+@tf_export("data.experimental.prefetch_to_device")
+def prefetch_to_device(device, buffer_size=None):
+  """A transformation that prefetches dataset values to the given `device`.
+
+  NOTE: Although the transformation creates a `tf.data.Dataset`, the
+  transformation must be the final `Dataset` in the input pipeline.
+
+  Args:
+    device: A string. The name of a device to which elements will be prefetched.
+    buffer_size: (Optional.) The number of elements to buffer on `device`.
+      Defaults to an automatically chosen value.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+  def _apply_fn(dataset):
+    return _PrefetchToDeviceDataset(dataset, device, buffer_size)
+
+  return _apply_fn
+
+
+@tf_export("data.experimental.copy_to_device")
+def copy_to_device(target_device, source_device="/cpu:0"):
+  """A transformation that copies dataset elements to the given `target_device`.
+
+  Args:
+    target_device: The name of a device to which elements will be copied.
+    source_device: The original device on which `input_dataset` will be placed.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _CopyToDeviceDataset(
+        dataset, target_device=target_device, source_device=source_device)
+
+  return _apply_fn
+
+
+# TODO(rohanj): Use the _input_hostmem attr on the RemoteCall ops to indicate
+# all inputs to the Op are in host memory, thereby avoiding some unnecessary
+# Sends and Recvs.
+class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that copies elements to another device."""
+
+  def __init__(self, input_dataset, target_device, source_device="/cpu:0"):
+    """Constructs a _CopyToDeviceDataset.
+
+    Args:
+      input_dataset: `Dataset` to be copied
+      target_device: The name of the device to which elements would be copied.
+      source_device: Device where input_dataset would be placed.
+    """
+    super(_CopyToDeviceDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._target_device = target_device
+    spec = framework_device.DeviceSpec().from_string(self._target_device)
+    self._is_gpu_target = (spec.device_type == "GPU")
+    self._source_device_string = source_device
+    self._source_device = ops.convert_to_tensor(source_device)
+
+    self._flat_output_shapes = nest.flatten(
+        sparse.as_dense_shapes(self._input_dataset.output_shapes,
+                               self._input_dataset.output_classes))
+    self._flat_output_types = nest.flatten(
+        sparse.as_dense_types(self._input_dataset.output_types,
+                              self._input_dataset.output_classes))
+
+    @function.Defun()
+    def _init_func():
+      """Creates an iterator for the input dataset.
+
+      Returns:
+        A `string` tensor that encapsulates the iterator created.
+      """
+      # pylint: disable=protected-access
+      ds_variant = self._input_dataset._as_variant_tensor()
+      resource = gen_dataset_ops.anonymous_iterator(
+          output_types=self._flat_output_types,
+          output_shapes=self._flat_output_shapes)
+      with ops.control_dependencies(
+          [gen_dataset_ops.make_iterator(ds_variant, resource)]):
+        return gen_dataset_ops.iterator_to_string_handle(resource)
+
+    @function.Defun()
+    def _remote_init_func():
+      return functional_ops.remote_call(
+          target=self._source_device,
+          args=_init_func.captured_inputs,
+          Tout=[dtypes.string],
+          f=_init_func)
+
+    self._init_func = _remote_init_func
+    self._init_captured_args = _remote_init_func.captured_inputs
+
+    @function.Defun(dtypes.string)
+    def _next_func(string_handle):
+      """Calls get_next for created iterator.
+
+      Args:
+        string_handle: An iterator string handle created by _init_func
+      Returns:
+        The elements generated from `input_dataset`
+      """
+      with ops.device(self._source_device_string):
+        iterator = iterator_ops.Iterator.from_string_handle(
+            string_handle, self.output_types, self.output_shapes,
+            self.output_classes)
+      ret = iterator.get_next()
+      return nest.flatten(sparse.serialize_sparse_tensors(ret))
+
+    @function.Defun(dtypes.string)
+    def _remote_next_func(string_handle):
+      return functional_ops.remote_call(
+          target=self._source_device,
+          args=[string_handle] + _next_func.captured_inputs,
+          Tout=self._flat_output_types,
+          f=_next_func)
+
+    self._next_func = _remote_next_func
+    self._next_captured_args = _remote_next_func.captured_inputs
+
+    @function.Defun(dtypes.string)
+    def _finalize_func(string_handle):
+      """Destroys the iterator resource created.
+
+      Args:
+        string_handle: An iterator string handle created by _init_func
+      Returns:
+        Tensor constant 0
+      """
+      iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
+          string_handle,
+          output_types=self._flat_output_types,
+          output_shapes=self._flat_output_shapes)
+      with ops.control_dependencies([
+          resource_variable_ops.destroy_resource_op(
+              iterator_resource, ignore_lookup_error=True)]):
+        return array_ops.constant(0, dtypes.int64)
+
+    @function.Defun(dtypes.string)
+    def _remote_finalize_func(string_handle):
+      return functional_ops.remote_call(
+          target=self._source_device,
+          args=[string_handle] + _finalize_func.captured_inputs,
+          Tout=[dtypes.int64],
+          f=_finalize_func)
+
+    self._finalize_func = _remote_finalize_func
+    self._finalize_captured_args = _remote_finalize_func.captured_inputs
+
+    g = ops.get_default_graph()
+    _remote_init_func.add_to_graph(g)
+    _remote_next_func.add_to_graph(g)
+    _remote_finalize_func.add_to_graph(g)
+    # pylint: enable=protected-scope
+
+  # The one_shot_iterator implementation needs a 0 arg _make_dataset function
+  # that thereby captures all the inputs required to create the dataset. Since
+  # there are strings that are inputs to the GeneratorDataset which can't be
+  # placed on a GPU, this fails for the GPU case. Therefore, disabling it for
+  # GPU
+  def make_one_shot_iterator(self):
+    if self._is_gpu_target:
+      raise ValueError("Cannot create a one shot iterator when using "
+                       "`tf.data.experimental.copy_to_device()` on GPU. Please "
+                       "use `Dataset.make_initializable_iterator()` instead.")
+    else:
+      return super(_CopyToDeviceDataset, self).make_one_shot_iterator()
+
+  def _as_variant_tensor(self):
+    with ops.device(self._target_device):
+      return gen_dataset_ops.generator_dataset(
+          self._init_captured_args,
+          self._next_captured_args,
+          self._finalize_captured_args,
+          init_func=self._init_func,
+          next_func=self._next_func,
+          finalize_func=self._finalize_func,
+          output_types=self._flat_output_types,
+          output_shapes=self._flat_output_shapes)
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
diff --git a/tensorflow/python/data/experimental/ops/random_ops.py b/tensorflow/python/data/experimental/ops/random_ops.py
new file mode 100644
index 0000000000..e3a2aeab31
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/random_ops.py
@@ -0,0 +1,54 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Datasets for random number generators."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import random_seed
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.RandomDataset")
+class RandomDataset(dataset_ops.DatasetSource):
+  """A `Dataset` of pseudorandom values."""
+
+  def __init__(self, seed=None):
+    """A `Dataset` of pseudorandom values."""
+    super(RandomDataset, self).__init__()
+    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.random_dataset(
+        seed=self._seed,
+        seed2=self._seed2,
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return ops.Tensor
+
+  @property
+  def output_shapes(self):
+    return tensor_shape.scalar()
+
+  @property
+  def output_types(self):
+    return dtypes.int64
diff --git a/tensorflow/python/data/experimental/ops/readers.py b/tensorflow/python/data/experimental/ops/readers.py
new file mode 100644
index 0000000000..3b2d094514
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/readers.py
@@ -0,0 +1,904 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python wrappers for reader Datasets."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import csv
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import optimization
+from tensorflow.python.data.experimental.ops import parsing_ops
+from tensorflow.python.data.experimental.ops import shuffle_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import readers as core_readers
+from tensorflow.python.data.util import convert
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.lib.io import file_io
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.util.tf_export import tf_export
+
+_ACCEPTABLE_CSV_TYPES = (dtypes.float32, dtypes.float64, dtypes.int32,
+                         dtypes.int64, dtypes.string)
+
+
+def _is_valid_int32(str_val):
+  try:
+    # Checks equality to prevent int32 overflow
+    return dtypes.int32.as_numpy_dtype(str_val) == dtypes.int64.as_numpy_dtype(
+        str_val)
+  except (ValueError, OverflowError):
+    return False
+
+
+def _is_valid_int64(str_val):
+  try:
+    dtypes.int64.as_numpy_dtype(str_val)
+    return True
+  except (ValueError, OverflowError):
+    return False
+
+
+def _is_valid_float(str_val, float_dtype):
+  try:
+    return float_dtype.as_numpy_dtype(str_val) < np.inf
+  except ValueError:
+    return False
+
+
+def _infer_type(str_val, na_value, prev_type):
+  """Given a string, infers its tensor type.
+
+  Infers the type of a value by picking the least 'permissive' type possible,
+  while still allowing the previous type inference for this column to be valid.
+
+  Args:
+    str_val: String value to infer the type of.
+    na_value: Additional string to recognize as a NA/NaN CSV value.
+    prev_type: Type previously inferred based on values of this column that
+      we've seen up till now.
+  Returns:
+    Inferred dtype.
+  """
+  if str_val in ("", na_value):
+    # If the field is null, it gives no extra information about its type
+    return prev_type
+
+  type_list = [
+      dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64, dtypes.string
+  ]  # list of types to try, ordered from least permissive to most
+
+  type_functions = [
+      _is_valid_int32,
+      _is_valid_int64,
+      lambda str_val: _is_valid_float(str_val, dtypes.float32),
+      lambda str_val: _is_valid_float(str_val, dtypes.float64),
+      lambda str_val: True,
+  ]  # Corresponding list of validation functions
+
+  for i in range(len(type_list)):
+    validation_fn = type_functions[i]
+    if validation_fn(str_val) and (prev_type is None or
+                                   prev_type in type_list[:i + 1]):
+      return type_list[i]
+
+
+def _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header):
+  """Generator that yields rows of CSV file(s) in order."""
+  for fn in filenames:
+    with file_io.FileIO(fn, "r") as f:
+      rdr = csv.reader(
+          f,
+          delimiter=field_delim,
+          quoting=csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE)
+      if header:
+        next(rdr)  # Skip header lines
+
+      for csv_row in rdr:
+        if len(csv_row) != num_cols:
+          raise ValueError(
+              "Problem inferring types: CSV row has different number of fields "
+              "than expected.")
+        yield csv_row
+
+
+def _infer_column_defaults(filenames, num_cols, field_delim, use_quote_delim,
+                           na_value, header, num_rows_for_inference,
+                           select_columns):
+  """Infers column types from the first N valid CSV records of files."""
+  if select_columns is None:
+    select_columns = range(num_cols)
+  inferred_types = [None] * len(select_columns)
+
+  for i, csv_row in enumerate(
+      _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header)):
+    if num_rows_for_inference is not None and i >= num_rows_for_inference:
+      break
+
+    for j, col_index in enumerate(select_columns):
+      inferred_types[j] = _infer_type(csv_row[col_index], na_value,
+                                      inferred_types[j])
+
+  # Replace None's with a default type
+  inferred_types = [t or dtypes.string for t in inferred_types]
+  # Default to 0 or '' for null values
+  return [
+      constant_op.constant([0 if t is not dtypes.string else ""], dtype=t)
+      for t in inferred_types
+  ]
+
+
+def _infer_column_names(filenames, field_delim, use_quote_delim):
+  """Infers column names from first rows of files."""
+  csv_kwargs = {
+      "delimiter": field_delim,
+      "quoting": csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE
+  }
+  with file_io.FileIO(filenames[0], "r") as f:
+    try:
+      column_names = next(csv.reader(f, **csv_kwargs))
+    except StopIteration:
+      raise ValueError(("Received StopIteration when reading the header line "
+                        "of %s.  Empty file?") % filenames[0])
+
+  for name in filenames[1:]:
+    with file_io.FileIO(name, "r") as f:
+      try:
+        if next(csv.reader(f, **csv_kwargs)) != column_names:
+          raise ValueError(
+              "Files have different column names in the header row.")
+      except StopIteration:
+        raise ValueError(("Received StopIteration when reading the header line "
+                          "of %s.  Empty file?") % filenames[0])
+  return column_names
+
+
+def _get_sorted_col_indices(select_columns, column_names):
+  """Transforms select_columns argument into sorted column indices."""
+  names_to_indices = {n: i for i, n in enumerate(column_names)}
+  num_cols = len(column_names)
+  for i, v in enumerate(select_columns):
+    if isinstance(v, int):
+      if v < 0 or v >= num_cols:
+        raise ValueError(
+            "Column index %d specified in select_columns out of valid range." %
+            v)
+      continue
+    if v not in names_to_indices:
+      raise ValueError(
+          "Value '%s' specified in select_columns not a valid column index or "
+          "name." % v)
+    select_columns[i] = names_to_indices[v]
+
+  # Sort and ensure there are no duplicates
+  result = sorted(set(select_columns))
+  if len(result) != len(select_columns):
+    raise ValueError("select_columns contains duplicate columns")
+  return result
+
+
+def _maybe_shuffle_and_repeat(
+    dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed):
+  """Optionally shuffle and repeat dataset, as requested."""
+  if num_epochs != 1 and shuffle:
+    # Use shuffle_and_repeat for perf
+    return dataset.apply(
+        shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs,
+                                       shuffle_seed))
+  elif shuffle:
+    return dataset.shuffle(shuffle_buffer_size, shuffle_seed)
+  elif num_epochs != 1:
+    return dataset.repeat(num_epochs)
+  return dataset
+
+
+def make_tf_record_dataset(file_pattern,
+                           batch_size,
+                           parser_fn=None,
+                           num_epochs=None,
+                           shuffle=True,
+                           shuffle_buffer_size=None,
+                           shuffle_seed=None,
+                           prefetch_buffer_size=optimization.AUTOTUNE,
+                           num_parallel_reads=None,
+                           num_parallel_parser_calls=None,
+                           drop_final_batch=False):
+  """Reads and optionally parses TFRecord files into a dataset.
+
+  Provides common functionality such as batching, optional parsing, shuffling,
+  and performant defaults.
+
+  Args:
+    file_pattern: List of files or patterns of TFRecord file paths.
+      See `tf.gfile.Glob` for pattern rules.
+    batch_size: An int representing the number of records to combine
+      in a single batch.
+    parser_fn: (Optional.) A function accepting string input to parse
+      and process the record contents. This function must map records
+      to components of a fixed shape, so they may be batched. By
+      default, uses the record contents unmodified.
+    num_epochs: (Optional.) An int specifying the number of times this
+      dataset is repeated.  If None (the default), cycles through the
+      dataset forever.
+    shuffle: (Optional.) A bool that indicates whether the input
+      should be shuffled. Defaults to `True`.
+    shuffle_buffer_size: (Optional.) Buffer size to use for
+      shuffling. A large buffer size ensures better shuffling, but
+      increases memory usage and startup time.
+    shuffle_seed: (Optional.) Randomization seed to use for shuffling.
+    prefetch_buffer_size: (Optional.) An int specifying the number of
+      feature batches to prefetch for performance improvement.
+      Defaults to auto-tune. Set to 0 to disable prefetching.
+    num_parallel_reads: (Optional.) Number of threads used to read
+      records from files. By default or if set to a value >1, the
+      results will be interleaved.
+    num_parallel_parser_calls: (Optional.) Number of parallel
+      records to parse in parallel. Defaults to an automatic selection.
+    drop_final_batch: (Optional.) Whether the last batch should be
+      dropped in case its size is smaller than `batch_size`; the
+      default behavior is not to drop the smaller batch.
+
+  Returns:
+    A dataset, where each element matches the output of `parser_fn`
+    except it will have an additional leading `batch-size` dimension,
+    or a `batch_size`-length 1-D tensor of strings if `parser_fn` is
+    unspecified.
+  """
+  files = dataset_ops.Dataset.list_files(
+      file_pattern, shuffle=shuffle, seed=shuffle_seed)
+
+  if num_parallel_reads is None:
+    # Note: We considered auto-tuning this value, but there is a concern
+    # that this affects the mixing of records from different files, which
+    # could affect training convergence/accuracy, so we are defaulting to
+    # a constant for now.
+    num_parallel_reads = 24
+  dataset = core_readers.TFRecordDataset(
+      files, num_parallel_reads=num_parallel_reads)
+
+  if shuffle_buffer_size is None:
+    # TODO(josh11b): Auto-tune this value when not specified
+    shuffle_buffer_size = 10000
+  dataset = _maybe_shuffle_and_repeat(
+      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
+
+  # NOTE(mrry): We set `drop_final_batch=True` when `num_epochs is None` to
+  # improve the shape inference, because it makes the batch dimension static.
+  # It is safe to do this because in that case we are repeating the input
+  # indefinitely, and all batches will be full-sized.
+  drop_final_batch = drop_final_batch or num_epochs is None
+
+  if parser_fn is None:
+    dataset = dataset.batch(batch_size, drop_remainder=drop_final_batch)
+  else:
+    # TODO(josh11b): if num_parallel_parser_calls is None, use some function
+    # of num cores instead of map_and_batch's default behavior of one batch.
+    dataset = dataset.apply(batching.map_and_batch(
+        parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls,
+        drop_remainder=drop_final_batch))
+
+  if prefetch_buffer_size == 0:
+    return dataset
+  else:
+    return dataset.prefetch(buffer_size=prefetch_buffer_size)
+
+
+@tf_export("data.experimental.make_csv_dataset")
+def make_csv_dataset(
+    file_pattern,
+    batch_size,
+    column_names=None,
+    column_defaults=None,
+    label_name=None,
+    select_columns=None,
+    field_delim=",",
+    use_quote_delim=True,
+    na_value="",
+    header=True,
+    num_epochs=None,
+    shuffle=True,
+    shuffle_buffer_size=10000,
+    shuffle_seed=None,
+    prefetch_buffer_size=optimization.AUTOTUNE,
+    num_parallel_reads=1,
+    sloppy=False,
+    num_rows_for_inference=100,
+    compression_type=None,
+):
+  """Reads CSV files into a dataset.
+
+  Reads CSV files into a dataset, where each element is a (features, labels)
+  tuple that corresponds to a batch of CSV rows. The features dictionary
+  maps feature column names to `Tensor`s containing the corresponding
+  feature data, and labels is a `Tensor` containing the batch's label data.
+
+  Args:
+    file_pattern: List of files or patterns of file paths containing CSV
+      records. See `tf.gfile.Glob` for pattern rules.
+    batch_size: An int representing the number of records to combine
+      in a single batch.
+    column_names: An optional list of strings that corresponds to the CSV
+      columns, in order. One per column of the input record. If this is not
+      provided, infers the column names from the first row of the records.
+      These names will be the keys of the features dict of each dataset element.
+    column_defaults: A optional list of default values for the CSV fields. One
+      item per selected column of the input record. Each item in the list is
+      either a valid CSV dtype (float32, float64, int32, int64, or string), or a
+      `Tensor` with one of the aforementioned types. The tensor can either be
+      a scalar default value (if the column is optional), or an empty tensor (if
+      the column is required). If a dtype is provided instead of a tensor, the
+      column is also treated as required. If this list is not provided, tries
+      to infer types based on reading the first num_rows_for_inference rows of
+      files specified, and assumes all columns are optional, defaulting to `0`
+      for numeric values and `""` for string values. If both this and
+      `select_columns` are specified, these must have the same lengths, and
+      `column_defaults` is assumed to be sorted in order of increasing column
+      index.
+    label_name: A optional string corresponding to the label column. If
+      provided, the data for this column is returned as a separate `Tensor` from
+      the features dictionary, so that the dataset complies with the format
+      expected by a `tf.Estimator.train` or `tf.Estimator.evaluate` input
+      function.
+    select_columns: An optional list of integer indices or string column
+      names, that specifies a subset of columns of CSV data to select. If
+      column names are provided, these must correspond to names provided in
+      `column_names` or inferred from the file header lines. When this argument
+      is specified, only a subset of CSV columns will be parsed and returned,
+      corresponding to the columns specified. Using this results in faster
+      parsing and lower memory usage. If both this and `column_defaults` are
+      specified, these must have the same lengths, and `column_defaults` is
+      assumed to be sorted in order of increasing column index.
+    field_delim: An optional `string`. Defaults to `","`. Char delimiter to
+      separate fields in a record.
+    use_quote_delim: An optional bool. Defaults to `True`. If false, treats
+      double quotation marks as regular characters inside of the string fields.
+    na_value: Additional string to recognize as NA/NaN.
+    header: A bool that indicates whether the first rows of provided CSV files
+      correspond to header lines with column names, and should not be included
+      in the data.
+    num_epochs: An int specifying the number of times this dataset is repeated.
+      If None, cycles through the dataset forever.
+    shuffle: A bool that indicates whether the input should be shuffled.
+    shuffle_buffer_size: Buffer size to use for shuffling. A large buffer size
+      ensures better shuffling, but increases memory usage and startup time.
+    shuffle_seed: Randomization seed to use for shuffling.
+    prefetch_buffer_size: An int specifying the number of feature
+      batches to prefetch for performance improvement. Recommended value is the
+      number of batches consumed per training step. Defaults to auto-tune.
+
+    num_parallel_reads: Number of threads used to read CSV records from files.
+      If >1, the results will be interleaved.
+    sloppy: If `True`, reading performance will be improved at
+      the cost of non-deterministic ordering. If `False`, the order of elements
+      produced is deterministic prior to shuffling (elements are still
+      randomized if `shuffle=True`. Note that if the seed is set, then order
+      of elements after shuffling is deterministic). Defaults to `False`.
+    num_rows_for_inference: Number of rows of a file to use for type inference
+      if record_defaults is not provided. If None, reads all the rows of all
+      the files. Defaults to 100.
+    compression_type: (Optional.) A `tf.string` scalar evaluating to one of
+      `""` (no compression), `"ZLIB"`, or `"GZIP"`. Defaults to no compression.
+
+  Returns:
+    A dataset, where each element is a (features, labels) tuple that corresponds
+    to a batch of `batch_size` CSV rows. The features dictionary maps feature
+    column names to `Tensor`s containing the corresponding column data, and
+    labels is a `Tensor` containing the column data for the label column
+    specified by `label_name`.
+
+  Raises:
+    ValueError: If any of the arguments is malformed.
+  """
+  # Create dataset of all matching filenames
+  filenames = _get_file_names(file_pattern, False)
+  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
+  if shuffle:
+    dataset = dataset.shuffle(len(filenames), shuffle_seed)
+
+  # Clean arguments; figure out column names and defaults
+
+  if column_names is None:
+    if not header:
+      raise ValueError("Cannot infer column names without a header line.")
+    # If column names are not provided, infer from the header lines
+    column_names = _infer_column_names(filenames, field_delim, use_quote_delim)
+  if len(column_names) != len(set(column_names)):
+    raise ValueError("Cannot have duplicate column names.")
+
+  if select_columns is not None:
+    select_columns = _get_sorted_col_indices(select_columns, column_names)
+
+  if column_defaults is not None:
+    column_defaults = [
+        constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x
+        for x in column_defaults
+    ]
+  else:
+    # If column defaults are not provided, infer from records at graph
+    # construction time
+    column_defaults = _infer_column_defaults(
+        filenames, len(column_names), field_delim, use_quote_delim, na_value,
+        header, num_rows_for_inference, select_columns)
+
+  if select_columns is not None and len(column_defaults) != len(select_columns):
+    raise ValueError(
+        "If specified, column_defaults and select_columns must have same "
+        "length."
+    )
+  if select_columns is not None and len(column_names) > len(select_columns):
+    # Pick the relevant subset of column names
+    column_names = [column_names[i] for i in select_columns]
+
+  if label_name is not None and label_name not in column_names:
+    raise ValueError("`label_name` provided must be one of the columns.")
+
+  def filename_to_dataset(filename):
+    return CsvDataset(
+        filename,
+        record_defaults=column_defaults,
+        field_delim=field_delim,
+        use_quote_delim=use_quote_delim,
+        na_value=na_value,
+        select_cols=select_columns,
+        header=header,
+        compression_type=compression_type,
+    )
+
+  def map_fn(*columns):
+    """Organizes columns into a features dictionary.
+
+    Args:
+      *columns: list of `Tensor`s corresponding to one csv record.
+    Returns:
+      An OrderedDict of feature names to values for that particular record. If
+      label_name is provided, extracts the label feature to be returned as the
+      second element of the tuple.
+    """
+    features = collections.OrderedDict(zip(column_names, columns))
+    if label_name is not None:
+      label = features.pop(label_name)
+      return features, label
+    return features
+
+  # Read files sequentially (if num_parallel_reads=1) or in parallel
+  dataset = dataset.apply(
+      interleave_ops.parallel_interleave(
+          filename_to_dataset, cycle_length=num_parallel_reads, sloppy=sloppy))
+
+  dataset = _maybe_shuffle_and_repeat(
+      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
+
+  # Apply batch before map for perf, because map has high overhead relative
+  # to the size of the computation in each map.
+  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
+  # improve the shape inference, because it makes the batch dimension static.
+  # It is safe to do this because in that case we are repeating the input
+  # indefinitely, and all batches will be full-sized.
+  dataset = dataset.batch(batch_size=batch_size,
+                          drop_remainder=num_epochs is None)
+  dataset = dataset_ops.MapDataset(
+      dataset, map_fn, use_inter_op_parallelism=False)
+  dataset = dataset.prefetch(prefetch_buffer_size)
+
+  return dataset
+
+
+_DEFAULT_READER_BUFFER_SIZE_BYTES = 4 * 1024 * 1024  # 4 MB
+
+
+@tf_export("data.experimental.CsvDataset")
+class CsvDataset(dataset_ops.DatasetSource):
+  """A Dataset comprising lines from one or more CSV files."""
+
+  def __init__(self,
+               filenames,
+               record_defaults,
+               compression_type=None,
+               buffer_size=None,
+               header=False,
+               field_delim=",",
+               use_quote_delim=True,
+               na_value="",
+               select_cols=None):
+    """Creates a `CsvDataset` by reading and decoding CSV files.
+
+    The elements of this dataset correspond to records from the file(s).
+    RFC 4180 format is expected for CSV files
+    (https://tools.ietf.org/html/rfc4180)
+    Note that we allow leading and trailing spaces with int or float field.
+
+
+    For example, suppose we have a file 'my_file0.csv' with four CSV columns of
+    different data types:
+    ```
+    abcdefg,4.28E10,5.55E6,12
+    hijklmn,-5.3E14,,2
+    ```
+
+    We can construct a CsvDataset from it as follows:
+    ```python
+    dataset = tf.data.experimental.CsvDataset(
+        "my_file*.csv",
+        [tf.float32,  # Required field, use dtype or empty tensor
+         tf.constant([0.0], dtype=tf.float32),  # Optional field, default to 0.0
+         tf.int32,  # Required field, use dtype or empty tensor
+         ],
+        select_cols=[1,2,3]  # Only parse last three columns
+    )
+    ```
+
+    The expected output of its iterations is:
+    ```python
+    next_element = dataset.make_one_shot_iterator().get_next()
+    with tf.Session() as sess:
+      while True:
+        try:
+          print(sess.run(next_element))
+        except tf.errors.OutOfRangeError:
+          break
+
+    >> (4.28e10, 5.55e6, 12)
+    >> (-5.3e14, 0.0, 2)
+    ```
+
+    Args:
+      filenames: A `tf.string` tensor containing one or more filenames.
+      record_defaults: A list of default values for the CSV fields. Each item in
+        the list is either a valid CSV `DType` (float32, float64, int32, int64,
+        string), or a `Tensor` object with one of the above types. One per
+        column of CSV data, with either a scalar `Tensor` default value for the
+        column if it is optional, or `DType` or empty `Tensor` if required. If
+        both this and `select_columns` are specified, these must have the same
+        lengths, and `column_defaults` is assumed to be sorted in order of
+        increasing column index.
+      compression_type: (Optional.) A `tf.string` scalar evaluating to one of
+        `""` (no compression), `"ZLIB"`, or `"GZIP"`. Defaults to no
+        compression.
+      buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes
+        to buffer while reading files. Defaults to 4MB.
+      header: (Optional.) A `tf.bool` scalar indicating whether the CSV file(s)
+        have header line(s) that should be skipped when parsing. Defaults to
+        `False`.
+      field_delim: (Optional.) A `tf.string` scalar containing the delimiter
+        character that separates fields in a record. Defaults to `","`.
+      use_quote_delim: (Optional.) A `tf.bool` scalar. If `False`, treats
+        double quotation marks as regular characters inside of string fields
+        (ignoring RFC 4180, Section 2, Bullet 5). Defaults to `True`.
+      na_value: (Optional.) A `tf.string` scalar indicating a value that will
+        be treated as NA/NaN.
+      select_cols: (Optional.) A sorted list of column indices to select from
+        the input data. If specified, only this subset of columns will be
+        parsed. Defaults to parsing all columns.
+    """
+    super(CsvDataset, self).__init__()
+    self._filenames = ops.convert_to_tensor(
+        filenames, dtype=dtypes.string, name="filenames")
+    self._compression_type = convert.optional_param_to_tensor(
+        "compression_type",
+        compression_type,
+        argument_default="",
+        argument_dtype=dtypes.string)
+    record_defaults = [
+        constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x
+        for x in record_defaults
+    ]
+    self._record_defaults = ops.convert_n_to_tensor(
+        record_defaults, name="record_defaults")
+    self._buffer_size = convert.optional_param_to_tensor(
+        "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
+    self._header = ops.convert_to_tensor(
+        header, dtype=dtypes.bool, name="header")
+    self._field_delim = ops.convert_to_tensor(
+        field_delim, dtype=dtypes.string, name="field_delim")
+    self._use_quote_delim = ops.convert_to_tensor(
+        use_quote_delim, dtype=dtypes.bool, name="use_quote_delim")
+    self._na_value = ops.convert_to_tensor(
+        na_value, dtype=dtypes.string, name="na_value")
+    self._select_cols = convert.optional_param_to_tensor(
+        "select_cols",
+        select_cols,
+        argument_default=[],
+        argument_dtype=dtypes.int64,
+    )
+    self._output_shapes = tuple(
+        tensor_shape.scalar() for _ in range(len(record_defaults)))
+    self._output_types = tuple(d.dtype for d in self._record_defaults)
+    self._output_classes = tuple(
+        ops.Tensor for _ in range(len(record_defaults)))
+
+  def _as_variant_tensor(self):
+    # Constructs graph node for the dataset op.
+    return gen_experimental_dataset_ops.experimental_csv_dataset(
+        filenames=self._filenames,
+        record_defaults=self._record_defaults,
+        buffer_size=self._buffer_size,
+        header=self._header,
+        output_shapes=self._output_shapes,
+        field_delim=self._field_delim,
+        use_quote_delim=self._use_quote_delim,
+        na_value=self._na_value,
+        select_cols=self._select_cols,
+        compression_type=self._compression_type,
+    )
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+
+@tf_export("data.experimental.make_batched_features_dataset")
+def make_batched_features_dataset(file_pattern,
+                                  batch_size,
+                                  features,
+                                  reader=core_readers.TFRecordDataset,
+                                  label_key=None,
+                                  reader_args=None,
+                                  num_epochs=None,
+                                  shuffle=True,
+                                  shuffle_buffer_size=10000,
+                                  shuffle_seed=None,
+                                  prefetch_buffer_size=optimization.AUTOTUNE,
+                                  reader_num_threads=1,
+                                  parser_num_threads=2,
+                                  sloppy_ordering=False,
+                                  drop_final_batch=False):
+  """Returns a `Dataset` of feature dictionaries from `Example` protos.
+
+  If label_key argument is provided, returns a `Dataset` of tuple
+  comprising of feature dictionaries and label.
+
+  Example:
+
+  ```
+  serialized_examples = [
+    features {
+      feature { key: "age" value { int64_list { value: [ 0 ] } } }
+      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
+      feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } }
+    },
+    features {
+      feature { key: "age" value { int64_list { value: [] } } }
+      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
+      feature { key: "kws" value { bytes_list { value: [ "sports" ] } } }
+    }
+  ]
+  ```
+
+  We can use arguments:
+
+  ```
+  features: {
+    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
+    "gender": FixedLenFeature([], dtype=tf.string),
+    "kws": VarLenFeature(dtype=tf.string),
+  }
+  ```
+
+  And the expected output is:
+
+  ```python
+  {
+    "age": [[0], [-1]],
+    "gender": [["f"], ["f"]],
+    "kws": SparseTensor(
+      indices=[[0, 0], [0, 1], [1, 0]],
+      values=["code", "art", "sports"]
+      dense_shape=[2, 2]),
+  }
+  ```
+
+  Args:
+    file_pattern: List of files or patterns of file paths containing
+      `Example` records. See `tf.gfile.Glob` for pattern rules.
+    batch_size: An int representing the number of records to combine
+      in a single batch.
+    features: A `dict` mapping feature keys to `FixedLenFeature` or
+      `VarLenFeature` values. See `tf.parse_example`.
+    reader: A function or class that can be
+      called with a `filenames` tensor and (optional) `reader_args` and returns
+      a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`.
+    label_key: (Optional) A string corresponding to the key labels are stored in
+      `tf.Examples`. If provided, it must be one of the `features` key,
+      otherwise results in `ValueError`.
+    reader_args: Additional arguments to pass to the reader class.
+    num_epochs: Integer specifying the number of times to read through the
+      dataset. If None, cycles through the dataset forever. Defaults to `None`.
+    shuffle: A boolean, indicates whether the input should be shuffled. Defaults
+      to `True`.
+    shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity
+      ensures better shuffling but would increase memory usage and startup time.
+    shuffle_seed: Randomization seed to use for shuffling.
+    prefetch_buffer_size: Number of feature batches to prefetch in order to
+      improve performance. Recommended value is the number of batches consumed
+      per training step. Defaults to auto-tune.
+    reader_num_threads: Number of threads used to read `Example` records. If >1,
+      the results will be interleaved.
+    parser_num_threads: Number of threads to use for parsing `Example` tensors
+      into a dictionary of `Feature` tensors.
+    sloppy_ordering: If `True`, reading performance will be improved at
+      the cost of non-deterministic ordering. If `False`, the order of elements
+      produced is deterministic prior to shuffling (elements are still
+      randomized if `shuffle=True`. Note that if the seed is set, then order
+      of elements after shuffling is deterministic). Defaults to `False`.
+    drop_final_batch: If `True`, and the batch size does not evenly divide the
+      input dataset size, the final smaller batch will be dropped. Defaults to
+      `False`.
+
+  Returns:
+    A dataset of `dict` elements, (or a tuple of `dict` elements and label).
+    Each `dict` maps feature keys to `Tensor` or `SparseTensor` objects.
+
+  Raises:
+    ValueError: If `label_key` is not one of the `features` keys.
+  """
+  # Create dataset of all matching filenames
+  filenames = _get_file_names(file_pattern, False)
+  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
+  if shuffle:
+    dataset = dataset.shuffle(len(filenames), shuffle_seed)
+
+  # Read `Example` records from files as tensor objects.
+  if reader_args is None:
+    reader_args = []
+
+  # Read files sequentially (if reader_num_threads=1) or in parallel
+  dataset = dataset.apply(
+      interleave_ops.parallel_interleave(
+          lambda filename: reader(filename, *reader_args),
+          cycle_length=reader_num_threads,
+          sloppy=sloppy_ordering))
+
+  # Extract values if the `Example` tensors are stored as key-value tuples.
+  if dataset.output_types == (dtypes.string, dtypes.string):
+    dataset = dataset_ops.MapDataset(
+        dataset, lambda _, v: v, use_inter_op_parallelism=False)
+
+  # Apply dataset repeat and shuffle transformations.
+  dataset = _maybe_shuffle_and_repeat(
+      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)
+
+  # NOTE(mrry): We set `drop_remainder=True` when `num_epochs is None` to
+  # improve the shape inference, because it makes the batch dimension static.
+  # It is safe to do this because in that case we are repeating the input
+  # indefinitely, and all batches will be full-sized.
+  dataset = dataset.batch(
+      batch_size, drop_remainder=drop_final_batch or num_epochs is None)
+
+  # Parse `Example` tensors to a dictionary of `Feature` tensors.
+  dataset = dataset.apply(
+      parsing_ops.parse_example_dataset(
+          features, num_parallel_calls=parser_num_threads))
+
+  if label_key:
+    if label_key not in features:
+      raise ValueError(
+          "The `label_key` provided (%r) must be one of the `features` keys." %
+          label_key)
+    dataset = dataset.map(lambda x: (x, x.pop(label_key)))
+
+  dataset = dataset.prefetch(prefetch_buffer_size)
+  return dataset
+
+
+def _get_file_names(file_pattern, shuffle):
+  """Parse list of file names from pattern, optionally shuffled.
+
+  Args:
+    file_pattern: File glob pattern, or list of glob patterns.
+    shuffle: Whether to shuffle the order of file names.
+
+  Returns:
+    List of file names matching `file_pattern`.
+
+  Raises:
+    ValueError: If `file_pattern` is empty, or pattern matches no files.
+  """
+  if isinstance(file_pattern, list):
+    if not file_pattern:
+      raise ValueError("File pattern is empty.")
+    file_names = []
+    for entry in file_pattern:
+      file_names.extend(gfile.Glob(entry))
+  else:
+    file_names = list(gfile.Glob(file_pattern))
+
+  if not file_names:
+    raise ValueError("No files match %s." % file_pattern)
+
+  # Sort files so it will be deterministic for unit tests.
+  if not shuffle:
+    file_names = sorted(file_names)
+  return file_names
+
+
+@tf_export("data.experimental.SqlDataset")
+class SqlDataset(dataset_ops.DatasetSource):
+  """A `Dataset` consisting of the results from a SQL query."""
+
+  def __init__(self, driver_name, data_source_name, query, output_types):
+    """Creates a `SqlDataset`.
+
+    `SqlDataset` allows a user to read data from the result set of a SQL query.
+    For example:
+
+    ```python
+    dataset = tf.data.experimental.SqlDataset("sqlite", "/foo/bar.sqlite3",
+                                              "SELECT name, age FROM people",
+                                              (tf.string, tf.int32))
+    iterator = dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+    # Prints the rows of the result set of the above query.
+    while True:
+      try:
+        print(sess.run(next_element))
+      except tf.errors.OutOfRangeError:
+        break
+    ```
+
+    Args:
+      driver_name: A 0-D `tf.string` tensor containing the database type.
+        Currently, the only supported value is 'sqlite'.
+      data_source_name: A 0-D `tf.string` tensor containing a connection string
+        to connect to the database.
+      query: A 0-D `tf.string` tensor containing the SQL query to execute.
+      output_types: A tuple of `tf.DType` objects representing the types of the
+        columns returned by `query`.
+    """
+    super(SqlDataset, self).__init__()
+    self._driver_name = ops.convert_to_tensor(
+        driver_name, dtype=dtypes.string, name="driver_name")
+    self._data_source_name = ops.convert_to_tensor(
+        data_source_name, dtype=dtypes.string, name="data_source_name")
+    self._query = ops.convert_to_tensor(
+        query, dtype=dtypes.string, name="query")
+    self._output_types = output_types
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.sql_dataset(self._driver_name,
+                                       self._data_source_name, self._query,
+                                       nest.flatten(self.output_types),
+                                       nest.flatten(self.output_shapes))
+
+  @property
+  def output_classes(self):
+    return nest.map_structure(lambda _: ops.Tensor, self._output_types)
+
+  @property
+  def output_shapes(self):
+    return nest.map_structure(lambda _: tensor_shape.TensorShape([]),
+                              self._output_types)
+
+  @property
+  def output_types(self):
+    return self._output_types
diff --git a/tensorflow/python/data/experimental/ops/resampling.py b/tensorflow/python/data/experimental/ops/resampling.py
new file mode 100644
index 0000000000..3a3040ae9a
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/resampling.py
@@ -0,0 +1,296 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Resampling dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops import interleave_ops
+from tensorflow.python.data.experimental.ops import scan_ops
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import logging_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.rejection_resample")
+def rejection_resample(class_func, target_dist, initial_dist=None, seed=None):
+  """A transformation that resamples a dataset to achieve a target distribution.
+
+  **NOTE** Resampling is performed via rejection sampling; some fraction
+  of the input values will be dropped.
+
+  Args:
+    class_func: A function mapping an element of the input dataset to a scalar
+      `tf.int32` tensor. Values should be in `[0, num_classes)`.
+    target_dist: A floating point type tensor, shaped `[num_classes]`.
+    initial_dist: (Optional.)  A floating point type tensor, shaped
+      `[num_classes]`.  If not provided, the true class distribution is
+      estimated live in a streaming fashion.
+    seed: (Optional.) Python integer seed for the resampler.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+  def _apply_fn(dataset):
+    """Function from `Dataset` to `Dataset` that applies the transformation."""
+    target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist")
+    class_values_ds = dataset.map(class_func)
+
+    # Get initial distribution.
+    if initial_dist is not None:
+      initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist")
+      acceptance_dist, prob_of_original = (
+          _calculate_acceptance_probs_with_mixing(initial_dist_t,
+                                                  target_dist_t))
+      initial_dist_ds = dataset_ops.Dataset.from_tensors(
+          initial_dist_t).repeat()
+      acceptance_dist_ds = dataset_ops.Dataset.from_tensors(
+          acceptance_dist).repeat()
+      prob_of_original_ds = dataset_ops.Dataset.from_tensors(
+          prob_of_original).repeat()
+    else:
+      initial_dist_ds = _estimate_initial_dist_ds(
+          target_dist_t, class_values_ds)
+      acceptance_and_original_prob_ds = initial_dist_ds.map(
+          lambda initial: _calculate_acceptance_probs_with_mixing(  # pylint: disable=g-long-lambda
+              initial, target_dist_t))
+      acceptance_dist_ds = acceptance_and_original_prob_ds.map(
+          lambda accept_prob, _: accept_prob)
+      prob_of_original_ds = acceptance_and_original_prob_ds.map(
+          lambda _, prob_original: prob_original)
+    filtered_ds = _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds,
+                             class_values_ds, seed)
+    # Prefetch filtered dataset for speed.
+    filtered_ds = filtered_ds.prefetch(3)
+
+    prob_original_static = _get_prob_original_static(
+        initial_dist_t, target_dist_t) if initial_dist is not None else None
+    if prob_original_static == 1:
+      return dataset_ops.Dataset.zip((class_values_ds, dataset))
+    elif prob_original_static == 0:
+      return filtered_ds
+    else:
+      return interleave_ops.sample_from_datasets(
+          [dataset_ops.Dataset.zip((class_values_ds, dataset)), filtered_ds],
+          weights=prob_of_original_ds.map(lambda prob: [(prob, 1.0 - prob)]),
+          seed=seed)
+
+  return _apply_fn
+
+
+def _get_prob_original_static(initial_dist_t, target_dist_t):
+  """Returns the static probability of sampling from the original.
+
+  `tensor_util.constant_value(prob_of_original)` returns `None` if it encounters
+  an Op that it isn't defined for. We have some custom logic to avoid this.
+
+  Args:
+    initial_dist_t: A tensor of the initial distribution.
+    target_dist_t: A tensor of the target distribution.
+
+  Returns:
+    The probability of sampling from the original distribution as a constant,
+    if it is a constant, or `None`.
+  """
+  init_static = tensor_util.constant_value(initial_dist_t)
+  target_static = tensor_util.constant_value(target_dist_t)
+
+  if init_static is None or target_static is None:
+    return None
+  else:
+    return np.min(target_static / init_static)
+
+
+def _filter_ds(dataset, acceptance_dist_ds, initial_dist_ds, class_values_ds,
+               seed):
+  """Filters a dataset based on per-class acceptance probabilities.
+
+  Args:
+    dataset: The dataset to be filtered.
+    acceptance_dist_ds: A dataset of acceptance probabilities.
+    initial_dist_ds: A dataset of the initial probability distribution, given or
+        estimated.
+    class_values_ds: A dataset of the corresponding classes.
+    seed: (Optional.) Python integer seed for the resampler.
+
+  Returns:
+    A dataset of (class value, data) after filtering.
+  """
+  def maybe_warn_on_large_rejection(accept_dist, initial_dist):
+    proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist)
+    return control_flow_ops.cond(
+        math_ops.less(proportion_rejected, .5),
+        lambda: accept_dist,
+        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
+            accept_dist, [proportion_rejected, initial_dist, accept_dist],
+            message="Proportion of examples rejected by sampler is high: ",
+            summarize=100,
+            first_n=10))
+
+  acceptance_dist_ds = (dataset_ops.Dataset.zip((acceptance_dist_ds,
+                                                 initial_dist_ds))
+                        .map(maybe_warn_on_large_rejection))
+
+  def _gather_and_copy(class_val, acceptance_prob, data):
+    return class_val, array_ops.gather(acceptance_prob, class_val), data
+
+  current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip(
+      (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy)
+  filtered_ds = (
+      current_probabilities_and_class_and_data_ds
+      .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p))
+  return filtered_ds.map(lambda class_value, _, data: (class_value, data))
+
+
+def _estimate_initial_dist_ds(
+    target_dist_t, class_values_ds, dist_estimation_batch_size=32,
+    smoothing_constant=10):
+  num_classes = (target_dist_t.shape[0].value or
+                 array_ops.shape(target_dist_t)[0])
+  initial_examples_per_class_seen = array_ops.fill(
+      [num_classes], np.int64(smoothing_constant))
+
+  def update_estimate_and_tile(num_examples_per_class_seen, c):
+    updated_examples_per_class_seen, dist = _estimate_data_distribution(
+        c, num_examples_per_class_seen)
+    tiled_dist = array_ops.tile(
+        array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1])
+    return updated_examples_per_class_seen, tiled_dist
+
+  initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
+                     .apply(scan_ops.scan(initial_examples_per_class_seen,
+                                          update_estimate_and_tile))
+                     .apply(batching.unbatch()))
+
+  return initial_dist_ds
+
+
+def _get_target_to_initial_ratio(initial_probs, target_probs):
+  # Add tiny to initial_probs to avoid divide by zero.
+  denom = (initial_probs + np.finfo(initial_probs.dtype.as_numpy_dtype).tiny)
+  return target_probs / denom
+
+
+def _estimate_data_distribution(c, num_examples_per_class_seen):
+  """Estimate data distribution as labels are seen.
+
+  Args:
+    c: The class labels.  Type `int32`, shape `[batch_size]`.
+    num_examples_per_class_seen: Type `int64`, shape `[num_classes]`,
+      containing counts.
+
+  Returns:
+    num_examples_per_lass_seen: Updated counts.  Type `int64`, shape
+      `[num_classes]`.
+    dist: The updated distribution.  Type `float32`, shape `[num_classes]`.
+  """
+  num_classes = num_examples_per_class_seen.get_shape()[0].value
+  # Update the class-count based on what labels are seen in batch.
+  num_examples_per_class_seen = math_ops.add(
+      num_examples_per_class_seen, math_ops.reduce_sum(
+          array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
+  init_prob_estimate = math_ops.truediv(
+      num_examples_per_class_seen,
+      math_ops.reduce_sum(num_examples_per_class_seen))
+  dist = math_ops.cast(init_prob_estimate, dtypes.float32)
+  return num_examples_per_class_seen, dist
+
+
+def _calculate_acceptance_probs_with_mixing(initial_probs, target_probs):
+  """Calculates the acceptance probabilities and mixing ratio.
+
+  In this case, we assume that we can *either* sample from the original data
+  distribution with probability `m`, or sample from a reshaped distribution
+  that comes from rejection sampling on the original distribution. This
+  rejection sampling is done on a per-class basis, with `a_i` representing the
+  probability of accepting data from class `i`.
+
+  This method is based on solving the following analysis for the reshaped
+  distribution:
+
+  Let F be the probability of a rejection (on any example).
+  Let p_i be the proportion of examples in the data in class i (init_probs)
+  Let a_i is the rate the rejection sampler should *accept* class i
+  Let t_i is the target proportion in the minibatches for class i (target_probs)
+
+  ```
+  F = sum_i(p_i * (1-a_i))
+    = 1 - sum_i(p_i * a_i)     using sum_i(p_i) = 1
+  ```
+
+  An example with class `i` will be accepted if `k` rejections occur, then an
+  example with class `i` is seen by the rejector, and it is accepted. This can
+  be written as follows:
+
+  ```
+  t_i = sum_k=0^inf(F^k * p_i * a_i)
+      = p_i * a_j / (1 - F)    using geometric series identity, since 0 <= F < 1
+      = p_i * a_i / sum_j(p_j * a_j)        using F from above
+  ```
+
+  Note that the following constraints hold:
+  ```
+  0 <= p_i <= 1, sum_i(p_i) = 1
+  0 <= a_i <= 1
+  0 <= t_i <= 1, sum_i(t_i) = 1
+  ```
+
+  A solution for a_i in terms of the other variables is the following:
+    ```a_i = (t_i / p_i) / max_i[t_i / p_i]```
+
+  If we try to minimize the amount of data rejected, we get the following:
+
+  M_max = max_i [ t_i / p_i ]
+  M_min = min_i [ t_i / p_i ]
+
+  The desired probability of accepting data if it comes from class `i`:
+
+  a_i = (t_i/p_i - m) / (M_max - m)
+
+  The desired probability of pulling a data element from the original dataset,
+  rather than the filtered one:
+
+  m = M_min
+
+  Args:
+    initial_probs: A Tensor of the initial probability distribution, given or
+      estimated.
+    target_probs: A Tensor of the corresponding classes.
+
+  Returns:
+    (A 1D Tensor with the per-class acceptance probabilities, the desired
+    probability of pull from the original distribution.)
+  """
+  ratio_l = _get_target_to_initial_ratio(initial_probs, target_probs)
+  max_ratio = math_ops.reduce_max(ratio_l)
+  min_ratio = math_ops.reduce_min(ratio_l)
+
+  # Target prob to sample from original distribution.
+  m = min_ratio
+
+  # TODO(joelshor): Simplify fraction, if possible.
+  a_i = (ratio_l - m) / (max_ratio - m)
+  return a_i, m
diff --git a/tensorflow/python/data/experimental/ops/scan_ops.py b/tensorflow/python/data/experimental/ops/scan_ops.py
new file mode 100644
index 0000000000..e05e7c5a18
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/scan_ops.py
@@ -0,0 +1,177 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Scan dataset transformation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import sparse
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+class _ScanDataset(dataset_ops.UnaryDataset):
+  """A dataset that scans a function across its input."""
+
+  def __init__(self, input_dataset, initial_state, scan_func):
+    """See `scan()` for details."""
+    super(_ScanDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+
+    with ops.name_scope("initial_state"):
+      # Convert any `SparseTensorValue`s to `SparseTensor`s and all other
+      # values to tensors.
+      self._initial_state = nest.pack_sequence_as(initial_state, [
+          sparse_tensor.SparseTensor.from_value(t)
+          if sparse_tensor.is_sparse(t) else ops.convert_to_tensor(
+              t, name="component_%d" % i)
+          for i, t in enumerate(nest.flatten(initial_state))
+      ])
+
+    # Compute initial values for the state classes, shapes and types based on
+    # the initial state. The shapes may be refined by running `tf_scan_func` one
+    # or more times below.
+    self._state_classes = sparse.get_classes(self._initial_state)
+    self._state_shapes = nest.pack_sequence_as(
+        self._initial_state,
+        [t.get_shape() for t in nest.flatten(self._initial_state)])
+    self._state_types = nest.pack_sequence_as(
+        self._initial_state,
+        [t.dtype for t in nest.flatten(self._initial_state)])
+
+    # Will be populated by calling `tf_scan_func`.
+    self._output_classes = None
+    self._output_shapes = None
+    self._output_types = None
+
+    # Iteratively rerun the scan function until reaching a fixed point on
+    # `self._state_shapes`.
+    need_to_rerun = True
+    while need_to_rerun:
+
+      wrapped_func = dataset_ops.StructuredFunctionWrapper(
+          scan_func,
+          "tf.data.experimental.scan()",
+          input_classes=(self._state_classes, input_dataset.output_classes),
+          input_shapes=(self._state_shapes, input_dataset.output_shapes),
+          input_types=(self._state_types, input_dataset.output_types),
+          add_to_graph=False)
+      if not (
+          isinstance(wrapped_func.output_types, collections.Sequence) and
+          len(wrapped_func.output_types) == 2):
+        raise TypeError("The scan function must return a pair comprising the "
+                        "new state and the output value.")
+
+      new_state_classes, self._output_classes = wrapped_func.output_classes
+
+      # Extract and validate class information from the returned values.
+      for new_state_class, state_class in zip(
+          nest.flatten(new_state_classes),
+          nest.flatten(self._state_classes)):
+        if not issubclass(new_state_class, state_class):
+          raise TypeError(
+              "The element classes for the new state must match the initial "
+              "state. Expected %s; got %s." %
+              (self._state_classes, new_state_classes))
+
+      # Extract and validate type information from the returned values.
+      new_state_types, self._output_types = wrapped_func.output_types
+      for new_state_type, state_type in zip(
+          nest.flatten(new_state_types), nest.flatten(self._state_types)):
+        if new_state_type != state_type:
+          raise TypeError(
+              "The element types for the new state must match the initial "
+              "state. Expected %s; got %s." %
+              (self._state_types, new_state_types))
+
+      # Extract shape information from the returned values.
+      new_state_shapes, self._output_shapes = wrapped_func.output_shapes
+
+      flat_state_shapes = nest.flatten(self._state_shapes)
+      flat_new_state_shapes = nest.flatten(new_state_shapes)
+      weakened_state_shapes = [
+          original.most_specific_compatible_shape(new)
+          for original, new in zip(flat_state_shapes, flat_new_state_shapes)
+      ]
+
+      need_to_rerun = False
+      for original_shape, weakened_shape in zip(flat_state_shapes,
+                                                weakened_state_shapes):
+        if original_shape.ndims is not None and (
+            weakened_shape.ndims is None or
+            original_shape.as_list() != weakened_shape.as_list()):
+          need_to_rerun = True
+          break
+
+      if need_to_rerun:
+        self._state_shapes = nest.pack_sequence_as(self._state_shapes,
+                                                   weakened_state_shapes)
+
+    self._scan_func = wrapped_func.function
+    self._scan_func.add_to_graph(ops.get_default_graph())
+
+  def _as_variant_tensor(self):
+    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+    return gen_dataset_ops.scan_dataset(
+        input_t,
+        nest.flatten(sparse.serialize_sparse_tensors(self._initial_state)),
+        self._scan_func.captured_inputs,
+        f=self._scan_func,
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._output_classes
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+
+@tf_export("data.experimental.scan")
+def scan(initial_state, scan_func):
+  """A transformation that scans a function across an input dataset.
+
+  This transformation is a stateful relative of `tf.data.Dataset.map`.
+  In addition to mapping `scan_func` across the elements of the input dataset,
+  `scan()` accumulates one or more state tensors, whose initial values are
+  `initial_state`.
+
+  Args:
+    initial_state: A nested structure of tensors, representing the initial state
+      of the accumulator.
+    scan_func: A function that maps `(old_state, input_element)` to
+      `(new_state, output_element). It must take two arguments and return a
+      pair of nested structures of tensors. The `new_state` must match the
+      structure of `initial_state`.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+  def _apply_fn(dataset):
+    return _ScanDataset(dataset, initial_state, scan_func)
+
+  return _apply_fn
diff --git a/tensorflow/python/data/experimental/ops/shuffle_ops.py b/tensorflow/python/data/experimental/ops/shuffle_ops.py
new file mode 100644
index 0000000000..a4307212da
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/shuffle_ops.py
@@ -0,0 +1,102 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental shuffle ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import random_seed
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that fuses `shuffle` and `repeat`."""
+
+  def __init__(self, input_dataset, buffer_size, count=None, seed=None):
+    super(_ShuffleAndRepeatDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._buffer_size = ops.convert_to_tensor(
+        buffer_size, dtype=dtypes.int64, name="buffer_size")
+    if count is None:
+      self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")
+    else:
+      self._count = ops.convert_to_tensor(
+          count, dtype=dtypes.int64, name="count")
+    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+  def _as_variant_tensor(self):
+    # pylint: disable=protected-access
+    input_resource = self._input_dataset._as_variant_tensor()
+    return gen_dataset_ops.shuffle_and_repeat_dataset(
+        input_resource,
+        buffer_size=self._buffer_size,
+        count=self._count,
+        seed=self._seed,
+        seed2=self._seed2,
+        **dataset_ops.flat_structure(self))
+    # pylint: enable=protected-access
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+@tf_export("data.experimental.shuffle_and_repeat")
+def shuffle_and_repeat(buffer_size, count=None, seed=None):
+  """Shuffles and repeats a Dataset returning a new permutation for each epoch.
+
+  `dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size, count))`
+
+  is equivalent to
+
+  `dataset.shuffle(buffer_size, reshuffle_each_iteration=True).repeat(count)`
+
+  The difference is that the latter dataset is not serializable. So,
+  if you need to checkpoint an input pipeline with reshuffling you must use
+  this implementation.
+
+  Args:
+    buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the
+      maximum number elements that will be buffered when prefetching.
+    count: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      number of times the dataset should be repeated. The default behavior
+      (if `count` is `None` or `-1`) is for the dataset be repeated
+      indefinitely.
+    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the
+      random seed that will be used to create the distribution. See
+      `tf.set_random_seed` for behavior.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):  # pylint: disable=missing-docstring
+    return _ShuffleAndRepeatDataset(dataset, buffer_size, count, seed)
+
+  return _apply_fn
diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/python/data/experimental/ops/stats_ops.py
similarity index 92%
rename from tensorflow/contrib/data/python/ops/stats_ops.py
rename to tensorflow/python/data/experimental/ops/stats_ops.py
index bc47c5989d..c918d223e8 100644
--- a/tensorflow/contrib/data/python/ops/stats_ops.py
+++ b/tensorflow/python/data/experimental/ops/stats_ops.py
@@ -21,8 +21,10 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export("data.experimental.StatsAggregator")
 class StatsAggregator(object):
   """A stateful resource that aggregates statistics from one or more iterators.
 
@@ -34,7 +36,7 @@ class StatsAggregator(object):
 
   ```python
   dataset = ...
-  dataset = dataset.apply(stats_ops.latency_stats("total_bytes"))
+  dataset = dataset.apply(tf.data.experimental.latency_stats("total_bytes"))
   ```
 
   To associate a `StatsAggregator` with a `tf.data.Dataset` object, use
@@ -46,7 +48,7 @@ class StatsAggregator(object):
 
   # Apply `set_stats_aggregator` to associate `dataset` with `stats_aggregator`.
   dataset = dataset.apply(
-      tf.contrib.data.set_stats_aggregator(stats_aggregator))
+      tf.data.experimental.set_stats_aggregator(stats_aggregator))
   iterator = dataset.make_one_shot_iterator()
   ```
 
@@ -111,11 +113,12 @@ class _SetStatsAggregatorDataset(dataset_ops.UnaryDataset):
     return self._input_dataset.output_classes
 
 
+@tf_export("data.experimental.set_stats_aggregator")
 def set_stats_aggregator(stats_aggregator):
   """Set the given `stats_aggregator` for aggregating the input dataset stats.
 
   Args:
-    stats_aggregator: A `tf.contrib.data.StatsAggregator` object.
+    stats_aggregator: A `tf.data.experimental.StatsAggregator` object.
 
   Returns:
     A `Dataset` transformation function, which can be passed to
@@ -128,8 +131,8 @@ def set_stats_aggregator(stats_aggregator):
   return _apply_fn
 
 
-# TODO(b/38416882): Properly export in the `tf.contrib.data` API when stable
-# or make private / remove.
+# TODO(b/38416882): Properly export in the `tf.data.experimental` API when
+# stable or make private / remove.
 def bytes_produced_stats(tag):
   """Records the number of bytes produced by each element of the input dataset.
 
@@ -152,6 +155,7 @@ def bytes_produced_stats(tag):
   return _apply_fn
 
 
+@tf_export("data.experimental.latency_stats")
 def latency_stats(tag):
   """Records the latency of producing each element of the input dataset.
 
diff --git a/tensorflow/python/data/experimental/ops/threadpool.py b/tensorflow/python/data/experimental/ops/threadpool.py
new file mode 100644
index 0000000000..3ea017c6e8
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/threadpool.py
@@ -0,0 +1,104 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental API for controlling threading in `tf.data` pipelines."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import threading
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import context
+from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
+from tensorflow.python.ops import resource_variable_ops
+
+_uid_counter = 0
+_uid_lock = threading.Lock()
+
+
+def _generate_shared_name(prefix):
+  with _uid_lock:
+    global _uid_counter
+    uid = _uid_counter
+    _uid_counter += 1
+  return "{}{}".format(prefix, uid)
+
+
+# TODO(b/73383364): Properly export in the `tf.data.experimental` API when
+# stable or make private / remove.
+class PrivateThreadPool(object):
+  """A stateful resource that represents a private thread pool."""
+
+  def __init__(self, num_threads, display_name=None,
+               max_intra_op_parallelism=1):
+    """Creates a `PrivateThreadPool` with the given number of threads."""
+    if context.executing_eagerly():
+      shared_name = _generate_shared_name("privatethreadpool")
+      self._resource = ged_ops.experimental_thread_pool_handle(
+          num_threads=num_threads,
+          max_intra_op_parallelism=max_intra_op_parallelism,
+          display_name=display_name,
+          shared_name=shared_name)
+      self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
+          handle=self._resource, handle_device=context.context().device_name)
+    else:
+      self._resource = ged_ops.experimental_thread_pool_handle(
+          num_threads=num_threads,
+          max_intra_op_parallelism=max_intra_op_parallelism,
+          display_name=display_name)
+
+
+class _ThreadPoolDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that acts as an identity, and sets a custom threadpool."""
+
+  def __init__(self, input_dataset, thread_pool):
+    super(_ThreadPoolDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._thread_pool = thread_pool
+
+  def _as_variant_tensor(self):
+    return ged_ops.experimental_thread_pool_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._thread_pool._resource,  # pylint: disable=protected-access
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+
+# TODO(b/73383364): Properly export in the `tf.data.experimental` API when
+# stable or make private / remove.
+def override_threadpool(dataset, thread_pool):
+  """Returns a new dataset that uses the given thread pool for its operations.
+
+  Args:
+    dataset: A `tf.data.Dataset` object.
+    thread_pool: A `PrivateThreadPool` object.
+
+  Returns:
+    A dataset containing the same values as `dataset`, but which uses
+    `thread_pool` to compute any of its parallel operations (such as
+    `tf.data.Dataset.map`).
+  """
+  return _ThreadPoolDataset(dataset, thread_pool)
diff --git a/tensorflow/python/data/experimental/ops/unique.py b/tensorflow/python/data/experimental/ops/unique.py
new file mode 100644
index 0000000000..2a7775c456
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/unique.py
@@ -0,0 +1,79 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Unique element dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.unique")
+def unique():
+  """Creates a `Dataset` from another `Dataset`, discarding duplicates.
+
+  Use this transformation to produce a dataset that contains one instance of
+  each unique element in the input. For example:
+
+  ```python
+  dataset = tf.data.Dataset.from_tensor_slices([1, 37, 2, 37, 2, 1])
+
+  # Using `unique()` will drop the duplicate elements.
+  dataset = dataset.apply(tf.data.experimental.unique())  # ==> { 1, 37, 2 }
+  ```
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _UniqueDataset(dataset)
+
+  return _apply_fn
+
+
+class _UniqueDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` contains the unique elements from its input."""
+
+  def __init__(self, input_dataset):
+    """See `unique()` for details."""
+    super(_UniqueDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    if input_dataset.output_types not in (dtypes.int32, dtypes.int64,
+                                          dtypes.string):
+      raise TypeError(
+          "`tf.data.experimental.unique()` only supports inputs with a single "
+          "`tf.int32`, `tf.int64`, or `tf.string` component.")
+
+  def _as_variant_tensor(self):
+    return gen_experimental_dataset_ops.experimental_unique_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
diff --git a/tensorflow/python/data/experimental/ops/writers.py b/tensorflow/python/data/experimental/ops/writers.py
new file mode 100644
index 0000000000..994447cb4d
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/writers.py
@@ -0,0 +1,60 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Python wrappers for tf.data writers."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import convert
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("data.experimental.TFRecordWriter")
+class TFRecordWriter(object):
+  """Writes data to a TFRecord file."""
+
+  def __init__(self, filename, compression_type=None):
+    self._filename = ops.convert_to_tensor(
+        filename, dtypes.string, name="filename")
+    self._compression_type = convert.optional_param_to_tensor(
+        "compression_type",
+        compression_type,
+        argument_default="",
+        argument_dtype=dtypes.string)
+
+  def write(self, dataset):
+    """Returns a `tf.Operation` to write a dataset to a file.
+
+    Args:
+      dataset: a `tf.data.Dataset` whose elements are to be written to a file
+
+    Returns:
+      A `tf.Operation` that, when run, writes contents of `dataset` to a file.
+    """
+    if not isinstance(dataset, dataset_ops.Dataset):
+      raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
+    if (dataset.output_types != dtypes.string or
+        dataset.output_shapes != tensor_shape.scalar()):
+      raise TypeError(
+          "`dataset` must produce scalar `DT_STRING` tensors whereas it "
+          "produces shape {0} and types {1}".format(dataset.output_shapes,
+                                                    dataset.output_types))
+    return gen_dataset_ops.dataset_to_tf_record(
+        dataset._as_variant_tensor(), self._filename, self._compression_type)  # pylint: disable=protected-access
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 6bba72a8e9..3b9d3a639d 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -889,8 +889,8 @@ class Dataset(object):
       will be padded out to the maximum length of all elements in that
       dimension.
 
-    See also `tf.contrib.data.dense_to_sparse_batch`, which combines elements
-    that may have different shapes into a `tf.SparseTensor`.
+    See also `tf.data.experimental.dense_to_sparse_batch`, which combines
+    elements that may have different shapes into a `tf.SparseTensor`.
 
     Args:
       batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
diff --git a/tensorflow/python/data/ops/optional_ops.py b/tensorflow/python/data/ops/optional_ops.py
index 3bbebd7878..aca989e03a 100644
--- a/tensorflow/python/data/ops/optional_ops.py
+++ b/tensorflow/python/data/ops/optional_ops.py
@@ -31,7 +31,7 @@ class Optional(object):
 
   An `Optional` can represent the result of an operation that may fail as a
   value, rather than raising an exception and halting execution. For example,
-  `tf.contrib.data.get_next_as_optional` returns an `Optional` that either
+  `tf.data.experimental.get_next_as_optional` returns an `Optional` that either
   contains the next value from a `tf.data.Iterator` if one exists, or a "none"
   value that indicates the end of the sequence has been reached.
   """
@@ -111,7 +111,7 @@ class Optional(object):
 
 
 class _OptionalImpl(Optional):
-  """Concrete implementation of `tf.contrib.data.Optional`.
+  """Concrete implementation of `tf.data.experimental.Optional`.
 
   NOTE(mrry): This implementation is kept private, to avoid defining
   `Optional.__init__()` in the public API.
diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py
index b0f26631f9..d08da6704c 100644
--- a/tensorflow/python/data/ops/readers.py
+++ b/tensorflow/python/data/ops/readers.py
@@ -129,7 +129,7 @@ class ParallelInterleaveDataset(dataset_ops.InterleaveDataset):
 
   def __init__(self, input_dataset, map_func, cycle_length, block_length,
                sloppy, buffer_output_elements, prefetch_input_elements):
-    """See `tf.contrib.data.parallel_interleave()` for details."""
+    """See `tf.data.experimental.parallel_interleave()` for details."""
     super(ParallelInterleaveDataset, self).__init__(input_dataset, map_func,
                                                     cycle_length, block_length)
     self._sloppy = ops.convert_to_tensor(
@@ -158,7 +158,7 @@ class ParallelInterleaveDataset(dataset_ops.InterleaveDataset):
     # pylint: enable=protected-access
 
   def _transformation_name(self):
-    return "tf.contrib.data.parallel_interleave()"
+    return "tf.data.experimental.parallel_interleave()"
 
 
 @tf_export("data.TFRecordDataset")
diff --git a/tensorflow/python/debug/examples/debug_tflearn_iris.py b/tensorflow/python/debug/examples/debug_tflearn_iris.py
index 019f13c450..f9bb3148fb 100644
--- a/tensorflow/python/debug/examples/debug_tflearn_iris.py
+++ b/tensorflow/python/debug/examples/debug_tflearn_iris.py
@@ -94,13 +94,15 @@ def main(_):
         "sepal_length", "sepal_width", "petal_length", "petal_width", "label"]
     batch_size = 32
     def training_input_fn():
-      return tf.contrib.data.make_csv_dataset(
-          [training_data_path], batch_size,
-          column_names=column_names, label_name="label")
+      return tf.data.experimental.make_csv_dataset([training_data_path],
+                                                   batch_size,
+                                                   column_names=column_names,
+                                                   label_name="label")
     def test_input_fn():
-      return tf.contrib.data.make_csv_dataset(
-          [test_data_path], batch_size,
-          column_names=column_names, label_name="label")
+      return tf.data.experimental.make_csv_dataset([test_data_path],
+                                                   batch_size,
+                                                   column_names=column_names,
+                                                   label_name="label")
     feature_columns = [tf.feature_column.numeric_column(feature)
                        for feature in column_names[:-1]]
 
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 5ce5410e0b..533a138a39 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -8,6 +8,7 @@ TENSORFLOW_API_INIT_FILES = [
     "bitwise/__init__.py",
     "compat/__init__.py",
     "data/__init__.py",
+    "data/experimental/__init__.py",
     "debugging/__init__.py",
     "distributions/__init__.py",
     "dtypes/__init__.py",
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index 587eb232f5..0747424eab 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -8,6 +8,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "bitwise/__init__.py",
     "compat/__init__.py",
     "data/__init__.py",
+    "data/experimental/__init__.py",
     "debugging/__init__.py",
     "distributions/__init__.py",
     "dtypes/__init__.py",
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
new file mode 100644
index 0000000000..03c16cda8b
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.data.experimental.CheckpointInputPipelineHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.iterator_ops.CheckpointInputPipelineHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'estimator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..3eeaa1b185
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.CsvDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
new file mode 100644
index 0000000000..0c0405ee02
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.CsvDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.readers.CsvDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'filenames\', \'record_defaults\', \'compression_type\', \'buffer_size\', \'header\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \',\', \'True\', \'\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional.pbtxt
new file mode 100644
index 0000000000..b4c9459098
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional.pbtxt
@@ -0,0 +1,28 @@
+path: "tensorflow.data.experimental.Optional"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.optional_ops.Optional\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "value_structure"
+    mtype: "<class \'abc.abstractproperty\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_value"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "has_value"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "none_from_structure"
+    argspec: "args=[\'value_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..2991b12f64
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.RandomDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
new file mode 100644
index 0000000000..bce0be4b17
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.RandomDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.random_ops.RandomDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-reducer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-reducer.pbtxt
new file mode 100644
index 0000000000..6b477a8a72
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-reducer.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.data.experimental.Reducer"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.grouping.Reducer\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "finalize_func"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "init_func"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "reduce_func"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'init_func\', \'reduce_func\', \'finalize_func\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..948e99ef86
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.SqlDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
new file mode 100644
index 0000000000..8aeae92d96
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.SqlDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.readers.SqlDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'driver_name\', \'data_source_name\', \'query\', \'output_types\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-stats-aggregator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-stats-aggregator.pbtxt
new file mode 100644
index 0000000000..0bcc8cf3e8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-stats-aggregator.pbtxt
@@ -0,0 +1,13 @@
+path: "tensorflow.data.experimental.StatsAggregator"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.stats_ops.StatsAggregator\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_summary"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-t-f-record-writer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-t-f-record-writer.pbtxt
new file mode 100644
index 0000000000..6f9d18a701
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-t-f-record-writer.pbtxt
@@ -0,0 +1,13 @@
+path: "tensorflow.data.experimental.TFRecordWriter"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.writers.TFRecordWriter\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'filename\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "write"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
new file mode 100644
index 0000000000..b14585f8d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
@@ -0,0 +1,139 @@
+path: "tensorflow.data.experimental"
+tf_module {
+  member {
+    name: "CheckpointInputPipelineHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CsvDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "Optional"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "Reducer"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SqlDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "StatsAggregator"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordWriter"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "Counter"
+    argspec: "args=[\'start\', \'step\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'1\', \"<dtype: \'int64\'>\"], "
+  }
+  member_method {
+    name: "bucket_by_sequence_length"
+    argspec: "args=[\'element_length_func\', \'bucket_boundaries\', \'bucket_batch_sizes\', \'padded_shapes\', \'padding_values\', \'pad_to_bucket_boundary\', \'no_padding\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], "
+  }
+  member_method {
+    name: "choose_from_datasets"
+    argspec: "args=[\'datasets\', \'choice_dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "copy_to_device"
+    argspec: "args=[\'target_device\', \'source_device\'], varargs=None, keywords=None, defaults=[\'/cpu:0\'], "
+  }
+  member_method {
+    name: "dense_to_sparse_batch"
+    argspec: "args=[\'batch_size\', \'row_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "enumerate_dataset"
+    argspec: "args=[\'start\'], varargs=None, keywords=None, defaults=[\'0\'], "
+  }
+  member_method {
+    name: "get_next_as_optional"
+    argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_single_element"
+    argspec: "args=[\'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "group_by_reducer"
+    argspec: "args=[\'key_func\', \'reducer\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "group_by_window"
+    argspec: "args=[\'key_func\', \'reduce_func\', \'window_size\', \'window_size_func\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "ignore_errors"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "latency_stats"
+    argspec: "args=[\'tag\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "make_batched_features_dataset"
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'features\', \'reader\', \'label_key\', \'reader_args\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'reader_num_threads\', \'parser_num_threads\', \'sloppy_ordering\', \'drop_final_batch\'], varargs=None, keywords=None, defaults=[\"<class \'tensorflow.python.data.ops.readers.TFRecordDataset\'>\", \'None\', \'None\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'2\', \'False\', \'False\'], "
+  }
+  member_method {
+    name: "make_csv_dataset"
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'column_names\', \'column_defaults\', \'label_name\', \'select_columns\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'header\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'num_parallel_reads\', \'sloppy\', \'num_rows_for_inference\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \',\', \'True\', \'\', \'True\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'False\', \'100\', \'None\'], "
+  }
+  member_method {
+    name: "make_saveable_from_iterator"
+    argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map_and_batch"
+    argspec: "args=[\'map_func\', \'batch_size\', \'num_parallel_batches\', \'drop_remainder\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "parallel_interleave"
+    argspec: "args=[\'map_func\', \'cycle_length\', \'block_length\', \'sloppy\', \'buffer_output_elements\', \'prefetch_input_elements\'], varargs=None, keywords=None, defaults=[\'1\', \'False\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "parse_example_dataset"
+    argspec: "args=[\'features\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\'], "
+  }
+  member_method {
+    name: "prefetch_to_device"
+    argspec: "args=[\'device\', \'buffer_size\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "rejection_resample"
+    argspec: "args=[\'class_func\', \'target_dist\', \'initial_dist\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "sample_from_datasets"
+    argspec: "args=[\'datasets\', \'weights\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "scan"
+    argspec: "args=[\'initial_state\', \'scan_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_stats_aggregator"
+    argspec: "args=[\'stats_aggregator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle_and_repeat"
+    argspec: "args=[\'buffer_size\', \'count\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "unbatch"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "unique"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
index 56fb270a49..e205157523 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
@@ -20,4 +20,8 @@ tf_module {
     name: "TextLineDataset"
     mtype: "<class \'abc.ABCMeta\'>"
   }
+  member {
+    name: "experimental"
+    mtype: "<type \'module\'>"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
new file mode 100644
index 0000000000..03c16cda8b
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-checkpoint-input-pipeline-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.data.experimental.CheckpointInputPipelineHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.iterator_ops.CheckpointInputPipelineHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'estimator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..3eeaa1b185
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.CsvDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
new file mode 100644
index 0000000000..0c0405ee02
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.CsvDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.readers.CsvDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'filenames\', \'record_defaults\', \'compression_type\', \'buffer_size\', \'header\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \',\', \'True\', \'\', \'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional.pbtxt
new file mode 100644
index 0000000000..b4c9459098
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional.pbtxt
@@ -0,0 +1,28 @@
+path: "tensorflow.data.experimental.Optional"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.optional_ops.Optional\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "value_structure"
+    mtype: "<class \'abc.abstractproperty\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_value"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "has_value"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "none_from_structure"
+    argspec: "args=[\'value_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..2991b12f64
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.RandomDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
new file mode 100644
index 0000000000..bce0be4b17
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.RandomDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.random_ops.RandomDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-reducer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-reducer.pbtxt
new file mode 100644
index 0000000000..6b477a8a72
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-reducer.pbtxt
@@ -0,0 +1,21 @@
+path: "tensorflow.data.experimental.Reducer"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.grouping.Reducer\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "finalize_func"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "init_func"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "reduce_func"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'init_func\', \'reduce_func\', \'finalize_func\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
new file mode 100644
index 0000000000..948e99ef86
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.__metaclass__.pbtxt
@@ -0,0 +1,14 @@
+path: "tensorflow.data.experimental.SqlDataset.__metaclass__"
+tf_class {
+  is_instance: "<class \'abc.ABCMeta\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "mro"
+  }
+  member_method {
+    name: "register"
+    argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
new file mode 100644
index 0000000000..8aeae92d96
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
@@ -0,0 +1,127 @@
+path: "tensorflow.data.experimental.SqlDataset"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.readers.SqlDataset\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetSource\'>"
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Dataset\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "output_classes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_shapes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "output_types"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'driver_name\', \'data_source_name\', \'query\', \'output_types\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "apply"
+    argspec: "args=[\'self\', \'transformation_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "batch"
+    argspec: "args=[\'self\', \'batch_size\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'False\'], "
+  }
+  member_method {
+    name: "cache"
+    argspec: "args=[\'self\', \'filename\'], varargs=None, keywords=None, defaults=[\'\'], "
+  }
+  member_method {
+    name: "concatenate"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "filter"
+    argspec: "args=[\'self\', \'predicate\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "flat_map"
+    argspec: "args=[\'self\', \'map_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_generator"
+    argspec: "args=[\'generator\', \'output_types\', \'output_shapes\', \'args\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_sparse_tensor_slices"
+    argspec: "args=[\'sparse_tensor\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensor_slices"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_tensors"
+    argspec: "args=[\'tensors\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "interleave"
+    argspec: "args=[\'self\', \'map_func\', \'cycle_length\', \'block_length\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "list_files"
+    argspec: "args=[\'file_pattern\', \'shuffle\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "make_initializable_iterator"
+    argspec: "args=[\'self\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "make_one_shot_iterator"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map"
+    argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "padded_batch"
+    argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
+  }
+  member_method {
+    name: "prefetch"
+    argspec: "args=[\'self\', \'buffer_size\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[], varargs=args, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reduce"
+    argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "repeat"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "shard"
+    argspec: "args=[\'self\', \'num_shards\', \'index\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle"
+    argspec: "args=[\'self\', \'buffer_size\', \'seed\', \'reshuffle_each_iteration\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "skip"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "take"
+    argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "window"
+    argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
+  }
+  member_method {
+    name: "zip"
+    argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-stats-aggregator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-stats-aggregator.pbtxt
new file mode 100644
index 0000000000..0bcc8cf3e8
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-stats-aggregator.pbtxt
@@ -0,0 +1,13 @@
+path: "tensorflow.data.experimental.StatsAggregator"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.stats_ops.StatsAggregator\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_summary"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-t-f-record-writer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-t-f-record-writer.pbtxt
new file mode 100644
index 0000000000..6f9d18a701
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-t-f-record-writer.pbtxt
@@ -0,0 +1,13 @@
+path: "tensorflow.data.experimental.TFRecordWriter"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.experimental.ops.writers.TFRecordWriter\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'filename\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "write"
+    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
new file mode 100644
index 0000000000..b14585f8d7
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
@@ -0,0 +1,139 @@
+path: "tensorflow.data.experimental"
+tf_module {
+  member {
+    name: "CheckpointInputPipelineHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CsvDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "Optional"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "Reducer"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SqlDataset"
+    mtype: "<class \'abc.ABCMeta\'>"
+  }
+  member {
+    name: "StatsAggregator"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TFRecordWriter"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "Counter"
+    argspec: "args=[\'start\', \'step\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'1\', \"<dtype: \'int64\'>\"], "
+  }
+  member_method {
+    name: "bucket_by_sequence_length"
+    argspec: "args=[\'element_length_func\', \'bucket_boundaries\', \'bucket_batch_sizes\', \'padded_shapes\', \'padding_values\', \'pad_to_bucket_boundary\', \'no_padding\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], "
+  }
+  member_method {
+    name: "choose_from_datasets"
+    argspec: "args=[\'datasets\', \'choice_dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "copy_to_device"
+    argspec: "args=[\'target_device\', \'source_device\'], varargs=None, keywords=None, defaults=[\'/cpu:0\'], "
+  }
+  member_method {
+    name: "dense_to_sparse_batch"
+    argspec: "args=[\'batch_size\', \'row_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "enumerate_dataset"
+    argspec: "args=[\'start\'], varargs=None, keywords=None, defaults=[\'0\'], "
+  }
+  member_method {
+    name: "get_next_as_optional"
+    argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_single_element"
+    argspec: "args=[\'dataset\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "group_by_reducer"
+    argspec: "args=[\'key_func\', \'reducer\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "group_by_window"
+    argspec: "args=[\'key_func\', \'reduce_func\', \'window_size\', \'window_size_func\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "ignore_errors"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "latency_stats"
+    argspec: "args=[\'tag\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "make_batched_features_dataset"
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'features\', \'reader\', \'label_key\', \'reader_args\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'reader_num_threads\', \'parser_num_threads\', \'sloppy_ordering\', \'drop_final_batch\'], varargs=None, keywords=None, defaults=[\"<class \'tensorflow.python.data.ops.readers.TFRecordDataset\'>\", \'None\', \'None\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'2\', \'False\', \'False\'], "
+  }
+  member_method {
+    name: "make_csv_dataset"
+    argspec: "args=[\'file_pattern\', \'batch_size\', \'column_names\', \'column_defaults\', \'label_name\', \'select_columns\', \'field_delim\', \'use_quote_delim\', \'na_value\', \'header\', \'num_epochs\', \'shuffle\', \'shuffle_buffer_size\', \'shuffle_seed\', \'prefetch_buffer_size\', \'num_parallel_reads\', \'sloppy\', \'num_rows_for_inference\', \'compression_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \',\', \'True\', \'\', \'True\', \'None\', \'True\', \'10000\', \'None\', \'-1\', \'1\', \'False\', \'100\', \'None\'], "
+  }
+  member_method {
+    name: "make_saveable_from_iterator"
+    argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "map_and_batch"
+    argspec: "args=[\'map_func\', \'batch_size\', \'num_parallel_batches\', \'drop_remainder\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "parallel_interleave"
+    argspec: "args=[\'map_func\', \'cycle_length\', \'block_length\', \'sloppy\', \'buffer_output_elements\', \'prefetch_input_elements\'], varargs=None, keywords=None, defaults=[\'1\', \'False\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "parse_example_dataset"
+    argspec: "args=[\'features\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'1\'], "
+  }
+  member_method {
+    name: "prefetch_to_device"
+    argspec: "args=[\'device\', \'buffer_size\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "rejection_resample"
+    argspec: "args=[\'class_func\', \'target_dist\', \'initial_dist\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "sample_from_datasets"
+    argspec: "args=[\'datasets\', \'weights\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "scan"
+    argspec: "args=[\'initial_state\', \'scan_func\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_stats_aggregator"
+    argspec: "args=[\'stats_aggregator\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "shuffle_and_repeat"
+    argspec: "args=[\'buffer_size\', \'count\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "unbatch"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "unique"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
index 56fb270a49..e205157523 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
@@ -20,4 +20,8 @@ tf_module {
     name: "TextLineDataset"
     mtype: "<class \'abc.ABCMeta\'>"
   }
+  member {
+    name: "experimental"
+    mtype: "<type \'module\'>"
+  }
 }
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 3a1c4a45d4..164b3d8303 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -64,8 +64,6 @@ COMMON_PIP_DEPS = [
     "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
     "//tensorflow/contrib/compiler:xla",
     "//tensorflow/contrib/constrained_optimization:constrained_optimization_pip",
-    "//tensorflow/contrib/data/python/kernel_tests/serialization:dataset_serialization_test_base",
-    "//tensorflow/contrib/data/python/kernel_tests:stats_dataset_test_base",
     "//tensorflow/contrib/eager/python/examples:examples_pip",
     "//tensorflow/contrib/eager/python:evaluator",
     "//tensorflow/contrib/gan:gan",
@@ -106,6 +104,8 @@ COMMON_PIP_DEPS = [
     "//tensorflow/python:meta_graph_testdata",
     "//tensorflow/python:spectral_ops_test_util",
     "//tensorflow/python:util_example_parser_configuration",
+    "//tensorflow/python/data/experimental/kernel_tests/serialization:dataset_serialization_test_base",
+    "//tensorflow/python/data/experimental/kernel_tests:stats_dataset_test_base",
     "//tensorflow/python/data/kernel_tests:test_base",
     "//tensorflow/python/debug:debug_pip",
     "//tensorflow/python/eager:eager_pip",
-- 
GitLab


From 80f8931682aeaae89786f0940892a6557b4cfd67 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 17:05:45 -0700
Subject: [PATCH 0190/1085] Mark bfloat16 as supported for
 ExponentialMovingAverage.

PiperOrigin-RevId: 215307701
---
 tensorflow/python/training/moving_averages.py |  9 ++++---
 .../python/training/moving_averages_test.py   | 27 +++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/training/moving_averages.py b/tensorflow/python/training/moving_averages.py
index 177a7ddfa5..041266da3e 100644
--- a/tensorflow/python/training/moving_averages.py
+++ b/tensorflow/python/training/moving_averages.py
@@ -372,13 +372,13 @@ class ExponentialMovingAverage(object):
 
     Args:
       var_list: A list of Variable or Tensor objects. The variables
-        and Tensors must be of types float16, float32, or float64.
+        and Tensors must be of types bfloat16, float16, float32, or float64.
 
     Returns:
       An Operation that updates the moving averages.
 
     Raises:
-      TypeError: If the arguments are not all float16, float32, or float64.
+      TypeError: If the arguments are not an allowed type.
       ValueError: If the moving average of one of the variables is already
         being computed.
     """
@@ -387,8 +387,9 @@ class ExponentialMovingAverage(object):
       var_list = variables.trainable_variables()
     zero_debias_true = set()  # set of vars to set `zero_debias=True`
     for var in var_list:
-      if var.dtype.base_dtype not in [dtypes.float16, dtypes.float32,
-                                      dtypes.float64]:
+      if var.dtype.base_dtype not in [
+          dtypes.bfloat16, dtypes.float16, dtypes.float32, dtypes.float64
+      ]:
         raise TypeError("The variables must be half, float, or double: %s" %
                         var.name)
 
diff --git a/tensorflow/python/training/moving_averages_test.py b/tensorflow/python/training/moving_averages_test.py
index 93991d0e14..bb2fca66e3 100644
--- a/tensorflow/python/training/moving_averages_test.py
+++ b/tensorflow/python/training/moving_averages_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -110,6 +111,32 @@ class MovingAveragesTest(test.TestCase):
       denominator_2 = denominator_1 * decay + weight_2 * (1.0 - decay)
       self.assertAllClose(numerator_2 / denominator_2, wma_array)
 
+  def testWeightedMovingAverageBfloat16(self):
+    bfloat16 = pywrap_tensorflow.TF_bfloat16_type()
+    with self.cached_session() as sess:
+      decay = 0.5
+      weight = array_ops.placeholder(dtypes.bfloat16, [])
+      val = array_ops.placeholder(dtypes.bfloat16, [])
+
+      wma = moving_averages.weighted_moving_average(val, decay, weight)
+      variables.global_variables_initializer().run()
+
+      # Get the first weighted moving average.
+      val_1 = 3.0
+      weight_1 = 4.0
+      wma_array = sess.run(wma, feed_dict={val: val_1, weight: weight_1})
+      numerator_1 = val_1 * weight_1 * (1.0 - decay)
+      denominator_1 = weight_1 * (1.0 - decay)
+      self.assertAllClose(numerator_1 / denominator_1, wma_array)
+
+      # Get the second weighted moving average.
+      val_2 = 11.0
+      weight_2 = 22.0
+      wma_array = sess.run(wma, feed_dict={val: val_2, weight: weight_2})
+      numerator_2 = numerator_1 * decay + val_2 * weight_2 * (1.0 - decay)
+      denominator_2 = denominator_1 * decay + weight_2 * (1.0 - decay)
+      self.assertAllClose(bfloat16(numerator_2 / denominator_2), wma_array)
+
 
 def _Repeat(value, dim):
   if dim == 1:
-- 
GitLab


From 7dc5f7caa959c70d5ca948f7b0fc5abfea9a5935 Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Mon, 1 Oct 2018 17:18:28 -0700
Subject: [PATCH 0191/1085]  Minor changes, hanged  CHECK_GE to DCHECK_GE due
 to code policy change

---
 tensorflow/core/common_runtime/process_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index 4570496637..e1dc08d645 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -65,7 +65,7 @@ int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
 #ifdef _OPENMP
     mkl_intra_op = omp_get_max_threads();
 #endif  // _OPENMP
-    CHECK_GE(mkl_intra_op, 1);
+    DCHECK_GE(mkl_intra_op, 1);
     const int32 mkl_inter_op = std::max(
         (port::NumSchedulableCPUs() + mkl_intra_op - 1) / mkl_intra_op, 2);
     VLOG(0)
-- 
GitLab


From bfbe2bbe6a83a4acfa8f87aa5c8228e74b37bb61 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 1 Oct 2018 17:18:24 -0700
Subject: [PATCH 0192/1085] [tf.data] More robust solution for input pipeline
 <--> performance model coordination.

PiperOrigin-RevId: 215309735
---
 tensorflow/core/framework/dataset.h           | 12 +--
 tensorflow/core/framework/model.cc            | 83 ++++++++---------
 tensorflow/core/framework/model.h             | 42 +++++----
 .../kernels/data/map_and_batch_dataset_op.cc  | 90 ++++++++++---------
 .../data/parallel_interleave_dataset_op.cc    | 86 +++++++++---------
 .../kernels/data/parallel_map_iterator.cc     | 77 ++++++++--------
 6 files changed, 201 insertions(+), 189 deletions(-)

diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 697e0604bf..8c1151cb56 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -657,15 +657,15 @@ class DatasetBaseIterator : public IteratorBase {
   // When performance modeling is enabled, this method adds a tunable parameter
   // to the model node corresponding to this iterator.
   //
-  // The performance modeling logic may use `value` to set the value of the
+  // The performance modeling logic may use `state` to set the value of the
   // tunable parameter at any point during the lifetime of this iterator. When
-  // it does, it notifies `cond_var`.
+  // it does, it acquires `state->mu` and notifies `state->cond_var`.
   void AddTunableParameter(IteratorContext* ctx, const string& name,
-                           std::atomic<int64>* value, int64 min, int64 max,
-                           condition_variable* cond_var) {
+                           std::shared_ptr<model::SharedState> state, int64 min,
+                           int64 max) {
     if (ctx->model()) {
-      ctx->model()->AddTunableParameter(prefix(), name, value, min, max,
-                                        cond_var);
+      ctx->model()->AddTunableParameter(prefix(), name, std::move(state), min,
+                                        max);
     }
   }
 
diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index b0330ec990..bfdb3a6658 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -296,12 +296,12 @@ void Model::AddProcessingTime(const string& name, int64 delta) {
 
 void Model::AddTunableParameter(const string& node_name,
                                 const string& parameter_name,
-                                std::atomic<int64>* value, int64 min, int64 max,
-                                condition_variable* cond_var) {
+                                std::shared_ptr<SharedState> state, int64 min,
+                                int64 max) {
   tf_shared_lock l(mu_);
   auto node = *gtl::FindOrNull(lookup_table_, node_name);
   DCHECK(node);
-  node->add_tunable_param(parameter_name, value, min, max, cond_var);
+  node->add_tunable_param(parameter_name, std::move(state), min, max);
 }
 
 // The optimization algorithm starts by setting all tunable parallelism
@@ -311,54 +311,55 @@ void Model::AddTunableParameter(const string& node_name,
 // is less than or equal to the processing time needed to produce an element
 // divided by CPU budget.
 void Model::Optimize(int64 cpu_budget) {
-  tf_shared_lock lock(mu_);
   std::vector<std::shared_ptr<Model::Node::Tunable>> tunables;
-  const int64 processing_time = ProcessingTime();
-  tunables = CollectTunables();
-  for (auto tunable : tunables) {
-    tunable->value = 1;
-  }
-  while (true) {
-    const int64 output_time = OutputTime();
-    bool all_tunables = true;
-    for (auto& tunable : tunables) {
-      if (tunable->value < tunable->max) {
-        all_tunables = false;
+  {
+    tf_shared_lock lock(mu_);
+    const int64 processing_time = ProcessingTime();
+    tunables = CollectTunables();
+    for (auto tunable : tunables) {
+      tunable->value = 1;
+    }
+    while (true) {
+      const int64 output_time = OutputTime();
+      bool all_tunables = true;
+      for (auto& tunable : tunables) {
+        if (tunable->value < tunable->max) {
+          all_tunables = false;
+          break;
+        }
+      }
+      if (output_time < processing_time / cpu_budget || all_tunables) {
         break;
       }
-    }
-    if (output_time < processing_time / cpu_budget || all_tunables) {
-      break;
-    }
-    int64 best_delta = -1;
-    Model::Node::Tunable* best_tunable = nullptr;
-    for (auto& tunable : tunables) {
-      if (tunable->value == tunable->max) {
-        continue;
+      int64 best_delta = -1;
+      Model::Node::Tunable* best_tunable = nullptr;
+      for (auto& tunable : tunables) {
+        if (tunable->value == tunable->max) {
+          continue;
+        }
+        tunable->value++;
+        int64 delta = output_time - OutputTime();
+        if (delta > best_delta) {
+          best_delta = delta;
+          best_tunable = tunable.get();
+        }
+        tunable->value--;
       }
-      tunable->value++;
-      int64 delta = output_time - OutputTime();
-      if (delta > best_delta) {
-        best_delta = delta;
-        best_tunable = tunable.get();
+      if (!best_tunable) {
+        // NOTE: This can happen because we are performing the optimization
+        // while the model data is changing. If this becomes an issue, we should
+        // look into performing the optimization using a model snapshot.
+        break;
       }
-      tunable->value--;
+      best_tunable->value++;
     }
-    if (!best_tunable) {
-      // NOTE: This can happen because we are performing the optimization
-      // while the model data is changing. If this becomes an issue, we should
-      // look into performing the optimization using a model snapshot.
-      break;
-    }
-    best_tunable->value++;
   }
   VLOG(2) << "Number of knobs: " << tunables.size();
   for (auto& tunable : tunables) {
     VLOG(2) << "Setting tunable parameter: " << tunable->value;
-    tunable->value_ptr->store(tunable->value);
-    if (tunable->cond_var) {
-      tunable->cond_var->notify_all();
-    }
+    mutex_lock l(*tunable->state->mu);
+    tunable->state->value = tunable->value;
+    tunable->state->cond_var->notify_all();
   }
 }
 
diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h
index 26402f5cd3..eae0fa70e8 100644
--- a/tensorflow/core/framework/model.h
+++ b/tensorflow/core/framework/model.h
@@ -33,6 +33,19 @@ namespace tensorflow {
 namespace data {
 namespace model {
 
+// Represents thread-safe state that can be shared between an input pipeline and
+// the performance model.
+struct SharedState {
+ public:
+  explicit SharedState(int64 value, std::shared_ptr<mutex> mu,
+                       std::shared_ptr<condition_variable> cond_var)
+      : value(value), mu(std::move(mu)), cond_var(std::move(cond_var)) {}
+
+  std::shared_ptr<mutex> mu;
+  std::shared_ptr<condition_variable> cond_var;
+  int64 value;
+};
+
 // Abstract representation of a TensorFlow input pipeline that can be used
 // for collecting runtime information and optimizing performance. It collects
 // runtime information about execution of the input pipeline that is used to
@@ -62,8 +75,8 @@ class Model {
   // Adds a tunable parameter for the given node.
   void AddTunableParameter(const string& node_name,
                            const string& parameter_name,
-                           std::atomic<int64>* value, int64 min, int64 max,
-                           condition_variable* cond_var) LOCKS_EXCLUDED(mu_);
+                           std::shared_ptr<SharedState> value, int64 min,
+                           int64 max) LOCKS_EXCLUDED(mu_);
 
   // Runs optimization.
   void Optimize(int64 cpu_budget) LOCKS_EXCLUDED(mu_);
@@ -109,13 +122,8 @@ class Model {
    public:
     // Represents a tunable parameter.
     struct Tunable {
-      Tunable(std::atomic<int64>* value, int64 min, int64 max,
-              condition_variable* cond_var)
-          : value(*value),
-            min(min),
-            max(max),
-            value_ptr(value),
-            cond_var(cond_var) {}
+      Tunable(std::shared_ptr<SharedState> state, int64 min, int64 max)
+          : value(state->value), min(min), max(max), state(std::move(state)) {}
 
       // Identifies the model value of the parameter. This can be different from
       // the actual value (e.g. during optimization search).
@@ -127,12 +135,8 @@ class Model {
       // Identifies the maximum value of the parameter.
       int64 max;
 
-      // Points to the actual value of the parameter. Not owned.
-      std::atomic<int64>* value_ptr;
-
-      // If non-null, this condition variable is notified when the model updates
-      // the actual value of the parameter (via `value_ptr`). Not owned.
-      condition_variable* cond_var;
+      // Shared state of the parameter.
+      std::shared_ptr<SharedState> state;
     };
 
     Node(int64 id, const string& name, std::shared_ptr<Node> output)
@@ -158,12 +162,12 @@ class Model {
     }
 
     // Adds a tunable parameter.
-    void add_tunable_param(const string& name, std::atomic<int64>* value,
-                           int64 min, int64 max, condition_variable* cond_var)
-        LOCKS_EXCLUDED(mu_) {
+    void add_tunable_param(const string& name,
+                           std::shared_ptr<SharedState> state, int64 min,
+                           int64 max) LOCKS_EXCLUDED(mu_) {
       mutex_lock l(mu_);
       tunable_params_[name] =
-          std::make_shared<Tunable>(value, min, max, cond_var);
+          std::make_shared<Tunable>(std::move(state), min, max);
     }
 
     // Returns the unique node ID.
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index b4c7f9e510..bf08970560 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -187,29 +187,31 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
      public:
       explicit Iterator(const Params& params)
           : DatasetIterator<Dataset>(params),
-            num_parallel_calls_(params.dataset->num_parallel_calls_) {}
+            mu_(std::make_shared<mutex>()),
+            cond_var_(std::make_shared<condition_variable>()),
+            num_parallel_calls_(std::make_shared<model::SharedState>(
+                params.dataset->num_parallel_calls_, mu_, cond_var_)) {}
 
       ~Iterator() override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         // Cancel the runner thread.
         cancelled_ = true;
-        cond_var_.notify_all();
+        cond_var_->notify_all();
         // Wait for all in-flight calls to complete.
         while (num_calls_ > 0) {
-          cond_var_.wait(l);
+          cond_var_->wait(l);
         }
       }
 
       Status Initialize(IteratorContext* ctx) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         AddConstantParameter(ctx, "batch_size", dataset()->batch_size_);
-        if (num_parallel_calls_ == kAutoTune) {
-          num_parallel_calls_ = 1;
-          AddTunableParameter(ctx, "parallelism",
-                              &num_parallel_calls_ /* value */, 1 /* min */,
-                              port::NumSchedulableCPUs() /* max */, &cond_var_);
+        if (num_parallel_calls_->value == kAutoTune) {
+          num_parallel_calls_->value = 1;
+          AddTunableParameter(ctx, "parallelism", num_parallel_calls_, 1,
+                              port::NumSchedulableCPUs());
         } else {
-          AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
+          AddConstantParameter(ctx, "parallelism", num_parallel_calls_->value);
         }
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
@@ -221,27 +223,27 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                              bool* end_of_sequence) override {
         std::shared_ptr<BatchResult> result;
         {
-          mutex_lock l(mu_);
+          mutex_lock l(*mu_);
           EnsureRunnerThreadStarted(ctx);
           while (batch_results_.empty() ||
                  batch_results_.front()->num_calls > 0) {
             RecordStop(ctx);
-            cond_var_.wait(l);
+            cond_var_->wait(l);
             RecordStart(ctx);
           }
           std::swap(result, batch_results_.front());
           batch_results_.pop_front();
-          cond_var_.notify_all();
+          cond_var_->notify_all();
         }
         return ProcessResult(ctx, result, out_tensors, end_of_sequence);
       }
 
      protected:
       Status SaveInternal(IteratorStateWriter* writer) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         // Wait for all in-flight calls to complete.
         while (num_calls_ > 0) {
-          cond_var_.wait(l);
+          cond_var_->wait(l);
         }
         CHECK_EQ(num_calls_, 0);
         TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
@@ -257,7 +259,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
         TF_RETURN_IF_ERROR(
             reader->ReadScalar(full_name("call_counter"), &call_counter_));
@@ -298,7 +300,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void Callback(const std::shared_ptr<IteratorContext>& ctx,
                     const std::shared_ptr<BatchResult>& result,
                     const std::shared_ptr<std::vector<Tensor>>& return_values,
-                    int64 offset, const Status& status) LOCKS_EXCLUDED(mu_) {
+                    int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) {
         result->UpdateStatus(status);
         if (status.ok()) {
           EnsureOutputAllocated(ctx, result, return_values);
@@ -334,16 +336,16 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
-          LOCKS_EXCLUDED(mu_) {
-        mutex_lock l(mu_);
+          LOCKS_EXCLUDED(*mu_) {
+        mutex_lock l(*mu_);
         num_calls_--;
         result->num_calls--;
-        cond_var_.notify_all();
+        cond_var_->notify_all();
       }
 
       void CallFunction(std::shared_ptr<IteratorContext> ctx,
                         const std::shared_ptr<BatchResult>& result,
-                        int64 offset) LOCKS_EXCLUDED(mu_) {
+                        int64 offset) LOCKS_EXCLUDED(*mu_) {
         // Get the next input element.
         std::vector<Tensor> input_element;
         bool end_of_input;
@@ -400,7 +402,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
           runner_thread_.reset(ctx->env()->StartThread(
@@ -476,14 +478,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       void RunnerThread(const std::shared_ptr<IteratorContext>& ctx)
-          LOCKS_EXCLUDED(mu_) {
+          LOCKS_EXCLUDED(*mu_) {
         std::vector<std::pair<std::shared_ptr<BatchResult>, int64>> new_calls;
         RecordStart(ctx.get());
         auto stop_cleanup =
             gtl::MakeCleanup([this, &ctx]() { RecordStop(ctx.get()); });
-        new_calls.reserve(num_parallel_calls_);
-        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(mu_) -> bool {
-          int64 num_parallel_calls = num_parallel_calls_;
+        new_calls.reserve(num_parallel_calls_->value);
+        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(*mu_) -> bool {
+          int64 num_parallel_calls = num_parallel_calls_->value;
           int64 max_batch_results =
               (num_parallel_calls + dataset()->batch_size_ - 1) /
               dataset()->batch_size_;
@@ -494,10 +496,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         };
         while (true) {
           {
-            mutex_lock l(mu_);
+            mutex_lock l(*mu_);
             while (!cancelled_ && busy()) {
               RecordStop(ctx.get());
-              cond_var_.wait(l);
+              cond_var_->wait(l);
               RecordStart(ctx.get());
             }
 
@@ -524,7 +526,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader,
-                             size_t index) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                             size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         batch_results_.emplace_back(new BatchResult(dataset()->batch_size_));
         std::shared_ptr<BatchResult> result = batch_results_.back();
         string prefix = strings::StrCat("batch_results_", index);
@@ -569,7 +571,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status ReadStatus(IteratorStateReader* reader, const string& prefix,
-                        Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                        Status* status) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         int64 code_int;
         TF_RETURN_IF_ERROR(reader->ReadScalar(
             full_name(strings::StrCat(prefix, "_code")), &code_int));
@@ -587,7 +589,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status WriteBatchResult(IteratorStateWriter* writer, size_t index)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         std::shared_ptr<BatchResult> result = batch_results_[index];
         string prefix = strings::StrCat("batch_results_", index);
         mutex_lock l(result->mu);
@@ -628,7 +630,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
       Status WriteStatus(IteratorStateWriter* writer, const string& prefix,
-                         const Status& status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                         const Status& status) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         TF_RETURN_IF_ERROR(
             writer->WriteScalar(full_name(strings::StrCat(prefix, "_code")),
                                 static_cast<int64>(status.code())));
@@ -642,24 +644,24 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       // Used for coordination between the main thread, the runner thread, and
       // the callback threads.
-      mutex mu_;
+      const std::shared_ptr<mutex> mu_;
       // Used for coordination between the main thread, the runner thread, and
       // the callback threads. In particular, the runner thread should only
-      // schedule new calls when the number of in-flight calls is less than the
-      // user specified level of parallelism and there are slots available in
-      // the `batch_results_` buffer.
-      condition_variable cond_var_;
+      // schedule new calls when the number of in-flight calls is less than
+      // `num_parallel_calls_->value` and there are slots available in the
+      // `batch_results_` buffer.
+      const std::shared_ptr<condition_variable> cond_var_;
       // Identifies the maximum number of parallel calls.
-      std::atomic<int64> num_parallel_calls_;
+      const std::shared_ptr<model::SharedState> num_parallel_calls_;
       // Counts the number of outstanding calls for this batch.
-      int64 num_calls_ GUARDED_BY(mu_) = 0;
+      int64 num_calls_ GUARDED_BY(*mu_) = 0;
       // Counts the total number of calls.
-      int64 call_counter_ GUARDED_BY(mu_) = 0;
+      int64 call_counter_ GUARDED_BY(*mu_) = 0;
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
-      std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(mu_);
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(mu_);
-      bool cancelled_ GUARDED_BY(mu_) = false;
+      std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 2bb38bf0b9..6b6b3d6ab9 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -1217,7 +1217,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
      public:
       explicit Iterator(const Params& params)
           : DatasetIterator<Dataset>(params),
-            num_parallel_calls_(params.dataset->num_parallel_calls_),
+            mu_(std::make_shared<mutex>()),
+            cond_var_(std::make_shared<condition_variable>()),
+            num_parallel_calls_(std::make_shared<model::SharedState>(
+                params.dataset->num_parallel_calls_, mu_, cond_var_)),
             args_list_(params.dataset->cycle_length_),
             current_elements_(params.dataset->cycle_length_),
             element_in_use_(params.dataset->cycle_length_, false),
@@ -1227,25 +1230,24 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
                 false /* low_latency_hint */)) {}
 
       ~Iterator() override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         // Cancel the runner thread.
         cancelled_ = true;
-        cond_var_.notify_all();
+        cond_var_->notify_all();
         // Wait for all in-flight calls to complete.
         while (num_calls_ > 0) {
-          cond_var_.wait(l);
+          cond_var_->wait(l);
         }
       }
 
       Status Initialize(IteratorContext* ctx) override {
-        mutex_lock l(mu_);
-        if (num_parallel_calls_ == kAutoTune) {
-          num_parallel_calls_ = 1;
-          AddTunableParameter(ctx, "parallelism",
-                              &num_parallel_calls_ /* value */, 1 /* min */,
-                              dataset()->cycle_length_ /* max */, &cond_var_);
+        mutex_lock l(*mu_);
+        if (num_parallel_calls_->value == kAutoTune) {
+          num_parallel_calls_->value = 1;
+          AddTunableParameter(ctx, "parallelism", num_parallel_calls_, 1,
+                              dataset()->cycle_length_);
         } else {
-          AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
+          AddConstantParameter(ctx, "parallelism", num_parallel_calls_->value);
         }
         AddConstantParameter(ctx, "cycle_length", dataset()->cycle_length_);
         TF_RETURN_IF_ERROR(
@@ -1259,12 +1261,12 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         std::shared_ptr<InvocationResult> result;
         do {
           {
-            mutex_lock l(mu_);
+            mutex_lock l(*mu_);
             EnsureRunnerThreadStarted(ctx);
             while (invocation_results_.empty() &&
                    (!end_of_input_ || num_open_ > 0)) {
               RecordStop(ctx);
-              cond_var_.wait(l);
+              cond_var_->wait(l);
               RecordStart(ctx);
             }
             if (!invocation_results_.empty()) {
@@ -1274,7 +1276,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
               *end_of_sequence = true;
               return Status::OK();
             }
-            cond_var_.notify_all();
+            cond_var_->notify_all();
           }
           RecordStop(ctx);
           result->notification.WaitForNotification();
@@ -1290,10 +1292,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
      protected:
       Status SaveInternal(IteratorStateWriter* writer) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         // Wait for all in-flight calls to complete.
         while (num_calls_ > 0) {
-          cond_var_.wait(l);
+          cond_var_->wait(l);
         }
         CHECK_EQ(num_calls_, 0);
         TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
@@ -1331,7 +1333,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
       Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
         int64 invocation_results_size;
         TF_RETURN_IF_ERROR(reader->ReadScalar(
@@ -1384,7 +1386,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       };
 
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
           runner_thread_.reset(ctx->env()->StartThread(
@@ -1401,7 +1403,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       void FetchOutputs(
           const std::shared_ptr<IteratorContext>& ctx, int64 cycle_index,
           const std::vector<std::shared_ptr<InvocationResult>>& results)
-          LOCKS_EXCLUDED(mu_) {
+          LOCKS_EXCLUDED(*mu_) {
         RecordStart(ctx.get());
         auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
         bool end_of_input = false;
@@ -1424,14 +1426,14 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         if (end_of_input) {
           current_elements_[cycle_index].reset();
         }
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         element_in_use_[cycle_index] = false;
         num_calls_--;
         if (end_of_input) {
           args_list_[cycle_index].clear();
           num_open_--;
         }
-        cond_var_.notify_all();
+        cond_var_->notify_all();
       }
 
       // Method responsible for 1) creating iterators out of input elements, 2)
@@ -1442,20 +1444,20 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       void RunnerThread(const std::shared_ptr<IteratorContext>& ctx) {
         RecordStart(ctx.get());
         auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
-        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(mu_) -> bool {
+        auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(*mu_) -> bool {
           return element_in_use_[cycle_index_] ||
-                 num_calls_ >= num_parallel_calls_ ||
+                 num_calls_ >= num_parallel_calls_->value ||
                  invocation_results_.size() >=
                      dataset()->cycle_length_ * dataset()->block_length_;
         };
         while (true) {
-          mutex_lock l(mu_);
+          mutex_lock l(*mu_);
           // Wait until this thread is cancelled, the end of input has been
           // reached, or the cycle element at the `cycle_index_` position is
           // not in use and there is space in the `invocation_results_` queue.
           while (!cancelled_ && (!end_of_input_ || num_open_ > 0) && busy()) {
             RecordStop(ctx.get());
-            cond_var_.wait(l);
+            cond_var_->wait(l);
             RecordStart(ctx.get());
           }
 
@@ -1509,13 +1511,13 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
             }
             cycle_index_ = (cycle_index_ + 1) % dataset()->cycle_length_;
           }
-          cond_var_.notify_all();
+          cond_var_->notify_all();
         }
       }
 
       Status WriteStatusLocked(IteratorStateWriter* writer, size_t index,
                                const Status& status)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         TF_RETURN_IF_ERROR(writer->WriteScalar(
             CodeKey(index), static_cast<int64>(status.code())));
         if (!status.ok()) {
@@ -1526,7 +1528,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       }
 
       Status ReadStatusLocked(IteratorStateReader* reader, size_t index,
-                              Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                              Status* status) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         int64 code_int;
         TF_RETURN_IF_ERROR(reader->ReadScalar(CodeKey(index), &code_int));
         error::Code code = static_cast<error::Code>(code_int);
@@ -1553,7 +1555,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       }
 
       Status WriteCurrentElements(IteratorStateWriter* writer)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         for (int idx = 0; idx < current_elements_.size(); idx++) {
           if (current_elements_[idx]) {
             TF_RETURN_IF_ERROR(SaveInput(writer, current_elements_[idx]));
@@ -1572,7 +1574,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
       Status ReadCurrentElements(IteratorContext* ctx,
                                  IteratorStateReader* reader)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         for (int idx = 0; idx < current_elements_.size(); idx++) {
           if (reader->Contains(
                   full_name(strings::StrCat("args_size[", idx, "]")))) {
@@ -1600,7 +1602,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
       // Used for coordination between the main thread, the runner thread, and
       // the worker threads.
-      mutex mu_;
+      const std::shared_ptr<mutex> mu_;
 
       // Used for coordination between the main thread, the runner thread, and
       // the worker threads. In particular, the runner thread should only
@@ -1608,45 +1610,45 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       // user specified level of parallelism, there are slots available in the
       // `invocation_results_` buffer, the current cycle element is not in use,
       // and there are elements left to be fetched.
-      condition_variable cond_var_;
+      const std::shared_ptr<condition_variable> cond_var_;
 
       // Identifies the maximum number of parallel calls.
-      std::atomic<int64> num_parallel_calls_;
+      const std::shared_ptr<model::SharedState> num_parallel_calls_;
 
       // Iterator for input elements.
-      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
+      std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(*mu_);
 
       // Identifies current cycle element.
       int64 cycle_index_ = 0;
 
       // Arguments for creating an iterator for cycle elements.
-      std::vector<std::vector<Tensor>> args_list_ GUARDED_BY(mu_);
+      std::vector<std::vector<Tensor>> args_list_ GUARDED_BY(*mu_);
 
       // Iterators for the current cycle elements. Concurrent access is
       // protected by `element_in_use_`.
       std::vector<std::unique_ptr<IteratorBase>> current_elements_;
 
       // Identifies cycle elements that are in use by worker threads.
-      std::vector<bool> element_in_use_ GUARDED_BY(mu_);
+      std::vector<bool> element_in_use_ GUARDED_BY(*mu_);
 
       // Buffer for storing the invocation results.
       std::deque<std::shared_ptr<InvocationResult>> invocation_results_
-          GUARDED_BY(mu_);
+          GUARDED_BY(*mu_);
 
       // Identifies whether end of input has been reached.
-      bool end_of_input_ GUARDED_BY(mu_) = false;
+      bool end_of_input_ GUARDED_BY(*mu_) = false;
 
       // Identifies the number of open iterators.
-      int64 num_open_ GUARDED_BY(mu_) = 0;
+      int64 num_open_ GUARDED_BY(*mu_) = 0;
 
       // Identifies the number of outstanding calls.
-      int64 num_calls_ GUARDED_BY(mu_) = 0;
+      int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
-      bool cancelled_ GUARDED_BY(mu_) = false;
+      bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
     const DatasetBase* const input_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index da067a4e6f..13bd4b6036 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -40,30 +40,32 @@ class ParallelMapIterator : public DatasetBaseIterator {
         input_dataset_(input_dataset),
         init_func_(std::move(init_func)),
         map_func_(std::move(map_func)),
-        num_parallel_calls_(num_parallel_calls) {}
+        mu_(std::make_shared<mutex>()),
+        cond_var_(std::make_shared<condition_variable>()),
+        num_parallel_calls_(std::make_shared<model::SharedState>(
+            num_parallel_calls, mu_, cond_var_)) {}
 
   ~ParallelMapIterator() override {
-    mutex_lock l(mu_);
+    mutex_lock l(*mu_);
     // Cancel the runner thread.
     cancelled_ = true;
-    cond_var_.notify_all();
+    cond_var_->notify_all();
     // Wait for all in-flight calls to complete.
     while (num_calls_ > 0) {
-      cond_var_.wait(l);
+      cond_var_->wait(l);
     }
   }
 
   Status Initialize(IteratorContext* ctx) override {
-    mutex_lock l(mu_);
-    if (num_parallel_calls_ == kAutoTune) {
-      num_parallel_calls_ = 1;
+    mutex_lock l(*mu_);
+    if (num_parallel_calls_->value == kAutoTune) {
+      num_parallel_calls_->value = 1;
       // TODO(jsimsa): Surface the number of threads used by `ctx->runner()` and
       // use it here for the maximum.
-      AddTunableParameter(ctx, "parallelism", &num_parallel_calls_ /* value */,
-                          1 /* min */, port::NumSchedulableCPUs() /* max */,
-                          &cond_var_);
+      AddTunableParameter(ctx, "parallelism", num_parallel_calls_, 1,
+                          port::NumSchedulableCPUs());
     } else {
-      AddConstantParameter(ctx, "parallelism", num_parallel_calls_);
+      AddConstantParameter(ctx, "parallelism", num_parallel_calls_->value);
     }
     TF_RETURN_IF_ERROR(
         input_dataset_->MakeIterator(ctx, prefix(), &input_impl_));
@@ -77,16 +79,16 @@ class ParallelMapIterator : public DatasetBaseIterator {
                          bool* end_of_sequence) override {
     std::shared_ptr<InvocationResult> result;
     {
-      mutex_lock l(mu_);
+      mutex_lock l(*mu_);
       EnsureRunnerThreadStarted(ctx);
       while (invocation_results_.empty()) {
         RecordStop(ctx);
-        cond_var_.wait(l);
+        cond_var_->wait(l);
         RecordStart(ctx);
       }
       std::swap(result, invocation_results_.front());
       invocation_results_.pop_front();
-      cond_var_.notify_all();
+      cond_var_->notify_all();
     }
     RecordStop(ctx);
     result->notification.WaitForNotification();
@@ -96,10 +98,10 @@ class ParallelMapIterator : public DatasetBaseIterator {
 
  protected:
   Status SaveInternal(IteratorStateWriter* writer) override {
-    mutex_lock l(mu_);
+    mutex_lock l(*mu_);
     // Wait for all in-flight calls to complete.
     while (num_calls_ > 0) {
-      cond_var_.wait(l);
+      cond_var_->wait(l);
     }
     CHECK_EQ(num_calls_, 0);
     TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
@@ -128,7 +130,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
 
   Status RestoreInternal(IteratorContext* ctx,
                          IteratorStateReader* reader) override {
-    mutex_lock l(mu_);
+    mutex_lock l(*mu_);
     TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
     int64 invocation_results_size;
     TF_RETURN_IF_ERROR(reader->ReadScalar(
@@ -175,7 +177,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   };
 
   void EnsureRunnerThreadStarted(IteratorContext* ctx)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
       runner_thread_.reset(ctx->env()->StartThread(
@@ -185,18 +187,18 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   void CallCompleted(const std::shared_ptr<InvocationResult>& result)
-      LOCKS_EXCLUDED(mu_) {
+      LOCKS_EXCLUDED(*mu_) {
     {
-      mutex_lock l(mu_);
+      mutex_lock l(*mu_);
       num_calls_--;
-      cond_var_.notify_all();
+      cond_var_->notify_all();
     }
     result->notification.Notify();
   }
 
   void CallFunction(const std::shared_ptr<IteratorContext>& ctx,
                     const std::shared_ptr<InvocationResult>& result)
-      LOCKS_EXCLUDED(mu_) {
+      LOCKS_EXCLUDED(*mu_) {
     // Get the next input element.
     std::vector<Tensor> input_element;
     result->status =
@@ -239,18 +241,18 @@ class ParallelMapIterator : public DatasetBaseIterator {
     RecordStart(ctx.get());
     auto cleanup = gtl::MakeCleanup([this, ctx] { RecordStop(ctx.get()); });
     std::vector<std::shared_ptr<InvocationResult>> new_calls;
-    new_calls.reserve(num_parallel_calls_);
-    auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(mu_) -> bool {
-      int64 num_parallel_calls = num_parallel_calls_;
+    new_calls.reserve(num_parallel_calls_->value);
+    auto busy = [this]() EXCLUSIVE_LOCKS_REQUIRED(*mu_) -> bool {
+      int64 num_parallel_calls = num_parallel_calls_->value;
       return num_calls_ >= num_parallel_calls ||
              invocation_results_.size() >= num_parallel_calls;
     };
     while (true) {
       {
-        mutex_lock l(mu_);
+        mutex_lock l(*mu_);
         while (!cancelled_ && busy()) {
           RecordStop(ctx.get());
-          cond_var_.wait(l);
+          cond_var_->wait(l);
           RecordStart(ctx.get());
         }
         if (cancelled_) {
@@ -261,7 +263,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
           new_calls.push_back(invocation_results_.back());
           num_calls_++;
         }
-        cond_var_.notify_all();
+        cond_var_->notify_all();
       }
       for (const auto& call : new_calls) {
         CallFunction(ctx, call);
@@ -271,7 +273,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   Status WriteStatusLocked(IteratorStateWriter* writer, size_t index,
-                           const Status& status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                           const Status& status)
+      EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     TF_RETURN_IF_ERROR(
         writer->WriteScalar(CodeKey(index), static_cast<int64>(status.code())));
     if (!status.ok()) {
@@ -282,7 +285,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
   Status ReadStatusLocked(IteratorStateReader* reader, size_t index,
-                          Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                          Status* status) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     int64 code_int;
     TF_RETURN_IF_ERROR(reader->ReadScalar(CodeKey(index), &code_int));
     error::Code code = static_cast<error::Code>(code_int);
@@ -312,23 +315,23 @@ class ParallelMapIterator : public DatasetBaseIterator {
   const std::function<Status(IteratorContext*)> init_func_;
   const ParallelMapIteratorFunction map_func_;
   // Used for coordination between the main thread and the runner thread.
-  mutex mu_;
+  const std::shared_ptr<mutex> mu_;
   // Used for coordination between the main thread and the runner thread. In
   // particular, the runner thread should only schedule new calls when the
   // number of in-flight calls is less than the user specified level of
   // parallelism and there are slots available in the `invocation_results_`
   // buffer.
-  condition_variable cond_var_;
+  const std::shared_ptr<condition_variable> cond_var_;
   // Identifies the maximum number of parallel calls.
-  std::atomic<int64> num_parallel_calls_;
+  const std::shared_ptr<model::SharedState> num_parallel_calls_;
   // Counts the number of outstanding calls.
-  int64 num_calls_ GUARDED_BY(mu_) = 0;
+  int64 num_calls_ GUARDED_BY(*mu_) = 0;
   std::unique_ptr<IteratorBase> input_impl_;
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
-      GUARDED_BY(mu_);
-  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(mu_);
-  bool cancelled_ GUARDED_BY(mu_) = false;
+      GUARDED_BY(*mu_);
+  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+  bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
 }  // namespace
-- 
GitLab


From 9a23e9251ecba026471ff77a5bbbc802a2889a10 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 1 Oct 2018 17:26:37 -0700
Subject: [PATCH 0193/1085] [tf.data] Adding `tf.data.Options()`,
 `tf.data.Dataset.options()`, and `tf.data.Dataset.with_options()` to make it
 possible to respectively represent, get, and set options, such as
 optimization configuration, of a tf.data input pipeline.

PiperOrigin-RevId: 215310764
---
 .../core/kernels/data/optimize_dataset_op.cc  |  16 +-
 .../optimization/hoist_random_uniform_test.py |  11 +-
 .../optimization/latency_all_edges_test.py    |   7 +-
 .../map_and_filter_fusion_test.py             |  27 +-
 .../optimization/map_parallelization_test.py  |   6 +-
 .../optimization/map_vectorization_test.py    |  14 +-
 .../optimization/model_dataset_op_test.py     |  20 +-
 .../optimization/noop_elimination_test.py     |   4 +-
 .../optimization/optimize_dataset_op_test.py  |  45 ++-
 .../data/experimental/ops/optimization.py     |  61 +---
 tensorflow/python/data/kernel_tests/BUILD     |  18 +-
 .../data/kernel_tests/dataset_ops_test.py     | 158 ++++++++++-
 tensorflow/python/data/ops/dataset_ops.py     | 268 +++++++++++++++++-
 .../golden/v1/tensorflow.data.-dataset.pbtxt  |   8 +
 ...ow.data.-fixed-length-record-dataset.pbtxt |   8 +
 .../golden/v1/tensorflow.data.-options.pbtxt  |  57 ++++
 .../tensorflow.data.-t-f-record-dataset.pbtxt |   8 +
 .../tensorflow.data.-text-line-dataset.pbtxt  |   8 +
 ...rflow.data.experimental.-csv-dataset.pbtxt |   8 +
 ...ow.data.experimental.-random-dataset.pbtxt |   8 +
 ...rflow.data.experimental.-sql-dataset.pbtxt |   8 +
 .../tools/api/golden/v1/tensorflow.data.pbtxt |   4 +
 .../golden/v2/tensorflow.data.-dataset.pbtxt  |   8 +
 ...ow.data.-fixed-length-record-dataset.pbtxt |   8 +
 .../golden/v2/tensorflow.data.-options.pbtxt  |  57 ++++
 .../tensorflow.data.-t-f-record-dataset.pbtxt |   8 +
 .../tensorflow.data.-text-line-dataset.pbtxt  |   8 +
 ...rflow.data.experimental.-csv-dataset.pbtxt |   8 +
 ...ow.data.experimental.-random-dataset.pbtxt |   8 +
 ...rflow.data.experimental.-sql-dataset.pbtxt |   8 +
 .../tools/api/golden/v2/tensorflow.data.pbtxt |   4 +
 31 files changed, 742 insertions(+), 147 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt

diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc
index d5b725eac9..1cb7caa738 100644
--- a/tensorflow/core/kernels/data/optimize_dataset_op.cc
+++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc
@@ -154,12 +154,8 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel {
           : DatasetIterator<Dataset>(params) {}
 
       Status Initialize(IteratorContext* ctx) override {
-        IteratorContext::Params params;
-        params.env = ctx->env();
-        params.runner = *(ctx->runner());
-        params.stats_aggregator_getter = ctx->stats_aggregator_getter();
+        IteratorContext::Params params = ctx->params();
         params.lib = dataset()->lib_;
-        params.allocator_getter = ctx->allocator_getter();
         return dataset()->optimized_input_->MakeIterator(
             IteratorContext(params), prefix(), &input_impl_);
       }
@@ -167,14 +163,10 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel {
       Status GetNextInternal(IteratorContext* ctx,
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
-        IteratorContext::Params params;
-        params.env = ctx->env();
-        params.runner = *(ctx->runner());
-        params.stats_aggregator_getter = ctx->stats_aggregator_getter();
+        IteratorContext::Params params = ctx->params();
         params.lib = dataset()->lib_;
-        params.allocator_getter = ctx->allocator_getter();
-        IteratorContext iter_ctx(params);
-        return input_impl_->GetNext(&iter_ctx, out_tensors, end_of_sequence);
+        return input_impl_->GetNext(IteratorContext(params), out_tensors,
+                                    end_of_sequence);
       }
 
      protected:
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
index 3cd9753665..81437c0aec 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
@@ -64,7 +64,9 @@ class HoistRandomUniformTest(test_base.DatasetTestBase, parameterized.TestCase):
         optimization.assert_next(
             ["Zip[0]", "Map"] if will_optimize else ["Map"])).map(function)
 
-    dataset = dataset.apply(optimization.optimize(["hoist_random_uniform"]))
+    options = dataset_ops.Options()
+    options.experimental_hoist_random_uniform = True
+    dataset = dataset.with_options(options)
     self._testDataset(dataset)
 
   def testAdditionalInputs(self):
@@ -77,9 +79,10 @@ class HoistRandomUniformTest(test_base.DatasetTestBase, parameterized.TestCase):
           [], minval=1, maxval=10, dtype=dtypes.float32, seed=42)
 
     dataset = dataset_ops.Dataset.range(5).apply(
-        optimization.assert_next(
-            ["Zip[0]", "Map"])).map(random_with_capture).apply(
-                optimization.optimize(["hoist_random_uniform"]))
+        optimization.assert_next(["Zip[0]", "Map"])).map(random_with_capture)
+    options = dataset_ops.Options()
+    options.experimental_hoist_random_uniform = True
+    dataset = dataset.with_options(options)
     self._testDataset(dataset)
 
   def _testDataset(self, dataset):
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
index 45623876ae..26fec0414e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
@@ -28,14 +28,15 @@ from tensorflow.python.platform import test
 class OptimizeStatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
 
   def testLatencyStatsOptimization(self):
-
     stats_aggregator = stats_ops.StatsAggregator()
     dataset = dataset_ops.Dataset.from_tensors(1).apply(
         optimization.assert_next(
             ["LatencyStats", "Map", "LatencyStats", "Prefetch",
              "LatencyStats"])).map(lambda x: x * x).prefetch(1).apply(
-                 stats_ops.set_stats_aggregator(stats_aggregator)).apply(
-                     optimization.optimize(["latency_all_edges"]))
+                 stats_ops.set_stats_aggregator(stats_aggregator))
+    options = dataset_ops.Options()
+    options.experimental_latency_all_edges = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_initializable_iterator()
     get_next = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
index a439635716..7f8a4e6406 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
@@ -72,7 +72,10 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
     for function in functions:
       dataset = dataset.map(function)
 
-    dataset = dataset.prefetch(0).apply(optimization.optimize(["map_fusion"]))
+    dataset = dataset.prefetch(0)
+    options = dataset_ops.Options()
+    options.experimental_map_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
     with self.cached_session() as sess:
@@ -124,9 +127,10 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testMapFilterFusion(self, function, predicate):
     dataset = dataset_ops.Dataset.range(10).apply(
         optimization.assert_next(
-            ["Map",
-             "FilterByLastComponent"])).map(function).filter(predicate).apply(
-                 optimization.optimize(["map_and_filter_fusion"]))
+            ["Map", "FilterByLastComponent"])).map(function).filter(predicate)
+    options = dataset_ops.Options()
+    options.experimental_map_and_filter_fusion = True
+    dataset = dataset.with_options(options)
     self._testMapAndFilter(dataset, function, predicate)
 
   def _testMapAndFilter(self, dataset, function, predicate):
@@ -156,10 +160,11 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     # We are currently not supporting functions with additional inputs.
     dataset = dataset_ops.Dataset.range(10).apply(
-        optimization.assert_next(
-            ["Map", "Filter"])).map(function).filter(predicate).apply(
-                optimization.optimize(["map_and_filter_fusion"]))
-
+        optimization.assert_next(["Map",
+                                  "Filter"])).map(function).filter(predicate)
+    options = dataset_ops.Options()
+    options.experimental_map_and_filter_fusion = True
+    dataset = dataset.with_options(options)
     self._testMapAndFilter(dataset, function, predicate)
 
   @staticmethod
@@ -197,8 +202,10 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
     for predicate in predicates:
       dataset = dataset.filter(predicate)
 
-    dataset = dataset.prefetch(0).apply(
-        optimization.optimize(["filter_fusion"]))
+    dataset = dataset.prefetch(0)
+    options = dataset_ops.Options()
+    options.experimental_filter_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
     with self.cached_session() as sess:
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
index 334d8e3778..ce9c9bc47b 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
@@ -62,8 +62,10 @@ class MapParallelizationTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testMapParallelization(self, function, should_optimize):
     next_nodes = ["ParallelMap"] if should_optimize else ["Map"]
     dataset = dataset_ops.Dataset.range(5).apply(
-        optimization.assert_next(next_nodes)).map(function).apply(
-            optimization.optimize(["map_parallelization"]))
+        optimization.assert_next(next_nodes)).map(function)
+    options = dataset_ops.Options()
+    options.experimental_map_parallelization = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index d47492753e..32ebc49c40 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -69,10 +69,11 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
           map_fn, num_parallel_calls=num_parallel_calls).batch(batch_size)
 
     unoptimized = _make_dataset([map_node_name, "Batch"])
-    optimized = _make_dataset(["Batch", map_node_name] if expect_optimized else
-                              [map_node_name, "Batch"]).apply(
-                                  optimization.optimize(["map_vectorization"]))
-
+    optimized = _make_dataset(["Batch", map_node_name]
+                              if expect_optimized else [map_node_name, "Batch"])
+    options = dataset_ops.Options()
+    options.experimental_map_vectorization = True
+    optimized = optimized.with_options(options)
     return unoptimized, optimized
 
   @parameterized.named_parameters(
@@ -179,7 +180,10 @@ class MapVectorizationBenchmark(test.Benchmark):
     unoptimized = input_dataset.map(map_fn).batch(batch_size)
     unoptimized_op = unoptimized.make_one_shot_iterator().get_next()
 
-    optimized = unoptimized.apply(optimization.optimize(["map_vectorization"]))
+    optimized = input_dataset.map(map_fn).batch(batch_size)
+    options = dataset_ops.Options()
+    options.experimental_map_vectorization = True
+    optimized = optimized.with_options(options)
     optimized_op = optimized.make_one_shot_iterator().get_next()
 
     unoptimized_time = self._run(
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
index a9f2ce8c03..82516356df 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
@@ -37,7 +37,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
                                                 np.random.rand(4 * k,
                                                                1))).repeat()
     dataset = dataset.map(math_ops.matmul)
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
@@ -61,7 +63,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
                                                                1))).repeat()
     dataset = dataset.map(
         math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE)
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
@@ -89,7 +93,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
             math_ops.matmul,
             num_parallel_calls=optimization.AUTOTUNE,
             batch_size=batch_size))
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
@@ -116,7 +122,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
         lambda _: dataset,
         cycle_length=10,
         num_parallel_calls=optimization.AUTOTUNE)
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
@@ -161,7 +169,9 @@ class ModelDatasetTest(test_base.DatasetTestBase):
         lambda _: dataset, cycle_length=2)
 
     dataset = dataset.map(f3, num_parallel_calls=optimization.AUTOTUNE)
-    iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     deltas = []
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
index 092e0ff62a..fb0640fe9f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
@@ -40,7 +40,9 @@ class NoopEliminationTest(test_base.DatasetTestBase):
             ["FiniteRepeat", "FiniteSkip", "Prefetch", "Prefetch"]))
     dataset = dataset.repeat(some_tensor).skip(5).prefetch(0).take(-1).skip(
         0).repeat(1).prefetch(0)
-    dataset = dataset.apply(optimization.optimize(["noop_elimination"]))
+    options = dataset_ops.Options()
+    options.experimental_noop_elimination = True
+    dataset = dataset.with_options(options)
 
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
index eb661796c0..760cd8cc4e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
@@ -33,23 +33,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
   def testOptimizationDefault(self):
     dataset = dataset_ops.Dataset.range(10).apply(
-        optimization.assert_next(
-            ["Map", "Batch"])).map(lambda x: x * x).batch(10).apply(
-                optimization.optimize())
-    iterator = dataset.make_one_shot_iterator()
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.assertAllEqual([x * x for x in range(10)], sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testOptimizationEmpty(self):
-    dataset = dataset_ops.Dataset.range(10).apply(
-        optimization.assert_next(
-            ["Map", "Batch"])).map(lambda x: x * x).batch(10).apply(
-                optimization.optimize([]))
-    iterator = dataset.make_one_shot_iterator()
+        optimization.assert_next(["Map",
+                                  "Batch"])).map(lambda x: x * x).batch(10)
+    iterator = dataset.with_options(
+        dataset_ops.Options()).make_one_shot_iterator()
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
@@ -60,8 +47,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
   def testOptimizationFusion(self):
     dataset = dataset_ops.Dataset.range(10).apply(
         optimization.assert_next(
-            ["MapAndBatch"])).map(lambda x: x * x).batch(10).apply(
-                optimization.optimize(["map_and_batch_fusion"]))
+            ["MapAndBatch"])).map(lambda x: x * x).batch(10)
+    options = dataset_ops.Options()
+    options.experimental_map_and_batch_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
@@ -72,8 +61,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
   def testOptimizationStatefulFunction(self):
     dataset = dataset_ops.Dataset.range(10).map(
-        lambda _: random_ops.random_uniform([])).batch(10).apply(
-            optimization.optimize(["map_and_batch_fusion"]))
+        lambda _: random_ops.random_uniform([])).batch(10)
+    options = dataset_ops.Options()
+    options.experimental_map_and_batch_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
@@ -82,8 +73,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
   def testOptimizationLargeInputFromTensor(self):
     input_t = array_ops.placeholder(dtypes.int32, (None, None, None))
-    dataset = dataset_ops.Dataset.from_tensors(input_t).apply(
-        optimization.optimize())
+    dataset = dataset_ops.Dataset.from_tensors(input_t)
+    options = dataset_ops.Options()
+    options.experimental_map_and_batch_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
@@ -94,8 +87,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
   def testOptimizationLargeInputFromTensorSlices(self):
     input_t = array_ops.placeholder(dtypes.int32, (None, None, None, None))
-    dataset = dataset_ops.Dataset.from_tensor_slices(input_t).apply(
-        optimization.optimize())
+    dataset = dataset_ops.Dataset.from_tensor_slices(input_t)
+    options = dataset_ops.Options()
+    options.experimental_map_and_batch_fusion = True
+    dataset = dataset.with_options(options)
     iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
diff --git a/tensorflow/python/data/experimental/ops/optimization.py b/tensorflow/python/data/experimental/ops/optimization.py
index 30348ede36..276dde8383 100644
--- a/tensorflow/python/data/experimental/ops/optimization.py
+++ b/tensorflow/python/data/experimental/ops/optimization.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import gen_experimental_dataset_ops
 
 # A constant that can be used to enable auto-tuning.
@@ -58,7 +57,7 @@ def model():
 
   def _apply_fn(dataset):
     """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _ModelDataset(dataset)
+    return dataset_ops._ModelDataset(dataset)  # pylint: disable=protected-access
 
   return _apply_fn
 
@@ -78,7 +77,7 @@ def optimize(optimizations=None):
 
   def _apply_fn(dataset):
     """Function from `Dataset` to `Dataset` that applies the transformation."""
-    return _OptimizeDataset(dataset, optimizations)
+    return dataset_ops._OptimizeDataset(dataset, optimizations)  # pylint: disable=protected-access
 
   return _apply_fn
 
@@ -113,59 +112,3 @@ class _AssertNextDataset(dataset_ops.UnaryDataset):
   def output_types(self):
     return self._input_dataset.output_types
 
-
-class _ModelDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that acts as an identity, and models performance."""
-
-  def __init__(self, input_dataset):
-    """See `optimize()` for details."""
-    super(_ModelDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.model_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-
-class _OptimizeDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` that acts as an identity, and applies optimizations."""
-
-  def __init__(self, input_dataset, optimizations):
-    """See `optimize()` for details."""
-    super(_OptimizeDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    if optimizations is None:
-      optimizations = []
-    self._optimizations = ops.convert_to_tensor(
-        optimizations, dtype=dtypes.string, name="optimizations")
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.optimize_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._optimizations,
-        **dataset_ops.flat_structure(self))
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index cadfe7f9e0..bf76860aa4 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -115,8 +115,10 @@ tf_py_test(
     srcs = ["dataset_ops_test.py"],
     additional_deps = [
         ":test_base",
-        "//tensorflow/core:protos_all_py",
+        "@absl_py//absl/testing:parameterized",
+        "//third_party/py/numpy",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
@@ -172,20 +174,6 @@ tf_py_test(
     ],
 )
 
-tf_py_test(
-    name = "inputs_test",
-    size = "small",
-    srcs = ["inputs_test.py"],
-    additional_deps = [
-        ":test_base",
-        "@absl_py//absl/testing:parameterized",
-        "//third_party/py/numpy",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/ops:dataset_ops",
-    ],
-)
-
 tf_py_test(
     name = "interleave_dataset_op_test",
     size = "small",
diff --git a/tensorflow/python/data/kernel_tests/dataset_ops_test.py b/tensorflow/python/data/kernel_tests/dataset_ops_test.py
index f115f9d9c7..b9f8875b9f 100644
--- a/tensorflow/python/data/kernel_tests/dataset_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_ops_test.py
@@ -18,13 +18,20 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
+import numpy as np
+
 from tensorflow.core.framework import graph_pb2
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import readers
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.platform import test
 
 
-class DatasetOpsTest(test_base.DatasetTestBase):
+class DatasetOpsTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testAsSerializedGraph(self):
     dataset = dataset_ops.Dataset.range(10)
@@ -33,6 +40,155 @@ class DatasetOpsTest(test_base.DatasetTestBase):
           sess.run(dataset._as_serialized_graph()))
       self.assertTrue(any([node.op != "RangeDataset" for node in graph.node]))
 
+  @staticmethod
+  def make_apply_fn(dataset):
+
+    def apply_fn(dataset):
+
+      def _apply_fn(dataset):
+        return dataset.cache()
+
+      return dataset.apply(_apply_fn)
+
+    return apply_fn
+
+  @staticmethod
+  def make_gen():
+
+    def gen():
+      yield 42
+
+    return gen
+
+  @staticmethod
+  def make_interleave_fn(dataset, num_parallel_calls=None):
+
+    def interleave_fn(dataset):
+      return dataset.interleave(
+          lambda x: dataset_ops.Dataset.range(0),
+          cycle_length=2,
+          num_parallel_calls=num_parallel_calls)
+
+    return interleave_fn
+
+  @parameterized.named_parameters(
+      ("FixedLengthRecord", readers.FixedLengthRecordDataset("", 42)),
+      ("FromGenerator",
+       dataset_ops.Dataset.from_generator(make_gen.__func__(), dtypes.int32),
+       1),
+      ("FromSparseTensorSlices",
+       dataset_ops.Dataset.from_sparse_tensor_slices(
+           sparse_tensor.SparseTensor(
+               indices=np.array([[0, 0], [1, 0], [2, 0]]),
+               values=np.array([0, 0, 0]),
+               dense_shape=np.array([3, 1])))),
+      ("FromTensors", dataset_ops.Dataset.from_tensors([42])),
+      ("FromTensorSlices", dataset_ops.Dataset.from_tensors([42])),
+      ("Range", dataset_ops.Dataset.range(10)),
+      ("TextLine", readers.TextLineDataset("")),
+      ("TFRecord", readers.TFRecordDataset(""), 1),
+  )
+  def testDatasetSourceInputs(self, dataset, num_inputs=0):
+    self.assertEqual(num_inputs, len(dataset._inputs()))
+
+  @parameterized.named_parameters(
+      ("Apply", make_apply_fn.__func__(dataset_ops.Dataset.range(0)),
+       dataset_ops.Dataset.range(0)),
+      ("Batch", lambda x: x.batch(10), dataset_ops.Dataset.range(0)),
+      ("Cache", lambda x: x.cache(), dataset_ops.Dataset.range(0)),
+      ("Filter", lambda x: x.filter(lambda x: True),
+       dataset_ops.Dataset.range(0)),
+      ("FlatMap", lambda x: x.flat_map(lambda x: dataset_ops.Dataset.range(0)),
+       dataset_ops.Dataset.range(0)),
+      ("Interleave", make_interleave_fn.__func__(dataset_ops.Dataset.range(0)),
+       dataset_ops.Dataset.range(0)),
+      ("Map", lambda x: x.map(lambda x: x), dataset_ops.Dataset.range(0)),
+      ("PaddedBatch", lambda x: x.padded_batch(10, []),
+       dataset_ops.Dataset.range(0)),
+      ("ParallelInterleave",
+       make_interleave_fn.__func__(dataset_ops.Dataset.range(0), 2),
+       dataset_ops.Dataset.range(0)),
+      ("ParallelMap", lambda x: x.map(lambda x: x, num_parallel_calls=2),
+       dataset_ops.Dataset.range(0)),
+      ("Repeat", lambda x: x.repeat(), dataset_ops.Dataset.range(0)),
+      ("Shuffle", lambda x: x.shuffle(10), dataset_ops.Dataset.range(0)),
+      ("Skip", lambda x: x.skip(1), dataset_ops.Dataset.range(0)),
+      ("Take", lambda x: x.take(1), dataset_ops.Dataset.range(0)),
+      ("Window", lambda x: x.window(10), dataset_ops.Dataset.range(0)),
+  )
+  def testUnaryTransformationInputs(self, dataset_fn, input_dataset):
+    self.assertEqual([input_dataset], dataset_fn(input_dataset)._inputs())
+
+  @parameterized.named_parameters(
+      ("Concatenate", lambda x, y: x.concatenate(y),
+       dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1)))
+  def testBinaryTransformationInputs(self, dataset_fn, input1, input2):
+    self.assertEqual([input1, input2], dataset_fn(input1, input2)._inputs())
+
+  @parameterized.named_parameters(
+      ("ZipOne", dataset_ops.Dataset.zip, (dataset_ops.Dataset.range(0))),
+      ("ZipNest", dataset_ops.Dataset.zip,
+       (dataset_ops.Dataset.range(0),
+        (dataset_ops.Dataset.range(1), dataset_ops.Dataset.range(2)))),
+      ("ZipTuple", dataset_ops.Dataset.zip,
+       (dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1))))
+  def testVariadicTransformationInputs(self, dataset_fn, input_datasets):
+    self.assertEqual(
+        nest.flatten(input_datasets),
+        dataset_fn(input_datasets)._inputs())
+
+  def testCollectInputs(self):
+    ds1 = dataset_ops.Dataset.range(0)
+    ds2 = ds1.concatenate(ds1)
+    ds3 = dataset_ops.Dataset.zip((ds2, ds1, ds2))
+
+    inputs = []
+    queue = [ds3]
+    while queue:
+      ds = queue[0]
+      queue = queue[1:]
+      queue.extend(ds._inputs())
+      inputs.append(ds)
+
+    self.assertEqual(5, inputs.count(ds1))
+    self.assertEqual(2, inputs.count(ds2))
+    self.assertEqual(1, inputs.count(ds3))
+
+  def testOptionsDefault(self):
+    ds = dataset_ops.Dataset.range(0)
+    self.assertEqual(dataset_ops.Options(), ds.options())
+
+  def testOptionsOnce(self):
+    options = dataset_ops.Options()
+    ds = dataset_ops.Dataset.range(0).with_options(options).cache()
+    self.assertEqual(options, ds.options())
+
+  def testOptionsTwiceSame(self):
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    ds = dataset_ops.Dataset.range(0).with_options(options).with_options(
+        options)
+    self.assertEqual(options, ds.options())
+
+  def testOptionsTwiceDifferent(self):
+    options1 = dataset_ops.Options()
+    options1.experimental_autotune = True
+    options2 = dataset_ops.Options()
+    options2.experimental_filter_fusion = False
+    ds = dataset_ops.Dataset.range(0).with_options(options1).with_options(
+        options2)
+    self.assertTrue(ds.options().experimental_autotune)
+    self.assertFalse(ds.options().experimental_filter_fusion)
+
+  def testOptionsTwiceDifferentError(self):
+    options1 = dataset_ops.Options()
+    options1.experimental_autotune = True
+    options2 = dataset_ops.Options()
+    options2.experimental_autotune = False
+    with self.assertRaisesRegexp(ValueError,
+                                 "Cannot merge incompatible values of option"):
+      dataset_ops.Dataset.range(0).with_options(options1).with_options(options2)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 3b9d3a639d..46ce191f7b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -86,6 +86,18 @@ class Dataset(object):
 
     raise NotImplementedError("Dataset._inputs")
 
+  def options(self):
+    """Returns the options for this dataset.
+
+    Returns:
+      A `tf.data.Options` object representing the dataset options.
+    """
+    for input_dataset in self._inputs():
+      options = input_dataset.options()
+      if options is not None:
+        return options
+    return Options()
+
   def make_initializable_iterator(self, shared_name=None):
     """Creates an `Iterator` for enumerating the elements of this dataset.
 
@@ -114,6 +126,13 @@ class Dataset(object):
       raise RuntimeError(
           "dataset.make_initializable_iterator is not supported when eager "
           "execution is enabled.")
+    dataset = self
+    options = self.options()
+    static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
+    if static_optimizations:
+      dataset = _OptimizeDataset(dataset, static_optimizations)
+    if options.experimental_autotune:
+      dataset = _ModelDataset(dataset)
     if shared_name is None:
       shared_name = ""
     if compat.forward_compatible(2018, 8, 3):
@@ -123,11 +142,12 @@ class Dataset(object):
       iterator_resource = gen_dataset_ops.iterator(
           container="", shared_name=shared_name, **flat_structure(self))
     with ops.colocate_with(iterator_resource):
-      initializer = gen_dataset_ops.make_iterator(self._as_variant_tensor(),
-                                                  iterator_resource)
+      initializer = gen_dataset_ops.make_iterator(
+          dataset._as_variant_tensor(),  # pylint: disable=protected-access
+          iterator_resource)
     return iterator_ops.Iterator(iterator_resource, initializer,
-                                 self.output_types, self.output_shapes,
-                                 self.output_classes)
+                                 dataset.output_types, dataset.output_shapes,
+                                 dataset.output_classes)
 
   def __iter__(self):
     """Creates an `Iterator` for enumerating the elements of this dataset.
@@ -162,7 +182,14 @@ class Dataset(object):
     # a 0-argument function.
     @function.Defun(capture_by_value=True)
     def _make_dataset():
-      return self._as_variant_tensor()  # pylint: disable=protected-access
+      dataset = self
+      options = self.options()
+      static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
+      if static_optimizations:
+        dataset = _OptimizeDataset(dataset, static_optimizations)
+      if options.experimental_autotune:
+        dataset = _ModelDataset(dataset)
+      return dataset._as_variant_tensor()  # pylint: disable=protected-access
 
     try:
       _make_dataset.add_to_graph(ops.get_default_graph())
@@ -1325,6 +1352,146 @@ class Dataset(object):
         output_shapes,
         output_classes)
 
+  def with_options(self, options):
+    """Returns a new `tf.data.Dataset` with the given options set.
+
+    The options are "global" in the sense they apply to the entire input
+    pipeline in which the `with_options` transformation is used. If options are
+    set multiple times, they are merged if possible (see
+    `tf.data.Options.merge()` for details).
+
+    Args:
+      options: A `tf.data.Options` that identifies the options the use.
+
+    Returns:
+      Dataset: A `Dataset` with the given options.
+
+    Raises:
+      ValueError: if options are set more than once
+    """
+    return _OptionsDataset(self, options)
+
+
+@tf_export("data.Options")
+class Options(object):
+  """Represents options for tf.data.Dataset.
+
+  An `Options` object can be for instance used to control which static
+  optimizations to apply or whether to use performance modeling to dynamically
+  tune the parallelism of operations such as `tf.data.Dataset.map` or
+  `tf.data.Dataset.interleave`.
+  """
+  for _name, _ty, _docstring in [
+      ("experimental_autotune", bool,
+       "Whether to dynamically adjust the values of tunable parameters (e.g. "
+       "degrees of parallelism)."),
+      ("experimental_filter_fusion", bool,
+       "Whether to fuse filter transformations."),
+      ("experimental_hoist_random_uniform", bool,
+       "Whether to hoist `tf.random_uniform()` ops out of map transformations."
+      ),
+      ("experimental_latency_all_edges", bool,
+       "Whether to add latency measurements on all edges."),
+      ("experimental_map_and_batch_fusion", bool,
+       "Whether to fuse map and batch transformations."),
+      ("experimental_map_and_filter_fusion", bool,
+       "Whether to fuse map and filter transformations."),
+      ("experimental_map_fusion", bool, "Whether to fuse map transformations."),
+      ("experimental_map_parallelization", bool,
+       "Whether to parallelize stateless map transformations."),
+      ("experimental_map_vectorization", bool,
+       "Whether to vectorize map transformations."),
+      ("experimental_noop_elimination", bool,
+       "Whether to eliminate no-op transformations."),
+      ("experimental_shuffle_and_repeat_fusion", bool,
+       "Whether to fuse shuffle and repeat transformations."),
+  ]:
+
+    def _make_getter(name):  # pylint: disable=no-self-argument
+
+      def getter(self):
+        return getattr(self, "_" + name)
+
+      return getter
+
+    def _make_setter(name, ty):  # pylint: disable=no-self-argument
+
+      def setter(self, value):
+        if not isinstance(value, ty):
+          raise TypeError(
+              "Attempting to set the option %s to incompatible value: %r" %
+              (name, value))
+        setattr(self, "_" + name, value)
+
+      return setter
+
+    vars()["_" + _name] = None
+    vars()[_name] = property(
+        _make_getter(_name), _make_setter(_name, _ty), None, _docstring)
+
+  def __init__(self):
+    pass
+
+  def __eq__(self, other):
+    if isinstance(other, self.__class__):
+      return self.__dict__ == other.__dict__
+    else:
+      return False
+
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def _static_optimizations(self):
+    """Produces the list of enabled static optimizations."""
+    experimental_optimizations = [
+        "filter_fusion", "hoist_random_uniform", "latency_all_edges",
+        "map_and_batch_fusion", "map_and_filter_fusion", "map_fusion",
+        "map_parallelization", "map_vectorization", "noop_elimination",
+        "shuffle_and_repeat_fusion"
+    ]
+    result = []
+    for exp_opt in experimental_optimizations:
+      if getattr(self, "experimental_" + exp_opt):
+        result.append(exp_opt)
+    return result
+
+  def merge(self, options):
+    """Merges itself with the given `tf.data.Options`.
+
+    The given `tf.data.Options` can be merged as long as there does not exist an
+    attribute that is set to different values in `self` and `options`.
+
+    Args:
+      options: a `tf.data.Options` to merge with
+
+    Raises:
+      ValueError: if the given `tf.data.Options` cannot be merged
+
+    Returns:
+      New `tf.data.Options()` object which is the result of merging self with
+      the input `tf.data.Options`.
+    """
+    result = Options()
+    for other in [self, options]:
+      for name in [
+          "experimental_autotune", "experimental_filter_fusion",
+          "experimental_hoist_random_uniform", "experimental_latency_all_edges",
+          "experimental_map_and_batch_fusion",
+          "experimental_map_and_filter_fusion", "experimental_map_fusion",
+          "experimental_map_parallelization", "experimental_map_vectorization",
+          "experimental_noop_elimination",
+          "experimental_shuffle_and_repeat_fusion"
+      ]:
+        this = getattr(result, name)
+        that = getattr(other, name)
+        if that is not None:
+          if this is None:
+            setattr(result, name, that)
+          elif this != that:
+            raise ValueError(
+                "Cannot merge incompatible values of option: %s" % (name))
+    return result
+
 
 class DatasetSource(Dataset):
   """Abstract class representing a dataset with no inputs."""
@@ -1664,6 +1831,9 @@ class StructuredFunctionWrapper(object):
           flat_classes.append(component)
           flat_shapes.append(component)
           flat_types.append(component)
+          if t.options() is not None:  # pylint: disable=protected-access
+            warnings.warn("Encountered a nested dataset with options. These "
+                          "options will not be applied to the outer dataset.")
         else:
           try:
             t = ops.convert_to_tensor(t)
@@ -2703,3 +2873,91 @@ class WindowDataset(UnaryDataset):
   @property
   def output_types(self):
     return self._output_types
+
+
+class _OptionsDataset(UnaryDataset):
+  """An identity `Dataset` that stores options."""
+
+  def __init__(self, input_dataset, options):
+    super(_OptionsDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._options = input_dataset.options()
+    if self._options:
+      self._options = self._options.merge(options)
+    else:
+      self._options = options
+
+  def _as_variant_tensor(self):
+    return self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+
+  def options(self):
+    return self._options
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+class _ModelDataset(UnaryDataset):
+  """A `Dataset` that acts as an identity, and models performance."""
+
+  def __init__(self, input_dataset):
+    """See `optimize()` for details."""
+    super(_ModelDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.model_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        **flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+
+class _OptimizeDataset(UnaryDataset):
+  """A `Dataset` that acts as an identity, and applies optimizations."""
+
+  def __init__(self, input_dataset, optimizations):
+    """See `optimize()` for details."""
+    super(_OptimizeDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    if optimizations is None:
+      optimizations = []
+    self._optimizations = ops.convert_to_tensor(
+        optimizations, dtype=dtypes.string, name="optimizations")
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.optimize_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._optimizations,
+        **flat_structure(self))
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
index 825afb622f..8b7f63e43e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt
@@ -78,6 +78,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -118,6 +122,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
index cdad5f6360..a7bfa82c65 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
new file mode 100644
index 0000000000..d15dccc173
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
@@ -0,0 +1,57 @@
+path: "tensorflow.data.Options"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Options\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "experimental_autotune"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_filter_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_hoist_random_uniform"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_latency_all_edges"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_and_batch_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_and_filter_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_parallelization"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_vectorization"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_noop_elimination"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_shuffle_and_repeat_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "merge"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
index df41bff1b5..7b7a9ebaf0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
index 028bcc2ce9..2817f900e1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
index 0c0405ee02..2520e28a3c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-csv-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
index bce0be4b17..1dd53b1eab 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-random-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
index 8aeae92d96..8fdd9dc52e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sql-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
index e205157523..3023276a1d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "Iterator"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "Options"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TFRecordDataset"
     mtype: "<class \'abc.ABCMeta\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
index 825afb622f..8b7f63e43e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
@@ -78,6 +78,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -118,6 +122,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
index cdad5f6360..a7bfa82c65 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
new file mode 100644
index 0000000000..d15dccc173
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
@@ -0,0 +1,57 @@
+path: "tensorflow.data.Options"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.Options\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "experimental_autotune"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_filter_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_hoist_random_uniform"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_latency_all_edges"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_and_batch_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_and_filter_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_parallelization"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_map_vectorization"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_noop_elimination"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "experimental_shuffle_and_repeat_fusion"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "merge"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
index df41bff1b5..7b7a9ebaf0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
index 028bcc2ce9..2817f900e1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt
@@ -79,6 +79,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -119,6 +123,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
index 0c0405ee02..2520e28a3c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-csv-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
index bce0be4b17..1dd53b1eab 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-random-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
index 8aeae92d96..8fdd9dc52e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sql-dataset.pbtxt
@@ -80,6 +80,10 @@ tf_class {
     name: "map"
     argspec: "args=[\'self\', \'map_func\', \'num_parallel_calls\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "options"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "padded_batch"
     argspec: "args=[\'self\', \'batch_size\', \'padded_shapes\', \'padding_values\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
@@ -120,6 +124,10 @@ tf_class {
     name: "window"
     argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], "
   }
+  member_method {
+    name: "with_options"
+    argspec: "args=[\'self\', \'options\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "zip"
     argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
index e205157523..3023276a1d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "Iterator"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "Options"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TFRecordDataset"
     mtype: "<class \'abc.ABCMeta\'>"
-- 
GitLab


From bacf1949f92bb1daa9e5c8a31cc6924e532551e9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 17:33:55 -0700
Subject: [PATCH 0194/1085] [XLA] Add kAllToAll and kCollectivePermute to
 EffectiveOperandPrecisionIsOutputPrecision list.

PiperOrigin-RevId: 215311766
---
 tensorflow/compiler/xla/service/bfloat16_support.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc
index 23645346e6..5b48f10505 100644
--- a/tensorflow/compiler/xla/service/bfloat16_support.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_support.cc
@@ -78,8 +78,10 @@ bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision(
     const HloInstruction& hlo, int64 operand_index) {
   switch (hlo.opcode()) {
     case HloOpcode::kAbs:
+    case HloOpcode::kAllToAll:
     case HloOpcode::kBroadcast:
     case HloOpcode::kClamp:
+    case HloOpcode::kCollectivePermute:
     case HloOpcode::kConcatenate:
     case HloOpcode::kConvert:
     case HloOpcode::kCopy:
-- 
GitLab


From beede8525be5386451bf0098992c37416d1864db Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 1 Oct 2018 17:45:22 -0700
Subject: [PATCH 0195/1085] Make Keras/TPU more robust to closed TF sessions.

PiperOrigin-RevId: 215313156
---
 .../contrib/tpu/python/tpu/keras_support.py   | 278 ++++++++++--------
 1 file changed, 155 insertions(+), 123 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 696656e840..a3a7fd8bb0 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -46,6 +46,7 @@ from __future__ import print_function
 
 import abc
 import collections
+import contextlib
 import re
 import sys
 import time
@@ -94,21 +95,56 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 
 
+# TODO(b/114775106): temporary shim to optionally initialize the TPU
+# This increases the odds our session is initialized, but shouldn't be needed.
+def _maybe_initialize_tpu(session):
+  """Initialize the TPU if it has not already been initialized."""
+  try:
+
+    def test_op():
+      return constant_op.constant(1) + constant_op.constant(1)
+
+    session.run(tpu.rewrite(test_op))
+  except errors.FailedPreconditionError as _:
+    session.run(tpu.initialize_system())
+
+
+@contextlib.contextmanager
+def _tpu_session_context():
+  """Initialize the TPU and cleans cache entries for bad sessions."""
+  try:
+    _maybe_initialize_tpu(K.get_session())
+    yield
+  except (errors.FailedPreconditionError, errors.AbortedError) as e:
+    K.clear_session()
+    raise Exception("""
+An error occurred connecting or initializing your TPU.
+
+The session has been reset. re-run keras_to_tpu_model to create a new session.
+""" + e)
+
+
 def setup_tpu_session(cluster_resolver):
   """Construct or return a `tf.Session` connected to the given cluster."""
   master = cluster_resolver.master()
 
   # Use the existing session if we're already connected to this TPU
-  if (K.get_session()._target == master and
-      getattr(K.get_session(), '_tpu_initialized', None)):
-    return
+  # N.B K.get_session() is a non-trivial operation, and may fail if the remote
+  # session has been reset.
+  try:
+    default_session = K.get_session()
+    if (default_session._target == master and
+        getattr(default_session, '_tpu_initialized', None)):
+      return
+  except errors.AbortedError as _:
+    # We lost the remote session and need to re-initialize.
+    logging.warning('Lost remote session: creating a new session.')
 
   cluster_spec = cluster_resolver.cluster_spec()
   config = config_pb2.ConfigProto(isolate_session_state=True)
   if cluster_spec:
     config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
 
-  logging.info('Initialize')
   tpu_session = tf_session.Session(target=master, config=config)
   tpu_session.run(tpu.initialize_system())
   tpu_session._tpu_initialized = True
@@ -1391,97 +1427,74 @@ class KerasTPUModel(models.Model):
       raise EnvironmentError('KerasTPUModel currently does not support eager '
                              'mode.')
 
-    assert not self._numpy_to_infeed_manager_list  # Ensure empty.
-
-    infeed_managers = []  # Managers to clean up at the end of the fit call.
-    if isinstance(x, dataset_ops.Dataset):
-      # TODO(b/111413240): Support taking a tf.data.Dataset directly.
-      raise ValueError(
-          'Taking a Dataset directly is not yet supported. Please '
-          'wrap your dataset construction code in a function and '
-          'pass that to fit instead. For examples, see: '
-          'https://github.com/tensorflow/tpu/tree/master/models/experimental'
-          '/keras')
-    if callable(x):
-      with ops.device('/job:%s/device:CPU:0' %
-                      self._tpu_assignment.worker_name):
-        dataset = x()
-        if steps_per_epoch is None:
-          raise ValueError('When using tf.data as input to a model, you '
-                           'should specify the steps_per_epoch argument.')
-        if y is not None:
-          raise ValueError('When using tf.data as input to a model, y must be '
-                           'None')
-        infeed_manager = TPUDatasetInfeedManager(
-            dataset, self._tpu_assignment, model_fn_lib.ModeKeys.TRAIN)
+    with _tpu_session_context():
+      assert not self._numpy_to_infeed_manager_list  # Ensure empty.
+
+      infeed_managers = []  # Managers to clean up at the end of the fit call.
+      if isinstance(x, dataset_ops.Dataset):
+        # TODO(b/111413240): Support taking a tf.data.Dataset directly.
+        raise ValueError(
+            'Taking a Dataset directly is not yet supported. Please '
+            'wrap your dataset construction code in a function and '
+            'pass that to fit instead. For examples, see: '
+            'https://github.com/tensorflow/tpu/tree/master/models/experimental'
+            '/keras')
+      if callable(x):
+        with ops.device(
+            '/job:%s/device:CPU:0' % self._tpu_assignment.worker_name):
+          dataset = x()
+          if steps_per_epoch is None:
+            raise ValueError('When using tf.data as input to a model, you '
+                             'should specify the steps_per_epoch argument.')
+          if y is not None:
+            raise ValueError('When using tf.data as input to a model, y must '
+                             'be None')
+          infeed_manager = TPUDatasetInfeedManager(
+              dataset, self._tpu_assignment, model_fn_lib.ModeKeys.TRAIN)
+          # Use dummy numpy inputs for the rest of Keras' shape checking. We
+          # intercept them when building the model.
+          x = infeed_manager.dummy_x
+          y = infeed_manager.dummy_y
+          infeed_managers.append((x, infeed_manager))
+
+      if isinstance(validation_data, dataset_ops.Dataset):
+        # TODO(b/111413240): Support taking a tf.data.Dataset directly.
+        raise ValueError(
+            'Taking a Dataset directly is not yet supported. Please '
+            'wrap your dataset construction code in a function and '
+            'pass that to fit instead. For examples, see: '
+            'https://github.com/tensorflow/tpu/tree/master/models/experimental'
+            '/keras')
+      if callable(validation_data):
+        dataset = validation_data()
+        if validation_steps is None:
+          raise ValueError('When using tf.data as validation for a model, you '
+                           'should specify the validation_steps argument.')
+        infeed_manager = TPUDatasetInfeedManager(dataset, self._tpu_assignment,
+                                                 model_fn_lib.ModeKeys.EVAL)
         # Use dummy numpy inputs for the rest of Keras' shape checking. We
         # intercept them when building the model.
-        x = infeed_manager.dummy_x
-        y = infeed_manager.dummy_y
-        infeed_managers.append((x, infeed_manager))
+        val_x = infeed_manager.dummy_x
+        val_y = infeed_manager.dummy_y
+        infeed_managers.append((val_x, infeed_manager))
+        validation_data = (val_x, val_y)
 
-    if isinstance(validation_data, dataset_ops.Dataset):
-      # TODO(b/111413240): Support taking a tf.data.Dataset directly.
-      raise ValueError(
-          'Taking a Dataset directly is not yet supported. Please '
-          'wrap your dataset construction code in a function and '
-          'pass that to fit instead. For examples, see: '
-          'https://github.com/tensorflow/tpu/tree/master/models/experimental'
-          '/keras')
-    if callable(validation_data):
-      dataset = validation_data()
-      if validation_steps is None:
-        raise ValueError('When using tf.data as validation for a model, you '
-                         'should specify the validation_steps argument.')
-      infeed_manager = TPUDatasetInfeedManager(
-          dataset, self._tpu_assignment, model_fn_lib.ModeKeys.EVAL)
-      # Use dummy numpy inputs for the rest of Keras' shape checking. We
-      # intercept them when building the model.
-      val_x = infeed_manager.dummy_x
-      val_y = infeed_manager.dummy_y
-      infeed_managers.append((val_x, infeed_manager))
-      validation_data = (val_x, val_y)
-
-    self._numpy_to_infeed_manager_list = infeed_managers
-    try:
-      if not kwargs.get('_pipeline', True):
-        logging.info('Running non-pipelined training loop (`_pipeline=%s`).',
-                     kwargs['_pipeline'])
-        kwargs.pop('_pipeline')
-        return super(KerasTPUModel, self).fit(
-            x,
-            y,
-            batch_size,
-            epochs,
-            verbose,
-            callbacks,
-            validation_split,
-            validation_data,
-            shuffle,
-            class_weight,
-            sample_weight,
-            initial_epoch,
-            steps_per_epoch,
-            validation_steps,
-            **kwargs)
-      return self._pipeline_fit(
-          x,
-          y,
-          batch_size,
-          epochs,
-          verbose,
-          callbacks,
-          validation_split,
-          validation_data,
-          shuffle,
-          class_weight,
-          sample_weight,
-          initial_epoch,
-          steps_per_epoch,
-          validation_steps,
-          **kwargs)
-    finally:
-      self._numpy_to_infeed_manager_list = []
+      self._numpy_to_infeed_manager_list = infeed_managers
+      try:
+        if not kwargs.get('_pipeline', True):
+          logging.info('Running non-pipelined training loop (`_pipeline=%s`).',
+                       kwargs['_pipeline'])
+          kwargs.pop('_pipeline')
+          return super(KerasTPUModel, self).fit(
+              x, y, batch_size, epochs, verbose, callbacks, validation_split,
+              validation_data, shuffle, class_weight, sample_weight,
+              initial_epoch, steps_per_epoch, validation_steps, **kwargs)
+        return self._pipeline_fit(x, y, batch_size, epochs, verbose, callbacks,
+                                  validation_split, validation_data, shuffle,
+                                  class_weight, sample_weight, initial_epoch,
+                                  steps_per_epoch, validation_steps, **kwargs)
+      finally:
+        self._numpy_to_infeed_manager_list = []
 
   def evaluate(self,
                x=None,
@@ -1492,37 +1505,38 @@ class KerasTPUModel(models.Model):
                steps=None):
     assert not self._numpy_to_infeed_manager_list  # Ensure empty.
 
-    infeed_managers = []  # Managers to clean up at the end of the fit call.
-    if isinstance(x, dataset_ops.Dataset):
-      # TODO(b/111413240): Support taking a tf.data.Dataset directly.
-      raise ValueError(
-          'Taking a Dataset directly is not yet supported. Please '
-          'wrap your dataset construction code in a function and '
-          'pass that to fit instead. For examples, see: '
-          'https://github.com/tensorflow/tpu/tree/master/models/experimental'
-          '/keras')
-    if callable(x):
-      dataset = x()
-      if steps is None:
-        raise ValueError('When using tf.data as input to a model, you '
-                         'should specify the steps argument.')
-      if y is not None:
-        raise ValueError('When using tf.data as input to a model, y must be '
-                         'None')
-      infeed_manager = TPUDatasetInfeedManager(
-          dataset, self._tpu_assignment, model_fn_lib.ModeKeys.EVAL)
-      # Use dummy numpy inputs for the rest of Keras' shape checking. We
-      # intercept them when building the model.
-      x = infeed_manager.dummy_x
-      y = infeed_manager.dummy_y
-      infeed_managers.append((x, infeed_manager))
-
-    self._numpy_to_infeed_manager_list = infeed_managers
-    try:
-      return super(KerasTPUModel, self).evaluate(x, y, batch_size, verbose,
-                                                 sample_weight, steps)
-    finally:
-      self._numpy_to_infeed_manager_list = []
+    with _tpu_session_context():
+      infeed_managers = []  # Managers to clean up at the end of the fit call.
+      if isinstance(x, dataset_ops.Dataset):
+        # TODO(b/111413240): Support taking a tf.data.Dataset directly.
+        raise ValueError(
+            'Taking a Dataset directly is not yet supported. Please '
+            'wrap your dataset construction code in a function and '
+            'pass that to fit instead. For examples, see: '
+            'https://github.com/tensorflow/tpu/tree/master/models/experimental'
+            '/keras')
+      if callable(x):
+        dataset = x()
+        if steps is None:
+          raise ValueError('When using tf.data as input to a model, you '
+                           'should specify the steps argument.')
+        if y is not None:
+          raise ValueError('When using tf.data as input to a model, y must be '
+                           'None')
+        infeed_manager = TPUDatasetInfeedManager(dataset, self._tpu_assignment,
+                                                 model_fn_lib.ModeKeys.EVAL)
+        # Use dummy numpy inputs for the rest of Keras' shape checking. We
+        # intercept them when building the model.
+        x = infeed_manager.dummy_x
+        y = infeed_manager.dummy_y
+        infeed_managers.append((x, infeed_manager))
+
+      self._numpy_to_infeed_manager_list = infeed_managers
+      try:
+        return super(KerasTPUModel, self).evaluate(x, y, batch_size, verbose,
+                                                   sample_weight, steps)
+      finally:
+        self._numpy_to_infeed_manager_list = []
 
   def _pipeline_fit(self, x, y, batch_size, epochs, verbose, callbacks,
                     validation_split, validation_data, shuffle, class_weight,
@@ -1910,6 +1924,24 @@ class KerasTPUModel(models.Model):
 
     return val_x, val_y, val_sample_weights
 
+  def predict(self,
+              x,
+              batch_size=None,
+              verbose=0,
+              steps=None,
+              max_queue_size=10,
+              workers=1,
+              use_multiprocessing=False):
+    with _tpu_session_context():
+      return super(KerasTPUModel, self).predict(
+          x,
+          batch_size=batch_size,
+          verbose=verbose,
+          steps=steps,
+          max_queue_size=max_queue_size,
+          workers=workers,
+          use_multiprocessing=use_multiprocessing)
+
   @property
   def optimizer(self):
     if self._tpu_model:
-- 
GitLab


From 11437d4b40e36ea7e599084dc89bdbba18d2f0ce Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@gadde.mtv.corp.google.com>
Date: Mon, 1 Oct 2018 18:03:33 -0700
Subject: [PATCH 0196/1085] Updating TF release version to 1.12

---
 tensorflow/core/public/version.h             | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel     | 2 +-
 tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +-
 tensorflow/tools/docker/Dockerfile.devel-mkl | 2 +-
 tensorflow/tools/pip_package/setup.py        | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index b043a69431..07eeeb4f03 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -19,12 +19,12 @@ limitations under the License.
 // TensorFlow uses semantic versioning, see http://semver.org/.
 
 #define TF_MAJOR_VERSION 1
-#define TF_MINOR_VERSION 11
+#define TF_MINOR_VERSION 12
 #define TF_PATCH_VERSION 0
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc1"
+#define TF_VERSION_SUFFIX "-rc0"
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 6f8e91fccf..a3893a2713 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -78,7 +78,7 @@ RUN mkdir /bazel && \
 
 # Download and build TensorFlow.
 WORKDIR /tensorflow
-RUN git clone --branch=r1.11 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.12 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # TODO(craigcitro): Don't install the pip package, since it makes it
 # more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 69a117fda6..7f9b55b455 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -100,7 +100,7 @@ RUN mkdir /bazel && \
 
 # Download and build TensorFlow.
 WORKDIR /tensorflow
-RUN git clone --branch=r1.11 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.12 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
index e433e9ebb2..4db64ee1c1 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -3,7 +3,7 @@ FROM ubuntu:16.04
 LABEL maintainer="Clayne Robison <clayne.b.robison@intel.com>"
 
 # These parameters can be overridden by parameterized_docker_build.sh
-ARG TF_BUILD_VERSION=r1.11
+ARG TF_BUILD_VERSION=r1.12
 ARG PYTHON="python"
 ARG PYTHON3_DEV=""
 ARG WHL_DIR="/tmp/pip"
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index d864a7a039..3632ee2076 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.11.0-rc1'
+_VERSION = '1.12.0-rc0'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From 337ce142e8851816a0fa6dccd4d3a93a5e9d2c53 Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@gadde.mtv.corp.google.com>
Date: Mon, 1 Oct 2018 18:03:33 -0700
Subject: [PATCH 0197/1085] Updating TF release version to 1.12

---
 tensorflow/core/public/version.h             | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel     | 2 +-
 tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +-
 tensorflow/tools/docker/Dockerfile.devel-mkl | 2 +-
 tensorflow/tools/pip_package/setup.py        | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index b043a69431..07eeeb4f03 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -19,12 +19,12 @@ limitations under the License.
 // TensorFlow uses semantic versioning, see http://semver.org/.
 
 #define TF_MAJOR_VERSION 1
-#define TF_MINOR_VERSION 11
+#define TF_MINOR_VERSION 12
 #define TF_PATCH_VERSION 0
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc1"
+#define TF_VERSION_SUFFIX "-rc0"
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 6f8e91fccf..a3893a2713 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -78,7 +78,7 @@ RUN mkdir /bazel && \
 
 # Download and build TensorFlow.
 WORKDIR /tensorflow
-RUN git clone --branch=r1.11 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.12 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # TODO(craigcitro): Don't install the pip package, since it makes it
 # more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 69a117fda6..7f9b55b455 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -100,7 +100,7 @@ RUN mkdir /bazel && \
 
 # Download and build TensorFlow.
 WORKDIR /tensorflow
-RUN git clone --branch=r1.11 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.12 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
index e433e9ebb2..4db64ee1c1 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -3,7 +3,7 @@ FROM ubuntu:16.04
 LABEL maintainer="Clayne Robison <clayne.b.robison@intel.com>"
 
 # These parameters can be overridden by parameterized_docker_build.sh
-ARG TF_BUILD_VERSION=r1.11
+ARG TF_BUILD_VERSION=r1.12
 ARG PYTHON="python"
 ARG PYTHON3_DEV=""
 ARG WHL_DIR="/tmp/pip"
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index d864a7a039..3632ee2076 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.11.0-rc1'
+_VERSION = '1.12.0-rc0'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From 991f06fd50fc73285ce415d57f720994c2b2e861 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Mon, 1 Oct 2018 19:42:12 -0700
Subject: [PATCH 0198/1085] [XLA] Migrate from gtl::FlatSet to
 absl::flat_hash_set

PiperOrigin-RevId: 215324035
---
 tensorflow/compiler/jit/BUILD                 |  2 +
 tensorflow/compiler/jit/deadness_analysis.cc  | 10 ++--
 .../jit/encapsulate_subgraphs_pass.cc         |  7 +--
 .../jit/encapsulate_xla_computations_pass.cc  | 10 ++--
 .../compiler/jit/mark_for_compilation_pass.cc |  6 +--
 .../compiler/jit/partially_decluster_pass.cc  |  7 +--
 .../jit/resource_operation_safety_analysis.cc |  4 +-
 tensorflow/compiler/tests/BUILD               |  1 +
 tensorflow/compiler/tests/randomized_tests.cc | 14 +++---
 tensorflow/compiler/xla/client/BUILD          |  1 +
 tensorflow/compiler/xla/client/xla_builder.cc |  4 +-
 tensorflow/compiler/xla/client/xla_builder.h  |  4 +-
 tensorflow/compiler/xla/service/BUILD         | 27 +++++++++++
 .../xla/service/bfloat16_propagation.cc       |  9 ++--
 .../xla/service/bfloat16_propagation.h        | 11 +++--
 .../compiler/xla/service/buffer_assignment.cc | 48 ++++++++++---------
 .../compiler/xla/service/buffer_assignment.h  | 22 ++++-----
 .../compiler/xla/service/buffer_liveness.h    |  4 +-
 .../xla/service/buffer_value_containers.h     |  4 +-
 tensorflow/compiler/xla/service/call_graph.cc |  9 ++--
 tensorflow/compiler/xla/service/call_graph.h  | 10 ++--
 .../compiler/xla/service/copy_insertion.cc    |  6 +--
 tensorflow/compiler/xla/service/cpu/BUILD     |  1 +
 .../compiler/xla/service/cpu/ir_emitter.cc    |  8 ++--
 .../xla/service/cpu/tests/cpu_noalias_test.cc |  2 +-
 tensorflow/compiler/xla/service/gpu/BUILD     |  3 ++
 .../xla/service/gpu/gpu_copy_insertion.cc     |  2 +-
 .../xla/service/gpu/instruction_fusion.cc     |  5 +-
 .../xla/service/gpu/multi_output_fusion.cc    |  6 +--
 .../compiler/xla/service/heap_simulator.cc    | 13 ++---
 .../compiler/xla/service/heap_simulator.h     |  6 +--
 .../xla/service/hlo_alias_analysis.cc         |  9 ++--
 tensorflow/compiler/xla/service/hlo_buffer.cc |  2 +-
 .../compiler/xla/service/hlo_computation.cc   | 11 ++---
 .../compiler/xla/service/hlo_computation.h    |  2 +-
 tensorflow/compiler/xla/service/hlo_cse.cc    |  6 +--
 .../xla/service/hlo_dataflow_analysis.cc      |  9 ++--
 .../compiler/xla/service/hlo_domain_map.cc    |  3 +-
 .../compiler/xla/service/hlo_domain_map.h     |  4 +-
 .../xla/service/hlo_domain_metadata.h         |  8 ++--
 .../compiler/xla/service/hlo_instruction.cc   |  4 +-
 .../xla/service/hlo_memory_scheduler.cc       |  7 +--
 tensorflow/compiler/xla/service/hlo_module.cc |  9 ++--
 .../xla/service/hlo_module_group_util.cc      |  6 +--
 .../compiler/xla/service/hlo_pass_pipeline.cc |  6 +--
 .../xla/service/hlo_rematerialization.cc      |  3 +-
 .../xla/service/hlo_rematerialization.h       |  3 +-
 .../compiler/xla/service/hlo_schedule.cc      |  5 +-
 tensorflow/compiler/xla/service/hlo_value.cc  |  4 +-
 .../xla/service/indexed_array_analysis.cc     |  2 +-
 .../compiler/xla/service/layout_assignment.h  |  7 ++-
 tensorflow/compiler/xla/service/llvm_ir/BUILD |  1 +
 .../xla/service/llvm_ir/alias_analysis.cc     |  6 +--
 .../xla/service/llvm_ir/alias_analysis.h      |  1 -
 .../xla/service/multi_output_fusion.cc        |  6 +--
 .../compiler/xla/service/name_uniquer.h       |  4 +-
 .../compiler/xla/service/shape_inference.cc   |  4 +-
 .../compiler/xla/service/shaped_buffer.cc     |  4 +-
 .../xla/service/tuple_points_to_analysis.h    |  1 -
 .../while_loop_invariant_code_motion.cc       |  8 ++--
 .../xla/service/while_loop_simplifier.cc      |  3 +-
 tensorflow/compiler/xla/tests/BUILD           |  2 +-
 .../compiler/xla/tests/test_utils_test.cc     |  5 +-
 63 files changed, 235 insertions(+), 186 deletions(-)

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index f20270931f..661b444a42 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -325,6 +325,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
@@ -407,6 +408,7 @@ cc_library(
         "//tensorflow/core/kernels:bounds_check",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index e63d4b7792..e0b9932d80 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -16,11 +16,11 @@ limitations under the License.
 #include "tensorflow/compiler/jit/deadness_analysis.h"
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/jit/deadness_analysis_internal.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 
 // ALGORITHM OVERVIEW
@@ -298,7 +298,7 @@ class SymbolPredicate : public Predicate {
 
 template <typename FunctionTy>
 /*static*/ void Predicate::Visit(Predicate* p, const FunctionTy& func) {
-  gtl::FlatSet<Predicate*> visited;
+  absl::flat_hash_set<Predicate*> visited;
   std::vector<Predicate*> stack;
 
   stack.push_back(p);
@@ -467,7 +467,7 @@ Predicate* PredicateFactory::MakeAndOrImpl(
       is_and ? Predicate::Kind::kAnd : Predicate::Kind::kOr;
   Predicate::Kind other_pred_kind =
       is_and ? Predicate::Kind::kOr : Predicate::Kind::kAnd;
-  gtl::FlatSet<Predicate*> simplified_ops_set;
+  absl::flat_hash_set<Predicate*> simplified_ops_set;
   std::vector<Predicate*> simplified_ops;
   for (Predicate* op : operands) {
     // Simplify A&A => A and  A|A => A.
@@ -492,7 +492,7 @@ Predicate* PredicateFactory::MakeAndOrImpl(
   }
 
   // Simplify "A&~A=>False" and "A|~A=>True".
-  gtl::FlatSet<Predicate*> negated_ops;
+  absl::flat_hash_set<Predicate*> negated_ops;
   for (Predicate* op : simplified_ops) {
     if (op->kind() == Predicate::Kind::kNot) {
       negated_ops.insert(dynamic_cast<NotPredicate&>(*op).operand());
@@ -512,7 +512,7 @@ Predicate* PredicateFactory::MakeAndOrImpl(
   //
   // First find any predicates contained in all subops.
   std::vector<Predicate*> common_inner_operands;
-  gtl::FlatSet<Predicate*> common_inner_operands_set;
+  absl::flat_hash_set<Predicate*> common_inner_operands_set;
   for (Predicate* op : simplified_ops) {
     if (op->kind() != other_pred_kind) {
       common_inner_operands.clear();
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index d165341f21..da27f837e8 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h"
@@ -44,7 +45,6 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/public/session_options.h"
@@ -78,7 +78,8 @@ void SortControlInputs(GraphDef* gdef) {
 namespace {
 
 bool AreAllParentsGuaranteedConst(
-    const Node& n, const gtl::FlatSet<const Node*>& runtime_const_nodes) {
+    const Node& n,
+    const absl::flat_hash_set<const Node*>& runtime_const_nodes) {
   if (n.type_string() == "GuaranteeConst") {
     // If the current node is itself a cast-to-const, no need
     // to look at the incoming edges.
@@ -101,7 +102,7 @@ bool AreAllParentsGuaranteedConst(
 void MarkGuaranteedConstants(
     const Graph& graph,
     const std::vector<std::pair<const Node*, Node*>>& src_arg_pairs) {
-  gtl::FlatSet<const Node*> guaranteed_const_nodes;
+  absl::flat_hash_set<const Node*> guaranteed_const_nodes;
   std::vector<const Node*> srcs;
   srcs.reserve(src_arg_pairs.size());
   for (const auto& src_arg : src_arg_pairs) {
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
index 755c364c62..2ce6fa73fc 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -15,13 +15,13 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/proto_serialization.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -62,7 +62,7 @@ DataType EdgeType(const Edge* edge) {
 }
 
 // Adds the control inputs of `node` to `*deps`.
-void AddControlInputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+void AddControlInputs(const Node& node, absl::flat_hash_set<Node*>* deps) {
   for (const Edge* edge : node.in_edges()) {
     if (edge->IsControlEdge()) {
       deps->insert(edge->src());
@@ -71,7 +71,7 @@ void AddControlInputs(const Node& node, gtl::FlatSet<Node*>* deps) {
 }
 
 // Adds the control outputs of `node` to `*deps`.
-void AddControlOutputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+void AddControlOutputs(const Node& node, absl::flat_hash_set<Node*>* deps) {
   for (const Edge* edge : node.out_edges()) {
     if (edge->IsControlEdge()) {
       deps->insert(edge->dst());
@@ -246,7 +246,7 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
 
     // Data and control inputs to the new XlaLaunch node.
     std::vector<std::pair<Node*, int>> data_inputs(num_inputs);
-    gtl::FlatSet<Node*> control_inputs;
+    absl::flat_hash_set<Node*> control_inputs;
     DataTypeVector arg_types(num_args);
 
     AddControlInputs(*launch, &control_inputs);
@@ -266,7 +266,7 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
 
     // Outputs.
     const int num_outputs = launch->output_types().size();
-    gtl::FlatSet<Node*> control_outputs;
+    absl::flat_hash_set<Node*> control_outputs;
     std::vector<std::vector<std::pair<Node*, int>>> data_outputs(num_outputs);
     DataTypeVector output_types(num_outputs);
 
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index 133d982360..4f0c370e65 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <unordered_map>
 #include <unordered_set>
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/jit/deadness_analysis.h"
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h"
@@ -42,7 +43,6 @@ limitations under the License.
 #include "tensorflow/core/graph/control_flow.h"
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/public/version.h"
 
@@ -371,7 +371,7 @@ bool IsXlaFusable(const NodeDef& node) {
 Status FindCompilationCandidates(
     const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env,
     const std::function<bool(const Node*, const DeviceType&)>& is_compilable_fn,
-    OrderedNodeSet* candidates, gtl::FlatSet<Node*>* isolated_nodes) {
+    OrderedNodeSet* candidates, absl::flat_hash_set<Node*>* isolated_nodes) {
   OptimizerOptions opts;
   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
       new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION,
@@ -849,7 +849,7 @@ Status MarkForCompilationPass::RunImpl(
   Graph* graph = options.graph->get();
 
   OrderedNodeSet compilation_candidates;
-  gtl::FlatSet<Node*> isolated_nodes;
+  absl::flat_hash_set<Node*> isolated_nodes;
   TF_RETURN_IF_ERROR(FindCompilationCandidates(
       *graph, options.flib_def,
       (options.session_options != nullptr) ? options.session_options->env
diff --git a/tensorflow/compiler/jit/partially_decluster_pass.cc b/tensorflow/compiler/jit/partially_decluster_pass.cc
index 10fc9e85d9..b1f9e9088f 100644
--- a/tensorflow/compiler/jit/partially_decluster_pass.cc
+++ b/tensorflow/compiler/jit/partially_decluster_pass.cc
@@ -15,17 +15,18 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/partially_decluster_pass.h"
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/xla_cluster_util.h"
 #include "tensorflow/compiler/tf2xla/const_analysis.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/framework/memory_types.h"
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace tensorflow {
 namespace {
-Status FindNodesToDecluster(const Graph& graph, gtl::FlatSet<Node*>* result,
+Status FindNodesToDecluster(const Graph& graph,
+                            absl::flat_hash_set<Node*>* result,
                             absl::Span<Node* const> post_order) {
   // Find nodes that have at least one user outside their cluster that expects
   // hostmem output.  These nodes should be cloned to outside the cluster to
@@ -171,7 +172,7 @@ Status PartiallyDeclusterToRemoveDeviceToHostCopies(Graph* graph) {
   GetPostOrder(*graph, &post_order, /*stable_comparator=*/NodeComparatorName(),
                /*edge_filter=*/NotBackedge);
 
-  gtl::FlatSet<Node*> nodes_to_partially_decluster;
+  absl::flat_hash_set<Node*> nodes_to_partially_decluster;
   TF_RETURN_IF_ERROR(
       FindNodesToDecluster(*graph, &nodes_to_partially_decluster, post_order));
 
diff --git a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
index 657bb409db..e039d46ec8 100644
--- a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
+++ b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
@@ -82,6 +82,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/resource_operation_safety_analysis.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_join.h"
 #include "absl/types/optional.h"
@@ -89,7 +90,6 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/util/ptr_util.h"
 
@@ -176,7 +176,7 @@ string ResourceOpToString(const ResourceOp& resource_op) {
 // point.
 class ResourceOpSet {
  private:
-  using Impl = gtl::FlatSet<ResourceOp>;
+  using Impl = absl::flat_hash_set<ResourceOp>;
 
  public:
   ResourceOpSet() = default;
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 3cf74fa788..822fedf121 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -1105,6 +1105,7 @@ cc_library(
         "//tensorflow/core:test",
         "//tensorflow/core:testlib",
         "//tensorflow/core/kernels:ops_util",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc
index bddda6f302..7a96f4c25c 100644
--- a/tensorflow/compiler/tests/randomized_tests.cc
+++ b/tensorflow/compiler/tests/randomized_tests.cc
@@ -45,6 +45,7 @@ limitations under the License.
 #include <random>
 #include <unordered_map>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/jit/defs.h"
@@ -63,7 +64,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/public/session.h"
 #include "tensorflow/core/public/session_options.h"
@@ -457,7 +457,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
   Tensor tensor(dtype, TensorShape(shape));
   switch (dtype) {
     case DT_FLOAT: {
-      gtl::FlatSet<float> already_generated;
+      absl::flat_hash_set<float> already_generated;
       std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
       test::FillFn<float>(&tensor, [&](int i) -> float {
         float generated;
@@ -470,7 +470,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_DOUBLE: {
-      gtl::FlatSet<double> already_generated;
+      absl::flat_hash_set<double> already_generated;
       std::uniform_real_distribution<double> distribution(-1.0, 1.0);
       test::FillFn<double>(&tensor, [&](int i) -> double {
         double generated;
@@ -483,7 +483,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_COMPLEX64: {
-      gtl::FlatSet<std::pair<float, float>> already_generated;
+      absl::flat_hash_set<std::pair<float, float>> already_generated;
       std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
       test::FillFn<complex64>(&tensor, [&](int i) {
         complex64 generated;
@@ -500,7 +500,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_INT32: {
-      gtl::FlatSet<int32> already_generated;
+      absl::flat_hash_set<int32> already_generated;
       std::uniform_int_distribution<int32> distribution(-(1 << 20), 1 << 20);
       test::FillFn<int32>(&tensor, [&](int i) -> int32 {
         int32 generated;
@@ -513,7 +513,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_INT64: {
-      gtl::FlatSet<int64> already_generated;
+      absl::flat_hash_set<int64> already_generated;
       std::uniform_int_distribution<int64> distribution(-(1LL << 40),
                                                         1LL << 40);
       test::FillFn<int64>(&tensor, [&](int i) -> int64 {
@@ -527,7 +527,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_BOOL: {
-      gtl::FlatSet<bool> already_generated;
+      absl::flat_hash_set<bool> already_generated;
       std::bernoulli_distribution distribution;
       test::FillFn<bool>(&tensor, [&](int i) -> bool {
         bool generated;
diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD
index 1191cff109..dc097f3696 100644
--- a/tensorflow/compiler/xla/client/BUILD
+++ b/tensorflow/compiler/xla/client/BUILD
@@ -221,6 +221,7 @@ cc_library(
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 5277de6a85..e0ec91dba1 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
@@ -33,7 +34,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/mutex.h"
 
 namespace xla {
@@ -2290,7 +2290,7 @@ StatusOr<XlaComputation> XlaBuilder::BuildConstantSubGraph(
   // also a valid dependency order). The related ops will be added to the
   // subgraph in the same order.
   std::set<int64> related_ops;
-  tensorflow::gtl::FlatSet<int64> related_calls;  // Related computations.
+  absl::flat_hash_set<int64> related_calls;  // Related computations.
   std::queue<int64> worklist;
   worklist.push(root->id());
   related_ops.insert(root->id());
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index b7295e8a53..cd0d5ca5d3 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/client/padding.h"
@@ -35,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stacktrace.h"
 #include "tensorflow/core/platform/types.h"
@@ -1035,7 +1035,7 @@ class XlaBuilder {
   std::map<int64, HloComputationProto> embedded_;
 
   // The unique parameter numbers.
-  tensorflow::gtl::FlatSet<int64> parameter_numbers_;
+  absl::flat_hash_set<int64> parameter_numbers_;
 
   // The metadata to attach to each op. This is structured as a "modal"-like
   // operation, in order to simplify client code (and not sprinkle this metadata
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 8da6364786..13803f5ebe 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -147,6 +147,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -183,6 +184,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:span",
@@ -336,6 +338,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -490,6 +493,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -781,6 +785,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -959,6 +964,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -995,6 +1001,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
     ],
@@ -1043,6 +1050,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -1136,6 +1144,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -1230,6 +1239,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
@@ -1275,6 +1285,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -1348,6 +1359,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -1660,6 +1672,7 @@ cc_library(
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
     ],
@@ -2064,6 +2077,7 @@ cc_library(
         ":logical_buffer",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -2099,6 +2113,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
@@ -2120,6 +2135,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -2203,6 +2219,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2225,6 +2242,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
     ],
@@ -2286,6 +2304,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -2343,6 +2362,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2370,6 +2390,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2487,6 +2508,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2616,6 +2638,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2655,6 +2678,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -2730,6 +2754,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -3300,6 +3325,7 @@ cc_library(
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -3387,6 +3413,7 @@ cc_library(
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
index 58f78f8e24..002be9c970 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/bfloat16_propagation.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -81,7 +82,7 @@ void BFloat16Propagation::RevertIfFusionInternalBF16Changes(
   };
 
   auto root = fusion->fused_instructions_computation()->root_instruction();
-  tensorflow::gtl::FlatSet<const HloValue*> changed_root_buffers;
+  absl::flat_hash_set<const HloValue*> changed_root_buffers;
 
   auto root_changes_it = changes_to_bf16_.find(root);
   if (root_changes_it != changes_to_bf16_.end()) {
@@ -500,7 +501,7 @@ void BFloat16Propagation::AdjustCalledComputationRoot(HloInstruction* hlo) {
 
 bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper(
     HloComputation* computation,
-    tensorflow::gtl::FlatSet<const HloComputation*>* visited_computations) {
+    absl::flat_hash_set<const HloComputation*>* visited_computations) {
   bool parameter_changed = false;
   auto insts = computation->MakeInstructionPostOrder();
   // Do the adjustment on each instruction in the computation in reverse
@@ -560,7 +561,7 @@ bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper(
       // another input parameter. A fixed point will be reached because the
       // parameters can only be changed from BF16 to F32, not the other way
       // around.
-      tensorflow::gtl::FlatSet<const HloComputation*> visited_in_while;
+      absl::flat_hash_set<const HloComputation*> visited_in_while;
       while (ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_condition(),
                                                          &visited_in_while) ||
              ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_body(),
@@ -587,7 +588,7 @@ void BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers(
     HloModule* module) {
   const auto& computations_topological_order =
       module->MakeComputationPostOrder();
-  tensorflow::gtl::FlatSet<const HloComputation*> resolved;
+  absl::flat_hash_set<const HloComputation*> resolved;
   for (auto comp_it = computations_topological_order.rbegin();
        comp_it != computations_topological_order.rend(); ++comp_it) {
     if (ContainsKey(resolved, *comp_it)) {
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h
index c74326f631..5fcaa15c83 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.h
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/bfloat16_support.h"
 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -82,7 +83,7 @@ class BFloat16Propagation : public HloModulePass {
 
   // The set of instructions to consider using bfloat16, computed in the forward
   // pass.
-  tensorflow::gtl::FlatSet<const HloInstruction*> consider_using_bfloat16_;
+  absl::flat_hash_set<const HloInstruction*> consider_using_bfloat16_;
 
   // ***************************
   // Functions called and state produced by the backward pass (from root to
@@ -111,12 +112,12 @@ class BFloat16Propagation : public HloModulePass {
 
   // The set of HloInstructions that have been visited in the
   // opportunity-finding pass.
-  tensorflow::gtl::FlatSet<const HloInstruction*>
+  absl::flat_hash_set<const HloInstruction*>
       instructions_visited_in_backward_pass_;
 
   // The set of HloComputations that have been visited in the
   // opportunity-finding pass.
-  tensorflow::gtl::FlatSet<const HloComputation*>
+  absl::flat_hash_set<const HloComputation*>
       computations_visited_in_backward_pass_;
 
   // ***************************
@@ -132,7 +133,7 @@ class BFloat16Propagation : public HloModulePass {
   // point is reached.
   bool ResolveInconsistencyOfAliasingBuffersHelper(
       HloComputation* computation,
-      tensorflow::gtl::FlatSet<const HloComputation*>* visited_computations);
+      absl::flat_hash_set<const HloComputation*>* visited_computations);
 
   // Makes the parameters of called computations match how they are called by
   // the given HLO.
@@ -183,7 +184,7 @@ class BFloat16Propagation : public HloModulePass {
                                       PrimitiveType target_type);
 
   // The set of F32 HLO values that must be kept in F32.
-  tensorflow::gtl::FlatSet<const HloValue*> values_that_must_be_kept_as_f32_;
+  absl::flat_hash_set<const HloValue*> values_that_must_be_kept_as_f32_;
 
   // Mapping from each HloComputation to the number of callers to it in the
   // module. Populated at the beginning of this pass.
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 3efa0b1dad..2c2d1626c2 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -43,9 +44,9 @@ namespace xla {
 namespace {
 
 using absl::flat_hash_map;
+using absl::flat_hash_set;
 using absl::StrAppend;
 using absl::StrAppendFormat;
-using ::tensorflow::gtl::FlatSet;
 using ::tensorflow::strings::HumanReadableNumBytes;
 
 template <typename T>
@@ -129,8 +130,8 @@ Status GatherComputationsByAllocationType(
 
   // Sets for quickly checking membership. Computations are returned in vectors
   // for stable iteration.
-  FlatSet<const HloComputation*> thread_local_set;
-  FlatSet<const HloComputation*> global_set;
+  flat_hash_set<const HloComputation*> thread_local_set;
+  flat_hash_set<const HloComputation*> global_set;
 
   while (!worklist.empty()) {
     auto worklist_front = worklist.front();
@@ -445,7 +446,7 @@ bool BufferAssignment::SharesSliceAtIndex(
 bool BufferAssignment::HaveDisjointSlices(const HloInstruction* hlo_a,
                                           const HloInstruction* hlo_b) const {
   using SliceSet =
-      FlatSet<BufferAllocation::Slice, BufferAllocation::Slice::Hasher>;
+      flat_hash_set<BufferAllocation::Slice, BufferAllocation::Slice::Hasher>;
   // Gets the slices all of instr's subshapes.  If any subshape doesn't have an
   // assigned slice, returns the empty set.
   auto collect_slices = [&](const HloInstruction* instr) -> SliceSet {
@@ -815,9 +816,9 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
 
 Status BufferAssigner::AssignBuffersForComputation(
     const HloComputation* computation, bool is_thread_local,
-    const FlatSet<const LogicalBuffer*>& colocated_buffers,
-    const FlatSet<BufferAllocation::Index>& colocated_allocations,
-    flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>*
+    const flat_hash_set<const LogicalBuffer*>& colocated_buffers,
+    const flat_hash_set<BufferAllocation::Index>& colocated_allocations,
+    flat_hash_map<const HloComputation*, flat_hash_set<const LogicalBuffer*>>*
         buffers_to_assign_sequentially,
     BufferAssignment* assignment) {
   // Buffers are sorted and assigned to BufferAllocations in decreasing order of
@@ -853,8 +854,8 @@ Status BufferAssigner::AssignBuffersForComputation(
     // buffers_to_assign_sequentially map, even if we end up with an empty set
     // of buffers. This ensures we can correctly determine whether to run
     // whole-module heap simulation.
-    buffers_to_assign_sequentially->emplace(computation,
-                                            FlatSet<const LogicalBuffer*>());
+    buffers_to_assign_sequentially->emplace(
+        computation, flat_hash_set<const LogicalBuffer*>());
   }
 
   // Sort the LogicalBuffers first by size. We assign the larger LogicalBuffers
@@ -1046,11 +1047,11 @@ Status BufferAssigner::AssignBuffersForComputation(
   return Status::OK();
 }
 
-flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+flat_hash_map<LogicalBuffer::Color, flat_hash_set<const LogicalBuffer*>,
               LogicalBuffer::Color::Hasher>
 BufferAssigner::SplitBuffersByColor(
-    const FlatSet<const LogicalBuffer*>& buffers) {
-  flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+    const flat_hash_set<const LogicalBuffer*>& buffers) {
+  flat_hash_map<LogicalBuffer::Color, flat_hash_set<const LogicalBuffer*>,
                 LogicalBuffer::Color::Hasher>
       color_map;
   for (auto buffer : buffers) {
@@ -1060,7 +1061,8 @@ BufferAssigner::SplitBuffersByColor(
 }
 
 Status BufferAssigner::AssignBuffersWithSequentialOrdering(
-    const flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>&
+    const flat_hash_map<const HloComputation*,
+                        flat_hash_set<const LogicalBuffer*>>&
         buffers_to_assign_sequentially,
     bool run_whole_module_heap_simulation, BufferAssignment* assignment) {
   // Run the sequence of instructions through the heap simulator.  The heuristic
@@ -1086,10 +1088,11 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
     // only live for the duration of their calling instructions.
     VLOG(1) << "Running whole-module heap simulation";
     HloSchedule schedule(&assignment->module());
-    FlatSet<const LogicalBuffer*> all_buffers_to_assign;
+    flat_hash_set<const LogicalBuffer*> all_buffers_to_assign;
     for (const auto& pair : buffers_to_assign_sequentially) {
       const HloComputation* computation = pair.first;
-      const FlatSet<const LogicalBuffer*>& buffers_to_assign = pair.second;
+      const flat_hash_set<const LogicalBuffer*>& buffers_to_assign =
+          pair.second;
       const std::vector<const HloInstruction*>* instruction_sequence =
           hlo_ordering.SequentialOrder(*computation);
       CHECK(instruction_sequence != nullptr) << computation->name();
@@ -1123,7 +1126,8 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
     VLOG(1) << "Running per-computation heap simulation";
     for (const auto& pair : buffers_to_assign_sequentially) {
       const HloComputation* computation = pair.first;
-      const FlatSet<const LogicalBuffer*>& buffers_to_assign = pair.second;
+      const flat_hash_set<const LogicalBuffer*>& buffers_to_assign =
+          pair.second;
       const std::vector<const HloInstruction*>* instruction_sequence =
           hlo_ordering.SequentialOrder(*computation);
       CHECK(instruction_sequence != nullptr) << computation->name();
@@ -1198,7 +1202,7 @@ std::vector<const LogicalBuffer*> ComputePeakMemoryLogicalBuffers(
 
   // Next gather the set of logical buffers live at the earliest point of
   // maximal live set size.
-  tensorflow::gtl::FlatSet<const LogicalBuffer*> live_buffers;
+  absl::flat_hash_set<const LogicalBuffer*> live_buffers;
   live_size = 0;
   for (const auto& event : heap_trace.events()) {
     const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id());
@@ -1588,8 +1592,8 @@ void BufferAssigner::BuildColocatedBufferSets(
 void BufferAssigner::AssignColocatedBufferSets(
     const std::vector<ColocatedBufferSet>& colocated_buffer_sets,
     BufferAssignment* assignment,
-    FlatSet<const LogicalBuffer*>* colocated_buffers,
-    FlatSet<BufferAllocation::Index>* colocated_allocations) {
+    flat_hash_set<const LogicalBuffer*>* colocated_buffers,
+    flat_hash_set<BufferAllocation::Index>* colocated_allocations) {
   for (const ColocatedBufferSet& colocated_buffer_set : colocated_buffer_sets) {
     BufferAllocation* allocation = nullptr;
     // Set 'entry_parameter_number' and 'entry_parameter_shape_idx' if entry
@@ -1662,8 +1666,8 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
   // Once b/32491382 enables module-level liveness analysis, we may be able
   // to assign colocated buffers (or at least reuse their allocation for
   // buffers outside of the set) in AssignBuffersForComputation.
-  FlatSet<const LogicalBuffer*> colocated_buffers;
-  FlatSet<BufferAllocation::Index> colocated_allocations;
+  flat_hash_set<const LogicalBuffer*> colocated_buffers;
+  flat_hash_set<BufferAllocation::Index> colocated_allocations;
   std::vector<ColocatedBufferSet> colocated_buffer_sets;
   BuildColocatedBufferSets(module, assignment->liveness(),
                            assignment->buffer_size_, &colocated_buffer_sets);
@@ -1681,7 +1685,7 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
 
   // First assign buffers for global computatations. Temporary buffers for
   // sequential computations are collected in 'buffers_to_assign_sequentially'.
-  flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>
+  flat_hash_map<const HloComputation*, flat_hash_set<const LogicalBuffer*>>
       buffers_to_assign_sequentially;
   for (auto* computation : global_computations) {
     TF_RETURN_IF_ERROR(AssignBuffersForComputation(
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h
index 9ba40617a3..899cd36e1f 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.h
+++ b/tensorflow/compiler/xla/service/buffer_assignment.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
@@ -34,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -554,11 +554,10 @@ class BufferAssigner {
   // true.
   Status AssignBuffersForComputation(
       const HloComputation* computation, bool is_thread_local,
-      const tensorflow::gtl::FlatSet<const LogicalBuffer*>& colocated_buffers,
-      const tensorflow::gtl::FlatSet<BufferAllocation::Index>&
-          colocated_allocations,
+      const absl::flat_hash_set<const LogicalBuffer*>& colocated_buffers,
+      const absl::flat_hash_set<BufferAllocation::Index>& colocated_allocations,
       absl::flat_hash_map<const HloComputation*,
-                          tensorflow::gtl::FlatSet<const LogicalBuffer*>>*
+                          absl::flat_hash_set<const LogicalBuffer*>>*
           buffers_to_assign_sequentially,
       BufferAssignment* assignment);
 
@@ -569,7 +568,7 @@ class BufferAssigner {
   // assuming all global computations are sequentially ordered.
   Status AssignBuffersWithSequentialOrdering(
       const absl::flat_hash_map<const HloComputation*,
-                                tensorflow::gtl::FlatSet<const LogicalBuffer*>>&
+                                absl::flat_hash_set<const LogicalBuffer*>>&
           buffers_to_assign_sequentially,
       bool run_whole_module_heap_simulation, BufferAssignment* assignment);
 
@@ -589,7 +588,7 @@ class BufferAssigner {
   // alias. Explicitly handling these colocated buffers is necessary because
   // points-to analysis is computation level scope and does not recognize
   // aliasing across computations (b/32491382).
-  using ColocatedBufferSet = tensorflow::gtl::FlatSet<const LogicalBuffer*>;
+  using ColocatedBufferSet = absl::flat_hash_set<const LogicalBuffer*>;
 
   // Returns a vector of ColocatedBufferSet objects, where each
   // ColocatedBufferSet aggregates a set of related LogicalBuffers from 'module'
@@ -604,8 +603,8 @@ class BufferAssigner {
   void AssignColocatedBufferSets(
       const std::vector<ColocatedBufferSet>& colocated_buffer_sets,
       BufferAssignment* assignment,
-      tensorflow::gtl::FlatSet<const LogicalBuffer*>* colocated_buffers,
-      tensorflow::gtl::FlatSet<BufferAllocation::Index>* colocated_allocations);
+      absl::flat_hash_set<const LogicalBuffer*>* colocated_buffers,
+      absl::flat_hash_set<BufferAllocation::Index>* colocated_allocations);
 
   // Adds the 'colocated_set' of buffers to 'colocated_buffer_sets', maintaining
   // the invariant that all sets in 'colocated_buffer_sets' are disjoint.
@@ -624,10 +623,9 @@ class BufferAssigner {
   // Split a set of buffers into several sets, each of which contains buffers
   // colored with the same color.
   absl::flat_hash_map<LogicalBuffer::Color,
-                      tensorflow::gtl::FlatSet<const LogicalBuffer*>,
+                      absl::flat_hash_set<const LogicalBuffer*>,
                       LogicalBuffer::Color::Hasher>
-  SplitBuffersByColor(
-      const tensorflow::gtl::FlatSet<const LogicalBuffer*>& buffers);
+  SplitBuffersByColor(const absl::flat_hash_set<const LogicalBuffer*>& buffers);
 
   // If true, buffer assignments assumes that input parameter buffers and output
   // buffers can be shared if their sizes match.
diff --git a/tensorflow/compiler/xla/service/buffer_liveness.h b/tensorflow/compiler/xla/service/buffer_liveness.h
index 2911bbcfbf..f939a426ea 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness.h
+++ b/tensorflow/compiler/xla/service/buffer_liveness.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_ordering.h"
@@ -27,7 +28,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -101,7 +101,7 @@ class BufferLiveness {
   // Set of LogicalBuffers which are aliased in the output of other
   // instructions. For example, a LogicalBuffer which is inserted into a tuple
   // is considered to be aliased and will be in this set.
-  tensorflow::gtl::FlatSet<const LogicalBuffer*> aliased_buffers_;
+  absl::flat_hash_set<const LogicalBuffer*> aliased_buffers_;
 
   // LogicalBuffers that may be live out of the entry computation.
   PointsToSet::BufferSet maybe_live_out_buffers_;
diff --git a/tensorflow/compiler/xla/service/buffer_value_containers.h b/tensorflow/compiler/xla/service/buffer_value_containers.h
index 305914fca8..cc46af5eee 100644
--- a/tensorflow/compiler/xla/service/buffer_value_containers.h
+++ b/tensorflow/compiler/xla/service/buffer_value_containers.h
@@ -16,10 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_CONTAINERS_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_CONTAINERS_H_
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
 #include "tensorflow/core/lib/gtl/compactptrset.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -38,7 +38,7 @@ BufferValueCompactPointerSet ToBufferValueCompactPointerSet(
   return output;
 }
 
-using BufferValueFlatSet = tensorflow::gtl::FlatSet<const BufferValue*>;
+using BufferValueFlatSet = absl::flat_hash_set<const BufferValue*>;
 template <class LogicalBufferContainerT>
 BufferValueFlatSet ToBufferValueFlatSet(
     const LogicalBufferContainerT& logical_buffer_container) {
diff --git a/tensorflow/compiler/xla/service/call_graph.cc b/tensorflow/compiler/xla/service/call_graph.cc
index 23b2a32709..bdd5069632 100644
--- a/tensorflow/compiler/xla/service/call_graph.cc
+++ b/tensorflow/compiler/xla/service/call_graph.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <queue>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -138,7 +139,7 @@ CallGraphNode& CallGraph::GetNode(const HloComputation* computation) {
 
 bool CallGraph::DominatesHelper(
     const HloComputation* a, const HloComputation* b,
-    tensorflow::gtl::FlatSet<const HloComputation*>* visited) const {
+    absl::flat_hash_set<const HloComputation*>* visited) const {
   if (a == b || ContainsKey(*visited, b)) {
     // The call graph is guaranteed to be acyclic so any previously visited node
     // we encounter was already determined to be dominated.
@@ -163,7 +164,7 @@ bool CallGraph::DominatesHelper(
 
 bool CallGraph::Dominates(const HloComputation* a,
                           const HloComputation* b) const {
-  tensorflow::gtl::FlatSet<const HloComputation*> visited;
+  absl::flat_hash_set<const HloComputation*> visited;
   return DominatesHelper(a, b, &visited);
 }
 
@@ -277,7 +278,7 @@ std::unique_ptr<CallGraph> CallGraph::Build(const HloModule* module) {
 
 Status CallGraph::VisitNodesInternal(
     const VisitorFunction& visitor_func, const CallGraphNode& node,
-    tensorflow::gtl::FlatSet<const CallGraphNode*>* visited) const {
+    absl::flat_hash_set<const CallGraphNode*>* visited) const {
   auto pair = visited->insert(&node);
   if (!pair.second) {
     // Node was not inserted. Node has already been visited.
@@ -294,7 +295,7 @@ Status CallGraph::VisitNodesInternal(
 
 Status CallGraph::VisitNodes(const VisitorFunction& visitor_func,
                              bool visit_unreachable_nodes) const {
-  tensorflow::gtl::FlatSet<const CallGraphNode*> visited;
+  absl::flat_hash_set<const CallGraphNode*> visited;
   if (visit_unreachable_nodes) {
     // Traverse from all roots in the call graph.
     for (const CallGraphNode& node : nodes()) {
diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h
index 0c2e9b99db..cb56f4789d 100644
--- a/tensorflow/compiler/xla/service/call_graph.h
+++ b/tensorflow/compiler/xla/service/call_graph.h
@@ -21,10 +21,10 @@ limitations under the License.
 #include <ostream>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -145,12 +145,12 @@ class CallGraphNode {
   // The computations called by this computation. The vector is used for a
   // stable ordering and the set enables fast membership testing.
   std::vector<HloComputation*> callees_;
-  tensorflow::gtl::FlatSet<HloComputation*> callee_set_;
+  absl::flat_hash_set<HloComputation*> callee_set_;
 
   // The computations which call this computation. The vector is used for a
   // stable ordering and the set enables fast membership testing.
   std::vector<HloComputation*> callers_;
-  tensorflow::gtl::FlatSet<HloComputation*> caller_set_;
+  absl::flat_hash_set<HloComputation*> caller_set_;
 
   // The call sites in this computation
   std::vector<CallSite> callsites_;
@@ -250,14 +250,14 @@ class CallGraph {
   // 'visited'.
   Status VisitNodesInternal(
       const VisitorFunction& visitor_func, const CallGraphNode& node,
-      tensorflow::gtl::FlatSet<const CallGraphNode*>* visited) const;
+      absl::flat_hash_set<const CallGraphNode*>* visited) const;
 
   // Recursive helper for computing whether 'a' dominates 'b' in the call
   // graph. 'b_ancestor' is the currently visited node (which starts at 'b'),
   // and 'visited' is the set of computations which have been visited.
   bool DominatesHelper(
       const HloComputation* a, const HloComputation* b,
-      tensorflow::gtl::FlatSet<const HloComputation*>* visited) const;
+      absl::flat_hash_set<const HloComputation*>* visited) const;
 
   // The HLO module represented by this call graph.
   const HloModule* module_ = nullptr;
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index 7f78412924..f35324aa35 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_alias_analysis.h"
@@ -32,7 +33,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -904,7 +904,7 @@ class CopyRemover {
     // The heads of all the value lists. Each value list represents the HLO
     // values contained in a particular HLO buffer. The values in the list are
     // in dependency order.
-    tensorflow::gtl::FlatSet<const ValueNode*> value_lists_;
+    absl::flat_hash_set<const ValueNode*> value_lists_;
 
     // Copy removal requires fast access to the value list elements
     // corresponding to the source and destination values of the kCopy
@@ -1009,7 +1009,7 @@ Status CopyInsertion::AddSpecialCaseCopies(const CallGraph& call_graph,
     HloInstruction* root = computation->root_instruction();
 
     // Mark nondistinct/ambiguous indices.
-    tensorflow::gtl::FlatSet<const HloBuffer*> seen;
+    absl::flat_hash_set<const HloBuffer*> seen;
     ShapeUtil::ForEachSubshape(
         root->shape(), [&](const Shape& /*subshape*/, const ShapeIndex& index) {
           std::vector<const HloBuffer*> buffers_at_index =
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 6a83909a3b..ae4c6e962d 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -291,6 +291,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 953a75c35f..a70abb117a 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
@@ -68,7 +69,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/core/lib/core/bits.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -1400,8 +1400,8 @@ static bool ReductionPreservesLayout(const HloInstruction& reduce) {
   // [0->0, 3->1].
   absl::flat_hash_map<int64, int64> unreduced_dim_map;
 
-  gtl::FlatSet<int64> reduced_dims(reduce.dimensions().begin(),
-                                   reduce.dimensions().end());
+  absl::flat_hash_set<int64> reduced_dims(reduce.dimensions().begin(),
+                                          reduce.dimensions().end());
 
   const Shape& operand_shape = reduce.operand(0)->shape();
   const Shape& result_shape = reduce.shape();
@@ -1977,7 +1977,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) {
   //
   // * Implement the memcpy within the innermost loop.
 
-  gtl::FlatSet<int64> inner_dims;
+  absl::flat_hash_set<int64> inner_dims;
   for (int64 dim : LayoutUtil::MinorToMajor(layout)) {
     if (operand->shape().dimensions(dim) != slice->shape().dimensions(dim)) {
       break;
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
index 7af51db55a..b35fd9dad8 100644
--- a/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
@@ -121,7 +121,7 @@ TEST_F(CpuNoAliasTest, Concat) {
     CHECK: %read_concat2_array = load {{.*}} !alias.scope [[concat1_noalias]], !noalias [[concat1_scope]]
     CHECK-DAG: [[buf_size32:![0-9]+]] = !{!"buffer:{{.*}} size:32
     CHECK-DAG: [[buf_size48:![0-9]+]] = !{!"buffer:{{.*}} size:48
-    CHECK-DAG: [[param_x_noalias]] = !{[[buf_size32]], [[buf_size48]]}
+    CHECK-DAG: [[param_x_noalias]] = !{[[buf_size48]], [[buf_size32]]}
     CHECK-DAG: [[concat1_scope]] = !{[[buf_size32]]}
     CHECK-DAG: [[concat1_noalias]] = !{[[buf_size48]]}
   )";
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index e65d3fa332..a838464cae 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -476,6 +476,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:instruction_fusion",
         "//tensorflow/compiler/xla/service:pattern_matcher",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -508,6 +509,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:multi_output_fusion",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -541,6 +543,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
index 79c74e7e8b..e2ab00ce41 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <set>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
@@ -27,7 +28,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
index 4d5d8e99f8..b61f038739 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -125,8 +126,8 @@ bool IsIEEEFloatingPointScalarConstant(const HloInstruction* constant) {
   }
 
   // Compute the precise number of operands to the new fusion.
-  tensorflow::gtl::FlatSet<const HloInstruction*> operands(
-      a->operands().begin(), a->operands().end());
+  absl::flat_hash_set<const HloInstruction*> operands(a->operands().begin(),
+                                                      a->operands().end());
   operands.insert(b->operands().begin(), b->operands().end());
   // If there's an edge between `a` and `b`, don't count it: We're fusing that
   // producer -> consumer relationship.
diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
index c21f76f6eb..835924024b 100644
--- a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h"
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -101,7 +101,7 @@ bool GpuMultiOutputFusion::IsFusible(HloInstruction* instr) {
 
 int64 GpuMultiOutputFusion::GetProfit(HloInstruction* instr1,
                                       HloInstruction* instr2) {
-  tensorflow::gtl::FlatSet<HloInstruction*> in_list;
+  absl::flat_hash_set<HloInstruction*> in_list;
   for (auto instr : instr1->operands()) {
     if (!IsProfitableOperand(instr)) {
       continue;
@@ -148,7 +148,7 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() {
   bool changed = false;
   RecomputeReachability();
 
-  tensorflow::gtl::FlatSet<HloInstruction*> to_fuse;
+  absl::flat_hash_set<HloInstruction*> to_fuse;
   // Keep a list of the instructions to fuse after making all the fusion
   // decisions. We first aggressively add instructions to potential_fusion_list,
   // then filter out instructions that will be no longer fusible because of
diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index 147776c8c4..b343305554 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -26,7 +27,7 @@ limitations under the License.
 namespace xla {
 
 using absl::flat_hash_map;
-using tensorflow::gtl::FlatSet;
+using absl::flat_hash_set;
 
 /*static*/
 StatusOr<int64> HeapSimulator::MinimumMemoryForModule(
@@ -116,9 +117,9 @@ Status HeapSimulator::RunComputation(
   // 'used_buffers' is the reverse map - it tracks which buffers were used by an
   // instruction, so that we can remove the instructions from a buffer's live
   // set after they are visited.
-  flat_hash_map<const BufferValue*, FlatSet<const HloInstruction*>>
+  flat_hash_map<const BufferValue*, flat_hash_set<const HloInstruction*>>
       live_buffers;
-  flat_hash_map<const HloInstruction*, FlatSet<const BufferValue*>>
+  flat_hash_map<const HloInstruction*, flat_hash_set<const BufferValue*>>
       used_buffers;
   auto add_user_to_buffer = [this, &live_buffers, &used_buffers](
                                 const HloInstruction* user,
@@ -216,7 +217,7 @@ Status HeapSimulator::RunComputation(
       VLOG(4) << "  Removing user " << instruction->name() << " from buffer "
               << operand_buffer->ToString();
       auto it = live_buffers.find(operand_buffer);
-      FlatSet<const HloInstruction*>* live_set = &it->second;
+      flat_hash_set<const HloInstruction*>* live_set = &it->second;
       live_set->erase(instruction);
       if (live_set->empty()) {
         live_buffers.erase(it);
@@ -238,7 +239,7 @@ Status HeapSimulator::RunComputation(
     // that we should assign.
 
     // Make sure each buffer get reused at most once.
-    FlatSet<const BufferValue*> reused_buffers;
+    flat_hash_set<const BufferValue*> reused_buffers;
     for (const BufferValue* buffer : buffers_defined_by_instruction) {
       if (IgnoreBuffer(buffer)) {
         continue;
@@ -326,7 +327,7 @@ Status HeapSimulator::RunComputation(
   to_free.reserve(live_buffers.size());
   for (const auto& buffer_pending : live_buffers) {
     const BufferValue* buffer = buffer_pending.first;
-    const FlatSet<const HloInstruction*>& pending = buffer_pending.second;
+    const flat_hash_set<const HloInstruction*>& pending = buffer_pending.second;
     CHECK_EQ(pending.size(), 1) << *buffer;
     CHECK(*pending.begin() == nullptr) << *buffer;
     to_free.push_back(buffer);
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index a5bb3f81f7..b0295a6163 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
 #include "tensorflow/compiler/xla/service/buffer_value_containers.h"
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -197,8 +197,8 @@ class HeapSimulator {
       shared_buffers_;
 
   // Hold some sets for error-checking the sequence of Alloc and Free calls.
-  tensorflow::gtl::FlatSet<const BufferValue*> allocated_buffers_;
-  tensorflow::gtl::FlatSet<const BufferValue*> freed_buffers_;
+  absl::flat_hash_set<const BufferValue*> allocated_buffers_;
+  absl::flat_hash_set<const BufferValue*> freed_buffers_;
 
   // Debugging information filled in while the heap simulator runs.
   HeapSimulatorTrace debug_trace_;
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index b6e1f52cf5..c3da12e273 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -120,7 +121,7 @@ class BufferValueMap {
   }
 
   // Return a set of all the values in the given buffer.
-  const tensorflow::gtl::FlatSet<const HloValue*>& GetValuesInBuffer(
+  const absl::flat_hash_set<const HloValue*>& GetValuesInBuffer(
       BufferNumber buffer_number) const {
     return buffers_.at(buffer_number);
   }
@@ -143,7 +144,7 @@ class BufferValueMap {
   // Move the given value into the given buffer.
   void MoveValueToBuffer(const HloValue& value, BufferNumber buffer_number) {
     BufferNumber old_buffer_number = value_to_buffer_number_.at(&value);
-    tensorflow::gtl::FlatSet<const HloValue*>& old_value_set =
+    absl::flat_hash_set<const HloValue*>& old_value_set =
         buffers_.at(old_buffer_number);
     old_value_set.erase(&value);
     if (old_value_set.empty()) {
@@ -291,7 +292,7 @@ class BufferValueMap {
   const HloDataflowAnalysis& dataflow_;
 
   // A map containing the set of values contained in each buffer.
-  absl::flat_hash_map<BufferNumber, tensorflow::gtl::FlatSet<const HloValue*>>
+  absl::flat_hash_map<BufferNumber, absl::flat_hash_set<const HloValue*>>
       buffers_;
 
   // A map indicating which buffer each value is contained in.
@@ -351,7 +352,7 @@ bool HloAliasAnalysis::InstructionBuffersAreAmbiguous(
 
 bool HloAliasAnalysis::InstructionBuffersAreDistinct(
     const HloInstruction* instruction) const {
-  tensorflow::gtl::FlatSet<const HloBuffer*> buffers_seen;
+  absl::flat_hash_set<const HloBuffer*> buffers_seen;
   for (const auto& pair :
        dataflow_analysis_->GetInstructionValueSet(instruction)) {
     const HloValueSet& value_set = pair.second;
diff --git a/tensorflow/compiler/xla/service/hlo_buffer.cc b/tensorflow/compiler/xla/service/hlo_buffer.cc
index 6c11a073b7..9c3aa0e64d 100644
--- a/tensorflow/compiler/xla/service/hlo_buffer.cc
+++ b/tensorflow/compiler/xla/service/hlo_buffer.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 257dd5876f..6ef67ab0a8 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -25,6 +25,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
@@ -40,7 +41,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -278,10 +278,9 @@ void HloComputation::set_root_instruction(HloInstruction* new_root_instruction,
 namespace {
 
 // Helper which builds a post order of the HLO call graph.
-void ComputeComputationPostOrder(
-    HloComputation* computation,
-    tensorflow::gtl::FlatSet<HloComputation*>* visited,
-    std::vector<HloComputation*>* post_order) {
+void ComputeComputationPostOrder(HloComputation* computation,
+                                 absl::flat_hash_set<HloComputation*>* visited,
+                                 std::vector<HloComputation*>* post_order) {
   if (visited->insert(computation).second) {
     for (auto* instruction : computation->instructions()) {
       for (HloComputation* called_computation :
@@ -416,7 +415,7 @@ std::vector<HloInstruction*> HloComputation::MakeInstructionPostOrder() const {
 
 std::vector<HloComputation*> HloComputation::MakeEmbeddedComputationsList()
     const {
-  tensorflow::gtl::FlatSet<HloComputation*> visited;
+  absl::flat_hash_set<HloComputation*> visited;
   std::vector<HloComputation*> post_order;
 
   // To avoid special handling of this computation, cast away const of
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index af929ac009..d87ab4bda1 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -26,6 +26,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/iterator_util.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -41,7 +42,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/hlo_cse.cc b/tensorflow/compiler/xla/service/hlo_cse.cc
index b59c9ba3ed..e602107cbe 100644
--- a/tensorflow/compiler/xla/service/hlo_cse.cc
+++ b/tensorflow/compiler/xla/service/hlo_cse.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/literal.h"
@@ -34,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 
 namespace xla {
@@ -137,8 +137,8 @@ StatusOr<bool> HloCSE::Run(HloModule* module) {
     // HLO instructions are grouped into equivalency classes by using the
     // cse_equal predicate defined above. This set holds a representative
     // instruction for each class.
-    tensorflow::gtl::FlatSet<HloInstruction*, decltype(&CseHash),
-                             decltype(cse_equal)>
+    absl::flat_hash_set<HloInstruction*, decltype(&CseHash),
+                        decltype(cse_equal)>
         representatives(/*N=*/computation->instruction_count() + 1, &CseHash,
                         cse_equal);
     for (auto instruction : computation->MakeInstructionPostOrder()) {
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index 6a63681996..44cde4a3d2 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <queue>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
@@ -91,7 +92,7 @@ HloDataflowAnalysis::HloDataflowAnalysis(
 
 bool HloDataflowAnalysis::AreTransitiveUsesElementwiseOrTuple(
     const HloInstruction* inst) {
-  tensorflow::gtl::FlatSet<const HloInstruction*> visited;
+  absl::flat_hash_set<const HloInstruction*> visited;
   absl::InlinedVector<const HloInstruction*, 4> stack;
   stack.push_back(inst);
   while (!stack.empty()) {
@@ -159,8 +160,8 @@ void HloDataflowAnalysis::MarkValueForDeletion(HloValue::Id value_id) {
 void HloDataflowAnalysis::DeleteMarkedValues() {
 #ifndef NDEBUG
   // Verify that no marked-for-deletion values are in any of the value sets.
-  tensorflow::gtl::FlatSet<HloValue::Id> id_set(value_ids_to_delete_.begin(),
-                                                value_ids_to_delete_.end());
+  absl::flat_hash_set<HloValue::Id> id_set(value_ids_to_delete_.begin(),
+                                           value_ids_to_delete_.end());
   for (const auto& pair : value_sets_) {
     const HloInstruction* instruction = pair.first;
     const InstructionValueSet& instruction_value_set = pair.second;
@@ -673,7 +674,7 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet(
 
 void HloDataflowAnalysis::Propagate() {
   std::queue<HloInstruction*> worklist;
-  tensorflow::gtl::FlatSet<HloInstruction*> workset;
+  absl::flat_hash_set<HloInstruction*> workset;
   auto add_to_worklist = [&worklist, &workset](HloInstruction* instruction) {
     if (workset.insert(instruction).second) {
       worklist.push(instruction);
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc
index 159c39d557..6ca1255ede 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -217,7 +218,7 @@ bool HloDomainMap::IsDomainInstruction(HloInstruction* instruction) const {
 
 /* static */ std::vector<HloInstruction*>
 HloDomainMap::MakeNonDomainInstructions(
-    const tensorflow::gtl::FlatSet<HloInstruction*>& instruction_set,
+    const absl::flat_hash_set<HloInstruction*>& instruction_set,
     const InstructionOrderMap& instructions_order) {
   std::vector<HloInstruction*> instructions;
   instructions.reserve(instruction_set.size());
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h b/tensorflow/compiler/xla/service/hlo_domain_map.h
index 8584bc021d..c8d581b746 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.h
@@ -20,13 +20,13 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_domain_metadata.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -110,7 +110,7 @@ class HloDomainMap {
   // Out of an instruction set, returns a vector of all the ones which are not
   // a kDomain kind.
   static std::vector<HloInstruction*> MakeNonDomainInstructions(
-      const tensorflow::gtl::FlatSet<HloInstruction*>& instruction_set,
+      const absl::flat_hash_set<HloInstruction*>& instruction_set,
       const InstructionOrderMap& instructions_order);
 
   // Populates domain_metadata_id_ that maps each HloInstruction to the unique
diff --git a/tensorflow/compiler/xla/service/hlo_domain_metadata.h b/tensorflow/compiler/xla/service/hlo_domain_metadata.h
index 302807f816..d3c83c15ae 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_metadata.h
@@ -20,11 +20,11 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -42,7 +42,7 @@ class DomainMetadata {
     // operand/user pathways, without crossing a kDomain instruction of a given
     // kind. The reach_set can contain kDomain instructions of other kinds, if
     // two domains of different kind intersect each other.
-    tensorflow::gtl::FlatSet<HloInstruction*> reach_set;
+    absl::flat_hash_set<HloInstruction*> reach_set;
 
     // The same instructions in reach_set, but purged from kDomain instructions
     // and ordered according to their computation graph post-order, i.e.
@@ -55,8 +55,8 @@ class DomainMetadata {
     // whose dataflow enters the reach set (domain), while the exit_domains
     // contains the set of kDomain instructions whose dataflow exit the reach
     // set.
-    tensorflow::gtl::FlatSet<HloInstruction*> enter_domains;
-    tensorflow::gtl::FlatSet<HloInstruction*> exit_domains;
+    absl::flat_hash_set<HloInstruction*> enter_domains;
+    absl::flat_hash_set<HloInstruction*> exit_domains;
   };
 
   virtual ~DomainMetadata() = default;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 5d5c9c7e58..0207f9ae3f 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/ascii.h"
@@ -44,7 +45,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/human_readable_json.h"
 #include "tensorflow/core/platform/logging.h"
@@ -1433,7 +1433,7 @@ int64 HloInstruction::operand_index(const HloInstruction* target) const {
 
 HloInstruction::InstructionVector HloInstruction::unique_operands() const {
   InstructionVector unique;
-  tensorflow::gtl::FlatSet<const HloInstruction*> seen;
+  absl::flat_hash_set<const HloInstruction*> seen;
   for (HloInstruction* operand : operands()) {
     if (seen.insert(operand).second) {
       unique.push_back(operand);
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index 1c2b2868fd..55314d0ae9 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
@@ -111,7 +112,7 @@ class ListScheduler {
     // LogicalBuffer is in an operand of the instruction as indicated by
     // points-to analysis.
     for (auto* instruction : computation.instructions()) {
-      tensorflow::gtl::FlatSet<const LogicalBuffer*> instr_uses;
+      absl::flat_hash_set<const LogicalBuffer*> instr_uses;
       for (auto* operand : instruction->operands()) {
         points_to_analysis.GetPointsToSet(operand).ForEachElement(
             [&](const ShapeIndex& /*index*/,
@@ -360,7 +361,7 @@ class ListScheduler {
   std::unordered_map<const LogicalBuffer*, int64> unscheduled_use_count_;
 
   // Set of instructions which have been scheduled.
-  tensorflow::gtl::FlatSet<const HloInstruction*> scheduled_instructions_;
+  absl::flat_hash_set<const HloInstruction*> scheduled_instructions_;
 };
 
 int64 SumLogicalBufferSizes(
@@ -418,7 +419,7 @@ StatusOr<HloInstructionSequence> DFSMemoryScheduler(
         points_to_analysis.GetBuffersDefinedByInstruction(hlo), size_function);
     total_sizes[hlo] = logical_buffer_size;
     cumulative_total_size += logical_buffer_size;
-    tensorflow::gtl::FlatSet<const HloInstruction*> unique_operands(
+    absl::flat_hash_set<const HloInstruction*> unique_operands(
         hlo->operands().begin(), hlo->operands().end());
     for (const HloInstruction* operand : unique_operands) {
       extra_users[hlo] += extra_users[operand];
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 9359e9a8be..7527e35c95 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -24,6 +24,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -328,10 +329,10 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
 
   // Because we didn't uniquify the names or the ids, double-check that the
   // instruction and computation names and ids are unique from the proto.
-  tensorflow::gtl::FlatSet<string> computation_names;
-  tensorflow::gtl::FlatSet<string> instruction_names;
-  tensorflow::gtl::FlatSet<int> computation_ids;
-  tensorflow::gtl::FlatSet<int> instruction_ids;
+  absl::flat_hash_set<string> computation_names;
+  absl::flat_hash_set<string> instruction_names;
+  absl::flat_hash_set<int> computation_ids;
+  absl::flat_hash_set<int> instruction_ids;
   for (HloComputation* computation : module->computations()) {
     TF_RET_CHECK(!ContainsKey(computation_names, computation->name()))
         << "Computation name is not unique: " << computation->name();
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.cc b/tensorflow/compiler/xla/service/hlo_module_group_util.cc
index d83ee71490..fddeb5f0a2 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_util.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_util.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
@@ -32,7 +33,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -42,7 +42,7 @@ std::vector<HloInstruction*> HloModuleGroupUtil::GlobalPredecessors(
     HloInstruction* instruction) {
   std::vector<HloInstruction*>
       predecessors;  // Use a vector to avoid non-determinism.
-  tensorflow::gtl::FlatSet<HloInstruction*> unique;
+  absl::flat_hash_set<HloInstruction*> unique;
 
   // Adds to the unique predecessors list; if the predecessors is a companion
   // instruction, also add companion instructions; if the predecessors is a
@@ -119,7 +119,7 @@ std::vector<HloInstruction*> HloModuleGroupUtil::GlobalSuccessors(
     HloInstruction* instruction) {
   std::vector<HloInstruction*>
       successors;  // Use a vector to avoid non-determinism.
-  tensorflow::gtl::FlatSet<HloInstruction*> unique;
+  absl::flat_hash_set<HloInstruction*> unique;
 
   // Adds to the unique successors list; if the successor is a companion
   // instruction, also add companion instructions; if the successor is a
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
index 59fd01cb58..5e004ce78a 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <functional>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
@@ -25,7 +26,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -75,8 +75,8 @@ StatusOr<bool> HloPassPipeline::RunPassesInternal(
 std::vector<HloPassInterface*> HloPassPipeline::GetEnabledPasses(
     const DebugOptions& debug_options) {
   auto repeated_field = debug_options.xla_disable_hlo_passes();
-  tensorflow::gtl::FlatSet<string> disabled_pass_names(repeated_field.begin(),
-                                                       repeated_field.end());
+  absl::flat_hash_set<string> disabled_pass_names(repeated_field.begin(),
+                                                  repeated_field.end());
   if (!disabled_pass_names.empty()) {
     VLOG(1) << "Passes disabled by --xla_disable_hlo_passes: "
             << absl::StrJoin(disabled_pass_names, ", ");
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index abdd9a9212..5ac43808ee 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -981,7 +982,7 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
   // rematerialization is essentially a move). If the next rematerialization of
   // the instruction is also a move then the rematerialization is added to the
   // blacklist.
-  tensorflow::gtl::FlatSet<const HloInstruction*> remat_move_instructions;
+  absl::flat_hash_set<const HloInstruction*> remat_move_instructions;
 
   // The map from instructions to their rematerializable status.
   absl::flat_hash_map<const HloInstruction*, bool> remat_able;
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h
index 5a02e3a8bb..70d83c04f0 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.h
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h
@@ -16,6 +16,7 @@
 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_REMATERIALIZATION_H_
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -122,7 +123,7 @@ class HloRematerialization : public HloModulePass {
 
   // Set of computations which have had rematerialization
   // applied. Rematerialization is only applied once per computation.
-  tensorflow::gtl::FlatSet<const HloComputation*> rematerialized_computations_;
+  absl::flat_hash_set<const HloComputation*> rematerialized_computations_;
 
   // Count of the total instructions rematerialized.
   int64 instructions_rematerialized_ = 0;
diff --git a/tensorflow/compiler/xla/service/hlo_schedule.cc b/tensorflow/compiler/xla/service/hlo_schedule.cc
index 7c5c98f04e..9972eb2077 100644
--- a/tensorflow/compiler/xla/service/hlo_schedule.cc
+++ b/tensorflow/compiler/xla/service/hlo_schedule.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -119,7 +120,7 @@ Status HloSchedule::UpdateComputationSchedule(
   }
 
   // Set of all HloInstructions in the schedule.
-  tensorflow::gtl::FlatSet<int> ids_in_schedule;
+  absl::flat_hash_set<int> ids_in_schedule;
   for (int id : sequences_.at(computation->unique_id()).ids()) {
     InsertOrDie(&ids_in_schedule, id);
   }
@@ -210,7 +211,7 @@ Status HloSchedule::Update() {
   if (sequences_.size() > nonfusion_computations.size()) {
     // Schedule contains some computations which have been removed from the
     // HloModule. Remove them from the schedule as well.
-    tensorflow::gtl::FlatSet<int64> nonfusion_computations_ids;
+    absl::flat_hash_set<int64> nonfusion_computations_ids;
     for (const HloComputation* computation : nonfusion_computations) {
       nonfusion_computations_ids.insert(computation->unique_id());
     }
diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc
index 8549487702..59594ab2f0 100644
--- a/tensorflow/compiler/xla/service/hlo_value.cc
+++ b/tensorflow/compiler/xla/service/hlo_value.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -167,7 +167,7 @@ void HloValue::SetPositionsAndComputeUses(
   positions_.insert(positions_.end(), positions.begin(), positions.end());
 
   // Gather the computation roots at which this value appears.
-  tensorflow::gtl::FlatSet<HloInstruction*> root_positions;
+  absl::flat_hash_set<HloInstruction*> root_positions;
   for (const HloPosition& position : positions_) {
     if (position.instruction ==
         position.instruction->parent()->root_instruction()) {
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
index 7ee789276d..1ebb331977 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -24,7 +25,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_evaluator.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 namespace gtl = ::tensorflow::gtl;
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index 1591256fad..15f0adcaaf 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -26,6 +26,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -39,7 +40,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -504,7 +504,7 @@ class LayoutAssignment : public HloModulePass {
 
   // Every copy added to the module by the layout assignment pass is registered
   // here.
-  tensorflow::gtl::FlatSet<HloInstruction*> added_copies_;
+  absl::flat_hash_set<HloInstruction*> added_copies_;
 
   // The pointer to the channel layout constraints passed in with the
   // constructor. If not nullptr, this is an input/output argument.
@@ -521,8 +521,7 @@ class LayoutAssignment : public HloModulePass {
 
   // The set of HLO instructions which lacked any layout constraint, thus
   // receiving propagated default layouts.
-  tensorflow::gtl::FlatSet<const HloInstruction*>
-      unconstrained_layout_instructions_;
+  absl::flat_hash_set<const HloInstruction*> unconstrained_layout_instructions_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD
index 3934d2e493..6223a34b12 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/BUILD
+++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -39,6 +39,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:logical_buffer",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@llvm//:core",
     ],
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
index e5370eca56..643ecd0fba 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h"
 
-#include <unordered_set>
+#include <map>
 
 #include "llvm/IR/MDBuilder.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
@@ -164,9 +164,7 @@ llvm::MDNode* AliasAnalysis::GetNoaliasMetadataForBuffer(
     add_buffers_to_worklist(operand);
   }
 
-  tensorflow::gtl::FlatSet<BufferAllocation::Slice,
-                           BufferAllocation::Slice::Hasher>
-      buffers;
+  std::set<BufferAllocation::Slice> buffers;
   for (const LogicalBuffer* buffer : worklist) {
     // Skip buffers which cannot be added to the noalias set.
     if (!assignment.HasAllocation(*buffer) ||
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
index 88cde2d3d9..2b46b3c396 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
@@ -23,7 +23,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 namespace llvm_ir {
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc
index 95b1c20663..2ca527bc4c 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc
@@ -15,10 +15,10 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/multi_output_fusion.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -50,7 +50,7 @@ StatusOr<bool> MultiOutputFusion::Run(HloModule* module) {
       all_fusion_candidates_.push_back(instruction);
 
       std::vector<HloInstruction*> candidates;
-      tensorflow::gtl::FlatSet<HloInstruction*> candidates_set;
+      absl::flat_hash_set<HloInstruction*> candidates_set;
       VLOG(10) << "Looking at instruction: " << instruction->name();
       for (auto operand : instruction->operands()) {
         // Filter out the non-interesting instructions -- they
@@ -172,7 +172,7 @@ void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) {
   // Update the fusible list for fusion. Variable new_fusibles keeps
   // track of the new or changed entries.
   std::vector<std::pair<HloInstruction*, int64>> new_fusibles;
-  tensorflow::gtl::FlatSet<HloInstruction*> in_list;
+  absl::flat_hash_set<HloInstruction*> in_list;
   auto it = fusion_node.fusibles.begin();
   while (it != fusion_node.fusibles.end()) {
     HloInstruction* instr = it->first;
diff --git a/tensorflow/compiler/xla/service/name_uniquer.h b/tensorflow/compiler/xla/service/name_uniquer.h
index 1ac60f1cf4..8909d0f4fe 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.h
+++ b/tensorflow/compiler/xla/service/name_uniquer.h
@@ -19,9 +19,9 @@ limitations under the License.
 #include <string>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 
 namespace xla {
@@ -69,7 +69,7 @@ class NameUniquer {
     int64 next_ = 0;
 
     // Set of all the identifiers which has been used.
-    tensorflow::gtl::FlatSet<int64> used_;
+    absl::flat_hash_set<int64> used_;
   };
 
   // The string to use to separate the prefix of the name from the uniquing
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 6ccea9d2b5..e379911462 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
@@ -33,7 +34,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -577,7 +577,7 @@ Status ValidateDotDimensionNumbers(
   // Check that dimension numbers are unique.
   auto dims_unique = [](absl::Span<const int64> contracting_dims,
                         absl::Span<const int64> batch_dims) -> bool {
-    tensorflow::gtl::FlatSet<int64> dim_set;
+    absl::flat_hash_set<int64> dim_set;
     auto is_unique = [&dim_set](int64 i) -> bool {
       return dim_set.insert(i).second;
     };
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc
index 921a984589..56952e3ada 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.cc
+++ b/tensorflow/compiler/xla/service/shaped_buffer.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -26,7 +27,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -147,7 +147,7 @@ void ScopedShapedBuffer::Deallocate() {
   // Deallocate all non-null buffers. A buffer may appear in more than one spot
   // in the shape (eg, a tuple with a repeated element) so keep track of what
   // has been deallocated.
-  tensorflow::gtl::FlatSet<void*> deallocated_ptrs;
+  absl::flat_hash_set<void*> deallocated_ptrs;
   for (auto& pair : buffers_) {
     se::DeviceMemoryBase& memory_base = pair.second;
     if (!memory_base.is_null() &&
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
index 78392d3bb2..64ad1dc80e 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
@@ -36,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/compactptrset.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
index 2590473c77..9795b2830b 100644
--- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
+++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
@@ -16,17 +16,17 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h"
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/service/tuple_util.h"
 #include "tensorflow/compiler/xla/service/while_util.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
 using absl::flat_hash_map;
+using absl::flat_hash_set;
 using absl::InlinedVector;
-using tensorflow::gtl::FlatSet;
 
 // Copies `to_hoist` to the computation containing `while_instr`, hoisting its
 // operands as needed.  All of its transitive operands are expected to be either
@@ -35,7 +35,7 @@ using tensorflow::gtl::FlatSet;
 // them into `hoisted_instructions`.
 static void CreateLoopInvariantCopy(
     flat_hash_map<HloInstruction*, HloInstruction*>* hoisted_instructions,
-    FlatSet<HloInstruction*>* unhoisted_invariant_instructions,
+    flat_hash_set<HloInstruction*>* unhoisted_invariant_instructions,
     HloInstruction* while_instr, HloInstruction* to_hoist) {
   HloComputation* parent_of_while = while_instr->parent();
   HloComputation* while_body = while_instr->while_body();
@@ -153,7 +153,7 @@ WhileLoopInvariantCodeMotion::TryHoistingInvariantInstructionsFromWhileBody(
   // unprofitable to be hoisted alone by NotWorthHoistingIndividually.  When we
   // hoist an instruction in this set, we move it from
   // unhoisted_invariant_instructions to hoisted_instructions.
-  FlatSet<HloInstruction*> unhoisted_invariant_instructions;
+  flat_hash_set<HloInstruction*> unhoisted_invariant_instructions;
 
   // Invariant GTE's axiomatically satisfy the constraints for
   // unhoisted_invariant_instructions -- they can be legally hoisted, but there
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index 07de8492ba..630d71e5ca 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/types/optional.h"
@@ -114,7 +115,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
     return false;
   }
 
-  tensorflow::gtl::FlatSet<int64> used_tuple_indices;
+  absl::flat_hash_set<int64> used_tuple_indices;
   for (HloComputation* comp : {while_body, while_cond}) {
     // The HLO verifier ensures that while_input's shape matches while_init's
     // shape, which we verified above is a tuple.
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 06b6330321..8a0ae33042 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -2146,11 +2146,11 @@ xla_test(
         ":test_utils",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla/client:xla_builder",
-        "//tensorflow/compiler/xla/client:xla_computation",
         "//tensorflow/compiler/xla/service:hlo_parser",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/tests/test_utils_test.cc b/tensorflow/compiler/xla/tests/test_utils_test.cc
index 181e5cbe29..bc433eac8f 100644
--- a/tensorflow/compiler/xla/tests/test_utils_test.cc
+++ b/tensorflow/compiler/xla/tests/test_utils_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/shape_util.h"
@@ -145,7 +146,7 @@ ENTRY %sort.148.1589 (parameter.0: f32[1048576], parameter.1: s32[1048576]) -> (
   ASSERT_EQ(args.size(), 2);
   const Literal& key_arg = args[0];
 
-  tensorflow::gtl::FlatSet<uint32> key_set;
+  absl::flat_hash_set<uint32> key_set;
   for (const float& value : key_arg.data<float>()) {
     EXPECT_TRUE(key_set.insert(tensorflow::bit_cast<uint32>(value)).second);
   }
@@ -168,7 +169,7 @@ ENTRY %sort.148.1589 (parameter.0: s32[1048576], parameter.1: s32[1048576]) -> (
   ASSERT_EQ(args.size(), 2);
   const Literal& key_arg = args[0];
 
-  tensorflow::gtl::FlatSet<int32> key_set;
+  absl::flat_hash_set<int32> key_set;
   for (const int32& value : key_arg.data<int32>()) {
     EXPECT_TRUE(key_set.insert(tensorflow::bit_cast<uint32>(value)).second);
   }
-- 
GitLab


From 350388fca9cb9509962ff393a9d21fb2879c9179 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 1 Oct 2018 19:56:47 -0700
Subject: [PATCH 0199/1085] Add mode_override to the TPU embedding enqueue ops.
 This allows the mode to be overridden at runtime allowing dynamic switching
 between inference and training modes. Not fully implemented yet.

PiperOrigin-RevId: 215325071
---
 tensorflow/contrib/tpu/BUILD                  |   3 +
 .../contrib/tpu/ops/tpu_embedding_ops.cc      |  52 ++++--
 tensorflow/contrib/tpu/python/ops/tpu_ops.py  | 148 ++++++++++++++++++
 3 files changed, 186 insertions(+), 17 deletions(-)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 0c4bdab191..10ed1c2891 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -135,6 +135,9 @@ tf_gen_op_wrapper_py(
     name = "tpu_ops",
     hidden = [
         "SendTPUEmbeddingGradients",
+        "EnqueueTPUEmbeddingIntegerBatch",
+        "EnqueueTPUEmbeddingSparseBatch",
+        "EnqueueTPUEmbeddingSparseTensorBatch",
     ],
     deps = [
         ":cross_replica_ops_op_lib",
diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index ef2f8dd36d..0ef29bdf73 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -335,7 +335,6 @@ void RegisterPerTableLoadAndRetrieveOps() {
     tpu::GradientAccumulationSupport grad_accum_support;
     TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
     if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
-      // TODO(gkurian): Condition this on being used internally within Google.
       OpRegistry::Global()->Register(
           [alg](OpRegistrationData* op_reg_data) -> Status {
             return RegisterPerTableLoadOpsForAlgorithmBody(alg, true,
@@ -353,7 +352,6 @@ void RegisterPerTableLoadAndRetrieveOps() {
     tpu::GradientAccumulationSupport grad_accum_support;
     TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
     if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
-      // TODO(gkurian): Condition this on being used internally within Google.
       OpRegistry::Global()->Register(
           [alg](OpRegistrationData* op_reg_data) -> Status {
             return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, true,
@@ -366,7 +364,7 @@ void RegisterPerTableLoadAndRetrieveOps() {
 }  // namespace
 
 REGISTER_OP("RecvTPUEmbeddingActivations")
-    .Output("outputs: num_outputs * float")
+    .Output("outputs: num_outputs * float32")
     .Attr("num_outputs: int >= 1")
     .Attr("config: string")
     .SetIsStateful()
@@ -476,7 +474,8 @@ config: Serialized TPUEmbeddingConfiguration proto.
 
 REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch")
     .Input("batch: N * int32")
-    .Attr("N: int")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
     .Attr("device_ordinal: int = -1")
     .SetIsStateful()
     .SetShapeFn(shape_inference::UnknownShape)
@@ -485,6 +484,10 @@ An op that enqueues a list of input batch tensors to TPUEmbedding.
 
 batch: A list of 1D tensors, one for each embedding table, containing the
     indices into the tables.
+mode_override: A string input that overrides the mode specified in the
+    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
 device_ordinal: The TPU device to use. Should be >= 0 and less than the number
     of TPU cores in the task on which the node is placed.
 )doc");
@@ -493,7 +496,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseBatch")
     .Input("sample_indices: N * int32")
     .Input("embedding_indices: N * int32")
     .Input("aggregation_weights: N * float32")
-    .Attr("N: int")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
     .Attr("device_ordinal: int = -1")
     .Attr("combiners: list(string) = []")
     .SetIsStateful()
@@ -523,14 +527,18 @@ The tensors at corresponding positions in the three input lists
 must have the same shape, i.e. rank 1 with dim_size() equal to the total
 number of lookups into the table described by the corresponding table_id.
 
-sample_indices: A list of Rank 1 Tensors specifying the training example and
+sample_indices: A list of rank 1 Tensors specifying the training example and
     feature to which the corresponding embedding_indices and aggregation_weights
     values belong. sample_indices[i] must equal b * nf + f, where nf is the
     number of features from the corresponding table, f is in [0, nf), and
     b is in [0, batch size).
-embedding_indices: A list of Rank 1 Tensors, indices into the embedding tables.
-aggregation_weights: A list of Rank 1 Tensors containing per sample -- i.e. per
+embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per
     (training example, feature) -- aggregation weights.
+mode_override: A string input that overrides the mode specified in the
+    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
 device_ordinal: The TPU device to use. Should be >= 0 and less than the number
     of TPU cores in the task on which the node is placed.
 combiners: A list of string scalars, one for each embedding table that specify
@@ -545,7 +553,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
     .Input("sample_indices: N * int32")
     .Input("embedding_indices: N * int32")
     .Input("aggregation_weights: N * float32")
-    .Attr("N: int")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
     .Attr("device_ordinal: int = -1")
     .Attr("combiners: list(string) = []")
     .Attr("table_ids: list(int)")
@@ -555,7 +564,7 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
 This Op eases the porting of code that uses tf.nn.embedding_lookup_sparse().
 
 sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond
-to ith feature. table_ids[i] indicates which embedding table to look up ith
+to the ith feature. table_ids[i] indicates which embedding table to look up ith
 feature.
 
 The tensors at corresponding positions in the three input lists (sample_indices,
@@ -563,12 +572,18 @@ embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1
 with dim_size() equal to the total number of lookups into the table described by
 the corresponding feature.
 
-sample_indices: A list of Rank 1 Tensors, corresponds to sp_ids.indices[:,0] in
+sample_indices: A list of rank 1 Tensors specifying the training example to
+    which the corresponding embedding_indices and aggregation_weights values
+    belong. It corresponds to sp_ids.indices[:,0] in  embedding_lookup_sparse().
+embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+    It corresponds to sp_ids.values in embedding_lookup_sparse().
+aggregation_weights: A list of rank 1 Tensors containing per training example
+    aggregation weights. It corresponds to sp_weights.values in
     embedding_lookup_sparse().
-embedding_indices: A list of Rank 1 Tensors, corresponds to sp_ids.values
-    in embedding_lookup_sparse().
-aggregation_weights: A list of Rank 1 Tensors, corresponds to sp_weights.values
-    in embedding_lookup_sparse().
+mode_override: A string input that overrides the mode specified in the
+    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
 device_ordinal: The TPU device to use. Should be >= 0 and less than the number
     of TPU cores in the task on which the node is placed.
 combiners: A list of string scalars, one for each embedding table that specify
@@ -577,8 +592,11 @@ combiners: A list of string scalars, one for each embedding table that specify
     the sum of the weights be 0 for 'mean' or the sum of the squared weights be
     0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
     all tables.
-table_ids: A list of int. table_ids[i] indicates which embedding table to look
-    up ith feature in the list.
+table_ids: A list of integers specifying the identifier of the embedding table
+    (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the
+    corresponding input. The ith input is looked up using table_ids[i]. The size
+    of the table_ids list must be equal to that of sample_indices,
+    embedding_indices and aggregation_weights.
 )doc");
 
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
index e2e4acadab..968adccf2b 100644
--- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py
+++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
@@ -227,6 +227,154 @@ if platform.system() != "Windows":
         inputs=inputs, learning_rates=learning_rates, config=config, name=name)
 
 
+  send_tpu_embedding_gradients.__doc__ = (
+      gen_tpu_ops._send_tpu_embedding_gradients.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_integer_batch(batch,
+                                          device_ordinal,
+                                          mode_override=None,
+                                          name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      batch: A list of 1D tensors, one for each embedding table, containing the
+        indices into the tables.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingIntegerBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops._enqueue_tpu_embedding_integer_batch(
+        batch=batch,
+        device_ordinal=device_ordinal,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_integer_batch.__doc__ = (
+      gen_tpu_ops._enqueue_tpu_embedding_integer_batch.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_sparse_batch(sample_indices,
+                                         embedding_indices,
+                                         aggregation_weights,
+                                         device_ordinal,
+                                         combiners=None,
+                                         mode_override=None,
+                                         name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      sample_indices: A list of rank 1 Tensors specifying the training example
+        and feature to which the corresponding embedding_indices and
+        aggregation_weights values belong. sample_indices[i] must equal b * nf +
+        f, where nf is the number of features from the corresponding table, f is
+        in [0, nf), and b is in [0, batch size).
+      embedding_indices: A list of rank 1 Tensors, indices into the embedding
+        tables.
+      aggregation_weights: A list of rank 1 Tensors containing per sample --
+        i.e. per (training example, feature) -- aggregation weights.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      combiners: A list of string scalars, one for each embedding table that
+        specify how to normalize the embedding activations after weighted
+        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+        is to use 'sum' for all tables (optional).
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingSparseBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops._enqueue_tpu_embedding_sparse_batch(
+        sample_indices=sample_indices,
+        embedding_indices=embedding_indices,
+        aggregation_weights=aggregation_weights,
+        device_ordinal=device_ordinal,
+        combiners=combiners,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_sparse_batch.__doc__ = (
+      gen_tpu_ops._enqueue_tpu_embedding_sparse_batch.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_sparse_tensor_batch(sample_indices,
+                                                embedding_indices,
+                                                aggregation_weights,
+                                                table_ids,
+                                                device_ordinal,
+                                                combiners=None,
+                                                mode_override=None,
+                                                name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      sample_indices: A list of rank 1 Tensors specifying the training example
+        to which the corresponding embedding_indices and aggregation_weights
+        values
+        belong. It corresponds to sp_ids.indices[:,0] in
+          embedding_lookup_sparse().
+      embedding_indices: A list of rank 1 Tensors, indices into the embedding
+        tables. It corresponds to sp_ids.values in embedding_lookup_sparse().
+      aggregation_weights: A list of rank 1 Tensors containing per training
+        example aggregation weights. It corresponds to sp_weights.values in
+        embedding_lookup_sparse().
+      table_ids: A list of integers specifying the identifier of the embedding
+        table (offset of TableDescriptor in the TPUEmbeddingConfiguration) to
+        lookup the corresponding input. The ith input is looked up using
+        table_ids[i]. The size of the table_ids list must be equal to that of
+        sample_indices, embedding_indices and aggregation_weights.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      combiners: A list of string scalars, one for each embedding table that
+        specify how to normalize the embedding activations after weighted
+        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+        is to use 'sum' for all tables (optional).
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingSparseTensorBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch(
+        sample_indices=sample_indices,
+        embedding_indices=embedding_indices,
+        aggregation_weights=aggregation_weights,
+        table_ids=table_ids,
+        device_ordinal=device_ordinal,
+        combiners=combiners,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_sparse_tensor_batch.__doc__ = (
+      gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch.__doc__)
+
 else:
   # We have already built the appropriate libraries into the binary via CMake
   # if we have built contrib, so we don't need this
-- 
GitLab


From 721ab82745a113fb8cca4ce2b1f22d1d5ab5d546 Mon Sep 17 00:00:00 2001
From: Revan Sopher <rsopher@google.com>
Date: Mon, 1 Oct 2018 23:03:16 -0700
Subject: [PATCH 0200/1085] Loosen test bounds.

PiperOrigin-RevId: 215338403
---
 tensorflow/python/kernel_tests/depthwise_conv_op_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 6d1ead20be..9c02b69180 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -131,8 +131,8 @@ class DepthwiseConv2DTest(test.TestCase):
     with self.session(graph=graph, use_gpu=use_gpu) as sess:
       tolerance = {
           dtypes.float16: 4e-2,
-          dtypes.float32: 1e-8,
-          dtypes.float64: 1e-13,
+          dtypes.float32: 1e-7,
+          dtypes.float64: 1e-12,
       }[data_type]
 
       t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=data_type)
-- 
GitLab


From 9884cb36290664593682d235ce0d5e1925e3fa23 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 1 Oct 2018 23:06:12 -0700
Subject: [PATCH 0201/1085] Check that IsValid{Input|Output}Tensor is only
 given non-control edges

PiperOrigin-RevId: 215338658
---
 tensorflow/core/graph/graph.cc | 4 ++--
 tensorflow/core/graph/graph.h  | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 1630ab7a15..4c0cd14ff1 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -643,7 +643,7 @@ Status Graph::IsValidNode(const Node* node) const {
 
 Status Graph::IsValidOutputTensor(const Node* node, int idx) const {
   TF_RETURN_IF_ERROR(IsValidNode(node));
-  if (idx >= node->num_outputs()) {
+  if (idx >= node->num_outputs() || idx < 0) {
     return errors::OutOfRange("Node '", node->name(), "' (type: '",
                               node->op_def().name(),
                               "', num of outputs: ", node->num_outputs(),
@@ -654,7 +654,7 @@ Status Graph::IsValidOutputTensor(const Node* node, int idx) const {
 
 Status Graph::IsValidInputTensor(const Node* node, int idx) const {
   TF_RETURN_IF_ERROR(IsValidNode(node));
-  if (idx >= node->num_inputs()) {
+  if (idx >= node->num_inputs() || idx < 0) {
     return errors::OutOfRange("Node '", node->name(), "' (type: '",
                               node->op_def().name(),
                               "', num of inputs: ", node->num_inputs(),
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 52e9f23a76..72cef07072 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -590,12 +590,12 @@ class Graph {
   // Returns OK if `node` is non-null and belongs to this graph
   Status IsValidNode(const Node* node) const;
 
-  // Returns OK if IsValidNode(`node`) and `idx` is less than
-  // node->num_outputs()
+  // Returns OK if IsValidNode(`node`) and `idx` is a valid output.  Does not
+  // accept control outputs.
   Status IsValidOutputTensor(const Node* node, int idx) const;
 
-  // Returns OK if IsValidNode(`node`) and `idx` is less than
-  // node->num_inputs()
+  // Returns OK if IsValidNode(`node`) and `idx` a valid input.  Does not accept
+  // control inputs.
   Status IsValidInputTensor(const Node* node, int idx) const;
 
   // Create and return a new WhileContext owned by this graph. This is called
-- 
GitLab


From 38808119e9d5f8ad24bb414aab281e0fa3fde6dc Mon Sep 17 00:00:00 2001
From: Gautam <gautamrbharadwaj@gmail.com>
Date: Tue, 2 Oct 2018 11:56:06 +0530
Subject: [PATCH 0202/1085] Update backend.py

Adding missing import files in the commented examples. When trying out that particular example in commented section the TensorFlow and bumpy imports are missing
---
 tensorflow/python/keras/backend.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 584facc859..79ca4beb73 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -773,6 +773,8 @@ def is_keras_tensor(x):
 
   Examples:
   ```python
+      >>> import tensorflow as tf
+      >>> import numpy
       >>> from keras import backend as K
       >>> from keras.layers import Input, Dense
       >>> np_var = numpy.array([1, 2])
-- 
GitLab


From 7830912c03fe3939120651574d33cec01bc73fcf Mon Sep 17 00:00:00 2001
From: Gautam <gautamrbharadwaj@gmail.com>
Date: Tue, 2 Oct 2018 12:00:14 +0530
Subject: [PATCH 0203/1085] Update backend.py

adding missing import numpy
---
 tensorflow/python/keras/backend.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 584facc859..9c1581eef9 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -653,6 +653,7 @@ def variable(value, dtype=None, name=None, constraint=None):
 
   Examples:
   ```python
+      >>> import numpy as np
       >>> from keras import backend as K
       >>> val = np.array([[1, 2], [3, 4]])
       >>> kvar = K.variable(value=val, dtype='float64', name='example_var')
-- 
GitLab


From edea1be5dd98775399dbd12728e86039a14fb967 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 02:13:06 -0700
Subject: [PATCH 0204/1085] compat: Update forward compatibility horizon to
 2018-10-02

PiperOrigin-RevId: 215354927
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index bea5aa990f..3bb95b56c2 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 1)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 2)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 44da41e4900c3fd481f12c9aa4c49679c9f32fa4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 03:01:09 -0700
Subject: [PATCH 0205/1085] Fix layout assignment for cross module all reduce

Previously we could have ended up with the different HLOs being assigned
different layouts what made lowering impossible. This change enforces a
consistent layout between the communicating nodes the same way it is
done for send&recv pairs.

PiperOrigin-RevId: 215359420
---
 .../compiler/xla/service/layout_assignment.cc | 65 +++++++++++++++----
 .../xla/service/layout_assignment_test.cc     | 44 +++++++++++++
 2 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 082bf8bffe..25d5327561 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -498,6 +498,22 @@ Status LayoutAssignment::AddMandatoryConstraints(
         TF_RETURN_IF_ERROR(
             constraints->SetBufferLayout(new_shape.layout(), *buffer));
       }
+    } else if (instruction->IsCrossModuleAllReduce()) {
+      CHECK(get_channel_constraints(instruction))
+          << "Multi-module layout assignment requires ChannelLayoutConstraints";
+      int64 all_reduce_id = instruction->all_reduce_id().value();
+      if (!get_channel_constraints(instruction)
+               ->IsChannelConstrained(all_reduce_id)) {
+        continue;
+      }
+      // TODO(b/68493863): Change to use SetOperandLayout().
+      const Shape& buffer_shape = instruction->operand(0)->shape();
+      TF_RET_CHECK(ShapeUtil::IsArray(buffer_shape));
+      Shape new_buffer_shape =
+          get_channel_constraints(instruction)
+              ->LayoutShapeForChannel(buffer_shape, all_reduce_id);
+      TF_RETURN_IF_ERROR(
+          constraints->SetInstructionLayout(new_buffer_shape, instruction));
     }
   }
 
@@ -1512,19 +1528,6 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
     // Verify all layouts in the shape have been set.
     TF_RET_CHECK(LayoutUtil::HasLayout(instruction->shape()));
   }
-
-  // Copy the root instruction's result if its layout does not match the result
-  // layout constraint.
-  if (constraints.ResultLayout() != nullptr &&
-      !constraints.ResultLayout()->MatchesLayoutInShape(
-          computation->root_instruction()->shape())) {
-    TF_ASSIGN_OR_RETURN(
-        HloInstruction * new_root,
-        CreateCopyWithNewLayout(constraints.ResultLayout()->shape(),
-                                computation->root_instruction()));
-    computation->set_root_instruction(new_root);
-  }
-
   return Status::OK();
 }
 
@@ -1654,6 +1657,18 @@ Status LayoutAssignment::RunOnComputation(
     TF_RETURN_IF_ERROR(
         ConstrainChannelLayouts(computation, channel_constraints));
   }
+
+  // Copy the root instruction's result if its layout does not match the result
+  // layout constraint.
+  if (constraints.ResultLayout() != nullptr &&
+      !constraints.ResultLayout()->MatchesLayoutInShape(
+          computation->root_instruction()->shape())) {
+    TF_ASSIGN_OR_RETURN(
+        HloInstruction * new_root,
+        CreateCopyWithNewLayout(constraints.ResultLayout()->shape(),
+                                computation->root_instruction()));
+    computation->set_root_instruction(new_root);
+  }
   return Status::OK();
 }
 
@@ -1709,6 +1724,30 @@ Status LayoutAssignment::ConstrainChannelLayouts(
             ShapeUtil::GetMutableSubshape(instruction->mutable_shape(), {0});
         *send_shape = shape;
       }
+    } else if (instruction->IsCrossModuleAllReduce()) {
+      const Layout* layout =
+          get_channel_constraints(instruction)
+              ->ConstrainChannel(instruction->all_reduce_id().value(),
+                                 instruction->shape().layout());
+      if (layout != nullptr) {
+        // We found an already constrained layout which does not match the one
+        // the channel wants to impose. Either add a new kCopy, or use the
+        // existing one to marshal the correct shape.
+        HloInstruction* operand = instruction->mutable_operand(0);
+        Shape shape = operand->shape();
+        *shape.mutable_layout() = *layout;
+        if (operand->opcode() != HloOpcode::kCopy) {
+          HloInstruction* copy = operand->parent()->AddInstruction(
+              HloInstruction::CreateUnary(shape, HloOpcode::kCopy, operand));
+          RegisterAddedCopy(copy);
+          SetupCopiedInstruction(*operand, copy, {});
+          TF_RETURN_IF_ERROR(instruction->ReplaceOperandWith(0, copy));
+          operand = copy;
+        } else {
+          *operand->mutable_shape() = shape;
+        }
+        *instruction->mutable_shape() = shape;
+      }
     }
   }
   return Status::OK();
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 752a61476d..10f9a95121 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -860,6 +860,50 @@ TEST_F(LayoutAssignmentTest, ChannelLayoutMismatch) {
       ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0})));
 }
 
+TEST_F(LayoutAssignmentTest, AllReduceLayoutMissmatch) {
+  // Pin non matching layouts to parameter and root.
+  const char* module_str = R"(
+    HloModule test_module
+
+    add {
+      lhs = f32[] parameter(0)
+      rhs = f32[] parameter(1)
+      ROOT add = f32[] add(lhs, rhs)
+    }
+
+    ENTRY entry_computation {
+      param = (f32[2,2]) parameter(0)
+      gte = f32[2,2] get-tuple-element(param), index=0
+      ar.0 = f32[2,2] cross-replica-sum(gte),
+        all_reduce_id=0, replica_groups={{0}}, to_apply=add,
+        sharding={maximal device=0}
+      const = f32[2,2] constant(f32[2,2]{{0,1},{2,3}})
+      ROOT ar.1 = f32[2,2] cross-replica-sum(const),
+        all_reduce_id=0, replica_groups={{0}}, to_apply=add,
+        sharding={maximal device=1}
+    })";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
+  ComputationLayout computation_layout(
+      module->entry_computation()->ComputeProgramShape());
+  Shape param_shape = ShapeUtil::MakeTupleShape(
+      {ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {0, 1})});
+  TF_ASSERT_OK(
+      computation_layout.mutable_parameter_layout(0)->CopyLayoutFromShape(
+          param_shape));
+  computation_layout.mutable_result_layout()->ResetLayout(
+      LayoutUtil::MakeLayout({1, 0}));
+
+  ChannelLayoutConstraints channel_constraints;
+  AssignLayouts(module.get(), &computation_layout, &channel_constraints);
+
+  EXPECT_THAT(LayoutOf(module.get(), "gte"), ElementsAre(0, 1));
+  EXPECT_THAT(LayoutOf(module.get(), "ar.0"), ElementsAre(0, 1));
+  EXPECT_THAT(LayoutOf(module.get(), "ar.1"), ElementsAre(0, 1));
+  const HloInstruction* root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root->shape().layout().minor_to_major(), ElementsAre(1, 0));
+}
+
 TEST_F(LayoutAssignmentTest, CopySliceOperandToAvoidImplicitLayoutChange) {
   const char* module_str = R"(
     HloModule CopySliceOperandToAvoidImplicitLayoutChange
-- 
GitLab


From f22037abf5a6f4581f5fb6013f72f91747f22965 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 03:36:14 -0700
Subject: [PATCH 0206/1085] Add a hint parameter to
 TransferLiteralToDeviceAsync that the implementation can use to accelerate
 transfers.

PiperOrigin-RevId: 215362667
---
 tensorflow/compiler/jit/xla_device_context.cc    | 15 +++++++++++----
 tensorflow/compiler/jit/xla_device_context.h     |  3 ++-
 .../xla/service/generic_transfer_manager.cc      |  2 +-
 .../xla/service/generic_transfer_manager.h       |  7 ++++---
 .../compiler/xla/service/transfer_manager.h      | 16 +++++++++++++++-
 5 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index af83c792e5..e083652978 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -75,8 +75,9 @@ XlaTransferManager::XlaTransferManager(
   }
 }
 
-Status XlaTransferManager::TransferLiteralToDevice(
-    const Tensor& host_tensor, Tensor* device_tensor) const {
+Status XlaTransferManager::TransferLiteralToDevice(const Tensor& host_tensor,
+                                                   Tensor* device_tensor,
+                                                   bool buffer_is_fresh) const {
   xla::Shape xla_shape;
   TF_RETURN_IF_ERROR(TensorShapeToXLAShape(host_tensor.dtype(),
                                            host_tensor.shape(), &xla_shape));
@@ -97,8 +98,11 @@ Status XlaTransferManager::TransferLiteralToDevice(
     // synchronized.
     host_to_device_stream_->ThenWaitFor(stream_.get());
   }
+  xla::TransferManager::TransferToDeviceHint hint =
+      buffer_is_fresh ? xla::TransferManager::kBufferUndefined
+                      : xla::TransferManager::kNoHint;
   TF_RETURN_IF_ERROR(transfer_manager_->TransferLiteralToDeviceAsync(
-      host_to_device_stream_.get(), *literal, shaped_buffer));
+      host_to_device_stream_.get(), *literal, shaped_buffer, hint));
   if (UseMultipleStreams()) {
     auto event = std::make_shared<se::Event>(stream_->parent());
     TF_RET_CHECK(event->Init()) << "Event failed to initialize!";
@@ -165,6 +169,7 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
     return;
   }
   TensorShape shape = shape_or_status.ValueOrDie();
+  bool buffer_is_fresh = false;
   if (!xla_tensor->has_shaped_buffer()) {
     Status s =
         xla_tensor->AllocateShapedBuffer(device_tensor->dtype(), shape, client_,
@@ -173,6 +178,7 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
       done(s);
       return;
     }
+    buffer_is_fresh = true;
   }
 
   Status status;
@@ -183,7 +189,8 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
           "Tensor::CopyFrom failed when copying from CPU to XLA device"));
       return;
     }
-    status = TransferLiteralToDevice(reshaped_cpu_tensor, device_tensor);
+    status = TransferLiteralToDevice(reshaped_cpu_tensor, device_tensor,
+                                     buffer_is_fresh);
   } else {
     se::DeviceMemoryBase dev_dst_ptr =
         XlaTensor::DeviceMemoryFromTensor(*device_tensor);
diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h
index df82421294..a4c0c296fc 100644
--- a/tensorflow/compiler/jit/xla_device_context.h
+++ b/tensorflow/compiler/jit/xla_device_context.h
@@ -67,7 +67,8 @@ class XlaTransferManager {
 
  private:
   Status TransferLiteralToDevice(const Tensor& host_tensor,
-                                 Tensor* device_tensor) const;
+                                 Tensor* device_tensor,
+                                 bool buffer_is_fresh) const;
   void TransferLiteralFromDevice(Tensor* host_tensor,
                                  const Tensor& device_tensor,
                                  const StatusCallback& done) const;
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
index bec02e14f9..f92fde7f46 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
@@ -98,7 +98,7 @@ Status GenericTransferManager::TransferLiteralFromDeviceInternal(
 
 Status GenericTransferManager::TransferLiteralToDeviceAsync(
     se::Stream* stream, const LiteralSlice& literal,
-    const ShapedBuffer& device_buffer) {
+    const ShapedBuffer& device_buffer, TransferToDeviceHint /*hint*/) {
   const Shape& shape = literal.shape();
   VLOG(2) << "transferring literal shape to device: "
           << ShapeUtil::HumanString(shape)
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h
index 86c8b1c145..b1cba82b9f 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.h
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h
@@ -45,9 +45,10 @@ class GenericTransferManager : public TransferManager {
                                  MutableBorrowingLiteral literal,
                                  std::function<void(Status)> done) override;
 
-  Status TransferLiteralToDeviceAsync(
-      se::Stream* stream, const LiteralSlice& literal,
-      const ShapedBuffer& device_buffer) override;
+  Status TransferLiteralToDeviceAsync(se::Stream* stream,
+                                      const LiteralSlice& literal,
+                                      const ShapedBuffer& device_buffer,
+                                      TransferToDeviceHint hint) override;
 
   Status TransferLiteralToInfeed(se::StreamExecutor* executor,
                                  const LiteralSlice& literal) override;
diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h
index f952e64af2..9199e32d0f 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.h
+++ b/tensorflow/compiler/xla/service/transfer_manager.h
@@ -89,6 +89,16 @@ class TransferManager {
                                          const LiteralSlice& literal,
                                          const ShapedBuffer& device_buffer);
 
+  // Hint type given to TransferLiteralToDeviceAsync.
+  enum TransferToDeviceHint {
+    // No hint available.
+    kNoHint,
+
+    // The destination buffer is undefined on the device, meaning it can be
+    // transferred to eagerly rather than waiting for Stream ordering.
+    kBufferUndefined,
+  };
+
   // Transfers the given literal into the previously allocated device memory
   // represented by the given ShapedBuffer using the given executor. The shape
   // of the ShapedBuffer and DeviceShape(literal.shape()) must be compatible,
@@ -96,9 +106,13 @@ class TransferManager {
   //
   // This operation is performed asynchronously on the given stream. It returns
   // once the transfer is enqueued.
+  //
+  // The optional hint can allow implementations to optimize transfers. It is
+  // not mandatory for an implementation to obey the hint.
   virtual Status TransferLiteralToDeviceAsync(
       se::Stream* stream, const LiteralSlice& literal,
-      const ShapedBuffer& device_buffer) = 0;
+      const ShapedBuffer& device_buffer,
+      TransferToDeviceHint hint = kNoHint) = 0;
 
   // Convenience methods for transferring an array to or from the device at a
   // known address. This avoids having to construct a ShapedBuffer just to
-- 
GitLab


From 35f3046a326daea0179d024044636f2fcbb45f4a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 05:18:28 -0700
Subject: [PATCH 0207/1085] Export endpoint for the version of the
 `regex_replace` function that calls StaticRegexReplace.

PiperOrigin-RevId: 215371291
---
 .../python_api/api_def_RegexReplace.pbtxt     |  8 +-----
 tensorflow/python/ops/string_ops.py           | 25 +++++++++++--------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/tensorflow/core/api_def/python_api/api_def_RegexReplace.pbtxt b/tensorflow/core/api_def/python_api/api_def_RegexReplace.pbtxt
index b17806b338..5020844204 100644
--- a/tensorflow/core/api_def/python_api/api_def_RegexReplace.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_RegexReplace.pbtxt
@@ -1,10 +1,4 @@
 op {
   graph_op_name: "RegexReplace"
-  endpoint {
-    name: "strings.regex_replace"
-  }
-  endpoint {
-    name: "regex_replace"
-    deprecated: true
-  }
+  visibility: HIDDEN
 }
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index e83c08f643..0812f901a2 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -46,6 +46,7 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 # pylint: disable=redefined-builtin
+@tf_export("strings.regex_full_match")
 def regex_full_match(input, pattern, name=None):
   r"""Match elements of `input` with regex `pattern`.
 
@@ -73,15 +74,14 @@ def regex_full_match(input, pattern, name=None):
 
 regex_full_match.__doc__ = gen_string_ops.regex_full_match.__doc__
 
-# Expose regex_full_match in strings namespace
-tf_export("strings.regex_full_match")(regex_full_match)
 
-
-def regex_replace(source, pattern, rewrite, replace_global=True):
-  r"""Replace elements of `source` matching regex `pattern` with `rewrite`.
+@tf_export("strings.regex_replace", "regex_replace")
+@deprecation.deprecated_endpoints("regex_replace")
+def regex_replace(input, pattern, rewrite, replace_global=True, name=None):
+  r"""Replace elements of `input` matching regex `pattern` with `rewrite`.
 
   Args:
-    source: string `Tensor`, the source strings to process.
+    input: string `Tensor`, the source strings to process.
     pattern: string or scalar string `Tensor`, regular expression to use,
       see more details at https://github.com/google/re2/wiki/Syntax
     rewrite: string or scalar string `Tensor`, value to use in match
@@ -89,9 +89,10 @@ def regex_replace(source, pattern, rewrite, replace_global=True):
       text matching corresponding parenthesized group.
     replace_global: `bool`, if `True` replace all non-overlapping matches,
       else replace only the first match.
+    name: A name for the operation (optional).
 
   Returns:
-    string `Tensor` of the same shape as `source` with specified replacements.
+    string `Tensor` of the same shape as `input` with specified replacements.
   """
   if (isinstance(pattern, util_compat.bytes_or_text_types) and
       isinstance(rewrite, util_compat.bytes_or_text_types)):
@@ -99,11 +100,13 @@ def regex_replace(source, pattern, rewrite, replace_global=True):
     # use a version which performs the expensive regex compilation once at
     # creation time.
     return gen_string_ops.static_regex_replace(
-        input=source, pattern=pattern,
-        rewrite=rewrite, replace_global=replace_global)
+        input=input, pattern=pattern,
+        rewrite=rewrite, replace_global=replace_global,
+        name=name)
   return gen_string_ops.regex_replace(
-      input=source, pattern=pattern,
-      rewrite=rewrite, replace_global=replace_global)
+      input=input, pattern=pattern,
+      rewrite=rewrite, replace_global=replace_global,
+      name=name)
 
 
 @tf_export("strings.format")
-- 
GitLab


From 97d515273a1e86a861cdfb338671a42b3b1126a7 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 2 Oct 2018 07:34:40 -0700
Subject: [PATCH 0208/1085] Make
 StatelessRandomOpsTest.testRandomNormalIsFinite actually test
 stateless_random_normal.

Fixes #22611

PiperOrigin-RevId: 215385610
---
 tensorflow/compiler/tests/stateless_random_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py
index f3861043b2..e8741bc468 100644
--- a/tensorflow/compiler/tests/stateless_random_ops_test.py
+++ b/tensorflow/compiler/tests/stateless_random_ops_test.py
@@ -91,7 +91,7 @@ class StatelessRandomOpsTest(xla_test.XLATestCase):
     with self.cached_session() as sess, self.test_scope():
       for dtype in self._random_types():
         seed_t = array_ops.placeholder(dtypes.int32, shape=[2])
-        x = stateless.stateless_random_uniform(
+        x = stateless.stateless_random_normal(
             shape=[10000], seed=seed_t, dtype=dtype)
         y = sess.run(x, {seed_t: [0x12345678, 0xabcdef12]})
         self.assertTrue(np.all(np.isfinite(y)))
-- 
GitLab


From 1a56a3299e904d5a3352a3a15e4cf7401f72bbc3 Mon Sep 17 00:00:00 2001
From: joe yearsley <joe@kheironmed.com>
Date: Tue, 2 Oct 2018 16:33:37 +0100
Subject: [PATCH 0209/1085] Updated ordering for kwargs

---
 tensorflow/python/layers/core.py                       | 6 +++---
 tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt | 2 +-
 tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index 5919fa543e..e06e9aba4a 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -292,17 +292,17 @@ class Flatten(keras_layers.Flatten, base.Layer):
 
 
 @tf_export('layers.flatten')
-def flatten(inputs, data_format='channels_last', name=None):
+def flatten(inputs, name=None, data_format='channels_last'):
   """Flattens an input tensor while preserving the batch axis (axis 0).
 
   Arguments:
     inputs: Tensor input.
+    name: The name of the layer (string).
     data_format: A string, one of `channels_last` (default) or `channels_first`.
       The ordering of the dimensions in the inputs.
       `channels_last` corresponds to inputs with shape
       `(batch, height, width, channels)` while `channels_first` corresponds to
       inputs with shape `(batch, channels, height, width)`.
-    name: The name of the layer (string).
 
   Returns:
     Reshaped tensor.
@@ -319,7 +319,7 @@ def flatten(inputs, data_format='channels_last', name=None):
     # now `y` has shape `(None, None)`
   ```
   """
-  layer = Flatten(data_format=data_format, name=name)
+  layer = Flatten(name=name, data_format=data_format)
   return layer.apply(inputs)
 
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
index 5d9ea2e5a3..0c24e9c7dd 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "
+    argspec: "args=[\'inputs\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'channels_last\'], "
   }
   member_method {
     name: "max_pooling1d"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
index 5d9ea2e5a3..0c24e9c7dd 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.layers.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "flatten"
-    argspec: "args=[\'inputs\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'channels_last\', \'None\'], "
+    argspec: "args=[\'inputs\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'channels_last\'], "
   }
   member_method {
     name: "max_pooling1d"
-- 
GitLab


From 28757ad658243526d84fd16d53b9eefbf809c6ff Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 08:30:36 -0700
Subject: [PATCH 0210/1085] Use xlogy in a few places in TFP to avoid NaN's for
 certain special cases.

PiperOrigin-RevId: 215392621
---
 .../kernel_tests/distributions/beta_test.py     |  5 +++++
 .../distributions/dirichlet_test.py             | 17 +++++++++++++++++
 .../distributions/exponential_test.py           |  7 +++++++
 .../kernel_tests/distributions/gamma_test.py    |  8 ++++++++
 tensorflow/python/ops/distributions/beta.py     |  4 ++--
 .../python/ops/distributions/dirichlet.py       |  2 +-
 tensorflow/python/ops/distributions/gamma.py    |  2 +-
 7 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/distributions/beta_test.py b/tensorflow/python/kernel_tests/distributions/beta_test.py
index d580a415dd..42e81bd658 100644
--- a/tensorflow/python/kernel_tests/distributions/beta_test.py
+++ b/tensorflow/python/kernel_tests/distributions/beta_test.py
@@ -167,6 +167,11 @@ class BetaTest(test.TestCase):
     self.assertAllClose([[1., 3. / 2], [3. / 2, 15. / 8]], self.evaluate(pdf))
     self.assertEqual((2, 2), pdf.get_shape())
 
+  def testLogPdfOnBoundaryIsFiniteWhenAlphaIsOne(self):
+    b = [[0.01, 0.1, 1., 2], [5., 10., 2., 3]]
+    pdf = self.evaluate(beta_lib.Beta(1., b).prob(0.))
+    self.assertAllEqual(np.ones_like(pdf, dtype=np.bool), np.isfinite(pdf))
+
   def testBetaMean(self):
     a = [1., 2, 3]
     b = [2., 4, 1.2]
diff --git a/tensorflow/python/kernel_tests/distributions/dirichlet_test.py b/tensorflow/python/kernel_tests/distributions/dirichlet_test.py
index cace5b3ba2..0f96382453 100644
--- a/tensorflow/python/kernel_tests/distributions/dirichlet_test.py
+++ b/tensorflow/python/kernel_tests/distributions/dirichlet_test.py
@@ -83,6 +83,23 @@ class DirichletTest(test.TestCase):
     with self.assertRaisesOpError("sample last-dimension must sum to `1`"):
       self.evaluate(dist.prob([.1, .2, .8]))
 
+  def testLogPdfOnBoundaryIsFiniteWhenAlphaIsOne(self):
+    # Test concentration = 1. for each dimension.
+    concentration = 3 * np.ones((10, 10)).astype(np.float32)
+    concentration[range(10), range(10)] = 1.
+    x = 1 / 9. * np.ones((10, 10)).astype(np.float32)
+    x[range(10), range(10)] = 0.
+    dist = dirichlet_lib.Dirichlet(concentration)
+    log_prob = self.evaluate(dist.log_prob(x))
+    self.assertAllEqual(
+        np.ones_like(log_prob, dtype=np.bool), np.isfinite(log_prob))
+
+    # Test when concentration[k] = 1., and x is zero at various dimensions.
+    dist = dirichlet_lib.Dirichlet(10 * [1.])
+    log_prob = self.evaluate(dist.log_prob(x))
+    self.assertAllEqual(
+        np.ones_like(log_prob, dtype=np.bool), np.isfinite(log_prob))
+
   def testPdfZeroBatches(self):
     alpha = [1., 2]
     x = [.5, .5]
diff --git a/tensorflow/python/kernel_tests/distributions/exponential_test.py b/tensorflow/python/kernel_tests/distributions/exponential_test.py
index 367f8bb0f1..1600387585 100644
--- a/tensorflow/python/kernel_tests/distributions/exponential_test.py
+++ b/tensorflow/python/kernel_tests/distributions/exponential_test.py
@@ -65,6 +65,13 @@ class ExponentialTest(test.TestCase):
     self.assertAllClose(self.evaluate(log_pdf), expected_log_pdf)
     self.assertAllClose(self.evaluate(pdf), np.exp(expected_log_pdf))
 
+  def testExponentialLogPDFBoundary(self):
+    # Check that Log PDF is finite at 0.
+    rate = np.array([0.1, 0.5, 1., 2., 5., 10.], dtype=np.float32)
+    exponential = exponential_lib.Exponential(rate=rate)
+    log_pdf = exponential.log_prob(0.)
+    self.assertAllClose(np.log(rate), self.evaluate(log_pdf))
+
   def testExponentialCDF(self):
     batch_size = 6
     lam = constant_op.constant([2.0] * batch_size)
diff --git a/tensorflow/python/kernel_tests/distributions/gamma_test.py b/tensorflow/python/kernel_tests/distributions/gamma_test.py
index 4eff40b029..4c5b9c3ea3 100644
--- a/tensorflow/python/kernel_tests/distributions/gamma_test.py
+++ b/tensorflow/python/kernel_tests/distributions/gamma_test.py
@@ -77,6 +77,14 @@ class GammaTest(test.TestCase):
     self.assertAllClose(self.evaluate(log_pdf), expected_log_pdf)
     self.assertAllClose(self.evaluate(pdf), np.exp(expected_log_pdf))
 
+  def testGammaLogPDFBoundary(self):
+    # When concentration = 1, we have an exponential distribution. Check that at
+    # 0 we have finite log prob.
+    rate = np.array([0.1, 0.5, 1., 2., 5., 10.], dtype=np.float32)
+    gamma = gamma_lib.Gamma(concentration=1., rate=rate)
+    log_pdf = gamma.log_prob(0.)
+    self.assertAllClose(np.log(rate), self.evaluate(log_pdf))
+
   def testGammaLogPDFMultidimensional(self):
     batch_size = 6
     alpha = constant_op.constant([[2.0, 4.0]] * batch_size)
diff --git a/tensorflow/python/ops/distributions/beta.py b/tensorflow/python/ops/distributions/beta.py
index 2ba1ea6744..d6f89a3517 100644
--- a/tensorflow/python/ops/distributions/beta.py
+++ b/tensorflow/python/ops/distributions/beta.py
@@ -267,8 +267,8 @@ class Beta(distribution.Distribution):
 
   def _log_unnormalized_prob(self, x):
     x = self._maybe_assert_valid_sample(x)
-    return ((self.concentration1 - 1.) * math_ops.log(x)
-            + (self.concentration0 - 1.) * math_ops.log1p(-x))
+    return (math_ops.xlogy(self.concentration1 - 1., x) +
+            (self.concentration0 - 1.) * math_ops.log1p(-x))
 
   def _log_normalization(self):
     return (math_ops.lgamma(self.concentration1)
diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py
index 415249a958..997b1d392d 100644
--- a/tensorflow/python/ops/distributions/dirichlet.py
+++ b/tensorflow/python/ops/distributions/dirichlet.py
@@ -236,7 +236,7 @@ class Dirichlet(distribution.Distribution):
 
   def _log_unnormalized_prob(self, x):
     x = self._maybe_assert_valid_sample(x)
-    return math_ops.reduce_sum((self.concentration - 1.) * math_ops.log(x), -1)
+    return math_ops.reduce_sum(math_ops.xlogy(self.concentration - 1., x), -1)
 
   def _log_normalization(self):
     return special_math_ops.lbeta(self.concentration)
diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py
index 3293cda874..bbc64da7bc 100644
--- a/tensorflow/python/ops/distributions/gamma.py
+++ b/tensorflow/python/ops/distributions/gamma.py
@@ -225,7 +225,7 @@ class Gamma(distribution.Distribution):
 
   def _log_unnormalized_prob(self, x):
     x = self._maybe_assert_valid_sample(x)
-    return (self.concentration - 1.) * math_ops.log(x) - self.rate * x
+    return math_ops.xlogy(self.concentration - 1., x) - self.rate * x
 
   def _log_normalization(self):
     return (math_ops.lgamma(self.concentration)
-- 
GitLab


From 13643287a535581c133de529e3b02942ef7dd730 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 2 Oct 2018 18:46:11 +0300
Subject: [PATCH 0211/1085] Fix merge artifacts: replace Dataset by
 DatasetSource in Ignite Dataset.

---
 tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
index cfe59b6b23..288d485320 100644
--- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -688,7 +688,7 @@ class IgniteClient(TcpClient):
         "Unknown binary type when expected string [type_id=%d]" % header)
 
 
-class IgniteDataset(dataset_ops.Dataset):
+class IgniteDataset(dataset_ops.DatasetSource):
   """Apache Ignite is a memory-centric distributed database, caching, and
 
      processing platform for transactional, analytical, and streaming workloads,
-- 
GitLab


From 7d66a720acb756291adc99ebe444c2c00bd37d84 Mon Sep 17 00:00:00 2001
From: Anton Dmitriev <dmitrievanthony@gmail.com>
Date: Tue, 2 Oct 2018 18:57:07 +0300
Subject: [PATCH 0212/1085] Remove Ignite Dataset SSL tests by internal policy.

---
 .../python/tests/ignite_dataset_test.py       | 36 -------------------
 1 file changed, 36 deletions(-)

diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
index 1856a4fba8..ef29b5f14a 100644
--- a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
+++ b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py
@@ -46,42 +46,6 @@ class IgniteDatasetTest(test.TestCase):
     ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300)
     self._check_dataset(ds)
 
-  def test_ignite_dataset_with_ssl_client(self):
-    """Test Ignite Dataset with ssl client.
-
-    """
-    self._clear_env()
-    os.environ["IGNITE_DATASET_CERTFILE"] = os.path.dirname(
-        os.path.realpath(__file__)) + "/keystore/client.pem"
-    os.environ["IGNITE_DATASET_CERT_PASSWORD"] = "123456"
-
-    ds = IgniteDataset(
-        cache_name="SQL_PUBLIC_TEST_CACHE",
-        port=42301,
-        certfile=os.environ["IGNITE_DATASET_CERTFILE"],
-        cert_password=os.environ["IGNITE_DATASET_CERT_PASSWORD"])
-    self._check_dataset(ds)
-
-  def test_ignite_dataset_with_ssl_client_and_auth(self):
-    """Test Ignite Dataset with ssl client and authentication.
-
-    """
-    self._clear_env()
-    os.environ["IGNITE_DATASET_USERNAME"] = "ignite"
-    os.environ["IGNITE_DATASET_PASSWORD"] = "ignite"
-    os.environ["IGNITE_DATASET_CERTFILE"] = os.path.dirname(
-        os.path.realpath(__file__)) + "/keystore/client.pem"
-    os.environ["IGNITE_DATASET_CERT_PASSWORD"] = "123456"
-
-    ds = IgniteDataset(
-        cache_name="SQL_PUBLIC_TEST_CACHE",
-        port=42302,
-        certfile=os.environ["IGNITE_DATASET_CERTFILE"],
-        cert_password=os.environ["IGNITE_DATASET_CERT_PASSWORD"],
-        username=os.environ["IGNITE_DATASET_USERNAME"],
-        password=os.environ["IGNITE_DATASET_PASSWORD"])
-    self._check_dataset(ds)
-
   def _clear_env(self):
     """Clears environment variables used by Ignite Dataset.
 
-- 
GitLab


From ce41d2f95e1e5883f1808030c94fd9aaa57d9f10 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 09:32:20 -0700
Subject: [PATCH 0213/1085] Generate an error when --rnn_states refers to array
 names that aren't produced/consumed by any op.

PiperOrigin-RevId: 215402308
---
 .../resolve_multiply_by_zero.cc               | 14 ++++-----
 .../contrib/lite/toco/model_cmdline_flags.cc  | 18 ++++++++----
 tensorflow/contrib/lite/toco/tooling_util.cc  | 29 +++++++++++++++----
 3 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
index 4bb1217828..b2b2ea151b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
@@ -60,6 +60,10 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
   const auto& output_array_name = mul_op->outputs[0];
   auto& output_array = model->GetArray(output_array_name);
 
+  if (!IsDiscardableArray(*model, output_array_name)) {
+    return false;
+  }
+
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
     return false;
@@ -139,14 +143,8 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
   }
 
   // Erase input arrays to the multiply if no longer used
-  if (IsDiscardableArray(*model, mul_op->inputs[0]) &&
-      CountOpsWithInput(*model, mul_op->inputs[0]) == 1) {
-    model->EraseArray(mul_op->inputs[0]);
-  }
-  if (IsDiscardableArray(*model, mul_op->inputs[1]) &&
-      CountOpsWithInput(*model, mul_op->inputs[1]) == 1) {
-    model->EraseArray(mul_op->inputs[1]);
-  }
+  DeleteArrayIfUsedOnce(mul_op->inputs[0], model);
+  DeleteArrayIfUsedOnce(mul_op->inputs[1], model);
 
   // Erase the multiply operator.
   model->operators.erase(mul_it);
diff --git a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
index d34da63e43..b6a401aaf2 100644
--- a/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
+++ b/tensorflow/contrib/lite/toco/model_cmdline_flags.cc
@@ -394,12 +394,18 @@ void ReadModelFlagsFromCommandLineFlags(
     }
   }
 
-  model_flags->set_allow_nonascii_arrays(
-      parsed_model_flags.allow_nonascii_arrays.value());
-  model_flags->set_allow_nonexistent_arrays(
-      parsed_model_flags.allow_nonexistent_arrays.value());
-  model_flags->set_change_concat_input_ranges(
-      parsed_model_flags.change_concat_input_ranges.value());
+  if (!model_flags->has_allow_nonascii_arrays()) {
+    model_flags->set_allow_nonascii_arrays(
+        parsed_model_flags.allow_nonascii_arrays.value());
+  }
+  if (!model_flags->has_allow_nonexistent_arrays()) {
+    model_flags->set_allow_nonexistent_arrays(
+        parsed_model_flags.allow_nonexistent_arrays.value());
+  }
+  if (!model_flags->has_change_concat_input_ranges()) {
+    model_flags->set_change_concat_input_ranges(
+        parsed_model_flags.change_concat_input_ranges.value());
+  }
 
   if (parsed_model_flags.arrays_extra_info_file.specified()) {
     string arrays_extra_info_file_contents;
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 4a1ae35cb5..b87e01fbf0 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -843,24 +843,40 @@ void CheckNonAsciiIOArrays(const ModelFlags& model_flags) {
 }
 
 void CheckNonExistentIOArrays(const Model& model) {
+  // "non-existent" is interpreted in the stronger sense of
+  // "not actually produced/consumed by an op".
+  // Rationale: we have to artificially fix up TensorFlow graphs by creating
+  // any array that it refers to, so just checking that arrays exist isn't
+  // sufficient. The real invariant here is whether arrays are produced/consumed
+  // by something.
   if (model.flags.allow_nonexistent_arrays()) {
     return;
   }
   for (const auto& input_array : model.flags.input_arrays()) {
-    CHECK(model.HasArray(input_array.name()))
-        << "Input array not found: " << input_array.name();
+    QCHECK(GetOpWithInput(model, input_array.name()))
+        << "Specified input array " << input_array.name()
+        << " is not consumed by any op in this graph. Is it a typo?";
   }
   for (const string& output_array : model.flags.output_arrays()) {
-    CHECK(model.HasArray(output_array))
-        << "Output array not found: " << output_array;
+    QCHECK(GetOpWithOutput(model, output_array))
+        << "Specified output array " << output_array
+        << " is not produced by any op in this graph. Is it a typo?";
   }
   for (const auto& rnn_state : model.flags.rnn_states()) {
     if (!rnn_state.discardable()) {
-      CHECK(model.HasArray(rnn_state.state_array()));
-      CHECK(model.HasArray(rnn_state.back_edge_source_array()));
+      // Check that all RNN states are consumed
+      QCHECK(GetOpWithInput(model, rnn_state.state_array()))
+          << "Specified RNN state " << rnn_state.state_array()
+          << " is not consumed by any op in this graph. Is it a typo?";
+      // Check that all RNN back-edge source arrays are produced
+      QCHECK(GetOpWithOutput(model, rnn_state.back_edge_source_array()))
+          << "Specified RNN back-edge source array "
+          << rnn_state.back_edge_source_array()
+          << " is not produced by any op in this graph. Is it a typo?";
     }
   }
 }
+
 }  // namespace
 
 void CheckNoMissingArray(const Model& model) {
@@ -1597,6 +1613,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
       input_array.GetOrCreateMinMax() = input_minmax;
     }
   }
+
   // Creation of the RNN state arrays
   for (const auto& rnn_state : model->flags.rnn_states()) {
     CreateOrCheckRnnStateArray(rnn_state.state_array(), rnn_state.size(),
-- 
GitLab


From dd66b78b38b457c7d37527472c4e92a7a07f4b09 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 2 Oct 2018 10:15:11 -0700
Subject: [PATCH 0214/1085] [XLA] Fix some outdated comments referring to
 FlatMap

Also convert unordered_map to flat/node_hash_map where the comments allow.

PiperOrigin-RevId: 215410566
---
 tensorflow/compiler/xla/service/BUILD                | 2 +-
 tensorflow/compiler/xla/service/allocation_tracker.h | 5 +----
 tensorflow/compiler/xla/service/gpu/BUILD            | 1 +
 tensorflow/compiler/xla/service/gpu/nvptx_compiler.h | 9 +++++----
 tensorflow/compiler/xla/service/hlo_evaluator.h      | 5 +++--
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 13803f5ebe..3f8b734afb 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -253,8 +253,8 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
-        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:inlined_vector",
+        "@com_google_absl//absl/container:node_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h
index af227fe4da..43feccee3c 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.h
+++ b/tensorflow/compiler/xla/service/allocation_tracker.h
@@ -124,10 +124,7 @@ class AllocationTracker {
   int64 next_handle_ GUARDED_BY(mutex_);
 
   // A map from device ordinal to AllocationMap.
-  //
-  // This is not a TF FlatMap because (currently) FlatMap (and therefore
-  // AllocationMap) is not movable.
-  std::unordered_map<int, AllocationMap> opaque_to_allocation_map_
+  absl::flat_hash_map<int, AllocationMap> opaque_to_allocation_map_
       GUARDED_BY(mutex_);
 
   // A map from data handle to a vector of shaped buffers that represent the
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index a838464cae..522e9f5948 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -718,6 +718,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core:stream_executor_no_cuda",
+        "@com_google_absl//absl/container:node_hash_map",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
index 8e97774750..c4a0b727cd 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/node_hash_map.h"
 #include "absl/types/optional.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/executable.h"
@@ -140,10 +141,10 @@ class NVPTXCompiler : public LLVMCompiler {
     tensorflow::condition_variable compilation_done_cv_;
   };
 
-  // Don't even think about switching this to FlatMap; iterator stability is
-  // critical here.
-  std::unordered_map<CompilationCacheKey, CompilationCacheValue,
-                     CompilationCacheHash, CompilationCacheEq>
+  // Don't even think about switching this to flat_hash_map; iterator stability
+  // is critical here.
+  absl::node_hash_map<CompilationCacheKey, CompilationCacheValue,
+                      CompilationCacheHash, CompilationCacheEq>
       compilation_cache_ GUARDED_BY(mutex_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(NVPTXCompiler);
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index 6c2662ebae..2b0792616e 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <memory>
 
+#include "absl/container/node_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
@@ -210,8 +211,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // post-orderring.
   // Must be cleared for each evaluation.
   // Storing Literal in place require the container to have pointer stability so
-  // we cannot use FlatMap any more.
-  std::unordered_map<const HloInstruction*, Literal> evaluated_;
+  // we cannot use flat_hash_map any more.
+  absl::node_hash_map<const HloInstruction*, Literal> evaluated_;
 
  private:
   template <typename ReturnT, typename NativeT>
-- 
GitLab


From feb0dc87078698fd335b528c661c54226a58efa9 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Tue, 2 Oct 2018 11:30:04 -0700
Subject: [PATCH 0215/1085] Remove dependency on contrib model_variable.

Also remove add_arg_scope.

PiperOrigin-RevId: 215426187
---
 tensorflow/contrib/quantize/BUILD             |  1 -
 .../contrib/quantize/python/quant_ops.py      | 28 +++++++++++++------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD
index 23e3a25d71..94a2d9672d 100644
--- a/tensorflow/contrib/quantize/BUILD
+++ b/tensorflow/contrib/quantize/BUILD
@@ -138,7 +138,6 @@ py_library(
     srcs = ["python/quant_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/framework:framework_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:init_ops",
diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py
index 27069444a4..d9dc7fa62e 100644
--- a/tensorflow/contrib/quantize/python/quant_ops.py
+++ b/tensorflow/contrib/quantize/python/quant_ops.py
@@ -18,8 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.framework.python.ops import add_arg_scope
-from tensorflow.contrib.framework.python.ops import model_variable
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
@@ -29,7 +27,6 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.training import moving_averages
 
 
-@add_arg_scope
 def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None):
   """Adds a fake quantize layer with fixed quantization interval.
 
@@ -46,7 +43,21 @@ def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None):
         inputs, min=init_min, max=init_max)
 
 
-@add_arg_scope
+def _ModelVariable(name,
+                   shape=None,
+                   initializer=None,
+                   collections=None,
+                   trainable=None):
+  collections = list(collections or [])
+  collections += [ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES]
+  return variable_scope.get_variable(
+      name,
+      shape=shape,
+      initializer=initializer,
+      collections=collections,
+      trainable=trainable)
+
+
 def LastValueQuantize(inputs,
                       per_channel=False,
                       init_min=-6.0,
@@ -93,13 +104,13 @@ def LastValueQuantize(inputs,
     else:
       min_max_shape = []
 
-    min_var = model_variable(
+    min_var = _ModelVariable(
         'min',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_min),
         collections=[vars_collection],
         trainable=False)
-    max_var = model_variable(
+    max_var = _ModelVariable(
         'max',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_max),
@@ -153,7 +164,6 @@ def LastValueQuantize(inputs,
         narrow_range=narrow_range)
 
 
-@add_arg_scope
 def MovingAvgQuantize(inputs,
                       per_channel=False,
                       init_min=-6.0,
@@ -202,13 +212,13 @@ def MovingAvgQuantize(inputs,
     else:
       min_max_shape = []
 
-    min_var = model_variable(
+    min_var = _ModelVariable(
         'min',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_min),
         collections=[vars_collection],
         trainable=False)
-    max_var = model_variable(
+    max_var = _ModelVariable(
         'max',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_max),
-- 
GitLab


From b4c23d661228b549186dc82c16ecb22d261becf6 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 2 Oct 2018 11:40:08 -0700
Subject: [PATCH 0216/1085] [XLA] Replace the last FlatMap in XLA with a simple
 array.

A hash map for 18 pointers is just a waste of space.

PiperOrigin-RevId: 215428176
---
 tensorflow/compiler/xla/service/hlo_evaluator.cc |  2 +-
 tensorflow/compiler/xla/service/hlo_evaluator.h  | 10 ++--------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index d7c39b2778..eec8d242fa 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -1378,7 +1378,7 @@ Status HloEvaluator::HandleReduce(HloInstruction* reduce) {
             "unsupported");
       }
     }
-    return reduce->Visit(typed_visitors_.at(first_element_type).get());
+    return reduce->Visit(typed_visitors_[first_element_type].get());
   }
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index 2b0792616e..07f8d0aad4 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -29,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/platform/macros.h"
 
 namespace xla {
@@ -135,7 +134,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // Wraps around instruction handling to infer types before dispatching to
   // the corresponding typed Visitor.
   Status DefaultAction(HloInstruction* hlo) override {
-    return hlo->Visit(typed_visitors_.at(hlo->shape().element_type()).get());
+    return hlo->Visit(typed_visitors_[hlo->shape().element_type()].get());
   }
 
   Status Preprocess(HloInstruction* hlo) override;
@@ -242,12 +241,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   }
 
   // Map from a primitive type to its associated (templated) DfsHloVisitor.
-  // Note: the hash function here is only needed because current gcc std::hash
-  // does not specialize for enum types. This should however be fixed in the
-  // future: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60970#c5
-  tensorflow::gtl::FlatMap<PrimitiveType, std::unique_ptr<DfsHloVisitor>,
-                           std::hash<int>>
-      typed_visitors_;
+  std::unique_ptr<DfsHloVisitor> typed_visitors_[PrimitiveType_ARRAYSIZE];
 
   // Caches pointers to input literals, assuming they are in post-order.
   // Literals are not owned by this class, and they must outlive the lifetime of
-- 
GitLab


From 16b44d48d485dbb62b9922e172df4cc460174046 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 2 Oct 2018 12:14:58 -0700
Subject: [PATCH 0217/1085] Fix the case when an object may have multiple
 directives with the same annotation.

PiperOrigin-RevId: 215435613
---
 tensorflow/python/autograph/core/BUILD        |  47 ++++---
 tensorflow/python/autograph/core/converter.py |  53 ++++----
 .../python/autograph/core/converter_test.py   | 124 ++++++++++++++++++
 3 files changed, 184 insertions(+), 40 deletions(-)
 create mode 100644 tensorflow/python/autograph/core/converter_test.py

diff --git a/tensorflow/python/autograph/core/BUILD b/tensorflow/python/autograph/core/BUILD
index 843e381f31..3ab2e7b1bc 100644
--- a/tensorflow/python/autograph/core/BUILD
+++ b/tensorflow/python/autograph/core/BUILD
@@ -33,6 +33,35 @@ py_library(
     ],
 )
 
+py_library(
+    name = "test_lib",
+    srcs = [
+        "converter_testing.py",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//tensorflow:__subpackages__"],
+    deps = [
+        ":core",
+        "//tensorflow/python/autograph/operators",
+        "//tensorflow/python/autograph/pyct",
+        "//tensorflow/python/autograph/pyct/static_analysis",
+        "//tensorflow/python/autograph/utils",
+        "@gast_archive//:gast",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "converter_test",
+    srcs = ["converter_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":core",
+        ":test_lib",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 py_test(
     name = "errors_test",
     srcs = ["errors_test.py"],
@@ -67,21 +96,3 @@ py_test(
         "//tensorflow/python:client_testlib",
     ],
 )
-
-py_library(
-    name = "test_lib",
-    srcs = [
-        "converter_testing.py",
-    ],
-    srcs_version = "PY2AND3",
-    visibility = ["//tensorflow:__subpackages__"],
-    deps = [
-        ":core",
-        "//tensorflow/python/autograph/operators",
-        "//tensorflow/python/autograph/pyct",
-        "//tensorflow/python/autograph/pyct/static_analysis",
-        "//tensorflow/python/autograph/utils",
-        "@gast_archive//:gast",
-        "@six_archive//:six",
-    ],
-)
diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index 80928ae7f4..408a573ad0 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -210,14 +210,22 @@ class Base(transformer.Base):
     self._ast_depth = 0
 
   def get_definition_directive(self, node, directive, arg, default):
-    """Returns the unique directive for a symbol, or a default if none exist.
+    """Returns the unique directive argument for a symbol.
 
     See lang/directives.py for details on directives.
 
+    Example:
+       # Given a directive in the code:
+       ag.foo_directive(bar, baz=1)
+
+       # One can write for an AST node Name(id='bar'):
+       get_definition_directive(node, ag.foo_directive, 'baz')
+
     Args:
-      node: ast.AST
-      directive: Callable[..., Any]
-      arg: str
+      node: ast.AST, the node representing the symbol for which the directive
+        argument is needed.
+      directive: Callable[..., Any], the directive to search.
+      arg: str, the directive argument to return.
       default: Any
 
     Raises:
@@ -227,27 +235,28 @@ class Base(transformer.Base):
     if not defs:
       return default
 
-    # TODO(mdan): Simplify this.
-    arg_values = []
+    arg_values_found = []
     for def_ in defs:
-      if (directive not in def_.directives or
-          arg not in def_.directives[directive]):
-        continue
-      arg_value = def_.directives[directive][arg]
-      for prev_value in arg_values:
-        if not ast_util.matches(arg_value, prev_value):
-          qn = anno.getanno(node, anno.Basic.QN)
-          raise ValueError('%s has ambiguous annotations for %s(%s): %s, %s' %
-                           (qn, directive.__name__, arg,
-                            compiler.ast_to_source(arg_value).strip(),
-                            compiler.ast_to_source(prev_value).strip()))
-      arg_values.append(arg_value)
-
-    if not arg_values:
+      if (directive in def_.directives and arg in def_.directives[directive]):
+        arg_values_found.append(def_.directives[directive][arg])
+
+    if not arg_values_found:
       return default
 
-    arg_value, = arg_values
-    return arg_value
+    if len(arg_values_found) == 1:
+      return arg_values_found[0]
+
+    # If multiple annotations reach the symbol, they must all match. If they do,
+    # return any of them.
+    first_value = arg_values_found[0]
+    for other_value in arg_values_found[1:]:
+      if not ast_util.matches(first_value, other_value):
+        qn = anno.getanno(node, anno.Basic.QN)
+        raise ValueError('%s has ambiguous annotations for %s(%s): %s, %s' %
+                         (qn, directive.__name__, arg,
+                          compiler.ast_to_source(other_value).strip(),
+                          compiler.ast_to_source(first_value).strip()))
+    return first_value
 
   def visit(self, node):
     if not self._ast_depth:
diff --git a/tensorflow/python/autograph/core/converter_test.py b/tensorflow/python/autograph/core/converter_test.py
new file mode 100644
index 0000000000..b73c67e337
--- /dev/null
+++ b/tensorflow/python/autograph/core/converter_test.py
@@ -0,0 +1,124 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for lists module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.autograph.core import converter
+from tensorflow.python.autograph.core import converter_testing
+from tensorflow.python.autograph.pyct import anno
+from tensorflow.python.autograph.pyct import parser
+from tensorflow.python.platform import test
+
+
+class TestConverter(converter.Base):
+  pass
+
+
+class ConverterBaseTest(converter_testing.TestCase):
+
+  def test_get_definition_directive_basic(self):
+
+    directive_key = object
+
+    def test_fn():
+      a = 1
+      return a
+
+    ns = {}
+    node, ctx = self.prepare(test_fn, ns)
+    symbol_a = node.body[1].value
+    defs, = anno.getanno(symbol_a, anno.Static.ORIG_DEFINITIONS)
+    defs.directives[directive_key] = {
+        'test_arg': parser.parse_expression('foo'),
+        'other_arg': parser.parse_expression('bar'),
+    }
+    c = TestConverter(ctx)
+    value = c.get_definition_directive(symbol_a, directive_key, 'test_arg',
+                                       None)
+    self.assertEqual(value.id, 'foo')
+
+  def test_get_definition_directive_default(self):
+
+    directive_key = object
+
+    def test_fn():
+      a = 1
+      return a
+
+    ns = {}
+    node, ctx = self.prepare(test_fn, ns)
+    symbol_a = node.body[1].value
+    c = TestConverter(ctx)
+    value = c.get_definition_directive(symbol_a, directive_key, 'test_arg',
+                                       parser.parse_expression('default'))
+    self.assertEqual(value.id, 'default')
+
+  def test_get_definition_directive_multiple_consistent(self):
+
+    directive_key = object
+
+    def test_fn():
+      a = 1
+      if a:
+        a = 2
+      return a
+
+    ns = {}
+    node, ctx = self.prepare(test_fn, ns)
+    symbol_a = node.body[2].value
+    defs = anno.getanno(symbol_a, anno.Static.ORIG_DEFINITIONS)
+    defs[0].directives[directive_key] = {
+        'test_arg': parser.parse_expression('foo'),
+        'other_arg': parser.parse_expression('bar'),
+    }
+    defs[1].directives[directive_key] = {
+        'test_arg': parser.parse_expression('foo'),
+        'other_arg': parser.parse_expression('baz'),
+    }
+    c = TestConverter(ctx)
+    value = c.get_definition_directive(symbol_a, directive_key, 'test_arg',
+                                       None)
+    self.assertEqual(value.id, 'foo')
+
+  def test_get_definition_directive_multiple_inconsistent(self):
+
+    directive_key = object
+
+    def test_fn():
+      a = 1
+      if a:
+        a = 2
+      return a
+
+    ns = {}
+    node, ctx = self.prepare(test_fn, ns)
+    symbol_a = node.body[2].value
+    defs = anno.getanno(symbol_a, anno.Static.ORIG_DEFINITIONS)
+    defs[0].directives[directive_key] = {
+        'test_arg': parser.parse_expression('foo'),
+    }
+    defs[1].directives[directive_key] = {
+        'test_arg': parser.parse_expression('bar'),
+    }
+    c = TestConverter(ctx)
+    with self.assertRaises(ValueError):
+      c.get_definition_directive(symbol_a, directive_key, 'test_arg', None)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 8d4ef71f06a06a093419bf0f80562a1941059029 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 2 Oct 2018 12:15:36 -0700
Subject: [PATCH 0218/1085] Allow creating a list from a tensor. Fix a few
 inconsistencies in the tensor list constructors.

PiperOrigin-RevId: 215435720
---
 .../autograph/lang/special_functions.py       | 24 ++++++++++--
 .../autograph/lang/special_functions_test.py  | 37 ++++++++++++++++++-
 .../autograph/operators/data_structures.py    | 17 ++++++++-
 .../operators/data_structures_test.py         | 31 ++++++++++++++--
 4 files changed, 99 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/autograph/lang/special_functions.py b/tensorflow/python/autograph/lang/special_functions.py
index e4838d1b6d..62ac018ac4 100644
--- a/tensorflow/python/autograph/lang/special_functions.py
+++ b/tensorflow/python/autograph/lang/special_functions.py
@@ -24,6 +24,26 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.autograph.operators import data_structures
+from tensorflow.python.framework import tensor_util
+
+
+def _validate_list_constructor(elements, element_dtype, element_shape):
+  """Validates the inputs of tensor_list."""
+  if element_dtype is not None and element_shape is not None:
+    return
+  if tensor_util.is_tensor(elements):
+    return
+  if isinstance(elements, (list, tuple)):
+    if elements:
+      return
+    else:
+      raise ValueError(
+          'element_dtype and element_shape are required when elements are'
+          ' empty')
+
+  raise ValueError(
+      'unknown type for elements: {}; only Tensor, list and tuple are'
+      ' allowed'.format(type(elements)))
 
 
 def tensor_list(elements,
@@ -52,9 +72,7 @@ def tensor_list(elements,
   Raises:
     ValueError: for invalid arguments
   """
-  if not (elements or (element_dtype and element_shape)):
-    raise ValueError(
-        'element_dtype and element_shape are required for empty lists')
+  _validate_list_constructor(elements, element_dtype, element_shape)
   if use_tensor_array:
     return data_structures.tf_tensor_array_new(elements, element_dtype,
                                                element_shape)
diff --git a/tensorflow/python/autograph/lang/special_functions_test.py b/tensorflow/python/autograph/lang/special_functions_test.py
index 545dd11729..206a32d07c 100644
--- a/tensorflow/python/autograph/lang/special_functions_test.py
+++ b/tensorflow/python/autograph/lang/special_functions_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.python.autograph.lang import special_functions
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -28,12 +30,43 @@ from tensorflow.python.platform import test
 
 class SpecialFunctionsTest(test.TestCase):
 
+  def test_tensor_list_empty_list(self):
+    l = special_functions.tensor_list([],
+                                      element_dtype=dtypes.int32,
+                                      element_shape=())
+    sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(sl), [])
+
+    l = special_functions.tensor_list((),
+                                      element_dtype=dtypes.int32,
+                                      element_shape=())
+    sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(sl), [])
+
+  def test_tensor_list_tensor(self):
+    l = special_functions.tensor_list(
+        constant_op.constant([], dtype=dtypes.int32))
+    sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(sl), [])
+
+  def test_tensor_list_unsupported_initializer(self):
+    with self.assertRaisesRegexp(ValueError, 'unknown type'):
+      special_functions.tensor_list(np.array([1, 2, 3]))
+
+  def test_tensor_list_empty_list_no_type(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'element_dtype and element_shape are required'):
+      special_functions.tensor_list([])
+
   def test_tensor_list_from_elements(self):
     elements = [constant_op.constant([1, 2]), constant_op.constant([3, 4])]
 
     l = special_functions.tensor_list(elements)
     sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
-    with self.cached_session() as sess:
+    with self.test_session() as sess:
       self.assertAllEqual(sess.run(sl), [[1, 2], [3, 4]])
 
   def test_tensor_list_array_from_elements(self):
@@ -41,7 +74,7 @@ class SpecialFunctionsTest(test.TestCase):
 
     l = special_functions.tensor_list(elements, use_tensor_array=True)
     sl = l.stack()
-    with self.cached_session() as sess:
+    with self.test_session() as sess:
       self.assertAllEqual(sess.run(sl), [[1, 2], [3, 4]])
 
   def test_stack(self):
diff --git a/tensorflow/python/autograph/operators/data_structures.py b/tensorflow/python/autograph/operators/data_structures.py
index cc0a3c3544..b3a3851333 100644
--- a/tensorflow/python/autograph/operators/data_structures.py
+++ b/tensorflow/python/autograph/operators/data_structures.py
@@ -106,6 +106,14 @@ def tf_tensor_array_new(elements, element_dtype=None, element_shape=None):
 
 def tf_tensor_list_new(elements, element_dtype=None, element_shape=None):
   """Overload of new_list that stages a Tensor list creation."""
+  if tensor_util.is_tensor(elements):
+    if element_shape is not None:
+      raise ValueError(
+          'element shape may not be specified when creating list from tensor')
+    element_shape = array_ops.shape(elements)[1:]
+    l = list_ops.tensor_list_from_tensor(elements, element_shape=element_shape)
+    return l
+
   elements = tuple(ops.convert_to_tensor(el) for el in elements)
 
   all_dtypes = set(el.dtype for el in elements)
@@ -115,13 +123,15 @@ def tf_tensor_list_new(elements, element_dtype=None, element_shape=None):
       raise ValueError(
           'incompatible dtype; specified: {}, inferred from {}: {}'.format(
               element_dtype, elements, inferred_dtype))
-  else:
+  elif all_dtypes:
     # Heterogeneous lists are ok.
     if element_dtype is not None:
       raise ValueError(
           'specified dtype {} is inconsistent with that of elements {}'.format(
               element_dtype, elements))
     inferred_dtype = dtypes.variant
+  else:
+    inferred_dtype = dtypes.variant
 
   all_shapes = set(tuple(el.shape.as_list()) for el in elements)
   if len(all_shapes) == 1:
@@ -130,19 +140,22 @@ def tf_tensor_list_new(elements, element_dtype=None, element_shape=None):
       raise ValueError(
           'incompatible shape; specified: {}, inferred from {}: {}'.format(
               element_shape, elements, inferred_shape))
-  else:
+  elif all_shapes:
     # Heterogeneous lists are ok.
     if element_shape is not None:
       raise ValueError(
           'specified shape {} is inconsistent with that of elements {}'.format(
               element_shape, elements))
     inferred_shape = constant_op.constant(-1)  # unknown shape, by convention
+  else:
+    inferred_shape = constant_op.constant(-1)  # unknown shape, by convention
 
   if element_dtype is None:
     element_dtype = inferred_dtype
   if element_shape is None:
     element_shape = inferred_shape
 
+  element_shape = ops.convert_to_tensor(element_shape, dtype=dtypes.int32)
   l = list_ops.empty_tensor_list(
       element_shape=element_shape, element_dtype=element_dtype)
   for el in elements:
diff --git a/tensorflow/python/autograph/operators/data_structures_test.py b/tensorflow/python/autograph/operators/data_structures_test.py
index 8532dbe466..6039b07982 100644
--- a/tensorflow/python/autograph/operators/data_structures_test.py
+++ b/tensorflow/python/autograph/operators/data_structures_test.py
@@ -45,6 +45,20 @@ class ListTest(test.TestCase):
     with self.cached_session() as sess:
       self.assertAllEqual(sess.run(t), [3, 4, 5])
 
+  def test_tf_tensor_list_new_empty(self):
+    l = data_structures.tf_tensor_list_new([],
+                                           element_dtype=dtypes.int32,
+                                           element_shape=())
+    t = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.cached_session() as sess:
+      self.assertAllEqual(sess.run(t), [])
+
+  def test_tf_tensor_list_new_from_tensor(self):
+    l = data_structures.tf_tensor_list_new(constant_op.constant([3, 4, 5]))
+    t = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.cached_session() as sess:
+      self.assertAllEqual(sess.run(t), [3, 4, 5])
+
   def test_tf_tensor_list_new_illegal_input(self):
     with self.assertRaises(ValueError):
       data_structures.tf_tensor_list_new([3, 4.0])
@@ -56,9 +70,8 @@ class ListTest(test.TestCase):
     with self.assertRaises(ValueError):
       data_structures.tf_tensor_list_new([3, 4], element_shape=(2,))
     with self.assertRaises(ValueError):
-      data_structures.tf_tensor_list_new([], element_shape=(2,))
-    with self.assertRaises(ValueError):
-      data_structures.tf_tensor_list_new([], element_dtype=dtypes.float32)
+      data_structures.tf_tensor_list_new(
+          constant_op.constant([1, 2, 3]), element_shape=[1])
 
   def test_tf_tensor_array_new(self):
     l = data_structures.tf_tensor_array_new([3, 4, 5])
@@ -141,6 +154,18 @@ class ListTest(test.TestCase):
       t = data_structures.list_stack(l, opts)
       self.assertAllEqual(sess.run(t), sess.run(initial_list))
 
+  def test_stack_tensor_list_empty(self):
+    l = list_ops.empty_tensor_list(
+        element_shape=-1,
+        element_dtype=dtypes.variant)
+
+    opts = data_structures.ListStackOpts(
+        element_dtype=dtypes.int32, original_call=None)
+
+    # TODO(mdan): Allow stacking empty lists if the dtype and shape are known.
+    with self.assertRaises(ValueError):
+      data_structures.list_stack(l, opts)
+
   def test_stack_fallback(self):
 
     def dummy_function(l):
-- 
GitLab


From d3e830e608211bc81cfb111abe3c0357bd92a12e Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Tue, 2 Oct 2018 12:38:53 -0700
Subject: [PATCH 0219/1085] Disable fused_conv tests that don't build in
 open-source.

PiperOrigin-RevId: 215440356
---
 tensorflow/contrib/fused_conv/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/fused_conv/BUILD b/tensorflow/contrib/fused_conv/BUILD
index 490da9b33b..57a5bfbf43 100644
--- a/tensorflow/contrib/fused_conv/BUILD
+++ b/tensorflow/contrib/fused_conv/BUILD
@@ -145,6 +145,7 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
     ],
     tags = [
+        "manual",  # TODO(b/117128481): re-enable after fixing OSS build
         "no_pip",
         "requires-gpu-sm70",
     ],
@@ -169,6 +170,7 @@ cuda_py_test(
     ],
     main = "python/ops/fused_conv2d_bias_activation_benchmark.py",
     tags = [
+        "manual",  # TODO(b/117128481): re-enable after fixing OSS build
         "requires-gpu-sm70",
     ],
 )
-- 
GitLab


From 508dd179b6b6dd78aa3e24212648789e8fc018a0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 12:41:31 -0700
Subject: [PATCH 0220/1085] Allow passing --allow_nonexistent_arrays via
 toco_convert

PiperOrigin-RevId: 215440829
---
 tensorflow/contrib/lite/python/convert.py    |  8 +++++++-
 tensorflow/contrib/lite/toco/tooling_util.cc | 19 +++++++++++--------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 613a1530f7..1bf42d7551 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -155,7 +155,8 @@ def build_toco_convert_protos(input_tensors,
                               post_training_quantize=False,
                               dump_graphviz_dir=None,
                               dump_graphviz_video=False,
-                              converter_mode=ConverterMode.DEFAULT):
+                              converter_mode=ConverterMode.DEFAULT,
+                              allow_nonexistent_arrays=False):
   """Builds protocol buffers describing a conversion of a model using TOCO.
 
   Typically this is to convert from TensorFlow GraphDef to TFLite, in which
@@ -212,6 +213,8 @@ def build_toco_convert_protos(input_tensors,
       every graph transformation. (default False)
     converter_mode: Experimental flag, subject to change. ConverterMode
       indicating which converter to use. (default ConverterMode.DEFAULT)
+    allow_nonexistent_arrays: Allow specifying array names that don't exist
+      or are unused in the final graph.  (default False)
 
   Returns:
     model_flags, toco_flags: two protocol buffers describing the conversion
@@ -261,6 +264,9 @@ def build_toco_convert_protos(input_tensors,
 
   for output_tensor in output_tensors:
     model.output_arrays.append(tensor_name(output_tensor))
+
+  model.allow_nonexistent_arrays = allow_nonexistent_arrays
+
   return model, toco
 
 
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index b87e01fbf0..e3f27e9e2a 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -852,27 +852,30 @@ void CheckNonExistentIOArrays(const Model& model) {
   if (model.flags.allow_nonexistent_arrays()) {
     return;
   }
+  static constexpr char general_comment[] =
+      "Is it a typo? To silence this message, pass this flag:  "
+      "allow_nonexistent_arrays";
   for (const auto& input_array : model.flags.input_arrays()) {
     QCHECK(GetOpWithInput(model, input_array.name()))
-        << "Specified input array " << input_array.name()
-        << " is not consumed by any op in this graph. Is it a typo?";
+        << "Specified input array \"" << input_array.name()
+        << "\" is not consumed by any op in this graph. " << general_comment;
   }
   for (const string& output_array : model.flags.output_arrays()) {
     QCHECK(GetOpWithOutput(model, output_array))
-        << "Specified output array " << output_array
-        << " is not produced by any op in this graph. Is it a typo?";
+        << "Specified output array \"" << output_array
+        << "\" is not produced by any op in this graph. " << general_comment;
   }
   for (const auto& rnn_state : model.flags.rnn_states()) {
     if (!rnn_state.discardable()) {
       // Check that all RNN states are consumed
       QCHECK(GetOpWithInput(model, rnn_state.state_array()))
-          << "Specified RNN state " << rnn_state.state_array()
-          << " is not consumed by any op in this graph. Is it a typo?";
+          << "Specified RNN state \"" << rnn_state.state_array()
+          << "\" is not consumed by any op in this graph. " << general_comment;
       // Check that all RNN back-edge source arrays are produced
       QCHECK(GetOpWithOutput(model, rnn_state.back_edge_source_array()))
-          << "Specified RNN back-edge source array "
+          << "Specified RNN back-edge source array \""
           << rnn_state.back_edge_source_array()
-          << " is not produced by any op in this graph. Is it a typo?";
+          << "\" is not produced by any op in this graph. " << general_comment;
     }
   }
 }
-- 
GitLab


From 0a201955b47d484c6bfa149364c264a5b5f91be7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 12:47:05 -0700
Subject: [PATCH 0221/1085] Copy tf.distributions to tfp.distributions, and
 deprecate the tf.distributions API.

PiperOrigin-RevId: 215441733
---
 .../python/debug/examples/examples_test.sh    |  2 +-
 tensorflow/python/ops/distributions/BUILD     |  7 ++++++
 .../python/ops/distributions/bernoulli.py     |  9 +++++++
 tensorflow/python/ops/distributions/beta.py   | 14 +++++++++++
 .../python/ops/distributions/categorical.py   |  9 +++++++
 .../python/ops/distributions/dirichlet.py     |  9 +++++++
 .../distributions/dirichlet_multinomial.py    |  9 +++++++
 .../python/ops/distributions/distribution.py  | 17 +++++++++++++
 .../python/ops/distributions/exponential.py   | 13 ++++++++++
 tensorflow/python/ops/distributions/gamma.py  | 14 +++++++++++
 .../ops/distributions/identity_bijector.py    |  9 +++++++
 .../ops/distributions/kullback_leibler.py     | 25 +++++++++++++++++++
 .../python/ops/distributions/laplace.py       | 14 +++++++++++
 .../python/ops/distributions/multinomial.py   |  9 +++++++
 tensorflow/python/ops/distributions/normal.py | 14 +++++++++++
 .../python/ops/distributions/student_t.py     | 14 +++++++++++
 .../distributions/transformed_distribution.py |  9 +++++++
 .../python/ops/distributions/uniform.py       |  9 +++++++
 18 files changed, 205 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/debug/examples/examples_test.sh b/tensorflow/python/debug/examples/examples_test.sh
index f7d597c8c0..89dc918616 100755
--- a/tensorflow/python/debug/examples/examples_test.sh
+++ b/tensorflow/python/debug/examples/examples_test.sh
@@ -115,7 +115,7 @@ OUTPUT=$(${OFFLINE_ANALYZER_BIN} 2>&1)
 set -e
 
 EXPECTED_OUTPUT="ERROR: dump_dir flag is empty."
-if [[ "${OUTPUT}" != "${EXPECTED_OUTPUT}" ]]; then
+if ! echo "${OUTPUT}" | grep -q "${EXPECTED_OUTPUT}"; then
   echo "ERROR: offline_analyzer output didn't match expectation: ${OUTPUT}" 1>&2
   echo "Expected output: ${EXPECTED_OUTPUT}"
   exit 1
diff --git a/tensorflow/python/ops/distributions/BUILD b/tensorflow/python/ops/distributions/BUILD
index e7ad028376..59ba9aee59 100644
--- a/tensorflow/python/ops/distributions/BUILD
+++ b/tensorflow/python/ops/distributions/BUILD
@@ -12,6 +12,13 @@ py_library(
         ["*.py"],
         exclude = ["util.py"],
     ),
+    deprecation = ("TensorFlow Distributions has migrated to " +
+                   "TensorFlow Probability " +
+                   "(https://github.com/tensorflow/probability). " +
+                   "Deprecated copies remaining in tf.distributions " +
+                   "will not receive new features, and will be removed by " +
+                   "early 2019. You should update all usage of " +
+                   "`tf.distributions` to `tfp.distributions`."),
     srcs_version = "PY2AND3",
     deps = [
         ":util",
diff --git a/tensorflow/python/ops/distributions/bernoulli.py b/tensorflow/python/ops/distributions/bernoulli.py
index 84d9d40a35..baecc321d3 100644
--- a/tensorflow/python/ops/distributions/bernoulli.py
+++ b/tensorflow/python/ops/distributions/bernoulli.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -39,6 +40,14 @@ class Bernoulli(distribution.Distribution):
   `1` outcome (vs a `0` outcome).
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                logits=None,
                probs=None,
diff --git a/tensorflow/python/ops/distributions/beta.py b/tensorflow/python/ops/distributions/beta.py
index d6f89a3517..51c4f6eb3d 100644
--- a/tensorflow/python/ops/distributions/beta.py
+++ b/tensorflow/python/ops/distributions/beta.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -150,6 +151,14 @@ class Beta(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                concentration1=None,
                concentration0=None,
@@ -341,6 +350,11 @@ class Beta(distribution.Distribution):
 class BetaWithSoftplusConcentration(Beta):
   """Beta with softplus transform of `concentration1` and `concentration0`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Beta(tf.nn.softplus(concentration1), "
+      "tf.nn.softplus(concentration2))` instead.",
+      warn_once=True)
   def __init__(self,
                concentration1,
                concentration0,
diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py
index fbbacf2521..26a3da2fb6 100644
--- a/tensorflow/python/ops/distributions/categorical.py
+++ b/tensorflow/python/ops/distributions/categorical.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -149,6 +150,14 @@ class Categorical(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(
       self,
       logits=None,
diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py
index 997b1d392d..675c30b383 100644
--- a/tensorflow/python/ops/distributions/dirichlet.py
+++ b/tensorflow/python/ops/distributions/dirichlet.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import special_math_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -156,6 +157,14 @@ class Dirichlet(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                concentration,
                validate_args=False,
diff --git a/tensorflow/python/ops/distributions/dirichlet_multinomial.py b/tensorflow/python/ops/distributions/dirichlet_multinomial.py
index 5350c82847..2e3151a5ab 100644
--- a/tensorflow/python/ops/distributions/dirichlet_multinomial.py
+++ b/tensorflow/python/ops/distributions/dirichlet_multinomial.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import special_math_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -163,6 +164,14 @@ class DirichletMultinomial(distribution.Distribution):
 
   # TODO(b/27419586) Change docstring for dtype of concentration once int
   # allowed.
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                total_count,
                concentration,
diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py
index 12fd039392..4741370cd8 100644
--- a/tensorflow/python/ops/distributions/distribution.py
+++ b/tensorflow/python/ops/distributions/distribution.py
@@ -34,6 +34,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
@@ -229,6 +230,14 @@ class ReparameterizationType(object):
     gradients / surrogate loss instead.
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self, rep_type):
     self._rep_type = rep_type
 
@@ -405,6 +414,14 @@ class Distribution(_BaseDistribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                dtype,
                reparameterization_type,
diff --git a/tensorflow/python/ops/distributions/exponential.py b/tensorflow/python/ops/distributions/exponential.py
index 02129b5e2a..6a52af8c33 100644
--- a/tensorflow/python/ops/distributions/exponential.py
+++ b/tensorflow/python/ops/distributions/exponential.py
@@ -27,6 +27,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import gamma
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -70,6 +71,14 @@ class Exponential(gamma.Gamma):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                rate,
                validate_args=False,
@@ -138,6 +147,10 @@ class Exponential(gamma.Gamma):
 class ExponentialWithSoftplusRate(Exponential):
   """Exponential with softplus transform on `rate`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Exponential(tf.nn.softplus(rate)).",
+      warn_once=True)
   def __init__(self,
                rate,
                validate_args=False,
diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py
index bbc64da7bc..4a2db208d4 100644
--- a/tensorflow/python/ops/distributions/gamma.py
+++ b/tensorflow/python/ops/distributions/gamma.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -121,6 +122,14 @@ class Gamma(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                concentration,
                rate,
@@ -279,6 +288,11 @@ class Gamma(distribution.Distribution):
 class GammaWithSoftplusConcentrationRate(Gamma):
   """`Gamma` with softplus of `concentration` and `rate`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Gamma(tf.nn.softplus(concentration), "
+      "tf.nn.softplus(rate))` instead.",
+      warn_once=True)
   def __init__(self,
                concentration,
                rate,
diff --git a/tensorflow/python/ops/distributions/identity_bijector.py b/tensorflow/python/ops/distributions/identity_bijector.py
index 8628e68f96..eded96f5bc 100644
--- a/tensorflow/python/ops/distributions/identity_bijector.py
+++ b/tensorflow/python/ops/distributions/identity_bijector.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.ops.distributions import bijector
+from tensorflow.python.util import deprecation
 
 
 __all__ = [
@@ -43,6 +44,14 @@ class Identity(bijector.Bijector):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self, validate_args=False, name="identity"):
     super(Identity, self).__init__(
         forward_min_event_ndims=0,
diff --git a/tensorflow/python/ops/distributions/kullback_leibler.py b/tensorflow/python/ops/distributions/kullback_leibler.py
index fdeb97bf64..12743fa23d 100644
--- a/tensorflow/python/ops/distributions/kullback_leibler.py
+++ b/tensorflow/python/ops/distributions/kullback_leibler.py
@@ -22,6 +22,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
@@ -51,6 +52,14 @@ def _registered_kl(type_a, type_b):
   return kl_fn
 
 
+@deprecation.deprecated(
+    "2019-01-01",
+    "The TensorFlow Distributions library has moved to "
+    "TensorFlow Probability "
+    "(https://github.com/tensorflow/probability). You "
+    "should update all references to use `tfp.distributions` "
+    "instead of `tf.distributions`.",
+    warn_once=True)
 @tf_export("distributions.kl_divergence")
 def kl_divergence(distribution_a, distribution_b,
                   allow_nan_stats=True, name=None):
@@ -112,6 +121,14 @@ def kl_divergence(distribution_a, distribution_b,
       return array_ops.identity(kl_t, name="checked_kl")
 
 
+@deprecation.deprecated(
+    "2019-01-01",
+    "The TensorFlow Distributions library has moved to "
+    "TensorFlow Probability "
+    "(https://github.com/tensorflow/probability). You "
+    "should update all references to use `tfp.distributions` "
+    "instead of `tf.distributions`.",
+    warn_once=True)
 def cross_entropy(ref, other,
                   allow_nan_stats=True, name=None):
   """Computes the (Shannon) cross entropy.
@@ -155,6 +172,14 @@ class RegisterKL(object):
     # Return KL(norm_a || norm_b)
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self, dist_cls_a, dist_cls_b):
     """Initialize the KL registrar.
 
diff --git a/tensorflow/python/ops/distributions/laplace.py b/tensorflow/python/ops/distributions/laplace.py
index be17cf2527..4f6a8f587d 100644
--- a/tensorflow/python/ops/distributions/laplace.py
+++ b/tensorflow/python/ops/distributions/laplace.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import nn
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import special_math
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -71,6 +72,14 @@ class Laplace(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                loc,
                scale,
@@ -211,6 +220,11 @@ class Laplace(distribution.Distribution):
 class LaplaceWithSoftplusScale(Laplace):
   """Laplace with softplus applied to `scale`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Laplace(loc, tf.nn.softplus(scale)) "
+      "instead.",
+      warn_once=True)
   def __init__(self,
                loc,
                scale,
diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py
index d0943e8eee..8397353cd5 100644
--- a/tensorflow/python/ops/distributions/multinomial.py
+++ b/tensorflow/python/ops/distributions/multinomial.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -148,6 +149,14 @@ class Multinomial(distribution.Distribution):
   ```
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                total_count,
                logits=None,
diff --git a/tensorflow/python/ops/distributions/normal.py b/tensorflow/python/ops/distributions/normal.py
index 2feaf806c0..9f511709b9 100644
--- a/tensorflow/python/ops/distributions/normal.py
+++ b/tensorflow/python/ops/distributions/normal.py
@@ -32,6 +32,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import kullback_leibler
 from tensorflow.python.ops.distributions import special_math
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -106,6 +107,14 @@ class Normal(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                loc,
                scale,
@@ -240,6 +249,11 @@ class Normal(distribution.Distribution):
 class NormalWithSoftplusScale(Normal):
   """Normal with softplus applied to `scale`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.Normal(loc, tf.nn.softplus(scale)) "
+      "instead.",
+      warn_once=True)
   def __init__(self,
                loc,
                scale,
diff --git a/tensorflow/python/ops/distributions/student_t.py b/tensorflow/python/ops/distributions/student_t.py
index e8d214bbe0..b69e61925c 100644
--- a/tensorflow/python/ops/distributions/student_t.py
+++ b/tensorflow/python/ops/distributions/student_t.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import special_math_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -140,6 +141,14 @@ class StudentT(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                df,
                loc,
@@ -361,6 +370,11 @@ class StudentT(distribution.Distribution):
 class StudentTWithAbsDfSoftplusScale(StudentT):
   """StudentT with `df = floor(abs(df))` and `scale = softplus(scale)`."""
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "Use `tfd.StudentT(tf.floor(tf.abs(df)), loc, "
+      "tf.nn.softplus(scale)) instead.",
+      warn_once=True)
   def __init__(self,
                df,
                loc,
diff --git a/tensorflow/python/ops/distributions/transformed_distribution.py b/tensorflow/python/ops/distributions/transformed_distribution.py
index e80bf9ee42..1becfc1877 100644
--- a/tensorflow/python/ops/distributions/transformed_distribution.py
+++ b/tensorflow/python/ops/distributions/transformed_distribution.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.distributions import distribution as distribution_lib
 from tensorflow.python.ops.distributions import identity_bijector
 from tensorflow.python.ops.distributions import util as distribution_util
+from tensorflow.python.util import deprecation
 
 __all__ = [
     "TransformedDistribution",
@@ -227,6 +228,14 @@ class TransformedDistribution(distribution_lib.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                distribution,
                bijector=None,
diff --git a/tensorflow/python/ops/distributions/uniform.py b/tensorflow/python/ops/distributions/uniform.py
index e66c4a37e7..b6b24187cc 100644
--- a/tensorflow/python/ops/distributions/uniform.py
+++ b/tensorflow/python/ops/distributions/uniform.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -76,6 +77,14 @@ class Uniform(distribution.Distribution):
 
   """
 
+  @deprecation.deprecated(
+      "2019-01-01",
+      "The TensorFlow Distributions library has moved to "
+      "TensorFlow Probability "
+      "(https://github.com/tensorflow/probability). You "
+      "should update all references to use `tfp.distributions` "
+      "instead of `tf.distributions`.",
+      warn_once=True)
   def __init__(self,
                low=0.,
                high=1.,
-- 
GitLab


From 78e4ce52aeda5a10ddaf5e64ea8958f439a2f9f2 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Tue, 2 Oct 2018 13:08:39 -0700
Subject: [PATCH 0222/1085] Add proto serialization/deserialization testing to
 the HLO parser tests. Many of the HLO parser tests verify that an text form
 of an HLO module preserves all information when running through ToString then
 parsing. It makes sense to also use these tests to exercise proto
 serialization/deserialization. This is done by adding additional
 instantiations of the parameterized parsing tests. This caught several bugs
 which are fixed in this CL:

(1) Domain instructions were not being serialized properly.
(2) Host send/recv instructions did not preserve the is_host_transfer bit.
(3) Sparse literals could not be serialized or deserialized.

PiperOrigin-RevId: 215445200
---
 tensorflow/compiler/xla/literal.cc            | 18 ++++
 tensorflow/compiler/xla/literal_test.cc       | 10 +++
 tensorflow/compiler/xla/service/BUILD         | 20 +----
 tensorflow/compiler/xla/service/hlo.proto     |  6 +-
 .../compiler/xla/service/hlo_instruction.cc   | 33 +++++--
 .../compiler/xla/service/hlo_instructions.cc  | 21 +++++
 .../compiler/xla/service/hlo_instructions.h   |  3 +
 .../compiler/xla/service/hlo_parser_test.cc   | 85 +++++++++++++------
 8 files changed, 141 insertions(+), 55 deletions(-)

diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index 5035f41988..d1dad0d45f 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -1850,6 +1850,24 @@ Status LiteralBase::Piece::CopyFromProto(const LiteralProto& proto) {
   TF_RET_CHECK(LayoutUtil::HasLayout(proto.shape()));
   TF_RET_CHECK(ShapeUtil::Equal(proto.shape(), subshape()));
 
+  if (LayoutUtil::IsSparseArray(subshape())) {
+    // Compute the number of elements (indices) in the sparse shape and reserve
+    // the necessary space in spare_indices.
+    TF_RET_CHECK(ShapeUtil::Rank(subshape()) != 0)
+        << "Scalar shapes cannot be sparse";
+    TF_RET_CHECK(proto.sparse_indices_size() % ShapeUtil::Rank(subshape()) == 0)
+        << "Unexpected number of indices in proto ("
+        << proto.sparse_indices_size() << ") for shape of rank "
+        << ShapeUtil::Rank(subshape());
+    const int64 index_count =
+        proto.sparse_indices_size() / ShapeUtil::Rank(subshape());
+    sparse_indices()->Resize(index_count);
+
+    // Copy the indices from the proto into the SparseIndexArray object.
+    TF_RETURN_IF_ERROR(CopyFromRepeatedField(sparse_indices()->mutable_data(),
+                                             proto.sparse_indices()));
+  }
+
   switch (subshape().element_type()) {
     case PRED:
       TF_RETURN_IF_ERROR(CopyFromRepeatedField(data<bool>(), proto.preds()));
diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc
index 7ad287c897..dd5b54e4c9 100644
--- a/tensorflow/compiler/xla/literal_test.cc
+++ b/tensorflow/compiler/xla/literal_test.cc
@@ -224,6 +224,16 @@ TEST_F(LiteralUtilTest, CreateSparse) {
             absl::Span<const int64>(expected_indices.data(),
                                     expected_indices.num_elements()));
   EXPECT_EQ(literal.data<int64>(), absl::Span<const int64>(expected_values));
+
+  // Serialize then deserialize and verify the resulting literal.
+  TF_ASSERT_OK_AND_ASSIGN(Literal literal_from_proto,
+                          Literal::CreateFromProto(literal.ToProto()));
+
+  EXPECT_EQ(literal_from_proto.sparse_indices()->data(),
+            absl::Span<const int64>(expected_indices.data(),
+                                    expected_indices.num_elements()));
+  EXPECT_EQ(literal_from_proto.data<int64>(),
+            absl::Span<const int64>(expected_values));
 }
 
 TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) {
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 3f8b734afb..f329a27e14 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -300,6 +300,7 @@ cc_library(
         "hlo_opcode.cc",
         "hlo_schedule.cc",
         "hlo_sharding.cc",
+        "hlo_sharding_metadata.cc",
     ],
     hdrs = [
         "dfs_hlo_visitor.h",
@@ -313,6 +314,7 @@ cc_library(
         "hlo_opcode.h",
         "hlo_schedule.h",
         "hlo_sharding.h",
+        "hlo_sharding_metadata.h",
     ],
     deps = [
         ":hlo_casting_utils",
@@ -2759,22 +2761,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "hlo_sharding_metadata",
-    srcs = ["hlo_sharding_metadata.cc"],
-    hdrs = [
-        "hlo_sharding_metadata.h",
-    ],
-    deps = [
-        ":hlo",
-        "//tensorflow/compiler/xla:shape_tree",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/core:lib",
-        "@com_google_absl//absl/memory",
-        "@com_google_absl//absl/types:span",
-    ],
-)
-
 cc_library(
     name = "hlo_domain_verifier",
     srcs = ["hlo_domain_verifier.cc"],
@@ -2825,7 +2811,6 @@ tf_cc_test(
         ":hlo_domain_isolator",
         ":hlo_domain_remover",
         ":hlo_parser",
-        ":hlo_sharding_metadata",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla/legacy_flags:debug_options_flags",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
@@ -3441,7 +3426,6 @@ cc_library(
     deps = [
         ":hlo",
         ":hlo_lexer",
-        ":hlo_sharding_metadata",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index caaca16f71..1ea26ddd5b 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto";
 option cc_enable_arenas = true;
 
 // Serialization of HloInstruction.
-// Next ID: 54
+// Next ID: 56
 message HloInstructionProto {
   reserved 10;
   reserved "parameter_name";
@@ -180,6 +180,10 @@ message HloInstructionProto {
 
   // Collective permute field.
   repeated SourceTarget source_target_pairs = 52;
+
+  // Sharding for kDomain instructions.
+  xla.OpSharding domain_entry_sharding = 54;
+  xla.OpSharding domain_exit_sharding = 55;
 }
 
 // Serialization of HloComputation.
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 0207f9ae3f..de22b2d3a5 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -39,6 +39,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_sharding_metadata.h"
 #include "tensorflow/compiler/xla/service/name_uniquer.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
@@ -467,14 +468,27 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
           proto.dot_dimension_numbers(), precision_config);
       break;
     }
-    case HloOpcode::kDomain:
+    case HloOpcode::kDomain: {
       TF_RET_CHECK(proto.operand_ids_size() == 1)
           << "Domain instruction should have 1 operands but sees "
           << proto.operand_ids_size();
+      TF_RET_CHECK(proto.has_domain_entry_sharding())
+          << "Domain instruction must domain_entry_sharding";
+      TF_RET_CHECK(proto.has_domain_exit_sharding())
+          << "Domain instruction must domain_exit_sharding";
+      TF_ASSIGN_OR_RETURN(
+          HloSharding entry_hlo_sharding,
+          HloSharding::FromProto(proto.domain_entry_sharding()));
+      TF_ASSIGN_OR_RETURN(HloSharding exit_hlo_sharding,
+                          HloSharding::FromProto(proto.domain_exit_sharding()));
       instruction = absl::make_unique<HloDomainInstruction>(
-          proto.shape(), operands(0), /*operand_side_metadata=*/nullptr,
-          /*user_side_metadata=*/nullptr);
+          proto.shape(), operands(0),
+          absl::make_unique<ShardingMetadata>(
+              std::make_shared<const HloSharding>(entry_hlo_sharding)),
+          absl::make_unique<ShardingMetadata>(
+              std::make_shared<const HloSharding>(exit_hlo_sharding)));
       break;
+    }
     default: {
       instruction = absl::WrapUnique(new HloInstruction(opcode, proto.shape()));
       for (const int64 operand_id : proto.operand_ids()) {
@@ -482,12 +496,6 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
             << "No instruction with id " << operand_id;
         instruction->AppendOperand(instruction_map.at(operand_id));
       }
-      for (const int64 predecessor_id : proto.control_predecessor_ids()) {
-        TF_RET_CHECK(ContainsKey(instruction_map, predecessor_id))
-            << "No instruction with id " << predecessor_id;
-        TF_RETURN_IF_ERROR(instruction_map.at(predecessor_id)
-                               ->AddControlDependencyTo(instruction.get()));
-      }
       if (instruction->opcode() != HloOpcode::kFusion) {
         for (const int64 computation_id : proto.called_computation_ids()) {
           TF_RET_CHECK(ContainsKey(computation_map, computation_id))
@@ -503,6 +511,13 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     }
   }
 
+  for (const int64 predecessor_id : proto.control_predecessor_ids()) {
+    TF_RET_CHECK(ContainsKey(instruction_map, predecessor_id))
+        << "No instruction with id " << predecessor_id;
+    TF_RETURN_IF_ERROR(instruction_map.at(predecessor_id)
+                           ->AddControlDependencyTo(instruction.get()));
+  }
+
   TF_RET_CHECK(!proto.name().empty());
   instruction->SetAndSanitizeName(proto.name());
   instruction->metadata_ = proto.metadata();
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 1bc168c8b7..68d0979f5c 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/hlo_sharding_metadata.h"
 #include "tensorflow/compiler/xla/window_util.h"
 
 namespace xla {
@@ -213,6 +214,7 @@ HloSendRecvInstruction::HloSendRecvInstruction(HloOpcode opcode,
 HloInstructionProto HloSendRecvInstruction::ToProto() const {
   HloInstructionProto proto = HloInstruction::ToProto();
   proto.set_channel_id(channel_id_);
+  proto.set_is_host_transfer(is_host_transfer_);
   return proto;
 }
 
@@ -2310,4 +2312,23 @@ std::unique_ptr<HloInstruction> HloDomainInstruction::CloneWithNewOperandsImpl(
       shape, new_operands[0], operand_side_metadata_->Clone(),
       user_side_metadata_->Clone());
 }
+
+HloInstructionProto HloDomainInstruction::ToProto() const {
+  HloInstructionProto proto = HloInstruction::ToProto();
+  auto operand_side_sharding =
+      dynamic_cast<const ShardingMetadata*>(operand_side_metadata_.get());
+  if (operand_side_sharding) {
+    *proto.mutable_domain_entry_sharding() =
+        operand_side_sharding->sharding()->ToProto();
+  }
+
+  auto user_side_sharding =
+      dynamic_cast<const ShardingMetadata*>(user_side_metadata_.get());
+  if (user_side_sharding) {
+    *proto.mutable_domain_exit_sharding() =
+        user_side_sharding->sharding()->ToProto();
+  }
+
+  return proto;
+}
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 9c22f5db7e..c929867bb9 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -1341,6 +1341,9 @@ class HloDomainInstruction : public HloInstruction {
       std::unique_ptr<DomainMetadata> operand_side_metadata,
       std::unique_ptr<DomainMetadata> user_side_metadata);
 
+  // Returns a serialized representation of this instruction.
+  HloInstructionProto ToProto() const override;
+
   // Retrieves the operand side metadata of a kDomain instruction.
   const DomainMetadata& operand_side_metadata() const {
     return *operand_side_metadata_;
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index 96db96bdb9..dd4ee780f0 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1163,49 +1163,80 @@ ENTRY Sort {
   // clang-format on
 }
 
-class HloParserTest : public ::testing::Test,
-                      public ::testing::WithParamInterface<TestData> {
+// The test class for those tests defined above which round-trip through the
+// parser and ToString is templatized on two bool parameters:
+//
+//  short_form : used for the "short" test cases which use the ShortParsable
+//    output form.
+//  proto_round_trip : whether the module should also be round-tripped through
+//    HloProto form. This provides much better coverage for the proto
+//    serialization/deserialization.
+//
+// The proto_round_trip=true case also technically covers the Parser->ToString
+// roundtrip as well, but separating out the Parser->ToString roundtrip as its
+// own test provides better isolation and could conceivably catch weirdo bugs
+// which are hidden by interaction between the textual and proto roundtripping.
+template <bool short_form, bool proto_round_trip>
+class HloParameterizedParserTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<TestData> {
  protected:
-  static void ExpectHasSubstr(string_view s, string_view expected) {
-    EXPECT_TRUE(absl::StrContains(s, expected))
-        << "'" << s << "' does not contain '" << expected << "'";
-  }
-
   // Expects "ToString(ParseHloString(string)) == string", that is, parses the
   // string, asserts that it succeeded, stringifies the parsed module, and
   // checks that the it equals the original string.
   void ExpectEqual() {
     const string& original = GetParam().module_string;
-    auto result = ParseHloString(original);
-    TF_ASSERT_OK(result.status());
-    EXPECT_EQ(original, result.ValueOrDie()->ToString(
-                            HloPrintOptions().set_print_large_constants(true)));
+    TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                            ParseHloString(original));
+    if (proto_round_trip) {
+      TF_ASSERT_OK_AND_ASSIGN(module, HloModule::CreateFromProto(
+                                          module->ToProto(), module->config()));
+    }
+    if (short_form) {
+      EXPECT_EQ(original, module->ToString(HloPrintOptions::ShortParsable()));
+    } else {
+      EXPECT_EQ(
+          original,
+          module->ToString(HloPrintOptions().set_print_large_constants(true)));
+    }
   }
 };
 
-class HloParserShortTest : public HloParserTest {
- protected:
-  void ExpectEqualShort() {
-    const string& original = GetParam().module_string;
-    auto result = ParseHloString(original);
-    TF_ASSERT_OK(result.status());
-    EXPECT_EQ(original,
-              result.ValueOrDie()->ToString(HloPrintOptions::ShortParsable()));
-  }
-};
+// These using shenanigans are required because the TEST_P macro doesn't like
+// template instantiations which contain commas.
+using HloParserTestLong = HloParameterizedParserTest<false, false>;
+using HloParserTestLongProto = HloParameterizedParserTest<false, true>;
+using HloParserTestShort = HloParameterizedParserTest<true, false>;
+using HloParserTestShortProto = HloParameterizedParserTest<true, true>;
 
-TEST_P(HloParserTest, Run) { ExpectEqual(); }
+TEST_P(HloParserTestLong, Run) { ExpectEqual(); }
+TEST_P(HloParserTestLongProto, Run) { ExpectEqual(); }
+TEST_P(HloParserTestShort, Run) { ExpectEqual(); }
+TEST_P(HloParserTestShortProto, Run) { ExpectEqual(); }
 
-TEST_P(HloParserShortTest, Run) { ExpectEqualShort(); }
-
-INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTest,
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTestLong,
                         ::testing::ValuesIn(CreateTestCases()),
                         TestDataToString);
-
-INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserShortTest,
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation,
+                        HloParserTestLongProto,
+                        ::testing::ValuesIn(CreateTestCases()),
+                        TestDataToString);
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation, HloParserTestShort,
+                        ::testing::ValuesIn(CreateShortTestCases()),
+                        TestDataToString);
+INSTANTIATE_TEST_CASE_P(HloParserTestSuccessInstantiation,
+                        HloParserTestShortProto,
                         ::testing::ValuesIn(CreateShortTestCases()),
                         TestDataToString);
 
+class HloParserTest : public ::testing::Test {
+ protected:
+  static void ExpectHasSubstr(string_view s, string_view expected) {
+    EXPECT_TRUE(absl::StrContains(s, expected))
+        << "'" << s << "' does not contain '" << expected << "'";
+  }
+};
+
 TEST_F(HloParserTest, Empty) {
   const string original = "";
   auto result = ParseHloString(original);
-- 
GitLab


From 8d12c635cc48e896da0bcac1cd568bd6381ca64e Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Tue, 2 Oct 2018 13:18:27 -0700
Subject: [PATCH 0223/1085] Support shape_invariants in while_v2. Note that
 this arg is temporary and may be replaced by automatic shape inference in TF
 2.0 (or before). Add a output_shapes attr to While op to allow output shapes
 to be different from the incoming loop_vars.

PiperOrigin-RevId: 215446737
---
 .../function_functional_while.pbtxt           |  7 +++
 tensorflow/core/ops/functional_ops.cc         | 23 +++++++-
 .../kernel_tests/control_flow_ops_py_test.py  | 11 ++--
 tensorflow/python/ops/control_flow_ops.py     |  3 +-
 tensorflow/python/ops/while_v2.py             | 59 ++++++++++++++++---
 5 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties_testdata/function_functional_while.pbtxt b/tensorflow/core/grappler/costs/graph_properties_testdata/function_functional_while.pbtxt
index c94ee2f227..0ec95dd684 100644
--- a/tensorflow/core/grappler/costs/graph_properties_testdata/function_functional_while.pbtxt
+++ b/tensorflow/core/grappler/costs/graph_properties_testdata/function_functional_while.pbtxt
@@ -88,6 +88,13 @@ library {
           }
         }
       }
+      attr {
+        key: "output_shapes"
+        value {
+          list {
+          }
+        }
+      }
     }
     ret {
       key: "while"
diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc
index bda4a75c5d..fed3fa22ed 100644
--- a/tensorflow/core/ops/functional_ops.cc
+++ b/tensorflow/core/ops/functional_ops.cc
@@ -150,10 +150,29 @@ REGISTER_OP("While")
     .Attr("T: list(type) >= 0")
     .Attr("cond: func")
     .Attr("body: func")
+    .Attr("output_shapes: list(shape) = []")
     .SetIsStateful()
     .SetShapeFn([](shape_inference::InferenceContext* c) {
-      for (int i = 0; i < c->num_outputs(); ++i) {
-        c->set_output(i, c->input(i));
+      std::vector<PartialTensorShape> output_shapes;
+      TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
+      // If `output_shapes` attr is set use that as the shapes of the outputs
+      // else use the input shapes.
+      if (!output_shapes.empty()) {
+        if (output_shapes.size() != c->num_outputs()) {
+          return errors::InvalidArgument(
+              "`output_shapes` must be the same length as num outputs (",
+              output_shapes.size(), " vs. ", c->num_outputs());
+        }
+        for (size_t i = 0; i < output_shapes.size(); ++i) {
+          shape_inference::ShapeHandle output_shape_handle;
+          TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
+              output_shapes[i], &output_shape_handle));
+          c->set_output(static_cast<int>(i), output_shape_handle);
+        }
+      } else {
+        for (int i = 0; i < c->num_outputs(); ++i) {
+          c->set_output(i, c->input(i));
+        }
       }
       return Status::OK();
     });
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index ae61be614e..655fece5ff 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -1040,7 +1040,6 @@ class ControlFlowTest(test.TestCase):
       result = r[3].eval()
     self.assertAllEqual(42, result)
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhile_5(self):
     with self.cached_session():
 
@@ -1116,7 +1115,6 @@ class ControlFlowTest(test.TestCase):
     self._testWhile_Gpu_1(use_gpu=False)
     self._testWhile_Gpu_1(use_gpu=True)
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileShape(self):
     with self.cached_session():
       i = constant_op.constant(0)
@@ -1152,7 +1150,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n], parallel_iterations=20)
       self.assertEqual([10000], r.eval())
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileShapeInference(self):
     with self.cached_session():
       i = constant_op.constant(0)
@@ -1366,6 +1363,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(lambda x: x < 10, body, [x0])
       self.assertEqual(10, sess.run(r, {b: True}))
 
+  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCondWithControl(self):
     # Ensure that no control edges by an outer control dependency context are
     # added to nodes inside cond/while contexts.
@@ -1477,6 +1475,7 @@ class ControlFlowTest(test.TestCase):
     self._testCondWhile_3(use_gpu=False)
     self._testCondWhile_3(use_gpu=True)
 
+  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCond_1(self):
 
     with self.cached_session():
@@ -1493,6 +1492,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [i])
       self.assertAllEqual(10, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCond_2(self):
 
     with self.cached_session():
@@ -1502,6 +1502,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n])
       self.assertAllEqual(10, r.eval())
 
+  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCond_3(self):
 
     with self.cached_session():
@@ -1696,7 +1697,7 @@ class ControlFlowTest(test.TestCase):
       for i in xrange(10):
         self.assertEqual([i], q.dequeue().eval())
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
+  @test_util.disable_control_flow_v2("b/117119329 (stack)")
   def testWhileStack_1(self):
     with self.cached_session():
       s = gen_data_flow_ops.stack_v2(-1, dtypes.int32, stack_name="foo")
@@ -1781,7 +1782,6 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(1024.0, r.eval())
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileGrad_Shape(self):
     with self.cached_session():
       x = array_ops.placeholder(dtypes.float32, shape=[None])
@@ -2291,7 +2291,6 @@ class ControlFlowTest(test.TestCase):
       r = sess.run(r, feed_dict={v: 2.0})
       self.assertAllClose(1024.0, r)
 
-  @test_util.disable_control_flow_v2("b/116283162 (shape_invariants)")
   def testWhileGrad_Concat(self):
     with self.cached_session() as sess:
       x = variable_scope.get_variable("x", initializer=[[1., 2.]])
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 8ad71fe00c..f779c3d273 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -3225,7 +3225,8 @@ def while_loop(cond,
       raise ValueError("The while_v2 module is not set. Did you forget to "
                        "import tensorflow.python.ops."
                        "while_v2?")
-    return _while_v2.while_loop(cond, body, loop_vars, name)
+    return _while_v2.while_loop(
+        cond, body, loop_vars, shape_invariants=shape_invariants, name=name)
 
   with ops.name_scope(name, "while", loop_vars):
     if not loop_vars:
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index 6791e1cd61..8e88a84d60 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -32,6 +32,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import function_def_to_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import cond_v2_impl as cond_v2
 from tensorflow.python.ops import control_flow_ops
@@ -52,8 +53,17 @@ control_flow_ops._while_v2 = sys.modules[__name__]
 # handled in the CapturingGraph itself.
 
 
-def while_loop(cond, body, loop_vars, name=None):
+def while_loop(cond, body, loop_vars, shape_invariants=None, name=None):
   """Like tf.while_loop, except emits a single While op."""
+  flattened_loop_vars = nest.flatten(loop_vars)
+  if shape_invariants is not None:
+    nest.assert_same_structure(loop_vars, shape_invariants)
+    flattened_shapes = nest.flatten(shape_invariants)
+  else:
+    flattened_shapes = [t.shape for t in flattened_loop_vars]
+
+  del shape_invariants
+
   if not name:
     name = "while"
 
@@ -62,25 +72,33 @@ def while_loop(cond, body, loop_vars, name=None):
       cond_name = _get_unique_name(("%scond" % scope).replace("/", "_"))
       body_name = _get_unique_name(("%sbody" % scope).replace("/", "_"))
 
-    flattened_loop_vars = nest.flatten(loop_vars)
     num_outputs = len(flattened_loop_vars)
 
     # Add loop counter needed for computing gradients.
     flattened_loop_vars = [constant_op.constant(0., name="loop_counter")
                           ] + flattened_loop_vars
 
+    flattened_shapes = [tensor_shape.scalar()] + flattened_shapes
+
     # Build a `cond` wrapper that can handle the extra counter loop_var.
     def wrapped_cond(unused_loop_counter, *loop_vars):
       return cond(*loop_vars)
 
-    cond_graph = function.func_graph_from_py_func(cond_name, wrapped_cond,
-                                                  flattened_loop_vars, {})
+    signature = [
+        tensor_spec.TensorSpec(shape, t.dtype)
+        for shape, t in zip(flattened_shapes, flattened_loop_vars)
+    ]
+    cond_graph = function.func_graph_from_py_func(
+        cond_name, wrapped_cond, flattened_loop_vars, {}, signature=signature)
 
     # Add external_captures of cond to the list of loop vars.
     # Note that external tensors will be treated as loop invariants, i.e.,
     # the value of that tensor in each iteration is the same as it was at the
     # beginning of the loop execution.
     flattened_loop_vars = flattened_loop_vars + cond_graph.external_captures
+    flattened_shapes = flattened_shapes + [
+        t.shape for t in cond_graph.external_captures
+    ]
 
     def wrapped_body(loop_counter, *args):
       """Loop body augmented with counter update.
@@ -105,8 +123,12 @@ def while_loop(cond, body, loop_vars, name=None):
       # is_constant=True for inputs that are directly passed to outputs.
       return [loop_counter + 1] + list(outputs) + list(args[num_outputs:])
 
-    body_graph = function.func_graph_from_py_func(body_name, wrapped_body,
-                                                  flattened_loop_vars, {})
+    signature = [
+        tensor_spec.TensorSpec(shape, t.dtype)
+        for shape, t in zip(flattened_shapes, flattened_loop_vars)
+    ]
+    body_graph = function.func_graph_from_py_func(
+        body_name, wrapped_body, flattened_loop_vars, {}, signature=signature)
     # Add external captures of body to the list of loop vars.
     # Note that external tensors will be treated as loop invariants, i.e.,
     # the value of that tensor in each iteration is the same as it was at the
@@ -149,10 +171,17 @@ def while_loop(cond, body, loop_vars, name=None):
         # Add this modified tensor list to the list of outputs.
         body_graph.outputs.append(appended_tensor_list)
 
+    # Make sure that the shapes of the loop outputs are compatible with the
+    # shape invariants, or the shapes of the loop vars if the invariants are not
+    # specified.
+    _check_shapes_compat(body_graph.outputs[1:1 + num_outputs],
+                         flattened_shapes[1:1 + num_outputs],
+                         flattened_loop_vars[1:1 + num_outputs])
     outputs = gen_functional_ops._while(
         flattened_loop_vars,
         cond_v2._create_new_tf_function(cond_graph),
         cond_v2._create_new_tf_function(body_graph),
+        output_shapes=[t.shape for t in body_graph.outputs],
         name=scope)
 
     _copy_handle_data(body_graph.outputs, outputs)
@@ -216,6 +245,7 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
       loop_vars,
       cond_v2._create_new_tf_function(cond_grad_graph),
       cond_v2._create_new_tf_function(body_grad_graph),
+      output_shapes=[t.shape for t in body_grad_graph.outputs],
       name=_get_unique_name("%s_grad" % op.name))
 
   _copy_handle_data(body_grad_graph.outputs, outputs)
@@ -236,8 +266,10 @@ def _get_body_graph(while_op):
   Returns:
     `FuncGraph` for the while body.
   """
-  extra_inputs = list(while_op.inputs)
-  input_shapes = [t.shape for t in extra_inputs]
+  # TODO(srbs): Handle TensorShapeProto in function_def_to_graph.input_shapes.
+  input_shapes = [
+      tensor_shape.TensorShape(s) for s in while_op.get_attr("output_shapes")
+  ]
   func_name = while_op.get_attr("body").name
   fdef = while_op.graph._get_function(func_name).definition
   func_graph = function_def_to_graph.function_def_to_graph(fdef, input_shapes)
@@ -535,6 +567,17 @@ class _WhileBodyGradFuncGraph(function.FuncGraph):
     return captured_tensor
 
 
+def _check_shapes_compat(output_tensors, shape_invariants, input_tensors):
+  for (t, shape, input_t) in zip(output_tensors, shape_invariants,
+                                 input_tensors):
+    if not control_flow_ops._ShapeLessThanOrEqual(t.shape, shape):
+      raise ValueError(
+          "Input tensor '%s' enters the loop with shape %s, but has "
+          "shape %s after one iteration. To allow the shape to vary across "
+          "iterations, use the `shape_invariants` argument of tf.while_loop to "
+          "specify a less-specific shape." % (input_t.name, shape, t.shape))
+
+
 def _copy_handle_data(src_tensors, tgt_tensors):
   for src_t, tgt_t in zip(src_tensors, tgt_tensors):
     function._copy_handle_data(src_t, tgt_t)
-- 
GitLab


From a2599d1f89e3d6fe0a3f0436b5053fcbf4ae0265 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 13:28:51 -0700
Subject: [PATCH 0224/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 215448397
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 33 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  8 +++++
 2 files changed, 41 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 43c14d83b5..e46cbc863d 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -76797,6 +76797,39 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "While"
+  input_arg {
+    name: "input"
+    type_list_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "cond"
+    type: "func"
+  }
+  attr {
+    name: "body"
+    type: "func"
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "WholeFileReader"
   output_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index abee803889..0e9f939ab4 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -36935,6 +36935,14 @@ op {
     name: "body"
     type: "func"
   }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
   is_stateful: true
 }
 op {
-- 
GitLab


From a12b8c4afdca3ac2945d62b3b83ca2599ab360f9 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliacomputing.com>
Date: Sun, 16 Sep 2018 18:39:50 -0400
Subject: [PATCH 0225/1085] [xla] Improve validation of Broadcast shape

If one misreads the semantics of this instruction, it's easy to cause
an out of bounds access into the dimensions here. Add an extra check
to return a proper error to the user rather than crashing in that
case.

Ref #22130
---
 tensorflow/compiler/xla/service/hlo_verifier.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 50f39cbcb5..0f6ecd42f6 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -313,8 +313,9 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) {
        operand_dimension < ShapeUtil::Rank(operand_shape);
        ++operand_dimension) {
     int64 output_dimension = broadcast->dimensions()[operand_dimension];
-    TF_RET_CHECK(broadcast->shape().dimensions(output_dimension) ==
-                 operand_shape.dimensions(operand_dimension))
+    TF_RET_CHECK((output_dimension < ShapeUtil::Rank(broadcast->shape())) &&
+                 (broadcast->shape().dimensions(output_dimension) ==
+                 operand_shape.dimensions(operand_dimension)))
         << broadcast->ToString() << " operand shape " << operand_shape;
   }
   return Status::OK();
-- 
GitLab


From e45c90f0e4d17ac22048a73f1e81bd9c7a7a5145 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 14:03:40 -0700
Subject: [PATCH 0226/1085] Upgrade cloud tpu profiler to 1.12.0.

PiperOrigin-RevId: 215454323
---
 tensorflow/contrib/tpu/profiler/pip_package/setup.py | 2 +-
 tensorflow/contrib/tpu/profiler/version.h            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
index 2415c46718..f27ae38e04 100644
--- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py
+++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 
 from setuptools import setup
 
-_VERSION = '1.11.0'
+_VERSION = '1.12.0'
 
 CONSOLE_SCRIPTS = [
     'capture_tpu_profile=cloud_tpu_profiler.main:run_main',
diff --git a/tensorflow/contrib/tpu/profiler/version.h b/tensorflow/contrib/tpu/profiler/version.h
index 90d34b5ef1..4b6d1b2b07 100644
--- a/tensorflow/contrib/tpu/profiler/version.h
+++ b/tensorflow/contrib/tpu/profiler/version.h
@@ -16,6 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_
 #define TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_
 
-#define TPU_PROFILER_VERSION "1.11.0"
+#define TPU_PROFILER_VERSION "1.12.0"
 
 #endif  // TENSORFLOW_CONTRIB_TPU_PROFILER_VERSION_H_
-- 
GitLab


From c921e45bccac86ce0becc71cedc3da2c702d5c38 Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Tue, 2 Oct 2018 14:30:22 -0700
Subject: [PATCH 0227/1085] Add support for multiple input/output numpy arrays
 when using Keras APIs.

PiperOrigin-RevId: 215459075
---
 tensorflow/contrib/distribute/python/BUILD    |   1 +
 .../contrib/distribute/python/keras_test.py   |  88 ++++++++++--
 .../engine/distributed_training_utils.py      | 134 +++++++++++++++---
 tensorflow/python/keras/engine/training.py    |  48 ++++---
 .../keras/engine/training_distributed.py      |  30 ++--
 tensorflow/python/keras/models.py             |   5 +
 6 files changed, 237 insertions(+), 69 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index cfb9d42a6f..defa82f98a 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -728,6 +728,7 @@ cuda_py_test(
     additional_deps = [
         ":keras_test_lib",
     ],
+    shard_count = 16,
     tags = [
         "multi_and_single_gpu",
         "no_pip",
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 3aab2c521f..993cb2bac3 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -189,6 +189,14 @@ def get_dataset(distribution):
   return dataset
 
 
+def get_predict_dataset(distribution):
+  inputs = np.zeros((10, 3), dtype=np.float32)
+  dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
+  dataset = dataset.repeat(100)
+  dataset = batch_wrapper(dataset, 10, distribution)
+  return dataset
+
+
 strategies = [combinations.default_strategy,
               combinations.one_device_strategy,
               combinations.mirrored_strategy_with_gpu_and_cpu,
@@ -387,16 +395,26 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
           distributed_training_utils.validate_distributed_dataset_inputs(
               strategy, x, y)
 
-  def test_calling_model_with_numpy_arrays(self):
+  # TODO(anjalisridhar): Move this test along with other numpy related tests to
+  # its own class.
+  @combinations.generate(strategy_combinations())
+  def test_creating_var_with_numpy_arrays(self, distribution):
+    with self.cached_session():
+      x = np.asarray(np.random.random((64, 3)), dtype=np.float32)
+      var_x = distributed_training_utils.get_var_for_numpy(distribution, x)
+      val = self.evaluate(var_x.value())
+      # Verify that the numpy value is copied to the variable.
+      self.assertAllEqual(x, val)
+
+  @combinations.generate(strategy_combinations())
+  def test_calling_model_with_numpy_arrays(self, distribution):
     with self.cached_session():
       model = get_model()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.001)
       loss = 'mse'
-      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
-                                                     '/device:GPU:0'])
-      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
+      metrics = ['mae']
+      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
 
       inputs = np.zeros((64, 3), dtype=np.float32)
       targets = np.zeros((64, 4), dtype=np.float32)
@@ -419,6 +437,48 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       # with batch_size
       model.predict(inputs, batch_size=8)
 
+  @combinations.generate(strategy_combinations())
+  def test_calling_model_with_nested_numpy_arrays(self, distribution):
+    with self.cached_session():
+      a = keras.layers.Input(shape=(3,), name='input_a')
+      b = keras.layers.Input(shape=(3,), name='input_b')
+
+      dense = keras.layers.Dense(4, name='dense')
+      c = dense(a)
+      d = dense(b)
+      e = keras.layers.Dropout(0.5, name='dropout')(c)
+
+      model = keras.models.Model([a, b], [d, e])
+
+      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss, distribute=distribution)
+
+      input_a_np = np.asarray(np.random.random((64, 3)), dtype=np.float32)
+      input_b_np = np.asarray(np.random.random((64, 3)), dtype=np.float32)
+      inputs = [input_a_np, input_b_np]
+
+      output_d_np = np.asarray(np.random.random((64, 4)), dtype=np.float32)
+      output_e_np = np.asarray(np.random.random((64, 4)), dtype=np.float32)
+      targets = [output_d_np, output_e_np]
+
+      # Call fit with validation data
+      model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0)
+
+      # TODO(anjalisridhar): We need tests for when the batch size and steps are
+      # smaller and results in a 0 batch_size and steps value.
+      model.evaluate(inputs, targets)
+      # with steps
+      model.evaluate(inputs, targets, steps=2)
+      # with batch_size
+      model.evaluate(inputs, targets, batch_size=8)
+
+      model.predict(inputs)
+      # with steps
+      model.predict(inputs, steps=2)
+      # with batch_size
+      model.predict(inputs, batch_size=8)
+
   @combinations.generate(strategy_combinations())
   def test_calling_model_on_same_dataset(self, distribution):
     with self.cached_session():
@@ -436,7 +496,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
                 validation_data=dataset, validation_steps=2)
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                 validation_data=dataset, validation_steps=2)
-      model.predict(dataset, steps=2)
+      model.predict(get_predict_dataset(distribution), steps=2)
 
   # TODO(priyag): Enable this test for TPU. Currently tuples/dict don't work
   # as clone_model's input_tensors argument only seems to accept list and not
@@ -496,10 +556,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
       model.evaluate(dataset, steps=2, verbose=1)
-      model.predict(dataset, steps=2)
-      # Test with validation data
-      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                validation_data=dataset, validation_steps=2)
+      model.predict(get_predict_dataset(distribution), steps=2)
 
   @combinations.generate(strategy_and_optimizer_combinations())
   def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer):
@@ -513,7 +570,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
 
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
       model.evaluate(dataset, steps=2, verbose=1)
-      model.predict(dataset, steps=2)
+      model.predict(get_predict_dataset(distribution), steps=2)
 
   def test_unsupported_features(self):
     with self.cached_session():
@@ -726,8 +783,12 @@ class NormalizationLayerWithDistributionStrategyTest(
       dataset = dataset.repeat(100)
       dataset = batch_wrapper(dataset, 32, distribution)
 
+      predict_dataset = dataset_ops.Dataset.from_tensor_slices(x)
+      predict_dataset = predict_dataset.repeat(100)
+      predict_dataset = batch_wrapper(predict_dataset, 32, distribution)
+
       model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10)
-      out = model.predict(dataset, steps=2)
+      out = model.predict(predict_dataset, steps=2)
       out -= keras.backend.eval(norm.beta)
       out /= keras.backend.eval(norm.gamma)
       np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
@@ -811,8 +872,7 @@ class CorrectnessWithDistributionStrategyTest(test.TestCase,
         predict_batch_size = 4
         if with_distribution:
           predict_batch_size //= with_distribution.num_towers
-        predict_dataset = dataset_ops.Dataset.from_tensor_slices((x_predict,
-                                                                  x_predict))
+        predict_dataset = dataset_ops.Dataset.from_tensor_slices(x_predict)
         predict_dataset = batch_wrapper(predict_dataset,
                                         predict_batch_size, distribution)
         predict_result = model.predict(predict_dataset, steps=1)
diff --git a/tensorflow/python/keras/engine/distributed_training_utils.py b/tensorflow/python/keras/engine/distributed_training_utils.py
index 39341a931b..050602868a 100644
--- a/tensorflow/python/keras/engine/distributed_training_utils.py
+++ b/tensorflow/python/keras/engine/distributed_training_utils.py
@@ -17,12 +17,18 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.python.client import session as session_module
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import callbacks
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import distribute as distribute_lib
 from tensorflow.python.util import nest
@@ -304,23 +310,19 @@ def validate_inputs(x, y, distribution_strategy):
       compiled.
 
   Raises:
-    ValueError: if input is not a Dataset or a numpy array.
+    ValueError: if input is not a Dataset or a numpy array(when we use
+      MirroredStrategy).
   """
-  if isinstance(x, list) or isinstance(y, list):
-    raise ValueError('DistributionStrategy does not support lists of numpy'
-                     'arrays. You must pass a Dataset object or a numpy array '
-                     'as input.')
-
   if isinstance(x, dict) or isinstance(y, dict):
-    raise ValueError('DistributionStrategy does not support inputs of type '
-                     'dict. You must pass a Dataset object or a numpy array as '
-                     'input.')
+    raise ValueError('`DistributionStrategy` does not support inputs of type '
+                     'dict. You must pass a `tf.data.Dataset` object or a '
+                     'numpy array as input.')
 
-  if isinstance(x, iterator_ops.Iterator) or \
-      isinstance(y, iterator_ops.Iterator):
-    raise ValueError('DistributionStrategy does not support inputs of type '
-                     'Iterator. You must pass a Dataset object or a numpy '
-                     'array as input.')
+  if (isinstance(x, iterator_ops.Iterator) or
+      isinstance(y, iterator_ops.Iterator)):
+    raise ValueError('`DistributionStrategy` does not support inputs of type '
+                     'Iterator. You must pass a `tf.data.Dataset` object or a '
+                     'numpy array as input.')
 
   if distribution_strategy.__class__.__name__ == 'TPUStrategy':
     for i in [x, y]:
@@ -334,14 +336,14 @@ def validate_inputs(x, y, distribution_strategy):
               'Found unknown shape {} in input {}.'.format(s, i))
 
 
-def get_input_batch_params(first_x_value, batch_size, current_strategy):
+def get_input_batch_params(first_x_value, batch_size, distribution_strategy):
   """Calculate the number of batches and steps/steps_per_epoch.
 
   Args:
     first_x_value: This is the first input numpy array that is passed in as the
       model input.
     batch_size: The specified batch_size or the default batch_size of 32.
-    current_strategy: The current DistributionStrategy used to compile the
+    distribution_strategy: The current DistributionStrategy used to compile the
       model.
 
   Returns:
@@ -359,14 +361,14 @@ def get_input_batch_params(first_x_value, batch_size, current_strategy):
   # TODO(anjalisridhar): TPU currently supports using the num_towers property.
   # We might want to look into implementing worker_devices. In multi worker
   # strategy, perhaps num_towers works better?
-  steps = num_batches // current_strategy.num_towers
+  steps = num_batches // distribution_strategy.num_towers
   if not steps:
     # TODO(anjalisridhar): Number of towers in the error message may not convey
     # what we want to the user. Is there another terminology that we can use
     # that is consistent across different strategies.
     raise ValueError('The number of batches %d is smaller than the number '
                      'of towers %d used for DistributionStrategy. ' %
-                     num_batches, current_strategy.num_towers)
+                     (num_batches, distribution_strategy.num_towers))
   return steps
 
 
@@ -376,3 +378,99 @@ def get_batch_dimension(iterator):
   # all.
   dims = shapes[0].dims
   return dims[0] if dims else None
+
+
+def get_cpu_device(distribution_strategy):
+  """Returns the CPU device of the TPU host or the default CPU device string.
+
+  Args:
+    distribution_strategy: The DistributionStrategy used to compile the model.
+
+  Returns:
+    A device string which is the TPU host's CPU device in case of
+    TPUDistributionStrategy or the default CPU device string in all other
+    cases.
+
+  Raises:
+    NotImplementedError: We currently don't support copying numpy data to
+    multiple hosts in the case of Cloud TPU pods.
+  """
+  if distribution_strategy.__class__.__name__ == 'TPUStrategy':
+    if distribution_strategy.num_hosts > 1:
+      raise NotImplementedError('TPUDistributionStrategy does not '
+                                'support numpy inputs when running on Cloud'
+                                'TPU pods.')
+    return distribution_strategy.get_host_cpu_device(0)
+  else:
+    # For all strategies except TPUDistributionStrategy
+    # TODO(anjalisridhar): We may need to modify this when we add support for
+    # multi-worker strategy.
+    return '/CPU:0'
+
+
+def get_var_for_numpy(distribution_strategy, x):
+  if isinstance(x, list):
+    var_x = tuple([_get_var_for_numpy(distribution_strategy, single_input)
+                   for single_input in x])
+  else:
+    var_x = _get_var_for_numpy(distribution_strategy, x)
+  return var_x
+
+
+def _get_var_for_numpy(distribution_strategy, input_array):
+  """Creates a variable and assigns the value of the numpy array to it.
+
+  Args:
+    distribution_strategy: The DistributionStrategy used to compile the model.
+    input_array: The input numpy array whose value will be assigned to the
+      variable we create.
+
+  Returns:
+    The variable to which we will copy the value of the input numpy array.
+
+  """
+  with ops.device(get_cpu_device(distribution_strategy)):
+    # Create and initialize a variable on the CPU device. This is the CPU
+    # device of the host in the case of TPUDistributionStrategy.
+    input_var = variables.VariableV1(array_ops.zeros(input_array.shape,
+                                                     input_array.dtype),
+                                     trainable=False, use_resource=True)
+  K.get_session().run(input_var.initializer)
+
+  # Create a placeholder for the numpy array input slices. We copy the value
+  # of the input numpy array to the variable in slices of size 64 MB to avoid
+  # running into memory issues or RPC message limits.
+  start_placeholder = array_ops.placeholder(dtypes.int64, ())
+  end_placeholder = array_ops.placeholder(dtypes.int64, ())
+  slice_placeholder = array_ops.placeholder(input_var.dtype)
+  assign_slice_op = input_var[start_placeholder:end_placeholder].assign(
+      slice_placeholder)
+
+  # If each batch element is > 64 MB, then we copy each batch element
+  # individually. Otherwise, the slices will be < 128 MB. There might be padding
+  # which might mean that the slices are 128 MB even if the size of the
+  # tensor allocated is less than 128 MB.
+  # This formula gives slices with size:
+  # ceil(64 MB / byte size per batch element) bytes.
+  # Using ceil() guarantees we get a number >= 1.
+
+  # Calculate the size of each batch element.
+  byte_size_per_batch_element = np.prod(input_array.shape[1:]) * \
+                                input_var.dtype.size
+
+  # Calculate number of elements we want to copy per slice.
+  batch_size_per_slice = np.ceil((64 << 20) / byte_size_per_batch_element)
+
+  # Copy slices of the above size starting at 0, except the last slice will be
+  # smaller.
+  start = 0
+  limit = input_array.shape[0]
+  while start < limit:
+    end = min(start + batch_size_per_slice, limit)
+    K.get_session().run(assign_slice_op, feed_dict={
+        start_placeholder: start,
+        end_placeholder: end,
+        slice_placeholder: input_array[start:end]})
+    start = end
+
+  return input_var
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 5091cac836..c842b8192e 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -20,11 +20,9 @@ from __future__ import print_function
 
 import weakref
 import numpy as np
-import six
 
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.data.ops.dataset_ops import Dataset
 from tensorflow.python.eager import context
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -814,19 +812,21 @@ class Model(Network):
     first_x_value = nest.flatten(x)[0]
     if isinstance(first_x_value, np.ndarray):
       x_shape = first_x_value.shape
-      x_dtype = first_x_value.dtype
       if batch_size is None:
         batch_size = x_shape[0] // steps
       if y is not None:
-        first_y_value = nest.flatten(y)[0]
-        x = Dataset.from_generator(lambda x=x, y=y: six.moves.zip(x, y),
-                                   output_types=(x_dtype, first_y_value.dtype),
-                                   output_shapes=(x_shape[1:],
-                                                  first_y_value.shape[1:]))
+        var_x = distributed_training_utils.get_var_for_numpy(
+            self._distribution_strategy, x)
+        var_y = distributed_training_utils.get_var_for_numpy(
+            self._distribution_strategy, y)
+
+        x = dataset_ops.Dataset.from_tensor_slices((var_x, var_y))
         # TODO(anjalisridhar): What should the buffer size be?
         x = x.shuffle(10000)
         x = x.repeat()
-        x = x.batch(batch_size)
+        # We need to use the drop_remainder argument to allow for a static
+        # input shape which is required for TPUs.
+        x = x.batch(batch_size, drop_remainder=True)
         y = None
       else:
         # This case is for the predict call where the dataset only contains
@@ -834,11 +834,13 @@ class Model(Network):
         # TODO(anjalisridhar): Raise an error if we are not able to process
         # all the predict samples. This can happen if the number of batches is
         # not evenly divisible by the number of worker devices.
-        x = Dataset.from_generator(lambda x=x: x,
-                                   output_types=x_dtype,
-                                   output_shapes=x_shape[1:])
+        var_x = distributed_training_utils.get_var_for_numpy(
+            self._distribution_strategy, x)
+        x = dataset_ops.Dataset.from_tensor_slices(var_x)
         x = x.repeat()
-        x = x.batch(batch_size)
+        # We need to use the drop_remainder argument to allow for a static
+        # input shape which is required for TPUs.
+        x = x.batch(batch_size, drop_remainder=True)
 
     # TODO(anjalisridhar): Can we use the iterator and getnext op cache?
     # We require users to pass Datasets since we distribute the dataset across
@@ -978,16 +980,18 @@ class Model(Network):
                            'Make sure that your dataset can generate '
                            'required number of samples.')
 
-      if (not isinstance(next_element, (list, tuple)) or
-          len(next_element) not in [2, 3]):
-        raise ValueError(
-            'Please provide model inputs as a list or tuple of 2  or 3'
-            'elements: (input, target) or (input, target, sample_weights)'
-            'Received %s' % next_element)
-      if len(next_element) == 2:
-        x, y = next_element
+      if isinstance(next_element, (list, tuple)):
+        if len(next_element) not in [2, 3]:
+          raise ValueError(
+              'Please provide model inputs as a list or tuple of 2  or 3'
+              'elements: (input, target) or (input, target, sample_weights)'
+              'Received %s' % next_element)
+        if len(next_element) == 2:
+          x, y = next_element
+        else:
+          x, y, sample_weight = next_element
       else:
-        x, y, sample_weight = next_element
+        x = next_element
     x, y, sample_weights = self._standardize_weights(x, y, sample_weight,
                                                      class_weight, batch_size)
     return x, y, sample_weights
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index a6470458d2..04e8d079c0 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -32,6 +32,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import distribute as distribute_lib
+from tensorflow.python.util import nest
 
 
 # TODO(priyag, sourabhbajaj): Refactor this file to address code duplication.
@@ -296,15 +297,16 @@ def _experimental_fit_loop(
     initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)
 
   if steps_per_epoch is None:
-    raise ValueError('steps_per_epoch should be specified in the fit call.')
-  steps_per_run_var = K.variable(
+    raise ValueError('`steps_per_epoch` should be specified when calling '
+                     '`fit` on the model.')
+  steps_per_run = K.variable(
       value=min(steps_per_epoch, current_strategy.steps_per_run),
       dtype='int32',
-      name='steps_per_run_var')
+      name='steps_per_run')
 
   with current_strategy.scope():
     ctx = current_strategy.run_steps_on_dataset(
-        step_fn, iterator, iterations=steps_per_run_var,
+        step_fn, iterator, iterations=steps_per_run,
         initial_loop_values=initial_loop_values)
 
   train_op = ctx.run_op
@@ -344,7 +346,7 @@ def _experimental_fit_loop(
       batch_logs = {'batch': step_index, 'size': 1, 'num_steps': step_count}
       callbacks.on_batch_begin(step_index, batch_logs)
       if prev_step_count is None or step_count != prev_step_count:
-        steps_per_run_var.load(step_count, K.get_session())
+        steps_per_run.load(step_count, K.get_session())
         prev_step_count = step_count
       try:
         _, outputs = K.get_session().run([train_op, output_tensors])
@@ -720,13 +722,9 @@ def _experimental_predict_loop(model, iterator, verbose=0, steps=None):
             model.predict_function.updates_op,
             model.predict_function.session_kwargs)
 
-  def step_fn(ctx, inputs, targets):
+  def step_fn(ctx, *inputs):
     """Clones the model and calls make_predict_function."""
 
-    # TODO(anjalisridhar): Support predict input correctly as it will not
-    # contain targets, only inputs.
-    del targets
-
     # TODO(priyag, sourabhbajaj): The model gets cloned every time
     # fit/test/predict is called. We should look into caching this keyed on
     # input shapes.
@@ -824,9 +822,10 @@ def _clone_and_build_model(model, inputs=None, targets=None):
 
   # TODO(priyag): Is there a cleaner way to do this? The API doc suggests a
   # single tensor should be OK but it throws an error in that case.
-  if (targets is not None and not isinstance(targets, list) and
-      not isinstance(targets, dict)):
+  if targets is not None and not isinstance(targets, (list, dict, tuple)):
     targets = [targets]
+  if isinstance(targets, tuple):
+    targets = nest.flatten(targets)
   cloned_model.compile(
       optimizer,
       model.loss,
@@ -891,11 +890,12 @@ def _get_input_from_iterator(iterator, model):
   """Get elements from the iterator and verify the input shape and type."""
   next_element = iterator.get_next()
 
-  if isinstance(next_element, tuple):
-    x, y = next_element
-  else:
+  if len(nest.flatten(next_element)) == len(model.inputs):
     x = next_element
     y = None
+  else:
+    x, y = next_element
+
   # Validate that all the elements in x and y are of the same type and shape.
   # We can then pass the first element of x and y to `_standardize_weights`
   # below and be confident of the output.
diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py
index b04b4df257..2883c9ad74 100644
--- a/tensorflow/python/keras/models.py
+++ b/tensorflow/python/keras/models.py
@@ -96,6 +96,8 @@ def _clone_functional_model(model, input_tensors=None):
   else:
     # Make sure that all input tensors come from a Keras layer.
     # If tensor comes from an input layer: cache the input layer.
+    if isinstance(input_tensors, tuple):
+      input_tensors = list(input_tensors)
     input_tensors = generic_utils.to_list(input_tensors)
     input_tensors_ = []
     for i, x in enumerate(input_tensors):
@@ -212,6 +214,9 @@ def _clone_sequential_model(model, input_tensors=None):
       raise ValueError('To clone a `Sequential` model, we expect '
                        ' at most one tensor '
                        'as part of `input_tensors`.')
+
+    if isinstance(input_tensors, tuple):
+      input_tensors = list(input_tensors)
     x = generic_utils.to_list(input_tensors)[0]
     if K.is_keras_tensor(x):
       origin_layer = x._keras_history[0]
-- 
GitLab


From 05812d761031b108b43560c90867b96dc4f030eb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 14:35:49 -0700
Subject: [PATCH 0228/1085] Fixes for few issues in
 HloModule::CreateFromProto()

PiperOrigin-RevId: 215460064
---
 tensorflow/compiler/xla/literal.cc            |  2 ++
 .../compiler/xla/service/hlo_computation.cc   | 22 +++++++++++++++++++
 .../compiler/xla/service/hlo_instruction.cc   | 20 +++++++++++++----
 .../compiler/xla/service/hlo_sharding.cc      |  8 +++++--
 tensorflow/compiler/xla/shape_util.cc         |  3 ++-
 5 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index d1dad0d45f..deeb140b8f 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -287,6 +287,8 @@ Status MutableLiteralBase::CopyElementFrom(const LiteralSlice& src_literal,
     return InvalidArgument("LiteralProto has no layout");
   }
 
+  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(proto.shape()));
+
   Literal literal(proto.shape());
 
   TF_RETURN_IF_ERROR(literal.root_piece_->ForEachMutableSubpieceWithStatus(
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 6ef67ab0a8..c2041c4667 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -535,6 +535,28 @@ HloComputation::CreateFromProto(
               return to_proto_id[a.get()] < to_proto_id[b.get()];
             });
 
+  TF_RETURN_IF_ERROR([&]() -> Status {
+    std::vector<bool> parameters_seen(parameter_count);
+    int parameters_seen_count = 0;
+    for (auto& instruction : instructions) {
+      if (instruction->opcode() == HloOpcode::kParameter) {
+        int64 param_no = instruction->parameter_number();
+        TF_RET_CHECK(param_no >= 0 && param_no < parameter_count)
+            << "Invalid parameter number.  Expected [0, " << parameter_count
+            << "), got " << param_no;
+        TF_RET_CHECK(!parameters_seen[param_no])
+            << "Parameter number " << param_no
+            << " already allocated in this computation";
+        parameters_seen[param_no] = true;
+        parameters_seen_count++;
+      }
+    }
+    TF_RET_CHECK(parameters_seen_count == parameter_count)
+        << "Not all parameters in range [0, " << parameter_count
+        << ") were referenced";
+    return Status::OK();
+  }());
+
   auto computation = absl::WrapUnique(
       new HloComputation(proto.name(), parameter_count, &instructions, root,
                          /*fusion_instruction=*/nullptr));
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index de22b2d3a5..5c16d6bb5e 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -81,6 +81,20 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
   const auto computations = [&computation_map, &proto](int index) {
     return computation_map.at(proto.called_computation_ids(index));
   };
+
+  TF_RET_CHECK(std::all_of(
+      proto.operand_ids().begin(), proto.operand_ids().end(),
+      [&instruction_map](int64 id) { return instruction_map.contains(id); }))
+      << proto.name() << " instruction contains invalid operand id(s)";
+
+  TF_RET_CHECK(std::all_of(
+      proto.called_computation_ids().begin(),
+      proto.called_computation_ids().end(),
+      [&computation_map](int64 id) { return computation_map.contains(id); }))
+      << proto.name() << " instruction references invalid computation id(s)";
+
+  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(proto.shape()));
+
   switch (opcode) {
     // Ops migrated to subclasses.
     case HloOpcode::kBatchNormTraining:
@@ -304,6 +318,8 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     } break;
     case HloOpcode::kOutfeed:
       TF_RET_CHECK(proto.operand_ids_size() == 2);
+      TF_RETURN_IF_ERROR(
+          ShapeUtil::ValidateShapeWithOptionalLayout(proto.outfeed_shape()));
       instruction = CreateOutfeed(proto.outfeed_shape(), operands(0),
                                   operands(1), proto.outfeed_config());
       break;
@@ -492,14 +508,10 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     default: {
       instruction = absl::WrapUnique(new HloInstruction(opcode, proto.shape()));
       for (const int64 operand_id : proto.operand_ids()) {
-        TF_RET_CHECK(ContainsKey(instruction_map, operand_id))
-            << "No instruction with id " << operand_id;
         instruction->AppendOperand(instruction_map.at(operand_id));
       }
       if (instruction->opcode() != HloOpcode::kFusion) {
         for (const int64 computation_id : proto.called_computation_ids()) {
-          TF_RET_CHECK(ContainsKey(computation_map, computation_id))
-              << "No computation with id " << computation_id;
           instruction->called_computations_.push_back(
               computation_map.at(computation_id));
         }
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index de7e6b53d4..94c7bafd3b 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -369,10 +369,14 @@ Status HloSharding::ValidateNonTuple(const Shape& shape,
     return HloSharding(tuple_shardings);
   } else if (proto.type() == OpSharding::Type::OpSharding_Type_REPLICATED) {
     return Replicate();
-  } else if (proto.type() == OpSharding::Type::OpSharding_Type_MAXIMAL ||
-             proto.tile_assignment_devices().size() == 1) {
+  } else if (proto.tile_assignment_devices().size() == 1) {
     return HloSharding(proto.tile_assignment_devices(0));
   }
+
+  TF_RET_CHECK(proto.type() != OpSharding::Type::OpSharding_Type_MAXIMAL)
+      << "Maximal sharding is expected to have single device assignment, but "
+      << proto.tile_assignment_devices().size() << " has provided.";
+
   // Some versions of gcc cannot infer the TileAssignment constructor from a
   // braced initializer-list, so create one manually.
   std::vector<int64> devices(proto.tile_assignment_devices().begin(),
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 020c167ee9..476a9fe868 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -831,7 +831,8 @@ StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
 
 /* static */ Status ShapeUtil::ValidateShapeWithOptionalLayoutInternal(
     const Shape& shape) {
-  if (shape.element_type() == PRIMITIVE_TYPE_INVALID) {
+  if (shape.element_type() == PRIMITIVE_TYPE_INVALID ||
+      !PrimitiveType_IsValid(shape.element_type())) {
     return InvalidArgument("shape has invalid element type: %s",
                            shape.ShortDebugString());
   }
-- 
GitLab


From b03d13e5962d0e9cf5c736ce11fc906700944f95 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 1 Oct 2018 17:45:22 -0700
Subject: [PATCH 0229/1085] Make Keras/TPU more robust to closed TF sessions.

PiperOrigin-RevId: 215313156
---
 .../contrib/tpu/python/tpu/keras_support.py   | 278 ++++++++++--------
 1 file changed, 155 insertions(+), 123 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 696656e840..a3a7fd8bb0 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -46,6 +46,7 @@ from __future__ import print_function
 
 import abc
 import collections
+import contextlib
 import re
 import sys
 import time
@@ -94,21 +95,56 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 
 
+# TODO(b/114775106): temporary shim to optionally initialize the TPU
+# This increases the odds our session is initialized, but shouldn't be needed.
+def _maybe_initialize_tpu(session):
+  """Initialize the TPU if it has not already been initialized."""
+  try:
+
+    def test_op():
+      return constant_op.constant(1) + constant_op.constant(1)
+
+    session.run(tpu.rewrite(test_op))
+  except errors.FailedPreconditionError as _:
+    session.run(tpu.initialize_system())
+
+
+@contextlib.contextmanager
+def _tpu_session_context():
+  """Initialize the TPU and cleans cache entries for bad sessions."""
+  try:
+    _maybe_initialize_tpu(K.get_session())
+    yield
+  except (errors.FailedPreconditionError, errors.AbortedError) as e:
+    K.clear_session()
+    raise Exception("""
+An error occurred connecting or initializing your TPU.
+
+The session has been reset. re-run keras_to_tpu_model to create a new session.
+""" + e)
+
+
 def setup_tpu_session(cluster_resolver):
   """Construct or return a `tf.Session` connected to the given cluster."""
   master = cluster_resolver.master()
 
   # Use the existing session if we're already connected to this TPU
-  if (K.get_session()._target == master and
-      getattr(K.get_session(), '_tpu_initialized', None)):
-    return
+  # N.B K.get_session() is a non-trivial operation, and may fail if the remote
+  # session has been reset.
+  try:
+    default_session = K.get_session()
+    if (default_session._target == master and
+        getattr(default_session, '_tpu_initialized', None)):
+      return
+  except errors.AbortedError as _:
+    # We lost the remote session and need to re-initialize.
+    logging.warning('Lost remote session: creating a new session.')
 
   cluster_spec = cluster_resolver.cluster_spec()
   config = config_pb2.ConfigProto(isolate_session_state=True)
   if cluster_spec:
     config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
 
-  logging.info('Initialize')
   tpu_session = tf_session.Session(target=master, config=config)
   tpu_session.run(tpu.initialize_system())
   tpu_session._tpu_initialized = True
@@ -1391,97 +1427,74 @@ class KerasTPUModel(models.Model):
       raise EnvironmentError('KerasTPUModel currently does not support eager '
                              'mode.')
 
-    assert not self._numpy_to_infeed_manager_list  # Ensure empty.
-
-    infeed_managers = []  # Managers to clean up at the end of the fit call.
-    if isinstance(x, dataset_ops.Dataset):
-      # TODO(b/111413240): Support taking a tf.data.Dataset directly.
-      raise ValueError(
-          'Taking a Dataset directly is not yet supported. Please '
-          'wrap your dataset construction code in a function and '
-          'pass that to fit instead. For examples, see: '
-          'https://github.com/tensorflow/tpu/tree/master/models/experimental'
-          '/keras')
-    if callable(x):
-      with ops.device('/job:%s/device:CPU:0' %
-                      self._tpu_assignment.worker_name):
-        dataset = x()
-        if steps_per_epoch is None:
-          raise ValueError('When using tf.data as input to a model, you '
-                           'should specify the steps_per_epoch argument.')
-        if y is not None:
-          raise ValueError('When using tf.data as input to a model, y must be '
-                           'None')
-        infeed_manager = TPUDatasetInfeedManager(
-            dataset, self._tpu_assignment, model_fn_lib.ModeKeys.TRAIN)
+    with _tpu_session_context():
+      assert not self._numpy_to_infeed_manager_list  # Ensure empty.
+
+      infeed_managers = []  # Managers to clean up at the end of the fit call.
+      if isinstance(x, dataset_ops.Dataset):
+        # TODO(b/111413240): Support taking a tf.data.Dataset directly.
+        raise ValueError(
+            'Taking a Dataset directly is not yet supported. Please '
+            'wrap your dataset construction code in a function and '
+            'pass that to fit instead. For examples, see: '
+            'https://github.com/tensorflow/tpu/tree/master/models/experimental'
+            '/keras')
+      if callable(x):
+        with ops.device(
+            '/job:%s/device:CPU:0' % self._tpu_assignment.worker_name):
+          dataset = x()
+          if steps_per_epoch is None:
+            raise ValueError('When using tf.data as input to a model, you '
+                             'should specify the steps_per_epoch argument.')
+          if y is not None:
+            raise ValueError('When using tf.data as input to a model, y must '
+                             'be None')
+          infeed_manager = TPUDatasetInfeedManager(
+              dataset, self._tpu_assignment, model_fn_lib.ModeKeys.TRAIN)
+          # Use dummy numpy inputs for the rest of Keras' shape checking. We
+          # intercept them when building the model.
+          x = infeed_manager.dummy_x
+          y = infeed_manager.dummy_y
+          infeed_managers.append((x, infeed_manager))
+
+      if isinstance(validation_data, dataset_ops.Dataset):
+        # TODO(b/111413240): Support taking a tf.data.Dataset directly.
+        raise ValueError(
+            'Taking a Dataset directly is not yet supported. Please '
+            'wrap your dataset construction code in a function and '
+            'pass that to fit instead. For examples, see: '
+            'https://github.com/tensorflow/tpu/tree/master/models/experimental'
+            '/keras')
+      if callable(validation_data):
+        dataset = validation_data()
+        if validation_steps is None:
+          raise ValueError('When using tf.data as validation for a model, you '
+                           'should specify the validation_steps argument.')
+        infeed_manager = TPUDatasetInfeedManager(dataset, self._tpu_assignment,
+                                                 model_fn_lib.ModeKeys.EVAL)
         # Use dummy numpy inputs for the rest of Keras' shape checking. We
         # intercept them when building the model.
-        x = infeed_manager.dummy_x
-        y = infeed_manager.dummy_y
-        infeed_managers.append((x, infeed_manager))
+        val_x = infeed_manager.dummy_x
+        val_y = infeed_manager.dummy_y
+        infeed_managers.append((val_x, infeed_manager))
+        validation_data = (val_x, val_y)
 
-    if isinstance(validation_data, dataset_ops.Dataset):
-      # TODO(b/111413240): Support taking a tf.data.Dataset directly.
-      raise ValueError(
-          'Taking a Dataset directly is not yet supported. Please '
-          'wrap your dataset construction code in a function and '
-          'pass that to fit instead. For examples, see: '
-          'https://github.com/tensorflow/tpu/tree/master/models/experimental'
-          '/keras')
-    if callable(validation_data):
-      dataset = validation_data()
-      if validation_steps is None:
-        raise ValueError('When using tf.data as validation for a model, you '
-                         'should specify the validation_steps argument.')
-      infeed_manager = TPUDatasetInfeedManager(
-          dataset, self._tpu_assignment, model_fn_lib.ModeKeys.EVAL)
-      # Use dummy numpy inputs for the rest of Keras' shape checking. We
-      # intercept them when building the model.
-      val_x = infeed_manager.dummy_x
-      val_y = infeed_manager.dummy_y
-      infeed_managers.append((val_x, infeed_manager))
-      validation_data = (val_x, val_y)
-
-    self._numpy_to_infeed_manager_list = infeed_managers
-    try:
-      if not kwargs.get('_pipeline', True):
-        logging.info('Running non-pipelined training loop (`_pipeline=%s`).',
-                     kwargs['_pipeline'])
-        kwargs.pop('_pipeline')
-        return super(KerasTPUModel, self).fit(
-            x,
-            y,
-            batch_size,
-            epochs,
-            verbose,
-            callbacks,
-            validation_split,
-            validation_data,
-            shuffle,
-            class_weight,
-            sample_weight,
-            initial_epoch,
-            steps_per_epoch,
-            validation_steps,
-            **kwargs)
-      return self._pipeline_fit(
-          x,
-          y,
-          batch_size,
-          epochs,
-          verbose,
-          callbacks,
-          validation_split,
-          validation_data,
-          shuffle,
-          class_weight,
-          sample_weight,
-          initial_epoch,
-          steps_per_epoch,
-          validation_steps,
-          **kwargs)
-    finally:
-      self._numpy_to_infeed_manager_list = []
+      self._numpy_to_infeed_manager_list = infeed_managers
+      try:
+        if not kwargs.get('_pipeline', True):
+          logging.info('Running non-pipelined training loop (`_pipeline=%s`).',
+                       kwargs['_pipeline'])
+          kwargs.pop('_pipeline')
+          return super(KerasTPUModel, self).fit(
+              x, y, batch_size, epochs, verbose, callbacks, validation_split,
+              validation_data, shuffle, class_weight, sample_weight,
+              initial_epoch, steps_per_epoch, validation_steps, **kwargs)
+        return self._pipeline_fit(x, y, batch_size, epochs, verbose, callbacks,
+                                  validation_split, validation_data, shuffle,
+                                  class_weight, sample_weight, initial_epoch,
+                                  steps_per_epoch, validation_steps, **kwargs)
+      finally:
+        self._numpy_to_infeed_manager_list = []
 
   def evaluate(self,
                x=None,
@@ -1492,37 +1505,38 @@ class KerasTPUModel(models.Model):
                steps=None):
     assert not self._numpy_to_infeed_manager_list  # Ensure empty.
 
-    infeed_managers = []  # Managers to clean up at the end of the fit call.
-    if isinstance(x, dataset_ops.Dataset):
-      # TODO(b/111413240): Support taking a tf.data.Dataset directly.
-      raise ValueError(
-          'Taking a Dataset directly is not yet supported. Please '
-          'wrap your dataset construction code in a function and '
-          'pass that to fit instead. For examples, see: '
-          'https://github.com/tensorflow/tpu/tree/master/models/experimental'
-          '/keras')
-    if callable(x):
-      dataset = x()
-      if steps is None:
-        raise ValueError('When using tf.data as input to a model, you '
-                         'should specify the steps argument.')
-      if y is not None:
-        raise ValueError('When using tf.data as input to a model, y must be '
-                         'None')
-      infeed_manager = TPUDatasetInfeedManager(
-          dataset, self._tpu_assignment, model_fn_lib.ModeKeys.EVAL)
-      # Use dummy numpy inputs for the rest of Keras' shape checking. We
-      # intercept them when building the model.
-      x = infeed_manager.dummy_x
-      y = infeed_manager.dummy_y
-      infeed_managers.append((x, infeed_manager))
-
-    self._numpy_to_infeed_manager_list = infeed_managers
-    try:
-      return super(KerasTPUModel, self).evaluate(x, y, batch_size, verbose,
-                                                 sample_weight, steps)
-    finally:
-      self._numpy_to_infeed_manager_list = []
+    with _tpu_session_context():
+      infeed_managers = []  # Managers to clean up at the end of the fit call.
+      if isinstance(x, dataset_ops.Dataset):
+        # TODO(b/111413240): Support taking a tf.data.Dataset directly.
+        raise ValueError(
+            'Taking a Dataset directly is not yet supported. Please '
+            'wrap your dataset construction code in a function and '
+            'pass that to fit instead. For examples, see: '
+            'https://github.com/tensorflow/tpu/tree/master/models/experimental'
+            '/keras')
+      if callable(x):
+        dataset = x()
+        if steps is None:
+          raise ValueError('When using tf.data as input to a model, you '
+                           'should specify the steps argument.')
+        if y is not None:
+          raise ValueError('When using tf.data as input to a model, y must be '
+                           'None')
+        infeed_manager = TPUDatasetInfeedManager(dataset, self._tpu_assignment,
+                                                 model_fn_lib.ModeKeys.EVAL)
+        # Use dummy numpy inputs for the rest of Keras' shape checking. We
+        # intercept them when building the model.
+        x = infeed_manager.dummy_x
+        y = infeed_manager.dummy_y
+        infeed_managers.append((x, infeed_manager))
+
+      self._numpy_to_infeed_manager_list = infeed_managers
+      try:
+        return super(KerasTPUModel, self).evaluate(x, y, batch_size, verbose,
+                                                   sample_weight, steps)
+      finally:
+        self._numpy_to_infeed_manager_list = []
 
   def _pipeline_fit(self, x, y, batch_size, epochs, verbose, callbacks,
                     validation_split, validation_data, shuffle, class_weight,
@@ -1910,6 +1924,24 @@ class KerasTPUModel(models.Model):
 
     return val_x, val_y, val_sample_weights
 
+  def predict(self,
+              x,
+              batch_size=None,
+              verbose=0,
+              steps=None,
+              max_queue_size=10,
+              workers=1,
+              use_multiprocessing=False):
+    with _tpu_session_context():
+      return super(KerasTPUModel, self).predict(
+          x,
+          batch_size=batch_size,
+          verbose=verbose,
+          steps=steps,
+          max_queue_size=max_queue_size,
+          workers=workers,
+          use_multiprocessing=use_multiprocessing)
+
   @property
   def optimizer(self):
     if self._tpu_model:
-- 
GitLab


From 891e49f57b8229f58315cfeb743e38c235918083 Mon Sep 17 00:00:00 2001
From: Suyog Gupta <suyoggupta@google.com>
Date: Tue, 2 Oct 2018 14:46:13 -0700
Subject: [PATCH 0230/1085] Add missing documentation for use_tpu hparam

PiperOrigin-RevId: 215462000
---
 tensorflow/contrib/model_pruning/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md
index 15d95896d9..b313024e28 100644
--- a/tensorflow/contrib/model_pruning/README.md
+++ b/tensorflow/contrib/model_pruning/README.md
@@ -62,6 +62,7 @@ The pruning library allows for specification of the following hyper parameters:
 | sparsity_function_begin_step | integer | 0 | The global step at this which the gradual sparsity function begins to take effect |
 | sparsity_function_end_step | integer | 100 | The global step used as the end point for the gradual sparsity function |
 | sparsity_function_exponent | float | 3.0 | exponent = 1 is linearly varying sparsity between initial and final. exponent > 1 varies more slowly towards the end than the beginning |
+| use_tpu | bool | False | Training using TPUs? |
 
 The sparsity $$s_t$$ at global step $$t$$ is given by:
 
-- 
GitLab


From 664f3dde67bfa436e5216ae54ee256761c7c6962 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 14:52:16 -0700
Subject: [PATCH 0231/1085] Do not warn about loss of accuracy in trivial cases
 when all array elements are equal to either the min or the max value, so that
 they are trivially exactly quantized. This case does not normally occur for
 true learned weights, which is what this warning is intended for.

PiperOrigin-RevId: 215463096
---
 .../toco/graph_transformations/quantize.cc    | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
index 1bc366f555..fb299c31b7 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
@@ -97,15 +97,6 @@ const MinMax& GetOrComputeMinMax(Model* model, const string& array_name) {
   // to allow easily trying out quantization even if the graph
   // lacks some minmax information.
   if (array.buffer != nullptr) {
-    LOG(WARNING)
-        << "Constant array " << array_name
-        << " lacks MinMax information. To make up for that, we will now compute"
-        << " the MinMax from actual array elements. That will result in"
-        << " quantization parameters that probably do not match whichever "
-           "arithmetic"
-        << " was used during training, and thus will probably be a cause of "
-           "poor"
-        << " inference accuracy.";
     CHECK(array.buffer->type == ArrayDataType::kFloat);
     const auto& data = array.GetBuffer<ArrayDataType::kFloat>().data;
     // We always want [min, max] to contain 0.
@@ -120,6 +111,27 @@ const MinMax& GetOrComputeMinMax(Model* model, const string& array_name) {
       // to not be equal.
       max = 1.f;
     }
+    // No need to warn about accuracy if all array values are equal to either
+    // min or max:
+    // in that case, quantization is exact, and such arrays are not learned
+    // weights arrays for which fake-quantization would make sense, rather
+    // they tend to be hardcoded arrays of zeros or ones used in some graphs.
+    bool is_quantization_trivially_exact = true;
+    for (auto val : data) {
+      is_quantization_trivially_exact &= (val == min || val == max);
+    }
+    if (!is_quantization_trivially_exact) {
+      LOG(WARNING)
+          << "Constant array " << array_name
+          << " lacks MinMax information. To make up for that, we will now "
+             "compute"
+          << " the MinMax from actual array elements. That will result in"
+          << " quantization parameters that probably do not match whichever "
+             "arithmetic"
+          << " was used during training, and thus will probably be a cause of "
+             "poor"
+          << " inference accuracy.";
+    }
     auto& minmax = array.GetOrCreateMinMax();
     minmax.min = min;
     minmax.max = max;
-- 
GitLab


From c67f66ead31c843776ac53b15e7c763a50d1c85a Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Tue, 2 Oct 2018 15:01:25 -0700
Subject: [PATCH 0232/1085] Disable fused_conv tests that don't build in
 open-source. (#22675)

PiperOrigin-RevId: 215440356
---
 tensorflow/contrib/fused_conv/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/fused_conv/BUILD b/tensorflow/contrib/fused_conv/BUILD
index 490da9b33b..57a5bfbf43 100644
--- a/tensorflow/contrib/fused_conv/BUILD
+++ b/tensorflow/contrib/fused_conv/BUILD
@@ -145,6 +145,7 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
     ],
     tags = [
+        "manual",  # TODO(b/117128481): re-enable after fixing OSS build
         "no_pip",
         "requires-gpu-sm70",
     ],
@@ -169,6 +170,7 @@ cuda_py_test(
     ],
     main = "python/ops/fused_conv2d_bias_activation_benchmark.py",
     tags = [
+        "manual",  # TODO(b/117128481): re-enable after fixing OSS build
         "requires-gpu-sm70",
     ],
 )
-- 
GitLab


From 44f273e853360042ee14def03eba85d1e04a7272 Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Tue, 2 Oct 2018 14:54:08 -0700
Subject: [PATCH 0233/1085] [XLA] A test that disables layout assignment should
 only contain layout consistent HLO instructions.

Fix a dot test that disables layout assignment pass to not generate layout
inconsistent HLO instructions. This includes only adding the dot result to an
addend with the same layout, and disabling algebraic simplification which may
transform a dot to a multiplication with inconsistent layouts.

PiperOrigin-RevId: 215463477
---
 .../compiler/xla/tests/dot_operation_test.cc  | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc
index 0171f51583..6c0847a875 100644
--- a/tensorflow/compiler/xla/tests/dot_operation_test.cc
+++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc
@@ -394,6 +394,10 @@ class ParametricDotTestWithoutLayoutAssignment : public ParametricDotTest {
   ParametricDotTestWithoutLayoutAssignment() {
     execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes(
         "layout-assignment");
+    // Disable algebraic simplification because the pass may replace a dot
+    // instruction with a layout-changing multiplication instruction.
+    execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes(
+        "algsimp");
   }
 };
 
@@ -404,31 +408,18 @@ std::vector<DotTestParam> CreateNoLayoutAssignmentDotTestParameters() {
     for (bool lhs_row_major : {true, false}) {
       for (bool rhs_row_major : {true, false}) {
         for (bool has_addend : {true, false}) {
+          // The addend needs to be row major to match the result of the dot.
           params.push_back({/*m=*/1, /*k=*/k, /*n=*/n,
                             /*dot_lhs_row_major=*/lhs_row_major,
                             /*dot_rhs_row_major=*/rhs_row_major,
                             /*has_addend=*/has_addend,
                             /*addend_row_major=*/true});
-          if (has_addend) {
-            params.push_back({/*m=*/1, /*k=*/k, /*n=*/n,
-                              /*dot_lhs_row_major=*/lhs_row_major,
-                              /*dot_rhs_row_major=*/rhs_row_major,
-                              /*has_addend=*/has_addend,
-                              /*addend_row_major=*/false});
-          }
           if (n != 1) {
             params.push_back({/*m=*/n, /*k=*/k, /*n=*/1,
                               /*dot_lhs_row_major=*/lhs_row_major,
                               /*dot_rhs_row_major=*/rhs_row_major,
                               /*has_addend=*/has_addend,
                               /*addend_row_major=*/true});
-            if (has_addend) {
-              params.push_back({/*m=*/n, /*k=*/k, /*n=*/1,
-                                /*dot_lhs_row_major=*/lhs_row_major,
-                                /*dot_rhs_row_major=*/rhs_row_major,
-                                /*has_addend=*/has_addend,
-                                /*addend_row_major=*/false});
-            }
           }
         }
       }
-- 
GitLab


From 08e5ad2839ca2c6749544ace354f78d00f5243d9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 15:06:38 -0700
Subject: [PATCH 0234/1085] Fix a bug: the use of sequence-point boolean
 operators here had the unintended effect of causing the second line not to
 run at all depending on the result from the first line.

PiperOrigin-RevId: 215466006
---
 .../read_array_minmax_and_narrow_range_from_fake_quant.cc   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
index 5b41c49bfa..eaa9d3bcda 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
@@ -71,8 +71,10 @@ bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model,
   CHECK(fq_op->minmax);
   CHECK_EQ(1, fq_op->inputs.size());
 
-  return ApplyAttrsToArray(this, model, *fq_op, fq_op->inputs[0]) ||
-         ApplyAttrsToArray(this, model, *fq_op, fq_op->outputs[0]);
+  bool changed = false;
+  changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->inputs[0]);
+  changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->outputs[0]);
+  return changed;
 }
 
 }  // namespace toco
-- 
GitLab


From cfec3aa38db1d2b70045e7b89d82fae87c3fec02 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 15:07:36 -0700
Subject: [PATCH 0235/1085] Update code to use
 convert_to_tensor_or_indexed_slices, since features may be SparseTensors as
 well.

PiperOrigin-RevId: 215466199
---
 .../estimator/python/estimator/dnn_with_layer_annotations.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index 5faf0aacfe..6ca7aaf989 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -151,7 +151,7 @@ def make_input_layer_with_layer_annotations(original_input_layer):
     # spec and looking at the keys.
     spec = feature_column_lib.make_parse_example_spec(feature_columns)
     for key in spec.keys():
-      tensor = ops.convert_to_tensor(features[key])
+      tensor = ops.convert_to_tensor_or_indexed_slices(features[key])
       ops.add_to_collection(
           LayerAnnotationsCollectionNames.keys(
               LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
-- 
GitLab


From bb84d5d5e309204110315f7d0ff8ca0dbb022dd2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 15:08:52 -0700
Subject: [PATCH 0236/1085] [XLA] Support parsing the canonical format of HLO
 text.

Also stop truncating operands in the canonical format.

PiperOrigin-RevId: 215466465
---
 .../xla/service/hlo_execution_profile.cc      |   5 +-
 .../compiler/xla/service/hlo_instruction.cc   |   2 +-
 .../compiler/xla/service/hlo_instruction.h    |  14 +-
 tensorflow/compiler/xla/service/hlo_parser.cc | 276 ++++++++++++------
 tensorflow/compiler/xla/service/hlo_parser.h  |   5 +-
 .../compiler/xla/service/hlo_parser_test.cc   | 142 ++++++++-
 6 files changed, 338 insertions(+), 106 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index de3d7a1677..ce4cad4235 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -90,8 +90,9 @@ std::unique_ptr<HloProfilePrinterData> CreateHloProfilePrinterData(
       HloInstructionInfo* instruction_info =
           computation_info->add_instruction_infos();
       instruction_info->set_long_name(hlo->ToString());
-      instruction_info->set_short_name(
-          hlo->ToString(HloPrintOptions().set_compact_operands(true)));
+      instruction_info->set_short_name(hlo->ToString(
+          HloPrintOptions().set_compact_operands(true).set_print_operand_names(
+              false)));
       instruction_info->set_category(hlo->ToCategory());
       instruction_info->set_flop_count(cost_analysis.flop_count(*hlo));
       instruction_info->set_transcendental_count(
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 5c16d6bb5e..8bddaa8c96 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2034,7 +2034,7 @@ string HloInstruction::OperandsToStringWithCanonicalNameMap(
         options.is_in_nested_computation()) {
       str.push_back(PrintName(
           canonical_name_map->LookupOrInsert(operand->name()), options));
-    } else if (!options.compact_operands()) {
+    } else if (options.print_operand_names()) {
       str.push_back(PrintName(operand->name(), options));
     }
     StrAppend(out, StrJoin(str, " "));
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 1bfdc88abc..9deed20e5d 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -80,6 +80,7 @@ class HloPrintOptions {
         print_backend_config_(true),
         compact_operands_(false),
         print_operand_shape_(true),
+        print_operand_names_(true),
         print_program_shape_(true),
         print_percent_(true),
         print_control_dependencies_(true),
@@ -107,6 +108,7 @@ class HloPrintOptions {
         .set_print_metadata(false)
         .set_print_backend_config(false)
         .set_compact_operands(true)
+        .set_print_operand_names(false)
         .set_print_operand_shape(true)
         .set_print_program_shape(false)
         .set_print_percent(false)
@@ -144,6 +146,12 @@ class HloPrintOptions {
     return *this;
   }
 
+  // If true, the operand names will be printed.
+  HloPrintOptions& set_print_operand_names(bool value) {
+    print_operand_names_ = value;
+    return *this;
+  }
+
   // If true, program shape of hlo computations will be printed.
   HloPrintOptions& set_print_program_shape(bool value) {
     print_program_shape_ = value;
@@ -162,8 +170,8 @@ class HloPrintOptions {
     return *this;
   }
 
-  // If true, only a part of operands will be printed out, and their names will
-  // be omitted (note that in this case the text will not be parsable).
+  // If true, only a part of operands will be printed out (note that in this
+  // case the text will not be parsable).
   HloPrintOptions& set_compact_operands(bool value) {
     compact_operands_ = value;
     return *this;
@@ -197,6 +205,7 @@ class HloPrintOptions {
   bool print_backend_config() const { return print_backend_config_; }
   bool compact_operands() const { return compact_operands_; }
   bool print_operand_shape() const { return print_operand_shape_; }
+  bool print_operand_names() const { return print_operand_names_; }
   bool print_program_shape() const { return print_program_shape_; }
   bool print_percent() const { return print_percent_; }
   bool print_control_dependencies() const {
@@ -215,6 +224,7 @@ class HloPrintOptions {
   bool print_backend_config_;
   bool compact_operands_;
   bool print_operand_shape_;
+  bool print_operand_names_;
   bool print_program_shape_;
   bool print_percent_;
   bool print_control_dependencies_;
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 25b70740e3..5a125b4c08 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -80,17 +80,23 @@ class HloParser {
   StatusOr<PaddingConfig> ParsePaddingConfigOnly();
 
   // Stand-alone parsing utility for a single instruction worth of text.
-  Status ParseSingleInstruction(HloComputation::Builder* builder,
-                                string* root_name);
+  Status ParseSingleInstruction(HloModule* module);
 
  private:
-  // Locates an instruction with the given name in the instruction_pool_ or
+  using InstrNameTable =
+      std::unordered_map<string, std::pair<HloInstruction*, LocTy>>;
+
+  // Returns the map from the instruction name to the instruction itself and its
+  // location in the current scope.
+  InstrNameTable& current_name_table() { return scoped_name_tables_.back(); }
+
+  // Locates an instruction with the given name in the current_name_table() or
   // returns nullptr.
   //
-  // If the missing_instruction_hook_ is registered and a "shape" is provided,
-  // the hook will be called and may satisfy the request for the given
-  // instruction. This is useful when we reify parameters as they're resolved;
-  // i.e. for ParseSingleInstruction.
+  // When the name is not found or name is empty, if create_missing_instruction_
+  // hook is registered and a "shape" is provided, the hook will be called to
+  // create an instruction. This is useful when we reify parameters as they're
+  // resolved; i.e. for ParseSingleInstruction.
   std::pair<HloInstruction*, LocTy>* FindInstruction(
       const string& name, const optional<Shape>& shape = nullopt);
 
@@ -98,9 +104,11 @@ class HloParser {
   bool ParseHloModule(HloModule* module);
   bool ParseComputations(HloModule* module);
   bool ParseComputation(HloComputation** entry_computation);
-  bool ParseInstructionList(HloComputation::Builder* builder,
-                            string* root_name);
+  bool ParseInstructionList(HloComputation** computation,
+                            const string& computation_name);
   bool ParseInstruction(HloComputation::Builder* builder, string* root_name);
+  bool ParseInstruciontRhs(HloComputation::Builder* builder, const string& name,
+                           LocTy name_loc);
   bool ParseControlPredecessors(HloInstruction* instruction);
   bool ParseLiteral(Literal* literal, const Shape& shape);
   bool ParseTupleLiteral(Literal* literal, const Shape& shape);
@@ -281,23 +289,47 @@ class HloParser {
   bool AddComputation(const string& name, HloComputation* computation,
                       LocTy name_loc);
 
-  // The map from the instruction/computation name to the
-  // instruction/computation itself and it's location. This does not own the
-  // pointers.
-  std::unordered_map<string, std::pair<HloInstruction*, LocTy>>
-      instruction_pool_;
+  HloLexer lexer_;
+
+  // A stack for the instruction names. The top of the stack stores the
+  // instruction name table for the current scope.
+  //
+  // A instruction's name is unique among its scope (i.e. its parent
+  // computation), but it's not necessarily unique among all computations in the
+  // module. When there are multiple levels of nested computations, the same
+  // name could appear in both an outer computation and an inner computation. So
+  // we need a stack to make sure a name is only visible within its scope,
+  std::vector<InstrNameTable> scoped_name_tables_;
+
+  // A helper class which pushes and pops to an InstrNameTable stack via RAII.
+  class Scope {
+   public:
+    explicit Scope(std::vector<InstrNameTable>* scoped_name_tables)
+        : scoped_name_tables_(scoped_name_tables) {
+      scoped_name_tables_->emplace_back();
+    }
+    ~Scope() { scoped_name_tables_->pop_back(); }
+
+   private:
+    std::vector<InstrNameTable>* scoped_name_tables_;
+  };
+
+  // Map from the computation name to the computation itself and its location.
   std::unordered_map<string, std::pair<HloComputation*, LocTy>>
       computation_pool_;
 
-  HloLexer lexer_;
   std::vector<std::unique_ptr<HloComputation>> computations_;
   std::vector<string> error_;
 
-  // Function that gets invoked when we try to resolve an instruction
-  // instruction_pool_ but fail to do so.
-  std::function<std::pair<HloInstruction*, LocTy>*(string,
-                                                   const optional<Shape>&)>
-      missing_instruction_hook_;
+  // When an operand name cannot be resolved, this function is called to create
+  // a parameter instruction with the given name and shape. It registers the
+  // name, instruction, and a placeholder location in the name table. It returns
+  // the newly-created instruction and the placeholder location. If `name` is
+  // empty, this should create the parameter with a generated name. This is
+  // supposed to be set and used only in ParseSingleInstruction.
+  std::function<std::pair<HloInstruction*, LocTy>*(const string& name,
+                                                   const Shape& shape)>
+      create_missing_instruction_;
 };
 
 bool SplitToInt64s(absl::string_view s, char delim, std::vector<int64>* out) {
@@ -351,11 +383,21 @@ bool HloParser::Run(HloModule* module) {
 
 std::pair<HloInstruction*, HloParser::LocTy>* HloParser::FindInstruction(
     const string& name, const optional<Shape>& shape) {
-  std::pair<HloInstruction*, LocTy>* instr =
-      tensorflow::gtl::FindOrNull(instruction_pool_, name);
+  std::pair<HloInstruction*, LocTy>* instr = nullptr;
+  if (!name.empty()) {
+    instr = tensorflow::gtl::FindOrNull(current_name_table(), name);
+  }
+
   // Potentially call the missing instruction hook.
-  if (instr == nullptr && missing_instruction_hook_ != nullptr) {
-    return missing_instruction_hook_(name, shape);
+  if (instr == nullptr && create_missing_instruction_ != nullptr &&
+      scoped_name_tables_.size() == 1) {
+    if (!shape.has_value()) {
+      Error(lexer_.GetLoc(),
+            "Operand had no shape in HLO text; cannot create parameter for "
+            "single-instruction module.");
+      return nullptr;
+    }
+    return create_missing_instruction_(name, *shape);
   }
   return instr;
 }
@@ -439,7 +481,6 @@ bool HloParser::ParseComputation(HloComputation** entry_computation) {
   if (!ParseName(&name)) {
     return false;
   }
-  auto builder = absl::make_unique<HloComputation::Builder>(name);
 
   LocTy shape_loc = nullptr;
   Shape shape;
@@ -447,40 +488,21 @@ bool HloParser::ParseComputation(HloComputation** entry_computation) {
     return false;
   }
 
-  string root_name;
-  if (!ParseInstructionList(builder.get(), &root_name)) {
+  HloComputation* computation = nullptr;
+  if (!ParseInstructionList(&computation, name)) {
     return false;
   }
 
-  std::pair<HloInstruction*, LocTy>* root_node = FindInstruction(root_name);
-  // This means some instruction was marked as ROOT but we didn't find it in the
-  // pool, which should not happen.
-  if (!root_name.empty() && root_node == nullptr) {
-    LOG(FATAL) << "instruction " << root_name
-               << " was marked as ROOT but the parser has not seen it before";
-  }
-
-  HloInstruction* root = root_node == nullptr ? nullptr : root_node->first;
-  // Now root can be either an existing instruction or a nullptr. If it's a
-  // nullptr, the implementation of Builder will set the last instruction as
-  // root instruction.
-  computations_.emplace_back(builder->Build(root));
-  HloComputation* computation = computations_.back().get();
-
-  if (!root) {
-    root = computation->root_instruction();
-  } else {
-    CHECK_EQ(root, computation->root_instruction());
-  }
-
   // If param_list_to_shape was present, check compatibility.
-  if (shape_loc != nullptr && !ShapeUtil::Compatible(root->shape(), shape)) {
+  if (shape_loc != nullptr &&
+      !ShapeUtil::Compatible(computation->root_instruction()->shape(), shape)) {
     return Error(
         shape_loc,
-        StrCat("Shape of computation ", name, ", ",
-               ShapeUtil::HumanString(shape),
-               ", is not compatible with that of its root instruction ",
-               root_name, ", ", ShapeUtil::HumanString(root->shape())));
+        StrCat(
+            "Shape of computation ", name, ", ", ShapeUtil::HumanString(shape),
+            ", is not compatible with that of its root instruction ",
+            computation->root_instruction()->name(), ", ",
+            ShapeUtil::HumanString(computation->root_instruction()->shape())));
   }
 
   if (is_entry_computation) {
@@ -489,43 +511,62 @@ bool HloParser::ParseComputation(HloComputation** entry_computation) {
     }
     *entry_computation = computation;
   }
-  instruction_pool_.clear();
 
   return AddComputation(name, computation, name_loc);
 }
 
 // instruction_list ::= '{' instruction_list1 '}'
 // instruction_list1 ::= (instruction)+
-bool HloParser::ParseInstructionList(HloComputation::Builder* builder,
-                                     string* root_name) {
+bool HloParser::ParseInstructionList(HloComputation** computation,
+                                     const string& computation_name) {
+  Scope scope(&scoped_name_tables_);
+  HloComputation::Builder builder(computation_name);
   if (!ParseToken(TokKind::kLbrace,
                   "expects '{' at the beginning of instruction list.")) {
     return false;
   }
+  string root_name;
   do {
-    if (!ParseInstruction(builder, root_name)) {
+    if (!ParseInstruction(&builder, &root_name)) {
       return false;
     }
   } while (lexer_.GetKind() != TokKind::kRbrace);
-  return ParseToken(TokKind::kRbrace,
-                    "expects '}' at the end of instruction list.");
+  if (!ParseToken(TokKind::kRbrace,
+                  "expects '}' at the end of instruction list.")) {
+    return false;
+  }
+  HloInstruction* root = nullptr;
+  if (!root_name.empty()) {
+    std::pair<HloInstruction*, LocTy>* root_node =
+        tensorflow::gtl::FindOrNull(current_name_table(), root_name);
+
+    // This means some instruction was marked as ROOT but we didn't find it in
+    // the pool, which should not happen.
+    if (root_node == nullptr) {
+      LOG(FATAL) << "instruction " << root_name
+                 << " was marked as ROOT but the parser has not seen it before";
+    }
+    root = root_node->first;
+  }
+
+  // Now root can be either an existing instruction or a nullptr. If it's a
+  // nullptr, the implementation of Builder will set the last instruction as
+  // the root instruction.
+  computations_.emplace_back(builder.Build(root));
+  *computation = computations_.back().get();
+  return true;
 }
 
 // instruction ::= ('ROOT')? name '=' shape opcode operands (attribute)*
 bool HloParser::ParseInstruction(HloComputation::Builder* builder,
                                  string* root_name) {
   string name;
-  Shape shape;
-  HloOpcode opcode;
-  std::vector<HloInstruction*> operands;
-
   LocTy maybe_root_loc = lexer_.GetLoc();
   bool is_root = EatIfPresent(TokKind::kw_ROOT);
 
   const LocTy name_loc = lexer_.GetLoc();
   if (!ParseName(&name) ||
-      !ParseToken(TokKind::kEqual, "expects '=' in instruction") ||
-      !ParseShape(&shape) || !ParseOpcode(&opcode)) {
+      !ParseToken(TokKind::kEqual, "expects '=' in instruction")) {
     return false;
   }
 
@@ -536,6 +577,19 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
     *root_name = name;
   }
 
+  return ParseInstruciontRhs(builder, name, name_loc);
+}
+
+bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
+                                    const string& name, LocTy name_loc) {
+  Shape shape;
+  HloOpcode opcode;
+  std::vector<HloInstruction*> operands;
+
+  if (!ParseShape(&shape) || !ParseOpcode(&opcode)) {
+    return false;
+  }
+
   // Add optional attributes.
   std::unordered_map<string, AttrConfig> attrs;
   optional<OpSharding> sharding;
@@ -2146,7 +2200,20 @@ bool HloParser::ParseOperands(std::vector<HloInstruction*>* operands) {
         }
       }
       if (!ParseName(&name)) {
-        return false;
+        // When parsing a single instruction (as opposed to a whole module), an
+        // HLO may have one or more operands with a shape but no name:
+        //
+        //  foo = add(f32[10], f32[10])
+        //
+        // create_missing_instruction_ is always non-null when parsing a single
+        // instruction, and is responsible for creating kParameter instructions
+        // for these operands.
+        if (shape.has_value() && create_missing_instruction_ != nullptr &&
+            scoped_name_tables_.size() == 1) {
+          name = "";
+        } else {
+          return false;
+        }
       }
       std::pair<HloInstruction*, LocTy>* instruction =
           FindInstruction(name, shape);
@@ -2299,9 +2366,17 @@ bool HloParser::ParseAttributeHelper(
         return true;
       }
       case AttrTy::kHloComputation: {
-        HloComputation* result;
-        if (!ParseComputationName(&result)) {
-          return false;
+        HloComputation* result = nullptr;
+        if (lexer_.GetKind() == TokKind::kLbrace) {
+          // This means it is a nested computation.
+          if (!ParseInstructionList(&result, /*computation_name=*/"_")) {
+            return false;
+          }
+        } else {
+          // This means it is a computation name.
+          if (!ParseComputationName(&result)) {
+            return false;
+          }
         }
         static_cast<optional<HloComputation*>*>(attr_out_ptr)->emplace(result);
         return true;
@@ -3134,7 +3209,7 @@ bool HloParser::EatIfPresent(TokKind kind) {
 
 bool HloParser::AddInstruction(const string& name, HloInstruction* instruction,
                                LocTy name_loc) {
-  auto result = instruction_pool_.insert({name, {instruction, name_loc}});
+  auto result = current_name_table().insert({name, {instruction, name_loc}});
   if (!result.second) {
     Error(name_loc, StrCat("instruction already exists: ", name));
     return Error(/*loc=*/result.first->second.second,
@@ -3204,36 +3279,51 @@ StatusOr<PaddingConfig> HloParser::ParsePaddingConfigOnly() {
   return padding_config;
 }
 
-Status HloParser::ParseSingleInstruction(HloComputation::Builder* builder,
-                                         string* root_name) {
-  TF_RET_CHECK(missing_instruction_hook_ == nullptr);
+Status HloParser::ParseSingleInstruction(HloModule* module) {
+  TF_RET_CHECK(create_missing_instruction_ == nullptr);
+  TF_RET_CHECK(scoped_name_tables_.empty());
+  HloComputation::Builder builder(module->name());
 
   // The missing instruction hook we register creates the shaped instruction on
   // the fly as a parameter and returns it.
   int64 parameter_count = 0;
-  missing_instruction_hook_ =
-      [this, builder, &parameter_count](
-          string name,
-          const optional<Shape>& shape) -> std::pair<HloInstruction*, LocTy>* {
-    if (!shape.has_value()) {
-      Error(lexer_.GetLoc(),
-            StrCat("Operand ", name,
-                   " had no shape in HLO text; cannot create parameter for "
-                   "single-instruction module."));
-      return nullptr;
-    }
-    HloInstruction* parameter = builder->AddInstruction(
-        HloInstruction::CreateParameter(parameter_count++, *shape, name));
-    instruction_pool_[name] = {parameter, lexer_.GetLoc()};
-    return tensorflow::gtl::FindOrNull(instruction_pool_, name);
+  create_missing_instruction_ =
+      [this, &builder, &parameter_count](
+          const string& name,
+          const Shape& shape) -> std::pair<HloInstruction*, LocTy>* {
+    string new_name = name.empty() ? StrCat("_", parameter_count) : name;
+    HloInstruction* parameter = builder.AddInstruction(
+        HloInstruction::CreateParameter(parameter_count++, shape, new_name));
+    current_name_table()[new_name] = {parameter, lexer_.GetLoc()};
+    return tensorflow::gtl::FindOrNull(current_name_table(), new_name);
   };
 
   // Prime the lexer.
   lexer_.Lex();
 
   // Parse the instruction with the registered hook.
-  if (!ParseInstruction(builder, root_name)) {
-    return InvalidArgument("Syntax error:\n%s", GetError());
+  Scope scope(&scoped_name_tables_);
+  if (CanBeShape()) {
+    // This means that the instruction's left-hand side is probably omitted,
+    // e.g.
+    //
+    //  f32[10] fusion(...), calls={...}
+    if (!ParseInstruciontRhs(&builder, module->name(), lexer_.GetLoc())) {
+      return InvalidArgument("Syntax error:\n%s", GetError());
+    }
+  } else {
+    // This means that the instruction's left-hand side might exist, e.g.
+    //
+    //  foo = f32[10] fusion(...), calls={...}
+    string root_name;
+    if (!ParseInstruction(&builder, &root_name)) {
+      return InvalidArgument("Syntax error:\n%s", GetError());
+    }
+  }
+
+  module->AddEntryComputation(builder.Build());
+  for (auto& comp : computations_) {
+    module->AddEmbeddedComputation(std::move(comp));
   }
   return Status::OK();
 }
@@ -3271,12 +3361,8 @@ Status ParseHloString(absl::string_view str, HloModule* module) {
 StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
     absl::string_view str, absl::string_view name) {
   HloParser parser(str);
-  auto builder = absl::make_unique<HloComputation::Builder>(string(name));
-  string root_name;
-  TF_RETURN_IF_ERROR(parser.ParseSingleInstruction(builder.get(), &root_name));
-  std::unique_ptr<HloComputation> computation = builder->Build();
   auto module = absl::make_unique<HloModule>(string(name), HloModuleConfig());
-  module->AddEntryComputation(std::move(computation));
+  TF_RETURN_IF_ERROR(parser.ParseSingleInstruction(module.get()));
   return std::move(module);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h
index 3696035514..97d6f0117e 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.h
+++ b/tensorflow/compiler/xla/service/hlo_parser.h
@@ -40,8 +40,9 @@ StatusOr<std::unique_ptr<HloModule>> ParseHloString(
 // point to an empty module (no computations).
 Status ParseHloString(absl::string_view str, HloModule* module);
 
-// Parses the text for a single HLO operation into an HLO module with a function
-// that runs that operation (with the same parameters) as its entry computation.
+// Parses the text for a single HLO instruction into an HLO module with an
+// entry computation that runs that instruction (with the same parameters) as
+// its root instruction.
 StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
     absl::string_view str, absl::string_view name = "single_op");
 
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index dd4ee780f0..d10acf3814 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1763,6 +1763,25 @@ ENTRY entry {
       "was parsing 8:39: error: instruction does not exist: aparam");
 }
 
+TEST_F(HloParserTest, SameNameDiffComputations) {
+  const string original = R"(HloModule same_names:
+add {
+  p0 = f32[] parameter(0)
+  p1 = f32[] parameter(1)
+  ROOT result = f32[] add(p0, p1)
+}
+
+ENTRY ReduceR3ToR2 {
+  p0 = f32[8,16,256]{2,1,0} parameter(0)
+  p1 = f32[] constant(0)
+  ROOT result = f32[8,16]{1,0} reduce(p0, p1), dimensions={2}, to_apply=add
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(original));
+  ASSERT_NE(module->entry_computation(), nullptr);
+  EXPECT_THAT(module->entry_computation()->root_instruction(), op::Reduce());
+}
+
 TEST_F(HloParserTest, ParseSharding) {
   const string original = "{maximal device=42}";
   TF_ASSERT_OK_AND_ASSIGN(HloSharding sharding, ParseSharding(original));
@@ -1823,14 +1842,129 @@ TEST(HloParserSingleOpTest, SingleOp) {
               op::Multiply(op::Parameter(0), op::Parameter(1)));
 }
 
-TEST(HloParserSingleOpTest, SingleOpNoShapesProducesError) {
+TEST(HloParserSingleOpTest, SingleOpNoShapeProducesError) {
+  const string text = "multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x)";
+  StatusOr<std::unique_ptr<HloModule>> module = ParseHloOpToModule(text);
+  ASSERT_TRUE(!module.status().ok());
+  LOG(INFO) << "Status: " << module.status();
+  EXPECT_THAT(module.status().ToString(),
+              ::testing::HasSubstr("expects '=' in instruction"));
+}
+
+TEST(HloParserSingleOpTest, SingleOpNoOperandShapesProducesError) {
   const string text = "%multiply = f32[2,4]{1,0} multiply(%broadcast, %x)";
   StatusOr<std::unique_ptr<HloModule>> module = ParseHloOpToModule(text);
   ASSERT_TRUE(!module.status().ok());
   LOG(INFO) << "Status: " << module.status();
-  EXPECT_THAT(
-      module.status().ToString(),
-      ::testing::HasSubstr("Operand broadcast had no shape in HLO text"));
+  EXPECT_THAT(module.status().ToString(),
+              ::testing::HasSubstr("Operand had no shape in HLO text"));
+}
+
+TEST(HloParserSingleOpTest, SingleOpNoNames) {
+  const string text =
+      "%multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})";
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  const HloComputation* computation = module->entry_computation();
+  ASSERT_NE(computation, nullptr);
+  EXPECT_THAT(computation->root_instruction(),
+              op::Multiply(op::Parameter(0), op::Parameter(1)));
+}
+
+TEST(HloParserSingleOpTest, CanonicalOp) {
+  const string text = "f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})";
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  const HloComputation* computation = module->entry_computation();
+  ASSERT_NE(computation, nullptr);
+  EXPECT_THAT(computation->root_instruction(),
+              op::Multiply(op::Parameter(0), op::Parameter(1)));
+  EXPECT_EQ(
+      computation->root_instruction()->ToString(HloPrintOptions::Canonical()),
+      text);
+}
+
+TEST(HloParserSingleOpTest, CanonicalOpWithNested) {
+  const string text =
+      R"(f32[5,20]{1,0} while(f32[5,10]{1,0}), condition=
+{
+  tmp_0 = f32[5,10]{1,0} parameter(0)
+  tmp_1 = f32[20,10]{1,0} parameter(1)
+  ROOT tmp_2 = f32[5,20]{1,0} fusion(f32[5,10]{1,0} tmp_0, f32[20,10]{1,0} tmp_1), kind=kLoop, calls=
+  {
+    tmp_0 = f32[5,10]{1,0} parameter(0)
+    tmp_1 = f32[20,10]{1,0} parameter(1)
+    tmp_2 = f32[10,20]{1,0} transpose(f32[20,10]{1,0} tmp_1), dimensions={1,0}
+    ROOT tmp_3 = f32[5,20]{1,0} dot(f32[5,10]{1,0} tmp_0, f32[10,20]{1,0} tmp_2), lhs_contracting_dims={1}, rhs_contracting_dims={0}
+  }
+}, body=
+{
+  tmp_0 = f32[5,10]{1,0} parameter(0)
+  tmp_1 = f32[20,10]{1,0} parameter(1)
+  ROOT tmp_2 = f32[5,20]{1,0} fusion(f32[5,10]{1,0} tmp_0, f32[20,10]{1,0} tmp_1), kind=kLoop, calls=
+  {
+    tmp_0 = f32[5,10]{1,0} parameter(0)
+    tmp_1 = f32[20,10]{1,0} parameter(1)
+    tmp_2 = f32[10,20]{1,0} transpose(f32[20,10]{1,0} tmp_1), dimensions={1,0}
+    ROOT tmp_3 = f32[5,20]{1,0} dot(f32[5,10]{1,0} tmp_0, f32[10,20]{1,0} tmp_2), lhs_contracting_dims={1}, rhs_contracting_dims={0}
+  }
+})";
+
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  const HloComputation* computation = module->entry_computation();
+  ASSERT_NE(computation, nullptr);
+  EXPECT_EQ(
+      computation->root_instruction()->ToString(HloPrintOptions::Canonical()),
+      text);
+}
+
+TEST(HloParserSingleOpTest, SingleOpWithNested) {
+  const string text =
+      R"(%fusion = f32[3,2,1,1]{3,2,1,0} fusion(f32[3,2,1,1]{3,2,1,0} %p0, f32[2]{0} %p1), kind=kLoop, calls=
+{
+  %param_0 = f32[3,2,1,1]{3,2,1,0} parameter(0)
+  %param_1 = f32[2]{0} parameter(1)
+  %broadcast = f32[3,2,1,1]{3,2,1,0} broadcast(f32[2]{0} %param_1), dimensions={1}
+  ROOT %subtract = f32[3,2,1,1]{3,2,1,0} subtract(f32[3,2,1,1]{3,2,1,0} %param_0, f32[3,2,1,1]{3,2,1,0} %broadcast)
+})";
+
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  const HloComputation* computation = module->entry_computation();
+  ASSERT_NE(computation, nullptr);
+  EXPECT_THAT(computation->root_instruction(),
+              op::Fusion(op::Parameter(0), op::Parameter(1)));
+}
+
+TEST(HloParserSingleOpTest, SingleOpWithNested_DoesNotExist) {
+  const string text =
+      R"(reduce = f32[] reduce(f32[10], f32[]), dimensions={1}, to_apply=
+{
+  result = f32[] add(f32[] x, f32[] y)
+})";
+  auto status = ParseHloOpToModule(text).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::HasSubstr("does not exist: x"));
+}
+
+TEST(HloParserSingleOpTest, SingleOpWithNested_NoLhs) {
+  const string text =
+      R"(reduce = f32[] reduce(f32[10], f32[]), dimensions={1}, to_apply=
+{
+  f32[] add(f32[] x, f32[] y)
+})";
+  auto status = ParseHloOpToModule(text).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name"));
+}
+
+TEST(HloParserSingleOpTest, SingleOpWithNested_NoOperandName) {
+  const string text =
+      R"(reduce = f32[] reduce(f32[10], f32[]), dimensions={1}, to_apply=
+{
+  result = f32[] add(f32[], f32[])
+})";
+  auto status = ParseHloOpToModule(text).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name"));
 }
 
 TEST(HloParserSingleOpTest, ConvolutionTrivialFeatureGroupCount) {
-- 
GitLab


From 00000cbfdf0efac737f3bfff94950a49d48659fb Mon Sep 17 00:00:00 2001
From: Christopher Olston <olston@google.com>
Date: Tue, 2 Oct 2018 15:48:17 -0700
Subject: [PATCH 0237/1085] Delete the shims in tensorflow/contrib/batching/.

PiperOrigin-RevId: 215473319
---
 tensorflow/contrib/batching/BUILD             | 58 -------------------
 .../adaptive_shared_batch_scheduler.h         | 21 -------
 .../contrib/batching/basic_batch_scheduler.h  | 21 -------
 tensorflow/contrib/batching/batch_scheduler.h | 21 -------
 .../batching/serial_device_batch_scheduler.h  | 21 -------
 .../contrib/batching/shared_batch_scheduler.h | 21 -------
 tensorflow/contrib/batching/test_util/BUILD   | 19 ------
 .../batching/test_util/fake_clock_env.h       | 21 -------
 tensorflow/contrib/batching/util/BUILD        | 28 ---------
 .../contrib/batching/util/periodic_function.h | 20 -------
 10 files changed, 251 deletions(-)
 delete mode 100644 tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/basic_batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/serial_device_batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/shared_batch_scheduler.h
 delete mode 100644 tensorflow/contrib/batching/test_util/BUILD
 delete mode 100644 tensorflow/contrib/batching/test_util/fake_clock_env.h
 delete mode 100644 tensorflow/contrib/batching/util/BUILD
 delete mode 100644 tensorflow/contrib/batching/util/periodic_function.h

diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD
index b27a19b16c..648f3ebb05 100644
--- a/tensorflow/contrib/batching/BUILD
+++ b/tensorflow/contrib/batching/BUILD
@@ -7,64 +7,6 @@ package(
 licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
-
-cc_library(
-    name = "batch_scheduler_hdrs",
-    hdrs = ["batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:batch_scheduler_hdrs",
-    ],
-)
-
-cc_library(
-    name = "batch_scheduler",
-    hdrs = ["batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:batch_scheduler",
-    ],
-)
-
-cc_library(
-    name = "shared_batch_scheduler_hdrs",
-    hdrs = ["shared_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:shared_batch_scheduler_hdrs",
-    ],
-)
-
-cc_library(
-    name = "shared_batch_scheduler",
-    hdrs = ["shared_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:shared_batch_scheduler",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "adaptive_shared_batch_scheduler",
-    hdrs = ["adaptive_shared_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:adaptive_shared_batch_scheduler",
-    ],
-)
-
-cc_library(
-    name = "serial_device_batch_scheduler",
-    hdrs = ["serial_device_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:serial_device_batch_scheduler",
-    ],
-)
-
-cc_library(
-    name = "basic_batch_scheduler",
-    hdrs = ["basic_batch_scheduler.h"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:basic_batch_scheduler",
-    ],
-)
-
 load(
     "//tensorflow:tensorflow.bzl",
     "py_test",
diff --git a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h b/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
deleted file mode 100644
index 86250e6692..0000000000
--- a/tensorflow/contrib/batching/adaptive_shared_batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_ADAPTIVE_SHARED_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/basic_batch_scheduler.h b/tensorflow/contrib/batching/basic_batch_scheduler.h
deleted file mode 100644
index d9b37da693..0000000000
--- a/tensorflow/contrib/batching/basic_batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/basic_batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_BASIC_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/batch_scheduler.h b/tensorflow/contrib/batching/batch_scheduler.h
deleted file mode 100644
index 8e94e1fd8b..0000000000
--- a/tensorflow/contrib/batching/batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/serial_device_batch_scheduler.h b/tensorflow/contrib/batching/serial_device_batch_scheduler.h
deleted file mode 100644
index bf6b708361..0000000000
--- a/tensorflow/contrib/batching/serial_device_batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/serial_device_batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_SERIAL_DEVICE_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/shared_batch_scheduler.h b/tensorflow/contrib/batching/shared_batch_scheduler.h
deleted file mode 100644
index 83a59695d7..0000000000
--- a/tensorflow/contrib/batching/shared_batch_scheduler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_
-#define TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_
-
-#include "tensorflow/core/kernels/batching_util/shared_batch_scheduler.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_SHARED_BATCH_SCHEDULER_H_
diff --git a/tensorflow/contrib/batching/test_util/BUILD b/tensorflow/contrib/batching/test_util/BUILD
deleted file mode 100644
index 7cb2d8079b..0000000000
--- a/tensorflow/contrib/batching/test_util/BUILD
+++ /dev/null
@@ -1,19 +0,0 @@
-# Description: Utilities to aid testing.
-
-package(
-    default_visibility = ["//tensorflow:internal"],
-)
-
-licenses(["notice"])  # Apache 2.0
-
-exports_files(["LICENSE"])
-
-cc_library(
-    name = "fake_clock_env",
-    testonly = 1,
-    hdrs = ["fake_clock_env.h"],
-    visibility = ["//visibility:public"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:fake_clock_env",
-    ],
-)
diff --git a/tensorflow/contrib/batching/test_util/fake_clock_env.h b/tensorflow/contrib/batching/test_util/fake_clock_env.h
deleted file mode 100644
index 40a39a5569..0000000000
--- a/tensorflow/contrib/batching/test_util/fake_clock_env.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_
-#define TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_
-
-#include "tensorflow/core/kernels/batching_util/fake_clock_env.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_TEST_UTIL_FAKE_CLOCK_ENV_H_
diff --git a/tensorflow/contrib/batching/util/BUILD b/tensorflow/contrib/batching/util/BUILD
deleted file mode 100644
index 8f81b6702f..0000000000
--- a/tensorflow/contrib/batching/util/BUILD
+++ /dev/null
@@ -1,28 +0,0 @@
-# Description: Utilities.
-
-package(
-    default_visibility = ["//tensorflow:internal"],
-)
-
-licenses(["notice"])  # Apache 2.0
-
-load("//tensorflow:tensorflow.bzl", "tf_cc_test")
-
-cc_library(
-    name = "periodic_function_dynamic",
-    hdrs = ["periodic_function.h"],
-    visibility = ["//visibility:public"],
-    deps = [
-        "//tensorflow/core/kernels/batching_util:periodic_function_dynamic",
-        "//third_party/eigen3",
-    ],
-)
-
-cc_library(
-    name = "periodic_function",
-    visibility = ["//visibility:public"],
-    deps = [
-        ":periodic_function_dynamic",
-        "//tensorflow/core/kernels/batching_util:periodic_function",
-    ],
-)
diff --git a/tensorflow/contrib/batching/util/periodic_function.h b/tensorflow/contrib/batching/util/periodic_function.h
deleted file mode 100644
index aa2ed0a385..0000000000
--- a/tensorflow/contrib/batching/util/periodic_function.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_
-#define TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_
-
-#include "tensorflow/core/kernels/batching_util/periodic_function.h"
-
-#endif  // TENSORFLOW_CONTRIB_BATCHING_UTIL_PERIODIC_FUNCTION_H_
-- 
GitLab


From 6c487cddd3503ef72c015c5c283fff81328282e5 Mon Sep 17 00:00:00 2001
From: Revan Sopher <rsopher@google.com>
Date: Tue, 2 Oct 2018 15:48:27 -0700
Subject: [PATCH 0238/1085] Internal change.

PiperOrigin-RevId: 215473351
---
 .../data/experimental/kernel_tests/BUILD      | 113 +++++++++-
 .../kernel_tests/optimization/BUILD           |  43 ++++
 .../kernel_tests/serialization/BUILD          | 196 +++++++++++++++---
 3 files changed, 316 insertions(+), 36 deletions(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index a46c30ed2e..f56127f3ef 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -15,6 +15,7 @@ py_test(
     tags = [
         "no_oss",  # (b/79552534)
         "no_pip",
+        "no_windows",
     ],
     deps = [
         "//tensorflow/python:array_ops",
@@ -43,6 +44,11 @@ py_test(
     size = "medium",
     srcs = ["bucketing_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -66,7 +72,11 @@ py_test(
     size = "medium",
     srcs = ["csv_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -93,6 +103,9 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "manual",
+        "no_oss",
+        "no_pip",
+        "no_windows",
         "nomac",  # b/62040583
     ],
     deps = [
@@ -111,6 +124,11 @@ py_test(
     size = "medium",
     srcs = ["directed_interleave_dataset_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -126,6 +144,11 @@ py_test(
     name = "get_single_element_test",
     size = "small",
     srcs = ["get_single_element_test.py"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -144,6 +167,11 @@ py_test(
 py_test(
     name = "indexed_dataset_ops_test",
     srcs = ["indexed_dataset_ops_test.py"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -164,6 +192,7 @@ py_test(
     tags = [
         "no_oss",
         "no_pip",
+        "no_windows",
         "notap",
     ],
     deps = [
@@ -187,7 +216,11 @@ py_test(
     size = "small",
     srcs = ["iterator_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -208,7 +241,9 @@ py_test(
     srcs = ["map_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
         "no_pip",
+        "no_windows",
         "noasan",  # times out
         "optonly",
     ],
@@ -234,6 +269,11 @@ py_test(
     size = "medium",
     srcs = ["filter_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -254,7 +294,11 @@ py_test(
     size = "small",
     srcs = ["map_defun_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:check_ops",
@@ -277,6 +321,11 @@ py_test(
     size = "small",
     srcs = ["parsing_ops_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client_testlib",
@@ -313,7 +362,12 @@ cuda_py_test(
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
     ],
-    tags = ["no_windows_gpu"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+        "no_windows_gpu",
+    ],
 )
 
 py_test(
@@ -321,6 +375,11 @@ py_test(
     size = "small",
     srcs = ["range_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -366,7 +425,11 @@ py_test(
     size = "medium",
     srcs = ["reader_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":reader_dataset_ops_test_base",
         "//tensorflow/python:client_testlib",
@@ -390,6 +453,9 @@ py_test(
     shard_count = 2,
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
         "noasan",
         "optonly",
     ],
@@ -415,7 +481,11 @@ py_test(
     size = "small",
     srcs = ["scan_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -438,7 +508,9 @@ py_test(
     srcs = ["shuffle_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
         "no_pip",
+        "no_windows",
         "optonly",
     ],
     deps = [
@@ -475,7 +547,11 @@ py_test(
     size = "small",
     srcs = ["sql_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":sql_dataset_op_test_base",
         "//tensorflow/python:client_testlib",
@@ -489,7 +565,11 @@ py_test(
     size = "medium",
     srcs = ["stats_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":reader_dataset_ops_test_base",
         ":stats_dataset_test_base",
@@ -519,7 +599,11 @@ py_test(
     size = "small",
     srcs = ["threadpool_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
@@ -539,7 +623,11 @@ py_test(
     size = "small",
     srcs = ["unique_dataset_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
@@ -555,6 +643,11 @@ py_test(
     name = "writer_ops_test",
     size = "small",
     srcs = ["writer_ops_test.py"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
index 68f73bddb5..c92bb8b9bc 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
@@ -11,6 +11,11 @@ py_test(
     size = "medium",
     srcs = ["assert_next_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -25,6 +30,11 @@ py_test(
     size = "small",
     srcs = ["hoist_random_uniform_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -43,6 +53,11 @@ py_test(
     size = "small",
     srcs = ["latency_all_edges_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -58,6 +73,11 @@ py_test(
     size = "small",
     srcs = ["map_vectorization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:check_ops",
         "//tensorflow/python:client_testlib",
@@ -80,6 +100,11 @@ py_test(
     size = "medium",
     srcs = ["map_and_filter_fusion_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -98,6 +123,11 @@ py_test(
     size = "small",
     srcs = ["map_parallelization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -117,6 +147,9 @@ py_test(
     srcs = ["model_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
         "optonly",
     ],
     deps = [
@@ -136,6 +169,11 @@ py_test(
     size = "small",
     srcs = ["noop_elimination_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -153,6 +191,11 @@ py_test(
     size = "small",
     srcs = ["optimize_dataset_op_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index 20c02a5366..58a335ae4f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -34,7 +34,11 @@ py_test(
     size = "medium",
     srcs = ["batch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -51,6 +55,11 @@ py_test(
     size = "small",
     srcs = ["cache_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -65,6 +74,11 @@ py_test(
     size = "small",
     srcs = ["concatenate_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -78,7 +92,11 @@ py_test(
     size = "small",
     srcs = ["csv_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -92,6 +110,11 @@ py_test(
     size = "medium",
     srcs = ["dataset_constructor_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -106,7 +129,11 @@ py_test(
     size = "medium",
     srcs = ["filter_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -123,7 +150,11 @@ py_test(
     srcs = ["fixed_length_record_dataset_serialization_test.py"],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -136,7 +167,11 @@ py_test(
     name = "flat_map_dataset_serialization_test",
     size = "medium",
     srcs = ["flat_map_dataset_serialization_test.py"],
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -158,6 +193,11 @@ py_test(
     size = "medium",
     srcs = ["group_by_reducer_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -172,6 +212,11 @@ py_test(
     size = "medium",
     srcs = ["group_by_window_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -186,7 +231,11 @@ py_test(
     size = "small",
     srcs = ["ignore_errors_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -202,7 +251,11 @@ py_test(
     size = "medium",
     srcs = ["interleave_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -219,7 +272,11 @@ py_test(
     size = "medium",
     srcs = ["map_and_batch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -234,7 +291,11 @@ py_test(
     size = "medium",
     srcs = ["map_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -256,6 +317,11 @@ py_test(
     size = "small",
     srcs = ["optimize_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -269,7 +335,11 @@ py_test(
     size = "medium",
     srcs = ["padded_batch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -285,7 +355,11 @@ py_test(
     size = "medium",
     srcs = ["parallel_interleave_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -302,7 +376,11 @@ py_test(
     size = "medium",
     srcs = ["parallel_map_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -323,7 +401,11 @@ py_test(
     size = "medium",
     srcs = ["parse_example_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -336,7 +418,11 @@ py_test(
     size = "small",
     srcs = ["prefetch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -349,6 +435,11 @@ py_test(
     size = "small",
     srcs = ["range_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -368,6 +459,11 @@ py_test(
     size = "medium",
     srcs = ["sample_from_datasets_serialization_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -381,7 +477,11 @@ py_test(
     size = "small",
     srcs = ["scan_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -395,7 +495,11 @@ py_test(
     size = "medium",
     srcs = ["sequence_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -409,7 +513,11 @@ py_test(
     size = "small",
     srcs = ["serialization_integration_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
@@ -424,7 +532,11 @@ py_test(
     size = "medium",
     srcs = ["shuffle_and_repeat_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -438,7 +550,11 @@ py_test(
     size = "medium",
     srcs = ["shuffle_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -454,7 +570,11 @@ py_test(
     size = "small",
     srcs = ["sql_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -470,7 +590,11 @@ py_test(
     size = "medium",
     srcs = ["stats_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:array_ops",
@@ -487,7 +611,11 @@ py_test(
     srcs = ["textline_dataset_serialization_test.py"],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -502,7 +630,11 @@ py_test(
     srcs = ["tf_record_dataset_serialization_test.py"],
     shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -516,7 +648,11 @@ py_test(
     size = "medium",
     srcs = ["unbatch_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -531,7 +667,11 @@ py_test(
     size = "small",
     srcs = ["unique_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
@@ -545,7 +685,11 @@ py_test(
     size = "small",
     srcs = ["zip_dataset_serialization_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_oss",
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":dataset_serialization_test_base",
         "//tensorflow/python:client_testlib",
-- 
GitLab


From 7c0c0abab5b07528bae982d69257ebf4a8c077cb Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 2 Oct 2018 16:14:32 -0700
Subject: [PATCH 0239/1085] Internal change.

PiperOrigin-RevId: 215477724
---
 tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index cd7206baf8..9c6390070c 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -29,7 +29,7 @@ TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU:-8}
 # p100 has minimum 12G memory. Therefore, we should limit each test to 1.5G.
 # To leave some room in case we want to run more tests in parallel in the
 # future and to use a rounder number, we set it to 1G.
-export TF_PER_DEVICE_MEMORY_LIMIT_MB=1024
+export TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB:-1024}
 
 # *******************************************************************
 #         This section of the script is needed to
-- 
GitLab


From 6663959a8a2dd93a4dab9b049767d64761a00adc Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Tue, 2 Oct 2018 16:27:57 -0700
Subject: [PATCH 0240/1085] Update Keras RNN layer to support time major input.

PiperOrigin-RevId: 215479788
---
 tensorflow/python/keras/backend.py            | 25 ++++--
 .../python/keras/layers/cudnn_recurrent.py    | 24 +++--
 .../keras/layers/cudnn_recurrent_test.py      | 27 ++++++
 tensorflow/python/keras/layers/recurrent.py   | 65 ++++++++++----
 .../python/keras/layers/recurrent_test.py     | 90 +++++++++++++++++++
 .../golden/v1/tensorflow.keras.backend.pbtxt  |  2 +-
 .../v1/tensorflow.keras.layers.-r-n-n.pbtxt   |  2 +-
 .../golden/v2/tensorflow.keras.backend.pbtxt  |  2 +-
 .../v2/tensorflow.keras.layers.-r-n-n.pbtxt   |  2 +-
 9 files changed, 207 insertions(+), 32 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 584facc859..0d6877e4a1 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -3058,7 +3058,8 @@ def rnn(step_function,
         mask=None,
         constants=None,
         unroll=False,
-        input_length=None):
+        input_length=None,
+        time_major=False):
   """Iterates over the time dimension of a tensor.
 
   Arguments:
@@ -3087,6 +3088,13 @@ def rnn(step_function,
       constants: List of constant values passed at each step.
       unroll: Whether to unroll the RNN or to use a symbolic `while_loop`.
       input_length: If specified, assume time dimension is of this length.
+      time_major: Boolean. If true, the inputs and outputs will be in shape
+          `(timesteps, batch, ...)`, whereas in the False case, it will be
+          `(batch, timesteps, ...)`. Using `time_major = True` is a bit more
+          efficient because it avoids transposes at the beginning and end of the
+          RNN calculation. However, most TensorFlow data is batch-major, so by
+          default this function accepts input and emits output in batch-major
+          form.
 
   Returns:
       A tuple, `(last_output, outputs, new_states)`.
@@ -3108,15 +3116,17 @@ def rnn(step_function,
   if ndim < 3:
     raise ValueError('Input should be at least 3D.')
   inputs_shape = inputs.shape
-  axes = [1, 0] + list(range(2, ndim))
-  inputs = array_ops.transpose(inputs, (axes))
+  if not time_major:
+    axes = [1, 0] + list(range(2, ndim))
+    inputs = array_ops.transpose(inputs, axes)
 
   if mask is not None:
     if mask.dtype != dtypes_module.bool:
       mask = math_ops.cast(mask, dtypes_module.bool)
     if len(mask.shape) == ndim - 1:
       mask = expand_dims(mask)
-    mask = array_ops.transpose(mask, axes)
+    if not time_major:
+      mask = array_ops.transpose(mask, axes)
 
   if constants is None:
     constants = []
@@ -3297,10 +3307,11 @@ def rnn(step_function,
     outputs = output_ta.stack()
     last_output = output_ta.read(last_time - 1)
 
-  axes = [1, 0] + list(range(2, len(outputs.shape)))
-  outputs = array_ops.transpose(outputs, axes)
+  if not time_major:
+    axes = [1, 0] + list(range(2, len(outputs.shape)))
+    outputs = array_ops.transpose(outputs, axes)
 
-  # Static shape inference: (samples, time, ...)
+  # Static shape inference: (samples, time, ...) or (time, sample, ...)
   outputs_shape = outputs.shape.as_list()
   outputs_shape[0] = inputs_shape[0]
   outputs_shape[1] = inputs_shape[1]
diff --git a/tensorflow/python/keras/layers/cudnn_recurrent.py b/tensorflow/python/keras/layers/cudnn_recurrent.py
index cf2b0c476c..29a09a3d71 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent.py
@@ -47,6 +47,9 @@ class _CuDNNRNN(RNN):
     stateful: Boolean (default False). If True, the last state
         for each sample at index i in a batch will be used as initial
         state for the sample of index i in the following batch.
+    time_major: Boolean (default False). If true, the inputs and outputs will be
+        in shape `(timesteps, batch, ...)`, whereas in the False case, it will
+        be `(batch, timesteps, ...)`.
   """
 
   def __init__(self,
@@ -54,6 +57,7 @@ class _CuDNNRNN(RNN):
                return_state=False,
                go_backwards=False,
                stateful=False,
+               time_major=False,
                **kwargs):
     # We invoke the base layer's initializer directly here because we do not
     # want to create RNN cell instance.
@@ -62,6 +66,7 @@ class _CuDNNRNN(RNN):
     self.return_state = return_state
     self.go_backwards = go_backwards
     self.stateful = stateful
+    self.time_major = time_major
     self.supports_masking = False
     self.input_spec = [InputSpec(ndim=3)]
     if hasattr(self.cell.state_size, '__len__'):
@@ -124,7 +129,8 @@ class _CuDNNRNN(RNN):
         'return_sequences': self.return_sequences,
         'return_state': self.return_state,
         'go_backwards': self.go_backwards,
-        'stateful': self.stateful
+        'stateful': self.stateful,
+        'time_major': self.time_major,
     }
     base_config = super(  # pylint: disable=bad-super-call
         RNN, self).get_config()
@@ -267,7 +273,8 @@ class CuDNNGRU(_CuDNNRNN):
     self.built = True
 
   def _process_batch(self, inputs, initial_state):
-    inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
+    if not self.time_major:
+      inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
     input_h = initial_state[0]
     input_h = array_ops.expand_dims(input_h, axis=0)
 
@@ -301,7 +308,10 @@ class CuDNNGRU(_CuDNNRNN):
     if self.stateful or self.return_state:
       h = h[0]
     if self.return_sequences:
-      output = array_ops.transpose(outputs, perm=(1, 0, 2))
+      if self.time_major:
+        output = outputs
+      else:
+        output = array_ops.transpose(outputs, perm=(1, 0, 2))
     else:
       output = outputs[-1]
     return output, [h]
@@ -456,7 +466,8 @@ class CuDNNLSTM(_CuDNNRNN):
     self.built = True
 
   def _process_batch(self, inputs, initial_state):
-    inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
+    if not self.time_major:
+      inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
     input_h = initial_state[0]
     input_c = initial_state[1]
     input_h = array_ops.expand_dims(input_h, axis=0)
@@ -496,7 +507,10 @@ class CuDNNLSTM(_CuDNNRNN):
       h = h[0]
       c = c[0]
     if self.return_sequences:
-      output = array_ops.transpose(outputs, perm=(1, 0, 2))
+      if self.time_major:
+        output = outputs
+      else:
+        output = array_ops.transpose(outputs, perm=(1, 0, 2))
     else:
       output = outputs[-1]
     return output, [h, c]
diff --git a/tensorflow/python/keras/layers/cudnn_recurrent_test.py b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
index 2ed0aa8f26..7becbfede1 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent_test.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
@@ -26,6 +26,7 @@ import numpy as np
 from tensorflow.python import keras
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import testing_utils
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
@@ -138,6 +139,32 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
         np.testing.assert_allclose(
             keras.backend.eval(layer.states[0]), state, atol=1e-4)
 
+  @parameterized.named_parameters(
+      ('cudnngru', keras.layers.CuDNNGRU),
+      ('cudnnlstm', keras.layers.CuDNNLSTM),
+  )
+  def test_time_major_input(self, layer_class):
+    if test.is_gpu_available(cuda_only=True):
+      with self.test_session(use_gpu=True):
+        input_size = 10
+        timesteps = 6
+        units = 2
+        num_samples = 32
+
+        model = keras.models.Sequential()
+        model.add(
+            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
+        layer = layer_class(units, time_major=True, return_sequences=True)
+        model.add(layer)
+        model.add(
+            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
+        model.compile(loss='categorical_crossentropy', optimizer='adam')
+        model.fit(
+            np.ones((num_samples, timesteps, input_size)),
+            np.ones((num_samples, timesteps, units)))
+        out = model.predict(np.ones((num_samples, timesteps, input_size)))
+        self.assertEqual(out.shape, (num_samples, timesteps, units))
+
   @parameterized.named_parameters(
       ('cudnngru', keras.layers.CuDNNGRU),
       ('cudnnlstm', keras.layers.CuDNNLSTM),
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index ba7498e7e6..b07ec71178 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -336,9 +336,18 @@ class RNN(Layer):
           in your model, you would need to specify the input length
           at the level of the first layer
           (e.g. via the `input_shape` argument)
+      time_major: The shape format of the `inputs` and `outputs` tensors.
+          If True, the inputs and outputs will be in shape
+          `(timesteps, batch, ...)`, whereas in the False case, it will be
+          `(batch, timesteps, ...)`. Using `time_major = True` is a bit more
+          efficient because it avoids transposes at the beginning and end of the
+          RNN calculation. However, most TensorFlow data is batch-major, so by
+          default this function accepts input and emits output in batch-major
+          form.
 
   Input shape:
-      N-D tensor with shape `(batch_size, timesteps, ...)`.
+      N-D tensor with shape `(batch_size, timesteps, ...)` or
+      `(timesteps, batch_size, ...)` when time_major is True.
 
   Output shape:
       - if `return_state`: a list of tensors. The first tensor is
@@ -347,7 +356,8 @@ class RNN(Layer):
           be a high dimension tensor shape.
       - if `return_sequences`: N-D tensor with shape
           `(batch_size, timesteps, output_size)`, where `output_size` could
-          be a high dimension tensor shape.
+          be a high dimension tensor shape, or
+          `(timesteps, batch_size, output_size)` when `time_major` is True.
       - else, N-D tensor with shape `(batch_size, output_size)`, where
           `output_size` could be a high dimension tensor shape.
 
@@ -448,6 +458,7 @@ class RNN(Layer):
                go_backwards=False,
                stateful=False,
                unroll=False,
+               time_major=False,
                **kwargs):
     if isinstance(cell, (list, tuple)):
       cell = StackedRNNCells(cell)
@@ -468,6 +479,7 @@ class RNN(Layer):
     self.go_backwards = go_backwards
     self.stateful = stateful
     self.unroll = unroll
+    self.time_major = time_major
 
     self.supports_masking = True
     self.input_spec = [None]  # The input shape is unknown yet, at least rank 3.
@@ -503,14 +515,21 @@ class RNN(Layer):
       # Note that state_size[0] could be a tensor_shape or int.
       output_dim = tensor_shape.as_shape(state_size[0]).as_list()
 
+    batch = input_shape[0]
+    time_step = input_shape[1]
+    if self.time_major:
+      batch, time_step = time_step, batch
     if self.return_sequences:
-      output_shape = tuple([input_shape[0], input_shape[1]] + output_dim)
+      if self.time_major:
+        output_shape = tuple([time_step, batch] + output_dim)
+      else:
+        output_shape = tuple([batch, time_step] + output_dim)
     else:
-      output_shape = tuple([input_shape[0]] + output_dim)
+      output_shape = tuple([batch] + output_dim)
 
     if self.return_state:
       state_shape = [
-          tuple([input_shape[0]] + tensor_shape.as_shape(dim).as_list())
+          tuple([batch] + tensor_shape.as_shape(dim).as_list())
           for dim in state_size
       ]
       return [output_shape] + state_shape
@@ -539,13 +558,18 @@ class RNN(Layer):
     if isinstance(input_shape, list):
       input_shape = input_shape[0]
 
-    batch_size = input_shape[0] if self.stateful else None
-    input_dim = input_shape[2:]
-    self.input_spec[0] = InputSpec(shape=(batch_size, None) + input_dim)
+    input_spec_shape = list(input_shape)
+    batch_index, time_step_index = (1, 0) if self.time_major else (0, 1)
+    if not self.stateful:
+      input_spec_shape[batch_index] = None
+    input_spec_shape[time_step_index] = None
+    self.input_spec[0] = InputSpec(shape=tuple(input_spec_shape))
 
+    batch = input_shape[batch_index]
+    input_dim = input_shape[2:]
+    step_input_shape = (batch,) + input_dim
     # allow cell (if layer) to build before we set or validate state_spec
     if isinstance(self.cell, Layer):
-      step_input_shape = (input_shape[0],) + input_dim
       if constants_shape is not None:
         self.cell.build([step_input_shape] + constants_shape)
       else:
@@ -598,12 +622,16 @@ class RNN(Layer):
 
   def get_initial_state(self, inputs):
     get_initial_state_fn = getattr(self.cell, 'get_initial_state', None)
+
+    input_shape = array_ops.shape(inputs)
+    batch_size = input_shape[1] if self.time_major else input_shape[0]
+    dtype = inputs.dtype
     if get_initial_state_fn:
       init_state = get_initial_state_fn(
-          inputs=inputs, batch_size=None, dtype=None)
+          inputs=None, batch_size=batch_size, dtype=dtype)
     else:
-      init_state = _generate_zero_filled_state(
-          array_ops.shape(inputs)[0], self.cell.state_size, inputs.dtype)
+      init_state = _generate_zero_filled_state(batch_size, self.cell.state_size,
+                                               dtype)
     # Keras RNN expect the states in a list, even if it's a single state tensor.
     if not nest.is_sequence(init_state):
       init_state = [init_state]
@@ -696,7 +724,7 @@ class RNN(Layer):
           'Layer has ' + str(len(self.states)) + ' states but was passed ' +
           str(len(initial_state)) + ' initial states.')
     input_shape = K.int_shape(inputs)
-    timesteps = input_shape[1]
+    timesteps = input_shape[0] if self.time_major else input_shape[1]
     if self.unroll and timesteps in [None, 1]:
       raise ValueError('Cannot unroll a RNN if the '
                        'time dimension is undefined or equal to 1. \n'
@@ -747,7 +775,8 @@ class RNN(Layer):
         go_backwards=self.go_backwards,
         mask=mask,
         unroll=self.unroll,
-        input_length=timesteps)
+        input_length=timesteps,
+        time_major=self.time_major)
     if self.stateful:
       updates = []
       for i in range(len(states)):
@@ -777,7 +806,10 @@ class RNN(Layer):
   def reset_states(self, states=None):
     if not self.stateful:
       raise AttributeError('Layer must be stateful.')
-    batch_size = self.input_spec[0].shape[0]
+    if self.time_major:
+      batch_size = self.input_spec[0].shape[1]
+    else:
+      batch_size = self.input_spec[0].shape[0]
     if not batch_size:
       raise ValueError('If a RNN is stateful, it needs to know '
                        'its batch size. Specify the batch size '
@@ -839,7 +871,8 @@ class RNN(Layer):
         'return_state': self.return_state,
         'go_backwards': self.go_backwards,
         'stateful': self.stateful,
-        'unroll': self.unroll
+        'unroll': self.unroll,
+        'time_major': self.time_major
     }
     if self._num_constants is not None:
       config['num_constants'] = self._num_constants
diff --git a/tensorflow/python/keras/layers/recurrent_test.py b/tensorflow/python/keras/layers/recurrent_test.py
index b9e90095e4..d246be6b45 100644
--- a/tensorflow/python/keras/layers/recurrent_test.py
+++ b/tensorflow/python/keras/layers/recurrent_test.py
@@ -186,6 +186,96 @@ class RNNTest(test.TestCase):
       y_np_2 = model.predict(x_np)
       self.assertAllClose(y_np, y_np_2, atol=1e-4)
 
+  def test_rnn_with_time_major(self):
+    batch = 10
+    time_step = 5
+    embedding_dim = 4
+    units = 3
+
+    with self.cached_session():
+      # Test basic case.
+      x = keras.Input((time_step, embedding_dim))
+      time_major_x = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+      layer = keras.layers.SimpleRNN(
+          units, time_major=True, return_sequences=True)
+      self.assertEqual(
+          layer.compute_output_shape((time_step, None,
+                                      embedding_dim)).as_list(),
+          [time_step, None, units])
+      y = layer(time_major_x)
+      self.assertEqual(layer.output_shape, (time_step, None, units))
+
+      y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(y)
+
+      model = keras.models.Model(x, y)
+      model.compile(optimizer='rmsprop', loss='mse')
+      model.train_on_batch(
+          np.zeros((batch, time_step, embedding_dim)),
+          np.zeros((batch, time_step, units)))
+
+    with self.cached_session():
+      # Test stacking.
+      x = keras.Input((time_step, embedding_dim))
+      time_major_x = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+      cell_units = [10, 8, 6]
+      cells = [keras.layers.SimpleRNNCell(cell_units[i]) for i in range(3)]
+      layer = keras.layers.RNN(cells, time_major=True, return_sequences=True)
+      y = layer(time_major_x)
+      self.assertEqual(layer.output_shape, (time_step, None, cell_units[-1]))
+
+      y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(y)
+      model = keras.models.Model(x, y)
+      model.compile(optimizer='rmsprop', loss='mse')
+      model.train_on_batch(
+          np.zeros((batch, time_step, embedding_dim)),
+          np.zeros((batch, time_step, cell_units[-1])))
+
+    with self.cached_session():
+      # Test masking.
+      x = keras.Input((time_step, embedding_dim))
+      time_major = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+      mask = keras.layers.Masking()(time_major)
+      rnn = keras.layers.SimpleRNN(
+          units, time_major=True, return_sequences=True)(mask)
+      y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(rnn)
+      model = keras.models.Model(x, y)
+      model.compile(optimizer='rmsprop', loss='mse')
+      model.train_on_batch(
+          np.zeros((batch, time_step, embedding_dim)),
+          np.zeros((batch, time_step, units)))
+
+    with self.cached_session():
+      # Test layer output
+      x = keras.Input((time_step, embedding_dim))
+      rnn_1 = keras.layers.SimpleRNN(units, return_sequences=True)
+      y = rnn_1(x)
+
+      model = keras.models.Model(x, y)
+      model.compile(optimizer='rmsprop', loss='mse')
+      model.train_on_batch(
+          np.zeros((batch, time_step, embedding_dim)),
+          np.zeros((batch, time_step, units)))
+
+      x_np = np.random.random((batch, time_step, embedding_dim))
+      y_np_1 = model.predict(x_np)
+
+      time_major = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+      rnn_2 = keras.layers.SimpleRNN(
+          units, time_major=True, return_sequences=True)
+      y_2 = rnn_2(time_major)
+      y_2 = keras.layers.Lambda(
+          lambda t: array_ops.transpose(t, [1, 0, 2]))(y_2)
+
+      model_2 = keras.models.Model(x, y_2)
+      rnn_2.set_weights(rnn_1.get_weights())
+
+      y_np_2 = model_2.predict(x_np)
+      self.assertAllClose(y_np_1, y_np_2, atol=1e-4)
+
   def test_rnn_cell_with_constants_layer(self):
 
     class RNNCellWithConstants(keras.layers.Layer):
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
index 126ce8db6a..a71a59e269 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
@@ -398,7 +398,7 @@ tf_module {
   }
   member_method {
     name: "rnn"
-    argspec: "args=[\'step_function\', \'inputs\', \'initial_states\', \'go_backwards\', \'mask\', \'constants\', \'unroll\', \'input_length\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'step_function\', \'inputs\', \'initial_states\', \'go_backwards\', \'mask\', \'constants\', \'unroll\', \'input_length\', \'time_major\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "round"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
index 2b6e8af11d..68b6678d48 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -86,7 +86,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\'], "
+    argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'time_major\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'False\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
index 126ce8db6a..a71a59e269 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
@@ -398,7 +398,7 @@ tf_module {
   }
   member_method {
     name: "rnn"
-    argspec: "args=[\'step_function\', \'inputs\', \'initial_states\', \'go_backwards\', \'mask\', \'constants\', \'unroll\', \'input_length\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'False\', \'None\'], "
+    argspec: "args=[\'step_function\', \'inputs\', \'initial_states\', \'go_backwards\', \'mask\', \'constants\', \'unroll\', \'input_length\', \'time_major\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'None\', \'False\', \'None\', \'False\'], "
   }
   member_method {
     name: "round"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
index 2b6e8af11d..68b6678d48 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -86,7 +86,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\'], "
+    argspec: "args=[\'self\', \'cell\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'time_major\'], varargs=None, keywords=kwargs, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'False\'], "
   }
   member_method {
     name: "add_loss"
-- 
GitLab


From 41e97007638ef41764b1da86fb2de772f35762e5 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Tue, 2 Oct 2018 17:00:46 -0700
Subject: [PATCH 0241/1085] Disable XLA from raspberry pi builds.

There is no known conceptual reason we can't use XLA, but in practice
we have some build issues that will need to be fixed.

PiperOrigin-RevId: 215484942
---
 tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 3d27e84b81..864278c647 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -34,6 +34,8 @@ set -e
 #
 # Make sure you have an up to date version of the Bazel build tool installed too.
 
+export TF_ENABLE_XLA=0
+
 yes '' | ./configure
 
 # Fix for curl build problem in 32-bit, see https://stackoverflow.com/questions/35181744/size-of-array-curl-rule-01-is-negative
-- 
GitLab


From e4188461aee1d614a14f17fe2abaf2a9a94886d9 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Tue, 2 Oct 2018 17:02:30 -0700
Subject: [PATCH 0242/1085] Add missing `import unittest` to
 control_flow_ops_py_test.py

PiperOrigin-RevId: 215485333
---
 tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 655fece5ff..07ec859766 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -23,6 +23,7 @@ from __future__ import print_function
 import collections
 import math
 import time
+import unittest
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
-- 
GitLab


From 64401718141e078cc00b64d9d22038c07fa32480 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Tue, 2 Oct 2018 17:00:46 -0700
Subject: [PATCH 0243/1085] Disable XLA from raspberry pi builds.

There is no known conceptual reason we can't use XLA, but in practice
we have some build issues that will need to be fixed.

PiperOrigin-RevId: 215484942
---
 tensorflow/tools/ci_build/pi/build_raspberry_pi.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
index 3d27e84b81..864278c647 100755
--- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
@@ -34,6 +34,8 @@ set -e
 #
 # Make sure you have an up to date version of the Bazel build tool installed too.
 
+export TF_ENABLE_XLA=0
+
 yes '' | ./configure
 
 # Fix for curl build problem in 32-bit, see https://stackoverflow.com/questions/35181744/size-of-array-curl-rule-01-is-negative
-- 
GitLab


From 22919770355b1b7d8f4c5a20327898e881aa11cb Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Tue, 2 Oct 2018 17:09:45 -0700
Subject: [PATCH 0244/1085] Pin wheel=0.31.1 to work around issue
 https://github.com/pypa/auditwheel/issues/102

PiperOrigin-RevId: 215486669
---
 tensorflow/tools/ci_build/install/install_pip_packages.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index b90f3f3b97..7f293e8604 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -24,8 +24,10 @@ easy_install3 -U pip==9.0.3
 # Install pip packages from whl files to avoid the time-consuming process of
 # building from source.
 
-pip2 install wheel
-pip3 install wheel
+# Pin wheel==0.31.1 to work around issue
+# https://github.com/pypa/auditwheel/issues/102
+pip2 install wheel==0.31.1
+pip3 install wheel==0.31.1
 
 pip2 install virtualenv
 pip3 install virtualenv
-- 
GitLab


From 24c9e16795c55d08b8f0117187d236aea1c10791 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Tue, 2 Oct 2018 17:02:30 -0700
Subject: [PATCH 0245/1085] Add missing `import unittest` to
 control_flow_ops_py_test.py

PiperOrigin-RevId: 215485333
---
 tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index ae61be614e..bf1d057bbc 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -23,6 +23,7 @@ from __future__ import print_function
 import collections
 import math
 import time
+import unittest
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
-- 
GitLab


From 29e8aa76076d4e58403991ee319dbc0e60f5ff72 Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Tue, 2 Oct 2018 17:09:45 -0700
Subject: [PATCH 0246/1085] Pin wheel=0.31.1 to work around issue
 https://github.com/pypa/auditwheel/issues/102

PiperOrigin-RevId: 215486669
---
 tensorflow/tools/ci_build/install/install_pip_packages.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index b90f3f3b97..7f293e8604 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -24,8 +24,10 @@ easy_install3 -U pip==9.0.3
 # Install pip packages from whl files to avoid the time-consuming process of
 # building from source.
 
-pip2 install wheel
-pip3 install wheel
+# Pin wheel==0.31.1 to work around issue
+# https://github.com/pypa/auditwheel/issues/102
+pip2 install wheel==0.31.1
+pip3 install wheel==0.31.1
 
 pip2 install virtualenv
 pip3 install virtualenv
-- 
GitLab


From 80821abd6410f47130fc031b15e9ac220de5b1b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 17:16:05 -0700
Subject: [PATCH 0247/1085] Make RemoveTrivialPassthrough preserve
 minmax-related info

PiperOrigin-RevId: 215487633
---
 .../remove_trivial_passthrough.cc             | 30 ++++++++++++++-----
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
index fc49fbda59..d5983a1f12 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc
@@ -29,20 +29,34 @@ namespace {
 // array instead. from_array is assumed to be discardable, and consequently
 // this only updates operator edges (since discardable arrays only
 // appear there, and not e.g. in model flags).
-void RerouteEdges(const string& from_array, const string& to_array,
-                  Model* model) {
+void Reroute(const string& from, const string& to, Model* model) {
   for (const auto& op : model->operators) {
     for (auto& output : op->outputs) {
-      if (output == from_array) {
-        output = to_array;
+      if (output == from) {
+        output = to;
       }
     }
     for (auto& input : op->inputs) {
-      if (input == from_array) {
-        input = to_array;
+      if (input == from) {
+        input = to;
       }
     }
   }
+  const Array& from_array = model->GetArray(from);
+  Array& to_array = model->GetOrCreateArray(to);
+  // Preserve minmax information if to_array didn't already have any.
+  if (from_array.minmax && !to_array.minmax) {
+    to_array.GetOrCreateMinMax() = from_array.GetMinMax();
+    // If we're copying minmax info, then we should also be copying
+    // narrow_range, which affects how minmax info is to be interpreted.
+    to_array.narrow_range = from_array.narrow_range;
+  }
+  // Separately, also preserve final_data_type if to_array didn't already
+  // have any.
+  if (from_array.final_data_type != ArrayDataType::kNone &&
+      to_array.final_data_type == ArrayDataType::kNone) {
+    to_array.final_data_type = from_array.final_data_type;
+  }
 }
 
 }  // namespace
@@ -90,14 +104,14 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation,
     transformation->AddMessageF(
         "Removing %s, keeping its non-constant input array %s and removing %s",
         LogName(*passthru_op), main_input_name, output_name);
-    RerouteEdges(output_name, main_input_name, model);
+    Reroute(output_name, main_input_name, model);
   } else if (IsDiscardableArray(*model, main_input_name) &&
              !IsConstantParameterArray(*model, main_input_name)) {
     transformation->AddMessageF(
         "Removing %s, keeping its output array %s and removing non-constant "
         "input %s",
         LogName(*passthru_op), output_name, main_input_name);
-    RerouteEdges(main_input_name, output_name, model);
+    Reroute(main_input_name, output_name, model);
   } else {
     transformation->AddMessageF(
         "Cannot remove %s, neither its main input nor its output may be "
-- 
GitLab


From b7e9cbab27c893283acc4a6154d7a59dffb23758 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Tue, 2 Oct 2018 17:48:25 -0700
Subject: [PATCH 0248/1085] Use `defun` instead of `Defun` for `tf.data`,
 except for `make_one_shot_iterator` which is to be deprecated in future.

PiperOrigin-RevId: 215491729
---
 .../contrib/distribute/python/input_ops.py    |  2 +-
 tensorflow/python/data/ops/dataset_ops.py     | 60 ++++++++-----------
 tensorflow/python/eager/function.py           | 14 +++++
 tensorflow/python/eager/function_test.py      |  9 ++-
 4 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/input_ops.py b/tensorflow/contrib/distribute/python/input_ops.py
index f07ec8234d..423952c9e2 100644
--- a/tensorflow/contrib/distribute/python/input_ops.py
+++ b/tensorflow/contrib/distribute/python/input_ops.py
@@ -78,7 +78,7 @@ def auto_shard_dataset(dataset, num_shards, index):
       elif hasattr(dataset, "_map_func"):
         # TODO(priyag): Make this check more robust by enforcing some common
         # property on all map/flatmap/interleave datasets.
-        map_func_def = dataset._map_func.definition
+        map_func_def = dataset._map_func.function_def
         for node in map_func_def.node_def:
           if node.op in _READER_DATASET_OPS:
             found_reader_op = True
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 46ce191f7b..d90da5908d 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -30,6 +30,7 @@ from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import random_seed
 from tensorflow.python.data.util import sparse
 from tensorflow.python.eager import context
+from tensorflow.python.eager import function as eager_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
@@ -37,6 +38,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import smart_cond
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -1713,7 +1715,8 @@ class _VariantDataset(Dataset):
 
 
 class StructuredFunctionWrapper(object):
-  """A wrapper for `Defun` that supports structured arguments and return values.
+  """A wrapper for `defun` that supports structured arguments and return values.
+
   """
 
   def __init__(self, func, transformation_name, dataset=None,
@@ -1765,7 +1768,7 @@ class StructuredFunctionWrapper(object):
     # TODO(b/110122868): Enable this support for all `tf.data` functions.
     self._nested_dataset_support = experimental_nested_dataset_support
 
-    @function.Defun(*self._defun_args())
+    @eager_function.defun(input_signature=self._defun_args())
     def tf_data_structured_function_wrapper(*args):
       """Wrapper for passing nested structures to and from tf.data functions."""
       flat_args = []
@@ -1850,36 +1853,43 @@ class StructuredFunctionWrapper(object):
       self._output_shapes = nest.pack_sequence_as(ret, flat_shapes)
       self._output_types = nest.pack_sequence_as(ret, flat_types)
 
-      _warn_if_collections(transformation_name)
-
       return flat_ret
 
-    self._function = tf_data_structured_function_wrapper
+    table_initializers_len = len(ops.get_default_graph().get_collection(
+        ops.GraphKeys.TABLE_INITIALIZERS))
+
+    self._function = tf_data_structured_function_wrapper.get_concrete_function()
     if add_to_graph:
       self._function.add_to_graph(ops.get_default_graph())
-    else:
-      # Use the private method that will execute
-      # `tf_data_structured_function_wrapper` but delay adding it to the graph
-      # in case (e.g.) we need to rerun the function.
-      self._function._create_definition_if_needed()  # pylint: disable=protected-access
+    if len(
+        self._function.graph.get_collection(
+            ops.GraphKeys.TABLE_INITIALIZERS)) != table_initializers_len:
+      warnings.warn(
+          "Creating lookup tables inside a function passed to %s is not"
+          " supported. Create each table outside the function, and "
+          "capture it inside the function to use it." % transformation_name)
 
   def _defun_args(self):
-    """Returns a flat list of `tf.DType` for the input element structure."""
+    """Returns a list of `tf.TensorSpec` for the input element structure."""
     ret = []
-    for input_type, input_class in zip(nest.flatten(self._input_types),
-                                       nest.flatten(self._input_classes)):
+    for input_type, input_shape, input_class in zip(
+        nest.flatten(self._input_types), nest.flatten(self._input_shapes),
+        nest.flatten(self._input_classes)):
       # TODO(b/110122868): Add a registration mechanism for new component types.
       if input_class is sparse_tensor_lib.SparseTensor:
-        ret.append(dtypes.variant)
+        ret.append(
+            tensor_spec.TensorSpec(
+                tensor_shape.TensorShape(None), dtypes.variant))
       elif isinstance(input_class, _NestedDatasetComponent):
         if not self._nested_dataset_support:
           raise NotImplementedError(
               "The %s transformation does not currently support nested "
               "datasets as inputs." % self._transformation_name)
-        ret.append(dtypes.variant)
+        ret.append(
+            tensor_spec.TensorSpec(tensor_shape.scalar(), dtypes.variant))
       else:
         assert isinstance(input_type, dtypes.DType)
-        ret.append(input_type)
+        ret.append(tensor_spec.TensorSpec(input_shape, input_type))
     return ret
 
   @property
@@ -2579,24 +2589,6 @@ def _should_unpack_args(args):
   return type(args) is tuple  # pylint: disable=unidiomatic-typecheck
 
 
-def _warn_if_collections(transformation_name):
-  """Prints warning message if the current graph uses common graph collections.
-
-  NOTE(mrry): Currently a warning is only generated for lookup tables. Any
-  variables created will be automatically hoisted out to the outermost scope
-  using `init_scope()`. Some collections (such as for control-flow contexts)
-  are benign and should not generate a warning.
-
-  Args:
-    transformation_name: A human-readable name for the transformation.
-  """
-  if ops.get_default_graph().get_collection(ops.GraphKeys.TABLE_INITIALIZERS):
-    warnings.warn("Creating lookup tables inside a function passed to %s is not"
-                  " supported. Create each table outside the function, and "
-                  "capture it inside the function to use it."
-                  % transformation_name)
-
-
 class MapDataset(UnaryDataset):
   """A `Dataset` that maps a function over elements in its input."""
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f261d92d64..aeb1cac3e9 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -662,6 +662,11 @@ class Function(object):
     outputs = self._inference_function.call(ctx, args)
     return self._build_call_outputs(outputs)
 
+  @property
+  def name(self):
+    """Function name."""
+    return self._inference_function.name
+
   @property
   def graph(self):
     """Returns the graph from which this function was constructed."""
@@ -719,6 +724,10 @@ class Function(object):
     return nest.map_structure(lambda x: x.dtype if x is not None else None,
                               self._func_graph.structured_outputs)
 
+  def add_to_graph(self, g):
+    """Adds this function into the graph g."""
+    return self._inference_function.add_to_graph(g)
+
   def _construct_backprop_function(self):
     """Constructs the backprop function object for this function."""
     backwards_graph = FuncGraph(_backward_name(self._func_graph.name))
@@ -1122,6 +1131,8 @@ class PolymorphicFunction(object):
       *args: inputs to specialize on.
       **kwargs: inputs to specialize on.
     """
+    if self._input_signature:
+      args, kwargs = None, None
     graph_function, _ = self._maybe_define_function(args, kwargs)
     return graph_function
 
@@ -1304,6 +1315,9 @@ def register(func, *args, **kwargs):
   function definition into graph. Register function with different input param
   will result into multiple version of functions registered in graph.
 
+  Also, `args` and `kwargs` are ignored if this `PolymorphicFunction` was
+  created with an `input_signature`.
+
   Args:
     func: the PolymorphicFunction instance that generated by a @defun
     *args: input arguments for the Python function.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 9ce367a837..ac45606eb0 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1750,11 +1750,10 @@ class FunctionTest(test.TestCase):
         # pylint: disable=protected-access
         self.assertEqual(len(graph._functions), 3)
 
-        # Test input param shape mismatch
-        t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
-        with self.assertRaisesRegexp(
-            ValueError, 'Python inputs incompatible with input_signature'):
-          function.register(defun_matmul, t2, t2)
+        # Test register function with cache, note inputs are ignored.
+        function.register(defun_matmul)
+        graph = ops.get_default_graph()
+        self.assertEqual(len(graph._functions), 3)
 
   def testRegisterFunctionWithCache(self):
     def matmul(x, y):
-- 
GitLab


From 9f7a138640408cea58698a432fd1596cf436b484 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Tue, 2 Oct 2018 17:57:49 -0700
Subject: [PATCH 0249/1085] Set shape for output tensors of cond_v2.

PiperOrigin-RevId: 215492782
---
 tensorflow/core/ops/functional_ops.cc         | 21 ++++++++++++++++++-
 .../kernel_tests/control_flow_ops_py_test.py  |  7 +++++++
 tensorflow/python/ops/cond_v2_impl.py         | 20 +++++++++++++++---
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc
index fed3fa22ed..22b4b07eff 100644
--- a/tensorflow/core/ops/functional_ops.cc
+++ b/tensorflow/core/ops/functional_ops.cc
@@ -110,8 +110,27 @@ REGISTER_OP("If")
     .Attr("Tout: list(type) >= 0")
     .Attr("then_branch: func")
     .Attr("else_branch: func")
+    .Attr("output_shapes: list(shape) = []")
     .SetIsStateful()
-    .SetShapeFn(shape_inference::UnknownShape);
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      std::vector<PartialTensorShape> output_shapes;
+      TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
+      // If `output_shapes` attr is set use that as the shapes of the outputs
+      // else return unknown shapes.
+      if (output_shapes.empty()) return shape_inference::UnknownShape(c);
+      if (output_shapes.size() != c->num_outputs()) {
+        return errors::InvalidArgument(
+            "`output_shapes` must be the same length as num outputs (",
+            output_shapes.size(), " vs. ", c->num_outputs());
+      }
+      for (size_t i = 0; i < output_shapes.size(); ++i) {
+        shape_inference::ShapeHandle output_shape_handle;
+        TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
+            output_shapes[i], &output_shape_handle));
+        c->set_output(static_cast<int>(i), output_shape_handle);
+      }
+      return Status::OK();
+    });
 
 // TODO(drpng): remove this.
 REGISTER_OP("_While")
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 07ec859766..a1be77601c 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -351,6 +351,13 @@ class ControlFlowTest(test.TestCase):
     grad = gradients_impl.gradients(y, [v])
     self.assertAllEqual([None], grad)
 
+  def testCondOutputShape(self):
+    x = constant_op.constant(1.0)
+    b = control_flow_ops.cond(
+        constant_op.constant(True), lambda: math_ops.square(x),
+        lambda: math_ops.subtract(x, 1.))
+    self.assertEqual(b.shape, tensor_shape.scalar())
+
   def testFetchable(self):
     with self.cached_session() as sess:
       x = array_ops.placeholder(dtypes.float32)
diff --git a/tensorflow/python/ops/cond_v2_impl.py b/tensorflow/python/ops/cond_v2_impl.py
index f8b1ddb140..195ad11c71 100644
--- a/tensorflow/python/ops/cond_v2_impl.py
+++ b/tensorflow/python/ops/cond_v2_impl.py
@@ -96,9 +96,12 @@ def cond_v2(pred, true_fn, false_fn, name="cond"):
 
     # Create the If op.
     tensors = gen_functional_ops._if(  # pylint: disable=protected-access
-        pred, cond_inputs, [t.dtype for t in true_graph.outputs],
+        pred,
+        cond_inputs, [t.dtype for t in true_graph.outputs],
         _create_new_tf_function(true_graph),
         _create_new_tf_function(false_graph),
+        output_shapes=_get_output_shapes(true_graph.outputs,
+                                         false_graph.outputs),
         name=scope)
 
     # Set the flag to enable lowering on the `if` op if necessary
@@ -175,9 +178,12 @@ def _IfGrad(op, *grads):  # pylint: disable=invalid-name
 
   # Create the gradient If op.
   tensors = gen_functional_ops._if(
-      op.inputs[0], grad_inputs, [t.dtype for t in true_grad_graph.outputs],
+      op.inputs[0],
+      grad_inputs, [t.dtype for t in true_grad_graph.outputs],
       _create_new_tf_function(true_grad_graph),
-      _create_new_tf_function(false_grad_graph))
+      _create_new_tf_function(false_grad_graph),
+      output_shapes=_get_output_shapes(true_grad_graph.outputs,
+                                       false_grad_graph.outputs))
 
   # The predicate has no gradient.
   return [None] + tensors[:num_grad_outputs]
@@ -480,6 +486,14 @@ def _check_same_outputs(true_graph, false_graph):
         "  false_fn: %s" % (true_output_types, false_output_types))
 
 
+def _get_output_shapes(true_graph_outputs, false_graph_outputs):
+  output_shapes = [
+      t_out.shape.most_specific_compatible_shape(f_out.shape)
+      for t_out, f_out in zip(true_graph_outputs, false_graph_outputs)
+  ]
+  return output_shapes
+
+
 def _is_ancestor(graph, maybe_ancestor):
   if maybe_ancestor == graph:
     return True
-- 
GitLab


From 05bc6c6762d5a58bacd585e9243133bf0378515f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 18:10:46 -0700
Subject: [PATCH 0250/1085] Remove initial accumulator (and other auxiliary
 parameter) values from optimization parameter protos and removed uses of that
 functionality in tests.

PiperOrigin-RevId: 215494433
---
 .../tpu/proto/optimization_parameters.proto     | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index 8529b48c15..b9e0747fa4 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -28,7 +28,6 @@ message LearningRate {
 // https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer
 // https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L151
 message AdagradParameters {
-  float initial_accumulator = 1;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer
@@ -42,8 +41,6 @@ message FtrlParameters {
   float l1 = 1;
   float l2 = 2;
   float lr_power = 3;
-  float initial_accum = 4;
-  float initial_linear = 5;
 }
 
 // The Adam optimizer does not implement hyper-parameter update; use the dynamic
@@ -70,8 +67,6 @@ message AdamParameters {
   float beta1 = 3;
   float beta2 = 4;
   float epsilon = 5;
-  float initial_m = 6;
-  float initial_v = 7;
   bool use_non_lazy_adam = 8;
   bool use_max_with_epsilon = 9;
 }
@@ -81,7 +76,6 @@ message AdamParameters {
 message MomentumParameters {
   float momentum = 1;
   bool use_nesterov = 2;
-  float initial_accum = 3;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -90,8 +84,6 @@ message RmsPropParameters {
   float rho = 1;
   float momentum = 2;
   float epsilon = 3;
-  float initial_ms = 4;
-  float initial_mom = 5;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -100,9 +92,6 @@ message CenteredRmsPropParameters {
   float rho = 1;
   float momentum = 2;
   float epsilon = 3;
-  float initial_ms = 4;
-  float initial_mom = 5;
-  float initial_mg = 6;
 }
 
 // Variant of algorithm in http://proceedings.mlr.press/v44/shamir15.pdf
@@ -119,9 +108,6 @@ message MdlAdagradLightParameters {
   float mdl_hard_limit = 10;
   bool hard_limit_min_benefit = 11;
   bool mdl_regularize = 12;
-  float initial_accumulator = 13;
-  float initial_weight = 14;
-  float initial_benefit = 15;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -129,8 +115,6 @@ message MdlAdagradLightParameters {
 message AdadeltaParameters {
   float rho = 1;
   float epsilon = 2;
-  float initial_accumulator = 3;
-  float initial_update = 4;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -138,7 +122,6 @@ message AdadeltaParameters {
 message ProximalAdagradParameters {
   float l1 = 1;
   float l2 = 2;
-  float initial_accumulator = 3;
 }
 
 message OptimizationParameters {
-- 
GitLab


From f8ba42b0ab0bb19af0e4a930b95e7e7b3d2f557e Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 2 Oct 2018 18:38:24 -0700
Subject: [PATCH 0251/1085] Disable the cuDNN workarounds if the version number
 is new enough to get the corresponding bugs fixed. The bugs that were
 work-arounded were fixed and verified.

PiperOrigin-RevId: 215497418
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 104 ++++++++++----------
 1 file changed, 54 insertions(+), 50 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index ca90c383f9..df8538a4b8 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -2487,30 +2487,32 @@ port::Status CudnnSupport::DoConvolveImpl(
 
   // Report an error if we might be hitting a cuDNN bug that accesses illegal
   // memory. See nvbugs/2138754, b/80018418.
-  SE_RETURN_IF_ERROR([&] {
-    if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
-      return port::Status::OK();
-    }
-    if (input_descriptor.ndims() < 3) {
-      return port::Status::OK();
-    }
-    // Checks that a*b is within the valid range (as provided by NVIDIA).
-    auto check_sizes = [](size_t a, size_t b) {
-      if ((a * b * 4608 - 1) >> 31 == 0) {
+  if (CUDNN_VERSION < 7300) {
+    SE_RETURN_IF_ERROR([&] {
+      if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
         return port::Status::OK();
       }
-      return port::Status(
-          port::error::FAILED_PRECONDITION,
-          "This configuration potentially accesses illegal memory.");
-    };
-    SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
-                                   output_descriptor.feature_map_count()));
-    SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
-                                   input_descriptor.feature_map_count()));
-    SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
-                                   output_descriptor.feature_map_count()));
-    return port::Status::OK();
-  }());
+      if (input_descriptor.ndims() < 3) {
+        return port::Status::OK();
+      }
+      // Checks that a*b is within the valid range (as provided by NVIDIA).
+      auto check_sizes = [](size_t a, size_t b) {
+        if ((a * b * 4608 - 1) >> 31 == 0) {
+          return port::Status::OK();
+        }
+        return port::Status(
+            port::error::FAILED_PRECONDITION,
+            "This configuration potentially accesses illegal memory.");
+      };
+      SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
+                                     output_descriptor.feature_map_count()));
+      SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
+                                     input_descriptor.feature_map_count()));
+      SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
+                                     output_descriptor.feature_map_count()));
+      return port::Status::OK();
+    }());
+  }
 
   if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
       !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
@@ -3166,7 +3168,7 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
 
   // Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
   // zero-initialized, nvbugs/2254619.
-  if (CUDNN_VERSION >= 7000 &&
+  if (CUDNN_VERSION >= 7000 && CUDNN_VERSION < 7300 &&
       algorithm_config.algorithm().algo_id() ==
           CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 &&
       cudnn_type == CUDNN_DATA_HALF &&
@@ -3317,31 +3319,33 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
 
   // Report an error if we might be hitting a cuDNN bug that produces incorrect
   // results. See nvbugs/2072856
-  SE_RETURN_IF_ERROR([&] {
-    if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
-      return port::Status::OK();
-    }
-    if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
-      return port::Status::OK();
-    }
-    int convolution_size = output_descriptor.height() > 1
-                               ? filter_descriptor.input_filter_height()
-                               : filter_descriptor.input_filter_width();
-    if (convolution_size <= 32) {
-      return port::Status::OK();
-    }
-    cudnnConvolutionMode_t convolution_mode;
-    cudnnDataType_t compute_type;
-    RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
-        conv.handle(), 0, nullptr, nullptr, nullptr, nullptr, &convolution_mode,
-        &compute_type));
-    if (convolution_mode != CUDNN_CONVOLUTION) {
-      return port::Status::OK();
-    }
-    return port::Status(
-        port::error::FAILED_PRECONDITION,
-        "This configuration potentially produces incorrect results.");
-  }());
+  if (CUDNN_VERSION < 7300) {
+    SE_RETURN_IF_ERROR([&] {
+      if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
+        return port::Status::OK();
+      }
+      if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
+        return port::Status::OK();
+      }
+      int convolution_size = output_descriptor.height() > 1
+                                 ? filter_descriptor.input_filter_height()
+                                 : filter_descriptor.input_filter_width();
+      if (convolution_size <= 32) {
+        return port::Status::OK();
+      }
+      cudnnConvolutionMode_t convolution_mode;
+      cudnnDataType_t compute_type;
+      RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
+          conv.handle(), 0, nullptr, nullptr, nullptr, nullptr,
+          &convolution_mode, &compute_type));
+      if (convolution_mode != CUDNN_CONVOLUTION) {
+        return port::Status::OK();
+      }
+      return port::Status(
+          port::error::FAILED_PRECONDITION,
+          "This configuration potentially produces incorrect results.");
+    }());
+  }
 
   if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
       !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
@@ -3357,8 +3361,8 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
   // This wrong result caused by the bug is very flaky. It needs to be run for
   // up to 20 times to produce a mismatch.
   //
-  // TODO(timshen): add a nvbugs link.
-  if (CUDNN_VERSION >= 7100 &&
+  // See nvbugs/2379553.
+  if (CUDNN_VERSION >= 7100 && CUDNN_VERSION < 7300 &&
       algorithm_config.algorithm().algo_id() ==
           CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 &&
       cudnn_type == CUDNN_DATA_HALF &&
-- 
GitLab


From 82b2794ea0e46b273079dfcf9ce288836d5544e9 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 2 Oct 2018 18:38:24 -0700
Subject: [PATCH 0252/1085] Disable the cuDNN workarounds if the version number
 is new enough to get the corresponding bugs fixed. The bugs that were
 work-arounded were fixed and verified.

PiperOrigin-RevId: 215497418
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 104 ++++++++++----------
 1 file changed, 54 insertions(+), 50 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index ca90c383f9..df8538a4b8 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -2487,30 +2487,32 @@ port::Status CudnnSupport::DoConvolveImpl(
 
   // Report an error if we might be hitting a cuDNN bug that accesses illegal
   // memory. See nvbugs/2138754, b/80018418.
-  SE_RETURN_IF_ERROR([&] {
-    if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
-      return port::Status::OK();
-    }
-    if (input_descriptor.ndims() < 3) {
-      return port::Status::OK();
-    }
-    // Checks that a*b is within the valid range (as provided by NVIDIA).
-    auto check_sizes = [](size_t a, size_t b) {
-      if ((a * b * 4608 - 1) >> 31 == 0) {
+  if (CUDNN_VERSION < 7300) {
+    SE_RETURN_IF_ERROR([&] {
+      if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
         return port::Status::OK();
       }
-      return port::Status(
-          port::error::FAILED_PRECONDITION,
-          "This configuration potentially accesses illegal memory.");
-    };
-    SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
-                                   output_descriptor.feature_map_count()));
-    SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
-                                   input_descriptor.feature_map_count()));
-    SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
-                                   output_descriptor.feature_map_count()));
-    return port::Status::OK();
-  }());
+      if (input_descriptor.ndims() < 3) {
+        return port::Status::OK();
+      }
+      // Checks that a*b is within the valid range (as provided by NVIDIA).
+      auto check_sizes = [](size_t a, size_t b) {
+        if ((a * b * 4608 - 1) >> 31 == 0) {
+          return port::Status::OK();
+        }
+        return port::Status(
+            port::error::FAILED_PRECONDITION,
+            "This configuration potentially accesses illegal memory.");
+      };
+      SE_RETURN_IF_ERROR(check_sizes(input_descriptor.feature_map_count(),
+                                     output_descriptor.feature_map_count()));
+      SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
+                                     input_descriptor.feature_map_count()));
+      SE_RETURN_IF_ERROR(check_sizes(input_descriptor.count(),
+                                     output_descriptor.feature_map_count()));
+      return port::Status::OK();
+    }());
+  }
 
   if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
       !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
@@ -3166,7 +3168,7 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
 
   // Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
   // zero-initialized, nvbugs/2254619.
-  if (CUDNN_VERSION >= 7000 &&
+  if (CUDNN_VERSION >= 7000 && CUDNN_VERSION < 7300 &&
       algorithm_config.algorithm().algo_id() ==
           CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 &&
       cudnn_type == CUDNN_DATA_HALF &&
@@ -3317,31 +3319,33 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
 
   // Report an error if we might be hitting a cuDNN bug that produces incorrect
   // results. See nvbugs/2072856
-  SE_RETURN_IF_ERROR([&] {
-    if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
-      return port::Status::OK();
-    }
-    if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
-      return port::Status::OK();
-    }
-    int convolution_size = output_descriptor.height() > 1
-                               ? filter_descriptor.input_filter_height()
-                               : filter_descriptor.input_filter_width();
-    if (convolution_size <= 32) {
-      return port::Status::OK();
-    }
-    cudnnConvolutionMode_t convolution_mode;
-    cudnnDataType_t compute_type;
-    RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
-        conv.handle(), 0, nullptr, nullptr, nullptr, nullptr, &convolution_mode,
-        &compute_type));
-    if (convolution_mode != CUDNN_CONVOLUTION) {
-      return port::Status::OK();
-    }
-    return port::Status(
-        port::error::FAILED_PRECONDITION,
-        "This configuration potentially produces incorrect results.");
-  }());
+  if (CUDNN_VERSION < 7300) {
+    SE_RETURN_IF_ERROR([&] {
+      if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
+        return port::Status::OK();
+      }
+      if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
+        return port::Status::OK();
+      }
+      int convolution_size = output_descriptor.height() > 1
+                                 ? filter_descriptor.input_filter_height()
+                                 : filter_descriptor.input_filter_width();
+      if (convolution_size <= 32) {
+        return port::Status::OK();
+      }
+      cudnnConvolutionMode_t convolution_mode;
+      cudnnDataType_t compute_type;
+      RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionNdDescriptor(
+          conv.handle(), 0, nullptr, nullptr, nullptr, nullptr,
+          &convolution_mode, &compute_type));
+      if (convolution_mode != CUDNN_CONVOLUTION) {
+        return port::Status::OK();
+      }
+      return port::Status(
+          port::error::FAILED_PRECONDITION,
+          "This configuration potentially produces incorrect results.");
+    }());
+  }
 
   if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
       !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
@@ -3357,8 +3361,8 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
   // This wrong result caused by the bug is very flaky. It needs to be run for
   // up to 20 times to produce a mismatch.
   //
-  // TODO(timshen): add a nvbugs link.
-  if (CUDNN_VERSION >= 7100 &&
+  // See nvbugs/2379553.
+  if (CUDNN_VERSION >= 7100 && CUDNN_VERSION < 7300 &&
       algorithm_config.algorithm().algo_id() ==
           CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 &&
       cudnn_type == CUDNN_DATA_HALF &&
-- 
GitLab


From 8dc7bc7764150253c03a666eee84fc48f867d6a2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 19:13:14 -0700
Subject: [PATCH 0253/1085] In all constant-propagation transformations, check
 that the array we'd be turning into a constant is a discardable array. If
 it's not discardable, it means that the user wants this array to keep
 existing in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760
---
 .../toco/graph_transformations/resolve_constant_binary.cc | 8 ++++++++
 .../resolve_constant_concatenation.cc                     | 7 +++++++
 .../graph_transformations/resolve_constant_fake_quant.cc  | 7 +++++++
 .../toco/graph_transformations/resolve_constant_fill.cc   | 7 +++++++
 .../toco/graph_transformations/resolve_constant_gather.cc | 8 ++++++++
 .../toco/graph_transformations/resolve_constant_pack.cc   | 8 ++++++++
 .../resolve_constant_random_uniform.cc                    | 7 +++++++
 .../toco/graph_transformations/resolve_constant_range.cc  | 8 ++++++++
 .../graph_transformations/resolve_constant_reshape.cc     | 7 +++++++
 .../toco/graph_transformations/resolve_constant_select.cc | 8 ++++++++
 .../resolve_constant_shape_or_rank.cc                     | 8 ++++++++
 .../toco/graph_transformations/resolve_constant_slice.cc  | 8 ++++++++
 .../resolve_constant_strided_slice.cc                     | 8 ++++++++
 .../toco/graph_transformations/resolve_constant_tile.cc   | 7 +++++++
 .../graph_transformations/resolve_constant_transpose.cc   | 8 ++++++++
 .../toco/graph_transformations/resolve_constant_unary.cc  | 8 ++++++++
 16 files changed, 122 insertions(+)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
index f7e5aa6609..3e57d3f467 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
@@ -191,6 +191,14 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model,
 bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
   const auto binary_it = model->operators.begin() + op_index;
   const auto* binary_op = binary_it->get();
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, binary_op->outputs[0])) {
+    return false;
+  }
+
   // Test for binary ops of types that we know how to resolve
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
index d916ae0ddf..c6c5035a51 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
@@ -144,6 +144,13 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
   const auto* concat_op =
       static_cast<const ConcatenationOperator*>(concat_base_op);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, concat_op->outputs[0])) {
+    return false;
+  }
+
   for (const string& input_name : concat_op->inputs) {
     // We only expect constant unquantized arrays as input, otherwise we return.
     // We  also make sure the shapes of the input arrays are known and they are
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
index f5f2f77460..3d797533c9 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
@@ -69,6 +69,13 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
   const auto* fakequant_op =
       static_cast<const FakeQuantOperator*>(fakequant_base_op);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, fakequant_op->outputs[0])) {
+    return false;
+  }
+
   // Yield until the fakequant MinMax has been resolved.
   if (!fakequant_op->minmax) {
     return false;
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
index f6f95481b5..2cb1e64f3a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
@@ -52,6 +52,13 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
index 36d7dad0ce..4dfe203a25 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
@@ -71,6 +71,14 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
index e86616574d..6f44025dd4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
@@ -59,6 +59,14 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
index 88d06d7dc7..c9f2b95d09 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
@@ -70,6 +70,13 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
index 1a0ba9e2bc..e347286dd4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
@@ -28,6 +28,14 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   auto* op = static_cast<RangeOperator*>(base_op);
 
   CHECK_EQ(op->inputs.size(), 3);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   const auto& start_array = model->GetArray(op->inputs[0]);
   if (!start_array.has_shape()) {
     // Yield until all input dims have been resolved.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
index a6f665b5f0..bfdaa8aafd 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
@@ -33,6 +33,13 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
index e880a3f44d..3a95d39cd4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
@@ -37,6 +37,14 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 3);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
index 8a0e3e8995..452bef1f16 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
@@ -27,6 +27,14 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
   }
 
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been resolved
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
index b35c3e19c4..58d6797e1c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
@@ -96,6 +96,14 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
   const SliceOperator* op = static_cast<const SliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
index 8853ed87e6..e275447a0c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
@@ -114,6 +114,14 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
       static_cast<const StridedSliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
index 5cfa1a5582..378a38f14b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
@@ -105,6 +105,13 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
   }
   const auto* op = static_cast<const TensorFlowTileOperator*>(base_op);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
   auto& output_array = model->GetArray(op->outputs[0]);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
index fe15dfa06f..5d3f4a6240 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
@@ -111,6 +111,14 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
 
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index c698a9567a..e35ed0898b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -48,6 +48,14 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) {
 bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   const auto unary_it = model->operators.begin() + op_index;
   const auto* unary_op = unary_it->get();
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, unary_op->outputs[0])) {
+    return false;
+  }
+
   // Test for unary ops of types that we know how to resolve.
   switch (unary_op->type) {
     case OperatorType::kCast:
-- 
GitLab


From fa61b939bec50d731b86f40c79054503d629e29b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 19:28:27 -0700
Subject: [PATCH 0254/1085] [XLA] Merge the single instruction parsing and the
 full module parsing in one function.

PiperOrigin-RevId: 215501702
---
 tensorflow/compiler/xla/service/hlo_parser.cc | 66 ++++++++++---------
 tensorflow/compiler/xla/service/hlo_parser.h  |  6 --
 .../compiler/xla/service/hlo_parser_test.cc   | 22 +++----
 3 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 5a125b4c08..0440f1b54f 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -68,7 +68,7 @@ class HloParser {
 
   // Runs the parser and constructs the resulting HLO in the given (empty)
   // HloModule. Returns false if an error occurred.
-  bool Run(HloModule* module);
+  Status Run(HloModule* module);
 
   // Returns the error information.
   string GetError() const { return StrJoin(error_, "\n"); }
@@ -79,9 +79,6 @@ class HloParser {
   StatusOr<ConvolutionDimensionNumbers> ParseConvolutionDimensionNumbersOnly();
   StatusOr<PaddingConfig> ParsePaddingConfigOnly();
 
-  // Stand-alone parsing utility for a single instruction worth of text.
-  Status ParseSingleInstruction(HloModule* module);
-
  private:
   using InstrNameTable =
       std::unordered_map<string, std::pair<HloInstruction*, LocTy>>;
@@ -100,8 +97,12 @@ class HloParser {
   std::pair<HloInstruction*, LocTy>* FindInstruction(
       const string& name, const optional<Shape>& shape = nullopt);
 
+  // Parse a single instruction worth of text.
+  bool ParseSingleInstruction(HloModule* module);
+
   // ParseXXX returns false if an error occurred.
   bool ParseHloModule(HloModule* module);
+
   bool ParseComputations(HloModule* module);
   bool ParseComputation(HloComputation** entry_computation);
   bool ParseInstructionList(HloComputation** computation,
@@ -376,9 +377,25 @@ bool HloParser::TokenError(absl::string_view msg) {
   return Error(lexer_.GetLoc(), msg);
 }
 
-bool HloParser::Run(HloModule* module) {
+Status HloParser::Run(HloModule* module) {
   lexer_.Lex();
-  return ParseHloModule(module);
+  if (lexer_.GetKind() == TokKind::kw_HloModule) {
+    // This means that the text contains a full HLO module.
+    if (!ParseHloModule(module)) {
+      return InvalidArgument(
+          "Syntax error when trying to parse the text as a HloModule:\n%s",
+          GetError());
+    }
+    return Status::OK();
+  }
+  // This means that the text is a single HLO instruction.
+  if (!ParseSingleInstruction(module)) {
+    return InvalidArgument(
+        "Syntax error when trying to parse the text as single "
+        "HloInstruction:\n%s",
+        GetError());
+  }
+  return Status::OK();
 }
 
 std::pair<HloInstruction*, HloParser::LocTy>* HloParser::FindInstruction(
@@ -3279,9 +3296,11 @@ StatusOr<PaddingConfig> HloParser::ParsePaddingConfigOnly() {
   return padding_config;
 }
 
-Status HloParser::ParseSingleInstruction(HloModule* module) {
-  TF_RET_CHECK(create_missing_instruction_ == nullptr);
-  TF_RET_CHECK(scoped_name_tables_.empty());
+bool HloParser::ParseSingleInstruction(HloModule* module) {
+  if (create_missing_instruction_ != nullptr || !scoped_name_tables_.empty()) {
+    LOG(FATAL) << "Parser state is not clean. Please do not call any other "
+                  "methods before calling ParseSingleInstruction.";
+  }
   HloComputation::Builder builder(module->name());
 
   // The missing instruction hook we register creates the shaped instruction on
@@ -3298,9 +3317,6 @@ Status HloParser::ParseSingleInstruction(HloModule* module) {
     return tensorflow::gtl::FindOrNull(current_name_table(), new_name);
   };
 
-  // Prime the lexer.
-  lexer_.Lex();
-
   // Parse the instruction with the registered hook.
   Scope scope(&scoped_name_tables_);
   if (CanBeShape()) {
@@ -3309,7 +3325,7 @@ Status HloParser::ParseSingleInstruction(HloModule* module) {
     //
     //  f32[10] fusion(...), calls={...}
     if (!ParseInstruciontRhs(&builder, module->name(), lexer_.GetLoc())) {
-      return InvalidArgument("Syntax error:\n%s", GetError());
+      return false;
     }
   } else {
     // This means that the instruction's left-hand side might exist, e.g.
@@ -3317,7 +3333,7 @@ Status HloParser::ParseSingleInstruction(HloModule* module) {
     //  foo = f32[10] fusion(...), calls={...}
     string root_name;
     if (!ParseInstruction(&builder, &root_name)) {
-      return InvalidArgument("Syntax error:\n%s", GetError());
+      return false;
     }
   }
 
@@ -3325,7 +3341,7 @@ Status HloParser::ParseSingleInstruction(HloModule* module) {
   for (auto& comp : computations_) {
     module->AddEmbeddedComputation(std::move(comp));
   }
-  return Status::OK();
+  return true;
 }
 
 }  // namespace
@@ -3334,38 +3350,24 @@ StatusOr<std::unique_ptr<HloModule>> ParseHloString(
     absl::string_view str, const HloModuleConfig& config) {
   auto module = absl::make_unique<HloModule>(/*name=*/"", config);
   HloParser parser(str);
-  if (!parser.Run(module.get())) {
-    return InvalidArgument("Syntax error:\n%s", parser.GetError());
-  }
+  TF_RETURN_IF_ERROR(parser.Run(module.get()));
   return std::move(module);
 }
 
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(absl::string_view str) {
   auto module = absl::make_unique<HloModule>(/*name=*/"", HloModuleConfig());
   HloParser parser(str);
-  if (!parser.Run(module.get())) {
-    return InvalidArgument("Syntax error:\n%s", parser.GetError());
-  }
+  TF_RETURN_IF_ERROR(parser.Run(module.get()));
   return std::move(module);
 }
 
 Status ParseHloString(absl::string_view str, HloModule* module) {
   TF_RET_CHECK(module->computation_count() == 0);
   HloParser parser(str);
-  if (!parser.Run(module)) {
-    return InvalidArgument("Syntax error:\n%s", parser.GetError());
-  }
+  TF_RETURN_IF_ERROR(parser.Run(module));
   return Status::OK();
 }
 
-StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
-    absl::string_view str, absl::string_view name) {
-  HloParser parser(str);
-  auto module = absl::make_unique<HloModule>(string(name), HloModuleConfig());
-  TF_RETURN_IF_ERROR(parser.ParseSingleInstruction(module.get()));
-  return std::move(module);
-}
-
 StatusOr<HloSharding> ParseSharding(absl::string_view str) {
   HloParser parser(str);
   return parser.ParseShardingOnly();
diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h
index 97d6f0117e..81eeb9f13b 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.h
+++ b/tensorflow/compiler/xla/service/hlo_parser.h
@@ -40,12 +40,6 @@ StatusOr<std::unique_ptr<HloModule>> ParseHloString(
 // point to an empty module (no computations).
 Status ParseHloString(absl::string_view str, HloModule* module);
 
-// Parses the text for a single HLO instruction into an HLO module with an
-// entry computation that runs that instruction (with the same parameters) as
-// its root instruction.
-StatusOr<std::unique_ptr<HloModule>> ParseHloOpToModule(
-    absl::string_view str, absl::string_view name = "single_op");
-
 // Given a string in the HloModule::ToString() format, parses the string and
 // creates a HloModule with default config.
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(absl::string_view str);
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index d10acf3814..b618510640 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1835,7 +1835,7 @@ TEST(HloParserSingleOpTest, SingleOp) {
   const string text =
       "%multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0} %broadcast, "
       "f32[2,4]{1,0} %x)";
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
@@ -1844,7 +1844,7 @@ TEST(HloParserSingleOpTest, SingleOp) {
 
 TEST(HloParserSingleOpTest, SingleOpNoShapeProducesError) {
   const string text = "multiply(f32[2,4]{1,0} %broadcast, f32[2,4]{1,0} %x)";
-  StatusOr<std::unique_ptr<HloModule>> module = ParseHloOpToModule(text);
+  StatusOr<std::unique_ptr<HloModule>> module = ParseHloString(text);
   ASSERT_TRUE(!module.status().ok());
   LOG(INFO) << "Status: " << module.status();
   EXPECT_THAT(module.status().ToString(),
@@ -1853,7 +1853,7 @@ TEST(HloParserSingleOpTest, SingleOpNoShapeProducesError) {
 
 TEST(HloParserSingleOpTest, SingleOpNoOperandShapesProducesError) {
   const string text = "%multiply = f32[2,4]{1,0} multiply(%broadcast, %x)";
-  StatusOr<std::unique_ptr<HloModule>> module = ParseHloOpToModule(text);
+  StatusOr<std::unique_ptr<HloModule>> module = ParseHloString(text);
   ASSERT_TRUE(!module.status().ok());
   LOG(INFO) << "Status: " << module.status();
   EXPECT_THAT(module.status().ToString(),
@@ -1863,7 +1863,7 @@ TEST(HloParserSingleOpTest, SingleOpNoOperandShapesProducesError) {
 TEST(HloParserSingleOpTest, SingleOpNoNames) {
   const string text =
       "%multiply = f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})";
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
@@ -1872,7 +1872,7 @@ TEST(HloParserSingleOpTest, SingleOpNoNames) {
 
 TEST(HloParserSingleOpTest, CanonicalOp) {
   const string text = "f32[2,4]{1,0} multiply(f32[2,4]{1,0}, f32[2,4]{1,0})";
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
@@ -1908,7 +1908,7 @@ TEST(HloParserSingleOpTest, CanonicalOpWithNested) {
   }
 })";
 
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_EQ(
@@ -1926,7 +1926,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested) {
   ROOT %subtract = f32[3,2,1,1]{3,2,1,0} subtract(f32[3,2,1,1]{3,2,1,0} %param_0, f32[3,2,1,1]{3,2,1,0} %broadcast)
 })";
 
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
@@ -1939,7 +1939,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested_DoesNotExist) {
 {
   result = f32[] add(f32[] x, f32[] y)
 })";
-  auto status = ParseHloOpToModule(text).status();
+  auto status = ParseHloString(text).status();
   ASSERT_FALSE(status.ok());
   EXPECT_THAT(status.error_message(),
               ::testing::HasSubstr("does not exist: x"));
@@ -1951,7 +1951,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested_NoLhs) {
 {
   f32[] add(f32[] x, f32[] y)
 })";
-  auto status = ParseHloOpToModule(text).status();
+  auto status = ParseHloString(text).status();
   ASSERT_FALSE(status.ok());
   EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name"));
 }
@@ -1962,7 +1962,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested_NoOperandName) {
 {
   result = f32[] add(f32[], f32[])
 })";
-  auto status = ParseHloOpToModule(text).status();
+  auto status = ParseHloString(text).status();
   ASSERT_FALSE(status.ok());
   EXPECT_THAT(status.error_message(), ::testing::HasSubstr("expects name"));
 }
@@ -1970,7 +1970,7 @@ TEST(HloParserSingleOpTest, SingleOpWithNested_NoOperandName) {
 TEST(HloParserSingleOpTest, ConvolutionTrivialFeatureGroupCount) {
   const string text =
       R"(%convolution = f32[1,2,1]{2,0,1} convolution(f32[1,2,1]{2,0,1} %copy, f32[1,1,1]{2,1,0} %filter), window={size=1}, dim_labels=b0f_0io->b0f)";
-  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloOpToModule(text));
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(text));
   const HloComputation* computation = module->entry_computation();
   ASSERT_NE(computation, nullptr);
   EXPECT_THAT(computation->root_instruction(),
-- 
GitLab


From 4b2d0180ba8c903f098f52eb9a12d26a7626dd34 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 19:28:31 -0700
Subject: [PATCH 0255/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 215501709
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 46 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  8 ++++
 2 files changed, 54 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index e46cbc863d..4845767405 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -27069,6 +27069,52 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "If"
+  input_arg {
+    name: "cond"
+    type_attr: "Tcond"
+  }
+  input_arg {
+    name: "input"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "Tcond"
+    type: "type"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "then_branch"
+    type: "func"
+  }
+  attr {
+    name: "else_branch"
+    type: "func"
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "Igamma"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 0e9f939ab4..229022b64c 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -13176,6 +13176,14 @@ op {
     name: "else_branch"
     type: "func"
   }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    default_value {
+      list {
+      }
+    }
+  }
   is_stateful: true
 }
 op {
-- 
GitLab


From 2597b883a14749c77fffd7e5f9677107021ff40a Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 2 Oct 2018 20:00:36 -0700
Subject: [PATCH 0256/1085] Automated rollback of commit
 b7e9cbab27c893283acc4a6154d7a59dffb23758

PiperOrigin-RevId: 215503549
---
 .../contrib/distribute/python/input_ops.py    |  2 +-
 tensorflow/python/data/ops/dataset_ops.py     | 60 +++++++++++--------
 tensorflow/python/eager/function.py           | 14 -----
 tensorflow/python/eager/function_test.py      |  9 +--
 4 files changed, 40 insertions(+), 45 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/input_ops.py b/tensorflow/contrib/distribute/python/input_ops.py
index 423952c9e2..f07ec8234d 100644
--- a/tensorflow/contrib/distribute/python/input_ops.py
+++ b/tensorflow/contrib/distribute/python/input_ops.py
@@ -78,7 +78,7 @@ def auto_shard_dataset(dataset, num_shards, index):
       elif hasattr(dataset, "_map_func"):
         # TODO(priyag): Make this check more robust by enforcing some common
         # property on all map/flatmap/interleave datasets.
-        map_func_def = dataset._map_func.function_def
+        map_func_def = dataset._map_func.definition
         for node in map_func_def.node_def:
           if node.op in _READER_DATASET_OPS:
             found_reader_op = True
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index d90da5908d..46ce191f7b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -30,7 +30,6 @@ from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import random_seed
 from tensorflow.python.data.util import sparse
 from tensorflow.python.eager import context
-from tensorflow.python.eager import function as eager_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
@@ -38,7 +37,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import smart_cond
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -1715,8 +1713,7 @@ class _VariantDataset(Dataset):
 
 
 class StructuredFunctionWrapper(object):
-  """A wrapper for `defun` that supports structured arguments and return values.
-
+  """A wrapper for `Defun` that supports structured arguments and return values.
   """
 
   def __init__(self, func, transformation_name, dataset=None,
@@ -1768,7 +1765,7 @@ class StructuredFunctionWrapper(object):
     # TODO(b/110122868): Enable this support for all `tf.data` functions.
     self._nested_dataset_support = experimental_nested_dataset_support
 
-    @eager_function.defun(input_signature=self._defun_args())
+    @function.Defun(*self._defun_args())
     def tf_data_structured_function_wrapper(*args):
       """Wrapper for passing nested structures to and from tf.data functions."""
       flat_args = []
@@ -1853,43 +1850,36 @@ class StructuredFunctionWrapper(object):
       self._output_shapes = nest.pack_sequence_as(ret, flat_shapes)
       self._output_types = nest.pack_sequence_as(ret, flat_types)
 
-      return flat_ret
+      _warn_if_collections(transformation_name)
 
-    table_initializers_len = len(ops.get_default_graph().get_collection(
-        ops.GraphKeys.TABLE_INITIALIZERS))
+      return flat_ret
 
-    self._function = tf_data_structured_function_wrapper.get_concrete_function()
+    self._function = tf_data_structured_function_wrapper
     if add_to_graph:
       self._function.add_to_graph(ops.get_default_graph())
-    if len(
-        self._function.graph.get_collection(
-            ops.GraphKeys.TABLE_INITIALIZERS)) != table_initializers_len:
-      warnings.warn(
-          "Creating lookup tables inside a function passed to %s is not"
-          " supported. Create each table outside the function, and "
-          "capture it inside the function to use it." % transformation_name)
+    else:
+      # Use the private method that will execute
+      # `tf_data_structured_function_wrapper` but delay adding it to the graph
+      # in case (e.g.) we need to rerun the function.
+      self._function._create_definition_if_needed()  # pylint: disable=protected-access
 
   def _defun_args(self):
-    """Returns a list of `tf.TensorSpec` for the input element structure."""
+    """Returns a flat list of `tf.DType` for the input element structure."""
     ret = []
-    for input_type, input_shape, input_class in zip(
-        nest.flatten(self._input_types), nest.flatten(self._input_shapes),
-        nest.flatten(self._input_classes)):
+    for input_type, input_class in zip(nest.flatten(self._input_types),
+                                       nest.flatten(self._input_classes)):
       # TODO(b/110122868): Add a registration mechanism for new component types.
       if input_class is sparse_tensor_lib.SparseTensor:
-        ret.append(
-            tensor_spec.TensorSpec(
-                tensor_shape.TensorShape(None), dtypes.variant))
+        ret.append(dtypes.variant)
       elif isinstance(input_class, _NestedDatasetComponent):
         if not self._nested_dataset_support:
           raise NotImplementedError(
               "The %s transformation does not currently support nested "
               "datasets as inputs." % self._transformation_name)
-        ret.append(
-            tensor_spec.TensorSpec(tensor_shape.scalar(), dtypes.variant))
+        ret.append(dtypes.variant)
       else:
         assert isinstance(input_type, dtypes.DType)
-        ret.append(tensor_spec.TensorSpec(input_shape, input_type))
+        ret.append(input_type)
     return ret
 
   @property
@@ -2589,6 +2579,24 @@ def _should_unpack_args(args):
   return type(args) is tuple  # pylint: disable=unidiomatic-typecheck
 
 
+def _warn_if_collections(transformation_name):
+  """Prints warning message if the current graph uses common graph collections.
+
+  NOTE(mrry): Currently a warning is only generated for lookup tables. Any
+  variables created will be automatically hoisted out to the outermost scope
+  using `init_scope()`. Some collections (such as for control-flow contexts)
+  are benign and should not generate a warning.
+
+  Args:
+    transformation_name: A human-readable name for the transformation.
+  """
+  if ops.get_default_graph().get_collection(ops.GraphKeys.TABLE_INITIALIZERS):
+    warnings.warn("Creating lookup tables inside a function passed to %s is not"
+                  " supported. Create each table outside the function, and "
+                  "capture it inside the function to use it."
+                  % transformation_name)
+
+
 class MapDataset(UnaryDataset):
   """A `Dataset` that maps a function over elements in its input."""
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index aeb1cac3e9..f261d92d64 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -662,11 +662,6 @@ class Function(object):
     outputs = self._inference_function.call(ctx, args)
     return self._build_call_outputs(outputs)
 
-  @property
-  def name(self):
-    """Function name."""
-    return self._inference_function.name
-
   @property
   def graph(self):
     """Returns the graph from which this function was constructed."""
@@ -724,10 +719,6 @@ class Function(object):
     return nest.map_structure(lambda x: x.dtype if x is not None else None,
                               self._func_graph.structured_outputs)
 
-  def add_to_graph(self, g):
-    """Adds this function into the graph g."""
-    return self._inference_function.add_to_graph(g)
-
   def _construct_backprop_function(self):
     """Constructs the backprop function object for this function."""
     backwards_graph = FuncGraph(_backward_name(self._func_graph.name))
@@ -1131,8 +1122,6 @@ class PolymorphicFunction(object):
       *args: inputs to specialize on.
       **kwargs: inputs to specialize on.
     """
-    if self._input_signature:
-      args, kwargs = None, None
     graph_function, _ = self._maybe_define_function(args, kwargs)
     return graph_function
 
@@ -1315,9 +1304,6 @@ def register(func, *args, **kwargs):
   function definition into graph. Register function with different input param
   will result into multiple version of functions registered in graph.
 
-  Also, `args` and `kwargs` are ignored if this `PolymorphicFunction` was
-  created with an `input_signature`.
-
   Args:
     func: the PolymorphicFunction instance that generated by a @defun
     *args: input arguments for the Python function.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index ac45606eb0..9ce367a837 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1750,10 +1750,11 @@ class FunctionTest(test.TestCase):
         # pylint: disable=protected-access
         self.assertEqual(len(graph._functions), 3)
 
-        # Test register function with cache, note inputs are ignored.
-        function.register(defun_matmul)
-        graph = ops.get_default_graph()
-        self.assertEqual(len(graph._functions), 3)
+        # Test input param shape mismatch
+        t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        with self.assertRaisesRegexp(
+            ValueError, 'Python inputs incompatible with input_signature'):
+          function.register(defun_matmul, t2, t2)
 
   def testRegisterFunctionWithCache(self):
     def matmul(x, y):
-- 
GitLab


From 9f42ebd5982688511ecc0ef7d23de02b64d8dd1e Mon Sep 17 00:00:00 2001
From: Shanqing Cai <cais@google.com>
Date: Tue, 2 Oct 2018 20:04:31 -0700
Subject: [PATCH 0257/1085] Improve error messages and doc strings for
 eager-mode tf.keras.Model.fit() + tf.data objects

- Previously, when validation_steps was missing, the error message incorrectly says "please provide either batch_size or steps_per_epoch". Now it reads "please provide either batch_size or validation_steps".
- Some whitespace-related fixes.

PiperOrigin-RevId: 215503991
---
 tensorflow/python/keras/engine/training.py    |  9 ++++--
 .../python/keras/engine/training_eager.py     |  3 +-
 .../keras/engine/training_eager_test.py       | 30 +++++++++++++++++++
 .../python/keras/engine/training_utils.py     | 15 +++++++---
 4 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index c842b8192e..85233de9b1 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -1419,6 +1419,8 @@ class Model(Network):
               - tuple `(x_val, y_val)` of Numpy arrays or tensors
               - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
               - dataset or a dataset iterator
+            For the first two cases, `batch_size` must be provided.
+            For the last case, `validation_steps` must be provided.
         shuffle: Boolean (whether to shuffle the training data
             before each epoch) or str (for 'batch').
             'batch' is a special option for dealing with the
@@ -1454,9 +1456,10 @@ class Model(Network):
             TensorFlow data tensors, the default `None` is equal to
             the number of samples in your dataset divided by
             the batch size, or 1 if that cannot be determined.
-        validation_steps: Only relevant if `steps_per_epoch`
-            is specified. Total number of steps (batches of samples)
-            to validate before stopping.
+        validation_steps: Only relevant if `validation_data` is provided and
+            is a dataset or dataset iterator. Total number of steps (batches of
+            samples) to draw before stopping when performing validation
+            at the end of every epoch.
         max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
             input only. Maximum size for the generator queue.
             If unspecified, `max_queue_size` will default to 10.
diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py
index fb71bf2596..2a62edd698 100644
--- a/tensorflow/python/keras/engine/training_eager.py
+++ b/tensorflow/python/keras/engine/training_eager.py
@@ -739,7 +739,8 @@ def test_loop(model, inputs, targets,
       y=targets,
       sample_weights=sample_weights,
       batch_size=batch_size,
-      steps_per_epoch=steps)
+      steps_per_epoch=steps,
+      is_validation=True)
   with backend.learning_phase_scope(0):
     return iterator_test_loop(model, inputs, steps, verbose=verbose)
 
diff --git a/tensorflow/python/keras/engine/training_eager_test.py b/tensorflow/python/keras/engine/training_eager_test.py
index 1f5176c4d7..943ede1be9 100644
--- a/tensorflow/python/keras/engine/training_eager_test.py
+++ b/tensorflow/python/keras/engine/training_eager_test.py
@@ -125,6 +125,36 @@ class TrainingTest(test.TestCase):
     model.train_on_batch(inputs, targets)
     model.test_on_batch(inputs, targets)
 
+  def test_model_fit_and_validation_with_missing_arg_errors(self):
+    x = keras.layers.Input(shape=(3,), name='input')
+    y = keras.layers.Dense(4, name='dense')(x)
+    model = keras.Model(x, y)
+    model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse')
+
+    x = keras.backend.zeros(shape=(10, 3))
+    y = keras.backend.zeros(shape=(10, 4))
+    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat(10).batch(5)
+    iterator = dataset.make_one_shot_iterator()
+    validation_dataset = dataset_ops.Dataset.from_tensor_slices(
+        (x, y)).repeat(10).batch(5)
+    validation_iterator = validation_dataset.make_one_shot_iterator()
+
+    with self.assertRaisesRegexp(
+        ValueError, r'specify .* `steps_per_epoch`'):
+      model.fit(iterator, epochs=1, verbose=0)
+    with self.assertRaisesRegexp(
+        ValueError, r'provide either `batch_size` or `validation_steps`'):
+      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
+                validation_data=(x, y))
+    with self.assertRaisesRegexp(
+        ValueError, r'provide either `batch_size` or `validation_steps`'):
+      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
+                validation_data=validation_dataset)
+    with self.assertRaisesRegexp(
+        ValueError, r'provide either `batch_size` or `validation_steps`'):
+      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
+                validation_data=validation_iterator)
+
   def test_generator_methods(self):
     model = keras.Sequential()
     model.add(keras.layers.Dense(4, input_shape=(3,)))
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 9c303f4bed..dd2a7f16ec 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -106,7 +106,8 @@ def convert_to_iterator(x=None,
                         batch_size=None,
                         steps_per_epoch=None,
                         epochs=1,
-                        shuffle=False):
+                        shuffle=False,
+                        is_validation=False):
   """Converts NumPy arrays or EagerTensors to an EagerIterator.
 
   Combines all provided data into a single EagerIterator.
@@ -124,6 +125,9 @@ def convert_to_iterator(x=None,
         epoch.
       epochs: Epochs to repeat iterator for.
       shuffle: Whether to shuffle data after each epoch.
+      is_validation: Whether this call is for validation during a training
+        (e.g., `fit()`) call. This info is used to construct error messages
+        (if any).
 
   Raises:
       ValueError: if steps_per_epoch cannot be calculated from the data
@@ -151,9 +155,12 @@ def convert_to_iterator(x=None,
     steps_per_epoch = int(math.ceil(num_samples / batch_size))
 
   if steps_per_epoch is None:
-    raise ValueError('Could not determine steps_per_epoch.'
-                     'Please provide either batch_size or'
-                     'steps_per_epoch.')
+    alternative_arg_name = (
+        'validation_steps' if is_validation else 'steps_per_epoch')
+    raise ValueError(
+        'Could not determine how to convert EagerTensors into EagerIterator. '
+        'Please provide either `batch_size` or '
+        '`%s`.' % alternative_arg_name)
 
   # TODO(omalleyt) for NumPy arrays in graph mode
   # placeholder ops should be used
-- 
GitLab


From a2c827114d36b8bb4957411158c2d9bbe60ac36a Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 2 Oct 2018 16:14:32 -0700
Subject: [PATCH 0258/1085] Internal change.

PiperOrigin-RevId: 215477724
---
 tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index cd7206baf8..9c6390070c 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -29,7 +29,7 @@ TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU:-8}
 # p100 has minimum 12G memory. Therefore, we should limit each test to 1.5G.
 # To leave some room in case we want to run more tests in parallel in the
 # future and to use a rounder number, we set it to 1G.
-export TF_PER_DEVICE_MEMORY_LIMIT_MB=1024
+export TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB:-1024}
 
 # *******************************************************************
 #         This section of the script is needed to
-- 
GitLab


From 65b5190065db0074f8722b09ba43423438c40258 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 2 Oct 2018 21:49:20 -0700
Subject: [PATCH 0259/1085] Further loosen bounds for depthwise_conv_op_test.

PiperOrigin-RevId: 215512168
---
 tensorflow/python/kernel_tests/depthwise_conv_op_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 9c02b69180..6aee2eb0a3 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -131,7 +131,7 @@ class DepthwiseConv2DTest(test.TestCase):
     with self.session(graph=graph, use_gpu=use_gpu) as sess:
       tolerance = {
           dtypes.float16: 4e-2,
-          dtypes.float32: 1e-7,
+          dtypes.float32: 1e-6,
           dtypes.float64: 1e-12,
       }[data_type]
 
-- 
GitLab


From bbe15eee6779941c54e145d12e16f6473738857c Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Tue, 2 Oct 2018 22:39:09 -0700
Subject: [PATCH 0260/1085] [XLA] Modify the function that determines whether
 an instruction can change layout so that it can be used by the HLO verifier.

Change the function to a static member function of the LayoutAssignment class.

Add an std::function member to LayoutAssignment to store the function object
passed down from the backend compiler class and use it to decide whether an
instruction can change layouts.

Fix affected test cases.

PiperOrigin-RevId: 215515611
---
 .../compiler/xla/service/cpu/cpu_compiler.cc   |  3 ++-
 .../xla/service/cpu/cpu_layout_assignment.h    |  5 ++++-
 .../service/cpu/cpu_layout_assignment_test.cc  | 10 ++++++----
 .../xla/service/gpu/gpu_layout_assignment.h    |  5 ++++-
 .../service/gpu/gpu_layout_assignment_test.cc  | 17 +++++++++++------
 .../compiler/xla/service/gpu/nvptx_compiler.cc |  3 ++-
 .../xla/service/interpreter/compiler.cc        |  3 ++-
 .../compiler/xla/service/layout_assignment.cc  | 18 ++++++++++++------
 .../compiler/xla/service/layout_assignment.h   | 18 ++++++++++++++----
 .../xla/service/layout_assignment_test.cc      |  3 ++-
 10 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 18fc144efe..ea8c200dee 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -308,7 +308,8 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
       ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
 
   pipeline.AddPass<CpuLayoutAssignment>(
-      module->mutable_entry_computation_layout(), target_machine_features);
+      module->mutable_entry_computation_layout(),
+      LayoutAssignment::InstructionCanChangeLayout, target_machine_features);
   return pipeline.Run(module).status();
 }
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
index 3c4fe68b83..f4da35dd37 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
@@ -30,8 +30,11 @@ class CpuLayoutAssignment : public LayoutAssignment {
  public:
   explicit CpuLayoutAssignment(
       ComputationLayout* entry_computation_layout,
+      std::function<bool(const HloInstruction*)>
+          instruction_can_change_layout_func,
       const TargetMachineFeatures* target_machine_features)
-      : LayoutAssignment(entry_computation_layout),
+      : LayoutAssignment(entry_computation_layout,
+                         std::move(instruction_can_change_layout_func)),
         target_machine_features_(*target_machine_features) {}
   ~CpuLayoutAssignment() override {}
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
index 4668f3872d..97659b88a7 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
@@ -54,8 +54,9 @@ class CpuLayoutAssignmentTest : public HloTestBase {
         [](int64 shape_size) {
           return cpu::TargetMachineFeatures::kEigenExpectedTensorAlignment;
         });
-    cpu::CpuLayoutAssignment layout_assignment(entry_computation_layout,
-                                               &target_machine_features);
+    cpu::CpuLayoutAssignment layout_assignment(
+        entry_computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+        &target_machine_features);
     EXPECT_IS_OK(layout_assignment.Run(module).status());
   }
 };
@@ -321,8 +322,9 @@ static StatusOr<DotOutputFusionLayoutAssignmentResult> RunDotOutputFusion(
       [](int64 shape_size) {
         return cpu::TargetMachineFeatures::kEigenExpectedTensorAlignment;
       });
-  cpu::CpuLayoutAssignment layout_assignment(&computation_layout,
-                                             &target_machine_features);
+  cpu::CpuLayoutAssignment layout_assignment(
+      &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+      &target_machine_features);
   TF_ASSIGN_OR_RETURN(result.layout_assignment_changed_something,
                       layout_assignment.Run(module));
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
index e2b96a81d4..4ba7989e9c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
@@ -30,8 +30,11 @@ namespace gpu {
 class GpuLayoutAssignment : public LayoutAssignment {
  public:
   explicit GpuLayoutAssignment(ComputationLayout* entry_computation_layout,
+                               std::function<bool(const HloInstruction*)>
+                                   instruction_can_change_layout_func,
                                se::StreamExecutor* stream_executor)
-      : LayoutAssignment(entry_computation_layout),
+      : LayoutAssignment(entry_computation_layout,
+                         std::move(instruction_can_change_layout_func)),
         stream_executor_(stream_executor) {}
   ~GpuLayoutAssignment() override {}
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
index fbc8ddf599..04681cfcec 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
@@ -75,7 +75,8 @@ TEST_F(LayoutAssignmentTest, Elementwise) {
             ShapeLayout(result_shape_with_layout);
 
         GpuLayoutAssignment layout_assignment(
-            &computation_layout, backend().default_stream_executor());
+            &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+            backend().default_stream_executor());
         EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
         for (const HloInstruction* operand : add->operands()) {
@@ -163,7 +164,8 @@ TEST_F(LayoutAssignmentTest, BatchNormInference) {
       }
 
       GpuLayoutAssignment layout_assignment(
-          &computation_layout, backend().default_stream_executor());
+          &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+          backend().default_stream_executor());
       EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
       // The first operand to batchnorm should have the same layout as the
@@ -233,7 +235,8 @@ TEST_F(LayoutAssignmentTest, BatchNormTraining) {
       }
 
       GpuLayoutAssignment layout_assignment(
-          &computation_layout, backend().default_stream_executor());
+          &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+          backend().default_stream_executor());
       EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
       // The first operand to batchnorm should have the same layout as the
@@ -314,7 +317,8 @@ TEST_F(LayoutAssignmentTest, BatchNormGrad) {
         }
 
         GpuLayoutAssignment layout_assignment(
-            &computation_layout, backend().default_stream_executor());
+            &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+            backend().default_stream_executor());
         EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
         // The first and fourth operands to the batchnorm call should have the
@@ -348,8 +352,9 @@ TEST_F(LayoutAssignmentTest, DotLayout) {
 
   ComputationLayout computation_layout(
       module->entry_computation()->ComputeProgramShape());
-  GpuLayoutAssignment layout_assignment(&computation_layout,
-                                        backend().default_stream_executor());
+  GpuLayoutAssignment layout_assignment(
+      &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+      backend().default_stream_executor());
   EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
   Shape expected_shape =
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 0b3b429710..b4ae2e42c7 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -232,7 +232,8 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     // a layout-sensitive verifier!
     HloPassPipeline pipeline("layout assignment");
     pipeline.AddPass<GpuLayoutAssignment>(
-        hlo_module->mutable_entry_computation_layout(), stream_exec);
+        hlo_module->mutable_entry_computation_layout(),
+        LayoutAssignment::InstructionCanChangeLayout, stream_exec);
     TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status());
   }
 
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index bb69cb9c47..27fe89375d 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -44,7 +44,8 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) {
   HloPassPipeline pipeline("Interpreter");
 
   pipeline.AddPass<LayoutAssignment>(
-      hlo_module->mutable_entry_computation_layout());
+      hlo_module->mutable_entry_computation_layout(),
+      LayoutAssignment::InstructionCanChangeLayout);
   return pipeline.Run(hlo_module).status();
 }
 
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 25d5327561..68a08a0886 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -974,10 +974,15 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) {
 
 LayoutAssignment::LayoutAssignment(
     ComputationLayout* entry_computation_layout,
+    std::function<bool(const HloInstruction*)>
+        instruction_can_change_layout_func,
     ChannelLayoutConstraints* channel_constraints)
     : entry_computation_layout_(entry_computation_layout),
+
       saved_entry_computation_layout_(*entry_computation_layout),
-      channel_layout_constraints_(channel_constraints) {
+      channel_layout_constraints_(channel_constraints),
+      instruction_can_change_layout_func_(
+          std::move(instruction_can_change_layout_func)) {
   if (channel_layout_constraints_ != nullptr) {
     // Save a copy of the input ChannelLayoutConstraints so that we can reset it
     // if we have to undo previous operations (ClearPreviousPassSideEffects()).
@@ -998,7 +1003,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOperandLayoutFromOutputLayout(
   if (!ShapeUtil::IsScalar(operand->shape()) &&
       ShapeUtil::Rank(operand->shape()) ==
           ShapeUtil::Rank(instruction->shape()) &&
-      InstructionRequiresInputLayoutEqualToOutputLayout(instruction)) {
+      !instruction_can_change_layout_func_(instruction)) {
     // Propagate the result layout to the operand layout if the instruction
     // requires the same layout out for the result and the operand.
     //
@@ -1076,7 +1081,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOutputLayoutFromOperandLayout(
 
   if (!ShapeUtil::IsScalar(operand->shape()) &&
       ShapeUtil::Rank(operand->shape()) == ShapeUtil::Rank(user->shape()) &&
-      InstructionRequiresInputLayoutEqualToOutputLayout(user)) {
+      !instruction_can_change_layout_func_(user)) {
     // Assign users the same layout as the operand.
     return absl::make_unique<Layout>(operand_layout);
   }
@@ -1842,7 +1847,8 @@ StatusOr<bool> LayoutAssignment::Run(HloModule* module) {
   return true;
 }
 
-bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout(
+/* static */
+bool LayoutAssignment::InstructionCanChangeLayout(
     const HloInstruction* instruction) {
   switch (instruction->opcode()) {
     case HloOpcode::kAbs:
@@ -1908,7 +1914,7 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout(
     case HloOpcode::kTanh:
     case HloOpcode::kTupleSelect:
     case HloOpcode::kWhile:
-      return true;
+      return false;
     case HloOpcode::kBatchNormGrad:
     case HloOpcode::kBatchNormInference:
     case HloOpcode::kBatchNormTraining:
@@ -1939,7 +1945,7 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout(
     case HloOpcode::kTrace:
     case HloOpcode::kTranspose:
     case HloOpcode::kTuple:
-      return false;
+      return true;
   }
 }
 
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index 15f0adcaaf..2d48e12263 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -286,6 +286,11 @@ class LayoutAssignment : public HloModulePass {
   // entry_computation_layout is modified to populate a layout for the result in
   // the case that no particular layout is requested.
   //
+  // instruction_can_change_layout_func is a function object that determines
+  // whether an instruction can change layouts. An instruction not being able to
+  // change layout means that it requires operands with the same rank as the
+  // output to have the same layout as the output.
+  //
   // channel_constraints is both an input and output. Any sends or recvs that
   // are present in channel_constraints will be laid out as constrained. Any
   // unconstrained sends or recvs will be laid out as locally optimal and their
@@ -295,6 +300,8 @@ class LayoutAssignment : public HloModulePass {
   // within any module passed to `Run`.
   explicit LayoutAssignment(
       ComputationLayout* entry_computation_layout,
+      std::function<bool(const HloInstruction*)>
+          instruction_can_change_layout_func = InstructionCanChangeLayout,
       ChannelLayoutConstraints* channel_constraints = nullptr);
   ~LayoutAssignment() override {}
   absl::string_view name() const override { return "layout-assignment"; }
@@ -303,10 +310,10 @@ class LayoutAssignment : public HloModulePass {
   // (any layouts were changed).
   StatusOr<bool> Run(HloModule* module) override;
 
-  // Returns true if the instruction requires that operands with the same rank
-  // as the output have to have the same layout as the output.
-  virtual bool InstructionRequiresInputLayoutEqualToOutputLayout(
-      const HloInstruction* instruction);
+  // Determines whether an instruction can change layouts. An instruction not
+  // being able to change layout means that it requires operands with the same
+  // rank as the output to have the same layout as the output.
+  static bool InstructionCanChangeLayout(const HloInstruction* instruction);
 
  protected:
   // These methods, invoked by PropagateConstraints, propagate a layout
@@ -522,6 +529,9 @@ class LayoutAssignment : public HloModulePass {
   // The set of HLO instructions which lacked any layout constraint, thus
   // receiving propagated default layouts.
   absl::flat_hash_set<const HloInstruction*> unconstrained_layout_instructions_;
+
+  std::function<bool(const HloInstruction*)>
+      instruction_can_change_layout_func_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 10f9a95121..15c16d667c 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -55,7 +55,8 @@ class LayoutAssignmentTest : public HloVerifiedTestBase {
                      ComputationLayout* entry_computation_layout,
                      ChannelLayoutConstraints* channel_constraints = nullptr) {
     LayoutAssignment layout_assignment(
-        entry_computation_layout, /*channel_constraints=*/channel_constraints);
+        entry_computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+        /*channel_constraints=*/channel_constraints);
     EXPECT_IS_OK(layout_assignment.Run(module).status());
   }
 
-- 
GitLab


From b790ac196148b7547bb4da7091973e8f0ae58803 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 23:10:30 -0700
Subject: [PATCH 0261/1085] [XLA:CPU] Re-enable the inliner pass in the cpu
 compiler.

PiperOrigin-RevId: 215517752
---
 tensorflow/compiler/xla/service/cpu/cpu_compiler.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index ea8c200dee..afc94f2185 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -249,9 +249,7 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
       &pipeline, module->config().debug_options(),
       ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
 
-  // TODO(b/35786417): Re-enable inliner pass after fixing the bug and deciding
-  // where we will take this pass in future.
-  // pipeline.AddPass<Inliner>();
+  pipeline.AddPass<Inliner>();
 
   // TODO(b/65775800): Fix wrong output bug in Call and remove the CallInliner
   // pass.
-- 
GitLab


From ac15fb000dc0558495b62e897206e2c4ad189c5a Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 2 Oct 2018 23:18:36 -0700
Subject: [PATCH 0262/1085] Internal change.

PiperOrigin-RevId: 215518288
---
 tensorflow/python/kernel_tests/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 9490746fd9..44575fc452 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2999,6 +2999,7 @@ cuda_py_test(
         "//tensorflow/python:math_ops",
     ],
     shard_count = 20,
+    tags = ["no_oss"],  # b/117185141
 )
 
 cuda_py_test(
-- 
GitLab


From 3d452dbcf7e1a71ba449f6acf7342cdd1dd11859 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 2 Oct 2018 23:37:58 -0700
Subject: [PATCH 0263/1085] [XLA] In the HLO parser, give the module a
 non-empty default name.

Otherwise, when parsing a single instruction, the parsed module doesn't have a name, which won't pass the hlo verifier check.

PiperOrigin-RevId: 215519412
---
 tensorflow/compiler/xla/service/hlo_parser.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 0440f1b54f..dd62988bcc 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -391,7 +391,7 @@ Status HloParser::Run(HloModule* module) {
   // This means that the text is a single HLO instruction.
   if (!ParseSingleInstruction(module)) {
     return InvalidArgument(
-        "Syntax error when trying to parse the text as single "
+        "Syntax error when trying to parse the text as a single "
         "HloInstruction:\n%s",
         GetError());
   }
@@ -3348,14 +3348,14 @@ bool HloParser::ParseSingleInstruction(HloModule* module) {
 
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(
     absl::string_view str, const HloModuleConfig& config) {
-  auto module = absl::make_unique<HloModule>(/*name=*/"", config);
+  auto module = absl::make_unique<HloModule>(/*name=*/"_", config);
   HloParser parser(str);
   TF_RETURN_IF_ERROR(parser.Run(module.get()));
   return std::move(module);
 }
 
 StatusOr<std::unique_ptr<HloModule>> ParseHloString(absl::string_view str) {
-  auto module = absl::make_unique<HloModule>(/*name=*/"", HloModuleConfig());
+  auto module = absl::make_unique<HloModule>(/*name=*/"_", HloModuleConfig());
   HloParser parser(str);
   TF_RETURN_IF_ERROR(parser.Run(module.get()));
   return std::move(module);
-- 
GitLab


From 946e58e402778606d26056f5decf91ecfb4a9f89 Mon Sep 17 00:00:00 2001
From: YongJoon Lee <joon0351@gmail.com>
Date: Wed, 3 Oct 2018 16:43:55 +0900
Subject: [PATCH 0264/1085] fix spelling problem

---
 .../contrib/estimator/python/estimator/boosted_trees.py     | 6 +++---
 .../estimator/python/estimator/dnn_linear_combined.py       | 2 +-
 .../python/estimator/dnn_with_layer_annotations.py          | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
index a1f1c5f3d7..b131ed4f12 100644
--- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
+++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
@@ -75,7 +75,7 @@ class _BoostedTreesEstimator(canned_boosted_trees._BoostedTreesBase):  # pylint:
         layer.
       head: the `Head` instance defined for Estimator.
       model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
+        also be used to load checkpoints from the directory into an estimator
         to continue training a previously saved model.
       weight_column: A string or a `_NumericColumn` created by
         `tf.feature_column.numeric_column` defining feature column representing
@@ -199,7 +199,7 @@ def boosted_trees_classifier_train_in_memory(
       the model. All items in the set should be instances of classes derived
       from `FeatureColumn`.
     model_dir: Directory to save model parameters, graph and etc. This can
-      also be used to load checkpoints from the directory into a estimator
+      also be used to load checkpoints from the directory into an estimator
       to continue training a previously saved model.
     n_classes: number of label classes. Default is binary classification.
       Multiclass support is not yet implemented.
@@ -345,7 +345,7 @@ def boosted_trees_regressor_train_in_memory(
       the model. All items in the set should be instances of classes derived
       from `FeatureColumn`.
     model_dir: Directory to save model parameters, graph and etc. This can
-      also be used to load checkpoints from the directory into a estimator
+      also be used to load checkpoints from the directory into an estimator
       to continue training a previously saved model.
     label_dimension: Number of regression targets per example.
       Multi-dimensional support is not yet implemented.
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
index 724bc2c82f..4e7965ef26 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
@@ -118,7 +118,7 @@ class DNNLinearCombinedEstimator(estimator.Estimator):
       head: A `_Head` instance constructed with a method such as
         `tf.contrib.estimator.multi_label_head`.
       model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
+        also be used to load checkpoints from the directory into an estimator
         to continue training a previously saved model.
       linear_feature_columns: An iterable containing all the feature columns
         used by linear part of the model. All items in the set must be
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index 6ca7aaf989..40a91175b7 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -248,7 +248,7 @@ def DNNClassifierWithLayerAnnotations(  # pylint: disable=invalid-name
       model. All items in the set should be instances of classes derived from
       `_FeatureColumn`.
     model_dir: Directory to save model parameters, graph and etc. This can also
-      be used to load checkpoints from the directory into a estimator to
+      be used to load checkpoints from the directory into an estimator to
       continue training a previously saved model.
     n_classes: Number of label classes. Defaults to 2, namely binary
       classification. Must be > 1.
-- 
GitLab


From c248f458c76df89fa3d608dcbe7c4c5e10962c24 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 02:25:06 -0700
Subject: [PATCH 0265/1085] compat: Update forward compatibility horizon to
 2018-10-03

PiperOrigin-RevId: 215534396
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 3bb95b56c2..d833defb8e 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 2)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 3)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From dd52e1d30702df5dfc805a1f433061dfbb75c814 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 06:14:11 -0700
Subject: [PATCH 0266/1085] Fix test that was relying on old lax toco behavior

PiperOrigin-RevId: 215553161
---
 .../contrib/lite/testing/generate_examples.py      | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 18036fac6f..3f2255c454 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -762,8 +762,11 @@ def make_constant_tests(zip_path):
         dtype=parameters["dtype"],
         name="input1",
         shape=parameters["input_shape"])
-    out = tf.constant(
+    constant = tf.constant(
         create_tensor_data(parameters["dtype"], parameters["input_shape"]))
+    # This maximum node is here to avoid the situation where a graph output is
+    # a constant, which is an error in toco.
+    out = tf.maximum(dummy_input, constant)
     return [dummy_input], [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
@@ -2848,7 +2851,14 @@ def make_zeros_like_tests(zip_path):
         dtype=parameters["input_dtype"],
         name="input",
         shape=parameters["input_shape"])
-    out = tf.zeros_like(input_tensor)
+    zeros = tf.zeros_like(input_tensor)
+    # This maximum node is so that toco can perform the constants-propagation
+    # through the above zeros_like, which it can't do if the output of the
+    # zeros_like as an output of the whole graphs (graph outputs can't be
+    # constants). If toco does not perform such constants-propagation then
+    # the resulting tflite graph retains the zeros_like as a Fill op, which
+    # is unsupported by TFLite, even as a custom op.
+    out = tf.maximum(zeros, input_tensor)
     return [input_tensor], [out]
 
   def build_inputs(parameters, sess, inputs, outputs):
-- 
GitLab


From 5e637b0280986d198a25b7ceede80cdfacfff154 Mon Sep 17 00:00:00 2001
From: Andrew Banchich <andrewbanchich@gmail.com>
Date: Wed, 3 Oct 2018 09:43:06 -0400
Subject: [PATCH 0267/1085] Update README.md

Fix sentence structure
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 57efb876c9..6a73d4e8a8 100644
--- a/README.md
+++ b/README.md
@@ -65,7 +65,7 @@ guidelines](CONTRIBUTING.md). This project adheres to TensorFlow's
 uphold this code.**
 
 **We use [GitHub issues](https://github.com/tensorflow/tensorflow/issues) for
-tracking requests and bugs. So please see
+tracking requests and bugs, so please see
 [TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions
 and discussion, and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow).**
 
-- 
GitLab


From c9bdd3938e2b43334a0065b4c198ec9d491c8cb8 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 3 Oct 2018 10:04:37 -0700
Subject: [PATCH 0268/1085] [tf.data] Switch background threads to use
 `BackgroundWorker`.

PiperOrigin-RevId: 215579950
---
 tensorflow/core/kernels/data/iterator_ops.cc  |  4 ---
 .../kernels/data/map_and_batch_dataset_op.cc  | 10 ++++---
 .../core/kernels/data/model_dataset_op.cc     | 10 ++++---
 .../data/parallel_interleave_dataset_op.cc    | 27 +++++++++++--------
 .../kernels/data/parallel_map_iterator.cc     | 10 ++++---
 .../core/kernels/data/prefetch_dataset_op.cc  | 10 ++++---
 tensorflow/core/kernels/data/writer_ops.cc    | 12 ++++-----
 7 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 7a833668ac..8acd6cc724 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -16,10 +16,8 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
-#include "tensorflow/core/common_runtime/threadpool_device.h"
 #include "tensorflow/core/framework/iterator.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
-#include "tensorflow/core/framework/resource_op_kernel.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
@@ -27,13 +25,11 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/optional_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 namespace data {
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index bf08970560..6a670f1efb 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -405,9 +406,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
-          runner_thread_.reset(ctx->env()->StartThread(
-              {}, "runner_thread",
-              std::bind(&Iterator::RunnerThread, this, ctx_copy)));
+          runner_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+          runner_thread_->Schedule(
+              std::bind(&Iterator::RunnerThread, this, ctx_copy));
         }
       }
 
@@ -660,7 +662,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
       bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index 9aa505f4f1..859df57962 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -126,9 +127,10 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         if (!optimize_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          optimize_thread_.reset(ctx->env()->StartThread(
-              {}, "optimize_thread",
-              [this, new_ctx]() { OptimizeThread(new_ctx); }));
+          optimize_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "optimize_thread");
+          optimize_thread_->Schedule(
+              [this, new_ctx]() { OptimizeThread(new_ctx); });
         }
         return Status::OK();
       }
@@ -167,7 +169,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       condition_variable cond_var_;
       std::shared_ptr<model::Model> model_;
-      std::unique_ptr<Thread> optimize_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<BackgroundWorker> optimize_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 6b6b3d6ab9..9c836b836e 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -481,9 +482,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           worker_threads_.reserve(dataset()->num_threads());
           for (size_t i = 0; i < dataset()->num_threads(); ++i) {
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(ctx->env()->StartThread(
-                {}, "worker_thread",
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
+            worker_threads_.emplace_back(
+                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
+            worker_threads_.back()->Schedule(
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
           }
         }
         return Status::OK();
@@ -580,9 +582,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             }
             workers_[i].SetInputs(s, std::move(args));
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(ctx->env()->StartThread(
-                {}, "worker_thread",
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
+            worker_threads_.emplace_back(
+                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
+            worker_threads_.back()->Schedule(
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
             if (i < dataset()->cycle_length_) {
               interleave_indices_.push_back(i);
             } else {
@@ -1047,7 +1050,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // The worker threads. This must be last to ensure the
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
-      std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
+      std::vector<std::unique_ptr<BackgroundWorker>> worker_threads_
+          GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
@@ -1389,9 +1393,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          runner_thread_.reset(ctx->env()->StartThread(
-              {}, "runner_thread",
-              [this, new_ctx]() { RunnerThread(new_ctx); }));
+          runner_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+          runner_thread_->Schedule(
+              [this, new_ctx]() { RunnerThread(new_ctx); });
         }
       }
 
@@ -1645,7 +1650,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(*mu_) = false;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 13bd4b6036..626e98af91 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,6 +22,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -180,9 +181,10 @@ class ParallelMapIterator : public DatasetBaseIterator {
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
-      runner_thread_.reset(ctx->env()->StartThread(
-          {}, "runner_thread",
-          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
+      runner_thread_ =
+          MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+      runner_thread_->Schedule(
+          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy));
     }
   }
 
@@ -330,7 +332,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
       GUARDED_BY(*mu_);
-  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+  std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
   bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index 754ed772db..e9c38eb8a0 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -256,10 +257,11 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     Status EnsurePrefetchThreadStarted(IteratorContext* ctx)
         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
       if (!prefetch_thread_) {
+        prefetch_thread_ =
+            MakeUnique<BackgroundWorker>(ctx->env(), "prefetch_thread");
         std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-        prefetch_thread_.reset(ctx->env()->StartThread(
-            {}, "prefetch_thread",
-            [this, new_ctx]() { PrefetchThread(new_ctx); }));
+        prefetch_thread_->Schedule(
+            [this, new_ctx]() { PrefetchThread(new_ctx); });
       }
       return Status::OK();
     }
@@ -363,7 +365,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     string prefix_end_;
     PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_);
     std::deque<BufferElement> buffer_ GUARDED_BY(mu_);
-    std::unique_ptr<Thread> prefetch_thread_ GUARDED_BY(mu_);
+    std::unique_ptr<BackgroundWorker> prefetch_thread_ GUARDED_BY(mu_);
     bool cancelled_ GUARDED_BY(mu_) = false;
     bool prefetch_thread_finished_ GUARDED_BY(mu_) = false;
   };
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index 3f76695bb1..7bb2077b62 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel {
  public:
   explicit ToTFRecordOp(OpKernelConstruction* ctx)
       : AsyncOpKernel(ctx),
-        thread_pool_(new thread::ThreadPool(
-            ctx->env(), ThreadOptions(),
-            strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())),
-            1 /* num_threads */, false /* low_latency_hint */)) {}
+        background_worker_(
+            ctx->env(),
+            strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) {
+  }
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
@@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel {
     // The call to `iterator->GetNext()` may block and depend on an
     // inter-op thread pool thread, so we issue the call from the
     // owned thread pool.
-    thread_pool_->Schedule([this, ctx, done]() {
+    background_worker_.Schedule([this, ctx, done]() {
       string filename;
       OP_REQUIRES_OK_ASYNC(
           ctx, ParseScalarArgument<string>(ctx, "filename", &filename), done);
@@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel {
   }
 
  private:
-  std::unique_ptr<thread::ThreadPool> thread_pool_;
+  BackgroundWorker background_worker_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU),
-- 
GitLab


From 2af8fd975aaf5c70ebb396895fa15a8f034a8440 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Wed, 3 Oct 2018 10:09:14 -0700
Subject: [PATCH 0269/1085] Skip control flow functionalization if there is no
 Switch or Merge node.

PiperOrigin-RevId: 215580891
---
 .../tf2xla/functionalize_control_flow.cc      | 129 ++++++++++++------
 1 file changed, 90 insertions(+), 39 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 36c6f5d316..28e09d7b79 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -79,7 +79,10 @@ Status FunctionalizeControlFlowForFunction(
     const string& func_name, const string& new_func_name,
     const protobuf::Map<string, tensorflow::AttrValue>& attrs,
     FunctionLibraryDefinition* fld, FunctionLibraryRuntime* flr,
-    std::map<string, string>* canonicalized_name_to_new_name) {
+    std::map<string, absl::optional<string>>* canonicalized_name_to_new_name,
+    bool* modified) {
+  *modified = false;
+
   // Convert the function to Graph.
   FunctionLibraryRuntime::Handle handle;
   TF_RETURN_IF_ERROR(flr->Instantiate(func_name, AttrSlice(&attrs), &handle));
@@ -92,6 +95,19 @@ Status FunctionalizeControlFlowForFunction(
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
 
+  // Check if the graph has Switch or Merge node before optimizing the graph.
+  bool has_switch_or_merge = false;
+  for (Node* n : body->graph->nodes()) {
+    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
+      has_switch_or_merge = true;
+      break;
+    }
+  }
+  // We cannot return here directly if the graph has no Switch/Merge.
+  // It might contain function call nodes, or If/While nodes with Switch/Merge
+  // in function body. We still need to rewrite those functions and modify
+  // corresponding nodes.
+
   // Call graph optimizer. The most important optimization we need is constant
   // folding, which will replace ops like Shape/BroadcastGradientArgs with
   // constant shape input. Without this optimization, those ops might become
@@ -129,6 +145,13 @@ Status FunctionalizeControlFlowForFunction(
         absl::StrCat("functionalize_control_flow_after_opt_", func_name),
         *optimized_graph, fld);
   }
+  // Some inlined functions might have Switch/Merge nodes.
+  for (Node* n : optimized_graph->nodes()) {
+    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
+      has_switch_or_merge = true;
+      break;
+    }
+  }
 
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
@@ -151,10 +174,15 @@ Status FunctionalizeControlFlowForFunction(
           Canonicalize(name, AttrSlice(&associated_function.attrs()));
       auto iter = canonicalized_name_to_new_name->find(canonicalized_name);
       string new_name;
+      bool function_modified;
       if (iter != canonicalized_name_to_new_name->end()) {
-        // If we already functionalized this function, skip functionalization
-        // but still rewrite the node.
-        new_name = iter->second;
+        // If we already processed this function, check if it was rewritten. If
+        // the function was rewritten, the entry will be non-empty. Otherwise
+        // the entry will be empty.
+        function_modified = iter->second.has_value();
+        if (function_modified) {
+          new_name = iter->second.value();
+        }
       } else {
         if (associated_function.type() ==
             AssociatedFunctionInfo::AssociatedFunctionType::kSymbolicGradient) {
@@ -166,42 +194,62 @@ Status FunctionalizeControlFlowForFunction(
         }
         TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
             name, new_name, associated_function.attrs(), fld, flr,
-            canonicalized_name_to_new_name));
-        (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+            canonicalized_name_to_new_name, &function_modified));
+        if (function_modified) {
+          // If the function was rewritten, add an non-empty entry. So later we
+          // know we have processed this function, and it was rewritten into
+          // another function.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+        } else {
+          // If the function was not rewritten, add an empty entry. So later
+          // we know we have processed this function, and it does not need to be
+          // rewritten.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = absl::nullopt;
+        }
+      }
+      if (function_modified) {
+        *modified = true;
+
+        // Notice that if "n" is a function call, RewriteAssociatedFunction()
+        // will delete it and create a new node instead, making "n" an invalid
+        // pointer. That's fine because in that case, associated_functions will
+        // only have one member and the loop will only run once.
+        TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
+            optimized_graph.get(), n, fld, associated_function, new_name));
       }
-      // Notice that if "n" is a function call, RewriteAssociatedFunction() will
-      // delete it and create a new node instead, making "n" an invalid pointer.
-      // That's fine because in that case, associated_functions will only have
-      // one member and the loop will only run once.
-      TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-          optimized_graph.get(), n, fld, associated_function, new_name));
     }
   }
 
-  // Functionalize the function body.
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-        *optimized_graph, fld);
-  }
-  TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-        *optimized_graph, fld);
+  if (has_switch_or_merge) {
+    *modified = true;
+
+    // Functionalize the function body.
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
+          *optimized_graph, fld);
+    }
+    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
+          *optimized_graph, fld);
+    }
   }
-  FunctionDef functionalized_fdef;
-  TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
-                                        &functionalized_fdef));
 
-  // Add rewritten FunctionDef into library.
-  if (func_name == new_func_name) {
-    VLOG(2) << "Replacing function " << func_name;
-    TF_RETURN_IF_ERROR(
-        fld->ReplaceFunction(new_func_name, functionalized_fdef));
-  } else {
-    VLOG(2) << "Adding function " << new_func_name;
-    TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+  if (*modified) {
+    // Add rewritten FunctionDef into library.
+    FunctionDef functionalized_fdef;
+    TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
+                                          &functionalized_fdef));
+    if (func_name == new_func_name) {
+      VLOG(2) << "Replacing function " << func_name;
+      TF_RETURN_IF_ERROR(
+          fld->ReplaceFunction(new_func_name, functionalized_fdef));
+    } else {
+      VLOG(2) << "Adding function " << new_func_name;
+      TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+    }
   }
 
   return ret_status;
@@ -227,7 +275,7 @@ Status FunctionalizeControlFlowPass::Run(
           {"TPUCompile", "function"},
           {"XlaLaunch", "function"},
       };
-  std::map<string, string> canonicalized_name_to_new_name;
+  std::map<string, absl::optional<string>> canonicalized_name_to_new_name;
   for (Node* n : graph->nodes()) {
     auto it = kNodeTypeToFunctionAttrMapping->find(n->type_string());
     if (it == kNodeTypeToFunctionAttrMapping->end()) {
@@ -242,12 +290,15 @@ Status FunctionalizeControlFlowPass::Run(
               << ". Corresponding function: " << func.name();
       string new_func_name = options.flib_def->UniqueFunctionName(
           absl::StrCat(func.name(), "_f15n_"));
+      bool modified;
       TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
           func.name(), new_func_name, func.attr(), options.flib_def, flr,
-          &canonicalized_name_to_new_name));
-      n->ClearAttr(func_attr);
-      func.set_name(new_func_name);
-      n->AddAttr(func_attr, func);
+          &canonicalized_name_to_new_name, &modified));
+      if (modified) {
+        n->ClearAttr(func_attr);
+        func.set_name(new_func_name);
+        n->AddAttr(func_attr, func);
+      }
     }
   }
 
-- 
GitLab


From 022af5300701d457d848e60ea511dd8d05f68738 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Wed, 3 Oct 2018 10:18:59 -0700
Subject: [PATCH 0270/1085] Fix TfLiteTensor invalidation issue when using the
 Java API

Fix an issue where the Java Tensor class would hold a reference
to an invalidated TfLiteTensor instance. This issue was manifest
in certain models that add temporary tensors during execution.

PiperOrigin-RevId: 215582842
---
 .../lite/NativeInterpreterWrapper.java        | 26 +++++++---
 .../main/java/org/tensorflow/lite/Tensor.java | 27 ++++++++--
 .../native/nativeinterpreterwrapper_jni.cc    | 22 +++-----
 .../native/nativeinterpreterwrapper_jni.h     | 24 ++++-----
 .../lite/java/src/main/native/tensor_jni.cc   | 50 +++++++++++++++----
 .../lite/java/src/main/native/tensor_jni.h    | 17 +++++++
 .../java/org/tensorflow/lite/TensorTest.java  | 13 ++++-
 7 files changed, 129 insertions(+), 50 deletions(-)

diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
index 9bc44bf797..6f03e7853a 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
@@ -18,7 +18,6 @@ package org.tensorflow.lite;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.MappedByteBuffer;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -83,6 +82,19 @@ final class NativeInterpreterWrapper implements AutoCloseable {
   /** Releases resources associated with this {@code NativeInterpreterWrapper}. */
   @Override
   public void close() {
+    // Close the tensors first as they may reference the native interpreter.
+    for (int i = 0; i < inputTensors.length; ++i) {
+      if (inputTensors[i] != null) {
+        inputTensors[i].close();
+        inputTensors[i] = null;
+      }
+    }
+    for (int i = 0; i < outputTensors.length; ++i) {
+      if (outputTensors[i] != null) {
+        outputTensors[i].close();
+        outputTensors[i] = null;
+      }
+    }
     delete(errorHandle, modelHandle, interpreterHandle);
     errorHandle = 0;
     modelHandle = 0;
@@ -91,8 +103,6 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     inputsIndexes = null;
     outputsIndexes = null;
     isMemoryAllocated = false;
-    Arrays.fill(inputTensors, null);
-    Arrays.fill(outputTensors, null);
   }
 
   /** Sets inputs, runs model inference and returns outputs. */
@@ -260,7 +270,8 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     Tensor inputTensor = inputTensors[index];
     if (inputTensor == null) {
       inputTensor =
-          inputTensors[index] = Tensor.fromHandle(getInputTensor(interpreterHandle, index));
+          inputTensors[index] =
+              Tensor.fromIndex(interpreterHandle, getInputTensorIndex(interpreterHandle, index));
     }
     return inputTensor;
   }
@@ -282,7 +293,8 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     Tensor outputTensor = outputTensors[index];
     if (outputTensor == null) {
       outputTensor =
-          outputTensors[index] = Tensor.fromHandle(getOutputTensor(interpreterHandle, index));
+          outputTensors[index] =
+              Tensor.fromIndex(interpreterHandle, getOutputTensorIndex(interpreterHandle, index));
     }
     return outputTensor;
   }
@@ -317,9 +329,9 @@ final class NativeInterpreterWrapper implements AutoCloseable {
 
   private static native long allocateTensors(long interpreterHandle, long errorHandle);
 
-  private static native long getInputTensor(long interpreterHandle, int inputIdx);
+  private static native int getInputTensorIndex(long interpreterHandle, int inputIdx);
 
-  private static native long getOutputTensor(long interpreterHandle, int outputIdx);
+  private static native int getOutputTensorIndex(long interpreterHandle, int outputIdx);
 
   private static native int getInputCount(long interpreterHandle);
 
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
index f174178d98..6ca47aa3ed 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
@@ -23,13 +23,26 @@ import java.util.Arrays;
 /**
  * A typed multi-dimensional array used in Tensorflow Lite.
  *
- * <p>The native handle of a {@code Tensor} belongs to {@code NativeInterpreterWrapper}, thus not
- * needed to be closed here.
+ * <p>The native handle of a {@code Tensor} is managed by {@code NativeInterpreterWrapper}, and does
+ * not needed to be closed by the client. However, once the {@code NativeInterpreterWrapper} has
+ * been closed, the tensor handle will be invalidated.
  */
 public final class Tensor {
 
-  static Tensor fromHandle(long nativeHandle) {
-    return new Tensor(nativeHandle);
+  /**
+   * Creates a Tensor wrapper from the provided interpreter instance and tensor index.
+   *
+   * <p>The caller is responsible for closing the created wrapper, and ensuring the provided
+   * native interpreter is valid until the tensor is closed.
+   */
+  static Tensor fromIndex(long nativeInterpreterHandle, int tensorIndex) {
+    return new Tensor(create(nativeInterpreterHandle, tensorIndex));
+  }
+
+  /** Disposes of any resources used by the Tensor wrapper. */
+  void close() {
+    delete(nativeHandle);
+    nativeHandle = 0;
   }
 
   /** Returns the {@link DataType} of elements stored in the Tensor. */
@@ -235,7 +248,7 @@ public final class Tensor {
     return o instanceof ByteBuffer;
   }
 
-  private final long nativeHandle;
+  private long nativeHandle;
   private final DataType dtype;
   private int[] shapeCopy;
 
@@ -249,6 +262,10 @@ public final class Tensor {
     return buffer(nativeHandle).order(ByteOrder.nativeOrder());
   }
 
+  private static native long create(long interpreterHandle, int tensorIndex);
+
+  private static native void delete(long handle);
+
   private static native ByteBuffer buffer(long handle);
 
   private static native void writeDirectBuffer(long handle, ByteBuffer src);
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
index abb7320bc5..4dc73fbcf8 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
@@ -159,26 +159,20 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_allocateTensors(
   }
 }
 
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensor(JNIEnv* env,
-                                                                 jclass clazz,
-                                                                 jlong handle,
-                                                                 jint index) {
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint input_index) {
   tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle);
   if (interpreter == nullptr) return 0;
-  return reinterpret_cast<jlong>(
-      interpreter->tensor(interpreter->inputs()[index]));
+  return interpreter->inputs()[input_index];
 }
 
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensor(JNIEnv* env,
-                                                                  jclass clazz,
-                                                                  jlong handle,
-                                                                  jint index) {
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint output_index) {
   tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle);
   if (interpreter == nullptr) return 0;
-  return reinterpret_cast<jlong>(
-      interpreter->tensor(interpreter->outputs()[index]));
+  return interpreter->outputs()[output_index];
 }
 
 JNIEXPORT jint JNICALL
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
index aa809dff8a..f8f3e7028c 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
@@ -46,25 +46,21 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_allocateTensors(
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
- *  Method:    getInputTensor
- *  Signature: (JI)J
+ *  Method:    getInputTensorIndex
+ *  Signature: (JI)I
  */
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensor(JNIEnv* env,
-                                                                 jclass clazz,
-                                                                 jlong handle,
-                                                                 jint index);
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint input_index);
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
- *  Method:    getOutputTensor
- *  Signature: (JI)J
+ *  Method:    getOutputTensorIndex
+ *  Signature: (JI)I
  */
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensor(JNIEnv* env,
-                                                                  jclass clazz,
-                                                                  jlong handle,
-                                                                  jint index);
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint output_index);
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
diff --git a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc
index 7ff96a3172..d3378f5f14 100644
--- a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc
+++ b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc
@@ -16,17 +16,36 @@ limitations under the License.
 #include "tensorflow/contrib/lite/java/src/main/native/tensor_jni.h"
 #include <cstring>
 #include <memory>
+#include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/java/src/main/native/exception_jni.h"
 
 namespace {
 
-TfLiteTensor* convertLongToTensor(JNIEnv* env, jlong handle) {
+// Convenience handle for obtaining a TfLiteTensor given an interpreter and
+// tensor index.
+//
+// Historically, the Java Tensor class used a TfLiteTensor pointer as its native
+// handle. However, this approach isn't generally safe, as the interpreter may
+// invalidate all TfLiteTensor* handles during inference or allocation.
+class TensorHandle {
+ public:
+  TensorHandle(tflite::Interpreter* interpreter, int tensor_index)
+      : interpreter_(interpreter), tensor_index_(tensor_index) {}
+
+  TfLiteTensor* tensor() const { return interpreter_->tensor(tensor_index_); }
+
+ private:
+  tflite::Interpreter* const interpreter_;
+  const int tensor_index_;
+};
+
+TfLiteTensor* GetTensorFromHandle(JNIEnv* env, jlong handle) {
   if (handle == 0) {
     throwException(env, kIllegalArgumentException,
                    "Internal error: Invalid handle to TfLiteTensor.");
     return nullptr;
   }
-  return reinterpret_cast<TfLiteTensor*>(handle);
+  return reinterpret_cast<TensorHandle*>(handle)->tensor();
 }
 
 size_t elementByteSize(TfLiteType data_type) {
@@ -192,10 +211,23 @@ size_t writeMultiDimensionalArray(JNIEnv* env, jobject src, TfLiteType type,
 
 }  // namespace
 
+JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_Tensor_create(
+    JNIEnv* env, jclass clazz, jlong interpreter_handle, jint tensor_index) {
+  tflite::Interpreter* interpreter =
+      reinterpret_cast<tflite::Interpreter*>(interpreter_handle);
+  return reinterpret_cast<jlong>(new TensorHandle(interpreter, tensor_index));
+}
+
+JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_delete(JNIEnv* env,
+                                                              jclass clazz,
+                                                              jlong handle) {
+  delete reinterpret_cast<TensorHandle*>(handle);
+}
+
 JNIEXPORT jobject JNICALL Java_org_tensorflow_lite_Tensor_buffer(JNIEnv* env,
                                                                  jclass clazz,
                                                                  jlong handle) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return nullptr;
   if (tensor->data.raw == nullptr) {
     throwException(env, kIllegalArgumentException,
@@ -208,7 +240,7 @@ JNIEXPORT jobject JNICALL Java_org_tensorflow_lite_Tensor_buffer(JNIEnv* env,
 
 JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_writeDirectBuffer(
     JNIEnv* env, jclass clazz, jlong handle, jobject src) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return;
 
   char* src_data_raw = static_cast<char*>(env->GetDirectBufferAddress(src));
@@ -226,7 +258,7 @@ Java_org_tensorflow_lite_Tensor_readMultiDimensionalArray(JNIEnv* env,
                                                           jclass clazz,
                                                           jlong handle,
                                                           jobject value) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return;
   int num_dims = tensor->dims->size;
   if (num_dims == 0) {
@@ -243,7 +275,7 @@ Java_org_tensorflow_lite_Tensor_writeMultiDimensionalArray(JNIEnv* env,
                                                            jclass clazz,
                                                            jlong handle,
                                                            jobject src) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return;
   if (tensor->data.raw == nullptr) {
     throwException(env, kIllegalArgumentException,
@@ -262,14 +294,14 @@ Java_org_tensorflow_lite_Tensor_writeMultiDimensionalArray(JNIEnv* env,
 JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_dtype(JNIEnv* env,
                                                              jclass clazz,
                                                              jlong handle) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return 0;
   return static_cast<jint>(tensor->type);
 }
 
 JNIEXPORT jintArray JNICALL
 Java_org_tensorflow_lite_Tensor_shape(JNIEnv* env, jclass clazz, jlong handle) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return nullptr;
   int num_dims = tensor->dims->size;
   jintArray result = env->NewIntArray(num_dims);
@@ -280,7 +312,7 @@ Java_org_tensorflow_lite_Tensor_shape(JNIEnv* env, jclass clazz, jlong handle) {
 JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_numBytes(JNIEnv* env,
                                                                 jclass clazz,
                                                                 jlong handle) {
-  const TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  const TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return 0;
   return static_cast<jint>(tensor->bytes);
 }
diff --git a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h
index 2f73128bdf..c5e9690e9a 100644
--- a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h
+++ b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h
@@ -23,6 +23,23 @@ limitations under the License.
 extern "C" {
 #endif  // __cplusplus
 
+/*
+ * Class:     org_tensorflow_lite_Tensor
+ * Method:    create
+ * Signature: (JI)J
+ */
+JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_Tensor_create(
+    JNIEnv* env, jclass clazz, jlong interpreter_handle, jint tensor_index);
+
+/*
+ * Class:     org_tensorflow_lite_Tensor
+ * Method:    delete
+ * Signature: (J)
+ */
+JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_delete(JNIEnv* env,
+                                                              jclass clazz,
+                                                              jlong handle);
+
 /*
  * Class:     org_tensorflow_lite_Tensor
  * Method:    buffer
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
index 85ad393d89..56a38ea3e2 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
@@ -182,7 +182,7 @@ public final class TensorTest {
     dataType = Tensor.dataTypeOf(testFloatArray);
     assertThat(dataType).isEqualTo(DataType.FLOAT32);
     float[][] testMultiDimArray = {testFloatArray, testFloatArray, testFloatArray};
-    dataType = Tensor.dataTypeOf(testFloatArray);
+    dataType = Tensor.dataTypeOf(testMultiDimArray);
     assertThat(dataType).isEqualTo(DataType.FLOAT32);
     try {
       double[] testDoubleArray = {0.783, 0.251};
@@ -238,4 +238,15 @@ public final class TensorTest {
     assertThat(shape[1]).isEqualTo(3);
     assertThat(shape[2]).isEqualTo(1);
   }
+
+  @Test
+  public void testUseAfterClose() {
+    tensor.close();
+    try {
+      tensor.numBytes();
+      fail();
+    } catch (IllegalArgumentException e) {
+      // Expected failure.
+    }
+  }
 }
-- 
GitLab


From a5b3cd8b4d28cfcdcb9adb3d3568b168b9b8a088 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 10:19:55 -0700
Subject: [PATCH 0271/1085] Fix bug in shape function for transpose: If the
 rank of the input is unknown and the rank derived from the permutation array
 is 0 or 1, the shape is ambiguous and cannot be determined at graph
 construction time. In this case, forward the shape of the input.

PiperOrigin-RevId: 215583050
---
 tensorflow/core/ops/array_ops.cc              |  8 +++++
 tensorflow/core/ops/array_ops_test.cc         |  1 +
 tensorflow/python/kernel_tests/BUILD          |  2 +-
 .../python/kernel_tests/transpose_op_test.py  | 29 +++++++++++++++++--
 4 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index c9f80df5e4..f55562ec99 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -133,6 +133,14 @@ Status TransposeShapeFn(InferenceContext* c) {
   } else {
     rank = perm->NumElements();
   }
+  if (!c->RankKnown(input) && rank < 2) {
+    // A permutation array containing a single element is ambiguous. It could
+    // indicate either a scalar or a 1-dimensional array, both of which the
+    // transpose op returns unchanged.
+    c->set_output(0, input);
+    return Status::OK();
+  }
+
   std::vector<DimensionHandle> dims;
   dims.resize(rank);
   TF_RETURN_IF_ERROR(c->WithRank(input, rank, &input));
diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc
index 03dab390a7..1c29cd2491 100644
--- a/tensorflow/core/ops/array_ops_test.cc
+++ b/tensorflow/core/ops/array_ops_test.cc
@@ -975,6 +975,7 @@ TEST(ArrayOpsTest, Transpose_ShapeFn) {
   INFER_OK(op, "?;[2]", "[?,?]");
   INFER_OK(op, "[?,?];[2]", "[d0_1,d0_0]");
   INFER_OK(op, "[1,?];[2]", "[d0_1,d0_0]");
+  INFER_OK(op, "?;[0]", "in0");
 
   // Invalid arguments.
   perm = test::AsTensor<int32>({1, 2});
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 44575fc452..c0e9a3c975 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2367,7 +2367,7 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
     ],
-    shard_count = 4,
+    shard_count = 10,
     tags = [
         "no_gpu",
         "no_oss",
diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py
index f42800226e..a825052dd2 100644
--- a/tensorflow/python/kernel_tests/transpose_op_test.py
+++ b/tensorflow/python/kernel_tests/transpose_op_test.py
@@ -39,7 +39,12 @@ class TransposeTest(test.TestCase):
     return ret
 
   def _compareCpu(self, x, p, conjugate=False):
-    np_ans = self._np_transpose(x, p)
+    if p is None:
+      rank = x.ndim
+      perm = (rank - 1) - np.arange(rank)
+    else:
+      perm = p
+    np_ans = self._np_transpose(x, perm)
     if conjugate:
       np_ans = np.conj(np_ans)
     with self.test_session(use_gpu=False):
@@ -65,7 +70,12 @@ class TransposeTest(test.TestCase):
       return tf_ans, jacob_t
 
   def _compareGpu(self, x, p, conjugate=False):
-    np_ans = self._np_transpose(x, p)
+    if p is None:
+      rank = x.ndim
+      perm = (rank - 1) - np.arange(rank)
+    else:
+      perm = p
+    np_ans = self._np_transpose(x, perm)
     if conjugate:
       np_ans = np.conj(np_ans)
     with self.test_session(use_gpu=True):
@@ -102,6 +112,11 @@ class TransposeTest(test.TestCase):
         self._compareCpu(x, p, conjugate=c)
         if use_gpu:
           self._compareGpu(x, p, conjugate=c)
+    # Test with an empty permutation
+    for c in cs:
+      self._compareCpu(x, None, conjugate=c)
+      if use_gpu:
+        self._compareGpu(x, None, conjugate=c)
 
   def _compare_cpu_gpu(self, x):
     n = np.ndim(x)
@@ -449,6 +464,10 @@ class TransposeTest(test.TestCase):
     self.assertEqual(
         tensor_shape.TensorShape(None),
         array_ops.transpose(array_ops.placeholder(dtypes.int32)).get_shape())
+    self.assertEqual(
+        tensor_shape.TensorShape(None),
+        array_ops.transpose(array_ops.placeholder(dtypes.int32),
+                            [0]).get_shape())
 
   def testNullTensor(self):
     with self.cached_session():
@@ -456,6 +475,12 @@ class TransposeTest(test.TestCase):
       xt = array_ops.transpose(x, [0, 2, 1]).eval()
       self.assertAllEqual(xt.shape, (1, 0, 4))
 
+  def testScalar(self):
+    with self.cached_session():
+      x = constant_op.constant(42, dtype=dtypes.float32, shape=[])
+      xt = array_ops.transpose(x).eval()
+      self.assertAllEqual(xt, x)
+
   def _testError(self, x, p, err):
     with self.cached_session():
       with self.assertRaisesOpError(err):
-- 
GitLab


From 0f9baa02a4e32b672b0cc29e99d5bfcf1329988c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 10:26:41 -0700
Subject: [PATCH 0272/1085] Re-enable the arithmetic optimizer by default in
 tests. Add a warning to not disable optimizers without consulting with the
 Grappler team.

PiperOrigin-RevId: 215584369
---
 tensorflow/python/framework/test_util.py                    | 6 ++++--
 .../python/kernel_tests/distributions/laplace_test.py       | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 6673bc5561..4ec4b41b5e 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1992,10 +1992,12 @@ class TensorFlowTestCase(googletest.TestCase):
       # Don't perform optimizations for tests so we don't inadvertently run
       # gpu ops on cpu
       config.graph_options.optimizer_options.opt_level = -1
+      # Disable Grappler constant folding since some tests & benchmarks
+      # use constant input and become meaningless after constant folding.
+      # DO NOT DISABLE GRAPPLER OPTIMIZERS WITHOUT CONSULTING WITH THE
+      # GRAPPLER TEAM.
       config.graph_options.rewrite_options.constant_folding = (
           rewriter_config_pb2.RewriterConfig.OFF)
-      config.graph_options.rewrite_options.arithmetic_optimization = (
-          rewriter_config_pb2.RewriterConfig.OFF)
       config.graph_options.rewrite_options.pin_to_host_optimization = (
           rewriter_config_pb2.RewriterConfig.OFF)
       return config
diff --git a/tensorflow/python/kernel_tests/distributions/laplace_test.py b/tensorflow/python/kernel_tests/distributions/laplace_test.py
index 630c2cb424..2610ba23b8 100644
--- a/tensorflow/python/kernel_tests/distributions/laplace_test.py
+++ b/tensorflow/python/kernel_tests/distributions/laplace_test.py
@@ -275,8 +275,8 @@ class LaplaceTest(test.TestCase):
     self.assertAllClose(
         sample_values.var(axis=0),
         stats.laplace.var(loc_bc, scale=scale_bc),
-        rtol=0.10,
-        atol=0.)
+        rtol=0.105,
+        atol=0.0)
     fails = 0
     trials = 0
     for ai, a in enumerate(np.reshape(loc_v, [-1])):
-- 
GitLab


From 26ce26d127587bc1f5dc7950e22f7d935d372abf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 10:31:02 -0700
Subject: [PATCH 0273/1085] Re-add proto fields temporarily for internal
 compatibility.

PiperOrigin-RevId: 215585187
---
 .../tpu/proto/optimization_parameters.proto     | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index b9e0747fa4..8529b48c15 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -28,6 +28,7 @@ message LearningRate {
 // https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer
 // https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L151
 message AdagradParameters {
+  float initial_accumulator = 1;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer
@@ -41,6 +42,8 @@ message FtrlParameters {
   float l1 = 1;
   float l2 = 2;
   float lr_power = 3;
+  float initial_accum = 4;
+  float initial_linear = 5;
 }
 
 // The Adam optimizer does not implement hyper-parameter update; use the dynamic
@@ -67,6 +70,8 @@ message AdamParameters {
   float beta1 = 3;
   float beta2 = 4;
   float epsilon = 5;
+  float initial_m = 6;
+  float initial_v = 7;
   bool use_non_lazy_adam = 8;
   bool use_max_with_epsilon = 9;
 }
@@ -76,6 +81,7 @@ message AdamParameters {
 message MomentumParameters {
   float momentum = 1;
   bool use_nesterov = 2;
+  float initial_accum = 3;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -84,6 +90,8 @@ message RmsPropParameters {
   float rho = 1;
   float momentum = 2;
   float epsilon = 3;
+  float initial_ms = 4;
+  float initial_mom = 5;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -92,6 +100,9 @@ message CenteredRmsPropParameters {
   float rho = 1;
   float momentum = 2;
   float epsilon = 3;
+  float initial_ms = 4;
+  float initial_mom = 5;
+  float initial_mg = 6;
 }
 
 // Variant of algorithm in http://proceedings.mlr.press/v44/shamir15.pdf
@@ -108,6 +119,9 @@ message MdlAdagradLightParameters {
   float mdl_hard_limit = 10;
   bool hard_limit_min_benefit = 11;
   bool mdl_regularize = 12;
+  float initial_accumulator = 13;
+  float initial_weight = 14;
+  float initial_benefit = 15;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -115,6 +129,8 @@ message MdlAdagradLightParameters {
 message AdadeltaParameters {
   float rho = 1;
   float epsilon = 2;
+  float initial_accumulator = 3;
+  float initial_update = 4;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
@@ -122,6 +138,7 @@ message AdadeltaParameters {
 message ProximalAdagradParameters {
   float l1 = 1;
   float l2 = 2;
+  float initial_accumulator = 3;
 }
 
 message OptimizationParameters {
-- 
GitLab


From af1458a9c1a3bc8d49a1e55386950b4941ab1815 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Wed, 3 Oct 2018 10:39:07 -0700
Subject: [PATCH 0274/1085] Fix filename/line number lookup for logging.

Log messages now show the correct file/function name/line number instead of that of the helper function.

PiperOrigin-RevId: 215586852
---
 tensorflow/python/platform/tf_logging.py | 58 ++++++++++++++++++------
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/platform/tf_logging.py b/tensorflow/python/platform/tf_logging.py
index 5962d2f220..59e60856ae 100644
--- a/tensorflow/python/platform/tf_logging.py
+++ b/tensorflow/python/platform/tf_logging.py
@@ -25,6 +25,7 @@ import logging as _logging
 import os as _os
 import sys as _sys
 import time as _time
+import traceback as _traceback
 from logging import DEBUG
 from logging import ERROR
 from logging import FATAL
@@ -36,13 +37,49 @@ import six
 
 from tensorflow.python.util.tf_export import tf_export
 
-
 # Don't use this directly. Use _get_logger() instead.
 _logger = None
 _logger_lock = threading.Lock()
 
 
+def _get_caller(offset=3):
+  """Returns a code and frame object for the lowest non-logging stack frame."""
+  # Use sys._getframe().  This avoids creating a traceback object.
+  # pylint: disable=protected-access
+  f = _sys._getframe(offset)
+  # pylint: enable=protected-access
+  our_file = f.f_code.co_filename
+  f = f.f_back
+  while f:
+    code = f.f_code
+    if code.co_filename != our_file:
+      return code, f
+    f = f.f_back
+  return None, None
+
+
+# The definition of `findCaller` changed in Python 3.2
+if _sys.version_info.major >= 3 and _sys.version_info.minor >= 2:
+  def _logger_find_caller(stack_info=False):  # pylint: disable=g-wrong-blank-lines
+    code, frame = _get_caller(4)
+    sinfo = None
+    if stack_info:
+      sinfo = '\n'.join(_traceback.format_stack())
+    if code:
+      return (code.co_filename, frame.f_lineno, code.co_name, sinfo)
+    else:
+      return '(unknown file)', 0, '(unknown function)', sinfo
+else:
+  def _logger_find_caller():  # pylint: disable=g-wrong-blank-lines
+    code, frame = _get_caller(4)
+    if code:
+      return (code.co_filename, frame.f_lineno, code.co_name)
+    else:
+      return '(unknown file)', 0, '(unknown function)'
+
+
 def _get_logger():
+  """Return TF logger instance."""
   global _logger
 
   # Use double-checked locking to avoid taking lock unnecessarily.
@@ -58,6 +95,9 @@ def _get_logger():
     # Scope the TensorFlow logger to not conflict with users' loggers.
     logger = _logging.getLogger('tensorflow')
 
+    # Override findCaller on the logger to skip internal helper functions
+    logger.findCaller = _logger_find_caller
+
     # Don't further configure the TensorFlow logger if the root logger is
     # already configured. This prevents double logging in those cases.
     if not _logging.getLogger().handlers:
@@ -216,18 +256,10 @@ def log_if(level, msg, condition, *args):
 
 def _GetFileAndLine():
   """Returns (filename, linenumber) for the stack frame."""
-  # Use sys._getframe().  This avoids creating a traceback object.
-  # pylint: disable=protected-access
-  f = _sys._getframe()
-  # pylint: enable=protected-access
-  our_file = f.f_code.co_filename
-  f = f.f_back
-  while f:
-    code = f.f_code
-    if code.co_filename != our_file:
-      return (code.co_filename, f.f_lineno)
-    f = f.f_back
-  return ('<unknown>', 0)
+  code, f = _get_caller()
+  if not code:
+    return ('<unknown>', 0)
+  return (code.co_filename, f.f_lineno)
 
 
 def google2_log_prefix(level, timestamp=None, file_and_line=None):
-- 
GitLab


From 560624bff65b7b502da2c52f9b250d9181c4a3f7 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Wed, 3 Oct 2018 10:51:17 -0700
Subject: [PATCH 0275/1085] Internal change.

PiperOrigin-RevId: 215589009
---
 tensorflow/contrib/lite/python/interpreter.py | 17 ++++
 .../interpreter_wrapper.cc                    | 19 ++++-
 .../interpreter_wrapper/interpreter_wrapper.h |  1 +
 .../model_coverage/model_coverage_lib.py      | 81 +++++++++++++++++--
 .../model_coverage/model_coverage_lib_test.py | 38 +++++++++
 5 files changed, 147 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py
index 5700bf7892..6300552cbe 100644
--- a/tensorflow/contrib/lite/python/interpreter.py
+++ b/tensorflow/contrib/lite/python/interpreter.py
@@ -129,6 +129,23 @@ class Interpreter(object):
 
     return details
 
+  def get_tensor_details(self):
+    """Gets tensor details for every tensor with valid tensor details.
+
+    Tensors where required information about the tensor is not found are not
+    added to the list. This includes temporary tensors without a name.
+
+    Returns:
+      A list of dictionaries containing tensor information.
+    """
+    tensor_details = []
+    for idx in range(self._interpreter.NumTensors()):
+      try:
+        tensor_details.append(self._get_tensor_details(idx))
+      except ValueError:
+        pass
+    return tensor_details
+
   def get_input_details(self):
     """Gets model input details.
 
diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
index 418f19a179..1e2384b6d2 100644
--- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
+++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc
@@ -277,13 +277,20 @@ PyObject* InterpreterWrapper::ResizeInputTensor(int i, PyObject* value) {
   Py_RETURN_NONE;
 }
 
+int InterpreterWrapper::NumTensors() const {
+  if (!interpreter_) {
+    return 0;
+  }
+  return interpreter_->tensors_size();
+}
+
 std::string InterpreterWrapper::TensorName(int i) const {
   if (!interpreter_ || i >= interpreter_->tensors_size() || i < 0) {
     return "";
   }
 
   const TfLiteTensor* tensor = interpreter_->tensor(i);
-  return tensor->name;
+  return tensor->name ? tensor->name : "";
 }
 
 PyObject* InterpreterWrapper::TensorType(int i) const {
@@ -291,6 +298,11 @@ PyObject* InterpreterWrapper::TensorType(int i) const {
   TFLITE_PY_TENSOR_BOUNDS_CHECK(i);
 
   const TfLiteTensor* tensor = interpreter_->tensor(i);
+  if (tensor->type == kTfLiteNoType) {
+    PyErr_Format(PyExc_ValueError, "Tensor with no type found.");
+    return nullptr;
+  }
+
   int code = TfLiteTypeToPyArrayType(tensor->type);
   if (code == -1) {
     PyErr_Format(PyExc_ValueError, "Invalid tflite type code %d", code);
@@ -302,7 +314,12 @@ PyObject* InterpreterWrapper::TensorType(int i) const {
 PyObject* InterpreterWrapper::TensorSize(int i) const {
   TFLITE_PY_ENSURE_VALID_INTERPRETER();
   TFLITE_PY_TENSOR_BOUNDS_CHECK(i);
+
   const TfLiteTensor* tensor = interpreter_->tensor(i);
+  if (tensor->dims == nullptr) {
+    PyErr_Format(PyExc_ValueError, "Tensor with no shape found.");
+    return nullptr;
+  }
   PyObject* np_array =
       PyArrayFromIntVector(tensor->dims->data, tensor->dims->size);
 
diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
index f5ca81e62a..b98046fe8a 100644
--- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
+++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h
@@ -59,6 +59,7 @@ class InterpreterWrapper {
   PyObject* OutputIndices() const;
   PyObject* ResizeInputTensor(int i, PyObject* value);
 
+  int NumTensors() const;
   std::string TensorName(int i) const;
   PyObject* TensorType(int i) const;
   PyObject* TensorSize(int i) const;
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
index 5ca57d083d..72029ed03c 100644
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
@@ -35,9 +35,9 @@ def _convert(converter, **kwargs):
   """Converts the model.
 
   Args:
-    converter: TocoConverter object.
+    converter: TFLiteConverter object.
     **kwargs: Additional arguments to be passed into the converter. Supported
-      flags are {"converter_mode", "post_training_quant"}.
+      flags are {"converter_mode", "post_training_quantize"}.
 
   Returns:
     The converted TFLite model in serialized format.
@@ -174,7 +174,7 @@ def compare_models_random_data(tflite_model, tf_eval_func, tolerance=5):
     tflite_model: Serialized TensorFlow Lite model.
     tf_eval_func: Lambda function that takes in input data and outputs the
       results of the TensorFlow model ([np.ndarray data] : [np.ndarray result]).
-    tolerance: Decimal place to check accuracy to.
+    tolerance: Decimal place to check accuracy to. (default 5)
   """
   input_data = _generate_random_input_data(tflite_model)
   tf_results = tf_eval_func(input_data)
@@ -183,6 +183,71 @@ def compare_models_random_data(tflite_model, tf_eval_func, tolerance=5):
     np.testing.assert_almost_equal(tf_result, tflite_result, tolerance)
 
 
+def test_frozen_graph_quant(filename,
+                            input_arrays,
+                            output_arrays,
+                            input_shapes=None,
+                            **kwargs):
+  """Sanity check to validate post quantize flag alters the graph.
+
+  This test does not check correctness of the converted model. It converts the
+  TensorFlow frozen graph to TFLite with and without the post_training_quantized
+  flag. It ensures some tensors have different types between the float and
+  quantized models in the case of an all TFLite model or mix-and-match model.
+  It ensures tensor types do not change in the case of an all Flex model.
+
+  Args:
+    filename: Full filepath of file containing frozen GraphDef.
+    input_arrays: List of input tensors to freeze graph with.
+    output_arrays: List of output tensors to freeze graph with.
+    input_shapes: Dict of strings representing input tensor names to list of
+      integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}).
+      Automatically determined when input shapes is None (e.g., {"foo" : None}).
+        (default None)
+    **kwargs: Additional arguments to be passed into the converter.
+
+  Raises:
+    ValueError: post_training_quantize flag doesn't act as intended.
+  """
+  # Convert and load the float model.
+  converter = _lite.TFLiteConverter.from_frozen_graph(
+      filename, input_arrays, output_arrays, input_shapes)
+  tflite_model_float = _convert(converter, **kwargs)
+
+  interpreter_float = _lite.Interpreter(model_content=tflite_model_float)
+  interpreter_float.allocate_tensors()
+  float_tensors = interpreter_float.get_tensor_details()
+
+  # Convert and load the quantized model.
+  converter = _lite.TFLiteConverter.from_frozen_graph(filename, input_arrays,
+                                                      output_arrays)
+  tflite_model_quant = _convert(
+      converter, post_training_quantize=True, **kwargs)
+
+  interpreter_quant = _lite.Interpreter(model_content=tflite_model_quant)
+  interpreter_quant.allocate_tensors()
+  quant_tensors = interpreter_quant.get_tensor_details()
+  quant_tensors_map = {
+      tensor_detail["name"]: tensor_detail for tensor_detail in quant_tensors
+  }
+
+  # Check if weights are of different types in the float and quantized models.
+  num_tensors_float = len(float_tensors)
+  num_tensors_same_dtypes = sum(
+      float_tensor["dtype"] == quant_tensors_map[float_tensor["name"]]["dtype"]
+      for float_tensor in float_tensors)
+  has_quant_tensor = num_tensors_float != num_tensors_same_dtypes
+
+  if ("converter_mode" in kwargs and
+      kwargs["converter_mode"] == _lite.ConverterMode.TOCO_FLEX_ALL):
+    if has_quant_tensor:
+      raise ValueError("--post_training_quantize flag unexpectedly altered the "
+                       "full Flex mode graph.")
+  elif not has_quant_tensor:
+    raise ValueError("--post_training_quantize flag was unable to quantize the "
+                     "graph as expected in TFLite and mix-and-match mode.")
+
+
 def test_frozen_graph(filename,
                       input_arrays,
                       output_arrays,
@@ -203,8 +268,8 @@ def test_frozen_graph(filename,
         (default None)
     **kwargs: Additional arguments to be passed into the converter.
   """
-  converter = _lite.TocoConverter.from_frozen_graph(filename, input_arrays,
-                                                    output_arrays, input_shapes)
+  converter = _lite.TFLiteConverter.from_frozen_graph(
+      filename, input_arrays, output_arrays, input_shapes)
   tflite_model = _convert(converter, **kwargs)
 
   tf_eval_func = evaluate_frozen_graph(filename, input_arrays, output_arrays)
@@ -224,8 +289,8 @@ def test_saved_model(directory, tag_set=None, signature_key=None, **kwargs):
     signature_key: Key identifying SignatureDef containing inputs and outputs.
     **kwargs: Additional arguments to be passed into the converter.
   """
-  converter = _lite.TocoConverter.from_saved_model(directory, tag_set,
-                                                   signature_key)
+  converter = _lite.TFLiteConverter.from_saved_model(directory, tag_set,
+                                                     signature_key)
   tflite_model = _convert(converter, **kwargs)
 
   tf_eval_func = evaluate_saved_model(directory, tag_set, signature_key)
@@ -242,7 +307,7 @@ def test_keras_model(filename, **kwargs):
     filename: Full filepath of HDF5 file containing the tf.keras model.
     **kwargs: Additional arguments to be passed into the converter.
   """
-  converter = _lite.TocoConverter.from_keras_model_file(filename)
+  converter = _lite.TFLiteConverter.from_keras_model_file(filename)
   tflite_model = _convert(converter, **kwargs)
 
   tf_eval_func = evaluate_keras_model(filename)
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
index 1498f86c6f..e07202b1a6 100644
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import os
 import tempfile
+import numpy as np
 
 from tensorflow.contrib.lite.python import lite
 from tensorflow.contrib.lite.testing.model_coverage import model_coverage_lib as model_coverage
@@ -66,6 +67,43 @@ class EvaluateFrozenGraph(test.TestCase):
     model_coverage.test_frozen_graph(filename, ['inputA', 'inputB'],
                                      ['add', 'Mean'])
 
+  def _getQuantizedModel(self):
+    np.random.seed(0)
+    with session.Session().as_default() as sess:
+      # The tensor needs to have more than 1024 elements for quantize_weights to
+      # kick in. Thus, the [33, 33] shape.
+      in_tensor_1 = array_ops.placeholder(
+          shape=[33, 33], dtype=dtypes.float32, name='inputA')
+      in_tensor_2 = constant_op.constant(
+          np.random.uniform(low=-10., high=10., size=(33, 33)),
+          shape=[33, 33],
+          dtype=dtypes.float32,
+          name='inputB')
+      _ = math_ops.matmul(in_tensor_1, in_tensor_2, name='output')
+
+    filename = self._saveFrozenGraph(sess)
+    return filename
+
+  def testQuantized(self):
+    filename = self._getQuantizedModel()
+    model_coverage.test_frozen_graph_quant(filename, ['inputA', 'inputB'],
+                                           ['output'])
+
+  def testQuantizedInputShapes(self):
+    filename = self._getQuantizedModel()
+    model_coverage.test_frozen_graph_quant(
+        filename, ['inputA', 'inputB'], ['output'],
+        input_shapes={
+            'inputA': [33, 33],
+            'inputB': [33, 33],
+        })
+
+  def testQuantizedFlexAll(self):
+    filename = self._getQuantizedModel()
+    model_coverage.test_frozen_graph_quant(
+        filename, ['inputA', 'inputB'], ['output'],
+        converter_mode=lite.ConverterMode.TOCO_FLEX_ALL)
+
 
 class EvaluateSavedModel(test.TestCase):
 
-- 
GitLab


From 0796d711f17c8c981d19461c9edd0e16837c8ab7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 10:51:56 -0700
Subject: [PATCH 0276/1085] Update _check_shape to accept six.integer_types
 instead of int

Currently _check_shape requires that a shape be an `int` or sequence of `int`s.  This CL allows `six.integer_type`s so now (1L,) would be a valid shape.

PiperOrigin-RevId: 215589131
---
 tensorflow/python/feature_column/feature_column.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 618e70f3a5..5352796174 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -2829,7 +2829,7 @@ def _check_shape(shape, key):
     shape = [shape]
   shape = tuple(shape)
   for dimension in shape:
-    if not isinstance(dimension, int):
+    if not isinstance(dimension, six.integer_types):
       raise TypeError('shape dimensions must be integer. '
                       'shape: {}, key: {}'.format(shape, key))
     if dimension < 1:
-- 
GitLab


From b25ef3877da28b7ec31d0bd69a7a6268f5e8a4b4 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Wed, 3 Oct 2018 10:58:53 -0700
Subject: [PATCH 0277/1085] Add a new GetRunFilesDir function to Env.

PiperOrigin-RevId: 215590440
---
 tensorflow/core/platform/env.h          |  6 ++++++
 tensorflow/core/platform/posix/env.cc   | 11 +++++++++++
 tensorflow/core/platform/windows/env.cc | 11 +++++++++++
 3 files changed, 28 insertions(+)

diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 5b237c4736..5732271f15 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -228,6 +228,10 @@ class Env {
   /// |suffix|. Returns true if success.
   bool CreateUniqueFileName(string* prefix, const string& suffix);
 
+  /// \brief Return the runfiles directory if running under bazel. Returns
+  /// the directory the executable is located in if not running under bazel.
+  virtual string GetRunfilesDir() = 0;
+
   // TODO(jeff,sanjay): Add back thread/thread-pool support if needed.
   // TODO(jeff,sanjay): if needed, tighten spec so relative to epoch, or
   // provide a routine to get the absolute time.
@@ -360,6 +364,8 @@ class EnvWrapper : public Env {
     return target_->FormatLibraryFileName(name, version);
   }
 
+  string GetRunfilesDir() override { return target_->GetRunfilesDir(); }
+
  private:
   void GetLocalTempDirectories(std::vector<string>* list) override {
     target_->GetLocalTempDirectories(list);
diff --git a/tensorflow/core/platform/posix/env.cc b/tensorflow/core/platform/posix/env.cc
index 418874d340..af95d8201e 100644
--- a/tensorflow/core/platform/posix/env.cc
+++ b/tensorflow/core/platform/posix/env.cc
@@ -119,6 +119,17 @@ class PosixEnv : public Env {
     return tensorflow::internal::FormatLibraryFileName(name, version);
   }
 
+  string GetRunfilesDir() override {
+    string bin_path = this->GetExecutablePath();
+    string runfiles_path = bin_path + ".runfiles/org_tensorflow";
+    Status s = this->IsDirectory(runfiles_path);
+    if (!s.ok()) {
+      return runfiles_path;
+    } else {
+      return bin_path.substr(0, bin_path.find_last_of("/\\"));
+    }
+  }
+
  private:
   void GetLocalTempDirectories(std::vector<string>* list) override;
 };
diff --git a/tensorflow/core/platform/windows/env.cc b/tensorflow/core/platform/windows/env.cc
index 68ee3595a2..f26ccd1662 100644
--- a/tensorflow/core/platform/windows/env.cc
+++ b/tensorflow/core/platform/windows/env.cc
@@ -160,6 +160,17 @@ class WindowsEnv : public Env {
     return filename;
   }
 
+  string GetRunfilesDir() override {
+    string bin_path = this->GetExecutablePath();
+    string runfiles_path = bin_path + ".runfiles\\org_tensorflow";
+    Status s = this->IsDirectory(runfiles_path);
+    if (!s.ok()) {
+      return runfiles_path;
+    } else {
+      return bin_path.substr(0, bin_path.find_last_of("/\\"));
+    }
+  }
+
  private:
   void GetLocalTempDirectories(std::vector<string>* list) override;
 
-- 
GitLab


From 55ea7f89ee6aa45c5a7623ac9ba671044467e807 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 11:00:21 -0700
Subject: [PATCH 0278/1085] Supports TPUEstimatorSpec in multi_head for TRAIN
 and PREDICT modes.

PiperOrigin-RevId: 215590676
---
 .../estimator/python/estimator/multi_head.py  | 67 ++++++++++++-----
 .../python/estimator/multi_head_test.py       | 75 ++++++++++++++++---
 2 files changed, 111 insertions(+), 31 deletions(-)

diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py
index ce75899214..6e793c8302 100644
--- a/tensorflow/contrib/estimator/python/estimator/multi_head.py
+++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py
@@ -233,6 +233,22 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
       self, features, mode, logits, labels=None, optimizer=None,
       train_op_fn=None):
     """See `_Head`."""
+    return self._create_estimator_spec(
+        features=features, mode=mode, logits=logits, labels=labels,
+        optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=False)
+
+  def _create_tpu_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None):
+    """See `_Head`."""
+    return self._create_estimator_spec(
+        features=features, mode=mode, logits=logits, labels=labels,
+        optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=True)
+
+  def _create_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None, use_tpu=False):
+    """Returns `EstimatorSpec` or `TPUEstimatorSpec`."""
     if isinstance(logits, dict):
       logits_dict = logits
     else:
@@ -255,14 +271,15 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
       spec = self._merge_train(
           all_estimator_spec=all_estimator_spec,
           optimizer=optimizer,
-          train_op_fn=train_op_fn)
+          train_op_fn=train_op_fn,
+          use_tpu=use_tpu)
       with ops.name_scope(''):
         summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss)
       return spec
     if mode == model_fn.ModeKeys.PREDICT:
-      return self._merge_predict(all_estimator_spec)
+      return self._merge_predict(all_estimator_spec, use_tpu=use_tpu)
     if mode == model_fn.ModeKeys.EVAL:
-      return self._merge_eval(all_estimator_spec)
+      return self._merge_eval(all_estimator_spec, use_tpu=use_tpu)
     raise ValueError('mode={} unrecognized'.format(mode))
 
   def _split_logits(self, logits):
@@ -284,28 +301,28 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
         begin_idx += head.logits_dimension
     return logits_dict
 
-  def _merge_train(self, all_estimator_spec, optimizer, train_op_fn):
-    """Merges list of `EstimatorSpec` for training.
+  def _merge_train(
+      self, all_estimator_spec, optimizer, train_op_fn, use_tpu=False):
+    """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for training.
 
     Args:
-      all_estimator_spec: list of `EstimatorSpec` for the individual heads.
+      all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the
+        individual heads.
       optimizer: `Optimizer` instance to create train op. See
         `create_estimator_spec` documentation for more details.
       train_op_fn: Function to create train op. Used if `optimizer` is `None`.
+      use_tpu: If `True`, returns `TPUEstimatorSpec`.
 
     Returns:
-      `EstimatorSpec` that merges all heads for TRAIN.
+      `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for TRAIN.
 
     Raises:
       ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
         mode.
     """
     losses = []
-    metrics = {}
     for spec in all_estimator_spec:
       losses.append(spec.loss)
-      # Metric keys already contain head.name.
-      metrics.update(spec.eval_metric_ops or {})
     loss = _merge_losses(losses, self._head_weights)
     if optimizer is not None:
       if train_op_fn is not None:
@@ -317,20 +334,23 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
     else:
       raise ValueError('train_op_fn and optimizer cannot both be None.')
 
-    return model_fn.EstimatorSpec(
+    spec_type = (
+        model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec)  # pylint:disable=protected-access
+    return spec_type(
         mode=model_fn.ModeKeys.TRAIN,
         loss=loss,
-        train_op=train_op,
-        eval_metric_ops=metrics)
+        train_op=train_op)
 
-  def _merge_predict(self, all_estimator_spec):
-    """Merges list of `EstimatorSpec` for prediction.
+  def _merge_predict(self, all_estimator_spec, use_tpu=False):
+    """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for prediction.
 
     Args:
-      all_estimator_spec: list of `EstimatorSpec` for the individual heads.
+      all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the
+        individual heads.
+      use_tpu: If `True`, returns `TPUEstimatorSpec`.
 
     Returns:
-      `EstimatorSpec` that merges all heads for PREDICT.
+      `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for PREDICT.
     """
     predictions = {}
     export_outputs = {
@@ -357,20 +377,29 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
     export_outputs[head_lib._PREDICT_SERVING_KEY] = (  # pylint:disable=protected-access
         export_output_lib.PredictOutput(merged_predict_outputs))
 
-    return model_fn.EstimatorSpec(
+    spec_type = (
+        model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec)  # pylint:disable=protected-access
+    return spec_type(
         mode=model_fn.ModeKeys.PREDICT,
         predictions=predictions,
         export_outputs=export_outputs)
 
-  def _merge_eval(self, all_estimator_spec):
+  def _merge_eval(self, all_estimator_spec, use_tpu=False):
     """Merges list of `EstimatorSpec` for eval.
 
     Args:
       all_estimator_spec: list of `EstimatorSpec` for the individual heads.
+      use_tpu: If `True`, will raise `NotImplementedError`, because TPU is not
+        yet supported for eval.
 
     Returns:
       `EstimatorSpec` that merges all heads for EVAL.
+    Raises:
+      NotImplementedError: If `use_tpu` is `True`.
     """
+    if use_tpu:
+      raise NotImplementedError(
+          'TPU evaluation is not implemented for multi_head.')
     predictions = {}
     metrics = {}
     losses = []
diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
index 2b4d5f5261..a602f87b4a 100644
--- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
+++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
@@ -106,7 +106,7 @@ class MultiHeadTest(test.TestCase):
     multi_head = multi_head_lib.multi_head([head1, head2])
     self.assertEqual('head1_head2', multi_head.name)
 
-  def test_predict_two_heads_logits_dict(self):
+  def _test_predict_two_heads_logits_dict(self, use_tpu):
     """Tests predict with logits as dict."""
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
     head2 = head_lib.multi_label_head(n_classes=3, name='head2')
@@ -121,10 +121,16 @@ class MultiHeadTest(test.TestCase):
         'head2': _sigmoid(logits['head2']),
     }
 
-    spec = multi_head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
+    if use_tpu:
+      spec = multi_head._create_tpu_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.PREDICT,
+          logits=logits).as_estimator_spec()
+    else:
+      spec = multi_head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.PREDICT,
+          logits=logits)
 
     self.assertItemsEqual(
         (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification',
@@ -175,6 +181,12 @@ class MultiHeadTest(test.TestCase):
           sess.run(
               spec.export_outputs['head2/predict'].outputs['probabilities']))
 
+  def test_predict_two_heads_logits_dict(self):
+    self._test_predict_two_heads_logits_dict(use_tpu=False)
+
+  def test_predict_two_heads_logits_dict_tpu(self):
+    self._test_predict_two_heads_logits_dict(use_tpu=True)
+
   def test_predict_two_heads_logits_tensor(self):
     """Tests predict with logits as Tensor."""
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
@@ -350,6 +362,31 @@ class MultiHeadTest(test.TestCase):
           rtol=tol,
           atol=tol)
 
+  def test_eval_tpu(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    multi_head = multi_head_lib.multi_head(
+        [head1, head2], head_weights=[1., 2.])
+
+    logits = {
+        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
+        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
+                          dtype=np.float32),
+    }
+    labels = {
+        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
+        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
+    }
+
+    with self.assertRaisesRegexp(
+        NotImplementedError,
+        r'TPU evaluation is not implemented for multi_head\.'):
+      multi_head._create_tpu_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=logits,
+          labels=labels)
+
   def test_train_create_loss_one_head(self):
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
     multi_head = multi_head_lib.multi_head([head1])
@@ -587,7 +624,7 @@ class MultiHeadTest(test.TestCase):
           six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
           train_result)
 
-  def test_train_two_heads_with_weights(self):
+  def _test_train_two_heads_with_weights(self, use_tpu):
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
     head2 = head_lib.multi_label_head(n_classes=3, name='head2')
     multi_head = multi_head_lib.multi_head(
@@ -619,12 +656,20 @@ class MultiHeadTest(test.TestCase):
           [constant_op.constant(expected_train_result),
            string_ops.as_string(loss, precision=3)])
 
-    spec = multi_head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
+    if use_tpu:
+      spec = multi_head._create_tpu_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=logits,
+          labels=labels,
+          train_op_fn=_train_op_fn).as_estimator_spec()
+    else:
+      spec = multi_head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=logits,
+          labels=labels,
+          train_op_fn=_train_op_fn)
 
     self.assertIsNotNone(spec.loss)
     self.assertEqual({}, spec.eval_metric_ops)
@@ -649,6 +694,12 @@ class MultiHeadTest(test.TestCase):
           metric_keys.MetricKeys.LOSS + '/head2': expected_loss_head2,
       }, summary_str, tol)
 
+  def test_train_two_heads_with_weights(self):
+    self._test_train_two_heads_with_weights(use_tpu=False)
+
+  def test_train_two_heads_with_weights_tpu(self):
+    self._test_train_two_heads_with_weights(use_tpu=True)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 51b266fba181dffb6b3f9207280cde6b7670dd90 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 3 Oct 2018 11:09:44 -0700
Subject: [PATCH 0279/1085] [tf.data] Fix noisy warning.

PiperOrigin-RevId: 215592456
---
 tensorflow/python/data/ops/dataset_ops.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 46ce191f7b..3693cc88f2 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1830,10 +1830,11 @@ class StructuredFunctionWrapper(object):
           component = _NestedDatasetComponent(t)
           flat_classes.append(component)
           flat_shapes.append(component)
-          flat_types.append(component)
-          if t.options() is not None:  # pylint: disable=protected-access
-            warnings.warn("Encountered a nested dataset with options. These "
-                          "options will not be applied to the outer dataset.")
+          flat_types.append(component)          
+          if t.options() != Options():  # pylint: disable=protected-access
+            warnings.warn("Encountered a nested dataset with non-default "
+                          "options. These options will not be propagated to "
+                          "the outer dataset.")
         else:
           try:
             t = ops.convert_to_tensor(t)
-- 
GitLab


From 880dcb7a91e5ee497045614d9c5f4ab93c9ffacf Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 3 Oct 2018 11:17:48 -0700
Subject: [PATCH 0280/1085] Automated rollback of commit
 51b266fba181dffb6b3f9207280cde6b7670dd90

PiperOrigin-RevId: 215593867
---
 tensorflow/python/data/ops/dataset_ops.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 3693cc88f2..46ce191f7b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1830,11 +1830,10 @@ class StructuredFunctionWrapper(object):
           component = _NestedDatasetComponent(t)
           flat_classes.append(component)
           flat_shapes.append(component)
-          flat_types.append(component)          
-          if t.options() != Options():  # pylint: disable=protected-access
-            warnings.warn("Encountered a nested dataset with non-default "
-                          "options. These options will not be propagated to "
-                          "the outer dataset.")
+          flat_types.append(component)
+          if t.options() is not None:  # pylint: disable=protected-access
+            warnings.warn("Encountered a nested dataset with options. These "
+                          "options will not be applied to the outer dataset.")
         else:
           try:
             t = ops.convert_to_tensor(t)
-- 
GitLab


From 47eafbaf43c763dc65a2cd3cfd9ecbd8fbbdf668 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Wed, 3 Oct 2018 11:24:41 -0700
Subject: [PATCH 0281/1085] [tf.data] Add utility to deduplicate graph node
 names (after vectorization)

PiperOrigin-RevId: 215595078
---
 tensorflow/core/graph/graph.cc                |  5 ++++
 tensorflow/core/graph/graph.h                 |  1 +
 .../core/grappler/optimizers/data/BUILD       |  2 ++
 .../grappler/optimizers/data/graph_utils.cc   | 21 ++++++++++++++
 .../grappler/optimizers/data/graph_utils.h    |  9 ++++++
 .../optimizers/data/graph_utils_test.cc       | 28 +++++++++++++++++++
 .../optimizers/data/vectorization_utils.cc    |  2 ++
 7 files changed, 68 insertions(+)

diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 4c0cd14ff1..7a4a0096fa 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -192,6 +192,11 @@ void Node::ClearAttr(const string& name) {
   (*props_->node_def.mutable_attr()).erase(name);
 }
 
+void Node::set_name(string name) {
+  MaybeCopyOnWrite();
+  props_->node_def.set_name(std::move(name));
+}
+
 void Node::set_requested_device(const string& device) {
   MaybeCopyOnWrite();
   props_->node_def.set_device(device);
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 72cef07072..2944951f82 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -72,6 +72,7 @@ class Node {
   int id() const { return id_; }
   int cost_id() const { return cost_id_; }
   const string& name() const;
+  void set_name(string name);
   const string& type_string() const;
 
   // def() provides the NodeDef the user supplied, but the specifics
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 5a3abbb545..755af3361e 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -129,6 +129,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core/grappler:utils",
+        "//tensorflow/core:lib_internal",
     ] + tf_protos_all(),
 )
 
@@ -138,6 +139,7 @@ tf_cc_test(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_utils",
+        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index 3eaaf8fbef..b863a25dc5 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
@@ -272,6 +273,26 @@ void ConcatAttributeList(const string& attribute_name, const NodeDef& first,
       ->MergeFrom(second.attr().at(attribute_name).list());
 }
 
+Status EnsureNodeNamesUnique(Graph* g) {
+  // Modeled after Scope::Impl::GetUniqueName
+  std::unordered_map<string, int> name_map;
+
+  for (auto node : g->op_nodes()) {
+    const string& prefix = node->name();
+    if (auto entry = gtl::FindOrNull(name_map, prefix)) {
+      string unique_name;
+      do {
+        unique_name = strings::StrCat(prefix, "_", ++(*entry));
+      } while (name_map.find(unique_name) != name_map.end());
+      name_map.insert({unique_name, 0});
+      node->set_name(std::move(unique_name));
+    } else {
+      name_map.insert({node->name(), 0});
+    }
+  }
+
+  return Status::OK();
+}
 }  // end namespace graph_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h
index 3af34f6904..d130fee204 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -131,6 +132,14 @@ void CopyAttribute(const string& attribute_name, const NodeDef& from,
 void ConcatAttributeList(const string& attribute_name, const NodeDef& first,
                          const NodeDef& second, NodeDef* to_node);
 
+// Checks that all nodes in the graphs have unique names, and sets their names
+// to be unique if they are not already.  This is necessary as Graph does not
+// have the provisions to deduplicate names, and name deduplication elsewhere
+// in tensorflow happens in other layers (for example, in the Scope class of the
+// C++ API). Note that the nodes in the graph are identified by their id,
+// and renaming nodes does not mutate any edges.
+Status EnsureNodeNamesUnique(Graph* g);
+
 }  // end namespace graph_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
index db986542b2..4ab6d71532 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
 #include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -229,6 +230,33 @@ TEST(GraphUtilsTest, GetInputNode) {
   EXPECT_EQ(GetInputNode(*node1, graph), nullptr);
 }
 
+TEST(GraphUtilsTest, EnsureNodeNamesUnique) {
+  Graph g(OpRegistry::Global());
+
+  Node *const_0, *const_1, *const_2;
+
+  // Arbitrary const
+  Tensor tensor(DT_INT32, {});
+  tensor.scalar<int32>()() = 5;
+
+  for (auto node : {&const_0, &const_1}) {
+    TF_EXPECT_OK(NodeBuilder("Const", "Const")
+                     .Attr("value", tensor)
+                     .Attr("dtype", DT_INT32)
+                     .Finalize(&g, node));
+  }
+  // Make sure generated name doesn't clash with existing name either
+  TF_EXPECT_OK(NodeBuilder("Const_1", "Const")
+                   .Attr("value", tensor)
+                   .Attr("dtype", DT_INT32)
+                   .Finalize(&g, &const_2));
+
+  TF_EXPECT_OK(EnsureNodeNamesUnique(&g));
+  EXPECT_NE(const_0->name(), const_1->name());
+  EXPECT_NE(const_1->name(), const_2->name());
+  EXPECT_NE(const_0->name(), const_2->name());
+}
+
 }  // namespace
 }  // namespace graph_utils
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index cea667f668..2d6cf562b1 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -352,6 +352,8 @@ Status Vectorization::Initialize(const FunctionDef& outer_scope,
 
 Status Vectorization::GetResult(FunctionDef** vectorized_function) {
   TF_RETURN_IF_ERROR(status_);
+  TF_RETURN_IF_ERROR(graph_utils::EnsureNodeNamesUnique(outer_scope_.get()));
+  TF_RETURN_IF_ERROR(graph_utils::EnsureNodeNamesUnique(map_defun_fn_->graph));
 
   if (!map_defun_fn_->ret_nodes.empty()) {
     FunctionDef* map_defun_fn = lib_->add_function();
-- 
GitLab


From 6c93a3e98450ad56ab91cd2d5c92765e5313c771 Mon Sep 17 00:00:00 2001
From: mdfaijul <md.faijul.amin@intel.com>
Date: Wed, 3 Oct 2018 11:53:33 -0700
Subject: [PATCH 0282/1085] Fixed style with clang-format

---
 tensorflow/core/graph/mkl_layout_pass.cc | 386 +++++++++++++----------
 1 file changed, 220 insertions(+), 166 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 0ac7682b07..a3cff18535 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -299,90 +299,124 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // End - element-wise ops. See note above.
 
     // NOTE: names are alphabetically sorted.
-    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
-                      CopyAttrsAddN, AddNRewrite, nullptr});
-    rinfo_.push_back({csinfo_.add, mkl_op_registry::GetMklOpName(csinfo_.add),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.avg_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
-                      CopyAttrsPooling, AlwaysRewrite, nullptr});
+    rinfo_.push_back(
+        {csinfo_.addn,  mkl_op_registry::GetMklOpName(csinfo_.addn),
+         CopyAttrsAddN, AddNRewrite,
+         nullptr});
+    rinfo_.push_back(
+        {csinfo_.add,       mkl_op_registry::GetMklOpName(csinfo_.add),
+         CopyAttrsDataType, AlwaysRewrite,
+         nullptr});
+    rinfo_.push_back(
+        {csinfo_.avg_pool, mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
+         CopyAttrsPooling, AlwaysRewrite,
+         nullptr});
     rinfo_.push_back({csinfo_.avg_pool_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
-                      CopyAttrsPooling, AlwaysRewrite, nullptr});
+                      CopyAttrsPooling,
+                      AlwaysRewrite,
+                      nullptr});
     // BiasAddGrad gets written into Conv2DWithBiasBackpropBias depending
     // on if context contains Conv2D.
     rinfo_.push_back({csinfo_.bias_add_grad,
                       csinfo_.mkl_conv2d_with_bias_backprop_bias,
-                      CopyAttrsBiasAddGrad, ContextMatchRewrite,
+                      CopyAttrsBiasAddGrad,
+                      ContextMatchRewrite,
                       &biasaddgrad_conv2dwithbias_context_});
     // BiasAddGrad gets written into BiasAddGrad depending on if context
     // contains MatMul.
-    rinfo_.push_back({csinfo_.bias_add_grad, csinfo_.matmul,
-                      CopyAttrsBiasAddGrad, ContextMatchRewrite,
+    rinfo_.push_back({csinfo_.bias_add_grad,       csinfo_.matmul,
+                      CopyAttrsBiasAddGrad,        ContextMatchRewrite,
                       &biasaddgrad_matmul_context_});
-    rinfo_.push_back({csinfo_.concat,
-                      mkl_op_registry::GetMklOpName(csinfo_.concat),
-                      CopyAttrsConcat, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.concatv2,
-                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
-                      CopyAttrsConcatV2, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.conv2d,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
-                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
+    rinfo_.push_back(
+        {csinfo_.concat,  mkl_op_registry::GetMklOpName(csinfo_.concat),
+         CopyAttrsConcat, AlwaysRewrite,
+         nullptr});
+    rinfo_.push_back(
+        {csinfo_.concatv2,  mkl_op_registry::GetMklOpName(csinfo_.concatv2),
+         CopyAttrsConcatV2, AlwaysRewrite,
+         nullptr});
+    rinfo_.push_back(
+        {csinfo_.conv2d,  mkl_op_registry::GetMklOpName(csinfo_.conv2d),
+         CopyAttrsConv2D, AlwaysRewrite,
+         nullptr});
     rinfo_.push_back({csinfo_.conv2d_grad_filter,
                       mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
-                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
+                      CopyAttrsConv2D,
+                      AlwaysRewrite,
+                      nullptr});
     rinfo_.push_back({csinfo_.conv2d_grad_input,
                       mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
-                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
+                      CopyAttrsConv2D,
+                      AlwaysRewrite,
+                      nullptr});
 
     rinfo_.push_back({csinfo_.fused_batch_norm,
                       mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
+                      CopyAttrsFusedBatchNorm,
+                      AlwaysRewrite,
+                      nullptr});
+    rinfo_.push_back({csinfo_.fused_batch_norm_grad,
+                      mkl_op_registry::GetMklOpName(
+                          csinfo_.fused_batch_norm_grad),
                       CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
     rinfo_.push_back(
-        {csinfo_.fused_batch_norm_grad,
-         mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
-         CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.identity,
-                      mkl_op_registry::GetMklOpName(csinfo_.identity),
-                      CopyAttrsIdentity, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn),
-                      CopyAttrsLRN, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.lrn_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
-                      CopyAttrsLRN, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.max_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
-                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite, nullptr});
+        {csinfo_.identity,  mkl_op_registry::GetMklOpName(csinfo_.identity),
+         CopyAttrsIdentity, AlwaysRewrite,
+         nullptr});
+    rinfo_.push_back({csinfo_.lrn,  mkl_op_registry::GetMklOpName(csinfo_.lrn),
+                      CopyAttrsLRN, AlwaysRewrite,
+                      nullptr});
+    rinfo_.push_back(
+        {csinfo_.lrn_grad, mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
+         CopyAttrsLRN,     AlwaysRewrite,
+         nullptr});
+    rinfo_.push_back(
+        {csinfo_.max_pool, mkl_op_registry::GetMklOpName(csinfo_.max_pool),
+         CopyAttrsPooling, NonDepthBatchWisePoolRewrite,
+         nullptr});
     rinfo_.push_back({csinfo_.max_pool_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
-                      CopyAttrsPooling, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.maximum,
-                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.mul, mkl_op_registry::GetMklOpName(csinfo_.mul),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.relu_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.reshape,
-                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
-                      CopyAttrsReshape, AlwaysRewrite, nullptr});
+                      CopyAttrsPooling,
+                      AlwaysRewrite,
+                      nullptr});
+    rinfo_.push_back(
+        {csinfo_.maximum,   mkl_op_registry::GetMklOpName(csinfo_.maximum),
+         CopyAttrsDataType, AlwaysRewrite,
+         nullptr});
+    rinfo_.push_back(
+        {csinfo_.mul,       mkl_op_registry::GetMklOpName(csinfo_.mul),
+         CopyAttrsDataType, AlwaysRewrite,
+         nullptr});
+    rinfo_.push_back(
+        {csinfo_.relu,      mkl_op_registry::GetMklOpName(csinfo_.relu),
+         CopyAttrsDataType, AlwaysRewrite,
+         nullptr});
+    rinfo_.push_back(
+        {csinfo_.relu_grad, mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
+         CopyAttrsDataType, AlwaysRewrite,
+         nullptr});
+    rinfo_.push_back(
+        {csinfo_.reshape,  mkl_op_registry::GetMklOpName(csinfo_.reshape),
+         CopyAttrsReshape, AlwaysRewrite,
+         nullptr});
     rinfo_.push_back({csinfo_.squared_difference,
                       mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.sub, mkl_op_registry::GetMklOpName(csinfo_.sub),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+                      CopyAttrsDataType,
+                      AlwaysRewrite,
+                      nullptr});
+    rinfo_.push_back(
+        {csinfo_.sub,       mkl_op_registry::GetMklOpName(csinfo_.sub),
+         CopyAttrsDataType, AlwaysRewrite,
+         nullptr});
 
     // Add info about which ops to add workspace edge to and the slots.
     wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
     wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
 
     // Add a rule for merging nodes
-    minfo_.push_back({csinfo_.mkl_conv2d, csinfo_.bias_add, 0,
-                      csinfo_.mkl_conv2d_with_bias});
+    minfo_.push_back({csinfo_.mkl_conv2d, csinfo_.bias_add,
+                      0,                  csinfo_.mkl_conv2d_with_bias});
 
     biasaddgrad_matmul_context_ = {csinfo_.bias_add_grad, csinfo_.matmul,
                                    IsBiasAddGradInMatMulContext};
@@ -548,14 +582,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
 
     // If Op has been specifically assigned to a non-CPU device, then No.
     if (!n->assigned_device_name().empty() &&
-       !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
       result = false;
       reason = "Op has been assigned a runtime device that is not CPU.";
     }
 
     // If user has specifically assigned this op to a non-CPU device, then No.
     if (!n->def().device().empty() &&
-       !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
       result = false;
       reason = "User has assigned a device that is not CPU.";
     }
@@ -1046,7 +1080,7 @@ void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
                                                       // device of the original
                                                       // node.
                   .Finalize(&**g, out));
-  CHECK_NOTNULL(*out); // Make sure we got a valid object before using it
+  CHECK_NOTNULL(*out);  // Make sure we got a valid object before using it
 
   // If number of inputs to the original node is > 0, then we add
   // control dependency between 1st input (index 0) of the original node and
@@ -1158,7 +1192,7 @@ int MklLayoutRewritePass::SetUpContiguousInputs(
     for (const Edge* e : filter_node->out_edges()) {
       if (e->dst()->type_string() == csinfo_.mkl_conv2d &&
           e->dst_input() == kConv2DFilterInputSlotIdx
-          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
+              /* filter is 2nd input of Conv2D and _MklConv2D. */) {
         if (conv2d_node != nullptr) {
           VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
                   << " feeding multiple Conv2D nodes: "
@@ -1340,7 +1374,7 @@ void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
                                                       // device of the original
                                                       // node.
                   .Finalize(&**g, out));
-  CHECK_NOTNULL(*out); // Make sure we got a valid object before using it
+  CHECK_NOTNULL(*out);  // Make sure we got a valid object before using it
 
   // If number of inputs to the original node is > 0, then we add
   // control dependency between 1st input (index 0) of the original node and
@@ -2073,7 +2107,7 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
   // BiasAddGrad is not an Mkl layer, so we make an exception for it.
   if (n->type_string() != csinfo_.bias_add_grad) {
     if (!mkl_op_registry::IsMklOp(
-            mkl_op_registry::GetMklOpName(n->type_string()), T)) {
+             mkl_op_registry::GetMklOpName(n->type_string()), T)) {
       return nullptr;
     }
   }
@@ -2217,7 +2251,7 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
   return Status::OK();
 }
 
-#else   // INTEL_MKL_ML_ONLY
+#else  // INTEL_MKL_ML_ONLY
 
 // This pass implements rewriting of graph to support following scenarios:
 // (A) Merging nodes in the graph
@@ -2493,110 +2527,125 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // End - element-wise ops. See note above.
 
     // NOTE: names are alphabetically sorted.
-    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
-                      CopyAttrsAddN, AddNRewrite});
-    rinfo_.push_back({csinfo_.add, mkl_op_registry::GetMklOpName(csinfo_.add),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.avg_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
-                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.addn,  mkl_op_registry::GetMklOpName(csinfo_.addn),
+         CopyAttrsAddN, AddNRewrite});
+    rinfo_.push_back(
+        {csinfo_.add,       mkl_op_registry::GetMklOpName(csinfo_.add),
+         CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.avg_pool, mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
+         CopyAttrsPooling, AlwaysRewrite});
     rinfo_.push_back({csinfo_.avg_pool_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
-                      CopyAttrsPooling, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.avg_pool3d,
-                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d),
-                      CopyAttrsPooling, AlwaysRewrite});
+                      CopyAttrsPooling,
+                      AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.avg_pool3d, mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d),
+         CopyAttrsPooling,   AlwaysRewrite});
     rinfo_.push_back({csinfo_.avg_pool3d_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d_grad),
-                      CopyAttrsPooling, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.concat,
-                      mkl_op_registry::GetMklOpName(csinfo_.concat),
-                      CopyAttrsConcat, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.concatv2,
-                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
-                      CopyAttrsConcatV2, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.conv2d,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
-                      CopyAttrsConv, AlwaysRewrite});
+                      CopyAttrsPooling,
+                      AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.concat,  mkl_op_registry::GetMklOpName(csinfo_.concat),
+         CopyAttrsConcat, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.concatv2,  mkl_op_registry::GetMklOpName(csinfo_.concatv2),
+         CopyAttrsConcatV2, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.conv2d, mkl_op_registry::GetMklOpName(csinfo_.conv2d),
+         CopyAttrsConv,  AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv2d_with_bias, csinfo_.mkl_conv2d_with_bias,
-                      CopyAttrsConv, AlwaysRewrite});
+                      CopyAttrsConv,            AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv2d_grad_filter,
                       mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
-                      CopyAttrsConv, AlwaysRewrite});
+                      CopyAttrsConv,
+                      AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv2d_grad_filter_with_bias,
-                      csinfo_.mkl_conv2d_grad_filter_with_bias, CopyAttrsConv,
+                      csinfo_.mkl_conv2d_grad_filter_with_bias,
+                      CopyAttrsConv,
                       AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv2d_grad_input,
                       mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
-                      CopyAttrsConv, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.conv3d,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv3d),
-                      CopyAttrsConv, AlwaysRewrite});
+                      CopyAttrsConv,
+                      AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.conv3d, mkl_op_registry::GetMklOpName(csinfo_.conv3d),
+         CopyAttrsConv,  AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv3d_grad_filter,
                       mkl_op_registry::GetMklOpName(csinfo_.conv3d_grad_filter),
-                      CopyAttrsConv, AlwaysRewrite});
+                      CopyAttrsConv,
+                      AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv3d_grad_input,
                       mkl_op_registry::GetMklOpName(csinfo_.conv3d_grad_input),
-                      CopyAttrsConv, AlwaysRewrite});
+                      CopyAttrsConv,
+                      AlwaysRewrite});
     rinfo_.push_back({csinfo_.fused_batch_norm,
                       mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
+                      CopyAttrsFusedBatchNorm,
+                      AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_batch_norm_grad,
+                      mkl_op_registry::GetMklOpName(
+                          csinfo_.fused_batch_norm_grad),
                       CopyAttrsFusedBatchNorm, AlwaysRewrite});
     rinfo_.push_back(
-        {csinfo_.fused_batch_norm_grad,
-         mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
-         CopyAttrsFusedBatchNorm, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.identity,
-                      mkl_op_registry::GetMklOpName(csinfo_.identity),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn),
+        {csinfo_.identity,  mkl_op_registry::GetMklOpName(csinfo_.identity),
+         CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.lrn,  mkl_op_registry::GetMklOpName(csinfo_.lrn),
                       CopyAttrsLRN, LrnRewrite});
-    rinfo_.push_back({csinfo_.lrn_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
-                      CopyAttrsLRN, LrnGradRewrite});
-    rinfo_.push_back({csinfo_.max_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
-                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
+    rinfo_.push_back(
+        {csinfo_.lrn_grad, mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
+         CopyAttrsLRN,     LrnGradRewrite});
+    rinfo_.push_back(
+        {csinfo_.max_pool, mkl_op_registry::GetMklOpName(csinfo_.max_pool),
+         CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
     rinfo_.push_back({csinfo_.max_pool_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
-                      CopyAttrsPooling, MaxpoolGradRewrite});
-    rinfo_.push_back({csinfo_.max_pool3d,
-                      mkl_op_registry::GetMklOpName(csinfo_.max_pool3d),
-                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
+                      CopyAttrsPooling,
+                      MaxpoolGradRewrite});
+    rinfo_.push_back(
+        {csinfo_.max_pool3d, mkl_op_registry::GetMklOpName(csinfo_.max_pool3d),
+         CopyAttrsPooling,   NonDepthBatchWisePoolRewrite});
     rinfo_.push_back({csinfo_.max_pool3d_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool3d_grad),
-                      CopyAttrsPooling, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.maximum,
-                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.mul,
-                      mkl_op_registry::GetMklOpName(csinfo_.mul),
-                      CopyAttrsDataType, AlwaysRewrite});
+                      CopyAttrsPooling,
+                      AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.maximum,   mkl_op_registry::GetMklOpName(csinfo_.maximum),
+         CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.mul,       mkl_op_registry::GetMklOpName(csinfo_.mul),
+         CopyAttrsDataType, AlwaysRewrite});
 #ifdef INTEL_MKL_QUANTIZED
     rinfo_.push_back({csinfo_.quantized_avg_pool,
                       mkl_op_registry::GetMklOpName(csinfo_.quantized_avg_pool),
-                      CopyAttrsQuantizedPooling, AlwaysRewrite});
+                      CopyAttrsQuantizedPooling,
+                      AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_concatv2,
                       mkl_op_registry::GetMklOpName(csinfo_.quantized_concatv2),
-                      CopyAttrsConcatV2, AlwaysRewrite});
+                      CopyAttrsConcatV2,
+                      AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d,
                       mkl_op_registry::GetMklOpName(csinfo_.quantized_conv2d),
-                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
+                      CopyAttrsQuantizedConv2D,
+                      AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d_with_requantize,
                       mkl_op_registry::GetMklOpName(
                           csinfo_.quantized_conv2d_with_requantize),
                       CopyAttrsQuantizedConv2D, AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.quantized_conv2d_with_bias,
-         mkl_op_registry::GetMklOpName(csinfo_.quantized_conv2d_with_bias),
-         CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_conv2d_with_bias,
+                      mkl_op_registry::GetMklOpName(
+                          csinfo_.quantized_conv2d_with_bias),
+                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d_with_bias_and_requantize,
                       mkl_op_registry::GetMklOpName(
                           csinfo_.quantized_conv2d_with_bias_and_requantize),
                       CopyAttrsQuantizedConv2D, AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.quantized_conv2d_and_relu,
-         mkl_op_registry::GetMklOpName(csinfo_.quantized_conv2d_and_relu),
-         CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.quantized_conv2d_and_relu,
+                      mkl_op_registry::GetMklOpName(
+                          csinfo_.quantized_conv2d_and_relu),
+                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d_and_relu_and_requantize,
                       mkl_op_registry::GetMklOpName(
                           csinfo_.quantized_conv2d_and_relu_and_requantize),
@@ -2612,7 +2661,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
          CopyAttrsQuantizedConv2D, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_max_pool,
                       mkl_op_registry::GetMklOpName(csinfo_.quantized_max_pool),
-                      CopyAttrsQuantizedPooling, AlwaysRewrite});
+                      CopyAttrsQuantizedPooling,
+                      AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d_with_bias_sum_and_relu,
                       mkl_op_registry::GetMklOpName(
                           csinfo_.quantized_conv2d_with_bias_sum_and_relu),
@@ -2628,15 +2678,16 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
              csinfo_.quant_conv2d_with_bias_signed_sum_and_relu_and_requantize),
          CopyAttrsQuantizedConv2D, AlwaysRewrite});
 #endif
-    rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.relu_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
-                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.relu,      mkl_op_registry::GetMklOpName(csinfo_.relu),
+         CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.relu_grad, mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
+         CopyAttrsDataType, AlwaysRewrite});
 #ifdef INTEL_MKL_QUANTIZED
-    rinfo_.push_back({csinfo_.requantize,
-                      mkl_op_registry::GetMklOpName(csinfo_.requantize),
-                      CopyAttrsRequantize, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.requantize,  mkl_op_registry::GetMklOpName(csinfo_.requantize),
+         CopyAttrsRequantize, AlwaysRewrite});
 #endif
     /*
     rinfo_.push_back({csinfo_.tanh,
@@ -2646,34 +2697,36 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
                       mkl_op_registry::GetMklOpName(csinfo_.tanh_grad),
                       CopyAttrsDataType, AlwaysRewrite});
     */
-    rinfo_.push_back({csinfo_.reshape,
-                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
-                      CopyAttrsReshape, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.slice,
-                      mkl_op_registry::GetMklOpName(csinfo_.slice),
-                      CopyAttrsSlice, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.softmax,
-                      mkl_op_registry::GetMklOpName(csinfo_.softmax),
-                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.reshape,  mkl_op_registry::GetMklOpName(csinfo_.reshape),
+         CopyAttrsReshape, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.slice,  mkl_op_registry::GetMklOpName(csinfo_.slice),
+         CopyAttrsSlice, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.softmax,   mkl_op_registry::GetMklOpName(csinfo_.softmax),
+         CopyAttrsDataType, AlwaysRewrite});
 
     rinfo_.push_back({csinfo_.squared_difference,
                       mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
-                      CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.sub,
-                      mkl_op_registry::GetMklOpName(csinfo_.sub),
-                      CopyAttrsDataType, AlwaysRewrite});
+                      CopyAttrsDataType,
+                      AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.sub,       mkl_op_registry::GetMklOpName(csinfo_.sub),
+         CopyAttrsDataType, AlwaysRewrite});
 
     // Add info about which ops to add workspace edge to and the slots.
     wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
     wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
-    wsinfo_.push_back
-        ({csinfo_.max_pool3d, csinfo_.max_pool3d_grad, 0, 1, 1, 3});
+    wsinfo_.push_back(
+        {csinfo_.max_pool3d, csinfo_.max_pool3d_grad, 0, 1, 1, 3});
 
     // Add a rule for merging nodes
-    minfo_.push_back({csinfo_.conv2d, csinfo_.bias_add,
+    minfo_.push_back({csinfo_.conv2d,           csinfo_.bias_add,
                       csinfo_.conv2d_with_bias, GetConv2DOrBiasAdd});
 
-    minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad,
+    minfo_.push_back({csinfo_.conv2d_grad_filter,
+                      csinfo_.bias_add_grad,
                       csinfo_.conv2d_grad_filter_with_bias,
                       GetConv2DBackpropFilterOrBiasAddGrad});
   }
@@ -3206,8 +3259,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   // Helper function used by FixMklMetaDataEdges. Fixes the metadata edge
   // pointed by 'e_metadata' corresponding to the data edge 'e_data' in graph
   // 'g'. Returns true is fixup was done; otherwise, it returns false.
-  bool FixMklMetaDataEdgeIfNeeded(std::unique_ptr<Graph>* g,
-    const Edge* e_data, const Edge* e_metadata);
+  bool FixMklMetaDataEdgeIfNeeded(std::unique_ptr<Graph>* g, const Edge* e_data,
+                                  const Edge* e_metadata);
 
   // Are the input Mkl metadata edges for node 'n' in graph 'g' correctly
   // connected? If not, then fix them. This is needed because a graph may have
@@ -3338,7 +3391,7 @@ void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
                                                       // device of the original
                                                       // node.
                   .Finalize(&**g, out));
-  CHECK_NOTNULL(*out); // Make sure we got a valid object before using it
+  CHECK_NOTNULL(*out);  // Make sure we got a valid object before using it
 
   // If number of inputs to the original node is > 0, then we add
   // control dependency between 1st input (index 0) of the original node and
@@ -3453,7 +3506,7 @@ int MklLayoutRewritePass::SetUpContiguousInputs(
       if ((e->dst()->type_string() == csinfo_.mkl_conv2d ||
            e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) &&
           e->dst_input() == kConv2DFilterInputSlotIdx
-          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
+              /* filter is 2nd input of Conv2D and _MklConv2D. */) {
         if (conv2d_node != nullptr) {
           VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
                   << " feeding multiple Conv2D nodes: "
@@ -3576,7 +3629,7 @@ Status MklLayoutRewritePass::SetUpInputs(
 
   // Avoid workspace check for QuantizedConv2D and the fused
   // Ops as they don't have attribute: "T".
-  std::vector<string> quant_ops {
+  std::vector<string> quant_ops{
       "QuantizedConv2D",
       "QuantizedConv2DWithBias",
       "QuantizedConv2DAndRelu",
@@ -4561,23 +4614,24 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
 //              Post-rewrite Mkl metadata fixup pass
 ///////////////////////////////////////////////////////////////////////////////
 bool MklLayoutRewritePass::FixMklMetaDataEdgeIfNeeded(std::unique_ptr<Graph>* g,
-    const Edge* e_data, const Edge* e_metadata) {
+                                                      const Edge* e_data,
+                                                      const Edge* e_metadata) {
   if (g == nullptr || e_data == nullptr || e_metadata == nullptr) {
     return false;
   }
 
   Node* n_data = e_data->src();
   int n_data_op_slot = e_data->src_output();
-  int n_metadata_op_slot = GetTensorMetaDataIndex(n_data_op_slot,
-                                                  n_data->num_outputs());
+  int n_metadata_op_slot =
+      GetTensorMetaDataIndex(n_data_op_slot, n_data->num_outputs());
 
   // If the source of meta edge is a constant node (producing dummy Mkl metadata
   // tensor), then we will need to fix.
   if (IsConstant(e_metadata->src())) {
     Node* e_metadata_dst = e_metadata->dst();
     int e_metadata_in_slot = e_metadata->dst_input();
-    CHECK_NOTNULL((*g)->AddEdge(n_data, n_metadata_op_slot,
-                  e_metadata_dst, e_metadata_in_slot));
+    CHECK_NOTNULL((*g)->AddEdge(n_data, n_metadata_op_slot, e_metadata_dst,
+                                e_metadata_in_slot));
 
     (*g)->RemoveEdge(e_metadata);
     return true;
@@ -4587,7 +4641,7 @@ bool MklLayoutRewritePass::FixMklMetaDataEdgeIfNeeded(std::unique_ptr<Graph>* g,
 }
 
 bool MklLayoutRewritePass::FixMklMetaDataEdges(std::unique_ptr<Graph>* g,
-    Node* n) {
+                                               Node* n) {
   bool result = false;
 
   // If graph node is not Mkl node, then return.
@@ -4628,8 +4682,8 @@ bool MklLayoutRewritePass::FixMklMetaDataEdges(std::unique_ptr<Graph>* g,
       // Let's get edge that carries Mkl metadata corresponding to Mkl data edge
       // 'e'. For that, let's first get the input slot of 'n' where the meta
       // edge will feed the value.
-      int e_meta_in_slot = GetTensorMetaDataIndex(e->dst_input(),
-                                                  n->num_inputs());
+      int e_meta_in_slot =
+          GetTensorMetaDataIndex(e->dst_input(), n->num_inputs());
       const Edge* e_meta = nullptr;
       TF_CHECK_OK(n->input_edge(e_meta_in_slot, &e_meta));
 
-- 
GitLab


From d6cbdf31bb69a799e166cc6c43af0f5570d94fb1 Mon Sep 17 00:00:00 2001
From: mdfaijul <md.faijul.amin@intel.com>
Date: Wed, 3 Oct 2018 11:59:35 -0700
Subject: [PATCH 0283/1085] Removed
 tensorflow/tools/quantization/quantize_graph.py

---
 .../tools/quantization/quantize_graph.py      | 1625 -----------------
 1 file changed, 1625 deletions(-)
 delete mode 100644 tensorflow/tools/quantization/quantize_graph.py

diff --git a/tensorflow/tools/quantization/quantize_graph.py b/tensorflow/tools/quantization/quantize_graph.py
deleted file mode 100644
index 14b572c15f..0000000000
--- a/tensorflow/tools/quantization/quantize_graph.py
+++ /dev/null
@@ -1,1625 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Transforms a float-trained graph into an equivalent quantized version.
-
-An example of command-line usage is:
-bazel build tensorflow/tools/quantization:quantize_graph \
-&& bazel-bin/tensorflow/tools/quantization/quantize_graph \
---input=tensorflow_inception_graph.pb
---output_node_names="softmax2" --print_nodes --output=/tmp/quantized_graph.pb \
---mode=eightbit --logtostderr
-
-To quantize for Intel CPU, add --intel_cpu_eightbitize=True.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import re
-import numpy as np
-
-from tensorflow.core.framework import attr_value_pb2
-from tensorflow.core.framework import graph_pb2
-from tensorflow.core.framework import node_def_pb2
-from tensorflow.python.client import session
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import graph_util
-from tensorflow.python.framework import importer
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import app
-from tensorflow.python.platform import flags as flags_lib
-from tensorflow.python.platform import gfile
-from google.protobuf import text_format
-
-flags = flags_lib
-FLAGS = flags.FLAGS
-
-flags.DEFINE_boolean("print_nodes", False, """Lists all nodes in the model.""")
-flags.DEFINE_string("input", "", """TensorFlow 'GraphDef' file to load.""")
-flags.DEFINE_string("output_node_names", "",
-                    """Output node names, comma separated.""")
-flags.DEFINE_string("output", "", """File to save the output graph to.""")
-flags.DEFINE_integer("bitdepth", 8,
-                     """How many bits to quantize the graph to.""")
-flags.DEFINE_string("mode", "round",
-                    """What transformation to apply (round, quantize,"""
-                    """ eightbit, weights, or weights_rounded).""")
-flags.DEFINE_string("test_input_dims", "1,224,224,3",
-                    """The size of the input tensor to use when testing a"""
-                    """ graph loaded from a file.""")
-flags.DEFINE_boolean("strip_redundant_quantization", True,
-                     """Removes redundant dequantize/quantize pairs.""")
-flags.DEFINE_boolean("quantized_input", False,
-                     "If true, assume Placeholders are quantized with values "
-                     "covering [--quantized_input_min,--quantized_input_max]. "
-                     "Only supported when --mode=eightbit")
-flags.DEFINE_float("quantized_input_min", 0,
-                   "The minimum of the actual input range when "
-                   "--quantized_input")
-flags.DEFINE_float("quantized_input_max", 1,
-                   "The maximum of the actual input range when "
-                   "--quantized_input")
-flags.DEFINE_float(
-    "quantized_fallback_min", None,
-    "The fallback 'min' value to use for layers which lack min-max "
-    "information. Note: this should be considered a coarse tool just good "
-    "enough for experimentation purposes, since graphs quantized in this way "
-    "would be very inaccurate.")
-flags.DEFINE_float(
-    "quantized_fallback_max", None,
-    "The fallback 'max' value to use for layers which lack min-max "
-    "information. Note: this should be considered a coarse tool just good "
-    "enough for experimentation purposes, since graphs quantized in this way "
-    "would be very inaccurate.")
-flags.DEFINE_boolean("input_binary", True,
-                     """Input graph binary or text.""")
-flags.DEFINE_boolean("output_binary", True,
-                     """Output graph binary or text.""")
-flags.DEFINE_boolean(
-    "intel_cpu_eightbitize", False,
-    "If true eightbitized graph will include fused quantized"
-    "nodes in the output_graph for Intel CPU.")
-
-def print_input_nodes(current_node, nodes_map, indent, already_visited):
-  print(" " * indent + current_node.op + ":" + current_node.name)
-  already_visited[current_node.name] = True
-  for input_node_name in current_node.input:
-    if input_node_name in already_visited:
-      continue
-    input_node = nodes_map[input_node_name]
-    print_input_nodes(input_node, nodes_map, indent + 1, already_visited)
-
-
-def create_node(op, name, inputs):
-  new_node = node_def_pb2.NodeDef()
-  new_node.op = op
-  new_node.name = name
-  for input_name in inputs:
-    new_node.input.extend([input_name])
-  return new_node
-
-
-def create_constant_node(name, value, dtype, shape=None):
-  node = create_node("Const", name, [])
-  set_attr_dtype(node, "dtype", dtype)
-  set_attr_tensor(node, "value", value, dtype, shape)
-  return node
-
-
-def copy_attr(node, key, attr_value):
-  try:
-    node.attr[key].CopyFrom(attr_value)
-  except KeyError:
-    pass
-
-
-def set_attr_dtype(node, key, value):
-  try:
-    node.attr[key].CopyFrom(
-        attr_value_pb2.AttrValue(type=value.as_datatype_enum))
-  except KeyError:
-    pass
-
-
-def set_attr_shape(node, key, value):
-  try:
-    node.attr[key].CopyFrom(
-        attr_value_pb2.AttrValue(shape=tensor_shape.as_shape(value).as_proto()))
-  except KeyError:
-    pass
-
-
-def set_attr_tensor(node, key, value, dtype, shape=None):
-  try:
-    node.attr[key].CopyFrom(
-        attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
-            value, dtype=dtype, shape=shape)))
-  except KeyError:
-    pass
-
-
-def set_attr_string(node, key, value):
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(s=value))
-  except KeyError:
-    pass
-
-
-def set_attr_int_list(node, key, value):
-  list_value = attr_value_pb2.AttrValue.ListValue(i=value)
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(list=list_value))
-  except KeyError:
-    pass
-
-
-def set_attr_bool(node, key, value):
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(b=value))
-  except KeyError:
-    pass
-
-
-def set_attr_int(node, key, value):
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(i=value))
-  except KeyError:
-    pass
-
-
-def set_attr_float(node, key, value):
-  try:
-    node.attr[key].CopyFrom(attr_value_pb2.AttrValue(f=value))
-  except KeyError:
-    pass
-
-
-def node_name_from_input(node_name):
-  """Strips off ports and other decorations to get the underlying node name."""
-  if node_name.startswith("^"):
-    node_name = node_name[1:]
-  m = re.search(r"(.*):\d+$", node_name)
-  if m:
-    node_name = m.group(1)
-  return node_name
-
-
-def ensure_tensor_name_has_port(node_name):
-  """Makes sure that a tensor name has :0 if no explicit port exists."""
-  m = re.search(r"(.*):\d+$", node_name)
-  if m:
-    name_with_port = node_name
-  else:
-    name_with_port = node_name + ":0"
-  return name_with_port
-
-
-def unique_node_name_from_input(node_name):
-  """Replaces invalid characters in input names to get a unique node name."""
-  return node_name.replace(":", "__port__").replace("^", "__hat__")
-
-
-def quantize_array(arr, num_buckets):
-  """Quantizes a numpy array.
-
-  This function maps each scalar in arr to the center of one of num_buckets
-  buckets. For instance,
-  quantize_array([0, 0.3, 0.6, 1], 2) => [0.25, 0.25, 0.75, 0.75]
-
-  Args:
-    arr: The numpy array to quantize.
-    num_buckets: The number of buckets to map "var" to.
-  Returns:
-    The quantized numpy array.
-  Raises:
-    ValueError: when num_buckets < 1.
-  """
-  if num_buckets < 1:
-    raise ValueError("num_buckets must be >= 1")
-  arr_max = arr.max()
-  arr_min = arr.min()
-  if arr_max == arr_min:
-    return arr
-  bucket_width = (arr_max - arr_min) / num_buckets
-  # Map scalars to bucket indices. Take special care of max(arr).
-  bucket_indices = np.floor((arr - arr_min) / bucket_width)
-  bucket_indices[bucket_indices == num_buckets] = num_buckets - 1
-  # Map each scalar to the center of a bucket.
-  arr = arr_min + bucket_width * (bucket_indices + 0.5)
-  return arr
-
-
-def quantize_weight_rounded(input_node):
-  """Returns a replacement node for input_node containing bucketed floats."""
-  input_tensor = input_node.attr["value"].tensor
-  tensor_value = tensor_util.MakeNdarray(input_tensor)
-  shape = input_tensor.tensor_shape
-  # Currently, the parameter FLAGS.bitdepth is used to compute the
-  # number of buckets as 1 << FLAGS.bitdepth, meaning the number of
-  # buckets can only be a power of 2.
-  # This could be fixed by introducing a new parameter, num_buckets,
-  # which would allow for more flexibility in chosing the right model
-  # size/accuracy tradeoff. But I didn't want to add more parameters
-  # to this script than absolutely necessary.
-  num_buckets = 1 << FLAGS.bitdepth
-  tensor_value_rounded = quantize_array(tensor_value, num_buckets)
-  tensor_shape_list = tensor_util.TensorShapeProtoToList(shape)
-  return [
-      create_constant_node(
-          input_node.name,
-          tensor_value_rounded,
-          dtypes.float32,
-          shape=tensor_shape_list)
-  ]
-
-
-def quantize_weight_eightbit(input_node, quantization_mode):
-  """Returns replacement nodes for input_node using the Dequantize op."""
-  base_name = input_node.name + "_"
-  quint8_const_name = base_name + "quint8_const"
-  min_name = base_name + "min"
-  max_name = base_name + "max"
-  float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
-  min_value = np.min(float_tensor.flatten())
-  max_value = np.max(float_tensor.flatten())
-  # Make sure that the range includes zero.
-  if min_value > 0.0:
-    min_value = 0.0
-  # min_value == max_value is a tricky case. It can occur for general
-  # tensors, and of course for scalars. The quantized ops cannot deal
-  # with this case, so we set max_value to something else.
-  # It's a tricky question what is the numerically best solution to
-  # deal with this degeneracy.
-  # TODO(petewarden): Better use a tolerance than a hard comparison?
-  if min_value == max_value:
-    if abs(min_value) < 0.000001:
-      max_value = min_value + 1.0
-    elif min_value > 0:
-      max_value = 2 * min_value
-    else:
-      max_value = min_value / 2.0
-
-  sess = session.Session()
-  with sess.as_default():
-    quantize_op = array_ops.quantize_v2(
-        float_tensor,
-        min_value,
-        max_value,
-        dtypes.quint8,
-        mode=quantization_mode)
-    quint8_tensor = quantize_op[0].eval()
-    min_value = quantize_op[1].eval()
-    max_value = quantize_op[2].eval()
-  shape = tensor_util.TensorShapeProtoToList(input_node.attr["value"]
-                                             .tensor.tensor_shape)
-  quint8_const_node = create_constant_node(
-      quint8_const_name, quint8_tensor, dtypes.quint8, shape=shape)
-  min_node = create_constant_node(min_name, min_value, dtypes.float32)
-  max_node = create_constant_node(max_name, max_value, dtypes.float32)
-  dequantize_node = create_node("Dequantize", input_node.name,
-                                [quint8_const_name, min_name, max_name])
-  set_attr_dtype(dequantize_node, "T", dtypes.quint8)
-  set_attr_string(dequantize_node, "mode", quantization_mode)
-  return [quint8_const_node, min_node, max_node, dequantize_node]
-
-# TODO(intel-tf): Current Intel-CPU quantized Conv2D and Matmul supports only
-# signed scaled mode of weight quantization.
-def intel_cpu_quantize_weight_eightbit(input_node, quantization_mode="SCALED"):
-  """Returns replacement of constant weight node.
-
-  This function creates (i) a quantized constant node, (ii) a float min node
-  (iii) a float max node, and (iv) a dequantize node."""
-  base_name = input_node.name + "_"
-  qint8_const_name = base_name + "qint8_const"
-  min_name = base_name + "min"
-  max_name = base_name + "max"
-  float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
-  min_value = np.min(float_tensor.flatten())
-  max_value = np.max(float_tensor.flatten())
-  # Same processing of min-max as in quantize_weight_eightbit function.
-  if min_value > 0.0:
-    min_value = 0.0
-  if min_value == max_value:
-    if abs(min_value) < 0.000001:
-      max_value = min_value + 1.0
-    elif min_value > 0:
-      max_value = 2 * min_value
-    else:
-      max_value = min_value / 2.0
-
-  sess = session.Session()
-  with sess.as_default():
-    quantize_op = array_ops.quantize_v2(
-        float_tensor,
-        min_value,
-        max_value,
-        dtypes.qint8,
-        mode=quantization_mode,
-        round_mode="HALF_TO_EVEN")
-    qint8_tensor = quantize_op[0].eval()
-    # Updated min-max values should be passed to the next feeding node.
-    min_value = quantize_op[1].eval()
-    max_value = quantize_op[2].eval()
-  shape = tensor_util.TensorShapeProtoToList(input_node.attr["value"]
-                                             .tensor.tensor_shape)
-  qint8_const_node = create_constant_node(
-      qint8_const_name, qint8_tensor,
-      dtypes.qint8,
-      shape=shape)
-  min_node = create_constant_node(min_name, min_value, dtypes.float32)
-  max_node = create_constant_node(max_name, max_value, dtypes.float32)
-  dequantize_node = create_node("Dequantize", input_node.name,
-                                [qint8_const_name, min_name, max_name])
-  set_attr_dtype(dequantize_node, "T", dtypes.qint8)
-  set_attr_string(dequantize_node, "mode", b'SCALED')
-  return [qint8_const_node, min_node, max_node, dequantize_node]
-
-EightbitizeRecursionState = collections.namedtuple(
-    "EightbitizeRecursionState",
-    ["already_visited", "output_node_stack", "merged_with_fake_quant"])
-
-
-class GraphRewriter(object):
-  """Takes a float graph, and rewrites it in quantized form."""
-
-  def __init__(self,
-               input_graph,
-               mode,
-               quantized_input_range,
-               fallback_quantization_range=None,
-               intel_cpu_eightbitize=False):
-    """Sets up the class to rewrite a float graph.
-
-    Args:
-      input_graph: A float graph to transform.
-      mode: A string controlling how quantization is performed -
-        round, quantize, eightbit, or weights.
-      quantized_input_range: if set, assume the input is
-        quantized and represents the range
-        [quantized_input_range[0], quantized_input_range[1]]
-      fallback_quantization_range: if set, then for nodes where the quantization
-        range can't be inferred from the graph, use the range
-        [fallback_quantization_range[0], fallback_quantization_range[1]) instead
-        of using a RequantizationRange node in the graph.
-
-    Raises:
-      ValueError: Two nodes with the same name were found in the graph.
-    """
-    self.input_graph = input_graph
-    self.nodes_map = self.create_nodes_map(input_graph)
-    self.output_graph = None
-    self.mode = mode
-    self.intel_cpu_eightbitize = intel_cpu_eightbitize
-    self.final_node_renames = {}
-    if quantized_input_range:
-      self.input_range = (quantized_input_range[0], quantized_input_range[1])
-      if self.input_range[0] >= self.input_range[1]:
-        raise ValueError("Invalid quantized_input_range: [%s,%s]" %
-                         self.input_range)
-      if self.mode != "eightbit":
-        raise ValueError(
-            "quantized_input_range can only be specified in eightbit mode")
-    else:
-      self.input_range = None
-
-    if fallback_quantization_range:
-      self.fallback_quantization_range = [
-          fallback_quantization_range[0], fallback_quantization_range[1]
-      ]
-      if (self.fallback_quantization_range[0] >=
-          self.fallback_quantization_range[1]):
-        raise ValueError("Invalid fallback_quantization_range: [%s,%s]" %
-                         self.fallback_quantization_range)
-      if self.mode != "eightbit":
-        raise ValueError("fallback_quantization_range can only be "
-                         "specified in eightbit mode")
-    else:
-      self.fallback_quantization_range = None
-
-    # Data that is valid only during the recursive call to rewrite the graph.
-    self.state = None
-
-  def create_nodes_map(self, graph):
-    """Builds a mapping of node names to their defs from the graph."""
-    nodes_map = {}
-    for node in graph.node:
-      if node.name not in nodes_map.keys():
-        nodes_map[node.name] = node
-      else:
-        raise ValueError("Duplicate node names detected.")
-    return nodes_map
-
-  def rewrite(self, output_node_names):
-    """Triggers rewriting of the float graph.
-
-    Args:
-      output_node_names: A list of names of the nodes that produce the final
-        results.
-
-    Returns:
-      A quantized version of the float graph.
-    """
-    self.output_graph = graph_pb2.GraphDef()
-    output_nodes = [
-        self.nodes_map[output_node_name]
-        for output_node_name in output_node_names
-    ]
-    if self.mode == "round":
-      self.already_visited = {}
-      for output_node in output_nodes:
-        self.round_nodes_recursively(output_node)
-    elif self.mode == "quantize":
-      self.already_visited = {}
-      self.already_quantized = {}
-      for output_node in output_nodes:
-        self.quantize_nodes_recursively(output_node)
-    elif self.mode == "eightbit":
-      self.set_input_graph(graph_util.remove_training_nodes(
-          self.input_graph, protected_nodes=output_node_names))
-      output_nodes = [
-          self.nodes_map[output_node_name]
-          for output_node_name in output_node_names
-      ]
-
-      self.state = EightbitizeRecursionState(
-          already_visited={}, output_node_stack=[], merged_with_fake_quant={})
-
-      if self.intel_cpu_eightbitize:
-        # TODO(intel-tf): Enables fused quantized node for intel cpu.
-        for output_node in output_nodes:
-          # Intiailize output_node_stack with output node.
-          # Each element in the stack is a mutable list containing
-          # [parent_node, index_to_parent, quantization_flag, fusion_flag].
-          # In case of root node, make self as parent.
-          self.state.output_node_stack.append(
-              [output_node, None, False, False])
-          self.intel_cpu_eightbitize_nodes_recursively(output_node)
-          self.state.output_node_stack.pop()
-      else:
-        for output_node in output_nodes:
-          self.eightbitize_nodes_recursively(output_node)
-
-      self.state = None
-      if self.input_range:
-        self.add_output_graph_node(
-            create_constant_node("quantized_input_min_value", self.input_range[
-                0], dtypes.float32, []))
-        self.add_output_graph_node(
-            create_constant_node("quantized_input_max_value", self.input_range[
-                1], dtypes.float32, []))
-      if self.fallback_quantization_range:
-        self.add_output_graph_node(
-            create_constant_node("fallback_quantization_min_value",
-                                 self.fallback_quantization_range[0],
-                                 dtypes.float32, []))
-        self.add_output_graph_node(
-            create_constant_node("fallback_quantization_max_value",
-                                 self.fallback_quantization_range[1],
-                                 dtypes.float32, []))
-      if FLAGS.strip_redundant_quantization:
-        self.output_graph = self.remove_redundant_quantization(
-            self.output_graph)
-        self.remove_dead_nodes(output_node_names)
-      self.apply_final_node_renames()
-    elif self.mode == "weights":
-      self.output_graph = self.quantize_weights(self.input_graph,
-                                                b"MIN_COMBINED")
-      self.remove_dead_nodes(output_node_names)
-    elif self.mode == "weights_rounded":
-      self.output_graph = self.quantize_weights(self.input_graph, self.mode)
-      self.remove_dead_nodes(output_node_names)
-    else:
-      print("Bad mode - " + self.mode + ".")
-    return self.output_graph
-
-  def round_nodes_recursively(self, current_node):
-    """The entry point for simple rounding quantization."""
-    if (current_node.name in self.already_visited
-       ) and self.already_visited[current_node.name]:
-      return
-    self.already_visited[current_node.name] = True
-    for input_node_name in current_node.input:
-      input_node_name = node_name_from_input(input_node_name)
-      input_node = self.nodes_map[input_node_name]
-      self.round_nodes_recursively(input_node)
-    nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"]
-    if any(current_node.op in s for s in nodes_to_quantize):
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(current_node)
-      new_node.name = current_node.name + "_original"
-      self.add_output_graph_node(new_node)
-      levels = 1 << FLAGS.bitdepth
-      constant_name = current_node.name + "_round_depth"
-      constant_tensor = constant_op.constant(
-          levels, dtype=dtypes.int32, name=constant_name)
-      constant_node = constant_tensor.op.node_def
-      self.add_output_graph_node(constant_node)
-      quantize_node = node_def_pb2.NodeDef()
-      quantize_node.op = "RoundToSteps"
-      quantize_node.name = current_node.name
-      quantize_node.input.extend([current_node.name + "_original"])
-      quantize_node.input.extend([constant_node.name])
-      self.add_output_graph_node(quantize_node)
-    else:
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(current_node)
-      self.add_output_graph_node(new_node)
-
-  def quantize_nodes_recursively(self, current_node):
-    """The entry point for quantizing nodes to eight bit and back."""
-    if self.already_visited[current_node.name]:
-      return
-    self.already_visited[current_node.name] = True
-    for input_node_name in current_node.input:
-      input_node_name = node_name_from_input(input_node_name)
-      input_node = self.nodes_map[input_node_name]
-      self.quantize_nodes_recursively(input_node)
-    nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"]
-    if any(current_node.op in s for s in nodes_to_quantize):
-      for input_name in current_node.input:
-        input_name = node_name_from_input(input_name)
-        input_node = self.nodes_map[input_name]
-        self.quantize_node(input_node)
-      self.quantize_node(current_node)
-    else:
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(current_node)
-      self.add_output_graph_node(new_node)
-
-  def quantize_node(self, input_node):
-    """Handles quantizing a single node."""
-    input_name = input_node.name
-    if input_name in self.already_quantized:
-      return
-    self.already_quantized[input_name] = True
-    original_input_name = input_name + "_original"
-    reshape_name = input_name + "_reshape"
-    reshape_dims_name = input_name + "_reshape_dims"
-    max_name = input_name + "_max"
-    min_name = input_name + "_min"
-    dims_name = input_name + "_dims"
-    quantize_name = input_name + "_quantize"
-    dequantize_name = input_name
-    original_input_node = node_def_pb2.NodeDef()
-    original_input_node.CopyFrom(input_node)
-    original_input_node.name = original_input_name
-    self.add_output_graph_node(original_input_node)
-    reshape_dims_node = create_constant_node(reshape_dims_name, -1,
-                                             dtypes.int32, [1])
-    self.add_output_graph_node(reshape_dims_node)
-    reshape_node = create_node("Reshape", reshape_name,
-                               [original_input_name, reshape_dims_name])
-    set_attr_dtype(reshape_node, "T", dtypes.float32)
-    self.add_output_graph_node(reshape_node)
-    dims_node = create_constant_node(dims_name, 0, dtypes.int32, [1])
-    self.add_output_graph_node(dims_node)
-    max_node = create_node("Max", max_name, [reshape_name, dims_name])
-    set_attr_dtype(max_node, "T", dtypes.float32)
-    set_attr_bool(max_node, "keep_dims", False)
-    self.add_output_graph_node(max_node)
-    min_node = create_node("Min", min_name, [reshape_name, dims_name])
-    set_attr_dtype(min_node, "T", dtypes.float32)
-    set_attr_bool(min_node, "keep_dims", False)
-    self.add_output_graph_node(min_node)
-    quantize_node = create_node("Quantize", quantize_name,
-                                [original_input_name, min_name, max_name])
-    set_attr_dtype(quantize_node, "T", dtypes.quint8)
-    set_attr_string(quantize_node, "mode", b"MIN_FIRST")
-    self.add_output_graph_node(quantize_node)
-    dequantize_node = create_node("Dequantize", dequantize_name,
-                                  [quantize_name, min_name, max_name])
-    set_attr_dtype(dequantize_node, "T", dtypes.quint8)
-    set_attr_string(dequantize_node, "mode", b"MIN_FIRST")
-    self.add_output_graph_node(dequantize_node)
-
-  def should_merge_with_fake_quant_node(self):
-    """Should the current node merge with self.state.output_node_stack[-1]?"""
-    if not self.state.output_node_stack:
-      return False
-    top = self.state.output_node_stack[-1]
-    return top[1] == 0 and top[0].op in ["FakeQuantWithMinMaxVars"]
-
-  def should_quantize_const(self, node):
-    if not self.state.output_node_stack:
-      return False
-    top = self.state.output_node_stack[-1]
-    if not top[2]:
-      return False
-    dtype = dtypes.as_dtype(node.attr["dtype"].type)
-    assert dtype == dtypes.float32, (
-        "Failed to quantized constant %s of type %s" % (node.name, dtype))
-    return True
-
-  def eightbitize_nodes_recursively(self, current_node):
-    """The entry point for transforming a graph into full eight bit."""
-    if current_node.name in self.state.already_visited:
-      if (self.should_merge_with_fake_quant_node() or
-          current_node.name in self.state.merged_with_fake_quant):
-        raise ValueError("Unsupported graph structure: output of node %s "
-                         "is processed by a FakeQuant* node and should have "
-                         "no other outputs.", current_node.name)
-      return
-    self.state.already_visited[current_node.name] = True
-
-    for i, input_node_name in enumerate(current_node.input):
-      quantize_input = False
-      if current_node.op in ("MatMul", "Conv2D", "BiasAdd", "MaxPool",
-                             "AvgPool", "Relu", "Relu6",
-                             "BatchNormWithGlobalNormalization"):
-        quantize_input = True
-      elif current_node.op == "Concat" and i > 0:
-        quantize_input = (
-            dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32)
-      elif current_node.op == "Reshape" and i == 0:
-        quantize_input = (
-            dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32)
-
-      self.state.output_node_stack.append((current_node, i, quantize_input))
-
-      input_node_name = node_name_from_input(input_node_name)
-      input_node = self.nodes_map[input_node_name]
-      self.eightbitize_nodes_recursively(input_node)
-
-      self.state.output_node_stack.pop()
-
-    if current_node.op == "MatMul":
-      self.eightbitize_mat_mul_node(current_node)
-    elif current_node.op == "Conv2D":
-      self.eightbitize_conv_node(current_node)
-    elif current_node.op == "BiasAdd":
-      self.eightbitize_bias_add_node(current_node)
-    elif current_node.op == "MaxPool" or current_node.op == "AvgPool":
-      self.eightbitize_single_input_tensor_node(current_node,
-                                                self.add_pool_function)
-    elif current_node.op == "Relu" or current_node.op == "Relu6":
-      self.eightbitize_single_input_tensor_node(current_node,
-                                                self.add_relu_function)
-    elif (current_node.op == "Concat" and
-          dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32):
-      self.eightbitize_concat_node(current_node)
-    elif current_node.op == "BatchNormWithGlobalNormalization":
-      self.eightbitize_batch_norm_node(current_node)
-    elif (current_node.op == "Reshape" and
-          dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32):
-      self.eightbitize_reshape_node(current_node)
-    elif (self.input_range and
-          current_node.op in ("Placeholder", "PlaceholderV2")):
-      self.eightbitize_placeholder_node(current_node)
-    elif current_node.op == "FakeQuantWithMinMaxVars":
-      # It will have been merged into the underlying node.
-      pass
-    elif current_node.op == "Const":
-      if self.should_quantize_const(current_node):
-        for n in quantize_weight_eightbit(current_node, b"MIN_FIRST"):
-          self.add_output_graph_node(n)
-      else:
-        new_node = node_def_pb2.NodeDef()
-        new_node.CopyFrom(current_node)
-        self.add_output_graph_node(new_node)
-
-    ###################################################################
-    # Note: if more cases are added here, you may need to update the op
-    # name lists in the loop over children at the start of the function.
-    ###################################################################
-    else:
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(current_node)
-      self.add_output_graph_node(new_node)
-
-    if (self.should_merge_with_fake_quant_node() and
-        current_node.name not in self.state.merged_with_fake_quant):
-      raise ValueError(
-          "FakeQuant* node %s failed to merge with node %s of type %s" %
-          (self.state.output_node_stack[-1][0], current_node.name,
-           current_node.op))
-
-  # TODO(intel-tf): Quantized Conv2D could be fused with few other succeeding
-  # ops. Current support is for BiasAdd and Relu. Future implementation will
-  # include:
-  # (i)   Conv2D + {BiasAdd} + Relu + Add + Relu
-  # (ii)  Conv2D + {BiasAdd} + Relu + Add
-  # (ii)  Conv2D + {BiasAdd} + Add + Relu
-  # (iii) Conv2D + {BiasAdd} + Add
-  def intel_cpu_eightbitize_conv_node(self, original_node, bias_node=None,
-                                      bias_add_name=None, add_node_name=None,
-                                      relu_node_name=None):
-    """Replaces a Conv2D node with the eight bit equivalent sub-graph."""
-    all_input_names = self.add_eightbit_prologue_nodes(original_node)
-
-    if bias_node and add_node_name and relu_node_name:
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(bias_node)
-      self.add_output_graph_node(new_node)
-      all_input_names = all_input_names[:2] + [bias_node.name] + \
-          all_input_names[2:] + [add_node_name]
-      quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
-      quantized_conv_node = create_node("QuantizedConv2DWithBiasSumAndRelu",
-                                        quantized_conv_name, all_input_names)
-    elif bias_node and (not add_node_name) and relu_node_name:
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(bias_node)
-      self.add_output_graph_node(new_node)
-      all_input_names = all_input_names[:2] + [bias_node.name] + \
-          all_input_names[2:]
-      quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
-      quantized_conv_node = create_node("QuantizedConv2DWithBiasAndRelu",
-                                        quantized_conv_name, all_input_names)
-    elif bias_node and bias_add_name  and \
-        (not add_node_name) and (not relu_node_name):
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(bias_node)
-      self.add_output_graph_node(new_node)
-      all_input_names = all_input_names[:2] + [bias_node.name] + \
-          all_input_names[2:]
-      quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
-      quantized_conv_node = create_node("QuantizedConv2DWithBias",
-                                        quantized_conv_name, all_input_names)
-    else:
-      quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
-      quantized_conv_node = create_node("QuantizedConv2D", quantized_conv_name,
-                                        all_input_names)
-    copy_attr(quantized_conv_node, "strides", original_node.attr["strides"])
-    copy_attr(quantized_conv_node, "padding", original_node.attr["padding"])
-    copy_attr(quantized_conv_node, "dilations", original_node.attr["dilations"])
-    set_attr_dtype(quantized_conv_node, "Tinput", dtypes.quint8)
-    set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8)
-    set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32)
-    self.add_output_graph_node(quantized_conv_node)
-    quantize_down_name = self.add_quantize_down_nodes(original_node,
-                                                      quantized_conv_name)
-    if bias_node and relu_node_name:
-      self.add_dequantize_result_node(quantize_down_name, relu_node_name)
-    elif bias_node and bias_add_name and \
-        (not add_node_name) and (not relu_node_name):
-      self.add_dequantize_result_node(quantize_down_name, bias_add_name)
-    else:
-      self.add_dequantize_result_node(quantize_down_name, original_node.name)
-
-  # TODO(intel-tf): To check whether Conv2D is fed by relu directly or via
-  # pooling ops. This is required as intel cpu requires input tensor for Conv2D
-  # to be non-negative.
-  def intel_cpu_find_relu_recursively(self, current_node):
-    """Helper function to check if Conv2D is fed by Relu."""
-    if current_node.op == "Relu":
-      return True
-    else:
-      first_input_node_name = node_name_from_input(current_node.input[0])
-      input_node = self.nodes_map[first_input_node_name]
-      if input_node.op in ("ConcatV2", "MaxPool", "AvgPool", "Relu"):
-        return self.intel_cpu_find_relu_recursively(input_node)
-      else:
-        return False
-
-  # TODO(intel-tf): We leave the output graph partially quantized for
-  # intel cpu. Current quantization support is for Conv2D and its fusion.
-  # More quantized operations will be included as more implementations are
-  # completed.
-  def intel_cpu_eightbitize_nodes_recursively(self, current_node):
-    """The entry point for transforming a graph into full eight bit."""
-    if current_node.name in self.state.already_visited:
-      if (self.should_merge_with_fake_quant_node() or
-          current_node.name in self.state.merged_with_fake_quant):
-        raise ValueError("Unsupported graph structure: output of node %s "
-                         "is processed by a FakeQuant* node and should have "
-                         "no other outputs.", current_node.name)
-      return
-
-    self.state.already_visited[current_node.name] = True
-    quantize_input, should_quantize_conv, \
-        fuse_with_conv = (False, False, False)
-
-    if current_node.op == "Conv2D":
-      should_quantize_conv = self.intel_cpu_find_relu_recursively(current_node)
-
-    inputs = list(enumerate(current_node.input))
-    if current_node.op == "AddN":
-      inputs = reversed(inputs)  # pylint: disable=redefined-variable-type
-
-    for i, input_node_name in inputs:
-      input_node_name = node_name_from_input(input_node_name)
-      input_node = self.nodes_map[input_node_name]
-
-      if should_quantize_conv and i == 1 and input_node.op == "Const":
-        quantize_input = True
-
-      self.state.output_node_stack.append([current_node, i, quantize_input,
-                                           fuse_with_conv])
-      self.intel_cpu_eightbitize_nodes_recursively(input_node)
-      self.state.output_node_stack.pop()
-
-    if current_node.op == "Conv2D" and should_quantize_conv and quantize_input:
-      # match pattern for fusion with bias and relu
-      grand_parent, parent = self.state.output_node_stack[-2:]
-      if parent[0].op == "BiasAdd" and grand_parent[0].op == "Relu":
-        self.state.output_node_stack[-2][3] = True # BiasAdd to be fused
-        self.state.output_node_stack[-3][3] = True # Relu to be fused
-        bias_node_name = node_name_from_input(parent[0].input[1])
-        bias_node = self.nodes_map[bias_node_name]
-        self.intel_cpu_eightbitize_conv_node(current_node, bias_node, None,
-                                             None, grand_parent[0].name)
-      elif parent[0].op == "BiasAdd" and grand_parent[0].op == "AddN":
-        grand_grand_parent = self.state.output_node_stack[-3]
-        if grand_grand_parent[0].op == "Relu" \
-            and (not self.state.output_node_stack[-3][3]) \
-            and (not self.state.output_node_stack[-4][3]):
-          self.state.output_node_stack[-2][3] = True # BiasAdd to be fused
-          self.state.output_node_stack[-3][3] = True # AddN to be fused
-          self.state.output_node_stack[-4][3] = True # Relu to be fused
-          bias_node_name = node_name_from_input(parent[0].input[1])
-          bias_node = self.nodes_map[bias_node_name]
-          add_node_name = node_name_from_input(grand_parent[0].input[0])
-          self.intel_cpu_eightbitize_conv_node(current_node, bias_node, None,
-                                               add_node_name,
-                                               grand_grand_parent[0].name)
-        elif not self.state.output_node_stack[-2][3]: # Fuse BiasAdd then
-          self.state.output_node_stack[-2][3] = True # BiasAdd to be fused
-          bias_node_name = node_name_from_input(parent[0].input[1])
-          bias_node = self.nodes_map[bias_node_name]
-          self.intel_cpu_eightbitize_conv_node(current_node, bias_node,
-                                               parent[0].name)
-        else:
-          self.intel_cpu_eightbitize_conv_node(current_node)
-      elif parent[0].op == "BiasAdd" and \
-           (not self.state.output_node_stack[-2][3]):
-        self.state.output_node_stack[-2][3] = True # BiasAdd to be fused
-        bias_node_name = node_name_from_input(parent[0].input[1])
-        bias_node = self.nodes_map[bias_node_name]
-        self.intel_cpu_eightbitize_conv_node(current_node, bias_node,
-                                             parent[0].name)
-      else:
-        self.intel_cpu_eightbitize_conv_node(current_node)
-    elif current_node.op == "BiasAdd" and \
-         self.state.output_node_stack[-1][3]:
-      pass # This op is already processed by fused quantization
-    elif current_node.op == "Relu" and \
-         self.state.output_node_stack[-1][3]:
-      pass # This op is already processed by fused quantization
-    elif current_node.op == "AddN" and \
-         self.state.output_node_stack[-1][3]:
-      pass # AddN op is already processed by fused quatization
-    elif current_node.op == "MaxPool" or current_node.op == "AvgPool":
-      self.eightbitize_single_input_tensor_node(current_node,
-                                                self.add_pool_function)
-    elif (current_node.op == "ConcatV2" and
-          dtypes.as_dtype(current_node.attr["T"].type) == dtypes.float32):
-      self.eightbitize_concatv2_node(current_node)
-    elif current_node.op == "Const":
-      parent = self.state.output_node_stack[-1]
-      if parent[0].op == "Conv2D" and parent[2]:
-        for n in intel_cpu_quantize_weight_eightbit(current_node, b"SCALED"):
-          self.add_output_graph_node(n)
-      elif parent[0].op == "BiasAdd" and \
-           self.state.output_node_stack[-2][3]:
-        pass # This constant is already process by fused quantization
-      else:
-        new_node = node_def_pb2.NodeDef()
-        new_node.CopyFrom(current_node)
-        self.add_output_graph_node(new_node)
-    else:
-      new_node = node_def_pb2.NodeDef()
-      new_node.CopyFrom(current_node)
-      self.add_output_graph_node(new_node)
-
-    if (self.should_merge_with_fake_quant_node() and
-        current_node.name not in self.state.merged_with_fake_quant):
-      raise ValueError(
-          "FakeQuant* node %s failed to merge with node %s of type %s" %
-          (self.state.output_node_stack[-1][0], current_node.name,
-           current_node.op))
-
-  def add_eightbit_prologue_nodes(self, original_node):
-    """Adds input conversion nodes to handle quantizing the underlying node."""
-    namespace_prefix = original_node.name + "_eightbit"
-    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
-        namespace_prefix)
-    input_names = []
-    min_max_names = []
-    for original_input_name in original_node.input:
-      quantize_input_name, min_input_name, max_input_name = (
-          self.eightbitize_input_to_node(namespace_prefix, original_input_name,
-                                         reshape_dims_name,
-                                         reduction_dims_name))
-      input_names.append(quantize_input_name)
-      min_max_names.append(min_input_name)
-      min_max_names.append(max_input_name)
-    all_input_names = []
-    all_input_names.extend(input_names)
-    all_input_names.extend(min_max_names)
-    return all_input_names
-
-  def add_common_quantization_nodes(self, namespace_prefix):
-    """Builds constant nodes needed for quantization of inputs."""
-    reshape_dims_name = namespace_prefix + "_reshape_dims"
-    reduction_dims_name = namespace_prefix + "_reduction_dims"
-
-    reshape_dims_node = create_constant_node(reshape_dims_name, -1,
-                                             dtypes.int32, [1])
-    self.add_output_graph_node(reshape_dims_node)
-    reduction_dims_node = create_constant_node(reduction_dims_name, 0,
-                                               dtypes.int32, [1])
-    self.add_output_graph_node(reduction_dims_node)
-    return reshape_dims_name, reduction_dims_name
-
-  def eightbitize_input_to_node(self, namespace_prefix, original_input_name,
-                                reshape_dims_name, reduction_dims_name):
-    """Takes one float input to an op, and converts it to quantized form."""
-    unique_input_name = unique_node_name_from_input(original_input_name)
-    reshape_input_name = namespace_prefix + "_reshape_" + unique_input_name
-    min_input_name = namespace_prefix + "_min_" + unique_input_name
-    max_input_name = namespace_prefix + "_max_" + unique_input_name
-    quantize_input_name = namespace_prefix + "_quantize_" + unique_input_name
-    reshape_input_node = create_node("Reshape", reshape_input_name,
-                                     [original_input_name, reshape_dims_name])
-    set_attr_dtype(reshape_input_node, "T", dtypes.float32)
-    self.add_output_graph_node(reshape_input_node)
-    min_input_node = create_node("Min", min_input_name,
-                                 [reshape_input_name, reduction_dims_name])
-    set_attr_dtype(min_input_node, "T", dtypes.float32)
-    set_attr_bool(min_input_node, "keep_dims", False)
-    self.add_output_graph_node(min_input_node)
-    max_input_node = create_node("Max", max_input_name,
-                                 [reshape_input_name, reduction_dims_name])
-    set_attr_dtype(max_input_node, "T", dtypes.float32)
-    set_attr_bool(max_input_node, "keep_dims", False)
-    self.add_output_graph_node(max_input_node)
-    quantize_input_node = create_node(
-        "QuantizeV2", quantize_input_name,
-        [original_input_name, min_input_name, max_input_name])
-    set_attr_dtype(quantize_input_node, "T", dtypes.quint8)
-    set_attr_string(quantize_input_node, "mode",
-                    b"SCALED" if self.intel_cpu_eightbitize else  b"MIN_FIRST")
-    set_attr_string(quantize_input_node, "round_mode",
-                    b"HALF_TO_EVEN" if self.intel_cpu_eightbitize
-                    else  b"HALF_AWAY_FROM_ZERO")
-    self.add_output_graph_node(quantize_input_node)
-    min_output_name = quantize_input_name + ":1"
-    max_output_name = quantize_input_name + ":2"
-    return quantize_input_name, min_output_name, max_output_name
-
-  def add_quantize_down_nodes(self, original_node, quantized_output_name):
-    quantized_outputs = [
-        quantized_output_name, quantized_output_name + ":1",
-        quantized_output_name + ":2"
-    ]
-    min_max_inputs = None
-    if self.should_merge_with_fake_quant_node():
-      # Use the inputs to the FakeQuantWithMinMaxVars node as the inputs to
-      # Requantize.
-      fake_quant_node = self.state.output_node_stack[-1][0]
-      min_max_inputs = [fake_quant_node.input[1], fake_quant_node.input[2]]
-      assert original_node.name not in self.state.merged_with_fake_quant
-      self.state.merged_with_fake_quant[original_node.name] = True
-    elif self.fallback_quantization_range:
-      min_max_inputs = [
-          "fallback_quantization_min_value:0",
-          "fallback_quantization_max_value:0"
-      ]
-    else:
-      # Add a RequantizationRange node for finding the min and max values.
-      requant_range_node = create_node(
-          "RequantizationRange", original_node.name + "_eightbit_requant_range",
-          quantized_outputs)
-      set_attr_dtype(requant_range_node, "Tinput", dtypes.qint32)
-      self.add_output_graph_node(requant_range_node)
-      min_max_inputs = [
-          requant_range_node.name + ":0", requant_range_node.name + ":1"
-      ]
-    requantize_node = create_node("Requantize",
-                                  original_node.name + "_eightbit_requantize",
-                                  quantized_outputs + min_max_inputs)
-    set_attr_dtype(requantize_node, "Tinput", dtypes.qint32)
-    set_attr_dtype(requantize_node, "out_type", dtypes.quint8)
-    self.add_output_graph_node(requantize_node)
-    return requantize_node.name
-
-  def add_dequantize_result_node(self,
-                                 quantized_output_name,
-                                 original_node_name,
-                                 min_tensor_index=1):
-    min_max_inputs = [
-        "%s:%s" % (quantized_output_name, min_tensor_index),
-        "%s:%s" % (quantized_output_name, (min_tensor_index + 1))
-    ]
-    dequantize_name = original_node_name
-    if self.should_merge_with_fake_quant_node():
-      fake_quant_node = self.state.output_node_stack[-1][0]
-      if original_node_name not in self.state.merged_with_fake_quant:
-        min_max_inputs = [fake_quant_node.input[1], fake_quant_node.input[2]]
-        self.state.merged_with_fake_quant[original_node_name] = True
-      dequantize_name = fake_quant_node.name
-
-    dequantize_node = create_node(
-        "Dequantize", dequantize_name,
-        [quantized_output_name, min_max_inputs[0], min_max_inputs[1]])
-    set_attr_dtype(dequantize_node, "T", dtypes.quint8)
-    set_attr_string(dequantize_node, "mode", b"MIN_FIRST")
-    self.add_output_graph_node(dequantize_node)
-
-  def eightbitize_mat_mul_node(self, original_node):
-    """Replaces a MatMul node with the eight bit equivalent sub-graph."""
-    quantized_mat_mul_name = original_node.name + "_eightbit_quantized_mat_mul"
-    all_input_names = self.add_eightbit_prologue_nodes(original_node)
-    quantized_mat_mul_node = create_node("QuantizedMatMul",
-                                         quantized_mat_mul_name,
-                                         all_input_names)
-    set_attr_dtype(quantized_mat_mul_node, "T1", dtypes.quint8)
-    set_attr_dtype(quantized_mat_mul_node, "T2", dtypes.quint8)
-    set_attr_dtype(quantized_mat_mul_node, "Toutput", dtypes.qint32)
-    copy_attr(quantized_mat_mul_node, "transpose_a",
-              original_node.attr["transpose_a"])
-    copy_attr(quantized_mat_mul_node, "transpose_b",
-              original_node.attr["transpose_b"])
-    self.add_output_graph_node(quantized_mat_mul_node)
-    quantize_down_name = self.add_quantize_down_nodes(original_node,
-                                                      quantized_mat_mul_name)
-    self.add_dequantize_result_node(quantize_down_name, original_node.name)
-
-  def eightbitize_conv_node(self, original_node):
-    """Replaces a Conv2D node with the eight bit equivalent sub-graph."""
-    all_input_names = self.add_eightbit_prologue_nodes(original_node)
-    quantized_conv_name = original_node.name + "_eightbit_quantized_conv"
-    quantized_conv_node = create_node("QuantizedConv2D", quantized_conv_name,
-                                      all_input_names)
-    copy_attr(quantized_conv_node, "strides", original_node.attr["strides"])
-    copy_attr(quantized_conv_node, "padding", original_node.attr["padding"])
-    set_attr_dtype(quantized_conv_node, "Tinput", dtypes.quint8)
-    set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.quint8)
-    set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32)
-    self.add_output_graph_node(quantized_conv_node)
-    quantize_down_name = self.add_quantize_down_nodes(original_node,
-                                                      quantized_conv_name)
-    self.add_dequantize_result_node(quantize_down_name, original_node.name)
-
-  def eightbitize_bias_add_node(self, original_node):
-    """Replaces a BiasAdd node with the eight bit equivalent sub-graph."""
-    quantized_bias_add_name = (
-        original_node.name + "_eightbit_quantized_bias_add")
-    all_input_names = self.add_eightbit_prologue_nodes(original_node)
-    quantized_bias_add_node = create_node("QuantizedBiasAdd",
-                                          quantized_bias_add_name,
-                                          all_input_names)
-    set_attr_dtype(quantized_bias_add_node, "T1", dtypes.quint8)
-    set_attr_dtype(quantized_bias_add_node, "T2", dtypes.quint8)
-    set_attr_dtype(quantized_bias_add_node, "out_type", dtypes.qint32)
-    self.add_output_graph_node(quantized_bias_add_node)
-    quantize_down_name = self.add_quantize_down_nodes(original_node,
-                                                      quantized_bias_add_name)
-    self.add_dequantize_result_node(quantize_down_name, original_node.name)
-
-  def eightbitize_single_input_tensor_node(self, original_node,
-                                           add_op_function):
-    """Replaces a single-tensor node with the eight bit equivalent sub-graph.
-
-    Converts a node like this:
-
-       Shape(f)   Input(f)
-         |          |
-         +--------v v
-                Operation
-                    |
-                    v
-                   (f)
-
-     Into a quantized equivalent:
-
-                    Input(f)              ReshapeDims
-                       +------v v-------------+
-                       |    Reshape
-                       |      |
-                       |      |          ReductionDims
-                       |      +-----+         |
-                       |      | +---c---------+
-                       |      v v   v v-------+
-                       |      Min   Max
-                       |  +----+      |
-                       v  v  v--------+
-                      Quantize
-                          |
-                          v
-                   QuantizedOperation
-                      |   |   |
-                      v   v   v
-                      Dequantize
-                          |
-                          v
-                         (f)
-
-
-    Args:
-      original_node: Float node to be converted.
-      add_op_function: Function to create the actual node.
-
-    Returns:
-      Subgraph representing the quantized version of the original node.
-
-    """
-    quantized_op_name = original_node.name + "_eightbit_quantized"
-    quantized_op_type = "Quantized" + original_node.op
-    all_input_names = self.add_eightbit_prologue_nodes(original_node)
-    quantized_op_node = create_node(quantized_op_type, quantized_op_name,
-                                    all_input_names)
-    add_op_function(original_node, quantized_op_node)
-    self.add_output_graph_node(quantized_op_node)
-    self.add_dequantize_result_node(quantized_op_name, original_node.name)
-
-  def add_pool_function(self, original_node, quantized_op_node):
-    set_attr_dtype(quantized_op_node, "T", dtypes.quint8)
-    copy_attr(quantized_op_node, "ksize", original_node.attr["ksize"])
-    copy_attr(quantized_op_node, "strides", original_node.attr["strides"])
-    copy_attr(quantized_op_node, "padding", original_node.attr["padding"])
-
-  def add_relu_function(self, unused_arg_node, quantized_op_node):
-    set_attr_dtype(quantized_op_node, "Tinput", dtypes.quint8)
-
-  def eightbitize_concat_node(self, original_node):
-    """Replaces a Concat node with the eight bit equivalent sub-graph.
-
-    Converts a node like this:
-
-       Shape(f)   Input0(f)   Input1(f)
-         |          |            |
-         +--------v v v----------+
-                  Concat
-                    |
-                    v
-                   (f)
-
-     Into a quantized equivalent:
-
-       Shape(f)     Input0(f)             ReshapeDims                  Input1(f)
-         |             +------v v--------------+------------------v v------+
-         |             |    Reshape                             Reshape    |
-         |             |      |                                     |      |
-         |             |      |           ReductionDims             |      |
-         |             |      +------+         |           +--------+      |
-         |             |      |  +---c---------+-----------c-----+  |      |
-         |             |      +v v   v v-------+---------v v     v v+      |
-         |             |       Min   Max                 Min     Max       |
-         |             |  +----+      |                   |       +-----+  |
-         |             v  v  v--------+                   +----------v  v  v
-         |            Quantize                                       Quantize
-         |                +------------------+   +----------------------+
-         +-------------------------------+   |   |
-                                         v   v   v
-                                      QuantizedConcat
-                                         |   |   |
-                                         v   v   v
-                                        Dequantize
-                                             |
-                                             v
-                                            (f)
-    Args:
-      original_node: Float node to be converted.
-
-    Returns:
-      Subgraph representing the quantized version of the original node.
-
-    """
-    namespace_prefix = original_node.name + "_eightbit"
-    quantized_concat_name = namespace_prefix + "_quantized_concat"
-    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
-        namespace_prefix)
-    shape_input_name = original_node.input[0]
-    original_inputs = original_node.input[1:]
-    input_names = []
-    min_names = []
-    max_names = []
-    for original_input_name in original_inputs:
-      quantize_input_name, min_input_name, max_input_name = (
-          self.eightbitize_input_to_node(namespace_prefix, original_input_name,
-                                         reshape_dims_name,
-                                         reduction_dims_name))
-      input_names.append(quantize_input_name)
-      min_names.append(min_input_name)
-      max_names.append(max_input_name)
-    all_input_names = [shape_input_name]
-    all_input_names.extend(input_names)
-    all_input_names.extend(min_names)
-    all_input_names.extend(max_names)
-    quantized_concat_node = create_node("QuantizedConcat",
-                                        quantized_concat_name, all_input_names)
-    set_attr_int(quantized_concat_node, "N", len(original_inputs))
-    set_attr_dtype(quantized_concat_node, "T", dtypes.quint8)
-    self.add_output_graph_node(quantized_concat_node)
-    self.add_dequantize_result_node(quantized_concat_name, original_node.name)
-
-  def eightbitize_concatv2_node(self, original_node):
-    """
-    Args:
-      original_node: Float node to be converted.
-
-    Returns:
-      Subgraph representing the quantized version of the original node.
-
-    """
-    namespace_prefix = original_node.name + "_eightbit"
-    quantized_concat_name = namespace_prefix + "_quantized_concatv2"
-    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
-        namespace_prefix)
-    num_input = len(original_node.input)
-    shape_input_name = original_node.input[num_input-1]
-    original_inputs = original_node.input[0:num_input-1]
-    input_names = []
-    min_names = []
-    max_names = []
-    for original_input_name in original_inputs:
-      quantize_input_name, min_input_name, max_input_name = (
-          self.eightbitize_input_to_node(namespace_prefix, original_input_name,
-                                         reshape_dims_name,
-                                         reduction_dims_name))
-      input_names.append(quantize_input_name)
-      min_names.append(min_input_name)
-      max_names.append(max_input_name)
-    all_input_names = input_names
-    all_input_names.append(shape_input_name)
-    all_input_names.extend(min_names)
-    all_input_names.extend(max_names)
-    quantized_concat_node = create_node("QuantizedConcatV2",
-                                        quantized_concat_name, all_input_names)
-    set_attr_int(quantized_concat_node, "N", len(original_inputs))
-    set_attr_dtype(quantized_concat_node, "T", dtypes.quint8)
-    self.add_output_graph_node(quantized_concat_node)
-    self.add_dequantize_result_node(quantized_concat_name, original_node.name)
-
-  def eightbitize_placeholder_node(self, current_node):
-    """Replaces a placeholder node with a quint8 placeholder node+dequantize."""
-    name = current_node.name
-
-    # Convert the placeholder into a quantized type.
-    output_node = node_def_pb2.NodeDef()
-    output_node.CopyFrom(current_node)
-    set_attr_dtype(output_node, "dtype", dtypes.quint8)
-    output_node.name += "_original_input"
-    self.add_output_graph_node(output_node)
-
-    # Add a dequantize to convert back to float.
-    dequantize_node = create_node("Dequantize", name, [
-        output_node.name, "quantized_input_min_value",
-        "quantized_input_max_value"
-    ])
-    set_attr_dtype(dequantize_node, "T", dtypes.quint8)
-    set_attr_string(dequantize_node, "mode", b"MIN_FIRST")
-    self.add_output_graph_node(dequantize_node)
-
-    # For the descent over the graph to work, the dequantize node must be named
-    # current_node.name.  However, for the feeding of the graph to work, the
-    # placeholder must have the name current_node.name; so record a final set
-    # of renames to apply after all processing has been done.
-    self.final_node_renames[output_node.name] = name
-    self.final_node_renames[dequantize_node.name] = name + "_dequantize"
-
-  def eightbitize_reshape_node(self, original_node):
-    """Replaces a Reshape node with the eight bit equivalent sub-graph.
-
-    Args:
-      original_node: Float node to be converted.
-
-    Returns:
-      Subgraph representing the quantized version of the original node.
-
-    """
-    namespace_prefix = original_node.name + "_eightbit"
-    quantized_reshape_name = namespace_prefix + "_quantized_reshape"
-    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
-        namespace_prefix)
-    shape_input_name = original_node.input[1]
-    quantize_input_name, min_input_name, max_input_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_node.input[0],
-                                       reshape_dims_name, reduction_dims_name))
-    quantized_reshape_node = create_node(
-        "QuantizedReshape", quantized_reshape_name,
-        [quantize_input_name, shape_input_name, min_input_name, max_input_name])
-    set_attr_dtype(quantized_reshape_node, "T", dtypes.quint8)
-    self.add_output_graph_node(quantized_reshape_node)
-    self.add_dequantize_result_node(quantized_reshape_name, original_node.name)
-
-  def eightbitize_batch_norm_node(self, original_node):
-    """Replaces a MatMul node with the eight bit equivalent sub-graph."""
-    namespace_prefix = original_node.name + "_eightbit"
-    original_input_name = original_node.input[0]
-    original_mean_name = original_node.input[1]
-    original_variance_name = original_node.input[2]
-    original_beta_name = original_node.input[3]
-    original_gamma_name = original_node.input[4]
-    quantized_batch_norm_name = namespace_prefix + "_quantized_batch_norm"
-
-    reshape_dims_name, reduction_dims_name = self.add_common_quantization_nodes(
-        namespace_prefix)
-    quantize_input_name, min_input_name, max_input_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_input_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantize_mean_name, min_mean_name, max_mean_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_mean_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantize_variance_name, min_variance_name, max_variance_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_variance_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantize_beta_name, min_beta_name, max_beta_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_beta_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantize_gamma_name, min_gamma_name, max_gamma_name = (
-        self.eightbitize_input_to_node(namespace_prefix, original_gamma_name,
-                                       reshape_dims_name, reduction_dims_name))
-    quantized_batch_norm_node = create_node(
-        "QuantizedBatchNormWithGlobalNormalization", quantized_batch_norm_name,
-        [
-            quantize_input_name, min_input_name, max_input_name,
-            quantize_mean_name, min_mean_name, max_mean_name,
-            quantize_variance_name, min_variance_name, max_variance_name,
-            quantize_beta_name, min_beta_name, max_beta_name,
-            quantize_gamma_name, min_gamma_name, max_gamma_name
-        ])
-    set_attr_dtype(quantized_batch_norm_node, "Tinput", dtypes.quint8)
-    set_attr_dtype(quantized_batch_norm_node, "out_type", dtypes.qint32)
-    copy_attr(quantized_batch_norm_node, "scale_after_normalization",
-              original_node.attr["scale_after_normalization"])
-    copy_attr(quantized_batch_norm_node, "variance_epsilon",
-              original_node.attr["variance_epsilon"])
-    self.add_output_graph_node(quantized_batch_norm_node)
-    quantize_down_name = self.add_quantize_down_nodes(original_node,
-                                                      quantized_batch_norm_name)
-    self.add_dequantize_result_node(quantize_down_name, original_node.name)
-
-  def add_output_graph_node(self, output_node):
-    """Inserts one node into the new graph."""
-    self.output_graph.node.extend([output_node])
-
-  def remove_redundant_quantization(self, old_graph):
-    """Removes unneeded pairs of quantize/dequantize ops from the graph.
-
-    This is a bit of a tricky function, because it's attempting to spot the
-    pattern of dequantizing from eight-bit up to float, and then immediately
-    quantizing back down to eight bits again, that's introduced by previous
-    passes that do 'key-hole' conversions of individual nodes but have to
-    convert back to float to match the previous output interface, since they
-    don't know that the next op can handle quantized tensors.
-    It works by:
-     - Looking for Quantize nodes.
-     - Checking to see if their first input is a Dequantize node.
-     - Seeing if their min/max inputs come from Min/Max nodes.
-     - Making sure those Min/Max nodes are being fed from the same Dequantize.
-     - Or that the Min is indirectly being fed from the same Dequantize as Max.
-     - Making sure the Dequantize is going through a Reshape (which we add
-       during the previous pass when we create the quantize sub-graph).
-     - Looking for the dims Const op for the Min/Max dims.
-    If all of these conditions are met, then it's a sub-graph pattern that
-    we know how to optimize out (and is likely the common one we've introduced).
-    We then rewire the graph to skip it entirely, and then rely on the dead node
-    removal pass to get rid of any nodes that are no longer needed.
-
-    Args:
-      old_graph: The model we'll be stripping redundant nodes from.
-
-    Returns:
-      A graph with the unnecessary nodes removed.
-
-    Raises:
-      ValueError: Two nodes with the same name were found in the graph.
-    """
-    old_nodes_map = self.create_nodes_map(old_graph)
-    self.output_graph = graph_pb2.GraphDef()
-    inputs_to_rename = {}
-    # We go through all the nodes, looking for any that match the patterns we
-    # know how to optimize away.
-    for node in old_graph.node:
-      # We always start with a Quantize node, and examine its inputs to see if
-      # they are in a form that can be removed.
-      if node.op not in ["Quantize", "QuantizeV2"]:
-        continue
-      dequantize_node_name = node_name_from_input(node.input[0])
-      if dequantize_node_name not in old_nodes_map:
-        raise ValueError("Input node name '" + dequantize_node_name +
-                         "' not found in node '" + node.name + "'")
-      dequantize_node = old_nodes_map[dequantize_node_name]
-      # Do we have a Dequantize feeding in, with the same type as the Quantize?
-      if dequantize_node.op != "Dequantize":
-        continue
-      if node.attr["T"] != dequantize_node.attr["T"]:
-        continue
-      # Now look at the other inputs, and ensure they're Min/Max nodes.
-      min_node_name = node_name_from_input(node.input[1])
-      max_node_name = node_name_from_input(node.input[2])
-      min_node = old_nodes_map[min_node_name]
-      max_node = old_nodes_map[max_node_name]
-      is_min_right_type = (min_node.op in ["Min", "Dequantize"])
-      is_max_right_type = (max_node.op in ["Max", "Dequantize"])
-      if not is_min_right_type or not is_max_right_type:
-        print("Didn't find expected types on inputs : %s, %s." % (min_node.op,
-                                                                  max_node.op))
-        continue
-      min_node_input_name = node_name_from_input(min_node.input[0])
-      max_node_input_name = node_name_from_input(max_node.input[0])
-      # There are two different patterns for Min nodes we can recognize, one
-      # where the input comes directly from the same one as the Max, and
-      # another where we run it through another Min first, so check for both.
-      is_same_input = False
-      if min_node_input_name == max_node_input_name:
-        is_same_input = True
-      else:
-        first_min_node_input = old_nodes_map[min_node_input_name]
-        if first_min_node_input.op == "Concat":
-          second_min_node_name = node_name_from_input(
-              first_min_node_input.input[1])
-          second_min_node = old_nodes_map[second_min_node_name]
-          if second_min_node.op == "Min":
-            second_min_node_input_name = node_name_from_input(
-                second_min_node.input[0])
-            is_same_input = (second_min_node_input_name == max_node_input_name)
-      if not is_same_input:
-        print("Different min/max inputs: " + min_node_input_name)
-        continue
-      # We recognize this pattern, so mark the graph edges to be rewired to
-      # route around it entirely, since we know it's a no-op.
-      dequantize_source_name = node_name_from_input(dequantize_node.input[0])
-      node_tensor_name = ensure_tensor_name_has_port(node.name)
-      min_tensor_name = node.name + ":1"
-      max_tensor_name = node.name + ":2"
-      inputs_to_rename[node_tensor_name] = dequantize_source_name
-      inputs_to_rename[min_tensor_name] = dequantize_node.input[1]
-      inputs_to_rename[max_tensor_name] = dequantize_node.input[2]
-    # Finally we apply all the rewiring we've marked to the graph.
-    for node in old_graph.node:
-      for index, input_full_name in enumerate(node.input):
-        input_name = ensure_tensor_name_has_port(input_full_name)
-        if input_name in inputs_to_rename:
-          node.input[index] = inputs_to_rename[input_name]
-      self.add_output_graph_node(node)
-    return self.output_graph
-
-  def apply_final_node_renames(self):
-    """Applies node renames in self.final_node_renames to self.output_graph."""
-    old_graph = self.output_graph
-    self.output_graph = graph_pb2.GraphDef()
-    for node in old_graph.node:
-      node.name = self.final_node_renames.get(node.name, node.name)
-      for index, input_name in enumerate(node.input):
-        node_name = node_name_from_input(input_name)
-        input_full_name = ensure_tensor_name_has_port(input_name)
-        if node_name in self.final_node_renames:
-          node.input[index] = "%s%s" % (self.final_node_renames[node_name],
-                                        input_full_name[len(node_name):])
-      self.add_output_graph_node(node)
-    return self.output_graph
-
-  def remove_dead_nodes(self, output_names):
-    """Removes nodes that are no longer needed for inference from the graph."""
-    old_output_graph = self.output_graph
-    self.output_graph = graph_util.extract_sub_graph(old_output_graph,
-                                                     output_names)
-
-  def quantize_weights(self, input_graph, quantization_mode):
-    """Quantize float Const ops.
-
-    There are two modes of operations, both replace float Const ops with
-    quantized values.
-    1. If quantization_mode is "weights_rounded", this function replaces float
-    Const ops with quantized float Const ops - same as the original op, but
-    float values being mapped to the center of one of 1<<FLAGS.bitdepth buckets.
-    This does not change the raw model size, but compression algorithms such as
-    zip (as used for compressing apks) or bzip2 will achieve a very good
-    compression ratio.
-    2. For other quantization modes ("MIN_COMBINED" or "MIN_FIRST"), float
-    Const ops are quantized and replaced by a tuple of four ops to perform
-    the dequantization at runtime:
-    * eight-bit Const (bucket indices, same shape as original float Const op
-    * two float Const ops (min and max value of original float Const op)
-    * Dequantize op to convert the eight-bit consts to float tensors.
-    The quantization mode is important because we see accuracy problems when
-    quantizing weights for different situations depending on the algorithm
-    used. We haven't figured out exactly what the underlying cause is yet,
-    unfortunately.
-
-    Args:
-      input_graph: A GraphDef of the model containing float Const ops.
-      quantization_mode: How to quantize and dequantize the values.
-
-    Returns:
-      A GraphDef of the converted graph.
-
-    Raises:
-      ValueError: If quantization_mode is unsupported.
-    """
-    output_graph = graph_pb2.GraphDef()
-    for input_node in input_graph.node:
-      should_quantize = False
-      if input_node.op == "Const":
-        dtype = dtypes.as_dtype(input_node.attr["dtype"].type)
-        if dtype == dtypes.float32:
-          should_quantize = True
-      if should_quantize:
-        if quantization_mode == "weights_rounded":
-          output_graph.node.extend(quantize_weight_rounded(input_node))
-        elif quantization_mode in (b"MIN_COMBINED", b"MIN_FIRST"):
-          output_graph.node.extend(
-              quantize_weight_eightbit(input_node, quantization_mode))
-        else:
-          raise ValueError("Unsupported quantization mode %s." %
-                           quantization_mode)
-      else:
-        output_node = node_def_pb2.NodeDef()
-        output_node.CopyFrom(input_node)
-        output_graph.node.extend([output_node])
-    return output_graph
-
-  def set_input_graph(self, new_input_graph):
-    self.input_graph = new_input_graph
-    self.nodes_map = self.create_nodes_map(self.input_graph)
-
-def main(unused_args):
-  if not gfile.Exists(FLAGS.input):
-    print("Input graph file '" + FLAGS.input + "' does not exist!")
-    return -1
-
-  known_modes = [
-      "round", "quantize", "eightbit", "weights", "test", "weights_rounded"
-  ]
-  if not any(FLAGS.mode in s for s in known_modes):
-    print("mode is '" + FLAGS.mode + "', not in " + ", ".join(known_modes) +
-          ".")
-    return -1
-
-  tf_graph = graph_pb2.GraphDef()
-  # TODO(intel-tf): Enabling user to work with both binary and text format.
-  mode = "rb" if FLAGS.input_binary else "r"
-  with gfile.Open(FLAGS.input, mode) as f:
-    data = f.read()
-    if FLAGS.input_binary:
-      tf_graph.ParseFromString(data)
-    else:
-      text_format.Merge(data, tf_graph)
-
-  graph = ops.Graph()
-  with graph.as_default():
-    importer.import_graph_def(tf_graph, input_map={}, name="")
-
-  quantized_input_range = None
-  if FLAGS.quantized_input:
-    quantized_input_range = [
-        FLAGS.quantized_input_min, FLAGS.quantized_input_max
-    ]
-
-  fallback_quantization_range = None
-  if (FLAGS.quantized_fallback_min is not None or
-      FLAGS.quantized_fallback_max is not None):
-    assert FLAGS.quantized_fallback_min is not None
-    assert FLAGS.quantized_fallback_max is not None
-    fallback_quantization_range = [
-        FLAGS.quantized_fallback_min, FLAGS.quantized_fallback_max
-    ]
-
-  rewriter = GraphRewriter(tf_graph, FLAGS.mode,
-                           quantized_input_range, fallback_quantization_range,
-                           FLAGS.intel_cpu_eightbitize)
-
-  output_graph = rewriter.rewrite(FLAGS.output_node_names.split(","))
-
-  # TODO(intel-tf): Enabling user to work with both binary and text format.
-  mode = "wb" if FLAGS.output_binary else "w"
-  f = gfile.FastGFile(FLAGS.output, mode)
-  if FLAGS.output_binary:
-    f.write(output_graph.SerializeToString())
-  else:
-    f.write(str(output_graph))
-
-  return 0
-
-if __name__ == "__main__":
-  app.run()
-- 
GitLab


From 29d6299e7c70e8021d98991de302479dc3a3cdba Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Wed, 3 Oct 2018 10:18:59 -0700
Subject: [PATCH 0284/1085] Fix TfLiteTensor invalidation issue when using the
 Java API

Fix an issue where the Java Tensor class would hold a reference
to an invalidated TfLiteTensor instance. This issue was manifest
in certain models that add temporary tensors during execution.

PiperOrigin-RevId: 215582842
---
 .../lite/NativeInterpreterWrapper.java        | 26 +++++++---
 .../main/java/org/tensorflow/lite/Tensor.java | 27 ++++++++--
 .../native/nativeinterpreterwrapper_jni.cc    | 22 +++-----
 .../native/nativeinterpreterwrapper_jni.h     | 24 ++++-----
 .../lite/java/src/main/native/tensor_jni.cc   | 50 +++++++++++++++----
 .../lite/java/src/main/native/tensor_jni.h    | 17 +++++++
 .../java/org/tensorflow/lite/TensorTest.java  | 13 ++++-
 7 files changed, 129 insertions(+), 50 deletions(-)

diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
index 9bc44bf797..6f03e7853a 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
@@ -18,7 +18,6 @@ package org.tensorflow.lite;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.MappedByteBuffer;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -83,6 +82,19 @@ final class NativeInterpreterWrapper implements AutoCloseable {
   /** Releases resources associated with this {@code NativeInterpreterWrapper}. */
   @Override
   public void close() {
+    // Close the tensors first as they may reference the native interpreter.
+    for (int i = 0; i < inputTensors.length; ++i) {
+      if (inputTensors[i] != null) {
+        inputTensors[i].close();
+        inputTensors[i] = null;
+      }
+    }
+    for (int i = 0; i < outputTensors.length; ++i) {
+      if (outputTensors[i] != null) {
+        outputTensors[i].close();
+        outputTensors[i] = null;
+      }
+    }
     delete(errorHandle, modelHandle, interpreterHandle);
     errorHandle = 0;
     modelHandle = 0;
@@ -91,8 +103,6 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     inputsIndexes = null;
     outputsIndexes = null;
     isMemoryAllocated = false;
-    Arrays.fill(inputTensors, null);
-    Arrays.fill(outputTensors, null);
   }
 
   /** Sets inputs, runs model inference and returns outputs. */
@@ -260,7 +270,8 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     Tensor inputTensor = inputTensors[index];
     if (inputTensor == null) {
       inputTensor =
-          inputTensors[index] = Tensor.fromHandle(getInputTensor(interpreterHandle, index));
+          inputTensors[index] =
+              Tensor.fromIndex(interpreterHandle, getInputTensorIndex(interpreterHandle, index));
     }
     return inputTensor;
   }
@@ -282,7 +293,8 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     Tensor outputTensor = outputTensors[index];
     if (outputTensor == null) {
       outputTensor =
-          outputTensors[index] = Tensor.fromHandle(getOutputTensor(interpreterHandle, index));
+          outputTensors[index] =
+              Tensor.fromIndex(interpreterHandle, getOutputTensorIndex(interpreterHandle, index));
     }
     return outputTensor;
   }
@@ -317,9 +329,9 @@ final class NativeInterpreterWrapper implements AutoCloseable {
 
   private static native long allocateTensors(long interpreterHandle, long errorHandle);
 
-  private static native long getInputTensor(long interpreterHandle, int inputIdx);
+  private static native int getInputTensorIndex(long interpreterHandle, int inputIdx);
 
-  private static native long getOutputTensor(long interpreterHandle, int outputIdx);
+  private static native int getOutputTensorIndex(long interpreterHandle, int outputIdx);
 
   private static native int getInputCount(long interpreterHandle);
 
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
index f174178d98..6ca47aa3ed 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
@@ -23,13 +23,26 @@ import java.util.Arrays;
 /**
  * A typed multi-dimensional array used in Tensorflow Lite.
  *
- * <p>The native handle of a {@code Tensor} belongs to {@code NativeInterpreterWrapper}, thus not
- * needed to be closed here.
+ * <p>The native handle of a {@code Tensor} is managed by {@code NativeInterpreterWrapper}, and does
+ * not needed to be closed by the client. However, once the {@code NativeInterpreterWrapper} has
+ * been closed, the tensor handle will be invalidated.
  */
 public final class Tensor {
 
-  static Tensor fromHandle(long nativeHandle) {
-    return new Tensor(nativeHandle);
+  /**
+   * Creates a Tensor wrapper from the provided interpreter instance and tensor index.
+   *
+   * <p>The caller is responsible for closing the created wrapper, and ensuring the provided
+   * native interpreter is valid until the tensor is closed.
+   */
+  static Tensor fromIndex(long nativeInterpreterHandle, int tensorIndex) {
+    return new Tensor(create(nativeInterpreterHandle, tensorIndex));
+  }
+
+  /** Disposes of any resources used by the Tensor wrapper. */
+  void close() {
+    delete(nativeHandle);
+    nativeHandle = 0;
   }
 
   /** Returns the {@link DataType} of elements stored in the Tensor. */
@@ -235,7 +248,7 @@ public final class Tensor {
     return o instanceof ByteBuffer;
   }
 
-  private final long nativeHandle;
+  private long nativeHandle;
   private final DataType dtype;
   private int[] shapeCopy;
 
@@ -249,6 +262,10 @@ public final class Tensor {
     return buffer(nativeHandle).order(ByteOrder.nativeOrder());
   }
 
+  private static native long create(long interpreterHandle, int tensorIndex);
+
+  private static native void delete(long handle);
+
   private static native ByteBuffer buffer(long handle);
 
   private static native void writeDirectBuffer(long handle, ByteBuffer src);
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
index abb7320bc5..4dc73fbcf8 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
@@ -159,26 +159,20 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_allocateTensors(
   }
 }
 
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensor(JNIEnv* env,
-                                                                 jclass clazz,
-                                                                 jlong handle,
-                                                                 jint index) {
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint input_index) {
   tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle);
   if (interpreter == nullptr) return 0;
-  return reinterpret_cast<jlong>(
-      interpreter->tensor(interpreter->inputs()[index]));
+  return interpreter->inputs()[input_index];
 }
 
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensor(JNIEnv* env,
-                                                                  jclass clazz,
-                                                                  jlong handle,
-                                                                  jint index) {
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint output_index) {
   tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle);
   if (interpreter == nullptr) return 0;
-  return reinterpret_cast<jlong>(
-      interpreter->tensor(interpreter->outputs()[index]));
+  return interpreter->outputs()[output_index];
 }
 
 JNIEXPORT jint JNICALL
diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
index aa809dff8a..f8f3e7028c 100644
--- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
+++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h
@@ -46,25 +46,21 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_allocateTensors(
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
- *  Method:    getInputTensor
- *  Signature: (JI)J
+ *  Method:    getInputTensorIndex
+ *  Signature: (JI)I
  */
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensor(JNIEnv* env,
-                                                                 jclass clazz,
-                                                                 jlong handle,
-                                                                 jint index);
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint input_index);
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
- *  Method:    getOutputTensor
- *  Signature: (JI)J
+ *  Method:    getOutputTensorIndex
+ *  Signature: (JI)I
  */
-JNIEXPORT jlong JNICALL
-Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensor(JNIEnv* env,
-                                                                  jclass clazz,
-                                                                  jlong handle,
-                                                                  jint index);
+JNIEXPORT jint JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_getOutputTensorIndex(
+    JNIEnv* env, jclass clazz, jlong handle, jint output_index);
 
 /*
  *  Class:     org_tensorflow_lite_NativeInterpreterWrapper
diff --git a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc
index 7ff96a3172..d3378f5f14 100644
--- a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc
+++ b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.cc
@@ -16,17 +16,36 @@ limitations under the License.
 #include "tensorflow/contrib/lite/java/src/main/native/tensor_jni.h"
 #include <cstring>
 #include <memory>
+#include "tensorflow/contrib/lite/interpreter.h"
 #include "tensorflow/contrib/lite/java/src/main/native/exception_jni.h"
 
 namespace {
 
-TfLiteTensor* convertLongToTensor(JNIEnv* env, jlong handle) {
+// Convenience handle for obtaining a TfLiteTensor given an interpreter and
+// tensor index.
+//
+// Historically, the Java Tensor class used a TfLiteTensor pointer as its native
+// handle. However, this approach isn't generally safe, as the interpreter may
+// invalidate all TfLiteTensor* handles during inference or allocation.
+class TensorHandle {
+ public:
+  TensorHandle(tflite::Interpreter* interpreter, int tensor_index)
+      : interpreter_(interpreter), tensor_index_(tensor_index) {}
+
+  TfLiteTensor* tensor() const { return interpreter_->tensor(tensor_index_); }
+
+ private:
+  tflite::Interpreter* const interpreter_;
+  const int tensor_index_;
+};
+
+TfLiteTensor* GetTensorFromHandle(JNIEnv* env, jlong handle) {
   if (handle == 0) {
     throwException(env, kIllegalArgumentException,
                    "Internal error: Invalid handle to TfLiteTensor.");
     return nullptr;
   }
-  return reinterpret_cast<TfLiteTensor*>(handle);
+  return reinterpret_cast<TensorHandle*>(handle)->tensor();
 }
 
 size_t elementByteSize(TfLiteType data_type) {
@@ -192,10 +211,23 @@ size_t writeMultiDimensionalArray(JNIEnv* env, jobject src, TfLiteType type,
 
 }  // namespace
 
+JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_Tensor_create(
+    JNIEnv* env, jclass clazz, jlong interpreter_handle, jint tensor_index) {
+  tflite::Interpreter* interpreter =
+      reinterpret_cast<tflite::Interpreter*>(interpreter_handle);
+  return reinterpret_cast<jlong>(new TensorHandle(interpreter, tensor_index));
+}
+
+JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_delete(JNIEnv* env,
+                                                              jclass clazz,
+                                                              jlong handle) {
+  delete reinterpret_cast<TensorHandle*>(handle);
+}
+
 JNIEXPORT jobject JNICALL Java_org_tensorflow_lite_Tensor_buffer(JNIEnv* env,
                                                                  jclass clazz,
                                                                  jlong handle) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return nullptr;
   if (tensor->data.raw == nullptr) {
     throwException(env, kIllegalArgumentException,
@@ -208,7 +240,7 @@ JNIEXPORT jobject JNICALL Java_org_tensorflow_lite_Tensor_buffer(JNIEnv* env,
 
 JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_writeDirectBuffer(
     JNIEnv* env, jclass clazz, jlong handle, jobject src) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return;
 
   char* src_data_raw = static_cast<char*>(env->GetDirectBufferAddress(src));
@@ -226,7 +258,7 @@ Java_org_tensorflow_lite_Tensor_readMultiDimensionalArray(JNIEnv* env,
                                                           jclass clazz,
                                                           jlong handle,
                                                           jobject value) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return;
   int num_dims = tensor->dims->size;
   if (num_dims == 0) {
@@ -243,7 +275,7 @@ Java_org_tensorflow_lite_Tensor_writeMultiDimensionalArray(JNIEnv* env,
                                                            jclass clazz,
                                                            jlong handle,
                                                            jobject src) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return;
   if (tensor->data.raw == nullptr) {
     throwException(env, kIllegalArgumentException,
@@ -262,14 +294,14 @@ Java_org_tensorflow_lite_Tensor_writeMultiDimensionalArray(JNIEnv* env,
 JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_dtype(JNIEnv* env,
                                                              jclass clazz,
                                                              jlong handle) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return 0;
   return static_cast<jint>(tensor->type);
 }
 
 JNIEXPORT jintArray JNICALL
 Java_org_tensorflow_lite_Tensor_shape(JNIEnv* env, jclass clazz, jlong handle) {
-  TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return nullptr;
   int num_dims = tensor->dims->size;
   jintArray result = env->NewIntArray(num_dims);
@@ -280,7 +312,7 @@ Java_org_tensorflow_lite_Tensor_shape(JNIEnv* env, jclass clazz, jlong handle) {
 JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_numBytes(JNIEnv* env,
                                                                 jclass clazz,
                                                                 jlong handle) {
-  const TfLiteTensor* tensor = convertLongToTensor(env, handle);
+  const TfLiteTensor* tensor = GetTensorFromHandle(env, handle);
   if (tensor == nullptr) return 0;
   return static_cast<jint>(tensor->bytes);
 }
diff --git a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h
index 2f73128bdf..c5e9690e9a 100644
--- a/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h
+++ b/tensorflow/contrib/lite/java/src/main/native/tensor_jni.h
@@ -23,6 +23,23 @@ limitations under the License.
 extern "C" {
 #endif  // __cplusplus
 
+/*
+ * Class:     org_tensorflow_lite_Tensor
+ * Method:    create
+ * Signature: (JI)J
+ */
+JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_Tensor_create(
+    JNIEnv* env, jclass clazz, jlong interpreter_handle, jint tensor_index);
+
+/*
+ * Class:     org_tensorflow_lite_Tensor
+ * Method:    delete
+ * Signature: (J)
+ */
+JNIEXPORT void JNICALL Java_org_tensorflow_lite_Tensor_delete(JNIEnv* env,
+                                                              jclass clazz,
+                                                              jlong handle);
+
 /*
  * Class:     org_tensorflow_lite_Tensor
  * Method:    buffer
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
index 85ad393d89..56a38ea3e2 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
@@ -182,7 +182,7 @@ public final class TensorTest {
     dataType = Tensor.dataTypeOf(testFloatArray);
     assertThat(dataType).isEqualTo(DataType.FLOAT32);
     float[][] testMultiDimArray = {testFloatArray, testFloatArray, testFloatArray};
-    dataType = Tensor.dataTypeOf(testFloatArray);
+    dataType = Tensor.dataTypeOf(testMultiDimArray);
     assertThat(dataType).isEqualTo(DataType.FLOAT32);
     try {
       double[] testDoubleArray = {0.783, 0.251};
@@ -238,4 +238,15 @@ public final class TensorTest {
     assertThat(shape[1]).isEqualTo(3);
     assertThat(shape[2]).isEqualTo(1);
   }
+
+  @Test
+  public void testUseAfterClose() {
+    tensor.close();
+    try {
+      tensor.numBytes();
+      fail();
+    } catch (IllegalArgumentException e) {
+      // Expected failure.
+    }
+  }
 }
-- 
GitLab


From 3d76a83037388b61bcda1571d3b3e175a2f53f2e Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Wed, 3 Oct 2018 12:25:25 -0700
Subject: [PATCH 0285/1085] Disable XLA for Android builds.

PiperOrigin-RevId: 215605865
---
 tensorflow/tools/ci_build/builds/configured | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/ci_build/builds/configured b/tensorflow/tools/ci_build/builds/configured
index 868a3beac5..3eee11fd7e 100755
--- a/tensorflow/tools/ci_build/builds/configured
+++ b/tensorflow/tools/ci_build/builds/configured
@@ -32,6 +32,10 @@ COMMAND=("$@")
 
 export CI_BUILD_PYTHON="${CI_BUILD_PYTHON:-python}"
 export PYTHON_BIN_PATH="${PYTHON_BIN_PATH:-$(which ${CI_BUILD_PYTHON})}"
+# XLA currently does not build under Android, so disable it for now.
+if [[ "${CONTAINER_TYPE}" -eq 'android' ]]; then
+  export TF_ENABLE_XLA=0
+fi
 
 pushd "${CI_TENSORFLOW_SUBMODULE_PATH:-.}"
 yes "" | $PYTHON_BIN_PATH configure.py
-- 
GitLab


From 295b3c80555cc82d8d70faf96a47681e1d904b9c Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 3 Oct 2018 12:32:16 -0700
Subject: [PATCH 0286/1085] Automated rollback of commit
 c9bdd3938e2b43334a0065b4c198ec9d491c8cb8

PiperOrigin-RevId: 215607038
---
 tensorflow/core/kernels/data/iterator_ops.cc  |  4 +++
 .../kernels/data/map_and_batch_dataset_op.cc  | 10 +++----
 .../core/kernels/data/model_dataset_op.cc     | 10 +++----
 .../data/parallel_interleave_dataset_op.cc    | 27 ++++++++-----------
 .../kernels/data/parallel_map_iterator.cc     | 10 +++----
 .../core/kernels/data/prefetch_dataset_op.cc  | 10 +++----
 tensorflow/core/kernels/data/writer_ops.cc    | 12 ++++-----
 7 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 8acd6cc724..7a833668ac 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -16,8 +16,10 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
+#include "tensorflow/core/common_runtime/threadpool_device.h"
 #include "tensorflow/core/framework/iterator.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/resource_op_kernel.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
@@ -25,11 +27,13 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/optional_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 namespace data {
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 6a670f1efb..bf08970560 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -29,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -406,10 +405,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
-          runner_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-          runner_thread_->Schedule(
-              std::bind(&Iterator::RunnerThread, this, ctx_copy));
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "runner_thread",
+              std::bind(&Iterator::RunnerThread, this, ctx_copy)));
         }
       }
 
@@ -662,7 +660,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
-      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
       bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index 859df57962..9aa505f4f1 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -127,10 +126,9 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         if (!optimize_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          optimize_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "optimize_thread");
-          optimize_thread_->Schedule(
-              [this, new_ctx]() { OptimizeThread(new_ctx); });
+          optimize_thread_.reset(ctx->env()->StartThread(
+              {}, "optimize_thread",
+              [this, new_ctx]() { OptimizeThread(new_ctx); }));
         }
         return Status::OK();
       }
@@ -169,7 +167,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       condition_variable cond_var_;
       std::shared_ptr<model::Model> model_;
-      std::unique_ptr<BackgroundWorker> optimize_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<Thread> optimize_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 9c836b836e..6b6b3d6ab9 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -26,7 +26,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -482,10 +481,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           worker_threads_.reserve(dataset()->num_threads());
           for (size_t i = 0; i < dataset()->num_threads(); ++i) {
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(
-                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
-            worker_threads_.back()->Schedule(
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
+            worker_threads_.emplace_back(ctx->env()->StartThread(
+                {}, "worker_thread",
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
           }
         }
         return Status::OK();
@@ -582,10 +580,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             }
             workers_[i].SetInputs(s, std::move(args));
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(
-                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
-            worker_threads_.back()->Schedule(
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
+            worker_threads_.emplace_back(ctx->env()->StartThread(
+                {}, "worker_thread",
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
             if (i < dataset()->cycle_length_) {
               interleave_indices_.push_back(i);
             } else {
@@ -1050,8 +1047,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // The worker threads. This must be last to ensure the
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
-      std::vector<std::unique_ptr<BackgroundWorker>> worker_threads_
-          GUARDED_BY(mu_);
+      std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
@@ -1393,10 +1389,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          runner_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-          runner_thread_->Schedule(
-              [this, new_ctx]() { RunnerThread(new_ctx); });
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "runner_thread",
+              [this, new_ctx]() { RunnerThread(new_ctx); }));
         }
       }
 
@@ -1650,7 +1645,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(*mu_) = false;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 626e98af91..13bd4b6036 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,7 +22,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -181,10 +180,9 @@ class ParallelMapIterator : public DatasetBaseIterator {
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
-      runner_thread_ =
-          MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-      runner_thread_->Schedule(
-          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy));
+      runner_thread_.reset(ctx->env()->StartThread(
+          {}, "runner_thread",
+          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
     }
   }
 
@@ -332,7 +330,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
       GUARDED_BY(*mu_);
-  std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
   bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index e9c38eb8a0..754ed772db 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -257,11 +256,10 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     Status EnsurePrefetchThreadStarted(IteratorContext* ctx)
         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
       if (!prefetch_thread_) {
-        prefetch_thread_ =
-            MakeUnique<BackgroundWorker>(ctx->env(), "prefetch_thread");
         std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-        prefetch_thread_->Schedule(
-            [this, new_ctx]() { PrefetchThread(new_ctx); });
+        prefetch_thread_.reset(ctx->env()->StartThread(
+            {}, "prefetch_thread",
+            [this, new_ctx]() { PrefetchThread(new_ctx); }));
       }
       return Status::OK();
     }
@@ -365,7 +363,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     string prefix_end_;
     PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_);
     std::deque<BufferElement> buffer_ GUARDED_BY(mu_);
-    std::unique_ptr<BackgroundWorker> prefetch_thread_ GUARDED_BY(mu_);
+    std::unique_ptr<Thread> prefetch_thread_ GUARDED_BY(mu_);
     bool cancelled_ GUARDED_BY(mu_) = false;
     bool prefetch_thread_finished_ GUARDED_BY(mu_) = false;
   };
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index 7bb2077b62..3f76695bb1 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel {
  public:
   explicit ToTFRecordOp(OpKernelConstruction* ctx)
       : AsyncOpKernel(ctx),
-        background_worker_(
-            ctx->env(),
-            strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) {
-  }
+        thread_pool_(new thread::ThreadPool(
+            ctx->env(), ThreadOptions(),
+            strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())),
+            1 /* num_threads */, false /* low_latency_hint */)) {}
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
@@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel {
     // The call to `iterator->GetNext()` may block and depend on an
     // inter-op thread pool thread, so we issue the call from the
     // owned thread pool.
-    background_worker_.Schedule([this, ctx, done]() {
+    thread_pool_->Schedule([this, ctx, done]() {
       string filename;
       OP_REQUIRES_OK_ASYNC(
           ctx, ParseScalarArgument<string>(ctx, "filename", &filename), done);
@@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel {
   }
 
  private:
-  BackgroundWorker background_worker_;
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU),
-- 
GitLab


From d4e9282dc53697432178a68940634612c4ab2baa Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 3 Oct 2018 12:32:57 -0700
Subject: [PATCH 0287/1085] [tf.data] Fix noisy warning.

PiperOrigin-RevId: 215607171
---
 tensorflow/python/data/ops/dataset_ops.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 46ce191f7b..b7e19055f2 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1831,9 +1831,10 @@ class StructuredFunctionWrapper(object):
           flat_classes.append(component)
           flat_shapes.append(component)
           flat_types.append(component)
-          if t.options() is not None:  # pylint: disable=protected-access
-            warnings.warn("Encountered a nested dataset with options. These "
-                          "options will not be applied to the outer dataset.")
+          if t.options() != Options():
+            warnings.warn("Encountered a nested dataset with non-default "
+                          "options. These options will not be propagated to "
+                          "the outer dataset.")
         else:
           try:
             t = ops.convert_to_tensor(t)
-- 
GitLab


From 506ea0b8d3af1b54f42721584a414957e1525c8a Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 12:36:16 -0700
Subject: [PATCH 0288/1085] Change hierarchical_tree_broadcaster_test from
 small to medium.

PiperOrigin-RevId: 215607769
---
 tensorflow/core/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 0aae29d10c..6a3ee3c1cb 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -3750,7 +3750,7 @@ tf_cc_tests_gpu(
 
 tf_cc_tests_gpu(
     name = "hierarchical_tree_broadcaster_test",
-    size = "small",
+    size = "medium",
     srcs = [
         "common_runtime/hierarchical_tree_broadcaster_test.cc",
     ],
-- 
GitLab


From 4c4207eb8f1dabb92ff231fe5f6ebb0954c2c116 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 12:36:16 -0700
Subject: [PATCH 0289/1085] Change hierarchical_tree_broadcaster_test from
 small to medium.

PiperOrigin-RevId: 215607769
---
 tensorflow/core/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 0aae29d10c..6a3ee3c1cb 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -3750,7 +3750,7 @@ tf_cc_tests_gpu(
 
 tf_cc_tests_gpu(
     name = "hierarchical_tree_broadcaster_test",
-    size = "small",
+    size = "medium",
     srcs = [
         "common_runtime/hierarchical_tree_broadcaster_test.cc",
     ],
-- 
GitLab


From 19833284cc8fa555115aacde350ad66652b250dc Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Wed, 3 Oct 2018 12:39:32 -0700
Subject: [PATCH 0290/1085] Automated rollback of commit
 2af8fd975aaf5c70ebb396895fa15a8f034a8440

PiperOrigin-RevId: 215608349
---
 .../tf2xla/functionalize_control_flow.cc      | 129 ++++++------------
 1 file changed, 39 insertions(+), 90 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 28e09d7b79..36c6f5d316 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -79,10 +79,7 @@ Status FunctionalizeControlFlowForFunction(
     const string& func_name, const string& new_func_name,
     const protobuf::Map<string, tensorflow::AttrValue>& attrs,
     FunctionLibraryDefinition* fld, FunctionLibraryRuntime* flr,
-    std::map<string, absl::optional<string>>* canonicalized_name_to_new_name,
-    bool* modified) {
-  *modified = false;
-
+    std::map<string, string>* canonicalized_name_to_new_name) {
   // Convert the function to Graph.
   FunctionLibraryRuntime::Handle handle;
   TF_RETURN_IF_ERROR(flr->Instantiate(func_name, AttrSlice(&attrs), &handle));
@@ -95,19 +92,6 @@ Status FunctionalizeControlFlowForFunction(
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
 
-  // Check if the graph has Switch or Merge node before optimizing the graph.
-  bool has_switch_or_merge = false;
-  for (Node* n : body->graph->nodes()) {
-    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
-      has_switch_or_merge = true;
-      break;
-    }
-  }
-  // We cannot return here directly if the graph has no Switch/Merge.
-  // It might contain function call nodes, or If/While nodes with Switch/Merge
-  // in function body. We still need to rewrite those functions and modify
-  // corresponding nodes.
-
   // Call graph optimizer. The most important optimization we need is constant
   // folding, which will replace ops like Shape/BroadcastGradientArgs with
   // constant shape input. Without this optimization, those ops might become
@@ -145,13 +129,6 @@ Status FunctionalizeControlFlowForFunction(
         absl::StrCat("functionalize_control_flow_after_opt_", func_name),
         *optimized_graph, fld);
   }
-  // Some inlined functions might have Switch/Merge nodes.
-  for (Node* n : optimized_graph->nodes()) {
-    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
-      has_switch_or_merge = true;
-      break;
-    }
-  }
 
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
@@ -174,15 +151,10 @@ Status FunctionalizeControlFlowForFunction(
           Canonicalize(name, AttrSlice(&associated_function.attrs()));
       auto iter = canonicalized_name_to_new_name->find(canonicalized_name);
       string new_name;
-      bool function_modified;
       if (iter != canonicalized_name_to_new_name->end()) {
-        // If we already processed this function, check if it was rewritten. If
-        // the function was rewritten, the entry will be non-empty. Otherwise
-        // the entry will be empty.
-        function_modified = iter->second.has_value();
-        if (function_modified) {
-          new_name = iter->second.value();
-        }
+        // If we already functionalized this function, skip functionalization
+        // but still rewrite the node.
+        new_name = iter->second;
       } else {
         if (associated_function.type() ==
             AssociatedFunctionInfo::AssociatedFunctionType::kSymbolicGradient) {
@@ -194,62 +166,42 @@ Status FunctionalizeControlFlowForFunction(
         }
         TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
             name, new_name, associated_function.attrs(), fld, flr,
-            canonicalized_name_to_new_name, &function_modified));
-        if (function_modified) {
-          // If the function was rewritten, add an non-empty entry. So later we
-          // know we have processed this function, and it was rewritten into
-          // another function.
-          (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
-        } else {
-          // If the function was not rewritten, add an empty entry. So later
-          // we know we have processed this function, and it does not need to be
-          // rewritten.
-          (*canonicalized_name_to_new_name)[canonicalized_name] = absl::nullopt;
-        }
-      }
-      if (function_modified) {
-        *modified = true;
-
-        // Notice that if "n" is a function call, RewriteAssociatedFunction()
-        // will delete it and create a new node instead, making "n" an invalid
-        // pointer. That's fine because in that case, associated_functions will
-        // only have one member and the loop will only run once.
-        TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-            optimized_graph.get(), n, fld, associated_function, new_name));
+            canonicalized_name_to_new_name));
+        (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
       }
+      // Notice that if "n" is a function call, RewriteAssociatedFunction() will
+      // delete it and create a new node instead, making "n" an invalid pointer.
+      // That's fine because in that case, associated_functions will only have
+      // one member and the loop will only run once.
+      TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
+          optimized_graph.get(), n, fld, associated_function, new_name));
     }
   }
 
-  if (has_switch_or_merge) {
-    *modified = true;
-
-    // Functionalize the function body.
-    if (VLOG_IS_ON(4)) {
-      dump_graph::DumpGraphToFile(
-          absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-          *optimized_graph, fld);
-    }
-    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
-    if (VLOG_IS_ON(4)) {
-      dump_graph::DumpGraphToFile(
-          absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-          *optimized_graph, fld);
-    }
+  // Functionalize the function body.
+  if (VLOG_IS_ON(4)) {
+    dump_graph::DumpGraphToFile(
+        absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
+        *optimized_graph, fld);
   }
+  TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
+  if (VLOG_IS_ON(4)) {
+    dump_graph::DumpGraphToFile(
+        absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
+        *optimized_graph, fld);
+  }
+  FunctionDef functionalized_fdef;
+  TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
+                                        &functionalized_fdef));
 
-  if (*modified) {
-    // Add rewritten FunctionDef into library.
-    FunctionDef functionalized_fdef;
-    TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
-                                          &functionalized_fdef));
-    if (func_name == new_func_name) {
-      VLOG(2) << "Replacing function " << func_name;
-      TF_RETURN_IF_ERROR(
-          fld->ReplaceFunction(new_func_name, functionalized_fdef));
-    } else {
-      VLOG(2) << "Adding function " << new_func_name;
-      TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
-    }
+  // Add rewritten FunctionDef into library.
+  if (func_name == new_func_name) {
+    VLOG(2) << "Replacing function " << func_name;
+    TF_RETURN_IF_ERROR(
+        fld->ReplaceFunction(new_func_name, functionalized_fdef));
+  } else {
+    VLOG(2) << "Adding function " << new_func_name;
+    TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
   }
 
   return ret_status;
@@ -275,7 +227,7 @@ Status FunctionalizeControlFlowPass::Run(
           {"TPUCompile", "function"},
           {"XlaLaunch", "function"},
       };
-  std::map<string, absl::optional<string>> canonicalized_name_to_new_name;
+  std::map<string, string> canonicalized_name_to_new_name;
   for (Node* n : graph->nodes()) {
     auto it = kNodeTypeToFunctionAttrMapping->find(n->type_string());
     if (it == kNodeTypeToFunctionAttrMapping->end()) {
@@ -290,15 +242,12 @@ Status FunctionalizeControlFlowPass::Run(
               << ". Corresponding function: " << func.name();
       string new_func_name = options.flib_def->UniqueFunctionName(
           absl::StrCat(func.name(), "_f15n_"));
-      bool modified;
       TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
           func.name(), new_func_name, func.attr(), options.flib_def, flr,
-          &canonicalized_name_to_new_name, &modified));
-      if (modified) {
-        n->ClearAttr(func_attr);
-        func.set_name(new_func_name);
-        n->AddAttr(func_attr, func);
-      }
+          &canonicalized_name_to_new_name));
+      n->ClearAttr(func_attr);
+      func.set_name(new_func_name);
+      n->AddAttr(func_attr, func);
     }
   }
 
-- 
GitLab


From 808b1dcb318b1feb5a8c9fed5558f95cd05728e4 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Wed, 3 Oct 2018 12:44:47 -0700
Subject: [PATCH 0291/1085] [data-stats] Sets user given `tag` and
 `counter_prefix` with `set_stats_aggregator`. `tag` would get prep-end with
 all the statistics recorded as summary and `counter_prefix` would set the
 prefix for the statistics recorded as counter. Note: `counter` defaults to
 `\tensorflow`, and `tag` and `prefix` gets associated with the dataset (not
 the stats_aggregator).

PiperOrigin-RevId: 215609159
---
 tensorflow/core/framework/dataset.h           | 22 +-----
 tensorflow/core/kernels/data/BUILD            |  1 +
 .../experimental/threadpool_dataset_op.cc     |  2 +-
 .../kernels/data/parse_example_dataset_op.cc  |  4 +-
 .../data/stats_aggregator_dataset_op.cc       | 78 +++++++++++++++++--
 .../core/kernels/data/stats_aggregator_ops.cc | 11 +--
 .../core/ops/compat/ops_history.v1.pbtxt      |  8 ++
 tensorflow/core/ops/dataset_ops.cc            |  2 +
 .../kernel_tests/stats_dataset_ops_test.py    | 69 ++++++++++++++++
 .../python/data/experimental/ops/stats_ops.py | 17 +++-
 .../v1/tensorflow.data.experimental.pbtxt     |  2 +-
 .../v2/tensorflow.data.experimental.pbtxt     |  2 +-
 12 files changed, 179 insertions(+), 39 deletions(-)

diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 8c1151cb56..964a7d5f8c 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -278,15 +278,8 @@ class IteratorContext {
     // Function call support.
     std::function<void(std::function<void()>)> runner = nullptr;
 
-    // A function that returns the current `StatsAggregator` instance to be
-    // used when recording statistics about the iterator.
-    //
-    // NOTE(mrry): This is somewhat awkward, because (i) the `StatsAggregator`
-    // is a property of the `IteratorResource` (which this class does not know
-    // about), and (ii) it can change after the `IteratorContext` has been
-    // created. Better suggestions are welcome!
-    std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter =
-        nullptr;
+    // The `StatsAggregator` object to record statistics about the iterator.
+    std::shared_ptr<StatsAggregator> stats_aggregator = nullptr;
 
     // The FunctionLibraryRuntime object to be used to make function calls.
     FunctionLibraryRuntime* lib = nullptr;
@@ -320,13 +313,6 @@ class IteratorContext {
     return &params_.runner;
   }
 
-  std::shared_ptr<StatsAggregator> stats_aggregator() {
-    if (params_.stats_aggregator_getter) {
-      return params_.stats_aggregator_getter();
-    } else {
-      return nullptr;
-    }
-  }
 
   std::shared_ptr<const FunctionLibraryDefinition> function_library() {
     return params_.function_library;
@@ -344,8 +330,8 @@ class IteratorContext {
     return params_.allocator_getter;
   }
 
-  std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter() {
-    return params_.stats_aggregator_getter;
+  std::shared_ptr<StatsAggregator> stats_aggregator() {
+    return params_.stats_aggregator;
   }
 
   std::shared_ptr<model::Model> model() { return params_.model; }
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 6333853cdf..451f8c1a6c 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -458,6 +458,7 @@ tf_kernel_library(
     srcs = ["stats_aggregator_dataset_op.cc"],
     deps = [
         ":dataset",
+        "//tensorflow/core:core_cpu_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib_internal",
     ],
diff --git a/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
index c80493d3a1..8d561ca0e3 100644
--- a/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/threadpool_dataset_op.cc
@@ -191,7 +191,7 @@ class ThreadPoolDatasetOp : public UnaryDatasetOpKernel {
         params.runner = [pool](std::function<void()> c) {
           pool->Schedule(std::move(c));
         };
-        params.stats_aggregator_getter = ctx->stats_aggregator_getter();
+        params.stats_aggregator = ctx->stats_aggregator();
         params.lib = ctx->lib();
         params.function_library = ctx->function_library();
         params.allocator_getter = ctx->allocator_getter();
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index c28c06da62..1d1a717062 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -253,7 +253,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
               for (example::PerExampleFeatureStats feature_stats :
                    example_result.feature_stats) {
                 stats_aggregator->AddToHistogram(
-                    strings::StrCat("record_stats", ":features"),
+                    "features",
                     {static_cast<double>(feature_stats.features_count)});
                 stats_aggregator->IncrementCounter(
                     "features_count", "trainer", feature_stats.features_count);
@@ -261,7 +261,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
                     "feature_values_count", "trainer",
                     feature_stats.feature_values_count);
                 stats_aggregator->AddToHistogram(
-                    strings::StrCat("record_stats", ":feature-values"),
+                    "feature-values",
                     {static_cast<double>(feature_stats.feature_values_count)});
               }
             }
diff --git a/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc b/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
index c8abfb9eb5..c09a73fff1 100644
--- a/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
+++ b/tensorflow/core/kernels/data/stats_aggregator_dataset_op.cc
@@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include <memory>
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 
@@ -22,6 +24,52 @@ namespace tensorflow {
 namespace data {
 namespace {
 
+class StatsAggregatorWithTagAndPrefix : public StatsAggregator {
+ public:
+  StatsAggregatorWithTagAndPrefix(
+      std::shared_ptr<StatsAggregator> stats_aggregator, const string& tag,
+      const string& prefix)
+      : wrapped_(stats_aggregator), tag_(tag), prefix_(prefix) {}
+
+  void AddToHistogram(const string& name,
+                      gtl::ArraySlice<double> values) override {
+    if (!tag_.empty()) {
+      wrapped_->AddToHistogram(strings::StrCat(tag_, "_", name), values);
+    } else {
+      wrapped_->AddToHistogram(name, values);
+    }
+  }
+
+  void AddScalar(const string& name, float value) override {
+    if (!tag_.empty()) {
+      wrapped_->AddScalar(strings::StrCat(tag_, "_", name), value);
+    } else {
+      wrapped_->AddScalar(name, value);
+    }
+  }
+
+  void EncodeToProto(Summary* out_summary) override {
+    wrapped_->EncodeToProto(out_summary);
+  }
+
+  void IncrementCounter(const string& name, const string& label,
+                        int64 val) override {
+    if (!prefix_.empty()) {
+      wrapped_->IncrementCounter(strings::StrCat(prefix_, "/", name), label,
+                                 val);
+    } else {
+      wrapped_->IncrementCounter(strings::StrCat("/tensorflow/", name), label,
+                                 val);
+    }
+  }
+
+ private:
+  std::shared_ptr<StatsAggregator> wrapped_;
+  string tag_;
+  string prefix_;
+  TF_DISALLOW_COPY_AND_ASSIGN(StatsAggregatorWithTagAndPrefix);
+};
+
 class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
  public:
   explicit SetStatsAggregatorDatasetOp(OpKernelConstruction* ctx)
@@ -33,8 +81,13 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 1),
                                        &stats_aggregator_resource));
     core::ScopedUnref unref_stats_aggregator(stats_aggregator_resource);
+    string tag;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag));
+    string prefix;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "counter_prefix", &prefix));
 
-    *output = new Dataset(ctx, input, ctx->input(1), stats_aggregator_resource);
+    *output = new Dataset(ctx, input, ctx->input(1), stats_aggregator_resource,
+                          tag, prefix);
   }
 
  private:
@@ -42,11 +95,14 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
    public:
     explicit Dataset(OpKernelContext* ctx, const DatasetBase* input,
                      const Tensor& resource_handle,
-                     StatsAggregatorResource* stats_aggregator_resource)
+                     StatsAggregatorResource* stats_aggregator_resource,
+                     const string& tag, const string& prefix)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           resource_handle_(resource_handle),
-          stats_aggregator_resource_(stats_aggregator_resource) {
+          stats_aggregator_resource_(stats_aggregator_resource),
+          tag_(tag),
+          prefix_(prefix) {
       input_->Ref();
       stats_aggregator_resource_->Ref();
     }
@@ -81,8 +137,13 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
       Node* resource_handle_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddTensor(resource_handle_, &resource_handle_node));
+      Node* tag_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(tag_, &tag_node));
+      Node* prefix_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddScalar(prefix_, &prefix_node));
       TF_RETURN_IF_ERROR(b->AddDataset(
-          this, {input_graph_node, resource_handle_node}, output));
+          this, {input_graph_node, resource_handle_node, tag_node, prefix_node},
+          output));
       return Status::OK();
     }
 
@@ -105,9 +166,10 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
         IteratorContext::Params params;
         params.env = ctx->env();
         params.runner = *(ctx->runner());
-        params.stats_aggregator_getter = [stats_aggregator_resource]() {
-          return stats_aggregator_resource->stats_aggregator();
-        };
+        params.stats_aggregator = std::shared_ptr<StatsAggregator>(
+            new StatsAggregatorWithTagAndPrefix(
+                stats_aggregator_resource->stats_aggregator(), dataset()->tag_,
+                dataset()->prefix_));
         params.lib = ctx->lib();
         params.function_library = ctx->function_library();
         params.allocator_getter = ctx->allocator_getter();
@@ -136,6 +198,8 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
     const DatasetBase* const input_;
     const Tensor resource_handle_;
     StatsAggregatorResource* stats_aggregator_resource_;
+    string tag_;
+    string prefix_;
   };
 };
 
diff --git a/tensorflow/core/kernels/data/stats_aggregator_ops.cc b/tensorflow/core/kernels/data/stats_aggregator_ops.cc
index a7ded67876..2d51467616 100644
--- a/tensorflow/core/kernels/data/stats_aggregator_ops.cc
+++ b/tensorflow/core/kernels/data/stats_aggregator_ops.cc
@@ -82,11 +82,12 @@ class StatsAggregatorImpl : public StatsAggregator {
     auto counters_map = get_counters_map();
     if (counters_map->find(name) == counters_map->end()) {
       counters_map->emplace(
-          name, monitoring::Counter<1>::New(
-                    /*streamz name*/ "/tensorflow/" + name,
-                    /*streamz description*/
-                    name + " generated or consumed by the component.",
-                    /*streamz label name*/ "component_descriptor"));
+          name,
+          monitoring::Counter<1>::New(
+              /*streamz name*/ name,
+              /*streamz description*/
+              strings::StrCat(name, " generated or consumed by the component."),
+              /*streamz label name*/ "component_descriptor"));
     }
     counters_map->at(name)->GetCell(label)->IncrementBy(val);
   }
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 4845767405..33f18ae13f 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -59785,6 +59785,14 @@ op {
     name: "stats_aggregator"
     type: DT_RESOURCE
   }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "counter_prefix"
+    type: DT_STRING
+  }
   output_arg {
     name: "handle"
     type: DT_VARIANT
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 71f4cc3c4c..889a6a4640 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -185,6 +185,8 @@ REGISTER_OP("ParseExampleDataset")
 REGISTER_OP("SetStatsAggregatorDataset")
     .Input("input_dataset: variant")
     .Input("stats_aggregator: resource")
+    .Input("tag: string")
+    .Input("counter_prefix: string")
     .Output("handle: variant")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index 6761fbd16b..19f5a62d45 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
 from tensorflow.python.data.experimental.kernel_tests import stats_dataset_test_base
 from tensorflow.python.data.experimental.ops import stats_ops
 from tensorflow.python.data.ops import dataset_ops
@@ -248,6 +249,74 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
         sess.run(next_element)
       self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0)
 
+  def testMultipleDatasetWithTags(self):
+    stats_aggregator = stats_ops.StatsAggregator()
+    dataset = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency")).apply(
+            stats_ops.set_stats_aggregator(stats_aggregator, "dataset1"))
+    dataset2 = dataset_ops.Dataset.range(100).apply(
+        stats_ops.latency_stats("record_latency")).apply(
+            stats_ops.set_stats_aggregator(stats_aggregator, "dataset2"))
+    iterator_0 = dataset.make_initializable_iterator()
+    iterator_1 = dataset2.make_initializable_iterator()
+    next_element = iterator_0.get_next() + iterator_1.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run([iterator_0.initializer, iterator_1.initializer])
+      for i in range(100):
+        self.assertEqual(i * 2, sess.run(next_element))
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "dataset1_record_latency", float(i + 1))
+        self._assertSummaryHasCount(
+            sess.run(summary_t), "dataset2_record_latency", float(i + 1))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      self._assertSummaryHasCount(
+          sess.run(summary_t), "dataset1_record_latency", 100.0)
+      self._assertSummaryHasCount(
+          sess.run(summary_t), "dataset2_record_latency", 100.0)
+
+
+class FeatureStatsDatasetTest(
+    stats_dataset_test_base.StatsDatasetTestBase,
+    reader_dataset_ops_test_base.ReadBatchFeaturesTestBase):
+
+  def testFeaturesStats(self):
+    num_epochs = 5
+    total_records = num_epochs * self._num_records
+    batch_size = 2
+    stats_aggregator = stats_ops.StatsAggregator()
+    dataset = self.make_batch_feature(
+        filenames=self.test_filenames[0],
+        num_epochs=num_epochs,
+        batch_size=batch_size,
+        shuffle=True,
+        shuffle_seed=5,
+        drop_final_batch=False).apply(
+            stats_ops.set_stats_aggregator(stats_aggregator, "record_stats"))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+    summary_t = stats_aggregator.get_summary()
+
+    with self.test_session() as sess:
+      sess.run(iterator.initializer)
+      for _ in range(total_records // batch_size + 1 if total_records %
+                     batch_size else total_records // batch_size):
+        sess.run(next_element)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+      self._assertSummaryHasCount(
+          sess.run(summary_t), "record_stats_features", total_records)
+      self._assertSummaryHasCount(
+          sess.run(summary_t), "record_stats_feature-values", total_records)
+      self._assertSummaryHasSum(
+          sess.run(summary_t), "record_stats_features", total_records * 4)
+      self._assertSummaryHasSum(
+          sess.run(summary_t), "record_stats_feature-values",
+          self._sum_keywords(1) * num_epochs + 3 * total_records)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/ops/stats_ops.py b/tensorflow/python/data/experimental/ops/stats_ops.py
index c918d223e8..54ef6fc3e8 100644
--- a/tensorflow/python/data/experimental/ops/stats_ops.py
+++ b/tensorflow/python/data/experimental/ops/stats_ops.py
@@ -89,15 +89,19 @@ class StatsAggregator(object):
 class _SetStatsAggregatorDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that acts as an identity, and sets given stats_aggregator."""
 
-  def __init__(self, input_dataset, stats_aggregator):
+  def __init__(self, input_dataset, stats_aggregator, tag, prefix):
     super(_SetStatsAggregatorDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._stats_aggregator = stats_aggregator
+    self._tag = tag
+    self._prefix = prefix
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.set_stats_aggregator_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         self._stats_aggregator._resource,  # pylint: disable=protected-access
+        self._tag,
+        self._prefix,
         **dataset_ops.flat_structure(self))
 
   @property
@@ -114,11 +118,15 @@ class _SetStatsAggregatorDataset(dataset_ops.UnaryDataset):
 
 
 @tf_export("data.experimental.set_stats_aggregator")
-def set_stats_aggregator(stats_aggregator):
+def set_stats_aggregator(stats_aggregator, tag="", counter_prefix=""):
   """Set the given `stats_aggregator` for aggregating the input dataset stats.
 
   Args:
-    stats_aggregator: A `tf.data.experimental.StatsAggregator` object.
+    stats_aggregator: A `tf.contrib.data.StatsAggregator` object.
+    tag: (Optional) String, all statistics recorded for the input `dataset`
+      will have given `tag` prepend with the name.
+    counter_prefix: (Optional) String, all statistics recorded as `counters`
+      will have the given `prefix` for the counter. Defaults to "/tesorflow".
 
   Returns:
     A `Dataset` transformation function, which can be passed to
@@ -126,7 +134,8 @@ def set_stats_aggregator(stats_aggregator):
   """
 
   def _apply_fn(dataset):
-    return _SetStatsAggregatorDataset(dataset, stats_aggregator)
+    return _SetStatsAggregatorDataset(dataset, stats_aggregator, tag,
+                                      counter_prefix)
 
   return _apply_fn
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
index b14585f8d7..2a1f899dc0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "set_stats_aggregator"
-    argspec: "args=[\'stats_aggregator\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'stats_aggregator\', \'tag\', \'counter_prefix\'], varargs=None, keywords=None, defaults=[\'\', \'\'], "
   }
   member_method {
     name: "shuffle_and_repeat"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
index b14585f8d7..2a1f899dc0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
@@ -122,7 +122,7 @@ tf_module {
   }
   member_method {
     name: "set_stats_aggregator"
-    argspec: "args=[\'stats_aggregator\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'stats_aggregator\', \'tag\', \'counter_prefix\'], varargs=None, keywords=None, defaults=[\'\', \'\'], "
   }
   member_method {
     name: "shuffle_and_repeat"
-- 
GitLab


From 56cc2e170d88f2a4bbfd5c8ab317eb835159beea Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Wed, 3 Oct 2018 12:25:25 -0700
Subject: [PATCH 0292/1085] Disable XLA for Android builds.

PiperOrigin-RevId: 215605865
---
 tensorflow/tools/ci_build/builds/configured | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/ci_build/builds/configured b/tensorflow/tools/ci_build/builds/configured
index 868a3beac5..3eee11fd7e 100755
--- a/tensorflow/tools/ci_build/builds/configured
+++ b/tensorflow/tools/ci_build/builds/configured
@@ -32,6 +32,10 @@ COMMAND=("$@")
 
 export CI_BUILD_PYTHON="${CI_BUILD_PYTHON:-python}"
 export PYTHON_BIN_PATH="${PYTHON_BIN_PATH:-$(which ${CI_BUILD_PYTHON})}"
+# XLA currently does not build under Android, so disable it for now.
+if [[ "${CONTAINER_TYPE}" -eq 'android' ]]; then
+  export TF_ENABLE_XLA=0
+fi
 
 pushd "${CI_TENSORFLOW_SUBMODULE_PATH:-.}"
 yes "" | $PYTHON_BIN_PATH configure.py
-- 
GitLab


From 7566f3d5ad690c71c36e78611b1ae5913ec3e845 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 13:22:52 -0700
Subject: [PATCH 0293/1085] Fix handling of tuples in CreateCopyWithNewLayout.

If the layout of a single tensor in a tuple is different from its use, then
CreateCopyWithNewLayout will do a deep copy of the entire tuple.  Not only does
this operation create unnecessary copies of elements where the layout is the
same, it will throw an error if the tuple contains elements like token[] that
cannot be copied.  As a result, layout assignment on TPU occassionally causes
mysterious compilation failures for code that runs correctly on CPU and GPU.

PiperOrigin-RevId: 215615731
---
 .../compiler/xla/service/layout_assignment.cc | 28 +++++----
 .../xla/service/layout_assignment_test.cc     | 59 +++++++++++++++++++
 2 files changed, 76 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 68a08a0886..cc4a342e9d 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -792,21 +792,27 @@ StatusOr<HloInstruction*> LayoutAssignment::CreateCopyWithNewLayout(
       << " instruction: " << instruction->ToString();
 
   if (ShapeUtil::IsTuple(instruction->shape())) {
-    // Deep-copy tuples.
+    // Copy tuple elements which have differing layouts.
     std::vector<HloInstruction*> element_copies;
     for (int64 i = 0; i < ShapeUtil::TupleElementCount(instruction->shape());
          ++i) {
+      const Shape& target_shape =
+          ShapeUtil::GetSubshape(shape_with_layout, {i});
+      const Shape& instr_shape =
+          ShapeUtil::GetSubshape(instruction->shape(), {i});
       HloInstruction* gte = instruction->parent()->AddInstruction(
-          HloInstruction::CreateGetTupleElement(
-              ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction,
-              i));
-      SetupCopiedInstruction(*instruction, gte, {i});
-      // Recurse to copy each elements.
-      TF_ASSIGN_OR_RETURN(
-          HloInstruction * element_copy,
-          CreateCopyWithNewLayout(
-              ShapeUtil::GetSubshape(shape_with_layout, {i}), gte));
-      element_copies.push_back(element_copy);
+          HloInstruction::CreateGetTupleElement(instr_shape, instruction, i));
+
+      if (ShapeUtil::Equal(target_shape, instr_shape)) {
+        // Shapes and layouts are equal, no need to copy.
+        element_copies.push_back(gte);
+      } else {
+        SetupCopiedInstruction(*instruction, gte, {i});
+        // Recurse to copy each element.
+        TF_ASSIGN_OR_RETURN(HloInstruction * element_copy,
+                            CreateCopyWithNewLayout(target_shape, gte));
+        element_copies.push_back(element_copy);
+      }
     }
     // Gather element copies into a tuple with a new Tuple instruction.
     HloInstruction* tuple_copy = instruction->parent()->AddInstruction(
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 15c16d667c..2c549cd872 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -1043,5 +1043,64 @@ TEST_F(LayoutAssignmentTest, PropagatingLayoutFromResultToOperand) {
                                     op::ShapeWithLayout(shape_copy))));
 }
 
+TEST_F(LayoutAssignmentTest, TupleCopyOnLayoutMismatch) {
+  // The first infeed uses layout {0,1}, while the second uses layout {1,0}.
+  // The mismatch forces a copy of the tuple.  The tuple contains a token, so
+  // layout assignment will fail if it tries to copy the whole tuple.
+  const char* module_str = R"(
+    HloModule TupleCopyOnLayoutMismatch
+
+    condition.1 (tup: (s32[], token[], f32[512,1024]{0,1})) -> pred[] {
+      tup.1 = (s32[], token[], f32[512,1024]{0,1}) parameter(0)
+      counter.1 = s32[] get-tuple-element(tup.1), index=0
+      five = s32[] constant(5)
+      ROOT lt = pred[] less-than(counter.1, five)
+    }
+
+    body.2 (tup: (s32[], token[], f32[512,1024]{0,1})) -> (s32[], token[], f32[512,1024]{0,1}) {
+      tup.2 = (s32[], token[], f32[512,1024]{0,1}) parameter(0)
+      counter.2 = s32[] get-tuple-element(tup.2), index=0
+      tok.2 = token[] get-tuple-element(tup.2), index=1
+
+      ifeed.2 = (f32[512,1024]{1,0}, token[]) infeed(tok.2)
+      next_tok = token[] get-tuple-element(ifeed.2), index=1
+      next_buf = f32[512,1024]{1,0} get-tuple-element(ifeed.2), index=0
+
+      one = s32[] constant(1)
+      next_counter = s32[] add(counter.2, one)
+      ROOT tup = (s32[], token[], f32[512,1024]{0,1}) tuple(next_counter, next_tok, next_buf)
+    }
+
+    ENTRY main () -> f32[512,1024]{0,1} {
+      start_tok = token[] after-all()
+
+      ifeed.3 = (f32[512,1024]{0,1}, token[]) infeed(start_tok)
+      itok = token[] get-tuple-element(ifeed.3), index=1
+      ibuf = f32[512,1024]{0,1} get-tuple-element(ifeed.3), index=0
+
+      zero = s32[] constant(0)
+      itup = (s32[], token[], f32[512,1024]{0,1}) tuple(zero, itok, ibuf)
+
+      loop = (s32[], token[], f32[512,1024]{0,1}) while(itup), condition=condition.1, body=body.2
+      ROOT result = f32[512,1024]{0,1} get-tuple-element(loop), index=2
+    }
+  )";
+
+  ParseAndVerifyModule(module_str);
+  ComputationLayout computation_layout(
+      module().entry_computation()->ComputeProgramShape());
+
+  // Sanity check to verify that there's a layout mismatch.
+  EXPECT_THAT(LayoutOf(&module(), "ibuf"), ElementsAre(0, 1));
+  EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0));
+
+  AssignLayouts(&module(), &computation_layout);
+
+  // Make sure that layout assignment did not magically eliminate the mismatch,
+  // in which case the test didn't prove anything.
+  EXPECT_THAT(LayoutOf(&module(), "ibuf"), ElementsAre(0, 1));
+  EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From c2c8cfe22492cf7fab804d32283b623632270035 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 13:25:22 -0700
Subject: [PATCH 0294/1085] Add the option of merging bidirectional RNN and
 LSTM outputs into a single output tensor.

This is useful if the output of both directions will be passed to the next layer as a single output, as it avoids adding a concatenation op, which can be expensive on mobile devices where memory movement is relatively expensive.

PiperOrigin-RevId: 215616140
---
 tensorflow/contrib/lite/c/builtin_op_data.h   |  16 ++
 .../contrib/lite/c/builtin_op_data_test.cc    |   2 +
 .../lite/core/api/flatbuffer_conversions.cc   |  34 ++-
 .../kernels/bidirectional_sequence_lstm.cc    | 116 +++++----
 .../bidirectional_sequence_lstm_test.cc       | 186 +++++++++++++-
 .../kernels/bidirectional_sequence_rnn.cc     |  85 +++---
 .../bidirectional_sequence_rnn_test.cc        |  56 +++-
 tensorflow/contrib/lite/schema/schema.fbs     |  12 +
 .../contrib/lite/schema/schema_generated.h    | 243 +++++++++++++++++-
 9 files changed, 640 insertions(+), 110 deletions(-)

diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h
index be9d551ee4..44daf7adaa 100644
--- a/tensorflow/contrib/lite/c/builtin_op_data.h
+++ b/tensorflow/contrib/lite/c/builtin_op_data.h
@@ -99,6 +99,12 @@ typedef struct {
   TfLiteFusedActivation activation;
 } TfLiteSequenceRNNParams;
 
+typedef struct {
+  bool time_major;
+  TfLiteFusedActivation activation;
+  bool merge_outputs;
+} TfLiteBidirectionalSequenceRNNParams;
+
 typedef enum {
   kTfLiteFullyConnectedWeightsFormatDefault = 0,
   kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
@@ -180,6 +186,16 @@ typedef struct {
   TfLiteLSTMKernelType kernel_type;
 } TfLiteLSTMParams;
 
+typedef struct {
+  // Parameters for the LSTM kernel.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+
+  // If true, store the outputs of both directions in the first output.
+  bool merge_outputs;
+} TfLiteBidirectionalSequenceLSTMParams;
+
 typedef struct {
   bool align_corners;
 } TfLiteResizeBilinearParams;
diff --git a/tensorflow/contrib/lite/c/builtin_op_data_test.cc b/tensorflow/contrib/lite/c/builtin_op_data_test.cc
index 4d0ba75e68..ba458b4252 100644
--- a/tensorflow/contrib/lite/c/builtin_op_data_test.cc
+++ b/tensorflow/contrib/lite/c/builtin_op_data_test.cc
@@ -73,6 +73,8 @@ TEST(IntArray, CanCompileStructs) {
   TfLiteFakeQuantParams fake_quant_params;
   TfLitePackParams pack_params;
   TfLiteOneHotParams one_hot_params;
+  TfLiteBidirectionalSequenceRNNParams bidi_sequence_rnn_params;
+  TfLiteBidirectionalSequenceLSTMParams bidi_sequence_lstm_params;
 }
 
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index e6900e0950..eac7db9a88 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -224,10 +224,8 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
-    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN:
     case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: {
-      TfLiteSequenceRNNParams* params =
-          allocator->AllocatePOD<TfLiteSequenceRNNParams>();
+      auto params = allocator->AllocatePOD<TfLiteSequenceRNNParams>();
       if (auto* sequence_rnn_params =
               op->builtin_options_as_SequenceRNNOptions()) {
         params->activation =
@@ -237,6 +235,19 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: {
+      auto params =
+          allocator->AllocatePOD<TfLiteBidirectionalSequenceRNNParams>();
+      if (auto* bidi_sequence_rnn_params =
+              op->builtin_options_as_BidirectionalSequenceRNNOptions()) {
+        params->activation = parse_activation(
+            bidi_sequence_rnn_params->fused_activation_function());
+        params->time_major = bidi_sequence_rnn_params->time_major();
+        params->merge_outputs = bidi_sequence_rnn_params->merge_outputs();
+      }
+      *builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
     case BuiltinOperator_RNN: {
       TfLiteRNNParams* params = allocator->AllocatePOD<TfLiteRNNParams>();
       if (auto* rnn_params = op->builtin_options_as_RNNOptions()) {
@@ -360,10 +371,9 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
-    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM:
     case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
     case BuiltinOperator_LSTM: {
-      TfLiteLSTMParams* params = allocator->AllocatePOD<TfLiteLSTMParams>();
+      auto params = allocator->AllocatePOD<TfLiteLSTMParams>();
       if (auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
         params->activation =
             parse_activation(lstm_params->fused_activation_function());
@@ -381,6 +391,20 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: {
+      auto params =
+          allocator->AllocatePOD<TfLiteBidirectionalSequenceLSTMParams>();
+      if (auto* bidi_lstm_params =
+              op->builtin_options_as_BidirectionalSequenceLSTMOptions()) {
+        params->activation =
+            parse_activation(bidi_lstm_params->fused_activation_function());
+        params->cell_clip = bidi_lstm_params->cell_clip();
+        params->proj_clip = bidi_lstm_params->proj_clip();
+        params->merge_outputs = bidi_lstm_params->merge_outputs();
+      }
+      *builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
     case BuiltinOperator_RESIZE_BILINEAR: {
       auto* params = allocator->AllocatePOD<TfLiteResizeBilinearParams>();
       if (auto* schema_params =
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index 66b947771c..0532528f52 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -119,7 +119,7 @@ constexpr int kBwAuxInputToOutputWeightsTensor = 47;  // Optional
 
 // Output tensors.
 constexpr int kFwOutputTensor = 0;
-constexpr int kBwOutputTensor = 1;
+constexpr int kBwOutputTensor = 1;  // Ignored if merge_outputs is set.
 
 // Temporary tensors.
 enum TemporaryTensor {
@@ -162,7 +162,8 @@ TfLiteStatus CheckLstmTensorDimensions(
     int input_gate_bias_tensor, int forget_gate_bias_tensor,
     int cell_gate_bias_tensor, int output_gate_bias_tensor,
     int projection_weights_tensor, int projection_bias_tensor) {
-  const auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
+      node->builtin_data);
 
   // Making sure clipping parameters have valid values.
   // == 0 means no clipping
@@ -347,10 +348,13 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 // tensors. Also check that the size of the input tensors match each other.
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   int* scratch_tensor_index = reinterpret_cast<int*>(node->user_data);
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
+      node->builtin_data);
 
   // Check we have all the inputs and outputs we need.
   TF_LITE_ENSURE_EQ(context, node->inputs->size, 48);
-  TF_LITE_ENSURE_EQ(context, node->outputs->size, 2);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size,
+                    params->merge_outputs ? 1 : 2);
 
   // Inferring batch size, number of outputs and sequence length and
   // number of cells from the input tensors.
@@ -368,6 +372,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, fw_input_to_output_weights->dims->data[1],
                     n_input);
 
+  const TfLiteTensor* bw_input_to_output_weights =
+      GetInput(context, node, kBwInputToOutputWeightsTensor);
+  const int n_bw_cell = bw_input_to_output_weights->dims->data[0];
+  TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->size, 2);
+  TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->data[1],
+                    n_input);
+
   const TfLiteTensor* fw_recurrent_to_output_weights =
       GetInput(context, node, kFwRecurrentToOutputWeightsTensor);
   TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->dims->size, 2);
@@ -375,6 +386,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                     n_fw_cell);
   const int n_fw_output = fw_recurrent_to_output_weights->dims->data[1];
 
+  const TfLiteTensor* bw_recurrent_to_output_weights =
+      GetInput(context, node, kBwRecurrentToOutputWeightsTensor);
+  TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->size, 2);
+  TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->data[0],
+                    n_bw_cell);
+  const int n_bw_output = bw_recurrent_to_output_weights->dims->data[1];
+
   // Check that input tensor dimensions matches with each other.
   TF_LITE_ENSURE_OK(
       context, CheckInputTensorDimensions(context, node, n_input, n_fw_output,
@@ -440,7 +458,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TfLiteIntArray* fw_output_size = TfLiteIntArrayCreate(3);
   fw_output_size->data[0] = max_time;
   fw_output_size->data[1] = n_batch;
-  fw_output_size->data[2] = n_fw_output;
+  fw_output_size->data[2] =
+      params->merge_outputs ? n_bw_output + n_fw_output : n_fw_output;
   TF_LITE_ENSURE_OK(context,
                     context->ResizeTensor(context, fw_output, fw_output_size));
 
@@ -479,39 +498,28 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, fw_scratch_buffer,
                                                    fw_scratch_buffer_size));
   // Same for the backward cell.
-  const TfLiteTensor* bw_input_to_output_weights =
-      GetInput(context, node, kBwInputToOutputWeightsTensor);
-  const int n_bw_cell = bw_input_to_output_weights->dims->data[0];
-  TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->size, 2);
-  TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->data[1],
-                    n_input);
-
-  const TfLiteTensor* bw_recurrent_to_output_weights =
-      GetInput(context, node, kBwRecurrentToOutputWeightsTensor);
-  TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->size, 2);
-  TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->data[0],
-                    n_bw_cell);
-  const int n_bw_output = bw_recurrent_to_output_weights->dims->data[1];
 
   // Check that input tensor dimensions matches with each other.
   TF_LITE_ENSURE_OK(
       context, CheckInputTensorDimensions(context, node, n_input, n_bw_output,
                                           n_bw_cell));
 
-  // Get the pointer to output, activation_state and cell_state buffer tensors.
-  TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+  // Get the pointer to activation_state and cell_state buffer tensors.
   TfLiteTensor* bw_activation_state =
       GetVariableInput(context, node, kBwInputActivationStateTensor);
   TfLiteTensor* bw_cell_state =
       GetVariableInput(context, node, kBwInputCellStateTensor);
 
   // Resize the output tensors.
-  TfLiteIntArray* bw_output_size = TfLiteIntArrayCreate(3);
-  bw_output_size->data[0] = max_time;
-  bw_output_size->data[1] = n_batch;
-  bw_output_size->data[2] = n_bw_output;
-  TF_LITE_ENSURE_OK(context,
-                    context->ResizeTensor(context, bw_output, bw_output_size));
+  if (!params->merge_outputs) {
+    TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+    TfLiteIntArray* bw_output_size = TfLiteIntArrayCreate(3);
+    bw_output_size->data[0] = max_time;
+    bw_output_size->data[1] = n_batch;
+    bw_output_size->data[2] = n_bw_output;
+    TF_LITE_ENSURE_OK(
+        context, context->ResizeTensor(context, bw_output, bw_output_size));
+  }
 
   // Check the shape of input state tensors.
   // These tensor may be 1D or 2D. It's fine as long as the total size is
@@ -705,7 +713,7 @@ TfLiteStatus EvalFloat(
     const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
     const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
     const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
     TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
     TfLiteTensor* cell_state, TfLiteTensor* output) {
   const int max_time = input->dims->data[0];
@@ -771,12 +779,13 @@ TfLiteStatus EvalFloat(
 
   // Loop through the sequence.
   const int input_step = n_batch * n_input;
-  const int output_step = n_batch * n_output;
+  const int output_step = n_batch * output->dims->data[2];
   for (int t = 0; t < max_time; t++) {
     // If this is the forward_sequence, step forward, otherwise step backwards.
     const int t_rel = forward_sequence ? t : max_time - t - 1;
     const float* input_ptr = input->data.f + t_rel * input_step;
-    float* output_ptr_time = output->data.f + t_rel * output_step;
+    float* output_ptr_time =
+        output->data.f + t_rel * output_step + output_offset;
 
     kernel_utils::LstmStepWithAuxInput(
         input_ptr, input_to_input_weights_ptr, input_to_forget_weights->data.f,
@@ -816,7 +825,7 @@ TfLiteStatus EvalHybrid(
     const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
     const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
     const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
     TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
     TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
     TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
@@ -972,12 +981,12 @@ TfLiteStatus EvalHybrid(
 
   // Feed the sequence into the LSTM step-by-step.
   const int input_step = n_batch * n_input;
-  const int output_step = n_batch * n_output;
+  const int output_step = n_batch * output->dims->data[2];
   for (int t = 0; t < max_time; t++) {
     // If this is the forward_sequence, step forward, otherwise step backwards.
     const int t_rel = forward_sequence ? t : max_time - t - 1;
     const float* input_ptr = input->data.f + t_rel * input_step;
-    float* output_ptr = output->data.f + t_rel * output_step;
+    float* output_ptr = output->data.f + t_rel * output_step + output_offset;
 
     kernel_utils::LstmStepWithAuxInput(
         input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
@@ -1011,7 +1020,8 @@ TfLiteStatus EvalHybrid(
 
 // The LSTM Op engine.
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
+      node->builtin_data);
 
   // Input tensor.
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
@@ -1107,7 +1117,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       GetVariableInput(context, node, kBwInputActivationStateTensor);
   TfLiteTensor* bw_cell_state =
       GetVariableInput(context, node, kBwInputCellStateTensor);
-  TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+  TfLiteTensor* bw_output = params->merge_outputs
+                                ? nullptr
+                                : GetOutput(context, node, kBwOutputTensor);
 
   // Temporary tensors.
   TfLiteTensor* fw_scratch_buffer =
@@ -1135,6 +1147,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* bw_aux_input_to_output_weights =
       GetOptionalInputTensor(context, node, kBwAuxInputToOutputWeightsTensor);
 
+  // Populate a TfLiteLSTMParams struct for the evaluation functions.
+  TfLiteLSTMParams lstm_params = {params->activation, params->cell_clip,
+                                  params->proj_clip, kTfLiteLSTMFullKernel};
+
+  const int bw_output_offset =
+      params->merge_outputs ? fw_recurrent_to_output_weights->dims->data[1] : 0;
+  const auto actual_bw_output = params->merge_outputs ? fw_output : bw_output;
+
   switch (fw_input_to_output_weights->type) {
     case kTfLiteFloat32: {
       TfLiteStatus fw_pass_status = EvalFloat(
@@ -1147,9 +1167,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_aux_input_to_forget_weights, fw_aux_input_to_cell_weights,
           fw_aux_input_to_output_weights, fw_input_gate_bias,
           fw_forget_gate_bias, fw_cell_bias, fw_output_gate_bias,
-          fw_projection_weights, fw_projection_bias, params,
-          /*forward_sequence=*/true, fw_scratch_buffer, fw_activation_state,
-          fw_cell_state, fw_output);
+          fw_projection_weights, fw_projection_bias, &lstm_params,
+          /*forward_sequence=*/true, /*output_offset=*/0, fw_scratch_buffer,
+          fw_activation_state, fw_cell_state, fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
 
       TfLiteStatus bw_pass_status = EvalFloat(
@@ -1162,9 +1182,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           bw_aux_input_to_forget_weights, bw_aux_input_to_cell_weights,
           bw_aux_input_to_output_weights, bw_input_gate_bias,
           bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
-          bw_projection_weights, bw_projection_bias, params,
-          /*forward_sequence=*/false, bw_scratch_buffer, bw_activation_state,
-          bw_cell_state, bw_output);
+          bw_projection_weights, bw_projection_bias, &lstm_params,
+          /*forward_sequence=*/false, bw_output_offset, bw_scratch_buffer,
+          bw_activation_state, bw_cell_state, actual_bw_output);
       TF_LITE_ENSURE_OK(context, bw_pass_status);
       return kTfLiteOk;
     }
@@ -1198,10 +1218,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_aux_input_to_forget_weights, fw_aux_input_to_cell_weights,
           fw_aux_input_to_output_weights, fw_input_gate_bias,
           fw_forget_gate_bias, fw_cell_bias, fw_output_gate_bias,
-          fw_projection_weights, fw_projection_bias, params,
-          /*forward_sequence=*/true, fw_scratch_buffer, scaling_factors,
-          prod_scaling_factors, recovered_cell_weights, input_quantized,
-          aux_input_quantized, fw_activation_state_quantized,
+          fw_projection_weights, fw_projection_bias, &lstm_params,
+          /*forward_sequence=*/true, /*output_offset=*/0, fw_scratch_buffer,
+          scaling_factors, prod_scaling_factors, recovered_cell_weights,
+          input_quantized, aux_input_quantized, fw_activation_state_quantized,
           fw_cell_state_quantized, fw_activation_state, fw_cell_state,
           fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
@@ -1216,12 +1236,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_aux_input_to_forget_weights, fw_aux_input_to_cell_weights,
           fw_aux_input_to_output_weights, bw_input_gate_bias,
           bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
-          bw_projection_weights, bw_projection_bias, params,
-          /*forward_sequence=*/false, bw_scratch_buffer, scaling_factors,
-          prod_scaling_factors, recovered_cell_weights, input_quantized,
-          aux_input_quantized, bw_activation_state_quantized,
+          bw_projection_weights, bw_projection_bias, &lstm_params,
+          /*forward_sequence=*/false, bw_output_offset, bw_scratch_buffer,
+          scaling_factors, prod_scaling_factors, recovered_cell_weights,
+          input_quantized, aux_input_quantized, bw_activation_state_quantized,
           bw_cell_state_quantized, bw_activation_state, bw_cell_state,
-          bw_output);
+          actual_bw_output);
       TF_LITE_ENSURE_OK(context, bw_pass_status);
       return kTfLiteOk;
     }
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
index 74ba8021c2..9cc04907e1 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
@@ -35,8 +35,8 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
   BidirectionalLSTMOpModel(int n_batch, int n_input, int n_cell, int n_output,
                            int sequence_length, bool use_cifg,
                            bool use_peephole, bool use_projection_weights,
-                           bool use_projection_bias, float cell_clip,
-                           float proj_clip,
+                           bool use_projection_bias, bool merge_outputs,
+                           float cell_clip, float proj_clip,
                            const std::vector<std::vector<int>>& input_shapes)
       : n_batch_(n_batch),
         n_input_(n_input),
@@ -175,7 +175,9 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
 
     fw_output_ = AddOutput(TensorType_FLOAT32);
 
-    bw_output_ = AddOutput(TensorType_FLOAT32);
+    if (!merge_outputs) {
+      bw_output_ = AddOutput(TensorType_FLOAT32);
+    }
 
     aux_input_ = AddNullInput();
     fw_aux_input_to_input_weights_ = AddNullInput();
@@ -188,9 +190,10 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
     bw_aux_input_to_output_weights_ = AddNullInput();
 
     SetBuiltinOp(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
-                 BuiltinOptions_LSTMOptions,
-                 CreateLSTMOptions(builder_, ActivationFunctionType_TANH,
-                                   cell_clip, proj_clip)
+                 BuiltinOptions_BidirectionalSequenceLSTMOptions,
+                 CreateBidirectionalSequenceLSTMOptions(
+                     builder_, ActivationFunctionType_TANH, cell_clip,
+                     proj_clip, merge_outputs)
                      .Union());
     BuildInterpreter(input_shapes);
   }
@@ -380,7 +383,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -526,6 +530,162 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
               ElementsAreArray(ArrayFloatNear(bw_expected)));
 }
 
+// Same as the previous test, yet with a single merged output tensor.
+TEST(LSTMOpTest, BlackBoxTestMergedOutput) {
+  const int n_batch = 1;
+  const int n_input = 2;
+  // n_cell and n_output have the same size when there is no projection.
+  const int n_cell = 4;
+  const int n_output = 4;
+  const int sequence_length = 3;
+
+  BidirectionalLSTMOpModel lstm(
+      n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
+      /*use_peephole=*/false, /*use_projection_weights=*/false,
+      /*use_projection_bias=*/false, /*merge_outputs=*/true, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
+      {
+          {sequence_length, n_batch, n_input},  // input tensor
+
+          // Forward cell
+          {n_cell, n_input},  // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {n_cell, n_output},  // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {0},  // cell_to_input_weight tensor
+          {0},  // cell_to_forget_weight tensor
+          {0},  // cell_to_output_weight tensor
+
+          {n_cell},  // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {0, 0},  // projection_weight tensor
+          {0},     // projection_bias tensor
+
+          // Backward cell
+          {n_cell, n_input},  // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {n_cell, n_output},  // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {0},  // cell_to_input_weight tensor
+          {0},  // cell_to_forget_weight tensor
+          {0},  // cell_to_output_weight tensor
+
+          {n_cell},  // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {0, 0},  // projection_weight tensor
+          {0},     // projection_bias tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {n_batch, sequence_length, 0},  // aux_input tensor
+          {n_cell, 0},                    // aux_fw_input_to_input tensor
+          {n_cell, 0},                    // aux_fw_input_to_forget tensor
+          {n_cell, 0},                    // aux_fw_input_to_cell tensor
+          {n_cell, 0},                    // aux_fw_input_to_output tensor
+          {n_cell, 0},                    // aux_bw_input_to_input tensor
+          {n_cell, 0},                    // aux_bw_input_to_forget tensor
+          {n_cell, 0},                    // aux_bw_input_to_cell tensor
+          {n_cell, 0},                    // aux_bw_input_to_output tensor
+      });
+
+  lstm.SetInputToInputWeights({-0.45018822, -0.02338299, -0.0870589,
+                               -0.34550029, 0.04266912, -0.15680569,
+                               -0.34856534, 0.43890524});
+
+  lstm.SetInputToCellWeights({-0.50013041, 0.1370284, 0.11810488, 0.2013163,
+                              -0.20583314, 0.44344562, 0.22077113,
+                              -0.29909778});
+
+  lstm.SetInputToForgetWeights({0.09701663, 0.20334584, -0.50592935,
+                                -0.31343272, -0.40032279, 0.44781327,
+                                0.01387155, -0.35593212});
+
+  lstm.SetInputToOutputWeights({-0.25065863, -0.28290087, 0.04613829,
+                                0.40525138, 0.44272184, 0.03897077, -0.1556896,
+                                0.19487578});
+
+  lstm.SetInputGateBias({0., 0., 0., 0.});
+
+  lstm.SetCellBias({0., 0., 0., 0.});
+
+  lstm.SetForgetGateBias({1., 1., 1., 1.});
+
+  lstm.SetOutputGateBias({0., 0., 0., 0.});
+
+  lstm.SetRecurrentToInputWeights(
+      {-0.0063535, -0.2042388, 0.31454784, -0.35746509, 0.28902304, 0.08183324,
+       -0.16555229, 0.02286911, -0.13566875, 0.03034258, 0.48091322,
+       -0.12528998, 0.24077177, -0.51332325, -0.33502164, 0.10629296});
+
+  lstm.SetRecurrentToCellWeights(
+      {-0.3407414, 0.24443203, -0.2078532, 0.26320225, 0.05695659, -0.00123841,
+       -0.4744786, -0.35869038, -0.06418842, -0.13502428, -0.501764, 0.22830659,
+       -0.46367589, 0.26016325, -0.03894562, -0.16368064});
+
+  lstm.SetRecurrentToForgetWeights(
+      {-0.48684245, -0.06655136, 0.42224967, 0.2112639, 0.27654213, 0.20864892,
+       -0.07646349, 0.45877004, 0.00141793, -0.14609534, 0.36447752, 0.09196436,
+       0.28053468, 0.01560611, -0.20127171, -0.01140004});
+
+  lstm.SetRecurrentToOutputWeights(
+      {0.43385774, -0.17194885, 0.2718237, 0.09215671, 0.24107647, -0.39835793,
+       0.18212086, 0.01301402, 0.48572797, -0.50656658, 0.20047462, -0.20607421,
+       -0.51818722, -0.15390486, 0.0468148, 0.39922136});
+
+  // Input should have n_input * sequence_length many values.
+  static float lstm_input[] = {2., 3., 3., 4., 1., 1.};
+  static float lstm_fw_golden_output[] = {
+      -0.02973187, 0.1229473,  0.20885126, -0.15358765,
+      -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+      -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+  static float lstm_bw_golden_output[] = {
+      -0.0806187, 0.139077, 0.400476,   -0.197842, -0.0332076, 0.123838,
+      0.309777,   -0.17621, -0.0490733, 0.0739237, 0.067706,   -0.0208124};
+
+  float* batch0_start = lstm_input;
+  float* batch0_end = batch0_start + lstm.num_inputs() * lstm.sequence_length();
+
+  lstm.SetInput(0, batch0_start, batch0_end);
+
+  lstm.Invoke();
+
+  std::vector<float> merged_expected;
+  for (int k = 0; k < lstm.sequence_length(); k++) {
+    merged_expected.insert(
+        merged_expected.end(),
+        lstm_fw_golden_output + k * lstm.num_fw_outputs(),
+        lstm_fw_golden_output + (k + 1) * lstm.num_fw_outputs());
+    merged_expected.insert(
+        merged_expected.end(),
+        lstm_bw_golden_output + k * lstm.num_bw_outputs(),
+        lstm_bw_golden_output + (k + 1) * lstm.num_bw_outputs());
+  }
+  EXPECT_THAT(lstm.GetFwOutput(),
+              ElementsAreArray(ArrayFloatNear(merged_expected)));
+}
+
 TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
   const int n_batch = 1;
   const int n_input = 2;
@@ -537,7 +697,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -696,7 +857,8 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
       /*use_peephole=*/true, /*use_projection_weights=*/false,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -845,7 +1007,8 @@ TEST(LSTMOpTest,
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
       /*use_peephole=*/true, /*use_projection_weights=*/false,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -994,7 +1157,8 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/true, /*use_projection_weights=*/true,
-      /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
index 2f896c5289..9f62ac3f2c 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
@@ -47,7 +47,7 @@ constexpr int kFwAuxWeightsTensor = 10;  // Optional.
 constexpr int kBwAuxWeightsTensor = 11;  // Optional.
 // Output tensors.
 constexpr int kFwOutputTensor = 0;
-constexpr int kBwOutputTensor = 1;
+constexpr int kBwOutputTensor = 1;  // Only if merge_outputs is false.
 
 // Temporary tensors.
 enum TemporaryTensor {
@@ -70,9 +70,13 @@ void Free(TfLiteContext* context, void* buffer) {
 }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceRNNParams*>(
+      node->builtin_data);
+
   // Check we have all the inputs and outputs we need.
   TF_LITE_ENSURE_EQ(context, node->inputs->size, 12);
-  TF_LITE_ENSURE_EQ(context, node->outputs->size, 2);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size,
+                    params->merge_outputs ? 1 : 2);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* fw_input_weights =
@@ -142,9 +146,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                       bw_aux_input_weights->dims->data[1]);
   }
 
-  TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor);
-  TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
-
   const bool is_hybrid_op =
       (fw_input_weights->type == kTfLiteUInt8 && input->type == kTfLiteFloat32);
 
@@ -233,18 +234,23 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   }
 
   // Resize outputs.
+  TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor);
   TfLiteIntArray* fw_output_size_array = TfLiteIntArrayCreate(3);
   fw_output_size_array->data[0] = batch_size;
   fw_output_size_array->data[1] = max_time;
-  fw_output_size_array->data[2] = fw_num_units;
+  fw_output_size_array->data[2] =
+      params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
   TF_LITE_ENSURE_OK(
       context, context->ResizeTensor(context, fw_output, fw_output_size_array));
-  TfLiteIntArray* bw_output_size_array = TfLiteIntArrayCreate(3);
-  bw_output_size_array->data[0] = batch_size;
-  bw_output_size_array->data[1] = max_time;
-  bw_output_size_array->data[2] = bw_num_units;
-  TF_LITE_ENSURE_OK(
-      context, context->ResizeTensor(context, bw_output, bw_output_size_array));
+  if (!params->merge_outputs) {
+    TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+    TfLiteIntArray* bw_output_size_array = TfLiteIntArrayCreate(3);
+    bw_output_size_array->data[0] = batch_size;
+    bw_output_size_array->data[1] = max_time;
+    bw_output_size_array->data[2] = bw_num_units;
+    TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, bw_output,
+                                                     bw_output_size_array));
+  }
 
   return kTfLiteOk;
 }
@@ -256,9 +262,9 @@ TfLiteStatus EvalFloat(
     const TfLiteTensor* bw_recurrent_weights, const TfLiteTensor* bw_bias,
     const TfLiteTensor* aux_input, const TfLiteTensor* fw_aux_input_weights,
     const TfLiteTensor* bw_aux_input_weights,
-    const TfLiteSequenceRNNParams* params, TfLiteTensor* fw_hidden_state,
-    TfLiteTensor* fw_output, TfLiteTensor* bw_hidden_state,
-    TfLiteTensor* bw_output) {
+    const TfLiteBidirectionalSequenceRNNParams* params,
+    TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output,
+    TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) {
   const int batch_size = input->dims->data[0];
   const int max_time = input->dims->data[1];
   const int input_size = input->dims->data[2];
@@ -281,10 +287,15 @@ TfLiteStatus EvalFloat(
                                               ? bw_aux_input_weights->data.f
                                               : nullptr;
 
+  const int fw_output_step =
+      params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
+  const int bw_output_step =
+      params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
   for (int b = 0; b < batch_size; b++) {
     // Forward cell.
     float* fw_hidden_state_ptr_batch =
         fw_hidden_state->data.f + b * fw_num_units;
+    float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time;
     for (int s = 0; s < max_time; s++) {
       const float* input_ptr_batch =
           input->data.f + b * input_size * max_time + s * input_size;
@@ -292,8 +303,7 @@ TfLiteStatus EvalFloat(
           (aux_input != nullptr)
               ? aux_input->data.f + b * input_size * max_time + s * input_size
               : nullptr;
-      float* output_ptr_batch =
-          fw_output->data.f + b * fw_num_units * max_time + s * fw_num_units;
+      float* output_ptr_batch = fw_output_offset + s * fw_output_step;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch,
@@ -304,6 +314,10 @@ TfLiteStatus EvalFloat(
     // Backward cell.
     float* bw_hidden_state_ptr_batch =
         bw_hidden_state->data.f + b * bw_num_units;
+    float* bw_output_offset =
+        params->merge_outputs
+            ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units
+            : bw_output->data.f + b * bw_output_step * max_time;
     for (int s = max_time - 1; s >= 0; s--) {
       const float* input_ptr_batch =
           input->data.f + b * input_size * max_time + s * input_size;
@@ -311,8 +325,7 @@ TfLiteStatus EvalFloat(
           (aux_input != nullptr)
               ? aux_input->data.f + b * input_size * max_time + s * input_size
               : nullptr;
-      float* output_ptr_batch =
-          bw_output->data.f + b * bw_num_units * max_time + s * bw_num_units;
+      float* output_ptr_batch = bw_output_offset + s * bw_output_step;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch,
@@ -331,11 +344,12 @@ TfLiteStatus EvalHybrid(
     const TfLiteTensor* bw_recurrent_weights, const TfLiteTensor* bw_bias,
     const TfLiteTensor* aux_input, const TfLiteTensor* aux_fw_input_weights,
     const TfLiteTensor* aux_bw_input_weights,
-    const TfLiteSequenceRNNParams* params, TfLiteTensor* scaling_factors,
-    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
-    TfLiteTensor* fw_hidden_state_quantized, TfLiteTensor* fw_hidden_state,
-    TfLiteTensor* fw_output, TfLiteTensor* bw_hidden_state_quantized,
-    TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) {
+    const TfLiteBidirectionalSequenceRNNParams* params,
+    TfLiteTensor* scaling_factors, TfLiteTensor* input_quantized,
+    TfLiteTensor* aux_input_quantized, TfLiteTensor* fw_hidden_state_quantized,
+    TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output,
+    TfLiteTensor* bw_hidden_state_quantized, TfLiteTensor* bw_hidden_state,
+    TfLiteTensor* bw_output) {
   const int batch_size = input->dims->data[0];
   const int max_time = input->dims->data[1];
   const int input_size = input->dims->data[2];
@@ -384,10 +398,15 @@ TfLiteStatus EvalHybrid(
       reinterpret_cast<int8_t*>(bw_hidden_state_quantized->data.uint8);
   float* scaling_factors_ptr = scaling_factors->data.f;
 
+  const int fw_output_step =
+      params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
+  const int bw_output_step =
+      params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
   for (int b = 0; b < batch_size; b++) {
     // Forward cell.
     float* fw_hidden_state_ptr_batch =
         fw_hidden_state->data.f + b * fw_num_units;
+    float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time;
     for (int s = 0; s < max_time; s++) {
       const float* input_ptr_batch =
           input->data.f + b * input_size * max_time + s * input_size;
@@ -395,8 +414,7 @@ TfLiteStatus EvalHybrid(
           (aux_input != nullptr)
               ? aux_input->data.f + b * input_size * max_time + s * input_size
               : nullptr;
-      float* output_ptr_batch =
-          fw_output->data.f + b * fw_num_units * max_time + s * fw_num_units;
+      float* output_ptr_batch = fw_output_offset + s * fw_output_step;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale,
@@ -411,6 +429,10 @@ TfLiteStatus EvalHybrid(
     // Backward cell.
     float* bw_hidden_state_ptr_batch =
         bw_hidden_state->data.f + b * bw_num_units;
+    float* bw_output_offset =
+        params->merge_outputs
+            ? fw_output->data.f + b * bw_output_step * max_time
+            : bw_output->data.f + b * bw_output_step * max_time;
     for (int s = max_time - 1; s >= 0; s--) {
       const float* input_ptr_batch =
           input->data.f + b * input_size * max_time + s * input_size;
@@ -418,8 +440,7 @@ TfLiteStatus EvalHybrid(
           (aux_input != nullptr)
               ? aux_input->data.f + b * input_size * max_time + s * input_size
               : nullptr;
-      float* output_ptr_batch =
-          bw_output->data.f + b * bw_num_units * max_time + s * bw_num_units;
+      float* output_ptr_batch = bw_output_offset + s * bw_output_step;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale,
@@ -436,8 +457,8 @@ TfLiteStatus EvalHybrid(
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const auto* params =
-      reinterpret_cast<TfLiteSequenceRNNParams*>(node->builtin_data);
+  const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceRNNParams*>(
+      node->builtin_data);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* fw_input_weights =
@@ -465,7 +486,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       GetVariableInput(context, node, kBwHiddenStateTensor);
 
   TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor);
-  TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
+  TfLiteTensor* bw_output = params->merge_outputs
+                                ? nullptr
+                                : GetOutput(context, node, kBwOutputTensor);
 
   switch (fw_input_weights->type) {
     case kTfLiteFloat32:
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
index 3e34ba6196..f555c472f5 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
@@ -654,7 +654,7 @@ const std::initializer_list<float> recurrent_weights = {
 class BidirectionalRNNOpModel : public SingleOpModel {
  public:
   BidirectionalRNNOpModel(int batches, int sequence_len, int fw_units,
-                          int bw_units, int input_size)
+                          int bw_units, int input_size, bool merge_outputs)
       : batches_(batches),
         sequence_len_(sequence_len),
         fw_units_(fw_units),
@@ -675,12 +675,15 @@ class BidirectionalRNNOpModel : public SingleOpModel {
     aux_bw_weights_ = AddNullInput();
 
     fw_output_ = AddOutput(TensorType_FLOAT32);
-    bw_output_ = AddOutput(TensorType_FLOAT32);
+    if (!merge_outputs) {
+      bw_output_ = AddOutput(TensorType_FLOAT32);
+    }
 
     SetBuiltinOp(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
-                 BuiltinOptions_SequenceRNNOptions,
-                 CreateSequenceRNNOptions(builder_, /*time_major=*/false,
-                                          ActivationFunctionType_RELU)
+                 BuiltinOptions_BidirectionalSequenceRNNOptions,
+                 CreateBidirectionalSequenceRNNOptions(
+                     builder_, /*time_major=*/false,
+                     ActivationFunctionType_RELU, merge_outputs)
                      .Union());
     BuildInterpreter({
         {batches_, sequence_len_, input_size_},  // input
@@ -767,7 +770,7 @@ class BidirectionalRNNOpModel : public SingleOpModel {
 TEST(BidirectionalRNNOpTest, BlackBoxTest) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8);
+                              /*input_size=*/8, /*merge_outputs=*/false);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -800,12 +803,49 @@ TEST(BidirectionalRNNOpTest, BlackBoxTest) {
   EXPECT_THAT(rnn.GetBwOutput(), ElementsAreArray(ArrayFloatNear(bw_expected)));
 }
 
+// Same as the previous test, yet with merged outputs.
+TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) {
+  BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
+                              /*fw_units=*/16, /*bw_units=*/16,
+                              /*input_size=*/8, /*merge_outputs=*/true);
+  rnn.SetFwWeights(weights);
+  rnn.SetBwWeights(weights);
+  rnn.SetFwBias(biases);
+  rnn.SetBwBias(biases);
+  rnn.SetFwRecurrentWeights(recurrent_weights);
+  rnn.SetBwRecurrentWeights(recurrent_weights);
+
+  const int input_sequence_size = rnn.input_size() * rnn.sequence_len();
+  float* batch_start = rnn_input;
+  float* batch_end = batch_start + input_sequence_size;
+  rnn.SetInput(0, batch_start, batch_end);
+  rnn.SetInput(input_sequence_size, batch_start, batch_end);
+
+  rnn.Invoke();
+
+  std::vector<float> merged_expected;
+  for (int bid = 0; bid < rnn.num_batches(); bid++) {
+    for (int step = 0; step < rnn.sequence_len(); step++) {
+      merged_expected.insert(
+          merged_expected.end(),
+          rnn_golden_fw_output + rnn.num_fw_units() * step,
+          rnn_golden_fw_output + rnn.num_fw_units() * (step + 1));
+      merged_expected.insert(
+          merged_expected.end(),
+          rnn_golden_bw_output + rnn.num_bw_units() * step,
+          rnn_golden_bw_output + rnn.num_bw_units() * (step + 1));
+    }
+  }
+  EXPECT_THAT(rnn.GetFwOutput(),
+              ElementsAreArray(ArrayFloatNear(merged_expected)));
+}
+
 // Check that if the input sequence is reversed the outputs are the same just
 // forward and backward are swapped (and reversed).
 TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8);
+                              /*input_size=*/8, /*merge_outputs=*/false);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -851,7 +891,7 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) {
 TEST(BidirectionalRNNOpTest, EndToEndTest) {
   BidirectionalRNNOpModel rnn(/*batches=*/1, /*sequence_len=*/4,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8);
+                              /*input_size=*/8, /*merge_outputs=*/false);
   const int output_size = 4;
   float dnn_weights[] = {
       -0.5782342,  -0.052212059, 0.73036242,  -0.81216097, -0.80088139,
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index 3da3188c3a..ff8430827c 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -248,6 +248,8 @@ union BuiltinOptions {
   SquareOptions,
   ZerosLikeOptions,
   FillOptions,
+  BidirectionalSequenceLSTMOptions,
+  BidirectionalSequenceRNNOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -327,6 +329,7 @@ table SequenceRNNOptions {
 table BidirectionalSequenceRNNOptions {
   time_major:bool;
   fused_activation_function:ActivationFunctionType;
+  merge_outputs: bool;
 }
 
 enum FullyConnectedOptionsWeightsFormat: byte {
@@ -391,6 +394,15 @@ table LSTMOptions {
   kernel_type: LSTMKernelType = FULL;
 }
 
+table BidirectionalSequenceLSTMOptions {
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true, store the outputs of both directions into the first output.
+  merge_outputs: bool;
+}
+
 table ResizeBilinearOptions {
   new_height: int (deprecated);
   new_width: int (deprecated);
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index 23ac8484de..f3cb113c9c 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -79,6 +79,9 @@ struct LocalResponseNormalizationOptionsT;
 struct LSTMOptions;
 struct LSTMOptionsT;
 
+struct BidirectionalSequenceLSTMOptions;
+struct BidirectionalSequenceLSTMOptionsT;
+
 struct ResizeBilinearOptions;
 struct ResizeBilinearOptionsT;
 
@@ -676,11 +679,13 @@ enum BuiltinOptions {
   BuiltinOptions_SquareOptions = 66,
   BuiltinOptions_ZerosLikeOptions = 67,
   BuiltinOptions_FillOptions = 68,
+  BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
+  BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_FillOptions
+  BuiltinOptions_MAX = BuiltinOptions_BidirectionalSequenceRNNOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[69] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -750,7 +755,9 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[69] {
     BuiltinOptions_FloorDivOptions,
     BuiltinOptions_SquareOptions,
     BuiltinOptions_ZerosLikeOptions,
-    BuiltinOptions_FillOptions
+    BuiltinOptions_FillOptions,
+    BuiltinOptions_BidirectionalSequenceLSTMOptions,
+    BuiltinOptions_BidirectionalSequenceRNNOptions
   };
   return values;
 }
@@ -826,6 +833,8 @@ inline const char * const *EnumNamesBuiltinOptions() {
     "SquareOptions",
     "ZerosLikeOptions",
     "FillOptions",
+    "BidirectionalSequenceLSTMOptions",
+    "BidirectionalSequenceRNNOptions",
     nullptr
   };
   return names;
@@ -1112,6 +1121,14 @@ template<> struct BuiltinOptionsTraits<FillOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
 };
 
+template<> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
+};
+
+template<> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -1687,6 +1704,22 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_FillOptions ?
       reinterpret_cast<const FillOptionsT *>(value) : nullptr;
   }
+  BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() {
+    return type == BuiltinOptions_BidirectionalSequenceLSTMOptions ?
+      reinterpret_cast<BidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
+  }
+  const BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() const {
+    return type == BuiltinOptions_BidirectionalSequenceLSTMOptions ?
+      reinterpret_cast<const BidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
+  }
+  BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() {
+    return type == BuiltinOptions_BidirectionalSequenceRNNOptions ?
+      reinterpret_cast<BidirectionalSequenceRNNOptionsT *>(value) : nullptr;
+  }
+  const BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() const {
+    return type == BuiltinOptions_BidirectionalSequenceRNNOptions ?
+      reinterpret_cast<const BidirectionalSequenceRNNOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -2834,9 +2867,11 @@ struct BidirectionalSequenceRNNOptionsT : public flatbuffers::NativeTable {
   typedef BidirectionalSequenceRNNOptions TableType;
   bool time_major;
   ActivationFunctionType fused_activation_function;
+  bool merge_outputs;
   BidirectionalSequenceRNNOptionsT()
       : time_major(false),
-        fused_activation_function(ActivationFunctionType_NONE) {
+        fused_activation_function(ActivationFunctionType_NONE),
+        merge_outputs(false) {
   }
 };
 
@@ -2844,7 +2879,8 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
   typedef BidirectionalSequenceRNNOptionsT NativeTableType;
   enum {
     VT_TIME_MAJOR = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6
+    VT_FUSED_ACTIVATION_FUNCTION = 6,
+    VT_MERGE_OUTPUTS = 8
   };
   bool time_major() const {
     return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0;
@@ -2852,10 +2888,14 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
   ActivationFunctionType fused_activation_function() const {
     return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  bool merge_outputs() const {
+    return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0;
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
            verifier.EndTable();
   }
   BidirectionalSequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -2872,6 +2912,9 @@ struct BidirectionalSequenceRNNOptionsBuilder {
   void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
     fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
   }
+  void add_merge_outputs(bool merge_outputs) {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS, static_cast<uint8_t>(merge_outputs), 0);
+  }
   explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
         : fbb_(_fbb) {
     start_ = fbb_.StartTable();
@@ -2887,8 +2930,10 @@ struct BidirectionalSequenceRNNOptionsBuilder {
 inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
     flatbuffers::FlatBufferBuilder &_fbb,
     bool time_major = false,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    bool merge_outputs = false) {
   BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
+  builder_.add_merge_outputs(merge_outputs);
   builder_.add_fused_activation_function(fused_activation_function);
   builder_.add_time_major(time_major);
   return builder_.Finish();
@@ -3424,6 +3469,96 @@ inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
 
 flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
+  typedef BidirectionalSequenceLSTMOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  float cell_clip;
+  float proj_clip;
+  bool merge_outputs;
+  BidirectionalSequenceLSTMOptionsT()
+      : fused_activation_function(ActivationFunctionType_NONE),
+        cell_clip(0.0f),
+        proj_clip(0.0f),
+        merge_outputs(false) {
+  }
+};
+
+struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef BidirectionalSequenceLSTMOptionsT NativeTableType;
+  enum {
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8,
+    VT_MERGE_OUTPUTS = 10
+  };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const {
+    return GetField<float>(VT_CELL_CLIP, 0.0f);
+  }
+  float proj_clip() const {
+    return GetField<float>(VT_PROJ_CLIP, 0.0f);
+  }
+  bool merge_outputs() const {
+    return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<float>(verifier, VT_CELL_CLIP) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
+           verifier.EndTable();
+  }
+  BidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BidirectionalSequenceLSTMOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip) {
+    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip) {
+    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  void add_merge_outputs(bool merge_outputs) {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS, static_cast<uint8_t>(merge_outputs), 0);
+  }
+  explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  BidirectionalSequenceLSTMOptionsBuilder &operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
+  flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    float cell_clip = 0.0f,
+    float proj_clip = 0.0f,
+    bool merge_outputs = false) {
+  BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
+  builder_.add_proj_clip(proj_clip);
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_merge_outputs(merge_outputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct ResizeBilinearOptionsT : public flatbuffers::NativeTable {
   typedef ResizeBilinearOptions TableType;
   bool align_corners;
@@ -6347,6 +6482,12 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const FillOptions *builtin_options_as_FillOptions() const {
     return builtin_options_type() == BuiltinOptions_FillOptions ? static_cast<const FillOptions *>(builtin_options()) : nullptr;
   }
+  const BidirectionalSequenceLSTMOptions *builtin_options_as_BidirectionalSequenceLSTMOptions() const {
+    return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
+  }
+  const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const {
+    return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -6650,6 +6791,14 @@ template<> inline const FillOptions *Operator::builtin_options_as<FillOptions>()
   return builtin_options_as_FillOptions();
 }
 
+template<> inline const BidirectionalSequenceLSTMOptions *Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const {
+  return builtin_options_as_BidirectionalSequenceLSTMOptions();
+}
+
+template<> inline const BidirectionalSequenceRNNOptions *Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const {
+  return builtin_options_as_BidirectionalSequenceRNNOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -7407,6 +7556,7 @@ inline void BidirectionalSequenceRNNOptions::UnPackTo(BidirectionalSequenceRNNOp
   (void)_resolver;
   { auto _e = time_major(); _o->time_major = _e; };
   { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  { auto _e = merge_outputs(); _o->merge_outputs = _e; };
 }
 
 inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> BidirectionalSequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
@@ -7419,10 +7569,12 @@ inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalS
   struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
   auto _time_major = _o->time_major;
   auto _fused_activation_function = _o->fused_activation_function;
+  auto _merge_outputs = _o->merge_outputs;
   return tflite::CreateBidirectionalSequenceRNNOptions(
       _fbb,
       _time_major,
-      _fused_activation_function);
+      _fused_activation_function,
+      _merge_outputs);
 }
 
 inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
@@ -7657,6 +7809,41 @@ inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBuffe
       _kernel_type);
 }
 
+inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new BidirectionalSequenceLSTMOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void BidirectionalSequenceLSTMOptions::UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  { auto _e = cell_clip(); _o->cell_clip = _e; };
+  { auto _e = proj_clip(); _o->proj_clip = _e; };
+  { auto _e = merge_outputs(); _o->merge_outputs = _e; };
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> BidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateBidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _cell_clip = _o->cell_clip;
+  auto _proj_clip = _o->proj_clip;
+  auto _merge_outputs = _o->merge_outputs;
+  return tflite::CreateBidirectionalSequenceLSTMOptions(
+      _fbb,
+      _fused_activation_function,
+      _cell_clip,
+      _proj_clip,
+      _merge_outputs);
+}
+
 inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new ResizeBilinearOptionsT();
   UnPackTo(_o, _resolver);
@@ -9425,6 +9612,14 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const FillOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -9715,6 +9910,14 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const FillOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -9993,6 +10196,14 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const FillOptionsT *>(value);
       return CreateFillOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptionsT *>(value);
+      return CreateBidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptionsT *>(value);
+      return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -10271,6 +10482,14 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new FillOptionsT(*reinterpret_cast<FillOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      value = new BidirectionalSequenceLSTMOptionsT(*reinterpret_cast<BidirectionalSequenceLSTMOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      value = new BidirectionalSequenceRNNOptionsT(*reinterpret_cast<BidirectionalSequenceRNNOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -10618,6 +10837,16 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<BidirectionalSequenceLSTMOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<BidirectionalSequenceRNNOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
-- 
GitLab


From 261b6958fb95db18cd28c1aba140a627deb790a1 Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Wed, 3 Oct 2018 13:25:23 -0700
Subject: [PATCH 0295/1085] Enable collective graph key test for GPU builds.

In the process, properly place nodes on devices in the collective graph key
test.

PiperOrigin-RevId: 215616146
---
 .../common_runtime/direct_session_test.cc     | 58 +++++++++----------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index e3e431f800..a6440c55ad 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -2262,8 +2262,8 @@ class DirectSessionCollectiveTest : public ::testing::Test {
     TF_RETURN_IF_ERROR(session->Create(g));
     std::vector<Tensor> outputs;
     TF_RETURN_IF_ERROR(
-        session->Run({{"input1:0", t1}, {"input2:0", t2}}, {},
-                     {"collective_call1:0", "collective_call2:0"}, &outputs));
+        session->Run({{"input0:0", t1}, {"input1:0", t2}}, {},
+                     {"collective_call0:0", "collective_call1:0"}, &outputs));
     DirectSession* direct_session = static_cast<DirectSession*>(session.get());
     {
       mutex_lock l(direct_session->collective_graph_key_lock_);
@@ -2301,6 +2301,26 @@ class DirectSessionCollectiveTest : public ::testing::Test {
         }});
   }
 
+  NodeDef Input(int id) {
+    AttrValue dtype_attr;
+    SetAttrValue(DT_FLOAT, &dtype_attr);
+    NodeDef input;
+    input.set_name(strings::StrCat("input", id));
+    input.set_op("Placeholder");
+    input.mutable_attr()->insert({"dtype", dtype_attr});
+    return input;
+  }
+
+  NodeDef CollectiveCall(const string& op, const string& input, int cpu_id) {
+    NodeDef collective_call;
+    collective_call.set_name(strings::StrCat("collective_call", cpu_id));
+    collective_call.set_op(op);
+    collective_call.add_input(input);
+    collective_call.set_device(
+        strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", cpu_id));
+    return collective_call;
+  }
+
   // Creates a GraphDef that adds two CollectiveFunctions, one each on CPU0 and
   // CPU1, with instance_key 1, and appropriate placeholder inputs.  If
   // `add_unused_function` is true, adds another CollectiveFunction with
@@ -2317,42 +2337,17 @@ class DirectSessionCollectiveTest : public ::testing::Test {
       *lib->add_function() = unused_function;
     }
 
-    // Inputs.
-    AttrValue dtype_attr;
-    SetAttrValue(DT_FLOAT, &dtype_attr);
-    NodeDef input1;
-    input1.set_name("input1");
-    input1.set_op("Placeholder");
-    input1.mutable_attr()->insert({"dtype", dtype_attr});
-    NodeDef input2;
-    input2.set_name("input2");
-    input2.set_op("Placeholder");
-    input2.mutable_attr()->insert({"dtype", dtype_attr});
-
+    *g.add_node() = Input(0);
+    *g.add_node() = Input(1);
     // CollectiveReduce on CPU0 with instance_key 1.
-    NodeDef collective_call1;
-    collective_call1.set_name("collective_call1");
-    collective_call1.set_op("CollectiveFunction1");
-    collective_call1.add_input("input1");
-    collective_call1.set_device("/job:localhost/replica:0/task:0/device:CPU:0");
+    *g.add_node() = CollectiveCall("CollectiveFunction1", "input0", 0);
     // CollectiveReduce on CPU1 with instance_key 1.
-    NodeDef collective_call2;
-    collective_call2.set_name("collective_call2");
-    collective_call2.set_op("CollectiveFunction1");
-    collective_call2.add_input("input2");
-    collective_call1.set_device("/job:localhost/replica:0/task:0/device:CPU:1");
-
-    *g.add_node() = input1;
-    *g.add_node() = input2;
-    *g.add_node() = collective_call1;
-    *g.add_node() = collective_call2;
+    *g.add_node() = CollectiveCall("CollectiveFunction1", "input1", 1);
 
     return g;
   }
 };
 
-#ifndef GOOGLE_CUDA
-// TODO(ayushd): enable this test for GPU builds.
 TEST_F(DirectSessionCollectiveTest,
        TestCollectiveGraphKeyUsesOnlyCalledFunctions) {
   int64 key1;
@@ -2361,6 +2356,5 @@ TEST_F(DirectSessionCollectiveTest,
   TF_ASSERT_OK(RunGraphWithCollectiveFunctions(true, &key2));
   ASSERT_EQ(key1, key2);
 }
-#endif
 
 }  // namespace tensorflow
-- 
GitLab


From d66aac16855ddb70c8d3d5b4c9d4da24a34dffec Mon Sep 17 00:00:00 2001
From: Mustafa Ispir <ispir@google.com>
Date: Wed, 3 Oct 2018 13:33:12 -0700
Subject: [PATCH 0296/1085] Updates the doc of SyncReplicasOptimizer. It notes
 that some worker can consume multiple mini-batches while some may not even
 one.

PiperOrigin-RevId: 215617588
---
 tensorflow/python/training/sync_replicas_optimizer.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py
index 7afaa92699..6a3756fba9 100644
--- a/tensorflow/python/training/sync_replicas_optimizer.py
+++ b/tensorflow/python/training/sync_replicas_optimizer.py
@@ -78,7 +78,11 @@ class SyncReplicasOptimizer(optimizer.Optimizer):
   4. Only after all variables have been updated, increment the global step.
   5. Only after step 4, pushes `global_step` in the `token_queue`, once for
      each worker replica. The workers can now fetch the global step, use it to
-     update its local_step variable and start the next batch.
+     update its local_step variable and start the next batch. Please note that
+     some workers can consume multiple minibatches, while some may not consume
+     even one. This is because each worker fetches minibatches as long as
+     a token exists. If one worker is stuck for some reason and does not
+     consume a token, another worker can use it.
 
   For the replicas:
 
-- 
GitLab


From 43073e9d4dc957367d8e2b73c37733ff1dc376c1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 13:34:21 -0700
Subject: [PATCH 0297/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 215617800
---
 tensorflow/core/ops/ops.pbtxt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 229022b64c..0e58a9475d 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -28603,6 +28603,14 @@ op {
     name: "stats_aggregator"
     type: DT_RESOURCE
   }
+  input_arg {
+    name: "tag"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "counter_prefix"
+    type: DT_STRING
+  }
   output_arg {
     name: "handle"
     type: DT_VARIANT
-- 
GitLab


From ce9a5d143f89a37ab029a29c62433883323987e8 Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Wed, 3 Oct 2018 13:39:44 -0700
Subject: [PATCH 0298/1085] Tests for metrics correctness with TPU strategy

PiperOrigin-RevId: 215618809
---
 tensorflow/contrib/distribute/python/BUILD    |  17 ++-
 .../contrib/distribute/python/combinations.py |   4 +-
 .../distribute/python/metrics_v1_test.py      | 121 ++++++++++--------
 3 files changed, 86 insertions(+), 56 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index defa82f98a..8267612236 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -737,18 +737,27 @@ cuda_py_test(
     ],
 )
 
-cuda_py_test(
-    name = "metrics_v1_test",
+py_library(
+    name = "metrics_v1_test_lib",
+    testonly = 1,
     srcs = ["metrics_v1_test.py"],
-    additional_deps = [
+    deps = [
         ":combinations",
-        "@absl_py//absl/testing:parameterized",
         "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:metrics",
         "//tensorflow/python:variables",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/eager:test",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+cuda_py_test(
+    name = "metrics_v1_test",
+    srcs = ["metrics_v1_test.py"],
+    additional_deps = [
+        ":metrics_v1_test_lib",
     ],
     tags = [
         "multi_and_single_gpu",
diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py
index 82ca041cc2..cff4b0a463 100644
--- a/tensorflow/contrib/distribute/python/combinations.py
+++ b/tensorflow/contrib/distribute/python/combinations.py
@@ -329,10 +329,10 @@ one_device_strategy = NamedDistribution(
     required_gpus=None)
 tpu_strategy = NamedDistribution(
     "TPU", lambda: tpu_lib.TPUStrategy(
-        TPUClusterResolver(""), steps_per_run=5),
+        TPUClusterResolver(""), steps_per_run=2),
     required_tpu=True)
 tpu_strategy_one_step = NamedDistribution(
-    "TPU", lambda: tpu_lib.TPUStrategy(
+    "TPUOneStep", lambda: tpu_lib.TPUStrategy(
         TPUClusterResolver(""), steps_per_run=1),
     required_tpu=True)
 # Note that we disable prefetching for testing since prefetching makes
diff --git a/tensorflow/contrib/distribute/python/metrics_v1_test.py b/tensorflow/contrib/distribute/python/metrics_v1_test.py
index 8163494c8e..ae4189eb1c 100644
--- a/tensorflow/contrib/distribute/python/metrics_v1_test.py
+++ b/tensorflow/contrib/distribute/python/metrics_v1_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.contrib.distribute.python import combinations
+from tensorflow.contrib.distribute.python import tpu_strategy
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import test
 from tensorflow.python.framework import ops
@@ -35,7 +36,8 @@ def _labeled_dataset_fn():
   #  8: 3, 2 -> False;  9: 4, 0 -> False; 10: 0, 1 -> False; 11: 1, 2 -> False
   # 12: 2, 0 -> False; 13: 3, 1 -> False; 14: 4, 2 -> False; 15: 0, 0 -> True
   return dataset_ops.Dataset.range(1000).map(
-      lambda x: {"labels": x % 5, "predictions": x % 3}).batch(4)
+      lambda x: {"labels": x % 5, "predictions": x % 3}).batch(
+          4, drop_remainder=True)
 
 
 def _boolean_dataset_fn():
@@ -47,7 +49,8 @@ def _boolean_dataset_fn():
   #   F, T -> FP;  T, F -> FN;   F, F -> TN
   return dataset_ops.Dataset.from_tensor_slices({
       "labels": [True, False, True, False],
-      "predictions": [True, True, False, False]}).repeat().batch(3)
+      "predictions": [True, True, False, False]}).repeat().batch(
+          3, drop_remainder=True)
 
 
 def _threshold_dataset_fn():
@@ -59,7 +62,8 @@ def _threshold_dataset_fn():
   #  False, .75 -> FP;   True, .25 -> FN;  False, 0.0 -> TN
   return dataset_ops.Dataset.from_tensor_slices({
       "labels": [True, False, True, False],
-      "predictions": [1.0, 0.75, 0.25, 0.]}).repeat().batch(3)
+      "predictions": [1.0, 0.75, 0.25, 0.]}).repeat().batch(
+          3, drop_remainder=True)
 
 
 def _regression_dataset_fn():
@@ -79,6 +83,12 @@ def all_combinations():
       mode=["graph"])
 
 
+def tpu_combinations():
+  return combinations.combine(distribution=[combinations.tpu_strategy_one_step,
+                                            combinations.tpu_strategy],
+                              mode=["graph"])
+
+
 # TODO(josh11b): Test metrics.recall_at_top_k, metrics.average_precision_at_k,
 # metrics.precision_at_k
 class MetricsV1Test(test.TestCase, parameterized.TestCase):
@@ -87,42 +97,50 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     with ops.Graph().as_default(), distribution.scope():
       iterator = distribution.distribute_dataset(
           dataset_fn).make_one_shot_iterator()
-      value, update = distribution.call_for_each_tower(
-          metric_fn, iterator.get_next())
-      update = distribution.group(update)
+      if isinstance(distribution, tpu_strategy.TPUStrategy):
+        def step_fn(ctx, inputs):
+          value, update = distribution.call_for_each_tower(
+              metric_fn, inputs)
+          ctx.set_non_tensor_output(name="value", output=value)
+          return distribution.group(update)
+
+        ctx = distribution.run_steps_on_dataset(
+            step_fn, iterator, iterations=distribution.steps_per_run)
+        update = ctx.run_op
+        value = ctx.non_tensor_outputs["value"]
+        # In each run, we run multiple steps, and each steps consumes as many
+        # batches as number of towers.
+        batches_per_update = (
+            distribution.num_towers * distribution.steps_per_run)
+      else:
+        value, update = distribution.call_for_each_tower(
+            metric_fn, iterator.get_next())
+        update = distribution.group(update)
+        # TODO(josh11b): Once we switch to using a global batch size for input,
+        # replace "distribution.num_towers" with "1".
+        batches_per_update = distribution.num_towers
+
+      self.evaluate(distribution.initialize())
       self.evaluate(variables.local_variables_initializer())
-      # TODO(josh11b): Once we switch to using a global batch size for input,
-      # replace "distribution.num_towers" with "1".
-      batches_per_update = distribution.num_towers
-
-      # Update variables using the first `num_towers` batches.
-      self.evaluate(update)
-      self.assertAllClose(expected_fn(batches_per_update), self.evaluate(value),
-                          0.001, msg="After first update")
-
-      # Update variables using the second `num_towers` batches.
-      self.evaluate(update)
-      self.assertAllClose(expected_fn(2 * batches_per_update),
-                          self.evaluate(value),
-                          0.001,
-                          msg="After second update")
-
-      if batches_per_update == 1:  # Consume 4 input batches
-        self.evaluate(update)
-        self.assertAllClose(expected_fn(3 * batches_per_update),
-                            self.evaluate(value),
-                            0.001,
-                            msg="After third update")
+
+      batches_consumed = 0
+      for i in range(4):
         self.evaluate(update)
-        self.assertAllClose(expected_fn(4 * batches_per_update),
+        batches_consumed += batches_per_update
+        self.assertAllClose(expected_fn(batches_consumed),
                             self.evaluate(value),
                             0.001,
-                            msg="After fourth update")
+                            msg="After update #" + str(i+1))
+        if batches_consumed >= 4:  # Consume 4 input batches in total.
+          break
 
-  @combinations.generate(all_combinations())
+      self.evaluate(distribution.finalize())
+
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testMean(self, distribution):
     def _dataset_fn():
-      return dataset_ops.Dataset.range(1000).map(math_ops.to_float).batch(4)
+      return dataset_ops.Dataset.range(1000).map(math_ops.to_float).batch(
+          4, drop_remainder=True)
 
     def _expected_fn(num_batches):
       # Mean(0..3) = 1.5, Mean(0..7) = 3.5, Mean(0..11) = 5.5, etc.
@@ -130,7 +148,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
 
     self._test_metric(distribution, _dataset_fn, metrics.mean, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testAccuracy(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -143,6 +161,8 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _labeled_dataset_fn, _metric_fn, _expected_fn)
 
+  # TODO(priyag, jhseu): Enable TPU for this test once scatter_add is added
+  # for TPUMirroredVariable.
   @combinations.generate(all_combinations())
   def testMeanPerClassAccuracy(self, distribution):
     def _metric_fn(x):
@@ -161,6 +181,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _labeled_dataset_fn, _metric_fn, _expected_fn)
 
+  # NOTE(priyag): This metric doesn't work on TPUs yet.
   @combinations.generate(all_combinations())
   def testMeanIOU(self, distribution):
     def _metric_fn(x):
@@ -179,7 +200,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _labeled_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testMeanTensor(self, distribution):
     def _dataset_fn():
       dataset = dataset_ops.Dataset.range(1000).map(math_ops.to_float)
@@ -198,7 +219,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _dataset_fn, metrics.mean_tensor, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testAUCROC(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -212,7 +233,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testAUCPR(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -226,7 +247,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testFalseNegatives(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -239,7 +260,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testFalseNegativesAtThresholds(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -252,7 +273,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testTrueNegatives(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -265,7 +286,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testTrueNegativesAtThresholds(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -278,7 +299,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testFalsePositives(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -291,7 +312,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testFalsePositivesAtThresholds(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -304,7 +325,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testTruePositives(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -317,7 +338,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testTruePositivesAtThresholds(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -330,7 +351,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testPrecision(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -343,7 +364,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testPrecisionAtThreshold(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -356,7 +377,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testRecall(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -369,7 +390,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _boolean_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testRecallAtThreshold(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -382,7 +403,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _threshold_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testMeanSquaredError(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
@@ -395,7 +416,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
     self._test_metric(
         distribution, _regression_dataset_fn, _metric_fn, _expected_fn)
 
-  @combinations.generate(all_combinations())
+  @combinations.generate(all_combinations() + tpu_combinations())
   def testRootMeanSquaredError(self, distribution):
     def _metric_fn(x):
       labels = x["labels"]
-- 
GitLab


From c26b5e9685b05fafc509d8ebc88c8304be5974a4 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Wed, 3 Oct 2018 13:45:59 -0700
Subject: [PATCH 0299/1085] Some tiny speed improvements for defun.

Before:
entry {
  name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
  iters: 30000
  wall_time: 48.4476327896
  extras {
    key: "examples_per_sec"
    value {
      double_value: 20640.8433688
    }
  }
}

After:
entry {
  name: "MicroBenchmarks.benchmark_defun_matmul_2_by_2_CPU"
  iters: 30000
  wall_time: 45.2344338099
  extras {
    key: "examples_per_sec"
    value {
      double_value: 22107.0524327
    }
  }
}
PiperOrigin-RevId: 215619902
---
 tensorflow/python/eager/function.py | 36 +++++++++++++++--------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f261d92d64..dd9f5e233c 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1152,23 +1152,22 @@ class PolymorphicFunction(object):
       del args, kwargs
       cache_key = self._flat_input_signature
 
+    ctx = context.context()
     with ops.init_scope():
-      init_graph = ops.get_default_graph()
-
       # The graph, or whether we're executing eagerly, should be a part of the
       # cache key so we don't improperly capture tensors such as variables.
-      executing_eagerly = context.executing_eagerly()
-      execution_context = executing_eagerly or init_graph
-
-    default_graph = ops.get_default_graph()
-    # Putting the device in the cache key ensures that call-site device
-    # annotations are respected.
-    device_functions = _get_device_functions(context.context(), default_graph)
+      executing_eagerly = ctx.executing_eagerly()
+      execution_context = executing_eagerly or ops.get_default_graph()
 
-    # `ops.colocate_with` directives translate into `ops.device` directives when
-    # eager execution is enabled.
-    colocation_stack = (() if executing_eagerly else
-                        tuple(default_graph._colocation_stack.peek_objs()))  # pylint: disable=protected-access
+    if executing_eagerly:
+      device_functions = (pydev.merge_device(ctx.device_name),)
+      colocation_stack = ()
+    else:
+      default_graph = ops.get_default_graph()
+      # Putting the device in the cache key ensures that call-site device
+      # annotations are respected.
+      device_functions = tuple(default_graph._device_functions_outer_to_inner)  # pylint: disable=protected-access
+      colocation_stack = tuple(default_graph._colocation_stack.peek_objs())  # pylint: disable=protected-access
 
     return (cache_key, execution_context, device_functions, colocation_stack)
 
@@ -1195,9 +1194,6 @@ class PolymorphicFunction(object):
     """
     args = self._args_to_prepend + args
     kwargs = dict(kwargs, **self._kwargs_to_include)
-    # Maps from index of arg to its corresponding value, according to `args`
-    # and `kwargs`; seeded with the default values for the named args that
-    # aren't in `args`.
     if not kwargs:
       if self._default_values:
         inputs = args + self._default_values[len(args) -
@@ -1205,6 +1201,9 @@ class PolymorphicFunction(object):
       else:
         inputs = args
     else:
+      # Maps from index of arg to its corresponding value, according to `args`
+      # and `kwargs`; seeded with the default values for the named args that
+      # aren't in `args`.
       arg_indices_to_values = {
           index: default for index, default in six.iteritems(
               self._arg_indices_to_default_values) if index >= len(args)
@@ -1227,9 +1226,12 @@ class PolymorphicFunction(object):
     flat_inputs = nest.flatten(inputs)
 
     # Check for NumPy arrays in arguments and convert them to Tensors.
+    # TODO(nareshmodi): Skip ndarray conversion to tensor altogether, perhaps
+    # finding a way to store them directly in the cache key (currently not
+    # possible since ndarrays are not hashable).
     need_packing = False
     for index, value in enumerate(flat_inputs):
-      if isinstance(value, np.ndarray):
+      if type(value) == np.ndarray:
         flat_inputs[index] = constant_op.constant(value)
         need_packing = True
     if need_packing:
-- 
GitLab


From 0b7a3df432f0e607b39ab17d1b85fb0b04e05bd5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 13:46:19 -0700
Subject: [PATCH 0300/1085] Fixes bug in Conv2D unit test that made it test a
 SeparableConv2D layer instead of a Conv2D layer.

PiperOrigin-RevId: 215619966
---
 tensorflow/python/keras/layers/convolutional_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index 2d3d38a5ce..cad5e4c8bd 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -113,7 +113,7 @@ class Conv2DTest(test.TestCase):
       test_kwargs[arg] = value
       with self.test_session(use_gpu=True):
         testing_utils.layer_test(
-            keras.layers.SeparableConv2D,
+            keras.layers.Conv2D,
             kwargs=test_kwargs,
             input_shape=(num_samples, num_row, num_col, stack_size))
 
-- 
GitLab


From ed904611009a74ae530335d3bd16b7070238cec3 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 3 Oct 2018 14:01:16 -0700
Subject: [PATCH 0301/1085] Update reference to tools/bazel.rc to .bazelrc
 after cl/215483141

PiperOrigin-RevId: 215623215
---
 configure.py             | 4 ++--
 tensorflow/workspace.bzl | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure.py b/configure.py
index 2d2da11700..a88fdb3555 100644
--- a/configure.py
+++ b/configure.py
@@ -1676,8 +1676,8 @@ def main():
   # TODO(pcloudy): remove the following if check when they make sense on Windows
   if not is_windows():
     print('Preconfigured Bazel build configs. You can use any of the below by '
-          'adding "--config=<>" to your build command. See tools/bazel.rc for '
-          'more details.')
+          'adding "--config=<>" to your build command. See .bazelrc for more '
+          'details.')
     config_info_line('mkl', 'Build with MKL support.')
     config_info_line('monolithic', 'Config for mostly static monolithic build.')
     config_info_line('gdr', 'Build with GDR support.')
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index bcc89ef729..d27732a801 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -888,7 +888,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     # why we can't depend on the canonical build target.
 
     # gRPC wants a cares dependency but its contents is not actually
-    # important since we have set GRPC_ARES=0 in tools/bazel.rc
+    # important since we have set GRPC_ARES=0 in .bazelrc
     native.bind(
         name = "cares",
         actual = "@grpc//third_party/nanopb:nanopb",
-- 
GitLab


From 94267ccc14516ad9df67897bea8ede20cbad24ca Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 3 Oct 2018 14:09:05 -0700
Subject: [PATCH 0302/1085] Move out-params to end of argument list and add an
 out_ prefix; NFC

PiperOrigin-RevId: 215624875
---
 tensorflow/compiler/jit/kernels/xla_ops.cc    |  2 +-
 .../compiler/jit/xla_compilation_cache.cc     | 33 ++++++++++---------
 .../compiler/jit/xla_compilation_cache.h      | 29 ++++++++--------
 .../compiler/jit/xla_compile_on_demand_op.cc  |  2 +-
 4 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc
index cfd27a6510..accc86a86d 100644
--- a/tensorflow/compiler/jit/kernels/xla_ops.cc
+++ b/tensorflow/compiler/jit/kernels/xla_ops.cc
@@ -277,7 +277,7 @@ static Status CompileToLocalExecutable(
   compile_options.always_return_tuple = false;
 
   return cache->Compile(options, function, constant_args, *variables, ctx,
-                        kernel, executable, compile_options);
+                        compile_options, kernel, executable);
 }
 
 void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc
index 3aa9e9c7ed..0471995015 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.cc
+++ b/tensorflow/compiler/jit/xla_compilation_cache.cc
@@ -228,37 +228,38 @@ Status XlaCompilationCache::Compile(
     const XlaCompiler::Options& options, const NameAttrList& function,
     const std::map<int, Tensor>& constant_args,
     const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
-    const XlaCompiler::CompilationResult** compilation_result,
-    xla::LocalExecutable** executable,
-    const XlaCompiler::CompileOptions& compile_options) {
+    const XlaCompiler::CompileOptions& compile_options,
+    const XlaCompiler::CompilationResult** out_compilation_result,
+    xla::LocalExecutable** out_executable) {
   return CompileImpl(options, function, constant_args, variable_args, ctx,
-                     compilation_result, executable, compile_options, false);
+                     compile_options, /*compile_single_op=*/false,
+                     out_compilation_result, out_executable);
 }
 
 Status XlaCompilationCache::CompileSingleOp(
     const XlaCompiler::Options& options,
     const std::map<int, Tensor>& constant_args,
     const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
-    const XlaCompiler::CompilationResult** compilation_result,
-    xla::LocalExecutable** executable,
-    const XlaCompiler::CompileOptions& compile_options) {
+    const XlaCompiler::CompileOptions& compile_options,
+    const XlaCompiler::CompilationResult** out_compilation_result,
+    xla::LocalExecutable** out_executable) {
   const NodeDef& def = ctx->op_kernel().def();
   NameAttrList name;
   name.set_name(def.op());
   *name.mutable_attr() = def.attr();
-  return CompileImpl(options, name, constant_args, variable_args, ctx,
-                     compilation_result, executable, compile_options, true);
+  return CompileImpl(
+      options, name, constant_args, variable_args, ctx, compile_options,
+      /*compile_single_op=*/true, out_compilation_result, out_executable);
 }
 
 Status XlaCompilationCache::CompileImpl(
     const XlaCompiler::Options& options, const NameAttrList& function,
     const std::map<int, Tensor>& constant_args,
     const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
-    const XlaCompiler::CompilationResult** compilation_result,
-    xla::LocalExecutable** executable,
-    const XlaCompiler::CompileOptions& compile_options,
-    bool compile_single_op) {
-  CHECK_NE(executable, nullptr);
+    const XlaCompiler::CompileOptions& compile_options, bool compile_single_op,
+    const XlaCompiler::CompilationResult** out_compilation_result,
+    xla::LocalExecutable** out_executable) {
+  DCHECK_NE(out_executable, nullptr);
   VLOG(2) << "XlaCompilationCache::Compile " << DebugString();
 
   if (VLOG_IS_ON(2)) {
@@ -357,8 +358,8 @@ Status XlaCompilationCache::CompileImpl(
     }
   }
   TF_RETURN_IF_ERROR(entry->compilation_status);
-  *compilation_result = &entry->compilation_result;
-  *executable = entry->executable.get();
+  *out_compilation_result = &entry->compilation_result;
+  *out_executable = entry->executable.get();
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h
index 17c0321c1e..75c7758f73 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.h
+++ b/tensorflow/compiler/jit/xla_compilation_cache.h
@@ -68,9 +68,9 @@ class XlaCompilationCache : public ResourceBase {
                  const std::map<int, Tensor>& constant_args,
                  const std::map<int, OptionalTensor>& variable_args,
                  OpKernelContext* ctx,
-                 const XlaCompiler::CompilationResult** compilation_result,
-                 xla::LocalExecutable** executable,
-                 const XlaCompiler::CompileOptions& compile_options);
+                 const XlaCompiler::CompileOptions& compile_options,
+                 const XlaCompiler::CompilationResult** out_compilation_result,
+                 xla::LocalExecutable** out_executable);
 
   // As above, but calls XlaCompiler::CompileSingleOp instead of
   // XlaCompiler::CompileFunction.
@@ -78,9 +78,9 @@ class XlaCompilationCache : public ResourceBase {
       const XlaCompiler::Options& options,
       const std::map<int, Tensor>& constant_args,
       const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
-      const XlaCompiler::CompilationResult** compilation_result,
-      xla::LocalExecutable** executable,
-      const XlaCompiler::CompileOptions& compile_options);
+      const XlaCompiler::CompileOptions& compile_options,
+      const XlaCompiler::CompilationResult** out_compilation_result,
+      xla::LocalExecutable** out_executable);
 
   xla::LocalClient* client() const { return client_; }
   const DeviceType& device_type() const { return device_type_; }
@@ -89,15 +89,14 @@ class XlaCompilationCache : public ResourceBase {
 
  private:
   // Common implementation of Compile and CompileSingleOp.
-  Status CompileImpl(const XlaCompiler::Options& options,
-                     const NameAttrList& function,
-                     const std::map<int, Tensor>& constant_args,
-                     const std::map<int, OptionalTensor>& variable_args,
-                     OpKernelContext* ctx,
-                     const XlaCompiler::CompilationResult** compilation_result,
-                     xla::LocalExecutable** executable,
-                     const XlaCompiler::CompileOptions& compile_options,
-                     bool compile_single_op);
+  Status CompileImpl(
+      const XlaCompiler::Options& options, const NameAttrList& function,
+      const std::map<int, Tensor>& constant_args,
+      const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
+      const XlaCompiler::CompileOptions& compile_options,
+      bool compile_single_op,
+      const XlaCompiler::CompilationResult** out_compilation_result,
+      xla::LocalExecutable** out_executable);
 
   // Takes `result` which has been compiled from a Tensorflow subgraph to a
   // XLA computation already, and generates an XLA LocalExecutable `executable`.
diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
index b98c0cb028..79976c85df 100644
--- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
+++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
@@ -180,7 +180,7 @@ Status XlaCompileOnDemandOp::Compile(
 
   std::map<int, OptionalTensor> variable_args = GetVariables(ctx);
   return cache->CompileSingleOp(options, constant_arguments, variable_args, ctx,
-                                result, executable, compile_options);
+                                compile_options, result, executable);
 }
 
 void XlaCompileOnDemandOp::Compute(OpKernelContext* ctx) {
-- 
GitLab


From f5f8dff270b9f2cdf36bba9d671c324a4f7c6fac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 14:28:25 -0700
Subject: [PATCH 0303/1085] Add NNAPI padding enums to NeuralNetworksShim.h

PiperOrigin-RevId: 215628561
---
 tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index 687944023b..eccf4aefb6 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -179,6 +179,14 @@ enum {
   ANEURALNETWORKS_BAD_STATE = 6,
 };
 
+/**
+ * Implicit padding algorithms.
+ */
+enum {
+  ANEURALNETWORKS_PADDING_SAME = 1,
+  ANEURALNETWORKS_PADDING_VALID = 2,
+};
+
 /**
  * ANeuralNetworksMemory is an opaque type that represents memory.
  *
-- 
GitLab


From 2e11deba60cb00027de4373af17703676fa74bd7 Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Wed, 3 Oct 2018 14:37:57 -0700
Subject: [PATCH 0304/1085] [XLA] Disable a test for layout changing
 elementwise operations.

Rename the test to make it obvious that it is for testing the codegen
correctness in handling layout changing elementwise operations.

Keep the test only for the CPU backend.

PiperOrigin-RevId: 215630611
---
 tensorflow/compiler/xla/tests/fusion_test.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc
index 9c94acb437..fd79a9d041 100644
--- a/tensorflow/compiler/xla/tests/fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/fusion_test.cc
@@ -764,8 +764,9 @@ XLA_TEST_F(FusionTest, Clamp2D) {
   TestElementwise2D<float, 3>(HloOpcode::kClamp);
 }
 
-// TODO(b/73903144): Enable on interpreter once interpreter supports bitcast.
-XLA_TEST_F(FusionTest, DISABLED_ON_INTERPRETER(FusionWithLayout)) {
+// TODO(b/117156505): Remove this test when the bug is fixed.
+XLA_TEST_F(FusionTest, DISABLED_ON_GPU(DISABLED_ON_INTERPRETER(
+                           LayoutChangingElementWiseOp))) {
   const string hlo_text = R"(
 HloModule Cluster
 
-- 
GitLab


From c1b3b0b9e041d82e80c2cdcc623a387753daf0b4 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Wed, 3 Oct 2018 14:42:56 -0700
Subject: [PATCH 0305/1085] Internal change.

PiperOrigin-RevId: 215631612
---
 tensorflow/contrib/lite/kernels/BUILD | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index daaf6714cc..b349a2863c 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -337,7 +337,10 @@ tf_cc_test(
     name = "activations_test",
     size = "small",
     srcs = ["activations_test.cc"],
-    tags = ["tflite_not_portable_ios"],
+    tags = [
+        "nomac",
+        "tflite_not_portable_ios",
+    ],
     deps = [
         ":builtin_ops",
         "//tensorflow/contrib/lite:framework",
-- 
GitLab


From 312e37cee391b0d207293d59d8882db3c8030f9d Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Wed, 3 Oct 2018 14:51:08 -0700
Subject: [PATCH 0306/1085] Add a require_static_shapes argument to
 DistributionStrategy class. This allows us to identify if we need to set the
 drop_remainder option when creating Dataset objects.

PiperOrigin-RevId: 215633097
---
 tensorflow/contrib/distribute/python/tpu_strategy.py |  4 +++-
 tensorflow/python/keras/engine/training.py           | 11 +++++------
 tensorflow/python/training/distribute.py             |  7 +++++++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index c3c7df3cd8..1d9e299b38 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -132,7 +132,7 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
     """
     # TODO(sourabhbajaj): OneDeviceStrategy should be initialized with the
     # master node fetched from the cluster resolver.
-    super(TPUStrategy, self).__init__('/device:CPU:0')
+    super(TPUStrategy, self).__init__("/device:CPU:0")
 
     self._tpu_cluster_resolver = tpu_cluster_resolver
     self._tpu_metadata = get_tpu_system_metadata(self._tpu_cluster_resolver)
@@ -152,6 +152,8 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
     # at a time is comparable to multiple steps.
     self.steps_per_run = steps_per_run
 
+    self._require_static_shapes = True
+
   def _get_enqueue_op_per_host(self, host_id, iterator, input_shapes,
                                iterations):
     """Create an enqueue op for a single host identified using host_id.
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 85233de9b1..d81bd83f7f 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -814,6 +814,9 @@ class Model(Network):
       x_shape = first_x_value.shape
       if batch_size is None:
         batch_size = x_shape[0] // steps
+      # We need to use the drop_remainder argument to allow for a static
+      # input shape which is required for TPUs.
+      drop_remainder = self._distribution_strategy.require_static_shapes
       if y is not None:
         var_x = distributed_training_utils.get_var_for_numpy(
             self._distribution_strategy, x)
@@ -824,9 +827,7 @@ class Model(Network):
         # TODO(anjalisridhar): What should the buffer size be?
         x = x.shuffle(10000)
         x = x.repeat()
-        # We need to use the drop_remainder argument to allow for a static
-        # input shape which is required for TPUs.
-        x = x.batch(batch_size, drop_remainder=True)
+        x = x.batch(batch_size, drop_remainder=drop_remainder)
         y = None
       else:
         # This case is for the predict call where the dataset only contains
@@ -838,9 +839,7 @@ class Model(Network):
             self._distribution_strategy, x)
         x = dataset_ops.Dataset.from_tensor_slices(var_x)
         x = x.repeat()
-        # We need to use the drop_remainder argument to allow for a static
-        # input shape which is required for TPUs.
-        x = x.batch(batch_size, drop_remainder=True)
+        x = x.batch(batch_size, drop_remainder=drop_remainder)
 
     # TODO(anjalisridhar): Can we use the iterator and getnext op cache?
     # We require users to pass Datasets since we distribute the dataset across
diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py
index a92a1bdee7..b3f3c29b2f 100644
--- a/tensorflow/python/training/distribute.py
+++ b/tensorflow/python/training/distribute.py
@@ -436,6 +436,9 @@ class DistributionStrategy(object):
 
   def __init__(self):
     self._default_device = None
+    # This property is used to determine if we should set drop_remainder=True
+    # when creating Datasets from numpy array inputs.
+    self._require_static_shapes = False
 
   def scope(self):
     """Returns a context manager selecting this DistributionStrategy as current.
@@ -898,6 +901,10 @@ class DistributionStrategy(object):
     """
     raise NotImplementedError("must be implemented in descendants")
 
+  @property
+  def require_static_shapes(self):
+    return self._require_static_shapes
+
   @property
   def num_towers(self):
     """Returns number of towers, for purposes of averaging across towers."""
-- 
GitLab


From 148bc62dba0a0b9d26945ce48b6dcd903613de14 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Wed, 3 Oct 2018 15:14:32 -0700
Subject: [PATCH 0307/1085] Update size of multi_device_iterator_test to medium
 to fix timeouts

PiperOrigin-RevId: 215637785
---
 tensorflow/python/data/kernel_tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index bf76860aa4..c7295d6e69 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -291,7 +291,7 @@ tf_py_test(
 
 cuda_py_test(
     name = "multi_device_iterator_test",
-    size = "small",
+    size = "medium",
     srcs = ["multi_device_iterator_test.py"],
     additional_deps = [
         ":test_base",
-- 
GitLab


From efbee1ab2cac59f511cc0850d84414e711bbda3b Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 15:15:23 -0700
Subject: [PATCH 0308/1085] Fix ci_parameterized_build to pass environment
 variables to tests.

This is particularly important when using --run_under with
parallel_gpu_execute, since the envvars control the execution.

PiperOrigin-RevId: 215637931
---
 .../tools/ci_build/ci_parameterized_build.sh   | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 49a9048c03..99bdedf7b4 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -65,8 +65,6 @@
 #   TF_GPU_COUNT:
 #                      Run this many parallel tests for serial builds.
 #                      For now, only can be edited for PIP builds.
-#                      TODO(gunan): Find a way to pass this environment variable
-#                      to the script bazel runs (using --run_under).
 #   TF_BUILD_TEST_TUTORIALS:
 #                      If set to any non-empty and non-0 value, will perform
 #                      tutorials tests (Applicable only if TF_BUILD_IS_PIP is
@@ -150,6 +148,13 @@ ANDROID_FULL_CMD="${CI_BUILD_DIR}/builds/android_full.sh"
 TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 PARALLEL_GPU_TEST_CMD='//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute'
 
+# Environment variables to set when running bazel tests.  These are especially
+# important when using --run_under with parallel_gpu_execute.
+BAZEL_TEST_ENV=""\
+"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
+"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
+"--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB} "
+
 BENCHMARK_CMD="${CI_BUILD_DIR}/builds/benchmark.sh"
 
 EXTRA_PARAMS=""
@@ -410,13 +415,14 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
   if [[ ${CTYPE} == cpu* ]] || \
      [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
     # CPU only command, fully parallel.
-    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} ${EXTRA_ARGS} -- "\
-"${BAZEL_TARGET}"
+    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
+      "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == gpu* ]]; then
     # GPU only command, run as many jobs as the GPU count only.
-    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
+    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
 "--local_test_jobs=${TF_GPU_COUNT} "\
-"--run_under=${PARALLEL_GPU_TEST_CMD} ${EXTRA_ARGS} -- ${BAZEL_TARGET}"
+"--run_under=${PARALLEL_GPU_TEST_CMD} "\
+"${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == "android" ]]; then
     # Run android specific script for android build.
     NO_PIP_MAIN_CMD="${ANDROID_CMD} ${OPT_FLAG} "
-- 
GitLab


From 8f1571fbb18a0ae4499bdfc3f8f05473b51dfd87 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 15:15:23 -0700
Subject: [PATCH 0309/1085] Fix ci_parameterized_build to pass environment
 variables to tests.

This is particularly important when using --run_under with
parallel_gpu_execute, since the envvars control the execution.

PiperOrigin-RevId: 215637931
---
 .../tools/ci_build/ci_parameterized_build.sh   | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 49a9048c03..99bdedf7b4 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -65,8 +65,6 @@
 #   TF_GPU_COUNT:
 #                      Run this many parallel tests for serial builds.
 #                      For now, only can be edited for PIP builds.
-#                      TODO(gunan): Find a way to pass this environment variable
-#                      to the script bazel runs (using --run_under).
 #   TF_BUILD_TEST_TUTORIALS:
 #                      If set to any non-empty and non-0 value, will perform
 #                      tutorials tests (Applicable only if TF_BUILD_IS_PIP is
@@ -150,6 +148,13 @@ ANDROID_FULL_CMD="${CI_BUILD_DIR}/builds/android_full.sh"
 TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 PARALLEL_GPU_TEST_CMD='//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute'
 
+# Environment variables to set when running bazel tests.  These are especially
+# important when using --run_under with parallel_gpu_execute.
+BAZEL_TEST_ENV=""\
+"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
+"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
+"--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB} "
+
 BENCHMARK_CMD="${CI_BUILD_DIR}/builds/benchmark.sh"
 
 EXTRA_PARAMS=""
@@ -410,13 +415,14 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
   if [[ ${CTYPE} == cpu* ]] || \
      [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
     # CPU only command, fully parallel.
-    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} ${EXTRA_ARGS} -- "\
-"${BAZEL_TARGET}"
+    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
+      "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == gpu* ]]; then
     # GPU only command, run as many jobs as the GPU count only.
-    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
+    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
 "--local_test_jobs=${TF_GPU_COUNT} "\
-"--run_under=${PARALLEL_GPU_TEST_CMD} ${EXTRA_ARGS} -- ${BAZEL_TARGET}"
+"--run_under=${PARALLEL_GPU_TEST_CMD} "\
+"${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == "android" ]]; then
     # Run android specific script for android build.
     NO_PIP_MAIN_CMD="${ANDROID_CMD} ${OPT_FLAG} "
-- 
GitLab


From 0dfde8ab8addef36f90a445f0d604618a199508c Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Wed, 3 Oct 2018 15:48:53 -0700
Subject: [PATCH 0310/1085] Disable norm_op_test and svd_op_test under msan

PiperOrigin-RevId: 215643600
---
 tensorflow/python/kernel_tests/BUILD | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index c0e9a3c975..9303c70c60 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -2999,7 +2999,10 @@ cuda_py_test(
         "//tensorflow/python:math_ops",
     ],
     shard_count = 20,
-    tags = ["no_oss"],  # b/117185141
+    tags = [
+        "no_oss",  # b/117185141.
+        "nomsan",  # TODO(b/117236102): Re-enable in msan build.
+    ],
 )
 
 cuda_py_test(
@@ -3014,7 +3017,11 @@ cuda_py_test(
         "//tensorflow/python:linalg_ops",
     ],
     shard_count = 20,
-    tags = ["no_windows_gpu"],
+    # TODO(b/117236102): Re-enable in msan build.
+    tags = [
+        "no_windows_gpu",
+        "nomsan",
+    ],
 )
 
 cuda_py_test(
-- 
GitLab


From b05d8ded3b1678e5ddc375eca485b6a8b0f02d36 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Wed, 3 Oct 2018 16:02:09 -0700
Subject: [PATCH 0311/1085] Initial version of fix

---
 tensorflow/python/ops/control_flow_ops.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index f779c3d273..2ae2f53eb4 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -85,6 +85,11 @@ def _summarize_eager(tensor, summarize=None):
     tensor: EagerTensor to summarize
     summarize: Include these many first elements of `array`
   """
+  # Emulate the behavior of Tensor::SummarizeValue()
+  if summarize is None:
+    summarize = 3
+  elif summarize < 0:
+    summarize = array_ops.size(tensor)
   # reshape((-1,)) is the fastest way to get a flat array view
   if tensor._rank():  # pylint: disable=protected-access
     flat = tensor.numpy().reshape((-1,))
-- 
GitLab


From 041c347df995e6c6d9206920ae061f558e120b92 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 3 Oct 2018 15:59:41 -0700
Subject: [PATCH 0312/1085] [TF:XLA] Bump open source abseil revision to
 f21d187b80e3b7f08fb279775ea9c8b48c636030

PiperOrigin-RevId: 215645351
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index d27732a801..72f3fd0cf8 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -110,11 +110,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "7dd09690ae7ca4551de3111d4a86b75b23ec17445f273d3c42bdcdc1c7b02e4e",
-        strip_prefix = "abseil-cpp-48cd2c3f351ff188bc85684b84a91b6e6d17d896",
+        sha256 = "507903ef9353cb25cccd0a6840048fdd348fd20e98314d694f04a990c0f277e3",
+        strip_prefix = "abseil-cpp-f21d187b80e3b7f08fb279775ea9c8b48c636030",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz",
         ],
     )
 
-- 
GitLab


From c572581d8806d10d05790c30505df2d976c6853d Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Wed, 3 Oct 2018 16:21:49 -0700
Subject: [PATCH 0313/1085] Added new test case for summarize arg

---
 .../python/kernel_tests/check_ops_test.py     | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index bd4011d58e..8830c09486 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -240,6 +240,47 @@ First 2 elements of y:
       out = array_ops.identity(larry)
     self.evaluate(out)
 
+  def test_error_message_eager(self):
+    expected_error_msg_full = r"""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
+b'Condition x != y did not hold for every single element:'
+b'x (shape=(2, 3) dtype=float32) = '
+0.0, 1.0, 2.0, 3.0, 4.0, 5.0
+b'y (shape=(2, 3) dtype=float32) = '
+0.0, 1.0, 2.0, 3.0, 4.0, 5.0
+"""
+    expected_error_msg_default = r"""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
+b'Condition x != y did not hold for every single element:'
+b'x (shape=(2, 3) dtype=float32) = '
+0.0, 1.0, 2.0, ...
+b'y (shape=(2, 3) dtype=float32) = '
+0.0, 1.0, 2.0, ...
+"""
+    expected_error_msg_short = r"""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
+b'Condition x != y did not hold for every single element:'
+b'x (shape=(2, 3) dtype=float32) = '
+0.0, 1.0, ...
+b'y (shape=(2, 3) dtype=float32) = '
+0.0, 1.0, ...
+"""
+    with context.eager_mode():
+      t = tf.constant(np.array(range(6)), shape=[2,3], dtype=tf.float32)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_full):
+        check_ops.assert_none_equal(t, t, message="This is the error message.",
+                               summarize=10)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_full):
+        check_ops.assert_equal(t, t, message="This is the error message.",
+                               summarize=-1)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_default):
+        check_ops.assert_equal(t, t, message="This is the error message.")
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_short):
+        check_ops.assert_equal(t, t, message="This is the error message.",
+                               summarize=2)
+
+
 
 class AssertNoneEqualTest(test.TestCase):
 
-- 
GitLab


From 207bea0e35ab635e66137520963761a6e94354ea Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Wed, 3 Oct 2018 16:34:05 -0700
Subject: [PATCH 0314/1085] [XLA] Revise the way to express a CPU specific
 test.

Use #ifdef XLA_TEST_BACKEND_CPU to protect the test instead of disabling it for
all the other backends except for the CPU backend.

PiperOrigin-RevId: 215651036
---
 tensorflow/compiler/xla/tests/fusion_test.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc
index fd79a9d041..4d4b676a53 100644
--- a/tensorflow/compiler/xla/tests/fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/fusion_test.cc
@@ -764,9 +764,10 @@ XLA_TEST_F(FusionTest, Clamp2D) {
   TestElementwise2D<float, 3>(HloOpcode::kClamp);
 }
 
-// TODO(b/117156505): Remove this test when the bug is fixed.
-XLA_TEST_F(FusionTest, DISABLED_ON_GPU(DISABLED_ON_INTERPRETER(
-                           LayoutChangingElementWiseOp))) {
+// TODO(b/117156505): Remove this test when the bug is fixed and the CPU backend
+// should not generate layout changing elementwise operations.
+#ifdef XLA_TEST_BACKEND_CPU
+XLA_TEST_F(FusionTest, LayoutChangingElementWiseOp) {
   const string hlo_text = R"(
 HloModule Cluster
 
@@ -795,6 +796,7 @@ ENTRY main {
       LiteralUtil::CreateR3<float>({{{0.}, {0.76159415595}}, {{0.}, {0.}}}),
       result));
 }
+#endif
 
 class FusionClientLibraryTest : public ClientLibraryTestBase {};
 
-- 
GitLab


From 9801b8810e07859141d4417746317cc3dbebc227 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 16:36:23 -0700
Subject: [PATCH 0315/1085] Reduce batch sizes for some eager tests to prevert
 OOMs in OSS runs

PiperOrigin-RevId: 215651413
---
 .../python/examples/resnet50/resnet50_graph_test.py    | 10 +++++++---
 .../eager/python/examples/revnet/revnet_test.py        |  3 +++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
index 551c76b0df..f3bb978875 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
@@ -51,7 +51,9 @@ def random_batch(batch_size):
 class ResNet50GraphTest(tf.test.TestCase):
 
   def testApply(self):
-    batch_size = 64
+    # Use small batches for tests because the OSS version runs
+    # in constrained GPU environment with 1-2GB of memory.
+    batch_size = 8
     with tf.Graph().as_default():
       images = tf.placeholder(tf.float32, image_shape(None))
       model = resnet50.ResNet50(data_format())
@@ -63,7 +65,7 @@ class ResNet50GraphTest(tf.test.TestCase):
         sess.run(init)
         np_images, _ = random_batch(batch_size)
         out = sess.run(predictions, feed_dict={images: np_images})
-        self.assertAllEqual([64, 1000], out.shape)
+        self.assertAllEqual([batch_size, 1000], out.shape)
 
   def testTrainWithSummary(self):
     with tf.Graph().as_default():
@@ -87,7 +89,9 @@ class ResNet50GraphTest(tf.test.TestCase):
       init = tf.global_variables_initializer()
       self.assertEqual(321, len(tf.global_variables()))
 
-      batch_size = 32
+      # Use small batches for tests because the OSS version runs
+      # in constrained GPU environment with 1-2GB of memory.
+      batch_size = 2
       with tf.Session() as sess:
         sess.run(init)
         sess.run(tf.contrib.summary.summary_writer_initializer_op())
diff --git a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
index 6a921e1997..4f4cc3af6f 100644
--- a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
+++ b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
@@ -50,6 +50,9 @@ class RevNetTest(tf.test.TestCase):
     # Reconstruction could cause numerical error, use double precision for tests
     config.dtype = tf.float64
     config.fused = False  # Fused batch norm does not support tf.float64
+    # Reduce the batch size for tests because the OSS version runs
+    # in constrained GPU environment with 1-2GB of memory.
+    config.batch_size = 2
     shape = (config.batch_size,) + config.input_shape
     self.model = revnet.RevNet(config=config)
     self.x = tf.random_normal(shape=shape, dtype=tf.float64)
-- 
GitLab


From 635c21f486f6c6426f3eb51198833afb29e21b33 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 16:36:23 -0700
Subject: [PATCH 0316/1085] Reduce batch sizes for some eager tests to prevert
 OOMs in OSS runs

PiperOrigin-RevId: 215651413
---
 .../python/examples/resnet50/resnet50_graph_test.py    | 10 +++++++---
 .../eager/python/examples/revnet/revnet_test.py        |  3 +++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
index 551c76b0df..f3bb978875 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py
@@ -51,7 +51,9 @@ def random_batch(batch_size):
 class ResNet50GraphTest(tf.test.TestCase):
 
   def testApply(self):
-    batch_size = 64
+    # Use small batches for tests because the OSS version runs
+    # in constrained GPU environment with 1-2GB of memory.
+    batch_size = 8
     with tf.Graph().as_default():
       images = tf.placeholder(tf.float32, image_shape(None))
       model = resnet50.ResNet50(data_format())
@@ -63,7 +65,7 @@ class ResNet50GraphTest(tf.test.TestCase):
         sess.run(init)
         np_images, _ = random_batch(batch_size)
         out = sess.run(predictions, feed_dict={images: np_images})
-        self.assertAllEqual([64, 1000], out.shape)
+        self.assertAllEqual([batch_size, 1000], out.shape)
 
   def testTrainWithSummary(self):
     with tf.Graph().as_default():
@@ -87,7 +89,9 @@ class ResNet50GraphTest(tf.test.TestCase):
       init = tf.global_variables_initializer()
       self.assertEqual(321, len(tf.global_variables()))
 
-      batch_size = 32
+      # Use small batches for tests because the OSS version runs
+      # in constrained GPU environment with 1-2GB of memory.
+      batch_size = 2
       with tf.Session() as sess:
         sess.run(init)
         sess.run(tf.contrib.summary.summary_writer_initializer_op())
diff --git a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
index 6a921e1997..4f4cc3af6f 100644
--- a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
+++ b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
@@ -50,6 +50,9 @@ class RevNetTest(tf.test.TestCase):
     # Reconstruction could cause numerical error, use double precision for tests
     config.dtype = tf.float64
     config.fused = False  # Fused batch norm does not support tf.float64
+    # Reduce the batch size for tests because the OSS version runs
+    # in constrained GPU environment with 1-2GB of memory.
+    config.batch_size = 2
     shape = (config.batch_size,) + config.input_shape
     self.model = revnet.RevNet(config=config)
     self.x = tf.random_normal(shape=shape, dtype=tf.float64)
-- 
GitLab


From d5b362a67a57f53f610536ed6068a5b67bc37b88 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Wed, 3 Oct 2018 16:38:22 -0700
Subject: [PATCH 0317/1085] Update size of mvn_diag_test and core_rnn_cell_test
 to medium to fix timeouts

PiperOrigin-RevId: 215651746
---
 tensorflow/contrib/distributions/BUILD | 2 +-
 tensorflow/contrib/rnn/BUILD           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 3ff7da4f89..60f6b90edc 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -299,7 +299,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "mvn_diag_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/kernel_tests/mvn_diag_test.py"],
     additional_deps = [
         ":distributions_py",
diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD
index 4e67d80558..1385a9ddc1 100644
--- a/tensorflow/contrib/rnn/BUILD
+++ b/tensorflow/contrib/rnn/BUILD
@@ -108,7 +108,7 @@ cuda_py_tests(
 
 cuda_py_tests(
     name = "core_rnn_cell_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/kernel_tests/core_rnn_cell_test.py"],
     additional_deps = [
         ":rnn_py",
-- 
GitLab


From aeb044c9784d30a25c0d15fa31f479001be55052 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 3 Oct 2018 16:41:21 -0700
Subject: [PATCH 0318/1085] assert_nontrivial_match in
 tf.keras.Model.load_weights (TF format)

Adds a bit of sanity checking by default to load_weights (e.g. for the case when absolutely nothing matches) while still supporting restore-on-create and the addition of new Layers to checkpointed models.

PiperOrigin-RevId: 215652168
---
 tensorflow/python/keras/engine/network.py     |  1 +
 tensorflow/python/keras/engine/saving_test.py | 13 +++++
 .../python/training/checkpointable/util.py    | 56 +++++++++++++++++--
 .../training/checkpointable/util_test.py      |  5 ++
 4 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 5ef8d13487..8d34006967 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -1526,6 +1526,7 @@ class Network(base_layer.Layer):
         # Restore existing variables (if any) immediately, and set up a
         # streaming restore for any variables created in the future.
         checkpointable_utils.streaming_restore(status=status, session=session)
+      status.assert_nontrivial_match()
       return status
     if h5py is None:
       raise ImportError(
diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 02d99d5d69..f5045be907 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -38,6 +38,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import training as training_module
+from tensorflow.python.training.checkpointable import util as checkpointable
 
 try:
   import h5py  # pylint:disable=g-import-not-at-top
@@ -922,6 +923,18 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase):
         SubclassedModel, SubclassedModelRestore,
         _restore_init_fn)
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_incompatible_checkpoint(self):
+    save_path = checkpointable.Checkpoint().save(
+        os.path.join(self.get_temp_dir(), 'ckpt'))
+    m = keras.Model()
+    with self.assertRaisesRegexp(AssertionError, 'Nothing to load'):
+      m.load_weights(save_path)
+    m.dense = keras.layers.Dense(2)
+    m.dense(constant_op.constant([[1.]]))
+    with self.assertRaisesRegexp(
+        AssertionError, 'Nothing except the root object matched'):
+      m.load_weights(save_path)
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py
index eff15b24ce..edab6cc6eb 100644
--- a/tensorflow/python/training/checkpointable/util.py
+++ b/tensorflow/python/training/checkpointable/util.py
@@ -853,6 +853,11 @@ class _LoadStatus(object):
     """Raises an exception unless existing Python objects have been matched."""
     pass
 
+  @abc.abstractmethod
+  def assert_nontrivial_match(self):
+    """Raises an exception if only the root object matched."""
+    pass
+
   @abc.abstractmethod
   def run_restore_ops(self, session=None):
     """Runs restore ops from the checkpoint. Requires a valid checkpoint."""
@@ -975,6 +980,26 @@ class CheckpointLoadStatus(_LoadStatus):
           % (list(unused_python_objects),))
     return self
 
+  def assert_nontrivial_match(self):
+    """Raises an exception if only the root object matched."""
+    for checkpointable_object in list_objects(self._root_checkpointable):
+      self._checkpoint.all_python_objects.add(checkpointable_object)
+    if len(self._checkpoint.object_by_proto_id) <= 1:
+      unused_python_objects = (
+          _ObjectIdentitySet(self._checkpoint.all_python_objects)
+          - _ObjectIdentitySet(self._checkpoint.object_by_proto_id.values()))
+      if unused_python_objects:
+        raise AssertionError(
+            ("Nothing except the root object matched a checkpointed value. "
+             "Typically this means that the checkpoint does not match the "
+             "Python program. The following objects have no matching "
+             "checkpointed value: %s") % (list(unused_python_objects),))
+      else:
+        raise AssertionError(
+            "Nothing to load. No dependencies have been added to %s yet." % (
+                self._root_checkpointable,))
+    return self
+
   def run_restore_ops(self, session=None):
     """Run operations to restore objects in the dependency graph."""
     if context.executing_eagerly():
@@ -1039,6 +1064,11 @@ class InitializationOnlyStatus(_LoadStatus):
     raise AssertionError(
         "No checkpoint specified (save_path=None); nothing is being restored.")
 
+  def assert_nontrivial_match(self):
+    """Assertion for consistency with `CheckpointLoadStatus`. Always fails."""
+    raise AssertionError(
+        "No checkpoint specified (save_path=None); nothing is being restored.")
+
   def run_restore_ops(self, session=None):
     """For consistency with `CheckpointLoadStatus`.
 
@@ -1122,6 +1152,14 @@ class NameBasedSaverStatus(_LoadStatus):
     # useful since we don't touch Python objects or Python state).
     return self.assert_consumed()
 
+  def assert_nontrivial_match(self):
+    """Raises an exception if currently created objects are unmatched."""
+    # For name-based checkpoints there's no object information in the
+    # checkpoint, so there's no distinction between
+    # assert_nontrivial_match and assert_consumed (and both are less
+    # useful since we don't touch Python objects or Python state).
+    return self.assert_consumed()
+
   def _gather_saveable_objects(self):
     """Walk the object graph, using global names for SaveableObjects."""
     objects = list_objects(self._root_checkpointable)
@@ -1779,13 +1817,15 @@ class Checkpoint(tracking.Checkpointable):
       status of a checkpoint restoration and run initialization/restore ops.
 
       The returned status object has the following methods:
-      - `assert_consumed()`:
+
+      * `assert_consumed()`:
           Raises an exception if any variables/objects are unmatched: either
           checkpointed values which don't have a matching Python object or
           Python objects in the dependency graph with no values in the
           checkpoint. This method returns the status object, and so may be
           chained with `initialize_or_restore` or `run_restore_ops`.
-      -  `assert_existing_objects_matched()`:
+
+      * `assert_existing_objects_matched()`:
           Raises an exception if any existing Python objects in the dependency
           graph are unmatched. Unlike `assert_consumed`, this assertion will
           pass if values in the checkpoint have no corresponding Python
@@ -1796,12 +1836,20 @@ class Checkpoint(tracking.Checkpointable):
           a `tf.train.Optimizer` was saved but only the state required for
           inference is being loaded. This method returns the status object, and
           so may be chained with `initialize_or_restore` or `run_restore_ops`.
-      - `initialize_or_restore(session=None)`:
+
+      * `assert_nontrivial_match()`: Asserts that something aside from the root
+          object was matched. This is a very weak assertion, but is useful for
+          sanity checking in library code where objects may exist in the
+          checkpoint which haven't been created in Python and some Python
+          objects may not have a checkpointed value.
+
+      * `initialize_or_restore(session=None)`:
           When graph building, runs variable initializers if `save_path` is
           `None`, but otherwise runs restore operations. If no `session` is
           explicitly specified, the default session is used. No effect when
           executing eagerly (variables are initialized or restored eagerly).
-      - `run_restore_ops(session=None)`:
+
+      * `run_restore_ops(session=None)`:
           When graph building, runs restore operations. If no `session` is
           explicitly specified, the default session is used. No effect when
           executing eagerly (restore operations are run eagerly). May only be
diff --git a/tensorflow/python/training/checkpointable/util_test.py b/tensorflow/python/training/checkpointable/util_test.py
index f8b5bd8501..14b47a1940 100644
--- a/tensorflow/python/training/checkpointable/util_test.py
+++ b/tensorflow/python/training/checkpointable/util_test.py
@@ -437,6 +437,7 @@ class CheckpointingTests(test.TestCase):
         optimizer=on_create_optimizer, model=on_create_model)
     # Deferred restoration
     status = on_create_root.restore(save_path=save_path)
+    status.assert_nontrivial_match()
     status.assert_existing_objects_matched()
     with self.assertRaises(AssertionError):
       status.assert_consumed()
@@ -1509,6 +1510,8 @@ class CheckpointCompatibilityTests(test.TestCase):
           status.assert_consumed()
         with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
           status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_nontrivial_match()
       else:
         # When graph building, we haven't read any keys, so we don't know
         # whether the restore will be complete.
@@ -1516,6 +1519,8 @@ class CheckpointCompatibilityTests(test.TestCase):
           status.assert_consumed()
         with self.assertRaisesRegexp(AssertionError, "not restored"):
           status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_nontrivial_match()
       status.run_restore_ops()
       self._check_sentinels(root)
       self._set_sentinels(root)
-- 
GitLab


From 13941241e984e4a4296891f4e61a9ed5b3107b22 Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dimvar@google.com>
Date: Wed, 3 Oct 2018 16:47:49 -0700
Subject: [PATCH 0319/1085] [TF:XLA] Improve the accounting for subcomputations
 in the heap simulator.

Subtract the size of the aliased buffers from the subcomputation estimate instead of from the current computation. This way, the memory estimate for the current computation is more accurate.

For the newly added test, the heap simulation calculates 48 bytes at head instead of the correct 64 bytes.

PiperOrigin-RevId: 215653047
---
 .../compiler/xla/service/heap_simulator.cc    |  34 +++--
 .../compiler/xla/service/heap_simulator.h     |  13 +-
 .../xla/service/heap_simulator_test.cc        | 118 +++++++++++++++++
 .../xla/service/hlo_memory_scheduler_test.cc  | 120 ------------------
 4 files changed, 136 insertions(+), 149 deletions(-)

diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index b343305554..9220865867 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -240,6 +240,7 @@ Status HeapSimulator::RunComputation(
 
     // Make sure each buffer get reused at most once.
     flat_hash_set<const BufferValue*> reused_buffers;
+    int64 alloc_size_by_instruction = 0;
     for (const BufferValue* buffer : buffers_defined_by_instruction) {
       if (IgnoreBuffer(buffer)) {
         continue;
@@ -272,14 +273,15 @@ Status HeapSimulator::RunComputation(
 
       if (!shared) {
         VLOG(3) << "  Allocating: " << buffer->ToString();
+        alloc_size_by_instruction += size_fn_(*buffer);
         Alloc(buffer, instruction);
       }
     }
     // Account for the memory used by subcomputations when estimating the
     // current heap size.
     if (memory_by_computation_ != nullptr) {
-      algorithm_->AccountForSubcomputationMemory(instruction,
-                                                 *memory_by_computation_);
+      algorithm_->AccountForSubcomputationMemory(
+          instruction, alloc_size_by_instruction, *memory_by_computation_);
     }
 
     // If all computations in the module have been scheduled, we can save memory
@@ -385,10 +387,8 @@ void HeapSimulator::Alloc(const BufferValue* buffer,
 
   allocated_buffers_.insert(buffer);
   const int64 size = size_fn_(*buffer);
-  const HloInstruction* instruction_to_calc_aliasing =
-      memory_by_computation_ == nullptr ? nullptr : instruction;
-  algorithm_->Alloc(buffer, size, instruction_to_calc_aliasing);
-  no_fragmentation_stats_->Alloc(buffer, size, instruction_to_calc_aliasing);
+  algorithm_->Alloc(buffer, size);
+  no_fragmentation_stats_->Alloc(buffer, size);
   FillDebugTrace(HeapSimulatorTrace::Event::ALLOC, buffer, instruction,
                  nullptr);
 }
@@ -526,20 +526,8 @@ void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size) {
   }
 }
 
-void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size,
-                                     const HloInstruction* instruction) {
-  // The output buffer of while/call/conditional is always aliased with the
-  // output buffer of the root instruction in the body. Don't double count.
-  if (instruction == nullptr ||
-      (instruction->opcode() != HloOpcode::kWhile &&
-       instruction->opcode() != HloOpcode::kCall &&
-       instruction->opcode() != HloOpcode::kConditional)) {
-    Alloc(buffer, size);
-  }
-}
-
 void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
-    const HloInstruction* instruction,
+    const HloInstruction* instruction, int64 alloc_size_by_instruction,
     const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   // We only count the memory usage of the largest subcomputation, instead of
@@ -554,6 +542,14 @@ void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
       }
     }
   }
+  if (max_subcomputation_bytes > 0 &&
+      (instruction->opcode() == HloOpcode::kWhile ||
+       instruction->opcode() == HloOpcode::kCall ||
+       instruction->opcode() == HloOpcode::kConditional)) {
+    // The output buffer of while/call/conditional is always aliased with the
+    // output buffer of the root instruction in the body. Don't double count.
+    max_subcomputation_bytes -= alloc_size_by_instruction;
+  }
   max_heap_size_ =
       std::max(max_heap_size_, current_heap_size_ + max_subcomputation_bytes);
 }
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index b0295a6163..dbbf43082f 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -218,12 +218,6 @@ class HeapAlgorithm {
   // Alloc allocates a buffer of 'size' bytes.
   virtual void Alloc(const BufferValue* buffer, int64 size) = 0;
 
-  // NoFragmentationStatsHeap overrides this method.
-  virtual void Alloc(const BufferValue* buffer, int64 size,
-                     const HloInstruction* instruction) {
-    Alloc(buffer, size);
-  }
-
   // Takes memory usage of subcomputations into account when calculating the
   // memory usage of a computation. Currently, we don't handle buffer aliasing
   // between computations entirely correctly. We are careful to not double count
@@ -235,6 +229,8 @@ class HeapAlgorithm {
   // analysis, it's not worth making major changes to HeapSimulator now.
   virtual void AccountForSubcomputationMemory(
       const HloInstruction* instruction,
+      // The total number of bytes allocated by instruction.
+      int64 alloc_size_by_instruction,
       const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) {}
 
@@ -257,11 +253,8 @@ class NoFragmentationStatsHeap : public HeapAlgorithm {
 
   void Alloc(const BufferValue* buffer, int64 size) override;
 
-  void Alloc(const BufferValue* buffer, int64 size,
-             const HloInstruction* instruction) override;
-
   void AccountForSubcomputationMemory(
-      const HloInstruction* instruction,
+      const HloInstruction* instruction, int64 alloc_size_by_instruction,
       const absl::flat_hash_map<const HloComputation*, int64>&
           memory_by_computation) override;
 
diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc
index ea0bced923..e30e7667f3 100644
--- a/tensorflow/compiler/xla/service/heap_simulator_test.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc
@@ -98,6 +98,124 @@ TEST_F(MinimumMemoryForSequenceTest, MultiComputation) {
       HeapSimulator::MinimumMemoryForModule(schedule, size_fn).ValueOrDie());
 }
 
+TEST_F(MinimumMemoryForSequenceTest, SubcomputationAccounting) {
+  // HloModule SubcomputationAccounting
+
+  // %WhileBody (body_param: f32[4]) -> f32[4] {
+  //   %body_param = f32[4]{0} parameter(0)
+  //   %constant.1 = f32[4]{0} constant({1, 1, 1, 1})
+  //   ROOT %subtract = f32[4]{0} subtract(f32[4]{0} %body_param, f32[4]{0}
+  //   %constant.1)
+  // }
+
+  // %WhileCond (cond_param: f32[4]) -> pred[] {
+  //   %cond_param = f32[4]{0} parameter(0)
+  //   %slice = f32[1]{0} slice(f32[4]{0} %cond_param), slice={[0:1]}
+  //   %reshape = f32[] reshape(f32[1]{0} %slice)
+  //   %constant = f32[] constant(0)
+  //   ROOT %not-equal-to = pred[] not-equal-to(f32[] %reshape, f32[] %constant)
+  // }
+
+  // ENTRY %SubcomputationAccounting () -> f32[2,4] {
+  //   %constant.3 = f32[2,4]{1,0} constant(f32[2,4] { { 1, 2, 3, 4 }, { 1, 2,
+  //   3, 4 } }) %transpose = f32[2,4]{1,0} transpose(f32[2,4]{1,0}
+  //   %constant.3), dimensions={0,1} %constant.2 = f32[4]{0} constant({1, 1, 1,
+  //   1}) %while = f32[4]{0} while(f32[4]{0} %constant.2),
+  //   condition=%WhileCond, body=%WhileBody %broadcast = f32[2,4]{1,0}
+  //   broadcast(f32[4]{0} %while), dimensions={1} ROOT %add = f32[2,4]{1,0}
+  //   add(f32[2,4]{1,0} %transpose, f32[2,4]{1,0} %broadcast)
+  // }
+
+  auto module = CreateNewVerifiedModule();
+  const Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+  const Shape r1f32 = ShapeUtil::MakeShape(F32, {4});
+  const Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 4});
+
+  // reshape(slice(param)) != 0
+  // Needs 5 bytes
+  auto cond_builder = HloComputation::Builder("WhileCond");
+  HloInstruction* cond_param = cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r1f32, "cond_param"));
+  HloInstruction* slice =
+      cond_builder.AddInstruction(HloInstruction::CreateSlice(
+          ShapeUtil::MakeShape(F32, {1}), cond_param, {0}, {1}, {1}));
+  HloInstruction* reshape =
+      cond_builder.AddInstruction(HloInstruction::CreateReshape(r0f32, slice));
+  HloInstruction* zero = cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(0)));
+  HloInstruction* cond_comparison =
+      cond_builder.AddInstruction(HloInstruction::CreateBinary(
+          ShapeUtil::MakeShape(PRED, {}), HloOpcode::kNe, reshape, zero));
+  auto cond_computation = module->AddEmbeddedComputation(cond_builder.Build());
+
+  // param - 1
+  // Needs 16 bytes
+  auto body_builder = HloComputation::Builder("WhileBody");
+  HloInstruction* body_param = body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r1f32, "body_param"));
+  HloInstruction* one_vector =
+      body_builder.AddInstruction(HloInstruction::CreateConstant(
+          LiteralUtil::CreateR1<float>({1, 1, 1, 1})));
+  HloInstruction* subtract =
+      body_builder.AddInstruction(HloInstruction::CreateBinary(
+          r1f32, HloOpcode::kSubtract, body_param, one_vector));
+  auto body_computation = module->AddEmbeddedComputation(body_builder.Build());
+
+  // transpose(matrix) + bcast(while)
+  auto builder = HloComputation::Builder(TestName());
+  HloInstruction* while_init =
+      builder.AddInstruction(HloInstruction::CreateConstant(
+          LiteralUtil::CreateR1<float>({1, 1, 1, 1})));
+  // Creates 16 bytes, ignoring subcomputations
+  HloInstruction* while_loop =
+      builder.AddInstruction(HloInstruction::CreateWhile(
+          r1f32, cond_computation, body_computation, while_init));
+
+  // Creates 32 bytes and frees 16
+  HloInstruction* bcast = builder.AddInstruction(
+      HloInstruction::CreateBroadcast(r2f32, while_loop, {1}));
+
+  HloInstruction* matrix = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR2<float>(
+          {{1.0, 2.0, 3.0, 4.0}, {1.0, 2.0, 3.0, 4.0}})));
+  // Creates 32 bytes
+  HloInstruction* transpose = builder.AddInstruction(
+      HloInstruction::CreateTranspose(r2f32, matrix, {0, 1}));
+
+  // Creates 32 bytes and frees 64
+  HloInstruction* add = builder.AddInstruction(
+      HloInstruction::CreateBinary(r2f32, HloOpcode::kAdd, transpose, bcast));
+
+  auto entry_computation = module->AddEntryComputation(builder.Build());
+
+  HloSchedule schedule(module.get());
+  std::vector<HloInstruction*> cond_vec = {cond_param, slice, reshape, zero,
+                                           cond_comparison};
+  std::vector<HloInstruction*> while_body_vec = {body_param, one_vector,
+                                                 subtract};
+  std::vector<HloInstruction*> entry_comp_vec = {while_init, while_loop, bcast,
+                                                 matrix,     transpose,  add};
+  schedule.set_sequence(cond_computation, cond_vec);
+  schedule.set_sequence(body_computation, while_body_vec);
+  schedule.set_sequence(entry_computation, entry_comp_vec);
+
+  auto size_fn = [](const BufferValue& buffer) {
+    return ShapeUtil::ByteSizeOf(buffer.shape());
+  };
+  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
+  memory_by_computation[cond_computation] = 5;
+  memory_by_computation[body_computation] = 16;
+  std::unique_ptr<TuplePointsToAnalysis> points_to_analysis =
+      TuplePointsToAnalysis::Run(module.get()).ValueOrDie();
+
+  // HeapSimulator accounts for subcomputations. The output buffer is aliased,
+  // so we don't double count.
+  EXPECT_EQ(64, HeapSimulator::MinimumMemoryForComputation(
+                    *entry_computation, schedule.sequence(entry_computation),
+                    *points_to_analysis, size_fn, &memory_by_computation)
+                    .ValueOrDie());
+}
+
 const char kAlloc[] = "Alloc";
 const char kFree[] = "Free";
 const char kFinish[] = "Finish";
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
index 5a9fccc7dd..214119fba8 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler_test.cc
@@ -147,126 +147,6 @@ ENTRY root {
                                       instructions_by_name.at("e")));
 }
 
-TEST_F(HloSchedulingTest, ListAccountsForSubcomputations) {
-  // %WhileCond (cond_param: f32[4]) -> pred[] {
-  //   %cond_param = f32[4]{0} parameter(0)
-  //   %constant = f32[1,4]{1,0} constant(f32[1,4] { { 0, 0, 0, 0 } })
-  //   ROOT %not-equal-to = pred[] not-equal-to(
-  //     f32[4]{0} %cond_param, f32[1,4]{1,0} %constant)
-  // }
-  // %WhileBody (body_param: f32[4]) -> f32[4] {
-  //   %body_param = f32[4]{0} parameter(0)
-  //   %constant.1 = f32[1,4]{1,0} constant(f32[1,4] { { 1, 1, 1, 1 } })
-  //   ROOT %subtract = f32[4]{0} subtract(
-  //     f32[4]{0} %body_param, f32[1,4]{1,0} %constant.1)
-  // }
-  // %ListAccountsForSubcomputations () -> f32[2,4] {
-  //   %constant.3 = f32[2,4]{1,0} constant(
-  //     f32[2,4] { { 1, 2, 3, 4 }, { 1, 2, 3, 4 } })
-  //   %transpose = f32[2,4]{1,0} transpose(
-  //     f32[2,4]{1,0} %constant.3), dimensions={0,1}
-  //   %constant.2 = f32[1,4]{1,0} constant(f32[1,4] { { 1, 1, 1, 1 } })
-  //   %while = f32[4]{0} while(f32[1,4]{1,0} %constant.2),
-  //      condition=%WhileCond,
-  //      body=%WhileBody
-  //   %broadcast = f32[2,4]{1,0} broadcast(f32[4]{0} %while), dimensions={0}
-  //   ROOT %add = f32[2,4]{1,0} add(
-  //     f32[2,4]{1,0} %transpose, f32[2,4]{1,0} %broadcast)
-  // }
-
-  auto module = CreateNewModule();
-  const Shape r1f32 = ShapeUtil::MakeShape(F32, {4});
-  const Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 4});
-
-  // param != 0
-  // Needs 17 bytes
-  auto cond_builder = HloComputation::Builder("WhileCond");
-  HloInstruction* cond_param = cond_builder.AddInstruction(
-      HloInstruction::CreateParameter(0, r1f32, "cond_param"));
-  HloInstruction* zero_vector =
-      cond_builder.AddInstruction(HloInstruction::CreateConstant(
-          LiteralUtil::CreateR2<float>({{0, 0, 0, 0}})));
-  cond_builder.AddInstruction(HloInstruction::CreateBinary(
-      ShapeUtil::MakeShape(PRED, {}), HloOpcode::kNe, cond_param, zero_vector));
-  auto cond_computation = module->AddEmbeddedComputation(cond_builder.Build());
-
-  // param - 1
-  // Needs 16 bytes
-  auto body_builder = HloComputation::Builder("WhileBody");
-  HloInstruction* body_param = body_builder.AddInstruction(
-      HloInstruction::CreateParameter(0, r1f32, "body_param"));
-  HloInstruction* one_vector =
-      body_builder.AddInstruction(HloInstruction::CreateConstant(
-          LiteralUtil::CreateR2<float>({{1, 1, 1, 1}})));
-  body_builder.AddInstruction(HloInstruction::CreateBinary(
-      r1f32, HloOpcode::kSubtract, body_param, one_vector));
-  auto body_computation = module->AddEmbeddedComputation(body_builder.Build());
-
-  // transpose(matrix) + bcast(while)
-  auto builder = HloComputation::Builder(TestName());
-  HloInstruction* while_init =
-      builder.AddInstruction(HloInstruction::CreateConstant(
-          LiteralUtil::CreateR2<float>({{1, 1, 1, 1}})));
-  // Creates 16 bytes, ignoring subcomputations
-  HloInstruction* while_loop =
-      builder.AddInstruction(HloInstruction::CreateWhile(
-          r1f32, cond_computation, body_computation, while_init));
-
-  // Creates 32 bytes and frees 16
-  HloInstruction* bcast = builder.AddInstruction(
-      HloInstruction::CreateBroadcast(r2f32, while_loop, {0}));
-
-  HloInstruction* matrix = builder.AddInstruction(
-      HloInstruction::CreateConstant(LiteralUtil::CreateR2<float>(
-          {{1.0, 2.0, 3.0, 4.0}, {1.0, 2.0, 3.0, 4.0}})));
-  // Creates 32 bytes
-  HloInstruction* transpose = builder.AddInstruction(
-      HloInstruction::CreateTranspose(r2f32, matrix, {0, 1}));
-
-  // Creates 32 bytes and frees 64
-  HloInstruction* add = builder.AddInstruction(
-      HloInstruction::CreateBinary(r2f32, HloOpcode::kAdd, transpose, bcast));
-
-  module->AddEntryComputation(builder.Build());
-
-  auto size_fn = [](const BufferValue& buffer) {
-    return ShapeUtil::ByteSizeOf(buffer.shape());
-  };
-  TF_ASSERT_OK_AND_ASSIGN(
-      HloSchedule schedule,
-      ScheduleModule(*module, size_fn, ListMemoryScheduler));
-  // Verify that all instructions are in the sequence.
-  auto entry_computation = module->entry_computation();
-  EXPECT_EQ(entry_computation->instruction_count(),
-            schedule.sequence(entry_computation).size());
-  SequentialHloOrdering ordering(schedule);
-  // This schedule is an example of List's greedy heuristics being suboptimal.
-  // The while_loop is more expensive than transpose, so it would have been
-  // better to schedule it first, instead of during the busy time.
-  EXPECT_TRUE(ordering.ExecutesBefore(transpose, while_loop));
-  EXPECT_TRUE(ordering.ExecutesBefore(transpose, bcast));
-  EXPECT_TRUE(ordering.ExecutesBefore(bcast, add));
-  EXPECT_TRUE(ordering.ExecutesBefore(transpose, add));
-
-  absl::flat_hash_map<const HloComputation*, int64> memory_by_computation;
-  memory_by_computation[cond_computation] = 17;
-  memory_by_computation[body_computation] = 16;
-  std::unique_ptr<TuplePointsToAnalysis> points_to_analysis =
-      TuplePointsToAnalysis::Run(module.get()).ValueOrDie();
-
-  // HeapSimulator doesn't account for subcomputations
-  EXPECT_EQ(80, HeapSimulator::MinimumMemoryForComputation(
-                    *entry_computation, schedule.sequence(entry_computation),
-                    *points_to_analysis, size_fn)
-                    .ValueOrDie());
-  // HeapSimulator accounts for subcomputations. The output buffer is aliased,
-  // so we don't double count.
-  EXPECT_EQ(64, HeapSimulator::MinimumMemoryForComputation(
-                    *entry_computation, schedule.sequence(entry_computation),
-                    *points_to_analysis, size_fn, &memory_by_computation)
-                    .ValueOrDie());
-}
-
 TEST_F(HloSchedulingTest, TuplesAreAccountedCorrectly) {
   auto builder = HloComputation::Builder(TestName());
   const auto TUPLE_SIZE = 1;
-- 
GitLab


From caaf9a89750a9a0b3d66f3ce3e9bd507f4c6514c Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Wed, 3 Oct 2018 16:51:30 -0700
Subject: [PATCH 0320/1085] Create new classes for Keras tests to allow us to
 create new test targets.

PiperOrigin-RevId: 215653650
---
 .../contrib/distribute/python/keras_test.py   | 256 +++++++++---------
 1 file changed, 131 insertions(+), 125 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 993cb2bac3..3511b7761f 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -355,48 +355,9 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase):
     gfile.DeleteRecursively(self._config.model_dir)
 
 
-class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
-
-  def test_validating_dataset_input_tensors_with_shape_mismatch(self):
-    with self.cached_session():
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
-                                                     '/device:CPU:0'])
-      a = constant_op.constant([1, 2], shape=(1, 2))
-      b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
-      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
-      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
-      with strategy.scope():
-        # Removed device and input tensor shape details from the error message
-        # since the order of the device and the corresponding input tensor shape
-        # is not deterministic over different runs.
-        with self.assertRaisesRegexp(ValueError,
-                                     'Input tensor shapes do not match for '
-                                     'distributed tensor inputs '
-                                     'DistributedValues:.+'):
-          distributed_training_utils.validate_distributed_dataset_inputs(
-              strategy, x, y)
-
-  def test_validating_dataset_input_tensors_with_dtype_mismatch(self):
-    with self.cached_session():
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
-                                                     '/device:CPU:0'])
-      a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
-      b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64)
-      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
-      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
-      with strategy.scope():
-        # Removed device and input tensor dtype details from the error message
-        # since the order of the device and the corresponding input tensor dtype
-        # is not deterministic over different runs.
-        with self.assertRaisesRegexp(ValueError,
-                                     'Input tensor dtypes do not match for '
-                                     'distributed tensor inputs '
-                                     'DistributedValues:.+'):
-          distributed_training_utils.validate_distributed_dataset_inputs(
-              strategy, x, y)
+class TestDistributionStrategyWithNumpyArrays(test.TestCase,
+                                              parameterized.TestCase):
 
-  # TODO(anjalisridhar): Move this test along with other numpy related tests to
-  # its own class.
   @combinations.generate(strategy_combinations())
   def test_creating_var_with_numpy_arrays(self, distribution):
     with self.cached_session():
@@ -479,6 +440,10 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       # with batch_size
       model.predict(inputs, batch_size=8)
 
+
+class TestDistributionStrategyWithDatasets(test.TestCase,
+                                           parameterized.TestCase):
+
   @combinations.generate(strategy_combinations())
   def test_calling_model_on_same_dataset(self, distribution):
     with self.cached_session():
@@ -572,86 +537,6 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       model.evaluate(dataset, steps=2, verbose=1)
       model.predict(get_predict_dataset(distribution), steps=2)
 
-  def test_unsupported_features(self):
-    with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae']
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
-                                                     '/device:GPU:0'])
-
-      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
-
-      dataset = get_dataset(strategy)
-
-      # Test with validation split
-      with self.assertRaisesRegexp(
-          ValueError, '`validation_split` argument is not '
-                      'supported when input `x` is a dataset or a '
-                      'dataset iterator.+'):
-        model.fit(dataset,
-                  epochs=1, steps_per_epoch=2, verbose=0,
-                  validation_split=0.5, validation_steps=2)
-
-      # Test with sample weight.
-      sample_weight = np.random.random((10,))
-      with self.assertRaisesRegexp(
-          NotImplementedError, '`sample_weight` is currently not supported '
-                               'when using DistributionStrategy.'):
-        model.fit(
-            dataset,
-            epochs=1,
-            steps_per_epoch=2,
-            verbose=0,
-            sample_weight=sample_weight)
-
-      # Test with not specifying the `steps` argument.
-      with self.assertRaisesRegexp(
-          ValueError, 'you should specify the `steps_per_epoch` argument'):
-        model.fit(dataset, epochs=1, verbose=0)
-      with self.assertRaisesRegexp(ValueError,
-                                   'you should specify the `steps` argument'):
-        model.evaluate(dataset, verbose=0)
-
-      with self.assertRaisesRegexp(ValueError,
-                                   'you should specify the `steps` argument'):
-        model.predict(dataset, verbose=0)
-
-  def test_calling_with_unsupported_predefined_callbacks(self):
-    with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae']
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
-                                                     '/device:GPU:0'])
-      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
-
-      dataset = get_dataset(strategy)
-
-      def schedule(_):
-        return 0.001
-      with self.assertRaisesRegexp(ValueError,
-                                   'LearningRateScheduler callback is not '
-                                   'supported with DistributionStrategy.'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
-
-      with self.assertRaisesRegexp(ValueError,
-                                   'ReduceLROnPlateau callback is not '
-                                   'supported with DistributionStrategy.'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.ReduceLROnPlateau()])
-      with self.assertRaisesRegexp(ValueError,
-                                   'histogram_freq in the TensorBoard callback '
-                                   'is not supported when using '
-                                   'DistributionStrategy.'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)])
-
   def test_dataset_input_shape_validation(self):
     with self.cached_session():
       model = get_model()
@@ -736,7 +621,128 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase):
       self.assertNotEqual(np.mean(predict_output), 0)
 
 
-class LossMaskingWithDistributionStrategyTest(test.TestCase):
+class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
+
+  def test_validating_dataset_input_tensors_with_shape_mismatch(self):
+    with self.cached_session():
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
+                                                     '/device:CPU:0'])
+      a = constant_op.constant([1, 2], shape=(1, 2))
+      b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
+      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
+      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
+      with strategy.scope():
+        # Removed device and input tensor shape details from the error message
+        # since the order of the device and the corresponding input tensor shape
+        # is not deterministic over different runs.
+        with self.assertRaisesRegexp(ValueError,
+                                     'Input tensor shapes do not match for '
+                                     'distributed tensor inputs '
+                                     'DistributedValues:.+'):
+          distributed_training_utils.validate_distributed_dataset_inputs(
+              strategy, x, y)
+
+  def test_validating_dataset_input_tensors_with_dtype_mismatch(self):
+    with self.cached_session():
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
+                                                     '/device:CPU:0'])
+      a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
+      b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64)
+      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
+      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
+      with strategy.scope():
+        # Removed device and input tensor dtype details from the error message
+        # since the order of the device and the corresponding input tensor dtype
+        # is not deterministic over different runs.
+        with self.assertRaisesRegexp(ValueError,
+                                     'Input tensor dtypes do not match for '
+                                     'distributed tensor inputs '
+                                     'DistributedValues:.+'):
+          distributed_training_utils.validate_distributed_dataset_inputs(
+              strategy, x, y)
+
+  def test_unsupported_features(self):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      metrics = ['mae']
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
+                                                     '/device:GPU:0'])
+
+      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
+
+      dataset = get_dataset(strategy)
+
+      # Test with validation split
+      with self.assertRaisesRegexp(
+          ValueError, '`validation_split` argument is not '
+                      'supported when input `x` is a dataset or a '
+                      'dataset iterator.+'):
+        model.fit(dataset,
+                  epochs=1, steps_per_epoch=2, verbose=0,
+                  validation_split=0.5, validation_steps=2)
+
+      # Test with sample weight.
+      sample_weight = np.random.random((10,))
+      with self.assertRaisesRegexp(
+          NotImplementedError, '`sample_weight` is currently not supported '
+                               'when using DistributionStrategy.'):
+        model.fit(
+            dataset,
+            epochs=1,
+            steps_per_epoch=2,
+            verbose=0,
+            sample_weight=sample_weight)
+
+      # Test with not specifying the `steps` argument.
+      with self.assertRaisesRegexp(
+          ValueError, 'you should specify the `steps_per_epoch` argument'):
+        model.fit(dataset, epochs=1, verbose=0)
+      with self.assertRaisesRegexp(ValueError,
+                                   'you should specify the `steps` argument'):
+        model.evaluate(dataset, verbose=0)
+
+      with self.assertRaisesRegexp(ValueError,
+                                   'you should specify the `steps` argument'):
+        model.predict(dataset, verbose=0)
+
+  def test_calling_with_unsupported_predefined_callbacks(self):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      metrics = ['mae']
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:1',
+                                                     '/device:GPU:0'])
+      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
+
+      dataset = get_dataset(strategy)
+
+      def schedule(_):
+        return 0.001
+      with self.assertRaisesRegexp(ValueError,
+                                   'LearningRateScheduler callback is not '
+                                   'supported with DistributionStrategy.'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                  callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
+
+      with self.assertRaisesRegexp(ValueError,
+                                   'ReduceLROnPlateau callback is not '
+                                   'supported with DistributionStrategy.'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                  callbacks=[keras.callbacks.ReduceLROnPlateau()])
+      with self.assertRaisesRegexp(ValueError,
+                                   'histogram_freq in the TensorBoard callback '
+                                   'is not supported when using '
+                                   'DistributionStrategy.'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                  callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)])
+
+
+class TestDistributionStrategyWithLossMasking(test.TestCase):
 
   # TODO(priyag): Enable all strategies for this test. Currently it does not
   # work for TPU due to some invalid datatype.
@@ -763,7 +769,7 @@ class LossMaskingWithDistributionStrategyTest(test.TestCase):
       self.assertEqual(hist.history['loss'][0], 0)
 
 
-class NormalizationLayerWithDistributionStrategyTest(
+class TestDistributionStrategyWithNormalizationLayer(
     test.TestCase, parameterized.TestCase):
 
   @combinations.generate(strategy_combinations())
@@ -795,8 +801,8 @@ class NormalizationLayerWithDistributionStrategyTest(
       np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
 
 
-class CorrectnessWithDistributionStrategyTest(test.TestCase,
-                                              parameterized.TestCase):
+class TestDistributionStrategyCorrectness(test.TestCase,
+                                          parameterized.TestCase):
 
   @combinations.generate(strategy_combinations())
   def test_metric_correctness(self, distribution):
-- 
GitLab


From 3a9a3664fe1aa9e5c81ca4959f028c2a8161520e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 16:52:30 -0700
Subject: [PATCH 0321/1085] Fix 1970s-style bug in LogSoftmax eval.

PiperOrigin-RevId: 215653797
---
 tensorflow/contrib/lite/kernels/activations.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc
index cf9441aee3..9aed4f09b8 100644
--- a/tensorflow/contrib/lite/kernels/activations.cc
+++ b/tensorflow/contrib/lite/kernels/activations.cc
@@ -616,13 +616,15 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
   switch (input->type) {
-    case kTfLiteFloat32:
+    case kTfLiteFloat32: {
       SoftmaxParams op_params;
       optimized_ops::LogSoftmax(
           op_params, GetTensorShape(input), GetTensorData<float>(input),
           GetTensorShape(output), GetTensorData<float>(output));
       return kTfLiteOk;
-    case kTfLiteUInt8:
+    }
+    case kTfLiteUInt8: {
+      SoftmaxParams op_params;
       op_params.input_multiplier = data->input_multiplier;
       op_params.input_left_shift = data->input_left_shift;
       op_params.reverse_scaling_divisor = data->reverse_scaling_divisor;
@@ -632,6 +634,7 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
           op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
           GetTensorShape(output), GetTensorData<uint8_t>(output));
       return kTfLiteOk;
+    }
     default:
       context->ReportError(context, "Only float32 supported currently., got %d",
                            input->type);
-- 
GitLab


From d340eb9f7ea46012b7ead202f4c12fb6b32cc56d Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 16:56:14 -0700
Subject: [PATCH 0322/1085] Increase error-epsilon for
 ProfilingTest::ProfilesAreCollected.

PiperOrigin-RevId: 215654327
---
 tensorflow/contrib/lite/profiling/profiler_test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/profiling/profiler_test.cc b/tensorflow/contrib/lite/profiling/profiler_test.cc
index 0fba0450a0..cf56eed2a4 100644
--- a/tensorflow/contrib/lite/profiling/profiler_test.cc
+++ b/tensorflow/contrib/lite/profiling/profiler_test.cc
@@ -83,8 +83,8 @@ TEST(ProfilingTest, ProfilesAreCollected) {
   EXPECT_EQ("SleepForQuarter", profile_events[4]->tag);
 
 #ifndef ADDRESS_SANITIZER
-  // ASAN build is sometimes very slow.
-  const int eps_ms = 10;
+  // ASAN build is sometimes very slow. Set a large epsilon to avoid flakiness.
+  const int eps_ms = 50;
   AssertDurationOfEventAroundMs(profile_events[0], /*expected_ms*/ 500, eps_ms);
   AssertDurationOfEventAroundMs(profile_events[1], /*expected_ms*/ 250, eps_ms);
   AssertDurationOfEventAroundMs(profile_events[2], /*expected_ms*/ 250, eps_ms);
-- 
GitLab


From 031ad1a5cd0609e66886f49fd63ed1c79f488c72 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 16:52:30 -0700
Subject: [PATCH 0323/1085] Fix 1970s-style bug in LogSoftmax eval.

PiperOrigin-RevId: 215653797
---
 tensorflow/contrib/lite/kernels/activations.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc
index cf9441aee3..9aed4f09b8 100644
--- a/tensorflow/contrib/lite/kernels/activations.cc
+++ b/tensorflow/contrib/lite/kernels/activations.cc
@@ -616,13 +616,15 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
   switch (input->type) {
-    case kTfLiteFloat32:
+    case kTfLiteFloat32: {
       SoftmaxParams op_params;
       optimized_ops::LogSoftmax(
           op_params, GetTensorShape(input), GetTensorData<float>(input),
           GetTensorShape(output), GetTensorData<float>(output));
       return kTfLiteOk;
-    case kTfLiteUInt8:
+    }
+    case kTfLiteUInt8: {
+      SoftmaxParams op_params;
       op_params.input_multiplier = data->input_multiplier;
       op_params.input_left_shift = data->input_left_shift;
       op_params.reverse_scaling_divisor = data->reverse_scaling_divisor;
@@ -632,6 +634,7 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
           op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
           GetTensorShape(output), GetTensorData<uint8_t>(output));
       return kTfLiteOk;
+    }
     default:
       context->ReportError(context, "Only float32 supported currently., got %d",
                            input->type);
-- 
GitLab


From c842d38978a0babb373fe2acbb0231960aa1c1d0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 17:05:38 -0700
Subject: [PATCH 0324/1085] Add MinimalRNN cell.

The implementation is based on: https://arxiv.org/pdf/1806.05394v2.pdf.

PiperOrigin-RevId: 215655857
---
 .../rnn/python/kernel_tests/rnn_cell_test.py  |  72 +++++++++++
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 116 ++++++++++++++++++
 2 files changed, 188 insertions(+)

diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index 6689664fb9..0a27200015 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -29,6 +29,9 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
+from tensorflow.python.keras import initializers
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.keras import utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
@@ -40,7 +43,9 @@ from tensorflow.python.ops import rnn_cell
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import test
+from tensorflow.python.training import training
 from tensorflow.python.util import nest
 
 
@@ -1115,6 +1120,73 @@ class RNNCellTest(test.TestCase):
             r"input size \(3\) must be divisible by number_of_groups \(2\)"):
           gcell(glstm_input, gcell_zero_state)
 
+  def testMinimalRNNCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope(
+          "root"):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        cell = contrib_rnn_cell.MinimalRNNCell(
+            units=2,
+            kernel_initializer=initializers.Constant(0.5))
+        g, _ = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.18899589, 0.18899589]])
+      with variable_scope.variable_scope(
+          "other"):
+        # Test MinimalRNN with input_size != num_units.
+        x = array_ops.zeros([1, 3])
+        m = array_ops.zeros([1, 2])
+        cell = contrib_rnn_cell.MinimalRNNCell(
+            units=2,
+            kernel_initializer=initializers.Constant(0.5))
+        g, _ = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.19554167, 0.19554167]])
+
+  def testMinimalRNNCellEndToEnd(self):
+    with self.cached_session() as sess:
+      input_shape = 10
+      output_shape = 5
+      timestep = 4
+      batch = 100
+      (x_train, y_train), _ = testing_utils.get_test_data(
+          train_samples=batch,
+          test_samples=0,
+          input_shape=(timestep, input_shape),
+          num_classes=output_shape)
+      y_train = utils.to_categorical(y_train)
+      cell = contrib_rnn_cell.MinimalRNNCell(output_shape)
+
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape))
+      predict = array_ops.placeholder(
+          dtypes.float32, shape=(None, output_shape))
+
+      outputs, state = rnn.dynamic_rnn(
+          cell, inputs, dtype=dtypes.float32)
+      self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape])
+      self.assertEqual(state.shape.as_list(), [None, output_shape])
+      loss = losses.softmax_cross_entropy(predict, state)
+      train_op = training.GradientDescentOptimizer(0.001).minimize(loss)
+
+      sess.run([variables.global_variables_initializer()])
+      _, outputs, state = sess.run(
+          [train_op, outputs, state], {inputs: x_train, predict: y_train})
+
+      self.assertEqual(len(outputs), batch)
+      self.assertEqual(len(state), batch)
+
 
 class LayerNormBasicLSTMCellTest(test.TestCase):
 
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 06c481672c..59a61af7b3 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -28,6 +28,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import op_def_registry
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.keras import activations
+from tensorflow.python.keras import initializers
 from tensorflow.python.layers import base as base_layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
@@ -3394,3 +3396,117 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell):
 
     new_state = rnn_cell_impl.LSTMStateTuple(new_c, new_h)
     return new_h, new_state
+
+
+class MinimalRNNCell(rnn_cell_impl.LayerRNNCell):
+  """MinimalRNN cell.
+
+  The implementation is based on:
+
+    https://arxiv.org/pdf/1806.05394v2.pdf
+
+  Minmin Chen, Jeffrey Pennington, Samuel S. Schoenholz.
+  "Dynamical Isometry and a Mean Field Theory of RNNs: Gating Enables Signal
+   Propagation in Recurrent Neural Networks." ICML, 2018.
+
+  A MinimalRNN cell first projects the input to the hidden space. The new
+  hidden state is then calcuated as a weighted sum of the projected input and
+  the previous hidden state, using a single update gate.
+  """
+
+  def __init__(self,
+               units,
+               activation="tanh",
+               kernel_initializer="glorot_uniform",
+               bias_initializer="ones",
+               name=None,
+               dtype=None,
+               **kwargs):
+    """Initialize the parameters for a MinimalRNN cell.
+
+    Args:
+      units: int, The number of units in the MinimalRNN cell.
+      activation: Nonlinearity to use in the feedforward network. Default:
+        `tanh`.
+      kernel_initializer: The initializer to use for the weight in the update
+        gate and feedforward network. Default: `glorot_uniform`.
+      bias_initializer: The initializer to use for the bias in the update
+        gate. Default: `ones`.
+      name: String, the name of the cell.
+      dtype: Default dtype of the cell.
+      **kwargs: Dict, keyword named properties for common cell attributes.
+    """
+    super(MinimalRNNCell, self).__init__(name=name, dtype=dtype, **kwargs)
+
+    # Inputs must be 2-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+    self.units = units
+    self.activation = activations.get(activation)
+    self.kernel_initializer = initializers.get(kernel_initializer)
+    self.bias_initializer = initializers.get(bias_initializer)
+
+  @property
+  def state_size(self):
+    return self.units
+
+  @property
+  def output_size(self):
+    return self.units
+
+  def build(self, inputs_shape):
+    if inputs_shape[-1] is None:
+      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
+                       % str(inputs_shape))
+
+    input_size = inputs_shape[-1]
+    # pylint: disable=protected-access
+    # self._kernel contains W_x, W, V
+    self.kernel = self.add_weight(
+        name=rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+        shape=[input_size + 2 * self.units, self.units],
+        initializer=self.kernel_initializer)
+    self.bias = self.add_weight(
+        name=rnn_cell_impl._BIAS_VARIABLE_NAME,
+        shape=[self.units],
+        initializer=self.bias_initializer)
+    # pylint: enable=protected-access
+
+    self.built = True
+
+  def call(self, inputs, state):
+    """Run one step of MinimalRNN.
+
+    Args:
+      inputs: input Tensor, must be 2-D, `[batch, input_size]`.
+      state: state Tensor, must be 2-D, `[batch, state_size]`.
+
+    Returns:
+      A tuple containing:
+
+      - Output: A `2-D` tensor with shape `[batch_size, state_size]`.
+      - New state: A `2-D` tensor with shape `[batch_size, state_size]`.
+
+    Raises:
+      ValueError: If input size cannot be inferred from inputs via
+        static shape inference.
+    """
+    input_size = inputs.get_shape()[1]
+    if input_size.value is None:
+      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
+
+    feedforward_weight, gate_weight = array_ops.split(
+        value=self.kernel,
+        num_or_size_splits=[input_size.value, 2 * self.units],
+        axis=0)
+
+    feedforward = math_ops.matmul(inputs, feedforward_weight)
+    feedforward = self.activation(feedforward)
+
+    gate_inputs = math_ops.matmul(
+        array_ops.concat([feedforward, state], 1), gate_weight)
+    gate_inputs = nn_ops.bias_add(gate_inputs, self.bias)
+    u = math_ops.sigmoid(gate_inputs)
+
+    new_h = u * state + (1 - u) * feedforward
+    return new_h, new_h
-- 
GitLab


From 4da5b350e1c062b9d55896ee872e0e4790f30bcb Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Wed, 3 Oct 2018 17:25:46 -0700
Subject: [PATCH 0325/1085] TFLite Flex: Blacklist Control Flow Ops

PiperOrigin-RevId: 215658384
---
 tensorflow/contrib/lite/toco/tflite/export.cc | 132 +++++++++++++-----
 tensorflow/contrib/lite/toco/tflite/export.h  |  20 ++-
 .../contrib/lite/toco/tflite/export_test.cc   |  40 ++++++
 3 files changed, 152 insertions(+), 40 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index 0c9fac249c..45ca7f7f0c 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -47,6 +47,22 @@ using ::tflite::Tensor;
 
 namespace {
 
+// Check if a TensorFlow Op is a control flow op by its name.
+bool IsControlFlowOp(const string& tensorflow_op) {
+  // Technically this is equalivent to `::tensorflow::Node::IsControlFlow()`.
+  // It requires to construct a `::tensorflow::Graph` to use that helper
+  // function, so we simply hardcode the list of control flow ops here.
+  if (tensorflow_op == "Switch" || tensorflow_op == "RefSwitch" ||
+      tensorflow_op == "Merge" || tensorflow_op == "RefMerge" ||
+      tensorflow_op == "Enter" || tensorflow_op == "RefEnter" ||
+      tensorflow_op == "Exit" || tensorflow_op == "RefExit" ||
+      tensorflow_op == "NextIteration" || tensorflow_op == "RefNextIteration") {
+    return true;
+  }
+  // TODO(ycling): Also check how to handle Variable ops and Assign ops.
+  return false;
+}
+
 details::OperatorKey GetOperatorKey(
     const ::toco::Operator& op,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
@@ -55,21 +71,13 @@ details::OperatorKey GetOperatorKey(
   if (op.type == OperatorType::kUnsupported) {
     const TensorFlowUnsupportedOperator& unsupported_op =
         static_cast<const TensorFlowUnsupportedOperator&>(op);
-
-    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
-    // to populate a regular custom op. We need to find a way to fix this.
-    if (allow_flex_ops) {
-      custom_code = string(::tflite::kFlexCustomCodePrefix) +
-                    unsupported_op.tensorflow_op;
-    } else {
-      custom_code = unsupported_op.tensorflow_op;
-    }
+    custom_code = unsupported_op.tensorflow_op;
   }
   int version = 1;
   if (ops_by_type.count(op.type) != 0) {
     version = ops_by_type.at(op.type)->GetVersion(op);
   }
-  return details::OperatorKey(op.type, custom_code, version);
+  return details::OperatorKey(op.type, custom_code, version, allow_flex_ops);
 }
 
 void WriteModelToString(const flatbuffers::FlatBufferBuilder& builder,
@@ -83,6 +91,29 @@ void WriteModelToString(const flatbuffers::FlatBufferBuilder& builder,
 
 namespace details {
 
+OperatorKey::OperatorKey(OperatorType type, const std::string& custom_code,
+                         int version, bool allow_flex_ops) {
+  this->type = type;
+  this->custom_code = custom_code;
+  this->version = version;
+
+  if (type == OperatorType::kUnsupported) {
+    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
+    // to populate a regular custom op. We need to find a way to fix this.
+    if (allow_flex_ops) {
+      // Memorize the original TensorFlow op name.
+      this->flex_tensorflow_op = custom_code;
+      // Prefix the custom code of the flex op.
+      this->custom_code = string(::tflite::kFlexCustomCodePrefix) + custom_code;
+      this->is_flex_op = true;
+
+      if (IsControlFlowOp(this->flex_tensorflow_op)) {
+        is_unsupported_flex_op = true;
+      }
+    }
+  }
+}
+
 void LoadTensorsMap(const Model& model, TensorsMap* tensors_map) {
   // First find a list of unique array names.
   std::set<string> names;
@@ -199,7 +230,7 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
     const Model& model,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
     const details::OperatorsMap& operators_map, FlatBufferBuilder* builder,
-    std::set<string>* error_summary, const ExportParams& params) {
+    std::set<string>* unsupported_ops, const ExportParams& params) {
   // Map from operator name to TF Lite enum value, for all builtins.
   std::map<string, BuiltinOperator> builtin_ops;
   for (int i = BuiltinOperator_MIN; i <= BuiltinOperator_MAX; ++i) {
@@ -240,8 +271,8 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
       }
       // Either way, this is an operator that is not supported by TF Lite,
       // so we output it as a custom op and add it to the error summary.
-      if (error_summary) {
-        error_summary->insert(name);
+      if (unsupported_ops) {
+        unsupported_ops->insert(name);
       }
       ordered_opcodes[op_index] =
           CreateOperatorCode(*builder, BuiltinOperator_CUSTOM,
@@ -355,9 +386,9 @@ void Export(
   Array empty_array;
   buffers_to_write.push_back(&empty_array);
 
-  std::set<string> error_summary;
+  std::set<string> unsupported_ops;
   auto op_codes = ExportOperatorCodes(model, ops_by_type, operators_map,
-                                      &builder, &error_summary, params);
+                                      &builder, &unsupported_ops, params);
 
   for (const auto& op : model.operators) {
     if (op->type == OperatorType::kFakeQuant) {
@@ -367,30 +398,61 @@ void Export(
                       "for --std_values and --mean_values.";
     }
   }
-  if (!params.allow_custom_ops && !error_summary.empty()) {
-    // Remove ExpandDims and ReorderAxes from unimplemented list unless they
-    // compose the list. Both ops are removed during graph transformations.
-    // However, if an op is unimplemented earlier in the model, the graph
-    // transformation is unable to run because the output shape is not defined.
-    // This causes unnecessary confusion during model conversion time.
-    std::set<string> error_summary_final;
-    for (const auto& op_type : error_summary) {
-      if (op_type != "ReorderAxes" && op_type != "ExpandDims") {
-        error_summary_final.insert(op_type);
+  if (!unsupported_ops.empty()) {
+    if (!params.allow_custom_ops) {
+      // Remove ExpandDims and ReorderAxes from unimplemented list unless they
+      // compose the list. Both ops are removed during graph transformations.
+      // However, if an op is unimplemented earlier in the model, the graph
+      // transformation is unable to run because the output shape is not
+      // defined. This causes unnecessary confusion during model conversion
+      // time.
+      std::set<string> unsupported_ops_final;
+      for (const auto& op_type : unsupported_ops) {
+        if (op_type != "ReorderAxes" && op_type != "ExpandDims") {
+          unsupported_ops_final.insert(op_type);
+        }
+      }
+      if (unsupported_ops_final.empty()) {
+        unsupported_ops_final = unsupported_ops;
+      }
+
+      LOG(QFATAL)
+          << "Some of the operators in the model are not supported by "
+             "the standard TensorFlow Lite runtime. If you have a custom "
+             "implementation for them you can disable this error with "
+             "--allow_custom_ops, or by setting allow_custom_ops=True "
+             "when calling tf.contrib.lite.TFLiteConverter(). Here is a list "
+             "of operators for which  you will need custom implementations: "
+          << absl::StrJoin(unsupported_ops_final, ", ") << ".";
+    }
+
+    std::set<string> unsupported_control_flow_ops;
+    // Check if unsupported ops contains control flow ops. It's impossible
+    // to implement these ops as custom ops at the moment.
+    for (const auto& op : unsupported_ops) {
+      if (IsControlFlowOp(op)) {
+        unsupported_control_flow_ops.insert(op);
       }
     }
-    if (error_summary_final.empty()) {
-      error_summary_final = error_summary;
+    if (!unsupported_control_flow_ops.empty()) {
+      LOG(QFATAL)
+          << "TensorFlow Lite currently doesn't support control flow ops: "
+          << absl::StrJoin(unsupported_control_flow_ops, ", ") << ".";
     }
+  }
+
+  std::set<string> unsupported_flex_ops;
+  for (const auto& it : operators_map) {
+    const details::OperatorKey& key = it.first;
+    if (key.is_unsupported_flex_op) {
+      unsupported_flex_ops.insert(key.custom_code);
+    }
+  }
 
-    LOG(QFATAL)
-        << "Some of the operators in the model are not supported by "
-           "the standard TensorFlow Lite runtime. If you have a custom "
-           "implementation for them you can disable this error with "
-           "--allow_custom_ops, or by setting allow_custom_ops=True "
-           "when calling tf.contrib.lite.TFLiteConverter(). Here is a list "
-           "of operators for which  you will need custom implementations: "
-        << absl::StrJoin(error_summary_final, ", ") << ".";
+  if (!unsupported_flex_ops.empty()) {
+    LOG(QFATAL) << "Some of the operators in the model are not supported by "
+                   "TensorFlow Flex runtime: "
+                << absl::StrJoin(unsupported_flex_ops, ", ") << ".";
   }
 
   std::set<int32_t> variable_tensor_indices;
diff --git a/tensorflow/contrib/lite/toco/tflite/export.h b/tensorflow/contrib/lite/toco/tflite/export.h
index 29d6de4049..9efb282c6c 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.h
+++ b/tensorflow/contrib/lite/toco/tflite/export.h
@@ -81,11 +81,21 @@ using TensorsMap = std::unordered_map<string, int>;
 // Only when `type` is `kUnsupported`, `custom_code` is filled to
 // identify which operation is used.
 struct OperatorKey {
-  OperatorKey(OperatorType type, const std::string& custom_code, int version)
-      : type(type), custom_code(custom_code), version(version) {}
-  const OperatorType type;
-  const std::string custom_code;
-  const int version;
+  OperatorKey(OperatorType type, const std::string& custom_code, int version,
+              bool allow_flex_ops = false);
+
+  // Only `type`, `custom_code` and `version` is used to compute hash and
+  // identity.
+  OperatorType type;
+  std::string custom_code;
+  int version;
+
+  // THe fields below are not used to compute hash and identity.
+  bool is_flex_op = false;
+  bool is_unsupported_flex_op = false;
+  // The original TensorFlow op name for the flex op. Filled only when
+  // `is_flex_op` is true.
+  std::string flex_tensorflow_op;
 
   bool operator<(const OperatorKey& other) const {
     if (type < other.type) return true;
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index 93882a91a7..a71a64d56f 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -313,6 +313,46 @@ TEST_F(VersionedOpExportTest, Export) {
   EXPECT_EQ(1, (*operators)[1]->opcode_index());
 }
 
+TEST(OperatorKeyTest, TestBuiltinOp) {
+  details::OperatorKey key(OperatorType::kConv, "", 2);
+  EXPECT_EQ(key.type, OperatorType::kConv);
+  EXPECT_EQ(key.custom_code, "");
+  EXPECT_EQ(key.version, 2);
+}
+
+TEST(OperatorKeyTest, TestFlexOp) {
+  {
+    details::OperatorKey key(OperatorType::kUnsupported, "SomeUnsupportedOp", 1,
+                             false);
+    EXPECT_EQ(key.type, OperatorType::kUnsupported);
+    // It shouldn't be converted to Flex op if `allow_flex_op` is false.
+    EXPECT_EQ(key.custom_code, "SomeUnsupportedOp");
+    EXPECT_EQ(key.version, 1);
+    EXPECT_FALSE(key.is_flex_op);
+  }
+
+  {
+    details::OperatorKey key(OperatorType::kUnsupported, "SomeUnsupportedOp", 1,
+                             true);
+    EXPECT_EQ(key.type, OperatorType::kUnsupported);
+    // Verify that the custom op name is prefixed by "Flex" and `is_flex_op`
+    // is true.
+    EXPECT_EQ(key.custom_code, "FlexSomeUnsupportedOp");
+    EXPECT_EQ(key.version, 1);
+    EXPECT_TRUE(key.is_flex_op);
+  }
+}
+
+TEST(OperatorKeyTest, TestFlexWithControlFlowOp) {
+  details::OperatorKey key(OperatorType::kUnsupported, "Merge", 1, true);
+  EXPECT_EQ(key.type, OperatorType::kUnsupported);
+  EXPECT_EQ(key.custom_code, "FlexMerge");
+  EXPECT_EQ(key.version, 1);
+  EXPECT_TRUE(key.is_flex_op);
+  // The control flow ops should be marked as unsupported.
+  EXPECT_TRUE(key.is_unsupported_flex_op);
+}
+
 // TODO(ahentz): tests for tensors, inputs, outputs, opcodes and operators.
 
 }  // namespace
-- 
GitLab


From d6e14a53835eed5eed279c83e475440f8f814f0e Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Wed, 3 Oct 2018 17:28:57 -0700
Subject: [PATCH 0326/1085] Automated rollback of commit
 c1b3b0b9e041d82e80c2cdcc623a387753daf0b4

PiperOrigin-RevId: 215658770
---
 tensorflow/contrib/lite/kernels/BUILD | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index b349a2863c..daaf6714cc 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -337,10 +337,7 @@ tf_cc_test(
     name = "activations_test",
     size = "small",
     srcs = ["activations_test.cc"],
-    tags = [
-        "nomac",
-        "tflite_not_portable_ios",
-    ],
+    tags = ["tflite_not_portable_ios"],
     deps = [
         ":builtin_ops",
         "//tensorflow/contrib/lite:framework",
-- 
GitLab


From f7edc2d308523fa6c2d233c09e3f2da1c98e3dbc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 18:00:17 -0700
Subject: [PATCH 0327/1085] PinToHostOptimizer: Refactored code. Update
 blacklist. Added recursive lookback for Identity op. This fixes many
 performance regressions.

PiperOrigin-RevId: 215662393
---
 .../core/grappler/costs/graph_properties.h    |   4 +
 tensorflow/core/grappler/graph_view.cc        |  33 +-
 tensorflow/core/grappler/graph_view.h         |   3 +-
 tensorflow/core/grappler/graph_view_test.cc   |  22 +-
 tensorflow/core/grappler/op_types.cc          | 114 ++++---
 tensorflow/core/grappler/op_types.h           |   2 +
 .../optimizers/pin_to_host_optimizer.cc       | 303 ++++++++++++------
 .../optimizers/pin_to_host_optimizer_test.cc  |  42 +++
 8 files changed, 366 insertions(+), 157 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index f716cd72c9..28fd7565cc 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -74,6 +74,10 @@ class GraphProperties {
   // shape information.
   void ClearInputProperties(const string& node_name);
   void ClearOutputProperties(const string& node_name);
+  // Returns true if we have *any* properties.
+  bool has_properties() const {
+    return input_properties_.size() > 0 || output_properties_.size() > 0;
+  }
 
  private:
   // Relaxes shapes <shapes_and_types>, determined from an EnqueueV2 node, into
diff --git a/tensorflow/core/grappler/graph_view.cc b/tensorflow/core/grappler/graph_view.cc
index 0b8cb5e919..de0a63fc4e 100644
--- a/tensorflow/core/grappler/graph_view.cc
+++ b/tensorflow/core/grappler/graph_view.cc
@@ -20,23 +20,25 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
-  for (int output_arg_id = 0; output_arg_id < op.output_arg_size();
-       ++output_arg_id) {
+namespace {
+int OpPortIdToArgId(const NodeDef& node,
+                    const protobuf::RepeatedPtrField<OpDef::ArgDef>& args,
+                    int port_id) {
+  for (int arg_id = 0; arg_id < args.size(); ++arg_id) {
     if (port_id < 0) {
       return -1;
     } else if (port_id == 0) {
-      return output_arg_id;
+      return arg_id;
     }
 
-    // Default is 1 port per output arg.
+    // Default is 1 port per arg.
     int n = 1;
 
-    const auto& output_arg = op.output_arg(output_arg_id);
-    if (!output_arg.number_attr().empty()) {
-      n = node.attr().at(output_arg.number_attr()).i();
-    } else if (!output_arg.type_list_attr().empty()) {
-      n = node.attr().at(output_arg.type_list_attr()).list().type_size();
+    const auto& arg = args.Get(arg_id);
+    if (!arg.number_attr().empty()) {
+      n = node.attr().at(arg.number_attr()).i();
+    } else if (!arg.type_list_attr().empty()) {
+      n = node.attr().at(arg.type_list_attr()).list().type_size();
     }
 
     if (n < 0) {
@@ -44,13 +46,22 @@ int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
       DCHECK_GE(n, 0);
       return -1;
     } else if (port_id < n) {
-      return output_arg_id;
+      return arg_id;
     }
     port_id -= n;
   }
 
   return -1;
 }
+}  // end namespace
+
+int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
+  return OpPortIdToArgId(node, op.output_arg(), port_id);
+}
+
+int OpInputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id) {
+  return OpPortIdToArgId(node, op.input_arg(), port_id);
+}
 
 GraphView::GraphView(GraphDef* graph) : graph_(graph) {
   for (int i = 0; i < graph_->node_size(); i++) {
diff --git a/tensorflow/core/grappler/graph_view.h b/tensorflow/core/grappler/graph_view.h
index ec946ca3b5..09c36a1368 100644
--- a/tensorflow/core/grappler/graph_view.h
+++ b/tensorflow/core/grappler/graph_view.h
@@ -26,7 +26,7 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-// Map a node/op's output port_id to arg_id.
+// Map a node/op's input/output port_id to arg_id.
 //
 // The port_id refers to the n-th tensor of the node, while the arg_id refers to
 // the n-th arg of the op. These two can be different if an op's arg is a list
@@ -34,6 +34,7 @@ namespace grappler {
 //
 // We return -1 for any invalid port_id (i.e., no corresponding arg_id).
 int OpOutputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id);
+int OpInputPortIdToArgId(const NodeDef& node, const OpDef& op, int port_id);
 
 // A utility class to simplify the traversal of a GraphDef.
 class GraphView {
diff --git a/tensorflow/core/grappler/graph_view_test.cc b/tensorflow/core/grappler/graph_view_test.cc
index 3d7d2faf7c..f90e2c8cfc 100644
--- a/tensorflow/core/grappler/graph_view_test.cc
+++ b/tensorflow/core/grappler/graph_view_test.cc
@@ -26,7 +26,7 @@ namespace {
 
 class GraphViewTest : public ::testing::Test {};
 
-TEST_F(GraphViewTest, OpOutputPortIdToArgIdShapeN) {
+TEST_F(GraphViewTest, OpPortIdToArgIdShapeN) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output a = ops::Const(s.WithOpName("a"), 0.0f, {10, 10});
   ops::ShapeN b(s.WithOpName("b"), {a, a, a});
@@ -45,9 +45,16 @@ TEST_F(GraphViewTest, OpOutputPortIdToArgIdShapeN) {
   EXPECT_TRUE(
       OpRegistry::Global()->LookUpOpDef(b_node_def.op(), &b_op_def).ok());
 
-  EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *a_op_def, 0));
-  EXPECT_EQ(-1, OpOutputPortIdToArgId(b_node_def, *a_op_def, 1));
+  // Const has 0 inputs, 1 output.
+  EXPECT_EQ(-1, OpInputPortIdToArgId(a_node_def, *a_op_def, 0));
+  EXPECT_EQ(0, OpOutputPortIdToArgId(a_node_def, *a_op_def, 0));
+  EXPECT_EQ(-1, OpOutputPortIdToArgId(a_node_def, *a_op_def, 1));
 
+  // ShapeN has N=3 inputs and outputs.
+  EXPECT_EQ(0, OpInputPortIdToArgId(b_node_def, *b_op_def, 0));
+  EXPECT_EQ(0, OpInputPortIdToArgId(b_node_def, *b_op_def, 1));
+  EXPECT_EQ(0, OpInputPortIdToArgId(b_node_def, *b_op_def, 2));
+  EXPECT_EQ(-1, OpInputPortIdToArgId(b_node_def, *b_op_def, 3));
   EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *b_op_def, 0));
   EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *b_op_def, 1));
   EXPECT_EQ(0, OpOutputPortIdToArgId(b_node_def, *b_op_def, 2));
@@ -55,7 +62,7 @@ TEST_F(GraphViewTest, OpOutputPortIdToArgIdShapeN) {
   EXPECT_EQ(-1, OpOutputPortIdToArgId(b_node_def, *b_op_def, 4));
 }
 
-TEST_F(GraphViewTest, OpOutputPortIdToArgIdSparseSplit) {
+TEST_F(GraphViewTest, OpPortIdToArgIdSparseSplit) {
   for (int num_splits : {1, 2}) {
     tensorflow::Scope s = tensorflow::Scope::NewRootScope();
     Output a = ops::Const<int64>(s.WithOpName("a"), 1, {10, 10});
@@ -70,6 +77,13 @@ TEST_F(GraphViewTest, OpOutputPortIdToArgIdSparseSplit) {
     EXPECT_TRUE(
         OpRegistry::Global()->LookUpOpDef(b_node_def.op(), &b_op_def).ok());
 
+    // We have 4 inputs.
+    EXPECT_EQ(0, OpInputPortIdToArgId(b_node_def, *b_op_def, 0));
+    EXPECT_EQ(1, OpInputPortIdToArgId(b_node_def, *b_op_def, 1));
+    EXPECT_EQ(2, OpInputPortIdToArgId(b_node_def, *b_op_def, 2));
+    EXPECT_EQ(3, OpInputPortIdToArgId(b_node_def, *b_op_def, 3));
+    EXPECT_EQ(-1, OpInputPortIdToArgId(b_node_def, *b_op_def, 4));
+
     for (int port_id = 0; port_id <= num_splits * 3; ++port_id) {
       int arg_id = -1;
       if (port_id < num_splits * 3) {
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 9f0d9dbf28..1b5a215987 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -13,14 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include <unordered_set>
-
+#include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -102,6 +101,18 @@ bool IsConjugateTranspose(const NodeDef& node) {
   return node.op() == "ConjugateTranspose";
 }
 
+bool IsControlFlow(const NodeDef& node) {
+  // clang-format off
+  return node.op() == "ControlTrigger" ||
+         node.op() == "Enter" ||
+         node.op() == "Exit" ||
+         node.op() == "LoopCond" ||
+         node.op() == "Merge" ||
+         node.op() == "NextIteration" ||
+         node.op() == "Switch";
+  // clang-format on
+}
+
 bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; }
 
 bool IsConv2DBackpropFilter(const NodeDef& node) {
@@ -140,26 +151,26 @@ bool IsDiv(const NodeDef& node) { return node.op() == "Div"; }
 // e.g. sqrt, exp. *is_non_decreasing is false, the function is non-increasing,
 // e.g. inv.
 bool IsElementWiseMonotonic(const NodeDef& node, bool* is_non_decreasing) {
-  static const std::unordered_set<string>* monotonic_non_decreasing_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kMonotonicNonDecreasingOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
           "Asinh", "Atanh",   "Ceil",  "Elu",  "Erf",  "Exp",   "Expm1",
           "Floor", "Log",     "Log1p", "Relu", "Relu", "Relu6", "Rint",
           "Selu",  "Sigmoid", "Sign",  "Sinh", "Sqrt", "Tanh",
       }));
-  static const std::unordered_set<string>* monotonic_non_increasing_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kMonotonicNonIncreasingOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
           "Inv",
           "Reciprocal",
           "Erfc",
           "Rsqrt",
           "Neg",
       }));
-  if (monotonic_non_decreasing_ops->count(node.op()) > 0) {
+  if (kMonotonicNonDecreasingOps->count(node.op()) > 0) {
     if (is_non_decreasing) {
       *is_non_decreasing = true;
     }
     return true;
-  } else if (monotonic_non_increasing_ops->count(node.op()) > 0) {
+  } else if (kMonotonicNonIncreasingOps->count(node.op()) > 0) {
     if (is_non_decreasing) {
       *is_non_decreasing = false;
     }
@@ -431,6 +442,38 @@ bool IsSymbolicGradient(const NodeDef& node) {
 
 bool IsTanhGrad(const NodeDef& node) { return node.op() == "TanhGrad"; }
 
+bool IsTensorArray(const NodeDef& node) {
+  static const gtl::FlatSet<string>* const kTensorArrayOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
+          "TensorArray",
+          "TensorArrayV2",
+          "TensorArrayV3",
+          "TensorArrayGrad",
+          "TensorArrayGradV2",
+          "TensorArrayGradV3",
+          "TensorArrayGradWithShape",
+          "TensorArrayWrite",
+          "TensorArrayWriteV2",
+          "TensorArrayWriteV3",
+          "TensorArrayRead",
+          "TensorArrayReadV2",
+          "TensorArrayReadV3",
+          "TensorArrayConcat",
+          "TensorArrayConcatV2",
+          "TensorArrayConcatV3",
+          "TensorArraySplit",
+          "TensorArraySplitV2",
+          "TensorArraySplitV3",
+          "TensorArraySize",
+          "TensorArraySizeV2",
+          "TensorArraySizeV3",
+          "TensorArrayClose",
+          "TensorArrayCloseV2",
+          "TensorArrayCloseV3",
+      }));
+  return kTensorArrayOps->count(node.op()) > 0;
+}
+
 bool IsTile(const NodeDef& node) { return node.op() == "Tile"; }
 
 bool IsTranspose(const NodeDef& node) { return node.op() == "Transpose"; }
@@ -542,30 +585,29 @@ OPDEF_PROPERTY_HELPER(Aggregate, aggregate)
 OPDEF_PROPERTY_HELPER(Commutative, commutative)
 
 bool IsInvolution(const NodeDef& node) {
-  static const std::unordered_set<string>* involution_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
-          "Conj", "Reciprocal", "Invert", "Neg", "LogicalNot"}));
-  return involution_ops->count(node.op()) > 0;
+  static const gtl::FlatSet<string>* const kInvolutionOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{"Conj", "Reciprocal", "Invert",
+                                              "Neg", "LogicalNot"}));
+  return kInvolutionOps->count(node.op()) > 0;
 }
 
 bool IsValueAndOrderAndShapePreserving(const NodeDef& node) {
   if (NumNonControlInputs(node) == 1 && IsAggregate(node)) {
     return true;
   }
-  static const std::unordered_set<string>*
-      value_and_order_and_shape_preserving_ops =
-          CHECK_NOTNULL((new const std::unordered_set<string>{
-              "CheckNumerics",
-              "DebugGradientIdentity",
-              "DeepCopy"
-              "Enter",
-              "Exit",
-              "PreventGradient",
-              "Print",
-              "Snapshot",
-              "StopGradient",
-          }));
-  return value_and_order_and_shape_preserving_ops->count(node.op()) > 0 ||
+  static const gtl::FlatSet<string>* const kValueAndOrderAndShapePreservingOps =
+      CHECK_NOTNULL((new const gtl::FlatSet<string>{
+          "CheckNumerics",
+          "DebugGradientIdentity",
+          "DeepCopy"
+          "Enter",
+          "Exit",
+          "PreventGradient",
+          "Print",
+          "Snapshot",
+          "StopGradient",
+      }));
+  return kValueAndOrderAndShapePreservingOps->count(node.op()) > 0 ||
          IsIdentity(node);
 }
 
@@ -573,31 +615,31 @@ bool IsValueAndOrderPreserving(const NodeDef& node) {
   if (NumNonControlInputs(node) == 1 && IsAggregate(node)) {
     return true;
   }
-  static const std::unordered_set<string>* value_and_order_preserving_ops =
-      CHECK_NOTNULL((new const std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kValueAndOrderPreservingOps =
+      CHECK_NOTNULL((new const gtl::FlatSet<string>{
           "ExpandDims",
           "Reshape",
           "Squeeze",
       }));
-  return value_and_order_preserving_ops->count(node.op()) > 0 ||
+  return kValueAndOrderPreservingOps->count(node.op()) > 0 ||
          IsValueAndOrderAndShapePreserving(node);
 }
 
 bool IsValuePreserving(const NodeDef& node) {
-  static const std::unordered_set<string>* value_preserving_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kValuePreservingOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
           "InvertPermutation",
           "Reverse",
           "Roll",
           "Transpose",
       }));
   return IsValueAndOrderPreserving(node) ||
-         value_preserving_ops->count(node.op()) > 0;
+         kValuePreservingOps->count(node.op()) > 0;
 }
 
 bool IsUnaryElementWise(const NodeDef& node) {
-  static const std::unordered_set<string>* element_wise_ops =
-      CHECK_NOTNULL((new std::unordered_set<string>{
+  static const gtl::FlatSet<string>* const kElementWiseOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
           "Abs",
           "Acos",
           "Acosh",
@@ -646,7 +688,7 @@ bool IsUnaryElementWise(const NodeDef& node) {
           "Tan"
           "Tanh",
       }));
-  return element_wise_ops->count(node.op()) > 0 ||
+  return kElementWiseOps->count(node.op()) > 0 ||
          IsValueAndOrderAndShapePreserving(node);
 }
 
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 7f86a5f295..d4e0159e81 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -46,6 +46,7 @@ bool IsConjugateTranspose(const NodeDef& node);
 bool IsConcat(const NodeDef& node);
 bool IsConcatOffset(const NodeDef& node);
 bool IsConstant(const NodeDef& node);
+bool IsControlFlow(const NodeDef& node);
 bool IsConv2D(const NodeDef& node);
 bool IsConv2DBackpropFilter(const NodeDef& node);
 bool IsConv2DBackpropInput(const NodeDef& node);
@@ -151,6 +152,7 @@ bool IsSum(const NodeDef& node);
 bool IsSwitch(const NodeDef& node);
 bool IsSymbolicGradient(const NodeDef& node);
 bool IsTanhGrad(const NodeDef& node);
+bool IsTensorArray(const NodeDef& node);
 bool IsTile(const NodeDef& node);
 bool IsTranspose(const NodeDef& node);
 bool IsTruncateDiv(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 89eb76046e..8ed4271fa4 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -35,13 +35,44 @@ namespace internal {
 // dynamically determined.
 constexpr int64 kTensorMaxSize = 64;
 
-// Find KernelDef for `node`.
-Status TryFindKernelDef(const NodeDef& node, const KernelDef** kdef) {
-  // Try find KernelDef for node.device, else GPU or CPU.
-  for (const DeviceType& device :
-       {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}) {
-    Status s = FindKernelDef(device, node, kdef, nullptr);
+// All the nodes that should be blacklisted and not swapped.
+bool IsBlacklisted(const NodeDef& node) {
+  return
+      // Collective ops should not be swapped.
+      IsCollective(node) ||
+      // ControlFlow ops should not be swapped.
+      IsControlFlow(node) ||
+      // NoOp ops should not be swapped (due to group dependencies).
+      IsNoOp(node);
+}
+
+// Check if Tensor is integer and small size.
+bool IsTensorIntegerAndSmall(const OpInfo::TensorProperties& prop) {
+  // Check type to be int32 or int64.
+  if (prop.dtype() != DataType::DT_INT32 &&
+      prop.dtype() != DataType::DT_INT64) {
+    return false;
+  }
+
+  // Check size known and small.
+  const int64 size = NumCoefficients(prop.shape());
+  if (size < 0 || size > kTensorMaxSize) {
+    return false;
+  }
+
+  return true;
+}
+
+// Find KernelDef for `node`, greedily return first found from `devices`.
+Status TryFindKernelDef(const std::vector<DeviceType>& devices,
+                        const NodeDef& node, const KernelDef** kdef) {
+  for (const DeviceType& device : devices) {
+    const KernelDef* kernel = nullptr;
+    Status s = FindKernelDef(device, node, &kernel, nullptr);
     if (s.ok()) {
+      if (kdef) {
+        *kdef = kernel;
+      }
       return Status::OK();
     }
   }
@@ -49,88 +80,183 @@ Status TryFindKernelDef(const NodeDef& node, const KernelDef** kdef) {
   return errors::NotFound("Could not find KernelDef for op: ", node.op());
 }
 
-// Check if all node's inputs are pinned to CPU memory.
-bool AreAllNodeInputsPinnedToHost(const GraphView& graph, const NodeDef& node) {
-  // Loop through all the inputs excluding the controlling nodes.
-  for (const GraphView::OutputPort& fanin : graph.GetFanins(node, false)) {
-    // Check if (the fanin) op's device is on CPU.
-    if (str_util::StrContains(fanin.node->device(), DEVICE_CPU)) {
-      continue;
-    }
-
-    // Check if (the fanin) op's output port is pinned to HostMemory.
-    const OpDef* fanin_odef = nullptr;
-    Status s = OpRegistry::Global()->LookUpOpDef(fanin.node->op(), &fanin_odef);
-    if (!s.ok()) {
-      LOG(INFO) << "Could not find OpDef for : " << fanin.node->op();
-      return false;
-    }
+// Checks if a node's output port is host friendly.
+// Roughly this means checking if the output port is on Host memory.
+Status IsNodeOutputPortHostFriendly(const GraphView& graph,
+                                    GraphProperties* properties,
+                                    const NodeDef& node, int port_id,
+                                    bool* is_candidate) {
+  *is_candidate = false;
 
-    const int output_arg_id =
-        OpOutputPortIdToArgId(*fanin.node, *fanin_odef, fanin.port_id);
-    if (output_arg_id < 0) {
-      LOG(WARNING) << "Invalid port: " << fanin.port_id << "!\n"
-                   << node.DebugString() << "\n"
-                   << fanin.node->DebugString() << "\n"
-                   << fanin_odef->DebugString();
-      return false;
-    }
+  // Make sure we are not a blacklisted op.
+  if (IsBlacklisted(node)) {
+    return Status::OK();
+  }
 
-    const KernelDef* fanin_kdef = nullptr;
-    s = TryFindKernelDef(*fanin.node, &fanin_kdef);
-    if (!s.ok()) {
-      LOG(INFO) << "Could not find KernelDef for : " << fanin.node->op();
-      return false;
-    }
+  // Check to make sure we have the right properties (i.e., statically shaped).
+  if (!properties->has_properties()) {
+    // This is an expensive call, call it lazily.
+    TF_RETURN_IF_ERROR(properties->InferStatically(
+        /*assume_valid_feeds=*/false));
+  }
+  const auto& output_properties = properties->GetOutputProperties(node.name());
+  if (port_id >= output_properties.size()) {
+    LOG(WARNING) << "port_id=" << port_id
+                 << " but output_properties.size()=" << output_properties.size()
+                 << "\n"
+                 << node.DebugString();
+    return Status::OK();
+  }
+  if (!IsTensorIntegerAndSmall(output_properties[port_id])) {
+    return Status::OK();
+  }
 
-    bool fanin_pinned = false;
-    for (const string& host_memory_arg : fanin_kdef->host_memory_arg()) {
-      if (fanin_odef->output_arg(output_arg_id).name() == host_memory_arg) {
-        fanin_pinned = true;
-        break;
+  // These nodes may be optimized away downstream (even if pinned to Host), we
+  // should (recusively) check their source.
+  if (IsIdentity(node)) {
+    for (const auto& fanin : graph.GetFanins(node, false)) {
+      bool fanin_candidate = false;
+      TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
+          graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
+      if (!fanin_candidate) {
+        return Status::OK();
       }
     }
+    *is_candidate = true;
+    return Status::OK();
+  }
 
-    if (!fanin_pinned) {
-      return false;
+  // Check if op's device is on CPU.
+  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
+    *is_candidate = true;
+    return Status::OK();
+  }
+
+  // Check if op's output port is pinned to HostMemory.
+  const OpDef* op = nullptr;
+  Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
+  if (!s.ok()) {
+    LOG(WARNING) << "Could not find OpDef for : " << node.op();
+    return Status::OK();
+  }
+
+  // Map the port_id to output_arg_id.
+  const int output_arg_id = OpOutputPortIdToArgId(node, *op, port_id);
+  if (output_arg_id < 0) {
+    LOG(WARNING) << "Invalid port: " << port_id << "!\n"
+                 << node.DebugString() << "\n"
+                 << op->DebugString();
+    return Status::OK();
+  }
+
+  // Find the kernel.
+  const KernelDef* kernel = nullptr;
+  s = TryFindKernelDef({node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node,
+                       &kernel);
+  if (!s.ok()) {
+    LOG(INFO) << "Could not find KernelDef for: " << node.op();
+    return Status::OK();
+  }
+
+  // Check if the output_arg is pinned to Host.
+  for (const string& host_memory_arg : kernel->host_memory_arg()) {
+    if (op->output_arg(output_arg_id).name() == host_memory_arg) {
+      *is_candidate = true;
+      break;
     }
   }
 
-  return true;
+  return Status::OK();
 }
 
-bool IsTensorIntegerAndSmall(const OpInfo::TensorProperties& prop) {
-  // Check if Tensor is integer and small size.
+// Checks if a node's input port is Host friendly.
+// Roughly this means checking if the input port is on Host memory.
+bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
+  // If node is on Host, assume its inputs are Host friendly.
+  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
+    return true;
+  }
 
-  // Check type to be int32 or int64.
-  if (prop.dtype() != DataType::DT_INT32 &&
-      prop.dtype() != DataType::DT_INT64) {
+  // Check if op's input port is pinned to HostMemory.
+  const OpDef* op = nullptr;
+  Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
+  if (!s.ok()) {
+    LOG(WARNING) << "Could not find OpDef for : " << node.op();
     return false;
   }
-
-  // Check size known and small.
-  const int64 size = NumCoefficients(prop.shape());
-  if (size < 0 || size > kTensorMaxSize) {
+  const int input_arg_id = OpInputPortIdToArgId(node, *op, port_id);
+
+  // Find the kernel.
+  const KernelDef* kernel = nullptr;
+  s = internal::TryFindKernelDef(
+      {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node, &kernel);
+  if (!s.ok()) {
+    LOG(INFO) << "Could not find KernelDef for: " << node.op();
     return false;
   }
 
-  return true;
+  // Check if the input_arg is pinned to Host.
+  for (const string& host_memory_arg : kernel->host_memory_arg()) {
+    if (op->input_arg(input_arg_id).name() == host_memory_arg) {
+      return true;
+    }
+  }
+
+  return false;
 }
 
-bool AreAllNodeInputsAndOutputsIntsAndSmall(const GraphProperties& properties,
-                                            const NodeDef& node) {
-  for (const auto& prop : properties.GetInputProperties(node.name())) {
-    if (!IsTensorIntegerAndSmall(prop)) {
-      return false;
+// Checks if a node is a candidate to pin to Host.
+// The rough algorithm is as follows:
+// 1] Check if node is blacklisted.
+// 2] Check if node can run on Host.
+// 3] Check all input/outputs are Host "friendly" (atm, friendly means small,
+//    ints, and pinned to Host).
+Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
+                           const NodeDef& node, bool* is_candidate) {
+  *is_candidate = false;
+
+  // Check if node already on CPU.
+  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
+    *is_candidate = true;
+    return Status::OK();
+  }
+
+  // Skip these node types.
+  if (IsBlacklisted(node)) {
+    return Status::OK();
+  }
+
+  // Check the node can be run on CPU.
+  Status s = TryFindKernelDef({DEVICE_CPU}, node, nullptr);
+  if (!s.ok()) {
+    return Status::OK();
+  }
+
+  // Check all inputs are Host friendly.
+  for (const GraphView::OutputPort& fanin :
+       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
+    bool fanin_candidate = false;
+    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
+        graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
+    if (!fanin_candidate) {
+      return Status::OK();
     }
   }
 
-  for (const auto& prop : properties.GetOutputProperties(node.name())) {
+  // Check all outputs are Host friendly.
+  if (!properties->has_properties()) {
+    // This is an expensive call, call it lazily.
+    TF_RETURN_IF_ERROR(properties->InferStatically(
+        /*assume_valid_feeds=*/false));
+  }
+  for (const auto& prop : properties->GetOutputProperties(node.name())) {
     if (!IsTensorIntegerAndSmall(prop)) {
-      return false;
+      return Status::OK();
     }
   }
-  return true;
+
+  *is_candidate = true;
+  return Status::OK();
 }
 
 string TryFindHostDevice(const gtl::FlatSet<string>& devices,
@@ -167,15 +293,6 @@ bool IsTPUGraphDef(const GraphDef& def) {
   }
   return false;
 }
-
-// All the nodes that should be blacklisted and not swapped.
-bool IsBlacklisted(const NodeDef& node) {
-  return
-      // Collective ops should not be swapped.
-      IsCollective(node) ||
-      // NoOp breaks perf regression tests (probably due to group dependencies).
-      IsNoOp(node);
-}
 }  // end namespace internal
 
 Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
@@ -188,7 +305,6 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   }
 
   GraphProperties properties(item);
-  bool has_properties = false;
   GraphView graph(optimized_graph);
 
   gtl::FlatSet<string> devices;
@@ -209,35 +325,10 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   std::vector<std::pair<NodeDef*, string>> const_nodes;
 
   for (auto& node : *optimized_graph->mutable_node()) {
-    // Check if node already on CPU.
-    if (str_util::StrContains(node.device(), DEVICE_CPU)) {
-      continue;
-    }
-
-    // Skip these node types.
-    if (internal::IsBlacklisted(node)) {
-      continue;
-    }
-
-    // Check the node can be run on CPU.
-    Status s = FindKernelDef(DEVICE_CPU, node, nullptr, nullptr);
-    if (!s.ok()) {
-      continue;
-    }
-
-    // Check all input's are pinned to CPU.
-    if (!internal::AreAllNodeInputsPinnedToHost(graph, node)) {
-      continue;
-    }
-
-    if (!has_properties) {
-      // This is an expensive call, call it lazily.
-      TF_RETURN_IF_ERROR(properties.InferStatically(false));
-      has_properties = true;
-    }
-
-    // Check all inputs and outputs are integers and small.
-    if (!internal::AreAllNodeInputsAndOutputsIntsAndSmall(properties, node)) {
+    bool is_candidate = false;
+    TF_RETURN_IF_ERROR(
+        internal::IsNodeHostCandidate(graph, &properties, node, &is_candidate));
+    if (!is_candidate) {
       continue;
     }
 
@@ -254,10 +345,12 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     NodeDef* node = it.first;
     const string& device = it.second;
 
-    // Check all the consumers of this node, if any of them are on the original
-    // device, swap this node back onto the original device.
+    // Check all the consumers of this node, if any of them are not on CPU, swap
+    // this node back onto the original device.
     for (const GraphView::InputPort& fanout : graph.GetFanouts(*node, false)) {
-      if (fanout.node->device() == device) {
+      // The consumer is not Host friendly, swap it back to the original device.
+      if (!internal::IsNodeInputPortHostFriendly(*fanout.node,
+                                                 fanout.port_id)) {
         node->set_device(device);
         break;
       }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
index 173cb3fe3c..7c64529441 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
@@ -160,6 +160,48 @@ TEST_F(PinToHostOptimizerTest, NoSwap) {
   EXPECT_EQ(found, 3);
 }
 
+TEST_F(PinToHostOptimizerTest, Identity) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  // `a,c` is on GPU, `e` is on CPU, consequently `e` should not be swapped.
+  // `b` should be placed onto Host since `c` pins the input to Host memory.
+  Output a =
+      ops::Const(s.WithOpName("a").WithDevice("/device:GPU:0"), 1, {64, 64});
+  Output b = ops::Const(s.WithOpName("b"), {0, 1}, {2});
+  Output c =
+      ops::ReduceProd(s.WithOpName("c").WithDevice("/device:GPU:0"), a, b);
+  Output d = ops::Identity(s.WithDevice("/device:CPU:0").WithOpName("d"), c);
+  Output e = ops::Multiply(s.WithOpName("e"), d, d);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  GraphDef output;
+  PinToHostOptimizer optimizer(RewriterConfig::ON);
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  int found = 0;
+  for (const NodeDef& node : output.node()) {
+    if (node.name() == "a" || node.name() == "c") {
+      EXPECT_EQ(node.device(), "/device:GPU:0");
+    } else if (node.name() == "b") {
+      // If CUDA, then there is a GPU kernel registration that is pinned to Host
+      // memory. Consequently, `b` will be mapped to Host correct if there is
+      // a GPU kernel registered.
+#if GOOGLE_CUDA
+      EXPECT_EQ(node.device(), "/device:CPU:0");
+#else
+      EXPECT_TRUE(node.device().empty());
+#endif
+    } else if (node.name() == "d") {
+      EXPECT_EQ(node.device(), "/device:CPU:0");
+    } else if (node.name() == "e") {
+      EXPECT_TRUE(node.device().empty());
+    }
+    ++found;
+  }
+  EXPECT_EQ(found, 5);
+}
+
 TEST_F(PinToHostOptimizerTest, PortIdToArgId) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output a = ops::Const(s.WithOpName("a"), 1, {1, 2, 3});
-- 
GitLab


From 18f589350f0cb244e2373480048d17cbacd241e1 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 3 Oct 2018 18:05:22 -0700
Subject: [PATCH 0328/1085] [XLA] Add a size limit to the constant folder to
 avoid forming giant constants during compilation.

PiperOrigin-RevId: 215663002
---
 .../xla/service/hlo_constant_folding.cc       | 17 ++++++++++++++++
 .../xla/service/hlo_constant_folding_test.cc  | 20 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding.cc b/tensorflow/compiler/xla/service/hlo_constant_folding.cc
index f837816cea..538816a353 100644
--- a/tensorflow/compiler/xla/service/hlo_constant_folding.cc
+++ b/tensorflow/compiler/xla/service/hlo_constant_folding.cc
@@ -76,6 +76,22 @@ StatusOr<bool> HloConstantFolding::Run(HloModule* module) {
         continue;
       }
 
+      // Don't constant fold unless it's a net positive or the output is small.
+      int64 elements_in_removed_operands = 0;
+      for (HloInstruction* operand : instruction->operands()) {
+        if (operand->user_count() == 1) {
+          elements_in_removed_operands +=
+              ShapeUtil::ElementsIn(operand->shape());
+        }
+      }
+      int64 elements_in_constant = ShapeUtil::ElementsIn(instruction->shape());
+
+      static const int64 kMaximumConstantSizeElements = 2 * 1000 * 1000;
+      if (elements_in_constant > elements_in_removed_operands &&
+          elements_in_constant > kMaximumConstantSizeElements) {
+        continue;
+      }
+
       Literal result;
       // Currently we skip unimplemented operations.
       // TODO(b/35975797): Fold constant computations for more operations.
@@ -84,6 +100,7 @@ StatusOr<bool> HloConstantFolding::Run(HloModule* module) {
                 << instruction->ToString();
         continue;
       }
+      VLOG(4) << "Constant folded: " << instruction->ToString();
 
       TF_RETURN_IF_ERROR(computation->ReplaceWithNewInstruction(
           instruction, HloInstruction::CreateConstant(std::move(result))));
diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc
index 3e0def5d26..e45f905f71 100644
--- a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc
@@ -242,5 +242,25 @@ TEST_F(HloConstantFoldingTest, ConstantFoldReduceNoLayout) {
   EXPECT_THAT(module().entry_computation()->root_instruction(), op::Reduce());
 }
 
+const char* const kConstantFoldLargePad = R"(
+  HloModule ConstantFoldLargePad
+
+  ENTRY r {
+    a = f32[1,1,1] constant(f32[1,1,1]{{{7}}})
+    b = f32[] constant(42)
+    ROOT pad = f32[2048,2048,128] pad(a, b), padding=1024_1023x1024_1023x64_63
+  })";
+
+TEST_F(HloConstantFoldingTest, DoesNotFoldLargePad) {
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(kConstantFoldLargePad));
+  HloConstantFolding const_folder;
+  TF_ASSERT_OK_AND_ASSIGN(bool result, const_folder.Run(module.get()));
+  EXPECT_FALSE(result);
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Pad(op::Constant(), op::Constant()));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 010621615696c31016ccceec76576b95f82971df Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 16:56:14 -0700
Subject: [PATCH 0329/1085] Increase error-epsilon for
 ProfilingTest::ProfilesAreCollected.

PiperOrigin-RevId: 215654327
---
 tensorflow/contrib/lite/profiling/profiler_test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/profiling/profiler_test.cc b/tensorflow/contrib/lite/profiling/profiler_test.cc
index 0fba0450a0..cf56eed2a4 100644
--- a/tensorflow/contrib/lite/profiling/profiler_test.cc
+++ b/tensorflow/contrib/lite/profiling/profiler_test.cc
@@ -83,8 +83,8 @@ TEST(ProfilingTest, ProfilesAreCollected) {
   EXPECT_EQ("SleepForQuarter", profile_events[4]->tag);
 
 #ifndef ADDRESS_SANITIZER
-  // ASAN build is sometimes very slow.
-  const int eps_ms = 10;
+  // ASAN build is sometimes very slow. Set a large epsilon to avoid flakiness.
+  const int eps_ms = 50;
   AssertDurationOfEventAroundMs(profile_events[0], /*expected_ms*/ 500, eps_ms);
   AssertDurationOfEventAroundMs(profile_events[1], /*expected_ms*/ 250, eps_ms);
   AssertDurationOfEventAroundMs(profile_events[2], /*expected_ms*/ 250, eps_ms);
-- 
GitLab


From 54bebc286bbe7d6a866a3bdbcefd8af55adbe39a Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Wed, 3 Oct 2018 18:26:28 -0700
Subject: [PATCH 0330/1085] Fix a test. - SetCustomOp also sets the name of the
 custom op. Test was checking against the wrong name in the profile.

PiperOrigin-RevId: 215665359
---
 .../contrib/lite/profiling/profile_summarizer_test.cc       | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc b/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc
index 67a5eecfa0..465c294962 100644
--- a/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc
+++ b/tensorflow/contrib/lite/profiling/profile_summarizer_test.cc
@@ -31,6 +31,8 @@ namespace profiling {
 
 namespace {
 
+const char* kOpName = "SimpleOpEval";
+
 #ifdef TFLITE_PROFILING_ENABLED
 TfLiteStatus SimpleOpEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input1 = tflite::GetInput(context, node, /*index=*/0);
@@ -63,7 +65,7 @@ TfLiteRegistration* RegisterSimpleOpWithProfilingDetails() {
                                             SimpleOpEval,
                                             SimpleOpProfilingString,
                                             tflite::BuiltinOperator_CUSTOM,
-                                            "SimpleOpEval",
+                                            kOpName,
                                             1};
   return &registration;
 }
@@ -89,7 +91,7 @@ void SimpleOpModel::Init(
   inputs_[0] = AddInput({TensorType_INT32, {1}});
   inputs_[1] = AddInput({TensorType_INT32, {1}});
   output_ = AddOutput({TensorType_INT32, {}});
-  SetCustomOp("SimpleAdd", {}, registration);
+  SetCustomOp(kOpName, {}, registration);
   BuildInterpreter({GetShape(inputs_[0]), GetShape(inputs_[1])});
 }
 
-- 
GitLab


From 9bd6f5ed55e533ccac055a5bc7fbb771e2d432c5 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 3 Oct 2018 18:56:00 -0700
Subject: [PATCH 0331/1085] [TF:XLA] Use xla::Iota rather than expanding Range
 ops to constants.

PiperOrigin-RevId: 215668016
---
 .../compiler/tf2xla/kernels/sequence_ops.cc   | 39 +++++++++----------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc b/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc
index 25a5bcbe1d..0c32b8def0 100644
--- a/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/sequence_ops.cc
@@ -18,7 +18,9 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/numeric.h"
 #include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -55,10 +57,10 @@ Status GetIntValue(int index, XlaOpKernelContext* ctx, int64* value) {
 
 // The type-specific part of the implementation of Range.
 template <typename T>
-Status CreateRangeTensor(const xla::LiteralSlice& start_literal,
-                         const xla::LiteralSlice& limit_literal,
-                         const xla::LiteralSlice& delta_literal,
-                         Tensor* output) {
+xla::StatusOr<xla::XlaOp> CreateRangeTensor(
+    const xla::LiteralSlice& start_literal,
+    const xla::LiteralSlice& limit_literal,
+    const xla::LiteralSlice& delta_literal, xla::XlaBuilder* builder) {
   T start = start_literal.Get<T>({});
   T limit = limit_literal.Get<T>({});
   T delta = delta_literal.Get<T>({});
@@ -82,14 +84,10 @@ Status CreateRangeTensor(const xla::LiteralSlice& start_literal,
            ? ((std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta))
            : std::ceil(std::abs((limit - start) / delta)));
 
-  *output = Tensor(DataTypeToEnum<T>::v(), TensorShape({size}));
-  auto flat = output->flat<T>();
-  T val = start;
-  for (int64 i = 0; i < size; ++i) {
-    flat(i) = val;
-    val += delta;
-  }
-  return Status::OK();
+  return xla::ConstantR0(builder, start) +
+         xla::ConstantR0(builder, delta) *
+             xla::Iota(builder, xla::primitive_util::NativeToPrimitiveType<T>(),
+                       size);
 }
 
 class RangeOp : public XlaOpKernel {
@@ -115,27 +113,26 @@ class RangeOp : public XlaOpKernel {
     OP_REQUIRES_OK(ctx, ctx->ConstantInput(2, &delta));
 
     DataType type = input_type(0);
-    Tensor output;
-    Status status;
+    xla::StatusOr<xla::XlaOp> output;
     switch (type) {
       case DT_INT32:
-        status = CreateRangeTensor<int32>(start, limit, delta, &output);
+        output = CreateRangeTensor<int32>(start, limit, delta, ctx->builder());
         break;
       case DT_INT64:
-        status = CreateRangeTensor<int64>(start, limit, delta, &output);
+        output = CreateRangeTensor<int64>(start, limit, delta, ctx->builder());
         break;
       case DT_FLOAT:
-        status = CreateRangeTensor<float>(start, limit, delta, &output);
+        output = CreateRangeTensor<float>(start, limit, delta, ctx->builder());
         break;
       case DT_DOUBLE:
-        status = CreateRangeTensor<double>(start, limit, delta, &output);
+        output = CreateRangeTensor<double>(start, limit, delta, ctx->builder());
         break;
       default:
-        status = errors::InvalidArgument("Invalid type for Range ",
+        output = errors::InvalidArgument("Invalid type for Range ",
                                          DataTypeString(type));
     }
-    OP_REQUIRES_OK(ctx, status);
-    ctx->SetConstantOutput(0, output);
+    OP_REQUIRES_OK(ctx, output.status());
+    ctx->SetOutput(0, output.ValueOrDie());
   }
 };
 
-- 
GitLab


From 2e19f32d28ab88b5bd3dd4f6d42a54040591dfbb Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 3 Oct 2018 20:48:35 -0700
Subject: [PATCH 0332/1085] [XLA] Fix handling of tuple constants in HLO
 constant folding.

PiperOrigin-RevId: 215676675
---
 .../xla/service/hlo_constant_folding.cc       | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding.cc b/tensorflow/compiler/xla/service/hlo_constant_folding.cc
index 538816a353..4f898ce61c 100644
--- a/tensorflow/compiler/xla/service/hlo_constant_folding.cc
+++ b/tensorflow/compiler/xla/service/hlo_constant_folding.cc
@@ -77,19 +77,23 @@ StatusOr<bool> HloConstantFolding::Run(HloModule* module) {
       }
 
       // Don't constant fold unless it's a net positive or the output is small.
-      int64 elements_in_removed_operands = 0;
-      for (HloInstruction* operand : instruction->operands()) {
-        if (operand->user_count() == 1) {
-          elements_in_removed_operands +=
-              ShapeUtil::ElementsIn(operand->shape());
+      if (ShapeUtil::IsArray(instruction->shape())) {
+        int64 elements_in_removed_operands = 0;
+        for (HloInstruction* operand : instruction->operands()) {
+          if (operand->user_count() == 1 &&
+              ShapeUtil::IsArray(operand->shape())) {
+            elements_in_removed_operands +=
+                ShapeUtil::ElementsIn(operand->shape());
+          }
         }
-      }
-      int64 elements_in_constant = ShapeUtil::ElementsIn(instruction->shape());
+        int64 elements_in_constant =
+            ShapeUtil::ElementsIn(instruction->shape());
 
-      static const int64 kMaximumConstantSizeElements = 2 * 1000 * 1000;
-      if (elements_in_constant > elements_in_removed_operands &&
-          elements_in_constant > kMaximumConstantSizeElements) {
-        continue;
+        static const int64 kMaximumConstantSizeElements = 2 * 1000 * 1000;
+        if (elements_in_constant > elements_in_removed_operands &&
+            elements_in_constant > kMaximumConstantSizeElements) {
+          continue;
+        }
       }
 
       Literal result;
-- 
GitLab


From 7a4a6381650bd87f775f99139144b6c4c66881cb Mon Sep 17 00:00:00 2001
From: Nehal J Wani <nehaljw.kkd1@gmail.com>
Date: Wed, 3 Oct 2018 23:03:18 -0500
Subject: [PATCH 0333/1085] Static cast size_t to int in arguments 1,2 to
 forward_input_or_allocate_output()

This fix resolves the following compiler error:

tensorflow/core/kernels/mkl_relu_op.cc(1028): error C2398: Element '1':
conversion from 'const std::size_t' to 'int' requires a narrowing conversion
---
 tensorflow/core/kernels/mkl_relu_op.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 84385356e1..3d145f1802 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -1023,7 +1023,8 @@ class MklReluGradOpBase : public OpKernel {
       }
 
       OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
-                                  {diff_dst_index}, diff_src_index,
+                                  {static_cast<const int>(diff_dst_index)},
+                                  static_cast<const int>(diff_src_index),
                                   tf_shape_diff_src, &diff_src_tensor));
       AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src);
 
-- 
GitLab


From 8a437200e14c8e09fcc8e952679d489909f175c8 Mon Sep 17 00:00:00 2001
From: Mingxing Tan <tanmingxing@google.com>
Date: Wed, 3 Oct 2018 21:06:27 -0700
Subject: [PATCH 0334/1085] BEGIN_PUBLIC Rollback some quantization changes
 that breaks some models. END_PUBLIC

Automated rollback of commit d3f14ef70cdf113f9d330c1f7c638003429a1dc4. Revert #19894.

PiperOrigin-RevId: 215678307
---
 .../contrib/quantize/python/quantize.py       | 115 +++++++-----------
 .../quantize/python/quantize_graph_test.py    |  37 ------
 2 files changed, 41 insertions(+), 111 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index afb9de8370..5e63d33db8 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -461,8 +461,8 @@ class _LayerMatch(object):
     return self._bias_add_op
 
 
-def _GetFollowingFakeQuantOp(tensor):
-  """Returns the following FakeQuant op if it exists else None."""
+def _FollowedByFakeQuant(tensor):
+  """Returns True if the tensor is followed by a FakeQuant."""
   fake_quant_ops = set([
       'FakeQuantWithMinMaxVars', 'FakeQuantWithMinMaxArgs',
       'FakeQuantWithMinMaxVarsPerChannel'
@@ -472,11 +472,11 @@ def _GetFollowingFakeQuantOp(tensor):
   while consumers:
     c = consumers.pop()
     if c.type in fake_quant_ops:
-      return c
+      return True
     elif c.type in pass_through_ops:
       for output in c.outputs:
         consumers.extend(output.consumers())
-  return None
+  return False
 
 
 def _InsertQuantOp(context,
@@ -559,77 +559,44 @@ def _InsertQuantOp(context,
   # Prevent ops from being quantized multiple times. Bypass ops can sometimes
   # overlap between multiple matches, so we need to ensure that we don't
   # add duplicate FakeQuant operations.
-  fake_quant_op = _GetFollowingFakeQuantOp(inputs)
-
-  # If we find that we are attempting to insert a fake quant op following
-  # a fake quant, we skip inserting a fake quant op
-
-  if fake_quant_op is None:
-    if moving_avg:
-      quant = (
-          quant_ops.MovingAvgQuantize(
-              inputs,
-              init_min=init_min,
-              init_max=init_max,
-              ema_decay=ema_decay,
-              is_training=is_training,
-              num_bits=bits,
-              narrow_range=narrow_range,
-              vars_collection=vars_collection,
-              name_prefix=name_prefix))
-    else:
-      quant = (
-          quant_ops.LastValueQuantize(
-              inputs,
-              init_min=init_min,
-              init_max=init_max,
-              is_training=is_training,
-              num_bits=bits,
-              narrow_range=narrow_range,
-              vars_collection=vars_collection,
-              name_prefix=name_prefix))
-
-    if quant_delay and quant_delay > 0:
-      activate_quant = math_ops.greater_equal(
-          common.CreateOrGetQuantizationStep(),
-          quant_delay,
-          name=name_prefix + '/activate_quant')
-      quant = control_flow_ops.cond(
-          activate_quant,
-          lambda: quant,
-          lambda: inputs,
-          name=name_prefix + '/delayed_quant')
+  if _FollowedByFakeQuant(inputs):
+    return
+
+  if moving_avg:
+    quant = (
+        quant_ops.MovingAvgQuantize(
+            inputs,
+            init_min=init_min,
+            init_max=init_max,
+            ema_decay=ema_decay,
+            is_training=is_training,
+            num_bits=bits,
+            narrow_range=narrow_range,
+            vars_collection=vars_collection,
+            name_prefix=name_prefix))
   else:
-    # If a fake quant op is present already, make sure that
-    # any downstream use of the tensor reroutes to the appropriate quantized
-    # tensor. If there is no quant_delay, this is simply the output of the
-    # fake quant op. If there is a quant delay, we reroute to the output
-    # of the delayed quant operation, which inserts quantization only after
-    # a specified quant_delay
-
-    quant = fake_quant_op.outputs[0]
-    if quant_delay and quant_delay > 0:
-      name_prefix = '/'.join(quant.name.split('/')[:-1])
-      quant = quant.graph.get_tensor_by_name(name_prefix +
-                                             '/delayed_quant/Merge:0')
-    pruned_consumer_set = set()
-    for consumer in consumers:
-      fake_quant_dest_op = _GetFollowingFakeQuantOp(consumer.outputs[0])
-      if (fake_quant_dest_op is None or
-          fake_quant_dest_op.name != fake_quant_op.name):
-        pruned_consumer_set.add(consumer)
-    consumers = pruned_consumer_set
-
-    # If we have
-    # input->pass_through->fake_quant
-    # there is nothing to reroute.
-    #
-    # If we have
-    #  input-> pass_through->fake_quant
-    #                |-> consumer
-    # Then we reroute such that:
-    # input-> pass_through->fake_quant
-    #                            |-> consumer
+    quant = (
+        quant_ops.LastValueQuantize(
+            inputs,
+            init_min=init_min,
+            init_max=init_max,
+            is_training=is_training,
+            num_bits=bits,
+            narrow_range=narrow_range,
+            vars_collection=vars_collection,
+            name_prefix=name_prefix))
+
+  if quant_delay and quant_delay > 0:
+    activate_quant = math_ops.greater_equal(
+        common.CreateOrGetQuantizationStep(),
+        quant_delay,
+        name=name_prefix + '/activate_quant')
+    quant = control_flow_ops.cond(
+        activate_quant,
+        lambda: quant,
+        lambda: inputs,
+        name=name_prefix + '/delayed_quant')
+
   if consumers:
     tensors_modified_count = common.RerouteTensor(
         quant, inputs, can_modify=consumers)
diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py
index a9fc6c3c61..e80d2183a6 100644
--- a/tensorflow/contrib/quantize/python/quantize_graph_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py
@@ -27,7 +27,6 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import template
 from tensorflow.python.platform import googletest
 
 
@@ -307,42 +306,6 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
     # No ops should be inserted or removed.
     self.assertEqual(op_names_before_rewrite, op_names_after_rewrite)
 
-  def testWithSharedWeights(self):
-
-    self._RunTestOverAllRewrites(self._TestWithSharedWeights)
-    self._RunTestOverTrainingRewrites(self._TestRewriteWithSharedWeights)
-
-  def _TestRewriteWithSharedWeights(self, rewrite_fn, quant_delay=1):
-    self._TestWithSharedWeights(rewrite_fn, quant_delay)
-
-  def _TestWithSharedWeights(self, rewrite_fn, quant_delay=None):
-    with ops.Graph().as_default() as g:
-      conv = template.make_template('shared_weights_conv', self._ConvLayer)
-      conv()
-      conv()
-      if quant_delay is None:
-        rewrite_fn()
-      else:
-        rewrite_fn(quant_delay=quant_delay)
-
-    conv_ops = [op for op in g.get_operations() if op.type == 'Conv2D']
-    weights_quants = [
-        op for op in g.get_operations()
-        if 'weights_quant' in op.name and op.type == 'FakeQuantWithMinMaxVars'
-    ]
-    # Check that the shared weights variable is not quantized multiple times
-    self.assertTrue(len(weights_quants) == 1)
-    weights_quant_tensor = weights_quants[0].outputs[0]
-    if quant_delay:
-      delayed_weights_quants = [
-          op for op in g.get_operations()
-          if 'weights_quant' in op.name and op.type == 'Merge'
-      ]
-      self.assertTrue(len(delayed_weights_quants) == 1)
-      weights_quant_tensor = delayed_weights_quants[0].outputs[0]
-    # Check that the Conv2D operations get the quantized weights
-    self.assertTrue(all(weights_quant_tensor in op.inputs for op in conv_ops))
-
   def _ConvLayer(
       self, input_tensor=None, scope='test', pre_activation_bypass=False,
       post_activation_bypass=False):
-- 
GitLab


From d3ced638f0496c70c3a063be82b30b358179e369 Mon Sep 17 00:00:00 2001
From: Yuanzhong Xu <yuanzx@google.com>
Date: Wed, 3 Oct 2018 21:41:43 -0700
Subject: [PATCH 0335/1085] [XLA] Delete IsInplaceSlice.

PiperOrigin-RevId: 215681153
---
 .../xla/service/hlo_dataflow_analysis.cc      | 24 -------------------
 .../xla/service/hlo_dataflow_analysis.h       |  1 -
 .../compiler/xla/service/hlo_instruction.cc   |  4 ----
 .../compiler/xla/service/hlo_instruction.h    |  3 ---
 .../compiler/xla/service/hlo_instructions.h   | 14 -----------
 .../xla/service/tuple_points_to_analysis.cc   | 23 ++++--------------
 .../xla/service/tuple_points_to_analysis.h    |  1 -
 7 files changed, 4 insertions(+), 66 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index 44cde4a3d2..c22adcdd8d 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -356,23 +356,6 @@ bool HloDataflowAnalysis::UpdateBitcastValueSet(HloInstruction* bitcast) {
   return false;
 }
 
-bool HloDataflowAnalysis::UpdateSliceValueSet(HloInstruction* slice) {
-  CHECK_EQ(slice->opcode(), HloOpcode::kSlice);
-  if (!slice->IsInPlaceSlice()) {
-    return false;
-  }
-  // If this slice is lowered to an in-place version, then it forwards the
-  // operand value to the output.
-  const InstructionValueSet& operand_set =
-      GetInstructionValueSet(slice->operand(0));
-  InstructionValueSet& slice_set = GetInstructionValueSet(slice);
-  if (operand_set != slice_set) {
-    slice_set = operand_set;
-    return true;
-  }
-  return false;
-}
-
 bool HloDataflowAnalysis::UpdateSendValueSet(HloInstruction* send) {
   CHECK_EQ(send->opcode(), HloOpcode::kSend);
   bool changed = false;
@@ -641,8 +624,6 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet(
   switch (instruction->opcode()) {
     case HloOpcode::kBitcast:
       return UpdateBitcastValueSet(instruction);
-    case HloOpcode::kSlice:
-      return UpdateSliceValueSet(instruction);
     case HloOpcode::kDomain:
       return UpdateDomainValueSet(instruction);
     case HloOpcode::kCopy:
@@ -814,11 +795,6 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() {
             define_all_values();
           }
           break;
-        case HloOpcode::kSlice:
-          if (!instruction->IsInPlaceSlice()) {
-            define_all_values();
-          }
-          break;
         case HloOpcode::kWhile:
         case HloOpcode::kCall:
         case HloOpcode::kConditional:
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
index e62c1c2ac8..abac398c04 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h
@@ -182,7 +182,6 @@ class HloDataflowAnalysis {
   // Updates the value set for a particular instruction type. Returns whether
   // the instruction value set changed.
   bool UpdateBitcastValueSet(HloInstruction* bitcast);
-  bool UpdateSliceValueSet(HloInstruction* slice);
   bool UpdateCallValueSet(HloInstruction* call);
   bool UpdateConditionalValueSet(HloInstruction* conditional);
   bool UpdateCopyValueSet(HloInstruction* copy);
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 8bddaa8c96..fb91adc302 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -3076,10 +3076,6 @@ const std::vector<int64>& HloInstruction::slice_strides() const {
   return Cast<HloSliceInstruction>(this)->slice_strides();
 }
 
-bool HloInstruction::IsInPlaceSlice() const {
-  return Cast<HloSliceInstruction>(this)->IsInPlaceSlice();
-}
-
 const Literal& HloInstruction::literal() const {
   return Cast<HloConstantInstruction>(this)->literal();
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 9deed20e5d..374862c4b6 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -1330,9 +1330,6 @@ class HloInstruction {
   int64 slice_strides(int64 dimension) const;
   const std::vector<int64>& slice_strides() const;
 
-  // Delegates to HloSliceInstruction::IsInPlaceSlice.
-  bool IsInPlaceSlice() const;
-
   // Returns the literal associated with this instruction.
   const Literal& literal() const;
 
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index c929867bb9..ab168800f6 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -546,17 +546,6 @@ class HloSliceInstruction : public HloInstruction {
   }
   const std::vector<int64>& slice_strides() const { return slice_strides_; }
 
-  // Returns the flag that describes whether a slice must be lowered into an
-  // offset into the original operand.
-  bool IsInPlaceSlice() const { return is_in_place_slice_; }
-
-  // Sets and returns the flag that describes whether a slice must be lowered
-  // into an offset into the original operand.
-  bool SetIsInPlaceSlice(bool value) {
-    is_in_place_slice_ = value;
-    return value;
-  }
-
  private:
   std::vector<string> ExtraAttributesToStringImpl(
       const HloPrintOptions& options) const override;
@@ -573,9 +562,6 @@ class HloSliceInstruction : public HloInstruction {
   std::vector<int64> slice_starts_;
   std::vector<int64> slice_limits_;
   std::vector<int64> slice_strides_;
-
-  // Describes whether the slice can be lowered to an offset into the operand.
-  bool is_in_place_slice_ = false;
 };
 
 class HloConstantInstruction : public HloInstruction {
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
index 6fed7c76d0..811ac55e2d 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
@@ -280,16 +280,6 @@ Status TuplePointsToAnalysis::HandleDomain(HloInstruction* domain) {
   return Status::OK();
 }
 
-Status TuplePointsToAnalysis::HandleSlice(HloInstruction* slice) {
-  // A kSlice instruction aliases its operand if the backend lowers it to an
-  // in-place implementation.
-  if (slice->IsInPlaceSlice()) {
-    CreateCopiedPointsToSet(slice, slice->operand(0));
-    return Status::OK();
-  }
-  return DefaultAction(slice);
-}
-
 Status TuplePointsToAnalysis::HandleRecvDone(HloInstruction* recv_done) {
   // RecvDone aliases its input (Recv) tuple element {0} to element {0} of its
   // output. The other indices ({} and {1}) define their own buffers.
@@ -455,15 +445,10 @@ bool TuplePointsToAnalysis::InstructionDefinesBufferAtIndex(
 
 Status TuplePointsToAnalysis::VerifyBuffer(const LogicalBuffer& buffer) const {
   if (!InstructionDefinesBufferAtIndex(buffer.instruction(), buffer.index())) {
-    // kSlice ops that are lowered to an in-place version are expected to not
-    // define their output buffer.
-    if (buffer.instruction()->opcode() != HloOpcode::kSlice ||
-        !buffer.instruction()->IsInPlaceSlice()) {
-      return FailedPrecondition(
-          "LogicalBuffer %s is ill-defined: instruction %s does not define a "
-          "buffer at that index",
-          buffer.ToString(), buffer.instruction()->name());
-    }
+    return FailedPrecondition(
+        "LogicalBuffer %s is ill-defined: instruction %s does not define a "
+        "buffer at that index",
+        buffer.ToString(), buffer.instruction()->name());
   }
 
   if (buffer.id() < 0 ||
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
index 64ad1dc80e..30c365053c 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
@@ -247,7 +247,6 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault {
   Status HandleGetTupleElement(HloInstruction* get_tuple_element) override;
   Status HandleBitcast(HloInstruction* bitcast) override;
   Status HandleDomain(HloInstruction* domain) override;
-  Status HandleSlice(HloInstruction* slice) override;
   Status HandleCopy(HloInstruction* copy) override;
   Status HandleRecvDone(HloInstruction* recv_done) override;
   Status HandleSend(HloInstruction* send) override;
-- 
GitLab


From 54cde61fbf473270ce19f8b40e9511373fbc12c7 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 3 Oct 2018 22:00:51 -0700
Subject: [PATCH 0336/1085] [tf.data] Fix bug in
 `tf.data.experimental.unbatch()`.

Previously, if the rank of the input to this transformation was
statically unknown, we would erroneously report that the output is a
scalar, and violate downstream shape integrity checks. Instead, in
that case the output shape should be unknown.

PiperOrigin-RevId: 215683027
---
 tensorflow/core/kernels/data/unbatch_dataset_op.cc | 13 +++++++++----
 .../kernel_tests/batch_dataset_op_test.py          | 14 ++++++++++++++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/data/unbatch_dataset_op.cc b/tensorflow/core/kernels/data/unbatch_dataset_op.cc
index 81c432b938..74908994b4 100644
--- a/tensorflow/core/kernels/data/unbatch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/unbatch_dataset_op.cc
@@ -41,11 +41,16 @@ class UnbatchDatasetOp : public UnaryDatasetOpKernel {
         : DatasetBase(DatasetContext(ctx)), input_(input) {
       input_->Ref();
       for (const PartialTensorShape& shape : input->output_shapes()) {
-        gtl::InlinedVector<int64, 4> partial_dim_sizes;
-        for (int i = 1; i < shape.dims(); ++i) {
-          partial_dim_sizes.push_back(shape.dim_size(i));
+        if (!shape.unknown_rank()) {
+          gtl::InlinedVector<int64, 4> partial_dim_sizes;
+          for (int i = 1; i < shape.dims(); ++i) {
+            partial_dim_sizes.push_back(shape.dim_size(i));
+          }
+          shapes_.emplace_back(std::move(partial_dim_sizes));
+        } else {
+          // If the input shape is unknown, the output shape will be unknown.
+          shapes_.emplace_back();
         }
-        shapes_.emplace_back(std::move(partial_dim_sizes));
       }
     }
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
index 8703b2810e..956b4518f6 100644
--- a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
@@ -131,6 +131,20 @@ class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
                                    "larger than the row shape"):
         sess.run(get_next)
 
+  def testUnbatchWithUnknownRankInput(self):
+    placeholder = array_ops.placeholder(dtypes.int32)
+    dataset = dataset_ops.Dataset.from_tensors(placeholder).apply(
+        batching.unbatch())
+    iterator = dataset.make_initializable_iterator()
+    next_elem = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer, feed_dict={placeholder: [0, 1, 2, 3]})
+      for i in range(4):
+        self.assertEqual(i, sess.run(next_elem))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_elem)
+
   def testUnbatchScalarDataset(self):
     data = tuple([math_ops.range(10) for _ in range(3)])
     data = dataset_ops.Dataset.from_tensor_slices(data)
-- 
GitLab


From 1f1fe5a01af616707b8554d59651fb4925d7faee Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Wed, 3 Oct 2018 22:23:08 -0700
Subject: [PATCH 0337/1085] Include .inc files for absl headers

---
 tensorflow/tools/pip_package/setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index d864a7a039..54a7b7ffbe 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -232,6 +232,8 @@ headers = (list(find_files('*.h', 'tensorflow/core')) +
            list(find_files('*', 'third_party/eigen3')) +
            list(find_files('*.h',
                            'tensorflow/include/external/com_google_absl')) +
+           list(find_files('*.inc',
+                           'tensorflow/include/external/com_google_absl')) +
            list(find_files('*', 'tensorflow/include/external/eigen_archive')))
 
 setup(
-- 
GitLab


From 6795491bcc0c276e27be6a9e1a4a14c019c2ba37 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 22:24:14 -0700
Subject: [PATCH 0338/1085] Pin wheel=0.31.1 in install_auditwheel.sh to work
 around issue https://github.com/pypa/auditwheel/issues/102

PiperOrigin-RevId: 215685104
---
 tensorflow/tools/ci_build/install/install_auditwheel.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/ci_build/install/install_auditwheel.sh b/tensorflow/tools/ci_build/install/install_auditwheel.sh
index e6f6124d56..0e6d98c0a8 100755
--- a/tensorflow/tools/ci_build/install/install_auditwheel.sh
+++ b/tensorflow/tools/ci_build/install/install_auditwheel.sh
@@ -18,6 +18,10 @@ set -e
 
 sudo pip3 install auditwheel==1.5.0
 
+# Pin wheel==0.31.1 to work around issue
+# https://github.com/pypa/auditwheel/issues/102
+sudo pip3 install wheel==0.31.1
+
 set +e
 patchelf_location=$(which patchelf)
 if [[ -z "$patchelf_location" ]]; then
-- 
GitLab


From 80c012334f18b7b4b283dfea4b49e7cb42fa5b0a Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Wed, 3 Oct 2018 22:24:14 -0700
Subject: [PATCH 0339/1085] Pin wheel=0.31.1 in install_auditwheel.sh to work
 around issue https://github.com/pypa/auditwheel/issues/102

PiperOrigin-RevId: 215685104
---
 tensorflow/tools/ci_build/install/install_auditwheel.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/ci_build/install/install_auditwheel.sh b/tensorflow/tools/ci_build/install/install_auditwheel.sh
index e6f6124d56..0e6d98c0a8 100755
--- a/tensorflow/tools/ci_build/install/install_auditwheel.sh
+++ b/tensorflow/tools/ci_build/install/install_auditwheel.sh
@@ -18,6 +18,10 @@ set -e
 
 sudo pip3 install auditwheel==1.5.0
 
+# Pin wheel==0.31.1 to work around issue
+# https://github.com/pypa/auditwheel/issues/102
+sudo pip3 install wheel==0.31.1
+
 set +e
 patchelf_location=$(which patchelf)
 if [[ -z "$patchelf_location" ]]; then
-- 
GitLab


From e57874169fca3cfdd15cf0dda3717a6374a7dcb9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 3 Oct 2018 23:03:11 -0700
Subject: [PATCH 0340/1085] [XLA] Update Tf2Xla bridge to use Scatter HLO.

PiperOrigin-RevId: 215687800
---
 tensorflow/compiler/tf2xla/lib/scatter.cc     | 213 ++++++++++--------
 tensorflow/compiler/tf2xla/lib/scatter.h      |   6 +-
 tensorflow/compiler/xla/client/xla_builder.cc |   3 +
 tensorflow/compiler/xla/service/hlo_module.cc |   3 +-
 tensorflow/compiler/xla/service/inliner.cc    |  32 +--
 .../compiler/xla/service/inliner_test.cc      |  30 +++
 6 files changed, 177 insertions(+), 110 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/lib/scatter.cc b/tensorflow/compiler/tf2xla/lib/scatter.cc
index 38dfde165d..2b1c2ced92 100644
--- a/tensorflow/compiler/tf2xla/lib/scatter.cc
+++ b/tensorflow/compiler/tf2xla/lib/scatter.cc
@@ -38,12 +38,10 @@ xla::StatusOr<xla::XlaOp> XlaScatter(
         combiner,
     xla::XlaBuilder* builder) {
   TF_ASSIGN_OR_RETURN(xla::Shape buffer_shape, builder->GetShape(buffer));
-  TF_RETURN_IF_ERROR(builder->GetShape(updates).status());
+  TF_ASSIGN_OR_RETURN(xla::Shape updates_shape, builder->GetShape(updates));
   TF_ASSIGN_OR_RETURN(xla::Shape indices_shape, builder->GetShape(indices));
   absl::Span<const int64> indices_dims =
       xla::AsInt64Slice(indices_shape.dimensions());
-  absl::Span<const int64> buffer_dims =
-      xla::AsInt64Slice(buffer_shape.dimensions());
 
   // If the indices are N-dimensional, the minor dimension of indices contains
   // the indices to update. Otherwise the indices are all scalars.
@@ -81,104 +79,129 @@ xla::StatusOr<xla::XlaOp> XlaScatter(
     }
   }
 
-  // Shape of the non-indexed dimensions of the buffer.
-  std::vector<int64> buffer_shape_post_axes(
-      buffer_dims.begin() + num_index_dims, buffer_dims.end());
-
-  // Flatten the major dimensions of indices and updates into a single dimension
-  // for ease of iteration.
-  std::vector<int64> flat_indices_shape({num_indices});
-  if (indices_are_vectors) {
-    flat_indices_shape.push_back(num_index_dims);
+  // Example of a 1-D scatter that updates two [3,1] tensors in a tensor of
+  // shape [3,3]:
+  // NOTE: ***This case will not be generated by any of the tf.scatter ops.***
+  //
+  //   operand = s32[3,3] parameter(0)
+  //   indices = s32[2] parameter(1)
+  //   updates = s32[3,2] parameter(2)
+  //   scatter = s32[3,3] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={0},
+  //       inserted_window_dims={1},
+  //       scatter_dims_to_operand_dims={1},
+  //       index_vector_dim=1
+  //
+  //
+  // Example of a 1-D scatter that updates two [1,3] tensors in a tensor of
+  // shape [3,3]:
+  //
+  //   operand = s32[3,3] parameter(0)
+  //   indices = s32[2] parameter(1)
+  //   updates = s32[2,3] parameter(2)
+  //   scatter = s32[3,3] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={1},
+  //       inserted_window_dims={0},
+  //       scatter_dims_to_operand_dims={0},
+  //       index_vector_dim=1
+  //
+  //
+  // Example of an N-D scatter updating slices of shape [1,1,2] in a tensor of
+  // shape [3,3,2]
+  //
+  //   operand = s32[3,3,2] parameter(0)
+  //   indices = s32[2,2] parameter(1)
+  //   updates = s32[2,2] parameter(2)
+  //   scatter = s32[3,3,2] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={1},
+  //       inserted_window_dims={0,1},
+  //       scatter_dims_to_operand_dims={0,1},
+  //       index_vector_dim=1
+  //
+  //
+  // Example of a scatter updating slices of shape [] in a tensor of shape [1,1]
+  //
+  //   operand = s32[1,1] parameter(0)
+  //   indices = s32[1] parameter(1)
+  //   updates = s32[1] parameter(2)
+  //   scatter = s32[1,1] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={},
+  //       inserted_window_dims={0,1},
+  //       scatter_dims_to_operand_dims={0},
+  //       index_vector_dim=1
+  // Note that updates operand would be broadcasted into [1] in this case.
+  //
+
+  xla::ScatterDimensionNumbers dim_numbers;
+  dim_numbers.set_index_vector_dim(indices_are_vectors
+                                       ? indices_shape.dimensions_size() - 1
+                                       : indices_shape.dimensions_size());
+
+  int64 updates_rank = xla::ShapeUtil::Rank(updates_shape);
+  int64 buffer_rank = xla::ShapeUtil::Rank(buffer_shape);
+  int64 num_window_dims_in_updates = buffer_rank - num_index_dims;
+
+  // If the rank of `updates` is 0 and does not match the expected rank of
+  // updates, broadcast `updates` to the expected shape of updates.
+  auto new_updates = updates;
+  std::vector<int64> expected_updates_dims(indices_dims.begin(),
+                                           indices_dims.end());
+  for (int64 dim = num_index_dims; dim < buffer_rank; ++dim) {
+    expected_updates_dims.push_back(buffer_shape.dimensions(dim));
+  }
+  int64 expected_updates_rank = expected_updates_dims.size();
+  if (updates_rank == 0 && expected_updates_rank != 0) {
+    new_updates = xla::Broadcast(updates, expected_updates_dims);
+    TF_ASSIGN_OR_RETURN(updates_shape, builder->GetShape(new_updates));
+    updates_rank = xla::ShapeUtil::Rank(updates_shape);
   }
 
-  std::vector<int64> flat_updates_shape({num_indices});
-  flat_updates_shape.insert(flat_updates_shape.end(),
-                            buffer_shape_post_axes.begin(),
-                            buffer_shape_post_axes.end());
-
-  // Construct the initial values of the loop-carried Tensors.
-  auto flat_indices = xla::Reshape(indices, flat_indices_shape);
-  auto flat_updates = xla::Reshape(updates, flat_updates_shape);
-  auto init = {flat_indices, flat_updates, buffer};
-
-  // Constructs the loop body. The implementation of scatter is essentially:
-  // for i in range(num_indices):
-  //   index = dynamic-slice(indices, i)
-  //   update = dynamic-slice(updates, i)
-  //   buffer = dynamic-update-slice(buffer, update, index)
-  auto body_fn = [&](xla::XlaOp i, absl::Span<const xla::XlaOp> loop_vars,
-                     xla::XlaBuilder* body_builder) {
-    auto indices = loop_vars[0];
-    auto updates = loop_vars[1];
-    auto buffer = loop_vars[2];
-
-    auto zero_index = xla::ConstantLiteral(
-        body_builder, xla::LiteralUtil::Zero(indices_shape.element_type()));
-
-    // Slice the i-th index from the indices array.
-    xla::XlaOp index;
-    auto indices_offset = xla::Reshape(i, {1});
-    if (indices_are_vectors) {
-      indices_offset = xla::Pad(indices_offset, zero_index,
-                                xla::MakeEdgePaddingConfig({{0, 1}}));
-
-      index = xla::DynamicSlice(indices, indices_offset, {1, num_index_dims});
-      index = xla::Collapse(index, {0, 1});
-    } else {
-      index = xla::DynamicSlice(indices, indices_offset, {1});
+  if (updates_rank > 0) {
+    for (int64 i = (updates_rank - num_window_dims_in_updates);
+         i < updates_rank; ++i) {
+      dim_numbers.add_update_window_dims(i);
     }
+  }
 
-    // Discard updates with negative indices, since some users expect this.
-    auto index_in_range = xla::ReduceAll(
-        xla::Le(zero_index, index), xla::ConstantR0<bool>(body_builder, true),
-        xla::CreateScalarAndComputation(xla::PRED, body_builder));
-
-    // Make the index in bounds to prevent implementation defined behavior.
-    index = xla::Max(index, zero_index);
-    index = xla::Pad(
-        index, zero_index,
-        xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}}));
-
-    // Slice the i-th index from the updates array.
-    auto updates_offset = xla::Reshape(i, {1});
-    updates_offset = xla::Pad(
-        updates_offset, zero_index,
-        xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}}));
-    std::vector<int64> flat_updates_slice_shape({1});
-    flat_updates_slice_shape.insert(flat_updates_slice_shape.end(),
-                                    buffer_shape_post_axes.begin(),
-                                    buffer_shape_post_axes.end());
-    auto update =
-        xla::DynamicSlice(updates, updates_offset, flat_updates_slice_shape);
-
-    // Unflatten the major (iteration) dimensions of the slice to their
-    // original shape.
-    std::vector<int64> updates_slice_shape(num_index_dims, 1);
-    updates_slice_shape.insert(updates_slice_shape.end(),
-                               buffer_shape_post_axes.begin(),
-                               buffer_shape_post_axes.end());
-    update = xla::Reshape(update, updates_slice_shape);
-
-    // Apply the update to the buffer. If there is a combiner, use it to merge
-    // the current values with the update.
-    auto current_value = xla::DynamicSlice(buffer, index, updates_slice_shape);
+  for (int64 i = 0; i < num_index_dims; ++i) {
+    dim_numbers.add_inserted_window_dims(i);
+    dim_numbers.add_scatter_dims_to_operand_dims(i);
+  }
+
+  // Build the combiner computation.
+  xla::XlaComputation combiner_computation;
+  {
+    xla::XlaBuilder cb("scatter-combiner");
+    auto xla_scalar_shape =
+        xla::ShapeUtil::MakeShape(buffer_shape.element_type(), {});
+    auto p0 = xla::Parameter(&cb, 0, xla_scalar_shape, "p0");
+    auto p1 = xla::Parameter(&cb, 1, xla_scalar_shape, "p1");
     if (combiner) {
-      update = combiner(current_value, update, body_builder);
+      combiner(p0, p1, &cb);
     }
-    // Use the current value instead of the update if the index is out of
-    // bounds.
-    update = xla::Select(index_in_range, update, current_value);
-    // Apply the update.
-    buffer = xla::DynamicUpdateSlice(buffer, update, index);
-
-    return std::vector<xla::XlaOp>{indices, updates, buffer};
-  };
-
-  TF_ASSIGN_OR_RETURN(auto outputs,
-                      XlaForEachIndex(num_indices, indices_shape.element_type(),
-                                      body_fn, init, "scatter", builder));
-  return outputs[2];
+    combiner_computation = cb.Build().ConsumeValueOrDie();
+  }
+
+  VLOG(3) << "Scatter op:";
+  VLOG(3) << "  Input: " << xla::ShapeUtil::HumanString(buffer_shape);
+  VLOG(3) << "  Indices: " << xla::ShapeUtil::HumanString(indices_shape);
+  VLOG(3) << "  Updates: " << xla::ShapeUtil::HumanString(updates_shape);
+  VLOG(3) << "  Scatter Dimension Numbers: ";
+  VLOG(3) << "    index_vector_dim: " << dim_numbers.index_vector_dim();
+  VLOG(3) << "    update_window_dims: ["
+          << absl::StrJoin(dim_numbers.update_window_dims(), ",") << "]";
+  VLOG(3) << "    inserted_window_dims: ["
+          << absl::StrJoin(dim_numbers.inserted_window_dims(), ",") << "]";
+  VLOG(3) << "    scatter_dims_to_operand_dims: ["
+          << absl::StrJoin(dim_numbers.scatter_dims_to_operand_dims(), ",")
+          << "]";
+
+  return xla::Scatter(buffer, indices, new_updates, combiner_computation,
+                      dim_numbers);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/lib/scatter.h b/tensorflow/compiler/tf2xla/lib/scatter.h
index 13a5f1b850..4cf478c4b9 100644
--- a/tensorflow/compiler/tf2xla/lib/scatter.h
+++ b/tensorflow/compiler/tf2xla/lib/scatter.h
@@ -34,7 +34,11 @@ namespace tensorflow {
 // Otherwise, `indices_are_vectors`, then indices are multidimensional and the
 // minor dimension of `indices` represents a vector of indices.
 //
-// If any indices are negative, the corresponding update is discarded.
+// If `updates` is a scalar, then it will be broadcasted into the expected shape
+// of updates.
+//
+// If any part of the update region is out-of-bounds, the corresponding update
+// is discarded.
 //
 // If a `combiner` is provided, updates are combined with the existing values in
 // the buffer using the combiner function. Otherwise, the updates replace the
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index e0ec91dba1..d196252db1 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -208,6 +208,9 @@ void XlaBuilder::IsConstantVisitor(const int64 op_handle,
     case HloOpcode::kWhile:
       // TODO(b/32495713): We aren't checking the condition and body
       // computations themselves.
+    case HloOpcode::kScatter:
+      // TODO(b/32495713): We aren't checking the embedded computation in
+      // Scatter.
     case HloOpcode::kSend:
     case HloOpcode::kRecv:
     case HloOpcode::kParameter:
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 7527e35c95..93e04eb3db 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -146,7 +146,8 @@ void HloModule::ReplaceComputations(
         case HloOpcode::kCall:
         case HloOpcode::kMap:
         case HloOpcode::kReduce:
-        case HloOpcode::kReduceWindow: {
+        case HloOpcode::kReduceWindow:
+        case HloOpcode::kScatter: {
           HloComputation* new_arg = tensorflow::gtl::FindWithDefault(
               replacements, instruction->to_apply(), nullptr);
           if (new_arg != nullptr) {
diff --git a/tensorflow/compiler/xla/service/inliner.cc b/tensorflow/compiler/xla/service/inliner.cc
index 5fd779ebf9..50c408f5bb 100644
--- a/tensorflow/compiler/xla/service/inliner.cc
+++ b/tensorflow/compiler/xla/service/inliner.cc
@@ -71,26 +71,23 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) {
   // profitability model for inlining is defined.
   if (hlo_query::AllOperandsAreParameters(root)) {
     if (root.opcode() == HloOpcode::kFusion ||
-        root.opcode() == HloOpcode::kParameter ||
         root.opcode() == HloOpcode::kTrace) {
       // Cloning not supported for these instructions.
       return Status::OK();
     }
     VLOG(10) << "inlining map({X ... Y}, op) => : op(X ... Y) with function "
              << root.ToShortString();
-    // If the input is a constant then the shape of the constant could be
-    // different than the map shape. Hence, a broadcast is needed, else the
-    // cloned operand with new shape and operands work.
-    if (root.opcode() != HloOpcode::kConstant) {
-      std::vector<HloInstruction*> params;
-      for (int64 o = 0; o < root.operands().size(); o++) {
-        params.push_back(map->operands()[root.operand(o)->parameter_number()]);
-      }
-      HloInstruction* placed_instruction = computation_->AddInstruction(
-          root.CloneWithNewOperands(map->shape(), params));
+    if (root.opcode() == HloOpcode::kParameter) {
+      // If the root is a parameter, then use the corresponding operand as the
+      // result of the computation.
       TF_RETURN_IF_ERROR(
-          computation_->ReplaceInstruction(map, placed_instruction));
-    } else {
+          map->ReplaceAllUsesWith(map->operands()[root.parameter_number()]));
+      TF_RETURN_IF_ERROR(computation_->RemoveInstruction(map));
+    } else if (root.opcode() == HloOpcode::kConstant) {
+      // If the input is a constant then the shape of the constant could be
+      // different than the map shape. Hence, a broadcast is needed, else the
+      // cloned operand with new shape and operands work.
+      //
       // The constant is in an embedded computation and needs to be recreated
       // as part of the computation that the broadcast is inserted into.
       HloInstruction* constant = computation_->AddInstruction(root.Clone());
@@ -98,6 +95,15 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) {
           HloInstruction::CreateBroadcast(map->shape(), constant, {}));
       TF_RETURN_IF_ERROR(
           computation_->ReplaceInstruction(map, placed_instruction));
+    } else {
+      std::vector<HloInstruction*> params;
+      for (int64 o = 0; o < root.operands().size(); o++) {
+        params.push_back(map->operands()[root.operand(o)->parameter_number()]);
+      }
+      HloInstruction* placed_instruction = computation_->AddInstruction(
+          root.CloneWithNewOperands(map->shape(), params));
+      TF_RETURN_IF_ERROR(
+          computation_->ReplaceInstruction(map, placed_instruction));
     }
     changed_ = true;
     return Status::OK();
diff --git a/tensorflow/compiler/xla/service/inliner_test.cc b/tensorflow/compiler/xla/service/inliner_test.cc
index 7e967f035c..98e0f2cfd7 100644
--- a/tensorflow/compiler/xla/service/inliner_test.cc
+++ b/tensorflow/compiler/xla/service/inliner_test.cc
@@ -146,6 +146,36 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) {
   EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
 }
 
+TEST_F(InlinerTest, MapParameter) {
+  Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+
+  auto param_builder = HloComputation::Builder(TestName());
+  param_builder.AddInstruction(HloInstruction::CreateParameter(0, r0f32, "p0"));
+  param_builder.AddInstruction(HloInstruction::CreateParameter(1, r0f32, "p1"));
+  auto param_f32 = param_builder.Build();
+
+  auto builder = HloComputation::Builder("MapParamFunction");
+  auto lhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(1)));
+  auto rhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(4)));
+  builder.AddInstruction(
+      HloInstruction::CreateMap(lhs->shape(), {lhs, rhs}, param_f32.get()));
+
+  auto computation = builder.Build();
+  auto hlo_module = CreateNewVerifiedModule();
+  hlo_module->AddEmbeddedComputation(std::move(param_f32));
+  hlo_module->AddEntryComputation(std::move(computation));
+
+  Inliner inliner;
+  EXPECT_TRUE(inliner.Run(hlo_module.get()).ValueOrDie());
+  EXPECT_THAT(hlo_module->entry_computation()->root_instruction(), rhs);
+
+  // Verify execution on CPU.
+  auto result = ExecuteAndTransfer(hlo_module->Clone(), {});
+  auto expected = LiteralUtil::CreateR0<float>(4);
+  EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
+}
 
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 67e0ccb3e5c1a48d62bcc45201fd70d2420dc4eb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 02:27:59 -0700
Subject: [PATCH 0341/1085] compat: Update forward compatibility horizon to
 2018-10-04

PiperOrigin-RevId: 215706500
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index d833defb8e..76e08610ba 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 3)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 4)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 98ea840dabc0c4e9417ebe9a0fd10c9d471cda51 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 02:41:25 -0700
Subject: [PATCH 0342/1085] Improve the performance of the ListMemoryScheduler

This CL replaces a std::unordered_map with an absl::flat_hash_map and
removes an unnecessary map lookup. This two change can improve the
performance of the scheduler on large graphs by up to 2x.

PiperOrigin-RevId: 215707921
---
 .../compiler/xla/service/hlo_memory_scheduler.cc       | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index 55314d0ae9..bf30764488 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -263,9 +263,8 @@ class ListScheduler {
     };
 
     for (auto* instruction : computation_.instructions()) {
-      // Instruction with no operands or control predecessors will
-      // not be in the map.
-      if (unscheduled_pred_count.count(instruction) == 0) {
+      if (instruction->operands().empty() &&
+          instruction->control_predecessors().empty()) {
         add_to_ready_queue(instruction);
       }
     }
@@ -356,9 +355,8 @@ class ListScheduler {
       buffer_uses_;
 
   // A map containing the count of unscheduled HLOs which using a particular
-  // LogicalBuffer.  We rely on iterator stability in this map, and that the map
-  // entries are std::pair's.
-  std::unordered_map<const LogicalBuffer*, int64> unscheduled_use_count_;
+  // LogicalBuffer.
+  absl::flat_hash_map<const LogicalBuffer*, int64> unscheduled_use_count_;
 
   // Set of instructions which have been scheduled.
   absl::flat_hash_set<const HloInstruction*> scheduled_instructions_;
-- 
GitLab


From 6b538d9ce54e878576131cde0c76e43a893180c2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 03:12:04 -0700
Subject: [PATCH 0343/1085] Automated rollback of commit
 70a395f9795a48c21bc35cdf1dc44778f73a7bba

PiperOrigin-RevId: 215710849
---
 tensorflow/python/data/kernel_tests/BUILD     |  1 +
 tensorflow/tensorflow.bzl                     | 39 +++++++++++--------
 .../tools/pip_package/pip_smoke_test.py       |  2 +-
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index c7295d6e69..10ec0dbe1c 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -306,6 +306,7 @@ cuda_py_test(
         "//tensorflow/python:framework_test_lib",
     ],
     tags = [
+        "no_oss",  # TODO(b/116813115): Investigate timeout and re-enable.
         "no_windows_gpu",
     ],
 )
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index cad5de1b0c..dead44c57e 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1798,22 +1798,29 @@ def cuda_py_test(
         flaky = 0,
         xla_enabled = False,
         grpc_enabled = False):
-    test_tags = tags + tf_cuda_tests_tags()
-    tf_py_test(
-        name = name,
-        size = size,
-        srcs = srcs,
-        data = data,
-        main = main,
-        args = args,
-        tags = test_tags,
-        shard_count = shard_count,
-        additional_deps = additional_deps,
-        kernels = kernels,
-        flaky = flaky,
-        xla_enabled = xla_enabled,
-        grpc_enabled = grpc_enabled,
-    )
+    if main == None:
+        main = name + ".py"
+    for config in ["cpu", "gpu"]:
+        test_name = name
+        test_tags = tags
+        if config == "gpu":
+            test_name += "_gpu"
+            test_tags = test_tags + tf_cuda_tests_tags()
+        tf_py_test(
+            name = test_name,
+            size = size,
+            srcs = srcs,
+            data = data,
+            main = main,
+            args = args,
+            tags = test_tags,
+            shard_count = shard_count,
+            additional_deps = additional_deps,
+            kernels = kernels,
+            flaky = flaky,
+            xla_enabled = xla_enabled,
+            grpc_enabled = grpc_enabled,
+        )
 
 register_extension_info(
     extension_name = "cuda_py_test",
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index c6ef82ccdc..e7f9628fa6 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -142,7 +142,7 @@ def main():
 
   missing_dependencies = []
   # File extensions and endings to ignore
-  ignore_extensions = ["_test", "_test.py"]
+  ignore_extensions = ["_test", "_test.py", "_test_gpu", "_test_gpu.py"]
 
   ignored_files = 0
   blacklisted_files = len(BLACKLIST)
-- 
GitLab


From 6cc738da1748e819b9c8ee92dc2f1a7bdb291b50 Mon Sep 17 00:00:00 2001
From: Adria Puigdomenech <adriap@google.com>
Date: Thu, 4 Oct 2018 03:19:46 -0700
Subject: [PATCH 0344/1085] Make batch_gather work with indices of dtype int64.

PiperOrigin-RevId: 215711383
---
 tensorflow/python/kernel_tests/BUILD               |  1 +
 .../python/kernel_tests/batch_gather_op_test.py    | 13 ++++++++-----
 tensorflow/python/ops/array_ops.py                 | 14 ++++++++++----
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 9303c70c60..e055ef1c1b 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -76,6 +76,7 @@ tf_py_test(
     name = "batch_gather_op_test",
     srcs = ["batch_gather_op_test.py"],
     additional_deps = [
+        "@absl_py//absl/testing:parameterized",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/python/kernel_tests/batch_gather_op_test.py b/tensorflow/python/kernel_tests/batch_gather_op_test.py
index 7dd347989a..84e93b8136 100644
--- a/tensorflow/python/kernel_tests/batch_gather_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_gather_op_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.framework import constant_op
@@ -29,7 +30,7 @@ _TEST_TYPES = (dtypes.int64, dtypes.float32,
                dtypes.complex64, dtypes.complex128)
 
 
-class GatherTest(test.TestCase):
+class GatherTest(test.TestCase, parameterized.TestCase):
 
   def _buildParams(self, data, dtype):
     data = data.astype(dtype.as_numpy_dtype)
@@ -39,14 +40,15 @@ class GatherTest(test.TestCase):
       return data + 10j * data
     return data
 
-  def testSimpleGather(self):
+  @parameterized.parameters(dtypes.int32, dtypes.int64)
+  def testSimpleGather(self, indices_dtype):
     data = np.array([0, 1, 2, 3, 7, 5, 8, 9, 10, 11, 15, 13])
     indices = [3, 4]
     with self.test_session(use_gpu=True):
       for dtype in _TEST_TYPES:
         params_np = self._buildParams(data, dtype)
         params = constant_op.constant(params_np)
-        indices_tf = constant_op.constant(indices)
+        indices_tf = constant_op.constant(indices, dtype=indices_dtype)
         gather_t = array_ops.batch_gather(params, indices_tf)
         expected_result = np.array([3, 7])
         np_val = self._buildParams(expected_result, dtype)
@@ -54,14 +56,15 @@ class GatherTest(test.TestCase):
         self.assertAllEqual(np_val, gather_val)
         self.assertEqual(np_val.shape, gather_t.get_shape())
 
-  def test2DArray(self):
+  @parameterized.parameters(dtypes.int32, dtypes.int64)
+  def test2DArray(self, indices_dtype):
     data = np.array([[0, 1, 2, 3, 7, 5], [8, 9, 10, 11, 15, 13]])
     indices = [[3], [4]]
     with self.test_session(use_gpu=True):
       for dtype in _TEST_TYPES:
         params_np = self._buildParams(data, dtype)
         params = constant_op.constant(params_np)
-        indices_tf = constant_op.constant(indices)
+        indices_tf = constant_op.constant(indices, dtype=indices_dtype)
         gather_t = array_ops.batch_gather(params, indices_tf)
         expected_result = np.array([[3], [15]])
         np_val = self._buildParams(expected_result, dtype)
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 9f5149d5ac..4be9c532f4 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -2716,16 +2716,22 @@ def batch_gather(params, indices, name=None):
     params = ops.convert_to_tensor(params, name="params")
     indices_shape = shape(indices)
     params_shape = shape(params)
+
     ndims = indices.shape.ndims
     if ndims is None:
       raise ValueError("batch_gather does not allow indices with unknown "
                        "shape.")
     batch_indices = indices
-    accum_dim_value = 1
+    indices_dtype = indices.dtype.base_dtype
+    accum_dim_value = ones((), dtype=indices_dtype)
+    # Use correct type for offset index computation
+    casted_params_shape = gen_math_ops.cast(params_shape, indices_dtype)
     for dim in range(ndims-1, 0, -1):
-      dim_value = params_shape[dim-1]
-      accum_dim_value *= params_shape[dim]
-      dim_indices = gen_math_ops._range(0, dim_value, 1)
+      dim_value = casted_params_shape[dim-1]
+      accum_dim_value *= casted_params_shape[dim]
+      start = zeros((), dtype=indices_dtype)
+      step = ones((), dtype=indices_dtype)
+      dim_indices = gen_math_ops._range(start, dim_value, step)
       dim_indices *= accum_dim_value
       dim_shape = stack([1] * (dim - 1) + [dim_value] + [1] * (ndims - dim),
                         axis=0)
-- 
GitLab


From 9cd6cab4f85f1f35c6532da3fb68839294d44ee4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 03:20:59 -0700
Subject: [PATCH 0345/1085] Internal change.

PiperOrigin-RevId: 215711454
---
 .../cluster_resolver/python/training/tpu_cluster_resolver.py     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
index 1056894f18..f4a8e16c99 100644
--- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
+++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py
@@ -60,6 +60,7 @@ class TPUClusterResolver(ClusterResolver):
     if (self._tpu == compat.as_bytes('') or
         self._tpu == compat.as_bytes('local') or
         self._tpu.startswith(compat.as_bytes('/bns')) or
+        self._tpu.startswith(compat.as_bytes('localhost:')) or
         self._tpu.startswith(compat.as_bytes('grpc://'))):
       return False
     return True
-- 
GitLab


From 28f239fdfa0c94f715fccf0197ab6c3c8df27d28 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Thu, 4 Oct 2018 05:34:55 -0700
Subject: [PATCH 0346/1085] Implement DataFormatVecPermute for XLA.

Also clear "_kernel" attributes of nodes if they are set to "host".
This is not meaningful when processing the graph for XLA, and it
would prevent finding the registered XLA kernel.

PiperOrigin-RevId: 215722216
---
 tensorflow/compiler/tests/BUILD               | 13 +++
 tensorflow/compiler/tests/permute_test.py     | 80 +++++++++++++++
 tensorflow/compiler/tf2xla/kernels/BUILD      |  1 +
 .../compiler/tf2xla/kernels/permute_op.cc     | 98 +++++++++++++++++++
 tensorflow/compiler/tf2xla/xla_compiler.cc    | 11 +++
 5 files changed, 203 insertions(+)
 create mode 100644 tensorflow/compiler/tests/permute_test.py
 create mode 100644 tensorflow/compiler/tf2xla/kernels/permute_op.cc

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 822fedf121..ee36729fd1 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -1028,6 +1028,19 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "permute_test",
+    size = "small",
+    srcs = ["permute_test.py"],
+    deps = [
+        "//tensorflow/compiler/tests:xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:nn_ops",
+    ],
+)
+
 tf_xla_py_test(
     name = "xla_device_test",
     size = "small",
diff --git a/tensorflow/compiler/tests/permute_test.py b/tensorflow/compiler/tests/permute_test.py
new file mode 100644
index 0000000000..dbb9274df4
--- /dev/null
+++ b/tensorflow/compiler/tests/permute_test.py
@@ -0,0 +1,80 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the DataFormatVecPermute operator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.compiler.tests import xla_test
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.platform import test
+
+
+class XlaPermuteOpTest(xla_test.XLATestCase):
+
+  def _runPermuteAndCompare(self, x, src_format, dst_format, expected):
+    with self.cached_session() as session:
+      with self.test_scope():
+        placeholder = array_ops.placeholder(dtypes.as_dtype(x.dtype), x.shape)
+        param = {placeholder: x}
+        output = nn_ops.data_format_vec_permute(
+            placeholder, src_format=src_format, dst_format=dst_format)
+      result = session.run(output, param)
+    self.assertAllEqual(result, expected)
+
+  def testNHWCToNCHW(self):
+    x = np.array([7, 4, 9, 3], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NHWC", "NCHW", [7, 3, 4, 9])
+
+  def testNCHWToNHWC(self):
+    x = np.array([7, 4, 9, 3], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NCHW", "NHWC", [7, 9, 3, 4])
+
+  def testNHWCToHWNC(self):
+    x = np.array([7, 4, 9, 3], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NHWC", "HWNC", [4, 9, 7, 3])
+
+  def testHWNCToNHWC(self):
+    x = np.array([7, 4, 9, 3], dtype=np.int32)
+    self._runPermuteAndCompare(x, "HWNC", "NHWC", [9, 7, 4, 3])
+
+  def testNHWCToNCHW2D(self):
+    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NHWC", "NCHW",
+                               [[7, 4], [5, 1], [9, 3], [4, 5]])
+
+  def testNHWCToHWNC2D(self):
+    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NHWC", "HWNC",
+                               [[9, 3], [4, 5], [7, 4], [5, 1]])
+
+  def testHWNCToNHWC2D(self):
+    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
+    self._runPermuteAndCompare(x, "HWNC", "NHWC",
+                               [[4, 5], [7, 4], [9, 3], [5, 1]])
+
+  def testNCHWToNHWC2D(self):
+    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
+    self._runPermuteAndCompare(x, "NCHW", "NHWC",
+                               [[7, 4], [4, 5], [5, 1], [9, 3]])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 3e823254d3..9a7130f253 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -62,6 +62,7 @@ tf_kernel_library(
         "one_hot_op.cc",
         "pack_op.cc",
         "pad_op.cc",
+        "permute_op.cc",
         "pooling_ops.cc",
         "qr_op.cc",
         "quantize_and_dequantize_op.cc",
diff --git a/tensorflow/compiler/tf2xla/kernels/permute_op.cc b/tensorflow/compiler/tf2xla/kernels/permute_op.cc
new file mode 100644
index 0000000000..0764e5503d
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/permute_op.cc
@@ -0,0 +1,98 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+namespace tensorflow {
+namespace {
+
+class DataFormatVecPermuteOp : public XlaOpKernel {
+ public:
+  explicit DataFormatVecPermuteOp(OpKernelConstruction* ctx)
+      : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("src_format", &src_format_));
+    OP_REQUIRES(
+        ctx, src_format_.size() == 4,
+        errors::InvalidArgument("Data format should have 4 characters"));
+    TensorFormat data_format;
+    OP_REQUIRES(ctx, FormatFromString(src_format_, &data_format),
+                errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dst_format", &dst_format_));
+    OP_REQUIRES(
+        ctx, dst_format_.size() == 4,
+        errors::InvalidArgument("Data format should have 4 characters"));
+    OP_REQUIRES(ctx, FormatFromString(dst_format_, &data_format),
+                errors::InvalidArgument("Invalid data format"));
+  }
+  void Compile(XlaOpKernelContext* ctx) override {
+    auto builder = ctx->builder();
+    const TensorShape input_tensor_shape = ctx->InputShape(0);
+    int input_rank = input_tensor_shape.dims();
+    OP_REQUIRES(ctx, input_rank == 1 || input_rank == 2,
+                errors::InvalidArgument(
+                    "Input must be a vector or matrix, but got shape ",
+                    input_tensor_shape.DebugString()));
+    OP_REQUIRES(
+        ctx, input_tensor_shape.dim_size(0) == 4,
+        errors::InvalidArgument(
+            "First dimension of input must be of size 4, but got shape ",
+            input_tensor_shape.DebugString()));
+    if (input_rank == 2) {
+      OP_REQUIRES(
+          ctx, input_tensor_shape.dim_size(1) == 2,
+          errors::InvalidArgument(
+              "Second dimension of 2D input must be of size 2, but got shape ",
+              input_tensor_shape.DebugString()));
+    }
+    std::vector<int32> dst_indices(4, 0);
+    for (int i = 0; i < 4; ++i) {
+      for (int j = 0; j < 4; ++j) {
+        if (src_format_[i] == dst_format_[j]) {
+          dst_indices[i] = j;
+          break;
+        }
+      }
+    }
+    auto keys = xla::ConstantR1(builder, absl::Span<const int32>(dst_indices));
+    if (input_rank == 2) {
+      keys = xla::BroadcastInDim(
+          keys, xla::ShapeUtil::MakeShape(xla::S32, {4, 2}), {0});
+    }
+    auto sorted = xla::Sort(keys, ctx->Input(0), 0);
+    auto output = xla::GetTupleElement(sorted, 1);
+    ctx->SetOutput(0, output);
+  }
+
+ private:
+  string src_format_;
+  string dst_format_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(DataFormatVecPermuteOp);
+};
+
+// TODO(b/115384656): Support DT_INT64.
+REGISTER_XLA_OP(Name("DataFormatVecPermute").TypeConstraint("T", DT_INT32),
+                DataFormatVecPermuteOp);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index d5094e8ec5..b2c57e8880 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -194,6 +194,17 @@ Status XlaCompiler::CompileFunction(const XlaCompiler::CompileOptions& options,
 
   std::unique_ptr<Graph> graph = GetGraph(fbody);
 
+  // Clear the "_kernel" attribute if it is set to "host". This is used to
+  // indicate that a computation should happen on the host instead of the
+  // accelerator, but doesn't make sense in XLA.
+  const char* const kKernelAttr = "_kernel";
+  for (Node* n : graph->nodes()) {
+    string value;
+    if (GetNodeAttrSimple(n->attrs(), kKernelAttr, &value) && value == "host") {
+      n->ClearAttr(kKernelAttr);
+    }
+  }
+
   // _Arg and _Retval nodes don't exist in the stored subgraph for the function;
   // they are added by the function body looked up.  Therefore, they don't have
   // core assignments here.
-- 
GitLab


From 2c9369c8d878c913b5dfcd3c27849bcd3d6af6c9 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 4 Oct 2018 06:00:02 -0700
Subject: [PATCH 0347/1085] [TF:XLA] Don't expand complex64 tensors during
 TF/XLA lowering, if possible.

PiperOrigin-RevId: 215724324
---
 tensorflow/compiler/tests/nullary_ops_test.py | 43 +++++++++++++------
 .../compiler/tf2xla/kernels/const_op.cc       | 12 ++++++
 2 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/tests/nullary_ops_test.py b/tensorflow/compiler/tests/nullary_ops_test.py
index f985c5d2d9..38cb2f83ef 100644
--- a/tensorflow/compiler/tests/nullary_ops_test.py
+++ b/tensorflow/compiler/tests/nullary_ops_test.py
@@ -43,18 +43,37 @@ class NullaryOpsTest(xla_test.XLATestCase):
       output.run()
 
   def testConstants(self):
-    constants = [
-        np.float32(42),
-        np.array([], dtype=np.float32),
-        np.array([1, 2], dtype=np.float32),
-        np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32),
-        np.array([[[1, 2], [3, 4], [5, 6]], [[10, 20], [30, 40], [50, 60]]],
-                 dtype=np.float32),
-        np.array([[[]], [[]]], dtype=np.float32),
-        np.array([[[[1]]]], dtype=np.float32),
-    ]
-    for c in constants:
-      self._testNullary(lambda c=c: constant_op.constant(c), expected=c)
+    for dtype in self.numeric_types:
+      constants = [
+          dtype(42),
+          np.array([], dtype=dtype),
+          np.array([1, 2], dtype=dtype),
+          np.array([7, 7, 7, 7, 7], dtype=dtype),
+          np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype),
+          np.array([[[1, 2], [3, 4], [5, 6]], [[10, 20], [30, 40], [50, 60]]],
+                   dtype=dtype),
+          np.array([[[]], [[]]], dtype=dtype),
+          np.array([[[[1]]]], dtype=dtype),
+      ]
+      for c in constants:
+        self._testNullary(lambda c=c: constant_op.constant(c), expected=c)
+
+  def testComplexConstants(self):
+    for dtype in self.complex_types:
+      constants = [
+          dtype(42 + 3j),
+          np.array([], dtype=dtype),
+          np.ones([50], dtype=dtype) * (3 + 4j),
+          np.array([1j, 2 + 1j], dtype=dtype),
+          np.array([[1, 2j, 7j], [4, 5, 6]], dtype=dtype),
+          np.array([[[1, 2], [3, 4 + 6j], [5, 6]],
+                    [[10 + 7j, 20], [30, 40], [50, 60]]],
+                   dtype=dtype),
+          np.array([[[]], [[]]], dtype=dtype),
+          np.array([[[[1 + 3j]]]], dtype=dtype),
+      ]
+      for c in constants:
+        self._testNullary(lambda c=c: constant_op.constant(c), expected=c)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/compiler/tf2xla/kernels/const_op.cc b/tensorflow/compiler/tf2xla/kernels/const_op.cc
index da8cf3fc6f..2628ef8e24 100644
--- a/tensorflow/compiler/tf2xla/kernels/const_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/const_op.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/framework/types.pb.h"
 
 namespace tensorflow {
 namespace {
@@ -76,6 +77,17 @@ class ConstOp : public XlaOpKernel {
             return;
           }
           break;
+        case DT_COMPLEX64:
+          if (proto_.scomplex_val_size() == 2) {
+            ctx->SetOutput(
+                0,
+                xla::Broadcast(xla::ConstantR0<xla::complex64>(
+                                   b, xla::complex64(proto_.scomplex_val(0),
+                                                     proto_.scomplex_val(1))),
+                               shape.dim_sizes()));
+            return;
+          }
+          break;
         case DT_INT32:
           if (proto_.int_val_size() == 1) {
             ctx->SetOutput(
-- 
GitLab


From 82ea80b979768c7fe1daa4b50cf054e5a0968f31 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 06:09:42 -0700
Subject: [PATCH 0348/1085] Add option in tf.gradients() to return zero tensors
 for unconnected gradients.

tf.gradients currently returns [NONE] when the gradient of unconnected variables
is required. This backwards compatable change adds in the option to have zero
tensors returned that match the dimensions of the input tensor.

PiperOrigin-RevId: 215725488
---
 tensorflow/python/BUILD                       |  4 ++
 tensorflow/python/ops/gradients.py            |  1 +
 tensorflow/python/ops/gradients_impl.py       | 67 +++++++++++++++++--
 tensorflow/python/ops/gradients_test.py       | 34 ++++++++++
 .../tensorflow.-unconnected-gradients.pbtxt   | 12 ++++
 .../tools/api/golden/v1/tensorflow.pbtxt      |  6 +-
 .../tensorflow.-unconnected-gradients.pbtxt   | 12 ++++
 .../tools/api/golden/v2/tensorflow.pbtxt      |  6 +-
 8 files changed, 135 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.-unconnected-gradients.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-unconnected-gradients.pbtxt

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index fe81254ef7..da3c56db92 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2152,6 +2152,7 @@ py_library(
         ":array_grad",
         ":array_ops",
         ":bitwise_ops",
+        ":check_ops",
         ":cond_v2_impl",
         ":control_flow_grad",
         ":control_flow_ops",
@@ -2172,8 +2173,11 @@ py_library(
         ":random_grad",
         ":resource_variable_ops",
         ":spectral_grad",
+        ":tensor_array_ops",
+        ":tensor_util",
         ":util",
         ":variable_scope",
+        "//tensorflow/core:protos_all_py",
         "//tensorflow/python/eager:backprop",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:tape",
diff --git a/tensorflow/python/ops/gradients.py b/tensorflow/python/ops/gradients.py
index 1dc666e78b..794465b10e 100644
--- a/tensorflow/python/ops/gradients.py
+++ b/tensorflow/python/ops/gradients.py
@@ -25,4 +25,5 @@ from tensorflow.python.ops.custom_gradient import custom_gradient
 from tensorflow.python.ops.gradients_impl import AggregationMethod
 from tensorflow.python.ops.gradients_impl import gradients
 from tensorflow.python.ops.gradients_impl import hessians
+from tensorflow.python.ops.gradients_impl import UnconnectedGradients
 # pylint: enable=unused-import
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index 056015d6b6..aac95037dc 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import collections
 import contextlib
+import enum  # pylint: disable=g-bad-import-order
 import sys
 import warnings
 
@@ -537,6 +538,26 @@ def _Consumers(t, func_graphs):
   return consumers
 
 
+@tf_export("UnconnectedGradients")
+class UnconnectedGradients(enum.Enum):
+  """Controls how gradient computation behaves when y does not depend on x.
+
+  The gradient of y with respect to x can be zero in two different ways: there
+  could be no differentiable path in the graph connecting x to y (and so we can
+  statically prove that the gradient is zero) or it could be that runtime values
+  of tensors in a particular execution lead to a gradient of zero (say, if a
+  relu unit happens to not be activated). To allow you to distinguish between
+  these two cases you can choose what value gets returned for the gradient when
+  there is no path in the graph from x to y:
+
+  * `NONE`: Indicates that [None] will be returned if there is no path from x
+    to y
+  * `ZERO`: Indicates that a zero tensor will be returned in the shape of x.
+  """
+  NONE = "none"
+  ZERO = "zero"
+
+
 @tf_export("gradients")
 def gradients(ys,
               xs,
@@ -545,7 +566,8 @@ def gradients(ys,
               colocate_gradients_with_ops=False,
               gate_gradients=False,
               aggregation_method=None,
-              stop_gradients=None):
+              stop_gradients=None,
+              unconnected_gradients=UnconnectedGradients.NONE):
   """Constructs symbolic derivatives of sum of `ys` w.r.t. x in `xs`.
 
   `ys` and `xs` are each a `Tensor` or a list of tensors.  `grad_ys`
@@ -596,6 +618,23 @@ def gradients(ys,
   All integer tensors are considered constant with respect to all `xs`, as if
   they were included in `stop_gradients`.
 
+  `unconnected_gradients` determines the value returned for each x in xs if it
+  is unconnected in the graph to ys. By default this is None to safeguard
+  against errors. MAthematically these gradients are zero which can be requested
+  using the `'zero'` option. `tf.UnconnectedGradients` provides the
+  following options and behaviors:
+
+  ```python
+  a = tf.ones([1, 2])
+  b = tf.ones([3, 1])
+  g1 = tf.gradients([b], [a], unnconnected_gradients='none')
+  sess.run(g1)  # [None]
+
+  g2 = tf.gradients([b], [a], unconnected_gradients='zero')
+  sess.run(g2)  # [array([[0., 0.]], dtype=float32)]
+  ```
+
+
   Args:
     ys: A `Tensor` or list of tensors to be differentiated.
     xs: A `Tensor` or list of tensors to be used for differentiation.
@@ -611,6 +650,10 @@ def gradients(ys,
       Accepted values are constants defined in the class `AggregationMethod`.
     stop_gradients: Optional. A `Tensor` or list of tensors not to differentiate
       through.
+    unconnected_gradients: Optional. Specifies the gradient value returned when
+      the given input tensors are unconnected. Accepted values are constants
+      defined in the class `tf.UnconnectedGradients` and the default value is
+      `none`.
 
   Returns:
     A list of `sum(dy/dx)` for each x in `xs`.
@@ -627,7 +670,8 @@ def gradients(ys,
   # mutating new ops.
   with ops.get_default_graph()._mutation_lock():  # pylint: disable=protected-access
     return _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops,
-                            gate_gradients, aggregation_method, stop_gradients)
+                            gate_gradients, aggregation_method, stop_gradients,
+                            unconnected_gradients)
 
 
 def _GradientsHelper(ys,
@@ -638,6 +682,7 @@ def _GradientsHelper(ys,
                      gate_gradients=False,
                      aggregation_method=None,
                      stop_gradients=None,
+                     unconnected_gradients=UnconnectedGradients.NONE,
                      src_graph=None):
   """Implementation of gradients()."""
   if context.executing_eagerly():
@@ -645,6 +690,11 @@ def _GradientsHelper(ys,
                        "is enabled. Use tf.GradientTape instead.")
   if src_graph is None:
     src_graph = ops.get_default_graph()
+  try:
+    unconnected_gradients = UnconnectedGradients(unconnected_gradients)
+  except ValueError:
+    raise ValueError(
+        "Unknown value for unconnected_gradients: %r" % unconnected_gradients)
 
   # If src_graph is a _FuncGraph (i.e. a function body), gather it and all
   # ancestor graphs. This is necessary for correctly handling captured values.
@@ -856,7 +906,7 @@ def _GradientsHelper(ys,
 
   if loop_state:
     loop_state.PostProcessing()
-  return [_GetGrad(grads, x) for x in xs]
+  return [_GetGrad(grads, x, unconnected_gradients) for x in xs]
 
 
 def _HasAnyNotNoneGrads(grads, op):
@@ -924,12 +974,19 @@ def _SetGrad(grads, t, grad):
     op_grads[t.value_index] = grad
 
 
-def _GetGrad(grads, t):
+def _GetGrad(grads, t, unconnected_gradients):
   """Gets gradient for tensor "t"."""
   op = t.op
   op_grads = grads.get(op)
   if not op_grads:
-    return None
+    if unconnected_gradients == UnconnectedGradients.ZERO:
+      return array_ops.zeros_like(t)
+    elif unconnected_gradients == UnconnectedGradients.NONE:
+      return None
+    else:
+      raise ValueError(
+          "Unknown value for unconnected_gradients: %r" % unconnected_gradients)
+
   t_grad = op_grads[t.value_index]
   assert not isinstance(
       t_grad, list), ("gradients list should have been aggregated by now.")
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index 3c9b7a01c7..c93e2493ee 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -350,6 +350,40 @@ class GradientsTest(test_util.TensorFlowTestCase):
       for a, b in zip(npgrad1, npgrad2):
         np.testing.assert_allclose(a, b)
 
+  def testUnconnectedGradientsNoneUnconnectedGradients(self):
+    with ops.Graph().as_default():
+      x = constant(1.0, shape=[2, 2])
+      y = constant(3.0, shape=[3, 1])
+      grad = gradients.gradients(
+          [y], [x], unconnected_gradients="none")
+    self.assertIsNone(grad[0])
+
+  def testUnconnectedGradientsZerosUnconnectedGradients(self):
+    with ops.Graph().as_default():
+      x = constant(1.0, shape=[2, 2])
+      y = constant(3.0, shape=[3, 1])
+      grads = gradients.gradients(
+          [y], [x], unconnected_gradients="zero")
+      with self.cached_session() as sess:
+        self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], sess.run(grads)[0])
+
+  def testUnconnectedGradientsZeroConnectedGradients(self):
+    with ops.Graph().as_default():
+      x = constant(1.0)
+      y = x * 3.0
+      grad = gradients.gradients(
+          [y], [x], unconnected_gradients="zero")
+      with self.cached_session() as sess:
+        self.assertEquals(3.0, sess.run(grad)[0])
+
+  def testUnknownUnconnectedGradientsValueGiven(self):
+    with ops.Graph().as_default():
+      x = constant(1.0)
+      y = constant(1.0)
+      with self.assertRaisesRegexp(
+          ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
+        gradients.gradients([y], [x], unconnected_gradients="nonsense")
+
 
 class FunctionGradientsTest(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-unconnected-gradients.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-unconnected-gradients.pbtxt
new file mode 100644
index 0000000000..c5eb959430
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-unconnected-gradients.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.UnconnectedGradients"
+tf_class {
+  is_instance: "<enum \'UnconnectedGradients\'>"
+  member {
+    name: "NONE"
+    mtype: "<enum \'UnconnectedGradients\'>"
+  }
+  member {
+    name: "ZERO"
+    mtype: "<enum \'UnconnectedGradients\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index a268529c1f..c1cc7322f0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -248,6 +248,10 @@ tf_module {
     name: "TextLineReader"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "UnconnectedGradients"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
   member {
     name: "VERSION"
     mtype: "<type \'str\'>"
@@ -1234,7 +1238,7 @@ tf_module {
   }
   member_method {
     name: "gradients"
-    argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\', \'unconnected_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\', \'UnconnectedGradients.NONE\'], "
   }
   member_method {
     name: "greater"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-unconnected-gradients.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-unconnected-gradients.pbtxt
new file mode 100644
index 0000000000..c5eb959430
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-unconnected-gradients.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.UnconnectedGradients"
+tf_class {
+  is_instance: "<enum \'UnconnectedGradients\'>"
+  member {
+    name: "NONE"
+    mtype: "<enum \'UnconnectedGradients\'>"
+  }
+  member {
+    name: "ZERO"
+    mtype: "<enum \'UnconnectedGradients\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 5b3ea75bce..571abc3b19 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -220,6 +220,10 @@ tf_module {
     name: "TensorShape"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "UnconnectedGradients"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
   member {
     name: "VERSION"
     mtype: "<type \'str\'>"
@@ -1134,7 +1138,7 @@ tf_module {
   }
   member_method {
     name: "gradients"
-    argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\'], "
+    argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\', \'unconnected_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\', \'UnconnectedGradients.NONE\'], "
   }
   member_method {
     name: "greater"
-- 
GitLab


From 7b56d4ff7679ed59e3ea799054c5dcefd0600ab0 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 4 Oct 2018 08:08:22 -0700
Subject: [PATCH 0349/1085] [TF] Fail fast if there is no CPU kernel during
 constant tensor evaluation. Avoids LOG(ERROR) spam when the Executor is
 unable to find a CPU kernel.

PiperOrigin-RevId: 215738481
---
 .../core/common_runtime/eval_const_tensor.cc   | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tensorflow/core/common_runtime/eval_const_tensor.cc b/tensorflow/core/common_runtime/eval_const_tensor.cc
index c1542f1f57..87749da7af 100644
--- a/tensorflow/core/common_runtime/eval_const_tensor.cc
+++ b/tensorflow/core/common_runtime/eval_const_tensor.cc
@@ -113,6 +113,13 @@ Status TryToInferTensorOutputFromInputShapes(const Edge& edge,
   return Status::OK();
 }
 
+// Returns true if 'node' has a registered CPU kernel.
+bool HasCpuKernel(const Node& node) {
+  return FindKernelDef(DeviceType(DEVICE_CPU), node.def(), /*def=*/nullptr,
+                       /*kernel_class_name=*/nullptr)
+      .ok();
+}
+
 // Extracts the subgraph ending at 'target_node' that is statically computable
 // and inserts into 'out_graph'. If statically computable, 'is_constant_graph'
 // will be set to true.
@@ -136,6 +143,12 @@ Status ExtractConstantSubgraph(
     return Status::OK();
   }
 
+  // Since constant-folding runs on the CPU, do not attempt to constant-fold
+  // operators that have no CPU kernel.
+  if (!HasCpuKernel(target_node)) {
+    return Status::OK();
+  }
+
   // TODO(skyewm): should more of the filtering applied in input nodes below be
   // applied to target_node here?
 
@@ -201,6 +214,11 @@ Status ExtractConstantSubgraph(
       return Status::OK();
     }
 
+    if (!HasCpuKernel(*current_node)) {
+      *is_constant_graph = false;
+      return Status::OK();
+    }
+
     // If there is nothing more to recurse down, see if
     // the generator node is a constant.
     if (current_node->num_inputs() == 0) {
-- 
GitLab


From dcd7dd2d2e1ed7d8c26dd22dbbd2bac269c42e1e Mon Sep 17 00:00:00 2001
From: Alan Chiao <alanchiao@google.com>
Date: Thu, 4 Oct 2018 08:30:22 -0700
Subject: [PATCH 0350/1085] Sparse output fully connected custom op.

PiperOrigin-RevId: 215741296
---
 tensorflow/contrib/lite/kernels/BUILD         |  18 ++
 .../kernels/sparse_output_fully_connected.cc  | 235 ++++++++++++++++++
 .../sparse_output_fully_connected_test.cc     | 158 ++++++++++++
 3 files changed, 411 insertions(+)
 create mode 100644 tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
 create mode 100644 tensorflow/contrib/lite/kernels/sparse_output_fully_connected_test.cc

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index daaf6714cc..95e387814d 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -210,6 +210,7 @@ cc_library(
         "slice.cc",
         "space_to_batch_nd.cc",
         "space_to_depth.cc",
+        "sparse_output_fully_connected.cc",
         "sparse_to_dense.cc",
         "split.cc",
         "squeeze.cc",
@@ -333,6 +334,23 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "sparse_output_fully_connected_test",
+    size = "small",
+    srcs = ["sparse_output_fully_connected_test.cc"],
+    tags = [
+        "no_oss",
+        "tflite_not_portable_ios",
+    ],
+    deps = [
+        ":builtin_ops",
+        "//tensorflow/contrib/lite:framework",
+        "//tensorflow/contrib/lite/kernels:test_util",
+        "@com_google_googletest//:gtest",
+        "@flatbuffers",
+    ],
+)
+
 tf_cc_test(
     name = "activations_test",
     size = "small",
diff --git a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
new file mode 100644
index 0000000000..843ed0768c
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
@@ -0,0 +1,235 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// SparseOutputFullyConnected is a fully connected layer that uses a single
+// row in the weights and bias via a lookup.
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace custom {
+namespace sparse_output_fully_connected {
+
+// Input tensors of size {n_batch, n_input}
+constexpr int kInputTensor = 0;
+// Auxiliary input tensor of size { 1 }
+constexpr int kInputLookupTensor = 1;
+
+// Weights tensor of size { n_embeddings , n_input }
+constexpr int kWeightsTensor = 2;
+// Bias tensor of size { n_embeddings }
+constexpr int kBiasTensor = 3;
+
+// Output tensor.
+constexpr int kOutputTensor = 0;
+
+// Temporary tensors.
+enum TemporaryTensor {
+  kInputQuantized = 0,
+  kScalingFactors = 1,
+  kNumTemporaryTensors = 2
+};
+
+// Struct to hold op data.
+struct OpData {
+  int scratch_tensor_index;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  auto* data = new OpData;
+  context->AddTensors(context, /*tensors_to_add=*/kNumTemporaryTensors,
+                      &data->scratch_tensor_index);
+  return data;
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<OpData*>(buffer);
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+
+  TF_LITE_ENSURE_EQ(context, node->inputs->size, 4);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
+
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
+  const int n_batch = SizeOfDimension(input, 0);
+  const int n_input = SizeOfDimension(input, 1);
+
+  const TfLiteTensor* lookup = GetInput(context, node, kInputLookupTensor);
+  TF_LITE_ENSURE_EQ(context, lookup->type, kTfLiteInt32);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(lookup), 1);
+  // Only support single lookup.
+  TF_LITE_ENSURE_EQ(context, SizeOfDimension(lookup, 0), 1);
+
+  const TfLiteTensor* weights = GetInput(context, node, kWeightsTensor);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(weights), 2);
+  TF_LITE_ENSURE_EQ(context, SizeOfDimension(weights, 1), n_input);
+
+  const TfLiteTensor* bias = GetInput(context, node, kBiasTensor);
+  TF_LITE_ENSURE_EQ(context, NumElements(bias), SizeOfDimension(weights, 0));
+
+  const bool is_hybrid_op =
+      (weights->type == kTfLiteUInt8 && input->type == kTfLiteFloat32);
+
+  if (is_hybrid_op) {
+    TfLiteIntArrayFree(node->temporaries);
+    node->temporaries = TfLiteIntArrayCreate(kNumTemporaryTensors);
+
+    // Allocate temporary tensors to store quantized values of input.
+    node->temporaries->data[kInputQuantized] = op_data->scratch_tensor_index;
+    TfLiteTensor* input_quantized =
+        GetTemporary(context, node, /*index=*/kInputQuantized);
+    input_quantized->type = kTfLiteUInt8;
+    input_quantized->allocation_type = kTfLiteArenaRw;
+    if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) {
+      TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims);
+      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_quantized,
+                                                       input_quantized_size));
+    }
+
+    // Tell interpreter to allocate temporary tensors to store scaling factors.
+    node->temporaries->data[kScalingFactors] =
+        op_data->scratch_tensor_index + kScalingFactors;
+    TfLiteTensor* scaling_factors =
+        GetTemporary(context, node, /*index=*/kScalingFactors);
+    scaling_factors->type = kTfLiteFloat32;
+    scaling_factors->allocation_type = kTfLiteArenaRw;
+    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+    scaling_factors_size->data[0] = n_batch;
+    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
+                                                       scaling_factors_size));
+    }
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalFloat(const TfLiteTensor* input, const TfLiteTensor* lookup,
+                       const TfLiteTensor* weights, const TfLiteTensor* bias,
+                       TfLiteTensor* output) {
+  const int n_batch = SizeOfDimension(input, 0);
+  const int n_input = SizeOfDimension(input, 1);
+
+  const float* input_ptr_batch = input->data.f;
+
+  // Initialize pointer to right row according to lookup value.
+  int32 lookup_index = lookup->data.i32[0];
+  const float* weights_ptr = weights->data.f + lookup_index * n_input;
+
+  // Initialize output to bias.
+  if (bias) {
+    float* bias_ptr = bias->data.f + lookup_index;
+    tensor_utils::VectorBatchVectorAssign(bias_ptr, 1, n_batch, output->data.f);
+  } else {
+    tensor_utils::ZeroVector(output->data.f, n_batch * 1);
+  }
+
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      weights_ptr, /*m_rows=*/1, n_input, input_ptr_batch, n_batch,
+      output->data.f, /*result_stride=*/1);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalHybrid(const TfLiteTensor* input, const TfLiteTensor* lookup,
+                        const TfLiteTensor* weights, const TfLiteTensor* bias,
+                        TfLiteTensor* scaling_factors,
+                        TfLiteTensor* input_quantized, TfLiteTensor* output) {
+  const int n_batch = SizeOfDimension(input, 0);
+  const int n_input = SizeOfDimension(input, 1);
+
+  const float* input_ptr_batch = input->data.f;
+  // Initialize the pointer to storage for quantized values and
+  // scaling factors.
+  int8_t* quantized_input_ptr_batch =
+      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
+  float* scaling_factors_ptr = scaling_factors->data.f;
+
+  // Initialize pointer to right row according to lookup value.
+  int32 lookup_index = lookup->data.i32[0];
+  int8_t* weights_ptr =
+      reinterpret_cast<int8_t*>(weights->data.uint8) + lookup_index * n_input;
+
+  // Initialize output to bias.
+  if (bias) {
+    float* bias_ptr = bias->data.f + lookup_index;
+    tensor_utils::VectorBatchVectorAssign(bias_ptr, 1, n_batch, output->data.f);
+  } else {
+    tensor_utils::ZeroVector(output->data.f, n_batch * 1);
+  }
+
+  if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) {
+    // Quantize input from float to int8.
+    float unused_min, unused_max;
+    for (int b = 0; b < n_batch; ++b) {
+      const int offset = b * n_input;
+      tensor_utils::SymmetricQuantizeFloats(
+          input_ptr_batch + offset, n_input, quantized_input_ptr_batch + offset,
+          &unused_min, &unused_max, &scaling_factors_ptr[b]);
+      scaling_factors_ptr[b] *= weights->params.scale;
+    }
+
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        weights_ptr, /*m_rows=*/1, n_input, quantized_input_ptr_batch,
+        scaling_factors_ptr, n_batch, output->data.f, /*result_stride=*/1);
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* lookup = GetInput(context, node, kInputLookupTensor);
+  const TfLiteTensor* weights = GetInput(context, node, kWeightsTensor);
+  const TfLiteTensor* bias = GetInput(context, node, kBiasTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  switch (weights->type) {
+    case kTfLiteFloat32: {
+      return EvalFloat(input, lookup, weights, bias, output);
+    }
+    case kTfLiteUInt8: {
+      TfLiteTensor* input_quantized =
+          GetTemporary(context, node, /*index=*/kInputQuantized);
+      TfLiteTensor* scaling_factors =
+          GetTemporary(context, node, /*index=*/kScalingFactors);
+      return EvalHybrid(input, lookup, weights, bias, scaling_factors,
+                        input_quantized, output);
+    }
+    default:
+      context->ReportError(context, "Type %d is not currently supported.",
+                           weights->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace sparse_output_fully_connected
+
+TfLiteRegistration* Register_SPARSE_OUTPUT_FULLY_CONNECTED() {
+  static TfLiteRegistration r = {sparse_output_fully_connected::Init,
+                                 sparse_output_fully_connected::Free,
+                                 sparse_output_fully_connected::Prepare,
+                                 sparse_output_fully_connected::Eval};
+  return &r;
+}
+
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected_test.cc b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected_test.cc
new file mode 100644
index 0000000000..365986a5c1
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected_test.cc
@@ -0,0 +1,158 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// Unit test for TFLite sparse output fully connected op.
+#include <iomanip>
+#include <random>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "flatbuffers/flexbuffers.h"  // TF:flatbuffers
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+
+namespace tflite {
+
+namespace ops {
+namespace custom {
+
+TfLiteRegistration* Register_SPARSE_OUTPUT_FULLY_CONNECTED();
+
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BaseSparseOutputFullyConnectedOpModel : public SingleOpModel {
+ public:
+  BaseSparseOutputFullyConnectedOpModel(const TensorData& input,
+                                        const TensorData& weights,
+                                        const TensorData& output = {
+                                            TensorType_FLOAT32}) {
+    input_ = AddInput(input);
+    lookup_ = AddInput({TensorType_INT32, {1}});
+    weights_ = AddInput(weights);
+    int bias_size = GetShape(weights_)[0];
+    bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
+    output_ = AddOutput(output);
+
+    // Create empty (required) options map.
+    flexbuffers::Builder fbb;
+    fbb.Map([&]() {});
+    fbb.Finish();
+
+    SetCustomOp("SPARSE_OUTPUT_FULLY_CONNECTED", fbb.GetBuffer(),
+                Register_SPARSE_OUTPUT_FULLY_CONNECTED);
+    BuildInterpreter({GetShape(input_), GetShape(lookup_), GetShape(weights_),
+                      GetShape(bias_)});
+  }
+
+  void SetInput(const std::vector<float>& data) {
+    PopulateTensor(input_, data);
+  }
+
+  void SetLookup(const std::vector<int32>& f) { PopulateTensor(lookup_, f); }
+
+  void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+ protected:
+  int input_;
+  int lookup_;
+  int weights_;
+  int bias_;
+  int output_;
+};
+
+class FloatSparseOutputFullyConnectedOpModel
+    : public BaseSparseOutputFullyConnectedOpModel {
+ public:
+  using BaseSparseOutputFullyConnectedOpModel::
+      BaseSparseOutputFullyConnectedOpModel;
+
+  void SetWeights(const std::vector<float>& f) { PopulateTensor(weights_, f); }
+};
+
+class HybridSparseOutputFullyConnectedOpModel
+    : public BaseSparseOutputFullyConnectedOpModel {
+ public:
+  using BaseSparseOutputFullyConnectedOpModel::
+      BaseSparseOutputFullyConnectedOpModel;
+
+  void SetWeights(const std::vector<float>& f) {
+    SymmetricQuantizeAndPopulate(weights_, f);
+  }
+};
+
+TEST(SparseOutputFullyConnectedOpTest, SimpleTestFloat) {
+  FloatSparseOutputFullyConnectedOpModel m({TensorType_FLOAT32, {1, 5}},
+                                           {TensorType_FLOAT32, {3, 5}},
+                                           {TensorType_FLOAT32, {}});
+
+  m.SetInput({-1.0, 0.0, 1.0, 2.0, 3.0});
+
+  m.SetLookup({2});
+
+  m.SetWeights({
+      -1.0, 0.0, 1.0, 2.0, 3.0,  //
+      0.0, 1.0, 2.0, 3.0, 4.0,   //
+      1.0, 2.0, 3.0, 4.0, 5.0,   //
+  });
+
+  m.SetBias({1.0, 2.0, 3.0});
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({28}));
+}
+
+TEST(SparseOutputFullyConnectedOpTest, SimpleTestHybrid) {
+  HybridSparseOutputFullyConnectedOpModel m({TensorType_FLOAT32, {1, 5}},
+                                            {TensorType_UINT8, {3, 5}},
+                                            {TensorType_FLOAT32, {}});
+
+  m.SetInput({-1.0, 0.0, 1.0, 2.0, 3.0});
+
+  m.SetLookup({2});
+
+  m.SetWeights({
+      -1.0, 0.0, 1.0, 2.0, 3.0,  //
+      0.0, 1.0, 2.0, 3.0, 4.0,   //
+      1.0, 2.0, 3.0, 4.0, 5.0,   //
+  });
+
+  m.SetBias({1.0, 2.0, 3.0});
+
+  m.Invoke();
+
+  // We get 28.0552 instead of 28.
+  //
+  // Input -> -42, 0, 42, 85, 127 with scale factor of 127/3.
+  // Looked up weights ->  25, 51, 76, 102, 127 with scale factor of 127/5.
+  //
+  // (-42 * 25 + 0 * 51 + 42 * 76 + 85 * 102 + 127 * 127) * (3*5/127^2) + 3.0
+  // gives us the expected result.
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({28}, 0.0553)));
+}
+
+}  // namespace
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
-- 
GitLab


From 80c9eec9b2475630f83a596f77a906c8075f8e6c Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 4 Oct 2018 08:56:45 -0700
Subject: [PATCH 0351/1085] Remove CHECKs from HloInstruction constructors.
 Move these checks to RET_CHECKs in the HloVerifier. Added a new visitor class
 InstructionVerifier inside of hlo_verifier.cc for handling these random
 non-result-shape verifications.

PiperOrigin-RevId: 215745043
---
 .../compiler/xla/service/hlo_instructions.cc  |  12 -
 .../compiler/xla/service/hlo_instructions.h   |   1 -
 .../compiler/xla/service/hlo_verifier.cc      | 456 ++++++++++--------
 .../compiler/xla/service/hlo_verifier.h       |  11 -
 4 files changed, 248 insertions(+), 232 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 68d0979f5c..152d8eacdb 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -643,14 +643,6 @@ HloTransposeInstruction::HloTransposeInstruction(
     absl::Span<const int64> dimensions)
     : HloInstruction(HloOpcode::kTranspose, shape),
       dimensions_(dimensions.begin(), dimensions.end()) {
-  CHECK_EQ(shape.dimensions().size(), dimensions.size());
-  CHECK_EQ(shape.dimensions().size(), operand->shape().dimensions().size());
-  CHECK(std::equal(operand->shape().dimensions().begin(),
-                   operand->shape().dimensions().end(),
-                   Permute(dimensions, shape.dimensions()).begin()))
-      << "shape: " << ShapeUtil::HumanString(shape)
-      << ", operand->shape(): " << ShapeUtil::HumanString(shape)
-      << ", dimensions: {" << StrJoin(dimensions, ", ") << "}";
   AppendOperand(operand);
 }
 
@@ -1491,7 +1483,6 @@ HloParameterInstruction::CloneWithNewOperandsImpl(
 HloGetTupleElementInstruction::HloGetTupleElementInstruction(
     const Shape& shape, HloInstruction* operand, int64 index)
     : HloInstruction(HloOpcode::kGetTupleElement, shape), tuple_index_(index) {
-  CHECK(ShapeUtil::IsTuple(operand->shape()));
   AppendOperand(operand);
 }
 
@@ -1613,9 +1604,6 @@ HloOutfeedInstruction::HloOutfeedInstruction(const Shape& outfeed_shape,
     : HloInstruction(HloOpcode::kOutfeed, ShapeUtil::MakeTokenShape()),
       outfeed_shape_(outfeed_shape),
       outfeed_config_(outfeed_config) {
-  CHECK(ShapeUtil::Compatible(operand->shape(), outfeed_shape))
-      << "Outfeed shape " << outfeed_shape
-      << " must be compatible with operand shape " << operand->shape();
   AppendOperand(operand);
   AppendOperand(token_operand);
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index ab168800f6..e169604072 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -896,7 +896,6 @@ class HloOutfeedInstruction : public HloInstruction {
                                  absl::string_view outfeed_config);
   // Returns the shape for the Outfeed instruction.
   const Shape& outfeed_shape() const {
-    TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(outfeed_shape_));
     return outfeed_shape_;
   }
   // Returns the config for the Outfeed instruction.
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index a7727824fe..b5498bb936 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -763,7 +763,136 @@ Status VerifyHloStructure(HloModule* module) {
   return Status::OK();
 }
 
-Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
+namespace {
+
+// Returns true if the given Shape has a TOKEN shape as any subshape.
+bool ShapeContainsToken(const Shape& shape) {
+  bool contains_token = false;
+  ShapeUtil::ForEachSubshape(
+      shape, [&contains_token](const Shape& subshape, const ShapeIndex&) {
+        if (ShapeUtil::IsToken(subshape)) {
+          contains_token = true;
+        }
+      });
+  return contains_token;
+}
+
+// Verifies that all types entering and exiting the entry computation are
+// legal.
+Status VerifyEntryAndExitShapes(const HloModule& module) {
+  // Tokens cannot be passed as entry parameters.
+  // TODO(b/80000000): Remove this constraint.
+  for (int i = 0; i < module.entry_computation()->num_parameters(); ++i) {
+    HloInstruction* param =
+        module.entry_computation()->parameter_instruction(i);
+    if (ShapeContainsToken(param->shape())) {
+      return InternalError(
+          "Entry parameter %d is or contains a token shape: %s", i,
+          ShapeUtil::HumanString(param->shape()));
+    }
+  }
+  return Status::OK();
+}
+
+// Checks if the given two instructions share the same channel id.
+Status CheckSameChannel(const HloInstruction* instr1,
+                        const HloInstruction* instr2) {
+  if (instr1->channel_id() != instr2->channel_id()) {
+    return InternalError(
+        "Expected to have the same channel id, actual channel ids are: %s "
+        "(%d), %s (%d)",
+        instr1->ToString(), instr1->channel_id(), instr2->ToString(),
+        instr2->channel_id());
+  }
+  return Status::OK();
+}
+
+// Checks if the given two instructions have the same is_host_transfer
+// attribute value. Intsructions must be send/recv instructions or their
+// 'done' variant.
+Status CheckSameIsHostTransfer(const HloInstruction* instr1,
+                               const HloInstruction* instr2) {
+  const HloSendRecvInstruction* send_recv1 =
+      DynCast<const HloSendRecvInstruction>(instr1);
+  const HloSendRecvInstruction* send_recv2 =
+      DynCast<const HloSendRecvInstruction>(instr2);
+  TF_RET_CHECK(send_recv1 != nullptr);
+  TF_RET_CHECK(send_recv2 != nullptr);
+  if (send_recv1->is_host_transfer() != send_recv2->is_host_transfer()) {
+    return InternalError(
+        "Expected instructions to have the same is-host-transfer property: "
+        "%s, "
+        "%s ",
+        instr1->ToString(), instr2->ToString());
+  }
+  return Status::OK();
+}
+
+// Checks various invariants of send and recv instructions.
+Status VerifySendsAndRecvs(const HloModule& module) {
+  absl::flat_hash_map<int64, const HloInstruction*> host_channels;
+  // Host send/recv instructions must have their own unique channel.
+  auto check_unique_host_channel = [&](const HloInstruction* instruction) {
+    const HloSendRecvInstruction* sendrecv =
+        DynCast<const HloSendRecvInstruction>(instruction);
+    if (sendrecv->is_host_transfer()) {
+      auto it_inserted =
+          host_channels.insert({sendrecv->channel_id(), sendrecv});
+      if (!it_inserted.second) {
+        return FailedPrecondition(
+            "Channel %d is used for multiple host send/recv instructions: "
+            "%s "
+            "and "
+            "%s",
+            sendrecv->channel_id(), sendrecv->ToString(),
+            it_inserted.first->second->ToString());
+      }
+    }
+
+    return Status::OK();
+  };
+
+  // Send/Recv instruction must have a single user: the corresponding
+  // SendDone/RecvDone. with matching channel.
+  for (const HloComputation* computation : module.computations()) {
+    for (const HloInstruction* instruction : computation->instructions()) {
+      switch (instruction->opcode()) {
+        case HloOpcode::kSend: {
+          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
+          TF_RET_CHECK(instruction->users().size() == 1);
+          const HloInstruction* send_done = instruction->users().front();
+          TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone);
+          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, send_done));
+          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, send_done));
+          break;
+        }
+        case HloOpcode::kRecv: {
+          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
+          TF_RET_CHECK(instruction->users().size() == 1);
+          const HloInstruction* recv_done = instruction->users().front();
+          TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone);
+          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, recv_done));
+          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, recv_done));
+          break;
+        }
+        case HloOpcode::kSendDone:
+          TF_RET_CHECK(instruction->operands().size() == 1);
+          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kSend);
+          break;
+        case HloOpcode::kRecvDone:
+          TF_RET_CHECK(instruction->operands().size() == 1);
+          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kRecv);
+          break;
+        default:
+          break;
+      }
+    }
+  }
+  return Status::OK();
+}
+
+// CHECKs various invariants of a fusion instruction.
+Status CheckFusionInstruction(HloInstruction* fusion) {
   // The parent fusion instruction of the fusion computation must be 'fusion'.
   HloComputation* fused_computation = fusion->fused_instructions_computation();
   if (fusion != fused_computation->FusionInstruction()) {
@@ -866,50 +995,32 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
     }
   }
 
+  TF_RET_CHECK(fusion->called_computations() ==
+               absl::Span<HloComputation* const>(
+                   {fusion->fused_instructions_computation()}))
+      << "Fusion HLO calls computations other than the "
+         "fused_instructions_computation: "
+      << fusion->ToString() << " fusion->fused_instructions_computation(): "
+      << fusion->fused_instructions_computation()->ToString()
+      << " fusion->called_computations(): "
+      << ComputationsToString(fusion->called_computations());
+
+  for (const auto& fused : fusion->fused_instructions()) {
+    TF_RET_CHECK(fused->parent() == fusion->fused_instructions_computation())
+        << "Fused HLO was missing a parent: " << fused->ToString()
+        << " parent: " << fused->parent()
+        << " computation: " << fusion->parent();
+  }
+
   // TODO(b/65423525): We'd like to check that all operands are distinct.
   // This is currently disabled due to the invariant being violated by
   // multi-output fusion.
   return Status::OK();
 }
 
-Status HloVerifier::CheckWhileInstruction(HloInstruction* instruction) {
-  auto* while_cond = instruction->while_condition();
-  auto* while_body = instruction->while_body();
-  if (while_cond->num_parameters() != 1) {
-    return FailedPrecondition(
-        "While condition must have exactly 1 parameter; had %d : %s",
-        while_cond->num_parameters(), while_cond->ToString());
-  }
-  if (while_body->num_parameters() != 1) {
-    return FailedPrecondition(
-        "While body must have exactly 1 parameter; had %d : %s",
-        while_body->num_parameters(), while_body->ToString());
-  }
-  if (instruction->operand_count() != 1) {
-    return FailedPrecondition(
-        "While loop must have exactly one operand; had %d : %s",
-        instruction->operand_count(), instruction->ToString());
-  }
-  return Status::OK();
-}
-
-Status HloVerifier::CheckConditionalInstruction(HloInstruction* instruction) {
-  if (instruction->true_computation()->num_parameters() != 1) {
-    return FailedPrecondition(
-        "True computation %s of %s must have 1 parameter insted of %d",
-        instruction->true_computation()->name(), instruction->ToString(),
-        instruction->true_computation()->num_parameters());
-  }
-  if (instruction->false_computation()->num_parameters() != 1) {
-    return FailedPrecondition(
-        "False computation %s of %s must have 1 parameter insted of %d",
-        instruction->false_computation()->name(), instruction->ToString(),
-        instruction->false_computation()->num_parameters());
-  }
-  return Status::OK();
-}
-
-Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) {
+// Checks that the non-scalar operand shapes are compatible to the output
+// shape, i.e., that there are no implicit broadcasts of size-one dimensions.
+Status CheckElementwiseInstruction(HloInstruction* instruction) {
   const Shape& out_shape = instruction->shape();
   for (HloInstruction* operand : instruction->operands()) {
     const Shape& operand_shape = operand->shape();
@@ -926,133 +1037,114 @@ Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) {
   return Status::OK();
 }
 
-namespace {
+// Visitor which verifies various fields on the HLO instruction. This class does
+// not check result shape as that is checked in the ShapeVerifier.
+class InstructionVerifier : public DfsHloVisitorWithDefault {
+ public:
+  InstructionVerifier() {}
 
-// Returns true if the given Shape has a TOKEN shape as any subshape.
-bool ShapeContainsToken(const Shape& shape) {
-  bool contains_token = false;
-  ShapeUtil::ForEachSubshape(
-      shape, [&contains_token](const Shape& subshape, const ShapeIndex&) {
-        if (ShapeUtil::IsToken(subshape)) {
-          contains_token = true;
-        }
-      });
-  return contains_token;
-}
+  Status DefaultAction(HloInstruction*) override { return Status::OK(); }
 
-// Verifies that all types entering and exiting the entry computation are
-// legal.
-Status VerifyEntryAndExitShapes(const HloModule& module) {
-  // Tokens cannot be passed as entry parameters.
-  // TODO(b/80000000): Remove this constraint.
-  for (int i = 0; i < module.entry_computation()->num_parameters(); ++i) {
-    HloInstruction* param =
-        module.entry_computation()->parameter_instruction(i);
-    if (ShapeContainsToken(param->shape())) {
-      return InternalError(
-          "Entry parameter %d is or contains a token shape: %s", i,
-          ShapeUtil::HumanString(param->shape()));
-    }
+  Status HandleFusion(HloInstruction* fusion) override {
+    return CheckFusionInstruction(fusion);
   }
-  return Status::OK();
-}
 
-// Checks if the given two instructions share the same channel id.
-Status CheckSameChannel(const HloInstruction* instr1,
-                        const HloInstruction* instr2) {
-  if (instr1->channel_id() != instr2->channel_id()) {
-    return InternalError(
-        "Expected to have the same channel id, actual channel ids are: %s "
-        "(%d), %s (%d)",
-        instr1->ToString(), instr1->channel_id(), instr2->ToString(),
-        instr2->channel_id());
+  Status HandleBroadcast(HloInstruction* broadcast) override {
+    // If you see this failure then someone has confused the difference
+    // between the HLO broadcast op, and the UserComputation broadcast
+    // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I
+    // or ComputationLowerer::Visit()
+    TF_RET_CHECK(broadcast->dimensions().size() ==
+                 ShapeUtil::Rank(broadcast->operand(0)->shape()))
+        << "Broadcast HLO (" << broadcast->ToShortString()
+        << ") has invalid number of dimensions: "
+        << broadcast->dimensions().size()
+        << " != " << ShapeUtil::Rank(broadcast->operand(0)->shape());
+    return Status::OK();
   }
-  return Status::OK();
-}
 
-// Checks if the given two instructions have the same is_host_transfer
-// attribute value. Intsructions must be send/recv instructions or their
-// 'done' variant.
-Status CheckSameIsHostTransfer(const HloInstruction* instr1,
-                               const HloInstruction* instr2) {
-  const HloSendRecvInstruction* send_recv1 =
-      DynCast<const HloSendRecvInstruction>(instr1);
-  const HloSendRecvInstruction* send_recv2 =
-      DynCast<const HloSendRecvInstruction>(instr2);
-  TF_RET_CHECK(send_recv1 != nullptr);
-  TF_RET_CHECK(send_recv2 != nullptr);
-  if (send_recv1->is_host_transfer() != send_recv2->is_host_transfer()) {
-    return InternalError(
-        "Expected instructions to have the same is-host-transfer property: "
-        "%s, "
-        "%s ",
-        instr1->ToString(), instr2->ToString());
+  Status HandleWhile(HloInstruction* xla_while) override {
+    auto* while_cond = xla_while->while_condition();
+    auto* while_body = xla_while->while_body();
+    if (while_cond->num_parameters() != 1) {
+      return FailedPrecondition(
+          "While condition must have exactly 1 parameter; had %d : %s",
+          while_cond->num_parameters(), while_cond->ToString());
+    }
+    if (while_body->num_parameters() != 1) {
+      return FailedPrecondition(
+          "While body must have exactly 1 parameter; had %d : %s",
+          while_body->num_parameters(), while_body->ToString());
+    }
+    if (xla_while->operand_count() != 1) {
+      return FailedPrecondition(
+          "While loop must have exactly one operand; had %d : %s",
+          xla_while->operand_count(), xla_while->ToString());
+    }
+    return Status::OK();
   }
-  return Status::OK();
-}
 
-// Checks various invariants of send and recv instructions.
-Status VerifySendsAndRecvs(const HloModule& module) {
-  absl::flat_hash_map<int64, const HloInstruction*> host_channels;
-  // Host send/recv instructions must have their own unique channel.
-  auto check_unique_host_channel = [&](const HloInstruction* instruction) {
-    const HloSendRecvInstruction* sendrecv =
-        DynCast<const HloSendRecvInstruction>(instruction);
-    if (sendrecv->is_host_transfer()) {
-      auto it_inserted =
-          host_channels.insert({sendrecv->channel_id(), sendrecv});
-      if (!it_inserted.second) {
-        return FailedPrecondition(
-            "Channel %d is used for multiple host send/recv instructions: "
-            "%s "
-            "and "
-            "%s",
-            sendrecv->channel_id(), sendrecv->ToString(),
-            it_inserted.first->second->ToString());
-      }
+  Status HandleConditional(HloInstruction* conditional) override {
+    if (conditional->true_computation()->num_parameters() != 1) {
+      return FailedPrecondition(
+          "True computation %s of %s must have 1 parameter insted of %d",
+          conditional->true_computation()->name(), conditional->ToString(),
+          conditional->true_computation()->num_parameters());
     }
+    if (conditional->false_computation()->num_parameters() != 1) {
+      return FailedPrecondition(
+          "False computation %s of %s must have 1 parameter insted of %d",
+          conditional->false_computation()->name(), conditional->ToString(),
+          conditional->false_computation()->num_parameters());
+    }
+    return Status::OK();
+  }
+
+  Status HandleElementwiseUnary(HloInstruction* instruction) override {
+    return CheckElementwiseInstruction(instruction);
+  }
+
+  Status HandleElementwiseBinary(HloInstruction* instruction) override {
+    return CheckElementwiseInstruction(instruction);
+  }
 
+  Status HandleGetTupleElement(HloInstruction* gte) override {
+    TF_RET_CHECK(ShapeUtil::IsTuple(gte->operand(0)->shape()));
     return Status::OK();
-  };
+  }
 
-  // Send/Recv instruction must have a single user: the corresponding
-  // SendDone/RecvDone. with matching channel.
-  for (const HloComputation* computation : module.computations()) {
-    for (const HloInstruction* instruction : computation->instructions()) {
-      switch (instruction->opcode()) {
-        case HloOpcode::kSend: {
-          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
-          TF_RET_CHECK(instruction->users().size() == 1);
-          const HloInstruction* send_done = instruction->users().front();
-          TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone);
-          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, send_done));
-          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, send_done));
-          break;
-        }
-        case HloOpcode::kRecv: {
-          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
-          TF_RET_CHECK(instruction->users().size() == 1);
-          const HloInstruction* recv_done = instruction->users().front();
-          TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone);
-          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, recv_done));
-          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, recv_done));
-          break;
-        }
-        case HloOpcode::kSendDone:
-          TF_RET_CHECK(instruction->operands().size() == 1);
-          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kSend);
-          break;
-        case HloOpcode::kRecvDone:
-          TF_RET_CHECK(instruction->operands().size() == 1);
-          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kRecv);
-          break;
-        default:
-          break;
-      }
-    }
+  Status HandleTranspose(HloInstruction* transpose) override {
+    const Shape& shape = transpose->shape();
+    const HloInstruction* operand = transpose->operand(0);
+    TF_RET_CHECK(shape.dimensions().size() == transpose->dimensions().size());
+    TF_RET_CHECK(shape.dimensions().size() ==
+                 transpose->operand(0)->shape().dimensions().size());
+    TF_RET_CHECK(std::equal(
+        operand->shape().dimensions().begin(),
+        operand->shape().dimensions().end(),
+        Permute(transpose->dimensions(), shape.dimensions()).begin()))
+        << "shape: " << shape << ", operand->shape(): " << shape
+        << ", dimensions: {" << absl::StrJoin(transpose->dimensions(), ", ")
+        << "}";
+    return Status::OK();
   }
-  return Status::OK();
-}
+
+  Status Preprocess(HloInstruction* instruction) override {
+    auto previous = instructions_by_name_.find(instruction->name());
+    TF_RET_CHECK(previous == instructions_by_name_.end())
+        << "HLO has name that is not unique within module:\n"
+        << instruction->ToString()
+        << " in computation: " << instruction->parent()->name()
+        << "\nPrevious HLO with same name:\n"
+        << previous->second->ToString()
+        << " in computation: " << previous->second->parent()->name();
+    instructions_by_name_[instruction->name()] = instruction;
+    return Status::OK();
+  }
+
+ private:
+  absl::flat_hash_map<string, const HloInstruction*> instructions_by_name_;
+};
 
 }  // namespace
 
@@ -1061,65 +1153,13 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
   TF_RETURN_IF_ERROR(VerifyHloStructure(module));
   TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module));
 
-  absl::flat_hash_map<string, const HloInstruction*> instructions;
 
   for (auto* computation : module->computations()) {
-    for (const auto& instruction : computation->instructions()) {
-      TF_RET_CHECK(instruction->parent() == computation);
-      if (instruction->opcode() == HloOpcode::kFusion) {
-        TF_RETURN_IF_ERROR(CheckFusionInstruction(instruction));
-        TF_RET_CHECK(instruction->called_computations() ==
-                     absl::Span<HloComputation* const>(
-                         {instruction->fused_instructions_computation()}))
-            << "Fusion HLO calls computations other than the "
-               "fused_instructions_computation: "
-            << instruction->ToString()
-            << " instruction->fused_instructions_computation(): "
-            << instruction->fused_instructions_computation()->ToString()
-            << " instruction->called_computations(): "
-            << ComputationsToString(instruction->called_computations());
-
-        for (const auto& fused : instruction->fused_instructions()) {
-          TF_RET_CHECK(fused->parent() ==
-                       instruction->fused_instructions_computation())
-              << "Fused HLO was missing a parent: " << fused->ToString()
-              << " parent: " << fused->parent()
-              << " computation: " << computation;
-        }
-      } else if (instruction->opcode() == HloOpcode::kBroadcast) {
-        // If you see this failure then someone has confused the difference
-        // between the HLO broadcast op, and the UserComputation broadcast
-        // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I
-        // or ComputationLowerer::Visit()
-        TF_RET_CHECK(instruction->dimensions().size() ==
-                     ShapeUtil::Rank(instruction->operand(0)->shape()))
-            << "Broadcast HLO (" << instruction->ToShortString()
-            << ") has invalid number of dimensions: "
-            << instruction->dimensions().size()
-            << " != " << ShapeUtil::Rank(instruction->operand(0)->shape());
-      } else if (instruction->opcode() == HloOpcode::kWhile) {
-        TF_RETURN_IF_ERROR(CheckWhileInstruction(instruction));
-      } else if (instruction->opcode() == HloOpcode::kConditional) {
-        TF_RETURN_IF_ERROR(CheckConditionalInstruction(instruction));
-      } else if (instruction->opcode() !=
-                     HloOpcode::kRng /* Rng operands are always scalar. */
-                 && instruction->IsElementwise()) {
-        TF_RETURN_IF_ERROR(CheckElementwiseInstruction(instruction));
-      }
-
-      auto previous = instructions.find(instruction->name());
-      TF_RET_CHECK(previous == instructions.end())
-          << "HLO has name that is not unique within module:\n"
-          << instruction->ToString()
-          << " in computation: " << computation->name()
-          << "\nPrevious HLO with same name:\n"
-          << previous->second->ToString()
-          << " in computation: " << previous->second->parent()->name();
-      instructions[instruction->name()] = instruction;
-    }
-
     std::unique_ptr<ShapeVerifier> shape_verifier = shape_verifier_factory_();
     TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get()));
+
+    InstructionVerifier instruction_verifier;
+    TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier));
   }
 
   TF_RETURN_IF_ERROR(VerifyEntryAndExitShapes(*module));
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index 0cde4a31af..6d16586c2c 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -172,17 +172,6 @@ class HloVerifier : public HloModulePass {
   StatusOr<bool> Run(HloModule* module) override;
 
  private:
-  // CHECKs various invariants of a fusion instruction.
-  Status CheckFusionInstruction(HloInstruction* fusion) const;
-
-  Status CheckWhileInstruction(HloInstruction* instruction);
-
-  Status CheckConditionalInstruction(HloInstruction* instruction);
-
-  // Checks that the non-scalar operand shapes are compatible to the output
-  // shape, i.e., that there are no implicit broadcasts of size-one dimensions.
-  Status CheckElementwiseInstruction(HloInstruction* instruction);
-
   // Creates a ShapeVerifier that checks that shapes match inferred
   // expectations. This is a factory function because ShapeVerifier,
   // being a DfsHloVisitor, is stateful. We want a clean object
-- 
GitLab


From 3302b4c1fcf2ecd3ae3119cddb16d057235ece07 Mon Sep 17 00:00:00 2001
From: Tingbo Lu <tingbopku@gmail.com>
Date: Fri, 5 Oct 2018 00:02:45 +0800
Subject: [PATCH 0352/1085] Update rnn_cell.py

---
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 59a61af7b3..e8073f8463 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -1110,7 +1110,7 @@ _Linear = core_rnn_cell._Linear  # pylint: disable=invalid-name
 class AttentionCellWrapper(rnn_cell_impl.RNNCell):
   """Basic attention cell wrapper.
 
-  Implementation based on https://arxiv.org/abs/1409.0473.
+  Implementation based on https://arxiv.org/abs/1601.06733.
   """
 
   def __init__(self,
-- 
GitLab


From d7f10cab6296eaa8c7d156ec58d703ee424df261 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Thu, 4 Oct 2018 09:22:40 -0700
Subject: [PATCH 0353/1085] Fix typos in test case

---
 tensorflow/python/kernel_tests/check_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 8830c09486..543d2d3f8b 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -263,7 +263,7 @@ b'y (shape=(2, 3) dtype=float32) = '
 0.0, 1.0, ...
 """
     with context.eager_mode():
-      t = tf.constant(np.array(range(6)), shape=[2,3], dtype=tf.float32)
+      t = constant_op.constant(np.array(range(6)), shape=[2,3], dtype=np.float32)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_full):
         check_ops.assert_none_equal(t, t, message="This is the error message.",
-- 
GitLab


From a7e8ad18a61b251ef42c0260dd80a12cea8f268c Mon Sep 17 00:00:00 2001
From: Pete Warden <petewarden@google.com>
Date: Thu, 4 Oct 2018 09:20:31 -0700
Subject: [PATCH 0354/1085] Experimental interpreter, kernels, and example
 running TensorFlow Lite on a microcontroller

PiperOrigin-RevId: 215748973
---
 .../contrib/lite/experimental/micro/BUILD     |   76 +
 .../contrib/lite/experimental/micro/README.md |  114 ++
 .../lite/experimental/micro/compatibility.h   |   32 +
 .../micro/examples/micro_speech/BUILD         |   28 +
 .../micro_speech/micro_speech_test.cc         |   55 +
 .../micro_speech/tiny_conv_model_data.cc      | 1672 +++++++++++++++++
 .../micro_speech/tiny_conv_model_data.h       |   27 +
 .../lite/experimental/micro/kernels/BUILD     |  107 ++
 .../micro/kernels/all_ops_resolver.cc         |   43 +
 .../micro/kernels/all_ops_resolver.h          |   34 +
 .../micro/kernels/depthwise_conv.cc           |  208 ++
 .../micro/kernels/depthwise_conv_test.cc      |  406 ++++
 .../micro/kernels/fully_connected.cc          |  184 ++
 .../micro/kernels/fully_connected_test.cc     |  643 +++++++
 .../experimental/micro/kernels/softmax.cc     |  213 +++
 .../micro/kernels/softmax_test.cc             |  220 +++
 .../experimental/micro/kernels/test_utils.h   |  170 ++
 .../micro/micro_error_reporter.cc             |   78 +
 .../experimental/micro/micro_error_reporter.h |   34 +
 .../micro/micro_error_reporter_test.cc        |   25 +
 .../experimental/micro/micro_interpreter.cc   |  310 +++
 .../experimental/micro/micro_interpreter.h    |   71 +
 .../micro/micro_interpreter_test.cc           |  197 ++
 .../micro/micro_mutable_op_resolver.cc        |   80 +
 .../micro/micro_mutable_op_resolver.h         |   46 +
 .../micro/micro_mutable_op_resolver_test.cc   |   83 +
 .../micro/simple_tensor_allocator.cc          |  149 ++
 .../micro/simple_tensor_allocator.h           |   51 +
 .../micro/simple_tensor_allocator_test.cc     |  144 ++
 .../lite/experimental/micro/testing/BUILD     |   17 +
 .../micro/testing/Dockerfile.bluepill         |   21 +
 .../experimental/micro/testing/bluepill.resc  |   36 +
 .../experimental/micro/testing/micro_test.bzl |   64 +
 .../experimental/micro/testing/micro_test.h   |  138 ++
 .../micro/testing/test_bluepill_binary.sh     |   54 +
 .../micro/testing/test_linux_binary.sh        |   39 +
 .../experimental/micro/tools/make/Makefile    |  166 ++
 .../micro/tools/make/download_dependencies.sh |   73 +
 .../tools/make/targets/bluepill_makefile.inc  |   65 +
 .../lite/kernels/internal/compatibility.h     |   23 +
 .../contrib/lite/kernels/internal/types.h     |    3 +-
 41 files changed, 6197 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/contrib/lite/experimental/micro/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/micro/README.md
 create mode 100644 tensorflow/contrib/lite/experimental/micro/compatibility.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_interpreter.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/micro_test.h
 create mode 100755 tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
 create mode 100755 tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh
 create mode 100644 tensorflow/contrib/lite/experimental/micro/tools/make/Makefile
 create mode 100755 tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh
 create mode 100644 tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc

diff --git a/tensorflow/contrib/lite/experimental/micro/BUILD b/tensorflow/contrib/lite/experimental/micro/BUILD
new file mode 100644
index 0000000000..df1036bc8b
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/BUILD
@@ -0,0 +1,76 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow/contrib/lite/experimental/micro/testing:micro_test.bzl",
+    "tflite_micro_cc_test",
+)
+
+cc_library(
+    name = "micro_framework",
+    srcs = [
+        "micro_error_reporter.cc",
+        "micro_interpreter.cc",
+        "micro_mutable_op_resolver.cc",
+        "simple_tensor_allocator.cc",
+    ],
+    hdrs = [
+        "compatibility.h",
+        "micro_error_reporter.h",
+        "micro_interpreter.h",
+        "micro_mutable_op_resolver.h",
+        "simple_tensor_allocator.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite:schema_fbs_version",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/core/api",
+        "//tensorflow/contrib/lite/schema:schema_fbs",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "micro_error_reporter_test",
+    srcs = [
+        "micro_error_reporter_test.cc",
+    ],
+    deps = [
+        ":micro_framework",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "micro_mutable_op_resolver_test",
+    srcs = [
+        "micro_mutable_op_resolver_test.cc",
+    ],
+    deps = [
+        ":micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "micro_interpreter_test",
+    srcs = [
+        "micro_interpreter_test.cc",
+    ],
+    deps = [
+        ":micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "simple_tensor_allocator_test",
+    srcs = [
+        "simple_tensor_allocator_test.cc",
+    ],
+    deps = [
+        ":micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/micro/README.md b/tensorflow/contrib/lite/experimental/micro/README.md
new file mode 100644
index 0000000000..414cafde4d
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/README.md
@@ -0,0 +1,114 @@
+# TensorFlow Lite for Microcontrollers
+
+This an experimental port of TensorFlow Lite aimed at micro controllers and other devices with only kilobytes of memory. It doesn't require any operating system support, any standard C or C++ libraries, or dynamic memory allocation, so it's designed to be portable even to 'bare metal' systems. The core runtime fits in 16KB on a Cortex M3, and with enough operators to run a speech keyword detection model, takes up a total of 22KB.
+
+The design goals are for the framework to be:
+
+- **Readable**: We want embedded software engineers to be able to understand what's required to run ML inference without having to study research papers. We've tried to keep the code base small, modular, and have reference implementations of all operations to help with this.
+
+- **Easy to modify**: We know that there are a lot of different platforms and requirements in the embedded world, and we don't expect to cover all of them in one framework. Instead, we're hoping that it can be a good starting point for developers to build on top of to meet their own needs. For example, we tried to make it easy to replace the implementations of key computational operators that are often crucial for performance, without having to touch the data flow and other runtime code. We want it to make more sense to use our workflow to handle things like model import and less-important operations, and customize the parts that matter, rather than having to reimplement everything in your own engine.
+
+- **Well-tested**: If you're modifying code, you need to know if your changes are correct. Having an easy way to test lets you develop much faster. To help there, we've written tests for all the components, and we've made sure that the tests can be run on almost any platform, with no dependencies apart from the ability to log text to a debug console somewhere. We also provide an easy way to run all the tests on-device as part of an automated test framework, and we use qemu/Renode emulation so that tests can be run even without physical devices present.
+
+- **Easy to integrate**: We want to be as open a system as possible, and use the best code available for each platform. To do that, we're going to rely on projects like [CMSIS-NN](https://www.keil.com/pack/doc/CMSIS/NN/html/index.html), [uTensor](https://github.com/uTensor/uTensor), and other vendor libraries to handle as much performance-critical code as possible. We know that there are an increasing number of options to accelerate neural networks on microcontrollers, so we're aiming to be a good host for deploying those hardware technologies too.
+
+- **Compatible**: We're using the same file schema, interpreter API, and kernel interface as regular TensorFlow Lite, so we leverage the large existing set of tools, documentation, and examples for the project. The biggest barrier to deploying ML models is getting them from a training environment into a form that's easy to run inference on, so we see reusing this rich ecosystem as being crucial to being easily usable. We also hope to integrate this experimental work back into the main codebase in the future.
+
+To meet those goals, we've made some tradeoffs:
+
+- **Simple C++**: To help with readability, our code is written in a modern version of C++, but we generally treat it as a "better C", rather relying on more complex features such as template meta-programming. As mentioned earlier, we avoid any use of dynamic memory allocation (new/delete) or the standard C/C++ libraries, so we believe this should still be fairly portable. It does mean that some older devices with C-only toolchains won't be supported, but we're hoping that the reference operator implementations (which are simple C-like functions) can still be useful in those cases. The interfaces are also designed to be C-only, so it should be possible to integrate the resulting library with pure C projects.
+
+- **Interpreted**: Code generation is a popular pattern for embedded code, because it gives standalone code that's easy to modify and step through, but we've chosen to go with an interpreted approach. In our internal microcontroller work we've found that using an extremely stripped-down interpreter with almost no dependencies gives us a lot of the same advantages, but is easier to maintain. For example, when new updates come out for the underlying library, you can just merge your local modifications in a single step, rather than having to regenerate new code and then patch in any changes you subsequently made. The coarse granularity of the interpreted primitives means that each operation call typically takes hundreds of thousands of instruction cycles at least, so we don't see noticeable performance gains from avoiding what's essentially a single switch statement at the interpreter level to call each operation. We're still working on improving the packaging though, for example we're considering having the ability to snapshot all the source files and headers used for a particular model, being able to compile the code and data together as a library, and then access it through a minimal set of C interface calls which hide the underlying complexity.
+
+- **Flatbuffers**: We represent our models using [the standard flatbuffer schema used by the rest of TensorFlow Lite](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/schema/schema.fbs), with the difference that we always keep it in read-only program memory (typically flash) rather than relying on having a file system to read it from. This is a good fit because flatbuffer's serialized format is designed to be mapped into memory without requiring any extra memory allocations or modifications to access it. All of the functions to read model values work directly on the serialized bytes, and large sections of data like weights are directly accessible as sequential C-style arrays of their data type, with no strides or unpacking needed. We do get a lot of value from using flatbuffers, but there is a cost in complexity. The flat buffer library code is all inline [inside the main headers](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/schema/schema_generated.h), but it isn't straightforward to inspect their implementations, and the model data structures aren't easy to comprehend from the debugger. The header for the schema itself also has to be periodically updated when new information is added to the file format, though we try to handle that transparently for most developers by checking in a pre-generated version.
+
+- **Code Duplication**: Some of the code in this prototype largely duplicates the logic in other parts of the TensorFlow Lite code base, for example the operator wrappers. We've tried to keep share as much as we can between the two interpreters, but there are some assumptions built into the original runtime that make this difficult. We'll be working on modularizing the main interpreter so that we can move to an entirely shared system.
+
+This initial preview release is designed to get early feedback, and is not intended to be a final product. It only includes enough operations to run a simple keyword recognition model, and the implementations are not optimized. We're hoping this will be a good way to get feedback and collaborate to improve the framework.
+
+## Getting Started
+
+Building requires a Linux or OS X machine.
+
+ - Open a terminal
+ - Download the TensorFlow source with `git clone https://github.com/tensorflow`
+ - Enter the source root directory by running `cd tensorflow`
+ - Download the dependencies by running `tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh`. This may take a few minutes
+ - Build and test the library with `make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile test`
+
+You should see a series of compilation steps, followed by "~~~ALL TESTS PASSED~~~" for the various tests of the code that it will run. If there's an error, you should get an informative message from make about what went wrong.
+
+These tests are all built as simple binaries with few dependencies, so you can run them manually. For example, here's how to run the depthwise convolution test, and its output:
+
+```
+tensorflow/contrib/lite/experimental/micro/tools/make/gen/linux_x86_64/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test
+
+Testing SimpleTest
+Testing SimpleTestQuantized
+Testing SimpleTestRelu
+Testing SimpleTestReluQuantized
+4/4 tests passed
+~ALL TESTS PASSED~~~
+```
+
+Looking at the [depthwise_conv_test.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc) code, you'll see a sequence that looks like this:
+
+```
+...
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(SimpleTest) {
+...
+}
+...
+TF_LITE_MICRO_TESTS_END
+```
+
+These macros work a lot like [the Google test framework](https://github.com/google/googletest), but they don't require any dependencies and just write results to stderr, rather than aborting the program. If all the tests pass, then "~~~ALL TESTS PASSED~~~" is output, and the test harness that runs the binary during the make process knows that everything ran correctly. If there's an error, the lack of the expected string lets the harness know that the test failed.
+
+So, why are we running tests in this complicated way? So far, we've been building binaries that run locally on the Mac OS or Linux machine you're building on, but this approach becomes important when we're targeting simple micro controller devices.
+
+## Building for the "Blue Pill" STM32F103
+
+The goal of this library is to enable machine learning on resource-constrained micro controllers and DSPs, and as part of that we've targeted the ["Blue Pill" STM32F103-compatible development board](https://github.com/google/googletest) as a cheap and popular platform. It only has 20KB of RAM and 64KB of flash, so it's a good device to ensure we can run efficiently on small chips.
+
+It's fairly easy to [buy and wire up a physical board](https://github.com/google/stm32_bare_lib#wiring-up-your-blue-pill), but even if you don't have an actual device, the [Renode project](https://renode.io/) makes it easy to run a faithful emulation on your desktop machine. You'll need [Docker](https://www.docker.com/) installed, but once you have that set up, try running the following command:
+
+`make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile TARGET=bluepill test`
+
+You should see a similar set of outputs as you did in the previous section, with the addition of some extra Docker logging messages. These are because we're using Docker to run the Renode micro controller emulation tool, and the tests themselves are being run on a simulated STM32F103 device. The communication channels between an embedded device and the host are quite limited, so the test harness looks at the output of the debug log to see if tests have passed, just as it did in the previous section. This makes it a very flexible way to run cross-platform tests, even when a platform has no operating system facilities, as long as it can output debugging text logs.
+
+To understand what's happening here, try running the same depthwise convolution test, but through the emulated device test harness, with the following command:
+
+```
+tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh \
+tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test
+
+```
+
+You should see output that looks something like this:
+
+```
+Sending build context to Docker daemon   21.5kB
+Step 1/2 : FROM antmicro/renode:latest
+ ---> 1b670a243e8f
+Step 2/2 : LABEL maintainer="Pete Warden <petewarden@google.com>"
+ ---> Using cache
+ ---> 3afcd410846d
+Successfully built 3afcd410846d
+Successfully tagged renode_bluepill:latest
+LOGS:
+...
+03:27:32.4340 [INFO] machine-0: Machine started.
+03:27:32.4790 [DEBUG] cpu.uartSemihosting: [+0.22s host +0s virt 0s virt from start] Testing SimpleTest
+03:27:32.4812 [DEBUG] cpu.uartSemihosting: [+2.21ms host +0s virt 0s virt from start]   Testing SimpleTestQuantized
+03:27:32.4833 [DEBUG] cpu.uartSemihosting: [+2.14ms host +0s virt 0s virt from start]   Testing SimpleTestRelu
+03:27:32.4834 [DEBUG] cpu.uartSemihosting: [+0.18ms host +0s virt 0s virt from start]   Testing SimpleTestReluQuantized
+03:27:32.4838 [DEBUG] cpu.uartSemihosting: [+0.4ms host +0s virt 0s virt from start]   4/4 tests passed
+03:27:32.4839 [DEBUG] cpu.uartSemihosting: [+41µs host +0s virt 0s virt from start]   ~~~ALL TESTS PASSED~~~
+03:27:32.4839 [DEBUG] cpu.uartSemihosting: [+5µs host +0s virt 0s virt from start]   
+...
+tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test: PASS
+```
+
+There's a lot of output here, but you should be able to see that the same tests that were covered when we ran locally on the development machine show up in the debug logs here, along with the magic string "~~~ALL TESTS PASSED~~~". This is the exact same code as before, just compiled and run on the STM32F103 rather than your desktop. We hope that the simplicity of this testing approach will help make adding support for new platforms as easy as possible.
diff --git a/tensorflow/contrib/lite/experimental/micro/compatibility.h b/tensorflow/contrib/lite/experimental/micro/compatibility.h
new file mode 100644
index 0000000000..4f0fd9f312
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/compatibility.h
@@ -0,0 +1,32 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_COMPATIBILITY_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_COMPATIBILITY_H_
+
+// C++ will automatically create class-specific delete operators for virtual
+// objects, which by default call the global delete function. For embedded
+// applications we want to avoid this, and won't be calling new/delete on these
+// objects, so we need to override the default implementation with one that does
+// nothing to avoid linking in ::delete().
+// This macro needs to be included in all subclasses of a virtual base class in
+// the private section.
+#ifdef TF_LITE_STATIC_MEMORY
+#define TF_LITE_REMOVE_VIRTUAL_DELETE \
+  void operator delete(void* p) {}
+#else
+#define TF_LITE_REMOVE_VIRTUAL_DELETE
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_COMPATIBILITY_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
new file mode 100644
index 0000000000..447c584387
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
@@ -0,0 +1,28 @@
+# Description:
+#   TensorFlow Lite microcontroller example.
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow/contrib/lite/experimental/micro/testing:micro_test.bzl",
+    "tflite_micro_cc_test",
+)
+
+tflite_micro_cc_test(
+    name = "micro_speech_test",
+    srcs = [
+        "micro_speech_test.cc",
+        "tiny_conv_model_data.cc",
+        "tiny_conv_model_data.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite:schema_fbs_version",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/kernels:all_ops_resolver",
+        "//tensorflow/contrib/lite/experimental/micro/kernels:micro_ops",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+        "//tensorflow/contrib/lite/schema:schema_fbs",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
new file mode 100644
index 0000000000..86cd056a72
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
@@ -0,0 +1,55 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h"
+#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
+#include "tensorflow/contrib/lite/version.h"
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestInvoke) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data);
+  if (model->version() != TFLITE_SCHEMA_VERSION) {
+    error_reporter->Report(
+        "Model provided is schema version %d not equal "
+        "to supported version %d.\n",
+        model->version(), TFLITE_SCHEMA_VERSION);
+  }
+  tflite::ops::micro::AllOpsResolver resolver;
+
+  const int tensor_arena_size = 10 * 1024;
+  uint8_t tensor_arena[tensor_arena_size];
+  tflite::SimpleTensorAllocator tensor_allocator(tensor_arena,
+                                                 tensor_arena_size);
+
+  tflite::MicroInterpreter interpreter(model, resolver, &tensor_allocator,
+                                       error_reporter);
+  TfLiteStatus invoke_status = interpreter.Invoke();
+  if (invoke_status != kTfLiteOk) {
+    error_reporter->Report("Invoke failed\n");
+  }
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status);
+
+  error_reporter->Report("Ran successfully\n");
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
new file mode 100644
index 0000000000..f1f9e0e219
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
@@ -0,0 +1,1672 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Automatically created from a TensorFlow Lite flatbuffer using the command:
+// xxd -i tiny_conv.tflite > tiny_conv_model_data.cc
+
+#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
+
+const unsigned char g_tiny_conv_model_data[] = {
+    0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,
+    0x18, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x4d, 0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0xf4, 0x47, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
+    0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74,
+    0x65, 0x64, 0x2e, 0x00, 0x09, 0x00, 0x00, 0x00, 0xd4, 0x47, 0x00, 0x00,
+    0x04, 0x03, 0x00, 0x00, 0xfc, 0x02, 0x00, 0x00, 0xf4, 0x02, 0x00, 0x00,
+    0x64, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb8, 0xb3, 0xff, 0xff,
+    0x16, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xd7, 0x02, 0x00, 0x00, 0x2f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0xb3, 0xff, 0xff,
+    0x46, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0xab, 0x00, 0x00, 0x00, 0x1e, 0xff, 0xff, 0xff, 0xed, 0xff, 0xff, 0xff,
+    0x4a, 0x00, 0x00, 0x00, 0x62, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+    0x80, 0x02, 0x00, 0x00, 0xce, 0xad, 0xaf, 0x3c, 0xc8, 0xe9, 0xb0, 0x83,
+    0xa1, 0xbf, 0xb2, 0xb1, 0xab, 0xd0, 0xa7, 0x53, 0xa5, 0xe9, 0xb5, 0xac,
+    0xa2, 0xd3, 0xc4, 0x9e, 0x8b, 0xb2, 0x64, 0xb3, 0x9d, 0xa2, 0xae, 0xa6,
+    0xd5, 0xbe, 0x43, 0x9f, 0x9c, 0x54, 0xb5, 0xa8, 0x49, 0x78, 0x86, 0xa2,
+    0xa3, 0x55, 0x35, 0x96, 0x3d, 0x7f, 0xe2, 0xb5, 0xb0, 0x47, 0x28, 0xa9,
+    0x9d, 0xbb, 0xd6, 0xff, 0xb7, 0x79, 0x63, 0xb5, 0xaf, 0xa7, 0xab, 0x7e,
+    0xbc, 0xc7, 0xa0, 0xc3, 0xb1, 0xb6, 0xb2, 0xa1, 0xc2, 0xbb, 0x79, 0x57,
+    0xbe, 0xc1, 0xb7, 0xb0, 0x6b, 0xb7, 0xa5, 0x75, 0x97, 0xb8, 0xe7, 0xac,
+    0xad, 0x7e, 0xb1, 0x9b, 0xc3, 0xba, 0x6b, 0xa2, 0x7f, 0x58, 0xb9, 0x7a,
+    0x4c, 0x91, 0x74, 0x9e, 0xa7, 0x3d, 0xc2, 0x94, 0x75, 0xa1, 0xa4, 0xac,
+    0xab, 0x45, 0x2e, 0xb4, 0xb6, 0xbf, 0xc1, 0xdb, 0xaf, 0x6c, 0x67, 0xb1,
+    0xa9, 0xa6, 0xa8, 0xca, 0xc2, 0xc4, 0xb9, 0xbf, 0xb4, 0xb9, 0xaa, 0x9d,
+    0x9f, 0xb9, 0xb2, 0x71, 0xb2, 0xca, 0xbe, 0xaf, 0x5f, 0xbc, 0xa0, 0x5b,
+    0xa8, 0xb4, 0xa4, 0xa8, 0xd8, 0x69, 0xb7, 0x8a, 0xbc, 0xb8, 0xaf, 0x9c,
+    0x7c, 0x5d, 0xb3, 0x6b, 0x49, 0x95, 0x64, 0xa0, 0xa2, 0x49, 0xcb, 0x87,
+    0xa5, 0xb5, 0xa1, 0xb2, 0xa3, 0x40, 0x6d, 0x9f, 0xc5, 0xb6, 0xbb, 0xd4,
+    0x9c, 0x6d, 0x69, 0xa9, 0xa8, 0x91, 0xad, 0xb8, 0xd2, 0xc6, 0xaf, 0xb8,
+    0xac, 0xa9, 0xa2, 0xa7, 0x60, 0xa6, 0xa1, 0xc9, 0xb8, 0xd6, 0xcf, 0xb1,
+    0x56, 0xb4, 0xac, 0x40, 0xae, 0xbd, 0xbf, 0xa2, 0x54, 0x72, 0x9b, 0x8c,
+    0xc2, 0xb5, 0xc2, 0x9b, 0x64, 0x6d, 0xb4, 0x62, 0x4e, 0x9b, 0x6c, 0xa6,
+    0x8f, 0x4c, 0xca, 0x95, 0xb6, 0xbf, 0x92, 0xae, 0x9c, 0x49, 0xae, 0xb2,
+    0xc0, 0xb6, 0xbc, 0xd1, 0xa4, 0x7b, 0x64, 0xa0, 0xa6, 0x81, 0xac, 0xa6,
+    0xbd, 0xc8, 0xbc, 0xae, 0xaa, 0x9e, 0x61, 0xb1, 0x57, 0xac, 0xbf, 0xbf,
+    0xbb, 0xe0, 0xa6, 0xae, 0x47, 0xc9, 0xbc, 0x57, 0xb0, 0xb5, 0xc7, 0x98,
+    0xf4, 0x93, 0xb6, 0x70, 0xc3, 0xb3, 0xca, 0xab, 0x77, 0x9a, 0xac, 0x45,
+    0x5c, 0x9e, 0x9a, 0xa9, 0x9b, 0x35, 0xc0, 0x6f, 0xc6, 0xc7, 0x91, 0xb4,
+    0xa8, 0x3c, 0xce, 0xb8, 0xad, 0xb9, 0xb5, 0xdd, 0x9c, 0x6d, 0xbf, 0x91,
+    0xb2, 0x7d, 0xa0, 0xaf, 0x9f, 0xbd, 0xb9, 0xcf, 0x9b, 0x5d, 0x3f, 0xac,
+    0x64, 0xae, 0xaf, 0xb8, 0xbc, 0xb8, 0x86, 0xb5, 0x36, 0xcf, 0xb4, 0xa9,
+    0xad, 0xcd, 0xdb, 0xa4, 0x68, 0xa6, 0xa4, 0x67, 0xc8, 0xb7, 0xe5, 0xa4,
+    0x76, 0xb8, 0xa8, 0x28, 0x6b, 0xa5, 0xba, 0xad, 0x9f, 0x3a, 0xa5, 0x42,
+    0xc5, 0xb0, 0x88, 0xad, 0xa5, 0x4d, 0xea, 0x8a, 0xb8, 0xb5, 0xb3, 0xd9,
+    0xa0, 0x77, 0xbb, 0x92, 0x9e, 0x80, 0xbd, 0xbd, 0x6d, 0xcc, 0xab, 0x99,
+    0x88, 0x58, 0x4d, 0xb0, 0x6c, 0xbc, 0x96, 0xbd, 0xae, 0xab, 0x5b, 0xac,
+    0x2f, 0xc3, 0x9a, 0xbe, 0xac, 0xb3, 0x84, 0x9b, 0xe3, 0xaf, 0x95, 0x6b,
+    0xc2, 0xb5, 0xca, 0xb7, 0x4e, 0xbc, 0x9d, 0x24, 0x75, 0xa9, 0xd2, 0xae,
+    0xa0, 0x2b, 0x90, 0x34, 0xd1, 0xb5, 0x96, 0xae, 0xaa, 0x4d, 0xc1, 0xa3,
+    0xb1, 0xb4, 0xaa, 0xd2, 0x9c, 0x7d, 0xc0, 0x91, 0x91, 0x7a, 0xb8, 0x83,
+    0x44, 0xcb, 0xaf, 0x9b, 0x6b, 0x5b, 0x75, 0xb2, 0x62, 0xb6, 0xaa, 0xcb,
+    0x99, 0xa8, 0x63, 0xae, 0x24, 0xc7, 0x8a, 0xbe, 0xa9, 0xb6, 0xa0, 0xa1,
+    0x41, 0xac, 0x84, 0xb5, 0xb9, 0xb3, 0x9b, 0xad, 0x77, 0xbf, 0xa8, 0x7e,
+    0x82, 0xb9, 0xbe, 0xaa, 0xa3, 0x47, 0x6d, 0xb5, 0xc3, 0xb1, 0xbf, 0xa7,
+    0xb1, 0x57, 0x75, 0xb5, 0xb0, 0xb6, 0xb9, 0xce, 0xa4, 0x86, 0xb0, 0xa4,
+    0x98, 0x80, 0xc5, 0x3e, 0x90, 0xca, 0x9b, 0xa2, 0x5a, 0x50, 0xc5, 0xa5,
+    0xad, 0xc1, 0x9c, 0x91, 0x83, 0x8f, 0x21, 0xab, 0xac, 0xba, 0x70, 0xb4,
+    0xae, 0x85, 0x7e, 0xa7, 0xbd, 0xba, 0x7c, 0xb2, 0xb5, 0xb2, 0x7e, 0xb3,
+    0xc3, 0xcd, 0x82, 0xac, 0x9b, 0xb3, 0xa6, 0xb0, 0xbc, 0x6f, 0x52, 0xb9,
+    0xbf, 0xb1, 0xa6, 0xa4, 0xc1, 0x7a, 0x90, 0xc0, 0xae, 0xab, 0x94, 0xd8,
+    0xab, 0xa4, 0x98, 0xbb, 0x8b, 0x86, 0x94, 0x01, 0xad, 0xe7, 0xb1, 0x9b,
+    0x57, 0x48, 0xc1, 0x88, 0xbf, 0xcc, 0xb4, 0x4b, 0x62, 0x8b, 0x48, 0xa7,
+    0xbe, 0xe1, 0x80, 0xa6, 0xb3, 0x64, 0xaa, 0xa4, 0xcf, 0xba, 0x6d, 0xa6,
+    0xb8, 0xa0, 0x8f, 0xb3, 0xce, 0xc3, 0x87, 0xb2, 0xa0, 0xc0, 0x78, 0xb0,
+    0xb9, 0xaa, 0x40, 0xb8, 0xd8, 0xa3, 0x9a, 0xaa, 0xcc, 0xa2, 0x9f, 0xb9,
+    0xbe, 0xc2, 0x89, 0xd6, 0xc6, 0x9c, 0xa3, 0xc7, 0x94, 0xb6, 0xff, 0xff,
+    0x98, 0xb6, 0xff, 0xff, 0xf6, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+    0xc0, 0x44, 0x00, 0x00, 0x4a, 0x4d, 0x59, 0x60, 0x5a, 0x45, 0x3d, 0x50,
+    0x4a, 0x43, 0x3d, 0x59, 0x3e, 0x49, 0x4a, 0x59, 0x45, 0x44, 0x41, 0x5d,
+    0x50, 0x2f, 0x4e, 0x34, 0x46, 0x48, 0x41, 0x4a, 0x4c, 0x3b, 0x4b, 0x3e,
+    0x49, 0x49, 0x43, 0x4b, 0x3e, 0x49, 0x47, 0x41, 0x3e, 0x4a, 0x46, 0x43,
+    0x41, 0x43, 0x47, 0x49, 0x4a, 0x4c, 0x46, 0x58, 0x3f, 0x4c, 0x4b, 0x4c,
+    0x4d, 0x4b, 0x45, 0x52, 0x45, 0x42, 0x52, 0x52, 0x48, 0x40, 0x46, 0x5f,
+    0x4c, 0x41, 0x47, 0x48, 0x48, 0x4c, 0x43, 0x61, 0x50, 0x4b, 0x49, 0x49,
+    0x46, 0x3f, 0x40, 0x67, 0x40, 0x4d, 0x45, 0x40, 0x40, 0x45, 0x47, 0x56,
+    0x44, 0x3a, 0x4a, 0x4c, 0x52, 0x48, 0x46, 0x50, 0x4b, 0x44, 0x51, 0x45,
+    0x40, 0x45, 0x45, 0x48, 0x4e, 0x4e, 0x43, 0x48, 0x44, 0x4b, 0x45, 0x4a,
+    0x53, 0x45, 0x4a, 0x4b, 0x3f, 0x43, 0x45, 0x53, 0x4d, 0x43, 0x46, 0x3f,
+    0x47, 0x4e, 0x51, 0x50, 0x48, 0x4f, 0x4f, 0x4a, 0x4a, 0x4e, 0x45, 0x4e,
+    0x46, 0x41, 0x4a, 0x46, 0x45, 0x47, 0x45, 0x4b, 0x50, 0x4c, 0x46, 0x45,
+    0x41, 0x47, 0x41, 0x47, 0x46, 0x4f, 0x3f, 0x4f, 0x4a, 0x51, 0x4f, 0x53,
+    0x54, 0x48, 0x51, 0x43, 0x4b, 0x48, 0x4d, 0x46, 0x48, 0x4f, 0x49, 0x44,
+    0x43, 0x53, 0x50, 0x59, 0x56, 0x3d, 0x45, 0x44, 0x48, 0x38, 0x3b, 0x5f,
+    0x39, 0x43, 0x43, 0x52, 0x46, 0x3e, 0x43, 0x58, 0x43, 0x1e, 0x50, 0x3c,
+    0x46, 0x4b, 0x46, 0x50, 0x3c, 0x37, 0x4c, 0x47, 0x47, 0x4b, 0x47, 0x54,
+    0x43, 0x3e, 0x47, 0x4f, 0x4b, 0x41, 0x53, 0x50, 0x42, 0x46, 0x4f, 0x4b,
+    0x4e, 0x3f, 0x49, 0x52, 0x4a, 0x4a, 0x49, 0x53, 0x52, 0x47, 0x52, 0x5a,
+    0x40, 0x42, 0x4d, 0x4b, 0x50, 0x43, 0x49, 0x59, 0x47, 0x4c, 0x4d, 0x50,
+    0x4e, 0x3c, 0x44, 0x61, 0x51, 0x49, 0x49, 0x46, 0x49, 0x47, 0x4b, 0x5a,
+    0x45, 0x4b, 0x43, 0x40, 0x44, 0x52, 0x4d, 0x54, 0x49, 0x47, 0x44, 0x48,
+    0x46, 0x48, 0x3e, 0x40, 0x45, 0x4f, 0x4d, 0x4b, 0x4c, 0x40, 0x3d, 0x40,
+    0x3e, 0x48, 0x50, 0x4e, 0x4c, 0x42, 0x48, 0x4b, 0x3d, 0x48, 0x4b, 0x44,
+    0x52, 0x4b, 0x49, 0x4f, 0x49, 0x3f, 0x47, 0x43, 0x4d, 0x3f, 0x53, 0x4e,
+    0x4a, 0x4f, 0x4e, 0x4e, 0x53, 0x42, 0x46, 0x4c, 0x44, 0x4c, 0x46, 0x51,
+    0x45, 0x48, 0x4a, 0x50, 0x47, 0x41, 0x45, 0x54, 0x4a, 0x44, 0x50, 0x49,
+    0x48, 0x50, 0x51, 0x4b, 0x50, 0x4c, 0x4a, 0x49, 0x43, 0x47, 0x50, 0x4a,
+    0x4d, 0x4c, 0x4e, 0x49, 0x42, 0x50, 0x52, 0x48, 0x45, 0x5a, 0x4e, 0x55,
+    0x51, 0x3d, 0x3d, 0x4d, 0x42, 0x32, 0x36, 0x64, 0x39, 0x4c, 0x41, 0x48,
+    0x44, 0x35, 0x43, 0x56, 0x47, 0x1e, 0x4b, 0x3e, 0x47, 0x3f, 0x43, 0x52,
+    0x51, 0x34, 0x41, 0x4d, 0x3e, 0x41, 0x41, 0x48, 0x3c, 0x4b, 0x45, 0x3b,
+    0x40, 0x43, 0x4c, 0x46, 0x46, 0x47, 0x3e, 0x4f, 0x4b, 0x48, 0x42, 0x47,
+    0x4e, 0x3e, 0x49, 0x47, 0x43, 0x43, 0x4e, 0x52, 0x51, 0x45, 0x3f, 0x54,
+    0x46, 0x44, 0x48, 0x5d, 0x3e, 0x4a, 0x47, 0x52, 0x53, 0x3a, 0x4f, 0x5d,
+    0x41, 0x4c, 0x48, 0x51, 0x43, 0x4b, 0x4b, 0x67, 0x48, 0x4b, 0x45, 0x4d,
+    0x4b, 0x43, 0x4a, 0x54, 0x4c, 0x46, 0x43, 0x4a, 0x4d, 0x43, 0x4c, 0x47,
+    0x4a, 0x48, 0x4d, 0x42, 0x4d, 0x48, 0x3f, 0x43, 0x4c, 0x44, 0x4e, 0x4c,
+    0x40, 0x45, 0x4b, 0x48, 0x47, 0x47, 0x3e, 0x4c, 0x52, 0x41, 0x44, 0x4e,
+    0x4d, 0x44, 0x49, 0x4d, 0x3d, 0x45, 0x48, 0x4f, 0x4c, 0x4a, 0x55, 0x51,
+    0x4d, 0x4c, 0x45, 0x4e, 0x46, 0x45, 0x44, 0x49, 0x4e, 0x44, 0x40, 0x48,
+    0x49, 0x44, 0x53, 0x51, 0x42, 0x41, 0x51, 0x49, 0x51, 0x45, 0x51, 0x3f,
+    0x4b, 0x3f, 0x52, 0x3c, 0x50, 0x4d, 0x4f, 0x4b, 0x44, 0x4f, 0x40, 0x52,
+    0x49, 0x4a, 0x50, 0x3f, 0x3d, 0x54, 0x4c, 0x53, 0x52, 0x45, 0x41, 0x43,
+    0x47, 0x2d, 0x40, 0x63, 0x3a, 0x51, 0x43, 0x4e, 0x40, 0x2b, 0x36, 0x5b,
+    0x4b, 0x12, 0x4d, 0x35, 0x4b, 0x3f, 0x44, 0x4a, 0x46, 0x31, 0x54, 0x48,
+    0x43, 0x42, 0x3d, 0x51, 0x41, 0x45, 0x49, 0x4b, 0x47, 0x49, 0x3d, 0x3e,
+    0x46, 0x3d, 0x4d, 0x48, 0x3d, 0x45, 0x48, 0x4b, 0x49, 0x52, 0x44, 0x4c,
+    0x45, 0x44, 0x45, 0x49, 0x50, 0x48, 0x45, 0x46, 0x45, 0x44, 0x52, 0x55,
+    0x46, 0x45, 0x4b, 0x3d, 0x42, 0x4a, 0x3e, 0x57, 0x48, 0x4b, 0x3c, 0x42,
+    0x4a, 0x46, 0x47, 0x6c, 0x54, 0x4b, 0x41, 0x49, 0x49, 0x50, 0x43, 0x56,
+    0x44, 0x43, 0x4d, 0x3e, 0x44, 0x41, 0x47, 0x40, 0x4a, 0x4b, 0x4d, 0x4d,
+    0x3e, 0x46, 0x45, 0x47, 0x3e, 0x42, 0x4a, 0x45, 0x49, 0x3d, 0x3f, 0x43,
+    0x40, 0x44, 0x47, 0x4a, 0x45, 0x4d, 0x4b, 0x4c, 0x43, 0x40, 0x3d, 0x3e,
+    0x4c, 0x4c, 0x42, 0x4d, 0x48, 0x4d, 0x49, 0x42, 0x51, 0x51, 0x4c, 0x4b,
+    0x53, 0x4f, 0x48, 0x4d, 0x40, 0x46, 0x45, 0x4b, 0x47, 0x47, 0x4b, 0x46,
+    0x54, 0x42, 0x42, 0x46, 0x46, 0x4a, 0x4c, 0x55, 0x3f, 0x3c, 0x52, 0x4b,
+    0x4b, 0x4d, 0x4e, 0x48, 0x53, 0x4c, 0x4b, 0x42, 0x52, 0x54, 0x50, 0x4b,
+    0x40, 0x5f, 0x58, 0x53, 0x50, 0x42, 0x35, 0x48, 0x39, 0x24, 0x3c, 0x5e,
+    0x41, 0x50, 0x3c, 0x51, 0x42, 0x26, 0x42, 0x56, 0x41, 0x0c, 0x3e, 0x3d,
+    0x48, 0x3e, 0x50, 0x4b, 0x3a, 0x2c, 0x43, 0x3d, 0x48, 0x3e, 0x43, 0x48,
+    0x4c, 0x3f, 0x4a, 0x3e, 0x51, 0x4a, 0x4f, 0x40, 0x47, 0x43, 0x50, 0x4c,
+    0x43, 0x4d, 0x3f, 0x45, 0x4d, 0x3e, 0x4c, 0x44, 0x51, 0x47, 0x4b, 0x51,
+    0x45, 0x49, 0x44, 0x3f, 0x46, 0x46, 0x46, 0x57, 0x49, 0x4c, 0x49, 0x4e,
+    0x47, 0x4c, 0x47, 0x5e, 0x43, 0x46, 0x45, 0x4b, 0x52, 0x49, 0x45, 0x5f,
+    0x47, 0x41, 0x46, 0x43, 0x4f, 0x3b, 0x43, 0x51, 0x46, 0x53, 0x4a, 0x4e,
+    0x4b, 0x43, 0x4e, 0x40, 0x48, 0x49, 0x46, 0x3f, 0x48, 0x50, 0x4b, 0x41,
+    0x4a, 0x47, 0x4b, 0x3d, 0x46, 0x49, 0x4b, 0x43, 0x43, 0x42, 0x3e, 0x47,
+    0x47, 0x4a, 0x45, 0x46, 0x51, 0x48, 0x51, 0x4e, 0x3f, 0x50, 0x44, 0x4b,
+    0x4d, 0x4e, 0x44, 0x4d, 0x3d, 0x49, 0x4a, 0x4e, 0x42, 0x51, 0x43, 0x42,
+    0x46, 0x3e, 0x48, 0x4b, 0x4f, 0x50, 0x3d, 0x48, 0x4c, 0x4f, 0x46, 0x44,
+    0x44, 0x48, 0x42, 0x4b, 0x48, 0x41, 0x43, 0x46, 0x4d, 0x49, 0x4f, 0x43,
+    0x41, 0x44, 0x3f, 0x3d, 0x45, 0x4f, 0x45, 0x41, 0x40, 0x58, 0x4f, 0x54,
+    0x5b, 0x4b, 0x3a, 0x47, 0x3d, 0x28, 0x3d, 0x57, 0x3e, 0x51, 0x3f, 0x47,
+    0x3f, 0x2e, 0x3e, 0x54, 0x4e, 0x0b, 0x41, 0x3d, 0x3b, 0x3d, 0x43, 0x47,
+    0x47, 0x28, 0x4d, 0x43, 0x43, 0x3b, 0x4e, 0x4a, 0x4d, 0x42, 0x51, 0x46,
+    0x4f, 0x3d, 0x4c, 0x3a, 0x49, 0x49, 0x4a, 0x43, 0x42, 0x4b, 0x47, 0x42,
+    0x42, 0x49, 0x3f, 0x4d, 0x46, 0x4a, 0x49, 0x4e, 0x42, 0x3c, 0x4a, 0x41,
+    0x4c, 0x40, 0x4d, 0x5a, 0x49, 0x46, 0x51, 0x46, 0x4b, 0x4c, 0x46, 0x62,
+    0x45, 0x42, 0x51, 0x4e, 0x4d, 0x3e, 0x4d, 0x5b, 0x4d, 0x43, 0x45, 0x50,
+    0x4b, 0x40, 0x50, 0x53, 0x4f, 0x4f, 0x51, 0x53, 0x46, 0x41, 0x4e, 0x3a,
+    0x4b, 0x47, 0x3f, 0x3e, 0x4d, 0x48, 0x53, 0x3f, 0x45, 0x42, 0x4c, 0x45,
+    0x55, 0x4c, 0x4b, 0x39, 0x4a, 0x45, 0x48, 0x4d, 0x47, 0x40, 0x48, 0x4f,
+    0x4d, 0x49, 0x3e, 0x41, 0x46, 0x4e, 0x40, 0x49, 0x4b, 0x47, 0x4c, 0x45,
+    0x44, 0x51, 0x4f, 0x4b, 0x48, 0x49, 0x44, 0x41, 0x43, 0x46, 0x51, 0x45,
+    0x40, 0x48, 0x4b, 0x42, 0x44, 0x4f, 0x53, 0x4d, 0x44, 0x46, 0x4e, 0x4c,
+    0x48, 0x50, 0x41, 0x45, 0x42, 0x48, 0x4d, 0x4d, 0x47, 0x45, 0x41, 0x45,
+    0x48, 0x58, 0x4e, 0x46, 0x43, 0x53, 0x57, 0x52, 0x5e, 0x42, 0x45, 0x4e,
+    0x39, 0x24, 0x32, 0x56, 0x47, 0x56, 0x49, 0x52, 0x46, 0x26, 0x3a, 0x51,
+    0x4b, 0x05, 0x3e, 0x43, 0x3f, 0x38, 0x4d, 0x4b, 0x4f, 0x27, 0x51, 0x46,
+    0x47, 0x41, 0x4a, 0x47, 0x4a, 0x3e, 0x44, 0x51, 0x3f, 0x3a, 0x43, 0x46,
+    0x4d, 0x49, 0x46, 0x52, 0x43, 0x48, 0x49, 0x3e, 0x47, 0x46, 0x4a, 0x4d,
+    0x47, 0x46, 0x52, 0x50, 0x44, 0x48, 0x4c, 0x47, 0x45, 0x41, 0x49, 0x5b,
+    0x4d, 0x4b, 0x47, 0x4c, 0x4a, 0x47, 0x45, 0x5b, 0x49, 0x46, 0x52, 0x47,
+    0x47, 0x3d, 0x55, 0x59, 0x40, 0x4b, 0x3e, 0x50, 0x42, 0x43, 0x40, 0x4f,
+    0x48, 0x3f, 0x47, 0x53, 0x4d, 0x44, 0x4e, 0x37, 0x4c, 0x43, 0x51, 0x4d,
+    0x46, 0x4e, 0x40, 0x41, 0x52, 0x44, 0x43, 0x4a, 0x50, 0x48, 0x47, 0x42,
+    0x48, 0x45, 0x50, 0x4d, 0x42, 0x52, 0x44, 0x43, 0x45, 0x43, 0x4c, 0x4d,
+    0x44, 0x51, 0x47, 0x48, 0x51, 0x4f, 0x48, 0x45, 0x49, 0x4a, 0x3e, 0x43,
+    0x4d, 0x4e, 0x4e, 0x46, 0x54, 0x4d, 0x49, 0x4d, 0x47, 0x46, 0x4b, 0x41,
+    0x4a, 0x49, 0x44, 0x45, 0x4d, 0x3e, 0x53, 0x50, 0x47, 0x4d, 0x4e, 0x43,
+    0x4f, 0x45, 0x4e, 0x4a, 0x47, 0x49, 0x4c, 0x4c, 0x4d, 0x54, 0x42, 0x4c,
+    0x43, 0x5d, 0x59, 0x50, 0x5e, 0x4b, 0x44, 0x43, 0x3c, 0x25, 0x31, 0x5b,
+    0x46, 0x5a, 0x50, 0x4d, 0x41, 0x2a, 0x41, 0x4f, 0x44, 0x00, 0x41, 0x3d,
+    0x43, 0x4b, 0x47, 0x45, 0x4e, 0x2e, 0x44, 0x46, 0x53, 0x3d, 0x43, 0x41,
+    0x44, 0x46, 0x49, 0x42, 0x45, 0x4f, 0x4d, 0x3a, 0x43, 0x3c, 0x47, 0x53,
+    0x43, 0x4e, 0x3f, 0x41, 0x4d, 0x50, 0x4b, 0x4c, 0x51, 0x47, 0x53, 0x4f,
+    0x45, 0x4a, 0x44, 0x45, 0x41, 0x46, 0x47, 0x50, 0x51, 0x3f, 0x3e, 0x41,
+    0x48, 0x45, 0x46, 0x5d, 0x45, 0x4a, 0x4c, 0x46, 0x4a, 0x49, 0x50, 0x51,
+    0x51, 0x4c, 0x4f, 0x47, 0x47, 0x42, 0x45, 0x47, 0x4e, 0x48, 0x46, 0x40,
+    0x45, 0x46, 0x4d, 0x3b, 0x4d, 0x52, 0x4c, 0x51, 0x49, 0x51, 0x47, 0x3d,
+    0x4d, 0x42, 0x4f, 0x4e, 0x43, 0x43, 0x45, 0x3a, 0x42, 0x50, 0x4c, 0x4a,
+    0x41, 0x53, 0x4c, 0x45, 0x51, 0x3f, 0x54, 0x43, 0x4b, 0x54, 0x56, 0x4d,
+    0x4f, 0x4a, 0x50, 0x4b, 0x44, 0x45, 0x4f, 0x4f, 0x47, 0x3e, 0x50, 0x4f,
+    0x4b, 0x48, 0x4d, 0x49, 0x55, 0x4d, 0x45, 0x4d, 0x4a, 0x53, 0x43, 0x46,
+    0x4c, 0x45, 0x41, 0x46, 0x49, 0x49, 0x4f, 0x4b, 0x49, 0x50, 0x52, 0x49,
+    0x41, 0x54, 0x44, 0x4c, 0x44, 0x63, 0x4a, 0x49, 0x40, 0x59, 0x52, 0x52,
+    0x59, 0x3f, 0x3e, 0x3e, 0x40, 0x25, 0x3c, 0x5c, 0x4f, 0x57, 0x44, 0x50,
+    0x41, 0x2a, 0x48, 0x4f, 0x43, 0x08, 0x47, 0x43, 0x49, 0x48, 0x4d, 0x49,
+    0x46, 0x2b, 0x48, 0x44, 0x4e, 0x47, 0x47, 0x43, 0x44, 0x3e, 0x4a, 0x52,
+    0x3f, 0x4a, 0x53, 0x42, 0x49, 0x47, 0x4c, 0x50, 0x43, 0x46, 0x46, 0x3c,
+    0x4c, 0x47, 0x4e, 0x4d, 0x42, 0x41, 0x53, 0x52, 0x4f, 0x40, 0x54, 0x50,
+    0x46, 0x43, 0x50, 0x56, 0x51, 0x48, 0x48, 0x48, 0x49, 0x39, 0x47, 0x5e,
+    0x4e, 0x4b, 0x4f, 0x4e, 0x43, 0x45, 0x42, 0x58, 0x4a, 0x3b, 0x48, 0x4d,
+    0x43, 0x3e, 0x4b, 0x43, 0x3c, 0x45, 0x46, 0x4b, 0x42, 0x42, 0x4e, 0x3d,
+    0x4b, 0x4e, 0x51, 0x52, 0x48, 0x3e, 0x4b, 0x3f, 0x4c, 0x4a, 0x4b, 0x4c,
+    0x46, 0x48, 0x3e, 0x48, 0x47, 0x4d, 0x4a, 0x46, 0x49, 0x4d, 0x4a, 0x48,
+    0x50, 0x4b, 0x40, 0x48, 0x4b, 0x52, 0x46, 0x50, 0x4f, 0x3e, 0x42, 0x44,
+    0x44, 0x42, 0x43, 0x49, 0x4f, 0x4f, 0x46, 0x42, 0x4a, 0x54, 0x42, 0x48,
+    0x50, 0x4f, 0x4f, 0x4c, 0x4c, 0x47, 0x52, 0x49, 0x4c, 0x45, 0x4a, 0x4d,
+    0x4a, 0x41, 0x47, 0x4a, 0x4d, 0x4a, 0x4c, 0x46, 0x51, 0x44, 0x4b, 0x49,
+    0x53, 0x5e, 0x45, 0x4a, 0x3b, 0x57, 0x5a, 0x4c, 0x59, 0x43, 0x3e, 0x4a,
+    0x3e, 0x20, 0x36, 0x5d, 0x47, 0x5b, 0x3f, 0x55, 0x3e, 0x24, 0x41, 0x52,
+    0x3f, 0x01, 0x49, 0x41, 0x40, 0x45, 0x42, 0x46, 0x49, 0x2a, 0x47, 0x40,
+    0x44, 0x3f, 0x42, 0x47, 0x4e, 0x42, 0x4b, 0x3d, 0x45, 0x4c, 0x47, 0x3d,
+    0x4c, 0x44, 0x48, 0x43, 0x43, 0x41, 0x4a, 0x3d, 0x48, 0x4b, 0x46, 0x4e,
+    0x4c, 0x45, 0x48, 0x4d, 0x54, 0x4d, 0x3e, 0x46, 0x3e, 0x47, 0x44, 0x4e,
+    0x48, 0x49, 0x53, 0x4b, 0x41, 0x45, 0x4c, 0x57, 0x52, 0x4e, 0x40, 0x48,
+    0x4d, 0x43, 0x44, 0x5a, 0x4a, 0x4c, 0x48, 0x4d, 0x3f, 0x52, 0x41, 0x50,
+    0x4a, 0x47, 0x3e, 0x43, 0x4c, 0x42, 0x48, 0x3e, 0x4f, 0x4b, 0x41, 0x43,
+    0x49, 0x40, 0x43, 0x36, 0x3f, 0x4b, 0x49, 0x49, 0x51, 0x43, 0x48, 0x40,
+    0x4c, 0x51, 0x4d, 0x4a, 0x49, 0x3f, 0x4b, 0x3d, 0x4f, 0x4b, 0x43, 0x4d,
+    0x46, 0x40, 0x46, 0x4d, 0x49, 0x48, 0x4d, 0x4c, 0x52, 0x4c, 0x49, 0x4f,
+    0x53, 0x40, 0x49, 0x53, 0x47, 0x43, 0x4c, 0x45, 0x42, 0x48, 0x42, 0x4e,
+    0x49, 0x43, 0x42, 0x40, 0x4f, 0x46, 0x50, 0x47, 0x51, 0x4a, 0x52, 0x45,
+    0x4c, 0x51, 0x48, 0x47, 0x40, 0x41, 0x52, 0x4f, 0x41, 0x5a, 0x53, 0x47,
+    0x42, 0x5f, 0x55, 0x4f, 0x53, 0x3e, 0x41, 0x49, 0x3d, 0x20, 0x3f, 0x54,
+    0x42, 0x5b, 0x49, 0x4d, 0x3d, 0x22, 0x3e, 0x48, 0x41, 0x01, 0x4c, 0x3d,
+    0x43, 0x4a, 0x46, 0x43, 0x4f, 0x2b, 0x49, 0x46, 0x47, 0x4a, 0x51, 0x3d,
+    0x4b, 0x44, 0x49, 0x41, 0x47, 0x47, 0x45, 0x3a, 0x44, 0x42, 0x40, 0x52,
+    0x46, 0x51, 0x4a, 0x41, 0x4a, 0x52, 0x44, 0x52, 0x4a, 0x40, 0x46, 0x45,
+    0x52, 0x4c, 0x4e, 0x42, 0x42, 0x48, 0x40, 0x4f, 0x4b, 0x4f, 0x51, 0x4c,
+    0x4e, 0x48, 0x4a, 0x5a, 0x46, 0x3d, 0x41, 0x50, 0x52, 0x4c, 0x44, 0x53,
+    0x4b, 0x4d, 0x4f, 0x49, 0x47, 0x4c, 0x48, 0x45, 0x48, 0x4a, 0x44, 0x4e,
+    0x4c, 0x40, 0x4d, 0x35, 0x40, 0x49, 0x4a, 0x51, 0x49, 0x4a, 0x46, 0x36,
+    0x46, 0x47, 0x4a, 0x4c, 0x40, 0x4e, 0x42, 0x38, 0x48, 0x45, 0x42, 0x49,
+    0x54, 0x4c, 0x3f, 0x49, 0x4c, 0x39, 0x47, 0x45, 0x4e, 0x4a, 0x42, 0x44,
+    0x4b, 0x53, 0x43, 0x40, 0x46, 0x51, 0x3d, 0x50, 0x4b, 0x43, 0x4a, 0x4c,
+    0x55, 0x54, 0x4a, 0x43, 0x48, 0x40, 0x44, 0x3f, 0x47, 0x45, 0x3e, 0x41,
+    0x49, 0x44, 0x4d, 0x49, 0x44, 0x41, 0x4a, 0x50, 0x44, 0x49, 0x4d, 0x47,
+    0x4a, 0x49, 0x46, 0x49, 0x40, 0x5b, 0x4d, 0x51, 0x47, 0x57, 0x49, 0x4f,
+    0x56, 0x46, 0x3a, 0x4a, 0x3e, 0x22, 0x36, 0x5c, 0x44, 0x56, 0x46, 0x48,
+    0x3a, 0x2d, 0x4a, 0x48, 0x44, 0x17, 0x41, 0x42, 0x40, 0x3d, 0x4e, 0x45,
+    0x40, 0x26, 0x43, 0x52, 0x41, 0x40, 0x44, 0x4a, 0x48, 0x42, 0x4f, 0x47,
+    0x46, 0x4c, 0x4a, 0x3b, 0x42, 0x3e, 0x3e, 0x49, 0x4e, 0x44, 0x4e, 0x49,
+    0x47, 0x41, 0x47, 0x44, 0x4c, 0x45, 0x4d, 0x49, 0x49, 0x48, 0x55, 0x3d,
+    0x4a, 0x45, 0x50, 0x4f, 0x46, 0x4c, 0x46, 0x45, 0x3c, 0x51, 0x4b, 0x5a,
+    0x46, 0x47, 0x54, 0x41, 0x44, 0x40, 0x4f, 0x53, 0x49, 0x46, 0x46, 0x48,
+    0x44, 0x40, 0x50, 0x49, 0x49, 0x43, 0x50, 0x41, 0x52, 0x4b, 0x46, 0x3e,
+    0x44, 0x44, 0x46, 0x4e, 0x47, 0x48, 0x3e, 0x38, 0x4c, 0x4c, 0x48, 0x43,
+    0x48, 0x3e, 0x50, 0x42, 0x51, 0x50, 0x4a, 0x48, 0x4a, 0x42, 0x44, 0x3d,
+    0x4a, 0x46, 0x46, 0x3d, 0x4e, 0x47, 0x3d, 0x48, 0x4c, 0x46, 0x50, 0x4d,
+    0x49, 0x45, 0x4a, 0x4c, 0x4c, 0x47, 0x4a, 0x42, 0x4a, 0x45, 0x50, 0x52,
+    0x4b, 0x4d, 0x4c, 0x43, 0x42, 0x53, 0x41, 0x45, 0x49, 0x41, 0x4b, 0x4c,
+    0x52, 0x54, 0x4b, 0x41, 0x48, 0x4c, 0x47, 0x4c, 0x41, 0x49, 0x4a, 0x47,
+    0x50, 0x59, 0x4e, 0x45, 0x3c, 0x5d, 0x53, 0x4c, 0x5a, 0x3e, 0x3a, 0x51,
+    0x3a, 0x22, 0x35, 0x59, 0x40, 0x5a, 0x43, 0x46, 0x41, 0x32, 0x44, 0x4b,
+    0x47, 0x04, 0x4c, 0x3a, 0x4a, 0x49, 0x48, 0x3d, 0x45, 0x2b, 0x50, 0x41,
+    0x3e, 0x44, 0x4f, 0x43, 0x4a, 0x3f, 0x48, 0x4b, 0x53, 0x49, 0x4b, 0x38,
+    0x44, 0x40, 0x48, 0x4c, 0x41, 0x3f, 0x47, 0x3e, 0x47, 0x49, 0x45, 0x42,
+    0x43, 0x3e, 0x46, 0x44, 0x53, 0x4d, 0x48, 0x44, 0x45, 0x42, 0x43, 0x53,
+    0x55, 0x49, 0x4d, 0x4b, 0x45, 0x44, 0x47, 0x5f, 0x48, 0x44, 0x4a, 0x48,
+    0x45, 0x4d, 0x4f, 0x5e, 0x4e, 0x46, 0x49, 0x49, 0x4d, 0x49, 0x44, 0x48,
+    0x4d, 0x41, 0x50, 0x48, 0x3d, 0x3f, 0x4d, 0x38, 0x46, 0x4a, 0x50, 0x4a,
+    0x45, 0x3e, 0x43, 0x36, 0x42, 0x48, 0x53, 0x54, 0x49, 0x43, 0x4b, 0x3a,
+    0x45, 0x48, 0x50, 0x45, 0x4a, 0x4c, 0x4a, 0x4d, 0x43, 0x4c, 0x55, 0x4e,
+    0x4c, 0x42, 0x45, 0x52, 0x52, 0x45, 0x46, 0x40, 0x54, 0x4c, 0x3d, 0x4e,
+    0x49, 0x4e, 0x44, 0x47, 0x45, 0x48, 0x4b, 0x50, 0x49, 0x4b, 0x44, 0x4b,
+    0x4f, 0x49, 0x47, 0x47, 0x53, 0x3f, 0x4b, 0x42, 0x45, 0x3e, 0x4d, 0x4d,
+    0x48, 0x51, 0x45, 0x40, 0x43, 0x43, 0x4e, 0x44, 0x51, 0x55, 0x4a, 0x3e,
+    0x45, 0x55, 0x58, 0x50, 0x50, 0x38, 0x44, 0x4f, 0x3b, 0x23, 0x3c, 0x55,
+    0x3c, 0x54, 0x49, 0x42, 0x44, 0x2f, 0x3e, 0x47, 0x42, 0x01, 0x42, 0x37,
+    0x3f, 0x42, 0x45, 0x45, 0x47, 0x2a, 0x52, 0x4b, 0x45, 0x3c, 0x47, 0x44,
+    0x44, 0x40, 0x50, 0x53, 0x48, 0x42, 0x4d, 0x36, 0x50, 0x3d, 0x49, 0x44,
+    0x4f, 0x4c, 0x4a, 0x42, 0x4d, 0x3e, 0x3d, 0x3f, 0x4e, 0x44, 0x4d, 0x4e,
+    0x54, 0x3d, 0x42, 0x46, 0x49, 0x47, 0x4b, 0x53, 0x45, 0x46, 0x47, 0x4a,
+    0x45, 0x3d, 0x4a, 0x5f, 0x51, 0x3e, 0x45, 0x45, 0x44, 0x3a, 0x4d, 0x57,
+    0x45, 0x47, 0x4d, 0x45, 0x4e, 0x4b, 0x51, 0x48, 0x4b, 0x4a, 0x3c, 0x4e,
+    0x51, 0x41, 0x4d, 0x36, 0x47, 0x4a, 0x46, 0x51, 0x4e, 0x4c, 0x52, 0x41,
+    0x55, 0x47, 0x41, 0x47, 0x4d, 0x47, 0x4b, 0x3d, 0x4a, 0x4a, 0x46, 0x49,
+    0x4d, 0x48, 0x46, 0x46, 0x4d, 0x52, 0x52, 0x48, 0x49, 0x3f, 0x4b, 0x4e,
+    0x4c, 0x49, 0x45, 0x47, 0x41, 0x4b, 0x44, 0x48, 0x52, 0x4b, 0x53, 0x44,
+    0x46, 0x4e, 0x44, 0x49, 0x52, 0x50, 0x46, 0x4b, 0x44, 0x43, 0x50, 0x49,
+    0x4a, 0x53, 0x45, 0x49, 0x52, 0x3f, 0x4a, 0x4e, 0x49, 0x4c, 0x4d, 0x4d,
+    0x40, 0x40, 0x3f, 0x4a, 0x47, 0x56, 0x51, 0x43, 0x40, 0x5a, 0x58, 0x52,
+    0x4f, 0x3d, 0x3d, 0x45, 0x38, 0x29, 0x33, 0x59, 0x45, 0x54, 0x3c, 0x42,
+    0x3f, 0x27, 0x3e, 0x49, 0x48, 0x06, 0x4a, 0x3f, 0x41, 0x49, 0x4c, 0x48,
+    0x46, 0x2b, 0x4a, 0x4f, 0x44, 0x46, 0x4c, 0x46, 0x4a, 0x3b, 0x4d, 0x4a,
+    0x40, 0x41, 0x45, 0x38, 0x51, 0x39, 0x46, 0x46, 0x41, 0x51, 0x4e, 0x41,
+    0x49, 0x44, 0x48, 0x4a, 0x4b, 0x46, 0x47, 0x46, 0x4a, 0x4c, 0x47, 0x48,
+    0x3d, 0x42, 0x50, 0x4f, 0x50, 0x4a, 0x4a, 0x48, 0x4a, 0x45, 0x45, 0x61,
+    0x4a, 0x4c, 0x49, 0x3d, 0x4b, 0x4a, 0x4a, 0x5a, 0x48, 0x49, 0x50, 0x4f,
+    0x42, 0x48, 0x3e, 0x44, 0x43, 0x3b, 0x4f, 0x54, 0x4b, 0x4a, 0x47, 0x31,
+    0x4a, 0x49, 0x47, 0x4e, 0x48, 0x48, 0x46, 0x42, 0x4a, 0x45, 0x4c, 0x49,
+    0x4b, 0x4e, 0x53, 0x43, 0x4c, 0x49, 0x4f, 0x4b, 0x46, 0x4c, 0x4b, 0x4e,
+    0x51, 0x4b, 0x49, 0x52, 0x44, 0x55, 0x45, 0x49, 0x4b, 0x4a, 0x50, 0x4c,
+    0x4d, 0x4a, 0x4b, 0x48, 0x41, 0x46, 0x47, 0x43, 0x4b, 0x3f, 0x54, 0x4a,
+    0x46, 0x49, 0x51, 0x48, 0x4e, 0x4a, 0x41, 0x52, 0x52, 0x4e, 0x53, 0x47,
+    0x42, 0x48, 0x43, 0x44, 0x54, 0x51, 0x40, 0x49, 0x4c, 0x48, 0x49, 0x44,
+    0x4c, 0x56, 0x52, 0x49, 0x3d, 0x59, 0x4f, 0x56, 0x56, 0x42, 0x46, 0x45,
+    0x3e, 0x28, 0x3f, 0x5b, 0x3f, 0x5a, 0x4c, 0x42, 0x44, 0x22, 0x3f, 0x46,
+    0x47, 0x0d, 0x3e, 0x41, 0x45, 0x49, 0x4a, 0x3b, 0x45, 0x2d, 0x4d, 0x4a,
+    0x44, 0x43, 0x49, 0x46, 0x4b, 0x47, 0x49, 0x45, 0x4e, 0x40, 0x4c, 0x3c,
+    0x42, 0x3e, 0x4b, 0x50, 0x48, 0x49, 0x4c, 0x42, 0x3c, 0x43, 0x50, 0x43,
+    0x49, 0x4e, 0x4e, 0x43, 0x46, 0x4c, 0x48, 0x4a, 0x43, 0x4c, 0x49, 0x4e,
+    0x47, 0x44, 0x50, 0x4c, 0x4a, 0x48, 0x47, 0x5f, 0x3f, 0x3e, 0x48, 0x4f,
+    0x4f, 0x49, 0x4a, 0x5f, 0x4e, 0x40, 0x4e, 0x48, 0x47, 0x44, 0x40, 0x4d,
+    0x3f, 0x4a, 0x53, 0x45, 0x3e, 0x50, 0x3f, 0x39, 0x50, 0x45, 0x45, 0x4b,
+    0x43, 0x41, 0x46, 0x41, 0x49, 0x47, 0x4b, 0x41, 0x3c, 0x4b, 0x46, 0x3f,
+    0x41, 0x4a, 0x4e, 0x4c, 0x49, 0x4c, 0x3f, 0x44, 0x53, 0x4c, 0x45, 0x49,
+    0x48, 0x4d, 0x48, 0x4a, 0x48, 0x4f, 0x45, 0x4d, 0x48, 0x4c, 0x41, 0x49,
+    0x42, 0x48, 0x53, 0x46, 0x4a, 0x46, 0x4b, 0x4f, 0x4c, 0x52, 0x4c, 0x51,
+    0x41, 0x4d, 0x49, 0x41, 0x49, 0x4f, 0x49, 0x42, 0x4a, 0x48, 0x51, 0x4a,
+    0x44, 0x4d, 0x55, 0x48, 0x47, 0x4d, 0x4d, 0x45, 0x42, 0x60, 0x4a, 0x51,
+    0x42, 0x54, 0x56, 0x56, 0x50, 0x4a, 0x3f, 0x4a, 0x40, 0x25, 0x3a, 0x59,
+    0x46, 0x58, 0x52, 0x46, 0x41, 0x28, 0x3d, 0x3e, 0x45, 0x13, 0x47, 0x41,
+    0x3d, 0x44, 0x48, 0x45, 0x49, 0x26, 0x46, 0x4c, 0x3b, 0x4a, 0x42, 0x47,
+    0x46, 0x41, 0x44, 0x52, 0x50, 0x4a, 0x4f, 0x40, 0x4b, 0x39, 0x42, 0x45,
+    0x4a, 0x4d, 0x4f, 0x3f, 0x42, 0x4f, 0x49, 0x45, 0x42, 0x4a, 0x46, 0x47,
+    0x48, 0x40, 0x4a, 0x46, 0x41, 0x3b, 0x48, 0x55, 0x4b, 0x4e, 0x4e, 0x48,
+    0x4b, 0x44, 0x46, 0x53, 0x48, 0x45, 0x4b, 0x53, 0x49, 0x43, 0x4a, 0x5c,
+    0x46, 0x45, 0x45, 0x49, 0x49, 0x49, 0x4c, 0x43, 0x4e, 0x4a, 0x41, 0x4a,
+    0x42, 0x43, 0x4a, 0x38, 0x44, 0x4a, 0x4b, 0x3f, 0x45, 0x49, 0x45, 0x38,
+    0x43, 0x40, 0x45, 0x4c, 0x47, 0x42, 0x3f, 0x42, 0x3e, 0x4a, 0x43, 0x50,
+    0x4a, 0x4e, 0x4f, 0x47, 0x4d, 0x49, 0x49, 0x47, 0x4a, 0x4d, 0x46, 0x4c,
+    0x4f, 0x3d, 0x52, 0x4a, 0x41, 0x44, 0x4b, 0x50, 0x4c, 0x52, 0x49, 0x50,
+    0x4b, 0x45, 0x49, 0x4d, 0x48, 0x55, 0x50, 0x47, 0x4e, 0x50, 0x4f, 0x48,
+    0x46, 0x4d, 0x4d, 0x41, 0x48, 0x51, 0x4b, 0x4c, 0x47, 0x51, 0x42, 0x42,
+    0x4d, 0x47, 0x43, 0x4c, 0x4c, 0x5a, 0x4e, 0x47, 0x3b, 0x59, 0x51, 0x57,
+    0x4c, 0x40, 0x46, 0x4c, 0x37, 0x2a, 0x35, 0x58, 0x44, 0x5b, 0x4c, 0x44,
+    0x3e, 0x2e, 0x3f, 0x43, 0x46, 0x23, 0x49, 0x3e, 0x41, 0x3f, 0x4b, 0x3e,
+    0x4e, 0x2f, 0x4d, 0x4a, 0x4e, 0x40, 0x4e, 0x41, 0x40, 0x3f, 0x4a, 0x42,
+    0x4d, 0x4c, 0x44, 0x47, 0x4e, 0x44, 0x40, 0x43, 0x4d, 0x49, 0x4f, 0x3d,
+    0x49, 0x3f, 0x51, 0x48, 0x42, 0x4a, 0x49, 0x47, 0x49, 0x46, 0x4a, 0x45,
+    0x45, 0x49, 0x53, 0x4d, 0x4c, 0x4e, 0x44, 0x50, 0x4b, 0x43, 0x4e, 0x5f,
+    0x3c, 0x40, 0x44, 0x46, 0x48, 0x4b, 0x42, 0x62, 0x4e, 0x50, 0x4c, 0x49,
+    0x4a, 0x4f, 0x44, 0x53, 0x42, 0x43, 0x49, 0x48, 0x4b, 0x3c, 0x4a, 0x37,
+    0x4c, 0x41, 0x49, 0x46, 0x46, 0x47, 0x43, 0x40, 0x4d, 0x4d, 0x4a, 0x48,
+    0x50, 0x4b, 0x50, 0x41, 0x44, 0x3e, 0x51, 0x47, 0x44, 0x4a, 0x44, 0x45,
+    0x48, 0x4d, 0x52, 0x4e, 0x44, 0x48, 0x4d, 0x43, 0x42, 0x45, 0x48, 0x52,
+    0x44, 0x42, 0x50, 0x42, 0x4d, 0x45, 0x48, 0x4d, 0x4f, 0x4e, 0x45, 0x49,
+    0x51, 0x48, 0x4f, 0x53, 0x4d, 0x4c, 0x48, 0x50, 0x4e, 0x4d, 0x50, 0x48,
+    0x49, 0x42, 0x4c, 0x42, 0x4b, 0x4b, 0x49, 0x48, 0x48, 0x49, 0x4a, 0x54,
+    0x44, 0x57, 0x4d, 0x4b, 0x3f, 0x56, 0x53, 0x5c, 0x50, 0x4e, 0x46, 0x49,
+    0x40, 0x24, 0x44, 0x58, 0x49, 0x54, 0x48, 0x49, 0x41, 0x22, 0x44, 0x3f,
+    0x48, 0x1c, 0x4d, 0x39, 0x3e, 0x4c, 0x3d, 0x4a, 0x48, 0x2d, 0x48, 0x3e,
+    0x3f, 0x3a, 0x46, 0x4e, 0x44, 0x43, 0x49, 0x51, 0x4d, 0x3c, 0x44, 0x41,
+    0x4e, 0x44, 0x42, 0x4c, 0x45, 0x48, 0x45, 0x46, 0x42, 0x46, 0x47, 0x42,
+    0x4f, 0x45, 0x47, 0x44, 0x48, 0x47, 0x4a, 0x42, 0x4d, 0x48, 0x3e, 0x53,
+    0x47, 0x4b, 0x44, 0x4b, 0x45, 0x4a, 0x50, 0x55, 0x4c, 0x45, 0x48, 0x43,
+    0x53, 0x3d, 0x4e, 0x5f, 0x42, 0x44, 0x4a, 0x4f, 0x3f, 0x48, 0x4e, 0x4b,
+    0x43, 0x48, 0x43, 0x41, 0x4a, 0x4b, 0x51, 0x39, 0x52, 0x46, 0x44, 0x49,
+    0x48, 0x45, 0x4c, 0x40, 0x45, 0x49, 0x51, 0x48, 0x45, 0x42, 0x45, 0x48,
+    0x40, 0x43, 0x3d, 0x47, 0x53, 0x54, 0x4d, 0x4a, 0x4a, 0x47, 0x48, 0x43,
+    0x4c, 0x46, 0x43, 0x4f, 0x49, 0x4c, 0x3f, 0x3d, 0x4b, 0x41, 0x40, 0x48,
+    0x4e, 0x4c, 0x4b, 0x40, 0x4c, 0x43, 0x49, 0x4d, 0x47, 0x4f, 0x47, 0x42,
+    0x47, 0x4a, 0x4d, 0x4f, 0x46, 0x4d, 0x51, 0x49, 0x48, 0x4d, 0x4e, 0x46,
+    0x47, 0x41, 0x44, 0x4d, 0x4b, 0x55, 0x4b, 0x4c, 0x41, 0x5e, 0x50, 0x45,
+    0x40, 0x55, 0x4b, 0x60, 0x55, 0x47, 0x3d, 0x4a, 0x42, 0x22, 0x46, 0x5a,
+    0x47, 0x53, 0x49, 0x44, 0x44, 0x27, 0x41, 0x4f, 0x3e, 0x22, 0x4a, 0x44,
+    0x49, 0x3e, 0x4e, 0x4d, 0x3f, 0x3a, 0x4c, 0x44, 0x4a, 0x44, 0x46, 0x51,
+    0x4f, 0x42, 0x4c, 0x4e, 0x39, 0x4b, 0x42, 0x39, 0x4b, 0x3e, 0x4f, 0x47,
+    0x4a, 0x4f, 0x3f, 0x4d, 0x43, 0x4c, 0x4a, 0x4b, 0x4b, 0x3d, 0x51, 0x46,
+    0x49, 0x4c, 0x47, 0x44, 0x43, 0x3d, 0x3c, 0x54, 0x4a, 0x47, 0x4d, 0x50,
+    0x4a, 0x46, 0x51, 0x62, 0x46, 0x4d, 0x4b, 0x46, 0x49, 0x3c, 0x50, 0x57,
+    0x47, 0x40, 0x3e, 0x4c, 0x4b, 0x3f, 0x55, 0x46, 0x3d, 0x45, 0x42, 0x4e,
+    0x50, 0x49, 0x46, 0x3a, 0x4c, 0x47, 0x4a, 0x49, 0x42, 0x42, 0x4a, 0x44,
+    0x42, 0x40, 0x49, 0x54, 0x46, 0x4b, 0x47, 0x45, 0x51, 0x47, 0x41, 0x42,
+    0x49, 0x50, 0x4e, 0x48, 0x4b, 0x4b, 0x47, 0x4a, 0x47, 0x49, 0x4b, 0x45,
+    0x4b, 0x54, 0x48, 0x54, 0x4b, 0x49, 0x51, 0x4a, 0x4a, 0x40, 0x46, 0x42,
+    0x44, 0x44, 0x4d, 0x4b, 0x47, 0x43, 0x45, 0x41, 0x3e, 0x49, 0x43, 0x51,
+    0x3e, 0x4b, 0x52, 0x46, 0x48, 0x3f, 0x4e, 0x51, 0x51, 0x49, 0x3f, 0x48,
+    0x4c, 0x4c, 0x52, 0x47, 0x43, 0x57, 0x44, 0x42, 0x40, 0x52, 0x50, 0x5d,
+    0x4f, 0x40, 0x42, 0x45, 0x46, 0x26, 0x3c, 0x51, 0x4b, 0x4e, 0x4b, 0x49,
+    0x46, 0x35, 0x49, 0x53, 0x49, 0x2b, 0x4d, 0x3e, 0x50, 0x44, 0x4f, 0x54,
+    0x46, 0x34, 0x49, 0x4d, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x44, 0x52, 0x41,
+    0x4d, 0x4c, 0x52, 0x41, 0x49, 0x3a, 0x4e, 0x49, 0x40, 0x4b, 0x45, 0x4d,
+    0x4b, 0x4a, 0x47, 0x49, 0x45, 0x49, 0x4d, 0x50, 0x3e, 0x47, 0x44, 0x51,
+    0x4c, 0x41, 0x45, 0x50, 0x47, 0x41, 0x4a, 0x52, 0x4b, 0x3d, 0x4b, 0x5b,
+    0x4c, 0x4c, 0x4d, 0x3f, 0x47, 0x44, 0x49, 0x5d, 0x4a, 0x53, 0x44, 0x45,
+    0x45, 0x46, 0x3d, 0x4f, 0x50, 0x3b, 0x44, 0x4e, 0x40, 0x41, 0x4c, 0x3a,
+    0x4a, 0x45, 0x49, 0x48, 0x45, 0x4a, 0x45, 0x36, 0x45, 0x4d, 0x4c, 0x49,
+    0x3f, 0x47, 0x4d, 0x40, 0x53, 0x48, 0x49, 0x4c, 0x47, 0x4f, 0x42, 0x44,
+    0x45, 0x40, 0x4a, 0x4c, 0x49, 0x4f, 0x4b, 0x4d, 0x42, 0x45, 0x3e, 0x4a,
+    0x48, 0x4a, 0x49, 0x50, 0x4c, 0x53, 0x50, 0x45, 0x4b, 0x4c, 0x46, 0x4f,
+    0x44, 0x43, 0x54, 0x50, 0x3f, 0x48, 0x42, 0x4b, 0x43, 0x3f, 0x4d, 0x4c,
+    0x43, 0x49, 0x4a, 0x47, 0x54, 0x4b, 0x4f, 0x4d, 0x44, 0x47, 0x49, 0x4e,
+    0x4e, 0x55, 0x40, 0x46, 0x44, 0x56, 0x4e, 0x65, 0x4f, 0x3f, 0x43, 0x48,
+    0x39, 0x27, 0x43, 0x55, 0x4b, 0x4c, 0x44, 0x46, 0x42, 0x34, 0x44, 0x52,
+    0x43, 0x22, 0x4e, 0x41, 0x49, 0x48, 0x49, 0x51, 0x3b, 0x37, 0x4b, 0x40,
+    0x4f, 0x45, 0x53, 0x4c, 0x47, 0x46, 0x47, 0x4c, 0x3e, 0x44, 0x45, 0x49,
+    0x48, 0x50, 0x45, 0x40, 0x46, 0x4c, 0x47, 0x4d, 0x44, 0x48, 0x49, 0x50,
+    0x4f, 0x4a, 0x46, 0x55, 0x4e, 0x42, 0x4c, 0x4c, 0x50, 0x48, 0x3d, 0x55,
+    0x46, 0x3e, 0x4a, 0x4b, 0x4f, 0x46, 0x46, 0x60, 0x50, 0x3f, 0x55, 0x40,
+    0x42, 0x44, 0x48, 0x63, 0x50, 0x3d, 0x45, 0x4f, 0x4e, 0x41, 0x47, 0x48,
+    0x4a, 0x3c, 0x3d, 0x46, 0x3f, 0x42, 0x43, 0x37, 0x4f, 0x4f, 0x50, 0x47,
+    0x47, 0x4b, 0x52, 0x40, 0x3f, 0x44, 0x4a, 0x40, 0x4d, 0x44, 0x4e, 0x37,
+    0x43, 0x48, 0x47, 0x3f, 0x51, 0x4d, 0x45, 0x42, 0x41, 0x46, 0x3d, 0x53,
+    0x4f, 0x4b, 0x54, 0x45, 0x51, 0x40, 0x4a, 0x4a, 0x48, 0x4f, 0x43, 0x4a,
+    0x4f, 0x4c, 0x4c, 0x4f, 0x48, 0x4c, 0x44, 0x4e, 0x43, 0x46, 0x4f, 0x4a,
+    0x43, 0x41, 0x49, 0x49, 0x47, 0x53, 0x45, 0x49, 0x4e, 0x46, 0x4c, 0x4e,
+    0x3c, 0x49, 0x44, 0x45, 0x4c, 0x42, 0x49, 0x41, 0x48, 0x58, 0x54, 0x4d,
+    0x35, 0x52, 0x4e, 0x5b, 0x4f, 0x40, 0x3e, 0x46, 0x46, 0x36, 0x3d, 0x60,
+    0x4d, 0x49, 0x4a, 0x43, 0x44, 0x36, 0x49, 0x67, 0x4a, 0x2d, 0x4b, 0x40,
+    0x3f, 0x49, 0x43, 0x5f, 0x45, 0x3c, 0x49, 0x4c, 0x4a, 0x43, 0x48, 0x55,
+    0x49, 0x46, 0x49, 0x46, 0x44, 0x4e, 0x42, 0x4e, 0x40, 0x45, 0x42, 0x52,
+    0x4a, 0x40, 0x4a, 0x44, 0x40, 0x45, 0x54, 0x3d, 0x4c, 0x3e, 0x4c, 0x55,
+    0x4d, 0x45, 0x4d, 0x51, 0x4a, 0x4b, 0x44, 0x5b, 0x48, 0x3d, 0x3e, 0x46,
+    0x4f, 0x4d, 0x3f, 0x62, 0x4d, 0x45, 0x3f, 0x47, 0x47, 0x47, 0x44, 0x5b,
+    0x4b, 0x4f, 0x51, 0x4c, 0x4a, 0x47, 0x48, 0x5b, 0x47, 0x40, 0x4a, 0x47,
+    0x42, 0x44, 0x46, 0x46, 0x45, 0x48, 0x4a, 0x3f, 0x40, 0x4f, 0x48, 0x3a,
+    0x49, 0x52, 0x4a, 0x53, 0x43, 0x4c, 0x4b, 0x4a, 0x4a, 0x4a, 0x4e, 0x42,
+    0x4b, 0x46, 0x3d, 0x50, 0x51, 0x4b, 0x4b, 0x4f, 0x50, 0x4c, 0x4f, 0x4c,
+    0x4d, 0x41, 0x41, 0x3c, 0x40, 0x43, 0x54, 0x51, 0x48, 0x3d, 0x48, 0x51,
+    0x42, 0x42, 0x4c, 0x4e, 0x4d, 0x4b, 0x49, 0x43, 0x48, 0x47, 0x4b, 0x49,
+    0x49, 0x4e, 0x4d, 0x46, 0x4c, 0x52, 0x49, 0x49, 0x51, 0x4e, 0x45, 0x47,
+    0x44, 0x47, 0x42, 0x4a, 0x46, 0x59, 0x48, 0x48, 0x4b, 0x4f, 0x4c, 0x5e,
+    0x5c, 0x45, 0x3f, 0x48, 0x3d, 0x3f, 0x37, 0x5a, 0x4b, 0x4b, 0x45, 0x49,
+    0x3e, 0x42, 0x41, 0x6b, 0x49, 0x2d, 0x45, 0x43, 0x47, 0x45, 0x49, 0x61,
+    0x3d, 0x3b, 0x49, 0x43, 0x49, 0x4b, 0x4b, 0x55, 0x4b, 0x47, 0x46, 0x46,
+    0x48, 0x4d, 0x49, 0x4f, 0x4a, 0x4c, 0x42, 0x51, 0x41, 0x44, 0x45, 0x4f,
+    0x4e, 0x44, 0x3f, 0x55, 0x3e, 0x4a, 0x45, 0x50, 0x46, 0x42, 0x41, 0x49,
+    0x49, 0x47, 0x49, 0x61, 0x47, 0x40, 0x41, 0x4e, 0x4d, 0x4b, 0x4a, 0x5e,
+    0x52, 0x49, 0x4b, 0x52, 0x51, 0x55, 0x42, 0x61, 0x53, 0x4c, 0x48, 0x4a,
+    0x4e, 0x48, 0x48, 0x57, 0x4c, 0x40, 0x40, 0x48, 0x45, 0x43, 0x3e, 0x46,
+    0x43, 0x4a, 0x45, 0x45, 0x44, 0x4f, 0x44, 0x40, 0x49, 0x48, 0x4e, 0x49,
+    0x4a, 0x4e, 0x49, 0x51, 0x46, 0x4f, 0x47, 0x44, 0x42, 0x4d, 0x43, 0x4e,
+    0x4f, 0x4d, 0x44, 0x51, 0x47, 0x49, 0x40, 0x57, 0x4b, 0x49, 0x47, 0x4c,
+    0x4d, 0x4d, 0x3e, 0x47, 0x45, 0x41, 0x50, 0x4b, 0x4b, 0x45, 0x42, 0x4e,
+    0x48, 0x47, 0x4e, 0x4b, 0x56, 0x4c, 0x4f, 0x52, 0x51, 0x49, 0x4d, 0x4a,
+    0x4b, 0x52, 0x4d, 0x55, 0x4b, 0x4e, 0x4e, 0x4b, 0x51, 0x57, 0x47, 0x42,
+    0x49, 0x48, 0x56, 0x44, 0x52, 0x56, 0x53, 0x5a, 0x63, 0x53, 0x4c, 0x4c,
+    0x43, 0x56, 0x3c, 0x57, 0x47, 0x47, 0x4d, 0x52, 0x43, 0x48, 0x45, 0x5f,
+    0x45, 0x29, 0x47, 0x45, 0x48, 0x40, 0x41, 0x4b, 0x3f, 0x39, 0x49, 0x4e,
+    0x47, 0x55, 0x42, 0x56, 0x4d, 0x43, 0x48, 0x44, 0x45, 0x53, 0x43, 0x46,
+    0x49, 0x43, 0x49, 0x4a, 0x40, 0x4e, 0x4a, 0x4a, 0x47, 0x43, 0x45, 0x4d,
+    0x4a, 0x47, 0x3f, 0x53, 0x45, 0x43, 0x4b, 0x4c, 0x42, 0x47, 0x47, 0x5f,
+    0x48, 0x48, 0x46, 0x44, 0x50, 0x47, 0x41, 0x64, 0x4e, 0x46, 0x49, 0x4a,
+    0x4d, 0x55, 0x42, 0x55, 0x46, 0x3d, 0x49, 0x43, 0x52, 0x52, 0x47, 0x52,
+    0x4e, 0x46, 0x47, 0x41, 0x49, 0x4d, 0x50, 0x47, 0x42, 0x49, 0x41, 0x42,
+    0x4b, 0x48, 0x49, 0x42, 0x4d, 0x48, 0x51, 0x54, 0x43, 0x56, 0x4c, 0x52,
+    0x53, 0x4d, 0x54, 0x4a, 0x51, 0x50, 0x48, 0x4c, 0x4e, 0x48, 0x4c, 0x4c,
+    0x52, 0x49, 0x4a, 0x4e, 0x4e, 0x41, 0x4f, 0x53, 0x49, 0x52, 0x42, 0x4b,
+    0x50, 0x46, 0x50, 0x4a, 0x53, 0x56, 0x46, 0x4f, 0x4b, 0x49, 0x3d, 0x41,
+    0x4c, 0x52, 0x42, 0x50, 0x4d, 0x45, 0x4e, 0x51, 0x4b, 0x4c, 0x46, 0x42,
+    0x41, 0x4b, 0x40, 0x4a, 0x42, 0x57, 0x4f, 0x43, 0x40, 0x50, 0x4c, 0x51,
+    0x4f, 0x48, 0x3a, 0x4e, 0x51, 0x40, 0x49, 0x66, 0x4b, 0x42, 0x48, 0x3c,
+    0x5b, 0x47, 0x53, 0x40, 0x4a, 0x48, 0x35, 0x44, 0x5f, 0x50, 0x4a, 0x3c,
+    0x41, 0x45, 0x48, 0x3b, 0x42, 0x59, 0x43, 0x4b, 0x48, 0x49, 0x4a, 0x40,
+    0x4f, 0x5c, 0x50, 0x54, 0x53, 0x55, 0x4c, 0x4a, 0x43, 0x46, 0x49, 0x47,
+    0x49, 0x48, 0x4b, 0x43, 0x42, 0x44, 0x42, 0x46, 0x44, 0x3f, 0x4b, 0x42,
+    0x4d, 0x49, 0x41, 0x46, 0x47, 0x51, 0x51, 0x44, 0x4c, 0x54, 0x4e, 0x4b,
+    0x42, 0x52, 0x4e, 0x4c, 0x4b, 0x4a, 0x50, 0x4e, 0x44, 0x4b, 0x4e, 0x4e,
+    0x4f, 0x42, 0x4b, 0x48, 0x46, 0x43, 0x48, 0x54, 0x4b, 0x4e, 0x48, 0x4f,
+    0x4a, 0x4d, 0x43, 0x4e, 0x47, 0x50, 0x4a, 0x44, 0x47, 0x52, 0x46, 0x53,
+    0x4a, 0x40, 0x46, 0x54, 0x50, 0x4a, 0x47, 0x51, 0x49, 0x45, 0x4b, 0x4e,
+    0x4b, 0x46, 0x4c, 0x4c, 0x52, 0x47, 0x45, 0x45, 0x4a, 0x47, 0x4c, 0x52,
+    0x44, 0x51, 0x47, 0x42, 0x47, 0x43, 0x43, 0x49, 0x52, 0x5a, 0x55, 0x3e,
+    0x45, 0x4b, 0x4c, 0x46, 0x4f, 0x4b, 0x45, 0x49, 0x4a, 0x4e, 0x4a, 0x50,
+    0x3e, 0x4e, 0x42, 0x4e, 0x44, 0x55, 0x3d, 0x4a, 0x4d, 0x49, 0x4d, 0x42,
+    0x49, 0x4e, 0x50, 0x44, 0x4b, 0x3c, 0x41, 0x49, 0x51, 0x49, 0x3c, 0x4e,
+    0x4c, 0x39, 0x4c, 0x72, 0x44, 0x4b, 0x49, 0x42, 0x5f, 0x48, 0x4a, 0x48,
+    0x41, 0x4c, 0x43, 0x40, 0x62, 0x5e, 0x47, 0x3c, 0x4a, 0x4c, 0x55, 0x49,
+    0x4b, 0x52, 0x4e, 0x4b, 0x4d, 0x48, 0x4c, 0x3c, 0x3f, 0x4f, 0x4e, 0x48,
+    0x45, 0x55, 0x4a, 0x46, 0x48, 0x3d, 0x45, 0x44, 0x4b, 0x4a, 0x46, 0x3a,
+    0x4e, 0x44, 0x4d, 0x49, 0x49, 0x49, 0x40, 0x3e, 0x40, 0x47, 0x48, 0x43,
+    0x3f, 0x51, 0x46, 0x4c, 0x45, 0x4c, 0x49, 0x44, 0x3e, 0x57, 0x49, 0x4e,
+    0x48, 0x3f, 0x48, 0x47, 0x53, 0x4d, 0x50, 0x51, 0x49, 0x42, 0x45, 0x44,
+    0x49, 0x49, 0x46, 0x4b, 0x45, 0x49, 0x4f, 0x49, 0x46, 0x48, 0x4c, 0x55,
+    0x46, 0x51, 0x48, 0x4a, 0x48, 0x54, 0x4b, 0x5a, 0x4c, 0x47, 0x40, 0x47,
+    0x40, 0x55, 0x50, 0x52, 0x4a, 0x4b, 0x4f, 0x49, 0x4b, 0x50, 0x4b, 0x5b,
+    0x51, 0x53, 0x4f, 0x4e, 0x49, 0x48, 0x44, 0x52, 0x46, 0x4e, 0x47, 0x48,
+    0x44, 0x43, 0x49, 0x55, 0x48, 0x58, 0x4f, 0x46, 0x45, 0x53, 0x45, 0x4a,
+    0x4c, 0x4c, 0x49, 0x46, 0x47, 0x4d, 0x41, 0x4d, 0x4f, 0x59, 0x4a, 0x49,
+    0x46, 0x4e, 0x44, 0x49, 0x4d, 0x48, 0x54, 0x47, 0x48, 0x4e, 0x48, 0x43,
+    0x46, 0x41, 0x46, 0x44, 0x52, 0x46, 0x42, 0x4c, 0x4c, 0x31, 0x4d, 0x6f,
+    0x51, 0x4f, 0x4d, 0x43, 0x5c, 0x48, 0x49, 0x49, 0x46, 0x4c, 0x43, 0x3b,
+    0x5d, 0x63, 0x58, 0x46, 0x49, 0x45, 0x4e, 0x48, 0x49, 0x5d, 0x45, 0x50,
+    0x56, 0x4d, 0x57, 0x37, 0x40, 0x55, 0x43, 0x4b, 0x4e, 0x46, 0x4c, 0x3b,
+    0x3d, 0x4b, 0x49, 0x4b, 0x52, 0x47, 0x4d, 0x34, 0x4c, 0x4c, 0x47, 0x4e,
+    0x4d, 0x4c, 0x3d, 0x3f, 0x4a, 0x49, 0x44, 0x45, 0x4a, 0x54, 0x43, 0x44,
+    0x50, 0x4b, 0x4d, 0x4c, 0x4e, 0x48, 0x46, 0x51, 0x43, 0x48, 0x48, 0x48,
+    0x42, 0x44, 0x4e, 0x48, 0x47, 0x45, 0x48, 0x51, 0x53, 0x4a, 0x4f, 0x58,
+    0x42, 0x4d, 0x48, 0x4f, 0x4c, 0x45, 0x4a, 0x57, 0x4b, 0x43, 0x4d, 0x4b,
+    0x4a, 0x4e, 0x4c, 0x5f, 0x3f, 0x4f, 0x4a, 0x42, 0x4b, 0x48, 0x4d, 0x62,
+    0x4f, 0x4b, 0x50, 0x4c, 0x45, 0x49, 0x44, 0x53, 0x4a, 0x4f, 0x45, 0x56,
+    0x4b, 0x44, 0x41, 0x53, 0x49, 0x48, 0x4d, 0x49, 0x47, 0x4b, 0x46, 0x4c,
+    0x49, 0x4b, 0x4c, 0x54, 0x4f, 0x4b, 0x47, 0x49, 0x44, 0x4a, 0x4e, 0x53,
+    0x4f, 0x49, 0x54, 0x4e, 0x4a, 0x48, 0x42, 0x54, 0x51, 0x46, 0x4b, 0x52,
+    0x45, 0x48, 0x51, 0x4a, 0x40, 0x4a, 0x50, 0x45, 0x4a, 0x46, 0x49, 0x46,
+    0x54, 0x46, 0x42, 0x48, 0x50, 0x36, 0x4a, 0x6b, 0x46, 0x59, 0x51, 0x47,
+    0x5f, 0x4d, 0x43, 0x4d, 0x44, 0x4d, 0x42, 0x3b, 0x65, 0x6a, 0x56, 0x48,
+    0x4d, 0x4c, 0x52, 0x4a, 0x4d, 0x61, 0x52, 0x4b, 0x47, 0x4f, 0x48, 0x49,
+    0x3f, 0x5b, 0x45, 0x51, 0x48, 0x48, 0x4b, 0x3c, 0x3b, 0x4c, 0x54, 0x52,
+    0x4f, 0x51, 0x53, 0x31, 0x47, 0x4c, 0x45, 0x4a, 0x42, 0x4b, 0x47, 0x40,
+    0x41, 0x49, 0x4c, 0x46, 0x4b, 0x53, 0x46, 0x49, 0x44, 0x4b, 0x4e, 0x4b,
+    0x48, 0x51, 0x49, 0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x45, 0x43, 0x46, 0x56,
+    0x42, 0x4b, 0x49, 0x4e, 0x4e, 0x53, 0x42, 0x5c, 0x4b, 0x46, 0x49, 0x46,
+    0x4e, 0x41, 0x42, 0x67, 0x41, 0x49, 0x4d, 0x48, 0x49, 0x4e, 0x3f, 0x61,
+    0x48, 0x4a, 0x40, 0x42, 0x4c, 0x51, 0x50, 0x63, 0x49, 0x44, 0x49, 0x47,
+    0x45, 0x4d, 0x49, 0x61, 0x3f, 0x48, 0x40, 0x41, 0x49, 0x49, 0x45, 0x57,
+    0x45, 0x46, 0x4d, 0x46, 0x4c, 0x4a, 0x4d, 0x4b, 0x43, 0x54, 0x4b, 0x49,
+    0x4c, 0x49, 0x41, 0x49, 0x4b, 0x47, 0x45, 0x4b, 0x44, 0x43, 0x46, 0x3f,
+    0x47, 0x47, 0x43, 0x4c, 0x49, 0x4c, 0x3d, 0x4d, 0x4b, 0x54, 0x4a, 0x4f,
+    0x44, 0x4c, 0x4b, 0x47, 0x4c, 0x45, 0x3d, 0x52, 0x58, 0x4b, 0x45, 0x4e,
+    0x48, 0x39, 0x53, 0x70, 0x4a, 0x5d, 0x4c, 0x4e, 0x5a, 0x4f, 0x46, 0x4b,
+    0x3e, 0x4f, 0x44, 0x3d, 0x66, 0x6b, 0x50, 0x4d, 0x4d, 0x57, 0x52, 0x4a,
+    0x4c, 0x5b, 0x4e, 0x53, 0x4d, 0x54, 0x50, 0x42, 0x3c, 0x5d, 0x4a, 0x4c,
+    0x56, 0x52, 0x50, 0x40, 0x48, 0x4c, 0x4d, 0x49, 0x49, 0x4f, 0x51, 0x38,
+    0x42, 0x49, 0x4d, 0x4f, 0x45, 0x40, 0x4d, 0x41, 0x4b, 0x4a, 0x47, 0x51,
+    0x4b, 0x53, 0x4c, 0x4a, 0x51, 0x4c, 0x42, 0x56, 0x48, 0x4a, 0x47, 0x58,
+    0x49, 0x46, 0x52, 0x4a, 0x45, 0x47, 0x51, 0x54, 0x4f, 0x50, 0x50, 0x53,
+    0x49, 0x4a, 0x4d, 0x56, 0x56, 0x4b, 0x4d, 0x45, 0x40, 0x4d, 0x48, 0x60,
+    0x4e, 0x56, 0x48, 0x4b, 0x47, 0x45, 0x47, 0x62, 0x4e, 0x4f, 0x41, 0x49,
+    0x48, 0x57, 0x44, 0x64, 0x4f, 0x4f, 0x49, 0x44, 0x49, 0x4c, 0x3f, 0x53,
+    0x40, 0x41, 0x4e, 0x4b, 0x4d, 0x54, 0x42, 0x53, 0x4e, 0x41, 0x49, 0x44,
+    0x41, 0x45, 0x4d, 0x4f, 0x47, 0x51, 0x45, 0x4a, 0x42, 0x45, 0x4e, 0x40,
+    0x4b, 0x52, 0x48, 0x47, 0x4e, 0x4f, 0x47, 0x41, 0x48, 0x53, 0x47, 0x47,
+    0x46, 0x42, 0x48, 0x4b, 0x42, 0x4c, 0x49, 0x4c, 0x45, 0x4c, 0x54, 0x45,
+    0x4c, 0x43, 0x4e, 0x49, 0x56, 0x47, 0x45, 0x4f, 0x4d, 0x3a, 0x58, 0x74,
+    0x49, 0x5b, 0x4c, 0x4f, 0x64, 0x4e, 0x45, 0x43, 0x44, 0x5b, 0x43, 0x41,
+    0x63, 0x70, 0x55, 0x45, 0x4a, 0x4a, 0x4d, 0x51, 0x4b, 0x5a, 0x51, 0x57,
+    0x54, 0x5b, 0x55, 0x44, 0x38, 0x57, 0x4e, 0x50, 0x4e, 0x56, 0x57, 0x3a,
+    0x3a, 0x4b, 0x57, 0x4c, 0x51, 0x53, 0x4d, 0x3b, 0x44, 0x43, 0x47, 0x4c,
+    0x48, 0x59, 0x51, 0x41, 0x43, 0x44, 0x51, 0x51, 0x4a, 0x54, 0x51, 0x4b,
+    0x4e, 0x45, 0x51, 0x4a, 0x49, 0x4a, 0x4f, 0x52, 0x4c, 0x3e, 0x4e, 0x55,
+    0x42, 0x46, 0x46, 0x4a, 0x42, 0x52, 0x49, 0x47, 0x4a, 0x56, 0x4f, 0x50,
+    0x46, 0x4f, 0x43, 0x51, 0x53, 0x46, 0x40, 0x60, 0x44, 0x4d, 0x46, 0x54,
+    0x3d, 0x49, 0x43, 0x64, 0x45, 0x4d, 0x50, 0x49, 0x4f, 0x4d, 0x53, 0x60,
+    0x4a, 0x52, 0x49, 0x47, 0x48, 0x5a, 0x48, 0x58, 0x4e, 0x4f, 0x43, 0x4f,
+    0x50, 0x51, 0x41, 0x52, 0x4c, 0x4d, 0x45, 0x42, 0x41, 0x4c, 0x44, 0x54,
+    0x4e, 0x4d, 0x4a, 0x47, 0x40, 0x4a, 0x3e, 0x47, 0x4c, 0x58, 0x46, 0x46,
+    0x55, 0x4c, 0x4d, 0x45, 0x49, 0x51, 0x53, 0x46, 0x46, 0x43, 0x43, 0x48,
+    0x52, 0x3d, 0x4b, 0x4e, 0x49, 0x47, 0x3f, 0x3d, 0x4f, 0x45, 0x44, 0x3f,
+    0x5a, 0x43, 0x4b, 0x4d, 0x51, 0x35, 0x54, 0x76, 0x4f, 0x5e, 0x4c, 0x50,
+    0x5a, 0x51, 0x46, 0x49, 0x44, 0x61, 0x4f, 0x41, 0x67, 0x72, 0x56, 0x4f,
+    0x42, 0x48, 0x4b, 0x52, 0x46, 0x60, 0x50, 0x4e, 0x4a, 0x5b, 0x5f, 0x46,
+    0x31, 0x5b, 0x4a, 0x48, 0x4b, 0x58, 0x51, 0x41, 0x37, 0x4e, 0x4f, 0x55,
+    0x51, 0x5c, 0x4f, 0x42, 0x4b, 0x4e, 0x4f, 0x54, 0x4f, 0x52, 0x43, 0x43,
+    0x48, 0x53, 0x53, 0x41, 0x4b, 0x49, 0x4e, 0x50, 0x46, 0x4c, 0x4f, 0x49,
+    0x42, 0x49, 0x4c, 0x4c, 0x4c, 0x41, 0x4e, 0x48, 0x47, 0x4c, 0x49, 0x53,
+    0x44, 0x46, 0x51, 0x53, 0x45, 0x52, 0x4e, 0x53, 0x50, 0x58, 0x42, 0x45,
+    0x44, 0x42, 0x48, 0x58, 0x4e, 0x4d, 0x54, 0x56, 0x4c, 0x46, 0x4a, 0x58,
+    0x48, 0x4f, 0x47, 0x51, 0x47, 0x4f, 0x4f, 0x5b, 0x41, 0x4e, 0x45, 0x45,
+    0x4a, 0x50, 0x3e, 0x57, 0x48, 0x4e, 0x41, 0x4c, 0x45, 0x51, 0x46, 0x4c,
+    0x46, 0x4f, 0x42, 0x45, 0x4b, 0x4c, 0x49, 0x4c, 0x44, 0x4f, 0x4e, 0x4d,
+    0x48, 0x56, 0x43, 0x48, 0x42, 0x54, 0x48, 0x43, 0x3e, 0x51, 0x43, 0x47,
+    0x47, 0x47, 0x49, 0x4d, 0x46, 0x4e, 0x52, 0x42, 0x48, 0x4e, 0x4c, 0x4a,
+    0x4d, 0x3e, 0x43, 0x40, 0x48, 0x41, 0x47, 0x4f, 0x5e, 0x49, 0x40, 0x4c,
+    0x50, 0x42, 0x56, 0x75, 0x51, 0x5e, 0x51, 0x4e, 0x62, 0x58, 0x49, 0x47,
+    0x51, 0x59, 0x46, 0x46, 0x6c, 0x72, 0x55, 0x44, 0x4c, 0x4a, 0x4d, 0x59,
+    0x53, 0x64, 0x4d, 0x51, 0x55, 0x5e, 0x59, 0x50, 0x30, 0x58, 0x50, 0x4c,
+    0x4c, 0x60, 0x59, 0x42, 0x32, 0x53, 0x50, 0x55, 0x4d, 0x53, 0x59, 0x43,
+    0x3e, 0x49, 0x4f, 0x52, 0x4d, 0x51, 0x47, 0x45, 0x4d, 0x4e, 0x53, 0x4e,
+    0x54, 0x4f, 0x4d, 0x4d, 0x4e, 0x40, 0x47, 0x53, 0x53, 0x49, 0x56, 0x4d,
+    0x4d, 0x3a, 0x4c, 0x4e, 0x45, 0x4a, 0x47, 0x45, 0x53, 0x4a, 0x4e, 0x52,
+    0x4d, 0x4e, 0x48, 0x56, 0x4e, 0x4a, 0x4d, 0x52, 0x49, 0x4e, 0x4e, 0x58,
+    0x47, 0x50, 0x4c, 0x54, 0x49, 0x42, 0x46, 0x54, 0x50, 0x54, 0x54, 0x46,
+    0x40, 0x49, 0x4b, 0x57, 0x4b, 0x59, 0x44, 0x46, 0x52, 0x55, 0x51, 0x55,
+    0x4f, 0x50, 0x4d, 0x4d, 0x48, 0x50, 0x4e, 0x49, 0x4e, 0x42, 0x45, 0x3f,
+    0x4d, 0x4f, 0x51, 0x47, 0x4a, 0x4c, 0x4b, 0x4b, 0x46, 0x4d, 0x44, 0x52,
+    0x4d, 0x44, 0x40, 0x4d, 0x54, 0x46, 0x54, 0x44, 0x4b, 0x46, 0x47, 0x45,
+    0x50, 0x45, 0x45, 0x4b, 0x4c, 0x48, 0x3f, 0x55, 0x4a, 0x45, 0x49, 0x4e,
+    0x40, 0x49, 0x4a, 0x41, 0x56, 0x4b, 0x49, 0x4e, 0x4a, 0x41, 0x50, 0x70,
+    0x56, 0x59, 0x4b, 0x55, 0x58, 0x59, 0x49, 0x47, 0x4a, 0x5a, 0x4c, 0x46,
+    0x62, 0x7b, 0x58, 0x51, 0x44, 0x47, 0x44, 0x57, 0x4f, 0x65, 0x4e, 0x50,
+    0x4d, 0x67, 0x5c, 0x4a, 0x2b, 0x61, 0x48, 0x4b, 0x4b, 0x5d, 0x5c, 0x48,
+    0x39, 0x50, 0x45, 0x4d, 0x53, 0x60, 0x53, 0x46, 0x42, 0x46, 0x50, 0x45,
+    0x4f, 0x4e, 0x46, 0x4a, 0x4d, 0x51, 0x54, 0x47, 0x59, 0x4b, 0x58, 0x4a,
+    0x50, 0x3d, 0x59, 0x48, 0x45, 0x4e, 0x4e, 0x47, 0x4f, 0x47, 0x4d, 0x4b,
+    0x52, 0x42, 0x4c, 0x48, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x4c, 0x4d, 0x51,
+    0x49, 0x4f, 0x4c, 0x47, 0x47, 0x48, 0x47, 0x59, 0x4f, 0x4f, 0x53, 0x49,
+    0x4e, 0x4b, 0x4f, 0x5a, 0x50, 0x42, 0x47, 0x50, 0x4a, 0x54, 0x47, 0x5a,
+    0x43, 0x49, 0x47, 0x4e, 0x49, 0x4d, 0x43, 0x54, 0x4c, 0x53, 0x4e, 0x4e,
+    0x42, 0x43, 0x48, 0x46, 0x4f, 0x43, 0x43, 0x45, 0x51, 0x47, 0x4b, 0x4f,
+    0x56, 0x48, 0x48, 0x49, 0x46, 0x45, 0x4d, 0x52, 0x47, 0x4b, 0x46, 0x50,
+    0x3e, 0x4e, 0x4c, 0x43, 0x45, 0x4d, 0x53, 0x43, 0x46, 0x45, 0x44, 0x52,
+    0x45, 0x49, 0x49, 0x51, 0x3d, 0x4a, 0x4d, 0x46, 0x42, 0x41, 0x4e, 0x48,
+    0x5a, 0x49, 0x49, 0x49, 0x4f, 0x3d, 0x56, 0x68, 0x56, 0x67, 0x4b, 0x57,
+    0x5f, 0x5c, 0x40, 0x4a, 0x4a, 0x54, 0x4c, 0x47, 0x64, 0x7a, 0x54, 0x48,
+    0x46, 0x45, 0x46, 0x57, 0x4e, 0x61, 0x4f, 0x50, 0x4d, 0x64, 0x5b, 0x43,
+    0x2d, 0x60, 0x55, 0x51, 0x4c, 0x54, 0x4f, 0x4e, 0x2f, 0x50, 0x4f, 0x52,
+    0x50, 0x61, 0x54, 0x4b, 0x3d, 0x4c, 0x47, 0x51, 0x4a, 0x54, 0x4b, 0x42,
+    0x3b, 0x55, 0x47, 0x50, 0x4f, 0x49, 0x4a, 0x46, 0x43, 0x44, 0x45, 0x47,
+    0x46, 0x4b, 0x4f, 0x46, 0x43, 0x47, 0x4a, 0x4e, 0x51, 0x43, 0x55, 0x47,
+    0x4d, 0x46, 0x4c, 0x4c, 0x49, 0x4d, 0x43, 0x51, 0x47, 0x51, 0x52, 0x4a,
+    0x46, 0x4f, 0x49, 0x52, 0x50, 0x4a, 0x43, 0x53, 0x46, 0x4e, 0x50, 0x54,
+    0x45, 0x3a, 0x4a, 0x4a, 0x4c, 0x50, 0x4b, 0x54, 0x43, 0x4f, 0x4e, 0x45,
+    0x49, 0x4f, 0x46, 0x53, 0x4d, 0x51, 0x52, 0x53, 0x3d, 0x4a, 0x47, 0x4e,
+    0x43, 0x4a, 0x53, 0x48, 0x4a, 0x4c, 0x4a, 0x4a, 0x42, 0x53, 0x3e, 0x43,
+    0x4f, 0x4c, 0x47, 0x48, 0x54, 0x4d, 0x48, 0x48, 0x4e, 0x4c, 0x43, 0x51,
+    0x42, 0x49, 0x44, 0x3e, 0x49, 0x51, 0x4a, 0x4d, 0x4f, 0x49, 0x45, 0x44,
+    0x4e, 0x41, 0x48, 0x4b, 0x4c, 0x49, 0x46, 0x47, 0x5d, 0x4c, 0x4d, 0x50,
+    0x45, 0x40, 0x4e, 0x6a, 0x4f, 0x62, 0x53, 0x50, 0x5c, 0x5e, 0x4a, 0x4c,
+    0x50, 0x56, 0x52, 0x42, 0x60, 0x7e, 0x5b, 0x4b, 0x43, 0x41, 0x4c, 0x56,
+    0x46, 0x5f, 0x4d, 0x49, 0x43, 0x65, 0x5c, 0x4d, 0x2c, 0x61, 0x48, 0x4c,
+    0x44, 0x55, 0x5c, 0x49, 0x37, 0x54, 0x4e, 0x57, 0x52, 0x5c, 0x50, 0x49,
+    0x3e, 0x4d, 0x4f, 0x4f, 0x51, 0x4c, 0x48, 0x43, 0x4a, 0x5a, 0x4d, 0x4b,
+    0x4e, 0x58, 0x54, 0x49, 0x51, 0x42, 0x49, 0x4f, 0x46, 0x45, 0x52, 0x3d,
+    0x4b, 0x4b, 0x43, 0x54, 0x47, 0x47, 0x4c, 0x42, 0x4b, 0x49, 0x45, 0x46,
+    0x46, 0x4a, 0x51, 0x47, 0x47, 0x4f, 0x48, 0x4a, 0x3f, 0x4c, 0x4b, 0x57,
+    0x4a, 0x3f, 0x52, 0x4a, 0x56, 0x52, 0x4b, 0x54, 0x4c, 0x3e, 0x3f, 0x4f,
+    0x4b, 0x50, 0x4c, 0x53, 0x4a, 0x49, 0x46, 0x4e, 0x50, 0x48, 0x4f, 0x4b,
+    0x4a, 0x4e, 0x3e, 0x49, 0x45, 0x42, 0x42, 0x41, 0x47, 0x4b, 0x4f, 0x42,
+    0x49, 0x4c, 0x55, 0x4c, 0x4e, 0x42, 0x47, 0x42, 0x4b, 0x48, 0x46, 0x41,
+    0x46, 0x4e, 0x4d, 0x3f, 0x4f, 0x46, 0x4f, 0x4b, 0x4b, 0x4d, 0x50, 0x3e,
+    0x42, 0x43, 0x44, 0x4a, 0x49, 0x40, 0x4e, 0x43, 0x3e, 0x52, 0x3e, 0x44,
+    0x49, 0x43, 0x4d, 0x44, 0x62, 0x51, 0x42, 0x53, 0x51, 0x40, 0x4c, 0x64,
+    0x4f, 0x63, 0x4e, 0x5c, 0x5b, 0x5c, 0x48, 0x4d, 0x4a, 0x57, 0x4f, 0x42,
+    0x65, 0xfe, 0x5c, 0x4e, 0x47, 0x43, 0x4a, 0x58, 0x4e, 0x5e, 0x48, 0x4c,
+    0x51, 0x5e, 0x60, 0x56, 0x2f, 0x62, 0x54, 0x58, 0x51, 0x52, 0x55, 0x51,
+    0x36, 0x4b, 0x46, 0x51, 0x53, 0x5f, 0x46, 0x4c, 0x37, 0x4d, 0x4a, 0x45,
+    0x4b, 0x3f, 0x41, 0x42, 0x3f, 0x53, 0x4a, 0x48, 0x49, 0x4a, 0x4a, 0x45,
+    0x52, 0x3f, 0x52, 0x52, 0x45, 0x4d, 0x4f, 0x45, 0x46, 0x4a, 0x51, 0x48,
+    0x56, 0x47, 0x50, 0x3e, 0x46, 0x49, 0x4c, 0x51, 0x49, 0x54, 0x45, 0x4f,
+    0x4b, 0x4b, 0x49, 0x46, 0x4b, 0x4d, 0x49, 0x5c, 0x4d, 0x43, 0x47, 0x49,
+    0x48, 0x52, 0x46, 0x50, 0x51, 0x37, 0x50, 0x52, 0x4c, 0x4d, 0x4f, 0x51,
+    0x4f, 0x42, 0x50, 0x47, 0x48, 0x4e, 0x4d, 0x4c, 0x48, 0x48, 0x4a, 0x51,
+    0x49, 0x42, 0x50, 0x4f, 0x43, 0x4e, 0x47, 0x4b, 0x47, 0x4a, 0x44, 0x44,
+    0x4c, 0x51, 0x49, 0x44, 0x45, 0x45, 0x45, 0x48, 0x3f, 0x4a, 0x43, 0x49,
+    0x46, 0x49, 0x4c, 0x4d, 0x45, 0x50, 0x44, 0x45, 0x44, 0x55, 0x4a, 0x45,
+    0x48, 0x47, 0x4c, 0x43, 0x3f, 0x48, 0x42, 0x43, 0x43, 0x43, 0x48, 0x46,
+    0x5c, 0x51, 0x47, 0x51, 0x48, 0x40, 0x54, 0x66, 0x4e, 0x67, 0x4d, 0x5a,
+    0x60, 0x57, 0x47, 0x4d, 0x4d, 0x58, 0x53, 0x46, 0x66, 0x7e, 0x56, 0x48,
+    0x44, 0x4f, 0x49, 0x5c, 0x4a, 0x63, 0x50, 0x4c, 0x49, 0x56, 0x61, 0x50,
+    0x2c, 0x68, 0x4d, 0x51, 0x46, 0x4e, 0x5b, 0x51, 0x2e, 0x53, 0x54, 0x50,
+    0x46, 0x58, 0x44, 0x4f, 0x37, 0x48, 0x55, 0x50, 0x49, 0x49, 0x4e, 0x46,
+    0x43, 0x56, 0x52, 0x4e, 0x50, 0x4b, 0x50, 0x4c, 0x49, 0x40, 0x4d, 0x4f,
+    0x50, 0x41, 0x44, 0x39, 0x4b, 0x4d, 0x4b, 0x41, 0x51, 0x4d, 0x4c, 0x41,
+    0x3f, 0x52, 0x4e, 0x4b, 0x49, 0x53, 0x45, 0x43, 0x4d, 0x4f, 0x44, 0x4d,
+    0x4b, 0x53, 0x50, 0x4e, 0x45, 0x3f, 0x4e, 0x51, 0x50, 0x55, 0x4f, 0x51,
+    0x4d, 0x3d, 0x58, 0x3f, 0x46, 0x50, 0x50, 0x50, 0x56, 0x42, 0x49, 0x49,
+    0x50, 0x4f, 0x42, 0x4b, 0x4c, 0x45, 0x52, 0x41, 0x46, 0x43, 0x4c, 0x4a,
+    0x4c, 0x51, 0x4d, 0x4d, 0x4a, 0x49, 0x54, 0x49, 0x58, 0x53, 0x49, 0x45,
+    0x47, 0x4c, 0x4c, 0x44, 0x4e, 0x51, 0x4c, 0x4c, 0x47, 0x48, 0x4c, 0x4e,
+    0x49, 0x54, 0x4c, 0x51, 0x49, 0x48, 0x47, 0x45, 0x42, 0x49, 0x42, 0x51,
+    0x4e, 0x3f, 0x49, 0x41, 0x50, 0x3e, 0x4d, 0x50, 0x5c, 0x51, 0x4d, 0x56,
+    0x47, 0x48, 0x58, 0x65, 0x51, 0x6b, 0x56, 0x5b, 0x56, 0x55, 0x46, 0x49,
+    0x4b, 0x58, 0x59, 0x4a, 0x68, 0x79, 0x53, 0x46, 0x45, 0x4b, 0x53, 0x5d,
+    0x4b, 0x6f, 0x4e, 0x4f, 0x4c, 0x53, 0x5b, 0x52, 0x30, 0x63, 0x46, 0x57,
+    0x46, 0x50, 0x4b, 0x48, 0x2e, 0x4c, 0x46, 0x48, 0x44, 0x51, 0x46, 0x4a,
+    0x35, 0x55, 0x43, 0x4c, 0x43, 0x4d, 0x4e, 0x3e, 0x47, 0x56, 0x50, 0x4d,
+    0x44, 0x59, 0x4c, 0x51, 0x46, 0x42, 0x4e, 0x43, 0x4c, 0x44, 0x42, 0x3a,
+    0x40, 0x48, 0x46, 0x44, 0x45, 0x4a, 0x46, 0x3a, 0x53, 0x4c, 0x4d, 0x4c,
+    0x4a, 0x4f, 0x53, 0x40, 0x4b, 0x48, 0x54, 0x4b, 0x44, 0x59, 0x41, 0x50,
+    0x4e, 0x50, 0x55, 0x4d, 0x55, 0x41, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x50,
+    0x52, 0x4c, 0x50, 0x4d, 0x47, 0x42, 0x4f, 0x4b, 0x47, 0x43, 0x41, 0x4a,
+    0x55, 0x3e, 0x50, 0x4b, 0x41, 0x49, 0x47, 0x49, 0x53, 0x4d, 0x48, 0x4b,
+    0x43, 0x43, 0x51, 0x44, 0x4d, 0x4c, 0x44, 0x50, 0x4d, 0x42, 0x49, 0x4e,
+    0x50, 0x50, 0x4c, 0x49, 0x49, 0x51, 0x46, 0x43, 0x4a, 0x4e, 0x53, 0x47,
+    0x43, 0x46, 0x40, 0x49, 0x47, 0x44, 0x44, 0x4d, 0x4b, 0x4b, 0x51, 0x4b,
+    0x45, 0x49, 0x47, 0x43, 0x56, 0x49, 0x4c, 0x54, 0x50, 0x3c, 0x4c, 0x5e,
+    0x51, 0x67, 0x4f, 0x57, 0x57, 0x53, 0x3e, 0x4e, 0x4e, 0x5e, 0x4b, 0x48,
+    0x5a, 0x78, 0x55, 0x4a, 0x3f, 0x4b, 0x4c, 0x5b, 0x53, 0x64, 0x4d, 0x53,
+    0x49, 0x57, 0x57, 0x58, 0x37, 0x62, 0x4f, 0x56, 0x44, 0x4e, 0x58, 0x4a,
+    0x30, 0x4f, 0x40, 0x4e, 0x47, 0x58, 0x52, 0x50, 0x35, 0x4d, 0x49, 0x52,
+    0x4e, 0x42, 0x46, 0x47, 0x44, 0x57, 0x54, 0x43, 0x4e, 0x56, 0x43, 0x49,
+    0x44, 0x40, 0x44, 0x41, 0x50, 0x49, 0x4b, 0x44, 0x4d, 0x52, 0x49, 0x43,
+    0x52, 0x54, 0x49, 0x3f, 0x49, 0x42, 0x49, 0x4a, 0x43, 0x3e, 0x50, 0x40,
+    0x46, 0x4b, 0x50, 0x4b, 0x53, 0x4b, 0x47, 0x52, 0x51, 0x4b, 0x47, 0x3f,
+    0x46, 0x4b, 0x4c, 0x57, 0x49, 0x47, 0x54, 0x49, 0x50, 0x50, 0x4d, 0x4a,
+    0x42, 0x4e, 0x51, 0x4c, 0x47, 0x47, 0x42, 0x43, 0x54, 0x43, 0x46, 0x47,
+    0x4d, 0x43, 0x54, 0x47, 0x43, 0x58, 0x48, 0x45, 0x4b, 0x46, 0x48, 0x3d,
+    0x47, 0x3f, 0x44, 0x4f, 0x4e, 0x46, 0x41, 0x40, 0x4d, 0x4d, 0x4d, 0x52,
+    0x54, 0x47, 0x4f, 0x51, 0x4f, 0x45, 0x45, 0x48, 0x4b, 0x4d, 0x44, 0x52,
+    0x51, 0x4b, 0x48, 0x4f, 0x49, 0x49, 0x46, 0x50, 0x54, 0x42, 0x44, 0x51,
+    0x58, 0x4e, 0x43, 0x58, 0x55, 0x40, 0x53, 0x5a, 0x51, 0x61, 0x51, 0x60,
+    0x53, 0x57, 0x45, 0x4f, 0x45, 0x5e, 0x51, 0x42, 0x61, 0x7a, 0x55, 0x47,
+    0x41, 0x4b, 0x4a, 0x5b, 0x4c, 0x65, 0x4f, 0x55, 0x46, 0x54, 0x65, 0x59,
+    0x36, 0x61, 0x54, 0x55, 0x48, 0x57, 0x52, 0x4e, 0x24, 0x4b, 0x49, 0x4d,
+    0x43, 0x57, 0x44, 0x51, 0x3b, 0x4f, 0x45, 0x40, 0x47, 0x4a, 0x43, 0x47,
+    0x46, 0x58, 0x50, 0x54, 0x4d, 0x50, 0x44, 0x42, 0x4a, 0x46, 0x4b, 0x4d,
+    0x4f, 0x4f, 0x4d, 0x40, 0x48, 0x4a, 0x53, 0x48, 0x49, 0x48, 0x4d, 0x39,
+    0x47, 0x4e, 0x44, 0x4c, 0x4b, 0x49, 0x44, 0x42, 0x4a, 0x45, 0x46, 0x46,
+    0x53, 0x4d, 0x49, 0x4f, 0x4e, 0x48, 0x50, 0x4a, 0x4c, 0x46, 0x56, 0x4b,
+    0x4b, 0x57, 0x4c, 0x49, 0x4a, 0x4a, 0x43, 0x4e, 0x56, 0x45, 0x50, 0x4c,
+    0x47, 0x55, 0x48, 0x46, 0x4e, 0x46, 0x45, 0x3f, 0x4a, 0x4c, 0x4c, 0x47,
+    0x4a, 0x51, 0x4e, 0x50, 0x40, 0x52, 0x45, 0x45, 0x4b, 0x46, 0x4f, 0x44,
+    0x51, 0x4a, 0x4e, 0x4d, 0x4c, 0x46, 0x42, 0x47, 0x4a, 0x4e, 0x46, 0x42,
+    0x4b, 0x4f, 0x4b, 0x4e, 0x4e, 0x46, 0x42, 0x50, 0x53, 0x51, 0x4f, 0x54,
+    0x45, 0x4f, 0x45, 0x42, 0x4c, 0x45, 0x40, 0x48, 0x59, 0x49, 0x49, 0x53,
+    0x4c, 0x43, 0x4b, 0x57, 0x54, 0x64, 0x4e, 0x5f, 0x5c, 0x59, 0x4b, 0x56,
+    0x49, 0x5d, 0x4f, 0x4b, 0x62, 0x73, 0x54, 0x45, 0x49, 0x50, 0x48, 0x5a,
+    0x50, 0x6d, 0x4a, 0x4e, 0x48, 0x55, 0x5d, 0x57, 0x38, 0x68, 0x52, 0x5a,
+    0x46, 0x56, 0x4c, 0x5a, 0x2e, 0x55, 0x49, 0x4f, 0x4a, 0x57, 0x4f, 0x54,
+    0x41, 0x53, 0x46, 0x43, 0x45, 0x47, 0x53, 0x4a, 0x42, 0x4f, 0x4d, 0x48,
+    0x4c, 0x49, 0x47, 0x48, 0x45, 0x49, 0x48, 0x53, 0x48, 0x52, 0x4a, 0x44,
+    0x4c, 0x49, 0x52, 0x4b, 0x47, 0x51, 0x42, 0x47, 0x49, 0x51, 0x3f, 0x45,
+    0x47, 0x4e, 0x53, 0x33, 0x55, 0x51, 0x55, 0x48, 0x4b, 0x51, 0x56, 0x47,
+    0x43, 0x55, 0x47, 0x42, 0x47, 0x4f, 0x47, 0x51, 0x46, 0x55, 0x4a, 0x4b,
+    0x50, 0x52, 0x4f, 0x43, 0x4b, 0x53, 0x4d, 0x3f, 0x4e, 0x56, 0x50, 0x49,
+    0x4d, 0x47, 0x51, 0x49, 0x4a, 0x52, 0x44, 0x43, 0x4d, 0x4e, 0x41, 0x51,
+    0x4c, 0x4d, 0x47, 0x48, 0x4f, 0x40, 0x50, 0x46, 0x43, 0x4d, 0x4e, 0x50,
+    0x43, 0x47, 0x4e, 0x46, 0x4f, 0x4b, 0x51, 0x4b, 0x4a, 0x57, 0x42, 0x51,
+    0x4c, 0x54, 0x52, 0x42, 0x4c, 0x42, 0x47, 0x54, 0x4a, 0x4a, 0x47, 0x4a,
+    0x3f, 0x46, 0x4e, 0x4c, 0x53, 0x50, 0x47, 0x53, 0x49, 0x44, 0x52, 0x5a,
+    0x4b, 0x65, 0x50, 0x5b, 0x57, 0x59, 0x4a, 0x48, 0x48, 0x5f, 0x55, 0x48,
+    0x5c, 0x78, 0x55, 0x48, 0x4a, 0x4b, 0x49, 0x4c, 0x46, 0x6b, 0x54, 0x57,
+    0x55, 0x4b, 0x59, 0x52, 0x38, 0x5b, 0x57, 0x56, 0x4b, 0x4f, 0x48, 0x4e,
+    0x34, 0x5a, 0x4e, 0x4f, 0x43, 0x4e, 0x4b, 0x4e, 0x36, 0x4d, 0x52, 0x48,
+    0x4d, 0x4c, 0x4c, 0x49, 0x51, 0x54, 0x45, 0x54, 0x4a, 0x4e, 0x52, 0x41,
+    0x4c, 0x45, 0x4a, 0x53, 0x55, 0x4b, 0x50, 0x47, 0x4e, 0x4d, 0x43, 0x51,
+    0x4e, 0x4a, 0x51, 0x46, 0x4e, 0x4d, 0x48, 0x3f, 0x43, 0x52, 0x56, 0x38,
+    0x52, 0x46, 0x43, 0x49, 0x40, 0x49, 0x53, 0x41, 0x47, 0x41, 0x41, 0x42,
+    0x4f, 0x4b, 0x46, 0x4b, 0x4a, 0x57, 0x4a, 0x45, 0x4b, 0x46, 0x47, 0x3c,
+    0x43, 0x46, 0x4f, 0x50, 0x4c, 0x53, 0x4f, 0x41, 0x4a, 0x4a, 0x40, 0x4a,
+    0x3e, 0x4e, 0x4d, 0x41, 0x4a, 0x42, 0x49, 0x4c, 0x51, 0x46, 0x4f, 0x43,
+    0x4b, 0x41, 0x50, 0x48, 0x4a, 0x40, 0x52, 0x45, 0x40, 0x40, 0x46, 0x48,
+    0x48, 0x52, 0x52, 0x41, 0x43, 0x49, 0x49, 0x4c, 0x44, 0x48, 0x50, 0x4a,
+    0x47, 0x48, 0x4c, 0x42, 0x49, 0x48, 0x52, 0x56, 0x4b, 0x41, 0x4e, 0x47,
+    0x52, 0x56, 0x4e, 0x56, 0x4b, 0x38, 0x50, 0x55, 0x5a, 0x63, 0x51, 0x5a,
+    0x54, 0x52, 0x44, 0x45, 0x47, 0x5e, 0x4c, 0x4a, 0x5e, 0x71, 0x56, 0x44,
+    0x4c, 0x4b, 0x4c, 0x4e, 0x49, 0x69, 0x50, 0x53, 0x4d, 0x5c, 0x59, 0x50,
+    0x36, 0x5d, 0x46, 0x5b, 0x51, 0x55, 0x55, 0x51, 0x36, 0x5a, 0x53, 0x56,
+    0x54, 0x4a, 0x55, 0x53, 0x3c, 0x52, 0x4a, 0x45, 0x4c, 0x56, 0x49, 0x46,
+    0x4f, 0x5b, 0x43, 0x4b, 0x49, 0x4c, 0x4b, 0x41, 0x44, 0x4b, 0x47, 0x4b,
+    0x4b, 0x54, 0x4a, 0x4c, 0x49, 0x44, 0x46, 0x46, 0x48, 0x49, 0x47, 0x4a,
+    0x40, 0x4e, 0x47, 0x53, 0x4a, 0x47, 0x4a, 0x3b, 0x48, 0x4b, 0x50, 0x51,
+    0x50, 0x44, 0x4d, 0x49, 0x42, 0x4b, 0x43, 0x48, 0x4a, 0x43, 0x4d, 0x4d,
+    0x49, 0x4d, 0x43, 0x4f, 0x50, 0x49, 0x47, 0x48, 0x48, 0x4f, 0x49, 0x41,
+    0x4c, 0x46, 0x47, 0x3e, 0x51, 0x4d, 0x4e, 0x42, 0x3d, 0x53, 0x4d, 0x3b,
+    0x53, 0x52, 0x4c, 0x4c, 0x43, 0x46, 0x43, 0x3d, 0x53, 0x48, 0x43, 0x4e,
+    0x45, 0x52, 0x4d, 0x4a, 0x44, 0x49, 0x47, 0x4c, 0x4e, 0x4c, 0x4a, 0x4e,
+    0x41, 0x48, 0x4b, 0x44, 0x4d, 0x4a, 0x4d, 0x44, 0x4a, 0x45, 0x4f, 0x52,
+    0x45, 0x3f, 0x4b, 0x48, 0x43, 0x41, 0x3d, 0x53, 0x53, 0x50, 0x4a, 0x56,
+    0x4d, 0x3e, 0x55, 0x4e, 0x56, 0x5e, 0x52, 0x52, 0x54, 0x50, 0x42, 0x4a,
+    0x4d, 0x5f, 0x4f, 0x49, 0x5d, 0x6f, 0x55, 0x4a, 0x47, 0x49, 0x4e, 0x4a,
+    0x43, 0x6e, 0x4e, 0x4f, 0x52, 0x59, 0x62, 0x4b, 0x3e, 0x5c, 0x4c, 0x4e,
+    0x45, 0x52, 0x43, 0x4d, 0x3c, 0x58, 0x52, 0x49, 0x48, 0x55, 0x53, 0x4e,
+    0x3d, 0x4e, 0x4c, 0x4b, 0x4b, 0x50, 0x4a, 0x47, 0x45, 0x62, 0x50, 0x49,
+    0x48, 0x4b, 0x55, 0x45, 0x46, 0x51, 0x41, 0x55, 0x54, 0x55, 0x50, 0x47,
+    0x46, 0x4d, 0x46, 0x4b, 0x41, 0x49, 0x4c, 0x40, 0x45, 0x4f, 0x52, 0x54,
+    0x45, 0x4d, 0x53, 0x3a, 0x4c, 0x55, 0x4e, 0x48, 0x44, 0x45, 0x56, 0x3c,
+    0x48, 0x46, 0x4b, 0x51, 0x53, 0x43, 0x41, 0x49, 0x4c, 0x52, 0x48, 0x42,
+    0x48, 0x3f, 0x4c, 0x38, 0x46, 0x50, 0x4a, 0x44, 0x50, 0x54, 0x4e, 0x38,
+    0x48, 0x42, 0x43, 0x4a, 0x4c, 0x44, 0x47, 0x42, 0x42, 0x46, 0x4a, 0x50,
+    0x47, 0x4b, 0x43, 0x40, 0x44, 0x46, 0x46, 0x4d, 0x50, 0x4a, 0x4e, 0x51,
+    0x44, 0x40, 0x50, 0x43, 0x52, 0x4d, 0x42, 0x4c, 0x50, 0x41, 0x4a, 0x4e,
+    0x45, 0x49, 0x4d, 0x40, 0x46, 0x51, 0x43, 0x4b, 0x48, 0x47, 0x42, 0x55,
+    0x4a, 0x41, 0x4f, 0x49, 0x4f, 0x4e, 0x47, 0x4c, 0x4a, 0x48, 0x50, 0x4e,
+    0x50, 0x57, 0x4e, 0x56, 0x56, 0x4e, 0x44, 0x48, 0x4a, 0x5b, 0x55, 0x49,
+    0x59, 0x67, 0x54, 0x46, 0x4f, 0x41, 0x4d, 0x4e, 0x4a, 0x63, 0x4d, 0x44,
+    0x53, 0x5b, 0x59, 0x4f, 0x43, 0x55, 0x56, 0x4e, 0x55, 0x4c, 0x4b, 0x54,
+    0x3c, 0x56, 0x4d, 0x50, 0x4f, 0x4a, 0x5a, 0x47, 0x48, 0x56, 0x4f, 0x4f,
+    0x50, 0x51, 0x48, 0x4e, 0x4d, 0x50, 0x4e, 0x45, 0x4b, 0x48, 0x4e, 0x44,
+    0x46, 0x4d, 0x43, 0x46, 0x41, 0x59, 0x53, 0x4b, 0x4a, 0x3e, 0x51, 0x47,
+    0x43, 0x48, 0x52, 0x3f, 0x43, 0x50, 0x4b, 0x4f, 0x41, 0x48, 0x43, 0x2e,
+    0x4d, 0x4e, 0x4c, 0x45, 0x45, 0x46, 0x4b, 0x43, 0x46, 0x49, 0x46, 0x4d,
+    0x47, 0x4e, 0x4d, 0x3c, 0x47, 0x4a, 0x52, 0x4e, 0x41, 0x50, 0x43, 0x3a,
+    0x50, 0x47, 0x4a, 0x45, 0x52, 0x4a, 0x4c, 0x3f, 0x42, 0x3d, 0x49, 0x48,
+    0x48, 0x4c, 0x42, 0x3a, 0x40, 0x47, 0x46, 0x4e, 0x44, 0x52, 0x46, 0x44,
+    0x4a, 0x44, 0x43, 0x49, 0x42, 0x45, 0x3f, 0x50, 0x4c, 0x44, 0x48, 0x43,
+    0x47, 0x4a, 0x48, 0x48, 0x3e, 0x45, 0x43, 0x48, 0x4a, 0x48, 0x53, 0x4b,
+    0x50, 0x49, 0x43, 0x4d, 0x53, 0x4f, 0x4b, 0x4b, 0x40, 0x42, 0x50, 0x4d,
+    0x53, 0x4e, 0x44, 0x4d, 0x45, 0x3d, 0x51, 0x51, 0x4f, 0x59, 0x4b, 0x51,
+    0x4a, 0x4e, 0x42, 0x40, 0x49, 0x5b, 0x4b, 0x43, 0x53, 0x60, 0x47, 0x49,
+    0x4a, 0x44, 0x44, 0x48, 0x4b, 0x60, 0x51, 0x3f, 0x4b, 0x5b, 0x4f, 0x4a,
+    0x4a, 0x50, 0x49, 0x46, 0x55, 0x50, 0x4b, 0x4c, 0x40, 0x4e, 0x51, 0x4f,
+    0x4b, 0x51, 0x54, 0x50, 0x48, 0x4e, 0x4a, 0x4f, 0x4d, 0x4e, 0x54, 0x4d,
+    0x41, 0x50, 0x4e, 0x47, 0x47, 0x47, 0x54, 0x3b, 0x51, 0x54, 0x50, 0x49,
+    0x48, 0x4c, 0x4e, 0x47, 0x3f, 0x3c, 0x4c, 0x43, 0x45, 0x42, 0x45, 0x37,
+    0x41, 0x52, 0x49, 0x47, 0x4e, 0x4a, 0x4b, 0x37, 0x48, 0x4d, 0x4e, 0x4a,
+    0x42, 0x56, 0x3d, 0x35, 0x48, 0x42, 0x4b, 0x4a, 0x44, 0x52, 0x40, 0x48,
+    0x4f, 0x49, 0x4f, 0x4c, 0x4d, 0x43, 0x49, 0x38, 0x4b, 0x42, 0x48, 0x42,
+    0x45, 0x45, 0x54, 0x3a, 0x47, 0x47, 0x52, 0x45, 0x4a, 0x48, 0x47, 0x39,
+    0x4d, 0x45, 0x54, 0x4b, 0x4e, 0x4f, 0x4e, 0x38, 0x4a, 0x4b, 0x48, 0x45,
+    0x4e, 0x43, 0x4e, 0x4e, 0x46, 0x4e, 0x4e, 0x50, 0x46, 0x4c, 0x42, 0x45,
+    0x4b, 0x46, 0x47, 0x4d, 0x49, 0x3f, 0x4f, 0x50, 0x46, 0x4a, 0x47, 0x4e,
+    0x4a, 0x3e, 0x50, 0x46, 0x47, 0x40, 0x4f, 0x47, 0x51, 0x4b, 0x43, 0x46,
+    0x4a, 0x42, 0x55, 0x4d, 0x46, 0x63, 0x49, 0x4e, 0x4f, 0x4f, 0x42, 0x45,
+    0x50, 0x57, 0x49, 0x3e, 0x57, 0x63, 0x45, 0x4a, 0x49, 0x50, 0x41, 0x4a,
+    0x48, 0x64, 0x4f, 0x42, 0x47, 0x58, 0x4b, 0x45, 0x43, 0x57, 0x49, 0x58,
+    0x51, 0x51, 0x47, 0x43, 0x51, 0x4b, 0x4a, 0x45, 0x50, 0x54, 0x4d, 0x4d,
+    0x3e, 0x4a, 0x50, 0x40, 0x51, 0x4f, 0x52, 0x48, 0x53, 0x49, 0x44, 0x4b,
+    0x51, 0x4b, 0x50, 0x42, 0x4d, 0x49, 0x4a, 0x46, 0x44, 0x50, 0x47, 0x3f,
+    0x48, 0x47, 0x41, 0x4a, 0x42, 0x52, 0x4a, 0x33, 0x50, 0x50, 0x54, 0x3f,
+    0x44, 0x4e, 0x51, 0x3c, 0x4e, 0x51, 0x48, 0x4b, 0x47, 0x49, 0x3f, 0x3d,
+    0x4e, 0x46, 0x4a, 0x41, 0x40, 0x50, 0x49, 0x40, 0x4a, 0x4b, 0x45, 0x50,
+    0x4e, 0x4d, 0x4b, 0x39, 0x4e, 0x4b, 0x48, 0x3c, 0x47, 0x44, 0x4c, 0x42,
+    0x45, 0x50, 0x3e, 0x54, 0x4d, 0x49, 0x48, 0x3c, 0x45, 0x42, 0x55, 0x4a,
+    0x41, 0x4f, 0x40, 0x3f, 0x47, 0x46, 0x46, 0x44, 0x4f, 0x47, 0x46, 0x44,
+    0x41, 0x40, 0x44, 0x48, 0x3e, 0x3c, 0x46, 0x3e, 0x4a, 0x45, 0x4c, 0x52,
+    0x47, 0x42, 0x47, 0x3f, 0x47, 0x4e, 0x4b, 0x53, 0x4a, 0x3d, 0x4d, 0x47,
+    0x4f, 0x3d, 0x4e, 0x43, 0x4f, 0x46, 0x43, 0x43, 0x46, 0x41, 0x4f, 0x42,
+    0x46, 0x57, 0x4d, 0x51, 0x49, 0x51, 0x4c, 0x44, 0x51, 0x4f, 0x46, 0x44,
+    0x54, 0x5d, 0x4f, 0x40, 0x59, 0x46, 0x53, 0x46, 0x48, 0x54, 0x43, 0x45,
+    0x4d, 0x51, 0x4f, 0x44, 0x44, 0x53, 0x49, 0x4e, 0x48, 0x46, 0x44, 0x4a,
+    0x4a, 0x42, 0x4c, 0x46, 0x54, 0x4f, 0x52, 0x47, 0x46, 0x44, 0x4c, 0x4d,
+    0x4c, 0x47, 0x4d, 0x40, 0x55, 0x58, 0x46, 0x46, 0x3f, 0x3e, 0x47, 0x36,
+    0x3f, 0x4d, 0x4b, 0x4d, 0x4f, 0x4f, 0x48, 0x34, 0x4d, 0x46, 0x46, 0x50,
+    0x50, 0x4b, 0x47, 0x45, 0x4e, 0x49, 0x50, 0x4f, 0x4a, 0x48, 0x4f, 0x39,
+    0x53, 0x4c, 0x4b, 0x56, 0x45, 0x4f, 0x55, 0x3a, 0x40, 0x53, 0x43, 0x4b,
+    0x47, 0x3d, 0x4c, 0x34, 0x4b, 0x4e, 0x4a, 0x4b, 0x4d, 0x49, 0x4e, 0x40,
+    0x4d, 0x48, 0x40, 0x4a, 0x4a, 0x4b, 0x4a, 0x42, 0x4c, 0x52, 0x43, 0x42,
+    0x44, 0x3f, 0x4e, 0x42, 0x44, 0x45, 0x40, 0x3d, 0x4b, 0x45, 0x4a, 0x43,
+    0x4b, 0x4b, 0x4e, 0x46, 0x55, 0x43, 0x44, 0x3f, 0x44, 0x43, 0x4b, 0x4b,
+    0x45, 0x51, 0x48, 0x49, 0x3d, 0x44, 0x4a, 0x4a, 0x50, 0x50, 0x47, 0x44,
+    0x4f, 0x3e, 0x3f, 0x43, 0x4c, 0x46, 0x4a, 0x4e, 0x4c, 0x52, 0x48, 0x4e,
+    0x48, 0x46, 0x45, 0x48, 0x41, 0x4f, 0x51, 0x48, 0x40, 0x4d, 0x4a, 0x4b,
+    0x4c, 0x51, 0x49, 0x50, 0x4e, 0x4b, 0x4a, 0x42, 0x49, 0x54, 0x4e, 0x43,
+    0x52, 0x47, 0x4a, 0x41, 0x42, 0x51, 0x48, 0x4a, 0x46, 0x45, 0x4a, 0x43,
+    0x4e, 0x4f, 0x41, 0x49, 0x4b, 0x42, 0x40, 0x4a, 0x50, 0x41, 0x42, 0x3f,
+    0x49, 0x4a, 0x40, 0x3e, 0x3f, 0x42, 0x4d, 0x51, 0x4e, 0x4e, 0x47, 0x41,
+    0x4e, 0x4e, 0x49, 0x4b, 0x41, 0x45, 0x51, 0x40, 0x45, 0x4c, 0x3f, 0x42,
+    0x4c, 0x45, 0x4d, 0x39, 0x46, 0x52, 0x4a, 0x4e, 0x4c, 0x49, 0x4e, 0x43,
+    0x43, 0x4c, 0x48, 0x46, 0x48, 0x49, 0x50, 0x3a, 0x3f, 0x49, 0x42, 0x4f,
+    0x42, 0x4d, 0x4e, 0x3f, 0x51, 0x4b, 0x4e, 0x4b, 0x51, 0x44, 0x43, 0x4a,
+    0x4a, 0x4c, 0x50, 0x48, 0x45, 0x47, 0x4d, 0x41, 0x47, 0x45, 0x51, 0x41,
+    0x42, 0x48, 0x4c, 0x39, 0x51, 0x45, 0x46, 0x53, 0x4b, 0x50, 0x46, 0x45,
+    0x4b, 0x4d, 0x42, 0x4b, 0x3f, 0x45, 0x4b, 0x4e, 0x50, 0x50, 0x47, 0x4a,
+    0x45, 0x40, 0x4b, 0x43, 0x3f, 0x4a, 0x41, 0x42, 0x51, 0x41, 0x4d, 0x42,
+    0x53, 0x48, 0x48, 0x49, 0x4b, 0x40, 0x42, 0x3d, 0x4f, 0x53, 0x49, 0x46,
+    0x46, 0x43, 0x42, 0x44, 0x46, 0x48, 0x3f, 0x46, 0x31, 0x43, 0x4d, 0x4b,
+    0x48, 0x4d, 0x4c, 0x43, 0x45, 0x53, 0x50, 0x40, 0x4a, 0x48, 0x45, 0x3b,
+    0x4f, 0x4d, 0x53, 0x4c, 0x44, 0x54, 0x50, 0x66, 0x3f, 0x45, 0x4c, 0x4c,
+    0x4a, 0x49, 0x49, 0x4a, 0x40, 0x52, 0x3e, 0x4c, 0x49, 0x40, 0x44, 0x49,
+    0x48, 0x3f, 0x45, 0x5b, 0x49, 0x4b, 0x4c, 0x44, 0x50, 0x4e, 0x4a, 0x4a,
+    0x49, 0x4e, 0x4f, 0x47, 0x46, 0x4b, 0x44, 0x3b, 0x4e, 0x4b, 0x48, 0x46,
+    0x45, 0x45, 0x3d, 0x35, 0x4c, 0x49, 0x54, 0x42, 0x51, 0x46, 0x49, 0x2d,
+    0x43, 0x4a, 0x53, 0x49, 0x49, 0x42, 0x4f, 0x40, 0x4e, 0x50, 0x54, 0x51,
+    0x4b, 0x45, 0x48, 0x35, 0x4d, 0x41, 0x51, 0x40, 0x41, 0x49, 0x4a, 0x3b,
+    0x45, 0x50, 0x48, 0x51, 0x51, 0x4d, 0x4c, 0x36, 0x47, 0x4a, 0x44, 0x45,
+    0x4d, 0x47, 0x43, 0x3a, 0x48, 0x40, 0x42, 0x4f, 0x4f, 0x4f, 0x4f, 0x43,
+    0x4a, 0x41, 0x4b, 0x53, 0x43, 0x46, 0x4f, 0x39, 0x46, 0x4a, 0x4d, 0x53,
+    0x41, 0x44, 0x4e, 0x44, 0x3f, 0x47, 0x4c, 0x4d, 0x4d, 0x43, 0x45, 0x3d,
+    0x43, 0x4b, 0x3e, 0x48, 0x42, 0x4c, 0x47, 0x42, 0x42, 0x50, 0x49, 0x4b,
+    0x43, 0x4e, 0x44, 0x44, 0x4c, 0x3d, 0x4c, 0x47, 0x4e, 0x42, 0x4b, 0x44,
+    0x4b, 0x44, 0x3f, 0x49, 0x33, 0x46, 0x4a, 0x4a, 0x42, 0x57, 0x5e, 0x4a,
+    0x46, 0x4f, 0x55, 0x3c, 0x4a, 0x4b, 0x4c, 0x43, 0x51, 0x59, 0x64, 0x51,
+    0x45, 0x60, 0x4b, 0x65, 0x46, 0x4a, 0x4e, 0x49, 0x41, 0x4b, 0x50, 0x5c,
+    0x48, 0x4b, 0x3e, 0x52, 0x4f, 0x2f, 0x4e, 0x4a, 0x45, 0x53, 0x48, 0x59,
+    0x4c, 0x4e, 0x4a, 0x4d, 0x49, 0x40, 0x52, 0x44, 0x49, 0x46, 0x4e, 0x46,
+    0x42, 0x4b, 0x4a, 0x4b, 0x4b, 0x4b, 0x4f, 0x52, 0x46, 0x50, 0x4d, 0x3d,
+    0x46, 0x4b, 0x4b, 0x40, 0x4d, 0x3f, 0x43, 0x33, 0x4e, 0x53, 0x4b, 0x4a,
+    0x45, 0x48, 0x4c, 0x2e, 0x48, 0x4f, 0x49, 0x42, 0x54, 0x4f, 0x4b, 0x2b,
+    0x55, 0x4e, 0x43, 0x4d, 0x4d, 0x47, 0x42, 0x3e, 0x48, 0x48, 0x4d, 0x54,
+    0x52, 0x4f, 0x43, 0x37, 0x4b, 0x42, 0x4b, 0x4e, 0x49, 0x49, 0x4b, 0x2e,
+    0x45, 0x4e, 0x48, 0x4e, 0x44, 0x49, 0x48, 0x30, 0x4c, 0x4b, 0x3f, 0x42,
+    0x4f, 0x4f, 0x4e, 0x38, 0x4f, 0x42, 0x54, 0x49, 0x41, 0x42, 0x45, 0x3a,
+    0x47, 0x43, 0x43, 0x4b, 0x49, 0x40, 0x4d, 0x38, 0x52, 0x4c, 0x3d, 0x4d,
+    0x43, 0x54, 0x4e, 0x41, 0x4a, 0x47, 0x44, 0x51, 0x47, 0x48, 0x41, 0x47,
+    0x4d, 0x41, 0x46, 0x4c, 0x4d, 0x46, 0x51, 0x4a, 0x49, 0x46, 0x4a, 0x42,
+    0x3a, 0x43, 0x4a, 0x4b, 0x43, 0x4c, 0x68, 0x44, 0x4b, 0x52, 0x50, 0x37,
+    0x4d, 0x4c, 0x57, 0x4c, 0x68, 0x62, 0x64, 0x4a, 0x3e, 0x64, 0x4b, 0x66,
+    0x48, 0x4d, 0x54, 0x57, 0x4b, 0x52, 0x49, 0x5c, 0x4d, 0x55, 0x51, 0x57,
+    0x4c, 0x3a, 0x48, 0x43, 0x3b, 0x43, 0x52, 0x5d, 0x45, 0x4e, 0x51, 0x4d,
+    0x4a, 0x55, 0x4e, 0x4c, 0x44, 0x51, 0x4c, 0x4f, 0x41, 0x4f, 0x4a, 0x43,
+    0x53, 0x48, 0x47, 0x49, 0x46, 0x52, 0x48, 0x3e, 0x4b, 0x4e, 0x4a, 0x50,
+    0x4f, 0x47, 0x3e, 0x2e, 0x4b, 0x51, 0x4a, 0x44, 0x4c, 0x49, 0x4f, 0x26,
+    0x48, 0x4f, 0x44, 0x51, 0x48, 0x3f, 0x4c, 0x30, 0x4e, 0x48, 0x4d, 0x48,
+    0x48, 0x44, 0x4b, 0x2f, 0x50, 0x41, 0x4d, 0x50, 0x52, 0x42, 0x45, 0x33,
+    0x4c, 0x48, 0x48, 0x3d, 0x46, 0x41, 0x43, 0x38, 0x45, 0x4f, 0x48, 0x4b,
+    0x41, 0x49, 0x4c, 0x2f, 0x53, 0x4c, 0x48, 0x4a, 0x47, 0x40, 0x4a, 0x31,
+    0x52, 0x40, 0x49, 0x4c, 0x3f, 0x48, 0x48, 0x39, 0x48, 0x3f, 0x45, 0x43,
+    0x40, 0x48, 0x3c, 0x40, 0x4c, 0x48, 0x48, 0x4d, 0x3e, 0x42, 0x4a, 0x3d,
+    0x4c, 0x45, 0x44, 0x46, 0x44, 0x45, 0x4a, 0x47, 0x52, 0x48, 0x4a, 0x4d,
+    0x3f, 0x49, 0x4c, 0x4c, 0x48, 0x44, 0x4c, 0x44, 0x3d, 0x41, 0x47, 0x45,
+    0x43, 0x4a, 0x5a, 0x3f, 0x48, 0x5d, 0x50, 0x35, 0x47, 0x4f, 0x5b, 0x46,
+    0x6e, 0x50, 0x6d, 0x44, 0x49, 0x6a, 0x53, 0x6b, 0x4b, 0x4b, 0x4f, 0x62,
+    0x45, 0x57, 0x48, 0x5b, 0x40, 0x4b, 0x4f, 0x63, 0x48, 0x3a, 0x4b, 0x42,
+    0x43, 0x53, 0x41, 0x5f, 0x54, 0x3e, 0x4d, 0x43, 0x3d, 0x4c, 0x46, 0x46,
+    0x49, 0x56, 0x4b, 0x45, 0x47, 0x45, 0x4e, 0x4f, 0x4c, 0x4d, 0x4f, 0x47,
+    0x49, 0x4b, 0x51, 0x33, 0x4b, 0x45, 0x4d, 0x41, 0x51, 0x4a, 0x43, 0x2a,
+    0x50, 0x4b, 0x4a, 0x4b, 0x4c, 0x52, 0x4c, 0x3b, 0x45, 0x4c, 0x51, 0x44,
+    0x4c, 0x48, 0x43, 0x35, 0x51, 0x50, 0x48, 0x49, 0x3f, 0x48, 0x3d, 0x3b,
+    0x52, 0x3f, 0x42, 0x4b, 0x49, 0x49, 0x47, 0x38, 0x4a, 0x4a, 0x41, 0x52,
+    0x41, 0x3e, 0x4b, 0x2f, 0x46, 0x4d, 0x49, 0x44, 0x46, 0x3b, 0x47, 0x36,
+    0x46, 0x3f, 0x49, 0x48, 0x47, 0x42, 0x42, 0x35, 0x44, 0x4b, 0x4d, 0x56,
+    0x50, 0x49, 0x43, 0x42, 0x4b, 0x3e, 0x53, 0x44, 0x4a, 0x43, 0x47, 0x38,
+    0x4a, 0x45, 0x4d, 0x3f, 0x46, 0x4a, 0x47, 0x3a, 0x4c, 0x3e, 0x47, 0x45,
+    0x46, 0x4b, 0x45, 0x49, 0x4a, 0x4b, 0x54, 0x49, 0x4a, 0x53, 0x4a, 0x4c,
+    0x45, 0x48, 0x53, 0x42, 0x4b, 0x47, 0x4e, 0x50, 0x3d, 0x51, 0x60, 0x3e,
+    0x53, 0x5d, 0x51, 0x30, 0x45, 0x50, 0x59, 0x4e, 0x62, 0x52, 0x68, 0x51,
+    0x45, 0x6c, 0x4c, 0x64, 0x4d, 0x47, 0x55, 0x61, 0x44, 0x57, 0x44, 0x58,
+    0x44, 0x4a, 0x53, 0x58, 0x47, 0x31, 0x3f, 0x4c, 0x43, 0x45, 0x48, 0x5e,
+    0x41, 0x43, 0x3f, 0x43, 0x51, 0x46, 0x48, 0x4b, 0x4d, 0x5b, 0x45, 0x4b,
+    0x48, 0x46, 0x3f, 0x45, 0x47, 0x45, 0x40, 0x4a, 0x51, 0x51, 0x3d, 0x3f,
+    0x43, 0x45, 0x4d, 0x4a, 0x47, 0x50, 0x49, 0x32, 0x4c, 0x5a, 0x55, 0x4f,
+    0x4c, 0x51, 0x43, 0x37, 0x40, 0x59, 0x49, 0x49, 0x4e, 0x4f, 0x47, 0x34,
+    0x40, 0x4c, 0x4a, 0x41, 0x4a, 0x47, 0x4a, 0x42, 0x4e, 0x4a, 0x48, 0x4e,
+    0x4e, 0x4e, 0x45, 0x39, 0x4e, 0x45, 0x45, 0x4e, 0x4c, 0x48, 0x4a, 0x35,
+    0x45, 0x4c, 0x49, 0x4f, 0x51, 0x43, 0x3c, 0x3a, 0x4a, 0x4a, 0x46, 0x48,
+    0x49, 0x42, 0x4e, 0x2f, 0x42, 0x4e, 0x45, 0x50, 0x51, 0x40, 0x45, 0x32,
+    0x4a, 0x4d, 0x44, 0x4e, 0x48, 0x48, 0x47, 0x2f, 0x48, 0x4b, 0x49, 0x44,
+    0x48, 0x4d, 0x46, 0x3b, 0x46, 0x4a, 0x41, 0x4e, 0x4e, 0x47, 0x54, 0x4b,
+    0x45, 0x49, 0x45, 0x44, 0x45, 0x48, 0x4a, 0x46, 0x55, 0x49, 0x47, 0x49,
+    0x4b, 0x42, 0x48, 0x4f, 0x3f, 0x52, 0x60, 0x39, 0x4b, 0x5e, 0x55, 0x2e,
+    0x48, 0x50, 0x59, 0x4f, 0x68, 0x5f, 0x64, 0x4f, 0x3b, 0x71, 0x50, 0x63,
+    0x4f, 0x50, 0x50, 0x6c, 0x4b, 0x55, 0x47, 0x5b, 0x4c, 0x40, 0x48, 0x59,
+    0x4f, 0x2e, 0x4b, 0x4c, 0x4e, 0x4e, 0x46, 0x61, 0x50, 0x41, 0x4c, 0x4a,
+    0x44, 0x3e, 0x3f, 0x47, 0x4b, 0x4f, 0x47, 0x4b, 0x47, 0x3d, 0x41, 0x49,
+    0x49, 0x3f, 0x4d, 0x44, 0x4a, 0x4d, 0x45, 0x41, 0x4d, 0x43, 0x49, 0x3c,
+    0x49, 0x57, 0x49, 0x3b, 0x49, 0x59, 0x3f, 0x4f, 0x4e, 0x49, 0x4e, 0x46,
+    0x52, 0x4e, 0x4c, 0x54, 0x4a, 0x48, 0x48, 0x3a, 0x44, 0x4a, 0x4f, 0x4a,
+    0x44, 0x4b, 0x43, 0x4d, 0x51, 0x42, 0x53, 0x4d, 0x52, 0x41, 0x4d, 0x43,
+    0x4e, 0x54, 0x4b, 0x42, 0x4b, 0x3f, 0x53, 0x45, 0x3f, 0x4a, 0x45, 0x50,
+    0x3f, 0x4c, 0x4f, 0x43, 0x46, 0x42, 0x4b, 0x4d, 0x4c, 0x3b, 0x48, 0x40,
+    0x4e, 0x4e, 0x49, 0x46, 0x4d, 0x4d, 0x52, 0x40, 0x4e, 0x4f, 0x46, 0x4a,
+    0x40, 0x4b, 0x4c, 0x40, 0x4f, 0x4a, 0x44, 0x41, 0x46, 0x3c, 0x40, 0x3d,
+    0x44, 0x48, 0x4a, 0x50, 0x46, 0x53, 0x46, 0x40, 0x44, 0x3e, 0x47, 0x43,
+    0x48, 0x3d, 0x4e, 0x3e, 0x48, 0x49, 0x4b, 0x49, 0x4c, 0x3e, 0x4c, 0x4a,
+    0x46, 0x4e, 0x62, 0x3c, 0x59, 0x60, 0x51, 0x29, 0x47, 0x52, 0x59, 0x4c,
+    0x67, 0x68, 0x68, 0x4e, 0x3b, 0x72, 0x4d, 0x68, 0x44, 0x4f, 0x53, 0x63,
+    0x47, 0x5a, 0x45, 0x4f, 0x4b, 0x37, 0x43, 0x5b, 0x4b, 0x3d, 0x44, 0x41,
+    0x4a, 0x4b, 0x3c, 0x64, 0x48, 0x38, 0x42, 0x3f, 0x48, 0x46, 0x4b, 0x46,
+    0x46, 0x4f, 0x46, 0x46, 0x44, 0x3c, 0x4b, 0x4f, 0x4d, 0x4a, 0x4b, 0x46,
+    0x4d, 0x4f, 0x4f, 0x3f, 0x3a, 0x4b, 0x55, 0x3c, 0x51, 0x56, 0x4d, 0x42,
+    0x52, 0x5a, 0x3e, 0x4b, 0x54, 0x57, 0x4e, 0x4d, 0x4e, 0x5b, 0x4e, 0x49,
+    0x4e, 0x3c, 0x40, 0x41, 0x40, 0x4d, 0x48, 0x42, 0x49, 0x4e, 0x4f, 0x47,
+    0x47, 0x48, 0x50, 0x49, 0x51, 0x46, 0x44, 0x45, 0x49, 0x46, 0x43, 0x48,
+    0x48, 0x49, 0x4d, 0x4c, 0x45, 0x4f, 0x4c, 0x45, 0x44, 0x40, 0x49, 0x45,
+    0x49, 0x51, 0x4b, 0x4b, 0x50, 0x4b, 0x48, 0x3d, 0x4e, 0x52, 0x4a, 0x47,
+    0x49, 0x41, 0x55, 0x3d, 0x48, 0x4d, 0x49, 0x48, 0x4e, 0x4c, 0x48, 0x3d,
+    0x3f, 0x4c, 0x4e, 0x53, 0x3e, 0x48, 0x4a, 0x3f, 0x54, 0x4d, 0x54, 0x4b,
+    0x47, 0x4e, 0x44, 0x48, 0x49, 0x4b, 0x4c, 0x49, 0x4d, 0x42, 0x52, 0x4b,
+    0x40, 0x3e, 0x54, 0x49, 0x55, 0x45, 0x47, 0x4d, 0x45, 0x5c, 0x60, 0x40,
+    0x57, 0x60, 0x5b, 0x27, 0x4a, 0x5a, 0x64, 0x53, 0x6a, 0x5a, 0x5f, 0x52,
+    0x3a, 0x72, 0x4b, 0x5f, 0x45, 0x56, 0x5f, 0x5f, 0x54, 0x5f, 0x39, 0x52,
+    0x51, 0x3e, 0x3b, 0x5a, 0x44, 0x32, 0x46, 0x50, 0x3a, 0x4f, 0x44, 0x5d,
+    0x4c, 0x41, 0x39, 0x3f, 0x45, 0x46, 0x3b, 0x43, 0x46, 0x51, 0x3c, 0x4c,
+    0x4b, 0x43, 0x4b, 0x51, 0x43, 0x48, 0x4d, 0x43, 0x38, 0x46, 0x46, 0x43,
+    0x44, 0x4a, 0x46, 0x49, 0x48, 0x50, 0x4e, 0x4a, 0x4e, 0x58, 0x4a, 0x49,
+    0x48, 0x4f, 0x4a, 0x49, 0x41, 0x57, 0x51, 0x50, 0x4b, 0x48, 0x47, 0x4b,
+    0x53, 0x3d, 0x4b, 0x4c, 0x4b, 0x4b, 0x55, 0x56, 0x45, 0x49, 0x46, 0x4c,
+    0x45, 0x51, 0x47, 0x50, 0x40, 0x4b, 0x4f, 0x4b, 0x4d, 0x4a, 0x4f, 0x50,
+    0x49, 0x53, 0x50, 0x46, 0x40, 0x48, 0x4a, 0x4a, 0x49, 0x4a, 0x42, 0x45,
+    0x4b, 0x45, 0x42, 0x45, 0x4e, 0x4e, 0x44, 0x41, 0x4b, 0x4a, 0x49, 0x3f,
+    0x41, 0x51, 0x48, 0x4c, 0x40, 0x41, 0x51, 0x42, 0x49, 0x49, 0x48, 0x42,
+    0x48, 0x4c, 0x4b, 0x3c, 0x49, 0x45, 0x42, 0x49, 0x4c, 0x46, 0x45, 0x43,
+    0x43, 0x48, 0x48, 0x41, 0x43, 0x42, 0x4c, 0x4b, 0x40, 0x45, 0x44, 0x46,
+    0x4c, 0x4b, 0x4e, 0x4d, 0x3f, 0x59, 0x55, 0x41, 0x56, 0x5a, 0x51, 0x30,
+    0x49, 0x5a, 0x63, 0x4d, 0x61, 0x5b, 0x64, 0x55, 0x34, 0x7a, 0x4c, 0x62,
+    0x3e, 0x5d, 0x56, 0x60, 0x48, 0x61, 0x3f, 0x54, 0x46, 0x40, 0x42, 0x56,
+    0x52, 0x35, 0x4c, 0x59, 0x45, 0x4c, 0x42, 0x60, 0x49, 0x3f, 0x4c, 0x3c,
+    0x52, 0x36, 0x46, 0x3d, 0x58, 0x4b, 0x41, 0x48, 0x3e, 0x45, 0x4e, 0x54,
+    0x4c, 0x56, 0x47, 0x44, 0x39, 0x4a, 0x4a, 0x4a, 0x46, 0x48, 0x4a, 0x48,
+    0x51, 0x4f, 0x4b, 0x49, 0x45, 0x4b, 0x44, 0x4c, 0x3e, 0x4c, 0x42, 0x59,
+    0x47, 0x55, 0x47, 0x47, 0x41, 0x44, 0x44, 0x4a, 0x44, 0x4b, 0x44, 0x46,
+    0x49, 0x5a, 0x48, 0x5d, 0x4f, 0x4a, 0x47, 0x50, 0x48, 0x4e, 0x44, 0x57,
+    0x49, 0x46, 0x42, 0x4d, 0x3d, 0x4a, 0x4a, 0x58, 0x41, 0x4d, 0x3c, 0x47,
+    0x42, 0x4e, 0x4d, 0x49, 0x44, 0x4b, 0x4c, 0x4b, 0x53, 0x42, 0x4a, 0x46,
+    0x4e, 0x56, 0x4b, 0x47, 0x50, 0x43, 0x4f, 0x48, 0x49, 0x50, 0x48, 0x50,
+    0x42, 0x4c, 0x4e, 0x3c, 0x41, 0x4f, 0x4a, 0x41, 0x44, 0x47, 0x4c, 0x42,
+    0x51, 0x4f, 0x53, 0x46, 0x4c, 0x4b, 0x48, 0x51, 0x47, 0x4b, 0x4c, 0x4d,
+    0x4d, 0x49, 0x3d, 0x44, 0x4b, 0x42, 0x43, 0x49, 0x51, 0x47, 0x4c, 0x4b,
+    0x4a, 0x50, 0x5b, 0x43, 0x5b, 0x68, 0x54, 0x31, 0x4c, 0x5d, 0x5c, 0x54,
+    0x63, 0x5a, 0x61, 0x54, 0x3d, 0x7a, 0x51, 0x5b, 0x40, 0x59, 0x5a, 0x62,
+    0x4c, 0x5e, 0x42, 0x58, 0x49, 0x3c, 0x38, 0x50, 0x54, 0x37, 0x42, 0x51,
+    0x4d, 0x4f, 0x42, 0x68, 0x4a, 0x40, 0x4e, 0x40, 0x3f, 0x3e, 0x3f, 0x40,
+    0x54, 0x52, 0x3e, 0x43, 0x46, 0x4a, 0x48, 0x51, 0x4e, 0x4d, 0x42, 0x47,
+    0x3f, 0x51, 0x47, 0x44, 0x3f, 0x4c, 0x46, 0x47, 0x4f, 0x55, 0x4b, 0x4e,
+    0x4c, 0x51, 0x40, 0x51, 0x47, 0x4a, 0x44, 0x5c, 0x48, 0x54, 0x4b, 0x46,
+    0x49, 0x4b, 0x53, 0x59, 0x43, 0x3e, 0x45, 0x4e, 0x4f, 0x58, 0x4b, 0x64,
+    0x41, 0x4b, 0x45, 0x4a, 0x4c, 0x51, 0x47, 0x57, 0x45, 0x46, 0x43, 0x4f,
+    0x4d, 0x4d, 0x49, 0x58, 0x4b, 0x52, 0x43, 0x4b, 0x45, 0x4c, 0x50, 0x4c,
+    0x4e, 0x4b, 0x40, 0x4c, 0x44, 0x4e, 0x4c, 0x47, 0x41, 0x55, 0x45, 0x4a,
+    0x4c, 0x48, 0x46, 0x41, 0x47, 0x52, 0x44, 0x4f, 0x48, 0x49, 0x4b, 0x47,
+    0x50, 0x4f, 0x42, 0x4a, 0x44, 0x4b, 0x52, 0x43, 0x45, 0x4e, 0x46, 0x49,
+    0x45, 0x52, 0x51, 0x45, 0x44, 0x41, 0x4c, 0x46, 0x4c, 0x4b, 0x44, 0x4d,
+    0x4f, 0x48, 0x44, 0x4d, 0x56, 0x48, 0x50, 0x4f, 0x3b, 0x4e, 0x55, 0x43,
+    0x52, 0x62, 0x57, 0x2c, 0x4d, 0x5e, 0x5e, 0x50, 0x64, 0x5b, 0x6a, 0x55,
+    0x39, 0x7d, 0x4b, 0x5e, 0x43, 0x54, 0x5d, 0x5c, 0x4d, 0x5c, 0x42, 0x51,
+    0x4c, 0x3d, 0x46, 0x51, 0x4c, 0x2a, 0x3e, 0x54, 0x47, 0x48, 0x46, 0x64,
+    0x42, 0x3d, 0x47, 0x3f, 0x42, 0x45, 0x49, 0x3b, 0x59, 0x50, 0x4c, 0x46,
+    0x4d, 0x44, 0x47, 0x4d, 0x4a, 0x50, 0x41, 0x48, 0x43, 0x50, 0x3e, 0x44,
+    0x4b, 0x53, 0x48, 0x49, 0x51, 0x51, 0x4d, 0x57, 0x49, 0x4f, 0x53, 0x50,
+    0x46, 0x4f, 0x41, 0x5d, 0x47, 0x46, 0x49, 0x51, 0x45, 0x41, 0x4a, 0x56,
+    0x4f, 0x4e, 0x4d, 0x4a, 0x3e, 0x55, 0x47, 0x65, 0x48, 0x51, 0x4d, 0x4e,
+    0x46, 0x43, 0x48, 0x5b, 0x48, 0x4f, 0x4f, 0x48, 0x4b, 0x4d, 0x4e, 0x5c,
+    0x4f, 0x4c, 0x54, 0x48, 0x4a, 0x4d, 0x4e, 0x4e, 0x44, 0x48, 0x43, 0x52,
+    0x41, 0x52, 0x48, 0x4f, 0x46, 0x4f, 0x51, 0x41, 0x44, 0x45, 0x41, 0x4b,
+    0x43, 0x4e, 0x4e, 0x42, 0x48, 0x41, 0x45, 0x43, 0x44, 0x43, 0x4c, 0x4c,
+    0x51, 0x54, 0x4c, 0x32, 0x46, 0x52, 0x4e, 0x49, 0x40, 0x4d, 0x43, 0x4f,
+    0x4a, 0x4d, 0x4d, 0x49, 0x46, 0x4c, 0x41, 0x4d, 0x41, 0x3a, 0x50, 0x4c,
+    0x5a, 0x4e, 0x49, 0x53, 0x4d, 0x53, 0x53, 0x3d, 0x52, 0x64, 0x55, 0x2a,
+    0x47, 0x5d, 0x61, 0x51, 0x5b, 0x5d, 0x66, 0x52, 0x3f, 0xfd, 0x55, 0x5a,
+    0x4b, 0x54, 0x5b, 0x60, 0x49, 0x5d, 0x43, 0x57, 0x47, 0x41, 0x45, 0x5e,
+    0x4c, 0x28, 0x3e, 0x40, 0x49, 0x4e, 0x40, 0x69, 0x4a, 0x44, 0x45, 0x43,
+    0x45, 0x3d, 0x39, 0x40, 0x4c, 0x53, 0x4b, 0x3d, 0x4e, 0x43, 0x48, 0x55,
+    0x4d, 0x50, 0x4d, 0x49, 0x4f, 0x48, 0x3e, 0x46, 0x47, 0x56, 0x40, 0x48,
+    0x46, 0x53, 0x50, 0x5d, 0x43, 0x54, 0x49, 0x47, 0x49, 0x4c, 0x48, 0x5d,
+    0x49, 0x51, 0x50, 0x3d, 0x41, 0x47, 0x48, 0x64, 0x4b, 0x44, 0x49, 0x41,
+    0x54, 0x48, 0x3d, 0x6b, 0x4c, 0x5a, 0x48, 0x4e, 0x40, 0x4c, 0x52, 0x5f,
+    0x54, 0x4a, 0x3f, 0x48, 0x43, 0x43, 0x44, 0x66, 0x49, 0x47, 0x43, 0x46,
+    0x47, 0x54, 0x42, 0x54, 0x4b, 0x4e, 0x49, 0x49, 0x49, 0x4b, 0x52, 0x4f,
+    0x43, 0x46, 0x4b, 0x49, 0x54, 0x4b, 0x40, 0x48, 0x47, 0x4a, 0x46, 0x47,
+    0x44, 0x47, 0x4c, 0x37, 0x3f, 0x49, 0x45, 0x44, 0x50, 0x49, 0x44, 0x36,
+    0x4d, 0x40, 0x45, 0x49, 0x53, 0x55, 0x44, 0x42, 0x47, 0x48, 0x46, 0x40,
+    0x4f, 0x4c, 0x41, 0x42, 0x52, 0x3a, 0x43, 0x46, 0x55, 0x51, 0x4e, 0x4f,
+    0x48, 0x51, 0x55, 0x48, 0x52, 0x66, 0x4e, 0x33, 0x49, 0x5b, 0x5f, 0x4b,
+    0x5f, 0x5b, 0x66, 0x52, 0x41, 0x7c, 0x4a, 0x59, 0x47, 0x59, 0x58, 0x67,
+    0x49, 0x5e, 0x44, 0x57, 0x49, 0x4c, 0x43, 0x56, 0x41, 0x27, 0x4c, 0x44,
+    0x51, 0x44, 0x42, 0x65, 0x49, 0x44, 0x40, 0x3d, 0x4d, 0x3e, 0x4c, 0x3c,
+    0x4f, 0x4b, 0x45, 0x44, 0x4d, 0x48, 0x47, 0x54, 0x4d, 0x4e, 0x44, 0x42,
+    0x47, 0x44, 0x3d, 0x49, 0x4e, 0x50, 0x49, 0x45, 0x58, 0x4a, 0x54, 0x5c,
+    0x41, 0x49, 0x4f, 0x42, 0x44, 0x4f, 0x4a, 0x62, 0x48, 0x50, 0x48, 0x43,
+    0x51, 0x53, 0x47, 0x6c, 0x40, 0x46, 0x3d, 0x46, 0x4a, 0x50, 0x43, 0x69,
+    0x49, 0x4f, 0x4a, 0x4c, 0x49, 0x46, 0x43, 0x6a, 0x48, 0x50, 0x49, 0x48,
+    0x48, 0x51, 0x4b, 0x65, 0x42, 0x4b, 0x4d, 0x48, 0x44, 0x4e, 0x49, 0x60,
+    0x44, 0x52, 0x42, 0x42, 0x47, 0x48, 0x4b, 0x51, 0x50, 0x4b, 0x3c, 0x4d,
+    0x4c, 0x44, 0x48, 0x55, 0x51, 0x4c, 0x55, 0x4e, 0x52, 0x4c, 0x4b, 0x39,
+    0x48, 0x42, 0x49, 0x49, 0x49, 0x50, 0x49, 0x32, 0x4e, 0x4b, 0x45, 0x4f,
+    0x42, 0x4b, 0x47, 0x50, 0x48, 0x45, 0x54, 0x49, 0x4c, 0x46, 0x40, 0x46,
+    0x43, 0x3d, 0x51, 0x44, 0x53, 0x4f, 0x54, 0x55, 0x43, 0x4f, 0x5b, 0x47,
+    0x53, 0x6c, 0x57, 0x2e, 0x50, 0x55, 0x5a, 0x4d, 0x57, 0x5d, 0x70, 0x50,
+    0x3f, 0x79, 0x4a, 0x5a, 0x4c, 0x58, 0x59, 0x63, 0x45, 0x69, 0x48, 0x58,
+    0x42, 0x4b, 0x43, 0x5c, 0x46, 0x28, 0x48, 0x49, 0x4c, 0x3f, 0x45, 0x58,
+    0x45, 0x44, 0x47, 0x40, 0x4c, 0x42, 0x3e, 0x37, 0x45, 0x54, 0x48, 0x3b,
+    0x4e, 0x48, 0x43, 0x4a, 0x50, 0x4a, 0x49, 0x46, 0x4c, 0x54, 0x3f, 0x4b,
+    0x4e, 0x56, 0x48, 0x49, 0x49, 0x4c, 0x51, 0x5f, 0x4d, 0x4b, 0x43, 0x4d,
+    0x47, 0x51, 0x43, 0x59, 0x45, 0x4e, 0x4f, 0x45, 0x44, 0x54, 0x44, 0x6d,
+    0x47, 0x51, 0x43, 0x4e, 0x4c, 0x4f, 0x43, 0x6d, 0x48, 0x53, 0x4b, 0x47,
+    0x49, 0x48, 0x46, 0x6a, 0x51, 0x4c, 0x4d, 0x45, 0x4e, 0x47, 0x46, 0x62,
+    0x4a, 0x54, 0x51, 0x4c, 0x47, 0x4d, 0x4a, 0x61, 0x3d, 0x50, 0x4c, 0x4c,
+    0x45, 0x3f, 0x3e, 0x54, 0x3d, 0x53, 0x48, 0x47, 0x52, 0x4b, 0x47, 0x51,
+    0x4f, 0x45, 0x4b, 0x4a, 0x4c, 0x46, 0x44, 0x37, 0x42, 0x50, 0x49, 0x4f,
+    0x51, 0x41, 0x44, 0x38, 0x54, 0x40, 0x51, 0x52, 0x3e, 0x43, 0x44, 0x47,
+    0x49, 0x4b, 0x4b, 0x46, 0x53, 0x54, 0x55, 0x4b, 0x4a, 0x37, 0x43, 0x4a,
+    0x51, 0x47, 0x51, 0x54, 0x43, 0x46, 0x56, 0x3d, 0x54, 0x66, 0x4f, 0x30,
+    0x45, 0x52, 0x5a, 0x43, 0x5c, 0x65, 0x5d, 0x52, 0x32, 0x77, 0x53, 0x5f,
+    0x4a, 0x5a, 0x4f, 0x5e, 0x4e, 0x61, 0x4b, 0x5b, 0x4a, 0x53, 0x3e, 0x61,
+    0x47, 0x24, 0x3e, 0x48, 0x4d, 0x43, 0x40, 0x53, 0x4e, 0x41, 0x43, 0x3d,
+    0x50, 0x49, 0x41, 0x3a, 0x4e, 0x4b, 0x48, 0x49, 0x48, 0x49, 0x46, 0x50,
+    0x4f, 0x4b, 0x47, 0x4b, 0x48, 0x52, 0x3e, 0x4d, 0x4d, 0x59, 0x4c, 0x3e,
+    0x52, 0x49, 0x4f, 0x5e, 0x54, 0x59, 0x47, 0x4d, 0x40, 0x4c, 0x4b, 0x64,
+    0x42, 0x4c, 0x53, 0x46, 0x4e, 0x50, 0x46, 0x6a, 0x41, 0x59, 0x44, 0x4b,
+    0x4f, 0x44, 0x52, 0x6c, 0x54, 0x4e, 0x46, 0x48, 0x42, 0x3d, 0x44, 0x67,
+    0x44, 0x4f, 0x47, 0x54, 0x4c, 0x4f, 0x43, 0x61, 0x4c, 0x54, 0x4f, 0x43,
+    0x49, 0x40, 0x4a, 0x5f, 0x4a, 0x52, 0x47, 0x43, 0x4c, 0x43, 0x49, 0x53,
+    0x4c, 0x4b, 0x43, 0x3d, 0x4e, 0x45, 0x49, 0x50, 0x44, 0x53, 0x4f, 0x48,
+    0x4b, 0x46, 0x44, 0x3c, 0x50, 0x42, 0x43, 0x40, 0x47, 0x43, 0x42, 0x34,
+    0x47, 0x42, 0x3f, 0x4a, 0x48, 0x42, 0x48, 0x4c, 0x42, 0x4c, 0x4e, 0x47,
+    0x48, 0x47, 0x51, 0x51, 0x4d, 0x3d, 0x3e, 0x4b, 0x54, 0x4c, 0x4c, 0x59,
+    0x4f, 0x50, 0x57, 0x3c, 0x54, 0x62, 0x54, 0x35, 0x3d, 0x5a, 0x5b, 0x47,
+    0x59, 0x63, 0x66, 0x4d, 0x3c, 0x79, 0x50, 0x5f, 0x45, 0x58, 0x4e, 0x5d,
+    0x48, 0x61, 0x43, 0x54, 0x47, 0x54, 0x4d, 0x54, 0x4b, 0x25, 0x41, 0x44,
+    0x4c, 0x4a, 0x3b, 0x52, 0x47, 0x3c, 0x45, 0x3c, 0x53, 0x44, 0x44, 0x40,
+    0x50, 0x4c, 0x45, 0x3a, 0x4c, 0x51, 0x44, 0x49, 0x4d, 0x52, 0x4d, 0x4b,
+    0x45, 0x52, 0x3d, 0x50, 0x4a, 0x58, 0x4a, 0x47, 0x4d, 0x47, 0x4e, 0x52,
+    0x4f, 0x4d, 0x4f, 0x49, 0x52, 0x52, 0x4c, 0x5e, 0x47, 0x4d, 0x46, 0x4d,
+    0x4c, 0x48, 0x50, 0x70, 0x41, 0x4a, 0x48, 0x3d, 0x45, 0x48, 0x45, 0x74,
+    0x47, 0x4c, 0x43, 0x4f, 0x4a, 0x4a, 0x40, 0x68, 0x52, 0x49, 0x3e, 0x3e,
+    0x4e, 0x4b, 0x4b, 0x69, 0x42, 0x4f, 0x45, 0x47, 0x3f, 0x45, 0x46, 0x56,
+    0x45, 0x4a, 0x47, 0x44, 0x52, 0x4b, 0x53, 0x4e, 0x4e, 0x46, 0x45, 0x40,
+    0x47, 0x4b, 0x53, 0x52, 0x53, 0x51, 0x4f, 0x46, 0x42, 0x43, 0x50, 0x3e,
+    0x48, 0x4e, 0x41, 0x53, 0x4d, 0x48, 0x48, 0x33, 0x40, 0x43, 0x4b, 0x42,
+    0x52, 0x4c, 0x42, 0x4e, 0x41, 0x4e, 0x4f, 0x50, 0x43, 0x49, 0x4d, 0x47,
+    0x4a, 0x3a, 0x3f, 0x51, 0x51, 0x44, 0x4e, 0x54, 0x40, 0x55, 0x59, 0x3c,
+    0x57, 0x67, 0x4e, 0x2e, 0x4c, 0x5b, 0x5b, 0x51, 0x58, 0x63, 0x62, 0x52,
+    0x3c, 0x72, 0x51, 0x5a, 0x4e, 0x53, 0x4a, 0x5c, 0x51, 0x69, 0x42, 0x51,
+    0x48, 0x54, 0x48, 0x57, 0x3e, 0x37, 0x3f, 0x4d, 0x4d, 0x4a, 0x35, 0x57,
+    0x4e, 0x40, 0x45, 0x4a, 0x45, 0x4e, 0x49, 0x40, 0x49, 0x53, 0x51, 0x44,
+    0x4a, 0x50, 0x4b, 0x4b, 0x50, 0x4f, 0x3e, 0x44, 0x45, 0x44, 0x4c, 0x51,
+    0x47, 0x51, 0x46, 0x42, 0x48, 0x50, 0x49, 0x4d, 0x43, 0x54, 0x52, 0x4d,
+    0x4e, 0x4f, 0x3f, 0x63, 0x54, 0x57, 0x41, 0x44, 0x4e, 0x50, 0x4e, 0x66,
+    0x41, 0x53, 0x4b, 0x4d, 0x4e, 0x4f, 0x43, 0x6d, 0x4e, 0x51, 0x49, 0x4f,
+    0x49, 0x4a, 0x4a, 0x6c, 0x4b, 0x4f, 0x3d, 0x47, 0x4d, 0x51, 0x3c, 0x66,
+    0x4b, 0x56, 0x3e, 0x4c, 0x41, 0x46, 0x45, 0x68, 0x47, 0x4b, 0x4a, 0x54,
+    0x53, 0x48, 0x51, 0x59, 0x45, 0x43, 0x50, 0x45, 0x4f, 0x45, 0x42, 0x55,
+    0x48, 0x52, 0x4c, 0x46, 0x52, 0x49, 0x47, 0x3d, 0x55, 0x48, 0x52, 0x52,
+    0x40, 0x4e, 0x47, 0x31, 0x45, 0x4f, 0x42, 0x4a, 0x4e, 0x50, 0x42, 0x4a,
+    0x49, 0x57, 0x46, 0x4b, 0x45, 0x4e, 0x4d, 0x46, 0x47, 0x43, 0x50, 0x4e,
+    0x4f, 0x4c, 0x53, 0x55, 0x45, 0x51, 0x5b, 0x3a, 0x52, 0x64, 0x54, 0x2d,
+    0x42, 0x59, 0x59, 0x45, 0x59, 0x67, 0x69, 0x53, 0x3f, 0x78, 0x50, 0x60,
+    0x4c, 0x4c, 0x5b, 0x53, 0x45, 0x63, 0x49, 0x63, 0x51, 0x4c, 0x41, 0x4e,
+    0x4b, 0x37, 0x45, 0x4e, 0x48, 0x4c, 0x39, 0x55, 0x44, 0x37, 0x3c, 0x49,
+    0x44, 0x56, 0x3e, 0x40, 0x4d, 0x45, 0x4c, 0x43, 0x42, 0x41, 0x40, 0x42,
+    0x57, 0x4f, 0x43, 0x3f, 0x52, 0x53, 0x51, 0x4b, 0x4b, 0x55, 0x46, 0x40,
+    0x49, 0x45, 0x40, 0x4f, 0x47, 0x58, 0x4b, 0x53, 0x4e, 0x52, 0x54, 0x5e,
+    0x4b, 0x51, 0x50, 0x44, 0x50, 0x4b, 0x4f, 0x70, 0x49, 0x4f, 0x4c, 0x50,
+    0x45, 0x56, 0x4b, 0x6b, 0x49, 0x52, 0x4a, 0x3f, 0x44, 0x4b, 0x48, 0x72,
+    0x4c, 0x47, 0x4e, 0x43, 0x46, 0x4c, 0x4f, 0x61, 0x4a, 0x52, 0x52, 0x46,
+    0x4a, 0x4d, 0x46, 0x65, 0x48, 0x4e, 0x4d, 0x4e, 0x46, 0x4e, 0x53, 0x59,
+    0x43, 0x49, 0x43, 0x47, 0x45, 0x47, 0x53, 0x50, 0x3e, 0x4d, 0x41, 0x46,
+    0x4c, 0x4a, 0x4c, 0x35, 0x3f, 0x4f, 0x50, 0x48, 0x47, 0x4d, 0x4c, 0x32,
+    0x45, 0x53, 0x43, 0x4d, 0x4e, 0x4a, 0x3e, 0x4b, 0x55, 0x4f, 0x53, 0x4c,
+    0x4a, 0x4d, 0x48, 0x53, 0x4f, 0x3a, 0x47, 0x4b, 0x4e, 0x4e, 0x51, 0x59,
+    0x41, 0x50, 0x57, 0x38, 0x5d, 0x63, 0x59, 0x2b, 0x45, 0x53, 0x5a, 0x4e,
+    0x5c, 0x60, 0x5e, 0x4c, 0x41, 0x6f, 0x53, 0x5c, 0x48, 0x53, 0x56, 0x54,
+    0x4b, 0x62, 0x46, 0x63, 0x47, 0x4e, 0x40, 0x51, 0x43, 0x36, 0x44, 0x42,
+    0x46, 0x51, 0x41, 0x54, 0x4e, 0x36, 0x40, 0x4b, 0x55, 0x49, 0x40, 0x3f,
+    0x4b, 0x42, 0x4a, 0x4a, 0x48, 0x47, 0x40, 0x43, 0x4d, 0x4f, 0x55, 0x3f,
+    0x53, 0x42, 0x4d, 0x56, 0x49, 0x51, 0x4f, 0x41, 0x3b, 0x48, 0x43, 0x4e,
+    0x4b, 0x5c, 0x4f, 0x45, 0x4a, 0x4c, 0x46, 0x66, 0x43, 0x45, 0x46, 0x48,
+    0x4f, 0x4e, 0x40, 0x71, 0x4b, 0x4e, 0x3e, 0x42, 0x4d, 0x52, 0x42, 0x71,
+    0x4c, 0x54, 0x4f, 0x3f, 0x4c, 0x43, 0x4a, 0x73, 0x48, 0x48, 0x4c, 0x4b,
+    0x4c, 0x4d, 0x40, 0x72, 0x3e, 0x51, 0x49, 0x48, 0x52, 0x53, 0x45, 0x65,
+    0x52, 0x4e, 0x4f, 0x44, 0x4c, 0x43, 0x4a, 0x5e, 0x3e, 0x56, 0x46, 0x55,
+    0x55, 0x43, 0x49, 0x51, 0x4f, 0x52, 0x49, 0x4d, 0x46, 0x47, 0x49, 0x3e,
+    0x51, 0x49, 0x41, 0x53, 0x42, 0x47, 0x46, 0x3b, 0x4d, 0x4e, 0x48, 0x44,
+    0x42, 0x48, 0x4c, 0x47, 0x42, 0x4e, 0x4a, 0x3e, 0x44, 0x54, 0x4a, 0x4d,
+    0x49, 0x41, 0x41, 0x53, 0x52, 0x4c, 0x4c, 0x56, 0x49, 0x4a, 0x5a, 0x3f,
+    0x5b, 0x5c, 0x59, 0x2f, 0x49, 0x52, 0x5a, 0x4e, 0x5a, 0x61, 0x67, 0x4c,
+    0x41, 0x6f, 0x5a, 0x5a, 0x40, 0x5a, 0x54, 0x4e, 0x49, 0x66, 0x45, 0x5a,
+    0x4a, 0x45, 0x44, 0x4b, 0x44, 0x36, 0x41, 0x4c, 0x45, 0x44, 0x3d, 0x51,
+    0x3f, 0x35, 0x3c, 0x46, 0x53, 0x5c, 0x3f, 0x3e, 0x50, 0x43, 0x46, 0x4b,
+    0x40, 0x54, 0x41, 0x47, 0x4b, 0x51, 0x41, 0x46, 0x4a, 0x4d, 0x51, 0x52,
+    0x43, 0x58, 0x45, 0x46, 0x4e, 0x46, 0x4a, 0x4b, 0x44, 0x54, 0x4c, 0x4c,
+    0x43, 0x59, 0x48, 0x61, 0x4e, 0x4f, 0x4d, 0x4d, 0x4a, 0x52, 0x4c, 0x6e,
+    0x49, 0x57, 0x48, 0x4d, 0x46, 0x46, 0x4d, 0x72, 0x4a, 0x4e, 0x47, 0x44,
+    0x49, 0x4f, 0x48, 0x73, 0x42, 0x40, 0x4d, 0x44, 0x4d, 0x57, 0x3e, 0x69,
+    0x50, 0x52, 0x4c, 0x55, 0x46, 0x4c, 0x44, 0x5f, 0x4b, 0x4d, 0x55, 0x4c,
+    0x48, 0x49, 0x4a, 0x5e, 0x47, 0x4b, 0x45, 0x53, 0x55, 0x53, 0x4d, 0x53,
+    0x47, 0x5c, 0x45, 0x4e, 0x4e, 0x52, 0x4c, 0x39, 0x4b, 0x4c, 0x49, 0x46,
+    0x4a, 0x4e, 0x4b, 0x33, 0x46, 0x47, 0x52, 0x41, 0x49, 0x4b, 0x4c, 0x48,
+    0x51, 0x53, 0x44, 0x4c, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x4b, 0x50, 0x47,
+    0x4d, 0x4b, 0x4c, 0x4f, 0x44, 0x45, 0x58, 0x3c, 0x56, 0x5a, 0x56, 0x23,
+    0x4f, 0x4d, 0x5c, 0x4e, 0x59, 0x5a, 0x65, 0x43, 0x45, 0x66, 0x54, 0x5f,
+    0x45, 0x5e, 0x54, 0x4f, 0x48, 0x5f, 0x44, 0x59, 0x48, 0x46, 0x47, 0x49,
+    0x4d, 0x3c, 0x49, 0x54, 0x3e, 0x48, 0x43, 0x5b, 0x4a, 0x35, 0x41, 0x43,
+    0x4b, 0x55, 0x43, 0x38, 0x46, 0x42, 0x4a, 0x4e, 0x54, 0x4b, 0x4d, 0x46,
+    0x43, 0x4e, 0x44, 0x47, 0x56, 0x4c, 0x51, 0x57, 0x41, 0x4d, 0x43, 0x41,
+    0x51, 0x47, 0x41, 0x51, 0x51, 0x4f, 0x46, 0x50, 0x52, 0x4e, 0x4d, 0x60,
+    0x41, 0x49, 0x46, 0x50, 0x48, 0x56, 0x42, 0x6d, 0x40, 0x45, 0x44, 0x55,
+    0x40, 0x4e, 0x40, 0x7c, 0x47, 0x5a, 0x44, 0x44, 0x45, 0x56, 0x55, 0x71,
+    0x47, 0x4b, 0x4b, 0x45, 0x4f, 0x54, 0x4c, 0x73, 0x48, 0x55, 0x44, 0x4d,
+    0x4a, 0x47, 0x49, 0x5e, 0x4d, 0x52, 0x4e, 0x4c, 0x48, 0x52, 0x48, 0x58,
+    0x4c, 0x5a, 0x49, 0x4b, 0x53, 0x46, 0x4d, 0x4b, 0x48, 0x53, 0x41, 0x49,
+    0x4a, 0x56, 0x51, 0x3a, 0x4c, 0x4e, 0x4f, 0x51, 0x4c, 0x59, 0x47, 0x45,
+    0x4f, 0x50, 0x4a, 0x4f, 0x4d, 0x3f, 0x44, 0x4e, 0x42, 0x4a, 0x4a, 0x43,
+    0x46, 0x4e, 0x4c, 0x4f, 0x47, 0x47, 0x4c, 0x4b, 0x52, 0x50, 0x50, 0x4b,
+    0x42, 0x45, 0x54, 0x44, 0x54, 0x59, 0x4c, 0x2b, 0x4d, 0x4c, 0x55, 0x4e,
+    0x5c, 0x5b, 0x5a, 0x42, 0x47, 0x5e, 0x56, 0x59, 0x47, 0x65, 0x55, 0x4c,
+    0x4c, 0x59, 0x42, 0x5a, 0x4e, 0x46, 0x4e, 0x4b, 0x53, 0x46, 0x49, 0x56,
+    0x48, 0x58, 0x4b, 0x4f, 0x45, 0x38, 0x40, 0x44, 0x49, 0x51, 0x4a, 0x3b,
+    0x53, 0x40, 0x40, 0x48, 0x51, 0x49, 0x44, 0x46, 0x52, 0x4b, 0x4e, 0x45,
+    0x48, 0x5a, 0x4e, 0x57, 0x44, 0x53, 0x49, 0x40, 0x4c, 0x47, 0x41, 0x4f,
+    0x49, 0x55, 0x46, 0x50, 0x57, 0x5b, 0x48, 0x66, 0x50, 0x49, 0x51, 0x55,
+    0x55, 0x4f, 0x47, 0x72, 0x49, 0x4f, 0x41, 0x4c, 0x49, 0x42, 0x48, 0x75,
+    0x4a, 0x55, 0x45, 0x4a, 0x41, 0x51, 0x41, 0x70, 0x47, 0x49, 0x42, 0x52,
+    0x4f, 0x47, 0x46, 0x63, 0x4f, 0x53, 0x46, 0x4f, 0x49, 0x53, 0x52, 0x63,
+    0x4c, 0x59, 0x46, 0x41, 0x49, 0x51, 0x3e, 0x53, 0x45, 0x52, 0x51, 0x40,
+    0x4f, 0x4c, 0x41, 0x4c, 0x47, 0x4a, 0x46, 0x47, 0x53, 0x47, 0x48, 0x39,
+    0x53, 0x4b, 0x46, 0x4b, 0x50, 0x4c, 0x41, 0x40, 0x48, 0x4e, 0x49, 0x4e,
+    0x44, 0x53, 0x44, 0x4e, 0x53, 0x49, 0x49, 0x4e, 0x46, 0x3f, 0x45, 0x42,
+    0x4c, 0x47, 0x42, 0x4e, 0x49, 0x4a, 0x49, 0x44, 0x51, 0x48, 0x57, 0x4c,
+    0x4d, 0x60, 0x4e, 0x2d, 0x46, 0x4d, 0x58, 0x53, 0x5c, 0x56, 0x5e, 0x41,
+    0x3e, 0x66, 0x53, 0x5b, 0x49, 0x59, 0x5a, 0x55, 0x4e, 0x59, 0x46, 0x4a,
+    0x44, 0x42, 0x45, 0x3d, 0x4d, 0x45, 0x44, 0x4f, 0x4d, 0x53, 0x42, 0x5a,
+    0x43, 0x3c, 0x48, 0x4f, 0x44, 0x59, 0x3f, 0x33, 0x45, 0x48, 0x43, 0x45,
+    0x4d, 0x56, 0x48, 0x44, 0x3e, 0x48, 0x46, 0x4d, 0x44, 0x53, 0x46, 0x4e,
+    0x45, 0x52, 0x40, 0x46, 0x4c, 0x50, 0x4e, 0x4b, 0x4d, 0x46, 0x48, 0x46,
+    0x50, 0x52, 0x4e, 0x57, 0x3f, 0x4a, 0x49, 0x50, 0x53, 0x4e, 0x41, 0x66,
+    0x49, 0x4f, 0x40, 0x4b, 0x50, 0x4c, 0x4a, 0x70, 0x42, 0x51, 0x41, 0x4c,
+    0x50, 0x4f, 0x46, 0x60, 0x45, 0x47, 0x54, 0x4c, 0x49, 0x59, 0x52, 0x61,
+    0x4a, 0x53, 0x52, 0x4f, 0x4b, 0x4c, 0x46, 0x56, 0x4b, 0x54, 0x4f, 0x47,
+    0x53, 0x49, 0x4f, 0x50, 0x4a, 0x54, 0x45, 0x4e, 0x47, 0x48, 0x47, 0x42,
+    0x49, 0x44, 0x46, 0x46, 0x55, 0x4c, 0x4f, 0x36, 0x4c, 0x49, 0x3f, 0x4e,
+    0x45, 0x4b, 0x4b, 0x36, 0x48, 0x4f, 0x4b, 0x50, 0x45, 0x47, 0x49, 0x3f,
+    0x50, 0x4b, 0x52, 0x48, 0x4c, 0x41, 0x49, 0x43, 0x4e, 0x3c, 0x43, 0x45,
+    0x3e, 0x45, 0x48, 0x44, 0x4d, 0x48, 0x56, 0x47, 0x4b, 0x54, 0x52, 0x2b,
+    0x4d, 0x4e, 0x57, 0x4f, 0x57, 0x4f, 0x56, 0x43, 0x48, 0x5f, 0x4c, 0x51,
+    0x4d, 0x58, 0x4f, 0x4e, 0x50, 0x50, 0x48, 0x4a, 0x4d, 0x3f, 0x47, 0x40,
+    0x4b, 0x4a, 0x4e, 0x4b, 0x4a, 0x58, 0x42, 0x49, 0x3f, 0x42, 0x3d, 0x4d,
+    0x46, 0x53, 0x45, 0x3e, 0x4e, 0x49, 0x4f, 0x4a, 0x47, 0x46, 0x40, 0x3e,
+    0x4c, 0x4d, 0x4d, 0x45, 0x4a, 0x56, 0x40, 0x4a, 0x47, 0x57, 0x4f, 0x48,
+    0x4f, 0x48, 0x47, 0x49, 0x4e, 0x52, 0x50, 0x48, 0x42, 0x52, 0x43, 0x5a,
+    0x49, 0x42, 0x4f, 0x4f, 0x51, 0x51, 0x50, 0x5c, 0x4b, 0x43, 0x4b, 0x48,
+    0x50, 0x51, 0x4b, 0x6d, 0x53, 0x4e, 0x44, 0x4c, 0x4c, 0x51, 0x46, 0x5b,
+    0x44, 0x48, 0x4d, 0x4c, 0x46, 0x4f, 0x54, 0x54, 0x4e, 0x54, 0x42, 0x4e,
+    0x4c, 0x49, 0x49, 0x58, 0x49, 0x53, 0x53, 0x4a, 0x4e, 0x4b, 0x47, 0x53,
+    0x43, 0x55, 0x46, 0x51, 0x3d, 0x3d, 0x4c, 0x47, 0x4e, 0x51, 0x47, 0x48,
+    0x4b, 0x4c, 0x42, 0x3b, 0x43, 0x4f, 0x44, 0x4d, 0x54, 0x4b, 0x4a, 0x47,
+    0x4c, 0x42, 0x4b, 0x43, 0x41, 0x4e, 0x4d, 0x50, 0x45, 0x46, 0x41, 0x4a,
+    0x49, 0x49, 0x54, 0x47, 0x4c, 0x4b, 0x50, 0x4e, 0x3f, 0x43, 0x40, 0x41,
+    0x44, 0x54, 0x51, 0x47, 0x4c, 0x4b, 0x4f, 0x34, 0x4d, 0x4c, 0x4f, 0x49,
+    0x56, 0x4e, 0x4b, 0x3e, 0x48, 0x53, 0x4e, 0x56, 0x49, 0x4e, 0x4c, 0x40,
+    0x55, 0x4a, 0x46, 0x4f, 0x48, 0x4a, 0x55, 0x41, 0x55, 0x3d, 0x47, 0x51,
+    0x50, 0x51, 0x45, 0x51, 0x4b, 0x4e, 0x4a, 0x4f, 0x4b, 0x45, 0x42, 0x3c,
+    0x4e, 0x46, 0x47, 0x49, 0x4a, 0x4c, 0x48, 0x41, 0x4f, 0x4a, 0x44, 0x45,
+    0x4e, 0x4e, 0x43, 0x41, 0x4c, 0x47, 0x48, 0x49, 0x4c, 0x48, 0x4f, 0x4a,
+    0x4f, 0x4a, 0x4b, 0x45, 0x42, 0x40, 0x52, 0x55, 0x4f, 0x49, 0x44, 0x54,
+    0x49, 0x48, 0x51, 0x4d, 0x44, 0x4a, 0x4d, 0x49, 0x4e, 0x4e, 0x51, 0x5d,
+    0x42, 0x4d, 0x49, 0x3f, 0x48, 0x58, 0x40, 0x5e, 0x48, 0x4f, 0x49, 0x53,
+    0x45, 0x47, 0x4f, 0x53, 0x4d, 0x4f, 0x4d, 0x4d, 0x46, 0x55, 0x43, 0x51,
+    0x4f, 0x51, 0x4a, 0x4e, 0x49, 0x42, 0x49, 0x50, 0x47, 0x4d, 0x42, 0x47,
+    0x46, 0x50, 0x55, 0x47, 0x4d, 0x47, 0x3e, 0x51, 0x4d, 0x43, 0x44, 0x39,
+    0x4e, 0x4b, 0x41, 0x48, 0x52, 0x53, 0x4d, 0x39, 0x4d, 0x51, 0x4c, 0x46,
+    0x4e, 0x47, 0x49, 0x41, 0x45, 0x4a, 0x4a, 0x45, 0x50, 0x4a, 0x40, 0x48,
+    0x43, 0x47, 0x44, 0x50, 0x4d, 0x47, 0x4a, 0x47, 0x45, 0x57, 0x41, 0x34,
+    0x51, 0x40, 0x45, 0x44, 0x3c, 0x47, 0x46, 0x47, 0x44, 0x48, 0x42, 0x40,
+    0x37, 0x53, 0x4a, 0x43, 0x49, 0x4b, 0x43, 0x44, 0x4f, 0x4f, 0x48, 0x48,
+    0x53, 0x49, 0x4b, 0x48, 0x4e, 0x4c, 0x42, 0x45, 0x4c, 0x4a, 0x4a, 0x46,
+    0x47, 0x57, 0x3e, 0x46, 0x46, 0x45, 0x4a, 0x43, 0x46, 0x49, 0x43, 0x52,
+    0x3e, 0x48, 0x4a, 0x4b, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4b, 0x4e, 0x44,
+    0x42, 0x44, 0x50, 0x41, 0x49, 0x49, 0x4d, 0x4b, 0x44, 0x46, 0x4a, 0x52,
+    0x4d, 0x47, 0x49, 0x4b, 0x4d, 0x49, 0x41, 0x48, 0x4b, 0x3f, 0x45, 0x4f,
+    0x51, 0x41, 0x55, 0x42, 0x49, 0x4b, 0x4b, 0x51, 0x4f, 0x4f, 0x42, 0x4e,
+    0x4e, 0x4a, 0x52, 0x41, 0x4f, 0x42, 0x48, 0x3d, 0x4a, 0x44, 0x50, 0x4b,
+    0x49, 0x45, 0x51, 0x46, 0x51, 0x44, 0x4d, 0x47, 0x4a, 0x4a, 0x4d, 0x49,
+    0x4d, 0x48, 0x4d, 0x4f, 0x4d, 0x44, 0x48, 0x4e, 0x4a, 0x4b, 0x40, 0x4f,
+    0x47, 0x3a, 0x41, 0x47, 0x4a, 0x4a, 0x4a, 0x48, 0x42, 0x41, 0x4d, 0x56,
+    0x3f, 0x52, 0x4d, 0x4c, 0x44, 0x48, 0x47, 0x4e, 0x51, 0x4c, 0x49, 0x47,
+    0x44, 0x4c, 0x4b, 0x47, 0x48, 0x46, 0x47, 0x4f, 0x43, 0x41, 0x3e, 0x47,
+    0x53, 0x4a, 0x46, 0x42, 0x46, 0x61, 0x43, 0x30, 0x4e, 0x52, 0x43, 0x45,
+    0x32, 0x4a, 0x45, 0x48, 0x51, 0x3e, 0x44, 0x3b, 0x3a, 0x63, 0x4c, 0x46,
+    0x4c, 0x49, 0x3d, 0x41, 0x52, 0x53, 0x43, 0x43, 0x45, 0x3d, 0x48, 0x40,
+    0x4b, 0x4a, 0x49, 0x48, 0x4d, 0x49, 0x4b, 0x4c, 0x3f, 0x4e, 0x4b, 0x47,
+    0x45, 0x4d, 0x3f, 0x4d, 0x43, 0x50, 0x48, 0x4b, 0x54, 0x3e, 0x44, 0x4e,
+    0x3e, 0x4c, 0x43, 0x4b, 0x4c, 0x4b, 0x3e, 0x49, 0x50, 0x52, 0x4a, 0x4a,
+    0x50, 0x50, 0x43, 0x4e, 0x49, 0x48, 0x51, 0x50, 0x47, 0x3d, 0x45, 0x4b,
+    0x47, 0x46, 0x4d, 0x4c, 0x45, 0x4d, 0x4a, 0x4d, 0x42, 0x4d, 0x47, 0x4f,
+    0x40, 0x43, 0x46, 0x51, 0x47, 0x4b, 0x43, 0x49, 0x49, 0x50, 0x4b, 0x4b,
+    0x46, 0x4a, 0x4c, 0x48, 0x49, 0x47, 0x4b, 0x56, 0x55, 0x4f, 0x49, 0x4f,
+    0x4f, 0x4e, 0x4b, 0x49, 0x4a, 0x4a, 0x49, 0x47, 0x44, 0x4b, 0x47, 0x50,
+    0x46, 0x4c, 0x46, 0x4c, 0x4b, 0x4e, 0x49, 0x57, 0x4d, 0x3e, 0x46, 0x47,
+    0x50, 0x45, 0x4f, 0x52, 0x3e, 0x4d, 0x49, 0x4a, 0x40, 0x49, 0x4f, 0x5c,
+    0x3e, 0x4a, 0x47, 0x45, 0x47, 0x41, 0x44, 0x3f, 0x4b, 0x4a, 0x52, 0x43,
+    0x41, 0x43, 0x43, 0x47, 0x55, 0x49, 0x42, 0x4c, 0x58, 0x4b, 0x42, 0x48,
+    0x4b, 0x5a, 0x36, 0x33, 0x53, 0x57, 0x4d, 0x4a, 0x37, 0x4c, 0x3e, 0x48,
+    0x43, 0x46, 0x39, 0x3c, 0x34, 0x65, 0x47, 0x3d, 0x47, 0x42, 0x3c, 0x3e,
+    0x45, 0x5b, 0x44, 0x3e, 0x45, 0x43, 0x46, 0x43, 0x59, 0x4e, 0x48, 0x46,
+    0x43, 0x3f, 0x46, 0x47, 0x4e, 0x53, 0x50, 0x4b, 0x4a, 0x3f, 0x4a, 0x54,
+    0x4c, 0x4a, 0x43, 0x50, 0x4c, 0x42, 0x4d, 0x55, 0x4d, 0x51, 0x51, 0x46,
+    0x49, 0x41, 0x50, 0x44, 0x4a, 0x4b, 0x4b, 0x43, 0x4b, 0x4e, 0x47, 0x4b,
+    0x3e, 0x4e, 0x44, 0x4d, 0x49, 0x41, 0x49, 0x44, 0x50, 0x4d, 0x45, 0x4e,
+    0x4b, 0x50, 0x45, 0x4c, 0x46, 0x4a, 0x46, 0x42, 0x50, 0x45, 0x48, 0x53,
+    0x4d, 0x44, 0x42, 0x50, 0x4c, 0x49, 0x45, 0x55, 0x4d, 0x42, 0x43, 0x41,
+    0x4c, 0x41, 0x4e, 0x4d, 0x42, 0x4e, 0x3f, 0x44, 0x4d, 0x4c, 0x4b, 0x4a,
+    0x47, 0x47, 0x4e, 0x54, 0x43, 0x40, 0x41, 0x55, 0x49, 0x49, 0x4e, 0x49,
+    0x52, 0x4e, 0x46, 0x58, 0x4b, 0x3d, 0x4a, 0x44, 0x4e, 0x47, 0x53, 0x58,
+    0x47, 0x42, 0x52, 0x46, 0x49, 0x4b, 0x47, 0x5a, 0x4c, 0x46, 0x46, 0x49,
+    0x4b, 0x4d, 0x3d, 0x48, 0x40, 0x54, 0x48, 0x4c, 0x4c, 0x44, 0x4c, 0x46,
+    0x47, 0x4b, 0x4d, 0x44, 0x5a, 0x4a, 0x3e, 0x46, 0x48, 0x53, 0x39, 0x30,
+    0x51, 0x60, 0x4d, 0x47, 0x35, 0x4f, 0x45, 0x45, 0x4a, 0x4b, 0x42, 0x3f,
+    0x38, 0x6c, 0x3d, 0x40, 0x44, 0x48, 0x3a, 0x3b, 0x46, 0x5e, 0x45, 0x3b,
+    0x47, 0x47, 0x45, 0x42, 0x53, 0x55, 0x44, 0x45, 0x46, 0x43, 0x48, 0x48,
+    0x52, 0x5d, 0x3e, 0x41, 0x53, 0x42, 0x48, 0x55, 0x49, 0x4d, 0x4a, 0x46,
+    0x52, 0x46, 0x51, 0x48, 0x44, 0x46, 0x48, 0x41, 0x49, 0x49, 0x49, 0x49,
+    0x41, 0x4d, 0x40, 0x4f, 0x45, 0x46, 0x45, 0x3f, 0x53, 0x40, 0x46, 0x43,
+    0x47, 0x4d, 0x50, 0x4c, 0x55, 0x48, 0x45, 0x47, 0x4f, 0x46, 0x42, 0x4d,
+    0x41, 0x48, 0x46, 0x4e, 0x42, 0x48, 0x48, 0x45, 0x41, 0x45, 0x48, 0x4a,
+    0x40, 0x49, 0x43, 0x4b, 0x48, 0x4a, 0x4c, 0x45, 0x4b, 0x48, 0x48, 0x4f,
+    0x40, 0x4b, 0x4a, 0x44, 0x50, 0x4a, 0x43, 0x50, 0x4c, 0x44, 0x46, 0x4c,
+    0x42, 0x44, 0x4e, 0x55, 0x47, 0x49, 0x48, 0x47, 0x52, 0x4e, 0x44, 0x59,
+    0x4e, 0x44, 0x4a, 0x48, 0x49, 0x4a, 0x42, 0x4e, 0x3e, 0x39, 0x51, 0x45,
+    0x4d, 0x49, 0x4f, 0x54, 0x51, 0x4b, 0x50, 0x44, 0x53, 0x4f, 0x4d, 0x48,
+    0x42, 0x45, 0x4e, 0x40, 0x4a, 0x48, 0x43, 0x48, 0x52, 0x54, 0x4d, 0x49,
+    0x5f, 0x53, 0x46, 0x4e, 0x3f, 0x5a, 0x36, 0x31, 0x52, 0x60, 0x4b, 0x4a,
+    0x32, 0x51, 0x40, 0x44, 0x46, 0x52, 0x44, 0x41, 0x3a, 0x6e, 0x41, 0x3e,
+    0x47, 0x3e, 0x3a, 0x2a, 0x44, 0x5a, 0x40, 0x3c, 0x4d, 0x48, 0x46, 0x3b,
+    0x5e, 0x58, 0x4d, 0x47, 0x51, 0x3a, 0x4b, 0x48, 0x5b, 0x5a, 0x54, 0x43,
+    0x50, 0x4c, 0x54, 0x54, 0x49, 0x47, 0x4f, 0x48, 0x50, 0x40, 0x4f, 0x4a,
+    0x42, 0x42, 0x3c, 0x41, 0x43, 0x4e, 0x53, 0x49, 0x4b, 0x4d, 0x49, 0x41,
+    0x4c, 0x3e, 0x40, 0x49, 0x40, 0x44, 0x49, 0x4f, 0x50, 0x4a, 0x42, 0x3a,
+    0x49, 0x4b, 0x47, 0x50, 0x49, 0x41, 0x52, 0x46, 0x3d, 0x44, 0x46, 0x43,
+    0x4b, 0x4b, 0x4d, 0x4b, 0x4e, 0x40, 0x45, 0x43, 0x48, 0x44, 0x55, 0x51,
+    0x4a, 0x46, 0x4e, 0x40, 0x53, 0x4a, 0x45, 0x41, 0x48, 0x48, 0x45, 0x4e,
+    0x4a, 0x48, 0x40, 0x4c, 0x54, 0x44, 0x42, 0x4d, 0x49, 0x43, 0x45, 0x4c,
+    0x43, 0x4f, 0x46, 0x3f, 0x46, 0x4f, 0x4b, 0x59, 0x46, 0x49, 0x54, 0x47,
+    0x49, 0x46, 0x45, 0x53, 0x4a, 0x49, 0x54, 0x45, 0x41, 0x45, 0x4c, 0x5e,
+    0x50, 0x3d, 0x4d, 0x49, 0x55, 0x4b, 0x49, 0x47, 0x4c, 0x4f, 0x43, 0x3d,
+    0x41, 0x4b, 0x43, 0x46, 0x4f, 0x4a, 0x4c, 0x54, 0x5e, 0x4e, 0x40, 0x4d,
+    0x3d, 0x59, 0x40, 0x28, 0x54, 0x5f, 0x4d, 0x4b, 0x36, 0x51, 0x3a, 0x47,
+    0x4a, 0x55, 0x42, 0x43, 0x3b, 0x72, 0x3b, 0x3d, 0x51, 0x42, 0x3f, 0x2d,
+    0x4b, 0x5a, 0x48, 0x44, 0x49, 0x49, 0x3d, 0x39, 0x56, 0x55, 0x46, 0x46,
+    0x4b, 0x43, 0x40, 0x4a, 0x52, 0x56, 0x4d, 0x45, 0x4b, 0x48, 0x40, 0x5a,
+    0x4e, 0x3a, 0x53, 0x48, 0x4c, 0x44, 0x49, 0x4e, 0x42, 0x47, 0x46, 0x40,
+    0x51, 0x42, 0x50, 0x4b, 0x43, 0x53, 0x44, 0x44, 0x46, 0x4c, 0x4c, 0x3c,
+    0x42, 0x45, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x3d, 0x47, 0x4b, 0x4c, 0x4e,
+    0x52, 0x4a, 0x4e, 0x41, 0x3f, 0x46, 0x43, 0x54, 0x44, 0x53, 0x4e, 0x48,
+    0x40, 0x41, 0x4f, 0x45, 0x43, 0x3c, 0x52, 0x49, 0x40, 0x44, 0x4a, 0x3f,
+    0x4d, 0x4c, 0x4f, 0x47, 0x44, 0x47, 0x55, 0x47, 0x50, 0x4d, 0x4a, 0x4c,
+    0x50, 0x48, 0x47, 0x55, 0x4b, 0x4a, 0x52, 0x49, 0x3d, 0x3f, 0x4f, 0x51,
+    0x48, 0x4e, 0x42, 0x4e, 0x42, 0x48, 0x4e, 0x49, 0x4a, 0x50, 0x45, 0x54,
+    0x41, 0x43, 0x45, 0x4d, 0x48, 0x48, 0x48, 0x51, 0x53, 0x3e, 0x55, 0x44,
+    0x52, 0x56, 0x44, 0x4d, 0x4e, 0x48, 0x4b, 0x43, 0x48, 0x53, 0x48, 0x44,
+    0x49, 0x45, 0x4e, 0x50, 0x5d, 0x4a, 0x45, 0x4c, 0x45, 0x55, 0x43, 0x2e,
+    0x59, 0x60, 0x4e, 0x4d, 0x32, 0x53, 0x3e, 0x3f, 0x40, 0x63, 0x41, 0x48,
+    0x38, 0x73, 0x38, 0x46, 0x50, 0x3e, 0x3c, 0x23, 0x48, 0x61, 0x45, 0x3c,
+    0x41, 0x41, 0x36, 0x3b, 0x58, 0x56, 0x4a, 0x40, 0x4f, 0x44, 0x45, 0x4c,
+    0x5a, 0x56, 0x47, 0x3f, 0x4d, 0x4b, 0x46, 0x5d, 0x52, 0x47, 0x45, 0x4c,
+    0x4a, 0x52, 0x4f, 0x4f, 0x4f, 0x43, 0x4f, 0x47, 0x43, 0x46, 0x3c, 0x4c,
+    0x46, 0x55, 0x40, 0x53, 0x43, 0x3e, 0x42, 0x35, 0x51, 0x41, 0x42, 0x3f,
+    0x45, 0x3d, 0x41, 0x31, 0x4e, 0x47, 0x48, 0x42, 0x41, 0x45, 0x43, 0x38,
+    0x42, 0x40, 0x4a, 0x47, 0x4e, 0x43, 0x40, 0x43, 0x48, 0x49, 0x45, 0x4f,
+    0x44, 0x42, 0x4d, 0x42, 0x42, 0x3f, 0x46, 0x52, 0x3c, 0x3c, 0x47, 0x43,
+    0x46, 0x47, 0x45, 0x40, 0x4c, 0x44, 0x43, 0x4a, 0x4b, 0x4d, 0x4e, 0x46,
+    0x51, 0x45, 0x47, 0x4b, 0x45, 0x50, 0x40, 0x42, 0x4c, 0x4c, 0x4c, 0x4f,
+    0x44, 0x3c, 0x49, 0x3c, 0x3f, 0x45, 0x3f, 0x5c, 0x42, 0x3e, 0x4b, 0x4e,
+    0x50, 0x45, 0x42, 0x5c, 0x4c, 0x48, 0x50, 0x52, 0x50, 0x47, 0x4b, 0x44,
+    0x3d, 0x50, 0x55, 0x4c, 0x48, 0x3f, 0x4b, 0x44, 0x4a, 0x51, 0x42, 0x4c,
+    0x60, 0x51, 0x41, 0x4b, 0x46, 0x5c, 0x42, 0x2c, 0x55, 0x61, 0x50, 0x52,
+    0x37, 0x5a, 0x3f, 0x43, 0x43, 0x58, 0x3a, 0x4d, 0x3e, 0x72, 0x35, 0x3f,
+    0x58, 0x41, 0x40, 0x1f, 0x55, 0x63, 0x3f, 0x49, 0x41, 0x3e, 0x35, 0x41,
+    0x65, 0x54, 0x42, 0x45, 0x45, 0x3c, 0x44, 0x45, 0x59, 0x5a, 0x4d, 0x41,
+    0x51, 0x46, 0x49, 0x59, 0x4c, 0x41, 0x42, 0x44, 0x4a, 0x45, 0x3f, 0x4a,
+    0x4a, 0x44, 0x48, 0x48, 0x52, 0x40, 0x4a, 0x4a, 0x4d, 0x54, 0x44, 0x48,
+    0x54, 0x46, 0x49, 0x3b, 0x42, 0x4a, 0x4e, 0x46, 0x4a, 0x45, 0x4f, 0x30,
+    0x46, 0x41, 0x47, 0x46, 0x4b, 0x47, 0x46, 0x38, 0x4c, 0x3a, 0x4b, 0x46,
+    0x52, 0x48, 0x4f, 0x3e, 0x48, 0x4a, 0x48, 0x4b, 0x44, 0x45, 0x4a, 0x46,
+    0x3f, 0x4f, 0x40, 0x44, 0x43, 0x43, 0x4b, 0x39, 0x46, 0x43, 0x49, 0x49,
+    0x49, 0x4a, 0x44, 0x48, 0x4c, 0x41, 0x4d, 0x52, 0x4c, 0x4a, 0x46, 0x3d,
+    0x41, 0x4b, 0x41, 0x48, 0x45, 0x3b, 0x51, 0x54, 0x4a, 0x39, 0x4d, 0x41,
+    0x54, 0x46, 0x4c, 0x53, 0x48, 0x3e, 0x4a, 0x3d, 0x41, 0x52, 0x54, 0x63,
+    0x44, 0x4d, 0x4a, 0x43, 0x52, 0x4b, 0x52, 0x52, 0x4e, 0x41, 0x48, 0x42,
+    0x48, 0x4d, 0x49, 0x45, 0x51, 0x48, 0x3e, 0x47, 0x5a, 0x52, 0x4a, 0x4e,
+    0x3e, 0x59, 0x3c, 0x2e, 0x5c, 0x5b, 0x4c, 0x56, 0x30, 0x59, 0x3a, 0x48,
+    0x3d, 0x5c, 0x44, 0x49, 0x40, 0x7c, 0x3a, 0x48, 0x54, 0x40, 0x41, 0x28,
+    0x4d, 0x64, 0x46, 0x47, 0x49, 0x40, 0x30, 0x3a, 0x5f, 0x5b, 0x42, 0x37,
+    0x49, 0x45, 0x40, 0x43, 0x5b, 0x54, 0x48, 0x4d, 0x4a, 0x47, 0x51, 0x58,
+    0x4b, 0x3c, 0x4d, 0x46, 0x4b, 0x52, 0x4c, 0x58, 0x53, 0x46, 0x42, 0x45,
+    0x4c, 0x4a, 0x4d, 0x4e, 0x52, 0x4d, 0x46, 0x44, 0x46, 0x3f, 0x46, 0x34,
+    0x4f, 0x42, 0x44, 0x46, 0x44, 0x50, 0x47, 0x30, 0x44, 0x3c, 0x42, 0x46,
+    0x4f, 0x4a, 0x52, 0x30, 0x55, 0x4f, 0x45, 0x4a, 0x48, 0x4c, 0x4e, 0x35,
+    0x4e, 0x3c, 0x45, 0x4a, 0x45, 0x4a, 0x44, 0x3c, 0x4e, 0x4a, 0x51, 0x44,
+    0x49, 0x40, 0x4a, 0x40, 0x41, 0x44, 0x4f, 0x4c, 0x43, 0x45, 0x4b, 0x43,
+    0x3e, 0x3e, 0x4c, 0x44, 0x48, 0x48, 0x42, 0x42, 0x4d, 0x43, 0x50, 0x4d,
+    0x49, 0x3c, 0x45, 0x4f, 0x4c, 0x46, 0x4b, 0x48, 0x4d, 0x4d, 0x49, 0x55,
+    0x49, 0x3b, 0x40, 0x44, 0x4a, 0x4b, 0x4e, 0x5e, 0x43, 0x47, 0x45, 0x43,
+    0x4d, 0x4d, 0x49, 0x46, 0x4a, 0x44, 0x4e, 0x3e, 0x52, 0x41, 0x47, 0x47,
+    0x4a, 0x50, 0x48, 0x43, 0x5d, 0x4f, 0x49, 0x48, 0x43, 0x4f, 0x45, 0x3e,
+    0x5a, 0x69, 0x4d, 0x5a, 0x3a, 0x5d, 0x3a, 0x48, 0x42, 0x55, 0x3e, 0x48,
+    0x48, 0x7b, 0x37, 0x40, 0x57, 0x45, 0x48, 0x24, 0x50, 0x61, 0x4c, 0x4a,
+    0x44, 0x41, 0x34, 0x38, 0x65, 0x5b, 0x4f, 0x3c, 0x4d, 0x3a, 0x4a, 0x4c,
+    0x66, 0x55, 0x50, 0x47, 0x4d, 0x46, 0x47, 0x58, 0x4c, 0x48, 0x48, 0x48,
+    0x4e, 0x59, 0x4f, 0x4b, 0x45, 0x45, 0x4b, 0x54, 0x46, 0x51, 0x4f, 0x44,
+    0x42, 0x55, 0x48, 0x44, 0x48, 0x41, 0x53, 0x2e, 0x4d, 0x45, 0x44, 0x54,
+    0x4a, 0x44, 0x53, 0x34, 0x4c, 0x46, 0x47, 0x3f, 0x4c, 0x4b, 0x47, 0x36,
+    0x47, 0x41, 0x43, 0x40, 0x51, 0x46, 0x45, 0x33, 0x46, 0x3e, 0x47, 0x50,
+    0x3f, 0x48, 0x48, 0x37, 0x41, 0x41, 0x42, 0x3e, 0x45, 0x3d, 0x49, 0x3e,
+    0x4f, 0x42, 0x49, 0x4a, 0x46, 0x46, 0x48, 0x44, 0x49, 0x45, 0x46, 0x4a,
+    0x4a, 0x47, 0x48, 0x43, 0x44, 0x45, 0x3f, 0x4c, 0x4c, 0x49, 0x4d, 0x51,
+    0x4a, 0x4a, 0x49, 0x4c, 0x42, 0x4d, 0x4b, 0x4b, 0x4a, 0x42, 0x47, 0x4d,
+    0x3e, 0x4b, 0x47, 0x5c, 0x49, 0x3d, 0x4e, 0x41, 0x44, 0x49, 0x3e, 0x3e,
+    0x4b, 0x47, 0x4e, 0x45, 0x44, 0x4a, 0x4d, 0x4a, 0x4f, 0x46, 0x45, 0x52,
+    0x60, 0x53, 0x49, 0x50, 0x3d, 0x4f, 0x43, 0x3d, 0x52, 0x64, 0x52, 0x58,
+    0x39, 0x5f, 0x36, 0x4c, 0x45, 0x57, 0x42, 0x4b, 0x3f, 0x80, 0x34, 0x47,
+    0x58, 0x41, 0x45, 0x1b, 0x4b, 0x5e, 0x4c, 0x40, 0x44, 0x42, 0x39, 0x3a,
+    0x5e, 0x5b, 0x4b, 0x3a, 0x4b, 0x3f, 0x45, 0x3e, 0x69, 0x57, 0x4b, 0x45,
+    0x4b, 0x3f, 0x45, 0x55, 0x49, 0x49, 0x48, 0x47, 0x41, 0x4f, 0x42, 0x53,
+    0x49, 0x40, 0x42, 0x3e, 0x49, 0x47, 0x53, 0x47, 0x45, 0x51, 0x4a, 0x44,
+    0x44, 0x45, 0x4e, 0x2a, 0x45, 0x42, 0x4a, 0x4b, 0x46, 0x4d, 0x41, 0x30,
+    0x3d, 0x43, 0x3f, 0x48, 0x49, 0x44, 0x4d, 0x2e, 0x48, 0x4a, 0x4c, 0x51,
+    0x50, 0x46, 0x3e, 0x2c, 0x4d, 0x3f, 0x47, 0x46, 0x3c, 0x40, 0x4c, 0x38,
+    0x4f, 0x46, 0x47, 0x53, 0x3b, 0x3c, 0x4e, 0x3e, 0x49, 0x40, 0x43, 0x4c,
+    0x4d, 0x48, 0x45, 0x3c, 0x4d, 0x4c, 0x4d, 0x45, 0x3f, 0x49, 0x4a, 0x43,
+    0x4d, 0x41, 0x4b, 0x50, 0x4e, 0x46, 0x50, 0x44, 0x49, 0x44, 0x4e, 0x42,
+    0x4a, 0x43, 0x4c, 0x4c, 0x49, 0x49, 0x44, 0x4e, 0x4b, 0x3f, 0x4b, 0x5d,
+    0x41, 0x49, 0x4b, 0x46, 0x4e, 0x48, 0x45, 0x51, 0x4d, 0x45, 0x46, 0x45,
+    0x4b, 0x4e, 0x3c, 0x4d, 0x3d, 0x41, 0x47, 0x47, 0x64, 0x54, 0x41, 0x55,
+    0x47, 0x56, 0x44, 0x3b, 0x53, 0x66, 0x4f, 0x5e, 0x40, 0x5d, 0x38, 0x4a,
+    0x41, 0x59, 0x42, 0x48, 0x47, 0xff, 0x36, 0x49, 0x59, 0x41, 0x43, 0x1d,
+    0x4d, 0x5e, 0x44, 0x44, 0x50, 0x3f, 0x39, 0x40, 0x68, 0x5e, 0x4a, 0x41,
+    0x52, 0x41, 0x43, 0x41, 0x68, 0x51, 0x45, 0x48, 0x4c, 0x46, 0x4a, 0x5e,
+    0x4e, 0x40, 0x4d, 0x41, 0x41, 0x5c, 0x3f, 0x4e, 0x4c, 0x37, 0x48, 0x40,
+    0x46, 0x47, 0x4f, 0x43, 0x53, 0x52, 0x3d, 0x44, 0x47, 0x44, 0x3d, 0x34,
+    0x44, 0x42, 0x4a, 0x43, 0x4d, 0x3f, 0x53, 0x2e, 0x42, 0x47, 0x43, 0x4d,
+    0x45, 0x45, 0x47, 0x31, 0x4d, 0x39, 0x41, 0x4a, 0x4a, 0x4d, 0x4b, 0x35,
+    0x47, 0x4e, 0x4c, 0x40, 0x4a, 0x44, 0x44, 0x36, 0x3e, 0x49, 0x3f, 0x45,
+    0x46, 0x43, 0x4e, 0x3c, 0x4d, 0x47, 0x4c, 0x48, 0x4a, 0x4b, 0x48, 0x39,
+    0x46, 0x50, 0x4a, 0x4f, 0x46, 0x41, 0x44, 0x4a, 0x41, 0x4f, 0x4c, 0x4e,
+    0x55, 0x46, 0x43, 0x46, 0x4a, 0x48, 0x4e, 0x46, 0x42, 0x40, 0x4f, 0x56,
+    0x4c, 0x45, 0x4b, 0x46, 0x4a, 0x47, 0x42, 0x5e, 0x49, 0x4e, 0x46, 0x43,
+    0x4e, 0x42, 0x45, 0x48, 0x47, 0x48, 0x4f, 0x45, 0x47, 0x51, 0x4b, 0x4c,
+    0x51, 0x39, 0x4d, 0x48, 0x60, 0x57, 0x49, 0x52, 0x3d, 0x57, 0x46, 0x3d,
+    0x53, 0x68, 0x4b, 0x60, 0x40, 0x5a, 0x41, 0x4b, 0x46, 0x56, 0x46, 0x4c,
+    0x49, 0x7e, 0x2f, 0x48, 0x51, 0x42, 0x40, 0x20, 0x4b, 0x62, 0x4d, 0x41,
+    0x4f, 0x43, 0x3d, 0x35, 0x63, 0x63, 0x46, 0x3e, 0x4e, 0x47, 0x40, 0x40,
+    0x60, 0x52, 0x4c, 0x46, 0x49, 0x48, 0x4f, 0x56, 0x51, 0x47, 0x52, 0x4e,
+    0x4b, 0x59, 0x55, 0x4f, 0x48, 0x3d, 0x48, 0x4a, 0x4d, 0x50, 0x47, 0x47,
+    0x51, 0x52, 0x4d, 0x51, 0x45, 0x45, 0x47, 0x2d, 0x4d, 0x41, 0x43, 0x49,
+    0x4d, 0x40, 0x4a, 0x2f, 0x4f, 0x43, 0x46, 0x4a, 0x3e, 0x4a, 0x4a, 0x2b,
+    0x49, 0x4c, 0x4c, 0x3e, 0x41, 0x4c, 0x4a, 0x2b, 0x40, 0x44, 0x46, 0x4a,
+    0x40, 0x44, 0x42, 0x38, 0x52, 0x42, 0x46, 0x51, 0x53, 0x4e, 0x45, 0x31,
+    0x45, 0x47, 0x4f, 0x46, 0x49, 0x43, 0x45, 0x3b, 0x4b, 0x4b, 0x4b, 0x4c,
+    0x43, 0x4a, 0x4c, 0x43, 0x4e, 0x40, 0x52, 0x44, 0x48, 0x49, 0x47, 0x4b,
+    0x4e, 0x3d, 0x4e, 0x44, 0x48, 0x4d, 0x4f, 0x4f, 0x50, 0x36, 0x47, 0x41,
+    0x4a, 0x44, 0x45, 0x56, 0x4f, 0x4c, 0x50, 0x4b, 0x45, 0x3e, 0x45, 0x4e,
+    0x45, 0x45, 0x43, 0x40, 0x47, 0x4e, 0x45, 0x3e, 0x4a, 0x3f, 0x49, 0x50,
+    0x62, 0x55, 0x48, 0x56, 0x3e, 0x57, 0x4f, 0x3b, 0x55, 0x6c, 0x50, 0x5c,
+    0x3d, 0x54, 0x3d, 0x46, 0x43, 0x59, 0x3e, 0x51, 0x4d, 0x7b, 0x33, 0x47,
+    0x52, 0x43, 0x3f, 0x25, 0x4a, 0x6f, 0x49, 0x3e, 0x50, 0x40, 0x41, 0x30,
+    0x5e, 0x5c, 0x4a, 0x43, 0x4d, 0x42, 0x46, 0x3b, 0x63, 0x53, 0x4f, 0x43,
+    0x58, 0x48, 0x4b, 0x59, 0x50, 0x4e, 0x4b, 0x51, 0x4a, 0x55, 0x44, 0x46,
+    0x4c, 0x3d, 0x4c, 0x52, 0x44, 0x52, 0x4c, 0x41, 0x4f, 0x44, 0x4a, 0x47,
+    0x4e, 0x48, 0x49, 0x2e, 0x3e, 0x45, 0x4c, 0x48, 0x41, 0x47, 0x4d, 0x2e,
+    0x40, 0x4b, 0x4c, 0x42, 0x4d, 0x40, 0x4e, 0x2e, 0x43, 0x45, 0x4b, 0x43,
+    0x3e, 0x49, 0x55, 0x35, 0x43, 0x42, 0x42, 0x40, 0x4e, 0x46, 0x44, 0x37,
+    0x49, 0x41, 0x3f, 0x52, 0x47, 0x4b, 0x43, 0x33, 0x4b, 0x47, 0x4b, 0x4c,
+    0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x40, 0x49, 0x41, 0x42, 0x49, 0x4b, 0x46,
+    0x4e, 0x4e, 0x47, 0x4e, 0x48, 0x48, 0x4b, 0x46, 0x51, 0x4b, 0x46, 0x4d,
+    0x47, 0x4f, 0x3e, 0x51, 0x46, 0x4e, 0x46, 0x4b, 0x47, 0x48, 0x4e, 0x55,
+    0x4c, 0x3d, 0x47, 0x51, 0x42, 0x45, 0x4f, 0x42, 0x52, 0x50, 0x44, 0x4c,
+    0x44, 0x44, 0x43, 0x4d, 0x40, 0x42, 0x4d, 0x4b, 0x5d, 0x4e, 0x47, 0x54,
+    0x47, 0x51, 0x43, 0x39, 0x58, 0x66, 0x4e, 0x5a, 0x41, 0x52, 0x36, 0x47,
+    0x45, 0x5f, 0x34, 0x50, 0x46, 0x79, 0x30, 0x48, 0x50, 0x45, 0x32, 0x22,
+    0x54, 0x64, 0x49, 0x46, 0x45, 0x3c, 0x42, 0x36, 0x65, 0x5c, 0x48, 0x3a,
+    0x4d, 0x4b, 0x47, 0x3e, 0x63, 0x56, 0x4a, 0x48, 0x51, 0x42, 0x4f, 0x5e,
+    0x4c, 0x44, 0x4b, 0x4c, 0x3d, 0x5a, 0x43, 0x4d, 0x42, 0x40, 0x4f, 0x4d,
+    0x3f, 0x3e, 0x46, 0x40, 0x49, 0x42, 0x49, 0x40, 0x49, 0x4c, 0x4a, 0x2e,
+    0x4b, 0x3f, 0x53, 0x4b, 0x48, 0x49, 0x3e, 0x34, 0x47, 0x4a, 0x4b, 0x46,
+    0x3b, 0x49, 0x46, 0x34, 0x4b, 0x48, 0x4c, 0x49, 0x49, 0x43, 0x4f, 0x2e,
+    0x44, 0x46, 0x48, 0x50, 0x46, 0x4e, 0x4a, 0x37, 0x4b, 0x4c, 0x4a, 0x50,
+    0x45, 0x4a, 0x48, 0x3b, 0x48, 0x44, 0x48, 0x4a, 0x41, 0x44, 0x52, 0x3f,
+    0x4c, 0x46, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x36, 0x53, 0x3e, 0x48, 0x47,
+    0x3f, 0x42, 0x41, 0x4c, 0x42, 0x4a, 0x52, 0x46, 0x49, 0x3f, 0x48, 0x5a,
+    0x43, 0x42, 0x3d, 0x43, 0x4f, 0x44, 0x43, 0x65, 0x41, 0x41, 0x44, 0x4b,
+    0x50, 0x44, 0x53, 0x49, 0x41, 0x45, 0x4a, 0x4d, 0x40, 0x45, 0x4a, 0x4e,
+    0x50, 0x40, 0x51, 0x40, 0x5e, 0x50, 0x43, 0x5c, 0x47, 0x5a, 0x44, 0x4c,
+    0x54, 0x64, 0x4f, 0x63, 0x39, 0x58, 0x3c, 0x4a, 0x42, 0x5e, 0x3c, 0x4a,
+    0x48, 0x7b, 0x34, 0x4c, 0x4f, 0x44, 0x30, 0x24, 0x50, 0x65, 0x47, 0x39,
+    0x46, 0x3e, 0x3f, 0x33, 0x65, 0x5a, 0x44, 0x38, 0x50, 0x47, 0x4b, 0x3e,
+    0x5b, 0x53, 0x4a, 0x4d, 0x51, 0x40, 0x47, 0x59, 0x51, 0x42, 0x4f, 0x50,
+    0x45, 0x57, 0x46, 0x50, 0x3f, 0x3c, 0x4c, 0x4f, 0x46, 0x41, 0x4a, 0x3e,
+    0x4d, 0x45, 0x51, 0x48, 0x4e, 0x44, 0x4e, 0x35, 0x44, 0x3f, 0x44, 0x48,
+    0x3c, 0x4c, 0x49, 0x2c, 0x4a, 0x46, 0x48, 0x44, 0x4b, 0x42, 0x4b, 0x2f,
+    0x4e, 0x50, 0x4c, 0x4d, 0x44, 0x46, 0x3f, 0x39, 0x4d, 0x47, 0x45, 0x41,
+    0x42, 0x47, 0x4a, 0x3a, 0x40, 0x3e, 0x4a, 0x51, 0x3f, 0x47, 0x44, 0x37,
+    0x47, 0x4e, 0x47, 0x52, 0x45, 0x42, 0x4a, 0x3d, 0x43, 0x4d, 0x4d, 0x47,
+    0x48, 0x43, 0x44, 0x44, 0x47, 0x4e, 0x52, 0x4b, 0x4e, 0x50, 0x42, 0x47,
+    0x4b, 0x4b, 0x4e, 0x4c, 0x4e, 0x47, 0x50, 0x56, 0x46, 0x47, 0x4d, 0x49,
+    0x4d, 0x46, 0x49, 0x5f, 0x49, 0x42, 0x4d, 0x44, 0x40, 0x4b, 0x52, 0x45,
+    0x46, 0x4a, 0x4b, 0x49, 0x47, 0x4b, 0x42, 0x45, 0x42, 0x44, 0x46, 0x4c,
+    0x62, 0x4a, 0x44, 0x53, 0x43, 0x5a, 0x48, 0x49, 0x59, 0x68, 0x46, 0x61,
+    0x40, 0x5a, 0x3a, 0x4d, 0x45, 0x5e, 0x33, 0x4f, 0x4e, 0x74, 0x3e, 0x3e,
+    0x5a, 0x4b, 0x34, 0x31, 0x52, 0x6c, 0x44, 0x39, 0x4c, 0x3b, 0x39, 0x3a,
+    0x63, 0x65, 0x4b, 0x40, 0x50, 0x4d, 0x53, 0x4a, 0x69, 0x56, 0x54, 0x45,
+    0x4c, 0x4c, 0x50, 0x5b, 0x4d, 0x4f, 0x3d, 0x4b, 0x44, 0x47, 0x43, 0x47,
+    0x49, 0x3c, 0x49, 0x41, 0x41, 0x3f, 0x47, 0x43, 0x48, 0x47, 0x4c, 0x43,
+    0x4a, 0x40, 0x4d, 0x32, 0x4b, 0x4d, 0x44, 0x48, 0x46, 0x44, 0x50, 0x2f,
+    0x4e, 0x49, 0x53, 0x4b, 0x52, 0x47, 0x4b, 0x2b, 0x48, 0x4b, 0x4a, 0x4c,
+    0x4d, 0x4c, 0x43, 0x37, 0x48, 0x3c, 0x4b, 0x42, 0x51, 0x3f, 0x45, 0x3c,
+    0x49, 0x40, 0x42, 0x43, 0x4d, 0x4c, 0x3f, 0x3f, 0x4d, 0x43, 0x45, 0x42,
+    0x48, 0x42, 0x48, 0x39, 0x51, 0x4e, 0x46, 0x4f, 0x3e, 0x4c, 0x45, 0x3e,
+    0x3f, 0x3f, 0x43, 0x41, 0x4b, 0x4b, 0x43, 0x4d, 0x44, 0x3b, 0x48, 0x45,
+    0x3c, 0x4a, 0x48, 0x5b, 0x3c, 0x4b, 0x4c, 0x44, 0x46, 0x3e, 0x45, 0x57,
+    0x43, 0x42, 0x51, 0x4a, 0x46, 0x47, 0x43, 0x49, 0x42, 0x43, 0x50, 0x4e,
+    0x4e, 0x44, 0x41, 0x4e, 0x4e, 0x41, 0x48, 0x47, 0x5c, 0x53, 0x44, 0x54,
+    0x44, 0x5b, 0x45, 0x46, 0x55, 0x67, 0x4d, 0x5d, 0x40, 0x5a, 0x43, 0x4b,
+    0x43, 0x60, 0x3c, 0x4b, 0x41, 0x79, 0x41, 0x41, 0x58, 0x48, 0x40, 0x3b,
+    0x4f, 0x6c, 0x46, 0x3f, 0x53, 0x3a, 0x3d, 0x36, 0x5a, 0x57, 0x44, 0x41,
+    0x4c, 0x47, 0x4e, 0x48, 0x62, 0x60, 0x4a, 0x46, 0x51, 0x3e, 0x52, 0x5f,
+    0x4b, 0x46, 0x48, 0x4c, 0x4c, 0x55, 0x43, 0x46, 0x49, 0x3e, 0x41, 0x40,
+    0x4d, 0x47, 0x46, 0x3b, 0x51, 0x3a, 0x4a, 0x45, 0x50, 0x47, 0x51, 0x38,
+    0x44, 0x41, 0x40, 0x4b, 0x4d, 0x44, 0x4d, 0x28, 0x47, 0x3e, 0x44, 0x40,
+    0x49, 0x49, 0x40, 0x3c, 0x44, 0x4c, 0x48, 0x51, 0x46, 0x3e, 0x47, 0x2a,
+    0x41, 0x44, 0x49, 0x4c, 0x4e, 0x4e, 0x42, 0x3c, 0x49, 0x42, 0x43, 0x45,
+    0x4e, 0x4d, 0x50, 0x39, 0x42, 0x43, 0x48, 0x41, 0x3f, 0x40, 0x4e, 0x3a,
+    0x44, 0x3d, 0x49, 0x4d, 0x47, 0x45, 0x4b, 0x42, 0x4c, 0x4d, 0x3f, 0x3f,
+    0x4e, 0x4d, 0x4d, 0x4d, 0x4d, 0x45, 0x47, 0x43, 0x4c, 0x46, 0x47, 0x57,
+    0x4b, 0x42, 0x4d, 0x46, 0x4b, 0x4b, 0x43, 0x58, 0x48, 0x49, 0x4d, 0x47,
+    0x43, 0x49, 0x4b, 0x48, 0x46, 0x4f, 0x4f, 0x42, 0x4a, 0x43, 0x49, 0x4e,
+    0x4a, 0x47, 0x4c, 0x48, 0x5a, 0x57, 0x4a, 0x58, 0x49, 0x4f, 0x45, 0x47,
+    0x63, 0x66, 0x4d, 0x5e, 0x4b, 0x51, 0x45, 0x4a, 0x43, 0x5d, 0x33, 0x4b,
+    0x4e, 0x70, 0x42, 0x39, 0x57, 0x4a, 0x40, 0x3a, 0x51, 0x68, 0x45, 0x45,
+    0x4c, 0x44, 0x3a, 0x3a, 0x4f, 0x62, 0x49, 0x45, 0x53, 0x4c, 0x4e, 0x41,
+    0x63, 0x5e, 0x44, 0x44, 0x47, 0x43, 0x47, 0x59, 0x4c, 0x4b, 0x4c, 0x49,
+    0x3e, 0x43, 0x4c, 0x46, 0x4c, 0x38, 0x47, 0x46, 0x46, 0x47, 0x40, 0x44,
+    0x51, 0x3e, 0x40, 0x47, 0x3f, 0x45, 0x48, 0x2a, 0x42, 0x3e, 0x43, 0x46,
+    0x50, 0x4c, 0x4a, 0x2c, 0x49, 0x4b, 0x48, 0x48, 0x40, 0x4a, 0x4a, 0x37,
+    0x4e, 0x42, 0x4f, 0x4c, 0x41, 0x43, 0x45, 0x38, 0x4e, 0x3d, 0x41, 0x47,
+    0x42, 0x42, 0x43, 0x3b, 0x4a, 0x40, 0x48, 0x4a, 0x53, 0x44, 0x4d, 0x35,
+    0x51, 0x3c, 0x4e, 0x4e, 0x3e, 0x3f, 0x4b, 0x3c, 0x3e, 0x47, 0x41, 0x48,
+    0x40, 0x46, 0x4e, 0x44, 0x49, 0x42, 0x49, 0x44, 0x4b, 0x46, 0x46, 0x43,
+    0x4c, 0x4b, 0x49, 0x4d, 0x3d, 0x47, 0x43, 0x5c, 0x4a, 0x42, 0x47, 0x4e,
+    0x47, 0x40, 0x4c, 0x55, 0x3f, 0x45, 0x46, 0x49, 0x46, 0x48, 0x49, 0x4d,
+    0x4c, 0x41, 0x49, 0x40, 0x4a, 0x44, 0x42, 0x49, 0x52, 0x41, 0x49, 0x4a,
+    0x5c, 0x53, 0x47, 0x58, 0x49, 0x55, 0x4a, 0x4a, 0x62, 0x61, 0x4b, 0x57,
+    0x3c, 0x50, 0x42, 0x4c, 0x49, 0x5f, 0x3f, 0x4a, 0x42, 0x70, 0x40, 0x40,
+    0x4f, 0x46, 0x43, 0x43, 0x4d, 0x6c, 0x41, 0x3e, 0x4e, 0x49, 0x43, 0x38,
+    0x50, 0x57, 0x43, 0x39, 0x4a, 0x4f, 0x51, 0x3e, 0x5c, 0x57, 0x46, 0x49,
+    0x41, 0x40, 0x42, 0x4f, 0x4c, 0x45, 0x46, 0x4a, 0x4c, 0x4b, 0x43, 0x42,
+    0x4c, 0x3c, 0x47, 0x47, 0x4f, 0x44, 0x45, 0x3a, 0x4d, 0x3d, 0x4d, 0x3f,
+    0x46, 0x4f, 0x41, 0x37, 0x46, 0x45, 0x54, 0x47, 0x4e, 0x46, 0x47, 0x23,
+    0x48, 0x4e, 0x4a, 0x47, 0x45, 0x45, 0x4e, 0x33, 0x49, 0x4a, 0x4d, 0x4e,
+    0x49, 0x46, 0x49, 0x36, 0x48, 0x44, 0x53, 0x44, 0x4a, 0x45, 0x4a, 0x37,
+    0x45, 0x36, 0x4b, 0x4e, 0x50, 0x3f, 0x49, 0x38, 0x40, 0x43, 0x46, 0x4c,
+    0x43, 0x46, 0x4a, 0x3f, 0x45, 0x3d, 0x44, 0x47, 0x44, 0x42, 0x4a, 0x45,
+    0x47, 0x43, 0x4d, 0x4d, 0x44, 0x44, 0x4f, 0x4a, 0x4a, 0x41, 0x50, 0x50,
+    0x4b, 0x44, 0x54, 0x5c, 0x4b, 0x3a, 0x46, 0x4a, 0x4a, 0x43, 0x48, 0x5c,
+    0x4b, 0x43, 0x47, 0x3d, 0x3e, 0x54, 0x42, 0x47, 0x42, 0x4f, 0x4b, 0x4b,
+    0x46, 0x46, 0x46, 0x42, 0x42, 0x4b, 0x48, 0x45, 0x51, 0x4e, 0x49, 0x4d,
+    0x43, 0x56, 0x45, 0x40, 0x5a, 0x58, 0x4c, 0x55, 0x40, 0x4b, 0x4c, 0x51,
+    0x42, 0x59, 0x43, 0x46, 0x46, 0x69, 0x43, 0x3c, 0x54, 0x47, 0x3d, 0x41,
+    0x52, 0x64, 0x44, 0x38, 0x4f, 0x49, 0x3a, 0x3a, 0x55, 0x54, 0x45, 0x3e,
+    0x49, 0x44, 0x4e, 0x3f, 0x57, 0x50, 0x47, 0x43, 0x45, 0x48, 0x53, 0x5b,
+    0x53, 0x4d, 0x48, 0x4e, 0x48, 0x3a, 0x3e, 0x46, 0x42, 0x36, 0x50, 0x4d,
+    0x49, 0x4b, 0x4b, 0x45, 0x4c, 0x44, 0x50, 0x47, 0x3e, 0x49, 0x50, 0x37,
+    0x4c, 0x4b, 0x4a, 0x54, 0x4e, 0x43, 0x40, 0x25, 0x46, 0x42, 0x52, 0x3d,
+    0x44, 0x45, 0x51, 0x2e, 0x4a, 0x3d, 0x46, 0x46, 0x4c, 0x42, 0x48, 0x34,
+    0x44, 0x44, 0x44, 0x4c, 0x4f, 0x4b, 0x42, 0x3d, 0x45, 0x40, 0x47, 0x49,
+    0x43, 0x41, 0x3e, 0x39, 0x47, 0x4b, 0x50, 0x4a, 0x46, 0x47, 0x4e, 0x3b,
+    0x4e, 0x3e, 0x49, 0x4a, 0x50, 0x40, 0x43, 0x49, 0x48, 0x3c, 0x4f, 0x45,
+    0x4a, 0x41, 0x42, 0x48, 0x4b, 0x46, 0x4a, 0x50, 0x40, 0x49, 0x44, 0x54,
+    0x45, 0x45, 0x4a, 0x4b, 0x51, 0x51, 0x48, 0x53, 0x50, 0x3f, 0x50, 0x46,
+    0x44, 0x45, 0x51, 0x43, 0x4f, 0x3e, 0x41, 0x41, 0x46, 0x45, 0x45, 0x4c,
+    0x54, 0x3c, 0x4a, 0x4c, 0x5a, 0x4f, 0x46, 0x4b, 0x47, 0x4a, 0x43, 0x4c,
+    0x56, 0x5a, 0x4a, 0x53, 0x4c, 0x49, 0x46, 0x4c, 0x45, 0x59, 0x40, 0x4b,
+    0x48, 0x60, 0x3d, 0x42, 0x52, 0x3f, 0x42, 0x3d, 0x52, 0x5f, 0x46, 0x42,
+    0x4b, 0x4e, 0x4a, 0x3d, 0x52, 0x55, 0x53, 0x37, 0x47, 0x3e, 0x4a, 0x42,
+    0x51, 0x54, 0x48, 0x48, 0x4b, 0x48, 0x3e, 0x52, 0x41, 0x4e, 0x4c, 0x4f,
+    0x43, 0x3b, 0x4b, 0x4b, 0x4c, 0x40, 0x48, 0x49, 0x4d, 0x3a, 0x45, 0x3c,
+    0x53, 0x44, 0x48, 0x4d, 0x4b, 0x49, 0x46, 0x3c, 0x4d, 0x40, 0x51, 0x3f,
+    0x4c, 0x45, 0x44, 0x2f, 0x49, 0x51, 0x3f, 0x4d, 0x3e, 0x4e, 0x3c, 0x30,
+    0x3d, 0x48, 0x4f, 0x3f, 0x45, 0x45, 0x46, 0x3b, 0x4c, 0x46, 0x4d, 0x50,
+    0x4c, 0x3d, 0x41, 0x37, 0x3e, 0x3e, 0x4f, 0x4b, 0x4d, 0x4f, 0x45, 0x45,
+    0x4a, 0x47, 0x4a, 0x44, 0x43, 0x46, 0x51, 0x41, 0x4e, 0x39, 0x44, 0x4a,
+    0x4e, 0x49, 0x4a, 0x42, 0x49, 0x4b, 0x4e, 0x48, 0x49, 0x4a, 0x45, 0x4a,
+    0x45, 0x41, 0x4a, 0x4b, 0x42, 0x41, 0x48, 0x4a, 0x44, 0x3a, 0x46, 0x49,
+    0x54, 0x45, 0x44, 0x60, 0x4a, 0x4e, 0x45, 0x4a, 0x4a, 0x45, 0x4b, 0x49,
+    0x42, 0x44, 0x46, 0x50, 0x4b, 0x4b, 0x4e, 0x45, 0x48, 0x3e, 0x55, 0x42,
+    0x51, 0x49, 0x49, 0x44, 0x4e, 0x54, 0x53, 0x49, 0x4c, 0x63, 0x48, 0x5a,
+    0x50, 0x4b, 0x45, 0x49, 0x43, 0x57, 0x4c, 0x3f, 0x4d, 0x67, 0x3f, 0x47,
+    0x53, 0x49, 0x43, 0x44, 0x49, 0x61, 0x50, 0x47, 0x49, 0x49, 0x4a, 0x42,
+    0x4a, 0x51, 0x46, 0x43, 0x3f, 0x34, 0x40, 0x3a, 0x45, 0x54, 0x4c, 0x55,
+    0x40, 0x3c, 0x4a, 0x4d, 0x3e, 0x4d, 0x48, 0x51, 0x4c, 0x3e, 0x4c, 0x4f,
+    0x50, 0x47, 0x4d, 0x49, 0x4d, 0x4e, 0x45, 0x43, 0x41, 0x41, 0x40, 0x47,
+    0x43, 0x4a, 0x4a, 0x3c, 0x4c, 0x3d, 0x4e, 0x43, 0x41, 0x42, 0x4a, 0x30,
+    0x45, 0x4c, 0x45, 0x55, 0x46, 0x39, 0x43, 0x39, 0x45, 0x47, 0x48, 0x53,
+    0x4a, 0x48, 0x43, 0x38, 0x4f, 0x51, 0x4d, 0x4c, 0x41, 0x46, 0x40, 0x3d,
+    0x43, 0x4b, 0x40, 0x46, 0x47, 0x50, 0x4a, 0x43, 0x50, 0x4e, 0x45, 0x4f,
+    0x4d, 0x44, 0x4d, 0x3f, 0x4e, 0x48, 0x4a, 0x49, 0x44, 0x3d, 0x4a, 0x44,
+    0x40, 0x45, 0x49, 0x40, 0x4a, 0x44, 0x4f, 0x4a, 0x43, 0x4a, 0x4e, 0x52,
+    0x4d, 0x50, 0x48, 0x4c, 0x43, 0x45, 0x4d, 0x54, 0x4a, 0x49, 0x4c, 0x58,
+    0x4c, 0x48, 0x4c, 0x44, 0x4b, 0x4e, 0x52, 0x44, 0x49, 0x44, 0x47, 0x4e,
+    0x4b, 0x45, 0x49, 0x3e, 0x4c, 0x3b, 0x53, 0x3f, 0x51, 0x41, 0x3f, 0x44,
+    0x43, 0x4a, 0x4b, 0x43, 0x53, 0x57, 0x50, 0x53, 0x4f, 0x4b, 0x48, 0x51,
+    0x47, 0x49, 0x46, 0x4d, 0x4d, 0x5e, 0x44, 0x46, 0x56, 0x3d, 0x3c, 0x3e,
+    0x47, 0x55, 0x54, 0x46, 0x42, 0x49, 0x4f, 0x43, 0x48, 0x54, 0x51, 0x40,
+    0x44, 0x44, 0x47, 0x45, 0x4b, 0x59, 0x4d, 0x47, 0x40, 0x39, 0x48, 0x54,
+    0x43, 0x45, 0x44, 0x42, 0x4c, 0x3c, 0x4d, 0x42, 0x4b, 0x45, 0x42, 0x48,
+    0x51, 0x44, 0x45, 0x3f, 0x3d, 0x49, 0x4b, 0x4a, 0x41, 0x43, 0x4f, 0x3f,
+    0x51, 0x4b, 0x44, 0x46, 0x46, 0x44, 0x53, 0x3d, 0x47, 0x47, 0x43, 0x4b,
+    0x41, 0x43, 0x3c, 0x3b, 0x49, 0x47, 0x47, 0x49, 0x4b, 0x3d, 0x43, 0x43,
+    0x4b, 0x47, 0x45, 0x4e, 0x42, 0x4a, 0x4c, 0x3e, 0x51, 0x3e, 0x46, 0x44,
+    0x46, 0x43, 0x42, 0x42, 0x47, 0x4d, 0x51, 0x4b, 0x49, 0x44, 0x4d, 0x40,
+    0x50, 0x43, 0x41, 0x4c, 0x42, 0x49, 0x49, 0x4c, 0x42, 0x50, 0x48, 0x3f,
+    0x46, 0x42, 0x48, 0x57, 0x49, 0x4d, 0x47, 0x4e, 0x48, 0x4b, 0x46, 0x50,
+    0x47, 0x45, 0x52, 0x45, 0x4b, 0x48, 0x40, 0x5b, 0x4e, 0x43, 0x51, 0x48,
+    0x48, 0x4a, 0x4a, 0x4a, 0x52, 0x51, 0x4c, 0x4b, 0x42, 0x55, 0x4d, 0x46,
+    0x50, 0x40, 0x4a, 0x50, 0x51, 0x3e, 0x42, 0x4c, 0x43, 0x46, 0x4d, 0x46,
+    0x46, 0x4d, 0x4d, 0x52, 0x4e, 0x44, 0x45, 0x47, 0x49, 0x4c, 0x41, 0x44,
+    0x4d, 0x54, 0x4c, 0x4a, 0x54, 0x3e, 0x44, 0x43, 0x53, 0x55, 0x4b, 0x4a,
+    0x47, 0x47, 0x4f, 0x46, 0x4f, 0x4b, 0x51, 0x3f, 0x41, 0x4c, 0x43, 0x46,
+    0x55, 0x51, 0x40, 0x4b, 0x4f, 0x40, 0x47, 0x50, 0x4e, 0x4a, 0x46, 0x4e,
+    0x42, 0x4d, 0x48, 0x49, 0x48, 0x4a, 0x4a, 0x43, 0x49, 0x48, 0x44, 0x3b,
+    0x51, 0x46, 0x3d, 0x43, 0x47, 0x4a, 0x4f, 0x42, 0x4a, 0x50, 0x4f, 0x41,
+    0x45, 0x45, 0x43, 0x3c, 0x4c, 0x4c, 0x46, 0x4b, 0x3e, 0x44, 0x4b, 0x3a,
+    0x45, 0x50, 0x42, 0x48, 0x46, 0x47, 0x44, 0x3a, 0x53, 0x46, 0x4e, 0x4f,
+    0x43, 0x40, 0x46, 0x48, 0x4e, 0x45, 0x3f, 0x47, 0x48, 0x3f, 0x44, 0x4f,
+    0x44, 0x47, 0x4e, 0x47, 0x47, 0x49, 0x42, 0x43, 0x3f, 0x49, 0x4a, 0x53,
+    0x53, 0x4a, 0x4e, 0x4a, 0x49, 0x4d, 0x49, 0x41, 0x48, 0x4d, 0x4d, 0x4e,
+    0x4b, 0x45, 0x4d, 0x4a, 0x46, 0x4a, 0x46, 0x51, 0x4b, 0x47, 0x49, 0x45,
+    0x49, 0x49, 0x4b, 0x5c, 0x48, 0x42, 0x51, 0x4c, 0x41, 0x3f, 0x4c, 0x42,
+    0x4f, 0x45, 0x4b, 0x4a, 0x52, 0x48, 0x53, 0x4f, 0x40, 0x47, 0x41, 0x47,
+    0x68, 0xfb, 0xff, 0xff, 0x4c, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
+    0x58, 0x01, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00,
+    0x38, 0x02, 0x00, 0x00, 0x9c, 0x02, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00,
+    0x14, 0x03, 0x00, 0x00, 0xfe, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x19, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 0x00, 0x00,
+    0xcc, 0xfc, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x17, 0xbf, 0xd2, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x58, 0xec, 0xd1, 0x43,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0xfd, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x43, 0x6f, 0x6e, 0x76,
+    0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x34, 0xff, 0xff, 0xff,
+    0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a, 0xc2, 0xfd, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68,
+    0x61, 0x70, 0x65, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x94, 0xfd, 0xff, 0xff,
+    0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0xfe, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d,
+    0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x0c, 0x00, 0x0c, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0xc5, 0x01, 0x2a, 0x3b, 0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x25, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f,
+    0x71, 0x75, 0x61, 0x6e, 0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75,
+    0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61,
+    0x78, 0x56, 0x61, 0x72, 0x73, 0x00, 0x00, 0x00, 0x84, 0xfe, 0xff, 0xff,
+    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xab, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a,
+    0x01, 0x00, 0x00, 0x00, 0x6e, 0x88, 0xae, 0x3d, 0x01, 0x00, 0x00, 0x00,
+    0xd4, 0x97, 0x30, 0xbe, 0x26, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x61, 0x64, 0x64, 0x5f,
+    0x31, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x2f, 0xad, 0x18, 0x40, 0x01, 0x00, 0x00, 0x00,
+    0x02, 0x38, 0xa2, 0x43, 0x01, 0x00, 0x00, 0x00, 0x02, 0xf1, 0x8d, 0xc3,
+    0x8e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73,
+    0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff,
+    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3b,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00,
+    0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,
+    0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e,
+    0x74, 0x5f, 0x31, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e,
+    0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56,
+    0x61, 0x72, 0x73, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73,
+    0x65, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00,
+    0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x31, 0x83, 0xce, 0x3a, 0x01, 0x00, 0x00, 0x00,
+    0x4d, 0x97, 0x92, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x84, 0x75, 0xec, 0xbd,
+    0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09,
+    0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x14, 0x00, 0x1c, 0x00,
+    0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x18, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
+    0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x18, 0x00,
+    0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+    0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+    0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0xfa, 0xff, 0xff, 0xff, 0x00, 0x19, 0x06, 0x00,
+    0x06, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x09, 0x06, 0x00,
+    0x08, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04};
+const int g_tiny_conv_model_data_len = 19800;
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h
new file mode 100644
index 0000000000..2953cc852d
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h
@@ -0,0 +1,27 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is a standard TensorFlow Lite model file that has been converted into a
+// C data array, so it can be easily compiled into a binary for devices that
+// don't have a file system. It was created using the command:
+// xxd -i tiny_conv.tflite > tiny_conv_model_data.cc
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_
+
+extern const unsigned char g_tiny_conv_model_data[];
+extern const int g_tiny_conv_model_data_len;
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_TINY_CONV_MODEL_DATA_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/BUILD b/tensorflow/contrib/lite/experimental/micro/kernels/BUILD
new file mode 100644
index 0000000000..a012f950e6
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/BUILD
@@ -0,0 +1,107 @@
+package(default_visibility = [
+    "//visibility:public",
+])
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts")
+load(
+    "//tensorflow/contrib/lite/experimental/micro/testing:micro_test.bzl",
+    "tflite_micro_cc_test",
+)
+
+cc_library(
+    name = "micro_ops",
+    srcs = [
+        "depthwise_conv.cc",
+        "fully_connected.cc",
+        "softmax.cc",
+    ],
+    hdrs = [
+    ],
+    copts = tflite_copts(),
+    deps = [
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/kernels:kernel_util",
+        "//tensorflow/contrib/lite/kernels:op_macros",
+        "//tensorflow/contrib/lite/kernels:padding",
+        "//tensorflow/contrib/lite/kernels/internal:quantization_util",
+        "//tensorflow/contrib/lite/kernels/internal:reference_base",
+        "//tensorflow/contrib/lite/kernels/internal:tensor",
+    ],
+)
+
+cc_library(
+    name = "all_ops_resolver",
+    srcs = [
+        "all_ops_resolver.cc",
+    ],
+    hdrs = [
+        "all_ops_resolver.h",
+    ],
+    copts = tflite_copts(),
+    deps = [
+        ":micro_ops",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+    ],
+)
+
+cc_library(
+    name = "test_utils",
+    srcs = [
+    ],
+    hdrs = [
+        "test_utils.h",
+    ],
+    copts = tflite_copts(),
+    deps = [
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/core/api",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "depthwise_conv_test",
+    srcs = [
+        "depthwise_conv_test.cc",
+    ],
+    deps = [
+        ":all_ops_resolver",
+        ":test_utils",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "fully_connected_test",
+    srcs = [
+        "fully_connected_test.cc",
+    ],
+    deps = [
+        ":all_ops_resolver",
+        ":test_utils",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "softmax_test",
+    srcs = [
+        "softmax_test.cc",
+    ],
+    deps = [
+        ":all_ops_resolver",
+        ":test_utils",
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+        "//tensorflow/contrib/lite/experimental/micro/testing:micro_test",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc
new file mode 100644
index 0000000000..bd0a37badb
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.cc
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+
+TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
+TfLiteRegistration* Micro_Register_DEPTHWISE_CONV_2D() {
+  return Register_DEPTHWISE_CONV_2D();
+}
+
+TfLiteRegistration* Register_FULLY_CONNECTED();
+TfLiteRegistration* Micro_Register_FULLY_CONNECTED() {
+  return Register_FULLY_CONNECTED();
+}
+
+TfLiteRegistration* Register_SOFTMAX();
+TfLiteRegistration* Micro_Register_SOFTMAX() { return Register_SOFTMAX(); }
+
+AllOpsResolver::AllOpsResolver() {
+  AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
+             Micro_Register_DEPTHWISE_CONV_2D());
+  AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Micro_Register_FULLY_CONNECTED(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_SOFTMAX, Micro_Register_SOFTMAX());
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h
new file mode 100644
index 0000000000..f836064a3f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
+
+#include "tensorflow/contrib/lite/experimental/micro/compatibility.h"
+#include "tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+
+class AllOpsResolver : public MicroMutableOpResolver {
+ public:
+  AllOpsResolver();
+
+ private:
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc
new file mode 100644
index 0000000000..4f17263181
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv.cc
@@ -0,0 +1,208 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/padding.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace depthwise_conv {
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kFilterTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
+
+struct OpData {
+  TfLitePaddingValues padding;
+  // The scaling factor from input to output (aka the 'real multiplier') can
+  // be represented as a fixed point multiplier plus a left shift.
+  int32_t output_multiplier;
+  int output_shift;
+  // The range of the fused activation layer. For example for kNone and
+  // uint8_t these would be 0 and 255.
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+};
+
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
+                             TfLiteDepthwiseConvParams* params, int width,
+                             int height, int filter_width, int filter_height,
+                             int out_width, int out_height,
+                             const TfLiteType data_type, OpData* data) {
+  data->padding.height = ComputePadding(params->stride_height, 1, height,
+                                        filter_height, out_height);
+  data->padding.width =
+      ComputePadding(params->stride_width, 1, width, filter_width, out_width);
+
+  // Note that quantized inference requires that all tensors have their
+  // parameters set. This is usually done during quantized training.
+  if (data_type != kTfLiteFloat32) {
+    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+    const TfLiteTensor* bias =
+        GetOptionalInputTensor(context, node, kBiasTensor);
+    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+    double real_multiplier = 0.0;
+    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
+        context, input, filter, bias, output, &real_multiplier));
+    int exponent;
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
+    data->output_shift = -exponent;
+    CalculateActivationRangeUint8(params->activation, output,
+                                  &data->output_activation_min,
+                                  &data->output_activation_max);
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+void Free(TfLiteContext* context, void* buffer) {}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+void EvalFloat(TfLiteContext* context, TfLiteNode* node,
+               TfLiteDepthwiseConvParams* params, OpData* data,
+               const TfLiteTensor* input, const TfLiteTensor* filter,
+               const TfLiteTensor* bias, TfLiteTensor* output) {
+  float output_activation_min, output_activation_max;
+  CalculateActivationRange(params->activation, &output_activation_min,
+                           &output_activation_max);
+
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = 1;
+  op_params.dilation_height_factor = 1;
+  op_params.depth_multiplier = params->depth_multiplier;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  tflite::reference_ops::DepthwiseConv(
+      op_params, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(filter), GetTensorData<float>(filter),
+      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
+      GetTensorData<float>(output));
+}
+
+void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                   TfLiteDepthwiseConvParams* params, OpData* data,
+                   const TfLiteTensor* input, const TfLiteTensor* filter,
+                   const TfLiteTensor* bias, TfLiteTensor* output) {
+  const int32_t input_offset = -input->params.zero_point;
+  const int32_t filter_offset = -filter->params.zero_point;
+  const int32_t output_offset = output->params.zero_point;
+
+  tflite::DepthwiseParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = 1;
+  op_params.dilation_height_factor = 1;
+  op_params.depth_multiplier = params->depth_multiplier;
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = data->output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = -data->output_shift;
+
+  tflite::reference_ops::DepthwiseConv(
+      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+      GetTensorShape(filter), GetTensorData<uint8_t>(filter),
+      GetTensorShape(bias), GetTensorData<int32_t>(bias),
+      GetTensorShape(output), GetTensorData<uint8_t>(output));
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
+
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+  const TfLiteTensor* bias =
+      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
+
+  const TfLiteType data_type = input->type;
+  int width = SizeOfDimension(input, 2);
+  int height = SizeOfDimension(input, 1);
+  int filter_width = SizeOfDimension(filter, 2);
+  int filter_height = SizeOfDimension(filter, 1);
+  int out_width = ComputeOutSize(params->padding, width, filter_width,
+                                 params->stride_width);
+  int out_height = ComputeOutSize(params->padding, height, filter_height,
+                                  params->stride_height);
+  OpData local_data_object;
+  OpData* data = &local_data_object;
+  TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
+                                        filter_width, filter_height, out_width,
+                                        out_height, data_type, data));
+
+  // TODO(aselle): Consider whether float conv and quantized conv should be
+  // separate ops to avoid dispatch overhead here.
+  switch (input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      EvalFloat(context, node, params, data, input, filter, bias, output);
+      break;
+    case kTfLiteUInt8:
+      EvalQuantized(context, node, params, data, input, filter, bias, output);
+      break;
+    default:
+      context->ReportError(context, "Type %d not currently supported.",
+                           input->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace depthwise_conv
+
+TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
+  static TfLiteRegistration r = {depthwise_conv::Init, depthwise_conv::Free,
+                                 depthwise_conv::Prepare, depthwise_conv::Eval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc
new file mode 100644
index 0000000000..169899c471
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test.cc
@@ -0,0 +1,406 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h"
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+namespace {
+
+void TestDepthwiseConvFloat(std::initializer_list<int> input_dims_data,
+                            std::initializer_list<float> input_data,
+                            std::initializer_list<int> filter_dims_data,
+                            std::initializer_list<float> filter_data,
+                            std::initializer_list<int> bias_dims_data,
+                            std::initializer_list<float> bias_data,
+                            std::initializer_list<float> expected_output_data,
+                            std::initializer_list<int> output_dims_data,
+                            TfLiteFusedActivation activation,
+                            float* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* filter_dims = IntArrayFromInitializer(filter_dims_data);
+  TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 3;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(filter_data, filter_dims, "filter_tensor"),
+      CreateFloatTensor(bias_data, bias_dims, "bias_tensor"),
+      CreateFloatTensor(output_data, output_dims, "output_tensor"),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  int input_depth = input_dims->data[3];
+  int output_depth = filter_dims->data[3];
+  int depth_mul = output_depth / input_depth;
+  TfLiteDepthwiseConvParams builtin_data = {
+      kTfLitePaddingValid, 1, 1, depth_mul, activation,
+  };
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+  int inputs_array_data[] = {3, 0, 1, 2};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 3};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1e-5f);
+  }
+}
+
+void TestDepthwiseConvQuantized(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<uint8_t> input_data, float input_min, float input_max,
+    std::initializer_list<int> filter_dims_data,
+    std::initializer_list<uint8_t> filter_data, float filter_min,
+    float filter_max, std::initializer_list<int> bias_dims_data,
+    std::initializer_list<int32_t> bias_data, float bias_min, float bias_max,
+    std::initializer_list<uint8_t> expected_output_data,
+    std::initializer_list<int> output_dims_data, float output_min,
+    float output_max, TfLiteFusedActivation activation, uint8_t* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* filter_dims = IntArrayFromInitializer(filter_dims_data);
+  TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 3;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min,
+                            input_max),
+      CreateQuantizedTensor(filter_data, filter_dims, "filter_tensor",
+                            filter_min, filter_max),
+      CreateQuantized32Tensor(bias_data, bias_dims, "bias_tensor", bias_min,
+                              bias_max),
+      CreateQuantizedTensor(output_data, output_dims, "output_tensor",
+                            output_min, output_max),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  int input_depth = input_dims->data[3];
+  int output_depth = filter_dims->data[3];
+  int depth_mul = output_depth / input_depth;
+  TfLiteDepthwiseConvParams builtin_data = {
+      kTfLitePaddingValid, 1, 1, depth_mul, activation,
+  };
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+
+  int inputs_array_data[] = {3, 0, 1, 2};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 3};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]);
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(SimpleTest) {
+  const int output_dims_count = 8;
+  float output_data[output_dims_count];
+  tflite::testing::TestDepthwiseConvFloat(  //
+      {4, 1, 3, 2, 2},                      // Input shape.
+      {
+          1, 2, 7, 8,    // Input values.
+          3, 4, 9, 10,   //
+          5, 6, 11, 12,  //
+      },
+      {4, 1, 2, 2, 4},  // Filters shape.
+      {
+          1, 2, 3, 4,        // Filters values.
+          -9, 10, -11, 12,   //
+          5, 6, 7, 8,        //
+          13, -14, 15, -16,  //
+      },
+      {1, 4},  // Bias shape.
+      {
+          1, 2, 3, 4,  // Bias values.
+      },
+      {
+          71, -34, 99, -20,  // Expected results.
+          91, -26, 127, -4,  //
+      },
+      {4, 1, 2, 1, 4},  // Output shape.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantized) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float filter_min = -63.5f;
+  const float filter_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 8;
+  uint8_t output_data[output_dims_count];
+
+  tflite::testing::TestDepthwiseConvQuantized(  //
+      {4, 1, 3, 2, 2},                          // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),
+          F2Q(2, input_min, input_max),
+          F2Q(7, input_min, input_max),
+          F2Q(8, input_min, input_max),
+          F2Q(3, input_min, input_max),
+          F2Q(4, input_min, input_max),
+          F2Q(9, input_min, input_max),
+          F2Q(10, input_min, input_max),
+          F2Q(5, input_min, input_max),
+          F2Q(6, input_min, input_max),
+          F2Q(11, input_min, input_max),
+          F2Q(12, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {4, 1, 2, 2, 4},       // Filter shape.
+      {
+          // Filter values.
+          F2Q(1, filter_min, filter_max),
+          F2Q(2, filter_min, filter_max),
+          F2Q(3, filter_min, filter_max),
+          F2Q(4, filter_min, filter_max),
+          F2Q(-9, filter_min, filter_max),
+          F2Q(10, filter_min, filter_max),
+          F2Q(-11, filter_min, filter_max),
+          F2Q(12, filter_min, filter_max),
+          F2Q(5, filter_min, filter_max),
+          F2Q(6, filter_min, filter_max),
+          F2Q(7, filter_min, filter_max),
+          F2Q(8, filter_min, filter_max),
+          F2Q(13, filter_min, filter_max),
+          F2Q(-14, filter_min, filter_max),
+          F2Q(15, filter_min, filter_max),
+          F2Q(-16, filter_min, filter_max),
+      },
+      filter_min, filter_max,  // Filter quantization range.
+      {1, 4},                  // Bias shape.
+      {
+          // Bias values.
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+          F2Q32(4, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(71, output_min, output_max),
+          F2Q(-34, output_min, output_max),
+          F2Q(99, output_min, output_max),
+          F2Q(-20, output_min, output_max),
+          F2Q(91, output_min, output_max),
+          F2Q(-26, output_min, output_max),
+          F2Q(127, output_min, output_max),
+          F2Q(-4, output_min, output_max),
+      },
+      {4, 1, 2, 1, 4},         // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestRelu) {
+  const int output_dims_count = 8;
+  float output_data[output_dims_count];
+  tflite::testing::TestDepthwiseConvFloat(  //
+      {4, 1, 3, 2, 2},                      // Input shape.
+      {
+          1, 2, 7, 8,    // Input values.
+          3, 4, 9, 10,   //
+          5, 6, 11, 12,  //
+      },
+      {4, 1, 2, 2, 4},  // Filters shape.
+      {
+          1, 2, 3, 4,        // Filters values.
+          -9, 10, -11, 12,   //
+          5, 6, 7, 8,        //
+          13, -14, 15, -16,  //
+      },
+      {1, 4},  // Bias shape.
+      {
+          1, 2, 3, 4,  // Bias values.
+      },
+      {
+          71, 0, 99, 0,   // Expected results.
+          91, 0, 127, 0,  //
+      },
+      {4, 1, 2, 1, 4},  // Output shape.
+      kTfLiteActRelu, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestReluQuantized) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float filter_min = -63.5f;
+  const float filter_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 8;
+  uint8_t output_data[output_dims_count];
+
+  tflite::testing::TestDepthwiseConvQuantized(  //
+      {4, 1, 3, 2, 2},                          // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),
+          F2Q(2, input_min, input_max),
+          F2Q(7, input_min, input_max),
+          F2Q(8, input_min, input_max),
+          F2Q(3, input_min, input_max),
+          F2Q(4, input_min, input_max),
+          F2Q(9, input_min, input_max),
+          F2Q(10, input_min, input_max),
+          F2Q(5, input_min, input_max),
+          F2Q(6, input_min, input_max),
+          F2Q(11, input_min, input_max),
+          F2Q(12, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {4, 1, 2, 2, 4},       // Filter shape.
+      {
+          // Filter values.
+          F2Q(1, filter_min, filter_max),
+          F2Q(2, filter_min, filter_max),
+          F2Q(3, filter_min, filter_max),
+          F2Q(4, filter_min, filter_max),
+          F2Q(-9, filter_min, filter_max),
+          F2Q(10, filter_min, filter_max),
+          F2Q(-11, filter_min, filter_max),
+          F2Q(12, filter_min, filter_max),
+          F2Q(5, filter_min, filter_max),
+          F2Q(6, filter_min, filter_max),
+          F2Q(7, filter_min, filter_max),
+          F2Q(8, filter_min, filter_max),
+          F2Q(13, filter_min, filter_max),
+          F2Q(-14, filter_min, filter_max),
+          F2Q(15, filter_min, filter_max),
+          F2Q(-16, filter_min, filter_max),
+      },
+      filter_min, filter_max,  // Filter quantization range.
+      {1, 4},                  // Bias shape.
+      {
+          // Bias values.
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+          F2Q32(4, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(71, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(99, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(91, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(127, output_min, output_max),
+          F2Q(0, output_min, output_max),
+      },
+      {4, 1, 2, 1, 4},         // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActRelu, output_data);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc
new file mode 100644
index 0000000000..1e9e54cafb
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected.cc
@@ -0,0 +1,184 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace fully_connected {
+namespace {
+
+struct OpData {
+  // The scaling factor from input to output (aka the 'real multiplier') can
+  // be represented as a fixed point multiplier plus a left shift.
+  int32_t output_multiplier;
+  int output_shift;
+  // The range of the fused activation layer. For example for kNone and
+  // uint8_t these would be 0 and 255.
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  // The index of the temporary tensor where the quantized inputs are cached.
+  int input_quantized_index;
+};
+
+constexpr int kInputTensor = 0;
+constexpr int kWeightsTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus CalculateOpData(TfLiteContext* context,
+                             TfLiteFullyConnectedParams* params,
+                             TfLiteType data_type, const TfLiteTensor* input,
+                             const TfLiteTensor* filter,
+                             const TfLiteTensor* bias, TfLiteTensor* output,
+                             OpData* data) {
+  TfLiteStatus status = kTfLiteOk;
+  if (data_type != kTfLiteFloat32) {
+    double real_multiplier = 0.0;
+    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
+        context, input, filter, bias, output, &real_multiplier));
+    int exponent;
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
+    data->output_shift = -exponent;
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
+  }
+  return status;
+}
+
+}  // namespace
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+void Free(TfLiteContext* context, void* buffer) {}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                           TfLiteFullyConnectedParams* params, OpData* data,
+                           const TfLiteTensor* input,
+                           const TfLiteTensor* filter, const TfLiteTensor* bias,
+                           TfLiteTensor* output) {
+  const int32_t input_offset = -input->params.zero_point;
+  const int32_t filter_offset = -filter->params.zero_point;
+  const int32_t output_offset = output->params.zero_point;
+
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = data->output_multiplier;
+  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
+  op_params.output_shift = -data->output_shift;
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+
+#define TF_LITE_FULLY_CONNECTED(output_data_type)                      \
+  reference_ops::FullyConnected(                                       \
+      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
+      GetTensorShape(filter), GetTensorData<uint8_t>(filter),          \
+      GetTensorShape(bias), GetTensorData<int32_t>(bias),              \
+      GetTensorShape(output), GetTensorData<output_data_type>(output), \
+      nullptr)
+  switch (output->type) {
+    case kTfLiteUInt8:
+      TF_LITE_FULLY_CONNECTED(uint8_t);
+      break;
+    case kTfLiteInt16:
+      TF_LITE_FULLY_CONNECTED(int16_t);
+      break;
+    default:
+      context->ReportError(
+          context,
+          "Quantized FullyConnected expects output data type uint8 or int16");
+      return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
+                       TfLiteFullyConnectedParams* params, OpData* data,
+                       const TfLiteTensor* input, const TfLiteTensor* filter,
+                       const TfLiteTensor* bias, TfLiteTensor* output) {
+  float output_activation_min, output_activation_max;
+  CalculateActivationRange(params->activation, &output_activation_min,
+                           &output_activation_max);
+  tflite::FullyConnectedParams op_params;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+  tflite::reference_ops::FullyConnected(
+      op_params, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(filter), GetTensorData<float>(filter),
+      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
+      GetTensorData<float>(output));
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
+
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
+  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  TfLiteType data_type = input->type;
+  OpData local_data_object;
+  OpData* data = &local_data_object;
+  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, data_type, input,
+                                        filter, bias, output, data));
+
+  switch (filter->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      return EvalFloat(context, node, params, data, input, filter, bias,
+                       output);
+    case kTfLiteUInt8:
+      return EvalQuantized(context, node, params, data, input, filter, bias,
+                           output);
+
+    default:
+      context->ReportError(context, "Type %d not currently supported.",
+                           filter->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace fully_connected
+
+TfLiteRegistration* Register_FULLY_CONNECTED() {
+  static TfLiteRegistration r = {fully_connected::Init, fully_connected::Free,
+                                 fully_connected::Prepare,
+                                 fully_connected::Eval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc
new file mode 100644
index 0000000000..b42bf4c3bc
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/fully_connected_test.cc
@@ -0,0 +1,643 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h"
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+namespace {
+
+void TestFullyConnectedFloat(std::initializer_list<int> input_dims_data,
+                             std::initializer_list<float> input_data,
+                             std::initializer_list<int> weights_dims_data,
+                             std::initializer_list<float> weights_data,
+                             std::initializer_list<int> bias_dims_data,
+                             std::initializer_list<float> bias_data,
+                             std::initializer_list<float> expected_output_data,
+                             std::initializer_list<int> output_dims_data,
+                             TfLiteFusedActivation activation,
+                             float* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* weights_dims = IntArrayFromInitializer(weights_dims_data);
+  TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 3;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(weights_data, weights_dims, "weights_tensor"),
+      CreateFloatTensor(bias_data, bias_dims, "bias_tensor"),
+      CreateFloatTensor(output_data, output_dims, "output_tensor"),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteFullyConnectedParams builtin_data = {
+      activation,
+      kTfLiteFullyConnectedWeightsFormatDefault,
+  };
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+  int inputs_array_data[] = {3, 0, 1, 2};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 3};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1e-5f);
+  }
+}
+
+void TestFullyConnectedQuantized(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<uint8_t> input_data, float input_min, float input_max,
+    std::initializer_list<int> weights_dims_data,
+    std::initializer_list<uint8_t> weights_data, float weights_min,
+    float weights_max, std::initializer_list<int> bias_dims_data,
+    std::initializer_list<int32_t> bias_data, float bias_min, float bias_max,
+    std::initializer_list<uint8_t> expected_output_data,
+    std::initializer_list<int> output_dims_data, float output_min,
+    float output_max, TfLiteFusedActivation activation, uint8_t* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* weights_dims = IntArrayFromInitializer(weights_dims_data);
+  TfLiteIntArray* bias_dims = IntArrayFromInitializer(bias_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 3;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min,
+                            input_max),
+      CreateQuantizedTensor(weights_data, weights_dims, "weights_tensor",
+                            weights_min, weights_max),
+      CreateQuantized32Tensor(bias_data, bias_dims, "bias_tensor", bias_min,
+                              bias_max),
+      CreateQuantizedTensor(output_data, output_dims, "output_tensor",
+                            output_min, output_max),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteFullyConnectedParams builtin_data = {
+      activation,
+      kTfLiteFullyConnectedWeightsFormatDefault,
+  };
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+
+  int inputs_array_data[] = {3, 0, 1, 2};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 3};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]);
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(SimpleTest) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedFloat(  //
+      {2, 2, 10},                            // Input shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+          1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+      },
+      {2, 3, 10},  // Weights shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+      },
+      {1, 3},  // Bias shape.
+      {
+          1, 2, 3,  // Bias values.
+      },
+      {
+          24, 25, 26, 58, 59, 60,  // Expected results.
+      },
+      {2, 2, 3},  // Output shape.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTest2) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedFloat(  //
+      {2, 2, 2},                             // Input shape.
+      {
+          1, 2,  // b = 0
+          2, 1,  // b = 1
+      },
+      {2, 1, 2},  // Weights shape.
+      {
+          2, 4,  // u = 0
+      },
+      {1, 1},  // Bias shape.
+      {
+          1,  // Bias values.
+      },
+      {
+          11, 9,  // Expected results.
+      },
+      {2, 2, 1},  // Output shape.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestRelu) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedFloat(  //
+      {2, 2, 10},                            // Input shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+          1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+      },
+      {2, 3, 10},  // Weights shape.
+      {
+          1,  2,  3,  4,  5,  6,  7,  8,  9,  10,   // u = 0
+          -1, -2, -3, -4, -5, -6, -7, -8, -9, -10,  // u = 1
+          1,  2,  3,  4,  5,  6,  7,  8,  9,  10,   // u = 2
+      },
+      {1, 3},  // Bias shape.
+      {
+          1, -2, 3,  // Bias values.
+      },
+      {
+          24, 0, 26, 58, 0, 60,  // Expected results.
+      },
+      {2, 2, 3},  // Output shape.
+      kTfLiteActRelu, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantized) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float weights_min = -63.5f;
+  const float weights_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {2, 2, 10},                                // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(25, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(59, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantizedRelu) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float weights_min = -63.5f;
+  const float weights_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {2, 2, 10},                                // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max),  F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max),  F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max),  F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max),  F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max),  F2Q(10, weights_min, weights_max),
+          F2Q(-1, weights_min, weights_max), F2Q(-2, weights_min, weights_max),
+          F2Q(-3, weights_min, weights_max), F2Q(-4, weights_min, weights_max),
+          F2Q(-5, weights_min, weights_max), F2Q(-6, weights_min, weights_max),
+          F2Q(-7, weights_min, weights_max), F2Q(-8, weights_min, weights_max),
+          F2Q(-9, weights_min, weights_max), F2Q(-10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max),  F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max),  F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max),  F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max),  F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max),  F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(0, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(0, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActRelu, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantizedOutputMultiplierGreaterThan1) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -127.0f;
+  const float input_max = 128.0f;
+  const float weights_min = -127.0f;
+  const float weights_max = 128.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 256.0f * (1 << 24);
+  const float output_min = -63.5f;
+  const float output_max = 64.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {2, 2, 10},                                // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(25, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(59, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTest4DInput) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedFloat(  //
+      {4, 1, 1, 5, 1},                       // Input shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+          1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+      },
+      {2, 3, 10},  // Weights shape.
+      {
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+          1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+      },
+      {1, 3},  // Bias shape.
+      {
+          1, 2, 3,  // Bias values.
+      },
+      {
+          24, 25, 26, 58, 59, 60,  // Expected results.
+      },
+      {2, 2, 3},  // Output shape.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTest4DInputQuantized) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float weights_min = -63.5f;
+  const float weights_max = 64.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 64.0f * (1 << 24);
+  const float output_min = -127.0f;
+  const float output_max = 128.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {4, 1, 1, 5, 1},                           // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(25, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(59, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedOutputMultiplierGreaterThan1) {
+  using tflite::testing::F2Q;
+  using tflite::testing::F2Q32;
+
+  const float input_min = -127.0f;
+  const float input_max = 128.0f;
+  const float weights_min = -127.0f;
+  const float weights_max = 128.0f;
+  const float bias_min = 0.0f;
+  const float bias_max = 256.0f * (1 << 24);
+  const float output_min = -63.5f;
+  const float output_max = 64.0f;
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestFullyConnectedQuantized(  //
+      {4, 1, 1, 5, 1},                           // Input shape.
+      {
+          // Input values.
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(8, input_min, input_max),
+          F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max),
+          F2Q(1, input_min, input_max),  F2Q(2, input_min, input_max),
+          F2Q(3, input_min, input_max),  F2Q(4, input_min, input_max),
+          F2Q(5, input_min, input_max),  F2Q(6, input_min, input_max),
+          F2Q(7, input_min, input_max),  F2Q(-8, input_min, input_max),
+          F2Q(9, input_min, input_max),  F2Q(-10, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantization range.
+      {2, 3, 10},            // Weights shape.
+      {
+          // Weight values.
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+          F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max),
+          F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max),
+          F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max),
+          F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max),
+          F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max),
+      },
+      weights_min, weights_max,  // Weights quantization range.
+      {1, 3},                    // Bias shape.
+      {
+          F2Q32(1, bias_min, bias_max),
+          F2Q32(2, bias_min, bias_max),
+          F2Q32(3, bias_min, bias_max),
+      },
+      bias_min, bias_max,  // Bias quantization range.
+      {
+          // Expected results.
+          F2Q(24, output_min, output_max),
+          F2Q(25, output_min, output_max),
+          F2Q(26, output_min, output_max),
+          F2Q(58, output_min, output_max),
+          F2Q(59, output_min, output_max),
+          F2Q(60, output_min, output_max),
+      },
+      {2, 2, 3},               // Output shape.
+      output_min, output_max,  // Output quantization range.
+      kTfLiteActNone, output_data);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc b/tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc
new file mode 100644
index 0000000000..a4019a067c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/softmax.cc
@@ -0,0 +1,213 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace activations {
+namespace {
+
+struct OpData {
+  int32_t input_multiplier = 0;
+  int input_left_shift = 0;
+  int32_t input_range_radius = 0;
+  int diff_min = 0;
+};
+
+TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context,
+                                    const TfLiteTensor* input,
+                                    TfLiteTensor* output,
+                                    const TfLiteSoftmaxParams* params,
+                                    OpData* data) {
+  if (input->type == kTfLiteUInt8) {
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+    TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
+
+    static const int kScaledDiffIntegerBits = 5;
+
+    tflite::PreprocessSoftmaxScaling(
+        params->beta, input->params.scale, kScaledDiffIntegerBits,
+        &data->input_multiplier, &data->input_left_shift);
+    data->diff_min = -1.0 * tflite::CalculateInputRadius(
+                                kScaledDiffIntegerBits, data->input_left_shift);
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+void Free(TfLiteContext* context, void* buffer) {}
+
+TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+// Takes a 1D tensor and performs softmax along it.
+void Softmax1DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  const int input_size = input->dims->data[0];
+  tflite::reference_ops::Softmax(input->data.f, input_size, 1, params->beta,
+                                 output->data.f);
+}
+
+// Takes a 2D tensor and perform softmax along the last dimension.
+void Softmax2DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  const int batch_size = input->dims->data[0];
+  const int input_size = input->dims->data[1];
+  tflite::reference_ops::Softmax(input->data.f, input_size, batch_size,
+                                 params->beta, output->data.f);
+}
+
+void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  // TODO(ahentz): this is arguably a dirty trick. Since the implementation
+  // always traverses the last dimension of a 4D tensor, we will pretend our 1D
+  // tensor is 4D in a special way. We will convert a (Y) shape into a (1,
+  // 1, 1, Y) shape.
+  const int input_size = input->dims->data[0];
+  const int32_t shape_data[4] = {1, 1, 1, input_size};
+  RuntimeShape shape(4, shape_data);
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  tflite::reference_ops::Softmax(op_params, shape,
+                                 GetTensorData<uint8_t>(input), shape,
+                                 GetTensorData<uint8_t>(output));
+}
+
+void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  // TODO(ahentz): this is arguably a dirty trick. Since the implementation
+  // always traverses the last dimension of a 4D tensor, we will pretend our 2D
+  // tensor is 4D in a special way. We will convert a (X, Y) shape into a (X,
+  // 1, 1, Y) shape.
+  const int batch_size = input->dims->data[0];
+  const int input_size = input->dims->data[1];
+  const int32_t shape_data[4] = {batch_size, 1, 1, input_size};
+  RuntimeShape shape(4, shape_data);
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  tflite::reference_ops::Softmax(op_params, shape,
+                                 GetTensorData<uint8_t>(input), shape,
+                                 GetTensorData<uint8_t>(output));
+}
+
+// Takes a 4D tensor and perform softmax along the forth dimension.
+void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  SoftmaxParams op_params;
+  op_params.beta = params->beta;
+  tflite::reference_ops::Softmax(
+      op_params, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(output), GetTensorData<float>(output));
+}
+
+void Softmax4DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  tflite::reference_ops::Softmax(
+      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+      GetTensorShape(output), GetTensorData<uint8_t>(output));
+}
+
+TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+
+  const TfLiteTensor* input = GetInput(context, node, 0);
+  TfLiteTensor* output = GetOutput(context, node, 0);
+
+  OpData local_data_object;
+  OpData* data = &local_data_object;
+  TF_LITE_ENSURE_STATUS(
+      CalculateSoftmaxOpData(context, input, output, params, data));
+
+  // TODO(ahentz): consider an implementation that works for many (all?)
+  // dimensions.
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      if (NumDimensions(input) == 1) {
+        Softmax1DFloat(input, output, params);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 2) {
+        Softmax2DFloat(input, output, params);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 4) {
+        Softmax4DFloat(input, output, params);
+        return kTfLiteOk;
+      }
+      context->ReportError(
+          context, "Only 1D, 2D and 4D tensors supported currently, got %dD.",
+          NumDimensions(input));
+      return kTfLiteError;
+    }
+    case kTfLiteUInt8: {
+      if (NumDimensions(input) == 1) {
+        Softmax1DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 2) {
+        Softmax2DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 4) {
+        Softmax4DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
+      context->ReportError(
+          context, "Only 2D and 4D tensors supported currently, got %dD.",
+          NumDimensions(input));
+      return kTfLiteError;
+    }
+    default:
+      context->ReportError(
+          context, "Only float32 and uint8_t supported currently, got %d.",
+          input->type);
+      return kTfLiteError;
+  }
+}
+}  // namespace activations
+
+TfLiteRegistration* Register_SOFTMAX() {
+  static TfLiteRegistration r = {activations::Init, activations::Free,
+                                 activations::SoftmaxPrepare,
+                                 activations::SoftmaxEval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
new file mode 100644
index 0000000000..df7d87d623
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
@@ -0,0 +1,220 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h"
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+namespace {
+
+void TestSoftmaxFloat(std::initializer_list<int> input_dims_data,
+                      std::initializer_list<float> input_data,
+                      std::initializer_list<float> expected_output_data,
+                      std::initializer_list<int> output_dims_data,
+                      float* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 2;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(output_data, output_dims, "output_tensor"),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_SOFTMAX, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteSoftmaxParams builtin_data = {1.0f};
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+  int inputs_array_data[] = {1, 0};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 1};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1e-5f);
+  }
+}
+
+void TestSoftmaxQuantized(std::initializer_list<int> input_dims_data,
+                          std::initializer_list<uint8_t> input_data,
+                          float input_min, float input_max,
+                          std::initializer_list<uint8_t> expected_output_data,
+                          std::initializer_list<int> output_dims_data,
+                          float output_min, float output_max,
+                          uint8_t* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 1;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min,
+                            input_max),
+      CreateQuantizedTensor(output_data, output_dims, "output_tensor",
+                            output_min, output_max),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_SOFTMAX, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteSoftmaxParams builtin_data = {1.0f};
+  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  size_t init_data_size = 0;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+
+  int inputs_array_data[] = {1, 0};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 1};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+  int temporaries_array_data[] = {0};
+  TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]);
+  }
+}
+
+}  // namespace
+}  // namespace testing
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(SimpleTest) {
+  const int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestSoftmaxFloat(  //
+      {2, 2, 5},                      // Input shape.
+      {
+          1.0, 2.0, 3.0, 4.0, 5.0,       // b = 0
+          -1.0, -2.0, -3.0, -4.0, -5.0,  // b = 0
+      },
+      {
+          // Expected results.
+          0.011656231,
+          0.031684921,
+          0.086128544,
+          0.234121657,
+          0.636408647,
+          0.636408647,
+          0.234121657,
+          0.086128544,
+          0.031684921,
+          0.011656231,
+      },
+      {2, 2, 3},  // Output shape.
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantized) {
+  using tflite::testing::F2Q;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float output_min = 0.0f;
+  const float output_max = (255.0f / 256.0f);
+  const int output_dims_count = 6;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestSoftmaxQuantized(  //
+      {2, 1, 5},                          // Input shape.
+      {
+          F2Q(1.0, input_min, input_max),
+          F2Q(2.0, input_min, input_max),
+          F2Q(3.0, input_min, input_max),
+          F2Q(4.0, input_min, input_max),
+          F2Q(5.0, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantized range.
+      {
+          // Expected results.
+          F2Q(0.011656231, output_min, output_max),
+          F2Q(0.031684921, output_min, output_max),
+          F2Q(0.086128544, output_min, output_max),
+          F2Q(0.234121657, output_min, output_max),
+          F2Q(0.636408647, output_min, output_max),
+      },
+      {2, 1, 3},               // Output shape.
+      output_min, output_max,  // Output quantized range.
+      output_data);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h b/tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h
new file mode 100644
index 0000000000..789a48ece8
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h
@@ -0,0 +1,170 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_TEST_UTILS_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_TEST_UTILS_H_
+
+#include <cstdarg>
+#include <initializer_list>
+#include <limits>
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/experimental/micro/kernels/test_utils.h"
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+
+// How many elements are in the array with this shape.
+inline int ElementCount(const TfLiteIntArray& dims) {
+  int result = 1;
+  for (int i = 0; i < dims.size; ++i) {
+    result *= dims.data[i];
+  }
+  return result;
+}
+
+// Wrapper to forward kernel errors to the interpreter's error reporter.
+inline void ReportOpError(struct TfLiteContext* context, const char* format,
+                          ...) {
+  ErrorReporter* error_reporter = static_cast<ErrorReporter*>(context->impl_);
+  va_list args;
+  va_start(args, format);
+  error_reporter->Report(format, args);
+  va_end(args);
+}
+
+// Derives the quantization scaling factor from a min and max range.
+template <typename T>
+inline float ScaleFromMinMax(const float min, const float max) {
+  return (max - min) / ((std::numeric_limits<T>::max() * 1.0) -
+                        std::numeric_limits<T>::min());
+}
+
+// Derives the quantization zero point from a min and max range.
+template <typename T>
+inline int ZeroPointFromMinMax(const float min, const float max) {
+  return static_cast<int>((-min / ScaleFromMinMax<T>(min, max)) + 0.5f);
+}
+
+// Converts a float value into an unsigned eight-bit quantized value.
+inline uint8_t F2Q(const float value, const float min, const float max) {
+  int32_t result = ZeroPointFromMinMax<uint8_t>(min, max) +
+                   (value / ScaleFromMinMax<uint8_t>(min, max)) + 0.5f;
+  if (result < 0) {
+    result = 0;
+  }
+  if (result > 256) {
+    result = 256;
+  }
+  return result;
+}
+
+// Converts a float value into a signed thirty-two-bit quantized value.
+inline uint8_t F2Q32(const float value, const float min, const float max) {
+  return static_cast<int32_t>((value - ZeroPointFromMinMax<int32_t>(min, max)) /
+                              ScaleFromMinMax<int32_t>(min, max));
+}
+
+inline void PopulateContext(TfLiteTensor* tensors, int tensors_size,
+                            TfLiteContext* context) {
+  context->tensors_size = tensors_size;
+  context->tensors = tensors;
+  context->impl_ = static_cast<void*>(micro_test::reporter);
+  context->GetExecutionPlan = nullptr;
+  context->ResizeTensor = nullptr;
+  context->ReportError = ReportOpError;
+  context->AddTensors = nullptr;
+  context->GetNodeAndRegistration = nullptr;
+  context->ReplaceSubgraphsWithDelegateKernels = nullptr;
+  context->recommended_num_threads = 1;
+  context->GetExternalContext = nullptr;
+  context->SetExternalContext = nullptr;
+}
+
+inline TfLiteIntArray* IntArrayFromInts(const int* int_array) {
+  return const_cast<TfLiteIntArray*>(
+      reinterpret_cast<const TfLiteIntArray*>(int_array));
+}
+
+inline TfLiteIntArray* IntArrayFromInitializer(
+    std::initializer_list<int> int_initializer) {
+  return IntArrayFromInts(int_initializer.begin());
+}
+
+inline TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
+                                      const char* name) {
+  const size_t bytes = ElementCount(*dims) * sizeof(float);
+  return {
+      kTfLiteFloat32, {const_cast<int*>(reinterpret_cast<const int*>(data))},
+      dims,           {},
+      kTfLiteMemNone, bytes,
+      nullptr,        name};
+}
+
+inline TfLiteTensor CreateFloatTensor(std::initializer_list<float> data,
+                                      TfLiteIntArray* dims, const char* name) {
+  return CreateFloatTensor(data.begin(), dims, name);
+}
+
+inline TfLiteTensor CreateQuantizedTensor(const uint8_t* data,
+                                          TfLiteIntArray* dims,
+                                          const char* name, float min,
+                                          float max) {
+  const size_t bytes = ElementCount(*dims) * sizeof(uint8_t);
+  const TfLiteQuantizationParams q_params = {
+      ScaleFromMinMax<uint8_t>(min, max),
+      ZeroPointFromMinMax<uint8_t>(min, max)};
+  return {
+      kTfLiteUInt8,   {const_cast<int*>(reinterpret_cast<const int*>(data))},
+      dims,           q_params,
+      kTfLiteMemNone, bytes,
+      nullptr,        name};
+}
+
+inline TfLiteTensor CreateQuantizedTensor(std::initializer_list<uint8_t> data,
+                                          TfLiteIntArray* dims,
+                                          const char* name, float min,
+                                          float max) {
+  return CreateQuantizedTensor(data.begin(), dims, name, min, max);
+}
+
+inline TfLiteTensor CreateQuantized32Tensor(const int32_t* data,
+                                            TfLiteIntArray* dims,
+                                            const char* name, float min,
+                                            float max) {
+  const size_t bytes = ElementCount(*dims) * sizeof(int32_t);
+  const TfLiteQuantizationParams q_params = {
+      ScaleFromMinMax<int32_t>(min, max),
+      ZeroPointFromMinMax<int32_t>(min, max)};
+  return {
+      kTfLiteUInt8,   {const_cast<int*>(reinterpret_cast<const int*>(data))},
+      dims,           q_params,
+      kTfLiteMemNone, bytes,
+      nullptr,        name};
+}
+
+inline TfLiteTensor CreateQuantized32Tensor(std::initializer_list<int32_t> data,
+                                            TfLiteIntArray* dims,
+                                            const char* name, float min,
+                                            float max) {
+  return CreateQuantized32Tensor(data.begin(), dims, name, min, max);
+}
+
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_KERNELS_TEST_UTILS_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc
new file mode 100644
index 0000000000..99dd883661
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.cc
@@ -0,0 +1,78 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h"
+
+#ifdef TF_LITE_MCU_DEBUG_LOG
+#include <debug_log.h>
+#else  // TF_LITE_MCU_DEBUG_LOG
+#include <cstdint>
+#include <cstdio>
+void DebugLog(const char* s) { fprintf(stderr, "%s", s); }
+void DebugLogInt32(int32_t i) { fprintf(stderr, "%d", i); }
+void DebugLogUInt32(uint32_t i) { fprintf(stderr, "%d", i); }
+void DebugLogHex(uint32_t i) { fprintf(stderr, "0x%8x", i); }
+void DebugLogFloat(float i) { fprintf(stderr, "%f", i); }
+#endif  // TF_LITE_MCU_DEBUG_LOG
+
+namespace tflite {
+namespace {
+void DebugLogPrintf(const char* format, va_list args) {
+  const int output_cache_size = 64;
+  char output_cache[output_cache_size + 1];
+  int output_cache_index = 0;
+  const char* current = format;
+  while (*current != 0) {
+    if (*current == '%') {
+      const char next = *(current + 1);
+      if ((next == 'd') || (next == 's')) {
+        current += 1;
+        if (output_cache_index > 0) {
+          output_cache[output_cache_index] = 0;
+          DebugLog(output_cache);
+          output_cache_index = 0;
+        }
+        if (next == 'd') {
+          DebugLogInt32(va_arg(args, int));
+        } else if (next == 's') {
+          DebugLog(va_arg(args, char*));
+        }
+      }
+    } else {
+      output_cache[output_cache_index] = *current;
+      output_cache_index += 1;
+    }
+    if (output_cache_index >= output_cache_size) {
+      output_cache[output_cache_index] = 0;
+      DebugLog(output_cache);
+      output_cache_index = 0;
+    }
+    current += 1;
+  }
+  if (output_cache_index > 0) {
+    output_cache[output_cache_index] = 0;
+    DebugLog(output_cache);
+    output_cache_index = 0;
+  }
+  DebugLog("\n");
+}
+}  // namespace
+
+int MicroErrorReporter::Report(const char* format, va_list args) {
+  DebugLogPrintf(format, args);
+  return 0;
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h
new file mode 100644
index 0000000000..33e54f7990
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_ERROR_REPORTER_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_ERROR_REPORTER_H_
+
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/experimental/micro/compatibility.h"
+
+namespace tflite {
+
+class MicroErrorReporter : public ErrorReporter {
+ public:
+  ~MicroErrorReporter() {}
+  int Report(const char* format, va_list args) override;
+
+ private:
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_ERROR_REPORTER_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc
new file mode 100644
index 0000000000..ef3c32050c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_error_reporter_test.cc
@@ -0,0 +1,25 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h"
+
+int main(int argc, char** argv) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+  error_reporter->Report("Number: %d", 42);
+  error_reporter->Report("Badly-formed format string %");
+  error_reporter->Report("Another % badly-formed %% format string");
+  error_reporter->Report("~~~%s~~~", "ALL TESTS PASSED");
+}
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc
new file mode 100644
index 0000000000..0f38991bb0
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.cc
@@ -0,0 +1,310 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h"
+
+#include "tensorflow/contrib/lite/core/api/flatbuffer_conversions.h"
+#include "tensorflow/contrib/lite/experimental/micro/compatibility.h"
+
+namespace tflite {
+namespace {
+const int kStackDataAllocatorSize = 128;
+class StackDataAllocator : public BuiltinDataAllocator {
+ public:
+  void* Allocate(size_t size) override {
+    if (size > kStackDataAllocatorSize) {
+      return nullptr;
+    } else {
+      return data_;
+    }
+  }
+  void Deallocate(void* data) override {
+    // Do nothing.
+  }
+
+ private:
+  uint8_t data_[kStackDataAllocatorSize];
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
+  if (registration->builtin_code == BuiltinOperator_CUSTOM) {
+    return registration->custom_name;
+  } else {
+    return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
+  }
+}
+
+void ReportOpError(struct TfLiteContext* context, const char* format, ...) {
+  MicroInterpreter* interpreter =
+      static_cast<MicroInterpreter*>(context->impl_);
+  va_list args;
+  va_start(args, format);
+  interpreter->error_reporter()->Report(format, args);
+  va_end(args);
+}
+
+}  // namespace
+
+MicroInterpreter::MicroInterpreter(const Model* model,
+                                   const OpResolver& op_resolver,
+                                   SimpleTensorAllocator* tensor_allocator,
+                                   ErrorReporter* error_reporter)
+    : model_(model),
+      op_resolver_(op_resolver),
+      tensor_allocator_(tensor_allocator),
+      error_reporter_(error_reporter),
+      initialization_status_(kTfLiteOk) {
+  const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
+      model->buffers();
+  auto* subgraphs = model->subgraphs();
+  if (subgraphs->size() != 1) {
+    error_reporter->Report("Only 1 subgraph is currently supported.\n");
+    initialization_status_ = kTfLiteError;
+    return;
+  }
+  subgraph_ = (*subgraphs)[0];
+  tensors_ = subgraph_->tensors();
+  operators_ = subgraph_->operators();
+
+  context_.tensors_size = tensors_->Length();
+  context_.tensors =
+      reinterpret_cast<TfLiteTensor*>(tensor_allocator_->AllocateMemory(
+          sizeof(TfLiteTensor) * context_.tensors_size));
+  for (int i = 0; i < subgraph_->inputs()->Length(); ++i) {
+    const int tensor_index = subgraph_->inputs()->Get(i);
+    const auto* tensor = tensors_->Get(tensor_index);
+    initialization_status_ = tensor_allocator_->AllocateTensor(
+        *tensor, 0, operators_->Length(), buffers, error_reporter,
+        &context_.tensors[tensor_index]);
+    if (initialization_status_ != kTfLiteOk) {
+      return;
+    }
+  }
+
+  int* first_created = reinterpret_cast<int*>(
+      tensor_allocator_->AllocateMemory(sizeof(int) * tensors_->Length()));
+  int* last_used = reinterpret_cast<int*>(
+      tensor_allocator_->AllocateMemory(sizeof(int) * tensors_->Length()));
+  for (int i = 0; i < tensors_->Length(); ++i) {
+    first_created[i] = -1;
+    last_used[i] = -1;
+  }
+
+  for (int i = (operators_->Length() - 1); i >= 0; --i) {
+    const auto* op = operators_->Get(i);
+    for (int n = 0; n < op->inputs()->Length(); ++n) {
+      const int tensor_index = op->inputs()->Get(n);
+      if ((last_used[tensor_index] == -1) || (last_used[tensor_index] < i)) {
+        last_used[tensor_index] = i;
+      }
+    }
+    for (int n = 0; n < op->outputs()->Length(); ++n) {
+      const int tensor_index = op->outputs()->Get(n);
+      const int create_before = i;
+      int destroy_after = last_used[tensor_index];
+      if (destroy_after == -1) {
+        destroy_after = operators_->Length();
+      }
+      const auto* tensor = tensors_->Get(tensor_index);
+      if (!tensor->is_variable()) {
+        initialization_status_ = tensor_allocator_->AllocateTensor(
+            *tensor, create_before, destroy_after, buffers, error_reporter,
+            &context_.tensors[tensor_index]);
+        if (initialization_status_ != kTfLiteOk) {
+          return;
+        }
+        first_created[tensor_index] = i;
+      }
+    }
+  }
+
+  for (int i = 0; i < tensors_->Length(); ++i) {
+    const auto* tensor = tensors_->Get(i);
+    const bool is_read_only = (first_created[i] == -1) && (last_used[i] != -1);
+    if (tensor->is_variable() || is_read_only) {
+      initialization_status_ = tensor_allocator_->AllocateTensor(
+          *tensor, 0, operators_->Length(), buffers, error_reporter,
+          &context_.tensors[i]);
+      if (initialization_status_ != kTfLiteOk) {
+        return;
+      }
+    }
+  }
+  context_.impl_ = static_cast<void*>(this);
+  context_.GetExecutionPlan = nullptr;
+  context_.ResizeTensor = nullptr;
+  context_.ReportError = ReportOpError;
+  context_.AddTensors = nullptr;
+  context_.GetNodeAndRegistration = nullptr;
+  context_.ReplaceSubgraphsWithDelegateKernels = nullptr;
+  context_.recommended_num_threads = 1;
+  context_.GetExternalContext = nullptr;
+  context_.SetExternalContext = nullptr;
+}
+
+TfLiteStatus MicroInterpreter::Invoke() {
+  if (initialization_status_ != kTfLiteOk) {
+    error_reporter_->Report("Invoke() called after initialization failed\n");
+    return kTfLiteError;
+  }
+  TfLiteStatus status = kTfLiteOk;
+  auto opcodes = model_->operator_codes();
+  for (int i = 0; i < operators_->Length(); ++i) {
+    const auto* op = operators_->Get(i);
+    int index = op->opcode_index();
+    if (index < 0 || index >= opcodes->size()) {
+      error_reporter_->Report("Missing registration for opcode_index %d\n",
+                              index);
+      return kTfLiteError;
+    }
+    auto opcode = (*opcodes)[index];
+    const TfLiteRegistration* registration = nullptr;
+    status = GetRegistrationFromOpCode(opcode, op_resolver_, error_reporter_,
+                                       &registration);
+    if (status != kTfLiteOk) {
+      return status;
+    }
+    if (registration == nullptr) {
+      error_reporter_->Report("Skipping op for opcode_index %d\n", index);
+      return kTfLiteError;
+    }
+    BuiltinOperator op_type =
+        static_cast<BuiltinOperator>(registration->builtin_code);
+
+    if (op_type != BuiltinOperator_CUSTOM && op->custom_options()) {
+      error_reporter_->Report(
+          "Found builtin operator %s with custom options.\n",
+          EnumNameBuiltinOperator(op_type));
+    }
+    StackDataAllocator stack_data_allocator;
+    const char* custom_data = nullptr;
+    size_t custom_data_size = 0;
+    unsigned char* builtin_data = nullptr;
+    if (op->custom_options()) {
+      custom_data = reinterpret_cast<const char*>(op->custom_options()->data());
+      custom_data_size = op->custom_options()->size();
+    } else {
+      TF_LITE_ENSURE_STATUS(ParseOpData(op, op_type, error_reporter_,
+                                        &stack_data_allocator,
+                                        (void**)(&builtin_data)));
+    }
+
+    const char* init_data;
+    size_t init_data_size;
+    if (registration->builtin_code == BuiltinOperator_CUSTOM) {
+      init_data = custom_data;
+      init_data_size = custom_data_size;
+    } else {
+      init_data = reinterpret_cast<const char*>(builtin_data);
+      init_data_size = 0;
+    }
+    void* user_data = nullptr;
+    if (registration->init) {
+      user_data = registration->init(&context_, init_data, init_data_size);
+    }
+
+    const int kMaxInputs = 16;
+    int inputs_data[kMaxInputs + 1];
+    TfLiteIntArray* inputs_array =
+        reinterpret_cast<TfLiteIntArray*>(inputs_data);
+    if (op->inputs()->Length() >= kMaxInputs) {
+      error_reporter_->Report("Too many inputs (%d)\n", op->inputs()->Length());
+      return kTfLiteError;
+    }
+    inputs_array->size = op->inputs()->Length();
+    for (int n = 0; n < op->inputs()->Length(); ++n) {
+      inputs_array->data[n] = op->inputs()->Get(n);
+    }
+
+    const int kMaxOutputs = 16;
+    int outputs_data[kMaxOutputs + 1];
+    TfLiteIntArray* outputs_array =
+        reinterpret_cast<TfLiteIntArray*>(outputs_data);
+    if (op->outputs()->Length() >= kMaxOutputs) {
+      error_reporter_->Report("Too many outputs (%d)\n",
+                              op->outputs()->Length());
+      return kTfLiteError;
+    }
+    outputs_array->size = op->outputs()->Length();
+    for (int n = 0; n < op->outputs()->Length(); ++n) {
+      outputs_array->data[n] = op->outputs()->Get(n);
+    }
+
+    const int kMaxTemporaries = 16;
+    int temporaries_data[kMaxTemporaries + 1];
+    TfLiteIntArray* temporaries_array =
+        reinterpret_cast<TfLiteIntArray*>(temporaries_data);
+    temporaries_array->size = 0;
+
+    TfLiteNode node;
+    node.inputs = inputs_array;
+    node.outputs = outputs_array;
+    node.temporaries = temporaries_array;
+    node.user_data = user_data;
+    node.builtin_data = reinterpret_cast<void*>(builtin_data);
+    node.custom_initial_data = custom_data;
+    node.custom_initial_data_size = custom_data_size;
+    node.delegate = nullptr;
+    if (registration->prepare) {
+      TfLiteStatus prepare_status = registration->prepare(&context_, &node);
+      if (prepare_status != kTfLiteOk) {
+        error_reporter_->Report(
+            "Node %s (number %d) failed to prepare with status %d",
+            OpNameFromRegistration(registration), i, prepare_status);
+        return kTfLiteError;
+      }
+    }
+
+    if (registration->invoke) {
+      TfLiteStatus invoke_status = registration->invoke(&context_, &node);
+      if (invoke_status != kTfLiteOk) {
+        error_reporter_->Report(
+            "Node %s (number %d) failed to invoke with status %d",
+            OpNameFromRegistration(registration), i, invoke_status);
+        return kTfLiteError;
+      }
+    }
+
+    if (registration->free) {
+      registration->free(&context_, user_data);
+    }
+  }
+  return status;
+}
+
+TfLiteTensor* MicroInterpreter::input(int index) {
+  const flatbuffers::Vector<int32_t>* inputs = subgraph_->inputs();
+  const size_t length = inputs->Length();
+  if ((index < 0) || (index >= length)) {
+    error_reporter_->Report("Input index %d out of range (length is %d)", index,
+                            length);
+    return nullptr;
+  }
+  return &(context_.tensors[inputs->Get(index)]);
+}
+
+TfLiteTensor* MicroInterpreter::output(int index) {
+  const flatbuffers::Vector<int32_t>* outputs = subgraph_->outputs();
+  const size_t length = outputs->Length();
+  if ((index < 0) || (index >= outputs->Length())) {
+    error_reporter_->Report("Output index %d out of range (length is %d)",
+                            index, length);
+    return nullptr;
+  }
+  return &(context_.tensors[outputs->Get(index)]);
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_interpreter.h b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.h
new file mode 100644
index 0000000000..a88514cde8
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_interpreter.h
@@ -0,0 +1,71 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_INTERPRETER_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_INTERPRETER_H_
+
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/core/api/op_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+class MicroInterpreter {
+ public:
+  // The lifetime of the model, op resolver, allocator, and error reporter must
+  // be at least as long as that of the interpreter object, since the
+  // interpreter may need to access them at any time. This means that you should
+  // usually create them with the same scope as each other, for example having
+  // them all allocated on the stack as local variables through a top-level
+  // function.
+  // The interpreter doesn't do any deallocation of any of the pointed-to
+  // objects, ownership remains with the caller.
+  MicroInterpreter(const Model* model, const OpResolver& op_resolver,
+                   SimpleTensorAllocator* tensor_allocator,
+                   ErrorReporter* error_reporter);
+
+  TfLiteStatus Invoke();
+
+  size_t tensors_size() const { return context_.tensors_size; }
+  TfLiteTensor* tensor(int tensor_index);
+
+  TfLiteTensor* input(int index);
+  size_t inputs_size() const { return subgraph_->inputs()->Length(); }
+
+  TfLiteTensor* output(int index);
+  size_t outputs_size() const { return subgraph_->outputs()->Length(); }
+
+  TfLiteStatus initialization_status() const { return initialization_status_; }
+
+  ErrorReporter* error_reporter() { return error_reporter_; }
+
+ private:
+  const Model* model_;
+  const OpResolver& op_resolver_;
+  SimpleTensorAllocator* tensor_allocator_;
+  ErrorReporter* error_reporter_;
+
+  TfLiteStatus initialization_status_;
+  const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
+  const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;
+  TfLiteContext context_;
+
+  const SubGraph* subgraph_;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_INTERPRETER_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc b/tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc
new file mode 100644
index 0000000000..251e5f7203
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc
@@ -0,0 +1,197 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h"
+
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace {
+void* MockInit(TfLiteContext* context, const char* buffer, size_t length) {
+  // Do nothing.
+  return nullptr;
+}
+
+void MockFree(TfLiteContext* context, void* buffer) {
+  // Do nothing.
+}
+
+TfLiteStatus MockPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus MockInvoke(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* input = &context->tensors[node->inputs->data[0]];
+  const int32_t* input_data = input->data.i32;
+  const TfLiteTensor* weight = &context->tensors[node->inputs->data[1]];
+  const uint8_t* weight_data = weight->data.uint8;
+  TfLiteTensor* output = &context->tensors[node->outputs->data[0]];
+  int32_t* output_data = output->data.i32;
+  output_data[0] = input_data[0] + weight_data[0];
+  return kTfLiteOk;
+}
+
+class MockOpResolver : public OpResolver {
+ public:
+  const TfLiteRegistration* FindOp(BuiltinOperator op,
+                                   int version) const override {
+    return nullptr;
+  }
+  const TfLiteRegistration* FindOp(const char* op, int version) const override {
+    if (strcmp(op, "mock_custom") == 0) {
+      static TfLiteRegistration r = {MockInit, MockFree, MockPrepare,
+                                     MockInvoke};
+      return &r;
+    } else {
+      return nullptr;
+    }
+  }
+};
+
+class StackAllocator : public flatbuffers::Allocator {
+ public:
+  StackAllocator() : data_(data_backing_), data_size_(0) {}
+
+  uint8_t* allocate(size_t size) override {
+    if ((data_size_ + size) > kStackAllocatorSize) {
+      // TODO(petewarden): Add error reporting beyond returning null!
+      return nullptr;
+    }
+    uint8_t* result = data_;
+    data_ += size;
+    data_size_ += size;
+    return result;
+  }
+
+  void deallocate(uint8_t* p, size_t) override {}
+
+  static StackAllocator& instance() {
+    // Avoid using true dynamic memory allocation to be portable to bare metal.
+    static char inst_memory[sizeof(StackAllocator)];
+    static StackAllocator* inst = new (inst_memory) StackAllocator;
+    return *inst;
+  }
+
+  static constexpr int kStackAllocatorSize = 4096;
+
+ private:
+  uint8_t data_backing_[kStackAllocatorSize];
+  uint8_t* data_;
+  int data_size_;
+};
+
+const Model* BuildMockModel() {
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder builder(StackAllocator::kStackAllocatorSize,
+                                         &StackAllocator::instance());
+  constexpr size_t buffer_data_size = 1;
+  const uint8_t buffer_data[buffer_data_size] = {21};
+  constexpr size_t buffers_size = 2;
+  const Offset<Buffer> buffers[buffers_size] = {
+      CreateBuffer(builder),
+      CreateBuffer(builder,
+                   builder.CreateVector(buffer_data, buffer_data_size))};
+  constexpr size_t tensor_shape_size = 1;
+  const int32_t tensor_shape[tensor_shape_size] = {1};
+  constexpr size_t tensors_size = 3;
+  const Offset<Tensor> tensors[tensors_size] = {
+      CreateTensor(builder,
+                   builder.CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT32, 0,
+                   builder.CreateString("test_input_tensor"), 0, false),
+      CreateTensor(builder,
+                   builder.CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_UINT8, 1,
+                   builder.CreateString("test_weight_tensor"), 0, false),
+      CreateTensor(builder,
+                   builder.CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT32, 0,
+                   builder.CreateString("test_output_tensor"), 0, false),
+  };
+  constexpr size_t inputs_size = 1;
+  const int32_t inputs[inputs_size] = {0};
+  constexpr size_t outputs_size = 1;
+  const int32_t outputs[outputs_size] = {2};
+  constexpr size_t operator_inputs_size = 2;
+  const int32_t operator_inputs[operator_inputs_size] = {0, 1};
+  constexpr size_t operator_outputs_size = 1;
+  const int32_t operator_outputs[operator_outputs_size] = {2};
+  constexpr size_t operators_size = 1;
+  const Offset<Operator> operators[operators_size] = {CreateOperator(
+      builder, 0, builder.CreateVector(operator_inputs, operator_inputs_size),
+      builder.CreateVector(operator_outputs, operator_outputs_size),
+      BuiltinOptions_NONE)};
+  constexpr size_t subgraphs_size = 1;
+  const Offset<SubGraph> subgraphs[subgraphs_size] = {
+      CreateSubGraph(builder, builder.CreateVector(tensors, tensors_size),
+                     builder.CreateVector(inputs, inputs_size),
+                     builder.CreateVector(outputs, outputs_size),
+                     builder.CreateVector(operators, operators_size),
+                     builder.CreateString("test_subgraph"))};
+  constexpr size_t operator_codes_size = 1;
+  const Offset<OperatorCode> operator_codes[operator_codes_size] = {
+      CreateOperatorCodeDirect(builder, BuiltinOperator_CUSTOM, "mock_custom",
+                               0)};
+  const Offset<Model> model_offset = CreateModel(
+      builder, 0, builder.CreateVector(operator_codes, operator_codes_size),
+      builder.CreateVector(subgraphs, subgraphs_size),
+      builder.CreateString("test_model"),
+      builder.CreateVector(buffers, buffers_size));
+  FinishModelBuffer(builder, model_offset);
+  void* model_pointer = builder.GetBufferPointer();
+  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
+  return model;
+}
+
+}  // namespace
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestInterpreter) {
+  const tflite::Model* model = tflite::BuildMockModel();
+  TF_LITE_MICRO_EXPECT_NE(nullptr, model);
+  tflite::MockOpResolver mock_resolver;
+  constexpr size_t allocator_buffer_size = 1024;
+  uint8_t allocator_buffer[allocator_buffer_size];
+  tflite::SimpleTensorAllocator simple_tensor_allocator(allocator_buffer,
+                                                        allocator_buffer_size);
+  tflite::MicroInterpreter interpreter(
+      model, mock_resolver, &simple_tensor_allocator, micro_test::reporter);
+  TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size());
+  TF_LITE_MICRO_EXPECT_EQ(1, interpreter.outputs_size());
+
+  TfLiteTensor* input = interpreter.input(0);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, input);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, input->type);
+  TF_LITE_MICRO_EXPECT_EQ(1, input->dims->size);
+  TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]);
+  TF_LITE_MICRO_EXPECT_EQ(4, input->bytes);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, input->data.i32);
+  input->data.i32[0] = 21;
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter.Invoke());
+
+  TfLiteTensor* output = interpreter.output(0);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, output);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, output->type);
+  TF_LITE_MICRO_EXPECT_EQ(1, output->dims->size);
+  TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]);
+  TF_LITE_MICRO_EXPECT_EQ(4, output->bytes);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, output->data.i32);
+  TF_LITE_MICRO_EXPECT_EQ(42, output->data.i32[0]);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc
new file mode 100644
index 0000000000..40c21c6448
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.cc
@@ -0,0 +1,80 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h"
+
+namespace tflite {
+
+const TfLiteRegistration* MicroMutableOpResolver::FindOp(
+    tflite::BuiltinOperator op, int version) const {
+  for (int i = 0; i < registrations_len_; ++i) {
+    const TfLiteRegistration& registration = registrations_[i];
+    if ((registration.builtin_code == op) &&
+        (registration.version == version)) {
+      return &registration;
+    }
+  }
+  return nullptr;
+}
+
+const TfLiteRegistration* MicroMutableOpResolver::FindOp(const char* op,
+                                                         int version) const {
+  for (int i = 0; i < registrations_len_; ++i) {
+    const TfLiteRegistration& registration = registrations_[i];
+    if ((registration.builtin_code == -1) &&
+        (strcmp(registration.custom_name, op) == 0) &&
+        (registration.version == version)) {
+      return &registration;
+    }
+  }
+  return nullptr;
+}
+
+void MicroMutableOpResolver::AddBuiltin(tflite::BuiltinOperator op,
+                                        TfLiteRegistration* registration,
+                                        int min_version, int max_version) {
+  for (int version = min_version; version <= max_version; ++version) {
+    if (registrations_len_ >= TFLITE_REGISTRATIONS_MAX) {
+      // TODO(petewarden) - Add error reporting hooks so we can report this!
+      return;
+    }
+    TfLiteRegistration* new_registration = &registrations_[registrations_len_];
+    registrations_len_ += 1;
+
+    *new_registration = *registration;
+    new_registration->builtin_code = op;
+    new_registration->version = version;
+  }
+}
+
+void MicroMutableOpResolver::AddCustom(const char* name,
+                                       TfLiteRegistration* registration,
+                                       int min_version, int max_version) {
+  for (int version = min_version; version <= max_version; ++version) {
+    if (registrations_len_ >= TFLITE_REGISTRATIONS_MAX) {
+      // TODO(petewarden) - Add error reporting hooks so we can report this!
+      return;
+    }
+    TfLiteRegistration* new_registration = &registrations_[registrations_len_];
+    registrations_len_ += 1;
+
+    *new_registration = *registration;
+    new_registration->builtin_code = -1;
+    new_registration->custom_name = name;
+    new_registration->version = version;
+  }
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h
new file mode 100644
index 0000000000..f3750a2484
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
+
+#include "tensorflow/contrib/lite/core/api/op_resolver.h"
+#include "tensorflow/contrib/lite/experimental/micro/compatibility.h"
+
+#ifndef TFLITE_REGISTRATIONS_MAX
+#define TFLITE_REGISTRATIONS_MAX (128)
+#endif
+
+namespace tflite {
+
+class MicroMutableOpResolver : public OpResolver {
+ public:
+  const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
+                                   int version) const override;
+  const TfLiteRegistration* FindOp(const char* op, int version) const override;
+  void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
+                  int min_version = 1, int max_version = 1);
+  void AddCustom(const char* name, TfLiteRegistration* registration,
+                 int min_version = 1, int max_version = 1);
+
+ private:
+  TfLiteRegistration registrations_[TFLITE_REGISTRATIONS_MAX];
+  int registrations_len_ = 0;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc
new file mode 100644
index 0000000000..5420a33e87
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver_test.cc
@@ -0,0 +1,83 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_mutable_op_resolver.h"
+
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace {
+void* MockInit(TfLiteContext* context, const char* buffer, size_t length) {
+  // Do nothing.
+  return nullptr;
+}
+
+void MockFree(TfLiteContext* context, void* buffer) {
+  // Do nothing.
+}
+
+TfLiteStatus MockPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus MockInvoke(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+}  // namespace
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestOperations) {
+  using tflite::BuiltinOperator_CONV_2D;
+  using tflite::BuiltinOperator_RELU;
+  using tflite::MicroMutableOpResolver;
+  using tflite::OpResolver;
+
+  static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree,
+                                 tflite::MockPrepare, tflite::MockInvoke};
+
+  MicroMutableOpResolver micro_mutable_op_resolver;
+  micro_mutable_op_resolver.AddBuiltin(BuiltinOperator_CONV_2D, &r, 0, 2);
+  micro_mutable_op_resolver.AddCustom("mock_custom", &r, 0, 3);
+  OpResolver* resolver = &micro_mutable_op_resolver;
+
+  const TfLiteRegistration* registration =
+      resolver->FindOp(BuiltinOperator_CONV_2D, 0);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration->init(nullptr, nullptr, 0));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(nullptr, nullptr));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr));
+
+  registration = resolver->FindOp(BuiltinOperator_CONV_2D, 10);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration);
+
+  registration = resolver->FindOp(BuiltinOperator_RELU, 0);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration);
+
+  registration = resolver->FindOp("mock_custom", 0);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration->init(nullptr, nullptr, 0));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(nullptr, nullptr));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr));
+
+  registration = resolver->FindOp("mock_custom", 10);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration);
+
+  registration = resolver->FindOp("nonexistent_custom", 0);
+  TF_LITE_MICRO_EXPECT_EQ(nullptr, registration);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc
new file mode 100644
index 0000000000..8c090a20a5
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.cc
@@ -0,0 +1,149 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h"
+
+#include "tensorflow/contrib/lite/core/api/flatbuffer_conversions.h"
+
+namespace tflite {
+namespace {
+
+TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
+                              ErrorReporter* reporter) {
+  switch (type) {
+    case kTfLiteFloat32:
+      *size = sizeof(float);
+      break;
+    case kTfLiteInt16:
+      *size = sizeof(int16_t);
+      break;
+    case kTfLiteInt32:
+      *size = sizeof(int32_t);
+      break;
+    case kTfLiteUInt8:
+      *size = sizeof(uint8_t);
+      break;
+    case kTfLiteInt64:
+      *size = sizeof(int64_t);
+      break;
+    case kTfLiteBool:
+      *size = sizeof(bool);
+      break;
+    case kTfLiteComplex64:
+      *size = sizeof(float) * 2;
+      break;
+    default:
+      reporter->Report(
+          "Only float32, int16, int32, int64, uint8, bool, complex64 "
+          "supported currently.");
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus BytesRequired(const tflite::Tensor& flatbuffer_tensor,
+                           size_t dims_size, size_t* bytes,
+                           ErrorReporter* error_reporter) {
+  TfLiteType tf_lite_type;
+  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
+                                          &tf_lite_type, error_reporter));
+  size_t type_size;
+  TF_LITE_ENSURE_STATUS(
+      TfLiteTypeSizeOf(tf_lite_type, &type_size, error_reporter));
+  *bytes = dims_size * type_size;
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteStatus SimpleTensorAllocator::AllocateTensor(
+    const tflite::Tensor& flatbuffer_tensor, int create_before,
+    int destroy_after,
+    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
+    ErrorReporter* error_reporter, TfLiteTensor* result) {
+  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
+                                          &result->type, error_reporter));
+  result->is_variable = flatbuffer_tensor.is_variable();
+
+  result->data.raw = nullptr;
+  result->bytes = 0;
+  if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
+    if (auto* array = buffer->data()) {
+      if (size_t array_size = array->size()) {
+        result->data.raw =
+            const_cast<char*>(reinterpret_cast<const char*>(array->data()));
+        TF_LITE_ENSURE_STATUS(BytesRequired(flatbuffer_tensor, array_size,
+                                            &result->bytes, error_reporter));
+      }
+    }
+  }
+  if (result->data.raw) {
+    result->allocation_type = kTfLiteMmapRo;
+  } else {
+    int data_size = 1;
+    for (int n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
+      data_size *= flatbuffer_tensor.shape()->Get(n);
+    }
+    TF_LITE_ENSURE_STATUS(BytesRequired(flatbuffer_tensor, data_size,
+                                        &result->bytes, error_reporter));
+    result->data.raw = reinterpret_cast<char*>(AllocateMemory(result->bytes));
+    if (result->data.raw == nullptr) {
+      const char* tensor_name = flatbuffer_tensor.name()->c_str();
+      if (tensor_name == nullptr) {
+        tensor_name = "<None>";
+      }
+      error_reporter->Report(
+          "Couldn't allocate memory for tensor '%s', wanted %d bytes but only "
+          "%d were available",
+          tensor_name, result->bytes, (data_size_max_ - data_size_));
+      return kTfLiteError;
+    }
+    result->allocation_type = kTfLiteArenaRw;
+  }
+  result->dims = reinterpret_cast<TfLiteIntArray*>(
+      AllocateMemory(sizeof(int) * (flatbuffer_tensor.shape()->Length() + 1)));
+  result->dims->size = flatbuffer_tensor.shape()->Length();
+  for (int n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
+    result->dims->data[n] = flatbuffer_tensor.shape()->Get(n);
+  }
+  if (flatbuffer_tensor.quantization()) {
+    result->params.scale = flatbuffer_tensor.quantization()->scale()->Get(0);
+    result->params.zero_point =
+        flatbuffer_tensor.quantization()->zero_point()->Get(0);
+  }
+  result->allocation = nullptr;
+  if (flatbuffer_tensor.name()) {
+    result->name = flatbuffer_tensor.name()->c_str();
+  } else {
+    result->name = "<No name>";
+  }
+  result->delegate = nullptr;
+  result->buffer_handle = 0;
+  result->data_is_stale = false;
+  return kTfLiteOk;
+}
+
+uint8_t* SimpleTensorAllocator::AllocateMemory(size_t size) {
+  if ((data_size_ + size) > data_size_max_) {
+    // TODO(petewarden): Add error reporting beyond returning null!
+    return nullptr;
+  }
+  uint8_t* result = data_;
+  data_ += size;
+  data_size_ += size;
+  return result;
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h
new file mode 100644
index 0000000000..4f16a9d0e5
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator.h
@@ -0,0 +1,51 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_SIMPLE_TENSOR_ALLOCATOR_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_SIMPLE_TENSOR_ALLOCATOR_H_
+
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+// TODO(petewarden): This allocator never frees up or reuses  any memory, even
+// though we have enough information about lifetimes of the tensors to do so.
+// This makes it pretty wasteful, so we should use a more intelligent method.
+class SimpleTensorAllocator {
+ public:
+  SimpleTensorAllocator(uint8_t* buffer, int buffer_size)
+      : data_size_(0), data_size_max_(buffer_size), data_(buffer) {}
+
+  TfLiteStatus AllocateTensor(
+      const tflite::Tensor& flatbuffer_tensor, int create_before,
+      int destroy_after,
+      const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
+      ErrorReporter* error_reporter, TfLiteTensor* result);
+
+  uint8_t* AllocateMemory(size_t size);
+
+  int GetDataSize() const { return data_size_; }
+
+ private:
+  int data_size_;
+  int data_size_max_;
+  uint8_t* data_;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_SIMPLE_TENSOR_ALLOCATOR_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc
new file mode 100644
index 0000000000..c835427243
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc
@@ -0,0 +1,144 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h"
+
+#include "tensorflow/contrib/lite/experimental/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace {
+class StackAllocator : public flatbuffers::Allocator {
+ public:
+  StackAllocator() : data_(data_backing_), data_size_(0) {}
+
+  uint8_t* allocate(size_t size) override {
+    if ((data_size_ + size) > kStackAllocatorSize) {
+      // TODO(petewarden): Add error reporting beyond returning null!
+      return nullptr;
+    }
+    uint8_t* result = data_;
+    data_ += size;
+    data_size_ += size;
+    return result;
+  }
+
+  void deallocate(uint8_t* p, size_t) override {}
+
+  static StackAllocator& instance() {
+    // Avoid using true dynamic memory allocation to be portable to bare metal.
+    static char inst_memory[sizeof(StackAllocator)];
+    static StackAllocator* inst = new (inst_memory) StackAllocator;
+    return *inst;
+  }
+
+  static constexpr int kStackAllocatorSize = 4096;
+
+ private:
+  uint8_t data_backing_[kStackAllocatorSize];
+  uint8_t* data_;
+  int data_size_;
+};
+
+flatbuffers::FlatBufferBuilder* BuilderInstance() {
+  static char inst_memory[sizeof(flatbuffers::FlatBufferBuilder)];
+  static flatbuffers::FlatBufferBuilder* inst =
+      new (inst_memory) flatbuffers::FlatBufferBuilder(
+          StackAllocator::kStackAllocatorSize, &StackAllocator::instance());
+  return inst;
+}
+
+const Tensor* Create1dTensor(int size) {
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+  constexpr size_t tensor_shape_size = 1;
+  const int32_t tensor_shape[tensor_shape_size] = {size};
+  const Offset<Tensor> tensor_offset = CreateTensor(
+      *builder, builder->CreateVector(tensor_shape, tensor_shape_size),
+      TensorType_INT32, 0, builder->CreateString("test_tensor"), 0, false);
+  builder->Finish(tensor_offset);
+  void* tensor_pointer = builder->GetBufferPointer();
+  const Tensor* tensor = flatbuffers::GetRoot<Tensor>(tensor_pointer);
+  return tensor;
+}
+
+const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* CreateBuffers() {
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+  constexpr size_t buffers_size = 1;
+  const Offset<Buffer> buffers[buffers_size] = {
+      CreateBuffer(*builder),
+  };
+  const flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>
+      buffers_offset = builder->CreateVector(buffers, buffers_size);
+  builder->Finish(buffers_offset);
+  void* buffers_pointer = builder->GetBufferPointer();
+  const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* result =
+      flatbuffers::GetRoot<flatbuffers::Vector<flatbuffers::Offset<Buffer>>>(
+          buffers_pointer);
+  return result;
+}
+
+}  // namespace
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestAllocateTensor) {
+  constexpr size_t arena_size = 1024;
+  uint8_t arena[arena_size];
+  tflite::SimpleTensorAllocator allocator(arena, arena_size);
+
+  const tflite::Tensor* tensor = tflite::Create1dTensor(100);
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>* buffers =
+      tflite::CreateBuffers();
+
+  TfLiteTensor allocated_tensor;
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk,
+      allocator.AllocateTensor(*tensor, 0, 1, buffers, micro_test::reporter,
+                               &allocated_tensor));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_tensor.type);
+  TF_LITE_MICRO_EXPECT_EQ(1, allocated_tensor.dims->size);
+  TF_LITE_MICRO_EXPECT_EQ(100, allocated_tensor.dims->data[0]);
+  TF_LITE_MICRO_EXPECT_EQ(400, allocated_tensor.bytes);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, allocated_tensor.data.i32);
+}
+
+TF_LITE_MICRO_TEST(TestTooLarge) {
+  constexpr size_t arena_size = 1024;
+  uint8_t arena[arena_size];
+  tflite::SimpleTensorAllocator allocator(arena, arena_size);
+
+  const tflite::Tensor* tensor = tflite::Create1dTensor(10000);
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>* buffers =
+      tflite::CreateBuffers();
+
+  TfLiteTensor allocated_tensor;
+  TF_LITE_MICRO_EXPECT_NE(
+      kTfLiteOk,
+      allocator.AllocateTensor(*tensor, 0, 1, buffers, micro_test::reporter,
+                               &allocated_tensor));
+}
+
+TF_LITE_MICRO_TEST(TestJustFits) {
+  constexpr size_t arena_size = 1024;
+  uint8_t arena[arena_size];
+  tflite::SimpleTensorAllocator allocator(arena, arena_size);
+
+  uint8_t* result = allocator.AllocateMemory(arena_size);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, result);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/BUILD b/tensorflow/contrib/lite/experimental/micro/testing/BUILD
new file mode 100644
index 0000000000..0d23be5712
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/BUILD
@@ -0,0 +1,17 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["test_linux_binary.sh"])
+
+cc_library(
+    name = "micro_test",
+    hdrs = [
+        "micro_test.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite/experimental/micro:micro_framework",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill b/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill
new file mode 100644
index 0000000000..7d6d81af0f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill
@@ -0,0 +1,21 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# This docker configuration file lets you emulate a Blue Pill board
+# on an x86 desktop or laptop, which can be useful for debugging and
+# automated testing.
+FROM antmicro/renode:latest
+
+LABEL maintainer="Pete Warden <petewarden@google.com>"
\ No newline at end of file
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
new file mode 100644
index 0000000000..9333dc42bf
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
@@ -0,0 +1,36 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+using sysbus
+
+mach create
+machine LoadPlatformDescription @platforms/cpus/stm32f103.repl
+
+# These lines are needed to show the results of DebugLog calls in the output.
+machine LoadPlatformDescriptionFromString "uartSemihosting: UART.SemihostingUart @ cpu"
+showAnalyzer cpu.uartSemihosting Antmicro.Renode.Analyzers.LoggingUartAnalyzer
+
+logFile @/tmp/renode_bluepill_log.txt
+
+macro reset
+"""
+    sysbus LoadELF $bin
+"""
+
+runMacro $reset
+
+emulation RunFor @1
+
+quit
\ No newline at end of file
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
new file mode 100644
index 0000000000..91e349cb24
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
@@ -0,0 +1,64 @@
+"""Rules for simple testing without dependencies by parsing output logs."""
+
+def tflite_micro_cc_test(
+        name,
+        expected_in_logs = "~~~ALL TESTS PASSED~~~",
+        srcs = [],
+        includes = [],
+        defines = [],
+        copts = [],
+        nocopts = "",
+        linkopts = [],
+        deps = [],
+        visibility = None):
+    """Tests a C/C++ binary without testing framework  dependencies`.
+
+    Runs a C++ binary, and tests that the output logs contain the
+    expected value. This is a deliberately spartan way of testing, to match
+    what's available when testing microcontroller binaries.
+
+    Args:
+      name: a unique name for this rule.
+      expected_in_logs: A regular expression that is required to be
+                        present in the binary's logs for the test to pass.
+      srcs: sources to compile (C, C++, ld scripts).
+      includes: include paths to add to this rule and its dependents.
+      defines: list of `VAR` or `VAR=VAL` to pass to CPP for this rule and
+               its dependents.
+      copts: gcc compilation flags for this rule only.
+      nocopts: list of gcc compilation flags to remove for this rule
+               only. No regexp like for `cc_library`.
+      linkopts: `gcc` flags to add to the linking phase. For "pure" ld flags,
+                prefix them with the `-Wl,` prefix here.
+      deps: dependencies. only `tflite_bare_metal_cc_library()` dependencies
+            allowed.
+      visibility: visibility.
+    """
+    native.cc_binary(
+        name = name + "_binary",
+        srcs = srcs,
+        includes = includes,
+        defines = defines,
+        copts = copts,
+        nocopts = nocopts,
+        linkopts = linkopts,
+        deps = deps,
+        visibility = visibility,
+    )
+    native.sh_test(
+        name = name,
+        size = "medium",
+        srcs = [
+            "//tensorflow/contrib/lite/experimental/micro/testing:test_linux_binary.sh",
+        ],
+        args = [
+            native.package_name() + "/" + name + "_binary",
+            "'" + expected_in_logs + "'",
+        ],
+        data = [
+            name + "_binary",
+            # Internal test dependency placeholder
+        ],
+        deps = [
+        ],
+    )
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h
new file mode 100644
index 0000000000..104509c9dc
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h
@@ -0,0 +1,138 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// An ultra-lightweight testing framework designed for use with microcontroller
+// applications. Its only dependency is on TensorFlow Lite's ErrorReporter
+// interface, where log messages are output. This is designed to be usable even
+// when no standard C or C++ libraries are available, and without any dynamic
+// memory allocation or reliance on global constructors.
+//
+// To build a test, you use syntax similar to gunit, but with some extra
+// decoration to create a hidden 'main' function containing each of the tests to
+// be run. Your code should look something like:
+// ----------------------------------------------------------------------------
+// #include "path/to/this/header"
+//
+// TF_LITE_MICRO_TESTS_BEGIN
+//
+// TF_LITE_MICRO_TEST(SomeTest) {
+//   TF_LITE_LOG_EXPECT_EQ(true, true);
+// }
+//
+// TF_LITE_MICRO_TESTS_END
+// ----------------------------------------------------------------------------
+// If you compile this for your platform, you'll get a normal binary that you
+// should be able to run. Executing it will output logging information like this
+// to stderr (or whatever equivalent is available and written to by
+// ErrorReporter):
+// ----------------------------------------------------------------------------
+// Testing SomeTest
+// 1/1 tests passed
+// ~~~ALL TESTS PASSED~~~
+// ----------------------------------------------------------------------------
+// This is designed to be human-readable, so you can just run tests manually,
+// but the string "~~~ALL TESTS PASSED~~~" should only appear if all of the
+// tests do pass. This makes it possible to integrate with automated test
+// systems by scanning the output logs and looking for that magic value.
+//
+// This framework is intended to be a rudimentary alternative to no testing at
+// all on systems that struggle to run more conventional approaches, so use with
+// caution!
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_
+
+#include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h"
+
+namespace micro_test {
+extern int tests_passed;
+extern int tests_failed;
+extern bool is_test_complete;
+extern bool did_test_fail;
+extern tflite::ErrorReporter* reporter;
+}  // namespace micro_test
+
+#define TF_LITE_MICRO_TESTS_BEGIN              \
+  namespace micro_test {                       \
+  int tests_passed;                            \
+  int tests_failed;                            \
+  bool is_test_complete;                       \
+  bool did_test_fail;                          \
+  tflite::ErrorReporter* reporter;             \
+  }                                            \
+                                               \
+  int main(int argc, char** argv) {            \
+    micro_test::tests_passed = 0;              \
+    micro_test::tests_failed = 0;              \
+    tflite::MicroErrorReporter error_reporter; \
+    micro_test::reporter = &error_reporter;
+
+#define TF_LITE_MICRO_TESTS_END                                \
+  micro_test::reporter->Report(                                \
+      "%d/%d tests passed", micro_test::tests_passed,          \
+      (micro_test::tests_failed + micro_test::tests_passed));  \
+  if (micro_test::tests_failed == 0) {                         \
+    micro_test::reporter->Report("~~~ALL TESTS PASSED~~~\n");  \
+  } else {                                                     \
+    micro_test::reporter->Report("~~~SOME TESTS FAILED~~~\n"); \
+  }                                                            \
+  }
+
+// TODO(petewarden): I'm going to hell for what I'm doing to this poor for loop.
+#define TF_LITE_MICRO_TEST(name)                                           \
+  micro_test::reporter->Report("Testing %s", #name);                       \
+  for (micro_test::is_test_complete = false,                               \
+      micro_test::did_test_fail = false;                                   \
+       !micro_test::is_test_complete; micro_test::is_test_complete = true, \
+      micro_test::tests_passed += (micro_test::did_test_fail) ? 0 : 1,     \
+      micro_test::tests_failed += (micro_test::did_test_fail) ? 1 : 0)
+
+#define TF_LITE_MICRO_EXPECT(x)                                                \
+  do {                                                                         \
+    if (!(x)) {                                                                \
+      micro_test::reporter->Report(#x " failed at %s:%d", __FILE__, __LINE__); \
+      micro_test::did_test_fail = true;                                        \
+    }                                                                          \
+  } while (false)
+
+#define TF_LITE_MICRO_EXPECT_EQ(x, y)                                         \
+  do {                                                                        \
+    if ((x) != (y)) {                                                         \
+      micro_test::reporter->Report(#x " == " #y " failed at %s:%d", __FILE__, \
+                                   __LINE__);                                 \
+      micro_test::did_test_fail = true;                                       \
+    }                                                                         \
+  } while (false)
+
+#define TF_LITE_MICRO_EXPECT_NE(x, y)                                         \
+  do {                                                                        \
+    if ((x) == (y)) {                                                         \
+      micro_test::reporter->Report(#x " != " #y " failed at %s:%d", __FILE__, \
+                                   __LINE__);                                 \
+      micro_test::did_test_fail = true;                                       \
+    }                                                                         \
+  } while (false)
+
+#define TF_LITE_MICRO_EXPECT_NEAR(x, y, epsilon)                      \
+  do {                                                                \
+    auto delta = ((x) > (y)) ? ((x) - (y)) : ((y) - (x));             \
+    if (delta > epsilon) {                                            \
+      micro_test::reporter->Report(#x " near " #y " failed at %s:%d", \
+                                   __FILE__, __LINE__);               \
+      micro_test::did_test_fail = true;                               \
+    }                                                                 \
+  } while (false)
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh b/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
new file mode 100755
index 0000000000..07742a8262
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
@@ -0,0 +1,54 @@
+#!/bin/bash -e
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Tests a 'bluepill' STM32F103 ELF by parsing the log output of Renode emulation.
+#
+# First argument is the ELF location.
+# Second argument is a regular expression that's required to be in the output logs
+# for the test to pass.
+
+declare -r ROOT_DIR=`pwd`
+declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/
+declare -r MICRO_LOG_PATH=${TEST_TMPDIR}
+declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt
+mkdir -p ${MICRO_LOG_PATH}
+
+docker build -t renode_bluepill \
+  -f ${ROOT_DIR}/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill \
+  ${ROOT_DIR}/tensorflow/contrib/lite/experimental/micro/testing/
+
+docker run \
+  --log-driver=none -a stdout -a stderr \
+  -v ${ROOT_DIR}:/workspace \
+  -v /tmp:/tmp \
+  -it renode_bluepill \
+  /bin/bash -c "renode -P 5000 --disable-xwt -e '
+\$bin?=@/workspace/$1
+s @/workspace/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
+' 2>&1 >${MICRO_LOG_FILENAME}"
+
+echo "LOGS:"
+cat ${MICRO_LOG_FILENAME}
+
+if grep -q "$2" ${MICRO_LOG_FILENAME}
+then
+  echo "$1: PASS"
+  exit 0
+else
+  echo "$1: FAIL - '$2' not found in logs."
+  exit 1
+fi
+
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh b/tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh
new file mode 100755
index 0000000000..24131a6d2d
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh
@@ -0,0 +1,39 @@
+#!/bin/bash -e
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Tests a Linux binary by parsing the log output.
+#
+# First argument is the binary location.
+# Second argument is a regular expression that's required to be in the output logs
+# for the test to pass.
+
+declare -r ROOT_DIR=`pwd`
+declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/
+declare -r MICRO_LOG_PATH=${TEST_TMPDIR}/$1
+declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt
+mkdir -p ${MICRO_LOG_PATH}
+
+$1 2>&1 | tee ${MICRO_LOG_FILENAME}
+
+if grep -q "$2" ${MICRO_LOG_FILENAME}
+then
+  echo "$1: PASS"
+  exit 0
+else
+  echo "$1: FAIL - '$2' not found in logs."
+  exit 1
+fi
+
diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile
new file mode 100644
index 0000000000..880bb4763c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile
@@ -0,0 +1,166 @@
+MAKEFILE_DIR := tensorflow/contrib/lite/experimental/micro/tools/make
+
+# Try to figure out the host system
+HOST_OS :=
+ifeq ($(OS),Windows_NT)
+	HOST_OS = windows
+else
+	UNAME_S := $(shell uname -s)
+	ifeq ($(UNAME_S),Linux)
+		HOST_OS := linux
+	endif
+	ifeq ($(UNAME_S),Darwin)
+		HOST_OS := osx
+	endif
+endif
+
+HOST_ARCH := $(shell if [[ $(shell uname -m) =~ i[345678]86 ]]; then echo x86_32; else echo $(shell uname -m); fi)
+
+# Override these on the make command line to target a specific architecture. For example:
+# make -f tensorflow/contrib/lite/Makefile TARGET=rpi TARGET_ARCH=armv7l
+TARGET := $(HOST_OS)
+TARGET_ARCH := $(HOST_ARCH)
+
+INCLUDES := \
+-I. \
+-I$(MAKEFILE_DIR)/../../../../../ \
+-I$(MAKEFILE_DIR)/../../../../../../ \
+-I$(MAKEFILE_DIR)/downloads/ \
+-I$(MAKEFILE_DIR)/downloads/gemmlowp \
+-I$(MAKEFILE_DIR)/downloads/flatbuffers/include \
+-I$(OBJDIR)
+# This is at the end so any globally-installed frameworks like protobuf don't
+# override local versions in the source tree.
+INCLUDES += -I/usr/local/include
+
+TEST_SCRIPT := tensorflow/contrib/lite/experimental/micro/testing/test_linux_binary.sh
+
+MICROLITE_LIBS := -lm
+
+# There are no rules for compiling objects for the host system (since we don't
+# generate things like the protobuf compiler that require that), so all of
+# these settings are for the target compiler.
+CXXFLAGS := -O3 -DNDEBUG
+CXXFLAGS += --std=c++11 -g -DTF_LITE_STATIC_MEMORY
+CCFLAGS := -DNDEBUG -g -DTF_LITE_STATIC_MEMORY
+LDOPTS := -L/usr/local/lib
+ARFLAGS := -r
+TARGET_TOOLCHAIN_PREFIX :=
+CC_PREFIX :=
+
+# This library is the main target for this makefile. It will contain a minimal
+# runtime that can be linked in to other programs.
+MICROLITE_LIB_NAME := libtensorflow-microlite.a
+
+# Test binary for the microcontroller speech model.
+MICRO_SPEECH_TEST_SRCS := \
+tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc \
+tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
+
+MICROLITE_TEST_SRCS := \
+$(wildcard tensorflow/contrib/lite/experimental/micro/*test.cc) \
+$(wildcard tensorflow/contrib/lite/experimental/micro/kernels/*test.cc)
+
+MICROLITE_CC_BASE_SRCS := \
+$(wildcard tensorflow/contrib/lite/experimental/micro/*.cc) \
+$(wildcard tensorflow/contrib/lite/experimental/micro/kernels/*.cc) \
+tensorflow/contrib/lite/c/c_api_internal.c \
+tensorflow/contrib/lite/core/api/error_reporter.cc \
+tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc \
+tensorflow/contrib/lite/core/api/op_resolver.cc \
+tensorflow/contrib/lite/kernels/kernel_util.cc \
+tensorflow/contrib/lite/kernels/internal/quantization_util.cc
+MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_TEST_SRCS), $(MICROLITE_CC_BASE_SRCS))
+
+# These target-specific makefiles should modify or replace options like
+# CXXFLAGS or LIBS to work for a specific targetted architecture. All logic
+# based on platforms or architectures should happen within these files, to
+# keep this main makefile focused on the sources and dependencies.
+include $(wildcard $(MAKEFILE_DIR)/targets/*_makefile.inc)
+
+ALL_SRCS := \
+	$(MICRO_SPEECH_TEST_SRCS) \
+	$(MICROLITE_CC_SRCS) \
+	$(MICROLITE_TEST_SRCS)
+
+# Where compiled objects are stored.
+GENDIR := $(MAKEFILE_DIR)/gen/$(TARGET)_$(TARGET_ARCH)/
+OBJDIR := $(GENDIR)obj/
+BINDIR := $(GENDIR)bin/
+LIBDIR := $(GENDIR)lib/
+
+MICROLITE_LIB_PATH := $(LIBDIR)$(MICROLITE_LIB_NAME)
+
+MICRO_SPEECH_TEST_BINARY := $(BINDIR)micro_speech_test
+
+CXX := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}g++
+CC := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}gcc
+AR := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}ar
+
+MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICRO_SPEECH_TEST_SRCS))))
+
+MICROLITE_LIB_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICROLITE_CC_SRCS))))
+
+MICROLITE_TEST_TARGETS := $(addprefix $(BINDIR), \
+$(patsubst %_test.cc,%.test_target,$(MICROLITE_TEST_SRCS)))
+
+# For normal manually-created TensorFlow C++ source files.
+$(OBJDIR)%.o: %.cc
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
+
+# For normal manually-created TensorFlow C source files.
+$(OBJDIR)%.o: %.c
+	@mkdir -p $(dir $@)
+	$(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
+
+# The target that's compiled if there's no command-line arguments.
+all: $(MICROLITE_LIB_PATH) $(MICRO_SPEECH_TEST_BINARY)
+
+microlite: $(MICROLITE_LIB_PATH)
+
+# Hack for generating schema file bypassing flatbuffer parsing
+tensorflow/contrib/lite/schema/schema_generated.h:
+	@cp -u tensorflow/contrib/lite/schema/schema_generated.h.OPENSOURCE tensorflow/contrib/lite/schema/schema_generated.h
+
+# Gathers together all the objects we've compiled into a single '.a' archive.
+$(MICROLITE_LIB_PATH): tensorflow/contrib/lite/schema/schema_generated.h $(MICROLITE_LIB_OBJS)
+	@mkdir -p $(dir $@)
+	$(AR) $(ARFLAGS) $(MICROLITE_LIB_PATH) $(MICROLITE_LIB_OBJS)
+
+$(MICRO_SPEECH_TEST_BINARY): $(MICRO_SPEECH_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(MICRO_SPEECH_TEST_BINARY) $(MICRO_SPEECH_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+
+micro_speech_test: $(MICRO_SPEECH_TEST_BINARY)
+micro_speech_test_bin: $(MICRO_SPEECH_TEST_BINARY).bin
+
+test_micro_speech: $(MICRO_SPEECH_TEST_BINARY)
+	$(TEST_SCRIPT) $(MICRO_SPEECH_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+
+$(BINDIR)%_test : $(OBJDIR)%_test.o $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $@ $< \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+
+$(BINDIR)%.test_target: $(BINDIR)%_test
+	$(TEST_SCRIPT) $< '~~~ALL TESTS PASSED~~~'
+
+$(info $(MICROLITE_TEST_TARGETS))
+
+test: test_micro_speech $(MICROLITE_TEST_TARGETS)
+
+# Gets rid of all generated files.
+clean:
+	rm -rf $(MAKEFILE_DIR)/gen
+
+$(DEPDIR)/%.d: ;
+.PRECIOUS: $(DEPDIR)/%.d
+.PRECIOUS: $(BINDIR)%_test
+
+-include $(patsubst %,$(DEPDIR)/%.d,$(basename $(ALL_SRCS)))
diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh b/tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh
new file mode 100755
index 0000000000..4c2ff8545d
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR/../../../../../../.."
+
+DOWNLOADS_DIR=tensorflow/contrib/lite/experimental/micro/tools/make/downloads
+BZL_FILE_PATH=tensorflow/workspace.bzl
+
+# Ensure it is being run from repo root
+if [ ! -f $BZL_FILE_PATH ]; then
+  echo "Could not find ${BZL_FILE_PATH}":
+  echo "Likely you are not running this from the root directory of the repository.";
+  exit 1;
+fi
+
+GEMMLOWP_URL="https://github.com/google/gemmlowp/archive/719139ce755a0f31cbf1c37f7f98adcc7fc9f425.zip"
+FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz"
+CMSIS_URL="https://github.com/ARM-software/CMSIS_5/archive/5.4.0.zip"
+STM32_BARE_LIB_URL="https://github.com/google/stm32_bare_lib/archive/50e0da307a2821bb54af1f57b969e6b76cb89d32.zip"
+
+download_and_extract() {
+  local usage="Usage: download_and_extract URL DIR"
+  local url="${1:?${usage}}"
+  local dir="${2:?${usage}}"
+  echo "downloading ${url}" >&2
+  mkdir -p "${dir}"
+  if [[ "${url}" == *gz ]]; then
+    curl -Ls "${url}" | tar -C "${dir}" --strip-components=1 -xz
+  elif [[ "${url}" == *zip ]]; then
+    tempdir=$(mktemp -d)
+    tempdir2=$(mktemp -d)
+
+    curl -L ${url} > ${tempdir}/zipped.zip
+    unzip ${tempdir}/zipped.zip -d ${tempdir2}
+
+    # If the zip file contains nested directories, extract the files from the
+    # inner directory.
+    if ls ${tempdir2}/*/* 1> /dev/null 2>&1; then
+      # unzip has no strip components, so unzip to a temp dir, and move the
+      # files we want from the tempdir to destination.
+      cp -R ${tempdir2}/*/* ${dir}/
+    else
+      cp -R ${tempdir2}/* ${dir}/
+    fi
+    rm -rf ${tempdir2} ${tempdir}
+  fi
+
+  # Delete any potential BUILD files, which would interfere with Bazel builds.
+  find "${dir}" -type f -name '*BUILD' -delete
+}
+
+download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp"
+download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers"
+download_and_extract "${CMSIS_URL}" "${DOWNLOADS_DIR}/cmsis"
+download_and_extract "${STM32_BARE_LIB_URL}" "${DOWNLOADS_DIR}/stm32_bare_lib"
+
+echo "download_dependencies.sh completed successfully." >&2
diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc b/tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc
new file mode 100644
index 0000000000..022a8422dc
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/tools/make/targets/bluepill_makefile.inc
@@ -0,0 +1,65 @@
+# Settings for Blue Pill platforms.
+ifeq ($(TARGET), bluepill)
+  TARGET_ARCH := cortex-m3
+  TARGET_TOOLCHAIN_PREFIX := arm-none-eabi-
+
+  PLATFORM_FLAGS = \
+    -DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \
+    -DTF_LITE_STATIC_MEMORY \
+    -DTF_LITE_MCU_DEBUG_LOG \
+    -fno-rtti \
+    -fmessage-length=0 \
+    -fno-exceptions \
+    -fno-unwind-tables \
+    -fno-builtin \
+    -ffunction-sections \
+    -fdata-sections \
+    -funsigned-char \
+    -MMD \
+    -mcpu=cortex-m3 \
+    -mthumb \
+    -std=gnu++11 \
+    -Wvla \
+    -Wall \
+    -Wextra \
+    -Wno-unused-parameter \
+    -Wno-missing-field-initializers \
+    -Wno-write-strings \
+    -Wno-sign-compare \
+    -fno-delete-null-pointer-checks \
+    -fomit-frame-pointer \
+    -fpermissive \
+    -nostdlib \
+    -g \
+    -Os
+  CXXFLAGS += $(PLATFORM_FLAGS)
+  CCFLAGS += $(PLATFORM_FLAGS)
+  LDFLAGS += \
+    -T $(MAKEFILE_DIR)/downloads/stm32_bare_lib/stm32_linker_layout.lds \
+    -Wl,-Map=$(MAKEFILE_DIR)/gen/$(TARGET).map,--cref \
+    -Wl,--gc-sections
+	BUILD_TYPE := micro
+  MICROLITE_LIBS := \
+    -lm
+  INCLUDES += \
+    -isystem$(MAKEFILE_DIR)/downloads/cmsis/CMSIS/Core/Include/ \
+    -I$(MAKEFILE_DIR)/downloads/stm32_bare_lib/include
+  MICROLITE_CC_SRCS += \
+    $(wildcard $(MAKEFILE_DIR)/downloads/stm32_bare_lib/source/*.c) \
+    $(wildcard $(MAKEFILE_DIR)/downloads/stm32_bare_lib/source/*.cc)
+    TEST_SCRIPT := tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
+  # These are tests that don't currently work on the blue pill.
+  EXCLUDED_TESTS := \
+    tensorflow/contrib/lite/experimental/micro/micro_interpreter_test.cc \
+    tensorflow/contrib/lite/experimental/micro/simple_tensor_allocator_test.cc
+  MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS))
+
+# These are microcontroller-specific rules for converting the ELF output
+# of the linker into a binary image that can be loaded directly.
+OBJCOPY := $(TARGET_TOOLCHAIN_PREFIX)objcopy
+
+$(BINDIR)/%.bin: $(BINDIR)/%
+	@mkdir -p $(dir $@)
+	$(OBJCOPY) $< $@ -O binary
+
+endif
\ No newline at end of file
diff --git a/tensorflow/contrib/lite/kernels/internal/compatibility.h b/tensorflow/contrib/lite/kernels/internal/compatibility.h
index b87cf2b60d..7c176e0fa1 100644
--- a/tensorflow/contrib/lite/kernels/internal/compatibility.h
+++ b/tensorflow/contrib/lite/kernels/internal/compatibility.h
@@ -84,4 +84,27 @@ using uint16 = std::uint16_t;
 using int32 = std::int32_t;
 using uint32 = std::uint32_t;
 
+// TFLITE_DEPRECATED()
+//
+// Duplicated from absl/base/macros.h to avoid pulling in that library.
+// Marks a deprecated class, struct, enum, function, method and variable
+// declarations. The macro argument is used as a custom diagnostic message (e.g.
+// suggestion of a better alternative).
+//
+// Example:
+//
+//   class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
+//   TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
+//
+// Every usage of a deprecated entity will trigger a warning when compiled with
+// clang's `-Wdeprecated-declarations` option. This option is turned off by
+// default, but the warnings will be reported by clang-tidy.
+#if defined(__clang__) && __cplusplus >= 201103L
+#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
+#endif
+
+#ifndef TFLITE_DEPRECATED
+#define TFLITE_DEPRECATED(message)
+#endif
+
 #endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index b39347758a..64a39dd2a2 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -18,7 +18,6 @@ limitations under the License.
 #include <algorithm>
 #include <cstring>
 
-#include "absl/base/macros.h"
 #include "tensorflow/contrib/lite/kernels/internal/compatibility.h"
 
 namespace tflite {
@@ -441,7 +440,7 @@ inline int FlatSize(const Dims<N>& dims) {
   return flat_size;
 }
 
-ABSL_DEPRECATED("Prefer FlatSize.")
+TFLITE_DEPRECATED("Prefer FlatSize.")
 inline int RequiredBufferSizeForDims(const Dims<4>& dims) {
   return FlatSize(dims);
 }
-- 
GitLab


From ac22e1583aed390d78d2e87a4bf8a6ec39400ec4 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Thu, 4 Oct 2018 09:21:05 -0700
Subject: [PATCH 0355/1085] Gracefully disallow updating resource variables
 with invalid shapes.

During graph construction, the shape function for AssignAddVariableOp etc.
would raise an error when the value being "assign add"ed to the variable
has an incompatible shape.

With eager execution, no such validation was being made which triggerred
an assertion failure in eigen:
https://github.com/eigenteam/eigen-git-mirror/blob/7d97e1cbbe4424fda39e31c88def7c0863897640/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h#L479

This change prevents that assertion failure.

PiperOrigin-RevId: 215749071
---
 tensorflow/core/kernels/resource_variable_ops.cc         | 6 ++++++
 .../python/kernel_tests/resource_variable_ops_test.py    | 9 ++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index 23d76986bf..678d675c4a 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -426,6 +426,12 @@ class AssignUpdateVariableOp : public OpKernel {
     // ADD if value's refcount was 1.
     mutex_lock ml(*variable->mu());
     Tensor* var_tensor = variable->tensor();
+    OP_REQUIRES(context, var_tensor->shape().IsSameSize(value.shape()),
+                errors::InvalidArgument("Cannot update variable with shape ",
+                                        var_tensor->shape().DebugString(),
+                                        " using a Tensor with shape ",
+                                        value.shape().DebugString(),
+                                        ", shapes must be equal."));
     OP_REQUIRES_OK(context,
                    PrepareToUpdateVariable<Device, T>(context, var_tensor));
     functor::DenseUpdate<Device, T, Op> update_functor;
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index 1365d4b240..a9fd93e9f8 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -142,7 +142,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       v = resource_variable_ops.ResourceVariable(1.0)
     ops.reset_default_graph()
     v.assign(2.0)  # Note: this fails if we run convert_to_tensor on not the
-                   # variable graph.
+    # variable graph.
 
   def testFetchHandle(self):
     with self.cached_session():
@@ -908,6 +908,13 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(Exception, r"shape.*2.*3"):
       state_ops.scatter_update(v, [0, 1], [0, 1, 2])
 
+  @test_util.run_in_graph_and_eager_modes
+  def testAssignIncompatibleShape(self):
+    v = resource_variable_ops.ResourceVariable([0, 1, 2, 3])
+    self.evaluate(v.initializer)
+    with self.assertRaisesRegexp(Exception, r"hapes must be equal"):
+      self.assertAllEqual(self.evaluate(v.assign_add(1)), [1, 2, 3, 4])
+
 
 class _MixedPrecisionVariableTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 1fb84c2e41c454939a02a69093cb214673eab343 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 4 Oct 2018 09:26:19 -0700
Subject: [PATCH 0356/1085] Add ability to vectorize nodes that do not derive
 from function arguments. (This indirectly handles "Const" outputs
 automagically, since they are always unstacked.)

PiperOrigin-RevId: 215749824
---
 .../core/grappler/optimizers/data/BUILD       |   1 +
 .../optimizers/data/map_vectorization.cc      |   2 +-
 .../optimizers/data/vectorization_utils.cc    | 247 ++++++++++++++++-
 .../data/vectorization_utils_test.cc          | 251 ++++++++++++++++++
 .../optimization/map_vectorization_test.py    |   4 +
 5 files changed, 492 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 755af3361e..ee7c14e3ab 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -524,6 +524,7 @@ cc_library(
     deps = [
         ":function_utils",
         ":graph_utils",
+        "//tensorflow/cc:ops",
         "@com_google_absl//absl/strings",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index 9328a7ca99..ba521e79bc 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -44,7 +44,7 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
   // Function inputs and outputs are the same as original, just
   // with different shapes.
   *vectorized_func->mutable_signature() = orig_func.signature();
-  graph_utils::SetUniqueGraphFunctionName("vectorized_function", library,
+  graph_utils::SetUniqueGraphFunctionName("naively_vectorized_fn", library,
                                           vectorized_func);
 
   // Add MapDefun node
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index 2d6cf562b1..344c420902 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -14,10 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
-#include <memory>
 #include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
 
 #include "absl/strings/str_join.h"
+#include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/device_base.h"
@@ -28,13 +28,13 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/optimizers/data/function_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/functions.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
-#include "tensorflow/core/lib/strings/scanner.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -45,6 +45,22 @@ namespace {
 // Describes a tensor with its operation Node and output position
 typedef std::pair<Node*, int> TensorDesc;
 
+// Equivalent to python Pfor's WrappedTensor struct
+struct WrappedTensor {
+  TensorDesc tensor;
+
+  // Whether the tensor is stacked, i.e. represents the results of applying
+  // the operation on all slices of the input, where each row i of the
+  // tensor corresponds to the op's output on slice i of the input. False
+  // if the tensor is not stacked, i.e. represents the result of the op on
+  // a single slice of the input, where the result does not vary between
+  // slices.
+  bool stacked;
+
+  WrappedTensor(TensorDesc&& tensor, bool stacked)
+      : tensor(std::move(tensor)), stacked(stacked) {}
+};
+
 const char* const kRetValOp = "_Retval";
 
 void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src,
@@ -132,7 +148,8 @@ class Vectorization {
                    const NodeDef& map_defun_node, FunctionDef** result);
 
  private:
-  // Converts FunctionDefs to Graphs.
+  // Converts FunctionDefs to Graphs and adds mappings from
+  // arg nodes and unstacked nodes to the corresponding nodes in outer_scope_.
   Status Initialize(const FunctionDef& outer_scope,
                     const NodeDef& map_defun_node);
 
@@ -162,9 +179,30 @@ class Vectorization {
   //    the conversion map.
   Status AddConversionMapping(Node* op_node);
 
-  // Maps a tensor to the corresponding vectorized tensor. For example,
-  // {"Cast" Node*, 0} -> {"Vectorize/Cast" Node*, 0}
-  std::map<TensorDesc, TensorDesc> conversion_map_;
+  // Given a tensor t in `unstacked`, stacks it by doing the equivalent of
+  // tf.tile(tf.expand_dims(t, 0), [n, 1, 1, ...]) where n is dimension 0 of
+  // inputs to `map_defun_node_`. This stacked tensor will be compatible with
+  // the expected output shape of `map_defun_node_`.
+  // This is equivalent to the _stack function in python Pfor.
+  Status StackTensor(WrappedTensor* unstacked, TensorDesc* result);
+
+  // Recursively looks for unstacked nodes in the `map_defun_fn_` graph by
+  // doing a depth-first search from the ret nodes. Lifts nodes that are
+  // unstacked (i.e. don't derive from arg nodes) into `outer_scope_` directly
+  // and add mappings to `conversion_map_`.
+  Status AddUnstackedNodeMappings();
+
+  // Recursive helper for `AddUnstackedNodeMappings`, returns true if tensor
+  // is unstacked.
+  bool AddUnstackedNodeMappingsHelper(TensorDesc&& tensor, Status* status);
+
+  // Add mappings from `map_defun_fn_` arg nodes to `map_defun_node_` input
+  // nodes to `conversion_map_`.
+  Status AddArgNodeMappings();
+
+  // Maps a tensor to the corresponding WrappedTensor. For example,
+  // {"Cast" Node*, 0} -> WrappedTensor({"Vectorize/Cast" Node*, 0}, true)
+  std::map<TensorDesc, WrappedTensor> conversion_map_;
 
   // Unconvertible ret nodes
   std::set<Node*> unconvertible_;
@@ -180,6 +218,10 @@ class Vectorization {
   std::unique_ptr<Graph> outer_scope_;
   std::unique_ptr<FunctionBody> map_defun_fn_;
   Node* map_defun_node_ = nullptr;  // Owned by `outer_scope`
+
+  // Caches the loop_len_node_ needed for tiling unstacked output. This
+  // corresponds to a vector with one element.
+  Node* loop_len_node_ = nullptr;  // Owned by `outer_scope`
   Status status_;
 };
 
@@ -224,7 +266,7 @@ Status Vectorization::AddConversionMapping(Node* op_node) {
 
   // Add output mappings.
   for (size_t i = 0; i < op_node->num_outputs(); ++i) {
-    conversion_map_.insert({{op_node, i}, std::move(output_ports[i])});
+    conversion_map_.insert({{op_node, i}, {std::move(output_ports[i]), true}});
   }
 
   return Status::OK();
@@ -242,10 +284,22 @@ Status Vectorization::ConvertOutput(int output_position) {
   if (auto found = gtl::FindOrNull(conversion_map_, output)) {
     // It's possible the output already has a mapping, if it comes from a node
     // that has already been converted.
-    converted_output = *found;
+    if (found->stacked) {
+      converted_output = found->tensor;
+    } else {
+      // Some outputs may be unstacked if they don't derive from arg nodes
+      // (for example, if a function returns a constant). For these, we
+      // have to add extra nodes to tile it in the 0th dimension.
+      TF_RETURN_IF_ERROR(StackTensor(found, &converted_output));
+    }
   } else {
+    // Note: All unstacked nodes are converted ahead of time in `Initialize`,
+    // and here we assume that all op vectorizers create only stacked outputs.
+    // This may not hold in the future, as more vectorizers are added that
+    // may actually create unstacked outputs. For example, see the `Shape`
+    // converter in third_party/tensorflow/python/ops/parallel_for/pfor.py
     TF_RETURN_IF_ERROR(AddConversionMapping(output.first));
-    converted_output = conversion_map_.at(output);
+    converted_output = conversion_map_.at(output).tensor;
   }
 
   ReplaceEdgeSources({map_defun_node_, output_position}, converted_output,
@@ -297,6 +351,7 @@ void Vectorization::VectorizeHelper() {
     map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types);
   }
 }
+
 Status Vectorization::Initialize(const FunctionDef& outer_scope,
                                  const NodeDef& map_defun_node) {
   // Convert outer_scope and map_defun_fn to FunctionBodys so we can
@@ -337,16 +392,184 @@ Status Vectorization::Initialize(const FunctionDef& outer_scope,
   }
   map_defun_node_ = outer_scope_->FindNodeId(node_id);
 
-  // Add mappings from map_defun_fn_ arg nodes to map_defun_node_ input nodes to
-  // the conversion map
+  TF_RETURN_IF_ERROR(AddArgNodeMappings());
+
+  TF_RETURN_IF_ERROR(AddUnstackedNodeMappings());
+  loop_len_node_ = nullptr;
+
+  return Status::OK();
+}
+
+// TODO(rachelim): It might be profitable to use the C++ API for this instead of
+// NodeBuilder
+Status Vectorization::StackTensor(WrappedTensor* unstacked,
+                                  TensorDesc* result) {
+  // Note that all these nodes are necessary as the size of the batch may not be
+  // constant.
+  if (unstacked->stacked) {
+    return errors::Internal("Can only stack unstacked tensor.");
+  }
+
+  Graph* g = outer_scope_.get();
+  auto node_builder = [](StringPiece op) {
+    return NodeBuilder(strings::StrCat("vectorized/stack/", op), op);
+  };
+
+  auto make_const = [&node_builder](const Input::Initializer& val, Graph* graph,
+                                    Node** result) {
+    TF_RETURN_IF_ERROR(val.status);
+    return node_builder("Const")
+        .Attr("value", val.tensor)
+        .Attr("dtype", val.tensor.dtype())
+        .Finalize(graph, result);
+  };
+
+  // If loop_len_node_ hasn't been created yet, add the node and cache it.
+  if (loop_len_node_ == nullptr) {
+    Node* input_node;
+    TF_RETURN_IF_ERROR(map_defun_node_->input_node(0, &input_node));
+
+    Node* shape_node;
+    TF_RETURN_IF_ERROR(
+        node_builder("Shape").Input(input_node).Finalize(g, &shape_node));
+
+    Node* const_vec_0;
+    TF_RETURN_IF_ERROR(make_const({0}, g, &const_vec_0));
+    Node* const_vec_1;
+    TF_RETURN_IF_ERROR(make_const({1}, g, &const_vec_1));
+
+    Node* strided_slice_node;
+    TF_RETURN_IF_ERROR(node_builder("StridedSlice")
+                           .Input(shape_node)   // input
+                           .Input(const_vec_0)  // begin
+                           .Input(const_vec_1)  // end
+                           .Input(const_vec_1)  // strides
+                           .Finalize(g, &strided_slice_node));
+
+    // Produces a vector of length 1
+    TF_RETURN_IF_ERROR(node_builder("Reshape")
+                           .Input(strided_slice_node)  // tensor
+                           .Input(const_vec_1)         // shape
+                           .Finalize(g, &loop_len_node_));
+  }
+
+  Node* ones_shape;
+  TF_RETURN_IF_ERROR(node_builder("Shape")
+                         .Input(unstacked->tensor.first)  // input
+                         .Finalize(g, &ones_shape));
+
+  Node* ones;
+  TF_RETURN_IF_ERROR(
+      node_builder("OnesLike").Input(ones_shape).Finalize(g, &ones));
+
+  Node* const_0;
+  TF_RETURN_IF_ERROR(make_const(0, g, &const_0));
+
+  Node* multiples;
+  TF_RETURN_IF_ERROR(node_builder("Concat")
+                         .Input(const_0)                           // concat_dim
+                         .Input({{loop_len_node_, 0}, {ones, 0}})  // values
+                         .Finalize(g, &multiples));
+
+  Node* expand_dims;
+  TF_RETURN_IF_ERROR(node_builder("ExpandDims")
+                         .Input(unstacked->tensor.first)  // input
+                         .Input(const_0)                  // dim
+                         .Finalize(g, &expand_dims));
+
+  TF_RETURN_IF_ERROR(node_builder("Tile")
+                         .Input(expand_dims)  // input
+                         .Input(multiples)    // multiples
+                         .Finalize(g, &result->first));
+  result->second = 0;
+  return Status::OK();
+}
+
+Status Vectorization::AddArgNodeMappings() {
   for (auto arg_node : map_defun_fn_->arg_nodes) {
     Node* input_node;
     TF_RETURN_IF_ERROR(map_defun_node_->input_node(
         arg_node->attrs().Find("index")->i(), &input_node));
 
-    conversion_map_.insert({{arg_node, 0}, {input_node, 0}});
+    conversion_map_.insert({{arg_node, 0}, {{input_node, 0}, true}});
+
+    // Control inputs
+    conversion_map_.insert({{arg_node, Graph::kControlSlot},
+                            {{input_node, Graph::kControlSlot}, true}});
   }
+  return Status::OK();
+}
 
+bool Vectorization::AddUnstackedNodeMappingsHelper(TensorDesc&& tensor,
+                                                   Status* status) {
+  if (auto found = gtl::FindOrNull(conversion_map_, tensor)) {
+    return !found->stacked;
+  }
+
+  if (tensor.first->op_def().is_stateful()) {
+    // We don't lift stateful nodes directly out of the MapDefun, since they may
+    // have to be executed N times.
+    return false;
+  }
+
+  bool is_unstacked = true;
+  for (auto edge : tensor.first->in_edges()) {
+    // Ignore Source nodes. Note that these are also ignored in the
+    // GraphToFunctionDef conversion.
+    if (edge->src()->IsSource()) continue;
+
+    // A node is unstacked if all of its inputs are unstacked
+    is_unstacked &= AddUnstackedNodeMappingsHelper(
+        {edge->src(), edge->src_output()}, status);
+  }
+
+  if (!is_unstacked) {
+    return false;
+  }
+
+  // If the node is unstacked, we copy it into outer_scope_ and
+  // add it to the map. Note that we don't clean up the nodes that are copied
+  // in map_defun_fn_, and rely on them being pruned out later.
+  Node* node = outer_scope_->AddNode(tensor.first->def(), status);
+  if (!status->ok()) return true;
+
+  // Add input edges to nodes that should already have been lifted.
+  for (auto edge : tensor.first->in_edges()) {
+    // Ignore Source nodes. Note that these are also ignored in the
+    // GraphToFunctionDef conversion.
+    if (edge->src()->IsSource()) continue;
+
+    if (auto found = gtl::FindOrNull(conversion_map_,
+                                     {edge->src(), edge->src_output()})) {
+      outer_scope_->AddEdge(found->tensor.first, found->tensor.second, node,
+                            edge->dst_input());
+    } else {
+      status->Update(errors::Internal(
+          "Could not find input conversion even though we did depth first "
+          "conversion."));
+    }
+  }
+
+  // Add output mappings
+  for (int i = 0; i < tensor.first->num_outputs(); ++i) {
+    conversion_map_.insert(
+        {{tensor.first, i}, WrappedTensor({node, i}, false)});
+  }
+  conversion_map_.insert({{tensor.first, Graph::kControlSlot},
+                          WrappedTensor({node, Graph::kControlSlot}, false)});
+
+  return true;
+}
+
+Status Vectorization::AddUnstackedNodeMappings() {
+  SetVector<Node*> unstacked_nodes;
+  Status s;
+  for (const auto& ret_node : map_defun_fn_->ret_nodes) {
+    const Edge* in_edge = nullptr;
+    TF_RETURN_IF_ERROR(ret_node->input_edge(0, &in_edge));
+    AddUnstackedNodeMappingsHelper({in_edge->src(), in_edge->src_output()}, &s);
+    TF_RETURN_IF_ERROR(s);
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index 1ff62217dd..a958d706c1 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -670,6 +670,257 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
               cast_node.input(1) == control_input);
 }
 
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +------+     |   |
+// |   |                        |   |
+// |   |                        |   |
+// |   |           +------+     |   |
+// |   |           |Const |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   | MapDefun  +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+//  After:
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +------+         |
+// |                                |
+// |               +------+         |
+// |               |Const |         |
+// |               +---+--+         |
+// |                   |            |
+// |                   |            |
+// |                   |            |
+// |               +---v--+         |
+// |               |Stack*|         |
+// |               +---+--+         |
+// |                   |            |
+// |                   |            |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+// *Not actually a Stack node, but does the equivalent.
+//
+TEST(VectorizeMapDefunTest, VectorizeConst) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const", 2)},
+      {{"ret0", "Const:output:0"}});
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}},
+                      inner.signature().name(), &outer);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized));
+}
+
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +------+     |   |
+// |   |                        |   |
+// |   |                        |   |
+// |   |           +------+     |   |
+// |   |           |Const |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   |           +---v--+     |   |
+// |   |           | Cast |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   | MapDefun  +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+//  After:
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +------+         |
+// |                                |
+// |               +------+         |
+// |               |Const |         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |               | Cast |         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |               |Stack*|         |
+// |               +---+--+         |
+// |                   |            |
+// |                   |            |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+// *Not actually a Stack node, but does the equivalent.
+//
+TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int64"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const", 2)},
+      {{"ret0", "Cast:y:0"}});
+  AddCastNode("Cast", {"Const:output:0"}, DT_INT32, DT_INT64, false, &inner);
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int64"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT64}, {{}},
+                      inner.signature().name(), &outer);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  auto const_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Const", *vectorized));
+  auto cast_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Cast", *vectorized));
+  EXPECT_EQ(cast_node.input(0).substr(0, cast_node.input(0).find(':')),
+            const_node.name());
+}
+
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +------+     |   |
+// |   |                        |   |
+// |   | +------+  +------+     |   |
+// |   | |Const |  |Const |     |   |
+// |   | +---+--+  +---+--+     |   |
+// |   |     :     +---v--+     |   |
+// |   |     ::::::> Cast |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   | MapDefun  +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+//  After:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +------+         |
+// |                                |
+// |                                |
+// |               +------+         |
+// |     +------+  |Const |         |
+// |     |Const |  +---+--+         |
+// |     +---+--+      |            |
+// |         :     +---v--+         |
+// |         ::::::> Cast |         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |               +Stack*+         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+// *Not actually a Stack node, but does the equivalent.
+//
+TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int64"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const", 2),
+       FunctionDefHelper::Const("ConstDep", 3)},
+      {{"ret0", "Cast:y:0"}});
+  AddCastNode("Cast", {"Const:output:0", "^ConstDep"}, DT_INT32, DT_INT64,
+              false, &inner);
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int64"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT64}, {{}},
+                      inner.signature().name(), &outer);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+
+  FunctionDef* vectorized;
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+
+  auto find_const = [vectorized](int val) -> const NodeDef* {
+    for (const auto& n : vectorized->node_def()) {
+      if (n.attr().at("value").tensor().int_val(0) == val) {
+        return &n;
+      }
+    }
+    return nullptr;
+  };
+
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  auto const_node = find_const(2);
+  auto const_dep_node = find_const(3);
+  auto cast_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Cast", *vectorized));
+  EXPECT_EQ(cast_node.input(0).substr(0, cast_node.input(0).find(':')),
+            const_node->name());
+  EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name()));
+}
+
 // TODO(rachelim): More test cases when we get around to implementing them:
 // [] A badly defined converter, e.g. doesn't produce nodes that have the
 //    same number of outputs/inputs as the nodes to be converted
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index 32ebc49c40..971a2d94b9 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -78,6 +78,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("Basic", lambda x: (x, x + 1), None),
+      ("Const", lambda x: 2, 12),
       ("Parallel", lambda x: (x, x + 1), 12),
       ("Gather", lambda x: array_ops.gather(x, 0), 12),
   )
@@ -207,6 +208,9 @@ class MapVectorizationBenchmark(test.Benchmark):
   def benchmarkAddConst(self):
     self._benchmark_helper(lambda *args: [x + 1 for x in args], "add_const")
 
+  def benchmarkReturnConst(self):
+    self._benchmark_helper(lambda *args: [constant_op.constant(2)], "ret_const")
+
   def benchmarkSelect(self):
     self._benchmark_helper(lambda *args: args[0], "select")
 
-- 
GitLab


From c2552cd33c05fa84f280e766e33ba01308ffbcb2 Mon Sep 17 00:00:00 2001
From: Youlong Cheng <ylc@google.com>
Date: Thu, 4 Oct 2018 09:42:13 -0700
Subject: [PATCH 0357/1085]  Skip numeric checking in BROADCAST mode.

PiperOrigin-RevId: 215752559
---
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 37 +++++++++++++------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 3aa5b6efa1..8d15c857f8 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -177,14 +177,29 @@ def _create_or_get_iterations_per_loop():
           use_resource=True)
 
 
-def _sync_variables_ops():
-  # Gets the variables back from TPU nodes. This means the variables updated
-  # by TPU will now be *synced* to host memory.
-  return [
-      array_ops.check_numerics(v.read_value(),
-                               'Gradient for %s is NaN' % v.name).op
-      for v in variables.trainable_variables()
-  ]
+def _sync_variables_ops(ctx):
+  """Create varriables synchronization ops.
+
+  Gets the variables back from TPU nodes. This means the variables updated
+  by TPU will now be *synced* to host memory.
+  In BROADCAST mode, we skip this sync since the variables are ususally too
+  big to transmit via RPC.
+
+  Args:
+    ctx: A `_InternalTPUContext` instance with mode.
+
+  Returns:
+    A list of sync ops.
+  """
+
+  if not ctx.is_input_broadcast_with_iterators():
+    return [
+        array_ops.check_numerics(v.read_value(),
+                                 'Gradient for %s is NaN' % v.name).op
+        for v in variables.trainable_variables()
+    ]
+  else:
+    return [control_flow_ops.no_op()]
 
 
 def _increase_eval_step_op(iterations_per_loop):
@@ -2567,7 +2582,7 @@ class TPUEstimator(estimator_lib.Estimator):
 
           summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss)
           with ops.control_dependencies([loss]):
-            update_ops = _sync_variables_ops()
+            update_ops = _sync_variables_ops(ctx)
 
           # Validate the TPU training graph to catch basic errors
           _validate_tpu_training_graph()
@@ -2600,7 +2615,7 @@ class TPUEstimator(estimator_lib.Estimator):
             # After TPU evaluation computation is done (the mean_loss tensor),
             # reads all variables back from TPU and updates the eval step
             # counter properly
-            internal_ops_to_run = _sync_variables_ops()
+            internal_ops_to_run = _sync_variables_ops(ctx)
             internal_ops_to_run.append(
                 _increase_eval_step_op(iterations_per_loop_var))
             with ops.control_dependencies(internal_ops_to_run):
@@ -2645,7 +2660,7 @@ class TPUEstimator(estimator_lib.Estimator):
          scaffold, prediction_hooks) = _predict_on_tpu_system(
              ctx, model_fn_wrapper, dequeue_fn)
         with ops.control_dependencies([dummy_predict_op]):
-          internal_ops_to_run = _sync_variables_ops()
+          internal_ops_to_run = _sync_variables_ops(ctx)
           with ops.control_dependencies(internal_ops_to_run):
             dummy_predict_op = control_flow_ops.no_op()
 
-- 
GitLab


From 5e1b45d0a8aa3f268745cdc683c26d9ebdd1ea8b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 10:10:58 -0700
Subject: [PATCH 0358/1085] Automated rollback of commit
 f22037abf5a6f4581f5fb6013f72f91747f22965

PiperOrigin-RevId: 215757701
---
 tensorflow/compiler/jit/xla_device_context.cc    | 15 ++++-----------
 tensorflow/compiler/jit/xla_device_context.h     |  3 +--
 .../xla/service/generic_transfer_manager.cc      |  2 +-
 .../xla/service/generic_transfer_manager.h       |  7 +++----
 .../compiler/xla/service/transfer_manager.h      | 16 +---------------
 5 files changed, 10 insertions(+), 33 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index e083652978..af83c792e5 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -75,9 +75,8 @@ XlaTransferManager::XlaTransferManager(
   }
 }
 
-Status XlaTransferManager::TransferLiteralToDevice(const Tensor& host_tensor,
-                                                   Tensor* device_tensor,
-                                                   bool buffer_is_fresh) const {
+Status XlaTransferManager::TransferLiteralToDevice(
+    const Tensor& host_tensor, Tensor* device_tensor) const {
   xla::Shape xla_shape;
   TF_RETURN_IF_ERROR(TensorShapeToXLAShape(host_tensor.dtype(),
                                            host_tensor.shape(), &xla_shape));
@@ -98,11 +97,8 @@ Status XlaTransferManager::TransferLiteralToDevice(const Tensor& host_tensor,
     // synchronized.
     host_to_device_stream_->ThenWaitFor(stream_.get());
   }
-  xla::TransferManager::TransferToDeviceHint hint =
-      buffer_is_fresh ? xla::TransferManager::kBufferUndefined
-                      : xla::TransferManager::kNoHint;
   TF_RETURN_IF_ERROR(transfer_manager_->TransferLiteralToDeviceAsync(
-      host_to_device_stream_.get(), *literal, shaped_buffer, hint));
+      host_to_device_stream_.get(), *literal, shaped_buffer));
   if (UseMultipleStreams()) {
     auto event = std::make_shared<se::Event>(stream_->parent());
     TF_RET_CHECK(event->Init()) << "Event failed to initialize!";
@@ -169,7 +165,6 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
     return;
   }
   TensorShape shape = shape_or_status.ValueOrDie();
-  bool buffer_is_fresh = false;
   if (!xla_tensor->has_shaped_buffer()) {
     Status s =
         xla_tensor->AllocateShapedBuffer(device_tensor->dtype(), shape, client_,
@@ -178,7 +173,6 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
       done(s);
       return;
     }
-    buffer_is_fresh = true;
   }
 
   Status status;
@@ -189,8 +183,7 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
           "Tensor::CopyFrom failed when copying from CPU to XLA device"));
       return;
     }
-    status = TransferLiteralToDevice(reshaped_cpu_tensor, device_tensor,
-                                     buffer_is_fresh);
+    status = TransferLiteralToDevice(reshaped_cpu_tensor, device_tensor);
   } else {
     se::DeviceMemoryBase dev_dst_ptr =
         XlaTensor::DeviceMemoryFromTensor(*device_tensor);
diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h
index a4c0c296fc..df82421294 100644
--- a/tensorflow/compiler/jit/xla_device_context.h
+++ b/tensorflow/compiler/jit/xla_device_context.h
@@ -67,8 +67,7 @@ class XlaTransferManager {
 
  private:
   Status TransferLiteralToDevice(const Tensor& host_tensor,
-                                 Tensor* device_tensor,
-                                 bool buffer_is_fresh) const;
+                                 Tensor* device_tensor) const;
   void TransferLiteralFromDevice(Tensor* host_tensor,
                                  const Tensor& device_tensor,
                                  const StatusCallback& done) const;
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
index f92fde7f46..bec02e14f9 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
@@ -98,7 +98,7 @@ Status GenericTransferManager::TransferLiteralFromDeviceInternal(
 
 Status GenericTransferManager::TransferLiteralToDeviceAsync(
     se::Stream* stream, const LiteralSlice& literal,
-    const ShapedBuffer& device_buffer, TransferToDeviceHint /*hint*/) {
+    const ShapedBuffer& device_buffer) {
   const Shape& shape = literal.shape();
   VLOG(2) << "transferring literal shape to device: "
           << ShapeUtil::HumanString(shape)
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h
index b1cba82b9f..86c8b1c145 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.h
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h
@@ -45,10 +45,9 @@ class GenericTransferManager : public TransferManager {
                                  MutableBorrowingLiteral literal,
                                  std::function<void(Status)> done) override;
 
-  Status TransferLiteralToDeviceAsync(se::Stream* stream,
-                                      const LiteralSlice& literal,
-                                      const ShapedBuffer& device_buffer,
-                                      TransferToDeviceHint hint) override;
+  Status TransferLiteralToDeviceAsync(
+      se::Stream* stream, const LiteralSlice& literal,
+      const ShapedBuffer& device_buffer) override;
 
   Status TransferLiteralToInfeed(se::StreamExecutor* executor,
                                  const LiteralSlice& literal) override;
diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h
index 9199e32d0f..f952e64af2 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.h
+++ b/tensorflow/compiler/xla/service/transfer_manager.h
@@ -89,16 +89,6 @@ class TransferManager {
                                          const LiteralSlice& literal,
                                          const ShapedBuffer& device_buffer);
 
-  // Hint type given to TransferLiteralToDeviceAsync.
-  enum TransferToDeviceHint {
-    // No hint available.
-    kNoHint,
-
-    // The destination buffer is undefined on the device, meaning it can be
-    // transferred to eagerly rather than waiting for Stream ordering.
-    kBufferUndefined,
-  };
-
   // Transfers the given literal into the previously allocated device memory
   // represented by the given ShapedBuffer using the given executor. The shape
   // of the ShapedBuffer and DeviceShape(literal.shape()) must be compatible,
@@ -106,13 +96,9 @@ class TransferManager {
   //
   // This operation is performed asynchronously on the given stream. It returns
   // once the transfer is enqueued.
-  //
-  // The optional hint can allow implementations to optimize transfers. It is
-  // not mandatory for an implementation to obey the hint.
   virtual Status TransferLiteralToDeviceAsync(
       se::Stream* stream, const LiteralSlice& literal,
-      const ShapedBuffer& device_buffer,
-      TransferToDeviceHint hint = kNoHint) = 0;
+      const ShapedBuffer& device_buffer) = 0;
 
   // Convenience methods for transferring an array to or from the device at a
   // known address. This avoids having to construct a ShapedBuffer just to
-- 
GitLab


From 100714d9e5eb723525eb54142769f9bd8eec5edd Mon Sep 17 00:00:00 2001
From: Alan Chiao <alanchiao@google.com>
Date: Thu, 4 Oct 2018 10:11:56 -0700
Subject: [PATCH 0359/1085] Fix quantization util test to pass with defined
 behavior on 32-bit architectures.

PiperOrigin-RevId: 215757844
---
 .../contrib/lite/kernels/internal/quantization_util_test.cc     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc
index 14281f25c6..25ea72b886 100644
--- a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc
@@ -259,7 +259,7 @@ TEST(QuantizationUtilTest, IntegerFrExpVersusDouble) {
   EXPECT_EQ(double_shift, 1);
 
   result = IntegerFrExp(123.45, &shift);
-  EXPECT_NEAR(result, (0.964453 * (1L << 31)), 1000);
+  EXPECT_NEAR(result, (0.964453 * (1LL << 31)), 1000);
   EXPECT_EQ(shift, 7);
   double_result = std::frexp(123.45, &double_shift);
   EXPECT_NEAR(double_result, 0.964453, 1e-5);
-- 
GitLab


From 8622f05a62948d8966be8962a6a33e0a8b5a116d Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 4 Oct 2018 10:17:02 -0700
Subject: [PATCH 0360/1085] Don't CHECK-fail on malformed graphs in deadness
 analysis

Instead return a friendlier failed Status from the following two methods which
used to CHECK-fail before:  GetIncomingPreds, FindUniqueBackedge.

While at it, also rename GetIncomingPreds to GetInputPreds to be consistent with
the variable names.

PiperOrigin-RevId: 215758757
---
 tensorflow/compiler/jit/deadness_analysis.cc | 77 ++++++++++++++------
 1 file changed, 55 insertions(+), 22 deletions(-)

diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index e0b9932d80..b7ae7fbeb3 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/jit/deadness_analysis_internal.h"
+#include "tensorflow/compiler/jit/xla_cluster_util.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/lib/hash/hash.h"
@@ -579,7 +580,8 @@ class DeadnessAnalysisImpl : public DeadnessAnalysis {
  private:
   enum class EdgeKind { kDataAndControl, kDataOnly, kControlOnly };
 
-  std::vector<Predicate*> GetIncomingPreds(Node* n, EdgeKind edge_kind);
+  Status GetInputPreds(Node* n, EdgeKind edge_kind,
+                       std::vector<Predicate*>* result);
 
   // Sets the predicate for output `output_idx` of `n` to `pred`.  Sets the i'th
   // bit of `should_revisit` if `pred` is different from the current predicate
@@ -625,9 +627,10 @@ TensorId InputEdgeToTensorId(const Edge* e) {
   return TensorId(e->src()->name(), e->src_output());
 }
 
-std::vector<Predicate*> DeadnessAnalysisImpl::GetIncomingPreds(
-    Node* n, DeadnessAnalysisImpl::EdgeKind edge_kind) {
-  std::vector<Predicate*> incoming_preds;
+Status DeadnessAnalysisImpl::GetInputPreds(
+    Node* n, DeadnessAnalysisImpl::EdgeKind edge_kind,
+    std::vector<Predicate*>* result) {
+  result->clear();
   for (const Edge* in_edge : n->in_edges()) {
     bool should_process =
         edge_kind == EdgeKind::kDataAndControl ||
@@ -636,17 +639,27 @@ std::vector<Predicate*> DeadnessAnalysisImpl::GetIncomingPreds(
 
     if (should_process) {
       auto it = predicate_map_.find(InputEdgeToTensorId(in_edge));
-      CHECK(it != predicate_map_.end()) << n->name();
-      incoming_preds.push_back(it->second);
+      if (it == predicate_map_.end()) {
+        GraphCycles graph_cycles;
+        TF_RETURN_IF_ERROR(CreateCycleDetectionGraph(&graph_, &graph_cycles));
+
+        // If we didn't return with an error above then the graph is probably
+        // fine and we have a bug in deadness analysis.
+        return errors::Internal("Could not find input ", in_edge->DebugString(),
+                                " to ", n->name(),
+                                " when visiting the graph in post-order.  Most "
+                                "likely indicates a bug in deadness analysis.");
+      }
+      result->push_back(it->second);
     }
   }
-  return incoming_preds;
+  return Status::OK();
 }
 
 Status DeadnessAnalysisImpl::HandleSwitch(Node* n,
                                           std::vector<bool>* should_revisit) {
-  std::vector<Predicate*> input_preds =
-      GetIncomingPreds(n, EdgeKind::kDataAndControl);
+  std::vector<Predicate*> input_preds;
+  TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataAndControl, &input_preds));
   const Edge* pred_edge;
   TF_RETURN_IF_ERROR(n->input_edge(1, &pred_edge));
   Predicate* true_switch = predicate_factory_.MakeSymbolPredicate(
@@ -675,17 +688,31 @@ Status DeadnessAnalysisImpl::HandleSwitch(Node* n,
 }
 
 namespace {
-const Edge* FindUniqueBackedge(Node* merge) {
+Status CreateMultipleNextIterationInputsError(Node* merge) {
+  std::vector<string> backedges;
+  for (const Edge* backedge : merge->in_edges()) {
+    if (backedge->src()->IsNextIteration()) {
+      backedges.push_back(absl::StrCat("  ", SummarizeNode(*backedge->src())));
+    }
+  }
+  return errors::InvalidArgument(
+      "Multiple NextIteration inputs to merge node ", SummarizeNode(*merge),
+      ": \n", absl::StrJoin(backedges, "\n"),
+      "\nMerge nodes can have at most one incoming NextIteration edge.");
+}
+
+Status FindUniqueBackedge(Node* merge, const Edge** result) {
+  *result = nullptr;
   CHECK(merge->IsMerge());
-  const Edge* result = nullptr;
   for (const Edge* e : merge->in_edges()) {
     if (e->src()->IsNextIteration()) {
-      CHECK_EQ(result, nullptr)
-          << "Multiple backedges to " << merge->DebugString();
-      result = e;
+      if (*result != nullptr) {
+        return CreateMultipleNextIterationInputsError(merge);
+      }
+      *result = e;
     }
   }
-  return result;
+  return Status::OK();
 }
 
 // If `backedge_predicate` is equal to `symbolic_predicate` & Step where Step
@@ -764,9 +791,12 @@ Status DeadnessAnalysisImpl::HandleMerge(Node* n,
       return Status::OK();
     }
 
+    std::vector<Predicate*> input_preds;
+    TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataOnly, &input_preds));
+
     // We're visiting this merge for the first time and it is a acyclic merge.
-    Predicate* input_data_pred = predicate_factory_.MakeOrPredicate(
-        GetIncomingPreds(n, EdgeKind::kDataOnly));
+    Predicate* input_data_pred =
+        predicate_factory_.MakeOrPredicate(input_preds);
     SetPredicate(n, {0, 1, Graph::kControlSlot}, input_data_pred,
                  should_revisit);
     return Status::OK();
@@ -777,7 +807,9 @@ Status DeadnessAnalysisImpl::HandleMerge(Node* n,
     // of an unvisited backedge.  Try to pattern match the predicate expression
     // for that backedge (which should be visited now) into an and recurrence
     // for the merge node.
-    if (const Edge* unique_backedge = FindUniqueBackedge(n)) {
+    const Edge* unique_backedge;
+    TF_RETURN_IF_ERROR(FindUniqueBackedge(n, &unique_backedge));
+    if (unique_backedge) {
       if (Predicate* step = DeduceStepPredicate(
               &predicate_factory_, it->second,
               predicate_map_[InputEdgeToTensorId(unique_backedge)])) {
@@ -808,8 +840,8 @@ Status DeadnessAnalysisImpl::HandleRecv(Node* n,
                                         std::vector<bool>* should_revisit) {
   // In addition to being alive or dead based on the inputs, a _Recv can also
   // acquire a dead signal from a _Send.
-  std::vector<Predicate*> input_preds =
-      GetIncomingPreds(n, EdgeKind::kDataAndControl);
+  std::vector<Predicate*> input_preds;
+  TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataAndControl, &input_preds));
   input_preds.push_back(predicate_factory_.MakeSymbolPredicate(
       TensorId(n->name(), 0), /*must_be_true=*/false));
   SetPredicate(n, {0, Graph::kControlSlot},
@@ -821,8 +853,9 @@ Status DeadnessAnalysisImpl::HandleRecv(Node* n,
 Status DeadnessAnalysisImpl::HandleGeneric(Node* n,
                                            std::vector<bool>* should_revisit) {
   // Generally nodes are alive iff all their inputs are alive.
-  Predicate* pred = predicate_factory_.MakeAndPredicate(
-      GetIncomingPreds(n, EdgeKind::kDataAndControl));
+  std::vector<Predicate*> input_preds;
+  TF_RETURN_IF_ERROR(GetInputPreds(n, EdgeKind::kDataAndControl, &input_preds));
+  Predicate* pred = predicate_factory_.MakeAndPredicate(input_preds);
   for (int output_idx = 0; output_idx < n->num_outputs(); output_idx++) {
     SetPredicate(n, output_idx, pred, should_revisit);
   }
-- 
GitLab


From 8ac087482f7224273fb6697a66191b2661e86477 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Thu, 4 Oct 2018 10:27:57 -0700
Subject: [PATCH 0361/1085] Add tensorflow_estimator pip package to
 install_pip_packages.sh

We will need this for remote-build presubmits to pass.

PiperOrigin-RevId: 215760872
---
 tensorflow/tools/ci_build/install/install_pip_packages.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 7f293e8604..329d05342a 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -124,6 +124,10 @@ pip3 install keras_preprocessing==1.0.5 --no-deps
 pip2 install --upgrade h5py==2.8.0
 pip3 install --upgrade h5py==2.8.0
 
+# Estimator
+pip2 install tensorflow_estimator --no-deps
+pip3 install tensorflow_estimator --no-deps
+
 # Install last working version of setuptools.
 pip2 install --upgrade setuptools==39.1.0
 pip3 install --upgrade setuptools==39.1.0
-- 
GitLab


From 419fff9de94ea9573f2e368fd6a68fdf54c59bab Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 10:44:58 -0700
Subject: [PATCH 0362/1085] Implement LiteralBase::Slice for all primitive type

PiperOrigin-RevId: 215764305
---
 tensorflow/compiler/xla/literal.cc | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index deeb140b8f..177f39cc74 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -727,16 +727,34 @@ Literal LiteralBase::Slice(absl::Span<const int64> start_indices,
       ShapeUtil::MakeShapeWithLayout(shape().element_type(), result_dimensions,
                                      LayoutUtil::MinorToMajor(shape()));
   switch (result_shape.element_type()) {
-    case F32:
-      return SliceInternal<float>(result_shape, start_indices);
+    case PRED:
+      return SliceInternal<bool>(result_shape, start_indices);
+    case U8:
+      return SliceInternal<uint8>(result_shape, start_indices);
+    case U16:
+      return SliceInternal<uint16>(result_shape, start_indices);
+    case U32:
+      return SliceInternal<uint32>(result_shape, start_indices);
+    case U64:
+      return SliceInternal<uint64>(result_shape, start_indices);
+    case S8:
+      return SliceInternal<int8>(result_shape, start_indices);
+    case S16:
+      return SliceInternal<int16>(result_shape, start_indices);
+    case S32:
+      return SliceInternal<int32>(result_shape, start_indices);
+    case S64:
+      return SliceInternal<int64>(result_shape, start_indices);
+    case F16:
+      return SliceInternal<half>(result_shape, start_indices);
     case BF16:
       return SliceInternal<bfloat16>(result_shape, start_indices);
+    case F32:
+      return SliceInternal<float>(result_shape, start_indices);
+    case F64:
+      return SliceInternal<double>(result_shape, start_indices);
     case C64:
       return SliceInternal<complex64>(result_shape, start_indices);
-    case S32:
-      return SliceInternal<int32>(result_shape, start_indices);
-    case U32:
-      return SliceInternal<uint32>(result_shape, start_indices);
     default:
       LOG(FATAL) << "not yet implemented: "
                  << PrimitiveType_Name(result_shape.element_type());
-- 
GitLab


From 5e9bd578802fcfff5de9729332eea4ae85c05c9e Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 4 Oct 2018 10:46:16 -0700
Subject: [PATCH 0363/1085] [tf.data] Fix C++ shape inference for
 `Dataset.concatenate()`.

Previously, we were returning an unknown shape in
`Dataset::output_shapes()` for the "most specific compatible shape"
between the two inputs. While this does not cause correctness problems
(since the unknown shape *is* compatible), we gain the ability to
raise errors earlier when more shape information is available.

PiperOrigin-RevId: 215764530
---
 tensorflow/core/kernels/data/concatenate_dataset_op.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/data/concatenate_dataset_op.cc b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
index a04f150e71..9607e9444c 100644
--- a/tensorflow/core/kernels/data/concatenate_dataset_op.cc
+++ b/tensorflow/core/kernels/data/concatenate_dataset_op.cc
@@ -171,16 +171,16 @@ class ConcatenateDatasetOp : public BinaryDatasetOpKernel {
 
     static PartialTensorShape MostSpecificCompatibleShape(
         const PartialTensorShape& ts1, const PartialTensorShape& ts2) {
-      PartialTensorShape output_tensorshape;
       if (ts1.dims() != ts2.dims() || ts1.unknown_rank() || ts2.unknown_rank())
-        return output_tensorshape;
+        return PartialTensorShape();
+      PartialTensorShape output_tensorshape({});
       auto dims1 = ts1.dim_sizes();
       auto dims2 = ts2.dim_sizes();
       for (int d = 0; d < ts1.dims(); d++) {
         if (dims1[d] == dims2[d])
-          output_tensorshape.Concatenate(dims1[d]);
+          output_tensorshape.AddDim(dims1[d]);
         else
-          output_tensorshape.Concatenate(-1);
+          output_tensorshape.AddDim(-1);
       }
       return output_tensorshape;
     }
-- 
GitLab


From e1a8f4b03df2ef84538c01788b6043eb723cd046 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:04:41 -0700
Subject: [PATCH 0364/1085] Automated rollback of commit
 8dc7bc7764150253c03a666eee84fc48f867d6a2

PiperOrigin-RevId: 215768310
---
 .../toco/graph_transformations/resolve_constant_binary.cc | 8 --------
 .../resolve_constant_concatenation.cc                     | 7 -------
 .../graph_transformations/resolve_constant_fake_quant.cc  | 7 -------
 .../toco/graph_transformations/resolve_constant_fill.cc   | 7 -------
 .../toco/graph_transformations/resolve_constant_gather.cc | 8 --------
 .../toco/graph_transformations/resolve_constant_pack.cc   | 8 --------
 .../resolve_constant_random_uniform.cc                    | 7 -------
 .../toco/graph_transformations/resolve_constant_range.cc  | 8 --------
 .../graph_transformations/resolve_constant_reshape.cc     | 7 -------
 .../toco/graph_transformations/resolve_constant_select.cc | 8 --------
 .../resolve_constant_shape_or_rank.cc                     | 8 --------
 .../toco/graph_transformations/resolve_constant_slice.cc  | 8 --------
 .../resolve_constant_strided_slice.cc                     | 8 --------
 .../toco/graph_transformations/resolve_constant_tile.cc   | 7 -------
 .../graph_transformations/resolve_constant_transpose.cc   | 8 --------
 .../toco/graph_transformations/resolve_constant_unary.cc  | 8 --------
 16 files changed, 122 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
index 3e57d3f467..f7e5aa6609 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
@@ -191,14 +191,6 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model,
 bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
   const auto binary_it = model->operators.begin() + op_index;
   const auto* binary_op = binary_it->get();
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, binary_op->outputs[0])) {
-    return false;
-  }
-
   // Test for binary ops of types that we know how to resolve
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
index c6c5035a51..d916ae0ddf 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
@@ -144,13 +144,6 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
   const auto* concat_op =
       static_cast<const ConcatenationOperator*>(concat_base_op);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, concat_op->outputs[0])) {
-    return false;
-  }
-
   for (const string& input_name : concat_op->inputs) {
     // We only expect constant unquantized arrays as input, otherwise we return.
     // We  also make sure the shapes of the input arrays are known and they are
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
index 3d797533c9..f5f2f77460 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
@@ -69,13 +69,6 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
   const auto* fakequant_op =
       static_cast<const FakeQuantOperator*>(fakequant_base_op);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, fakequant_op->outputs[0])) {
-    return false;
-  }
-
   // Yield until the fakequant MinMax has been resolved.
   if (!fakequant_op->minmax) {
     return false;
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
index 2cb1e64f3a..f6f95481b5 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
@@ -52,13 +52,6 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
index 4dfe203a25..36d7dad0ce 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
@@ -71,14 +71,6 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
index 6f44025dd4..e86616574d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
@@ -59,14 +59,6 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
index c9f2b95d09..88d06d7dc7 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
@@ -70,13 +70,6 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
index e347286dd4..1a0ba9e2bc 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
@@ -28,14 +28,6 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   auto* op = static_cast<RangeOperator*>(base_op);
 
   CHECK_EQ(op->inputs.size(), 3);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   const auto& start_array = model->GetArray(op->inputs[0]);
   if (!start_array.has_shape()) {
     // Yield until all input dims have been resolved.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
index bfdaa8aafd..a6f665b5f0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
@@ -33,13 +33,6 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
index 3a95d39cd4..e880a3f44d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
@@ -37,14 +37,6 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 3);
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
index 452bef1f16..8a0e3e8995 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
@@ -27,14 +27,6 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
   }
 
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been resolved
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
index 58d6797e1c..b35c3e19c4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
@@ -96,14 +96,6 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
   const SliceOperator* op = static_cast<const SliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
index e275447a0c..8853ed87e6 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
@@ -114,14 +114,6 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
       static_cast<const StridedSliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
index 378a38f14b..5cfa1a5582 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
@@ -105,13 +105,6 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
   }
   const auto* op = static_cast<const TensorFlowTileOperator*>(base_op);
 
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
   auto& output_array = model->GetArray(op->outputs[0]);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
index 5d3f4a6240..fe15dfa06f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
@@ -111,14 +111,6 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
 
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, op->outputs[0])) {
-    return false;
-  }
-
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index e35ed0898b..c698a9567a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -48,14 +48,6 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) {
 bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   const auto unary_it = model->operators.begin() + op_index;
   const auto* unary_op = unary_it->get();
-
-  // If the output of this op is a non-discardable array such as an input_array
-  // or a state array of the model, then this is a job for RemoveUnusedOp, not
-  // for constants-propagation.
-  if (!IsDiscardableArray(*model, unary_op->outputs[0])) {
-    return false;
-  }
-
   // Test for unary ops of types that we know how to resolve.
   switch (unary_op->type) {
     case OperatorType::kCast:
-- 
GitLab


From 6850dafeeaaa48efa748134688844bd079ef3949 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:09:52 -0700
Subject: [PATCH 0365/1085] collective_param_resolver_local.cc: delete
 DCHECK(!ir->out_mu.try_lock()); in a lambda

UNLOCK_FUNCTION(ir->out_mu) annotates that the lock is held on entry.
try_lock() should not be called.

PiperOrigin-RevId: 215769341
---
 .../core/common_runtime/collective_param_resolver_local.cc       | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.cc b/tensorflow/core/common_runtime/collective_param_resolver_local.cc
index 3b2dc6a050..7cb90de3c7 100644
--- a/tensorflow/core/common_runtime/collective_param_resolver_local.cc
+++ b/tensorflow/core/common_runtime/collective_param_resolver_local.cc
@@ -522,7 +522,6 @@ void CollectiveParamResolverLocal::CallInitInstanceSharedParams(
   InitInstanceSharedParams(
       gr, cp, ir,
       [this, ir, done](const Status& s) UNLOCK_FUNCTION(ir->out_mu) {
-        DCHECK(!ir->out_mu.try_lock());
         DCHECK(ir->out_mu_available);
         ir->status.Update(s);
         ir->out_mu.unlock();
-- 
GitLab


From c8d5054e8c12800f0c3db0e51f3d5902e04eaa37 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Thu, 4 Oct 2018 11:24:41 -0700
Subject: [PATCH 0366/1085] Roll forward change "Skip control flow
 functionalization if there is no Switch or Merge node.".

PiperOrigin-RevId: 215772272
---
 .../tf2xla/functionalize_control_flow.cc      | 129 ++++++++++++------
 .../core/common_runtime/constant_folding.cc   |  37 ++---
 .../core/common_runtime/constant_folding.h    |   4 +
 .../core/common_runtime/graph_optimizer.cc    |   5 +-
 .../core/common_runtime/graph_optimizer.h     |   5 +-
 5 files changed, 122 insertions(+), 58 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 36c6f5d316..28e09d7b79 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -79,7 +79,10 @@ Status FunctionalizeControlFlowForFunction(
     const string& func_name, const string& new_func_name,
     const protobuf::Map<string, tensorflow::AttrValue>& attrs,
     FunctionLibraryDefinition* fld, FunctionLibraryRuntime* flr,
-    std::map<string, string>* canonicalized_name_to_new_name) {
+    std::map<string, absl::optional<string>>* canonicalized_name_to_new_name,
+    bool* modified) {
+  *modified = false;
+
   // Convert the function to Graph.
   FunctionLibraryRuntime::Handle handle;
   TF_RETURN_IF_ERROR(flr->Instantiate(func_name, AttrSlice(&attrs), &handle));
@@ -92,6 +95,19 @@ Status FunctionalizeControlFlowForFunction(
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
 
+  // Check if the graph has Switch or Merge node before optimizing the graph.
+  bool has_switch_or_merge = false;
+  for (Node* n : body->graph->nodes()) {
+    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
+      has_switch_or_merge = true;
+      break;
+    }
+  }
+  // We cannot return here directly if the graph has no Switch/Merge.
+  // It might contain function call nodes, or If/While nodes with Switch/Merge
+  // in function body. We still need to rewrite those functions and modify
+  // corresponding nodes.
+
   // Call graph optimizer. The most important optimization we need is constant
   // folding, which will replace ops like Shape/BroadcastGradientArgs with
   // constant shape input. Without this optimization, those ops might become
@@ -129,6 +145,13 @@ Status FunctionalizeControlFlowForFunction(
         absl::StrCat("functionalize_control_flow_after_opt_", func_name),
         *optimized_graph, fld);
   }
+  // Some inlined functions might have Switch/Merge nodes.
+  for (Node* n : optimized_graph->nodes()) {
+    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
+      has_switch_or_merge = true;
+      break;
+    }
+  }
 
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
@@ -151,10 +174,15 @@ Status FunctionalizeControlFlowForFunction(
           Canonicalize(name, AttrSlice(&associated_function.attrs()));
       auto iter = canonicalized_name_to_new_name->find(canonicalized_name);
       string new_name;
+      bool function_modified;
       if (iter != canonicalized_name_to_new_name->end()) {
-        // If we already functionalized this function, skip functionalization
-        // but still rewrite the node.
-        new_name = iter->second;
+        // If we already processed this function, check if it was rewritten. If
+        // the function was rewritten, the entry will be non-empty. Otherwise
+        // the entry will be empty.
+        function_modified = iter->second.has_value();
+        if (function_modified) {
+          new_name = iter->second.value();
+        }
       } else {
         if (associated_function.type() ==
             AssociatedFunctionInfo::AssociatedFunctionType::kSymbolicGradient) {
@@ -166,42 +194,62 @@ Status FunctionalizeControlFlowForFunction(
         }
         TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
             name, new_name, associated_function.attrs(), fld, flr,
-            canonicalized_name_to_new_name));
-        (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+            canonicalized_name_to_new_name, &function_modified));
+        if (function_modified) {
+          // If the function was rewritten, add an non-empty entry. So later we
+          // know we have processed this function, and it was rewritten into
+          // another function.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+        } else {
+          // If the function was not rewritten, add an empty entry. So later
+          // we know we have processed this function, and it does not need to be
+          // rewritten.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = absl::nullopt;
+        }
+      }
+      if (function_modified) {
+        *modified = true;
+
+        // Notice that if "n" is a function call, RewriteAssociatedFunction()
+        // will delete it and create a new node instead, making "n" an invalid
+        // pointer. That's fine because in that case, associated_functions will
+        // only have one member and the loop will only run once.
+        TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
+            optimized_graph.get(), n, fld, associated_function, new_name));
       }
-      // Notice that if "n" is a function call, RewriteAssociatedFunction() will
-      // delete it and create a new node instead, making "n" an invalid pointer.
-      // That's fine because in that case, associated_functions will only have
-      // one member and the loop will only run once.
-      TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-          optimized_graph.get(), n, fld, associated_function, new_name));
     }
   }
 
-  // Functionalize the function body.
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-        *optimized_graph, fld);
-  }
-  TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-        *optimized_graph, fld);
+  if (has_switch_or_merge) {
+    *modified = true;
+
+    // Functionalize the function body.
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
+          *optimized_graph, fld);
+    }
+    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
+          *optimized_graph, fld);
+    }
   }
-  FunctionDef functionalized_fdef;
-  TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
-                                        &functionalized_fdef));
 
-  // Add rewritten FunctionDef into library.
-  if (func_name == new_func_name) {
-    VLOG(2) << "Replacing function " << func_name;
-    TF_RETURN_IF_ERROR(
-        fld->ReplaceFunction(new_func_name, functionalized_fdef));
-  } else {
-    VLOG(2) << "Adding function " << new_func_name;
-    TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+  if (*modified) {
+    // Add rewritten FunctionDef into library.
+    FunctionDef functionalized_fdef;
+    TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
+                                          &functionalized_fdef));
+    if (func_name == new_func_name) {
+      VLOG(2) << "Replacing function " << func_name;
+      TF_RETURN_IF_ERROR(
+          fld->ReplaceFunction(new_func_name, functionalized_fdef));
+    } else {
+      VLOG(2) << "Adding function " << new_func_name;
+      TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+    }
   }
 
   return ret_status;
@@ -227,7 +275,7 @@ Status FunctionalizeControlFlowPass::Run(
           {"TPUCompile", "function"},
           {"XlaLaunch", "function"},
       };
-  std::map<string, string> canonicalized_name_to_new_name;
+  std::map<string, absl::optional<string>> canonicalized_name_to_new_name;
   for (Node* n : graph->nodes()) {
     auto it = kNodeTypeToFunctionAttrMapping->find(n->type_string());
     if (it == kNodeTypeToFunctionAttrMapping->end()) {
@@ -242,12 +290,15 @@ Status FunctionalizeControlFlowPass::Run(
               << ". Corresponding function: " << func.name();
       string new_func_name = options.flib_def->UniqueFunctionName(
           absl::StrCat(func.name(), "_f15n_"));
+      bool modified;
       TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
           func.name(), new_func_name, func.attr(), options.flib_def, flr,
-          &canonicalized_name_to_new_name));
-      n->ClearAttr(func_attr);
-      func.set_name(new_func_name);
-      n->AddAttr(func_attr, func);
+          &canonicalized_name_to_new_name, &modified));
+      if (modified) {
+        n->ClearAttr(func_attr);
+        func.set_name(new_func_name);
+        n->AddAttr(func_attr, func);
+      }
     }
   }
 
diff --git a/tensorflow/core/common_runtime/constant_folding.cc b/tensorflow/core/common_runtime/constant_folding.cc
index 419867ff58..db137f1a19 100644
--- a/tensorflow/core/common_runtime/constant_folding.cc
+++ b/tensorflow/core/common_runtime/constant_folding.cc
@@ -466,7 +466,7 @@ Graph* GetConstantGraph(
 bool ReplaceTensorWithConstant(
     Graph* graph, Device* partition_device, NodeAndOutput tensor,
     const Tensor& constant, const gtl::FlatSet<Node*>& control_deps,
-    int64 max_constant_size_in_bytes,
+    int64 max_constant_size_in_bytes, bool disable_memory_output_type_check,
     const ConstantFoldNameGenerator& generate_new_name) {
   // Be conservative when replacing a tensor with a constant, when not
   // running on CPU.
@@ -535,21 +535,23 @@ bool ReplaceTensorWithConstant(
   if (!NodeBuilder(builder).Finalize(graph, &constant_node).ok()) {
     return false;
   }
-  if (partition_device && device_type != DEVICE_CPU) {
-    MemoryType original_output_memory_type;
-    if (!MemoryTypeForOutput(device_type, graph, tensor.first, tensor.second,
-                             &original_output_memory_type)
-             .ok()) {
-      return false;
-    }
-    MemoryType const_output_memory_type;
-    if (!MemoryTypeForOutput(device_type, graph, constant_node, 0,
-                             &const_output_memory_type)
-             .ok()) {
-      return false;
-    }
-    if (original_output_memory_type != const_output_memory_type) {
-      return false;
+  if (!disable_memory_output_type_check) {
+    if (partition_device && device_type != DEVICE_CPU) {
+      MemoryType original_output_memory_type;
+      if (!MemoryTypeForOutput(device_type, graph, tensor.first, tensor.second,
+                               &original_output_memory_type)
+               .ok()) {
+        return false;
+      }
+      MemoryType const_output_memory_type;
+      if (!MemoryTypeForOutput(device_type, graph, constant_node, 0,
+                               &const_output_memory_type)
+               .ok()) {
+        return false;
+      }
+      if (original_output_memory_type != const_output_memory_type) {
+        return false;
+      }
     }
   }
   for (auto edge : edges_to_remove) {
@@ -658,7 +660,8 @@ Status ConstantFold(const ConstantFoldingOptions& opts,
         constant_control_deps[tensors_to_replace[c].first];
     if (ReplaceTensorWithConstant(
             graph, partition_device, tensors_to_replace[c], outputs[c],
-            control_deps, opts.max_constant_size_in_bytes, generate_new_name)) {
+            control_deps, opts.max_constant_size_in_bytes,
+            opts.disable_memory_output_type_check, generate_new_name)) {
       ++num_nodes_replaced;
     }
   }
diff --git a/tensorflow/core/common_runtime/constant_folding.h b/tensorflow/core/common_runtime/constant_folding.h
index a9a84f761b..4c71b7bd27 100644
--- a/tensorflow/core/common_runtime/constant_folding.h
+++ b/tensorflow/core/common_runtime/constant_folding.h
@@ -45,6 +45,10 @@ struct ConstantFoldingOptions {
   // optimization.
   int64 max_constant_size_in_bytes = 10 * 1024 * 1024;
 
+  // If disable_memory_output_type_check is true, we will disable output memory
+  // type check for constant node replacement.
+  bool disable_memory_output_type_check = false;
+
   // A generator for the name suffix of constant folded nodes. A
   // default id generator that monotonically increases is used if nullptr is
   // passed.
diff --git a/tensorflow/core/common_runtime/graph_optimizer.cc b/tensorflow/core/common_runtime/graph_optimizer.cc
index 37a979a8f1..91194bc86f 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.cc
+++ b/tensorflow/core/common_runtime/graph_optimizer.cc
@@ -39,7 +39,8 @@ void GraphOptimizer::Optimize(
     const std::unordered_map<string, std::vector<PartialTensorShape>>*
         shape_map,
     const std::function<bool(const Node*)>& cse_consider_fn,
-    const std::function<bool(const Node*)>& cf_consider_fn) {
+    const std::function<bool(const Node*)>& cf_consider_fn,
+    bool cf_disable_memory_output_type_check) {
   Graph* g = graph->get();
   DumpGraph("Initial", g);
 
@@ -64,6 +65,8 @@ void GraphOptimizer::Optimize(
       ConstantFoldingOptions cf_opts;
       cf_opts.shape_map = shape_map;
       cf_opts.consider = cf_consider_fn;
+      cf_opts.disable_memory_output_type_check =
+          cf_disable_memory_output_type_check;
       if (opts_.max_folded_constant_in_bytes() > 0) {
         cf_opts.max_constant_size_in_bytes =
             opts_.max_folded_constant_in_bytes();
diff --git a/tensorflow/core/common_runtime/graph_optimizer.h b/tensorflow/core/common_runtime/graph_optimizer.h
index 789cc56942..8954e9612d 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.h
+++ b/tensorflow/core/common_runtime/graph_optimizer.h
@@ -47,13 +47,16 @@ class GraphOptimizer {
   // returns true will be considered for CSE.
   // If cf_consider_fn is not null then only nodes for which cf_consider_fn
   // returns true will be considered for CF.
+  // If cf_disable_memory_output_type_check is true, CF will discard output
+  // memory type check for constant node replacement.
   void Optimize(
       FunctionLibraryRuntime* runtime, Env* env, Device* device,
       std::unique_ptr<Graph>* graph,
       const std::unordered_map<string, std::vector<PartialTensorShape>>*
           shape_map,
       const std::function<bool(const Node*)>& cse_consider_fn = nullptr,
-      const std::function<bool(const Node*)>& cf_consider_fn = nullptr);
+      const std::function<bool(const Node*)>& cf_consider_fn = nullptr,
+      bool cf_disable_memory_output_type_check = false);
 
   const OptimizerOptions& options() { return opts_; }
 
-- 
GitLab


From 700c3325311e16be9bb4856cbf944d1871ff35c1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:30:52 -0700
Subject: [PATCH 0367/1085] Add "encoding" attribute to string substr op, which
 controls how each "character" is treated:   * BYTE: Position & length refer
 to bytes in the string.  (Default)   * UTF8: The string is interpreted as
 UTF-8 encoded Unicode code points, and position & length are treated relative
 to them.

RELNOTES: Add option to get substring using Unicode characters
PiperOrigin-RevId: 215773373
---
 .../api_def/base_api/api_def_Substr.pbtxt     |  10 +
 .../api_def/python_api/api_def_Substr.pbtxt   |   8 +-
 tensorflow/core/kernels/BUILD                 |   7 +-
 tensorflow/core/kernels/string_util.cc        |   4 -
 tensorflow/core/kernels/string_util.h         |  44 ++
 tensorflow/core/kernels/substr_op.cc          | 162 +++++-
 tensorflow/core/kernels/substr_op_test.cc     | 100 +++-
 tensorflow/core/ops/string_ops.cc             |   1 +
 .../python/kernel_tests/substr_op_test.py     | 503 ++++++++++++------
 tensorflow/python/ops/string_ops.py           |  16 +
 .../tools/api/golden/v1/tensorflow.pbtxt      |   2 +-
 .../api/golden/v1/tensorflow.strings.pbtxt    |   2 +-
 .../tools/api/golden/v2/tensorflow.pbtxt      |   2 +-
 .../api/golden/v2/tensorflow.strings.pbtxt    |   2 +-
 14 files changed, 655 insertions(+), 208 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_Substr.pbtxt b/tensorflow/core/api_def/base_api/api_def_Substr.pbtxt
index 5246090ab3..fe0fcc9508 100644
--- a/tensorflow/core/api_def/base_api/api_def_Substr.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_Substr.pbtxt
@@ -16,6 +16,16 @@ END
     name: "len"
     description: <<END
 Scalar defining the number of characters to include in each substring
+END
+  }
+  attr {
+    name: "unit"
+    description: <<END
+The unit that is used to create the substring.  One of: `"BYTE"` (for
+defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8
+encoded Unicode code points).  The default is `"BYTE"`. Results are undefined if
+`unit=UTF8_CHAR` and the `input` strings do not contain structurally valid
+UTF-8.
 END
   }
   out_arg {
diff --git a/tensorflow/core/api_def/python_api/api_def_Substr.pbtxt b/tensorflow/core/api_def/python_api/api_def_Substr.pbtxt
index 4778d7927c..4fb9ee56e9 100644
--- a/tensorflow/core/api_def/python_api/api_def_Substr.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Substr.pbtxt
@@ -1,10 +1,4 @@
 op {
   graph_op_name: "Substr"
-  endpoint {
-    name: "strings.substr"
-  }
-  endpoint {
-    name: "substr"
-    deprecated: true
-  }
+  visibility: HIDDEN
 }
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 9439ab332c..3a920f26f3 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -4458,7 +4458,12 @@ cc_library(
     name = "string_util",
     srcs = ["string_util.cc"],
     hdrs = ["string_util.h"],
-    deps = ["//tensorflow/core:lib"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "@icu//:common",
+    ],
 )
 
 STRING_DEPS = [
diff --git a/tensorflow/core/kernels/string_util.cc b/tensorflow/core/kernels/string_util.cc
index 3a9803a052..92c73220d8 100644
--- a/tensorflow/core/kernels/string_util.cc
+++ b/tensorflow/core/kernels/string_util.cc
@@ -16,10 +16,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/errors.h"
 
-namespace {
-inline bool IsTrailByte(char x) { return static_cast<signed char>(x) < -0x40; }
-}  // namespace
-
 namespace tensorflow {
 
 // Sets unit value based on str.
diff --git a/tensorflow/core/kernels/string_util.h b/tensorflow/core/kernels/string_util.h
index 390cf57702..d40e93ea33 100644
--- a/tensorflow/core/kernels/string_util.h
+++ b/tensorflow/core/kernels/string_util.h
@@ -30,6 +30,9 @@ enum class UnicodeEncoding { UTF8 };
 // TODO(edloper): Add support for: UTF32_CHAR, etc.
 enum class CharUnit { BYTE, UTF8_CHAR };
 
+// Whether or not the given byte is the trailing byte of a UTF-8/16/32 char.
+inline bool IsTrailByte(char x) { return static_cast<signed char>(x) < -0x40; }
+
 // Sets `encoding` based on `str`.
 Status ParseUnicodeEncoding(const string& str, UnicodeEncoding* encoding);
 
@@ -40,6 +43,47 @@ Status ParseCharUnit(const string& str, CharUnit* unit);
 // Result may be incorrect if the input string is not valid UTF-8.
 int32 UTF8StrLen(const string& string);
 
+// Get the next UTF8 character position starting at the given position and
+// skipping the given number of characters. Position is a byte offset, and
+// should never be `null`. The function return true if successful. However, if
+// the end of the string is reached before the requested characters, then the
+// position will point to the end of string and this function will return false.
+template <typename T>
+bool ForwardNUTF8CharPositions(const StringPiece in,
+                               const T num_utf8_chars_to_shift, T* pos) {
+  const size_t size = in.size();
+  T utf8_chars_counted = 0;
+  while (utf8_chars_counted < num_utf8_chars_to_shift && *pos < size) {
+    // move forward one utf-8 character
+    do {
+      ++*pos;
+    } while (IsTrailByte(in[*pos]) && *pos < size);
+    ++utf8_chars_counted;
+  }
+  return utf8_chars_counted == num_utf8_chars_to_shift;
+}
+
+// Get the previous UTF8 character position starting at the given position and
+// skipping the given number of characters. Position is a byte offset with a
+// positive value, relative to the beginning of the string, and should never be
+// `null`. The function return true if successful. However, if the beginning of
+// the string is reached before the requested character, then the position will
+// point to the beginning of the string and this function will return false.
+template <typename T>
+bool BackNUTF8CharPositions(const StringPiece in,
+                            const T num_utf8_chars_to_shift, T* pos) {
+  const size_t start = 0;
+  T utf8_chars_counted = 0;
+  while (utf8_chars_counted < num_utf8_chars_to_shift && (*pos > start)) {
+    // move back one utf-8 character
+    do {
+      --*pos;
+    } while (IsTrailByte(in[*pos]) && *pos > start);
+    ++utf8_chars_counted;
+  }
+  return utf8_chars_counted == num_utf8_chars_to_shift;
+}
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_STRING_UTIL_H_
diff --git a/tensorflow/core/kernels/substr_op.cc b/tensorflow/core/kernels/substr_op.cc
index 07f1d6e767..93c427039d 100644
--- a/tensorflow/core/kernels/substr_op.cc
+++ b/tensorflow/core/kernels/substr_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/string_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
@@ -37,7 +38,11 @@ namespace tensorflow {
 template <typename T>
 class SubstrOp : public OpKernel {
  public:
-  using OpKernel::OpKernel;
+  explicit SubstrOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+    string unit;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("unit", &unit));
+    OP_REQUIRES_OK(ctx, ParseCharUnit(unit, &unit_));
+  }
 
   void Compute(OpKernelContext* context) override {
     // Get inputs
@@ -69,11 +74,23 @@ class SubstrOp : public OpKernel {
             tensorflow::internal::SubtleMustCopy(len_tensor.scalar<T>()());
         for (size_t i = 0; i < input_tensor.NumElements(); ++i) {
           StringPiece in(input(i));
-          OP_REQUIRES(
-              context, FastBoundsCheck(std::abs(pos), in.size() + 1),
-              errors::InvalidArgument("pos ", pos, " out of range for string",
-                                      "b'", in, "' at index ", i));
-          StringPiece sub_in = in.substr(AdjustedPosIndex(pos, in), len);
+          T byte_pos = pos;
+          T byte_len = len;
+          switch (unit_) {
+            case CharUnit::UTF8_CHAR:
+              OP_REQUIRES(
+                  context, UpdatePosAndLenForUtf8(in, &byte_pos, &byte_len),
+                  errors::InvalidArgument("pos ", pos, " out of range for ",
+                                          "string at index ", i));
+              break;
+            case CharUnit::BYTE:
+              byte_pos = AdjustedPosIndex(byte_pos, in);
+              OP_REQUIRES(
+                  context, FastBoundsCheck(byte_pos, in.size() + 1),
+                  errors::InvalidArgument("pos ", pos, " out of range for ",
+                                          "string b'", in, "' at index ", i));
+          }
+          StringPiece sub_in = in.substr(byte_pos, byte_len);
           output(i).assign(sub_in.data(), sub_in.size());
         }
       } else {
@@ -84,11 +101,23 @@ class SubstrOp : public OpKernel {
           StringPiece in(input(i));
           const T pos = tensorflow::internal::SubtleMustCopy(pos_flat(i));
           const T len = tensorflow::internal::SubtleMustCopy(len_flat(i));
-          OP_REQUIRES(
-              context, FastBoundsCheck(std::abs(pos), in.size() + 1),
-              errors::InvalidArgument("pos ", pos, " out of range for string",
-                                      "b'", in, "' at index ", i));
-          StringPiece sub_in = in.substr(AdjustedPosIndex(pos, in), len);
+          T byte_pos = pos;
+          T byte_len = len;
+          switch (unit_) {
+            case CharUnit::UTF8_CHAR:
+              OP_REQUIRES(
+                  context, UpdatePosAndLenForUtf8(in, &byte_pos, &byte_len),
+                  errors::InvalidArgument("pos ", pos, " out of range for ",
+                                          "string at index ", i));
+              break;
+            case CharUnit::BYTE:
+              byte_pos = AdjustedPosIndex(byte_pos, in);
+              OP_REQUIRES(
+                  context, FastBoundsCheck(byte_pos, in.size() + 1),
+                  errors::InvalidArgument("pos ", pos, " out of range for ",
+                                          "string b'", in, "' at index ", i));
+          }
+          StringPiece sub_in = in.substr(byte_pos, byte_len);
           output(i).assign(sub_in.data(), sub_in.size());
         }
       }
@@ -151,12 +180,24 @@ class SubstrOp : public OpKernel {
             StringPiece in(input_bcast(i));
             const T pos = tensorflow::internal::SubtleMustCopy(pos_bcast(i));
             const T len = tensorflow::internal::SubtleMustCopy(len_bcast(i));
-            OP_REQUIRES(
-                context,
-                FastBoundsCheck(std::abs(pos), input_bcast(i).size() + 1),
-                errors::InvalidArgument("pos ", pos, " out of range for string",
-                                        "b'", in, "' at index ", i));
-            StringPiece sub_in = in.substr(AdjustedPosIndex(pos, in), len);
+            T byte_pos = pos;
+            T byte_len = len;
+            switch (unit_) {
+              case CharUnit::UTF8_CHAR:
+                OP_REQUIRES(
+                    context, UpdatePosAndLenForUtf8(in, &byte_pos, &byte_len),
+                    errors::InvalidArgument("pos ", pos, " out of range for ",
+                                            "string at index ", i));
+                break;
+              case CharUnit::BYTE:
+                byte_pos = AdjustedPosIndex(byte_pos, in);
+                OP_REQUIRES(
+                    context,
+                    FastBoundsCheck(byte_pos, input_bcast(i).size() + 1),
+                    errors::InvalidArgument("pos ", pos, " out of range for ",
+                                            "string b'", in, "' at index ", i));
+            }
+            StringPiece sub_in = in.substr(byte_pos, byte_len);
             output(i).assign(sub_in.data(), sub_in.size());
           }
           break;
@@ -205,12 +246,24 @@ class SubstrOp : public OpKernel {
                   tensorflow::internal::SubtleMustCopy(pos_bcast(i, j));
               const T len =
                   tensorflow::internal::SubtleMustCopy(len_bcast(i, j));
-              OP_REQUIRES(
-                  context, FastBoundsCheck(std::abs(pos), in.size() + 1),
-                  errors::InvalidArgument("pos ", pos, " out of range for ",
-                                          "string b'", in, "' at index (", i,
-                                          ", ", j, ")"));
-              StringPiece sub_in = in.substr(AdjustedPosIndex(pos, in), len);
+              T byte_pos = pos;
+              T byte_len = len;
+              switch (unit_) {
+                case CharUnit::UTF8_CHAR:
+                  OP_REQUIRES(
+                      context, UpdatePosAndLenForUtf8(in, &byte_pos, &byte_len),
+                      errors::InvalidArgument("pos ", pos, " out of range for ",
+                                              "string at index ", i));
+                  break;
+                case CharUnit::BYTE:
+                  byte_pos = AdjustedPosIndex(byte_pos, in);
+                  OP_REQUIRES(
+                      context, FastBoundsCheck(byte_pos, in.size() + 1),
+                      errors::InvalidArgument("pos ", pos, " out of range for ",
+                                              "string b'", in, "' at index (",
+                                              i, ", ", j, ")"));
+              }
+              StringPiece sub_in = in.substr(byte_pos, byte_len);
               output(i, j).assign(sub_in.data(), sub_in.size());
             }
           }
@@ -227,12 +280,73 @@ class SubstrOp : public OpKernel {
  private:
   // This adjusts the requested position. Note it does not perform any bound
   // checks.
-  T AdjustedPosIndex(const T pos_requested, const StringPiece s) {
+  static inline T AdjustedPosIndex(const T pos_requested, const StringPiece s) {
     if (pos_requested < 0) {
       return s.size() + pos_requested;
     }
     return pos_requested;
   }
+
+  // Return true if successful; otherwise, return false if the `pos` argument
+  // is out of range in the string.
+  static inline bool UpdatePosAndLenForUtf8(const StringPiece in, T* pos,
+                                            T* len) {
+    if (*pos >= 0) {
+      return UpdatePositivePosAndLenForUtf8(in, *pos, *len, pos, len);
+    } else {
+      return UpdateNegativePosAndLenForUtf8(in, *pos, *len, pos, len);
+    }
+  }
+
+  static bool UpdatePositivePosAndLenForUtf8(const StringPiece in, const T pos,
+                                             const T len, T* char_pos,
+                                             T* char_len) {
+    *char_pos = 0;
+    // Determine byte position of the substring start.
+    if (!ForwardNUTF8CharPositions(in, pos, char_pos)) {
+      return false;
+    }
+    // Determine position of the end of the substring.
+    // The length will be capped at the end of the string, and we ignore whether
+    // the string had enough characters to handle it or not.
+    *char_len = *char_pos;
+    ForwardNUTF8CharPositions(in, len, char_len);
+    // The length in bytes is the position end of the substring less the start.
+    *char_len = *char_len - *char_pos;
+    return true;
+  }
+
+  // This function expects a negative position relative to the end of the
+  // string, but will update the character position to a positive number
+  // relative to the beginning of the string.
+  static bool UpdateNegativePosAndLenForUtf8(const StringPiece in, const T pos,
+                                             const T len, T* char_pos,
+                                             T* char_len) {
+    // Initially treat the length as position of the end of the substring.
+    *char_len = in.size();
+    // This is the number of character to skip from the end of the string to
+    // arrive at the position where the substring should end.
+    T utf8_chars_to_skip = -pos - len;
+    if (utf8_chars_to_skip < 0) {
+      utf8_chars_to_skip = 0;
+    }
+    // Find the byte position where the substring should end using the computed
+    // number of characters to skip.
+    if (!BackNUTF8CharPositions(in, utf8_chars_to_skip, char_len)) {
+      return false;
+    }
+    // Next, determine where the substring should begin. The number of chars to
+    // skip is the requested position minus the chars we've previously skipped.
+    *char_pos = *char_len;
+    if (!BackNUTF8CharPositions(in, -pos - utf8_chars_to_skip, char_pos)) {
+      return false;
+    }
+    // The length in bytes is the position end of the substring less the start.
+    *char_len = *char_len - *char_pos;
+    return true;
+  }
+
+  CharUnit unit_ = CharUnit::BYTE;
 };
 
 #define REGISTER_SUBSTR(type)                                      \
diff --git a/tensorflow/core/kernels/substr_op_test.cc b/tensorflow/core/kernels/substr_op_test.cc
index 2e07050260..ea6b1ed500 100644
--- a/tensorflow/core/kernels/substr_op_test.cc
+++ b/tensorflow/core/kernels/substr_op_test.cc
@@ -42,7 +42,7 @@ limitations under the License.
 namespace tensorflow {
 
 // Test data from the TensorFlow README.md.
-const char* lines[] = {
+const char* ascii_lines[] = {
     "**TensorFlow** is an open source software library for numerical "
     "computation using data flow graphs.",
     "The graph nodes represent mathematical operations, while the graph edges "
@@ -64,17 +64,76 @@ const char* lines[] = {
     "backwards compatibility guarantee like C++, Go, Java, JavaScript and "
     "Swift."};
 
+const char* unicode_lines[] = {
+    "TensorFlow\xe6\x98\xaf\xe4\xb8\x80\xe4\xb8\xaa\xe4\xbd\xbf\xe7\x94\xa8\xe6"
+    "\x95\xb0\xe6\x8d\xae\xe6\xb5\x81\xe5\x9b\xbe\xe8\xbf\x9b\xe8\xa1\x8c\xe6"
+    "\x95\xb0\xe5\x80\xbc\xe8\xae\xa1\xe7\xae\x97\xe7\x9a\x84\xe5\xbc\x80\xe6"
+    "\xba\x90\xe8\xbd\xaf\xe4\xbb\xb6\xe5\xba\x93\xe3\x80\x82",
+    "\xe5\x9b\xbe\xe5\xbd\xa2\xe8\x8a\x82\xe7\x82\xb9\xe8\xa1\xa8\xe7\xa4\xba"
+    "\xe6\x95\xb0\xe5\xad\xa6\xe8\xbf\x90\xe7\xae\x97\xef\xbc\x8c\xe8\x80\x8c"
+    "\xe5\x9b\xbe\xe5\xbd\xa2\xe8\xbe\xb9\xe7\xbc\x98\xe8\xa1\xa8\xe7\xa4\xba"
+    "\xe5\x9c\xa8\xe5\xae\x83\xe4\xbb\xac\xe4\xb9\x8b\xe9\x97\xb4\xe6\xb5\x81"
+    "\xe5\x8a\xa8\xe7\x9a\x84\xe5\xa4\x9a\xe7\xbb\xb4\xe6\x95\xb0\xe6\x8d\xae"
+    "\xe9\x98\xb5\xe5\x88\x97\xef\xbc\x88\xe5\xbc\xa0\xe9\x87\x8f\xef\xbc\x89"
+    "\xe3\x80\x82",
+    "\xe8\xbf\x99\xe7\xa7\x8d\xe7\x81\xb5\xe6\xb4\xbb\xe7\x9a\x84\xe4\xbd\x93"
+    "\xe7\xb3\xbb\xe7\xbb\x93\xe6\x9e\x84\xe4\xbd\xbf\xe6\x82\xa8\xe5\x8f\xaf"
+    "\xe4\xbb\xa5\xe5\xb0\x86\xe8\xae\xa1\xe7\xae\x97\xe9\x83\xa8\xe7\xbd\xb2"
+    "\xe5\x88\xb0\xe6\xa1\x8c\xe9\x9d\xa2\xef\xbc\x8c\xe6\x9c\x8d\xe5\x8a\xa1"
+    "\xe5\x99\xa8\xe6\x88\x96\xe7\xa7\xbb\xe5\x8a\xa8\xe8\xae\xbe\xe5\xa4\x87"
+    "\xe4\xb8\xad\xe7\x9a\x84\xe4\xb8\x80\xe4\xb8\xaa\xe6\x88\x96\xe5\xa4\x9a"
+    "\xe4\xb8\xaa CPU\xe6\x88\x96GPU\xef\xbc\x8c\xe8\x80\x8c\xe6\x97\xa0\xe9"
+    "\x9c\x80\xe9\x87\x8d\xe5\x86\x99\xe4\xbb\xa3\xe7\xa0\x81\xe3\x80\x82",
+    "TensorFlow\xe8\xbf\x98\xe5\x8c\x85\xe6\x8b\xac[TensorBoard]\xef\xbc\x88"
+    "https://www.tensorflow.org/guide/summaries_and_tensorboard\xef\xbc\x89\xef"
+    "\xbc\x8c\xe8\xbf\x99\xe6\x98\xaf\xe4\xb8\x80\xe4\xb8\xaa\xe6\x95\xb0\xe6"
+    "\x8d\xae\xe5\x8f\xaf\xe8\xa7\x86\xe5\x8c\x96\xe5\xb7\xa5\xe5\x85\xb7\xe5"
+    "\x8c\x85\xe3\x80\x82",
+    "TensorFlow\xe6\x9c\x80\xe5\x88\x9d\xe6\x98\xaf\xe7\x94\xb1\xe7\xa0\x94\xe7"
+    "\xa9\xb6\xe4\xba\xba\xe5\x91\x98\xe5\x92\x8c\xe5\xb7\xa5\xe7\xa8\x8b\xe5"
+    "\xb8\x88\xe5\x9c\xa8Google\xe6\x9c\xba\xe5\x99\xa8\xe6\x99\xba\xe8\x83\xbd"
+    "\xe7\xa0\x94\xe7\xa9\xb6\xe7\xbb\x84\xe7\xbb\x87\xe7\x9a\x84Google Brain"
+    "\xe5\x9b\xa2\xe9\x98\x9f\xe5\xbc\x80\xe5\x8f\x91\xe7\x9a\x84\xef\xbc\x8c"
+    "\xe7\x9b\xae\xe7\x9a\x84\xe6\x98\xaf\xe8\xbf\x9b\xe8\xa1\x8c\xe6\x9c\xba"
+    "\xe5\x99\xa8\xe5\xad\xa6\xe4\xb9\xa0\xe5\x92\x8c\xe6\xb7\xb1\xe5\xba\xa6"
+    "\xe7\xa5\x9e\xe7\xbb\x8f\xe7\xbd\x91\xe7\xbb\x9c\xe7\xa0\x94\xe7\xa9\xb6"
+    "\xe3\x80\x82",
+    "\xe8\xaf\xa5\xe7\xb3\xbb\xe7\xbb\x9f\xe8\xb6\xb3\xe4\xbb\xa5\xe9\x80\x82"
+    "\xe7\x94\xa8\xe4\xba\x8e\xe5\x90\x84\xe7\xa7\x8d\xe5\x85\xb6\xe4\xbb\x96"
+    "\xe9\xa2\x86\xe5\x9f\x9f\xe4\xb9\x9f\xe6\x98\xaf\xe5\xa6\x82\xe6\xad\xa4"
+    "\xe3\x80\x82",
+    "TensorFlow\xe6\x8f\x90\xe4\xbe\x9b\xe7\xa8\xb3\xe5\xae\x9a\xe7\x9a\x84"
+    "Python API\xe5\x92\x8c C API\xef\xbc\x8c\xe4\xbb\xa5\xe5\x8f\x8a\xe6\xb2"
+    "\xa1\xe6\x9c\x89 API\xe5\x90\x91\xe5\x90\x8e\xe5\x85\xbc\xe5\xae\xb9\xe6"
+    "\x80\xa7\xe4\xbf\x9d\xe8\xaf\x81\xef\xbc\x8c\xe5\xa6\x82 C ++\xef\xbc\x8c"
+    "Go\xef\xbc\x8cJava\xef\xbc\x8cJavaScript\xe5\x92\x8cSwift\xe3\x80\x82",
+};
+
+const char* const kByteUnit = "BYTE";
+const char* const kUTF8Unit = "UTF8_CHAR";
+
 Tensor GetTestTensor(int batch) {
-  const int sz = TF_ARRAYSIZE(lines);
+  const int sz = TF_ARRAYSIZE(ascii_lines);
+  Tensor t(DT_STRING, {batch});
+  auto s = t.flat<string>();
+  for (int i = 0; i < batch; ++i) {
+    s(i) = ascii_lines[i % sz];
+  }
+  return t;
+}
+
+Tensor GetTestUTF8Tensor(int batch) {
+  const int sz = TF_ARRAYSIZE(unicode_lines);
   Tensor t(DT_STRING, {batch});
   auto s = t.flat<string>();
   for (int i = 0; i < batch; ++i) {
-    s(i) = lines[i % sz];
+    s(i) = unicode_lines[i % sz];
   }
   return t;
 }
 
-Graph* SetupSubstrGraph(const Tensor& input, const int32 pos, const int32 len) {
+Graph* SetupSubstrGraph(const Tensor& input, const int32 pos, const int32 len,
+                        const char* const unit) {
   Graph* g = new Graph(OpRegistry::Global());
   Tensor position(DT_INT32, TensorShape({}));
   position.flat<int32>().setConstant(pos);
@@ -85,21 +144,46 @@ Graph* SetupSubstrGraph(const Tensor& input, const int32 pos, const int32 len) {
                   .Input(test::graph::Constant(g, input))
                   .Input(test::graph::Constant(g, position))
                   .Input(test::graph::Constant(g, length))
+                  .Attr("unit", unit)
                   .Finalize(g, nullptr /* node */));
   return g;
 }
 
-void BM_Substr(int iters, int batch_size) {
+void BM_SubstrByte(int iters, int batch_size) {
   testing::StopTiming();
   testing::ItemsProcessed(static_cast<int64>(iters));
   testing::UseRealTime();
   Tensor input = GetTestTensor(batch_size);
-  Graph* g = SetupSubstrGraph(input, 3, 30);
+  Graph* g = SetupSubstrGraph(input, 3, 30, kByteUnit);
+  testing::StartTiming();
+  test::Benchmark("cpu", g).Run(iters);
+}
+
+void BM_SubstrUTF8(int iters, int batch_size) {
+  testing::StopTiming();
+  testing::ItemsProcessed(static_cast<int64>(iters));
+  testing::UseRealTime();
+  Tensor input = GetTestUTF8Tensor(batch_size);
+  Graph* g = SetupSubstrGraph(input, 3, 30, kUTF8Unit);
   testing::StartTiming();
   test::Benchmark("cpu", g).Run(iters);
 }
 
-BENCHMARK(BM_Substr)->Arg(1)->Arg(8)->Arg(16)->Arg(32)->Arg(64)->Arg(128)->Arg(
-    256);
+BENCHMARK(BM_SubstrByte)
+    ->Arg(1)
+    ->Arg(8)
+    ->Arg(16)
+    ->Arg(32)
+    ->Arg(64)
+    ->Arg(128)
+    ->Arg(256);
+BENCHMARK(BM_SubstrUTF8)
+    ->Arg(1)
+    ->Arg(8)
+    ->Arg(16)
+    ->Arg(32)
+    ->Arg(64)
+    ->Arg(128)
+    ->Arg(256);
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index b4fbde54d9..94d71a4113 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -223,6 +223,7 @@ REGISTER_OP("Substr")
     .Input("len: T")
     .Output("output: string")
     .Attr("T: {int32, int64}")
+    .Attr("unit: {'BYTE', 'UTF8_CHAR'} = 'BYTE'")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle pos_shape = c->input(1);
       ShapeHandle len_shape = c->input(2);
diff --git a/tensorflow/python/kernel_tests/substr_op_test.py b/tensorflow/python/kernel_tests/substr_op_test.py
index cd3fe14883..37aa624b07 100644
--- a/tensorflow/python/kernel_tests/substr_op_test.py
+++ b/tensorflow/python/kernel_tests/substr_op_test.py
@@ -28,270 +28,448 @@ from tensorflow.python.platform import test
 
 class SubstrOpTest(test.TestCase, parameterized.TestCase):
 
-  def _testScalarString(self, dtype):
-    test_string = b"Hello"
-    position = np.array(1, dtype)
+  @parameterized.parameters(
+      (np.int32, 1, "BYTE"),
+      (np.int64, 1, "BYTE"),
+      (np.int32, -4, "BYTE"),
+      (np.int64, -4, "BYTE"),
+      (np.int32, 1, "UTF8_CHAR"),
+      (np.int64, 1, "UTF8_CHAR"),
+      (np.int32, -4, "UTF8_CHAR"),
+      (np.int64, -4, "UTF8_CHAR"),
+  )
+  def testScalarString(self, dtype, pos, unit):
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"He\xc3\xc3\U0001f604".encode("utf-8"),
+    }[unit]
+    expected_value = {
+        "BYTE": b"ell",
+        "UTF8_CHAR": u"e\xc3\xc3".encode("utf-8"),
+    }[unit]
+    position = np.array(pos, dtype)
     length = np.array(3, dtype)
-    expected_value = b"ell"
-
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-    # Negative position.
-    test_string = b"Hello"
-    position = np.array(-4, dtype)
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testScalarString_EdgeCases(self, dtype, unit):
+    # Empty string
+    test_string = {
+        "BYTE": b"",
+        "UTF8_CHAR": u"".encode("utf-8"),
+    }[unit]
+    expected_value = b""
+    position = np.array(0, dtype)
     length = np.array(3, dtype)
-    expected_value = b"ell"
-
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-    # Position is equal to the length of string.
-    test_string = b""
+    # Full string
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"H\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
     position = np.array(0, dtype)
-    length = np.array(2, dtype)
-    expected_value = b""
-
-    substr_op = string_ops.substr(test_string, position, length)
+    length = np.array(5, dtype)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
-      self.assertAllEqual(substr, expected_value)
-
-    # Negative position magnitude is equal to the length of string.
-    test_string = b"yo"
-    position = np.array(-2, dtype)
-    length = np.array(1, dtype)
-    expected_value = b"y"
-
-    substr_op = string_ops.substr(test_string, position, length)
+      self.assertAllEqual(substr, test_string)
+
+    # Full string (Negative)
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"H\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
+    position = np.array(-5, dtype)
+    length = np.array(5, dtype)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
-      self.assertAllEqual(substr, expected_value)
-
-  def _testVectorStrings(self, dtype):
-    test_string = [b"Hello", b"World"]
-    position = np.array(1, dtype)
-    length = np.array(3, dtype)
-    expected_value = [b"ell", b"orl"]
-
-    substr_op = string_ops.substr(test_string, position, length)
+      self.assertAllEqual(substr, test_string)
+
+    # Length is larger in magnitude than a negative position
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"H\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
+    expected_string = {
+        "BYTE": b"ello",
+        "UTF8_CHAR": u"\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
+    position = np.array(-4, dtype)
+    length = np.array(5, dtype)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
-      self.assertAllEqual(substr, expected_value)
-
-    # Negative position.
-    test_string = [b"Hello", b"World"]
-    position = np.array(-4, dtype)
+      self.assertAllEqual(substr, expected_string)
+
+  @parameterized.parameters(
+      (np.int32, 1, "BYTE"),
+      (np.int64, 1, "BYTE"),
+      (np.int32, -4, "BYTE"),
+      (np.int64, -4, "BYTE"),
+      (np.int32, 1, "UTF8_CHAR"),
+      (np.int64, 1, "UTF8_CHAR"),
+      (np.int32, -4, "UTF8_CHAR"),
+      (np.int64, -4, "UTF8_CHAR"),
+  )
+  def testVectorStrings(self, dtype, pos, unit):
+    test_string = {
+        "BYTE": [b"Hello", b"World"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"H\xc3llo",
+                                                  u"W\U0001f604rld"]],
+    }[unit]
+    expected_value = {
+        "BYTE": [b"ell", b"orl"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"\xc3ll", u"\U0001f604rl"]],
+    }[unit]
+    position = np.array(pos, dtype)
     length = np.array(3, dtype)
-    expected_value = [b"ell", b"orl"]
-
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-  def _testMatrixStrings(self, dtype):
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testMatrixStrings(self, dtype, unit):
+    test_string = {
+        "BYTE": [[b"ten", b"eleven", b"twelve"],
+                 [b"thirteen", b"fourteen", b"fifteen"],
+                 [b"sixteen", b"seventeen", b"eighteen"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227n",
+                                                   u"\xc6\u053c\u025bv\u025bn",
+                                                   u"tw\u0c1dlv\u025b"]],
+                      [x.encode("utf-8") for x in [u"He\xc3\xc3o",
+                                                   u"W\U0001f604rld",
+                                                   u"d\xfcd\xea"]]],
+    }[unit]
     position = np.array(1, dtype)
     length = np.array(4, dtype)
-    expected_value = [[b"en", b"leve", b"welv"], [b"hirt", b"ourt", b"ifte"],
-                      [b"ixte", b"even", b"ight"]]
-
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"en", b"leve", b"welv"], [b"hirt", b"ourt", b"ifte"],
+                 [b"ixte", b"even", b"ight"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d227n",
+                                                   u"\u053c\u025bv\u025b",
+                                                   u"w\u0c1dlv"]],
+                      [x.encode("utf-8") for x in [u"e\xc3\xc3o",
+                                                   u"\U0001f604rld",
+                                                   u"\xfcd\xea"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-    # Negative position
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
-    position = np.array(-2, dtype)
+    position = np.array(-3, dtype)
     length = np.array(2, dtype)
-    expected_value = [[b"en", b"en", b"ve"], [b"en", b"en", b"en"],
-                      [b"en", b"en", b"en"]]
-
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"te", b"ve", b"lv"], [b"ee", b"ee", b"ee"],
+                 [b"ee", b"ee", b"ee"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227",
+                                                   u"v\u025b", u"lv"]],
+                      [x.encode("utf-8") for x in [u"\xc3\xc3", u"rl",
+                                                   u"\xfcd"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-  def _testElementWisePosLen(self, dtype):
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testElementWisePosLen(self, dtype, unit):
+    test_string = {
+        "BYTE": [[b"ten", b"eleven", b"twelve"],
+                 [b"thirteen", b"fourteen", b"fifteen"],
+                 [b"sixteen", b"seventeen", b"eighteen"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227n",
+                                                   u"\xc6\u053c\u025bv\u025bn",
+                                                   u"tw\u0c1dlv\u025b"]],
+                      [x.encode("utf-8") for x in [u"He\xc3\xc3o",
+                                                   u"W\U0001f604rld",
+                                                   u"d\xfcd\xea"]],
+                      [x.encode("utf-8") for x in [u"sixt\xea\xean",
+                                                   u"se\U00010299enteen",
+                                                   u"ei\U0001e920h\x86een"]]],
+    }[unit]
     position = np.array([[1, -4, 3], [1, 2, -4], [-5, 2, 3]], dtype)
     length = np.array([[2, 2, 4], [4, 3, 2], [5, 5, 5]], dtype)
-    expected_value = [[b"en", b"ev", b"lve"], [b"hirt", b"urt", b"te"],
-                      [b"xteen", b"vente", b"hteen"]]
-
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"en", b"ev", b"lve"], [b"hirt", b"urt", b"te"],
+                 [b"xteen", b"vente", b"hteen"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d227n",
+                                                   u"\u025bv",
+                                                   u"lv\u025b"]],
+                      [x.encode("utf-8") for x in [u"e\xc3\xc3o",
+                                                   u"rld",
+                                                   u"d\xfc"]],
+                      [x.encode("utf-8") for x in [u"xt\xea\xean",
+                                                   u"\U00010299ente",
+                                                   u"h\x86een"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-  def _testBroadcast(self, dtype):
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testBroadcast(self, dtype, unit):
     # Broadcast pos/len onto input string
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"],
-                   [b"nineteen", b"twenty", b"twentyone"]]
+    test_string = {
+        "BYTE": [[b"ten", b"eleven", b"twelve"],
+                 [b"thirteen", b"fourteen", b"fifteen"],
+                 [b"sixteen", b"seventeen", b"eighteen"],
+                 [b"nineteen", b"twenty", b"twentyone"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227n",
+                                                   u"\xc6\u053c\u025bv\u025bn",
+                                                   u"tw\u0c1dlv\u025b"]],
+                      [x.encode("utf-8") for x in [u"th\xcdrt\xea\xean",
+                                                   u"f\U0001f604urt\xea\xean",
+                                                   u"f\xcd\ua09ctee\ua0e4"]],
+                      [x.encode("utf-8") for x in [u"s\xcdxt\xea\xean",
+                                                   u"se\U00010299enteen",
+                                                   u"ei\U0001e920h\x86een"]],
+                      [x.encode("utf-8") for x in [u"nineteen",
+                                                   u"twenty",
+                                                   u"twentyone"]]],
+    }[unit]
     position = np.array([1, -4, 3], dtype)
     length = np.array([1, 2, 3], dtype)
-    expected_value = [[b"e", b"ev", b"lve"], [b"h", b"te", b"tee"],
-                      [b"i", b"te", b"hte"], [b"i", b"en", b"nty"]]
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"e", b"ev", b"lve"], [b"h", b"te", b"tee"],
+                 [b"i", b"te", b"hte"], [b"i", b"en", b"nty"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d227",
+                                                   u"\u025bv", u"lv\u025b"]],
+                      [x.encode("utf-8") for x in [u"h", u"t\xea", u"tee"]],
+                      [x.encode("utf-8") for x in [u"\xcd", u"te", u"h\x86e"]],
+                      [x.encode("utf-8") for x in [u"i", u"en", u"nty"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
     # Broadcast input string onto pos/len
-    test_string = [b"thirteen", b"fourteen", b"fifteen"]
+    test_string = {
+        "BYTE": [b"thirteen", b"fourteen", b"fifteen"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"th\xcdrt\xea\xean",
+                                                  u"f\U0001f604urt\xea\xean",
+                                                  u"f\xcd\ua09ctee\ua0e4"]],
+    }[unit]
     position = np.array([[1, -2, 3], [-3, 2, 1], [5, 5, -5]], dtype)
     length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype)
-    expected_value = [[b"hir", b"en", b"t"], [b"e", b"ur", b"ift"],
-                      [b"ee", b"ee", b"ft"]]
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [[b"hir", b"en", b"t"], [b"e", b"ur", b"ift"],
+                 [b"ee", b"ee", b"ft"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"h\xcdr", u"\xean", u"t"]],
+                      [x.encode("utf-8") for x in [u"\xea", u"ur",
+                                                   u"\xcd\ua09ct"]],
+                      [x.encode("utf-8") for x in [u"\xea\xea", u"\xea\xea",
+                                                   u"\ua09ct"]]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
     # Test 1D broadcast
-    test_string = b"thirteen"
-    position = np.array([1, -5, 7], dtype)
+    test_string = {
+        "BYTE": b"thirteen",
+        "UTF8_CHAR": u"th\xcdrt\xea\xean".encode("utf-8"),
+    }[unit]
+    position = np.array([1, -4, 7], dtype)
     length = np.array([3, 2, 1], dtype)
-    expected_value = [b"hir", b"rt", b"n"]
-    substr_op = string_ops.substr(test_string, position, length)
+    expected_value = {
+        "BYTE": [b"hir", b"te", b"n"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"h\xcdr", u"t\xea", u"n"]],
+    }[unit]
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       substr = substr_op.eval()
       self.assertAllEqual(substr, expected_value)
 
-  def _testBadBroadcast(self, dtype):
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testBadBroadcast(self, dtype, unit):
     test_string = [[b"ten", b"eleven", b"twelve"],
                    [b"thirteen", b"fourteen", b"fifteen"],
                    [b"sixteen", b"seventeen", b"eighteen"]]
     position = np.array([1, 2, -3, 4], dtype)
     length = np.array([1, 2, 3, 4], dtype)
     with self.assertRaises(ValueError):
-      substr_op = string_ops.substr(test_string, position, length)
-
-  def _testOutOfRangeError(self, dtype):
+      string_ops.substr(test_string, position, length, unit=unit)
+
+  @parameterized.parameters(
+      (np.int32, 6, "BYTE"),
+      (np.int64, 6, "BYTE"),
+      (np.int32, -6, "BYTE"),
+      (np.int64, -6, "BYTE"),
+      (np.int32, 6, "UTF8_CHAR"),
+      (np.int64, 6, "UTF8_CHAR"),
+      (np.int32, -6, "UTF8_CHAR"),
+      (np.int64, -6, "UTF8_CHAR"),
+  )
+  def testOutOfRangeError_Scalar(self, dtype, pos, unit):
     # Scalar/Scalar
-    test_string = b"Hello"
-    position = np.array(7, dtype)
-    length = np.array(3, dtype)
-    substr_op = string_ops.substr(test_string, position, length)
-    with self.cached_session():
-      with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
-    # Scalar/Scalar (with negative)
-    test_string = b"Hello"
-    position = np.array(-7, dtype)
+    test_string = {
+        "BYTE": b"Hello",
+        "UTF8_CHAR": u"H\xc3ll\U0001f604".encode("utf-8"),
+    }[unit]
+    position = np.array(pos, dtype)
     length = np.array(3, dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
+        substr_op.eval()
+
+  @parameterized.parameters(
+      (np.int32, 4, "BYTE"),
+      (np.int64, 4, "BYTE"),
+      (np.int32, -4, "BYTE"),
+      (np.int64, -4, "BYTE"),
+      (np.int32, 4, "UTF8_CHAR"),
+      (np.int64, 4, "UTF8_CHAR"),
+      (np.int32, -4, "UTF8_CHAR"),
+      (np.int64, -4, "UTF8_CHAR"),
+  )
+  def testOutOfRangeError_VectorScalar(self, dtype, pos, unit):
     # Vector/Scalar
-    test_string = [b"good", b"good", b"bad", b"good"]
-    position = np.array(4, dtype)
-    length = np.array(1, dtype)
-    substr_op = string_ops.substr(test_string, position, length)
-    with self.cached_session():
-      with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
-    # Vector/Scalar (with negative)
-    test_string = [b"good", b"good", b"bad", b"good"]
-    position = np.array(-4, dtype)
+    test_string = {
+        "BYTE": [b"good", b"good", b"bad", b"good"],
+        "UTF8_CHAR": [x.encode("utf-8") for x in [u"g\xc3\xc3d", u"b\xc3d",
+                                                  u"g\xc3\xc3d"]],
+    }[unit]
+    position = np.array(pos, dtype)
     length = np.array(1, dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
+        substr_op.eval()
+
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testOutOfRangeError_MatrixMatrix(self, dtype, unit):
     # Matrix/Matrix
-    test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"],
-                   [b"good", b"good", b"good"]]
+    test_string = {
+        "BYTE": [[b"good", b"good", b"good"], [b"good", b"good", b"bad"],
+                 [b"good", b"good", b"good"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"g\xc3\xc3d"]],
+                      [x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"b\xc3d"]],
+                      [x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"g\xc3\xc3d"]]],
+    }[unit]
     position = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 3]], dtype)
     length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
+        substr_op.eval()
 
     # Matrix/Matrix (with negative)
-    test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"],
-                   [b"good", b"good", b"good"]]
     position = np.array([[1, 2, -3], [1, 2, -4], [1, 2, -3]], dtype)
     length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
+        substr_op.eval()
+
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testOutOfRangeError_Broadcast(self, dtype, unit):
     # Broadcast
-    test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"]]
+    test_string = {
+        "BYTE": [[b"good", b"good", b"good"], [b"good", b"good", b"bad"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"g\xc3\xc3d"]],
+                      [x.encode("utf-8") for x in [u"g\xc3\xc3d", u"g\xc3\xc3d",
+                                                   u"b\xc3d"]]],
+    }[unit]
     position = np.array([1, 2, 4], dtype)
     length = np.array([1, 2, 3], dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
+        substr_op.eval()
 
     # Broadcast (with negative)
-    test_string = [[b"good", b"good", b"good"], [b"good", b"good", b"bad"]]
     position = np.array([-1, -2, -4], dtype)
     length = np.array([1, 2, 3], dtype)
-    substr_op = string_ops.substr(test_string, position, length)
+    substr_op = string_ops.substr(test_string, position, length, unit=unit)
     with self.cached_session():
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        substr = substr_op.eval()
-
-  def _testMismatchPosLenShapes(self, dtype):
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
+        substr_op.eval()
+
+  @parameterized.parameters(
+      (np.int32, "BYTE"),
+      (np.int64, "BYTE"),
+      (np.int32, "UTF8_CHAR"),
+      (np.int64, "UTF8_CHAR"),
+  )
+  def testMismatchPosLenShapes(self, dtype, unit):
+    test_string = {
+        "BYTE": [[b"ten", b"eleven", b"twelve"],
+                 [b"thirteen", b"fourteen", b"fifteen"],
+                 [b"sixteen", b"seventeen", b"eighteen"]],
+        "UTF8_CHAR": [[x.encode("utf-8") for x in [u"\U0001d229\U0001d227n",
+                                                   u"\xc6\u053c\u025bv\u025bn",
+                                                   u"tw\u0c1dlv\u025b"]],
+                      [x.encode("utf-8") for x in [u"th\xcdrt\xea\xean",
+                                                   u"f\U0001f604urt\xea\xean",
+                                                   u"f\xcd\ua09ctee\ua0e4"]],
+                      [x.encode("utf-8") for x in [u"s\xcdxt\xea\xean",
+                                                   u"se\U00010299enteen",
+                                                   u"ei\U0001e920h\x86een"]]],
+    }[unit]
     position = np.array([[1, 2, 3]], dtype)
     length = np.array([2, 3, 4], dtype)
     # Should fail: position/length have different rank
     with self.assertRaises(ValueError):
-      substr_op = string_ops.substr(test_string, position, length)
+      string_ops.substr(test_string, position, length)
 
     position = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]], dtype)
     length = np.array([[2, 3, 4]], dtype)
     # Should fail: position/length have different dimensionality
     with self.assertRaises(ValueError):
-      substr_op = string_ops.substr(test_string, position, length)
-
-    # Negative position.
-    test_string = [[b"ten", b"eleven", b"twelve"],
-                   [b"thirteen", b"fourteen", b"fifteen"],
-                   [b"sixteen", b"seventeen", b"eighteen"]]
-    position = np.array([[-1, -2, -3]], dtype)
-    length = np.array([1, 2, 3], dtype)
-    # Should fail: position/length have different rank
-    with self.assertRaises(ValueError):
-      substr_op = string_ops.substr(test_string, position, length)
-
-  @parameterized.parameters(np.int32, np.int64)
-  def testAll(self, dtype):
-    self._testScalarString(dtype)
-    self._testVectorStrings(dtype)
-    self._testMatrixStrings(dtype)
-    self._testElementWisePosLen(dtype)
-    self._testBroadcast(dtype)
-    self._testBadBroadcast(dtype)
-    self._testOutOfRangeError(dtype)
-    self._testMismatchPosLenShapes(dtype)
+      string_ops.substr(test_string, position, length)
 
   def testWrongDtype(self):
     with self.cached_session():
@@ -300,6 +478,11 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase):
       with self.assertRaises(TypeError):
         string_ops.substr(b"test", 3, 1.0)
 
+  def testInvalidUnit(self):
+    with self.cached_session():
+      with self.assertRaises(ValueError):
+        string_ops.substr(b"test", 3, 1, unit="UTF8")
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index 0812f901a2..f26388efea 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -347,6 +347,22 @@ def string_length(input, name=None, unit="BYTE"):
 string_length.__doc__ = gen_string_ops.string_length.__doc__
 
 
+@tf_export("substr")
+@deprecation.deprecated(None, "Use `tf.strings.substr` instead of `tf.substr`.")
+def substr_deprecated(input, pos, len, name=None, unit="BYTE"):
+  return substr(input, pos, len, name=name, unit=unit)
+
+substr_deprecated.__doc__ = gen_string_ops.substr.__doc__
+
+
+@tf_export("strings.substr")
+def substr(input, pos, len, name=None, unit="BYTE"):
+  return gen_string_ops.substr(input, pos, len, unit=unit, name=name)
+
+
+substr.__doc__ = gen_string_ops.substr.__doc__
+
+
 ops.NotDifferentiable("RegexReplace")
 ops.NotDifferentiable("StringToHashBucket")
 ops.NotDifferentiable("StringToHashBucketFast")
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index c1cc7322f0..247dfcc1ca 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -2094,7 +2094,7 @@ tf_module {
   }
   member_method {
     name: "substr"
-    argspec: "args=[\'input\', \'pos\', \'len\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'pos\', \'len\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "subtract"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
index ebdaf57231..5ba48e7f57 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
@@ -34,7 +34,7 @@ tf_module {
   }
   member_method {
     name: "substr"
-    argspec: "args=[\'input\', \'pos\', \'len\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'pos\', \'len\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "to_hash_bucket"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 571abc3b19..978afcf985 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -1934,7 +1934,7 @@ tf_module {
   }
   member_method {
     name: "substr"
-    argspec: "args=[\'input\', \'pos\', \'len\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'pos\', \'len\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "subtract"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
index ebdaf57231..5ba48e7f57 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
@@ -34,7 +34,7 @@ tf_module {
   }
   member_method {
     name: "substr"
-    argspec: "args=[\'input\', \'pos\', \'len\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'pos\', \'len\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
   }
   member_method {
     name: "to_hash_bucket"
-- 
GitLab


From 31619b408551907030dc25d8270f8997a0d9e6aa Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Thu, 4 Oct 2018 11:34:55 -0700
Subject: [PATCH 0368/1085] Add xla library into contrib_py

PiperOrigin-RevId: 215774158
---
 tensorflow/contrib/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index fbe0573d5d..fa06d351d4 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -29,6 +29,7 @@ py_library(
         "//tensorflow/contrib/cluster_resolver:cluster_resolver_py",
         "//tensorflow/contrib/coder:coder_py",
         "//tensorflow/contrib/compiler:compiler_py",
+        "//tensorflow/contrib/compiler:xla",
         "//tensorflow/contrib/autograph",
         "//tensorflow/contrib/constrained_optimization",
         "//tensorflow/contrib/copy_graph:copy_graph_py",
-- 
GitLab


From 2cc1096e86380755427dc2a3fa43e81bbf471813 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Thu, 4 Oct 2018 11:45:29 -0700
Subject: [PATCH 0369/1085] Remove trailing spaces in expected output

---
 tensorflow/python/kernel_tests/check_ops_test.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 543d2d3f8b..222606348f 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -246,22 +246,19 @@ b'Condition x != y did not hold for every single element:'
 b'x (shape=(2, 3) dtype=float32) = '
 0.0, 1.0, 2.0, 3.0, 4.0, 5.0
 b'y (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, 3.0, 4.0, 5.0
-"""
+0.0, 1.0, 2.0, 3.0, 4.0, 5.0"""
     expected_error_msg_default = r"""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
 b'Condition x != y did not hold for every single element:'
 b'x (shape=(2, 3) dtype=float32) = '
 0.0, 1.0, 2.0, ...
 b'y (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, ...
-"""
+0.0, 1.0, 2.0, ..."""
     expected_error_msg_short = r"""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
 b'Condition x != y did not hold for every single element:'
 b'x (shape=(2, 3) dtype=float32) = '
 0.0, 1.0, ...
 b'y (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, ...
-"""
+0.0, 1.0, ..."""
     with context.eager_mode():
       t = constant_op.constant(np.array(range(6)), shape=[2,3], dtype=np.float32)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
-- 
GitLab


From 2390b48b11efda60a0f68a683c94af9612a5306f Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 4 Oct 2018 11:54:24 -0700
Subject: [PATCH 0370/1085] Add a separator between shape and dtype in cache
 key encoding.

It was possible that we could mix shapes and types (T111 could mean a tensor of dtype 1 and shape (1, 1) or a tensor of dtype 11 and shape (1)).

PiperOrigin-RevId: 215777629
---
 tensorflow/python/eager/function_test.py  | 44 +++++++++++++++++++++--
 tensorflow/python/eager/pywrap_tfe_src.cc | 34 +++++++++---------
 2 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 9ce367a837..a2cfb4b476 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1255,6 +1255,44 @@ class FunctionTest(test.TestCase):
     defined(Foo())
     self.assertEqual(len(defined._function_cache), 2)
 
+  def testCacheTensorShapeDtypeCollision(self):
+
+    def func(t):
+      return t + t
+
+    defined = function.defun(func)
+    t = constant_op.constant([[1.0]], dtype=dtypes.complex64)
+    defined(t)
+    self.assertEqual(len(defined._function_cache), 1)
+
+    t = constant_op.constant([1.0], dtype=dtypes.complex128)
+    defined(t)
+    self.assertEqual(len(defined._function_cache), 2)
+
+  def testCacheTensorUnknownShapesCollision(self):
+
+    def func(t):
+      return t + t
+
+    with context.graph_mode(), self.cached_session():
+      defined = function.defun(func)
+
+      p = array_ops.placeholder(dtype=dtypes.float32, shape=None)
+      defined(p)
+      self.assertEqual(len(defined._function_cache), 1)
+
+      p = array_ops.placeholder(dtype=dtypes.float32, shape=[None])
+      defined(p)
+      self.assertEqual(len(defined._function_cache), 2)
+
+      p = array_ops.placeholder(dtype=dtypes.float32, shape=[None, None])
+      defined(p)
+      self.assertEqual(len(defined._function_cache), 3)
+
+      t = constant_op.constant(1.0, dtype=dtypes.float32)
+      defined(t)
+      self.assertEqual(len(defined._function_cache), 4)
+
   def testPythonFunctionWithDefaultArgs(self):
 
     def func(foo, bar=1, baz=2):
@@ -1271,17 +1309,17 @@ class FunctionTest(test.TestCase):
       return tuple(key[0] for key in defined._function_cache)
 
     # `True` corresponds to the fact that we're executing eagerly
-    self.assertIn(('tRRR', (0, 1, 20)), cache_keys())
+    self.assertIn(('URRR', (0, 1, 20)), cache_keys())
 
     defined(1)  # bar=1, baz=2
-    self.assertIn(('tRRR', (1, 1, 2)), cache_keys())
+    self.assertIn(('URRR', (1, 1, 2)), cache_keys())
 
     # This matches the previous call.
     defined(foo=1)
     self.assertEqual(len(defined._function_cache), 2)
 
     defined(1, 2, 3)
-    self.assertIn(('tRRR', (1, 2, 3)), cache_keys())
+    self.assertIn(('URRR', (1, 2, 3)), cache_keys())
 
     # This matches the previous call.
     defined(1, bar=2, baz=3)
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index ae1e12f9c3..6193f40ce8 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -2747,11 +2747,15 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs,
 }
 
 namespace {
-
-tensorflow::int64 GetPyNoneHash() {
-  tensorflow::int64 py_none_hash = PyObject_Hash(Py_None);
-  return py_none_hash;
-}
+const char kTensor[] = "T";
+const char kIndexedSlices[] = "I";
+const char kList[] = "L";
+const char kTuple[] = "U";
+const char kDict[] = "D";
+const char kRaw[] = "R";
+const char kShape[] = "s";
+const char kDType[] = "d";
+const char kNone[] = "n";
 
 struct EncodeResult {
   string str;
@@ -2784,8 +2788,10 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
     TFE_TensorHandle* t = EagerTensor_Handle(arg);
     tensorflow::TensorShape tensor_shape;
     TF_RETURN_IF_ERROR(t->handle->Shape(&tensor_shape));
-    absl::StrAppend(&result->str, t->handle->dtype);
 
+    absl::StrAppend(&result->str, kDType, t->handle->dtype);
+
+    absl::StrAppend(&result->str, kShape);
     for (tensorflow::int64 dim_size : tensor_shape.dim_sizes()) {
       absl::StrAppend(&result->str, dim_size);
     }
@@ -2812,7 +2818,7 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
   tensorflow::DataType dtype =
       static_cast<tensorflow::DataType>(MakeInt(dtype_enum.get()));
 
-  absl::StrAppend(&result->str, dtype);
+  absl::StrAppend(&result->str, kDType, dtype);
   static char _shape_tuple[] = "_shape_tuple";
   tensorflow::Safe_PyObjectPtr shape_tuple(
       PyObject_CallMethod(arg, _shape_tuple, nullptr));
@@ -2824,10 +2830,11 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
 
   if (shape_tuple.get() == Py_None) {
     // Unknown shape, encode that directly.
-    absl::StrAppend(&result->str, GetPyNoneHash());
+    absl::StrAppend(&result->str, kNone);
     return tensorflow::Status::OK();
   }
 
+  absl::StrAppend(&result->str, kShape);
   tensorflow::Safe_PyObjectPtr shape_seq(PySequence_Fast(
       shape_tuple.get(), "shape_tuple didn't return a sequence"));
 
@@ -2835,7 +2842,7 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
   for (int i = 0; i < len; ++i) {
     PyObject* item = PySequence_Fast_GET_ITEM(shape_seq.get(), i);
     if (item == Py_None) {
-      absl::StrAppend(&result->str, GetPyNoneHash());
+      absl::StrAppend(&result->str, kNone);
     } else {
       absl::StrAppend(&result->str, MakeInt(item));
     }
@@ -2844,13 +2851,6 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
   return tensorflow::Status::OK();
 }
 
-const char kTensor[] = "T";
-const char kIndexedSlices[] = "I";
-const char kList[] = "L";
-const char kTuple[] = "t";
-const char kDict[] = "D";
-const char kRaw[] = "R";
-
 tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, EncodeResult* result);
 
 // This function doesn't set the type of sequence before
@@ -2864,7 +2864,7 @@ tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type,
   for (int i = 0; i < len; ++i) {
     PyObject* item = PySequence_Fast_GET_ITEM(arg_seq.get(), i);
     if (item == Py_None) {
-      absl::StrAppend(&result->str, GetPyNoneHash());
+      absl::StrAppend(&result->str, kNone);
     } else {
       TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(item, result));
     }
-- 
GitLab


From b82c4dad705bffac6d14a189605c9ece89f8c17b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:55:48 -0700
Subject: [PATCH 0371/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 215777837

---
 tensorflow/go/op/wrappers.go | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index b4d4db3e4d..a7bbb80c82 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -29094,6 +29094,17 @@ func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source
 	return op.Output(0)
 }
 
+// SubstrAttr is an optional argument to Substr.
+type SubstrAttr func(optionalAttr)
+
+// SubstrUnit sets the optional unit attribute to value.
+// If not specified, defaults to "BYTE"
+func SubstrUnit(value string) SubstrAttr {
+	return func(m optionalAttr) {
+		m["unit"] = value
+	}
+}
+
 // Return substrings from `Tensor` of strings.
 //
 // For each string in the input `Tensor`, creates a substring starting at index
@@ -29178,15 +29189,20 @@ func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source
 //	len: Scalar defining the number of characters to include in each substring
 //
 // Returns Tensor of substrings
-func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output) (output tf.Output) {
+func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optional ...SubstrAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
 		Type: "Substr",
 		Input: []tf.Input{
 			input, pos, len,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
-- 
GitLab


From 2667ed3bf01e7153f466b27c450fc2b662c00bdd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 11:59:37 -0700
Subject: [PATCH 0372/1085] Makes sure Keras Layer's `__call__` is always used
 in Eager.

Currently if a Layer is invoked with the Functional API in Eager, `__call__` is only used
during setup, and thereafter `call` is used internally. This limits the ability
to add pre/post processing steps to `call` in Eager in the future.
Additionally, the Subclassed Model API already always uses `__call__` in Eager.

PiperOrigin-RevId: 215778408
---
 tensorflow/python/keras/engine/network.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 8d34006967..918488bd7a 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -1028,7 +1028,10 @@ class Network(base_layer.Layer):
                 output_tensors, output_masks = layer._call_and_compute_mask(
                     computed_tensor, **kwargs)
               else:
-                output_tensors = layer.call(computed_tensor, **kwargs)
+                if context.executing_eagerly():
+                  output_tensors = layer(computed_tensor, **kwargs)
+                else:
+                  output_tensors = layer.call(computed_tensor, **kwargs)
                 if hasattr(layer, 'compute_mask'):
                   output_masks = layer.compute_mask(computed_tensor,
                                                     computed_mask)
@@ -1049,7 +1052,10 @@ class Network(base_layer.Layer):
                 output_tensors, output_masks = layer._call_and_compute_mask(
                     computed_tensors, **kwargs)
               else:
-                output_tensors = layer.call(computed_tensors, **kwargs)
+                if context.executing_eagerly():
+                  output_tensors = layer(computed_tensors, **kwargs)
+                else:
+                  output_tensors = layer.call(computed_tensors, **kwargs)
                 if hasattr(layer, 'compute_mask'):
                   output_masks = layer.compute_mask(computed_tensors,
                                                     computed_masks)
-- 
GitLab


From 5bdd0f7c2807ed413cfc60319f1e75b1e6a4a5b5 Mon Sep 17 00:00:00 2001
From: Paul Donnelly <pauldonnelly@google.com>
Date: Thu, 4 Oct 2018 12:12:39 -0700
Subject: [PATCH 0373/1085] Remove obsolete TODO.

PiperOrigin-RevId: 215780734
---
 tensorflow/core/kernels/dequantize_op.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/core/kernels/dequantize_op.cc
index 42fbf95cd3..28940e0849 100644
--- a/tensorflow/core/kernels/dequantize_op.cc
+++ b/tensorflow/core/kernels/dequantize_op.cc
@@ -96,8 +96,6 @@ class DequantizeOp : public OpKernel {
             output);
       }
     } else if (mode_ == QUANTIZE_MODE_SCALED) {
-      // TODO(pauldonnelly): Update QuantizeAndDequantizeV2 and
-      // QuantizeAndDequantizeV3 to match this SCALED mode again.
       const float scale_factor =
           std::numeric_limits<T>::min() == 0
               ? (max_range / std::numeric_limits<T>::max())
-- 
GitLab


From 900d115135656229e3667025f925eb92687dce18 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 12:29:50 -0700
Subject: [PATCH 0374/1085] [XLA] Move FusionQueue class declaration into
 separate header

PiperOrigin-RevId: 215783391
---
 tensorflow/compiler/xla/service/BUILD         |  9 ++++
 .../compiler/xla/service/fusion_queue.h       | 53 +++++++++++++++++++
 .../xla/service/instruction_fusion.cc         |  1 +
 .../compiler/xla/service/instruction_fusion.h | 28 +---------
 4 files changed, 64 insertions(+), 27 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/fusion_queue.h

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index f329a27e14..2f8bab0614 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1323,11 +1323,20 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "fusion_queue",
+    hdrs = ["fusion_queue.h"],
+    deps = [
+        ":hlo",
+    ],
+)
+
 cc_library(
     name = "instruction_fusion",
     srcs = ["instruction_fusion.cc"],
     hdrs = ["instruction_fusion.h"],
     deps = [
+        ":fusion_queue",
         ":hlo",
         ":hlo_pass",
         "//tensorflow/compiler/xla:util",
diff --git a/tensorflow/compiler/xla/service/fusion_queue.h b/tensorflow/compiler/xla/service/fusion_queue.h
new file mode 100644
index 0000000000..1208a7dda8
--- /dev/null
+++ b/tensorflow/compiler/xla/service/fusion_queue.h
@@ -0,0 +1,53 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_FUSION_QUEUE_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_FUSION_QUEUE_H_
+
+#include <utility>
+
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+
+namespace xla {
+
+// A queue interface that allows implementations to choose fusion candidates in
+// custom order.
+class FusionQueue {
+ public:
+  FusionQueue() = default;
+  virtual ~FusionQueue() = default;
+
+  // Dequeues the next fusion candidates: a consumer and the list of producers
+  // as operand indices.
+  virtual std::pair<HloInstruction*, std::vector<int64>>
+  DequeueNextInstructionAndOperandsToFuseInOrder() = 0;
+
+  // A callback passed to the queue implementation right before the producer is
+  // fused into the consumer.
+  virtual void PreFusion(HloInstruction* producer, HloInstruction* consumer) {}
+
+  // A callback passed to the queue implementation right after the fusion is
+  // created. Note that original_producer could have been destroyed.
+  virtual void OnFusingInstruction(HloInstruction* fusion,
+                                   HloInstruction* original_producer,
+                                   HloInstruction* original_consumer) {}
+
+  // A callback passed to the queue implementation to notify the removal of an
+  // instruction.
+  virtual void RemoveInstruction(HloInstruction* instruction) = 0;
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_FUSION_QUEUE_H_
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index 5a99c40df4..69a4c160ee 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include "absl/container/flat_hash_map.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
+#include "tensorflow/compiler/xla/service/fusion_queue.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h
index da2032f6c7..f14c667520 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/instruction_fusion.h
@@ -17,6 +17,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INSTRUCTION_FUSION_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_INSTRUCTION_FUSION_H_
 
+#include "tensorflow/compiler/xla/service/fusion_queue.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
@@ -25,33 +26,6 @@ limitations under the License.
 
 namespace xla {
 
-// A queue interface that allows implementations to choose fusion candidates in
-// custom order.
-class FusionQueue {
- public:
-  FusionQueue() = default;
-  virtual ~FusionQueue() = default;
-
-  // Dequeues the next fusion candidates: a consumer and the list of producers
-  // as operand indices.
-  virtual std::pair<HloInstruction*, std::vector<int64>>
-  DequeueNextInstructionAndOperandsToFuseInOrder() = 0;
-
-  // A callback passed to the queue implementation right before the producer is
-  // fused into the consumer.
-  virtual void PreFusion(HloInstruction* producer, HloInstruction* consumer) {}
-
-  // A callback passed to the queue implementation right after the fusion is
-  // created. Note that original_producer could have been destroyed.
-  virtual void OnFusingInstruction(HloInstruction* fusion,
-                                   HloInstruction* original_producer,
-                                   HloInstruction* original_consumer) {}
-
-  // A callback passed to the queue implementation to notify the removal of an
-  // instruction.
-  virtual void RemoveInstruction(HloInstruction* instruction) = 0;
-};
-
 // HLO pass which performs instruction fusion. Instructions are fused
 // "vertically", meaning producing instructions are fused into their consumers
 // with the intent that the loops which compute their values will be fused in
-- 
GitLab


From 2c75da86ffdb9d04b2b94ce89891f17a8656da22 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 4 Oct 2018 12:41:23 -0700
Subject: [PATCH 0375/1085] [tf.data] Clean up tests for
 `tf.data.experimental`.

This change splits up large test files into smaller ones, and re-enables tests that were disabled for obsolete reasons.

PiperOrigin-RevId: 215785396
---
 .../python/data/experimental/benchmarks/BUILD |  25 +
 .../map_benchmark.py}                         | 114 ---
 .../data/experimental/kernel_tests/BUILD      | 545 ++++++------
 .../kernel_tests/batch_dataset_op_test.py     | 686 ---------------
 .../bucket_by_sequence_length_test.py         | 322 +++++++
 .../kernel_tests/bucketing_test.py            | 824 ------------------
 ...ing_ops_test.py => copy_to_device_test.py} | 417 +--------
 .../experimental/kernel_tests/counter_test.py |  51 ++
 ...dataset_op_test.py => csv_dataset_test.py} |   4 +-
 .../dataset_serialization_test_base.py        | 692 ---------------
 .../dense_to_sparse_batch_test.py             | 124 +++
 ...t_op_test.py => enumerate_dataset_test.py} |  26 +-
 .../function_buffering_resource_test.py       | 247 ++++++
 .../kernel_tests/group_by_reducer_test.py     | 199 +++++
 .../kernel_tests/group_by_window_test.py      | 367 ++++++++
 .../kernel_tests/ignore_errors_test.py        | 115 +++
 .../make_batched_features_dataset_test.py     | 239 +++++
 ...t_ops_test.py => make_csv_dataset_test.py} | 425 +--------
 .../make_tf_record_dataset_test.py            | 243 ++++++
 .../kernel_tests/map_and_batch_test.py        | 337 +++++++
 ...ps_test.py => override_threadpool_test.py} |   6 +-
 ...op_test.py => parallel_interleave_test.py} |   4 +-
 ..._test.py => parse_example_dataset_test.py} |   4 +-
 .../kernel_tests/prefetch_to_device_test.py   | 234 +++++
 .../reader_dataset_ops_test_base.py           |   4 +-
 ...ple_test.py => rejection_resample_test.py} |   4 +-
 ...p_test.py => restructured_dataset_test.py} |   4 +-
 .../{scan_dataset_op_test.py => scan_test.py} |   4 +-
 .../kernel_tests/serialization/BUILD          |  22 +-
 .../checkpoint_input_pipeline_hook_test.py}   |   0
 ...arse_example_dataset_serialization_test.py |   2 +-
 .../sql_dataset_serialization_test.py         |   4 +-
 .../serialization_integration_test.py         |  85 --
 ..._op_test.py => shuffle_and_repeat_test.py} |   2 +-
 ...dataset_op_test.py => sql_dataset_test.py} |   6 +-
 ..._test_base.py => sql_dataset_test_base.py} |   3 +-
 .../kernel_tests/stats_dataset_ops_test.py    |   2 +-
 ...r_ops_test.py => tf_record_writer_test.py} |   2 +-
 .../experimental/kernel_tests/unbatch_test.py | 300 +++++++
 ...ique_dataset_op_test.py => unique_test.py} |   4 +-
 .../data/kernel_tests/map_dataset_op_test.py  |  31 +-
 41 files changed, 3172 insertions(+), 3557 deletions(-)
 create mode 100644 tensorflow/python/data/experimental/benchmarks/BUILD
 rename tensorflow/python/data/experimental/{kernel_tests/map_dataset_op_test.py => benchmarks/map_benchmark.py} (71%)
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{prefetching_ops_test.py => copy_to_device_test.py} (56%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/counter_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{csv_dataset_op_test.py => csv_dataset_test.py} (99%)
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{range_dataset_op_test.py => enumerate_dataset_test.py} (68%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{reader_dataset_ops_test.py => make_csv_dataset_test.py} (57%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{threadpool_dataset_ops_test.py => override_threadpool_test.py} (94%)
 rename tensorflow/python/data/experimental/kernel_tests/{interleave_dataset_op_test.py => parallel_interleave_test.py} (99%)
 rename tensorflow/python/data/experimental/kernel_tests/{parsing_ops_test.py => parse_example_dataset_test.py} (99%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{resample_test.py => rejection_resample_test.py} (97%)
 rename tensorflow/python/data/experimental/kernel_tests/{dataset_constructor_op_test.py => restructured_dataset_test.py} (95%)
 rename tensorflow/python/data/experimental/kernel_tests/{scan_dataset_op_test.py => scan_test.py} (98%)
 rename tensorflow/python/data/experimental/kernel_tests/{iterator_ops_test.py => serialization/checkpoint_input_pipeline_hook_test.py} (100%)
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{shuffle_dataset_op_test.py => shuffle_and_repeat_test.py} (98%)
 rename tensorflow/python/data/experimental/kernel_tests/{sql_dataset_op_test.py => sql_dataset_test.py} (99%)
 rename tensorflow/python/data/experimental/kernel_tests/{sql_dataset_op_test_base.py => sql_dataset_test_base.py} (98%)
 rename tensorflow/python/data/experimental/kernel_tests/{writer_ops_test.py => tf_record_writer_test.py} (98%)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
 rename tensorflow/python/data/experimental/kernel_tests/{unique_dataset_op_test.py => unique_test.py} (96%)

diff --git a/tensorflow/python/data/experimental/benchmarks/BUILD b/tensorflow/python/data/experimental/benchmarks/BUILD
new file mode 100644
index 0000000000..b9398aebe7
--- /dev/null
+++ b/tensorflow/python/data/experimental/benchmarks/BUILD
@@ -0,0 +1,25 @@
+package(default_visibility = ["//tensorflow:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+py_test(
+    name = "map_benchmark",
+    size = "medium",
+    srcs = ["map_benchmark.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/experimental/benchmarks/map_benchmark.py
similarity index 71%
rename from tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
rename to tensorflow/python/data/experimental/benchmarks/map_benchmark.py
index 2f0bd1456b..ad253cffa5 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/benchmarks/map_benchmark.py
@@ -19,7 +19,6 @@ from __future__ import print_function
 
 import hashlib
 import itertools
-import os
 import time
 
 import numpy as np
@@ -27,128 +26,15 @@ import numpy as np
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.data.experimental.ops import batching
-from tensorflow.python.data.experimental.ops import error_ops
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import io_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
-from tensorflow.python.util import compat
 
 _NUMPY_RANDOM_SEED = 42
 
 
-class MapDatasetTest(test_base.DatasetTestBase):
-
-  def testMapIgnoreError(self):
-    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
-
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.check_numerics(x, "message")).apply(
-            error_ops.ignore_errors()))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for x in [1., 2., 3., 5.]:
-        self.assertEqual(x, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testParallelMapIgnoreError(self):
-    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
-
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(components).map(
-            lambda x: array_ops.check_numerics(x, "message"),
-            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for x in [1., 2., 3., 5.]:
-        self.assertEqual(x, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testReadFileIgnoreError(self):
-
-    def write_string_to_file(value, filename):
-      with open(filename, "w") as f:
-        f.write(value)
-
-    filenames = [
-        os.path.join(self.get_temp_dir(), "file_%d.txt" % i) for i in range(5)
-    ]
-    for filename in filenames:
-      write_string_to_file(filename, filename)
-
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(filenames).map(
-            io_ops.read_file,
-            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      # All of the files are present.
-      sess.run(init_op)
-      for filename in filenames:
-        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-      # Delete one of the files.
-      os.remove(filenames[0])
-
-      # Attempting to read filenames[0] will fail, but ignore_errors()
-      # will catch the error.
-      sess.run(init_op)
-      for filename in filenames[1:]:
-        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testCaptureResourceInMapFn(self):
-
-    def _build_ds(iterator):
-
-      def _map_fn(x):
-        get_next = iterator.get_next()
-        return x * get_next
-
-      return dataset_ops.Dataset.range(10).map(_map_fn)
-
-    def _build_graph():
-      captured_iterator = dataset_ops.Dataset.range(
-          10).make_initializable_iterator()
-      ds = _build_ds(captured_iterator)
-      iterator = ds.make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      return captured_iterator.initializer, init_op, get_next
-
-    with ops.Graph().as_default() as g:
-      captured_init_op, init_op, get_next = _build_graph()
-      with self.session(graph=g) as sess:
-        sess.run(captured_init_op)
-        sess.run(init_op)
-        for i in range(10):
-          self.assertEquals(i * i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-
 class MapDatasetBenchmark(test.Benchmark):
 
   # The purpose of this benchmark is to compare the performance of chaining vs
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index f56127f3ef..4eef9580ad 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -8,75 +8,62 @@ load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 load("//tensorflow:tensorflow.bzl", "py_test")
 
 py_test(
-    name = "batch_dataset_op_test",
+    name = "bucket_by_sequence_length_test",
     size = "medium",
-    srcs = ["batch_dataset_op_test.py"],
+    srcs = ["bucket_by_sequence_length_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",  # (b/79552534)
-        "no_pip",
-        "no_windows",
-    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:session",
         "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
         "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
     ],
 )
 
+cuda_py_test(
+    name = "copy_to_device_test",
+    size = "small",
+    srcs = ["copy_to_device_test.py"],
+    additional_deps = [
+        "//tensorflow/python/data/experimental/ops:prefetching_ops",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python/compat:compat",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+    tags = ["no_windows_gpu"],
+)
+
 py_test(
-    name = "bucketing_test",
-    size = "medium",
-    srcs = ["bucketing_test.py"],
+    name = "counter_test",
+    size = "small",
+    srcs = ["counter_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
     deps = [
-        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/experimental/ops:grouping",
+        "//tensorflow/python/data/experimental/ops:counter",
         "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
     ],
 )
 
 py_test(
-    name = "csv_dataset_op_test",
+    name = "csv_dataset_test",
     size = "medium",
-    srcs = ["csv_dataset_op_test.py"],
+    srcs = ["csv_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    tags = ["no_pip"],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -97,25 +84,18 @@ py_test(
 )
 
 py_test(
-    name = "dataset_constructor_op_test",
-    size = "medium",
-    srcs = ["dataset_constructor_op_test.py"],
+    name = "dense_to_sparse_batch_test",
+    srcs = ["dense_to_sparse_batch_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "manual",
-        "no_oss",
-        "no_pip",
-        "no_windows",
-        "nomac",  # b/62040583
-    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
         "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
     ],
 )
 
@@ -124,11 +104,6 @@ py_test(
     size = "medium",
     srcs = ["directed_interleave_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
@@ -140,15 +115,68 @@ py_test(
     ],
 )
 
+py_test(
+    name = "enumerate_dataset_test",
+    size = "small",
+    srcs = ["enumerate_dataset_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:enumerate_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
+py_test(
+    name = "filter_dataset_op_test",
+    size = "medium",
+    srcs = ["filter_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "function_buffering_resource_test",
+    size = "small",
+    srcs = ["function_buffering_resource_test.py"],
+    additional_deps = [
+        "//tensorflow/python/data/experimental/ops:prefetching_ops",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:function",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/ops:iterator_ops",
+    ],
+    tags = ["no_windows_gpu"],
+)
+
 py_test(
     name = "get_single_element_test",
     size = "small",
     srcs = ["get_single_element_test.py"],
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -165,19 +193,20 @@ py_test(
 )
 
 py_test(
-    name = "indexed_dataset_ops_test",
-    srcs = ["indexed_dataset_ops_test.py"],
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    name = "group_by_reducer_test",
+    size = "medium",
+    srcs = ["group_by_reducer_test.py"],
+    srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python/data/experimental/ops:indexed_dataset_ops",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
@@ -185,107 +214,134 @@ py_test(
 )
 
 py_test(
-    name = "interleave_dataset_op_test",
+    name = "group_by_window_test",
     size = "medium",
-    srcs = ["interleave_dataset_op_test.py"],
+    srcs = ["group_by_window_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-        "notap",
-    ],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python/data/experimental/ops:grouping",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "@six_archive//:six",
+        "//third_party/py/numpy",
     ],
 )
 
 py_test(
-    name = "iterator_ops_test",
-    size = "small",
-    srcs = ["iterator_ops_test.py"],
+    name = "ignore_errors_test",
+    srcs = ["ignore_errors_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:error_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
     ],
+)
+
+py_test(
+    name = "indexed_dataset_ops_test",
+    srcs = ["indexed_dataset_ops_test.py"],
     deps = [
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/data/experimental/ops:iterator_ops",
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/experimental/ops:indexed_dataset_ops",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/estimator:estimator_py",
+        "//third_party/py/numpy",
     ],
 )
 
 py_test(
-    name = "map_dataset_op_test",
+    name = "make_batched_features_dataset_test",
     size = "medium",
-    srcs = ["map_dataset_op_test.py"],
+    srcs = ["make_batched_features_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-        "noasan",  # times out
-        "optonly",
+    tags = ["no_pip"],
+    deps = [
+        ":reader_dataset_ops_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/data/util:nest",
+        "//third_party/py/numpy",
     ],
+)
+
+py_test(
+    name = "make_csv_dataset_test",
+    size = "medium",
+    srcs = ["make_csv_dataset_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
-        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:batching",
-        "//tensorflow/python/data/experimental/ops:error_ops",
-        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python/data/experimental/ops:readers",
         "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
         "//third_party/py/numpy",
     ],
 )
 
 py_test(
-    name = "filter_dataset_op_test",
+    name = "make_tf_record_dataset_test",
     size = "medium",
-    srcs = ["filter_dataset_op_test.py"],
+    srcs = ["make_tf_record_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
+    tags = ["no_pip"],
+    deps = [
+        ":reader_dataset_ops_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python/data/experimental/ops:readers",
+        "//tensorflow/python/data/util:nest",
     ],
+)
+
+py_test(
+    name = "map_and_batch_test",
+    size = "medium",
+    srcs = ["map_and_batch_test.py"],
+    srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
         "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:optimization",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
@@ -294,11 +350,7 @@ py_test(
     size = "small",
     srcs = ["map_defun_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    tags = ["no_pip"],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:check_ops",
@@ -317,15 +369,56 @@ py_test(
 )
 
 py_test(
-    name = "parsing_ops_test",
+    name = "override_threadpool_test",
     size = "small",
-    srcs = ["parsing_ops_test.py"],
+    srcs = ["override_threadpool_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python/data/experimental/ops:threadpool",
+        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "parallel_interleave_test",
+    size = "medium",
+    srcs = ["parallel_interleave_test.py"],
     srcs_version = "PY2AND3",
     tags = [
         "no_oss",
         "no_pip",
-        "no_windows",
+        "notap",
     ],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:script_ops",
+        "//tensorflow/python:sparse_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/data/experimental/ops:interleave_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "parse_example_dataset_test",
+    size = "small",
+    srcs = ["parse_example_dataset_test.py"],
+    srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client_testlib",
@@ -344,53 +437,20 @@ py_test(
 )
 
 cuda_py_test(
-    name = "prefetching_ops_test",
+    name = "prefetch_to_device_test",
     size = "small",
-    srcs = ["prefetching_ops_test.py"],
+    srcs = ["prefetch_to_device_test.py"],
     additional_deps = [
         "//tensorflow/python/data/experimental/ops:prefetching_ops",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:function",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python/compat:compat",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-        "no_windows_gpu",
-    ],
-)
-
-py_test(
-    name = "range_dataset_op_test",
-    size = "small",
-    srcs = ["range_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
-    deps = [
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python/data/experimental/ops:counter",
-        "//tensorflow/python/data/experimental/ops:enumerate_ops",
-        "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
+    tags = ["no_windows_gpu"],
 )
 
 py_library(
@@ -421,41 +481,12 @@ py_library(
 )
 
 py_test(
-    name = "reader_dataset_ops_test",
-    size = "medium",
-    srcs = ["reader_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
-    deps = [
-        ":reader_dataset_ops_test_base",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python/data/experimental/ops:readers",
-        "//tensorflow/python/data/ops:readers",
-        "//tensorflow/python/data/util:nest",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "resample_test",
+    name = "rejection_resample_test",
     size = "medium",
-    srcs = ["resample_test.py"],
+    srcs = ["rejection_resample_test.py"],
     shard_count = 2,
     srcs_version = "PY2AND3",
     tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
         "noasan",
         "optonly",
     ],
@@ -477,15 +508,27 @@ py_test(
 )
 
 py_test(
-    name = "scan_dataset_op_test",
-    size = "small",
-    srcs = ["scan_dataset_op_test.py"],
+    name = "restructured_dataset_test",
+    size = "medium",
+    srcs = ["restructured_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/util:nest",
     ],
+)
+
+py_test(
+    name = "scan_test",
+    size = "small",
+    srcs = ["scan_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
@@ -503,14 +546,12 @@ py_test(
 )
 
 py_test(
-    name = "shuffle_dataset_op_test",
+    name = "shuffle_and_repeat_test",
     size = "medium",
-    srcs = ["shuffle_dataset_op_test.py"],
+    srcs = ["shuffle_and_repeat_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-        "no_oss",
         "no_pip",
-        "no_windows",
         "optonly",
     ],
     deps = [
@@ -525,8 +566,8 @@ py_test(
 )
 
 py_library(
-    name = "sql_dataset_op_test_base",
-    srcs = ["sql_dataset_op_test_base.py"],
+    name = "sql_dataset_test_base",
+    srcs = ["sql_dataset_test_base.py"],
     srcs_version = "PY2AND3",
     visibility = [
         "//tensorflow/python/data/experimental/kernel_tests:__pkg__",
@@ -543,17 +584,13 @@ py_library(
 )
 
 py_test(
-    name = "sql_dataset_op_test",
+    name = "sql_dataset_test",
     size = "small",
-    srcs = ["sql_dataset_op_test.py"],
+    srcs = ["sql_dataset_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    tags = ["no_pip"],
     deps = [
-        ":sql_dataset_op_test_base",
+        ":sql_dataset_test_base",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
@@ -565,11 +602,7 @@ py_test(
     size = "medium",
     srcs = ["stats_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    tags = ["no_pip"],
     deps = [
         ":reader_dataset_ops_test_base",
         ":stats_dataset_test_base",
@@ -595,68 +628,60 @@ py_library(
 )
 
 py_test(
-    name = "threadpool_dataset_ops_test",
+    name = "tf_record_writer_test",
     size = "small",
-    srcs = ["threadpool_dataset_ops_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    srcs = ["tf_record_writer_test.py"],
     deps = [
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:script_ops",
-        "//tensorflow/python/data/experimental/ops:threadpool",
-        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:writers",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python/data/ops:readers",
     ],
 )
 
 py_test(
-    name = "unique_dataset_op_test",
-    size = "small",
-    srcs = ["unique_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    name = "unbatch_test",
+    size = "medium",
+    srcs = ["unbatch_test.py"],
     deps = [
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:string_ops",
+        "//tensorflow/python:tensor_shape",
         "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:unique",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
 py_test(
-    name = "writer_ops_test",
+    name = "unique_test",
     size = "small",
-    srcs = ["writer_ops_test.py"],
-    tags = [
-        "no_oss",
-        "no_pip",
-        "no_windows",
-    ],
+    srcs = ["unique_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
     deps = [
-        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:lib",
+        "//tensorflow/python:errors",
         "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:writers",
+        "//tensorflow/python/data/experimental/ops:unique",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:readers",
     ],
 )
diff --git a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
deleted file mode 100644
index 956b4518f6..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
+++ /dev/null
@@ -1,686 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for the experimental input pipeline ops."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import time
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python.client import session
-from tensorflow.python.data.experimental.ops import batching
-from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import script_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.platform import test
-from tensorflow.python.util import compat
-
-
-class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
-
-  def testDenseToSparseBatchDataset(self):
-    components = np.random.randint(12, size=(100,)).astype(np.int32)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.fill([x], x)).apply(
-            batching.dense_to_sparse_batch(4, [12]))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      for start in range(0, len(components), 4):
-        results = sess.run(get_next)
-        self.assertAllEqual([[i, j]
-                             for i, c in enumerate(components[start:start + 4])
-                             for j in range(c)], results.indices)
-        self.assertAllEqual(
-            [c for c in components[start:start + 4] for _ in range(c)],
-            results.values)
-        self.assertAllEqual([min(4,
-                                 len(components) - start), 12],
-                            results.dense_shape)
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testDenseToSparseBatchDatasetWithUnknownShape(self):
-    components = np.random.randint(5, size=(40,)).astype(np.int32)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.fill([x, x], x)).apply(
-            batching.dense_to_sparse_batch(
-                4, [5, None])).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      for start in range(0, len(components), 4):
-        results = sess.run(get_next)
-        self.assertAllEqual([[i, j, z]
-                             for i, c in enumerate(components[start:start + 4])
-                             for j in range(c)
-                             for z in range(c)], results.indices)
-        self.assertAllEqual([
-            c
-            for c in components[start:start + 4] for _ in range(c)
-            for _ in range(c)
-        ], results.values)
-        self.assertAllEqual([
-            min(4,
-                len(components) - start), 5,
-            np.max(components[start:start + 4])
-        ], results.dense_shape)
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testDenseToSparseBatchDatasetWithInvalidShape(self):
-    input_tensor = array_ops.constant([[1]])
-    with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"):
-      dataset_ops.Dataset.from_tensors(input_tensor).apply(
-          batching.dense_to_sparse_batch(4, [-2])).make_initializable_iterator()
-
-  def testDenseToSparseBatchDatasetShapeErrors(self):
-    input_tensor = array_ops.placeholder(dtypes.int32)
-    iterator = (
-        dataset_ops.Dataset.from_tensors(input_tensor).apply(
-            batching.dense_to_sparse_batch(4, [12]))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      # Initialize with an input tensor of incompatible rank.
-      sess.run(init_op, feed_dict={input_tensor: [[1]]})
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "incompatible with the row shape"):
-        sess.run(get_next)
-
-      # Initialize with an input tensor that is larger than `row_shape`.
-      sess.run(init_op, feed_dict={input_tensor: range(13)})
-      with self.assertRaisesRegexp(errors.DataLossError,
-                                   "larger than the row shape"):
-        sess.run(get_next)
-
-  def testUnbatchWithUnknownRankInput(self):
-    placeholder = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensors(placeholder).apply(
-        batching.unbatch())
-    iterator = dataset.make_initializable_iterator()
-    next_elem = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer, feed_dict={placeholder: [0, 1, 2, 3]})
-      for i in range(4):
-        self.assertEqual(i, sess.run(next_elem))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_elem)
-
-  def testUnbatchScalarDataset(self):
-    data = tuple([math_ops.range(10) for _ in range(3)])
-    data = dataset_ops.Dataset.from_tensor_slices(data)
-    expected_types = (dtypes.int32,) * 3
-    data = data.batch(2)
-    self.assertEqual(expected_types, data.output_types)
-    data = data.apply(batching.unbatch())
-    self.assertEqual(expected_types, data.output_types)
-
-    iterator = data.make_one_shot_iterator()
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual((i,) * 3, sess.run(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
-
-  def testUnbatchDatasetWithStrings(self):
-    data = tuple([math_ops.range(10) for _ in range(3)])
-    data = dataset_ops.Dataset.from_tensor_slices(data)
-    data = data.map(lambda x, y, z: (x, string_ops.as_string(y), z))
-    expected_types = (dtypes.int32, dtypes.string, dtypes.int32)
-    data = data.batch(2)
-    self.assertEqual(expected_types, data.output_types)
-    data = data.apply(batching.unbatch())
-    self.assertEqual(expected_types, data.output_types)
-
-    iterator = data.make_one_shot_iterator()
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual((i, compat.as_bytes(str(i)), i), sess.run(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
-
-  def testUnbatchDatasetWithSparseTensor(self):
-    st = sparse_tensor.SparseTensorValue(
-        indices=[[i, i] for i in range(10)],
-        values=list(range(10)),
-        dense_shape=[10, 10])
-    data = dataset_ops.Dataset.from_tensors(st)
-    data = data.apply(batching.unbatch())
-    data = data.batch(5)
-    data = data.apply(batching.unbatch())
-    iterator = data.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        st_row = sess.run(next_element)
-        self.assertEqual([i], st_row.indices)
-        self.assertEqual([i], st_row.values)
-        self.assertEqual([10], st_row.dense_shape)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testUnbatchDatasetWithDenseAndSparseTensor(self):
-    st = sparse_tensor.SparseTensorValue(
-        indices=[[i, i] for i in range(10)],
-        values=list(range(10)),
-        dense_shape=[10, 10])
-    data = dataset_ops.Dataset.from_tensors((list(range(10)), st))
-    data = data.apply(batching.unbatch())
-    data = data.batch(5)
-    data = data.apply(batching.unbatch())
-    iterator = data.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        dense_elem, st_row = sess.run(next_element)
-        self.assertEqual(i, dense_elem)
-        self.assertEqual([i], st_row.indices)
-        self.assertEqual([i], st_row.values)
-        self.assertEqual([10], st_row.dense_shape)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testUnbatchSingleElementTupleDataset(self):
-    data = tuple([(math_ops.range(10),) for _ in range(3)])
-    data = dataset_ops.Dataset.from_tensor_slices(data)
-    expected_types = ((dtypes.int32,),) * 3
-    data = data.batch(2)
-    self.assertEqual(expected_types, data.output_types)
-    data = data.apply(batching.unbatch())
-    self.assertEqual(expected_types, data.output_types)
-
-    iterator = data.make_one_shot_iterator()
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(((i,),) * 3, sess.run(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
-
-  def testUnbatchMultiElementTupleDataset(self):
-    data = tuple([(math_ops.range(10 * i, 10 * i + 10),
-                   array_ops.fill([10], "hi")) for i in range(3)])
-    data = dataset_ops.Dataset.from_tensor_slices(data)
-    expected_types = ((dtypes.int32, dtypes.string),) * 3
-    data = data.batch(2)
-    self.assertAllEqual(expected_types, data.output_types)
-    data = data.apply(batching.unbatch())
-    self.assertAllEqual(expected_types, data.output_types)
-
-    iterator = data.make_one_shot_iterator()
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")),
-                         sess.run(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
-
-  def testUnbatchEmpty(self):
-    data = dataset_ops.Dataset.from_tensors(
-        (constant_op.constant([]), constant_op.constant([], shape=[0, 4]),
-         constant_op.constant([], shape=[0, 4, 0])))
-    data = data.apply(batching.unbatch())
-    iterator = data.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testUnbatchStaticShapeMismatch(self):
-    data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8),
-                                             np.arange(9)))
-    with self.assertRaises(ValueError):
-      data.apply(batching.unbatch())
-
-  def testUnbatchDynamicShapeMismatch(self):
-    ph1 = array_ops.placeholder(dtypes.int32, shape=[None])
-    ph2 = array_ops.placeholder(dtypes.int32, shape=None)
-    data = dataset_ops.Dataset.from_tensors((ph1, ph2))
-    data = data.apply(batching.unbatch())
-    iterator = data.make_initializable_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      # Mismatch in the 0th dimension.
-      sess.run(
-          iterator.initializer,
-          feed_dict={
-              ph1: np.arange(7).astype(np.int32),
-              ph2: np.arange(8).astype(np.int32)
-          })
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(next_element)
-
-      # No 0th dimension (i.e. scalar value) for one component.
-      sess.run(
-          iterator.initializer,
-          feed_dict={
-              ph1: np.arange(7).astype(np.int32),
-              ph2: 7
-          })
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(next_element)
-
-  @parameterized.named_parameters(
-      ("Default", None, None),
-      ("SequentialCalls", 1, None),
-      ("ParallelCalls", 2, None),
-      ("ParallelBatches", None, 10),
-  )
-  def testMapAndBatch(self, num_parallel_calls, num_parallel_batches):
-    """Test a dataset that maps a TF function across its input elements."""
-    # The pipeline is TensorSliceDataset ->
-    # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size).
-    components = (np.arange(7),
-                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
-                  np.array(37.0) * np.arange(7))
-
-    count = array_ops.placeholder(dtypes.int64, shape=[])
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-
-    def _map_fn(x, y, z):
-      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
-            batching.map_and_batch(
-                map_func=_map_fn,
-                batch_size=batch_size,
-                num_parallel_calls=num_parallel_calls,
-                num_parallel_batches=num_parallel_batches))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    self.assertEqual([[None] + list(c.shape[1:]) for c in components],
-                     [t.shape.as_list() for t in get_next])
-
-    with self.cached_session() as sess:
-      # Batch of a finite input, where the batch_size divides the
-      # total number of elements.
-      sess.run(init_op, feed_dict={count: 28, batch_size: 14})
-      num_batches = (28 * 7) // 14
-      for i in range(num_batches):
-        result = sess.run(get_next)
-        for component, result_component in zip(components, result):
-          for j in range(14):
-            self.assertAllEqual(component[(i * 14 + j) % 7]**2,
-                                result_component[j])
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-      # Batch of a finite input, where the batch_size does not
-      # divide the total number of elements.
-      sess.run(init_op, feed_dict={count: 14, batch_size: 8})
-
-      # We expect (num_batches - 1) full-sized batches.
-      num_batches = int(math.ceil((14 * 7) / 8))
-      for i in range(num_batches - 1):
-        result = sess.run(get_next)
-        for component, result_component in zip(components, result):
-          for j in range(8):
-            self.assertAllEqual(component[(i * 8 + j) % 7]**2,
-                                result_component[j])
-      result = sess.run(get_next)
-      for component, result_component in zip(components, result):
-        for j in range((14 * 7) % 8):
-          self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
-                              result_component[j])
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-      # Batch of an empty input should fail straight away.
-      sess.run(init_op, feed_dict={count: 0, batch_size: 8})
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-      # Empty batch should be an initialization time error.
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(init_op, feed_dict={count: 14, batch_size: 0})
-
-  @parameterized.named_parameters(
-      ("Even", False),
-      ("Uneven", True),
-  )
-  def testMapAndBatchPartialBatch(self, drop_remainder):
-    iterator = (
-        dataset_ops.Dataset.range(10).apply(
-            batching.map_and_batch(
-                lambda x: array_ops.reshape(x * x, [1]),
-                batch_size=4,
-                drop_remainder=drop_remainder)).make_one_shot_iterator())
-    if drop_remainder:
-      self.assertEqual([4, 1], iterator.output_shapes.as_list())
-    else:
-      self.assertEqual([None, 1], iterator.output_shapes.as_list())
-    next_element = iterator.get_next()
-    with self.cached_session() as sess:
-      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
-      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
-      if not drop_remainder:
-        self.assertAllEqual([[64], [81]], sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testMapAndBatchYieldsPartialBatch(self):
-    iterator = (dataset_ops.Dataset.range(10)
-                .apply(batching.map_and_batch(
-                    lambda x: array_ops.reshape(x * x, [1]), 4))
-                .make_one_shot_iterator())
-    self.assertEqual([None, 1], iterator.output_shapes.as_list())
-    next_element = iterator.get_next()
-    with self.cached_session() as sess:
-      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
-      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
-      self.assertAllEqual([[64], [81]], sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testMapAndBatchParallelGetNext(self):
-    iterator = (dataset_ops.Dataset.range(50000)
-                .apply(batching.map_and_batch(lambda x: x, batch_size=100))
-                .make_one_shot_iterator())
-    elements = []
-    for _ in range(100):
-      elements.append(iterator.get_next())
-    with self.cached_session() as sess:
-      for i in range(5):
-        got = sess.run(elements)
-        got.sort(key=lambda x: x[0])
-        expected = []
-        for j in range(100):
-          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
-        self.assertAllEqual(got, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elements)
-
-  def testMapAndBatchParallelGetNextDropRemainder(self):
-    iterator = (
-        dataset_ops.Dataset.range(49999).apply(
-            batching.map_and_batch(
-                lambda x: x, batch_size=100, drop_remainder=True))
-        .make_one_shot_iterator())
-    elements = []
-    for _ in range(100):
-      elements.append(iterator.get_next())
-    with self.cached_session() as sess:
-      for i in range(4):
-        got = sess.run(elements)
-        got.sort(key=lambda x: x[0])
-        expected = []
-        for j in range(100):
-          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
-        self.assertAllEqual(got, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elements)
-
-  def testMapAndBatchSparse(self):
-
-    def _sparse(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=[[0]], values=(i * [1]), dense_shape=[1])
-
-    iterator = dataset_ops.Dataset.range(10).apply(
-        batching.map_and_batch(_sparse, 5)).make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(2):
-        actual = sess.run(get_next)
-        expected = sparse_tensor.SparseTensorValue(
-            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
-            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
-            dense_shape=[5, 1])
-        self.assertTrue(sparse_tensor.is_sparse(actual))
-        self.assertSparseValuesEqual(actual, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testMapAndBatchFails(self):
-    """Test a dataset that maps a TF function across its input elements."""
-    dataset = dataset_ops.Dataset.from_tensors(
-        array_ops.check_numerics(
-            constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-    iterator = (
-        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    with self.cached_session() as sess:
-      with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
-        sess.run(init_op, feed_dict={batch_size: 14})
-
-  def testMapAndBatchShapeMismatch(self):
-    """Test a dataset that maps a TF function across its input elements."""
-
-    def generator():
-      yield [1]
-      yield [2]
-      yield [3]
-      yield [[4, 5, 6]]
-
-    dataset = dataset_ops.Dataset.from_generator(
-        generator, output_types=dtypes.int32)
-    batch_size = 4
-    iterator = (
-        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "number of elements does not match"):
-        sess.run(get_next)
-
-  def testMapAndBatchImplicitDispose(self):
-    # Tests whether a map and batch dataset will be cleaned up correctly when
-    # the pipeline does not run it until exhaustion.
-    # The pipeline is TensorSliceDataset -> RepeatDataset(1000) ->
-    # MapAndBatchDataset(f=square_3, batch_size=100).
-    components = (np.arange(1000),
-                  np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis],
-                  np.array(37.0) * np.arange(1000))
-
-    def _map_fn(x, y, z):
-      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-
-    dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
-        1000).apply(batching.map_and_batch(_map_fn, batch_size=100))
-    dataset = dataset.prefetch(5)
-    iterator = dataset.make_one_shot_iterator()
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(3):
-        sess.run(get_next)
-
-  @parameterized.named_parameters(
-      ("1", 0),
-      ("2", 5),
-      ("3", 10),
-      ("4", 90),
-      ("5", 95),
-      ("6", 99),
-  )
-  def testMapAndBatchOutOfRangeError(self, threshold):
-
-    def raising_py_fn(i):
-      if i >= threshold:
-        raise StopIteration()
-      else:
-        return i
-
-    iterator = (
-        dataset_ops.Dataset.range(100).apply(
-            batching.map_and_batch(
-                lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
-                batch_size=10)).make_one_shot_iterator())
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(threshold // 10):
-        self.assertAllEqual([i * 10 + j for j in range(10)], sess.run(get_next))
-      if threshold % 10 != 0:
-        self.assertAllEqual(
-            [threshold // 10 * 10 + j for j in range(threshold % 10)],
-            sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  @parameterized.named_parameters(
-      ("1", False, dtypes.bool),
-      ("2", -42, dtypes.int8),
-      ("3", -42, dtypes.int16),
-      ("4", -42, dtypes.int32),
-      ("5", -42, dtypes.int64),
-      ("6", 42, dtypes.uint8),
-      ("7", 42, dtypes.uint16),
-      ("8", 42.0, dtypes.float16),
-      ("9", 42.0, dtypes.float32),
-      ("10", 42.0, dtypes.float64),
-      ("11", b"hello", dtypes.string),
-  )
-  def testMapAndBatchTypes(self, element, dtype):
-    def gen():
-      yield element
-
-    dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply(
-        batching.map_and_batch(lambda x: x, batch_size=10))
-
-    get_next = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(10):
-        self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
-
-
-class UnbatchDatasetBenchmark(test.Benchmark):
-
-  def benchmarkNativeUnbatch(self):
-    batch_sizes = [1, 2, 5, 10, 20, 50]
-    elems_per_trial = 10000
-    with ops.Graph().as_default():
-      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
-      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
-      dataset = dataset.batch(batch_size_placeholder)
-      dataset = dataset.apply(batching.unbatch())
-      dataset = dataset.skip(elems_per_trial)
-      iterator = dataset.make_initializable_iterator()
-      next_element = iterator.get_next()
-
-      with session.Session() as sess:
-        for batch_size in batch_sizes:
-          deltas = []
-          for _ in range(5):
-            sess.run(
-                iterator.initializer,
-                feed_dict={batch_size_placeholder: batch_size})
-            start = time.time()
-            sess.run(next_element.op)
-            end = time.time()
-            deltas.append((end - start) / elems_per_trial)
-
-          median_wall_time = np.median(deltas)
-          print("Unbatch (native) batch size: %d Median wall time per element:"
-                " %f microseconds" % (batch_size, median_wall_time * 1e6))
-          self.report_benchmark(
-              iters=10000,
-              wall_time=median_wall_time,
-              name="benchmark_unbatch_dataset_native_batch_size_%d" %
-              batch_size)
-
-  # Include a benchmark of the previous `unbatch()` implementation that uses
-  # a composition of more primitive ops. Eventually we'd hope to generate code
-  # that is as good in both cases.
-  def benchmarkOldUnbatchImplementation(self):
-    batch_sizes = [1, 2, 5, 10, 20, 50]
-    elems_per_trial = 10000
-    with ops.Graph().as_default():
-      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
-      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
-      dataset = dataset.batch(batch_size_placeholder)
-      dataset = dataset.flat_map(dataset_ops.Dataset.from_tensor_slices)
-      dataset = dataset.skip(elems_per_trial)
-      iterator = dataset.make_initializable_iterator()
-      next_element = iterator.get_next()
-
-      with session.Session() as sess:
-        for batch_size in batch_sizes:
-          deltas = []
-          for _ in range(5):
-            sess.run(
-                iterator.initializer,
-                feed_dict={batch_size_placeholder: batch_size})
-            start = time.time()
-            sess.run(next_element.op)
-            end = time.time()
-            deltas.append((end - start) / elems_per_trial)
-
-          median_wall_time = np.median(deltas)
-          print("Unbatch (unfused) batch size: %d Median wall time per element:"
-                " %f microseconds" % (batch_size, median_wall_time * 1e6))
-          self.report_benchmark(
-              iters=10000,
-              wall_time=median_wall_time,
-              name="benchmark_unbatch_dataset_unfused_batch_size_%d" %
-              batch_size)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
new file mode 100644
index 0000000000..3903ec49b9
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
@@ -0,0 +1,322 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.bucket_by_sequence_length()."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import random
+
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+def _element_length_fn(x, y=None):
+  del y
+  return array_ops.shape(x)[0]
+
+
+def _to_sparse_tensor(record):
+  return sparse_tensor.SparseTensor(**record)
+
+
+def _format_record(array, sparse):
+  if sparse:
+    return {
+        "values": array,
+        "indices": [[i] for i in range(len(array))],
+        "dense_shape": (len(array),)
+    }
+  return array
+
+
+def _get_record_type(sparse):
+  if sparse:
+    return {
+        "values": dtypes.int64,
+        "indices": dtypes.int64,
+        "dense_shape": dtypes.int64
+    }
+  return dtypes.int32
+
+
+def _get_record_shape(sparse):
+  if sparse:
+    return {
+        "values": tensor_shape.TensorShape([None,]),
+        "indices": tensor_shape.TensorShape([None, 1]),
+        "dense_shape": tensor_shape.TensorShape([1,])
+    }
+  return tensor_shape.TensorShape([None])
+
+
+class BucketBySequenceLengthTest(test_base.DatasetTestBase):
+
+  def testBucket(self):
+
+    boundaries = [10, 20, 30]
+    batch_sizes = [10, 8, 4, 2]
+    lengths = [8, 13, 25, 35]
+
+    def build_dataset(sparse):
+      def _generator():
+        # Produce 1 batch for each bucket
+        elements = []
+        for batch_size, length in zip(batch_sizes, lengths):
+          record_len = length - 1
+          for _ in range(batch_size):
+            elements.append([1] * record_len)
+            record_len = length
+        random.shuffle(elements)
+        for el in elements:
+          yield (_format_record(el, sparse),)
+      dataset = dataset_ops.Dataset.from_generator(
+          _generator,
+          (_get_record_type(sparse),),
+          (_get_record_shape(sparse),))
+      if sparse:
+        dataset = dataset.map(lambda x: (_to_sparse_tensor(x),))
+      return dataset
+
+    def _test_bucket_by_padding(no_padding):
+      dataset = build_dataset(sparse=no_padding)
+      dataset = dataset.apply(
+          grouping.bucket_by_sequence_length(
+              _element_length_fn,
+              boundaries,
+              batch_sizes,
+              no_padding=no_padding))
+      batch, = dataset.make_one_shot_iterator().get_next()
+
+      with self.cached_session() as sess:
+        batches = []
+        for _ in range(4):
+          batches.append(sess.run(batch))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(batch)
+      batch_sizes_val = []
+      lengths_val = []
+      for batch in batches:
+        shape = batch.dense_shape if no_padding else batch.shape
+        batch_size = shape[0]
+        length = shape[1]
+        batch_sizes_val.append(batch_size)
+        lengths_val.append(length)
+        sum_check = batch.values.sum() if no_padding else batch.sum()
+        self.assertEqual(sum_check, batch_size * length - 1)
+      self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
+      self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
+      self.assertEqual(sorted(lengths), sorted(lengths_val))
+
+    for no_padding in (True, False):
+      _test_bucket_by_padding(no_padding)
+
+  def testPadToBoundary(self):
+
+    boundaries = [10, 20, 30]
+    batch_sizes = [10, 8, 4, 2]
+    lengths = [8, 13, 25]
+
+    def element_gen():
+      # Produce 1 batch for each bucket
+      elements = []
+      for batch_size, length in zip(batch_sizes[:-1], lengths):
+        for _ in range(batch_size):
+          elements.append([1] * length)
+      random.shuffle(elements)
+      for el in elements:
+        yield (el,)
+      for _ in range(batch_sizes[-1]):
+        el = [1] * (boundaries[-1] + 5)
+        yield (el,)
+
+    element_len = lambda el: array_ops.shape(el)[0]
+    dataset = dataset_ops.Dataset.from_generator(
+        element_gen, (dtypes.int64,), ([None],)).apply(
+            grouping.bucket_by_sequence_length(
+                element_len, boundaries, batch_sizes,
+                pad_to_bucket_boundary=True))
+    batch, = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      batches = []
+      for _ in range(3):
+        batches.append(sess.run(batch))
+      with self.assertRaisesOpError("bucket_boundaries"):
+        sess.run(batch)
+    batch_sizes_val = []
+    lengths_val = []
+    for batch in batches:
+      batch_size = batch.shape[0]
+      length = batch.shape[1]
+      batch_sizes_val.append(batch_size)
+      lengths_val.append(length)
+    batch_sizes = batch_sizes[:-1]
+    self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
+    self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
+    self.assertEqual([boundary - 1 for boundary in sorted(boundaries)],
+                     sorted(lengths_val))
+
+  def testPadToBoundaryNoExtraneousPadding(self):
+
+    boundaries = [3, 7, 11]
+    batch_sizes = [2, 2, 2, 2]
+    lengths = range(1, 11)
+
+    def element_gen():
+      for length in lengths:
+        yield ([1] * length,)
+
+    element_len = lambda element: array_ops.shape(element)[0]
+    dataset = dataset_ops.Dataset.from_generator(
+        element_gen, (dtypes.int64,), ([None],)).apply(
+            grouping.bucket_by_sequence_length(
+                element_len, boundaries, batch_sizes,
+                pad_to_bucket_boundary=True))
+    batch, = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      batches = []
+      for _ in range(5):
+        batches.append(sess.run(batch))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(batch)
+
+    self.assertAllEqual(batches[0], [[1, 0],
+                                     [1, 1]])
+    self.assertAllEqual(batches[1], [[1, 1, 1, 0, 0, 0],
+                                     [1, 1, 1, 1, 0, 0]])
+    self.assertAllEqual(batches[2], [[1, 1, 1, 1, 1, 0],
+                                     [1, 1, 1, 1, 1, 1]])
+    self.assertAllEqual(batches[3], [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
+                                     [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])
+    self.assertAllEqual(batches[4], [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
+                                     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
+
+  def testTupleElements(self):
+
+    def build_dataset(sparse):
+      def _generator():
+        text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]]
+        label = [1, 2, 1, 2]
+        for x, y in zip(text, label):
+          yield (_format_record(x, sparse), y)
+      dataset = dataset_ops.Dataset.from_generator(
+          generator=_generator,
+          output_types=(_get_record_type(sparse), dtypes.int32),
+          output_shapes=(_get_record_shape(sparse),
+                         tensor_shape.TensorShape([])))
+      if sparse:
+        dataset = dataset.map(lambda x, y: (_to_sparse_tensor(x), y))
+      return dataset
+
+    def _test_tuple_elements_by_padding(no_padding):
+      dataset = build_dataset(sparse=no_padding)
+      dataset = dataset.apply(grouping.bucket_by_sequence_length(
+          element_length_func=_element_length_fn,
+          bucket_batch_sizes=[2, 2, 2],
+          bucket_boundaries=[0, 8],
+          no_padding=no_padding))
+      shapes = dataset.output_shapes
+      self.assertEqual([None, None], shapes[0].as_list())
+      self.assertEqual([None], shapes[1].as_list())
+
+    for no_padding in (True, False):
+      _test_tuple_elements_by_padding(no_padding)
+
+  def testBucketSparse(self):
+    """Tests bucketing of sparse tensors (case where `no_padding` == True).
+
+    Test runs on following dataset:
+      [
+        [0],
+        [0, 1],
+        [0, 1, 2]
+        ...
+        [0, ..., max_len - 1]
+      ]
+    Sequences are bucketed by length and batched with
+      `batch_size` < `bucket_size`.
+    """
+
+    min_len = 0
+    max_len = 100
+    batch_size = 7
+    bucket_size = 10
+
+    def _build_dataset():
+      input_data = [range(i+1) for i in range(min_len, max_len)]
+      def generator_fn():
+        for record in input_data:
+          yield _format_record(record, sparse=True)
+      dataset = dataset_ops.Dataset.from_generator(
+          generator=generator_fn,
+          output_types=_get_record_type(sparse=True))
+      dataset = dataset.map(_to_sparse_tensor)
+      return dataset
+
+    def _compute_expected_batches():
+      """Computes expected batch outputs and stores in a set."""
+      all_expected_sparse_tensors = set()
+      for bucket_start_len in range(min_len, max_len, bucket_size):
+        for batch_offset in range(0, bucket_size, batch_size):
+          batch_start_len = bucket_start_len + batch_offset
+          batch_end_len = min(batch_start_len + batch_size,
+                              bucket_start_len + bucket_size)
+          expected_indices = []
+          expected_values = []
+          for length in range(batch_start_len, batch_end_len):
+            for val in range(length + 1):
+              expected_indices.append((length - batch_start_len, val))
+              expected_values.append(val)
+          expected_sprs_tensor = (tuple(expected_indices),
+                                  tuple(expected_values))
+          all_expected_sparse_tensors.add(expected_sprs_tensor)
+      return all_expected_sparse_tensors
+
+    def _compute_batches(dataset):
+      """Computes actual batch outputs of dataset and stores in a set."""
+      batch = dataset.make_one_shot_iterator().get_next()
+      all_sparse_tensors = set()
+      with self.cached_session() as sess:
+        with self.assertRaises(errors.OutOfRangeError):
+          while True:
+            output = sess.run(batch)
+            sprs_tensor = (tuple([tuple(idx) for idx in output.indices]),
+                           tuple(output.values))
+            all_sparse_tensors.add(sprs_tensor)
+      return all_sparse_tensors
+
+    dataset = _build_dataset()
+    boundaries = range(min_len + bucket_size + 1, max_len, bucket_size)
+    dataset = dataset.apply(grouping.bucket_by_sequence_length(
+        _element_length_fn,
+        boundaries,
+        [batch_size] * (len(boundaries) + 1),
+        no_padding=True))
+    batches = _compute_batches(dataset)
+    expected_batches = _compute_expected_batches()
+    self.assertEqual(batches, expected_batches)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py b/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
deleted file mode 100644
index 153a03989b..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/bucketing_test.py
+++ /dev/null
@@ -1,824 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for the experimental input pipeline ops."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import random
-
-import numpy as np
-
-from tensorflow.python.data.experimental.ops import grouping
-from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.platform import test
-
-
-class GroupByReducerTest(test_base.DatasetTestBase):
-
-  def checkResults(self, dataset, shapes, values):
-    self.assertEqual(shapes, dataset.output_shapes)
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      for expected in values:
-        got = sess.run(get_next)
-        self.assertEqual(got, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  def testSum(self):
-    reducer = grouping.Reducer(
-        init_func=lambda _: np.int64(0),
-        reduce_func=lambda x, y: x + y,
-        finalize_func=lambda x: x)
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.range(2 * i).apply(
-          grouping.group_by_reducer(lambda x: x % 2, reducer))
-      self.checkResults(
-          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
-
-  def testAverage(self):
-
-    def reduce_fn(x, y):
-      return (x[0] * x[1] + math_ops.cast(y, dtypes.float32)) / (
-          x[1] + 1), x[1] + 1
-
-    reducer = grouping.Reducer(
-        init_func=lambda _: (0.0, 0.0),
-        reduce_func=reduce_fn,
-        finalize_func=lambda x, _: x)
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.range(2 * i).apply(
-          grouping.group_by_reducer(
-              lambda x: math_ops.cast(x, dtypes.int64) % 2, reducer))
-      self.checkResults(
-          dataset, shapes=tensor_shape.scalar(), values=[i - 1, i])
-
-  def testConcat(self):
-    components = np.array(list("abcdefghijklmnopqrst")).view(np.chararray)
-    reducer = grouping.Reducer(
-        init_func=lambda x: "",
-        reduce_func=lambda x, y: x + y[0],
-        finalize_func=lambda x: x)
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.zip(
-          (dataset_ops.Dataset.from_tensor_slices(components),
-           dataset_ops.Dataset.range(2 * i))).apply(
-               grouping.group_by_reducer(lambda x, y: y % 2, reducer))
-      self.checkResults(
-          dataset,
-          shapes=tensor_shape.scalar(),
-          values=[b"acegikmoqs" [:i], b"bdfhjlnprt" [:i]])
-
-  def testSparseSum(self):
-    def _sparse(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=np.array([[0, 0]]),
-          values=(i * np.array([1], dtype=np.int64)),
-          dense_shape=np.array([1, 1]))
-
-    reducer = grouping.Reducer(
-        init_func=lambda _: _sparse(np.int64(0)),
-        reduce_func=lambda x, y: _sparse(x.values[0] + y.values[0]),
-        finalize_func=lambda x: x.values[0])
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.range(2 * i).map(_sparse).apply(
-          grouping.group_by_reducer(lambda x: x.values[0] % 2, reducer))
-      self.checkResults(
-          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
-
-  def testChangingStateShape(self):
-
-    def reduce_fn(x, _):
-      # Statically known rank, but dynamic length.
-      larger_dim = array_ops.concat([x[0], x[0]], 0)
-      # Statically unknown rank.
-      larger_rank = array_ops.expand_dims(x[1], 0)
-      return larger_dim, larger_rank
-
-    reducer = grouping.Reducer(
-        init_func=lambda x: ([0], 1),
-        reduce_func=reduce_fn,
-        finalize_func=lambda x, y: (x, y))
-
-    for i in range(1, 11):
-      dataset = dataset_ops.Dataset.from_tensors(np.int64(0)).repeat(i).apply(
-          grouping.group_by_reducer(lambda x: x, reducer))
-      self.assertEqual([None], dataset.output_shapes[0].as_list())
-      self.assertIs(None, dataset.output_shapes[1].ndims)
-      iterator = dataset.make_one_shot_iterator()
-      get_next = iterator.get_next()
-      with self.cached_session() as sess:
-        x, y = sess.run(get_next)
-        self.assertAllEqual([0] * (2**i), x)
-        self.assertAllEqual(np.array(1, ndmin=i), y)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  def testTypeMismatch(self):
-    reducer = grouping.Reducer(
-        init_func=lambda x: constant_op.constant(1, dtype=dtypes.int32),
-        reduce_func=lambda x, y: constant_op.constant(1, dtype=dtypes.int64),
-        finalize_func=lambda x: x)
-
-    dataset = dataset_ops.Dataset.range(10)
-    with self.assertRaisesRegexp(
-        TypeError,
-        "The element types for the new state must match the initial state."):
-      dataset.apply(
-          grouping.group_by_reducer(lambda _: np.int64(0), reducer))
-
-  # TODO(b/78665031): Remove once non-scalar keys are supported.
-  def testInvalidKeyShape(self):
-    reducer = grouping.Reducer(
-        init_func=lambda x: np.int64(0),
-        reduce_func=lambda x, y: x + y,
-        finalize_func=lambda x: x)
-
-    dataset = dataset_ops.Dataset.range(10)
-    with self.assertRaisesRegexp(
-        ValueError, "`key_func` must return a single tf.int64 tensor."):
-      dataset.apply(
-          grouping.group_by_reducer(lambda _: np.int64((0, 0)), reducer))
-
-  # TODO(b/78665031): Remove once non-int64 keys are supported.
-  def testInvalidKeyType(self):
-    reducer = grouping.Reducer(
-        init_func=lambda x: np.int64(0),
-        reduce_func=lambda x, y: x + y,
-        finalize_func=lambda x: x)
-
-    dataset = dataset_ops.Dataset.range(10)
-    with self.assertRaisesRegexp(
-        ValueError, "`key_func` must return a single tf.int64 tensor."):
-      dataset.apply(
-          grouping.group_by_reducer(lambda _: "wrong", reducer))
-
-  def testTuple(self):
-    def init_fn(_):
-      return np.array([], dtype=np.int64), np.int64(0)
-
-    def reduce_fn(state, value):
-      s1, s2 = state
-      v1, v2 = value
-      return array_ops.concat([s1, [v1]], 0), s2 + v2
-
-    def finalize_fn(s1, s2):
-      return s1, s2
-
-    reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
-    dataset = dataset_ops.Dataset.zip(
-        (dataset_ops.Dataset.range(10), dataset_ops.Dataset.range(10))).apply(
-            grouping.group_by_reducer(lambda x, y: np.int64(0), reducer))
-    get_next = dataset.make_one_shot_iterator().get_next()
-    with self.cached_session() as sess:
-      x, y = sess.run(get_next)
-      self.assertAllEqual(x, np.asarray([x for x in range(10)]))
-      self.assertEqual(y, 45)
-
-
-class GroupByWindowTest(test_base.DatasetTestBase):
-
-  def testSimple(self):
-    components = np.random.randint(100, size=(200,)).astype(np.int64)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).map(lambda x: x * x)
-        .apply(
-            grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
-                                     4)).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      counts = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          result = sess.run(get_next)
-          self.assertTrue(
-              all(x % 2 == 0
-                  for x in result) or all(x % 2 == 1)
-              for x in result)
-          counts.append(result.shape[0])
-
-      self.assertEqual(len(components), sum(counts))
-      num_full_batches = len([c for c in counts if c == 4])
-      self.assertGreaterEqual(num_full_batches, 24)
-      self.assertTrue(all(c == 4 for c in counts[:num_full_batches]))
-
-  def testImmediateOutput(self):
-    components = np.array(
-        [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0], dtype=np.int64)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).repeat(-1).apply(
-            grouping.group_by_window(lambda x: x % 3, lambda _, xs: xs.batch(4),
-                                     4)).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      # The input is infinite, so this test demonstrates that:
-      # 1. We produce output without having to consume the entire input,
-      # 2. Different buckets can produce output at different rates, and
-      # 3. For deterministic input, the output is deterministic.
-      for _ in range(3):
-        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
-        self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
-        self.assertAllEqual([2, 2, 2, 2], sess.run(get_next))
-        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
-
-  def testSmallGroups(self):
-    components = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64)
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
-                                     4)).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
-      self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
-      # The small outputs at the end are deterministically produced in key
-      # order.
-      self.assertAllEqual([0, 0, 0], sess.run(get_next))
-      self.assertAllEqual([1], sess.run(get_next))
-
-  def testEmpty(self):
-    iterator = (
-        dataset_ops.Dataset.range(4).apply(
-            grouping.group_by_window(lambda _: 0, lambda _, xs: xs, 0))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          "Window size must be greater than zero, but got 0."):
-        print(sess.run(get_next))
-
-  def testReduceFuncError(self):
-    components = np.random.randint(100, size=(200,)).astype(np.int64)
-
-    def reduce_func(_, xs):
-      # Introduce an incorrect padded shape that cannot (currently) be
-      # detected at graph construction time.
-      return xs.padded_batch(
-          4,
-          padded_shapes=(tensor_shape.TensorShape([]),
-                         constant_op.constant([5], dtype=dtypes.int64) * -1))
-
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: (x, ops.convert_to_tensor([x * x]))).apply(
-            grouping.group_by_window(lambda x, _: x % 2, reduce_func,
-                                     32)).make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-
-  def testConsumeWindowDatasetMoreThanOnce(self):
-    components = np.random.randint(50, size=(200,)).astype(np.int64)
-
-    def reduce_func(key, window):
-      # Apply two different kinds of padding to the input: tight
-      # padding, and quantized (to a multiple of 10) padding.
-      return dataset_ops.Dataset.zip((
-          window.padded_batch(
-              4, padded_shapes=tensor_shape.TensorShape([None])),
-          window.padded_batch(
-              4, padded_shapes=ops.convert_to_tensor([(key + 1) * 10])),
-      ))
-
-    iterator = (
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.fill([math_ops.cast(x, dtypes.int32)], x))
-        .apply(grouping.group_by_window(
-            lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64),
-            reduce_func, 4))
-        .make_initializable_iterator())
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      counts = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          tight_result, multiple_of_10_result = sess.run(get_next)
-          self.assertEqual(0, multiple_of_10_result.shape[1] % 10)
-          self.assertAllEqual(tight_result,
-                              multiple_of_10_result[:, :tight_result.shape[1]])
-          counts.append(tight_result.shape[0])
-      self.assertEqual(len(components), sum(counts))
-
-
-# NOTE(mrry): These tests are based on the tests in bucket_ops_test.py.
-# Currently, they use a constant batch size, though should be made to use a
-# different batch size per key.
-class BucketTest(test_base.DatasetTestBase):
-
-  def _dynamicPad(self, bucket, window, window_size):
-    # TODO(mrry): To match `tf.contrib.training.bucket()`, implement a
-    # generic form of padded_batch that pads every component
-    # dynamically and does not rely on static shape information about
-    # the arguments.
-    return dataset_ops.Dataset.zip(
-        (dataset_ops.Dataset.from_tensors(bucket),
-         window.padded_batch(
-             32, (tensor_shape.TensorShape([]), tensor_shape.TensorShape(
-                 [None]), tensor_shape.TensorShape([3])))))
-
-  def testSingleBucket(self):
-
-    def _map_fn(v):
-      return (v, array_ops.fill([v], v),
-              array_ops.fill([3], string_ops.as_string(v)))
-
-    input_dataset = (
-        dataset_ops.Dataset.from_tensor_slices(math_ops.range(32)).map(_map_fn))
-
-    bucketed_dataset = input_dataset.apply(
-        grouping.group_by_window(
-            lambda x, y, z: 0,
-            lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
-
-    iterator = bucketed_dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      which_bucket, bucketed_values = sess.run(get_next)
-
-      self.assertEqual(0, which_bucket)
-
-      expected_scalar_int = np.arange(32, dtype=np.int64)
-      expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
-      for i in range(32):
-        expected_unk_int64[i, :i] = i
-      expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T
-
-      self.assertAllEqual(expected_scalar_int, bucketed_values[0])
-      self.assertAllEqual(expected_unk_int64, bucketed_values[1])
-      self.assertAllEqual(expected_vec3_str, bucketed_values[2])
-
-  def testEvenOddBuckets(self):
-
-    def _map_fn(v):
-      return (v, array_ops.fill([v], v),
-              array_ops.fill([3], string_ops.as_string(v)))
-
-    input_dataset = (
-        dataset_ops.Dataset.from_tensor_slices(math_ops.range(64)).map(_map_fn))
-
-    bucketed_dataset = input_dataset.apply(
-        grouping.group_by_window(
-            lambda x, y, z: math_ops.cast(x % 2, dtypes.int64),
-            lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
-
-    iterator = bucketed_dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      # Get two minibatches (one containing even values, one containing odds)
-      which_bucket_even, bucketed_values_even = sess.run(get_next)
-      which_bucket_odd, bucketed_values_odd = sess.run(get_next)
-
-      # Count number of bucket_tensors.
-      self.assertEqual(3, len(bucketed_values_even))
-      self.assertEqual(3, len(bucketed_values_odd))
-
-      # Ensure bucket 0 was used for all minibatch entries.
-      self.assertAllEqual(0, which_bucket_even)
-      self.assertAllEqual(1, which_bucket_odd)
-
-      # Test the first bucket outputted, the events starting at 0
-      expected_scalar_int = np.arange(0, 32 * 2, 2, dtype=np.int64)
-      expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
-      for i in range(0, 32):
-        expected_unk_int64[i, :2 * i] = 2 * i
-        expected_vec3_str = np.vstack(
-            3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T
-
-      self.assertAllEqual(expected_scalar_int, bucketed_values_even[0])
-      self.assertAllEqual(expected_unk_int64, bucketed_values_even[1])
-      self.assertAllEqual(expected_vec3_str, bucketed_values_even[2])
-
-      # Test the second bucket outputted, the odds starting at 1
-      expected_scalar_int = np.arange(1, 32 * 2 + 1, 2, dtype=np.int64)
-      expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
-      for i in range(0, 32):
-        expected_unk_int64[i, :2 * i + 1] = 2 * i + 1
-        expected_vec3_str = np.vstack(
-            3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T
-
-      self.assertAllEqual(expected_scalar_int, bucketed_values_odd[0])
-      self.assertAllEqual(expected_unk_int64, bucketed_values_odd[1])
-      self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2])
-
-  def testEvenOddBucketsFilterOutAllOdd(self):
-
-    def _map_fn(v):
-      return {
-          "x": v,
-          "y": array_ops.fill([v], v),
-          "z": array_ops.fill([3], string_ops.as_string(v))
-      }
-
-    def _dynamic_pad_fn(bucket, window, _):
-      return dataset_ops.Dataset.zip(
-          (dataset_ops.Dataset.from_tensors(bucket),
-           window.padded_batch(
-               32, {
-                   "x": tensor_shape.TensorShape([]),
-                   "y": tensor_shape.TensorShape([None]),
-                   "z": tensor_shape.TensorShape([3])
-               })))
-
-    input_dataset = (
-        dataset_ops.Dataset.from_tensor_slices(math_ops.range(128)).map(_map_fn)
-        .filter(lambda d: math_ops.equal(d["x"] % 2, 0)))
-
-    bucketed_dataset = input_dataset.apply(
-        grouping.group_by_window(
-            lambda d: math_ops.cast(d["x"] % 2, dtypes.int64),
-            lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32))
-
-    iterator = bucketed_dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-
-      # Get two minibatches ([0, 2, ...] and [64, 66, ...])
-      which_bucket0, bucketed_values_even0 = sess.run(get_next)
-      which_bucket1, bucketed_values_even1 = sess.run(get_next)
-
-      # Ensure that bucket 1 was completely filtered out
-      self.assertAllEqual(0, which_bucket0)
-      self.assertAllEqual(0, which_bucket1)
-      self.assertAllEqual(
-          np.arange(0, 64, 2, dtype=np.int64), bucketed_values_even0["x"])
-      self.assertAllEqual(
-          np.arange(64, 128, 2, dtype=np.int64), bucketed_values_even1["x"])
-
-  def testDynamicWindowSize(self):
-    components = np.arange(100).astype(np.int64)
-
-    # Key fn: even/odd
-    # Reduce fn: batches of 5
-    # Window size fn: even=5, odd=10
-
-    def window_size_func(key):
-      window_sizes = constant_op.constant([5, 10], dtype=dtypes.int64)
-      return window_sizes[key]
-
-    dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
-        grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(20),
-                                 None, window_size_func))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaises(errors.OutOfRangeError):
-        batches = 0
-        while True:
-          result = sess.run(get_next)
-          is_even = all(x % 2 == 0 for x in result)
-          is_odd = all(x % 2 == 1 for x in result)
-          self.assertTrue(is_even or is_odd)
-          expected_batch_size = 5 if is_even else 10
-          self.assertEqual(expected_batch_size, result.shape[0])
-          batches += 1
-
-      self.assertEqual(batches, 15)
-
-
-def _element_length_fn(x, y=None):
-  del y
-  return array_ops.shape(x)[0]
-
-
-def _to_sparse_tensor(record):
-  return sparse_tensor.SparseTensor(**record)
-
-
-def _format_record(array, sparse):
-  if sparse:
-    return {
-        "values": array,
-        "indices": [[i] for i in range(len(array))],
-        "dense_shape": (len(array),)
-    }
-  return array
-
-
-def _get_record_type(sparse):
-  if sparse:
-    return {
-        "values": dtypes.int64,
-        "indices": dtypes.int64,
-        "dense_shape": dtypes.int64
-    }
-  return dtypes.int32
-
-
-def _get_record_shape(sparse):
-  if sparse:
-    return {
-        "values": tensor_shape.TensorShape([None,]),
-        "indices": tensor_shape.TensorShape([None, 1]),
-        "dense_shape": tensor_shape.TensorShape([1,])
-    }
-  return tensor_shape.TensorShape([None])
-
-
-class BucketBySequenceLength(test_base.DatasetTestBase):
-
-  def testBucket(self):
-
-    boundaries = [10, 20, 30]
-    batch_sizes = [10, 8, 4, 2]
-    lengths = [8, 13, 25, 35]
-
-    def build_dataset(sparse):
-      def _generator():
-        # Produce 1 batch for each bucket
-        elements = []
-        for batch_size, length in zip(batch_sizes, lengths):
-          record_len = length - 1
-          for _ in range(batch_size):
-            elements.append([1] * record_len)
-            record_len = length
-        random.shuffle(elements)
-        for el in elements:
-          yield (_format_record(el, sparse),)
-      dataset = dataset_ops.Dataset.from_generator(
-          _generator,
-          (_get_record_type(sparse),),
-          (_get_record_shape(sparse),))
-      if sparse:
-        dataset = dataset.map(lambda x: (_to_sparse_tensor(x),))
-      return dataset
-
-    def _test_bucket_by_padding(no_padding):
-      dataset = build_dataset(sparse=no_padding)
-      dataset = dataset.apply(
-          grouping.bucket_by_sequence_length(
-              _element_length_fn,
-              boundaries,
-              batch_sizes,
-              no_padding=no_padding))
-      batch, = dataset.make_one_shot_iterator().get_next()
-
-      with self.cached_session() as sess:
-        batches = []
-        for _ in range(4):
-          batches.append(sess.run(batch))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(batch)
-      batch_sizes_val = []
-      lengths_val = []
-      for batch in batches:
-        shape = batch.dense_shape if no_padding else batch.shape
-        batch_size = shape[0]
-        length = shape[1]
-        batch_sizes_val.append(batch_size)
-        lengths_val.append(length)
-        sum_check = batch.values.sum() if no_padding else batch.sum()
-        self.assertEqual(sum_check, batch_size * length - 1)
-      self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
-      self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
-      self.assertEqual(sorted(lengths), sorted(lengths_val))
-
-    for no_padding in (True, False):
-      _test_bucket_by_padding(no_padding)
-
-  def testPadToBoundary(self):
-
-    boundaries = [10, 20, 30]
-    batch_sizes = [10, 8, 4, 2]
-    lengths = [8, 13, 25]
-
-    def element_gen():
-      # Produce 1 batch for each bucket
-      elements = []
-      for batch_size, length in zip(batch_sizes[:-1], lengths):
-        for _ in range(batch_size):
-          elements.append([1] * length)
-      random.shuffle(elements)
-      for el in elements:
-        yield (el,)
-      for _ in range(batch_sizes[-1]):
-        el = [1] * (boundaries[-1] + 5)
-        yield (el,)
-
-    element_len = lambda el: array_ops.shape(el)[0]
-    dataset = dataset_ops.Dataset.from_generator(
-        element_gen, (dtypes.int64,), ([None],)).apply(
-            grouping.bucket_by_sequence_length(
-                element_len, boundaries, batch_sizes,
-                pad_to_bucket_boundary=True))
-    batch, = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      batches = []
-      for _ in range(3):
-        batches.append(sess.run(batch))
-      with self.assertRaisesOpError("bucket_boundaries"):
-        sess.run(batch)
-    batch_sizes_val = []
-    lengths_val = []
-    for batch in batches:
-      batch_size = batch.shape[0]
-      length = batch.shape[1]
-      batch_sizes_val.append(batch_size)
-      lengths_val.append(length)
-    batch_sizes = batch_sizes[:-1]
-    self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
-    self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
-    self.assertEqual([boundary - 1 for boundary in sorted(boundaries)],
-                     sorted(lengths_val))
-
-  def testPadToBoundaryNoExtraneousPadding(self):
-
-    boundaries = [3, 7, 11]
-    batch_sizes = [2, 2, 2, 2]
-    lengths = range(1, 11)
-
-    def element_gen():
-      for length in lengths:
-        yield ([1] * length,)
-
-    element_len = lambda element: array_ops.shape(element)[0]
-    dataset = dataset_ops.Dataset.from_generator(
-        element_gen, (dtypes.int64,), ([None],)).apply(
-            grouping.bucket_by_sequence_length(
-                element_len, boundaries, batch_sizes,
-                pad_to_bucket_boundary=True))
-    batch, = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      batches = []
-      for _ in range(5):
-        batches.append(sess.run(batch))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(batch)
-
-    self.assertAllEqual(batches[0], [[1, 0],
-                                     [1, 1]])
-    self.assertAllEqual(batches[1], [[1, 1, 1, 0, 0, 0],
-                                     [1, 1, 1, 1, 0, 0]])
-    self.assertAllEqual(batches[2], [[1, 1, 1, 1, 1, 0],
-                                     [1, 1, 1, 1, 1, 1]])
-    self.assertAllEqual(batches[3], [[1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
-                                     [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])
-    self.assertAllEqual(batches[4], [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
-                                     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
-
-  def testTupleElements(self):
-
-    def build_dataset(sparse):
-      def _generator():
-        text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]]
-        label = [1, 2, 1, 2]
-        for x, y in zip(text, label):
-          yield (_format_record(x, sparse), y)
-      dataset = dataset_ops.Dataset.from_generator(
-          generator=_generator,
-          output_types=(_get_record_type(sparse), dtypes.int32),
-          output_shapes=(_get_record_shape(sparse),
-                         tensor_shape.TensorShape([])))
-      if sparse:
-        dataset = dataset.map(lambda x, y: (_to_sparse_tensor(x), y))
-      return dataset
-
-    def _test_tuple_elements_by_padding(no_padding):
-      dataset = build_dataset(sparse=no_padding)
-      dataset = dataset.apply(grouping.bucket_by_sequence_length(
-          element_length_func=_element_length_fn,
-          bucket_batch_sizes=[2, 2, 2],
-          bucket_boundaries=[0, 8],
-          no_padding=no_padding))
-      shapes = dataset.output_shapes
-      self.assertEqual([None, None], shapes[0].as_list())
-      self.assertEqual([None], shapes[1].as_list())
-
-    for no_padding in (True, False):
-      _test_tuple_elements_by_padding(no_padding)
-
-  def testBucketSparse(self):
-    """Tests bucketing of sparse tensors (case where `no_padding` == True).
-
-    Test runs on following dataset:
-      [
-        [0],
-        [0, 1],
-        [0, 1, 2]
-        ...
-        [0, ..., max_len - 1]
-      ]
-    Sequences are bucketed by length and batched with
-      `batch_size` < `bucket_size`.
-    """
-
-    min_len = 0
-    max_len = 100
-    batch_size = 7
-    bucket_size = 10
-
-    def _build_dataset():
-      input_data = [range(i+1) for i in range(min_len, max_len)]
-      def generator_fn():
-        for record in input_data:
-          yield _format_record(record, sparse=True)
-      dataset = dataset_ops.Dataset.from_generator(
-          generator=generator_fn,
-          output_types=_get_record_type(sparse=True))
-      dataset = dataset.map(_to_sparse_tensor)
-      return dataset
-
-    def _compute_expected_batches():
-      """Computes expected batch outputs and stores in a set."""
-      all_expected_sparse_tensors = set()
-      for bucket_start_len in range(min_len, max_len, bucket_size):
-        for batch_offset in range(0, bucket_size, batch_size):
-          batch_start_len = bucket_start_len + batch_offset
-          batch_end_len = min(batch_start_len + batch_size,
-                              bucket_start_len + bucket_size)
-          expected_indices = []
-          expected_values = []
-          for length in range(batch_start_len, batch_end_len):
-            for val in range(length + 1):
-              expected_indices.append((length - batch_start_len, val))
-              expected_values.append(val)
-          expected_sprs_tensor = (tuple(expected_indices),
-                                  tuple(expected_values))
-          all_expected_sparse_tensors.add(expected_sprs_tensor)
-      return all_expected_sparse_tensors
-
-    def _compute_batches(dataset):
-      """Computes actual batch outputs of dataset and stores in a set."""
-      batch = dataset.make_one_shot_iterator().get_next()
-      all_sparse_tensors = set()
-      with self.cached_session() as sess:
-        with self.assertRaises(errors.OutOfRangeError):
-          while True:
-            output = sess.run(batch)
-            sprs_tensor = (tuple([tuple(idx) for idx in output.indices]),
-                           tuple(output.values))
-            all_sparse_tensors.add(sprs_tensor)
-      return all_sparse_tensors
-
-    dataset = _build_dataset()
-    boundaries = range(min_len + bucket_size + 1, max_len, bucket_size)
-    dataset = dataset.apply(grouping.bucket_by_sequence_length(
-        _element_length_fn,
-        boundaries,
-        [batch_size] * (len(boundaries) + 1),
-        no_padding=True))
-    batches = _compute_batches(dataset)
-    expected_batches = _compute_expected_batches()
-    self.assertEqual(batches, expected_batches)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
similarity index 56%
rename from tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
index 7d7b842c17..adfacf1c9f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/prefetching_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
@@ -12,440 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for prefetching_ops."""
+"""Tests for `tf.data.experimental.copy_to_device()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import threading
-
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.compat import compat
 from tensorflow.python.data.experimental.ops import prefetching_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
-from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import test
 
 
-class PrefetchingKernelsOpsTest(test_base.DatasetTestBase):
-
-  def setUp(self):
-    self._event = threading.Event()
-
-  def _create_ds_and_iterator(self, device0, initializable=False):
-
-    def gen():
-      for i in range(1, 10):
-        yield [float(i)]
-        if i == 6:
-          self._event.set()
-
-    with ops.device(device0):
-      ds = dataset_ops.Dataset.from_generator(gen, (dtypes.float32))
-      if initializable:
-        ds_iterator = ds.make_initializable_iterator()
-      else:
-        ds_iterator = ds.make_one_shot_iterator()
-      return (ds, ds_iterator)
-
-  def _create_ops(self, ds, ds_iterator, buffer_name, device0, device1):
-    ds_iterator_handle = ds_iterator.string_handle()
-
-    @function.Defun(dtypes.string)
-    def _remote_fn(h):
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          h, ds.output_types, ds.output_shapes)
-      return remote_iterator.get_next()
-
-    target = constant_op.constant(device0)
-    with ops.device(device1):
-      buffer_resource_handle = prefetching_ops.function_buffering_resource(
-          f=_remote_fn,
-          output_types=[dtypes.float32],
-          target_device=target,
-          string_arg=ds_iterator_handle,
-          buffer_size=3,
-          shared_name=buffer_name)
-
-    with ops.device(device1):
-      prefetch_op = prefetching_ops.function_buffering_resource_get_next(
-          function_buffer_resource=buffer_resource_handle,
-          output_types=[dtypes.float32])
-      reset_op = prefetching_ops.function_buffering_resource_reset(
-          function_buffer_resource=buffer_resource_handle)
-      destroy_op = resource_variable_ops.destroy_resource_op(
-          buffer_resource_handle, ignore_lookup_error=True)
-
-    return (prefetch_op, reset_op, destroy_op)
-
-  def _prefetch_fn_helper_one_shot(self, buffer_name, device0, device1):
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-
-    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=False)
-    prefetch_op, _, destroy_op = self._create_ops(ds, ds_iterator, buffer_name,
-                                                  device0, device1)
-
-    with self.test_session(config=worker_config) as sess:
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [1.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [2.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [3.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [4.0])
-      self._event.wait()
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [5.0])
-      sess.run(destroy_op)
-
-  def testSameDeviceCPU(self):
-    self._prefetch_fn_helper_one_shot("same_device_cpu",
-                                      "/job:localhost/replica:0/task:0/cpu:0",
-                                      "/job:localhost/replica:0/task:0/cpu:0")
-
-  def testDifferentDeviceCPU(self):
-    self._prefetch_fn_helper_one_shot("diff_device_cpu",
-                                      "/job:localhost/replica:0/task:0/cpu:0",
-                                      "/job:localhost/replica:0/task:0/cpu:1")
-
-  def testDifferentDeviceCPUGPU(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    self._prefetch_fn_helper_one_shot("cpu_gpu",
-                                      "/job:localhost/replica:0/task:0/cpu:0",
-                                      "/job:localhost/replica:0/task:0/gpu:0")
-
-  def testReinitialization(self):
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-
-    device0 = "/job:localhost/replica:0/task:0/cpu:0"
-    device1 = "/job:localhost/replica:0/task:0/cpu:1"
-    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True)
-    prefetch_op, reset_op, destroy_op = self._create_ops(
-        ds, ds_iterator, "reinit", device0, device1)
-
-    with self.test_session(config=worker_config) as sess:
-      sess.run(ds_iterator.initializer)
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [1.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [2.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [3.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [4.0])
-      self._event.wait()
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [5.0])
-      # Lets reset the function buffering resource and reinitialize the
-      # iterator. Should be able to go through this again.
-      self._event.clear()
-      sess.run(reset_op)
-      sess.run(ds_iterator.initializer)
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [1.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [2.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [3.0])
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [4.0])
-      self._event.wait()
-      elem = sess.run(prefetch_op)
-      self.assertEqual(elem, [5.0])
-      sess.run(destroy_op)
-
-  def testReinitializationOutOfRange(self):
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-
-    device0 = "/job:localhost/replica:0/task:0/cpu:0"
-    device1 = "/job:localhost/replica:0/task:0/cpu:1"
-    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True)
-    prefetch_op, reset_op, destroy_op = self._create_ops(
-        ds, ds_iterator, "reinit", device0, device1)
-
-    with self.test_session(config=worker_config) as sess:
-      sess.run(ds_iterator.initializer)
-      for i in range(1, 10):
-        elem = sess.run(prefetch_op)
-        self.assertEqual(elem, [float(i)])
-      # Try fetching after its over twice to test out end of sequence.
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-
-      # Now reset everything and try it out again.
-      self._event.clear()
-      sess.run(reset_op)
-      sess.run(ds_iterator.initializer)
-      for i in range(1, 10):
-        elem = sess.run(prefetch_op)
-        self.assertEqual(elem, [float(i)])
-      # Try fetching after its over twice to test out end of sequence.
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-
-      sess.run(destroy_op)
-
-  def testStringsGPU(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    device0 = "/job:localhost/replica:0/task:0/cpu:0"
-    device1 = "/job:localhost/replica:0/task:0/gpu:0"
-
-    ds = dataset_ops.Dataset.from_tensor_slices(["a", "b", "c"])
-    ds_iterator = ds.make_one_shot_iterator()
-    ds_iterator_handle = ds_iterator.string_handle()
-
-    @function.Defun(dtypes.string)
-    def _remote_fn(h):
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          h, ds.output_types, ds.output_shapes)
-      return remote_iterator.get_next()
-
-    target = constant_op.constant(device0)
-    with ops.device(device1):
-      buffer_resource_handle = prefetching_ops.function_buffering_resource(
-          f=_remote_fn,
-          output_types=[dtypes.string],
-          target_device=target,
-          string_arg=ds_iterator_handle,
-          buffer_size=3,
-          shared_name="strings")
-
-    with ops.device(device1):
-      prefetch_op = prefetching_ops.function_buffering_resource_get_next(
-          function_buffer_resource=buffer_resource_handle,
-          output_types=[dtypes.string])
-      destroy_op = resource_variable_ops.destroy_resource_op(
-          buffer_resource_handle, ignore_lookup_error=True)
-
-    with self.cached_session() as sess:
-      self.assertEqual([b"a"], sess.run(prefetch_op))
-      self.assertEqual([b"b"], sess.run(prefetch_op))
-      self.assertEqual([b"c"], sess.run(prefetch_op))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
-
-      sess.run(destroy_op)
-
-
-class PrefetchToDeviceTest(test_base.DatasetTestBase):
-
-  def testPrefetchToDevice(self):
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/cpu:1"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_one_shot_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element.dtype)
-    self.assertEqual([], next_element.shape)
-
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=worker_config) as sess:
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToSameDevice(self):
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device(
-            "/job:localhost/replica:0/task:0/device:CPU:0"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_one_shot_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element.dtype)
-    self.assertEqual([], next_element.shape)
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchDictToDevice(self):
-    host_dataset = dataset_ops.Dataset.range(10).map(lambda x: {"a": x})
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/cpu:1"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_one_shot_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element["a"].dtype)
-    self.assertEqual([], next_element["a"].shape)
-
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=worker_config) as sess:
-      for i in range(10):
-        self.assertEqual({"a": i}, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchSparseTensorsToDevice(self):
-    def make_tensor(i):
-      return sparse_tensor.SparseTensorValue(
-          indices=[[0, 0]], values=(i*[1]), dense_shape=[2, 2])
-    host_dataset = dataset_ops.Dataset.range(10).map(make_tensor)
-
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/cpu:1"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_one_shot_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element.dtype)
-
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=worker_config) as sess:
-      for i in range(10):
-        actual = sess.run(next_element)
-        self.assertAllEqual([i], actual.values)
-        self.assertAllEqual([[0, 0]], actual.indices)
-        self.assertAllEqual([2, 2], actual.dense_shape)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToDeviceGpu(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/gpu:0"))
-
-    iterator = device_dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToDeviceWithReInit(self):
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/cpu:1"))
-
-    # NOTE(mrry): This device block creates the "host" dataset and iterator on
-    # /cpu:0, and ensures that the prefetching is across devices. In typical use
-    # this would not be necessary, because the GPU device would not support any
-    # of the dataset-related ops.
-    with ops.device("/cpu:0"):
-      iterator = device_dataset.make_initializable_iterator()
-
-    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
-    self.assertEqual(host_dataset.output_types, iterator.output_types)
-    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
-    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
-    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
-    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
-
-    next_element = iterator.get_next()
-    self.assertEqual(dtypes.int64, next_element.dtype)
-    self.assertEqual([], next_element.shape)
-
-    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=worker_config) as sess:
-      sess.run(iterator.initializer)
-      for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToDeviceGpuWithReInit(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops.prefetch_to_device("/gpu:0"))
-
-    iterator = device_dataset.make_initializable_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer)
-      for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-
 class CopyToDeviceTest(test_base.DatasetTestBase):
 
   def testCopyToDevice(self):
diff --git a/tensorflow/python/data/experimental/kernel_tests/counter_test.py b/tensorflow/python/data/experimental/kernel_tests/counter_test.py
new file mode 100644
index 0000000000..4e114ac479
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/counter_test.py
@@ -0,0 +1,51 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.Counter`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.ops import counter
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.framework import dtypes
+from tensorflow.python.platform import test
+
+
+class CounterTest(test_base.DatasetTestBase):
+
+  def testCounter(self):
+    """Test dataset construction using `count`."""
+    iterator = (counter.Counter(start=3, step=4)
+                .make_one_shot_iterator())
+    get_next = iterator.get_next()
+    self.assertEqual([], get_next.shape.as_list())
+    self.assertEqual(dtypes.int64, get_next.dtype)
+
+    negative_iterator = (counter.Counter(start=0, step=-1)
+                         .make_one_shot_iterator())
+    negative_get_next = negative_iterator.get_next()
+
+    with self.cached_session() as sess:
+      self.assertEqual(3, sess.run(get_next))
+      self.assertEqual(3 + 4, sess.run(get_next))
+      self.assertEqual(3 + 2 * 4, sess.run(get_next))
+
+      self.assertEqual(0, sess.run(negative_get_next))
+      self.assertEqual(-1, sess.run(negative_get_next))
+      self.assertEqual(-2, sess.run(negative_get_next))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py
similarity index 99%
rename from tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py
index 4ee1779710..fb75be1fbc 100644
--- a/tensorflow/python/data/experimental/kernel_tests/csv_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for CsvDatasetOp."""
+"""Tests for `tf.data.experimental.CsvDataset`."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -44,7 +44,7 @@ from tensorflow.python.platform import test
 
 
 @test_util.run_all_in_graph_and_eager_modes
-class CsvDatasetOpTest(test_base.DatasetTestBase):
+class CsvDatasetTest(test_base.DatasetTestBase):
 
   def _setup_files(self, inputs, linebreak='\n', compression_type=None):
     filenames = []
diff --git a/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py b/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
deleted file mode 100644
index 7f435b8239..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/dataset_serialization_test_base.py
+++ /dev/null
@@ -1,692 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Base class for testing serializable datasets."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-import numpy as np
-
-from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
-from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.util import nest
-
-
-def remove_variants(get_next_op):
-  # TODO(b/72408568): Remove this once session.run can get
-  # variant tensors.
-  """Remove variants from a nest structure, so sess.run will execute."""
-
-  def _remove_variant(x):
-    if isinstance(x, ops.Tensor) and x.dtype == dtypes.variant:
-      return ()
-    else:
-      return x
-
-  return nest.map_structure(_remove_variant, get_next_op)
-
-
-class DatasetSerializationTestBase(test.TestCase):
-  """Base class for testing serializable datasets."""
-
-  def tearDown(self):
-    self._delete_ckpt()
-
-  # TODO(b/72657739): Remove sparse_tensor argument, which is to test the
-  # (deprecated) saveable `SparseTensorSliceDataset`, once the API
-  # `from_sparse_tensor_slices()`and related tests are deleted.
-  def run_core_tests(self, ds_fn1, ds_fn2, num_outputs, sparse_tensors=False):
-    """Runs the core tests.
-
-    Args:
-      ds_fn1: 0-argument function that returns a Dataset.
-      ds_fn2: 0-argument function that returns a Dataset different from
-        ds_fn1. If None, verify_restore_in_modified_graph test is not run.
-      num_outputs: Total number of outputs expected from this Dataset.
-      sparse_tensors: Whether dataset is built from SparseTensor(s).
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.verify_unused_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_fully_used_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_exhausted_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_init_before_restore(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_multiple_breaks(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_reset_restored_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    self.verify_restore_in_empty_graph(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
-    if ds_fn2:
-      self.verify_restore_in_modified_graph(
-          ds_fn1, ds_fn2, num_outputs, sparse_tensors=sparse_tensors)
-
-  def verify_unused_iterator(self,
-                             ds_fn,
-                             num_outputs,
-                             sparse_tensors=False,
-                             verify_exhausted=True):
-    """Verifies that saving and restoring an unused iterator works.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.verify_run_with_breaks(
-        ds_fn, [0],
-        num_outputs,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-  def verify_fully_used_iterator(self, ds_fn, num_outputs,
-                                 sparse_tensors=False):
-    """Verifies that saving and restoring a fully used iterator works.
-
-    Note that this only checks saving and restoring an iterator from which
-    `num_outputs` items have been produced but does not check for an
-    exhausted iterator, i.e., one from which an OutOfRange error has been
-    returned.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      sparse_tensors: See `run_core_tests`.
-
-    Raises:
-      AssertionError if test fails.
-    """
-    self.verify_run_with_breaks(
-        ds_fn, [num_outputs], num_outputs, sparse_tensors=sparse_tensors)
-
-  def verify_exhausted_iterator(self, ds_fn, num_outputs, sparse_tensors=False):
-    """Verifies that saving and restoring an exhausted iterator works.
-
-    An exhausted iterator is one which has returned an OutOfRange error.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      sparse_tensors: See `run_core_tests`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.gen_outputs(
-        ds_fn, [],
-        num_outputs,
-        verify_exhausted=True,
-        sparse_tensors=sparse_tensors)
-    actual = self.gen_outputs(
-        ds_fn, [],
-        0,
-        ckpt_saved=True,
-        verify_exhausted=True,
-        sparse_tensors=sparse_tensors)
-    self.assertEqual(len(actual), 0)
-
-  def verify_init_before_restore(self,
-                                 ds_fn,
-                                 num_outputs,
-                                 sparse_tensors=False,
-                                 verify_exhausted=True):
-    """Verifies that restoring into an already initialized iterator works.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.verify_run_with_breaks(
-        ds_fn,
-        self.gen_break_points(num_outputs),
-        num_outputs,
-        init_before_restore=True,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-  def verify_multiple_breaks(self,
-                             ds_fn,
-                             num_outputs,
-                             num_breaks=10,
-                             sparse_tensors=False,
-                             verify_exhausted=True):
-    """Attempts to save/restore at multiple break points.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      num_breaks: The number of break points. These are uniformly spread in
-        [0, num_outputs] both inclusive.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    self.verify_run_with_breaks(
-        ds_fn,
-        self.gen_break_points(num_outputs, num_breaks),
-        num_outputs,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-  def verify_reset_restored_iterator(self,
-                                     ds_fn,
-                                     num_outputs,
-                                     break_point=None,
-                                     sparse_tensors=False,
-                                     verify_exhausted=True):
-    """Attempts to re-initialize a restored iterator.
-
-    This is useful when restoring a training checkpoint during validation.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      break_point: Break point. Optional. Defaults to num_outputs/2.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    break_point = num_outputs // 2 if not break_point else break_point
-
-    # Collect ground truth containing all outputs.
-    expected = self.gen_outputs(
-        ds_fn, [],
-        num_outputs,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    # Skip some items and save checkpoint.
-    self.gen_outputs(
-        ds_fn, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-
-    actual = []
-    # Restore from checkpoint and then run init_op.
-    with ops.Graph().as_default() as g:
-      saver = self._import_meta_graph()
-      init_op, get_next_op = self._get_iterator_ops_from_collection(
-          ds_fn, sparse_tensors=sparse_tensors)
-      get_next_op = remove_variants(get_next_op)
-      with self.session(graph=g) as sess:
-        self._restore(saver, sess)
-        self._initialize(init_op, sess)
-        for _ in range(num_outputs):
-          actual.append(sess.run(get_next_op))
-        if verify_exhausted:
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-    self.match(expected, actual)
-
-  def verify_restore_in_modified_graph(self,
-                                       ds_fn1,
-                                       ds_fn2,
-                                       num_outputs,
-                                       break_point=None,
-                                       sparse_tensors=False,
-                                       verify_exhausted=True):
-    """Attempts to restore an iterator in a modified graph.
-
-    Builds an input pipeline using ds_fn1, runs it for `break_point` steps
-    and saves a checkpoint. Then builds a new graph using ds_fn2, restores
-    the checkpoint from ds_fn1 and verifies that the restore is successful.
-
-    Args:
-      ds_fn1: See `run_core_tests`.
-      ds_fn2: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      break_point: Break point. Optional. Defaults to num_outputs/2.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    break_point = num_outputs // 2 if not break_point else break_point
-
-    # Skip `break_point` items and store the remaining produced from ds_fn1
-    # in `expected`.
-    self.gen_outputs(
-        ds_fn1, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-    expected = self.gen_outputs(
-        ds_fn1, [],
-        num_outputs - break_point,
-        ckpt_saved=True,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    # Generate `break_point` items from ds_fn1 and save checkpoint.
-    self.gen_outputs(
-        ds_fn1, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-
-    actual = []
-    # Build graph for ds_fn2 but load checkpoint for ds_fn1.
-    with ops.Graph().as_default() as g:
-      _, get_next_op, saver = self._build_graph(
-          ds_fn2, sparse_tensors=sparse_tensors)
-      get_next_op = remove_variants(get_next_op)
-      with self.session(graph=g) as sess:
-        self._restore(saver, sess)
-        for _ in range(num_outputs - break_point):
-          actual.append(sess.run(get_next_op))
-        if verify_exhausted:
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-
-    self.match(expected, actual)
-
-  def verify_restore_in_empty_graph(self,
-                                    ds_fn,
-                                    num_outputs,
-                                    break_point=None,
-                                    sparse_tensors=False,
-                                    verify_exhausted=True):
-    """Attempts to restore an iterator in an empty graph.
-
-    Builds an input pipeline using ds_fn, runs it for `break_point` steps
-    and saves a checkpoint. Then builds a new empty graph, restores
-    the checkpoint from ds_fn and verifies that the restore is successful.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      break_point: Break point. Optional. Defaults to num_outputs/2.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    break_point = num_outputs // 2 if not break_point else break_point
-
-    # Skip `break_point` items and store the remaining produced from ds_fn
-    # in `expected`.
-    self.gen_outputs(
-        ds_fn, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-    expected = self.gen_outputs(
-        ds_fn, [],
-        num_outputs - break_point,
-        ckpt_saved=True,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    # Generate `break_point` items from ds_fn and save checkpoint.
-    self.gen_outputs(
-        ds_fn, [],
-        break_point,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=False)
-
-    actual = []
-    # Build an empty graph but load checkpoint for ds_fn.
-    with ops.Graph().as_default() as g:
-      get_next_op, saver = self._build_empty_graph(
-          ds_fn, sparse_tensors=sparse_tensors)
-      get_next_op = remove_variants(get_next_op)
-      with self.session(graph=g) as sess:
-        self._restore(saver, sess)
-        for _ in range(num_outputs - break_point):
-          actual.append(sess.run(get_next_op))
-        if verify_exhausted:
-          with self.assertRaises(errors.OutOfRangeError):
-            sess.run(get_next_op)
-
-    self.match(expected, actual)
-
-  def verify_error_on_save(self,
-                           ds_fn,
-                           num_outputs,
-                           error,
-                           break_point=None,
-                           sparse_tensors=False):
-    """Attempts to save a non-saveable iterator.
-
-    Args:
-      ds_fn: See `run_core_tests`.
-      num_outputs: See `run_core_tests`.
-      error: Declared error when trying to save iterator.
-      break_point: Break point. Optional. Defaults to num_outputs/2.
-      sparse_tensors: See `run_core_tests`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-
-    break_point = num_outputs // 2 if not break_point else break_point
-    with ops.Graph().as_default() as g:
-      init_op, get_next_op, saver = self._build_graph(
-          ds_fn, sparse_tensors=sparse_tensors)
-      get_next_op = remove_variants(get_next_op)
-      with self.session(graph=g) as sess:
-        self._initialize(init_op, sess)
-        for _ in range(break_point):
-          sess.run(get_next_op)
-        with self.assertRaises(error):
-          self._save(sess, saver)
-
-  def verify_run_with_breaks(self,
-                             ds_fn,
-                             break_points,
-                             num_outputs,
-                             init_before_restore=False,
-                             sparse_tensors=False,
-                             verify_exhausted=True):
-    """Verifies that ds_fn() produces the same outputs with and without breaks.
-
-    1. Builds a Dataset using `ds_fn` and produces `num_outputs` items from it
-       *without* stopping at break points.
-    2. Builds a Dataset using `ds_fn` and produces `num_outputs` items from it
-       with stopping at break points.
-
-    Deep matches outputs from 1 and 2.
-
-    Args:
-      ds_fn: See `gen_outputs`.
-      break_points: See `gen_outputs`.
-      num_outputs: See `gen_outputs`.
-      init_before_restore: See `gen_outputs`.
-      sparse_tensors: See `run_core_tests`.
-      verify_exhausted: See `gen_outputs`.
-
-    Raises:
-      AssertionError if any test fails.
-    """
-    expected = self.gen_outputs(
-        ds_fn, [],
-        num_outputs,
-        init_before_restore=init_before_restore,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    actual = self.gen_outputs(
-        ds_fn,
-        break_points,
-        num_outputs,
-        init_before_restore=init_before_restore,
-        sparse_tensors=sparse_tensors,
-        verify_exhausted=verify_exhausted)
-
-    self.match(expected, actual)
-
-  def gen_outputs(self,
-                  ds_fn,
-                  break_points,
-                  num_outputs,
-                  ckpt_saved=False,
-                  init_before_restore=False,
-                  sparse_tensors=False,
-                  verify_exhausted=True,
-                  save_checkpoint_at_end=True):
-    """Generates elements from input dataset while stopping at break points.
-
-    Produces `num_outputs` outputs and saves the state of the iterator in the
-    Saver checkpoint.
-
-    Args:
-      ds_fn: 0-argument function that returns the dataset.
-      break_points: A list of integers. For each `break_point` in
-        `break_points`, we produce outputs till `break_point` number of items
-        have been produced and then checkpoint the state. The current graph
-        and session are destroyed and a new graph and session are used to
-        produce outputs till next checkpoint or till `num_outputs` elements
-        have been produced. `break_point` must be <= `num_outputs`.
-      num_outputs: The total number of outputs to produce from the iterator.
-      ckpt_saved: Whether a checkpoint already exists. If False, we build the
-        graph from ds_fn.
-      init_before_restore: Whether init should be called before saver.restore.
-        This is just so that we can verify that restoring an already initialized
-        iterator works.
-      sparse_tensors:  Whether dataset is built from SparseTensor(s).
-      verify_exhausted: Whether to verify that the iterator has been exhausted
-        after producing `num_outputs` elements.
-      save_checkpoint_at_end: Whether to save a checkpoint after producing all
-        outputs. If False, checkpoints are saved each break point but not at the
-        end. Note that checkpoints overwrite each other so there is always only
-        a single checkpoint available. Defaults to True.
-
-    Returns:
-      A list of `num_outputs` items.
-    """
-    outputs = []
-
-    def get_ops():
-      if ckpt_saved:
-        saver = self._import_meta_graph()
-        init_op, get_next_op = self._get_iterator_ops_from_collection(
-            ds_fn, sparse_tensors=sparse_tensors)
-      else:
-        init_op, get_next_op, saver = self._build_graph(
-            ds_fn, sparse_tensors=sparse_tensors)
-      return init_op, get_next_op, saver
-
-    for i in range(len(break_points) + 1):
-      with ops.Graph().as_default() as g:
-        init_op, get_next_op, saver = get_ops()
-        get_next_op = remove_variants(get_next_op)
-        with self.session(graph=g) as sess:
-          if ckpt_saved:
-            if init_before_restore:
-              self._initialize(init_op, sess)
-            self._restore(saver, sess)
-          else:
-            self._initialize(init_op, sess)
-          start = break_points[i - 1] if i > 0 else 0
-          end = break_points[i] if i < len(break_points) else num_outputs
-          num_iters = end - start
-          for _ in range(num_iters):
-            outputs.append(sess.run(get_next_op))
-          if i == len(break_points) and verify_exhausted:
-            with self.assertRaises(errors.OutOfRangeError):
-              sess.run(get_next_op)
-          if save_checkpoint_at_end or i < len(break_points):
-            self._save(sess, saver)
-            ckpt_saved = True
-
-    return outputs
-
-  def match(self, expected, actual):
-    """Matches nested structures.
-
-    Recursively matches shape and values of `expected` and `actual`.
-    Handles scalars, numpy arrays and other python sequence containers
-    e.g. list, dict.
-
-    Args:
-      expected: Nested structure 1.
-      actual: Nested structure 2.
-
-    Raises:
-      AssertionError if matching fails.
-    """
-    if isinstance(expected, np.ndarray):
-      expected = expected.tolist()
-    if isinstance(actual, np.ndarray):
-      actual = actual.tolist()
-    self.assertEqual(type(expected), type(actual))
-
-    if nest.is_sequence(expected):
-      self.assertEqual(len(expected), len(actual))
-      if isinstance(expected, dict):
-        for key1, key2 in zip(sorted(expected), sorted(actual)):
-          self.assertEqual(key1, key2)
-          self.match(expected[key1], actual[key2])
-      else:
-        for item1, item2 in zip(expected, actual):
-          self.match(item1, item2)
-    else:
-      self.assertEqual(expected, actual)
-
-  def does_not_match(self, expected, actual):
-    with self.assertRaises(AssertionError):
-      self.match(expected, actual)
-
-  def gen_break_points(self, num_outputs, num_samples=10):
-    """Generates `num_samples` breaks points in [0, num_outputs]."""
-    return np.linspace(0, num_outputs, num_samples, dtype=int)
-
-  def _build_graph(self, ds_fn, sparse_tensors=False):
-    iterator = ds_fn().make_initializable_iterator()
-
-    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-    init_op = iterator.initializer
-    if sparse_tensors:
-      get_next = sparse_tensor.SparseTensor(*iterator.get_next())
-    else:
-      get_next = iterator.get_next()
-    self._add_iterator_ops_to_collection(init_op, get_next, ds_fn,
-                                         sparse_tensors)
-    saver = saver_lib.Saver(allow_empty=True)
-    return init_op, get_next, saver
-
-  def _build_empty_graph(self, ds_fn, sparse_tensors=False):
-    iterator = iterator_ops.Iterator.from_structure(
-        self._get_output_types(ds_fn),
-        output_shapes=self._get_output_shapes(ds_fn),
-        output_classes=self._get_output_classes(ds_fn))
-    saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-    ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-    if sparse_tensors:
-      get_next = sparse_tensor.SparseTensor(*iterator.get_next())
-    else:
-      get_next = iterator.get_next()
-    saver = saver_lib.Saver(allow_empty=True)
-    return get_next, saver
-
-  def _add_iterator_ops_to_collection(self,
-                                      init_op,
-                                      get_next,
-                                      ds_fn,
-                                      sparse_tensors=False):
-    ops.add_to_collection("iterator_ops", init_op)
-    # `get_next` may be a tuple e.g. in TensorSliceDataset. Since Collections
-    # do not support tuples we flatten the tensors and restore the shape in
-    # `_get_iterator_ops_from_collection`.
-    if sparse_tensors:  # specific for deprecated `from_sparse_tensor_slices`.
-      ops.add_to_collection("iterator_ops", get_next.indices)
-      ops.add_to_collection("iterator_ops", get_next.values)
-      ops.add_to_collection("iterator_ops", get_next.dense_shape)
-      return
-
-    get_next_list = nest.flatten(get_next)
-    for i, output_class in enumerate(
-        nest.flatten(self._get_output_classes(ds_fn))):
-      if output_class is sparse_tensor.SparseTensor:
-        ops.add_to_collection("iterator_ops", get_next_list[i].indices)
-        ops.add_to_collection("iterator_ops", get_next_list[i].values)
-        ops.add_to_collection("iterator_ops", get_next_list[i].dense_shape)
-      else:
-        ops.add_to_collection("iterator_ops", get_next_list[i])
-
-  def _get_iterator_ops_from_collection(self, ds_fn, sparse_tensors=False):
-    all_ops = ops.get_collection("iterator_ops")
-    if sparse_tensors:  # specific for deprecated `from_sparse_tensor_slices`.
-      init_op, indices, values, dense_shape = all_ops
-      return init_op, sparse_tensor.SparseTensor(indices, values, dense_shape)
-    get_next_list = []
-    i = 1
-    for output_class in nest.flatten(self._get_output_classes(ds_fn)):
-      if output_class is sparse_tensor.SparseTensor:
-        indices, values, dense_shape = all_ops[i:i + 3]
-        i += 3
-        get_next_list.append(
-            sparse_tensor.SparseTensor(indices, values, dense_shape))
-      else:
-        get_next_list.append(all_ops[i])
-        i += 1
-    return all_ops[0], nest.pack_sequence_as(
-        self._get_output_types(ds_fn), get_next_list)
-
-  def _get_output_types(self, ds_fn):
-    with ops.Graph().as_default():
-      return ds_fn().output_types
-
-  def _get_output_shapes(self, ds_fn):
-    with ops.Graph().as_default():
-      return ds_fn().output_shapes
-
-  def _get_output_classes(self, ds_fn):
-    with ops.Graph().as_default():
-      return ds_fn().output_classes
-
-  def _ckpt_path(self):
-    return os.path.join(self.get_temp_dir(), "iterator")
-
-  def _latest_ckpt(self):
-    return checkpoint_management.latest_checkpoint(self.get_temp_dir())
-
-  def _save(self, sess, saver):
-    saver.save(sess, self._ckpt_path())
-
-  def _restore(self, saver, sess):
-    sess.run(lookup_ops.tables_initializer())
-    saver.restore(sess, self._latest_ckpt())
-
-  def _initialize(self, init_op, sess):
-    sess.run(variables.global_variables_initializer())
-    sess.run(lookup_ops.tables_initializer())
-    sess.run(init_op)
-
-  def _import_meta_graph(self):
-    meta_file_path = self._ckpt_path() + ".meta"
-    return saver_lib.import_meta_graph(meta_file_path)
-
-  def _delete_ckpt(self):
-    # Remove all checkpoint files.
-    prefix = self._ckpt_path()
-    pattern = prefix + "*"
-    files = gfile.Glob(pattern)
-    map(gfile.Remove, files)
diff --git a/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
new file mode 100644
index 0000000000..73be6cbcca
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
@@ -0,0 +1,124 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.dense_to_sparse_batch()."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+
+
+class DenseToSparseBatchTest(test_base.DatasetTestBase):
+
+  def testDenseToSparseBatchDataset(self):
+    components = np.random.randint(12, size=(100,)).astype(np.int32)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.fill([x], x)).apply(
+            batching.dense_to_sparse_batch(4, [12]))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      for start in range(0, len(components), 4):
+        results = sess.run(get_next)
+        self.assertAllEqual([[i, j]
+                             for i, c in enumerate(components[start:start + 4])
+                             for j in range(c)], results.indices)
+        self.assertAllEqual(
+            [c for c in components[start:start + 4] for _ in range(c)],
+            results.values)
+        self.assertAllEqual([min(4,
+                                 len(components) - start), 12],
+                            results.dense_shape)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testDenseToSparseBatchDatasetWithUnknownShape(self):
+    components = np.random.randint(5, size=(40,)).astype(np.int32)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.fill([x, x], x)).apply(
+            batching.dense_to_sparse_batch(
+                4, [5, None])).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      for start in range(0, len(components), 4):
+        results = sess.run(get_next)
+        self.assertAllEqual([[i, j, z]
+                             for i, c in enumerate(components[start:start + 4])
+                             for j in range(c)
+                             for z in range(c)], results.indices)
+        self.assertAllEqual([
+            c
+            for c in components[start:start + 4] for _ in range(c)
+            for _ in range(c)
+        ], results.values)
+        self.assertAllEqual([
+            min(4,
+                len(components) - start), 5,
+            np.max(components[start:start + 4])
+        ], results.dense_shape)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testDenseToSparseBatchDatasetWithInvalidShape(self):
+    input_tensor = array_ops.constant([[1]])
+    with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"):
+      dataset_ops.Dataset.from_tensors(input_tensor).apply(
+          batching.dense_to_sparse_batch(4, [-2])).make_initializable_iterator()
+
+  def testDenseToSparseBatchDatasetShapeErrors(self):
+    input_tensor = array_ops.placeholder(dtypes.int32)
+    iterator = (
+        dataset_ops.Dataset.from_tensors(input_tensor).apply(
+            batching.dense_to_sparse_batch(4, [12]))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      # Initialize with an input tensor of incompatible rank.
+      sess.run(init_op, feed_dict={input_tensor: [[1]]})
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   "incompatible with the row shape"):
+        sess.run(get_next)
+
+      # Initialize with an input tensor that is larger than `row_shape`.
+      sess.run(init_op, feed_dict={input_tensor: range(13)})
+      with self.assertRaisesRegexp(errors.DataLossError,
+                                   "larger than the row shape"):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
similarity index 68%
rename from tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
index 22412c3965..e54235d9f8 100644
--- a/tensorflow/python/data/experimental/kernel_tests/range_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
@@ -12,12 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Test RangeDataset."""
+"""Tests for `tf.data.experimental.enumerate_dataset()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.experimental.ops import counter
 from tensorflow.python.data.experimental.ops import enumerate_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
@@ -28,7 +27,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.platform import test
 
 
-class RangeDatasetTest(test_base.DatasetTestBase):
+class EnumerateDatasetTest(test_base.DatasetTestBase):
 
   def testEnumerateDataset(self):
     components = (["a", "b"], [1, 2], [37.0, 38])
@@ -52,27 +51,6 @@ class RangeDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testCounter(self):
-    """Test dataset construction using `count`."""
-    iterator = (counter.Counter(start=3, step=4)
-                .make_one_shot_iterator())
-    get_next = iterator.get_next()
-    self.assertEqual([], get_next.shape.as_list())
-    self.assertEqual(dtypes.int64, get_next.dtype)
-
-    negative_iterator = (counter.Counter(start=0, step=-1)
-                         .make_one_shot_iterator())
-    negative_get_next = negative_iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.assertEqual(3, sess.run(get_next))
-      self.assertEqual(3 + 4, sess.run(get_next))
-      self.assertEqual(3 + 2 * 4, sess.run(get_next))
-
-      self.assertEqual(0, sess.run(negative_get_next))
-      self.assertEqual(-1, sess.run(negative_get_next))
-      self.assertEqual(-2, sess.run(negative_get_next))
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py b/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py
new file mode 100644
index 0000000000..399fd284f4
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py
@@ -0,0 +1,247 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the private `FunctionBufferingResource` used in prefetching."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import threading
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.data.experimental.ops import prefetching_ops
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import function
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.platform import test
+
+
+class FunctionBufferingResourceTest(test_base.DatasetTestBase):
+
+  def setUp(self):
+    self._event = threading.Event()
+
+  def _create_ds_and_iterator(self, device0, initializable=False):
+
+    def gen():
+      for i in range(1, 10):
+        yield [float(i)]
+        if i == 6:
+          self._event.set()
+
+    with ops.device(device0):
+      ds = dataset_ops.Dataset.from_generator(gen, (dtypes.float32))
+      if initializable:
+        ds_iterator = ds.make_initializable_iterator()
+      else:
+        ds_iterator = ds.make_one_shot_iterator()
+      return (ds, ds_iterator)
+
+  def _create_ops(self, ds, ds_iterator, buffer_name, device0, device1):
+    ds_iterator_handle = ds_iterator.string_handle()
+
+    @function.Defun(dtypes.string)
+    def _remote_fn(h):
+      remote_iterator = iterator_ops.Iterator.from_string_handle(
+          h, ds.output_types, ds.output_shapes)
+      return remote_iterator.get_next()
+
+    target = constant_op.constant(device0)
+    with ops.device(device1):
+      buffer_resource_handle = prefetching_ops.function_buffering_resource(
+          f=_remote_fn,
+          output_types=[dtypes.float32],
+          target_device=target,
+          string_arg=ds_iterator_handle,
+          buffer_size=3,
+          shared_name=buffer_name)
+
+    with ops.device(device1):
+      prefetch_op = prefetching_ops.function_buffering_resource_get_next(
+          function_buffer_resource=buffer_resource_handle,
+          output_types=[dtypes.float32])
+      reset_op = prefetching_ops.function_buffering_resource_reset(
+          function_buffer_resource=buffer_resource_handle)
+      destroy_op = resource_variable_ops.destroy_resource_op(
+          buffer_resource_handle, ignore_lookup_error=True)
+
+    return (prefetch_op, reset_op, destroy_op)
+
+  def _prefetch_fn_helper_one_shot(self, buffer_name, device0, device1):
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+
+    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=False)
+    prefetch_op, _, destroy_op = self._create_ops(ds, ds_iterator, buffer_name,
+                                                  device0, device1)
+
+    with self.test_session(config=worker_config) as sess:
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [1.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [2.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [3.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [4.0])
+      self._event.wait()
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [5.0])
+      sess.run(destroy_op)
+
+  def testSameDeviceCPU(self):
+    self._prefetch_fn_helper_one_shot("same_device_cpu",
+                                      "/job:localhost/replica:0/task:0/cpu:0",
+                                      "/job:localhost/replica:0/task:0/cpu:0")
+
+  def testDifferentDeviceCPU(self):
+    self._prefetch_fn_helper_one_shot("diff_device_cpu",
+                                      "/job:localhost/replica:0/task:0/cpu:0",
+                                      "/job:localhost/replica:0/task:0/cpu:1")
+
+  def testDifferentDeviceCPUGPU(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    self._prefetch_fn_helper_one_shot("cpu_gpu",
+                                      "/job:localhost/replica:0/task:0/cpu:0",
+                                      "/job:localhost/replica:0/task:0/gpu:0")
+
+  def testReinitialization(self):
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+
+    device0 = "/job:localhost/replica:0/task:0/cpu:0"
+    device1 = "/job:localhost/replica:0/task:0/cpu:1"
+    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True)
+    prefetch_op, reset_op, destroy_op = self._create_ops(
+        ds, ds_iterator, "reinit", device0, device1)
+
+    with self.test_session(config=worker_config) as sess:
+      sess.run(ds_iterator.initializer)
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [1.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [2.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [3.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [4.0])
+      self._event.wait()
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [5.0])
+      # Lets reset the function buffering resource and reinitialize the
+      # iterator. Should be able to go through this again.
+      self._event.clear()
+      sess.run(reset_op)
+      sess.run(ds_iterator.initializer)
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [1.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [2.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [3.0])
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [4.0])
+      self._event.wait()
+      elem = sess.run(prefetch_op)
+      self.assertEqual(elem, [5.0])
+      sess.run(destroy_op)
+
+  def testReinitializationOutOfRange(self):
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+
+    device0 = "/job:localhost/replica:0/task:0/cpu:0"
+    device1 = "/job:localhost/replica:0/task:0/cpu:1"
+    ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True)
+    prefetch_op, reset_op, destroy_op = self._create_ops(
+        ds, ds_iterator, "reinit", device0, device1)
+
+    with self.test_session(config=worker_config) as sess:
+      sess.run(ds_iterator.initializer)
+      for i in range(1, 10):
+        elem = sess.run(prefetch_op)
+        self.assertEqual(elem, [float(i)])
+      # Try fetching after its over twice to test out end of sequence.
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+
+      # Now reset everything and try it out again.
+      self._event.clear()
+      sess.run(reset_op)
+      sess.run(ds_iterator.initializer)
+      for i in range(1, 10):
+        elem = sess.run(prefetch_op)
+        self.assertEqual(elem, [float(i)])
+      # Try fetching after its over twice to test out end of sequence.
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+
+      sess.run(destroy_op)
+
+  def testStringsGPU(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    device0 = "/job:localhost/replica:0/task:0/cpu:0"
+    device1 = "/job:localhost/replica:0/task:0/gpu:0"
+
+    ds = dataset_ops.Dataset.from_tensor_slices(["a", "b", "c"])
+    ds_iterator = ds.make_one_shot_iterator()
+    ds_iterator_handle = ds_iterator.string_handle()
+
+    @function.Defun(dtypes.string)
+    def _remote_fn(h):
+      remote_iterator = iterator_ops.Iterator.from_string_handle(
+          h, ds.output_types, ds.output_shapes)
+      return remote_iterator.get_next()
+
+    target = constant_op.constant(device0)
+    with ops.device(device1):
+      buffer_resource_handle = prefetching_ops.function_buffering_resource(
+          f=_remote_fn,
+          output_types=[dtypes.string],
+          target_device=target,
+          string_arg=ds_iterator_handle,
+          buffer_size=3,
+          shared_name="strings")
+
+    with ops.device(device1):
+      prefetch_op = prefetching_ops.function_buffering_resource_get_next(
+          function_buffer_resource=buffer_resource_handle,
+          output_types=[dtypes.string])
+      destroy_op = resource_variable_ops.destroy_resource_op(
+          buffer_resource_handle, ignore_lookup_error=True)
+
+    with self.cached_session() as sess:
+      self.assertEqual([b"a"], sess.run(prefetch_op))
+      self.assertEqual([b"b"], sess.run(prefetch_op))
+      self.assertEqual([b"c"], sess.run(prefetch_op))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(prefetch_op)
+
+      sess.run(destroy_op)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py b/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
new file mode 100644
index 0000000000..9030328593
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
@@ -0,0 +1,199 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.group_by_reducer()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class GroupByReducerTest(test_base.DatasetTestBase):
+
+  def checkResults(self, dataset, shapes, values):
+    self.assertEqual(shapes, dataset.output_shapes)
+    get_next = dataset.make_one_shot_iterator().get_next()
+    with self.cached_session() as sess:
+      for expected in values:
+        got = sess.run(get_next)
+        self.assertEqual(got, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testSum(self):
+    reducer = grouping.Reducer(
+        init_func=lambda _: np.int64(0),
+        reduce_func=lambda x, y: x + y,
+        finalize_func=lambda x: x)
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.range(2 * i).apply(
+          grouping.group_by_reducer(lambda x: x % 2, reducer))
+      self.checkResults(
+          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
+
+  def testAverage(self):
+
+    def reduce_fn(x, y):
+      return (x[0] * x[1] + math_ops.cast(y, dtypes.float32)) / (
+          x[1] + 1), x[1] + 1
+
+    reducer = grouping.Reducer(
+        init_func=lambda _: (0.0, 0.0),
+        reduce_func=reduce_fn,
+        finalize_func=lambda x, _: x)
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.range(2 * i).apply(
+          grouping.group_by_reducer(
+              lambda x: math_ops.cast(x, dtypes.int64) % 2, reducer))
+      self.checkResults(
+          dataset, shapes=tensor_shape.scalar(), values=[i - 1, i])
+
+  def testConcat(self):
+    components = np.array(list("abcdefghijklmnopqrst")).view(np.chararray)
+    reducer = grouping.Reducer(
+        init_func=lambda x: "",
+        reduce_func=lambda x, y: x + y[0],
+        finalize_func=lambda x: x)
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.zip(
+          (dataset_ops.Dataset.from_tensor_slices(components),
+           dataset_ops.Dataset.range(2 * i))).apply(
+               grouping.group_by_reducer(lambda x, y: y % 2, reducer))
+      self.checkResults(
+          dataset,
+          shapes=tensor_shape.scalar(),
+          values=[b"acegikmoqs" [:i], b"bdfhjlnprt" [:i]])
+
+  def testSparseSum(self):
+    def _sparse(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=np.array([[0, 0]]),
+          values=(i * np.array([1], dtype=np.int64)),
+          dense_shape=np.array([1, 1]))
+
+    reducer = grouping.Reducer(
+        init_func=lambda _: _sparse(np.int64(0)),
+        reduce_func=lambda x, y: _sparse(x.values[0] + y.values[0]),
+        finalize_func=lambda x: x.values[0])
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.range(2 * i).map(_sparse).apply(
+          grouping.group_by_reducer(lambda x: x.values[0] % 2, reducer))
+      self.checkResults(
+          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
+
+  def testChangingStateShape(self):
+
+    def reduce_fn(x, _):
+      # Statically known rank, but dynamic length.
+      larger_dim = array_ops.concat([x[0], x[0]], 0)
+      # Statically unknown rank.
+      larger_rank = array_ops.expand_dims(x[1], 0)
+      return larger_dim, larger_rank
+
+    reducer = grouping.Reducer(
+        init_func=lambda x: ([0], 1),
+        reduce_func=reduce_fn,
+        finalize_func=lambda x, y: (x, y))
+
+    for i in range(1, 11):
+      dataset = dataset_ops.Dataset.from_tensors(np.int64(0)).repeat(i).apply(
+          grouping.group_by_reducer(lambda x: x, reducer))
+      self.assertEqual([None], dataset.output_shapes[0].as_list())
+      self.assertIs(None, dataset.output_shapes[1].ndims)
+      iterator = dataset.make_one_shot_iterator()
+      get_next = iterator.get_next()
+      with self.cached_session() as sess:
+        x, y = sess.run(get_next)
+        self.assertAllEqual([0] * (2**i), x)
+        self.assertAllEqual(np.array(1, ndmin=i), y)
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+
+  def testTypeMismatch(self):
+    reducer = grouping.Reducer(
+        init_func=lambda x: constant_op.constant(1, dtype=dtypes.int32),
+        reduce_func=lambda x, y: constant_op.constant(1, dtype=dtypes.int64),
+        finalize_func=lambda x: x)
+
+    dataset = dataset_ops.Dataset.range(10)
+    with self.assertRaisesRegexp(
+        TypeError,
+        "The element types for the new state must match the initial state."):
+      dataset.apply(
+          grouping.group_by_reducer(lambda _: np.int64(0), reducer))
+
+  # TODO(b/78665031): Remove once non-scalar keys are supported.
+  def testInvalidKeyShape(self):
+    reducer = grouping.Reducer(
+        init_func=lambda x: np.int64(0),
+        reduce_func=lambda x, y: x + y,
+        finalize_func=lambda x: x)
+
+    dataset = dataset_ops.Dataset.range(10)
+    with self.assertRaisesRegexp(
+        ValueError, "`key_func` must return a single tf.int64 tensor."):
+      dataset.apply(
+          grouping.group_by_reducer(lambda _: np.int64((0, 0)), reducer))
+
+  # TODO(b/78665031): Remove once non-int64 keys are supported.
+  def testInvalidKeyType(self):
+    reducer = grouping.Reducer(
+        init_func=lambda x: np.int64(0),
+        reduce_func=lambda x, y: x + y,
+        finalize_func=lambda x: x)
+
+    dataset = dataset_ops.Dataset.range(10)
+    with self.assertRaisesRegexp(
+        ValueError, "`key_func` must return a single tf.int64 tensor."):
+      dataset.apply(
+          grouping.group_by_reducer(lambda _: "wrong", reducer))
+
+  def testTuple(self):
+    def init_fn(_):
+      return np.array([], dtype=np.int64), np.int64(0)
+
+    def reduce_fn(state, value):
+      s1, s2 = state
+      v1, v2 = value
+      return array_ops.concat([s1, [v1]], 0), s2 + v2
+
+    def finalize_fn(s1, s2):
+      return s1, s2
+
+    reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
+    dataset = dataset_ops.Dataset.zip(
+        (dataset_ops.Dataset.range(10), dataset_ops.Dataset.range(10))).apply(
+            grouping.group_by_reducer(lambda x, y: np.int64(0), reducer))
+    get_next = dataset.make_one_shot_iterator().get_next()
+    with self.cached_session() as sess:
+      x, y = sess.run(get_next)
+      self.assertAllEqual(x, np.asarray([x for x in range(10)]))
+      self.assertEqual(y, 45)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py b/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
new file mode 100644
index 0000000000..557d56e8b9
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
@@ -0,0 +1,367 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.group_by_window()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import grouping
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import test
+
+
+# NOTE(mrry): These tests are based on the tests in bucket_ops_test.py.
+# Currently, they use a constant batch size, though should be made to use a
+# different batch size per key.
+class GroupByWindowTest(test_base.DatasetTestBase):
+
+  def _dynamicPad(self, bucket, window, window_size):
+    # TODO(mrry): To match `tf.contrib.training.bucket()`, implement a
+    # generic form of padded_batch that pads every component
+    # dynamically and does not rely on static shape information about
+    # the arguments.
+    return dataset_ops.Dataset.zip(
+        (dataset_ops.Dataset.from_tensors(bucket),
+         window.padded_batch(
+             32, (tensor_shape.TensorShape([]), tensor_shape.TensorShape(
+                 [None]), tensor_shape.TensorShape([3])))))
+
+  def testSingleBucket(self):
+
+    def _map_fn(v):
+      return (v, array_ops.fill([v], v),
+              array_ops.fill([3], string_ops.as_string(v)))
+
+    input_dataset = (
+        dataset_ops.Dataset.from_tensor_slices(math_ops.range(32)).map(_map_fn))
+
+    bucketed_dataset = input_dataset.apply(
+        grouping.group_by_window(
+            lambda x, y, z: 0,
+            lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
+
+    iterator = bucketed_dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      which_bucket, bucketed_values = sess.run(get_next)
+
+      self.assertEqual(0, which_bucket)
+
+      expected_scalar_int = np.arange(32, dtype=np.int64)
+      expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
+      for i in range(32):
+        expected_unk_int64[i, :i] = i
+      expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T
+
+      self.assertAllEqual(expected_scalar_int, bucketed_values[0])
+      self.assertAllEqual(expected_unk_int64, bucketed_values[1])
+      self.assertAllEqual(expected_vec3_str, bucketed_values[2])
+
+  def testEvenOddBuckets(self):
+
+    def _map_fn(v):
+      return (v, array_ops.fill([v], v),
+              array_ops.fill([3], string_ops.as_string(v)))
+
+    input_dataset = (
+        dataset_ops.Dataset.from_tensor_slices(math_ops.range(64)).map(_map_fn))
+
+    bucketed_dataset = input_dataset.apply(
+        grouping.group_by_window(
+            lambda x, y, z: math_ops.cast(x % 2, dtypes.int64),
+            lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
+
+    iterator = bucketed_dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      # Get two minibatches (one containing even values, one containing odds)
+      which_bucket_even, bucketed_values_even = sess.run(get_next)
+      which_bucket_odd, bucketed_values_odd = sess.run(get_next)
+
+      # Count number of bucket_tensors.
+      self.assertEqual(3, len(bucketed_values_even))
+      self.assertEqual(3, len(bucketed_values_odd))
+
+      # Ensure bucket 0 was used for all minibatch entries.
+      self.assertAllEqual(0, which_bucket_even)
+      self.assertAllEqual(1, which_bucket_odd)
+
+      # Test the first bucket outputted, the events starting at 0
+      expected_scalar_int = np.arange(0, 32 * 2, 2, dtype=np.int64)
+      expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
+      for i in range(0, 32):
+        expected_unk_int64[i, :2 * i] = 2 * i
+        expected_vec3_str = np.vstack(
+            3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T
+
+      self.assertAllEqual(expected_scalar_int, bucketed_values_even[0])
+      self.assertAllEqual(expected_unk_int64, bucketed_values_even[1])
+      self.assertAllEqual(expected_vec3_str, bucketed_values_even[2])
+
+      # Test the second bucket outputted, the odds starting at 1
+      expected_scalar_int = np.arange(1, 32 * 2 + 1, 2, dtype=np.int64)
+      expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
+      for i in range(0, 32):
+        expected_unk_int64[i, :2 * i + 1] = 2 * i + 1
+        expected_vec3_str = np.vstack(
+            3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T
+
+      self.assertAllEqual(expected_scalar_int, bucketed_values_odd[0])
+      self.assertAllEqual(expected_unk_int64, bucketed_values_odd[1])
+      self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2])
+
+  def testEvenOddBucketsFilterOutAllOdd(self):
+
+    def _map_fn(v):
+      return {
+          "x": v,
+          "y": array_ops.fill([v], v),
+          "z": array_ops.fill([3], string_ops.as_string(v))
+      }
+
+    def _dynamic_pad_fn(bucket, window, _):
+      return dataset_ops.Dataset.zip(
+          (dataset_ops.Dataset.from_tensors(bucket),
+           window.padded_batch(
+               32, {
+                   "x": tensor_shape.TensorShape([]),
+                   "y": tensor_shape.TensorShape([None]),
+                   "z": tensor_shape.TensorShape([3])
+               })))
+
+    input_dataset = (
+        dataset_ops.Dataset.from_tensor_slices(math_ops.range(128)).map(_map_fn)
+        .filter(lambda d: math_ops.equal(d["x"] % 2, 0)))
+
+    bucketed_dataset = input_dataset.apply(
+        grouping.group_by_window(
+            lambda d: math_ops.cast(d["x"] % 2, dtypes.int64),
+            lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32))
+
+    iterator = bucketed_dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+
+      # Get two minibatches ([0, 2, ...] and [64, 66, ...])
+      which_bucket0, bucketed_values_even0 = sess.run(get_next)
+      which_bucket1, bucketed_values_even1 = sess.run(get_next)
+
+      # Ensure that bucket 1 was completely filtered out
+      self.assertAllEqual(0, which_bucket0)
+      self.assertAllEqual(0, which_bucket1)
+      self.assertAllEqual(
+          np.arange(0, 64, 2, dtype=np.int64), bucketed_values_even0["x"])
+      self.assertAllEqual(
+          np.arange(64, 128, 2, dtype=np.int64), bucketed_values_even1["x"])
+
+  def testDynamicWindowSize(self):
+    components = np.arange(100).astype(np.int64)
+
+    # Key fn: even/odd
+    # Reduce fn: batches of 5
+    # Window size fn: even=5, odd=10
+
+    def window_size_func(key):
+      window_sizes = constant_op.constant([5, 10], dtype=dtypes.int64)
+      return window_sizes[key]
+
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
+        grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(20),
+                                 None, window_size_func))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(errors.OutOfRangeError):
+        batches = 0
+        while True:
+          result = sess.run(get_next)
+          is_even = all(x % 2 == 0 for x in result)
+          is_odd = all(x % 2 == 1 for x in result)
+          self.assertTrue(is_even or is_odd)
+          expected_batch_size = 5 if is_even else 10
+          self.assertEqual(expected_batch_size, result.shape[0])
+          batches += 1
+
+      self.assertEqual(batches, 15)
+
+  def testSimple(self):
+    components = np.random.randint(100, size=(200,)).astype(np.int64)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components).map(lambda x: x * x)
+        .apply(
+            grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
+                                     4)).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      counts = []
+      with self.assertRaises(errors.OutOfRangeError):
+        while True:
+          result = sess.run(get_next)
+          self.assertTrue(
+              all(x % 2 == 0
+                  for x in result) or all(x % 2 == 1)
+              for x in result)
+          counts.append(result.shape[0])
+
+      self.assertEqual(len(components), sum(counts))
+      num_full_batches = len([c for c in counts if c == 4])
+      self.assertGreaterEqual(num_full_batches, 24)
+      self.assertTrue(all(c == 4 for c in counts[:num_full_batches]))
+
+  def testImmediateOutput(self):
+    components = np.array(
+        [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0], dtype=np.int64)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components).repeat(-1).apply(
+            grouping.group_by_window(lambda x: x % 3, lambda _, xs: xs.batch(4),
+                                     4)).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      # The input is infinite, so this test demonstrates that:
+      # 1. We produce output without having to consume the entire input,
+      # 2. Different buckets can produce output at different rates, and
+      # 3. For deterministic input, the output is deterministic.
+      for _ in range(3):
+        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
+        self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
+        self.assertAllEqual([2, 2, 2, 2], sess.run(get_next))
+        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
+
+  def testSmallGroups(self):
+    components = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64)
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components).apply(
+            grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
+                                     4)).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
+      self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
+      # The small outputs at the end are deterministically produced in key
+      # order.
+      self.assertAllEqual([0, 0, 0], sess.run(get_next))
+      self.assertAllEqual([1], sess.run(get_next))
+
+  def testEmpty(self):
+    iterator = (
+        dataset_ops.Dataset.range(4).apply(
+            grouping.group_by_window(lambda _: 0, lambda _, xs: xs, 0))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          "Window size must be greater than zero, but got 0."):
+        print(sess.run(get_next))
+
+  def testReduceFuncError(self):
+    components = np.random.randint(100, size=(200,)).astype(np.int64)
+
+    def reduce_func(_, xs):
+      # Introduce an incorrect padded shape that cannot (currently) be
+      # detected at graph construction time.
+      return xs.padded_batch(
+          4,
+          padded_shapes=(tensor_shape.TensorShape([]),
+                         constant_op.constant([5], dtype=dtypes.int64) * -1))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: (x, ops.convert_to_tensor([x * x]))).apply(
+            grouping.group_by_window(lambda x, _: x % 2, reduce_func,
+                                     32)).make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(get_next)
+
+  def testConsumeWindowDatasetMoreThanOnce(self):
+    components = np.random.randint(50, size=(200,)).astype(np.int64)
+
+    def reduce_func(key, window):
+      # Apply two different kinds of padding to the input: tight
+      # padding, and quantized (to a multiple of 10) padding.
+      return dataset_ops.Dataset.zip((
+          window.padded_batch(
+              4, padded_shapes=tensor_shape.TensorShape([None])),
+          window.padded_batch(
+              4, padded_shapes=ops.convert_to_tensor([(key + 1) * 10])),
+      ))
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.fill([math_ops.cast(x, dtypes.int32)], x))
+        .apply(grouping.group_by_window(
+            lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64),
+            reduce_func, 4))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      counts = []
+      with self.assertRaises(errors.OutOfRangeError):
+        while True:
+          tight_result, multiple_of_10_result = sess.run(get_next)
+          self.assertEqual(0, multiple_of_10_result.shape[1] % 10)
+          self.assertAllEqual(tight_result,
+                              multiple_of_10_result[:, :tight_result.shape[1]])
+          counts.append(tight_result.shape[0])
+      self.assertEqual(len(components), sum(counts))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py b/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
new file mode 100644
index 0000000000..c0ec1486ab
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
@@ -0,0 +1,115 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.ignore_errors()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import error_ops
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import io_ops
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+_NUMPY_RANDOM_SEED = 42
+
+
+class IgnoreErrorsTest(test_base.DatasetTestBase):
+
+  def testMapIgnoreError(self):
+    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(components)
+        .map(lambda x: array_ops.check_numerics(x, "message")).apply(
+            error_ops.ignore_errors()))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for x in [1., 2., 3., 5.]:
+        self.assertEqual(x, sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testParallelMapIgnoreError(self):
+    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(components).map(
+            lambda x: array_ops.check_numerics(x, "message"),
+            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for x in [1., 2., 3., 5.]:
+        self.assertEqual(x, sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testReadFileIgnoreError(self):
+
+    def write_string_to_file(value, filename):
+      with open(filename, "w") as f:
+        f.write(value)
+
+    filenames = [
+        os.path.join(self.get_temp_dir(), "file_%d.txt" % i) for i in range(5)
+    ]
+    for filename in filenames:
+      write_string_to_file(filename, filename)
+
+    dataset = (
+        dataset_ops.Dataset.from_tensor_slices(filenames).map(
+            io_ops.read_file,
+            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      # All of the files are present.
+      sess.run(init_op)
+      for filename in filenames:
+        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+      # Delete one of the files.
+      os.remove(filenames[0])
+
+      # Attempting to read filenames[0] will fail, but ignore_errors()
+      # will catch the error.
+      sess.run(init_op)
+      for filename in filenames[1:]:
+        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
new file mode 100644
index 0000000000..5ee94e14dc
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
@@ -0,0 +1,239 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.make_batched_features_dataset()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.ops import readers as core_readers
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import test
+
+
+class MakeBatchedFeaturesDatasetTest(
+    reader_dataset_ops_test_base.MakeBatchedFeaturesDatasetTestBase):
+
+  def testRead(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 10]:
+        with ops.Graph().as_default() as g:
+          with self.session(graph=g) as sess:
+            # Basic test: read from file 0.
+            self.outputs = self.make_batch_feature(
+                filenames=self.test_filenames[0],
+                label_key="label",
+                num_epochs=num_epochs,
+                batch_size=batch_size).make_one_shot_iterator().get_next()
+            self.verify_records(
+                sess,
+                batch_size,
+                0,
+                num_epochs=num_epochs,
+                label_key_provided=True)
+            with self.assertRaises(errors.OutOfRangeError):
+              self._next_actual_batch(sess, label_key_provided=True)
+
+        with ops.Graph().as_default() as g:
+          with self.session(graph=g) as sess:
+            # Basic test: read from file 1.
+            self.outputs = self.make_batch_feature(
+                filenames=self.test_filenames[1],
+                label_key="label",
+                num_epochs=num_epochs,
+                batch_size=batch_size).make_one_shot_iterator().get_next()
+            self.verify_records(
+                sess,
+                batch_size,
+                1,
+                num_epochs=num_epochs,
+                label_key_provided=True)
+            with self.assertRaises(errors.OutOfRangeError):
+              self._next_actual_batch(sess, label_key_provided=True)
+
+        with ops.Graph().as_default() as g:
+          with self.session(graph=g) as sess:
+            # Basic test: read from both files.
+            self.outputs = self.make_batch_feature(
+                filenames=self.test_filenames,
+                label_key="label",
+                num_epochs=num_epochs,
+                batch_size=batch_size).make_one_shot_iterator().get_next()
+            self.verify_records(
+                sess,
+                batch_size,
+                num_epochs=num_epochs,
+                label_key_provided=True)
+            with self.assertRaises(errors.OutOfRangeError):
+              self._next_actual_batch(sess, label_key_provided=True)
+
+        with ops.Graph().as_default() as g:
+          with self.session(graph=g) as sess:
+            # Basic test: read from both files.
+            self.outputs = self.make_batch_feature(
+                filenames=self.test_filenames,
+                num_epochs=num_epochs,
+                batch_size=batch_size).make_one_shot_iterator().get_next()
+            self.verify_records(sess, batch_size, num_epochs=num_epochs)
+            with self.assertRaises(errors.OutOfRangeError):
+              self._next_actual_batch(sess)
+
+  def testReadWithEquivalentDataset(self):
+    features = {
+        "file": parsing_ops.FixedLenFeature([], dtypes.int64),
+        "record": parsing_ops.FixedLenFeature([], dtypes.int64),
+    }
+    dataset = (
+        core_readers.TFRecordDataset(self.test_filenames)
+        .map(lambda x: parsing_ops.parse_single_example(x, features))
+        .repeat(10).batch(2))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for file_batch, _, _, _, record_batch, _ in self._next_expected_batch(
+          range(self._num_files), 2, 10):
+        actual_batch = sess.run(next_element)
+        self.assertAllEqual(file_batch, actual_batch["file"])
+        self.assertAllEqual(record_batch, actual_batch["record"])
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testReadWithFusedShuffleRepeatDataset(self):
+    num_epochs = 5
+    total_records = num_epochs * self._num_records
+    for batch_size in [1, 2]:
+      # Test that shuffling with same seed produces the same result.
+      with ops.Graph().as_default() as g:
+        with self.session(graph=g) as sess:
+          outputs1 = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              shuffle=True,
+              shuffle_seed=5).make_one_shot_iterator().get_next()
+          outputs2 = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              shuffle=True,
+              shuffle_seed=5).make_one_shot_iterator().get_next()
+          for _ in range(total_records // batch_size):
+            batch1 = self._run_actual_batch(outputs1, sess)
+            batch2 = self._run_actual_batch(outputs2, sess)
+            for i in range(len(batch1)):
+              self.assertAllEqual(batch1[i], batch2[i])
+
+      # Test that shuffling with different seeds produces a different order.
+      with ops.Graph().as_default() as g:
+        with self.session(graph=g) as sess:
+          outputs1 = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              shuffle=True,
+              shuffle_seed=5).make_one_shot_iterator().get_next()
+          outputs2 = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              shuffle=True,
+              shuffle_seed=15).make_one_shot_iterator().get_next()
+          all_equal = True
+          for _ in range(total_records // batch_size):
+            batch1 = self._run_actual_batch(outputs1, sess)
+            batch2 = self._run_actual_batch(outputs2, sess)
+            for i in range(len(batch1)):
+              all_equal = all_equal and np.array_equal(batch1[i], batch2[i])
+          self.assertFalse(all_equal)
+
+  def testParallelReadersAndParsers(self):
+    num_epochs = 5
+    for batch_size in [1, 2]:
+      for reader_num_threads in [2, 4]:
+        for parser_num_threads in [2, 4]:
+          with ops.Graph().as_default() as g:
+            with self.session(graph=g) as sess:
+              self.outputs = self.make_batch_feature(
+                  filenames=self.test_filenames,
+                  label_key="label",
+                  num_epochs=num_epochs,
+                  batch_size=batch_size,
+                  reader_num_threads=reader_num_threads,
+                  parser_num_threads=parser_num_threads).make_one_shot_iterator(
+                  ).get_next()
+              self.verify_records(
+                  sess,
+                  batch_size,
+                  num_epochs=num_epochs,
+                  label_key_provided=True,
+                  interleave_cycle_length=reader_num_threads)
+              with self.assertRaises(errors.OutOfRangeError):
+                self._next_actual_batch(sess, label_key_provided=True)
+
+          with ops.Graph().as_default() as g:
+            with self.session(graph=g) as sess:
+              self.outputs = self.make_batch_feature(
+                  filenames=self.test_filenames,
+                  num_epochs=num_epochs,
+                  batch_size=batch_size,
+                  reader_num_threads=reader_num_threads,
+                  parser_num_threads=parser_num_threads).make_one_shot_iterator(
+                  ).get_next()
+              self.verify_records(
+                  sess,
+                  batch_size,
+                  num_epochs=num_epochs,
+                  interleave_cycle_length=reader_num_threads)
+              with self.assertRaises(errors.OutOfRangeError):
+                self._next_actual_batch(sess)
+
+  def testDropFinalBatch(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 10]:
+        with ops.Graph().as_default():
+          # Basic test: read from file 0.
+          outputs = self.make_batch_feature(
+              filenames=self.test_filenames[0],
+              label_key="label",
+              num_epochs=num_epochs,
+              batch_size=batch_size,
+              drop_final_batch=True).make_one_shot_iterator().get_next()
+          for tensor in nest.flatten(outputs):
+            if isinstance(tensor, ops.Tensor):  # Guard against SparseTensor.
+              self.assertEqual(tensor.shape[0], batch_size)
+
+  def testIndefiniteRepeatShapeInference(self):
+    dataset = self.make_batch_feature(
+        filenames=self.test_filenames[0],
+        label_key="label",
+        num_epochs=None,
+        batch_size=32)
+    for shape, clazz in zip(nest.flatten(dataset.output_shapes),
+                            nest.flatten(dataset.output_classes)):
+      if issubclass(clazz, ops.Tensor):
+        self.assertEqual(32, shape[0])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py
similarity index 57%
rename from tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py
index a02f4bd14f..e4bf089184 100644
--- a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.make_csv_dataset()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -23,226 +23,16 @@ import zlib
 
 import numpy as np
 
-from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
 from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import string_ops
 from tensorflow.python.platform import test
 
 
-class ReadBatchFeaturesTest(
-    reader_dataset_ops_test_base.ReadBatchFeaturesTestBase):
-
-  def testRead(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 10]:
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from file 0.
-            self.outputs = self.make_batch_feature(
-                filenames=self.test_filenames[0],
-                label_key="label",
-                num_epochs=num_epochs,
-                batch_size=batch_size).make_one_shot_iterator().get_next()
-            self.verify_records(
-                sess,
-                batch_size,
-                0,
-                num_epochs=num_epochs,
-                label_key_provided=True)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess, label_key_provided=True)
-
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from file 1.
-            self.outputs = self.make_batch_feature(
-                filenames=self.test_filenames[1],
-                label_key="label",
-                num_epochs=num_epochs,
-                batch_size=batch_size).make_one_shot_iterator().get_next()
-            self.verify_records(
-                sess,
-                batch_size,
-                1,
-                num_epochs=num_epochs,
-                label_key_provided=True)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess, label_key_provided=True)
-
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from both files.
-            self.outputs = self.make_batch_feature(
-                filenames=self.test_filenames,
-                label_key="label",
-                num_epochs=num_epochs,
-                batch_size=batch_size).make_one_shot_iterator().get_next()
-            self.verify_records(
-                sess,
-                batch_size,
-                num_epochs=num_epochs,
-                label_key_provided=True)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess, label_key_provided=True)
-
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from both files.
-            self.outputs = self.make_batch_feature(
-                filenames=self.test_filenames,
-                num_epochs=num_epochs,
-                batch_size=batch_size).make_one_shot_iterator().get_next()
-            self.verify_records(sess, batch_size, num_epochs=num_epochs)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess)
-
-  def testReadWithEquivalentDataset(self):
-    features = {
-        "file": parsing_ops.FixedLenFeature([], dtypes.int64),
-        "record": parsing_ops.FixedLenFeature([], dtypes.int64),
-    }
-    dataset = (
-        core_readers.TFRecordDataset(self.test_filenames)
-        .map(lambda x: parsing_ops.parse_single_example(x, features))
-        .repeat(10).batch(2))
-    iterator = dataset.make_initializable_iterator()
-    init_op = iterator.initializer
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for file_batch, _, _, _, record_batch, _ in self._next_expected_batch(
-          range(self._num_files), 2, 10):
-        actual_batch = sess.run(next_element)
-        self.assertAllEqual(file_batch, actual_batch["file"])
-        self.assertAllEqual(record_batch, actual_batch["record"])
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testReadWithFusedShuffleRepeatDataset(self):
-    num_epochs = 5
-    total_records = num_epochs * self._num_records
-    for batch_size in [1, 2]:
-      # Test that shuffling with same seed produces the same result.
-      with ops.Graph().as_default() as g:
-        with self.session(graph=g) as sess:
-          outputs1 = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              shuffle=True,
-              shuffle_seed=5).make_one_shot_iterator().get_next()
-          outputs2 = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              shuffle=True,
-              shuffle_seed=5).make_one_shot_iterator().get_next()
-          for _ in range(total_records // batch_size):
-            batch1 = self._run_actual_batch(outputs1, sess)
-            batch2 = self._run_actual_batch(outputs2, sess)
-            for i in range(len(batch1)):
-              self.assertAllEqual(batch1[i], batch2[i])
-
-      # Test that shuffling with different seeds produces a different order.
-      with ops.Graph().as_default() as g:
-        with self.session(graph=g) as sess:
-          outputs1 = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              shuffle=True,
-              shuffle_seed=5).make_one_shot_iterator().get_next()
-          outputs2 = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              shuffle=True,
-              shuffle_seed=15).make_one_shot_iterator().get_next()
-          all_equal = True
-          for _ in range(total_records // batch_size):
-            batch1 = self._run_actual_batch(outputs1, sess)
-            batch2 = self._run_actual_batch(outputs2, sess)
-            for i in range(len(batch1)):
-              all_equal = all_equal and np.array_equal(batch1[i], batch2[i])
-          self.assertFalse(all_equal)
-
-  def testParallelReadersAndParsers(self):
-    num_epochs = 5
-    for batch_size in [1, 2]:
-      for reader_num_threads in [2, 4]:
-        for parser_num_threads in [2, 4]:
-          with ops.Graph().as_default() as g:
-            with self.session(graph=g) as sess:
-              self.outputs = self.make_batch_feature(
-                  filenames=self.test_filenames,
-                  label_key="label",
-                  num_epochs=num_epochs,
-                  batch_size=batch_size,
-                  reader_num_threads=reader_num_threads,
-                  parser_num_threads=parser_num_threads).make_one_shot_iterator(
-                  ).get_next()
-              self.verify_records(
-                  sess,
-                  batch_size,
-                  num_epochs=num_epochs,
-                  label_key_provided=True,
-                  interleave_cycle_length=reader_num_threads)
-              with self.assertRaises(errors.OutOfRangeError):
-                self._next_actual_batch(sess, label_key_provided=True)
-
-          with ops.Graph().as_default() as g:
-            with self.session(graph=g) as sess:
-              self.outputs = self.make_batch_feature(
-                  filenames=self.test_filenames,
-                  num_epochs=num_epochs,
-                  batch_size=batch_size,
-                  reader_num_threads=reader_num_threads,
-                  parser_num_threads=parser_num_threads).make_one_shot_iterator(
-                  ).get_next()
-              self.verify_records(
-                  sess,
-                  batch_size,
-                  num_epochs=num_epochs,
-                  interleave_cycle_length=reader_num_threads)
-              with self.assertRaises(errors.OutOfRangeError):
-                self._next_actual_batch(sess)
-
-  def testDropFinalBatch(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 10]:
-        with ops.Graph().as_default():
-          # Basic test: read from file 0.
-          outputs = self.make_batch_feature(
-              filenames=self.test_filenames[0],
-              label_key="label",
-              num_epochs=num_epochs,
-              batch_size=batch_size,
-              drop_final_batch=True).make_one_shot_iterator().get_next()
-          for tensor in nest.flatten(outputs):
-            if isinstance(tensor, ops.Tensor):  # Guard against SparseTensor.
-              self.assertEqual(tensor.shape[0], batch_size)
-
-  def testIndefiniteRepeatShapeInference(self):
-    dataset = self.make_batch_feature(
-        filenames=self.test_filenames[0],
-        label_key="label",
-        num_epochs=None,
-        batch_size=32)
-    for shape, clazz in zip(nest.flatten(dataset.output_shapes),
-                            nest.flatten(dataset.output_classes)):
-      if issubclass(clazz, ops.Tensor):
-        self.assertEqual(32, shape[0])
-
-
 class MakeCsvDatasetTest(test_base.DatasetTestBase):
 
   def _make_csv_dataset(self, filenames, batch_size, num_epochs=1, **kwargs):
@@ -866,218 +656,5 @@ class MakeCsvDatasetTest(test_base.DatasetTestBase):
       self.assertEqual(32, shape[0])
 
 
-class MakeTFRecordDatasetTest(
-    reader_dataset_ops_test_base.TFRecordDatasetTestBase):
-
-  def _interleave(self, iterators, cycle_length):
-    pending_iterators = iterators
-    open_iterators = []
-    num_open = 0
-    for i in range(cycle_length):
-      if pending_iterators:
-        open_iterators.append(pending_iterators.pop(0))
-        num_open += 1
-
-    while num_open:
-      for i in range(min(cycle_length, len(open_iterators))):
-        if open_iterators[i] is None:
-          continue
-        try:
-          yield next(open_iterators[i])
-        except StopIteration:
-          if pending_iterators:
-            open_iterators[i] = pending_iterators.pop(0)
-          else:
-            open_iterators[i] = None
-            num_open -= 1
-
-  def _next_expected_batch(self,
-                           file_indices,
-                           batch_size,
-                           num_epochs,
-                           cycle_length,
-                           drop_final_batch,
-                           use_parser_fn):
-
-    def _next_record(file_indices):
-      for j in file_indices:
-        for i in range(self._num_records):
-          yield j, i
-
-    def _next_record_interleaved(file_indices, cycle_length):
-      return self._interleave([_next_record([i]) for i in file_indices],
-                              cycle_length)
-
-    record_batch = []
-    batch_index = 0
-    for _ in range(num_epochs):
-      if cycle_length == 1:
-        next_records = _next_record(file_indices)
-      else:
-        next_records = _next_record_interleaved(file_indices, cycle_length)
-      for f, r in next_records:
-        record = self._record(f, r)
-        if use_parser_fn:
-          record = record[1:]
-        record_batch.append(record)
-        batch_index += 1
-        if len(record_batch) == batch_size:
-          yield record_batch
-          record_batch = []
-          batch_index = 0
-    if record_batch and not drop_final_batch:
-      yield record_batch
-
-  def _verify_records(self,
-                      sess,
-                      outputs,
-                      batch_size,
-                      file_index,
-                      num_epochs,
-                      interleave_cycle_length,
-                      drop_final_batch,
-                      use_parser_fn):
-    if file_index is not None:
-      file_indices = [file_index]
-    else:
-      file_indices = range(self._num_files)
-
-    for expected_batch in self._next_expected_batch(
-        file_indices, batch_size, num_epochs, interleave_cycle_length,
-        drop_final_batch, use_parser_fn):
-      actual_batch = sess.run(outputs)
-      self.assertAllEqual(expected_batch, actual_batch)
-
-  def _read_test(self, batch_size, num_epochs, file_index=None,
-                 num_parallel_reads=1, drop_final_batch=False, parser_fn=False):
-    if file_index is None:
-      file_pattern = self.test_filenames
-    else:
-      file_pattern = self.test_filenames[file_index]
-
-    if parser_fn:
-      fn = lambda x: string_ops.substr(x, 1, 999)
-    else:
-      fn = None
-
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        outputs = readers.make_tf_record_dataset(
-            file_pattern=file_pattern,
-            num_epochs=num_epochs,
-            batch_size=batch_size,
-            parser_fn=fn,
-            num_parallel_reads=num_parallel_reads,
-            drop_final_batch=drop_final_batch,
-            shuffle=False).make_one_shot_iterator().get_next()
-        self._verify_records(
-            sess, outputs, batch_size, file_index, num_epochs=num_epochs,
-            interleave_cycle_length=num_parallel_reads,
-            drop_final_batch=drop_final_batch, use_parser_fn=parser_fn)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(outputs)
-
-  def testRead(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 3]:
-        # Basic test: read from file 0.
-        self._read_test(batch_size, num_epochs, 0)
-
-        # Basic test: read from file 1.
-        self._read_test(batch_size, num_epochs, 1)
-
-        # Basic test: read from both files.
-        self._read_test(batch_size, num_epochs)
-
-        # Basic test: read from both files, with parallel reads.
-        self._read_test(batch_size, num_epochs, num_parallel_reads=8)
-
-  def testDropFinalBatch(self):
-    for batch_size in [1, 2, 10]:
-      for num_epochs in [1, 3]:
-        # Read from file 0.
-        self._read_test(batch_size, num_epochs, 0, drop_final_batch=True)
-
-        # Read from both files.
-        self._read_test(batch_size, num_epochs, drop_final_batch=True)
-
-        # Read from both files, with parallel reads.
-        self._read_test(batch_size, num_epochs, num_parallel_reads=8,
-                        drop_final_batch=True)
-
-  def testParserFn(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 3]:
-        for drop_final_batch in [False, True]:
-          self._read_test(batch_size, num_epochs, parser_fn=True,
-                          drop_final_batch=drop_final_batch)
-          self._read_test(batch_size, num_epochs, num_parallel_reads=8,
-                          parser_fn=True, drop_final_batch=drop_final_batch)
-
-  def _shuffle_test(self, batch_size, num_epochs, num_parallel_reads=1,
-                    seed=None):
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        dataset = readers.make_tf_record_dataset(
-            file_pattern=self.test_filenames,
-            num_epochs=num_epochs,
-            batch_size=batch_size,
-            num_parallel_reads=num_parallel_reads,
-            shuffle=True,
-            shuffle_seed=seed)
-        iterator = dataset.make_initializable_iterator()
-        next_element = iterator.get_next()
-
-        sess.run(iterator.initializer)
-        first_batches = []
-        try:
-          while True:
-            first_batches.append(sess.run(next_element))
-        except errors.OutOfRangeError:
-          pass
-
-        sess.run(iterator.initializer)
-        second_batches = []
-        try:
-          while True:
-            second_batches.append(sess.run(next_element))
-        except errors.OutOfRangeError:
-          pass
-
-        self.assertEqual(len(first_batches), len(second_batches))
-        if seed is not None:
-          # if you set a seed, should get the same results
-          for i in range(len(first_batches)):
-            self.assertAllEqual(first_batches[i], second_batches[i])
-
-        expected = []
-        for f in range(self._num_files):
-          for r in range(self._num_records):
-            expected.extend([self._record(f, r)] * num_epochs)
-
-        for batches in (first_batches, second_batches):
-          actual = []
-          for b in batches:
-            actual.extend(b)
-          self.assertAllEqual(sorted(expected), sorted(actual))
-
-  def testShuffle(self):
-    for batch_size in [1, 2]:
-      for num_epochs in [1, 3]:
-        for num_parallel_reads in [1, 2]:
-          # Test that all expected elements are produced
-          self._shuffle_test(batch_size, num_epochs, num_parallel_reads)
-          # Test that elements are produced in a consistent order if
-          # you specify a seed.
-          self._shuffle_test(batch_size, num_epochs, num_parallel_reads,
-                             seed=21345)
-
-  def testIndefiniteRepeatShapeInference(self):
-    dataset = readers.make_tf_record_dataset(
-        file_pattern=self.test_filenames, num_epochs=None, batch_size=32)
-    for shape in nest.flatten(dataset.output_shapes):
-      self.assertEqual(32, shape[0])
-
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
new file mode 100644
index 0000000000..657cf3c00e
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
@@ -0,0 +1,243 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.make_tf_record_dataset()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
+from tensorflow.python.data.experimental.ops import readers
+from tensorflow.python.data.util import nest
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import test
+
+
+class MakeTFRecordDatasetTest(
+    reader_dataset_ops_test_base.TFRecordDatasetTestBase):
+
+  def _interleave(self, iterators, cycle_length):
+    pending_iterators = iterators
+    open_iterators = []
+    num_open = 0
+    for i in range(cycle_length):
+      if pending_iterators:
+        open_iterators.append(pending_iterators.pop(0))
+        num_open += 1
+
+    while num_open:
+      for i in range(min(cycle_length, len(open_iterators))):
+        if open_iterators[i] is None:
+          continue
+        try:
+          yield next(open_iterators[i])
+        except StopIteration:
+          if pending_iterators:
+            open_iterators[i] = pending_iterators.pop(0)
+          else:
+            open_iterators[i] = None
+            num_open -= 1
+
+  def _next_expected_batch(self,
+                           file_indices,
+                           batch_size,
+                           num_epochs,
+                           cycle_length,
+                           drop_final_batch,
+                           use_parser_fn):
+
+    def _next_record(file_indices):
+      for j in file_indices:
+        for i in range(self._num_records):
+          yield j, i
+
+    def _next_record_interleaved(file_indices, cycle_length):
+      return self._interleave([_next_record([i]) for i in file_indices],
+                              cycle_length)
+
+    record_batch = []
+    batch_index = 0
+    for _ in range(num_epochs):
+      if cycle_length == 1:
+        next_records = _next_record(file_indices)
+      else:
+        next_records = _next_record_interleaved(file_indices, cycle_length)
+      for f, r in next_records:
+        record = self._record(f, r)
+        if use_parser_fn:
+          record = record[1:]
+        record_batch.append(record)
+        batch_index += 1
+        if len(record_batch) == batch_size:
+          yield record_batch
+          record_batch = []
+          batch_index = 0
+    if record_batch and not drop_final_batch:
+      yield record_batch
+
+  def _verify_records(self,
+                      sess,
+                      outputs,
+                      batch_size,
+                      file_index,
+                      num_epochs,
+                      interleave_cycle_length,
+                      drop_final_batch,
+                      use_parser_fn):
+    if file_index is not None:
+      file_indices = [file_index]
+    else:
+      file_indices = range(self._num_files)
+
+    for expected_batch in self._next_expected_batch(
+        file_indices, batch_size, num_epochs, interleave_cycle_length,
+        drop_final_batch, use_parser_fn):
+      actual_batch = sess.run(outputs)
+      self.assertAllEqual(expected_batch, actual_batch)
+
+  def _read_test(self, batch_size, num_epochs, file_index=None,
+                 num_parallel_reads=1, drop_final_batch=False, parser_fn=False):
+    if file_index is None:
+      file_pattern = self.test_filenames
+    else:
+      file_pattern = self.test_filenames[file_index]
+
+    if parser_fn:
+      fn = lambda x: string_ops.substr(x, 1, 999)
+    else:
+      fn = None
+
+    with ops.Graph().as_default() as g:
+      with self.session(graph=g) as sess:
+        outputs = readers.make_tf_record_dataset(
+            file_pattern=file_pattern,
+            num_epochs=num_epochs,
+            batch_size=batch_size,
+            parser_fn=fn,
+            num_parallel_reads=num_parallel_reads,
+            drop_final_batch=drop_final_batch,
+            shuffle=False).make_one_shot_iterator().get_next()
+        self._verify_records(
+            sess, outputs, batch_size, file_index, num_epochs=num_epochs,
+            interleave_cycle_length=num_parallel_reads,
+            drop_final_batch=drop_final_batch, use_parser_fn=parser_fn)
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(outputs)
+
+  def testRead(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 3]:
+        # Basic test: read from file 0.
+        self._read_test(batch_size, num_epochs, 0)
+
+        # Basic test: read from file 1.
+        self._read_test(batch_size, num_epochs, 1)
+
+        # Basic test: read from both files.
+        self._read_test(batch_size, num_epochs)
+
+        # Basic test: read from both files, with parallel reads.
+        self._read_test(batch_size, num_epochs, num_parallel_reads=8)
+
+  def testDropFinalBatch(self):
+    for batch_size in [1, 2, 10]:
+      for num_epochs in [1, 3]:
+        # Read from file 0.
+        self._read_test(batch_size, num_epochs, 0, drop_final_batch=True)
+
+        # Read from both files.
+        self._read_test(batch_size, num_epochs, drop_final_batch=True)
+
+        # Read from both files, with parallel reads.
+        self._read_test(batch_size, num_epochs, num_parallel_reads=8,
+                        drop_final_batch=True)
+
+  def testParserFn(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 3]:
+        for drop_final_batch in [False, True]:
+          self._read_test(batch_size, num_epochs, parser_fn=True,
+                          drop_final_batch=drop_final_batch)
+          self._read_test(batch_size, num_epochs, num_parallel_reads=8,
+                          parser_fn=True, drop_final_batch=drop_final_batch)
+
+  def _shuffle_test(self, batch_size, num_epochs, num_parallel_reads=1,
+                    seed=None):
+    with ops.Graph().as_default() as g:
+      with self.session(graph=g) as sess:
+        dataset = readers.make_tf_record_dataset(
+            file_pattern=self.test_filenames,
+            num_epochs=num_epochs,
+            batch_size=batch_size,
+            num_parallel_reads=num_parallel_reads,
+            shuffle=True,
+            shuffle_seed=seed)
+        iterator = dataset.make_initializable_iterator()
+        next_element = iterator.get_next()
+
+        sess.run(iterator.initializer)
+        first_batches = []
+        try:
+          while True:
+            first_batches.append(sess.run(next_element))
+        except errors.OutOfRangeError:
+          pass
+
+        sess.run(iterator.initializer)
+        second_batches = []
+        try:
+          while True:
+            second_batches.append(sess.run(next_element))
+        except errors.OutOfRangeError:
+          pass
+
+        self.assertEqual(len(first_batches), len(second_batches))
+        if seed is not None:
+          # if you set a seed, should get the same results
+          for i in range(len(first_batches)):
+            self.assertAllEqual(first_batches[i], second_batches[i])
+
+        expected = []
+        for f in range(self._num_files):
+          for r in range(self._num_records):
+            expected.extend([self._record(f, r)] * num_epochs)
+
+        for batches in (first_batches, second_batches):
+          actual = []
+          for b in batches:
+            actual.extend(b)
+          self.assertAllEqual(sorted(expected), sorted(actual))
+
+  def testShuffle(self):
+    for batch_size in [1, 2]:
+      for num_epochs in [1, 3]:
+        for num_parallel_reads in [1, 2]:
+          # Test that all expected elements are produced
+          self._shuffle_test(batch_size, num_epochs, num_parallel_reads)
+          # Test that elements are produced in a consistent order if
+          # you specify a seed.
+          self._shuffle_test(batch_size, num_epochs, num_parallel_reads,
+                             seed=21345)
+
+  def testIndefiniteRepeatShapeInference(self):
+    dataset = readers.make_tf_record_dataset(
+        file_pattern=self.test_filenames, num_epochs=None, batch_size=32)
+    for shape in nest.flatten(dataset.output_shapes):
+      self.assertEqual(32, shape[0])
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
new file mode 100644
index 0000000000..afd0fc3abf
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -0,0 +1,337 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.map_and_batch()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import script_ops
+from tensorflow.python.platform import test
+
+
+class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ("Default", None, None),
+      ("SequentialCalls", 1, None),
+      ("ParallelCalls", 2, None),
+      ("ParallelBatches", None, 10),
+  )
+  def testMapAndBatch(self, num_parallel_calls, num_parallel_batches):
+    """Test a dataset that maps a TF function across its input elements."""
+    # The pipeline is TensorSliceDataset ->
+    # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size).
+    components = (np.arange(7),
+                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
+                  np.array(37.0) * np.arange(7))
+
+    count = array_ops.placeholder(dtypes.int64, shape=[])
+    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
+
+    def _map_fn(x, y, z):
+      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
+
+    iterator = (
+        dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
+            batching.map_and_batch(
+                map_func=_map_fn,
+                batch_size=batch_size,
+                num_parallel_calls=num_parallel_calls,
+                num_parallel_batches=num_parallel_batches))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    self.assertEqual([[None] + list(c.shape[1:]) for c in components],
+                     [t.shape.as_list() for t in get_next])
+
+    with self.cached_session() as sess:
+      # Batch of a finite input, where the batch_size divides the
+      # total number of elements.
+      sess.run(init_op, feed_dict={count: 28, batch_size: 14})
+      num_batches = (28 * 7) // 14
+      for i in range(num_batches):
+        result = sess.run(get_next)
+        for component, result_component in zip(components, result):
+          for j in range(14):
+            self.assertAllEqual(component[(i * 14 + j) % 7]**2,
+                                result_component[j])
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+      # Batch of a finite input, where the batch_size does not
+      # divide the total number of elements.
+      sess.run(init_op, feed_dict={count: 14, batch_size: 8})
+
+      # We expect (num_batches - 1) full-sized batches.
+      num_batches = int(math.ceil((14 * 7) / 8))
+      for i in range(num_batches - 1):
+        result = sess.run(get_next)
+        for component, result_component in zip(components, result):
+          for j in range(8):
+            self.assertAllEqual(component[(i * 8 + j) % 7]**2,
+                                result_component[j])
+      result = sess.run(get_next)
+      for component, result_component in zip(components, result):
+        for j in range((14 * 7) % 8):
+          self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
+                              result_component[j])
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+      # Batch of an empty input should fail straight away.
+      sess.run(init_op, feed_dict={count: 0, batch_size: 8})
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+      # Empty batch should be an initialization time error.
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(init_op, feed_dict={count: 14, batch_size: 0})
+
+  @parameterized.named_parameters(
+      ("Even", False),
+      ("Uneven", True),
+  )
+  def testMapAndBatchPartialBatch(self, drop_remainder):
+    iterator = (
+        dataset_ops.Dataset.range(10).apply(
+            batching.map_and_batch(
+                lambda x: array_ops.reshape(x * x, [1]),
+                batch_size=4,
+                drop_remainder=drop_remainder)).make_one_shot_iterator())
+    if drop_remainder:
+      self.assertEqual([4, 1], iterator.output_shapes.as_list())
+    else:
+      self.assertEqual([None, 1], iterator.output_shapes.as_list())
+    next_element = iterator.get_next()
+    with self.cached_session() as sess:
+      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
+      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
+      if not drop_remainder:
+        self.assertAllEqual([[64], [81]], sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testMapAndBatchYieldsPartialBatch(self):
+    iterator = (dataset_ops.Dataset.range(10)
+                .apply(batching.map_and_batch(
+                    lambda x: array_ops.reshape(x * x, [1]), 4))
+                .make_one_shot_iterator())
+    self.assertEqual([None, 1], iterator.output_shapes.as_list())
+    next_element = iterator.get_next()
+    with self.cached_session() as sess:
+      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
+      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
+      self.assertAllEqual([[64], [81]], sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testMapAndBatchParallelGetNext(self):
+    iterator = (dataset_ops.Dataset.range(50000)
+                .apply(batching.map_and_batch(lambda x: x, batch_size=100))
+                .make_one_shot_iterator())
+    elements = []
+    for _ in range(100):
+      elements.append(iterator.get_next())
+    with self.cached_session() as sess:
+      for i in range(5):
+        got = sess.run(elements)
+        got.sort(key=lambda x: x[0])
+        expected = []
+        for j in range(100):
+          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
+        self.assertAllEqual(got, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elements)
+
+  def testMapAndBatchParallelGetNextDropRemainder(self):
+    iterator = (
+        dataset_ops.Dataset.range(49999).apply(
+            batching.map_and_batch(
+                lambda x: x, batch_size=100, drop_remainder=True))
+        .make_one_shot_iterator())
+    elements = []
+    for _ in range(100):
+      elements.append(iterator.get_next())
+    with self.cached_session() as sess:
+      for i in range(4):
+        got = sess.run(elements)
+        got.sort(key=lambda x: x[0])
+        expected = []
+        for j in range(100):
+          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
+        self.assertAllEqual(got, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(elements)
+
+  def testMapAndBatchSparse(self):
+
+    def _sparse(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=[[0]], values=(i * [1]), dense_shape=[1])
+
+    iterator = dataset_ops.Dataset.range(10).apply(
+        batching.map_and_batch(_sparse, 5)).make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      for i in range(2):
+        actual = sess.run(get_next)
+        expected = sparse_tensor.SparseTensorValue(
+            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
+            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
+            dense_shape=[5, 1])
+        self.assertTrue(sparse_tensor.is_sparse(actual))
+        self.assertSparseValuesEqual(actual, expected)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  def testMapAndBatchFails(self):
+    """Test a dataset that maps a TF function across its input elements."""
+    dataset = dataset_ops.Dataset.from_tensors(
+        array_ops.check_numerics(
+            constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
+    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
+    iterator = (
+        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    with self.cached_session() as sess:
+      with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
+        sess.run(init_op, feed_dict={batch_size: 14})
+
+  def testMapAndBatchShapeMismatch(self):
+    """Test a dataset that maps a TF function across its input elements."""
+
+    def generator():
+      yield [1]
+      yield [2]
+      yield [3]
+      yield [[4, 5, 6]]
+
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=dtypes.int32)
+    batch_size = 4
+    iterator = (
+        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
+        .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      sess.run(init_op)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   "number of elements does not match"):
+        sess.run(get_next)
+
+  def testMapAndBatchImplicitDispose(self):
+    # Tests whether a map and batch dataset will be cleaned up correctly when
+    # the pipeline does not run it until exhaustion.
+    # The pipeline is TensorSliceDataset -> RepeatDataset(1000) ->
+    # MapAndBatchDataset(f=square_3, batch_size=100).
+    components = (np.arange(1000),
+                  np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis],
+                  np.array(37.0) * np.arange(1000))
+
+    def _map_fn(x, y, z):
+      return math_ops.square(x), math_ops.square(y), math_ops.square(z)
+
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
+        1000).apply(batching.map_and_batch(_map_fn, batch_size=100))
+    dataset = dataset.prefetch(5)
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(3):
+        sess.run(get_next)
+
+  @parameterized.named_parameters(
+      ("1", 0),
+      ("2", 5),
+      ("3", 10),
+      ("4", 90),
+      ("5", 95),
+      ("6", 99),
+  )
+  def testMapAndBatchOutOfRangeError(self, threshold):
+
+    def raising_py_fn(i):
+      if i >= threshold:
+        raise StopIteration()
+      else:
+        return i
+
+    iterator = (
+        dataset_ops.Dataset.range(100).apply(
+            batching.map_and_batch(
+                lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
+                batch_size=10)).make_one_shot_iterator())
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(threshold // 10):
+        self.assertAllEqual([i * 10 + j for j in range(10)], sess.run(get_next))
+      if threshold % 10 != 0:
+        self.assertAllEqual(
+            [threshold // 10 * 10 + j for j in range(threshold % 10)],
+            sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  @parameterized.named_parameters(
+      ("1", False, dtypes.bool),
+      ("2", -42, dtypes.int8),
+      ("3", -42, dtypes.int16),
+      ("4", -42, dtypes.int32),
+      ("5", -42, dtypes.int64),
+      ("6", 42, dtypes.uint8),
+      ("7", 42, dtypes.uint16),
+      ("8", 42.0, dtypes.float16),
+      ("9", 42.0, dtypes.float32),
+      ("10", 42.0, dtypes.float64),
+      ("11", b"hello", dtypes.string),
+  )
+  def testMapAndBatchTypes(self, element, dtype):
+    def gen():
+      yield element
+
+    dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply(
+        batching.map_and_batch(lambda x: x, batch_size=10))
+
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(10):
+        self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
similarity index 94%
rename from tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
index 4432dcb05a..5e419a9b2f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/threadpool_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline statistics gathering ops."""
+"""Tests for the private `override_threadpool()` transformation."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -32,8 +32,8 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
 
 
-class OverrideThreadpoolDatasetTest(test_base.DatasetTestBase,
-                                    parameterized.TestCase):
+class OverrideThreadpoolTest(test_base.DatasetTestBase,
+                             parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("1", 1, None),
diff --git a/tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
similarity index 99%
rename from tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
index 560902caad..90ac250df7 100644
--- a/tensorflow/python/data/experimental/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.parallel_interleave()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -37,7 +37,7 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
 
-class ParallelInterleaveDatasetTest(test_base.DatasetTestBase):
+class ParallelInterleaveTest(test_base.DatasetTestBase):
 
   def setUp(self):
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/parse_example_dataset_test.py
similarity index 99%
rename from tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/parse_example_dataset_test.py
index 13f924b656..723e709ae8 100644
--- a/tensorflow/python/data/experimental/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/parse_example_dataset_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for tensorflow.ops.parsing_ops."""
+"""Tests for `tf.data.experimental.parse_example_dataset()."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -73,7 +73,7 @@ def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
     i += 1
 
 
-class ParseExampleTest(test_base.DatasetTestBase):
+class ParseExampleDatasetTest(test_base.DatasetTestBase):
 
   def _test(self,
             input_tensor,
diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
new file mode 100644
index 0000000000..f73725366c
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
@@ -0,0 +1,234 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.prefetch_to_device()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.data.experimental.ops import prefetching_ops
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import test
+
+
+class PrefetchToDeviceTest(test_base.DatasetTestBase):
+
+  def testPrefetchToDevice(self):
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/cpu:1"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_one_shot_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element.dtype)
+    self.assertEqual([], next_element.shape)
+
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+    with self.test_session(config=worker_config) as sess:
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchToSameDevice(self):
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device(
+            "/job:localhost/replica:0/task:0/device:CPU:0"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_one_shot_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element.dtype)
+    self.assertEqual([], next_element.shape)
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchDictToDevice(self):
+    host_dataset = dataset_ops.Dataset.range(10).map(lambda x: {"a": x})
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/cpu:1"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_one_shot_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element["a"].dtype)
+    self.assertEqual([], next_element["a"].shape)
+
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+    with self.test_session(config=worker_config) as sess:
+      for i in range(10):
+        self.assertEqual({"a": i}, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchSparseTensorsToDevice(self):
+    def make_tensor(i):
+      return sparse_tensor.SparseTensorValue(
+          indices=[[0, 0]], values=(i*[1]), dense_shape=[2, 2])
+    host_dataset = dataset_ops.Dataset.range(10).map(make_tensor)
+
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/cpu:1"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_one_shot_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element.dtype)
+
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+    with self.test_session(config=worker_config) as sess:
+      for i in range(10):
+        actual = sess.run(next_element)
+        self.assertAllEqual([i], actual.values)
+        self.assertAllEqual([[0, 0]], actual.indices)
+        self.assertAllEqual([2, 2], actual.dense_shape)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchToDeviceGpu(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/gpu:0"))
+
+    iterator = device_dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchToDeviceWithReInit(self):
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/cpu:1"))
+
+    # NOTE(mrry): This device block creates the "host" dataset and iterator on
+    # /cpu:0, and ensures that the prefetching is across devices. In typical use
+    # this would not be necessary, because the GPU device would not support any
+    # of the dataset-related ops.
+    with ops.device("/cpu:0"):
+      iterator = device_dataset.make_initializable_iterator()
+
+    self.assertEqual(host_dataset.output_types, device_dataset.output_types)
+    self.assertEqual(host_dataset.output_types, iterator.output_types)
+    self.assertEqual(host_dataset.output_shapes, device_dataset.output_shapes)
+    self.assertEqual(host_dataset.output_shapes, iterator.output_shapes)
+    self.assertEqual(host_dataset.output_classes, device_dataset.output_classes)
+    self.assertEqual(host_dataset.output_classes, iterator.output_classes)
+
+    next_element = iterator.get_next()
+    self.assertEqual(dtypes.int64, next_element.dtype)
+    self.assertEqual([], next_element.shape)
+
+    worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
+    with self.test_session(config=worker_config) as sess:
+      sess.run(iterator.initializer)
+      for i in range(5):
+        self.assertEqual(i, sess.run(next_element))
+      sess.run(iterator.initializer)
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testPrefetchToDeviceGpuWithReInit(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    host_dataset = dataset_ops.Dataset.range(10)
+    device_dataset = host_dataset.apply(
+        prefetching_ops.prefetch_to_device("/gpu:0"))
+
+    iterator = device_dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer)
+      for i in range(5):
+        self.assertEqual(i, sess.run(next_element))
+      sess.run(iterator.initializer)
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
index b6ab80d132..fe0b3b5f3b 100644
--- a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
@@ -63,11 +63,11 @@ class FixedLengthRecordDatasetTestBase(test_base.DatasetTestBase):
     return filenames
 
 
-class ReadBatchFeaturesTestBase(test_base.DatasetTestBase):
+class MakeBatchedFeaturesDatasetTestBase(test_base.DatasetTestBase):
   """Base class for setting up and testing `make_batched_feature_dataset`."""
 
   def setUp(self):
-    super(ReadBatchFeaturesTestBase, self).setUp()
+    super(MakeBatchedFeaturesDatasetTestBase, self).setUp()
     self._num_files = 2
     self._num_records = 7
     self.test_filenames = self._createFiles()
diff --git a/tensorflow/python/data/experimental/kernel_tests/resample_test.py b/tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py
similarity index 97%
rename from tensorflow/python/data/experimental/kernel_tests/resample_test.py
rename to tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py
index 775648c943..4c879dbae6 100644
--- a/tensorflow/python/data/experimental/kernel_tests/resample_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.rejection_resample()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -58,7 +58,7 @@ def _time_resampling(
   return end_time - start_time
 
 
-class ResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
+class RejectionResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("InitialDistributionKnown", True),
diff --git a/tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
similarity index 95%
rename from tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
index 3fc7157bc5..516e489d04 100644
--- a/tensorflow/python/data/experimental/kernel_tests/dataset_constructor_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for the private `_RestructuredDataset` transformation."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -26,7 +26,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class DatasetConstructorTest(test_base.DatasetTestBase):
+class RestructuredDatasetTest(test_base.DatasetTestBase):
 
   def testRestructureDataset(self):
     components = (array_ops.placeholder(dtypes.int32),
diff --git a/tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
similarity index 98%
rename from tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/scan_test.py
index 78ec80de23..0730455431 100644
--- a/tensorflow/python/data/experimental/kernel_tests/scan_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.scan()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -34,7 +34,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ScanDatasetTest(test_base.DatasetTestBase):
+class ScanTest(test_base.DatasetTestBase):
 
   def _counting_dataset(self, start, scan_fn):
     return dataset_ops.Dataset.from_tensors(0).repeat().apply(
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index 58a335ae4f..e556b65b7c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -69,6 +69,26 @@ py_test(
     ],
 )
 
+py_test(
+    name = "checkpoint_input_pipeline_hook_test",
+    size = "small",
+    srcs = ["checkpoint_input_pipeline_hook_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/experimental/ops:iterator_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/estimator:estimator_py",
+    ],
+)
+
 py_test(
     name = "concatenate_dataset_serialization_test",
     size = "small",
@@ -580,7 +600,7 @@ py_test(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python/data/experimental/kernel_tests:sql_dataset_op_test_base",
+        "//tensorflow/python/data/experimental/kernel_tests:sql_dataset_test_base",
         "//tensorflow/python/data/experimental/ops:readers",
     ],
 )
diff --git a/tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/checkpoint_input_pipeline_hook_test.py
similarity index 100%
rename from tensorflow/python/data/experimental/kernel_tests/iterator_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/checkpoint_input_pipeline_hook_test.py
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
index a0dd6960b0..b3dfe21486 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/parse_example_dataset_serialization_test.py
@@ -23,7 +23,7 @@ from tensorflow.python.platform import test
 
 
 class ParseExampleDatasetSerializationTest(
-    reader_dataset_ops_test_base.ReadBatchFeaturesTestBase,
+    reader_dataset_ops_test_base.MakeBatchedFeaturesDatasetTestBase,
     dataset_serialization_test_base.DatasetSerializationTestBase):
 
   def ParseExampleDataset(self, num_repeat, batch_size):
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
index b179770ce3..006279bbe1 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/sql_dataset_serialization_test.py
@@ -19,7 +19,7 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.python.data.experimental.kernel_tests import sql_dataset_op_test_base
+from tensorflow.python.data.experimental.kernel_tests import sql_dataset_test_base
 from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.framework import dtypes
@@ -28,7 +28,7 @@ from tensorflow.python.platform import test
 
 
 class SqlDatasetSerializationTest(
-    sql_dataset_op_test_base.SqlDatasetTestBase,
+    sql_dataset_test_base.SqlDatasetTestBase,
     dataset_serialization_test_base.DatasetSerializationTestBase):
 
   def _build_dataset(self, num_repeats):
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
deleted file mode 100644
index 88d5c896c9..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/serialization_integration_test.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Integration test for dataset serialization."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import test
-from tensorflow.python.training import saver as saver_lib
-
-
-class SerializationIntegrationTest(test.TestCase):
-
-  def _build_input_pipeline(self, name, num_outputs):
-    with ops.name_scope(name):
-      ds = dataset_ops.Dataset.range(num_outputs).shuffle(
-          10, reshuffle_each_iteration=False).prefetch(10)
-      iterator = ds.make_initializable_iterator()
-      saveable = contrib_iterator_ops.make_saveable_from_iterator(iterator)
-      ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, saveable)
-      return iterator.initializer, iterator.get_next()
-
-  def _build_graph(self, num_pipelines, num_outputs):
-    init_ops = []
-    get_next_ops = []
-    for i in range(num_pipelines):
-      name = "input_pipeline_%d" % i
-      init_op, get_next_op = self._build_input_pipeline(name, num_outputs)
-      init_ops.append(init_op)
-      get_next_ops.append(get_next_op)
-    saver = saver_lib.Saver()
-    return init_ops, get_next_ops, saver
-
-  def _ckpt_path(self):
-    return os.path.join(self.get_temp_dir(), "iterator")
-
-  def testConcurrentSaves(self):
-    num_pipelines = 100
-    num_outputs = 100
-    break_point = 10
-    all_outputs = [[] for _ in range(num_pipelines)]
-    with ops.Graph().as_default() as g:
-      init_ops, get_next_ops, saver = self._build_graph(num_pipelines,
-                                                        num_outputs)
-      with self.session(graph=g) as sess:
-        sess.run(init_ops)
-        for _ in range(break_point):
-          output = sess.run(get_next_ops)
-          for i in range(num_pipelines):
-            all_outputs[i].append(output[i])
-        saver.save(sess, self._ckpt_path())
-
-    with ops.Graph().as_default() as g:
-      init_ops, get_next_ops, saver = self._build_graph(num_pipelines,
-                                                        num_outputs)
-      with self.session(graph=g) as sess:
-        saver.restore(sess, self._ckpt_path())
-        for _ in range(num_outputs - break_point):
-          output = sess.run(get_next_ops)
-          for i in range(num_pipelines):
-            all_outputs[i].append(output[i])
-
-    for output in all_outputs:
-      self.assertSequenceEqual(sorted(output), range(num_outputs))
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
similarity index 98%
rename from tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
index 50895b5945..c208963a86 100644
--- a/tensorflow/python/data/experimental/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.shuffle_and_repeat()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
diff --git a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
similarity index 99%
rename from tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
index 301f75488a..a2c1169638 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
@@ -12,19 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for experimental sql input op."""
+"""Tests for `tf.data.experimental.SqlDataset`."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.experimental.kernel_tests import sql_dataset_op_test_base
+from tensorflow.python.data.experimental.kernel_tests import sql_dataset_test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
 
 
-class SqlDatasetTest(sql_dataset_op_test_base.SqlDatasetTestBase):
+class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
 
   # Test that SqlDataset can read from a database table.
   def testReadResultSet(self):
diff --git a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py
similarity index 98%
rename from tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
rename to tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py
index a135c357f0..6aaaa90c65 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_op_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Base class for testing SqlDataset."""
-
+"""Base class for testing `tf.data.experimental.SqlDataset`."""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index 19f5a62d45..427654cd76 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -280,7 +280,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
 
 class FeatureStatsDatasetTest(
     stats_dataset_test_base.StatsDatasetTestBase,
-    reader_dataset_ops_test_base.ReadBatchFeaturesTestBase):
+    reader_dataset_ops_test_base.MakeBatchedFeaturesDatasetTestBase):
 
   def testFeaturesStats(self):
     num_epochs = 5
diff --git a/tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
similarity index 98%
rename from tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
rename to tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
index 25a2e63ba1..8fd0ad50c4 100644
--- a/tensorflow/python/data/experimental/kernel_tests/writer_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.TFRecordWriter`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
diff --git a/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
new file mode 100644
index 0000000000..0278a208cb
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
@@ -0,0 +1,300 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.unbatch()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+
+class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  def testUnbatchWithUnknownRankInput(self):
+    placeholder = array_ops.placeholder(dtypes.int32)
+    dataset = dataset_ops.Dataset.from_tensors(placeholder).apply(
+        batching.unbatch())
+    iterator = dataset.make_initializable_iterator()
+    next_elem = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer, feed_dict={placeholder: [0, 1, 2, 3]})
+      for i in range(4):
+        self.assertEqual(i, sess.run(next_elem))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_elem)
+
+  def testUnbatchScalarDataset(self):
+    data = tuple([math_ops.range(10) for _ in range(3)])
+    data = dataset_ops.Dataset.from_tensor_slices(data)
+    expected_types = (dtypes.int32,) * 3
+    data = data.batch(2)
+    self.assertEqual(expected_types, data.output_types)
+    data = data.apply(batching.unbatch())
+    self.assertEqual(expected_types, data.output_types)
+
+    iterator = data.make_one_shot_iterator()
+    op = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual((i,) * 3, sess.run(op))
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(op)
+
+  def testUnbatchDatasetWithStrings(self):
+    data = tuple([math_ops.range(10) for _ in range(3)])
+    data = dataset_ops.Dataset.from_tensor_slices(data)
+    data = data.map(lambda x, y, z: (x, string_ops.as_string(y), z))
+    expected_types = (dtypes.int32, dtypes.string, dtypes.int32)
+    data = data.batch(2)
+    self.assertEqual(expected_types, data.output_types)
+    data = data.apply(batching.unbatch())
+    self.assertEqual(expected_types, data.output_types)
+
+    iterator = data.make_one_shot_iterator()
+    op = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual((i, compat.as_bytes(str(i)), i), sess.run(op))
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(op)
+
+  def testUnbatchDatasetWithSparseTensor(self):
+    st = sparse_tensor.SparseTensorValue(
+        indices=[[i, i] for i in range(10)],
+        values=list(range(10)),
+        dense_shape=[10, 10])
+    data = dataset_ops.Dataset.from_tensors(st)
+    data = data.apply(batching.unbatch())
+    data = data.batch(5)
+    data = data.apply(batching.unbatch())
+    iterator = data.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        st_row = sess.run(next_element)
+        self.assertEqual([i], st_row.indices)
+        self.assertEqual([i], st_row.values)
+        self.assertEqual([10], st_row.dense_shape)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testUnbatchDatasetWithDenseAndSparseTensor(self):
+    st = sparse_tensor.SparseTensorValue(
+        indices=[[i, i] for i in range(10)],
+        values=list(range(10)),
+        dense_shape=[10, 10])
+    data = dataset_ops.Dataset.from_tensors((list(range(10)), st))
+    data = data.apply(batching.unbatch())
+    data = data.batch(5)
+    data = data.apply(batching.unbatch())
+    iterator = data.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        dense_elem, st_row = sess.run(next_element)
+        self.assertEqual(i, dense_elem)
+        self.assertEqual([i], st_row.indices)
+        self.assertEqual([i], st_row.values)
+        self.assertEqual([10], st_row.dense_shape)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testUnbatchSingleElementTupleDataset(self):
+    data = tuple([(math_ops.range(10),) for _ in range(3)])
+    data = dataset_ops.Dataset.from_tensor_slices(data)
+    expected_types = ((dtypes.int32,),) * 3
+    data = data.batch(2)
+    self.assertEqual(expected_types, data.output_types)
+    data = data.apply(batching.unbatch())
+    self.assertEqual(expected_types, data.output_types)
+
+    iterator = data.make_one_shot_iterator()
+    op = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual(((i,),) * 3, sess.run(op))
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(op)
+
+  def testUnbatchMultiElementTupleDataset(self):
+    data = tuple([(math_ops.range(10 * i, 10 * i + 10),
+                   array_ops.fill([10], "hi")) for i in range(3)])
+    data = dataset_ops.Dataset.from_tensor_slices(data)
+    expected_types = ((dtypes.int32, dtypes.string),) * 3
+    data = data.batch(2)
+    self.assertAllEqual(expected_types, data.output_types)
+    data = data.apply(batching.unbatch())
+    self.assertAllEqual(expected_types, data.output_types)
+
+    iterator = data.make_one_shot_iterator()
+    op = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for i in range(10):
+        self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")),
+                         sess.run(op))
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(op)
+
+  def testUnbatchEmpty(self):
+    data = dataset_ops.Dataset.from_tensors(
+        (constant_op.constant([]), constant_op.constant([], shape=[0, 4]),
+         constant_op.constant([], shape=[0, 4, 0])))
+    data = data.apply(batching.unbatch())
+    iterator = data.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testUnbatchStaticShapeMismatch(self):
+    data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8),
+                                             np.arange(9)))
+    with self.assertRaises(ValueError):
+      data.apply(batching.unbatch())
+
+  def testUnbatchDynamicShapeMismatch(self):
+    ph1 = array_ops.placeholder(dtypes.int32, shape=[None])
+    ph2 = array_ops.placeholder(dtypes.int32, shape=None)
+    data = dataset_ops.Dataset.from_tensors((ph1, ph2))
+    data = data.apply(batching.unbatch())
+    iterator = data.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      # Mismatch in the 0th dimension.
+      sess.run(
+          iterator.initializer,
+          feed_dict={
+              ph1: np.arange(7).astype(np.int32),
+              ph2: np.arange(8).astype(np.int32)
+          })
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(next_element)
+
+      # No 0th dimension (i.e. scalar value) for one component.
+      sess.run(
+          iterator.initializer,
+          feed_dict={
+              ph1: np.arange(7).astype(np.int32),
+              ph2: 7
+          })
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(next_element)
+
+
+class UnbatchBenchmark(test.Benchmark):
+
+  def benchmarkNativeUnbatch(self):
+    batch_sizes = [1, 2, 5, 10, 20, 50]
+    elems_per_trial = 10000
+    with ops.Graph().as_default():
+      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
+      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
+      dataset = dataset.batch(batch_size_placeholder)
+      dataset = dataset.apply(batching.unbatch())
+      dataset = dataset.skip(elems_per_trial)
+      iterator = dataset.make_initializable_iterator()
+      next_element = iterator.get_next()
+
+      with session.Session() as sess:
+        for batch_size in batch_sizes:
+          deltas = []
+          for _ in range(5):
+            sess.run(
+                iterator.initializer,
+                feed_dict={batch_size_placeholder: batch_size})
+            start = time.time()
+            sess.run(next_element.op)
+            end = time.time()
+            deltas.append((end - start) / elems_per_trial)
+
+          median_wall_time = np.median(deltas)
+          print("Unbatch (native) batch size: %d Median wall time per element:"
+                " %f microseconds" % (batch_size, median_wall_time * 1e6))
+          self.report_benchmark(
+              iters=10000,
+              wall_time=median_wall_time,
+              name="benchmark_unbatch_dataset_native_batch_size_%d" %
+              batch_size)
+
+  # Include a benchmark of the previous `unbatch()` implementation that uses
+  # a composition of more primitive ops. Eventually we'd hope to generate code
+  # that is as good in both cases.
+  def benchmarkOldUnbatchImplementation(self):
+    batch_sizes = [1, 2, 5, 10, 20, 50]
+    elems_per_trial = 10000
+    with ops.Graph().as_default():
+      dataset = dataset_ops.Dataset.from_tensors("element").repeat(None)
+      batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
+      dataset = dataset.batch(batch_size_placeholder)
+      dataset = dataset.flat_map(dataset_ops.Dataset.from_tensor_slices)
+      dataset = dataset.skip(elems_per_trial)
+      iterator = dataset.make_initializable_iterator()
+      next_element = iterator.get_next()
+
+      with session.Session() as sess:
+        for batch_size in batch_sizes:
+          deltas = []
+          for _ in range(5):
+            sess.run(
+                iterator.initializer,
+                feed_dict={batch_size_placeholder: batch_size})
+            start = time.time()
+            sess.run(next_element.op)
+            end = time.time()
+            deltas.append((end - start) / elems_per_trial)
+
+          median_wall_time = np.median(deltas)
+          print("Unbatch (unfused) batch size: %d Median wall time per element:"
+                " %f microseconds" % (batch_size, median_wall_time * 1e6))
+          self.report_benchmark(
+              iters=10000,
+              wall_time=median_wall_time,
+              name="benchmark_unbatch_dataset_unfused_batch_size_%d" %
+              batch_size)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py
similarity index 96%
rename from tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
rename to tensorflow/python/data/experimental/kernel_tests/unique_test.py
index b5a0b20f3f..847cff26b0 100644
--- a/tensorflow/python/data/experimental/kernel_tests/unique_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.experimental.unique()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -26,7 +26,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class UniqueDatasetTest(test_base.DatasetTestBase):
+class UniqueTest(test_base.DatasetTestBase):
 
   def _testSimpleHelper(self, dtype, test_cases):
     """Test the `unique()` transformation on a list of test cases.
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 230ae3f3fd..0c372ebb10 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the experimental input pipeline ops."""
+"""Tests for `tf.data.Dataset.map()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -267,6 +267,35 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testCaptureIterator(self):
+
+    def _build_ds(iterator):
+
+      def _map_fn(x):
+        get_next = iterator.get_next()
+        return x * get_next
+
+      return dataset_ops.Dataset.range(10).map(_map_fn)
+
+    def _build_graph():
+      captured_iterator = dataset_ops.Dataset.range(
+          10).make_initializable_iterator()
+      ds = _build_ds(captured_iterator)
+      iterator = ds.make_initializable_iterator()
+      init_op = iterator.initializer
+      get_next = iterator.get_next()
+      return captured_iterator.initializer, init_op, get_next
+
+    with ops.Graph().as_default() as g:
+      captured_init_op, init_op, get_next = _build_graph()
+      with self.session(graph=g) as sess:
+        sess.run(captured_init_op)
+        sess.run(init_op)
+        for i in range(10):
+          self.assertEqual(i * i, sess.run(get_next))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+
   def testCaptureHashTable(self):
     # NOTE(mrry): We must use the V2 variants of `HashTable`
     # etc. because these produce a `tf.resource`-typed output that is
-- 
GitLab


From 158b6b8becb6afd08f9d6c87f0c7f144ba5f0584 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Thu, 4 Oct 2018 12:59:38 -0700
Subject: [PATCH 0376/1085] Use weak symbols to inject flex delegates

PiperOrigin-RevId: 215788183
---
 tensorflow/contrib/lite/BUILD                 |  26 ++++++++--
 tensorflow/contrib/lite/delegates/flex/BUILD  |   4 +-
 .../contrib/lite/delegates/flex/delegate.cc   |   9 ++++
 tensorflow/contrib/lite/interpreter.h         |  15 +++---
 tensorflow/contrib/lite/interpreter_test.cc   |   6 ++-
 tensorflow/contrib/lite/model.cc              |  35 ++++++++++----
 tensorflow/contrib/lite/model_flex_test.cc    |  45 ++++++++++++++++++
 tensorflow/contrib/lite/model_test.cc         |  22 +++++++++
 .../contrib/lite/testdata/multi_add_flex.bin  | Bin 0 -> 1052 bytes
 tensorflow/contrib/lite/tools/benchmark/BUILD |  24 ++--------
 .../tools/benchmark/benchmark_tflite_model.cc |  12 -----
 .../tools/benchmark/benchmark_tflite_model.h  |   6 ---
 12 files changed, 141 insertions(+), 63 deletions(-)
 create mode 100644 tensorflow/contrib/lite/model_flex_test.cc
 create mode 100644 tensorflow/contrib/lite/testdata/multi_add_flex.bin

diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index f3ebe3b245..787a85644c 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -4,6 +4,7 @@ package(default_visibility = [
 
 licenses(["notice"])  # Apache 2.0
 
+load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops")
 
 exports_files(glob([
@@ -165,10 +166,6 @@ cc_library(
         "stderr_reporter.h",
     ],
     copts = tflite_copts(),
-    defines = select({
-        ":with_tflite_flex": ["TFLITE_FLEX"],
-        "//conditions:default": [],
-    }),
     linkopts = [
     ] + select({
         "//tensorflow:android": [
@@ -276,6 +273,7 @@ cc_test(
         "testdata/0_subgraphs.bin",
         "testdata/2_subgraphs.bin",
         "testdata/empty_model.bin",
+        "testdata/multi_add_flex.bin",
         "testdata/test_model.bin",
         "testdata/test_model_broken.bin",
     ],
@@ -283,6 +281,26 @@ cc_test(
         ":framework",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "//tensorflow/contrib/lite/core/api",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
+        "//tensorflow/contrib/lite/testing:util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+# Test model framework with the flex library linked into the target.
+tf_cc_test(
+    name = "model_flex_test",
+    size = "small",
+    srcs = ["model_flex_test.cc"],
+    data = [
+        "testdata/multi_add_flex.bin",
+    ],
+    tags = ["no_windows"],  # TODO(b/116667551): No weak symbols with MSVC.
+    deps = [
+        ":framework",
+        "//tensorflow/contrib/lite/core/api",
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
+        "//tensorflow/contrib/lite/kernels:builtin_ops",
         "//tensorflow/contrib/lite/testing:util",
         "@com_google_googletest//:gtest",
     ],
diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD
index 9dd38958e5..9b89ed4f84 100644
--- a/tensorflow/contrib/lite/delegates/flex/BUILD
+++ b/tensorflow/contrib/lite/delegates/flex/BUILD
@@ -2,7 +2,7 @@
 # This is a TF Lite delegate that is powered by TensorFlow's Eager.
 #
 package(default_visibility = [
-    "//visibility:public",
+    "//visibility:private",
 ])
 
 licenses(["notice"])  # Apache 2.0
@@ -50,6 +50,7 @@ cc_library(
     hdrs = [
         "delegate.h",
     ],
+    visibility = ["//visibility:public"],
     deps = [
         ":buffer_map",
         ":delegate_data",
@@ -66,6 +67,7 @@ cc_library(
             "//tensorflow/core:lib",
         ],
     }),
+    alwayslink = 1,
 )
 
 tf_cc_test(
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate.cc b/tensorflow/contrib/lite/delegates/flex/delegate.cc
index ba065a8ff5..c72b0cf513 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate.cc
@@ -83,6 +83,15 @@ TfLiteStatus CopyFromBufferHandle(TfLiteContext* context,
 }  // namespace delegate
 }  // namespace flex
 
+// Corresponding weak declaration found in lite/model.cc.
+std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>
+AcquireFlexDelegate() {
+  return std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
+      tflite::FlexDelegate::Create().release(), [](TfLiteDelegate* delegate) {
+        delete reinterpret_cast<tflite::FlexDelegate*>(delegate);
+      });
+}
+
 std::unique_ptr<FlexDelegate> FlexDelegate::Create() {
   std::unique_ptr<flex::DelegateData> delegate_data;
   if (!flex::DelegateData::Create(&delegate_data).ok()) {
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index 7ef736d01b..651a97e9dc 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -349,6 +349,10 @@ class Interpreter {
     return context_.allow_fp32_relax_to_fp16;
   }
 
+  // Owning handle to a TfLiteDelegate instance.
+  using TfLiteDelegatePtr =
+      std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;
+
   // Allow a delegate to look at the graph and modify the graph to handle
   // parts of the graph themselves. After this is called, the graph may
   // contain new nodes that replace 1 more nodes.
@@ -574,19 +578,11 @@ class Interpreter {
                                  TfLiteExternalContextType type,
                                  TfLiteExternalContext* ctx);
 
-  using TfLiteDelegatePtr =
-      std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;
-
   // Variant of the public ModifyGraphWithDelegate method that additionally
   // Assumes ownership of the provided delegate.
   // WARNING: This is an experimental API and subject to change.
-  template <typename Delegate>
-  TfLiteStatus ModifyGraphWithDelegate(std::unique_ptr<Delegate> typed_delegate,
+  TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegatePtr delegate,
                                        bool allow_dynamic_tensors = false) {
-    TfLiteDelegatePtr delegate(typed_delegate.release(),
-                               [](TfLiteDelegate* delegate) {
-                                 delete static_cast<Delegate*>(delegate);
-                               });
     // Note that we retain ownership of the delegate even if graph modification
     // fails, as delegate use will be in an indeterminate state at that point.
     owned_delegates_.push_back(std::move(delegate));
@@ -676,6 +672,7 @@ class Interpreter {
   // List of delegates that have been installed and are owned by this
   // interpreter instance. Useful if client delegate ownership is burdensome.
   // WARNING: This is an experimental API and subject to change.
+  // TODO(b/116667551): Use TfLiteExternalContext for storing state.
   std::vector<TfLiteDelegatePtr> owned_delegates_;
 
   std::unique_ptr<MemoryPlanner> memory_planner_;
diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc
index cdede430e2..6c71d5a8d7 100644
--- a/tensorflow/contrib/lite/interpreter_test.cc
+++ b/tensorflow/contrib/lite/interpreter_test.cc
@@ -30,7 +30,11 @@ class InterpreterTest : public ::testing::Test {
   template <typename Delegate>
   static TfLiteStatus ModifyGraphWithDelegate(
       Interpreter* interpreter, std::unique_ptr<Delegate> delegate) {
-    return interpreter->ModifyGraphWithDelegate(std::move(delegate));
+    Interpreter::TfLiteDelegatePtr tflite_delegate(
+        delegate.release(), [](TfLiteDelegate* delegate) {
+          delete reinterpret_cast<Delegate*>(delegate);
+        });
+    return interpreter->ModifyGraphWithDelegate(std::move(tflite_delegate));
   }
 
  protected:
diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index d50c345194..d7b109ac1a 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -27,9 +27,6 @@ limitations under the License.
 #ifndef TFLITE_MCU
 #include "tensorflow/contrib/lite/nnapi_delegate.h"
 #endif
-#if defined(TFLITE_FLEX)
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
-#endif
 #include "tensorflow/contrib/lite/version.h"
 
 namespace tflite {
@@ -43,6 +40,25 @@ ErrorReporter* ValidateErrorReporter(ErrorReporter* e) {
 
 const char* kEmptyTensorName = "";
 
+// Normally we'd use ABSL_HAVE_ATTRIBUTE_WEAK and ABSL_ATTRIBUTE_WEAK, but
+// we avoid the absl dependency for binary size reasons.
+#ifdef __has_attribute
+#define TFLITE_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define TFLITE_HAS_ATTRIBUTE(x) 0
+#endif
+
+#if TFLITE_HAS_ATTRIBUTE(weak) || (defined(__GNUC__) && !defined(__clang__))
+// Using weak symbols for the flex delegate allows automatic injection of the
+// delegate simply by adding it as a dependency. See also the strong override in
+// lite/delegates/flex/delegate.cc.
+__attribute__((weak)) Interpreter::TfLiteDelegatePtr AcquireFlexDelegate() {
+  return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
+}
+#else
+Interpreter::TfLiteDelegatePtr (*AcquireFlexDelegate)() = nullptr;
+#endif
+
 #ifndef TFLITE_MCU
 // Loads a model from `filename`. If `mmap_file` is true then use mmap,
 // otherwise make a copy of the model in a buffer.
@@ -450,13 +466,14 @@ TfLiteStatus InterpreterBuilder::operator()(
   }
   (**interpreter).SetVariables(std::move(variables));
 
-#if defined(TFLITE_FLEX)
-  if (auto delegate = FlexDelegate::Create()) {
-    (**interpreter)
-        .ModifyGraphWithDelegate(std::move(delegate),
-                                 /*allow_dynamic_tensors=*/true);
+  // TODO(b/116667551): Only create the flex delegate if the model has flex ops.
+  if (AcquireFlexDelegate != nullptr) {
+    if (auto flex_delegate = AcquireFlexDelegate()) {
+      (**interpreter)
+          .ModifyGraphWithDelegate(std::move(flex_delegate),
+                                   /*allow_dynamic_tensors=*/true);
+    }
   }
-#endif
 
   return kTfLiteOk;
 }
diff --git a/tensorflow/contrib/lite/model_flex_test.cc b/tensorflow/contrib/lite/model_flex_test.cc
new file mode 100644
index 0000000000..52e76bee49
--- /dev/null
+++ b/tensorflow/contrib/lite/model_flex_test.cc
@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/model.h"
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/testing/util.h"
+
+namespace tflite {
+
+// Ensures that a model with TensorFlow ops can be imported as long as the
+// appropriate delegate is linked into the client.
+TEST(FlexModel, WithFlexDelegate) {
+  auto model = FlatBufferModel::BuildFromFile(
+      "tensorflow/contrib/lite/testdata/multi_add_flex.bin");
+  ASSERT_TRUE(model);
+
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(InterpreterBuilder(*model,
+                               ops::builtin::BuiltinOpResolver{})(&interpreter),
+            kTfLiteOk);
+  ASSERT_TRUE(interpreter);
+
+  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk);
+}
+
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/contrib/lite/model_test.cc b/tensorflow/contrib/lite/model_test.cc
index ec7d46af7c..b969bea5dc 100644
--- a/tensorflow/contrib/lite/model_test.cc
+++ b/tensorflow/contrib/lite/model_test.cc
@@ -24,6 +24,7 @@ limitations under the License.
 
 #include <gtest/gtest.h>
 #include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/testing/util.h"
 
 // Comparison for TfLiteRegistration. Since TfLiteRegistration is a C object,
@@ -193,6 +194,27 @@ TEST(BasicFlatBufferModel, TestModelInInterpreter) {
   }
 }
 
+// Test that loading a model with TensorFlow ops fails when the flex delegate is
+// not linked into the target.
+TEST(FlexModel, FailureWithoutFlexDelegate) {
+  auto model = FlatBufferModel::BuildFromFile(
+      "tensorflow/contrib/lite/testdata/multi_add_flex.bin");
+  ASSERT_TRUE(model);
+
+  // Note that creation will succeed when using the BuiltinOpResolver, but
+  // unless the appropriate delegate is linked into the target or the client
+  // explicitly installs the delegate, execution will fail.
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(InterpreterBuilder(*model,
+                               ops::builtin::BuiltinOpResolver{})(&interpreter),
+            kTfLiteOk);
+  ASSERT_TRUE(interpreter);
+
+  // As the flex ops weren't resolved implicitly by the flex delegate, runtime
+  // allocation and execution will fail.
+  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteError);
+}
+
 // This tests on a flatbuffer that defines a shape of 2 to be a memory mapped
 // buffer. But the buffer is provided to be only 1 element.
 TEST(BasicFlatBufferModel, TestBrokenMmap) {
diff --git a/tensorflow/contrib/lite/testdata/multi_add_flex.bin b/tensorflow/contrib/lite/testdata/multi_add_flex.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9aac2155fedd11b81ed32e587655dfe53e5749a9
GIT binary patch
literal 1052
zcmb1OU|<Mw^D$;%;A4<rU}4~3;9(G85MkhBU|?WoU|?9n%)r3Iz`!8Dz`&ruz`(%B
zz`&5fz`(!{(&z8&ui%`YSC(2-lA5B&z`?-4V8g(`V8Xz_pu@nxpu)hwAj81GAi@AK
zg9T*HzyJS5{{8<i@$dhCnScNPEByQaU*+Hb{~G`P|JV8V|343d2;3DQdkq*E7(i|W
znZ1XRfq{vEfq|8Qfq{*IfdOP51Jq3*agcl2p>~EaFfjCh>}6zNh+$-4*u%iUaEF0`
zVIRnD1_lO@I*^$le}Vjz2r>v{=KufyL1G}YLE;Py_0ABQf#DZaFUW3Wy-85L4tVsM
z{r~?T<PVVBk@Y4+_156gTLje$auc%N3aH+c|Ns9_gQ@|A0kWD(sG5%d|NrlTssV)+
zNDat6Ait$R)qvt2lmb9<xWWLGPCzll08J^NbOJL6qz@F=ATu+eX0kAVAS5k)U|?V<
z0EICF1B1c;|NlX017r^<U4i@t(gM;83SW>LL4E;=n}X7!V@e8x8W&@w5VK=SiWFmt
z6l10muYO8uS!Qyom2*I-m4OyJ7h{MJlK~?Gldh(S2#W^DtQmOh0mTK#9+180_EbRa
zNhH!95pXI8g&xQ>P|5}6B1rnj=0}iyAiF_s1kuQT%!JyVB*mCaj2}T}fXut_|Nnnb
eHU+Uk>Dw(QwE`4uFK~#1;+vg;1DtQM=>q_$K#X(%

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD
index 502e181139..71bf61657e 100644
--- a/tensorflow/contrib/lite/tools/benchmark/BUILD
+++ b/tensorflow/contrib/lite/tools/benchmark/BUILD
@@ -40,7 +40,7 @@ cc_binary(
     srcs = [
         "benchmark_main.cc",
     ],
-    copts = common_copts + ["-DTFLITE_FLEX"],
+    copts = common_copts,
     linkopts = tflite_linkopts() + select({
         "//tensorflow:android": [
             "-pie",  # Android 5.0 and later supports only PIE
@@ -49,8 +49,9 @@ cc_binary(
         "//conditions:default": [],
     }),
     deps = [
-        ":benchmark_tflite_model_plus_flex_lib",
+        ":benchmark_tflite_model_lib",
         ":logging",
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
     ],
 )
 
@@ -110,25 +111,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "benchmark_tflite_model_plus_flex_lib",
-    srcs = [
-        "benchmark_tflite_model.cc",
-        "logging.h",
-    ],
-    hdrs = ["benchmark_tflite_model.h"],
-    copts = common_copts + ["-DTFLITE_FLEX"],
-    deps = [
-        ":benchmark_model_lib",
-        ":logging",
-        "//tensorflow/contrib/lite:framework",
-        "//tensorflow/contrib/lite:string_util",
-        "//tensorflow/contrib/lite/delegates/flex:delegate",
-        "//tensorflow/contrib/lite/kernels:builtin_ops",
-        "//tensorflow/contrib/lite/profiling:profile_summarizer",
-    ],
-)
-
 cc_library(
     name = "benchmark_params",
     srcs = [
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
index 463d5993f4..2a3df7f289 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -23,9 +23,6 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#ifdef TFLITE_FLEX
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
-#endif  // TFLITE_FLEX
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/op_resolver.h"
@@ -305,15 +302,6 @@ void BenchmarkTfLiteModel::Init() {
 
   interpreter->UseNNAPI(use_nnapi);
 
-#ifdef TFLITE_FLEX
-  TFLITE_LOG(INFO) << "Instantiating Flex Delegate";
-  delegate_ = FlexDelegate::Create();
-  if (delegate_) {
-    interpreter->ModifyGraphWithDelegate(delegate_.get(),
-                                         /*allow_dynamic_tensors=*/true);
-  }
-#endif  // TFLITE_FLEX
-
   auto interpreter_inputs = interpreter->inputs();
 
   if (!inputs.empty()) {
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
index b091e18a29..25a302b2aa 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h
@@ -20,9 +20,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#ifdef TFLITE_FLEX
-#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
-#endif  // TFLITE_FLEX
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
 #include "tensorflow/contrib/lite/tools/benchmark/benchmark_model.h"
@@ -73,9 +70,6 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
   void PrepareInputsAndOutputs() override;
 
  private:
-#ifdef TFLITE_FLEX
-  std::unique_ptr<FlexDelegate> delegate_;
-#endif  // TFLITE_FLEX
   std::unique_ptr<tflite::FlatBufferModel> model;
   std::unique_ptr<tflite::Interpreter> interpreter;
   std::vector<InputLayerInfo> inputs;
-- 
GitLab


From 074ff471fefbcf3bfd49914ad80bd9f9751df363 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Thu, 4 Oct 2018 13:00:49 -0700
Subject: [PATCH 0377/1085] Temporarily disable testCondInDefun test in
 control_flow_ops_py_test

PiperOrigin-RevId: 215788359
---
 tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index a1be77601c..c7e89dd5f9 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -3422,7 +3422,8 @@ class EagerTest(test.TestCase):
       self.assertAllEqual(r.numpy(), 10)
       self.assertFalse(isinstance(r, list))
 
-  def testCondInDefun(self):
+  # TODO(b/117279927): Re-enable once msan failure is fixed.
+  def DISABLED_testCondInDefun(self):
     if "GPU" in [d.device_type for d in device_lib.list_local_devices()]:
       return unittest.skip("b/113346829 (gpu failure)")
 
-- 
GitLab


From 7fcb05ff475a0c6c1076eacf9d11e17323d98bc2 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 4 Oct 2018 13:01:33 -0700
Subject: [PATCH 0378/1085] [tf.data] Add a notion of `captured args` to
 MapDefun

PiperOrigin-RevId: 215788485
---
 .../api_def/base_api/api_def_MapDefun.pbtxt   | 23 +++++--
 .../optimizers/data/map_vectorization.cc      |  1 +
 .../data/vectorization_utils_test.cc          |  3 +
 tensorflow/core/kernels/data/map_defun_op.cc  | 68 +++++++++----------
 tensorflow/core/ops/dataset_ops.cc            | 11 ++-
 .../kernel_tests/map_defun_op_test.py         | 12 ++++
 .../python/data/experimental/ops/map_defun.py |  8 ++-
 7 files changed, 77 insertions(+), 49 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_MapDefun.pbtxt b/tensorflow/core/api_def/base_api/api_def_MapDefun.pbtxt
index 4433693759..d158f4b502 100644
--- a/tensorflow/core/api_def/base_api/api_def_MapDefun.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_MapDefun.pbtxt
@@ -4,22 +4,33 @@ op {
   in_arg {
     name: "arguments"
     description: <<END
-    A list of tensors whose types are Targuments, corresponding to the inputs the
-    function should be mapped over.
+    A list of tensors whose types are `Targuments`, corresponding to the inputs
+    the function should be mapped over.
+END
+  }
+  in_arg {
+    name: "captured_inputs"
+    description: <<END
+    A list of tensors whose types are `Tcaptured`, corresponding to the captured
+    inputs of the defun.
 END
   }
   out_arg {
     name: "output"
     description: <<END
-    A list of output tensors whose types are output_types and whose dimensions 0
-    are the same as the dimensions 0 of the tensors in arguments, and whose
-    remaining dimensions correspond to those in output_shapes.
+    A list of output tensors whose types are `output_types` and whose dimensions
+    0 are the same as the dimensions 0 of the tensors in `arguments`, and whose
+    remaining dimensions correspond to those in `output_shapes`.
 END
   }
   attr {
     name: "Targuments"
     description: "A list of types."
   }
+  attr {
+    name: "Tcaptured"
+    description: "A list of types."
+  }
   attr {
     name: "output_types"
     description: "A list of types."
@@ -29,6 +40,6 @@ END
     description: "A list of shapes."
   }
   summary: <<END
-  Maps a function on the list of tensors unpacked from inputs on dimension 0.
+  Maps a function on the list of tensors unpacked from arguments on dimension 0.
 END
 }
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index ba521e79bc..a9254ed58b 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -67,6 +67,7 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
     map_defun_node->add_input(input.name());
   }
   (*map_defun_node->mutable_attr())["Targuments"] = t_args;
+  AddNodeAttr("Tcaptured", DataTypeVector(), map_defun_node);
 
   // Set return values to match output names
   string output_prefix = strings::StrCat(map_defun_node->name(), ":output:");
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index a958d706c1..a6020e36bb 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -55,6 +55,7 @@ NodeDef* AddMapDefunNode(const string& name, const std::vector<string>& inputs,
   func.set_name(function_name);
   NodeDef* node = function_utils::AddNode(name, "MapDefun", inputs, {}, fn);
   graph_transforms::SetNodeAttr("Targuments", t_arguments, node);
+  graph_transforms::SetNodeAttr("Tcaptured", DataTypeVector(), node);
   graph_transforms::SetNodeAttr("output_types", output_types, node);
   graph_transforms::SetNodeAttr("output_shapes", output_shapes, node);
   graph_transforms::SetNodeAttr("f", func, node);
@@ -142,6 +143,8 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
+  Status s = VectorizeMapDefun(outer, *map_defun, &lib, &vectorized);
+  LOG(ERROR) << s;
   EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
diff --git a/tensorflow/core/kernels/data/map_defun_op.cc b/tensorflow/core/kernels/data/map_defun_op.cc
index 6657f2b2b3..705b0393de 100644
--- a/tensorflow/core/kernels/data/map_defun_op.cc
+++ b/tensorflow/core/kernels/data/map_defun_op.cc
@@ -62,24 +62,6 @@ class MapDefunOp : public AsyncOpKernel {
 
   ~MapDefunOp() override {}
 
-  Status GetInputBatchSize(OpKernelContext* ctx, int64* batch_size) {
-    // Validates inputs and gets the size of their leading dimension.
-    *batch_size = ctx->input(0).dims() > 0 ? ctx->input(0).dim_size(0) : -1;
-    for (size_t i = 0; i < ctx->num_inputs(); ++i) {
-      if (ctx->input(i).dims() == 0) {
-        return errors::InvalidArgument(
-            "All inputs must have rank at least 1. Input ", i,
-            " has a rank of 0.");
-      } else if (ctx->input(i).dim_size(0) != *batch_size) {
-        return errors::InvalidArgument(
-            "All inputs must have the same dimension 0. Input ", i,
-            " has leading dimension ", ctx->input(i).dim_size(0),
-            ", while all previous inputs have leading dimension ", batch_size);
-      }
-    }
-    return Status::OK();
-  }
-
   void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
     ComputeOptions* compute_opts = nullptr;
 
@@ -150,8 +132,9 @@ class MapDefunOp : public AsyncOpKernel {
     // all calls to the function are complete. This struct also encapsulates
     // all the components that need to be passed to each MapFunctionCallFrame.
 
-    const std::vector<Tensor> args;
+    OpInputList args;
     const std::vector<TensorShape> arg_shapes;
+    OpInputList captured_inputs;
     const int64 batch_size;
 
     // Output of a compute call
@@ -161,26 +144,31 @@ class MapDefunOp : public AsyncOpKernel {
 
     // Create a copy of output_shapes because every `Compute` may expect a
     // different output shape.
-    ComputeOptions(std::vector<Tensor> args,
+    ComputeOptions(OpInputList args, OpInputList captured_inputs,
                    std::vector<TensorShape> arg_shapes, int64 batch_size,
                    const std::vector<PartialTensorShape>& output_shapes_attr)
-        : args(std::move(args)),
+        : args(args),
           arg_shapes(std::move(arg_shapes)),
+          captured_inputs(captured_inputs),
           batch_size(batch_size),
           output_shapes(output_shapes_attr) {}
   };
 
   // Get inputs to Compute and check that they are valid.
   Status SetupArgs(OpKernelContext* ctx, ComputeOptions** compute_opts) {
-    int64 batch_size =
-        ctx->input(0).dims() > 0 ? ctx->input(0).dim_size(0) : -1;
+    OpInputList arguments;
+    TF_RETURN_IF_ERROR(ctx->input_list("arguments", &arguments));
+    OpInputList captured_inputs;
+    TF_RETURN_IF_ERROR(ctx->input_list("captured_inputs", &captured_inputs));
+
+    int64 batch_size = arguments[0].dims() > 0 ? arguments[0].dim_size(0) : -1;
 
-    for (size_t i = 0; i < ctx->num_inputs(); ++i) {
-      if (ctx->input(i).dims() == 0) {
+    for (size_t i = 0; i < arguments.size(); ++i) {
+      if (arguments[i].dims() == 0) {
         return errors::InvalidArgument(
             "All inputs must have rank at least 1. Input ", i,
             " has a rank of 0.");
-      } else if (ctx->input(i).dim_size(0) != batch_size) {
+      } else if (arguments[i].dim_size(0) != batch_size) {
         return errors::InvalidArgument(
             "All inputs must have the same dimension 0. Input ", i,
             " has leading dimension ", ctx->input(i).dim_size(0),
@@ -188,19 +176,17 @@ class MapDefunOp : public AsyncOpKernel {
       }
     }
 
-    std::vector<Tensor> args;
     std::vector<TensorShape> arg_shapes;
-    args.reserve(ctx->num_inputs());
-    arg_shapes.reserve(ctx->num_inputs());
+    arg_shapes.reserve(arguments.size());
 
-    for (size_t i = 0; i < ctx->num_inputs(); ++i) {
-      args.push_back(ctx->input(i));
-      arg_shapes.push_back(ctx->input(i).shape());
+    for (size_t i = 0; i < arguments.size(); ++i) {
+      arg_shapes.push_back(arguments[i].shape());
       arg_shapes.at(i).RemoveDim(0);
     }
 
-    *compute_opts = new ComputeOptions(std::move(args), std::move(arg_shapes),
-                                       batch_size, output_shapes_);
+    *compute_opts =
+        new ComputeOptions(arguments, captured_inputs, std::move(arg_shapes),
+                           batch_size, output_shapes_);
     return Status::OK();
   }
 
@@ -235,12 +221,21 @@ class MapDefunOp : public AsyncOpKernel {
     }
 
     Status GetArg(int index, Tensor* val) const override {
-      if (index < 0 || index >= compute_opts_->args.size()) {
+      if (index < 0 || index >= compute_opts_->args.size() +
+                                    compute_opts_->captured_inputs.size()) {
         return errors::InvalidArgument(
             "Mismatch in number of function inputs.");
       }
+
+      if (index >= compute_opts_->args.size()) {
+        // The function is calling for a captured input
+        *val =
+            compute_opts_->captured_inputs[index - compute_opts_->args.size()];
+        return Status::OK();
+      }
+
       bool result =
-          val->CopyFrom(compute_opts_->args.at(index).Slice(iter_, iter_ + 1),
+          val->CopyFrom(compute_opts_->args[index].Slice(iter_, iter_ + 1),
                         compute_opts_->arg_shapes.at(index));
       if (!result) {
         return errors::Internal("GetArg failed.");
@@ -248,7 +243,6 @@ class MapDefunOp : public AsyncOpKernel {
         // Ensure alignment
         *val = tensor::DeepCopy(*val);
       }
-
       return Status::OK();
     }
 
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 889a6a4640..ec22eee874 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -903,14 +903,18 @@ REGISTER_OP("ModelDataset")
 
 REGISTER_OP("MapDefun")
     .Input("arguments: Targuments")
+    .Input("captured_inputs: Tcaptured")
     .Output("output: output_types")
     .Attr("Targuments: list(type) >= 1")
+    .Attr("Tcaptured: list(type) >= 0 = []")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
     .Attr("f: func")
     .SetShapeFn([](shape_inference::InferenceContext* c) {
       std::vector<PartialTensorShape> output_shapes;
       TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes));
+      DataTypeVector t_args;
+      TF_RETURN_IF_ERROR(c->GetAttr("Targuments", &t_args));
       if (output_shapes.size() != c->num_outputs()) {
         return errors::InvalidArgument(
             "`output_shapes` must be the same length as `output_types` (",
@@ -918,10 +922,11 @@ REGISTER_OP("MapDefun")
       }
 
       int64 dim_zero = -1;
-      for (size_t i = 0; i < static_cast<size_t>(c->num_inputs()); ++i) {
+      for (size_t i = 0; i < t_args.size(); ++i) {
         if (c->Rank(c->input(i)) == 0) {
           return errors::InvalidArgument(
-              "Inputs must have rank at least 1. Input ", i, " has rank of 0");
+              "Arguments must have rank at least 1. Input ", i,
+              " has rank of 0.");
         }
         auto dim_handle = c->Dim(c->input(i), 0);
         if (c->ValueKnown(dim_handle)) {
@@ -929,7 +934,7 @@ REGISTER_OP("MapDefun")
             dim_zero = c->Value(dim_handle);
           } else if (c->Value(dim_handle) != dim_zero) {
             return errors::InvalidArgument(
-                "Inputs must have the same dimension 0.");
+                "Arguments must have the same dimension 0.");
           }
         }
       }
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
index 612ee332c4..ae9dedb0ab 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
@@ -235,6 +235,18 @@ class MapDefunTest(test_base.DatasetTestBase):
       sess.close()
       thread.join()
 
+  def testMapDefunWithCapturedInputs(self):
+    c = constant_op.constant(2)
+
+    @function.Defun(dtypes.int32)
+    def fn(x):
+      return x + c
+
+    x = constant_op.constant([1, 2, 3, 4])
+    map_defun_op = map_defun.map_defun(fn, [x], [dtypes.int32], [()])[0]
+    expected = x + c
+    self.assertAllEqual(self.evaluate(expected), self.evaluate(map_defun_op))
+
 
 class MapDefunBenchmark(test.Benchmark):
 
diff --git a/tensorflow/python/data/experimental/ops/map_defun.py b/tensorflow/python/data/experimental/ops/map_defun.py
index 3d0d0993c9..3ac1158d8b 100644
--- a/tensorflow/python/data/experimental/ops/map_defun.py
+++ b/tensorflow/python/data/experimental/ops/map_defun.py
@@ -47,10 +47,12 @@ def map_defun(fn, elems, output_dtypes, output_shapes):
   if not isinstance(elems, list):
     raise ValueError("`elems` must be a list of tensors.")
   if not isinstance(output_dtypes, list):
-    raise ValueError("`output_dtypes` must be a list of tensors.")
+    raise ValueError("`output_dtypes` must be a list of `tf.DType` objects.")
   if not isinstance(output_shapes, list):
-    raise ValueError("`output_shapes` must be a list of tensors.")
+    raise ValueError("`output_shapes` must be a list of `tf.TensorShape` "
+                     "objects.")
 
   elems = [ops.convert_to_tensor(e) for e in elems]
   output_shapes = [tensor_shape.TensorShape(s) for s in output_shapes]
-  return gen_dataset_ops.map_defun(elems, output_dtypes, output_shapes, fn)
+  return gen_dataset_ops.map_defun(elems, fn.captured_inputs, output_dtypes,
+                                   output_shapes, fn)
-- 
GitLab


From b949f9ee60522ca43f7f8a89b15ea6eeed2ac570 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Thu, 4 Oct 2018 13:14:07 -0700
Subject: [PATCH 0379/1085] Enable masking through a Sequential model.

PiperOrigin-RevId: 215790636
---
 tensorflow/python/keras/engine/input_layer.py |  1 +
 .../python/keras/engine/topology_test.py      | 31 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/tensorflow/python/keras/engine/input_layer.py b/tensorflow/python/keras/engine/input_layer.py
index 8a4018a0df..6a69d0ed90 100644
--- a/tensorflow/python/keras/engine/input_layer.py
+++ b/tensorflow/python/keras/engine/input_layer.py
@@ -82,6 +82,7 @@ class InputLayer(base_layer.Layer):
     self.built = True
     self.sparse = sparse
     self.batch_size = batch_size
+    self.supports_masking = True
 
     if isinstance(input_shape, tensor_shape.TensorShape):
       input_shape = tuple(input_shape.as_list())
diff --git a/tensorflow/python/keras/engine/topology_test.py b/tensorflow/python/keras/engine/topology_test.py
index a0da96334b..b4488033cd 100644
--- a/tensorflow/python/keras/engine/topology_test.py
+++ b/tensorflow/python/keras/engine/topology_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import test
+from tensorflow.python.training import rmsprop
 
 try:
   import yaml  # pylint:disable=g-import-not-at-top
@@ -1182,6 +1183,36 @@ class DefaultShapeInferenceBehaviorTest(test.TestCase):
     output = model(sample_input)
     self.assertEqual(output.shape, (1, 3))
 
+  @test_util.run_in_graph_and_eager_modes()
+  def test_sequential_as_downstream_of_masking_layer(self):
+    inputs = keras.layers.Input(shape=(3, 4))
+    x = keras.layers.Masking(mask_value=0., input_shape=(3, 4))(inputs)
+
+    s = keras.Sequential()
+    s.add(keras.layers.Dense(5, input_shape=(4,)))
+
+    x = keras.layers.wrappers.TimeDistributed(s)(x)
+    model = keras.Model(inputs=inputs, outputs=x)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(1e-3), loss='mse')
+
+    model_input = np.random.randint(
+        low=1, high=5, size=(10, 3, 4)).astype('float32')
+    for i in range(4):
+      model_input[i, i:, :] = 0.
+    model.fit(model_input,
+              np.random.random((10, 3, 5)), epochs=1, batch_size=6)
+
+    if not context.executing_eagerly():
+      # Note: this doesn't work in eager due to DeferredTensor/ops compatibility
+      # issue.
+      mask_outputs = [model.layers[1].compute_mask(model.layers[1].input)]
+      mask_outputs += [model.layers[2].compute_mask(
+          model.layers[2].input, mask_outputs[-1])]
+      func = keras.backend.function([model.input], mask_outputs)
+      mask_outputs_val = func([model_input])
+      self.assertAllClose(mask_outputs_val[0], np.any(model_input, axis=-1))
+      self.assertAllClose(mask_outputs_val[1], np.any(model_input, axis=-1))
+
 
 class GraphUtilsTest(test.TestCase):
 
-- 
GitLab


From ddb815c5fab23237c7561eacd36aaab0e8c34d47 Mon Sep 17 00:00:00 2001
From: mdfaijul <md.faijul.amin@intel.com>
Date: Thu, 4 Oct 2018 13:20:27 -0700
Subject: [PATCH 0380/1085] Fixed style with clang-format[llvm-3.9.0]

---
 tensorflow/core/graph/mkl_layout_pass.cc | 345 ++++++++++-------------
 1 file changed, 145 insertions(+), 200 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index a3cff18535..44b52303e3 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -299,124 +299,90 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // End - element-wise ops. See note above.
 
     // NOTE: names are alphabetically sorted.
-    rinfo_.push_back(
-        {csinfo_.addn,  mkl_op_registry::GetMklOpName(csinfo_.addn),
-         CopyAttrsAddN, AddNRewrite,
-         nullptr});
-    rinfo_.push_back(
-        {csinfo_.add,       mkl_op_registry::GetMklOpName(csinfo_.add),
-         CopyAttrsDataType, AlwaysRewrite,
-         nullptr});
-    rinfo_.push_back(
-        {csinfo_.avg_pool, mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
-         CopyAttrsPooling, AlwaysRewrite,
-         nullptr});
+    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
+                      CopyAttrsAddN, AddNRewrite, nullptr});
+    rinfo_.push_back({csinfo_.add, mkl_op_registry::GetMklOpName(csinfo_.add),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.avg_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
+                      CopyAttrsPooling, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.avg_pool_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
-                      CopyAttrsPooling,
-                      AlwaysRewrite,
-                      nullptr});
+                      CopyAttrsPooling, AlwaysRewrite, nullptr});
     // BiasAddGrad gets written into Conv2DWithBiasBackpropBias depending
     // on if context contains Conv2D.
     rinfo_.push_back({csinfo_.bias_add_grad,
                       csinfo_.mkl_conv2d_with_bias_backprop_bias,
-                      CopyAttrsBiasAddGrad,
-                      ContextMatchRewrite,
+                      CopyAttrsBiasAddGrad, ContextMatchRewrite,
                       &biasaddgrad_conv2dwithbias_context_});
     // BiasAddGrad gets written into BiasAddGrad depending on if context
     // contains MatMul.
-    rinfo_.push_back({csinfo_.bias_add_grad,       csinfo_.matmul,
-                      CopyAttrsBiasAddGrad,        ContextMatchRewrite,
+    rinfo_.push_back({csinfo_.bias_add_grad, csinfo_.matmul,
+                      CopyAttrsBiasAddGrad, ContextMatchRewrite,
                       &biasaddgrad_matmul_context_});
-    rinfo_.push_back(
-        {csinfo_.concat,  mkl_op_registry::GetMklOpName(csinfo_.concat),
-         CopyAttrsConcat, AlwaysRewrite,
-         nullptr});
-    rinfo_.push_back(
-        {csinfo_.concatv2,  mkl_op_registry::GetMklOpName(csinfo_.concatv2),
-         CopyAttrsConcatV2, AlwaysRewrite,
-         nullptr});
-    rinfo_.push_back(
-        {csinfo_.conv2d,  mkl_op_registry::GetMklOpName(csinfo_.conv2d),
-         CopyAttrsConv2D, AlwaysRewrite,
-         nullptr});
+    rinfo_.push_back({csinfo_.concat,
+                      mkl_op_registry::GetMklOpName(csinfo_.concat),
+                      CopyAttrsConcat, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.concatv2,
+                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
+                      CopyAttrsConcatV2, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.conv2d,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
+                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.conv2d_grad_filter,
                       mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
-                      CopyAttrsConv2D,
-                      AlwaysRewrite,
-                      nullptr});
+                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.conv2d_grad_input,
                       mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
-                      CopyAttrsConv2D,
-                      AlwaysRewrite,
-                      nullptr});
+                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
 
     rinfo_.push_back({csinfo_.fused_batch_norm,
                       mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
-                      CopyAttrsFusedBatchNorm,
-                      AlwaysRewrite,
-                      nullptr});
-    rinfo_.push_back({csinfo_.fused_batch_norm_grad,
-                      mkl_op_registry::GetMklOpName(
-                          csinfo_.fused_batch_norm_grad),
                       CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
     rinfo_.push_back(
-        {csinfo_.identity,  mkl_op_registry::GetMklOpName(csinfo_.identity),
-         CopyAttrsIdentity, AlwaysRewrite,
-         nullptr});
-    rinfo_.push_back({csinfo_.lrn,  mkl_op_registry::GetMklOpName(csinfo_.lrn),
-                      CopyAttrsLRN, AlwaysRewrite,
-                      nullptr});
-    rinfo_.push_back(
-        {csinfo_.lrn_grad, mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
-         CopyAttrsLRN,     AlwaysRewrite,
-         nullptr});
-    rinfo_.push_back(
-        {csinfo_.max_pool, mkl_op_registry::GetMklOpName(csinfo_.max_pool),
-         CopyAttrsPooling, NonDepthBatchWisePoolRewrite,
-         nullptr});
+        {csinfo_.fused_batch_norm_grad,
+         mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
+         CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.identity,
+                      mkl_op_registry::GetMklOpName(csinfo_.identity),
+                      CopyAttrsIdentity, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn),
+                      CopyAttrsLRN, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.lrn_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
+                      CopyAttrsLRN, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.max_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
+                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite, nullptr});
     rinfo_.push_back({csinfo_.max_pool_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
-                      CopyAttrsPooling,
-                      AlwaysRewrite,
-                      nullptr});
-    rinfo_.push_back(
-        {csinfo_.maximum,   mkl_op_registry::GetMklOpName(csinfo_.maximum),
-         CopyAttrsDataType, AlwaysRewrite,
-         nullptr});
-    rinfo_.push_back(
-        {csinfo_.mul,       mkl_op_registry::GetMklOpName(csinfo_.mul),
-         CopyAttrsDataType, AlwaysRewrite,
-         nullptr});
-    rinfo_.push_back(
-        {csinfo_.relu,      mkl_op_registry::GetMklOpName(csinfo_.relu),
-         CopyAttrsDataType, AlwaysRewrite,
-         nullptr});
-    rinfo_.push_back(
-        {csinfo_.relu_grad, mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
-         CopyAttrsDataType, AlwaysRewrite,
-         nullptr});
-    rinfo_.push_back(
-        {csinfo_.reshape,  mkl_op_registry::GetMklOpName(csinfo_.reshape),
-         CopyAttrsReshape, AlwaysRewrite,
-         nullptr});
+                      CopyAttrsPooling, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.maximum,
+                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.mul, mkl_op_registry::GetMklOpName(csinfo_.mul),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.relu_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.reshape,
+                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
+                      CopyAttrsReshape, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.squared_difference,
                       mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
-                      CopyAttrsDataType,
-                      AlwaysRewrite,
-                      nullptr});
-    rinfo_.push_back(
-        {csinfo_.sub,       mkl_op_registry::GetMklOpName(csinfo_.sub),
-         CopyAttrsDataType, AlwaysRewrite,
-         nullptr});
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.sub, mkl_op_registry::GetMklOpName(csinfo_.sub),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
 
     // Add info about which ops to add workspace edge to and the slots.
     wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
     wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
 
     // Add a rule for merging nodes
-    minfo_.push_back({csinfo_.mkl_conv2d, csinfo_.bias_add,
-                      0,                  csinfo_.mkl_conv2d_with_bias});
+    minfo_.push_back({csinfo_.mkl_conv2d, csinfo_.bias_add, 0,
+                      csinfo_.mkl_conv2d_with_bias});
 
     biasaddgrad_matmul_context_ = {csinfo_.bias_add_grad, csinfo_.matmul,
                                    IsBiasAddGradInMatMulContext};
@@ -1192,7 +1158,7 @@ int MklLayoutRewritePass::SetUpContiguousInputs(
     for (const Edge* e : filter_node->out_edges()) {
       if (e->dst()->type_string() == csinfo_.mkl_conv2d &&
           e->dst_input() == kConv2DFilterInputSlotIdx
-              /* filter is 2nd input of Conv2D and _MklConv2D. */) {
+          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
         if (conv2d_node != nullptr) {
           VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
                   << " feeding multiple Conv2D nodes: "
@@ -2107,7 +2073,7 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
   // BiasAddGrad is not an Mkl layer, so we make an exception for it.
   if (n->type_string() != csinfo_.bias_add_grad) {
     if (!mkl_op_registry::IsMklOp(
-             mkl_op_registry::GetMklOpName(n->type_string()), T)) {
+            mkl_op_registry::GetMklOpName(n->type_string()), T)) {
       return nullptr;
     }
   }
@@ -2527,125 +2493,109 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // End - element-wise ops. See note above.
 
     // NOTE: names are alphabetically sorted.
-    rinfo_.push_back(
-        {csinfo_.addn,  mkl_op_registry::GetMklOpName(csinfo_.addn),
-         CopyAttrsAddN, AddNRewrite});
-    rinfo_.push_back(
-        {csinfo_.add,       mkl_op_registry::GetMklOpName(csinfo_.add),
-         CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.avg_pool, mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
-         CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
+                      CopyAttrsAddN, AddNRewrite});
+    rinfo_.push_back({csinfo_.add, mkl_op_registry::GetMklOpName(csinfo_.add),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.avg_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
+                      CopyAttrsPooling, AlwaysRewrite});
     rinfo_.push_back({csinfo_.avg_pool_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
-                      CopyAttrsPooling,
-                      AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.avg_pool3d, mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d),
-         CopyAttrsPooling,   AlwaysRewrite});
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.avg_pool3d,
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d),
+                      CopyAttrsPooling, AlwaysRewrite});
     rinfo_.push_back({csinfo_.avg_pool3d_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d_grad),
-                      CopyAttrsPooling,
-                      AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.concat,  mkl_op_registry::GetMklOpName(csinfo_.concat),
-         CopyAttrsConcat, AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.concatv2,  mkl_op_registry::GetMklOpName(csinfo_.concatv2),
-         CopyAttrsConcatV2, AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.conv2d, mkl_op_registry::GetMklOpName(csinfo_.conv2d),
-         CopyAttrsConv,  AlwaysRewrite});
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concat,
+                      mkl_op_registry::GetMklOpName(csinfo_.concat),
+                      CopyAttrsConcat, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.concatv2,
+                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
+                      CopyAttrsConcatV2, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv2d,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
+                      CopyAttrsConv, AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv2d_with_bias, csinfo_.mkl_conv2d_with_bias,
-                      CopyAttrsConv,            AlwaysRewrite});
+                      CopyAttrsConv, AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv2d_grad_filter,
                       mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
-                      CopyAttrsConv,
-                      AlwaysRewrite});
+                      CopyAttrsConv, AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv2d_grad_filter_with_bias,
-                      csinfo_.mkl_conv2d_grad_filter_with_bias,
-                      CopyAttrsConv,
+                      csinfo_.mkl_conv2d_grad_filter_with_bias, CopyAttrsConv,
                       AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv2d_grad_input,
                       mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
-                      CopyAttrsConv,
-                      AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.conv3d, mkl_op_registry::GetMklOpName(csinfo_.conv3d),
-         CopyAttrsConv,  AlwaysRewrite});
+                      CopyAttrsConv, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.conv3d,
+                      mkl_op_registry::GetMklOpName(csinfo_.conv3d),
+                      CopyAttrsConv, AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv3d_grad_filter,
                       mkl_op_registry::GetMklOpName(csinfo_.conv3d_grad_filter),
-                      CopyAttrsConv,
-                      AlwaysRewrite});
+                      CopyAttrsConv, AlwaysRewrite});
     rinfo_.push_back({csinfo_.conv3d_grad_input,
                       mkl_op_registry::GetMklOpName(csinfo_.conv3d_grad_input),
-                      CopyAttrsConv,
-                      AlwaysRewrite});
+                      CopyAttrsConv, AlwaysRewrite});
     rinfo_.push_back({csinfo_.fused_batch_norm,
                       mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
-                      CopyAttrsFusedBatchNorm,
-                      AlwaysRewrite});
-    rinfo_.push_back({csinfo_.fused_batch_norm_grad,
-                      mkl_op_registry::GetMklOpName(
-                          csinfo_.fused_batch_norm_grad),
                       CopyAttrsFusedBatchNorm, AlwaysRewrite});
     rinfo_.push_back(
-        {csinfo_.identity,  mkl_op_registry::GetMklOpName(csinfo_.identity),
-         CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.lrn,  mkl_op_registry::GetMklOpName(csinfo_.lrn),
+        {csinfo_.fused_batch_norm_grad,
+         mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
+         CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.identity,
+                      mkl_op_registry::GetMklOpName(csinfo_.identity),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn),
                       CopyAttrsLRN, LrnRewrite});
-    rinfo_.push_back(
-        {csinfo_.lrn_grad, mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
-         CopyAttrsLRN,     LrnGradRewrite});
-    rinfo_.push_back(
-        {csinfo_.max_pool, mkl_op_registry::GetMklOpName(csinfo_.max_pool),
-         CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
+    rinfo_.push_back({csinfo_.lrn_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
+                      CopyAttrsLRN, LrnGradRewrite});
+    rinfo_.push_back({csinfo_.max_pool,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
+                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
     rinfo_.push_back({csinfo_.max_pool_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
-                      CopyAttrsPooling,
-                      MaxpoolGradRewrite});
-    rinfo_.push_back(
-        {csinfo_.max_pool3d, mkl_op_registry::GetMklOpName(csinfo_.max_pool3d),
-         CopyAttrsPooling,   NonDepthBatchWisePoolRewrite});
+                      CopyAttrsPooling, MaxpoolGradRewrite});
+    rinfo_.push_back({csinfo_.max_pool3d,
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool3d),
+                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
     rinfo_.push_back({csinfo_.max_pool3d_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool3d_grad),
-                      CopyAttrsPooling,
-                      AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.maximum,   mkl_op_registry::GetMklOpName(csinfo_.maximum),
-         CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.mul,       mkl_op_registry::GetMklOpName(csinfo_.mul),
-         CopyAttrsDataType, AlwaysRewrite});
+                      CopyAttrsPooling, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.maximum,
+                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.mul, mkl_op_registry::GetMklOpName(csinfo_.mul),
+                      CopyAttrsDataType, AlwaysRewrite});
 #ifdef INTEL_MKL_QUANTIZED
     rinfo_.push_back({csinfo_.quantized_avg_pool,
                       mkl_op_registry::GetMklOpName(csinfo_.quantized_avg_pool),
-                      CopyAttrsQuantizedPooling,
-                      AlwaysRewrite});
+                      CopyAttrsQuantizedPooling, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_concatv2,
                       mkl_op_registry::GetMklOpName(csinfo_.quantized_concatv2),
-                      CopyAttrsConcatV2,
-                      AlwaysRewrite});
+                      CopyAttrsConcatV2, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d,
                       mkl_op_registry::GetMklOpName(csinfo_.quantized_conv2d),
-                      CopyAttrsQuantizedConv2D,
-                      AlwaysRewrite});
+                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d_with_requantize,
                       mkl_op_registry::GetMklOpName(
                           csinfo_.quantized_conv2d_with_requantize),
                       CopyAttrsQuantizedConv2D, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.quantized_conv2d_with_bias,
-                      mkl_op_registry::GetMklOpName(
-                          csinfo_.quantized_conv2d_with_bias),
-                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.quantized_conv2d_with_bias,
+         mkl_op_registry::GetMklOpName(csinfo_.quantized_conv2d_with_bias),
+         CopyAttrsQuantizedConv2D, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d_with_bias_and_requantize,
                       mkl_op_registry::GetMklOpName(
                           csinfo_.quantized_conv2d_with_bias_and_requantize),
                       CopyAttrsQuantizedConv2D, AlwaysRewrite});
-    rinfo_.push_back({csinfo_.quantized_conv2d_and_relu,
-                      mkl_op_registry::GetMklOpName(
-                          csinfo_.quantized_conv2d_and_relu),
-                      CopyAttrsQuantizedConv2D, AlwaysRewrite});
+    rinfo_.push_back(
+        {csinfo_.quantized_conv2d_and_relu,
+         mkl_op_registry::GetMklOpName(csinfo_.quantized_conv2d_and_relu),
+         CopyAttrsQuantizedConv2D, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d_and_relu_and_requantize,
                       mkl_op_registry::GetMklOpName(
                           csinfo_.quantized_conv2d_and_relu_and_requantize),
@@ -2661,8 +2611,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
          CopyAttrsQuantizedConv2D, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_max_pool,
                       mkl_op_registry::GetMklOpName(csinfo_.quantized_max_pool),
-                      CopyAttrsQuantizedPooling,
-                      AlwaysRewrite});
+                      CopyAttrsQuantizedPooling, AlwaysRewrite});
     rinfo_.push_back({csinfo_.quantized_conv2d_with_bias_sum_and_relu,
                       mkl_op_registry::GetMklOpName(
                           csinfo_.quantized_conv2d_with_bias_sum_and_relu),
@@ -2678,16 +2627,15 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
              csinfo_.quant_conv2d_with_bias_signed_sum_and_relu_and_requantize),
          CopyAttrsQuantizedConv2D, AlwaysRewrite});
 #endif
-    rinfo_.push_back(
-        {csinfo_.relu,      mkl_op_registry::GetMklOpName(csinfo_.relu),
-         CopyAttrsDataType, AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.relu_grad, mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
-         CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu),
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.relu_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
+                      CopyAttrsDataType, AlwaysRewrite});
 #ifdef INTEL_MKL_QUANTIZED
-    rinfo_.push_back(
-        {csinfo_.requantize,  mkl_op_registry::GetMklOpName(csinfo_.requantize),
-         CopyAttrsRequantize, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.requantize,
+                      mkl_op_registry::GetMklOpName(csinfo_.requantize),
+                      CopyAttrsRequantize, AlwaysRewrite});
 #endif
     /*
     rinfo_.push_back({csinfo_.tanh,
@@ -2697,23 +2645,21 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
                       mkl_op_registry::GetMklOpName(csinfo_.tanh_grad),
                       CopyAttrsDataType, AlwaysRewrite});
     */
-    rinfo_.push_back(
-        {csinfo_.reshape,  mkl_op_registry::GetMklOpName(csinfo_.reshape),
-         CopyAttrsReshape, AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.slice,  mkl_op_registry::GetMklOpName(csinfo_.slice),
-         CopyAttrsSlice, AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.softmax,   mkl_op_registry::GetMklOpName(csinfo_.softmax),
-         CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.reshape,
+                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
+                      CopyAttrsReshape, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.slice,
+                      mkl_op_registry::GetMklOpName(csinfo_.slice),
+                      CopyAttrsSlice, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.softmax,
+                      mkl_op_registry::GetMklOpName(csinfo_.softmax),
+                      CopyAttrsDataType, AlwaysRewrite});
 
     rinfo_.push_back({csinfo_.squared_difference,
                       mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
-                      CopyAttrsDataType,
-                      AlwaysRewrite});
-    rinfo_.push_back(
-        {csinfo_.sub,       mkl_op_registry::GetMklOpName(csinfo_.sub),
-         CopyAttrsDataType, AlwaysRewrite});
+                      CopyAttrsDataType, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.sub, mkl_op_registry::GetMklOpName(csinfo_.sub),
+                      CopyAttrsDataType, AlwaysRewrite});
 
     // Add info about which ops to add workspace edge to and the slots.
     wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
@@ -2722,11 +2668,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
         {csinfo_.max_pool3d, csinfo_.max_pool3d_grad, 0, 1, 1, 3});
 
     // Add a rule for merging nodes
-    minfo_.push_back({csinfo_.conv2d,           csinfo_.bias_add,
+    minfo_.push_back({csinfo_.conv2d, csinfo_.bias_add,
                       csinfo_.conv2d_with_bias, GetConv2DOrBiasAdd});
 
-    minfo_.push_back({csinfo_.conv2d_grad_filter,
-                      csinfo_.bias_add_grad,
+    minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad,
                       csinfo_.conv2d_grad_filter_with_bias,
                       GetConv2DBackpropFilterOrBiasAddGrad});
   }
@@ -3506,7 +3451,7 @@ int MklLayoutRewritePass::SetUpContiguousInputs(
       if ((e->dst()->type_string() == csinfo_.mkl_conv2d ||
            e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) &&
           e->dst_input() == kConv2DFilterInputSlotIdx
-              /* filter is 2nd input of Conv2D and _MklConv2D. */) {
+          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
         if (conv2d_node != nullptr) {
           VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
                   << " feeding multiple Conv2D nodes: "
-- 
GitLab


From 23a698e670a10eff362c575eb1297c2b4f0bbe11 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 13:18:18 -0700
Subject: [PATCH 0381/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 215791283
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 88 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 26 ++++++
 2 files changed, 114 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 33f18ae13f..780c6f6448 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -30566,6 +30566,52 @@ op {
     type: "func"
   }
 }
+op {
+  name: "MapDefun"
+  input_arg {
+    name: "arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "captured_inputs"
+    type_list_attr: "Tcaptured"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "output_types"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "Tcaptured"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+}
 op {
   name: "MapIncompleteSize"
   output_arg {
@@ -71843,6 +71889,48 @@ op {
     }
   }
 }
+op {
+  name: "Substr"
+  input_arg {
+    name: "input"
+    type: DT_STRING
+  }
+  input_arg {
+    name: "pos"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "len"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type: DT_STRING
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "unit"
+    type: "string"
+    default_value {
+      s: "BYTE"
+    }
+    allowed_values {
+      list {
+        s: "BYTE"
+        s: "UTF8_CHAR"
+      }
+    }
+  }
+}
 op {
   name: "Sum"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 0e58a9475d..0d8997c1bd 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -15262,6 +15262,10 @@ op {
     name: "arguments"
     type_list_attr: "Targuments"
   }
+  input_arg {
+    name: "captured_inputs"
+    type_list_attr: "Tcaptured"
+  }
   output_arg {
     name: "output"
     type_list_attr: "output_types"
@@ -15272,6 +15276,15 @@ op {
     has_minimum: true
     minimum: 1
   }
+  attr {
+    name: "Tcaptured"
+    type: "list(type)"
+    default_value {
+      list {
+      }
+    }
+    has_minimum: true
+  }
   attr {
     name: "output_types"
     type: "list(type)"
@@ -33748,6 +33761,19 @@ op {
       }
     }
   }
+  attr {
+    name: "unit"
+    type: "string"
+    default_value {
+      s: "BYTE"
+    }
+    allowed_values {
+      list {
+        s: "BYTE"
+        s: "UTF8_CHAR"
+      }
+    }
+  }
 }
 op {
   name: "Sum"
-- 
GitLab


From 589e876139f4c7fbdf96edaa16fdcfe12c7a4b03 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 4 Oct 2018 13:20:58 -0700
Subject: [PATCH 0382/1085] Error out when PartitionedCall is created with the
 wrong number of arguments.

(used to be a segfault)

PiperOrigin-RevId: 215791737
---
 tensorflow/core/kernels/partitioned_function_ops.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index fdb4c84c46..3979e4b53a 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -97,6 +97,13 @@ class PartitionedCallOp : public AsyncOpKernel {
         OP_REQUIRES_ASYNC(ctx, fbody != nullptr,
                           errors::Internal("Could not find handle ", handle),
                           done);
+        OP_REQUIRES_ASYNC(
+            ctx, args.size() == fbody->arg_nodes.size(),
+            errors::InvalidArgument(
+                "Wrong number of arguments to the op; function expects ",
+                fbody->arg_nodes.size(), " but PartitionedCall received ",
+                args.size()),
+            done);
         // We need to pass global op_registry as default_registry when creating
         // graph. So that graph optimization passes can lookup all possible ops
         // by name.
-- 
GitLab


From 9e8c7afa5867bd19b6684458566b064148b2665b Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Thu, 4 Oct 2018 13:34:31 -0700
Subject: [PATCH 0383/1085] Add TF_BUILD_TEST_TIMEOUT to
 ci_parameterized_build.sh

PiperOrigin-RevId: 215793932
---
 .../tools/ci_build/ci_parameterized_build.sh  | 27 ++++++++++++-------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 99bdedf7b4..fdff867ff0 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -83,6 +83,9 @@
 #                     Use the specified configurations when building.
 #                     When set, overrides TF_BUILD_IS_OPT and TF_BUILD_MAVX
 #                     options, as this will replace the two.
+#   TF_BUILD_TEST_TIMEOUT:
+#                     Sets the value of bazel --test_timeout, defaults to -1
+#                     which uses the bazel defaults.
 #   TF_SKIP_CONTRIB_TESTS:
 #                     If set to any non-empty or non-0 value, will skip running
 #                     contrib tests.
@@ -125,6 +128,8 @@ NO_DOCKER_OPT_FLAG="--genrule_strategy=standalone"
 
 DO_DOCKER=1
 
+# Bazel uses defaults for all test sizes when given `-1`.
+TF_BUILD_TEST_TIMEOUT=${TF_BUILD_TEST_TIMEOUT:--1}
 
 # Helpful flags:
 # --test_summary=detailed: Tell us more about which targets are being built
@@ -132,7 +137,16 @@ DO_DOCKER=1
 # --build_tests_only: Don't build targets depended on by tests if the test is
 #                     disabled. Also saves some compilation time. Otherwise,
 #                     tries to build everything.
-BAZEL_TEST_FLAGS="--test_summary=detailed --build_tests_only --keep_going"
+# --test_timeout: Test timeouts in the order short,moderate,long,eternal.
+# --test_env: Environment variables to set when running bazel tests. These are
+#             especially important when using --run_under with
+#             parallel_gpu_execute.
+BAZEL_TEST_FLAGS=""\
+"--test_summary=detailed --build_tests_only --keep_going "\
+"--test_timeout=${TF_BUILD_TEST_TIMEOUT} "\
+"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
+"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
+"--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB}"
 BAZEL_BUILD_FLAGS="--keep_going"
 
 BAZEL_CMD="bazel test ${BAZEL_TEST_FLAGS}"
@@ -148,13 +162,6 @@ ANDROID_FULL_CMD="${CI_BUILD_DIR}/builds/android_full.sh"
 TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 PARALLEL_GPU_TEST_CMD='//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute'
 
-# Environment variables to set when running bazel tests.  These are especially
-# important when using --run_under with parallel_gpu_execute.
-BAZEL_TEST_ENV=""\
-"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
-"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
-"--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB} "
-
 BENCHMARK_CMD="${CI_BUILD_DIR}/builds/benchmark.sh"
 
 EXTRA_PARAMS=""
@@ -415,11 +422,11 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
   if [[ ${CTYPE} == cpu* ]] || \
      [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
     # CPU only command, fully parallel.
-    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
+    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} "\
       "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == gpu* ]]; then
     # GPU only command, run as many jobs as the GPU count only.
-    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
+    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
 "--local_test_jobs=${TF_GPU_COUNT} "\
 "--run_under=${PARALLEL_GPU_TEST_CMD} "\
 "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
-- 
GitLab


From 9f2d1e2cf6be4a17b6318b429447a71d9d48af32 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 13:35:31 -0700
Subject: [PATCH 0384/1085] Few more fixes for issued in parsing invalid HLO
 module proto.

PiperOrigin-RevId: 215794086
---
 tensorflow/compiler/xla/literal.cc                |  8 ++++----
 .../compiler/xla/service/hlo_instruction.cc       |  4 ++--
 .../compiler/xla/service/hlo_parser_test.cc       |  2 +-
 tensorflow/compiler/xla/service/hlo_sharding.cc   | 15 +++++++++++++++
 tensorflow/compiler/xla/shape_util.cc             |  7 ++-----
 5 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index 177f39cc74..656ce720a1 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -1945,11 +1945,11 @@ Status LiteralBase::Piece::CopyFromProto(const LiteralProto& proto) {
       }
     } break;
     case TUPLE:
-      LOG(FATAL) << "Should not be called on tuple shapes: "
-                 << ShapeUtil::HumanString(subshape());
-      break;
+      return InvalidArgument("Should not be called on tuple shapes: %s",
+                             ShapeUtil::HumanString(subshape()));
     default:
-      LOG(FATAL) << "Unhandled primitive type " << subshape().element_type();
+      return InvalidArgument("Is called on unsupported shape: %s",
+                             ShapeUtil::HumanString(subshape()));
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index fb91adc302..2f6db7cd7c 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -465,8 +465,8 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       break;
     }
     case HloOpcode::kIota:
-      TF_RET_CHECK(proto.dimensions_size() <= 1)
-          << "Iota instruction should have at most 1 dimension but sees "
+      TF_RET_CHECK(proto.dimensions_size() == 1)
+          << "Iota instruction should have 1 dimension but sees "
           << proto.dimensions_size();
       instruction = CreateIota(proto.shape(), proto.dimensions(0));
       break;
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index b618510640..255123d331 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1304,7 +1304,7 @@ TEST_F(HloParserTest, MoreConstants) {
 
 ENTRY %SelectScalarS32True.v4 () -> s32[] {
   %constant.2 = pred[] constant(true)
-  %constant.1 = s32[] constant(-42), sharding={s32[5,6] devices=[2,3]1,2,3,4}
+  %constant.1 = s32[] constant(-42), sharding={s32[5,6] devices=[2,2]1,2,3,4}
   %constant = s32[] constant(42)
   %select = s32[] select(pred[] %constant.2, s32[] %constant.1, s32[] %constant)
 }
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index 94c7bafd3b..188f4acc79 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
+#include "tensorflow/compiler/xla/overflow_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace xla {
@@ -377,6 +378,20 @@ Status HloSharding::ValidateNonTuple(const Shape& shape,
       << "Maximal sharding is expected to have single device assignment, but "
       << proto.tile_assignment_devices().size() << " has provided.";
 
+  TF_RET_CHECK(proto.tile_assignment_devices().size() > 1);
+  TF_RET_CHECK(!proto.tile_assignment_dimensions().empty());
+
+  // RE: the product of tile assignment tensor dimensions must be
+  // equal to tile_assignment_devices.size().
+  int64 product_of_dimensions = 1;
+  for (auto dimension : proto.tile_assignment_dimensions()) {
+    TF_RET_CHECK(dimension > 0);
+    product_of_dimensions =
+        MultiplyWithoutOverflow(product_of_dimensions, dimension);
+    TF_RET_CHECK(product_of_dimensions > 0);
+  }
+  TF_RET_CHECK(product_of_dimensions == proto.tile_assignment_devices().size());
+
   // Some versions of gcc cannot infer the TileAssignment constructor from a
   // braced initializer-list, so create one manually.
   std::vector<int64> devices(proto.tile_assignment_devices().begin(),
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 476a9fe868..d244923532 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -869,11 +869,8 @@ StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
     return Status::OK();
   }
 
-  if (Rank(shape) != shape.dimensions_size()) {
-    return InvalidArgument(
-        "shape's rank is mismatched with dimension count; rank=%d "
-        "dimensions_size=%d",
-        Rank(shape), shape.dimensions_size());
+  if (LayoutUtil::IsSparseArray(shape) && Rank(shape) == 0) {
+    return InvalidArgument("sparse arrays must have rank > 0");
   }
   for (int64 i = 0; i < Rank(shape); ++i) {
     int64 dimension = shape.dimensions(i);
-- 
GitLab


From d96e073e77929006c519cd3082461d9757865dd7 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 4 Oct 2018 13:42:48 -0700
Subject: [PATCH 0385/1085] [TF:XLA] Fix inverted condition in randomized test.

PiperOrigin-RevId: 215795518
---
 tensorflow/compiler/tests/randomized_tests.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc
index 7a96f4c25c..dc119fb0f8 100644
--- a/tensorflow/compiler/tests/randomized_tests.cc
+++ b/tensorflow/compiler/tests/randomized_tests.cc
@@ -1820,7 +1820,7 @@ TEST_F(OpTest, Diag) {
     do {
       dims = RandomDims(1);
       size = TensorShape(dims).num_elements();
-    } while (size * size < tf_xla_max_tensor_size);
+    } while (size * size > tf_xla_max_tensor_size);
     return ExpectTfAndXlaOutputsAreClose(
         OpTestBuilder("Diag").RandomInput(type, dims).Attr("T", type));
   });
-- 
GitLab


From 08ecc62a38dc58e85cb46ad281486d1c75b1db9b Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dimvar@google.com>
Date: Thu, 4 Oct 2018 13:43:31 -0700
Subject: [PATCH 0386/1085] [TF:XLA] Improve the accounting for subcomputations
 in the List scheduler to avoid double-counting.

PiperOrigin-RevId: 215795640
---
 .../xla/service/hlo_memory_scheduler.cc       | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index bf30764488..5cee865b7a 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -195,13 +195,15 @@ class ListScheduler {
     return entry;
   }
 
-  // Returns the number of bytes freed if the HLO instruction is scheduled.
-  // If the instruction calls subcomputations, we count the memory used by the
-  // subcomputations as memory "defined" by the instruction. This is not
-  // entirely accurate, because subcomputation memory will be freed after the
-  // instruction finishes. But it is more accurate than not taking
-  // subcomputations into account at all. In the future, we may improve
-  // accounting for subcomputation memory (b/65409243).
+  // Returns the number of bytes freed *after* the HLO instruction finishes.
+  // The current List algorithm only considers two states for an instruction:
+  // right before it runs, and after it finishes. We don't represent memory
+  // usage during the execution of an instruction. But if the instruction calls
+  // subcomputations, they are only live during the instruction's execution.
+  // We end up counting the memory used by subcomputations as memory "defined"
+  // by the instruction. This is not entirely accurate, but it is more accurate
+  // than not taking subcomputations into account at all. In the future, we may
+  // improve accounting for subcomputation memory (b/65409243).
   int64 BytesFreedIfScheduled(const ReadyListEntry& entry) {
     int64 freed_bytes = 0;
     for (const auto& kv : entry.used_buffer_unscheduled_use_counts) {
@@ -223,7 +225,18 @@ class ListScheduler {
         }
       }
     }
-    return freed_bytes - entry.bytes_defined - max_subcomputation_bytes;
+    int64 bytes_defined;
+    if (max_subcomputation_bytes > 0 &&
+        (entry.instruction->opcode() == HloOpcode::kWhile ||
+         entry.instruction->opcode() == HloOpcode::kCall ||
+         entry.instruction->opcode() == HloOpcode::kConditional)) {
+      // The output buffer of while/call/conditional is always aliased with the
+      // output buffer of the root instruction in the body. Don't double count.
+      bytes_defined = max_subcomputation_bytes;
+    } else {
+      bytes_defined = entry.bytes_defined + max_subcomputation_bytes;
+    }
+    return freed_bytes - bytes_defined;
   }
 
   // Constructs the scheduling priority of the given instruction.
-- 
GitLab


From 4c1da53840fed235409cb2c571ea081e28388f75 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Thu, 4 Oct 2018 13:53:19 -0700
Subject: [PATCH 0387/1085] Internal change.

PiperOrigin-RevId: 215797256
---
 tensorflow/python/kernel_tests/depthwise_conv_op_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 6aee2eb0a3..737a73f97a 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -131,7 +131,7 @@ class DepthwiseConv2DTest(test.TestCase):
     with self.session(graph=graph, use_gpu=use_gpu) as sess:
       tolerance = {
           dtypes.float16: 4e-2,
-          dtypes.float32: 1e-6,
+          dtypes.float32: 1e-5,
           dtypes.float64: 1e-12,
       }[data_type]
 
-- 
GitLab


From a2e48d849f5c7a97b788ba8d2499e95aaef95945 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 14:18:22 -0700
Subject: [PATCH 0388/1085] Fix problem in quantized version of Comparison op
 handler

PiperOrigin-RevId: 215801773
---
 tensorflow/contrib/lite/kernels/comparisons.cc   | 16 +++++-----------
 .../contrib/lite/kernels/comparisons_test.cc     | 11 +++++++++++
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/comparisons.cc b/tensorflow/contrib/lite/kernels/comparisons.cc
index f765235e04..3926af5b97 100644
--- a/tensorflow/contrib/lite/kernels/comparisons.cc
+++ b/tensorflow/contrib/lite/kernels/comparisons.cc
@@ -66,31 +66,25 @@ TfLiteStatus ComparisonPrepare(TfLiteContext* context, TfLiteNode* node) {
     if (input1->type == kTfLiteUInt8) {                                        \
       auto input1_offset = -input1->params.zero_point;                         \
       auto input2_offset = -input2->params.zero_point;                         \
-      const int left_shift = 20;                                               \
-      const double twice_max_input_scale =                                     \
-          2 * std::max(input1->params.scale, input2->params.scale);            \
-      const double real_input1_multiplier =                                    \
-          input1->params.scale / twice_max_input_scale;                        \
-      const double real_input2_multiplier =                                    \
-          input2->params.scale / twice_max_input_scale;                        \
+      const int left_shift = 8;                                                \
                                                                                \
       int32 input1_multiplier;                                                 \
       int input1_shift;                                                        \
-      QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier,              \
+      QuantizeMultiplierSmallerThanOneExp(input1->params.scale,                \
                                           &input1_multiplier, &input1_shift);  \
       int32 input2_multiplier;                                                 \
       int input2_shift;                                                        \
-      QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier,              \
+      QuantizeMultiplierSmallerThanOneExp(input2->params.scale,                \
                                           &input2_multiplier, &input2_shift);  \
                                                                                \
       ComparisonParams op_params;                                              \
       op_params.left_shift = left_shift;                                       \
       op_params.input1_offset = input1_offset;                                 \
       op_params.input1_multiplier = input1_multiplier;                         \
-      op_params.input1_shift = -input1_shift;                                  \
+      op_params.input1_shift = input1_shift;                                   \
       op_params.input2_offset = input2_offset;                                 \
       op_params.input2_multiplier = input2_multiplier;                         \
-      op_params.input2_shift = -input2_shift;                                  \
+      op_params.input2_shift = input2_shift;                                   \
       if (requires_broadcast) {                                                \
         reference_ops::Broadcast4DSlow##opname##WithScaling(                   \
             op_params, GetTensorShape(input1), GetTensorData<uint8_t>(input1), \
diff --git a/tensorflow/contrib/lite/kernels/comparisons_test.cc b/tensorflow/contrib/lite/kernels/comparisons_test.cc
index 67a91c17fd..04c8bf2e30 100644
--- a/tensorflow/contrib/lite/kernels/comparisons_test.cc
+++ b/tensorflow/contrib/lite/kernels/comparisons_test.cc
@@ -402,6 +402,17 @@ TEST(ComparisonsTest, GreaterQuantized) {
   EXPECT_THAT(model.GetOutput(), ElementsAre(false, true, true, false));
 }
 
+TEST(ComparisonsTest, GreaterQuantizedSmallRange) {
+  ComparisonOpModel model({TensorType_UINT8, {1, 2, 2, 1}, 0.0, 1.0},
+                          {TensorType_UINT8, {1, 2, 2, 1}, 0.0, 2.0},
+                          TensorType_UINT8, BuiltinOperator_GREATER);
+  model.QuantizeAndPopulate<uint8_t>(model.input1(), {1.0, 0.5, 0.35, 0.1});
+  model.QuantizeAndPopulate<uint8_t>(model.input2(), {1.01, 0.25, 0.3, 0.4});
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAre(false, true, true, false));
+}
+
 TEST(ComparisonsTest, GreaterEqualQuantized) {
   const float kMin = -1.f;
   const float kMax = 128.f;
-- 
GitLab


From b01ea7a51c07f6d2988d7f2aa117374591d1e25a Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 4 Oct 2018 14:18:58 -0700
Subject: [PATCH 0389/1085] Rename "Inliner" to "MapInliner".

PiperOrigin-RevId: 215801897
---
 tensorflow/compiler/xla/service/BUILD         | 69 +++++++++----------
 tensorflow/compiler/xla/service/cpu/BUILD     |  2 +-
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  4 +-
 .../compiler/xla/service/interpreter/BUILD    |  2 +-
 .../xla/service/interpreter/compiler.cc       |  2 +-
 .../service/{inliner.cc => map_inliner.cc}    | 19 +++--
 .../xla/service/{inliner.h => map_inliner.h}  | 22 +++---
 .../{inliner_test.cc => map_inliner_test.cc}  | 20 +++---
 8 files changed, 68 insertions(+), 72 deletions(-)
 rename tensorflow/compiler/xla/service/{inliner.cc => map_inliner.cc} (87%)
 rename tensorflow/compiler/xla/service/{inliner.h => map_inliner.h} (59%)
 rename tensorflow/compiler/xla/service/{inliner_test.cc => map_inliner_test.cc} (95%)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 2f8bab0614..4797cf3330 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1841,42 +1841,6 @@ tf_cc_test(
     ],
 )
 
-cc_library(
-    name = "inliner",
-    srcs = ["inliner.cc"],
-    hdrs = ["inliner.h"],
-    deps = [
-        ":hlo",
-        ":hlo_pass",
-        ":hlo_query",
-        "//tensorflow/compiler/xla:status_macros",
-        "//tensorflow/compiler/xla:types",
-        "//tensorflow/core:lib",
-        "@com_google_absl//absl/types:span",
-    ],
-)
-
-tf_cc_test(
-    name = "inliner_test",
-    srcs = ["inliner_test.cc"],
-    deps = [
-        ":cpu_plugin",
-        ":hlo",
-        ":hlo_matchers",
-        ":inliner",
-        "//tensorflow/compiler/xla:literal",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:test",
-        "//tensorflow/compiler/xla:util",
-        "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/compiler/xla/tests:hlo_verified_test_base",
-        "//tensorflow/compiler/xla/tests:literal_test_util",
-        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
-        "@com_google_absl//absl/memory",
-    ],
-)
-
 cc_library(
     name = "computation_placer",
     srcs = ["computation_placer.cc"],
@@ -3492,6 +3456,39 @@ cc_library(
     deps = ["//tensorflow/core:lib"],
 )
 
+cc_library(
+    name = "map_inliner",
+    srcs = ["map_inliner.cc"],
+    hdrs = ["map_inliner.h"],
+    deps = [
+        ":hlo",
+        ":hlo_pass",
+        ":hlo_query",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/core:lib",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
+tf_cc_test(
+    name = "map_inliner_test",
+    srcs = ["map_inliner_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_matchers",
+        ":map_inliner",
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/tests:hlo_verified_test_base",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
+        "@com_google_absl//absl/memory",
+    ],
+)
+
 tf_cc_test(
     name = "hlo_casting_utils_test",
     srcs = ["hlo_casting_utils_test.cc"],
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index ae4c6e962d..58abb330a6 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -94,6 +94,7 @@ cc_library(
         ":target_machine_features",
         "@com_google_absl//absl/types:span",
         "//tensorflow/compiler/tf2xla:cpu_function_runtime",
+        "//tensorflow/compiler/xla/service:map_inliner",
         "//tensorflow/compiler/xla/service:scatter_expander",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:protobuf_util",
@@ -127,7 +128,6 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_subcomputation_unification",
         "//tensorflow/compiler/xla/service:hlo_verifier",
         "//tensorflow/compiler/xla/service:indexed_array_analysis",
-        "//tensorflow/compiler/xla/service:inliner",
         "//tensorflow/compiler/xla/service:llvm_compiler",
         "//tensorflow/compiler/xla/service:reduce_precision_insertion",
         "//tensorflow/compiler/xla/service:reshape_mover",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index afc94f2185..5834f67285 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -86,8 +86,8 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_subcomputation_unification.h"
 #include "tensorflow/compiler/xla/service/hlo_verifier.h"
 #include "tensorflow/compiler/xla/service/indexed_array_analysis.h"
-#include "tensorflow/compiler/xla/service/inliner.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
+#include "tensorflow/compiler/xla/service/map_inliner.h"
 #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
 #include "tensorflow/compiler/xla/service/scatter_expander.h"
@@ -249,7 +249,7 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
       &pipeline, module->config().debug_options(),
       ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
 
-  pipeline.AddPass<Inliner>();
+  pipeline.AddPass<MapInliner>();
 
   // TODO(b/65775800): Fix wrong output bug in Call and remove the CallInliner
   // pass.
diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD
index 146c9052f1..1484e14df1 100644
--- a/tensorflow/compiler/xla/service/interpreter/BUILD
+++ b/tensorflow/compiler/xla/service/interpreter/BUILD
@@ -45,8 +45,8 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
         "//tensorflow/compiler/xla/service:hlo_subcomputation_unification",
-        "//tensorflow/compiler/xla/service:inliner",
         "//tensorflow/compiler/xla/service:layout_assignment",
+        "//tensorflow/compiler/xla/service:map_inliner",
         "//tensorflow/compiler/xla/service:reshape_mover",
         "//tensorflow/compiler/xla/service:while_loop_simplifier",
         "//tensorflow/core:lib",
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 27fe89375d..7c79eb7d79 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -28,9 +28,9 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_pass_fix.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"
 #include "tensorflow/compiler/xla/service/hlo_subcomputation_unification.h"
-#include "tensorflow/compiler/xla/service/inliner.h"
 #include "tensorflow/compiler/xla/service/interpreter/executable.h"
 #include "tensorflow/compiler/xla/service/layout_assignment.h"
+#include "tensorflow/compiler/xla/service/map_inliner.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
 #include "tensorflow/compiler/xla/status_macros.h"
diff --git a/tensorflow/compiler/xla/service/inliner.cc b/tensorflow/compiler/xla/service/map_inliner.cc
similarity index 87%
rename from tensorflow/compiler/xla/service/inliner.cc
rename to tensorflow/compiler/xla/service/map_inliner.cc
index 50c408f5bb..2200ef054a 100644
--- a/tensorflow/compiler/xla/service/inliner.cc
+++ b/tensorflow/compiler/xla/service/map_inliner.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/inliner.h"
+#include "tensorflow/compiler/xla/service/map_inliner.h"
 
 #include <memory>
 #include <string>
@@ -32,10 +32,10 @@ limitations under the License.
 
 namespace xla {
 
-// InlinerVisitor traverses the HLO computation and inlines maps.
-class InlinerVisitor : public DfsHloVisitorWithDefault {
+// MapInlinerVisitor traverses the HLO computation and inlines maps.
+class MapInlinerVisitor : public DfsHloVisitorWithDefault {
  public:
-  explicit InlinerVisitor(HloComputation* computation)
+  explicit MapInlinerVisitor(HloComputation* computation)
       : computation_(computation) {}
 
   // Default visitor action is to do nothing and return OK.
@@ -49,24 +49,23 @@ class InlinerVisitor : public DfsHloVisitorWithDefault {
   StatusOr<bool> Run(HloComputation* computation);
 
  private:
-  // Current HloComputation instance the InlinerVisitor is traversing.
+  // Current HloComputation instance the MapInlinerVisitor is traversing.
   HloComputation* computation_;
 
   // Whether algebraic simplification has occurred.
   bool changed_ = false;
 };
 
-StatusOr<bool> InlinerVisitor::Run(HloComputation* computation) {
+StatusOr<bool> MapInlinerVisitor::Run(HloComputation* computation) {
   changed_ = false;
   computation_ = computation;
   TF_RETURN_IF_ERROR(computation->root_instruction()->Accept(this));
   return changed_;
 }
 
-Status InlinerVisitor::HandleMap(HloInstruction* map) {
+Status MapInlinerVisitor::HandleMap(HloInstruction* map) {
   HloComputation* function = map->to_apply();
   HloInstruction& root = *function->root_instruction();
-  // TODO(b/29249531): Add DCE pass to remove unused HloComputations.
   // Only inlining functions that are simply a single operation until a better
   // profitability model for inlining is defined.
   if (hlo_query::AllOperandsAreParameters(root)) {
@@ -112,8 +111,8 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) {
   return Status::OK();
 }
 
-StatusOr<bool> Inliner::Run(HloModule* module) {
-  InlinerVisitor visitor(/*computation=*/nullptr);
+StatusOr<bool> MapInliner::Run(HloModule* module) {
+  MapInlinerVisitor visitor(/*computation=*/nullptr);
   bool changed = false;
   for (HloComputation* computation : module->computations()) {
     TF_ASSIGN_OR_RETURN(bool computation_changed, visitor.Run(computation));
diff --git a/tensorflow/compiler/xla/service/inliner.h b/tensorflow/compiler/xla/service/map_inliner.h
similarity index 59%
rename from tensorflow/compiler/xla/service/inliner.h
rename to tensorflow/compiler/xla/service/map_inliner.h
index e20af08fb7..b679118118 100644
--- a/tensorflow/compiler/xla/service/inliner.h
+++ b/tensorflow/compiler/xla/service/map_inliner.h
@@ -13,27 +13,27 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_INLINER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_INLINER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MAP_INLINER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_MAP_INLINER_H_
 
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 
 namespace xla {
 
-// A pass which performs inlining. Which can result, for example, in functions
-// that were previously being mapped by Map instead directly applied to the
-// forwarded operands (i.e., map({X, Y}, max) -> max(X, Y)).
-class Inliner : public HloModulePass {
+// A pass which performs map inlining. This replaces kMap instructions with
+// their equivalent sequence of array operations. For example:
+//   map({X, Y}, add) -> add(X, Y)).
+class MapInliner : public HloModulePass {
  public:
-  ~Inliner() override = default;
-  absl::string_view name() const override { return "inline"; }
+  ~MapInliner() override = default;
+  absl::string_view name() const override { return "map-inline"; }
 
-  // Run inlining on the given computation. Returns whether the computation was
-  // changed.
+  // Run map inlining on the given computation. Returns whether the computation
+  // was changed.
   StatusOr<bool> Run(HloModule* module) override;
 };
 
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_INLINER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MAP_INLINER_H_
diff --git a/tensorflow/compiler/xla/service/inliner_test.cc b/tensorflow/compiler/xla/service/map_inliner_test.cc
similarity index 95%
rename from tensorflow/compiler/xla/service/inliner_test.cc
rename to tensorflow/compiler/xla/service/map_inliner_test.cc
index 98e0f2cfd7..84059dd0f7 100644
--- a/tensorflow/compiler/xla/service/inliner_test.cc
+++ b/tensorflow/compiler/xla/service/map_inliner_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/inliner.h"
+#include "tensorflow/compiler/xla/service/map_inliner.h"
 
 #include <memory>
 #include <utility>
@@ -35,10 +35,10 @@ namespace op = xla::testing::opcode_matchers;
 namespace xla {
 namespace {
 
-using InlinerTest = HloVerifiedTestBase;
+using MapInlinerTest = HloVerifiedTestBase;
 
 // Test that `map` with `max` is transformed to `max`
-TEST_F(InlinerTest, MapMax) {
+TEST_F(MapInlinerTest, MapMax) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
 
   auto max_builder = HloComputation::Builder(TestName());
@@ -63,7 +63,7 @@ TEST_F(InlinerTest, MapMax) {
   hlo_module->AddEmbeddedComputation(std::move(max_f32));
   hlo_module->AddEntryComputation(std::move(computation));
 
-  Inliner inliner;
+  MapInliner inliner;
   EXPECT_TRUE(inliner.Run(hlo_module).ValueOrDie());
   EXPECT_THAT(hlo_module->entry_computation()->root_instruction(),
               op::Maximum(lhs, rhs));
@@ -75,7 +75,7 @@ TEST_F(InlinerTest, MapMax) {
 }
 
 // Test that `constant` function is changed to `broadcast`.
-TEST_F(InlinerTest, MapConstant) {
+TEST_F(MapInlinerTest, MapConstant) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
 
   auto const2_builder = HloComputation::Builder(TestName());
@@ -97,7 +97,7 @@ TEST_F(InlinerTest, MapConstant) {
   hlo_module->AddEmbeddedComputation(std::move(const2_f32));
   hlo_module->AddEntryComputation(std::move(computation));
   HloInstruction* root = hlo_module->entry_computation()->root_instruction();
-  Inliner inliner;
+  MapInliner inliner;
   EXPECT_TRUE(inliner.Run(hlo_module).ValueOrDie());
   root = hlo_module->entry_computation()->root_instruction();
   EXPECT_THAT(root, op::Broadcast(op::Constant()));
@@ -108,7 +108,7 @@ TEST_F(InlinerTest, MapConstant) {
   EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
 }
 
-TEST_F(InlinerTest, MapSubtractOppositeOrder) {
+TEST_F(MapInlinerTest, MapSubtractOppositeOrder) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
 
   // Note that the parameter ordinals are in the opposite order to their
@@ -135,7 +135,7 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) {
   hlo_module->AddEmbeddedComputation(std::move(max_f32));
   hlo_module->AddEntryComputation(std::move(computation));
 
-  Inliner inliner;
+  MapInliner inliner;
   EXPECT_TRUE(inliner.Run(hlo_module).ValueOrDie());
   EXPECT_THAT(hlo_module->entry_computation()->root_instruction(),
           op::Subtract(rhs, lhs));
@@ -146,7 +146,7 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) {
   EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
 }
 
-TEST_F(InlinerTest, MapParameter) {
+TEST_F(MapInlinerTest, MapParameter) {
   Shape r0f32 = ShapeUtil::MakeShape(F32, {});
 
   auto param_builder = HloComputation::Builder(TestName());
@@ -167,7 +167,7 @@ TEST_F(InlinerTest, MapParameter) {
   hlo_module->AddEmbeddedComputation(std::move(param_f32));
   hlo_module->AddEntryComputation(std::move(computation));
 
-  Inliner inliner;
+  MapInliner inliner;
   EXPECT_TRUE(inliner.Run(hlo_module.get()).ValueOrDie());
   EXPECT_THAT(hlo_module->entry_computation()->root_instruction(), rhs);
 
-- 
GitLab


From b74c9aa65fcbe615495a972a5021e983707d02f6 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 4 Oct 2018 14:24:25 -0700
Subject: [PATCH 0390/1085] Add apidefs for the list ops.

PiperOrigin-RevId: 215802845
---
 .../api_def/python_api/api_defTensorListPushBackBatch.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_EmptyTensorList.pbtxt     | 4 ++++
 .../api_def/python_api/api_def_TensorListConcatLists.pbtxt    | 4 ++++
 .../api_def/python_api/api_def_TensorListElementShape.pbtxt   | 4 ++++
 .../api_def/python_api/api_def_TensorListFromTensor.pbtxt     | 4 ++++
 .../core/api_def/python_api/api_def_TensorListGather.pbtxt    | 4 ++++
 .../core/api_def/python_api/api_def_TensorListGetItem.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListLength.pbtxt    | 4 ++++
 .../core/api_def/python_api/api_def_TensorListPopBack.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListPushBack.pbtxt  | 4 ++++
 .../core/api_def/python_api/api_def_TensorListReserve.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListScatter.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListSetItem.pbtxt   | 4 ++++
 .../core/api_def/python_api/api_def_TensorListStack.pbtxt     | 4 ++++
 14 files changed, 56 insertions(+)
 create mode 100644 tensorflow/core/api_def/python_api/api_defTensorListPushBackBatch.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_EmptyTensorList.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListConcatLists.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListElementShape.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListFromTensor.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListGather.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListGetItem.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListLength.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListPopBack.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListPushBack.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListReserve.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListScatter.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListSetItem.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListStack.pbtxt

diff --git a/tensorflow/core/api_def/python_api/api_defTensorListPushBackBatch.pbtxt b/tensorflow/core/api_def/python_api/api_defTensorListPushBackBatch.pbtxt
new file mode 100644
index 0000000000..3d937c745c
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_defTensorListPushBackBatch.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListPushBackBatch"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_EmptyTensorList.pbtxt b/tensorflow/core/api_def/python_api/api_def_EmptyTensorList.pbtxt
new file mode 100644
index 0000000000..44f25b5d93
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_EmptyTensorList.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "EmptyTensorList"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListConcatLists.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListConcatLists.pbtxt
new file mode 100644
index 0000000000..45fc55e71e
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListConcatLists.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListConcatLists"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListElementShape.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListElementShape.pbtxt
new file mode 100644
index 0000000000..e1ad713e7f
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListElementShape.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListElementShape"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListFromTensor.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListFromTensor.pbtxt
new file mode 100644
index 0000000000..4aaefba3c5
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListFromTensor.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListFromTensor"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListGather.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListGather.pbtxt
new file mode 100644
index 0000000000..aaf607d70e
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListGather.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListGather"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListGetItem.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListGetItem.pbtxt
new file mode 100644
index 0000000000..3bb5f39cbc
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListGetItem.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListGetItem"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListLength.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListLength.pbtxt
new file mode 100644
index 0000000000..a04c20bb8a
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListLength.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListLength"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListPopBack.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListPopBack.pbtxt
new file mode 100644
index 0000000000..9287162f22
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListPopBack.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListPopBack"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListPushBack.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListPushBack.pbtxt
new file mode 100644
index 0000000000..da2bc11721
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListPushBack.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListPushBack"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListReserve.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListReserve.pbtxt
new file mode 100644
index 0000000000..77e63747d5
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListReserve.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListReserve"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListScatter.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListScatter.pbtxt
new file mode 100644
index 0000000000..0015189d7f
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListScatter.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListScatter"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListSetItem.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListSetItem.pbtxt
new file mode 100644
index 0000000000..4999ee7ad9
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListSetItem.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListSetItem"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_TensorListStack.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorListStack.pbtxt
new file mode 100644
index 0000000000..2dc7b2784b
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_TensorListStack.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "TensorListStack"
+  visibility: HIDDEN
+}
-- 
GitLab


From ac7b84de8803edbb2d4da573b3f8704e9fad8fa8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 14:45:32 -0700
Subject: [PATCH 0391/1085] Internal change.

PiperOrigin-RevId: 215806953
---
 tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
index 9f62ac3f2c..c22a457a71 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
@@ -113,6 +113,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // input configuration.
   TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
 
+  TF_LITE_ENSURE_EQ(context, input->dims->size, 3);
   const int batch_size = input->dims->data[0];
   const int max_time = input->dims->data[1];
   const int fw_num_units = fw_input_weights->dims->data[0];
-- 
GitLab


From 6f3d1517d104b3537a70c3c8b2600a065707a6a3 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Thu, 4 Oct 2018 13:34:31 -0700
Subject: [PATCH 0392/1085] Add TF_BUILD_TEST_TIMEOUT to
 ci_parameterized_build.sh

PiperOrigin-RevId: 215793932
---
 .../tools/ci_build/ci_parameterized_build.sh  | 27 ++++++++++++-------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 99bdedf7b4..fdff867ff0 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -83,6 +83,9 @@
 #                     Use the specified configurations when building.
 #                     When set, overrides TF_BUILD_IS_OPT and TF_BUILD_MAVX
 #                     options, as this will replace the two.
+#   TF_BUILD_TEST_TIMEOUT:
+#                     Sets the value of bazel --test_timeout, defaults to -1
+#                     which uses the bazel defaults.
 #   TF_SKIP_CONTRIB_TESTS:
 #                     If set to any non-empty or non-0 value, will skip running
 #                     contrib tests.
@@ -125,6 +128,8 @@ NO_DOCKER_OPT_FLAG="--genrule_strategy=standalone"
 
 DO_DOCKER=1
 
+# Bazel uses defaults for all test sizes when given `-1`.
+TF_BUILD_TEST_TIMEOUT=${TF_BUILD_TEST_TIMEOUT:--1}
 
 # Helpful flags:
 # --test_summary=detailed: Tell us more about which targets are being built
@@ -132,7 +137,16 @@ DO_DOCKER=1
 # --build_tests_only: Don't build targets depended on by tests if the test is
 #                     disabled. Also saves some compilation time. Otherwise,
 #                     tries to build everything.
-BAZEL_TEST_FLAGS="--test_summary=detailed --build_tests_only --keep_going"
+# --test_timeout: Test timeouts in the order short,moderate,long,eternal.
+# --test_env: Environment variables to set when running bazel tests. These are
+#             especially important when using --run_under with
+#             parallel_gpu_execute.
+BAZEL_TEST_FLAGS=""\
+"--test_summary=detailed --build_tests_only --keep_going "\
+"--test_timeout=${TF_BUILD_TEST_TIMEOUT} "\
+"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
+"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
+"--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB}"
 BAZEL_BUILD_FLAGS="--keep_going"
 
 BAZEL_CMD="bazel test ${BAZEL_TEST_FLAGS}"
@@ -148,13 +162,6 @@ ANDROID_FULL_CMD="${CI_BUILD_DIR}/builds/android_full.sh"
 TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 PARALLEL_GPU_TEST_CMD='//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute'
 
-# Environment variables to set when running bazel tests.  These are especially
-# important when using --run_under with parallel_gpu_execute.
-BAZEL_TEST_ENV=""\
-"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
-"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
-"--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB} "
-
 BENCHMARK_CMD="${CI_BUILD_DIR}/builds/benchmark.sh"
 
 EXTRA_PARAMS=""
@@ -415,11 +422,11 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
   if [[ ${CTYPE} == cpu* ]] || \
      [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
     # CPU only command, fully parallel.
-    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
+    NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} "\
       "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == gpu* ]]; then
     # GPU only command, run as many jobs as the GPU count only.
-    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${BAZEL_TEST_ENV} ${OPT_FLAG} "\
+    NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
 "--local_test_jobs=${TF_GPU_COUNT} "\
 "--run_under=${PARALLEL_GPU_TEST_CMD} "\
 "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
-- 
GitLab


From a742575879db1df48daf929b8d29e43a1d168dd7 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Thu, 4 Oct 2018 14:55:14 -0700
Subject: [PATCH 0393/1085] Automated rollback of commit
 6b538d9ce54e878576131cde0c76e43a893180c2

PiperOrigin-RevId: 215808649
---
 tensorflow/python/data/kernel_tests/BUILD     |  1 -
 tensorflow/tensorflow.bzl                     | 39 ++++++++-----------
 .../tools/pip_package/pip_smoke_test.py       |  2 +-
 3 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 10ec0dbe1c..c7295d6e69 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -306,7 +306,6 @@ cuda_py_test(
         "//tensorflow/python:framework_test_lib",
     ],
     tags = [
-        "no_oss",  # TODO(b/116813115): Investigate timeout and re-enable.
         "no_windows_gpu",
     ],
 )
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index dead44c57e..cad5de1b0c 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1798,29 +1798,22 @@ def cuda_py_test(
         flaky = 0,
         xla_enabled = False,
         grpc_enabled = False):
-    if main == None:
-        main = name + ".py"
-    for config in ["cpu", "gpu"]:
-        test_name = name
-        test_tags = tags
-        if config == "gpu":
-            test_name += "_gpu"
-            test_tags = test_tags + tf_cuda_tests_tags()
-        tf_py_test(
-            name = test_name,
-            size = size,
-            srcs = srcs,
-            data = data,
-            main = main,
-            args = args,
-            tags = test_tags,
-            shard_count = shard_count,
-            additional_deps = additional_deps,
-            kernels = kernels,
-            flaky = flaky,
-            xla_enabled = xla_enabled,
-            grpc_enabled = grpc_enabled,
-        )
+    test_tags = tags + tf_cuda_tests_tags()
+    tf_py_test(
+        name = name,
+        size = size,
+        srcs = srcs,
+        data = data,
+        main = main,
+        args = args,
+        tags = test_tags,
+        shard_count = shard_count,
+        additional_deps = additional_deps,
+        kernels = kernels,
+        flaky = flaky,
+        xla_enabled = xla_enabled,
+        grpc_enabled = grpc_enabled,
+    )
 
 register_extension_info(
     extension_name = "cuda_py_test",
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index e7f9628fa6..c6ef82ccdc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -142,7 +142,7 @@ def main():
 
   missing_dependencies = []
   # File extensions and endings to ignore
-  ignore_extensions = ["_test", "_test.py", "_test_gpu", "_test_gpu.py"]
+  ignore_extensions = ["_test", "_test.py"]
 
   ignored_files = 0
   blacklisted_files = len(BLACKLIST)
-- 
GitLab


From 2e2e89699c1186eef157911b57e4b062de376ce9 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Thu, 4 Oct 2018 14:59:43 -0700
Subject: [PATCH 0394/1085] Add basic TensorList op support in bridge.

* Add kernels for TensorListReserve. EmptyTensorList, TensorListElementShape, TensorListPushBack, TensorlistPopBack;
* Treat list type pretty much identical to Stack in the bridge for now;
* Support variant output by treating variant like a uint8 and leaving the interpretation up to the XlaExpression (variant type does not support tensor_data());

PiperOrigin-RevId: 215809335
---
 tensorflow/compiler/tests/BUILD               |  16 ++
 .../compiler/tests/tensor_list_ops_test.py    | 105 ++++++++
 tensorflow/compiler/tf2xla/kernels/BUILD      |   2 +
 .../tf2xla/kernels/tensor_list_ops.cc         | 226 ++++++++++++++++++
 tensorflow/compiler/tf2xla/xla_op_kernel.cc   |  40 +++-
 tensorflow/compiler/tf2xla/xla_op_kernel.h    |   5 +
 6 files changed, 384 insertions(+), 10 deletions(-)
 create mode 100644 tensorflow/compiler/tests/tensor_list_ops_test.py
 create mode 100644 tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index ee36729fd1..ba2401ed26 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -894,6 +894,22 @@ tf_xla_py_test(
     ],
 )
 
+tf_xla_py_test(
+    name = "tensor_list_ops_test",
+    size = "small",
+    srcs = ["tensor_list_ops_test.py"],
+    # TensorList ops are not implemented in the on-demand compilation model yet.
+    disabled_backends = "cpu_ondemand",
+    deps = [
+        ":xla_test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:list_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:function",
+    ],
+)
+
 tf_xla_py_test(
     name = "ternary_ops_test",
     size = "small",
diff --git a/tensorflow/compiler/tests/tensor_list_ops_test.py b/tensorflow/compiler/tests/tensor_list_ops_test.py
new file mode 100644
index 0000000000..b556723eec
--- /dev/null
+++ b/tensorflow/compiler/tests/tensor_list_ops_test.py
@@ -0,0 +1,105 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ops which manipulate lists of tensors via bridge."""
+
+# pylint: disable=g-bad-name
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+from tensorflow.compiler.tests import xla_test
+from tensorflow.python.client import session
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import list_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.platform import test
+from tensorflow.python.training import server_lib
+
+
+def scalar_shape():
+  return ops.convert_to_tensor([], dtype=dtypes.int32)
+
+
+class ListOpsTest(xla_test.XLATestCase):
+
+  def testElementShape(self):
+    with self.cached_session() as sess, self.test_scope():
+      dim = array_ops.placeholder(dtypes.int32)
+      l = list_ops.tensor_list_reserve(
+          element_shape=(dim, 15), num_elements=20,
+          element_dtype=dtypes.float32)
+      e32 = list_ops.tensor_list_element_shape(l, shape_type=dtypes.int32)
+      e64 = list_ops.tensor_list_element_shape(l, shape_type=dtypes.int64)
+      self.assertAllEqual(sess.run(e32, {dim: 10}), (10, 15))
+      self.assertAllEqual(sess.run(e64, {dim: 7}), (7, 15))
+
+  def testPushPop(self):
+    with self.cached_session() as sess, self.test_scope():
+      num = array_ops.placeholder(dtypes.int32)
+      l = list_ops.tensor_list_reserve(
+          element_shape=(7, 15), num_elements=num, element_dtype=dtypes.float32)
+      l = list_ops.tensor_list_push_back(
+          l, constant_op.constant(1.0, shape=(7, 15)))
+      l = list_ops.tensor_list_push_back(
+          l, constant_op.constant(2.0, shape=(7, 15)))
+      l, e2 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      _, e1 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      self.assertAllEqual(sess.run(e2, {num: 10}), 2.0 * np.ones((7, 15)))
+      self.assertAllEqual(sess.run(e1, {num: 10}), 1.0 * np.ones((7, 15)))
+
+  def testPushPopSeparateLists(self):
+    with self.cached_session() as sess, self.test_scope():
+      num = array_ops.placeholder(dtypes.int32)
+      l = list_ops.tensor_list_reserve(
+          element_shape=scalar_shape(),
+          num_elements=num,
+          element_dtype=dtypes.float32)
+      l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0))
+      l2 = list_ops.tensor_list_push_back(l, constant_op.constant(2.0))
+      l3 = list_ops.tensor_list_push_back(l, constant_op.constant(3.0))
+      _, e11 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      l2, e21 = list_ops.tensor_list_pop_back(l2, element_dtype=dtypes.float32)
+      l2, e22 = list_ops.tensor_list_pop_back(l2, element_dtype=dtypes.float32)
+      l3, e31 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32)
+      l3, e32 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32)
+      result = sess.run([e11, [e21, e22], [e31, e32]], {num: 20})
+      self.assertEqual(result, [1.0, [2.0, 1.0], [3.0, 1.0]])
+
+  def testEmptyTensorList(self):
+    dim = 7
+    with self.cached_session() as sess, self.test_scope():
+      p = array_ops.placeholder(dtypes.int32)
+      l = list_ops.empty_tensor_list(
+          element_shape=(p, 15), element_dtype=dtypes.float32)
+      l = list_ops.tensor_list_push_back(
+          l, constant_op.constant(1.0, shape=(dim, 15)))
+      _, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   "Use TensorListReserve instead"):
+        self.assertEqual(sess.run(e, {p: dim}), 1.0 * np.ones((dim, 15)))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 9a7130f253..95a010a119 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -95,6 +95,7 @@ tf_kernel_library(
         "stateless_random_ops.cc",
         "strided_slice_op.cc",
         "tensor_array_ops.cc",
+        "tensor_list_ops.cc",
         "tile_ops.cc",
         "topk_op.cc",
         "training_ops.cc",
@@ -158,6 +159,7 @@ tf_kernel_library(
         "//tensorflow/core/kernels:control_flow_ops",
         "//tensorflow/core/kernels:conv_ops",
         "//tensorflow/core/kernels:cwise_op",
+        "//tensorflow/core/kernels:list_kernels",
         "//tensorflow/core/kernels:no_op",
         "//tensorflow/core/kernels:ops_util",
         "//tensorflow/core/kernels:pooling_ops",
diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc
new file mode 100644
index 0000000000..74d4fcc425
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc
@@ -0,0 +1,226 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// XLA TensorList operators.
+
+#include <limits>
+#include <vector>
+
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/type_util.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/concat_lib.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace {
+
+Status GetTensorListShape(xla::XlaBuilder* builder, xla::XlaOp op,
+                          TensorShape* tensor_list_shape) {
+  auto shape_or_status = builder->GetShape(op);
+  if (!shape_or_status.ok()) {
+    return shape_or_status.status();
+  }
+  xla::Shape shape = shape_or_status.ValueOrDie();
+  TF_RET_CHECK(xla::ShapeUtil::IsTuple(shape));
+  return XLAShapeToTensorShape(xla::ShapeUtil::GetTupleElementShape(shape, 0),
+                               tensor_list_shape);
+}
+
+class TensorListReserveOp : public XlaOpKernel {
+ public:
+  explicit TensorListReserveOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &dtype_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    TensorShape element_shape;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &element_shape));
+    int64 num_elements;
+    OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &num_elements));
+
+    TensorShape tensor_shape;
+    tensor_shape.AddDim(num_elements);
+    tensor_shape.AppendShape(element_shape);
+
+    xla::XlaBuilder* b = ctx->builder();
+    ctx->SetOutput(0, xla::Tuple(b, {xla::Broadcast(XlaHelpers::Zero(b, dtype_),
+                                                    tensor_shape.dim_sizes()),
+                                     xla::ConstantR0<int32>(b, 0)}));
+  }
+
+ private:
+  DataType dtype_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(TensorListReserveOp);
+};
+
+REGISTER_XLA_OP(Name("TensorListReserve")
+                    .CompileTimeConstInput("element_shape")
+                    .CompileTimeConstInput("num_elements"),
+                TensorListReserveOp);
+
+class EmptyTensorListOp : public XlaOpKernel {
+ public:
+  explicit EmptyTensorListOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    ctx->CtxFailure(
+        errors::InvalidArgument("XLA compilation requires a fixed tensor list "
+                                "size. Use TensorListReserve instead."));
+  }
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(EmptyTensorListOp);
+};
+
+REGISTER_XLA_OP(Name("EmptyTensorList"), EmptyTensorListOp);
+
+class TensorListElementShapeOp : public XlaOpKernel {
+ public:
+  explicit TensorListElementShapeOp(OpKernelConstruction* ctx)
+      : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("shape_type", &shape_type_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* b = ctx->builder();
+    TensorShape shape;
+    OP_REQUIRES_OK(ctx, GetTensorListShape(b, ctx->Input(0), &shape));
+    shape.RemoveDim(0);
+
+    switch (shape_type_) {
+      case DT_INT64:
+        ctx->SetOutput(0, xla::ConstantR1<int64>(b, shape.dim_sizes()));
+        break;
+      case DT_INT32: {
+        std::vector<int32> size;
+        for (int64 s : shape.dim_sizes()) {
+          size.push_back(s);
+        }
+        ctx->SetOutput(0, xla::ConstantR1<int32>(b, size));
+        break;
+      }
+      default:
+        ctx->CtxFailure(
+            errors::InvalidArgument("Unsupported shape type requested"));
+        return;
+    }
+  }
+
+ private:
+  DataType shape_type_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(TensorListElementShapeOp);
+};
+
+REGISTER_XLA_OP(Name("TensorListElementShape"), TensorListElementShapeOp);
+
+class TensorListPushBackOp : public XlaOpKernel {
+ public:
+  explicit TensorListPushBackOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &dtype_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* b = ctx->builder();
+    xla::XlaOp list = ctx->Input(0);
+    TensorShape elem_shape = ctx->InputShape(1);
+
+    xla::XlaOp ta = xla::GetTupleElement(list, 0);
+    xla::XlaOp index = xla::GetTupleElement(list, 1);
+    xla::XlaOp value = ctx->Input(1);
+
+    // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0].
+    auto start_indices =
+        xla::Pad(xla::Reshape(index, {1}), xla::ConstantR0<int32>(b, 0),
+                 xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
+
+    TensorShape slice_shape = elem_shape;
+    slice_shape.InsertDim(0, 1LL);
+    auto update = xla::Reshape(value, slice_shape.dim_sizes());
+
+    // TODO(phawkins): We don't check the index is in bounds --- there is no
+    // error mechanism in XLA.
+    ctx->SetOutput(
+        0, xla::Tuple(b, {xla::DynamicUpdateSlice(ta, update, start_indices),
+                          index + xla::ConstantR0<int32>(b, 1)}));
+  }
+
+ private:
+  DataType dtype_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(TensorListPushBackOp);
+};
+
+REGISTER_XLA_OP(Name("TensorListPushBack"), TensorListPushBackOp);
+
+class TensorListPopBackOp : public XlaOpKernel {
+ public:
+  explicit TensorListPopBackOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &dtype_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* b = ctx->builder();
+    xla::XlaOp state = ctx->Input(0);
+
+    TensorShape shape;
+    OP_REQUIRES_OK(ctx, GetTensorListShape(b, state, &shape));
+
+    xla::XlaOp ta = xla::GetTupleElement(state, 0);
+    xla::XlaOp index = xla::GetTupleElement(state, 1);
+
+    index = index - xla::ConstantR0<int32>(b, 1);
+
+    // start_indices of the DynamicSlice are [index, 0, 0, ..., 0].
+    auto start_indices =
+        xla::Pad(xla::Reshape(index, {1}), xla::ConstantR0<int32>(b, 0),
+                 xla::MakeEdgePaddingConfig({{0, shape.dims() - 1}}));
+
+    auto slice_shape = shape.dim_sizes();
+    slice_shape[0] = 1LL;
+
+    // TODO(phawkins): We don't check the index is in bounds --- there is no
+    // error mechanism in XLA.
+    xla::XlaOp read = xla::DynamicSlice(ta, start_indices, slice_shape);
+    // Remove the leading '1' dimension.
+    std::vector<int64> value_shape(slice_shape.begin() + 1, slice_shape.end());
+
+    ctx->SetOutput(0, xla::Tuple(b, {ta, index}));
+    ctx->SetOutput(1, xla::Reshape(read, value_shape));
+  }
+
+ private:
+  DataType dtype_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(TensorListPopBackOp);
+};
+
+REGISTER_XLA_OP(Name("TensorListPopBack"), TensorListPopBackOp);
+
+}  // anonymous namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
index 2a9eaeee14..dd3498ef7a 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
@@ -455,23 +455,43 @@ Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type,
   return Status::OK();
 }
 
+Status XlaOpKernelContext::allocate_output(int index, const xla::Shape& shape,
+                                           Tensor** output) {
+  // The step's default allocator is the dummy XlaCompilationAllocator which
+  // simply allocates a metadata buffer to hold the expression to which it
+  // corresponds.
+  if (expected_output_dtype(index) == DT_VARIANT) {
+    // tensor_data() is not supported for variant Tensor (i.e.,
+    // DataTypeCanUseMemcpy is false for DT_VARIANT), and so storing the
+    // XlaExpression inside the Tensor's tensor_data() does not work for
+    // variant. Instead construct a uint8 tensor and store the expression in its
+    // value.
+    // TODO(jpienaar): This should be refactored to stop masquerading
+    // XlaExpressions as Tensors.
+    *output = new Tensor();
+    TensorShape tensor_shape;
+    TF_RETURN_IF_ERROR(
+        context_->allocate_temp(DT_UINT8, tensor_shape, *output));
+    context_->set_output(index, **output);
+  } else {
+    TensorShape tensor_shape;
+    TF_RETURN_IF_ERROR(XLAShapeToTensorShape(shape, &tensor_shape));
+    TF_RETURN_IF_ERROR(context_->allocate_output(index, tensor_shape, output));
+  }
+  return Status::OK();
+}
+
 void XlaOpKernelContext::SetOutput(int index, const xla::XlaOp& handle) {
   // Makes the host Tensor that will refer to the expression.
   Tensor* output = nullptr;
-  auto shape = builder()->GetShape(handle);
-  if (!shape.ok()) {
-    SetStatus(shape.status());
+  auto shape_or = builder()->GetShape(handle);
+  if (!shape_or.ok()) {
+    SetStatus(shape_or.status());
     return;
   }
 
-  // The step's default allocator is the dummy XlaCompilationAllocator which
-  // simply allocates a metadata buffer to hold the expression to which it
-  // corresponds.
-  TensorShape tensor_shape;
-  OP_REQUIRES_OK(context_,
-                 XLAShapeToTensorShape(shape.ValueOrDie(), &tensor_shape));
   OP_REQUIRES_OK(context_,
-                 context_->allocate_output(index, tensor_shape, &output));
+                 allocate_output(index, shape_or.ValueOrDie(), &output));
 
   // The expression is stored in the tensor's data buffer. Fill in the
   // fields now.
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h
index a3a0d10cc0..aa00a45496 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.h
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h
@@ -255,6 +255,11 @@ class XlaOpKernelContext {
   // Returns the tensor of input `name`.
   const Tensor& GetInputTensorByName(absl::string_view name);
 
+  // Wraps OpKernelContext's allocate_output method while providing special
+  // behavior for DT_VARIANT: a variant is treated as DT_UINT8 scalar as the
+  // type to allow mapping for variant to more generic types.
+  Status allocate_output(int index, const xla::Shape& shape, Tensor** output);
+
   OpKernelContext* const context_;
 };
 
-- 
GitLab


From bf94614e9540e23d808bdc15ce1af1f53f662d13 Mon Sep 17 00:00:00 2001
From: Michael Gielda <mgielda@antmicro.com>
Date: Fri, 5 Oct 2018 00:09:37 +0200
Subject: [PATCH 0395/1085] Take ALL TESTS PASSED in ticks for good formatting

---
 tensorflow/contrib/lite/experimental/micro/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/experimental/micro/README.md b/tensorflow/contrib/lite/experimental/micro/README.md
index 414cafde4d..9add470761 100644
--- a/tensorflow/contrib/lite/experimental/micro/README.md
+++ b/tensorflow/contrib/lite/experimental/micro/README.md
@@ -64,7 +64,7 @@ TF_LITE_MICRO_TEST(SimpleTest) {
 TF_LITE_MICRO_TESTS_END
 ```
 
-These macros work a lot like [the Google test framework](https://github.com/google/googletest), but they don't require any dependencies and just write results to stderr, rather than aborting the program. If all the tests pass, then "~~~ALL TESTS PASSED~~~" is output, and the test harness that runs the binary during the make process knows that everything ran correctly. If there's an error, the lack of the expected string lets the harness know that the test failed.
+These macros work a lot like [the Google test framework](https://github.com/google/googletest), but they don't require any dependencies and just write results to stderr, rather than aborting the program. If all the tests pass, then `~~~ALL TESTS PASSED~~~` is output, and the test harness that runs the binary during the make process knows that everything ran correctly. If there's an error, the lack of the expected string lets the harness know that the test failed.
 
 So, why are we running tests in this complicated way? So far, we've been building binaries that run locally on the Mac OS or Linux machine you're building on, but this approach becomes important when we're targeting simple micro controller devices.
 
-- 
GitLab


From f1ed49830ee66afdad0ae13fa22722754b278ce2 Mon Sep 17 00:00:00 2001
From: Michael Gielda <mgielda@antmicro.com>
Date: Fri, 5 Oct 2018 00:11:10 +0200
Subject: [PATCH 0396/1085] Use ticks in all three occurrences of ALL TESTS...

---
 tensorflow/contrib/lite/experimental/micro/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/experimental/micro/README.md b/tensorflow/contrib/lite/experimental/micro/README.md
index 9add470761..6b7712c25d 100644
--- a/tensorflow/contrib/lite/experimental/micro/README.md
+++ b/tensorflow/contrib/lite/experimental/micro/README.md
@@ -36,7 +36,7 @@ Building requires a Linux or OS X machine.
  - Download the dependencies by running `tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh`. This may take a few minutes
  - Build and test the library with `make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile test`
 
-You should see a series of compilation steps, followed by "~~~ALL TESTS PASSED~~~" for the various tests of the code that it will run. If there's an error, you should get an informative message from make about what went wrong.
+You should see a series of compilation steps, followed by `~~~ALL TESTS PASSED~~~` for the various tests of the code that it will run. If there's an error, you should get an informative message from make about what went wrong.
 
 These tests are all built as simple binaries with few dependencies, so you can run them manually. For example, here's how to run the depthwise convolution test, and its output:
 
@@ -111,4 +111,4 @@ LOGS:
 tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/tensorflow/contrib/lite/experimental/micro/kernels/depthwise_conv_test: PASS
 ```
 
-There's a lot of output here, but you should be able to see that the same tests that were covered when we ran locally on the development machine show up in the debug logs here, along with the magic string "~~~ALL TESTS PASSED~~~". This is the exact same code as before, just compiled and run on the STM32F103 rather than your desktop. We hope that the simplicity of this testing approach will help make adding support for new platforms as easy as possible.
+There's a lot of output here, but you should be able to see that the same tests that were covered when we ran locally on the development machine show up in the debug logs here, along with the magic string `~~~ALL TESTS PASSED~~~`. This is the exact same code as before, just compiled and run on the STM32F103 rather than your desktop. We hope that the simplicity of this testing approach will help make adding support for new platforms as easy as possible.
-- 
GitLab


From d7748891881493b7c6c957fe6dda98bfe5428209 Mon Sep 17 00:00:00 2001
From: Cong Xu <cong.xu@intel.com>
Date: Thu, 4 Oct 2018 15:12:24 -0700
Subject: [PATCH 0397/1085] [INTEL MKL] Update TF_BUILD_VERSION to r1.11 and
 add packages

Signed-off-by: Cong Xu <cong.xu@intel.com>
---
 tensorflow/tools/docker/Dockerfile.devel-mkl-horovod | 4 ++--
 tensorflow/tools/docker/Dockerfile.mkl-horovod       | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
index 48f2400569..9649e1deaf 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
@@ -3,7 +3,7 @@ FROM ubuntu:16.04
 LABEL maintainer="Cong Xu <cong.xu@intel.com>"
 
 # These parameters can be overridden by parameterized_docker_build.sh
-ARG TF_BUILD_VERSION=r1.9
+ARG TF_BUILD_VERSION=r1.11
 ARG PYTHON="python"
 ARG PYTHON3_DEV=""
 ARG WHL_DIR="/tmp/pip"
@@ -29,7 +29,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         openjdk-8-jdk \
         openjdk-8-jre-headless \
         wget \
-        numactl \
+        libnuma-dev \
         openssh-client \
         openssh-server \
         && \
diff --git a/tensorflow/tools/docker/Dockerfile.mkl-horovod b/tensorflow/tools/docker/Dockerfile.mkl-horovod
index 4daf4fefff..0432cd5e80 100755
--- a/tensorflow/tools/docker/Dockerfile.mkl-horovod
+++ b/tensorflow/tools/docker/Dockerfile.mkl-horovod
@@ -6,7 +6,7 @@ LABEL maintainer="Cong Xu <cong.xu@intel.com>"
 ARG TF_WHL_URL
 
 # Optional parameters
-ARG TF_BUILD_VERSION=r1.9
+ARG TF_BUILD_VERSION=r1.11
 ARG PYTHON="python"
 ARG PYTHON_DEV="python-dev"
 ARG PIP="pip"
@@ -25,6 +25,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         rsync \
         software-properties-common \
         unzip \
+        wget \
+        libnuma-dev \
+        openssh-client \
+        openssh-server \
         && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
-- 
GitLab


From 26d3617d2ab5f4874b73059be524e94b9535465b Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Thu, 4 Oct 2018 15:11:26 -0700
Subject: [PATCH 0398/1085] Avoid creating control edges on not-this-graph.

PiperOrigin-RevId: 215811680
---
 tensorflow/python/eager/function.py       | 17 +++++++----------
 tensorflow/python/ops/control_flow_ops.py |  3 +++
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index dd9f5e233c..2750461fb2 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -269,15 +269,6 @@ class FuncGraph(ops.Graph):
   def variables(self, var_list):
     self._weak_variables = [weakref.ref(v) for v in var_list]
 
-  def control_dependencies(self, control_inputs):
-    # Drop control dependencies to outside of the graph. TODO(b/117109273)
-    # unclear how to capture an op, not a tensor.
-    if not control_inputs:
-      return super(FuncGraph, self).control_dependencies(control_inputs)
-    return super(FuncGraph, self).control_dependencies(
-        [c for c in control_inputs
-         if getattr(c, "graph", None) is self])
-
   def create_op(
       self,
       op_type,
@@ -503,6 +494,9 @@ class _EagerDefinedFunction(object):
 
     Returns:
       The outputs of the function call.
+
+    Raises:
+      ValueError: if the number of arguments is incorrect.
     """
 
     executing_eagerly = ctx.executing_eagerly()
@@ -536,6 +530,10 @@ class _EagerDefinedFunction(object):
       # TODO(akshayka): Either remove this if the FunctionLibraryRuntime
       # creates `PartitionedCallOp` kernels by default, or remove the previous
       # branch if a TPU kernel is registered for `PartitionedCall`.
+      if len(args) != len(self.signature.input_arg):
+        raise ValueError(
+            "Arguments and signature arguments do not match: %s %s " %
+            (len(args), len(list(self.signature.input_arg))))
       outputs = functional_ops.partitioned_call(
           args=args,
           f=self,
@@ -756,7 +754,6 @@ class Function(object):
         BACKWARD_FUNCTION_ATTRIBUTE_NAME:
             self._backward_graph_function._inference_function.name})  # pylint: disable=protected-access
     forward_function_attr.update(self._attrs)
-
     self._forward_function = _EagerDefinedFunction(
         forward_function_name, self._func_graph, self._func_graph.inputs,
         self._func_graph.outputs + backwards_graph_captures,
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index f779c3d273..5bc217d355 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -1333,6 +1333,9 @@ class ControlFlowState(object):
     """
     if util.IsLoopSwitch(op):
       return None
+    if op.graph._building_function:  # pylint: disable=protected-access
+      # The optimization here is tricky to apply to functions
+      return array_ops.zeros_like(op.outputs[index])
     dead_branch = util.IsSwitch(op)
     forward_ctxt = _GetWhileContext(op)
     grad_state = self._map.get(forward_ctxt)
-- 
GitLab


From bd99ed794264668ce77ed7527bc41df7aba3927b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 15:17:46 -0700
Subject: [PATCH 0399/1085] Fix bug in Grappler constant folding: The logic
 detecting full reductions was flawed. Added better test coverage.

Also added a extra test for a related symbolic shape inference operation that I first suspected to be broken.

PiperOrigin-RevId: 215812753
---
 .../grappler/costs/graph_properties_test.cc   |   6 +
 .../grappler/optimizers/constant_folding.cc   |  47 ++++---
 .../optimizers/constant_folding_test.cc       | 130 ++++++++++++------
 3 files changed, 118 insertions(+), 65 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index 362092a6cf..db10f586bc 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -1340,6 +1340,8 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   Output zero = ops::Const(s.WithOpName("zero"), 0.0f, {});
   Output g = ops::Shape(s.WithOpName("g"), c);
   Output h = ops::Fill(s.WithOpName("h"), g, zero);
+  Output zero_idx = ops::Const(s.WithOpName("zero_idx"), {0}, {1});
+  Output j = ops::Sum(s.WithOpName("j"), a, zero_idx);
 
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
@@ -1382,6 +1384,10 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) {
   ASSERT_EQ(2, shape_f.dim_size());
   EXPECT_EQ(shape_h.dim(0).size(), shape_c.dim(0).size());
   EXPECT_EQ(shape_h.dim(1).size(), shape_c.dim(1).size());
+
+  const auto shape_j = properties.GetOutputProperties("j").at(0).shape();
+  ASSERT_EQ(1, shape_j.dim_size());
+  EXPECT_EQ(shape_j.dim(0).size(), shape_a.dim(1).size());
 }
 
 TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) {
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index ca5d3a6dfd..3d0d95bba7 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -616,28 +616,37 @@ Status ConstantFolding::MaterializeReductionIndices(
     // We can't do anything if we don't know the rank of the input.
     return Status::OK();
   }
-  const int rank = input_prop.shape().dim_size();
-  if (rank == 0) {
+  const int input_rank = input_prop.shape().dim_size();
+  if (input_rank < 1) {
     // Unexpected graph, don't try to change it.
     return Status::OK();
   }
+  const OpInfo::TensorProperties& reduction_indices_prop = input_props[1];
+  DataType dtype = reduction_indices_prop.dtype();
+  if (dtype != DT_INT32 && dtype != DT_INT64) {
+    return Status::OK();
+  }
+  PartialTensorShape reduction_indices_shape(reduction_indices_prop.shape());
+  const int num_reduction_indices = reduction_indices_shape.num_elements();
+
   const std::vector<OpInfo::TensorProperties>& output_props =
       properties.GetOutputProperties(node->name());
   if (output_props.size() != 1) {
     return Status::OK();
   }
-  const bool keep_dims =
-      node->attr().count("keep_dims") && node->attr().at("keep_dims").b();
   const OpInfo::TensorProperties& output_prop = output_props[0];
-  PartialTensorShape output_shape(output_prop.shape());
-  if (output_shape.num_elements() != 1) {
-    bool full_reduction = false;
+  const int output_rank =
+      output_prop.shape().unknown_rank() ? -1 : output_prop.shape().dim_size();
+
+  bool full_reduction = output_rank == 0 || num_reduction_indices == input_rank;
+  if (!full_reduction) {
+    // A full reduction will generate a tensor of one of the shapes
+    // [], [1], [1, 1], [1, 1, ...]. Even if we do not know the number of
+    // elements in the output of the reduction, we may deduce it from reshape
+    // nodes following it.
     for (const NodeDef* fanout : node_map_->GetOutputs(node->name())) {
-      if (!IsReshape(*fanout) && !keep_dims) {
-        // Depending on how it's setup, a full reduction will generate a tensor
-        // of shape [], [1], [1, 1], [1, 1, ...]. If keep_dims isn't true, we
-        // rely on the existence of a reshape node following the reduction to
-        // ensure that the fanout is fed a scalar of the right shape.
+      full_reduction = false;
+      if (!IsReshape(*fanout)) {
         return Status::OK();
       }
       const std::vector<OpInfo::TensorProperties>& reshape_props =
@@ -658,20 +667,15 @@ Status ConstantFolding::MaterializeReductionIndices(
     }
   }
 
-  const OpInfo::TensorProperties& reduction_prop = input_props[1];
-  DataType dtype = reduction_prop.dtype();
-  if (dtype != DT_INT32 && dtype != DT_INT64) {
-    return Status::OK();
-  }
-  // We know it's a full reduction. We can generate the set of indices to
-  // reduce.
+  // We know it's a full reduction. We can generate the full set of indices to
+  // reduce as a constant node.
   string const_name = OptimizedNodeName(*node, "-reduction_indices");
   if (node_map_->GetNode(const_name)) {
     return Status::OK();
   }
   NodeDef* reduction_indices = graph_->add_node();
-  Tensor value(dtype, TensorShape({rank}));
-  for (int i = 0; i < rank; ++i) {
+  Tensor value(dtype, TensorShape({input_rank}));
+  for (int i = 0; i < input_rank; ++i) {
     if (dtype == DT_INT32) {
       value.vec<int32>()(i) = i;
     } else {
@@ -680,6 +684,7 @@ Status ConstantFolding::MaterializeReductionIndices(
   }
   TF_RETURN_IF_ERROR(
       CreateNodeDef(const_name, TensorValue(&value), reduction_indices));
+
   reduction_indices->set_device(node->device());
   string ctrl_dep =
       AddControlDependency(node->input(1), graph_, node_map_.get());
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index b09360a2c2..fab01edfed 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -2591,58 +2591,100 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs_InfiniteLoop) {
 }
 
 TEST_F(ConstantFoldingTest, MaterializeReductionIndices) {
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output input =
-      ops::Placeholder(s.WithOpName("input"), DT_FLOAT,
-                       ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
-  Output indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32);
-  Output sum = ops::Sum(s.WithOpName("sum"), input, indices);
-  Output size = ops::Const(s.WithOpName("size"), 1, {1});
-  Output reshape = ops::Reshape(s.WithOpName("reshape"), sum, size);
+  for (bool use_reshape : {true, false}) {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output input =
+        ops::Placeholder(s.WithOpName("input"), DT_FLOAT,
+                         ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
+    // If use_reshape is false, we need to now the number of indices to apply
+    // the rewrite.
+    Output indices = ops::Placeholder(
+        s.WithOpName("indices"), DT_INT32,
+        ops::Placeholder::Shape(PartialTensorShape({use_reshape ? -1 : 2})));
+    Output sum = ops::Sum(s.WithOpName("sum"), input, indices);
+    if (use_reshape) {
+      Output size = ops::Const(s.WithOpName("size"), 1, {1});
+      Output reshape = ops::Reshape(s.WithOpName("reshape"), sum, size);
+    }
 
-  GrapplerItem item;
-  TF_CHECK_OK(s.ToGraphDef(&item.graph));
-  item.fetch.push_back("reshape");
+    GrapplerItem item;
+    TF_CHECK_OK(s.ToGraphDef(&item.graph));
+    item.fetch.push_back(use_reshape ? "reshape" : "sum");
 
-  auto input_t = GenerateRandomTensor<DT_FLOAT>(TensorShape({3, 4}));
-  Tensor indices_t(DT_INT32, TensorShape({2}));
-  indices_t.flat<int>()(0) = 0;
-  indices_t.flat<int>()(1) = 1;
-  auto tensors_expected = EvaluateNodes(
-      item.graph, item.fetch, {{"input", input_t}, {"indices", indices_t}});
-  EXPECT_EQ(1, tensors_expected.size());
+    auto input_t = GenerateRandomTensor<DT_FLOAT>(TensorShape({3, 4}));
+    Tensor indices_t(DT_INT32, TensorShape({2}));
+    indices_t.flat<int>()(0) = 0;
+    indices_t.flat<int>()(1) = 1;
+    auto tensors_expected = EvaluateNodes(
+        item.graph, item.fetch, {{"input", input_t}, {"indices", indices_t}});
+    EXPECT_EQ(1, tensors_expected.size());
 
-  ConstantFolding optimizer(nullptr /* cpu_device */);
-  GraphDef output;
-  Status status = optimizer.Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
+    // Use aggressive mode to force the shape inference to propagate placeholder
+    // shapes.
+    ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                              nullptr /* cpu_device */);
+    GraphDef output;
+    Status status = optimizer.Optimize(nullptr, item, &output);
+    TF_EXPECT_OK(status);
 
-  // Run a second time to make sure the optimization is idempotent.
-  item.graph.Swap(&output);
-  status = optimizer.Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
+    // Run a second time to make sure the optimization is idempotent.
+    item.graph.Swap(&output);
+    status = optimizer.Optimize(nullptr, item, &output);
+    TF_EXPECT_OK(status);
 
-  int found = 0;
-  for (const auto& node : output.node()) {
-    if (node.name() == "ConstantFolding/sum-reduction_indices") {
-      ++found;
-      EXPECT_EQ("Const", node.op());
-      EXPECT_EQ("^indices", node.input(0));
-      EXPECT_EQ(2, TensorShape(node.attr().at("value").tensor().tensor_shape())
-                       .num_elements());
-    } else if (node.name() == "sum") {
-      ++found;
-      EXPECT_EQ("ConstantFolding/sum-reduction_indices", node.input(1));
-    } else if (node.name() == "indices") {
-      ++found;
+    int found = 0;
+    for (const auto& node : output.node()) {
+      if (node.name() == "ConstantFolding/sum-reduction_indices") {
+        ++found;
+        EXPECT_EQ("Const", node.op());
+        EXPECT_EQ("^indices", node.input(0));
+        EXPECT_EQ(2,
+                  TensorShape(node.attr().at("value").tensor().tensor_shape())
+                      .num_elements());
+      } else if (node.name() == "sum") {
+        ++found;
+        EXPECT_EQ("ConstantFolding/sum-reduction_indices", node.input(1));
+      } else if (node.name() == "indices") {
+        ++found;
+      }
     }
+    EXPECT_EQ(3, found);
+
+    auto tensors = EvaluateNodes(output, item.fetch,
+                                 {{"input", input_t}, {"indices", indices_t}});
+    EXPECT_EQ(1, tensors.size());
+    test::ExpectTensorNear<float>(tensors_expected[0], tensors[0], 1e-5);
   }
-  EXPECT_EQ(3, found);
+}
 
-  auto tensors = EvaluateNodes(output, item.fetch,
-                               {{"input", input_t}, {"indices", indices_t}});
-  EXPECT_EQ(1, tensors.size());
-  test::ExpectTensorNear<float>(tensors_expected[0], tensors[0], 1e-5);
+TEST_F(ConstantFoldingTest, MaterializeReductionIndices_NotFullReduction) {
+  for (bool input_rank_known : {true, false}) {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output input =
+        (input_rank_known ? ops::Placeholder(s.WithOpName("input"), DT_FLOAT,
+                                             ops::Placeholder::Shape(
+                                                 PartialTensorShape({-1, -1})))
+                          : ops::Placeholder(s.WithOpName("input"), DT_FLOAT));
+    Output indices =
+        ops::Placeholder(s.WithOpName("indices"), DT_INT32,
+                         ops::Placeholder::Shape(
+                             PartialTensorShape({input_rank_known ? 1 : 2})));
+    Output sum = ops::Sum(s.WithOpName("sum"), input, indices);
+
+    GrapplerItem item;
+    TF_CHECK_OK(s.ToGraphDef(&item.graph));
+    item.fetch.push_back("sum");
+
+    // Use aggressive mode to force the shape inference to propagate placeholder
+    // shapes.
+    ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
+                              nullptr /* cpu_device */);
+    GraphDef output;
+    Status status = optimizer.Optimize(nullptr, item, &output);
+    TF_EXPECT_OK(status);
+
+    CompareGraphs(item.graph, output);
+  }
 }
 
 TEST_F(ConstantFoldingTest, LargeConstant) {
-- 
GitLab


From feda8c786948b1c7cc6bd9fe447781ceaff6b3d3 Mon Sep 17 00:00:00 2001
From: Pete Warden <petewarden@google.com>
Date: Thu, 4 Oct 2018 15:20:56 -0700
Subject: [PATCH 0400/1085] Fix for memory issue in micro test code, spotted by
 asan checks

PiperOrigin-RevId: 215813259
---
 .../lite/experimental/micro/kernels/softmax_test.cc       | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
index df7d87d623..694456d8ac 100644
--- a/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
+++ b/tensorflow/contrib/lite/experimental/micro/kernels/softmax_test.cc
@@ -160,7 +160,7 @@ void TestSoftmaxQuantized(std::initializer_list<int> input_dims_data,
 TF_LITE_MICRO_TESTS_BEGIN
 
 TF_LITE_MICRO_TEST(SimpleTest) {
-  const int output_dims_count = 6;
+  const int output_dims_count = 10;
   float output_data[output_dims_count];
   tflite::testing::TestSoftmaxFloat(  //
       {2, 2, 5},                      // Input shape.
@@ -181,7 +181,7 @@ TF_LITE_MICRO_TEST(SimpleTest) {
           0.031684921,
           0.011656231,
       },
-      {2, 2, 3},  // Output shape.
+      {2, 2, 5},  // Output shape.
       output_data);
 }
 
@@ -192,7 +192,7 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized) {
   const float input_max = 64.0f;
   const float output_min = 0.0f;
   const float output_max = (255.0f / 256.0f);
-  const int output_dims_count = 6;
+  const int output_dims_count = 5;
   uint8_t output_data[output_dims_count];
   tflite::testing::TestSoftmaxQuantized(  //
       {2, 1, 5},                          // Input shape.
@@ -212,7 +212,7 @@ TF_LITE_MICRO_TEST(SimpleTestQuantized) {
           F2Q(0.234121657, output_min, output_max),
           F2Q(0.636408647, output_min, output_max),
       },
-      {2, 1, 3},               // Output shape.
+      {2, 1, 5},               // Output shape.
       output_min, output_max,  // Output quantized range.
       output_data);
 }
-- 
GitLab


From 3a457c7252f09afd03483092ce9dcc7aa292b8c6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 15:27:44 -0700
Subject: [PATCH 0401/1085] This CL fixes a bug in the eager benchmarks test
 that caused the defun tests to execute a different-sized matrix multiply than
 the eager tests.

PiperOrigin-RevId: 215814346
---
 tensorflow/python/eager/benchmarks_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py
index 3fe79ef244..2b0118c07f 100644
--- a/tensorflow/python/eager/benchmarks_test.py
+++ b/tensorflow/python/eager/benchmarks_test.py
@@ -353,7 +353,7 @@ class MicroBenchmarks(test.Benchmark):
                               num_iters,
                               execution_mode=None):
     f = function.defun(math_ops.matmul)
-    func = lambda: f(m, m, transpose_b)
+    func = lambda: f(m, m, transpose_b=transpose_b)
     self._run(func, num_iters, execution_mode=execution_mode)
 
   def _benchmark_defun_matmul_forward_backward(self,
@@ -366,7 +366,7 @@ class MicroBenchmarks(test.Benchmark):
     def func():
       with backprop.GradientTape() as gt:
         gt.watch(m)
-        y = f(m, m, transpose_b)
+        y = f(m, m, transpose_b=transpose_b)
       _ = gt.gradient(y, m)
 
     self._run(func, num_iters, execution_mode=execution_mode)
-- 
GitLab


From a08ca5bb74fcd828c19060216923ad0f378bb518 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Thu, 4 Oct 2018 15:29:58 -0700
Subject: [PATCH 0402/1085] Disable tensorrt:unary_test in OSS since it crashes
 with SEGV.

PiperOrigin-RevId: 215814732
---
 tensorflow/contrib/tensorrt/BUILD | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 9e8979bce4..5c16fcb760 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -455,7 +455,6 @@ cuda_py_tests(
         "test/multi_connection_neighbor_engine_test.py",
         "test/neighboring_engine_test.py",
         "test/rank_two_test.py",
-        "test/unary_test.py",
         "test/vgg_block_nchw_test.py",
         "test/vgg_block_test.py",
     ],
@@ -471,6 +470,25 @@ cuda_py_tests(
     ],
 )
 
+cuda_py_tests(
+    name = "tf_trt_integration_test_no_oss",
+    srcs = [
+        "test/unary_test.py",
+    ],
+    additional_deps = [
+        ":tf_trt_integration_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_test_lib",
+    ],
+    tags = [
+        "no_cuda_on_cpu_tap",
+        "no_oss",  # TODO(b/117274186): re-enable in OSS after crash fixed
+        "no_pip",  # TODO(b/117274186): re-enable in OSS after crash fixed
+        "no_windows",
+        "nomac",
+    ],
+)
+
 cc_library(
     name = "utils",
     srcs = ["convert/utils.cc"],
-- 
GitLab


From 68e869eb40ff0acd03515336bf31eecbabf97adc Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 4 Oct 2018 15:54:17 -0700
Subject: [PATCH 0403/1085] Disable tensorrt:unary_test in OSS since it crashes
 with SEGV. (#22754)

PiperOrigin-RevId: 215814732
---
 tensorflow/contrib/tensorrt/BUILD | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 9e8979bce4..5c16fcb760 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -455,7 +455,6 @@ cuda_py_tests(
         "test/multi_connection_neighbor_engine_test.py",
         "test/neighboring_engine_test.py",
         "test/rank_two_test.py",
-        "test/unary_test.py",
         "test/vgg_block_nchw_test.py",
         "test/vgg_block_test.py",
     ],
@@ -471,6 +470,25 @@ cuda_py_tests(
     ],
 )
 
+cuda_py_tests(
+    name = "tf_trt_integration_test_no_oss",
+    srcs = [
+        "test/unary_test.py",
+    ],
+    additional_deps = [
+        ":tf_trt_integration_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_test_lib",
+    ],
+    tags = [
+        "no_cuda_on_cpu_tap",
+        "no_oss",  # TODO(b/117274186): re-enable in OSS after crash fixed
+        "no_pip",  # TODO(b/117274186): re-enable in OSS after crash fixed
+        "no_windows",
+        "nomac",
+    ],
+)
+
 cc_library(
     name = "utils",
     srcs = ["convert/utils.cc"],
-- 
GitLab


From d6a2e7bcca5683c377b592f177bcac9aeb1c550f Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Thu, 4 Oct 2018 15:54:20 -0700
Subject: [PATCH 0404/1085] Fix unused imports.

PiperOrigin-RevId: 215819072
---
 tensorflow/compiler/tests/tensor_list_ops_test.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/tensorflow/compiler/tests/tensor_list_ops_test.py b/tensorflow/compiler/tests/tensor_list_ops_test.py
index b556723eec..5c079d595c 100644
--- a/tensorflow/compiler/tests/tensor_list_ops_test.py
+++ b/tensorflow/compiler/tests/tensor_list_ops_test.py
@@ -20,22 +20,13 @@ from __future__ import division
 from __future__ import print_function
 import numpy as np
 from tensorflow.compiler.tests import xla_test
-from tensorflow.python.client import session
-from tensorflow.python.eager import backprop
-from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import list_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.platform import test
-from tensorflow.python.training import server_lib
 
 
 def scalar_shape():
-- 
GitLab


From cf8e7cf89abb4a7783b9a99f17574ea128fa767a Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 4 Oct 2018 16:10:21 -0700
Subject: [PATCH 0405/1085] Pin ops with small integer inputs (already on the
 cpu) to the cpu in eager.

An environment variable (TF_EAGER_ENABLE_SMALL_TENSOR_CPU_PINNING) is provided to turn this off if necessary (its on by default).

PiperOrigin-RevId: 215821915
---
 .../core/common_runtime/eager/context.cc      |  4 +-
 .../core/common_runtime/eager/context.h       |  2 +
 .../core/common_runtime/eager/execute.cc      | 67 ++++++++++++++++---
 tensorflow/python/eager/core_test.py          | 28 ++++++++
 4 files changed, 91 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc
index 18420b60fd..f23cefb33d 100644
--- a/tensorflow/core/common_runtime/eager/context.cc
+++ b/tensorflow/core/common_runtime/eager/context.cc
@@ -70,7 +70,9 @@ EagerContext::EagerContext(const SessionOptions& opts,
       async_default_(async),
       log_memory_(LogMemory::IsEnabled()),
       env_(opts.env),
-      use_send_tensor_rpc_(false) {
+      use_send_tensor_rpc_(false),
+      pin_small_ops_to_cpu_(ReadBoolFromEnvVar(
+          "TF_EAGER_ENABLE_SMALL_TENSOR_CPU_PINNING", true)) {
   if (device_mgr_owned) {
     local_device_manager_.reset(device_mgr);
     local_unowned_device_manager_ = nullptr;
diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h
index 5ed6057ec6..15eeaa8066 100644
--- a/tensorflow/core/common_runtime/eager/context.h
+++ b/tensorflow/core/common_runtime/eager/context.h
@@ -202,6 +202,7 @@ class EagerContext {
   // EagerService.SendTensor RPC. If false, _Send/_Recv ops should be used
   // instead (which in-turn use WorkerService.RecvTensor RPCs).
   bool UseSendTensorRPC() { return use_send_tensor_rpc_; }
+  bool PinSmallOpsToCPU() { return pin_small_ops_to_cpu_; }
 
  private:
   void InitDeviceMapAndAsync();
@@ -293,6 +294,7 @@ class EagerContext {
 #endif
 
   bool use_send_tensor_rpc_;
+  const bool pin_small_ops_to_cpu_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index 1bc63616d0..a52f933d75 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -579,19 +579,23 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals,
   return Status::OK();
 #endif
 }
-}  // namespace
 
-Status EagerExecute(EagerOperation* op,
-                    gtl::InlinedVector<TensorHandle*, 2>* retvals,
-                    int* num_retvals) {
-  // Ensure all resource-touching ops run in the device the resource is,
-  // regardless of anything else that has been specified. This is identical to
-  // the graph mode behavior.
+// The Op device may be updated if:
+// - A resource touching input is specified: all resource-touching ops run in
+// the device the resource is, regardless of anything else that has been
+// specified. This is identical to the graph mode behavior.
+//
+// - All op inputs are on the CPU, small (<64 elements) and integers
+// (int32/int64). This can be disabled by setting the environment variable
+// "TF_EAGER_ENABLE_SMALL_TENSOR_CPU_PINNING" to "0" or "false".
+Status MaybeUpdateOpDevice(EagerOperation* op) {
   EagerContext* ctx = op->EagerContext();
+  bool device_set_for_resource_variable = false;
+  bool all_inputs_eligible_for_cpu_pinning = ctx->PinSmallOpsToCPU();
+
   for (int i = 0; i < op->Inputs().size(); ++i) {
     Device* input_op_device = nullptr;
-    auto status = op->Inputs()[i]->OpDevice(&input_op_device);
-    if (!status.ok()) return status;
+    TF_RETURN_IF_ERROR(op->Inputs()[i]->OpDevice(&input_op_device));
     VLOG(2) << "for op " << op->Name() << " input " << i << " "
             << DataTypeString(op->Inputs()[i]->dtype) << " "
             << (input_op_device == nullptr ? "cpu" : input_op_device->name())
@@ -603,8 +607,53 @@ Status EagerExecute(EagerOperation* op,
               << d->name() << " because input #" << i
               << " is a resource in this device.";
       op->SetDevice(d);
+
+      device_set_for_resource_variable = true;
+      all_inputs_eligible_for_cpu_pinning = false;
+    } else if (all_inputs_eligible_for_cpu_pinning) {
+      TensorHandle* handle = op->Inputs()[i];
+
+      // Input is on CPU.
+      if (input_op_device != nullptr && input_op_device != ctx->HostCPU()) {
+        all_inputs_eligible_for_cpu_pinning = false;
+        continue;
+      }
+
+      if (handle->dtype != DataType::DT_INT32 &&
+          handle->dtype != DataType::DT_INT64) {
+        all_inputs_eligible_for_cpu_pinning = false;
+        continue;
+      }
+
+      int64 num_elements;
+      TF_RETURN_IF_ERROR(handle->NumElements(&num_elements));
+      if (num_elements > 64) {
+        all_inputs_eligible_for_cpu_pinning = false;
+      }
     }
   }
+
+  // Ops without inputs are usually ops that generate a tensor in some way and
+  // usually require being present on whatever device they are scheduled on
+  // - for e.g. VarHandleOp or _Recv).
+  // TODO(nareshmodi): Is it possible there is no int32/int64 CPU kernel for
+  // an op, but there is a GPU kernel?
+  if (!op->Inputs().empty() && all_inputs_eligible_for_cpu_pinning) {
+    VLOG(1) << "Forcing op " << op->Name()
+            << " to be on the CPU since all input tensors have an "
+               "int32/int64 dtype, and are small (less than 64 elements).";
+    op->SetDevice(ctx->HostCPU());
+  }
+
+  return Status::OK();
+}
+}  // namespace
+
+Status EagerExecute(EagerOperation* op,
+                    gtl::InlinedVector<TensorHandle*, 2>* retvals,
+                    int* num_retvals) {
+  TF_RETURN_IF_ERROR(MaybeUpdateOpDevice(op));
+
   bool op_is_local = IsLocal(op->EagerContext(), op->Device());
 
   if (op_is_local) {
diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py
index fb5442b646..e601aa376f 100644
--- a/tensorflow/python/eager/core_test.py
+++ b/tensorflow/python/eager/core_test.py
@@ -631,6 +631,34 @@ class TFETest(test_util.TensorFlowTestCase):
     for t in tensors:
       self.assertIsInstance(t, ops.EagerTensor)
 
+  def testSmallIntegerOpsForcedToCPU(self):
+    if not context.context().num_gpus():
+      self.skipTest('No GPUs found')
+
+    a = constant_op.constant((1, 2, 3, 4, 5), dtype=dtypes.int64)
+    b = constant_op.constant((2, 3, 4, 5, 6), dtype=dtypes.int64)
+    with context.device('gpu:0'):
+      c = a + b
+
+    # Op forced to CPU since all constants are integers and small.
+    self.assertEqual(c.device, '/job:localhost/replica:0/task:0/device:CPU:0')
+
+    a = array_ops.zeros((8, 10), dtype=dtypes.int64)
+    b = array_ops.ones((8, 10), dtype=dtypes.int64)
+
+    with context.device('gpu:0'):
+      c = a + b
+
+    # Op not forced to CPU since the tensors are larger than 64 elements.
+    self.assertEqual(c.device, '/job:localhost/replica:0/task:0/device:GPU:0')
+
+    a = constant_op.constant((1, 2, 3, 4, 5), dtype=dtypes.float32)
+    b = constant_op.constant((2, 3, 4, 5, 6), dtype=dtypes.float32)
+    with context.device('gpu:0'):
+      c = a + b
+
+    # Op not forced to CPU since the constants are not integers.
+    self.assertEqual(c.device, '/job:localhost/replica:0/task:0/device:GPU:0')
 
 class SendRecvTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From f8c008aa1833eab6c5ef4523e1bff2f2769c8ac0 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Thu, 4 Oct 2018 16:32:54 -0700
Subject: [PATCH 0406/1085] Further fixes to test case

---
 .../python/kernel_tests/check_ops_test.py     | 75 +++++++++----------
 1 file changed, 37 insertions(+), 38 deletions(-)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 222606348f..8b6c978de1 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -240,44 +240,6 @@ First 2 elements of y:
       out = array_ops.identity(larry)
     self.evaluate(out)
 
-  def test_error_message_eager(self):
-    expected_error_msg_full = r"""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
-b'Condition x != y did not hold for every single element:'
-b'x (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, 3.0, 4.0, 5.0
-b'y (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, 3.0, 4.0, 5.0"""
-    expected_error_msg_default = r"""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
-b'Condition x != y did not hold for every single element:'
-b'x (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, ...
-b'y (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, ..."""
-    expected_error_msg_short = r"""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
-b'Condition x != y did not hold for every single element:'
-b'x (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, ...
-b'y (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, ..."""
-    with context.eager_mode():
-      t = constant_op.constant(np.array(range(6)), shape=[2,3], dtype=np.float32)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   expected_error_msg_full):
-        check_ops.assert_none_equal(t, t, message="This is the error message.",
-                               summarize=10)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   expected_error_msg_full):
-        check_ops.assert_equal(t, t, message="This is the error message.",
-                               summarize=-1)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   expected_error_msg_default):
-        check_ops.assert_equal(t, t, message="This is the error message.")
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   expected_error_msg_short):
-        check_ops.assert_equal(t, t, message="This is the error message.",
-                               summarize=2)
-
-
 
 class AssertNoneEqualTest(test.TestCase):
 
@@ -340,6 +302,43 @@ class AssertNoneEqualTest(test.TestCase):
       x = check_ops.assert_none_equal(t1, t2)
       assert x is None
 
+  def test_error_message_eager(self):
+    expected_error_msg_full = r"""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.'
+b'Condition x != y did not hold for every single element:'
+b'x \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, 2.0, 3.0, 4.0, 5.0
+b'y \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, 2.0, 3.0, 4.0, 5.0"""
+    expected_error_msg_default = r"""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.'
+b'Condition x != y did not hold for every single element:'
+b'x \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, 2.0, ...
+b'y \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, 2.0, ..."""
+    expected_error_msg_short = r"""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.'
+b'Condition x != y did not hold for every single element:'
+b'x \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, ...
+b'y \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, ..."""
+    with context.eager_mode():
+      t = constant_op.constant(np.array(range(6)), shape=[2,3], dtype=np.float32)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_full):
+        check_ops.assert_none_equal(t, t, message="This is the error message.",
+                               summarize=10)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_full):
+        check_ops.assert_equal(t, t, message="This is the error message.",
+                               summarize=-1)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_default):
+        check_ops.assert_equal(t, t, message="This is the error message.")
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_short):
+        check_ops.assert_equal(t, t, message="This is the error message.",
+                               summarize=2)
+
 
 class AssertAllCloseTest(test.TestCase):
 
-- 
GitLab


From 4a00f2fc6514ad5ee60ab0a9645863fdf263499f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 16:29:47 -0700
Subject: [PATCH 0407/1085] Add Chaos Free Network (CFN) cell.

The implementation is based on: https://openreview.net/pdf?id=S1dIzvclg.

PiperOrigin-RevId: 215824867
---
 .../rnn/python/kernel_tests/rnn_cell_test.py  |  65 +++++++++
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 129 ++++++++++++++++++
 2 files changed, 194 insertions(+)

diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index 0a27200015..aa1d7d2b01 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -1120,6 +1120,71 @@ class RNNCellTest(test.TestCase):
             r"input size \(3\) must be divisible by number_of_groups \(2\)"):
           gcell(glstm_input, gcell_zero_state)
 
+  def testCFNCell(self):
+    with self.cached_session() as sess:
+      with variable_scope.variable_scope("root"):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        cell = contrib_rnn_cell.CFNCell(
+            units=2,
+            kernel_initializer=initializers.Constant(0.5))
+        g, _ = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.17188203, 0.17188203]])
+      with variable_scope.variable_scope("other"):
+        # Test CFN with input_size != num_units.
+        x = array_ops.zeros([1, 3])
+        m = array_ops.zeros([1, 2])
+        cell = contrib_rnn_cell.CFNCell(
+            units=2,
+            kernel_initializer=initializers.Constant(0.5))
+        g, _ = cell(x, m)
+        sess.run([variables.global_variables_initializer()])
+        res = sess.run([g], {
+            x.name: np.array([[1., 1., 1.]]),
+            m.name: np.array([[0.1, 0.1]])
+        })
+        # Smoke test
+        self.assertAllClose(res[0], [[0.15535763, 0.15535763]])
+
+  def testCFNCellEndToEnd(self):
+    with self.cached_session() as sess:
+      input_shape = 10
+      output_shape = 5
+      timestep = 4
+      batch = 100
+      (x_train, y_train), _ = testing_utils.get_test_data(
+          train_samples=batch,
+          test_samples=0,
+          input_shape=(timestep, input_shape),
+          num_classes=output_shape)
+      y_train = utils.to_categorical(y_train)
+      cell = contrib_rnn_cell.CFNCell(output_shape)
+
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape))
+      predict = array_ops.placeholder(
+          dtypes.float32, shape=(None, output_shape))
+
+      outputs, state = rnn.dynamic_rnn(
+          cell, inputs, dtype=dtypes.float32)
+      self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape])
+      self.assertEqual(state.shape.as_list(), [None, output_shape])
+      loss = losses.softmax_cross_entropy(predict, state)
+      train_op = training.GradientDescentOptimizer(0.001).minimize(loss)
+
+      sess.run([variables.global_variables_initializer()])
+      _, outputs, state = sess.run(
+          [train_op, outputs, state], {inputs: x_train, predict: y_train})
+
+      self.assertEqual(len(outputs), batch)
+      self.assertEqual(len(state), batch)
+
   def testMinimalRNNCell(self):
     with self.cached_session() as sess:
       with variable_scope.variable_scope(
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 59a61af7b3..78cea8feb4 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -3510,3 +3510,132 @@ class MinimalRNNCell(rnn_cell_impl.LayerRNNCell):
 
     new_h = u * state + (1 - u) * feedforward
     return new_h, new_h
+
+
+class CFNCell(rnn_cell_impl.LayerRNNCell):
+  """Chaos Free Network cell.
+
+  The implementation is based on:
+
+    https://openreview.net/pdf?id=S1dIzvclg
+
+  Thomas Laurent, James von Brecht.
+  "A recurrent neural network without chaos." ICLR, 2017.
+
+  A CFN cell first projects the input to the hidden space. The hidden state
+  goes through a contractive mapping. The new hidden state is then calcuated
+  as a linear combination of the projected input and the contracted previous
+  hidden state, using decoupled input and forget gates.
+  """
+
+  def __init__(self,
+               units,
+               activation="tanh",
+               kernel_initializer="glorot_uniform",
+               bias_initializer="ones",
+               name=None,
+               dtype=None,
+               **kwargs):
+    """Initialize the parameters for a CFN cell.
+
+    Args:
+      units: int, The number of units in the CFN cell.
+      activation: Nonlinearity to use. Default: `tanh`.
+      kernel_initializer: Initializer for the `kernel` weights
+        matrix. Default: `glorot_uniform`.
+      bias_initializer: The initializer to use for the bias in the
+        gates. Default: `ones`.
+      name: String, the name of the cell.
+      dtype: Default dtype of the cell.
+      **kwargs: Dict, keyword named properties for common cell attributes.
+    """
+    super(CFNCell, self).__init__(name=name, dtype=dtype, **kwargs)
+
+    # Inputs must be 2-dimensional.
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+    self.units = units
+    self.activation = activations.get(activation)
+    self.kernel_initializer = initializers.get(kernel_initializer)
+    self.bias_initializer = initializers.get(bias_initializer)
+
+  @property
+  def state_size(self):
+    return self.units
+
+  @property
+  def output_size(self):
+    return self.units
+
+  def build(self, inputs_shape):
+    if inputs_shape[-1] is None:
+      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
+                       % str(inputs_shape))
+
+    input_size = inputs_shape[-1]
+    # pylint: disable=protected-access
+    # `self.kernel` contains V_{\theta}, V_{\eta}, W.
+    # `self.recurrent_kernel` contains U_{\theta}, U_{\eta}.
+    # `self.bias` contains b_{\theta}, b_{\eta}.
+    self.kernel = self.add_weight(
+        shape=[input_size, 3 * self.units],
+        name=rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+        initializer=self.kernel_initializer)
+    self.recurrent_kernel = self.add_weight(
+        shape=[self.units, 2 * self.units],
+        name="recurrent_%s" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+        initializer=self.kernel_initializer)
+    self.bias = self.add_weight(
+        shape=[2 * self.units],
+        name=rnn_cell_impl._BIAS_VARIABLE_NAME,
+        initializer=self.bias_initializer)
+    # pylint: enable=protected-access
+
+    self.built = True
+
+  def call(self, inputs, state):
+    """Run one step of CFN.
+
+    Args:
+      inputs: input Tensor, must be 2-D, `[batch, input_size]`.
+      state: state Tensor, must be 2-D, `[batch, state_size]`.
+
+    Returns:
+      A tuple containing:
+
+      - Output: A `2-D` tensor with shape `[batch_size, state_size]`.
+      - New state: A `2-D` tensor with shape `[batch_size, state_size]`.
+
+    Raises:
+      ValueError: If input size cannot be inferred from inputs via
+        static shape inference.
+    """
+    input_size = inputs.get_shape()[-1]
+    if input_size.value is None:
+      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
+
+    # The variable names u, v, w, b are consistent with the notations in the
+    # original paper.
+    v, w = array_ops.split(
+        value=self.kernel,
+        num_or_size_splits=[2 * self.units, self.units],
+        axis=1)
+    u = self.recurrent_kernel
+    b = self.bias
+
+    gates = math_ops.matmul(state, u) + math_ops.matmul(inputs, v)
+    gates = nn_ops.bias_add(gates, b)
+    gates = math_ops.sigmoid(gates)
+    theta, eta = array_ops.split(value=gates,
+                                 num_or_size_splits=2,
+                                 axis=1)
+
+    proj_input = math_ops.matmul(inputs, w)
+
+    # The input gate is (1 - eta), which is different from the original paper.
+    # This is for the propose of initialization. With the default
+    # bias_initializer `ones`, the input gate is initialized to a small number.
+    new_h = theta * self.activation(state) + (1 - eta) * self.activation(
+        proj_input)
+
+    return new_h, new_h
-- 
GitLab


From a9bb7d32dd073a1362045c21da23c99fef9fb2b5 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 4 Oct 2018 18:36:18 -0700
Subject: [PATCH 0408/1085] Add xla library into contrib_py (#22753)

PiperOrigin-RevId: 215774158
---
 tensorflow/contrib/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index fbe0573d5d..fa06d351d4 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -29,6 +29,7 @@ py_library(
         "//tensorflow/contrib/cluster_resolver:cluster_resolver_py",
         "//tensorflow/contrib/coder:coder_py",
         "//tensorflow/contrib/compiler:compiler_py",
+        "//tensorflow/contrib/compiler:xla",
         "//tensorflow/contrib/autograph",
         "//tensorflow/contrib/constrained_optimization",
         "//tensorflow/contrib/copy_graph:copy_graph_py",
-- 
GitLab


From 83ff640fa5026b8bd3cb9c2ceff9e99e8e03823a Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Thu, 4 Oct 2018 18:46:53 -0700
Subject: [PATCH 0409/1085] [XLA:GPU] Fix old-ptxas-version detection logic.

This was completely broken for CUDA versions > 9 and resulted in spurious warnings.

Reported in #22706#issuecomment-426861394 -- thank you!

PiperOrigin-RevId: 215841354
---
 tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index b4ae2e42c7..50e47542c4 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -401,7 +401,7 @@ void WarnIfBadPtxasVersion(const string& ptxas_path) {
            "prefers >= 9.2.88).  Compilation of XLA kernels below will likely "
            "fail.\n\nYou do not need to update CUDA; cherry-picking the ptxas "
            "binary is sufficient.";
-  } else if ((vmaj < 9 || vmin < 2 || vdot < 88)) {
+  } else if (std::make_tuple(vmaj, vmin, vdot) < std::make_tuple(9, 2, 88)) {
     LOG(WARNING)
         << "*** WARNING *** You are using ptxas " << vmaj << "." << vmin << "."
         << vdot
-- 
GitLab


From 5608454c31bb298096bb6aa463b33baa2fa68f08 Mon Sep 17 00:00:00 2001
From: Ruoxin Sang <rxsang@google.com>
Date: Thu, 4 Oct 2018 19:07:44 -0700
Subject: [PATCH 0410/1085] Add 'device' property to TPUMirroredVariable, so
 tf.train.init_from_checkpoint can be supported.

PiperOrigin-RevId: 215843249
---
 tensorflow/contrib/distribute/python/values.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index 18ceba42c2..0dd78ba185 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -571,6 +571,10 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
           ValueError("Device %s not found in %s (current device %s)" %
                      (device, self._index.keys(), device_util.current())), e)
 
+  @property
+  def device(self):
+    return self._get().device
+
   # The arguments to update() are automatically unwrapped so the update()
   # function would normally see regular variables, not MirroredVariables.
   # However, the update function can still operate on wrapped MirroredVariables
-- 
GitLab


From f4cef34fad7b00a3b1f288ff5c95001c5b83c1f8 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 4 Oct 2018 19:26:26 -0700
Subject: [PATCH 0411/1085] Fix regression that caused xrange to be ignored.

PiperOrigin-RevId: 215844450
---
 tensorflow/python/autograph/operators/py_builtins.py          | 1 +
 tensorflow/python/autograph/pyct/inspect_utils.py             | 4 ++++
 .../python/autograph/pyct/static_analysis/live_values.py      | 4 ++++
 3 files changed, 9 insertions(+)

diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py
index 91a2a22cc2..70e59272a9 100644
--- a/tensorflow/python/autograph/operators/py_builtins.py
+++ b/tensorflow/python/autograph/operators/py_builtins.py
@@ -228,5 +228,6 @@ BUILTIN_FUINCTIONS_MAP = {
     'len': len_,
     'print': print_,
     'range': range_,
+    # TODO(mdan): This might make more sense as tf.data.range.
     'xrange': range_,
 }
diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index eef74599a7..1416988ea3 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -30,10 +30,14 @@ from tensorflow.python.util import tf_inspect
 
 
 def isbuiltin(f):
+  """Returns True if the argument is a built-in function."""
   # Note these return false for isinstance(f, types.BuiltinFunctionType) so we
   # need to specifically check for them.
   if f in (range, int, float):
     return True
+  if six.PY2:
+    if f in (xrange,):
+      return True
   if isinstance(f, types.BuiltinFunctionType):
     return True
   if tf_inspect.isbuiltin(f):
diff --git a/tensorflow/python/autograph/pyct/static_analysis/live_values.py b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
index 36b9e7074d..4ceddce53b 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/live_values.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
@@ -24,6 +24,7 @@ from __future__ import division
 from __future__ import print_function
 
 import gast
+import six
 
 from tensorflow.python.autograph.pyct import anno
 from tensorflow.python.autograph.pyct import transformer
@@ -35,6 +36,9 @@ from tensorflow.python.autograph.pyct.static_analysis.annos import NodeAnno
 # These symbols are legal in Python, but don't appear in the namespace.
 _SPECIAL_SYMBOLS = {'range': range, 'print': print}
 
+if six.PY2:
+  _SPECIAL_SYMBOLS['xrange'] = xrange
+
 
 class LiveValueResolver(transformer.Base):
   """Annotates nodes with live values."""
-- 
GitLab


From 176e6993c5e11631389e05f82b3d71a3a367e392 Mon Sep 17 00:00:00 2001
From: Billy Lamberta <blamb@google.com>
Date: Thu, 4 Oct 2018 21:25:33 -0700
Subject: [PATCH 0412/1085] Fix link in eager notebook stub.

PiperOrigin-RevId: 215853105
---
 .../notebooks/automatic_differentiation.ipynb |    2 +-
 .../performance/xla/operation_semantics.md    | 2426 -----------------
 2 files changed, 1 insertion(+), 2427 deletions(-)
 delete mode 100644 tensorflow/docs_src/performance/xla/operation_semantics.md

diff --git a/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb b/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb
index 8fae622e12..446e340118 100644
--- a/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb
+++ b/tensorflow/contrib/eager/python/examples/notebooks/automatic_differentiation.ipynb
@@ -65,7 +65,7 @@
         "\u003ca target=\"_blank\"  href=\"https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb\"\u003e\n",
         "    \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
         "\u003c/td\u003e\u003ctd\u003e\n",
-        "\u003ca target=\"_blank\"  href=\"https://github.com/tensorflow/tensorflow/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e"
+        "\u003ca target=\"_blank\"  href=\"https://github.com/tensorflow/docs/blob/master/site/en/tutorials/eager/automatic_differentiation.ipynb\"\u003e\u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\u003c/td\u003e\u003c/table\u003e"
       ]
     }
   ],
diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md
deleted file mode 100644
index 96d269bec4..0000000000
--- a/tensorflow/docs_src/performance/xla/operation_semantics.md
+++ /dev/null
@@ -1,2426 +0,0 @@
-# Operation Semantics
-
-The following describes the semantics of operations defined in the
-[`XlaBuilder`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-interface. Typically, these operations map one-to-one to operations defined in
-the RPC interface in
-[`xla_data.proto`](https://www.tensorflow.org/code/tensorflow/compiler/xla/xla_data.proto).
-
-A note on nomenclature: the generalized data type XLA deals with is an
-N-dimensional array holding elements of some uniform type (such as 32-bit
-float). Throughout the documentation, *array* is used to denote an
-arbitrary-dimensional array. For convenience, special cases have more specific
-and familiar names; for example a *vector* is a 1-dimensional array and a
-*matrix* is a 2-dimensional array.
-
-## AllToAll
-
-See also
-[`XlaBuilder::AllToAll`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Alltoall is a collective operation that sends data from all cores to all cores.
-It has two phases:
-
-1.  the scatter phase. On each core, the operand is split into `split_count`
-    number of blocks along the `split_dimensions`, and the blocks are scattered
-    to all cores, e.g., the ith block is send to the ith core.
-2.  the gather phase. Each core concatenates the received blocks along the
-    `concat_dimension`.
-
-The participating cores can be configured by:
-
--   `replica_groups`: each ReplicaGroup contains a list of replica id. If empty,
-    all replicas belong to one group in the order of 0 - (n-1). Alltoall will be
-    applied within subgroups in the specified order. For example, replica
-    groups = {{1,2,3},{4,5,0}} means, an Alltoall will be applied within replica
-    1, 2, 3, and in the gather phase, the received blocks will be concatenated
-    in the order of 1, 2, 3; another Alltoall will be applied within replica 4,
-    5, 0, and the concatenation order is 4, 5, 0.
-
-Prerequisites:
-
--   The dimension size of the operand on the split_dimension is divisible by
-    split_count.
--   The operand's shape is not tuple.
-
-<b> `AllToAll(operand, split_dimension, concat_dimension, split_count,
-replica_groups)` </b>
-
-
-| Arguments          | Type                  | Semantics                       |
-| ------------------ | --------------------- | ------------------------------- |
-| `operand`          | `XlaOp`               | n dimensional input array       |
-| `split_dimension`  | `int64`               | A value in the interval `[0,    |
-:                    :                       : n)` that names the dimension    :
-:                    :                       : along which the operand is      :
-:                    :                       : split                           :
-| `concat_dimension` | `int64`               | a value in the interval `[0,    |
-:                    :                       : n)` that names the dimension    :
-:                    :                       : along which the split blocks    :
-:                    :                       : are concatenated                :
-| `split_count`      | `int64`               | the number of cores that        |
-:                    :                       : participate this operation. If  :
-:                    :                       : `replica_groups` is empty, this :
-:                    :                       : should be the number of         :
-:                    :                       : replicas; otherwise, this       :
-:                    :                       : should be equal to the number   :
-:                    :                       : of replicas in each group.      :
-| `replica_groups`   | `ReplicaGroup` vector | each group contains a list of   |
-:                    :                       : replica id.                     :
-
-Below shows an example of Alltoall.
-
-```
-XlaBuilder b("alltoall");
-auto x = Parameter(&b, 0, ShapeUtil::MakeShape(F32, {4, 16}), "x");
-AllToAll(x, /*split_dimension=*/1, /*concat_dimension=*/0, /*split_count=*/4);
-```
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/xla/ops_alltoall.png">
-</div>
-
-In this example, there are 4 cores participating the Alltoall. On each core, the
-operand is split into 4 parts along dimension 0, so each part has shape
-f32[4,4]. The 4 parts are scattered to all cores. Then each core concatenates
-the received parts along dimension 1, in the order or core 0-4. So the output on
-each core has shape f32[16,4].
-
-## BatchNormGrad
-
-See also
-[`XlaBuilder::BatchNormGrad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
-for a detailed description of the algorithm.
-
-Calculates gradients of batch norm.
-
-<b> `BatchNormGrad(operand, scale, mean, variance, grad_output, epsilon, feature_index)` </b>
-
-| Arguments       | Type                    | Semantics                        |
-| --------------- | ----------------------- | -------------------------------- |
-| `operand`       | `XlaOp`                 | n dimensional array to be        |
-:                 :                         : normalized (x)                   :
-| `scale`         | `XlaOp`                 | 1 dimensional array              |
-:                 :                         : (\\(\gamma\\))                   :
-| `mean`          | `XlaOp`                 | 1 dimensional array (\\(\mu\\))  |
-| `variance`      | `XlaOp`                 | 1 dimensional array              |
-:                 :                         : (\\(\sigma^2\\))                 :
-| `grad_output`   | `XlaOp`                 | Gradients passed to              |
-:                 :                         : `BatchNormTraining`              :
-:                 :                         : (\\( \nabla y\\))                :
-| `epsilon`       | `float`                 | Epsilon value (\\(\epsilon\\))   |
-| `feature_index` | `int64`                 | Index to feature dimension in    |
-:                 :                         : `operand`                        :
-
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the gradients with
-respect to `operand`, `offset` and `scale` across all the other dimensions. The
-`feature_index` must be a valid index for the feature dimension in `operand`.
-
-The three gradients are defined by the following formulas (assuming a
-4-dimensional tensor as `operand` and with feature dimension index \\(l\\),
-batch size `m` and spatial sizes `w` and `h`):
-
-\\[ \begin{split} c_l&=
-\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h
-\left( \nabla y_{ijkl} \frac{x_{ijkl} - \mu_l}{\sigma^2_l+\epsilon} \right)
-\\\\
-\nabla x_{ijkl} &= \frac{\gamma_{l}}{\sqrt{\sigma^2_{l}+\epsilon}}
-\left( \nabla y_{ijkl} - \mathrm{mean}(\nabla y) - c_l (x_{ijkl} - \mu_{l})
-\right)
-\\\\
-\nabla \gamma_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \left( \nabla y_{ijkl}
-\frac{x_{ijkl} - \mu_l}{\sqrt{\sigma^2_{l}+\epsilon}} \right)
-\\\\\
-\nabla \beta_l &= \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl}
-\end{split} \\]
-
-The inputs `mean` and `variance` represent moments value
-across batch and spatial dimensions.
-
-The output type is a tuple of three handles:
-
-| Outputs        | Type                    | Semantics                         |
-| -------------  | ----------------------- | --------------------------------- |
-| `grad_operand` | `XlaOp`                 | gradient with respect to input    |
-:                :                         : `operand` (\\( \nabla x\\))       :
-| `grad_scale`   | `XlaOp`                 | gradient with respect to input    |
-:                :                         : `scale` (\\( \nabla \gamma\\))    :
-| `grad_offset`  | `XlaOp`                 | gradient with respect to input    |
-:                :                         : `offset`(\\( \nabla \beta\\))     :
-
-## BatchNormInference
-
-See also
-[`XlaBuilder::BatchNormInference`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and [the original batch normalization paper](https://arxiv.org/abs/1502.03167)
-for a detailed description of the algorithm.
-
-Normalizes an array across batch and spatial dimensions.
-
-<b> `BatchNormInference(operand, scale, offset, mean, variance, epsilon, feature_index)` </b>
-
-Arguments       | Type    | Semantics
---------------- | ------- | ---------------------------------------
-`operand`       | `XlaOp` | n dimensional array to be normalized
-`scale`         | `XlaOp` | 1 dimensional array
-`offset`        | `XlaOp` | 1 dimensional array
-`mean`          | `XlaOp` | 1 dimensional array
-`variance`      | `XlaOp` | 1 dimensional array
-`epsilon`       | `float` | Epsilon value
-`feature_index` | `int64` | Index to feature dimension in `operand`
-
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and uses the mean and variance to normalize each
-element in `operand`. The `feature_index` must be a valid index for the feature
-dimension in `operand`.
-
-`BatchNormInference`  is equivalent to calling `BatchNormTraining` without
-computing `mean` and `variance` for each batch. It uses the input `mean` and
-`variance` instead as estimated values. The purpose of this op is to reduce
-latency in inference, hence the name `BatchNormInference`.
-
-The output is an n-dimensional, normalized array with the same shape as input
-`operand`.
-
-## BatchNormTraining
-
-See also
-[`XlaBuilder::BatchNormTraining`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and [`the original batch normalization paper`](https://arxiv.org/abs/1502.03167)
-for a detailed description of the algorithm.
-
-Normalizes an array across batch and spatial dimensions.
-
-<b> `BatchNormTraining(operand, scale, offset, epsilon, feature_index)` </b>
-
-Arguments       | Type    | Semantics
---------------- | ------- | ----------------------------------------
-`operand`       | `XlaOp` | n dimensional array to be normalized (x)
-`scale`         | `XlaOp` | 1 dimensional array (\\(\gamma\\))
-`offset`        | `XlaOp` | 1 dimensional array (\\(\beta\\))
-`epsilon`       | `float` | Epsilon value (\\(\epsilon\\))
-`feature_index` | `int64` | Index to feature dimension in `operand`
-
-For each feature in the feature dimension (`feature_index` is the index for the
-feature dimension in `operand`), the operation calculates the mean and variance
-across all the other dimensions and uses the mean and variance to normalize each
-element in `operand`. The `feature_index` must be a valid index for the feature
-dimension in `operand`.
-
-The algorithm goes as follows for each batch in `operand` \\(x\\) that
-contains `m` elements with `w` and `h` as the size of spatial dimensions
-(assuming `operand` is an 4 dimensional array):
-
-- Calculates batch mean \\(\mu_l\\) for each feature `l` in feature dimension:
-\\(\mu_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h x_{ijkl}\\)
-
-- Calculates batch variance \\(\sigma^2_l\\):
-\\(\sigma^2_l=\frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (x_{ijkl} - \mu_l)^2\\)
-
-- Normalizes, scales and shifts:
-\\(y_{ijkl}=\frac{\gamma_l(x_{ijkl}-\mu_l)}{\sqrt[2]{\sigma^2_l+\epsilon}}+\beta_l\\)
-
-The epsilon value, usually a small number, is added to avoid divide-by-zero errors.
-
-The output type is a tuple of three `XlaOp`s:
-
-| Outputs      | Type                    | Semantics                            |
-| ------------ | ----------------------- | -------------------------------------|
-| `output`     | `XlaOp`                 | n dimensional array with the same    |
-:              :                         : shape as input `operand` (y)         :
-| `batch_mean` | `XlaOp`                 | 1 dimensional array (\\(\mu\\))      |
-| `batch_var`  | `XlaOp`                 | 1 dimensional array (\\(\sigma^2\\)) |
-
-The `batch_mean` and `batch_var` are moments calculated across the batch and
-spatial dimensions using the formulas above.
-
-## BitcastConvertType
-
-See also
-[`XlaBuilder::BitcastConvertType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Similar to a `tf.bitcast` in TensorFlow, performs an element-wise bitcast
-operation from a data shape to a target shape. The dimensions must match, and
-the conversion is an element-wise one; e.g. `s32` elements become `f32` elements
-via bitcast routine. Bitcast is implemented as a low-level cast, so machines
-with different floating-point representations will give different results.
-
-<b> `BitcastConvertType(operand, new_element_type)` </b>
-
-Arguments          | Type            | Semantics
------------------- | --------------- | ---------------------------
-`operand`          | `XlaOp`         | array of type T with dims D
-`new_element_type` | `PrimitiveType` | type U
-
-The dimensions of the operand and the target shape must match. The bit-width of
-the source and destination element types must be equal. The source
-and destination element types must not be tuples.
-
-## Broadcast
-
-See also
-[`XlaBuilder::Broadcast`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Adds dimensions to an array by duplicating the data in the array.
-
-<b> `Broadcast(operand, broadcast_sizes)` </b>
-
-Arguments         | Type                | Semantics
------------------ | ------------------- | -------------------------------
-`operand`         | `XlaOp`             | The array to duplicate
-`broadcast_sizes` | `ArraySlice<int64>` | The sizes of the new dimensions
-
-The new dimensions are inserted on the left, i.e. if `broadcast_sizes` has
-values `{a0, ..., aN}` and the operand shape has dimensions `{b0, ..., bM}` then
-the shape of the output has dimensions `{a0, ..., aN, b0, ..., bM}`.
-
-The new dimensions index into copies of the operand, i.e.
-
-```
-output[i0, ..., iN, j0, ..., jM] = operand[j0, ..., jM]
-```
-
-For example, if `operand` is a scalar `f32` with value `2.0f`, and
-`broadcast_sizes` is `{2, 3}`, then the result will be an array with shape
-`f32[2, 3]` and all the values in the result will be `2.0f`.
-
-## Call
-
-See also
-[`XlaBuilder::Call`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Invokes a computation with the given arguments.
-
-<b> `Call(computation, args...)` </b>
-
-| Arguments     | Type                   | Semantics                           |
-| ------------- | ---------------------- | ----------------------------------- |
-| `computation` | `XlaComputation`       | computation of type `T_0, T_1, ..., |
-:               :                        : T_N -> S` with N parameters of      :
-:               :                        : arbitrary type                      :
-| `args`        | sequence of N `XlaOp`s | N arguments of arbitrary type       |
-
-The arity and types of the `args` must match the parameters of the
-`computation`. It is allowed to have no `args`.
-
-## Clamp
-
-See also
-[`XlaBuilder::Clamp`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Clamps an operand to within the range between a minimum and maximum value.
-
-<b> `Clamp(min, operand, max)` </b>
-
-Arguments | Type    | Semantics
---------- | ------- | ---------------
-`min`     | `XlaOp` | array of type T
-`operand` | `XlaOp` | array of type T
-`max`     | `XlaOp` | array of type T
-
-Given an operand and minimum and maximum values, returns the operand if it is in
-the range between the minimum and maximum, else returns the minimum value if the
-operand is below this range or the maximum value if the operand is above this
-range.  That is, `clamp(a, x, b) =  min(max(a, x), b)`.
-
-All three arrays must be the same shape. Alternatively, as a restricted form of
-[broadcasting](broadcasting.md), `min` and/or `max` can be a scalar of type `T`.
-
-Example with scalar `min` and `max`:
-
-```
-let operand: s32[3] = {-1, 5, 9};
-let min: s32 = 0;
-let max: s32 = 6;
-==>
-Clamp(min, operand, max) = s32[3]{0, 5, 6};
-```
-
-## Collapse
-
-See also
-[`XlaBuilder::Collapse`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and the `tf.reshape` operation.
-
-Collapses dimensions of an array into one dimension.
-
-<b> `Collapse(operand, dimensions)` </b>
-
-Arguments    | Type           | Semantics
------------- | -------------- | -----------------------------------------------
-`operand`    | `XlaOp`        | array of type T
-`dimensions` | `int64` vector | in-order, consecutive subset of T's dimensions.
-
-Collapse replaces the given subset of the operand's dimensions by a single
-dimension. The input arguments are an arbitrary array of type T and a
-compile-time-constant vector of dimension indices. The dimension indices must be
-an in-order (low to high dimension numbers), consecutive subset of T's
-dimensions. Thus, {0, 1, 2}, {0, 1}, or {1, 2} are all valid dimension sets, but
-{1, 0} or {0, 2} are not. They are replaced by a single new dimension, in the
-same position in the dimension sequence as those they replace, with the new
-dimension size equal to the product of original dimension sizes. The lowest
-dimension number in `dimensions` is the slowest varying dimension (most major)
-in the loop nest which collapses these dimension, and the highest dimension
-number is fastest varying (most minor). See the `tf.reshape` operator
-if more general collapse ordering is needed.
-
-For example, let v be an array of 24 elements:
-
-```
-let v = f32[4x2x3] {{{10, 11, 12},  {15, 16, 17}},
-                    {{20, 21, 22},  {25, 26, 27}},
-                    {{30, 31, 32},  {35, 36, 37}},
-                    {{40, 41, 42},  {45, 46, 47}}};
-
-// Collapse to a single dimension, leaving one dimension.
-let v012 = Collapse(v, {0,1,2});
-then v012 == f32[24] {10, 11, 12, 15, 16, 17,
-                      20, 21, 22, 25, 26, 27,
-                      30, 31, 32, 35, 36, 37,
-                      40, 41, 42, 45, 46, 47};
-
-// Collapse the two lower dimensions, leaving two dimensions.
-let v01 = Collapse(v, {0,1});
-then v01 == f32[4x6] {{10, 11, 12, 15, 16, 17},
-                      {20, 21, 22, 25, 26, 27},
-                      {30, 31, 32, 35, 36, 37},
-                      {40, 41, 42, 45, 46, 47}};
-
-// Collapse the two higher dimensions, leaving two dimensions.
-let v12 = Collapse(v, {1,2});
-then v12 == f32[8x3] {{10, 11, 12},
-                      {15, 16, 17},
-                      {20, 21, 22},
-                      {25, 26, 27},
-                      {30, 31, 32},
-                      {35, 36, 37},
-                      {40, 41, 42},
-                      {45, 46, 47}};
-
-```
-
-## Concatenate
-
-See also
-[`XlaBuilder::ConcatInDim`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Concatenate composes an array from multiple array operands. The array is of the
-same rank as each of the input array operands (which must be of the same rank as
-each other) and contains the arguments in the order that they were specified.
-
-<b> `Concatenate(operands..., dimension)` </b>
-
-| Arguments   | Type                  | Semantics                              |
-| ----------- | --------------------- | -------------------------------------- |
-| `operands`  | sequence of N `XlaOp` | N arrays of type T with dimensions     |
-:             :                       : [L0, L1, ...]. Requires N >= 1.        :
-| `dimension` | `int64`               | A value in the interval `[0, N)` that  |
-:             :                       : names the dimension to be concatenated :
-:             :                       : between the `operands`.                :
-
-With the exception of `dimension` all dimensions must be the same. This is
-because XLA does not support "ragged" arrays. Also note that rank-0 values
-cannot be concatenated (as it's impossible to name the dimension along which the
-concatenation occurs).
-
-1-dimensional example:
-
-```
-Concat({{2, 3}, {4, 5}, {6, 7}}, 0)
->>> {2, 3, 4, 5, 6, 7}
-```
-
-2-dimensional example:
-
-```
-let a = {
-  {1, 2},
-  {3, 4},
-  {5, 6},
-};
-let b = {
-  {7, 8},
-};
-Concat({a, b}, 0)
->>> {
-  {1, 2},
-  {3, 4},
-  {5, 6},
-  {7, 8},
-}
-```
-
-Diagram:
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="https://www.tensorflow.org/images/ops_concatenate.png">
-</div>
-
-## Conditional
-
-See also
-[`XlaBuilder::Conditional`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Conditional(pred, true_operand, true_computation, false_operand,
-false_computation)` </b>
-
-Arguments           | Type             | Semantics
-------------------- | ---------------- | ---------------------------------
-`pred`              | `XlaOp`          | Scalar of type `PRED`
-`true_operand`      | `XlaOp`          | Argument of type `T_0`
-`true_computation`  | `XlaComputation` | XlaComputation of type `T_0 -> S`
-`false_operand`     | `XlaOp`          | Argument of type `T_1`
-`false_computation` | `XlaComputation` | XlaComputation of type `T_1 -> S`
-
-Executes `true_computation` if `pred` is `true`, `false_computation` if `pred`
-is `false`, and returns the result.
-
-The `true_computation` must take in a single argument of type `T_0` and will be
-invoked with `true_operand` which must be of the same type. The
-`false_computation` must take in a single argument of type `T_1` and will be
-invoked with `false_operand` which must be of the same type. The type of the
-returned value of `true_computation` and `false_computation` must be the same.
-
-Note that only one of `true_computation` and `false_computation` will be
-executed depending on the value of `pred`.
-
-## Conv (convolution)
-
-See also
-[`XlaBuilder::Conv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-As ConvWithGeneralPadding, but the padding is specified in a short-hand way as
-either SAME or VALID. SAME padding pads the input (`lhs`) with zeroes so that
-the output has the same shape as the input when not taking striding into
-account. VALID padding simply means no padding.
-
-## ConvWithGeneralPadding (convolution)
-
-See also
-[`XlaBuilder::ConvWithGeneralPadding`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Computes a convolution of the kind used in neural networks. Here, a convolution
-can be thought of as a n-dimensional window moving across a n-dimensional base
-area and a computation is performed for each possible position of the window.
-
-| Arguments             | Type                 | Semantics                     |
-| --------------------- | -------------------- | ----------------------------- |
-| `lhs`                 | `XlaOp`              | rank n+2 array of inputs      |
-| `rhs`                 | `XlaOp`              | rank n+2 array of kernel      |
-:                       :                      : weights                       :
-| `window_strides`      | `ArraySlice<int64>`  | n-d array of kernel strides   |
-| `padding`             | `ArraySlice<         | n-d array of (low, high)      |
-:                       : pair<int64, int64>>` : padding                       :
-| `lhs_dilation`        | `ArraySlice<int64>`  | n-d lhs dilation factor array |
-| `rhs_dilation`        | `ArraySlice<int64>`  | n-d rhs dilation factor array |
-| `feature_group_count` | int64                | the number of feature groups  |
-
-Let n be the number of spatial dimensions. The `lhs` argument is a rank n+2
-array describing the base area. This is called the input, even though of course
-the rhs is also an input. In a neural network, these are the input activations.
-The n+2 dimensions are, in this order:
-
-*   `batch`: Each coordinate in this dimension represents an independent input
-    for which convolution is carried out.
-*   `z/depth/features`: Each (y,x) position in the base area has a vector
-    associated to it, which goes into this dimension.
-*   `spatial_dims`: Describes the `n` spatial dimensions that define the base
-    area that the window moves across.
-
-The `rhs` argument is a rank n+2 array describing the convolutional
-filter/kernel/window. The dimensions are, in this order:
-
-*   `output-z`: The `z` dimension of the output.
-*   `input-z`: The size of this dimension times `feature_group_count` should
-    equal the size of the `z` dimension in lhs.
-*   `spatial_dims`: Describes the `n` spatial dimensions that define the n-d
-    window that moves across the base area.
-
-The `window_strides` argument specifies the stride of the convolutional window
-in the spatial dimensions. For example, if the stride in the first spatial
-dimension is 3, then the window can only be placed at coordinates where the
-first spatial index is divisible by 3.
-
-The `padding` argument specifies the amount of zero padding to be applied to the
-base area. The amount of padding can be negative -- the absolute value of
-negative padding indicates the number of elements to remove from the specified
-dimension before doing the convolution. `padding[0]` specifies the padding for
-dimension `y` and `padding[1]` specifies the padding for dimension `x`. Each
-pair has the low padding as the first element and the high padding as the second
-element. The low padding is applied in the direction of lower indices while the
-high padding is applied in the direction of higher indices. For example, if
-`padding[1]` is `(2,3)` then there will be a padding by 2 zeroes on the left and
-by 3 zeroes on the right in the second spatial dimension. Using padding is
-equivalent to inserting those same zero values into the input (`lhs`) before
-doing the convolution.
-
-The `lhs_dilation` and `rhs_dilation` arguments specify the dilation factor to
-be applied to the lhs and rhs, respectively, in each spatial dimension. If the
-dilation factor in a spatial dimension is d, then d-1 holes are implicitly
-placed between each of the entries in that dimension, increasing the size of the
-array. The holes are filled with a no-op value, which for convolution means
-zeroes.
-
-Dilation of the rhs is also called atrous convolution. For more details, see
-`tf.nn.atrous_conv2d`. Dilation of the lhs is also called transposed
-convolution. For more details, see `tf.nn.conv2d_transpose`.
-
-The `feature_group_count` argument (default value 1) can be used for grouped
-convolutions. `feature_group_count` needs to be a divisor of both the input and
-the output feature dimension. If `feature_group_count` is greater than 1, it
-means that conceptually the input and output feature dimension and the `rhs`
-output feature dimension are split evenly into `feature_group_count` many
-groups, each group consisting of a consecutive subsequence of features. The
-input feature dimension of `rhs` needs to be equal to the `lhs` input feature
-dimension divided by `feature_group_count` (so it already has the size of a
-group of input features). The i-th groups are used together to compute
-`feature_group_count` many separate convolutions. The results of these
-convolutions are concatenated together in the output feature dimension.
-
-For depthwise convolution the `feature_group_count` argument would be set to the
-input feature dimension, and the filter would be reshaped from
-`[filter_height, filter_width, in_channels, channel_multiplier]` to
-`[filter_height, filter_width, 1, in_channels * channel_multiplier]`. For more
-details, see `tf.nn.depthwise_conv2d`.
-
-The output shape has these dimensions, in this order:
-
-*   `batch`: Same size as `batch` on the input (`lhs`).
-*   `z`: Same size as `output-z` on the kernel (`rhs`).
-*   `spatial_dims`: One value for each valid placement of the convolutional
-    window.
-
-The valid placements of the convolutional window are determined by the strides
-and the size of the base area after padding.
-
-To describe what a convolution does, consider a 2d convolution, and pick some
-fixed `batch`, `z`, `y`, `x` coordinates in the output. Then `(y,x)` is a
-position of a corner of the window within the base area (e.g. the upper left
-corner, depending on how you interpret the spatial dimensions). We now have a 2d
-window, taken from the base area, where each 2d point is associated to a 1d
-vector, so we get a 3d box. From the convolutional kernel, since we fixed the
-output coordinate `z`, we also have a 3d box. The two boxes have the same
-dimensions, so we can take the sum of the element-wise products between the two
-boxes (similar to a dot product). That is the output value.
-
-Note that if `output-z` is e.g., 5, then each position of the window produces 5
-values in the output into the `z` dimension of the output. These values differ
-in what part of the convolutional kernel is used - there is a separate 3d box of
-values used for each `output-z` coordinate. So you could think of it as 5
-separate convolutions with a different filter for each of them.
-
-Here is pseudo-code for a 2d convolution with padding and striding:
-
-```
-for (b, oz, oy, ox) {  // output coordinates
-  value = 0;
-  for (iz, ky, kx) {  // kernel coordinates and input z
-    iy = oy*stride_y + ky - pad_low_y;
-    ix = ox*stride_x + kx - pad_low_x;
-    if ((iy, ix) inside the base area considered without padding) {
-      value += input(b, iz, iy, ix) * kernel(oz, iz, ky, kx);
-    }
-  }
-  output(b, oz, oy, ox) = value;
-}
-```
-
-## ConvertElementType
-
-See also
-[`XlaBuilder::ConvertElementType`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Similar to an element-wise `static_cast` in C++, performs an element-wise
-conversion operation from a data shape to a target shape. The dimensions must
-match, and the conversion is an element-wise one; e.g. `s32` elements become
-`f32` elements via an `s32`-to-`f32` conversion routine.
-
-<b> `ConvertElementType(operand, new_element_type)` </b>
-
-Arguments          | Type            | Semantics
------------------- | --------------- | ---------------------------
-`operand`          | `XlaOp`         | array of type T with dims D
-`new_element_type` | `PrimitiveType` | type U
-
-The dimensions of the operand and the target shape must match. The source and
-destination element types must not be tuples.
-
-A conversion such as `T=s32` to `U=f32` will perform a normalizing int-to-float
-conversion routine such as round-to-nearest-even.
-
-> Note: The precise float-to-int and visa-versa conversions are currently
-> unspecified, but may become additional arguments to the convert operation in
-> the future.  Not all possible conversions have been implemented for all
->targets.
-
-```
-let a: s32[3] = {0, 1, 2};
-let b: f32[3] = convert(a, f32);
-then b == f32[3]{0.0, 1.0, 2.0}
-```
-
-## CrossReplicaSum
-
-See also
-[`XlaBuilder::CrossReplicaSum`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Computes a sum across replicas.
-
-<b> `CrossReplicaSum(operand)` </b>
-
-Arguments | Type    | Semantics
---------- | ------- | -----------------------------
-`operand` | `XlaOp` | Array to sum across replicas.
-| `replica_group_ids`    | `int64` vector | Group ID for each replica.      |
-
-The output shape is the same as the input shape. For example, if there are two
-replicas and the operand has the value `(1.0, 2.5)` and `(3.0, 5.25)`
-respectively on the two replicas, then the output value from this op will be
-`(4.0, 7.75)` on both replicas.
-
-`replica_group_ids` identifies the group ID of each replica. The group ID must
-either be empty (all replicas belong to a single group), or contain the same
-number of elements as the number of replicas. For example, if
-`replica_group_ids` = {0, 1, 2, 3, 0, 1, 2, 3} has eight replicas, there are
-four subgroups of replica IDs: {0, 4}, {1, 5}, {2, 6}, and {3, 7}. The size of
-each subgroup *must* be identical, so, for example, using:
-`replica_group_ids` = {0, 1, 2, 0} for four replicas is invalid.
-
-Computing the result of CrossReplicaSum requires having one input from each
-replica, so if one replica executes a CrossReplicaSum node more times than
-another, then the former replica will wait forever. Since the replicas are all
-running the same program, there are not a lot of ways for that to happen, but it
-is possible when a while loop's condition depends on data from infeed and the
-data that is infed causes the while loop to iterate more times on one replica
-than another.
-
-## CustomCall
-
-See also
-[`XlaBuilder::CustomCall`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Call a user-provided function within a computation.
-
-<b> `CustomCall(target_name, args..., shape)` </b>
-
-| Arguments     | Type                   | Semantics                         |
-| ------------- | ---------------------- | --------------------------------- |
-| `target_name` | `string`               | Name of the function. A call      |
-:               :                        : instruction will be emitted which :
-:               :                        : targets this symbol name.         :
-| `args`        | sequence of N `XlaOp`s | N arguments of arbitrary type,    |
-:               :                        : which will be passed to the       :
-:               :                        : function.                         :
-| `shape`       | `Shape`                | Output shape of the function      |
-
-The function signature is the same, regardless of the arity or type of args:
-
-```
-extern "C" void target_name(void* out, void** in);
-```
-
-For example, if CustomCall is used as follows:
-
-```
-let x = f32[2] {1,2};
-let y = f32[2x3] {{10, 20, 30}, {40, 50, 60}};
-
-CustomCall("myfunc", {x, y}, f32[3x3])
-```
-
-Here is an example of an implementation of `myfunc`:
-
-```
-extern "C" void myfunc(void* out, void** in) {
-  float (&x)[2] = *static_cast<float(*)[2]>(in[0]);
-  float (&y)[2][3] = *static_cast<float(*)[2][3]>(in[1]);
-  EXPECT_EQ(1, x[0]);
-  EXPECT_EQ(2, x[1]);
-  EXPECT_EQ(10, y[0][0]);
-  EXPECT_EQ(20, y[0][1]);
-  EXPECT_EQ(30, y[0][2]);
-  EXPECT_EQ(40, y[1][0]);
-  EXPECT_EQ(50, y[1][1]);
-  EXPECT_EQ(60, y[1][2]);
-  float (&z)[3][3] = *static_cast<float(*)[3][3]>(out);
-  z[0][0] = x[1] + y[1][0];
-  // ...
-}
-```
-
-The user-provided function must not have side-effects and its execution must be
-idempotent.
-
-> Note: The opaque nature of the user-provided function restricts optimization
-> opportunities for the compiler. Try to express your computation in terms of
-> native XLA ops whenever possible; only use CustomCall as a last resort.
-
-## Dot
-
-See also
-[`XlaBuilder::Dot`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Dot(lhs, rhs)` </b>
-
-Arguments | Type    | Semantics
---------- | ------- | ---------------
-`lhs`     | `XlaOp` | array of type T
-`rhs`     | `XlaOp` | array of type T
-
-The exact semantics of this operation depend on the ranks of the operands:
-
-| Input                   | Output                | Semantics               |
-| ----------------------- | --------------------- | ----------------------- |
-| vector [n] `dot` vector | scalar                | vector dot product      |
-: [n]                     :                       :                         :
-| matrix [m x k] `dot`    | vector [m]            | matrix-vector           |
-: vector [k]              :                       : multiplication          :
-| matrix [m x k] `dot`    | matrix [m x n]        | matrix-matrix           |
-: matrix [k x n]          :                       : multiplication          :
-
-The operation performs sum of products over the last dimension of `lhs` and the
-one-before-last dimension of `rhs`. These are the "contracted" dimensions. The
-contracted dimensions of `lhs` and `rhs` must be of the same size. In practice,
-it can be used to perform dot products between vectors, vector/matrix
-multiplications or matrix/matrix multiplications.
-
-## DotGeneral
-
-See also
-[`XlaBuilder::DotGeneral`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `DotGeneral(lhs, rhs, dimension_numbers)` </b>
-
-Arguments           | Type                  | Semantics
-------------------- | --------------------- | ---------------
-`lhs`               | `XlaOp`               | array of type T
-`rhs`               | `XlaOp`               | array of type T
-`dimension_numbers` | `DotDimensionNumbers` | array of type T
-
-As Dot, but allows contracting and batch dimension numbers to be specified for
-both the 'lhs' and 'rhs'.
-
-| DotDimensionNumbers Fields | Type                    | Semantics
-| --------- | ----------------------- | ---------------
-| 'lhs_contracting_dimensions' | repeated int64 | 'lhs' contracting dimension numbers |
-| 'rhs_contracting_dimensions' | repeated int64 | 'rhs' contracting dimension numbers |
-| 'lhs_batch_dimensions' | repeated int64 | 'lhs' batch dimension numbers |
-| 'rhs_batch_dimensions' | repeated int64 | 'rhs' batch dimension numbers |
-
-DotGeneral performs the sum of products over contracting dimensions specified
-in 'dimension_numbers'.
-
-Associated contracting dimension numbers from the 'lhs' and 'rhs' do not need
-to be the same, but must be listed in the same order in both
-'lhs/rhs_contracting_dimensions' arrays and have the same dimension sizes.
-There must be exactly one contracting dimension on both 'lhs' and 'rhs'.
-
-Example with contracting dimension numbers:
-
-```
-lhs = { {1.0, 2.0, 3.0},
-        {4.0, 5.0, 6.0} }
-
-rhs = { {1.0, 1.0, 1.0},
-        {2.0, 2.0, 2.0} }
-
-DotDimensionNumbers dnums;
-dnums.add_lhs_contracting_dimensions(1);
-dnums.add_rhs_contracting_dimensions(1);
-
-DotGeneral(lhs, rhs, dnums) -> { {6.0, 12.0},
-                                 {15.0, 30.0} }
-```
-
-Associated batch dimension numbers from the 'lhs' and 'rhs' must have the same
-dimension number, must be listed in the same order in both arrays, must
-have the same dimension sizes, and must be ordered before contracting and
-non-contracting/non-batch dimension numbers.
-
-Example with batch dimension numbers (batch size 2, 2x2 matrices):
-
-```
-lhs = { { {1.0, 2.0},
-          {3.0, 4.0} },
-        { {5.0, 6.0},
-          {7.0, 8.0} } }
-
-rhs = { { {1.0, 0.0},
-          {0.0, 1.0} },
-        { {1.0, 0.0},
-          {0.0, 1.0} } }
-
-DotDimensionNumbers dnums;
-dnums.add_lhs_contracting_dimensions(2);
-dnums.add_rhs_contracting_dimensions(1);
-dnums.add_lhs_batch_dimensions(0);
-dnums.add_rhs_batch_dimensions(0);
-
-DotGeneral(lhs, rhs, dnums) -> { { {1.0, 2.0},
-                                   {3.0, 4.0} },
-                                 { {5.0, 6.0},
-                                   {7.0, 8.0} } }
-```
-
-| Input                               | Output            | Semantics        |
-| ----------------------------------- | ----------------- | ---------------- |
-| [b0, m, k] `dot` [b0, k, n]         | [b0, m, n]        |  batch matmul    |
-| [b0, b1, m, k] `dot` [b0, b1, k, n] | [b0, b1, m, n]    |  batch matmul    |
-
-It follows that the resulting dimension number starts with the batch dimension,
-then the 'lhs' non-contracting/non-batch dimension, and finally the 'rhs'
-non-contracting/non-batch dimension.
-
-## DynamicSlice
-
-See also
-[`XlaBuilder::DynamicSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-DynamicSlice extracts a sub-array from the input array at dynamic
-`start_indices`. The size of the slice in each dimension is passed in
-`size_indices`, which specify the end point of exclusive slice intervals in each
-dimension: [start, start + size). The shape of `start_indices` must be rank ==
-1, with dimension size equal to the rank of `operand`.
-
-<b> `DynamicSlice(operand, start_indices, size_indices)` </b>
-
-| Arguments       | Type                | Semantics                           |
-| --------------- | ------------------- | ----------------------------------- |
-| `operand`       | `XlaOp`             | N dimensional array of type T       |
-| `start_indices` | `XlaOp`             | Rank 1 array of N integers          |
-:                 :                     : containing the starting indices of  :
-:                 :                     : the slice for each dimension. Value :
-:                 :                     : must be greater than or equal to    :
-:                 :                     : zero.                               :
-| `size_indices`  | `ArraySlice<int64>` | List of N integers containing the   |
-:                 :                     : slice size for each dimension. Each :
-:                 :                     : value must be strictly greater than :
-:                 :                     : zero, and start + size must be less :
-:                 :                     : than or equal to the size of the    :
-:                 :                     : dimension to avoid wrapping modulo  :
-:                 :                     : dimension size.                     :
-
-The effective slice indices are computed by applying the following
-transformation for each index `i` in `[1, N)` before performing the slice:
-
-```
-start_indices[i] = clamp(start_indices[i], 0, operand.dimension_size[i] - size_indices[i])
-```
-
-This ensures that the extracted slice is always in-bounds with respect to the
-operand array. If the slice is in-bounds before the transformation is applied,
-the transformation has no effect.
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-let s = {2}
-
-DynamicSlice(a, s, {2}) produces:
-  {2.0, 3.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-let s = {2, 1}
-
-DynamicSlice(b, s, {2, 2}) produces:
-  { { 7.0,  8.0},
-    {10.0, 11.0} }
-```
-## DynamicUpdateSlice
-
-See also
-[`XlaBuilder::DynamicUpdateSlice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-DynamicUpdateSlice generates a result which is the value of the input array
-`operand`, with a slice `update` overwritten at `start_indices`.
-The shape of `update` determines the shape of the sub-array of the result which
-is updated.
-The shape of `start_indices` must be rank == 1, with dimension size equal to
-the rank of `operand`.
-
-<b> `DynamicUpdateSlice(operand, update, start_indices)` </b>
-
-| Arguments       | Type    | Semantics                                        |
-| --------------- | ------- | ------------------------------------------------ |
-| `operand`       | `XlaOp` | N dimensional array of type T                    |
-| `update`        | `XlaOp` | N dimensional array of type T containing the     |
-:                 :         : slice update. Each dimension of update shape     :
-:                 :         : must be strictly greater than zero, and start +  :
-:                 :         : update must be less than or equal to the operand :
-:                 :         : size for each dimension to avoid generating      :
-:                 :         : out-of-bounds update indices.                    :
-| `start_indices` | `XlaOp` | Rank 1 array of N integers containing the        |
-:                 :         : starting indices of the slice for each           :
-:                 :         : dimension. Value must be greater than or equal   :
-:                 :         : to zero.                                         :
-
-The effective slice indices are computed by applying the following
-transformation for each index `i` in `[1, N)` before performing the slice:
-
-```
-start_indices[i] = clamp(start_indices[i], 0, operand.dimension_size[i] - update.dimension_size[i])
-```
-
-This ensures that the updated slice is always in-bounds with respect to the
-operand array. If the slice is in-bounds before the transformation is applied,
-the transformation has no effect.
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-let u = {5.0, 6.0}
-let s = {2}
-
-DynamicUpdateSlice(a, u, s) produces:
-  {0.0, 1.0, 5.0, 6.0, 4.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-let u =
- { {12.0,  13.0},
-   {14.0,  15.0},
-   {16.0,  17.0} }
-
-let s = {1, 1}
-
-DynamicUpdateSlice(b, u, s) produces:
- { {0.0,  1.0,  2.0},
-   {3.0, 12.0, 13.0},
-   {6.0, 14.0, 15.0},
-   {9.0, 16.0, 17.0} }
-```
-
-## Element-wise binary arithmetic operations
-
-See also
-[`XlaBuilder::Add`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-A set of element-wise binary arithmetic operations is supported.
-
-<b> `Op(lhs, rhs)` </b>
-
-Where `Op` is one of `Add` (addition), `Sub` (subtraction), `Mul`
-(multiplication), `Div` (division), `Rem` (remainder), `Max` (maximum), `Min`
-(minimum), `LogicalAnd` (logical AND), or `LogicalOr` (logical OR).
-
-Arguments | Type    | Semantics
---------- | ------- | ----------------------------------------
-`lhs`     | `XlaOp` | left-hand-side operand: array of type T
-`rhs`     | `XlaOp` | right-hand-side operand: array of type T
-
-The arguments' shapes have to be either similar or compatible. See the
-[broadcasting](../../performance/xla/broadcasting.md) documentation about what it means for shapes to
-be compatible. The result of an operation has a shape which is the result of
-broadcasting the two input arrays. In this variant, operations between arrays of
-different ranks are *not* supported, unless one of the operands is a scalar.
-
-When `Op` is `Rem`, the sign of the result is taken from the dividend, and the
-absolute value of the result is always less than the divisor's absolute value.
-
-Integer division overflow (signed/unsigned division/remainder by zero or signed
-divison/remainder of `INT_SMIN` with `-1`) produces an implementation defined
-value.
-
-An alternative variant with different-rank broadcasting support exists for these
-operations:
-
-<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
-
-Where `Op` is the same as above. This variant of the operation should be used
-for arithmetic operations between arrays of different ranks (such as adding a
-matrix to a vector).
-
-The additional `broadcast_dimensions` operand is a slice of integers used to
-expand the rank of the lower-rank operand up to the rank of the higher-rank
-operand. `broadcast_dimensions` maps the dimensions of the lower-rank shape to
-the dimensions of the higher-rank shape. The unmapped dimensions of the expanded
-shape are filled with dimensions of size one. Degenerate-dimension broadcasting
-then broadcasts the shapes along these degenerate dimensions to equalize the
-shapes of both operands. The semantics are described in detail on the
-[broadcasting page](../../performance/xla/broadcasting.md).
-
-## Element-wise comparison operations
-
-See also
-[`XlaBuilder::Eq`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-A set of standard element-wise binary comparison operations is supported. Note
-that standard IEEE 754 floating-point comparison semantics apply when comparing
-floating-point types.
-
-<b> `Op(lhs, rhs)` </b>
-
-Where `Op` is one of `Eq` (equal-to), `Ne` (not equal-to), `Ge`
-(greater-or-equal-than), `Gt` (greater-than), `Le` (less-or-equal-than), `Lt`
-(less-than).
-
-Arguments | Type    | Semantics
---------- | ------- | ----------------------------------------
-`lhs`     | `XlaOp` | left-hand-side operand: array of type T
-`rhs`     | `XlaOp` | right-hand-side operand: array of type T
-
-The arguments' shapes have to be either similar or compatible. See the
-[broadcasting](../../performance/xla/broadcasting.md) documentation about what it means for shapes to
-be compatible. The result of an operation has a shape which is the result of
-broadcasting the two input arrays with the element type `PRED`. In this variant,
-operations between arrays of different ranks are *not* supported, unless one of
-the operands is a scalar.
-
-An alternative variant with different-rank broadcasting support exists for these
-operations:
-
-<b> `Op(lhs, rhs, broadcast_dimensions)` </b>
-
-Where `Op` is the same as above. This variant of the operation should be used
-for comparison operations between arrays of different ranks (such as adding a
-matrix to a vector).
-
-The additional `broadcast_dimensions` operand is a slice of integers specifying
-the dimensions to use for broadcasting the operands. The semantics are described
-in detail on the [broadcasting page](../../performance/xla/broadcasting.md).
-
-## Element-wise unary functions
-
-XlaBuilder supports these element-wise unary functions:
-
-<b>`Abs(operand)`</b> Element-wise abs `x -> |x|`.
-
-<b>`Ceil(operand)`</b> Element-wise ceil `x -> ⌈x⌉`.
-
-<b>`Cos(operand)`</b> Element-wise cosine `x -> cos(x)`.
-
-<b>`Exp(operand)`</b> Element-wise natural exponential `x -> e^x`.
-
-<b>`Floor(operand)`</b> Element-wise floor `x -> ⌊x⌋`.
-
-<b>`IsFinite(operand)`</b> Tests whether each element of `operand` is finite,
-i.e., is not positive or negative infinity, and is not `NaN`. Returns an array
-of `PRED` values with the same shape as the input, where each element is `true`
-if and only if the corresponding input element is finite.
-
-<b>`Log(operand)`</b> Element-wise natural logarithm `x -> ln(x)`.
-
-<b>`LogicalNot(operand)`</b> Element-wise logical not `x -> !(x)`.
-
-<b>`Neg(operand)`</b> Element-wise negation `x -> -x`.
-
-<b>`Sign(operand)`</b> Element-wise sign operation `x -> sgn(x)` where
-
-$$\text{sgn}(x) = \begin{cases} -1 & x < 0\\ 0 & x = 0\\ 1 & x > 0 \end{cases}$$
-
-using the comparison operator of the element type of `operand`.
-
-<b>`Tanh(operand)`</b> Element-wise hyperbolic tangent `x -> tanh(x)`.
-
-
-Arguments | Type    | Semantics
---------- | ------- | ---------------------------
-`operand` | `XlaOp` | The operand to the function
-
-The function is applied to each element in the `operand` array, resulting in an
-array with the same shape. It is allowed for `operand` to be a scalar (rank 0).
-
-## Gather
-
-The XLA gather operation stitches together several slices (each slice at a
-potentially different runtime offset) of an input array.
-
-### General Semantics
-
-See also
-[`XlaBuilder::Gather`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-For a more intuitive description, see the "Informal Description" section below.
-
-<b> `gather(operand, start_indices, offset_dims, collapsed_slice_dims, slice_sizes, start_index_map)` </b>
-
-|Arguments         | Type                    | Semantics                       |
-|----------------- | ----------------------- | --------------------------------|
-|`operand`         | `XlaOp`                 | The array we’re gathering       |
-:                  :                         : from.                           :
-|`start_indices`   | `XlaOp`                 | Array containing the starting  |
-:                  :                         : indices of the slices we gather.:
-|`index_vector_dim` | `int64`                | The dimension in                |
-:                  :                         : `start_indices` that "contains" :
-:                  :                         : the starting indices.  See      :
-:                  :                         : below for a detailed            :
-:                  :                         : description.                    :
-|`offset_dims`     | `ArraySlice<int64>`     | The set of dimensions in  the   :
-:                  :                         : output shape that offset into a :
-:                  :                         : array sliced from operand.     :
-|`slice_sizes`     | `ArraySlice<int64>`      | `slice_sizes[i]` is the bounds |
-:                  :                          : for the slice on dimension `i`.:
-|`collapsed_slice_dims` | `ArraySlice<int64>` | The set of dimensions in each  :
-|                  :                          | slice that are collapsed away. :
-|                  :                          | These dimensions must have size:
-|                  :                          | 1.                             |
-|`start_index_map` | `ArraySlice<int64>`      | A map that describes how to map|
-:                  :                          : indices in `start_indices` to  :
-:                  :                          : to legal indices into operand. :
-
-For convenience, we label dimensions in the output array not in `offset_dims`
-as `batch_dims`.
-
-The output is an array of rank `batch_dims.size` + `operand.rank` -
-`collapsed_slice_dims`.size.
-
-If `index_vector_dim` is equal to `start_indices.rank` we implicitly consider
-`start_indices` to have a trailing `1` dimension (i.e. if `start_indices` was of
-shape `[6,7]` and `index_vector_dim` is `2` then we implicitly consider the
-shape of `start_indices` to be `[6,7,1]`).
-
-The bounds for the output array along dimension `i` is computed as follows:
-
-  1. If `i` is present in `batch_dims` (i.e. is equal to `batch_dims[k]` for
-     some `k`) then we pick the corresponding dimension bounds out of
-     `start_indices.shape`, skipping `index_vector_dim` (i.e. pick
-     `start_indices.shape.dims`[`k`] if `k` < `index_vector_dim` and
-     `start_indices.shape.dims`[`k`+`1`] otherwise).
-
-  2. If `i` is present in `offset_dims` (i.e. equal to `offset_dims`[`k`] for
-     some `k`) then we pick the corresponding bound out of `slice_sizes` after
-     accounting for `collapsed_slice_dims` (i.e. we pick
-     `adjusted_slice_sizes`[`k`] where `adjusted_slice_sizes` is `slice_sizes`
-     with the bounds at indices `collapsed_slice_dims` removed).
-
-Formally, the operand index `In` corresponding to an output index `Out` is
-computed as follows:
-
-  1. Let `G` = { `Out`[`k`] for `k` in `batch_dims` }.  Use `G` to slice out
-     vector `S` such that `S`[`i`] = `start_indices`[Combine(`G`, `i`)] where
-     Combine(A, b) inserts b at position `index_vector_dim` into A.  Note that
-     this is well defined even if `G` is empty -- if `G` is empty then `S` =
-     `start_indices`.
-
-  2. Create a starting index, `S`<sub>`in`</sub>, into `operand` using `S` by
-     scattering `S` using `start_index_map`.  More precisely:
-       1. `S`<sub>`in`</sub>[`start_index_map`[`k`]] = `S`[`k`] if `k` <
-          `start_index_map.size`.
-       2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
-
-  3. Create an index `O`<sub>`in`</sub> into `operand` by scattering the indices
-     at the offset dimensions in `Out` according to the `collapsed_slice_dims`
-     set.  More precisely:
-       1. `O`<sub>`in`</sub>[`expand_offset_dims`(`k`)] =
-          `Out`[`offset_dims`[`k`]] if `k` < `offset_dims.size`
-          (`expand_offset_dims` is defined below).
-       2. `O`<sub>`in`</sub>[`_`] = `0` otherwise.
-  4. `In` is `O`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
-     addition.
-
-`expand_offset_dims` is the monotonic function with domain [`0`, `offset.size`)
-and range [`0`, `operand.rank`) \ `collapsed_slice_dims`.  So if, e.g.,
-`offset.size` is `4`, `operand.rank` is `6` and `collapsed_slice_dims` is {`0`,
-`2`} then `expand_offset_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}.
-
-### Informal Description and Examples
-
-Informally, every index `Out` in the output array corresponds to an element `E`
-in the operand array, computed as follows:
-
-  - We use the batch dimensions in `Out` to look up a starting index from
-    `start_indices`.
-
-  - We use `start_index_map` to map the starting index (which may have size less
-    than operand.rank) to a "full" starting index into operand.
-
-  - We dynamic-slice out a slice with size `slice_sizes` using the full starting
-    index.
-
-  - We reshape the slice by collapsing the `collapsed_slice_dims` dimensions.
-    Since all collapsed slice dimensions have to have bound 1 this reshape is
-    always legal.
-
-  - We use the offset dimensions in `Out` to index into this slice to get the
-    input element, `E`, corresponding to output index `Out`.
-
-`index_vector_dim` is set to `start_indices.rank` - `1` in all of the
-examples that follow.  More interesting values for `index_vector_dim` does not
-change the operation fundamentally, but makes the visual representation more
-cumbersome.
-
-To get an intuition on how all of the above fits together, let's look at an
-example that gathers 5 slices of shape `[8,6]` from a `[16,11]` array.  The
-position of a slice into the `[16,11]` array can be represented as an index
-vector of shape `S64[2]`, so the set of 5 positions can be represented as a
-`S64[5,2]` array.
-
-The behavior of the gather operation can then be depicted as an index
-transformation that takes [`G`,`O`<sub>`0`</sub>,`O`<sub>`1`</sub>], an index in
-the output shape, and maps it to an element in the input array in the following
-way:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/ops_xla_gather_0.svg">
-</div>
-
-We first select an (`X`,`Y`) vector from the gather indices array using `G`.
-The element in the output array at index
-[`G`,`O`<sub>`0`</sub>,`O`<sub>`1`</sub>] is then the element in the input
-array at index [`X`+`O`<sub>`0`</sub>,`Y`+`O`<sub>`1`</sub>].
-
-`slice_sizes` is `[8,6]`, which decides the range of W<sub>`0`</sub> and
-W<sub>`1`</sub>, and this in turn decides the bounds of the slice.
-
-This gather operation acts as a batch dynamic slice with `G` as the batch
-dimension.
-
-The gather indices may be multidimensional.  For instance, a more general
-version of the example above using a "gather indices" array of shape `[4,5,2]`
-would translate indices like this:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/ops_xla_gather_1.svg">
-</div>
-
-Again, this acts as a batch dynamic slice `G`<sub>`0`</sub> and
-`G`<sub>`1`</sub> as the batch dimensions.  The slice size is still `[8,6]`.
-
-The gather operation in XLA generalizes the informal semantics outlined above in
-the following ways:
-
- 1. We can configure which dimensions in the output shape are the offset
-    dimensions (dimensions containing `O`<sub>`0`</sub>, `O`<sub>`1`</sub> in
-    the last example).  The output batch dimensions (dimensions containing
-    `G`<sub>`0`</sub>, `G`<sub>`1`</sub> in the last example) are defined to be
-    the output dimensions that are not offset dimensions.
-
- 2. The number of output offset dimensions explicitly present in the output
-    shape may be smaller than the input rank.  These "missing" dimensions, which
-    are listed explicitly as `collapsed_slice_dims`, must have a slice size of
-    `1`.  Since they have a slice size of `1` the only valid index for them is
-    `0` and eliding them does not introduce ambiguity.
-
- 3. The slice extracted from the "Gather Indices" array ((`X`, `Y`) in the last
-    example) may have fewer elements than the input array rank, and an explicit
-    mapping dictates how the index should be expanded to have the same rank as
-    the input.
-
-As a final example, we use (2) and (3) to implement `tf.gather_nd`:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/ops_xla_gather_2.svg">
-</div>
-
-`G`<sub>`0`</sub> and `G`<sub>`1`</sub> are used to slice out a starting index
-from the gather indices array as usual, except the starting index has only one
-element, `X`.  Similarly, there is only one output offset index with the value
-`O`<sub>`0`</sub>.  However, before being used as indices into the input array,
-these are expanded in accordance to "Gather Index Mapping" (`start_index_map` in
-the formal description) and "Offset Mapping" (`expand_offset_dims` in the formal
-description) into [`0`,`O`<sub>`0`</sub>] and [`X`,`0`] respectively, adding up
-to [`X`,`O`<sub>`0`</sub>].  In other words, the output index
-[`G`<sub>`0`</sub>,`G`<sub>`1`</sub>,`O`<sub>`0`</sub>] maps to the input index
-[`GatherIndices`[`G`<sub>`0`</sub>,`G`<sub>`1`</sub>,`0`],`X`] which gives us
-the semantics for `tf.gather_nd`.
-
-`slice_sizes` for this case is `[1,11]`.  Intuitively this means that every
-index `X` in the gather indices array picks an entire row and the result is the
-concatenation of all these rows.
-
-## GetTupleElement
-
-See also
-[`XlaBuilder::GetTupleElement`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Indexes into a tuple with a compile-time-constant value.
-
-The value must be a compile-time-constant so that shape inference can determine
-the type of the resulting value.
-
-This is analogous to `std::get<int N>(t)` in C++. Conceptually:
-
-```
-let v: f32[10] = f32[10]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-let s: s32 = 5;
-let t: (f32[10], s32) = tuple(v, s);
-let element_1: s32 = gettupleelement(t, 1);  // Inferred shape matches s32.
-```
-
-See also `tf.tuple`.
-
-## Infeed
-
-See also
-[`XlaBuilder::Infeed`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Infeed(shape)` </b>
-
-| Argument | Type    | Semantics                                             |
-| -------- | ------- | ----------------------------------------------------- |
-| `shape`  | `Shape` | Shape of the data read from the Infeed interface. The |
-:          :         : layout field of the shape must be set to match the    :
-:          :         : layout of the data sent to the device; otherwise its  :
-:          :         : behavior is undefined.                                :
-
-Reads a single data item from the implicit Infeed streaming interface of the
-device, interpreting the data as the given shape and its layout, and returns a
-`XlaOp` of the data. Multiple Infeed operations are allowed in a
-computation, but there must be a total order among the Infeed operations. For
-example, two Infeeds in the code below have a total order since there is a
-dependency between the while loops.
-
-```
-result1 = while (condition, init = init_value) {
-  Infeed(shape)
-}
-
-result2 = while (condition, init = result1) {
-  Infeed(shape)
-}
-```
-
-Nested tuple shapes are not supported. For an empty tuple shape, the Infeed
-operation is effectively a no-op and proceeds without reading any data from the
-Infeed of the device.
-
-> Note: We plan to allow multiple Infeed operations without a total order, in
-> which case the compiler will provide information about how the Infeed
-> operations are serialized in the compiled program.
-
-## Iota
-
-<b> `Iota()` </b>
-
-Builds a constant literal on device rather than a potentially large host
-transfer.  Creates a rank 1 tensor of values starting at zero and incrementing
-by one.
-
-Arguments          | Type            | Semantics
------------------- | --------------- | ---------------------------
-`type`             | `PrimitiveType` | type U
-`size`             | `int64`         | The number of elements in the tensor.
-
-## Map
-
-See also
-[`XlaBuilder::Map`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Map(operands..., computation)` </b>
-
-| Arguments         | Type                   | Semantics                      |
-| ----------------- | ---------------------- | ------------------------------ |
-| `operands`        | sequence of N `XlaOp`s | N arrays of types T_0..T_{N-1} |
-| `computation`     | `XlaComputation`       | computation of type `T_0, T_1, |
-:                   :                        : ..., T_{N + M -1} -> S` with N :
-:                   :                        : parameters of type T and M of  :
-:                   :                        : arbitrary type                 :
-| `dimensions`      | `int64` array          | array of map dimensions        |
-
-Applies a scalar function over the given `operands` arrays, producing an array
-of the same dimensions where each element is the result of the mapped function
-applied to the corresponding elements in the input arrays.
-
-The mapped function is an arbitrary computation with the restriction that it has
-N inputs of scalar type `T` and a single output with type `S`. The output has
-the same dimensions as the operands except that the element type T is replaced
-with S.
-
-For example: `Map(op1, op2, op3, computation, par1)` maps `elem_out <-
-computation(elem1, elem2, elem3, par1)` at each (multi-dimensional) index in the
-input arrays to produce the output array.
-
-## Pad
-
-See also
-[`XlaBuilder::Pad`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Pad(operand, padding_value, padding_config)` </b>
-
-| Arguments        | Type            | Semantics                               |
-| ---------------- | --------------- | --------------------------------------- |
-| `operand`        | `XlaOp`         | array of type `T`                       |
-| `padding_value`  | `XlaOp`         | scalar of type `T` to fill in the added |
-:                  :                 : padding                                 :
-| `padding_config` | `PaddingConfig` | padding amount on both edges (low,      |
-:                  :                 : high) and between the elements of each  :
-:                  :                 : dimension                               :
-
-Expands the given `operand` array by padding around the array as well as between
-the elements of the array with the given `padding_value`. `padding_config`
-specifies the amount of edge padding and the interior padding for each
-dimension.
-
-`PaddingConfig` is a repeated field of `PaddingConfigDimension`, which contains
-three fields for each dimension: `edge_padding_low`, `edge_padding_high`, and
-`interior_padding`. `edge_padding_low` and `edge_padding_high` specify the
-amount of padding added at the low-end (next to index 0) and the high-end (next
-to the highest index) of each dimension respectively. The amount of edge padding
-can be negative -- the absolute value of negative padding indicates the number
-of elements to remove from the specified dimension. `interior_padding` specifies
-the amount of padding added between any two elements in each dimension. Interior
-padding occurs logically before edge padding, so in the case of negative edge
-padding elements are removed from the interior-padded operand. This operation is
-a no-op if the edge padding pairs are all (0, 0) and the interior padding values
-are all 0. The figure below shows examples of different `edge_padding` and
-`interior_padding` values for a two-dimensional array.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="https://www.tensorflow.org/images/ops_pad.png">
-</div>
-
-## Recv
-
-See also
-[`XlaBuilder::Recv`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Recv(shape, channel_handle)` </b>
-
-| Arguments        | Type            | Semantics                            |
-| ---------------- | --------------- | ------------------------------------ |
-| `shape`          | `Shape`         | shape of the data to receive         |
-| `channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair |
-
-Receives data of the given shape from a `Send` instruction in another
-computation that shares the same channel handle. Returns a
-XlaOp for the received data.
-
-The client API of `Recv` operation represents synchronous communication.
-However, the instruction is internally decomposed into 2 HLO instructions
-(`Recv` and `RecvDone`) to enable asynchronous data transfers. See also
-[`HloInstruction::CreateRecv` and `HloInstruction::CreateRecvDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
-
-<b>`Recv(const Shape& shape, int64 channel_id)`</b>
-
-Allocates resources required to receive data from a `Send` instruction with the
-same channel_id. Returns a context for the allocated resources, which is used
-by a following `RecvDone` instruction to wait for the completion of the data
-transfer. The context is a tuple of {receive buffer (shape), request identifier
-(U32)} and it can only be used by a `RecvDone` instruction.
-
-<b> `RecvDone(HloInstruction context)` </b>
-
-Given a context created by a `Recv` instruction, waits for the data transfer to
-complete and returns the received data.
-
-## Reduce
-
-See also
-[`XlaBuilder::Reduce`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Applies a reduction function to one or more arrays in parallel.
-
-<b> `Reduce(operands..., init_values..., computation, dimensions)` </b>
-
-Arguments     | Type                  | Semantics
-------------- | --------------------- | ---------------------------------------
-`operands`    | Sequence of N `XlaOp` | N arrays of types `T_0, ..., T_N`.
-`init_values` | Sequence of N `XlaOp` | N scalars of types `T_0, ..., T_N`.
-`computation` | `XlaComputation`      | computation of type
-              :                       : `T_0, ..., T_N, T_0, ..., T_N -> Collate(T_0, ..., T_N)`
-`dimensions`  | `int64` array         | unordered array of dimensions to reduce
-
-Where:
-* N is required to be greater or equal to 1.
-* All input arrays must have the same dimensions.
-* If `N = 1`, `Collate(T)` is `T`.
-* If `N > 1`, `Collate(T_0, ..., T_N)` is a tuple of `N` elements of type `T`.
-
-The output of the op is `Collate(Q_0, ..., Q_N)` where `Q_i` is an array of type
-`T_i`, the dimensions of which are described below.
-
-This operation reduces one or more dimensions of each input array into scalars.
-The rank of each returned array is `rank(operand) - len(dimensions)`.
-`init_value` is the initial value used for every reduction and may be inserted
-anywhere during computation by the back-end. In most cases, `init_value` is an
-identity of the reduction function (for example, 0 for addition). The applied
-`computation` is always passed the `init_value` on the left-hand side.
-
-The evaluation order of the reduction function is arbitrary and may be
-non-deterministic. Therefore, the reduction function should not be overly
-sensitive to reassociation.
-
-Some reduction functions like addition are not strictly associative for floats.
-However, if the range of the data is limited, floating-point addition is close
-enough to being associative for most practical uses. It is possible to conceive
-of some completely non-associative reductions, however, and these will produce
-incorrect or unpredictable results in XLA reductions.
-
-As an example, when reducing across one dimension in a single 1D array with
-values [10, 11, 12, 13], with reduction function `f` (this is `computation`)
-then that could be computed as
-
-`f(10, f(11, f(12, f(init_value, 13)))`
-
-but there are also many other possibilities, e.g.
-
-`f(init_value, f(f(10, f(init_value, 11)), f(f(init_value, 12), f(init_value, 13))))`
-
-The following is a rough pseudo-code example of how reduction could be
-implemented, using summation as the reduction computation with an initial value
-of 0.
-
-```python
-result_shape <- remove all dims in dimensions from operand_shape
-
-# Iterate over all elements in result_shape. The number of r's here is equal
-# to the rank of the result
-for r0 in range(result_shape[0]), r1 in range(result_shape[1]), ...:
-  # Initialize this result element
-  result[r0, r1...] <- 0
-
-  # Iterate over all the reduction dimensions
-  for d0 in range(dimensions[0]), d1 in range(dimensions[1]), ...:
-    # Increment the result element with the value of the operand's element.
-    # The index of the operand's element is constructed from all ri's and di's
-    # in the right order (by construction ri's and di's together index over the
-    # whole operand shape).
-    result[r0, r1...] += operand[ri... di]
-```
-
-Here's an example of reducing a 2D array (matrix). The shape has rank 2,
-dimension 0 of size 2 and dimension 1 of size 3:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:35%" src="https://www.tensorflow.org/images/ops_2d_matrix.png">
-</div>
-
-Results of reducing dimensions 0 or 1 with an "add" function:
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_from_2d_matrix.png">
-</div>
-
-Note that both reduction results are 1D arrays. The diagram shows one as column
-and another as row just for visual convenience.
-
-For a more complex example, here is a 3D array. Its rank is 3, dimension 0 of
-size 4, dimension 1 of size 2 and dimension 2 of size 3. For simplicity, the
-values 1 to 6 are replicated across dimension 0.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_from_3d_matrix.png">
-</div>
-
-Similarly to the 2D example, we can reduce just one dimension. If we reduce
-dimension 0, for example, we get a rank-2 array where all values across
-dimension 0 were folded into a scalar:
-
-```text
-|  4   8  12 |
-| 16  20  24 |
-```
-
-If we reduce dimension 2, we also get a rank-2 array where all values across
-dimension 2 were folded into a scalar:
-
-```text
-| 6  15 |
-| 6  15 |
-| 6  15 |
-| 6  15 |
-```
-
-Note that the relative order between the remaining dimensions in the input is
-preserved in the output, but some dimensions may get assigned new numbers (since
-the rank changes).
-
-We can also reduce multiple dimensions. Add-reducing dimensions 0 and 1 produces
-the 1D array `| 20 28 36 |`.
-
-Reducing the 3D array over all its dimensions produces the scalar `84`.
-
-When `N > 1`, reduce function application is slightly more complex, as it is
-applied simultaneously to all inputs. For example, consider the following
-reduction function, which can be used to compute the max and the argmax of a
-a 1-D tensor in parallel:
-
-```
-f: (Float, Int, Float, Int) -> Float, Int
-f(max, argmax, value, index):
-  if value >= argmax:
-    return (value, index)
-  else:
-    return (max, argmax)
-```
-
-For 1-D Input arrays `V = Float[N], K = Int[N]`, and init values
-`I_V = Float, I_K =  Int`, the result `f_(N-1)` of reducing across the only
-input dimension is equivalent to the following recursive application:
-```
-f_0 = f(I_V, I_K, V_0, K_0)
-f_1 = f(f_0.first, f_0.second, V_1, K_1)
-...
-f_(N-1) = f(f_(N-2).first, f_(N-2).second, V_(N-1), K_(N-1))
-```
-
-Applying this reduction to an array of values, and an array of sequential
-indices (i.e. iota), will co-iterate over the arrays, and return a tuple
-containing the maximal value and the matching index.
-
-## ReducePrecision
-
-See also
-[`XlaBuilder::ReducePrecision`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Models the effect of converting floating-point values to a lower-precision
-format (such as IEEE-FP16) and back to the original format.  The number of
-exponent and mantissa bits in the lower-precision format can be specified
-arbitrarily, although all bit sizes may not be supported on all hardware
-implementations.
-
-<b> `ReducePrecision(operand, mantissa_bits, exponent_bits)` </b>
-
-Arguments       | Type    | Semantics
---------------- | ------- | -------------------------------------------------
-`operand`       | `XlaOp` | array of floating-point type `T`.
-`exponent_bits` | `int32` | number of exponent bits in lower-precision format
-`mantissa_bits` | `int32` | number of mantissa bits in lower-precision format
-
-The result is an array of type `T`.  The input values are rounded to the nearest
-value representable with the given number of mantissa bits (using "ties to even"
-semantics), and any values that exceed the range specified by the number of
-exponent bits are clamped to positive or negative infinity.  `NaN` values are
-retained, although they may be converted to canonical `NaN` values.
-
-The lower-precision format must have at least one exponent bit (in order to
-distinguish a zero value from an infinity, since both have a zero mantissa), and
-must have a non-negative number of mantissa bits.  The number of exponent or
-mantissa bits may exceed the corresponding value for type `T`; the corresponding
-portion of the conversion is then simply a no-op.
-
-## ReduceWindow
-
-See also
-[`XlaBuilder::ReduceWindow`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Applies a reduction function to all elements in each window of the input
-multi-dimensional array, producing an output multi-dimensional array with the
-same number of elements as the number of valid positions of the window. A
-pooling layer can be expressed as a `ReduceWindow`. Similar to
-[`Reduce`](#reduce), the applied `computation` is always passed the `init_value`
-on the left-hand side.
-
-<b> `ReduceWindow(operand, init_value, computation, window_dimensions,
-window_strides, padding)` </b>
-
-| Arguments           | Type                | Semantics                        |
-| ------------------- | ------------------- | -------------------------------- |
-| `operand`           | `XlaOp`             | N dimensional array containing   |
-:                     :                     : elements of type T. This is the  :
-:                     :                     : base area on which the window is :
-:                     :                     : placed.                          :
-| `init_value`        | `XlaOp`             | Starting value for the           |
-:                     :                     : reduction. See [Reduce](#reduce) :
-:                     :                     : for details.                     :
-| `computation`       | `XlaComputation`    | Reduction function of type `T, T |
-:                     :                     : -> T`, to apply to all elements  :
-:                     :                     : in each window                   :
-| `window_dimensions` | `ArraySlice<int64>` | array of integers for window     |
-:                     :                     : dimension values                 :
-| `window_strides`    | `ArraySlice<int64>` | array of integers for window     |
-:                     :                     : stride values                    :
-| `padding`           | `Padding`           | padding type for window          |
-:                     :                     : (Padding\:\:kSame or             :
-:                     :                     : Padding\:\:kValid)               :
-
-Below code and figure shows an example of using `ReduceWindow`. Input is a
-matrix of size [4x6] and both window_dimensions and window_stride_dimensions are
-[2x3].
-
-```
-// Create a computation for the reduction (maximum).
-XlaComputation max;
-{
-  XlaBuilder builder(client_, "max");
-  auto y = builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "y");
-  auto x = builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "x");
-  builder.Max(y, x);
-  max = builder.Build().ConsumeValueOrDie();
-}
-
-// Create a ReduceWindow computation with the max reduction computation.
-XlaBuilder builder(client_, "reduce_window_2x3");
-auto shape = ShapeUtil::MakeShape(F32, {4, 6});
-auto input = builder.Parameter(0, shape, "input");
-builder.ReduceWindow(
-    input, *max,
-    /*init_val=*/builder.ConstantLiteral(LiteralUtil::MinValue(F32)),
-    /*window_dimensions=*/{2, 3},
-    /*window_stride_dimensions=*/{2, 3},
-    Padding::kValid);
-```
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:35%" src="https://www.tensorflow.org/images/ops_reduce_window.png">
-</div>
-
-Stride of 1 in a dimension specifies that the position of a window in the
-dimension is 1 element away from its adjacent window. In order to specify that
-no windows overlap with each other, window_stride_dimensions should be equal to
-window_dimensions. The figure below illustrates the use of two different stride
-values. Padding is applied to each dimension of the input and the calculations
-are the same as though the input came in with the dimensions it has after
-padding.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:75%" src="https://www.tensorflow.org/images/ops_reduce_window_stride.png">
-</div>
-
-The evaluation order of the reduction function is arbitrary and may be
-non-deterministic. Therefore, the reduction function should not be overly
-sensitive to reassociation. See the discussion about associativity in the
-context of [`Reduce`](#reduce) for more details.
-
-## Reshape
-
-See also
-[`XlaBuilder::Reshape`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h)
-and the [`Collapse`](#collapse) operation.
-
-Reshapes the dimensions of an array into a new configuration.
-
-<b> `Reshape(operand, new_sizes)` </b>
-<b> `Reshape(operand, dimensions, new_sizes)` </b>
-
-Arguments    | Type           | Semantics
------------- | -------------- | ---------------------------------------
-`operand`    | `XlaOp`        | array of type T
-`dimensions` | `int64` vector | order in which dimensions are collapsed
-`new_sizes`  | `int64` vector | vector of sizes of new dimensions
-
-Conceptually, reshape first flattens an array into a one-dimensional vector of
-data values, and then refines this vector into a new shape. The input arguments
-are an arbitrary array of type T, a compile-time-constant vector of dimension
-indices, and a compile-time-constant vector of dimension sizes for the result.
-The values in the `dimension` vector, if given, must be a permutation of all of
-T's dimensions; the default if not given is `{0, ..., rank - 1}`. The order of
-the dimensions in `dimensions` is from slowest-varying dimension (most major) to
-fastest-varying dimension (most minor) in the loop nest which collapses the
-input array into a single dimension. The `new_sizes` vector determines the size
-of the output array. The value at index 0 in `new_sizes` is the size of
-dimension 0, the value at index 1 is the size of dimension 1, and so on. The
-product of the `new_size` dimensions must equal the product of the operand's
-dimension sizes. When refining the collapsed array into the multidimensional
-array defined by `new_sizes`, the dimensions in `new_sizes` are ordered from
-slowest varying (most major) and to fastest varying (most minor).
-
-For example, let v be an array of 24 elements:
-
-```
-let v = f32[4x2x3] {{{10, 11, 12}, {15, 16, 17}},
-                    {{20, 21, 22}, {25, 26, 27}},
-                    {{30, 31, 32}, {35, 36, 37}},
-                    {{40, 41, 42}, {45, 46, 47}}};
-
-In-order collapse:
-let v012_24 = Reshape(v, {0,1,2}, {24});
-then v012_24 == f32[24] {10, 11, 12, 15, 16, 17, 20, 21, 22, 25, 26, 27,
-                         30, 31, 32, 35, 36, 37, 40, 41, 42, 45, 46, 47};
-
-let v012_83 = Reshape(v, {0,1,2}, {8,3});
-then v012_83 == f32[8x3] {{10, 11, 12}, {15, 16, 17},
-                          {20, 21, 22}, {25, 26, 27},
-                          {30, 31, 32}, {35, 36, 37},
-                          {40, 41, 42}, {45, 46, 47}};
-
-Out-of-order collapse:
-let v021_24 = Reshape(v, {1,2,0}, {24});
-then v012_24 == f32[24]  {10, 20, 30, 40, 11, 21, 31, 41, 12, 22, 32, 42,
-                          15, 25, 35, 45, 16, 26, 36, 46, 17, 27, 37, 47};
-
-let v021_83 = Reshape(v, {1,2,0}, {8,3});
-then v021_83 == f32[8x3] {{10, 20, 30}, {40, 11, 21},
-                          {31, 41, 12}, {22, 32, 42},
-                          {15, 25, 35}, {45, 16, 26},
-                          {36, 46, 17}, {27, 37, 47}};
-
-
-let v021_262 = Reshape(v, {1,2,0}, {2,6,2});
-then v021_262 == f32[2x6x2] {{{10, 20}, {30, 40},
-                              {11, 21}, {31, 41},
-                              {12, 22}, {32, 42}},
-                             {{15, 25}, {35, 45},
-                              {16, 26}, {36, 46},
-                              {17, 27}, {37, 47}}};
-```
-
-As a special case, reshape can transform a single-element array to a scalar and
-vice versa. For example,
-
-```
-Reshape(f32[1x1] {{5}}, {0,1}, {}) == 5;
-Reshape(5, {}, {1,1}) == f32[1x1] {{5}};
-```
-
-## Rev (reverse)
-
-See also
-[`XlaBuilder::Rev`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b>`Rev(operand, dimensions)`</b>
-
-Arguments    | Type                | Semantics
------------- | ------------------- | ---------------------
-`operand`    | `XlaOp`             | array of type T
-`dimensions` | `ArraySlice<int64>` | dimensions to reverse
-
-Reverses the order of elements in the `operand` array along the specified
-`dimensions`, generating an output array of the same shape. Each element of the
-operand array at a multidimensional index is stored into the output array at a
-transformed index. The multidimensional index is transformed by reversing the
-index in each dimension to be reversed (i.e., if a dimension of size N is one of
-the reversing dimensions, its index i is transformed into N - 1 - i).
-
-One use for the `Rev` operation is to reverse the convolution weight array along
-the two window dimensions during the gradient computation in neural networks.
-
-## RngNormal
-
-See also
-[`XlaBuilder::RngNormal`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Constructs an output of a given shape with random numbers generated following
-the $$N(\mu, \sigma)$$ normal distribution. The parameters $$\mu$$ and
-$$\sigma$$, and output shape have to have a floating point elemental type. The
-parameters furthermore have to be scalar valued.
-
-<b>`RngNormal(mu, sigma, shape)`</b>
-
-| Arguments | Type    | Semantics                                           |
-| --------- | ------- | --------------------------------------------------- |
-| `mu`      | `XlaOp` | Scalar of type T specifying mean of generated       |
-:           :         : numbers                                   :
-| `sigma`   | `XlaOp` | Scalar of type T specifying standard deviation of   |
-:           :         : generated numbers                                   :
-| `shape`   | `Shape` | Output shape of type T                              |
-
-## RngUniform
-
-See also
-[`XlaBuilder::RngUniform`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Constructs an output of a given shape with random numbers generated following
-the uniform distribution over the interval $$[a,b)$$. The parameters and output
-element type have to be a boolean type, an integral type or a floating point
-types, and the types have to be consistent. The CPU and GPU backends currently
-only support F64, F32, F16, BF16, S64, U64, S32 and U32. Furthermore, the
-parameters need to be scalar valued. If $$b <= a$$ the result is
-implementation-defined.
-
-<b>`RngUniform(a, b, shape)`</b>
-
-| Arguments | Type                    | Semantics                         |
-| --------- | ----------------------- | --------------------------------- |
-| `a`       | `XlaOp`                 | Scalar of type T specifying lower |
-:           :                         : limit of interval                 :
-| `b`       | `XlaOp`                 | Scalar of type T specifying upper |
-:           :                         : limit of interval                 :
-| `shape`   | `Shape`                 | Output shape of type T            |
-
-## Scatter
-
-The XLA scatter operation generates a result which is the value of the input
-tensor `operand`, with several slices (at indices specified by
-`scatter_indices`) updated with the values in `updates` using
-`update_computation`.
-
-See also
-[`XlaBuilder::Scatter`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `scatter(operand, scatter_indices, updates, update_computation, index_vector_dim, update_window_dims, inserted_window_dims, scatter_dims_to_operand_dims)` </b>
-
-|Arguments         | Type                   | Semantics                        |
-|------------------|------------------------|----------------------------------|
-|`operand`         | `XlaOp`                | Tensor to be scattered into.     |
-|`scatter_indices` | `XlaOp`                | Tensor containing the starting   |
-:                  :                        : indices of the slices that must  :
-:                  :                        : be scattered to.                 :
-|`updates`         | `XlaOp`                | Tensor containing the values that|
-:                  :                        : must be used for scattering.     :
-|`update_computation`| `XlaComputation`     | Computation to be used for       |
-:                  :                        : combining the existing values in :
-:                  :                        : the input tensor and the updates :
-:                  :                        : during scatter. This computation :
-:                  :                        : should be of type `T, T -> T`.   :
-|`index_vector_dim`| `int64`                | The dimension in                 |
-:                  :                        : `scatter_indices` that contains  :
-:                  :                        : the starting indices.            :
-|`update_window_dims`| `ArraySlice<int64>`  | The set of dimensions in         |
-:                  :                        : `updates` shape that are _window :
-:                  :                        : dimensions_.                     :
-|`inserted_window_dims`| `ArraySlice<int64>`| The set of _window dimensions_   |
-:                  :                        : that must be inserted into       :
-:                  :                        : `updates` shape.                 :
-|`scatter_dims_to_operand_dims`| `ArraySlice<int64>`  | A dimensions map from  |
-:                  :                        : the scatter indices to the       :
-:                  :                        : operand index space. This array  :
-:                  :                        : is interpreted as mapping `i` to :
-:                  :                        : `scatter_dims_to_operand_dims[i]`:
-:                  :                        : . It has to be one-to-one and    :
-:                  :                        : total.                           :
-
-If `index_vector_dim` is equal to `scatter_indices.rank` we implicitly consider
-`scatter_indices` to have a trailing `1` dimension.
-
-We define `update_scatter_dims` of type `ArraySlice<int64>` as the set of
-dimensions in `updates` shape that are not in `update_window_dims`, in ascending
-order.
-
-The arguments of scatter should follow these constraints:
-
-  - `updates` tensor must be of rank `update_window_dims.size +
-  scatter_indices.rank - 1`.
-
-  - Bounds of dimension `i` in `updates` must conform to the following:
-      - If `i` is present in `update_window_dims` (i.e. equal to
-        `update_window_dims`[`k`] for some `k`), then the bound of dimension
-        `i` in `updates` must not exceed the corresponding bound of `operand`
-        after accounting for the `inserted_window_dims` (i.e.
-        `adjusted_window_bounds`[`k`], where `adjusted_window_bounds` contains
-        the bounds of `operand` with the bounds at indices
-        `inserted_window_dims` removed).
-      - If `i` is present in `update_scatter_dims` (i.e. equal to
-        `update_scatter_dims`[`k`] for some `k`), then the bound of dimension
-        `i` in `updates` must be equal to the corresponding bound of
-        `scatter_indices`, skipping `index_vector_dim` (i.e.
-        `scatter_indices.shape.dims`[`k`], if `k` < `index_vector_dim` and
-        `scatter_indices.shape.dims`[`k+1`] otherwise).
-
-  - `update_window_dims` must be in ascending order, not have any repeating
-    dimension numbers, and be in the range `[0, updates.rank)`.
-
-  - `inserted_window_dims` must be in ascending order, not have any
-    repeating dimension numbers, and be in the range `[0, operand.rank)`.
-
-  - `scatter_dims_to_operand_dims.size` must be equal to
-    `scatter_indices`[`index_vector_dim`], and its values must be in the range
-    `[0, operand.rank)`.
-
-For a given index `U` in the `updates` tensor, the corresponding index `I` in
-the `operand` tensor into which this update has to be applied is computed as
-follows:
-
-  1. Let `G` = { `U`[`k`] for `k` in `update_scatter_dims` }. Use `G` to look up
-     an index vector `S` in the `scatter_indices` tensor such that `S`[`i`] =
-     `scatter_indices`[Combine(`G`, `i`)] where Combine(A, b) inserts b at
-     positions `index_vector_dim` into A.
-  2. Create an index `S`<sub>`in`</sub> into `operand` using `S` by scattering
-     `S` using the `scatter_dims_to_operand_dims` map. More formally:
-       1. `S`<sub>`in`</sub>[`scatter_dims_to_operand_dims`[`k`]] = `S`[`k`] if
-          `k` < `scatter_dims_to_operand_dims.size`.
-       2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
-  3. Create an index `W`<sub>`in`</sub> into `operand` by scattering the indices
-     at `update_window_dims` in `U` according to `inserted_window_dims`.
-     More formally:
-       1. `W`<sub>`in`</sub>[`window_dims_to_operand_dims`(`k`)] = `U`[`k`] if
-          `k` < `update_window_dims.size`, where `window_dims_to_operand_dims`
-          is the monotonic function with domain [`0`, `update_window_dims.size`)
-          and range [`0`, `operand.rank`) \\ `inserted_window_dims`. (For
-          example, if `update_window_dims.size` is `4`, `operand.rank` is `6`,
-          and `inserted_window_dims` is {`0`, `2`} then
-          `window_dims_to_operand_dims` is {`0`→`1`, `1`→`3`, `2`→`4`,
-          `3`→`5`}).
-       2. `W`<sub>`in`</sub>[`_`] = `0` otherwise.
-  4. `I` is `W`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
-     addition.
-
-In summary, the scatter operation can be defined as follows.
-
-   - Initialize `output` with `operand`, i.e. for all indices `O` in the
-     `operand` tensor:\
-       `output`[`O`] = `operand`[`O`]
-   - For every index `U` in the `updates` tensor and the corresponding index `O`
-     in the `operand` tensor:\
-       `output`[`O`] = `update_computation`(`output`[`O`], `updates`[`U`])
-
-The order in which updates are applied is non-deterministic. So, when multiple
-indices in `updates` refer to the same index in `operand`, the corresponding
-value in `output` will be non-deterministic.
-
-Note that the first parameter that is passed into the `update_computation` will
-always be the current value from the `output` tensor and the second parameter
-will always be the value from the `updates` tensor. This is important
-specifically for cases when the `update_computation` is _not commutative_.
-
-Informally, the scatter op can be viewed as an _inverse_ of the gather op, i.e.
-the scatter op updates the elements in the input that are extracted by the
-corresponding gather op.
-
-For a detailed informal description and examples, refer to the
-"Informal Description" section under `Gather`.
-
-## Select
-
-See also
-[`XlaBuilder::Select`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Constructs an output array from elements of two input arrays, based on the
-values of a predicate array.
-
-<b> `Select(pred, on_true, on_false)` </b>
-
-Arguments  | Type    | Semantics
----------- | ------- | ------------------
-`pred`     | `XlaOp` | array of type PRED
-`on_true`  | `XlaOp` | array of type T
-`on_false` | `XlaOp` | array of type T
-
-The arrays `on_true` and `on_false` must have the same shape. This is also the
-shape of the output array. The array `pred` must have the same dimensionality as
-`on_true` and `on_false`, with the `PRED` element type.
-
-For each element `P` of `pred`, the corresponding element of the output array is
-taken from `on_true` if the value of `P` is `true`, and from `on_false` if the
-value of `P` is `false`. As a restricted form of [broadcasting]
-(broadcasting.md), `pred` can be a scalar of type `PRED`. In this case, the
-output array is taken wholly from `on_true` if `pred` is `true`, and from
-`on_false` if `pred` is `false`.
-
-Example with non-scalar `pred`:
-
-```
-let pred: PRED[4] = {true, false, false, true};
-let v1: s32[4] = {1, 2, 3, 4};
-let v2: s32[4] = {100, 200, 300, 400};
-==>
-Select(pred, v1, v2) = s32[4]{1, 200, 300, 4};
-```
-
-Example with scalar `pred`:
-
-```
-let pred: PRED = true;
-let v1: s32[4] = {1, 2, 3, 4};
-let v2: s32[4] = {100, 200, 300, 400};
-==>
-Select(pred, v1, v2) = s32[4]{1, 2, 3, 4};
-```
-
-Selections between tuples are supported. Tuples are considered to be scalar
-types for this purpose. If `on_true` and `on_false` are tuples (which must have
-the same shape!) then `pred` has to be a scalar of type `PRED`.
-
-## SelectAndScatter
-
-See also
-[`XlaBuilder::SelectAndScatter`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-This operation can be considered as a composite operation that first computes
-`ReduceWindow` on the `operand` array to select an element from each window, and
-then scatters the `source` array to the indices of the selected elements to
-construct an output array with the same shape as the operand array. The binary
-`select` function is used to select an element from each window by applying it
-across each window, and it is called with the property that the first
-parameter's index vector is lexicographically less than the second parameter's
-index vector. The `select` function returns `true` if the first parameter is
-selected and returns `false` if the second parameter is selected, and the
-function must hold transitivity (i.e., if `select(a, b)` and `select(b, c)` are
-`true`, then `select(a, c)` is also `true`) so that the selected element does
-not depend on the order of the elements traversed for a given window.
-
-The function `scatter` is applied at each selected index in the output array. It
-takes two scalar parameters:
-
-1.  Current value at the selected index in the output array
-2.  The scatter value from `source` that applies to the selected index
-
-It combines the two parameters and returns a scalar value that's used to update
-the value at the selected index in the output array. Initially, all indices of
-the output array are set to `init_value`.
-
-The output array has the same shape as the `operand` array and the `source`
-array must have the same shape as the result of applying a `ReduceWindow`
-operation on the `operand` array. `SelectAndScatter` can be used to
-backpropagate the gradient values for a pooling layer in a neural network.
-
-<b>`SelectAndScatter(operand, select, window_dimensions, window_strides,
-padding, source, init_value, scatter)`</b>
-
-| Arguments           | Type                | Semantics                        |
-| ------------------- | ------------------- | -------------------------------- |
-| `operand`           | `XlaOp`             | array of type T over which the   |
-:                     :                     : windows slide                    :
-| `select`            | `XlaComputation`    | binary computation of type `T, T |
-:                     :                     : -> PRED`, to apply to all        :
-:                     :                     : elements in each window; returns :
-:                     :                     : `true` if the first parameter is :
-:                     :                     : selected and returns `false` if  :
-:                     :                     : the second parameter is selected :
-| `window_dimensions` | `ArraySlice<int64>` | array of integers for window     |
-:                     :                     : dimension values                 :
-| `window_strides`    | `ArraySlice<int64>` | array of integers for window     |
-:                     :                     : stride values                    :
-| `padding`           | `Padding`           | padding type for window          |
-:                     :                     : (Padding\:\:kSame or             :
-:                     :                     : Padding\:\:kValid)               :
-| `source`            | `XlaOp`             | array of type T with the values  |
-:                     :                     : to scatter                       :
-| `init_value`        | `XlaOp`             | scalar value of type T for the   |
-:                     :                     : initial value of the output      :
-:                     :                     : array                            :
-| `scatter`           | `XlaComputation`    | binary computation of type `T, T |
-:                     :                     : -> T`, to apply each scatter     :
-:                     :                     : source element with its          :
-:                     :                     : destination element              :
-
-The figure below shows examples of using `SelectAndScatter`, with the `select`
-function computing the maximal value among its parameters. Note that when the
-windows overlap, as in the figure (2) below, an index of the `operand` array may
-be selected multiple times by different windows. In the figure, the element of
-value 9 is selected by both of the top windows (blue and red) and the binary
-addition `scatter` function produces the output element of value 8 (2 + 6).
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%"
-    src="https://www.tensorflow.org/images/ops_scatter_to_selected_window_element.png">
-</div>
-
-The evaluation order of the `scatter` function is arbitrary and may be
-non-deterministic. Therefore, the `scatter` function should not be overly
-sensitive to reassociation. See the discussion about associativity in the
-context of [`Reduce`](#reduce) for more details.
-
-## Send
-
-See also
-[`XlaBuilder::Send`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `Send(operand, channel_handle)` </b>
-
-Arguments        | Type            | Semantics
----------------- | --------------- | -----------------------------------------
-`operand`        | `XlaOp`         | data to send (array of type T)
-`channel_handle` | `ChannelHandle` | unique identifier for each send/recv pair
-
-Sends the given operand data to a `Recv` instruction in another computation
-that shares the same channel handle. Does not return any data.
-
-Similar to the `Recv` operation, the client API of `Send` operation represents
-synchronous communication, and is internally decomposed into 2 HLO instructions
-(`Send` and `SendDone`) to enable asynchronous data transfers. See also
-[`HloInstruction::CreateSend` and `HloInstruction::CreateSendDone`](https://www.tensorflow.org/code/tensorflow/compiler/xla/service/hlo_instruction.h).
-
-<b>`Send(HloInstruction operand, int64 channel_id)`</b>
-
-Initiates an asynchronous transfer of the operand to the resources allocated by
-the `Recv` instruction with the same channel id. Returns a context, which is
-used by a following `SendDone` instruction to wait for the completion of the
-data transfer. The context is a tuple of {operand (shape), request identifier
-(U32)} and it can only be used by a `SendDone` instruction.
-
-<b> `SendDone(HloInstruction context)` </b>
-
-Given a context created by a `Send` instruction, waits for the data transfer to
-complete.  The instruction does not return any data.
-
-<b> Scheduling of channel instructions </b>
-
-The execution order of the 4 instructions for each channel (`Recv`, `RecvDone`,
-`Send`, `SendDone`) is as below.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:70%" src="../../images/send_recv_order.png">
-</div>
-
-* `Recv` happens before `Send`
-* `Send` happens before `RecvDone`
-* `Recv` happens before `RecvDone`
-* `Send` happens before `SendDone`
-
-When the backend compilers generate a linear schedule for each computation that
-communicates via channel instructions, there must not be cycles across the
-computations. For example, below schedules lead to deadlocks.
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="../../images/send_recv_schedule.png">
-</div>
-
-## Slice
-
-See also
-[`XlaBuilder::Slice`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-Slicing extracts a sub-array from the input array. The sub-array is of the same
-rank as the input and contains the values inside a bounding box within the input
-array where the dimensions and indices of the bounding box are given as
-arguments to the slice operation.
-
-<b> `Slice(operand, start_indices, limit_indices)` </b>
-
-| Arguments       | Type                | Semantics                            |
-| --------------- | ------------------- | ------------------------------------ |
-| `operand`       | `XlaOp`             | N dimensional array of type T        |
-| `start_indices` | `ArraySlice<int64>` | List of N integers containing the    |
-:                 :                     : starting indices of the slice for    :
-:                 :                     : each dimension. Values must be       :
-:                 :                     : greater than or equal to zero.       :
-| `limit_indices` | `ArraySlice<int64>` | List of N integers containing the    |
-:                 :                     : ending indices (exclusive) for the   :
-:                 :                     : slice for each dimension. Each value :
-:                 :                     : must be greater than or equal to the :
-:                 :                     : respective `start_indices` value for :
-:                 :                     : the dimension and less than or equal :
-:                 :                     : to the size of the dimension.        :
-
-1-dimensional example:
-
-```
-let a = {0.0, 1.0, 2.0, 3.0, 4.0}
-Slice(a, {2}, {4}) produces:
-  {2.0, 3.0}
-```
-
-2-dimensional example:
-
-```
-let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
-
-Slice(b, {2, 1}, {4, 3}) produces:
-  { { 7.0,  8.0},
-    {10.0, 11.0} }
-```
-
-## Sort
-
-See also
-[`XlaBuilder::Sort`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-There are two versions of the Sort instruction: a single-operand and a
-two-operand version.
-
-<b>`Sort(operand)`</b>
-
-Arguments   | Type    | Semantics
------------ | ------- | --------------------
-`operand`   | `XlaOp` | The operand to sort.
-`dimension` | `int64` | The dimension along which to sort.
-
-Sorts the elements in the operand in ascending order along the provided
-dimension. For example, for a rank-2 (matrix) operand, a `dimension` value of 0
-will sort each column independently, and a `dimension` value of 1 will sort each
-row independently. If the operand's elements have floating point type, and the
-operand contains NaN elements, the order of elements in the output is
-implementation-defined.
-
-<b>`Sort(key, value)`</b>
-
-Sorts both the key and the value operands. The keys are sorted as in the
-single-operand version. The values are sorted according to the order of their
-corresponding keys. For example, if the inputs are `keys = [3, 1]` and
-`values = [42, 50]`, then the output of the sort is the tuple 
-`{[1, 3], [50, 42]}`.
-
-The sort is not guaranteed to be stable, that is, if the keys array contains
-duplicates, the order of their corresponding values may not be preserved.
-
-Arguments   | Type    | Semantics
------------ | ------- | -------------------
-`keys`      | `XlaOp` | The sort keys.
-`values`    | `XlaOp` | The values to sort.
-`dimension` | `int64` | The dimension along which to sort.
-
-The `keys` and `values` must have the same dimensions, but may have different
-element types.
-
-## Transpose
-
-See also the `tf.reshape` operation.
-
-<b>`Transpose(operand)`</b>
-
-Arguments     | Type                | Semantics
-------------- | ------------------- | ------------------------------
-`operand`     | `XlaOp`             | The operand to transpose.
-`permutation` | `ArraySlice<int64>` | How to permute the dimensions.
-
-
-Permutes the operand dimensions with the given permutation, so
-`∀ i . 0 ≤ i < rank ⇒ input_dimensions[permutation[i]] = output_dimensions[i]`.
-
-This is the same as Reshape(operand, permutation,
-                            Permute(permutation, operand.shape.dimensions)).
-
-## Tuple
-
-See also
-[`XlaBuilder::Tuple`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-A tuple containing a variable number of data handles, each of which has its own
-shape.
-
-This is analogous to `std::tuple` in C++. Conceptually:
-
-```
-let v: f32[10] = f32[10]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-let s: s32 = 5;
-let t: (f32[10], s32) = tuple(v, s);
-```
-
-Tuples can be deconstructed (accessed) via the [`GetTupleElement`]
-(#gettupleelement) operation.
-
-## While
-
-See also
-[`XlaBuilder::While`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
-
-<b> `While(condition, body, init)` </b>
-
-| Arguments   | Type             | Semantics                                |
-| ----------- | ---------------- | ---------------------------------------- |
-| `condition` | `XlaComputation` | XlaComputation of type `T -> PRED` which |
-:             :                  : defines the termination condition of the :
-:             :                  : loop.                                    :
-| `body`      | `XlaComputation` | XlaComputation of type `T -> T` which    |
-:             :                  : defines the body of the loop.            :
-| `init`      | `T`              | Initial value for the parameter of       |
-:             :                  : `condition` and `body`.                  :
-
-Sequentially executes the `body` until the `condition` fails. This is similar to
-a typical while loop in many other languages except for the differences and
-restrictions listed below.
-
-*   A `While` node returns a value of type `T`, which is the result from the
-    last execution of the `body`.
-*   The shape of the type `T` is statically determined and must be the same
-    across all iterations.
-
-The T parameters of the computations are initialized with the `init` value in
-the first iteration and are automatically updated to the new result from `body`
-in each subsequent iteration.
-
-One main use case of the `While` node is to implement the repeated execution of
-training in neural networks. Simplified pseudocode is shown below with a graph
-that represents the computation. The code can be found in
-[`while_test.cc`](https://www.tensorflow.org/code/tensorflow/compiler/xla/tests/while_test.cc).
-The type `T` in this example is a `Tuple` consisting of an `int32` for the
-iteration count and a `vector[10]` for the accumulator. For 1000 iterations, the
-loop keeps adding a constant vector to the accumulator.
-
-```
-// Pseudocode for the computation.
-init = {0, zero_vector[10]} // Tuple of int32 and float[10].
-result = init;
-while (result(0) < 1000) {
-  iteration = result(0) + 1;
-  new_vector = result(1) + constant_vector[10];
-  result = {iteration, new_vector};
-}
-```
-
-<div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="https://www.tensorflow.org/images/ops_while.png">
-</div>
-- 
GitLab


From 57d31aa599c83014397a22bbb8f1a27a33b0ade3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 4 Oct 2018 22:30:20 -0700
Subject: [PATCH 0413/1085] Remove dependency on epsilon for diagonal shampoo.

PiperOrigin-RevId: 215857772
---
 .../contrib/opt/python/training/shampoo.py       | 16 +++++++++++-----
 .../contrib/opt/python/training/shampoo_test.py  |  8 ++++----
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/shampoo.py b/tensorflow/contrib/opt/python/training/shampoo.py
index f161521b97..e542f46892 100644
--- a/tensorflow/contrib/opt/python/training/shampoo.py
+++ b/tensorflow/contrib/opt/python/training/shampoo.py
@@ -108,7 +108,8 @@ class ShampooOptimizer(optimizer.Optimizer):
       precond_update_interval: We should update the preconditioners after
                                this many steps. Default = 1. Usually less than
                                svd_interval.
-      epsilon:  epsilon * I_n is added to each mat_gbar_j for stability
+      epsilon:  epsilon * I_n is added to each mat_gbar_j for stability for
+                non-diagonal version of shampoo.
       alpha:  total power of the preconditioners.
       use_iterative_root: should the optimizer use SVD (faster) or the
                           iterative root method (for TPU) for finding the
@@ -394,15 +395,20 @@ class ShampooOptimizer(optimizer.Optimizer):
           assert self._mat_gbar_decay == 1.0
           mat_g_updated = state_ops.scatter_add(mat_g, indices,
                                                 mat_gbar_weight_t * grad_outer)
-          mat_h = math_ops.pow(
-              array_ops.gather(mat_g_updated, indices) + self._epsilon,
-              neg_alpha)
+          mat_g_updated_slice = array_ops.gather(mat_g_updated, indices)
+          mat_h = array_ops.where(
+              math_ops.greater(mat_g_updated_slice, 0),
+              math_ops.pow(mat_g_updated_slice, neg_alpha),
+              array_ops.zeros_like(mat_g_updated_slice))
         else:
           mat_g_updated = self._weighted_average(mat_g,
                                                  self._mat_gbar_decay,
                                                  mat_gbar_decay_t,
                                                  mat_gbar_weight_t * grad_outer)
-          mat_h = math_ops.pow(mat_g_updated + self._epsilon, neg_alpha)
+          mat_h = array_ops.where(
+              math_ops.greater(mat_g_updated, 0),
+              math_ops.pow(mat_g_updated, neg_alpha),
+              array_ops.zeros_like(mat_g_updated))
 
         # Need to do the transpose to ensure that the tensor becomes
         # a d_{i+1} x ... x d_n x d_0 x ... d_i tensor as described above.
diff --git a/tensorflow/contrib/opt/python/training/shampoo_test.py b/tensorflow/contrib/opt/python/training/shampoo_test.py
index a2fd8fbd87..e88c8221a0 100644
--- a/tensorflow/contrib/opt/python/training/shampoo_test.py
+++ b/tensorflow/contrib/opt/python/training/shampoo_test.py
@@ -279,7 +279,7 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
       # Update rule is var = var - lr * gg^{-0.5} * grad
       # lr = 1
       mat_g = (grad_np * grad_np)
-      new_val_np = init_var_np - np.power(mat_g + RIDGE_EPSILON, -0.5) * grad_np
+      new_val_np = init_var_np - np.power(mat_g, -0.5) * grad_np
 
       self.assertAllCloseAccordingToType(
           new_val_np, new_val, atol=TOLERANCE, rtol=TOLERANCE)
@@ -288,7 +288,7 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
       new_val = sess.run(var)
 
       mat_g += (grad_np_2 * grad_np_2)
-      new_val_np -= np.power(mat_g + RIDGE_EPSILON, -0.5) * grad_np_2
+      new_val_np -= np.power(mat_g, -0.5) * grad_np_2
 
       self.assertAllCloseAccordingToType(
           new_val_np, new_val, atol=TOLERANCE, rtol=TOLERANCE)
@@ -339,7 +339,7 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
 
       mat_g1 = np.sum(
           grad_np * grad_np, axis=1, keepdims=True) / grad_np.shape[0]
-      mat_left = np.power(mat_g1 + RIDGE_EPSILON, -0.25)
+      mat_left = np.power(mat_g1, -0.25)
       mat_g2 = np.dot(grad_np.transpose(), grad_np) / grad_np.shape[1]
       mat_right = np_power(mat_g2 + RIDGE_EPSILON * np.eye(size[1]), -0.25)
       new_val_np = init_var_np - np.dot(grad_np * mat_left, mat_right)
@@ -353,7 +353,7 @@ class ShampooTest(test.TestCase, parameterized.TestCase):
 
       mat_g1 += np.sum(
           grad_np_2 * grad_np_2, axis=1, keepdims=True) / grad_np_2.shape[0]
-      mat_left = np.power(mat_g1 + RIDGE_EPSILON, -0.25)
+      mat_left = np.power(mat_g1, -0.25)
       mat_g2 += np.dot(grad_np_2.transpose(), grad_np_2) / grad_np_2.shape[1]
       mat_right = np_power(mat_g2 + RIDGE_EPSILON * np.eye(size[1]), -0.25)
       new_val_np -= np.dot(grad_np_2 * mat_left, mat_right)
-- 
GitLab


From 3b94d75a9e10ef8ef33760d0ef6aad326e1353ba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 01:22:02 -0700
Subject: [PATCH 0414/1085] Merge the different LSTM EvalFloat/EvalHybrid calls
 into a single file.

PiperOrigin-RevId: 215870962
---
 tensorflow/contrib/lite/kernels/BUILD         |  13 +-
 .../kernels/bidirectional_sequence_lstm.cc    | 333 +------
 .../lite/kernels/internal/kernel_utils.cc     | 598 ------------
 .../lite/kernels/internal/kernel_utils.h      | 184 ----
 tensorflow/contrib/lite/kernels/lstm.cc       | 300 +-----
 tensorflow/contrib/lite/kernels/lstm_eval.cc  | 909 ++++++++++++++++++
 tensorflow/contrib/lite/kernels/lstm_eval.h   |  79 ++
 .../kernels/unidirectional_sequence_lstm.cc   | 310 +-----
 8 files changed, 1061 insertions(+), 1665 deletions(-)
 create mode 100644 tensorflow/contrib/lite/kernels/lstm_eval.cc
 create mode 100644 tensorflow/contrib/lite/kernels/lstm_eval.h

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index 95e387814d..68636fb070 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -234,11 +234,11 @@ cc_library(
         ":activation_functor",
         ":eigen_support",
         ":kernel_util",
+        ":lstm_eval",
         ":op_macros",
         ":padding",
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite:string_util",
-        "//tensorflow/contrib/lite:util",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "//tensorflow/contrib/lite/kernels:gemm_support",
         "//tensorflow/contrib/lite/kernels/internal:audio_utils",
@@ -254,6 +254,17 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "lstm_eval",
+    srcs = ["lstm_eval.cc"],
+    hdrs = ["lstm_eval.h"],
+    deps = [
+        "//tensorflow/contrib/lite/c:c_api_internal",
+        "//tensorflow/contrib/lite/kernels/internal:kernel_utils",
+        "//tensorflow/contrib/lite/kernels/internal:tensor_utils",
+    ],
+)
+
 cc_library(
     name = "builtin_ops",
     srcs = ["register.cc"],
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index 0532528f52..a326827b1e 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h"
 #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/lstm_eval.h"
 #include "tensorflow/contrib/lite/kernels/op_macros.h"
 
 namespace tflite {
@@ -694,330 +695,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-TfLiteStatus EvalFloat(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
-    const TfLiteTensor* aux_input_to_input_weights,
-    const TfLiteTensor* aux_input_to_forget_weights,
-    const TfLiteTensor* aux_input_to_cell_weights,
-    const TfLiteTensor* aux_input_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
-    TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
-    TfLiteTensor* cell_state, TfLiteTensor* output) {
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
-  const int n_input = input->dims->data[2];
-  const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0;
-
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existense of only one to the get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  // Index the scratch buffers pointers to the global scratch buffer.
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  const float* input_to_input_weights_ptr =
-      (use_cifg) ? nullptr : input_to_input_weights->data.f;
-  const float* recurrent_to_input_weights_ptr =
-      (use_cifg) ? nullptr : recurrent_to_input_weights->data.f;
-  const float* input_gate_bias_ptr =
-      (use_cifg) ? nullptr : input_gate_bias->data.f;
-  const float* cell_to_input_weights_ptr =
-      (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr;
-  const float* cell_to_forget_weights_ptr =
-      (use_peephole) ? cell_to_forget_weights->data.f : nullptr;
-  const float* cell_to_output_weights_ptr =
-      (use_peephole) ? cell_to_output_weights->data.f : nullptr;
-  const float* projection_weights_ptr =
-      (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  float* aux_input_ptr = nullptr;
-  float* aux_input_to_input_weights_ptr = nullptr;
-  float* aux_input_to_forget_weights_ptr = nullptr;
-  float* aux_input_to_cell_weights_ptr = nullptr;
-  float* aux_input_to_output_weights_ptr = nullptr;
-  if (aux_input_size > 0) {
-    aux_input_ptr = aux_input->data.f;
-    aux_input_to_input_weights_ptr = aux_input_to_input_weights->data.f;
-    aux_input_to_forget_weights_ptr = aux_input_to_forget_weights->data.f;
-    aux_input_to_cell_weights_ptr = aux_input_to_cell_weights->data.f;
-    aux_input_to_output_weights_ptr = aux_input_to_output_weights->data.f;
-  }
-
-  // Loop through the sequence.
-  const int input_step = n_batch * n_input;
-  const int output_step = n_batch * output->dims->data[2];
-  for (int t = 0; t < max_time; t++) {
-    // If this is the forward_sequence, step forward, otherwise step backwards.
-    const int t_rel = forward_sequence ? t : max_time - t - 1;
-    const float* input_ptr = input->data.f + t_rel * input_step;
-    float* output_ptr_time =
-        output->data.f + t_rel * output_step + output_offset;
-
-    kernel_utils::LstmStepWithAuxInput(
-        input_ptr, input_to_input_weights_ptr, input_to_forget_weights->data.f,
-        input_to_cell_weights->data.f, input_to_output_weights->data.f,
-        aux_input_ptr, aux_input_to_input_weights_ptr,
-        aux_input_to_forget_weights_ptr, aux_input_to_cell_weights_ptr,
-        aux_input_to_output_weights_ptr, recurrent_to_input_weights_ptr,
-        recurrent_to_forget_weights->data.f, recurrent_to_cell_weights->data.f,
-        recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
-        cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
-        input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
-        output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
-        params, n_batch, n_cell, n_input, aux_input_size, n_output,
-        activation_state->data.f, cell_state->data.f, input_gate_scratch,
-        forget_gate_scratch, cell_scratch, output_gate_scratch,
-        output_ptr_time);
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus EvalHybrid(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
-    const TfLiteTensor* aux_input_to_input_weights,
-    const TfLiteTensor* aux_input_to_forget_weights,
-    const TfLiteTensor* aux_input_to_cell_weights,
-    const TfLiteTensor* aux_input_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
-    TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
-    TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
-    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
-    TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
-    TfLiteTensor* output_state, TfLiteTensor* cell_state,
-    TfLiteTensor* output) {
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
-  const int n_input = input->dims->data[2];
-  const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0;
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  int8_t* input_to_input_weights_ptr = nullptr;
-  float input_to_input_weights_scale = 1.0f;
-  int8_t* recurrent_to_input_weights_ptr = nullptr;
-  float recurrent_to_input_weights_scale = 1.0f;
-  float* input_gate_bias_ptr = nullptr;
-  if (!use_cifg) {
-    input_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(input_to_input_weights->data.uint8);
-    recurrent_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(recurrent_to_input_weights->data.uint8);
-    input_gate_bias_ptr = input_gate_bias->data.f;
-    input_to_input_weights_scale = input_to_input_weights->params.scale;
-    recurrent_to_input_weights_scale = recurrent_to_input_weights->params.scale;
-  }
-
-  int8_t* cell_to_input_weights_ptr = nullptr;
-  int8_t* cell_to_forget_weights_ptr = nullptr;
-  int8_t* cell_to_output_weights_ptr = nullptr;
-  float cell_to_input_weights_scale = 1.0f;
-  float cell_to_forget_weights_scale = 1.0f;
-  float cell_to_output_weights_scale = 1.0f;
-  if (use_peephole) {
-    if (!use_cifg) {
-      cell_to_input_weights_ptr =
-          reinterpret_cast<int8_t*>(cell_to_input_weights->data.uint8);
-      cell_to_input_weights_scale = cell_to_input_weights->params.scale;
-    }
-    cell_to_forget_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_forget_weights->data.uint8);
-    cell_to_output_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_output_weights->data.uint8);
-    cell_to_forget_weights_scale = cell_to_forget_weights->params.scale;
-    cell_to_output_weights_scale = cell_to_output_weights->params.scale;
-  }
-
-  const int8_t* projection_weights_ptr =
-      (projection_weights == nullptr)
-          ? nullptr
-          : reinterpret_cast<int8_t*>(projection_weights->data.uint8);
-  const float projection_weights_scale =
-      (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const int8_t* input_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_forget_weights->data.uint8);
-  const float input_to_forget_weights_scale =
-      input_to_forget_weights->params.scale;
-  const int8_t* input_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_cell_weights->data.uint8);
-  const float input_to_cell_weights_scale = input_to_cell_weights->params.scale;
-  const int8_t* input_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_output_weights->data.uint8);
-  const float input_to_output_weights_scale =
-      input_to_output_weights->params.scale;
-  const int8_t* recurrent_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_forget_weights->data.uint8);
-  const float recurrent_to_forget_weights_scale =
-      recurrent_to_forget_weights->params.scale;
-  const int8_t* recurrent_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_cell_weights->data.uint8);
-  const float recurrent_to_cell_weights_scale =
-      recurrent_to_cell_weights->params.scale;
-  const int8_t* recurrent_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_output_weights->data.uint8);
-  const float recurrent_to_output_weights_scale =
-      recurrent_to_output_weights->params.scale;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* output_state_ptr = output_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-
-  // Temporary storage for quantized values and scaling factors.
-  int8_t* quantized_input_ptr =
-      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
-  int8_t* quantized_aux_input_ptr =
-      (aux_input_quantized == nullptr)
-          ? nullptr
-          : reinterpret_cast<int8_t*>(aux_input_quantized->data.uint8);
-  int8_t* quantized_output_state_ptr =
-      reinterpret_cast<int8_t*>(output_state_quantized->data.uint8);
-  int8_t* quantized_cell_state_ptr =
-      reinterpret_cast<int8_t*>(cell_state_quantized->data.uint8);
-  float* scaling_factors_ptr = scaling_factors->data.f;
-  float* prod_scaling_factors_ptr = prod_scaling_factors->data.f;
-  float* recovered_cell_weights_ptr = recovered_cell_weights->data.f;
-
-  // Auxiliary input and weights.
-  float* aux_input_ptr = nullptr;
-  int8_t* aux_input_to_input_weights_ptr = nullptr;
-  int8_t* aux_input_to_forget_weights_ptr = nullptr;
-  int8_t* aux_input_to_cell_weights_ptr = nullptr;
-  int8_t* aux_input_to_output_weights_ptr = nullptr;
-  float aux_input_to_input_weights_scale = 0.0f;
-  float aux_input_to_forget_weights_scale = 0.0f;
-  float aux_input_to_cell_weights_scale = 0.0f;
-  float aux_input_to_output_weights_scale = 0.0f;
-  if (aux_input_size > 0) {
-    aux_input_ptr = aux_input->data.f;
-    aux_input_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(aux_input_to_input_weights->data.uint8);
-    aux_input_to_forget_weights_ptr =
-        reinterpret_cast<int8_t*>(aux_input_to_forget_weights->data.uint8);
-    aux_input_to_cell_weights_ptr =
-        reinterpret_cast<int8_t*>(aux_input_to_cell_weights->data.uint8);
-    aux_input_to_output_weights_ptr =
-        reinterpret_cast<int8_t*>(aux_input_to_output_weights->data.uint8);
-    aux_input_to_input_weights_scale = aux_input_to_input_weights->params.scale;
-    aux_input_to_forget_weights_scale =
-        aux_input_to_forget_weights->params.scale;
-    aux_input_to_cell_weights_scale = aux_input_to_cell_weights->params.scale;
-    aux_input_to_output_weights_scale =
-        aux_input_to_output_weights->params.scale;
-  }
-
-  // Feed the sequence into the LSTM step-by-step.
-  const int input_step = n_batch * n_input;
-  const int output_step = n_batch * output->dims->data[2];
-  for (int t = 0; t < max_time; t++) {
-    // If this is the forward_sequence, step forward, otherwise step backwards.
-    const int t_rel = forward_sequence ? t : max_time - t - 1;
-    const float* input_ptr = input->data.f + t_rel * input_step;
-    float* output_ptr = output->data.f + t_rel * output_step + output_offset;
-
-    kernel_utils::LstmStepWithAuxInput(
-        input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
-        input_to_forget_weights_ptr, input_to_forget_weights_scale,
-        input_to_cell_weights_ptr, input_to_cell_weights_scale,
-        input_to_output_weights_ptr, input_to_output_weights_scale,
-        aux_input_ptr, aux_input_to_input_weights_ptr,
-        aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
-        aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
-        aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
-        aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
-        recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
-        recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
-        recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
-        recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
-        cell_to_input_weights_scale, cell_to_forget_weights_ptr,
-        cell_to_forget_weights_scale, cell_to_output_weights_ptr,
-        cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr,
-        cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
-        projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell,
-        n_input, aux_input_size, n_output, input_gate_scratch,
-        forget_gate_scratch, cell_scratch, output_gate_scratch,
-        scaling_factors_ptr, prod_scaling_factors_ptr,
-        recovered_cell_weights_ptr, quantized_input_ptr,
-        quantized_aux_input_ptr, quantized_output_state_ptr,
-        quantized_cell_state_ptr, output_state_ptr, cell_state_ptr, output_ptr);
-  }
-
-  return kTfLiteOk;
-}
-
 // The LSTM Op engine.
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto* params = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
@@ -1157,7 +834,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (fw_input_to_output_weights->type) {
     case kTfLiteFloat32: {
-      TfLiteStatus fw_pass_status = EvalFloat(
+      TfLiteStatus fw_pass_status = lstm_eval::EvalFloat(
           input, fw_input_to_input_weights, fw_input_to_forget_weights,
           fw_input_to_cell_weights, fw_input_to_output_weights,
           fw_recurrent_to_input_weights, fw_recurrent_to_forget_weights,
@@ -1172,7 +849,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_activation_state, fw_cell_state, fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
 
-      TfLiteStatus bw_pass_status = EvalFloat(
+      TfLiteStatus bw_pass_status = lstm_eval::EvalFloat(
           input, bw_input_to_input_weights, bw_input_to_forget_weights,
           bw_input_to_cell_weights, bw_input_to_output_weights,
           bw_recurrent_to_input_weights, bw_recurrent_to_forget_weights,
@@ -1208,7 +885,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       TfLiteTensor* recovered_cell_weights =
           GetTemporary(context, node, kRecoveredCellWeights);
 
-      TfLiteStatus fw_pass_status = EvalHybrid(
+      TfLiteStatus fw_pass_status = lstm_eval::EvalHybrid(
           input, fw_input_to_input_weights, fw_input_to_forget_weights,
           fw_input_to_cell_weights, fw_input_to_output_weights,
           fw_recurrent_to_input_weights, fw_recurrent_to_forget_weights,
@@ -1226,7 +903,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
 
-      TfLiteStatus bw_pass_status = EvalHybrid(
+      TfLiteStatus bw_pass_status = lstm_eval::EvalHybrid(
           input, bw_input_to_input_weights, bw_input_to_forget_weights,
           bw_input_to_cell_weights, bw_input_to_output_weights,
           bw_recurrent_to_input_weights, bw_recurrent_to_forget_weights,
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
index 56e9367878..083e5839bd 100644
--- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
@@ -169,603 +169,5 @@ void RnnBatchStep(
                                         hidden_state_ptr_batch);
 }
 
-void LstmStep(
-    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
-    const float* input_to_forget_weights_ptr,
-    const float* input_to_cell_weights_ptr,
-    const float* input_to_output_weights_ptr,
-    const float* recurrent_to_input_weights_ptr,
-    const float* recurrent_to_forget_weights_ptr,
-    const float* recurrent_to_cell_weights_ptr,
-    const float* recurrent_to_output_weights_ptr,
-    const float* cell_to_input_weights_ptr,
-    const float* cell_to_forget_weights_ptr,
-    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
-    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
-    int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr,
-    float* cell_state_ptr, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* output_ptr_batch) {
-  LstmStepWithAuxInput(
-      input_ptr_batch, input_to_input_weights_ptr, input_to_forget_weights_ptr,
-      input_to_cell_weights_ptr, input_to_output_weights_ptr,
-      /*aux_input_ptr_batch=*/nullptr,
-      /*aux_input_to_input_weights_ptr=*/nullptr,
-      /*aux_input_to_forget_weights_ptr=*/nullptr,
-      /*aux_input_to_cell_weights_ptr=*/nullptr,
-      /*aux_input_to_output_weights_ptr=*/nullptr,
-      recurrent_to_input_weights_ptr, recurrent_to_forget_weights_ptr,
-      recurrent_to_cell_weights_ptr, recurrent_to_output_weights_ptr,
-      cell_to_input_weights_ptr, cell_to_forget_weights_ptr,
-      cell_to_output_weights_ptr, input_gate_bias_ptr, forget_gate_bias_ptr,
-      cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
-      projection_bias_ptr, params, n_batch, n_cell, n_input, /*n_aux_input=*/0,
-      n_output, output_state_ptr, cell_state_ptr, input_gate_scratch,
-      forget_gate_scratch, cell_scratch, output_gate_scratch, output_ptr_batch);
-}
-
-void LstmStepWithAuxInput(
-    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
-    const float* input_to_forget_weights_ptr,
-    const float* input_to_cell_weights_ptr,
-    const float* input_to_output_weights_ptr, const float* aux_input_ptr_batch,
-    const float* aux_input_to_input_weights_ptr,
-    const float* aux_input_to_forget_weights_ptr,
-    const float* aux_input_to_cell_weights_ptr,
-    const float* aux_input_to_output_weights_ptr,
-    const float* recurrent_to_input_weights_ptr,
-    const float* recurrent_to_forget_weights_ptr,
-    const float* recurrent_to_cell_weights_ptr,
-    const float* recurrent_to_output_weights_ptr,
-    const float* cell_to_input_weights_ptr,
-    const float* cell_to_forget_weights_ptr,
-    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
-    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
-    int n_batch, int n_cell, int n_input, int n_aux_input, int n_output,
-    float* output_state_ptr, float* cell_state_ptr, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* output_ptr_batch) {
-  // Since we have already checked that weights are all there or none, we can
-  // check the existense of only one to the get the condition.
-  const bool use_cifg = (input_to_input_weights_ptr == nullptr);
-  const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
-  // Initialize scratch buffers with bias.
-  if (!use_cifg) {
-    tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch,
-                                          input_gate_scratch);
-  }
-  tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
-                                        forget_gate_scratch);
-  tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
-                                        cell_scratch);
-  tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
-                                        output_gate_scratch);
-
-  // For each batch and cell: compute input_weight * input.
-  if (!use_cifg) {
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        input_to_input_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
-        input_gate_scratch, /*result_stride=*/1);
-  }
-
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      input_to_forget_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
-      forget_gate_scratch, /*result_stride=*/1);
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      input_to_cell_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
-      cell_scratch, /*result_stride=*/1);
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      input_to_output_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
-      output_gate_scratch, /*result_stride=*/1);
-
-  // If auxiliary input is available then compute aux_input_weight * aux_input
-  if (aux_input_ptr_batch != nullptr) {
-    if (!use_cifg) {
-      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-          aux_input_to_input_weights_ptr, n_cell, n_aux_input,
-          aux_input_ptr_batch, n_batch, input_gate_scratch,
-          /*result_stride=*/1);
-    }
-
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        aux_input_to_forget_weights_ptr, n_cell, n_aux_input,
-        aux_input_ptr_batch, n_batch, forget_gate_scratch, /*result_stride=*/1);
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        aux_input_to_cell_weights_ptr, n_cell, n_aux_input, aux_input_ptr_batch,
-        n_batch, cell_scratch, /*result_stride=*/1);
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        aux_input_to_output_weights_ptr, n_cell, n_aux_input,
-        aux_input_ptr_batch, n_batch, output_gate_scratch, /*result_stride=*/1);
-  }
-
-  // For each batch and cell: compute recurrent_weight * output_state.
-  if (!use_cifg) {
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        recurrent_to_input_weights_ptr, n_cell, n_output, output_state_ptr,
-        n_batch, input_gate_scratch, /*result_stride=*/1);
-  }
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      recurrent_to_forget_weights_ptr, n_cell, n_output, output_state_ptr,
-      n_batch, forget_gate_scratch,
-      /*result_stride=*/1);
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      recurrent_to_cell_weights_ptr, n_cell, n_output, output_state_ptr,
-      n_batch, cell_scratch, /*result_stride=*/1);
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      recurrent_to_output_weights_ptr, n_cell, n_output, output_state_ptr,
-      n_batch, output_gate_scratch,
-      /*result_stride=*/1);
-
-  // For each batch and cell: update input gate.
-  if (!use_cifg) {
-    if (use_peephole) {
-      tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-          cell_to_input_weights_ptr, n_cell, cell_state_ptr, n_batch,
-          input_gate_scratch);
-    }
-    tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
-                                       input_gate_scratch);
-  }
-
-  // For each batch and cell: update forget gate.
-  if (use_peephole) {
-    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-        cell_to_forget_weights_ptr, n_cell, cell_state_ptr, n_batch,
-        forget_gate_scratch);
-  }
-  tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
-                                     forget_gate_scratch);
-
-  // For each batch and cell: update the cell.
-  tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr,
-                                         n_batch * n_cell, cell_state_ptr);
-  tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
-                                        params->activation, cell_scratch);
-  if (use_cifg) {
-    tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell,
-                             forget_gate_scratch);
-    tensor_utils::VectorVectorCwiseProductAccumulate(
-        cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr);
-  } else {
-    tensor_utils::VectorVectorCwiseProductAccumulate(
-        cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr);
-  }
-  if (params->cell_clip > 0.0) {
-    tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell,
-                             params->cell_clip, cell_state_ptr);
-  }
-
-  // For each batch and cell: update the output gate.
-  if (use_peephole) {
-    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-        cell_to_output_weights_ptr, n_cell, cell_state_ptr, n_batch,
-        output_gate_scratch);
-  }
-  tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
-                                     output_gate_scratch);
-  tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
-                                        params->activation, cell_scratch);
-  tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
-                                         n_batch * n_cell, output_gate_scratch);
-
-  // For each batch: update the projection and output_state.
-  const bool use_projection_weight = (projection_weights_ptr != nullptr);
-  const bool use_projection_bias = (projection_bias_ptr != nullptr);
-  if (use_projection_weight) {
-    if (use_projection_bias) {
-      tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
-                                            n_batch, output_ptr_batch);
-    } else {
-      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
-    }
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch,
-        output_ptr_batch, /*result_stride=*/1);
-    if (params->proj_clip > 0.0) {
-      tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
-                               params->proj_clip, output_ptr_batch);
-    }
-  } else {
-    tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
-                             output_ptr_batch);
-  }
-  tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
-                           output_state_ptr);
-}
-
-void LstmStep(
-    const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
-    float input_to_input_weights_scale,
-    const int8_t* input_to_forget_weights_ptr,
-    float input_to_forget_weights_scale,
-    const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale,
-    const int8_t* input_to_output_weights_ptr,
-    float input_to_output_weights_scale,
-    const int8_t* recurrent_to_input_weights_ptr,
-    float recurrent_to_input_weights_scale,
-    const int8_t* recurrent_to_forget_weights_ptr,
-    float recurrent_to_forget_weights_scale,
-    const int8_t* recurrent_to_cell_weights_ptr,
-    float recurrent_to_cell_weights_scale,
-    const int8_t* recurrent_to_output_weights_ptr,
-    float recurrent_to_output_weights_scale,
-    const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale,
-    const int8_t* cell_to_forget_weights_ptr,
-    float cell_to_forget_weights_scale,
-    const int8_t* cell_to_output_weights_ptr,
-    float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
-    float projection_weights_scale, const float* projection_bias_ptr,
-    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-    int n_output, float* input_gate_scratch, float* forget_gate_scratch,
-    float* cell_scratch, float* output_gate_scratch, float* scaling_factors,
-    float* product_scaling_factors, float* recovered_cell_weights,
-    int8_t* quantized_input_ptr_batch, int8_t* quantized_output_state_ptr,
-    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
-    float* cell_state_ptr, float* output_ptr_batch) {
-  LstmStepWithAuxInput(
-      input_ptr_batch, input_to_input_weights_ptr, input_to_input_weights_scale,
-      input_to_forget_weights_ptr, input_to_forget_weights_scale,
-      input_to_cell_weights_ptr, input_to_cell_weights_scale,
-      input_to_output_weights_ptr, input_to_output_weights_scale,
-      /*aux_input_ptr_batch=*/nullptr,
-      /*aux_input_to_input_weights_ptr=*/nullptr,
-      /*aux_input_to_input_weights_scale=*/0.0f,
-      /*aux_input_to_forget_weights_ptr=*/nullptr,
-      /*aux_input_to_forget_weights_scale=*/0.0f,
-      /*aux_input_to_cell_weights_ptr=*/nullptr,
-      /*aux_input_to_cell_weights_scale=*/0.0f,
-      /*aux_input_to_output_weights_ptr=*/nullptr,
-      /*aux_input_to_output_weights_scale=*/0.0f,
-      recurrent_to_input_weights_ptr, recurrent_to_input_weights_scale,
-      recurrent_to_forget_weights_ptr, recurrent_to_forget_weights_scale,
-      recurrent_to_cell_weights_ptr, recurrent_to_cell_weights_scale,
-      recurrent_to_output_weights_ptr, recurrent_to_output_weights_scale,
-      cell_to_input_weights_ptr, cell_to_input_weights_scale,
-      cell_to_forget_weights_ptr, cell_to_forget_weights_scale,
-      cell_to_output_weights_ptr, cell_to_output_weights_scale,
-      input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr,
-      output_gate_bias_ptr, projection_weights_ptr, projection_weights_scale,
-      projection_bias_ptr, params, n_batch, n_cell, n_input,
-      /*n_aux_input=*/0, n_output, input_gate_scratch, forget_gate_scratch,
-      cell_scratch, output_gate_scratch, scaling_factors,
-      product_scaling_factors, recovered_cell_weights,
-      quantized_input_ptr_batch,
-      /*quantized_aux_input_ptr_batch=*/nullptr, quantized_output_state_ptr,
-      quantized_cell_state_ptr, output_state_ptr, cell_state_ptr,
-      output_ptr_batch);
-    }
-
-    void LstmStepWithAuxInput(
-        const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
-        float input_to_input_weights_scale,
-        const int8_t* input_to_forget_weights_ptr,
-        float input_to_forget_weights_scale,
-        const int8_t* input_to_cell_weights_ptr,
-        float input_to_cell_weights_scale,
-        const int8_t* input_to_output_weights_ptr,
-        float input_to_output_weights_scale, const float* aux_input_ptr_batch,
-        const int8_t* aux_input_to_input_weights_ptr,
-        float aux_input_to_input_weights_scale,
-        const int8_t* aux_input_to_forget_weights_ptr,
-        float aux_input_to_forget_weights_scale,
-        const int8_t* aux_input_to_cell_weights_ptr,
-        float aux_input_to_cell_weights_scale,
-        const int8_t* aux_input_to_output_weights_ptr,
-        float aux_input_to_output_weights_scale,
-        const int8_t* recurrent_to_input_weights_ptr,
-        float recurrent_to_input_weights_scale,
-        const int8_t* recurrent_to_forget_weights_ptr,
-        float recurrent_to_forget_weights_scale,
-        const int8_t* recurrent_to_cell_weights_ptr,
-        float recurrent_to_cell_weights_scale,
-        const int8_t* recurrent_to_output_weights_ptr,
-        float recurrent_to_output_weights_scale,
-        const int8_t* cell_to_input_weights_ptr,
-        float cell_to_input_weights_scale,
-        const int8_t* cell_to_forget_weights_ptr,
-        float cell_to_forget_weights_scale,
-        const int8_t* cell_to_output_weights_ptr,
-        float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
-        const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-        const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
-        float projection_weights_scale, const float* projection_bias_ptr,
-        const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-        int n_aux_input, int n_output, float* input_gate_scratch,
-        float* forget_gate_scratch, float* cell_scratch,
-        float* output_gate_scratch, float* scaling_factors,
-        float* product_scaling_factors, float* recovered_cell_weights,
-        int8_t* quantized_input_ptr_batch,
-        int8_t* quantized_aux_input_ptr_batch,
-        int8_t* quantized_output_state_ptr, int8_t* quantized_cell_state_ptr,
-        float* output_state_ptr, float* cell_state_ptr,
-        float* output_ptr_batch) {
-      // Since we have already checked that weights are all there or none, we
-      // can check the existense of only one to the get the condition.
-      const bool use_cifg = (input_to_input_weights_ptr == nullptr);
-      const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
-      // Initialize scratch buffers with bias.
-      if (!use_cifg) {
-        tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
-                                              n_batch, input_gate_scratch);
-      }
-      tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell,
-                                            n_batch, forget_gate_scratch);
-      tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
-                                            cell_scratch);
-      tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell,
-                                            n_batch, output_gate_scratch);
-
-      if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) {
-        // Save quantization and matmul computation for all zero input.
-        float unused_min, unused_max;
-        for (int b = 0; b < n_batch; ++b) {
-          const int offset = b * n_input;
-          tensor_utils::SymmetricQuantizeFloats(
-              input_ptr_batch + offset, n_input,
-              quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-              &scaling_factors[b]);
-        }
-        // For each batch and cell: compute input_weight * input.
-        if (!use_cifg) {
-          for (int b = 0; b < n_batch; ++b) {
-            product_scaling_factors[b] =
-                scaling_factors[b] * input_to_input_weights_scale;
-          }
-          tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-              input_to_input_weights_ptr, n_cell, n_input,
-              quantized_input_ptr_batch, product_scaling_factors, n_batch,
-              input_gate_scratch, /*result_stride=*/1);
-        }
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * input_to_forget_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            input_to_forget_weights_ptr, n_cell, n_input,
-            quantized_input_ptr_batch, product_scaling_factors, n_batch,
-            forget_gate_scratch,
-            /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * input_to_cell_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            input_to_cell_weights_ptr, n_cell, n_input,
-            quantized_input_ptr_batch, product_scaling_factors, n_batch,
-            cell_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * input_to_output_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            input_to_output_weights_ptr, n_cell, n_input,
-            quantized_input_ptr_batch, product_scaling_factors, n_batch,
-            output_gate_scratch,
-            /*result_stride=*/1);
-      }
-
-      if (aux_input_ptr_batch != nullptr &&
-          !tensor_utils::IsZeroVector(aux_input_ptr_batch, n_batch * n_input)) {
-        // Save quantization and matmul computation for all zero input.
-        float unused_min, unused_max;
-        for (int b = 0; b < n_batch; ++b) {
-          const int offset = b * n_input;
-          tensor_utils::SymmetricQuantizeFloats(
-              aux_input_ptr_batch + offset, n_input,
-              quantized_aux_input_ptr_batch + offset, &unused_min, &unused_max,
-              &scaling_factors[b]);
-        }
-        // For each batch and cell: compute input_weight * input.
-        if (!use_cifg) {
-          for (int b = 0; b < n_batch; ++b) {
-            product_scaling_factors[b] =
-                scaling_factors[b] * aux_input_to_input_weights_scale;
-          }
-          tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-              aux_input_to_input_weights_ptr, n_cell, n_input,
-              quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
-              input_gate_scratch, /*result_stride=*/1);
-        }
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * aux_input_to_forget_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            aux_input_to_forget_weights_ptr, n_cell, n_input,
-            quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
-            forget_gate_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * aux_input_to_cell_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            aux_input_to_cell_weights_ptr, n_cell, n_input,
-            quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
-            cell_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * aux_input_to_output_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            aux_input_to_output_weights_ptr, n_cell, n_input,
-            quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
-            output_gate_scratch, /*result_stride=*/1);
-      }
-
-      if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) {
-        // Save quantization and matmul computation for all zero input.
-        float unused_min, unused_max;
-        for (int b = 0; b < n_batch; ++b) {
-          const int offset = b * n_output;
-          tensor_utils::SymmetricQuantizeFloats(
-              output_state_ptr + offset, n_output,
-              quantized_output_state_ptr + offset, &unused_min, &unused_max,
-              &scaling_factors[b]);
-        }
-        // For each batch and cell: compute recurrent_weight * output_state.
-        if (!use_cifg) {
-          for (int b = 0; b < n_batch; ++b) {
-            product_scaling_factors[b] =
-                scaling_factors[b] * recurrent_to_input_weights_scale;
-          }
-          tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-              recurrent_to_input_weights_ptr, n_cell, n_output,
-              quantized_output_state_ptr, product_scaling_factors, n_batch,
-              input_gate_scratch, /*result_stride=*/1);
-        }
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * recurrent_to_forget_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            recurrent_to_forget_weights_ptr, n_cell, n_output,
-            quantized_output_state_ptr, product_scaling_factors, n_batch,
-            forget_gate_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * recurrent_to_cell_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            recurrent_to_cell_weights_ptr, n_cell, n_output,
-            quantized_output_state_ptr, product_scaling_factors, n_batch,
-            cell_scratch, /*result_stride=*/1);
-
-        for (int b = 0; b < n_batch; ++b) {
-          product_scaling_factors[b] =
-              scaling_factors[b] * recurrent_to_output_weights_scale;
-        }
-        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-            recurrent_to_output_weights_ptr, n_cell, n_output,
-            quantized_output_state_ptr, product_scaling_factors, n_batch,
-            output_gate_scratch, /*result_stride=*/1);
-      }
-
-      // Save quantization and matmul computation for all zero input.
-      bool is_cell_state_all_zeros =
-          tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell);
-
-      // For each batch and cell: update input gate.
-      if (!use_cifg) {
-        if (use_peephole && !is_cell_state_all_zeros) {
-          tensor_utils::VectorScalarMultiply(cell_to_input_weights_ptr, n_cell,
-                                             cell_to_input_weights_scale,
-                                             recovered_cell_weights);
-          tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-              recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
-              input_gate_scratch);
-        }
-        tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
-                                           input_gate_scratch);
-      }
-
-      // For each batch and cell: update forget gate.
-      if (use_peephole && !is_cell_state_all_zeros) {
-        tensor_utils::VectorScalarMultiply(cell_to_forget_weights_ptr, n_cell,
-                                           cell_to_forget_weights_scale,
-                                           recovered_cell_weights);
-        tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-            recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
-            forget_gate_scratch);
-      }
-      tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
-                                         forget_gate_scratch);
-
-      // For each batch and cell: update the cell.
-      tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch,
-                                             cell_state_ptr, n_batch * n_cell,
-                                             cell_state_ptr);
-      tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
-                                            params->activation, cell_scratch);
-      if (use_cifg) {
-        tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell,
-                                 forget_gate_scratch);
-        tensor_utils::VectorVectorCwiseProductAccumulate(
-            cell_scratch, forget_gate_scratch, n_batch * n_cell,
-            cell_state_ptr);
-      } else {
-        tensor_utils::VectorVectorCwiseProductAccumulate(
-            cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr);
-      }
-      if (params->cell_clip > 0.0) {
-        tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell,
-                                 params->cell_clip, cell_state_ptr);
-      }
-
-      is_cell_state_all_zeros =
-          tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell);
-      // For each batch and cell: update the output gate.
-      if (use_peephole && !is_cell_state_all_zeros) {
-        tensor_utils::VectorScalarMultiply(cell_to_output_weights_ptr, n_cell,
-                                           cell_to_output_weights_scale,
-                                           recovered_cell_weights);
-        tensor_utils::VectorBatchVectorCwiseProductAccumulate(
-            recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
-            output_gate_scratch);
-      }
-      tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
-                                         output_gate_scratch);
-      tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
-                                            params->activation, cell_scratch);
-      tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
-                                             n_batch * n_cell,
-                                             output_gate_scratch);
-
-      // For each batch: update the projection and output_state.
-      const bool use_projection_weight = (projection_weights_ptr != nullptr);
-      const bool use_projection_bias = (projection_bias_ptr != nullptr);
-      if (use_projection_weight) {
-        if (use_projection_bias) {
-          tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
-                                                n_batch, output_ptr_batch);
-        } else {
-          tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
-        }
-        if (!tensor_utils::IsZeroVector(output_gate_scratch,
-                                        n_batch * n_cell)) {
-          // Save quantization and matmul computation for all zero input.
-          float unused_min, unused_max;
-          for (int b = 0; b < n_batch; ++b) {
-            const int offset = b * n_cell;
-            tensor_utils::SymmetricQuantizeFloats(
-                output_gate_scratch + offset, n_cell,
-                quantized_cell_state_ptr + offset, &unused_min, &unused_max,
-                &scaling_factors[b]);
-          }
-          for (int b = 0; b < n_batch; ++b) {
-            product_scaling_factors[b] =
-                scaling_factors[b] * projection_weights_scale;
-          }
-          tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-              projection_weights_ptr, n_output, n_cell,
-              quantized_cell_state_ptr, product_scaling_factors, n_batch,
-              output_ptr_batch,
-              /*result_stride=*/1);
-        }
-        if (params->proj_clip > 0.0) {
-          tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
-                                   params->proj_clip, output_ptr_batch);
-        }
-      } else {
-        tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
-                                 output_ptr_batch);
-      }
-      tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
-                               output_state_ptr);
-    }
-
 }  // namespace kernel_utils
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
index b5558cce55..74e0a4a53d 100644
--- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
+++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
@@ -76,190 +76,6 @@ void RnnBatchStep(
     int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors,
     float* hidden_state_ptr_batch, float* output_ptr_batch);
 
-// Performs an LSTM batch inference step for input specified by input_ptr_batch.
-// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and
-// biases (*_bias_ptr), and buffers (*_scratch), along with additional
-// parameters:
-//  - params: various LSTM params including activation, clipping, etc.,
-//  - n_batch: size of batch,
-//  - n_cell: number of cells (or units),
-//  - n_input: the input size,
-//  - n_output: the output size.
-//
-// The pointers to the cell and output state and the output are updated.
-//
-// The pointers with the suffix "_batch" point to data aligned in batch_major
-// order, and each step processes batch_size many inputs from input_ptr_batch,
-// and updates batch_size many cell and output states.
-void LstmStep(
-    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
-    const float* input_to_forget_weights_ptr,
-    const float* input_to_cell_weights_ptr,
-    const float* input_to_output_weights_ptr,
-    const float* recurrent_to_input_weights_ptr,
-    const float* recurrent_to_forget_weights_ptr,
-    const float* recurrent_to_cell_weights_ptr,
-    const float* recurrent_to_output_weights_ptr,
-    const float* cell_to_input_weights_ptr,
-    const float* cell_to_forget_weights_ptr,
-    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
-    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
-    int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr,
-    float* cell_state_ptr, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* output_ptr_batch);
-
-// Same as above but includes an auxiliary input with the corresponding weights.
-void LstmStepWithAuxInput(
-    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
-    const float* input_to_forget_weights_ptr,
-    const float* input_to_cell_weights_ptr,
-    const float* input_to_output_weights_ptr, const float* aux_input_ptr_batch,
-    const float* aux_input_to_input_weights_ptr,
-    const float* aux_input_to_forget_weights_ptr,
-    const float* aux_input_to_cell_weights_ptr,
-    const float* aux_input_to_output_weights_ptr,
-    const float* recurrent_to_input_weights_ptr,
-    const float* recurrent_to_forget_weights_ptr,
-    const float* recurrent_to_cell_weights_ptr,
-    const float* recurrent_to_output_weights_ptr,
-    const float* cell_to_input_weights_ptr,
-    const float* cell_to_forget_weights_ptr,
-    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
-    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
-    int n_batch, int n_cell, int n_input, int n_aux_input, int n_output,
-    float* output_state_ptr, float* cell_state_ptr, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* output_ptr_batch);
-
-// Same as above but with quantized weight matrices. In detail:
-// Input of size 'n_batch * n_input':
-//   input_ptr_batch
-//
-// LSTM weights:
-// Quantized input weights of size 'n_cell * n_input':
-//   input_to_input_weights            - optional (can be nullptr)
-//   input_to_forget_weights
-//   input_to_cell_weights
-//   input_to_input_weights
-// Quantized recurrent weights of size 'n_cell * n_output':
-//   recurrent_to_input_weights        - optional
-//   recurrent_to_forget_weights
-//   recurrent_to_cell_weights
-//   recurrent_to_input_weights
-// Quantized peephole weights of size 'n_cell', representing diagonal matrices.
-//   cell_to_input_weights             - optional
-//   cell_to_cell_weights              - optional
-//   cell_to_output_weights            - optional
-// Quantized projection weights of size 'n_output * n_cell'
-//   projection_weights_ptr            - optional
-// Weight scales (scalars) for each of the weights above.
-//   input_to_input_weights_scale      - optional
-//   input_to_forget_weights_scale
-//   input_to_cell_weights_scale
-//   input_to_output_weights_scale
-//   recurrent_to_input_weights_scale  - optional
-//   recurrent_to_forget_weights_scale
-//   recurrent_to_cell_weights_scale
-//   recurrent_to_output_weights_scale
-//   cell_to_input_weights_scale,
-//   cell_to_forget_weights_scale,
-//   cell_to_output_weights_scale,
-//   projection_weights_scale          - optional
-// Gate biases of size 'n_cell':
-//   input_gate_bias_ptr               - optional
-//   forget_gate_bias_ptr
-//   cell_gate_bias_ptr
-//   output_gate_bias_ptr
-//
-// Temporary pre-allocated storage for quantized values:
-//   quantized_input_ptr_batch (same size as input_ptr_batch)
-//   quantized_output_state_ptr (same size as output_state_ptr)
-//   quantized_cell_state_ptr (same size as cell_state_ptr)
-// Temporary pre-allocated storage for recovered values:
-//   recovered_cell_weights (same size as cell_to_*_weights)
-//
-// Outputs:
-//   output_state_ptr - size 'n_batch * n_output'
-//   cell_state_ptr   - size 'n_batch * n_cell'
-//   output_ptr_batch - size 'n_batch * n_output'
-void LstmStep(
-    const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
-    float input_to_input_weights_scale,
-    const int8_t* input_to_forget_weights_ptr,
-    float input_to_forget_weights_scale,
-    const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale,
-    const int8_t* input_to_output_weights_ptr,
-    float input_to_output_weights_scale,
-    const int8_t* recurrent_to_input_weights_ptr,
-    float recurrent_to_input_weights_scale,
-    const int8_t* recurrent_to_forget_weights_ptr,
-    float recurrent_to_forget_weights_scale,
-    const int8_t* recurrent_to_cell_weights_ptr,
-    float recurrent_to_cell_weights_scale,
-    const int8_t* recurrent_to_output_weights_ptr,
-    float recurrent_to_output_weights_scale,
-    const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale,
-    const int8_t* cell_to_forget_weights_ptr,
-    float cell_to_forget_weights_scale,
-    const int8_t* cell_to_output_weights_ptr,
-    float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
-    float projection_weights_scale, const float* projection_bias_ptr,
-    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-    int n_output, float* input_gate_scratch, float* forget_gate_scratch,
-    float* cell_scratch, float* output_gate_scratch, float* scaling_factors,
-    float* product_scaling_factors, float* recovered_cell_weights,
-    int8_t* quantized_input_ptr_batch, int8_t* quantized_output_state_ptr,
-    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
-    float* cell_state_ptr, float* output_ptr_batch);
-
-void LstmStepWithAuxInput(
-    const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
-    float input_to_input_weights_scale,
-    const int8_t* input_to_forget_weights_ptr,
-    float input_to_forget_weights_scale,
-    const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale,
-    const int8_t* input_to_output_weights_ptr,
-    float input_to_output_weights_scale, const float* aux_input_ptr_batch,
-    const int8_t* aux_input_to_input_weights_ptr,
-    float aux_input_to_input_weights_scale,
-    const int8_t* aux_input_to_forget_weights_ptr,
-    float aux_input_to_forget_weights_scale,
-    const int8_t* aux_input_to_cell_weights_ptr,
-    float aux_input_to_cell_weights_scale,
-    const int8_t* aux_input_to_output_weights_ptr,
-    float aux_input_to_output_weights_scale,
-    const int8_t* recurrent_to_input_weights_ptr,
-    float recurrent_to_input_weights_scale,
-    const int8_t* recurrent_to_forget_weights_ptr,
-    float recurrent_to_forget_weights_scale,
-    const int8_t* recurrent_to_cell_weights_ptr,
-    float recurrent_to_cell_weights_scale,
-    const int8_t* recurrent_to_output_weights_ptr,
-    float recurrent_to_output_weights_scale,
-    const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale,
-    const int8_t* cell_to_forget_weights_ptr,
-    float cell_to_forget_weights_scale,
-    const int8_t* cell_to_output_weights_ptr,
-    float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
-    float projection_weights_scale, const float* projection_bias_ptr,
-    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-    int n_aux_input, int n_output, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* scaling_factors, float* product_scaling_factors,
-    float* recovered_cell_weights, int8_t* quantized_input_ptr_batch,
-    int8_t* quantized_aux_input_ptr_batch, int8_t* quantized_output_state_ptr,
-    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
-    float* cell_state_ptr, float* output_ptr_batch);
-
 }  // namespace kernel_utils
 }  // namespace tflite
 #endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_KERNEL_UTILS_H_
diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc
index 5b996d00bc..16d67a1a93 100644
--- a/tensorflow/contrib/lite/kernels/lstm.cc
+++ b/tensorflow/contrib/lite/kernels/lstm.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/internal/tensor.h"
 #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/lstm_eval.h"
 #include "tensorflow/contrib/lite/kernels/op_macros.h"
 
 namespace tflite {
@@ -424,263 +425,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-// The LSTM Op engine.
-TfLiteStatus EvalFloat(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer,
-    TfLiteTensor* activation_state, TfLiteTensor* cell_state,
-    TfLiteTensor* output) {
-  const int n_batch = input->dims->data[0];
-  const int n_input = input->dims->data[1];
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  const float* input_to_input_weights_ptr =
-      (use_cifg) ? nullptr : input_to_input_weights->data.f;
-  const float* recurrent_to_input_weights_ptr =
-      (use_cifg) ? nullptr : recurrent_to_input_weights->data.f;
-  const float* input_gate_bias_ptr =
-      (use_cifg) ? nullptr : input_gate_bias->data.f;
-  const float* cell_to_input_weights_ptr =
-      (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr;
-  const float* cell_to_forget_weights_ptr =
-      (use_peephole) ? cell_to_forget_weights->data.f : nullptr;
-  const float* cell_to_output_weights_ptr =
-      (use_peephole) ? cell_to_output_weights->data.f : nullptr;
-  const float* projection_weights_ptr =
-      (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const float* input_ptr_batch = input->data.f;
-  const float* input_to_forget_weights_ptr = input_to_forget_weights->data.f;
-  const float* input_to_cell_weights_ptr = input_to_cell_weights->data.f;
-  const float* input_to_output_weights_ptr = input_to_output_weights->data.f;
-  const float* recurrent_to_forget_weights_ptr =
-      recurrent_to_forget_weights->data.f;
-  const float* recurrent_to_cell_weights_ptr =
-      recurrent_to_cell_weights->data.f;
-  const float* recurrent_to_output_weights_ptr =
-      recurrent_to_output_weights->data.f;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* activation_state_ptr = activation_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-  float* output_ptr_batch = output->data.f;
-
-  kernel_utils::LstmStep(
-      input_ptr_batch, input_to_input_weights_ptr, input_to_forget_weights_ptr,
-      input_to_cell_weights_ptr, input_to_output_weights_ptr,
-      recurrent_to_input_weights_ptr, recurrent_to_forget_weights_ptr,
-      recurrent_to_cell_weights_ptr, recurrent_to_output_weights_ptr,
-      cell_to_input_weights_ptr, cell_to_forget_weights_ptr,
-      cell_to_output_weights_ptr, input_gate_bias_ptr, forget_gate_bias_ptr,
-      cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
-      projection_bias_ptr, params, n_batch, n_cell, n_input, n_output,
-      activation_state_ptr, cell_state_ptr, input_gate_scratch,
-      forget_gate_scratch, cell_scratch, output_gate_scratch, output_ptr_batch);
-
-  return kTfLiteOk;
-}
-
-TfLiteStatus EvalHybrid(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer,
-    TfLiteTensor* scaling_factors, TfLiteTensor* prod_scaling_factors,
-    TfLiteTensor* recovered_cell_weights, TfLiteTensor* input_quantized,
-    TfLiteTensor* activation_state_quantized,
-    TfLiteTensor* cell_state_quantized, TfLiteTensor* activation_state,
-    TfLiteTensor* cell_state, TfLiteTensor* output) {
-  const int n_batch = input->dims->data[0];
-  const int n_input = input->dims->data[1];
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  int8_t* input_to_input_weights_ptr = nullptr;
-  float input_to_input_weights_scale = 1.0f;
-  int8_t* recurrent_to_input_weights_ptr = nullptr;
-  float recurrent_to_input_weights_scale = 1.0f;
-  float* input_gate_bias_ptr = nullptr;
-  if (!use_cifg) {
-    input_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(input_to_input_weights->data.uint8);
-    recurrent_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(recurrent_to_input_weights->data.uint8);
-    input_gate_bias_ptr = input_gate_bias->data.f;
-    input_to_input_weights_scale = input_to_input_weights->params.scale;
-    recurrent_to_input_weights_scale = recurrent_to_input_weights->params.scale;
-  }
-
-  int8_t* cell_to_input_weights_ptr = nullptr;
-  int8_t* cell_to_forget_weights_ptr = nullptr;
-  int8_t* cell_to_output_weights_ptr = nullptr;
-  float cell_to_input_weights_scale = 1.0f;
-  float cell_to_forget_weights_scale = 1.0f;
-  float cell_to_output_weights_scale = 1.0f;
-  if (use_peephole) {
-    if (!use_cifg) {
-      cell_to_input_weights_ptr =
-          reinterpret_cast<int8_t*>(cell_to_input_weights->data.uint8);
-      cell_to_input_weights_scale = cell_to_input_weights->params.scale;
-    }
-    cell_to_forget_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_forget_weights->data.uint8);
-    cell_to_output_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_output_weights->data.uint8);
-    cell_to_forget_weights_scale = cell_to_forget_weights->params.scale;
-    cell_to_output_weights_scale = cell_to_output_weights->params.scale;
-  }
-
-  const int8_t* projection_weights_ptr =
-      (projection_weights == nullptr)
-          ? nullptr
-          : reinterpret_cast<int8_t*>(projection_weights->data.uint8);
-  const float projection_weights_scale =
-      (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const float* input_ptr_batch = input->data.f;
-  const int8_t* input_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_forget_weights->data.uint8);
-  const float input_to_forget_weights_scale =
-      input_to_forget_weights->params.scale;
-  const int8_t* input_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_cell_weights->data.uint8);
-  const float input_to_cell_weights_scale = input_to_cell_weights->params.scale;
-  const int8_t* input_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_output_weights->data.uint8);
-  const float input_to_output_weights_scale =
-      input_to_output_weights->params.scale;
-  const int8_t* recurrent_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_forget_weights->data.uint8);
-  const float recurrent_to_forget_weights_scale =
-      recurrent_to_forget_weights->params.scale;
-  const int8_t* recurrent_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_cell_weights->data.uint8);
-  const float recurrent_to_cell_weights_scale =
-      recurrent_to_cell_weights->params.scale;
-  const int8_t* recurrent_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_output_weights->data.uint8);
-  const float recurrent_to_output_weights_scale =
-      recurrent_to_output_weights->params.scale;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* activation_state_ptr = activation_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-  float* output_ptr_batch = output->data.f;
-
-  // Temporary storage for quantized values and scaling factors.
-  int8_t* quantized_input_ptr =
-      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
-  int8_t* quantized_activation_state_ptr =
-      reinterpret_cast<int8_t*>(activation_state_quantized->data.uint8);
-  int8_t* quantized_cell_state_ptr =
-      reinterpret_cast<int8_t*>(cell_state_quantized->data.uint8);
-  float* scaling_factors_ptr = scaling_factors->data.f;
-  float* prod_scaling_factors_ptr = prod_scaling_factors->data.f;
-  float* recovered_cell_weights_ptr = recovered_cell_weights->data.f;
-
-  kernel_utils::LstmStep(
-      input_ptr_batch, input_to_input_weights_ptr, input_to_input_weights_scale,
-      input_to_forget_weights_ptr, input_to_forget_weights_scale,
-      input_to_cell_weights_ptr, input_to_cell_weights_scale,
-      input_to_output_weights_ptr, input_to_output_weights_scale,
-      recurrent_to_input_weights_ptr, recurrent_to_input_weights_scale,
-      recurrent_to_forget_weights_ptr, recurrent_to_forget_weights_scale,
-      recurrent_to_cell_weights_ptr, recurrent_to_cell_weights_scale,
-      recurrent_to_output_weights_ptr, recurrent_to_output_weights_scale,
-      cell_to_input_weights_ptr, cell_to_input_weights_scale,
-      cell_to_forget_weights_ptr, cell_to_forget_weights_scale,
-      cell_to_output_weights_ptr, cell_to_output_weights_scale,
-      input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr,
-      output_gate_bias_ptr, projection_weights_ptr, projection_weights_scale,
-      projection_bias_ptr, params, n_batch, n_cell, n_input, n_output,
-      input_gate_scratch, forget_gate_scratch, cell_scratch,
-      output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
-      recovered_cell_weights_ptr, quantized_input_ptr,
-      quantized_activation_state_ptr, quantized_cell_state_ptr,
-      activation_state_ptr, cell_state_ptr, output_ptr_batch);
-
-  return kTfLiteOk;
-}
-
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
   OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
@@ -738,15 +482,21 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // TODO(mirkov): add a check that weights are all uint8s or all floats.
   switch (input_to_output_weights->type) {
     case kTfLiteFloat32: {
-      return EvalFloat(input, input_to_input_weights, input_to_forget_weights,
-                       input_to_cell_weights, input_to_output_weights,
-                       recurrent_to_input_weights, recurrent_to_forget_weights,
-                       recurrent_to_cell_weights, recurrent_to_output_weights,
-                       cell_to_input_weights, cell_to_forget_weights,
-                       cell_to_output_weights, input_gate_bias,
-                       forget_gate_bias, cell_bias, output_gate_bias,
-                       projection_weights, projection_bias, params,
-                       scratch_buffer, activation_state, cell_state, output);
+      return lstm_eval::EvalFloat(
+          input, input_to_input_weights, input_to_forget_weights,
+          input_to_cell_weights, input_to_output_weights,
+          recurrent_to_input_weights, recurrent_to_forget_weights,
+          recurrent_to_cell_weights, recurrent_to_output_weights,
+          cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
+          /*aux_input=*/nullptr,
+          /*aux_input_to_input_weights=*/nullptr,
+          /*aux_input_to_forget_weights=*/nullptr,
+          /*aux_input_to_cell_weights=*/nullptr,
+          /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
+          forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
+          projection_bias, params, /*forward_sequence=*/true,
+          /*output_offset=*/0, scratch_buffer, activation_state, cell_state,
+          output);
     }
     case kTfLiteUInt8: {
       TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1);
@@ -759,17 +509,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           GetTemporary(context, node, /*index=*/5);
       TfLiteTensor* recovered_cell_weights =
           GetTemporary(context, node, /*index=*/6);
-      return EvalHybrid(
+      return lstm_eval::EvalHybrid(
           input, input_to_input_weights, input_to_forget_weights,
           input_to_cell_weights, input_to_output_weights,
           recurrent_to_input_weights, recurrent_to_forget_weights,
           recurrent_to_cell_weights, recurrent_to_output_weights,
           cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
-          input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias,
-          projection_weights, projection_bias, params, scratch_buffer,
-          scaling_factors, prod_scaling_factors, recovered_cell_weights,
-          input_quantized, activation_state_quantized, cell_state_quantized,
-          activation_state, cell_state, output);
+          /*aux_input=*/nullptr,
+          /*aux_input_to_input_weights=*/nullptr,
+          /*aux_input_to_forget_weights=*/nullptr,
+          /*aux_input_to_cell_weights=*/nullptr,
+          /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
+          forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
+          projection_bias, params, /*forward_sequence=*/true,
+          /*output_offset=*/0, scratch_buffer, scaling_factors,
+          prod_scaling_factors, recovered_cell_weights, input_quantized,
+          /*aux_input_quantized=*/nullptr, activation_state_quantized,
+          cell_state_quantized, activation_state, cell_state, output);
     }
     default:
       context->ReportError(context, "Type %d is not currently supported.",
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.cc b/tensorflow/contrib/lite/kernels/lstm_eval.cc
new file mode 100644
index 0000000000..c6c21eb085
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.cc
@@ -0,0 +1,909 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/kernels/lstm_eval.h"
+
+#include <stdint.h>
+
+#include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace lstm_eval {
+
+namespace {
+
+// Performs an LSTM batch inference step for input specified by input_ptr_batch.
+// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and
+// biases (*_bias_ptr), and buffers (*_scratch), along with additional
+// parameters:
+//  - params: various LSTM params including activation, clipping, etc.,
+//  - n_batch: size of batch,
+//  - n_cell: number of cells (or units),
+//  - n_input: the input size,
+//  - n_output: the output size.
+//
+// The pointers to the cell and output state and the output are updated.
+//
+// The pointers with the suffix "_batch" point to data aligned in batch_major
+// order, and each step processes batch_size many inputs from input_ptr_batch,
+// and updates batch_size many cell and output states.
+inline void LstmStepWithAuxInput(
+    const float* input_ptr_batch, const float* input_to_input_weights_ptr,
+    const float* input_to_forget_weights_ptr,
+    const float* input_to_cell_weights_ptr,
+    const float* input_to_output_weights_ptr, const float* aux_input_ptr_batch,
+    const float* aux_input_to_input_weights_ptr,
+    const float* aux_input_to_forget_weights_ptr,
+    const float* aux_input_to_cell_weights_ptr,
+    const float* aux_input_to_output_weights_ptr,
+    const float* recurrent_to_input_weights_ptr,
+    const float* recurrent_to_forget_weights_ptr,
+    const float* recurrent_to_cell_weights_ptr,
+    const float* recurrent_to_output_weights_ptr,
+    const float* cell_to_input_weights_ptr,
+    const float* cell_to_forget_weights_ptr,
+    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
+    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
+    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
+    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
+    int n_batch, int n_cell, int n_input, int n_aux_input, int n_output,
+    float* output_state_ptr, float* cell_state_ptr, float* input_gate_scratch,
+    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
+    float* output_ptr_batch) {
+  // Since we have already checked that weights are all there or none, we can
+  // check the existense of only one to the get the condition.
+  const bool use_cifg = (input_to_input_weights_ptr == nullptr);
+  const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
+  // Initialize scratch buffers with bias.
+  if (!use_cifg) {
+    tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch,
+                                          input_gate_scratch);
+  }
+  tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
+                                        forget_gate_scratch);
+  tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
+                                        cell_scratch);
+  tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
+                                        output_gate_scratch);
+
+  // For each batch and cell: compute input_weight * input.
+  if (!use_cifg) {
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        input_to_input_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
+        input_gate_scratch, /*result_stride=*/1);
+  }
+
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      input_to_forget_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
+      forget_gate_scratch, /*result_stride=*/1);
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      input_to_cell_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
+      cell_scratch, /*result_stride=*/1);
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      input_to_output_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch,
+      output_gate_scratch, /*result_stride=*/1);
+
+  // If auxiliary input is available then compute aux_input_weight * aux_input
+  if (aux_input_ptr_batch != nullptr) {
+    if (!use_cifg) {
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          aux_input_to_input_weights_ptr, n_cell, n_aux_input,
+          aux_input_ptr_batch, n_batch, input_gate_scratch,
+          /*result_stride=*/1);
+    }
+
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_forget_weights_ptr, n_cell, n_aux_input,
+        aux_input_ptr_batch, n_batch, forget_gate_scratch, /*result_stride=*/1);
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_cell_weights_ptr, n_cell, n_aux_input, aux_input_ptr_batch,
+        n_batch, cell_scratch, /*result_stride=*/1);
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_output_weights_ptr, n_cell, n_aux_input,
+        aux_input_ptr_batch, n_batch, output_gate_scratch, /*result_stride=*/1);
+  }
+
+  // For each batch and cell: compute recurrent_weight * output_state.
+  if (!use_cifg) {
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        recurrent_to_input_weights_ptr, n_cell, n_output, output_state_ptr,
+        n_batch, input_gate_scratch, /*result_stride=*/1);
+  }
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      recurrent_to_forget_weights_ptr, n_cell, n_output, output_state_ptr,
+      n_batch, forget_gate_scratch,
+      /*result_stride=*/1);
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      recurrent_to_cell_weights_ptr, n_cell, n_output, output_state_ptr,
+      n_batch, cell_scratch, /*result_stride=*/1);
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+      recurrent_to_output_weights_ptr, n_cell, n_output, output_state_ptr,
+      n_batch, output_gate_scratch,
+      /*result_stride=*/1);
+
+  // For each batch and cell: update input gate.
+  if (!use_cifg) {
+    if (use_peephole) {
+      tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+          cell_to_input_weights_ptr, n_cell, cell_state_ptr, n_batch,
+          input_gate_scratch);
+    }
+    tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
+                                       input_gate_scratch);
+  }
+
+  // For each batch and cell: update forget gate.
+  if (use_peephole) {
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+        cell_to_forget_weights_ptr, n_cell, cell_state_ptr, n_batch,
+        forget_gate_scratch);
+  }
+  tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
+                                     forget_gate_scratch);
+
+  // For each batch and cell: update the cell.
+  tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr,
+                                         n_batch * n_cell, cell_state_ptr);
+  tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
+                                        params->activation, cell_scratch);
+  if (use_cifg) {
+    tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell,
+                             forget_gate_scratch);
+    tensor_utils::VectorVectorCwiseProductAccumulate(
+        cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr);
+  } else {
+    tensor_utils::VectorVectorCwiseProductAccumulate(
+        cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr);
+  }
+  if (params->cell_clip > 0.0) {
+    tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell,
+                             params->cell_clip, cell_state_ptr);
+  }
+
+  // For each batch and cell: update the output gate.
+  if (use_peephole) {
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+        cell_to_output_weights_ptr, n_cell, cell_state_ptr, n_batch,
+        output_gate_scratch);
+  }
+  tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
+                                     output_gate_scratch);
+  tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
+                                        params->activation, cell_scratch);
+  tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
+                                         n_batch * n_cell, output_gate_scratch);
+
+  // For each batch: update the projection and output_state.
+  const bool use_projection_weight = (projection_weights_ptr != nullptr);
+  const bool use_projection_bias = (projection_bias_ptr != nullptr);
+  if (use_projection_weight) {
+    if (use_projection_bias) {
+      tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
+                                            n_batch, output_ptr_batch);
+    } else {
+      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch,
+        output_ptr_batch, /*result_stride=*/1);
+    if (params->proj_clip > 0.0) {
+      tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
+                               params->proj_clip, output_ptr_batch);
+    }
+  } else {
+    tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
+                             output_ptr_batch);
+  }
+  tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
+                           output_state_ptr);
+}
+
+// Same as above but with quantized weight matrices. In detail:
+// Input of size 'n_batch * n_input':
+//   input_ptr_batch
+//
+// LSTM weights:
+// Quantized input weights of size 'n_cell * n_input':
+//   input_to_input_weights            - optional (can be nullptr)
+//   input_to_forget_weights
+//   input_to_cell_weights
+//   input_to_input_weights
+// Quantized recurrent weights of size 'n_cell * n_output':
+//   recurrent_to_input_weights        - optional
+//   recurrent_to_forget_weights
+//   recurrent_to_cell_weights
+//   recurrent_to_input_weights
+// Quantized peephole weights of size 'n_cell', representing diagonal matrices.
+//   cell_to_input_weights             - optional
+//   cell_to_cell_weights              - optional
+//   cell_to_output_weights            - optional
+// Quantized projection weights of size 'n_output * n_cell'
+//   projection_weights_ptr            - optional
+// Weight scales (scalars) for each of the weights above.
+//   input_to_input_weights_scale      - optional
+//   input_to_forget_weights_scale
+//   input_to_cell_weights_scale
+//   input_to_output_weights_scale
+//   recurrent_to_input_weights_scale  - optional
+//   recurrent_to_forget_weights_scale
+//   recurrent_to_cell_weights_scale
+//   recurrent_to_output_weights_scale
+//   cell_to_input_weights_scale,
+//   cell_to_forget_weights_scale,
+//   cell_to_output_weights_scale,
+//   projection_weights_scale          - optional
+// Gate biases of size 'n_cell':
+//   input_gate_bias_ptr               - optional
+//   forget_gate_bias_ptr
+//   cell_gate_bias_ptr
+//   output_gate_bias_ptr
+//
+// Temporary pre-allocated storage for quantized values:
+//   quantized_input_ptr_batch (same size as input_ptr_batch)
+//   quantized_output_state_ptr (same size as output_state_ptr)
+//   quantized_cell_state_ptr (same size as cell_state_ptr)
+// Temporary pre-allocated storage for recovered values:
+//   recovered_cell_weights (same size as cell_to_*_weights)
+//
+// Outputs:
+//   output_state_ptr - size 'n_batch * n_output'
+//   cell_state_ptr   - size 'n_batch * n_cell'
+//   output_ptr_batch - size 'n_batch * n_output'
+inline void LstmStepWithAuxInput(
+    const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
+    float input_to_input_weights_scale,
+    const int8_t* input_to_forget_weights_ptr,
+    float input_to_forget_weights_scale,
+    const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale,
+    const int8_t* input_to_output_weights_ptr,
+    float input_to_output_weights_scale, const float* aux_input_ptr_batch,
+    const int8_t* aux_input_to_input_weights_ptr,
+    float aux_input_to_input_weights_scale,
+    const int8_t* aux_input_to_forget_weights_ptr,
+    float aux_input_to_forget_weights_scale,
+    const int8_t* aux_input_to_cell_weights_ptr,
+    float aux_input_to_cell_weights_scale,
+    const int8_t* aux_input_to_output_weights_ptr,
+    float aux_input_to_output_weights_scale,
+    const int8_t* recurrent_to_input_weights_ptr,
+    float recurrent_to_input_weights_scale,
+    const int8_t* recurrent_to_forget_weights_ptr,
+    float recurrent_to_forget_weights_scale,
+    const int8_t* recurrent_to_cell_weights_ptr,
+    float recurrent_to_cell_weights_scale,
+    const int8_t* recurrent_to_output_weights_ptr,
+    float recurrent_to_output_weights_scale,
+    const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale,
+    const int8_t* cell_to_forget_weights_ptr,
+    float cell_to_forget_weights_scale,
+    const int8_t* cell_to_output_weights_ptr,
+    float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
+    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
+    const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
+    float projection_weights_scale, const float* projection_bias_ptr,
+    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
+    int n_aux_input, int n_output, float* input_gate_scratch,
+    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
+    float* scaling_factors, float* product_scaling_factors,
+    float* recovered_cell_weights, int8_t* quantized_input_ptr_batch,
+    int8_t* quantized_aux_input_ptr_batch, int8_t* quantized_output_state_ptr,
+    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
+    float* cell_state_ptr, float* output_ptr_batch) {
+  // Since we have already checked that weights are all there or none, we
+  // can check the existense of only one to the get the condition.
+  const bool use_cifg = (input_to_input_weights_ptr == nullptr);
+  const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
+  // Initialize scratch buffers with bias.
+  if (!use_cifg) {
+    tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch,
+                                          input_gate_scratch);
+  }
+  tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
+                                        forget_gate_scratch);
+  tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
+                                        cell_scratch);
+  tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
+                                        output_gate_scratch);
+
+  if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) {
+    // Save quantization and matmul computation for all zero input.
+    float unused_min, unused_max;
+    for (int b = 0; b < n_batch; ++b) {
+      const int offset = b * n_input;
+      tensor_utils::SymmetricQuantizeFloats(
+          input_ptr_batch + offset, n_input, quantized_input_ptr_batch + offset,
+          &unused_min, &unused_max, &scaling_factors[b]);
+    }
+    // For each batch and cell: compute input_weight * input.
+    if (!use_cifg) {
+      for (int b = 0; b < n_batch; ++b) {
+        product_scaling_factors[b] =
+            scaling_factors[b] * input_to_input_weights_scale;
+      }
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          input_to_input_weights_ptr, n_cell, n_input,
+          quantized_input_ptr_batch, product_scaling_factors, n_batch,
+          input_gate_scratch, /*result_stride=*/1);
+    }
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * input_to_forget_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        input_to_forget_weights_ptr, n_cell, n_input, quantized_input_ptr_batch,
+        product_scaling_factors, n_batch, forget_gate_scratch,
+        /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * input_to_cell_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        input_to_cell_weights_ptr, n_cell, n_input, quantized_input_ptr_batch,
+        product_scaling_factors, n_batch, cell_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * input_to_output_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        input_to_output_weights_ptr, n_cell, n_input, quantized_input_ptr_batch,
+        product_scaling_factors, n_batch, output_gate_scratch,
+        /*result_stride=*/1);
+  }
+
+  if (aux_input_ptr_batch != nullptr &&
+      !tensor_utils::IsZeroVector(aux_input_ptr_batch, n_batch * n_input)) {
+    // Save quantization and matmul computation for all zero input.
+    float unused_min, unused_max;
+    for (int b = 0; b < n_batch; ++b) {
+      const int offset = b * n_input;
+      tensor_utils::SymmetricQuantizeFloats(
+          aux_input_ptr_batch + offset, n_input,
+          quantized_aux_input_ptr_batch + offset, &unused_min, &unused_max,
+          &scaling_factors[b]);
+    }
+    // For each batch and cell: compute input_weight * input.
+    if (!use_cifg) {
+      for (int b = 0; b < n_batch; ++b) {
+        product_scaling_factors[b] =
+            scaling_factors[b] * aux_input_to_input_weights_scale;
+      }
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          aux_input_to_input_weights_ptr, n_cell, n_input,
+          quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
+          input_gate_scratch, /*result_stride=*/1);
+    }
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * aux_input_to_forget_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_forget_weights_ptr, n_cell, n_input,
+        quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
+        forget_gate_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * aux_input_to_cell_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_cell_weights_ptr, n_cell, n_input,
+        quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
+        cell_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * aux_input_to_output_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        aux_input_to_output_weights_ptr, n_cell, n_input,
+        quantized_aux_input_ptr_batch, product_scaling_factors, n_batch,
+        output_gate_scratch, /*result_stride=*/1);
+  }
+
+  if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) {
+    // Save quantization and matmul computation for all zero input.
+    float unused_min, unused_max;
+    for (int b = 0; b < n_batch; ++b) {
+      const int offset = b * n_output;
+      tensor_utils::SymmetricQuantizeFloats(output_state_ptr + offset, n_output,
+                                            quantized_output_state_ptr + offset,
+                                            &unused_min, &unused_max,
+                                            &scaling_factors[b]);
+    }
+    // For each batch and cell: compute recurrent_weight * output_state.
+    if (!use_cifg) {
+      for (int b = 0; b < n_batch; ++b) {
+        product_scaling_factors[b] =
+            scaling_factors[b] * recurrent_to_input_weights_scale;
+      }
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          recurrent_to_input_weights_ptr, n_cell, n_output,
+          quantized_output_state_ptr, product_scaling_factors, n_batch,
+          input_gate_scratch, /*result_stride=*/1);
+    }
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * recurrent_to_forget_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        recurrent_to_forget_weights_ptr, n_cell, n_output,
+        quantized_output_state_ptr, product_scaling_factors, n_batch,
+        forget_gate_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * recurrent_to_cell_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        recurrent_to_cell_weights_ptr, n_cell, n_output,
+        quantized_output_state_ptr, product_scaling_factors, n_batch,
+        cell_scratch, /*result_stride=*/1);
+
+    for (int b = 0; b < n_batch; ++b) {
+      product_scaling_factors[b] =
+          scaling_factors[b] * recurrent_to_output_weights_scale;
+    }
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        recurrent_to_output_weights_ptr, n_cell, n_output,
+        quantized_output_state_ptr, product_scaling_factors, n_batch,
+        output_gate_scratch, /*result_stride=*/1);
+  }
+
+  // Save quantization and matmul computation for all zero input.
+  bool is_cell_state_all_zeros =
+      tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell);
+
+  // For each batch and cell: update input gate.
+  if (!use_cifg) {
+    if (use_peephole && !is_cell_state_all_zeros) {
+      tensor_utils::VectorScalarMultiply(cell_to_input_weights_ptr, n_cell,
+                                         cell_to_input_weights_scale,
+                                         recovered_cell_weights);
+      tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+          recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
+          input_gate_scratch);
+    }
+    tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
+                                       input_gate_scratch);
+  }
+
+  // For each batch and cell: update forget gate.
+  if (use_peephole && !is_cell_state_all_zeros) {
+    tensor_utils::VectorScalarMultiply(cell_to_forget_weights_ptr, n_cell,
+                                       cell_to_forget_weights_scale,
+                                       recovered_cell_weights);
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+        recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
+        forget_gate_scratch);
+  }
+  tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
+                                     forget_gate_scratch);
+
+  // For each batch and cell: update the cell.
+  tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr,
+                                         n_batch * n_cell, cell_state_ptr);
+  tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
+                                        params->activation, cell_scratch);
+  if (use_cifg) {
+    tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell,
+                             forget_gate_scratch);
+    tensor_utils::VectorVectorCwiseProductAccumulate(
+        cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr);
+  } else {
+    tensor_utils::VectorVectorCwiseProductAccumulate(
+        cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr);
+  }
+  if (params->cell_clip > 0.0) {
+    tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell,
+                             params->cell_clip, cell_state_ptr);
+  }
+
+  is_cell_state_all_zeros =
+      tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell);
+  // For each batch and cell: update the output gate.
+  if (use_peephole && !is_cell_state_all_zeros) {
+    tensor_utils::VectorScalarMultiply(cell_to_output_weights_ptr, n_cell,
+                                       cell_to_output_weights_scale,
+                                       recovered_cell_weights);
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(
+        recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
+        output_gate_scratch);
+  }
+  tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
+                                     output_gate_scratch);
+  tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
+                                        params->activation, cell_scratch);
+  tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
+                                         n_batch * n_cell, output_gate_scratch);
+
+  // For each batch: update the projection and output_state.
+  const bool use_projection_weight = (projection_weights_ptr != nullptr);
+  const bool use_projection_bias = (projection_bias_ptr != nullptr);
+  if (use_projection_weight) {
+    if (use_projection_bias) {
+      tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
+                                            n_batch, output_ptr_batch);
+    } else {
+      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+    }
+    if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
+      // Save quantization and matmul computation for all zero input.
+      float unused_min, unused_max;
+      for (int b = 0; b < n_batch; ++b) {
+        const int offset = b * n_cell;
+        tensor_utils::SymmetricQuantizeFloats(
+            output_gate_scratch + offset, n_cell,
+            quantized_cell_state_ptr + offset, &unused_min, &unused_max,
+            &scaling_factors[b]);
+      }
+      for (int b = 0; b < n_batch; ++b) {
+        product_scaling_factors[b] =
+            scaling_factors[b] * projection_weights_scale;
+      }
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          projection_weights_ptr, n_output, n_cell, quantized_cell_state_ptr,
+          product_scaling_factors, n_batch, output_ptr_batch,
+          /*result_stride=*/1);
+    }
+    if (params->proj_clip > 0.0) {
+      tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
+                               params->proj_clip, output_ptr_batch);
+    }
+  } else {
+    tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
+                             output_ptr_batch);
+  }
+  tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
+                           output_state_ptr);
+}
+}  // namespace
+
+TfLiteStatus EvalFloat(
+    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
+    const TfLiteTensor* input_to_forget_weights,
+    const TfLiteTensor* input_to_cell_weights,
+    const TfLiteTensor* input_to_output_weights,
+    const TfLiteTensor* recurrent_to_input_weights,
+    const TfLiteTensor* recurrent_to_forget_weights,
+    const TfLiteTensor* recurrent_to_cell_weights,
+    const TfLiteTensor* recurrent_to_output_weights,
+    const TfLiteTensor* cell_to_input_weights,
+    const TfLiteTensor* cell_to_forget_weights,
+    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* aux_input_to_input_weights,
+    const TfLiteTensor* aux_input_to_forget_weights,
+    const TfLiteTensor* aux_input_to_cell_weights,
+    const TfLiteTensor* aux_input_to_output_weights,
+    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
+    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
+    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
+    TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
+    TfLiteTensor* cell_state, TfLiteTensor* output) {
+  const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
+  const int n_batch = input->dims->data[input->dims->size - 2];
+  const int n_input = input->dims->data[input->dims->size - 1];
+  const int aux_input_size =
+      (aux_input) ? aux_input->dims->data[aux_input->dims->size - 1] : 0;
+
+  // n_cell and n_output will be the same size when there is no projection.
+  const int n_cell = input_to_output_weights->dims->data[0];
+  const int n_output = recurrent_to_output_weights->dims->data[1];
+
+  // Since we have already checked that weights are all there or none, we can
+  // check the existense of only one to the get the condition.
+  const bool use_cifg = (input_to_input_weights == nullptr);
+  const bool use_peephole = (cell_to_output_weights != nullptr);
+
+  // Index the scratch buffers pointers to the global scratch buffer.
+  float* input_gate_scratch = nullptr;
+  float* cell_scratch = nullptr;
+  float* forget_gate_scratch = nullptr;
+  float* output_gate_scratch = nullptr;
+  if (use_cifg) {
+    cell_scratch = scratch_buffer->data.f;
+    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
+    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
+  } else {
+    input_gate_scratch = scratch_buffer->data.f;
+    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
+    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
+    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
+  }
+
+  // Check optional tensors, the respective pointers can be null.
+  const float* input_to_input_weights_ptr =
+      (use_cifg) ? nullptr : input_to_input_weights->data.f;
+  const float* recurrent_to_input_weights_ptr =
+      (use_cifg) ? nullptr : recurrent_to_input_weights->data.f;
+  const float* input_gate_bias_ptr =
+      (use_cifg) ? nullptr : input_gate_bias->data.f;
+  const float* cell_to_input_weights_ptr =
+      (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr;
+  const float* cell_to_forget_weights_ptr =
+      (use_peephole) ? cell_to_forget_weights->data.f : nullptr;
+  const float* cell_to_output_weights_ptr =
+      (use_peephole) ? cell_to_output_weights->data.f : nullptr;
+  const float* projection_weights_ptr =
+      (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
+  const float* projection_bias_ptr =
+      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
+
+  float* aux_input_ptr = nullptr;
+  float* aux_input_to_input_weights_ptr = nullptr;
+  float* aux_input_to_forget_weights_ptr = nullptr;
+  float* aux_input_to_cell_weights_ptr = nullptr;
+  float* aux_input_to_output_weights_ptr = nullptr;
+  if (aux_input_size > 0) {
+    aux_input_ptr = aux_input->data.f;
+    aux_input_to_input_weights_ptr = aux_input_to_input_weights->data.f;
+    aux_input_to_forget_weights_ptr = aux_input_to_forget_weights->data.f;
+    aux_input_to_cell_weights_ptr = aux_input_to_cell_weights->data.f;
+    aux_input_to_output_weights_ptr = aux_input_to_output_weights->data.f;
+  }
+
+  // Loop through the sequence.
+  const int input_step = n_batch * n_input;
+  const int output_step = n_batch * output->dims->data[output->dims->size - 1];
+  for (int t = 0; t < max_time; t++) {
+    // If this is the forward_sequence, step forward, otherwise step backwards.
+    const int t_rel = forward_sequence ? t : max_time - t - 1;
+    const float* input_ptr = input->data.f + t_rel * input_step;
+    float* output_ptr_time =
+        output->data.f + t_rel * output_step + output_offset;
+
+    LstmStepWithAuxInput(
+        input_ptr, input_to_input_weights_ptr, input_to_forget_weights->data.f,
+        input_to_cell_weights->data.f, input_to_output_weights->data.f,
+        aux_input_ptr, aux_input_to_input_weights_ptr,
+        aux_input_to_forget_weights_ptr, aux_input_to_cell_weights_ptr,
+        aux_input_to_output_weights_ptr, recurrent_to_input_weights_ptr,
+        recurrent_to_forget_weights->data.f, recurrent_to_cell_weights->data.f,
+        recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
+        cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
+        input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
+        output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
+        params, n_batch, n_cell, n_input, aux_input_size, n_output,
+        activation_state->data.f, cell_state->data.f, input_gate_scratch,
+        forget_gate_scratch, cell_scratch, output_gate_scratch,
+        output_ptr_time);
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalHybrid(
+    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
+    const TfLiteTensor* input_to_forget_weights,
+    const TfLiteTensor* input_to_cell_weights,
+    const TfLiteTensor* input_to_output_weights,
+    const TfLiteTensor* recurrent_to_input_weights,
+    const TfLiteTensor* recurrent_to_forget_weights,
+    const TfLiteTensor* recurrent_to_cell_weights,
+    const TfLiteTensor* recurrent_to_output_weights,
+    const TfLiteTensor* cell_to_input_weights,
+    const TfLiteTensor* cell_to_forget_weights,
+    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* aux_input_to_input_weights,
+    const TfLiteTensor* aux_input_to_forget_weights,
+    const TfLiteTensor* aux_input_to_cell_weights,
+    const TfLiteTensor* aux_input_to_output_weights,
+    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
+    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
+    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
+    TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
+    TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
+    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
+    TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
+    TfLiteTensor* output_state, TfLiteTensor* cell_state,
+    TfLiteTensor* output) {
+  const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
+  const int n_batch = input->dims->data[input->dims->size - 2];
+  const int n_input = input->dims->data[input->dims->size - 1];
+  const int aux_input_size =
+      (aux_input) ? aux_input->dims->data[aux_input->dims->size - 1] : 0;
+  // n_cell and n_output will be the same size when there is no projection.
+  const int n_cell = input_to_output_weights->dims->data[0];
+  const int n_output = recurrent_to_output_weights->dims->data[1];
+
+  // Since we have already checked that weights are all there or none, we can
+  // check the existence of only one to get the condition.
+  const bool use_cifg = (input_to_input_weights == nullptr);
+  const bool use_peephole = (cell_to_output_weights != nullptr);
+
+  float* input_gate_scratch = nullptr;
+  float* cell_scratch = nullptr;
+  float* forget_gate_scratch = nullptr;
+  float* output_gate_scratch = nullptr;
+  if (use_cifg) {
+    cell_scratch = scratch_buffer->data.f;
+    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
+    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
+  } else {
+    input_gate_scratch = scratch_buffer->data.f;
+    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
+    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
+    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
+  }
+
+  // Check optional tensors, the respective pointers can be null.
+  int8_t* input_to_input_weights_ptr = nullptr;
+  float input_to_input_weights_scale = 1.0f;
+  int8_t* recurrent_to_input_weights_ptr = nullptr;
+  float recurrent_to_input_weights_scale = 1.0f;
+  float* input_gate_bias_ptr = nullptr;
+  if (!use_cifg) {
+    input_to_input_weights_ptr =
+        reinterpret_cast<int8_t*>(input_to_input_weights->data.uint8);
+    recurrent_to_input_weights_ptr =
+        reinterpret_cast<int8_t*>(recurrent_to_input_weights->data.uint8);
+    input_gate_bias_ptr = input_gate_bias->data.f;
+    input_to_input_weights_scale = input_to_input_weights->params.scale;
+    recurrent_to_input_weights_scale = recurrent_to_input_weights->params.scale;
+  }
+
+  int8_t* cell_to_input_weights_ptr = nullptr;
+  int8_t* cell_to_forget_weights_ptr = nullptr;
+  int8_t* cell_to_output_weights_ptr = nullptr;
+  float cell_to_input_weights_scale = 1.0f;
+  float cell_to_forget_weights_scale = 1.0f;
+  float cell_to_output_weights_scale = 1.0f;
+  if (use_peephole) {
+    if (!use_cifg) {
+      cell_to_input_weights_ptr =
+          reinterpret_cast<int8_t*>(cell_to_input_weights->data.uint8);
+      cell_to_input_weights_scale = cell_to_input_weights->params.scale;
+    }
+    cell_to_forget_weights_ptr =
+        reinterpret_cast<int8_t*>(cell_to_forget_weights->data.uint8);
+    cell_to_output_weights_ptr =
+        reinterpret_cast<int8_t*>(cell_to_output_weights->data.uint8);
+    cell_to_forget_weights_scale = cell_to_forget_weights->params.scale;
+    cell_to_output_weights_scale = cell_to_output_weights->params.scale;
+  }
+
+  const int8_t* projection_weights_ptr =
+      (projection_weights == nullptr)
+          ? nullptr
+          : reinterpret_cast<int8_t*>(projection_weights->data.uint8);
+  const float projection_weights_scale =
+      (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale;
+  const float* projection_bias_ptr =
+      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
+
+  // Required tensors, pointers are non-null.
+  const int8_t* input_to_forget_weights_ptr =
+      reinterpret_cast<int8_t*>(input_to_forget_weights->data.uint8);
+  const float input_to_forget_weights_scale =
+      input_to_forget_weights->params.scale;
+  const int8_t* input_to_cell_weights_ptr =
+      reinterpret_cast<int8_t*>(input_to_cell_weights->data.uint8);
+  const float input_to_cell_weights_scale = input_to_cell_weights->params.scale;
+  const int8_t* input_to_output_weights_ptr =
+      reinterpret_cast<int8_t*>(input_to_output_weights->data.uint8);
+  const float input_to_output_weights_scale =
+      input_to_output_weights->params.scale;
+  const int8_t* recurrent_to_forget_weights_ptr =
+      reinterpret_cast<int8_t*>(recurrent_to_forget_weights->data.uint8);
+  const float recurrent_to_forget_weights_scale =
+      recurrent_to_forget_weights->params.scale;
+  const int8_t* recurrent_to_cell_weights_ptr =
+      reinterpret_cast<int8_t*>(recurrent_to_cell_weights->data.uint8);
+  const float recurrent_to_cell_weights_scale =
+      recurrent_to_cell_weights->params.scale;
+  const int8_t* recurrent_to_output_weights_ptr =
+      reinterpret_cast<int8_t*>(recurrent_to_output_weights->data.uint8);
+  const float recurrent_to_output_weights_scale =
+      recurrent_to_output_weights->params.scale;
+  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
+  const float* cell_bias_ptr = cell_bias->data.f;
+  const float* output_gate_bias_ptr = output_gate_bias->data.f;
+
+  float* output_state_ptr = output_state->data.f;
+  float* cell_state_ptr = cell_state->data.f;
+
+  // Temporary storage for quantized values and scaling factors.
+  int8_t* quantized_input_ptr =
+      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
+  int8_t* quantized_aux_input_ptr =
+      (aux_input_quantized == nullptr)
+          ? nullptr
+          : reinterpret_cast<int8_t*>(aux_input_quantized->data.uint8);
+  int8_t* quantized_output_state_ptr =
+      reinterpret_cast<int8_t*>(output_state_quantized->data.uint8);
+  int8_t* quantized_cell_state_ptr =
+      reinterpret_cast<int8_t*>(cell_state_quantized->data.uint8);
+  float* scaling_factors_ptr = scaling_factors->data.f;
+  float* prod_scaling_factors_ptr = prod_scaling_factors->data.f;
+  float* recovered_cell_weights_ptr = recovered_cell_weights->data.f;
+
+  // Auxiliary input and weights.
+  float* aux_input_ptr = nullptr;
+  int8_t* aux_input_to_input_weights_ptr = nullptr;
+  int8_t* aux_input_to_forget_weights_ptr = nullptr;
+  int8_t* aux_input_to_cell_weights_ptr = nullptr;
+  int8_t* aux_input_to_output_weights_ptr = nullptr;
+  float aux_input_to_input_weights_scale = 0.0f;
+  float aux_input_to_forget_weights_scale = 0.0f;
+  float aux_input_to_cell_weights_scale = 0.0f;
+  float aux_input_to_output_weights_scale = 0.0f;
+  if (aux_input_size > 0) {
+    aux_input_ptr = aux_input->data.f;
+    aux_input_to_input_weights_ptr =
+        reinterpret_cast<int8_t*>(aux_input_to_input_weights->data.uint8);
+    aux_input_to_forget_weights_ptr =
+        reinterpret_cast<int8_t*>(aux_input_to_forget_weights->data.uint8);
+    aux_input_to_cell_weights_ptr =
+        reinterpret_cast<int8_t*>(aux_input_to_cell_weights->data.uint8);
+    aux_input_to_output_weights_ptr =
+        reinterpret_cast<int8_t*>(aux_input_to_output_weights->data.uint8);
+    aux_input_to_input_weights_scale = aux_input_to_input_weights->params.scale;
+    aux_input_to_forget_weights_scale =
+        aux_input_to_forget_weights->params.scale;
+    aux_input_to_cell_weights_scale = aux_input_to_cell_weights->params.scale;
+    aux_input_to_output_weights_scale =
+        aux_input_to_output_weights->params.scale;
+  }
+
+  // Feed the sequence into the LSTM step-by-step.
+  const int input_step = n_batch * n_input;
+  const int output_step = n_batch * output->dims->data[output->dims->size - 1];
+  for (int t = 0; t < max_time; t++) {
+    // If this is the forward_sequence, step forward, otherwise step backwards.
+    const int t_rel = forward_sequence ? t : max_time - t - 1;
+    const float* input_ptr = input->data.f + t_rel * input_step;
+    float* output_ptr = output->data.f + t_rel * output_step + output_offset;
+
+    LstmStepWithAuxInput(
+        input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
+        input_to_forget_weights_ptr, input_to_forget_weights_scale,
+        input_to_cell_weights_ptr, input_to_cell_weights_scale,
+        input_to_output_weights_ptr, input_to_output_weights_scale,
+        aux_input_ptr, aux_input_to_input_weights_ptr,
+        aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
+        aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
+        aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
+        aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
+        recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
+        recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
+        recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
+        recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
+        cell_to_input_weights_scale, cell_to_forget_weights_ptr,
+        cell_to_forget_weights_scale, cell_to_output_weights_ptr,
+        cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr,
+        cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
+        projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell,
+        n_input, aux_input_size, n_output, input_gate_scratch,
+        forget_gate_scratch, cell_scratch, output_gate_scratch,
+        scaling_factors_ptr, prod_scaling_factors_ptr,
+        recovered_cell_weights_ptr, quantized_input_ptr,
+        quantized_aux_input_ptr, quantized_output_state_ptr,
+        quantized_cell_state_ptr, output_state_ptr, cell_state_ptr, output_ptr);
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace lstm_eval
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.h b/tensorflow/contrib/lite/kernels/lstm_eval.h
new file mode 100644
index 0000000000..adf8cf0f64
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.h
@@ -0,0 +1,79 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_LSTM_EVAL_H_
+#define TENSORFLOW_CONTRIB_LITE_KERNELS_LSTM_EVAL_H_
+
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace lstm_eval {
+
+TfLiteStatus EvalFloat(
+    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
+    const TfLiteTensor* input_to_forget_weights,
+    const TfLiteTensor* input_to_cell_weights,
+    const TfLiteTensor* input_to_output_weights,
+    const TfLiteTensor* recurrent_to_input_weights,
+    const TfLiteTensor* recurrent_to_forget_weights,
+    const TfLiteTensor* recurrent_to_cell_weights,
+    const TfLiteTensor* recurrent_to_output_weights,
+    const TfLiteTensor* cell_to_input_weights,
+    const TfLiteTensor* cell_to_forget_weights,
+    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* aux_input_to_input_weights,
+    const TfLiteTensor* aux_input_to_forget_weights,
+    const TfLiteTensor* aux_input_to_cell_weights,
+    const TfLiteTensor* aux_input_to_output_weights,
+    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
+    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
+    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
+    TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
+    TfLiteTensor* cell_state, TfLiteTensor* output);
+
+TfLiteStatus EvalHybrid(
+    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
+    const TfLiteTensor* input_to_forget_weights,
+    const TfLiteTensor* input_to_cell_weights,
+    const TfLiteTensor* input_to_output_weights,
+    const TfLiteTensor* recurrent_to_input_weights,
+    const TfLiteTensor* recurrent_to_forget_weights,
+    const TfLiteTensor* recurrent_to_cell_weights,
+    const TfLiteTensor* recurrent_to_output_weights,
+    const TfLiteTensor* cell_to_input_weights,
+    const TfLiteTensor* cell_to_forget_weights,
+    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* aux_input_to_input_weights,
+    const TfLiteTensor* aux_input_to_forget_weights,
+    const TfLiteTensor* aux_input_to_cell_weights,
+    const TfLiteTensor* aux_input_to_output_weights,
+    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
+    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
+    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
+    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
+    TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
+    TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
+    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
+    TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
+    TfLiteTensor* output_state, TfLiteTensor* cell_state, TfLiteTensor* output);
+
+}  // namespace lstm_eval
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
+#endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_LSTM_EVAL_H_
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
index 63817bd886..ec9cf38b83 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h"
 #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
 #include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/kernels/lstm_eval.h"
 #include "tensorflow/contrib/lite/kernels/op_macros.h"
 
 namespace tflite {
@@ -429,273 +430,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-// The LSTM Op engine.
-TfLiteStatus EvalFloat(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer,
-    TfLiteTensor* activation_state, TfLiteTensor* cell_state,
-    TfLiteTensor* output) {
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
-  const int n_input = input->dims->data[2];
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  const float* input_to_input_weights_ptr =
-      (use_cifg) ? nullptr : input_to_input_weights->data.f;
-  const float* recurrent_to_input_weights_ptr =
-      (use_cifg) ? nullptr : recurrent_to_input_weights->data.f;
-  const float* input_gate_bias_ptr =
-      (use_cifg) ? nullptr : input_gate_bias->data.f;
-  const float* cell_to_input_weights_ptr =
-      (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr;
-  const float* cell_to_forget_weights_ptr =
-      (use_peephole) ? cell_to_forget_weights->data.f : nullptr;
-  const float* cell_to_output_weights_ptr =
-      (use_peephole) ? cell_to_output_weights->data.f : nullptr;
-  const float* projection_weights_ptr =
-      (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const float* input_to_forget_weights_ptr = input_to_forget_weights->data.f;
-  const float* input_to_cell_weights_ptr = input_to_cell_weights->data.f;
-  const float* input_to_output_weights_ptr = input_to_output_weights->data.f;
-  const float* recurrent_to_forget_weights_ptr =
-      recurrent_to_forget_weights->data.f;
-  const float* recurrent_to_cell_weights_ptr =
-      recurrent_to_cell_weights->data.f;
-  const float* recurrent_to_output_weights_ptr =
-      recurrent_to_output_weights->data.f;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* activation_state_ptr = activation_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-
-  // Feed the sequence into the LSTM step-by-step.
-  for (int t = 0; t < max_time; t++) {
-    const float* input_ptr_batch = input->data.f + t * n_batch * n_input;
-    float* output_ptr_batch = output->data.f + t * n_batch * n_output;
-
-    kernel_utils::LstmStep(
-        input_ptr_batch, input_to_input_weights_ptr,
-        input_to_forget_weights_ptr, input_to_cell_weights_ptr,
-        input_to_output_weights_ptr, recurrent_to_input_weights_ptr,
-        recurrent_to_forget_weights_ptr, recurrent_to_cell_weights_ptr,
-        recurrent_to_output_weights_ptr, cell_to_input_weights_ptr,
-        cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
-        input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr,
-        output_gate_bias_ptr, projection_weights_ptr, projection_bias_ptr,
-        params, n_batch, n_cell, n_input, n_output, activation_state_ptr,
-        cell_state_ptr, input_gate_scratch, forget_gate_scratch, cell_scratch,
-        output_gate_scratch, output_ptr_batch);
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus EvalHybrid(
-    const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
-    const TfLiteTensor* input_to_forget_weights,
-    const TfLiteTensor* input_to_cell_weights,
-    const TfLiteTensor* input_to_output_weights,
-    const TfLiteTensor* recurrent_to_input_weights,
-    const TfLiteTensor* recurrent_to_forget_weights,
-    const TfLiteTensor* recurrent_to_cell_weights,
-    const TfLiteTensor* recurrent_to_output_weights,
-    const TfLiteTensor* cell_to_input_weights,
-    const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights,
-    const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
-    const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
-    const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, TfLiteTensor* scratch_buffer,
-    TfLiteTensor* scaling_factors, TfLiteTensor* prod_scaling_factors,
-    TfLiteTensor* recovered_cell_weights, TfLiteTensor* input_quantized,
-    TfLiteTensor* activation_state_quantized,
-    TfLiteTensor* cell_state_quantized, TfLiteTensor* activation_state,
-    TfLiteTensor* cell_state, TfLiteTensor* output) {
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
-  const int n_input = input->dims->data[2];
-  // n_cell and n_output will be the same size when there is no projection.
-  const int n_cell = input_to_output_weights->dims->data[0];
-  const int n_output = recurrent_to_output_weights->dims->data[1];
-
-  // Since we have already checked that weights are all there or none, we can
-  // check the existence of only one to get the condition.
-  const bool use_cifg = (input_to_input_weights == nullptr);
-  const bool use_peephole = (cell_to_output_weights != nullptr);
-
-  float* input_gate_scratch = nullptr;
-  float* cell_scratch = nullptr;
-  float* forget_gate_scratch = nullptr;
-  float* output_gate_scratch = nullptr;
-  if (use_cifg) {
-    cell_scratch = scratch_buffer->data.f;
-    forget_gate_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-  } else {
-    input_gate_scratch = scratch_buffer->data.f;
-    cell_scratch = scratch_buffer->data.f + n_cell * n_batch;
-    forget_gate_scratch = scratch_buffer->data.f + 2 * n_cell * n_batch;
-    output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch;
-  }
-
-  // Check optional tensors, the respective pointers can be null.
-  int8_t* input_to_input_weights_ptr = nullptr;
-  float input_to_input_weights_scale = 1.0f;
-  int8_t* recurrent_to_input_weights_ptr = nullptr;
-  float recurrent_to_input_weights_scale = 1.0f;
-  float* input_gate_bias_ptr = nullptr;
-  if (!use_cifg) {
-    input_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(input_to_input_weights->data.uint8);
-    recurrent_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(recurrent_to_input_weights->data.uint8);
-    input_gate_bias_ptr = input_gate_bias->data.f;
-    input_to_input_weights_scale = input_to_input_weights->params.scale;
-    recurrent_to_input_weights_scale = recurrent_to_input_weights->params.scale;
-  }
-
-  int8_t* cell_to_input_weights_ptr = nullptr;
-  int8_t* cell_to_forget_weights_ptr = nullptr;
-  int8_t* cell_to_output_weights_ptr = nullptr;
-  float cell_to_input_weights_scale = 1.0f;
-  float cell_to_forget_weights_scale = 1.0f;
-  float cell_to_output_weights_scale = 1.0f;
-  if (use_peephole) {
-    if (!use_cifg) {
-      cell_to_input_weights_ptr =
-          reinterpret_cast<int8_t*>(cell_to_input_weights->data.uint8);
-      cell_to_input_weights_scale = cell_to_input_weights->params.scale;
-    }
-    cell_to_forget_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_forget_weights->data.uint8);
-    cell_to_output_weights_ptr =
-        reinterpret_cast<int8_t*>(cell_to_output_weights->data.uint8);
-    cell_to_forget_weights_scale = cell_to_forget_weights->params.scale;
-    cell_to_output_weights_scale = cell_to_output_weights->params.scale;
-  }
-
-  const int8_t* projection_weights_ptr =
-      (projection_weights == nullptr)
-          ? nullptr
-          : reinterpret_cast<int8_t*>(projection_weights->data.uint8);
-  float projection_weights_scale =
-      (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale;
-  const float* projection_bias_ptr =
-      (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
-
-  // Required tensors, pointers are non-null.
-  const int8_t* input_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_forget_weights->data.uint8);
-  const float input_to_forget_weights_scale =
-      input_to_forget_weights->params.scale;
-  const int8_t* input_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_cell_weights->data.uint8);
-  const float input_to_cell_weights_scale = input_to_cell_weights->params.scale;
-  const int8_t* input_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(input_to_output_weights->data.uint8);
-  const float input_to_output_weights_scale =
-      input_to_output_weights->params.scale;
-  const int8_t* recurrent_to_forget_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_forget_weights->data.uint8);
-  const float recurrent_to_forget_weights_scale =
-      recurrent_to_forget_weights->params.scale;
-  const int8_t* recurrent_to_cell_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_cell_weights->data.uint8);
-  const float recurrent_to_cell_weights_scale =
-      recurrent_to_cell_weights->params.scale;
-  const int8_t* recurrent_to_output_weights_ptr =
-      reinterpret_cast<int8_t*>(recurrent_to_output_weights->data.uint8);
-  const float recurrent_to_output_weights_scale =
-      recurrent_to_output_weights->params.scale;
-  const float* forget_gate_bias_ptr = forget_gate_bias->data.f;
-  const float* cell_bias_ptr = cell_bias->data.f;
-  const float* output_gate_bias_ptr = output_gate_bias->data.f;
-
-  float* activation_state_ptr = activation_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-
-  // Temporary storage for quantized values and scaling factors.
-  int8_t* quantized_input_ptr =
-      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
-  int8_t* quantized_activation_state_ptr =
-      reinterpret_cast<int8_t*>(activation_state_quantized->data.uint8);
-  int8_t* quantized_cell_state_ptr =
-      reinterpret_cast<int8_t*>(cell_state_quantized->data.uint8);
-  float* scaling_factors_ptr = scaling_factors->data.f;
-  float* prod_scaling_factors_ptr = prod_scaling_factors->data.f;
-  float* recovered_cell_weights_ptr = recovered_cell_weights->data.f;
-
-  // Feed the sequence into the LSTM step-by-step.
-  for (int t = 0; t < max_time; t++) {
-    const float* input_ptr_batch = input->data.f + t * n_batch * n_input;
-    float* output_ptr_batch = output->data.f + t * n_batch * n_output;
-
-    kernel_utils::LstmStep(
-        input_ptr_batch, input_to_input_weights_ptr,
-        input_to_input_weights_scale, input_to_forget_weights_ptr,
-        input_to_forget_weights_scale, input_to_cell_weights_ptr,
-        input_to_cell_weights_scale, input_to_output_weights_ptr,
-        input_to_output_weights_scale, recurrent_to_input_weights_ptr,
-        recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
-        recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
-        recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
-        recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
-        cell_to_input_weights_scale, cell_to_forget_weights_ptr,
-        cell_to_forget_weights_scale, cell_to_output_weights_ptr,
-        cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr,
-        cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
-        projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell,
-        n_input, n_output, input_gate_scratch, forget_gate_scratch,
-        cell_scratch, output_gate_scratch, scaling_factors_ptr,
-        prod_scaling_factors_ptr, recovered_cell_weights_ptr,
-        quantized_input_ptr, quantized_activation_state_ptr,
-        quantized_cell_state_ptr, activation_state_ptr, cell_state_ptr,
-        output_ptr_batch);
-  }
-  return kTfLiteOk;
-}
-
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
@@ -750,15 +484,21 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (input_to_output_weights->type) {
     case kTfLiteFloat32: {
-      return EvalFloat(input, input_to_input_weights, input_to_forget_weights,
-                       input_to_cell_weights, input_to_output_weights,
-                       recurrent_to_input_weights, recurrent_to_forget_weights,
-                       recurrent_to_cell_weights, recurrent_to_output_weights,
-                       cell_to_input_weights, cell_to_forget_weights,
-                       cell_to_output_weights, input_gate_bias,
-                       forget_gate_bias, cell_bias, output_gate_bias,
-                       projection_weights, projection_bias, params,
-                       scratch_buffer, activation_state, cell_state, output);
+      return lstm_eval::EvalFloat(
+          input, input_to_input_weights, input_to_forget_weights,
+          input_to_cell_weights, input_to_output_weights,
+          recurrent_to_input_weights, recurrent_to_forget_weights,
+          recurrent_to_cell_weights, recurrent_to_output_weights,
+          cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
+          /*aux_input=*/nullptr,
+          /*aux_input_to_input_weights=*/nullptr,
+          /*aux_input_to_forget_weights=*/nullptr,
+          /*aux_input_to_cell_weights=*/nullptr,
+          /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
+          forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
+          projection_bias, params, /*forward_sequence=*/true,
+          /*output_offset=*/0, scratch_buffer, activation_state, cell_state,
+          output);
     }
     case kTfLiteUInt8: {
       TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1);
@@ -771,17 +511,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           GetTemporary(context, node, /*index=*/5);
       TfLiteTensor* recovered_cell_weights =
           GetTemporary(context, node, /*index=*/6);
-      return EvalHybrid(
+      return lstm_eval::EvalHybrid(
           input, input_to_input_weights, input_to_forget_weights,
           input_to_cell_weights, input_to_output_weights,
           recurrent_to_input_weights, recurrent_to_forget_weights,
           recurrent_to_cell_weights, recurrent_to_output_weights,
           cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
-          input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias,
-          projection_weights, projection_bias, params, scratch_buffer,
-          scaling_factors, prod_scaling_factors, recovered_cell_weights,
-          input_quantized, activation_state_quantized, cell_state_quantized,
-          activation_state, cell_state, output);
+          /*aux_input=*/nullptr,
+          /*aux_input_to_input_weights=*/nullptr,
+          /*aux_input_to_forget_weights=*/nullptr,
+          /*aux_input_to_cell_weights=*/nullptr,
+          /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
+          forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
+          projection_bias, params, /*forward_sequence=*/true,
+          /*output_offset=*/0, scratch_buffer, scaling_factors,
+          prod_scaling_factors, recovered_cell_weights, input_quantized,
+          /*aux_input_quantized=*/nullptr, activation_state_quantized,
+          cell_state_quantized, activation_state, cell_state, output);
     }
     default:
       context->ReportError(context, "Type %d is not currently supported.",
-- 
GitLab


From 123de2797a4348c963b597096762085bfa09eab1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 02:01:34 -0700
Subject: [PATCH 0415/1085] compat: Update forward compatibility horizon to
 2018-10-05

PiperOrigin-RevId: 215874612
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 76e08610ba..8f4e8e0b98 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 4)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 5)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 4eae9db0cc3fd7db9f2096e7d19cc69d5ffddb5d Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Fri, 5 Oct 2018 21:12:03 +0800
Subject: [PATCH 0416/1085] Improve performance of clip_by_norm when the input
 is IndexedSlices

---
 tensorflow/python/ops/clip_ops.py      | 19 ++++---
 tensorflow/python/ops/clip_ops_test.py | 75 +++++++++++++++++++++++---
 2 files changed, 80 insertions(+), 14 deletions(-)

diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py
index 45516068f4..d1a445070a 100644
--- a/tensorflow/python/ops/clip_ops.py
+++ b/tensorflow/python/ops/clip_ops.py
@@ -131,7 +131,7 @@ def clip_by_norm(t, clip_norm, axes=None, name=None):
   an optimizer.
 
   Args:
-    t: A `Tensor`.
+    t: A `Tensor` or `IndexedSlices`.
     clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
     axes: A 1-D (vector) `Tensor` of type int32 containing the dimensions
       to use for computing the L2-norm. If `None` (the default), uses all
@@ -139,26 +139,29 @@ def clip_by_norm(t, clip_norm, axes=None, name=None):
     name: A name for the operation (optional).
 
   Returns:
-    A clipped `Tensor`.
+    A clipped `Tensor` or `IndexedSlices`.
   """
   with ops.name_scope(name, "clip_by_norm", [t, clip_norm]) as name:
-    t = ops.convert_to_tensor(t, name="t")
+    values = ops.convert_to_tensor(
+        t.values if isinstance(t, ops.IndexedSlices) else t, name="t")
 
     # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
-    l2sum = math_ops.reduce_sum(t * t, axes, keepdims=True)
+    l2sum = math_ops.reduce_sum(values * values, axes, keepdims=True)
     pred = l2sum > 0
     # Two-tap tf.where trick to bypass NaN gradients
     l2sum_safe = array_ops.where(pred, l2sum, array_ops.ones_like(l2sum))
     l2norm = array_ops.where(pred, math_ops.sqrt(l2sum_safe), l2sum)
-    intermediate = t * clip_norm
+    intermediate = values * clip_norm
     # Assert that the shape is compatible with the initial shape,
     # to prevent unintentional broadcasting.
-    _ = t.shape.merge_with(intermediate.shape)
-    tclip = array_ops.identity(
+    _ = values.shape.merge_with(intermediate.shape)
+    values_clip = array_ops.identity(
         intermediate / math_ops.maximum(l2norm, clip_norm), name=name)
 
-  return tclip
+    if isinstance(t, ops.IndexedSlices):
+      return ops.IndexedSlices(values_clip, t.indices, t.dense_shape)
 
+    return values_clip     
 
 @tf_export("linalg.global_norm", "global_norm")
 @deprecation.deprecated_endpoints("global_norm")
diff --git a/tensorflow/python/ops/clip_ops_test.py b/tensorflow/python/ops/clip_ops_test.py
index 444cd0f62c..8ac41e984a 100644
--- a/tensorflow/python/ops/clip_ops_test.py
+++ b/tensorflow/python/ops/clip_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import numerics
 from tensorflow.python.platform import test
@@ -29,7 +30,7 @@ class ClipOpsTest(test.TestCase):
   def __init__(self, method_name="runTest"):
     super(ClipOpsTest, self).__init__(method_name)
 
-  def _testClipByNorm(self, inputs, max_norm, expected):
+  def _testClipTensorByNorm(self, inputs, max_norm, expected):
     with self.cached_session() as sess:
       input_op = constant_op.constant(inputs)
       clipped = clip_ops.clip_by_norm(input_op, max_norm)
@@ -37,13 +38,75 @@ class ClipOpsTest(test.TestCase):
       result, _ = sess.run([clipped, check_op])
     self.assertAllClose(result, expected)
 
-  def testClipByNorm(self):
+  def _testClipIndexedSlicesByNorm(self,
+                                   values,
+                                   indices,
+                                   shape,
+                                   max_norm,
+                                   axes):
+     with self.cached_session() as sess:
+       values = constant_op.constant(values)
+       indices = constant_op.constant(indices)
+       shape = constant_op.constant(shape)
+       # IndexedSlices mode
+       indixed_slices = ops.IndexedSlices(values, indices, shape)
+       clipped = clip_ops.clip_by_norm(indixed_slices, max_norm, axes)
+       # clipped should be IndexedSlices
+       self.assertIsInstance(clipped, ops.IndexedSlices)
+       clipped = ops.convert_to_tensor(clipped)
+       
+       # Tensor mode
+       dense_tensor = ops.convert_to_tensor(indixed_slices)
+       dense_clipped = clip_ops.clip_by_norm(dense_tensor, max_norm, axes)
+       result, expected = sess.run([clipped, dense_clipped])
+     self.assertAllClose(result, expected)
+      
+  def testClipTensorByNorm(self):
     # Simple example
-    self._testClipByNorm([[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]], 4.0,
-                         [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]])
+    self._testClipTensorByNorm(
+        [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]], 4.0,
+        [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]])
     # Zero norm
-    self._testClipByNorm([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], 4.0,
-                         [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])
+    self._testClipTensorByNorm(
+        [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], 4.0,
+        [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])
+
+  def testClipIndexedSlicesByNorm(self):
+    values = [[[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]],
+              [[0.0, 2.0, 0.0], [0.0, 0.0, -1.0]]]
+    indices = [2, 6]
+    shape = [10, 2, 3]
+    # Axes == None
+    self._testClipIndexedSlicesByNorm(
+        values, indices, shape, 4.0, None)
+
+    # Axes == 0
+    self._testClipIndexedSlicesByNorm(
+        values, indices, shape, 4.0, 0)
+
+    # Axes == 1
+    self._testClipIndexedSlicesByNorm(
+        values, indices, shape, 4.0, 1)
+
+    # Axes == 2
+    self._testClipIndexedSlicesByNorm(
+        values, indices, shape, 4.0, 1)
+
+    # Axes == [0, 1]
+    self._testClipIndexedSlicesByNorm(
+        values, indices, shape, 4.0, [0, 1])
+
+    # Axes == [0, 1]
+    self._testClipIndexedSlicesByNorm(
+        values, indices, shape, 4.0, [0, 2])
+
+    # Axes == [0, 1]
+    self._testClipIndexedSlicesByNorm(
+        values, indices, shape, 4.0, [1, 2])
+
+    # Axes == [0, 1]
+    self._testClipIndexedSlicesByNorm(
+        values, indices, shape, 4.0, [0, 1, 2])
 
 
 if __name__ == "__main__":
-- 
GitLab


From 6775ded4f887030b724fbcacbc3444b03844c073 Mon Sep 17 00:00:00 2001
From: knight <1004815462@qq.com>
Date: Fri, 5 Oct 2018 22:40:52 +0800
Subject: [PATCH 0417/1085] Update README.md

---
 tensorflow/compiler/plugin/README.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tensorflow/compiler/plugin/README.md b/tensorflow/compiler/plugin/README.md
index 9dd0d2bdab..af56461a1c 100644
--- a/tensorflow/compiler/plugin/README.md
+++ b/tensorflow/compiler/plugin/README.md
@@ -11,6 +11,3 @@ For integration into the unit test system, see the files:
 
 - tensorflow/compiler/tests/plugin.bzl
 - tensorflow/compiler/xla/tests/plugin.bzl
-
-
-- 
-- 
GitLab


From 92c8a77ba480bf4aeddea412cc1d2988f6ad81cd Mon Sep 17 00:00:00 2001
From: HyoukJoong Lee <hyouklee@google.com>
Date: Fri, 5 Oct 2018 07:46:22 -0700
Subject: [PATCH 0418/1085] Use absl::Span for HloModuleGroupMetadata

PiperOrigin-RevId: 215905026
---
 .../compiler/xla/service/hlo_module_group_metadata.cc     | 2 +-
 .../compiler/xla/service/hlo_module_group_metadata.h      | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
index 83352ef91b..b4aac4c807 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc
@@ -59,7 +59,7 @@ string HloModuleGroupMetadata::TrackedInstruction::ToString() const {
 }
 
 /* static */ StatusOr<std::unique_ptr<HloModuleGroupMetadata>>
-HloModuleGroupMetadata::Build(const std::vector<HloModule*>& modules) {
+HloModuleGroupMetadata::Build(absl::Span<HloModule* const> modules) {
   auto metadata = absl::make_unique<HloModuleGroupMetadata>(modules);
   TF_RETURN_IF_ERROR(metadata->Build());
   return std::move(metadata);
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
index 0311b73207..928df0f5a7 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h
@@ -102,14 +102,14 @@ class HloModuleGroupMetadata {
     HloInstruction* recv_done = nullptr;
   };
 
-  explicit HloModuleGroupMetadata(const std::vector<HloModule*>& modules)
-      : modules_(modules) {}
+  explicit HloModuleGroupMetadata(absl::Span<HloModule* const> modules)
+      : modules_(modules.begin(), modules.end()) {}
 
   ~HloModuleGroupMetadata() = default;
 
   // Build and return the metadata for the given modules.
   static StatusOr<std::unique_ptr<HloModuleGroupMetadata>> Build(
-      const std::vector<HloModule*>& modules);
+      absl::Span<HloModule* const> modules);
 
   // Returns true if the instruction is one of the 4 channel instructions (Send,
   // Recv, SendDone, RecvDone).
@@ -274,7 +274,7 @@ class HloModuleGroupMetadata {
   int64 max_channel_id_ = -1;
 
   // The modules that this metadata was built from.
-  const std::vector<HloModule*>& modules_;
+  const std::vector<HloModule*> modules_;
 
   absl::flat_hash_map<HloModule*, std::unique_ptr<TuplePointsToAnalysis>>
       points_to_analyses_;
-- 
GitLab


From 388ed2929ea024adcfb76ea9ddd78a38a87470b7 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Fri, 5 Oct 2018 08:03:19 -0700
Subject: [PATCH 0419/1085] [TF:XLA] Move broadcasting code out of BroadcastTo
 op into a common helper library.

Change XlaBinaryOp::Broadcast to use the BroadcastTo lowering, since it produces fewer extraneous reshapes and transposes. Even if the reshapes and transposes would later optimize away, this yields more readable output and makes life easier for HLO rewrites that run early.

Change in preparation for removing reshapes from SoftmaxCrossEntropyWithLogits.

PiperOrigin-RevId: 215906847
---
 tensorflow/compiler/tf2xla/kernels/BUILD      |  1 +
 .../compiler/tf2xla/kernels/binary_ops.cc     | 10 +-
 .../tf2xla/kernels/broadcast_to_op.cc         | 63 +------------
 .../compiler/tf2xla/kernels/cwise_ops.cc      | 57 +++---------
 .../compiler/tf2xla/kernels/cwise_ops.h       |  3 +-
 tensorflow/compiler/tf2xla/lib/BUILD          | 16 ++++
 tensorflow/compiler/tf2xla/lib/broadcast.cc   | 93 +++++++++++++++++++
 tensorflow/compiler/tf2xla/lib/broadcast.h    | 32 +++++++
 8 files changed, 165 insertions(+), 110 deletions(-)
 create mode 100644 tensorflow/compiler/tf2xla/lib/broadcast.cc
 create mode 100644 tensorflow/compiler/tf2xla/lib/broadcast.h

diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 95a010a119..224e5ea123 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -121,6 +121,7 @@ tf_kernel_library(
         "//tensorflow/compiler/tf2xla:common",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/lib:batch_dot",
+        "//tensorflow/compiler/tf2xla/lib:broadcast",
         "//tensorflow/compiler/tf2xla/lib:cholesky",
         "//tensorflow/compiler/tf2xla/lib:qr",
         "//tensorflow/compiler/tf2xla/lib:random",
diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
index a988d3c33e..47e517a657 100644
--- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
@@ -64,7 +64,7 @@ XLA_MAKE_BINARY(Complex, xla::Complex(lhs, rhs, extend_dimensions));
 // }
 static xla::XlaOp DivNoNanImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                                xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   auto zero = XlaHelpers::Zero(b, dtype);
   auto y_equals_0 = xla::Eq(y, zero);
   auto zeros = xla::ZerosLike(x);
@@ -84,7 +84,7 @@ XLA_MAKE_BINARY(DivNoNan,
 // }
 static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                                xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   if (DataTypeIsUnsigned(dtype)) {
     return xla::Div(x, y);
   }
@@ -105,7 +105,7 @@ XLA_MAKE_BINARY(FloorDiv,
 
 static xla::XlaOp XlogyImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                             xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   auto zero = XlaHelpers::Zero(b, dtype);
   auto is_zero = xla::Eq(x, zero);
   return xla::Select(is_zero, zero, xla::Mul(x, xla::Log(y)));
@@ -114,7 +114,7 @@ XLA_MAKE_BINARY(Xlogy, XlogyImpl(b, input_type(0), lhs, rhs, broadcast_helper));
 
 static xla::XlaOp XdivyImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                             xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   auto zero = XlaHelpers::Zero(b, dtype);
   auto is_zero = xla::Eq(x, zero);
   return xla::Select(is_zero, zero, xla::Div(x, y));
@@ -126,7 +126,7 @@ XLA_MAKE_BINARY(Xdivy, XdivyImpl(b, input_type(0), lhs, rhs, broadcast_helper));
 // return (x < T(0)) == (y < T(0)) ? trunc_mod : std::fmod(trunc_mod + y, y);
 static xla::XlaOp FloorModImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                                xla::XlaOp y, const BCast& broadcast_helper) {
-  std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper);
   auto zero = XlaHelpers::Zero(b, dtype);
   auto same_sign = xla::Eq(xla::Lt(x, zero), xla::Lt(y, zero));
   auto trunc_mod = xla::Rem(x, y);
diff --git a/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc b/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc
index 696c1c39be..9bb11fb67e 100644
--- a/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/broadcast_to_op.cc
@@ -13,16 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/algorithm/container.h"
-#include "tensorflow/compiler/tf2xla/shape_util.h"
-#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/lib/broadcast.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
-#include "tensorflow/compiler/xla/client/lib/constants.h"
-#include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/util/bcast.h"
 
 namespace tensorflow {
 namespace {
@@ -37,59 +32,9 @@ class BroadcastToOp : public XlaOpKernel {
     TensorShape output_shape;
     OP_REQUIRES_OK(context, context->ConstantInputAsShape(1, &output_shape));
 
-    OP_REQUIRES(context, input_shape.dims() <= output_shape.dims(),
-                errors::InvalidArgument(
-                    "Input rank (", input_shape.dims(),
-                    ") must be less than or equal to the output rank (",
-                    output_shape.dims(), ")"));
-
-    auto input_dims = input_shape.dim_sizes();
-    auto output_dims = output_shape.dim_sizes();
-
-    // Broadcasting is done right-to-left on right-aligned dimensions; reverse
-    // the two vectors so elements to be broadcast are aligned.
-    absl::c_reverse(input_dims);
-    absl::c_reverse(output_dims);
-
-    std::vector<int64> broadcast_dims;
-    std::vector<int64> broadcast_shape;
-    for (int i = 0; i < output_shape.dims(); ++i) {
-      if (i < input_shape.dims()) {
-        OP_REQUIRES(
-            context,
-            (output_dims[i] == 0 && input_dims[i] == 0) ||
-                (input_dims[i] != 0 && output_dims[i] % input_dims[i] == 0),
-            errors::InvalidArgument("invalid shape to broadcast from ",
-                                    input_shape.DebugString(), " to ",
-                                    output_shape.DebugString()));
-
-        broadcast_dims.push_back(broadcast_shape.size());
-        if (output_dims[i] == input_dims[i]) {
-          broadcast_shape.push_back(output_dims[i]);
-        } else if (output_dims[i] != input_dims[i]) {
-          // Add dimensions [I, O/I], which we will later flatten to just
-          // [O]. We must do this in two phases since XLA broadcasting does not
-          // support tiling.
-          broadcast_shape.push_back(input_dims[i]);
-          broadcast_shape.push_back(output_dims[i] / input_dims[i]);
-        }
-      } else {
-        broadcast_shape.push_back(output_dims[i]);
-      }
-    }
-    absl::c_reverse(broadcast_dims);
-    int broadcast_shape_size = broadcast_shape.size();
-    for (int64& broadcast_dim : broadcast_dims) {
-      broadcast_dim = broadcast_shape_size - broadcast_dim - 1;
-    }
-    absl::c_reverse(broadcast_shape);
-    xla::XlaOp output = xla::Reshape(
-        xla::BroadcastInDim(context->Input(0),
-                            xla::ShapeUtil::MakeShape(
-                                context->input_xla_type(0), broadcast_shape),
-                            broadcast_dims),
-        output_shape.dim_sizes());
-    context->SetOutput(0, output);
+    auto output = BroadcastTo(context->Input(0), output_shape.dim_sizes());
+    OP_REQUIRES_OK(context, output.status());
+    context->SetOutput(0, output.ValueOrDie());
   }
 };
 
diff --git a/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc b/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc
index ef1015552d..234f7b4a01 100644
--- a/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/cwise_ops.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/tf2xla/kernels/cwise_ops.h"
 
+#include "tensorflow/compiler/tf2xla/lib/broadcast.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
@@ -39,7 +40,8 @@ void XlaBinaryOp::Compile(XlaOpKernelContext* ctx) {
   // compute valid broadcast shapes, but rely below on XLA to
   // automatically perform the broadcast assuming its valid shapes are
   // a superset of TensorFlow's valid shapes.
-  BCast bcast(BCast::FromShape(lhs_shape), BCast::FromShape(rhs_shape));
+  BCast bcast(BCast::FromShape(lhs_shape), BCast::FromShape(rhs_shape),
+              /*fewer_dims_optimization=*/false);
   if (!bcast.IsValid()) {
     ctx->SetStatus(errors::InvalidArgument("Incompatible shapes: ",
                                            lhs_shape.DebugString(), " vs. ",
@@ -86,51 +88,18 @@ void XlaBinaryOp::Compile(XlaOpKernelContext* ctx) {
 }
 
 /* static */ std::pair<xla::XlaOp, xla::XlaOp> XlaBinaryOp::Broadcast(
-    xla::XlaBuilder* builder, const xla::XlaOp& lhs, const xla::XlaOp& rhs,
-    const BCast& broadcast_helper) {
-  // Manually construct the broadcasting since MapN does not do
-  // automatic broadcasting. The bcast helper ensures that
-  // lhs.reshape(bcast.x_reshape()).broadcast(bcast.x_bcast()) and
-  // rhs.reshape(bcast.y_reshape()).broadcast(bcast.y_bcast()) have
-  // the same shape, so can be operated on by MapN.
-
-  // First reshape the inputs, which should be a metadata-only
-  // operation since we are flattening the dimensions in order.
-  auto lhs_shaped = xla::Reshape(lhs, broadcast_helper.x_reshape());
-  auto rhs_shaped = xla::Reshape(rhs, broadcast_helper.y_reshape());
-
-  // Next broadcast the necessary input dimensions. We rely on the
-  // XLA optimizer to be smart about the fact that we are asking
-  // it to broadcast size 1 on some of these dimensions, to avoid
-  // adding complexity to this code.
-  auto lhs_broadcast = xla::Broadcast(lhs_shaped, broadcast_helper.x_bcast());
-  int lhs_size = broadcast_helper.x_bcast().size();
-  auto rhs_broadcast = xla::Broadcast(rhs_shaped, broadcast_helper.y_bcast());
-  int rhs_size = broadcast_helper.y_bcast().size();
-
-  // Now reshape them to the correct output shape. After the
-  // broadcast each side is twice as wide as it should be, since the
-  // broadcast dimensions were prepended to the shape. Reshape
-  // flattening each original dimension with the prepended broadcast
-  // dimension. E.g. if we started out with lhs_shaped with shape
-  // [5,2,3] and x_bcast was [2,1,7] then lhs_broadcast would have
-  // shape [2,1,7,5,2,3] and we want to reshape it to [10,2,21].
-  std::vector<int64> lhs_reorder;
-  for (int i = 0; i < lhs_size; ++i) {
-    lhs_reorder.push_back(i);
-    lhs_reorder.push_back(i + lhs_size);
+    xla::XlaOp lhs, xla::XlaOp rhs, const BCast& broadcast_helper) {
+  auto lhs_output = BroadcastTo(lhs, broadcast_helper.output_shape());
+  if (!lhs_output.ok()) {
+    xla::XlaOp error = lhs.builder()->ReportError(lhs_output.status());
+    return {error, error};
   }
-  auto lhs_output =
-      xla::Reshape(lhs_broadcast, lhs_reorder, broadcast_helper.output_shape());
-  std::vector<int64> rhs_reorder;
-  for (int i = 0; i < rhs_size; ++i) {
-    rhs_reorder.push_back(i);
-    rhs_reorder.push_back(i + rhs_size);
+  auto rhs_output = BroadcastTo(rhs, broadcast_helper.output_shape());
+  if (!rhs_output.ok()) {
+    xla::XlaOp error = rhs.builder()->ReportError(rhs_output.status());
+    return {error, error};
   }
-  auto rhs_output =
-      xla::Reshape(rhs_broadcast, rhs_reorder, broadcast_helper.output_shape());
-
-  return {lhs_output, rhs_output};
+  return {lhs_output.ValueOrDie(), rhs_output.ValueOrDie()};
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/cwise_ops.h b/tensorflow/compiler/tf2xla/kernels/cwise_ops.h
index 6653944a91..516ead4bfe 100644
--- a/tensorflow/compiler/tf2xla/kernels/cwise_ops.h
+++ b/tensorflow/compiler/tf2xla/kernels/cwise_ops.h
@@ -67,8 +67,7 @@ class XlaBinaryOp : public XlaOpKernel {
   // 'broadcast_helper', yielding arguments 'lhs' and 'rhs' that have the same
   // shape.
   static std::pair<xla::XlaOp, xla::XlaOp> Broadcast(
-      xla::XlaBuilder* builder, const xla::XlaOp& lhs, const xla::XlaOp& rhs,
-      const BCast& broadcast_helper);
+      xla::XlaOp lhs, xla::XlaOp rhs, const BCast& broadcast_helper);
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD
index 8597e7f139..1ce3930fd1 100644
--- a/tensorflow/compiler/tf2xla/lib/BUILD
+++ b/tensorflow/compiler/tf2xla/lib/BUILD
@@ -31,6 +31,22 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "broadcast",
+    srcs = ["broadcast.cc"],
+    hdrs = ["broadcast.h"],
+    deps = [
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
 cc_library(
     name = "cholesky",
     srcs = ["cholesky.cc"],
diff --git a/tensorflow/compiler/tf2xla/lib/broadcast.cc b/tensorflow/compiler/tf2xla/lib/broadcast.cc
new file mode 100644
index 0000000000..3e402ef855
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/lib/broadcast.cc
@@ -0,0 +1,93 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/lib/broadcast.h"
+
+#include <vector>
+
+#include "absl/algorithm/container.h"
+#include "absl/strings/str_join.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/util.h"
+
+namespace tensorflow {
+
+xla::StatusOr<xla::XlaOp> BroadcastTo(xla::XlaOp input,
+                                      absl::Span<int64 const> output_dims) {
+  xla::XlaBuilder* builder = input.builder();
+  TF_ASSIGN_OR_RETURN(xla::Shape input_shape, builder->GetShape(input));
+  absl::Span<int64 const> input_dims =
+      xla::AsInt64Slice(input_shape.dimensions());
+
+  if (input_dims == output_dims) {
+    return input;
+  }
+
+  if (input_dims.size() > output_dims.size()) {
+    return errors::InvalidArgument(
+        "Input shape (", xla::ShapeUtil::HumanString(input_shape),
+        ") must have rank less than or equal to the output shape [",
+        absl::StrJoin(output_dims, ","), "]");
+  }
+
+  std::vector<int64> broadcast_dims;
+  std::vector<int64> broadcast_shape;
+  auto input_it = input_dims.rbegin();
+  for (auto output_it = output_dims.rbegin(); output_it != output_dims.rend();
+       ++output_it) {
+    if (input_it != input_dims.rend()) {
+      if (!(*output_it == 0 && *input_it == 0) &&
+          !(*input_it != 0 && *output_it % *input_it == 0)) {
+        return errors::InvalidArgument("Invalid shape broadcast from ",
+                                       xla::ShapeUtil::HumanString(input_shape),
+                                       " to [", absl::StrJoin(output_dims, ","),
+                                       "]");
+      }
+
+      broadcast_dims.push_back(broadcast_shape.size());
+      if (*output_it == *input_it) {
+        broadcast_shape.push_back(*output_it);
+      } else if (*output_it != *input_it) {
+        // Add dimensions [I, O/I], which we will later flatten to just
+        // [O]. We must do this in two phases since XLA broadcasting does not
+        // support tiling.
+        broadcast_shape.push_back(*input_it);
+        broadcast_shape.push_back(*output_it / *input_it);
+      }
+      ++input_it;
+    } else {
+      broadcast_shape.push_back(*output_it);
+    }
+  }
+  TF_RET_CHECK(input_it == input_dims.rend());
+
+  absl::c_reverse(broadcast_dims);
+  int broadcast_shape_size = broadcast_shape.size();
+  for (int64& broadcast_dim : broadcast_dims) {
+    broadcast_dim = broadcast_shape_size - broadcast_dim - 1;
+  }
+  absl::c_reverse(broadcast_shape);
+  xla::XlaOp output = xla::BroadcastInDim(
+      input,
+      xla::ShapeUtil::MakeShape(input_shape.element_type(), broadcast_shape),
+      broadcast_dims);
+  if (broadcast_shape != output_dims) {
+    output = xla::Reshape(output, output_dims);
+  }
+  return output;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/lib/broadcast.h b/tensorflow/compiler/tf2xla/lib/broadcast.h
new file mode 100644
index 0000000000..591e696f06
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/lib/broadcast.h
@@ -0,0 +1,32 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_BROADCAST_H_
+#define TENSORFLOW_COMPILER_TF2XLA_LIB_BROADCAST_H_
+
+#include "absl/types/span.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/statusor.h"
+
+namespace tensorflow {
+
+// Broadcasts 'input' up to shape 'output_dims', using TensorFlow broadcasting
+// rules. Supports broadcasting a dimension of size x to size x*y, i.e., tiling.
+xla::StatusOr<xla::XlaOp> BroadcastTo(xla::XlaOp input,
+                                      absl::Span<int64 const> output_dims);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_TF2XLA_LIB_BROADCAST_H_
-- 
GitLab


From cea6b4959152981ab778001f30ff9ad87bb4fc9e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 08:34:28 -0700
Subject: [PATCH 0420/1085] Relax some unnecessary 4D array restrictions

PiperOrigin-RevId: 215910400
---
 tensorflow/contrib/lite/kernels/internal/types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index 64a39dd2a2..c6bc6074d4 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -268,8 +268,9 @@ class RuntimeShape {
   // This creates a shape padded to the desired size with the specified value.
   RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value)
       : size_(0) {
+    // If the following check fails, it is likely because a 4D-only kernel is
+    // being used with an array of larger dimension count.
     TFLITE_CHECK_GE(new_shape_size, shape.DimensionsCount());
-    TFLITE_CHECK_LE(new_shape_size, kMaxSmallSize);
     Resize(new_shape_size);
     const int size_increase = new_shape_size - shape.DimensionsCount();
     for (int i = 0; i < size_increase; ++i) {
-- 
GitLab


From 53faa313b7628cd8c9fbb836544cc6482cafb7a4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 08:46:54 -0700
Subject: [PATCH 0421/1085] Switch NCCL to build from open source (version
 2.3.5-5) by default.

Note to users manually patching ptxas from a later toolkit version:
Building NCCL requires the same version of ptxas and nvlink.

PiperOrigin-RevId: 215911973
---
 configure.py                        |   17 +-
 tensorflow/workspace.bzl            |   10 +-
 third_party/gpus/cuda_configure.bzl | 1979 ++++++++++++++-------------
 third_party/nccl/LICENSE            |  231 +---
 third_party/nccl/archive.BUILD      |  179 +++
 third_party/nccl/build_defs.bzl.tpl |  210 +++
 third_party/nccl/nccl_archive.BUILD |   68 -
 third_party/nccl/nccl_configure.bzl |  214 +--
 8 files changed, 1592 insertions(+), 1316 deletions(-)
 create mode 100644 third_party/nccl/archive.BUILD
 create mode 100644 third_party/nccl/build_defs.bzl.tpl
 delete mode 100644 third_party/nccl/nccl_archive.BUILD

diff --git a/configure.py b/configure.py
index a88fdb3555..65b4622995 100644
--- a/configure.py
+++ b/configure.py
@@ -35,7 +35,6 @@ except ImportError:
 
 _DEFAULT_CUDA_VERSION = '9.0'
 _DEFAULT_CUDNN_VERSION = '7'
-_DEFAULT_NCCL_VERSION = '2.2'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0'
 _DEFAULT_CUDA_PATH = '/usr/local/cuda'
 _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
@@ -1109,18 +1108,17 @@ def set_tf_nccl_install_path(environ_cp):
     raise ValueError('Currently NCCL is only supported on Linux platforms.')
 
   ask_nccl_version = (
-      'Please specify the NCCL version you want to use. If NCCL %s is not '
-      'installed, then you can use version 1.3 that can be fetched '
-      'automatically but it may have worse performance with multiple GPUs. '
-      '[Default is %s]: ') % (_DEFAULT_NCCL_VERSION, _DEFAULT_NCCL_VERSION)
+      'Please specify the locally installed NCCL version you want to use. '
+      '[Default is to use https://github.com/nvidia/nccl]: ')
 
   for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
     tf_nccl_version = get_from_env_or_user_or_default(
-        environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, _DEFAULT_NCCL_VERSION)
-    tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1)
+        environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, '')
+
+    if not tf_nccl_version:
+      break  # No need to get install path, building the open source code.
 
-    if tf_nccl_version == '1':
-      break  # No need to get install path, NCCL 1 is a GitHub repo.
+    tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1)
 
     # Look with ldconfig first if we can find the library in paths
     # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding
@@ -1232,7 +1230,6 @@ def set_tf_nccl_install_path(environ_cp):
   environ_cp['TF_NCCL_VERSION'] = tf_nccl_version
   write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version)
 
-
 def get_native_cuda_compute_capabilities(environ_cp):
   """Get native cuda compute capabilities.
 
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 72f3fd0cf8..8df41f96b8 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -585,12 +585,12 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "nccl_archive",
-        build_file = clean_dep("//third_party:nccl/nccl_archive.BUILD"),
-        sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176",
-        strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7",
+        build_file = clean_dep("//third_party:nccl/archive.BUILD"),
+        sha256 = "19132b5127fa8e02d95a09795866923f04064c8f1e0770b2b42ab551408882a4",
+        strip_prefix = "nccl-f93fe9bfd94884cec2ba711897222e0df5569a53",
         urls = [
-            "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz",
-            "https://github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz",
+            "https://mirror.bazel.build/github.com/nvidia/nccl/archive/f93fe9bfd94884cec2ba711897222e0df5569a53.tar.gz",
+            "https://github.com/nvidia/nccl/archive/f93fe9bfd94884cec2ba711897222e0df5569a53.tar.gz",
         ],
     )
 
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 69f4599c16..831a3067b2 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -126,118 +126,141 @@ load(
 )
 
 def _get_python_bin(repository_ctx):
-    """Gets the python bin path."""
-    python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
-    if python_bin != None:
-        return python_bin
-    python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python"
-    python_bin_path = repository_ctx.which(python_bin_name)
-    if python_bin_path != None:
-        return str(python_bin_path)
-    auto_configure_fail("Cannot find python in PATH, please make sure " +
-                        "python is installed and add its directory in PATH, or --define " +
-                        "%s='/something/else'.\nPATH=%s" % (
-                            _PYTHON_BIN_PATH,
-                            repository_ctx.os.environ.get("PATH", ""),
-                        ))
+  """Gets the python bin path."""
+  python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
+  if python_bin != None:
+    return python_bin
+  python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python"
+  python_bin_path = repository_ctx.which(python_bin_name)
+  if python_bin_path != None:
+    return str(python_bin_path)
+  auto_configure_fail(
+      "Cannot find python in PATH, please make sure " +
+      "python is installed and add its directory in PATH, or --define " +
+      "%s='/something/else'.\nPATH=%s" % (
+          _PYTHON_BIN_PATH,
+          repository_ctx.os.environ.get("PATH", ""),
+      ))
+
 
 def _get_nvcc_tmp_dir_for_windows(repository_ctx):
-    """Return the tmp directory for nvcc to generate intermediate source files."""
-    escaped_tmp_dir = escape_string(
-        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace("\\", "\\\\"),
-    )
-    return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir"
+  """Return the tmp directory for nvcc to generate intermediate source files."""
+  escaped_tmp_dir = escape_string(
+      get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
+          "\\", "\\\\"),)
+  return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir"
 
-def _get_msvc_compiler(repository_ctx):
-    vc_path = find_vc_path(repository_ctx)
-    return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/")
 
-def _get_win_cuda_defines(repository_ctx):
-    """Return CROSSTOOL defines for Windows"""
-
-    # If we are not on Windows, return empty vaules for Windows specific fields.
-    # This ensures the CROSSTOOL file parser is happy.
-    if not _is_windows(repository_ctx):
-        return {
-            "%{msvc_env_tmp}": "",
-            "%{msvc_env_path}": "",
-            "%{msvc_env_include}": "",
-            "%{msvc_env_lib}": "",
-            "%{msvc_cl_path}": "",
-            "%{msvc_ml_path}": "",
-            "%{msvc_link_path}": "",
-            "%{msvc_lib_path}": "",
-            "%{cxx_builtin_include_directory}": "",
-        }
-
-    vc_path = find_vc_path(repository_ctx)
-    if not vc_path:
-        auto_configure_fail("Visual C++ build tools not found on your machine." +
-                            "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using")
-        return {}
-
-    env = setup_vc_env_vars(repository_ctx, vc_path)
-    escaped_paths = escape_string(env["PATH"])
-    escaped_include_paths = escape_string(env["INCLUDE"])
-    escaped_lib_paths = escape_string(env["LIB"])
-    escaped_tmp_dir = escape_string(
-        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace("\\", "\\\\"),
-    )
+def _get_msvc_compiler(repository_ctx):
+  vc_path = find_vc_path(repository_ctx)
+  return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/")
 
-    msvc_cl_path = "windows/msvc_wrapper_for_nvcc.bat"
-    msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace("\\", "/")
-    msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace("\\", "/")
-    msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace("\\", "/")
 
-    # nvcc will generate some temporary source files under %{nvcc_tmp_dir}
-    # The generated files are guranteed to have unique name, so they can share the same tmp directory
-    escaped_cxx_include_directories = ["cxx_builtin_include_directory: \"%s\"" % _get_nvcc_tmp_dir_for_windows(repository_ctx)]
-    for path in escaped_include_paths.split(";"):
-        if path:
-            escaped_cxx_include_directories.append("cxx_builtin_include_directory: \"%s\"" % path)
+def _get_win_cuda_defines(repository_ctx):
+  """Return CROSSTOOL defines for Windows"""
 
+  # If we are not on Windows, return empty vaules for Windows specific fields.
+  # This ensures the CROSSTOOL file parser is happy.
+  if not _is_windows(repository_ctx):
     return {
-        "%{msvc_env_tmp}": escaped_tmp_dir,
-        "%{msvc_env_path}": escaped_paths,
-        "%{msvc_env_include}": escaped_include_paths,
-        "%{msvc_env_lib}": escaped_lib_paths,
-        "%{msvc_cl_path}": msvc_cl_path,
-        "%{msvc_ml_path}": msvc_ml_path,
-        "%{msvc_link_path}": msvc_link_path,
-        "%{msvc_lib_path}": msvc_lib_path,
-        "%{cxx_builtin_include_directory}": "\n".join(escaped_cxx_include_directories),
+        "%{msvc_env_tmp}": "",
+        "%{msvc_env_path}": "",
+        "%{msvc_env_include}": "",
+        "%{msvc_env_lib}": "",
+        "%{msvc_cl_path}": "",
+        "%{msvc_ml_path}": "",
+        "%{msvc_link_path}": "",
+        "%{msvc_lib_path}": "",
+        "%{cxx_builtin_include_directory}": "",
     }
 
+  vc_path = find_vc_path(repository_ctx)
+  if not vc_path:
+    auto_configure_fail(
+        "Visual C++ build tools not found on your machine." +
+        "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using"
+    )
+    return {}
+
+  env = setup_vc_env_vars(repository_ctx, vc_path)
+  escaped_paths = escape_string(env["PATH"])
+  escaped_include_paths = escape_string(env["INCLUDE"])
+  escaped_lib_paths = escape_string(env["LIB"])
+  escaped_tmp_dir = escape_string(
+      get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
+          "\\", "\\\\"),)
+
+  msvc_cl_path = "windows/msvc_wrapper_for_nvcc.bat"
+  msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace(
+      "\\", "/")
+  msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace(
+      "\\", "/")
+  msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace(
+      "\\", "/")
+
+  # nvcc will generate some temporary source files under %{nvcc_tmp_dir}
+  # The generated files are guranteed to have unique name, so they can share the same tmp directory
+  escaped_cxx_include_directories = [
+      "cxx_builtin_include_directory: \"%s\"" %
+      _get_nvcc_tmp_dir_for_windows(repository_ctx)
+  ]
+  for path in escaped_include_paths.split(";"):
+    if path:
+      escaped_cxx_include_directories.append(
+          "cxx_builtin_include_directory: \"%s\"" % path)
+
+  return {
+      "%{msvc_env_tmp}":
+          escaped_tmp_dir,
+      "%{msvc_env_path}":
+          escaped_paths,
+      "%{msvc_env_include}":
+          escaped_include_paths,
+      "%{msvc_env_lib}":
+          escaped_lib_paths,
+      "%{msvc_cl_path}":
+          msvc_cl_path,
+      "%{msvc_ml_path}":
+          msvc_ml_path,
+      "%{msvc_link_path}":
+          msvc_link_path,
+      "%{msvc_lib_path}":
+          msvc_lib_path,
+      "%{cxx_builtin_include_directory}":
+          "\n".join(escaped_cxx_include_directories),
+  }
+
 # TODO(dzc): Once these functions have been factored out of Bazel's
 # cc_configure.bzl, load them from @bazel_tools instead.
 # BEGIN cc_configure common functions.
 def find_cc(repository_ctx):
-    """Find the C++ compiler."""
-    if _is_windows(repository_ctx):
-        return _get_msvc_compiler(repository_ctx)
-
-    if _use_cuda_clang(repository_ctx):
-        target_cc_name = "clang"
-        cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
-        if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
-            return "extra_tools/bin/clang"
-    else:
-        target_cc_name = "gcc"
-        cc_path_envvar = _GCC_HOST_COMPILER_PATH
-    cc_name = target_cc_name
-
-    if cc_path_envvar in repository_ctx.os.environ:
-        cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
-        if cc_name_from_env:
-            cc_name = cc_name_from_env
-    if cc_name.startswith("/"):
-        # Absolute path, maybe we should make this supported by our which function.
-        return cc_name
-    cc = repository_ctx.which(cc_name)
-    if cc == None:
-        fail(("Cannot find {}, either correct your path or set the {}" +
-              " environment variable").format(target_cc_name, cc_path_envvar))
-    return cc
+  """Find the C++ compiler."""
+  if _is_windows(repository_ctx):
+    return _get_msvc_compiler(repository_ctx)
+
+  if _use_cuda_clang(repository_ctx):
+    target_cc_name = "clang"
+    cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
+    if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
+      return "extra_tools/bin/clang"
+  else:
+    target_cc_name = "gcc"
+    cc_path_envvar = _GCC_HOST_COMPILER_PATH
+  cc_name = target_cc_name
+
+  if cc_path_envvar in repository_ctx.os.environ:
+    cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
+    if cc_name_from_env:
+      cc_name = cc_name_from_env
+  if cc_name.startswith("/"):
+    # Absolute path, maybe we should make this supported by our which function.
+    return cc_name
+  cc = repository_ctx.which(cc_name)
+  if cc == None:
+    fail(("Cannot find {}, either correct your path or set the {}" +
+          " environment variable").format(target_cc_name, cc_path_envvar))
+  return cc
+
 
 _INC_DIR_MARKER_BEGIN = "#include <...>"
 
@@ -246,80 +269,82 @@ _OSX_FRAMEWORK_SUFFIX = " (framework directory)"
 _OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
 
 def _cxx_inc_convert(path):
-    """Convert path returned by cc -E xc++ in a complete path."""
-    path = path.strip()
-    if path.endswith(_OSX_FRAMEWORK_SUFFIX):
-        path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
-    return path
+  """Convert path returned by cc -E xc++ in a complete path."""
+  path = path.strip()
+  if path.endswith(_OSX_FRAMEWORK_SUFFIX):
+    path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
+  return path
+
 
 def _normalize_include_path(repository_ctx, path):
-    """Normalizes include paths before writing them to the crosstool.
+  """Normalizes include paths before writing them to the crosstool.
 
     If path points inside the 'crosstool' folder of the repository, a relative
     path is returned.
     If path points outside the 'crosstool' folder, an absolute path is returned.
     """
-    path = str(repository_ctx.path(path))
-    crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
+  path = str(repository_ctx.path(path))
+  crosstool_folder = str(repository_ctx.path(".").get_child("crosstool"))
+
+  if path.startswith(crosstool_folder):
+    # We drop the path to "$REPO/crosstool" and a trailing path separator.
+    return path[len(crosstool_folder) + 1:]
+  return path
 
-    if path.startswith(crosstool_folder):
-        # We drop the path to "$REPO/crosstool" and a trailing path separator.
-        return path[len(crosstool_folder) + 1:]
-    return path
 
 def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
-    """Compute the list of default C or C++ include directories."""
-    if lang_is_cpp:
-        lang = "c++"
-    else:
-        lang = "c"
-    result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"])
-    index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
-    if index1 == -1:
-        return []
-    index1 = result.stderr.find("\n", index1)
-    if index1 == -1:
-        return []
-    index2 = result.stderr.rfind("\n ")
-    if index2 == -1 or index2 < index1:
-        return []
-    index2 = result.stderr.find("\n", index2 + 1)
-    if index2 == -1:
-        inc_dirs = result.stderr[index1 + 1:]
-    else:
-        inc_dirs = result.stderr[index1 + 1:index2].strip()
+  """Compute the list of default C or C++ include directories."""
+  if lang_is_cpp:
+    lang = "c++"
+  else:
+    lang = "c"
+  result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"])
+  index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
+  if index1 == -1:
+    return []
+  index1 = result.stderr.find("\n", index1)
+  if index1 == -1:
+    return []
+  index2 = result.stderr.rfind("\n ")
+  if index2 == -1 or index2 < index1:
+    return []
+  index2 = result.stderr.find("\n", index2 + 1)
+  if index2 == -1:
+    inc_dirs = result.stderr[index1 + 1:]
+  else:
+    inc_dirs = result.stderr[index1 + 1:index2].strip()
+
+  return [
+      _normalize_include_path(repository_ctx, _cxx_inc_convert(p))
+      for p in inc_dirs.split("\n")
+  ]
 
-    return [
-        _normalize_include_path(repository_ctx, _cxx_inc_convert(p))
-        for p in inc_dirs.split("\n")
-    ]
 
 def get_cxx_inc_directories(repository_ctx, cc):
-    """Compute the list of default C and C++ include directories."""
-
-    # For some reason `clang -xc` sometimes returns include paths that are
-    # different from the ones from `clang -xc++`. (Symlink and a dir)
-    # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
-    includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
-    includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
-
-    includes_cpp_set = depset(includes_cpp)
-    return includes_cpp + [
-        inc
-        for inc in includes_c
-        if inc not in includes_cpp_set
-    ]
+  """Compute the list of default C and C++ include directories."""
+
+  # For some reason `clang -xc` sometimes returns include paths that are
+  # different from the ones from `clang -xc++`. (Symlink and a dir)
+  # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
+  includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+  includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+  includes_cpp_set = depset(includes_cpp)
+  return includes_cpp + [
+      inc for inc in includes_c if inc not in includes_cpp_set
+  ]
+
 
 def auto_configure_fail(msg):
-    """Output failure message when cuda configuration fails."""
-    red = "\033[0;31m"
-    no_color = "\033[0m"
-    fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
+  """Output failure message when cuda configuration fails."""
+  red = "\033[0;31m"
+  no_color = "\033[0m"
+  fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
 
 # END cc_configure common functions (see TODO above).
 
 def _host_compiler_includes(repository_ctx, cc):
-    """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
+  """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
 
     Args:
       repository_ctx: The repository context.
@@ -330,14 +355,15 @@ def _host_compiler_includes(repository_ctx, cc):
       host compiler include directories, which can be added to the CROSSTOOL
       file.
     """
-    inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
-    inc_entries = []
-    for inc_dir in inc_dirs:
-        inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
-    return "\n".join(inc_entries)
+  inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
+  inc_entries = []
+  for inc_dir in inc_dirs:
+    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+  return "\n".join(inc_entries)
+
 
 def _cuda_include_path(repository_ctx, cuda_config):
-    """Generates the cxx_builtin_include_directory entries for cuda inc dirs.
+  """Generates the cxx_builtin_include_directory entries for cuda inc dirs.
 
     Args:
       repository_ctx: The repository context.
@@ -348,39 +374,41 @@ def _cuda_include_path(repository_ctx, cuda_config):
       host compiler include directories, which can be added to the CROSSTOOL
       file.
     """
-    nvcc_path = repository_ctx.path("%s/bin/nvcc%s" %
-                                    (
-                                        cuda_config.cuda_toolkit_path,
-                                        ".exe" if cuda_config.cpu_value == "Windows" else "",
-                                    ))
-    result = repository_ctx.execute([
-        nvcc_path,
-        "-v",
-        "/dev/null",
-        "-o",
-        "/dev/null",
-    ])
-    target_dir = ""
-    for one_line in result.stderr.splitlines():
-        if one_line.startswith("#$ _TARGET_DIR_="):
-            target_dir = (cuda_config.cuda_toolkit_path + "/" +
-                          one_line.replace("#$ _TARGET_DIR_=", "") + "/include")
-    inc_entries = []
-    if target_dir != "":
-        inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % target_dir)
-    default_include = cuda_config.cuda_toolkit_path + "/include"
-    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" %
-                       default_include)
-    return "\n".join(inc_entries)
+  nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
+      cuda_config.cuda_toolkit_path,
+      ".exe" if cuda_config.cpu_value == "Windows" else "",
+  ))
+  result = repository_ctx.execute([
+      nvcc_path,
+      "-v",
+      "/dev/null",
+      "-o",
+      "/dev/null",
+  ])
+  target_dir = ""
+  for one_line in result.stderr.splitlines():
+    if one_line.startswith("#$ _TARGET_DIR_="):
+      target_dir = (
+          cuda_config.cuda_toolkit_path + "/" + one_line.replace(
+              "#$ _TARGET_DIR_=", "") + "/include")
+  inc_entries = []
+  if target_dir != "":
+    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % target_dir)
+  default_include = cuda_config.cuda_toolkit_path + "/include"
+  inc_entries.append(
+      "  cxx_builtin_include_directory: \"%s\"" % default_include)
+  return "\n".join(inc_entries)
+
 
 def _enable_cuda(repository_ctx):
-    if "TF_NEED_CUDA" in repository_ctx.os.environ:
-        enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
-        return enable_cuda == "1"
-    return False
+  if "TF_NEED_CUDA" in repository_ctx.os.environ:
+    enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
+    return enable_cuda == "1"
+  return False
+
 
-def _cuda_toolkit_path(repository_ctx):
-    """Finds the cuda toolkit directory.
+def cuda_toolkit_path(repository_ctx):
+  """Finds the cuda toolkit directory.
 
     Args:
       repository_ctx: The repository context.
@@ -388,27 +416,31 @@ def _cuda_toolkit_path(repository_ctx):
     Returns:
       A speculative real path of the cuda toolkit install directory.
     """
-    cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
-    if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
-        cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
-    if not repository_ctx.path(cuda_toolkit_path).exists:
-        auto_configure_fail("Cannot find cuda toolkit path.")
-    return str(repository_ctx.path(cuda_toolkit_path).realpath)
+  cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
+  if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
+    cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
+  if not repository_ctx.path(cuda_toolkit_path).exists:
+    auto_configure_fail("Cannot find cuda toolkit path.")
+  return str(repository_ctx.path(cuda_toolkit_path).realpath)
+
 
 def _cudnn_install_basedir(repository_ctx):
-    """Finds the cudnn install directory."""
-    cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
-    if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
-        cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
-    if not repository_ctx.path(cudnn_install_path).exists:
-        auto_configure_fail("Cannot find cudnn install path.")
-    return cudnn_install_path
+  """Finds the cudnn install directory."""
+  cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
+  if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
+    cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
+  if not repository_ctx.path(cudnn_install_path).exists:
+    auto_configure_fail("Cannot find cudnn install path.")
+  return cudnn_install_path
+
 
 def matches_version(environ_version, detected_version):
-    """Checks whether the user-specified version matches the detected version.
+  """Checks whether the user-specified version matches the detected version.
 
-    This function performs a weak matching so that if the user specifies only the
-    major or major and minor versions, the versions are still considered matching
+    This function performs a weak matching so that if the user specifies only
+    the
+    major or major and minor versions, the versions are still considered
+    matching
     if the version parts match. To illustrate:
 
         environ_version  detected_version  result
@@ -424,25 +456,25 @@ def matches_version(environ_version, detected_version):
         variables.
       detected_version: The version autodetected from the CUDA installation on
         the system.
-
     Returns: True if user-specified version matches detected version and False
       otherwise.
-    """
-    environ_version_parts = environ_version.split(".")
-    detected_version_parts = detected_version.split(".")
-    if len(detected_version_parts) < len(environ_version_parts):
-        return False
-    for i, part in enumerate(detected_version_parts):
-        if i >= len(environ_version_parts):
-            break
-        if part != environ_version_parts[i]:
-            return False
-    return True
+  """
+  environ_version_parts = environ_version.split(".")
+  detected_version_parts = detected_version.split(".")
+  if len(detected_version_parts) < len(environ_version_parts):
+    return False
+  for i, part in enumerate(detected_version_parts):
+    if i >= len(environ_version_parts):
+      break
+    if part != environ_version_parts[i]:
+      return False
+  return True
+
 
 _NVCC_VERSION_PREFIX = "Cuda compilation tools, release "
 
 def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value):
-    """Detects the version of CUDA installed on the system.
+  """Detects the version of CUDA installed on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -452,64 +484,61 @@ def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value):
       String containing the version of CUDA.
     """
 
-    # Run nvcc --version and find the line containing the CUDA version.
-    nvcc_path = repository_ctx.path("%s/bin/nvcc%s" %
-                                    (
-                                        cuda_toolkit_path,
-                                        ".exe" if cpu_value == "Windows" else "",
-                                    ))
-    if not nvcc_path.exists:
-        auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path))
-    result = repository_ctx.execute([str(nvcc_path), "--version"])
-    if result.stderr:
-        auto_configure_fail("Error running nvcc --version: %s" % result.stderr)
-    lines = result.stdout.splitlines()
-    version_line = lines[len(lines) - 1]
-    if version_line.find(_NVCC_VERSION_PREFIX) == -1:
-        auto_configure_fail(
-            "Could not parse CUDA version from nvcc --version. Got: %s" %
-            result.stdout,
-        )
-
-    # Parse the CUDA version from the line containing the CUDA version.
-    prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "")
-    parts = prefix_removed.split(",")
-    if len(parts) != 2 or len(parts[0]) < 2:
-        auto_configure_fail(
-            "Could not parse CUDA version from nvcc --version. Got: %s" %
-            result.stdout,
-        )
-    full_version = parts[1].strip()
-    if full_version.startswith("V"):
-        full_version = full_version[1:]
-
-    # Check whether TF_CUDA_VERSION was set by the user and fail if it does not
-    # match the detected version.
-    environ_version = ""
-    if _TF_CUDA_VERSION in repository_ctx.os.environ:
-        environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
-    if environ_version and not matches_version(environ_version, full_version):
-        auto_configure_fail(
-            ("CUDA version detected from nvcc (%s) does not match " +
-             "TF_CUDA_VERSION (%s)") % (full_version, environ_version),
-        )
-
-    # We only use the version consisting of the major and minor version numbers.
-    version_parts = full_version.split(".")
-    if len(version_parts) < 2:
-        auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.")
-    if cpu_value == "Windows":
-        version = "64_%s%s" % (version_parts[0], version_parts[1])
-    else:
-        version = "%s.%s" % (version_parts[0], version_parts[1])
-    return version
+  # Run nvcc --version and find the line containing the CUDA version.
+  nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
+      cuda_toolkit_path,
+      ".exe" if cpu_value == "Windows" else "",
+  ))
+  if not nvcc_path.exists:
+    auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path))
+  result = repository_ctx.execute([str(nvcc_path), "--version"])
+  if result.stderr:
+    auto_configure_fail("Error running nvcc --version: %s" % result.stderr)
+  lines = result.stdout.splitlines()
+  version_line = lines[len(lines) - 1]
+  if version_line.find(_NVCC_VERSION_PREFIX) == -1:
+    auto_configure_fail(
+        "Could not parse CUDA version from nvcc --version. Got: %s" %
+        result.stdout,)
+
+  # Parse the CUDA version from the line containing the CUDA version.
+  prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "")
+  parts = prefix_removed.split(",")
+  if len(parts) != 2 or len(parts[0]) < 2:
+    auto_configure_fail(
+        "Could not parse CUDA version from nvcc --version. Got: %s" %
+        result.stdout,)
+  full_version = parts[1].strip()
+  if full_version.startswith("V"):
+    full_version = full_version[1:]
+
+  # Check whether TF_CUDA_VERSION was set by the user and fail if it does not
+  # match the detected version.
+  environ_version = ""
+  if _TF_CUDA_VERSION in repository_ctx.os.environ:
+    environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip()
+  if environ_version and not matches_version(environ_version, full_version):
+    auto_configure_fail(
+        ("CUDA version detected from nvcc (%s) does not match " +
+         "TF_CUDA_VERSION (%s)") % (full_version, environ_version),)
+
+  # We only use the version consisting of the major and minor version numbers.
+  version_parts = full_version.split(".")
+  if len(version_parts) < 2:
+    auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.")
+  if cpu_value == "Windows":
+    version = "64_%s%s" % (version_parts[0], version_parts[1])
+  else:
+    version = "%s.%s" % (version_parts[0], version_parts[1])
+  return version
+
 
 _DEFINE_CUDNN_MAJOR = "#define CUDNN_MAJOR"
 _DEFINE_CUDNN_MINOR = "#define CUDNN_MINOR"
 _DEFINE_CUDNN_PATCHLEVEL = "#define CUDNN_PATCHLEVEL"
 
 def find_cuda_define(repository_ctx, header_dir, header_file, define):
-    """Returns the value of a #define in a header file.
+  """Returns the value of a #define in a header file.
 
     Greps through a header file and returns the value of the specified #define.
     If the #define is not found, then raise an error.
@@ -524,52 +553,52 @@ def find_cuda_define(repository_ctx, header_dir, header_file, define):
       The value of the #define found in the header.
     """
 
-    # Confirm location of the header and grep for the line defining the macro.
-    h_path = repository_ctx.path("%s/%s" % (header_dir, header_file))
-    if not h_path.exists:
-        auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path)))
-    result = repository_ctx.execute(
-        # Grep one more lines as some #defines are splitted into two lines.
-        ["grep", "--color=never", "-A1", "-E", define, str(h_path)],
-    )
-    if result.stderr:
-        auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr))
-
-    # Parse the version from the line defining the macro.
-    if result.stdout.find(define) == -1:
-        auto_configure_fail("Cannot find line containing '%s' in %s" %
-                            (define, h_path))
-
-    # Split results to lines
-    lines = result.stdout.split("\n")
-    num_lines = len(lines)
-    for l in range(num_lines):
-        line = lines[l]
-        if define in line:  # Find the line with define
-            version = line
-            if l != num_lines - 1 and line[-1] == "\\":  # Add next line, if multiline
-                version = version[:-1] + lines[l + 1]
-            break
-
-    # Remove any comments
-    version = version.split("//")[0]
-
-    # Remove define name
-    version = version.replace(define, "").strip()
-
-    # Remove the code after the version number.
-    version_end = version.find(" ")
-    if version_end != -1:
-        if version_end == 0:
-            auto_configure_fail(
-                "Cannot extract the version from line containing '%s' in %s" %
-                (define, str(h_path)),
-            )
-        version = version[:version_end].strip()
-    return version
+  # Confirm location of the header and grep for the line defining the macro.
+  h_path = repository_ctx.path("%s/%s" % (header_dir, header_file))
+  if not h_path.exists:
+    auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path)))
+  result = repository_ctx.execute(
+      # Grep one more lines as some #defines are splitted into two lines.
+      ["grep", "--color=never", "-A1", "-E", define,
+       str(h_path)],)
+  if result.stderr:
+    auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr))
+
+  # Parse the version from the line defining the macro.
+  if result.stdout.find(define) == -1:
+    auto_configure_fail(
+        "Cannot find line containing '%s' in %s" % (define, h_path))
+
+  # Split results to lines
+  lines = result.stdout.split("\n")
+  num_lines = len(lines)
+  for l in range(num_lines):
+    line = lines[l]
+    if define in line:  # Find the line with define
+      version = line
+      if l != num_lines - 1 and line[-1] == "\\":  # Add next line, if multiline
+        version = version[:-1] + lines[l + 1]
+      break
+
+  # Remove any comments
+  version = version.split("//")[0]
+
+  # Remove define name
+  version = version.replace(define, "").strip()
+
+  # Remove the code after the version number.
+  version_end = version.find(" ")
+  if version_end != -1:
+    if version_end == 0:
+      auto_configure_fail(
+          "Cannot extract the version from line containing '%s' in %s" %
+          (define, str(h_path)),)
+    version = version[:version_end].strip()
+  return version
+
 
 def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value):
-    """Detects the version of cuDNN installed on the system.
+  """Detects the version of cuDNN installed on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -579,68 +608,68 @@ def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value):
     Returns:
       A string containing the version of cuDNN.
     """
-    cudnn_header_dir = _find_cudnn_header_dir(
-        repository_ctx,
-        cudnn_install_basedir,
-    )
-    major_version = find_cuda_define(
-        repository_ctx,
-        cudnn_header_dir,
-        "cudnn.h",
-        _DEFINE_CUDNN_MAJOR,
-    )
-    minor_version = find_cuda_define(
-        repository_ctx,
-        cudnn_header_dir,
-        "cudnn.h",
-        _DEFINE_CUDNN_MINOR,
-    )
-    patch_version = find_cuda_define(
-        repository_ctx,
-        cudnn_header_dir,
-        "cudnn.h",
-        _DEFINE_CUDNN_PATCHLEVEL,
-    )
-    full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
-
-    # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not
-    # match the detected version.
-    environ_version = ""
-    if _TF_CUDNN_VERSION in repository_ctx.os.environ:
-        environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
-    if environ_version and not matches_version(environ_version, full_version):
-        cudnn_h_path = repository_ctx.path("%s/include/cudnn.h" %
-                                           cudnn_install_basedir)
-        auto_configure_fail(
-            ("cuDNN version detected from %s (%s) does not match " +
-             "TF_CUDNN_VERSION (%s)") %
-            (str(cudnn_h_path), full_version, environ_version),
-        )
-
-    # We only use the major version since we use the libcudnn libraries that are
-    # only versioned with the major version (e.g. libcudnn.so.5).
-    version = major_version
-    if cpu_value == "Windows":
-        version = "64_" + version
-    return version
-
-def _compute_capabilities(repository_ctx):
-    """Returns a list of strings representing cuda compute capabilities."""
-    if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
-        return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-    capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
-    capabilities = capabilities_str.split(",")
-    for capability in capabilities:
-        # Workaround for Skylark's lack of support for regex. This check should
-        # be equivalent to checking:
-        #     if re.match("[0-9]+.[0-9]+", capability) == None:
-        parts = capability.split(".")
-        if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
-            auto_configure_fail("Invalid compute capability: %s" % capability)
-    return capabilities
+  cudnn_header_dir = _find_cudnn_header_dir(
+      repository_ctx,
+      cudnn_install_basedir,
+  )
+  major_version = find_cuda_define(
+      repository_ctx,
+      cudnn_header_dir,
+      "cudnn.h",
+      _DEFINE_CUDNN_MAJOR,
+  )
+  minor_version = find_cuda_define(
+      repository_ctx,
+      cudnn_header_dir,
+      "cudnn.h",
+      _DEFINE_CUDNN_MINOR,
+  )
+  patch_version = find_cuda_define(
+      repository_ctx,
+      cudnn_header_dir,
+      "cudnn.h",
+      _DEFINE_CUDNN_PATCHLEVEL,
+  )
+  full_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
+
+  # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not
+  # match the detected version.
+  environ_version = ""
+  if _TF_CUDNN_VERSION in repository_ctx.os.environ:
+    environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip()
+  if environ_version and not matches_version(environ_version, full_version):
+    cudnn_h_path = repository_ctx.path(
+        "%s/include/cudnn.h" % cudnn_install_basedir)
+    auto_configure_fail(("cuDNN version detected from %s (%s) does not match " +
+                         "TF_CUDNN_VERSION (%s)") %
+                        (str(cudnn_h_path), full_version, environ_version),)
+
+  # We only use the major version since we use the libcudnn libraries that are
+  # only versioned with the major version (e.g. libcudnn.so.5).
+  version = major_version
+  if cpu_value == "Windows":
+    version = "64_" + version
+  return version
+
+
+def compute_capabilities(repository_ctx):
+  """Returns a list of strings representing cuda compute capabilities."""
+  if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ:
+    return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+  capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES]
+  capabilities = capabilities_str.split(",")
+  for capability in capabilities:
+    # Workaround for Skylark's lack of support for regex. This check should
+    # be equivalent to checking:
+    #     if re.match("[0-9]+.[0-9]+", capability) == None:
+    parts = capability.split(".")
+    if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
+      auto_configure_fail("Invalid compute capability: %s" % capability)
+  return capabilities
+
 
 def get_cpu_value(repository_ctx):
-    """Returns the name of the host operating system.
+  """Returns the name of the host operating system.
 
     Args:
       repository_ctx: The repository context.
@@ -648,20 +677,22 @@ def get_cpu_value(repository_ctx):
     Returns:
       A string containing the name of the host operating system.
     """
-    os_name = repository_ctx.os.name.lower()
-    if os_name.startswith("mac os"):
-        return "Darwin"
-    if os_name.find("windows") != -1:
-        return "Windows"
-    result = repository_ctx.execute(["uname", "-s"])
-    return result.stdout.strip()
+  os_name = repository_ctx.os.name.lower()
+  if os_name.startswith("mac os"):
+    return "Darwin"
+  if os_name.find("windows") != -1:
+    return "Windows"
+  result = repository_ctx.execute(["uname", "-s"])
+  return result.stdout.strip()
+
 
 def _is_windows(repository_ctx):
-    """Returns true if the host operating system is windows."""
-    return get_cpu_value(repository_ctx) == "Windows"
+  """Returns true if the host operating system is windows."""
+  return get_cpu_value(repository_ctx) == "Windows"
+
 
 def _lib_name(lib, cpu_value, version = "", static = False):
-    """Constructs the platform-specific name of a library.
+  """Constructs the platform-specific name of a library.
 
     Args:
       lib: The name of the library, such as "cudart"
@@ -672,23 +703,24 @@ def _lib_name(lib, cpu_value, version = "", static = False):
     Returns:
       The platform-specific name of the library.
     """
-    if cpu_value in ("Linux", "FreeBSD"):
-        if static:
-            return "lib%s.a" % lib
-        else:
-            if version:
-                version = ".%s" % version
-            return "lib%s.so%s" % (lib, version)
-    elif cpu_value == "Windows":
-        return "%s.lib" % lib
-    elif cpu_value == "Darwin":
-        if static:
-            return "lib%s.a" % lib
-        elif version:
-            version = ".%s" % version
-        return "lib%s%s.dylib" % (lib, version)
+  if cpu_value in ("Linux", "FreeBSD"):
+    if static:
+      return "lib%s.a" % lib
     else:
-        auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+      if version:
+        version = ".%s" % version
+      return "lib%s.so%s" % (lib, version)
+  elif cpu_value == "Windows":
+    return "%s.lib" % lib
+  elif cpu_value == "Darwin":
+    if static:
+      return "lib%s.a" % lib
+    elif version:
+      version = ".%s" % version
+    return "lib%s%s.dylib" % (lib, version)
+  else:
+    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
 
 def _find_cuda_lib(
         lib,
@@ -697,7 +729,7 @@ def _find_cuda_lib(
         basedir,
         version = "",
         static = False):
-    """Finds the given CUDA or cuDNN library on the system.
+  """Finds the given CUDA or cuDNN library on the system.
 
     Args:
       lib: The name of the library, such as "cudart"
@@ -712,15 +744,16 @@ def _find_cuda_lib(
         file_name: The basename of the library found on the system.
         path: The full path to the library.
     """
-    file_name = _lib_name(lib, cpu_value, version, static)
-    for relative_path in CUDA_LIB_PATHS:
-        path = repository_ctx.path("%s/%s%s" % (basedir, relative_path, file_name))
-        if path.exists:
-            return struct(file_name = file_name, path = str(path.realpath))
-    auto_configure_fail("Cannot find cuda library %s" % file_name)
+  file_name = _lib_name(lib, cpu_value, version, static)
+  for relative_path in CUDA_LIB_PATHS:
+    path = repository_ctx.path("%s/%s%s" % (basedir, relative_path, file_name))
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+  auto_configure_fail("Cannot find cuda library %s" % file_name)
+
 
 def _find_cupti_header_dir(repository_ctx, cuda_config):
-    """Returns the path to the directory containing cupti.h
+  """Returns the path to the directory containing cupti.h
 
     On most systems, the cupti library is not installed in the same directory as
     the other CUDA libraries but rather in a special extras/CUPTI directory.
@@ -732,14 +765,17 @@ def _find_cupti_header_dir(repository_ctx, cuda_config):
     Returns:
       The path of the directory containing the cupti header.
     """
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for relative_path in CUPTI_HEADER_PATHS:
-        if repository_ctx.path("%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists:
-            return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-    auto_configure_fail("Cannot find cupti.h under %s" % ", ".join([cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS]))
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for relative_path in CUPTI_HEADER_PATHS:
+    if repository_ctx.path(
+        "%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists:
+      return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+  auto_configure_fail("Cannot find cupti.h under %s" % ", ".join(
+      [cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS]))
+
 
 def _find_cupti_lib(repository_ctx, cuda_config):
-    """Finds the cupti library on the system.
+  """Finds the cupti library on the system.
 
     On most systems, the cupti library is not installed in the same directory as
     the other CUDA libraries but rather in a special extras/CUPTI directory.
@@ -753,23 +789,23 @@ def _find_cupti_lib(repository_ctx, cuda_config):
         file_name: The basename of the library found on the system.
         path: The full path to the library.
     """
-    file_name = _lib_name(
-        "cupti",
-        cuda_config.cpu_value,
-        cuda_config.cuda_version,
-    )
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for relative_path in CUPTI_LIB_PATHS:
-        path = repository_ctx.path(
-            "%s/%s%s" % (cuda_toolkit_path, relative_path, file_name),
-        )
-        if path.exists:
-            return struct(file_name = file_name, path = str(path.realpath))
+  file_name = _lib_name(
+      "cupti",
+      cuda_config.cpu_value,
+      cuda_config.cuda_version,
+  )
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for relative_path in CUPTI_LIB_PATHS:
+    path = repository_ctx.path(
+        "%s/%s%s" % (cuda_toolkit_path, relative_path, file_name),)
+    if path.exists:
+      return struct(file_name=file_name, path=str(path.realpath))
+
+  auto_configure_fail("Cannot find cupti library %s" % file_name)
 
-    auto_configure_fail("Cannot find cupti library %s" % file_name)
 
 def _find_libs(repository_ctx, cuda_config):
-    """Returns the CUDA and cuDNN libraries on the system.
+  """Returns the CUDA and cuDNN libraries on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -778,64 +814,75 @@ def _find_libs(repository_ctx, cuda_config):
     Returns:
       Map of library names to structs of filename and path.
     """
-    cpu_value = cuda_config.cpu_value
-    return {
-        "cuda": _find_cuda_lib("cuda", repository_ctx, cpu_value, cuda_config.cuda_toolkit_path),
-        "cudart": _find_cuda_lib(
-            "cudart",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cudart_static": _find_cuda_lib(
-            "cudart_static",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-            static = True,
-        ),
-        "cublas": _find_cuda_lib(
-            "cublas",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cusolver": _find_cuda_lib(
-            "cusolver",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "curand": _find_cuda_lib(
-            "curand",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cufft": _find_cuda_lib(
-            "cufft",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cuda_toolkit_path,
-            cuda_config.cuda_version,
-        ),
-        "cudnn": _find_cuda_lib(
-            "cudnn",
-            repository_ctx,
-            cpu_value,
-            cuda_config.cudnn_install_basedir,
-            cuda_config.cudnn_version,
-        ),
-        "cupti": _find_cupti_lib(repository_ctx, cuda_config),
-    }
+  cpu_value = cuda_config.cpu_value
+  return {
+      "cuda":
+          _find_cuda_lib("cuda", repository_ctx, cpu_value,
+                         cuda_config.cuda_toolkit_path),
+      "cudart":
+          _find_cuda_lib(
+              "cudart",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cudart_static":
+          _find_cuda_lib(
+              "cudart_static",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+              static=True,
+          ),
+      "cublas":
+          _find_cuda_lib(
+              "cublas",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cusolver":
+          _find_cuda_lib(
+              "cusolver",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "curand":
+          _find_cuda_lib(
+              "curand",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cufft":
+          _find_cuda_lib(
+              "cufft",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cuda_toolkit_path,
+              cuda_config.cuda_version,
+          ),
+      "cudnn":
+          _find_cuda_lib(
+              "cudnn",
+              repository_ctx,
+              cpu_value,
+              cuda_config.cudnn_install_basedir,
+              cuda_config.cudnn_version,
+          ),
+      "cupti":
+          _find_cupti_lib(repository_ctx, cuda_config),
+  }
+
 
 def _find_cuda_include_path(repository_ctx, cuda_config):
-    """Returns the path to the directory containing cuda.h
+  """Returns the path to the directory containing cuda.h
 
     Args:
       repository_ctx: The repository context.
@@ -844,14 +891,16 @@ def _find_cuda_include_path(repository_ctx, cuda_config):
     Returns:
       The path of the directory containing the CUDA headers.
     """
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for relative_path in CUDA_INCLUDE_PATHS:
-        if repository_ctx.path("%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists:
-            return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-    auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path)
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for relative_path in CUDA_INCLUDE_PATHS:
+    if repository_ctx.path(
+        "%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists:
+      return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+  auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path)
+
 
 def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
-    """Returns the path to the directory containing cudnn.h
+  """Returns the path to the directory containing cudnn.h
 
     Args:
       repository_ctx: The repository context.
@@ -861,15 +910,17 @@ def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
     Returns:
       The path of the directory containing the cudnn header.
     """
-    for relative_path in CUDA_INCLUDE_PATHS:
-        if repository_ctx.path("%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists:
-            return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1]
-    if repository_ctx.path("/usr/include/cudnn.h").exists:
-        return "/usr/include"
-    auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
+  for relative_path in CUDA_INCLUDE_PATHS:
+    if repository_ctx.path(
+        "%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists:
+      return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1]
+  if repository_ctx.path("/usr/include/cudnn.h").exists:
+    return "/usr/include"
+  auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
+
 
 def _find_nvvm_libdevice_dir(repository_ctx, cuda_config):
-    """Returns the path to the directory containing libdevice in bitcode format.
+  """Returns the path to the directory containing libdevice in bitcode format.
 
     Args:
       repository_ctx: The repository context.
@@ -878,19 +929,23 @@ def _find_nvvm_libdevice_dir(repository_ctx, cuda_config):
     Returns:
       The path of the directory containing the CUDA headers.
     """
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    for libdevice_file in NVVM_LIBDEVICE_FILES:
-        for relative_path in NVVM_LIBDEVICE_PATHS:
-            if repository_ctx.path("%s/%s%s" % (cuda_toolkit_path, relative_path, libdevice_file)).exists:
-                return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
-    auto_configure_fail("Cannot find libdevice*.bc files under %s" % cuda_toolkit_path)
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  for libdevice_file in NVVM_LIBDEVICE_FILES:
+    for relative_path in NVVM_LIBDEVICE_PATHS:
+      if repository_ctx.path("%s/%s%s" % (cuda_toolkit_path, relative_path,
+                                          libdevice_file)).exists:
+        return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1]
+  auto_configure_fail(
+      "Cannot find libdevice*.bc files under %s" % cuda_toolkit_path)
+
 
 def _cudart_static_linkopt(cpu_value):
-    """Returns additional platform-specific linkopts for cudart."""
-    return "" if cpu_value == "Darwin" else "\"-lrt\","
+  """Returns additional platform-specific linkopts for cudart."""
+  return "" if cpu_value == "Darwin" else "\"-lrt\","
+
 
 def _get_cuda_config(repository_ctx):
-    """Detects and returns information about the CUDA installation on the system.
+  """Detects and returns information about the CUDA installation on the system.
 
     Args:
       repository_ctx: The repository context.
@@ -904,35 +959,39 @@ def _get_cuda_config(repository_ctx):
         compute_capabilities: A list of the system's CUDA compute capabilities.
         cpu_value: The name of the host operating system.
     """
-    cpu_value = get_cpu_value(repository_ctx)
-    cuda_toolkit_path = _cuda_toolkit_path(repository_ctx)
-    cuda_version = _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value)
-    cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
-    cudnn_version = _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value)
-    return struct(
-        cuda_toolkit_path = cuda_toolkit_path,
-        cudnn_install_basedir = cudnn_install_basedir,
-        cuda_version = cuda_version,
-        cudnn_version = cudnn_version,
-        compute_capabilities = _compute_capabilities(repository_ctx),
-        cpu_value = cpu_value,
-    )
+  cpu_value = get_cpu_value(repository_ctx)
+  toolkit_path = cuda_toolkit_path(repository_ctx)
+  cuda_version = _cuda_version(repository_ctx, toolkit_path, cpu_value)
+  cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
+  cudnn_version = _cudnn_version(repository_ctx, cudnn_install_basedir,
+                                 cpu_value)
+  return struct(
+      cuda_toolkit_path=toolkit_path,
+      cudnn_install_basedir=cudnn_install_basedir,
+      cuda_version=cuda_version,
+      cudnn_version=cudnn_version,
+      compute_capabilities=compute_capabilities(repository_ctx),
+      cpu_value=cpu_value,
+  )
+
 
 def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
-    if not out:
-        out = tpl.replace(":", "/")
-    repository_ctx.template(
-        out,
-        Label("//third_party/gpus/%s.tpl" % tpl),
-        substitutions,
-    )
+  if not out:
+    out = tpl.replace(":", "/")
+  repository_ctx.template(
+      out,
+      Label("//third_party/gpus/%s.tpl" % tpl),
+      substitutions,
+  )
+
 
 def _file(repository_ctx, label):
-    repository_ctx.template(
-        label.replace(":", "/"),
-        Label("//third_party/gpus/%s.tpl" % label),
-        {},
-    )
+  repository_ctx.template(
+      label.replace(":", "/"),
+      Label("//third_party/gpus/%s.tpl" % label),
+      {},
+  )
+
 
 _DUMMY_CROSSTOOL_BZL_FILE = """
 def error_gpu_disabled():
@@ -960,81 +1019,99 @@ error_gpu_disabled()
 """
 
 def _create_dummy_repository(repository_ctx):
-    cpu_value = get_cpu_value(repository_ctx)
+  cpu_value = get_cpu_value(repository_ctx)
+
+  # Set up BUILD file for cuda/.
+  _tpl(
+      repository_ctx,
+      "cuda:build_defs.bzl",
+      {
+          "%{cuda_is_configured}": "False",
+          "%{cuda_extra_copts}": "[]",
+      },
+  )
+  _tpl(
+      repository_ctx,
+      "cuda:BUILD",
+      {
+          "%{cuda_driver_lib}":
+              _lib_name("cuda", cpu_value),
+          "%{cudart_static_lib}":
+              _lib_name(
+                  "cudart_static",
+                  cpu_value,
+                  static=True,
+              ),
+          "%{cudart_static_linkopt}":
+              _cudart_static_linkopt(cpu_value),
+          "%{cudart_lib}":
+              _lib_name("cudart", cpu_value),
+          "%{cublas_lib}":
+              _lib_name("cublas", cpu_value),
+          "%{cusolver_lib}":
+              _lib_name("cusolver", cpu_value),
+          "%{cudnn_lib}":
+              _lib_name("cudnn", cpu_value),
+          "%{cufft_lib}":
+              _lib_name("cufft", cpu_value),
+          "%{curand_lib}":
+              _lib_name("curand", cpu_value),
+          "%{cupti_lib}":
+              _lib_name("cupti", cpu_value),
+          "%{cuda_include_genrules}":
+              "",
+          "%{cuda_headers}":
+              "",
+      },
+  )
 
-    # Set up BUILD file for cuda/.
-    _tpl(
-        repository_ctx,
-        "cuda:build_defs.bzl",
-        {
-            "%{cuda_is_configured}": "False",
-            "%{cuda_extra_copts}": "[]",
-        },
-    )
-    _tpl(
-        repository_ctx,
-        "cuda:BUILD",
-        {
-            "%{cuda_driver_lib}": _lib_name("cuda", cpu_value),
-            "%{cudart_static_lib}": _lib_name(
-                "cudart_static",
-                cpu_value,
-                static = True,
-            ),
-            "%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value),
-            "%{cudart_lib}": _lib_name("cudart", cpu_value),
-            "%{cublas_lib}": _lib_name("cublas", cpu_value),
-            "%{cusolver_lib}": _lib_name("cusolver", cpu_value),
-            "%{cudnn_lib}": _lib_name("cudnn", cpu_value),
-            "%{cufft_lib}": _lib_name("cufft", cpu_value),
-            "%{curand_lib}": _lib_name("curand", cpu_value),
-            "%{cupti_lib}": _lib_name("cupti", cpu_value),
-            "%{cuda_include_genrules}": "",
-            "%{cuda_headers}": "",
-        },
-    )
+  # Create dummy files for the CUDA toolkit since they are still required by
+  # tensorflow/core/platform/default/build_config:cuda.
+  repository_ctx.file("cuda/cuda/include/cuda.h", "")
+  repository_ctx.file("cuda/cuda/include/cublas.h", "")
+  repository_ctx.file("cuda/cuda/include/cudnn.h", "")
+  repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h", "")
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value))
+  repository_ctx.file(
+      "cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value))
+  repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value))
+
+  # Set up cuda_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(
+      repository_ctx,
+      "cuda:cuda_config.h",
+      {
+          "%{cuda_version}":
+              _DEFAULT_CUDA_VERSION,
+          "%{cudnn_version}":
+              _DEFAULT_CUDNN_VERSION,
+          "%{cuda_compute_capabilities}":
+              ",".join([
+                  "CudaVersion(\"%s\")" % c
+                  for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+              ]),
+          "%{cuda_toolkit_path}":
+              _DEFAULT_CUDA_TOOLKIT_PATH,
+      },
+      "cuda/cuda/cuda_config.h",
+  )
 
-    # Create dummy files for the CUDA toolkit since they are still required by
-    # tensorflow/core/platform/default/build_config:cuda.
-    repository_ctx.file("cuda/cuda/include/cuda.h", "")
-    repository_ctx.file("cuda/cuda/include/cublas.h", "")
-    repository_ctx.file("cuda/cuda/include/cudnn.h", "")
-    repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h", "")
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value))
-
-    # Set up cuda_config.h, which is used by
-    # tensorflow/stream_executor/dso_loader.cc.
-    _tpl(
-        repository_ctx,
-        "cuda:cuda_config.h",
-        {
-            "%{cuda_version}": _DEFAULT_CUDA_VERSION,
-            "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
-            "%{cuda_compute_capabilities}": ",".join([
-                "CudaVersion(\"%s\")" % c
-                for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES
-            ]),
-            "%{cuda_toolkit_path}": _DEFAULT_CUDA_TOOLKIT_PATH,
-        },
-        "cuda/cuda/cuda_config.h",
-    )
+  # If cuda_configure is not configured to build with GPU support, and the user
+  # attempts to build with --config=cuda, add a dummy build rule to intercept
+  # this and fail with an actionable error message.
+  repository_ctx.file(
+      "crosstool/error_gpu_disabled.bzl",
+      _DUMMY_CROSSTOOL_BZL_FILE,
+  )
+  repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
 
-    # If cuda_configure is not configured to build with GPU support, and the user
-    # attempts to build with --config=cuda, add a dummy build rule to intercept
-    # this and fail with an actionable error message.
-    repository_ctx.file(
-        "crosstool/error_gpu_disabled.bzl",
-        _DUMMY_CROSSTOOL_BZL_FILE,
-    )
-    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
 
 def _execute(
         repository_ctx,
@@ -1042,35 +1119,35 @@ def _execute(
         error_msg = None,
         error_details = None,
         empty_stdout_fine = False):
-    """Executes an arbitrary shell command.
+  """Executes an arbitrary shell command.
 
     Args:
       repository_ctx: the repository_ctx object
       cmdline: list of strings, the command to execute
       error_msg: string, a summary of the error if the command fails
       error_details: string, details about the error or steps to fix it
-      empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
-        it's an error
-    Return:
-      the result of repository_ctx.execute(cmdline)
-    """
-    result = repository_ctx.execute(cmdline)
-    if result.stderr or not (empty_stdout_fine or result.stdout):
-        auto_configure_fail(
-            "\n".join([
-                error_msg.strip() if error_msg else "Repository command failed",
-                result.stderr.strip(),
-                error_details if error_details else "",
-            ]),
-        )
-    return result
+      empty_stdout_fine: bool, if True, an empty stdout result is fine,
+        otherwise it's an error
+    Return: the result of repository_ctx.execute(cmdline)
+  """
+  result = repository_ctx.execute(cmdline)
+  if result.stderr or not (empty_stdout_fine or result.stdout):
+    auto_configure_fail(
+        "\n".join([
+            error_msg.strip() if error_msg else "Repository command failed",
+            result.stderr.strip(),
+            error_details if error_details else "",
+        ]),)
+  return result
+
 
 def _norm_path(path):
-    """Returns a path with '/' and remove the trailing slash."""
-    path = path.replace("\\", "/")
-    if path[-1] == "/":
-        path = path[:-1]
-    return path
+  """Returns a path with '/' and remove the trailing slash."""
+  path = path.replace("\\", "/")
+  if path[-1] == "/":
+    path = path[:-1]
+  return path
+
 
 def symlink_genrule_for_dir(
         repository_ctx,
@@ -1079,167 +1156,174 @@ def symlink_genrule_for_dir(
         genrule_name,
         src_files = [],
         dest_files = []):
-    """Returns a genrule to symlink(or copy if on Windows) a set of files.
+  """Returns a genrule to symlink(or copy if on Windows) a set of files.
 
     If src_dir is passed, files will be read from the given directory; otherwise
     we assume files are in src_files and dest_files
     """
-    if src_dir != None:
-        src_dir = _norm_path(src_dir)
-        dest_dir = _norm_path(dest_dir)
-        files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
-
-        # Create a list with the src_dir stripped to use for outputs.
-        dest_files = files.replace(src_dir, "").splitlines()
-        src_files = files.splitlines()
-    command = []
-    if not _is_windows(repository_ctx):
-        # We clear folders that might have been generated previously to avoid
-        # undesired inclusions
-        command.append('if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi')
-        command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
-        command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
-        command.append('if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi')
-    outs = []
-    for i in range(len(dest_files)):
-        if dest_files[i] != "":
-            # If we have only one file to link we do not want to use the dest_dir, as
-            # $(@D) will include the full path to the file.
-            dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i]
-
-            # Copy the headers to create a sandboxable setup.
-            cmd = "cp -f"
-            command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
-            outs.append('        "' + dest_dir + dest_files[i] + '",')
-    genrule = _genrule(
-        src_dir,
-        genrule_name,
-        " && ".join(command),
-        "\n".join(outs),
-    )
-    return genrule
+  if src_dir != None:
+    src_dir = _norm_path(src_dir)
+    dest_dir = _norm_path(dest_dir)
+    files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines()))
+
+    # Create a list with the src_dir stripped to use for outputs.
+    dest_files = files.replace(src_dir, "").splitlines()
+    src_files = files.splitlines()
+  command = []
+  if not _is_windows(repository_ctx):
+    # We clear folders that might have been generated previously to avoid
+    # undesired inclusions
+    command.append('if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi')
+    command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi')
+    command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi')
+    command.append('if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi')
+  outs = []
+  for i in range(len(dest_files)):
+    if dest_files[i] != "":
+      # If we have only one file to link we do not want to use the dest_dir, as
+      # $(@D) will include the full path to the file.
+      dest = "$(@D)/" + dest_dir + dest_files[i] if len(
+          dest_files) != 1 else "$(@D)/" + dest_files[i]
+
+      # Copy the headers to create a sandboxable setup.
+      cmd = "cp -f"
+      command.append(cmd + ' "%s" "%s"' % (src_files[i], dest))
+      outs.append('        "' + dest_dir + dest_files[i] + '",')
+  genrule = _genrule(
+      src_dir,
+      genrule_name,
+      " && ".join(command),
+      "\n".join(outs),
+  )
+  return genrule
+
 
 def _genrule(src_dir, genrule_name, command, outs):
-    """Returns a string with a genrule.
+  """Returns a string with a genrule.
 
     Genrule executes the given command and produces the given outputs.
     """
-    return (
-        "genrule(\n" +
-        '    name = "' +
-        genrule_name + '",\n' +
-        "    outs = [\n" +
-        outs +
-        "\n    ],\n" +
-        '    cmd = """\n' +
-        command +
-        '\n   """,\n' +
-        ")\n"
-    )
+  return (
+      "genrule(\n" + '    name = "' + genrule_name + '",\n' + "    outs = [\n" +
+      outs + "\n    ],\n" + '    cmd = """\n' + command + '\n   """,\n' + ")\n")
+
 
 def _read_dir(repository_ctx, src_dir):
-    """Returns a string with all files in a directory.
+  """Returns a string with all files in a directory.
 
     Finds all files inside a directory, traversing subfolders and following
     symlinks. The returned string contains the full path of all files
     separated by line breaks.
     """
-    if _is_windows(repository_ctx):
-        src_dir = src_dir.replace("/", "\\")
-        find_result = _execute(
-            repository_ctx,
-            ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
-            empty_stdout_fine = True,
-        )
+  if _is_windows(repository_ctx):
+    src_dir = src_dir.replace("/", "\\")
+    find_result = _execute(
+        repository_ctx,
+        ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
+        empty_stdout_fine=True,
+    )
+
+    # src_files will be used in genrule.outs where the paths must
+    # use forward slashes.
+    result = find_result.stdout.replace("\\", "/")
+  else:
+    find_result = _execute(
+        repository_ctx,
+        ["find", src_dir, "-follow", "-type", "f"],
+        empty_stdout_fine=True,
+    )
+    result = find_result.stdout
+  return result
 
-        # src_files will be used in genrule.outs where the paths must
-        # use forward slashes.
-        result = find_result.stdout.replace("\\", "/")
-    else:
-        find_result = _execute(
-            repository_ctx,
-            ["find", src_dir, "-follow", "-type", "f"],
-            empty_stdout_fine = True,
-        )
-        result = find_result.stdout
-    return result
 
 def _flag_enabled(repository_ctx, flag_name):
-    if flag_name in repository_ctx.os.environ:
-        value = repository_ctx.os.environ[flag_name].strip()
-        return value == "1"
-    return False
+  if flag_name in repository_ctx.os.environ:
+    value = repository_ctx.os.environ[flag_name].strip()
+    return value == "1"
+  return False
+
 
 def _use_cuda_clang(repository_ctx):
-    return _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
+  return _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
+
 
 def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
-    if _use_cuda_clang(repository_ctx):
-        capability_flags = ["--cuda-gpu-arch=sm_" +
-                            cap.replace(".", "") for cap in compute_capabilities]
-    else:
-        # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc
-        capability_flags = []
-    return str(capability_flags)
+  if _use_cuda_clang(repository_ctx):
+    capability_flags = [
+        "--cuda-gpu-arch=sm_" + cap.replace(".", "")
+        for cap in compute_capabilities
+    ]
+  else:
+    # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc
+    # TODO(csigg): Make this consistent with cuda clang and pass to crosstool.
+    capability_flags = []
+  return str(capability_flags)
+
 
 def _create_local_cuda_repository(repository_ctx):
-    """Creates the repository containing files set up to build with CUDA."""
-    cuda_config = _get_cuda_config(repository_ctx)
+  """Creates the repository containing files set up to build with CUDA."""
+  cuda_config = _get_cuda_config(repository_ctx)
 
-    cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config)
-    cudnn_header_dir = _find_cudnn_header_dir(
-        repository_ctx,
-        cuda_config.cudnn_install_basedir,
-    )
-    cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config)
-    nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config)
-
-    # Set up symbolic links for the cuda toolkit by creating genrules to do
-    # symlinking. We create one genrule for each directory we want to track under
-    # cuda_toolkit_path
-    cuda_toolkit_path = cuda_config.cuda_toolkit_path
-    genrules = [symlink_genrule_for_dir(
-        repository_ctx,
-        cuda_include_path,
-        "cuda/include",
-        "cuda-include",
-    )]
-    genrules.append(symlink_genrule_for_dir(
-        repository_ctx,
-        nvvm_libdevice_dir,
-        "cuda/nvvm/libdevice",
-        "cuda-nvvm",
-    ))
-    genrules.append(symlink_genrule_for_dir(
-        repository_ctx,
-        cupti_header_dir,
-        "cuda/extras/CUPTI/include",
-        "cuda-extras",
-    ))
-
-    cuda_libs = _find_libs(repository_ctx, cuda_config)
-    cuda_lib_src = []
-    cuda_lib_dest = []
-    for lib in cuda_libs.values():
-        cuda_lib_src.append(lib.path)
-        cuda_lib_dest.append("cuda/lib/" + lib.file_name)
-    genrules.append(symlink_genrule_for_dir(
-        repository_ctx,
-        None,
-        "",
-        "cuda-lib",
-        cuda_lib_src,
-        cuda_lib_dest,
-    ))
-
-    # Set up the symbolic links for cudnn if cndnn was not installed to
-    # CUDA_TOOLKIT_PATH.
-    included_files = _read_dir(repository_ctx, cuda_include_path).replace(
-        cuda_include_path,
-        "",
-    ).splitlines()
-    if "/cudnn.h" not in included_files:
-        genrules.append(symlink_genrule_for_dir(
+  cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config)
+  cudnn_header_dir = _find_cudnn_header_dir(
+      repository_ctx,
+      cuda_config.cudnn_install_basedir,
+  )
+  cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config)
+  nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config)
+
+  # Set up symbolic links for the cuda toolkit by creating genrules to do
+  # symlinking. We create one genrule for each directory we want to track under
+  # cuda_toolkit_path
+  cuda_toolkit_path = cuda_config.cuda_toolkit_path
+  genrules = [
+      symlink_genrule_for_dir(
+          repository_ctx,
+          cuda_include_path,
+          "cuda/include",
+          "cuda-include",
+      )
+  ]
+  genrules.append(
+      symlink_genrule_for_dir(
+          repository_ctx,
+          nvvm_libdevice_dir,
+          "cuda/nvvm/libdevice",
+          "cuda-nvvm",
+      ))
+  genrules.append(
+      symlink_genrule_for_dir(
+          repository_ctx,
+          cupti_header_dir,
+          "cuda/extras/CUPTI/include",
+          "cuda-extras",
+      ))
+
+  cuda_libs = _find_libs(repository_ctx, cuda_config)
+  cuda_lib_src = []
+  cuda_lib_dest = []
+  for lib in cuda_libs.values():
+    cuda_lib_src.append(lib.path)
+    cuda_lib_dest.append("cuda/lib/" + lib.file_name)
+  genrules.append(
+      symlink_genrule_for_dir(
+          repository_ctx,
+          None,
+          "",
+          "cuda-lib",
+          cuda_lib_src,
+          cuda_lib_dest,
+      ))
+
+  # Set up the symbolic links for cudnn if cndnn was not installed to
+  # CUDA_TOOLKIT_PATH.
+  included_files = _read_dir(repository_ctx, cuda_include_path).replace(
+      cuda_include_path,
+      "",
+  ).splitlines()
+  if "/cudnn.h" not in included_files:
+    genrules.append(
+        symlink_genrule_for_dir(
             repository_ctx,
             None,
             "cuda/include/",
@@ -1247,204 +1331,229 @@ def _create_local_cuda_repository(repository_ctx):
             [cudnn_header_dir + "/cudnn.h"],
             ["cudnn.h"],
         ))
-    else:
-        genrules.append(
-            "filegroup(\n" +
-            '    name = "cudnn-include",\n' +
-            "    srcs = [],\n" +
-            ")\n",
-        )
-
-    # Set up BUILD file for cuda/
-    _tpl(
-        repository_ctx,
-        "cuda:build_defs.bzl",
-        {
-            "%{cuda_is_configured}": "True",
-            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
-                repository_ctx,
-                cuda_config.compute_capabilities,
-            ),
-        },
-    )
-    _tpl(
-        repository_ctx,
-        "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD",
-        {
-            "%{cuda_driver_lib}": cuda_libs["cuda"].file_name,
-            "%{cudart_static_lib}": cuda_libs["cudart_static"].file_name,
-            "%{cudart_static_linkopt}": _cudart_static_linkopt(
-                cuda_config.cpu_value,
-            ),
-            "%{cudart_lib}": cuda_libs["cudart"].file_name,
-            "%{cublas_lib}": cuda_libs["cublas"].file_name,
-            "%{cusolver_lib}": cuda_libs["cusolver"].file_name,
-            "%{cudnn_lib}": cuda_libs["cudnn"].file_name,
-            "%{cufft_lib}": cuda_libs["cufft"].file_name,
-            "%{curand_lib}": cuda_libs["curand"].file_name,
-            "%{cupti_lib}": cuda_libs["cupti"].file_name,
-            "%{cuda_include_genrules}": "\n".join(genrules),
-            "%{cuda_headers}": ('":cuda-include",\n' +
-                                '        ":cudnn-include",'),
-        },
-        "cuda/BUILD",
-    )
-
-    is_cuda_clang = _use_cuda_clang(repository_ctx)
+  else:
+    genrules.append(
+        "filegroup(\n" + '    name = "cudnn-include",\n' + "    srcs = [],\n" +
+        ")\n",)
+
+  # Set up BUILD file for cuda/
+  _tpl(
+      repository_ctx,
+      "cuda:build_defs.bzl",
+      {
+          "%{cuda_is_configured}":
+              "True",
+          "%{cuda_extra_copts}":
+              _compute_cuda_extra_copts(
+                  repository_ctx,
+                  cuda_config.compute_capabilities,
+              ),
+      },
+  )
+  _tpl(
+      repository_ctx,
+      "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD",
+      {
+          "%{cuda_driver_lib}":
+              cuda_libs["cuda"].file_name,
+          "%{cudart_static_lib}":
+              cuda_libs["cudart_static"].file_name,
+          "%{cudart_static_linkopt}":
+              _cudart_static_linkopt(cuda_config.cpu_value,),
+          "%{cudart_lib}":
+              cuda_libs["cudart"].file_name,
+          "%{cublas_lib}":
+              cuda_libs["cublas"].file_name,
+          "%{cusolver_lib}":
+              cuda_libs["cusolver"].file_name,
+          "%{cudnn_lib}":
+              cuda_libs["cudnn"].file_name,
+          "%{cufft_lib}":
+              cuda_libs["cufft"].file_name,
+          "%{curand_lib}":
+              cuda_libs["curand"].file_name,
+          "%{cupti_lib}":
+              cuda_libs["cupti"].file_name,
+          "%{cuda_include_genrules}":
+              "\n".join(genrules),
+          "%{cuda_headers}": ('":cuda-include",\n' + '        ":cudnn-include",'
+                             ),
+      },
+      "cuda/BUILD",
+  )
 
-    should_download_clang = is_cuda_clang and _flag_enabled(
-        repository_ctx,
-        _TF_DOWNLOAD_CLANG,
-    )
-    if should_download_clang:
-        download_clang(repository_ctx, "crosstool/extra_tools")
-
-    # Set up crosstool/
-    cc = find_cc(repository_ctx)
-    cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
-
-    host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath)
-    cuda_defines = {}
-    # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see
-    # https://github.com/bazelbuild/bazel/issues/760).
-    # However, this stops our custom clang toolchain from picking the provided
-    # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded
-    # toolchain.
-    # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
-    #       flag from the CROSSTOOL completely (see
-    #       https://github.com/bazelbuild/bazel/issues/5634)
-    if should_download_clang:
-      cuda_defines["%{linker_bin_path_flag}"] = ""
-    else:
-      cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"'
+  is_cuda_clang = _use_cuda_clang(repository_ctx)
 
-    if is_cuda_clang:
-        cuda_defines["%{host_compiler_path}"] = str(cc)
-        cuda_defines["%{host_compiler_warnings}"] = """
+  should_download_clang = is_cuda_clang and _flag_enabled(
+      repository_ctx,
+      _TF_DOWNLOAD_CLANG,
+  )
+  if should_download_clang:
+    download_clang(repository_ctx, "crosstool/extra_tools")
+
+  # Set up crosstool/
+  cc = find_cc(repository_ctx)
+  cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
+
+  host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath)
+  cuda_defines = {}
+  # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see
+  # https://github.com/bazelbuild/bazel/issues/760).
+  # However, this stops our custom clang toolchain from picking the provided
+  # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded
+  # toolchain.
+  # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
+  #       flag from the CROSSTOOL completely (see
+  #       https://github.com/bazelbuild/bazel/issues/5634)
+  if should_download_clang:
+    cuda_defines["%{linker_bin_path_flag}"] = ""
+  else:
+    cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"'
+
+  if is_cuda_clang:
+    cuda_defines["%{host_compiler_path}"] = str(cc)
+    cuda_defines["%{host_compiler_warnings}"] = """
         # Some parts of the codebase set -Werror and hit this warning, so
         # switch it off for now.
         flag: "-Wno-invalid-partial-specialization"
     """
-        cuda_defines["%{host_compiler_includes}"] = host_compiler_includes
-        _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"})
-        repository_ctx.file("crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "")
-        repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
-        repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.bat", "")
-    else:
-        cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
-        cuda_defines["%{host_compiler_warnings}"] = ""
-
-        # nvcc has the system include paths built in and will automatically
-        # search them; we cannot work around that, so we add the relevant cuda
-        # system paths to the allowed compiler specific include paths.
-        cuda_defines["%{host_compiler_includes}"] = (
-            host_compiler_includes + "\n" +
-            _cuda_include_path(repository_ctx, cuda_config) +
-            "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
-            "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir)
-        nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" %
-                                            (
-                                                cuda_config.cuda_toolkit_path,
-                                                ".exe" if _is_windows(repository_ctx) else "",
-                                            )))
-        _tpl(
-            repository_ctx,
-            "crosstool:BUILD",
-            {
-                "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc",
-                "%{win_linker_files}": ":windows_msvc_wrapper_files",
-            },
-        )
-        wrapper_defines = {
-            "%{cpu_compiler}": str(cc),
-            "%{cuda_version}": cuda_config.cuda_version,
-            "%{nvcc_path}": nvcc_path,
-            "%{gcc_host_compiler_path}": str(cc),
-            "%{cuda_compute_capabilities}": ", ".join(
-                ["\"%s\"" % c for c in cuda_config.compute_capabilities],
-            ),
-            "%{nvcc_tmp_dir}": _get_nvcc_tmp_dir_for_windows(repository_ctx),
-        }
-        _tpl(
-            repository_ctx,
-            "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
-            wrapper_defines,
-        )
-        _tpl(
-            repository_ctx,
-            "crosstool:windows/msvc_wrapper_for_nvcc.py",
-            wrapper_defines,
-        )
-        _tpl(
-            repository_ctx,
-            "crosstool:windows/msvc_wrapper_for_nvcc.bat",
-            {
-                "%{python_binary}": _get_python_bin(repository_ctx),
-            },
-        )
-
+    cuda_defines["%{host_compiler_includes}"] = host_compiler_includes
+    _tpl(repository_ctx, "crosstool:BUILD", {
+        "%{linker_files}": ":empty",
+        "%{win_linker_files}": ":empty"
+    })
+    repository_ctx.file(
+        "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "")
+    repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
+    repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.bat", "")
+  else:
+    cuda_defines[
+        "%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
+    cuda_defines["%{host_compiler_warnings}"] = ""
+
+    # nvcc has the system include paths built in and will automatically
+    # search them; we cannot work around that, so we add the relevant cuda
+    # system paths to the allowed compiler specific include paths.
+    cuda_defines["%{host_compiler_includes}"] = (
+        host_compiler_includes + "\n" + _cuda_include_path(
+            repository_ctx, cuda_config) +
+        "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
+        "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir)
+    nvcc_path = str(
+        repository_ctx.path("%s/bin/nvcc%s" % (
+            cuda_config.cuda_toolkit_path,
+            ".exe" if _is_windows(repository_ctx) else "",
+        )))
     _tpl(
         repository_ctx,
-        "crosstool:CROSSTOOL",
-        cuda_defines + _get_win_cuda_defines(repository_ctx),
-        out = "crosstool/CROSSTOOL",
+        "crosstool:BUILD",
+        {
+            "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc",
+            "%{win_linker_files}": ":windows_msvc_wrapper_files",
+        },
     )
-
-    # Set up cuda_config.h, which is used by
-    # tensorflow/stream_executor/dso_loader.cc.
+    wrapper_defines = {
+        "%{cpu_compiler}":
+            str(cc),
+        "%{cuda_version}":
+            cuda_config.cuda_version,
+        "%{nvcc_path}":
+            nvcc_path,
+        "%{gcc_host_compiler_path}":
+            str(cc),
+        "%{cuda_compute_capabilities}":
+            ", ".join(
+                ["\"%s\"" % c for c in cuda_config.compute_capabilities],),
+        "%{nvcc_tmp_dir}":
+            _get_nvcc_tmp_dir_for_windows(repository_ctx),
+    }
     _tpl(
         repository_ctx,
-        "cuda:cuda_config.h",
-        {
-            "%{cuda_version}": cuda_config.cuda_version,
-            "%{cudnn_version}": cuda_config.cudnn_version,
-            "%{cuda_compute_capabilities}": ",".join(
-                [
-                    "CudaVersion(\"%s\")" % c
-                    for c in cuda_config.compute_capabilities
-                ],
-            ),
-            "%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path,
-        },
-        "cuda/cuda/cuda_config.h",
+        "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
+        wrapper_defines,
     )
-
-def _create_remote_cuda_repository(repository_ctx, remote_config_repo):
-    """Creates pointers to a remotely configured repo set up to build with CUDA."""
     _tpl(
         repository_ctx,
-        "cuda:build_defs.bzl",
-        {
-            "%{cuda_is_configured}": "True",
-            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
-                repository_ctx,
-                _compute_capabilities(repository_ctx),
-            ),
-        },
+        "crosstool:windows/msvc_wrapper_for_nvcc.py",
+        wrapper_defines,
     )
     _tpl(
         repository_ctx,
-        "cuda:remote.BUILD",
+        "crosstool:windows/msvc_wrapper_for_nvcc.bat",
         {
-            "%{remote_cuda_repo}": remote_config_repo,
+            "%{python_binary}": _get_python_bin(repository_ctx),
         },
-        "cuda/BUILD",
     )
-    _tpl(repository_ctx, "crosstool:remote.BUILD", {
-        "%{remote_cuda_repo}": remote_config_repo,
-    }, "crosstool/BUILD")
+
+  _tpl(
+      repository_ctx,
+      "crosstool:CROSSTOOL",
+      cuda_defines + _get_win_cuda_defines(repository_ctx),
+      out="crosstool/CROSSTOOL",
+  )
+
+  # Set up cuda_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(
+      repository_ctx,
+      "cuda:cuda_config.h",
+      {
+          "%{cuda_version}":
+              cuda_config.cuda_version,
+          "%{cudnn_version}":
+              cuda_config.cudnn_version,
+          "%{cuda_compute_capabilities}":
+              ",".join([
+                  "CudaVersion(\"%s\")" % c
+                  for c in cuda_config.compute_capabilities
+              ],),
+          "%{cuda_toolkit_path}":
+              cuda_config.cuda_toolkit_path,
+      },
+      "cuda/cuda/cuda_config.h",
+  )
+
+
+def _create_remote_cuda_repository(repository_ctx, remote_config_repo):
+  """Creates pointers to a remotely configured repo set up to build with CUDA."""
+  _tpl(
+      repository_ctx,
+      "cuda:build_defs.bzl",
+      {
+          "%{cuda_is_configured}":
+              "True",
+          "%{cuda_extra_copts}":
+              _compute_cuda_extra_copts(
+                  repository_ctx,
+                  compute_capabilities(repository_ctx),
+              ),
+      },
+  )
+  _tpl(
+      repository_ctx,
+      "cuda:remote.BUILD",
+      {
+          "%{remote_cuda_repo}": remote_config_repo,
+      },
+      "cuda/BUILD",
+  )
+  _tpl(repository_ctx, "crosstool:remote.BUILD", {
+      "%{remote_cuda_repo}": remote_config_repo,
+  }, "crosstool/BUILD")
+
 
 def _cuda_autoconf_impl(repository_ctx):
-    """Implementation of the cuda_autoconf repository rule."""
-    if not _enable_cuda(repository_ctx):
-        _create_dummy_repository(repository_ctx)
-    elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ:
-        _create_remote_cuda_repository(
-            repository_ctx,
-            repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO],
-        )
-    else:
-        _create_local_cuda_repository(repository_ctx)
+  """Implementation of the cuda_autoconf repository rule."""
+  if not _enable_cuda(repository_ctx):
+    _create_dummy_repository(repository_ctx)
+  elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ:
+    _create_remote_cuda_repository(
+        repository_ctx,
+        repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO],
+    )
+  else:
+    _create_local_cuda_repository(repository_ctx)
+
 
 cuda_configure = repository_rule(
     implementation = _cuda_autoconf_impl,
diff --git a/third_party/nccl/LICENSE b/third_party/nccl/LICENSE
index 146d9b765c..b958518186 100644
--- a/third_party/nccl/LICENSE
+++ b/third_party/nccl/LICENSE
@@ -1,203 +1,30 @@
-Copyright 2018 The TensorFlow Authors.  All rights reserved.
 
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright 2018, The TensorFlow Authors.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+ Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+  * Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National
+    Laboratory, the U.S. Department of Energy, nor the names of their
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ The U.S. Department of Energy funded the development of this software
+ under subcontract 7078610 with Lawrence Berkeley National Laboratory.
diff --git a/third_party/nccl/archive.BUILD b/third_party/nccl/archive.BUILD
new file mode 100644
index 0000000000..f57f04c75e
--- /dev/null
+++ b/third_party/nccl/archive.BUILD
@@ -0,0 +1,179 @@
+# NVIDIA NCCL 2
+# A package of optimized primitives for collective multi-GPU communication.
+
+licenses(["restricted"])
+
+exports_files(["LICENSE.txt"])
+
+load(
+    "@local_config_nccl//:build_defs.bzl",
+    "device_link",
+    "gen_nccl_h",
+    "nccl_library",
+    "rdc_copts",
+)
+load(
+    "@local_config_cuda//cuda:build_defs.bzl",
+    "cuda_default_copts",
+)
+
+# Generate the nccl.h header file.
+gen_nccl_h(
+    name = "nccl_h",
+    output = "src/nccl.h",
+    template = "src/nccl.h.in",
+)
+
+nccl_library(
+    name = "src_hdrs",
+    hdrs = [
+        "src/nccl.h",
+        # src/include/common_coll.h #includes "collectives/collectives.h".
+        # All other #includes of collectives.h are patched in process_srcs.
+        "src/collectives/collectives.h",
+    ],
+    strip_include_prefix = "src",
+)
+
+nccl_library(
+    name = "include_hdrs",
+    hdrs = glob(["src/include/*.h"]),
+    strip_include_prefix = "src/include",
+)
+
+filegroup(
+    name = "device_hdrs",
+    srcs = glob(["src/collectives/device/*.h"]),
+)
+
+filegroup(
+    name = "device_srcs",
+    srcs = [
+        "src/collectives/device/all_gather.cu",
+        "src/collectives/device/all_reduce.cu",
+        "src/collectives/device/broadcast.cu",
+        "src/collectives/device/reduce.cu",
+        "src/collectives/device/reduce_scatter.cu",
+    ],
+)
+
+nccl_library(
+    name = "sum",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=0"] + rdc_copts(),
+    prefix = "sum_",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "prod",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=1"] + rdc_copts(),
+    prefix = "_prod",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "min",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=2"] + rdc_copts(),
+    prefix = "min_",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "max",
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=3"] + rdc_copts(),
+    prefix = "max_",
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+nccl_library(
+    name = "functions",
+    srcs = [
+        ":device_hdrs",
+        "src/collectives/device/functions.cu",
+    ],
+    copts = rdc_copts(),
+    deps = [
+        ":src_hdrs",
+        ":include_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+    linkstatic = True,
+)
+
+device_link(
+    name = "device_code",
+    srcs = [
+        ":functions",
+        ":max",
+        ":min",
+        ":prod",
+        ":sum",
+    ],
+)
+
+# Primary NCCL target.
+nccl_library(
+    name = "nccl",
+    srcs = glob(
+        include = ["src/**/*.cu"],
+        # Exclude device-library code.
+        exclude = ["src/collectives/device/**"],
+    ) + [
+        # Required for header inclusion checking (see
+        # http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs).
+        # Files in src/ which #include "nccl.h" load it from there rather than
+        # from the virtual includes directory.
+        "src/nccl.h",
+    ],
+    hdrs = ["src/nccl.h"],
+    include_prefix = "third_party/nccl",
+    strip_include_prefix = "src",
+    copts = cuda_default_copts(),
+    deps = [
+        ":device_code",
+        ":functions",
+        ":include_hdrs",
+        ":max",
+        ":min",
+        ":prod",
+        ":src_hdrs",
+        ":sum",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl
new file mode 100644
index 0000000000..ede1d3dad5
--- /dev/null
+++ b/third_party/nccl/build_defs.bzl.tpl
@@ -0,0 +1,210 @@
+"""Repository rule for NCCL."""
+
+load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts")
+
+def _gen_nccl_h_impl(ctx):
+    """Creates nccl.h from a template."""
+    ctx.actions.expand_template(
+        output = ctx.outputs.output,
+        template = ctx.file.template,
+        substitutions = {
+            "${nccl:Major}": "2",
+            "${nccl:Minor}": "3",
+            "${nccl:Patch}": "5",
+            "${nccl:Suffix}": "",
+            "${nccl:Version}": "2305",
+        },
+    )
+gen_nccl_h = rule(
+    implementation = _gen_nccl_h_impl,
+    attrs = {
+        "template": attr.label(allow_single_file = True),
+        "output": attr.output(),
+    },
+)
+"""Creates the NCCL header file."""
+
+
+def _process_srcs_impl(ctx):
+    """Appends .cc to .cu files, patches include directives."""
+    files = []
+    for src in ctx.files.srcs:
+        if not src.is_source:
+          # Process only once, specifically "src/nccl.h".
+          files.append(src)
+          continue
+        name = src.basename
+        if src.extension == "cu":
+            name = ctx.attr.prefix + name + ".cc"
+        file = ctx.actions.declare_file(name, sibling = src)
+        ctx.actions.expand_template(
+            output = file,
+            template = src,
+            substitutions = {
+                "\"collectives.h": "\"collectives/collectives.h",
+                "\"../collectives.h": "\"collectives/collectives.h",
+                "#if __CUDACC_VER_MAJOR__":
+                    "#if defined __CUDACC_VER_MAJOR__ && __CUDACC_VER_MAJOR__",
+                # Substitutions are applied in order.
+                "std::nullptr_t": "nullptr_t",
+                "nullptr_t": "std::nullptr_t",
+            },
+        )
+        files.append(file)
+    return [DefaultInfo(files = depset(files))]
+_process_srcs = rule(
+    implementation = _process_srcs_impl,
+    attrs = {
+        "srcs": attr.label_list(allow_files = True),
+        "prefix": attr.string(default = ""),
+    },
+)
+"""Processes the NCCL srcs so they can be compiled with bazel and clang."""
+
+
+def nccl_library(name, srcs=None, hdrs=None, prefix=None, **kwargs):
+    """Processes the srcs and hdrs and creates a cc_library."""
+
+    _process_srcs(
+        name = name + "_srcs",
+        srcs = srcs,
+        prefix = prefix,
+    )
+    _process_srcs(
+        name = name + "_hdrs",
+        srcs = hdrs,
+    )
+
+    native.cc_library(
+        name = name,
+        srcs = [name + "_srcs"] if srcs else [],
+        hdrs = [name + "_hdrs"] if hdrs else [],
+        **kwargs
+    )
+
+
+def rdc_copts():
+    """Returns copts for compiling relocatable device code."""
+
+    # The global functions can not have a lower register count than the
+    # device functions. This is enforced by setting a fixed register count.
+    # https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48
+    maxrregcount = "-maxrregcount=96"
+
+    return cuda_default_copts() + select({
+          "@local_config_cuda//cuda:using_nvcc": [
+              "-nvcc_options",
+              "relocatable-device-code=true",
+              "-nvcc_options",
+              "ptxas-options=" + maxrregcount,
+          ],
+          "@local_config_cuda//cuda:using_clang": [
+              "-fcuda-rdc",
+              "-Xcuda-ptxas",
+              maxrregcount,
+          ],
+          "//conditions:default": [],
+      }) + ["-fvisibility=hidden"]
+
+
+def _filter_impl(ctx):
+    suffix = ctx.attr.suffix
+    files = [src for src in ctx.files.srcs if src.path.endswith(suffix)]
+    return [DefaultInfo(files = depset(files))]
+_filter = rule(
+    implementation = _filter_impl,
+    attrs = {
+        "srcs": attr.label_list(allow_files = True),
+        "suffix": attr.string(),
+    },
+)
+"""Filters the srcs to the ones ending with suffix."""
+
+
+def _gen_link_src_impl(ctx):
+    ctx.actions.expand_template(
+        output = ctx.outputs.output,
+        template = ctx.file.template,
+        substitutions = {
+            "REGISTERLINKBINARYFILE": '"%s"' % ctx.file.register_hdr.short_path,
+            "FATBINFILE": '"%s"' % ctx.file.fatbin_hdr.short_path,
+        },
+    )
+_gen_link_src = rule(
+    implementation = _gen_link_src_impl,
+    attrs = {
+        "register_hdr": attr.label(allow_single_file = True),
+        "fatbin_hdr": attr.label(allow_single_file = True),
+        "template": attr.label(allow_single_file = True),
+        "output": attr.output(),
+    },
+)
+"""Patches the include directives for the link.stub file."""
+
+
+def device_link(name, srcs):
+    """Links seperately compiled relocatable device code into a cc_library."""
+
+    # From .a and .pic.a archives, just use the latter.
+    _filter(
+        name = name + "_pic_a",
+        srcs = srcs,
+        suffix = ".pic.a",
+    )
+
+    # Device-link to cubins for each architecture.
+    images = []
+    cubins = []
+    for arch in %{gpu_architectures}:
+        cubin = "%s_%s.cubin" % (name, arch)
+        register_hdr = "%s_%s.h" % (name, arch)
+        nvlink = "@local_config_nccl//:nvlink"
+        cmd = ("$(location %s) --cpu-arch=X86_64 " % nvlink +
+            "--arch=%s $(SRCS) " % arch +
+            "--register-link-binaries=$(location %s) " % register_hdr +
+            "--output-file=$(location %s)" % cubin)
+        native.genrule(
+            name = "%s_%s" % (name, arch),
+            outs = [register_hdr, cubin],
+            srcs = [name + "_pic_a"],
+            cmd = cmd,
+            tools = [nvlink],
+        )
+        images.append("--image=profile=%s,file=$(location %s)" % (arch, cubin))
+        cubins.append(cubin)
+
+    # Generate fatbin header from all cubins.
+    fatbin_hdr = name + ".fatbin.h"
+    fatbinary = "@local_config_nccl//:cuda/bin/fatbinary"
+    cmd = ("PATH=$$CUDA_TOOLKIT_PATH/bin:$$PATH " + # for bin2c
+          "$(location %s) -64 --cmdline=--compile-only --link " % fatbinary +
+          "--compress-all %s --create=%%{name}.fatbin " % " ".join(images) +
+          "--embedded-fatbin=$@")
+    native.genrule(
+        name = name + "_fatbin_h",
+        outs = [fatbin_hdr],
+        srcs = cubins,
+        cmd = cmd,
+        tools = [fatbinary],
+    )
+
+    # Generate the source file #including the headers generated above.
+    _gen_link_src(
+        name = name + "_cc",
+        # Include just the last one, they are equivalent.
+        register_hdr = register_hdr,
+        fatbin_hdr = fatbin_hdr,
+        template = "@local_config_nccl//:cuda/bin/crt/link.stub",
+        output = name + ".cc",
+    )
+
+    # Compile the source file into the cc_library.
+    native.cc_library(
+        name = name,
+        srcs = [name + "_cc"],
+        textual_hdrs = [register_hdr, fatbin_hdr],
+        deps = [
+            "@local_config_cuda//cuda:cuda_headers",
+            "@local_config_cuda//cuda:cudart_static",
+        ],
+    )
diff --git a/third_party/nccl/nccl_archive.BUILD b/third_party/nccl/nccl_archive.BUILD
deleted file mode 100644
index a05899e38d..0000000000
--- a/third_party/nccl/nccl_archive.BUILD
+++ /dev/null
@@ -1,68 +0,0 @@
-# NVIDIA nccl
-# A package of optimized primitives for collective multi-GPU communication.
-
-licenses(["notice"])  # BSD
-
-exports_files(["LICENSE.txt"])
-
-load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "if_cuda")
-
-SRCS = [
-    "src/all_gather.cu",
-    "src/all_reduce.cu",
-    "src/broadcast.cu",
-    "src/core.cu",
-    "src/libwrap.cu",
-    "src/reduce.cu",
-    "src/reduce_scatter.cu",
-]
-
-# Copy .cu to .cu.cc so they can be in srcs of cc_library.
-[
-    genrule(
-        name = "gen_" + src,
-        srcs = [src],
-        outs = [src + ".cc"],
-        cmd = "cp $(location " + src + ") $(location " + src + ".cc)",
-    )
-    for src in SRCS
-]
-
-SRCS_CU_CC = [src + ".cc" for src in SRCS]
-
-cc_library(
-    name = "nccl",
-    srcs = if_cuda(SRCS_CU_CC + glob(["src/*.h"])),
-    hdrs = if_cuda(["src/nccl.h"]),
-    copts = [
-        "-DCUDA_MAJOR=0",
-        "-DCUDA_MINOR=0",
-        "-DNCCL_MAJOR=0",
-        "-DNCCL_MINOR=0",
-        "-DNCCL_PATCH=0",
-        "-Iexternal/nccl_archive/src",
-        "-O3",
-    ] + cuda_default_copts(),
-    include_prefix = "third_party/nccl",
-    linkopts = select({
-        "@org_tensorflow//tensorflow:android": [
-            "-pie",
-        ],
-        "@org_tensorflow//tensorflow:darwin": [
-            "-Wl,-framework",
-            "-Wl,CoreFoundation",
-            "-Wl,-framework",
-            "-Wl,Security",
-        ],
-        "@org_tensorflow//tensorflow:ios": [],
-        "@org_tensorflow//tensorflow:windows": [
-            "-DEFAULTLIB:ws2_32.lib",
-        ],
-        "//conditions:default": [
-            "-lrt",
-        ],
-    }),
-    strip_include_prefix = "src",
-    visibility = ["//visibility:public"],
-    deps = ["@local_config_cuda//cuda:cuda_headers"],
-)
diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl
index d78fe8f3aa..7f00df0962 100644
--- a/third_party/nccl/nccl_configure.bzl
+++ b/third_party/nccl/nccl_configure.bzl
@@ -11,12 +11,16 @@
 load(
     "//third_party/gpus:cuda_configure.bzl",
     "auto_configure_fail",
+    "compute_capabilities",
+    "cuda_toolkit_path",
     "find_cuda_define",
     "matches_version",
 )
 
-_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
 _NCCL_HDR_PATH = "NCCL_HDR_PATH"
+_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
+_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
 _TF_NCCL_VERSION = "TF_NCCL_VERSION"
 _TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO"
 
@@ -37,6 +41,12 @@ cc_library(
 """
 
 _NCCL_ARCHIVE_BUILD_CONTENT = """
+exports_files([
+    "cuda/bin/crt/link.stub",
+    "cuda/bin/fatbinary",
+    "nvlink",
+])
+
 filegroup(
   name = "LICENSE",
   data = ["@nccl_archive//:LICENSE.txt"],
@@ -50,113 +60,125 @@ alias(
 )
 """
 
-# Local build results in dynamic link and the license should not be included.
-_NCCL_REMOTE_BUILD_TEMPLATE = Label("//third_party/nccl:remote.BUILD.tpl")
-_NCCL_LOCAL_BUILD_TEMPLATE = Label("//third_party/nccl:system.BUILD.tpl")
+def _label(file):
+    return Label("//third_party/nccl:{}".format(file))
 
 def _find_nccl_header(repository_ctx, nccl_install_path):
-  """Finds the NCCL header on the system.
-
-  Args:
-    repository_ctx: The repository context.
-    nccl_install_path: The NCCL library install directory.
+    """Finds the NCCL header on the system.
 
-  Returns:
-    The path to the NCCL header.
-  """
-  header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path)
-  if not header_path.exists:
-    auto_configure_fail("Cannot find %s" % str(header_path))
-  return header_path
+    Args:
+      repository_ctx: The repository context.
+      nccl_install_path: The NCCL library install directory.
 
+    Returns:
+      The path to the NCCL header.
+    """
+    header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path)
+    if not header_path.exists:
+        auto_configure_fail("Cannot find %s" % str(header_path))
+    return header_path
 
 def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version):
-  """Checks whether the header file matches the specified version of NCCL.
-
-  Args:
-    repository_ctx: The repository context.
-    nccl_install_path: The NCCL library install directory.
-    nccl_version: The expected NCCL version.
-
-  Returns:
-    A string containing the library version of NCCL.
-  """
-  header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
-  if not header_path.exists:
-    header_path = _find_nccl_header(repository_ctx, nccl_install_path)
-  header_dir = str(header_path.realpath.dirname)
-  major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
-                                   _DEFINE_NCCL_MAJOR)
-  minor_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
-                                   _DEFINE_NCCL_MINOR)
-  patch_version = find_cuda_define(repository_ctx, header_dir, "nccl.h",
-                                   _DEFINE_NCCL_PATCH)
-  header_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
-  if not matches_version(nccl_version, header_version):
-    auto_configure_fail(
-        ("NCCL library version detected from %s/nccl.h (%s) does not match " +
-         "TF_NCCL_VERSION (%s). To fix this rerun configure again.") %
-        (header_dir, header_version, nccl_version))
-
-
-def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version):
-  """Finds the given NCCL library on the system.
-
-  Args:
-    repository_ctx: The repository context.
-    nccl_install_path: The NCCL library installation directory.
-    nccl_version: The version of NCCL library files as returned
-      by _nccl_version.
-
-  Returns:
-    The path to the NCCL library.
-  """
-  lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path,
-                                                           nccl_version))
-  if not lib_path.exists:
-    auto_configure_fail("Cannot find NCCL library %s" % str(lib_path))
-  return lib_path
-
+    """Checks whether the header file matches the specified version of NCCL.
+
+    Args:
+      repository_ctx: The repository context.
+      nccl_install_path: The NCCL library install directory.
+      nccl_hdr_path: The NCCL header path.
+      nccl_version: The expected NCCL version.
+
+    Returns:
+      A string containing the library version of NCCL.
+    """
+    header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path)
+    if not header_path.exists:
+        header_path = _find_nccl_header(repository_ctx, nccl_install_path)
+    header_dir = str(header_path.realpath.dirname)
+    major_version = find_cuda_define(
+        repository_ctx,
+        header_dir,
+        "nccl.h",
+        _DEFINE_NCCL_MAJOR,
+    )
+    minor_version = find_cuda_define(
+        repository_ctx,
+        header_dir,
+        "nccl.h",
+        _DEFINE_NCCL_MINOR,
+    )
+    patch_version = find_cuda_define(
+        repository_ctx,
+        header_dir,
+        "nccl.h",
+        _DEFINE_NCCL_PATCH,
+    )
+    header_version = "%s.%s.%s" % (major_version, minor_version, patch_version)
+    if not matches_version(nccl_version, header_version):
+        auto_configure_fail(
+            ("NCCL library version detected from %s/nccl.h (%s) does not match " +
+             "TF_NCCL_VERSION (%s). To fix this rerun configure again.") %
+            (header_dir, header_version, nccl_version),
+        )
 
 def _nccl_configure_impl(repository_ctx):
-  """Implementation of the nccl_configure repository rule."""
-  if _TF_NCCL_VERSION not in repository_ctx.os.environ:
-    # Add a dummy build file to make bazel query happy.
-    repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
-    return
-
-  if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ:
-    # Forward to the pre-configured remote repository.
-    repository_ctx.template("BUILD", _NCCL_REMOTE_BUILD_TEMPLATE, {
-        "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO],
-    })
-    return
-
-  nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip()
-  if matches_version("1", nccl_version):
-    # Alias to GitHub target from @nccl_archive.
-    if not matches_version(nccl_version, "1.3"):
-      auto_configure_fail(
-          "NCCL from GitHub must use version 1.3 (got %s)" % nccl_version)
-    repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
-  else:
-    # Create target for locally installed NCCL.
-    nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
-    nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
-    _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
-    repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, {
-        "%{version}": nccl_version,
-        "%{install_path}": nccl_install_path,
-        "%{hdr_path}": nccl_hdr_path,
-    })
-
+    """Implementation of the nccl_configure repository rule."""
+    if _TF_NCCL_VERSION not in repository_ctx.os.environ:
+        # Add a dummy build file to make bazel query happy.
+        repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
+        return
+
+    if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ:
+        # Forward to the pre-configured remote repository.
+        repository_ctx.template("BUILD", _label("remote.BUILD.tpl"), {
+            "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO],
+        })
+        return
+
+    nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip()
+    if nccl_version == "":
+        # Alias to open source build from @nccl_archive.
+        repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
+
+        # TODO(csigg): implement and reuse in cuda_configure.bzl.
+        gpu_architectures = [
+            "sm_" + capability.replace(".", "")
+            for capability in compute_capabilities(repository_ctx)
+        ]
+
+        # Round-about way to make the list unique.
+        gpu_architectures = dict(zip(gpu_architectures, gpu_architectures)).keys()
+        repository_ctx.template("build_defs.bzl", _label("build_defs.bzl.tpl"), {
+            "%{gpu_architectures}": str(gpu_architectures),
+        })
+
+        repository_ctx.symlink(cuda_toolkit_path(repository_ctx), "cuda")
+
+        # Temporary work-around for setups which symlink ptxas to a newer
+        # version. The versions of nvlink and ptxas need to agree, so we find
+        # nvlink next to the real location of ptxas. This is only temporary and
+        # will be removed again soon.
+        nvlink_dir = repository_ctx.path("cuda/bin/ptxas").realpath.dirname
+        repository_ctx.symlink(nvlink_dir.get_child("nvlink"), "nvlink")
+    else:
+        # Create target for locally installed NCCL.
+        nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
+        nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip()
+        _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version)
+        repository_ctx.template("BUILD", _label("system.BUILD.tpl"), {
+            "%{version}": nccl_version,
+            "%{install_path}": nccl_install_path,
+            "%{hdr_path}": nccl_hdr_path,
+        })
 
 nccl_configure = repository_rule(
-    implementation=_nccl_configure_impl,
-    environ=[
-        _NCCL_INSTALL_PATH,
+    implementation = _nccl_configure_impl,
+    environ = [
+        _CUDA_TOOLKIT_PATH,
         _NCCL_HDR_PATH,
+        _NCCL_INSTALL_PATH,
         _TF_NCCL_VERSION,
+        _TF_CUDA_COMPUTE_CAPABILITIES,
+        _TF_NCCL_CONFIG_REPO,
     ],
 )
 """Detects and configures the NCCL configuration.
-- 
GitLab


From d258207f1583df4faa452265b051879af6c15dac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 08:55:53 -0700
Subject: [PATCH 0422/1085] BEGIN_PUBLIC Automated rollback of PR #21945
 END_PUBLIC Automated rollback of commit
 863f61412fcc654840c6b67473b742ea4e5e964e. Revert #21945.

PiperOrigin-RevId: 215913175
---
 tensorflow/python/ops/array_ops.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index e3e4d5f910..4be9c532f4 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1407,13 +1407,8 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
         gen_array_ops.conjugate_transpose
         if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose)
     if perm is None:
-      a = ops.convert_to_tensor(a, name="a")
-      if not a.get_shape().ndims:
-        rank = gen_array_ops.rank(a)
-        perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
-      else:
-        rank = a.get_shape().ndims
-        perm = (rank - 1) - np.arange(rank)
+      rank = gen_array_ops.rank(a)
+      perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
       ret = transpose_fn(a, perm, name=name)
       # NOTE(mrry): Setting the shape explicitly because
       #   reverse is not handled by the shape function.
-- 
GitLab


From 5a43e01ef0f8cb86d836a4d1c08a246630e26f8c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 09:29:00 -0700
Subject: [PATCH 0423/1085] Update XlaSort to match the underlying HLO.

PiperOrigin-RevId: 215917470
---
 tensorflow/compiler/tests/sort_ops_test.py    | 18 ++++++++++++++-
 .../compiler/tf2xla/kernels/sort_ops.cc       | 17 +++++++++++++-
 tensorflow/compiler/tf2xla/ops/xla_ops.cc     | 23 ++++++++++++++++++-
 tensorflow/compiler/tf2xla/python/xla.py      | 12 ++++++----
 .../compiler/xla/service/hlo_verifier.cc      |  2 +-
 5 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/tests/sort_ops_test.py b/tensorflow/compiler/tests/sort_ops_test.py
index dbf4beb693..57f0ab7a9e 100644
--- a/tensorflow/compiler/tests/sort_ops_test.py
+++ b/tensorflow/compiler/tests/sort_ops_test.py
@@ -48,13 +48,29 @@ class XlaSortOpTest(xla_test.XLATestCase):
         self.assertAllClose(v, result, rtol=1e-3)
 
   def testSort(self):
-    supported_types = set([dtypes.bfloat16.as_numpy_dtype, np.float32])
+    supported_types = set(
+        [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
     for dtype in supported_types.intersection(self.numeric_types):
       x = np.arange(101, dtype=dtype)
       np.random.shuffle(x)
       self._assertOpOutputMatchesExpected(
           xla.sort, [x], expected=[np.arange(101, dtype=dtype)])
 
+  def testKeyValueSort(self):
+    supported_types = set(
+        [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
+    for key_type in supported_types.intersection(self.numeric_types):
+      for value_type in supported_types.intersection(self.numeric_types):
+        x = np.arange(101, dtype=key_type)
+        np.random.shuffle(x)
+        y = (-x).astype(value_type)
+        self._assertOpOutputMatchesExpected(
+            xla.key_value_sort, [x, y],
+            expected=[
+                np.arange(101, dtype=key_type),
+                -np.arange(101, dtype=value_type)
+            ])
+
   def testTopK(self):
     supported_types = set(
         [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
diff --git a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
index aaeeae01cc..45f03d8c21 100644
--- a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
@@ -25,11 +25,26 @@ class XlaSortOp : public XlaOpKernel {
   explicit XlaSortOp(OpKernelConstruction* context) : XlaOpKernel(context) {}
 
   void Compile(XlaOpKernelContext* context) override {
-    context->SetOutput(0, xla::Sort(context->Input(0)));
+    context->SetOutput(0, xla::Sort(context->Input("input")));
   }
 };
 
 REGISTER_XLA_OP(Name("XlaSort"), XlaSortOp);
 
+class XlaKeyValueSortOp : public XlaOpKernel {
+ public:
+  explicit XlaKeyValueSortOp(OpKernelConstruction* context)
+      : XlaOpKernel(context) {}
+
+  void Compile(XlaOpKernelContext* context) override {
+    xla::XlaOp result =
+        xla::Sort(context->Input("keys"), context->Input("values"));
+    context->SetOutput(0, xla::GetTupleElement(result, 0));
+    context->SetOutput(1, xla::GetTupleElement(result, 1));
+  }
+};
+
+REGISTER_XLA_OP(Name("XlaKeyValueSort"), XlaKeyValueSortOp);
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
index 733eeed3c6..557911553d 100644
--- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc
+++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
@@ -354,12 +354,33 @@ Wraps the XLA Sort operator, documented at
  https://www.tensorflow.org/performance/xla/operation_semantics#sort
 .
 
-Sorts a tensor. Currently only rank 1 sorts in ascending order are supported.
+Sorts a tensor. Currently only sorts in ascending order are supported.
 
 input: A `Tensor` of type T.
 output: A `Tensor` of type T.
 )doc");
 
+REGISTER_OP("XlaKeyValueSort")
+    .Input("keys: K")
+    .Input("values: V")
+    .Output("sorted_keys: K")
+    .Output("sorted_values: V")
+    .Attr("K: realnumbertype")
+    .Attr("V: type")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Wraps the XLA Sort operator, documented at
+ https://www.tensorflow.org/performance/xla/operation_semantics#sort
+.
+
+Sorts a tensor. Currently only sorts in ascending order are supported.
+
+keys: A `Tensor` of type K.
+values: A `Tensor` of type V.
+sorted_keys: A `Tensor` of type K.
+sorted_values: A `Tensor` of type V.
+)doc");
+
 // TODO(b/37549631) setting the While Op to always be stateful is too
 // conservative.
 REGISTER_OP("XlaWhile")
diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py
index 27dd18a9bb..bc7924c371 100644
--- a/tensorflow/compiler/tf2xla/python/xla.py
+++ b/tensorflow/compiler/tf2xla/python/xla.py
@@ -212,9 +212,9 @@ bitcast_convert_type = array_ops.bitcast
 
 def broadcast(x, dims, name=None):
   x = ops.convert_to_tensor(x)
-  shape = array_ops.concat(
-      [constant_op.constant(dims),
-       array_ops.shape(x)], axis=0)
+  shape = array_ops.concat([constant_op.constant(dims),
+                            array_ops.shape(x)],
+                           axis=0)
   return array_ops.broadcast_to(x, shape, name=name)
 
 
@@ -332,12 +332,13 @@ def reduce_window(operand,
     init: a scalar tensor representing the initial value for the reduction
     reducer: a reduction function that combines a pair of scalars.
     window_dimensions: shape of the window, as a list of integers
-    window_strides: inter-window strides, as a list of integers. Optional;
-      if omitted, defaults to strides of 1.
+    window_strides: inter-window strides, as a list of integers. Optional; if
+      omitted, defaults to strides of 1.
     padding: padding to apply to 'operand'. List of (low, high) pairs of
       integers that specify the padding to apply before and after each
       dimension. Optional; if omitted, defaults to no padding.
     name: the operator name, or None.
+
   Returns:
     A tensor that represents the output of the reduce_window operator.
   """
@@ -377,4 +378,5 @@ def slice(x, start_dims, limit_dims, strides):
 
 
 sort = gen_xla_ops.xla_sort
+key_value_sort = gen_xla_ops.xla_key_value_sort
 while_loop = gen_xla_ops.xla_while
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index b5498bb936..c22ee03388 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -548,6 +548,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) {
     case HloOpcode::kTupleSelect:
     case HloOpcode::kSend:
     case HloOpcode::kSendDone:
+    case HloOpcode::kSort:
     case HloOpcode::kTuple:
     case HloOpcode::kWhile:
       break;
@@ -1153,7 +1154,6 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
   TF_RETURN_IF_ERROR(VerifyHloStructure(module));
   TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module));
 
-
   for (auto* computation : module->computations()) {
     std::unique_ptr<ShapeVerifier> shape_verifier = shape_verifier_factory_();
     TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get()));
-- 
GitLab


From 8b7c789e7401fe56b4f648a04f675a3cb69119e5 Mon Sep 17 00:00:00 2001
From: Jing Li <jingli@google.com>
Date: Fri, 5 Oct 2018 09:54:40 -0700
Subject: [PATCH 0424/1085] - Don't set tpu optimizer parameter variable during
 weight initialization if the optimizer isn't set, e.g. loading weights and
 then predict. - Add load_weights for `KerasTpuModel`.

PiperOrigin-RevId: 215920993
---
 tensorflow/contrib/tpu/python/tpu/keras_support.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index a3a7fd8bb0..af183b3232 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -1998,6 +1998,9 @@ class KerasTPUModel(models.Model):
 
     logging.info('Setting weights on TPU model.')
     cloned_model.set_weights(weights)
+    if self._tpu_model.optimizer is None:
+      # tpu_model may not be compiled, e.g., loading weights and then predict.
+      return
     for k, v in six.iteritems(cpu_optimizer_config):
       opt_var = getattr(self._tpu_model.optimizer, k)
       if isinstance(opt_var, variables.Variable):
@@ -2052,6 +2055,10 @@ class KerasTPUModel(models.Model):
     self._cpu_model.set_weights(weights)
     self._tpu_weights_initialized = False
 
+  def load_weights(self, filepath, by_name=False):
+    self._cpu_model.load_weights(filepath, by_name)
+    self._tpu_weights_initialized = False
+
 
 # pylint: disable=bad-continuation
 def _validate_shapes(model):
-- 
GitLab


From d493a7f2fdbbc29a292741135f4c1598352e876b Mon Sep 17 00:00:00 2001
From: Mingsheng Hong <hongm@google.com>
Date: Fri, 5 Oct 2018 10:31:23 -0700
Subject: [PATCH 0425/1085] When running a native/builtin op via eager C API,
 automatically fill in default attr values that are not overridden e.g.
 transpose_a in the matmul op).

This is required for backward compatibility (a binary built via an older version
of TF should still run on a newer version of TF, where some ops may have added
attrs).

For non-eager graph building, the default attr values of graph ops are added by
tensorflow::AddDefaultsToNodeDef().

We ran into this issue when running the same S4TF test cases via eager APIs --
some tests failed due to "missing attrs", but are fixed by this patch.

PiperOrigin-RevId: 215927271
---
 tensorflow/c/eager/c_api_test_util.cc            |  2 --
 .../core/common_runtime/eager/attr_builder.cc    | 16 ++++++++++++++++
 .../core/common_runtime/eager/attr_builder.h     |  6 ++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc
index 5607c9dcb0..008f088c2d 100644
--- a/tensorflow/c/eager/c_api_test_util.cc
+++ b/tensorflow/c/eager/c_api_test_util.cc
@@ -99,8 +99,6 @@ TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) {
   TFE_OpAddInput(op, b, status);
   CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TF_DeleteStatus(status);
-  TFE_OpSetAttrBool(op, "transpose_a", 0);
-  TFE_OpSetAttrBool(op, "transpose_b", 0);
   TFE_OpSetAttrType(op, "T", TFE_TensorHandleDataType(a));
 
   return op;
diff --git a/tensorflow/core/common_runtime/eager/attr_builder.cc b/tensorflow/core/common_runtime/eager/attr_builder.cc
index cf1cd4134e..5c8369de87 100644
--- a/tensorflow/core/common_runtime/eager/attr_builder.cc
+++ b/tensorflow/core/common_runtime/eager/attr_builder.cc
@@ -136,6 +136,22 @@ void AttrBuilder::FillAttrValueMap(AttrValueMap* m,
       m->insert(*it);
     }
   }
+  // For any attr-value pairs that exist in the op def (from op registry) but
+  // not `m`, fill them into `m`, so that we can run a TFE_Op without having to
+  // specify all the default attr values (e.g. for matmul, the `transpose_a`
+  // attr defaults to false).
+  const OpDef* op_def = nullptr;
+  Status s = OpDefForOp(op_name_.c_str(), &op_def);
+  // This is expected, if this op is a custom function, and is therefore not
+  // present in the op registry.
+  if (!s.ok()) return;
+
+  DCHECK(op_def);
+  for (const auto& attr_def : op_def->attr()) {
+    if (attr_def.has_default_value() && !m->count(attr_def.name())) {
+      SetInAttrValueMap(m, attr_def.name(), attr_def.default_value());
+    }
+  }
 }
 
 const NodeDef& AttrBuilder::BuildNodeDef() {
diff --git a/tensorflow/core/common_runtime/eager/attr_builder.h b/tensorflow/core/common_runtime/eager/attr_builder.h
index cbe6a1cb50..c114ea4ba0 100644
--- a/tensorflow/core/common_runtime/eager/attr_builder.h
+++ b/tensorflow/core/common_runtime/eager/attr_builder.h
@@ -110,6 +110,12 @@ class AttrBuilder {
   using AttrVec = tensorflow::gtl::InlinedVector<std::pair<StringPiece, T>, 2>;
 
   void MayBeInitializeNodeDef();
+  // Fill `m` with the attr-value pairs set via AttrBuilder::Set() so far, as
+  // well as any default attr-value pairs from the associated op_def, if there
+  // is one.
+  //
+  // If `include_those_in_node_def` is true, also include any attr-value pairs
+  // from `node_def_`.
   void FillAttrValueMap(AttrValueMap* m, bool include_those_in_node_def) const;
 
   template <class T>
-- 
GitLab


From e2f80439c5bfee56581875219ea83cc5307854f5 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Fri, 5 Oct 2018 10:37:16 -0700
Subject: [PATCH 0426/1085] Refactoring TFLite export code. Unify OperatorCode
 generation logic.

PiperOrigin-RevId: 215928419
---
 tensorflow/contrib/lite/toco/tflite/export.cc | 176 ++++++++++--------
 tensorflow/contrib/lite/toco/tflite/export.h  |  19 +-
 .../contrib/lite/toco/tflite/export_test.cc   |  77 +++++---
 3 files changed, 163 insertions(+), 109 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index 45ca7f7f0c..f6f76e48a4 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -63,21 +63,21 @@ bool IsControlFlowOp(const string& tensorflow_op) {
   return false;
 }
 
-details::OperatorKey GetOperatorKey(
-    const ::toco::Operator& op,
-    const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
-    bool allow_flex_ops) {
-  string custom_code;
-  if (op.type == OperatorType::kUnsupported) {
-    const TensorFlowUnsupportedOperator& unsupported_op =
-        static_cast<const TensorFlowUnsupportedOperator&>(op);
-    custom_code = unsupported_op.tensorflow_op;
-  }
-  int version = 1;
-  if (ops_by_type.count(op.type) != 0) {
-    version = ops_by_type.at(op.type)->GetVersion(op);
+// Map from operator name to TF Lite enum value, for all builtins.
+const std::map<string, BuiltinOperator>& GetBuiltinOpsMap() {
+  static std::map<string, BuiltinOperator>* builtin_ops = nullptr;
+  if (builtin_ops == nullptr) {
+    builtin_ops = new std::map<string, BuiltinOperator>();
+
+    for (int i = BuiltinOperator_MIN; i <= BuiltinOperator_MAX; ++i) {
+      BuiltinOperator op = static_cast<BuiltinOperator>(i);
+      string name = EnumNameBuiltinOperator(op);
+      if (op != BuiltinOperator_CUSTOM && !name.empty()) {
+        (*builtin_ops)[name] = op;
+      }
+    }
   }
-  return details::OperatorKey(op.type, custom_code, version, allow_flex_ops);
+  return *builtin_ops;
 }
 
 void WriteModelToString(const flatbuffers::FlatBufferBuilder& builder,
@@ -91,27 +91,59 @@ void WriteModelToString(const flatbuffers::FlatBufferBuilder& builder,
 
 namespace details {
 
-OperatorKey::OperatorKey(OperatorType type, const std::string& custom_code,
-                         int version, bool allow_flex_ops) {
-  this->type = type;
-  this->custom_code = custom_code;
-  this->version = version;
-
-  if (type == OperatorType::kUnsupported) {
-    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
-    // to populate a regular custom op. We need to find a way to fix this.
-    if (allow_flex_ops) {
-      // Memorize the original TensorFlow op name.
-      this->flex_tensorflow_op = custom_code;
-      // Prefix the custom code of the flex op.
-      this->custom_code = string(::tflite::kFlexCustomCodePrefix) + custom_code;
-      this->is_flex_op = true;
-
-      if (IsControlFlowOp(this->flex_tensorflow_op)) {
-        is_unsupported_flex_op = true;
+OperatorKey GetOperatorKey(
+    const ::toco::Operator& op,
+    const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
+    bool allow_flex_ops) {
+  string name = HelpfulOperatorTypeName(op);
+  const auto& builtin_ops = GetBuiltinOpsMap();
+
+  bool is_builtin = false;
+  OperatorKey key;
+  if (ops_by_type.count(op.type) != 0) {
+    key.version = ops_by_type.at(op.type)->GetVersion(op);
+    name = ops_by_type.at(op.type)->name();
+    is_builtin = (builtin_ops.count(name) > 0);
+  }
+
+  if (is_builtin) {
+    // For TFLite supported builtin ops, find out its BuiltinOperator enum used
+    // in FlatBuffer.
+    key.type = builtin_ops.at(name);
+  } else {
+    key.type = BuiltinOperator_CUSTOM;
+
+    key.is_custom_op = true;
+    if (op.type == OperatorType::kUnsupported) {
+      const TensorFlowUnsupportedOperator& unsupported_op =
+          static_cast<const TensorFlowUnsupportedOperator&>(op);
+      const auto tensorflow_op = unsupported_op.tensorflow_op;
+
+      // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
+      // to populate a regular custom op. We need to find a way to fix this.
+      if (allow_flex_ops) {
+        // Memorize the original TensorFlow op name.
+        key.flex_tensorflow_op = tensorflow_op;
+        // Prefix the custom code of the flex op.
+        key.custom_code =
+            string(::tflite::kFlexCustomCodePrefix) + tensorflow_op;
+        key.is_flex_op = true;
+
+        if (IsControlFlowOp(tensorflow_op)) {
+          key.is_unsupported_flex_op = true;
+        }
+      } else {
+        key.custom_code = tensorflow_op;
       }
+    } else {
+      // For Toco-supported/TFLite-unsupported ops, currently we produce a
+      // custom op. This gives developers a chance to implement custom ops.
+      // TODO(b/116800229): Also produce Toco-supported/TFLite-unsupported ops
+      // as Flex ops when Flex mode is enabled.
+      key.custom_code = name;
     }
   }
+  return key;
 }
 
 void LoadTensorsMap(const Model& model, TensorsMap* tensors_map) {
@@ -145,6 +177,7 @@ void LoadOperatorsMap(
     ++index;
   }
 }
+
 }  // namespace details
 
 Offset<Vector<Offset<Tensor>>> ExportTensors(
@@ -230,7 +263,7 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
     const Model& model,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
     const details::OperatorsMap& operators_map, FlatBufferBuilder* builder,
-    std::set<string>* unsupported_ops, const ExportParams& params) {
+    const ExportParams& params) {
   // Map from operator name to TF Lite enum value, for all builtins.
   std::map<string, BuiltinOperator> builtin_ops;
   for (int i = BuiltinOperator_MIN; i <= BuiltinOperator_MAX; ++i) {
@@ -247,37 +280,16 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
 
   for (const auto& op : model.operators) {
     const details::OperatorKey operator_key =
-        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops);
+        details::GetOperatorKey(*op, ops_by_type, params.allow_flex_ops);
     int op_index = operators_map.at(operator_key);
-    int op_version = operator_key.version;
 
-    string name = HelpfulOperatorTypeName(*op);
-    bool is_builtin = false;
-    if (ops_by_type.count(op->type) != 0) {
-      name = ops_by_type.at(op->type)->name();
-      is_builtin = (builtin_ops.count(name) > 0);
+    flatbuffers::Offset<flatbuffers::String> custom_code = 0;
+    if (!operator_key.custom_code.empty()) {
+      custom_code = builder->CreateString(operator_key.custom_code);
     }
 
-    if (is_builtin) {
-      ordered_opcodes[op_index] =
-          CreateOperatorCode(*builder, builtin_ops[name], 0, op_version);
-    } else {
-      // This could be a kUnsupported, in which case we should be
-      // able to retrieve the original Tensorflow name from the OperatorKey, or
-      // this could be a proper TOCO operator that is completely unknown to TF
-      // Lite.
-      if (!operator_key.custom_code.empty()) {
-        name = operator_key.custom_code;
-      }
-      // Either way, this is an operator that is not supported by TF Lite,
-      // so we output it as a custom op and add it to the error summary.
-      if (unsupported_ops) {
-        unsupported_ops->insert(name);
-      }
-      ordered_opcodes[op_index] =
-          CreateOperatorCode(*builder, BuiltinOperator_CUSTOM,
-                             builder->CreateString(name), op_version);
-    }
+    ordered_opcodes[op_index] = CreateOperatorCode(
+        *builder, operator_key.type, custom_code, operator_key.version);
   }
 
   std::vector<Offset<OperatorCode>> opcode_vector;
@@ -312,7 +324,7 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
     }
 
     int op_index = operators_map.at(
-        GetOperatorKey(*op, ops_by_type, params.allow_flex_ops));
+        details::GetOperatorKey(*op, ops_by_type, params.allow_flex_ops));
 
     auto tflite_op_it = ops_by_type.find(op->type);
     BaseOperator* tflite_op = tflite_op_it == ops_by_type.end()
@@ -386,9 +398,8 @@ void Export(
   Array empty_array;
   buffers_to_write.push_back(&empty_array);
 
-  std::set<string> unsupported_ops;
-  auto op_codes = ExportOperatorCodes(model, ops_by_type, operators_map,
-                                      &builder, &unsupported_ops, params);
+  auto op_codes =
+      ExportOperatorCodes(model, ops_by_type, operators_map, &builder, params);
 
   for (const auto& op : model.operators) {
     if (op->type == OperatorType::kFakeQuant) {
@@ -398,7 +409,20 @@ void Export(
                       "for --std_values and --mean_values.";
     }
   }
-  if (!unsupported_ops.empty()) {
+
+  std::set<string> custom_ops;
+  std::set<string> unsupported_flex_ops;
+  for (const auto& it : operators_map) {
+    const details::OperatorKey& key = it.first;
+    if (key.is_custom_op) {
+      custom_ops.insert(key.custom_code);
+    }
+    if (key.is_unsupported_flex_op) {
+      unsupported_flex_ops.insert(key.flex_tensorflow_op);
+    }
+  }
+
+  if (!custom_ops.empty()) {
     if (!params.allow_custom_ops) {
       // Remove ExpandDims and ReorderAxes from unimplemented list unless they
       // compose the list. Both ops are removed during graph transformations.
@@ -406,14 +430,14 @@ void Export(
       // transformation is unable to run because the output shape is not
       // defined. This causes unnecessary confusion during model conversion
       // time.
-      std::set<string> unsupported_ops_final;
-      for (const auto& op_type : unsupported_ops) {
+      std::set<string> custom_ops_final;
+      for (const auto& op_type : custom_ops) {
         if (op_type != "ReorderAxes" && op_type != "ExpandDims") {
-          unsupported_ops_final.insert(op_type);
+          custom_ops_final.insert(op_type);
         }
       }
-      if (unsupported_ops_final.empty()) {
-        unsupported_ops_final = unsupported_ops;
+      if (custom_ops_final.empty()) {
+        custom_ops_final = custom_ops;
       }
 
       LOG(QFATAL)
@@ -423,13 +447,13 @@ void Export(
              "--allow_custom_ops, or by setting allow_custom_ops=True "
              "when calling tf.contrib.lite.TFLiteConverter(). Here is a list "
              "of operators for which  you will need custom implementations: "
-          << absl::StrJoin(unsupported_ops_final, ", ") << ".";
+          << absl::StrJoin(custom_ops_final, ", ") << ".";
     }
 
     std::set<string> unsupported_control_flow_ops;
     // Check if unsupported ops contains control flow ops. It's impossible
     // to implement these ops as custom ops at the moment.
-    for (const auto& op : unsupported_ops) {
+    for (const auto& op : custom_ops) {
       if (IsControlFlowOp(op)) {
         unsupported_control_flow_ops.insert(op);
       }
@@ -441,14 +465,6 @@ void Export(
     }
   }
 
-  std::set<string> unsupported_flex_ops;
-  for (const auto& it : operators_map) {
-    const details::OperatorKey& key = it.first;
-    if (key.is_unsupported_flex_op) {
-      unsupported_flex_ops.insert(key.custom_code);
-    }
-  }
-
   if (!unsupported_flex_ops.empty()) {
     LOG(QFATAL) << "Some of the operators in the model are not supported by "
                    "TensorFlow Flex runtime: "
diff --git a/tensorflow/contrib/lite/toco/tflite/export.h b/tensorflow/contrib/lite/toco/tflite/export.h
index 9efb282c6c..c627f48086 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.h
+++ b/tensorflow/contrib/lite/toco/tflite/export.h
@@ -81,16 +81,20 @@ using TensorsMap = std::unordered_map<string, int>;
 // Only when `type` is `kUnsupported`, `custom_code` is filled to
 // identify which operation is used.
 struct OperatorKey {
-  OperatorKey(OperatorType type, const std::string& custom_code, int version,
-              bool allow_flex_ops = false);
+  OperatorKey() {}
+  OperatorKey(::tflite::BuiltinOperator type, const std::string& custom_code,
+              int version)
+      : type(type), custom_code(custom_code), version(version) {}
 
   // Only `type`, `custom_code` and `version` is used to compute hash and
   // identity.
-  OperatorType type;
+  ::tflite::BuiltinOperator type = ::tflite::BuiltinOperator_CUSTOM;
   std::string custom_code;
-  int version;
+  int version = 1;
 
-  // THe fields below are not used to compute hash and identity.
+  // The fields below are not used to compute hash and identity.
+  // TODO(ycling): Consider to change these fields to accessor functions.
+  bool is_custom_op = false;
   bool is_flex_op = false;
   bool is_unsupported_flex_op = false;
   // The original TensorFlow op name for the flex op. Filled only when
@@ -124,6 +128,11 @@ struct OperatorKey {
   };
 };
 
+OperatorKey GetOperatorKey(
+    const ::toco::Operator& op,
+    const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
+    bool allow_flex_ops);
+
 // A maps from operator type to its final position in the TF Lite buffer.
 using OperatorsMap = std::unordered_map<OperatorKey, int, OperatorKey::Hash>;
 
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index a71a64d56f..d48ab78285 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -105,13 +105,15 @@ TEST_F(ExportTest, LoadOperatorsMap) {
 
   details::OperatorsMap operators;
   const auto ops_by_type = BuildOperatorByTypeMap();
-  // TODO(ycling): Add a test for allow_flex_ops.
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
-  EXPECT_EQ(0, operators[details::OperatorKey(OperatorType::kAdd, "", 1)]);
-  EXPECT_EQ(1, operators[details::OperatorKey(OperatorType::kConv, "", 1)]);
-  EXPECT_EQ(2, operators[details::OperatorKey(OperatorType::kSub, "", 1)]);
-  EXPECT_EQ(3, operators[details::OperatorKey(OperatorType::kUnsupported,
+  EXPECT_EQ(
+      0, operators[details::OperatorKey(::tflite::BuiltinOperator_ADD, "", 1)]);
+  EXPECT_EQ(1, operators[details::OperatorKey(::tflite::BuiltinOperator_CONV_2D,
+                                              "", 1)]);
+  EXPECT_EQ(2, operators[details::OperatorKey(::tflite::BuiltinOperator_CUSTOM,
                                               "MyCrazyOp", 1)]);
+  EXPECT_EQ(
+      3, operators[details::OperatorKey(::tflite::BuiltinOperator_SUB, "", 1)]);
 }
 
 TEST_F(ExportTest, Export) {
@@ -133,7 +135,7 @@ TEST_F(ExportTest, Export) {
   }
 
   EXPECT_THAT(names, ElementsAre("builtin:ADD", "builtin:CONV_2D",
-                                 "builtin:SUB", "custom:MyCrazyOp"));
+                                 "custom:MyCrazyOp", "builtin:SUB"));
 
   std::vector<uint32_t> indices;
   auto operators = (*model->subgraphs())[0]->operators();
@@ -142,7 +144,7 @@ TEST_F(ExportTest, Export) {
     indices.push_back(op->opcode_index());
   }
 
-  EXPECT_THAT(indices, ElementsAre(1, 0, 3, 2));
+  EXPECT_THAT(indices, ElementsAre(1, 0, 2, 3));
 }
 
 TEST_F(ExportTest, QuantizeWeights) {
@@ -257,7 +259,8 @@ TEST_F(VersionedOpExportTest, LoadOperatorsMapWithOpV1) {
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
 
   EXPECT_EQ(1, operators.size());
-  EXPECT_EQ(0, operators.at(details::OperatorKey(OperatorType::kConv, "", 1)));
+  EXPECT_EQ(0, operators.at(details::OperatorKey(
+                   ::tflite::BuiltinOperator_CONV_2D, "", 1)));
 }
 
 TEST_F(VersionedOpExportTest, LoadOperatorsMapWithOpV2) {
@@ -268,7 +271,8 @@ TEST_F(VersionedOpExportTest, LoadOperatorsMapWithOpV2) {
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
 
   EXPECT_EQ(1, operators.size());
-  EXPECT_EQ(0, operators.at(details::OperatorKey(OperatorType::kConv, "", 2)));
+  EXPECT_EQ(0, operators.at(details::OperatorKey(
+                   ::tflite::BuiltinOperator_CONV_2D, "", 2)));
 }
 
 TEST_F(VersionedOpExportTest, LoadOperatorsMapWithBothVersions) {
@@ -280,8 +284,10 @@ TEST_F(VersionedOpExportTest, LoadOperatorsMapWithBothVersions) {
   details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false);
 
   EXPECT_EQ(2, operators.size());
-  EXPECT_EQ(0, operators.at(details::OperatorKey(OperatorType::kConv, "", 1)));
-  EXPECT_EQ(1, operators.at(details::OperatorKey(OperatorType::kConv, "", 2)));
+  EXPECT_EQ(0, operators.at(details::OperatorKey(
+                   ::tflite::BuiltinOperator_CONV_2D, "", 1)));
+  EXPECT_EQ(1, operators.at(details::OperatorKey(
+                   ::tflite::BuiltinOperator_CONV_2D, "", 2)));
 }
 
 TEST_F(VersionedOpExportTest, Export) {
@@ -314,38 +320,61 @@ TEST_F(VersionedOpExportTest, Export) {
 }
 
 TEST(OperatorKeyTest, TestBuiltinOp) {
-  details::OperatorKey key(OperatorType::kConv, "", 2);
-  EXPECT_EQ(key.type, OperatorType::kConv);
+  auto op = absl::make_unique<ConvOperator>();
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+  const auto key = details::GetOperatorKey(*op, ops_by_type, false);
+
+  EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CONV_2D);
   EXPECT_EQ(key.custom_code, "");
-  EXPECT_EQ(key.version, 2);
+  EXPECT_EQ(key.version, 1);
+}
+
+TEST(OperatorKeyTest, TestCustomOp) {
+  auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
+  op->tensorflow_op = "MyCrazyCustomOp";
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+  const auto key = details::GetOperatorKey(*op, ops_by_type, false);
+
+  EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+  EXPECT_EQ(key.custom_code, "MyCrazyCustomOp");
+  EXPECT_EQ(key.version, 1);
 }
 
 TEST(OperatorKeyTest, TestFlexOp) {
+  auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
+  op->tensorflow_op = "BatchMatMul";
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
   {
-    details::OperatorKey key(OperatorType::kUnsupported, "SomeUnsupportedOp", 1,
-                             false);
-    EXPECT_EQ(key.type, OperatorType::kUnsupported);
+    const auto key = details::GetOperatorKey(*op, ops_by_type, false);
     // It shouldn't be converted to Flex op if `allow_flex_op` is false.
-    EXPECT_EQ(key.custom_code, "SomeUnsupportedOp");
+    EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+    EXPECT_EQ(key.custom_code, "BatchMatMul");
     EXPECT_EQ(key.version, 1);
     EXPECT_FALSE(key.is_flex_op);
   }
 
   {
-    details::OperatorKey key(OperatorType::kUnsupported, "SomeUnsupportedOp", 1,
-                             true);
-    EXPECT_EQ(key.type, OperatorType::kUnsupported);
     // Verify that the custom op name is prefixed by "Flex" and `is_flex_op`
     // is true.
-    EXPECT_EQ(key.custom_code, "FlexSomeUnsupportedOp");
+    const auto key = details::GetOperatorKey(*op, ops_by_type, true);
+    EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+    EXPECT_EQ(key.custom_code, "FlexBatchMatMul");
     EXPECT_EQ(key.version, 1);
     EXPECT_TRUE(key.is_flex_op);
   }
 }
 
 TEST(OperatorKeyTest, TestFlexWithControlFlowOp) {
-  details::OperatorKey key(OperatorType::kUnsupported, "Merge", 1, true);
-  EXPECT_EQ(key.type, OperatorType::kUnsupported);
+  auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
+  op->tensorflow_op = "Merge";
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+  const auto key = details::GetOperatorKey(*op, ops_by_type, true);
+
+  EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
   EXPECT_EQ(key.custom_code, "FlexMerge");
   EXPECT_EQ(key.version, 1);
   EXPECT_TRUE(key.is_flex_op);
-- 
GitLab


From dd8afaad37fdb284dce3518a9be22aca1c25e475 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 10:48:51 -0700
Subject: [PATCH 0427/1085] Fix documentation.

PiperOrigin-RevId: 215930596
---
 tensorflow/python/framework/importer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index c6595918ae..c9ac27e788 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -370,7 +370,8 @@ def import_graph_def(graph_def,
 
   Returns:
     A list of `Operation` and/or `Tensor` objects from the imported graph,
-    corresponding to the names in `return_elements`.
+    corresponding to the names in `return_elements`,
+    and None if `returns_elements` is None.
 
   Raises:
     TypeError: If `graph_def` is not a `GraphDef` proto,
-- 
GitLab


From f410ffc1699e864e84857089183db0d952ada7fe Mon Sep 17 00:00:00 2001
From: Andreas Madsen <amwebdk@gmail.com>
Date: Thu, 26 Jul 2018 15:44:39 +0200
Subject: [PATCH 0428/1085] make sparsemax nan and infinity safe

logits that are -inf will be given 0 probability and logits that are
inf will result in a nan output. Likewise if all logits are -inf the
output will also be nan.

This is done by using where operators, mostly because 0 * inf = nan
and x/0 = sign(x) inf following the IEEE 754 standard. However these
results are not mathematically correct in the context of the sparsemax
algorithm.

Fixes: https://github.com/tensorflow/tensorflow/issues/15564
---
 .../kernel_tests/sparsemax_loss_test.py       | 64 +++++++++++++++++++
 .../python/kernel_tests/sparsemax_test.py     | 63 +++++++++++++++++-
 .../contrib/sparsemax/python/ops/sparsemax.py | 30 ++++++++-
 .../sparsemax/python/ops/sparsemax_loss.py    | 32 ++++++++--
 4 files changed, 178 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py
index 360e7dbe75..2db76a6d56 100644
--- a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py
+++ b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py
@@ -109,6 +109,66 @@ class SparsemaxLossTest(test.TestCase):
         np_loss, tf_loss_out, half_atol=1e-2, half_rtol=5e-3)
     self.assertShapeEqual(np_loss, tf_loss_op)
 
+  def _test_sparsemax_loss_of_nan(self, dtype, random, use_gpu):
+    """check sparsemax-loss transfers nan"""
+    q = np.asarray([
+        [0, 0, 1],
+        [0, 0, 1],
+        [0, 0, 1]
+    ])
+    z_nan = np.asarray([
+        [0, np.nan, 0],
+        [0, np.nan, np.nan],
+        [np.nan, np.nan, np.nan]
+    ]).astype(dtype)
+
+    _, tf_loss_nan = self._tf_sparsemax_loss(z_nan, q, dtype, use_gpu)
+    self.assertAllCloseAccordingToType(
+        [np.nan, np.nan, np.nan],
+        tf_loss_nan)
+
+  def _test_sparsemax_loss_of_inf(self, dtype, random, use_gpu):
+    """check sparsemax-loss is infinity safe"""
+    q = np.asarray([
+        [0, 0, 1],
+        [0, 0, 1],
+        [0, 0, 1],
+        [0, 0, 1]
+    ])
+    z_neg = np.asarray([
+        [0, -np.inf, 0],
+        [0, -np.inf, -np.inf],
+        [-np.inf, -np.inf, 0],
+        [-np.inf, -np.inf, -np.inf],
+    ]).astype(dtype)
+    z_pos = np.asarray([
+        [0, np.inf, 0],
+        [0, np.inf, np.inf],
+        [np.inf, np.inf, 0],
+        [np.inf, np.inf, np.inf]
+    ]).astype(dtype)
+    z_mix = np.asarray([
+        [0, np.inf, 0],
+        [0, np.inf, -np.inf],
+        [-np.inf, np.inf, 0],
+        [-np.inf, np.inf, -np.inf]
+    ]).astype(dtype)
+
+    _, tf_loss_neg = self._tf_sparsemax_loss(z_neg, q, dtype, use_gpu)
+    self.assertAllCloseAccordingToType(
+        [0.25, np.inf, 0, np.nan],
+        tf_loss_neg)
+
+    _, tf_loss_pos = self._tf_sparsemax_loss(z_pos, q, dtype, use_gpu)
+    self.assertAllCloseAccordingToType(
+        [np.nan, np.nan, np.nan, np.nan],
+        tf_loss_pos)
+
+    _, tf_loss_mix = self._tf_sparsemax_loss(z_mix, q, dtype, use_gpu)
+    self.assertAllCloseAccordingToType(
+        [np.nan, np.nan, np.nan, np.nan],
+        tf_loss_mix)
+
   def _test_constant_add(self, dtype, random, use_gpu):
     """check sparsemax-loss proposition 3"""
     z = random.uniform(low=-3, high=3, size=(test_obs, 10))
@@ -198,6 +258,10 @@ class SparsemaxLossTest(test.TestCase):
 
     self._test_sparsemax_loss_against_numpy(dtype, random, use_gpu=False)
 
+    self._test_sparsemax_loss_of_nan(dtype, random, use_gpu=False)
+
+    self._test_sparsemax_loss_of_inf(dtype, random, use_gpu=False)
+
     self._test_constant_add(dtype, random, use_gpu=False)
 
     self._test_sparsemax_loss_positive(dtype, random, use_gpu=False)
diff --git a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py
index 259e62bd86..38c6dd15db 100644
--- a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py
+++ b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py
@@ -87,6 +87,61 @@ class SparsemaxTest(test.TestCase):
         p_sparemax, tf_sparsemax_out, half_atol=5e-3)
     self.assertShapeEqual(p_sparemax, tf_sparsemax_op)
 
+  def _test_sparsemax_of_nan(self, dtype, random, use_gpu):
+    """check sparsemax transfers nan"""
+    z_nan = np.asarray([
+        [0, np.nan, 0],
+        [0, np.nan, np.nan],
+        [np.nan, np.nan, np.nan],
+    ]).astype(dtype)
+
+    _, tf_sparsemax_nan = self._tf_sparsemax(z_nan, dtype, use_gpu)
+    self.assertAllCloseAccordingToType([
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan]
+    ], tf_sparsemax_nan)
+
+  def _test_sparsemax_of_inf(self, dtype, random, use_gpu):
+    """check sparsemax is infinity safe"""
+    z_neg = np.asarray([
+        [0, -np.inf, 0],
+        [0, -np.inf, -np.inf],
+        [-np.inf, -np.inf, -np.inf],
+    ]).astype(dtype)
+    z_pos = np.asarray([
+        [0, np.inf, 0],
+        [0, np.inf, np.inf],
+        [np.inf, np.inf, np.inf]
+    ]).astype(dtype)
+    z_mix = np.asarray([
+        [0, np.inf, 0],
+        [0, np.inf, -np.inf],
+        [-np.inf, np.inf, -np.inf]
+    ]).astype(dtype)
+
+    _, tf_sparsemax_neg = self._tf_sparsemax(z_neg, dtype, use_gpu)
+    self.assertAllCloseAccordingToType([
+        [0.5, 0, 0.5],
+        [1, 0, 0],
+        [np.nan, np.nan, np.nan]
+    ], tf_sparsemax_neg)
+
+    _, tf_sparsemax_pos = self._tf_sparsemax(z_pos, dtype, use_gpu)
+    self.assertAllCloseAccordingToType([
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan]
+    ], tf_sparsemax_pos)
+
+    _, tf_sparsemax_mix = self._tf_sparsemax(z_mix, dtype, use_gpu)
+    self.assertAllCloseAccordingToType([
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan],
+        [np.nan, np.nan, np.nan]
+    ], tf_sparsemax_mix)
+
+
   def _test_sparsemax_of_zero(self, dtype, random, use_gpu):
     """check sparsemax proposition 1, part 1"""
     z = np.zeros((1, 10))
@@ -97,7 +152,7 @@ class SparsemaxTest(test.TestCase):
     self.assertAllCloseAccordingToType(p_sparemax, tf_sparsemax_out)
     self.assertShapeEqual(p_sparemax, tf_sparsemax_op)
 
-  def _test_sparsemax_of_inf(self, dtype, random, use_gpu):
+  def _test_sparsemax_of_to_inf(self, dtype, random, use_gpu):
     """check sparsemax proposition 1, part 2"""
     z = random.uniform(low=-3, high=3, size=(test_obs, 10))
 
@@ -210,10 +265,14 @@ class SparsemaxTest(test.TestCase):
 
     self._test_sparsemax_against_numpy(dtype, random, use_gpu=False)
 
-    self._test_sparsemax_of_zero(dtype, random, use_gpu=False)
+    self._test_sparsemax_of_nan(dtype, random, use_gpu=False)
 
     self._test_sparsemax_of_inf(dtype, random, use_gpu=False)
 
+    self._test_sparsemax_of_zero(dtype, random, use_gpu=False)
+
+    self._test_sparsemax_of_to_inf(dtype, random, use_gpu=False)
+
     self._test_constant_add(dtype, random, use_gpu=False)
 
     self._test_permutation(dtype, random, use_gpu=False)
diff --git a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py
index e617af2ff1..f903b629c7 100644
--- a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py
+++ b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py
@@ -49,7 +49,14 @@ def sparsemax(logits, name=None):
     obs = array_ops.shape(logits)[0]
     dims = array_ops.shape(logits)[1]
 
-    z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis]
+    # In the paper, they call the logits z.
+    # The mean(logits) can be substracted from logits to make the algorithm
+    # more numerically stable. the instability in this algorithm comes mostly
+    # from the z_cumsum. Substacting the mean will cause z_cumsum to be close
+    # to zero. However, in practise the numerical instability issues are very
+    # minor and substacting the mean causes extra issues with inf and nan
+    # input.
+    z = logits
 
     # sort z
     z_sorted, _ = nn.top_k(z, k=dims)
@@ -64,10 +71,27 @@ def sparsemax(logits, name=None):
     k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)
 
     # calculate tau(z)
-    indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1)
+    # If there are inf values or all values are -inf, the k_z will be zero,
+    # this is mathematically invalid and will also cause the gather_nd to fail.
+    # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then
+    # fixed later (see p_safe) by returning p = nan. This results in the same
+    # behavior as softmax.
+    k_z_safe = math_ops.maximum(k_z, 1)
+    indices = array_ops.stack([math_ops.range(0, obs), k_z_safe - 1], axis=1)
     tau_sum = array_ops.gather_nd(z_cumsum, indices)
     tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)
 
     # calculate p
-    return math_ops.maximum(
+    p = math_ops.maximum(
         math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis])
+    # If k_z = 0 or if z = nan, then the input is invalid
+    p_safe = array_ops.where(
+        math_ops.logical_or(
+            math_ops.equal(k_z, 0),
+            math_ops.is_nan(z_cumsum[:, -1])
+        ),
+        array_ops.fill([obs, dims], math_ops.cast(float('nan'), logits.dtype)),
+        p
+    )
+
+    return p_safe
diff --git a/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py b/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py
index 582d1e6136..9095cfe267 100644
--- a/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py
+++ b/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py
@@ -47,14 +47,34 @@ def sparsemax_loss(logits, sparsemax, labels, name=None):
     sparsemax = ops.convert_to_tensor(sparsemax, name="sparsemax")
     labels = ops.convert_to_tensor(labels, name="labels")
 
-    shifted_logits = logits - \
-        math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis]
+    # In the paper, they call the logits z.
+    # A constant can be substracted from logits to make the algorithm
+    # more numerically stable in theory. However, there are really no major
+    # source numerical instability in this algorithm.
+    z = logits
 
     # sum over support
-    support = math_ops.cast(sparsemax > 0, sparsemax.dtype)
-    sum_s = support * sparsemax * (shifted_logits - 0.5 * sparsemax)
+    # Use a conditional where instead of a multiplication to support z = -inf.
+    # If z = -inf, and there is no support (sparsemax = 0), a multiplication
+    # would cause 0 * -inf = nan, which is not correct in this case.
+    sum_s = array_ops.where(
+        math_ops.logical_or(sparsemax > 0, math_ops.is_nan(sparsemax)),
+        sparsemax * (z - 0.5 * sparsemax),
+        array_ops.zeros_like(sparsemax)
+    )
 
     # - z_k + ||q||^2
-    q_part = labels * (0.5 * labels - shifted_logits)
+    q_part = labels * (0.5 * labels - z)
+    # Fix the case where labels = 0 and z = -inf, where q_part would
+    # otherwise be 0 * -inf = nan. But since the lables = 0, no cost for
+    # z = -inf should be consideredself.
+    # The code below also coveres the case where z = inf. Howeverm in this
+    # caose the sparsemax will be nan, which means the sum_s will also be nan,
+    # therefor this case doesn't need addtional special treatment.
+    q_part_safe = array_ops.where(
+        math_ops.logical_and(math_ops.equal(labels, 0), math_ops.is_inf(z)),
+        array_ops.zeros_like(z),
+        q_part
+    )
 
-    return math_ops.reduce_sum(sum_s + q_part, axis=1)
+    return math_ops.reduce_sum(sum_s + q_part_safe, axis=1)
-- 
GitLab


From 376e9fb3260b64a80aeb444649d8d1c908fdb287 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Fri, 5 Oct 2018 11:12:15 -0700
Subject: [PATCH 0429/1085] Oops

---
 tensorflow/python/kernel_tests/check_ops_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 8b6c978de1..d4c618d34b 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -329,14 +329,14 @@ b'y \(shape=\(2, 3\) dtype=float32\) = '
                                summarize=10)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_full):
-        check_ops.assert_equal(t, t, message="This is the error message.",
+        check_ops.assert_none_equal(t, t, message="This is the error message.",
                                summarize=-1)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_default):
-        check_ops.assert_equal(t, t, message="This is the error message.")
+        check_ops.assert_none_equal(t, t, message="This is the error message.")
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_short):
-        check_ops.assert_equal(t, t, message="This is the error message.",
+        check_ops.assert_none_equal(t, t, message="This is the error message.",
                                summarize=2)
 
 
-- 
GitLab


From b1325838aaf902e52fae4b085c6396848c445062 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Fri, 5 Oct 2018 11:13:53 -0700
Subject: [PATCH 0430/1085] Declare that stateless random ops are not
 differentiable in C++ code.

PiperOrigin-RevId: 215935319
---
 tensorflow/core/BUILD                        |  1 +
 tensorflow/core/ops/stateless_random_grad.cc | 23 ++++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 tensorflow/core/ops/stateless_random_grad.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 6a3ee3c1cb..900a0e11c4 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1242,6 +1242,7 @@ cc_library(
     srcs = [
         "ops/math_grad.cc",
         "ops/random_grad.cc",
+        "ops/stateless_random_grad.cc",
     ],
     linkstatic = 1,  # Needed since alwayslink is broken in bazel b/27630669
     visibility = ["//visibility:public"],
diff --git a/tensorflow/core/ops/stateless_random_grad.cc b/tensorflow/core/ops/stateless_random_grad.cc
new file mode 100644
index 0000000000..331e1d0152
--- /dev/null
+++ b/tensorflow/core/ops/stateless_random_grad.cc
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/function.h"
+
+namespace tensorflow {
+REGISTER_OP_NO_GRADIENT("StatelessRandomUniform");
+REGISTER_OP_NO_GRADIENT("StatelessRandomNormal");
+REGISTER_OP_NO_GRADIENT("StatelessTruncatedNormal");
+REGISTER_OP_NO_GRADIENT("StatelessMultinomial");
+}  // end namespace tensorflow
-- 
GitLab


From 1e446b37620dcdca73e855c83efcc0d14bb68a8c Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Fri, 5 Oct 2018 11:27:03 -0700
Subject: [PATCH 0431/1085] Make gradient tape stack thread local

PiperOrigin-RevId: 215937618
---
 tensorflow/python/eager/pywrap_tfe_src.cc | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 6193f40ce8..6d3ef9a37b 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1228,8 +1228,9 @@ static PyTypeObject TFE_Py_Tape_Type = {
 // GIL, which is always held when any TFE_Py_* methods are called. We should
 // revisit this if/when decide to not hold the GIL while manipulating the tape
 // stack.
-static tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*>* tape_set = nullptr;
 tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*>* GetTapeSet() {
+  thread_local tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*>* tape_set{
+      nullptr};
   if (tape_set == nullptr) {
     tape_set = new tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*>;
   }
@@ -1264,27 +1265,10 @@ class SafeTapeSet {
   tensorflow::gtl::CompactPointerSet<TFE_Py_Tape*> tape_set_;
 };
 
-// xcode 7 doesn't define thread_local, so for compatibility we implement our
-// own. TODO(apassos) remove once we can deprecate xcode 7.
-#ifndef __APPLE__
 bool* ThreadTapeIsStopped() {
   thread_local bool thread_tape_is_stopped{false};
   return &thread_tape_is_stopped;
 }
-#else
-static std::unordered_map<std::thread::id, bool>* tape_is_stopped = nullptr;
-bool* ThreadTapeIsStopped() {
-  if (tape_is_stopped == nullptr) {
-    tape_is_stopped = new std::unordered_map<std::thread::id, bool>;
-  }
-  auto it = tape_is_stopped->find(std::this_thread::get_id());
-  if (it != tape_is_stopped->end()) {
-    return &(it->second);
-  }
-  return &(tape_is_stopped->emplace(std::this_thread::get_id(), false)
-               .first->second);
-}
-#endif
 
 void TFE_Py_TapeSetStopOnThread() { *ThreadTapeIsStopped() = true; }
 
-- 
GitLab


From 496bc1589831da2f00e6d49b12c68b97301730d4 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Fri, 5 Oct 2018 11:38:34 -0700
Subject: [PATCH 0432/1085] Disable
 micro/examples/micro_speech:micro_speech_test test under msan

PiperOrigin-RevId: 215939542
---
 .../lite/experimental/micro/examples/micro_speech/BUILD        | 3 +++
 .../contrib/lite/experimental/micro/testing/micro_test.bzl     | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
index 447c584387..dad58b6c1c 100644
--- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
@@ -17,6 +17,9 @@ tflite_micro_cc_test(
         "tiny_conv_model_data.cc",
         "tiny_conv_model_data.h",
     ],
+    tags = [
+        "nomsan",
+    ],
     deps = [
         "//tensorflow/contrib/lite:schema_fbs_version",
         "//tensorflow/contrib/lite/experimental/micro:micro_framework",
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
index 91e349cb24..916e3eeac3 100644
--- a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
+++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.bzl
@@ -10,6 +10,7 @@ def tflite_micro_cc_test(
         nocopts = "",
         linkopts = [],
         deps = [],
+        tags = [],
         visibility = None):
     """Tests a C/C++ binary without testing framework  dependencies`.
 
@@ -43,6 +44,7 @@ def tflite_micro_cc_test(
         nocopts = nocopts,
         linkopts = linkopts,
         deps = deps,
+        tags = tags,
         visibility = visibility,
     )
     native.sh_test(
@@ -61,4 +63,5 @@ def tflite_micro_cc_test(
         ],
         deps = [
         ],
+        tags = tags,
     )
-- 
GitLab


From 03b4161326897453fa6b2803b873954607f7623b Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Fri, 5 Oct 2018 11:49:19 -0700
Subject: [PATCH 0433/1085] [XLA] Extend the HLO verifier to check that
 non-layout-changing instructions preserve operand layouts.

Add an std::function member to the HloVerifier for a backend to specify the
function object used to determine whether an instruction can change layouts.
Use the function object to find out the non-layout-changing instructions and
check that such instructions should produce results with the same layouts as
its operands.

Add test cases.

PiperOrigin-RevId: 215941282
---
 tensorflow/compiler/xla/service/BUILD         |  1 +
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  9 ++-
 .../xla/service/gpu/nvptx_compiler.cc         | 21 ++++--
 .../compiler/xla/service/hlo_verifier.cc      | 34 +++++++++-
 .../compiler/xla/service/hlo_verifier.h       | 14 +++-
 .../compiler/xla/service/hlo_verifier_test.cc | 67 +++++++++++++++++++
 .../compiler/xla/tests/hlo_test_base.cc       | 14 ++--
 tensorflow/compiler/xla/tests/hlo_test_base.h |  8 ++-
 8 files changed, 149 insertions(+), 19 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 4797cf3330..2b292ed053 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -2450,6 +2450,7 @@ tf_cc_test(
         ":hlo",
         ":hlo_parser",
         ":hlo_verifier",
+        ":layout_assignment",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:types",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 5834f67285..68c715a086 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -327,8 +327,13 @@ Status CpuCompiler::RunHloPassesAfterLayoutAssn(
   {
     auto& pass = pipeline.AddPass<HloPassFix<HloPassPipeline>>(
         "simplification after layout assignement");
-    pass.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/true,
-                                          /*allow_mixed_precision=*/false);
+    // TODO(b/117156505): When the bug is fixed, the CPU backend should not
+    // produce layout changing elementwise operations. We will then pass
+    // LayoutAssignment::InstructionCanChangeLayout to the HLO verifier to
+    // enable stricter verification.
+    pass.AddInvariantChecker<HloVerifier>(
+        /*layout_sensitive=*/true,
+        /*allow_mixed_precision=*/false);
     pass.AddPass<HloPassFix<AlgebraicSimplifier>>(
         /*is_layout_sensitive=*/true,
         [](const Shape&, const Shape&) { return true; },
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 50e47542c4..ac6c2c5565 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -239,8 +239,10 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
 
   {
     HloPassPipeline pipeline("post-layout_assignment");
-    pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/true,
-                                              /*allow_mixed_precision=*/false);
+    pipeline.AddInvariantChecker<HloVerifier>(
+        /*layout_sensitive=*/true,
+        /*allow_mixed_precision=*/false,
+        LayoutAssignment::InstructionCanChangeLayout);
 
     // The LayoutAssignment pass may leave behind kCopy instructions which are
     // duplicate or NOPs, so remove them with algebraic simplification and CSE.
@@ -286,8 +288,10 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
 
   {
     HloPassFix<HloPassPipeline> fusion("fusion");
-    fusion.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/true,
-                                            /*allow_mixed_precision=*/false);
+    fusion.AddInvariantChecker<HloVerifier>(
+        /*layout_sensitive=*/true,
+        /*allow_mixed_precision=*/false,
+        LayoutAssignment::InstructionCanChangeLayout);
     fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/false);
     fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/true);
     fusion.AddPass<FusionMerger>();
@@ -299,7 +303,8 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
 
     HloPassPipeline reduce_pipeline("reduce-precision");
     reduce_pipeline.AddInvariantChecker<HloVerifier>(
-        /*is_layout_sensitive=*/true, /*allow_mixed_precision=*/false);
+        /*is_layout_sensitive=*/true, /*allow_mixed_precision=*/false,
+        LayoutAssignment::InstructionCanChangeLayout);
     ReducePrecisionInsertion::AddPasses(
         &reduce_pipeline, hlo_module->config().debug_options(),
         ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
@@ -325,8 +330,10 @@ Status PrepareHloModuleForIrEmitting(HloModule* hlo_module) {
   // (b/27180329). Therefore, in that case, we set the output to be a copy of
   // the parameter.
   HloPassPipeline pipeline("GPU-ir-emit-prepare");
-  pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/true,
-                                            /*allow_mixed_precision=*/false);
+  pipeline.AddInvariantChecker<HloVerifier>(
+      /*layout_sensitive=*/true,
+      /*allow_mixed_precision=*/false,
+      LayoutAssignment::InstructionCanChangeLayout);
 
   // Copy insertion should be performed immediately before IR emission to avoid
   // inserting unnecessary copies (later pass adds an instruction which
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index c22ee03388..fad3b14ec2 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -1042,7 +1042,10 @@ Status CheckElementwiseInstruction(HloInstruction* instruction) {
 // not check result shape as that is checked in the ShapeVerifier.
 class InstructionVerifier : public DfsHloVisitorWithDefault {
  public:
-  InstructionVerifier() {}
+  explicit InstructionVerifier(std::function<bool(const HloInstruction*)>
+                                   instruction_can_change_layout_func)
+      : instruction_can_change_layout_func_(
+            instruction_can_change_layout_func) {}
 
   Status DefaultAction(HloInstruction*) override { return Status::OK(); }
 
@@ -1143,8 +1146,34 @@ class InstructionVerifier : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  Status Postprocess(HloInstruction* instruction) override {
+    if (instruction_can_change_layout_func_ &&
+        LayoutUtil::IsDenseArray(instruction->shape()) &&
+        !instruction_can_change_layout_func_(instruction)) {
+      const Shape& result_shape = instruction->shape();
+      const Layout& result_layout = result_shape.layout();
+      for (HloInstruction* operand : instruction->operands()) {
+        const Shape& operand_shape = operand->shape();
+        if (LayoutUtil::IsDenseArray(operand_shape) &&
+            ShapeUtil::Rank(operand_shape) == ShapeUtil::Rank(result_shape)) {
+          const Layout& operand_layout = operand_shape.layout();
+          TF_RET_CHECK(LayoutUtil::Equal(result_layout, operand_layout))
+              << "Instruction shouldn't change layouts "
+              << instruction->ToString() << " From "
+              << ShapeUtil::HumanString(result_shape) << " To "
+              << ShapeUtil::HumanString(operand_shape);
+        }
+      }
+    }
+
+    return Status::OK();
+  }
+
  private:
   absl::flat_hash_map<string, const HloInstruction*> instructions_by_name_;
+  // Determines whether an instruction can change layouts.
+  std::function<bool(const HloInstruction*)>
+      instruction_can_change_layout_func_;
 };
 
 }  // namespace
@@ -1158,7 +1187,8 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     std::unique_ptr<ShapeVerifier> shape_verifier = shape_verifier_factory_();
     TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get()));
 
-    InstructionVerifier instruction_verifier;
+    InstructionVerifier instruction_verifier(
+        instruction_can_change_layout_func_);
     TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier));
   }
 
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index 6d16586c2c..cb49cb95ba 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -155,11 +155,17 @@ class HloVerifier : public HloModulePass {
  public:
   using ShapeVerifierFactory = std::function<std::unique_ptr<ShapeVerifier>()>;
 
-  explicit HloVerifier(bool layout_sensitive, bool allow_mixed_precision)
+  explicit HloVerifier(bool layout_sensitive, bool allow_mixed_precision,
+                       std::function<bool(const HloInstruction*)>
+                           instruction_can_change_layout_func = {})
       : shape_verifier_factory_([layout_sensitive, allow_mixed_precision] {
           return absl::make_unique<ShapeVerifier>(layout_sensitive,
                                                   allow_mixed_precision);
-        }) {}
+        }),
+        instruction_can_change_layout_func_(
+            std::move(instruction_can_change_layout_func)) {
+    CHECK(instruction_can_change_layout_func_ == nullptr || layout_sensitive);
+  }
 
   // Uses custom shape verification.
   explicit HloVerifier(ShapeVerifierFactory shape_verifier_factory)
@@ -177,6 +183,10 @@ class HloVerifier : public HloModulePass {
   // being a DfsHloVisitor, is stateful. We want a clean object
   // for each run of the verifier.
   ShapeVerifierFactory shape_verifier_factory_;
+
+  // Determines whether an instruction can change layouts.
+  std::function<bool(const HloInstruction*)>
+      instruction_can_change_layout_func_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc
index 8f0423bb1c..afe01e5487 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/service/layout_assignment.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
@@ -50,6 +51,14 @@ class HloVerifierTestAllowMixedPrecision : public HloTestBase {
                     /*allow_mixed_precision_in_hlo_verifier=*/true) {}
 };
 
+class HloVerifierTestLayoutSensitive : public HloTestBase {
+ public:
+  HloVerifierTestLayoutSensitive()
+      : HloTestBase(/*verifier_layout_sensitive=*/true,
+                    /*allow_mixed_precision_in_hlo_verifier=*/false,
+                    LayoutAssignment::InstructionCanChangeLayout) {}
+};
+
 TEST_F(HloVerifierTest, NullInstructionParent) {
   HloComputation::Builder builder(TestName());
   const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
@@ -358,5 +367,63 @@ TEST_F(HloVerifierTest, ConvNegativeBaseDilationNotAllowed) {
               HasSubstr("non-positive base area dilation factor"));
 }
 
+static const char* const kAddWithLayoutChangeHlo = R"(
+   HloModule AddWithLayoutChange
+    ENTRY AddWithLayoutChange {
+      par0 = f32[3,4]{1,0} parameter(0)
+      par1 = f32[3,4]{0,1} parameter(1)
+      ROOT add0 = f32[3,4]{1,0} add(par0,par1)
+    }
+  )";
+
+TEST_F(HloVerifierTest, AddWithLayoutChange) {
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(kAddWithLayoutChangeHlo));
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_TRUE(status.ok());
+}
+
+TEST_F(HloVerifierTestLayoutSensitive, AddWithLayoutChangeNotAllowed) {
+  TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(kAddWithLayoutChangeHlo));
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("Instruction shouldn't change layouts"));
+}
+
+TEST_F(HloVerifierTestLayoutSensitive, SliceWithLayoutChangeNotAllowed) {
+  const char* const kSliceWithLayoutChangeHlo = R"(
+   HloModule SliceWithLayoutChange
+    ENTRY SliceWithLayoutChange {
+      par0 = f32[4,5]{0,1} parameter(0)
+      par1 = s32[2] parameter(1)
+      ROOT dslice0 = f32[3,4]{1,0} dynamic-slice(par0, par1),
+        dynamic_slice_sizes={3,4}
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseHloString(kSliceWithLayoutChangeHlo));
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("Instruction shouldn't change layouts"));
+}
+
+TEST_F(HloVerifierTestLayoutSensitive, ConcatWithLayoutChangeNotAllowed) {
+  const char* const kConcatWithLayoutChangeHlo = R"(
+   HloModule ConcatWithLayoutChange
+   ENTRY ConcatWithLayoutChange {
+      par0 = f32[3,5]{0,1} parameter(0)
+      par1 = f32[3,3]{1,0} parameter(1)
+      ROOT concat0 = f32[3,8]{1,0} concatenate(f32[3,5] par0, f32[3,3] par1),
+        dimensions={1}
+   }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseHloString(kConcatWithLayoutChangeHlo));
+  auto status = verifier().Run(module.get()).status();
+  ASSERT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              HasSubstr("Instruction shouldn't change layouts"));
+}
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc
index bdd4fd7e3d..7ab2ecda58 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc
@@ -86,19 +86,25 @@ ProgramShape GetProgramShapeWithLayout(const HloModule& module) {
 }  // namespace
 
 HloTestBase::HloTestBase(bool verifier_layout_sensitive,
-                         bool allow_mixed_precision_in_hlo_verifier)
+                         bool allow_mixed_precision_in_hlo_verifier,
+                         std::function<bool(const HloInstruction*)>
+                             instruction_can_change_layout_func)
     : HloTestBase(GetTestPlatform(), GetReferencePlatform(),
                   verifier_layout_sensitive,
-                  allow_mixed_precision_in_hlo_verifier) {}
+                  allow_mixed_precision_in_hlo_verifier,
+                  instruction_can_change_layout_func) {}
 
 HloTestBase::HloTestBase(se::Platform* test_platform,
                          se::Platform* reference_platform,
                          bool verifier_layout_sensitive,
-                         bool allow_mixed_precision_in_hlo_verifier)
+                         bool allow_mixed_precision_in_hlo_verifier,
+                         std::function<bool(const HloInstruction*)>
+                             instruction_can_change_layout_func)
     : test_runner_(test_platform), reference_runner_(reference_platform) {
   hlo_verifier_ = absl::make_unique<HloVerifier>(
       /*layout_sensitive=*/verifier_layout_sensitive,
-      /*allow_mixed_precision=*/allow_mixed_precision_in_hlo_verifier);
+      /*allow_mixed_precision=*/allow_mixed_precision_in_hlo_verifier,
+      instruction_can_change_layout_func);
 }
 
 std::unique_ptr<HloModule> HloTestBase::CreateNewModule(const string& name) {
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h
index 0ae4bdc104..217428befa 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.h
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.h
@@ -88,14 +88,18 @@ class HloTestBase : public ::testing::Test {
   // interpreter is the only supported backend, it will be both the test backend
   // and the reference backend.
   HloTestBase(bool verifier_layout_sensitive = false,
-              bool allow_mixed_precision_in_hlo_verifier = true);
+              bool allow_mixed_precision_in_hlo_verifier = true,
+              std::function<bool(const HloInstruction*)>
+                  instruction_can_change_layout_func = {});
 
   // If your test doesn't use interpreter as the reference backend, you can use
   // this constructor. Note that your test target is responsible for linking in
   // both needed backends.
   HloTestBase(se::Platform* test_platform, se::Platform* reference_platform,
               bool verifier_layout_sensitive = false,
-              bool allow_mixed_precision_in_hlo_verifier = true);
+              bool allow_mixed_precision_in_hlo_verifier = true,
+              std::function<bool(const HloInstruction*)>
+                  instruction_can_change_layout_func = {});
 
   ~HloTestBase() override {}
 
-- 
GitLab


From 0541a277d5c74cf8e99c9f5a7a015926d1a05214 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Fri, 5 Oct 2018 12:09:01 -0700
Subject: [PATCH 0434/1085] Do 2 warmup runs in
 assert_no_new_pyobjects_executing_eagerly.

PiperOrigin-RevId: 215944829
---
 tensorflow/python/framework/test_util.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 4ec4b41b5e..95925bb471 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -506,9 +506,9 @@ def disable_control_flow_v2(unused_msg):
 def assert_no_new_pyobjects_executing_eagerly(f):
   """Decorator for asserting that no new Python objects persist after a test.
 
-  Runs the test multiple times executing eagerly, first as a warmup and then
-  several times to let objects accumulate. The warmup helps ignore caches which
-  do not grow as the test is run repeatedly.
+  Runs the test multiple times executing eagerly, first as a warmup and then to
+  let objects accumulate. The warmup helps ignore caches which do not grow as
+  the test is run repeatedly.
 
   Useful for checking that there are no missing Py_DECREFs in the C exercised by
   a bit of Python.
@@ -518,7 +518,14 @@ def assert_no_new_pyobjects_executing_eagerly(f):
     """Warms up, gets an object count, runs the test, checks for new objects."""
     with context.eager_mode():
       gc.disable()
-      f(self, **kwargs)
+      # Run the test 2 times as warmup, in an attempt to fill up caches, which
+      # should not grow as the test is run repeatedly below.
+      #
+      # TODO(b/117156879): Running warmup twice is black magic; we have seen
+      # tests that fail with 1 warmup run, and pass with 2, on various versions
+      # of python2.7.x.
+      for _ in range(2):
+        f(self, **kwargs)
       gc.collect()
       previous_count = len(gc.get_objects())
       if ops.has_default_graph():
-- 
GitLab


From d016650ca7636c96c6664bed2cf3a2fa8a3c674b Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Fri, 5 Oct 2018 12:17:31 -0700
Subject: [PATCH 0435/1085] Revert constant folding to previous state.

PiperOrigin-RevId: 215946205
---
 .../tf2xla/functionalize_control_flow.cc      | 64 +++----------------
 .../core/common_runtime/constant_folding.cc   | 35 +++-------
 .../core/common_runtime/constant_folding.h    |  4 --
 .../core/common_runtime/graph_optimizer.cc    |  5 +-
 .../core/common_runtime/graph_optimizer.h     |  5 +-
 5 files changed, 20 insertions(+), 93 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 28e09d7b79..0362682bd6 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -94,8 +94,9 @@ Status FunctionalizeControlFlowForFunction(
     }
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
+  Graph* g = body->graph;
 
-  // Check if the graph has Switch or Merge node before optimizing the graph.
+  // Check if the graph has Switch or Merge node.
   bool has_switch_or_merge = false;
   for (Node* n : body->graph->nodes()) {
     if (n->type_string() == "Switch" || n->type_string() == "Merge") {
@@ -108,58 +109,13 @@ Status FunctionalizeControlFlowForFunction(
   // in function body. We still need to rewrite those functions and modify
   // corresponding nodes.
 
-  // Call graph optimizer. The most important optimization we need is constant
-  // folding, which will replace ops like Shape/BroadcastGradientArgs with
-  // constant shape input. Without this optimization, those ops might become
-  // dynamic input for then/else body function and XLA will complain that input
-  // is not compile time constant. We enable function inlining as well, because
-  // otherwise we won't be able to infer shape for any node depending on
-  // function call nodes.
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_before_opt_", func_name),
-        *body->graph, fld);
-  }
-  // Optimizer accepts std::unique_ptr<Graph>* as input and might change
-  // underlying pointer, thus we create a new Graph and copy from body->graph.
-  std::unique_ptr<Graph> optimized_graph(new Graph(fld));
-  CopyGraph(*body->graph, optimized_graph.get());
-  OptimizerOptions opts;
-  opts.set_opt_level(OptimizerOptions::L0);
-  opts.set_do_function_inlining(true);
-  opts.set_do_constant_folding(true);
-  GraphOptimizer optimizer(opts);
-  auto cf_consider_fn = [](const Node* n) {
-    // Skip SymbolicGradient op when doing constant folding.
-    // Enabling SymbolicGradient op in constant folding requires
-    // flr->device() to be non-null, and here we have not constructed
-    // proper Device object yet (it will be constructed in XlaCompiler).
-    return n->type_string() != FunctionLibraryDefinition::kGradientOp;
-  };
-  optimizer.Optimize(flr, flr->env(),
-                     /*device=*/nullptr, &optimized_graph,
-                     /*shape_map=*/nullptr, /*cse_consider_fn=*/nullptr,
-                     cf_consider_fn);
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_after_opt_", func_name),
-        *optimized_graph, fld);
-  }
-  // Some inlined functions might have Switch/Merge nodes.
-  for (Node* n : optimized_graph->nodes()) {
-    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
-      has_switch_or_merge = true;
-      break;
-    }
-  }
-
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
   // might involve node deletion/addition. Avoid modifying nodes while iterating
   // it.
   std::vector<std::pair<Node*, std::vector<AssociatedFunctionInfo>>>
       nodes_to_associated_functions;
-  for (auto* n : optimized_graph->nodes()) {
+  for (auto* n : g->nodes()) {
     auto associated_functions = GetAssociatedFunctions(*n, flr);
     if (!associated_functions.empty()) {
       nodes_to_associated_functions.push_back({n, associated_functions});
@@ -215,7 +171,7 @@ Status FunctionalizeControlFlowForFunction(
         // pointer. That's fine because in that case, associated_functions will
         // only have one member and the loop will only run once.
         TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-            optimized_graph.get(), n, fld, associated_function, new_name));
+            g, n, fld, associated_function, new_name));
       }
     }
   }
@@ -227,21 +183,21 @@ Status FunctionalizeControlFlowForFunction(
     if (VLOG_IS_ON(4)) {
       dump_graph::DumpGraphToFile(
           absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-          *optimized_graph, fld);
+          *g, fld);
     }
-    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
+    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(g, fld));
     if (VLOG_IS_ON(4)) {
       dump_graph::DumpGraphToFile(
-          absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-          *optimized_graph, fld);
+          absl::StrCat("functionalize_control_flow_after_fdef_", func_name), *g,
+          fld);
     }
   }
 
   if (*modified) {
     // Add rewritten FunctionDef into library.
     FunctionDef functionalized_fdef;
-    TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
-                                          &functionalized_fdef));
+    TF_RETURN_IF_ERROR(
+        GraphToFunctionDef(*g, new_func_name, &functionalized_fdef));
     if (func_name == new_func_name) {
       VLOG(2) << "Replacing function " << func_name;
       TF_RETURN_IF_ERROR(
diff --git a/tensorflow/core/common_runtime/constant_folding.cc b/tensorflow/core/common_runtime/constant_folding.cc
index db137f1a19..e81e61b633 100644
--- a/tensorflow/core/common_runtime/constant_folding.cc
+++ b/tensorflow/core/common_runtime/constant_folding.cc
@@ -466,23 +466,23 @@ Graph* GetConstantGraph(
 bool ReplaceTensorWithConstant(
     Graph* graph, Device* partition_device, NodeAndOutput tensor,
     const Tensor& constant, const gtl::FlatSet<Node*>& control_deps,
-    int64 max_constant_size_in_bytes, bool disable_memory_output_type_check,
+    int64 max_constant_size_in_bytes,
     const ConstantFoldNameGenerator& generate_new_name) {
   // Be conservative when replacing a tensor with a constant, when not
   // running on CPU.
   // 1) Do not replace another constant.
   // 2) If the destination tensor is not an int32 tensor, and has HOST_MEMORY
   // constraint, do not replace it.
-  // 3) If the size of the constant in bytes is too large (>
+  // 3) If the destination tensor is an int32 tensor, and has DEVICE_MEMORY
+  // constraint, do not replace it.
+  // 4) If the size of the constant in bytes is too large (>
   // max_constant_in_bytes), do not replace it. This prevents the size of the
   // Graph from growing too large.
-  // 4) If the constant op created does not have a kernel implementation
+  // 5) If the constant op created does not have a kernel implementation
   // for the device, do not use it.
   // TODO(keveman): Consider adding a new constant op that has a kernel
   // implementation for all types, but with HostMemory constraint on it's
   // output.
-  // 5) If the constant op for the device has different output memory type
-  // from the original op output memory type, do not replace it.
   if (tensor.first->IsConstant()) {
     return false;
   }
@@ -497,7 +497,8 @@ bool ReplaceTensorWithConstant(
       return false;
     }
     bool is_int32 = tensor.first->output_type(tensor.second) == DT_INT32;
-    if (memory_type == HOST_MEMORY && !is_int32) {
+    if ((memory_type == HOST_MEMORY && !is_int32) ||
+        (memory_type == DEVICE_MEMORY && is_int32)) {
       return false;
     }
   }
@@ -535,25 +536,6 @@ bool ReplaceTensorWithConstant(
   if (!NodeBuilder(builder).Finalize(graph, &constant_node).ok()) {
     return false;
   }
-  if (!disable_memory_output_type_check) {
-    if (partition_device && device_type != DEVICE_CPU) {
-      MemoryType original_output_memory_type;
-      if (!MemoryTypeForOutput(device_type, graph, tensor.first, tensor.second,
-                               &original_output_memory_type)
-               .ok()) {
-        return false;
-      }
-      MemoryType const_output_memory_type;
-      if (!MemoryTypeForOutput(device_type, graph, constant_node, 0,
-                               &const_output_memory_type)
-               .ok()) {
-        return false;
-      }
-      if (original_output_memory_type != const_output_memory_type) {
-        return false;
-      }
-    }
-  }
   for (auto edge : edges_to_remove) {
     graph->AddEdge(constant_node, 0, edge->dst(), edge->dst_input());
     graph->RemoveEdge(edge);
@@ -660,8 +642,7 @@ Status ConstantFold(const ConstantFoldingOptions& opts,
         constant_control_deps[tensors_to_replace[c].first];
     if (ReplaceTensorWithConstant(
             graph, partition_device, tensors_to_replace[c], outputs[c],
-            control_deps, opts.max_constant_size_in_bytes,
-            opts.disable_memory_output_type_check, generate_new_name)) {
+            control_deps, opts.max_constant_size_in_bytes, generate_new_name)) {
       ++num_nodes_replaced;
     }
   }
diff --git a/tensorflow/core/common_runtime/constant_folding.h b/tensorflow/core/common_runtime/constant_folding.h
index 4c71b7bd27..a9a84f761b 100644
--- a/tensorflow/core/common_runtime/constant_folding.h
+++ b/tensorflow/core/common_runtime/constant_folding.h
@@ -45,10 +45,6 @@ struct ConstantFoldingOptions {
   // optimization.
   int64 max_constant_size_in_bytes = 10 * 1024 * 1024;
 
-  // If disable_memory_output_type_check is true, we will disable output memory
-  // type check for constant node replacement.
-  bool disable_memory_output_type_check = false;
-
   // A generator for the name suffix of constant folded nodes. A
   // default id generator that monotonically increases is used if nullptr is
   // passed.
diff --git a/tensorflow/core/common_runtime/graph_optimizer.cc b/tensorflow/core/common_runtime/graph_optimizer.cc
index 91194bc86f..37a979a8f1 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.cc
+++ b/tensorflow/core/common_runtime/graph_optimizer.cc
@@ -39,8 +39,7 @@ void GraphOptimizer::Optimize(
     const std::unordered_map<string, std::vector<PartialTensorShape>>*
         shape_map,
     const std::function<bool(const Node*)>& cse_consider_fn,
-    const std::function<bool(const Node*)>& cf_consider_fn,
-    bool cf_disable_memory_output_type_check) {
+    const std::function<bool(const Node*)>& cf_consider_fn) {
   Graph* g = graph->get();
   DumpGraph("Initial", g);
 
@@ -65,8 +64,6 @@ void GraphOptimizer::Optimize(
       ConstantFoldingOptions cf_opts;
       cf_opts.shape_map = shape_map;
       cf_opts.consider = cf_consider_fn;
-      cf_opts.disable_memory_output_type_check =
-          cf_disable_memory_output_type_check;
       if (opts_.max_folded_constant_in_bytes() > 0) {
         cf_opts.max_constant_size_in_bytes =
             opts_.max_folded_constant_in_bytes();
diff --git a/tensorflow/core/common_runtime/graph_optimizer.h b/tensorflow/core/common_runtime/graph_optimizer.h
index 8954e9612d..789cc56942 100644
--- a/tensorflow/core/common_runtime/graph_optimizer.h
+++ b/tensorflow/core/common_runtime/graph_optimizer.h
@@ -47,16 +47,13 @@ class GraphOptimizer {
   // returns true will be considered for CSE.
   // If cf_consider_fn is not null then only nodes for which cf_consider_fn
   // returns true will be considered for CF.
-  // If cf_disable_memory_output_type_check is true, CF will discard output
-  // memory type check for constant node replacement.
   void Optimize(
       FunctionLibraryRuntime* runtime, Env* env, Device* device,
       std::unique_ptr<Graph>* graph,
       const std::unordered_map<string, std::vector<PartialTensorShape>>*
           shape_map,
       const std::function<bool(const Node*)>& cse_consider_fn = nullptr,
-      const std::function<bool(const Node*)>& cf_consider_fn = nullptr,
-      bool cf_disable_memory_output_type_check = false);
+      const std::function<bool(const Node*)>& cf_consider_fn = nullptr);
 
   const OptimizerOptions& options() { return opts_; }
 
-- 
GitLab


From c220d6c53fa6debf0fd0baaa0b1915fc1d716b07 Mon Sep 17 00:00:00 2001
From: Akshay Modi <akshaym@users.noreply.github.com>
Date: Fri, 5 Oct 2018 12:28:51 -0700
Subject: [PATCH 0436/1085] Add a separator between shape and dtype in cache
 key encoding. (#22742)

It was possible that we could mix shapes and types (T111 could mean a tensor of dtype 1 and shape (1, 1) or a tensor of dtype 11 and shape (1)).

PiperOrigin-RevId: 215777629
---
 tensorflow/python/eager/function_test.py  | 44 +++++++++++++++++++++--
 tensorflow/python/eager/pywrap_tfe_src.cc | 34 +++++++++---------
 2 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 9ce367a837..a2cfb4b476 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1255,6 +1255,44 @@ class FunctionTest(test.TestCase):
     defined(Foo())
     self.assertEqual(len(defined._function_cache), 2)
 
+  def testCacheTensorShapeDtypeCollision(self):
+
+    def func(t):
+      return t + t
+
+    defined = function.defun(func)
+    t = constant_op.constant([[1.0]], dtype=dtypes.complex64)
+    defined(t)
+    self.assertEqual(len(defined._function_cache), 1)
+
+    t = constant_op.constant([1.0], dtype=dtypes.complex128)
+    defined(t)
+    self.assertEqual(len(defined._function_cache), 2)
+
+  def testCacheTensorUnknownShapesCollision(self):
+
+    def func(t):
+      return t + t
+
+    with context.graph_mode(), self.cached_session():
+      defined = function.defun(func)
+
+      p = array_ops.placeholder(dtype=dtypes.float32, shape=None)
+      defined(p)
+      self.assertEqual(len(defined._function_cache), 1)
+
+      p = array_ops.placeholder(dtype=dtypes.float32, shape=[None])
+      defined(p)
+      self.assertEqual(len(defined._function_cache), 2)
+
+      p = array_ops.placeholder(dtype=dtypes.float32, shape=[None, None])
+      defined(p)
+      self.assertEqual(len(defined._function_cache), 3)
+
+      t = constant_op.constant(1.0, dtype=dtypes.float32)
+      defined(t)
+      self.assertEqual(len(defined._function_cache), 4)
+
   def testPythonFunctionWithDefaultArgs(self):
 
     def func(foo, bar=1, baz=2):
@@ -1271,17 +1309,17 @@ class FunctionTest(test.TestCase):
       return tuple(key[0] for key in defined._function_cache)
 
     # `True` corresponds to the fact that we're executing eagerly
-    self.assertIn(('tRRR', (0, 1, 20)), cache_keys())
+    self.assertIn(('URRR', (0, 1, 20)), cache_keys())
 
     defined(1)  # bar=1, baz=2
-    self.assertIn(('tRRR', (1, 1, 2)), cache_keys())
+    self.assertIn(('URRR', (1, 1, 2)), cache_keys())
 
     # This matches the previous call.
     defined(foo=1)
     self.assertEqual(len(defined._function_cache), 2)
 
     defined(1, 2, 3)
-    self.assertIn(('tRRR', (1, 2, 3)), cache_keys())
+    self.assertIn(('URRR', (1, 2, 3)), cache_keys())
 
     # This matches the previous call.
     defined(1, bar=2, baz=3)
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index ae1e12f9c3..6193f40ce8 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -2747,11 +2747,15 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs,
 }
 
 namespace {
-
-tensorflow::int64 GetPyNoneHash() {
-  tensorflow::int64 py_none_hash = PyObject_Hash(Py_None);
-  return py_none_hash;
-}
+const char kTensor[] = "T";
+const char kIndexedSlices[] = "I";
+const char kList[] = "L";
+const char kTuple[] = "U";
+const char kDict[] = "D";
+const char kRaw[] = "R";
+const char kShape[] = "s";
+const char kDType[] = "d";
+const char kNone[] = "n";
 
 struct EncodeResult {
   string str;
@@ -2784,8 +2788,10 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
     TFE_TensorHandle* t = EagerTensor_Handle(arg);
     tensorflow::TensorShape tensor_shape;
     TF_RETURN_IF_ERROR(t->handle->Shape(&tensor_shape));
-    absl::StrAppend(&result->str, t->handle->dtype);
 
+    absl::StrAppend(&result->str, kDType, t->handle->dtype);
+
+    absl::StrAppend(&result->str, kShape);
     for (tensorflow::int64 dim_size : tensor_shape.dim_sizes()) {
       absl::StrAppend(&result->str, dim_size);
     }
@@ -2812,7 +2818,7 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
   tensorflow::DataType dtype =
       static_cast<tensorflow::DataType>(MakeInt(dtype_enum.get()));
 
-  absl::StrAppend(&result->str, dtype);
+  absl::StrAppend(&result->str, kDType, dtype);
   static char _shape_tuple[] = "_shape_tuple";
   tensorflow::Safe_PyObjectPtr shape_tuple(
       PyObject_CallMethod(arg, _shape_tuple, nullptr));
@@ -2824,10 +2830,11 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
 
   if (shape_tuple.get() == Py_None) {
     // Unknown shape, encode that directly.
-    absl::StrAppend(&result->str, GetPyNoneHash());
+    absl::StrAppend(&result->str, kNone);
     return tensorflow::Status::OK();
   }
 
+  absl::StrAppend(&result->str, kShape);
   tensorflow::Safe_PyObjectPtr shape_seq(PySequence_Fast(
       shape_tuple.get(), "shape_tuple didn't return a sequence"));
 
@@ -2835,7 +2842,7 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
   for (int i = 0; i < len; ++i) {
     PyObject* item = PySequence_Fast_GET_ITEM(shape_seq.get(), i);
     if (item == Py_None) {
-      absl::StrAppend(&result->str, GetPyNoneHash());
+      absl::StrAppend(&result->str, kNone);
     } else {
       absl::StrAppend(&result->str, MakeInt(item));
     }
@@ -2844,13 +2851,6 @@ tensorflow::Status TFE_Py_EncodeTensor(PyObject* arg, EncodeResult* result) {
   return tensorflow::Status::OK();
 }
 
-const char kTensor[] = "T";
-const char kIndexedSlices[] = "I";
-const char kList[] = "L";
-const char kTuple[] = "t";
-const char kDict[] = "D";
-const char kRaw[] = "R";
-
 tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, EncodeResult* result);
 
 // This function doesn't set the type of sequence before
@@ -2864,7 +2864,7 @@ tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type,
   for (int i = 0; i < len; ++i) {
     PyObject* item = PySequence_Fast_GET_ITEM(arg_seq.get(), i);
     if (item == Py_None) {
-      absl::StrAppend(&result->str, GetPyNoneHash());
+      absl::StrAppend(&result->str, kNone);
     } else {
       TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelper(item, result));
     }
-- 
GitLab


From 58845f229be9b5ba2e1e36150bff5ba7a85920d8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 12:25:22 -0700
Subject: [PATCH 0437/1085] Profiler collects the number of replicas and num
 cores per replica used in the model.

PiperOrigin-RevId: 215947354
---
 tensorflow/contrib/tpu/profiler/tf_op_stats.proto | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
index f88dc51636..1e66801efd 100644
--- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
+++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto
@@ -168,6 +168,12 @@ message RunEnvironmentResult {
   optional HostIndependentJobInfoResult host_independent_job_info = 5;
   // Host-dependent job information.
   repeated HostDependentJobInfoResult host_dependent_job_info = 6;
+  // The number of replicas, corresponds to input parallelism.
+  // If there is no model parallelism, replica_count = tpu_core_count
+  optional int32 replica_count = 7;
+  // The number of cores used for a single replica, e.g. model parallelism.
+  // If there is no model parallelism, then num_cores_per_replica = 1
+  optional int32 num_cores_per_replica = 8;
 }
 
 // The types of host operations that are tracked.
-- 
GitLab


From 6919ab5787e6384d709adf051dc1ce99236b76bc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 12:33:55 -0700
Subject: [PATCH 0438/1085] Convert TensorFlow's jpeg dependency to new third
 party import method.

PiperOrigin-RevId: 215948571
---
 tensorflow/workspace.bzl                        | 14 ++------------
 third_party/jpeg/BUILD                          |  2 +-
 third_party/jpeg/{jpeg.BUILD => BUILD.bazel}    | 11 ++++++-----
 .../jpeg.BUILD => jpeg/BUILD.system}            |  0
 third_party/jpeg/jpeg_helpers.BUILD.bazel       |  1 +
 third_party/jpeg/workspace.bzl                  | 17 +++++++++++++++++
 6 files changed, 27 insertions(+), 18 deletions(-)
 rename third_party/jpeg/{jpeg.BUILD => BUILD.bazel} (99%)
 rename third_party/{systemlibs/jpeg.BUILD => jpeg/BUILD.system} (100%)
 create mode 100644 third_party/jpeg/jpeg_helpers.BUILD.bazel
 create mode 100644 third_party/jpeg/workspace.bzl

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 8df41f96b8..b9ced1bd6c 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -22,10 +22,12 @@ load(
 )
 load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
 load("//third_party/icu:workspace.bzl", icu = "repo")
+load("//third_party/jpeg:workspace.bzl", jpeg = "repo")
 
 def initialize_third_party():
     flatbuffers()
     icu()
+    jpeg()
 
 # Sanitize a dependency so that it works correctly from code that includes
 # TensorFlow as a submodule.
@@ -246,18 +248,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
     )
 
-    tf_http_archive(
-        name = "jpeg",
-        build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"),
-        sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b",
-        strip_prefix = "libjpeg-turbo-2.0.0",
-        system_build_file = clean_dep("//third_party/systemlibs:jpeg.BUILD"),
-        urls = [
-            "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
-            "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
-        ],
-    )
-
     tf_http_archive(
         name = "png_archive",
         build_file = clean_dep("//third_party:png.BUILD"),
diff --git a/third_party/jpeg/BUILD b/third_party/jpeg/BUILD
index 5b01f6e3e4..e3aec1fce9 100644
--- a/third_party/jpeg/BUILD
+++ b/third_party/jpeg/BUILD
@@ -1 +1 @@
-licenses(["notice"])
+# Needed to make this a package.
diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/BUILD.bazel
similarity index 99%
rename from third_party/jpeg/jpeg.BUILD
rename to third_party/jpeg/BUILD.bazel
index 1b9b9bf2f5..5243e995a3 100644
--- a/third_party/jpeg/jpeg.BUILD
+++ b/third_party/jpeg/BUILD.bazel
@@ -162,9 +162,9 @@ cc_library(
     hdrs = [
         "simd/powerpc/jccolext-altivec.c",
         "simd/powerpc/jcgryext-altivec.c",
+        "simd/powerpc/jcsample.h",
         "simd/powerpc/jdcolext-altivec.c",
         "simd/powerpc/jdmrgext-altivec.c",
-        "simd/powerpc/jcsample.h",
         "simd/powerpc/jsimd_altivec.h",
     ],
     copts = libjpegturbo_copts,
@@ -186,7 +186,6 @@ cc_library(
         "jsimd.h",
         "jsimddct.h",
         "simd/jsimd.h",
-        "simd/x86_64/jsimd.c",
         "simd/x86_64/jccolor-avx2.o",
         "simd/x86_64/jccolor-sse2.o",
         "simd/x86_64/jcgray-avx2.o",
@@ -213,6 +212,7 @@ cc_library(
         "simd/x86_64/jquantf-sse2.o",
         "simd/x86_64/jquanti-avx2.o",
         "simd/x86_64/jquanti-sse2.o",
+        "simd/x86_64/jsimd.c",
         "simd/x86_64/jsimdcpu.o",
     ],
     copts = libjpegturbo_copts,
@@ -322,9 +322,9 @@ cc_library(
         "jpeglib.h",
         "jsimd.h",
         "jsimddct.h",
-        "simd/jsimd.h",
         "simd/arm/jsimd.c",
         "simd/arm/jsimd_neon.S",
+        "simd/jsimd.h",
     ],
     copts = libjpegturbo_copts,
     nocopts = libjpegturbo_nocopts,
@@ -343,9 +343,9 @@ cc_library(
         "jpeglib.h",
         "jsimd.h",
         "jsimddct.h",
-        "simd/jsimd.h",
         "simd/arm64/jsimd.c",
         "simd/arm64/jsimd_neon.S",
+        "simd/jsimd.h",
     ],
     copts = libjpegturbo_copts,
     nocopts = libjpegturbo_nocopts,
@@ -366,7 +366,6 @@ cc_library(
         "jsimd.h",
         "jsimddct.h",
         "simd/jsimd.h",
-        "simd/x86_64/jsimd.c",
         "simd/x86_64/jccolor-avx2.obj",
         "simd/x86_64/jccolor-sse2.obj",
         "simd/x86_64/jcgray-avx2.obj",
@@ -393,6 +392,7 @@ cc_library(
         "simd/x86_64/jquantf-sse2.obj",
         "simd/x86_64/jquanti-avx2.obj",
         "simd/x86_64/jquanti-sse2.obj",
+        "simd/x86_64/jsimd.c",
         "simd/x86_64/jsimdcpu.obj",
     ],
     copts = libjpegturbo_copts,
@@ -603,6 +603,7 @@ JCONFIGINT_WIN_SUBSTITUTIONS = {
 }
 
 JCONFIGINT_NOWIN_SUBSTITUTIONS.update(JCONFIGINT_COMMON_SUBSTITUTIONS)
+
 JCONFIGINT_WIN_SUBSTITUTIONS.update(JCONFIGINT_COMMON_SUBSTITUTIONS)
 
 template_rule(
diff --git a/third_party/systemlibs/jpeg.BUILD b/third_party/jpeg/BUILD.system
similarity index 100%
rename from third_party/systemlibs/jpeg.BUILD
rename to third_party/jpeg/BUILD.system
diff --git a/third_party/jpeg/jpeg_helpers.BUILD.bazel b/third_party/jpeg/jpeg_helpers.BUILD.bazel
new file mode 100644
index 0000000000..5b01f6e3e4
--- /dev/null
+++ b/third_party/jpeg/jpeg_helpers.BUILD.bazel
@@ -0,0 +1 @@
+licenses(["notice"])
diff --git a/third_party/jpeg/workspace.bzl b/third_party/jpeg/workspace.bzl
new file mode 100644
index 0000000000..4b517240ec
--- /dev/null
+++ b/third_party/jpeg/workspace.bzl
@@ -0,0 +1,17 @@
+"""loads the jpeg library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "jpeg",
+        urls = [
+            "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
+            "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz",
+        ],
+        sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b",
+        strip_prefix = "libjpeg-turbo-2.0.0",
+        build_file = "//third_party/jpeg:BUILD.bazel",
+        # build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"),
+        system_build_file = "//third_party/jpeg:BUILD.system",
+    )
-- 
GitLab


From 5ad138bf188204163b97eb605e89d8c503993529 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Fri, 5 Oct 2018 12:44:12 -0700
Subject: [PATCH 0439/1085] Do 2 warmup runs in
 assert_no_new_pyobjects_executing_eagerly. (#22776)

PiperOrigin-RevId: 215944829
---
 tensorflow/python/framework/test_util.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 6673bc5561..aa8ecf8161 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -506,9 +506,9 @@ def disable_control_flow_v2(unused_msg):
 def assert_no_new_pyobjects_executing_eagerly(f):
   """Decorator for asserting that no new Python objects persist after a test.
 
-  Runs the test multiple times executing eagerly, first as a warmup and then
-  several times to let objects accumulate. The warmup helps ignore caches which
-  do not grow as the test is run repeatedly.
+  Runs the test multiple times executing eagerly, first as a warmup and then to
+  let objects accumulate. The warmup helps ignore caches which do not grow as
+  the test is run repeatedly.
 
   Useful for checking that there are no missing Py_DECREFs in the C exercised by
   a bit of Python.
@@ -518,7 +518,14 @@ def assert_no_new_pyobjects_executing_eagerly(f):
     """Warms up, gets an object count, runs the test, checks for new objects."""
     with context.eager_mode():
       gc.disable()
-      f(self, **kwargs)
+      # Run the test 2 times as warmup, in an attempt to fill up caches, which
+      # should not grow as the test is run repeatedly below.
+      #
+      # TODO(b/117156879): Running warmup twice is black magic; we have seen
+      # tests that fail with 1 warmup run, and pass with 2, on various versions
+      # of python2.7.x.
+      for _ in range(2):
+        f(self, **kwargs)
       gc.collect()
       previous_count = len(gc.get_objects())
       if ops.has_default_graph():
-- 
GitLab


From ef838969b95de39353a3ba495c335cbb14a0c9b5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 12:44:45 -0700
Subject: [PATCH 0440/1085] Brings V2 Optimizers into Keras w/ Keras signatures

PiperOrigin-RevId: 215950207
---
 .../contrib/distribute/python/combinations.py |   16 +-
 .../distribute/python/minimize_loss_test.py   |    5 -
 tensorflow/contrib/optimizer_v2/BUILD         |   11 +-
 tensorflow/contrib/optimizer_v2/adadelta.py   |   75 +-
 tensorflow/contrib/optimizer_v2/adagrad.py    |   79 +-
 .../contrib/optimizer_v2/adagrad_test.py      |    3 -
 tensorflow/contrib/optimizer_v2/adam.py       |  129 +-
 .../optimizer_v2/checkpointable_utils_test.py |   68 +-
 .../contrib/optimizer_v2/gradient_descent.py  |   40 +-
 tensorflow/contrib/optimizer_v2/momentum.py   |   69 +-
 .../contrib/optimizer_v2/optimizer_v2.py      | 1205 +--------------
 tensorflow/contrib/optimizer_v2/rmsprop.py    |  154 +-
 tensorflow/python/keras/BUILD                 |  155 ++
 .../python/keras/optimizer_v2/adadelta.py     |  116 ++
 .../keras/optimizer_v2/adadelta_test.py       |  166 ++
 .../python/keras/optimizer_v2/adagrad.py      |  119 ++
 .../python/keras/optimizer_v2/adagrad_test.py |  276 ++++
 tensorflow/python/keras/optimizer_v2/adam.py  |  203 +++
 .../python/keras/optimizer_v2/adam_test.py    |  333 ++++
 .../optimizer_v2/checkpointable_utils_test.py |  761 ++++++++++
 .../python/keras/optimizer_v2/optimizer_v2.py | 1349 +++++++++++++++++
 .../keras/optimizer_v2/optimizer_v2_test.py   |  277 ++++
 .../python/keras/optimizer_v2/rmsprop.py      |  239 +++
 .../python/keras/optimizer_v2/rmsprop_test.py |  444 ++++++
 tensorflow/python/keras/optimizer_v2/sgd.py   |  170 +++
 .../python/keras/optimizer_v2/sgd_test.py     |  759 ++++++++++
 26 files changed, 5487 insertions(+), 1734 deletions(-)
 create mode 100644 tensorflow/python/keras/optimizer_v2/adadelta.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adadelta_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adagrad.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adagrad_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adam.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/adam_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/optimizer_v2.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/rmsprop.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/rmsprop_test.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/sgd.py
 create mode 100644 tensorflow/python/keras/optimizer_v2/sgd_test.py

diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py
index cff4b0a463..63a163e76c 100644
--- a/tensorflow/contrib/distribute/python/combinations.py
+++ b/tensorflow/contrib/distribute/python/combinations.py
@@ -349,26 +349,26 @@ mirrored_strategy_with_two_gpus = NamedDistribution(
     required_gpus=2)
 
 
-adam_optimizer_v1_fn = NamedObject(
-    "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1))
 gradient_descent_optimizer_v1_fn = NamedObject(
     "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2))
 adagrad_optimizer_v1_fn = NamedObject(
     "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001))
+adam_optimizer_v1_fn = NamedObject("AdamV1",
+                                   lambda: adam.AdamOptimizer(0.001, epsilon=1))
 rmsprop_optimizer_v1_fn = NamedObject(
     "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001))
-optimizers_v1 = [adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn,
-                 adagrad_optimizer_v1_fn]
 
-adam_optimizer_v2_fn = NamedObject(
-    "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1))
+optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn]
+
 gradient_descent_optimizer_v2_fn = NamedObject(
     "GradientDescentV2",
     lambda: gradient_descent_v2.GradientDescentOptimizer(0.2))
 adagrad_optimizer_v2_fn = NamedObject(
     "AdagradV2", lambda: adagrad_v2.AdagradOptimizer(0.001))
-optimizers_v2 = [adam_optimizer_v2_fn, gradient_descent_optimizer_v2_fn,
-                 adagrad_optimizer_v2_fn]
+adam_optimizer_v2_fn = NamedObject(
+    "AdamV2", lambda: adam_v2.AdamOptimizer(0.001, epsilon=1))
+
+optimizers_v2 = [gradient_descent_optimizer_v2_fn, adagrad_optimizer_v2_fn]
 
 graph_and_eager_modes = ["graph", "eager"]
 
diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py
index ba147e7824..60e134055f 100644
--- a/tensorflow/contrib/distribute/python/minimize_loss_test.py
+++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py
@@ -179,11 +179,6 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       def get_expected_variables(optimizer_fn, num_parameter_devices):
         variables_map = {
             "GradientDescent": ["dense/kernel", "dense/bias"],
-            "Adam": [
-                "dense/kernel", "dense/bias", "beta1_power", "beta2_power",
-                "dense/kernel/Adam", "dense/kernel/Adam_1", "dense/bias/Adam",
-                "dense/bias/Adam_1"
-            ],
             "Adagrad": [
                 "dense/kernel/Adagrad", "dense/kernel",
                 "dense/bias/Adagrad", "dense/bias"
diff --git a/tensorflow/contrib/optimizer_v2/BUILD b/tensorflow/contrib/optimizer_v2/BUILD
index 3ba3ee29ec..2cf445a85e 100644
--- a/tensorflow/contrib/optimizer_v2/BUILD
+++ b/tensorflow/contrib/optimizer_v2/BUILD
@@ -47,15 +47,8 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:distribute",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
+        "//tensorflow/python:util",
+        "//tensorflow/python/keras:optimizer_v2",
     ],
 )
 
diff --git a/tensorflow/contrib/optimizer_v2/adadelta.py b/tensorflow/contrib/optimizer_v2/adadelta.py
index b206f9f61b..9d73bddd1c 100644
--- a/tensorflow/contrib/optimizer_v2/adadelta.py
+++ b/tensorflow/contrib/optimizer_v2/adadelta.py
@@ -18,17 +18,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import adadelta
+from tensorflow.python.util import deprecation
 
 
-class AdadeltaOptimizer(optimizer_v2.OptimizerV2):
+class AdadeltaOptimizer(adadelta.Adadelta):
   """Optimizer that implements the Adadelta algorithm.
 
   See [M. D. Zeiler](http://arxiv.org/abs/1212.5701)
   ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-8,
                use_locking=False, name="Adadelta"):
     """Construct a new Adadelta optimizer.
@@ -48,66 +52,5 @@ class AdadeltaOptimizer(optimizer_v2.OptimizerV2):
       name: Optional name prefix for the operations created when applying
         gradients.  Defaults to "Adadelta".
     """
-    super(AdadeltaOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-    self._set_hyper("rho", rho)
-    self._set_hyper("epsilon", epsilon)
-
-  def _create_vars(self, var_list, state):
-    for v in var_list:
-      state.zeros_slot(v, "accum")
-      state.zeros_slot(v, "accum_update")
-
-  def _apply_dense(self, grad, var, state):
-    accum = state.get_slot(var, "accum")
-    accum_update = state.get_slot(var, "accum_update")
-    return training_ops.apply_adadelta(
-        var,
-        accum,
-        accum_update,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("rho", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking)
-
-  def _resource_apply_dense(self, grad, var, state):
-    accum = state.get_slot(var, "accum")
-    accum_update = state.get_slot(var, "accum_update")
-    return training_ops.resource_apply_adadelta(
-        var.handle,
-        accum.handle,
-        accum_update.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("rho", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking)
-
-  def _apply_sparse(self, grad, var, state):
-    accum = state.get_slot(var, "accum")
-    accum_update = state.get_slot(var, "accum_update")
-    return training_ops.sparse_apply_adadelta(
-        var,
-        accum,
-        accum_update,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("rho", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad.values,
-        grad.indices,
-        use_locking=self._use_locking)
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    accum = state.get_slot(var, "accum")
-    accum_update = state.get_slot(var, "accum_update")
-    return training_ops.resource_sparse_apply_adadelta(
-        var.handle,
-        accum.handle,
-        accum_update.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("rho", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad,
-        indices,
-        use_locking=self._use_locking)
+    super(AdadeltaOptimizer, self).__init__(
+        learning_rate=learning_rate, rho=rho, epsilon=epsilon, name=name)
diff --git a/tensorflow/contrib/optimizer_v2/adagrad.py b/tensorflow/contrib/optimizer_v2/adagrad.py
index dab1e02716..716361e29c 100644
--- a/tensorflow/contrib/optimizer_v2/adagrad.py
+++ b/tensorflow/contrib/optimizer_v2/adagrad.py
@@ -18,15 +18,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import adagrad
+from tensorflow.python.util import deprecation
 
 
-class AdagradOptimizer(optimizer_v2.OptimizerV2):
+class AdagradOptimizer(adagrad.Adagrad):
   """Optimizer that implements the Adagrad algorithm.
 
   See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
@@ -34,6 +30,10 @@ class AdagradOptimizer(optimizer_v2.OptimizerV2):
   [intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf).
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate, initial_accumulator_value=0.1,
                use_locking=False, name="Adagrad"):
     """Construct a new Adagrad optimizer.
@@ -54,64 +54,7 @@ class AdagradOptimizer(optimizer_v2.OptimizerV2):
     Raises:
       ValueError: If the `initial_accumulator_value` is invalid.
     """
-    if initial_accumulator_value <= 0.0:
-      raise ValueError("initial_accumulator_value must be positive: %s" %
-                       initial_accumulator_value)
-    super(AdagradOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-
-    self._initial_accumulator_value = initial_accumulator_value
-
-  def _create_vars(self, var_list, state):
-    for v in var_list:
-      dtype = v.dtype.base_dtype
-      if v.get_shape().is_fully_defined():
-        init = init_ops.constant_initializer(self._initial_accumulator_value,
-                                             dtype=dtype)
-      else:
-        def init(v=v, dtype=dtype):
-          # Use a Tensor instead of initializer if variable does not have
-          # static shape.
-          init_constant = gen_array_ops.fill(array_ops.shape(v),
-                                             self._initial_accumulator_value)
-          return math_ops.cast(init_constant, dtype)
-      state.create_slot_with_initializer(v, init, v.get_shape(), dtype,
-                                         "accumulator")
-
-  def _apply_dense(self, grad, var, state):
-    acc = state.get_slot(var, "accumulator")
-    return training_ops.apply_adagrad(
-        var,
-        acc,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking)
-
-  def _resource_apply_dense(self, grad, var, state):
-    acc = state.get_slot(var, "accumulator")
-    return training_ops.resource_apply_adagrad(
-        var.handle,
-        acc.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking)
-
-  def _apply_sparse(self, grad, var, state):
-    acc = state.get_slot(var, "accumulator")
-    return training_ops.sparse_apply_adagrad(
-        var,
-        acc,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad.values,
-        grad.indices,
-        use_locking=self._use_locking)
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    acc = state.get_slot(var, "accumulator")
-    return training_ops.resource_sparse_apply_adagrad(
-        var.handle,
-        acc.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        indices,
-        use_locking=self._use_locking)
+    super(AdagradOptimizer, self).__init__(
+        learning_rate=learning_rate,
+        initial_accumulator_value=initial_accumulator_value,
+        name=name)
diff --git a/tensorflow/contrib/optimizer_v2/adagrad_test.py b/tensorflow/contrib/optimizer_v2/adagrad_test.py
index debaaaeeba..320e41567f 100644
--- a/tensorflow/contrib/optimizer_v2/adagrad_test.py
+++ b/tensorflow/contrib/optimizer_v2/adagrad_test.py
@@ -68,9 +68,6 @@ class AdagradOptimizerTest(test.TestCase):
   def testBasicResource(self):
     self.doTestBasic(use_locking=False, use_resource=True)
 
-  def testBasicLocked(self):
-    self.doTestBasic(use_locking=True)
-
   def testMinimizeSparseResourceVariable(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
       with self.cached_session():
diff --git a/tensorflow/contrib/optimizer_v2/adam.py b/tensorflow/contrib/optimizer_v2/adam.py
index 04b1552b61..363e020757 100644
--- a/tensorflow/contrib/optimizer_v2/adam.py
+++ b/tensorflow/contrib/optimizer_v2/adam.py
@@ -18,22 +18,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.util import deprecation
 
 
-class AdamOptimizer(optimizer_v2.OptimizerV2):
+class AdamOptimizer(adam.Adam):
   """Optimizer that implements the Adam algorithm.
 
   See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
   ([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8,
                use_locking=False, name="Adam"):
     """Construct a new Adam optimizer.
@@ -87,111 +86,9 @@ class AdamOptimizer(optimizer_v2.OptimizerV2):
       name: Optional name for the operations created when applying gradients.
         Defaults to "Adam".
     """
-    super(AdamOptimizer, self).__init__(use_locking, name)
-
-    self._set_hyper("learning_rate", learning_rate)
-    self._set_hyper("beta1", beta1)
-    self._set_hyper("beta2", beta2)
-    self._set_hyper("epsilon", epsilon)
-
-  def _get_beta_accumulators(self, state=None):
-    if state is None:
-      state = self._get_per_graph_state()
-    return (state.get_non_slot("beta1_power"),
-            state.get_non_slot("beta2_power"))
-
-  def _create_vars(self, var_list, state):
-    # Non-slot variables end up on the same device(s).
-    state.create_non_slot(initial_value=lambda: state.get_hyper("beta1"),
-                          name="beta1_power")
-    state.create_non_slot(initial_value=lambda: state.get_hyper("beta2"),
-                          name="beta2_power")
-
-    # Create slots for the first and second moments.
-    for v in var_list:
-      state.zeros_slot(v, "m")
-      state.zeros_slot(v, "v")
-
-  def _apply_dense(self, grad, var, state):
-    m = state.get_slot(var, "m")
-    v = state.get_slot(var, "v")
-    beta1_power, beta2_power = self._get_beta_accumulators(state)
-    return training_ops.apply_adam(
-        var, m, v,
-        math_ops.cast(beta1_power, var.dtype.base_dtype),
-        math_ops.cast(beta2_power, var.dtype.base_dtype),
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        state.get_hyper("beta1", var.dtype.base_dtype),
-        state.get_hyper("beta2", var.dtype.base_dtype),
-        state.get_hyper("epsilon", var.dtype.base_dtype),
-        grad, use_locking=self._use_locking).op
-
-  def _resource_apply_dense(self, grad, var, state):
-    m = state.get_slot(var, "m")
-    v = state.get_slot(var, "v")
-    beta1_power, beta2_power = self._get_beta_accumulators(state)
-    return training_ops.resource_apply_adam(
-        var.handle, m.handle, v.handle,
-        math_ops.cast(beta1_power, grad.dtype.base_dtype),
-        math_ops.cast(beta2_power, grad.dtype.base_dtype),
-        state.get_hyper("learning_rate", grad.dtype.base_dtype),
-        state.get_hyper("beta1", grad.dtype.base_dtype),
-        state.get_hyper("beta2", grad.dtype.base_dtype),
-        state.get_hyper("epsilon", grad.dtype.base_dtype),
-        grad, use_locking=self._use_locking)
-
-  def _apply_sparse_shared(self, grad, var, indices, scatter_add, state):
-    beta1_power, beta2_power = self._get_beta_accumulators(state)
-    beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
-    beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
-    lr_t = state.get_hyper("learning_rate", var.dtype.base_dtype)
-    beta1_t = state.get_hyper("beta1", var.dtype.base_dtype)
-    beta2_t = state.get_hyper("beta2", var.dtype.base_dtype)
-    epsilon_t = state.get_hyper("epsilon", var.dtype.base_dtype)
-    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
-    # m_t = beta1 * m + (1 - beta1) * g_t
-    m = state.get_slot(var, "m")
-    m_scaled_g_values = grad * (1 - beta1_t)
-    m_t = state_ops.assign(m, m * beta1_t,
-                           use_locking=self._use_locking)
-    with ops.control_dependencies([m_t]):
-      m_t = scatter_add(m, indices, m_scaled_g_values)
-    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
-    v = state.get_slot(var, "v")
-    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
-    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
-    with ops.control_dependencies([v_t]):
-      v_t = scatter_add(v, indices, v_scaled_g_values)
-    v_sqrt = math_ops.sqrt(v_t)
-    var_update = state_ops.assign_sub(var,
-                                      lr * m_t / (v_sqrt + epsilon_t),
-                                      use_locking=self._use_locking)
-    return control_flow_ops.group(*[var_update, m_t, v_t])
-
-  def _apply_sparse(self, grad, var, state):
-    return self._apply_sparse_shared(
-        grad.values, var, grad.indices,
-        lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
-            x, i, v, use_locking=self._use_locking),
-        state)
-
-  def _resource_scatter_add(self, x, i, v):
-    with ops.control_dependencies(
-        [resource_variable_ops.resource_scatter_add(
-            x.handle, i, v)]):
-      return x.value()
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    return self._apply_sparse_shared(
-        grad, var, indices, self._resource_scatter_add, state)
-
-  def _finish(self, state):
-    # Update the power accumulators.
-    beta1_power, beta2_power = self._get_beta_accumulators(state)
-    update_beta1 = beta1_power.assign(
-        beta1_power * state.get_hyper("beta1"),
-        use_locking=self._use_locking)
-    update_beta2 = beta2_power.assign(
-        beta2_power * state.get_hyper("beta2"),
-        use_locking=self._use_locking)
-    return control_flow_ops.group(update_beta1, update_beta2)
+    super(AdamOptimizer, self).__init__(
+        learning_rate=learning_rate,
+        beta_1=beta1,
+        beta_2=beta2,
+        epsilon=epsilon,
+        name=name)
diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
index e13b82d1d2..3c68ef995a 100644
--- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
+++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
@@ -130,8 +130,8 @@ class CheckpointingTests(test.TestCase):
         # non-Layer dependency of the model
         "model/_non_layer/a_variable",
         # The optimizer creates two non-slot variables
-        "optimizer/beta1_power",
-        "optimizer/beta2_power",
+        "optimizer/beta_1_power",
+        "optimizer/beta_2_power",
         # Slot variables
         "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
         "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
@@ -161,21 +161,20 @@ class CheckpointingTests(test.TestCase):
         "my_model/dense/kernel",
         named_variables["model/_named_dense/kernel" + suffix].full_name)
     self.assertEqual(
-        "beta1_power",
-        named_variables["optimizer/beta1_power" + suffix].full_name)
+        "beta_1_power",
+        named_variables["optimizer/beta_1_power" + suffix].full_name)
     self.assertEqual(
-        "beta2_power",
-        named_variables["optimizer/beta2_power" + suffix].full_name)
+        "beta_2_power",
+        named_variables["optimizer/beta_2_power" + suffix].full_name)
     # Spot check the generated protocol buffers.
     self.assertEqual("optimizer",
                      serialized_graph.nodes[0].children[1].local_name)
     optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
         1].node_id]
-    self.assertEqual("beta1_power",
-                     optimizer_node.children[0].local_name)
-    self.assertEqual("beta1_power",
-                     serialized_graph.nodes[optimizer_node.children[0].node_id]
-                     .attributes[0].full_name)
+    self.assertEqual("beta_1_power", optimizer_node.children[0].local_name)
+    self.assertEqual(
+        "beta_1_power", serialized_graph.nodes[
+            optimizer_node.children[0].node_id].attributes[0].full_name)
     self.assertEqual(
         "my_model/dense/kernel",
         serialized_graph.nodes[optimizer_node.slot_variables[0]
@@ -241,9 +240,10 @@ class CheckpointingTests(test.TestCase):
     on_create_model = MyModel()
     on_create_optimizer = adam.AdamOptimizer(
         0.001,
-        # Preserve beta1_power and beta2_power when appying gradients so we can
-        # test that they've been restored correctly.
-        beta1=1.0, beta2=1.0)
+        # Preserve beta_1_power and beta_2_power when appying gradients
+        # so we can test that they've been restored correctly.
+        beta1=1.0,
+        beta2=1.0)
     on_create_root = util.Checkpoint(
         optimizer=on_create_optimizer, model=on_create_model)
     # Deferred restoration
@@ -263,9 +263,9 @@ class CheckpointingTests(test.TestCase):
     dummy_var = resource_variable_ops.ResourceVariable([1.])
     on_create_optimizer.minimize(loss=dummy_var.read_value)
     status.assert_consumed()
-    beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators()
-    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power))
-    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power))
+    beta_1_power, beta_2_power = on_create_optimizer._get_beta_accumulators()
+    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta_1_power))
+    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta_2_power))
 
   # TODO(allenl): Debug garbage created by this test in python3.
   def testDeferredRestorationUsageEager(self):
@@ -477,7 +477,7 @@ class CheckpointingTests(test.TestCase):
     no_slot_status.run_restore_ops()
     self.assertEqual(12., self.evaluate(new_root.var))
     new_root.optimizer = adam.AdamOptimizer(0.1)
-    with self.assertRaisesRegexp(AssertionError, "beta1_power"):
+    with self.assertRaisesRegexp(AssertionError, "beta_1_power"):
       slot_status.assert_consumed()
     self.assertEqual(12., self.evaluate(new_root.var))
     if context.executing_eagerly():
@@ -556,8 +556,8 @@ class CheckpointingTests(test.TestCase):
         self.evaluate(first_variable.assign([1.]))
         self.evaluate(optimizer.get_slot(
             var=first_variable, name="m").assign([2.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.evaluate(beta1_power.assign(3.))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta_1_power.assign(3.))
 
       # Save and load in a second graph
       second_graph = ops.Graph()
@@ -571,29 +571,29 @@ class CheckpointingTests(test.TestCase):
         self.evaluate(second_variable.assign([4.]))
         self.evaluate(optimizer.get_slot(
             var=second_variable, name="m").assign([5.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.evaluate(beta1_power.assign(6.))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta_1_power.assign(6.))
         save_path = second_root_checkpointable.save(checkpoint_prefix)
         self.evaluate(second_variable.assign([7.]))
         self.evaluate(optimizer.get_slot(
             var=second_variable, name="m").assign([8.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(6., self.evaluate(beta1_power))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta_1_power))
         status = second_root_checkpointable.restore(save_path)
         status.assert_consumed().run_restore_ops()
         self.assertAllEqual([4.], self.evaluate(second_variable))
         self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
             var=second_variable, name="m")))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(6., self.evaluate(beta1_power))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta_1_power))
 
       # Check that the first graph is unmolested
       with first_graph.as_default(), first_session.as_default():
         self.assertAllEqual([1.], self.evaluate(first_variable))
         self.assertAllEqual([2.], self.evaluate(optimizer.get_slot(
             var=first_variable, name="m")))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(3., self.evaluate(beta1_power))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(3., self.evaluate(beta_1_power))
 
 
 class TemplateTests(test.TestCase):
@@ -659,8 +659,8 @@ class CheckpointCompatibilityTests(test.TestCase):
     self.evaluate(model._named_dense.bias.assign([1.]))
     self.evaluate(optimizer.get_slot(
         var=model._named_dense.bias, name="m").assign([2.]))
-    beta1_power, _ = optimizer._get_beta_accumulators()
-    self.evaluate(beta1_power.assign(3.))
+    beta_1_power, _ = optimizer._get_beta_accumulators()
+    self.evaluate(beta_1_power.assign(3.))
     return root_checkpointable
 
   def _set_sentinels(self, root_checkpointable):
@@ -669,8 +669,8 @@ class CheckpointCompatibilityTests(test.TestCase):
         root_checkpointable.optimizer.get_slot(
             var=root_checkpointable.model._named_dense.bias, name="m")
         .assign([102.]))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
-    self.evaluate(beta1_power.assign(103.))
+    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.evaluate(beta_1_power.assign(103.))
 
   def _check_sentinels(self, root_checkpointable):
     self.assertAllEqual(
@@ -678,8 +678,8 @@ class CheckpointCompatibilityTests(test.TestCase):
     self.assertAllEqual([2.], self.evaluate(
         root_checkpointable.optimizer.get_slot(
             var=root_checkpointable.model._named_dense.bias, name="m")))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
-    self.assertAllEqual(3., self.evaluate(beta1_power))
+    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.assertAllEqual(3., self.evaluate(beta_1_power))
 
   def _write_name_based_checkpoint(self):
     checkpoint_directory = self.get_temp_dir()
diff --git a/tensorflow/contrib/optimizer_v2/gradient_descent.py b/tensorflow/contrib/optimizer_v2/gradient_descent.py
index 945c8de559..8bdf408217 100644
--- a/tensorflow/contrib/optimizer_v2/gradient_descent.py
+++ b/tensorflow/contrib/optimizer_v2/gradient_descent.py
@@ -18,15 +18,17 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import sgd
+from tensorflow.python.util import deprecation
 
 
-class GradientDescentOptimizer(optimizer_v2.OptimizerV2):
+class GradientDescentOptimizer(sgd.SGD):
   """Optimizer that implements the gradient descent algorithm."""
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate, use_locking=False, name="GradientDescent"):
     """Construct a new gradient descent optimizer.
 
@@ -41,29 +43,5 @@ class GradientDescentOptimizer(optimizer_v2.OptimizerV2):
       name: Optional name prefix for the operations created when applying
         gradients. Defaults to "GradientDescent".
     """
-    super(GradientDescentOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-
-  def _apply_dense(self, grad, var, state):
-    return training_ops.apply_gradient_descent(
-        var,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        use_locking=self._use_locking).op
-
-  def _resource_apply_dense(self, grad, handle, state):
-    lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
-    return training_ops.resource_apply_gradient_descent(
-        handle.handle, lr, grad, use_locking=self._use_locking)
-
-  def _resource_apply_sparse_duplicate_indices(
-      self, grad, handle, indices, state):
-    lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
-    return resource_variable_ops.resource_scatter_add(
-        handle.handle, indices, -grad * lr)
-
-  def _apply_sparse_duplicate_indices(self, grad, var, state):
-    delta = ops.IndexedSlices(
-        grad.values * state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad.indices, grad.dense_shape)
-    return var.scatter_sub(delta, use_locking=self._use_locking)
+    super(GradientDescentOptimizer, self).__init__(
+        learning_rate=learning_rate, name=name)
diff --git a/tensorflow/contrib/optimizer_v2/momentum.py b/tensorflow/contrib/optimizer_v2/momentum.py
index 0a5aadc2d1..0636f7e356 100644
--- a/tensorflow/contrib/optimizer_v2/momentum.py
+++ b/tensorflow/contrib/optimizer_v2/momentum.py
@@ -18,11 +18,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.training import training_ops
+from tensorflow.python.keras.optimizer_v2 import sgd
+from tensorflow.python.util import deprecation
 
 
-class MomentumOptimizer(optimizer_v2.OptimizerV2):
+class MomentumOptimizer(sgd.SGD):
   """Optimizer that implements the Momentum algorithm.
 
   Computes (if `use_nesterov = False`):
@@ -39,6 +39,10 @@ class MomentumOptimizer(optimizer_v2.OptimizerV2):
   when that part of the variable was used in the forward pass.
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, learning_rate, momentum,
                use_locking=False, name="Momentum", use_nesterov=False):
     """Construct a new Momentum optimizer.
@@ -68,57 +72,8 @@ class MomentumOptimizer(optimizer_v2.OptimizerV2):
     optimizer functions.
     @end_compatibility
     """
-    super(MomentumOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-    self._set_hyper("momentum", momentum)
-    self._use_nesterov = use_nesterov
-
-  def _create_vars(self, var_list, state):
-    for v in var_list:
-      state.zeros_slot(v, "momentum")
-
-  def _apply_dense(self, grad, var, state):
-    mom = state.get_slot(var, "momentum")
-    return training_ops.apply_momentum(
-        var,
-        mom,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        state.get_hyper("momentum", var.dtype.base_dtype),
-        use_locking=self._use_locking,
-        use_nesterov=self._use_nesterov).op
-
-  def _resource_apply_dense(self, grad, var, state):
-    mom = state.get_slot(var, "momentum")
-    return training_ops.resource_apply_momentum(
-        var.handle,
-        mom.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        state.get_hyper("momentum", var.dtype.base_dtype),
-        use_locking=self._use_locking,
-        use_nesterov=self._use_nesterov)
-
-  def _apply_sparse(self, grad, var, state):
-    mom = state.get_slot(var, "momentum")
-    return training_ops.sparse_apply_momentum(
-        var,
-        mom,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad.values,
-        grad.indices,
-        state.get_hyper("momentum", var.dtype.base_dtype),
-        use_locking=self._use_locking,
-        use_nesterov=self._use_nesterov).op
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    mom = state.get_slot(var, "momentum")
-    return training_ops.resource_sparse_apply_momentum(
-        var.handle,
-        mom.handle,
-        state.get_hyper("learning_rate", var.dtype.base_dtype),
-        grad,
-        indices,
-        state.get_hyper("momentum", var.dtype.base_dtype),
-        use_locking=self._use_locking,
-        use_nesterov=self._use_nesterov)
+    super(MomentumOptimizer, self).__init__(
+        learning_rate=learning_rate,
+        momentum=momentum,
+        name=name,
+        nesterov=use_nesterov)
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
index 53e27c08c4..9c98dd93b4 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
@@ -20,462 +20,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import abc
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.util import deprecation
 
-from tensorflow.python.eager import backprop
-from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gradients
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.training import distribute as distribute_lib
-from tensorflow.python.training import distribution_strategy_context
-from tensorflow.python.training import optimizer as optimizer_v1
-from tensorflow.python.training import slot_creator
-from tensorflow.python.training.checkpointable import base as checkpointable
-from tensorflow.python.util import nest
 
-
-class _OptimizableVariable(object):
-  """Interface for abstracting over variables in the optimizers."""
-
-  @abc.abstractmethod
-  def target(self):
-    """Returns the optimization target for this variable."""
-    raise NotImplementedError("Calling an abstract method.")
-
-  @abc.abstractmethod
-  def update_op(self, optimizer, g, *args):
-    """Returns the update ops for updating the variable."""
-    raise NotImplementedError("Calling an abstract method.")
-
-
-class _RefVariableProcessor(_OptimizableVariable):
-  """Processor for Variable."""
-
-  def __init__(self, v):
-    self._v = v
-
-  def target(self):
-    return self._v._ref()  # pylint: disable=protected-access
-
-  def update_op(self, optimizer, g, *args):
-    if isinstance(g, ops.Tensor):
-      update_op = optimizer._apply_dense(g, self._v, *args)  # pylint: disable=protected-access
-      if self._v.constraint is not None:
-        with ops.control_dependencies([update_op]):
-          return self._v.assign(self._v.constraint(self._v))
-      else:
-        return update_op
-    else:
-      assert isinstance(g, ops.IndexedSlices), ("Gradient ", g, " is neither a "
-                                                "tensor nor IndexedSlices.")
-      if self._v.constraint is not None:
-        raise RuntimeError(
-            "Cannot use a constraint function on a sparse variable.")
-      # pylint: disable=protected-access
-      return optimizer._apply_sparse_duplicate_indices(g, self._v, *args)
-
-
-class _DenseReadResourceVariableProcessor(_OptimizableVariable):
-  """Processor for dense ResourceVariables."""
-
-  def __init__(self, v):
-    self._v = v
-
-  def target(self):
-    return self._v
-
-  def update_op(self, optimizer, g, *args):
-    # pylint: disable=protected-access
-    update_op = optimizer._resource_apply_dense(g, self._v.op.inputs[0], *args)
-    if self._v.constraint is not None:
-      with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v.constraint(self._v))
-    else:
-      return update_op
-
-
-class _DenseResourceVariableProcessor(_OptimizableVariable):
-  """Processor for dense ResourceVariables."""
-
-  def __init__(self, v):
-    self._v = v
-
-  def target(self):
-    return self._v
-
-  def update_op(self, optimizer, g, *args):
-    # pylint: disable=protected-access
-    if isinstance(g, ops.IndexedSlices):
-      if self._v.constraint is not None:
-        raise RuntimeError(
-            "Cannot use a constraint function on a sparse variable.")
-      return optimizer._resource_apply_sparse_duplicate_indices(
-          g.values, self._v, g.indices, *args)
-    update_op = optimizer._resource_apply_dense(g, self._v, *args)
-    if self._v.constraint is not None:
-      with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v.constraint(self._v))
-    else:
-      return update_op
-
-
-class _TensorProcessor(_OptimizableVariable):
-  """Processor for ordinary Tensors.
-
-  Even though a Tensor can't really be updated, sometimes it is useful to
-  compute the gradients with respect to a Tensor using the optimizer. Updating
-  the Tensor is, of course, unsupported.
-  """
-
-  def __init__(self, v):
-    self._v = v
-
-  def target(self):
-    return self._v
-
-  def update_op(self, optimizer, g, *args):
-    raise NotImplementedError("Trying to update a Tensor ", self._v)
-
-
-def _get_processor(v):
-  """The processor of v."""
-  if context.executing_eagerly():
-    if isinstance(v, ops.Tensor):
-      return _TensorProcessor(v)
-    else:
-      return _DenseResourceVariableProcessor(v)
-  if v.op.type == "VarHandleOp":
-    return _DenseResourceVariableProcessor(v)
-  if isinstance(v, variables.Variable):
-    return _RefVariableProcessor(v)
-  if isinstance(v, ops.Tensor):
-    return _TensorProcessor(v)
-  raise NotImplementedError("Trying to optimize unsupported type ", v)
-
-
-def _var_key_v2(var):
-  """Key for representing a primary variable, for looking up slots."""
-  # pylint: disable=protected-access
-  if hasattr(var, "_distributed_container"):
-    distributed_container = var._distributed_container()
-    assert distributed_container is not None
-    if context.executing_eagerly():
-      return distributed_container._unique_id
-    return distributed_container._shared_name
-  if context.executing_eagerly():
-    return var._unique_id
-  return var.op.name
-
-
-def _resolve(value, name):
-  if callable(value):
-    value = value()
-  return ops.convert_to_tensor(value, name=name)
-
-
-def _is_dynamic(value):
-  """Returns true if __init__ arg `value` should be re-evaluated each step."""
-  if callable(value): return True
-  # Don't need to do anything special in graph mode, since dynamic values
-  # will propagate correctly automatically.
-  # TODO(josh11b): Add per-device caching across steps using variables for
-  # truly static values once we add distributed support.
-  if context.executing_eagerly() and isinstance(
-      value, resource_variable_ops.ResourceVariable):
-    return True
-  return False
-
-
-class _OptimizerV2State(object):
-  """Holds per-graph and per-step optimizer state.
-
-  Use _init_with_static_hyper() to create the state for a graph, and then
-  _copy_with_dynamic_hyper() to convert that to state for a particular step.
-  The difference between the two is that the former only has hyper
-  parameter values that are static and the latter also has values that
-  can change every step (according to _is_dynamic()).
-  """
-
-  def __init__(self, op_name):
-    self._op_name = op_name
-
-  def _init_with_static_hyper(self, hyper):
-    """Initialize a fresh state object from hyper dict."""
-    # self._hyper contains a dict from name to a dict with the Tensor values.
-    # This dict starts with a single item with key "None" with the hyper
-    # parameter value converted to a Tensor. Other items have dtype keys
-    # with that Tensor cast to that dtype.
-    with ops.init_scope():
-      self._hyper = {name: {None: ops.convert_to_tensor(value, name=name)}
-                     for name, (dynamic, value) in sorted(hyper.items())
-                     if not dynamic}
-    self._slots = {}
-    self._non_slot_dict = {}
-    # Extra state to help Optimizers implement Checkpointable. Holds information
-    # about variables which will be restored as soon as they're created.
-    self._deferred_dependencies = {}  # Non-slot variables
-    self._deferred_slot_restorations = {}  # Slot variables
-
-  def _copy_with_dynamic_hyper(self, hyper, distribution, non_slot_devices):
-    """Create a new state object for a particular step."""
-    ret = _OptimizerV2State(self._op_name)
-    # pylint: disable=protected-access
-    ret._slots = self._slots
-    ret._non_slot_dict = self._non_slot_dict
-    ret._deferred_dependencies = self._deferred_dependencies
-    ret._deferred_slot_restorations = self._deferred_slot_restorations
-    ret._hyper = {name: {None: _resolve(value, name)}
-                  for name, (dynamic, value) in sorted(hyper.items())
-                  if dynamic}
-    ret._hyper.update(self._hyper)
-    ret._non_slot_devices = non_slot_devices
-    ret._distribution = distribution
-    return ret
-
-  def _variables(self):
-    """Returns a list of all variables held by self."""
-    optimizer_variables = list(self._non_slot_dict.values())
-    for variable_dict in self._slots.values():
-      for slot_for_variable in variable_dict.values():
-        optimizer_variables.append(slot_for_variable)
-    # Sort variables by name so that the return is deterministic.
-    return sorted(optimizer_variables, key=lambda v: v.name)
-
-  def _slot_dict(self, slot_name):
-    """Returns a dict for caching slots created under the given name.
-
-    Args:
-      slot_name: Name for the slot.
-
-    Returns:
-      A dict that maps primary `Variable` objects to the slot created
-      for that variable, under the given slot name.
-    """
-    named_slots = self._slots.get(slot_name, None)
-    if named_slots is None:
-      named_slots = {}
-      self._slots[slot_name] = named_slots
-    return named_slots
-
-  def create_slot(self, var, val, slot_name, optional_op_name=None):
-    """Find or create a slot for a variable.
-
-    Args:
-      var: A `Variable` object.
-      val: A `Tensor`.  The initial value of the slot.
-      slot_name: Name for the slot.
-      optional_op_name: Name to use when scoping the Variable that
-        needs to be created for the slot.
-
-    Returns:
-      A `Variable` object.
-    """
-    named_slots = self._slot_dict(slot_name)
-    var_key = _var_key_v2(var)
-    if var_key not in named_slots:
-      new_slot_variable = slot_creator.create_slot(
-          var, val, optional_op_name or self._op_name)
-      self._restore_slot_variable(
-          slot_name=slot_name, variable=var,
-          slot_variable=new_slot_variable)
-      named_slots[var_key] = new_slot_variable
-    return named_slots[var_key]
-
-  def create_slot_with_initializer(self, var, initializer, shape, dtype,
-                                   slot_name, optional_op_name=None):
-    """Find or create a slot for a variable, using an Initializer.
-
-    Args:
-      var: A `Variable` object.
-      initializer: An `Initializer`.  The initial value of the slot.
-      shape: Shape of the initial value of the slot.
-      dtype: Type of the value of the slot.
-      slot_name: Name for the slot.
-      optional_op_name: Name to use when scoping the Variable that
-        needs to be created for the slot.
-
-    Returns:
-      A `Variable` object.
-    """
-    named_slots = self._slot_dict(slot_name)
-    var_key = _var_key_v2(var)
-    if var_key not in named_slots:
-      new_slot_variable = slot_creator.create_slot_with_initializer(
-          var, initializer, shape, dtype, optional_op_name or self._op_name)
-      self._restore_slot_variable(
-          slot_name=slot_name, variable=var,
-          slot_variable=new_slot_variable)
-      named_slots[var_key] = new_slot_variable
-    return named_slots[var_key]
-
-  def zeros_slot(self, var, slot_name, optional_op_name=None):
-    """Find or create a slot initialized with 0.0.
-
-    Args:
-      var: A `Variable` object.
-      slot_name: Name for the slot.
-      optional_op_name: Name to use when scoping the Variable that
-        needs to be created for the slot.
-
-    Returns:
-      A `Variable` object.
-    """
-    named_slots = self._slot_dict(slot_name)
-    var_key = _var_key_v2(var)
-    if var_key not in named_slots:
-      new_slot_variable = slot_creator.create_zeros_slot(
-          var, optional_op_name or self._op_name)
-      self._restore_slot_variable(
-          slot_name=slot_name, variable=var,
-          slot_variable=new_slot_variable)
-      named_slots[var_key] = new_slot_variable
-    return named_slots[var_key]
-
-  def _create_or_restore_slot_variable(
-      self, slot_variable_position, slot_name, variable,
-      optional_op_name=None):
-    """Restore a slot variable's value, possibly creating it.
-
-    Called when a variable which has an associated slot variable is created or
-    restored. When executing eagerly, we create the slot variable with a
-    restoring initializer.
-
-    No new variables are created when graph building. Instead,
-    _restore_slot_variable catches these after normal creation and adds restore
-    ops to the graph. This method is nonetheless important when graph building
-    for the case when a slot variable has already been created but `variable`
-    has just been added to a dependency graph (causing us to realize that the
-    slot variable needs to be restored).
-
-    Args:
-      slot_variable_position: A `checkpointable._CheckpointPosition` object
-        indicating the slot variable `Checkpointable` object to be restored.
-      slot_name: The name of this `Optimizer`'s slot to restore into.
-      variable: The variable object this slot is being created for.
-      optional_op_name: Name to use when scoping the Variable that
-        needs to be created for the slot.
-    """
-    slot_variable = self.get_slot(var=variable, name=slot_name)
-    if (slot_variable is None and context.executing_eagerly() and
-        slot_variable_position.is_simple_variable()
-        # Defer slot variable creation if there is an active variable creator
-        # scope. Generally we'd like to eagerly create/restore slot variables
-        # when possible, but this may mean that scopes intended to catch
-        # `variable` also catch its eagerly created slot variable
-        # unintentionally (specifically make_template would add a dependency on
-        # a slot variable if not for this case). Deferring is mostly harmless
-        # (aside from double initialization), and makes variable creator scopes
-        # behave the same way they do when graph building.
-        and not ops.get_default_graph()._variable_creator_stack):  # pylint: disable=protected-access
-      initializer = checkpointable.CheckpointInitialValue(
-          checkpoint_position=slot_variable_position)
-      slot_variable = self.create_slot(
-          var=variable,
-          val=initializer,
-          slot_name=slot_name,
-          optional_op_name=optional_op_name)
-      # Optimizers do not have unconditional dependencies on their slot
-      # variables (nor do any other objects). They are only saved if the
-      # variables they were created for are also saved.
-    if slot_variable is not None:
-      # If we've either made this slot variable, or if we've pulled out an
-      # existing slot variable, we should restore it.
-      slot_variable_position.restore(slot_variable)
-    else:
-      # We didn't make the slot variable. Defer restoring until it gets created
-      # normally. We keep a list rather than the one with the highest restore
-      # UID in case slot variables have their own dependencies, in which case
-      # those could differ between restores.
-      variable_key = _var_key_v2(variable)
-      self._deferred_slot_restorations.setdefault(
-          slot_name, {}).setdefault(variable_key, []).append(
-              slot_variable_position)
-
-  def get_slot(self, var, name):
-    """Return a slot named `name` created for `var` by the Optimizer.
-
-    Some `Optimizer` subclasses use additional variables.  For example
-    `Momentum` and `Adagrad` use variables to accumulate updates.  This method
-    gives access to these `Variable` objects if for some reason you need them.
-
-    Use `get_slot_names()` to get the list of slot names created by the
-    `Optimizer`.
-
-    Args:
-      var: A variable passed to `minimize()` or `apply_gradients()`.
-      name: A string.
-
-    Returns:
-      The `Variable` for the slot if it was created, `None` otherwise.
-    """
-    named_slots = self._slots.get(name, None)
-    if not named_slots:
-      return None
-    return named_slots.get(_var_key_v2(var), None)
-
-  def get_slot_names(self):
-    """Return a list of the names of slots created by the `Optimizer`.
-
-    See `get_slot()`.
-
-    Returns:
-      A list of strings.
-    """
-    return sorted(self._slots.keys())
-
-  def create_non_slot(self, initial_value, name, colocate_with=None):
-    """Add an extra variable, not associated with a slot."""
-    v = self._non_slot_dict.get(name, None)
-    if v is None:
-      if colocate_with is None: colocate_with = self._non_slot_devices
-      with self._distribution.colocate_vars_with(colocate_with):
-        # TODO(josh11b): Use get_variable() except for the legacy Adam use case.
-        v = variable_scope.variable(initial_value, name=name, trainable=False)
-      self._non_slot_dict[name] = v
-      deferred_dependencies_list = self._deferred_dependencies.pop(name, ())
-      for checkpoint_position in sorted(
-          deferred_dependencies_list,
-          key=lambda restore: restore.checkpoint.restore_uid,
-          reverse=True):
-        checkpoint_position.restore(v)
-    return v
-
-  def _restore_slot_variable(self, slot_name, variable, slot_variable):
-    """Restore a newly created slot variable's value."""
-    variable_key = _var_key_v2(variable)
-    deferred_restorations = self._deferred_slot_restorations.get(
-        slot_name, {}).pop(variable_key, [])
-    # Iterate over restores, highest restore UID first to minimize the number
-    # of assignments.
-    deferred_restorations.sort(key=lambda position: position.restore_uid,
-                               reverse=True)
-    for checkpoint_position in deferred_restorations:
-      checkpoint_position.restore(slot_variable)
-
-  def get_non_slot(self, name):
-    """Returns the non-slot variable identified by `name`."""
-    return self._non_slot_dict.get(name, None)
-
-  def get_hyper(self, name, dtype=None):
-    """Returns the `name` hyper parameter, optionally cast to `dtype`."""
-    dtype_dict = self._hyper[name]
-    # Do we have the value cast to dtype already cached? This should always
-    # succeed when dtype is None.
-    if dtype in dtype_dict:
-      return dtype_dict[dtype]
-    # Not cached, cast to dtype and save the result in the cache.
-    result = math_ops.cast(dtype_dict[None], dtype)
-    dtype_dict[dtype] = result
-    return result
-
-
-class OptimizerV2(optimizer_v1.Optimizer):
+class OptimizerV2(optimizer_v2.OptimizerV2):
   """Updated base class for optimizers.
 
   This class defines the API to add Ops to train a model.  You never use this
@@ -586,6 +135,10 @@ class OptimizerV2(optimizer_v1.Optimizer):
   GATE_OP = 1
   GATE_GRAPH = 2
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self, use_locking, name):
     """Create a new Optimizer.
 
@@ -606,746 +159,4 @@ class OptimizerV2(optimizer_v1.Optimizer):
       RuntimeError: If _create_slots has been overridden instead of
           _create_vars.
     """
-    # Note: We intentionally don't call parent __init__.
-
-    # Optimizer._create_slots was replaced by _create_vars in OptimizerV2.
-    if (self.__class__._create_slots.__code__ is not  # pylint: disable=protected-access
-        OptimizerV2._create_slots.__code__):
-      raise RuntimeError("Override _create_vars instead of _create_slots when "
-                         "descending from OptimizerV2 (class %s)" %
-                         self.__class__.__name__)
-    if not name:
-      raise ValueError("Must specify the optimizer name")
-
-    self._use_locking = use_locking
-    self._name = name
-    # Map from graph_key to state for that graph. We use the graph_key
-    # since it works in both eager and graph mode, and gives the outer
-    # graph inside functions.
-    tower_context = distribution_strategy_context.get_tower_context()
-    if tower_context is None:
-      # In a cross-tower context for a DistributionStrategy, which means
-      # only one Optimizer will be created, not one per tower.
-      self._per_graph_state = {}
-    else:
-      # We use get_tower_context().merge_call() to get a single dict
-      # shared across all model replicas when running with a
-      # DistributionStrategy.
-      self._per_graph_state = tower_context.merge_call(lambda _: {})
-
-    # Hyper parameters, and whether they should be re-evaluated every step.
-    self._hyper = {}
-
-  def _set_hyper(self, name, value):
-    self._hyper[name] = (_is_dynamic(value), value)
-
-  def minimize(self, loss, global_step=None, var_list=None,
-               gate_gradients=GATE_OP, aggregation_method=None,
-               colocate_gradients_with_ops=False, name=None,
-               grad_loss=None, stop_gradients=None,
-               scale_loss_by_num_towers=None):
-    """Add operations to minimize `loss` by updating `var_list`.
-
-    This method simply combines calls `compute_gradients()` and
-    `apply_gradients()`. If you want to process the gradient before applying
-    them call `compute_gradients()` and `apply_gradients()` explicitly instead
-    of using this function.
-
-    Args:
-      loss: A `Tensor` containing the value to minimize.
-      global_step: Optional `Variable` to increment by one after the
-        variables have been updated.
-      var_list: Optional list or tuple of `Variable` objects to update to
-        minimize `loss`.  Defaults to the list of variables collected in
-        the graph under the key `GraphKeys.TRAINABLE_VARIABLES`.
-      gate_gradients: How to gate the computation of gradients.  Can be
-        `GATE_NONE`, `GATE_OP`, or  `GATE_GRAPH`.
-      aggregation_method: Specifies the method used to combine gradient terms.
-        Valid values are defined in the class `AggregationMethod`.
-      colocate_gradients_with_ops: If True, try colocating gradients with
-        the corresponding op.
-      name: Optional name for the returned operation.
-      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
-      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
-        through.
-      scale_loss_by_num_towers: Optional boolean. If true, scale the loss
-        down by the number of towers. By default, auto-detects whether this
-        is needed.
-
-    Returns:
-      An Operation that updates the variables in `var_list`.  If `global_step`
-      was not `None`, that operation also increments `global_step`.
-
-    Raises:
-      ValueError: If some of the variables are not `Variable` objects.
-
-    @compatibility(eager)
-    When eager execution is enabled, `loss` should be a Python function that
-    takes elements of `var_list` as arguments and computes the value to be
-    minimized. If `var_list` is None, `loss` should take no arguments.
-    Minimization (and gradient computation) is done with respect to the
-    elements of `var_list` if not None, else with respect to any trainable
-    variables created during the execution of the `loss` function.
-    `gate_gradients`, `aggregation_method`, `colocate_gradients_with_ops` and
-    `grad_loss` are ignored when eager execution is enabled.
-    @end_compatibility
-    """
-    grads_and_vars = self.compute_gradients(
-        loss, var_list=var_list, gate_gradients=gate_gradients,
-        aggregation_method=aggregation_method,
-        colocate_gradients_with_ops=colocate_gradients_with_ops,
-        grad_loss=grad_loss, stop_gradients=stop_gradients,
-        scale_loss_by_num_towers=scale_loss_by_num_towers)
-
-    vars_with_grad = [v for g, v in grads_and_vars if g is not None]
-    if not vars_with_grad:
-      raise ValueError(
-          "No gradients provided for any variable, check your graph for ops"
-          " that do not support gradients, between variables %s and loss %s." %
-          ([str(v) for _, v in grads_and_vars], loss))
-
-    return self.apply_gradients(grads_and_vars, global_step=global_step,
-                                name=name)
-
-  def compute_gradients(self, loss, var_list=None,
-                        gate_gradients=GATE_OP,
-                        aggregation_method=None,
-                        colocate_gradients_with_ops=False,
-                        grad_loss=None, stop_gradients=None,
-                        scale_loss_by_num_towers=None):
-    """Compute gradients of `loss` for the variables in `var_list`.
-
-    This is the first part of `minimize()`.  It returns a list
-    of (gradient, variable) pairs where "gradient" is the gradient
-    for "variable".  Note that "gradient" can be a `Tensor`, an
-    `IndexedSlices`, or `None` if there is no gradient for the
-    given variable.
-
-    Args:
-      loss: A Tensor containing the value to minimize or a callable taking
-        no arguments which returns the value to minimize. When eager execution
-        is enabled it must be a callable.
-      var_list: Optional list or tuple of `tf.Variable` to update to minimize
-        `loss`.  Defaults to the list of variables collected in the graph
-        under the key `GraphKeys.TRAINABLE_VARIABLES`.
-      gate_gradients: How to gate the computation of gradients.  Can be
-        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
-      aggregation_method: Specifies the method used to combine gradient terms.
-        Valid values are defined in the class `AggregationMethod`.
-      colocate_gradients_with_ops: If True, try colocating gradients with
-        the corresponding op.
-      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
-      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
-        through.
-      scale_loss_by_num_towers: Optional boolean. If true, scale the loss
-        down by the number of towers. By default, auto-detects whether this
-        is needed.
-
-    Returns:
-      A list of (gradient, variable) pairs. Variable is always present, but
-      gradient can be `None`.
-
-    Raises:
-      TypeError: If `var_list` contains anything else than `Variable` objects.
-      ValueError: If some arguments are invalid.
-      RuntimeError: If called with eager execution enabled and `loss` is
-        not callable.
-
-    @compatibility(eager)
-    When eager execution is enabled, `gate_gradients`, `aggregation_method`,
-    and `colocate_gradients_with_ops` are ignored.
-    @end_compatibility
-    """
-    # TODO(josh11b): Test that we handle weight decay in a reasonable way.
-    if callable(loss):
-      with backprop.GradientTape() as tape:
-        if var_list is not None:
-          tape.watch(var_list)
-        loss_value = loss()
-
-        # Scale loss for number of towers (callable-loss case). In this case,
-        # we have to be careful to call distribute_lib.get_loss_reduction()
-        # *after* loss() is evaluated, so we know what loss reduction it uses.
-        if scale_loss_by_num_towers is None:
-          scale_loss_by_num_towers = (
-              distribute_lib.get_loss_reduction() ==
-              variable_scope.VariableAggregation.MEAN)
-        if scale_loss_by_num_towers:
-          num_towers = distribution_strategy_context.get_distribution_strategy(
-          ).num_towers
-          if num_towers > 1:
-            loss_value *= 1. / num_towers
-
-      if var_list is None:
-        var_list = tape.watched_variables()
-      grads = tape.gradient(loss_value, var_list, grad_loss)
-      return list(zip(grads, var_list))
-    if context.executing_eagerly():
-      raise RuntimeError(
-          "`loss` passed to Optimizer.compute_gradients should "
-          "be a function when eager execution is enabled.")
-
-    # Scale loss for number of towers (non-callable-loss case).
-    if scale_loss_by_num_towers is None:
-      scale_loss_by_num_towers = (
-          distribute_lib.get_loss_reduction() ==
-          variable_scope.VariableAggregation.MEAN)
-    if scale_loss_by_num_towers:
-      num_towers = distribution_strategy_context.get_distribution_strategy(
-      ).num_towers
-      if num_towers > 1:
-        loss *= 1. / num_towers
-
-    if gate_gradients not in [optimizer_v1.Optimizer.GATE_NONE,
-                              optimizer_v1.Optimizer.GATE_OP,
-                              optimizer_v1.Optimizer.GATE_GRAPH]:
-      raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
-                       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
-                       gate_gradients)
-    self._assert_valid_dtypes([loss])
-    if grad_loss is not None:
-      self._assert_valid_dtypes([grad_loss])
-    if var_list is None:
-      var_list = (
-          variables.trainable_variables() +
-          ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
-    else:
-      var_list = nest.flatten(var_list)
-    # pylint: disable=protected-access
-    var_list += ops.get_collection(ops.GraphKeys._STREAMING_MODEL_PORTS)
-    # pylint: enable=protected-access
-    processors = [_get_processor(v) for v in var_list]
-    if not var_list:
-      raise ValueError("No variables to optimize.")
-    var_refs = [p.target() for p in processors]
-    grads = gradients.gradients(
-        loss, var_refs, grad_ys=grad_loss,
-        gate_gradients=(gate_gradients == optimizer_v1.Optimizer.GATE_OP),
-        aggregation_method=aggregation_method,
-        colocate_gradients_with_ops=colocate_gradients_with_ops,
-        stop_gradients=stop_gradients)
-    if gate_gradients == optimizer_v1.Optimizer.GATE_GRAPH:
-      grads = control_flow_ops.tuple(grads)
-    grads_and_vars = list(zip(grads, var_list))
-    self._assert_valid_dtypes(
-        [v for g, v in grads_and_vars
-         if g is not None and v.dtype != dtypes.resource])
-    return grads_and_vars
-
-  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
-    """Apply gradients to variables.
-
-    This is the second part of `minimize()`. It returns an `Operation` that
-    applies gradients.
-
-    Args:
-      grads_and_vars: List of (gradient, variable) pairs as returned by
-        `compute_gradients()`.
-      global_step: Optional `Variable` to increment by one after the
-        variables have been updated.
-      name: Optional name for the returned operation.  Default to the
-        name passed to the `Optimizer` constructor.
-
-    Returns:
-      An `Operation` that applies the specified gradients. If `global_step`
-      was not None, that operation also increments `global_step`.
-
-    Raises:
-      TypeError: If `grads_and_vars` is malformed.
-      ValueError: If none of the variables have gradients.
-    """
-    # This is a default implementation of apply_gradients() that can be shared
-    # by most optimizers.  It relies on the subclass implementing the following
-    # methods: _create_vars(), _prepare(), _apply_dense(), and _apply_sparse().
-
-    # Filter out variables with gradients of `None`.
-    grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
-    if not grads_and_vars:
-      raise ValueError("No variables provided.")
-    filtered = tuple((g, v) for (g, v) in grads_and_vars if g is not None)
-    if not filtered:
-      raise ValueError("No gradients provided for any variable: %s." %
-                       ([str(v) for _, v in grads_and_vars],))
-    return distribution_strategy_context.get_tower_context().merge_call(
-        self._distributed_apply, filtered, global_step=global_step, name=name)
-
-  def _get_or_create_state(self, var_list=None):
-    """Either looks up or creates `_OptimizerV2State`.
-
-    If any variables are available, they should be passed via the `var_list`
-    argument, and these will be used to determine the graph to create/retrieve
-    state for. Otherwise the returned state is for the current default graph.
-
-    Args:
-      var_list: A list of variables to extract a graph from.
-
-    Returns:
-      An `_OptimizerV2State` object.
-    """
-    # Determine the graph_key from the current graph.
-    eager_execution = context.executing_eagerly()
-    if eager_execution or var_list is None:
-      graph = ops.get_default_graph()
-    else:
-      graph = ops._get_graph_from_inputs(var_list)  # pylint: disable=protected-access
-    assert graph is not None
-    graph_key = graph._graph_key  # pylint: disable=protected-access
-
-    # Get the per graph state by looking up the graph_key.
-    if graph_key in self._per_graph_state:
-      per_graph_state = self._per_graph_state[graph_key]
-    else:
-      per_graph_state = _OptimizerV2State(self._name)
-      per_graph_state._init_with_static_hyper(self._hyper)  # pylint: disable=protected-access
-      self._per_graph_state[graph_key] = per_graph_state
-    return per_graph_state
-
-  def _distributed_apply(self, distribution, grads_and_vars, global_step, name):
-    """`apply_gradients` for use with a `DistributionStrategy`."""
-    reduced_grads = distribution.batch_reduce(
-        variable_scope.VariableAggregation.SUM, grads_and_vars)
-    var_list = [v for _, v in grads_and_vars]
-    grads_and_vars = zip(reduced_grads, var_list)
-
-    unwrapped_var_list = [x for v in var_list for x in distribution.unwrap(v)]
-    eager_execution = context.executing_eagerly()
-    if eager_execution:
-      # Give a clear error in this case instead of "name not supported
-      # for Eager Tensors" when we compute non_slot_devices.
-      for v in unwrapped_var_list:
-        if isinstance(v, ops.Tensor):
-          raise NotImplementedError("Trying to update a Tensor ", v)
-
-    with ops.name_scope(name, self._name) as name:
-      per_graph_state = self._get_or_create_state(var_list=unwrapped_var_list)
-      # Include the current value of any dynamic hyper parameters in `state`.
-      non_slot_devices = distribution.non_slot_devices(var_list)
-      state = per_graph_state._copy_with_dynamic_hyper(  # pylint: disable=protected-access
-          self._hyper, distribution, non_slot_devices)
-
-    # Create any slot and non-slot variables we need in `state`.
-    with ops.init_scope():
-      self._create_vars(var_list, state)
-
-    with ops.name_scope(name):  # Re-enter name_scope created above
-      # Give the child class a chance to do something before we start
-      # applying gradients.
-      self._prepare(state)
-
-      def update(v, g):
-        """Update variable `v` using gradient `g`."""
-        assert v is not None
-
-        # Convert the grad to Tensor or IndexedSlices if necessary, and
-        # look up a processor for each variable's type.
-        try:
-          g = ops.convert_to_tensor_or_indexed_slices(g)
-        except TypeError:
-          raise TypeError(
-              "Gradient must be convertible to a Tensor"
-              " or IndexedSlices, or None: %s" % g)
-        if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
-          raise TypeError(
-              "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
-        processor = _get_processor(v)
-
-        # We colocate all ops created in _apply_dense or _apply_sparse
-        # on the same device as the variable.
-        # TODO(apassos): figure out how to get the variable name here.
-        scope_name = "" if eager_execution else v.op.name
-        # device_policy is set because non-mirrored tensors will be read in
-        # `update_op`.
-        # TODO(josh11b): Make different state objects for each device to
-        # avoid needing to set the device_policy.
-        with ops.name_scope("update_" + scope_name), \
-            context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
-          return processor.update_op(self, g, state)
-
-      # Use the processors to update the variables.
-      update_ops = []
-      for grad, var in grads_and_vars:
-        update_ops.extend(distribution.update(var, update, grad, grouped=False))
-
-      # Give the child class a chance to do something after applying
-      # gradients
-      def finish():
-        # TODO(josh11b): Make different state objects for each device to
-        # avoid needing to set the device_policy.
-        with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
-          return self._finish(state)
-
-      update_ops = control_flow_ops.group(update_ops)
-      with ops.control_dependencies([update_ops]):
-        finish_updates = distribution.update_non_slot(
-            non_slot_devices, finish, grouped=False)
-      # We said grouped=False, which means finish_updates is always a list.
-      # It will be [None] when finish() returns None.
-      if finish_updates == [None]:
-        finish_updates = [update_ops]
-
-      # Update `global_step` (if any).
-      if global_step is None:
-        apply_updates = distribution.group(finish_updates, name=name)
-      else:
-        with ops.control_dependencies(finish_updates):
-
-          def update_global_step(global_step, name):
-            return global_step.assign_add(1, read_value=False, name=name)
-
-          apply_updates = distribution.update(
-              global_step, update_global_step, name)
-
-      # Add the training op to the TRAIN_OP graph collection in graph mode.
-      if not eager_execution:
-        if isinstance(apply_updates, ops.Tensor):
-          apply_updates = apply_updates.op
-        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
-        if apply_updates not in train_op:
-          train_op.append(apply_updates)
-
-      return apply_updates
-
-  def get_slot(self, var, name):
-    """Return a slot named `name` created for `var` by the Optimizer.
-
-    Some `Optimizer` subclasses use additional variables.  For example
-    `Momentum` and `Adagrad` use variables to accumulate updates.  This method
-    gives access to these `Variable` objects if for some reason you need them.
-
-    Use `get_slot_names()` to get the list of slot names created by the
-    `Optimizer`.
-
-    Args:
-      var: A variable passed to `minimize()` or `apply_gradients()`.
-      name: A string.
-
-    Returns:
-      The `Variable` for the slot if it was created, `None` otherwise.
-    """
-    state = self._get_state_for_var(var)
-    return state.get_slot(var, name) if state is not None else None
-
-  def get_slot_names(self):
-    """Return a list of the names of slots created by the `Optimizer`.
-
-    See `get_slot()`.
-
-    Returns:
-      A list of strings.
-    """
-    state = self._get_per_graph_state()
-    return state.get_slot_names() if state is not None else []
-
-  def variables(self):
-    """A list of variables which encode the current state of `Optimizer`.
-
-    Includes slot variables and additional global variables created by the
-    optimizer in the current default graph.
-
-    Returns:
-      A list of variables.
-    """
-    state = self._get_per_graph_state()
-    return state._variables() if state is not None else []  # pylint: disable=protected-access
-
-  # --------------
-  # Methods to be implemented by subclasses if they want to use the
-  # inherited implementation of apply_gradients() or compute_gradients().
-  # --------------
-  def _create_vars(self, var_list, state):
-    """Create all slots needed by the variables and any non-slot variables.
-
-    Args:
-      var_list: A list of `Variable` objects.
-      state: An object with these methods:
-        `create_slot(var, val, slot_name, optional_op_name)`,
-        `create_slot_with_initializer(`
-            `var, initializer, shape, dtype, slot_name, optional_op_name)`,
-        `zeros_slot(var, slot_name, optional_op_name)`,
-        `create_non_slot_variable(initial_value, name, colocate_with)`,
-        `get_hyper(name)`
-    """
-    # No slots needed by default
-    pass
-
-  def _prepare(self, state):
-    """Code to execute before applying gradients.
-
-    Note that most uses of _prepare() in Optimizer have been subsumed
-    by explicit support for hyper parameters in OptimizerV2
-
-    Args:
-      state: An object with a `get_hyper(name)` method.
-
-    Returns:
-      Return value will be ignored.
-    """
-    pass
-
-  def _apply_dense(self, grad, var, state):
-    """Add ops to apply dense gradients to `var`.
-
-    Args:
-      grad: A `Tensor`.
-      var: A `Variable` object.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation`.
-    """
-    raise NotImplementedError()
-
-  def _resource_apply_dense(self, grad, handle, state):
-    """Add ops to apply dense gradients to the variable `handle`.
-
-    Args:
-      grad: a `Tensor` representing the gradient.
-      handle: a `Tensor` of dtype `resource` which points to the variable
-       to be updated.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation` which updates the value of the variable.
-    """
-    raise NotImplementedError()
-
-  def _resource_apply_sparse_duplicate_indices(
-      self, grad, handle, indices, state):
-    """Add ops to apply sparse gradients to `handle`, with repeated indices.
-
-    Optimizers which override this method must deal with repeated indices. See
-    the docstring of `_apply_sparse_duplicate_indices` for details. By default
-    the correct behavior, to sum non-unique indices and their associated
-    gradients, is enforced by first pre-processing `grad` and `indices` and
-    passing them on to `_resource_apply_sparse`. Optimizers which deal correctly
-    with duplicate indices may instead override this method to avoid the
-    overhead of summing.
-
-    Args:
-      grad: a `Tensor` representing the gradient for the affected indices.
-      handle: a `Tensor` of dtype `resource` which points to the variable
-       to be updated.
-      indices: a `Tensor` of integral type representing the indices for
-       which the gradient is nonzero. Indices may be repeated.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation` which updates the value of the variable.
-    """
-    # pylint: disable=protected-access
-    summed_grad, unique_indices = optimizer_v1._deduplicate_indexed_slices(
-        values=grad, indices=indices)
-    # pylint: enable=protected-access
-    return self._resource_apply_sparse(
-        summed_grad, handle, unique_indices, state)
-
-  def _resource_apply_sparse(self, grad, handle, indices, state):
-    """Add ops to apply sparse gradients to the variable `handle`.
-
-    Similar to `_apply_sparse`, the `indices` argument to this method has been
-    de-duplicated. Optimizers which deal correctly with non-unique indices may
-    instead override `_resource_apply_sparse_duplicate_indices` to avoid this
-    overhead.
-
-    Args:
-      grad: a `Tensor` representing the gradient for the affected indices.
-      handle: a `Tensor` of dtype `resource` which points to the variable
-       to be updated.
-      indices: a `Tensor` of integral type representing the indices for
-       which the gradient is nonzero. Indices are unique.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation` which updates the value of the variable.
-    """
-    raise NotImplementedError()
-
-  def _apply_sparse_duplicate_indices(self, grad, var, state):
-    """Add ops to apply sparse gradients to `var`, with repeated sparse indices.
-
-    Optimizers which override this method must deal with IndexedSlices objects
-    such as the following:
-
-      IndexedSlicesValue(values=[1, 1], indices=[0, 0], dense_shape=[1])
-
-    The correct interpretation is:
-
-      IndexedSlicesValue(values=[2], indices=[0], dense_shape=[1])
-
-    Many optimizers deal incorrectly with repeated indices when updating based
-    on sparse gradients (e.g. summing squares rather than squaring the sum, or
-    applying momentum terms multiple times). Adding first is always the correct
-    behavior, so this is enforced here by reconstructing the IndexedSlices to
-    have only unique indices, then calling _apply_sparse.
-
-    Optimizers which deal correctly with repeated indices may instead override
-    this method to avoid the overhead of summing indices.
-
-    Args:
-      grad: `IndexedSlices`.
-      var: A `Variable` object.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation`.
-    """
-    # pylint: disable=protected-access
-    summed_values, unique_indices = optimizer_v1._deduplicate_indexed_slices(
-        values=grad.values, indices=grad.indices)
-    # pylint: enable=protected-access
-    gradient_no_duplicate_indices = ops.IndexedSlices(
-        indices=unique_indices,
-        values=summed_values,
-        dense_shape=grad.dense_shape)
-    return self._apply_sparse(gradient_no_duplicate_indices, var, state)
-
-  def _apply_sparse(self, grad, var, state):
-    """Add ops to apply sparse gradients to `var`.
-
-    The IndexedSlices object passed to `grad` in this function is by default
-    pre-processed in `_apply_sparse_duplicate_indices` to remove duplicate
-    indices (see its docstring for details). Optimizers which can tolerate or
-    have correct special cases for duplicate sparse indices may override
-    `_apply_sparse_duplicate_indices` instead of this function, avoiding that
-    overhead.
-
-    Args:
-      grad: `IndexedSlices`, with no repeated indices.
-      var: A `Variable` object.
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      An `Operation`.
-    """
-    raise NotImplementedError()
-
-  def _finish(self, state):
-    """Do what is needed to finish the update.
-
-    This is called inside a scope colocated with any non-slot variables.
-
-    Args:
-      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
-        and `get_hyper(name)` methods.
-
-    Returns:
-      The operation to apply updates, or None if no updates.
-    """
-    return None
-
-  # --------------
-  # Utility methods for subclasses.
-  # --------------
-  def _get_per_graph_state(self):
-    # pylint: disable=protected-access
-    return self._per_graph_state.get(ops.get_default_graph()._graph_key, None)
-
-  def _get_state_for_var(self, var):
-    # pylint: disable=protected-access
-    return self._per_graph_state.get(var._graph_key, None)
-
-  # --------------
-  # Overridden methods from Checkpointable.
-  # --------------
-
-  def _track_checkpointable(self, *args, **kwargs):
-    """Optimizers may not track dependencies. Raises an error."""
-    raise NotImplementedError(
-        "Optimizers may not have dependencies. File a feature request if this "
-        "limitation bothers you.")
-
-  @property
-  def _checkpoint_dependencies(self):
-    """From Checkpointable. Gather graph-specific non-slot variables to save."""
-    current_graph_non_slot_variables = []
-    state = self._get_per_graph_state()
-    if state is not None:
-      for name, variable_object in sorted(
-          state._non_slot_dict.items(),  # pylint: disable=protected-access
-          # Avoid comparing variables
-          key=lambda item: item[0]):
-        current_graph_non_slot_variables.append(
-            checkpointable.CheckpointableReference(
-                name=name, ref=variable_object))
-    # Note: ignores super(); Optimizers may not have any dependencies outside of
-    # state objects.
-    return current_graph_non_slot_variables
-
-  def _lookup_dependency(self, name):
-    """From Checkpointable. Find a non-slot variable in the current graph."""
-    state = self._get_per_graph_state()
-    if state is None:
-      return None
-    else:
-      return state.get_non_slot(name)
-
-  @property
-  def _deferred_dependencies(self):
-    """Lets Checkpointable know where non-slot variables are created.
-
-    If necessary, creates a new state object for the current default graph.
-    Checkpointable will then add entries to that state's deferred dependency
-    dictionary. The state object will check that dictionary when creating
-    non-slot variables, restoring their value if an entry is found.
-
-    Returns:
-      A dictionary which holds deferred dependencies for the current default
-      graph.
-    """
-    state = self._get_or_create_state()
-    return state._deferred_dependencies  # pylint: disable=protected-access
-
-  def _create_or_restore_slot_variable(
-      self, slot_variable_position, slot_name, variable):
-    """Checkpointable: Restore a slot variable's value, possibly creating it.
-
-    Called when a variable which has an associated slot variable is created or
-    restored.
-
-    Args:
-      slot_variable_position: A `checkpointable._CheckpointPosition` object
-        indicating the slot variable `Checkpointable` object to be restored.
-      slot_name: The name of this `Optimizer`'s slot to restore into.
-      variable: The variable object this slot is being created for.
-    """
-    state = self._get_or_create_state(var_list=[variable])
-    state._create_or_restore_slot_variable(  # pylint: disable=protected-access
-        slot_variable_position=slot_variable_position,
-        slot_name=slot_name,
-        variable=variable,
-        optional_op_name=self._name)
-
-  # --------------
-  # Unsupported parent methods
-  # --------------
-  def _slot_dict(self, slot_name):
-    raise NotImplementedError(
-        "_slot_dict() method unsupported in OptimizerV2")
-
-  def _get_or_make_slot(self, var, val, slot_name, op_name):
-    raise NotImplementedError(
-        "_get_or_make_slot() method unsupported in OptimizerV2")
-
-  def _get_or_make_slot_with_initializer(self, var, initializer, shape, dtype,
-                                         slot_name, op_name):
-    raise NotImplementedError(
-        "_get_or_make_slot_with_initializer() method unsupported in "
-        "OptimizerV2")
-
-  def _create_non_slot_variable(self, initial_value, name, colocate_with):
-    raise NotImplementedError(
-        "_create_non_slot_variable() method unsupported in OptimizerV2")
-
-  def _get_non_slot_variable(self, name, graph=None):
-    raise NotImplementedError(
-        "_get_non_slot_variable() method unsupported in OptimizerV2")
-
-  def _non_slot_variables(self):
-    raise NotImplementedError(
-        "_non_slot_variables() method unsupported in OptimizerV2")
+    super(OptimizerV2, self).__init__(name)
diff --git a/tensorflow/contrib/optimizer_v2/rmsprop.py b/tensorflow/contrib/optimizer_v2/rmsprop.py
index 3de53405ec..090e257ddc 100644
--- a/tensorflow/contrib/optimizer_v2/rmsprop.py
+++ b/tensorflow/contrib/optimizer_v2/rmsprop.py
@@ -41,19 +41,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.optimizer_v2 import optimizer_v2
-from tensorflow.python.ops import array_ops
+from tensorflow.python.keras.optimizer_v2 import rmsprop
+from tensorflow.python.util import deprecation
 
-from tensorflow.python.training import training_ops
 
-
-class RMSPropOptimizer(optimizer_v2.OptimizerV2):
+class RMSPropOptimizer(rmsprop.RMSProp):
   """Optimizer that implements the RMSProp algorithm.
 
   See the
   [paper](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf).
   """
 
+  @deprecation.deprecated_args(
+      "2018-10-01",
+      "`use_locking = True` is no longer supported and will be ignored.",
+      ("use_locking", [False]))
   def __init__(self,
                learning_rate,
                decay=0.9,
@@ -96,138 +98,10 @@ class RMSPropOptimizer(optimizer_v2.OptimizerV2):
       name: Optional name prefix for the operations created when applying
         gradients. Defaults to "RMSProp".
     """
-    super(RMSPropOptimizer, self).__init__(use_locking, name)
-    self._set_hyper("learning_rate", learning_rate)
-    self._set_hyper("decay", decay)
-    self._set_hyper("momentum", momentum)
-    self._set_hyper("epsilon", epsilon)
-
-    self._centered = centered
-
-  def _create_vars(self, var_list, state):
-    for v in var_list:
-      init_rms = state.get_hyper(
-          "epsilon", v.dtype.base_dtype) * array_ops.ones_like(v)
-      state.create_slot_with_initializer(v, init_rms, v.get_shape(),
-                                         v.dtype.base_dtype, "rms")
-      if self._centered:
-        state.zeros_slot(v, "mg")
-      state.zeros_slot(v, "momentum")
-
-  def _apply_dense(self, grad, var, state):
-    rms = state.get_slot(var, "rms")
-    mom = state.get_slot(var, "momentum")
-    if self._centered:
-      mg = state.get_slot(var, "mg")
-      return training_ops.apply_centered_rms_prop(
-          var,
-          mg,
-          rms,
-          mom,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          # epsilon is now the rms initial value and is not added to the
-          # denominator anymore, hence calling the kernel op with epsilon=0.
-          0,
-          grad,
-          use_locking=self._use_locking).op
-    else:
-      return training_ops.apply_rms_prop(
-          var,
-          rms,
-          mom,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          use_locking=self._use_locking).op
-
-  def _resource_apply_dense(self, grad, var, state):
-    rms = state.get_slot(var, "rms")
-    mom = state.get_slot(var, "momentum")
-    if self._centered:
-      mg = state.get_slot(var, "mg")
-      return training_ops.resource_apply_centered_rms_prop(
-          var.handle,
-          mg.handle,
-          rms.handle,
-          mom.handle,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          use_locking=self._use_locking)
-    else:
-      return training_ops.resource_apply_rms_prop(
-          var.handle,
-          rms.handle,
-          mom.handle,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          use_locking=self._use_locking)
-
-  def _apply_sparse(self, grad, var, state):
-    rms = state.get_slot(var, "rms")
-    mom = state.get_slot(var, "momentum")
-    if self._centered:
-      mg = state.get_slot(var, "mg")
-      return training_ops.sparse_apply_centered_rms_prop(
-          var,
-          mg,
-          rms,
-          mom,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad.values,
-          grad.indices,
-          use_locking=self._use_locking)
-    else:
-      return training_ops.sparse_apply_rms_prop(
-          var,
-          rms,
-          mom,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad.values,
-          grad.indices,
-          use_locking=self._use_locking)
-
-  def _resource_apply_sparse(self, grad, var, indices, state):
-    rms = state.get_slot(var, "rms")
-    mom = state.get_slot(var, "momentum")
-    if self._centered:
-      mg = self.get_slot(var, "mg")
-      return training_ops.resource_sparse_apply_centered_rms_prop(
-          var.handle,
-          mg.handle,
-          rms.handle,
-          mom.handle,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          indices,
-          use_locking=self._use_locking)
-    else:
-      return training_ops.resource_sparse_apply_rms_prop(
-          var.handle,
-          rms.handle,
-          mom.handle,
-          state.get_hyper("learning_rate", var.dtype.base_dtype),
-          state.get_hyper("decay", var.dtype.base_dtype),
-          state.get_hyper("momentum", var.dtype.base_dtype),
-          0,
-          grad,
-          indices,
-          use_locking=self._use_locking)
+    super(RMSPropOptimizer, self).__init__(
+        learning_rate=learning_rate,
+        rho=decay,
+        momentum=momentum,
+        epsilon=epsilon,
+        centered=centered,
+        name=name)
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 4a72c4b3f3..c4d23f117f 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -62,6 +62,7 @@ py_library(
         ":backend",
         ":engine",
         ":layers",
+        ":optimizer_v2",
         "//tensorflow/python/saved_model",
         "//tensorflow/python:training",
     ],
@@ -189,6 +190,30 @@ py_library(
     ],
 )
 
+py_library(
+    name = "optimizer_v2",
+    srcs = [
+        "optimizer_v2/adadelta.py",
+        "optimizer_v2/adagrad.py",
+        "optimizer_v2/adam.py",
+        "optimizer_v2/optimizer_v2.py",
+        "optimizer_v2/rmsprop.py",
+        "optimizer_v2/sgd.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:distribute",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+    ],
+)
+
 py_test(
     name = "integration_test",
     size = "medium",
@@ -827,3 +852,133 @@ py_library(
         "//third_party/py/numpy",
     ],
 )
+
+cuda_py_test(
+    name = "adadelta_test",
+    size = "medium",
+    srcs = ["optimizer_v2/adadelta_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "adagrad_test",
+    size = "small",
+    srcs = ["optimizer_v2/adagrad_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "adam_test",
+    size = "small",
+    srcs = ["optimizer_v2/adam_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "checkpointable_utils_test",
+    srcs = ["optimizer_v2/checkpointable_utils_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "@six_archive//:six",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:layers",
+        "//tensorflow/python:layers_base",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python/keras",
+    ],
+    tags = ["notsan"],
+)
+
+cuda_py_test(
+    name = "sgd_test",
+    size = "medium",
+    srcs = ["optimizer_v2/sgd_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:resources",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
+    ],
+)
+
+cuda_py_test(
+    name = "optimizer_v2_test",
+    size = "medium",
+    srcs = ["optimizer_v2/optimizer_v2_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:clip_ops",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:variables",
+    ],
+)
+
+cuda_py_test(
+    name = "rmsprop_test",
+    size = "small",
+    srcs = ["optimizer_v2/rmsprop_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+    tags = ["optonly"],
+)
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py b/tensorflow/python/keras/optimizer_v2/adadelta.py
new file mode 100644
index 0000000000..d3b3c9c12e
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adadelta.py
@@ -0,0 +1,116 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adadelta for TensorFlow."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.training import training_ops
+
+
+class Adadelta(optimizer_v2.OptimizerV2):
+  """Adadelta optimizer.
+
+  It is recommended to leave the parameters of this optimizer at their default
+  values.
+
+  See [M. D. Zeiler](http://arxiv.org/abs/1212.5701)
+  ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
+
+  Some of the args below are hyperparameters, where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Arguments:
+      learning_rate: float hyperparameter >= 0. Learning rate. It is recommended
+        to leave it at the default value.
+      rho: float hyperparameter >= 0. The decay rate.
+      epsilon: float hyperparameter >= 0. Fuzz factor. A constant epsilon used
+        to better condition the grad update.
+      name: Optional name prefix for the operations created when applying
+        gradients.  Defaults to 'Adadelta'.
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               rho=0.95,
+               epsilon=1e-8,
+               name="Adadelta"):
+    super(Adadelta, self).__init__(name)
+    self._set_hyper("learning_rate", learning_rate)
+    self._set_hyper("rho", rho)
+    self._set_hyper("epsilon", epsilon)
+
+  def _create_vars(self, var_list, state):
+    for v in var_list:
+      state.zeros_slot(v, "accum")
+      state.zeros_slot(v, "accum_update")
+
+  def _apply_dense(self, grad, var, state):
+    accum = state.get_slot(var, "accum")
+    accum_update = state.get_slot(var, "accum_update")
+    return training_ops.apply_adadelta(
+        var,
+        accum,
+        accum_update,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("rho", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _resource_apply_dense(self, grad, var, state):
+    accum = state.get_slot(var, "accum")
+    accum_update = state.get_slot(var, "accum_update")
+    return training_ops.resource_apply_adadelta(
+        var.handle,
+        accum.handle,
+        accum_update.handle,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("rho", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var, state):
+    accum = state.get_slot(var, "accum")
+    accum_update = state.get_slot(var, "accum_update")
+    return training_ops.sparse_apply_adadelta(
+        var,
+        accum,
+        accum_update,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("rho", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad.values,
+        grad.indices,
+        use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    accum = state.get_slot(var, "accum")
+    accum_update = state.get_slot(var, "accum_update")
+    return training_ops.resource_sparse_apply_adadelta(
+        var.handle,
+        accum.handle,
+        accum_update.handle,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("rho", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad,
+        indices,
+        use_locking=self._use_locking)
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta_test.py b/tensorflow/python/keras/optimizer_v2/adadelta_test.py
new file mode 100644
index 0000000000..6e48f92e4f
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adadelta_test.py
@@ -0,0 +1,166 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Adadelta Optimizer."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.keras.optimizer_v2 import adadelta
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class AdadeltaOptimizerTest(test.TestCase):
+
+  def doTestBasic(self, use_resource=False):
+    num_updates = 4  # number of ADADELTA steps to perform
+    for dtype in [dtypes.half, dtypes.float32]:
+      for grad in [0.2, 0.1, 0.01]:
+        for lr in [1.0, 0.5, 0.1]:
+          with self.cached_session():
+            var0_init = [1.0, 2.0]
+            var1_init = [3.0, 4.0]
+            if use_resource:
+              var0 = resource_variable_ops.ResourceVariable(
+                  var0_init, dtype=dtype)
+              var1 = resource_variable_ops.ResourceVariable(
+                  var1_init, dtype=dtype)
+            else:
+              var0 = variables.Variable(var0_init, dtype=dtype)
+              var1 = variables.Variable(var1_init, dtype=dtype)
+
+            grads = constant_op.constant([grad, grad], dtype=dtype)
+
+            accum = 0.0
+            accum_update = 0.0
+
+            # ADADELTA gradient optimizer
+            rho = 0.95
+            epsilon = 1e-8
+            adadelta_opt = adadelta.Adadelta(lr, rho, epsilon)
+            adadelta_update = adadelta_opt.apply_gradients(
+                zip([grads, grads], [var0, var1]))
+
+            opt_vars = adadelta_opt.variables()
+            self.assertStartsWith(opt_vars[0].name, var0._shared_name)
+            self.assertStartsWith(opt_vars[1].name, var0._shared_name)
+            self.assertStartsWith(opt_vars[2].name, var1._shared_name)
+            self.assertStartsWith(opt_vars[3].name, var1._shared_name)
+            self.assertEqual(4, len(opt_vars))
+
+            variables.global_variables_initializer().run()
+
+            # Assign slots
+            slot = [None] * 2
+            slot_update = [None] * 2
+            self.assertEqual(["accum", "accum_update"],
+                             adadelta_opt.get_slot_names())
+            slot[0] = adadelta_opt.get_slot(var0, "accum")
+            self.assertEquals(slot[0].get_shape(), var0.get_shape())
+            self.assertFalse(slot[0] in variables.trainable_variables())
+
+            slot_update[0] = adadelta_opt.get_slot(var0, "accum_update")
+            self.assertEquals(slot_update[0].get_shape(), var0.get_shape())
+            self.assertFalse(slot_update[0] in variables.trainable_variables())
+
+            slot[1] = adadelta_opt.get_slot(var1, "accum")
+            self.assertEquals(slot[1].get_shape(), var1.get_shape())
+            self.assertFalse(slot[1] in variables.trainable_variables())
+
+            slot_update[1] = adadelta_opt.get_slot(var1, "accum_update")
+            self.assertEquals(slot_update[1].get_shape(), var1.get_shape())
+            self.assertFalse(slot_update[1] in variables.trainable_variables())
+
+            # Fetch params to validate initial values
+            self.assertAllClose(var0_init, var0.eval())
+            self.assertAllClose(var1_init, var1.eval())
+
+            update = [None] * num_updates
+            tot_update = 0
+            for step in range(num_updates):
+              # Run adadelta update for comparison
+              adadelta_update.run()
+
+              # Perform initial update without previous accum values
+              accum = accum * rho + (grad**2) * (1 - rho)
+              update[step] = (np.sqrt(accum_update + epsilon) *
+                              (1. / np.sqrt(accum + epsilon)) * grad)
+              accum_update = (accum_update * rho + (update[step]**2) *
+                              (1.0 - rho))
+              tot_update += update[step] * lr
+
+              # Check that the accumulators have been updated
+              for slot_idx in range(2):
+                self.assertAllCloseAccordingToType(
+                    np.array([accum, accum], dtype=dtype.as_numpy_dtype()),
+                    slot[slot_idx].eval(),
+                    rtol=1e-5)
+
+                self.assertAllCloseAccordingToType(
+                    np.array(
+                        [accum_update, accum_update],
+                        dtype=dtype.as_numpy_dtype()),
+                    slot_update[slot_idx].eval(),
+                    rtol=1e-5)
+
+              # Check that the parameters have been updated
+              self.assertAllCloseAccordingToType(
+                  np.array(
+                      [var0_init[0] - tot_update, var0_init[1] - tot_update],
+                      dtype=dtype.as_numpy_dtype()),
+                  var0.eval(),
+                  rtol=1e-5)
+
+              self.assertAllCloseAccordingToType(
+                  np.array(
+                      [var1_init[0] - tot_update, var1_init[1] - tot_update],
+                      dtype=dtype.as_numpy_dtype()),
+                  var1.eval(),
+                  rtol=1e-5)
+
+  def testBasic(self):
+    self.doTestBasic(use_resource=False)
+
+  def testResourceBasic(self):
+    self.doTestBasic(use_resource=True)
+
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = adadelta.Adadelta(1.0, 1.0, 1.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[-111, -138]], var0.eval())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad.py b/tensorflow/python/keras/optimizer_v2/adagrad.py
new file mode 100644
index 0000000000..2d8cec2300
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adagrad.py
@@ -0,0 +1,119 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adagrad optimizer for TensorFlow."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.training import training_ops
+
+
+class Adagrad(optimizer_v2.OptimizerV2):
+  """Adagrad optimizer.
+
+  It is recommended to leave the parameters of this optimizer at their default
+  values.
+
+  See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
+  or this
+  [intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf).
+
+  The learning_rate arg below is a hyperparameter, where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Arguments:
+      learning_rate: float hyperparameter >= 0. Learning rate.
+      initial_accumulator_value: A floating point value. Starting value for the
+        accumulators, must be positive.
+      name: Optional name prefix for the operations created when applying
+        gradients.  Defaults to 'Adagrad'.
+
+  Raises:
+    ValueError: If the `initial_accumulator_value` is invalid.
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               initial_accumulator_value=0.1,
+               name="Adagrad"):
+    if initial_accumulator_value <= 0.0:
+      raise ValueError("initial_accumulator_value must be positive: %s" %
+                       initial_accumulator_value)
+    super(Adagrad, self).__init__(name)
+    self._set_hyper("learning_rate", learning_rate)
+
+    self._initial_accumulator_value = initial_accumulator_value
+
+  def _create_vars(self, var_list, state):
+    for v in var_list:
+      dtype = v.dtype.base_dtype
+      if v.get_shape().is_fully_defined():
+        init = init_ops.constant_initializer(self._initial_accumulator_value,
+                                             dtype=dtype)
+      else:
+        def init(v=v, dtype=dtype):
+          # Use a Tensor instead of initializer if variable does not have
+          # static shape.
+          init_constant = gen_array_ops.fill(array_ops.shape(v),
+                                             self._initial_accumulator_value)
+          return math_ops.cast(init_constant, dtype)
+      state.create_slot_with_initializer(v, init, v.get_shape(), dtype,
+                                         "accumulator")
+
+  def _apply_dense(self, grad, var, state):
+    acc = state.get_slot(var, "accumulator")
+    return training_ops.apply_adagrad(
+        var,
+        acc,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _resource_apply_dense(self, grad, var, state):
+    acc = state.get_slot(var, "accumulator")
+    return training_ops.resource_apply_adagrad(
+        var.handle,
+        acc.handle,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var, state):
+    acc = state.get_slot(var, "accumulator")
+    return training_ops.sparse_apply_adagrad(
+        var,
+        acc,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        grad.values,
+        grad.indices,
+        use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    acc = state.get_slot(var, "accumulator")
+    return training_ops.resource_sparse_apply_adagrad(
+        var.handle,
+        acc.handle,
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        grad,
+        indices,
+        use_locking=self._use_locking)
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad_test.py b/tensorflow/python/keras/optimizer_v2/adagrad_test.py
new file mode 100644
index 0000000000..fc4ef5c399
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adagrad_test.py
@@ -0,0 +1,276 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for aggregate operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.keras.optimizer_v2 import adagrad
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class AdagradOptimizerTest(test.TestCase):
+
+  def doTestBasic(self, use_resource=False):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+          var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+        else:
+          var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+          var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        ada_opt = adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
+        ada_update = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Run 3 steps of adagrad
+        for _ in range(3):
+          ada_update.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([2.715679168701172, 3.715679168701172]), var1.eval())
+
+  def testBasic(self):
+    self.doTestBasic()
+
+  def testBasicResource(self):
+    self.doTestBasic(use_resource=True)
+
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable(
+            [[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = adagrad.Adagrad(1.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType(
+            [[1.0, 2.0], [3.0, 4.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[0, 1], [3, 4]], var0.eval(), atol=0.01)
+
+  def testTensorLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        ada_opt = adagrad.Adagrad(
+            constant_op.constant(3.0), initial_accumulator_value=0.1)
+        ada_update = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Run 3 steps of adagrad
+        for _ in range(3):
+          ada_update.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([2.715679168701172, 3.715679168701172]), var1.eval())
+
+  def testSparseBasic(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
+        var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(
+                [0.1], shape=[1, 1], dtype=dtype),
+            constant_op.constant([0]),
+            constant_op.constant([2, 1]))
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(
+                [0.01], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([2, 1]))
+        ada_opt = adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
+        ada_update = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([[1.0], [2.0]], var0.eval())
+        self.assertAllClose([[3.0], [4.0]], var1.eval())
+        # Run 3 step of sgd
+        for _ in range(3):
+          ada_update.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            np.array([[-1.6026098728179932], [2.0]]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([[3.0], [3.715679168701172]]), var1.eval())
+
+  def testSparseRepeatedIndices(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        repeated_index_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        aggregated_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        grad_repeated_index = ops.IndexedSlices(
+            constant_op.constant(
+                [0.1, 0.1], shape=[2, 1], dtype=dtype),
+            constant_op.constant([1, 1]),
+            constant_op.constant([2, 1]))
+        grad_aggregated = ops.IndexedSlices(
+            constant_op.constant(
+                [0.2], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([2, 1]))
+        repeated_update = adagrad.Adagrad(3.0).apply_gradients(
+            [(grad_repeated_index, repeated_index_update_var)])
+        aggregated_update = adagrad.Adagrad(3.0).apply_gradients(
+            [(grad_aggregated, aggregated_update_var)])
+        variables.global_variables_initializer().run()
+        self.assertAllClose(aggregated_update_var.eval(),
+                            repeated_index_update_var.eval())
+        for _ in range(3):
+          repeated_update.run()
+          aggregated_update.run()
+          self.assertAllClose(aggregated_update_var.eval(),
+                              repeated_index_update_var.eval())
+
+  def testSparseRepeatedIndicesResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var_repeated = resource_variable_ops.ResourceVariable(
+            [1.0, 2.0], dtype=dtype)
+        loss_repeated = math_ops.reduce_sum(
+            embedding_ops.embedding_lookup(var_repeated, [0, 0]))
+        var_aggregated = resource_variable_ops.ResourceVariable(
+            [1.0, 2.0], dtype=dtype)
+        loss_aggregated = 2 * math_ops.reduce_sum(
+            embedding_ops.embedding_lookup(var_aggregated, [0]))
+        update_op_repeated = adagrad.Adagrad(2.0).minimize(loss_repeated)
+        update_op_aggregated = adagrad.Adagrad(2.0).minimize(loss_aggregated)
+        variables.global_variables_initializer().run()
+        self.assertAllCloseAccordingToType(
+            var_repeated.eval(), var_aggregated.eval())
+        for _ in range(3):
+          update_op_repeated.run()
+          update_op_aggregated.run()
+          self.assertAllCloseAccordingToType(
+              var_repeated.eval(), var_aggregated.eval())
+
+  def testSparseStability(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        shape = [1, 6]
+        var0 = variables.Variable(
+            [[
+                0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
+                -0.0105945
+            ]],
+            dtype=dtype)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(
+                [[
+                    -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05,
+                    -8.4877e-05, -9.48906e-05
+                ]],
+                shape=shape,
+                dtype=dtype),
+            constant_op.constant([0]),
+            constant_op.constant(shape))
+        ada_opt = adagrad.Adagrad(1.0, initial_accumulator_value=0.1)
+        ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
+        self.assertEqual(["accumulator"], ada_opt.get_slot_names())
+        slot0 = ada_opt.get_slot(var0, "accumulator")
+        init = variables.global_variables_initializer()
+        for _ in range(100):
+          init.run()
+          ada_update.run()
+          self.assertAllCloseAccordingToType(
+              np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), slot0.eval())
+          self.assertAllCloseAccordingToType(
+              np.array([[
+                  0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573,
+                  -0.01029443
+              ]]), var0.eval())
+
+  def testSharing(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        ada_opt = adagrad.Adagrad(3.0)
+        # Apply the optimizer twice.  Both applications will use
+        # the same accums.
+        ada_update1 = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        ada_update2 = ada_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        self.assertEqual(["accumulator"], ada_opt.get_slot_names())
+        slot0 = ada_opt.get_slot(var0, "accumulator")
+        self.assertEquals(slot0.get_shape(), var0.get_shape())
+        slot1 = ada_opt.get_slot(var1, "accumulator")
+        self.assertEquals(slot1.get_shape(), var1.get_shape())
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values.
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Mix the first and the second adagrad for 3 steps.
+        ada_update1.run()
+        ada_update2.run()
+        ada_update1.run()
+        # Validate updated params (the same as with only 1 Adagrad).
+        self.assertAllCloseAccordingToType(
+            np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([2.715679168701172, 3.715679168701172]), var1.eval())
+
+  def testDynamicShapeVariable_Ok(self):
+    with self.cached_session():
+      v = variable_scope.get_variable("v", initializer=constant_op.constant(1.),
+                                      validate_shape=False)
+      self.assertFalse(v.shape.is_fully_defined())
+      # Creating optimizer should cause no exception.
+      adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py
new file mode 100644
index 0000000000..8367228d7a
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adam.py
@@ -0,0 +1,203 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adam optimizer for TensorFlow."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.training import training_ops
+
+
+class Adam(optimizer_v2.OptimizerV2):
+  r"""Adam Optimizer.
+
+  Default parameters follow those provided in the original paper.
+
+  See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
+  ([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
+
+  Some of the args below are hyperparameters where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Initialization:
+
+  $$m_0 := 0 \text{(Initialize initial 1st moment vector)}$$
+  $$v_0 := 0 \text{(Initialize initial 2nd moment vector)}$$
+  $$t := 0 \text{(Initialize timestep)}$$
+  The update rule for `variable` with gradient `g` uses an optimization
+  described at the end of section2 of the paper:
+
+  $$t := t + 1$$
+  $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
+
+  $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
+  $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
+  $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
+
+  The default value of 1e-8 for epsilon might not be a good default in
+  general. For example, when training an Inception network on ImageNet a
+  current good choice is 1.0 or 0.1. Note that since AdamOptimizer uses the
+  formulation just before Section 2.1 of the Kingma and Ba paper rather than
+  the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon
+  hat" in the paper.
+
+  The sparse implementation of this algorithm (used when the gradient is an
+  IndexedSlices object, typically because of `tf.gather` or an embedding
+  lookup in the forward pass) does apply momentum to variable slices even if
+  they were not used in the forward pass (meaning they have a gradient equal
+  to zero). Momentum decay (beta1) is also applied to the entire momentum
+  accumulator. This means that the sparse behavior is equivalent to the dense
+  behavior (in contrast to some momentum implementations which ignore momentum
+  unless a variable slice was actually used).
+
+  Arguments:
+      learning_rate: float hyperparameter >= 0. Learning rate.
+      beta_1: float hyperparameter, 0 < beta_1 < 1. Generally close to 1. The
+        exponential decay rate for the 1st moment estimates.
+      beta_2: float hyperparameter, 0 < beta_2 < 1. Generally close to 1. The
+        exponential decay rate for the 2nd moment estimates.
+      epsilon: float hyperparameter >= 0. Fuzz factor. This epsilon is "epsilon
+        hat" in the Kingma and Ba paper (in the formula just before Section
+        2.1), not the epsilon in Algorithm 1 of the paper.
+      name: Optional name for the operations created when applying gradients.
+        Defaults to "Adam".
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               beta_1=0.9,
+               beta_2=0.999,
+               epsilon=1e-8,
+               name="Adam"):
+    super(Adam, self).__init__(name)
+
+    self._set_hyper("learning_rate", learning_rate)
+    self._set_hyper("beta_1", beta_1)
+    self._set_hyper("beta_2", beta_2)
+    self._set_hyper("epsilon", epsilon)
+
+  def _get_beta_accumulators(self, state=None):
+    if state is None:
+      state = self._get_per_graph_state()
+    return (state.get_non_slot("beta_1_power"),
+            state.get_non_slot("beta_2_power"))
+
+  def _create_vars(self, var_list, state):
+    # Non-slot variables end up on the same device(s).
+    state.create_non_slot(
+        initial_value=lambda: state.get_hyper("beta_1"), name="beta_1_power")
+    state.create_non_slot(
+        initial_value=lambda: state.get_hyper("beta_2"), name="beta_2_power")
+
+    # Create slots for the first and second moments.
+    for v in var_list:
+      state.zeros_slot(v, "m")
+      state.zeros_slot(v, "v")
+
+  def _apply_dense(self, grad, var, state):
+    m = state.get_slot(var, "m")
+    v = state.get_slot(var, "v")
+    beta_1_power, beta_2_power = self._get_beta_accumulators(state)
+    return training_ops.apply_adam(
+        var,
+        m,
+        v,
+        math_ops.cast(beta_1_power, var.dtype.base_dtype),
+        math_ops.cast(beta_2_power, var.dtype.base_dtype),
+        state.get_hyper("learning_rate", var.dtype.base_dtype),
+        state.get_hyper("beta_1", var.dtype.base_dtype),
+        state.get_hyper("beta_2", var.dtype.base_dtype),
+        state.get_hyper("epsilon", var.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking).op
+
+  def _resource_apply_dense(self, grad, var, state):
+    m = state.get_slot(var, "m")
+    v = state.get_slot(var, "v")
+    beta_1_power, beta_2_power = self._get_beta_accumulators(state)
+    return training_ops.resource_apply_adam(
+        var.handle,
+        m.handle,
+        v.handle,
+        math_ops.cast(beta_1_power, grad.dtype.base_dtype),
+        math_ops.cast(beta_2_power, grad.dtype.base_dtype),
+        state.get_hyper("learning_rate", grad.dtype.base_dtype),
+        state.get_hyper("beta_1", grad.dtype.base_dtype),
+        state.get_hyper("beta_2", grad.dtype.base_dtype),
+        state.get_hyper("epsilon", grad.dtype.base_dtype),
+        grad,
+        use_locking=self._use_locking)
+
+  def _apply_sparse_shared(self, grad, var, indices, scatter_add, state):
+    beta_1_power, beta_2_power = self._get_beta_accumulators(state)
+    beta_1_power = math_ops.cast(beta_1_power, var.dtype.base_dtype)
+    beta_2_power = math_ops.cast(beta_2_power, var.dtype.base_dtype)
+    lr_t = state.get_hyper("learning_rate", var.dtype.base_dtype)
+    beta_1_t = state.get_hyper("beta_1", var.dtype.base_dtype)
+    beta_2_t = state.get_hyper("beta_2", var.dtype.base_dtype)
+    epsilon_t = state.get_hyper("epsilon", var.dtype.base_dtype)
+    lr = (lr_t * math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power))
+    # m_t = beta_1 * m + (1 - beta_1) * g_t
+    m = state.get_slot(var, "m")
+    m_scaled_g_values = grad * (1 - beta_1_t)
+    m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
+    with ops.control_dependencies([m_t]):
+      m_t = scatter_add(m, indices, m_scaled_g_values)
+    # v_t = beta_2 * v + (1 - beta_2) * (g_t * g_t)
+    v = state.get_slot(var, "v")
+    v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
+    v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
+    with ops.control_dependencies([v_t]):
+      v_t = scatter_add(v, indices, v_scaled_g_values)
+    v_sqrt = math_ops.sqrt(v_t)
+    var_update = state_ops.assign_sub(var,
+                                      lr * m_t / (v_sqrt + epsilon_t),
+                                      use_locking=self._use_locking)
+    return control_flow_ops.group(*[var_update, m_t, v_t])
+
+  def _apply_sparse(self, grad, var, state):
+    return self._apply_sparse_shared(
+        grad.values, var, grad.indices,
+        lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
+            x, i, v, use_locking=self._use_locking),
+        state)
+
+  def _resource_scatter_add(self, x, i, v):
+    with ops.control_dependencies(
+        [resource_variable_ops.resource_scatter_add(
+            x.handle, i, v)]):
+      return x.value()
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    return self._apply_sparse_shared(
+        grad, var, indices, self._resource_scatter_add, state)
+
+  def _finish(self, state):
+    # Update the power accumulators.
+    beta_1_power, beta_2_power = self._get_beta_accumulators(state)
+    update_beta_1 = beta_1_power.assign(
+        beta_1_power * state.get_hyper("beta_1"), use_locking=self._use_locking)
+    update_beta_2 = beta_2_power.assign(
+        beta_2_power * state.get_hyper("beta_2"), use_locking=self._use_locking)
+    return control_flow_ops.group(update_beta_1, update_beta_2)
diff --git a/tensorflow/python/keras/optimizer_v2/adam_test.py b/tensorflow/python/keras/optimizer_v2/adam_test.py
new file mode 100644
index 0000000000..77796317a1
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/adam_test.py
@@ -0,0 +1,333 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Adam optimizer."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+def adam_update_numpy(param,
+                      g_t,
+                      t,
+                      m,
+                      v,
+                      alpha=0.001,
+                      beta1=0.9,
+                      beta2=0.999,
+                      epsilon=1e-8):
+  alpha_t = alpha * np.sqrt(1 - beta2**t) / (1 - beta1**t)
+
+  m_t = beta1 * m + (1 - beta1) * g_t
+  v_t = beta2 * v + (1 - beta2) * g_t * g_t
+
+  param_t = param - alpha_t * m_t / (np.sqrt(v_t) + epsilon)
+  return param_t, m_t, v_t
+
+
+class AdamOptimizerTest(test.TestCase):
+
+  def doTestSparse(self, use_resource=False):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable(var0_np)
+          var1 = resource_variable_ops.ResourceVariable(var1_np)
+        else:
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+        grads0_np_indices = np.array([0, 1], dtype=np.int32)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(grads0_np),
+            constant_op.constant(grads0_np_indices), constant_op.constant([2]))
+        grads1_np_indices = np.array([0, 1], dtype=np.int32)
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(grads1_np),
+            constant_op.constant(grads1_np_indices), constant_op.constant([2]))
+        opt = adam.Adam()
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
+          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
+          update.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testSparse(self):
+    self.doTestSparse(use_resource=False)
+
+  def testResourceSparse(self):
+    self.doTestSparse(use_resource=True)
+
+  def testSparseDevicePlacement(self):
+    for index_dtype in [dtypes.int32, dtypes.int64]:
+      with self.test_session(force_gpu=test.is_gpu_available()):
+        # If a GPU is available, tests that all optimizer ops can be placed on
+        # it (i.e. they have GPU kernels).
+        var = variables.Variable([[1.0], [2.0]])
+        indices = constant_op.constant([0, 1], dtype=index_dtype)
+        gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices))
+        optimizer = adam.Adam(3.0)
+        minimize_op = optimizer.minimize(gathered_sum)
+        variables.global_variables_initializer().run()
+        minimize_op.run()
+
+  def testSparseRepeatedIndices(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        repeated_index_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        aggregated_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        grad_repeated_index = ops.IndexedSlices(
+            constant_op.constant(
+                [0.1, 0.1], shape=[2, 1], dtype=dtype),
+            constant_op.constant([1, 1]),
+            constant_op.constant([2, 1]))
+        grad_aggregated = ops.IndexedSlices(
+            constant_op.constant(
+                [0.2], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([2, 1]))
+        repeated_update = adam.Adam().apply_gradients(
+            [(grad_repeated_index, repeated_index_update_var)])
+        aggregated_update = adam.Adam().apply_gradients(
+            [(grad_aggregated, aggregated_update_var)])
+        variables.global_variables_initializer().run()
+        self.assertAllClose(aggregated_update_var.eval(),
+                            repeated_index_update_var.eval())
+        for _ in range(3):
+          repeated_update.run()
+          aggregated_update.run()
+          self.assertAllClose(aggregated_update_var.eval(),
+                              repeated_index_update_var.eval())
+
+  def doTestBasic(self, use_resource=False):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      with self.session(graph=ops.Graph()):
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          var0 = resource_variable_ops.ResourceVariable(
+              var0_np, name="var0_%d" % i)
+          var1 = resource_variable_ops.ResourceVariable(
+              var1_np, name="var1_%d" % i)
+        else:
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+
+        opt = adam.Adam()
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        opt_variables = opt.variables()
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+        self.assertTrue(beta1_power is not None)
+        self.assertTrue(beta2_power is not None)
+        self.assertIn(beta1_power, opt_variables)
+        self.assertIn(beta2_power, opt_variables)
+
+        with ops.Graph().as_default():
+          # Shouldn't return non-slot variables from other graphs.
+          self.assertEqual(0, len(opt.variables()))
+
+        if not context.executing_eagerly():
+          self.evaluate(variables.global_variables_initializer())
+          # Fetch params to validate initial values
+          self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+          self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          if not context.executing_eagerly():
+            self.evaluate(update)
+          elif t > 1:
+            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+
+          self.assertAllCloseAccordingToType(0.9**(t + 1),
+                                             self.evaluate(beta1_power))
+          self.assertAllCloseAccordingToType(0.999**(t + 1),
+                                             self.evaluate(beta2_power))
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+          if use_resource:
+            self.assertEqual("var0_%d/Adam:0" % (i,),
+                             opt.get_slot(var=var0, name="m").name)
+
+  def testBasic(self):
+    with self.cached_session():
+      self.doTestBasic(use_resource=False)
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testResourceBasic(self):
+    self.doTestBasic(use_resource=True)
+
+  def testTensorLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+        opt = adam.Adam(constant_op.constant(0.001))
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
+          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
+          update.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testSharing(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+        opt = adam.Adam()
+        update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        # Run 3 steps of intertwined Adam1 and Adam2.
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
+          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
+          if t % 2 == 0:
+            update1.run()
+          else:
+            update2.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testTwoSessions(self):
+    optimizer = adam.Adam()
+    g = ops.Graph()
+    with g.as_default():
+      with session.Session():
+        var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+        grads0 = constant_op.constant(np.array([0.1, 0.1]))
+        optimizer.apply_gradients([(grads0, var0)])
+
+    gg = ops.Graph()
+    with gg.as_default():
+      with session.Session():
+        var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+        grads0 = constant_op.constant(np.array([0.1, 0.1]))
+
+        # If the optimizer saves any state not keyed by graph the following line
+        # fails.
+        optimizer.apply_gradients([(grads0, var0)])
+
+  def testSlotsUniqueEager(self):
+    with context.eager_mode():
+      v1 = resource_variable_ops.ResourceVariable(1.)
+      v2 = resource_variable_ops.ResourceVariable(1.)
+      opt = adam.Adam(1.)
+      opt.minimize(lambda: v1 + v2)
+      # There should be two non-slot variables, and two unique slot variables
+      # for v1 and v2 respectively.
+      self.assertEqual(6, len(set(opt.variables())))
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py b/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py
new file mode 100644
index 0000000000..338c04148b
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py
@@ -0,0 +1,761 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# TODO(josh11b): Forked from contrib/eager/python to test OptimizerV2 the same way
+# OptimizerV1 is tested. This file should be removed once the fork is resolved.
+
+import functools
+import os
+
+import six
+
+from tensorflow.python.client import session as session_lib
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.layers import core
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import template
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as core_saver
+from tensorflow.python.training import training_util
+from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.checkpointable import util
+
+
+class NonLayerCheckpointable(tracking.Checkpointable):
+
+  def __init__(self):
+    super(NonLayerCheckpointable, self).__init__()
+    self.a_variable = util.add_variable(
+        self, name="a_variable", shape=[])
+
+
+# pylint: disable=not-callable
+class MyModel(training.Model):
+  """A concrete Model for testing."""
+
+  def __init__(self):
+    super(MyModel, self).__init__()
+    self._named_dense = core.Dense(1, use_bias=True)
+    self._second = core.Dense(1, use_bias=False)
+    # We can still track Checkpointables which aren't Layers.
+    self._non_layer = NonLayerCheckpointable()
+
+  def call(self, values):
+    ret = self._second(self._named_dense(values))
+    return ret
+
+
+class _MirroringSaveable(
+    core_saver.BaseSaverBuilder.ResourceVariableSaveable):
+
+  def __init__(self, primary_variable, mirrored_variable, name):
+    self._primary_variable = primary_variable
+    self._mirrored_variable = mirrored_variable
+    super(_MirroringSaveable, self).__init__(
+        self._primary_variable, "", name)
+
+  def restore(self, restored_tensors, restored_shapes):
+    """Restore the same value into both variables."""
+    tensor, = restored_tensors
+    return control_flow_ops.group(
+        self._primary_variable.assign(tensor),
+        self._mirrored_variable.assign(tensor))
+
+
+class CheckpointingTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNamingWithOptimizer(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    # A nuisance Model using the same optimizer. Its slot variables should not
+    # go in the checkpoint, since it is never depended on.
+    other_model = MyModel()
+    optimizer = adam.Adam(0.001)
+    optimizer_step = training_util.get_or_create_global_step()
+    root_checkpointable = util.Checkpoint(
+        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
+    if context.executing_eagerly():
+      optimizer.minimize(
+          lambda: model(input_value),
+          global_step=optimizer_step)
+      optimizer.minimize(
+          lambda: other_model(input_value),
+          global_step=optimizer_step)
+    else:
+      train_op = optimizer.minimize(
+          model(input_value), global_step=optimizer_step)
+      optimizer.minimize(
+          other_model(input_value),
+          global_step=optimizer_step)
+      self.evaluate(util.gather_initializers(
+          root_checkpointable))
+      self.evaluate(train_op)
+    named_variables, serialized_graph, _ = (
+        util._serialize_object_graph(
+            root_checkpointable, saveables_cache=None))
+    expected_checkpoint_names = (
+        # Created in the root node, so no prefix.
+        "optimizer_step",
+        "model/_second/kernel",
+        "model/_named_dense/kernel",
+        "model/_named_dense/bias",
+        # non-Layer dependency of the model
+        "model/_non_layer/a_variable",
+        # The optimizer creates two non-slot variables
+        "optimizer/beta_1_power",
+        "optimizer/beta_2_power",
+        # Slot variables
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
+    )
+    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
+    expected_checkpoint_names = [
+        name + suffix for name in expected_checkpoint_names]
+    # The Dense layers also save get_config() JSON
+    expected_checkpoint_names.extend(
+        ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
+         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"])
+    named_variables = {v.name: v for v in named_variables}
+    six.assertCountEqual(self, expected_checkpoint_names,
+                         named_variables.keys())
+    # Check that we've mapped to the right variable objects (not exhaustive)
+    self.assertEqual(
+        "global_step",
+        named_variables["optimizer_step" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense_1/kernel",
+        named_variables["model/_second/kernel" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel",
+        named_variables["model/_named_dense/kernel" + suffix].full_name)
+    self.assertEqual(
+        "beta_1_power",
+        named_variables["optimizer/beta_1_power" + suffix].full_name)
+    self.assertEqual(
+        "beta_2_power",
+        named_variables["optimizer/beta_2_power" + suffix].full_name)
+    # Spot check the generated protocol buffers.
+    self.assertEqual("optimizer",
+                     serialized_graph.nodes[0].children[1].local_name)
+    optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
+        1].node_id]
+    self.assertEqual("beta_1_power", optimizer_node.children[0].local_name)
+    self.assertEqual(
+        "beta_1_power", serialized_graph.nodes[
+            optimizer_node.children[0].node_id].attributes[0].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel",
+        serialized_graph.nodes[optimizer_node.slot_variables[0]
+                               .original_variable_node_id]
+        .attributes[0].full_name)
+    # We strip off the :0 suffix, as variable.name-based saving does.
+    self.assertEqual(
+        "my_model/dense/kernel/Adam",
+        serialized_graph.nodes[optimizer_node.slot_variables[0]
+                               .slot_variable_node_id]
+        .attributes[0].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel/Adam:0",
+        optimizer.get_slot(
+            var=model._named_dense.kernel,
+            name="m").name)
+    self.assertEqual(
+        "model/_named_dense/kernel" + suffix,
+        serialized_graph.nodes[
+            optimizer_node.slot_variables[0]
+            .original_variable_node_id].attributes[0].checkpoint_key)
+    self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
+    self.assertEqual(
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
+        serialized_graph.nodes[
+            optimizer_node.slot_variables[0]
+            .slot_variable_node_id].attributes[0].checkpoint_key)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testSaveRestore(self):
+    model = MyModel()
+    optimizer = adam.Adam(0.001)
+    root_checkpointable = util.Checkpoint(
+        optimizer=optimizer, model=model)
+    input_value = constant_op.constant([[3.]])
+    if context.executing_eagerly():
+      optimizer.minimize(
+          lambda: model(input_value))
+    else:
+      train_op = optimizer.minimize(model(input_value))
+      # TODO(allenl): Make initialization more pleasant when graph building.
+      root_checkpointable.save_counter  # pylint: disable=pointless-statement
+      self.evaluate(util.gather_initializers(
+          root_checkpointable))
+      self.evaluate(train_op)
+    prefix = os.path.join(self.get_temp_dir(), "ckpt")
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
+    m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
+    self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
+    save_path = root_checkpointable.save(file_prefix=prefix)
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
+    self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3))
+    optimizer_variables = self.evaluate(optimizer.variables())
+    self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
+    # Immediate restoration
+    status = root_checkpointable.restore(save_path=save_path).assert_consumed()
+    status.run_restore_ops()
+    self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1]))
+    self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter))
+    self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
+    if not context.executing_eagerly():
+      return  # Restore-on-create is only supported when executing eagerly
+    on_create_model = MyModel()
+    on_create_optimizer = adam.Adam(
+        0.001,
+        # Preserve beta_1_power and beta_2_power when appying gradients
+        # so we can test that they've been restored correctly.
+        beta_1=1.0,
+        beta_2=1.0)
+    on_create_root = util.Checkpoint(
+        optimizer=on_create_optimizer, model=on_create_model)
+    # Deferred restoration
+    status = on_create_root.restore(save_path=save_path)
+    on_create_model(constant_op.constant([[3.]]))  # create variables
+    self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
+    self.assertAllEqual([42.],
+                        self.evaluate(
+                            on_create_model._named_dense.variables[1]))
+    on_create_m_bias_slot = on_create_optimizer.get_slot(
+        on_create_model._named_dense.variables[1], "m")
+    # Optimizer slot variables are created when the original variable is
+    # restored.
+    self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
+    self.assertAllEqual(optimizer_variables[2:],
+                        self.evaluate(on_create_optimizer.variables()))
+    dummy_var = resource_variable_ops.ResourceVariable([1.])
+    on_create_optimizer.minimize(loss=dummy_var.read_value)
+    status.assert_consumed()
+    beta_1_power, beta_2_power = on_create_optimizer._get_beta_accumulators()
+    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta_1_power))
+    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta_2_power))
+
+  # TODO(allenl): Debug garbage created by this test in python3.
+  def testDeferredRestorationUsageEager(self):
+    """An idiomatic eager execution example."""
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      model = MyModel()
+      optimizer = adam.Adam(0.001)
+      root = util.Checkpoint(
+          optimizer=optimizer, model=model,
+          optimizer_step=training_util.get_or_create_global_step())
+      root.restore(checkpoint_management.latest_checkpoint(
+          checkpoint_directory))
+      for _ in range(num_training_steps):
+        # TODO(allenl): Use a Dataset and serialize/checkpoint it.
+        input_value = constant_op.constant([[3.]])
+        optimizer.minimize(
+            lambda: model(input_value),  # pylint: disable=cell-var-from-loop
+            global_step=root.optimizer_step)
+      root.save(file_prefix=checkpoint_prefix)
+      self.assertEqual((training_continuation + 1) * num_training_steps,
+                       root.optimizer_step.numpy())
+
+  def testUsageGraph(self):
+    """Expected usage when graph building."""
+    with context.graph_mode():
+      num_training_steps = 10
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      for training_continuation in range(3):
+        with ops.Graph().as_default():
+          model = MyModel()
+          optimizer = adam.Adam(0.001)
+          root = util.Checkpoint(
+              optimizer=optimizer, model=model,
+              global_step=training_util.get_or_create_global_step())
+          input_value = constant_op.constant([[3.]])
+          train_op = optimizer.minimize(
+              model(input_value),
+              global_step=root.global_step)
+          checkpoint_path = checkpoint_management.latest_checkpoint(
+              checkpoint_directory)
+          with self.session(graph=ops.get_default_graph()) as session:
+            status = root.restore(save_path=checkpoint_path)
+            status.initialize_or_restore(session=session)
+            if checkpoint_path is None:
+              self.assertEqual(0, training_continuation)
+              with self.assertRaises(AssertionError):
+                status.assert_consumed()
+            else:
+              status.assert_consumed()
+            for _ in range(num_training_steps):
+              session.run(train_op)
+            root.save(file_prefix=checkpoint_prefix, session=session)
+            self.assertEqual((training_continuation + 1) * num_training_steps,
+                             session.run(root.global_step))
+            self.assertEqual(training_continuation + 1,
+                             session.run(root.save_counter))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testAgnosticUsage(self):
+    """Graph/eager agnostic usage."""
+    # Does create garbage when executing eagerly due to ops.Graph() creation.
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      with ops.Graph().as_default(), self.test_session(
+          graph=ops.get_default_graph()), test_util.device(use_gpu=True):
+        model = MyModel()
+        optimizer = adam.Adam(0.001)
+        root = util.Checkpoint(
+            optimizer=optimizer, model=model,
+            global_step=training_util.get_or_create_global_step())
+        checkpoint_path = checkpoint_management.latest_checkpoint(
+            checkpoint_directory)
+        status = root.restore(save_path=checkpoint_path)
+        input_value = constant_op.constant([[3.]])
+        train_fn = functools.partial(
+            optimizer.minimize,
+            functools.partial(model, input_value),
+            global_step=root.global_step)
+        if not context.executing_eagerly():
+          train_fn = functools.partial(self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        root.save(file_prefix=checkpoint_prefix)
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(root.global_step))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+
+  # pylint: disable=cell-var-from-loop
+  @test_util.run_in_graph_and_eager_modes
+  def testWithDefun(self):
+    num_training_steps = 2
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      with ops.Graph().as_default(), self.test_session(
+          graph=ops.get_default_graph()), test_util.device(use_gpu=True):
+        model = MyModel()
+        # Don't actually train so we can test variable values
+        optimizer = adam.Adam(0.)
+        root = util.Checkpoint(
+            optimizer=optimizer, model=model,
+            global_step=training_util.get_or_create_global_step())
+        checkpoint_path = checkpoint_management.latest_checkpoint(
+            checkpoint_directory)
+        status = root.restore(save_path=checkpoint_path)
+        def train_fn():
+          @function.defun
+          def _call_model(x):
+            return model(x)
+          with backprop.GradientTape() as tape:
+            loss = _call_model(constant_op.constant([[3.]]))
+          gradients = tape.gradient(loss, model.variables)
+          return optimizer.apply_gradients(zip(gradients, model.variables),
+                                           global_step=root.global_step)
+        if not context.executing_eagerly():
+          train_fn = functools.partial(
+              self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        if training_continuation > 0:
+          status.assert_consumed()
+          self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
+        else:
+          self.evaluate(model.variables[0].assign([[42.]]))
+        root.save(file_prefix=checkpoint_prefix)
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(root.global_step))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+  # pylint: enable=cell-var-from-loop
+
+  def testAnonymousVarsInInit(self):
+
+    class Model(training.Model):
+
+      def __init__(self):
+        super(Model, self).__init__()
+        self.w = resource_variable_ops.ResourceVariable(0.0)
+        self.b = resource_variable_ops.ResourceVariable(0.0)
+        self.vars = [self.w, self.b]
+
+      def call(self, x):
+        return x * self.w + self.b
+
+    with context.eager_mode():
+      model = Model()
+      optimizer = adam.Adam(learning_rate=0.05)
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      checkpoint = util.Checkpoint(
+          model=model, optimizer=optimizer)
+      for _ in range(2):
+        checkpoint.save(checkpoint_prefix)
+        with backprop.GradientTape() as tape:
+          loss = (constant_op.constant(1.)
+                  - model(constant_op.constant(1.))) ** 2
+        grad = tape.gradient(loss, model.vars)
+        optimizer.apply_gradients(
+            [(g, v) for g, v in zip(grad, model.vars)])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeferredSlotRestoration(self):
+    checkpoint_directory = self.get_temp_dir()
+
+    root = tracking.Checkpointable()
+    root.var = util.add_variable(
+        root, name="var", initializer=0.)
+    optimizer = adam.Adam(0.1)
+    if context.executing_eagerly():
+      optimizer.minimize(root.var.read_value)
+    else:
+      train_op = optimizer.minimize(root.var)
+      # Note that `optimizer` has not been added as a dependency of
+      # `root`. Create a one-off grouping so that slot variables for `root.var`
+      # get initialized too.
+      self.evaluate(util.gather_initializers(
+          util.Checkpoint(root=root, optimizer=optimizer)))
+      self.evaluate(train_op)
+    self.evaluate(state_ops.assign(root.var, 12.))
+    no_slots_path = util.CheckpointableSaver(root).save(
+        os.path.join(checkpoint_directory, "no_slots"))
+    root.optimizer = optimizer
+    self.evaluate(state_ops.assign(root.var, 13.))
+    self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var),
+                                   14.))
+    slots_path = util.CheckpointableSaver(root).save(
+        os.path.join(checkpoint_directory, "with_slots"))
+    new_root = tracking.Checkpointable()
+    # Load the slot-containing checkpoint (deferred), then immediately overwrite
+    # the non-slot variable (also deferred).
+    slot_status = util.CheckpointableSaver(
+        new_root).restore(slots_path)
+    no_slot_status = util.CheckpointableSaver(
+        new_root).restore(no_slots_path)
+    with self.assertRaises(AssertionError):
+      no_slot_status.assert_consumed()
+    new_root.var = util.add_variable(
+        new_root, name="var", shape=[])
+    no_slot_status.assert_consumed()
+    no_slot_status.run_restore_ops()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    new_root.optimizer = adam.Adam(0.1)
+    with self.assertRaisesRegexp(AssertionError, "beta_1_power"):
+      slot_status.assert_consumed()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    if context.executing_eagerly():
+      # Slot variables are only created with restoring initializers when
+      # executing eagerly.
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(name="m", var=new_root.var)))
+    else:
+      self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var),
+                    None)
+    if context.executing_eagerly():
+      new_root.optimizer.minimize(new_root.var.read_value)
+    else:
+      train_op = new_root.optimizer.minimize(new_root.var)
+      # The slot variable now exists; restore() didn't create it, but we should
+      # now have a restore op for it.
+      slot_status.run_restore_ops()
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(name="m", var=new_root.var)))
+      self.evaluate(train_op)
+    slot_status.assert_consumed()
+
+  def testManySavesGraph(self):
+    """Saves after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = tracking.Checkpointable()
+        obj.var = variable_scope.get_variable(name="v", initializer=0.)
+        obj.opt = adam.Adam(0.1)
+        obj.opt.minimize(obj.var.read_value())
+        self.evaluate(util.gather_initializers(obj))
+        saver = util.CheckpointableSaver(obj)
+        saver.save(checkpoint_prefix)
+        before_ops = graph.get_operations()
+        saver.save(checkpoint_prefix)
+        self.assertEqual(before_ops, graph.get_operations())
+
+  def testManyRestoresGraph(self):
+    """Restores after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = tracking.Checkpointable()
+        obj.var = variable_scope.get_variable(name="v", initializer=0.)
+        obj.opt = adam.Adam(0.1)
+        obj.opt.minimize(obj.var.read_value())
+        self.evaluate(util.gather_initializers(obj))
+        saver = util.CheckpointableSaver(obj)
+        save_path = saver.save(checkpoint_prefix)
+        saver.restore(save_path)
+        before_ops = graph.get_operations()
+        saver.restore(save_path)
+        self.assertEqual(before_ops, graph.get_operations())
+
+  def testMultipleGraphsNonSlotVariables(self):
+    with context.graph_mode():
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      optimizer = adam.Adam(0.001)
+      # Construct a model in one graph
+      first_graph = ops.Graph()
+      first_session = session_lib.Session(graph=first_graph)
+      with first_graph.as_default(), first_session.as_default():
+        first_variable = resource_variable_ops.ResourceVariable([1.])
+        first_root_checkpointable = util.Checkpoint(
+            optimizer=optimizer, variable=first_variable)
+        train_op = optimizer.minimize(first_variable.read_value)
+        self.evaluate(util.gather_initializers(
+            first_root_checkpointable))
+        self.evaluate(train_op)
+        self.evaluate(first_variable.assign([1.]))
+        self.evaluate(optimizer.get_slot(
+            var=first_variable, name="m").assign([2.]))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta_1_power.assign(3.))
+
+      # Save and load in a second graph
+      second_graph = ops.Graph()
+      with second_graph.as_default(), session_lib.Session(graph=second_graph):
+        second_variable = resource_variable_ops.ResourceVariable([1.])
+        second_root_checkpointable = util.Checkpoint(
+            optimizer=optimizer, variable=second_variable)
+        train_op = optimizer.minimize(second_variable.read_value)
+        second_root_checkpointable.restore(None).initialize_or_restore()
+        self.evaluate(train_op)
+        self.evaluate(second_variable.assign([4.]))
+        self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m").assign([5.]))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta_1_power.assign(6.))
+        save_path = second_root_checkpointable.save(checkpoint_prefix)
+        self.evaluate(second_variable.assign([7.]))
+        self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m").assign([8.]))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta_1_power))
+        status = second_root_checkpointable.restore(save_path)
+        status.assert_consumed().run_restore_ops()
+        self.assertAllEqual([4.], self.evaluate(second_variable))
+        self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m")))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta_1_power))
+
+      # Check that the first graph is unmolested
+      with first_graph.as_default(), first_session.as_default():
+        self.assertAllEqual([1.], self.evaluate(first_variable))
+        self.assertAllEqual([2.], self.evaluate(optimizer.get_slot(
+            var=first_variable, name="m")))
+        beta_1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(3., self.evaluate(beta_1_power))
+
+
+class TemplateTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_checkpointable_save_restore(self):
+
+    def _templated():
+      v = variable_scope.get_variable(
+          "v", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      v2 = variable_scope.get_variable(
+          "v2", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      return v, v + 1., v2
+
+    save_template = template.make_template("s1", _templated)
+    v1_save, _, v2_save = save_template()
+    optimizer = adam.Adam(0.0)
+    save_root = util.Checkpoint(
+        my_template=save_template, optimizer=optimizer)
+    optimizer.minimize(v1_save.read_value)
+    self.evaluate([v.initializer for v in optimizer.variables()])
+    self.evaluate(v1_save.assign([12.]))
+    self.evaluate(v2_save.assign([14.]))
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = save_root.save(checkpoint_prefix)
+
+    load_template = template.make_template("s2", _templated)
+    load_optimizer = adam.Adam(0.0)
+    load_root = util.Checkpoint(
+        my_template=load_template, optimizer=load_optimizer)
+    status = load_root.restore(save_path)
+    var, var_plus_one, var2 = load_template()
+    load_optimizer.minimize(var.read_value)
+    self.assertEqual(2, len(load_template._checkpoint_dependencies))
+    self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
+    self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
+    status.assert_consumed().run_restore_ops()
+    self.assertAllEqual([12.], self.evaluate(var))
+    self.assertAllEqual([13.], self.evaluate(var_plus_one))
+    self.assertAllEqual([14.], self.evaluate(var2))
+
+
+class CheckpointCompatibilityTests(test.TestCase):
+
+  def _initialized_model(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    optimizer = adam.Adam(0.001)
+    optimizer_step = training_util.get_or_create_global_step()
+    root_checkpointable = util.Checkpoint(
+        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
+    train_op = optimizer.minimize(
+        functools.partial(model, input_value),
+        global_step=optimizer_step)
+    self.evaluate(util.gather_initializers(
+        root_checkpointable))
+    self.evaluate(train_op)
+    # A regular variable, a slot variable, and a non-slot Optimizer variable
+    # with known values to check when loading.
+    self.evaluate(model._named_dense.bias.assign([1.]))
+    self.evaluate(optimizer.get_slot(
+        var=model._named_dense.bias, name="m").assign([2.]))
+    beta_1_power, _ = optimizer._get_beta_accumulators()
+    self.evaluate(beta_1_power.assign(3.))
+    return root_checkpointable
+
+  def _set_sentinels(self, root_checkpointable):
+    self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.]))
+    self.evaluate(
+        root_checkpointable.optimizer.get_slot(
+            var=root_checkpointable.model._named_dense.bias, name="m")
+        .assign([102.]))
+    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.evaluate(beta_1_power.assign(103.))
+
+  def _check_sentinels(self, root_checkpointable):
+    self.assertAllEqual(
+        [1.], self.evaluate(root_checkpointable.model._named_dense.bias))
+    self.assertAllEqual([2.], self.evaluate(
+        root_checkpointable.optimizer.get_slot(
+            var=root_checkpointable.model._named_dense.bias, name="m")))
+    beta_1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.assertAllEqual(3., self.evaluate(beta_1_power))
+
+  def _write_name_based_checkpoint(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.test_session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        name_saver = core_saver.Saver()
+        return name_saver.save(
+            sess=session, save_path=checkpoint_prefix,
+            global_step=root.optimizer_step)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testLoadFromNameBasedSaver(self):
+    """Save a name-based checkpoint, load it using the object-based API."""
+    with test_util.device(use_gpu=True):
+      save_path = self._write_name_based_checkpoint()
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      with self.assertRaises(AssertionError):
+        self._check_sentinels(root)
+      object_saver = util.CheckpointableSaver(root)
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
+      if context.executing_eagerly():
+        self._check_sentinels(root)
+      if context.executing_eagerly():
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_consumed()
+      else:
+        # When graph building, we haven't read any keys, so we don't know
+        # whether the restore will be complete.
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_consumed()
+      status.run_restore_ops()
+      self._check_sentinels(root)
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
+      status.initialize_or_restore()
+      self._check_sentinels(root)
+
+  # TODO(allenl): Test for the core name-based saver loading object-based
+  # checkpoints once object-based checkpointing is in core.
+
+  def testSaveGraphLoadEager(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.test_session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        save_path = root.save(
+            session=session, file_prefix=checkpoint_prefix)
+    with context.eager_mode():
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      root.restore(save_path).assert_consumed()
+      self._check_sentinels(root)
+
+  def testSaveEagerLoadGraph(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.eager_mode():
+      root = self._initialized_model()
+      save_path = root.save(file_prefix=checkpoint_prefix)
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.test_session(
+          graph=save_graph):
+        root = self._initialized_model()
+        self._set_sentinels(root)
+        root.restore(save_path).assert_consumed().run_restore_ops()
+        self._check_sentinels(root)
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
new file mode 100644
index 0000000000..bd5557f4fd
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -0,0 +1,1349 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Version 2 of class Optimizer."""
+# pylint: disable=g-bad-name
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gradients
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.training import distribute as distribute_lib
+from tensorflow.python.training import distribution_strategy_context
+from tensorflow.python.training import optimizer as optimizer_v1
+from tensorflow.python.training import slot_creator
+from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.util import nest
+
+
+class _OptimizableVariable(object):
+  """Interface for abstracting over variables in the optimizers."""
+
+  @abc.abstractmethod
+  def target(self):
+    """Returns the optimization target for this variable."""
+    raise NotImplementedError("Calling an abstract method.")
+
+  @abc.abstractmethod
+  def update_op(self, optimizer, g, *args):
+    """Returns the update ops for updating the variable."""
+    raise NotImplementedError("Calling an abstract method.")
+
+
+class _RefVariableProcessor(_OptimizableVariable):
+  """Processor for Variable."""
+
+  def __init__(self, v):
+    self._v = v
+
+  def target(self):
+    return self._v._ref()  # pylint: disable=protected-access
+
+  def update_op(self, optimizer, g, *args):
+    if isinstance(g, ops.Tensor):
+      update_op = optimizer._apply_dense(g, self._v, *args)  # pylint: disable=protected-access
+      if self._v.constraint is not None:
+        with ops.control_dependencies([update_op]):
+          return self._v.assign(self._v.constraint(self._v))
+      else:
+        return update_op
+    else:
+      assert isinstance(g, ops.IndexedSlices), ("Gradient ", g, " is neither a "
+                                                "tensor nor IndexedSlices.")
+      if self._v.constraint is not None:
+        raise RuntimeError(
+            "Cannot use a constraint function on a sparse variable.")
+      # pylint: disable=protected-access
+      return optimizer._apply_sparse_duplicate_indices(g, self._v, *args)
+
+
+class _DenseReadResourceVariableProcessor(_OptimizableVariable):
+  """Processor for dense ResourceVariables."""
+
+  def __init__(self, v):
+    self._v = v
+
+  def target(self):
+    return self._v
+
+  def update_op(self, optimizer, g, *args):
+    # pylint: disable=protected-access
+    update_op = optimizer._resource_apply_dense(g, self._v.op.inputs[0], *args)
+    if self._v.constraint is not None:
+      with ops.control_dependencies([update_op]):
+        return self._v.assign(self._v.constraint(self._v))
+    else:
+      return update_op
+
+
+class _DenseResourceVariableProcessor(_OptimizableVariable):
+  """Processor for dense ResourceVariables."""
+
+  def __init__(self, v):
+    self._v = v
+
+  def target(self):
+    return self._v
+
+  def update_op(self, optimizer, g, *args):
+    # pylint: disable=protected-access
+    if isinstance(g, ops.IndexedSlices):
+      if self._v.constraint is not None:
+        raise RuntimeError(
+            "Cannot use a constraint function on a sparse variable.")
+      return optimizer._resource_apply_sparse_duplicate_indices(
+          g.values, self._v, g.indices, *args)
+    update_op = optimizer._resource_apply_dense(g, self._v, *args)
+    if self._v.constraint is not None:
+      with ops.control_dependencies([update_op]):
+        return self._v.assign(self._v.constraint(self._v))
+    else:
+      return update_op
+
+
+class _TensorProcessor(_OptimizableVariable):
+  """Processor for ordinary Tensors.
+
+  Even though a Tensor can't really be updated, sometimes it is useful to
+  compute the gradients with respect to a Tensor using the optimizer. Updating
+  the Tensor is, of course, unsupported.
+  """
+
+  def __init__(self, v):
+    self._v = v
+
+  def target(self):
+    return self._v
+
+  def update_op(self, optimizer, g, *args):
+    raise NotImplementedError("Trying to update a Tensor ", self._v)
+
+
+def _get_processor(v):
+  """The processor of v."""
+  if context.executing_eagerly():
+    if isinstance(v, ops.Tensor):
+      return _TensorProcessor(v)
+    else:
+      return _DenseResourceVariableProcessor(v)
+  if v.op.type == "VarHandleOp":
+    return _DenseResourceVariableProcessor(v)
+  if isinstance(v, variables.Variable):
+    return _RefVariableProcessor(v)
+  if isinstance(v, ops.Tensor):
+    return _TensorProcessor(v)
+  raise NotImplementedError("Trying to optimize unsupported type ", v)
+
+
+def _var_key_v2(var):
+  """Key for representing a primary variable, for looking up slots."""
+  # pylint: disable=protected-access
+  if hasattr(var, "_distributed_container"):
+    distributed_container = var._distributed_container()
+    assert distributed_container is not None
+    if context.executing_eagerly():
+      return distributed_container._unique_id
+    return distributed_container._shared_name
+  if context.executing_eagerly():
+    return var._unique_id
+  return var.op.name
+
+
+def _resolve(value, name):
+  if callable(value):
+    value = value()
+  return ops.convert_to_tensor(value, name=name)
+
+
+def _is_dynamic(value):
+  """Returns true if __init__ arg `value` should be re-evaluated each step."""
+  if callable(value): return True
+  # Don't need to do anything special in graph mode, since dynamic values
+  # will propagate correctly automatically.
+  # TODO(josh11b): Add per-device caching across steps using variables for
+  # truly static values once we add distributed support.
+  if context.executing_eagerly() and isinstance(
+      value, resource_variable_ops.ResourceVariable):
+    return True
+  return False
+
+
+class _OptimizerV2State(object):
+  """Holds per-graph and per-step optimizer state.
+
+  Use _init_with_static_hyper() to create the state for a graph, and then
+  _copy_with_dynamic_hyper() to convert that to state for a particular step.
+  The difference between the two is that the former only has hyper
+  parameter values that are static and the latter also has values that
+  can change every step (according to _is_dynamic()).
+  """
+
+  def __init__(self, op_name):
+    self._op_name = op_name
+
+  def _init_with_static_hyper(self, hyper):
+    """Initialize a fresh state object from hyper dict."""
+    # self._hyper contains a dict from name to a dict with the Tensor values.
+    # This dict starts with a single item with key "None" with the hyper
+    # parameter value converted to a Tensor. Other items have dtype keys
+    # with that Tensor cast to that dtype.
+    with ops.init_scope():
+      self._hyper = {name: {None: ops.convert_to_tensor(value, name=name)}
+                     for name, (dynamic, value) in sorted(hyper.items())
+                     if not dynamic}
+    self._slots = {}
+    self._non_slot_dict = {}
+    # Extra state to help Optimizers implement Checkpointable. Holds information
+    # about variables which will be restored as soon as they're created.
+    self._deferred_dependencies = {}  # Non-slot variables
+    self._deferred_slot_restorations = {}  # Slot variables
+
+  def _copy_with_dynamic_hyper(self, hyper, distribution, non_slot_devices):
+    """Create a new state object for a particular step."""
+    ret = _OptimizerV2State(self._op_name)
+    # pylint: disable=protected-access
+    ret._slots = self._slots
+    ret._non_slot_dict = self._non_slot_dict
+    ret._deferred_dependencies = self._deferred_dependencies
+    ret._deferred_slot_restorations = self._deferred_slot_restorations
+    ret._hyper = {name: {None: _resolve(value, name)}
+                  for name, (dynamic, value) in sorted(hyper.items())
+                  if dynamic}
+    ret._hyper.update(self._hyper)
+    ret._non_slot_devices = non_slot_devices
+    ret._distribution = distribution
+    return ret
+
+  def _variables(self):
+    """Returns a list of all variables held by self."""
+    optimizer_variables = list(self._non_slot_dict.values())
+    for variable_dict in self._slots.values():
+      for slot_for_variable in variable_dict.values():
+        optimizer_variables.append(slot_for_variable)
+    # Sort variables by name so that the return is deterministic.
+    return sorted(optimizer_variables, key=lambda v: v.name)
+
+  def _slot_dict(self, slot_name):
+    """Returns a dict for caching slots created under the given name.
+
+    Args:
+      slot_name: Name for the slot.
+
+    Returns:
+      A dict that maps primary `Variable` objects to the slot created
+      for that variable, under the given slot name.
+    """
+    named_slots = self._slots.get(slot_name, None)
+    if named_slots is None:
+      named_slots = {}
+      self._slots[slot_name] = named_slots
+    return named_slots
+
+  def create_slot(self, var, val, slot_name, optional_op_name=None):
+    """Find or create a slot for a variable.
+
+    Args:
+      var: A `Variable` object.
+      val: A `Tensor`.  The initial value of the slot.
+      slot_name: Name for the slot.
+      optional_op_name: Name to use when scoping the Variable that
+        needs to be created for the slot.
+
+    Returns:
+      A `Variable` object.
+    """
+    named_slots = self._slot_dict(slot_name)
+    var_key = _var_key_v2(var)
+    if var_key not in named_slots:
+      new_slot_variable = slot_creator.create_slot(
+          var, val, optional_op_name or self._op_name)
+      self._restore_slot_variable(
+          slot_name=slot_name, variable=var,
+          slot_variable=new_slot_variable)
+      named_slots[var_key] = new_slot_variable
+    return named_slots[var_key]
+
+  def create_slot_with_initializer(self, var, initializer, shape, dtype,
+                                   slot_name, optional_op_name=None):
+    """Find or create a slot for a variable, using an Initializer.
+
+    Args:
+      var: A `Variable` object.
+      initializer: An `Initializer`.  The initial value of the slot.
+      shape: Shape of the initial value of the slot.
+      dtype: Type of the value of the slot.
+      slot_name: Name for the slot.
+      optional_op_name: Name to use when scoping the Variable that
+        needs to be created for the slot.
+
+    Returns:
+      A `Variable` object.
+    """
+    named_slots = self._slot_dict(slot_name)
+    var_key = _var_key_v2(var)
+    if var_key not in named_slots:
+      new_slot_variable = slot_creator.create_slot_with_initializer(
+          var, initializer, shape, dtype, optional_op_name or self._op_name)
+      self._restore_slot_variable(
+          slot_name=slot_name, variable=var,
+          slot_variable=new_slot_variable)
+      named_slots[var_key] = new_slot_variable
+    return named_slots[var_key]
+
+  def zeros_slot(self, var, slot_name, optional_op_name=None):
+    """Find or create a slot initialized with 0.0.
+
+    Args:
+      var: A `Variable` object.
+      slot_name: Name for the slot.
+      optional_op_name: Name to use when scoping the Variable that
+        needs to be created for the slot.
+
+    Returns:
+      A `Variable` object.
+    """
+    named_slots = self._slot_dict(slot_name)
+    var_key = _var_key_v2(var)
+    if var_key not in named_slots:
+      new_slot_variable = slot_creator.create_zeros_slot(
+          var, optional_op_name or self._op_name)
+      self._restore_slot_variable(
+          slot_name=slot_name, variable=var,
+          slot_variable=new_slot_variable)
+      named_slots[var_key] = new_slot_variable
+    return named_slots[var_key]
+
+  def _create_or_restore_slot_variable(
+      self, slot_variable_position, slot_name, variable,
+      optional_op_name=None):
+    """Restore a slot variable's value, possibly creating it.
+
+    Called when a variable which has an associated slot variable is created or
+    restored. When executing eagerly, we create the slot variable with a
+    restoring initializer.
+
+    No new variables are created when graph building. Instead,
+    _restore_slot_variable catches these after normal creation and adds restore
+    ops to the graph. This method is nonetheless important when graph building
+    for the case when a slot variable has already been created but `variable`
+    has just been added to a dependency graph (causing us to realize that the
+    slot variable needs to be restored).
+
+    Args:
+      slot_variable_position: A `checkpointable._CheckpointPosition` object
+        indicating the slot variable `Checkpointable` object to be restored.
+      slot_name: The name of this `Optimizer`'s slot to restore into.
+      variable: The variable object this slot is being created for.
+      optional_op_name: Name to use when scoping the Variable that
+        needs to be created for the slot.
+    """
+    slot_variable = self.get_slot(var=variable, name=slot_name)
+    if (slot_variable is None and context.executing_eagerly() and
+        slot_variable_position.is_simple_variable()
+        # Defer slot variable creation if there is an active variable creator
+        # scope. Generally we'd like to eagerly create/restore slot variables
+        # when possible, but this may mean that scopes intended to catch
+        # `variable` also catch its eagerly created slot variable
+        # unintentionally (specifically make_template would add a dependency on
+        # a slot variable if not for this case). Deferring is mostly harmless
+        # (aside from double initialization), and makes variable creator scopes
+        # behave the same way they do when graph building.
+        and not ops.get_default_graph()._variable_creator_stack):  # pylint: disable=protected-access
+      initializer = checkpointable.CheckpointInitialValue(
+          checkpoint_position=slot_variable_position)
+      slot_variable = self.create_slot(
+          var=variable,
+          val=initializer,
+          slot_name=slot_name,
+          optional_op_name=optional_op_name)
+      # Optimizers do not have unconditional dependencies on their slot
+      # variables (nor do any other objects). They are only saved if the
+      # variables they were created for are also saved.
+    if slot_variable is not None:
+      # If we've either made this slot variable, or if we've pulled out an
+      # existing slot variable, we should restore it.
+      slot_variable_position.restore(slot_variable)
+    else:
+      # We didn't make the slot variable. Defer restoring until it gets created
+      # normally. We keep a list rather than the one with the highest restore
+      # UID in case slot variables have their own dependencies, in which case
+      # those could differ between restores.
+      variable_key = _var_key_v2(variable)
+      self._deferred_slot_restorations.setdefault(
+          slot_name, {}).setdefault(variable_key, []).append(
+              slot_variable_position)
+
+  def get_slot(self, var, name):
+    """Return a slot named `name` created for `var` by the Optimizer.
+
+    Some `Optimizer` subclasses use additional variables.  For example
+    `Momentum` and `Adagrad` use variables to accumulate updates.  This method
+    gives access to these `Variable` objects if for some reason you need them.
+
+    Use `get_slot_names()` to get the list of slot names created by the
+    `Optimizer`.
+
+    Args:
+      var: A variable passed to `minimize()` or `apply_gradients()`.
+      name: A string.
+
+    Returns:
+      The `Variable` for the slot if it was created, `None` otherwise.
+    """
+    named_slots = self._slots.get(name, None)
+    if not named_slots:
+      return None
+    return named_slots.get(_var_key_v2(var), None)
+
+  def get_slot_names(self):
+    """Return a list of the names of slots created by the `Optimizer`.
+
+    See `get_slot()`.
+
+    Returns:
+      A list of strings.
+    """
+    return sorted(self._slots.keys())
+
+  def create_non_slot(self, initial_value, name, colocate_with=None):
+    """Add an extra variable, not associated with a slot."""
+    v = self._non_slot_dict.get(name, None)
+    if v is None:
+      if colocate_with is None: colocate_with = self._non_slot_devices
+      with self._distribution.colocate_vars_with(colocate_with):
+        # TODO(josh11b): Use get_variable() except for the legacy Adam use case.
+        v = variable_scope.variable(initial_value, name=name, trainable=False)
+      self._non_slot_dict[name] = v
+      deferred_dependencies_list = self._deferred_dependencies.pop(name, ())
+      for checkpoint_position in sorted(
+          deferred_dependencies_list,
+          key=lambda restore: restore.checkpoint.restore_uid,
+          reverse=True):
+        checkpoint_position.restore(v)
+    return v
+
+  def _restore_slot_variable(self, slot_name, variable, slot_variable):
+    """Restore a newly created slot variable's value."""
+    variable_key = _var_key_v2(variable)
+    deferred_restorations = self._deferred_slot_restorations.get(
+        slot_name, {}).pop(variable_key, [])
+    # Iterate over restores, highest restore UID first to minimize the number
+    # of assignments.
+    deferred_restorations.sort(key=lambda position: position.restore_uid,
+                               reverse=True)
+    for checkpoint_position in deferred_restorations:
+      checkpoint_position.restore(slot_variable)
+
+  def get_non_slot(self, name):
+    """Returns the non-slot variable identified by `name`."""
+    return self._non_slot_dict.get(name, None)
+
+  def get_hyper(self, name, dtype=None):
+    """Returns the `name` hyper parameter, optionally cast to `dtype`."""
+    dtype_dict = self._hyper[name]
+    # Do we have the value cast to dtype already cached? This should always
+    # succeed when dtype is None.
+    if dtype in dtype_dict:
+      return dtype_dict[dtype]
+    # Not cached, cast to dtype and save the result in the cache.
+    result = math_ops.cast(dtype_dict[None], dtype)
+    dtype_dict[dtype] = result
+    return result
+
+
+class OptimizerV2(optimizer_v1.Optimizer):
+  """Updated base class for optimizers.
+
+  This class defines the API to add Ops to train a model.  You never use this
+  class directly, but instead instantiate one of its subclasses such as
+  `GradientDescentOptimizer`, `AdagradOptimizer`, or `MomentumOptimizer`.
+
+  ### Usage
+
+  ```python
+  # Create an optimizer with the desired parameters.
+  opt = GradientDescentOptimizer(learning_rate=0.1)
+  # Add Ops to the graph to minimize a cost by updating a list of variables.
+  # "cost" is a Tensor, and the list of variables contains tf.Variable
+  # objects.
+  opt_op = opt.minimize(cost, var_list=<list of variables>)
+  ```
+
+  In the training program you will just have to run the returned Op.
+
+  ```python
+  # Execute opt_op to do one step of training:
+  opt_op.run()
+  ```
+
+  ### Processing gradients before applying them.
+
+  Calling `minimize()` takes care of both computing the gradients and
+  applying them to the variables.  If you want to process the gradients
+  before applying them you can instead use the optimizer in three steps:
+
+  1.  Compute the gradients with `compute_gradients()`.
+  2.  Process the gradients as you wish.
+  3.  Apply the processed gradients with `apply_gradients()`.
+
+  Example:
+
+  ```python
+  # Create an optimizer.
+  opt = GradientDescentOptimizer(learning_rate=0.1)
+
+  # Compute the gradients for a list of variables.
+  grads_and_vars = opt.compute_gradients(loss, <list of variables>)
+
+  # grads_and_vars is a list of tuples (gradient, variable).  Do whatever you
+  # need to the 'gradient' part, for example cap them, etc.
+  capped_grads_and_vars = [(MyCapper(gv[0]), gv[1]) for gv in grads_and_vars]
+
+  # Ask the optimizer to apply the capped gradients.
+  opt.apply_gradients(capped_grads_and_vars)
+  ```
+
+  ### Gating Gradients
+
+  Both `minimize()` and `compute_gradients()` accept a `gate_gradients`
+  argument that controls the degree of parallelism during the application of
+  the gradients.
+
+  The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`.
+
+  <b>`GATE_NONE`</b>: Compute and apply gradients in parallel.  This provides
+  the maximum parallelism in execution, at the cost of some non-reproducibility
+  in the results.  For example the two gradients of `matmul` depend on the input
+  values: With `GATE_NONE` one of the gradients could be applied to one of the
+  inputs _before_ the other gradient is computed resulting in non-reproducible
+  results.
+
+  <b>`GATE_OP`</b>: For each Op, make sure all gradients are computed before
+  they are used.  This prevents race conditions for Ops that generate gradients
+  for multiple inputs where the gradients depend on the inputs.
+
+  <b>`GATE_GRAPH`</b>: Make sure all gradients for all variables are computed
+  before any one of them is used.  This provides the least parallelism but can
+  be useful if you want to process all gradients before applying any of them.
+
+  ### Slots
+
+  Some optimizer subclasses, such as `MomentumOptimizer` and `AdagradOptimizer`
+  allocate and manage additional variables associated with the variables to
+  train.  These are called <i>Slots</i>.  Slots have names and you can ask the
+  optimizer for the names of the slots that it uses.  Once you have a slot name
+  you can ask the optimizer for the variable it created to hold the slot value.
+
+  This can be useful if you want to log debug a training algorithm, report stats
+  about the slots, etc.
+
+  ### Non-slot variables
+
+  Some optimizer subclasses, such as `AdamOptimizer` have variables that
+  are not associated with the variables to train, just the step itself.
+
+  ### Hyper parameters
+
+  These are arguments passed to the optimizer subclass constructor
+  (the `__init__` method), and then passed to `self._set_hyper()`.
+  They can be either regular Python values (like 1.0), tensors, or
+  callables. If they are callable, the callable will be called during
+  `apply_gradients()` to get the value for the hyper parameter.
+
+  ### State
+
+  Internal methods are passed a `state` argument with the correct
+  values to use for the slot and non-slot variables, and the hyper
+  parameters.
+  """
+
+  # Values for gate_gradients.
+  GATE_NONE = 0
+  GATE_OP = 1
+  GATE_GRAPH = 2
+
+  def __init__(self, name):
+    """Create a new Optimizer.
+
+    This must be called by the constructors of subclasses.
+    Note that Optimizer instances should not bind to a single graph,
+    and so shouldn't keep Tensors as member variables. Generally
+    you should be able to use the _set_hyper()/state.get_hyper()
+    facility instead.
+
+    Args:
+      name: A non-empty string.  The name to use for accumulators created
+        for the optimizer.
+
+    Raises:
+      ValueError: If name is malformed.
+      RuntimeError: If _create_slots has been overridden instead of
+          _create_vars.
+    """
+    # Note: We intentionally don't call parent __init__.
+
+    # Optimizer._create_slots was replaced by _create_vars in OptimizerV2.
+    if (self.__class__._create_slots.__code__ is not  # pylint: disable=protected-access
+        OptimizerV2._create_slots.__code__):
+      raise RuntimeError("Override _create_vars instead of _create_slots when "
+                         "descending from OptimizerV2 (class %s)" %
+                         self.__class__.__name__)
+    if not name:
+      raise ValueError("Must specify the optimizer name")
+
+    self._use_locking = False
+    self._name = name
+    # Map from graph_key to state for that graph. We use the graph_key
+    # since it works in both eager and graph mode, and gives the outer
+    # graph inside functions.
+    tower_context = distribution_strategy_context.get_tower_context()
+    if tower_context is None:
+      # In a cross-tower context for a DistributionStrategy, which means
+      # only one Optimizer will be created, not one per tower.
+      self._per_graph_state = {}
+    else:
+      # We use get_tower_context().merge_call() to get a single dict
+      # shared across all model replicas when running with a
+      # DistributionStrategy.
+      self._per_graph_state = tower_context.merge_call(lambda _: {})
+
+    # Hyper parameters, and whether they should be re-evaluated every step.
+    self._hyper = {}
+
+  def _set_hyper(self, name, value):
+    self._hyper[name] = (_is_dynamic(value), value)
+
+  def minimize(self, loss, global_step=None, var_list=None,
+               gate_gradients=GATE_OP, aggregation_method=None,
+               colocate_gradients_with_ops=False, name=None,
+               grad_loss=None, stop_gradients=None,
+               scale_loss_by_num_towers=None):
+    """Add operations to minimize `loss` by updating `var_list`.
+
+    This method simply combines calls `compute_gradients()` and
+    `apply_gradients()`. If you want to process the gradient before applying
+    them call `compute_gradients()` and `apply_gradients()` explicitly instead
+    of using this function.
+
+    Args:
+      loss: A `Tensor` containing the value to minimize.
+      global_step: Optional `Variable` to increment by one after the
+        variables have been updated.
+      var_list: Optional list or tuple of `Variable` objects to update to
+        minimize `loss`.  Defaults to the list of variables collected in
+        the graph under the key `GraphKeys.TRAINABLE_VARIABLES`.
+      gate_gradients: How to gate the computation of gradients.  Can be
+        `GATE_NONE`, `GATE_OP`, or  `GATE_GRAPH`.
+      aggregation_method: Specifies the method used to combine gradient terms.
+        Valid values are defined in the class `AggregationMethod`.
+      colocate_gradients_with_ops: If True, try colocating gradients with
+        the corresponding op.
+      name: Optional name for the returned operation.
+      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
+      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
+        through.
+      scale_loss_by_num_towers: Optional boolean. If true, scale the loss
+        down by the number of towers. By default, auto-detects whether this
+        is needed.
+
+    Returns:
+      An Operation that updates the variables in `var_list`.  If `global_step`
+      was not `None`, that operation also increments `global_step`.
+
+    Raises:
+      ValueError: If some of the variables are not `Variable` objects.
+
+    @compatibility(eager)
+    When eager execution is enabled, `loss` should be a Python function that
+    takes elements of `var_list` as arguments and computes the value to be
+    minimized. If `var_list` is None, `loss` should take no arguments.
+    Minimization (and gradient computation) is done with respect to the
+    elements of `var_list` if not None, else with respect to any trainable
+    variables created during the execution of the `loss` function.
+    `gate_gradients`, `aggregation_method`, `colocate_gradients_with_ops` and
+    `grad_loss` are ignored when eager execution is enabled.
+    @end_compatibility
+    """
+    grads_and_vars = self.compute_gradients(
+        loss, var_list=var_list, gate_gradients=gate_gradients,
+        aggregation_method=aggregation_method,
+        colocate_gradients_with_ops=colocate_gradients_with_ops,
+        grad_loss=grad_loss, stop_gradients=stop_gradients,
+        scale_loss_by_num_towers=scale_loss_by_num_towers)
+
+    vars_with_grad = [v for g, v in grads_and_vars if g is not None]
+    if not vars_with_grad:
+      raise ValueError(
+          "No gradients provided for any variable, check your graph for ops"
+          " that do not support gradients, between variables %s and loss %s." %
+          ([str(v) for _, v in grads_and_vars], loss))
+
+    return self.apply_gradients(grads_and_vars, global_step=global_step,
+                                name=name)
+
+  def compute_gradients(self, loss, var_list=None,
+                        gate_gradients=GATE_OP,
+                        aggregation_method=None,
+                        colocate_gradients_with_ops=False,
+                        grad_loss=None, stop_gradients=None,
+                        scale_loss_by_num_towers=None):
+    """Compute gradients of `loss` for the variables in `var_list`.
+
+    This is the first part of `minimize()`.  It returns a list
+    of (gradient, variable) pairs where "gradient" is the gradient
+    for "variable".  Note that "gradient" can be a `Tensor`, an
+    `IndexedSlices`, or `None` if there is no gradient for the
+    given variable.
+
+    Args:
+      loss: A Tensor containing the value to minimize or a callable taking
+        no arguments which returns the value to minimize. When eager execution
+        is enabled it must be a callable.
+      var_list: Optional list or tuple of `tf.Variable` to update to minimize
+        `loss`.  Defaults to the list of variables collected in the graph
+        under the key `GraphKeys.TRAINABLE_VARIABLES`.
+      gate_gradients: How to gate the computation of gradients.  Can be
+        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+      aggregation_method: Specifies the method used to combine gradient terms.
+        Valid values are defined in the class `AggregationMethod`.
+      colocate_gradients_with_ops: If True, try colocating gradients with
+        the corresponding op.
+      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
+      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
+        through.
+      scale_loss_by_num_towers: Optional boolean. If true, scale the loss
+        down by the number of towers. By default, auto-detects whether this
+        is needed.
+
+    Returns:
+      A list of (gradient, variable) pairs. Variable is always present, but
+      gradient can be `None`.
+
+    Raises:
+      TypeError: If `var_list` contains anything else than `Variable` objects.
+      ValueError: If some arguments are invalid.
+      RuntimeError: If called with eager execution enabled and `loss` is
+        not callable.
+
+    @compatibility(eager)
+    When eager execution is enabled, `gate_gradients`, `aggregation_method`,
+    and `colocate_gradients_with_ops` are ignored.
+    @end_compatibility
+    """
+    # TODO(josh11b): Test that we handle weight decay in a reasonable way.
+    if callable(loss):
+      with backprop.GradientTape() as tape:
+        if var_list is not None:
+          tape.watch(var_list)
+        loss_value = loss()
+
+        # Scale loss for number of towers (callable-loss case). In this case,
+        # we have to be careful to call distribute_lib.get_loss_reduction()
+        # *after* loss() is evaluated, so we know what loss reduction it uses.
+        if scale_loss_by_num_towers is None:
+          scale_loss_by_num_towers = (
+              distribute_lib.get_loss_reduction() ==
+              variable_scope.VariableAggregation.MEAN)
+        if scale_loss_by_num_towers:
+          num_towers = distribution_strategy_context.get_distribution_strategy(
+          ).num_towers
+          if num_towers > 1:
+            loss_value *= 1. / num_towers
+
+      if var_list is None:
+        var_list = tape.watched_variables()
+      grads = tape.gradient(loss_value, var_list, grad_loss)
+      return list(zip(grads, var_list))
+    if context.executing_eagerly():
+      raise RuntimeError(
+          "`loss` passed to Optimizer.compute_gradients should "
+          "be a function when eager execution is enabled.")
+
+    # Scale loss for number of towers (non-callable-loss case).
+    if scale_loss_by_num_towers is None:
+      scale_loss_by_num_towers = (
+          distribute_lib.get_loss_reduction() ==
+          variable_scope.VariableAggregation.MEAN)
+    if scale_loss_by_num_towers:
+      num_towers = distribution_strategy_context.get_distribution_strategy(
+      ).num_towers
+      if num_towers > 1:
+        loss *= 1. / num_towers
+
+    if gate_gradients not in [optimizer_v1.Optimizer.GATE_NONE,
+                              optimizer_v1.Optimizer.GATE_OP,
+                              optimizer_v1.Optimizer.GATE_GRAPH]:
+      raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
+                       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
+                       gate_gradients)
+    self._assert_valid_dtypes([loss])
+    if grad_loss is not None:
+      self._assert_valid_dtypes([grad_loss])
+    if var_list is None:
+      var_list = (
+          variables.trainable_variables() +
+          ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
+    else:
+      var_list = nest.flatten(var_list)
+    # pylint: disable=protected-access
+    var_list += ops.get_collection(ops.GraphKeys._STREAMING_MODEL_PORTS)
+    # pylint: enable=protected-access
+    processors = [_get_processor(v) for v in var_list]
+    if not var_list:
+      raise ValueError("No variables to optimize.")
+    var_refs = [p.target() for p in processors]
+    grads = gradients.gradients(
+        loss, var_refs, grad_ys=grad_loss,
+        gate_gradients=(gate_gradients == optimizer_v1.Optimizer.GATE_OP),
+        aggregation_method=aggregation_method,
+        colocate_gradients_with_ops=colocate_gradients_with_ops,
+        stop_gradients=stop_gradients)
+    if gate_gradients == optimizer_v1.Optimizer.GATE_GRAPH:
+      grads = control_flow_ops.tuple(grads)
+    grads_and_vars = list(zip(grads, var_list))
+    self._assert_valid_dtypes(
+        [v for g, v in grads_and_vars
+         if g is not None and v.dtype != dtypes.resource])
+    return grads_and_vars
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    """Apply gradients to variables.
+
+    This is the second part of `minimize()`. It returns an `Operation` that
+    applies gradients.
+
+    Args:
+      grads_and_vars: List of (gradient, variable) pairs as returned by
+        `compute_gradients()`.
+      global_step: Optional `Variable` to increment by one after the
+        variables have been updated.
+      name: Optional name for the returned operation.  Default to the
+        name passed to the `Optimizer` constructor.
+
+    Returns:
+      An `Operation` that applies the specified gradients. If `global_step`
+      was not None, that operation also increments `global_step`.
+
+    Raises:
+      TypeError: If `grads_and_vars` is malformed.
+      ValueError: If none of the variables have gradients.
+    """
+    # This is a default implementation of apply_gradients() that can be shared
+    # by most optimizers.  It relies on the subclass implementing the following
+    # methods: _create_vars(), _prepare(), _apply_dense(), and _apply_sparse().
+
+    # Filter out variables with gradients of `None`.
+    grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
+    if not grads_and_vars:
+      raise ValueError("No variables provided.")
+    filtered = tuple((g, v) for (g, v) in grads_and_vars if g is not None)
+    if not filtered:
+      raise ValueError("No gradients provided for any variable: %s." %
+                       ([str(v) for _, v in grads_and_vars],))
+    return distribution_strategy_context.get_tower_context().merge_call(
+        self._distributed_apply, filtered, global_step=global_step, name=name)
+
+  def _get_or_create_state(self, var_list=None):
+    """Either looks up or creates `_OptimizerV2State`.
+
+    If any variables are available, they should be passed via the `var_list`
+    argument, and these will be used to determine the graph to create/retrieve
+    state for. Otherwise the returned state is for the current default graph.
+
+    Args:
+      var_list: A list of variables to extract a graph from.
+
+    Returns:
+      An `_OptimizerV2State` object.
+    """
+    # Determine the graph_key from the current graph.
+    eager_execution = context.executing_eagerly()
+    if eager_execution or var_list is None:
+      graph = ops.get_default_graph()
+    else:
+      graph = ops._get_graph_from_inputs(var_list)  # pylint: disable=protected-access
+    assert graph is not None
+    graph_key = graph._graph_key  # pylint: disable=protected-access
+
+    # Get the per graph state by looking up the graph_key.
+    if graph_key in self._per_graph_state:
+      per_graph_state = self._per_graph_state[graph_key]
+    else:
+      per_graph_state = _OptimizerV2State(self._name)
+      per_graph_state._init_with_static_hyper(self._hyper)  # pylint: disable=protected-access
+      self._per_graph_state[graph_key] = per_graph_state
+    return per_graph_state
+
+  def _distributed_apply(self, distribution, grads_and_vars, global_step, name):
+    """`apply_gradients` for use with a `DistributionStrategy`."""
+    reduced_grads = distribution.batch_reduce(
+        variable_scope.VariableAggregation.SUM, grads_and_vars)
+    var_list = [v for _, v in grads_and_vars]
+    grads_and_vars = zip(reduced_grads, var_list)
+
+    unwrapped_var_list = [x for v in var_list for x in distribution.unwrap(v)]
+    eager_execution = context.executing_eagerly()
+    if eager_execution:
+      # Give a clear error in this case instead of "name not supported
+      # for Eager Tensors" when we compute non_slot_devices.
+      for v in unwrapped_var_list:
+        if isinstance(v, ops.Tensor):
+          raise NotImplementedError("Trying to update a Tensor ", v)
+
+    with ops.name_scope(name, self._name) as name:
+      per_graph_state = self._get_or_create_state(var_list=unwrapped_var_list)
+      # Include the current value of any dynamic hyper parameters in `state`.
+      non_slot_devices = distribution.non_slot_devices(var_list)
+      state = per_graph_state._copy_with_dynamic_hyper(  # pylint: disable=protected-access
+          self._hyper, distribution, non_slot_devices)
+
+    # Create any slot and non-slot variables we need in `state`.
+    with ops.init_scope():
+      self._create_vars(var_list, state)
+
+    with ops.name_scope(name):  # Re-enter name_scope created above
+      # Give the child class a chance to do something before we start
+      # applying gradients.
+      self._prepare(state)
+
+      def update(v, g):
+        """Update variable `v` using gradient `g`."""
+        assert v is not None
+
+        # Convert the grad to Tensor or IndexedSlices if necessary, and
+        # look up a processor for each variable's type.
+        try:
+          g = ops.convert_to_tensor_or_indexed_slices(g)
+        except TypeError:
+          raise TypeError(
+              "Gradient must be convertible to a Tensor"
+              " or IndexedSlices, or None: %s" % g)
+        if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
+          raise TypeError(
+              "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
+        processor = _get_processor(v)
+
+        # We colocate all ops created in _apply_dense or _apply_sparse
+        # on the same device as the variable.
+        # TODO(apassos): figure out how to get the variable name here.
+        scope_name = "" if eager_execution else v.op.name
+        # device_policy is set because non-mirrored tensors will be read in
+        # `update_op`.
+        # TODO(josh11b): Make different state objects for each device to
+        # avoid needing to set the device_policy.
+        with ops.name_scope("update_" + scope_name), \
+            context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
+          return processor.update_op(self, g, state)
+
+      # Use the processors to update the variables.
+      update_ops = []
+      for grad, var in grads_and_vars:
+        update_ops.extend(distribution.update(var, update, grad, grouped=False))
+
+      # Give the child class a chance to do something after applying
+      # gradients
+      def finish():
+        # TODO(josh11b): Make different state objects for each device to
+        # avoid needing to set the device_policy.
+        with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
+          return self._finish(state)
+
+      update_ops = control_flow_ops.group(update_ops)
+      with ops.control_dependencies([update_ops]):
+        finish_updates = distribution.update_non_slot(
+            non_slot_devices, finish, grouped=False)
+      # We said grouped=False, which means finish_updates is always a list.
+      # It will be [None] when finish() returns None.
+      if finish_updates == [None]:
+        finish_updates = [update_ops]
+
+      # Update `global_step` (if any).
+      if global_step is None:
+        apply_updates = distribution.group(finish_updates, name=name)
+      else:
+        with ops.control_dependencies(finish_updates):
+
+          def update_global_step(global_step, name):
+            return global_step.assign_add(1, read_value=False, name=name)
+
+          apply_updates = distribution.update(global_step, update_global_step,
+                                              name)
+
+      # Add the training op to the TRAIN_OP graph collection in graph mode.
+      if not eager_execution:
+        if isinstance(apply_updates, ops.Tensor):
+          apply_updates = apply_updates.op
+        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+        if apply_updates not in train_op:
+          train_op.append(apply_updates)
+
+      return apply_updates
+
+  def get_slot(self, var, name):
+    """Return a slot named `name` created for `var` by the Optimizer.
+
+    Some `Optimizer` subclasses use additional variables.  For example
+    `Momentum` and `Adagrad` use variables to accumulate updates.  This method
+    gives access to these `Variable` objects if for some reason you need them.
+
+    Use `get_slot_names()` to get the list of slot names created by the
+    `Optimizer`.
+
+    Args:
+      var: A variable passed to `minimize()` or `apply_gradients()`.
+      name: A string.
+
+    Returns:
+      The `Variable` for the slot if it was created, `None` otherwise.
+    """
+    state = self._get_state_for_var(var)
+    return state.get_slot(var, name) if state is not None else None
+
+  def get_slot_names(self):
+    """Return a list of the names of slots created by the `Optimizer`.
+
+    See `get_slot()`.
+
+    Returns:
+      A list of strings.
+    """
+    state = self._get_per_graph_state()
+    return state.get_slot_names() if state is not None else []
+
+  def variables(self):
+    """A list of variables which encode the current state of `Optimizer`.
+
+    Includes slot variables and additional global variables created by the
+    optimizer in the current default graph.
+
+    Returns:
+      A list of variables.
+    """
+    state = self._get_per_graph_state()
+    return state._variables() if state is not None else []  # pylint: disable=protected-access
+
+  # --------------
+  # Methods to be implemented by subclasses if they want to use the
+  # inherited implementation of apply_gradients() or compute_gradients().
+  # --------------
+  def _create_vars(self, var_list, state):
+    """Create all slots needed by the variables and any non-slot variables.
+
+    Args:
+      var_list: A list of `Variable` objects.
+      state: An object with these methods:
+        `create_slot(var, val, slot_name, optional_op_name)`,
+        `create_slot_with_initializer(`
+            `var, initializer, shape, dtype, slot_name, optional_op_name)`,
+        `zeros_slot(var, slot_name, optional_op_name)`,
+        `create_non_slot_variable(initial_value, name, colocate_with)`,
+        `get_hyper(name)`
+    """
+    # No slots needed by default
+    pass
+
+  def _prepare(self, state):
+    """Code to execute before applying gradients.
+
+    Note that most uses of _prepare() in Optimizer have been subsumed
+    by explicit support for hyper parameters in OptimizerV2
+
+    Args:
+      state: An object with a `get_hyper(name)` method.
+
+    Returns:
+      Return value will be ignored.
+    """
+    pass
+
+  def _apply_dense(self, grad, var, state):
+    """Add ops to apply dense gradients to `var`.
+
+    Args:
+      grad: A `Tensor`.
+      var: A `Variable` object.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation`.
+    """
+    raise NotImplementedError()
+
+  def _resource_apply_dense(self, grad, handle, state):
+    """Add ops to apply dense gradients to the variable `handle`.
+
+    Args:
+      grad: a `Tensor` representing the gradient.
+      handle: a `Tensor` of dtype `resource` which points to the variable
+       to be updated.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    raise NotImplementedError()
+
+  def _resource_apply_sparse_duplicate_indices(
+      self, grad, handle, indices, state):
+    """Add ops to apply sparse gradients to `handle`, with repeated indices.
+
+    Optimizers which override this method must deal with repeated indices. See
+    the docstring of `_apply_sparse_duplicate_indices` for details. By default
+    the correct behavior, to sum non-unique indices and their associated
+    gradients, is enforced by first pre-processing `grad` and `indices` and
+    passing them on to `_resource_apply_sparse`. Optimizers which deal correctly
+    with duplicate indices may instead override this method to avoid the
+    overhead of summing.
+
+    Args:
+      grad: a `Tensor` representing the gradient for the affected indices.
+      handle: a `Tensor` of dtype `resource` which points to the variable
+       to be updated.
+      indices: a `Tensor` of integral type representing the indices for
+       which the gradient is nonzero. Indices may be repeated.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    # pylint: disable=protected-access
+    summed_grad, unique_indices = optimizer_v1._deduplicate_indexed_slices(
+        values=grad, indices=indices)
+    # pylint: enable=protected-access
+    return self._resource_apply_sparse(
+        summed_grad, handle, unique_indices, state)
+
+  def _resource_apply_sparse(self, grad, handle, indices, state):
+    """Add ops to apply sparse gradients to the variable `handle`.
+
+    Similar to `_apply_sparse`, the `indices` argument to this method has been
+    de-duplicated. Optimizers which deal correctly with non-unique indices may
+    instead override `_resource_apply_sparse_duplicate_indices` to avoid this
+    overhead.
+
+    Args:
+      grad: a `Tensor` representing the gradient for the affected indices.
+      handle: a `Tensor` of dtype `resource` which points to the variable
+       to be updated.
+      indices: a `Tensor` of integral type representing the indices for
+       which the gradient is nonzero. Indices are unique.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    raise NotImplementedError()
+
+  def _apply_sparse_duplicate_indices(self, grad, var, state):
+    """Add ops to apply sparse gradients to `var`, with repeated sparse indices.
+
+    Optimizers which override this method must deal with IndexedSlices objects
+    such as the following:
+
+      IndexedSlicesValue(values=[1, 1], indices=[0, 0], dense_shape=[1])
+
+    The correct interpretation is:
+
+      IndexedSlicesValue(values=[2], indices=[0], dense_shape=[1])
+
+    Many optimizers deal incorrectly with repeated indices when updating based
+    on sparse gradients (e.g. summing squares rather than squaring the sum, or
+    applying momentum terms multiple times). Adding first is always the correct
+    behavior, so this is enforced here by reconstructing the IndexedSlices to
+    have only unique indices, then calling _apply_sparse.
+
+    Optimizers which deal correctly with repeated indices may instead override
+    this method to avoid the overhead of summing indices.
+
+    Args:
+      grad: `IndexedSlices`.
+      var: A `Variable` object.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation`.
+    """
+    # pylint: disable=protected-access
+    summed_values, unique_indices = optimizer_v1._deduplicate_indexed_slices(
+        values=grad.values, indices=grad.indices)
+    # pylint: enable=protected-access
+    gradient_no_duplicate_indices = ops.IndexedSlices(
+        indices=unique_indices,
+        values=summed_values,
+        dense_shape=grad.dense_shape)
+    return self._apply_sparse(gradient_no_duplicate_indices, var, state)
+
+  def _apply_sparse(self, grad, var, state):
+    """Add ops to apply sparse gradients to `var`.
+
+    The IndexedSlices object passed to `grad` in this function is by default
+    pre-processed in `_apply_sparse_duplicate_indices` to remove duplicate
+    indices (see its docstring for details). Optimizers which can tolerate or
+    have correct special cases for duplicate sparse indices may override
+    `_apply_sparse_duplicate_indices` instead of this function, avoiding that
+    overhead.
+
+    Args:
+      grad: `IndexedSlices`, with no repeated indices.
+      var: A `Variable` object.
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      An `Operation`.
+    """
+    raise NotImplementedError()
+
+  def _finish(self, state):
+    """Do what is needed to finish the update.
+
+    This is called inside a scope colocated with any non-slot variables.
+
+    Args:
+      state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`,
+        and `get_hyper(name)` methods.
+
+    Returns:
+      The operation to apply updates, or None if no updates.
+    """
+    return None
+
+  # --------------
+  # Utility methods for subclasses.
+  # --------------
+  def _get_per_graph_state(self):
+    # pylint: disable=protected-access
+    return self._per_graph_state.get(ops.get_default_graph()._graph_key, None)
+
+  def _get_state_for_var(self, var):
+    # pylint: disable=protected-access
+    return self._per_graph_state.get(var._graph_key, None)
+
+  # --------------
+  # Overridden methods from Checkpointable.
+  # --------------
+
+  def _track_checkpointable(self, *args, **kwargs):
+    """Optimizers may not track dependencies. Raises an error."""
+    raise NotImplementedError(
+        "Optimizers may not have dependencies. File a feature request if this "
+        "limitation bothers you.")
+
+  @property
+  def _checkpoint_dependencies(self):
+    """From Checkpointable. Gather graph-specific non-slot variables to save."""
+    current_graph_non_slot_variables = []
+    state = self._get_per_graph_state()
+    if state is not None:
+      for name, variable_object in sorted(
+          state._non_slot_dict.items(),  # pylint: disable=protected-access
+          # Avoid comparing variables
+          key=lambda item: item[0]):
+        current_graph_non_slot_variables.append(
+            checkpointable.CheckpointableReference(
+                name=name, ref=variable_object))
+    # Note: ignores super(); Optimizers may not have any dependencies outside of
+    # state objects.
+    return current_graph_non_slot_variables
+
+  def _lookup_dependency(self, name):
+    """From Checkpointable. Find a non-slot variable in the current graph."""
+    state = self._get_per_graph_state()
+    if state is None:
+      return None
+    else:
+      return state.get_non_slot(name)
+
+  @property
+  def _deferred_dependencies(self):
+    """Lets Checkpointable know where non-slot variables are created.
+
+    If necessary, creates a new state object for the current default graph.
+    Checkpointable will then add entries to that state's deferred dependency
+    dictionary. The state object will check that dictionary when creating
+    non-slot variables, restoring their value if an entry is found.
+
+    Returns:
+      A dictionary which holds deferred dependencies for the current default
+      graph.
+    """
+    state = self._get_or_create_state()
+    return state._deferred_dependencies  # pylint: disable=protected-access
+
+  def _create_or_restore_slot_variable(
+      self, slot_variable_position, slot_name, variable):
+    """Checkpointable: Restore a slot variable's value, possibly creating it.
+
+    Called when a variable which has an associated slot variable is created or
+    restored.
+
+    Args:
+      slot_variable_position: A `checkpointable._CheckpointPosition` object
+        indicating the slot variable `Checkpointable` object to be restored.
+      slot_name: The name of this `Optimizer`'s slot to restore into.
+      variable: The variable object this slot is being created for.
+    """
+    state = self._get_or_create_state(var_list=[variable])
+    state._create_or_restore_slot_variable(  # pylint: disable=protected-access
+        slot_variable_position=slot_variable_position,
+        slot_name=slot_name,
+        variable=variable,
+        optional_op_name=self._name)
+
+  # --------------
+  # Unsupported parent methods
+  # --------------
+  def _slot_dict(self, slot_name):
+    raise NotImplementedError(
+        "_slot_dict() method unsupported in OptimizerV2")
+
+  def _get_or_make_slot(self, var, val, slot_name, op_name):
+    raise NotImplementedError(
+        "_get_or_make_slot() method unsupported in OptimizerV2")
+
+  def _get_or_make_slot_with_initializer(self, var, initializer, shape, dtype,
+                                         slot_name, op_name):
+    raise NotImplementedError(
+        "_get_or_make_slot_with_initializer() method unsupported in "
+        "OptimizerV2")
+
+  def _create_non_slot_variable(self, initial_value, name, colocate_with):
+    raise NotImplementedError(
+        "_create_non_slot_variable() method unsupported in OptimizerV2")
+
+  def _get_non_slot_variable(self, name, graph=None):
+    raise NotImplementedError(
+        "_get_non_slot_variable() method unsupported in OptimizerV2")
+
+  def _non_slot_variables(self):
+    raise NotImplementedError(
+        "_non_slot_variables() method unsupported in OptimizerV2")
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
new file mode 100644
index 0000000000..a6c939393e
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
@@ -0,0 +1,277 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional test for OptimizerV2."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.keras.optimizer_v2 import sgd
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class OptimizerTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def testBasic(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      def loss():
+        return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
+      # Note that for eager execution, minimize expects a function instead of a
+      # Tensor.
+      global_step = resource_variable_ops.ResourceVariable(
+          array_ops.zeros([], dtypes.int64), name='global_step_%d' % i)
+      sgd_op = sgd.SGD(3.0)
+
+      self.evaluate(variables.global_variables_initializer())
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+      # Run 1 step of sgd through optimizer
+      opt_op = sgd_op.minimize(loss, global_step, [var0, var1])
+      self.evaluate(opt_op)
+      # Validate updated params
+      self.assertAllClose([-14., -13.], self.evaluate(var0))
+      self.assertAllClose([-6., -5.], self.evaluate(var1))
+
+  def testAggregationMethod(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        cost = 5 * var0 + 3 * var1
+        global_step = variables.Variable(
+            array_ops.zeros([], dtypes.int64), name='global_step')
+        sgd_op = sgd.SGD(3.0)
+        opt_op = sgd_op.minimize(
+            cost,
+            global_step, [var0, var1],
+            aggregation_method=gradients_impl.AggregationMethod.
+            EXPERIMENTAL_ACCUMULATE_N)
+
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd through optimizer
+        opt_op.run()
+        # Validate updated params
+        self.assertAllClose([-14., -13.], var0.eval())
+        self.assertAllClose([-6., -5.], var1.eval())
+
+  def testPrecomputedGradient(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        cost = 5 * var0 + 3 * var1
+        grad_loss = constant_op.constant([42, -42], dtype=dtype)
+        global_step = variables.Variable(
+            array_ops.zeros([], dtypes.int64), name='global_step')
+        sgd_op = sgd.SGD(3.0)
+        opt_op = sgd_op.minimize(
+            cost, global_step, [var0, var1], grad_loss=grad_loss)
+
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd through optimizer
+        opt_op.run()
+        # Validate updated params
+        self.assertAllClose([1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)],
+                            var0.eval())
+        self.assertAllClose([3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)],
+                            var1.eval())
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNoVariables(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      # pylint: disable=cell-var-from-loop
+      def loss():
+        var0 = resource_variable_ops.ResourceVariable(
+            [1.0, 2.0], dtype=dtype, trainable=False, name='a')
+        var1 = resource_variable_ops.ResourceVariable(
+            [3.0, 4.0], dtype=dtype, trainable=False, name='b')
+        return 5 * var0 + var1
+      # pylint: enable=cell-var-from-loop
+      sgd_op = sgd.SGD(3.0)
+      with self.assertRaisesRegexp(ValueError, 'No.*variables'):
+        sgd_op.minimize(loss)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNoGradients(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      # pylint: disable=cell-var-from-loop
+      def loss():
+        return 5 * var0
+      # pylint: enable=cell-var-from-loop
+      sgd_op = sgd.SGD(3.0)
+      with self.assertRaisesRegexp(ValueError, 'No gradients'):
+        # var1 has no gradient
+        sgd_op.minimize(loss, var_list=[var1])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNoGradientsForAnyVariables_Minimize(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      def loss():
+        return constant_op.constant(5.0)
+
+      sgd_op = sgd.SGD(3.0)
+      with self.assertRaisesRegexp(ValueError,
+                                   'No gradients provided for any variable'):
+        sgd_op.minimize(loss, var_list=[var0, var1])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testNoGradientsForAnyVariables_ApplyGradients(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      sgd_op = sgd.SGD(3.0)
+      with self.assertRaisesRegexp(ValueError,
+                                   'No gradients provided for any variable'):
+        sgd_op.apply_gradients([(None, var0), (None, var1)])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testGradientsAsVariables(self):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+      def loss():
+        return 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
+
+      sgd_op = sgd.SGD(3.0)
+      grads_and_vars = sgd_op.compute_gradients(loss, [var0, var1])
+      # Convert gradients to tf.Variables
+      converted_grads = [
+          resource_variable_ops.ResourceVariable(array_ops.zeros([2], dtype),
+                                                 name='c_%d_%d' % (i, j))
+          for j, gv in enumerate(grads_and_vars)
+      ]
+      convert_ops = [
+          state_ops.assign(converted_grads[j], gv[0])
+          for j, gv in enumerate(grads_and_vars)
+      ]
+
+      self.evaluate(variables.global_variables_initializer())
+      # Run convert_ops to achieve the gradietns converting
+      self.evaluate(convert_ops)
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+      self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+      # Run 1 step of sgd through optimizer
+      converted_grads_and_vars = list(zip(converted_grads, [var0, var1]))
+      opt_op = sgd_op.apply_gradients(converted_grads_and_vars)
+      self.evaluate(opt_op)
+
+      # Validate updated params
+      self.assertAllClose([-14., -13.], self.evaluate(var0))
+      self.assertAllClose([-6., -5.], self.evaluate(var1))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testComputeGradientsWithTensors(self):
+    x = ops.convert_to_tensor(1.0)
+    def f():
+      return x * x
+
+    sgd_op = sgd.SGD(3.0)
+    grads_and_vars = sgd_op.compute_gradients(f, [x])
+    self.assertEqual(1, len(grads_and_vars))
+    grad, x_as_var = grads_and_vars[0]
+    self.assertIs(x, x_as_var)
+    self.assertEqual(2.0, self.evaluate(grad))
+
+    with self.assertRaises(NotImplementedError):
+      sgd_op.apply_gradients(grads_and_vars)
+
+  def testTrainOp(self):
+    with self.cached_session():
+      var0 = variables.Variable([1.0, 2.0])
+      var1 = variables.Variable([3.0, 4.0])
+      cost = 5 * var0 + 3 * var1
+      global_step = variables.Variable(
+          array_ops.zeros([], dtypes.int64), name='global_step')
+      sgd_op = sgd.SGD(3.0)
+      opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
+      self.assertTrue(opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
+
+  def testConstraint(self):
+    constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
+    constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
+    with self.cached_session():
+      var0 = variables.Variable([1.0, 2.0],
+                                constraint=constraint_01)
+      var1 = variables.Variable([3.0, 4.0],
+                                constraint=constraint_0)
+      cost = 5 * var0 + 3 * var1
+      global_step = variables.Variable(
+          array_ops.zeros([], dtypes.int64), name='global_step')
+      sgd_op = sgd.SGD(3.0)
+      opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
+
+      variables.global_variables_initializer().run()
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+      # Run 1 step of sgd through optimizer
+      opt_op.run()
+      # Validate updated params
+      self.assertAllClose([-0.1, -0.1], var0.eval())
+      self.assertAllClose([0., 0.], var1.eval())
+
+  def testStopGradients(self):
+    with self.cached_session():
+      var0 = variables.Variable([1.0, 2.0], name='var0')
+      var1 = variables.Variable([3.0, 4.0], name='var1')
+      var0_id = array_ops.identity(var0)
+      cost = 5 * var0_id + 3 * var1
+      sgd_op = sgd.SGD(3.0)
+      grads_and_vars = sgd_op.compute_gradients(cost, [var0, var1],
+                                                stop_gradients=[var0_id])
+      grad_dict = {var.op.name: grad for grad, var in grads_and_vars}
+      self.assertIsNone(grad_dict['var0'])
+      self.assertIsNotNone(grad_dict['var1'])
+
+  def testDoNotOverrideCreateSlots(self):
+    class ShouldNotOverrideCreateSlots(optimizer_v2.OptimizerV2):
+
+      def _create_slots(self, var_list):
+        """In OptimizerV2 _create_slots was renamed _create_vars."""
+        return var_list
+
+    with self.assertRaises(RuntimeError):
+      ShouldNotOverrideCreateSlots('name')
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py
new file mode 100644
index 0000000000..2748d8eff7
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py
@@ -0,0 +1,239 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""RMSprop optimizer for Tensorflow.
+
+rmsprop algorithm [tieleman2012rmsprop]
+
+A detailed description of rmsprop.
+
+- maintain a moving (discounted) average of the square of gradients
+- divide gradient by the root of this average
+
+mean_square = rho * mean_square{t-1} + (1-rho) * gradient ** 2
+mom = momentum * mom{t-1} + learning_rate * g_t / sqrt(mean_square)
+delta = - mom
+
+This implementation of RMSProp uses plain momentum, not Nesterov momentum.
+
+The centered version additionally maintains a moving (discounted) average of the
+gradients, and uses that average to estimate the variance:
+
+mean_grad = rho * mean_square{t-1} + (1-rho) * gradient
+mean_square = rho * mean_square{t-1} + (1-rho) * gradient ** 2
+mom = momentum * mom{t-1} + learning_rate * g_t /
+    sqrt(mean_square - mean_grad**2)
+delta = - mom
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.ops import array_ops
+
+from tensorflow.python.training import training_ops
+
+
+class RMSProp(optimizer_v2.OptimizerV2):
+  """RMSProp optimizer.
+
+  It is recommended to leave the parameters of this optimizer at their default
+  values (except the learning rate, which can be freely tuned).
+
+  This optimizer is usually a good choice for recurrent neural networks.
+
+  Some of the args below are hyperparameters, where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Note that in the dense implementation of this algorithm, variables and their
+  corresponding accumulators (momentum, gradient moving average, square
+  gradient moving average) will be updated even if the gradient is zero
+  (i.e. accumulators will decay, momentum will be applied). The sparse
+  implementation (used when the gradient is an `IndexedSlices` object,
+  typically because of `tf.gather` or an embedding lookup in the forward pass)
+  will not update variable slices or their accumulators unless those slices
+  were used in the forward pass (nor is there an "eventual" correction to
+  account for these omitted updates). This leads to more efficient updates for
+  large embedding lookup tables (where most of the slices are not accessed in
+  a particular graph execution), but differs from the published algorithm.
+
+  Arguments:
+      learning_rate: A float hyperparameter >= 0. The learning rate.
+      rho: A float hyperparameter >= 0. Discounting factor for the
+        history/coming gradient.
+      momentum: A float hyperparameter >= 0.
+      epsilon: A float hyperparameter >= 0 . Small value to initialize the
+        average square gradient variable and avoid zero denominator.
+      centered: If True, gradients are normalized by the estimated variance of
+        the gradient; if False, by the uncentered second moment. Setting this to
+        True may help with training, but is slightly more expensive in terms of
+        computation and memory. Defaults to False.
+      name: Optional name prefix for the operations created when applying
+        gradients. Defaults to "RMSProp".
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               rho=0.9,
+               momentum=None,
+               epsilon=1e-10,
+               centered=False,
+               name="RMSProp"):
+    super(RMSProp, self).__init__(name)
+    # Momentum default is `None` for consistency with SGD
+    # but underlying implementation uses `momentum` hyperparameter here
+    # regardless unlike SGD. Since extneral Keras RMSProp does not have
+    # a `momentum` weight, for compatibility with external Keras h5 files,
+    # when  `momentum` was set as `None` we should ignore the `momentum`
+    # variable in `get_weights` and not require it in `set_weights`.
+    if momentum is None:
+      momentum = 0.0
+    self._set_hyper("learning_rate", learning_rate)
+    self._set_hyper("rho", rho)
+    self._set_hyper("momentum", momentum)
+    self._set_hyper("epsilon", epsilon)
+
+    self._centered = centered
+
+  def _create_vars(self, var_list, state):
+    for v in var_list:
+      init_rms = state.get_hyper(
+          "epsilon", v.dtype.base_dtype) * array_ops.ones_like(v)
+      state.create_slot_with_initializer(v, init_rms, v.get_shape(),
+                                         v.dtype.base_dtype, "rms")
+      if self._centered:
+        state.zeros_slot(v, "mg")
+      state.zeros_slot(v, "momentum")
+
+  def _apply_dense(self, grad, var, state):
+    rms = state.get_slot(var, "rms")
+    mom = state.get_slot(var, "momentum")
+    if self._centered:
+      mg = state.get_slot(var, "mg")
+      return training_ops.apply_centered_rms_prop(
+          var,
+          mg,
+          rms,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          # epsilon is now the rms initial value and is not added to the
+          # denominator anymore, hence calling the kernel op with epsilon=0.
+          0,
+          grad,
+          use_locking=self._use_locking).op
+    else:
+      return training_ops.apply_rms_prop(
+          var,
+          rms,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          use_locking=self._use_locking).op
+
+  def _resource_apply_dense(self, grad, var, state):
+    rms = state.get_slot(var, "rms")
+    mom = state.get_slot(var, "momentum")
+    if self._centered:
+      mg = state.get_slot(var, "mg")
+      return training_ops.resource_apply_centered_rms_prop(
+          var.handle,
+          mg.handle,
+          rms.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          use_locking=self._use_locking)
+    else:
+      return training_ops.resource_apply_rms_prop(
+          var.handle,
+          rms.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var, state):
+    rms = state.get_slot(var, "rms")
+    mom = state.get_slot(var, "momentum")
+    if self._centered:
+      mg = state.get_slot(var, "mg")
+      return training_ops.sparse_apply_centered_rms_prop(
+          var,
+          mg,
+          rms,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad.values,
+          grad.indices,
+          use_locking=self._use_locking)
+    else:
+      return training_ops.sparse_apply_rms_prop(
+          var,
+          rms,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad.values,
+          grad.indices,
+          use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    rms = state.get_slot(var, "rms")
+    mom = state.get_slot(var, "momentum")
+    if self._centered:
+      mg = self.get_slot(var, "mg")
+      return training_ops.resource_sparse_apply_centered_rms_prop(
+          var.handle,
+          mg.handle,
+          rms.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          indices,
+          use_locking=self._use_locking)
+    else:
+      return training_ops.resource_sparse_apply_rms_prop(
+          var.handle,
+          rms.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          state.get_hyper("rho", var.dtype.base_dtype),
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          0,
+          grad,
+          indices,
+          use_locking=self._use_locking)
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
new file mode 100644
index 0000000000..2c5eccdc5b
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
@@ -0,0 +1,444 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for rmsprop optimizer."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import copy
+import math
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.optimizer_v2 import rmsprop
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+_DATA_TYPES = [dtypes.half, dtypes.float32]
+
+_TEST_PARAM_VALUES = [
+    # learning_rate, rho, momentum, epsilon, centered, use_resource
+    [0.5, 0.9, 0.0, 1.0, True, False],
+    [0.5, 0.9, 0.0, 1.0, False, False],
+    [0.5, 0.9, 0.0, 1.0, True, True],
+    [0.5, 0.9, 0.0, 1.0, False, True],
+    [0.1, 0.9, 0.0, 1.0, True, False],
+    [0.5, 0.95, 0.0, 1.0, False, False],
+    [0.5, 0.8, 0.0, 1e-3, True, False],
+    [0.5, 0.8, 0.9, 1e-3, True, False],
+]
+
+
+class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
+
+  def _rmsprop_update_numpy(self, var, g, mg, rms, mom, lr, rho, momentum,
+                            centered):
+    rms_t = rms * rho + (1 - rho) * g * g
+    if centered:
+      mg_t = mg * rho + (1 - rho) * g
+      denom_t = rms_t - mg_t * mg_t
+    else:
+      mg_t = mg
+      denom_t = rms_t
+    mom_t = momentum * mom + lr * g / np.sqrt(denom_t, dtype=denom_t.dtype)
+    var_t = var - mom_t
+    return var_t, mg_t, rms_t, mom_t
+
+  def _sparse_rmsprop_update_numpy(self, var, gindexs, gvalues, mg, rms, mom,
+                                   lr, rho, momentum, centered):
+    mg_t = copy.deepcopy(mg)
+    rms_t = copy.deepcopy(rms)
+    mom_t = copy.deepcopy(mom)
+    var_t = copy.deepcopy(var)
+    for i in range(len(gindexs)):
+      gindex = gindexs[i]
+      gvalue = gvalues[i]
+      rms_t[gindex] = rms[gindex] * rho + (1 - rho) * gvalue * gvalue
+      denom_t = rms_t[gindex]
+      if centered:
+        mg_t[gindex] = mg_t[gindex] * rho + (1 - rho) * gvalue
+        denom_t -= mg_t[gindex] * mg_t[gindex]
+      mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(denom_t)
+      var_t[gindex] = var[gindex] - mom_t[gindex]
+    return var_t, mg_t, rms_t, mom_t
+
+  @parameterized.named_parameters(
+      *test_util.generate_combinations_with_testcase_name(
+          dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
+  def testDense(self, dtype, param_value):
+    (learning_rate, rho, momentum, epsilon, centered,
+     use_resource) = tuple(param_value)
+    with self.test_session(use_gpu=True):
+      # Initialize variables for numpy implementation.
+      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+      grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
+      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+      grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)
+
+      if use_resource:
+        var0 = resource_variable_ops.ResourceVariable(var0_np)
+        var1 = resource_variable_ops.ResourceVariable(var1_np)
+      else:
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+      grads0 = constant_op.constant(grads0_np)
+      grads1 = constant_op.constant(grads1_np)
+      opt = rmsprop.RMSProp(
+          learning_rate=learning_rate,
+          rho=rho,
+          momentum=momentum,
+          epsilon=epsilon,
+          centered=centered)
+
+      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      variables.global_variables_initializer().run()
+
+      mg0 = opt.get_slot(var0, "mg")
+      self.assertEqual(mg0 is not None, centered)
+      mg1 = opt.get_slot(var1, "mg")
+      self.assertEqual(mg1 is not None, centered)
+      rms0 = opt.get_slot(var0, "rms")
+      self.assertIsNotNone(rms0)
+      rms1 = opt.get_slot(var1, "rms")
+      self.assertIsNotNone(rms1)
+      mom0 = opt.get_slot(var0, "momentum")
+      self.assertIsNotNone(mom0)
+      mom1 = opt.get_slot(var1, "momentum")
+      self.assertIsNotNone(mom1)
+
+      mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      rms0_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
+      rms1_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
+      mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+
+      # Run 4 steps of RMSProp
+      for _ in range(4):
+        update.run()
+
+        var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
+            var0_np, grads0_np, mg0_np, rms0_np, mom0_np, learning_rate, rho,
+            momentum, centered)
+        var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
+            var1_np, grads1_np, mg1_np, rms1_np, mom1_np, learning_rate, rho,
+            momentum, centered)
+
+        # Validate updated params
+        if centered:
+          self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
+          self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
+        self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
+        self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
+        self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
+        self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
+        self.assertAllCloseAccordingToType(
+            var0_np, var0.eval(), half_rtol=0.01, half_atol=0.01)
+        self.assertAllCloseAccordingToType(
+            var1_np, var1.eval(), half_rtol=0.01, half_atol=0.01)
+
+  @parameterized.parameters([dtypes.float32, dtypes.float64])
+  def testMinimizeSparseResourceVariable(self, dtype):
+    with self.cached_session():
+      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+      loss = pred * pred
+      sgd_op = rmsprop.RMSProp(
+          learning_rate=1.0, rho=0.0, momentum=0.0, epsilon=0.0,
+          centered=False).minimize(loss)
+      variables.global_variables_initializer().run()
+      # Fetch params to validate initial values
+      self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+      # Run 1 step of sgd
+      sgd_op.run()
+      # Validate updated params
+      self.assertAllCloseAccordingToType(
+          [[0., 1.]], var0.eval(), atol=0.01)
+
+  @parameterized.parameters([dtypes.float32, dtypes.float64])
+  def testMinimizeSparseResourceVariableCentered(self, dtype):
+    with self.cached_session():
+      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+      x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+      loss = pred * pred
+      sgd_op = rmsprop.RMSProp(
+          learning_rate=1.0, rho=0.1, momentum=0.0, epsilon=1.0,
+          centered=True).minimize(loss)
+      variables.global_variables_initializer().run()
+      # Fetch params to validate initial values
+      self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+      # Run 1 step of sgd
+      sgd_op.run()
+      # Validate updated params
+      self.assertAllCloseAccordingToType(
+          [[-7/3.0, -4/3.0]], var0.eval(), atol=0.01)
+
+  @parameterized.named_parameters(
+      *test_util.generate_combinations_with_testcase_name(
+          dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
+  def testSparse(self, dtype, param_value):
+    (learning_rate, rho, momentum, epsilon, centered, _) = tuple(param_value)
+    with self.test_session(use_gpu=True):
+      # Initialize variables for numpy implementation.
+      var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+      grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
+      var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+      grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)
+
+      var0 = variables.Variable(var0_np)
+      var1 = variables.Variable(var1_np)
+      grads0_np_indices = np.array([0], dtype=np.int32)
+      grads0 = ops.IndexedSlices(
+          constant_op.constant(grads0_np),
+          constant_op.constant(grads0_np_indices), constant_op.constant([1]))
+      grads1_np_indices = np.array([1], dtype=np.int32)
+      grads1 = ops.IndexedSlices(
+          constant_op.constant(grads1_np),
+          constant_op.constant(grads1_np_indices), constant_op.constant([1]))
+      opt = rmsprop.RMSProp(
+          learning_rate=learning_rate,
+          rho=rho,
+          momentum=momentum,
+          epsilon=epsilon,
+          centered=centered)
+      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      variables.global_variables_initializer().run()
+
+      mg0 = opt.get_slot(var0, "mg")
+      self.assertEqual(mg0 is not None, centered)
+      mg1 = opt.get_slot(var1, "mg")
+      self.assertEqual(mg1 is not None, centered)
+      rms0 = opt.get_slot(var0, "rms")
+      self.assertIsNotNone(rms0)
+      rms1 = opt.get_slot(var1, "rms")
+      self.assertIsNotNone(rms1)
+      mom0 = opt.get_slot(var0, "momentum")
+      self.assertIsNotNone(mom0)
+      mom1 = opt.get_slot(var1, "momentum")
+      self.assertIsNotNone(mom1)
+
+      mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      rms0_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
+      rms1_np = np.array([epsilon, epsilon], dtype=dtype.as_numpy_dtype)
+      mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+      mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+
+      # Run 4 steps of RMSProp
+      for _ in range(4):
+        update.run()
+
+        var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy(
+            var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np, mom0_np,
+            learning_rate, rho, momentum, centered)
+        var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy(
+            var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np, mom1_np,
+            learning_rate, rho, momentum, centered)
+
+        # Validate updated params
+        if centered:
+          self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
+          self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
+        self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
+        self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
+        self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
+        self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
+        self.assertAllCloseAccordingToType(var0_np, var0.eval())
+        self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  @parameterized.parameters(_DATA_TYPES)
+  def testWithoutMomentum(self, dtype):
+    with self.test_session(use_gpu=True):
+      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+      opt = rmsprop.RMSProp(
+          learning_rate=2.0, rho=0.9, momentum=0.0, epsilon=1.0)
+      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      variables.global_variables_initializer().run()
+
+      rms0 = opt.get_slot(var0, "rms")
+      self.assertIsNotNone(rms0)
+      rms1 = opt.get_slot(var1, "rms")
+      self.assertIsNotNone(rms1)
+      mom0 = opt.get_slot(var0, "momentum")
+      self.assertIsNotNone(mom0)
+      mom1 = opt.get_slot(var1, "momentum")
+      self.assertIsNotNone(mom1)
+
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+      # Step 1: the rms accumulators where 1. So we should see a normal
+      # update: v -= grad * learning_rate
+      update.run()
+      # Check the root mean square accumulators.
+      self.assertAllCloseAccordingToType(
+          np.array([0.901, 0.901]), rms0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([0.90001, 0.90001]), rms1.eval())
+      # Check the parameters.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0 / math.sqrt(0.901)),
+              2.0 - (0.1 * 2.0 / math.sqrt(0.901))
+          ]), var0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)),
+              4.0 - (0.01 * 2.0 / math.sqrt(0.90001))
+          ]), var1.eval())
+      # Step 2: the root mean square accumulators contain the previous update.
+      update.run()
+      # Check the rms accumulators.
+      self.assertAllCloseAccordingToType(
+          np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
+      # Check the parameters.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
+              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)),
+              2.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
+              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))
+          ]), var0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
+              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)),
+              4.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
+              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))
+          ]), var1.eval())
+
+  @parameterized.parameters(_DATA_TYPES)
+  def testWithMomentum(self, dtype):
+    with self.test_session(use_gpu=True):
+      var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+      var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+
+      opt = rmsprop.RMSProp(
+          learning_rate=2.0, rho=0.9, momentum=0.5, epsilon=1.0)
+      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      variables.global_variables_initializer().run()
+
+      rms0 = opt.get_slot(var0, "rms")
+      self.assertIsNotNone(rms0)
+      rms1 = opt.get_slot(var1, "rms")
+      self.assertIsNotNone(rms1)
+      mom0 = opt.get_slot(var0, "momentum")
+      self.assertIsNotNone(mom0)
+      mom1 = opt.get_slot(var1, "momentum")
+      self.assertIsNotNone(mom1)
+
+      # Fetch params to validate initial values
+      self.assertAllClose([1.0, 2.0], var0.eval())
+      self.assertAllClose([3.0, 4.0], var1.eval())
+      # Step 1: rms = 1, mom = 0. So we should see a normal
+      # update: v -= grad * learning_rate
+      update.run()
+      # Check the root mean square accumulators.
+      self.assertAllCloseAccordingToType(
+          np.array([0.901, 0.901]), rms0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([0.90001, 0.90001]), rms1.eval())
+      # Check the momentum accumulators
+      self.assertAllCloseAccordingToType(
+          np.array([(0.1 * 2.0 / math.sqrt(0.901)),
+                    (0.1 * 2.0 / math.sqrt(0.901))]), mom0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([(0.01 * 2.0 / math.sqrt(0.90001)),
+                    (0.01 * 2.0 / math.sqrt(0.90001))]), mom1.eval())
+
+      # Check that the parameters.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0 / math.sqrt(0.901)),
+              2.0 - (0.1 * 2.0 / math.sqrt(0.901))
+          ]), var0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)),
+              4.0 - (0.01 * 2.0 / math.sqrt(0.90001))
+          ]), var1.eval())
+
+      # Step 2: the root mean square accumulators contain the previous update.
+      update.run()
+      # Check the rms accumulators.
+      self.assertAllCloseAccordingToType(
+          np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
+              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)),
+              0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
+              (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))
+          ]), mom0.eval())
+      self.assertAllCloseAccordingToType(
+          np.array([
+              0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
+              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)),
+              0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
+              (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))
+          ]), mom1.eval())
+
+      # Check the parameters.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
+              (0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
+               (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001))),
+              2.0 - (0.1 * 2.0 / math.sqrt(0.901)) -
+              (0.5 * (0.1 * 2.0 / math.sqrt(0.901)) +
+               (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001)))
+          ]), var0.eval())
+
+      self.assertAllCloseAccordingToType(
+          np.array([
+              3.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
+              (0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
+               (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5))),
+              4.0 - (0.01 * 2.0 / math.sqrt(0.90001)) -
+              (0.5 * (0.01 * 2.0 / math.sqrt(0.90001)) +
+               (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)))
+          ]), var1.eval())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/sgd.py b/tensorflow/python/keras/optimizer_v2/sgd.py
new file mode 100644
index 0000000000..f5583691f7
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/sgd.py
@@ -0,0 +1,170 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Momentum for TensorFlow."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.training import training_ops
+
+
+class SGD(optimizer_v2.OptimizerV2):
+  """Stochastic gradient descent optimizer.
+
+  Includes support for momentum and Nesterov momentum.
+
+  Computes (if `nesterov = False`):
+
+  ```
+  accumulation = momentum * accumulation + gradient
+  variable -= learning_rate * accumulation
+  ```
+
+  Some of the args below are hyperparameters, where a hyperparameter is
+  defined as a scalar Tensor, a regular Python value, or a callable (which
+  will be evaluated when `apply_gradients` is called) returning a scalar
+  Tensor or a Python value.
+
+  Note that in the dense version of this algorithm, `accumulation` is updated
+  and applied regardless of a gradient's value, whereas the sparse version (when
+  the gradient is an `IndexedSlices`, typically because of `tf.gather` or an
+  embedding) only updates variable slices and corresponding `accumulation` terms
+  when that part of the variable was used in the forward pass.
+
+  @compatibility(eager)
+  When eager execution is enabled, learning_rate and momentum can each be a
+  callable that takes no arguments and returns the actual value to use. This
+  can be useful for changing these values across different invocations of
+  optimizer functions.
+  @end_compatibility
+
+  Arguments:
+      learning_rate: float hyperparameter >= 0. Learning rate.
+      momentum: float hyperparameter >= 0 or None. Parameter that accelerates
+        SGD in the relevant direction and dampens oscillations.
+      nesterov: boolean. Whether to apply Nesterov momentum. See [Sutskever et
+        al., 2013](http://jmlr.org/proceedings/papers/v28/sutskever13.pdf). This
+          implementation always computes gradients at the value of the
+          variable(s) passed to the optimizer. Using Nesterov Momentum makes the
+          variable(s) track the values called `theta_t + mu*v_t` in the paper.
+      name: Optional name prefix for the operations created when applying
+        gradients.  Defaults to 'SGD'.
+  """
+
+  def __init__(self,
+               learning_rate=0.001,
+               momentum=None,
+               nesterov=False,
+               name="SGD"):
+    super(SGD, self).__init__(name)
+    self._set_hyper("learning_rate", learning_rate)
+    # Only create momentum variables and use momentum ops if needed.
+    if momentum is not None:
+      self._set_hyper("momentum", momentum)
+      self._use_nesterov = nesterov
+      self._use_momentum = True
+    else:
+      self._use_momentum = False
+
+  def _create_vars(self, var_list, state):
+    if self._use_momentum:
+      for v in var_list:
+        state.zeros_slot(v, "momentum")
+
+  def _apply_dense(self, grad, var, state):
+    if self._use_momentum:
+      mom = state.get_slot(var, "momentum")
+      return training_ops.apply_momentum(
+          var,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad,
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          use_locking=self._use_locking,
+          use_nesterov=self._use_nesterov).op
+    else:
+      return training_ops.apply_gradient_descent(
+          var,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad,
+          use_locking=self._use_locking).op
+
+  def _resource_apply_dense(self, grad, var, state):
+    if self._use_momentum:
+      mom = state.get_slot(var, "momentum")
+      return training_ops.resource_apply_momentum(
+          var.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad,
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          use_locking=self._use_locking,
+          use_nesterov=self._use_nesterov)
+    else:
+      lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
+      return training_ops.resource_apply_gradient_descent(
+          var.handle, lr, grad, use_locking=self._use_locking)
+
+  def _apply_sparse(self, grad, var, state):
+    if self._use_momentum:
+      mom = state.get_slot(var, "momentum")
+      return training_ops.sparse_apply_momentum(
+          var,
+          mom,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad.values,
+          grad.indices,
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          use_locking=self._use_locking,
+          use_nesterov=self._use_nesterov).op
+    else:
+      return super(SGD, self)._apply_sparse(grad, var, state)
+
+  def _resource_apply_sparse(self, grad, var, indices, state):
+    if self._use_momentum:
+      mom = state.get_slot(var, "momentum")
+      return training_ops.resource_sparse_apply_momentum(
+          var.handle,
+          mom.handle,
+          state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad,
+          indices,
+          state.get_hyper("momentum", var.dtype.base_dtype),
+          use_locking=self._use_locking,
+          use_nesterov=self._use_nesterov)
+    else:
+      return super(SGD, self)._resource_apply_sparse(grad, var, indices, state)
+
+  def _resource_apply_sparse_duplicate_indices(self, grad, var, indices, state):
+    if self._use_momentum:
+      return super(SGD, self)._resource_apply_sparse_duplicate_indices(
+          grad, var, indices, state)
+    else:
+      lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
+      return resource_variable_ops.resource_scatter_add(var.handle, indices,
+                                                        -grad * lr)
+
+  def _apply_sparse_duplicate_indices(self, grad, var, state):
+    if self._use_momentum:
+      return super(SGD, self)._apply_sparse_duplicate_indices(grad, var, state)
+    else:
+      delta = ops.IndexedSlices(
+          grad.values * state.get_hyper("learning_rate", var.dtype.base_dtype),
+          grad.indices, grad.dense_shape)
+      return var.scatter_sub(delta, use_locking=self._use_locking)
diff --git a/tensorflow/python/keras/optimizer_v2/sgd_test.py b/tensorflow/python/keras/optimizer_v2/sgd_test.py
new file mode 100644
index 0000000000..eb39aac283
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/sgd_test.py
@@ -0,0 +1,759 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Momentum."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.optimizer_v2 import sgd
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import resources
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class GradientDescentOptimizerTest(test.TestCase):
+
+  def testBasic(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        optimizer = sgd.SGD(3.0)
+        sgd_op = optimizer.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+        self.assertEqual(0, len(optimizer.variables()))
+
+  def testBasicResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
+        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        sgd_op = sgd.SGD(3.0).apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        # TODO(apassos) calling initialize_resources on all resources here
+        # doesn't work because the sessions and graph are reused across unit
+        # tests and this would mean trying to reinitialize variables. Figure out
+        # a long-term solution for this.
+        resources.initialize_resources([var0, var1]).run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+
+  def testMinimizeResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(var0, x) + var1
+        loss = pred * pred
+        sgd_op = sgd.SGD(1.0).minimize(loss)
+        # TODO(apassos) calling initialize_resources on all resources here
+        # doesn't work because the sessions and graph are reused across unit
+        # tests and this would mean trying to reinitialize variables. Figure out
+        # a long-term solution for this.
+        resources.initialize_resources([var0, var1]).run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        self.assertAllCloseAccordingToType([3.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
+        np_grad = 2 * np_pred
+        self.assertAllCloseAccordingToType(
+            [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval())
+
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        pred += var1
+        loss = pred * pred
+        sgd_op = sgd.SGD(1.0).minimize(loss)
+        # TODO(apassos) calling initialize_resources on all resources here
+        # doesn't work because the sessions and graph are reused across unit
+        # tests and this would mean trying to reinitialize variables. Figure out
+        # a long-term solution for this.
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        self.assertAllCloseAccordingToType([3.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
+        np_grad = 2 * np_pred
+        self.assertAllCloseAccordingToType(
+            [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval())
+
+  def testTensorLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        lrate = constant_op.constant(3.0)
+        sgd_op = sgd.SGD(lrate).apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+
+  def testGradWrtRef(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        opt = sgd.SGD(3.0)
+        values = [1.0, 3.0]
+        vars_ = [variables.Variable([v], dtype=dtype) for v in values]
+        grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_)
+        variables.global_variables_initializer().run()
+        for grad, _ in grads_and_vars:
+          self.assertAllCloseAccordingToType([1.0], grad.eval())
+
+  def testWithGlobalStep(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        global_step = variables.Variable(0, trainable=False)
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        sgd_op = sgd.SGD(3.0).apply_gradients(
+            zip([grads0, grads1], [var0, var1]), global_step=global_step)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
+        self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params and global_step
+        self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01],
+                                           var1.eval())
+        self.assertAllCloseAccordingToType(1, global_step.eval())
+
+  def testSparseBasic(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
+        var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
+            constant_op.constant([0]), constant_op.constant([2, 1]))
+        grads1 = ops.IndexedSlices(
+            constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]), constant_op.constant([2, 1]))
+        sgd_op = sgd.SGD(3.0).apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0], [2.0]], var0.eval())
+        self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
+                                           var0.eval())
+        self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
+                                           var1.eval())
+
+
+if __name__ == "__main__":
+  test.main()
+
+
+class MomentumOptimizerTest(test.TestCase):
+
+  def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum):
+    var = var + accum * lr * momentum
+    accum = accum * momentum + g
+    var = var - lr * accum
+    var = var - accum * lr * momentum
+    return var, accum
+
+  def doTestBasic(self, use_resource=False, use_callable_params=False):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      if use_resource:
+        var0 = resource_variable_ops.ResourceVariable(
+            [1.0, 2.0], dtype=dtype, name="var0_%d" % i)
+        var1 = resource_variable_ops.ResourceVariable(
+            [3.0, 4.0], dtype=dtype, name="var1_%d" % i)
+      else:
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+      grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+      grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+      learning_rate = lambda: 2.0
+      momentum = lambda: 0.9
+      if not use_callable_params:
+        learning_rate = learning_rate()
+        momentum = momentum()
+      mom_opt = sgd.SGD(learning_rate=learning_rate, momentum=momentum)
+      mom_update = mom_opt.apply_gradients(
+          zip([grads0, grads1], [var0, var1]))
+
+      if not context.executing_eagerly():
+        self.evaluate(variables.global_variables_initializer())
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+      # Check we have slots
+      self.assertEqual(["momentum"], mom_opt.get_slot_names())
+      slot0 = mom_opt.get_slot(var0, "momentum")
+      self.assertEquals(slot0.get_shape(), var0.get_shape())
+      slot1 = mom_opt.get_slot(var1, "momentum")
+      self.assertEquals(slot1.get_shape(), var1.get_shape())
+      if not context.executing_eagerly():
+        self.assertFalse(slot0 in variables.trainable_variables())
+        self.assertFalse(slot1 in variables.trainable_variables())
+
+      # Step 1: the momentum accumulators where 0. So we should see a normal
+      # update: v -= grad * learning_rate
+      if not context.executing_eagerly():
+        self.evaluate(mom_update)
+      # Check that the momentum accumulators have been updated.
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
+                                         self.evaluate(slot1))
+      # Check that the parameters have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
+          self.evaluate(var0))
+      self.assertAllCloseAccordingToType(
+          np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
+          self.evaluate(var1))
+      # Step 2: the momentum accumulators contain the previous update.
+      if context.executing_eagerly():
+        mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      else:
+        self.evaluate(mom_update)
+      # Check that the momentum accumulators have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
+          self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
+          self.evaluate(slot1))
+      # Check that the parameters have been updated.
+      self.assertAllCloseAccordingToType(
+          np.array([
+              1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+              2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
+          ]), self.evaluate(var0))
+      self.assertAllCloseAccordingToType(
+          np.array([
+              2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - (
+                  (0.9 * 0.01 + 0.01) * 2.0)
+          ]), self.evaluate(var1))
+
+  def testBasic(self):
+    with self.cached_session():
+      self.doTestBasic(use_resource=False)
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testResourceBasic(self):
+    self.doTestBasic(use_resource=True)
+
+  def testBasicCallableParams(self):
+    with context.eager_mode():
+      self.doTestBasic(use_resource=True, use_callable_params=True)
+
+  def testVariablesAcrossGraphs(self):
+    optimizer = sgd.SGD(0.01, 0.5)
+    with ops.Graph().as_default():
+      var0 = resource_variable_ops.ResourceVariable(
+          [1.0, 2.0], dtype=dtypes.float32, name="var0")
+      var1 = resource_variable_ops.ResourceVariable(
+          [3.0, 4.0], dtype=dtypes.float32, name="var1")
+      loss = math_ops.reduce_sum(var0 + var1)
+      optimizer.minimize(loss)
+      optimizer_variables = optimizer.variables()
+      self.assertStartsWith(optimizer_variables[0].name, "var0")
+      self.assertStartsWith(optimizer_variables[1].name, "var1")
+      self.assertEquals(2, len(optimizer_variables))
+
+    with ops.Graph().as_default():
+      var2 = resource_variable_ops.ResourceVariable(
+          [1.0, 2.0], dtype=dtypes.float32, name="var2")
+      var3 = resource_variable_ops.ResourceVariable(
+          [3.0, 4.0], dtype=dtypes.float32, name="var3")
+      loss = math_ops.reduce_sum(var2 + var3)
+      optimizer.minimize(loss)
+      optimizer_variables = optimizer.variables()
+      self.assertStartsWith(optimizer_variables[0].name, "var2")
+      self.assertStartsWith(optimizer_variables[1].name, "var3")
+      self.assertEquals(2, len(optimizer_variables))
+
+  def testNesterovMomentum(self):
+    for dtype in [dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        cost = 5 * var0 * var0 + 3 * var1
+        global_step = variables.Variable(
+            array_ops.zeros([], dtypes.int64), name="global_step")
+        mom_op = sgd.SGD(learning_rate=2.0, momentum=0.9, nesterov=True)
+        opt_op = mom_op.minimize(cost, global_step, [var0, var1])
+        variables.global_variables_initializer().run()
+        for t in range(1, 5):
+          opt_op.run()
+          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
+              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
+                                                                    accum1_np,
+                                                                    3, 2.0, 0.9)
+          self.assertAllClose(var0_np, var0.eval())
+          self.assertAllClose(var1_np, var1.eval())
+
+  def testSparseNesterovMomentum(self):
+    for dtype in [dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        grads = []
+        for t in range(1, 5):
+          grads.append(var0_np * 10)
+          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
+              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
+                                                                    accum1_np,
+                                                                    3, 2.0, 0.9)
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        loss = 5 * var0 * var0 + 3 * var1
+        mom_op = sgd.SGD(learning_rate=2.0, momentum=0.9, nesterov=True)
+        x_feed = array_ops.placeholder(dtype)
+        y_feed = ops.IndexedSlices(
+            x_feed, constant_op.constant([0, 1]), constant_op.constant([2]))
+        grads_and_vars = [(y_feed, var0), (constant_op.constant(
+            [3.0, 3.0], dtype=dtype), var1)]
+        opt_update = mom_op.apply_gradients(grads_and_vars)
+        variables.global_variables_initializer().run()
+        for t in range(1, 5):
+          opt_update.run(feed_dict={x_feed: grads[t - 1]})
+          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
+              var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
+                                                                    accum1_np,
+                                                                    3, 2.0, 0.9)
+          self.assertAllClose(var0_np, var0.eval())
+          self.assertAllClose(var1_np, var1.eval())
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      # This test invokes the ResourceSparseApplyMomentum operation, which
+      # did not have a registered GPU kernel as of April 2018. With graph
+      # execution, the placement algorithm notices this and automatically
+      # places the variable in CPU (host) memory. With eager execution,
+      # the variable would be placed in GPU memory if available, which
+      # would then conflict with the future invocation of the
+      # ResourceSparseApplyMomentum operation.
+      # To work around this discrepancy, for now we force the variable
+      # to be placed on CPU.
+      with ops.device("/cpu:0"):
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+
+      # pylint: disable=cell-var-from-loop
+      def loss():
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        return pred * pred
+      # pylint: enable=cell-var-from-loop
+
+      opt = sgd.SGD(learning_rate=1.0, momentum=0.0)
+      sgd_op = opt.minimize(loss)
+      self.evaluate(variables.global_variables_initializer())
+      # Run 1 step of sgd
+      self.evaluate(sgd_op)
+      # Validate updated params
+      self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
+    # This test invokes the ResourceSparseApplyMomentum operation, which
+    # did not have a registered GPU kernel as of April 2018. With graph
+    # execution, the placement algorithm notices this and automatically
+    # places the variable in CPU (host) memory. With eager execution,
+    # the variable would be placed in GPU memory if available, which
+    # would then conflict with the future invocation of the
+    # ResourceSparseApplyMomentum operation.
+    # To work around this discrepancy, for now we force the variable
+    # to be placed on CPU.
+    with ops.device("/cpu:0"):
+      var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2]))
+
+    def loss():
+      return math_ops.reduce_sum(embedding_ops.embedding_lookup(var0, [[1]]))
+
+    opt = sgd.SGD(learning_rate=1.0, momentum=0.0)
+    sgd_op = opt.minimize(loss)
+    self.evaluate(variables.global_variables_initializer())
+    self.evaluate(sgd_op)
+    self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0))
+
+  def testTensorLearningRateAndMomentum(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        mom_opt = sgd.SGD(
+            learning_rate=constant_op.constant(2.0),
+            momentum=constant_op.constant(0.9))
+        mom_update = mom_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+        # Check we have slots
+        self.assertEqual(["momentum"], mom_opt.get_slot_names())
+        slot0 = mom_opt.get_slot(var0, "momentum")
+        self.assertEquals(slot0.get_shape(), var0.get_shape())
+        self.assertFalse(slot0 in variables.trainable_variables())
+        slot1 = mom_opt.get_slot(var1, "momentum")
+        self.assertEquals(slot1.get_shape(), var1.get_shape())
+        self.assertFalse(slot1 in variables.trainable_variables())
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Step 1: the momentum accumulators where 0. So we should see a normal
+        # update: v -= grad * learning_rate
+        mom_update.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), slot0.eval())
+        self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), slot1.eval())
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), var1.eval())
+        # Step 2: the momentum accumulators contain the previous update.
+        mom_update.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval())
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([
+                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
+            ]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([
+                2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - (
+                    (0.9 * 0.01 + 0.01) * 2.0)
+            ]), var1.eval())
+
+  def _dbParamsMom01(self):
+    """Return dist-belief momentum values.
+
+    Return values been generated from the dist-belief momentum unittest,
+    running with a learning rate of 0.1 and a momentum of 0.1.
+
+    These values record how a parameter vector of size 10, initialized with 0.0,
+    gets updated with 10 consecutive momentum steps.  It uses random gradients.
+
+    Returns:
+      db_grad: The gradients to apply
+      db_out: The parameters after the momentum update.
+    """
+    db_grad = [[]] * 10
+    db_out = [[]] * 10
+    # pylint: disable=line-too-long
+    db_grad[0] = [
+        0.00096264342, 0.17914793, 0.93945462, 0.41396621, 0.53037018,
+        0.93197989, 0.78648776, 0.50036013, 0.55345792, 0.96722615
+    ]
+    db_out[0] = [
+        -9.6264346e-05, -0.017914793, -0.093945466, -0.041396622, -0.053037018,
+        -0.093197994, -0.078648776, -0.050036013, -0.055345792, -0.096722618
+    ]
+    db_grad[1] = [
+        0.17075552, 0.88821375, 0.20873757, 0.25236958, 0.57578111, 0.15312378,
+        0.5513742, 0.94687688, 0.16012503, 0.22159521
+    ]
+    db_out[1] = [
+        -0.017181443, -0.10852765, -0.12421377, -0.070773244, -0.11591884,
+        -0.11783017, -0.14165108, -0.14972731, -0.076892875, -0.1285544
+    ]
+    db_grad[2] = [
+        0.35077485, 0.47304362, 0.44412705, 0.44368884, 0.078527533, 0.81223965,
+        0.31168157, 0.43203235, 0.16792089, 0.24644311
+    ]
+    db_out[2] = [
+        -0.053967446, -0.1648933, -0.1716533, -0.1180798, -0.13005978,
+        -0.20151734, -0.17911947, -0.20289968, -0.095839672, -0.15638189
+    ]
+    db_grad[3] = [
+        0.9694621, 0.75035888, 0.28171822, 0.83813518, 0.53807181, 0.3728098,
+        0.81454384, 0.03848977, 0.89759839, 0.93665648
+    ]
+    db_out[3] = [
+        -0.15459226, -0.24556576, -0.20456907, -0.20662397, -0.18528105,
+        -0.24716705, -0.2643207, -0.21206589, -0.18749419, -0.2528303
+    ]
+    db_grad[4] = [
+        0.38578293, 0.8536852, 0.88722926, 0.66276771, 0.13678469, 0.94036359,
+        0.69107032, 0.81897682, 0.5433259, 0.67860287
+    ]
+    db_out[4] = [
+        -0.20323303, -0.33900154, -0.29658359, -0.28175515, -0.20448165,
+        -0.34576839, -0.34194785, -0.29488021, -0.25099224, -0.33033544
+    ]
+    db_grad[5] = [
+        0.27885768, 0.76100707, 0.24625534, 0.81354135, 0.18959245, 0.48038563,
+        0.84163809, 0.41172323, 0.83259648, 0.44941229
+    ]
+    db_out[5] = [
+        -0.23598288, -0.42444581, -0.33041057, -0.3706224, -0.22536094,
+        -0.40366709, -0.43387437, -0.34433398, -0.34060168, -0.38302717
+    ]
+    db_grad[6] = [
+        0.27233034, 0.056316052, 0.5039115, 0.24105175, 0.35697976, 0.75913221,
+        0.73577434, 0.16014607, 0.57500273, 0.071136251
+    ]
+    db_out[6] = [
+        -0.26649091, -0.43862185, -0.38418442, -0.40361428, -0.26314685,
+        -0.48537019, -0.51664448, -0.36529395, -0.40706289, -0.39540997
+    ]
+    db_grad[7] = [
+        0.58697265, 0.2494842, 0.08106143, 0.39954534, 0.15892942, 0.12683646,
+        0.74053431, 0.16033, 0.66625422, 0.73515922
+    ]
+    db_out[7] = [
+        -0.32823896, -0.46498787, -0.39766794, -0.446868, -0.28281838,
+        -0.50622416, -0.59897494, -0.38342294, -0.48033443, -0.47016418
+    ]
+    db_grad[8] = [
+        0.8215279, 0.41994119, 0.95172721, 0.68000203, 0.79439718, 0.43384039,
+        0.55561525, 0.22567581, 0.93331909, 0.29438227
+    ]
+    db_out[8] = [
+        -0.41656655, -0.50961858, -0.49418902, -0.51919359, -0.36422527,
+        -0.55169362, -0.6627695, -0.40780342, -0.58099347, -0.50707781
+    ]
+    db_grad[9] = [
+        0.68297005, 0.67758518, 0.1748755, 0.13266537, 0.70697063, 0.055731893,
+        0.68593478, 0.50580865, 0.12602448, 0.093537711
+    ]
+    db_out[9] = [
+        -0.49369633, -0.58184016, -0.52132869, -0.5396927, -0.44306302,
+        -0.56181377, -0.73774242, -0.46082234, -0.60366184, -0.52012295
+    ]
+    # pylint: enable=line-too-long
+    return db_grad, db_out
+
+  def testLikeDistBeliefMom01(self):
+    with self.cached_session():
+      db_grad, db_out = self._dbParamsMom01()
+      num_samples = len(db_grad)
+      var0 = variables.Variable([0.0] * num_samples)
+      grads0 = constant_op.constant([0.0] * num_samples)
+      mom_opt = sgd.SGD(learning_rate=0.1, momentum=0.1)
+      mom_update = mom_opt.apply_gradients(zip([grads0], [var0]))
+      variables.global_variables_initializer().run()
+      for i in xrange(num_samples):
+        mom_update.run(feed_dict={grads0: db_grad[i]})
+        self.assertAllClose(np.array(db_out[i]), var0.eval())
+
+  def testSparse(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype))
+        var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2]))
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(
+                [[.1, .1]], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([4, 2]))
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(
+                [[.01, .01], [.01, .01]], dtype=dtype),
+            constant_op.constant([2, 3]),
+            constant_op.constant([4, 2]))
+        mom_opt = sgd.SGD(learning_rate=2.0, momentum=0.9)
+        mom_update = mom_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        # Check we have slots
+        self.assertEqual(["momentum"], mom_opt.get_slot_names())
+        slot0 = mom_opt.get_slot(var0, "momentum")
+        self.assertEquals(slot0.get_shape(), var0.get_shape())
+        slot1 = mom_opt.get_slot(var1, "momentum")
+        self.assertEquals(slot1.get_shape(), var1.get_shape())
+
+        # Fetch params to validate initial values
+        self.assertAllClose([0, 0], var0.eval()[0])
+        self.assertAllClose([0, 0], var0.eval()[1])
+        self.assertAllClose([1, 1], var1.eval()[2])
+
+        # Step 1: the momentum accumulators are 0. So we should see a normal
+        # update: v -= grad * learning_rate
+        mom_update.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(np.array([0, 0]), slot0.eval()[0])
+        self.assertAllCloseAccordingToType(np.array([.1, .1]), slot0.eval()[1])
+        self.assertAllCloseAccordingToType(
+            np.array([.01, .01]), slot1.eval()[2])
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(np.array([0, 0]), var0.eval()[0])
+        self.assertAllCloseAccordingToType(
+            np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), var0.eval()[1])
+        self.assertAllCloseAccordingToType(
+            np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), var1.eval()[2])
+        # Step 2: the momentum accumulators contain the previous update.
+        mom_update.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllClose(np.array([0, 0]), slot0.eval()[0])
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval()[1])
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
+            slot1.eval()[2])
+        # Check that the parameters have been updated.
+        self.assertAllClose(np.array([0, 0]), var0.eval()[0])
+        self.assertAllCloseAccordingToType(
+            np.array([
+                -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), -(0.1 * 2.0) - (
+                    (0.9 * 0.1 + 0.1) * 2.0)
+            ]), var0.eval()[1])
+        self.assertAllCloseAccordingToType(
+            np.array([
+                0.98 - ((0.9 * 0.01 + 0.01) * 2.0), 0.98 - (
+                    (0.9 * 0.01 + 0.01) * 2.0)
+            ]), var1.eval()[2])
+
+  def testSharing(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+        mom_opt = sgd.SGD(learning_rate=2.0, momentum=0.9)
+        mom_update1 = mom_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        mom_update2 = mom_opt.apply_gradients(
+            zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        self.assertEqual(["momentum"], mom_opt.get_slot_names())
+        slot0 = mom_opt.get_slot(var0, "momentum")
+        self.assertEquals(slot0.get_shape(), var0.get_shape())
+        slot1 = mom_opt.get_slot(var1, "momentum")
+        self.assertEquals(slot1.get_shape(), var1.get_shape())
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Step 1: the momentum accumulators where 0. So we should see a normal
+        # update: v -= grad * learning_rate
+        mom_update1.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), slot0.eval())
+        self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), slot1.eval())
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), var1.eval())
+        # Step 2: the second momentum accumulators contain the previous update.
+        mom_update2.run()
+        # Check that the momentum accumulators have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval())
+        # Check that the parameters have been updated.
+        self.assertAllCloseAccordingToType(
+            np.array([
+                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
+                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
+            ]), var0.eval())
+        self.assertAllCloseAccordingToType(
+            np.array([
+                2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - (
+                    (0.9 * 0.01 + 0.01) * 2.0)
+            ]), var1.eval())
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 3f54f1f60413cbd3e9a5a4126f8ae04bc4e06abc Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Fri, 5 Oct 2018 12:45:56 -0700
Subject: [PATCH 0441/1085] Workaround build errors in Android NDK r14b.

PiperOrigin-RevId: 215950376
---
 tensorflow/tools/ci_build/Dockerfile.android | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/tools/ci_build/Dockerfile.android b/tensorflow/tools/ci_build/Dockerfile.android
index dcf077791a..7e72eb0cbf 100644
--- a/tensorflow/tools/ci_build/Dockerfile.android
+++ b/tensorflow/tools/ci_build/Dockerfile.android
@@ -45,9 +45,14 @@ ENV ANDROID_NDK_FILENAME android-ndk-r14b-linux-x86_64.zip
 ENV ANDROID_NDK_URL https://dl.google.com/android/repository/${ANDROID_NDK_FILENAME}
 ENV ANDROID_NDK_HOME ${ANDROID_DEV_HOME}/ndk
 ENV PATH ${PATH}:${ANDROID_NDK_HOME}
+# Workaround for b/117156972: inject missing #include into NDK versions of
+# futex.h.
 RUN cd ${ANDROID_DEV_HOME} && \
     wget -q ${ANDROID_NDK_URL} && \
     unzip ${ANDROID_NDK_FILENAME} -d ${ANDROID_DEV_HOME} && \
+    sed -i 15i"#include <linux/compiler.h>" ${ANDROID_DEV_HOME}/android-ndk-r14b/platforms/android-14/arch-arm/usr/include/linux/futex.h && \
+    sed -i 15i"#include <linux/compiler.h>" ${ANDROID_DEV_HOME}/android-ndk-r14b/platforms/android-14/arch-mips/usr/include/linux/futex.h && \
+    sed -i 15i"#include <linux/compiler.h>" ${ANDROID_DEV_HOME}/android-ndk-r14b/platforms/android-14/arch-x86/usr/include/linux/futex.h && \
     rm ${ANDROID_NDK_FILENAME} && \
     bash -c "ln -s ${ANDROID_DEV_HOME}/android-ndk-* ${ANDROID_NDK_HOME}"
 
-- 
GitLab


From 7c642ef7f713a53b8c04730b33a4d55da8915ac1 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Fri, 5 Oct 2018 12:58:11 -0700
Subject: [PATCH 0442/1085] [tf.data] Fix noisy warning. (#22778)

PiperOrigin-RevId: 215607171
---
 tensorflow/python/data/ops/dataset_ops.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 46ce191f7b..b7e19055f2 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1831,9 +1831,10 @@ class StructuredFunctionWrapper(object):
           flat_classes.append(component)
           flat_shapes.append(component)
           flat_types.append(component)
-          if t.options() is not None:  # pylint: disable=protected-access
-            warnings.warn("Encountered a nested dataset with options. These "
-                          "options will not be applied to the outer dataset.")
+          if t.options() != Options():
+            warnings.warn("Encountered a nested dataset with non-default "
+                          "options. These options will not be propagated to "
+                          "the outer dataset.")
         else:
           try:
             t = ops.convert_to_tensor(t)
-- 
GitLab


From 3427a3c638fb92a172d390266ed62403f9140f7d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 12:52:22 -0700
Subject: [PATCH 0443/1085] Internal change.

PiperOrigin-RevId: 215951354
---
 tensorflow/contrib/lite/kernels/BUILD        | 1 +
 tensorflow/contrib/lite/kernels/lstm_eval.cc | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index 68636fb070..d2d8073abd 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -259,6 +259,7 @@ cc_library(
     srcs = ["lstm_eval.cc"],
     hdrs = ["lstm_eval.h"],
     deps = [
+        ":op_macros",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "//tensorflow/contrib/lite/kernels/internal:kernel_utils",
         "//tensorflow/contrib/lite/kernels/internal:tensor_utils",
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.cc b/tensorflow/contrib/lite/kernels/lstm_eval.cc
index c6c21eb085..20a4e30009 100644
--- a/tensorflow/contrib/lite/kernels/lstm_eval.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.cc
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h"
 #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h"
+#include "tensorflow/contrib/lite/kernels/op_macros.h"
 
 namespace tflite {
 namespace ops {
@@ -599,6 +600,7 @@ TfLiteStatus EvalFloat(
     const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
     TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
     TfLiteTensor* cell_state, TfLiteTensor* output) {
+  TF_LITE_ASSERT(input->dims->size >= 2 && input->dims->size <= 3);
   const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
   const int n_batch = input->dims->data[input->dims->size - 2];
   const int n_input = input->dims->data[input->dims->size - 1];
@@ -716,6 +718,7 @@ TfLiteStatus EvalHybrid(
     TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
     TfLiteTensor* output_state, TfLiteTensor* cell_state,
     TfLiteTensor* output) {
+  TF_LITE_ASSERT(input->dims->size >= 2 && input->dims->size <= 3);
   const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
   const int n_batch = input->dims->data[input->dims->size - 2];
   const int n_input = input->dims->data[input->dims->size - 1];
-- 
GitLab


From ec451f5ab43467d7cb4ae7736f2de16331441e0b Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Fri, 5 Oct 2018 12:53:50 -0700
Subject: [PATCH 0444/1085] Break up build --define <option_name>=true into two
 steps: 1) define bazel config    build:<bazel_config_name> --define
 <option_name>s=true 2) set the config    build --config=<bazel_config_name>

PiperOrigin-RevId: 215951614
---
 configure.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 65b4622995..89dc79b6b6 100644
--- a/configure.py
+++ b/configure.py
@@ -383,7 +383,9 @@ def set_build_var(environ_cp,
   var = str(int(get_var(environ_cp, var_name, query_item, enabled_by_default)))
   environ_cp[var_name] = var
   if var == '1':
-    write_to_bazelrc('build --define %s=true' % option_name)
+    write_to_bazelrc(
+        'build:%s --define %s=true' % (bazel_config_name, option_name))
+    write_to_bazelrc('build --config=%s' % bazel_config_name)
   elif bazel_config_name is not None:
     # TODO(mikecase): Migrate all users of configure.py to use --config Bazel
     # options and not to set build configs through environment variables.
-- 
GitLab


From 1c278d84421c9abac9b52f179cdb4ce397be83db Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Fri, 5 Oct 2018 13:15:17 -0700
Subject: [PATCH 0445/1085] [tf.data] Fix bug in
 `tf.data.experimental.unbatch()`. (#22777)

Previously, if the rank of the input to this transformation was
statically unknown, we would erroneously report that the output is a
scalar, and violate downstream shape integrity checks. Instead, in
that case the output shape should be unknown.

PiperOrigin-RevId: 215683027
---
 tensorflow/core/kernels/data/unbatch_dataset_op.cc | 13 +++++++++----
 .../kernel_tests/batch_dataset_op_test.py          | 14 ++++++++++++++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/data/unbatch_dataset_op.cc b/tensorflow/core/kernels/data/unbatch_dataset_op.cc
index 81c432b938..74908994b4 100644
--- a/tensorflow/core/kernels/data/unbatch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/unbatch_dataset_op.cc
@@ -41,11 +41,16 @@ class UnbatchDatasetOp : public UnaryDatasetOpKernel {
         : DatasetBase(DatasetContext(ctx)), input_(input) {
       input_->Ref();
       for (const PartialTensorShape& shape : input->output_shapes()) {
-        gtl::InlinedVector<int64, 4> partial_dim_sizes;
-        for (int i = 1; i < shape.dims(); ++i) {
-          partial_dim_sizes.push_back(shape.dim_size(i));
+        if (!shape.unknown_rank()) {
+          gtl::InlinedVector<int64, 4> partial_dim_sizes;
+          for (int i = 1; i < shape.dims(); ++i) {
+            partial_dim_sizes.push_back(shape.dim_size(i));
+          }
+          shapes_.emplace_back(std::move(partial_dim_sizes));
+        } else {
+          // If the input shape is unknown, the output shape will be unknown.
+          shapes_.emplace_back();
         }
-        shapes_.emplace_back(std::move(partial_dim_sizes));
       }
     }
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
index 8703b2810e..956b4518f6 100644
--- a/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/batch_dataset_op_test.py
@@ -131,6 +131,20 @@ class BatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
                                    "larger than the row shape"):
         sess.run(get_next)
 
+  def testUnbatchWithUnknownRankInput(self):
+    placeholder = array_ops.placeholder(dtypes.int32)
+    dataset = dataset_ops.Dataset.from_tensors(placeholder).apply(
+        batching.unbatch())
+    iterator = dataset.make_initializable_iterator()
+    next_elem = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer, feed_dict={placeholder: [0, 1, 2, 3]})
+      for i in range(4):
+        self.assertEqual(i, sess.run(next_elem))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_elem)
+
   def testUnbatchScalarDataset(self):
     data = tuple([math_ops.range(10) for _ in range(3)])
     data = dataset_ops.Dataset.from_tensor_slices(data)
-- 
GitLab


From 7fe33c5bf90d840ea1e1df5f402fd76db01612a4 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Fri, 5 Oct 2018 13:16:48 -0700
Subject: [PATCH 0446/1085] pylint stuff

---
 tensorflow/python/kernel_tests/check_ops_test.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index d4c618d34b..93aee7dc1a 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -322,22 +322,23 @@ b'x \(shape=\(2, 3\) dtype=float32\) = '
 b'y \(shape=\(2, 3\) dtype=float32\) = '
 0.0, 1.0, ..."""
     with context.eager_mode():
-      t = constant_op.constant(np.array(range(6)), shape=[2,3], dtype=np.float32)
+      t = constant_op.constant(np.array(range(6)), shape=[2, 3], 
+          dtype=np.float32)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_full):
         check_ops.assert_none_equal(t, t, message="This is the error message.",
-                               summarize=10)
+                                    summarize=10)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_full):
         check_ops.assert_none_equal(t, t, message="This is the error message.",
-                               summarize=-1)
+                                    summarize=-1)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_default):
         check_ops.assert_none_equal(t, t, message="This is the error message.")
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_short):
         check_ops.assert_none_equal(t, t, message="This is the error message.",
-                               summarize=2)
+                                    summarize=2)
 
 
 class AssertAllCloseTest(test.TestCase):
-- 
GitLab


From a4d3196c7755a2bbaa0165d84ce2f83c6eb215fe Mon Sep 17 00:00:00 2001
From: Jeremy Lau <30300826+fdxmw@users.noreply.github.com>
Date: Fri, 5 Oct 2018 13:26:11 -0700
Subject: [PATCH 0447/1085] Workaround build errors in Android NDK r14b.
 (#22779)

PiperOrigin-RevId: 215950376
---
 tensorflow/tools/ci_build/Dockerfile.android | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/tools/ci_build/Dockerfile.android b/tensorflow/tools/ci_build/Dockerfile.android
index dcf077791a..7e72eb0cbf 100644
--- a/tensorflow/tools/ci_build/Dockerfile.android
+++ b/tensorflow/tools/ci_build/Dockerfile.android
@@ -45,9 +45,14 @@ ENV ANDROID_NDK_FILENAME android-ndk-r14b-linux-x86_64.zip
 ENV ANDROID_NDK_URL https://dl.google.com/android/repository/${ANDROID_NDK_FILENAME}
 ENV ANDROID_NDK_HOME ${ANDROID_DEV_HOME}/ndk
 ENV PATH ${PATH}:${ANDROID_NDK_HOME}
+# Workaround for b/117156972: inject missing #include into NDK versions of
+# futex.h.
 RUN cd ${ANDROID_DEV_HOME} && \
     wget -q ${ANDROID_NDK_URL} && \
     unzip ${ANDROID_NDK_FILENAME} -d ${ANDROID_DEV_HOME} && \
+    sed -i 15i"#include <linux/compiler.h>" ${ANDROID_DEV_HOME}/android-ndk-r14b/platforms/android-14/arch-arm/usr/include/linux/futex.h && \
+    sed -i 15i"#include <linux/compiler.h>" ${ANDROID_DEV_HOME}/android-ndk-r14b/platforms/android-14/arch-mips/usr/include/linux/futex.h && \
+    sed -i 15i"#include <linux/compiler.h>" ${ANDROID_DEV_HOME}/android-ndk-r14b/platforms/android-14/arch-x86/usr/include/linux/futex.h && \
     rm ${ANDROID_NDK_FILENAME} && \
     bash -c "ln -s ${ANDROID_DEV_HOME}/android-ndk-* ${ANDROID_NDK_HOME}"
 
-- 
GitLab


From f14287eabf69c57a2d2e044c311f2db1413cb6a5 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Fri, 5 Oct 2018 13:24:34 -0700
Subject: [PATCH 0448/1085] Copy device from If op to the lowered ops. Enable
 GPU tests for cond_v2.

PiperOrigin-RevId: 215956220
---
 tensorflow/core/common_runtime/lower_if_op.cc |  9 +++-
 tensorflow/python/kernel_tests/BUILD          |  3 +-
 .../python/kernel_tests/cond_v2_test.py       | 49 +++++++++----------
 .../kernel_tests/control_flow_ops_py_test.py  |  5 --
 4 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/tensorflow/core/common_runtime/lower_if_op.cc b/tensorflow/core/common_runtime/lower_if_op.cc
index a02084f223..9306386117 100644
--- a/tensorflow/core/common_runtime/lower_if_op.cc
+++ b/tensorflow/core/common_runtime/lower_if_op.cc
@@ -107,6 +107,8 @@ CondBuilder::CondBuilder(Node* if_op, const string& then_fn_name,
       then_call_builder_(NewName("then"), then_fn_name, graph->op_registry()),
       else_call_builder_(NewName("else"), else_fn_name, graph->op_registry()) {
   TF_CHECK_OK(if_op_->input_node(0, &pred_));
+  then_call_builder_.Device(if_op_->requested_device());
+  else_call_builder_.Device(if_op_->requested_device());
 }
 
 Status CondBuilder::CreatePivotNodes() {
@@ -117,15 +119,18 @@ Status CondBuilder::CreatePivotNodes() {
       NodeBuilder(NewName("switch_pred"), "Switch", graph_->op_registry())
           .Input(NodeOut(pred_, 0))
           .Input(NodeOut(pred_, 0))
+          .Device(if_op_->requested_device())
           .Finalize(graph_, &switch_pred));
   control_predecessor_ = switch_pred;
   TF_RETURN_IF_ERROR(
       NodeBuilder(NewName("pivot_f"), "Identity", graph_->op_registry())
           .Input(switch_pred, kElseBranch)
+          .Device(if_op_->requested_device())
           .Finalize(graph_, &pivot_f_));
   TF_RETURN_IF_ERROR(
       NodeBuilder(NewName("pivot_t"), "Identity", graph_->op_registry())
           .Input(switch_pred, kThenBranch)
+          .Device(if_op_->requested_device())
           .Finalize(graph_, &pivot_t_));
   return Status::OK();
 }
@@ -140,6 +145,7 @@ Status CondBuilder::AddInput(Node* src, int src_output) {
       NodeBuilder(NewName(src->name()), "Switch", graph_->op_registry())
           .Input(src, src_output)
           .Input(pred_, 0)
+          .Device(if_op_->requested_device())
           .Finalize(graph_, &input));
   then_call_builder_.Input(input, kThenBranch);
   else_call_builder_.Input(input, kElseBranch);
@@ -178,6 +184,7 @@ Status CondBuilder::AddOutputs() {
     TF_RETURN_IF_ERROR(
         NodeBuilder(graph_->NewName("merge"), "Merge", graph_->op_registry())
             .Input({NodeOut(then_call_node_, i), NodeOut(else_call_node_, i)})
+            .Device(if_op_->requested_device())
             .Finalize(graph_, &merges[i]));
     outputs_[i] = NodeOut(merges[i], 0);
   }
@@ -218,7 +225,7 @@ Status InlineCallInGraph(Node* n, const FunctionLibraryDefinition& flib,
 Status CondBuilder::BuildLoweredIfOutput() {
   // Build the identity node output.
   NodeBuilder ib(name_, "IdentityN");
-  ib.Input(outputs_);
+  ib.Input(outputs_).Device(if_op_->requested_device());
   return ib.Finalize(graph_, &lowered_if_output_);
 }
 
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index e055ef1c1b..4e8639dfc8 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -3255,7 +3255,7 @@ tf_py_test(
     tags = ["no_pip"],
 )
 
-tf_py_test(
+cuda_py_test(
     name = "cond_v2_test",
     size = "medium",
     srcs = ["cond_v2_test.py"],
@@ -3272,7 +3272,6 @@ tf_py_test(
         "//tensorflow/python:training",
     ],
     grpc_enabled = True,
-    tags = ["no_gpu"],  # TODO(b/111656070)
 )
 
 cuda_py_test(
diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 377c041675..ec875aae59 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -172,7 +172,7 @@ class CondV2Test(test.TestCase):
     self._testCond(true_fn, false_fn, [y])
 
   def testNestedDefunInCond(self):
-    self.skipTest("b/110550782")
+    self.skipTest("b/117284369")
 
     x = constant_op.constant(1.0, name="x")
     y = constant_op.constant(2.0, name="y")
@@ -198,7 +198,7 @@ class CondV2Test(test.TestCase):
     self._testCond(true_fn, false_fn, [y])
 
   def testDoubleNestedDefunInCond(self):
-    self.skipTest("b/110550782")
+    self.skipTest("b/117284369")
 
     x = constant_op.constant(1.0, name="x")
     y = constant_op.constant(2.0, name="y")
@@ -468,7 +468,6 @@ class CondV2Test(test.TestCase):
             }), [5., 0.])
 
   def testBuildCondAndGradientInsideDefun(self):
-    self.skipTest("b/110550782")
 
     def build_graph():
       pred_outer = array_ops.placeholder(dtypes.bool, name="pred_outer")
@@ -502,29 +501,29 @@ class CondV2Test(test.TestCase):
 
       return grads, pred_outer, pred_inner
 
-    with ops.Graph().as_default():
+    with ops.Graph().as_default(), self.session(
+        graph=ops.get_default_graph()) as sess:
       grads, pred_outer, pred_inner = build_graph()
-      with self.session(graph=ops.get_default_graph()) as sess:
-        self.assertSequenceEqual(
-            sess.run(grads, {
-                pred_outer: True,
-                pred_inner: True
-            }), [0., 0.])
-        self.assertSequenceEqual(
-            sess.run(grads, {
-                pred_outer: True,
-                pred_inner: False
-            }), [0., 0.])
-        self.assertSequenceEqual(
-            sess.run(grads, {
-                pred_outer: False,
-                pred_inner: True
-            }), [4., 2.])
-        self.assertSequenceEqual(
-            sess.run(grads, {
-                pred_outer: False,
-                pred_inner: False
-            }), [5., 0.])
+      self.assertSequenceEqual(
+          sess.run(grads, {
+              pred_outer: True,
+              pred_inner: True
+          }), [0., 0.])
+      self.assertSequenceEqual(
+          sess.run(grads, {
+              pred_outer: True,
+              pred_inner: False
+          }), [0., 0.])
+      self.assertSequenceEqual(
+          sess.run(grads, {
+              pred_outer: False,
+              pred_inner: True
+          }), [4., 2.])
+      self.assertSequenceEqual(
+          sess.run(grads, {
+              pred_outer: False,
+              pred_inner: False
+          }), [5., 0.])
 
   def testSecondDerivative(self):
     with self.cached_session() as sess:
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index c7e89dd5f9..7fae5249aa 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -23,7 +23,6 @@ from __future__ import print_function
 import collections
 import math
 import time
-import unittest
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
@@ -661,7 +660,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
       sess.run(r)
 
-  @test_util.disable_control_flow_v2("b/113346829 (gpu failure)")
   def testCondGrad_1(self):
     graph = ops.Graph()
     with graph.as_default():
@@ -3424,9 +3422,6 @@ class EagerTest(test.TestCase):
 
   # TODO(b/117279927): Re-enable once msan failure is fixed.
   def DISABLED_testCondInDefun(self):
-    if "GPU" in [d.device_type for d in device_lib.list_local_devices()]:
-      return unittest.skip("b/113346829 (gpu failure)")
-
     with context.eager_mode():
 
       @eager_function.defun
-- 
GitLab


From b891e8f3c2f4c61014eb4613081410e8bb3107ff Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Fri, 5 Oct 2018 13:32:10 -0700
Subject: [PATCH 0449/1085] pylint issues in fix for pylint issues

---
 tensorflow/python/kernel_tests/check_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 93aee7dc1a..5d953a3a38 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -323,7 +323,7 @@ b'y \(shape=\(2, 3\) dtype=float32\) = '
 0.0, 1.0, ..."""
     with context.eager_mode():
       t = constant_op.constant(np.array(range(6)), shape=[2, 3], 
-          dtype=np.float32)
+                               dtype=np.float32)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    expected_error_msg_full):
         check_ops.assert_none_equal(t, t, message="This is the error message.",
-- 
GitLab


From 0c37dcc02f54395d2bde3cc5850574c8f98f1b46 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Fri, 5 Oct 2018 13:32:24 -0700
Subject: [PATCH 0450/1085] [XLA] Use the highest possible precision for large
 Iota inputs.

PiperOrigin-RevId: 215957327
---
 tensorflow/compiler/xla/tests/convolution_test.cc | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc
index 070b092d18..b851db14ec 100644
--- a/tensorflow/compiler/xla/tests/convolution_test.cc
+++ b/tensorflow/compiler/xla/tests/convolution_test.cc
@@ -91,7 +91,14 @@ class ForwardPassConvolution_3x3x256_256_OutputZ_Iota : public ConvolutionTest {
     XlaBuilder builder(TestName());
     auto lhs = ConstantR4FromArray4D<T>(&builder, *alhs);
     auto rhs = ConstantR4FromArray4D<T>(&builder, *arhs);
-    Conv(lhs, rhs, {1, 1}, Padding::kValid);
+    PrecisionConfig precision;
+    // The left hand side of the convolution is numbers between 0 and 2304 which
+    // requires at least 11 mantissa bits and the DEFAULT precision config is
+    // allowed to round to bfloat16 which only has 7 mantissa bits.
+    precision.add_operand_precision(PrecisionConfig::HIGHEST);
+    precision.add_operand_precision(PrecisionConfig::DEFAULT);
+    Conv(lhs, rhs, {1, 1}, Padding::kValid, /*feature_group_count=*/1,
+         &precision);
 
     ComputeAndCompare(&builder, {}, error_spec_);
   }
-- 
GitLab


From 35b2936f80c42aea48987581211c33e27152652d Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@gadde.mtv.corp.google.com>
Date: Fri, 5 Oct 2018 13:10:19 -0700
Subject: [PATCH 0451/1085] Adding release notes for 1.12

---
 RELEASE.md | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/RELEASE.md b/RELEASE.md
index 20e1d9217b..6e1db4a930 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,46 @@
+# Release 1.12.0
+
+## Major Features and Improvements
+
+
+## Breaking Changes
+
+## Bug Fixes and Other Changes
+
+* tf.data:
+  * All C++ code moves to the `tensorflow::data` namespace.
+  * Support evaluating with a tf.data.Dataset
+  * Adding support for `num_parallel_calls` to `tf.data.Dataset.interleave`.
+* `tf.contrib`:
+  * Remove tf.contrib.linalg.  Library now in tf.linalg.
+  * Replace any calls to tf.contrib.get_signature_def_by_key(metagraph_def, signature_def_key) with meta_graph_def.signature_def[signature_def_key]. Catching a ValueError exception thrown by tf.contrib.get_signature_def_by_key should be replaced by catching a KeyError exception.
+  * Keras model can be exported to the SavedModel format using tf.contrib.saved_model.save_keras_model().
+* `tf.contrib.data`
+  * deprecated, and replaced by `tf.data.experimental`.
+* Other:
+  * Instead of jemalloc, revert back to using system malloc.
+  * Build TensorFlow with XLA support included by default.
+  * Removed integer types from tf.nn.softplus and tf.nn.softsign OpDefs. This is a bugfix; these ops were never meant to support integers.
+  * Allow subslicing Tensors with a single dimension.
+  * Add option to calculate string length in Unicode characters
+  * Add functionality to SubSlice a tensor.
+  * Add searchsorted (ie lower/upper_bound) op.
+  * Adding model explainability to Boosted Trees.
+  * Support negative positions for tf.substr
+  * There was previously a bug in the bijector_impl where the _reduce_jacobian_det_over_event does not handle scalar ILDJ implementations properly.
+  * In tf eager execution, allow re-entering a GradientTape context
+  * Add tf_api_version flag. If --define=tf_api_version=2 flag is passed in, then bazel will build TensorFlow API version 2.0. Note that TensorFlow 2.0 is under active development and has no guarantees at this point.
+  * Added additional compression options to TfRecordWriter
+  * Performance improvements for regex full match operations.
+  * Replace tf.GraphKeys.VARIABLES with tf.GraphKeys.GLOBAL_VARIABLES
+  * Removed unused dynamic learning rate support.
+
+## Thanks to our Contributors
+
+This release contains contributions from many people at Google, as well as:
+
+(David) Siu-Kei Muk, Ag Ramesh, Anton Dmitriev, Artem Sobolev, Avijit-Nervana, Bairen Yi, Bruno Goncalves, By Shen, candy.dc, Cheng Chen, Clayne Robison, coder3101, Dao Zhang, Elms, Fei Hu, feiquan, Geoffrey Irving, Guozhong Zhuang, hellcom, Hoeseong Kim, imsheridan, Jason Furmanek, Jason Zaman, Jenny Sahng, jiefangxuanyan, Johannes Bannhofer, Jonathan Homer, Koan-Sin Tan, kouml, Loo Rong Jie, Lukas Geiger, manipopopo, Ming Li, Moritz KröGer, Naurril, Niranjan Hasabnis, Pan Daoxin, Peng Yu, pengwa, rasmi, Roger Xin, Roland Fernandez, Sami Kama, Samuel Matzek, Sangjung Woo, Sergei Lebedev, Sergii Khomenko, shaohua, Shaohua Zhang, Shujian2015, Sunitha Kambhampati, tomguluson92, ViníCius Camargo, wangsiyu, weidankong, Wen-Heng (Jack) Chung, William D. Irons, Xin Jin, Yan Facai (颜发才), Yanbo Liang, Yash Katariya, Yong Tang, 在原佐为
+
 # Release 1.11.0
 
 ## Major Features and Improvements
-- 
GitLab


From 4d69a79b1ebd0c2180959c1047fbc9db106701e1 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Fri, 5 Oct 2018 13:33:38 -0700
Subject: [PATCH 0452/1085] Handle Range & BatchMatMul in partial Flex mode

PiperOrigin-RevId: 215957535
---
 .../contrib/lite/toco/import_tensorflow.cc    | 37 ++++++++-
 tensorflow/contrib/lite/toco/model.h          |  9 +-
 tensorflow/contrib/lite/toco/tflite/export.cc | 83 +++++++++++--------
 .../contrib/lite/toco/tflite/export_test.cc   | 34 ++++++++
 .../contrib/lite/toco/tflite/operator.cc      | 32 ++++---
 .../contrib/lite/toco/tflite/operator.h       |  6 ++
 6 files changed, 155 insertions(+), 46 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 5eaf6e27fc..133ef79a34 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -477,6 +477,30 @@ string CreateConstArray(Model* model, string const& name,
   return array_name;
 }
 
+// Retain TensorFlow NodeDef in Toco Operator.
+//
+// If an op is supported by Toco but not supported by TFLite, TFLite exporter
+// will use the retained NodeDef to populate a Flex op when Flex mode is
+// enabled.
+//
+// This can't be easily applied to all operations, because a TensorFlow node
+// may become multiple Toco operators. Thus we need to call this function in
+// operator conversion functions one by one whenever feasible.
+//
+// This may cause problems if a graph transformation rule changes parameters
+// of the node. When calling this function, please check if any existing
+// graph transformation rule will change an existing operator with the same
+// type.
+//
+// This provides a route to handle Toco-supported & TFLite-unsupported ops
+// in Flex mode. However it's not a solid solution. Eventually we should
+// get rid of this.
+// TODO(b/117327937): Implement all Toco-supported ops in TFLite, and remove
+// this function.
+void RetainTensorFlowNodeDef(const NodeDef& node, Operator* op) {
+  node.SerializeToString(&op->tensorflow_node_def);
+}
+
 tensorflow::Status ConvertConstOperator(
     const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
     Model* model) {
@@ -990,6 +1014,10 @@ tensorflow::Status ConvertBatchMatMulOperator(
   auto* batch_matmul = new BatchMatMulOperator;
   batch_matmul->inputs = {node.input(0), node.input(1)};
   batch_matmul->outputs = {node.name()};
+
+  // For Flex mode. Please read the comments of the function.
+  RetainTensorFlowNodeDef(node, batch_matmul);
+
   model->operators.emplace_back(batch_matmul);
   return tensorflow::Status::OK();
 }
@@ -1081,7 +1109,10 @@ tensorflow::Status ConvertUnsupportedOperator(
 
   auto* op = new TensorFlowUnsupportedOperator;
   op->tensorflow_op = node.op();
-  node.SerializeToString(&op->tensorflow_node_def);
+
+  // For Flex mode. Please read the comments of the function.
+  RetainTensorFlowNodeDef(node, op);
+
   model->operators.emplace_back(op);
 
   // Parse inputs.
@@ -1605,6 +1636,10 @@ tensorflow::Status ConvertRangeOperator(
   op->inputs.push_back(node.input(1));
   op->inputs.push_back(node.input(2));
   op->outputs.push_back(node.name());
+
+  // For Flex mode. Please read the comments of the function.
+  RetainTensorFlowNodeDef(node, op);
+
   model->operators.emplace_back(op);
   return tensorflow::Status::OK();
 }
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 6e207fdf54..61f1f095e9 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -376,6 +376,13 @@ struct Operator {
   // looks unused.
   bool unresolved_outputs = false;
 
+  // A serialized tensorflow::NodeDef string.
+  // The field is filled only when importing from TensorFlow.
+  // It's guaranteed to be filled for `TensorFlowUnsupportedOperator`.
+  // It's not guaranteed to be filled for other ops. Ops created by graph
+  // transformations won't have TensorFlow NodeDef.
+  string tensorflow_node_def;
+
  protected:
   // Constructor used by subclasses for specific OperatorType's.
   explicit Operator(OperatorType t)
@@ -1535,8 +1542,6 @@ struct TensorFlowUnsupportedOperator : Operator {
 
   // The original TF operation type. Used for diagnostic purposes.
   string tensorflow_op;
-  // A serialized tensorflow::NodeDef string.
-  string tensorflow_node_def;
   // A boolean indicating if the unsupported op should be treated as quantized.
   bool quantized = false;
   // A boolean indicating if the unsupported op output should allow float values
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index f6f76e48a4..3b34cd6285 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -95,11 +95,13 @@ OperatorKey GetOperatorKey(
     const ::toco::Operator& op,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
     bool allow_flex_ops) {
+  // Get the op name (by Toco definition).
   string name = HelpfulOperatorTypeName(op);
-  const auto& builtin_ops = GetBuiltinOpsMap();
 
   bool is_builtin = false;
   OperatorKey key;
+
+  const auto& builtin_ops = GetBuiltinOpsMap();
   if (ops_by_type.count(op.type) != 0) {
     key.version = ops_by_type.at(op.type)->GetVersion(op);
     name = ops_by_type.at(op.type)->name();
@@ -110,37 +112,46 @@ OperatorKey GetOperatorKey(
     // For TFLite supported builtin ops, find out its BuiltinOperator enum used
     // in FlatBuffer.
     key.type = builtin_ops.at(name);
-  } else {
-    key.type = BuiltinOperator_CUSTOM;
-
-    key.is_custom_op = true;
-    if (op.type == OperatorType::kUnsupported) {
-      const TensorFlowUnsupportedOperator& unsupported_op =
-          static_cast<const TensorFlowUnsupportedOperator&>(op);
-      const auto tensorflow_op = unsupported_op.tensorflow_op;
-
-      // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
-      // to populate a regular custom op. We need to find a way to fix this.
-      if (allow_flex_ops) {
-        // Memorize the original TensorFlow op name.
-        key.flex_tensorflow_op = tensorflow_op;
-        // Prefix the custom code of the flex op.
-        key.custom_code =
-            string(::tflite::kFlexCustomCodePrefix) + tensorflow_op;
-        key.is_flex_op = true;
-
-        if (IsControlFlowOp(tensorflow_op)) {
-          key.is_unsupported_flex_op = true;
-        }
-      } else {
-        key.custom_code = tensorflow_op;
-      }
+    return key;
+  }
+
+  // The logic below is all for custom ops.
+  key.is_custom_op = true;
+  key.type = BuiltinOperator_CUSTOM;
+
+  if (op.type == OperatorType::kUnsupported) {
+    const TensorFlowUnsupportedOperator& unsupported_op =
+        static_cast<const TensorFlowUnsupportedOperator&>(op);
+    const auto tensorflow_op = unsupported_op.tensorflow_op;
+
+    // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
+    // to populate a regular custom op. We need to find a way to fix this.
+    if (allow_flex_ops) {
+      key.is_flex_op = true;
+      key.flex_tensorflow_op = tensorflow_op;
+      key.custom_code =
+          string(::tflite::kFlexCustomCodePrefix) + key.flex_tensorflow_op;
     } else {
-      // For Toco-supported/TFLite-unsupported ops, currently we produce a
-      // custom op. This gives developers a chance to implement custom ops.
-      // TODO(b/116800229): Also produce Toco-supported/TFLite-unsupported ops
-      // as Flex ops when Flex mode is enabled.
-      key.custom_code = name;
+      key.custom_code = tensorflow_op;
+    }
+  } else if (allow_flex_ops && !op.tensorflow_node_def.empty()) {
+    // For Toco-supported/TFLite-unsupported ops, if the TensorFlow NodeDef
+    // is retained in the Toco Operator, we produce a Flex op if Flex mode
+    // is enabled.
+    key.is_flex_op = true;
+    key.flex_tensorflow_op = name;
+    key.custom_code =
+        string(::tflite::kFlexCustomCodePrefix) + key.flex_tensorflow_op;
+  } else {
+    // If Flex is disabled or the original TensorFlow NodeDef isn't available,
+    // we produce a custom op. This gives developers a chance to implemenr
+    // custom ops.
+    key.custom_code = name;
+  }
+
+  if (key.is_flex_op) {
+    if (IsControlFlowOp(key.flex_tensorflow_op)) {
+      key.is_unsupported_flex_op = true;
     }
   }
   return key;
@@ -323,8 +334,9 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
       outputs.push_back(tensors_map.at(output));
     }
 
-    int op_index = operators_map.at(
-        details::GetOperatorKey(*op, ops_by_type, params.allow_flex_ops));
+    const auto key =
+        details::GetOperatorKey(*op, ops_by_type, params.allow_flex_ops);
+    int op_index = operators_map.at(key);
 
     auto tflite_op_it = ops_by_type.find(op->type);
     BaseOperator* tflite_op = tflite_op_it == ops_by_type.end()
@@ -349,6 +361,11 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
           variable_tensor_indices->insert(variable_tensor_index);
         }
       }
+    } else if (key.is_flex_op && !op->tensorflow_node_def.empty()) {
+      auto fbb = WriteFlexOpOptions(op->tensorflow_node_def);
+      if (fbb) {
+        options = Options::Custom(builder->CreateVector(fbb->GetBuffer()));
+      }
     }
     // The only supported CustomOptionFormat is FLEXBUFFERS now.
     op_vector.push_back(CreateOperator(
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index d48ab78285..eda1aa78a3 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/toco/tflite/builtin_operator.h"
 #include "tensorflow/contrib/lite/toco/tflite/operator.h"
 #include "tensorflow/contrib/lite/toco/tflite/types.h"
+#include "tensorflow/core/framework/node_def.pb.h"
 
 namespace toco {
 namespace tflite {
@@ -382,6 +383,39 @@ TEST(OperatorKeyTest, TestFlexWithControlFlowOp) {
   EXPECT_TRUE(key.is_unsupported_flex_op);
 }
 
+TEST(OperatorKeyTest, TestFlexWithPartiallySupportedOps) {
+  // Test Toco-supported/TFLite-unsupported operators.
+  // TODO(ycling): The test will be broken if Range is implemented in TFLite.
+  // Find a more robust way to test the fallback logic.
+  auto op = absl::make_unique<RangeOperator>();
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+
+  {
+    // If NodeDef isn't retained in the Toco op, a regular custom op
+    // will be exported.
+    const auto key = details::GetOperatorKey(*op, ops_by_type, true);
+    EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+    EXPECT_EQ(key.custom_code, "Range");
+    EXPECT_EQ(key.version, 1);
+    EXPECT_FALSE(key.is_flex_op);
+  }
+
+  ::tensorflow::NodeDef node_def;
+  node_def.set_name("Range");
+  node_def.set_op("Range");
+  node_def.SerializeToString(&op->tensorflow_node_def);
+
+  {
+    // If NodeDef is retained in the Toco op, a Flex op will be exported.
+    const auto key = details::GetOperatorKey(*op, ops_by_type, true);
+    EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+    EXPECT_EQ(key.custom_code, "FlexRange");
+    EXPECT_EQ(key.version, 1);
+    EXPECT_TRUE(key.is_flex_op);
+  }
+}
+
 // TODO(ahentz): tests for tensors, inputs, outputs, opcodes and operators.
 
 }  // namespace
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 9addbb81e7..ed37535fe0 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -1157,6 +1157,25 @@ class Unpack : public BuiltinOperator<UnpackOperator, ::tflite::UnpackOptions,
   int GetVersion(const Operator& op) const override { return 1; }
 };
 
+std::unique_ptr<flexbuffers::Builder> WriteFlexOpOptions(
+    const string& tensorflow_node_def) {
+  auto fbb = absl::make_unique<flexbuffers::Builder>();
+
+  ::tensorflow::NodeDef node_def;
+  if (!node_def.ParseFromString(tensorflow_node_def)) {
+    LOG(ERROR) << "Failed to parse TensorFlow NodeDef";
+    return {};
+  }
+
+  fbb->Vector([&]() {
+    fbb->String(node_def.op());
+    fbb->String(tensorflow_node_def);
+  });
+  fbb->Finish();
+  LOG(INFO) << "Writing flex op: " << node_def.op();
+  return std::unique_ptr<flexbuffers::Builder>(fbb.release());
+}
+
 class TensorFlowUnsupported : public BaseOperator {
  public:
   TensorFlowUnsupported(const string& name, OperatorType type,
@@ -1192,6 +1211,9 @@ class TensorFlowUnsupported : public BaseOperator {
 
   std::unique_ptr<flexbuffers::Builder> WriteOptions(
       const TensorFlowUnsupportedOperator& op) const {
+    if (allow_flex_ops_) {
+      return WriteFlexOpOptions(op.tensorflow_node_def);
+    }
     auto fbb = absl::make_unique<flexbuffers::Builder>();
 
     ::tensorflow::NodeDef node_def;
@@ -1200,16 +1222,6 @@ class TensorFlowUnsupported : public BaseOperator {
       return std::unique_ptr<flexbuffers::Builder>();
     }
 
-    if (allow_flex_ops_) {
-      fbb->Vector([&]() {
-        fbb->String(node_def.op());
-        fbb->String(op.tensorflow_node_def);
-      });
-      fbb->Finish();
-      LOG(INFO) << "Writing flex op: " << node_def.op();
-      return std::unique_ptr<flexbuffers::Builder>(fbb.release());
-    }
-
     bool has_valid_attr = false;
     size_t map_start = fbb->StartMap();
     for (const auto& pair : node_def.attr()) {
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h
index 13d9f6c49a..6e4e0a16d1 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.h
+++ b/tensorflow/contrib/lite/toco/tflite/operator.h
@@ -16,6 +16,7 @@ limitations under the License.
 #define TENSORFLOW_CONTRIB_LITE_TOCO_TFLITE_OPERATOR_H_
 
 #include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/flexbuffers.h"
 #include "tensorflow/contrib/lite/schema/schema_generated.h"
 #include "tensorflow/contrib/lite/toco/model.h"
 
@@ -36,6 +37,11 @@ std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
 std::map<OperatorType, std::unique_ptr<BaseOperator>> BuildOperatorByTypeMap(
     bool allow_flex_ops = false);
 
+// Write the custom option FlexBuffer with a serialized TensorFlow NodeDef
+// for a Flex op.
+std::unique_ptr<flexbuffers::Builder> WriteFlexOpOptions(
+    const string& tensorflow_node_def);
+
 // These are the flatbuffer types for custom and builtin options.
 using CustomOptions = flatbuffers::Vector<uint8_t>;
 using BuiltinOptions = void;
-- 
GitLab


From 9335fae4a972d848ba20bc90e300a0c723e6f7dd Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@google.com>
Date: Fri, 5 Oct 2018 13:39:14 -0700
Subject: [PATCH 0453/1085] Adding release notes for 1.12

---
 RELEASE.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/RELEASE.md b/RELEASE.md
index 6e1db4a930..bbd816a4d4 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,7 +1,9 @@
 # Release 1.12.0
 
 ## Major Features and Improvements
-
+* tf.data optimizations. Users can now represent, get and set options of a tf.data pipeline.
+  tf.data.AUTOTUNE enables the level of parallelism to be determined at runtime.
+* Keras models can now be directly exported to the SavedModel format and used with Tensorflow Serving.
 
 ## Breaking Changes
 
-- 
GitLab


From efcf11cd44dfe8ddc441aa58f1b21ff7c8444568 Mon Sep 17 00:00:00 2001
From: shengfuintel <sheng.fu@intel.com>
Date: Fri, 5 Oct 2018 13:47:52 -0700
Subject: [PATCH 0454/1085] Clean up the code under INTEL_MKL_ML_ONLY

---
 tensorflow/core/graph/mkl_layout_pass.cc      | 2177 +----------------
 tensorflow/core/graph/mkl_layout_pass_test.cc | 1865 --------------
 2 files changed, 1 insertion(+), 4041 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 7394b1cddf..42a35727db 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -45,2181 +45,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-#ifdef INTEL_MKL_ML_ONLY
-
-// This pass implements rewriting of graph to support following scenarios:
-// (A) Merging nodes in the graph
-// (B) Rewriting a node in the graph to a new node
-//     Rewrite happens under following 2 scenarios:
-//     1) Propagating Mkl layout as an additional output tensor
-//        (we will loosely call a tensor that carries Mkl layout as Mkl tensor
-//         henceforth.) from every Mkl supported NN layer.
-//     2) Context-based rewrite: This is needed in order to optimize
-//        gradient ops of Conv2D+AddBias. Gradient op of both the Conv2D and
-//        MatMul is BiasAddGrad, and we need to rewrite BiasAddGrad into
-//        Conv2D-specific BiasAddGrad, and MatMul-specific BiasAddGrad.
-//        This is context-specific optimization, where the context is the
-//        forward operator that the BiasAddGrad corresponds to.
-//
-// Example of A : Merging nodes in the graph
-// -----------------------------------------
-// Currently, we merge Conv2D+AddBias together. Consider Conv2D and BiasAdd as:
-//
-//           O = Conv2D(A, B)
-//           P = BiasAdd(O, C)
-//
-// We merge them into Conv2DWithBias as:
-//           P = _MklConv2DWithBias(A, A_m, B, B_m, C, C_m)
-//
-// The meaning of A_m, B_m and C_m is explained in B.1.
-//
-// Merge rules:
-//  - The merge for Conv2D and BiasAdd happens when the output of Conv2D _only_
-//    goes to BiasAdd.
-//  - Also, the intersection of attributes of both the nodes must have same
-//    values.
-//  - Both the nodes must have been assigned to same device (if any).
-//
-// Example of B.1 : Rewriting nodes to Mkl nodes
-// ---------------------------------------------
-// Consider a Relu node. Current definition of Relu node looks like:
-//
-//           O = Relu(A)
-//
-// Relu has 1 input (A), and 1 output (O).
-//
-// This rewrite pass will generate a new graph node for Relu (new node is
-// called MklRelu) as:
-//
-//          O, O_m = MklRelu(A, A_m)
-//
-// MklRelu has 2 inputs (A and A_m) and 2 outputs (O and O_m). Here input A is
-// same as input A of Relu; output O is same as output O of Relu. O_m is the
-// additional output tensor that will be set by MklRelu, and it represents
-// Mkl tensor corresponding to O -- in other words, O_m is some kind of
-// metadata for O. A_m is additional input of Relu, and it represents metadata
-// for A - as O_m is metadata for O, A_m is metadata for A. MklRelu receives
-// this metadata from previous node in the graph.
-//
-// When a previous node in the graph is an Mkl node, A_m will represent a valid
-// Mkl tensor. But when a previous node is not an Mkl node, A_m will represent
-// a dummy Mkl tensor.
-//
-// Rewriting rules:
-//  - Selection of a node for rewriting happens by registering the op type of
-//    the node with the rewriting pass. If the op type is not registered, then
-//    all nodes of this op type will not be rewritten.
-//  - Number of inputs after rewriting:
-//      Since for every input Tensorflow tensor, the rewritten node gets Mkl
-//      tensor(s), rewritten node gets 2*N inputs, where N is the number of
-//      inputs for the original node.
-//  - Number of outputs after rewriting:
-//      Since for every output Tensorflow tensor, the rewritten node generates
-//      Mkl tensor(s), the rewritten node generates 2*N outputs, where N is the
-//      number of outputs of the original node.
-//  - Ordering of Tensorflow tensors and Mkl tensors:
-//      Since every rewritten node generates twice the number of inputs and
-//      outputs, one could imagine various orderings among Tensorflow tensors
-//      and Mkl tensors. E.g., assume an op 'Conv2D' that takes (A, B) as
-//      inputs, then the new op '_MklConv2D' can take inputs A, B, A_m and B_m
-//      in A, A_m, B, B_m order or it can also take them in A, B, A_m, B_m
-//      order. Among N inputs one can get N! permutations.
-//
-//      So the question is: which order do we follow? We support 2 types of
-//      orderings: (1) interleaved, and (2) contiguous. Interleaved ordering
-//      follows an intuitive order where an Mkl tensor follows the
-//      corresponding Tensorflow tensor immediately. In the context of the
-//      above example, it will be: A, A_m, B, B_m. Note that the ordering rule
-//      applies to both the inputs and outputs. Contiguous ordering means
-//      all the Tensorflow tensors are contiguous followed by all the Mkl
-//      tensors. We use contiguous ordering as default.
-//
-// Graph rewrite algorithm:
-//      Algorithm: Graph Rewrite
-//      Input: Graph G, Names of the nodes to rewrite and their new names
-//      Output: Modified Graph G' if the nodes are modified, G otherwise.
-//      Start:
-//        N = Topological_Sort(G) // N is a set of nodes in toposort order.
-//        foreach node n in N
-//        do
-//          if (Is_MKL_Op(n))  // Can this node accept an Mkl layout as input.
-//          then
-//            E = set of <incoming edge and its src_output slot> of n
-//            E' = {}   // a new set of edges for rewritten node
-//            foreach <e,s> in E
-//            do
-//              E' U {<e,s>}  // First copy edge which generates Tensorflow
-//                            // tensor as it is
-//              m = Source node of edge e
-//              if Is_Rewritten(m)  // Did we rewrite this node in this pass?
-//              then
-//                E' U {<m,s+1>}    // If yes, then m will generate an Mkl
-//                                  // tensor as an additional output.
-//              else
-//                d = Generate_Dummy_Mkl_Tensor()  // If not, generate a dummy
-//                                                 // Mkl tensor.
-//                E' U {<d,0>}  // The dummy Mkl tensor has only 1 output slot.
-//              fi
-//            done
-//            n' = Build_New_Node(G,new_name,E')
-//            Mark_Rewritten(n')  // Mark the new node as being rewritten.
-//          fi
-//        done
-//
-//      Explanation:
-//        For graph rewrite, we visit nodes of the input graph in the
-//        topological sort order. With this ordering, we visit nodes in the
-//        top-to-bottom fashion. We need this order because while visiting a
-//        node we want that all of its input nodes are visited and rewritten if
-//        applicable. This is because if we need to rewrite a given node
-//        then all of its input nodes need to be fixed (in other words they
-//        cannot be deleted later.)
-//
-//        While visiting a node, we first check if the op type of the node is
-//        an Mkl op. If it is, then we rewrite that node after constructing
-//        new inputs to the node. If the op type of the node is not Mkl op,
-//        then we do not rewrite that node.
-//
-// Handling workspace propagation for certain ops:
-//
-//        Certain backward ops in MKL (MaxPool, LRN and BatchNorm) require
-//        passing of a workspace from their respective forward ops. Workspace
-//        tensors provide memory for storing results of intermediate operations
-//        which are helpful in backward propagation. TensorFlow does not have
-//        a notion of a workspace and as a result does not allow producing
-//        additional outputs from these forward ops. For these ops, we need
-//        to add 2 extra edges between forward ops and their corresponding
-//        backward ops - the first extra edge carries a workspace tensor and
-//        the second one carries an Mkl tensor for the workspace tensor.
-//
-//        Example:
-//
-//        Typical graph for MaxPool and its gradient looks like:
-//
-//        A = MaxPool(T)
-//        B = MaxPoolGrad(X, A, Y)
-//
-//        We will transform this graph to propagate the workspace as:
-//        (with the contiguous ordering)
-//
-//        A, W, A_m, W_m = MklMaxPool(T, T_m)
-//        B, B_m = MklMaxPoolGrad(X, A, Y, W, X_m, A_m, Y_m, W_m)
-//
-//        Here W is the workspace tensor. Transformed tensor names with the
-//        suffix _m are Mkl tensors, and this transformation has been done
-//        using the algorithm discussed earlier. The transformation for
-//        workspace propagation only adds extra outputs (W, W_m) for a forward
-//        op and connects them to the corresponding backward ops.
-//
-//        Terms:
-//
-//        Forward op name = name of the op in the forward pass
-//          where a workspace tensor originates (MaxPool in this example)
-//        Backward op name = name of the op in the backward pass that receives
-//          a workspace tensor from the forward op (MaxPoolGrad in the example)
-//        Slot = Position of the output or input slot that will be
-//               used by the workspace tensor (1 for MklMaxPool as W is the 2nd
-//               output of MaxPool (0 is 1st); 3 for MklMaxPoolGrad)
-//
-//        Question:
-//
-//        How do we associate a backward op to a forward op? There can be more
-//        than one op with the exact same name.
-//
-//        In this example, we associate MaxPoolGrad with MaxPool. But there
-//        could be more than one MaxPool ops. To solve this problem, we look
-//        for _direct_ edge between a forward op and a backward op (tensor A is
-//        flowing along this edge in the example).
-//
-//        How do we transform forward and backward ops when there is no direct
-//        edge between them? In such a case, we generate dummy tensors for
-//        workspace tensors. For the example, transformation of MaxPool will
-//        be exactly same as it would be when there is a direct edge between
-//        the forward and the backward op --- it is just that MaxPool won't
-//        generate any workspace tensor. For MaxPoolGrad, the transformation
-//        will also be same, but instead of connecting W and W_m with the
-//        outputs of MaxPool, we will produce dummy tensors for them, and we
-//        will set workspace_enabled attribute to false.
-//
-// Example of B.2 : Context-based node rewrite
-// -------------------------------------------
-// Consider BiasAddGrad op as:
-//
-//           O = _MklConv2D(A, B, C, A_m, B_m, C_m)
-//           P = BiasAddGrad(O)
-//
-// Then we rewrite it as:
-//
-//           P = Conv2DWithBiasBackpropBias(O, O_m)
-//
-// Rewrite of BiasAddGrad into Conv2DWithBiasBackpropBias takes place depending
-// on the matching 'context'. The term context is loosely related to which
-// forward op is _associated_ to BiasAddGrad. If it is _MklConv2DWithBias then
-// we consider it Conv2D context; if it is MatMul, then it is MatMul context.
-
-class MklLayoutRewritePass : public GraphOptimizationPass {
- public:
-  MklLayoutRewritePass() {
-    // NOTE: names are alphabetically sorted.
-    csinfo_.addn = "AddN";
-    csinfo_.avg_pool = "AvgPool";
-    csinfo_.avg_pool_grad = "AvgPoolGrad";
-    csinfo_.bias_add = "BiasAdd";
-    csinfo_.bias_add_grad = "BiasAddGrad";
-    csinfo_.concat = "Concat";
-    csinfo_.concatv2 = "ConcatV2";
-    csinfo_.conv2d = "Conv2D";
-    csinfo_.conv2d_grad_input = "Conv2DBackpropInput";
-    csinfo_.conv2d_grad_filter = "Conv2DBackpropFilter";
-    csinfo_.fused_batch_norm = "FusedBatchNorm";
-    csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad";
-    csinfo_.identity = "Identity";
-    csinfo_.lrn = "LRN";
-    csinfo_.lrn_grad = "LRNGrad";
-    csinfo_.matmul = "MatMul";
-    csinfo_.max_pool = "MaxPool";
-    csinfo_.max_pool_grad = "MaxPoolGrad";
-    csinfo_.mkl_conv2d = "_MklConv2D";
-    csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput";
-    csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter";
-    csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
-    csinfo_.mkl_conv2d_with_bias_backprop_bias =
-        "_MklConv2DWithBiasBackpropBias";
-    csinfo_.relu = "Relu";
-    csinfo_.relu_grad = "ReluGrad";
-    csinfo_.reshape = "Reshape";
-    csinfo_.split = "Split";
-    // Element-wise ops. Ensure you also add any new ops to IsOpElementWise
-    // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the
-    // MklInputConversion op is added before it.
-    csinfo_.add = "Add";
-    csinfo_.maximum = "Maximum";
-    csinfo_.mul = "Mul";
-    csinfo_.squared_difference = "SquaredDifference";
-    csinfo_.sub = "Sub";
-    // End - element-wise ops. See note above.
-
-    // NOTE: names are alphabetically sorted.
-    rinfo_.push_back({csinfo_.addn, mkl_op_registry::GetMklOpName(csinfo_.addn),
-                      CopyAttrsAddN, AddNRewrite, nullptr});
-    rinfo_.push_back({csinfo_.add, mkl_op_registry::GetMklOpName(csinfo_.add),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.avg_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
-                      CopyAttrsPooling, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.avg_pool_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
-                      CopyAttrsPooling, AlwaysRewrite, nullptr});
-    // BiasAddGrad gets written into Conv2DWithBiasBackpropBias depending
-    // on if context contains Conv2D.
-    rinfo_.push_back({csinfo_.bias_add_grad,
-                      csinfo_.mkl_conv2d_with_bias_backprop_bias,
-                      CopyAttrsBiasAddGrad, ContextMatchRewrite,
-                      &biasaddgrad_conv2dwithbias_context_});
-    // BiasAddGrad gets written into BiasAddGrad depending on if context
-    // contains MatMul.
-    rinfo_.push_back({csinfo_.bias_add_grad, csinfo_.matmul,
-                      CopyAttrsBiasAddGrad, ContextMatchRewrite,
-                      &biasaddgrad_matmul_context_});
-    rinfo_.push_back({csinfo_.concat,
-                      mkl_op_registry::GetMklOpName(csinfo_.concat),
-                      CopyAttrsConcat, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.concatv2,
-                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
-                      CopyAttrsConcatV2, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.conv2d,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
-                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.conv2d_grad_filter,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
-                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.conv2d_grad_input,
-                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
-                      CopyAttrsConv2D, AlwaysRewrite, nullptr});
-
-    rinfo_.push_back({csinfo_.fused_batch_norm,
-                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
-                      CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
-    rinfo_.push_back(
-        {csinfo_.fused_batch_norm_grad,
-         mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
-         CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.identity,
-                      mkl_op_registry::GetMklOpName(csinfo_.identity),
-                      CopyAttrsIdentity, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn),
-                      CopyAttrsLRN, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.lrn_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
-                      CopyAttrsLRN, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.max_pool,
-                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
-                      CopyAttrsPooling, NonDepthBatchWisePoolRewrite, nullptr});
-    rinfo_.push_back({csinfo_.max_pool_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
-                      CopyAttrsPooling, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.maximum,
-                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.mul, mkl_op_registry::GetMklOpName(csinfo_.mul),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.relu_grad,
-                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.reshape,
-                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
-                      CopyAttrsReshape, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.squared_difference,
-                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.sub, mkl_op_registry::GetMklOpName(csinfo_.sub),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-
-    // Add info about which ops to add workspace edge to and the slots.
-    wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
-    wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3});
-
-    // Add a rule for merging nodes
-    minfo_.push_back({csinfo_.mkl_conv2d, csinfo_.bias_add, 0,
-                      csinfo_.mkl_conv2d_with_bias});
-
-    biasaddgrad_matmul_context_ = {csinfo_.bias_add_grad, csinfo_.matmul,
-                                   IsBiasAddGradInMatMulContext};
-
-    biasaddgrad_conv2dwithbias_context_ = {
-        csinfo_.bias_add_grad, csinfo_.mkl_conv2d_with_bias,
-        IsBiasAddGradInConv2DWithBiasContext};
-
-    cinfo_.push_back(&biasaddgrad_matmul_context_);
-    cinfo_.push_back(&biasaddgrad_conv2dwithbias_context_);
-  }
-
-  // Standard interface to run pass
-  Status Run(const GraphOptimizationPassOptions& options);
-
-  // Helper function which does most of heavy lifting for rewriting
-  // Mkl nodes to propagate Mkl tensor as additional output
-  //
-  // Extracts common functionality between Run public interface and
-  // test interface.
-  //
-  // @return true, if and only if graph is mutated; false otherwise.
-  bool RunPass(std::unique_ptr<Graph>* g);
-
-  /// Structure to specify the context information used in a node rewrite rule
-  typedef struct {
-    string node;  // Name of the node to be rewritten
-    string fwd;   // Name of the node in the forward pass that this node
-                  // corresponds to
-    std::function<bool(const Node*, const Node**, void* c)> context_match_fn;
-  } ContextInfo;
-
-  /// Structure to specify the name of an original node, its new name after
-  /// rewrite, the number of inputs to the original node, the function to
-  /// be used to copy attributes for the op, and the rule (if any) which
-  /// must hold for rewriting the node
-  typedef struct {
-    string name;      // Original name of op of the node in the graph
-    string new_name;  // New name of the op of the node in the graph
-    // A function handler to copy attributes from an old node to a new node.
-    std::function<void(const Node*, NodeBuilder*)> copy_attrs;
-    // A rule under which to rewrite this node
-    std::function<bool(const Node*, const ContextInfo* c)> rewrite_rule;
-    // ContextInfo, if any, to be used for rewrite
-    ContextInfo* context;
-  } RewriteInfo;
-
-  /// Structure to specify a forward op, a backward op, and the slot numbers
-  /// in the forward and backward ops where we will add a workspace edge.
-  typedef struct {
-    string fwd_op;    // Name of a forward op in the graph
-    string bwd_op;    // Name of a backward op in the graph
-    int fwd_slot;     // Output slot in the forward op node where actual
-                      // output tensor resides
-    int bwd_slot;     // Input slot in the backward op node where actual
-                      // input tensor resides
-    int ws_fwd_slot;  // Output slot in the forward op node where workspace
-                      // edge is added
-    int ws_bwd_slot;  // Input slot in the backward op node where workspace
-                      // edge is added
-  } WorkSpaceInfo;
-
-  /// Structure to specify information used in node merge
-  typedef struct {
-    string pred;      // Predecessor node string
-    string succ;      // Successor node string
-    int op;           // The operand no the predecessor node corresponds
-                      // to the successor node
-    string new_node;  // Name of the node after merge
-  } MergeInfo;
-
-  /// Structure to store all constant strings
-  /// NOTE: names are alphabetically sorted.
-  typedef struct {
-    string addn;
-    string add;
-    string avg_pool;
-    string avg_pool_grad;
-    string bias_add;
-    string bias_add_grad;
-    string concat;
-    string concatv2;
-    string conv2d;
-    string conv2d_grad_input;
-    string conv2d_grad_filter;
-    string fused_batch_norm;
-    string fused_batch_norm_grad;
-    string identity;
-    string lrn;
-    string lrn_grad;
-    string matmul;
-    string max_pool;
-    string max_pool_grad;
-    string maximum;
-    string mkl_conv2d;
-    string mkl_conv2d_grad_input;
-    string mkl_conv2d_grad_filter;
-    string mkl_conv2d_with_bias;
-    string mkl_conv2d_with_bias_backprop_bias;
-    string mul;
-    string relu;
-    string relu_grad;
-    string reshape;
-    string split;
-    string squared_difference;
-    string sub;
-  } ConstStringsInfo;
-
- private:
-  /// Maintain info about nodes to rewrite
-  std::vector<RewriteInfo> rinfo_;
-
-  /// Maintain info about nodes to add workspace edge
-  std::vector<WorkSpaceInfo> wsinfo_;
-
-  /// Maintain info about nodes to be merged
-  std::vector<MergeInfo> minfo_;
-
-  /// Maintain info about nodes to rewrite
-  static std::vector<ContextInfo*> cinfo_;
-
-  /// Maintain structure of constant strings
-  static ConstStringsInfo csinfo_;
-
-  /// Context variables used in referencing rules
-  static ContextInfo biasaddgrad_matmul_context_;
-  static ContextInfo biasaddgrad_conv2dwithbias_context_;
-
- private:
-  // Is OpDef::ArgDef a list type? It could be N * T or list(type).
-  // Refer to opdef.proto for details of list type.
-  inline bool ArgIsList(const OpDef::ArgDef& arg) const {
-    return !arg.type_list_attr().empty() || !arg.number_attr().empty();
-  }
-
-  // Get length of a list in 'n' if 'arg' is of list type. Refer to
-  // description of ArgIsList for definition of list type.
-  inline int GetTensorListLength(const OpDef::ArgDef& arg, Node* n) {
-    CHECK_EQ(ArgIsList(arg), true);
-    int N = 0;
-    const string attr_name = !arg.type_list_attr().empty()
-                                 ? arg.type_list_attr()
-                                 : arg.number_attr();
-    if (!arg.type_list_attr().empty()) {
-      std::vector<DataType> value;
-      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &value));
-      N = value.size();
-    } else {
-      TF_CHECK_OK(GetNodeAttr(n->def(), attr_name, &N));
-    }
-    return N;
-  }
-
-  // Can op represented by node 'n' run on DEVICE_CPU?
-  // Op can run on CPU with MKL if the runtime assigned device or the
-  // user requested device contains device CPU, or both are empty.
-  bool CanOpRunOnCPUDevice(const Node* n) {
-    bool result = true;
-    string reason;
-
-    // Substring that should be checked for in device name for CPU device.
-    const char* const kCPUDeviceSubStr = "CPU";
-
-    // If Op has been specifically assigned to a non-CPU device, then No.
-    if (!n->assigned_device_name().empty() &&
-       !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
-      result = false;
-      reason = "Op has been assigned a runtime device that is not CPU.";
-    }
-
-    // If user has specifically assigned this op to a non-CPU device, then No.
-    if (!n->def().device().empty() &&
-       !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
-      result = false;
-      reason = "User has assigned a device that is not CPU.";
-    }
-
-    if (result == false) {
-      VLOG(1) << "MklLayoutRewritePass: Skipping rewriting of the node "
-              << n->type_string() << ", reason: " << reason;
-    }
-
-    // Otherwise Yes.
-    return result;
-  }
-
-  // Return a node that can be merged with input node 'n'
-  //
-  // @return pointer to the node if we can find such a
-  // node. Otherwise, it returns nullptr.
-  Node* CheckForNodeMerge(const Node* n) const;
-
-  // Merge predecessor node with its successor.
-  // Currently, we merge Conv2D with BiasAdd only.
-  //
-  // Input nodes succ and pred may be deleted if the call to
-  // this function is successful. Attempt to use the pointers
-  // after the call to function may result in undefined behaviors.
-  //
-  // @input g - input graph, succ - successor node, pred - predecessor node
-  // @return Status::OK(), if merging is successful and supported.
-  //         Returns appropriate Status error code otherwise.
-  //         Graph is updated in case nodes are merged. Otherwise, it is
-  //         not updated.
-  Status MergeNode(std::unique_ptr<Graph>* g, Node* succ, Node* pred);
-
-  // Check if the node 'n' has any applicable rewrite rule
-  // We check for 2 scenarios for rewrite.
-  //
-  // @return RewriteInfo* for the applicable rewrite rule
-  const RewriteInfo* CheckForNodeRewrite(const Node* n) const;
-
-  // Default rewrite rule to be used in scenario 1 for rewrite.
-  // @return - true (since we want to always rewrite)
-  static bool AlwaysRewrite(const Node* n, const ContextInfo* c = nullptr) {
-    return true;
-  }
-
-  // Check if we are performing pooling on depth or batch. If it is, then we
-  // do not rewrite MaxPool node to Mkl version.
-  // @return - true (if it is not a depth/batch wise pooling case);
-  //           false otherwise.
-  static bool NonDepthBatchWisePoolRewrite(const Node* n,
-                                           const ContextInfo* c) {
-    CHECK_NOTNULL(n);
-
-    string data_format_str;
-    TensorFormat data_format;
-    std::vector<int32> ksize, strides;
-    CHECK_EQ(GetNodeAttr(n->def(), "ksize", &ksize).ok(), true);
-    CHECK_EQ(GetNodeAttr(n->def(), "strides", &strides).ok(), true);
-    CHECK_EQ(GetNodeAttr(n->def(), "data_format", &data_format_str).ok(), true);
-    CHECK_EQ(FormatFromString(data_format_str, &data_format), true);
-
-    // Condition that specifies non-batch-wise and non-depth-wise pooling.
-    if (GetTensorDim(ksize, data_format, 'N') == 1 &&
-        GetTensorDim(strides, data_format, 'N') == 1 &&
-        GetTensorDim(ksize, data_format, 'C') == 1 &&
-        GetTensorDim(strides, data_format, 'C') == 1) {
-      return true;
-    }
-
-    return false;
-  }
-
-  static bool AddNRewrite(const Node* n, const ContextInfo* c) {
-    CHECK_NOTNULL(n);
-
-    int num;
-    CHECK_EQ(GetNodeAttr(n->def(), "N", &num).ok(), true);
-
-    // Condition that specifies non-batch-wise and non-depth-wise pooling.
-    if (num == 2) {
-      return true;
-    }
-
-    return false;
-  }
-  // Is BiasAddGrad node in 'n' is associated with Conv2DWithBias node
-  // specified in contextinfo 'ci'. Function updates fwd_node to point
-  // to Conv2DWithBias node if 'n' is associated with Conv2DWithBias.
-  //
-  // Association checks for one of the following graphs:
-  //
-  // Graph A:
-  //
-  // _ = Conv2DWithBias(F, I, _)
-  // ..
-  // _ = Conv2DBackpropFilter(F, _, G)
-  // _ = Conv2DBackpropInput(_, I, G)
-  // _ = BiasAddGrad(G)
-  //
-  // OR
-  //
-  // Graph B:
-  //
-  // _ = Conv2DWithBias(F, _, _)
-  // ..
-  // _ = Conv2DBackpropFilter(F, _, G)
-  // _ = BiasAddGrad(G)
-  //
-  // Here F, G, and I are graph nodes; _ represents graph nodes that we
-  // don't care here.
-  //
-  // @return - true (if BiasAddGrad is associated with Conv2DWithBias);
-  //           false otherwise.
-  static bool IsBiasAddGradInConv2DWithBiasContext(const Node* n,
-                                                   const Node** fwd_node,
-                                                   void* ci) {
-    CHECK_NOTNULL(n);
-    CHECK_NOTNULL(fwd_node);
-    CHECK_NOTNULL(ci);
-    *fwd_node = nullptr;
-
-    CHECK_EQ(n->type_string(), csinfo_.bias_add_grad);
-
-    // Get the only 1 input of BiasAddGrad.
-    CHECK_EQ(n->num_inputs(), 1);
-    const Node* bias_add_grad_inp = nullptr;
-    TF_CHECK_OK(n->input_node(0, &bias_add_grad_inp));
-    CHECK_NOTNULL(bias_add_grad_inp);
-
-    // Check if this input also goes to BackpropFilter and BackpropInput
-    // as 3rd input.
-    bool found_backprop_input = false;
-    bool found_backprop_filter = false;
-    Node* backprop_filter_node = nullptr;
-    Node* backprop_input_node = nullptr;
-
-    for (const Edge* e : bias_add_grad_inp->out_edges()) {
-      Node* third_input = nullptr;
-      if (e->dst()->type_string() == csinfo_.conv2d_grad_input ||
-          e->dst()->type_string() == csinfo_.mkl_conv2d_grad_input) {
-        // Third input (index 2) of BackpropInput
-        TF_CHECK_OK(e->dst()->input_node(2, &third_input));
-        // Third input (index 2) of BackpropInput must be same as the input
-        // of BiasAddGrad.
-        if (third_input == bias_add_grad_inp) {
-          found_backprop_input = true;
-          backprop_input_node = e->dst();
-        }
-      }
-
-      if (e->dst()->type_string() == csinfo_.conv2d_grad_filter ||
-          e->dst()->type_string() == csinfo_.mkl_conv2d_grad_filter) {
-        // Third input (index 2) of BackpropFilter
-        TF_CHECK_OK(e->dst()->input_node(2, &third_input));
-        // Third input (index 2) of BackpropFilter must be same as the input
-        // of BiasAddGrad.
-        if (third_input == bias_add_grad_inp) {
-          found_backprop_filter = true;
-          backprop_filter_node = e->dst();
-        }
-      }
-
-      // If we found both the nodes, then we can stop the search.
-      if (found_backprop_input && found_backprop_filter) {
-        break;
-      }
-    }
-
-    // If BackpropFilter node is not found, then this is not
-    // Conv2DWithBias context. For 2nd graph in the example above, only
-    // BackpropFilter would be present.
-    if (!found_backprop_filter) {
-      return false;
-    }
-
-    // Otherwise, we found the nodes.
-    CHECK_NOTNULL(backprop_filter_node);
-    if (found_backprop_input) {
-      CHECK_NOTNULL(backprop_input_node);
-    }
-
-    // Now that we confirmed that this is Conv2DWithBias context, we need to
-    // get access to the forward node (Conv2DWithBias). 2nd input of
-    // Conv2DWithBias is same as the 2nd input of Conv2DBackpropInput; 1st
-    // input of Conv2DWithBias is same as the 1st input of Conv2DBackpropFilter
-    // (This comes from definition of gradient computation for Conv2D).
-    if (found_backprop_input) {
-      // Graph A in the example.
-      Node* second_inp_of_input = nullptr;
-      Node* first_inp_of_filter = nullptr;
-      TF_CHECK_OK(backprop_input_node->input_node(1, &second_inp_of_input));
-      TF_CHECK_OK(backprop_filter_node->input_node(0, &first_inp_of_filter));
-      CHECK_NOTNULL(second_inp_of_input);
-      CHECK_NOTNULL(first_inp_of_filter);
-
-      // Now we need to find out Conv2DWithBias node from these input nodes.
-      // Conv2DWithBias node is the node that accepts both the nodes
-      // second_inp_of_input and first_inp_of_filter in 2nd and 1st input slots.
-      for (const Edge* fe : first_inp_of_filter->out_edges()) {
-        if (fe->dst()->type_string() == csinfo_.mkl_conv2d_with_bias &&
-            fe->dst_input() == 0) {
-          for (const Edge* ie : second_inp_of_input->out_edges()) {
-            if (ie->dst()->type_string() == csinfo_.mkl_conv2d_with_bias &&
-                ie->dst_input() == 1 && fe->dst() == ie->dst()) {
-              VLOG(1) << "MklLayoutRewritePass: found "
-                      << fe->dst()->DebugString()
-                      << " as the forward node for matching context, backward"
-                      << " node is: " << n->DebugString();
-              *fwd_node = fe->dst();
-              return true;
-            }
-          }
-        }
-      }
-    } else {
-      // We did not find BackpropInput, so we work with BackpropFilter only.
-      // Graph B in the example.
-      Node* first_inp_of_filter = nullptr;
-      TF_CHECK_OK(backprop_filter_node->input_node(0, &first_inp_of_filter));
-      CHECK_NOTNULL(first_inp_of_filter);
-
-      // Now we need to find out Conv2DWithBias node from first input of
-      // BackpropFIlter. Conv2DWithBias node is the node that accepts
-      // first_inp_of_filter in 1st input slot.
-      for (const Edge* fe : first_inp_of_filter->out_edges()) {
-        if (fe->dst()->type_string() == csinfo_.mkl_conv2d_with_bias &&
-            fe->dst_input() == 0) {
-          VLOG(1) << "MklLayoutRewritePass: found " << fe->dst()->DebugString()
-                  << " as the forward node for matching context, backward"
-                  << " node is: " << n->DebugString();
-          *fwd_node = fe->dst();
-          return true;
-        }
-      }
-    }
-
-    return false;
-  }
-
-  // Is BiasAddGrad node in 'n' is associated with MatMul node
-  // specified in contextinfo 'ci'. Function does not update fwd_node.
-  //
-  // @return - true (if BiasAddGrad is associated with MatMul);
-  //           false otherwise.
-  static bool IsBiasAddGradInMatMulContext(const Node* n, const Node** fwd_node,
-                                           void* ci) {
-    return (!IsBiasAddGradInConv2DWithBiasContext(n, fwd_node, ci));
-  }
-
-  // Rewrite rule that uses context-information for matching,
-  // used in scenario 2.
-  //
-  // @input - Node 'n' for which to search for matching context
-  // @input - The context 'c' under which to rewrite
-  // @return - true if we can rewrite node under context 'c';
-  //           false otherwise.
-  static bool ContextMatchRewrite(const Node* n, const ContextInfo* c);
-
-  // Helper function that searches the matching contextinfo for the node.
-  //
-  // @input n - Node (gradient op) whose contextinfo is to be searched,
-  //        fwd_node - pointer to node from the forward pass that this node
-  //        belongs to. fwd_node cannot be NULL.
-  // @return Matching contextinfo in case a match is found; null otherwise.
-  //         Also updates *fwd_node with pointer to forward node that this
-  //         context matches.
-  static const ContextInfo* SearchMatchingContext(const Node* n,
-                                                  const Node** fwd_node);
-
-  // Rewrites input node to a new node specified by its matching rewrite info.
-  //
-  // Method first searches matching rewrite info for input node and then
-  // uses that info to rewrite.
-  //
-  // Input node may be deleted in case of rewrite. Attempt to use the node
-  // after the call can result in undefined behaviors.
-  //
-  // @input  g - input graph, n - Node to be rewritten,
-  //         ri - matching rewriteinfo
-  // @return Status::OK(), if the input node is rewritten;
-  //         Returns appropriate Status error code otherwise.
-  //         Graph is updated in case the input node is rewritten.
-  //         Otherwise, it is not updated.
-  Status RewriteNode(std::unique_ptr<Graph>* g, Node* n, const RewriteInfo* ri);
-
-  // Get nodes that will feed a list of TF tensors to the new
-  // node that we are constructing.
-  //
-  // @input g - input graph,
-  // @input inputs - inputs to old node that we are using for constructing
-  //                 new inputs,
-  // @input input_idx - the index in the 'inputs' vector pointing to the
-  //                    current input that we have processed so far
-  // @output input_idx - index will be incremented by the number of nodes
-  //                     from 'inputs' that are processed
-  // @input list_length - The expected length of list of TF tensors
-  // @output output_nodes - the list of new nodes creating TF tensors
-  //
-  // @return None
-  void GetNodesProducingTFTensorList(
-      const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-      int* input_idx, int list_length,
-      std::vector<NodeBuilder::NodeOut>* output_nodes);
-
-  // Get nodes that will feed a list of Mkl tensors to the new
-  // node that we are constructing.
-  //
-  // @input g - input graph,
-  // @input orig_node - Original node that we are rewriting
-  // @input inputs - inputs to old node that we are using for constructing
-  //                 new inputs,
-  // @input input_idx - the index in the 'inputs' vector pointing to the
-  //                    current input that we have processed so far
-  // @output input_idx - index will be incremented by the number of nodes
-  //                     from 'inputs' that are processed
-  // @input list_length - The expected length of list of Mkl tensors
-  // @output output_nodes - the list of new nodes creating Mkl tensors
-  //
-  // @return None
-  void GetNodesProducingMklTensorList(
-      std::unique_ptr<Graph>* g, Node* orig_node,
-      const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-      int* input_idx, int list_length,
-      std::vector<NodeBuilder::NodeOut>* output_nodes);
-
-  // Get a node that will feed an Mkl tensor to the new
-  // node that we are constructing. The output node could be (1) 'n'
-  // if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
-  // if 'n' is not an Mkl layer.
-  //
-  // @input g - input graph,
-  // @input orig_node - Original node that we are rewriting,
-  // @input n - Node based on which we are creating Mkl node,
-  // @input n_output_slot - the output slot of node 'n'
-  //            which is feeding to the node that we are constructing
-  // @output mkl_node - the new node that will feed Mkl tensor
-  // @output mkl_node_output_slot - the slot number of mkl_node that
-  //                                will feed the tensor
-  // @return None
-  void GetNodeProducingMklTensor(std::unique_ptr<Graph>* g, Node* orig_node,
-                                 Node* n, int n_output_slot, Node** mkl_node,
-                                 int* mkl_node_output_slot);
-
-  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
-  // in graph 'g'. Original node is input in 'old_node'. Inputs to 'nb' are
-  // set up in contiguous fashion. 'workspace_tensors' carry graph nodes
-  // producing workspace edges if 'are_workspace_tensors_available' is true.
-  // Otherwise, 'workspace_tensors' is empty vector.
-  //
-  // For details, refer to 'Ordering of inputs after rewriting' section in the
-  // documentation above.
-  //
-  // Returns Status::OK() if setting up inputs is successful, otherwise
-  // returns appropriate status code.
-  int SetUpContiguousInputs(
-      std::unique_ptr<Graph>* g,
-      const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
-      NodeBuilder* nb, Node* old_node,
-      std::vector<NodeBuilder::NodeOut>* workspace_tensors,
-      bool are_workspace_tensors_available);
-
-  // Setup new inputs using old inputs 'inputs' for the rewritten node in 'nb'
-  // in graph 'g'. Original node is input in 'orig_node'.
-  //
-  // For details, refer to 'Ordering of Tensorflow tensors and Mkl tensors'
-  // section in the documentation above.
-  //
-  // Returns Status::OK() if setting up inputs is successful, otherwise
-  // returns appropriate status code.
-  Status SetUpInputs(std::unique_ptr<Graph>* g,
-                     const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs,
-                     NodeBuilder* nb, Node* orig_node);
-
-  // Add workspace edge on the input or output side of Node 'orig_node' by using
-  // NodeBuilder 'nb' for the new node provided. If 'orig_node' does not dictate
-  // adding workspace edge then do not add it. Workspace Tensorflow and Mkl
-  // tensors, if they need to be added, will be set into these tensors.
-  // If we set workspace tensors, then are_ws_tensors_added should be true.
-  void AddWorkSpaceEdgeIfNeeded(std::unique_ptr<Graph>* g, Node* orig_node,
-                                NodeBuilder* nb,
-                                std::vector<NodeBuilder::NodeOut>* ws_tensors,
-                                bool* are_ws_tensors_added);
-
-  // Functions specific to operators to copy attributes
-  // We need operator-specific function to copy attributes because the framework
-  // does not provide any generic function for it.
-  // NOTE: names are alphabetically sorted.
-  static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsIdentity(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
-
-  // Generate a graph node in graph 'g' representing a dummy Mkl tensor node,
-  // using node for original node 'orig_node' and return it in '*out'.
-  // TODO(nhasabni) We should move this to mkl_util.h
-  void GetDummyMklTensorNode(std::unique_ptr<Graph>* g, Node** out,
-                             Node* orig_node);
-  void GetDummyWorkspaceTensorNode(std::unique_ptr<Graph>* g, Node** out,
-                                   Node* orig_node);
-};
-
-MklLayoutRewritePass::ConstStringsInfo MklLayoutRewritePass::csinfo_;
-MklLayoutRewritePass::ContextInfo
-    MklLayoutRewritePass::biasaddgrad_conv2dwithbias_context_;
-MklLayoutRewritePass::ContextInfo
-    MklLayoutRewritePass::biasaddgrad_matmul_context_;
-std::vector<MklLayoutRewritePass::ContextInfo*> MklLayoutRewritePass::cinfo_;
-
-// We register Mkl rewrite pass for phase 1 in post partitioning group.
-// We register it here so that we get a complete picture of all users of Mkl
-// nodes. Do not change the ordering of the Mkl passes.
-const OptimizationPassRegistry::Grouping kMklLayoutRewritePassGroup =
-    OptimizationPassRegistry::POST_PARTITIONING;
-#ifdef ENABLE_MKL
-REGISTER_OPTIMIZATION(kMklLayoutRewritePassGroup, 1, MklLayoutRewritePass);
-#endif  // ENABLE_MKL
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions for creating new node
-//////////////////////////////////////////////////////////////////////////
-
-static void FillInputs(const Node* n,
-                       gtl::InlinedVector<Node*, 4>* control_edges,
-                       gtl::InlinedVector<std::pair<Node*, int>, 4>* in) {
-  control_edges->clear();
-  for (const Edge* e : n->in_edges()) {
-    if (e->IsControlEdge()) {
-      control_edges->push_back(e->src());
-    } else {
-      (*in)[e->dst_input()] = std::make_pair(e->src(), e->src_output());
-    }
-  }
-  std::sort(control_edges->begin(), control_edges->end());
-  if (n->op_def().is_commutative()) {
-    // For commutative inputs, we sort the input by the input Node*
-    // to get a canonical ordering (so that add(a,b) and add(b, a) will
-    // hash to the same value if is_commutative is true for 'add').
-    std::sort(in->begin(), in->end());
-  }
-}
-
-void MklLayoutRewritePass::GetNodesProducingTFTensorList(
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, int* input_idx,
-    int list_length, std::vector<NodeBuilder::NodeOut>* output_nodes) {
-  CHECK_LT(*input_idx, inputs.size());
-  CHECK_GT(list_length, 0);
-  CHECK_NOTNULL(output_nodes);
-  output_nodes->reserve(list_length);
-
-  while (list_length != 0) {
-    CHECK_GT(list_length, 0);
-    CHECK_LT(*input_idx, inputs.size());
-    Node* n = inputs[*input_idx].first;
-    int slot = inputs[*input_idx].second;
-    // If input node 'n' is just producing a single tensor at
-    // output slot 'slot' then we just add that single node.
-    output_nodes->push_back(NodeBuilder::NodeOut(n, slot));
-    (*input_idx)++;
-    list_length--;
-  }
-}
-
-// TODO(nhasabni) We should move this to mkl_util.h.
-void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr<Graph>* g,
-                                                 Node** out, Node* orig_node) {
-  // We use a tensor of shape {8} and value 0,0,0,0,0,0,0,0 to represent
-  // dummy Mkl tensor. 8 = 2*size_t.
-  const DataType dt = DataTypeToEnum<uint8>::v();
-  TensorProto proto;
-  proto.set_dtype(dt);
-  uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0};
-  proto.set_tensor_content(string(reinterpret_cast<const char*>(zero), 8));
-  TensorShape dummy_shape({8});
-  dummy_shape.AsProto(proto.mutable_tensor_shape());
-  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
-                  .Attr("value", proto)
-                  .Attr("dtype", dt)
-                  .Device(orig_node->def().device())  // We place this node on
-                                                      // the same device as the
-                                                      // device of the original
-                                                      // node.
-                  .Finalize(&**g, out));
-  CHECK_NOTNULL(*out); // Make sure we got a valid object before using it
-
-  // If number of inputs to the original node is > 0, then we add
-  // control dependency between 1st input (index 0) of the original node and
-  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
-  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
-  // rewritten node. Adding control edge between 1st input of the original node
-  // and the dummy Mkl node ensures that the dummy node is in the same frame
-  // as the original node. Choosing 1st input is not necessary - any input of
-  // the original node is fine because all the inputs of a node are always in
-  // the same frame.
-  if (orig_node->num_inputs() > 0) {
-    Node* orig_input0 = nullptr;
-    TF_CHECK_OK(
-        orig_node->input_node(0, const_cast<const Node**>(&orig_input0)));
-    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
-  }
-
-  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
-}
-
-void MklLayoutRewritePass::GetNodesProducingMklTensorList(
-    std::unique_ptr<Graph>* g, Node* orig_node,
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& inputs, int* input_idx,
-    int list_length, std::vector<NodeBuilder::NodeOut>* output_nodes) {
-  CHECK_LT(*input_idx, inputs.size());
-  CHECK_GT(list_length, 0);
-  CHECK_NOTNULL(output_nodes);
-  output_nodes->reserve(list_length);
-
-  while (list_length != 0) {
-    CHECK_GT(list_length, 0);
-    CHECK_LT(*input_idx, inputs.size());
-    Node* n = inputs[*input_idx].first;
-    int slot = inputs[*input_idx].second;
-    // If 'n' is producing a single tensor, then create a single Mkl tensor
-    // node.
-    Node* mkl_node = nullptr;
-    int mkl_node_output_slot = 0;
-    GetNodeProducingMklTensor(g, orig_node, n, slot, &mkl_node,
-                              &mkl_node_output_slot);
-    output_nodes->push_back(
-        NodeBuilder::NodeOut(mkl_node, mkl_node_output_slot));
-    (*input_idx)++;
-    list_length--;
-  }
-}
-
-// Get an input node that will feed Mkl tensor to the new
-// node that we are constructing. An input node could be (1) 'n'
-// if it is Mkl layer, or (2) a dummy node producing dummy Mkl tensor
-// if 'n' is not an Mkl layer.
-void MklLayoutRewritePass::GetNodeProducingMklTensor(
-    std::unique_ptr<Graph>* g, Node* orig_node, Node* n, int n_output_slot,
-    Node** mkl_node, int* mkl_node_output_slot) {
-  CHECK_NOTNULL(n);
-  CHECK_NOTNULL(mkl_node);
-  CHECK_NOTNULL(mkl_node_output_slot);
-
-  // If this is an MKL op, then it will create extra output for MKL layout.
-  DataType T;
-  if (GetNodeAttr(n->def(), "T", &T).ok() &&
-      mkl_op_registry::IsMklOp(n->type_string(), T)) {
-    // If this is an MKL op, then it will generate an edge that will receive
-    // Mkl tensor from a node.
-    // output slot number for Mkl tensor would be N+slot number of TensorFlow
-    // tensor, where N is total number of TensorFlow tensors.
-    *mkl_node = n;
-    *mkl_node_output_slot =
-        GetTensorMetaDataIndex(n_output_slot, n->num_outputs());
-  } else {
-    // If we have not visited the node and rewritten it, then we need
-    // to create a dummy node that will feed a dummy Mkl tensor to this node.
-    // DummyMklTensor node has no input and generates only 1 output
-    // (dummy Mkl tensor) as output slot number 0.
-    GetDummyMklTensorNode(g, mkl_node, orig_node);
-    CHECK_NOTNULL(*mkl_node);
-    *mkl_node_output_slot = 0;
-  }
-}
-
-int MklLayoutRewritePass::SetUpContiguousInputs(
-    std::unique_ptr<Graph>* g,
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
-    NodeBuilder* nb, Node* old_node,
-    std::vector<NodeBuilder::NodeOut>* workspace_tensors,
-    bool are_workspace_tensors_available) {
-  CHECK_NOTNULL(workspace_tensors);
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-
-  // TODO(nhasabni): Temporary solution to connect filter input of
-  // BackpropInput with the converted filter from Conv2D.
-  bool do_connect_conv2d_backprop_input_filter = false;
-  Node* conv2d_node = nullptr;
-  // Filter node is 2nd input (slot index 1) of Conv2D.
-  int kConv2DFilterInputSlotIdx = 1;
-  int kConv2DBackpropInputFilterInputSlotIdx = 1;
-  int kConv2DFilterOutputSlotIdx = 1;
-  if (old_node->type_string() == csinfo_.conv2d_grad_input) {
-    // We need to find Conv2D node from Conv2DBackpropInput.
-    // For that let's first find filter node that is 2nd input (slot 1)
-    // of BackpropInput.
-    Node* filter_node = nullptr;
-    TF_CHECK_OK(old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx,
-                                     &filter_node));
-    CHECK_NOTNULL(filter_node);
-
-    // Now check which nodes receive from filter_node. Filter feeds as
-    // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias.
-    for (const Edge* e : filter_node->out_edges()) {
-      if (e->dst()->type_string() == csinfo_.mkl_conv2d &&
-          e->dst_input() == kConv2DFilterInputSlotIdx
-          /* filter is 2nd input of Conv2D and _MklConv2D. */) {
-        if (conv2d_node != nullptr) {
-          VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
-                  << " feeding multiple Conv2D nodes: "
-                  << filter_node->DebugString();
-          // We will not connect filter input of Conv2DBackpropInput
-          // to be safe here.
-          do_connect_conv2d_backprop_input_filter = false;
-          break;
-        } else {
-          conv2d_node = e->dst();
-          do_connect_conv2d_backprop_input_filter = true;
-        }
-      }
-    }
-  }
-
-  // Number of input slots to original op
-  // Input slots are represented by .Input() calls in REGISTER_OP.
-  int old_node_input_slots = old_node->op_def().input_arg_size();
-  // Actual number of inputs can be greater than or equal to number
-  // of Input slots because inputs of type list could be unfolded.
-  CHECK_GE(old_node_inputs.size(), old_node_input_slots);
-  int nn_slot_idx = 0;  // slot index for inputs of new node
-
-  // Let's copy all inputs (TF tensors) of original node to new node.
-  int iidx = 0;
-  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
-    // An input slot could be a single tensor or a list. We need
-    // to handle this case accordingly.
-    CHECK_LT(iidx, old_node_inputs.size());
-    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
-    if (ArgIsList(arg)) {
-      std::vector<NodeBuilder::NodeOut> new_node_inputs;
-      int N = GetTensorListLength(arg, old_node);
-      GetNodesProducingTFTensorList(old_node_inputs, &iidx, N,
-                                    &new_node_inputs);
-      nb->Input(new_node_inputs);
-      nn_slot_idx++;
-    } else {
-      // Special case for connecting filter input of Conv2DBackpropInput
-      if (do_connect_conv2d_backprop_input_filter &&
-          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
-        nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx);
-      } else {
-        nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second);
-      }
-      iidx++;
-      nn_slot_idx++;
-    }
-  }
-
-  // If workspace tensors are available for this op and we are using
-  // contiguous ordering then we need to add Tensorflow tensor for
-  // workspace here because Tensorflow tensor for workspace is the
-  // last tensor in the list of Tensorflow tensors.
-  if (are_workspace_tensors_available) {
-    CHECK_EQ(workspace_tensors->size(), 2);
-    // Tensorflow tensor
-    nb->Input((*workspace_tensors)[0].node, (*workspace_tensors)[0].index);
-    nn_slot_idx++;
-  }
-
-  // Let's now setup all Mkl inputs to new node.
-  // Number of Mkl inputs must be same as number of TF inputs.
-  iidx = 0;
-  for (int on_slot_idx = 0; on_slot_idx < old_node_input_slots; on_slot_idx++) {
-    // An input slot could be a single tensor or a list. We need
-    // to handle this case accordingly.
-    CHECK_LT(iidx, old_node_inputs.size());
-    const OpDef::ArgDef& arg = old_node->op_def().input_arg(on_slot_idx);
-    if (ArgIsList(arg)) {
-      std::vector<NodeBuilder::NodeOut> new_node_inputs;
-      int N = GetTensorListLength(arg, old_node);
-      GetNodesProducingMklTensorList(g, old_node, old_node_inputs, &iidx, N,
-                                     &new_node_inputs);
-      nb->Input(new_node_inputs);
-      nn_slot_idx++;
-    } else {
-      Node* mkl_node = nullptr;
-      int mkl_node_output_slot = 0;
-      // Special case for connecting filter input of Conv2DBackpropInput
-      if (do_connect_conv2d_backprop_input_filter &&
-          iidx == kConv2DBackpropInputFilterInputSlotIdx) {
-        GetNodeProducingMklTensor(g, old_node, conv2d_node,
-                                  kConv2DFilterOutputSlotIdx, &mkl_node,
-                                  &mkl_node_output_slot);
-      } else {
-        GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first,
-                                  old_node_inputs[iidx].second, &mkl_node,
-                                  &mkl_node_output_slot);
-      }
-      nb->Input(mkl_node, mkl_node_output_slot);
-      iidx++;
-      nn_slot_idx++;
-    }
-  }
-
-  // If workspace tensors are available for this op and we are using
-  // contiguous ordering then we need to add Mkl tensor for
-  // workspace here because Mkl tensor for workspace is the
-  // last tensor in the list of Mkl tensors.
-  if (are_workspace_tensors_available) {
-    CHECK_EQ(workspace_tensors->size(), 2);
-    // Mkl tensor
-    nb->Input((*workspace_tensors)[1].node, (*workspace_tensors)[1].index);
-    nn_slot_idx++;
-  }
-
-  return nn_slot_idx;
-}
-
-Status MklLayoutRewritePass::SetUpInputs(
-    std::unique_ptr<Graph>* g,
-    const gtl::InlinedVector<std::pair<Node*, int>, 4>& old_node_inputs,
-    NodeBuilder* nb, Node* old_node) {
-  // Let's check if we need to add workspace tensors for this node.
-  // We add workspace edge only for MaxPool, LRN and BatchNorm.
-  std::vector<NodeBuilder::NodeOut> workspace_tensors;
-  bool are_workspace_tensors_available = false;
-  AddWorkSpaceEdgeIfNeeded(g, old_node, nb, &workspace_tensors,
-                           &are_workspace_tensors_available);
-
-  int new_node_input_slots = 0;
-  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-    // TODO(nhasabni): implement this function just for same of completion.
-    // We do not use interleaved ordering right now.
-    return Status(
-        error::Code::UNIMPLEMENTED,
-        "Interleaved ordering of tensors is currently not supported.");
-  } else {
-    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-    new_node_input_slots = SetUpContiguousInputs(
-        g, old_node_inputs, nb, old_node, &workspace_tensors,
-        are_workspace_tensors_available);
-  }
-
-  // Sanity check
-  int old_node_input_slots = old_node->op_def().input_arg_size();
-  if (!are_workspace_tensors_available) {
-    // If we are not adding workspace tensors for this op, then the total
-    // number of input slots to the new node _must_ be 2 times the number
-    // of input slots to the original node: N original Tensorflow tensors and
-    // N for Mkl tensors corresponding to each Tensorflow tensors.
-    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2);
-  } else {
-    // If we are adding workspace tensors for this op, then the total
-    // The total number of input slots to new node _must_ be 2 times the number
-    // of input slots to the original node: N original Tensorflow tensors and
-    // N for Mkl tensors corresponding to each Tensorflow tensors plus 2
-    // (for workspace Tensorflow tensor and workspace Mkl tensor).
-    CHECK_EQ(new_node_input_slots, old_node_input_slots * 2 + 2);
-  }
-
-  return Status::OK();
-}
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions related to workspace pass
-//////////////////////////////////////////////////////////////////////////
-
-// TODO(nhasabni) We should move this to mkl_util.h.
-void MklLayoutRewritePass::GetDummyWorkspaceTensorNode(
-    std::unique_ptr<Graph>* g, Node** out, Node* orig_node) {
-  // We use a tensor of shape {1} and value 0 to represent
-  // dummy float tensor. We need this as a dummy workspace tensor.
-  // Workspace tensor has type float.
-  const DataType dt = DataTypeToEnum<float>::v();
-  TensorProto proto;
-  proto.set_dtype(dt);
-  float zero[1] = {0};
-  proto.set_tensor_content(string(reinterpret_cast<char*>(&zero), 4));
-  TensorShape dummy_shape({1});
-  dummy_shape.AsProto(proto.mutable_tensor_shape());
-  TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const")
-                  .Attr("value", proto)
-                  .Attr("dtype", dt)
-                  .Device(orig_node->def().device())  // We place this node on
-                                                      // same the device as the
-                                                      // device of the original
-                                                      // node.
-                  .Finalize(&**g, out));
-  CHECK_NOTNULL(*out); // Make sure we got a valid object before using it
-
-  // If number of inputs to the original node is > 0, then we add
-  // control dependency between 1st input (index 0) of the original node and
-  // the dummy Mkl node. This is needed because control-flow ops such as Enter,
-  // Merge, etc, require frame_name of the dummy Mkl node to be same as the
-  // rewritten node. Adding control edge between 1st input of the original node
-  // and the dummy Mkl node ensures that the dummy node is in the same frame
-  // as the original node. Choosing 1st input is not necessary - any input of
-  // the original node is fine because all the inputs of a node are always in
-  // the same frame.
-  if (orig_node->num_inputs() > 0) {
-    Node* orig_input0 = nullptr;
-    TF_CHECK_OK(
-        orig_node->input_node(0, const_cast<const Node**>(&orig_input0)));
-    CHECK_NOTNULL((*g)->AddControlEdge(orig_input0, *out));
-  }
-
-  (*out)->set_assigned_device_name(orig_node->assigned_device_name());
-}
-
-void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
-    std::unique_ptr<Graph>* g, Node* orig_node, NodeBuilder* nb,
-    std::vector<NodeBuilder::NodeOut>* ws_tensors, bool* are_ws_tensors_added) {
-  bool workspace_edge_added = false;  // Default initializer
-  CHECK_NOTNULL(are_ws_tensors_added);
-  *are_ws_tensors_added = false;  // Default initializer
-
-  DataType T;
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  for (auto ws : wsinfo_) {
-    if (orig_node->type_string() == ws.fwd_op &&
-        mkl_op_registry::IsMklOp(
-            mkl_op_registry::GetMklOpName(orig_node->type_string()), T)) {
-      // If this op is a fwd op, then we need to check if there is an
-      // edge from this node's fwd_slot to bwdop's bwd_slot. If there is
-      // an edge, then we just add an attribute on this node for setting
-      // workspace_passed to true. We don't add actual workspace edge
-      // in this node. Actual workspace edge gets added in the backward
-      // op for this node.
-      for (const Edge* e : orig_node->out_edges()) {
-        if (e->src_output() == ws.fwd_slot &&
-            e->dst()->type_string() == ws.bwd_op &&
-            e->dst_input() == ws.bwd_slot) {
-          nb->Attr("workspace_enabled", true);
-          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
-                  << orig_node->type_string();
-          workspace_edge_added = true;
-          // We found the edge that we were looking for, so break.
-          break;
-        }
-      }
-
-      if (!workspace_edge_added) {
-        // If we are here, then we did not find backward operator for this
-        // node.
-        nb->Attr("workspace_enabled", false);
-      }
-    } else if (orig_node->type_string() == ws.bwd_op &&
-               mkl_op_registry::IsMklOp(
-                   mkl_op_registry::GetMklOpName(orig_node->type_string()),
-                   T)) {
-      // If this op is a bwd op, then we need to add workspace edge and
-      // it's Mkl tensor edge between its corresponding fwd op and this
-      // op. Corresponding fwd op is specified in 'fwd_op' field of
-      // workspace info. fwd_slot and bwd_slot in workspace info specify
-      // an edge between which slots connect forward and backward op.
-      // Once all these criteria match, we add a workspace edge between
-      // ws_fwd_slot and ws_bwd_slot. Its corresponding Mkl tensor is
-      // determined by interleaved/contiguous ordering. Function
-      // DataIndexToMetaDataIndex tells us the location of Mkl tensor
-      // from the location of the Tensorflow tensor.
-      for (const Edge* e : orig_node->in_edges()) {
-        if (e->src_output() == ws.fwd_slot &&
-            // We would have rewritten the forward op, so we need to use
-            // GetMklOpName call to get its Mkl name.
-            e->src()->type_string() ==
-                mkl_op_registry::GetMklOpName(ws.fwd_op) &&
-            e->dst_input() == ws.bwd_slot) {
-          nb->Attr("workspace_enabled", true);
-          CHECK_NOTNULL(ws_tensors);
-          // Add workspace edge between fwd op and bwd op.
-          ws_tensors->push_back(NodeBuilder::NodeOut(e->src(), ws.ws_fwd_slot));
-          // Add Mkl tensor edge for workspace edge between fwd op and bwd op.
-          ws_tensors->push_back(NodeBuilder::NodeOut(
-              e->src(), DataIndexToMetaDataIndex(ws.ws_fwd_slot,
-                                                 e->src()->num_outputs())));
-          *are_ws_tensors_added = true;
-          // In terms of input ordering, we add these calls to add Input
-          // here because workspace edge (and its Mkl tensor) is the last
-          // edge in the fwdop and bwdop. So all inputs before workspace
-          // tensor have been added by SetUpInputs function.
-          VLOG(1) << "MklLayoutRewritePass: workspace_enabled for "
-                  << orig_node->type_string();
-          workspace_edge_added = true;
-          // We found the edge that we were looking for, so break.
-          break;
-        }
-      }
-
-      // If we are here means we did not find fwd op that feeds to this
-      // bwd op. So in this case, we need to generate dummy tensors for
-      // workspace input and Mkl tensor for workspace, and set
-      // workspace_enabled to false.
-      if (!workspace_edge_added) {
-        nb->Attr("workspace_enabled", false);
-        Node* dmt_ws = nullptr;      // Dummy tensor for workspace
-        Node* dmt_mkl_ws = nullptr;  // Dummy Mkl tensor for workspace
-        GetDummyWorkspaceTensorNode(g, &dmt_ws, orig_node);
-        GetDummyMklTensorNode(g, &dmt_mkl_ws, orig_node);
-        CHECK_NOTNULL(dmt_ws);
-        CHECK_NOTNULL(dmt_mkl_ws);
-        CHECK_NOTNULL(ws_tensors);
-        // We add dummy tensor as workspace tensor.
-        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_ws, 0));
-        // We add dummy tensor as Mkl tensor for workspace tensor.
-        ws_tensors->push_back(NodeBuilder::NodeOut(dmt_mkl_ws, 0));
-        *are_ws_tensors_added = true;
-        VLOG(1) << "MklLayoutRewritePass: dummy workspace_enabled for "
-                << orig_node->type_string();
-      }
-    } else {
-      // If this node does not match any workspace info, then we do not
-      // do anything special for workspace propagation for it.
-    }
-  }
-}
-
-//////////////////////////////////////////////////////////////////////////
-// Op-specific functions to copy attributes from old node to new node
-//////////////////////////////////////////////////////////////////////////
-
-void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node,
-                                           NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  string padding;
-  std::vector<int32> strides;
-  bool use_cudnn_on_gpu;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-  TF_CHECK_OK(
-      GetNodeAttr(orig_node->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("strides", strides);
-  nb->Attr("padding", padding);
-  nb->Attr("data_format", data_format);
-  nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu);
-}
-
-void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node,
-                                         NodeBuilder* nb) {
-  DataType T;
-  int N;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("N", N);
-}
-
-void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node,
-                                                NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  std::vector<int32> strides;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("strides", strides);
-  nb->Attr("data_format", data_format);
-}
-
-void MklLayoutRewritePass::CopyAttrsIdentity(const Node* orig_node,
-                                             NodeBuilder* nb) {
-  DataType T;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  // Add attributes to new node.
-  nb->Attr("T", T);
-}
-
-void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node,
-                                        NodeBuilder* nb) {
-  DataType T;
-  int depth_radius;
-  float bias;
-  float alpha;
-  float beta;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "depth_radius", &depth_radius));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "bias", &bias));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "beta", &beta));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("depth_radius", depth_radius);
-  nb->Attr("bias", bias);
-  nb->Attr("alpha", alpha);
-  nb->Attr("beta", beta);
-}
-
-void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node,
-                                            NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  string padding;
-  std::vector<int32> ksize, strides;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "ksize", &ksize));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("ksize", ksize);
-  nb->Attr("strides", strides);
-  nb->Attr("padding", padding);
-  nb->Attr("data_format", data_format);
-}
-
-void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node,
-                                             NodeBuilder* nb) {
-  DataType T;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-}
-
-void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node,
-                                            NodeBuilder* nb) {
-  DataType T;
-  DataType Tshape;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tshape", &Tshape));
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("Tshape", Tshape);
-}
-
-void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node,
-                                          NodeBuilder* nb) {
-  DataType T;
-  string data_format;
-  int num_split;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "num_split", &num_split));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("num_split", num_split);
-  nb->Attr("data_format", data_format);
-}
-
-void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node,
-                                           NodeBuilder* nb) {
-  DataType T;
-  int N;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("N", N);
-}
-
-void MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node,
-                                             NodeBuilder* nb) {
-  DataType T;
-  int N;
-  DataType tidx;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "N", &N));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tidx", &tidx));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("N", N);
-  nb->Attr("Tidx", tidx);
-}
-
-void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node,
-                                                   NodeBuilder* nb) {
-  DataType T;
-  float epsilon;
-  string data_format;
-  bool is_training;
-
-  // Get all attributes from old node.
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "epsilon", &epsilon));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "is_training", &is_training));
-
-  // Add attributes to new node.
-  nb->Attr("T", T);
-  nb->Attr("epsilon", epsilon);
-  nb->Attr("data_format", data_format);
-  nb->Attr("is_training", is_training);
-}
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions related to node merge pass
-//////////////////////////////////////////////////////////////////////////
-
-Node* MklLayoutRewritePass::CheckForNodeMerge(const Node* a) const {
-  // TODO(nhasabni) Add check for type of node similar to CheckForNodeRewrite
-  // once we support BiasAddGrad as Mkl layer.
-
-  // Search for all matching mergeinfo.
-  // We allow more than one match for extensibility.
-  std::vector<const MergeInfo*> matching_mi;
-  for (auto mi = minfo_.cbegin(); mi != minfo_.cend(); ++mi) {
-    if (a->type_string() == mi->succ) {
-      matching_mi.push_back(&*mi);
-    }
-  }
-
-  for (const MergeInfo* mi : matching_mi) {
-    const int N_in = a->num_inputs();
-    if (mi->op >= N_in) {
-      continue;
-    }
-
-    // Get the control edges and input of node
-    gtl::InlinedVector<Node*, 4> a_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> a_in(N_in);
-    FillInputs(a, &a_control_edges, &a_in);
-
-    // Get operand op of the operator
-    Node* b = nullptr;
-    b = a_in[mi->op].first;
-    if (b == nullptr || (b->type_string() != mi->pred)) {
-      // NOTE: Should the first check be assert?
-      continue;
-    }
-
-    const int B_in = b->num_inputs();
-    gtl::InlinedVector<Node*, 4> b_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> b_in(B_in);
-    FillInputs(b, &b_control_edges, &b_in);
-
-    // Shouldn't merge if a and b have different control edges.
-    if (a_control_edges != b_control_edges) {
-      continue;
-    } else {
-      // We found a match.
-      return b;
-    }
-  }
-
-  return nullptr;
-}
-
-Status MklLayoutRewritePass::MergeNode(std::unique_ptr<Graph>* g, Node* succ,
-                                       Node* pred) {
-  CHECK_NOTNULL(succ);
-  CHECK_NOTNULL(pred);
-
-  if (succ->type_string() == csinfo_.bias_add &&
-      pred->type_string() == csinfo_.mkl_conv2d) {
-    // 1. Get all attributes from input nodes.
-    DataType T_pred, T_succ;
-    string padding;
-    std::vector<int32> strides;
-    string data_format_pred, data_format_succ;
-    bool use_cudnn_on_gnu;
-    TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred));
-    TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ));
-    TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding));
-    TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides));
-    TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred));
-    TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ));
-    TF_CHECK_OK(
-        GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu));
-    // We check to ensure that data formats of both succ and pred are same.
-    // We expect them to be same, so we can enforce this as assert.
-    // But assert can be too strict, so we enforce this as a check.
-    // If the check fails, then we do not merge two nodes.
-    // We also do same check for devices.
-    if (data_format_pred != data_format_succ || T_pred != T_succ ||
-        pred->assigned_device_name() != succ->assigned_device_name() ||
-        pred->def().device() != succ->def().device()) {
-      return Status(error::Code::INVALID_ARGUMENT,
-                    "data_format or T attribute or devices of Conv2D and "
-                    "BiasAdd do not match. Will skip node merge optimization");
-    }
-
-    const int succ_num = succ->num_inputs();
-    gtl::InlinedVector<Node*, 4> succ_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> succ_in(succ_num);
-    FillInputs(succ, &succ_control_edges, &succ_in);
-
-    const int pred_num = pred->num_inputs();
-    gtl::InlinedVector<Node*, 4> pred_control_edges;
-    gtl::InlinedVector<std::pair<Node*, int>, 4> pred_in(pred_num);
-    FillInputs(pred, &pred_control_edges, &pred_in);
-
-    // We need to ensure that there is only 1 edge between Conv2D and AddBias.
-    // Otherwise, merging is semantically incorrect.
-    if (pred->out_edges().size() != 1) {
-      return Status(error::Code::INVALID_ARGUMENT,
-                    "Conv2D has multiple outputs."
-                    "Will skip node merge optimization");
-    }
-
-    for (const Edge* e : pred->out_edges()) {
-      if (e->dst() != succ) {
-        return Status(error::Code::INVALID_ARGUMENT,
-                      "Conv2D does not feed to BiasAdd."
-                      "Will skip node merge optimization");
-      }
-    }
-
-    // 2. Get inputs from both the nodes.
-    // Find the 2 inputs from the conv and the bias from the add Bias.
-    // Get operand 0, 1 of conv2D and their Mkl tensors.
-    CHECK_EQ(pred->in_edges().size(), 4);  // _MklConv2D must have 4 inputs.
-    // Get operand 1 of add_bias
-    // BiasAdd must have 2 inputs: Conv, bias
-    CHECK_EQ(succ->in_edges().size(), 2);
-    Node* oper3_mkl = nullptr;  // Mkl tensor corresponding to oper3
-    int oper3_mkl_slot = 0;     // For dummy MKL tensor node, output slot is 0.
-    GetDummyMklTensorNode(g, &oper3_mkl, pred);  // Get dummy Mkl tensor node
-    // as BiasAdd does not have Mkl tensor as input.
-    CHECK_NOTNULL(oper3_mkl);
-
-    // We will use the node name of BiasAdd as the name of new node
-    // Build new node. We use same name as original node, but change the op
-    // name.
-    NodeBuilder nb(succ->name(), csinfo_.mkl_conv2d_with_bias);
-    if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
-      nb.Input(pred_in[0].first, pred_in[0].second);  // In1 of Conv2D
-      // pred_in[1] will be Mkl tensor for In1 if we follow interleaved
-      // ordering, and it will be 2nd Tensorflow tensor for Conv2D if
-      // we follow contiguous ordering.
-      nb.Input(pred_in[1].first, pred_in[1].second);  // Mkl for In1
-      nb.Input(pred_in[2].first, pred_in[2].second);  // In2 of Conv2D
-      nb.Input(pred_in[3].first, pred_in[3].second);  // Mkl for In2
-      nb.Input(succ_in[1].first, succ_in[1].second);  // In2 of BiasAdd
-      nb.Input(oper3_mkl, oper3_mkl_slot);            // Mkl for In2 of BiasAdd
-    } else {
-      CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-      nb.Input(pred_in[0].first, pred_in[0].second);  // In1 of Conv2D
-      // pred_in[1] will be Mkl tensor for In1 if we follow interleaved
-      // ordering, and it will be 2nd Tensorflow tensor for Conv2D if
-      // we follow contiguous ordering.
-      nb.Input(pred_in[1].first, pred_in[1].second);  // In2 of Conv2D
-      nb.Input(succ_in[1].first, succ_in[1].second);  // In2 of BiasAdd
-      nb.Input(pred_in[2].first, pred_in[2].second);  // Mkl for In1 of Conv2D
-      nb.Input(pred_in[3].first, pred_in[3].second);  // Mkl for In2 of Conv2D
-      nb.Input(oper3_mkl, oper3_mkl_slot);            // Mkl for In2 of BiasAdd
-    }
-
-    // Copy attributes from Conv2D to Conv2DWithBias.
-    CopyAttrsConv2D(const_cast<const Node*>(pred), &nb);
-
-    // Copy the device assigned to old node to new node.
-    nb.Device(succ->def().device());
-
-    // Create node.
-    Node* new_node;
-    TF_CHECK_OK(nb.Finalize(&**g, &new_node));
-    CHECK_NOTNULL(new_node);
-
-    // Set the Mkl layer label for this op.
-    new_node->AddAttr("_kernel", mkl_op_registry::kMklOpLabel);
-
-    // Incoming data edges from 'pred' node and 'succ' node to new 'new_node'
-    // node are already copied in BuildNode. We handle control edges now.
-    for (const Edge* e : pred->in_edges()) {
-      if (e->IsControlEdge()) {
-        CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-      }
-    }
-    for (const Edge* e : succ->in_edges()) {
-      if (e->IsControlEdge()) {
-        CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-      }
-    }
-
-    // Incoming edges are fixed, we will fix the outgoing edges now.
-    // First, we will fix outgoing control edges from 'pred' node.
-    // We don't need to handle outgoing data edges from 'pred' node
-    // because pred has only 1 output going to succ node (we enforced
-    // this check for merge already).
-    for (const Edge* e : pred->out_edges()) {
-      if (e->IsControlEdge()) {
-        CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-      }
-    }
-
-    // Second, we will fix outgoing control and data edges from 'succ' node.
-    for (const Edge* e : succ->out_edges()) {
-      if (e->IsControlEdge()) {
-        CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-      } else {
-        CHECK_NOTNULL(
-            (*g)->AddEdge(new_node, e->src_output(), e->dst(), e->dst_input()));
-      }
-    }
-
-    // Copy device assigned to old node to new node.
-    // It's ok to use pred or succ as we have enforced a check that
-    // both have same device assigned.
-    new_node->set_assigned_device_name(pred->assigned_device_name());
-
-    VLOG(1) << "MklLayoutRewritePass: Merged old node:" << pred->DebugString()
-            << ", and node: " << succ->DebugString()
-            << ", into node:" << new_node->DebugString();
-
-    (*g)->RemoveNode(succ);
-    (*g)->RemoveNode(pred);
-
-    return Status::OK();
-  }
-
-  return Status(error::Code::UNIMPLEMENTED,
-                "Unimplemented case for node merge optimization.");
-}
-
-//////////////////////////////////////////////////////////////////////////
-//           Helper functions for node rewrite
-//////////////////////////////////////////////////////////////////////////
-
-Status MklLayoutRewritePass::RewriteNode(std::unique_ptr<Graph>* g,
-                                         Node* orig_node,
-                                         const RewriteInfo* ri) {
-  CHECK_NOTNULL(ri);
-  CHECK_NOTNULL(orig_node);
-
-  VLOG(1) << "MklLayoutRewritePass: Original node:" << orig_node->DebugString();
-
-  // Check if this is scenario 2 (context-based rewrite).
-  // Get the matching ContextInfo if it is.
-  const Node* fwd_node = nullptr;
-  const ContextInfo* ci = nullptr;
-  bool is_context_based_rewrite = false;
-  if ((ci = SearchMatchingContext(orig_node, &fwd_node)) != nullptr) {
-    is_context_based_rewrite = true;
-
-    // Sanity checks for context-based rewrite (if any)
-    if (orig_node->type_string() == csinfo_.bias_add_grad &&
-        ri->new_name == csinfo_.mkl_conv2d_with_bias_backprop_bias) {
-      CHECK_NOTNULL(fwd_node);
-      DataType orig_T, ctx_T;
-      string orig_data_format, ctx_data_format;
-      TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &orig_T));
-      TF_CHECK_OK(
-          GetNodeAttr(orig_node->def(), "data_format", &orig_data_format));
-      TF_CHECK_OK(GetNodeAttr(fwd_node->def(), "T", &ctx_T));
-      TF_CHECK_OK(
-          GetNodeAttr(fwd_node->def(), "data_format", &ctx_data_format));
-
-      if (orig_data_format != ctx_data_format || orig_T != ctx_T ||
-          orig_node->assigned_device_name() !=
-              fwd_node->assigned_device_name() ||
-          orig_node->def().device() != fwd_node->def().device()) {
-        return Status(
-            error::Code::INVALID_ARGUMENT,
-            "data_format or T attribute or devices of BiasAddGrad and "
-            "Conv2D do not match. Will skip node rewrite optimization");
-      }
-    } else if (orig_node->type_string() == csinfo_.bias_add_grad &&
-               ri->new_name == csinfo_.matmul) {
-      // When BiasAddGrad has MatMul in context, we do not do any rewrite
-      // and leave BiasAddGrad as it is. But we check for this condition
-      // when we check for node rewrite rule. So we should not even come
-      // here for MatMul. So we will fail now.
-      return Status(
-          error::Code::INVALID_ARGUMENT,
-          "No rewrite is required for BiasAddGrad for MatMul context.");
-    }
-  }
-
-  // Get all inputs.
-  int num_inputs = orig_node->in_edges().size();
-
-  // Drop count for control edges from inputs
-  for (const Edge* e : orig_node->in_edges()) {
-    if (e->IsControlEdge()) {
-      num_inputs--;
-    }
-  }
-
-  gtl::InlinedVector<Node*, 4> control_edges;
-  gtl::InlinedVector<std::pair<Node*, int>, 4> inputs(num_inputs);
-  FillInputs(orig_node, &control_edges, &inputs);
-
-  // Build new node. We use same name as original node, but change the op name.
-  NodeBuilder nb(orig_node->name().c_str(), ri->new_name.c_str());
-  // Copy user-specified device assigned to original node to new node.
-  nb.Device(orig_node->def().device());
-  // Set up new inputs to the rewritten node.
-  Status s = SetUpInputs(g, inputs, &nb, orig_node);
-  if (s != Status::OK()) {
-    return s;
-  }
-
-  // Copy attributes from original node to new node (for scenario 1).
-  // For context-based rewrite, we use context to copy the attributes.
-  if (is_context_based_rewrite) {
-    if (orig_node->type_string() == csinfo_.bias_add_grad &&
-        ri->new_name == csinfo_.mkl_conv2d_with_bias_backprop_bias) {
-      CHECK_NOTNULL(fwd_node);
-      ri->copy_attrs(fwd_node, &nb);
-    } else {
-      return Status(error::Code::UNIMPLEMENTED,
-                    "Unimplemented case for node rewrite optimization.");
-    }
-  } else {
-    ri->copy_attrs(const_cast<const Node*>(orig_node), &nb);
-  }
-  // Set the Mkl layer label for this op.
-  nb.Attr("_kernel", mkl_op_registry::kMklOpLabel);
-
-  // Finalize graph and get new node.
-  Node* new_node = nullptr;
-  TF_CHECK_OK(nb.Finalize(&**g, &new_node));
-  CHECK_NOTNULL(new_node);
-
-  // Incoming data edges from 'orig_node' node to new 'new_node' node are
-  // already copied in BuildNode. We need to handle control edges now.
-  for (const Edge* e : orig_node->in_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node));
-    }
-  }
-
-  // Copy outgoing edges from 'orig_node' node to new
-  // 'new_node' node, since the output also follows same ordering among
-  // Tensorflow tensors and Mkl tensors. We need to connect Tensorflow
-  // tensors appropriately. Specifically, nth output of the original node
-  // will become 2*nth output of the Mkl node for the interleaved ordering
-  // of the tensors. For the contiguous ordering of the tensors, it will be n.
-  // GetTensorDataIndex provides this mapping function.
-  for (const Edge* e : orig_node->out_edges()) {
-    if (e->IsControlEdge()) {
-      CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst()));
-    } else {
-      CHECK_NOTNULL((*g)->AddEdge(
-          new_node,
-          GetTensorDataIndex(e->src_output(), e->src()->num_outputs()),
-          e->dst(), e->dst_input()));
-    }
-  }
-
-  // Copy the runtime device assigned from original code to new node.
-  new_node->set_assigned_device_name(orig_node->assigned_device_name());
-
-  // Delete original node and mark new node as rewritten.
-  (*g)->RemoveNode(orig_node);
-
-  VLOG(1) << "MklLayoutRewritePass: New node:" << new_node->DebugString();
-  return Status::OK();
-}
-
-const MklLayoutRewritePass::ContextInfo*
-MklLayoutRewritePass::SearchMatchingContext(const Node* n,
-                                            const Node** fwd_node) {
-  CHECK_NOTNULL(n);
-  CHECK_NOTNULL(fwd_node);
-  *fwd_node = nullptr;
-
-  // Search for matching contextinfo based on node name and call
-  // callback function using matching contextinfo.
-  // There could be more than one matching contextinfos but whichever
-  // matches first is returned.
-  for (auto ci = cinfo_.cbegin(); ci != cinfo_.cend(); ++ci) {
-    if (n->type_string() == (*ci)->node &&
-        (*ci)->context_match_fn(n, fwd_node, *ci)) {
-      VLOG(1) << "Found context as matching: " << (*ci)->fwd;
-      return *ci;
-    }
-  }
-  return nullptr;
-}
-
-bool MklLayoutRewritePass::ContextMatchRewrite(const Node* n,
-                                               const ContextInfo* c) {
-  const Node* fwd_node = nullptr;
-  return SearchMatchingContext(n, &fwd_node) == c;
-}
-
-const MklLayoutRewritePass::RewriteInfo*
-MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
-  CHECK_NOTNULL(n);
-
-  // First check if node along with its type is supported by MKL layer.
-  // We do not want to rewrite an op into Mkl op if types are not supported.
-  // E.g., MklRelu does not support INT32. So we cannot rewrite Relu to
-  // MklRelu if type is INT32.
-  DataType T;
-  if (!GetNodeAttr(n->def(), "T", &T).ok()) {
-    return nullptr;
-  }
-
-  // BiasAddGrad is not an Mkl layer, so we make an exception for it.
-  if (n->type_string() != csinfo_.bias_add_grad) {
-    if (!mkl_op_registry::IsMklOp(
-            mkl_op_registry::GetMklOpName(n->type_string()), T)) {
-      return nullptr;
-    }
-  }
-
-  // For elementwise node, we reuse the Eigen implementation and pass the MKL
-  // metadata tensor through so we can avoid conversions. However, if all
-  // incoming edges are in TF format, we don't need all this overhead, so
-  // replace the elementwise node only if at least one of its parents is a MKL
-  // node.
-  //
-  // TODO(vrane): Add implementation for element-wise ops that doesn't reuse
-  // eigen code to reduce cross-library dependency.
-  if (mkl_op_registry::IsMklElementWiseOp(
-          mkl_op_registry::GetMklOpName(n->type_string()), T)) {
-    bool incoming_mkl_edge = false;
-    for (auto parent : n->in_edges()) {
-      if (mkl_op_registry::IsMklOp(
-              mkl_op_registry::GetMklOpName(parent->src()->type_string()), T)) {
-        incoming_mkl_edge = true;
-        break;
-      } else {
-        VLOG(1) << "Non-MKL parent is: " << parent->src()->type_string();
-      }
-    }
-    if (incoming_mkl_edge == false) {
-      VLOG(1) << "Skipping replacement of elementwise node which has no MKL "
-                 "parents.";
-      return nullptr;
-    }
-  }
-
-  // We support 2 types of node rewrites:
-  // 1. Rewriting BiasAddGrad depending on its MklConv2DWithBias context.
-  // 2. Rewriting an op to Mkl op always
-  // We return true if any of these 2 conditions is met.
-
-  // Find matching RewriteInfo and then check that rewrite rule applies.
-  for (auto ri = rinfo_.cbegin(); ri != rinfo_.cend(); ++ri) {
-    if (n->type_string().compare(ri->name) == 0 &&
-        ri->rewrite_rule(n, ri->context)) {
-      // If we are rewriting BiasAddGrad into BiasAddGrad for MatMul context,
-      // then we just return directly.
-      if (n->type_string() == csinfo_.bias_add_grad &&
-          ri->context->fwd == csinfo_.matmul &&
-          ri->new_name == csinfo_.bias_add_grad) {
-        return nullptr;
-      }
-      return &*ri;
-    }
-  }
-
-  // Else return not found.
-  return nullptr;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//              Run function for the pass
-///////////////////////////////////////////////////////////////////////////////
-
-bool MklLayoutRewritePass::RunPass(std::unique_ptr<Graph>* g) {
-  bool result = false;
-  CHECK_NOTNULL(g);
-
-  DumpGraph("Before running MklLayoutRewritePass", &**g);
-
-  std::vector<Node*> order;
-  GetReversePostOrder(**g, &order);  // This will give us topological sort.
-
-  for (Node* n : order) {
-    // If node is not an op or it cannot run on CPU device, then skip.
-    if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) {
-      continue;
-    }
-
-    const RewriteInfo* ri = nullptr;
-    Node* predn = nullptr;
-    // We will first search if node is to be rewritten
-    if ((ri = CheckForNodeRewrite(n)) != nullptr) {
-      string node_name = n->name();
-      string op_name = n->type_string();
-
-      VLOG(1) << "MklLayoutRewritePass: Scheduled node " << node_name
-              << " with op " << op_name << " for rewrite using"
-              << " layout optimization.";
-
-      if (RewriteNode(g, n, ri) == Status::OK()) {
-        VLOG(1) << "MklLayoutRewritePass: rewrote node " << node_name
-                << " with op " << op_name << " for Mkl layout optimization.";
-        result = true;
-      }
-    } else if ((predn = CheckForNodeMerge(n)) != nullptr) {
-      // Otherwise, we will check if the node is to be merged.
-      string n1_name = n->name();
-      string n2_name = predn->name();
-
-      VLOG(1) << "MklLayoutRewritePass: Scheduled nodes " << n1_name << " and "
-              << n2_name << " for merging";
-
-      if (MergeNode(g, n, predn) == Status::OK()) {
-        VLOG(1) << "MklLayoutRewritePass: Merged nodes " << n1_name << " and "
-                << n2_name;
-        result = true;
-      }
-    }
-  }
-
-  DumpGraph("After running MklLayoutRewritePass", &**g);
-
-  return result;
-}
-
-bool RunMklLayoutRewritePass(std::unique_ptr<Graph>* g) {
-  return MklLayoutRewritePass().RunPass(g);
-}
-
-Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
-  if (options.graph == nullptr && options.partition_graphs == nullptr) {
-    return Status::OK();
-  }
-
-  auto process_graph = [&](std::unique_ptr<Graph>* g) {
-    // Get the ownership of a graph
-    std::unique_ptr<Graph>* ng = std::move(g);
-    RunPass(ng);
-    // Return the ownership of a graph back
-    g->reset(ng->release());
-  };
-
-  if (kMklLayoutRewritePassGroup !=
-      OptimizationPassRegistry::POST_PARTITIONING) {
-    // For any pre-partitioning phase, a graph is stored in options.graph.
-    process_graph(options.graph);
-  } else {
-    // For post partitioning phase, graphs are stored in
-    // options.partition_graphs.
-    for (auto& pg : *options.partition_graphs) {
-      process_graph(&pg.second);
-    }
-  }
-
-  return Status::OK();
-}
-
-#else   // INTEL_MKL_ML_ONLY
-
 // This pass implements rewriting of graph to support following scenarios:
 // (A) Merging nodes in the graph
 // (B) Rewriting a node in the graph to a new node
@@ -4539,7 +2364,7 @@ Status MklLayoutRewritePass::Run(const GraphOptimizationPassOptions& options) {
 
   return Status::OK();
 }
-#endif  // INTEL_MKL_ML_ONLY
+
 }  // namespace tensorflow
 
 #endif
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 77640e287c..0eda8170f8 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -37,1869 +37,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-#ifdef INTEL_MKL_ML_ONLY
-
-namespace {
-
-const char kCPUDevice[] = "/job:a/replica:0/task:0/device:CPU:0";
-const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
-
-static void InitGraph(const string& s, Graph* graph,
-                      const string& device = kCPUDevice) {
-  GraphDef graph_def;
-
-  auto parser = protobuf::TextFormat::Parser();
-  //  parser.AllowRelaxedWhitespace(true);
-  CHECK(parser.MergeFromString(s, &graph_def)) << s;
-  GraphConstructorOptions opts;
-  TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, graph));
-
-  for (Node* node : graph->nodes()) {
-    node->set_assigned_device_name(device);
-  }
-}
-
-class MklLayoutPassTest : public ::testing::Test {
- public:
-  MklLayoutPassTest() : graph_(OpRegistry::Global()) {}
-
-  void InitGraph(const string& s, const string& device = kCPUDevice) {
-    ::tensorflow::InitGraph(s, &graph_, device);
-    original_ = CanonicalGraphString(&graph_);
-  }
-
-  static bool IncludeNode(const Node* n) { return n->IsOp(); }
-
-  static string EdgeId(const Node* n, int index) {
-    if (index == 0) {
-      return n->name();
-    } else if (index == Graph::kControlSlot) {
-      return strings::StrCat(n->name(), ":control");
-    } else {
-      return strings::StrCat(n->name(), ":", index);
-    }
-  }
-
-  string CanonicalGraphString(Graph* g) {
-    std::vector<string> nodes;
-    std::vector<string> edges;
-    for (const Node* n : g->nodes()) {
-      if (IncludeNode(n)) {
-        nodes.push_back(strings::StrCat(n->name(), "(", n->type_string(), ")"));
-      }
-    }
-    for (const Edge* e : g->edges()) {
-      if (IncludeNode(e->src()) && IncludeNode(e->dst())) {
-        edges.push_back(strings::StrCat(EdgeId(e->src(), e->src_output()), "->",
-                                        EdgeId(e->dst(), e->dst_input())));
-      }
-    }
-    // Canonicalize
-    std::sort(nodes.begin(), nodes.end());
-    std::sort(edges.begin(), edges.end());
-    return strings::StrCat(str_util::Join(nodes, ";"), "|",
-                           str_util::Join(edges, ";"));
-  }
-
-  string DoMklLayoutOptimizationPass() {
-    string before = CanonicalGraphString(&graph_);
-    LOG(ERROR) << "Before MKL layout rewrite pass: " << before;
-
-    std::unique_ptr<Graph>* ug = new std::unique_ptr<Graph>(&graph_);
-    RunMklLayoutRewritePass(ug);
-
-    string result = CanonicalGraphString(&graph_);
-    LOG(ERROR) << "After MKL layout rewrite pass:  " << result;
-    return result;
-  }
-
-  const string& OriginalGraph() const { return original_; }
-
-  Graph graph_;
-  string original_;
-};
-
-REGISTER_OP("Input").Output("o: float").SetIsStateful();
-REGISTER_OP("InputList").Output("o: N * float").Attr("N: int").SetIsStateful();
-REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
-REGISTER_OP("Int32Input").Output("o: int32").SetIsStateful();
-REGISTER_OP("_MklInput").Output("o: uint8").SetIsStateful();
-REGISTER_OP("_MklInput2")
-    .Output("o: uint8")
-    .Output("o1: uint8")
-    .SetIsStateful();
-
-/////////////////////////////////////////////////////////////////////
-//  Unit tests related to node merge optiimization
-/////////////////////////////////////////////////////////////////////
-
-TEST_F(MklLayoutPassTest, Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Zeta);D(Zeta)|"
-            "A->C;A->D;B->C:1;B->D:1");
-}
-
-// Test set 1: Conv2D + AddBias
-
-// C=_MklConv2D(A,M,B,N); E=BiasAdd(C,D); Z=Zeta(E,Y) (for interleaved ordering)
-// C=_MklConv2D(A,B,M,N); E=BiasAdd(C,D); Z=Zeta(E,Y) (for contiguous ordering)
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);D(Input);DMT/_0(Const);E(_MklConv2DWithBias);"
-            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->E;"
-            "A:control->DMT/_0:control;B->E:1;D->E:2;DMT/_0->E:5;E->Z;M->E:3;"
-            "N->E:4;Y->Z:1");
-}
-
-// C=_MklConv2D(A,M:1,B,N:1); E=BiasAdd(C,D); Z=Zeta(E,Y) (for interleaved)
-// C=_MklConv2D(A,B,M:1,N:1); E=BiasAdd(C,D); Z=Zeta(E,Y) (for contiguous)
-// Test for correct output slots selected
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive1) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput2'}"
-      "node { name: 'N' op: '_MklInput2'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M:1', 'N:1']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);D(Input);DMT/_0(Const);E(_MklConv2DWithBias);"
-            "M(_MklInput2);N(_MklInput2);Y(Input);Z(Zeta)|A->E;"
-            "A:control->DMT/_0:control;B->E:1;D->E:2;DMT/_0->E:5;E->Z;"
-            "M:1->E:3;N:1->E:4;Y->Z:1");
-}
-
-// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y);
-// This is a case of node rewrite followed by node merge.
-// We will first rewrite Conv2D to _MklConv2D, and then merge _MklConv2D
-// with BiasAdd to produce _MklConv2DWithBias.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive2) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Zeta)|"
-            "A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;DMT/_1->E:4;"
-            "DMT/_2->E:5;E->Z;Y->Z:1");
-}
-
-// Graph contains only _MklConv2D, no AddBias.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_NoAddBias) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);M(_MklInput);N(_MklInput)|"
-            "A->C;B->C:1;M->C:2;N->C:3");
-}
-
-// _MklConv2D output does not go to BiasAdd.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D', 'E'] }");  // Output of _MklConv2D does not go to BiasAdd.
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);E(Input);F(BiasAdd);"
-            "M(_MklInput);N(_MklInput)|A->C;B->C:1;D->F;E->F:1;M->C:2;N->C:3");
-}
-
-// _MklConv2D has two outgoing edges: BiasAdd and some other dummy node (Zeta).
-// Merge should not be done in such case.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D', 'E'] }"  // Conv2D has two outputs.
-                              // No merge should happen.
-      "node { name: 'G' op: 'Zeta'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['C', 'E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);E(Input);F(BiasAdd);"
-            "G(Zeta);M(_MklInput);N(_MklInput)|A->C;B->C:1;C->G;D->F;"
-            "E->F:1;E->G:1;M->C:2;N->C:3");
-}
-
-// data_format attribute value mismatch. Merge should not be done
-// in such case.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_AttrMismatch) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NHCW' } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);M(_MklInput);"
-            "N(_MklInput)|A->C;B->C:1;C->E;D->E:1;M->C:2;N->C:3");
-}
-
-// Test set 2: _MklConv2D..BiasAddGrad -> _MklConv2DWithBiasBackpropBias
-// rewrite tests
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter
-// and BackpropInput
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'F', 'E', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'Int32Input'}"
-      "node { name: 'I' op: '_MklConv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['H', 'B', 'E', 'M', 'N', 'O']}"
-      "node { name: 'J' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);DMT/_0(Const);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
-            "I(_MklConv2DBackpropInput);J(_MklConv2DWithBiasBackpropBias);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G;B->D:1;"
-            "B->I:1;C->D:2;D->E;DMT/_0->J:1;E->G:2;E->I:2;E->J;"
-            "E:control->DMT/_0:control;F->G:1;H->I;M->D:3;M->G:3;M->I:3;"
-            "N->D:4;N->G:4;N->I:4;O->D:5;O->G:5;O->I:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter
-// and BackpropInput. But nodes do not match criteria for rewrite. So
-// rewrite should not happen.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['E', 'F', 'A', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'Int32Input'}"
-      "node { name: 'I' op: '_MklConv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['H', 'B', 'E', 'M', 'N', 'O']}"
-      "node { name: 'J' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
-            "I(_MklConv2DBackpropInput);J(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
-            "B->I:1;C->D:2;D->E;E->G;E->I:2;E->J;F->G:1;H->I;M->D:3;M->G:3;"
-            "M->I:3;N->D:4;N->G:4;N->I:4;O->D:5;O->G:5;O->I:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter
-// and BackpropInput. But nodes do not match criteria for rewrite. So
-// rewrite should not happen.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['B', 'A', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'F', 'E', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'Int32Input'}"
-      "node { name: 'I' op: '_MklConv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['H', 'B', 'E', 'M', 'N', 'O']}"
-      "node { name: 'J' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
-            "I(_MklConv2DBackpropInput);J(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D:1;A->E:1;A->G;B->D;"
-            "B->I:1;C->D:2;D->E;E->G:2;E->I:2;E->J;F->G:1;H->I;M->D:3;M->G:3;"
-            "M->I:3;N->D:4;N->G:4;N->I:4;O->D:5;O->G:5;O->I:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter only
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'F', 'E', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);DMT/_0(Const);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);"
-            "H(_MklConv2DWithBiasBackpropBias);M(_MklInput);N(_MklInput);"
-            "O(_MklInput)|A->D;A->E:1;A->G;B->D:1;C->D:2;D->E;DMT/_0->H:1;"
-            "E->G:2;E->H;E:control->DMT/_0:control;F->G:1;M->D:3;M->G:3;"
-            "N->D:4;N->G:4;O->D:5;O->G:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter only
-// But BackpropFilter node inputs do not satisfy criteria for rewrite.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['E', 'F', 'A', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
-            "C->D:2;D->E;E->G;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
-            "O->G:5");
-}
-
-// BiasAddGrad rewrite to BackpropBias in the presence of BackpropFilter only
-// But BackpropFilter node inputs do not satisfy criteria for rewrite.
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['B', 'A', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'Int32Input'}"
-      "node { name: 'G' op: '_MklConv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'F', 'E', 'M', 'N', 'O'] }"
-      "node { name: 'H' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput);O(_MklInput)|A->D:1;A->E:1;A->G;B->D;"
-            "C->D:2;D->E;E->G:2;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
-            "O->G:5");
-}
-
-// No _MklConv2DWithBias in context, but _MklConv2D in context.
-// No rewrite for BiasAddGrad should happen.
-// C=_MklConv2D(A,M,B,N); D=Zeta(C,A); E=BiasAddGrad(D) (for interleaved)
-// C=_MklConv2D(A,B,M,N); D=Zeta(C,A); E=BiasAddGrad(D) (for contiguous)
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Neg_NoMklConv2DWithBias) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);"
-            "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;"
-            "M->C:2;N->C:3");
-}
-
-// No Conv2D in the context for BiasAddGrad. No rewrite should happen.
-// C=Polygamma(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Polygamma'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-// No Conv2D in the context for BiasAddGrad, but MatMul in context.
-// Rewrite should happen, but name of BiasAddGrad does not change.
-// C=MatMul(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D_MatMul) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'MatMul'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'transpose_a'      value { b: false } }"
-      " attr { key: 'transpose_b'      value { b: false } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-// Test set 3: MatMul..BiasAddGrad -> BiasAddGrad rewrite tests
-// C=MatMul(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
-TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'MatMul'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'transpose_a'      value { b: false } }"
-      " attr { key: 'transpose_b'      value { b: false } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-// No MatMul in the context for BiasAddGrad. No rewrite should happen.
-// C=Polygamma(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
-TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Negative_NoMatMul) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Polygamma'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['C', 'A']}"
-      "node { name: 'E' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
-            "A->C;A->D:1;B->C:1;C->D;D->E");
-}
-
-/////////////////////////////////////////////////////////////////////
-//  Unit tests related to rewriting node to Mkl node
-/////////////////////////////////////////////////////////////////////
-
-// Single Conv2D Op; No Mkl layer on the input and on the output.
-// We will generate dummy Mkl tensor as 2nd input of Conv2D.
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['B', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
-            "DMT/_1->C:3");
-}
-
-// 2 Conv2D Ops in sequence. Both should get transformed and 1st Conv2D will
-// have 2 outputs, both of which will be inputs to next Conv2D.
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(_MklConv2D);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->C;A->D;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;"
-            "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2");
-}
-
-// Conv2D with INT32 which is not supported by Mkl
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
-  InitGraph(
-      "node { name: 'A' op: 'HalfInput'}"
-      "node { name: 'B' op: 'HalfInput'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_HALF } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }"
-      " input: ['B', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(HalfInput);B(HalfInput);C(Conv2D);D(Zeta)|"
-            "A->C;B->C:1;B->D;C->D:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);"
-            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
-            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:4;DMT/_2->D:5");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropInput'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['B', 'A', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);"
-            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
-            "A->D:1;A->E;B->D;B:control->DMT/_0:control;"
-            "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;"
-            "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// Concat Op test: Concat with no Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['A', 'B:0', 'B:1']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(
-      DoMklLayoutOptimizationPass(),
-      "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);"
-      "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;"
-      "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;"
-      "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// Concat with 2 Mkl layers feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['G', 'E', 'F']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
-            "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;"
-            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
-            "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;"
-            "G:control->DMT/_4:control;H->I:1");
-}
-
-// Concat with 1 Mkl and 1 non-Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['G', 'E', 'F']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
-            "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
-            "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;"
-            "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1");
-}
-
-// ConcatV2 Op test: ConcatV2 with no Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['B:0', 'B:1', 'A']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;"
-            "B:control->DMT/_0:control;B:control->DMT/_1:control;"
-            "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:4;DMT/_2->D:5");
-}
-
-// ConcatV2 with 2 Mkl layers feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['E', 'F', 'G']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
-            "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;"
-            "C:control->DMT/_0:control;C:control->DMT/_1:control;"
-            "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
-            "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;"
-            "F:2->H:4;G->H:2;H->I:1");
-}
-
-// ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D']}"
-      "node { name: 'G' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'H' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['E', 'F', 'G']}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'H'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
-            "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
-            "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;"
-            "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;"
-            "G->H:2;H->I:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Relu'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;"
-            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'ReluGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Relu'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'ReluGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;"
-            "DMT/_1->C:2");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'AvgPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklAvgPool);C(Zeta);DMT/_0(Const)|A->B;A->C;"
-            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Int32Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'AvgPoolGrad' "
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['B', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
-            "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
-            "DMT/_1->C:3");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolAvgPoolGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'I' op: 'Int32Input'}"
-      "node { name: 'B' op: 'AvgPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'AvgPoolGrad' "
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['I', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
-            "DMT/_1(Const);I(Int32Input)|A->B;A->D;A:control->DMT/_0:control;"
-            "B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;DMT/_1->C:2;I->C;"
-            "I:control->DMT/_1:control");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNormGrad_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'FusedBatchNormGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'epsilon'      value { f: 0.0001 } }"
-      " attr { key: 'is_training'  value { b: true } }"
-      " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'F'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
-            "F(_MklFusedBatchNormGrad);G(Zeta)|A->F;A->G;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
-            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
-            "E->F:4;F->G:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'FusedBatchNorm'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'epsilon'      value { f: 0.0001 } }"
-      " attr { key: 'is_training'  value { b: true } }"
-      " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'F'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
-            "F(_MklFusedBatchNorm);G(Zeta)|A->F;A->G;"
-            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
-            "DMT/_0->F:5;DMT/_1->F:6;DMT/_2->F:7;DMT/_3->F:8;DMT/_4->F:9;"
-            "E->F:4;F->G:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-//  Unit tests related to rewriting node for workspace edges
-/////////////////////////////////////////////////////////////////////
-
-/* Test LRN->MaxPool->MaxPoolGrad->LRNGrad replacement by workspace nodes. */
-TEST_F(MklLayoutPassTest, MaxPoolLRN_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['B'] }"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'MaxPoolGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['B', 'C', 'D'] }"
-      "node { name: 'F' op: 'Input'}"
-      "node { name: 'G' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['E', 'F', 'B'] }"
-      "node { name: 'H' op: 'Input'}"
-      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['H', 'G'] }");
-  EXPECT_EQ(
-      DoMklLayoutOptimizationPass(),
-      "A(Input);B(_MklLRN);C(_MklMaxPool);D(Input);DMT/_0(Const);DMT/_1(Const);"
-      "DMT/_2(Const);E(_MklMaxPoolGrad);F(Input);G(_MklLRNGrad);H(Input);"
-      "I(Zeta)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;"
-      "B:2->C:1;B:2->E:4;B:2->G:6;B:3->G:7;B:control->DMT/_1:control;C->E:1;"
-      "C:1->E:3;C:2->E:5;C:3->E:7;D->E:2;DMT/_0->B:1;DMT/_1->E:6;DMT/_2->G:5;"
-      "E->G;E:1->G:4;E:control->DMT/_2:control;F->G:1;G->I:1;H->I");
-}
-
-/* Test LRN->LRNGrad replacement by workspace nodes. */
-TEST_F(MklLayoutPassTest, LRN_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['C', 'D', 'B'] }"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(_MklLRNGrad);F(Zeta)|"
-            "A->B;A:control->DMT/_0:control;B->E:2;B:1->E:3;B:2->E:6;B:3->E:7;"
-            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
-            "D->E:1;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:5;E->F:1");
-}
-
-/* Test LRN->LRNGrad replacement when only one of them is present. */
-TEST_F(MklLayoutPassTest, LRN_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Zeta);DMT/_0(Const)|"
-            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-/* Test LRN->LRNGrad replacement when only one of them is present. */
-TEST_F(MklLayoutPassTest, LRN_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklLRNGrad);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
-            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
-}
-
-/* Test LRN->LRNGrad negative case, where single LRN feeds
-   2 LRNGrad nodes at different slots. */
-TEST_F(MklLayoutPassTest, LRN_Negative3) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'LRN'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['C', 'D', 'B'] }"
-      "node { name: 'F' op: 'LRNGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'alpha'        value { f: 0.001 } }"
-      " attr { key: 'beta'         value { f: 0.75 } }"
-      " attr { key: 'bias'         value { f: 1.0 } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'depth_radius' value { i: 2 } }"
-      " input: ['C', 'B', 'D'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['E', 'F'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);DMT/_5(Const);"
-            "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Zeta)|A->B;"
-            "A:control->DMT/_0:control;B->E:2;"
-            "B->F:1;B:1->E:3;B:2->E:6;B:2->F:5;B:3->E:7;C->E;C->F;"
-            "C:control->DMT/_1:control;C:control->DMT/_2:control;"
-            "C:control->DMT/_3:control;C:control->DMT/_4:control;"
-            "C:control->DMT/_5:control;C:control->DMT/_6:control;"
-            "D->E:1;D->F:2;DMT/_0->B:1;DMT/_1->F:3;DMT/_2->F:7;DMT/_3->F:4;"
-            "DMT/_4->F:6;DMT/_5->E:4;DMT/_6->E:5;E->G;F->G:1");
-}
-
-/* Test MaxPool->MaxPoolGrad replacement by workspace+rewrite nodes. */
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Positive) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'MaxPoolGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['C', 'B', 'D'] }"
-      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'E'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklMaxPool);C(Input);D(Input);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Zeta)|"
-            "A->B;A:control->DMT/_0:control;B->E:1;B:1->E:3;B:2->E:5;B:3->E:7;"
-            "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
-            "D->E:2;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:6;E->F:1");
-}
-
-// Test MaxPool>MaxPoolGrad replacement when only one of them is present.
-// In this case, we will rewrite MaxPool node but workspace edges will not
-// be present.
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative1) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklMaxPool);C(Zeta);DMT/_0(Const)|"
-            "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
-}
-
-// Test MaxPoolGrad replacement when only one of them is present.
-// In this case, we will rewrite MaxPoolGrad and for workspace tensor and
-// its Mkl part, we will generate dummy tensor.
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative2) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'MaxPoolGrad'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:3, i:3} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
-      " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklMaxPoolGrad);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
-            "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
-            "A:control->DMT/_2:control;A:control->DMT/_3:control;"
-            "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
-            "DMT/_1->D:7;DMT/_2->D:4;DMT/_3->D:5;DMT/_4->D:6");
-}
-
-// Test MaxPool handling for batch-wise pooling (NCHW)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative3) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for batch-wise pooling (NCHW)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative4) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative5) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:2, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NCHW)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative6) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:2, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for batch-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative7) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 2, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for batch-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative8) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative9) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:2} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Test MaxPool handling for depth-wise pooling (NHWC)
-// No rewrite should take place in such case
-TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative10) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:2} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }");
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-
-// Single Conv2D Op on GPU device
-// No rewrite should happen
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Conv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['B', 'C'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Conv2D);D(Zeta)|A->C;B->C:1;B->D;C->D:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'O' op: '_MklInput'}"
-      "node { name: 'D' op: '_MklConv2DWithBias'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['D', 'A']}"
-      "node { name: 'F' op: 'BiasAddGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['E'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Zeta);F(BiasAddGrad);M(_MklInput);N(_MklInput);"
-            "O(_MklInput)|A->D;A->E:1;B->D:1;C->D:2;D->E;E->F;"
-            "M->D:3;N->D:4;O->D:5");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Int32Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Conv2DBackpropFilter'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'C']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'D'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Zeta)|"
-            "A->D;A->E;B->D:1;C->D:2;D->E:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_Relu_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Relu'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Relu);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'ReluGrad'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'C'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(ReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'MaxPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'AvgPool'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NHWC' } }"
-      " attr { key: 'ksize'        value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'      value { s: 'VALID' } }"
-      " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
-      " input: ['A'] }"
-      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'B'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(AvgPool);C(Zeta)|A->B;A->C;B->C:1");
-}
-
-// Concat Op test: Concat with no Mkl layer feeding it
-TEST_F(MklLayoutPassTest, NodeRewrite_Concat_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Concat'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['A', 'B:0', 'B:1']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(Concat);E(Zeta)|A->D;"
-            "B->D:1;B:1->D:2;C->E;D->E:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Const' "
-      " attr { key: 'dtype' value { type: DT_INT32 } }"
-      " attr { key: 'value' value { "
-      "    tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } "
-      "    int_val: 0 } } } }"
-      "node { name: 'B' op: 'InputList'"
-      " attr { key: 'N'                value { i: 2 } }}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'ConcatV2'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'Tidx'             value { type: DT_INT32 } }"
-      " attr { key: 'N'                value { i: 2 } }"
-      " input: ['B:0', 'B:1', 'A']}"
-      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['C', 'D'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(ConcatV2);E(Zeta)|"
-            "A->D:2;B->D;B:1->D:1;C->E;D->E:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_DeviceTest) {
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Input'}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'Input'}"
-      "node { name: 'F' op: 'FusedBatchNorm'"
-      " attr { key: 'T'            value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'  value { s: 'NCHW' } }"
-      " attr { key: 'epsilon'      value { f: 0.0001 } }"
-      " attr { key: 'is_training'  value { b: true } }"
-      " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
-      " input: ['A', 'F'] }",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Input);D(Input);E(Input);"
-            "F(FusedBatchNorm);G(Zeta)|A->F;A->G;B->F:1;C->F:2;D->F:3;"
-            "E->F:4;F->G:1");
-}
-
-TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
-  CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
-  InitGraph(
-      "node { name: 'A' op: 'Input'}"
-      "node { name: 'B' op: 'Input'}"
-      "node { name: 'M' op: '_MklInput'}"
-      "node { name: 'N' op: '_MklInput'}"
-      "node { name: 'C' op: '_MklConv2D'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " attr { key: 'use_cudnn_on_gpu' value { b: false } }"
-      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
-      " attr { key: 'padding'          value { s: 'SAME' } }"
-      " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Input'}"
-      "node { name: 'E' op: 'BiasAdd'"
-      " attr { key: 'T'                value { type: DT_FLOAT } }"
-      " attr { key: 'data_format'      value { s: 'NCHW' } }"
-      " input: ['C', 'D'] }"
-      "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Zeta'"
-      " attr {key: 'T'                 value { type: DT_FLOAT } }"
-      " input: ['E', 'Y']}",
-      kGPUDevice);
-  EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);"
-            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->C;"
-            "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1");
-}
-
-/////////////////////////////////////////////////////////////////////
-
-static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
-  testing::StopTiming();
-  string s;
-  for (int in = 0; in < 10; in++) {
-    s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in);
-  }
-  random::PhiloxRandom philox(301, 17);
-  random::SimplePhilox rnd(&philox);
-  for (int op = 0; op < op_nodes; op++) {
-    s += strings::Printf(
-        "node { name: 'op%04d' op: 'Zeta' attr { key: 'T' value { "
-        "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
-        op, rnd.Uniform(10), rnd.Uniform(10));
-  }
-
-  bool first = true;
-  while (iters > 0) {
-    Graph* graph = new Graph(OpRegistry::Global());
-    InitGraph(s, graph);
-    int N = graph->num_node_ids();
-    if (first) {
-      testing::SetLabel(strings::StrCat("Per graph node.  Nodes: ", N));
-      first = false;
-    }
-    {
-      testing::StartTiming();
-      std::unique_ptr<Graph> ug(graph);
-      RunMklLayoutRewritePass(&ug);
-      testing::StopTiming();
-    }
-    iters -= N;  // Our benchmark units are individual graph nodes,
-                 // not whole graphs
-    // delete graph;
-  }
-}
-BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
-
-}  // namespace
-
-#else  // INTEL_MKL_ML_ONLY
-
 // NOTE: Unit tests in this file rely on a topological sorted graph for
 // printing. But since sibling nodes of a node in the topologically sorted graph
 // can be printed in different orders, tests may fail if the order in which
@@ -3602,8 +1739,6 @@ BENCHMARK(BM_MklLayoutRewritePass)->Arg(1000)->Arg(10000);
 
 }  // namespace
 
-#endif  // INTEL_MKL_ML_ONLY
-
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL && ENABLE_MKL
-- 
GitLab


From 470101040d2174ddcb41990e5e16ed6dfa6f6436 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 13:33:43 -0700
Subject: [PATCH 0455/1085] Remove commented out code errantly checked in.

PiperOrigin-RevId: 215957544
---
 third_party/jpeg/workspace.bzl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/third_party/jpeg/workspace.bzl b/third_party/jpeg/workspace.bzl
index 4b517240ec..2bb7dacd32 100644
--- a/third_party/jpeg/workspace.bzl
+++ b/third_party/jpeg/workspace.bzl
@@ -12,6 +12,5 @@ def repo():
         sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b",
         strip_prefix = "libjpeg-turbo-2.0.0",
         build_file = "//third_party/jpeg:BUILD.bazel",
-        # build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"),
         system_build_file = "//third_party/jpeg:BUILD.system",
     )
-- 
GitLab


From ae0bc6f006497cc04a2ee75166d4ec71c7154fd8 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Fri, 5 Oct 2018 13:34:01 -0700
Subject: [PATCH 0456/1085] [tf.data] Adding specialization for `MapDataset`,
 `ParallelMapDataset`, and `MapAndBatchDataset` whose user-provided functions
 have the property that each output argument take its value directly from an
 input argument (e.g. `lambda x, y: y, x`). This specialization can produce
 the result without having to schedule the function using the executor.

PiperOrigin-RevId: 215957592
---
 tensorflow/core/kernels/data/BUILD            |  14 ++
 tensorflow/core/kernels/data/dataset_utils.cc |  47 +++++
 tensorflow/core/kernels/data/dataset_utils.h  |  20 ++
 .../core/kernels/data/dataset_utils_test.cc   |  46 +++++
 .../core/kernels/data/filter_dataset_op.cc    | 162 +++++++---------
 .../kernels/data/map_and_batch_dataset_op.cc  | 180 +++++++++++-------
 .../core/kernels/data/map_dataset_op.cc       |  56 ++++--
 .../kernels/data/parallel_map_dataset_op.cc   |  73 ++++---
 .../kernels/data/parallel_map_iterator.cc     |  17 +-
 .../core/kernels/data/parallel_map_iterator.h |   2 +-
 .../kernels/data/parse_example_dataset_op.cc  |   2 +-
 .../kernel_tests/map_and_batch_test.py        |  20 ++
 .../kernel_tests/filter_dataset_op_test.py    |   2 +-
 .../data/kernel_tests/map_dataset_op_test.py  |  80 ++++++--
 .../python/data/kernel_tests/test_base.py     |  29 +++
 15 files changed, 520 insertions(+), 230 deletions(-)
 create mode 100644 tensorflow/core/kernels/data/dataset_utils_test.cc

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 451f8c1a6c..37c1c54786 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -45,6 +45,16 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "dataset_utils_test",
+    srcs = ["dataset_utils_test.cc"],
+    deps = [
+        ":dataset_utils",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 cc_library(
     name = "captured_function",
     srcs = ["captured_function.cc"],
@@ -205,6 +215,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -232,6 +243,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -245,6 +257,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -285,6 +298,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         ":parallel_map_iterator",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index e10833f525..a40f7f2146 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -15,10 +15,57 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
 
 namespace tensorflow {
 namespace data {
 
+Status ComputeShortCircuitIndices(OpKernelContext* ctx,
+                                  const NameAttrList& func,
+                                  std::vector<int>* indices) {
+  FunctionLibraryRuntime::Handle fn_handle;
+  TF_RETURN_IF_ERROR(ctx->function_library()->Instantiate(
+      func.name(), AttrSlice(&func.attr()), &fn_handle));
+  auto cleanup = gtl::MakeCleanup([ctx, fn_handle]() {
+    Status s = ctx->function_library()->ReleaseHandle(fn_handle);
+    if (!s.ok()) {
+      LOG(WARNING) << "Failed to release handle: " << s.error_message();
+    }
+  });
+
+  const FunctionBody* fn_body =
+      ctx->function_library()->GetFunctionBody(fn_handle);
+  indices->resize(fn_body->ret_nodes.size());
+  for (size_t i = 0; i < fn_body->ret_nodes.size(); ++i) {
+    Node* ret_node = fn_body->ret_nodes[i];
+    Node* ret_input_node;
+    TF_RETURN_IF_ERROR(ret_node->input_node(0, &ret_input_node));
+    if (ret_input_node->def().op() == FunctionLibraryDefinition::kArgOp) {
+      TF_RETURN_IF_ERROR(
+          GetNodeAttr(ret_input_node->def(), "index", &((*indices)[i])));
+    } else {
+      indices->clear();
+      break;
+    }
+  }
+  return Status::OK();
+}
+
+std::vector<bool> ComputeMoveVector(const std::vector<int>& indices) {
+  std::map<int, int> last_use;
+  for (size_t i = 0; i < indices.size(); ++i) {
+    last_use[indices[i]] = i;
+  }
+  std::vector<bool> can_move;
+  can_move.resize(indices.size());
+  for (size_t i = 0; i < indices.size(); ++i) {
+    can_move[i] = last_use[indices[i]] == i;
+  }
+  return can_move;
+}
+
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index 6ec1350cd4..d777062293 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -22,6 +22,26 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
+// This method is used to determine whether we can short-circuit the evaluation
+// of the user-defined function `func`. Short-circuting is possible if every
+// function output corresponds to one of its inputs (e.g. `f(x) = x`, `f(x,y) =
+// (y,x)`, or `f(x) = (x,x)`).
+//
+// If short-circuiting is possible, the method stores the mapping from output
+// indices to input indices in `indices`. Otherwise, `indices` will be empty.
+//
+// Returns non-ok status if analysis of the function fails.
+//
+// TODO(jsimsa): Extend this to support constants as well.
+Status ComputeShortCircuitIndices(OpKernelContext* ctx,
+                                  const NameAttrList& func,
+                                  std::vector<int>* indices);
+
+// Given a vector that maps output indices to input indices, return a vector
+// that identifies for which output indices can we move the input (assuming
+// output indices are processed left to right).
+std::vector<bool> ComputeMoveVector(const std::vector<int>& indices);
+
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc
new file mode 100644
index 0000000000..43295b8ebb
--- /dev/null
+++ b/tensorflow/core/kernels/data/dataset_utils_test.cc
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/data/dataset_utils.h"
+
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace data {
+namespace {
+
+TEST(DatasetUtils, ComputeMoveVector) {
+  struct TestCase {
+    std::vector<int> indices;
+    std::vector<bool> expected;
+  };
+
+  TestCase test_cases[] = {
+      TestCase{{}, {}},
+      TestCase{{1}, {true}},
+      TestCase{{1, 1}, {false, true}},
+      TestCase{{1, 2}, {true, true}},
+      TestCase{{1, 1, 2}, {false, true, true}},
+      TestCase{{1, 2, 2}, {true, false, true}},
+  };
+
+  for (auto& test_case : test_cases) {
+    EXPECT_EQ(test_case.expected, ComputeMoveVector(test_case.indices));
+  }
+}
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index 00884314a9..be7d182a1f 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -18,9 +18,11 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -31,67 +33,84 @@ namespace {
 
 class FilterDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using FilterIteratorPredicate =
+      std::function<Status(IteratorContext*, std::vector<Tensor>, bool*)>;
+
   explicit FilterDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
+      : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_));
   }
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    FunctionLibraryRuntime::Handle pred_handle;
-    OP_REQUIRES_OK(ctx,
-                   ctx->function_library()->Instantiate(
-                       func_.name(), AttrSlice(&func_.attr()), &pred_handle));
-    auto cleanup = gtl::MakeCleanup([ctx, pred_handle]() {
-      OP_REQUIRES_OK(ctx, ctx->function_library()->ReleaseHandle(pred_handle));
-    });
-
-    const FunctionBody* pred_body =
-        ctx->function_library()->GetFunctionBody(pred_handle);
-    OP_REQUIRES(ctx, pred_body->ret_nodes.size() == 1,
-                errors::InvalidArgument(
-                    "predicate function must have a single return value."));
-    Node* ret_node = pred_body->ret_nodes[0];
-    Node* ret_input_node;
-    OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node));
-
     std::unique_ptr<CapturedFunction> captured_func;
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    if (ret_input_node->def().op() == "_Arg") {
-      int32 index = -1;
-      OP_REQUIRES_OK(ctx, GetNodeAttr(ret_input_node->def(), "index", &index));
-      *output = new FilterTensorDataset(ctx, input, func_,
-                                        std::move(captured_func), index);
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+    OP_REQUIRES(ctx, indices.size() <= 1,
+                errors::InvalidArgument(
+                    "predicate function has more than one return value."));
+
+    FilterIteratorPredicate filter_pred;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      filter_pred = [raw_captured_func](IteratorContext* ctx,
+                                        const std::vector<Tensor>& args,
+                                        bool* out_matched) {
+        std::vector<Tensor> result;
+        TF_RETURN_IF_ERROR(
+            raw_captured_func->RunWithBorrowedArgs(ctx, args, &result));
+
+        if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
+            result[0].NumElements() != 1) {
+          return errors::InvalidArgument(
+              "Filter predicate `f` must return a scalar bool.");
+        }
+        *out_matched = result[0].scalar<bool>()();
+        return Status::OK();
+      };
     } else {
-      *output = new FilterFunctionDataset(ctx, input, func_,
-                                          std::move(captured_func));
+      filter_pred = [indices](IteratorContext* ctx,
+                              const std::vector<Tensor>& args,
+                              bool* out_matched) {
+        const Tensor& predicate = args[indices[0]];
+        if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
+          return errors::InvalidArgument(
+              "Filter predicate `f` must return a scalar bool.");
+        }
+        *out_matched = predicate.scalar<bool>()();
+        return Status::OK();
+      };
     }
+
+    *output = new Dataset(ctx, input, func_, std::move(captured_func),
+                          std::move(filter_pred));
   }
 
  private:
-  const int graph_def_version_;
-
-  class FilterDatasetBase : public DatasetBase {
+  class Dataset : public DatasetBase {
    public:
-    FilterDatasetBase(OpKernelContext* ctx, const DatasetBase* input,
-                      const NameAttrList& func,
-                      std::unique_ptr<CapturedFunction> captured_func)
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func,
+            std::unique_ptr<CapturedFunction> captured_func,
+            FilterIteratorPredicate filter_pred)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
-          captured_func_(std::move(captured_func)) {
+          captured_func_(std::move(captured_func)),
+          filter_pred_(std::move(filter_pred)) {
       input_->Ref();
     }
 
-    ~FilterDatasetBase() override { input_->Unref(); }
+    ~Dataset() override { input_->Unref(); }
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::Filter")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::Filter")},
+          filter_pred_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -133,17 +152,15 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       return Status::OK();
     }
 
-    virtual Status EvaluatePredicate(IteratorContext* ctx,
-                                     const std::vector<Tensor>& element,
-                                     bool* out_matched) const = 0;
-
    private:
-    class Iterator : public DatasetIterator<FilterDatasetBase> {
+    class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<FilterDatasetBase>(params),
+      explicit Iterator(const Params& params,
+                        FilterIteratorPredicate filter_pred)
+          : DatasetIterator<Dataset>(params),
             filtered_elements_(0),
-            dropped_elements_(0) {
+            dropped_elements_(0),
+            filter_pred_(std::move(filter_pred)) {
         std::vector<string> components =
             str_util::Split(params.prefix, "::", str_util::SkipEmpty());
         prefix_end_ = components.back();
@@ -180,8 +197,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          TF_RETURN_IF_ERROR(
-              dataset()->EvaluatePredicate(ctx, *out_tensors, &matched));
+          TF_RETURN_IF_ERROR(filter_pred_(ctx, *out_tensors, &matched));
           if (!matched) {
             // Clear the output tensor list since it didn't match.
             out_tensors->clear();
@@ -251,64 +267,14 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       int64 filtered_elements_ GUARDED_BY(mu_);
       int64 dropped_elements_ GUARDED_BY(mu_);
+      const FilterIteratorPredicate filter_pred_;
       string prefix_end_;
     };
 
     const DatasetBase* const input_;
     const NameAttrList func_;
-
-   protected:
     const std::unique_ptr<CapturedFunction> captured_func_;
-  };
-
-  class FilterFunctionDataset : public FilterDatasetBase {
-   public:
-    using FilterDatasetBase::FilterDatasetBase;
-
-   protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
-      // TODO(mrry): Avoid blocking a threadpool thread. We will need to
-      // stack-rip the iterators and use async kernels.
-      std::vector<Tensor> result;
-      TF_RETURN_IF_ERROR(
-          captured_func_->RunWithBorrowedArgs(ctx, element, &result));
-
-      if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
-          result[0].NumElements() != 1) {
-        return errors::InvalidArgument(
-            "Filter predicate `f` must return a scalar bool.");
-      }
-      *out_matched = result[0].scalar<bool>()();
-      return Status::OK();
-    }
-  };
-
-  class FilterTensorDataset : public FilterDatasetBase {
-   public:
-    FilterTensorDataset(OpKernelContext* ctx, const DatasetBase* input,
-                        const NameAttrList& func,
-                        std::unique_ptr<CapturedFunction> captured_func,
-                        int32 index)
-        : FilterDatasetBase(ctx, input, func, std::move(captured_func)),
-          index_(index) {}
-
-   protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
-      const Tensor& predicate = element[index_];
-      if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
-        return errors::InvalidArgument(
-            "Filter predicate `f` must return a scalar bool.");
-      }
-      *out_matched = predicate.scalar<bool>()();
-      return Status::OK();
-    }
-
-   private:
-    const int32 index_;
+    const FilterIteratorPredicate filter_pred_;
   };
 
  private:
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index bf08970560..f9aaa3080e 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/inplace_ops_functor.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
@@ -29,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -41,6 +43,10 @@ namespace {
 // transformation more robust.
 class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using MapAndBatchIteratorFunction =
+      std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
+                         std::shared_ptr<std::vector<Tensor>>, StatusCallback)>;
+
   explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx)
       : UnaryDatasetOpKernel(ctx),
         op_version_(ctx->def().op() == "MapAndBatchDataset" ? 1 : 2) {
@@ -91,31 +97,66 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    *output = new Dataset(ctx, input, batch_size, num_parallel_calls,
-                          drop_remainder, output_types_, output_shapes_, func_,
-                          std::move(captured_func), &ctx->eigen_cpu_device());
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    MapAndBatchIteratorFunction map_func;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      map_func = [raw_captured_func](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args,
+                     std::shared_ptr<std::vector<Tensor>> out_tensors,
+                     StatusCallback done) {
+        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors.get(),
+                                    std::move(done), prefix);
+      };
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [indices, can_move](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args,
+                     std::shared_ptr<std::vector<Tensor>> out_tensors,
+                     StatusCallback done) {
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (can_move[i]) {
+            out_tensors->push_back(std::move(args[indices[i]]));
+          } else {
+            out_tensors->push_back(args[indices[i]]);
+          }
+        }
+        done(Status::OK());
+      };
+    }
+
+    *output = new Dataset(ctx, input, func_, batch_size, num_parallel_calls,
+                          drop_remainder, output_types_, output_shapes_,
+                          std::move(captured_func), &ctx->eigen_cpu_device(),
+                          std::move(map_func));
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
-    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func, int64 batch_size,
             int64 num_parallel_calls, bool drop_remainder,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
-            const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
-            const Eigen::ThreadPoolDevice* device)
+            const Eigen::ThreadPoolDevice* device,
+            MapAndBatchIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
+          func_(func),
           batch_size_(batch_size),
           num_parallel_calls_(num_parallel_calls),
           drop_remainder_(drop_remainder),
           output_types_(output_types),
           output_shapes_(output_shapes),
-          map_fn_(func),
           captured_func_(std::move(captured_func)),
-          device_(device) {
+          device_(device),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -123,8 +164,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::MapAndBatch")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::MapAndBatch")},
+          map_func_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -143,7 +185,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      TF_RETURN_IF_ERROR(b->AddFunction(ctx, map_fn_.name()));
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
       Node* input_graph_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
       Node* batch_size_node;
@@ -165,7 +207,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         other_arguments_types.emplace_back(t.dtype());
       }
       AttrValue f;
-      b->BuildAttrValue(map_fn_, &f);
+      b->BuildAttrValue(func_, &f);
       AttrValue other_arguments_types_attr;
       b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
 
@@ -185,12 +227,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
+      explicit Iterator(const Params& params,
+                        MapAndBatchIteratorFunction map_func)
           : DatasetIterator<Dataset>(params),
             mu_(std::make_shared<mutex>()),
             cond_var_(std::make_shared<condition_variable>()),
             num_parallel_calls_(std::make_shared<model::SharedState>(
-                params.dataset->num_parallel_calls_, mu_, cond_var_)) {}
+                params.dataset->num_parallel_calls_, mu_, cond_var_)),
+            map_func_(std::move(map_func)) {}
 
       ~Iterator() override {
         mutex_lock l(*mu_);
@@ -297,44 +341,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         int64 num_calls;  // access guarded by owner's mutex
       };
 
-      void Callback(const std::shared_ptr<IteratorContext>& ctx,
-                    const std::shared_ptr<BatchResult>& result,
-                    const std::shared_ptr<std::vector<Tensor>>& return_values,
-                    int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) {
-        result->UpdateStatus(status);
-        if (status.ok()) {
-          EnsureOutputAllocated(ctx, result, return_values);
-          for (size_t i = 0; i < return_values->size(); ++i) {
-            const Tensor& tensor = return_values->at(i);
-            Tensor* batch = &(result->output)[i];
-            if (tensor.NumElements() !=
-                (batch->NumElements() / batch->dim_size(0))) {
-              TensorShape batch_shape = batch->shape();
-              batch_shape.RemoveDim(0);
-              result->UpdateStatus(errors::InvalidArgument(
-                  "Cannot add tensor to the batch: number of elements does not "
-                  "match. Shapes are: [tensor]: ",
-                  tensor.shape().DebugString(),
-                  ", [batch]: ", batch_shape.DebugString()));
-              break;
-            }
-            // TODO(mrry): Add a version of DoParallelConcat that allows us to
-            // move `tensor` where possible, to speed up string tensor batching.
-            Status copy_status = ::tensorflow::functor::DoParallelConcat(
-                *dataset()->device_, tensor, offset, batch);
-            if (!copy_status.ok()) {
-              result->UpdateStatus(copy_status);
-              break;
-            }
-          }
-          {
-            mutex_lock l(result->mu);
-            result->num_elements++;
-          }
-        }
-        CallCompleted(result);
-      }
-
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
           LOCKS_EXCLUDED(*mu_) {
         mutex_lock l(*mu_);
@@ -363,21 +369,48 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           return;
         }
 
-        // Call `captured_func_(input_element)`, using `Callback` to store the
-        // result in `result`.
-        (*ctx->runner())(std::bind(
-            [this, result, offset](std::shared_ptr<IteratorContext> ctx,
-                                   std::vector<Tensor> input_element) {
-              std::shared_ptr<std::vector<Tensor>> return_values(
-                  new std::vector<Tensor>());
-              dataset()->captured_func_->RunAsync(
-                  ctx.get(), std::move(input_element), return_values.get(),
-                  [this, ctx, result, return_values, offset](Status status) {
-                    Callback(ctx, result, return_values, offset, status);
-                  },
-                  prefix());
-            },
-            ctx, std::move(input_element)));
+        std::shared_ptr<std::vector<Tensor>> return_values =
+            std::make_shared<std::vector<Tensor>>();
+        auto done = [this, ctx, result, return_values, offset](Status status) {
+          result->UpdateStatus(status);
+          if (status.ok()) {
+            EnsureOutputAllocated(ctx, result, return_values);
+            for (size_t i = 0; i < return_values->size(); ++i) {
+              const Tensor& tensor = return_values->at(i);
+              Tensor* batch = &(result->output)[i];
+              if (tensor.NumElements() !=
+                  (batch->NumElements() / batch->dim_size(0))) {
+                TensorShape batch_shape = batch->shape();
+                batch_shape.RemoveDim(0);
+                result->UpdateStatus(errors::InvalidArgument(
+                    "Cannot add tensor to the batch: number of elements does "
+                    "not match. Shapes are: [tensor]: ",
+                    tensor.shape().DebugString(),
+                    ", [batch]: ", batch_shape.DebugString()));
+                break;
+              }
+              // TODO(mrry): Add a version of DoParallelConcat that allows us to
+              // move `tensor` where possible, to speed up string tensor
+              // batching.
+              Status copy_status = ::tensorflow::functor::DoParallelConcat(
+                  *dataset()->device_, tensor, offset, batch);
+              if (!copy_status.ok()) {
+                result->UpdateStatus(copy_status);
+                break;
+              }
+            }
+            {
+              mutex_lock l(result->mu);
+              result->num_elements++;
+            }
+          }
+          CallCompleted(result);
+        };
+
+        // Apply the map function on `input_element`, storing the result in
+        // `return_values`, and invoking `done` when finished.
+        map_func_(ctx.get(), prefix(), std::move(input_element),
+                  std::move(return_values), std::move(done));
       }
 
       Status CopyPartialBatch(Tensor* output, const Tensor& value,
@@ -404,7 +437,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
-          std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
+          auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
           runner_thread_.reset(ctx->env()->StartThread(
               {}, "runner_thread",
               std::bind(&Iterator::RunnerThread, this, ctx_copy)));
@@ -509,8 +542,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
             while (!busy()) {
               if (call_counter_ % dataset()->batch_size_ == 0) {
-                batch_results_.emplace_back(
-                    new BatchResult(dataset()->batch_size_));
+                batch_results_.push_back(
+                    std::make_shared<BatchResult>(dataset()->batch_size_));
               }
               int64 offset = call_counter_++ % dataset()->batch_size_;
               new_calls.emplace_back(batch_results_.back(), offset);
@@ -527,7 +560,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader,
                              size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
-        batch_results_.emplace_back(new BatchResult(dataset()->batch_size_));
+        batch_results_.push_back(
+            std::make_shared<BatchResult>(dataset()->batch_size_));
         std::shared_ptr<BatchResult> result = batch_results_.back();
         string prefix = strings::StrCat("batch_results_", index);
         mutex_lock l(result->mu);
@@ -653,6 +687,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       const std::shared_ptr<condition_variable> cond_var_;
       // Identifies the maximum number of parallel calls.
       const std::shared_ptr<model::SharedState> num_parallel_calls_;
+      const MapAndBatchIteratorFunction map_func_;
+
       // Counts the number of outstanding calls for this batch.
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
       // Counts the total number of calls.
@@ -671,9 +707,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     const bool drop_remainder_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
-    const NameAttrList map_fn_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const Eigen::ThreadPoolDevice* device_;  // not owned
+    const MapAndBatchIteratorFunction map_func_;
   };
 
   const int op_version_;
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index f112e1dc43..0abb2eb4f3 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -17,7 +17,9 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -28,6 +30,9 @@ namespace {
 
 class MapDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using MapIteratorFunction = std::function<Status(
+      IteratorContext*, std::vector<Tensor>, std::vector<Tensor>*)>;
+
   explicit MapDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
@@ -43,8 +48,36 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    MapIteratorFunction map_func;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      map_func = [raw_captured_func](IteratorContext* ctx,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors) {
+        return raw_captured_func->Run(ctx, std::move(args), out_tensors);
+      };
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [indices, can_move](IteratorContext* ctx,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors) {
+        std::map<int, int> counts;
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (can_move[i]) {
+            out_tensors->push_back(std::move(args[indices[i]]));
+          } else {
+            out_tensors->push_back(args[indices[i]]);
+          }
+        }
+        return Status::OK();
+      };
+    }
+
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
-                          output_types_, output_shapes_);
+                          output_types_, output_shapes_, std::move(map_func));
   }
 
  private:
@@ -54,13 +87,15 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
             const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
             const DataTypeVector& output_types,
-            const std::vector<PartialTensorShape>& output_shapes)
+            const std::vector<PartialTensorShape>& output_shapes,
+            MapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
           captured_func_(std::move(captured_func)),
           output_types_(output_types),
-          output_shapes_(output_shapes) {
+          output_shapes_(output_shapes),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -68,8 +103,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::Map")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::Map")}, map_func_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -116,8 +151,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<Dataset>(params) {}
+      explicit Iterator(const Params& params, MapIteratorFunction map_func)
+          : DatasetIterator<Dataset>(params), map_func_(std::move(map_func)) {}
 
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
@@ -139,10 +174,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
           return Status::OK();
         }
 
-        // TODO(mrry): Avoid blocking a threadpool thread. We will need to
-        // stack-rip the iterators and use async kernels.
-        Status s =
-            dataset()->captured_func_->Run(ctx, std::move(args), out_tensors);
+        Status s = map_func_(ctx, args, out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
@@ -167,6 +199,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
      private:
       std::unique_ptr<IteratorBase> input_impl_;
+      const MapIteratorFunction map_func_;
     };
 
     const DatasetBase* const input_;
@@ -174,6 +207,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
     const std::unique_ptr<CapturedFunction> captured_func_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
+    const MapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 6abe6c8338..a34bb172d4 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/parallel_map_iterator.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -56,9 +57,49 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    ParallelMapIteratorFunction map_func;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      map_func = [raw_captured_func](IteratorContext* ctx, const string& prefix,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors,
+                                     StatusCallback done) {
+        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors,
+                                    std::move(done), prefix);
+      };
+      if (!use_inter_op_parallelism_) {
+        map_func = [map_func](IteratorContext* ctx, const string& prefix,
+                              std::vector<Tensor> args,
+                              std::vector<Tensor>* out_tensors,
+                              StatusCallback done) {
+          (*ctx->runner())(std::bind(map_func, ctx, prefix, std::move(args),
+                                     out_tensors, std::move(done)));
+        };
+      }
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [indices, can_move](IteratorContext* ctx, const string& prefix,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors,
+                                     StatusCallback done) {
+        std::map<int, int> counts;
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (can_move[i]) {
+            out_tensors->push_back(std::move(args[indices[i]]));
+          } else {
+            out_tensors->push_back(args[indices[i]]);
+          }
+        }
+        done(Status::OK());
+      };
+    }
+
     *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_,
                           output_shapes_, use_inter_op_parallelism_,
-                          std::move(captured_func));
+                          std::move(captured_func), std::move(map_func));
   }
 
  private:
@@ -69,7 +110,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
             bool use_inter_op_parallelism,
-            std::unique_ptr<CapturedFunction> captured_func)
+            std::unique_ptr<CapturedFunction> captured_func,
+            ParallelMapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
@@ -77,7 +119,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
           output_types_(output_types),
           output_shapes_(output_shapes),
           use_inter_op_parallelism_(use_inter_op_parallelism),
-          captured_func_(std::move(captured_func)) {
+          captured_func_(std::move(captured_func)),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -89,26 +132,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
         return captured_func_->Instantiate(ctx);
       };
 
-      const string& new_prefix = strings::StrCat(prefix, "::ParallelMap");
-      ParallelMapIteratorFunction map_func =
-          [this, new_prefix](IteratorContext* ctx,
-                             std::vector<Tensor> input_element,
-                             std::vector<Tensor>* result, StatusCallback done) {
-            captured_func_->RunAsync(ctx, std::move(input_element), result,
-                                     std::move(done), new_prefix);
-          };
-      if (!use_inter_op_parallelism_) {
-        map_func = [map_func](
-                       IteratorContext* ctx, std::vector<Tensor> input_element,
-                       std::vector<Tensor>* result, StatusCallback done) {
-          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
-                                     result, std::move(done)));
-        };
-      }
-
-      return NewParallelMapIterator({this, new_prefix}, input_,
-                                    std::move(init_func), std::move(map_func),
-                                    num_parallel_calls_);
+      return NewParallelMapIterator(
+          {this, strings::StrCat(prefix, "::ParallelMap")}, input_,
+          std::move(init_func), map_func_, num_parallel_calls_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -176,6 +202,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> output_shapes_;
     const bool use_inter_op_parallelism_;
     const std::unique_ptr<CapturedFunction> captured_func_;
+    const ParallelMapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 13bd4b6036..ebf41925c9 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,6 +22,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -179,7 +180,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   void EnsureRunnerThreadStarted(IteratorContext* ctx)
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
-      std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
+      auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
       runner_thread_.reset(ctx->env()->StartThread(
           {}, "runner_thread",
           std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
@@ -208,15 +209,15 @@ class ParallelMapIterator : public DatasetBaseIterator {
       return;
     }
 
-    // Call `func_(input_element)`, store the result in `result->return_values`,
-    // and notify `result->notification` to unblock a consumer.
     auto done = [this, result](Status status) {
       result->status.Update(status);
       CallCompleted(result);
     };
 
-    map_func_(ctx.get(), std::move(input_element), &result->return_values,
-              std::move(done));
+    // Apply the map function on `input_element`, storing the result in
+    // `result->return_values`, and invoking `done` when finished.
+    map_func_(ctx.get(), prefix(), std::move(input_element),
+              &result->return_values, std::move(done));
   }
 
   Status ProcessResult(const std::shared_ptr<InvocationResult>& result,
@@ -349,9 +350,9 @@ std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBase* input_dataset,
     std::function<Status(IteratorContext*)> init_func,
     ParallelMapIteratorFunction map_func, int32 num_parallel_calls) {
-  return std::unique_ptr<IteratorBase>(
-      new ParallelMapIterator(params, input_dataset, std::move(init_func),
-                              std::move(map_func), num_parallel_calls));
+  return MakeUnique<ParallelMapIterator>(
+      params, input_dataset, std::move(init_func), std::move(map_func),
+      num_parallel_calls);
 }
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h
index dc26c5cf25..813f13c9e4 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.h
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h
@@ -30,7 +30,7 @@ namespace data {
 // 3. A `std::vector<Tensor>*` to which the function will write the result.
 // 4. A `StatusCallback` that should be invoked when the function is complete.
 using ParallelMapIteratorFunction =
-    std::function<void(IteratorContext*, std::vector<Tensor>,
+    std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
                        std::vector<Tensor>*, StatusCallback)>;
 
 // Returns a new iterator that applies `map_func` to the elements of
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index 1d1a717062..7de5ea8860 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -182,7 +182,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      auto map_fn = [this](IteratorContext* ctx,
+      auto map_fn = [this](IteratorContext* ctx, const string& prefix,
                            std::vector<Tensor> input_element,
                            std::vector<Tensor>* result, StatusCallback done) {
         (*ctx->runner())([this, ctx, input_element, result, done]() {
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index afd0fc3abf..0703955fd4 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -332,6 +332,26 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       for _ in range(10):
         self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
 
+  @parameterized.named_parameters(
+      ("Identity", None, lambda x: x, None),
+      ("Replicate", None, lambda x: (x, x), None),
+      ("Swap", (None, None), lambda x, y: (y, x), None),
+      ("Project", (None, None), lambda x, y: x, None),
+  )
+  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
+    dataset = self.structuredDataset(structure).repeat().apply(
+        batching.map_and_batch(map_fn, batch_size=10))
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      if isinstance(structure, tuple):
+        expected = map_fn(
+            *sess.run(self.structuredElement(structure, shape=[10])))
+      else:
+        expected = map_fn(
+            sess.run(self.structuredElement(structure, shape=[10])))
+      self.assertAllEqual(expected, sess.run(get_next))
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
index 6b7afafa5d..a0c6b37a6d 100644
--- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
@@ -156,7 +156,7 @@ class FilterDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testReturnComponent(self):
+  def testShortCircuit(self):
     iterator = (
         dataset_ops.Dataset.zip(
             (dataset_ops.Dataset.range(10),
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 0c372ebb10..6efbe31ca1 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -622,7 +622,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
         self.assertSparseValuesEqual(actual, _sparse(i))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -649,7 +649,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
         self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval())
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -783,19 +783,57 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.assertTrue(all(tids[0] == tid for tid in tids))
 # pylint: enable=g-long-lambda
 
+  @parameterized.named_parameters(
+      ("SequentialIdentity", None, lambda x: x, None),
+      ("SequentialReplicate", None, lambda x: (x, x), None),
+      ("SequentialSwap", (None, None), lambda x, y: (y, x), None),
+      ("SequentialProject", (None, None), lambda x, y: x, None),
+      ("ParallelIdentity", None, lambda x: x, 10),
+      ("ParallelReplicate", None, lambda x: (x, x), 10),
+      ("ParallelSwap", (None, None), lambda x, y: (y, x), 10),
+      ("ParallelProject", (None, None), lambda x, y: x, 10),
+  )
+  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
+    dataset = self.structuredDataset(structure).repeat().map(
+        map_fn, num_parallel_calls=num_parallel_calls)
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      if isinstance(structure, tuple):
+        expected = map_fn(*sess.run(self.structuredElement(structure)))
+      else:
+        expected = map_fn(sess.run(self.structuredElement(structure)))
+      self.assertEqual(expected, sess.run(get_next))
+
 
 class MapDatasetBenchmark(test.Benchmark):
 
   def benchmarkChainOfMaps(self):
     chain_lengths = [0, 1, 2, 5, 10, 20, 50]
     for chain_length in chain_lengths:
-      for use_inter_op_parallelism in [False, True]:
+      for mode in ["general", "single-threaded", "short-circuit"]:
+        if mode == "general":
+          map_fn = lambda x: x + 1
+          use_inter_op_parallelism = True
+          print_label = ""
+          benchmark_label = ""
+        if mode == "single-threaded":
+          map_fn = lambda x: x + 1
+          use_inter_op_parallelism = False
+          print_label = " (single threaded mode)"
+          benchmark_label = "_single_threaded"
+        if mode == "short-circuit":
+          map_fn = lambda x: x
+          use_inter_op_parallelism = True  # should not have any significance
+          print_label = " (short circuit mode)"
+          benchmark_label = "_short_circuit"
+
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
           for _ in range(chain_length):
             dataset = dataset_ops.MapDataset(
                 dataset,
-                lambda x: x,
+                map_fn,
                 use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -813,25 +851,39 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset chain length%s: %d Median wall time: %f" %
-                  (" (single threaded mode)" if not use_inter_op_parallelism
-                   else "", chain_length, median_wall_time))
+                  (print_label, chain_length, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
                 name="benchmark_map_dataset_chain_latency_%d%s" %
-                (chain_length, "_single_threaded"
-                 if not use_inter_op_parallelism else ""))
+                (chain_length, benchmark_label))
 
   def benchmarkMapFanOut(self):
     fan_outs = [1, 2, 5, 10, 20, 50, 100]
     for fan_out in fan_outs:
-      for use_inter_op_parallelism in [False, True]:
+      for mode in ["general", "single-threaded", "short-circuit"]:
+        if mode == "general":
+          map_fn = lambda *xs: [x + 1 for x in xs]
+          use_inter_op_parallelism = True
+          print_label = ""
+          benchmark_label = ""
+        if mode == "single-threaded":
+          map_fn = lambda *xs: [x + 1 for x in xs]
+          use_inter_op_parallelism = False
+          print_label = " (single threaded mode)"
+          benchmark_label = "_single_threaded"
+        if mode == "short-circuit":
+          map_fn = lambda *xs: xs
+          use_inter_op_parallelism = True  # should not have any significance
+          print_label = " (short circuit mode)"
+          benchmark_label = "_short_circuit"
+
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(
               tuple(0 for _ in range(fan_out))).repeat(None)
           dataset = dataset_ops.MapDataset(
               dataset,
-              lambda *xs: xs,
+              map_fn,
               use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -849,14 +901,12 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset fan out%s: %d Median wall time: %f" %
-                  (" (single threaded mode)" if not use_inter_op_parallelism
-                   else "", fan_out, median_wall_time))
+                  (print_label, fan_out, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
-                name="benchmark_map_dataset_fan_out_%d%s" %
-                (fan_out, "_single_threaded"
-                 if not use_inter_op_parallelism else ""))
+                name="benchmark_map_dataset_fan_out_%d%s" % (fan_out,
+                                                             benchmark_label))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
index b730e10949..b73a94e683 100644
--- a/tensorflow/python/data/kernel_tests/test_base.py
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -19,10 +19,13 @@ from __future__ import print_function
 
 import re
 
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
@@ -107,3 +110,29 @@ class DatasetTestBase(test.TestCase):
       with self.assertRaisesRegexp(exception_class,
                                    re.escape(expected_message)):
         self.evaluate(next2())
+
+  def structuredDataset(self, structure, shape=None, dtype=dtypes.int64):
+    """Returns a singleton dataset with the given structure."""
+    if shape is None:
+      shape = []
+    if structure is None:
+      return dataset_ops.Dataset.from_tensors(
+          array_ops.zeros(shape, dtype=dtype))
+    else:
+      return dataset_ops.Dataset.zip(
+          tuple([
+              self.structuredDataset(substructure, shape, dtype)
+              for substructure in structure
+          ]))
+
+  def structuredElement(self, structure, shape=None, dtype=dtypes.int64):
+    """Returns an element with the given structure."""
+    if shape is None:
+      shape = []
+    if structure is None:
+      return array_ops.zeros(shape, dtype=dtype)
+    else:
+      return tuple([
+          self.structuredElement(substructure, shape, dtype)
+          for substructure in structure
+      ])
-- 
GitLab


From 6123677f264c615042a816e713f7f1204685e544 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Fri, 5 Oct 2018 14:18:41 -0700
Subject: [PATCH 0457/1085] Fix bug in nonpip builds in
 ci_parameterized_build.sh

The extra spaces were confusing bash's string-line-continuation from
the backslash `\` on the previous line.

PiperOrigin-RevId: 215964853
---
 tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index fdff867ff0..489722c0e9 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -423,7 +423,7 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
      [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
     # CPU only command, fully parallel.
     NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} "\
-      "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
+"${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == gpu* ]]; then
     # GPU only command, run as many jobs as the GPU count only.
     NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
-- 
GitLab


From ec7d70ea64520d0c5fe9a3c287c7ca4cf4d705b1 Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@google.com>
Date: Fri, 5 Oct 2018 14:33:51 -0700
Subject: [PATCH 0458/1085] Updated after fixing comments

---
 RELEASE.md | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index bbd816a4d4..56ac898d6e 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,28 +1,27 @@
 # Release 1.12.0
 
 ## Major Features and Improvements
-* tf.data optimizations. Users can now represent, get and set options of a tf.data pipeline.
-  tf.data.AUTOTUNE enables the level of parallelism to be determined at runtime.
-* Keras models can now be directly exported to the SavedModel format and used with Tensorflow Serving.
-
-## Breaking Changes
+* tf.data users can now use `tf.data.experimental.AUTOTUNE` for the num_parallel_calls argument of `tf.data.Dataset.interleave`, `tf.data.Dataset.map`, and `tf.data.experimental.map_and_batch` to dynamically tune the level of parallelism at runtime.
+* Keras models can now be directly exported to the SavedModel format(`tf.contrib.saved_model.save_keras_model()`) and used with Tensorflow Serving.
+* TensorFlow builds with XLA support by default, does not enable XLA by
+  default, but makes it accessible via default binary builds.
 
 ## Bug Fixes and Other Changes
 
 * tf.data:
+  * tf.data users can now represent, get, and set options of TensorFlow input pipelines using `tf.data.Options()`, `tf.data.Dataset.options()`, and `tf.data.Dataset.with_options()` respectively.
   * All C++ code moves to the `tensorflow::data` namespace.
-  * Support evaluating with a tf.data.Dataset
   * Adding support for `num_parallel_calls` to `tf.data.Dataset.interleave`.
 * `tf.contrib`:
-  * Remove tf.contrib.linalg.  Library now in tf.linalg.
-  * Replace any calls to tf.contrib.get_signature_def_by_key(metagraph_def, signature_def_key) with meta_graph_def.signature_def[signature_def_key]. Catching a ValueError exception thrown by tf.contrib.get_signature_def_by_key should be replaced by catching a KeyError exception.
-  * Keras model can be exported to the SavedModel format using tf.contrib.saved_model.save_keras_model().
+  * Removed `tf.contrib.linalg`. `tf.linalg` should be used instead.
+  * Replace any calls to `tf.contrib.get_signature_def_by_key(metagraph_def, signature_def_key)` with `meta_graph_def.signature_def[signature_def_key]`. Catching a ValueError exception thrown by `tf.contrib.get_signature_def_by_key` should be replaced by catching a KeyError exception.
 * `tf.contrib.data`
-  * deprecated, and replaced by `tf.data.experimental`.
+  * deprecated, and replaced by tf.data.experimental.
+* Keras:
+  * Keras model now supports evaluating with a `tf.data.Dataset`.
 * Other:
   * Instead of jemalloc, revert back to using system malloc.
-  * Build TensorFlow with XLA support included by default.
-  * Removed integer types from tf.nn.softplus and tf.nn.softsign OpDefs. This is a bugfix; these ops were never meant to support integers.
+  * Removed integer types from `tf.nn.softplus` and `tf.nn.softsign` OpDefs. This is a bugfix; these ops were never meant to support integers.
   * Allow subslicing Tensors with a single dimension.
   * Add option to calculate string length in Unicode characters
   * Add functionality to SubSlice a tensor.
@@ -34,7 +33,7 @@
   * Add tf_api_version flag. If --define=tf_api_version=2 flag is passed in, then bazel will build TensorFlow API version 2.0. Note that TensorFlow 2.0 is under active development and has no guarantees at this point.
   * Added additional compression options to TfRecordWriter
   * Performance improvements for regex full match operations.
-  * Replace tf.GraphKeys.VARIABLES with tf.GraphKeys.GLOBAL_VARIABLES
+  * Replace tf.GraphKeys.VARIABLES with `tf.GraphKeys.GLOBAL_VARIABLES`
   * Removed unused dynamic learning rate support.
 
 ## Thanks to our Contributors
-- 
GitLab


From 2edf99ec610bb0cd81635222fa6a84e0190d3c27 Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@google.com>
Date: Fri, 5 Oct 2018 14:35:23 -0700
Subject: [PATCH 0459/1085] Fix multiline

---
 RELEASE.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index 56ac898d6e..1077593e51 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -3,8 +3,7 @@
 ## Major Features and Improvements
 * tf.data users can now use `tf.data.experimental.AUTOTUNE` for the num_parallel_calls argument of `tf.data.Dataset.interleave`, `tf.data.Dataset.map`, and `tf.data.experimental.map_and_batch` to dynamically tune the level of parallelism at runtime.
 * Keras models can now be directly exported to the SavedModel format(`tf.contrib.saved_model.save_keras_model()`) and used with Tensorflow Serving.
-* TensorFlow builds with XLA support by default, does not enable XLA by
-  default, but makes it accessible via default binary builds.
+* TensorFlow builds with XLA support by default, does not enable XLA by default, but makes it accessible via default binary builds.
 
 ## Bug Fixes and Other Changes
 
-- 
GitLab


From c221f04b7efff5929f3a6d090983b52f3aa16166 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Fri, 5 Oct 2018 14:44:47 -0700
Subject: [PATCH 0460/1085] Automated rollback of commit
 ae0bc6f006497cc04a2ee75166d4ec71c7154fd8

PiperOrigin-RevId: 215969360
---
 tensorflow/core/kernels/data/BUILD            |  14 --
 tensorflow/core/kernels/data/dataset_utils.cc |  47 -----
 tensorflow/core/kernels/data/dataset_utils.h  |  20 --
 .../core/kernels/data/dataset_utils_test.cc   |  46 -----
 .../core/kernels/data/filter_dataset_op.cc    | 162 +++++++++-------
 .../kernels/data/map_and_batch_dataset_op.cc  | 180 +++++++-----------
 .../core/kernels/data/map_dataset_op.cc       |  56 ++----
 .../kernels/data/parallel_map_dataset_op.cc   |  73 +++----
 .../kernels/data/parallel_map_iterator.cc     |  17 +-
 .../core/kernels/data/parallel_map_iterator.h |   2 +-
 .../kernels/data/parse_example_dataset_op.cc  |   2 +-
 .../kernel_tests/map_and_batch_test.py        |  20 --
 .../kernel_tests/filter_dataset_op_test.py    |   2 +-
 .../data/kernel_tests/map_dataset_op_test.py  |  80 ++------
 .../python/data/kernel_tests/test_base.py     |  29 ---
 15 files changed, 230 insertions(+), 520 deletions(-)
 delete mode 100644 tensorflow/core/kernels/data/dataset_utils_test.cc

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 37c1c54786..451f8c1a6c 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -45,16 +45,6 @@ cc_library(
     ],
 )
 
-tf_cc_test(
-    name = "dataset_utils_test",
-    srcs = ["dataset_utils_test.cc"],
-    deps = [
-        ":dataset_utils",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-    ],
-)
-
 cc_library(
     name = "captured_function",
     srcs = ["captured_function.cc"],
@@ -215,7 +205,6 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
-        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -243,7 +232,6 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
-        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -257,7 +245,6 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
-        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -298,7 +285,6 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
-        ":dataset_utils",
         ":parallel_map_iterator",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index a40f7f2146..e10833f525 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -15,57 +15,10 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/common_runtime/device.h"
-#include "tensorflow/core/common_runtime/function.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/lib/gtl/cleanup.h"
 
 namespace tensorflow {
 namespace data {
 
-Status ComputeShortCircuitIndices(OpKernelContext* ctx,
-                                  const NameAttrList& func,
-                                  std::vector<int>* indices) {
-  FunctionLibraryRuntime::Handle fn_handle;
-  TF_RETURN_IF_ERROR(ctx->function_library()->Instantiate(
-      func.name(), AttrSlice(&func.attr()), &fn_handle));
-  auto cleanup = gtl::MakeCleanup([ctx, fn_handle]() {
-    Status s = ctx->function_library()->ReleaseHandle(fn_handle);
-    if (!s.ok()) {
-      LOG(WARNING) << "Failed to release handle: " << s.error_message();
-    }
-  });
-
-  const FunctionBody* fn_body =
-      ctx->function_library()->GetFunctionBody(fn_handle);
-  indices->resize(fn_body->ret_nodes.size());
-  for (size_t i = 0; i < fn_body->ret_nodes.size(); ++i) {
-    Node* ret_node = fn_body->ret_nodes[i];
-    Node* ret_input_node;
-    TF_RETURN_IF_ERROR(ret_node->input_node(0, &ret_input_node));
-    if (ret_input_node->def().op() == FunctionLibraryDefinition::kArgOp) {
-      TF_RETURN_IF_ERROR(
-          GetNodeAttr(ret_input_node->def(), "index", &((*indices)[i])));
-    } else {
-      indices->clear();
-      break;
-    }
-  }
-  return Status::OK();
-}
-
-std::vector<bool> ComputeMoveVector(const std::vector<int>& indices) {
-  std::map<int, int> last_use;
-  for (size_t i = 0; i < indices.size(); ++i) {
-    last_use[indices[i]] = i;
-  }
-  std::vector<bool> can_move;
-  can_move.resize(indices.size());
-  for (size_t i = 0; i < indices.size(); ++i) {
-    can_move[i] = last_use[indices[i]] == i;
-  }
-  return can_move;
-}
-
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index d777062293..6ec1350cd4 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -22,26 +22,6 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
-// This method is used to determine whether we can short-circuit the evaluation
-// of the user-defined function `func`. Short-circuting is possible if every
-// function output corresponds to one of its inputs (e.g. `f(x) = x`, `f(x,y) =
-// (y,x)`, or `f(x) = (x,x)`).
-//
-// If short-circuiting is possible, the method stores the mapping from output
-// indices to input indices in `indices`. Otherwise, `indices` will be empty.
-//
-// Returns non-ok status if analysis of the function fails.
-//
-// TODO(jsimsa): Extend this to support constants as well.
-Status ComputeShortCircuitIndices(OpKernelContext* ctx,
-                                  const NameAttrList& func,
-                                  std::vector<int>* indices);
-
-// Given a vector that maps output indices to input indices, return a vector
-// that identifies for which output indices can we move the input (assuming
-// output indices are processed left to right).
-std::vector<bool> ComputeMoveVector(const std::vector<int>& indices);
-
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc
deleted file mode 100644
index 43295b8ebb..0000000000
--- a/tensorflow/core/kernels/data/dataset_utils_test.cc
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/kernels/data/dataset_utils.h"
-
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace data {
-namespace {
-
-TEST(DatasetUtils, ComputeMoveVector) {
-  struct TestCase {
-    std::vector<int> indices;
-    std::vector<bool> expected;
-  };
-
-  TestCase test_cases[] = {
-      TestCase{{}, {}},
-      TestCase{{1}, {true}},
-      TestCase{{1, 1}, {false, true}},
-      TestCase{{1, 2}, {true, true}},
-      TestCase{{1, 1, 2}, {false, true, true}},
-      TestCase{{1, 2, 2}, {true, false, true}},
-  };
-
-  for (auto& test_case : test_cases) {
-    EXPECT_EQ(test_case.expected, ComputeMoveVector(test_case.indices));
-  }
-}
-
-}  // namespace
-}  // namespace data
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index be7d182a1f..00884314a9 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -18,11 +18,9 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
-#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -33,84 +31,67 @@ namespace {
 
 class FilterDatasetOp : public UnaryDatasetOpKernel {
  public:
-  using FilterIteratorPredicate =
-      std::function<Status(IteratorContext*, std::vector<Tensor>, bool*)>;
-
   explicit FilterDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx) {
+      : UnaryDatasetOpKernel(ctx),
+        graph_def_version_(ctx->graph_def_version()) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_));
   }
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
+    FunctionLibraryRuntime::Handle pred_handle;
+    OP_REQUIRES_OK(ctx,
+                   ctx->function_library()->Instantiate(
+                       func_.name(), AttrSlice(&func_.attr()), &pred_handle));
+    auto cleanup = gtl::MakeCleanup([ctx, pred_handle]() {
+      OP_REQUIRES_OK(ctx, ctx->function_library()->ReleaseHandle(pred_handle));
+    });
+
+    const FunctionBody* pred_body =
+        ctx->function_library()->GetFunctionBody(pred_handle);
+    OP_REQUIRES(ctx, pred_body->ret_nodes.size() == 1,
+                errors::InvalidArgument(
+                    "predicate function must have a single return value."));
+    Node* ret_node = pred_body->ret_nodes[0];
+    Node* ret_input_node;
+    OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node));
+
     std::unique_ptr<CapturedFunction> captured_func;
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    std::vector<int> indices;
-    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
-    OP_REQUIRES(ctx, indices.size() <= 1,
-                errors::InvalidArgument(
-                    "predicate function has more than one return value."));
-
-    FilterIteratorPredicate filter_pred;
-    if (indices.empty()) {
-      CapturedFunction* raw_captured_func = captured_func.get();
-      filter_pred = [raw_captured_func](IteratorContext* ctx,
-                                        const std::vector<Tensor>& args,
-                                        bool* out_matched) {
-        std::vector<Tensor> result;
-        TF_RETURN_IF_ERROR(
-            raw_captured_func->RunWithBorrowedArgs(ctx, args, &result));
-
-        if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
-            result[0].NumElements() != 1) {
-          return errors::InvalidArgument(
-              "Filter predicate `f` must return a scalar bool.");
-        }
-        *out_matched = result[0].scalar<bool>()();
-        return Status::OK();
-      };
+    if (ret_input_node->def().op() == "_Arg") {
+      int32 index = -1;
+      OP_REQUIRES_OK(ctx, GetNodeAttr(ret_input_node->def(), "index", &index));
+      *output = new FilterTensorDataset(ctx, input, func_,
+                                        std::move(captured_func), index);
     } else {
-      filter_pred = [indices](IteratorContext* ctx,
-                              const std::vector<Tensor>& args,
-                              bool* out_matched) {
-        const Tensor& predicate = args[indices[0]];
-        if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
-          return errors::InvalidArgument(
-              "Filter predicate `f` must return a scalar bool.");
-        }
-        *out_matched = predicate.scalar<bool>()();
-        return Status::OK();
-      };
+      *output = new FilterFunctionDataset(ctx, input, func_,
+                                          std::move(captured_func));
     }
-
-    *output = new Dataset(ctx, input, func_, std::move(captured_func),
-                          std::move(filter_pred));
   }
 
  private:
-  class Dataset : public DatasetBase {
+  const int graph_def_version_;
+
+  class FilterDatasetBase : public DatasetBase {
    public:
-    Dataset(OpKernelContext* ctx, const DatasetBase* input,
-            const NameAttrList& func,
-            std::unique_ptr<CapturedFunction> captured_func,
-            FilterIteratorPredicate filter_pred)
+    FilterDatasetBase(OpKernelContext* ctx, const DatasetBase* input,
+                      const NameAttrList& func,
+                      std::unique_ptr<CapturedFunction> captured_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
-          captured_func_(std::move(captured_func)),
-          filter_pred_(std::move(filter_pred)) {
+          captured_func_(std::move(captured_func)) {
       input_->Ref();
     }
 
-    ~Dataset() override { input_->Unref(); }
+    ~FilterDatasetBase() override { input_->Unref(); }
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return MakeUnique<Iterator>(
-          Iterator::Params{this, strings::StrCat(prefix, "::Filter")},
-          filter_pred_);
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::Filter")}));
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -152,15 +133,17 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       return Status::OK();
     }
 
+    virtual Status EvaluatePredicate(IteratorContext* ctx,
+                                     const std::vector<Tensor>& element,
+                                     bool* out_matched) const = 0;
+
    private:
-    class Iterator : public DatasetIterator<Dataset> {
+    class Iterator : public DatasetIterator<FilterDatasetBase> {
      public:
-      explicit Iterator(const Params& params,
-                        FilterIteratorPredicate filter_pred)
-          : DatasetIterator<Dataset>(params),
+      explicit Iterator(const Params& params)
+          : DatasetIterator<FilterDatasetBase>(params),
             filtered_elements_(0),
-            dropped_elements_(0),
-            filter_pred_(std::move(filter_pred)) {
+            dropped_elements_(0) {
         std::vector<string> components =
             str_util::Split(params.prefix, "::", str_util::SkipEmpty());
         prefix_end_ = components.back();
@@ -197,7 +180,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          TF_RETURN_IF_ERROR(filter_pred_(ctx, *out_tensors, &matched));
+          TF_RETURN_IF_ERROR(
+              dataset()->EvaluatePredicate(ctx, *out_tensors, &matched));
           if (!matched) {
             // Clear the output tensor list since it didn't match.
             out_tensors->clear();
@@ -267,14 +251,64 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       int64 filtered_elements_ GUARDED_BY(mu_);
       int64 dropped_elements_ GUARDED_BY(mu_);
-      const FilterIteratorPredicate filter_pred_;
       string prefix_end_;
     };
 
     const DatasetBase* const input_;
     const NameAttrList func_;
+
+   protected:
     const std::unique_ptr<CapturedFunction> captured_func_;
-    const FilterIteratorPredicate filter_pred_;
+  };
+
+  class FilterFunctionDataset : public FilterDatasetBase {
+   public:
+    using FilterDatasetBase::FilterDatasetBase;
+
+   protected:
+    Status EvaluatePredicate(IteratorContext* ctx,
+                             const std::vector<Tensor>& element,
+                             bool* out_matched) const override {
+      // TODO(mrry): Avoid blocking a threadpool thread. We will need to
+      // stack-rip the iterators and use async kernels.
+      std::vector<Tensor> result;
+      TF_RETURN_IF_ERROR(
+          captured_func_->RunWithBorrowedArgs(ctx, element, &result));
+
+      if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
+          result[0].NumElements() != 1) {
+        return errors::InvalidArgument(
+            "Filter predicate `f` must return a scalar bool.");
+      }
+      *out_matched = result[0].scalar<bool>()();
+      return Status::OK();
+    }
+  };
+
+  class FilterTensorDataset : public FilterDatasetBase {
+   public:
+    FilterTensorDataset(OpKernelContext* ctx, const DatasetBase* input,
+                        const NameAttrList& func,
+                        std::unique_ptr<CapturedFunction> captured_func,
+                        int32 index)
+        : FilterDatasetBase(ctx, input, func, std::move(captured_func)),
+          index_(index) {}
+
+   protected:
+    Status EvaluatePredicate(IteratorContext* ctx,
+                             const std::vector<Tensor>& element,
+                             bool* out_matched) const override {
+      const Tensor& predicate = element[index_];
+      if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
+        return errors::InvalidArgument(
+            "Filter predicate `f` must return a scalar bool.");
+      }
+      *out_matched = predicate.scalar<bool>()();
+      return Status::OK();
+    }
+
+   private:
+    const int32 index_;
   };
 
  private:
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index f9aaa3080e..bf08970560 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
-#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/inplace_ops_functor.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
@@ -30,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -43,10 +41,6 @@ namespace {
 // transformation more robust.
 class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  public:
-  using MapAndBatchIteratorFunction =
-      std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
-                         std::shared_ptr<std::vector<Tensor>>, StatusCallback)>;
-
   explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx)
       : UnaryDatasetOpKernel(ctx),
         op_version_(ctx->def().op() == "MapAndBatchDataset" ? 1 : 2) {
@@ -97,66 +91,31 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    std::vector<int> indices;
-    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
-
-    MapAndBatchIteratorFunction map_func;
-    if (indices.empty()) {
-      CapturedFunction* raw_captured_func = captured_func.get();
-      map_func = [raw_captured_func](
-                     IteratorContext* ctx, const string& prefix,
-                     std::vector<Tensor> args,
-                     std::shared_ptr<std::vector<Tensor>> out_tensors,
-                     StatusCallback done) {
-        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors.get(),
-                                    std::move(done), prefix);
-      };
-    } else {
-      std::vector<bool> can_move = ComputeMoveVector(indices);
-      map_func = [indices, can_move](
-                     IteratorContext* ctx, const string& prefix,
-                     std::vector<Tensor> args,
-                     std::shared_ptr<std::vector<Tensor>> out_tensors,
-                     StatusCallback done) {
-        for (size_t i = 0; i < indices.size(); ++i) {
-          if (can_move[i]) {
-            out_tensors->push_back(std::move(args[indices[i]]));
-          } else {
-            out_tensors->push_back(args[indices[i]]);
-          }
-        }
-        done(Status::OK());
-      };
-    }
-
-    *output = new Dataset(ctx, input, func_, batch_size, num_parallel_calls,
-                          drop_remainder, output_types_, output_shapes_,
-                          std::move(captured_func), &ctx->eigen_cpu_device(),
-                          std::move(map_func));
+    *output = new Dataset(ctx, input, batch_size, num_parallel_calls,
+                          drop_remainder, output_types_, output_shapes_, func_,
+                          std::move(captured_func), &ctx->eigen_cpu_device());
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
-    Dataset(OpKernelContext* ctx, const DatasetBase* input,
-            const NameAttrList& func, int64 batch_size,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size,
             int64 num_parallel_calls, bool drop_remainder,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
+            const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
-            const Eigen::ThreadPoolDevice* device,
-            MapAndBatchIteratorFunction map_func)
+            const Eigen::ThreadPoolDevice* device)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
-          func_(func),
           batch_size_(batch_size),
           num_parallel_calls_(num_parallel_calls),
           drop_remainder_(drop_remainder),
           output_types_(output_types),
           output_shapes_(output_shapes),
+          map_fn_(func),
           captured_func_(std::move(captured_func)),
-          device_(device),
-          map_func_(std::move(map_func)) {
+          device_(device) {
       input_->Ref();
     }
 
@@ -164,9 +123,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return MakeUnique<Iterator>(
-          Iterator::Params{this, strings::StrCat(prefix, "::MapAndBatch")},
-          map_func_);
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::MapAndBatch")}));
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -185,7 +143,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, map_fn_.name()));
       Node* input_graph_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
       Node* batch_size_node;
@@ -207,7 +165,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         other_arguments_types.emplace_back(t.dtype());
       }
       AttrValue f;
-      b->BuildAttrValue(func_, &f);
+      b->BuildAttrValue(map_fn_, &f);
       AttrValue other_arguments_types_attr;
       b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
 
@@ -227,14 +185,12 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params,
-                        MapAndBatchIteratorFunction map_func)
+      explicit Iterator(const Params& params)
           : DatasetIterator<Dataset>(params),
             mu_(std::make_shared<mutex>()),
             cond_var_(std::make_shared<condition_variable>()),
             num_parallel_calls_(std::make_shared<model::SharedState>(
-                params.dataset->num_parallel_calls_, mu_, cond_var_)),
-            map_func_(std::move(map_func)) {}
+                params.dataset->num_parallel_calls_, mu_, cond_var_)) {}
 
       ~Iterator() override {
         mutex_lock l(*mu_);
@@ -341,6 +297,44 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         int64 num_calls;  // access guarded by owner's mutex
       };
 
+      void Callback(const std::shared_ptr<IteratorContext>& ctx,
+                    const std::shared_ptr<BatchResult>& result,
+                    const std::shared_ptr<std::vector<Tensor>>& return_values,
+                    int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) {
+        result->UpdateStatus(status);
+        if (status.ok()) {
+          EnsureOutputAllocated(ctx, result, return_values);
+          for (size_t i = 0; i < return_values->size(); ++i) {
+            const Tensor& tensor = return_values->at(i);
+            Tensor* batch = &(result->output)[i];
+            if (tensor.NumElements() !=
+                (batch->NumElements() / batch->dim_size(0))) {
+              TensorShape batch_shape = batch->shape();
+              batch_shape.RemoveDim(0);
+              result->UpdateStatus(errors::InvalidArgument(
+                  "Cannot add tensor to the batch: number of elements does not "
+                  "match. Shapes are: [tensor]: ",
+                  tensor.shape().DebugString(),
+                  ", [batch]: ", batch_shape.DebugString()));
+              break;
+            }
+            // TODO(mrry): Add a version of DoParallelConcat that allows us to
+            // move `tensor` where possible, to speed up string tensor batching.
+            Status copy_status = ::tensorflow::functor::DoParallelConcat(
+                *dataset()->device_, tensor, offset, batch);
+            if (!copy_status.ok()) {
+              result->UpdateStatus(copy_status);
+              break;
+            }
+          }
+          {
+            mutex_lock l(result->mu);
+            result->num_elements++;
+          }
+        }
+        CallCompleted(result);
+      }
+
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
           LOCKS_EXCLUDED(*mu_) {
         mutex_lock l(*mu_);
@@ -369,48 +363,21 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           return;
         }
 
-        std::shared_ptr<std::vector<Tensor>> return_values =
-            std::make_shared<std::vector<Tensor>>();
-        auto done = [this, ctx, result, return_values, offset](Status status) {
-          result->UpdateStatus(status);
-          if (status.ok()) {
-            EnsureOutputAllocated(ctx, result, return_values);
-            for (size_t i = 0; i < return_values->size(); ++i) {
-              const Tensor& tensor = return_values->at(i);
-              Tensor* batch = &(result->output)[i];
-              if (tensor.NumElements() !=
-                  (batch->NumElements() / batch->dim_size(0))) {
-                TensorShape batch_shape = batch->shape();
-                batch_shape.RemoveDim(0);
-                result->UpdateStatus(errors::InvalidArgument(
-                    "Cannot add tensor to the batch: number of elements does "
-                    "not match. Shapes are: [tensor]: ",
-                    tensor.shape().DebugString(),
-                    ", [batch]: ", batch_shape.DebugString()));
-                break;
-              }
-              // TODO(mrry): Add a version of DoParallelConcat that allows us to
-              // move `tensor` where possible, to speed up string tensor
-              // batching.
-              Status copy_status = ::tensorflow::functor::DoParallelConcat(
-                  *dataset()->device_, tensor, offset, batch);
-              if (!copy_status.ok()) {
-                result->UpdateStatus(copy_status);
-                break;
-              }
-            }
-            {
-              mutex_lock l(result->mu);
-              result->num_elements++;
-            }
-          }
-          CallCompleted(result);
-        };
-
-        // Apply the map function on `input_element`, storing the result in
-        // `return_values`, and invoking `done` when finished.
-        map_func_(ctx.get(), prefix(), std::move(input_element),
-                  std::move(return_values), std::move(done));
+        // Call `captured_func_(input_element)`, using `Callback` to store the
+        // result in `result`.
+        (*ctx->runner())(std::bind(
+            [this, result, offset](std::shared_ptr<IteratorContext> ctx,
+                                   std::vector<Tensor> input_element) {
+              std::shared_ptr<std::vector<Tensor>> return_values(
+                  new std::vector<Tensor>());
+              dataset()->captured_func_->RunAsync(
+                  ctx.get(), std::move(input_element), return_values.get(),
+                  [this, ctx, result, return_values, offset](Status status) {
+                    Callback(ctx, result, return_values, offset, status);
+                  },
+                  prefix());
+            },
+            ctx, std::move(input_element)));
       }
 
       Status CopyPartialBatch(Tensor* output, const Tensor& value,
@@ -437,7 +404,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
-          auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
+          std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
           runner_thread_.reset(ctx->env()->StartThread(
               {}, "runner_thread",
               std::bind(&Iterator::RunnerThread, this, ctx_copy)));
@@ -542,8 +509,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
             while (!busy()) {
               if (call_counter_ % dataset()->batch_size_ == 0) {
-                batch_results_.push_back(
-                    std::make_shared<BatchResult>(dataset()->batch_size_));
+                batch_results_.emplace_back(
+                    new BatchResult(dataset()->batch_size_));
               }
               int64 offset = call_counter_++ % dataset()->batch_size_;
               new_calls.emplace_back(batch_results_.back(), offset);
@@ -560,8 +527,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader,
                              size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
-        batch_results_.push_back(
-            std::make_shared<BatchResult>(dataset()->batch_size_));
+        batch_results_.emplace_back(new BatchResult(dataset()->batch_size_));
         std::shared_ptr<BatchResult> result = batch_results_.back();
         string prefix = strings::StrCat("batch_results_", index);
         mutex_lock l(result->mu);
@@ -687,8 +653,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       const std::shared_ptr<condition_variable> cond_var_;
       // Identifies the maximum number of parallel calls.
       const std::shared_ptr<model::SharedState> num_parallel_calls_;
-      const MapAndBatchIteratorFunction map_func_;
-
       // Counts the number of outstanding calls for this batch.
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
       // Counts the total number of calls.
@@ -707,9 +671,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     const bool drop_remainder_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
+    const NameAttrList map_fn_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const Eigen::ThreadPoolDevice* device_;  // not owned
-    const MapAndBatchIteratorFunction map_func_;
   };
 
   const int op_version_;
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index 0abb2eb4f3..f112e1dc43 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -17,9 +17,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
-#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -30,9 +28,6 @@ namespace {
 
 class MapDatasetOp : public UnaryDatasetOpKernel {
  public:
-  using MapIteratorFunction = std::function<Status(
-      IteratorContext*, std::vector<Tensor>, std::vector<Tensor>*)>;
-
   explicit MapDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
@@ -48,36 +43,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
-    std::vector<int> indices;
-    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
-
-    MapIteratorFunction map_func;
-    if (indices.empty()) {
-      CapturedFunction* raw_captured_func = captured_func.get();
-      map_func = [raw_captured_func](IteratorContext* ctx,
-                                     std::vector<Tensor> args,
-                                     std::vector<Tensor>* out_tensors) {
-        return raw_captured_func->Run(ctx, std::move(args), out_tensors);
-      };
-    } else {
-      std::vector<bool> can_move = ComputeMoveVector(indices);
-      map_func = [indices, can_move](IteratorContext* ctx,
-                                     std::vector<Tensor> args,
-                                     std::vector<Tensor>* out_tensors) {
-        std::map<int, int> counts;
-        for (size_t i = 0; i < indices.size(); ++i) {
-          if (can_move[i]) {
-            out_tensors->push_back(std::move(args[indices[i]]));
-          } else {
-            out_tensors->push_back(args[indices[i]]);
-          }
-        }
-        return Status::OK();
-      };
-    }
-
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
-                          output_types_, output_shapes_, std::move(map_func));
+                          output_types_, output_shapes_);
   }
 
  private:
@@ -87,15 +54,13 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
             const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
             const DataTypeVector& output_types,
-            const std::vector<PartialTensorShape>& output_shapes,
-            MapIteratorFunction map_func)
+            const std::vector<PartialTensorShape>& output_shapes)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
           captured_func_(std::move(captured_func)),
           output_types_(output_types),
-          output_shapes_(output_shapes),
-          map_func_(std::move(map_func)) {
+          output_shapes_(output_shapes) {
       input_->Ref();
     }
 
@@ -103,8 +68,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return MakeUnique<Iterator>(
-          Iterator::Params{this, strings::StrCat(prefix, "::Map")}, map_func_);
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::Map")}));
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -151,8 +116,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params, MapIteratorFunction map_func)
-          : DatasetIterator<Dataset>(params), map_func_(std::move(map_func)) {}
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params) {}
 
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
@@ -174,7 +139,10 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
           return Status::OK();
         }
 
-        Status s = map_func_(ctx, args, out_tensors);
+        // TODO(mrry): Avoid blocking a threadpool thread. We will need to
+        // stack-rip the iterators and use async kernels.
+        Status s =
+            dataset()->captured_func_->Run(ctx, std::move(args), out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
@@ -199,7 +167,6 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
      private:
       std::unique_ptr<IteratorBase> input_impl_;
-      const MapIteratorFunction map_func_;
     };
 
     const DatasetBase* const input_;
@@ -207,7 +174,6 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
     const std::unique_ptr<CapturedFunction> captured_func_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
-    const MapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index a34bb172d4..6abe6c8338 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
-#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/parallel_map_iterator.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -57,49 +56,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
-    std::vector<int> indices;
-    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
-
-    ParallelMapIteratorFunction map_func;
-    if (indices.empty()) {
-      CapturedFunction* raw_captured_func = captured_func.get();
-      map_func = [raw_captured_func](IteratorContext* ctx, const string& prefix,
-                                     std::vector<Tensor> args,
-                                     std::vector<Tensor>* out_tensors,
-                                     StatusCallback done) {
-        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors,
-                                    std::move(done), prefix);
-      };
-      if (!use_inter_op_parallelism_) {
-        map_func = [map_func](IteratorContext* ctx, const string& prefix,
-                              std::vector<Tensor> args,
-                              std::vector<Tensor>* out_tensors,
-                              StatusCallback done) {
-          (*ctx->runner())(std::bind(map_func, ctx, prefix, std::move(args),
-                                     out_tensors, std::move(done)));
-        };
-      }
-    } else {
-      std::vector<bool> can_move = ComputeMoveVector(indices);
-      map_func = [indices, can_move](IteratorContext* ctx, const string& prefix,
-                                     std::vector<Tensor> args,
-                                     std::vector<Tensor>* out_tensors,
-                                     StatusCallback done) {
-        std::map<int, int> counts;
-        for (size_t i = 0; i < indices.size(); ++i) {
-          if (can_move[i]) {
-            out_tensors->push_back(std::move(args[indices[i]]));
-          } else {
-            out_tensors->push_back(args[indices[i]]);
-          }
-        }
-        done(Status::OK());
-      };
-    }
-
     *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_,
                           output_shapes_, use_inter_op_parallelism_,
-                          std::move(captured_func), std::move(map_func));
+                          std::move(captured_func));
   }
 
  private:
@@ -110,8 +69,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
             bool use_inter_op_parallelism,
-            std::unique_ptr<CapturedFunction> captured_func,
-            ParallelMapIteratorFunction map_func)
+            std::unique_ptr<CapturedFunction> captured_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
@@ -119,8 +77,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
           output_types_(output_types),
           output_shapes_(output_shapes),
           use_inter_op_parallelism_(use_inter_op_parallelism),
-          captured_func_(std::move(captured_func)),
-          map_func_(std::move(map_func)) {
+          captured_func_(std::move(captured_func)) {
       input_->Ref();
     }
 
@@ -132,9 +89,26 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
         return captured_func_->Instantiate(ctx);
       };
 
-      return NewParallelMapIterator(
-          {this, strings::StrCat(prefix, "::ParallelMap")}, input_,
-          std::move(init_func), map_func_, num_parallel_calls_);
+      const string& new_prefix = strings::StrCat(prefix, "::ParallelMap");
+      ParallelMapIteratorFunction map_func =
+          [this, new_prefix](IteratorContext* ctx,
+                             std::vector<Tensor> input_element,
+                             std::vector<Tensor>* result, StatusCallback done) {
+            captured_func_->RunAsync(ctx, std::move(input_element), result,
+                                     std::move(done), new_prefix);
+          };
+      if (!use_inter_op_parallelism_) {
+        map_func = [map_func](
+                       IteratorContext* ctx, std::vector<Tensor> input_element,
+                       std::vector<Tensor>* result, StatusCallback done) {
+          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
+                                     result, std::move(done)));
+        };
+      }
+
+      return NewParallelMapIterator({this, new_prefix}, input_,
+                                    std::move(init_func), std::move(map_func),
+                                    num_parallel_calls_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -202,7 +176,6 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> output_shapes_;
     const bool use_inter_op_parallelism_;
     const std::unique_ptr<CapturedFunction> captured_func_;
-    const ParallelMapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index ebf41925c9..13bd4b6036 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,7 +22,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -180,7 +179,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   void EnsureRunnerThreadStarted(IteratorContext* ctx)
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
-      auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
+      std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
       runner_thread_.reset(ctx->env()->StartThread(
           {}, "runner_thread",
           std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
@@ -209,15 +208,15 @@ class ParallelMapIterator : public DatasetBaseIterator {
       return;
     }
 
+    // Call `func_(input_element)`, store the result in `result->return_values`,
+    // and notify `result->notification` to unblock a consumer.
     auto done = [this, result](Status status) {
       result->status.Update(status);
       CallCompleted(result);
     };
 
-    // Apply the map function on `input_element`, storing the result in
-    // `result->return_values`, and invoking `done` when finished.
-    map_func_(ctx.get(), prefix(), std::move(input_element),
-              &result->return_values, std::move(done));
+    map_func_(ctx.get(), std::move(input_element), &result->return_values,
+              std::move(done));
   }
 
   Status ProcessResult(const std::shared_ptr<InvocationResult>& result,
@@ -350,9 +349,9 @@ std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBase* input_dataset,
     std::function<Status(IteratorContext*)> init_func,
     ParallelMapIteratorFunction map_func, int32 num_parallel_calls) {
-  return MakeUnique<ParallelMapIterator>(
-      params, input_dataset, std::move(init_func), std::move(map_func),
-      num_parallel_calls);
+  return std::unique_ptr<IteratorBase>(
+      new ParallelMapIterator(params, input_dataset, std::move(init_func),
+                              std::move(map_func), num_parallel_calls));
 }
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h
index 813f13c9e4..dc26c5cf25 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.h
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h
@@ -30,7 +30,7 @@ namespace data {
 // 3. A `std::vector<Tensor>*` to which the function will write the result.
 // 4. A `StatusCallback` that should be invoked when the function is complete.
 using ParallelMapIteratorFunction =
-    std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
+    std::function<void(IteratorContext*, std::vector<Tensor>,
                        std::vector<Tensor>*, StatusCallback)>;
 
 // Returns a new iterator that applies `map_func` to the elements of
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index 7de5ea8860..1d1a717062 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -182,7 +182,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      auto map_fn = [this](IteratorContext* ctx, const string& prefix,
+      auto map_fn = [this](IteratorContext* ctx,
                            std::vector<Tensor> input_element,
                            std::vector<Tensor>* result, StatusCallback done) {
         (*ctx->runner())([this, ctx, input_element, result, done]() {
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index 0703955fd4..afd0fc3abf 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -332,26 +332,6 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       for _ in range(10):
         self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
 
-  @parameterized.named_parameters(
-      ("Identity", None, lambda x: x, None),
-      ("Replicate", None, lambda x: (x, x), None),
-      ("Swap", (None, None), lambda x, y: (y, x), None),
-      ("Project", (None, None), lambda x, y: x, None),
-  )
-  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
-    dataset = self.structuredDataset(structure).repeat().apply(
-        batching.map_and_batch(map_fn, batch_size=10))
-    get_next = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      if isinstance(structure, tuple):
-        expected = map_fn(
-            *sess.run(self.structuredElement(structure, shape=[10])))
-      else:
-        expected = map_fn(
-            sess.run(self.structuredElement(structure, shape=[10])))
-      self.assertAllEqual(expected, sess.run(get_next))
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
index a0c6b37a6d..6b7afafa5d 100644
--- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
@@ -156,7 +156,7 @@ class FilterDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testShortCircuit(self):
+  def testReturnComponent(self):
     iterator = (
         dataset_ops.Dataset.zip(
             (dataset_ops.Dataset.range(10),
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 6efbe31ca1..0c372ebb10 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -622,7 +622,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
+        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
         self.assertSparseValuesEqual(actual, _sparse(i))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -649,7 +649,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
+        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
         self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval())
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -783,57 +783,19 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.assertTrue(all(tids[0] == tid for tid in tids))
 # pylint: enable=g-long-lambda
 
-  @parameterized.named_parameters(
-      ("SequentialIdentity", None, lambda x: x, None),
-      ("SequentialReplicate", None, lambda x: (x, x), None),
-      ("SequentialSwap", (None, None), lambda x, y: (y, x), None),
-      ("SequentialProject", (None, None), lambda x, y: x, None),
-      ("ParallelIdentity", None, lambda x: x, 10),
-      ("ParallelReplicate", None, lambda x: (x, x), 10),
-      ("ParallelSwap", (None, None), lambda x, y: (y, x), 10),
-      ("ParallelProject", (None, None), lambda x, y: x, 10),
-  )
-  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
-    dataset = self.structuredDataset(structure).repeat().map(
-        map_fn, num_parallel_calls=num_parallel_calls)
-    get_next = dataset.make_one_shot_iterator().get_next()
-
-    with self.cached_session() as sess:
-      if isinstance(structure, tuple):
-        expected = map_fn(*sess.run(self.structuredElement(structure)))
-      else:
-        expected = map_fn(sess.run(self.structuredElement(structure)))
-      self.assertEqual(expected, sess.run(get_next))
-
 
 class MapDatasetBenchmark(test.Benchmark):
 
   def benchmarkChainOfMaps(self):
     chain_lengths = [0, 1, 2, 5, 10, 20, 50]
     for chain_length in chain_lengths:
-      for mode in ["general", "single-threaded", "short-circuit"]:
-        if mode == "general":
-          map_fn = lambda x: x + 1
-          use_inter_op_parallelism = True
-          print_label = ""
-          benchmark_label = ""
-        if mode == "single-threaded":
-          map_fn = lambda x: x + 1
-          use_inter_op_parallelism = False
-          print_label = " (single threaded mode)"
-          benchmark_label = "_single_threaded"
-        if mode == "short-circuit":
-          map_fn = lambda x: x
-          use_inter_op_parallelism = True  # should not have any significance
-          print_label = " (short circuit mode)"
-          benchmark_label = "_short_circuit"
-
+      for use_inter_op_parallelism in [False, True]:
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
           for _ in range(chain_length):
             dataset = dataset_ops.MapDataset(
                 dataset,
-                map_fn,
+                lambda x: x,
                 use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -851,39 +813,25 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset chain length%s: %d Median wall time: %f" %
-                  (print_label, chain_length, median_wall_time))
+                  (" (single threaded mode)" if not use_inter_op_parallelism
+                   else "", chain_length, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
                 name="benchmark_map_dataset_chain_latency_%d%s" %
-                (chain_length, benchmark_label))
+                (chain_length, "_single_threaded"
+                 if not use_inter_op_parallelism else ""))
 
   def benchmarkMapFanOut(self):
     fan_outs = [1, 2, 5, 10, 20, 50, 100]
     for fan_out in fan_outs:
-      for mode in ["general", "single-threaded", "short-circuit"]:
-        if mode == "general":
-          map_fn = lambda *xs: [x + 1 for x in xs]
-          use_inter_op_parallelism = True
-          print_label = ""
-          benchmark_label = ""
-        if mode == "single-threaded":
-          map_fn = lambda *xs: [x + 1 for x in xs]
-          use_inter_op_parallelism = False
-          print_label = " (single threaded mode)"
-          benchmark_label = "_single_threaded"
-        if mode == "short-circuit":
-          map_fn = lambda *xs: xs
-          use_inter_op_parallelism = True  # should not have any significance
-          print_label = " (short circuit mode)"
-          benchmark_label = "_short_circuit"
-
+      for use_inter_op_parallelism in [False, True]:
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(
               tuple(0 for _ in range(fan_out))).repeat(None)
           dataset = dataset_ops.MapDataset(
               dataset,
-              map_fn,
+              lambda *xs: xs,
               use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -901,12 +849,14 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset fan out%s: %d Median wall time: %f" %
-                  (print_label, fan_out, median_wall_time))
+                  (" (single threaded mode)" if not use_inter_op_parallelism
+                   else "", fan_out, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
-                name="benchmark_map_dataset_fan_out_%d%s" % (fan_out,
-                                                             benchmark_label))
+                name="benchmark_map_dataset_fan_out_%d%s" %
+                (fan_out, "_single_threaded"
+                 if not use_inter_op_parallelism else ""))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
index b73a94e683..b730e10949 100644
--- a/tensorflow/python/data/kernel_tests/test_base.py
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -19,13 +19,10 @@ from __future__ import print_function
 
 import re
 
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
@@ -110,29 +107,3 @@ class DatasetTestBase(test.TestCase):
       with self.assertRaisesRegexp(exception_class,
                                    re.escape(expected_message)):
         self.evaluate(next2())
-
-  def structuredDataset(self, structure, shape=None, dtype=dtypes.int64):
-    """Returns a singleton dataset with the given structure."""
-    if shape is None:
-      shape = []
-    if structure is None:
-      return dataset_ops.Dataset.from_tensors(
-          array_ops.zeros(shape, dtype=dtype))
-    else:
-      return dataset_ops.Dataset.zip(
-          tuple([
-              self.structuredDataset(substructure, shape, dtype)
-              for substructure in structure
-          ]))
-
-  def structuredElement(self, structure, shape=None, dtype=dtypes.int64):
-    """Returns an element with the given structure."""
-    if shape is None:
-      shape = []
-    if structure is None:
-      return array_ops.zeros(shape, dtype=dtype)
-    else:
-      return tuple([
-          self.structuredElement(substructure, shape, dtype)
-          for substructure in structure
-      ])
-- 
GitLab


From 07921022ddc68aacbf210acc62545a90e3091fb1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 14:57:15 -0700
Subject: [PATCH 0461/1085] Add deprecation call-out for tf_mobile

PiperOrigin-RevId: 215971335
---
 .../lite/g3doc/tfmobile/android_build.md       | 18 +++++++++++++++++-
 .../contrib/lite/g3doc/tfmobile/index.md       | 18 +++++++++++++++++-
 .../contrib/lite/g3doc/tfmobile/ios_build.md   | 18 +++++++++++++++++-
 .../lite/g3doc/tfmobile/linking_libs.md        | 18 +++++++++++++++++-
 .../contrib/lite/g3doc/tfmobile/optimizing.md  | 18 +++++++++++++++++-
 .../lite/g3doc/tfmobile/prepare_models.md      | 18 +++++++++++++++++-
 6 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md
index b0f32a8d6c..2eb776d10c 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md
@@ -1,6 +1,22 @@
-
 # Building TensorFlow on Android
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 To get you started working with TensorFlow on Android, we'll walk through two
 ways to build our TensorFlow mobile demos and deploying them on an Android
 device. The first is Android Studio, which lets you build and deploy in an
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/index.md b/tensorflow/contrib/lite/g3doc/tfmobile/index.md
index 49ad35d4e6..15f0fd3961 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/index.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/index.md
@@ -1,6 +1,22 @@
-
 # Overview
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 TensorFlow was designed to be a good deep learning solution for mobile
 platforms. Currently we have two solutions for deploying machine learning
 applications on mobile and embedded devices: TensorFlow for Mobile and
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md
index be8b4100c8..d922907cdc 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md
@@ -1,6 +1,22 @@
-
 # Building TensorFlow on iOS
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 ## Using CocoaPods
 
 The simplest way to get started with TensorFlow on iOS is using the CocoaPods
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md
index 4d4bb3bc08..fd0e322c93 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md
@@ -1,6 +1,22 @@
-
 # Integrating TensorFlow libraries
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 Once you have made some progress on a model that addresses the problem you’re
 trying to solve, it’s important to test it out inside your application
 immediately. There are often unexpected differences between your training data
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md
index 7436594fd8..59ff8e774c 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md
@@ -1,6 +1,22 @@
-
 # Optimizing for mobile
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 There are some special issues that you have to deal with when you’re trying to
 ship on mobile or embedded devices, and you’ll need to think about these as
 you’re developing your model.
diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md
index d1c67d4c61..1d373251dd 100644
--- a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md
+++ b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md
@@ -1,6 +1,22 @@
-
 # Preparing models for mobile deployment
 
+Warning: We expect to deprecate TensorFlow Mobile in early 2019
+
+<div class="caution">
+  <p>
+    <a href="../">TensorFlow Lite</a> is our main mobile and embedded offering. We are
+    working hard to close the feature gap between TensorFlow Mobile and
+    TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We
+    will give ample notice to our users when we get to that point and will
+    provide help and support to ensure easy migrations.
+  </p>
+  <p>
+    In the meantime, please use TensorFlow Lite. If you have a feature request,
+    such as a missing op, please post to our <a
+    href="https://github.com/tensorflow/tensorflow/issues">GitHub</a>.
+  </p>
+</div>
+
 The requirements for storing model information during training are very
 different from when you want to release it as part of a mobile app. This section
 covers the tools involved in converting from a training model to something
-- 
GitLab


From 1e104d80826fed95f9fad6f07f68e35cae3527b2 Mon Sep 17 00:00:00 2001
From: Geoffrey Irving <irving@naml.us>
Date: Wed, 19 Sep 2018 09:33:19 -0700
Subject: [PATCH 0462/1085] Expand stateless random generators to match their
 stateful cousins

stateless_random_uniform now take minval+maxval and handles ints,
and stateless_normal/stateless_truncated_normal take mean+stddev.
Additionally, all of the stateless functions now have proper doc
strings.

This is step one of moving stateless random numbers out of contrib.
---
 tensorflow/contrib/stateless/BUILD            |   5 +-
 tensorflow/contrib/stateless/__init__.py      |   9 +-
 .../kernel_tests/stateless_random_ops_test.py | 156 ++++++-------
 .../contrib/stateless/python/stateless_ops.py | 214 ++++++++++++++++++
 .../api_def_StatelessRandomUniformInt.pbtxt   |  46 ++++
 tensorflow/core/kernels/random_op.cc          |  34 +--
 .../core/kernels/stateless_random_ops.cc      | 155 ++++++++-----
 tensorflow/core/ops/stateless_random_ops.cc   |  53 +++--
 8 files changed, 491 insertions(+), 181 deletions(-)
 create mode 100644 tensorflow/contrib/stateless/python/stateless_ops.py
 create mode 100644 tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt

diff --git a/tensorflow/contrib/stateless/BUILD b/tensorflow/contrib/stateless/BUILD
index a217397c1a..e9ddec8889 100644
--- a/tensorflow/contrib/stateless/BUILD
+++ b/tensorflow/contrib/stateless/BUILD
@@ -11,7 +11,10 @@ load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
 
 py_library(
     name = "stateless",
-    srcs = ["__init__.py"],
+    srcs = [
+        "__init__.py",
+        "python/stateless_ops.py",
+    ],
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:framework_ops",
diff --git a/tensorflow/contrib/stateless/__init__.py b/tensorflow/contrib/stateless/__init__.py
index fe23fe0dd8..30d0a7ab6a 100644
--- a/tensorflow/contrib/stateless/__init__.py
+++ b/tensorflow/contrib/stateless/__init__.py
@@ -32,16 +32,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import ops
-
 # pylint: disable=wildcard-import
-from tensorflow.python.ops.gen_stateless_random_ops import *
+from tensorflow.contrib.stateless.python.stateless_ops import *
 
 from tensorflow.python.util.all_util import remove_undocumented
 
-ops.NotDifferentiable("StatelessMultinomial")
-ops.NotDifferentiable("StatelessRandomNormal")
-ops.NotDifferentiable("StatelessRandomUniform")
-ops.NotDifferentiable("StatelessTruncatedNormal")
-
 remove_undocumented(__name__)
diff --git a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
index d724a5c014..c0c1430d84 100644
--- a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
+++ b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+
 import numpy as np
 from tensorflow.contrib import stateless
 from tensorflow.python.framework import constant_op
@@ -27,10 +29,6 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
 
-CASES = [(stateless.stateless_random_uniform, random_ops.random_uniform),
-         (stateless.stateless_random_normal, random_ops.random_normal),
-         (stateless.stateless_truncated_normal, random_ops.truncated_normal)]
-
 
 def invert_philox(key, value):
   """Invert the Philox bijection."""
@@ -51,96 +49,102 @@ def invert_philox(key, value):
 
 class StatelessOpsTest(test.TestCase):
 
-  def testMatchStateful(self):
+  def _test_match(self, cases):
     # Stateless ops should be the same as stateful ops on the first call
     # after seed scrambling.
+    cases = tuple(cases)
     key = 0x3ec8f720, 0x02461e29
     for seed in (7, 17), (11, 5), (2, 3):
       preseed = invert_philox(key, (seed[0], 0, seed[1], 0)).astype(np.uint64)
       preseed = preseed[::2] | preseed[1::2] << 32
       random_seed.set_random_seed(seed[0])
       with self.test_session(use_gpu=True):
-        for stateless_op, stateful_op in CASES:
-          for shape in (), (3,), (2, 5):
-            stateful = stateful_op(shape, seed=seed[1])
-            pure = stateless_op(shape, seed=preseed)
-            self.assertAllEqual(stateful.eval(), pure.eval())
+        for stateless_op, stateful_op in cases:
+          stateful = stateful_op(seed=seed[1])
+          pure = stateless_op(seed=preseed)
+          self.assertAllEqual(stateful.eval(), pure.eval())
 
-  def testDeterminism(self):
+  def _test_determinism(self, cases):
     # Stateless values should be equal iff the seeds are equal (roughly)
+    cases = tuple(cases)
     with self.test_session(use_gpu=True):
       for seed_type in [dtypes.int32, dtypes.int64]:
         seed_t = array_ops.placeholder(seed_type, shape=[2])
         seeds = [(x, y) for x in range(5) for y in range(5)] * 3
-        for stateless_op, _ in CASES:
-          for shape in (), (3,), (2, 5):
-            pure = stateless_op(shape, seed=seed_t)
-            values = [(seed, pure.eval(feed_dict={seed_t: seed}))
-                      for seed in seeds]
-            for s0, v0 in values:
-              for s1, v1 in values:
-                self.assertEqual(s0 == s1, np.all(v0 == v1))
-
-  def testShapeType(self):
-    with self.test_session(use_gpu=True):
-      for shape_dtype in [dtypes.int32, dtypes.int64]:
-        seed_t = array_ops.placeholder(dtypes.int64, shape=[2])
-        seeds = [(x, y) for x in range(5) for y in range(5)] * 3
-        for stateless_op, _ in CASES:
-          for shape in (), (3,), (2, 5):
-            pure = stateless_op(constant_op.constant(shape, dtype=shape_dtype),
-                                seed=seed_t)
-            values = [(seed, pure.eval(feed_dict={seed_t: seed}))
-                      for seed in seeds]
-            for s0, v0 in values:
-              for s1, v1 in values:
-                self.assertEqual(s0 == s1, np.all(v0 == v1))
-
-  def testMatchStatefulMultinomial(self):
-    # Stateless ops should be the same as stateful ops on the first call
-    # after seed scrambling.
-    key = 0x3ec8f720, 0x02461e29
-    num_samples = 4
-    for logits_dtype in np.float16, np.float32, np.float64:
-      for output_dtype in dtypes.int32, dtypes.int64:
-        for seed in (7, 17), (11, 5), (2, 3):
-          preseed = invert_philox(key,
-                                  (seed[0], 0, seed[1], 0)).astype(np.uint64)
-          preseed = preseed[::2] | preseed[1::2] << 32
-          random_seed.set_random_seed(seed[0])
-          with self.test_session(use_gpu=True):
-            for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2],
-                                                      [0.25, 0.75]]):
-              logits_t = constant_op.constant(logits, dtype=logits_dtype)
-              stateful = random_ops.multinomial(
-                  logits_t,
-                  num_samples,
-                  seed=seed[1],
-                  output_dtype=output_dtype)
-              pure = stateless.stateless_multinomial(
-                  logits_t,
-                  num_samples,
-                  seed=preseed,
-                  output_dtype=output_dtype)
-              self.assertAllEqual(stateful.eval(), pure.eval())
+        for stateless_op, _ in cases:
+          pure = stateless_op(seed=seed_t)
+          values = [(seed, pure.eval(feed_dict={seed_t: seed}))
+                    for seed in seeds]
+          for s0, v0 in values:
+            for s1, v1 in values:
+              self.assertEqual(s0 == s1, np.all(v0 == v1))
 
-  def testDeterminismMultinomial(self):
-    # Stateless values should be equal iff the seeds are equal (roughly)
+  def _float_cases(self, shape_dtypes=(None,)):
+    float_cases = (
+        # Uniform distribution, with and without range
+        (stateless.stateless_random_uniform, random_ops.random_uniform, {}),
+        (stateless.stateless_random_uniform, random_ops.random_uniform,
+         dict(minval=2.2, maxval=7.1)),
+        # Normal distribution, with and without mean+stddev
+        (stateless.stateless_random_normal, random_ops.random_normal, {}),
+        (stateless.stateless_random_normal, random_ops.random_normal,
+         dict(mean=2, stddev=3)),
+        # Truncated normal distribution, with and without mean+stddev
+        (stateless.stateless_truncated_normal, random_ops.truncated_normal, {}),
+        (stateless.stateless_truncated_normal, random_ops.truncated_normal,
+         dict(mean=3, stddev=4)),
+    )
+    for dtype in dtypes.float16, dtypes.float32, dtypes.float64:
+      for shape_dtype in shape_dtypes:
+        for shape in (), (3,), (2, 5):
+          if shape_dtype is not None:
+            shape = constant_op.constant(shape, dtype=shape_dtype)
+          for stateless_op, stateful_op, kwds in float_cases:
+            kwds = dict(shape=shape, dtype=dtype, **kwds)
+            yield (functools.partial(stateless_op, **kwds),
+                   functools.partial(stateful_op, **kwds))
+
+  def _int_cases(self, shape_dtypes=(None,)):
+    for shape_dtype in shape_dtypes:
+      for shape in (), (3,), (2, 5):
+        if shape_dtype is not None:
+          shape = constant_op.constant(shape, dtype=shape_dtype)
+        for dtype in dtypes.int32, dtypes.int64:
+          kwds = dict(minval=2, maxval=11111, dtype=dtype, shape=shape)
+          yield (functools.partial(stateless.stateless_random_uniform, **kwds),
+                 functools.partial(random_ops.random_uniform, **kwds))
+
+  def _multinomial_cases(self):
     num_samples = 10
-    with self.test_session(use_gpu=True):
-      for seed_type in [dtypes.int32, dtypes.int64]:
-        seed_t = array_ops.placeholder(seed_type, shape=[2])
-        seeds = [(x, y) for x in range(5) for y in range(5)] * 3
+    for logits_dtype in np.float16, np.float32, np.float64:
+      for output_dtype in dtypes.int32, dtypes.int64:
         for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2],
                                                   [0.25, 0.75]]):
-          pure = stateless.stateless_multinomial(
-              logits, num_samples, seed=seed_t)
-          values = [
-              (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds
-          ]
-          for s0, v0 in values:
-            for s1, v1 in values:
-              self.assertEqual(s0 == s1, np.all(v0 == v1))
+          kwds = dict(logits=constant_op.constant(logits, dtype=logits_dtype),
+                      num_samples=num_samples,
+                      output_dtype=output_dtype)
+          yield (functools.partial(stateless.stateless_multinomial, **kwds),
+                 functools.partial(random_ops.multinomial, **kwds))
+
+  def testMatchFloat(self):
+    self._test_match(self._float_cases())
+
+  def testMatchInt(self):
+    self._test_match(self._int_cases())
+
+  def testMatchMultinomial(self):
+    self._test_match(self._multinomial_cases())
+
+  def testDeterminismFloat(self):
+    self._test_determinism(self._float_cases(
+        shape_dtypes=(dtypes.int32, dtypes.int64)))
+
+  def testDeterminismInt(self):
+    self._test_determinism(self._int_cases(
+        shape_dtypes=(dtypes.int32, dtypes.int64)))
+
+  def testDeterminismMultinomial(self):
+    self._test_determinism(self._multinomial_cases())
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/contrib/stateless/python/stateless_ops.py b/tensorflow/contrib/stateless/python/stateless_ops.py
new file mode 100644
index 0000000000..db9b7a87f2
--- /dev/null
+++ b/tensorflow/contrib/stateless/python/stateless_ops.py
@@ -0,0 +1,214 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Stateless random ops which take seed as a tensor input."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.ops import gen_stateless_random_ops
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import math_ops
+
+ops.NotDifferentiable("StatelessMultinomial")
+ops.NotDifferentiable("StatelessRandomNormal")
+ops.NotDifferentiable("StatelessRandomUniform")
+ops.NotDifferentiable("StatelessRandomUniformInt")
+ops.NotDifferentiable("StatelessTruncatedNormal")
+
+
+def stateless_random_uniform(shape,
+                             seed,
+                             minval=0,
+                             maxval=None,
+                             dtype=dtypes.float32,
+                             name=None):
+  """Outputs deterministic pseudorandom values from a uniform distribution.
+
+  This is a stateless version of `tf.random_uniform`: if run twice with the
+  same seeds, it will produce the same pseudorandom numbers.  The output is
+  consistent across multiple runs on the same hardware (and between CPU
+  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
+  hardware.
+
+  The generated values follow a uniform distribution in the range
+  `[minval, maxval)`. The lower bound `minval` is included in the range, while
+  the upper bound `maxval` is excluded.
+
+  For floats, the default range is `[0, 1)`.  For ints, at least `maxval` must
+  be specified explicitly.
+
+  In the integer case, the random integers are slightly biased unless
+  `maxval - minval` is an exact power of two.  The bias is small for values of
+  `maxval - minval` significantly smaller than the range of the output (either
+  `2**32` or `2**64`).
+
+  Args:
+    shape: A 1-D integer Tensor or Python array. The shape of the output tensor.
+    seed: A shape [2] integer Tensor of seeds to the random number generator.
+    minval: A 0-D Tensor or Python value of type `dtype`. The lower bound on the
+      range of random values to generate.  Defaults to 0.
+    maxval: A 0-D Tensor or Python value of type `dtype`. The upper bound on
+      the range of random values to generate.  Defaults to 1 if `dtype` is
+      floating point.
+    dtype: The type of the output: `float16`, `float32`, `float64`, `int32`,
+      or `int64`.
+    name: A name for the operation (optional).
+
+  Returns:
+    A tensor of the specified shape filled with random uniform values.
+
+  Raises:
+    ValueError: If `dtype` is integral and `maxval` is not specified.
+  """
+  dtype = dtypes.as_dtype(dtype)
+  if dtype not in (dtypes.float16, dtypes.bfloat16, dtypes.float32,
+                   dtypes.float64, dtypes.int32, dtypes.int64):
+    raise ValueError("Invalid dtype %r" % dtype)
+  if maxval is None:
+    if dtype.is_integer:
+      raise ValueError("Must specify maxval for integer dtype %r" % dtype)
+    maxval = 1
+  with ops.name_scope(name, "stateless_random_uniform",
+                      [shape, seed, minval, maxval]) as name:
+    shape = random_ops._ShapeTensor(shape)  # pylint: disable=protected-access
+    minval = ops.convert_to_tensor(minval, dtype=dtype, name="min")
+    maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max")
+    if dtype.is_integer:
+      return gen_stateless_random_ops.stateless_random_uniform_int(
+          shape, seed=seed, minval=minval, maxval=maxval, name=name)
+    else:
+      rnd = gen_stateless_random_ops.stateless_random_uniform(
+          shape, seed=seed, dtype=dtype)
+      return math_ops.add(rnd * (maxval - minval), minval, name=name)
+
+
+def stateless_random_normal(shape,
+                            seed,
+                            mean=0.0,
+                            stddev=1.0,
+                            dtype=dtypes.float32,
+                            name=None):
+  """Outputs deterministic pseudorandom values from a normal distribution.
+
+  This is a stateless version of `tf.random_normal`: if run twice with the
+  same seeds, it will produce the same pseudorandom numbers.  The output is
+  consistent across multiple runs on the same hardware (and between CPU
+  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
+  hardware.
+
+  Args:
+    shape: A 1-D integer Tensor or Python array. The shape of the output tensor.
+    seed: A shape [2] integer Tensor of seeds to the random number generator.
+    mean: A 0-D Tensor or Python value of type `dtype`. The mean of the normal
+      distribution.
+    stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation
+      of the normal distribution.
+    dtype: The type of the output.
+    name: A name for the operation (optional).
+
+  Returns:
+    A tensor of the specified shape filled with random normal values.
+  """
+  with ops.name_scope(name, "stateless_random_normal",
+                      [shape, seed, mean, stddev]) as name:
+    shape = random_ops._ShapeTensor(shape)  # pylint: disable=protected-access
+    mean = ops.convert_to_tensor(mean, dtype=dtype, name="mean")
+    stddev = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev")
+    rnd = gen_stateless_random_ops.stateless_random_normal(shape, seed, dtype)
+    return math_ops.add(rnd * stddev, mean, name=name)
+
+
+def stateless_truncated_normal(shape,
+                               seed,
+                               mean=0.0,
+                               stddev=1.0,
+                               dtype=dtypes.float32,
+                               name=None):
+  """Outputs deterministic pseudorandom values, truncated normally distributed.
+
+  This is a stateless version of `tf.truncated_normal`: if run twice with the
+  same seeds, it will produce the same pseudorandom numbers.  The output is
+  consistent across multiple runs on the same hardware (and between CPU
+  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
+  hardware.
+
+  The generated values follow a normal distribution with specified mean and
+  standard deviation, except that values whose magnitude is more than 2 standard
+  deviations from the mean are dropped and re-picked.
+
+  Args:
+    shape: A 1-D integer Tensor or Python array. The shape of the output tensor.
+    seed: A shape [2] integer Tensor of seeds to the random number generator.
+    mean: A 0-D Tensor or Python value of type `dtype`. The mean of the
+      truncated normal distribution.
+    stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation
+      of the normal distribution, before truncation.
+    dtype: The type of the output.
+    name: A name for the operation (optional).
+
+  Returns:
+    A tensor of the specified shape filled with random truncated normal values.
+  """
+  with ops.name_scope(name, "stateless_truncated_normal",
+                      [shape, seed, mean, stddev]) as name:
+    shape = random_ops._ShapeTensor(shape)  # pylint: disable=protected-access
+    mean = ops.convert_to_tensor(mean, dtype=dtype, name="mean")
+    stddev = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev")
+    rnd = gen_stateless_random_ops.stateless_truncated_normal(
+        shape, seed, dtype)
+    return math_ops.add(rnd * stddev, mean, name=name)
+
+
+def stateless_multinomial(logits,
+                          num_samples,
+                          seed,
+                          output_dtype=dtypes.int64,
+                          name=None):
+  """Draws deterministic pseudorandom samples from a multinomial distribution.
+
+  This is a stateless version of `tf.multinomial`: if run twice with the
+  same seeds, it will produce the same pseudorandom numbers.  The output is
+  consistent across multiple runs on the same hardware (and between CPU
+  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
+  hardware.
+
+  Example:
+
+  ```python
+  # samples has shape [1, 5], where each value is either 0 or 1 with equal
+  # probability.
+  samples = tf.contrib.stateless.stateless_multinomial(
+      tf.log([[10., 10.]]), 5, seed=[7, 17])
+  ```
+
+  Args:
+    logits: 2-D Tensor with shape `[batch_size, num_classes]`.  Each slice
+      `[i, :]` represents the unnormalized log-probabilities for all classes.
+    num_samples: 0-D.  Number of independent samples to draw for each row slice.
+    seed: A shape [2] integer Tensor of seeds to the random number generator.
+    name: Optional name for the operation.
+    output_dtype: integer type to use for the output. Defaults to int64.
+
+  Returns:
+    The drawn samples of shape `[batch_size, num_samples]`.
+  """
+  with ops.name_scope(name, "stateless_multinomial", [logits, seed]):
+    logits = ops.convert_to_tensor(logits, name="logits")
+    return gen_stateless_random_ops.stateless_multinomial(
+        logits, num_samples, seed, output_dtype=output_dtype)
diff --git a/tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt b/tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt
new file mode 100644
index 0000000000..b6a6dbdf54
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_StatelessRandomUniformInt.pbtxt
@@ -0,0 +1,46 @@
+op {
+  graph_op_name: "StatelessRandomUniformInt"
+  visibility: HIDDEN
+  in_arg {
+    name: "shape"
+    description: <<END
+The shape of the output tensor.
+END
+  }
+  in_arg {
+    name: "seed"
+    description: <<END
+2 seeds (shape [2]).
+END
+  }
+  in_arg {
+    name: "minval"
+    description: <<END
+Minimum value (inclusive, scalar).
+END
+  }
+  in_arg {
+    name: "maxval"
+    description: <<END
+Maximum value (exclusive, scalar).
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Random values with specified shape.
+END
+  }
+  attr {
+    name: "dtype"
+    description: <<END
+The type of the output.
+END
+  }
+  summary: "Outputs deterministic pseudorandom random integers from a uniform distribution."
+  description: <<END
+The generated values follow a uniform distribution in the range `[minval, maxval)`.
+
+The outputs are a deterministic function of `shape`, `seed`, `minval`, and `maxval`.
+END
+}
diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc
index 04a53697c0..3810d817ca 100644
--- a/tensorflow/core/kernels/random_op.cc
+++ b/tensorflow/core/kernels/random_op.cc
@@ -489,13 +489,15 @@ class RandomGammaOp : public OpKernel {
       Name("RandomGamma").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"),        \
       RandomGammaOp<TYPE>)
 
-#define REGISTER_INT(IntType)                                   \
-  REGISTER_KERNEL_BUILDER(Name("RandomUniformInt")              \
-                              .Device(DEVICE_CPU)               \
-                              .HostMemory("shape")              \
-                              .HostMemory("minval")             \
-                              .HostMemory("maxval")             \
-                              .TypeConstraint<IntType>("Tout"), \
+#define REGISTER_INT(IntType)                                                 \
+  template struct functor::FillPhiloxRandom<                                  \
+      CPUDevice, random::UniformDistribution<random::PhiloxRandom, IntType>>; \
+  REGISTER_KERNEL_BUILDER(Name("RandomUniformInt")                            \
+                              .Device(DEVICE_CPU)                             \
+                              .HostMemory("shape")                            \
+                              .HostMemory("minval")                           \
+                              .HostMemory("maxval")                           \
+                              .TypeConstraint<IntType>("Tout"),               \
                           RandomUniformIntOp<CPUDevice, IntType>);
 
 TF_CALL_half(REGISTER);
@@ -538,14 +540,16 @@ TF_CALL_int64(REGISTER_INT);
           random::TruncatedNormalDistribution<                                 \
               random::SingleSampleAdapter<random::PhiloxRandom>, TYPE>>);
 
-#define REGISTER_INT(IntType)                                   \
-  REGISTER_KERNEL_BUILDER(Name("RandomUniformInt")              \
-                              .Device(DEVICE_GPU)               \
-                              .HostMemory("shape")              \
-                              .HostMemory("minval")             \
-                              .HostMemory("maxval")             \
-                              .TypeConstraint<int32>("T")       \
-                              .TypeConstraint<IntType>("Tout"), \
+#define REGISTER_INT(IntType)                                                 \
+  template struct functor::FillPhiloxRandom<                                  \
+      GPUDevice, random::UniformDistribution<random::PhiloxRandom, IntType>>; \
+  REGISTER_KERNEL_BUILDER(Name("RandomUniformInt")                            \
+                              .Device(DEVICE_GPU)                             \
+                              .HostMemory("shape")                            \
+                              .HostMemory("minval")                           \
+                              .HostMemory("maxval")                           \
+                              .TypeConstraint<int32>("T")                     \
+                              .TypeConstraint<IntType>("Tout"),               \
                           RandomUniformIntOp<GPUDevice, IntType>);
 
 TF_CALL_half(REGISTER);
diff --git a/tensorflow/core/kernels/stateless_random_ops.cc b/tensorflow/core/kernels/stateless_random_ops.cc
index eab176c7fb..925f5291a6 100644
--- a/tensorflow/core/kernels/stateless_random_ops.cc
+++ b/tensorflow/core/kernels/stateless_random_ops.cc
@@ -113,74 +113,109 @@ class StatelessRandomOp : public StatelessRandomOpBase {
   }
 };
 
-#define REGISTER(TYPE)                                                 \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessRandomUniform")                                   \
-          .Device(DEVICE_CPU)                                          \
-          .HostMemory("shape")                                         \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<CPUDevice, random::UniformDistribution<        \
-                                       random::PhiloxRandom, TYPE> >); \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessRandomNormal")                                    \
-          .Device(DEVICE_CPU)                                          \
-          .HostMemory("shape")                                         \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<CPUDevice, random::NormalDistribution<         \
-                                       random::PhiloxRandom, TYPE> >); \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessTruncatedNormal")                                 \
-          .Device(DEVICE_CPU)                                          \
-          .HostMemory("shape")                                         \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<                                               \
-          CPUDevice,                                                   \
-          random::TruncatedNormalDistribution<                         \
-              random::SingleSampleAdapter<random::PhiloxRandom>, TYPE> >);
+template <typename Device, typename IntType>
+class StatelessRandomUniformIntOp : public StatelessRandomOpBase {
+ public:
+  using StatelessRandomOpBase::StatelessRandomOpBase;
 
-TF_CALL_half(REGISTER);
-TF_CALL_float(REGISTER);
-TF_CALL_double(REGISTER);
+  void Fill(OpKernelContext* context, random::PhiloxRandom random,
+            Tensor* output) override {
+    const Tensor& minval = context->input(2);
+    const Tensor& maxval = context->input(3);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(minval.shape()),
+                errors::InvalidArgument("minval must be 0-D, got shape ",
+                                        minval.shape().DebugString()));
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(maxval.shape()),
+                errors::InvalidArgument("maxval must be 0-D, got shape ",
+                                        maxval.shape().DebugString()));
+
+    // Verify that minval < maxval.  Note that we'll never reach this point for
+    // empty output.  Zero impossible things are fine.
+    const auto lo = minval.scalar<IntType>()();
+    const auto hi = maxval.scalar<IntType>()();
+    OP_REQUIRES(
+        context, lo < hi,
+        errors::InvalidArgument("Need minval < maxval, got ", lo, " >= ", hi));
+
+    // Build distribution
+    typedef random::UniformDistribution<random::PhiloxRandom, IntType>
+        Distribution;
+    Distribution dist(lo, hi);
+
+    auto flat = output->flat<IntType>();
+    // Reuse the compute kernels from the stateful random ops
+    functor::FillPhiloxRandom<Device, Distribution>()(
+        context, context->eigen_device<Device>(), random, flat.data(),
+        flat.size(), dist);
+  }
+};
 
-#undef REGISTER
+#define REGISTER(DEVICE, TYPE)                                              \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("StatelessRandomUniform")                                        \
+          .Device(DEVICE_##DEVICE)                                          \
+          .HostMemory("shape")                                              \
+          .HostMemory("seed")                                               \
+          .TypeConstraint<TYPE>("dtype"),                                   \
+      StatelessRandomOp<DEVICE##Device, random::UniformDistribution<        \
+                                            random::PhiloxRandom, TYPE> >); \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("StatelessRandomNormal")                                         \
+          .Device(DEVICE_##DEVICE)                                          \
+          .HostMemory("shape")                                              \
+          .HostMemory("seed")                                               \
+          .TypeConstraint<TYPE>("dtype"),                                   \
+      StatelessRandomOp<DEVICE##Device, random::NormalDistribution<         \
+                                            random::PhiloxRandom, TYPE> >); \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("StatelessTruncatedNormal")                                      \
+          .Device(DEVICE_##DEVICE)                                          \
+          .HostMemory("shape")                                              \
+          .HostMemory("seed")                                               \
+          .TypeConstraint<TYPE>("dtype"),                                   \
+      StatelessRandomOp<                                                    \
+          DEVICE##Device,                                                   \
+          random::TruncatedNormalDistribution<                              \
+              random::SingleSampleAdapter<random::PhiloxRandom>, TYPE> >);
+
+#define REGISTER_INT(DEVICE, TYPE)                            \
+  REGISTER_KERNEL_BUILDER(Name("StatelessRandomUniformInt")   \
+                              .Device(DEVICE_##DEVICE)        \
+                              .HostMemory("shape")            \
+                              .HostMemory("seed")             \
+                              .HostMemory("minval")           \
+                              .HostMemory("maxval")           \
+                              .TypeConstraint<TYPE>("dtype"), \
+                          StatelessRandomUniformIntOp<DEVICE##Device, TYPE>);
+
+#define REGISTER_CPU(TYPE) REGISTER(CPU, TYPE)
+#define REGISTER_GPU(TYPE) REGISTER(GPU, TYPE)
+#define REGISTER_INT_CPU(TYPE) REGISTER_INT(CPU, TYPE)
+#define REGISTER_INT_GPU(TYPE) REGISTER_INT(GPU, TYPE)
+
+TF_CALL_half(REGISTER_CPU);
+TF_CALL_bfloat16(REGISTER_CPU);
+TF_CALL_float(REGISTER_CPU);
+TF_CALL_double(REGISTER_CPU);
+TF_CALL_int32(REGISTER_INT_CPU);
+TF_CALL_int64(REGISTER_INT_CPU);
 
 #if GOOGLE_CUDA
 
-#define REGISTER(TYPE)                                                 \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessRandomUniform")                                   \
-          .Device(DEVICE_GPU)                                          \
-          .HostMemory("shape")                                         \
-          .HostMemory("seed")                                          \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<GPUDevice, random::UniformDistribution<        \
-                                       random::PhiloxRandom, TYPE> >); \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessRandomNormal")                                    \
-          .Device(DEVICE_GPU)                                          \
-          .HostMemory("shape")                                         \
-          .HostMemory("seed")                                          \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<GPUDevice, random::NormalDistribution<         \
-                                       random::PhiloxRandom, TYPE> >); \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("StatelessTruncatedNormal")                                 \
-          .Device(DEVICE_GPU)                                          \
-          .HostMemory("shape")                                         \
-          .HostMemory("seed")                                          \
-          .TypeConstraint<TYPE>("dtype"),                              \
-      StatelessRandomOp<                                               \
-          GPUDevice,                                                   \
-          random::TruncatedNormalDistribution<                         \
-              random::SingleSampleAdapter<random::PhiloxRandom>, TYPE> >);
+TF_CALL_half(REGISTER_GPU);
+TF_CALL_float(REGISTER_GPU);
+TF_CALL_double(REGISTER_GPU);
+TF_CALL_int32(REGISTER_INT_GPU);
+TF_CALL_int64(REGISTER_INT_GPU);
 
-TF_CALL_half(REGISTER);
-TF_CALL_float(REGISTER);
-TF_CALL_double(REGISTER);
+#endif  // GOOGLE_CUDA
 
 #undef REGISTER
-
-#endif  // GOOGLE_CUDA
+#undef REGISTER_INT
+#undef REGISTER_CPU
+#undef REGISTER_GPU
+#undef REGISTER_INT_CPU
+#undef REGISTER_INT_GPU
 
 }  // namespace
 
diff --git a/tensorflow/core/ops/stateless_random_ops.cc b/tensorflow/core/ops/stateless_random_ops.cc
index 742709fb18..f919a21d60 100644
--- a/tensorflow/core/ops/stateless_random_ops.cc
+++ b/tensorflow/core/ops/stateless_random_ops.cc
@@ -19,42 +19,55 @@ limitations under the License.
 namespace tensorflow {
 
 using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
 using shape_inference::ShapeHandle;
 
-static Status StatelessShape(shape_inference::InferenceContext* context) {
+static Status StatelessShape(InferenceContext* c) {
   // Check seed shape
   ShapeHandle seed;
-  TF_RETURN_IF_ERROR(context->WithRank(context->input(1), 1, &seed));
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &seed));
   DimensionHandle unused;
-  TF_RETURN_IF_ERROR(context->WithValue(context->Dim(seed, 0), 2, &unused));
+  TF_RETURN_IF_ERROR(c->WithValue(c->Dim(seed, 0), 2, &unused));
 
   // Set output shape
   ShapeHandle out;
-  TF_RETURN_IF_ERROR(context->MakeShapeFromShapeTensor(0, &out));
-  context->set_output(0, out);
+  TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &out));
+  c->set_output(0, out);
   return Status::OK();
 }
 
-#define REGISTER_STATELESS_OP(name)                  \
-  REGISTER_OP(name)                                  \
-      .Input("shape: T")                             \
-      .Input("seed: Tseed")                          \
-      .Output("output: dtype")                       \
-      .Attr("dtype: {half,float,double} = DT_FLOAT") \
-      .Attr("T: {int32, int64} = DT_INT32")          \
-      .Attr("Tseed: {int32, int64} = DT_INT64")      \
+#define REGISTER_STATELESS_OP(name)                           \
+  REGISTER_OP(name)                                           \
+      .Input("shape: T")                                      \
+      .Input("seed: Tseed")                                   \
+      .Output("output: dtype")                                \
+      .Attr("dtype: {half,bfloat16,float,double} = DT_FLOAT") \
+      .Attr("T: {int32, int64} = DT_INT32")                   \
+      .Attr("Tseed: {int32, int64} = DT_INT64")               \
       .SetShapeFn(StatelessShape)
 
-// This op is exposed through contrib/stateless only.  The interface may change.
 REGISTER_STATELESS_OP("StatelessRandomUniform");
-
-// This op is exposed through contrib/stateless only.  The interface may change.
 REGISTER_STATELESS_OP("StatelessRandomNormal");
-
-// This op is exposed through contrib/stateless only.  The interface may change.
 REGISTER_STATELESS_OP("StatelessTruncatedNormal");
 
-// This op is exposed through contrib/stateless only.  The interface may change.
+#undef REGISTER_STATELESS_OP
+
+REGISTER_OP("StatelessRandomUniformInt")
+    .Input("shape: T")
+    .Input("seed: Tseed")
+    .Input("minval: dtype")
+    .Input("maxval: dtype")
+    .Output("output: dtype")
+    .Attr("dtype: {int32, int64}")
+    .Attr("T: {int32, int64}")
+    .Attr("Tseed: {int32, int64} = DT_INT64")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      return StatelessShape(c);
+    });
+
 REGISTER_OP("StatelessMultinomial")
     .Input("logits: T")
     .Input("num_samples: int32")
@@ -80,6 +93,4 @@ REGISTER_OP("StatelessMultinomial")
       return Status::OK();
     });
 
-#undef REGISTER_STATELESS_OP
-
 }  // namespace tensorflow
-- 
GitLab


From deb3b3eb154410d7488317c1f047abbaa749dea2 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Fri, 5 Oct 2018 15:08:07 -0700
Subject: [PATCH 0463/1085] Fix bug in nonpip builds in
 ci_parameterized_build.sh (#22784)

The extra spaces were confusing bash's string-line-continuation from
the backslash `\` on the previous line.

PiperOrigin-RevId: 215964853
---
 tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index fdff867ff0..489722c0e9 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -423,7 +423,7 @@ if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] ||
      [[ ${CTYPE} == "debian.jessie.cpu" ]]; then
     # CPU only command, fully parallel.
     NO_PIP_MAIN_CMD="${MAIN_CMD} ${BAZEL_CMD} ${OPT_FLAG} "\
-      "${EXTRA_ARGS} -- ${BAZEL_TARGET}"
+"${EXTRA_ARGS} -- ${BAZEL_TARGET}"
   elif [[ ${CTYPE} == gpu* ]]; then
     # GPU only command, run as many jobs as the GPU count only.
     NO_PIP_MAIN_CMD="${BAZEL_CMD} ${OPT_FLAG} "\
-- 
GitLab


From c966b5eed60a570f2121cb84ddb4ece84c413719 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 15:08:18 -0700
Subject: [PATCH 0464/1085] Add DistributionStrategy support to moving average
 APIs.

Fixes #21405.

PiperOrigin-RevId: 215973401
---
 tensorflow/contrib/distribute/python/BUILD    |  18 +++
 .../distribute/python/moving_averages_test.py | 141 ++++++++++++++++++
 tensorflow/python/training/moving_averages.py |  49 +++---
 3 files changed, 189 insertions(+), 19 deletions(-)
 create mode 100644 tensorflow/contrib/distribute/python/moving_averages_test.py

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 8267612236..76d5b59ce1 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -411,6 +411,24 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "moving_averages_test",
+    srcs = ["moving_averages_test.py"],
+    additional_deps = [
+        ":combinations",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variables",
+    ],
+    tags = [
+        "no_pip",
+    ],
+)
+
 cuda_py_test(
     name = "optimizer_v2_test",
     srcs = ["optimizer_v2_test.py"],
diff --git a/tensorflow/contrib/distribute/python/moving_averages_test.py b/tensorflow/contrib/distribute/python/moving_averages_test.py
new file mode 100644
index 0000000000..119352ad91
--- /dev/null
+++ b/tensorflow/contrib/distribute/python/moving_averages_test.py
@@ -0,0 +1,141 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for training.moving_averages when using a DistributionStrategy."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+
+from tensorflow.contrib.distribute.python import combinations
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.training import moving_averages
+
+
+all_combinations = combinations.combine(
+    distribution=[combinations.default_strategy,
+                  combinations.one_device_strategy,
+                  combinations.mirrored_strategy_with_gpu_and_cpu],
+    mode=["graph"])
+
+
+class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase):
+
+  @combinations.generate(all_combinations)
+  def testTowerModeWithoutZeroDebias(self, distribution):
+    tower_id = [0]
+
+    def tower_fn():
+      var = variables.Variable([10.0, 11.0])
+      val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]])
+      tower_id[0] += 1
+      decay = 0.25
+      assign = moving_averages.assign_moving_average(
+          var, val, decay, zero_debias=False)
+      return var, assign
+
+    with distribution.scope(), self.cached_session() as sess:
+      var, assign = distribution.call_for_each_tower(tower_fn)
+      variables.global_variables_initializer().run()
+      self.assertAllClose([10.0, 11.0], var.eval())
+      sess.run(distribution.unwrap(assign))
+      # Mean of val across calls to tower_fn().
+      average_val = [1.0 + 0.5 * (tower_id[0] - 1),
+                     2.0 - 0.5 * (tower_id[0] - 1)]
+      val_weight = 1.0 - 0.25
+      self.assertAllClose(
+          [10.0 * 0.25 + average_val[0] * val_weight,
+           11.0 * 0.25 + average_val[1] * val_weight],
+          var.eval())
+
+  @combinations.generate(all_combinations)
+  def testTowerMode(self, distribution):
+    tower_id = [0]
+
+    def tower_fn():
+      var = variables.Variable([0.0, 0.0])
+      val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]])
+      tower_id[0] += 1
+      decay = 0.25
+      assign = moving_averages.assign_moving_average(var, val, decay)
+      return var, assign.op
+
+    with distribution.scope(), self.cached_session() as sess:
+      var, assign_op = distribution.call_for_each_tower(tower_fn)
+      variables.global_variables_initializer().run()
+      self.assertAllClose([0.0, 0.0], var.eval())
+      sess.run(distribution.unwrap(assign_op))
+      # Mean of val across calls to tower_fn().
+      average_val = [1.0 + 0.5 * (tower_id[0] - 1),
+                     2.0 - 0.5 * (tower_id[0] - 1)]
+      self.assertAllClose(average_val, var.eval())
+
+  @combinations.generate(all_combinations)
+  def testCrossTowerWithoutZeroDebias(self, distribution):
+    with distribution.scope(), self.cached_session() as sess:
+      var = variables.Variable([10.0, 11.0])
+      val = constant_op.constant([1.0, 2.0])
+      decay = 0.25
+      # NOTE(josh11b): We currently generate an error if val is a PerDevice value.
+      assign = moving_averages.assign_moving_average(
+          var, val, decay, zero_debias=False)
+
+      variables.global_variables_initializer().run()
+      self.assertAllClose([10.0, 11.0], var.eval())
+      sess.run(assign)
+      average_val = [1.0, 2.0]
+      val_weight = 1.0 - 0.25
+      self.assertAllClose(
+          [10.0 * 0.25 + average_val[0] * val_weight,
+           11.0 * 0.25 + average_val[1] * val_weight],
+          var.eval())
+      # Also try assign.op.
+      sess.run(assign.op)
+      orig_weight = 0.25 * 0.25
+      val_weight = 1.0 - orig_weight
+      self.assertAllClose(
+          [10.0 * orig_weight + average_val[0] * val_weight,
+           11.0 * orig_weight + average_val[1] * val_weight],
+          var.eval())
+
+  @combinations.generate(all_combinations)
+  def testCrossTower(self, distribution):
+    with distribution.scope(), self.cached_session() as sess:
+      var = variables.Variable([0.0, 0.0])
+      val = array_ops.placeholder(dtypes.float32)
+      decay = 0.25
+      # NOTE(josh11b): We currently generate an error if val is a PerDevice value.
+      assign = moving_averages.assign_moving_average(var, val, decay)
+
+      variables.global_variables_initializer().run()
+      self.assertAllClose([0.0, 0.0], var.eval())
+      sess.run(assign, feed_dict={val: [1.0, 2.0]})
+      self.assertAllClose([1.0, 2.0], var.eval())
+
+      # Also try assign.op.
+      sess.run(assign.op, feed_dict={val: [10.0, 0.0]})
+      self.assertAllClose(
+          [(1.0 * 0.25 + 10.0) / (1.0 * 0.25 + 1.0),
+           (2.0 * 0.25 + 0.0) / (1.0 * 0.25 + 1.0)],
+          var.eval())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/training/moving_averages.py b/tensorflow/python/training/moving_averages.py
index 041266da3e..89bfcaf4ad 100644
--- a/tensorflow/python/training/moving_averages.py
+++ b/tensorflow/python/training/moving_averages.py
@@ -25,6 +25,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
+from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import slot_creator
 from tensorflow.python.util.tf_export import tf_export
 
@@ -36,9 +37,8 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
   The moving average of 'variable' updated with 'value' is:
     variable * decay + value * (1 - decay)
 
-  The returned Operation sets 'variable' to the newly computed moving average.
-
-  The new value of 'variable' can be set with the 'AssignSub' op as:
+  The returned Operation sets 'variable' to the newly computed moving average,
+  by performing this subtraction:
      variable -= (1 - decay) * (variable - value)
 
   Since variables that are initialized to a `0` value will be `0` biased,
@@ -50,7 +50,7 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
 
   The names of the debias shadow variables, by default, include both the scope
   they were created in and the scope of the variables they debias. They are also
-  given a uniqifying-suffix.
+  given a uniquifying-suffix.
 
   E.g.:
 
@@ -58,8 +58,8 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
     with tf.variable_scope('scope1'):
       with tf.variable_scope('scope2'):
         var = tf.get_variable('foo')
-        tf.assign_moving_average(var, 0.0, 1.0)
-        tf.assign_moving_average(var, 0.0, 0.9)
+        update_1 = tf.assign_moving_average(var, 0.0, 1.0)
+        update_2 = tf.assign_moving_average(var, 0.0, 0.9)
 
     # var.name: 'scope1/scope2/foo'
     # shadow var names: 'scope1/scope2/scope1/scope2/foo/biased'
@@ -76,20 +76,33 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
     name: Optional name of the returned operation.
 
   Returns:
-    A reference to the input 'variable' tensor with the newly computed
-    moving average.
+    A tensor which if evaluated will compute and return the new moving average.
   """
+  def update_fn(v, value, decay=decay):
+    decay = ops.convert_to_tensor(1.0 - decay, name="decay")
+    if decay.dtype != v.dtype.base_dtype:
+      decay = math_ops.cast(decay, v.dtype.base_dtype)
+    if zero_debias:
+      update_delta = _zero_debias(v, value, decay)
+    else:
+      update_delta = (v - value) * decay
+    return state_ops.assign_sub(v, update_delta, name=scope)
+
   with ops.name_scope(name, "AssignMovingAvg",
                       [variable, value, decay]) as scope:
-    with ops.colocate_with(variable):
-      decay = ops.convert_to_tensor(1.0 - decay, name="decay")
-      if decay.dtype != variable.dtype.base_dtype:
-        decay = math_ops.cast(decay, variable.dtype.base_dtype)
-      if zero_debias:
-        update_delta = _zero_debias(variable, value, decay)
-      else:
-        update_delta = (variable - value) * decay
-      return state_ops.assign_sub(variable, update_delta, name=scope)
+    tower_context = distribution_strategy_context.get_tower_context()
+    if tower_context:
+      # In a tower context, we update variable using the mean of value across
+      # towers.
+      def merge_fn(strategy, v, value):
+        value = strategy.reduce(
+            variable_scope.VariableAggregation.MEAN, value, v)
+        return strategy.update(v, update_fn, value)
+
+      return tower_context.merge_call(merge_fn, variable, value)
+    else:
+      strategy = distribution_strategy_context.get_cross_tower_context()
+      return strategy.update(variable, update_fn, value)
 
 
 def weighted_moving_average(value,
@@ -379,8 +392,6 @@ class ExponentialMovingAverage(object):
 
     Raises:
       TypeError: If the arguments are not an allowed type.
-      ValueError: If the moving average of one of the variables is already
-        being computed.
     """
     # TODO(touts): op_scope
     if var_list is None:
-- 
GitLab


From 5ac6e1e4b8318bad2f2bc7e5a08a58a7ed31e4c6 Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <penporn@google.com>
Date: Fri, 5 Oct 2018 15:43:32 -0700
Subject: [PATCH 0465/1085] Removes the INTEL_MKL_ML_ONLY option from the
 CMakeLists build file since the main logic for INTEL_MKL_ML_ONLY is getting
 removed in PR#22783. #22783

PiperOrigin-RevId: 215978712
---
 tensorflow/contrib/cmake/CMakeLists.txt | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index f675c135f4..60f53b8b75 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -352,9 +352,7 @@ if (tensorflow_ENABLE_MKL_SUPPORT)
     list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES})
     list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn_copy_shared_to_destination)
     include_directories(${mkldnn_INCLUDE_DIRS})
-  else (tensorflow_ENABLE_MKLDNN_SUPPORT)
-    add_definitions(-DINTEL_MKL_ML_ONLY)
-  endif()
+  endif(tensorflow_ENABLE_MKLDNN_SUPPORT)
 endif (tensorflow_ENABLE_MKL_SUPPORT)
 
 if (tensorflow_ENABLE_GPU)
-- 
GitLab


From 4aad5382f0e7148d8489d24d8355b828b3f7811b Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Fri, 5 Oct 2018 15:43:58 -0700
Subject: [PATCH 0466/1085] Internal change

PiperOrigin-RevId: 215978771
---
 tensorflow/contrib/lite/java/BUILD            | 95 ++++++++++++++-----
 tensorflow/contrib/lite/java/aar_with_jni.bzl |  5 +-
 .../org/tensorflow/lite/TensorFlowLite.java   | 20 +++-
 .../tensorflow/lite/InterpreterFlexTest.java  | 46 +++++++++
 .../org/tensorflow/lite/InterpreterTest.java  | 14 +++
 5 files changed, 153 insertions(+), 27 deletions(-)
 create mode 100644 tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java

diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD
index 098ba7e773..e68cd26f81 100644
--- a/tensorflow/contrib/lite/java/BUILD
+++ b/tensorflow/contrib/lite/java/BUILD
@@ -11,6 +11,10 @@ load("//tensorflow/java:build_defs.bzl", "JAVACOPTS")
 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_jni_binary")
 load("//tensorflow/contrib/lite/java:aar_with_jni.bzl", "aar_with_jni")
 
+JAVA_SRCS = glob([
+    "src/main/java/org/tensorflow/lite/*.java",
+])
+
 # Building tensorflow-lite.aar including 4 variants of .so
 # To build an aar for release, run below command:
 # bazel build --cxxopt='--std=c++11' -c opt --fat_apk_cpu=x86,x86_64,arm64-v8a,armeabi-v7a \
@@ -20,28 +24,38 @@ aar_with_jni(
     android_library = ":tensorflowlite",
 )
 
+# EXPERIMENTAL: AAR target that supports TensorFlow op execution with TFLite.
+aar_with_jni(
+    name = "tensorflow-lite-flex",
+    android_library = ":tensorflowlite_flex",
+)
+
 android_library(
     name = "tensorflowlite",
-    srcs = glob(
-        [
-            "src/main/java/org/tensorflow/lite/*.java",
-        ],
-    ),
+    srcs = JAVA_SRCS,
+    manifest = "AndroidManifest.xml",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":tensorflowlite_native",
+        "@org_checkerframework_qual",
+    ],
+)
+
+# EXPERIMENTAL: Android target that supports TensorFlow op execution with TFLite.
+android_library(
+    name = "tensorflowlite_flex",
+    srcs = JAVA_SRCS,
     manifest = "AndroidManifest.xml",
     visibility = ["//visibility:public"],
     deps = [
-        ":tflite_runtime",
+        ":tensorflowlite_native_flex",
         "@org_checkerframework_qual",
     ],
 )
 
 android_library(
     name = "tensorflowlite_java",
-    srcs = glob(
-        [
-            "src/main/java/org/tensorflow/lite/*.java",
-        ],
-    ),
+    srcs = JAVA_SRCS,
     visibility = ["//visibility:public"],
     deps = [
         "@org_checkerframework_qual",
@@ -50,16 +64,23 @@ android_library(
 
 java_library(
     name = "tensorflowlitelib",
-    srcs = glob(
-        [
-            "src/main/java/org/tensorflow/lite/*.java",
-        ],
-    ),
+    srcs = JAVA_SRCS,
     javacopts = JAVACOPTS,
     visibility = ["//visibility:public"],
     deps = [
         ":libtensorflowlite_jni.so",
-        "//tensorflow/contrib/lite/java/src/main/native",
+        "@org_checkerframework_qual",
+    ],
+)
+
+# EXPERIMENTAL: Java target that supports TensorFlow op execution with TFLite.
+java_library(
+    name = "tensorflowlitelib_flex",
+    srcs = JAVA_SRCS,
+    javacopts = JAVACOPTS,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":libtensorflowlite_flex_jni.so",
         "@org_checkerframework_qual",
     ],
 )
@@ -72,7 +93,6 @@ java_test(
     tags = ["no_oss"],
     test_class = "org.tensorflow.lite.TensorFlowLiteTest",
     deps = [
-        ":libtensorflowlite_jni.so",
         ":tensorflowlitelib",
         "@com_google_truth",
         "@junit",
@@ -87,7 +107,6 @@ java_test(
     tags = ["no_oss"],
     test_class = "org.tensorflow.lite.DataTypeTest",
     deps = [
-        ":libtensorflowlite_jni.so",
         ":tensorflowlitelib",
         "@com_google_truth",
         "@junit",
@@ -110,7 +129,6 @@ java_test(
     tags = ["no_oss"],
     test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest",
     deps = [
-        ":libtensorflowlite_jni.so",
         ":tensorflowlitelib",
         "@com_google_truth",
         "@junit",
@@ -125,19 +143,37 @@ java_test(
     data = [
         "src/testdata/add.bin",
         "src/testdata/mobilenet.tflite.bin",
+        "//tensorflow/contrib/lite:testdata/multi_add_flex.bin",
     ],
     javacopts = JAVACOPTS,
     tags = ["no_oss"],
     test_class = "org.tensorflow.lite.InterpreterTest",
     visibility = ["//visibility:private"],
     deps = [
-        ":libtensorflowlite_jni.so",
         ":tensorflowlitelib",
         "@com_google_truth",
         "@junit",
     ],
 )
 
+java_test(
+    name = "InterpreterFlexTest",
+    size = "small",
+    srcs = ["src/test/java/org/tensorflow/lite/InterpreterFlexTest.java"],
+    data = [
+        "//tensorflow/contrib/lite:testdata/multi_add_flex.bin",
+    ],
+    javacopts = JAVACOPTS,
+    tags = ["no_oss"],
+    test_class = "org.tensorflow.lite.InterpreterFlexTest",
+    visibility = ["//visibility:private"],
+    deps = [
+        ":tensorflowlitelib_flex",
+        "@com_google_truth",
+        "@junit",
+    ],
+)
+
 java_test(
     name = "TensorTest",
     size = "small",
@@ -164,14 +200,29 @@ filegroup(
 )
 
 cc_library(
-    name = "tflite_runtime",
+    name = "tensorflowlite_native",
     srcs = ["libtensorflowlite_jni.so"],
     visibility = ["//visibility:public"],
 )
 
+cc_library(
+    name = "tensorflowlite_native_flex",
+    srcs = ["libtensorflowlite_flex_jni.so"],
+    visibility = ["//visibility:public"],
+)
+
 tflite_jni_binary(
     name = "libtensorflowlite_jni.so",
     deps = [
         "//tensorflow/contrib/lite/java/src/main/native",
     ],
 )
+
+# EXPERIMENTAL: Native target that supports TensorFlow op execution with TFLite.
+tflite_jni_binary(
+    name = "libtensorflowlite_flex_jni.so",
+    deps = [
+        "//tensorflow/contrib/lite/delegates/flex:delegate",
+        "//tensorflow/contrib/lite/java/src/main/native",
+    ],
+)
diff --git a/tensorflow/contrib/lite/java/aar_with_jni.bzl b/tensorflow/contrib/lite/java/aar_with_jni.bzl
index 9d2aead266..360d622b1b 100644
--- a/tensorflow/contrib/lite/java/aar_with_jni.bzl
+++ b/tensorflow/contrib/lite/java/aar_with_jni.bzl
@@ -30,7 +30,10 @@ EOF
         # In some platforms we don't have an Android SDK/NDK and this target
         # can't be built. We need to prevent the build system from trying to
         # use the target in that case.
-        tags = ["manual"],
+        tags = [
+            "manual",
+            "no_cuda_on_cpu_tap",
+        ],
     )
 
     native.genrule(
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java
index 711638a9f9..d5447b3bf8 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java
@@ -18,7 +18,8 @@ package org.tensorflow.lite;
 /** Static utility methods loading the TensorFlowLite runtime. */
 public final class TensorFlowLite {
 
-  private static final String LIBNAME = "tensorflowlite_jni";
+  private static final String PRIMARY_LIBNAME = "tensorflowlite_jni";
+  private static final String FALLBACK_LIBNAME = "tensorflowlite_flex_jni";
 
   private TensorFlowLite() {}
 
@@ -29,13 +30,24 @@ public final class TensorFlowLite {
    * Load the TensorFlowLite runtime C library.
    */
   static boolean init() {
+    Throwable primaryLibException;
     try {
-      System.loadLibrary(LIBNAME);
+      System.loadLibrary(PRIMARY_LIBNAME);
       return true;
     } catch (UnsatisfiedLinkError e) {
-      System.err.println("TensorFlowLite: failed to load native library: " + e.getMessage());
-      return false;
+      primaryLibException = e;
     }
+
+    try {
+      System.loadLibrary(FALLBACK_LIBNAME);
+      return true;
+    } catch (UnsatisfiedLinkError e) {
+      // If the fallback fails, log the error for the primary load instead.
+      System.err.println(
+          "TensorFlowLite: failed to load native library: " + primaryLibException.getMessage());
+    }
+
+    return false;
   }
 
   static {
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java
new file mode 100644
index 0000000000..2791c3864b
--- /dev/null
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java
@@ -0,0 +1,46 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package org.tensorflow.lite;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.io.File;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Unit tests for {@link org.tensorflow.lite.Interpreter} that validate execution with models that
+ * have TensorFlow ops.
+ */
+@RunWith(JUnit4.class)
+public final class InterpreterFlexTest {
+
+  private static final File FLEX_MODEL_FILE =
+      new File("tensorflow/contrib/lite/testdata/multi_add_flex.bin");
+
+  /** Smoke test validating that flex model loading works when the flex delegate is linked. */
+  @Test
+  public void testFlexModel() throws Exception {
+    try (Interpreter interpreter = new Interpreter(FLEX_MODEL_FILE)) {
+      assertThat(interpreter.getInputTensorCount()).isEqualTo(4);
+      assertThat(interpreter.getInputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
+      assertThat(interpreter.getOutputTensorCount()).isEqualTo(4);
+      assertThat(interpreter.getOutputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
+      interpreter.run(new float[1], new float[1]);
+    }
+  }
+}
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
index a98fca0132..f8b73c7cf3 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
@@ -43,6 +43,9 @@ public final class InterpreterTest {
   private static final File MOBILENET_MODEL_FILE =
       new File("tensorflow/contrib/lite/java/src/testdata/mobilenet.tflite.bin");
 
+  private static final File FLEX_MODEL_FILE =
+      new File("tensorflow/contrib/lite/testdata/multi_add_flex.bin");
+
   @Test
   public void testInterpreter() throws Exception {
     Interpreter interpreter = new Interpreter(MODEL_FILE);
@@ -345,4 +348,15 @@ public final class InterpreterTest {
     interpreter.close();
     interpreter.close();
   }
+
+  /** Smoke test validating that flex model loading fails when the flex delegate is not linked. */
+  @Test
+  public void testFlexModel() throws Exception {
+    try {
+      new Interpreter(FLEX_MODEL_FILE);
+      fail();
+    } catch (IllegalStateException e) {
+      // Expected failure.
+    }
+  }
 }
-- 
GitLab


From 89c887558d8b0067213c39a79d5d048d3422b6dd Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Fri, 5 Oct 2018 16:02:49 -0700
Subject: [PATCH 0467/1085] [TF:XLA] Bump open source abseil revision to
 e821380d69a549dc64900693942789d21aa4df5e

PiperOrigin-RevId: 215981413
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b9ced1bd6c..6f5aa85b01 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -112,11 +112,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "507903ef9353cb25cccd0a6840048fdd348fd20e98314d694f04a990c0f277e3",
-        strip_prefix = "abseil-cpp-f21d187b80e3b7f08fb279775ea9c8b48c636030",
+        sha256 = "f186bf5d9fce3037c602a21f86facbdd317adecef36e1726ec7bc7b496943a82",
+        strip_prefix = "abseil-cpp-e821380d69a549dc64900693942789d21aa4df5e",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/f21d187b80e3b7f08fb279775ea9c8b48c636030.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz",
         ],
     )
 
-- 
GitLab


From 1daaf0fabee1c59af00e14f358d08ac9f5390b9f Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 5 Oct 2018 16:32:30 -0700
Subject: [PATCH 0468/1085] Orders non-resource-affecting stateful ops in
 defuns.

PiperOrigin-RevId: 215985679
---
 tensorflow/python/eager/function.py                |  7 +++++++
 tensorflow/python/kernel_tests/logging_ops_test.py | 13 +++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 2750461fb2..f06148b5d2 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1906,8 +1906,10 @@ class AutomaticControlDependencies(object):
               last_op_using_resource_tensor[inp] = op
         ops_which_must_run = set([op])
         continue
+      found_resource = False
       for inp in op.inputs:
         if inp.dtype == dtypes_module.resource:
+          found_resource = True
           # Deal with switches, finally.
           if inp.op.type == "Switch":
             self._process_switch(inp.op, ops_which_must_run,
@@ -1922,6 +1924,11 @@ class AutomaticControlDependencies(object):
           if inp in merge_for_resource:
             merge_for_resource[inp]._add_control_input(op)  # pylint: disable=protected-access
           last_op_using_resource_tensor[inp] = op
+      if (op.op_def.is_stateful and not found_resource
+          and op._control_flow_context is None):  # pylint: disable=protected-access
+        if None in last_op_using_resource_tensor:
+          op._add_control_input(last_op_using_resource_tensor[None])  # pylint: disable=protected-access
+        last_op_using_resource_tensor[None] = op
       control_inputs = [c for c in control_inputs
                         if c._control_flow_context is op._control_flow_context]  # pylint: disable=protected-access
       op._add_control_inputs(control_inputs)  # pylint: disable=protected-access
diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py
index 4beddd00bb..2f19ecc0e6 100644
--- a/tensorflow/python/kernel_tests/logging_ops_test.py
+++ b/tensorflow/python/kernel_tests/logging_ops_test.py
@@ -306,6 +306,19 @@ class PrintV2Test(test.TestCase):
           logging_ops.print_v2(tensor)
         self.assertTrue((expected + "\n") in printed.contents())
 
+  def testPrintsOrderedInDefun(self):
+    with context.eager_mode():
+
+      @function.defun
+      def prints():
+        logging_ops.print_v2("A")
+        logging_ops.print_v2("B")
+        logging_ops.print_v2("C")
+
+      with self.captureWritesToStream(sys.stderr) as printed:
+        prints()
+      self.assertTrue(("A\nB\nC\n") in printed.contents())
+
   @test_util.run_in_graph_and_eager_modes()
   def testPrintInDefunWithoutExplicitEvalOfPrint(self):
     @function.defun
-- 
GitLab


From 29af23aeadd1d6fccbfa4223b58dad8f5b8df4f8 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 5 Oct 2018 16:47:07 -0700
Subject: [PATCH 0469/1085] Fix api_compatibility_test diff for large files.
 assertEqual might be applied instead of assertMultiLineEqual if input is too
 large (https://bugs.python.org/issue11763). This change is switching to use
 unified_diff in that case.

PiperOrigin-RevId: 215987656
---
 tensorflow/python/util/protobuf/compare.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/util/protobuf/compare.py b/tensorflow/python/util/protobuf/compare.py
index a0e6bf65cf..3a3af4bffa 100644
--- a/tensorflow/python/util/protobuf/compare.py
+++ b/tensorflow/python/util/protobuf/compare.py
@@ -63,6 +63,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import difflib
 
 import six
 
@@ -101,10 +102,19 @@ def assertProtoEqual(self, a, b, check_initialized=True,  # pylint: disable=inva
     if normalize_numbers:
       NormalizeNumberFields(pb)
 
-  self.assertMultiLineEqual(
-      text_format.MessageToString(a, descriptor_pool=pool),
-      text_format.MessageToString(b, descriptor_pool=pool),
-      msg=msg)
+  a_str = text_format.MessageToString(a, descriptor_pool=pool)
+  b_str = text_format.MessageToString(b, descriptor_pool=pool)
+
+  # Some Python versions would perform regular diff instead of multi-line
+  # diff if string is longer than 2**16. We substitute this behavior
+  # with a call to unified_diff instead to have easier-to-read diffs.
+  # For context, see: https://bugs.python.org/issue11763.
+  if len(a_str) < 2**16 and len(b_str) < 2**16:
+    self.assertMultiLineEqual(a_str, b_str, msg=msg)
+  else:
+    diff = '\n' + ''.join(difflib.unified_diff(a_str.splitlines(True),
+                                               b_str.splitlines(True)))
+    self.fail('%s : %s' % (msg, diff))
 
 
 def NormalizeNumberFields(pb):
-- 
GitLab


From 55081a9d21ab42834ac4fb70351e3d2ee13ef78b Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Fri, 5 Oct 2018 16:47:51 -0700
Subject: [PATCH 0470/1085] [XLA:GPU] Use a struct for the return value of
 CudnnConvolutionAlgorithmPicker::PickBestAlgorithm.

Using a struct lets us return additional data -- namely, the elapsed time to
run the best algo -- without adding a fourth entry to the tuple, which would be
confusing.

No functional change.

PiperOrigin-RevId: 215987795
---
 tensorflow/compiler/xla/service/gpu/BUILD     |  1 +
 .../gpu/cudnn_convolution_algorithm_picker.cc | 40 ++++++++-----------
 .../gpu/cudnn_convolution_algorithm_picker.h  | 11 ++++-
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 522e9f5948..7b84f691f6 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -404,6 +404,7 @@ cc_library(
         "//tensorflow/core:stream_executor_no_cuda",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
+        "@com_google_absl//absl/time",
         "@com_google_absl//absl/types:optional",
     ],
 )
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index 7125673887..590c0a7d54 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -145,7 +145,7 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) {
 // cache misses and doing extra work.  Overall, caching doesn't seem worth the
 // trouble, but we may want to revisit this if we ever find a model where
 // caching would speed up compilation a lot.
-StatusOr<std::tuple<int64, bool, int64>>
+StatusOr<CudnnConvolutionAlgorithmPicker::AutotuneResult>
 CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
     HloCustomCallInstruction* instr) {
   // TODO(timshen): for now only check fp16. It can be expanded to other types,
@@ -316,9 +316,10 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
             << AlgorithmToString(best_result.algorithm()) << ", takes "
             << best_result.elapsed_time_in_ms() << "ms, and uses "
             << best_result_bytes_used << "B of scratch memory.";
-    return std::make_tuple(best_result.algorithm().algo_id(),
-                           best_result.algorithm().tensor_ops_enabled(),
-                           best_result_bytes_used);
+    return AutotuneResult{best_result.algorithm().algo_id(),
+                          best_result.algorithm().tensor_ops_enabled(),
+                          best_result_bytes_used,
+                          absl::Milliseconds(best_result.elapsed_time_in_ms())};
   }
 
   return InternalError(
@@ -331,37 +332,30 @@ StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnInstruction(
     HloInstruction* instr) {
   CHECK(IsCustomCallToDnnConvolution(*instr));
 
-  StatusOr<std::tuple<int64, bool, int64>> alg_scratch_and_tc =
+  StatusOr<AutotuneResult> best_algo_or =
       PickBestAlgorithm(Cast<HloCustomCallInstruction>(instr));
-
-  if (!alg_scratch_and_tc.ok()) {
-    LOG(ERROR) << alg_scratch_and_tc.status();
+  if (!best_algo_or.ok()) {
+    LOG(ERROR) << best_algo_or.status();
     return false;
   }
 
-  int64 algorithm;
-  bool tensor_ops_enabled;
-  int64 scratch_bytes;
-
-  std::tie(algorithm, tensor_ops_enabled, scratch_bytes) =
-      alg_scratch_and_tc.ConsumeValueOrDie();
-
-  VLOG(1) << "Setting cudnn conv to use algorithm " << algorithm << " and "
-          << NumBytesToString(scratch_bytes)
+  auto best_algo = std::move(best_algo_or).ValueOrDie();
+  VLOG(1) << "Setting cudnn conv to use algorithm " << best_algo.algorithm
+          << " and " << NumBytesToString(best_algo.scratch_bytes)
           << " of scratch memory: " << instr->ToString()
-          << " tensor_ops_enabled: " << tensor_ops_enabled;
+          << " tensor_ops_enabled: " << best_algo.tensor_ops_enabled;
 
   // Replace instr with a new CustomCall which has the correct algorithm, and
   // whose output shape has the appropriate amount of scratch memory.
   HloComputation* computation = instr->parent();
-  Shape new_call_shape =
-      ShapeUtil::MakeTupleShape({instr->shape().tuple_shapes(0),
-                                 ShapeUtil::MakeShape(U8, {scratch_bytes})});
+  Shape new_call_shape = ShapeUtil::MakeTupleShape(
+      {instr->shape().tuple_shapes(0),
+       ShapeUtil::MakeShape(U8, {best_algo.scratch_bytes})});
 
   TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config,
                       instr->backend_config<CudnnConvBackendConfig>());
-  backend_config.set_algorithm(algorithm);
-  backend_config.set_tensor_ops_enabled(tensor_ops_enabled);
+  backend_config.set_algorithm(best_algo.algorithm);
+  backend_config.set_tensor_ops_enabled(best_algo.tensor_ops_enabled);
 
   HloInstruction* new_call = computation->AddInstruction(
       instr->CloneWithNewOperands(new_call_shape, instr->operands()));
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
index aeda2fc7f8..136c32210a 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_
 
+#include "absl/time/time.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/service/device_memory_allocator.h"
@@ -47,10 +48,16 @@ class CudnnConvolutionAlgorithmPicker : public HloModulePass {
   StatusOr<bool> Run(HloModule* module) override;
 
  private:
+  struct AutotuneResult {
+    int64 algorithm;
+    bool tensor_ops_enabled;
+    int64 scratch_bytes;
+    absl::Duration runtime;
+  };
+
   StatusOr<bool> RunOnComputation(HloComputation* computation);
   StatusOr<bool> RunOnInstruction(HloInstruction* instr);
-  StatusOr<std::tuple<int64, bool, int64>> PickBestAlgorithm(
-      HloCustomCallInstruction* instr);
+  StatusOr<AutotuneResult> PickBestAlgorithm(HloCustomCallInstruction* instr);
 
   se::StreamExecutor* stream_exec_;                   // never null
   DeviceMemoryAllocator* allocator_;                  // may be null
-- 
GitLab


From ab97f1323bd2a98d20ed82dc3ff8585481961f0d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 16:59:39 -0700
Subject: [PATCH 0471/1085] Automated rollback of commit
 d258207f1583df4faa452265b051879af6c15dac

PiperOrigin-RevId: 215989111
---
 tensorflow/python/ops/array_ops.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 4be9c532f4..e3e4d5f910 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1407,8 +1407,13 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
         gen_array_ops.conjugate_transpose
         if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose)
     if perm is None:
-      rank = gen_array_ops.rank(a)
-      perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
+      a = ops.convert_to_tensor(a, name="a")
+      if not a.get_shape().ndims:
+        rank = gen_array_ops.rank(a)
+        perm = (rank - 1) - gen_math_ops._range(0, rank, 1)
+      else:
+        rank = a.get_shape().ndims
+        perm = (rank - 1) - np.arange(rank)
       ret = transpose_fn(a, perm, name=name)
       # NOTE(mrry): Setting the shape explicitly because
       #   reverse is not handled by the shape function.
-- 
GitLab


From 15d399cd8590c18dc643d979883fe4201c8ea631 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 5 Oct 2018 17:01:01 -0700
Subject: [PATCH 0472/1085] [tf.data vectorization] Feed inputs to vectorizers
 with notion of stackedness

PiperOrigin-RevId: 215989259
---
 .../optimizers/data/vectorization/BUILD       |  10 ++
 .../data/vectorization/cast_vectorizer.cc     |  16 +--
 .../data/vectorization/unpack_vectorizer.cc   |  16 +--
 .../data/vectorization/vectorizer.h           |  19 ++-
 .../data/vectorization/vectorizer_registry.cc |   2 -
 .../data/vectorization/vectorizer_registry.h  |  15 +--
 .../vectorization/vectorizer_registry_test.cc |  11 +-
 .../data/vectorization/wrapped_tensor.h       |  44 +++++++
 .../optimizers/data/vectorization_utils.cc    | 116 +++++++++---------
 9 files changed, 144 insertions(+), 105 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h

diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
index 37aa24b947..985d6c6c3a 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -12,10 +12,20 @@ VECTORIZER_DEPS = [
     "//tensorflow/core/grappler/optimizers/data:graph_utils",
 ] + tf_protos_all()
 
+cc_library(
+    name = "wrapped_tensor",
+    hdrs = ["wrapped_tensor.h"],
+    deps = [
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:lib",
+    ],
+)
+
 cc_library(
     name = "vectorizer",
     hdrs = ["vectorizer.h"],
     deps = [
+        ":wrapped_tensor",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:lib",
     ] + tf_protos_all(),
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
index 3af6bab409..f445157531 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/cast_vectorizer.cc
@@ -19,13 +19,13 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
+namespace {
 
 class CastVectorizer : public Vectorizer {
  public:
   Status Vectorize(const Node& node, Graph* outer_scope,
-                   std::vector<Port>* input_ports,
-                   std::vector<Port>* output_ports) override {
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
     Status s;
     if (node.num_inputs() != 1) {
       return errors::Internal("Cast op should only have one input.");
@@ -35,15 +35,17 @@ class CastVectorizer : public Vectorizer {
     auto new_cast_node = outer_scope->AddNode(node.def(), &s);
     TF_RETURN_IF_ERROR(s);
 
-    // Add input and output mappings
-    input_ports->push_back({new_cast_node, 0});
-    output_ports->push_back({new_cast_node, 0});
+    outer_scope->AddEdge(inputs[0].node, inputs[0].output_index, new_cast_node,
+                         0);
+
+    // Add output mappings
+    outputs->push_back({new_cast_node, 0, true});
     return Status::OK();
   }
 };
 
 REGISTER_VECTORIZER("Cast", CastVectorizer);
 
-}  // namespace vectorization_utils
+}  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
index 74ce520ce1..f1ba741821 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
@@ -19,15 +19,15 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
+namespace {
 
 class UnpackVectorizer : public Vectorizer {
  public:
   Status Vectorize(const Node& node, Graph* outer_scope,
-                   std::vector<Port>* input_ports,
-                   std::vector<Port>* output_ports) override {
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
     Status s;
-    if (node.num_inputs() != 1) {
+    if (node.num_inputs() != 1 || inputs.size() != 1) {
       return errors::Internal("Unpack op should only have one input.");
     }
 
@@ -39,13 +39,13 @@ class UnpackVectorizer : public Vectorizer {
     int new_axis = node.def().attr().at("axis").i() + 1;
     new_unpack_node->AddAttr("axis", new_axis);
 
-    // Add the input mappings
-    input_ports->push_back({new_unpack_node, 0});
+    outer_scope->AddEdge(inputs[0].node, inputs[0].output_index,
+                         new_unpack_node, 0);
 
     // Add the output mappings
     int num = node.def().attr().at("num").i();
     for (int i = 0; i < num; ++i) {
-      output_ports->push_back({new_unpack_node, i});
+      outputs->push_back({new_unpack_node, i, true});
     }
 
     return Status::OK();
@@ -54,6 +54,6 @@ class UnpackVectorizer : public Vectorizer {
 
 REGISTER_VECTORIZER("Unpack", UnpackVectorizer);
 
-}  // namespace vectorization_utils
+}  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
index 56eb88c95e..8d4676aae0 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer.h
@@ -18,15 +18,12 @@ limitations under the License.
 
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
-
-// Describes a tensor with its operation Node and output position
-typedef std::pair<Node*, int> Port;
 
 // Interface for vectorization of TensorFlow operations. See `CastVectorizer`
 // for an example.
@@ -36,17 +33,17 @@ class Vectorizer {
 
   // Vectorizes an operation, `node`, by adding Node(s) to `outer_scope`
   // that produce the same vector output(s) as executing `node`'s op
-  // on elements of the vector inputs. The new Node(s) collectively have the
+  // on elements of `inputs`. The new Node(s) collectively have the
   // same number of input and output ports as the node being converted.
-  // Adds mappings for the new nodes' input and output ports to `inputs` and
-  // `outputs` respectively, where the i'th Port in inputs/outputs
-  // corresponds to the i'th input/output port of the node to be converted.
+  // Adds edges between the newly created nodes and nodes in `inputs`, and adds
+  // mappings to the new nodes' output ports to `outputs`, where the i'th
+  // value in `outputs` corresponds to the i'th output port of the node
+  // to be converted.
   virtual Status Vectorize(const Node& node, Graph* outer_scope,
-                           std::vector<Port>* input_ports,
-                           std::vector<Port>* output_ports) = 0;
+                           std::vector<WrappedTensor>&& inputs,
+                           std::vector<WrappedTensor>* outputs) = 0;
 };
 
-}  // namespace vectorization_utils
 }  // namespace grappler
 }  // namespace tensorflow
 #endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_VECTORIZER_H_
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc
index a6551e36ac..e1cf77a7d5 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.cc
@@ -19,7 +19,6 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
 
 VectorizerRegistry* VectorizerRegistry::Global() {
   static VectorizerRegistry* registry = new VectorizerRegistry;
@@ -42,6 +41,5 @@ void VectorizerRegistry::Register(const string& op_type,
   vectorizers_.insert(std::pair<const string&, std::unique_ptr<Vectorizer>>(
       op_type, std::move(vectorizer)));
 }
-}  // namespace vectorization_utils
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h
index 16159d47ca..ad54c74933 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h
@@ -23,7 +23,6 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
 
 // A global VectorizerRegistry is used to hold all the vectorizers.
 class VectorizerRegistry {
@@ -59,16 +58,12 @@ class VectorizerRegistration {
 #define REGISTER_VECTORIZER_UNIQ_HELPER(ctr, op_type, vectorizer) \
   REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer)
 
-#define REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer)                  \
-  static ::tensorflow::grappler::vectorization_utils::                      \
-      vectorizer_registration::VectorizerRegistration                       \
-          vectorizer_registration_##ctr(                                    \
-              op_type,                                                      \
-              ::std::unique_ptr<                                            \
-                  ::tensorflow::grappler::vectorization_utils::Vectorizer>( \
-                  new vectorizer()))
+#define REGISTER_VECTORIZER_UNIQ(ctr, op_type, vectorizer)                \
+  static ::tensorflow::grappler::vectorizer_registration::                \
+      VectorizerRegistration vectorizer_registration_##ctr(               \
+          op_type, ::std::unique_ptr<::tensorflow::grappler::Vectorizer>( \
+                       new vectorizer()))
 
-}  // namespace vectorization_utils
 }  // namespace grappler
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
index 663ceba027..054aeb9a8f 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry_test.cc
@@ -20,13 +20,12 @@ limitations under the License.
 
 namespace tensorflow {
 namespace grappler {
-namespace vectorization_utils {
 
 class TestVectorizer : public Vectorizer {
  public:
   Status Vectorize(const Node& node, Graph* outer_scope,
-                   std::vector<Port>* inputs,
-                   std::vector<Port>* outputs) override {
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
     return Status::OK();
   }
 };
@@ -43,10 +42,10 @@ TEST(TestVectorizer, TestTestVectorizer) {
   NodeDef node_def;
   Status s;
   Node* node = g.AddNode(node_def, &s);
-  std::vector<Port> inputs, outputs;
-  EXPECT_TRUE(vectorizer->Vectorize(*node, &g, &inputs, &outputs).ok());
+  std::vector<WrappedTensor> inputs, outputs;
+  EXPECT_TRUE(
+      vectorizer->Vectorize(*node, &g, std::move(inputs), &outputs).ok());
 }
 
-}  // namespace vectorization_utils
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h b/tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h
new file mode 100644
index 0000000000..4439b4ab4e
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/wrapped_tensor.h
@@ -0,0 +1,44 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_WRAPPED_TENSOR_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_WRAPPED_TENSOR_H_
+
+#include "tensorflow/core/graph/graph.h"
+
+namespace tensorflow {
+namespace grappler {
+
+// Represents a tensor that has been vectorized.
+struct WrappedTensor {
+  Node* const node;
+  const int output_index;
+
+  // Whether the tensor is stacked, i.e. represents the results of applying
+  // the operation on all slices of the input, where each row i of the
+  // tensor corresponds to the op's output on slice i of the input. False
+  // if the tensor is not stacked, i.e. represents the result of the op on
+  // a single slice of the input, where the result does not vary between
+  // slices.
+  bool stacked;
+
+  WrappedTensor(Node* node, int output_index, bool stacked)
+      : node(node), output_index(output_index), stacked(stacked) {}
+};
+
+}  // namespace grappler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_VECTORIZATION_WRAPPED_TENSOR_H_
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index 344c420902..ba857ab5d9 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -45,22 +45,6 @@ namespace {
 // Describes a tensor with its operation Node and output position
 typedef std::pair<Node*, int> TensorDesc;
 
-// Equivalent to python Pfor's WrappedTensor struct
-struct WrappedTensor {
-  TensorDesc tensor;
-
-  // Whether the tensor is stacked, i.e. represents the results of applying
-  // the operation on all slices of the input, where each row i of the
-  // tensor corresponds to the op's output on slice i of the input. False
-  // if the tensor is not stacked, i.e. represents the result of the op on
-  // a single slice of the input, where the result does not vary between
-  // slices.
-  bool stacked;
-
-  WrappedTensor(TensorDesc&& tensor, bool stacked)
-      : tensor(std::move(tensor)), stacked(stacked) {}
-};
-
 const char* const kRetValOp = "_Retval";
 
 void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src,
@@ -239,34 +223,48 @@ Status Vectorization::AddConversionMapping(Node* op_node) {
     return errors::Unimplemented("No vectorizer registered for op: ",
                                  op_node->type_string());
   }
-  std::vector<Port> input_ports, output_ports;
-  input_ports.reserve(op_node->num_inputs());
-  output_ports.reserve(op_node->num_outputs());
-  TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(),
-                                           &input_ports, &output_ports));
+  std::vector<WrappedTensor> inputs, outputs;
+  inputs.reserve(op_node->num_inputs());
+  outputs.reserve(op_node->num_outputs());
 
   std::vector<const Edge*> input_edges;
   TF_RETURN_IF_ERROR(op_node->input_edges(&input_edges));
 
-  if (op_node->num_outputs() != output_ports.size() ||
-      op_node->num_inputs() != input_ports.size() ||
-      input_edges.size() != input_ports.size()) {
-    return errors::Internal("Vectorizer inputs/outputs don't match.");
-  }
-
-  // Promote the inputs of the op to MapDefun outputs and connect the edges
-  // accordingly.
+  // The inputs for the node to be converted may already have been converted
+  // themselves. For those that are not, we promote them to MapDefun outputs.
   for (size_t i = 0; i < op_node->num_inputs(); ++i) {
     auto edge = input_edges[i];
-    TF_RETURN_IF_ERROR(AddMapDefunOutput(map_defun_fn_.get(), map_defun_node_,
-                                         {edge->src(), edge->src_output()}));
-    outer_scope_->AddEdge(map_defun_node_, map_defun_fn_->ret_nodes.size() - 1,
-                          input_ports[i].first, input_ports[i].second);
+    if (auto found = gtl::FindOrNull(conversion_map_,
+                                     {edge->src(), edge->src_output()})) {
+      inputs.push_back(*found);
+    } else {
+      // TODO(rachelim): Handle the case where unconverted inputs are unstacked.
+      // We assume that all unconverted inputs will be stacked, since we
+      // converted all unstacked nodes in `Initialize`. However, it's actually
+      // possible that yet-unconverted nodes may produce unstacked outputs after
+      // they are vectorized. (For example, see the "Shape" converter in
+      // tensorflow/python/ops/parallel_for/pfor.py). If a vectorizer expects
+      // an unstacked input but receives a stacked one, vectorizer->Vectorize
+      // will return an error.
+      TF_RETURN_IF_ERROR(AddMapDefunOutput(map_defun_fn_.get(), map_defun_node_,
+                                           {edge->src(), edge->src_output()}));
+      int output_index = map_defun_fn_->ret_nodes.size() - 1;
+      inputs.push_back({map_defun_node_, output_index, true});
+    }
+  }
+
+  TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(),
+                                           std::move(inputs), &outputs));
+
+  if (op_node->num_outputs() != outputs.size()) {
+    return errors::Internal(
+        "Number of vectorizer outputs does not match. Expected: ",
+        op_node->num_outputs(), " Actual: ", outputs.size());
   }
 
   // Add output mappings.
   for (size_t i = 0; i < op_node->num_outputs(); ++i) {
-    conversion_map_.insert({{op_node, i}, {std::move(output_ports[i]), true}});
+    conversion_map_.insert({{op_node, i}, outputs[i]});
   }
 
   return Status::OK();
@@ -281,25 +279,22 @@ Status Vectorization::ConvertOutput(int output_position) {
 
   TensorDesc output({ret_edge->src(), ret_edge->src_output()});
   TensorDesc converted_output;
-  if (auto found = gtl::FindOrNull(conversion_map_, output)) {
-    // It's possible the output already has a mapping, if it comes from a node
-    // that has already been converted.
-    if (found->stacked) {
-      converted_output = found->tensor;
-    } else {
-      // Some outputs may be unstacked if they don't derive from arg nodes
-      // (for example, if a function returns a constant). For these, we
-      // have to add extra nodes to tile it in the 0th dimension.
-      TF_RETURN_IF_ERROR(StackTensor(found, &converted_output));
-    }
-  } else {
-    // Note: All unstacked nodes are converted ahead of time in `Initialize`,
-    // and here we assume that all op vectorizers create only stacked outputs.
-    // This may not hold in the future, as more vectorizers are added that
-    // may actually create unstacked outputs. For example, see the `Shape`
-    // converter in third_party/tensorflow/python/ops/parallel_for/pfor.py
+
+  // It's possible the output already has a mapping, if it comes from a node
+  // that has already been converted.
+  auto found = gtl::FindOrNull(conversion_map_, output);
+  if (!found) {
     TF_RETURN_IF_ERROR(AddConversionMapping(output.first));
-    converted_output = conversion_map_.at(output).tensor;
+    found = &conversion_map_.at(output);
+  }
+
+  if (found->stacked) {
+    converted_output = {found->node, found->output_index};
+  } else {
+    // Some outputs may be unstacked if they don't derive from arg nodes
+    // (for example, if a function returns a constant). For these, we
+    // have to add extra nodes to tile it in the 0th dimension.
+    TF_RETURN_IF_ERROR(StackTensor(found, &converted_output));
   }
 
   ReplaceEdgeSources({map_defun_node_, output_position}, converted_output,
@@ -455,7 +450,7 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked,
 
   Node* ones_shape;
   TF_RETURN_IF_ERROR(node_builder("Shape")
-                         .Input(unstacked->tensor.first)  // input
+                         .Input(unstacked->node)  // input
                          .Finalize(g, &ones_shape));
 
   Node* ones;
@@ -473,8 +468,8 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked,
 
   Node* expand_dims;
   TF_RETURN_IF_ERROR(node_builder("ExpandDims")
-                         .Input(unstacked->tensor.first)  // input
-                         .Input(const_0)                  // dim
+                         .Input(unstacked->node)  // input
+                         .Input(const_0)          // dim
                          .Finalize(g, &expand_dims));
 
   TF_RETURN_IF_ERROR(node_builder("Tile")
@@ -491,11 +486,11 @@ Status Vectorization::AddArgNodeMappings() {
     TF_RETURN_IF_ERROR(map_defun_node_->input_node(
         arg_node->attrs().Find("index")->i(), &input_node));
 
-    conversion_map_.insert({{arg_node, 0}, {{input_node, 0}, true}});
+    conversion_map_.insert({{arg_node, 0}, {input_node, 0, true}});
 
     // Control inputs
     conversion_map_.insert({{arg_node, Graph::kControlSlot},
-                            {{input_node, Graph::kControlSlot}, true}});
+                            {input_node, Graph::kControlSlot, true}});
   }
   return Status::OK();
 }
@@ -541,7 +536,7 @@ bool Vectorization::AddUnstackedNodeMappingsHelper(TensorDesc&& tensor,
 
     if (auto found = gtl::FindOrNull(conversion_map_,
                                      {edge->src(), edge->src_output()})) {
-      outer_scope_->AddEdge(found->tensor.first, found->tensor.second, node,
+      outer_scope_->AddEdge(found->node, found->output_index, node,
                             edge->dst_input());
     } else {
       status->Update(errors::Internal(
@@ -552,11 +547,10 @@ bool Vectorization::AddUnstackedNodeMappingsHelper(TensorDesc&& tensor,
 
   // Add output mappings
   for (int i = 0; i < tensor.first->num_outputs(); ++i) {
-    conversion_map_.insert(
-        {{tensor.first, i}, WrappedTensor({node, i}, false)});
+    conversion_map_.insert({{tensor.first, i}, WrappedTensor(node, i, false)});
   }
   conversion_map_.insert({{tensor.first, Graph::kControlSlot},
-                          WrappedTensor({node, Graph::kControlSlot}, false)});
+                          WrappedTensor(node, Graph::kControlSlot, false)});
 
   return true;
 }
-- 
GitLab


From 4831740f90eaf266a99d3ffa7d390d54325b689f Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Fri, 5 Oct 2018 17:05:17 -0700
Subject: [PATCH 0473/1085] [XLA:GPU] Remove hidden flag for disabling
 heuristic layout assignment.

Heuristic NCHW/NHWC layout assignment works great; we've never had to flip this
flag.  Might as well remove it and simplify things a bit.

PiperOrigin-RevId: 215989807
---
 tensorflow/compiler/xla/service/gpu/BUILD     | 11 -------
 .../xla/service/gpu/gpu_layout_assignment.cc  | 11 ++-----
 .../compiler/xla/service/gpu/gpu_options.cc   | 28 ----------------
 .../compiler/xla/service/gpu/gpu_options.h    | 33 -------------------
 4 files changed, 2 insertions(+), 81 deletions(-)
 delete mode 100644 tensorflow/compiler/xla/service/gpu/gpu_options.cc
 delete mode 100644 tensorflow/compiler/xla/service/gpu/gpu_options.h

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 7b84f691f6..350fd32537 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -781,7 +781,6 @@ cc_library(
     srcs = ["gpu_layout_assignment.cc"],
     hdrs = ["gpu_layout_assignment.h"],
     deps = [
-        ":gpu_options",
         ":ir_emission_utils",
         ":stream_executor_util",
         "//tensorflow/compiler/xla:shape_util",
@@ -882,16 +881,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "gpu_options",
-    srcs = ["gpu_options.cc"],
-    hdrs = ["gpu_options.h"],
-    deps = [
-        "//tensorflow/compiler/xla/service:hlo_module_config",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
 cc_library(
     name = "stream_executor_util",
     srcs = ["stream_executor_util.cc"],
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index 74352f26aa..1ffe855750 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <memory>
 
 #include "tensorflow/compiler/xla/layout_util.h"
-#include "tensorflow/compiler/xla/service/gpu/gpu_options.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
@@ -125,14 +124,8 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall(
     DataLayout input;
     FilterLayout filter;
     DataLayout output;
-    if (ConvUseLayoutHeuristic(instr->GetModule()->config())) {
-      std::tie(input, filter, output) =
-          HeuristicLayoutAssignment(instr, stream_executor_);
-    } else {
-      input = DataLayout::kBatchDepthYX;
-      filter = FilterLayout::kOutputInputYX;
-      output = DataLayout::kBatchDepthYX;
-    }
+    std::tie(input, filter, output) =
+        HeuristicLayoutAssignment(instr, stream_executor_);
 
     TF_ASSIGN_OR_RETURN(
         std::tie(*input_shape->mutable_layout(),
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.cc b/tensorflow/compiler/xla/service/gpu/gpu_options.cc
deleted file mode 100644
index 35b4b4e20b..0000000000
--- a/tensorflow/compiler/xla/service/gpu/gpu_options.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/gpu/gpu_options.h"
-#include "tensorflow/core/lib/gtl/map_util.h"
-
-namespace xla {
-namespace gpu {
-
-bool ConvUseLayoutHeuristic(const HloModuleConfig& config) {
-  return !config.debug_options().xla_backend_extra_options().count(
-      "xla_gpu_experimental_conv_disable_layout_heuristic");
-}
-
-}  // namespace gpu
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.h b/tensorflow/compiler/xla/service/gpu/gpu_options.h
deleted file mode 100644
index 498d4a9495..0000000000
--- a/tensorflow/compiler/xla/service/gpu/gpu_options.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_
-
-#include "tensorflow/compiler/xla/service/hlo_module_config.h"
-
-// Helper functions for querying options that are specific to the GPU backend.
-
-namespace xla {
-namespace gpu {
-
-// Returns true if we should use heuristics to assign convolution layouts, as
-// opposed to always assigning NCHW.
-bool ConvUseLayoutHeuristic(const HloModuleConfig& config);
-
-}  // namespace gpu
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_
-- 
GitLab


From 213d76a6ed77a696883502c53a3a4f81d2ee4042 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Fri, 5 Oct 2018 17:34:30 -0700
Subject: [PATCH 0474/1085] Simply the logic for bubbling captured tensors when
 building cond_v2 grad. The current logic tries to bubble the forward pass
 tensor to the outermost graph. That might not always be do-able e.g. when the
 cond is inside a while loop it will need to know accumulator logic for
 while_loop. So instead, the cond_grad now captures tensors from the forward
 If op's graph. When the grad If op is built these tensors will be
 appropriately captured by the surrounding FuncGraph.

PiperOrigin-RevId: 215993009
---
 .../kernel_tests/control_flow_ops_py_test.py  |  6 +--
 tensorflow/python/ops/cond_v2_impl.py         | 48 ++++++++-----------
 2 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 7fae5249aa..baea5c0f6d 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -661,8 +661,7 @@ class ControlFlowTest(test.TestCase):
       sess.run(r)
 
   def testCondGrad_1(self):
-    graph = ops.Graph()
-    with graph.as_default():
+    with self.cached_session():
       x = constant_op.constant(10.0, name="x")
       pred = math_ops.less(1, 2)
       fn1 = lambda: array_ops.identity(x)
@@ -670,8 +669,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
 
       grad = gradients_impl.gradients(r, [x])[0]
-      with self.cached_session():
-        self.assertAllEqual(1.0, grad.eval())
+      self.assertAllEqual(1.0, grad.eval())
 
   def testCondGrad_2(self):
     with self.cached_session():
diff --git a/tensorflow/python/ops/cond_v2_impl.py b/tensorflow/python/ops/cond_v2_impl.py
index 195ad11c71..c9aa4d4889 100644
--- a/tensorflow/python/ops/cond_v2_impl.py
+++ b/tensorflow/python/ops/cond_v2_impl.py
@@ -282,9 +282,10 @@ def _resolve_grad_inputs(cond_graph, grad_graph):
      as is.
   2. Tensors in the forward pass graph. These tensors may not be "live"
      when the gradient is being computed. We replace such references by their
-     corresponding tensor in the least common ancestor graph of `grad_graph` and
-     `cond_graph`. Since we export intermediate tensors for all branch
-     functions, this is always possible.
+     corresponding tensor in `cond_graph.outer_graph`. In the case of nested
+     control flow or functions, the gradient logic handling
+     `grad_graph.outer_graph` will make sure the tensor from
+     `cond_graph.outer_graph` is also correctly captured.
 
   Args:
     cond_graph: function.FuncGraph. The forward-pass function.
@@ -296,24 +297,23 @@ def _resolve_grad_inputs(cond_graph, grad_graph):
   new_inputs = []
 
   for t in grad_graph.external_captures:
+    # `t` must either be in `grad_graph.outer_graph` or in the forward
+    # `cond_graph`.
     if t.graph != grad_graph.outer_graph:
-      # `t` is a tensor in `cond_graph` or one of its ancestors. We bubble this
-      # tensor to the least common ancestor of the `cond_graph` and
-      # `grad_graph` so that it is "in-scope" for `grad_graph`.
-      # TODO(srbs): `_is_ancestor` calls may be expensive. Compute the least
-      # common ancestor once and re-use.
-      assert _is_ancestor(cond_graph, t.graph)
-      while not _is_ancestor(grad_graph, t.graph):
-        assert isinstance(t.graph, _function.FuncGraph)
-        if t in t.graph.internal_captures:
-          # TODO(srbs): Consider building a map of internal_captures ->
-          # external_captures instead of searching for `t` twice.
-          t = t.graph.external_captures[t.graph.internal_captures.index(t)]
-        else:
-          # Note: All intermediate tensors are output by the If op.
-          # TODO(srbs): .index() calls may be expensive. Optimize.
-          t = t.graph._if.outputs[t.graph.outputs.index(t)]
-      assert _is_ancestor(grad_graph, t.graph)
+      assert t.graph == cond_graph
+      # `internal_captures` are not treated as intermediates and hence not added
+      # to If op outputs. So we get the outer tensor corresponding to those
+      # from the list of `external_captures`.
+      try:
+        t = t.graph._if.outputs[t.graph.outputs.index(t)]
+      except ValueError:
+        index = t.graph.internal_captures.index(t)
+        t = t.graph.external_captures[index]
+
+      # Note: We rely on the capturing logic of the gradient If op graph to
+      # correctly capture the tensors in `cond_graph.outer_graph`. Both cond_v2
+      # and while_v2 handle this while building their gradient functions.
+      assert t.graph == cond_graph.outer_graph
     new_inputs.append(t)
 
   return new_inputs
@@ -492,11 +492,3 @@ def _get_output_shapes(true_graph_outputs, false_graph_outputs):
       for t_out, f_out in zip(true_graph_outputs, false_graph_outputs)
   ]
   return output_shapes
-
-
-def _is_ancestor(graph, maybe_ancestor):
-  if maybe_ancestor == graph:
-    return True
-  if isinstance(graph, _function.FuncGraph):
-    return _is_ancestor(graph.outer_graph, maybe_ancestor)
-  return False
-- 
GitLab


From 1484bad99cfd46cb63a839643cfce917b6f0cdd8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 19:18:32 -0700
Subject: [PATCH 0475/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 216000752
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 224 ++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  59 +++++
 2 files changed, 283 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 780c6f6448..0753316724 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -70896,6 +70896,62 @@ op {
     }
   }
 }
+op {
+  name: "StatelessRandomNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessRandomUniform"
   input_arg {
@@ -70993,6 +71049,118 @@ op {
     }
   }
 }
+op {
+  name: "StatelessRandomUniform"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
+op {
+  name: "StatelessRandomUniformInt"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  input_arg {
+    name: "minval"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "maxval"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessTruncatedNormal"
   input_arg {
@@ -71090,6 +71258,62 @@ op {
     }
   }
 }
+op {
+  name: "StatelessTruncatedNormal"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessWhile"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 0d8997c1bd..14cc9df9a2 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -32978,6 +32978,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -33033,6 +33034,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
@@ -33065,6 +33067,62 @@ op {
     }
   }
 }
+op {
+  name: "StatelessRandomUniformInt"
+  input_arg {
+    name: "shape"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "seed"
+    type_attr: "Tseed"
+  }
+  input_arg {
+    name: "minval"
+    type_attr: "dtype"
+  }
+  input_arg {
+    name: "maxval"
+    type_attr: "dtype"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "dtype"
+  }
+  attr {
+    name: "dtype"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tseed"
+    type: "type"
+    default_value {
+      type: DT_INT64
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "StatelessTruncatedNormal"
   input_arg {
@@ -33088,6 +33146,7 @@ op {
     allowed_values {
       list {
         type: DT_HALF
+        type: DT_BFLOAT16
         type: DT_FLOAT
         type: DT_DOUBLE
       }
-- 
GitLab


From 45f594a0bce42787356700c0e20f5fbc47193fa3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 5 Oct 2018 19:45:59 -0700
Subject: [PATCH 0476/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216001984

---
 tensorflow/go/op/wrappers.go | 712 +++++++++++++++++------------------
 1 file changed, 356 insertions(+), 356 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index a7bbb80c82..5d17605e37 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -9640,36 +9640,6 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...
 	return op.Output(0)
 }
 
-// Returns the element-wise sum of a list of tensors.
-//
-// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
-// wait for all of its inputs to be ready before beginning to sum. This can
-// save memory if inputs are ready at different times, since minimum temporary
-// storage is proportional to the output size rather than the inputs size.
-//
-// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.
-//
-// Returns a `Tensor` of same shape and type as the elements of `inputs`.
-//
-// Arguments:
-//	inputs: A list of `Tensor` objects, each with same shape and type.
-//	shape: Shape of elements of `inputs`.
-func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"shape": shape}
-	opspec := tf.OpSpec{
-		Type: "AccumulateNV2",
-		Input: []tf.Input{
-			tf.OutputList(inputs),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // RandomShuffleAttr is an optional argument to RandomShuffle.
 type RandomShuffleAttr func(optionalAttr)
 
@@ -10383,206 +10353,65 @@ func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.
 	return scope.AddOperation(opspec)
 }
 
-// Encode audio data using the WAV file format.
-//
-// This operation will generate a string suitable to be saved out to create a .wav
-// audio file. It will be encoded in the 16-bit PCM format. It takes in float
-// values in the range -1.0f to 1.0f, and any outside that value will be clamped to
-// that range.
-//
-// `audio` is a 2-D float Tensor of shape `[length, channels]`.
-// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
-//
-// Arguments:
-//	audio: 2-D with shape `[length, channels]`.
-//	sample_rate: Scalar containing the sample frequency.
-//
-// Returns 0-D. WAV-encoded file contents.
-func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "EncodeWav",
-		Input: []tf.Input{
-			audio, sample_rate,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes atan of x element-wise.
-func Atan(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Atan",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax.
-type ResourceApplyAdaMaxAttr func(optionalAttr)
-
-// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var, m, and v tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the AdaMax algorithm.
+// Locks a mutex resource.  The output is the lock.  So long as the lock tensor
 //
-// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-// v_t <- max(beta2 * v_{t-1}, abs(g))
-// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)
+// is alive, any other request to use `MutexLock` with this mutex will wait.
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	v: Should be from a Variable().
-//	beta1_power: Must be a scalar.
-//	lr: Scaling factor. Must be a scalar.
-//	beta1: Momentum factor. Must be a scalar.
-//	beta2: Momentum factor. Must be a scalar.
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
+// This is particularly useful for creating a critical section when used in
+// conjunction with `MutexLockIdentity`:
 //
-// Returns the created operation.
-func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdaMax",
-		Input: []tf.Input{
-			var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad,
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// AssertAttr is an optional argument to Assert.
-type AssertAttr func(optionalAttr)
-
-// AssertSummarize sets the optional summarize attribute to value.
+// ```python
 //
-// value: Print this many entries of each tensor.
-// If not specified, defaults to 3
-func AssertSummarize(value int64) AssertAttr {
-	return func(m optionalAttr) {
-		m["summarize"] = value
-	}
-}
-
-// Asserts that the given condition is true.
+// mutex = mutex_v2(
+//   shared_name=handle_name, container=container, name=name)
 //
-// If `condition` evaluates to false, print the list of tensors in `data`.
-// `summarize` determines how many entries of the tensors to print.
+// def execute_in_critical_section(fn, *args, **kwargs):
+//   lock = gen_resource_variable_ops.mutex_lock(mutex)
 //
-// Arguments:
-//	condition: The condition to evaluate.
-//	data: The tensors to print out when condition is false.
+//   with ops.control_dependencies([lock]):
+//     r = fn(*args, **kwargs)
 //
-// Returns the created operation.
-func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Assert",
-		Input: []tf.Input{
-			condition, tf.OutputList(data),
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Split a `SparseTensor` into `num_split` tensors along one dimension.
+//   with ops.control_dependencies(nest.flatten(r)):
+//     with ops.colocate_with(mutex):
+//       ensure_lock_exists = mutex_lock_identity(lock)
 //
-// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
-// `[0 : shape[split_dim] % num_split]` gets one extra dimension.
-// For example, if `split_dim = 1` and `num_split = 2` and the input is
+//     # Make sure that if any element of r is accessed, all of
+//     # them are executed together.
+//     r = nest.map_structure(tf.identity, r)
 //
-//     input_tensor = shape = [2, 7]
-//     [    a   d e  ]
-//     [b c          ]
+//   with ops.control_dependencies([ensure_lock_exists]):
+//     return nest.map_structure(tf.identity, r)
+// ```
 //
-// Graphically the output tensors are:
+// While `fn` is running in the critical section, no other functions which wish to
+// use this critical section may run.
 //
-//     output_tensor[0] = shape = [2, 4]
-//     [    a  ]
-//     [b c    ]
+// Often the use case is that two executions of the same graph, in parallel,
+// wish to run `fn`; and we wish to ensure that only one of them executes
+// at a time.  This is especially important if `fn` modifies one or more
+// variables at a time.
 //
-//     output_tensor[1] = shape = [2, 3]
-//     [ d e  ]
-//     [      ]
+// It is also useful if two separate functions must share a resource, but we
+// wish to ensure the usage is exclusive.
 //
 // Arguments:
-//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
-// `[0, rank(shape))`.
-//	indices: 2-D tensor represents the indices of the sparse tensor.
-//	values: 1-D tensor represents the values of the sparse tensor.
-//	shape: 1-D. tensor represents the shape of the sparse tensor.
-// output indices: A list of 1-D tensors represents the indices of the output
-// sparse tensors.
-//	num_split: The number of ways to split.
+//	mutex: The mutex resource to lock.
 //
-// Returns A list of 1-D tensors represents the values of the output sparse
-// tensors.A list of 1-D tensors represents the shape of the output sparse
-// tensors.
-func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) {
+// Returns A tensor that keeps a shared pointer to a lock on the mutex;
+// when the Tensor is destroyed, the use count on the shared pointer is decreased
+// by 1.  When it reaches 0, the lock is released.
+func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_split": num_split}
 	opspec := tf.OpSpec{
-		Type: "SparseSplit",
+		Type: "MutexLock",
 		Input: []tf.Input{
-			split_dim, indices, values, shape,
+			mutex,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	return output_indices, output_values, output_shape
+	return op.Output(0)
 }
 
 // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
@@ -11611,89 +11440,321 @@ func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToN
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StringToNumber",
+		Type: "StringToNumber",
+		Input: []tf.Input{
+			string_tensor,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2.
+type ResourceApplyFtrlV2Attr func(optionalAttr)
+
+// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the Ftrl-proximal scheme.
+//
+// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
+// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
+// linear += grad_with_shrinkage +
+//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
+// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
+// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
+// accum = accum_new
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	linear: Should be from a Variable().
+//	grad: The gradient.
+//	lr: Scaling factor. Must be a scalar.
+//	l1: L1 regulariation. Must be a scalar.
+//	l2: L2 shrinkage regulariation. Must be a scalar.
+//
+//	lr_power: Scaling factor. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyFtrlV2",
+		Input: []tf.Input{
+			var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`.
+//
+// This Op does not require `a_indices` be sorted in standard lexicographic order.
+//
+// Arguments:
+//	a_indices: 2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`.
+//	a_values: 1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`.
+//	a_shape: 1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`.
+//	b: `ndims`-D Tensor.  With shape `a_shape`.
+func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SparseTensorDenseAdd",
+		Input: []tf.Input{
+			a_indices, a_values, a_shape, b,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Encode audio data using the WAV file format.
+//
+// This operation will generate a string suitable to be saved out to create a .wav
+// audio file. It will be encoded in the 16-bit PCM format. It takes in float
+// values in the range -1.0f to 1.0f, and any outside that value will be clamped to
+// that range.
+//
+// `audio` is a 2-D float Tensor of shape `[length, channels]`.
+// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
+//
+// Arguments:
+//	audio: 2-D with shape `[length, channels]`.
+//	sample_rate: Scalar containing the sample frequency.
+//
+// Returns 0-D. WAV-encoded file contents.
+func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "EncodeWav",
+		Input: []tf.Input{
+			audio, sample_rate,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes atan of x element-wise.
+func Atan(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Atan",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax.
+type ResourceApplyAdaMaxAttr func(optionalAttr)
+
+// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var, m, and v tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the AdaMax algorithm.
+//
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// v_t <- max(beta2 * v_{t-1}, abs(g))
+// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	v: Should be from a Variable().
+//	beta1_power: Must be a scalar.
+//	lr: Scaling factor. Must be a scalar.
+//	beta1: Momentum factor. Must be a scalar.
+//	beta2: Momentum factor. Must be a scalar.
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
+//
+// Returns the created operation.
+func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceApplyAdaMax",
+		Input: []tf.Input{
+			var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// AssertAttr is an optional argument to Assert.
+type AssertAttr func(optionalAttr)
+
+// AssertSummarize sets the optional summarize attribute to value.
+//
+// value: Print this many entries of each tensor.
+// If not specified, defaults to 3
+func AssertSummarize(value int64) AssertAttr {
+	return func(m optionalAttr) {
+		m["summarize"] = value
+	}
+}
+
+// Asserts that the given condition is true.
+//
+// If `condition` evaluates to false, print the list of tensors in `data`.
+// `summarize` determines how many entries of the tensors to print.
+//
+// Arguments:
+//	condition: The condition to evaluate.
+//	data: The tensors to print out when condition is false.
+//
+// Returns the created operation.
+func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Assert",
 		Input: []tf.Input{
-			string_tensor,
+			condition, tf.OutputList(data),
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	return scope.AddOperation(opspec)
 }
 
-// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2.
-type ResourceApplyFtrlV2Attr func(optionalAttr)
-
-// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value.
+// Split a `SparseTensor` into `num_split` tensors along one dimension.
 //
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
-
-// Update '*var' according to the Ftrl-proximal scheme.
+// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
+// `[0 : shape[split_dim] % num_split]` gets one extra dimension.
+// For example, if `split_dim = 1` and `num_split = 2` and the input is
 //
-// grad_with_shrinkage = grad + 2 * l2_shrinkage * var
-// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
-// linear += grad_with_shrinkage +
-//     (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
-// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
-// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
-// accum = accum_new
+//     input_tensor = shape = [2, 7]
+//     [    a   d e  ]
+//     [b c          ]
 //
-// Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	linear: Should be from a Variable().
-//	grad: The gradient.
-//	lr: Scaling factor. Must be a scalar.
-//	l1: L1 regulariation. Must be a scalar.
-//	l2: L2 shrinkage regulariation. Must be a scalar.
+// Graphically the output tensors are:
 //
-//	lr_power: Scaling factor. Must be a scalar.
+//     output_tensor[0] = shape = [2, 4]
+//     [    a  ]
+//     [b c    ]
 //
-// Returns the created operation.
-func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) {
+//     output_tensor[1] = shape = [2, 3]
+//     [ d e  ]
+//     [      ]
+//
+// Arguments:
+//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
+// `[0, rank(shape))`.
+//	indices: 2-D tensor represents the indices of the sparse tensor.
+//	values: 1-D tensor represents the values of the sparse tensor.
+//	shape: 1-D. tensor represents the shape of the sparse tensor.
+// output indices: A list of 1-D tensors represents the indices of the output
+// sparse tensors.
+//	num_split: The number of ways to split.
+//
+// Returns A list of 1-D tensors represents the values of the output sparse
+// tensors.A list of 1-D tensors represents the shape of the output sparse
+// tensors.
+func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
+	attrs := map[string]interface{}{"num_split": num_split}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyFtrlV2",
+		Type: "SparseSplit",
 		Input: []tf.Input{
-			var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power,
+			split_dim, indices, values, shape,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	return output_indices, output_values, output_shape
 }
 
-// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`.
+// Returns the element-wise sum of a list of tensors.
 //
-// This Op does not require `a_indices` be sorted in standard lexicographic order.
+// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
+// wait for all of its inputs to be ready before beginning to sum. This can
+// save memory if inputs are ready at different times, since minimum temporary
+// storage is proportional to the output size rather than the inputs size.
+//
+// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.
+//
+// Returns a `Tensor` of same shape and type as the elements of `inputs`.
 //
 // Arguments:
-//	a_indices: 2-D.  The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`.
-//	a_values: 1-D.  The `values` of the `SparseTensor`, with shape `[nnz]`.
-//	a_shape: 1-D.  The `shape` of the `SparseTensor`, with shape `[ndims]`.
-//	b: `ndims`-D Tensor.  With shape `a_shape`.
-func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) {
+//	inputs: A list of `Tensor` objects, each with same shape and type.
+//	shape: Shape of elements of `inputs`.
+func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"shape": shape}
 	opspec := tf.OpSpec{
-		Type: "SparseTensorDenseAdd",
+		Type: "AccumulateNV2",
 		Input: []tf.Input{
-			a_indices, a_values, a_shape, b,
+			tf.OutputList(inputs),
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
@@ -13925,67 +13986,6 @@ func CudnnRNNBackpropV2(scope *Scope, input tf.Output, input_h tf.Output, input_
 	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
 
-// Locks a mutex resource.  The output is the lock.  So long as the lock tensor
-//
-// is alive, any other request to use `MutexLock` with this mutex will wait.
-//
-// This is particularly useful for creating a critical section when used in
-// conjunction with `MutexLockIdentity`:
-//
-// ```python
-//
-// mutex = mutex_v2(
-//   shared_name=handle_name, container=container, name=name)
-//
-// def execute_in_critical_section(fn, *args, **kwargs):
-//   lock = gen_resource_variable_ops.mutex_lock(mutex)
-//
-//   with ops.control_dependencies([lock]):
-//     r = fn(*args, **kwargs)
-//
-//   with ops.control_dependencies(nest.flatten(r)):
-//     with ops.colocate_with(mutex):
-//       ensure_lock_exists = mutex_lock_identity(lock)
-//
-//     # Make sure that if any element of r is accessed, all of
-//     # them are executed together.
-//     r = nest.map_structure(tf.identity, r)
-//
-//   with ops.control_dependencies([ensure_lock_exists]):
-//     return nest.map_structure(tf.identity, r)
-// ```
-//
-// While `fn` is running in the critical section, no other functions which wish to
-// use this critical section may run.
-//
-// Often the use case is that two executions of the same graph, in parallel,
-// wish to run `fn`; and we wish to ensure that only one of them executes
-// at a time.  This is especially important if `fn` modifies one or more
-// variables at a time.
-//
-// It is also useful if two separate functions must share a resource, but we
-// wish to ensure the usage is exclusive.
-//
-// Arguments:
-//	mutex: The mutex resource to lock.
-//
-// Returns A tensor that keeps a shared pointer to a lock on the mutex;
-// when the Tensor is destroyed, the use count on the shared pointer is decreased
-// by 1.  When it reaches 0, the lock is released.
-func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "MutexLock",
-		Input: []tf.Input{
-			mutex,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // StringFormatAttr is an optional argument to StringFormat.
 type StringFormatAttr func(optionalAttr)
 
@@ -16807,26 +16807,6 @@ func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values
 	return op.Output(0), op.Output(1)
 }
 
-// Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
-//
-// The Hurwitz zeta function is defined as:
-//
-//
-// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
-func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Zeta",
-		Input: []tf.Input{
-			x, q,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Returns a list of tensors with the same shapes and contents as the input
 //
 // tensors.
@@ -18873,6 +18853,26 @@ func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.D
 	return op.Output(0)
 }
 
+// Compute the Hurwitz zeta function \\(\zeta(x, q)\\).
+//
+// The Hurwitz zeta function is defined as:
+//
+//
+// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\)
+func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Zeta",
+		Input: []tf.Input{
+			x, q,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Inverse fast Fourier transform.
 //
 // Computes the inverse 1-dimensional discrete Fourier transform over the
@@ -22757,6 +22757,21 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output
 	return op.Output(0)
 }
 
+// Computes hyperbolic tangent of `x` element-wise.
+func Tanh(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Tanh",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the maximum along segments of a tensor.
 //
 // Read
@@ -22794,21 +22809,6 @@ func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.
 	return op.Output(0)
 }
 
-// Computes hyperbolic tangent of `x` element-wise.
-func Tanh(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Tanh",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Creates a dataset that skips `count` elements from the `input_dataset`.
 //
 // Arguments:
-- 
GitLab


From 7d3bfc143a74d8e49f138841a07f7f4693b0a911 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Fri, 5 Oct 2018 20:07:12 -0700
Subject: [PATCH 0477/1085] Add the plumbing for an autograph flag to defun.
 Disabled and experimental for now.

PiperOrigin-RevId: 216003028
---
 tensorflow/python/eager/BUILD       |  1 +
 tensorflow/python/eager/function.py | 61 +++++++++++++++++++++++------
 2 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index d0c1a93118..cae809a7c3 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -251,6 +251,7 @@ py_library(
         "//tensorflow/python:gradients_impl",
         "//tensorflow/python:graph_to_function_def",
         "//tensorflow/python:util",
+        "//tensorflow/python/autograph",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:core",
         "//tensorflow/python/eager:execute",
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f06148b5d2..bafe07de2b 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -31,6 +31,7 @@ import six
 
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.core.framework import function_pb2
+from tensorflow.python import autograph
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.eager import execute
@@ -877,7 +878,8 @@ def func_graph_from_py_func(name,
                             args,
                             kwargs,
                             signature=None,
-                            func_graph=None):
+                            func_graph=None,
+                            experimental_autograph=False):
   """Returns a `FuncGraph` generated from `python_func`.
 
   Args:
@@ -894,6 +896,8 @@ def func_graph_from_py_func(name,
       inputs.
     func_graph: Optional. An instance of FuncGraph. If provided, we will use
       this graph else a new one is built and returned.
+    experimental_autograph: whether to use autograph to compile `python_func`.
+      See https://www.tensorflow.org/guide/autograph for more information.
 
   Returns:
     A FuncGraph.
@@ -939,7 +943,17 @@ def func_graph_from_py_func(name,
 
     this_tape = tape.push_new_tape()
     try:
-      func_outputs = python_func(*func_args, **func_kwargs)
+      if experimental_autograph:
+        func_outputs = autograph.converted_call(
+            python_func,
+            autograph.ConversionOptions(
+                verbose=True,
+                recursive=True,
+                force_conversion=False,
+                strip_decorators=(defun,),
+                arg_types={}), *func_args, **func_kwargs)
+      else:
+        func_outputs = python_func(*func_args, **func_kwargs)
       # invariant: `func_outputs` contains only Tensors and `None`s.
       func_outputs = nest.map_structure(convert, func_outputs)
 
@@ -1035,7 +1049,8 @@ class PolymorphicFunction(object):
                python_function,
                name,
                input_signature=None,
-               attributes=None):
+               attributes=None,
+               experimental_autograph=False):
     """Initializes a polymorphic function.
 
     Args:
@@ -1045,7 +1060,10 @@ class PolymorphicFunction(object):
         specifying the input signature of this function. If `None`, a separate
         function is instantiated for each inferred input signature.
       attributes: dict, extra keyword arguments that will be added as attribute
-         of the function.
+        of the function.
+      experimental_autograph: whether to use autograph to compile
+        `python_function`. See https://www.tensorflow.org/guide/autograph for
+        more information.
 
     Raises:
       ValueError: if `input_signature` is not None and the `python_function`'s
@@ -1061,6 +1079,7 @@ class PolymorphicFunction(object):
       self._args_to_prepend = tuple()
       self._kwargs_to_include = {}
     self._name = name
+    self._experimental_autograph = experimental_autograph
     self._function_cache = collections.OrderedDict()
     self._function_attributes = attributes or {}
 
@@ -1286,8 +1305,13 @@ class PolymorphicFunction(object):
 
       if graph_function is None:
         graph_function = Function(
-            func_graph_from_py_func(self._name, self._python_function, args,
-                                    kwargs, self._input_signature),
+            func_graph_from_py_func(
+                self._name,
+                self._python_function,
+                args,
+                kwargs,
+                self._input_signature,
+                experimental_autograph=self._experimental_autograph),
             self._function_attributes)
         self._function_cache[cache_key] = graph_function
       return graph_function, [
@@ -1348,7 +1372,7 @@ def _validate_signature(signature):
                     "a possibly nested sequence of TensorSpec objects.")
 
 
-def defun(func=None, input_signature=None):
+def defun(func=None, input_signature=None, experimental_autograph=False):
   """Compiles a Python function into a callable TensorFlow graph.
 
   `defun` (short for "define function") trace-compiles a Python function
@@ -1657,6 +1681,10 @@ def defun(func=None, input_signature=None):
       function is instantiated for each inferred input signature.  If a
       signature is specified, every input to `func` must be a `Tensor`, and
       `func` cannot accept `**kwargs`.
+    experimental_autograph: Whether `func` should be compiled before
+      constructing the graph. See https://www.tensorflow.org/guide/autograph
+      for more information.
+
 
   Returns:
      If `func` is not None, returns a callable that will execute the compiled
@@ -1668,10 +1696,16 @@ def defun(func=None, input_signature=None):
     TypeError: If `input_signature` is neither `None` nor a sequence of
       `tf.contrib.eager.TensorSpec` objects.
   """
-  return defun_with_attributes(func=func, input_signature=input_signature)
+  return defun_with_attributes(
+      func=func,
+      input_signature=input_signature,
+      experimental_autograph=experimental_autograph)
 
 
-def defun_with_attributes(func=None, input_signature=None, attributes=None):
+def defun_with_attributes(func=None,
+                          input_signature=None,
+                          attributes=None,
+                          experimental_autograph=False):
   """Compiles a Python function into a callable TensorFlow graph.
 
   This function supports adding extra function attributes. See detailed
@@ -1686,6 +1720,7 @@ def defun_with_attributes(func=None, input_signature=None, attributes=None):
       attributes. Currently only support primitive types as value, and only
       whitelisted attribute name is allowed. Unwhitelisted attribute name or
       unsupported value will result into ValueError.
+    experimental_autograph: same as defun()'s experimental_autograph.
 
   Returns:
     Same as the return value of defun, with attributes added to the function in
@@ -1702,8 +1737,12 @@ def defun_with_attributes(func=None, input_signature=None, attributes=None):
       name = "function"
     return tf_decorator.make_decorator(
         function,
-        PolymorphicFunction(function, name, input_signature=input_signature,
-                            attributes=attributes))
+        PolymorphicFunction(
+            function,
+            name,
+            input_signature=input_signature,
+            attributes=attributes,
+            experimental_autograph=experimental_autograph))
 
   # This code path is for the `foo = tfe.defun(foo, ...)` use case
   if func is not None:
-- 
GitLab


From d37204edfaad3c7cbd361687422d40e2b9bb4d87 Mon Sep 17 00:00:00 2001
From: Yicheng Fan <thunderfyc@gmail.com>
Date: Sat, 6 Oct 2018 11:19:10 +0800
Subject: [PATCH 0478/1085] Remove unused python import

---
 tensorflow/contrib/opt/python/training/adamax.py | 2 --
 tensorflow/python/training/adam.py               | 2 --
 2 files changed, 4 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py
index debb6d99af..cf5f6efd5f 100644
--- a/tensorflow/contrib/opt/python/training/adamax.py
+++ b/tensorflow/contrib/opt/python/training/adamax.py
@@ -23,8 +23,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
 from tensorflow.python.training import adam
 from tensorflow.python.training import training_ops
 
diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py
index 4b31fac6c3..ef515e2f65 100644
--- a/tensorflow/python/training/adam.py
+++ b/tensorflow/python/training/adam.py
@@ -23,8 +23,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
 from tensorflow.python.training import optimizer
 from tensorflow.python.training import training_ops
 from tensorflow.python.util.tf_export import tf_export
-- 
GitLab


From fb92d456476c36210cea3b76393f584a306f092b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 6 Oct 2018 02:01:17 -0700
Subject: [PATCH 0479/1085] compat: Update forward compatibility horizon to
 2018-10-06

PiperOrigin-RevId: 216021117
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 8f4e8e0b98..d85fb00414 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 5)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 6)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 7a138d9ccefdc587073c613e5eb3aee16d72f89e Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Fri, 5 Oct 2018 23:08:08 +0800
Subject: [PATCH 0480/1085] systemlibs: unbundle icu

---
 third_party/icu/BUILD.system                 | 25 ++++++++++++++++++++
 third_party/icu/workspace.bzl                |  1 +
 third_party/systemlibs/syslibs_configure.bzl |  1 +
 3 files changed, 27 insertions(+)
 create mode 100644 third_party/icu/BUILD.system

diff --git a/third_party/icu/BUILD.system b/third_party/icu/BUILD.system
new file mode 100644
index 0000000000..328e412a8c
--- /dev/null
+++ b/third_party/icu/BUILD.system
@@ -0,0 +1,25 @@
+licenses(["notice"])  # Apache 2.0
+
+filegroup(
+    name = "icu4c/LICENSE",
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "icu4j/main/shared/licenses/LICENSE",
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "common",
+    deps = [
+        ":icuuc",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "icuuc",
+    linkopts = ["-licuuc"],
+    visibility = ["//visibility:private"],
+)
diff --git a/third_party/icu/workspace.bzl b/third_party/icu/workspace.bzl
index bfebf4219b..a4f653e026 100644
--- a/third_party/icu/workspace.bzl
+++ b/third_party/icu/workspace.bzl
@@ -12,4 +12,5 @@ def repo():
             "https://github.com/unicode-org/icu/archive/release-62-1.tar.gz",
         ],
         build_file = "//third_party/icu:BUILD.bazel",
+        system_build_file = "//third_party/icu:BUILD.system",
     )
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index b03d3380d7..dbf4fd6e32 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -23,6 +23,7 @@ VALID_LIBS = [
     "gast_archive",
     "gif_archive",
     "grpc",
+    "icu",
     "jpeg",
     "jsoncpp_git",
     "lmdb",
-- 
GitLab


From 5c0a6bdfeb1848b0146a36706d921dde06ba160a Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Sat, 6 Oct 2018 10:04:16 -0700
Subject: [PATCH 0481/1085] [XLA] Add base and window dilation support to
 ReduceWindow

PiperOrigin-RevId: 216041507
---
 .../tf2xla/kernels/reduce_window_op.cc        | 21 +++++++-
 .../compiler/tf2xla/kernels/scan_ops.cc       |  3 +-
 tensorflow/compiler/tf2xla/ops/xla_ops.cc     |  2 +
 tensorflow/compiler/tf2xla/python/xla.py      |  6 +++
 tensorflow/compiler/xla/client/xla_builder.cc | 15 ++++--
 tensorflow/compiler/xla/client/xla_builder.h  |  6 +++
 .../xla/python/local_computation_builder.cc   |  5 +-
 .../xla/python/local_computation_builder.h    |  2 +
 tensorflow/compiler/xla/python/xla_client.py  | 25 ++++++++-
 .../xla/service/algebraic_simplifier.cc       |  6 +++
 .../compiler/xla/service/cpu/ir_emitter.cc    | 27 +++++++---
 .../xla/service/gpu/elemental_ir_emitter.cc   | 26 ++++++----
 .../xla/service/hlo_evaluator_test.cc         | 52 +++++++++++++++++++
 .../xla/service/hlo_evaluator_typed_visitor.h | 13 ++++-
 .../compiler/xla/tests/reduce_window_test.cc  | 12 ++++-
 15 files changed, 191 insertions(+), 30 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc
index 8102faad28..8eee5b1299 100644
--- a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc
@@ -40,10 +40,16 @@ class ReduceWindowOp : public XlaOpKernel {
 
     std::vector<int64> window_dimensions;
     std::vector<int64> window_strides;
+    std::vector<int64> base_dilations;
+    std::vector<int64> window_dilations;
     OP_REQUIRES_OK(context, context->ConstantInputAsIntVector(
                                 "window_dimensions", &window_dimensions));
     OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("window_strides",
                                                               &window_strides));
+    OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("base_dilations",
+                                                              &base_dilations));
+    OP_REQUIRES_OK(context, context->ConstantInputAsIntVector(
+                                "window_dilations", &window_dilations));
 
     const int rank = input_shape.dims();
     OP_REQUIRES(context, rank == window_dimensions.size(),
@@ -56,6 +62,16 @@ class ReduceWindowOp : public XlaOpKernel {
                     "The size of window_strides must be equal to the input "
                     "rank (",
                     window_strides.size(), " vs. ", rank, ")"));
+    OP_REQUIRES(context, rank == base_dilations.size(),
+                errors::InvalidArgument(
+                    "The size of base_dilations must be equal to the input "
+                    "rank (",
+                    base_dilations.size(), " vs. ", rank, ")"));
+    OP_REQUIRES(context, rank == window_dilations.size(),
+                errors::InvalidArgument(
+                    "The size of window_dilations must be equal to the input "
+                    "rank (",
+                    window_dilations.size(), " vs. ", rank, ")"));
 
     // Build the reducer function.
     XlaCompiler::Argument reducer_arg;
@@ -102,7 +118,8 @@ class ReduceWindowOp : public XlaOpKernel {
 
     xla::XlaOp output = xla::ReduceWindowWithGeneralPadding(
         context->Input(0), context->Input(1), *reducer.computation,
-        window_dimensions, window_strides, padding);
+        window_dimensions, window_strides, base_dilations, window_dilations,
+        padding);
     context->SetOutput(0, output);
   }
 
@@ -115,6 +132,8 @@ class ReduceWindowOp : public XlaOpKernel {
 REGISTER_XLA_OP(Name("XlaReduceWindow")
                     .CompileTimeConstInput("window_dimensions")
                     .CompileTimeConstInput("window_strides")
+                    .CompileTimeConstInput("base_dilations")
+                    .CompileTimeConstInput("window_dilations")
                     .CompileTimeConstInput("padding"),
                 ReduceWindowOp);
 
diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
index ab094d7dd1..57afd608de 100644
--- a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc
@@ -104,7 +104,8 @@ class ScanOp : public XlaOpKernel {
     }
     auto output = xla::ReduceWindowWithGeneralPadding(
         XlaHelpers::ConvertElementType(builder, ctx->Input(0), dtype), init,
-        *reducer, window_dims, window_strides, padding);
+        *reducer, window_dims, window_strides,
+        /*base_dilations=*/{}, /*window_dilations=*/{}, padding);
     output =
         XlaHelpers::ConvertElementType(builder, output, ctx->input_type(0));
 
diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
index 557911553d..bd2c0a5ee8 100644
--- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc
+++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
@@ -283,6 +283,8 @@ REGISTER_OP("XlaReduceWindow")
     .Input("init_value: T")
     .Input("window_dimensions: Tindices")
     .Input("window_strides: Tindices")
+    .Input("base_dilations: Tindices")
+    .Input("window_dilations: Tindices")
     .Input("padding: Tindices")
     .Attr("T: numbertype")
     .Attr("Tindices: {int32, int64}")
diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py
index bc7924c371..5e86b5d8ec 100644
--- a/tensorflow/compiler/tf2xla/python/xla.py
+++ b/tensorflow/compiler/tf2xla/python/xla.py
@@ -320,6 +320,8 @@ def reduce_window(operand,
                   reducer,
                   window_dimensions,
                   window_strides=None,
+                  base_dilations=None,
+                  window_dilations=None,
                   padding=None,
                   name=None):
   """Wraps the XLA ReduceWindow operator.
@@ -343,12 +345,16 @@ def reduce_window(operand,
     A tensor that represents the output of the reduce_window operator.
   """
   window_strides = window_strides or [1] * len(window_dimensions)
+  base_dilations = base_dilations or [1] * len(window_dimensions)
+  window_dilations = window_dilations or [1] * len(window_dimensions)
   padding = padding or [(0, 0)] * len(window_dimensions)
   return gen_xla_ops.xla_reduce_window(
       input=operand,
       init_value=init,
       window_dimensions=window_dimensions,
       window_strides=window_strides,
+      base_dilations=base_dilations,
+      window_dilations=window_dilations,
       padding=padding,
       computation=reducer,
       name=name)
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index d196252db1..6b31831010 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -1789,9 +1789,9 @@ XlaOp XlaBuilder::ReduceWindow(const XlaOp& operand, const XlaOp& init_value,
     std::vector<std::pair<int64, int64>> padding_values =
         MakePadding(AsInt64Slice(operand_shape.dimensions()), window_dimensions,
                     window_strides, padding);
-    return ReduceWindowWithGeneralPadding(operand, init_value, computation,
-                                          window_dimensions, window_strides,
-                                          padding_values);
+    return ReduceWindowWithGeneralPadding(
+        operand, init_value, computation, window_dimensions, window_strides,
+        /*base_dilations=*/{}, /*window_dilations=*/{}, padding_values);
   });
 }
 
@@ -1800,6 +1800,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding(
     const XlaComputation& computation,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
+    absl::Span<const int64> base_dilations,
+    absl::Span<const int64> window_dilations,
     absl::Span<const std::pair<int64, int64>> padding) {
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     HloInstructionProto instr;
@@ -1810,7 +1812,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding(
                         computation.GetProgramShape());
     TF_ASSIGN_OR_RETURN(*instr.mutable_window(),
                         MakeWindow(window_dimensions, window_strides, padding,
-                                   /*lhs_dilation=*/{}, /*rhs_dilation=*/{}));
+                                   /*lhs_dilation=*/base_dilations,
+                                   /*rhs_dilation=*/window_dilations));
     TF_ASSIGN_OR_RETURN(
         *instr.mutable_shape(),
         ShapeInference::InferReduceWindowShape(operand_shape, init_shape,
@@ -2800,10 +2803,12 @@ XlaOp ReduceWindowWithGeneralPadding(
     const XlaComputation& computation,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
+    absl::Span<const int64> base_dilations,
+    absl::Span<const int64> window_dilations,
     absl::Span<const std::pair<int64, int64>> padding) {
   return operand.builder()->ReduceWindowWithGeneralPadding(
       operand, init_value, computation, window_dimensions, window_strides,
-      padding);
+      base_dilations, window_dilations, padding);
 }
 
 XlaOp CrossReplicaSum(const XlaOp& operand,
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index cd0d5ca5d3..2e14e47a35 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -671,6 +671,8 @@ class XlaBuilder {
       const XlaComputation& computation,
       absl::Span<const int64> window_dimensions,
       absl::Span<const int64> window_strides,
+      absl::Span<const int64> base_dilations,
+      absl::Span<const int64> window_dilations,
       absl::Span<const std::pair<int64, int64>> padding);
 
   // Returns the sum of the operand value within each subgroup of replicas. All
@@ -1245,6 +1247,8 @@ class XlaBuilder {
       const XlaComputation& computation,
       absl::Span<const int64> window_dimensions,
       absl::Span<const int64> window_strides,
+      absl::Span<const int64> base_dilations,
+      absl::Span<const int64> window_dilations,
       absl::Span<const std::pair<int64, int64>> padding);
   friend XlaOp CrossReplicaSum(const XlaOp& operand,
                                absl::Span<const ReplicaGroup> replica_groups);
@@ -1818,6 +1822,8 @@ XlaOp ReduceWindowWithGeneralPadding(
     const XlaComputation& computation,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
+    absl::Span<const int64> base_dilations,
+    absl::Span<const int64> window_dilations,
     absl::Span<const std::pair<int64, int64>> padding);
 
 // Returns the sum of the operand value within each subgroup of replicas. All
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index cd5fd33029..ffa336f304 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -532,10 +532,13 @@ LocalOp LocalComputationBuilder::ReduceWindowWithGeneralPadding(
     const LocalComputation& local_computation,
     absl::Span<const int64> window_dimensions,
     absl::Span<const int64> window_strides,
+    absl::Span<const int64> base_dilations,
+    absl::Span<const int64> window_dilations,
     absl::Span<const std::pair<int64, int64>> padding) {
   return xla::ReduceWindowWithGeneralPadding(
       operand.op(), init_value.op(), local_computation.computation(),
-      window_dimensions, window_strides, padding);
+      window_dimensions, window_strides, base_dilations, window_dilations,
+      padding);
 }
 
 LocalOp LocalComputationBuilder::RngNormal(const LocalOp& mu,
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
index 2166bb6721..43332e0abd 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.h
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -278,6 +278,8 @@ class LocalComputationBuilder {
       const LocalComputation& local_computation,
       absl::Span<const int64> window_dimensions,
       absl::Span<const int64> window_strides,
+      absl::Span<const int64> base_dilations,
+      absl::Span<const int64> window_dilations,
       absl::Span<const std::pair<int64, int64> > padding);
 
   LocalOp RngNormal(const LocalOp& mu, const LocalOp& sigma,
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
index bb303c5678..f8197488fb 100644
--- a/tensorflow/compiler/xla/python/xla_client.py
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -995,7 +995,30 @@ class ComputationBuilder(object):
         window_strides)
     return self._client.ReduceWindowWithGeneralPadding(
         operand, init_value, computation_to_apply.c_local_computation,
-        window_dimensions, window_strides, pads)
+        window_dimensions, window_strides, (), (), pads)
+
+  def ReduceWindowWithGeneralPadding(
+      self, operand, init_value, computation_to_apply, window_dimensions,
+      window_strides, base_dilations, window_dilations, padding):
+    """Enqueues a windowed reduction operation onto the computation.
+
+    Args:
+      operand: reduction operand (LocalOp).
+      init_value: reduction initial value (LocalOp).
+      computation_to_apply: a binary reduction function (Computation).
+      window_dimensions: dimensions of window (sequence of integers).
+      window_strides: strides for window (sequence of integers).
+      base_dilations: dilations for the base (sequence of integers).
+      window_dilations: dilations for window (sequence of integers).
+      padding: length-N array-like of pairs of integers of (low, high) padding.
+
+    Returns:
+      A LocalOp representing the added ReduceWindow op.
+    """
+    return self._client.ReduceWindowWithGeneralPadding(
+        operand, init_value, computation_to_apply.c_local_computation,
+        window_dimensions, window_strides, base_dilations, window_dilations,
+        padding)
 
   def RngNormal(self, mu, sigma, dims):
     """Enqueues an RngNormal operation onto the computation.
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 75dae7a714..86d9dbea90 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -2057,6 +2057,12 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow(
     return Status::OK();
   }
 
+  // Bail on dilation.
+  if (window_util::HasDilation(window)) {
+    VLOG(10) << "Not folding pad into reduce-window as there is dilation.";
+    return Status::OK();
+  }
+
   VLOG(10) << "Considering folding Pad: " << pad->ToString()
            << "\ninto reduce-window: " << reduce_window->ToString()
            << (convert != nullptr
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index a70abb117a..b2abdb39a5 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -688,8 +688,25 @@ StatusOr<llvm::Value*> IrEmitter::EmitTargetElementLoopBodyForReduceWindow(
   for (size_t i = 0; i < index.size(); ++i) {
     llvm::Value* strided_index =
         NSWMul(index[i], b_.getInt64(window.dimensions(i).stride()));
-    input_index[i] = NSWSub(NSWAdd(strided_index, window_index[i]),
-                            b_.getInt64(window.dimensions(i).padding_low()));
+    input_index[i] = NSWSub(
+        NSWAdd(strided_index,
+               NSWMul(window_index[i],
+                      b_.getInt64(window.dimensions(i).window_dilation()))),
+        b_.getInt64(window.dimensions(i).padding_low()));
+
+    // We need to verify that we are not in the dilated base area.
+    llvm::Value* dilation_condition = ICmpEQ(
+        SRem(input_index[i], b_.getInt64(window.dimensions(i).base_dilation())),
+        b_.getInt64(0));
+    if (in_bounds_condition == nullptr) {
+      in_bounds_condition = dilation_condition;
+    } else {
+      in_bounds_condition = And(in_bounds_condition, dilation_condition);
+    }
+
+    // Apply base dilation to the index.
+    input_index[i] =
+        SDiv(input_index[i], b_.getInt64(window.dimensions(i).base_dilation()));
 
     // We need to check if 0 <= input_index[i] < bound, as otherwise we are in
     // the padding so that we can skip the computation. That is equivalent to
@@ -728,12 +745,6 @@ Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window) {
       /*operands=*/{reduce_window->operand(0)},
       /*supported_types=*/{F32, BF16, S32, F16}));
 
-  // TODO(b/31410564): Implement dilation for reduce-window.
-  if (window_util::HasDilation(reduce_window->window())) {
-    return Unimplemented(
-        "Dilation for ReduceWindow is not implemented on CPU.");
-  }
-
   // Pseudo code for reduce window:
   //
   //   for (coordinates O in the output)
diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
index c1aaa4bf04..6dcdaf1cfe 100644
--- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc
@@ -358,13 +358,6 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator(
         const HloInstruction* operand = hlo->operand(0);
         const Window& window = hlo->window();
 
-        // TODO(b/31410564): Implement dilation for reduce-window.
-        if (window_util::HasDilation(window)) {
-          return Unimplemented(
-              "Dilation for reduce-window not implemented on GPU. "
-              "See b/31410564.");
-        }
-
         PrimitiveType operand_element_type = operand->shape().element_type();
         llvm::Value* accum_ptr = llvm_ir::EmitAllocaAtFunctionEntry(
             llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_),
@@ -397,9 +390,24 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator(
         for (size_t i = 0; i < index.size(); ++i) {
           llvm::Value* stridden_index = NSWMul(
               index[i], index_typed_const(window.dimensions(i).stride()));
+          input_index[i] = NSWSub(
+              NSWAdd(stridden_index,
+                     NSWMul(window_index[i],
+                            index_typed_const(
+                                window.dimensions(i).window_dilation()))),
+              index_typed_const(window.dimensions(i).padding_low()));
+
+          // We need to verify that we are not in the dilated base area.
+          llvm::Value* dilation_condition = ICmpEQ(
+              SRem(input_index[i],
+                   index_typed_const(window.dimensions(i).base_dilation())),
+              index_typed_const(0));
+          in_bounds = And(in_bounds, dilation_condition);
+
+          // Apply base dilation to the index.
           input_index[i] =
-              NSWSub(NSWAdd(stridden_index, window_index[i]),
-                     index_typed_const(window.dimensions(i).padding_low()));
+              SDiv(input_index[i],
+                   index_typed_const(window.dimensions(i).base_dilation()));
 
           // We must check whether 0 ≤ input_index[i] < bound, as otherwise
           // we are in the pad and so can skip the computation. This
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index cee11a8a21..608a42bb60 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -1463,6 +1463,58 @@ TEST_P(HloEvaluatorTest, ReduceWindowMax) {
   EXPECT_TRUE(LiteralTestUtil::Equal(expected, result));
 }
 
+TEST_P(HloEvaluatorTest, ReduceWindowMaxWindowDilation) {
+  HloComputation::Builder b(TestName());
+
+  // arg:
+  // f32[3,3] {
+  //  { 1, 2, 3 },
+  //  { 5, 6, 7 },
+  //  { 9, 10, 11 },
+  // }
+  auto arg_array = absl::make_unique<Array2D<float>>(3, 3);
+  arg_array->FillUnique(1.0f);
+  auto arg_literal = LiteralUtil::CreateR2FromArray2D<float>(*arg_array);
+
+  HloInstruction* arg_instruction =
+      b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal)));
+
+  auto init_value = b.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(0.f)));
+
+  HloComputation::Builder max_computation("max");
+  Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  auto param_lhs = max_computation.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "lhs"));
+  auto param_rhs = max_computation.AddInstruction(
+      HloInstruction::CreateParameter(1, scalar_shape, "rhs"));
+  max_computation.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape, HloOpcode::kMaximum, param_lhs, param_rhs));
+  auto max_func = module().AddEmbeddedComputation(max_computation.Build());
+
+  Window window;
+  WindowDimension dim;
+  dim.set_size(2);
+  dim.set_stride(1);
+  dim.set_padding_low(0);
+  dim.set_padding_high(0);
+  dim.set_window_dilation(2);
+  dim.set_base_dilation(1);
+  *window.add_dimensions() = dim;
+  *window.add_dimensions() = dim;
+
+  Shape shape = ShapeUtil::MakeShape(F32, {1, 1});
+  b.AddInstruction(HloInstruction::CreateReduceWindow(
+      shape, arg_instruction, init_value, window, max_func));
+
+  module().AddEntryComputation(b.Build());
+
+  Literal result = Evaluate();
+
+  auto expected = LiteralUtil::CreateR2<float>({{11}});
+  EXPECT_TRUE(LiteralTestUtil::Equal(expected, result));
+}
+
 TEST_P(HloEvaluatorTest, ReduceWindowAdd) {
   HloComputation::Builder b(TestName());
 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index b2d12c94b8..a450dc6ff5 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -2613,8 +2613,17 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       std::vector<int64> base_index(rank);
       bool out_of_bound = false;
       for (int64 i = 0; i < rank; ++i) {
-        base_index[i] = window_count_index[i] * window.dimensions(i).stride() +
-                        window_index[i] - window.dimensions(i).padding_low();
+        base_index[i] =
+            window_count_index[i] * window.dimensions(i).stride() +
+            window_index[i] * window.dimensions(i).window_dilation() -
+            window.dimensions(i).padding_low();
+        // We are not in the base area if the dilation placed us out of bounds.
+        if (base_index[i] % window.dimensions(i).base_dilation() != 0) {
+          out_of_bound = true;
+          break;
+        }
+        // Apply the dilation to the base area.
+        base_index[i] /= window.dimensions(i).base_dilation();
         if (base_index[i] < 0 || base_index[i] >= base_shape.dimensions(i)) {
           out_of_bound = true;
           break;
diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index c25ccafaf8..22fe4a2670 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -638,6 +638,8 @@ class R4ReduceWindowTest : public ReduceWindowTestBase,
         /*computation=*/computation,
         /*window_dimensions=*/param.window_bounds,
         /*window_strides=*/param.strides,
+        /*base_dilations=*/{},
+        /*window_dilations=*/{},
         /*padding=*/padding);
 
     CHECK(reducer == kAdd || reducer == kMax);
@@ -1158,7 +1160,10 @@ class R2ReduceWindowTest : public ReduceWindowTestBase,
         /*init_value=*/init_value,
         /*computation=*/computation,
         /*window_dimensions=*/param.window_bounds,
-        /*window_strides=*/param.strides, /*padding=*/padding);
+        /*window_strides=*/param.strides,
+        /*base_dilations=*/{},
+        /*window_dilations=*/{},
+        /*padding=*/padding);
 
     auto reduce_func = param.reducer == kAdd
                            ? +[](float a, float b) { return a + b; }
@@ -1369,7 +1374,10 @@ TEST_P(R1ReduceWindowTest, DoIt) {
       /*init_value=*/init_value,
       /*computation=*/computation,
       /*window_dimensions=*/param.window_bounds,
-      /*window_strides=*/param.strides, /*padding=*/padding);
+      /*window_strides=*/param.strides,
+      /*base_dilations=*/{},
+      /*window_dilations=*/{},
+      /*padding=*/padding);
 
   auto reduce_func = param.reducer == kAdd
                          ? +[](float a, float b) { return a + b; }
-- 
GitLab


From e93a18954689b6d522560f5273f6d3320d545b2e Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Sat, 6 Oct 2018 13:49:25 -0700
Subject: [PATCH 0482/1085] Mark tensorflow/contrib/tpu:datasets_test flaky

It fails 1/1000 runs in OSS builds.

PiperOrigin-RevId: 216050192
---
 tensorflow/contrib/tpu/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 10ed1c2891..8c36d5a297 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -302,6 +302,7 @@ tf_py_test(
         "//tensorflow/python:client_testlib",
         ":datasets",
     ],
+    flaky = 1,  # TODO(b/117363808): fails 1/1000 OSS runs
     grpc_enabled = True,
 )
 
-- 
GitLab


From cee6187a7d8aec61655546b6df6f84fecf21cc0a Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Sat, 6 Oct 2018 14:08:08 -0700
Subject: [PATCH 0483/1085] Mark tensorflow/contrib/tpu:datasets_test flaky
 (#22799)

It fails 1/1000 runs in OSS builds.

PiperOrigin-RevId: 216050192
---
 tensorflow/contrib/tpu/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 0c4bdab191..c22c385d9c 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -299,6 +299,7 @@ tf_py_test(
         "//tensorflow/python:client_testlib",
         ":datasets",
     ],
+    flaky = 1,  # TODO(b/117363808): fails 1/1000 OSS runs
     grpc_enabled = True,
 )
 
-- 
GitLab


From 7fa6a6b42bc9d562e2b1cc765ca78d281b51f734 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 6 Oct 2018 21:00:57 -0700
Subject: [PATCH 0484/1085] Add SequenceLSTMOptions to schema to decouple the
 sequential Op from the LSTM.

PiperOrigin-RevId: 216066634
---
 tensorflow/contrib/lite/c/builtin_op_data.h   |   7 +
 .../lite/core/api/flatbuffer_conversions.cc   |  15 +-
 .../kernels/unidirectional_sequence_lstm.cc   |  14 +-
 .../unidirectional_sequence_lstm_test.cc      |  11 +-
 tensorflow/contrib/lite/schema/schema.fbs     |   8 +
 .../contrib/lite/schema/schema_generated.h    | 162 +++++++++++++++++-
 6 files changed, 205 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h
index 44daf7adaa..1e65c3cee2 100644
--- a/tensorflow/contrib/lite/c/builtin_op_data.h
+++ b/tensorflow/contrib/lite/c/builtin_op_data.h
@@ -186,6 +186,13 @@ typedef struct {
   TfLiteLSTMKernelType kernel_type;
 } TfLiteLSTMParams;
 
+typedef struct {
+  // Parameters for the LSTM kernel.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+} TfLiteUnidirectionalSequenceLSTMParams;
+
 typedef struct {
   // Parameters for the LSTM kernel.
   TfLiteFusedActivation activation;
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index eac7db9a88..b092e5ee54 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -371,7 +371,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
-    case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
     case BuiltinOperator_LSTM: {
       auto params = allocator->AllocatePOD<TfLiteLSTMParams>();
       if (auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
@@ -391,6 +390,20 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
+    case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: {
+      auto* params =
+          allocator->AllocatePOD<TfLiteUnidirectionalSequenceLSTMParams>();
+      if (auto* seq_lstm_params =
+              op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) {
+        params->activation =
+            parse_activation(seq_lstm_params->fused_activation_function());
+        params->cell_clip = seq_lstm_params->cell_clip();
+        params->proj_clip = seq_lstm_params->proj_clip();
+      }
+      *builtin_data = reinterpret_cast<void*>(params);
+      break;
+    }
+
     case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: {
       auto params =
           allocator->AllocatePOD<TfLiteBidirectionalSequenceLSTMParams>();
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
index ec9cf38b83..89d57e4599 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
@@ -431,7 +431,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
+  const auto* params =
+      reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
+          node->builtin_data);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
 
   const TfLiteTensor* input_to_input_weights =
@@ -482,6 +484,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
+  // Copy out the LSTM specific params so they can be passed in the function.
+  TfLiteLSTMParams lstm_params;
+  lstm_params.activation = params->activation;
+  lstm_params.cell_clip = params->cell_clip;
+  lstm_params.proj_clip = params->proj_clip;
+
   switch (input_to_output_weights->type) {
     case kTfLiteFloat32: {
       return lstm_eval::EvalFloat(
@@ -496,7 +504,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           /*aux_input_to_cell_weights=*/nullptr,
           /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
           forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
-          projection_bias, params, /*forward_sequence=*/true,
+          projection_bias, &lstm_params, /*forward_sequence=*/true,
           /*output_offset=*/0, scratch_buffer, activation_state, cell_state,
           output);
     }
@@ -523,7 +531,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           /*aux_input_to_cell_weights=*/nullptr,
           /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
           forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
-          projection_bias, params, /*forward_sequence=*/true,
+          projection_bias, &lstm_params, /*forward_sequence=*/true,
           /*output_offset=*/0, scratch_buffer, scaling_factors,
           prod_scaling_factors, recovered_cell_weights, input_quantized,
           /*aux_input_quantized=*/nullptr, activation_state_quantized,
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
index cd3aac0532..c97b0fdd61 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
@@ -110,11 +110,12 @@ class UnidirectionalLSTMOpModel : public SingleOpModel {
 
     output_ = AddOutput(TensorType_FLOAT32);
 
-    SetBuiltinOp(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
-                 BuiltinOptions_LSTMOptions,
-                 CreateLSTMOptions(builder_, ActivationFunctionType_TANH,
-                                   cell_clip, proj_clip)
-                     .Union());
+    SetBuiltinOp(
+        BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+        BuiltinOptions_UnidirectionalSequenceLSTMOptions,
+        CreateUnidirectionalSequenceLSTMOptions(
+            builder_, ActivationFunctionType_TANH, cell_clip, proj_clip)
+            .Union());
     BuildInterpreter(input_shapes);
   }
 
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index ff8430827c..cb7a282743 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -250,6 +250,7 @@ union BuiltinOptions {
   FillOptions,
   BidirectionalSequenceLSTMOptions,
   BidirectionalSequenceRNNOptions,
+  UnidirectionalSequenceLSTMOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -394,6 +395,13 @@ table LSTMOptions {
   kernel_type: LSTMKernelType = FULL;
 }
 
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+}
+
 table BidirectionalSequenceLSTMOptions {
   fused_activation_function:ActivationFunctionType;
   cell_clip: float; // Optional, 0.0 means no clipping
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index f3cb113c9c..e7b7a59def 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -79,6 +79,9 @@ struct LocalResponseNormalizationOptionsT;
 struct LSTMOptions;
 struct LSTMOptionsT;
 
+struct UnidirectionalSequenceLSTMOptions;
+struct UnidirectionalSequenceLSTMOptionsT;
+
 struct BidirectionalSequenceLSTMOptions;
 struct BidirectionalSequenceLSTMOptionsT;
 
@@ -681,11 +684,12 @@ enum BuiltinOptions {
   BuiltinOptions_FillOptions = 68,
   BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
   BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
+  BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_BidirectionalSequenceRNNOptions
+  BuiltinOptions_MAX = BuiltinOptions_UnidirectionalSequenceLSTMOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[72] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -757,7 +761,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] {
     BuiltinOptions_ZerosLikeOptions,
     BuiltinOptions_FillOptions,
     BuiltinOptions_BidirectionalSequenceLSTMOptions,
-    BuiltinOptions_BidirectionalSequenceRNNOptions
+    BuiltinOptions_BidirectionalSequenceRNNOptions,
+    BuiltinOptions_UnidirectionalSequenceLSTMOptions
   };
   return values;
 }
@@ -835,6 +840,7 @@ inline const char * const *EnumNamesBuiltinOptions() {
     "FillOptions",
     "BidirectionalSequenceLSTMOptions",
     "BidirectionalSequenceRNNOptions",
+    "UnidirectionalSequenceLSTMOptions",
     nullptr
   };
   return names;
@@ -1129,6 +1135,10 @@ template<> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
 };
 
+template<> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -1720,6 +1730,14 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_BidirectionalSequenceRNNOptions ?
       reinterpret_cast<const BidirectionalSequenceRNNOptionsT *>(value) : nullptr;
   }
+  UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() {
+    return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ?
+      reinterpret_cast<UnidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
+  }
+  const UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() const {
+    return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ?
+      reinterpret_cast<const UnidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -3469,6 +3487,84 @@ inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
 
 flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct UnidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
+  typedef UnidirectionalSequenceLSTMOptions TableType;
+  ActivationFunctionType fused_activation_function;
+  float cell_clip;
+  float proj_clip;
+  UnidirectionalSequenceLSTMOptionsT()
+      : fused_activation_function(ActivationFunctionType_NONE),
+        cell_clip(0.0f),
+        proj_clip(0.0f) {
+  }
+};
+
+struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef UnidirectionalSequenceLSTMOptionsT NativeTableType;
+  enum {
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8
+  };
+  ActivationFunctionType fused_activation_function() const {
+    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const {
+    return GetField<float>(VT_CELL_CLIP, 0.0f);
+  }
+  float proj_clip() const {
+    return GetField<float>(VT_PROJ_CLIP, 0.0f);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<float>(verifier, VT_CELL_CLIP) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+           verifier.EndTable();
+  }
+  UnidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UnidirectionalSequenceLSTMOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip) {
+    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip) {
+    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  UnidirectionalSequenceLSTMOptionsBuilder &operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
+  flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    float cell_clip = 0.0f,
+    float proj_clip = 0.0f) {
+  UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
+  builder_.add_proj_clip(proj_clip);
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
   typedef BidirectionalSequenceLSTMOptions TableType;
   ActivationFunctionType fused_activation_function;
@@ -6488,6 +6584,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const {
     return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) : nullptr;
   }
+  const UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const {
+    return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -6799,6 +6898,10 @@ template<> inline const BidirectionalSequenceRNNOptions *Operator::builtin_optio
   return builtin_options_as_BidirectionalSequenceRNNOptions();
 }
 
+template<> inline const UnidirectionalSequenceLSTMOptions *Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const {
+  return builtin_options_as_UnidirectionalSequenceLSTMOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -7809,6 +7912,38 @@ inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBuffe
       _kernel_type);
 }
 
+inline UnidirectionalSequenceLSTMOptionsT *UnidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new UnidirectionalSequenceLSTMOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void UnidirectionalSequenceLSTMOptions::UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  { auto _e = cell_clip(); _o->cell_clip = _e; };
+  { auto _e = proj_clip(); _o->proj_clip = _e; };
+}
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> UnidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateUnidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _cell_clip = _o->cell_clip;
+  auto _proj_clip = _o->proj_clip;
+  return tflite::CreateUnidirectionalSequenceLSTMOptions(
+      _fbb,
+      _fused_activation_function,
+      _cell_clip,
+      _proj_clip);
+}
+
 inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new BidirectionalSequenceLSTMOptionsT();
   UnPackTo(_o, _resolver);
@@ -9620,6 +9755,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -9918,6 +10057,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -10204,6 +10347,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptionsT *>(value);
       return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptionsT *>(value);
+      return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -10490,6 +10637,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new BidirectionalSequenceRNNOptionsT(*reinterpret_cast<BidirectionalSequenceRNNOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      value = new UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast<UnidirectionalSequenceLSTMOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -10847,6 +10998,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<UnidirectionalSequenceLSTMOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
-- 
GitLab


From 367f7d651f19c5b111ea0292243eab81fb4058c7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 7 Oct 2018 02:01:04 -0700
Subject: [PATCH 0485/1085] compat: Update forward compatibility horizon to
 2018-10-07

PiperOrigin-RevId: 216079665
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index d85fb00414..ee56480b00 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 6)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 7)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From f9371cfe1913cb4b4d13530d95eb9027983a2424 Mon Sep 17 00:00:00 2001
From: Marcela Morales Quispe <marcela.morales.quispe@gmail.com>
Date: Sun, 7 Oct 2018 12:47:10 -0500
Subject: [PATCH 0486/1085] [Documentation] Format code block.

Signed-off-by: Marcela Morales Quispe <marcela.morales.quispe@gmail.com>
---
 ISSUE_TEMPLATE.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
index 52faed9297..b3d84ad8c9 100644
--- a/ISSUE_TEMPLATE.md
+++ b/ISSUE_TEMPLATE.md
@@ -29,9 +29,11 @@ You can collect some of this information using our environment capture script:
 
 https://github.com/tensorflow/tensorflow/tree/master/tools/tf_env_collect.sh
 
-You can obtain the TensorFlow version with
+You can obtain the TensorFlow version with:
 
+```bash
 python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"
+```
 
 ### Describe the problem
 Describe the problem clearly here. Be sure to convey here why it's a bug in TensorFlow or a feature request.
-- 
GitLab


From 968792dd9e4b5cf7ed4c648d2dc46838212047a9 Mon Sep 17 00:00:00 2001
From: Marcela Morales Quispe <marcela.morales.quispe@gmail.com>
Date: Sun, 7 Oct 2018 13:10:12 -0500
Subject: [PATCH 0487/1085] [Documentation] Format example list.

Signed-off-by: Marcela Morales Quispe <marcela.morales.quispe@gmail.com>
---
 CODE_OF_CONDUCT.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 5fff9d05a1..94d4215440 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -7,18 +7,18 @@ In the interest of fostering an open and welcoming environment, we as contributo
 
 Examples of behavior that contributes to creating a positive environment include:
 
-* Using welcoming and inclusive language
-* Being respectful of differing viewpoints and experiences
-* Gracefully accepting constructive criticism
-* Focusing on what is best for the community
-* Showing empathy towards other community members
+* Using welcoming and inclusive language.
+* Being respectful of differing viewpoints and experiences.
+* Gracefully accepting constructive criticism.
+* Focusing on what is best for the community.
+* Showing empathy towards other community members.
 
 Examples of unacceptable behavior by participants include:
 
-* The use of sexualized language or imagery and unwelcome sexual attention or advances
-* Trolling, insulting/derogatory comments, and personal or political attacks
-* Public or private harassment
-* Publishing others' private information, such as a physical or electronic address, without explicit permission
+* The use of sexualized language or imagery and unwelcome sexual attention or advances.
+* Trolling, insulting/derogatory comments, and personal or political attacks.
+* Public or private harassment.
+* Publishing others' private information, such as a physical or electronic address, without explicit permission.
 * Conduct which could reasonably be considered inappropriate for the forum in which it occurs. 
 
 All TensorFlow forums and spaces are meant for professional interactions, and any behavior which could reasonably be considered inappropriate in a professional setting is unacceptable.
-- 
GitLab


From 137b6f6e2ae0dd2c31eb20a5746986b69c942afd Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Sun, 7 Oct 2018 19:18:22 -0700
Subject: [PATCH 0488/1085] Removed code inside #ifdef MKL_ML_ONLY

---
 tensorflow/core/kernels/mkl_aggregate_ops.cc  | 257 +-----------------
 tensorflow/core/kernels/mkl_identity_op.cc    |  31 ---
 .../core/kernels/mkl_input_conversion_op.cc   | 197 --------------
 tensorflow/core/kernels/mkl_reshape_op.cc     | 106 +-------
 tensorflow/core/kernels/mkl_tfconv_op.h       |  58 ----
 tensorflow/core/kernels/mkl_transpose_op.cc   |   8 -
 6 files changed, 2 insertions(+), 655 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc
index 20aa1f7ea1..ae1e6b6751 100644
--- a/tensorflow/core/kernels/mkl_aggregate_ops.cc
+++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc
@@ -24,268 +24,14 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/logging.h"
 
-#ifndef INTEL_MKL_ML_ONLY
+#include "tensorflow/core/util/mkl_util.h"
 #include "mkldnn.hpp"
 using mkldnn::stream;
 using mkldnn::sum;
-#else
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
-#endif
-#include "tensorflow/core/util/mkl_util.h"
 
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-#ifdef INTEL_MKL_ML_ONLY
-
-template <typename Device, typename T>
-class MklAddNOp : public OpKernel {
- public:
-  explicit MklAddNOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* ctx) override {
-    const int num = ctx->num_inputs();
-    OP_REQUIRES(ctx, num / 2 == 2,
-                errors::InvalidArgument("Only additions of two tensors "
-                                        "supported by MKL. Num inputs: ",
-                                        num));
-
-    MklAddNOpContext mkl_context;
-    size_t src1_idx = 0, src2_idx = 1;
-    const Tensor& input0 = MklGetInput(ctx, src1_idx);
-    GetMklShape(ctx, src1_idx, &(mkl_context.input1_shape));
-    bool input1_in_mkl_format = mkl_context.input1_shape.IsMklTensor();
-
-    const Tensor& input1 = MklGetInput(ctx, src2_idx);
-    GetMklShape(ctx, src2_idx, &(mkl_context.input2_shape));
-    bool input2_in_mkl_format = mkl_context.input2_shape.IsMklTensor();
-
-    // if the shapes of two tensors are not same raise op error
-    TensorShape src1_shape, src2_shape;
-    src1_shape = input0.shape();
-    src2_shape = input1.shape();
-    if (!src1_shape.IsSameSize(src2_shape)) {
-      ctx->SetStatus(errors::InvalidArgument(
-          "Inputs to operation ", this->name(), " of type ",
-          this->type_string(), " must have the same size and shape.  Input 0: ",
-          src1_shape.DebugString(), " != input 1: ", src2_shape.DebugString()));
-    }
-    // handle the case of a scalar
-    if (!input1_in_mkl_format && input0.dims() == 0) {
-      const TensorShape& o_shape = input0.shape();
-      Tensor* out_tensor = nullptr;
-      mkl_context.output_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
-                                mkl_context.output_shape);
-      float user_i1 = (input0.scalar<T>()());
-      float user_i2 = (input1.scalar<T>()());
-      out_tensor->scalar<T>()() = std::plus<float>{}(user_i1, user_i2);
-      return;
-    }
-
-    mkl_context.in_dims = input1_in_mkl_format
-                              ? mkl_context.input1_shape.GetDimension()
-                              : input0.dims();
-    mkl_context.in_dims = input2_in_mkl_format
-                              ? mkl_context.input2_shape.GetDimension()
-                              : input1.dims();
-
-    // If there is nothing to compute, return.
-    if (!input1_in_mkl_format && !input2_in_mkl_format) {
-      const TensorShape& o_shape = input0.shape();
-      if (o_shape.num_elements() == 0) {
-        Tensor* out_tensor = nullptr;
-        mkl_context.output_shape.SetMklTensor(false);
-        AllocateOutputSetMklShape(ctx, src1_idx, &out_tensor, o_shape,
-                                  mkl_context.output_shape);
-        return;
-      }
-    }
-
-    mkl_context.in_sizes = new size_t[mkl_context.in_dims];
-    mkl_context.in_strides = new size_t[mkl_context.in_dims];
-    // Generate size, stride for input if input is in MKL format.
-    if (input1_in_mkl_format || input2_in_mkl_format) {
-      const MklShape* tmp_mkl_shape = (input1_in_mkl_format)
-                                          ? &mkl_context.input1_shape
-                                          : &mkl_context.input2_shape;
-      for (int i = 0; i < mkl_context.in_dims; i++) {
-        mkl_context.in_sizes[i] = tmp_mkl_shape->GetSizes()[i];
-        mkl_context.in_strides[i] = tmp_mkl_shape->GetStrides()[i];
-      }
-    } else {
-      for (int i = 0; i < mkl_context.in_dims; i++) {
-        mkl_context.in_sizes[i] =
-            input0.dim_size((mkl_context.in_dims - 1) - i);
-      }
-      mkl_context.in_strides[0] = 1;
-      for (int i = 1; i < mkl_context.in_dims; i++) {
-        mkl_context.in_strides[i] =
-            mkl_context.in_strides[i - 1] * mkl_context.in_sizes[i - 1];
-      }
-    }
-    std::vector<float> coeff(2, 1.0);
-    mkl_context.MklCreateInputLayouts(ctx);
-    CHECK_EQ(dnnSumCreate_F32(&mkl_context.Eltwise, mkl_context.attributes, 2,
-                              mkl_context.lt_input1, &coeff[0]),
-             E_SUCCESS);
-
-    Tensor mkl_tmp_input1_buf_tensor, mkl_tmp_input2_buf_tensor;
-    mkl_context.MklPrepareAddNInputs(ctx, &mkl_tmp_input1_buf_tensor,
-                                     &mkl_tmp_input2_buf_tensor);
-    Tensor* output = nullptr;
-    if (input1_in_mkl_format || input2_in_mkl_format) {
-      TensorShape tf_shape;
-      mkl_context.output_shape.SetMklTensor(true);
-      mkl_context.output_shape.SetMklLayout(mkl_context.Eltwise,
-                                            dnnResourceDst);
-
-      mkl_context.output_shape.SetTfLayout(
-          mkl_context.in_dims, mkl_context.in_sizes, mkl_context.in_strides);
-      if (input1_in_mkl_format == true) {
-        mkl_context.output_shape.SetTfDimOrder(
-            mkl_context.in_dims, mkl_context.input1_shape.GetTfToMklDimMap());
-      } else {
-        mkl_context.output_shape.SetTfDimOrder(
-            mkl_context.in_dims, mkl_context.input2_shape.GetTfToMklDimMap());
-      }
-      tf_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
-                          mkl_context.output_shape.GetMklLayout())) /
-                      sizeof(T));
-
-      AllocateOutputSetMklShape(ctx, src1_idx, &output, tf_shape,
-                                mkl_context.output_shape);
-    } else {
-      const TensorShape& o_shape = input1.shape();
-      mkl_context.output_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(ctx, src1_idx, &output, o_shape,
-                                mkl_context.output_shape);
-    }
-
-    mkl_context.Eltwise_res[dnnResourceDst] =
-        static_cast<void*>(output->flat<T>().data());
-
-    // Execute convolution
-    CHECK_EQ(dnnExecute_F32(mkl_context.Eltwise, mkl_context.Eltwise_res),
-             E_SUCCESS);
-
-    mkl_context.MklCleanup();
-  }
-
- private:
-  typedef struct {
-    int in_dims;
-    size_t* in_sizes = nullptr;
-    size_t* in_strides = nullptr;
-    dnnPrimitive_t Eltwise = nullptr;
-    dnnPrimitiveAttributes_t attributes = nullptr;
-    void* Eltwise_res[dnnResourceNumber];
-    dnnLayout_t lt_input1 = nullptr, lt_input2 = nullptr;
-    MklShape input1_shape, input2_shape, output_shape;
-
-    void MklCreateInputLayouts(OpKernelContext* context) {
-      bool input1_in_mkl_format = input1_shape.IsMklTensor();
-      if (!input1_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input1, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
-      } else {
-        lt_input1 = static_cast<dnnLayout_t>(input1_shape.GetCurLayout());
-      }
-
-      bool input2_in_mkl_format = input2_shape.IsMklTensor();
-      if (!input2_in_mkl_format) {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input2, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
-      } else {
-        lt_input2 = static_cast<dnnLayout_t>(input2_shape.GetCurLayout());
-      }
-    }
-
-    void MklPrepareAddNInputs(OpKernelContext* context,
-                              Tensor* mkl_tmp_input1_buf_tensor,
-                              Tensor* mkl_tmp_input2_buf_tensor) {
-      bool mkl_convert_input1, mkl_convert_input2;
-      dnnPrimitive_t mkl_prim_convert_input1 = nullptr,
-                     mkl_prim_convert_input2 = nullptr;
-      dnnLayout_t mkl_lt_internal_input1 = nullptr,
-                  mkl_lt_internal_input2 = nullptr;
-      void *mkl_buf_convert_input1 = nullptr, *mkl_buf_convert_input2 = nullptr;
-      dnnResourceType_t dnnResourceMultipleSrc2 =
-          (dnnResourceType_t)(dnnResourceMultipleSrc + 1);
-      // Compare with internal layouts and convert if needed
-      const Tensor& input1 = MklGetInput(context, 0);
-
-      void* mkl_buf_input1 =
-          const_cast<void*>(static_cast<const void*>(input1.flat<T>().data()));
-
-      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
-                   &mkl_lt_internal_input1, Eltwise, dnnResourceMultipleSrc),
-               E_SUCCESS);
-      mkl_convert_input1 =
-          !dnnLayoutCompare_F32(mkl_lt_internal_input1, lt_input1);
-      if (mkl_convert_input1) {
-        CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input1, lt_input1,
-                                         mkl_lt_internal_input1),
-                 E_SUCCESS);
-        AllocTmpBuffer(context, mkl_tmp_input1_buf_tensor,
-                       mkl_lt_internal_input1, &mkl_buf_convert_input1);
-        CHECK_EQ(
-            dnnConversionExecute_F32(mkl_prim_convert_input1, mkl_buf_input1,
-                                     mkl_buf_convert_input1),
-            E_SUCCESS);
-        dnnDelete_F32(mkl_prim_convert_input1);
-      }
-      dnnLayoutDelete_F32(mkl_lt_internal_input1);
-
-      Eltwise_res[dnnResourceMultipleSrc] =
-          (mkl_convert_input1) ? mkl_buf_convert_input1 : mkl_buf_input1;
-
-      const Tensor& input2 = MklGetInput(context, 1);
-      void* mkl_buf_input2 =
-          const_cast<void*>(static_cast<const void*>(input2.flat<T>().data()));
-      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
-                   &mkl_lt_internal_input2, Eltwise, dnnResourceMultipleSrc2),
-               E_SUCCESS);
-      mkl_convert_input2 =
-          !dnnLayoutCompare_F32(mkl_lt_internal_input2, lt_input2);
-      if (mkl_convert_input2) {
-        CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input2, lt_input2,
-                                         mkl_lt_internal_input2),
-                 E_SUCCESS);
-        AllocTmpBuffer(context, mkl_tmp_input2_buf_tensor,
-                       mkl_lt_internal_input2, &mkl_buf_convert_input2);
-        CHECK_EQ(
-            dnnConversionExecute_F32(mkl_prim_convert_input2, mkl_buf_input2,
-                                     mkl_buf_convert_input2),
-            E_SUCCESS);
-        dnnDelete_F32(mkl_prim_convert_input2);
-      }
-      dnnLayoutDelete_F32(mkl_lt_internal_input2);
-
-      Eltwise_res[dnnResourceMultipleSrc2] =
-          (mkl_convert_input2) ? mkl_buf_convert_input2 : mkl_buf_input2;
-    }
-
-    void MklCleanup() {
-      bool input1_in_mkl_format = input1_shape.IsMklTensor();
-      bool input2_in_mkl_format = input2_shape.IsMklTensor();
-      dnnDelete_F32(Eltwise);
-      if (!input1_in_mkl_format || !input2_in_mkl_format) {
-        delete[] in_sizes;
-        delete[] in_strides;
-      }
-      if (!input1_in_mkl_format) {
-        dnnLayoutDelete_F32(lt_input1);
-      }
-      if (!input2_in_mkl_format) {
-        dnnLayoutDelete_F32(lt_input2);
-      }
-    }
-  } MklAddNOpContext;
-};
-
-#else  // INTEL_MKL_ML_ONLY
 template <typename Device, typename T>
 class MklAddNOp : public OpKernel {
  public:
@@ -505,7 +251,6 @@ class MklAddNOp : public OpKernel {
   }
 };
 
-#endif
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklAddN")                          \
                               .Device(DEVICE_CPU)                   \
diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl_identity_op.cc
index b57e816028..18bd0bedbd 100644
--- a/tensorflow/core/kernels/mkl_identity_op.cc
+++ b/tensorflow/core/kernels/mkl_identity_op.cc
@@ -24,42 +24,12 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
 
-#ifdef INTEL_MKL_ML_ONLY
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
-#endif
 #include "tensorflow/core/util/mkl_util.h"
-
-#ifndef INTEL_MKL_ML_ONLY
 #include "mkldnn.hpp"
-#endif
 
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-#ifdef INTEL_MKL_ML_ONLY
-
-template <typename Device, typename T>
-class MklIdentityOp : public OpKernel {
- public:
-  explicit MklIdentityOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    MklShape mkl_shape_input;
-    GetMklShape(context, 0, &mkl_shape_input);
-    bool input_in_mkl_format = mkl_shape_input.IsMklTensor();
-
-    if (input_in_mkl_format) {
-      ForwardMklTensorInToOut(context, 0, 0);
-    } else {
-      ForwardTfTensorInToOut(context, 0, 0);
-    }
-  }
-
-  bool IsExpensive() override { return false; }
-};
-
-#else
 
 template <typename Device, typename T>
 class MklIdentityOp : public OpKernel {
@@ -83,7 +53,6 @@ class MklIdentityOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
-#endif
 
 #define REGISTER_MKL_CPU(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("_MklIdentity")                      \
diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc
index 84ee241b8e..6ee8c1d402 100644
--- a/tensorflow/core/kernels/mkl_input_conversion_op.cc
+++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc
@@ -31,12 +31,8 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/mkl_tfconv_op.h"
 #include "tensorflow/core/util/mkl_util.h"
-
-#ifndef INTEL_MKL_ML_ONLY
 #include "mkldnn.hpp"
-
 using mkldnn::stream;
-#endif
 
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -60,198 +56,6 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 //     convert the TF format input to MKL format
 ///////////////////////////////////////////////////////////
 
-#ifdef INTEL_MKL_ML_ONLY
-template <typename Device, typename T>
-class MklInputConversionOp : public OpKernel {
- public:
-  explicit MklInputConversionOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
-    OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
-    has_avx512f_ = port::TestCPUFeature(port::CPUFeature::AVX512F);
-  }
-
- private:
-  void Compute(OpKernelContext* context) override {
-    // Check if input tensors are in MKL format.
-    const Tensor& input_tensor_0 = MklGetInput(context, 0);
-    MklShape input_shape_0;
-    GetMklShape(context, 0, &input_shape_0);
-
-    const Tensor& input_tensor_1 = MklGetInput(context, 1);
-    MklShape input_shape_1;
-    GetMklShape(context, 1, &input_shape_1);
-
-    bool tf_shapes_are_same = MklCompareShapes(&context->input(0).shape(),
-                                               &context->input(1).shape());
-
-    VLOG(1) << "MklInputConversionOp: Input shapes are "
-            << (tf_shapes_are_same ? "*same*" : "*different*") << ": "
-            << context->input(0).shape().DebugString() << " and "
-            << context->input(1).shape().DebugString();
-
-    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-    // if both inputs are in TF format, just copy input tensors to output.
-    if (!input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
-      VLOG(1) << "MklInputConversionOp: No conversion needed, "
-              << "copying TF inputs to output";
-
-      ForwardTfTensorInToOut(context, 0, 0);
-      ForwardTfTensorInToOut(context, 1, 1);
-      return;
-    }
-
-    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-    // If both inputs are in MKL format
-    if (input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
-      // If both have the same shape, pass them through
-      if (tf_shapes_are_same) {
-        VLOG(1) << "MklInputConversionOp: No conversion needed, "
-                << "copying MKL inputs with identical shapes to output";
-
-        ForwardMklTensorInToOut(context, 0, 0);
-        ForwardMklTensorInToOut(context, 1, 1);
-        return;
-      }
-
-      // Sanity check
-      bool mkl_shapes_are_same =
-          MklCompareShapes(&input_shape_0, &input_shape_1);
-      if (mkl_shapes_are_same) {
-        CHECK(false) << "MklInputConversionOp: Unexpected: TF shapes are "
-                        "different but MKL shapes are same";
-      }
-
-      // Both have different shapes, so broadcast will be necessary.
-      // Convert to TF and pass both tensors through (we can't do broadcast
-      // with MKL tensors)
-      VLOG(1) << "MklInputConversionOp: Broadcast needed, "
-              << "converted MKL inputs to TF format";
-
-      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
-                                           op_data_type, has_avx512f_, 0);
-      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
-                                           op_data_type, has_avx512f_, 1);
-      SetDummyMklShapeOutput(context, 0);
-      SetDummyMklShapeOutput(context, 1);
-      return;
-    }
-
-    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-    // One input is MKL and one is TF. If no broadcast is needed, convert
-    // the TF tensor to MKL, otherwise convert the MKL tensor to TF format
-    VLOG(1) << "MklInputConversionOp: Inputs in different formats (MKL/TF)";
-
-    const Tensor* mkl_tensor;
-    const MklShape* mkl_shape;
-    const Tensor* tf_tensor;
-    MklShape* tf_mkl_shape;
-    uint32 mkl_tensor_index;
-    uint32 tf_tensor_index;
-    if (input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
-      mkl_tensor = &input_tensor_0;
-      mkl_shape = &input_shape_0;
-      mkl_tensor_index = 0;
-      tf_tensor = &input_tensor_1;
-      tf_mkl_shape = &input_shape_1;
-      tf_tensor_index = 1;
-    } else if (!input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
-      mkl_tensor = &input_tensor_1;
-      mkl_shape = &input_shape_1;
-      mkl_tensor_index = 1;
-      tf_tensor = &input_tensor_0;
-      tf_mkl_shape = &input_shape_0;
-      tf_tensor_index = 0;
-    } else {
-      CHECK(false) << "MklInputConversionOp: Unexpected combination of input "
-                      "shapes for MKL "
-                   << "element-wise op";
-    }
-
-    // Broadcast is needed if the shapes are not the same
-    bool broadcast_needed;
-
-    size_t in0_size = 1;
-    for (size_t i = 0; i < mkl_shape->GetDimension(); ++i)
-      in0_size *= mkl_shape->tf_dim_size(i);
-
-    size_t in1_size = 1;
-    for (size_t i = 0; i < tf_tensor->shape().dims(); ++i)
-      in1_size *= tf_tensor->shape().dim_size(i);
-
-    broadcast_needed = (in0_size != in1_size);
-
-    if (!broadcast_needed) {
-      // Both shapes are same, convert the TF input to MKL
-      VLOG(1) << "MklInputConversionOp: No broadcast needed.";
-      VLOG(1) << "MklInputConversionOp: Converting input " << tf_tensor_index
-              << " to MKL format";
-
-      // Create MklShape
-      Tensor* tensor_out;
-      MklShape mkl_output_mkl_shape;
-      mkl_output_mkl_shape.SetMklTensor(true);
-      mkl_output_mkl_shape.SetTfLayout(mkl_shape->GetDimension(),
-                                       mkl_shape->GetSizes(),
-                                       mkl_shape->GetStrides());
-      mkl_output_mkl_shape.SetTfDimOrder(mkl_shape->GetDimension());
-
-      // ** Temporarily borrow the layout from the MKL input **
-      mkl_output_mkl_shape.SetMklLayout(mkl_shape->GetCurLayout());
-
-      // Create output tensor
-      AllocateOutputSetMklShape(context, tf_tensor_index, &tensor_out,
-                                mkl_tensor->shape(), mkl_output_mkl_shape);
-
-      // Since the shapes are the same, use information from the other tensor
-      tf_mkl_shape->SetTfLayout(mkl_shape->GetDimension(),
-                                mkl_shape->GetSizes(), mkl_shape->GetStrides());
-      // Convert the data format
-      tf_mkl_shape->GetConvertedFlatData(
-          mkl_shape->GetCurLayout(),
-          const_cast<T*>(tf_tensor->flat<T>().data()),
-          const_cast<T*>(tensor_out->flat<T>().data()));
-
-      // ** Release the borrowed layout to avoid double deletion
-      //    in the destructor call **
-      mkl_output_mkl_shape.SetMklLayout(nullptr);
-
-      // -- The tensor in MKL format passes through --
-      ForwardMklTensorInToOut(context, mkl_tensor_index, mkl_tensor_index);
-    } else {
-      // Broadcast is needed, so convert the MKL input to TF
-      VLOG(1) << "MklInputConversionOp: Broadcast needed.";
-      VLOG(1) << "MklInputConversionOp: Converting input " << mkl_tensor_index
-              << " to TF format";
-      MklToTfOp<Device, T>::ConvertMklToTf(this, context, data_format_str,
-                                           op_data_type, has_avx512f_,
-                                           mkl_tensor_index);
-      SetDummyMklShapeOutput(context, mkl_tensor_index);
-
-      // The tensor in TF format passes through
-      ForwardTfTensorInToOut(context, tf_tensor_index, tf_tensor_index);
-    }
-
-    VLOG(1) << "MklInputConversionOp: Shapes (output): "
-            << context->mutable_output(0)->shape().DebugString() << " and "
-            << context->mutable_output(1)->shape().DebugString();
-
-    VLOG(1) << "MklInputConversion completed successfully.";
-  }
-
- private:
-  /// Data format of the operation
-  string data_format_str;
-
-  /// Data type of the operation
-  DataType op_data_type;
-
-  /// CPUIDInfo
-  bool has_avx512f_ = false;
-};
-
-#else
-
 template <typename Device, typename T>
 class MklInputConversionOp : public OpKernel {
  public:
@@ -488,7 +292,6 @@ class MklInputConversionOp : public OpKernel {
   bool has_avx512f_ = false;
 };
 
-#endif
 
 ///////////////////////////////////////////////////////////
 //               Register kernel
diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index d9a7893a53..358233c913 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -24,15 +24,9 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
 
-#ifndef INTEL_MKL_ML_ONLY
+#include "tensorflow/core/util/mkl_util.h"
 #include "mkldnn.hpp"
 using mkldnn::stream;
-#else
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
-#endif
-
-#include "tensorflow/core/util/mkl_util.h"
 
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
@@ -41,103 +35,6 @@ class MklReshapeOp : public OpKernel {
  public:
   explicit MklReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
 
-#ifdef INTEL_MKL_ML_ONLY
-  void Compute(OpKernelContext* context) override {
-    const Tensor& input = MklGetInput(context, 0);
-    const Tensor& sizes = MklGetInput(context, 1);
-
-    // Preliminary validation of sizes.
-    OP_REQUIRES(context, IsLegacyVector(sizes.shape()),
-                errors::InvalidArgument("sizes input must be 1-D, not shape ",
-                                        sizes.shape().DebugString()));
-
-    // Compute the output shape.  Determine product of specified
-    // dimensions, and find the index of the unspecified one.
-    TensorShape shape;
-    int64 product = 1;
-    int unknown_index = -1;
-    switch (sizes.dtype()) {
-      case DT_INT32:
-        OP_REQUIRES_OK(context, ValidateSizes<int32>(sizes, &product,
-                                                     &unknown_index, &shape));
-        break;
-      case DT_INT64:
-        OP_REQUIRES_OK(context, ValidateSizes<int64>(sizes, &product,
-                                                     &unknown_index, &shape));
-        break;
-      default:
-        context->CtxFailure(errors::InvalidArgument(
-            "desired shape must be a DT_INT32 or DT_INT64 vector, not a ",
-            DataTypeString(sizes.dtype())));
-        return;
-    }
-    if (unknown_index != -1) {
-      OP_REQUIRES(
-          context, product > 0,
-          errors::InvalidArgument("Reshape cannot infer the missing input size "
-                                  "for an empty tensor unless all specified "
-                                  "input sizes are non-zero"));
-      const int64 missing = input.NumElements() / product;
-      OP_REQUIRES(
-          context, product * missing == input.NumElements(),
-          errors::InvalidArgument(
-              "Input to reshape is a tensor with ", input.NumElements(),
-              " values, but the requested shape requires a multiple of ",
-              product));
-      shape.set_dim(unknown_index, missing);
-    }
-    OP_REQUIRES(context, shape.num_elements() == input.NumElements(),
-                errors::InvalidArgument("Input to reshape is a tensor with ",
-                                        input.NumElements(),
-                                        " values, but the requested shape has ",
-                                        shape.num_elements()));
-
-    MklShape mkl_shape_input;
-    GetMklShape(context, 0, &mkl_shape_input);
-    bool input_in_mkl_format = mkl_shape_input.IsMklTensor();
-    if (input_in_mkl_format) {
-      TensorShape& shape_to = shape;
-      TensorShape shape_from;
-      for (size_t i = 0; i < mkl_shape_input.GetDimension(); i++) {
-        // Outermost to innermost dimension
-        shape_from.AddDim(
-            mkl_shape_input.GetSizes()[mkl_shape_input.tf_dim_idx(i)]);
-      }
-
-      if (shape_from == shape_to) {
-        CopyMklTensorInToOut(context, 0, 0);
-        return;
-      } else {
-        // Allocate output tensor.
-        Tensor* output_tensor = NULL;
-        MklShape mkl_shape_output;
-        mkl_shape_output.SetMklTensor(false);
-        AllocateOutputSetMklShape(context, 0, &output_tensor, shape_to,
-                                  mkl_shape_output);
-
-        // Get output layout pointer.
-        dnnLayout_t output_layout =
-            static_cast<dnnLayout_t>(mkl_shape_input.GetTfLayout());
-
-        // Execute DNNConversion.
-        // Note: we  assume an MKL tensor always have float as its data type.
-        void* input_buffer =
-            static_cast<void*>(const_cast<float*>(input.flat<float>().data()));
-        void* output_buffer = static_cast<void*>(
-            const_cast<float*>(output_tensor->flat<float>().data()));
-        mkl_shape_input.GetConvertedFlatData(output_layout, input_buffer,
-                                             output_buffer);
-
-        VLOG(1) << "MKLToTFConversion complete successfully.";
-        return;
-      }
-    } else {
-      CopyTfTensorInToOutWithShape(context, 0, 0, shape);
-    }
-  }
-
-#else
-
  private:
   // When the input tensor is in MKL layout and we are reshaping the tensor to a
   // different shape than its actual shape, then we use MKLDNN reorder primitive
@@ -316,7 +213,6 @@ class MklReshapeOp : public OpKernel {
     }
   }
 
-#endif  // INTEL_MKL_ML_ONLY
 
  private:
   const int kInputSlotIdx = 0;
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h
index 894c2e34e8..eabdb74553 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.h
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -32,15 +32,9 @@ limitations under the License.
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/util/tensor_format.h"
 
-#ifdef INTEL_MKL_ML_ONLY
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
-#endif
 #include "tensorflow/core/util/mkl_util.h"
 
-#ifndef INTEL_MKL_ML_ONLY
 using mkldnn::stream;
-#endif
 
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -64,7 +58,6 @@ class MklToTfOp : public OpKernel {
     VLOG(1) << "MKLToTFConversion complete successfully.";
   }
 
-#ifndef INTEL_MKL_ML_ONLY
   static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
                              string data_format_str, DataType op_data_type,
                              bool has_avx512f, uint input_number) {
@@ -125,57 +118,6 @@ class MklToTfOp : public OpKernel {
                           __FILE__, ":", __LINE__));
     }
   }
-#else
-  static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
-                             string data_format_str, DataType op_data_type,
-                             bool has_avx512f, uint32 input_number) {
-    // Check that input tensor is in MKL format.
-    const Tensor& input_tensor = MklGetInput(context, input_number);
-    MklShape input_shape;
-    GetMklShape(context, input_number, &input_shape);
-
-    // if input is already in Tf format, then just copy input tensor to output.
-    if (!input_shape.IsMklTensor()) {
-      context->set_output(input_number, input_tensor);
-      VLOG(1) << "MKLToTFConversion: No conversion needed, "
-              << "copying input to output";
-      return;
-    }
-
-    // Check that input data type is same as operator data type and that it is
-    // same as output data type.
-    DataType input_data_type = op_kernel->input_type(input_number);
-    DataType output_data_type = op_kernel->output_type(input_number);
-    CHECK_EQ(op_data_type, input_data_type);
-    CHECK_EQ(op_data_type, output_data_type);
-
-    TensorShape output_shape;
-    size_t ndims = input_shape.GetDimension();
-    size_t* in_sizes = new size_t[ndims];
-    for (size_t i = 0; i < ndims; i++) {
-      // Outermost to innermost dimension
-      output_shape.AddDim(input_shape.GetSizes()[input_shape.tf_dim_idx(i)]);
-      in_sizes[i] = input_shape.GetSizes()[i];
-    }
-
-    // Allocate output tensor.
-    Tensor* output_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(input_number, output_shape,
-                                                     &output_tensor));
-
-    dnnLayout_t output_layout =
-        static_cast<dnnLayout_t>(input_shape.GetTfLayout());
-    // Execute DNNConversion.
-    void* input_buffer =
-        static_cast<void*>(const_cast<T*>(input_tensor.flat<T>().data()));
-    delete[] in_sizes;
-    void* output_buffer =
-        static_cast<void*>(const_cast<T*>(output_tensor->flat<T>().data()));
-    input_shape.GetConvertedFlatData(output_layout, input_buffer,
-                                     output_buffer);
-    VLOG(1) << "MKLToTFConversion complete successfully.";
-  }
-#endif
 
  private:
   /// Data format of the operation
diff --git a/tensorflow/core/kernels/mkl_transpose_op.cc b/tensorflow/core/kernels/mkl_transpose_op.cc
index 6bbe271c54..1a54aa585a 100644
--- a/tensorflow/core/kernels/mkl_transpose_op.cc
+++ b/tensorflow/core/kernels/mkl_transpose_op.cc
@@ -25,12 +25,10 @@ limitations under the License.
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/kernels/transpose_op.h"
 
-#ifndef INTEL_MKL_ML_ONLY
 #include "mkldnn.hpp"
 #include "tensorflow/core/util/mkl_util.h"
 
 using mkldnn::stream;
-#endif
 
 namespace tensorflow {
 
@@ -106,7 +104,6 @@ static const char kMKLConjugateTranspose = 'C';
 
 #endif  // if !defined(INTEL_MKL_DNN_ONLY)
 
-#ifndef INTEL_MKL_ML_ONLY
 // MKL-DNN based Transpose implementation
 template <typename T>
 Status MKLTransposeND(OpKernelContext* ctx, const Tensor& in, Tensor* out,
@@ -154,7 +151,6 @@ Status MKLTransposeND(OpKernelContext* context, const Tensor& in_tensor,
     return errors::Aborted("Operation received an exception:", error_msg);
   }
 }
-#endif  // #ifndef INTEL_MKL_ML_ONLY
 
 }  // namespace
 
@@ -181,7 +177,6 @@ Status MklTransposeCpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
   }
 #endif
 
-#ifndef INTEL_MKL_ML_ONLY
   // MKL-DNN has limit on the maximum number of dimensions in a tensor.
   // Fallback to Eigen for not supported cases.
   if (in.dims() <= TENSOR_MAX_DIMS) {
@@ -194,7 +189,6 @@ Status MklTransposeCpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
         break;
     }
   }
-#endif
 
   // Fallback to eigen if transpose parameters not supported by MKL or MKL-DNN
   typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -227,7 +221,6 @@ Status MklConjugateTransposeCpuOp::DoTranspose(OpKernelContext* ctx,
   }
 #endif
 
-#ifndef INTEL_MKL_ML_ONLY
   // MKL-DNN has limit on the maximum number of dimensions in a tensor.
   // Fallback to Eigen for not supported cases.
   if (in.dims() <= TENSOR_MAX_DIMS) {
@@ -240,7 +233,6 @@ Status MklConjugateTransposeCpuOp::DoTranspose(OpKernelContext* ctx,
         break;
     }
   }
-#endif
 
   // Fallback to eigen if transpose parameters not supported by MKL or MKL-DNN
   typedef Eigen::ThreadPoolDevice CPUDevice;
-- 
GitLab


From 0975e71e0d9451e3a5d8a1581a31c0dc9aef994a Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Mon, 8 Oct 2018 10:43:25 +0800
Subject: [PATCH 0489/1085] refine the pylint

---
 tensorflow/python/ops/clip_ops.py      |  2 +-
 tensorflow/python/ops/clip_ops_test.py | 34 +++++++++++++-------------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py
index d1a445070a..f45e25fc91 100644
--- a/tensorflow/python/ops/clip_ops.py
+++ b/tensorflow/python/ops/clip_ops.py
@@ -161,7 +161,7 @@ def clip_by_norm(t, clip_norm, axes=None, name=None):
     if isinstance(t, ops.IndexedSlices):
       return ops.IndexedSlices(values_clip, t.indices, t.dense_shape)
 
-    return values_clip     
+    return values_clip
 
 @tf_export("linalg.global_norm", "global_norm")
 @deprecation.deprecated_endpoints("global_norm")
diff --git a/tensorflow/python/ops/clip_ops_test.py b/tensorflow/python/ops/clip_ops_test.py
index 8ac41e984a..4037cb6026 100644
--- a/tensorflow/python/ops/clip_ops_test.py
+++ b/tensorflow/python/ops/clip_ops_test.py
@@ -44,23 +44,23 @@ class ClipOpsTest(test.TestCase):
                                    shape,
                                    max_norm,
                                    axes):
-     with self.cached_session() as sess:
-       values = constant_op.constant(values)
-       indices = constant_op.constant(indices)
-       shape = constant_op.constant(shape)
-       # IndexedSlices mode
-       indixed_slices = ops.IndexedSlices(values, indices, shape)
-       clipped = clip_ops.clip_by_norm(indixed_slices, max_norm, axes)
-       # clipped should be IndexedSlices
-       self.assertIsInstance(clipped, ops.IndexedSlices)
-       clipped = ops.convert_to_tensor(clipped)
-       
-       # Tensor mode
-       dense_tensor = ops.convert_to_tensor(indixed_slices)
-       dense_clipped = clip_ops.clip_by_norm(dense_tensor, max_norm, axes)
-       result, expected = sess.run([clipped, dense_clipped])
-     self.assertAllClose(result, expected)
-      
+    with self.cached_session() as sess:
+      values = constant_op.constant(values)
+      indices = constant_op.constant(indices)
+      shape = constant_op.constant(shape)
+      # IndexedSlices mode
+      indixed_slices = ops.IndexedSlices(values, indices, shape)
+      clipped = clip_ops.clip_by_norm(indixed_slices, max_norm, axes)
+      # clipped should be IndexedSlices
+      self.assertIsInstance(clipped, ops.IndexedSlices)
+      clipped = ops.convert_to_tensor(clipped)
+
+      # Tensor mode
+      dense_tensor = ops.convert_to_tensor(indixed_slices)
+      dense_clipped = clip_ops.clip_by_norm(dense_tensor, max_norm, axes)
+      result, expected = sess.run([clipped, dense_clipped])
+    self.assertAllClose(result, expected)
+
   def testClipTensorByNorm(self):
     # Simple example
     self._testClipTensorByNorm(
-- 
GitLab


From 393a13c1b1a7d51b0871a6d4b3d3413d8e1765bf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 02:03:43 -0700
Subject: [PATCH 0490/1085] compat: Update forward compatibility horizon to
 2018-10-08

PiperOrigin-RevId: 216151605
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index ee56480b00..349c84e13c 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 7)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 8)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 3bdf3c592472c2b54c513417de8d9b538d3f6078 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 08:08:31 -0700
Subject: [PATCH 0491/1085] Make ExecutorState preserve the thread context.

PiperOrigin-RevId: 216187878
---
 tensorflow/core/common_runtime/executor.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 2c48084cab..40ec1502da 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -54,6 +54,7 @@ limitations under the License.
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/context.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
@@ -1240,6 +1241,7 @@ class ExecutorState {
   StepStatsCollectorInterface* const stats_collector_;
   const tracing::TraceCollector* const trace_collector_;
   const tracing::EventCollector* const event_collector_;
+  Context context_;
 
   // QUESTION: Make it a checkpoint::TensorSliceReaderCacheWrapper
   // instead of a pointer?  (avoids having to delete).
@@ -1367,6 +1369,7 @@ ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl)
       trace_collector_(tracing::GetTraceCollector()),
       event_collector_(
           tracing::GetEventCollector(tracing::EventCategory::kCompute)),
+      context_(ContextKind::kThread),
       slice_reader_cache_(new checkpoint::TensorSliceReaderCacheWrapper),
       call_frame_(args.call_frame),
       impl_(impl),
@@ -1586,6 +1589,7 @@ bool MightTrace(const NodeItem& item,
 }
 
 void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
+  WithContext wc(context_);
   const GraphView& gview = impl_->gview_;
   TaggedNodeSeq ready;
   TaggedNodeReadyQueue inline_ready;
-- 
GitLab


From 53961cc2f16dea9d9b2286950c1e4d4c0a3743c5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 08:22:48 -0700
Subject: [PATCH 0492/1085] Improve const correctness of HloDomainMap

PiperOrigin-RevId: 216189458
---
 tensorflow/compiler/xla/service/hlo_domain_map.cc | 12 +++++++-----
 tensorflow/compiler/xla/service/hlo_domain_map.h  | 14 +++++++-------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc
index 6ca1255ede..c6d02f9f67 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc
@@ -42,18 +42,19 @@ namespace xla {
   return std::move(domain_map);
 }
 
-bool HloDomainMap::InSameDomain(HloInstruction* instruction1,
-                                HloInstruction* instruction2) const {
+bool HloDomainMap::InSameDomain(const HloInstruction* instruction1,
+                                const HloInstruction* instruction2) const {
   int64 domain_id1 = GetDomainId(instruction1);
   int64 domain_id2 = GetDomainId(instruction2);
   return domain_id1 >= 0 && domain_id1 == domain_id2;
 }
 
-int64 HloDomainMap::GetDomainId(HloInstruction* instruction) const {
+int64 HloDomainMap::GetDomainId(const HloInstruction* instruction) const {
   return FindOrDefault(instruction_to_domain_, instruction, -1);
 }
 
-int64 HloDomainMap::GetDomainMetadataId(HloInstruction* instruction) const {
+int64 HloDomainMap::GetDomainMetadataId(
+    const HloInstruction* instruction) const {
   return FindOrDie(domain_metadata_id_, instruction);
 }
 
@@ -200,7 +201,8 @@ StatusOr<std::unique_ptr<DomainMetadata::Domain>> HloDomainMap::CreateDomain(
   return std::move(domain);
 }
 
-bool HloDomainMap::IsDomainInstruction(HloInstruction* instruction) const {
+bool HloDomainMap::IsDomainInstruction(
+    const HloInstruction* instruction) const {
   if (instruction->opcode() != HloOpcode::kDomain) {
     return false;
   }
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h b/tensorflow/compiler/xla/service/hlo_domain_map.h
index c8d581b746..bce7d1aa7c 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.h
@@ -58,21 +58,21 @@ class HloDomainMap {
   }
 
   // Checks whether two instructions are within the same domain.
-  bool InSameDomain(HloInstruction* instruction1,
-                    HloInstruction* instruction2) const;
+  bool InSameDomain(const HloInstruction* instruction1,
+                    const HloInstruction* instruction2) const;
 
   // Checks whether instruction is a kDomain instruction of the kind we are
   // currently processing.
-  bool IsDomainInstruction(HloInstruction* instruction) const;
+  bool IsDomainInstruction(const HloInstruction* instruction) const;
 
   // Retrieves the domain identifier of the instruction, or -1 in case
   // instruction is not found within any domain.
-  int64 GetDomainId(HloInstruction* instruction) const;
+  int64 GetDomainId(const HloInstruction* instruction) const;
 
   // Returns the unique id of the domain metadata for the domain the given
   // instruction belongs to. The given instruction must not be a kDomain
   // instruction since each domain instruction is associated with 2 domains.
-  int64 GetDomainMetadataId(HloInstruction* instruction) const;
+  int64 GetDomainMetadataId(const HloInstruction* instruction) const;
 
  private:
   // Map used for representing instruction ordering, i.e.
@@ -119,8 +119,8 @@ class HloDomainMap {
 
   string domain_kind_;
   std::vector<std::unique_ptr<DomainMetadata::Domain>> instruction_domains_;
-  absl::flat_hash_map<HloInstruction*, int64> instruction_to_domain_;
-  absl::flat_hash_map<HloInstruction*, int64> domain_metadata_id_;
+  absl::flat_hash_map<const HloInstruction*, int64> instruction_to_domain_;
+  absl::flat_hash_map<const HloInstruction*, int64> domain_metadata_id_;
 };
 
 }  // namespace xla
-- 
GitLab


From 75f57a8b7836a1ed3cda8ba81c88f6caf15cf0c6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 08:35:14 -0700
Subject: [PATCH 0493/1085] Remove Dims from types.h, create build structure.

PiperOrigin-RevId: 216191084
---
 .../contrib/lite/kernels/internal/BUILD       | 16 ++++++++++++
 .../lite/kernels/internal/legacy_types.h      | 26 +++++++++++++++++++
 .../internal/reference/legacy_reference_ops.h |  7 ++++-
 .../internal/reference/reference_ops.h        |  5 ----
 4 files changed, 48 insertions(+), 6 deletions(-)
 create mode 100644 tensorflow/contrib/lite/kernels/internal/legacy_types.h

diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
index afb5ec05df..5c9ca6e910 100644
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -49,6 +49,20 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "legacy_types",
+    srcs = [],
+    hdrs = [
+        "compatibility.h",
+        "legacy_types.h",
+        "types.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite/kernels:op_macros",
+        "@com_google_absl//absl/base:core_headers",
+    ],
+)
+
 config_setting(
     name = "arm",
     values = {
@@ -198,6 +212,7 @@ cc_library(
         ":strided_slice_logic",
         ":tensor_utils",
         ":types",
+        ":legacy_types",
         ":legacy_reference_base",
         ":round",
         "//third_party/eigen3",
@@ -336,6 +351,7 @@ cc_library(
         ":quantization_util",
         ":round",
         ":strided_slice_logic",
+        ":legacy_types",
         ":types",
         "@gemmlowp",
         "//tensorflow/contrib/lite/c:c_api_internal",
diff --git a/tensorflow/contrib/lite/kernels/internal/legacy_types.h b/tensorflow/contrib/lite/kernels/internal/legacy_types.h
new file mode 100644
index 0000000000..2e4d3137f5
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/legacy_types.h
@@ -0,0 +1,26 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_
+#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_
+
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+// TODO(b/116772710): Insert legacy Dims<> code in here.
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h
index be99240b1f..c8b64cfd96 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h
@@ -19,10 +19,10 @@ limitations under the License.
 #include <sys/types.h>
 
 #include "tensorflow/contrib/lite/kernels/internal/common.h"
+#include "tensorflow/contrib/lite/kernels/internal/legacy_types.h"
 #include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h"
 #include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h"
 #include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
-#include "tensorflow/contrib/lite/kernels/internal/types.h"
 
 namespace tflite {
 
@@ -30,6 +30,11 @@ namespace reference_ops {
 
 static constexpr int kDepthwiseReverseShift = -1;
 
+inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) {
+  shape->BuildFrom(
+      {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
+}
+
 inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                           const float* filter_data, const Dims<4>& filter_dims,
                           const float* bias_data, const Dims<4>& bias_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 59f17ae854..19d23fa80b 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -100,11 +100,6 @@ gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
 
 namespace reference_ops {
 
-inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) {
-  shape->BuildFrom(
-      {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
-}
-
 template <typename T>
 int CountLeadingZeros(T integer_input) {
   static_assert(std::is_unsigned<T>::value,
-- 
GitLab


From 5f308cb408eb46ec9af0546be6b9ae1d5166b185 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 09:06:04 -0700
Subject: [PATCH 0494/1085] Optimize PinToHostOptimizer by adding cache, also
 add PinToHostOptimizer to benchmarks.

original runtime: 4.83492736816 secs
w/ cache runtime: 2.19033999443 secs

PiperOrigin-RevId: 216195286
---
 tensorflow/core/grappler/op_types.cc          |  22 ++-
 .../optimizers/pin_to_host_optimizer.cc       | 162 ++++++++++++------
 .../optimizers/pin_to_host_optimizer.h        |   4 +-
 .../optimizers/pin_to_host_optimizer_test.cc  |  76 +++++---
 4 files changed, 179 insertions(+), 85 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 1b5a215987..cbf5c8e038 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -102,15 +102,19 @@ bool IsConjugateTranspose(const NodeDef& node) {
 }
 
 bool IsControlFlow(const NodeDef& node) {
-  // clang-format off
-  return node.op() == "ControlTrigger" ||
-         node.op() == "Enter" ||
-         node.op() == "Exit" ||
-         node.op() == "LoopCond" ||
-         node.op() == "Merge" ||
-         node.op() == "NextIteration" ||
-         node.op() == "Switch";
-  // clang-format on
+  // TODO(williamchan): Add a microbenchmark to compare FlatSet vs. iterative
+  // string comparison.
+  static const gtl::FlatSet<string>* const kControFlowOps =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
+          "ControlTrigger",
+          "Enter",
+          "Exit",
+          "LoopCond",
+          "Merge",
+          "NextIteration",
+          "Switch",
+      }));
+  return kControFlowOps->count(node.op()) > 0;
 }
 
 bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 8ed4271fa4..29a3b2b74c 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -25,16 +25,29 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
 namespace grappler {
+
 namespace internal {
 
+namespace {
 // TODO(williamchan): Change this constant to be something smarter, maybe
 // dynamically determined.
 constexpr int64 kTensorMaxSize = 64;
 
+struct OpDevicePortHasher {
+  std::size_t operator()(const std::tuple<string, string, int>& x) const {
+    uint64 code = Hash64Combine(Hash64(std::get<0>(x)), Hash64(std::get<1>(x)));
+
+    return Hash64Combine(code, hash<int>()(std::get<2>(x)));
+  }
+};
+using OpDevicePortOnHostMap =
+    gtl::FlatMap<std::tuple<string, string, int>, bool, OpDevicePortHasher>;
+
 // All the nodes that should be blacklisted and not swapped.
 bool IsBlacklisted(const NodeDef& node) {
   return
@@ -82,10 +95,10 @@ Status TryFindKernelDef(const std::vector<DeviceType>& devices,
 
 // Checks if a node's output port is host friendly.
 // Roughly this means checking if the output port is on Host memory.
-Status IsNodeOutputPortHostFriendly(const GraphView& graph,
-                                    GraphProperties* properties,
-                                    const NodeDef& node, int port_id,
-                                    bool* is_candidate) {
+Status IsNodeOutputPortHostFriendly(
+    const GraphView& graph, GraphProperties* properties, const NodeDef& node,
+    int port_id, OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache,
+    bool* is_candidate) {
   *is_candidate = false;
 
   // Make sure we are not a blacklisted op.
@@ -117,7 +130,8 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
     for (const auto& fanin : graph.GetFanins(node, false)) {
       bool fanin_candidate = false;
       TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
-          graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
+          graph, properties, *fanin.node, fanin.port_id,
+          op_device_outport_pinned_to_host_cache, &fanin_candidate));
       if (!fanin_candidate) {
         return Status::OK();
       }
@@ -132,11 +146,22 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
     return Status::OK();
   }
 
+  // Check `op_device_outport_pinned_to_host_cache` for our
+  // {op, device, port_id} combo to see if the arg is pinned on Host.
+  const std::tuple<string, string, int> cache_key(node.op(), node.device(),
+                                                  port_id);
+  auto it = op_device_outport_pinned_to_host_cache->find(cache_key);
+  if (it != op_device_outport_pinned_to_host_cache->end()) {
+    *is_candidate = it->second;
+    return Status::OK();
+  }
+
   // Check if op's output port is pinned to HostMemory.
   const OpDef* op = nullptr;
   Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
   if (!s.ok()) {
     LOG(WARNING) << "Could not find OpDef for : " << node.op();
+    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -146,6 +171,7 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
     LOG(WARNING) << "Invalid port: " << port_id << "!\n"
                  << node.DebugString() << "\n"
                  << op->DebugString();
+    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -155,6 +181,7 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
                        &kernel);
   if (!s.ok()) {
     LOG(INFO) << "Could not find KernelDef for: " << node.op();
+    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -166,22 +193,35 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
     }
   }
 
+  op_device_outport_pinned_to_host_cache->emplace(cache_key, *is_candidate);
+
   return Status::OK();
 }
 
 // Checks if a node's input port is Host friendly.
 // Roughly this means checking if the input port is on Host memory.
-bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
+bool IsNodeInputPortHostFriendly(
+    const NodeDef& node, int port_id,
+    OpDevicePortOnHostMap* op_device_inport_pinned_to_host_cache) {
   // If node is on Host, assume its inputs are Host friendly.
   if (str_util::StrContains(node.device(), DEVICE_CPU)) {
     return true;
   }
 
+  // Check `op_device_inport_pinned_to_host_cache` for our
+  // {op, device, port_id} combo to see if the arg is pinned on Host.
+  std::tuple<string, string, int> cache_key(node.op(), node.device(), port_id);
+  auto it = op_device_inport_pinned_to_host_cache->find(cache_key);
+  if (it != op_device_inport_pinned_to_host_cache->end()) {
+    return it->second;
+  }
+
   // Check if op's input port is pinned to HostMemory.
   const OpDef* op = nullptr;
   Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
   if (!s.ok()) {
     LOG(WARNING) << "Could not find OpDef for : " << node.op();
+    op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
     return false;
   }
   const int input_arg_id = OpInputPortIdToArgId(node, *op, port_id);
@@ -192,16 +232,20 @@ bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
       {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node, &kernel);
   if (!s.ok()) {
     LOG(INFO) << "Could not find KernelDef for: " << node.op();
+    op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
     return false;
   }
 
   // Check if the input_arg is pinned to Host.
   for (const string& host_memory_arg : kernel->host_memory_arg()) {
     if (op->input_arg(input_arg_id).name() == host_memory_arg) {
+      op_device_inport_pinned_to_host_cache->emplace(cache_key, true);
       return true;
     }
   }
 
+  op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
+
   return false;
 }
 
@@ -211,18 +255,20 @@ bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
 // 2] Check if node can run on Host.
 // 3] Check all input/outputs are Host "friendly" (atm, friendly means small,
 //    ints, and pinned to Host).
-Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
-                           const NodeDef& node, bool* is_candidate) {
+Status IsNodeHostCandidate(
+    const GraphView& graph, GraphProperties* properties, const NodeDef& node,
+    OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache,
+    bool* is_candidate) {
   *is_candidate = false;
 
-  // Check if node already on CPU.
-  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
-    *is_candidate = true;
+  // Skip these node types.
+  if (IsBlacklisted(node)) {
     return Status::OK();
   }
 
-  // Skip these node types.
-  if (IsBlacklisted(node)) {
+  // Check if node already on CPU.
+  if (str_util::StrContains(node.device(), DEVICE_CPU)) {
+    *is_candidate = true;
     return Status::OK();
   }
 
@@ -232,17 +278,6 @@ Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
     return Status::OK();
   }
 
-  // Check all inputs are Host friendly.
-  for (const GraphView::OutputPort& fanin :
-       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
-    bool fanin_candidate = false;
-    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
-        graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
-    if (!fanin_candidate) {
-      return Status::OK();
-    }
-  }
-
   // Check all outputs are Host friendly.
   if (!properties->has_properties()) {
     // This is an expensive call, call it lazily.
@@ -255,16 +290,42 @@ Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
     }
   }
 
+  // Check all inputs are Host friendly.
+  for (const GraphView::OutputPort& fanin :
+       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
+    bool fanin_candidate = false;
+    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
+        graph, properties, *fanin.node, fanin.port_id,
+        op_device_outport_pinned_to_host_cache, &fanin_candidate));
+    if (!fanin_candidate) {
+      return Status::OK();
+    }
+  }
+
   *is_candidate = true;
   return Status::OK();
 }
 
-string TryFindHostDevice(const gtl::FlatSet<string>& devices,
-                         bool has_device_cpu, const string& device) {
+bool IsTPUGraphDef(const GraphDef& def) {
+  for (const auto& node : def.node()) {
+    if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
+        node.op() == "TPUPartitionedCall") {
+      return true;
+    }
+  }
+  return false;
+}
+}  // end namespace
+
+// Tries to swap `device` to a Host device from `devices`. Returns true iff
+// there was a swap.
+bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, string* device) {
   // Force this node onto the CPU.
-  if (device.empty() && has_device_cpu) {
-    return "/device:CPU:0";
-  } else if (str_util::StrContains(device, DEVICE_GPU)) {
+  if (device->empty() && has_device_cpu) {
+    *device = "/device:CPU:0";
+    return true;
+  } else if (str_util::StrContains(*device, DEVICE_GPU)) {
     // Sometimes the cluster can have:
     //   devices = {"/device:CPU:0", "/device:XLA_GPU:0"}
     // and we need to handle them properly.
@@ -272,27 +333,19 @@ string TryFindHostDevice(const gtl::FlatSet<string>& devices,
          {std::pair<string, string>("GPU", "CPU:0"),
           std::pair<string, string>("/device", "/device:CPU:0")}) {
       const string device_host =
-          strings::StrCat(device.substr(0, device.rfind(device_match.first)),
+          strings::StrCat(device->substr(0, device->rfind(device_match.first)),
                           device_match.second);
       if (devices.find(device_host) != devices.end()) {
-        return device_host;
+        *device = device_host;
+        return true;
       }
     }
   }
 
-  // We couldn't find an appropriate Host device, return original device.
-  return device;
-}
-
-bool IsTPUGraphDef(const GraphDef& def) {
-  for (const auto& node : def.node()) {
-    if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
-        node.op() == "TPUPartitionedCall") {
-      return true;
-    }
-  }
+  // We couldn't find an appropriate Host device, return false.
   return false;
 }
+
 }  // end namespace internal
 
 Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
@@ -324,20 +377,26 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   // All the Const nodes, and their original devices in topological order.
   std::vector<std::pair<NodeDef*, string>> const_nodes;
 
+  // Cache to map {op, device, port} -> bool on whether it is pinned to host.
+  internal::OpDevicePortOnHostMap op_device_outport_pinned_to_host_cache;
+  internal::OpDevicePortOnHostMap op_device_inport_pinned_to_host_cache;
+
   for (auto& node : *optimized_graph->mutable_node()) {
     bool is_candidate = false;
-    TF_RETURN_IF_ERROR(
-        internal::IsNodeHostCandidate(graph, &properties, node, &is_candidate));
+    TF_RETURN_IF_ERROR(internal::IsNodeHostCandidate(
+        graph, &properties, node, &op_device_outport_pinned_to_host_cache,
+        &is_candidate));
     if (!is_candidate) {
       continue;
     }
 
-    if (IsConstant(node)) {
-      const_nodes.emplace_back(&node, node.device());
+    const string original_device = node.device();
+    const bool swapped = internal::TrySwapToHostDevice(devices, has_device_cpu,
+                                                       node.mutable_device());
+    // Keep track of all Const nodes that we swapped.
+    if (swapped && IsConstant(node)) {
+      const_nodes.emplace_back(&node, original_device);
     }
-    // Try and swap the device to Host.
-    node.set_device(
-        internal::TryFindHostDevice(devices, has_device_cpu, node.device()));
   }
 
   // Traverse all `const_nodes`, and map them back to GPU greedily.
@@ -349,8 +408,9 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     // this node back onto the original device.
     for (const GraphView::InputPort& fanout : graph.GetFanouts(*node, false)) {
       // The consumer is not Host friendly, swap it back to the original device.
-      if (!internal::IsNodeInputPortHostFriendly(*fanout.node,
-                                                 fanout.port_id)) {
+      if (!internal::IsNodeInputPortHostFriendly(
+              *fanout.node, fanout.port_id,
+              &op_device_inport_pinned_to_host_cache)) {
         node->set_device(device);
         break;
       }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
index d557a03463..bed4a9ef95 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
@@ -26,8 +26,8 @@ namespace tensorflow {
 namespace grappler {
 namespace internal {
 // Try and find an appropriate Host device in `devices` given `device`.
-string TryFindHostDevice(const gtl::FlatSet<string>& devices,
-                         bool has_device_cpu, const string& device);
+bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, string* device);
 }  // end namespace internal
 
 // Optimize TensorFlow ops that should be swapped into the CPU to avoid
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
index 7c64529441..9bb030b220 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
@@ -28,30 +28,60 @@ namespace {
 
 class PinToHostOptimizerTest : public GrapplerTest {};
 
-TEST_F(PinToHostOptimizerTest, TryFindHostDevice) {
+TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceNoDevices) {
   gtl::FlatSet<string> devices = {};
-  EXPECT_EQ("ABC", internal::TryFindHostDevice(devices, false, "ABC"));
-
-  devices = {"/device:CPU:0", "/device:XLA_GPU:0"};
-  EXPECT_EQ(internal::TryFindHostDevice(devices, true, ""), "/device:CPU:0");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:0"),
-            "/device:CPU:0");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:*"),
-            "/device:CPU:0");
-
-  devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"};
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
-            "/device:XLA_CPU:0");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
-            "/device:XLA_CPU:0");
-
-  devices = {"/device:XLA_GPU:0"};
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
-            "/device:XLA_GPU:0");
-  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
-            "/device:XLA_GPU:*");
+
+  string device = "ABC";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "ABC");
+}
+
+TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceCpuXlaGpu) {
+  gtl::FlatSet<string> devices = {"/device:CPU:0", "/device:XLA_GPU:0"};
+
+  string device = "";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
+  EXPECT_EQ(device, "/device:CPU:0");
+
+  device = "/device:XLA_GPU:0";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
+  EXPECT_EQ(device, "/device:CPU:0");
+
+  device = "/device:XLA_GPU:*";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
+  EXPECT_EQ(device, "/device:CPU:0");
+}
+
+TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaCpuXlaGpu) {
+  gtl::FlatSet<string> devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"};
+
+  string device = "";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_TRUE(device.empty());
+
+  device = "/device:XLA_GPU:0";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "/device:XLA_CPU:0");
+
+  device = "/device:XLA_GPU:*";
+  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "/device:XLA_CPU:0");
+}
+
+TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaGpu) {
+  gtl::FlatSet<string> devices = {"/device:XLA_GPU:0"};
+
+  string device = "";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_TRUE(device.empty());
+
+  device = "/device:XLA_GPU:0";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "/device:XLA_GPU:0");
+
+  device = "/device:XLA_GPU:*";
+  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
+  EXPECT_EQ(device, "/device:XLA_GPU:*");
 }
 
 TEST_F(PinToHostOptimizerTest, OptimizeSmallOpsToHost) {
-- 
GitLab


From 411b9baa39636030181fdff15d2e985824b03d61 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Mon, 8 Oct 2018 09:42:50 -0700
Subject: [PATCH 0495/1085] Reduce tolerances for rmsprop_test float16, to fix
 OSS builds.

PiperOrigin-RevId: 216200439
---
 tensorflow/contrib/optimizer_v2/rmsprop_test.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
index 44301ffe9e..83f5971039 100644
--- a/tensorflow/contrib/optimizer_v2/rmsprop_test.py
+++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
@@ -157,8 +157,11 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
         self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
         self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
         self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
-        self.assertAllCloseAccordingToType(var0_np, var0.eval())
-        self.assertAllCloseAccordingToType(var1_np, var1.eval())
+        # TODO(b/117393988): Reduce tolerances for float16.
+        self.assertAllCloseAccordingToType(
+            var0_np, var0.eval(), half_rtol=3e-3, half_atol=3e-3)
+        self.assertAllCloseAccordingToType(
+            var1_np, var1.eval(), half_rtol=3e-3, half_atol=3e-3)
 
   @parameterized.parameters([dtypes.float32, dtypes.float64])
   def testMinimizeSparseResourceVariable(self, dtype):
-- 
GitLab


From f435e776216c7a86f619a17064fd6e1deee638b3 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Mon, 8 Oct 2018 09:49:38 -0700
Subject: [PATCH 0496/1085] Avoid adding spurious ops when colocating with
 resource variables.

Prior to this change, tf.colocate_with(v) would insert spurious operations (a ReadVariableOp and an Identity) in the graph when v is a resource variable, and then
colocate the operations within the block with those newly added, otherwise disconnected, operations.

This commit avoids adding the unnecessary ReadVariableOp/Identity nodes and colocates
operations within the block with the VarHandleOp.

PiperOrigin-RevId: 216201638
---
 .../python/parameter_server_strategy_test.py  |  4 ++-
 tensorflow/python/framework/ops.py            | 28 ++++++++++++++++---
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
index 353d11a583..9c112e4f85 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
@@ -262,7 +262,9 @@ class ParameterServerStrategyTestBase(
           h = f + 1.0
         self.assertEqual(
             device_util.canonicalize(u.device), tower_variable_device)
-        self.assertEqual(device_util.canonicalize(x.device), h.device)
+        self.assertEqual(
+            device_util.canonicalize(x.device),
+            device_util.canonicalize(h.device))
         return y_add, z_add, f
 
       y, z, f = d.call_for_each_tower(model_fn)
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 8bb177939e..77c2bc930e 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -4140,10 +4140,7 @@ class Graph(object):
     if op is None and not ignore_existing:
       raise ValueError("Trying to reset colocation (op is None) but "
                        "ignore_existing is not True")
-
-    if op is not None and not isinstance(op, Operation):
-      # We always want to colocate with the reference op.
-      op = internal_convert_to_tensor_or_indexed_slices(op, as_ref=True).op
+    op = _op_to_colocate_with(op)
 
     # By default, colocate_with resets the device function stack,
     # since colocate_with is typically used in specific internal
@@ -6168,4 +6165,27 @@ def _operation_conversion_error(op, dtype=None, name=None, as_ref=False):
                                                                name, as_ref))
 
 
+def _op_to_colocate_with(v):
+  """Operation object corresponding to v to use for colocation constraints."""
+  if v is None:
+    return None
+  if isinstance(v, Operation):
+    return v
+  # We always want to colocate with the reference op.
+  # When 'v' is a ResourceVariable, the reference op is the handle creating op.
+  #
+  # What this should be is:
+  # if isinstance(v, ResourceVariable):
+  #   return v.handle.op
+  # However, that would require a circular import dependency.
+  # As of October 2018, there were attempts underway to remove
+  # colocation constraints altogether. Assuming that will
+  # happen soon, perhaps this hack to work around the circular
+  # import dependency is acceptable.
+  if hasattr(v, "handle") and hasattr(v.handle, "op") and isinstance(
+      v.handle.op, Operation):
+    return v.handle.op
+  return internal_convert_to_tensor_or_indexed_slices(v, as_ref=True).op
+
+
 register_tensor_conversion_function(Operation, _operation_conversion_error)
-- 
GitLab


From d2e1c28499c05f589e8db31cebe8cbab0e224a9e Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@google.com>
Date: Mon, 8 Oct 2018 09:55:06 -0700
Subject: [PATCH 0497/1085] Address comments

---
 RELEASE.md | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index 1077593e51..cb020d0310 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,39 +1,38 @@
 # Release 1.12.0
 
 ## Major Features and Improvements
-* tf.data users can now use `tf.data.experimental.AUTOTUNE` for the num_parallel_calls argument of `tf.data.Dataset.interleave`, `tf.data.Dataset.map`, and `tf.data.experimental.map_and_batch` to dynamically tune the level of parallelism at runtime.
 * Keras models can now be directly exported to the SavedModel format(`tf.contrib.saved_model.save_keras_model()`) and used with Tensorflow Serving.
-* TensorFlow builds with XLA support by default, does not enable XLA by default, but makes it accessible via default binary builds.
+* Keras model now supports evaluating with a `tf.data.Dataset`.
+* TensorFlow binaries are built with XLA support linked in by default.
 
 ## Bug Fixes and Other Changes
 
 * tf.data:
   * tf.data users can now represent, get, and set options of TensorFlow input pipelines using `tf.data.Options()`, `tf.data.Dataset.options()`, and `tf.data.Dataset.with_options()` respectively.
+* tf.data users can now use `tf.data.experimental.AUTOTUNE` for the num_parallel_calls argument of `tf.data.Dataset.interleave`, `tf.data.Dataset.map`, and `tf.data.experimental.map_and_batch` to dynamically tune the level of parallelism at runtime.
   * All C++ code moves to the `tensorflow::data` namespace.
-  * Adding support for `num_parallel_calls` to `tf.data.Dataset.interleave`.
+  * Add support for `num_parallel_calls` to `tf.data.Dataset.interleave`.
 * `tf.contrib`:
-  * Removed `tf.contrib.linalg`. `tf.linalg` should be used instead.
+  * Remove `tf.contrib.linalg`. `tf.linalg` should be used instead.
   * Replace any calls to `tf.contrib.get_signature_def_by_key(metagraph_def, signature_def_key)` with `meta_graph_def.signature_def[signature_def_key]`. Catching a ValueError exception thrown by `tf.contrib.get_signature_def_by_key` should be replaced by catching a KeyError exception.
 * `tf.contrib.data`
-  * deprecated, and replaced by tf.data.experimental.
-* Keras:
-  * Keras model now supports evaluating with a `tf.data.Dataset`.
+  * Deprecate, and replace by tf.data.experimental.
 * Other:
   * Instead of jemalloc, revert back to using system malloc.
-  * Removed integer types from `tf.nn.softplus` and `tf.nn.softsign` OpDefs. This is a bugfix; these ops were never meant to support integers.
+  * Remove integer types from `tf.nn.softplus` and `tf.nn.softsign` OpDefs. This is a bugfix; these ops were never meant to support integers.
   * Allow subslicing Tensors with a single dimension.
   * Add option to calculate string length in Unicode characters
   * Add functionality to SubSlice a tensor.
   * Add searchsorted (ie lower/upper_bound) op.
-  * Adding model explainability to Boosted Trees.
+  * Add model explainability to Boosted Trees.
   * Support negative positions for tf.substr
   * There was previously a bug in the bijector_impl where the _reduce_jacobian_det_over_event does not handle scalar ILDJ implementations properly.
   * In tf eager execution, allow re-entering a GradientTape context
   * Add tf_api_version flag. If --define=tf_api_version=2 flag is passed in, then bazel will build TensorFlow API version 2.0. Note that TensorFlow 2.0 is under active development and has no guarantees at this point.
-  * Added additional compression options to TfRecordWriter
+  * Add additional compression options to TfRecordWriter
   * Performance improvements for regex full match operations.
   * Replace tf.GraphKeys.VARIABLES with `tf.GraphKeys.GLOBAL_VARIABLES`
-  * Removed unused dynamic learning rate support.
+  * Remove unused dynamic learning rate support.
 
 ## Thanks to our Contributors
 
@@ -70,7 +69,7 @@ This release contains contributions from many people at Google, as well as:
   * `tf.data.Dataset.list_files()` raises an exception at initialization time if the argument matches no files.
   * Renamed BigTable class to BigtableTable for clarity
   * Document use of the Cloud Bigtable API
-  * Adding `tf.contrib.data.reduce_dataset` which can be used to reduce a dataset to a single element.
+  * Add `tf.contrib.data.reduce_dataset` which can be used to reduce a dataset to a single element.
   * Generalization of `tf.contrib.data.sliding_window_batch`.
 * INC:
   * Runtime improvements to triangular solve.
-- 
GitLab


From 87315f41ced19136819cef56ef37636c52c474de Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 09:49:59 -0700
Subject: [PATCH 0498/1085] Remove Raises documentation on imperative_grads for
 ValueErrror not raised.

PiperOrigin-RevId: 216201714
---
 tensorflow/python/eager/imperative_grad.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py
index 5f5af4ab6c..5c35860e9d 100644
--- a/tensorflow/python/eager/imperative_grad.py
+++ b/tensorflow/python/eager/imperative_grad.py
@@ -51,11 +51,6 @@ def imperative_grad(
 
   Raises:
     RuntimeError: if something goes wrong.
-    ValueError: if there is no sequence of differentiable operations connecting
-     a source and any target Tensor. This can happen either if the target is
-     not computed based on the source, if the tracing was set up incorrectly,
-     or if only non-differentiable functions of the source were used in the
-     computation of target.
   """
   return pywrap_tensorflow.TFE_Py_TapeGradient(
       tape._tape,  # pylint: disable=protected-access
-- 
GitLab


From 8dd1901de92bb3a370c40cc420a58bdceb408738 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Mon, 8 Oct 2018 10:01:56 -0700
Subject: [PATCH 0499/1085] Reduce tolerances for rmsprop_test float16, to fix
 OSS builds. (#22820)

PiperOrigin-RevId: 216200439
---
 tensorflow/contrib/optimizer_v2/rmsprop_test.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
index 44301ffe9e..83f5971039 100644
--- a/tensorflow/contrib/optimizer_v2/rmsprop_test.py
+++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
@@ -157,8 +157,11 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
         self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
         self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
         self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
-        self.assertAllCloseAccordingToType(var0_np, var0.eval())
-        self.assertAllCloseAccordingToType(var1_np, var1.eval())
+        # TODO(b/117393988): Reduce tolerances for float16.
+        self.assertAllCloseAccordingToType(
+            var0_np, var0.eval(), half_rtol=3e-3, half_atol=3e-3)
+        self.assertAllCloseAccordingToType(
+            var1_np, var1.eval(), half_rtol=3e-3, half_atol=3e-3)
 
   @parameterized.parameters([dtypes.float32, dtypes.float64])
   def testMinimizeSparseResourceVariable(self, dtype):
-- 
GitLab


From 07df147ab20c4a5329148e5fb5f7f6b187cb73a4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 09:50:08 -0700
Subject: [PATCH 0500/1085] Enable PinToHostOptimizer.

PiperOrigin-RevId: 216201732
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index c3d70a1fdf..3f33b16ba8 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -107,7 +107,8 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
   MK_OPT("scoped_allocator",
          new ScopedAllocatorOptimizer(cfg_.scoped_allocator_optimization(),
                                       cfg_.scoped_allocator_opts()));
-  MK_OPT("small_op", new PinToHostOptimizer(cfg_.pin_to_host_optimization()));
+  MK_OPT("pin_to_host",
+         new PinToHostOptimizer(cfg_.pin_to_host_optimization()));
 
   return std::unique_ptr<GraphOptimizer>();
 }
@@ -139,7 +140,7 @@ Status MetaOptimizer::InitializeOptimizers(
   if (cfg_.remapping() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
   }
-  if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) {
+  if (cfg_.pin_to_host_optimization() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<PinToHostOptimizer>());
   }
   if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
@@ -527,7 +528,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
          cfg.scoped_allocator_optimization() == RewriterConfig::ON ||
-         cfg.pin_to_host_optimization() == RewriterConfig::ON ||
+         cfg.pin_to_host_optimization() != RewriterConfig::OFF ||
          !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }
 
-- 
GitLab


From da3abf6afeaf781b932bce9ccb6c17da911e49b6 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 8 Oct 2018 09:53:31 -0700
Subject: [PATCH 0501/1085] Benchmark for comparing original cond and cond_v2
 performance.

This benchmark creates many intermediates values, so we can make sure there's no performance overhead (it looks like there might be currently, or it might be from some other difference). It also runs in a defun and in legacy graph mode.

Results from my machine:

entry {
  name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v1_defun"
  iters: 500
  wall_time: 1.25822591782
}

entry {
  name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v2_defun"
  iters: 500
  wall_time: 5.99376106262
}

entry {
  name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v1_graph"
  iters: 500
  wall_time: 2.05277585983
}

entry {
  name: "CondWithManyIntermediatesBenchmark.benchmark_cond_v2_graph"
  iters: 500
  wall_time: 2.84808516502
}

Clearly we have some work to do! I haven't looked into the time differences at all yet.

PiperOrigin-RevId: 216202325
---
 tensorflow/python/BUILD                       |  13 ++
 .../python/ops/control_flow_ops_benchmark.py  | 122 ++++++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 tensorflow/python/ops/control_flow_ops_benchmark.py

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index da3c56db92..822d596995 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -5196,6 +5196,19 @@ cuda_py_test(
     main = "ops/concat_benchmark.py",
 )
 
+cuda_py_test(
+    name = "control_flow_ops_benchmark",
+    srcs = ["ops/control_flow_ops_benchmark.py"],
+    additional_deps = [
+        ":client_testlib",
+        ":constant_op",
+        ":control_flow_ops",
+        ":framework_ops",
+        "//tensorflow/python/eager:function",
+    ],
+    main = "ops/control_flow_ops_benchmark.py",
+)
+
 cuda_py_test(
     name = "conv2d_benchmark",
     size = "large",
diff --git a/tensorflow/python/ops/control_flow_ops_benchmark.py b/tensorflow/python/ops/control_flow_ops_benchmark.py
new file mode 100644
index 0000000000..9ba5ff2c0f
--- /dev/null
+++ b/tensorflow/python/ops/control_flow_ops_benchmark.py
@@ -0,0 +1,122 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Benchmark for control flow ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+
+from tensorflow.python.client import session
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.platform import test
+
+
+class CondWithManyIntermediatesBenchmark(test.Benchmark):
+  """Checks the runtime performance of outputting all intermediates."""
+
+  NUM_INTERMEDIATES = 1000
+  NUM_ITERS = 500
+  NUM_WARM_UP_ITERS = 50
+
+  def _create_cond(self, x):
+
+    def branch_fn():
+      # Use a random value so the adds can't be constant folded.
+      return x + sum(random_ops.random_normal([])
+                     for _ in range(self.NUM_INTERMEDIATES))
+
+    # Use a dynamic predicate to make sure the cond isn't constant folded.
+    return control_flow_ops.cond(math_ops.not_equal(x, -1),
+                                 branch_fn, lambda: 0.0)
+
+  def _benchmark_defun(self):
+    """Benchmarks cond in a defun."""
+
+    @function.defun
+    def cond_fn(x):
+      return self._create_cond(x)
+
+    # Warm up
+    for _ in range(self.NUM_WARM_UP_ITERS):
+      cond_fn(0.0)
+
+    start_time = time.time()
+
+    for _ in range(self.NUM_ITERS):
+      cond_fn(0.0)
+
+    self.report_benchmark(
+        wall_time=time.time() - start_time,
+        iters=self.NUM_ITERS)
+
+  def _benchmark_graph(self):
+    """Benchmarks cond in legacy graph mode."""
+    with context.graph_mode():
+      with ops.Graph().as_default():
+        x = array_ops.placeholder(dtypes.float32)
+        cond_val = self._create_cond(x)
+
+        with session.Session() as sess:
+          cond_fn = sess.make_callable(cond_val, [x])
+
+          # Warm up
+          for _ in range(self.NUM_WARM_UP_ITERS):
+            cond_fn(0.0)
+
+          start_time = time.time()
+
+          for _ in range(self.NUM_ITERS):
+            cond_fn(0.0)
+
+          self.report_benchmark(
+              wall_time=time.time() - start_time,
+              iters=self.NUM_ITERS)
+
+  def benchmark_cond_v1_defun(self):
+    old_val = control_flow_ops.ENABLE_COND_V2
+    control_flow_ops.ENABLE_COND_V2 = False
+    self._benchmark_defun()
+    control_flow_ops.ENABLE_COND_V2 = old_val
+
+  def benchmark_cond_v2_defun(self):
+    old_val = control_flow_ops.ENABLE_COND_V2
+    control_flow_ops.ENABLE_COND_V2 = True
+    self._benchmark_defun()
+    control_flow_ops.ENABLE_COND_V2 = old_val
+
+  def benchmark_cond_v1_graph(self):
+    old_val = control_flow_ops.ENABLE_COND_V2
+    control_flow_ops.ENABLE_COND_V2 = False
+    self._benchmark_graph()
+    control_flow_ops.ENABLE_COND_V2 = old_val
+
+  def benchmark_cond_v2_graph(self):
+    old_val = control_flow_ops.ENABLE_COND_V2
+    control_flow_ops.ENABLE_COND_V2 = True
+    self._benchmark_graph()
+    control_flow_ops.ENABLE_COND_V2 = old_val
+
+if __name__ == "__main__":
+  ops.enable_eager_execution()
+  test.main()
-- 
GitLab


From 6dd826b856acf6b060379251bfd91a950ee2b0af Mon Sep 17 00:00:00 2001
From: Makoto Uchida <muchida@google.com>
Date: Mon, 8 Oct 2018 10:00:18 -0700
Subject: [PATCH 0502/1085] Fix typo

PiperOrigin-RevId: 216203408
---
 .../experimental/kernel_tests/reader_dataset_ops_test_base.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
index fe0b3b5f3b..77df8310d4 100644
--- a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
@@ -64,7 +64,7 @@ class FixedLengthRecordDatasetTestBase(test_base.DatasetTestBase):
 
 
 class MakeBatchedFeaturesDatasetTestBase(test_base.DatasetTestBase):
-  """Base class for setting up and testing `make_batched_feature_dataset`."""
+  """Base class for setting up and testing `make_batched_features_dataset`."""
 
   def setUp(self):
     super(MakeBatchedFeaturesDatasetTestBase, self).setUp()
-- 
GitLab


From 0e1ba8886b6a333b1ed8ed7548c55041c34e9623 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 10:09:50 -0700
Subject: [PATCH 0503/1085] Fix compilation in unique_op when Eigen::Index !=
 int64.

PiperOrigin-RevId: 216205396
---
 tensorflow/core/kernels/unique_op.cc | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index 3559baa18e..3bdcfc90b8 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -108,7 +108,7 @@ class UniqueOp : public OpKernel {
 
       std::unordered_map<T, TIndex> uniq;
       uniq.reserve(2 * N);
-      for (int64 i = 0, j = 0; i < N; ++i) {
+      for (Eigen::Index i = 0, j = 0; i < N; ++i) {
         auto it = uniq.insert(std::make_pair(Tin(i), j));
         idx_vec(i) = it.first->second;
         if (it.second) {
@@ -131,19 +131,20 @@ class UniqueOp : public OpKernel {
       // General implementation when unique is run over multiple elements.
       auto Tin = input.shaped<T, 3>(new_sizes);
 
-      auto hash_fn = [&Tin](const int64& key) {
+      auto hash_fn = [&Tin](const Eigen::Index& key) {
         size_t h = 0;
-        for (int64 i = 0; i < Tin.dimension(0); i++) {
-          for (int64 j = 0; j < Tin.dimension(2); j++) {
+        for (Eigen::Index i = 0; i < Tin.dimension(0); i++) {
+          for (Eigen::Index j = 0; j < Tin.dimension(2); j++) {
             h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
           }
         }
         return h;
       };
 
-      auto equal_to_fn = [&Tin](const int64& lhs, const int64& rhs) {
-        for (int64 i = 0; i < Tin.dimension(0); i++) {
-          for (int64 j = 0; j < Tin.dimension(2); j++) {
+      auto equal_to_fn = [&Tin](const Eigen::Index& lhs,
+                                const Eigen::Index& rhs) {
+        for (Eigen::Index i = 0; i < Tin.dimension(0); i++) {
+          for (Eigen::Index j = 0; j < Tin.dimension(2); j++) {
             if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
               return false;
             }
-- 
GitLab


From 0e42fd6d0a88b30ab57959f38c79bea19d745ec3 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 8 Oct 2018 10:14:58 -0700
Subject: [PATCH 0504/1085] [tf.data] Adding specialization for `MapDataset`,
 `ParallelMapDataset`, and `MapAndBatchDataset` whose user-provided functions
 have the property that each output argument take its value directly from an
 input argument (e.g. `lambda x, y: y, x`). This specialization can produce
 the result without having to schedule the function using the executor.

PiperOrigin-RevId: 216206232
---
 tensorflow/core/kernels/data/BUILD            |  14 ++
 tensorflow/core/kernels/data/dataset_utils.cc |  47 +++++
 tensorflow/core/kernels/data/dataset_utils.h  |  20 ++
 .../core/kernels/data/dataset_utils_test.cc   |  46 +++++
 .../core/kernels/data/filter_dataset_op.cc    | 162 ++++++---------
 .../kernels/data/map_and_batch_dataset_op.cc  | 187 +++++++++++-------
 .../core/kernels/data/map_dataset_op.cc       |  62 ++++--
 .../kernels/data/parallel_map_dataset_op.cc   |  79 +++++---
 .../kernels/data/parallel_map_iterator.cc     |  17 +-
 .../core/kernels/data/parallel_map_iterator.h |   2 +-
 .../kernels/data/parse_example_dataset_op.cc  |   2 +-
 .../kernel_tests/map_and_batch_test.py        |  31 +++
 .../kernel_tests/filter_dataset_op_test.py    |   2 +-
 .../data/kernel_tests/map_dataset_op_test.py  |  95 +++++++--
 .../python/data/kernel_tests/test_base.py     |  29 +++
 15 files changed, 565 insertions(+), 230 deletions(-)
 create mode 100644 tensorflow/core/kernels/data/dataset_utils_test.cc

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 451f8c1a6c..37c1c54786 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -45,6 +45,16 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "dataset_utils_test",
+    srcs = ["dataset_utils_test.cc"],
+    deps = [
+        ":dataset_utils",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 cc_library(
     name = "captured_function",
     srcs = ["captured_function.cc"],
@@ -205,6 +215,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -232,6 +243,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -245,6 +257,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -285,6 +298,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         ":parallel_map_iterator",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index e10833f525..a40f7f2146 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -15,10 +15,57 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/common_runtime/device.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
 
 namespace tensorflow {
 namespace data {
 
+Status ComputeShortCircuitIndices(OpKernelContext* ctx,
+                                  const NameAttrList& func,
+                                  std::vector<int>* indices) {
+  FunctionLibraryRuntime::Handle fn_handle;
+  TF_RETURN_IF_ERROR(ctx->function_library()->Instantiate(
+      func.name(), AttrSlice(&func.attr()), &fn_handle));
+  auto cleanup = gtl::MakeCleanup([ctx, fn_handle]() {
+    Status s = ctx->function_library()->ReleaseHandle(fn_handle);
+    if (!s.ok()) {
+      LOG(WARNING) << "Failed to release handle: " << s.error_message();
+    }
+  });
+
+  const FunctionBody* fn_body =
+      ctx->function_library()->GetFunctionBody(fn_handle);
+  indices->resize(fn_body->ret_nodes.size());
+  for (size_t i = 0; i < fn_body->ret_nodes.size(); ++i) {
+    Node* ret_node = fn_body->ret_nodes[i];
+    Node* ret_input_node;
+    TF_RETURN_IF_ERROR(ret_node->input_node(0, &ret_input_node));
+    if (ret_input_node->def().op() == FunctionLibraryDefinition::kArgOp) {
+      TF_RETURN_IF_ERROR(
+          GetNodeAttr(ret_input_node->def(), "index", &((*indices)[i])));
+    } else {
+      indices->clear();
+      break;
+    }
+  }
+  return Status::OK();
+}
+
+std::vector<bool> ComputeMoveVector(const std::vector<int>& indices) {
+  std::map<int, int> last_use;
+  for (size_t i = 0; i < indices.size(); ++i) {
+    last_use[indices[i]] = i;
+  }
+  std::vector<bool> can_move;
+  can_move.resize(indices.size());
+  for (size_t i = 0; i < indices.size(); ++i) {
+    can_move[i] = last_use[indices[i]] == i;
+  }
+  return can_move;
+}
+
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index 6ec1350cd4..d777062293 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -22,6 +22,26 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
+// This method is used to determine whether we can short-circuit the evaluation
+// of the user-defined function `func`. Short-circuting is possible if every
+// function output corresponds to one of its inputs (e.g. `f(x) = x`, `f(x,y) =
+// (y,x)`, or `f(x) = (x,x)`).
+//
+// If short-circuiting is possible, the method stores the mapping from output
+// indices to input indices in `indices`. Otherwise, `indices` will be empty.
+//
+// Returns non-ok status if analysis of the function fails.
+//
+// TODO(jsimsa): Extend this to support constants as well.
+Status ComputeShortCircuitIndices(OpKernelContext* ctx,
+                                  const NameAttrList& func,
+                                  std::vector<int>* indices);
+
+// Given a vector that maps output indices to input indices, return a vector
+// that identifies for which output indices can we move the input (assuming
+// output indices are processed left to right).
+std::vector<bool> ComputeMoveVector(const std::vector<int>& indices);
+
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
     int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc
new file mode 100644
index 0000000000..43295b8ebb
--- /dev/null
+++ b/tensorflow/core/kernels/data/dataset_utils_test.cc
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/data/dataset_utils.h"
+
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace data {
+namespace {
+
+TEST(DatasetUtils, ComputeMoveVector) {
+  struct TestCase {
+    std::vector<int> indices;
+    std::vector<bool> expected;
+  };
+
+  TestCase test_cases[] = {
+      TestCase{{}, {}},
+      TestCase{{1}, {true}},
+      TestCase{{1, 1}, {false, true}},
+      TestCase{{1, 2}, {true, true}},
+      TestCase{{1, 1, 2}, {false, true, true}},
+      TestCase{{1, 2, 2}, {true, false, true}},
+  };
+
+  for (auto& test_case : test_cases) {
+    EXPECT_EQ(test_case.expected, ComputeMoveVector(test_case.indices));
+  }
+}
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index 00884314a9..be7d182a1f 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -18,9 +18,11 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -31,67 +33,84 @@ namespace {
 
 class FilterDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using FilterIteratorPredicate =
+      std::function<Status(IteratorContext*, std::vector<Tensor>, bool*)>;
+
   explicit FilterDatasetOp(OpKernelConstruction* ctx)
-      : UnaryDatasetOpKernel(ctx),
-        graph_def_version_(ctx->graph_def_version()) {
+      : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_));
   }
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    FunctionLibraryRuntime::Handle pred_handle;
-    OP_REQUIRES_OK(ctx,
-                   ctx->function_library()->Instantiate(
-                       func_.name(), AttrSlice(&func_.attr()), &pred_handle));
-    auto cleanup = gtl::MakeCleanup([ctx, pred_handle]() {
-      OP_REQUIRES_OK(ctx, ctx->function_library()->ReleaseHandle(pred_handle));
-    });
-
-    const FunctionBody* pred_body =
-        ctx->function_library()->GetFunctionBody(pred_handle);
-    OP_REQUIRES(ctx, pred_body->ret_nodes.size() == 1,
-                errors::InvalidArgument(
-                    "predicate function must have a single return value."));
-    Node* ret_node = pred_body->ret_nodes[0];
-    Node* ret_input_node;
-    OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node));
-
     std::unique_ptr<CapturedFunction> captured_func;
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    if (ret_input_node->def().op() == "_Arg") {
-      int32 index = -1;
-      OP_REQUIRES_OK(ctx, GetNodeAttr(ret_input_node->def(), "index", &index));
-      *output = new FilterTensorDataset(ctx, input, func_,
-                                        std::move(captured_func), index);
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+    OP_REQUIRES(ctx, indices.size() <= 1,
+                errors::InvalidArgument(
+                    "predicate function has more than one return value."));
+
+    FilterIteratorPredicate filter_pred;
+    if (indices.empty()) {
+      CapturedFunction* raw_captured_func = captured_func.get();
+      filter_pred = [raw_captured_func](IteratorContext* ctx,
+                                        const std::vector<Tensor>& args,
+                                        bool* out_matched) {
+        std::vector<Tensor> result;
+        TF_RETURN_IF_ERROR(
+            raw_captured_func->RunWithBorrowedArgs(ctx, args, &result));
+
+        if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
+            result[0].NumElements() != 1) {
+          return errors::InvalidArgument(
+              "Filter predicate `f` must return a scalar bool.");
+        }
+        *out_matched = result[0].scalar<bool>()();
+        return Status::OK();
+      };
     } else {
-      *output = new FilterFunctionDataset(ctx, input, func_,
-                                          std::move(captured_func));
+      filter_pred = [indices](IteratorContext* ctx,
+                              const std::vector<Tensor>& args,
+                              bool* out_matched) {
+        const Tensor& predicate = args[indices[0]];
+        if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
+          return errors::InvalidArgument(
+              "Filter predicate `f` must return a scalar bool.");
+        }
+        *out_matched = predicate.scalar<bool>()();
+        return Status::OK();
+      };
     }
+
+    *output = new Dataset(ctx, input, func_, std::move(captured_func),
+                          std::move(filter_pred));
   }
 
  private:
-  const int graph_def_version_;
-
-  class FilterDatasetBase : public DatasetBase {
+  class Dataset : public DatasetBase {
    public:
-    FilterDatasetBase(OpKernelContext* ctx, const DatasetBase* input,
-                      const NameAttrList& func,
-                      std::unique_ptr<CapturedFunction> captured_func)
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func,
+            std::unique_ptr<CapturedFunction> captured_func,
+            FilterIteratorPredicate filter_pred)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
-          captured_func_(std::move(captured_func)) {
+          captured_func_(std::move(captured_func)),
+          filter_pred_(std::move(filter_pred)) {
       input_->Ref();
     }
 
-    ~FilterDatasetBase() override { input_->Unref(); }
+    ~Dataset() override { input_->Unref(); }
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::Filter")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::Filter")},
+          filter_pred_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -133,17 +152,15 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       return Status::OK();
     }
 
-    virtual Status EvaluatePredicate(IteratorContext* ctx,
-                                     const std::vector<Tensor>& element,
-                                     bool* out_matched) const = 0;
-
    private:
-    class Iterator : public DatasetIterator<FilterDatasetBase> {
+    class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<FilterDatasetBase>(params),
+      explicit Iterator(const Params& params,
+                        FilterIteratorPredicate filter_pred)
+          : DatasetIterator<Dataset>(params),
             filtered_elements_(0),
-            dropped_elements_(0) {
+            dropped_elements_(0),
+            filter_pred_(std::move(filter_pred)) {
         std::vector<string> components =
             str_util::Split(params.prefix, "::", str_util::SkipEmpty());
         prefix_end_ = components.back();
@@ -180,8 +197,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          TF_RETURN_IF_ERROR(
-              dataset()->EvaluatePredicate(ctx, *out_tensors, &matched));
+          TF_RETURN_IF_ERROR(filter_pred_(ctx, *out_tensors, &matched));
           if (!matched) {
             // Clear the output tensor list since it didn't match.
             out_tensors->clear();
@@ -251,64 +267,14 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
       int64 filtered_elements_ GUARDED_BY(mu_);
       int64 dropped_elements_ GUARDED_BY(mu_);
+      const FilterIteratorPredicate filter_pred_;
       string prefix_end_;
     };
 
     const DatasetBase* const input_;
     const NameAttrList func_;
-
-   protected:
     const std::unique_ptr<CapturedFunction> captured_func_;
-  };
-
-  class FilterFunctionDataset : public FilterDatasetBase {
-   public:
-    using FilterDatasetBase::FilterDatasetBase;
-
-   protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
-      // TODO(mrry): Avoid blocking a threadpool thread. We will need to
-      // stack-rip the iterators and use async kernels.
-      std::vector<Tensor> result;
-      TF_RETURN_IF_ERROR(
-          captured_func_->RunWithBorrowedArgs(ctx, element, &result));
-
-      if (result.size() != 1 || result[0].dtype() != DT_BOOL ||
-          result[0].NumElements() != 1) {
-        return errors::InvalidArgument(
-            "Filter predicate `f` must return a scalar bool.");
-      }
-      *out_matched = result[0].scalar<bool>()();
-      return Status::OK();
-    }
-  };
-
-  class FilterTensorDataset : public FilterDatasetBase {
-   public:
-    FilterTensorDataset(OpKernelContext* ctx, const DatasetBase* input,
-                        const NameAttrList& func,
-                        std::unique_ptr<CapturedFunction> captured_func,
-                        int32 index)
-        : FilterDatasetBase(ctx, input, func, std::move(captured_func)),
-          index_(index) {}
-
-   protected:
-    Status EvaluatePredicate(IteratorContext* ctx,
-                             const std::vector<Tensor>& element,
-                             bool* out_matched) const override {
-      const Tensor& predicate = element[index_];
-      if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) {
-        return errors::InvalidArgument(
-            "Filter predicate `f` must return a scalar bool.");
-      }
-      *out_matched = predicate.scalar<bool>()();
-      return Status::OK();
-    }
-
-   private:
-    const int32 index_;
+    const FilterIteratorPredicate filter_pred_;
   };
 
  private:
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index bf08970560..f45a239793 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/inplace_ops_functor.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
@@ -29,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -41,6 +43,10 @@ namespace {
 // transformation more robust.
 class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using MapAndBatchIteratorFunction =
+      std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
+                         std::shared_ptr<std::vector<Tensor>>, StatusCallback)>;
+
   explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx)
       : UnaryDatasetOpKernel(ctx),
         op_version_(ctx->def().op() == "MapAndBatchDataset" ? 1 : 2) {
@@ -91,31 +97,73 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
                                                  &captured_func));
 
-    *output = new Dataset(ctx, input, batch_size, num_parallel_calls,
-                          drop_remainder, output_types_, output_shapes_, func_,
-                          std::move(captured_func), &ctx->eigen_cpu_device());
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    MapAndBatchIteratorFunction map_func;
+    CapturedFunction* raw_captured_func = captured_func.get();
+    if (indices.empty()) {
+      map_func = [raw_captured_func](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args,
+                     std::shared_ptr<std::vector<Tensor>> out_tensors,
+                     StatusCallback done) {
+        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors.get(),
+                                    std::move(done), prefix);
+      };
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [raw_captured_func, indices, can_move](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args,
+                     std::shared_ptr<std::vector<Tensor>> out_tensors,
+                     StatusCallback done) {
+        const std::vector<Tensor>& captured_inputs =
+            raw_captured_func->captured_inputs();
+        size_t num_args = args.size();
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (indices[i] < num_args) {
+            if (can_move[i]) {
+              out_tensors->push_back(std::move(args[indices[i]]));
+            } else {
+              out_tensors->push_back(args[indices[i]]);
+            }
+          } else {
+            out_tensors->push_back(captured_inputs[indices[i] - num_args]);
+          }
+        }
+        done(Status::OK());
+      };
+    }
+
+    *output = new Dataset(ctx, input, func_, batch_size, num_parallel_calls,
+                          drop_remainder, output_types_, output_shapes_,
+                          std::move(captured_func), &ctx->eigen_cpu_device(),
+                          std::move(map_func));
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
-    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size,
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            const NameAttrList& func, int64 batch_size,
             int64 num_parallel_calls, bool drop_remainder,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
-            const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
-            const Eigen::ThreadPoolDevice* device)
+            const Eigen::ThreadPoolDevice* device,
+            MapAndBatchIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
+          func_(func),
           batch_size_(batch_size),
           num_parallel_calls_(num_parallel_calls),
           drop_remainder_(drop_remainder),
           output_types_(output_types),
           output_shapes_(output_shapes),
-          map_fn_(func),
           captured_func_(std::move(captured_func)),
-          device_(device) {
+          device_(device),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -123,8 +171,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::MapAndBatch")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::MapAndBatch")},
+          map_func_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -143,7 +192,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      TF_RETURN_IF_ERROR(b->AddFunction(ctx, map_fn_.name()));
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
       Node* input_graph_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
       Node* batch_size_node;
@@ -165,7 +214,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         other_arguments_types.emplace_back(t.dtype());
       }
       AttrValue f;
-      b->BuildAttrValue(map_fn_, &f);
+      b->BuildAttrValue(func_, &f);
       AttrValue other_arguments_types_attr;
       b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
 
@@ -185,12 +234,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
+      explicit Iterator(const Params& params,
+                        MapAndBatchIteratorFunction map_func)
           : DatasetIterator<Dataset>(params),
             mu_(std::make_shared<mutex>()),
             cond_var_(std::make_shared<condition_variable>()),
             num_parallel_calls_(std::make_shared<model::SharedState>(
-                params.dataset->num_parallel_calls_, mu_, cond_var_)) {}
+                params.dataset->num_parallel_calls_, mu_, cond_var_)),
+            map_func_(std::move(map_func)) {}
 
       ~Iterator() override {
         mutex_lock l(*mu_);
@@ -297,44 +348,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         int64 num_calls;  // access guarded by owner's mutex
       };
 
-      void Callback(const std::shared_ptr<IteratorContext>& ctx,
-                    const std::shared_ptr<BatchResult>& result,
-                    const std::shared_ptr<std::vector<Tensor>>& return_values,
-                    int64 offset, const Status& status) LOCKS_EXCLUDED(*mu_) {
-        result->UpdateStatus(status);
-        if (status.ok()) {
-          EnsureOutputAllocated(ctx, result, return_values);
-          for (size_t i = 0; i < return_values->size(); ++i) {
-            const Tensor& tensor = return_values->at(i);
-            Tensor* batch = &(result->output)[i];
-            if (tensor.NumElements() !=
-                (batch->NumElements() / batch->dim_size(0))) {
-              TensorShape batch_shape = batch->shape();
-              batch_shape.RemoveDim(0);
-              result->UpdateStatus(errors::InvalidArgument(
-                  "Cannot add tensor to the batch: number of elements does not "
-                  "match. Shapes are: [tensor]: ",
-                  tensor.shape().DebugString(),
-                  ", [batch]: ", batch_shape.DebugString()));
-              break;
-            }
-            // TODO(mrry): Add a version of DoParallelConcat that allows us to
-            // move `tensor` where possible, to speed up string tensor batching.
-            Status copy_status = ::tensorflow::functor::DoParallelConcat(
-                *dataset()->device_, tensor, offset, batch);
-            if (!copy_status.ok()) {
-              result->UpdateStatus(copy_status);
-              break;
-            }
-          }
-          {
-            mutex_lock l(result->mu);
-            result->num_elements++;
-          }
-        }
-        CallCompleted(result);
-      }
-
       void CallCompleted(const std::shared_ptr<BatchResult>& result)
           LOCKS_EXCLUDED(*mu_) {
         mutex_lock l(*mu_);
@@ -363,21 +376,48 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           return;
         }
 
-        // Call `captured_func_(input_element)`, using `Callback` to store the
-        // result in `result`.
-        (*ctx->runner())(std::bind(
-            [this, result, offset](std::shared_ptr<IteratorContext> ctx,
-                                   std::vector<Tensor> input_element) {
-              std::shared_ptr<std::vector<Tensor>> return_values(
-                  new std::vector<Tensor>());
-              dataset()->captured_func_->RunAsync(
-                  ctx.get(), std::move(input_element), return_values.get(),
-                  [this, ctx, result, return_values, offset](Status status) {
-                    Callback(ctx, result, return_values, offset, status);
-                  },
-                  prefix());
-            },
-            ctx, std::move(input_element)));
+        std::shared_ptr<std::vector<Tensor>> return_values =
+            std::make_shared<std::vector<Tensor>>();
+        auto done = [this, ctx, result, return_values, offset](Status status) {
+          result->UpdateStatus(status);
+          if (status.ok()) {
+            EnsureOutputAllocated(ctx, result, return_values);
+            for (size_t i = 0; i < return_values->size(); ++i) {
+              const Tensor& tensor = return_values->at(i);
+              Tensor* batch = &(result->output)[i];
+              if (tensor.NumElements() !=
+                  (batch->NumElements() / batch->dim_size(0))) {
+                TensorShape batch_shape = batch->shape();
+                batch_shape.RemoveDim(0);
+                result->UpdateStatus(errors::InvalidArgument(
+                    "Cannot add tensor to the batch: number of elements does "
+                    "not match. Shapes are: [tensor]: ",
+                    tensor.shape().DebugString(),
+                    ", [batch]: ", batch_shape.DebugString()));
+                break;
+              }
+              // TODO(mrry): Add a version of DoParallelConcat that allows us to
+              // move `tensor` where possible, to speed up string tensor
+              // batching.
+              Status copy_status = ::tensorflow::functor::DoParallelConcat(
+                  *dataset()->device_, tensor, offset, batch);
+              if (!copy_status.ok()) {
+                result->UpdateStatus(copy_status);
+                break;
+              }
+            }
+            {
+              mutex_lock l(result->mu);
+              result->num_elements++;
+            }
+          }
+          CallCompleted(result);
+        };
+
+        // Apply the map function on `input_element`, storing the result in
+        // `return_values`, and invoking `done` when finished.
+        map_func_(ctx.get(), prefix(), std::move(input_element),
+                  std::move(return_values), std::move(done));
       }
 
       Status CopyPartialBatch(Tensor* output, const Tensor& value,
@@ -404,7 +444,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
-          std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
+          auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
           runner_thread_.reset(ctx->env()->StartThread(
               {}, "runner_thread",
               std::bind(&Iterator::RunnerThread, this, ctx_copy)));
@@ -509,8 +549,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
             while (!busy()) {
               if (call_counter_ % dataset()->batch_size_ == 0) {
-                batch_results_.emplace_back(
-                    new BatchResult(dataset()->batch_size_));
+                batch_results_.push_back(
+                    std::make_shared<BatchResult>(dataset()->batch_size_));
               }
               int64 offset = call_counter_++ % dataset()->batch_size_;
               new_calls.emplace_back(batch_results_.back(), offset);
@@ -527,7 +567,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader,
                              size_t index) EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
-        batch_results_.emplace_back(new BatchResult(dataset()->batch_size_));
+        batch_results_.push_back(
+            std::make_shared<BatchResult>(dataset()->batch_size_));
         std::shared_ptr<BatchResult> result = batch_results_.back();
         string prefix = strings::StrCat("batch_results_", index);
         mutex_lock l(result->mu);
@@ -653,6 +694,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       const std::shared_ptr<condition_variable> cond_var_;
       // Identifies the maximum number of parallel calls.
       const std::shared_ptr<model::SharedState> num_parallel_calls_;
+      const MapAndBatchIteratorFunction map_func_;
+
       // Counts the number of outstanding calls for this batch.
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
       // Counts the total number of calls.
@@ -671,9 +714,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     const bool drop_remainder_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
-    const NameAttrList map_fn_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const Eigen::ThreadPoolDevice* device_;  // not owned
+    const MapAndBatchIteratorFunction map_func_;
   };
 
   const int op_version_;
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index f112e1dc43..6b6ffabf4f 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -17,7 +17,9 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -28,6 +30,9 @@ namespace {
 
 class MapDatasetOp : public UnaryDatasetOpKernel {
  public:
+  using MapIteratorFunction = std::function<Status(
+      IteratorContext*, std::vector<Tensor>, std::vector<Tensor>*)>;
+
   explicit MapDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
@@ -43,8 +48,42 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    MapIteratorFunction map_func;
+    CapturedFunction* raw_captured_func = captured_func.get();
+    if (indices.empty()) {
+      map_func = [raw_captured_func](IteratorContext* ctx,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors) {
+        return raw_captured_func->Run(ctx, std::move(args), out_tensors);
+      };
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [raw_captured_func, indices, can_move](
+                     IteratorContext* ctx, std::vector<Tensor> args,
+                     std::vector<Tensor>* out_tensors) {
+        const std::vector<Tensor>& captured_inputs =
+            raw_captured_func->captured_inputs();
+        size_t num_args = args.size();
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (indices[i] < num_args) {
+            if (can_move[i]) {
+              out_tensors->push_back(std::move(args[indices[i]]));
+            } else {
+              out_tensors->push_back(args[indices[i]]);
+            }
+          } else {
+            out_tensors->push_back(captured_inputs[indices[i] - num_args]);
+          }
+        }
+        return Status::OK();
+      };
+    }
+
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
-                          output_types_, output_shapes_);
+                          output_types_, output_shapes_, std::move(map_func));
   }
 
  private:
@@ -54,13 +93,15 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
             const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func,
             const DataTypeVector& output_types,
-            const std::vector<PartialTensorShape>& output_shapes)
+            const std::vector<PartialTensorShape>& output_shapes,
+            MapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
           captured_func_(std::move(captured_func)),
           output_types_(output_types),
-          output_shapes_(output_shapes) {
+          output_shapes_(output_shapes),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -68,8 +109,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::Map")}));
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::Map")}, map_func_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -116,8 +157,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
    private:
     class Iterator : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
-          : DatasetIterator<Dataset>(params) {}
+      explicit Iterator(const Params& params, MapIteratorFunction map_func)
+          : DatasetIterator<Dataset>(params), map_func_(std::move(map_func)) {}
 
       Status Initialize(IteratorContext* ctx) override {
         TF_RETURN_IF_ERROR(
@@ -139,10 +180,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
           return Status::OK();
         }
 
-        // TODO(mrry): Avoid blocking a threadpool thread. We will need to
-        // stack-rip the iterators and use async kernels.
-        Status s =
-            dataset()->captured_func_->Run(ctx, std::move(args), out_tensors);
+        Status s = map_func_(ctx, args, out_tensors);
         if (errors::IsOutOfRange(s)) {
           // `f` may deliberately raise `errors::OutOfRange` to indicate
           // that we should terminate the iteration early.
@@ -167,6 +205,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
      private:
       std::unique_ptr<IteratorBase> input_impl_;
+      const MapIteratorFunction map_func_;
     };
 
     const DatasetBase* const input_;
@@ -174,6 +213,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
     const std::unique_ptr<CapturedFunction> captured_func_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
+    const MapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 6abe6c8338..3a14924fba 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/parallel_map_iterator.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -56,9 +57,55 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
                                                  use_inter_op_parallelism_,
                                                  &captured_func));
 
+    std::vector<int> indices;
+    OP_REQUIRES_OK(ctx, ComputeShortCircuitIndices(ctx, func_, &indices));
+
+    ParallelMapIteratorFunction map_func;
+    CapturedFunction* raw_captured_func = captured_func.get();
+    if (indices.empty()) {
+      map_func = [raw_captured_func](IteratorContext* ctx, const string& prefix,
+                                     std::vector<Tensor> args,
+                                     std::vector<Tensor>* out_tensors,
+                                     StatusCallback done) {
+        raw_captured_func->RunAsync(ctx, std::move(args), out_tensors,
+                                    std::move(done), prefix);
+      };
+      if (!use_inter_op_parallelism_) {
+        map_func = [map_func](IteratorContext* ctx, const string& prefix,
+                              std::vector<Tensor> args,
+                              std::vector<Tensor>* out_tensors,
+                              StatusCallback done) {
+          (*ctx->runner())(std::bind(map_func, ctx, prefix, std::move(args),
+                                     out_tensors, std::move(done)));
+        };
+      }
+    } else {
+      std::vector<bool> can_move = ComputeMoveVector(indices);
+      map_func = [raw_captured_func, indices, can_move](
+                     IteratorContext* ctx, const string& prefix,
+                     std::vector<Tensor> args, std::vector<Tensor>* out_tensors,
+                     StatusCallback done) {
+        const std::vector<Tensor>& captured_inputs =
+            raw_captured_func->captured_inputs();
+        size_t num_args = args.size();
+        for (size_t i = 0; i < indices.size(); ++i) {
+          if (indices[i] < num_args) {
+            if (can_move[i]) {
+              out_tensors->push_back(std::move(args[indices[i]]));
+            } else {
+              out_tensors->push_back(args[indices[i]]);
+            }
+          } else {
+            out_tensors->push_back(captured_inputs[indices[i] - num_args]);
+          }
+        }
+        done(Status::OK());
+      };
+    }
+
     *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_,
                           output_shapes_, use_inter_op_parallelism_,
-                          std::move(captured_func));
+                          std::move(captured_func), std::move(map_func));
   }
 
  private:
@@ -69,7 +116,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
             bool use_inter_op_parallelism,
-            std::unique_ptr<CapturedFunction> captured_func)
+            std::unique_ptr<CapturedFunction> captured_func,
+            ParallelMapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
@@ -77,7 +125,8 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
           output_types_(output_types),
           output_shapes_(output_shapes),
           use_inter_op_parallelism_(use_inter_op_parallelism),
-          captured_func_(std::move(captured_func)) {
+          captured_func_(std::move(captured_func)),
+          map_func_(std::move(map_func)) {
       input_->Ref();
     }
 
@@ -89,26 +138,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
         return captured_func_->Instantiate(ctx);
       };
 
-      const string& new_prefix = strings::StrCat(prefix, "::ParallelMap");
-      ParallelMapIteratorFunction map_func =
-          [this, new_prefix](IteratorContext* ctx,
-                             std::vector<Tensor> input_element,
-                             std::vector<Tensor>* result, StatusCallback done) {
-            captured_func_->RunAsync(ctx, std::move(input_element), result,
-                                     std::move(done), new_prefix);
-          };
-      if (!use_inter_op_parallelism_) {
-        map_func = [map_func](
-                       IteratorContext* ctx, std::vector<Tensor> input_element,
-                       std::vector<Tensor>* result, StatusCallback done) {
-          (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element),
-                                     result, std::move(done)));
-        };
-      }
-
-      return NewParallelMapIterator({this, new_prefix}, input_,
-                                    std::move(init_func), std::move(map_func),
-                                    num_parallel_calls_);
+      return NewParallelMapIterator(
+          {this, strings::StrCat(prefix, "::ParallelMap")}, input_,
+          std::move(init_func), map_func_, num_parallel_calls_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -176,6 +208,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> output_shapes_;
     const bool use_inter_op_parallelism_;
     const std::unique_ptr<CapturedFunction> captured_func_;
+    const ParallelMapIteratorFunction map_func_;
   };
 
   DataTypeVector output_types_;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index 13bd4b6036..ebf41925c9 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -22,6 +22,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -179,7 +180,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   void EnsureRunnerThreadStarted(IteratorContext* ctx)
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
-      std::shared_ptr<IteratorContext> ctx_copy(new IteratorContext(*ctx));
+      auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
       runner_thread_.reset(ctx->env()->StartThread(
           {}, "runner_thread",
           std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
@@ -208,15 +209,15 @@ class ParallelMapIterator : public DatasetBaseIterator {
       return;
     }
 
-    // Call `func_(input_element)`, store the result in `result->return_values`,
-    // and notify `result->notification` to unblock a consumer.
     auto done = [this, result](Status status) {
       result->status.Update(status);
       CallCompleted(result);
     };
 
-    map_func_(ctx.get(), std::move(input_element), &result->return_values,
-              std::move(done));
+    // Apply the map function on `input_element`, storing the result in
+    // `result->return_values`, and invoking `done` when finished.
+    map_func_(ctx.get(), prefix(), std::move(input_element),
+              &result->return_values, std::move(done));
   }
 
   Status ProcessResult(const std::shared_ptr<InvocationResult>& result,
@@ -349,9 +350,9 @@ std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBase* input_dataset,
     std::function<Status(IteratorContext*)> init_func,
     ParallelMapIteratorFunction map_func, int32 num_parallel_calls) {
-  return std::unique_ptr<IteratorBase>(
-      new ParallelMapIterator(params, input_dataset, std::move(init_func),
-                              std::move(map_func), num_parallel_calls));
+  return MakeUnique<ParallelMapIterator>(
+      params, input_dataset, std::move(init_func), std::move(map_func),
+      num_parallel_calls);
 }
 
 }  // namespace data
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h
index dc26c5cf25..813f13c9e4 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.h
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h
@@ -30,7 +30,7 @@ namespace data {
 // 3. A `std::vector<Tensor>*` to which the function will write the result.
 // 4. A `StatusCallback` that should be invoked when the function is complete.
 using ParallelMapIteratorFunction =
-    std::function<void(IteratorContext*, std::vector<Tensor>,
+    std::function<void(IteratorContext*, const string&, std::vector<Tensor>,
                        std::vector<Tensor>*, StatusCallback)>;
 
 // Returns a new iterator that applies `map_func` to the elements of
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index 1d1a717062..7de5ea8860 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -182,7 +182,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      auto map_fn = [this](IteratorContext* ctx,
+      auto map_fn = [this](IteratorContext* ctx, const string& prefix,
                            std::vector<Tensor> input_element,
                            std::vector<Tensor>* result, StatusCallback done) {
         (*ctx->runner())([this, ctx, input_element, result, done]() {
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index afd0fc3abf..d444c4082e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -332,6 +332,37 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       for _ in range(10):
         self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
 
+  @parameterized.named_parameters(
+      ("Identity", None, lambda x: x, None),
+      ("Replicate", None, lambda x: (x, x), None),
+      ("Swap", (None, None), lambda x, y: (y, x), None),
+      ("Project", (None, None), lambda x, y: x, None),
+  )
+  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
+    dataset = self.structuredDataset(structure).repeat().apply(
+        batching.map_and_batch(map_fn, batch_size=10))
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      if isinstance(structure, tuple):
+        expected = map_fn(
+            *sess.run(self.structuredElement(structure, shape=[10])))
+      else:
+        expected = map_fn(
+            sess.run(self.structuredElement(structure, shape=[10])))
+      self.assertAllEqual(expected, sess.run(get_next))
+
+  def testShortCircuitCapturedInput(self):
+    captured_t = array_ops.placeholder(dtypes.int64, shape=[])
+    dataset = self.structuredDataset(None).repeat().apply(
+        batching.map_and_batch(lambda x: captured_t, batch_size=10))
+    iterator = dataset.make_initializable_iterator()
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer, feed_dict={captured_t: 42})
+      self.assertAllEqual([42] * 10, sess.run(get_next))
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
index 6b7afafa5d..a0c6b37a6d 100644
--- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py
@@ -156,7 +156,7 @@ class FilterDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testReturnComponent(self):
+  def testShortCircuit(self):
     iterator = (
         dataset_ops.Dataset.zip(
             (dataset_ops.Dataset.range(10),
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 0c372ebb10..4683b1db91 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -622,7 +622,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
         self.assertSparseValuesEqual(actual, _sparse(i))
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -649,7 +649,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op)
       for i in range(10):
         actual = sess.run(get_next)
-        self.assertTrue(isinstance(actual, sparse_tensor.SparseTensorValue))
+        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
         self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval())
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
@@ -783,19 +783,72 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.assertTrue(all(tids[0] == tid for tid in tids))
 # pylint: enable=g-long-lambda
 
+  @parameterized.named_parameters(
+      ("SequentialIdentity", None, lambda x: x, None),
+      ("SequentialReplicate", None, lambda x: (x, x), None),
+      ("SequentialSwap", (None, None), lambda x, y: (y, x), None),
+      ("SequentialProject", (None, None), lambda x, y: x, None),
+      ("ParallelIdentity", None, lambda x: x, 10),
+      ("ParallelReplicate", None, lambda x: (x, x), 10),
+      ("ParallelSwap", (None, None), lambda x, y: (y, x), 10),
+      ("ParallelProject", (None, None), lambda x, y: x, 10),
+  )
+  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
+    dataset = self.structuredDataset(structure).repeat().map(
+        map_fn, num_parallel_calls=num_parallel_calls)
+    get_next = dataset.make_one_shot_iterator().get_next()
+
+    with self.cached_session() as sess:
+      if isinstance(structure, tuple):
+        expected = map_fn(*sess.run(self.structuredElement(structure)))
+      else:
+        expected = map_fn(sess.run(self.structuredElement(structure)))
+      self.assertEqual(expected, sess.run(get_next))
+
+  @parameterized.named_parameters(
+      ("Sequential", None),
+      ("Parallel", 10),
+  )
+  def testShortCircuitCapturedInput(self, num_parallel_calls):
+    captured_t = array_ops.placeholder(dtypes.int64, shape=[])
+    dataset = self.structuredDataset(None).repeat().map(
+        lambda x: captured_t, num_parallel_calls=num_parallel_calls)
+    iterator = dataset.make_initializable_iterator()
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer, feed_dict={captured_t: 42})
+      self.assertEqual(42, sess.run(get_next))
+
 
 class MapDatasetBenchmark(test.Benchmark):
 
   def benchmarkChainOfMaps(self):
     chain_lengths = [0, 1, 2, 5, 10, 20, 50]
     for chain_length in chain_lengths:
-      for use_inter_op_parallelism in [False, True]:
+      for mode in ["general", "single-threaded", "short-circuit"]:
+        if mode == "general":
+          map_fn = lambda x: x + 1
+          use_inter_op_parallelism = True
+          print_label = ""
+          benchmark_label = ""
+        if mode == "single-threaded":
+          map_fn = lambda x: x + 1
+          use_inter_op_parallelism = False
+          print_label = " (single threaded mode)"
+          benchmark_label = "_single_threaded"
+        if mode == "short-circuit":
+          map_fn = lambda x: x
+          use_inter_op_parallelism = True  # should not have any significance
+          print_label = " (short circuit mode)"
+          benchmark_label = "_short_circuit"
+
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
           for _ in range(chain_length):
             dataset = dataset_ops.MapDataset(
                 dataset,
-                lambda x: x,
+                map_fn,
                 use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -813,25 +866,39 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset chain length%s: %d Median wall time: %f" %
-                  (" (single threaded mode)" if not use_inter_op_parallelism
-                   else "", chain_length, median_wall_time))
+                  (print_label, chain_length, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
                 name="benchmark_map_dataset_chain_latency_%d%s" %
-                (chain_length, "_single_threaded"
-                 if not use_inter_op_parallelism else ""))
+                (chain_length, benchmark_label))
 
   def benchmarkMapFanOut(self):
     fan_outs = [1, 2, 5, 10, 20, 50, 100]
     for fan_out in fan_outs:
-      for use_inter_op_parallelism in [False, True]:
+      for mode in ["general", "single-threaded", "short-circuit"]:
+        if mode == "general":
+          map_fn = lambda *xs: [x + 1 for x in xs]
+          use_inter_op_parallelism = True
+          print_label = ""
+          benchmark_label = ""
+        if mode == "single-threaded":
+          map_fn = lambda *xs: [x + 1 for x in xs]
+          use_inter_op_parallelism = False
+          print_label = " (single threaded mode)"
+          benchmark_label = "_single_threaded"
+        if mode == "short-circuit":
+          map_fn = lambda *xs: xs
+          use_inter_op_parallelism = True  # should not have any significance
+          print_label = " (short circuit mode)"
+          benchmark_label = "_short_circuit"
+
         with ops.Graph().as_default():
           dataset = dataset_ops.Dataset.from_tensors(
               tuple(0 for _ in range(fan_out))).repeat(None)
           dataset = dataset_ops.MapDataset(
               dataset,
-              lambda *xs: xs,
+              map_fn,
               use_inter_op_parallelism=use_inter_op_parallelism)
           iterator = dataset.make_one_shot_iterator()
           next_element = iterator.get_next()
@@ -849,14 +916,12 @@ class MapDatasetBenchmark(test.Benchmark):
 
             median_wall_time = np.median(deltas) / 100
             print("Map dataset fan out%s: %d Median wall time: %f" %
-                  (" (single threaded mode)" if not use_inter_op_parallelism
-                   else "", fan_out, median_wall_time))
+                  (print_label, fan_out, median_wall_time))
             self.report_benchmark(
                 iters=1000,
                 wall_time=median_wall_time,
-                name="benchmark_map_dataset_fan_out_%d%s" %
-                (fan_out, "_single_threaded"
-                 if not use_inter_op_parallelism else ""))
+                name="benchmark_map_dataset_fan_out_%d%s" % (fan_out,
+                                                             benchmark_label))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
index b730e10949..b73a94e683 100644
--- a/tensorflow/python/data/kernel_tests/test_base.py
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -19,10 +19,13 @@ from __future__ import print_function
 
 import re
 
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
@@ -107,3 +110,29 @@ class DatasetTestBase(test.TestCase):
       with self.assertRaisesRegexp(exception_class,
                                    re.escape(expected_message)):
         self.evaluate(next2())
+
+  def structuredDataset(self, structure, shape=None, dtype=dtypes.int64):
+    """Returns a singleton dataset with the given structure."""
+    if shape is None:
+      shape = []
+    if structure is None:
+      return dataset_ops.Dataset.from_tensors(
+          array_ops.zeros(shape, dtype=dtype))
+    else:
+      return dataset_ops.Dataset.zip(
+          tuple([
+              self.structuredDataset(substructure, shape, dtype)
+              for substructure in structure
+          ]))
+
+  def structuredElement(self, structure, shape=None, dtype=dtypes.int64):
+    """Returns an element with the given structure."""
+    if shape is None:
+      shape = []
+    if structure is None:
+      return array_ops.zeros(shape, dtype=dtype)
+    else:
+      return tuple([
+          self.structuredElement(substructure, shape, dtype)
+          for substructure in structure
+      ])
-- 
GitLab


From a04cd08ee7a8c5245d76a59849e1f7e8ba8a3f52 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 8 Oct 2018 10:20:52 -0700
Subject: [PATCH 0505/1085] Allow TensorSpec objects as arguments to defun's
 get_concrete_function

Will be helpful for specifying serving signatures when exporting SavedModels

PiperOrigin-RevId: 216207284
---
 tensorflow/python/eager/function.py      | 24 +++++----------
 tensorflow/python/eager/function_test.py | 37 ++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index bafe07de2b..93168826b1 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -855,20 +855,12 @@ class Function(object):
     return ret
 
 
-def _get_defun_inputs_from_signature(signature):
-  """Maps a signature to graph-construction inputs."""
-  function_inputs = [
-      graph_placeholder(spec.dtype, spec.shape)
-      for spec in nest.flatten(signature)
-  ]
-  return nest.pack_sequence_as(signature, function_inputs)
-
-
 def _get_defun_inputs_from_args(args):
   """Maps python function args to graph-construction inputs."""
   function_inputs = [
       graph_placeholder(arg.dtype, arg.shape)
-      if isinstance(arg, ops.Tensor) else arg for arg in nest.flatten(args)
+      if isinstance(arg, (ops.Tensor, tensor_spec.TensorSpec))
+      else arg for arg in nest.flatten(args)
   ]
   return nest.pack_sequence_as(args, function_inputs)
 
@@ -912,12 +904,12 @@ def func_graph_from_py_func(name,
   with func_graph.as_default(), AutomaticControlDependencies() as a:
     variable_scope.get_variable_scope().set_use_resource(True)
 
-    if signature is None:
-      func_args = _get_defun_inputs_from_args(args)
-      func_kwargs = _get_defun_inputs_from_args(kwargs)
-    else:
-      func_args = _get_defun_inputs_from_signature(signature)
-      func_kwargs = {}
+    if signature is not None:
+      args = signature
+      kwargs = {}
+
+    func_args = _get_defun_inputs_from_args(args)
+    func_kwargs = _get_defun_inputs_from_args(kwargs)
 
     # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
     # Variables to help check whether mutation happens in calling the function
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index a2cfb4b476..57e545be69 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -172,6 +172,43 @@ class FunctionTest(test.TestCase):
     out = sq_op(t)
     self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
 
+  def testInputSpecGraphFunction(self):
+    matmul = function.defun(math_ops.matmul)
+
+    @function.defun
+    def sq(a):
+      return matmul(a, a)
+
+    sq_op = sq.get_concrete_function(
+        tensor_spec.TensorSpec((None, None), dtypes.float32))
+    self.assertEqual([None, None], sq_op.output_shapes.as_list())
+
+    t1 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+    out1 = sq_op(t1)
+    self.assertAllEqual(out1, math_ops.matmul(t1, t1).numpy())
+
+    t2 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+    out2 = sq_op(t2)
+    self.assertAllEqual(out2, math_ops.matmul(t2, t2).numpy())
+
+  def testNestedInputSpecGraphFunction(self):
+    matmul = function.defun(math_ops.matmul)
+
+    @function.defun
+    def sq(mats):
+      ((a, b),) = mats
+      return matmul(a, b)
+
+    sq_op = sq.get_concrete_function(
+        [(tensor_spec.TensorSpec((None, None), dtypes.float32),
+          tensor_spec.TensorSpec((None, None), dtypes.float32))])
+    self.assertEqual([None, None], sq_op.output_shapes.as_list())
+
+    t1 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+    t2 = constant_op.constant([[1.4, 2.4], [3.4, 4.4]])
+    out = sq_op(t1, t2)  # Flattened structure for inputs to the graph function
+    self.assertAllEqual(out, math_ops.matmul(t1, t2).numpy())
+
   def testExecutingStatelessDefunConcurrently(self):
 
     @function.defun
-- 
GitLab


From 049d98c84ca7474459175914ca49c1fa3c11581d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 10:28:59 -0700
Subject: [PATCH 0506/1085] Wait for shared resources to initialize before
 initializing local resources. shared resources are very similar to global
 variables functionally and they are initialized at the same time but since
 workers are only waiting for global variables being initialized, there is a
 race condition that sometimes the shared resource is not ready.

PiperOrigin-RevId: 216208679
---
 tensorflow/python/training/monitored_session.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index 82f0e3be52..a479f38165 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -195,8 +195,12 @@ class Scaffold(object):
           default_ready_op)
     if self._ready_for_local_init_op is None:
       def default_ready_for_local_init_op():
-        return variables.report_uninitialized_variables(
-            variables.global_variables())
+        return array_ops.concat([
+            variables.report_uninitialized_variables(
+                variables.global_variables()),
+            resources.report_uninitialized_resources(
+                resources.shared_resources())
+        ], 0)
       self._ready_for_local_init_op = Scaffold.get_or_default(
           'ready_for_local_init_op', ops.GraphKeys.READY_FOR_LOCAL_INIT_OP,
           default_ready_for_local_init_op)
-- 
GitLab


From 153decedefc8da1fbd0717f4223b4b053e7aa517 Mon Sep 17 00:00:00 2001
From: Karmel Allison <karmel@google.com>
Date: Mon, 8 Oct 2018 10:36:38 -0700
Subject: [PATCH 0507/1085] Add support for SequenceExamples to
 sequence_feature_columns

PiperOrigin-RevId: 216210141
---
 .../contrib/estimator/python/estimator/rnn.py |  54 +-
 tensorflow/contrib/feature_column/BUILD       |  21 +
 .../feature_column/sequence_feature_column.py |  72 +-
 ...equence_feature_column_integration_test.py | 280 ++++++
 .../sequence_feature_column_test.py           | 912 ++++++++++++------
 .../python/feature_column/feature_column.py   |  53 +-
 tensorflow/python/ops/parsing_ops.py          |  13 +-
 7 files changed, 1018 insertions(+), 387 deletions(-)
 create mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py

diff --git a/tensorflow/contrib/estimator/python/estimator/rnn.py b/tensorflow/contrib/estimator/python/estimator/rnn.py
index 98660bb731..c595f47395 100644
--- a/tensorflow/contrib/estimator/python/estimator/rnn.py
+++ b/tensorflow/contrib/estimator/python/estimator/rnn.py
@@ -30,7 +30,6 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.layers import core as core_layers
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import partitioned_variables
@@ -92,55 +91,6 @@ def _make_rnn_cell_fn(num_units, cell_type='basic_rnn'):
   return rnn_cell_fn
 
 
-def _concatenate_context_input(sequence_input, context_input):
-  """Replicates `context_input` across all timesteps of `sequence_input`.
-
-  Expands dimension 1 of `context_input` then tiles it `sequence_length` times.
-  This value is appended to `sequence_input` on dimension 2 and the result is
-  returned.
-
-  Args:
-    sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size,
-      padded_length, d0]`.
-    context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`.
-
-  Returns:
-    A `Tensor` of dtype `float32` and shape `[batch_size, padded_length,
-    d0 + d1]`.
-
-  Raises:
-    ValueError: If `sequence_input` does not have rank 3 or `context_input` does
-      not have rank 2.
-  """
-  seq_rank_check = check_ops.assert_rank(
-      sequence_input,
-      3,
-      message='sequence_input must have rank 3',
-      data=[array_ops.shape(sequence_input)])
-  seq_type_check = check_ops.assert_type(
-      sequence_input,
-      dtypes.float32,
-      message='sequence_input must have dtype float32; got {}.'.format(
-          sequence_input.dtype))
-  ctx_rank_check = check_ops.assert_rank(
-      context_input,
-      2,
-      message='context_input must have rank 2',
-      data=[array_ops.shape(context_input)])
-  ctx_type_check = check_ops.assert_type(
-      context_input,
-      dtypes.float32,
-      message='context_input must have dtype float32; got {}.'.format(
-          context_input.dtype))
-  with ops.control_dependencies(
-      [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]):
-    padded_length = array_ops.shape(sequence_input)[1]
-    tiled_context_input = array_ops.tile(
-        array_ops.expand_dims(context_input, 1),
-        array_ops.concat([[1], [padded_length], [1]], 0))
-  return array_ops.concat([sequence_input, tiled_context_input], 2)
-
-
 def _select_last_activations(activations, sequence_lengths):
   """Selects the nth set of activations for each n in `sequence_length`.
 
@@ -222,8 +172,8 @@ def _rnn_logit_fn_builder(output_units, rnn_cell_fn, sequence_feature_columns,
         context_input = feature_column_lib.input_layer(
             features=features,
             feature_columns=context_feature_columns)
-        sequence_input = _concatenate_context_input(sequence_input,
-                                                    context_input)
+        sequence_input = seq_fc.concatenate_context_input(
+            context_input, sequence_input)
 
     cell = rnn_cell_fn(mode)
     # Ignore output state.
diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD
index aab7d0c9e8..a926ffd598 100644
--- a/tensorflow/contrib/feature_column/BUILD
+++ b/tensorflow/contrib/feature_column/BUILD
@@ -27,6 +27,7 @@ py_library(
         "//tensorflow/python:check_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
         "//tensorflow/python:parsing_ops",
         "//tensorflow/python:sparse_ops",
         "//tensorflow/python:tensor_shape",
@@ -46,9 +47,29 @@ py_test(
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:parsing_ops",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:training",
         "//tensorflow/python/feature_column",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "sequence_feature_column_integration_test",
+    srcs = ["python/feature_column/sequence_feature_column_integration_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":sequence_feature_column",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:util",
+        "//tensorflow/python/feature_column",
+        "//tensorflow/python/keras:layers",
     ],
 )
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
index 05bcdac2ca..dd6da35ed0 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py
@@ -33,7 +33,6 @@ from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import variable_scope
 
 # pylint: disable=protected-access
-# TODO(b/73827486): Support SequenceExample.
 
 
 def sequence_input_layer(
@@ -110,6 +109,7 @@ def sequence_input_layer(
     output_tensors = []
     sequence_lengths = []
     ordered_columns = []
+
     for column in sorted(feature_columns, key=lambda x: x.name):
       ordered_columns.append(column)
       with variable_scope.variable_scope(
@@ -121,17 +121,67 @@ def sequence_input_layer(
         # Flattens the final dimension to produce a 3D Tensor.
         num_elements = column._variable_shape.num_elements()
         shape = array_ops.shape(dense_tensor)
+        target_shape = [shape[0], shape[1], num_elements]
         output_tensors.append(
-            array_ops.reshape(
-                dense_tensor,
-                shape=array_ops.concat([shape[:2], [num_elements]], axis=0)))
+            array_ops.reshape(dense_tensor, shape=target_shape))
         sequence_lengths.append(sequence_length)
+
     fc._verify_static_batch_size_equality(output_tensors, ordered_columns)
     fc._verify_static_batch_size_equality(sequence_lengths, ordered_columns)
     sequence_length = _assert_all_equal_and_return(sequence_lengths)
+
     return array_ops.concat(output_tensors, -1), sequence_length
 
 
+def concatenate_context_input(context_input, sequence_input):
+  """Replicates `context_input` across all timesteps of `sequence_input`.
+
+  Expands dimension 1 of `context_input` then tiles it `sequence_length` times.
+  This value is appended to `sequence_input` on dimension 2 and the result is
+  returned.
+
+  Args:
+    context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`.
+    sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size,
+      padded_length, d0]`.
+
+  Returns:
+    A `Tensor` of dtype `float32` and shape `[batch_size, padded_length,
+    d0 + d1]`.
+
+  Raises:
+    ValueError: If `sequence_input` does not have rank 3 or `context_input` does
+      not have rank 2.
+  """
+  seq_rank_check = check_ops.assert_rank(
+      sequence_input,
+      3,
+      message='sequence_input must have rank 3',
+      data=[array_ops.shape(sequence_input)])
+  seq_type_check = check_ops.assert_type(
+      sequence_input,
+      dtypes.float32,
+      message='sequence_input must have dtype float32; got {}.'.format(
+          sequence_input.dtype))
+  ctx_rank_check = check_ops.assert_rank(
+      context_input,
+      2,
+      message='context_input must have rank 2',
+      data=[array_ops.shape(context_input)])
+  ctx_type_check = check_ops.assert_type(
+      context_input,
+      dtypes.float32,
+      message='context_input must have dtype float32; got {}.'.format(
+          context_input.dtype))
+  with ops.control_dependencies(
+      [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]):
+    padded_length = array_ops.shape(sequence_input)[1]
+    tiled_context_input = array_ops.tile(
+        array_ops.expand_dims(context_input, 1),
+        array_ops.concat([[1], [padded_length], [1]], 0))
+  return array_ops.concat([sequence_input, tiled_context_input], 2)
+
+
 def sequence_categorical_column_with_identity(
     key, num_buckets, default_value=None):
   """Returns a feature column that represents sequences of integers.
@@ -453,9 +503,17 @@ class _SequenceNumericColumn(
         [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape],
         axis=0)
     dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)
-    sequence_length = fc._sequence_length_from_sparse_tensor(
-        sp_tensor, num_elements=self._variable_shape.num_elements())
+
+    # Get the number of timesteps per example
+    # For the 2D case, the raw values are grouped according to num_elements;
+    # for the 3D case, the grouping happens in the third dimension, and
+    # sequence length is not affected.
+    num_elements = (self._variable_shape.num_elements()
+                    if sp_tensor.shape.ndims == 2 else 1)
+    seq_length = fc._sequence_length_from_sparse_tensor(
+        sp_tensor, num_elements=num_elements)
+
     return fc._SequenceDenseColumn.TensorSequenceLengthPair(
-        dense_tensor=dense_tensor, sequence_length=sequence_length)
+        dense_tensor=dense_tensor, sequence_length=seq_length)
 
 # pylint: enable=protected-access
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py
new file mode 100644
index 0000000000..d8ca363627
--- /dev/null
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py
@@ -0,0 +1,280 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Integration test for sequence feature columns with SequenceExamples."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import string
+import tempfile
+
+from google.protobuf import text_format
+
+from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.keras.layers import recurrent
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+
+class SequenceFeatureColumnIntegrationTest(test.TestCase):
+
+  def _make_sequence_example(self):
+    example = example_pb2.SequenceExample()
+    example.context.feature['int_ctx'].int64_list.value.extend([5])
+    example.context.feature['float_ctx'].float_list.value.extend([123.6])
+    for val in range(0, 10, 2):
+      feat = feature_pb2.Feature()
+      feat.int64_list.value.extend([val] * val)
+      example.feature_lists.feature_list['int_list'].feature.extend([feat])
+    for val in range(1, 11, 2):
+      feat = feature_pb2.Feature()
+      feat.bytes_list.value.extend([compat.as_bytes(str(val))] * val)
+      example.feature_lists.feature_list['str_list'].feature.extend([feat])
+
+    return example
+
+  def _build_feature_columns(self):
+    col = fc.categorical_column_with_identity(
+        'int_ctx', num_buckets=100)
+    ctx_cols = [
+        fc.embedding_column(col, dimension=10),
+        fc.numeric_column('float_ctx')]
+
+    identity_col = sfc.sequence_categorical_column_with_identity(
+        'int_list', num_buckets=10)
+    bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
+        'bytes_list', hash_bucket_size=100)
+    seq_cols = [
+        fc.embedding_column(identity_col, dimension=10),
+        fc.embedding_column(bucket_col, dimension=20)]
+
+    return ctx_cols, seq_cols
+
+  def test_sequence_example_into_input_layer(self):
+    examples = [_make_sequence_example().SerializeToString()] * 100
+    ctx_cols, seq_cols = self._build_feature_columns()
+
+    def _parse_example(example):
+      ctx, seq = parsing_ops.parse_single_sequence_example(
+          example,
+          context_features=fc.make_parse_example_spec(ctx_cols),
+          sequence_features=fc.make_parse_example_spec(seq_cols))
+      ctx.update(seq)
+      return ctx
+
+    ds = dataset_ops.Dataset.from_tensor_slices(examples)
+    ds = ds.map(_parse_example)
+    ds = ds.batch(20)
+
+    # Test on a single batch
+    features = ds.make_one_shot_iterator().get_next()
+
+    # Tile the context features across the sequence features
+    seq_layer, _ = sfc.sequence_input_layer(features, seq_cols)
+    ctx_layer = fc.input_layer(features, ctx_cols)
+    input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer)
+
+    rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
+    output = rnn_layer(input_layer)
+
+    with self.cached_session() as sess:
+      sess.run(variables.global_variables_initializer())
+      features_r = sess.run(features)
+      self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])
+
+      output_r = sess.run(output)
+      self.assertAllEqual(output_r.shape, [20, 10])
+
+
+class SequenceExampleParsingTest(test.TestCase):
+
+  def test_seq_ex_in_sequence_categorical_column_with_identity(self):
+    self._test_parsed_sequence_example(
+        'int_list', sfc.sequence_categorical_column_with_identity,
+        10, [3, 6], [2, 4, 6])
+
+  def test_seq_ex_in_sequence_categorical_column_with_hash_bucket(self):
+    self._test_parsed_sequence_example(
+        'bytes_list', sfc.sequence_categorical_column_with_hash_bucket,
+        10, [3, 4], [compat.as_bytes(x) for x in 'acg'])
+
+  def test_seq_ex_in_sequence_categorical_column_with_vocabulary_list(self):
+    self._test_parsed_sequence_example(
+        'bytes_list', sfc.sequence_categorical_column_with_vocabulary_list,
+        list(string.ascii_lowercase), [3, 4],
+        [compat.as_bytes(x) for x in 'acg'])
+
+  def test_seq_ex_in_sequence_categorical_column_with_vocabulary_file(self):
+    _, fname = tempfile.mkstemp()
+    with open(fname, 'w') as f:
+      f.write(string.ascii_lowercase)
+    self._test_parsed_sequence_example(
+        'bytes_list', sfc.sequence_categorical_column_with_vocabulary_file,
+        fname, [3, 4], [compat.as_bytes(x) for x in 'acg'])
+
+  def _test_parsed_sequence_example(
+      self, col_name, col_fn, col_arg, shape, values):
+    """Helper function to check that each FeatureColumn parses correctly.
+
+    Args:
+      col_name: string, name to give to the feature column. Should match
+        the name that the column will parse out of the features dict.
+      col_fn: function used to create the feature column. For example,
+        sequence_numeric_column.
+      col_arg: second arg that the target feature column is expecting.
+      shape: the expected dense_shape of the feature after parsing into
+        a SparseTensor.
+      values: the expected values at index [0, 2, 6] of the feature
+        after parsing into a SparseTensor.
+    """
+    example = _make_sequence_example()
+    columns = [
+        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
+        fc.numeric_column('float_ctx'),
+        col_fn(col_name, col_arg)
+    ]
+    context, seq_features = parsing_ops.parse_single_sequence_example(
+        example.SerializeToString(),
+        context_features=fc.make_parse_example_spec(columns[:2]),
+        sequence_features=fc.make_parse_example_spec(columns[2:]))
+
+    with self.cached_session() as sess:
+      ctx_result, seq_result = sess.run([context, seq_features])
+      self.assertEqual(list(seq_result[col_name].dense_shape), shape)
+      self.assertEqual(
+          list(seq_result[col_name].values[[0, 2, 6]]), values)
+      self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
+      self.assertEqual(ctx_result['int_ctx'].values[0], 5)
+      self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
+      self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
+
+
+_SEQ_EX_PROTO = """
+context {
+  feature {
+    key: "float_ctx"
+    value {
+      float_list {
+        value: 123.6
+      }
+    }
+  }
+  feature {
+    key: "int_ctx"
+    value {
+      int64_list {
+        value: 5
+      }
+    }
+  }
+}
+feature_lists {
+  feature_list {
+    key: "bytes_list"
+    value {
+      feature {
+        bytes_list {
+          value: "a"
+        }
+      }
+      feature {
+        bytes_list {
+          value: "b"
+          value: "c"
+        }
+      }
+      feature {
+        bytes_list {
+          value: "d"
+          value: "e"
+          value: "f"
+          value: "g"
+        }
+      }
+    }
+  }
+  feature_list {
+    key: "float_list"
+    value {
+      feature {
+        float_list {
+          value: 1.0
+        }
+      }
+      feature {
+        float_list {
+          value: 3.0
+          value: 3.0
+          value: 3.0
+        }
+      }
+      feature {
+        float_list {
+          value: 5.0
+          value: 5.0
+          value: 5.0
+          value: 5.0
+          value: 5.0
+        }
+      }
+    }
+  }
+  feature_list {
+    key: "int_list"
+    value {
+      feature {
+        int64_list {
+          value: 2
+          value: 2
+        }
+      }
+      feature {
+        int64_list {
+          value: 4
+          value: 4
+          value: 4
+          value: 4
+        }
+      }
+      feature {
+        int64_list {
+          value: 6
+          value: 6
+          value: 6
+          value: 6
+          value: 6
+          value: 6
+        }
+      }
+    }
+  }
+}
+"""
+
+
+def _make_sequence_example():
+  example = example_pb2.SequenceExample()
+  return text_format.Parse(_SEQ_EX_PROTO, example)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
index 45d7b74046..929e83523a 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import os
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc
@@ -28,28 +29,61 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training import monitored_session
 
 
-class SequenceInputLayerTest(test.TestCase):
+class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_a': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2, 0, 1),
+           dense_shape=(2, 2)),
+       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           # example 0, ids [1]
+           # example 1, ids [2, 0]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(1, 2, 0),
+           dense_shape=(2, 2)),
+       'expected_input_layer': [
+           # example 0, ids_a [2], ids_b [1]
+           [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
+           # example 1, ids_a [0, 1], ids_b [2, 0]
+           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],],
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'sparse_input_a': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[2], [0, 1]]
+           # feature 1, ids [[0, 0], [1]]
+           indices=(
+               (0, 0, 0), (0, 1, 0), (0, 1, 1),
+               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2, 0, 1, 0, 0, 1),
+           dense_shape=(2, 2, 2)),
+       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[1, 1], [1]]
+           # feature 1, ids [[2], [0]]
+           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           values=(1, 1, 1, 2, 0),
+           dense_shape=(2, 2, 2)),
+       'expected_input_layer': [
+           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
+           [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]],
+           # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -]
+           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]],
+       'expected_sequence_length': [2, 2]},
+      )
+  def test_embedding_column(
+      self, sparse_input_a, sparse_input_b, expected_input_layer,
+      expected_sequence_length):
 
-  def test_embedding_column(self):
     vocabulary_size = 3
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [1]
-        # example 1, ids [2, 0]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(1, 2, 0),
-        dense_shape=(2, 2))
-
     embedding_dimension_a = 2
     embedding_values_a = (
         (1., 2.),  # id 0
@@ -70,14 +104,6 @@ class SequenceInputLayerTest(test.TestCase):
         return embedding_values
       return _initializer
 
-    expected_input_layer = [
-        # example 0, ids_a [2], ids_b [1]
-        [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
-        # example 1, ids_a [0, 1], ids_b [2, 0]
-        [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],
-    ]
-    expected_sequence_length = [1, 2]
-
     categorical_column_a = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     embedding_column_a = fc.embedding_column(
@@ -233,29 +259,53 @@ class SequenceInputLayerTest(test.TestCase):
           },
           feature_columns=shared_embedding_columns)
 
-  def test_indicator_column(self):
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input_a': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2, 0, 1),
+           dense_shape=(2, 2)),
+       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           # example 0, ids [1]
+           # example 1, ids [1, 0]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(1, 1, 0),
+           dense_shape=(2, 2)),
+       'expected_input_layer': [
+           # example 0, ids_a [2], ids_b [1]
+           [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
+           # example 1, ids_a [0, 1], ids_b [1, 0]
+           [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'sparse_input_a': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[2], [0, 1]]
+           # feature 1, ids [[0, 0], [1]]
+           indices=(
+               (0, 0, 0), (0, 1, 0), (0, 1, 1),
+               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2, 0, 1, 0, 0, 1),
+           dense_shape=(2, 2, 2)),
+       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[1, 1], [1]]
+           # feature 1, ids [[1], [0]]
+           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           values=(1, 1, 1, 1, 0),
+           dense_shape=(2, 2, 2)),
+       'expected_input_layer': [
+           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
+           [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]],
+           # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -]
+           [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
+       'expected_sequence_length': [2, 2]},
+      )
+  def test_indicator_column(
+      self, sparse_input_a, sparse_input_b, expected_input_layer,
+      expected_sequence_length):
     vocabulary_size_a = 3
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
     vocabulary_size_b = 2
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [1]
-        # example 1, ids [1, 0]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(1, 1, 0),
-        dense_shape=(2, 2))
-
-    expected_input_layer = [
-        # example 0, ids_a [2], ids_b [1]
-        [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
-        # example 1, ids_a [0, 1], ids_b [1, 0]
-        [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
-    ]
-    expected_sequence_length = [1, 2]
 
     categorical_column_a = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size_a)
@@ -298,18 +348,32 @@ class SequenceInputLayerTest(test.TestCase):
           features={'aaa': sparse_input},
           feature_columns=[indicator_column_a])
 
-  def test_numeric_column(self):
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0.], [1]]
-        # example 1, [[10.]]
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0., 1., 10.),
-        dense_shape=(2, 2))
-    expected_input_layer = [
-        [[0.], [1.]],
-        [[10.], [0.]],
-    ]
-    expected_sequence_length = [2, 1]
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [0., 1]
+           # example 1, [10.]
+           indices=((0, 0), (0, 1), (1, 0)),
+           values=(0., 1., 10.),
+           dense_shape=(2, 2)),
+       'expected_input_layer': [
+           [[0.], [1.]],
+           [[10.], [0.]]],
+       'expected_sequence_length': [2, 1]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[20, 3], [5]]
+           # feature 1, ids [[3], [8]]
+           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           values=(20, 3, 5., 3., 8.),
+           dense_shape=(2, 2, 2)),
+       'expected_input_layer': [
+           [[20.], [3.], [5.], [0.]],
+           [[3.], [0.], [8.], [0.]]],
+       'expected_sequence_length': [2, 2]},
+      )
+  def test_numeric_column(
+      self, sparse_input, expected_input_layer, expected_sequence_length):
     numeric_column = sfc.sequence_numeric_column('aaa')
 
     input_layer, sequence_length = sfc.sequence_input_layer(
@@ -321,21 +385,38 @@ class SequenceInputLayerTest(test.TestCase):
       self.assertAllEqual(
           expected_sequence_length, sequence_length.eval(session=sess))
 
-  def test_numeric_column_multi_dim(self):
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [0., 1.,  2., 3., 4., 5., 6., 7.]
+           # example 1, [10., 11., 12., 13.]
+           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 8)),
+       'expected_input_layer': [
+           # The output of numeric_column._get_dense_tensor should be flattened.
+           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
+           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
+       'expected_sequence_length': [2, 1]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
+           # example 1, [[10., 11., 12., 13.], []]
+           indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                    (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
+                    (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 2, 4)),
+       'expected_input_layer': [
+           # The output of numeric_column._get_dense_tensor should be flattened.
+           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
+           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
+       'expected_sequence_length': [2, 1]},
+      )
+  def test_numeric_column_multi_dim(
+      self, sparse_input, expected_input_layer, expected_sequence_length):
     """Tests sequence_input_layer for multi-dimensional numeric_column."""
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
-        # example 1, [[[10., 11.],  [12., 13.]]]
-        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
-                 (1, 0), (1, 1), (1, 2), (1, 3)),
-        values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-        dense_shape=(2, 8))
-    # The output of numeric_column._get_dense_tensor should be flattened.
-    expected_input_layer = [
-        [[0., 1., 2., 3.], [4., 5., 6., 7.]],
-        [[10., 11., 12., 13.], [0., 0., 0., 0.]],
-    ]
-    expected_sequence_length = [2, 1]
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     input_layer, sequence_length = sfc.sequence_input_layer(
@@ -377,6 +458,134 @@ class SequenceInputLayerTest(test.TestCase):
           r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]'):
         sess.run(sequence_length)
 
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
+           # example 1, [[[10., 11.],  [12., 13.]]]
+           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 8)),
+       'expected_shape': [2, 2, 4]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
+           # example 1, [[10., 11., 12., 13.], []]
+           indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                    (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2),
+                    (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 2, 4)),
+       'expected_shape': [2, 2, 4]},
+      )
+  def test_static_shape_from_tensors_numeric(
+      self, sparse_input, expected_shape):
+    """Tests that we return a known static shape when we have one."""
+    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
+
+    input_layer, _ = sfc.sequence_input_layer(
+        features={'aaa': sparse_input},
+        feature_columns=[numeric_column])
+    shape = input_layer.get_shape()
+    self.assertEqual(shape, expected_shape)
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           # example 2, ids []
+           # example 3, ids [1]
+           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
+           values=(2, 0, 1, 1),
+           dense_shape=(4, 2)),
+       'expected_shape': [4, 2, 3]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           # example 2, ids []
+           # example 3, ids [[1], [0, 2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           values=(2, 0, 1, 2, 1, 0, 2),
+           dense_shape=(4, 2, 2)),
+       'expected_shape': [4, 2, 3]}
+      )
+  def test_static_shape_from_tensors_indicator(
+      self, sparse_input, expected_shape):
+    """Tests that we return a known static shape when we have one."""
+    categorical_column = sfc.sequence_categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    indicator_column = fc.indicator_column(categorical_column)
+
+    input_layer, _ = sfc.sequence_input_layer(
+        features={'aaa': sparse_input}, feature_columns=[indicator_column])
+    shape = input_layer.get_shape()
+    self.assertEqual(shape, expected_shape)
+
+
+class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
+  """Tests the utility fn concatenate_context_input."""
+
+  def test_concatenate_context_input(self):
+    seq_input = ops.convert_to_tensor(np.arange(12).reshape(2, 3, 2))
+    context_input = ops.convert_to_tensor(np.arange(10).reshape(2, 5))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    input_layer = sfc.concatenate_context_input(context_input, seq_input)
+
+    expected = np.array([
+        [[0, 1, 0, 1, 2, 3, 4], [2, 3, 0, 1, 2, 3, 4], [4, 5, 0, 1, 2, 3, 4]],
+        [[6, 7, 5, 6, 7, 8, 9], [8, 9, 5, 6, 7, 8, 9], [10, 11, 5, 6, 7, 8, 9]]
+    ], dtype=np.float32)
+    with monitored_session.MonitoredSession() as sess:
+      output = sess.run(input_layer)
+      self.assertAllEqual(expected, output)
+
+  @parameterized.named_parameters(
+      {'testcase_name': 'rank_lt_3',
+       'seq_input': ops.convert_to_tensor(np.arange(100).reshape(10, 10))},
+      {'testcase_name': 'rank_gt_3',
+       'seq_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 2, 2))}
+      )
+  def test_sequence_input_throws_error(self, seq_input):
+    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(ValueError, 'sequence_input must have rank 3'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+  @parameterized.named_parameters(
+      {'testcase_name': 'rank_lt_2',
+       'context_input': ops.convert_to_tensor(np.arange(100))},
+      {'testcase_name': 'rank_gt_2',
+       'context_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))}
+      )
+  def test_context_input_throws_error(self, context_input):
+    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(ValueError, 'context_input must have rank 2'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+  def test_integer_seq_input_throws_error(self):
+    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
+    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
+    context_input = math_ops.cast(context_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(
+        TypeError, 'sequence_input must have dtype float32'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
+  def test_integer_context_input_throws_error(self):
+    seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
+    context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
+    seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
+    with self.assertRaisesRegexp(
+        TypeError, 'context_input must have dtype float32'):
+      sfc.concatenate_context_input(context_input, seq_input)
+
 
 class InputLayerTest(test.TestCase):
   """Tests input_layer with sequence feature columns."""
@@ -443,75 +652,79 @@ def _assert_sparse_tensor_indices_shape(test_case, expected, actual):
   test_case.assertAllEqual(expected.dense_shape, actual.dense_shape)
 
 
-class SequenceCategoricalColumnWithIdentityTest(test.TestCase):
-
-  def test_get_sparse_tensors(self):
-    column = sfc.sequence_categorical_column_with_identity(
-        'aaa', num_buckets=3)
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(1, 2, 0),
-        dense_shape=(2, 2))
-    expected_sparse_ids = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        values=np.array((1, 2, 0), dtype=np.int64),
-        dense_shape=(2, 2, 1))
+class SequenceCategoricalColumnWithIdentityTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(1, 2, 0),
+           dense_shape=(2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           values=np.array((1, 2, 0), dtype=np.int64),
+           dense_shape=(2, 2, 1))},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=(6, 7, 8),
+           dense_shape=(2, 2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=(6, 7, 8),
+           dense_shape=(2, 2, 2))}
+      )
+  def test_get_sparse_tensors(self, inputs, expected):
+    column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9)
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
       _assert_sparse_tensor_value(
-          self,
-          expected_sparse_ids,
-          id_weight_pair.id_tensor.eval(session=sess))
-
-  def test_get_sparse_tensors_inputs3d(self):
-    """Tests _get_sparse_tensors when the input is already 3D Tensor."""
-    column = sfc.sequence_categorical_column_with_identity(
-        'aaa', num_buckets=3)
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        values=(1, 2, 0),
-        dense_shape=(2, 2, 1))
-
-    with self.assertRaisesRegexp(
-        errors.InvalidArgumentError,
-        r'Column aaa expected ID tensor of rank 2\.\s*'
-        r'id_tensor shape:\s*\[2 2 1\]'):
-      id_weight_pair = column._get_sparse_tensors(
-          _LazyBuilder({'aaa': inputs}))
-      with monitored_session.MonitoredSession() as sess:
-        id_weight_pair.id_tensor.eval(session=sess)
-
-
-class SequenceCategoricalColumnWithHashBucketTest(test.TestCase):
-
-  def test_get_sparse_tensors(self):
+          self, expected, id_weight_pair.id_tensor.eval(session=sess))
+
+
+class SequenceCategoricalColumnWithHashBucketTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=('omar', 'stringer', 'marlo'),
+           dense_shape=(2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           # Ignored to avoid hash dependence in test.
+           values=np.array((0, 0, 0), dtype=np.int64),
+           dense_shape=(2, 2, 1))},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=('omar', 'stringer', 'marlo'),
+           dense_shape=(2, 2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           # Ignored to avoid hash dependence in test.
+           values=np.array((0, 0, 0), dtype=np.int64),
+           dense_shape=(2, 2, 2))}
+      )
+  def test_get_sparse_tensors(self, inputs, expected):
     column = sfc.sequence_categorical_column_with_hash_bucket(
         'aaa', hash_bucket_size=10)
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=('omar', 'stringer', 'marlo'),
-        dense_shape=(2, 2))
-
-    expected_sparse_ids = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        # Ignored to avoid hash dependence in test.
-        values=np.array((0, 0, 0), dtype=np.int64),
-        dense_shape=(2, 2, 1))
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
       _assert_sparse_tensor_indices_shape(
-          self,
-          expected_sparse_ids,
-          id_weight_pair.id_tensor.eval(session=sess))
+          self, expected, id_weight_pair.id_tensor.eval(session=sess))
 
 
-class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase):
+class SequenceCategoricalColumnWithVocabularyFileTest(
+    test.TestCase, parameterized.TestCase):
 
   def _write_vocab(self, vocab_strings, file_name):
     vocab_file = os.path.join(self.get_temp_dir(), file_name)
@@ -527,68 +740,120 @@ class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase):
                                                         'wire_vocabulary.txt')
     self._wire_vocabulary_size = 3
 
-  def test_get_sparse_tensors(self):
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=('marlo', 'skywalker', 'omar'),
+           dense_shape=(2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           values=np.array((2, -1, 0), dtype=np.int64),
+           dense_shape=(2, 2, 1))},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=('omar', 'skywalker', 'marlo'),
+           dense_shape=(2, 2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=np.array((0, -1, 2), dtype=np.int64),
+           dense_shape=(2, 2, 2))}
+      )
+  def test_get_sparse_tensors(self, inputs, expected):
     column = sfc.sequence_categorical_column_with_vocabulary_file(
         key='aaa',
         vocabulary_file=self._wire_vocabulary_file_name,
         vocabulary_size=self._wire_vocabulary_size)
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=('marlo', 'skywalker', 'omar'),
-        dense_shape=(2, 2))
-    expected_sparse_ids = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        values=np.array((2, -1, 0), dtype=np.int64),
-        dense_shape=(2, 2, 1))
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
       _assert_sparse_tensor_value(
-          self,
-          expected_sparse_ids,
-          id_weight_pair.id_tensor.eval(session=sess))
-
-
-class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase):
-
-  def test_get_sparse_tensors(self):
+          self, expected, id_weight_pair.id_tensor.eval(session=sess))
+
+
+class SequenceCategoricalColumnWithVocabularyListTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=('marlo', 'skywalker', 'omar'),
+           dense_shape=(2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           values=np.array((2, -1, 0), dtype=np.int64),
+           dense_shape=(2, 2, 1))},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=('omar', 'skywalker', 'marlo'),
+           dense_shape=(2, 2, 2)),
+       'expected': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           values=np.array((0, -1, 2), dtype=np.int64),
+           dense_shape=(2, 2, 2))}
+      )
+  def test_get_sparse_tensors(self, inputs, expected):
     column = sfc.sequence_categorical_column_with_vocabulary_list(
         key='aaa',
         vocabulary_list=('omar', 'stringer', 'marlo'))
-    inputs = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=('marlo', 'skywalker', 'omar'),
-        dense_shape=(2, 2))
-    expected_sparse_ids = sparse_tensor.SparseTensorValue(
-        indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-        values=np.array((2, -1, 0), dtype=np.int64),
-        dense_shape=(2, 2, 1))
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
       _assert_sparse_tensor_value(
-          self,
-          expected_sparse_ids,
-          id_weight_pair.id_tensor.eval(session=sess))
-
-
-class SequenceEmbeddingColumnTest(test.TestCase):
-
-  def test_get_sequence_dense_tensor(self):
+          self, expected, id_weight_pair.id_tensor.eval(session=sess))
+
+
+class SequenceEmbeddingColumnTest(
+    test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           # example 2, ids []
+           # example 3, ids [1]
+           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
+           values=(2, 0, 1, 1),
+           dense_shape=(4, 2)),
+       'expected': [
+           # example 0, ids [2]
+           [[7., 11.], [0., 0.]],
+           # example 1, ids [0, 1]
+           [[1., 2.], [3., 5.]],
+           # example 2, ids []
+           [[0., 0.], [0., 0.]],
+           # example 3, ids [1]
+           [[3., 5.], [0., 0.]]]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           # example 2, ids []
+           # example 3, ids [[1], [0, 2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           values=(2, 0, 1, 2, 1, 0, 2),
+           dense_shape=(4, 2, 2)),
+       'expected': [
+           # example 0, ids [[2]]
+           [[7., 11.], [0., 0.]],
+           # example 1, ids [[0, 1], [2]]
+           [[2, 3.5], [7., 11.]],
+           # example 2, ids []
+           [[0., 0.], [0., 0.]],
+           # example 3, ids [[1], [0, 2]]
+           [[3., 5.], [4., 6.5]]]}
+      )
+  def test_get_sequence_dense_tensor(self, inputs, expected):
     vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 2))
-
     embedding_dimension = 2
     embedding_values = (
         (1., 2.),  # id 0
@@ -601,17 +866,6 @@ class SequenceEmbeddingColumnTest(test.TestCase):
       self.assertIsNone(partition_info)
       return embedding_values
 
-    expected_lookups = [
-        # example 0, ids [2]
-        [[7., 11.], [0., 0.]],
-        # example 1, ids [0, 1]
-        [[1., 2.], [3., 5.]],
-        # example 2, ids []
-        [[0., 0.], [0., 0.]],
-        # example 3, ids [1]
-        [[3., 5.], [0., 0.]],
-    ]
-
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     embedding_column = fc.embedding_column(
@@ -619,24 +873,35 @@ class SequenceEmbeddingColumnTest(test.TestCase):
         initializer=_initializer)
 
     embedding_lookup, _ = embedding_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
     self.assertItemsEqual(
         ('embedding_weights:0',), tuple([v.name for v in global_vars]))
     with monitored_session.MonitoredSession() as sess:
       self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
-      self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess))
-
-  def test_sequence_length(self):
+      self.assertAllEqual(expected, embedding_lookup.eval(session=sess))
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2, 0, 1),
+           dense_shape=(2, 2)),
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2, 0, 1, 2),
+           dense_shape=(2, 2, 2)),
+       'expected_sequence_length': [1, 2]}
+      )
+  def test_sequence_length(self, inputs, expected_sequence_length):
     vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    expected_sequence_length = [1, 2]
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
@@ -644,7 +909,7 @@ class SequenceEmbeddingColumnTest(test.TestCase):
         categorical_column, dimension=2)
 
     _, sequence_length = embedding_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
       sequence_length = sess.run(sequence_length)
@@ -855,56 +1120,87 @@ class SequenceSharedEmbeddingColumnTest(test.TestCase):
           expected_sequence_length_b, sequence_length_b.eval(session=sess))
 
 
-class SequenceIndicatorColumnTest(test.TestCase):
-
-  def test_get_sequence_dense_tensor(self):
+class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           # example 2, ids []
+           # example 3, ids [1]
+           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
+           values=(2, 0, 1, 1),
+           dense_shape=(4, 2)),
+       'expected': [
+           # example 0, ids [2]
+           [[0., 0., 1.], [0., 0., 0.]],
+           # example 1, ids [0, 1]
+           [[1., 0., 0.], [0., 1., 0.]],
+           # example 2, ids []
+           [[0., 0., 0.], [0., 0., 0.]],
+           # example 3, ids [1]
+           [[0., 1., 0.], [0., 0., 0.]]]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           # example 2, ids []
+           # example 3, ids [[1], [2, 2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           values=(2, 0, 1, 2, 1, 2, 2),
+           dense_shape=(4, 2, 2)),
+       'expected': [
+           # example 0, ids [[2]]
+           [[0., 0., 1.], [0., 0., 0.]],
+           # example 1, ids [[0, 1], [2]]
+           [[1., 1., 0.], [0., 0., 1.]],
+           # example 2, ids []
+           [[0., 0., 0.], [0., 0., 0.]],
+           # example 3, ids [[1], [2, 2]]
+           [[0., 1., 0.], [0., 0., 2.]]]}
+      )
+  def test_get_sequence_dense_tensor(self, inputs, expected):
     vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 2))
-
-    expected_lookups = [
-        # example 0, ids [2]
-        [[0., 0., 1.], [0., 0., 0.]],
-        # example 1, ids [0, 1]
-        [[1., 0., 0.], [0., 1., 0.]],
-        # example 2, ids []
-        [[0., 0., 0.], [0., 0., 0.]],
-        # example 3, ids [1]
-        [[0., 1., 0.], [0., 0., 0.]],
-    ]
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     indicator_column = fc.indicator_column(categorical_column)
 
     indicator_tensor, _ = indicator_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess))
-
-  def test_sequence_length(self):
+      self.assertAllEqual(expected, indicator_tensor.eval(session=sess))
+
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2, 0, 1),
+           dense_shape=(2, 2)),
+       'expected_sequence_length': [1, 2]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2, 0, 1, 2),
+           dense_shape=(2, 2, 2)),
+       'expected_sequence_length': [1, 2]}
+      )
+  def test_sequence_length(self, inputs, expected_sequence_length):
     vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    expected_sequence_length = [1, 2]
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     indicator_column = fc.indicator_column(categorical_column)
 
     _, sequence_length = indicator_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
       sequence_length = sess.run(sequence_length)
@@ -938,7 +1234,7 @@ class SequenceIndicatorColumnTest(test.TestCase):
           expected_sequence_length, sequence_length.eval(session=sess))
 
 
-class SequenceNumericColumnTest(test.TestCase):
+class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
 
   def test_defaults(self):
     a = sfc.sequence_numeric_column('aaa')
@@ -971,25 +1267,36 @@ class SequenceNumericColumnTest(test.TestCase):
     with self.assertRaisesRegexp(TypeError, 'must be a callable'):
       sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')
 
-  def test_get_sequence_dense_tensor(self):
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0.], [1]]
-        # example 1, [[10.]]
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0., 1., 10.),
-        dense_shape=(2, 2))
-    expected_dense_tensor = [
-        [[0.], [1.]],
-        [[10.], [0.]],
-    ]
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, values [0., 1]
+           # example 1, [10.]
+           indices=((0, 0), (0, 1), (1, 0)),
+           values=(0., 1., 10.),
+           dense_shape=(2, 2)),
+       'expected': [
+           [[0.], [1.]],
+           [[10.], [0.]]]},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # feature 0, ids [[20, 3], [5]]
+           # feature 1, ids [[3], [8]]
+           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           values=(20, 3, 5., 3., 8.),
+           dense_shape=(2, 2, 2)),
+       'expected': [
+           [[20.], [3.], [5.], [0.]],
+           [[3.], [0.], [8.], [0.]]]},
+      )
+  def test_get_sequence_dense_tensor(self, inputs, expected):
     numeric_column = sfc.sequence_numeric_column('aaa')
 
     dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_dense_tensor, dense_tensor.eval(session=sess))
+      self.assertAllEqual(expected, dense_tensor.eval(session=sess))
 
   def test_get_sequence_dense_tensor_with_normalizer_fn(self):
 
@@ -1026,41 +1333,34 @@ class SequenceNumericColumnTest(test.TestCase):
       self.assertAllEqual(
           expected_dense_tensor, dense_tensor.eval(session=sess))
 
-  def test_get_sequence_dense_tensor_with_shape(self):
-    """Tests get_sequence_dense_tensor with shape !=(1,)."""
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0., 1., 2.], [3., 4., 5.]]
-        # example 1, [[10., 11., 12.]]
-        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
-                 (1, 0), (1, 1), (1, 2)),
-        values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
-        dense_shape=(2, 6))
-    expected_dense_tensor = [
-        [[0., 1., 2.], [3., 4., 5.]],
-        [[10., 11., 12.], [0., 0., 0.]],
-    ]
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,))
-
-    dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_dense_tensor, dense_tensor.eval(session=sess))
-
-  def test_get_dense_tensor_multi_dim(self):
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
+           # example 1, [[[10., 11.],  [12., 13.]]]
+           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 8)),
+       'expected_dense_tensor': [
+           [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
+           [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]},
+      {'testcase_name': '3D',
+       'sparse_input': sparse_tensor.SparseTensorValue(
+           indices=((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
+                    (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
+                    (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
+           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           dense_shape=(2, 2, 8)),
+       'expected_dense_tensor': [
+           [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]],
+            [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]],
+           [[[10., 0.], [11., 0.]], [[12., 0.], [13., 0.]],
+            [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]},
+      )
+  def test_get_dense_tensor_multi_dim(
+      self, sparse_input, expected_dense_tensor):
     """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
-        # example 1, [[[10., 11.],  [12., 13.]]]
-        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
-                 (1, 0), (1, 1), (1, 2), (1, 3)),
-        values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-        dense_shape=(2, 8))
-    expected_dense_tensor = [
-        [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
-        [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]],
-    ]
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
@@ -1070,43 +1370,55 @@ class SequenceNumericColumnTest(test.TestCase):
       self.assertAllEqual(
           expected_dense_tensor, dense_tensor.eval(session=sess))
 
-  def test_sequence_length(self):
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0., 1., 2.], [3., 4., 5.]]
-        # example 1, [[10., 11., 12.]]
-        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
-                 (1, 0), (1, 1), (1, 2)),
-        values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
-        dense_shape=(2, 6))
-    expected_sequence_length = [2, 1]
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,))
+  @parameterized.named_parameters(
+      {'testcase_name': '2D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2., 0., 1.),
+           dense_shape=(2, 2)),
+       'expected_sequence_length': [1, 2],
+       'shape': (1,)},
+      {'testcase_name': '3D',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2., 0., 1., 2.),
+           dense_shape=(2, 2, 2)),
+       'expected_sequence_length': [1, 2],
+       'shape': (1,)},
+      {'testcase_name': '2D_with_shape',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [2]
+           # example 1, ids [0, 1]
+           indices=((0, 0), (1, 0), (1, 1)),
+           values=(2., 0., 1.),
+           dense_shape=(2, 2)),
+       'expected_sequence_length': [1, 1],
+       'shape': (2,)},
+      {'testcase_name': '3D_with_shape',
+       'inputs': sparse_tensor.SparseTensorValue(
+           # example 0, ids [[2]]
+           # example 1, ids [[0, 1], [2]]
+           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           values=(2., 0., 1., 2.),
+           dense_shape=(2, 2, 2)),
+       'expected_sequence_length': [1, 2],
+       'shape': (2,)},
+      )
+  def test_sequence_length(self, inputs, expected_sequence_length, shape):
+    numeric_column = sfc.sequence_numeric_column('aaa', shape=shape)
 
     _, sequence_length = numeric_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+        _LazyBuilder({'aaa': inputs}))
 
     with monitored_session.MonitoredSession() as sess:
       sequence_length = sess.run(sequence_length)
       self.assertAllEqual(expected_sequence_length, sequence_length)
       self.assertEqual(np.int64, sequence_length.dtype)
 
-  def test_sequence_length_with_shape(self):
-    """Tests _sequence_length with shape !=(1,)."""
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, values [[0.], [1]]
-        # example 1, [[10.]]
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0., 1., 10.),
-        dense_shape=(2, 2))
-    expected_sequence_length = [2, 1]
-    numeric_column = sfc.sequence_numeric_column('aaa')
-
-    _, sequence_length = numeric_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
-
-    with monitored_session.MonitoredSession() as sess:
-      self.assertAllEqual(
-          expected_sequence_length, sequence_length.eval(session=sess))
-
   def test_sequence_length_with_empty_rows(self):
     """Tests _sequence_length when some examples do not have ids."""
     sparse_input = sparse_tensor.SparseTensorValue(
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 5352796174..28a8286544 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -2660,6 +2660,7 @@ class _EmbeddingColumn(
         inputs=inputs,
         weight_collections=weight_collections,
         trainable=trainable)
+
     sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
     sequence_length = _sequence_length_from_sparse_tensor(
         sparse_tensors.id_tensor)
@@ -3383,6 +3384,16 @@ class _IndicatorColumn(_DenseColumn, _SequenceDenseColumn,
 
 
 def _verify_static_batch_size_equality(tensors, columns):
+  """Validates that the first dim (batch size) of all tensors are equal or None.
+
+  Args:
+    tensors: list of tensors to check.
+    columns: list of feature columns matching tensors. Will be used for error
+      messaging.
+
+  Raises:
+    ValueError: if one of the tensors has a variant batch size
+  """
   # bath_size is a tf.Dimension object.
   expected_batch_size = None
   for i in range(0, len(tensors)):
@@ -3403,9 +3414,18 @@ def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1):
   with ops.name_scope(None, 'sequence_length') as name_scope:
     row_ids = sp_tensor.indices[:, 0]
     column_ids = sp_tensor.indices[:, 1]
+    # Add one to convert column indices to element length
     column_ids += array_ops.ones_like(column_ids)
-    seq_length = math_ops.to_int64(
-        math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements)
+    # Get the number of elements we will have per example/row
+    seq_length = math_ops.segment_max(column_ids, segment_ids=row_ids)
+
+    # The raw values are grouped according to num_elements;
+    # how many entities will we have after grouping?
+    # Example: orig tensor [[1, 2], [3]], col_ids = (0, 1, 1),
+    # row_ids = (0, 0, 1), seq_length = [2, 1]. If num_elements = 2,
+    # these will get grouped, and the final seq_length is [1, 1]
+    seq_length = math_ops.to_int64(math_ops.ceil(seq_length / num_elements))
+
     # If the last n rows do not have ids, seq_length will have shape
     # [batch_size - n]. Pad the remaining values with zeros.
     n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1]
@@ -3439,25 +3459,14 @@ class _SequenceCategoricalColumn(
     sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
     id_tensor = sparse_tensors.id_tensor
     weight_tensor = sparse_tensors.weight_tensor
-    # Expands final dimension, so that embeddings are not combined during
-    # embedding lookup.
-    check_id_rank = check_ops.assert_equal(
-        array_ops.rank(id_tensor), 2,
-        data=[
-            'Column {} expected ID tensor of rank 2. '.format(self.name),
-            'id_tensor shape: ', array_ops.shape(id_tensor)])
-    with ops.control_dependencies([check_id_rank]):
-      id_tensor = sparse_ops.sparse_reshape(
-          id_tensor,
-          shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0))
+
+    # Expands third dimension, if necessary so that embeddings are not
+    # combined during embedding lookup. If the tensor is already 3D, leave
+    # as-is.
+    shape = array_ops.shape(id_tensor)
+    target_shape = [shape[0], shape[1], -1]
+    id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape)
     if weight_tensor is not None:
-      check_weight_rank = check_ops.assert_equal(
-          array_ops.rank(weight_tensor), 2,
-          data=[
-              'Column {} expected weight tensor of rank 2.'.format(self.name),
-              'weight_tensor shape:', array_ops.shape(weight_tensor)])
-      with ops.control_dependencies([check_weight_rank]):
-        weight_tensor = sparse_ops.sparse_reshape(
-            weight_tensor,
-            shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0))
+      weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape)
+
     return _CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index ff50fe0d09..a2da6412ed 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -217,21 +217,21 @@ def _features_to_raw_params(features, types):
       feature = features[key]
       if isinstance(feature, VarLenFeature):
         if VarLenFeature not in types:
-          raise ValueError("Unsupported VarLenFeature %s." % feature)
+          raise ValueError("Unsupported VarLenFeature %s." % (feature,))
         if not feature.dtype:
           raise ValueError("Missing type for feature %s." % key)
         sparse_keys.append(key)
         sparse_types.append(feature.dtype)
       elif isinstance(feature, SparseFeature):
         if SparseFeature not in types:
-          raise ValueError("Unsupported SparseFeature %s." % feature)
+          raise ValueError("Unsupported SparseFeature %s." % (feature,))
 
         if not feature.index_key:
           raise ValueError(
-              "Missing index_key for SparseFeature %s." % feature)
+              "Missing index_key for SparseFeature %s." % (feature,))
         if not feature.value_key:
           raise ValueError(
-              "Missing value_key for SparseFeature %s." % feature)
+              "Missing value_key for SparseFeature %s." % (feature,))
         if not feature.dtype:
           raise ValueError("Missing type for feature %s." % key)
         index_keys = feature.index_key
@@ -260,7 +260,7 @@ def _features_to_raw_params(features, types):
           sparse_types.append(feature.dtype)
       elif isinstance(feature, FixedLenFeature):
         if FixedLenFeature not in types:
-          raise ValueError("Unsupported FixedLenFeature %s." % feature)
+          raise ValueError("Unsupported FixedLenFeature %s." % (feature,))
         if not feature.dtype:
           raise ValueError("Missing type for feature %s." % key)
         if feature.shape is None:
@@ -281,7 +281,8 @@ def _features_to_raw_params(features, types):
           dense_defaults[key] = feature.default_value
       elif isinstance(feature, FixedLenSequenceFeature):
         if FixedLenSequenceFeature not in types:
-          raise ValueError("Unsupported FixedLenSequenceFeature %s." % feature)
+          raise ValueError("Unsupported FixedLenSequenceFeature %s." % (
+              feature,))
         if not feature.dtype:
           raise ValueError("Missing type for feature %s." % key)
         if feature.shape is None:
-- 
GitLab


From 8ef3e7c8c053cb6dad530e13c478bbd406ea2c95 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Mon, 8 Oct 2018 10:43:01 -0700
Subject: [PATCH 0508/1085] Part 1/3 of the feature sync to the Keras 2.2.4
 API.

PiperOrigin-RevId: 216211279
---
 tensorflow/python/keras/activations.py        |   5 +
 tensorflow/python/keras/activations_test.py   |  10 +
 tensorflow/python/keras/backend.py            |  81 ++++++--
 tensorflow/python/keras/backend_test.py       |  44 ++++-
 tensorflow/python/keras/callbacks.py          |   4 +
 tensorflow/python/keras/engine/network.py     |   9 +-
 .../python/keras/layers/convolutional.py      | 177 ++++++++++++-----
 .../python/keras/layers/convolutional_test.py |  31 +++
 tensorflow/python/keras/layers/pooling.py     | 185 +++++++++++++-----
 .../python/keras/layers/pooling_test.py       |  30 +++
 tensorflow/python/keras/layers/wrappers.py    |   3 +
 tensorflow/python/keras/testing_utils.py      |   5 +-
 tensorflow/python/keras/utils/conv_utils.py   |  45 +++--
 .../python/keras/utils/multi_gpu_utils.py     |  17 +-
 .../keras/utils/multi_gpu_utils_test.py       |  26 +++
 tensorflow/python/keras/utils/np_utils.py     |   5 +-
 .../v1/tensorflow.keras.activations.pbtxt     |   4 +
 .../golden/v1/tensorflow.keras.backend.pbtxt  |   4 +-
 ...low.keras.layers.-average-pooling1-d.pbtxt |   2 +-
 ...tensorflow.keras.layers.-avg-pool1-d.pbtxt |   2 +-
 ...flow.keras.layers.-conv2-d-transpose.pbtxt |   2 +-
 ...flow.keras.layers.-conv3-d-transpose.pbtxt |   2 +-
 ...ras.layers.-convolution2-d-transpose.pbtxt |   2 +-
 ...ras.layers.-convolution3-d-transpose.pbtxt |   2 +-
 ...as.layers.-global-average-pooling1-d.pbtxt |   4 +-
 ...low.keras.layers.-global-avg-pool1-d.pbtxt |   4 +-
 ...low.keras.layers.-global-max-pool1-d.pbtxt |   2 +-
 ....keras.layers.-global-max-pooling1-d.pbtxt |   2 +-
 ...tensorflow.keras.layers.-max-pool1-d.pbtxt |   2 +-
 ...sorflow.keras.layers.-max-pooling1-d.pbtxt |   2 +-
 .../golden/v1/tensorflow.keras.utils.pbtxt    |   2 +-
 .../v2/tensorflow.keras.activations.pbtxt     |   4 +
 .../golden/v2/tensorflow.keras.backend.pbtxt  |   4 +-
 ...low.keras.layers.-average-pooling1-d.pbtxt |   2 +-
 ...tensorflow.keras.layers.-avg-pool1-d.pbtxt |   2 +-
 ...flow.keras.layers.-conv2-d-transpose.pbtxt |   2 +-
 ...flow.keras.layers.-conv3-d-transpose.pbtxt |   2 +-
 ...ras.layers.-convolution2-d-transpose.pbtxt |   2 +-
 ...ras.layers.-convolution3-d-transpose.pbtxt |   2 +-
 ...as.layers.-global-average-pooling1-d.pbtxt |   4 +-
 ...low.keras.layers.-global-avg-pool1-d.pbtxt |   4 +-
 ...low.keras.layers.-global-max-pool1-d.pbtxt |   2 +-
 ....keras.layers.-global-max-pooling1-d.pbtxt |   2 +-
 ...tensorflow.keras.layers.-max-pool1-d.pbtxt |   2 +-
 ...sorflow.keras.layers.-max-pooling1-d.pbtxt |   2 +-
 .../golden/v2/tensorflow.keras.utils.pbtxt    |   2 +-
 46 files changed, 581 insertions(+), 172 deletions(-)

diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py
index 99645de736..d69791ce8d 100644
--- a/tensorflow/python/keras/activations.py
+++ b/tensorflow/python/keras/activations.py
@@ -160,6 +160,11 @@ def sigmoid(x):
   return nn.sigmoid(x)
 
 
+@tf_export('keras.activations.exponential')
+def exponential(x):
+  return math_ops.exp(x)
+
+
 @tf_export('keras.activations.hard_sigmoid')
 def hard_sigmoid(x):
   """Hard sigmoid activation function.
diff --git a/tensorflow/python/keras/activations_test.py b/tensorflow/python/keras/activations_test.py
index dd0bbcff39..ad238cb0a9 100644
--- a/tensorflow/python/keras/activations_test.py
+++ b/tensorflow/python/keras/activations_test.py
@@ -169,6 +169,16 @@ class KerasActivationsTest(test.TestCase):
     expected = np.tanh(test_values)
     self.assertAllClose(result, expected, rtol=1e-05)
 
+  def test_exponential(self):
+    with self.cached_session():
+      test_values = np.random.random((2, 5))
+      x = keras.backend.placeholder(ndim=2)
+      exp = keras.activations.exponential(x)
+      f = keras.backend.function([x], [exp])
+      result = f([test_values])[0]
+    expected = np.exp(test_values)
+    self.assertAllClose(result, expected, rtol=1e-05)
+
   def test_linear(self):
     x = np.random.random((10, 5))
     self.assertAllClose(x, keras.activations.linear(x))
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 63e776a06b..13f52fbae7 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -2223,7 +2223,7 @@ def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3):
 
 
 @tf_export('keras.backend.batch_normalization')
-def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
+def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3):
   """Applies batch normalization on x given mean, var, beta and gamma.
 
   I.e. returns:
@@ -2235,11 +2235,49 @@ def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
       var: Variance of batch.
       beta: Tensor with which to center the input.
       gamma: Tensor by which to scale the input.
+      axis: Integer, the axis that should be normalized.
+          (typically the features axis).
       epsilon: Fuzz factor.
 
   Returns:
       A tensor.
   """
+  if ndim(x) == 4:
+    # The CPU implementation of `fused_batch_norm` only supports NHWC
+    if axis == 1 or axis == -3:
+      tf_data_format = 'NCHW'
+    elif axis == 3 or axis == -1:
+      tf_data_format = 'NHWC'
+    else:
+      tf_data_format = None
+
+    if (tf_data_format == 'NHWC' or
+        tf_data_format == 'NCHW' and _has_nchw_support()):
+      # The mean / var / beta / gamma tensors may be broadcasted
+      # so they may have extra axes of size 1, which should be squeezed.
+      if ndim(mean) > 1:
+        mean = array_ops.reshape(mean, [-1])
+      if ndim(var) > 1:
+        var = array_ops.reshape(var, [-1])
+      if beta is None:
+        beta = zeros_like(mean)
+      elif ndim(beta) > 1:
+        beta = array_ops.reshape(beta, [-1])
+      if gamma is None:
+        gamma = ones_like(mean)
+      elif ndim(gamma) > 1:
+        gamma = array_ops.reshape(gamma, [-1])
+    y, _, _ = nn.fused_batch_norm(
+        x,
+        gamma,
+        beta,
+        epsilon=epsilon,
+        mean=mean,
+        variance=var,
+        data_format=tf_data_format,
+        is_training=False
+    )
+    return y
   return nn.batch_normalization(x, mean, var, beta, gamma, epsilon)
 
 
@@ -2880,7 +2918,7 @@ class Function(object):
 
     if session_kwargs:
       raise ValueError('Some keys in session_kwargs are not supported at this '
-                       'time: %s', session_kwargs.keys())
+                       'time: %s', (session_kwargs.keys(),))
 
     self._callable_fn = None
     self._feed_arrays = None
@@ -3798,19 +3836,23 @@ def _preprocess_conv1d_input(x, data_format):
   return x, tf_data_format
 
 
-def _preprocess_conv2d_input(x, data_format):
+def _preprocess_conv2d_input(x, data_format, force_transpose=False):
   """Transpose and cast the input before the conv2d.
 
   Arguments:
       x: input tensor.
       data_format: string, `"channels_last"` or `"channels_first"`.
+      force_transpose: Boolean. If True, the input will always be transposed
+          from NCHW to NHWC if `data_format` is `"channels_first"`.
+          If False, the transposition only occurs on CPU (GPU ops are
+          assumed to support NCHW).
 
   Returns:
       A tensor.
   """
   tf_data_format = 'NHWC'
   if data_format == 'channels_first':
-    if not _has_nchw_support():
+    if not _has_nchw_support() or force_transpose:
       x = array_ops.transpose(x, (0, 2, 3, 1))  # NCHW -> NHWC
     else:
       tf_data_format = 'NCHW'
@@ -3958,7 +4000,8 @@ def conv2d_transpose(x,
                      output_shape,
                      strides=(1, 1),
                      padding='valid',
-                     data_format=None):
+                     data_format=None,
+                     dilation_rate=(1, 1)):
   """2D deconvolution (i.e.
 
   transposed convolution).
@@ -3972,6 +4015,7 @@ def conv2d_transpose(x,
       data_format: string, `"channels_last"` or `"channels_first"`.
           Whether to use Theano or TensorFlow/CNTK data format
           for inputs/kernels/outputs.
+      dilation_rate: Tuple of 2 integers.
 
   Returns:
       A tensor, result of transposed 2D convolution.
@@ -3987,7 +4031,13 @@ def conv2d_transpose(x,
   if isinstance(output_shape, (tuple, list)):
     output_shape = array_ops.stack(output_shape)
 
-  x, tf_data_format = _preprocess_conv2d_input(x, data_format)
+  # `atrous_conv2d_transpose` only supports NHWC format, even on GPU.
+  if data_format == 'channels_first' and dilation_rate != (1, 1):
+    force_transpose = True
+  else:
+    force_transpose = False
+
+  x, tf_data_format = _preprocess_conv2d_input(x, data_format, force_transpose)
 
   if data_format == 'channels_first' and tf_data_format == 'NHWC':
     output_shape = (output_shape[0], output_shape[2], output_shape[3],
@@ -4002,13 +4052,18 @@ def conv2d_transpose(x,
   else:
     strides = (1, 1) + strides
 
-  x = nn.conv2d_transpose(
-      x,
-      kernel,
-      output_shape,
-      strides,
-      padding=padding,
-      data_format=tf_data_format)
+  if dilation_rate == (1, 1):
+    x = nn.conv2d_transpose(x, kernel, output_shape, strides,
+                            padding=padding,
+                            data_format=tf_data_format)
+  else:
+    assert dilation_rate[0] == dilation_rate[1]
+    x = nn.atrous_conv2d_transpose(
+        x,
+        kernel,
+        output_shape,
+        rate=dilation_rate[0],
+        padding=padding)
   if data_format == 'channels_first' and tf_data_format == 'NHWC':
     x = array_ops.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
   return x
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index ab71589940..0834448699 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -26,6 +26,7 @@ from tensorflow.python import keras
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import nn
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.util import tf_inspect
@@ -1381,6 +1382,36 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(mean.get_shape().as_list(), [3,])
     self.assertEqual(var.get_shape().as_list(), [3,])
 
+  def test_batch_normalization(self):
+    g_val = np.random.random((3,))
+    b_val = np.random.random((3,))
+    gamma = keras.backend.variable(g_val)
+    beta = keras.backend.variable(b_val)
+
+    # 3D NHC case
+    val = np.random.random((10, 5, 3))
+    x = keras.backend.variable(val)
+    mean, var = nn.moments(x, (0, 1), None, None, False)
+    normed = keras.backend.batch_normalization(
+        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
+    self.assertEqual(normed.shape.as_list(), [10, 5, 3])
+
+    # 4D NHWC case
+    val = np.random.random((10, 5, 5, 3))
+    x = keras.backend.variable(val)
+    mean, var = nn.moments(x, (0, 1, 2), None, None, False)
+    normed = keras.backend.batch_normalization(
+        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
+    self.assertEqual(normed.shape.as_list(), [10, 5, 5, 3])
+
+    # 4D NCHW case
+    val = np.random.random((10, 3, 5, 5))
+    x = keras.backend.variable(val)
+    mean, var = nn.moments(x, (0, 2, 3), None, None, False)
+    normed = keras.backend.batch_normalization(
+        x, mean, var, beta, gamma, axis=1, epsilon=1e-3)
+    self.assertEqual(normed.shape.as_list(), [10, 3, 5, 5])
+
 
 class TestCTC(test.TestCase):
 
@@ -1506,12 +1537,13 @@ class TestRandomOps(test.TestCase):
       self.assertAllClose(np.min(y), -2., atol=0.1)
 
   def test_string_input(self):
-    seq = keras.Sequential([
-        keras.layers.InputLayer(input_shape=(1,), dtype=dtypes.string),
-        keras.layers.Lambda(lambda x: x[0])
-    ])
-    preds = seq.predict([['tensorflow eager']])
-    self.assertEqual(preds.shape, (1,))
+    with self.cached_session():
+      seq = keras.Sequential([
+          keras.layers.InputLayer(input_shape=(1,), dtype=dtypes.string),
+          keras.layers.Lambda(lambda x: x[0])
+      ])
+      preds = seq.predict([['tensorflow eager']])
+      self.assertEqual(preds.shape, (1,))
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index 6dfbbf3694..3d6000f223 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -781,6 +781,10 @@ class LearningRateScheduler(Callback):
       print('\nEpoch %05d: LearningRateScheduler reducing learning '
             'rate to %s.' % (epoch + 1, lr))
 
+  def on_epoch_end(self, epoch, logs=None):
+    logs = logs or {}
+    logs['lr'] = K.get_value(self.model.optimizer.lr)
+
 
 @tf_export('keras.callbacks.TensorBoard')
 class TensorBoard(Callback):
diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 918488bd7a..5969fea2b2 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -1641,10 +1641,11 @@ class Network(base_layer.Layer):
         ValueError: if `summary()` is called before the model is built.
     """
     if not self.built:
-      raise ValueError('This model has never been called, thus its weights '
-                       'have not yet been created, so no summary can be '
-                       'displayed. Build the model first '
-                       '(e.g. by calling it on some data).')
+      raise ValueError('This model has not yet been built. '
+                       'Build the model first by calling `build()` or calling '
+                       '`fit()` with some data, or specify '
+                       'an `input_shape` argument in the first layer(s) for '
+                       'automatic build.')
     layer_utils.print_summary(self,
                               line_length=line_length,
                               positions=positions,
diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py
index d00def07bb..8f5872385c 100644
--- a/tensorflow/python/keras/layers/convolutional.py
+++ b/tensorflow/python/keras/layers/convolutional.py
@@ -645,6 +645,14 @@ class Conv2DTranspose(Conv2D):
           Specifying any stride value != 1 is incompatible with specifying
           any `dilation_rate` value != 1.
       padding: one of `"valid"` or `"same"` (case-insensitive).
+      output_padding: An integer or tuple/list of 2 integers,
+          specifying the amount of padding along the height and width
+          of the output tensor.
+          Can be a single integer to specify the same value for all
+          spatial dimensions.
+          The amount of output padding along a given dimension must be
+          lower than the stride along that same dimension.
+          If set to `None` (default), the output shape is inferred.
       data_format: A string,
           one of `channels_last` (default) or `channels_first`.
           The ordering of the dimensions in the inputs.
@@ -700,7 +708,9 @@ class Conv2DTranspose(Conv2D):
                kernel_size,
                strides=(1, 1),
                padding='valid',
+               output_padding=None,
                data_format=None,
+               dilation_rate=(1, 1),
                activation=None,
                use_bias=True,
                kernel_initializer='glorot_uniform',
@@ -717,6 +727,7 @@ class Conv2DTranspose(Conv2D):
         strides=strides,
         padding=padding,
         data_format=data_format,
+        dilation_rate=dilation_rate,
         activation=activations.get(activation),
         use_bias=use_bias,
         kernel_initializer=initializers.get(kernel_initializer),
@@ -728,6 +739,16 @@ class Conv2DTranspose(Conv2D):
         bias_constraint=constraints.get(bias_constraint),
         **kwargs)
 
+    self.output_padding = output_padding
+    if self.output_padding is not None:
+      self.output_padding = conv_utils.normalize_tuple(
+          self.output_padding, 2, 'output_padding')
+      for stride, out_pad in zip(self.strides, self.output_padding):
+        if out_pad >= stride:
+          raise ValueError('Stride ' + str(self.strides) + ' must be '
+                           'greater than output padding ' +
+                           str(self.output_padding))
+
   def build(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape)
     if len(input_shape) != 4:
@@ -769,51 +790,50 @@ class Conv2DTranspose(Conv2D):
     inputs_shape = array_ops.shape(inputs)
     batch_size = inputs_shape[0]
     if self.data_format == 'channels_first':
-      c_axis, h_axis, w_axis = 1, 2, 3
+      h_axis, w_axis = 2, 3
     else:
-      c_axis, h_axis, w_axis = 3, 1, 2
+      h_axis, w_axis = 1, 2
 
     height, width = inputs_shape[h_axis], inputs_shape[w_axis]
     kernel_h, kernel_w = self.kernel_size
     stride_h, stride_w = self.strides
 
+    if self.output_padding is None:
+      out_pad_h = out_pad_w = None
+    else:
+      out_pad_h, out_pad_w = self.output_padding
+
     # Infer the dynamic output shape:
     out_height = conv_utils.deconv_output_length(height,
                                                  kernel_h,
-                                                 self.padding,
-                                                 stride_h)
+                                                 padding=self.padding,
+                                                 output_padding=out_pad_h,
+                                                 stride=stride_h,
+                                                 dilation=self.dilation_rate[0])
     out_width = conv_utils.deconv_output_length(width,
                                                 kernel_w,
-                                                self.padding,
-                                                stride_w)
+                                                padding=self.padding,
+                                                output_padding=out_pad_w,
+                                                stride=stride_w,
+                                                dilation=self.dilation_rate[1])
     if self.data_format == 'channels_first':
       output_shape = (batch_size, self.filters, out_height, out_width)
-      strides = (1, 1, stride_h, stride_w)
     else:
       output_shape = (batch_size, out_height, out_width, self.filters)
-      strides = (1, stride_h, stride_w, 1)
 
     output_shape_tensor = array_ops.stack(output_shape)
-    outputs = nn.conv2d_transpose(
+    outputs = backend.conv2d_transpose(
         inputs,
         self.kernel,
         output_shape_tensor,
-        strides,
-        padding=self.padding.upper(),
-        data_format=conv_utils.convert_data_format(self.data_format, ndim=4))
+        strides=self.strides,
+        padding=self.padding,
+        data_format=self.data_format,
+        dilation_rate=self.dilation_rate)
 
     if not context.executing_eagerly():
       # Infer the static output shape:
-      out_shape = inputs.get_shape().as_list()
-      out_shape[c_axis] = self.filters
-      out_shape[h_axis] = conv_utils.deconv_output_length(out_shape[h_axis],
-                                                          kernel_h,
-                                                          self.padding,
-                                                          stride_h)
-      out_shape[w_axis] = conv_utils.deconv_output_length(out_shape[w_axis],
-                                                          kernel_w,
-                                                          self.padding,
-                                                          stride_w)
+      out_shape = self.compute_output_shape(inputs.shape)
       outputs.set_shape(out_shape)
 
     if self.use_bias:
@@ -837,13 +857,33 @@ class Conv2DTranspose(Conv2D):
     kernel_h, kernel_w = self.kernel_size
     stride_h, stride_w = self.strides
 
+    if self.output_padding is None:
+      out_pad_h = out_pad_w = None
+    else:
+      out_pad_h, out_pad_w = self.output_padding
+
     output_shape[c_axis] = self.filters
     output_shape[h_axis] = conv_utils.deconv_output_length(
-        output_shape[h_axis], kernel_h, self.padding, stride_h)
+        output_shape[h_axis],
+        kernel_h,
+        padding=self.padding,
+        output_padding=out_pad_h,
+        stride=stride_h,
+        dilation=self.dilation_rate[0])
     output_shape[w_axis] = conv_utils.deconv_output_length(
-        output_shape[w_axis], kernel_w, self.padding, stride_w)
+        output_shape[w_axis],
+        kernel_w,
+        padding=self.padding,
+        output_padding=out_pad_w,
+        stride=stride_w,
+        dilation=self.dilation_rate[1])
     return tensor_shape.TensorShape(output_shape)
 
+  def get_config(self):
+    config = super(Conv2DTranspose, self).get_config()
+    config['output_padding'] = self.output_padding
+    return config
+
 
 @tf_export('keras.layers.Conv3DTranspose',
            'keras.layers.Convolution3DTranspose')
@@ -878,6 +918,14 @@ class Conv3DTranspose(Conv3D):
           Specifying any stride value != 1 is incompatible with specifying
           any `dilation_rate` value != 1.
       padding: one of `"valid"` or `"same"` (case-insensitive).
+      output_padding: An integer or tuple/list of 3 integers,
+          specifying the amount of padding along the depth, height, and
+          width.
+          Can be a single integer to specify the same value for all
+          spatial dimensions.
+          The amount of output padding along a given dimension must be
+          lower than the stride along that same dimension.
+          If set to `None` (default), the output shape is inferred.
       data_format: A string,
           one of `channels_last` (default) or `channels_first`.
           The ordering of the dimensions in the inputs.
@@ -943,6 +991,7 @@ class Conv3DTranspose(Conv3D):
                kernel_size,
                strides=(1, 1, 1),
                padding='valid',
+               output_padding=None,
                data_format=None,
                activation=None,
                use_bias=True,
@@ -971,6 +1020,16 @@ class Conv3DTranspose(Conv3D):
         bias_constraint=constraints.get(bias_constraint),
         **kwargs)
 
+    self.output_padding = output_padding
+    if self.output_padding is not None:
+      self.output_padding = conv_utils.normalize_tuple(
+          self.output_padding, 3, 'output_padding')
+      for stride, out_pad in zip(self.strides, self.output_padding):
+        if out_pad >= stride:
+          raise ValueError('Stride ' + str(self.strides) + ' must be '
+                           'greater than output padding ' +
+                           str(self.output_padding))
+
   def build(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape)
     if len(input_shape) != 5:
@@ -1012,11 +1071,9 @@ class Conv3DTranspose(Conv3D):
     inputs_shape = array_ops.shape(inputs)
     batch_size = inputs_shape[0]
     if self.data_format == 'channels_first':
-      c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4
+      d_axis, h_axis, w_axis = 2, 3, 4
     else:
-      c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3
-
-    self.input_spec = InputSpec(ndim=5, axes={c_axis: inputs_shape[c_axis]})
+      d_axis, h_axis, w_axis = 1, 2, 3
 
     depth = inputs_shape[d_axis]
     height = inputs_shape[h_axis]
@@ -1025,19 +1082,27 @@ class Conv3DTranspose(Conv3D):
     kernel_d, kernel_h, kernel_w = self.kernel_size
     stride_d, stride_h, stride_w = self.strides
 
+    if self.output_padding is None:
+      out_pad_d = out_pad_h = out_pad_w = None
+    else:
+      out_pad_d, out_pad_h, out_pad_w = self.output_padding
+
     # Infer the dynamic output shape:
     out_depth = conv_utils.deconv_output_length(depth,
                                                 kernel_d,
-                                                self.padding,
-                                                stride_d)
+                                                padding=self.padding,
+                                                output_padding=out_pad_d,
+                                                stride=stride_d)
     out_height = conv_utils.deconv_output_length(height,
                                                  kernel_h,
-                                                 self.padding,
-                                                 stride_h)
+                                                 padding=self.padding,
+                                                 output_padding=out_pad_h,
+                                                 stride=stride_h)
     out_width = conv_utils.deconv_output_length(width,
                                                 kernel_w,
-                                                self.padding,
-                                                stride_w)
+                                                padding=self.padding,
+                                                output_padding=out_pad_w,
+                                                stride=stride_w)
     if self.data_format == 'channels_first':
       output_shape = (batch_size, self.filters, out_depth, out_height,
                       out_width)
@@ -1058,20 +1123,7 @@ class Conv3DTranspose(Conv3D):
 
     if not context.executing_eagerly():
       # Infer the static output shape:
-      out_shape = inputs.get_shape().as_list()
-      out_shape[c_axis] = self.filters
-      out_shape[d_axis] = conv_utils.deconv_output_length(out_shape[d_axis],
-                                                          kernel_d,
-                                                          self.padding,
-                                                          stride_d)
-      out_shape[h_axis] = conv_utils.deconv_output_length(out_shape[h_axis],
-                                                          kernel_h,
-                                                          self.padding,
-                                                          stride_h)
-      out_shape[w_axis] = conv_utils.deconv_output_length(out_shape[w_axis],
-                                                          kernel_w,
-                                                          self.padding,
-                                                          stride_w)
+      out_shape = self.compute_output_shape(inputs.shape)
       outputs.set_shape(out_shape)
 
     if self.use_bias:
@@ -1109,15 +1161,38 @@ class Conv3DTranspose(Conv3D):
     kernel_d, kernel_h, kernel_w = self.kernel_size
     stride_d, stride_h, stride_w = self.strides
 
+    if self.output_padding is None:
+      out_pad_d = out_pad_h = out_pad_w = None
+    else:
+      out_pad_d, out_pad_h, out_pad_w = self.output_padding
+
     output_shape[c_axis] = self.filters
     output_shape[d_axis] = conv_utils.deconv_output_length(
-        output_shape[d_axis], kernel_d, self.padding, stride_d)
+        output_shape[d_axis],
+        kernel_d,
+        padding=self.padding,
+        output_padding=out_pad_d,
+        stride=stride_d)
     output_shape[h_axis] = conv_utils.deconv_output_length(
-        output_shape[h_axis], kernel_h, self.padding, stride_h)
+        output_shape[h_axis],
+        kernel_h,
+        padding=self.padding,
+        output_padding=out_pad_h,
+        stride=stride_h)
     output_shape[w_axis] = conv_utils.deconv_output_length(
-        output_shape[w_axis], kernel_w, self.padding, stride_w)
+        output_shape[w_axis],
+        kernel_w,
+        padding=self.padding,
+        output_padding=out_pad_w,
+        stride=stride_w)
     return tensor_shape.TensorShape(output_shape)
 
+  def get_config(self):
+    config = super(Conv3DTranspose, self).get_config()
+    config.pop('dilation_rate')
+    config['output_padding'] = self.output_padding
+    return config
+
 
 class SeparableConv(Conv):
   """Abstract base layer for separable nD convolution.
diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index cad5e4c8bd..f88d632ab5 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -204,6 +204,9 @@ class Conv2DTransposeTest(test.TestCase):
     if test.is_gpu_available(cuda_only=True):
       self._run_test(kwargs, 'data_format', ['channels_first'])
 
+    kwargs['strides'] = (2, 2)
+    self._run_test(kwargs, 'output_padding', [(1, 1)])
+
   def test_conv2dtranspose_regularizers(self):
     kwargs = {
         'filters': 3,
@@ -239,6 +242,31 @@ class Conv2DTransposeTest(test.TestCase):
       self.assertEqual(layer.kernel.constraint, k_constraint)
       self.assertEqual(layer.bias.constraint, b_constraint)
 
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_conv2d_transpose_dilation(self):
+    testing_utils.layer_test(keras.layers.Conv2DTranspose,
+                             kwargs={'filters': 2,
+                                     'kernel_size': 3,
+                                     'padding': 'same',
+                                     'data_format': 'channels_last',
+                                     'dilation_rate': (2, 2)},
+                             input_shape=(2, 5, 6, 3))
+
+    input_data = np.arange(48).reshape((1, 4, 4, 3)).astype(np.float32)
+    expected_output = np.float32([[192, 228, 192, 228],
+                                  [336, 372, 336, 372],
+                                  [192, 228, 192, 228],
+                                  [336, 372, 336, 372]]).reshape((1, 4, 4, 1))
+    testing_utils.layer_test(keras.layers.Conv2DTranspose,
+                             input_data=input_data,
+                             kwargs={'filters': 1,
+                                     'kernel_size': 3,
+                                     'padding': 'same',
+                                     'data_format': 'channels_last',
+                                     'dilation_rate': (2, 2),
+                                     'kernel_initializer': 'ones'},
+                             expected_output=expected_output)
+
 
 class Conv3DTransposeTest(test.TestCase):
 
@@ -270,6 +298,9 @@ class Conv3DTransposeTest(test.TestCase):
     if test.is_gpu_available(cuda_only=True):
       self._run_test(kwargs, 'data_format', ['channels_first'])
 
+    kwargs['strides'] = (2, 2, 2)
+    self._run_test(kwargs, 'output_padding', [(1, 1, 1)])
+
   def test_conv3dtranspose_regularizers(self):
     kwargs = {
         'filters': 3,
diff --git a/tensorflow/python/keras/layers/pooling.py b/tensorflow/python/keras/layers/pooling.py
index 912e8bd619..72a9c1d629 100644
--- a/tensorflow/python/keras/layers/pooling.py
+++ b/tensorflow/python/keras/layers/pooling.py
@@ -18,12 +18,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.keras import backend
 from tensorflow.python.keras.engine.base_layer import InputSpec
 from tensorflow.python.keras.engine.base_layer import Layer
 from tensorflow.python.keras.utils import conv_utils
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.util.tf_export import tf_export
 
@@ -41,16 +44,18 @@ class Pooling1D(Layer):
       strides of the pooling operation.
     padding: A string. The padding method, either 'valid' or 'same'.
       Case-insensitive.
-    data_format: A string, one of `channels_last` (default) or `channels_first`.
+    data_format: A string,
+      one of `channels_last` (default) or `channels_first`.
       The ordering of the dimensions in the inputs.
       `channels_last` corresponds to inputs with shape
-      `(batch, length, channels)` while `channels_first` corresponds to
-      inputs with shape `(batch, channels, length)`.
+      `(batch, steps, features)` while `channels_first`
+      corresponds to inputs with shape
+      `(batch, features, steps)`.
     name: A string, the name of the layer.
   """
 
   def __init__(self, pool_function, pool_size, strides,
-               padding='valid', data_format=None,
+               padding='valid', data_format='channels_last',
                name=None, **kwargs):
     super(Pooling1D, self).__init__(name=name, **kwargs)
     if data_format is None:
@@ -65,45 +70,39 @@ class Pooling1D(Layer):
     self.input_spec = InputSpec(ndim=3)
 
   def call(self, inputs):
-    # There is no TF op for 1D pooling, hence we make the inputs 4D.
-    if self.data_format == 'channels_last':
-      # input is NWC, make it NHWC
-      inputs = array_ops.expand_dims(inputs, 1)
-      # pool on the W dim
-      pool_shape = (1, 1) + self.pool_size + (1,)
-      strides = (1, 1) + self.strides + (1,)
-      data_format = 'NHWC'
-    else:
-      # input is NCW, make it NCHW
-      inputs = array_ops.expand_dims(inputs, 2)
-      # pool on the W dim
-      pool_shape = (1, 1, 1) + self.pool_size
-      strides = (1, 1, 1) + self.strides
-      data_format = 'NCHW'
-
+    pad_axis = 2 if self.data_format == 'channels_last' else 3
+    inputs = array_ops.expand_dims(inputs, pad_axis)
     outputs = self.pool_function(
         inputs,
-        ksize=pool_shape,
-        strides=strides,
-        padding=self.padding.upper(),
-        data_format=data_format)
-
-    if self.data_format == 'channels_last':
-      return array_ops.squeeze(outputs, 1)
-    else:
-      return array_ops.squeeze(outputs, 2)
+        self.pool_size + (1,),
+        strides=self.strides + (1,),
+        padding=self.padding,
+        data_format=self.data_format)
+    return array_ops.squeeze(outputs, pad_axis)
 
   def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    length = conv_utils.conv_output_length(input_shape[1], self.pool_size[0],
-                                           self.padding, self.strides[0])
-    return tensor_shape.TensorShape([input_shape[0], length, input_shape[2]])
+    if self.data_format == 'channels_first':
+      steps = input_shape[2]
+      features = input_shape[1]
+    else:
+      steps = input_shape[1]
+      features = input_shape[2]
+    length = conv_utils.conv_output_length(steps,
+                                           self.pool_size[0],
+                                           self.padding,
+                                           self.strides[0])
+    if self.data_format == 'channels_first':
+      return tensor_shape.TensorShape([input_shape[0], features, length])
+    else:
+      return tensor_shape.TensorShape([input_shape[0], length, features])
 
   def get_config(self):
     config = {
         'strides': self.strides,
         'pool_size': self.pool_size,
-        'padding': self.padding
+        'padding': self.padding,
+        'data_format': self.data_format,
     }
     base_config = super(Pooling1D, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
@@ -119,19 +118,36 @@ class MaxPooling1D(Pooling1D):
           E.g. 2 will halve the input.
           If None, it will default to `pool_size`.
       padding: One of `"valid"` or `"same"` (case-insensitive).
+      data_format: A string,
+          one of `channels_last` (default) or `channels_first`.
+          The ordering of the dimensions in the inputs.
+          `channels_last` corresponds to inputs with shape
+          `(batch, steps, features)` while `channels_first`
+          corresponds to inputs with shape
+          `(batch, features, steps)`.
 
   Input shape:
-      3D tensor with shape: `(batch_size, steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, steps)`
 
   Output shape:
-      3D tensor with shape: `(batch_size, downsampled_steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, downsampled_steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, downsampled_steps)`
   """
 
   def __init__(self, pool_size=2, strides=None,
-               padding='valid', data_format=None, **kwargs):
+               padding='valid', data_format='channels_last', **kwargs):
 
     super(MaxPooling1D, self).__init__(
-        nn.max_pool,
+        functools.partial(backend.pool2d, pool_mode='max'),
         pool_size=pool_size,
         strides=strides,
         padding=padding,
@@ -149,18 +165,35 @@ class AveragePooling1D(Pooling1D):
           E.g. 2 will halve the input.
           If None, it will default to `pool_size`.
       padding: One of `"valid"` or `"same"` (case-insensitive).
+      data_format: A string,
+          one of `channels_last` (default) or `channels_first`.
+          The ordering of the dimensions in the inputs.
+          `channels_last` corresponds to inputs with shape
+          `(batch, steps, features)` while `channels_first`
+          corresponds to inputs with shape
+          `(batch, features, steps)`.
 
   Input shape:
-      3D tensor with shape: `(batch_size, steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, steps)`
 
   Output shape:
-      3D tensor with shape: `(batch_size, downsampled_steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, downsampled_steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, downsampled_steps)`
   """
 
   def __init__(self, pool_size=2, strides=None,
-               padding='valid', data_format=None, **kwargs):
+               padding='valid', data_format='channels_last', **kwargs):
     super(AveragePooling1D, self).__init__(
-        nn.avg_pool,
+        functools.partial(backend.pool2d, pool_mode='avg'),
         pool_size=pool_size,
         strides=strides,
         padding=padding,
@@ -561,41 +594,96 @@ class GlobalPooling1D(Layer):
   """Abstract class for different global pooling 1D layers.
   """
 
-  def __init__(self, **kwargs):
+  def __init__(self, data_format='channels_last', **kwargs):
     super(GlobalPooling1D, self).__init__(**kwargs)
     self.input_spec = InputSpec(ndim=3)
+    self.data_format = conv_utils.normalize_data_format(data_format)
 
   def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    return tensor_shape.TensorShape([input_shape[0], input_shape[2]])
+    if self.data_format == 'channels_first':
+      return tensor_shape.TensorShape([input_shape[0], input_shape[1]])
+    else:
+      return tensor_shape.TensorShape([input_shape[0], input_shape[2]])
 
   def call(self, inputs):
     raise NotImplementedError
 
+  def get_config(self):
+    config = {'data_format': self.data_format}
+    base_config = super(GlobalPooling1D, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
+
 
 @tf_export('keras.layers.GlobalAveragePooling1D',
            'keras.layers.GlobalAvgPool1D')
 class GlobalAveragePooling1D(GlobalPooling1D):
   """Global average pooling operation for temporal data.
 
+  Arguments:
+    data_format: A string,
+        one of `channels_last` (default) or `channels_first`.
+        The ordering of the dimensions in the inputs.
+        `channels_last` corresponds to inputs with shape
+        `(batch, steps, features)` while `channels_first`
+        corresponds to inputs with shape
+        `(batch, features, steps)`.
+
   Input shape:
-      3D tensor with shape: `(batch_size, steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, steps)`
 
   Output shape:
       2D tensor with shape:
       `(batch_size, features)`
   """
 
-  def call(self, inputs):
-    return backend.mean(inputs, axis=1)
+  def __init__(self, data_format='channels_last', **kwargs):
+    super(GlobalAveragePooling1D, self).__init__(data_format=data_format,
+                                                 **kwargs)
+    self.supports_masking = True
+
+  def call(self, inputs, mask=None):
+    steps_axis = 1 if self.data_format == 'channels_last' else 2
+    if mask is not None:
+      mask = math_ops.cast(mask, backend.floatx())
+      input_shape = inputs.shape.as_list()
+      broadcast_shape = [-1, input_shape[steps_axis], 1]
+      mask = array_ops.reshape(mask, broadcast_shape)
+      inputs *= mask
+      return backend.sum(inputs, axis=steps_axis) / math_ops.reduce_sum(
+          mask, axis=steps_axis)
+    else:
+      return backend.mean(inputs, axis=steps_axis)
+
+  def compute_mask(self, inputs, mask=None):
+    return None
 
 
 @tf_export('keras.layers.GlobalMaxPool1D', 'keras.layers.GlobalMaxPooling1D')
 class GlobalMaxPooling1D(GlobalPooling1D):
   """Global max pooling operation for temporal data.
 
+  Arguments:
+    data_format: A string,
+        one of `channels_last` (default) or `channels_first`.
+        The ordering of the dimensions in the inputs.
+        `channels_last` corresponds to inputs with shape
+        `(batch, steps, features)` while `channels_first`
+        corresponds to inputs with shape
+        `(batch, features, steps)`.
+
   Input shape:
-      3D tensor with shape: `(batch_size, steps, features)`.
+      - If `data_format='channels_last'`:
+          3D tensor with shape:
+          `(batch_size, steps, features)`
+      - If `data_format='channels_first'`:
+          3D tensor with shape:
+          `(batch_size, features, steps)`
 
   Output shape:
       2D tensor with shape:
@@ -603,7 +691,8 @@ class GlobalMaxPooling1D(GlobalPooling1D):
   """
 
   def call(self, inputs):
-    return backend.max(inputs, axis=1)
+    steps_axis = 1 if self.data_format == 'channels_last' else 2
+    return backend.max(inputs, axis=steps_axis)
 
 
 class GlobalPooling2D(Layer):
diff --git a/tensorflow/python/keras/layers/pooling_test.py b/tensorflow/python/keras/layers/pooling_test.py
index 2cd9939e66..936e73ecf9 100644
--- a/tensorflow/python/keras/layers/pooling_test.py
+++ b/tensorflow/python/keras/layers/pooling_test.py
@@ -18,11 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
+from tensorflow.python.training import rmsprop
 
 
 class GlobalPoolingTest(test.TestCase):
@@ -31,8 +34,26 @@ class GlobalPoolingTest(test.TestCase):
   def test_globalpooling_1d(self):
     testing_utils.layer_test(keras.layers.pooling.GlobalMaxPooling1D,
                              input_shape=(3, 4, 5))
+    testing_utils.layer_test(keras.layers.pooling.GlobalMaxPooling1D,
+                             kwargs={'data_format': 'channels_first'},
+                             input_shape=(3, 4, 5))
     testing_utils.layer_test(
         keras.layers.pooling.GlobalAveragePooling1D, input_shape=(3, 4, 5))
+    testing_utils.layer_test(keras.layers.pooling.GlobalAveragePooling1D,
+                             kwargs={'data_format': 'channels_first'},
+                             input_shape=(3, 4, 5))
+
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_globalpooling_1d_masking_support(self):
+    model = keras.Sequential()
+    model.add(keras.layers.Masking(mask_value=0., input_shape=(3, 4)))
+    model.add(keras.layers.GlobalAveragePooling1D())
+    model.compile(loss='mae', optimizer=rmsprop.RMSPropOptimizer(0.001))
+
+    model_input = np.random.random((2, 3, 4))
+    model_input[0, 1:, :] = 0
+    output = model.predict(model_input)
+    self.assertAllClose(output[0], model_input[0, 0, :])
 
   @tf_test_util.run_in_graph_and_eager_modes
   def test_globalpooling_2d(self):
@@ -172,6 +193,10 @@ class Pooling1DTest(test.TestCase):
             kwargs={'strides': stride,
                     'padding': padding},
             input_shape=(3, 5, 4))
+    testing_utils.layer_test(
+        keras.layers.MaxPooling1D,
+        kwargs={'data_format': 'channels_first'},
+        input_shape=(3, 2, 6))
 
   @tf_test_util.run_in_graph_and_eager_modes
   def test_averagepooling_1d(self):
@@ -183,6 +208,11 @@ class Pooling1DTest(test.TestCase):
                     'padding': padding},
             input_shape=(3, 5, 4))
 
+    testing_utils.layer_test(
+        keras.layers.AveragePooling1D,
+        kwargs={'data_format': 'channels_first'},
+        input_shape=(3, 2, 6))
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py
index a1933c11b0..d19d0b5f8c 100644
--- a/tensorflow/python/keras/layers/wrappers.py
+++ b/tensorflow/python/keras/layers/wrappers.py
@@ -587,6 +587,9 @@ class Bidirectional(Wrapper):
       output = y * y_rev
     elif self.merge_mode is None:
       output = [y, y_rev]
+    else:
+      raise ValueError(
+          'Unrecognized value for `merge_mode`: %s' % (self.merge_mode))
 
     # Properly set learning phase
     if (getattr(y, '_uses_learning_phase', False) or
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index 501b50ba5f..2fae094a1e 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -166,8 +166,9 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
     if expected_dim is not None:
       if expected_dim != actual_dim:
         raise AssertionError(
-            'When testing layer %s, for input %s, found output_shape='
-            '%s but expected to find %s.\nFull kwargs: %s' %
+            'When testing layer %s **after deserialization**, '
+            'for input %s, found output_shape='
+            '%s but expected to find inferred shape %s.\nFull kwargs: %s' %
             (layer_cls.__name__,
              x,
              actual_output_shape,
diff --git a/tensorflow/python/keras/utils/conv_utils.py b/tensorflow/python/keras/utils/conv_utils.py
index 8ebca1418d..f486e631e5 100644
--- a/tensorflow/python/keras/utils/conv_utils.py
+++ b/tensorflow/python/keras/utils/conv_utils.py
@@ -137,26 +137,49 @@ def conv_input_length(output_length, filter_size, padding, stride):
   return (output_length - 1) * stride - 2 * pad + filter_size
 
 
-def deconv_output_length(input_length, filter_size, padding, stride):
+def deconv_output_length(input_length, filter_size, padding,
+                         output_padding=None, stride=0, dilation=1):
   """Determines output length of a transposed convolution given input length.
 
   Arguments:
-      input_length: integer.
-      filter_size: integer.
-      padding: one of "same", "valid", "full".
-      stride: integer.
+      input_length: Integer.
+      filter_size: Integer.
+      padding: one of `"same"`, `"valid"`, `"full"`.
+      output_padding: Integer, amount of padding along the output dimension.
+          Can be set to `None` in which case the output length is inferred.
+      stride: Integer.
+      dilation: Integer.
 
   Returns:
       The output length (integer).
   """
+  assert padding in {'same', 'valid', 'full'}
   if input_length is None:
     return None
-  input_length *= stride
-  if padding == 'valid':
-    input_length += max(filter_size - stride, 0)
-  elif padding == 'full':
-    input_length -= (stride + filter_size - 2)
-  return input_length
+
+  # Get the dilated kernel size
+  filter_size = filter_size + (filter_size - 1) * (dilation - 1)
+
+  # Infer length if output padding is None, else compute the exact length
+  if output_padding is None:
+    if padding == 'valid':
+      length = input_length * stride + max(filter_size - stride, 0)
+    elif padding == 'full':
+      length = input_length * stride - (stride + filter_size - 2)
+    elif padding == 'same':
+      length = input_length * stride
+
+  else:
+    if padding == 'same':
+      pad = filter_size // 2
+    elif padding == 'valid':
+      pad = 0
+    elif padding == 'full':
+      pad = filter_size - 1
+
+    length = ((input_length - 1) * stride + filter_size - 2 * pad +
+              output_padding)
+  return length
 
 
 def normalize_data_format(value):
diff --git a/tensorflow/python/keras/utils/multi_gpu_utils.py b/tensorflow/python/keras/utils/multi_gpu_utils.py
index e1c49bc852..04b2ea8fe3 100644
--- a/tensorflow/python/keras/utils/multi_gpu_utils.py
+++ b/tensorflow/python/keras/utils/multi_gpu_utils.py
@@ -244,9 +244,24 @@ def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
         for o in range(len(outputs)):
           all_outputs[o].append(outputs[o])
 
+  # Deduplicate output names to handle Siamese networks.
+  occurrences = {}
+  for n in model.output_names:
+    if n not in occurrences:
+      occurrences[n] = 1
+    else:
+      occurrences[n] += 1
+  conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
+  output_names = []
+  for n in model.output_names:
+    if n in conflict_counter:
+      conflict_counter[n] += 1
+      n += '_%d' % conflict_counter[n]
+    output_names.append(n)
+
   # Merge outputs under expected scope.
   with ops.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
     merged = []
-    for name, outputs in zip(model.output_names, all_outputs):
+    for name, outputs in zip(output_names, all_outputs):
       merged.append(concatenate(outputs, axis=0, name=name))
     return Model(model.inputs, merged)
diff --git a/tensorflow/python/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/utils/multi_gpu_utils_test.py
index 3d0351a11f..1780ab6587 100644
--- a/tensorflow/python/keras/utils/multi_gpu_utils_test.py
+++ b/tensorflow/python/keras/utils/multi_gpu_utils_test.py
@@ -198,5 +198,31 @@ class TestMultiGPUModel(test.TestCase):
       parallel_model.compile(loss='mean_squared_error', optimizer='adam')
       parallel_model.train_on_batch(x, y)
 
+  def test_multi_gpu_with_siamese_network(self):
+    gpus = 2
+
+    if not check_if_compatible_devices(gpus=gpus):
+      return
+
+    with self.cached_session():
+      input_shape = (3,)
+      nested_model = keras.models.Sequential([
+          keras.layers.Dense(32, input_shape=input_shape),
+          keras.layers.Dense(1)
+      ], name='nested')
+
+      input1 = keras.Input(input_shape)
+      input2 = keras.Input(input_shape)
+      score1 = nested_model(input1)
+      score2 = nested_model(input2)
+      score_sum = keras.layers.Add(name='add')([score1, score2])
+
+      siamese = keras.models.Model(inputs=[input1, input2],
+                                   outputs=[score_sum, score1, score2],
+                                   name='siamese')
+      parallel_siamese = keras.utils.multi_gpu_model(siamese, gpus)
+      self.assertEqual(parallel_siamese.output_names,
+                       ['add', 'nested_1', 'nested_2'])
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/utils/np_utils.py b/tensorflow/python/keras/utils/np_utils.py
index c24e87308b..3763999bff 100644
--- a/tensorflow/python/keras/utils/np_utils.py
+++ b/tensorflow/python/keras/utils/np_utils.py
@@ -22,7 +22,7 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 @tf_export('keras.utils.to_categorical')
-def to_categorical(y, num_classes=None):
+def to_categorical(y, num_classes=None, dtype='float32'):
   """Converts a class vector (integers) to binary class matrix.
 
   E.g. for use with categorical_crossentropy.
@@ -31,6 +31,7 @@ def to_categorical(y, num_classes=None):
       y: class vector to be converted into a matrix
           (integers from 0 to num_classes).
       num_classes: total number of classes.
+      dtype: The data type expected by the input. Default: `'float32'`.
 
   Returns:
       A binary matrix representation of the input. The classes axis is placed
@@ -44,7 +45,7 @@ def to_categorical(y, num_classes=None):
   if not num_classes:
     num_classes = np.max(y) + 1
   n = y.shape[0]
-  categorical = np.zeros((n, num_classes), dtype=np.float32)
+  categorical = np.zeros((n, num_classes), dtype=dtype)
   categorical[np.arange(n), y] = 1
   output_shape = input_shape + (num_classes,)
   categorical = np.reshape(categorical, output_shape)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt
index 2e9de9ebb2..eb315e356d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "elu"
     argspec: "args=[\'x\', \'alpha\'], varargs=None, keywords=None, defaults=[\'1.0\'], "
   }
+  member_method {
+    name: "exponential"
+    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get"
     argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
index a71a59e269..9feb7c09b8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
@@ -46,7 +46,7 @@ tf_module {
   }
   member_method {
     name: "batch_normalization"
-    argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'0.001\'], "
+    argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'axis\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'-1\', \'0.001\'], "
   }
   member_method {
     name: "batch_set_value"
@@ -98,7 +98,7 @@ tf_module {
   }
   member_method {
     name: "conv2d_transpose"
-    argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\'], "
+    argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\', \'(1, 1)\'], "
   }
   member_method {
     name: "conv3d"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
index c3dd2ad046..014f5828fa 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
index c440604aae..a6e4856de9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
index 065bb4d35b..381839d6de 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index c7ba6056f9..2933f9f4b3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
index 8f4f7918ab..9c9c7461c8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index 93c442bd55..44ca598724 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index 5ea61d118d..a8094c0bde 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
@@ -111,7 +111,7 @@ tf_class {
   }
   member_method {
     name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "compute_mask"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
index 11dca17c6d..3ebe162f57 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
@@ -111,7 +111,7 @@ tf_class {
   }
   member_method {
     name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "compute_mask"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
index 278429af6f..c0a53b847b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
index 935a69ab2f..ff6c6f3ec4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
index 238d96cca6..d26da270e7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
index 4a45bf7997..524c5fd69e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt
index 81b91d2780..138d97b11f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt
@@ -70,6 +70,6 @@ tf_module {
   }
   member_method {
     name: "to_categorical"
-    argspec: "args=[\'y\', \'num_classes\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], "
   }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt
index 2e9de9ebb2..eb315e356d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "elu"
     argspec: "args=[\'x\', \'alpha\'], varargs=None, keywords=None, defaults=[\'1.0\'], "
   }
+  member_method {
+    name: "exponential"
+    argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get"
     argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
index a71a59e269..9feb7c09b8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
@@ -46,7 +46,7 @@ tf_module {
   }
   member_method {
     name: "batch_normalization"
-    argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'0.001\'], "
+    argspec: "args=[\'x\', \'mean\', \'var\', \'beta\', \'gamma\', \'axis\', \'epsilon\'], varargs=None, keywords=None, defaults=[\'-1\', \'0.001\'], "
   }
   member_method {
     name: "batch_set_value"
@@ -98,7 +98,7 @@ tf_module {
   }
   member_method {
     name: "conv2d_transpose"
-    argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\'], "
+    argspec: "args=[\'x\', \'kernel\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilation_rate\'], varargs=None, keywords=None, defaults=[\'(1, 1)\', \'valid\', \'None\', \'(1, 1)\'], "
   }
   member_method {
     name: "conv3d"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
index c3dd2ad046..014f5828fa 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
index c440604aae..a6e4856de9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
index 065bb4d35b..381839d6de 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index c7ba6056f9..2933f9f4b3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
index 8f4f7918ab..9c9c7461c8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'dilation_rate\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1)\', \'valid\', \'None\', \'None\', \'(1, 1)\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index 93c442bd55..44ca598724 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'filters\', \'kernel_size\', \'strides\', \'padding\', \'output_padding\', \'data_format\', \'activation\', \'use_bias\', \'kernel_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'bias_constraint\'], varargs=None, keywords=kwargs, defaults=[\'(1, 1, 1)\', \'valid\', \'None\', \'None\', \'None\', \'True\', \'glorot_uniform\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index 5ea61d118d..a8094c0bde 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
@@ -111,7 +111,7 @@ tf_class {
   }
   member_method {
     name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "compute_mask"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
index 11dca17c6d..3ebe162f57 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
@@ -111,7 +111,7 @@ tf_class {
   }
   member_method {
     name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "compute_mask"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
index 278429af6f..c0a53b847b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
index 935a69ab2f..ff6c6f3ec4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
index 238d96cca6..d26da270e7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
index 4a45bf7997..524c5fd69e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
@@ -83,7 +83,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'None\'], "
+    argspec: "args=[\'self\', \'pool_size\', \'strides\', \'padding\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'2\', \'None\', \'valid\', \'channels_last\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt
index 81b91d2780..138d97b11f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt
@@ -70,6 +70,6 @@ tf_module {
   }
   member_method {
     name: "to_categorical"
-    argspec: "args=[\'y\', \'num_classes\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'y\', \'num_classes\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'float32\'], "
   }
 }
-- 
GitLab


From 5474ecfc623a826ca307e75057732ba190660d67 Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@google.com>
Date: Mon, 8 Oct 2018 10:53:44 -0700
Subject: [PATCH 0509/1085] Address additional comments

---
 RELEASE.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index cb020d0310..58d918895c 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -2,14 +2,15 @@
 
 ## Major Features and Improvements
 * Keras models can now be directly exported to the SavedModel format(`tf.contrib.saved_model.save_keras_model()`) and used with Tensorflow Serving.
-* Keras model now supports evaluating with a `tf.data.Dataset`.
+* Keras models now support evaluating with a `tf.data.Dataset`.
 * TensorFlow binaries are built with XLA support linked in by default.
 
 ## Bug Fixes and Other Changes
 
 * tf.data:
   * tf.data users can now represent, get, and set options of TensorFlow input pipelines using `tf.data.Options()`, `tf.data.Dataset.options()`, and `tf.data.Dataset.with_options()` respectively.
-* tf.data users can now use `tf.data.experimental.AUTOTUNE` for the num_parallel_calls argument of `tf.data.Dataset.interleave`, `tf.data.Dataset.map`, and `tf.data.experimental.map_and_batch` to dynamically tune the level of parallelism at runtime.
+  * New `tf.data.Dataset.reduce()` API allows users to reduce a finite dataset to a single element using a user-provided reduce function.
+  * New `tf.data.Dataset.window()` API allows users to create finite windows of input dataset; when combined with the `tf.data.Dataset.reduce()` API, this allows users to implement customized batching.
   * All C++ code moves to the `tensorflow::data` namespace.
   * Add support for `num_parallel_calls` to `tf.data.Dataset.interleave`.
 * `tf.contrib`:
@@ -18,7 +19,7 @@
 * `tf.contrib.data`
   * Deprecate, and replace by tf.data.experimental.
 * Other:
-  * Instead of jemalloc, revert back to using system malloc.
+  * Instead of jemalloc, revert back to using system malloc since it simplifies build and has comparable performance.
   * Remove integer types from `tf.nn.softplus` and `tf.nn.softsign` OpDefs. This is a bugfix; these ops were never meant to support integers.
   * Allow subslicing Tensors with a single dimension.
   * Add option to calculate string length in Unicode characters
-- 
GitLab


From d1588d72a820423cab36977ca97221aba01be713 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 8 Oct 2018 10:43:03 -0700
Subject: [PATCH 0510/1085] Add a utility that allows finding a name for an
 entity, relative to an existing namespace.

PiperOrigin-RevId: 216211286
---
 .../python/autograph/pyct/inspect_utils.py    | 34 +++++++++++++++++++
 .../autograph/pyct/inspect_utils_test.py      | 19 +++++++++++
 2 files changed, 53 insertions(+)

diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index 1416988ea3..29c406c248 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -67,6 +67,40 @@ def getnamespace(f):
   return namespace
 
 
+def getqualifiedname(namespace, object_, max_depth=2):
+  """Returns the name by which a value can be referred to in a given namespace.
+
+  This function will recurse inside modules, but it will not search objects for
+  attributes. The recursion depth is controlled by max_depth.
+
+  Args:
+    namespace: Dict[str, Any], the namespace to search into.
+    object_: Any, the value to search.
+    max_depth: Optional[int], a limit to the recursion depth when searching
+        inside modules.
+  Returns: Union[str, None], the fully-qualified name that resolves to the value
+      o, or None if it couldn't be found.
+  """
+  for name, value in namespace.items():
+    # The value may be referenced by more than one symbol, case in which
+    # any symbol will be fine. If the program contains symbol aliases that
+    # change over time, this may capture a symbol that will later point to
+    # something else.
+    # TODO(mdan): Prefer the symbol that matches the value type name.
+    if object_ is value:
+      return name
+
+  # TODO(mdan): Use breadth-first search and avoid visiting modules twice.
+  if max_depth:
+    for name, value in namespace.items():
+      if tf_inspect.ismodule(value):
+        name_in_module = getqualifiedname(value.__dict__, object_,
+                                          max_depth - 1)
+        if name_in_module is not None:
+          return '{}.{}'.format(name, name_in_module)
+  return None
+
+
 def _get_unbound_function(m):
   # TODO(mdan): Figure out why six.get_unbound_function fails in some cases.
   # The failure case is for tf.keras.Model.
diff --git a/tensorflow/python/autograph/pyct/inspect_utils_test.py b/tensorflow/python/autograph/pyct/inspect_utils_test.py
index f3eb027822..11074debfc 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils_test.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from functools import wraps
+import imp
 
 import six
 
@@ -127,6 +128,24 @@ class InspectUtilsTest(test.TestCase):
     self.assertEqual(ns['closed_over_primitive'], closed_over_primitive)
     self.assertTrue('local_var' not in ns)
 
+  def test_getqualifiedname(self):
+    foo = object()
+    qux = imp.new_module('quxmodule')
+    bar = imp.new_module('barmodule')
+    baz = object()
+    bar.baz = baz
+
+    ns = {
+        'foo': foo,
+        'bar': bar,
+        'qux': qux,
+    }
+
+    self.assertIsNone(inspect_utils.getqualifiedname(ns, inspect_utils))
+    self.assertEqual(inspect_utils.getqualifiedname(ns, foo), 'foo')
+    self.assertEqual(inspect_utils.getqualifiedname(ns, bar), 'bar')
+    self.assertEqual(inspect_utils.getqualifiedname(ns, baz), 'bar.baz')
+
   def test_getmethodclass(self):
 
     self.assertEqual(
-- 
GitLab


From 0691d49fb6e15740b8ddf8019fea4edb91bca914 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 10:43:54 -0700
Subject: [PATCH 0511/1085] Convert TensorFlow's nasm dependency to new third
 party import method.

PiperOrigin-RevId: 216211467
---
 tensorflow/workspace.bzl                        | 15 ++-------------
 third_party/nasm/BUILD                          |  1 +
 third_party/{nasm.BUILD => nasm/BUILD.bazel}    | 12 ++++++------
 .../nasm.BUILD => nasm/BUILD.system}            |  0
 third_party/nasm/workspace.bzl                  | 17 +++++++++++++++++
 5 files changed, 26 insertions(+), 19 deletions(-)
 create mode 100644 third_party/nasm/BUILD
 rename third_party/{nasm.BUILD => nasm/BUILD.bazel} (100%)
 rename third_party/{systemlibs/nasm.BUILD => nasm/BUILD.system} (100%)
 create mode 100644 third_party/nasm/workspace.bzl

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 6f5aa85b01..adeac62e43 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -23,11 +23,13 @@ load(
 load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
 load("//third_party/icu:workspace.bzl", icu = "repo")
 load("//third_party/jpeg:workspace.bzl", jpeg = "repo")
+load("//third_party/nasm:workspace.bzl", nasm = "repo")
 
 def initialize_third_party():
     flatbuffers()
     icu()
     jpeg()
+    nasm()
 
 # Sanitize a dependency so that it works correctly from code that includes
 # TensorFlow as a submodule.
@@ -235,19 +237,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
     )
 
-    tf_http_archive(
-        name = "nasm",
-        build_file = clean_dep("//third_party:nasm.BUILD"),
-        sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011",
-        strip_prefix = "nasm-2.13.03",
-        system_build_file = clean_dep("//third_party/systemlibs:nasm.BUILD"),
-        urls = [
-            "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
-            "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.13.03.tar.bz2/sha512/d7a6b4cee8dfd603d8d4c976e5287b5cc542fa0b466ff989b743276a6e28114e64289bf02a7819eca63142a5278aa6eed57773007e5f589e15768e6456a8919d/nasm-2.13.03.tar.bz2",
-            "http://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
-        ],
-    )
-
     tf_http_archive(
         name = "png_archive",
         build_file = clean_dep("//third_party:png.BUILD"),
diff --git a/third_party/nasm/BUILD b/third_party/nasm/BUILD
new file mode 100644
index 0000000000..e3aec1fce9
--- /dev/null
+++ b/third_party/nasm/BUILD
@@ -0,0 +1 @@
+# Needed to make this a package.
diff --git a/third_party/nasm.BUILD b/third_party/nasm/BUILD.bazel
similarity index 100%
rename from third_party/nasm.BUILD
rename to third_party/nasm/BUILD.bazel
index d746a65e7e..c68d713946 100644
--- a/third_party/nasm.BUILD
+++ b/third_party/nasm/BUILD.bazel
@@ -137,12 +137,6 @@ cc_binary(
         ":windows": ["config/msvc.h"],
         "//conditions:default": [],
     }),
-    includes = [
-        "asm",
-        "include",
-        "output",
-        "x86",
-    ],
     copts = select({
         ":windows": [],
         "//conditions:default": [
@@ -157,6 +151,12 @@ cc_binary(
             "HAVE_SYS_TYPES_H",
         ],
     }),
+    includes = [
+        "asm",
+        "include",
+        "output",
+        "x86",
+    ],
     visibility = ["@jpeg//:__pkg__"],
 )
 
diff --git a/third_party/systemlibs/nasm.BUILD b/third_party/nasm/BUILD.system
similarity index 100%
rename from third_party/systemlibs/nasm.BUILD
rename to third_party/nasm/BUILD.system
diff --git a/third_party/nasm/workspace.bzl b/third_party/nasm/workspace.bzl
new file mode 100644
index 0000000000..6d50f6fcad
--- /dev/null
+++ b/third_party/nasm/workspace.bzl
@@ -0,0 +1,17 @@
+"""loads the nasm library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "nasm",
+        urls = [
+            "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
+            "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.13.03.tar.bz2/sha512/d7a6b4cee8dfd603d8d4c976e5287b5cc542fa0b466ff989b743276a6e28114e64289bf02a7819eca63142a5278aa6eed57773007e5f589e15768e6456a8919d/nasm-2.13.03.tar.bz2",
+            "http://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2",
+        ],
+        sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011",
+        strip_prefix = "nasm-2.13.03",
+        build_file = "//third_party/nasm:BUILD.bazel",
+        system_build_file = "//third_party/nasm:BUILD.system",
+    )
-- 
GitLab


From 3f0155133d668cf6cee1f1fb362d2a75c04836e3 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Mon, 8 Oct 2018 10:52:15 -0700
Subject: [PATCH 0512/1085] Fix support for a single tensor to be passed to
 target_tensors

PiperOrigin-RevId: 216212953
---
 tensorflow/python/keras/engine/training.py             | 6 ++++--
 tensorflow/python/keras/engine/training_distributed.py | 4 ----
 tensorflow/python/keras/engine/training_test.py        | 4 ++++
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 2ebb4cf99f..ff2ae54ad4 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -563,9 +563,11 @@ class Model(Network):
         for name in self.output_names:
           tmp_target_tensors.append(target_tensors.get(name, None))
         target_tensors = tmp_target_tensors
+      elif tensor_util.is_tensor(target_tensors):
+        target_tensors = [target_tensors]
       else:
-        raise TypeError('Expected `target_tensors` to be '
-                        'a list or dict, but got:', target_tensors)
+        raise TypeError('Expected `target_tensors` to be a list or tuple or '
+                        'dict or a single tensor, but got:', target_tensors)
 
     for i in range(len(self.outputs)):
       if i in skip_target_indices:
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 04e8d079c0..ac759ef3aa 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -820,10 +820,6 @@ def _clone_and_build_model(model, inputs=None, targets=None):
     optimizer_config = model.optimizer.get_config()
     optimizer = model.optimizer.__class__.from_config(optimizer_config)
 
-  # TODO(priyag): Is there a cleaner way to do this? The API doc suggests a
-  # single tensor should be OK but it throws an error in that case.
-  if targets is not None and not isinstance(targets, (list, dict, tuple)):
-    targets = [targets]
   if isinstance(targets, tuple):
     targets = nest.flatten(targets)
   cloned_model.compile(
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 54ad74c08b..868fd1dc69 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -1865,6 +1865,10 @@ class TestTrainingWithDataTensors(test.TestCase):
       model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target])
       model.train_on_batch(input_val, None)
 
+      # single-output, as single tensor
+      model.compile(optimizer='rmsprop', loss='mse', target_tensors=target)
+      model.train_on_batch(input_val, None)
+
       # single-output, as dict
       model.compile(optimizer='rmsprop', loss='mse',
                     target_tensors={'dense': target})
-- 
GitLab


From 7d92890cb215f2f563fac96f1e3bde712a8749f8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 11:18:12 -0700
Subject: [PATCH 0513/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 216217887
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 0753316724..9df0ece69b 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -28980,6 +28980,74 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "LeakyRelu"
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "activations"
+    type_attr: "T"
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 0.2
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
+op {
+  name: "LeakyReluGrad"
+  input_arg {
+    name: "gradients"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "features"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "backprops"
+    type_attr: "T"
+  }
+  attr {
+    name: "alpha"
+    type: "float"
+    default_value {
+      f: 0.2
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_FLOAT
+    }
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+}
 op {
   name: "LearnedUnigramCandidateSampler"
   input_arg {
-- 
GitLab


From 1221a8e38a402513560ee71e6982b7cd8b6d901b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 11:54:12 -0700
Subject: [PATCH 0514/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216224026

---
 tensorflow/go/op/wrappers.go | 228 +++++++++++++++++------------------
 1 file changed, 114 insertions(+), 114 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 5d17605e37..fe99915a6c 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -7221,6 +7221,45 @@ func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf.
 	return components
 }
 
+// Deprecated. Use TensorArrayGradV3
+//
+// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3
+func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TensorArrayWriteV2",
+		Input: []tf.Input{
+			handle, index, value, flow_in,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Writes the given dataset to the given file using the TFRecord format.
+//
+// Arguments:
+//	input_dataset: A variant tensor representing the dataset to write.
+//	filename: A scalar string tensor representing the filename to use.
+//	compression_type: A scalar string tensor containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+//
+// Returns the created operation.
+func DatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "DatasetToTFRecord",
+		Input: []tf.Input{
+			input_dataset, filename, compression_type,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
 // Computes rectified linear 6: `min(max(features, 0), 6)`.
 func Relu6(scope *Scope, features tf.Output) (activations tf.Output) {
 	if scope.Err() != nil {
@@ -8251,44 +8290,6 @@ func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAt
 	return op.Output(0)
 }
 
-// Bucketizes 'input' based on 'boundaries'.
-//
-// For example, if the inputs are
-//     boundaries = [0, 10, 100]
-//     input = [[-5, 10000]
-//              [150,   10]
-//              [5,    100]]
-//
-// then the output will be
-//     output = [[0, 3]
-//               [3, 2]
-//               [1, 3]]
-//
-// Arguments:
-//	input: Any shape of Tensor contains with int or float type.
-//	boundaries: A sorted list of floats gives the boundary of the buckets.
-//
-// Returns Same shape with 'input', each value of input replaced with bucket index.
-//
-// @compatibility(numpy)
-// Equivalent to np.digitize.
-// @end_compatibility
-func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"boundaries": boundaries}
-	opspec := tf.OpSpec{
-		Type: "Bucketize",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2.
 type FusedBatchNormV2Attr func(optionalAttr)
 
@@ -10980,6 +10981,44 @@ func Tan(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
+// Bucketizes 'input' based on 'boundaries'.
+//
+// For example, if the inputs are
+//     boundaries = [0, 10, 100]
+//     input = [[-5, 10000]
+//              [150,   10]
+//              [5,    100]]
+//
+// then the output will be
+//     output = [[0, 3]
+//               [3, 2]
+//               [1, 3]]
+//
+// Arguments:
+//	input: Any shape of Tensor contains with int or float type.
+//	boundaries: A sorted list of floats gives the boundary of the buckets.
+//
+// Returns Same shape with 'input', each value of input replaced with bucket index.
+//
+// @compatibility(numpy)
+// Equivalent to np.digitize.
+// @end_compatibility
+func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"boundaries": boundaries}
+	opspec := tf.OpSpec{
+		Type: "Bucketize",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // EncodeJpegAttr is an optional argument to EncodeJpeg.
 type EncodeJpegAttr func(optionalAttr)
 
@@ -21413,43 +21452,6 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
-// Computes the minimum along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// Computes a tensor such that
-// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such
-// that `segment_ids[j] == i`.
-//
-// If the min is empty for a given segment ID `i`, `output[i] = 0`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMin.png" alt>
-// </div>
-//
-// Arguments:
-//
-//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SegmentMin",
-		Input: []tf.Input{
-			data, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // SdcaOptimizerAttr is an optional argument to SdcaOptimizer.
 type SdcaOptimizerAttr func(optionalAttr)
 
@@ -21924,6 +21926,43 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Computes the minimum along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// Computes a tensor such that
+// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such
+// that `segment_ids[j] == i`.
+//
+// If the min is empty for a given segment ID `i`, `output[i] = 0`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMin.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SegmentMin",
+		Input: []tf.Input{
+			data, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the sum along segments of a tensor.
 //
 // Read
@@ -29878,28 +29917,6 @@ func Cross(scope *Scope, a tf.Output, b tf.Output) (product tf.Output) {
 	return op.Output(0)
 }
 
-// Writes the given dataset to the given file using the TFRecord format.
-//
-// Arguments:
-//	input_dataset: A variant tensor representing the dataset to write.
-//	filename: A scalar string tensor representing the filename to use.
-//	compression_type: A scalar string tensor containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-//
-// Returns the created operation.
-func DatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "DatasetToTFRecord",
-		Input: []tf.Input{
-			input_dataset, filename, compression_type,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
 // AvgPool3DAttr is an optional argument to AvgPool3D.
 type AvgPool3DAttr func(optionalAttr)
 
@@ -31692,23 +31709,6 @@ func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size
 	return op.Output(0)
 }
 
-// Deprecated. Use TensorArrayGradV3
-//
-// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3
-func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorArrayWriteV2",
-		Input: []tf.Input{
-			handle, index, value, flow_in,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // SparseReduceMaxAttr is an optional argument to SparseReduceMax.
 type SparseReduceMaxAttr func(optionalAttr)
 
-- 
GitLab


From 723fd1245ed650ad07e5049faec021f4f0f6d408 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Mon, 8 Oct 2018 12:03:09 -0700
Subject: [PATCH 0515/1085] Fix the steps_per_epoch when training on mnist

PiperOrigin-RevId: 216225505
---
 tensorflow/contrib/distribute/python/examples/keras_mnist.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distribute/python/examples/keras_mnist.py b/tensorflow/contrib/distribute/python/examples/keras_mnist.py
index a84ef04196..da7f8c548f 100644
--- a/tensorflow/contrib/distribute/python/examples/keras_mnist.py
+++ b/tensorflow/contrib/distribute/python/examples/keras_mnist.py
@@ -113,7 +113,7 @@ def main(_):
                 distribute=strategy)
 
   # Train the model with the train dataset.
-  model.fit(x=train_ds, epochs=20, steps_per_epoch=310)
+  model.fit(x=train_ds, epochs=20, steps_per_epoch=468)
 
   # Evaluate the model with the eval dataset.
   score = model.evaluate(eval_ds, steps=10, verbose=0)
-- 
GitLab


From dcd3b4307a3095e3f18aef53f5034787e3cc3af6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 12:09:54 -0700
Subject: [PATCH 0516/1085] Remove the restrictions that constant resolution of
 reduce_sum operators must be on axis 0, and can only be on 1 or 2-d inputs.

PiperOrigin-RevId: 216226776
---
 .../resolve_constant_unary.cc                 |  93 +++++++++---
 .../toco/graph_transformations/tests/BUILD    |  13 ++
 .../tests/resolve_constant_unary_test.cc      | 140 ++++++++++++++++++
 3 files changed, 229 insertions(+), 17 deletions(-)
 create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index c698a9567a..5364eebbc9 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -27,6 +27,73 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 
 namespace toco {
+namespace {
+
+// Using the function reducer, reduce input along all axes in axes.
+// Put the reduced data in output, which should aleady be appropriately sized.
+// check_output_shape is set to what this code computes the final shape
+// to be, so it can be cross checked with the shape computation logic.
+void ReduceGeneric(bool keep_dims, const std::vector<int>& axes,
+                   const Shape& input_shape, const std::vector<float>& input,
+                   Shape* check_output_shape, std::vector<float>* output,
+                   const std::function<float(float, float)>& reducer) {
+  if (!IsNonEmpty(input_shape)) {
+    // Zero-dimensions will break the NextIndices() logic, so just early out if
+    // we have an empty shape.
+    return;
+  }
+
+  // Set up output_shape to be the same length as input_shape, with
+  // appropriate dimensions squashed to 1.  If keep_dims is false, we'll strip
+  // out the one dimensions at the end, but it's convenient to leave them for
+  // now.  We recompute the shape because we need the output shape to have
+  // 1-dims in all the squashed dimensions; the shape from shape computation may
+  // remove those squashed dimensions, depending on the options used.
+  Shape output_shape = input_shape;
+
+  // Reduction mask will be elementwise multiplied against the input
+  // indices to figure out the output index for the element.
+  std::vector<int> reduction_mask(input_shape.dimensions_count(), 1);
+  for (int axis : axes) {
+    CHECK_GE(axis, 0);
+    CHECK_LT(axis, input_shape.dimensions_count());
+    reduction_mask[axis] = 0;
+    output_shape.mutable_dims()->at(axis) = 1;
+  }
+
+  std::vector<int> output_indices(input_shape.dimensions_count());
+  for (int input_offset = 0; input_offset < input.size(); ++input_offset) {
+    std::vector<int> input_indices = ReverseOffset(input_shape, input_offset);
+    // Calculate the output location by squashing input indices to 0
+    // in reduced axes.
+    for (int i = 0; i < input_shape.dimensions_count(); ++i) {
+      output_indices[i] = input_indices[i] * reduction_mask[i];
+    }
+    int output_offset = Offset(output_shape, output_indices);
+    if (input_indices == output_indices) {
+      // Base element for the reduced axes
+      output->at(output_offset) = input.at(input_offset);
+    } else {
+      // Reduce with existing element.
+      output->at(output_offset) =
+          reducer(output->at(output_offset), input.at(input_offset));
+    }
+  }
+
+  if (!keep_dims) {
+    // Strip out the dims from output_shape.
+    std::vector<int> new_dims;
+    for (int i = 0; i < output_shape.dimensions_count(); ++i) {
+      if (reduction_mask[i]) {
+        new_dims.push_back(output_shape.dims(i));
+      }
+    }
+    output_shape.mutable_dims()->swap(new_dims);
+  }
+  *check_output_shape = output_shape;
+}
+
+}  // namespace
 
 bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) {
   auto& output_array = model->GetArray(op.outputs[0]);
@@ -176,27 +243,19 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     }
     auto& axis_array = model->GetArray(unary_op->inputs[1]);
     CHECK(axis_array.data_type == ArrayDataType::kInt32);
-    int axis = axis_array.GetBuffer<ArrayDataType::kInt32>().data[0];
-    CHECK_LT(axis, input_shape.dimensions_count()) << "Axis out of bounds";
 
-    // We currently only handle reduction on axis 0.
-    CHECK_EQ(axis, 0) << "Only reduction along axis 0 is supported";
-    // We currently only handle 1-D and 2-D input tensors.
-    CHECK_LE(input_shape.dimensions_count(), 2) << "Rank >2 not yet supported";
     // We only support keep_dims=true; shape prop will need to change otherwise.
     auto sum_op = static_cast<const TensorFlowSumOperator*>(unary_op);
-    CHECK(sum_op->keep_dims) << "Only keep_dims=true is supported";
+    Shape check_output_shape;
 
-    std::vector<int> indices(input_shape.dimensions_count());
-    for (int i = 0; i < input_shape.dims(1); ++i) {
-      indices[1] = i;
-      float sum = 0.f;
-      for (int j = 0; j < input_shape.dims(0); ++j) {
-        indices[0] = j;
-        sum += (*input_float_data)[Offset(input_shape, indices)];
-      }
-      output_float_data[i] = sum;
-    }
+    ReduceGeneric(
+        sum_op->keep_dims, axis_array.GetBuffer<ArrayDataType::kInt32>().data,
+        input_shape, *input_float_data, &check_output_shape, &output_float_data,
+        [](float existing, float current) -> float {
+          return existing + current;
+        });
+    CHECK(check_output_shape == output_shape)
+        << "Shape propagation output shape doesn't match output shape from op";
   } else if (unary_op->type == OperatorType::kReduceMin) {
     // At the moment only full reduction across all dimensions is supported.
     // TODO(starka): Output should not be padded.
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD
index acf1e3ede5..6f1be298ca 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD
@@ -30,3 +30,16 @@ tf_cc_test(
         "@com_google_googletest//:gtest_main",
     ],
 )
+
+tf_cc_test(
+    name = "resolve_constant_unary_test",
+    srcs = ["resolve_constant_unary_test.cc"],
+    tags = ["no_oss"],
+    deps = [
+        "//tensorflow/contrib/lite/toco:graph_transformations",
+        "//tensorflow/contrib/lite/toco:model",
+        "//tensorflow/contrib/lite/toco:tooling_util",
+        "@com_google_absl//absl/memory",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
new file mode 100644
index 0000000000..a53abc9941
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
@@ -0,0 +1,140 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <tuple>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "absl/memory/memory.h"
+#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h"
+#include "tensorflow/contrib/lite/toco/model.h"
+#include "tensorflow/contrib/lite/toco/tooling_util.h"
+
+namespace toco {
+
+namespace {
+
+void RunResolveSum(const std::vector<float>& input,
+                   const std::vector<int>& input_shape,
+                   const std::vector<int>& axis,
+                   const std::vector<int>& output_shape,
+                   const std::vector<float>& expected_output) {
+  Model model;
+  Array& input0 = model.GetOrCreateArray("input0");
+  Array& input1 = model.GetOrCreateArray("input1");
+  Array& output = model.GetOrCreateArray("output");
+
+  *input0.mutable_shape()->mutable_dims() = input_shape;
+  input0.data_type = ArrayDataType::kFloat;
+  input0.GetMutableBuffer<ArrayDataType::kFloat>().data = input;
+
+  *input1.mutable_shape()->mutable_dims() = {static_cast<int>(axis.size())};
+  input1.GetMutableBuffer<ArrayDataType::kInt32>().data = axis;
+  input1.data_type = ArrayDataType::kInt32;
+
+  *output.mutable_shape()->mutable_dims() = output_shape;
+
+  auto sum_op = absl::make_unique<TensorFlowSumOperator>();
+  sum_op->keep_dims = true;
+  sum_op->inputs = {"input0", "input1"};
+  sum_op->outputs = {"output"};
+  model.operators.push_back(std::move(sum_op));
+  ResolveConstantUnaryOperator().Run(&model, 0);
+  EXPECT_EQ(model.GetArray("output").GetBuffer<ArrayDataType::kFloat>().data,
+            expected_output);
+  EXPECT_EQ(model.GetArray("output").shape().dims(), output_shape);
+}
+
+// Reduce a 2d array across axis 0
+TEST(ResolveConstantUnary, ResolveSumAxis0_2D) {
+  // clang-format off
+  RunResolveSum(
+      // Input data
+      {3, 1, 4, 1,
+       5, 9, 2, 6,
+       5, 3, 5, 8},
+
+      // Input shape
+      {3, 4},
+
+      // Axes
+      {0},
+
+      // Expected output shape,
+      {1, 4},
+
+      // Expected output
+      {13, 13, 11, 15});
+  // clang-format on
+}
+
+// Reduce a 2d array across axis 1
+TEST(ResolveConstantUnary, ResolveSumAxis1_2D) {
+  // clang-format off
+  RunResolveSum(
+      // Input data
+      {3, 1, 4, 1,
+       5, 9, 2, 6,
+       5, 3, 5, 8},
+
+      // Input shape
+      {3, 4},
+
+      // Axes
+      {1},
+
+      // Expected output shape,
+      {3, 1},
+
+      // Expected output
+      {9, 22, 21});
+  // clang-format on
+}
+
+// Reduce a 3d tensor across axes 0 and 2.
+TEST(ResolveConstantUnary, ResolveSumAxis0_2_3D) {
+  // clang-format off
+  RunResolveSum(
+      // Input data
+      {  0,   1,   2,
+         3,  10,  11,
+        12,  13,  20,
+        21,  22,  23,
+
+       100, 101, 102,
+       103, 110, 111,
+       112, 113, 120,
+       121, 122, 123,
+
+       200, 201, 202,
+       203, 210, 211,
+       212, 213, 220,
+       221, 222, 223 },
+
+      // Input shape
+      {3, 4, 3},
+
+      // Axes
+      {0, 2},
+
+      // Expected output shape,
+      {1, 4, 1},
+
+      // Expected output, generated using octave.
+      { 909, 972, 1035, 1098});
+  // clang-format on
+}
+
+}  // namespace
+}  // namespace toco
-- 
GitLab


From d3595b1534a855f3d0da35d3f1dd8b5d464b1b70 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 12:34:00 -0700
Subject: [PATCH 0517/1085] Fix a couple of reference leaks

PiperOrigin-RevId: 216230391
---
 tensorflow/python/pywrap_tfe.i | 1 +
 tensorflow/python/util/util.cc | 8 +++-----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index 61e0abbfcb..adbce95c6f 100755
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -209,6 +209,7 @@ limitations under the License.
     SWIG_fail;
   } else {
     int num_outputs = $1->size();
+    Py_CLEAR($result);
     $result = PyList_New(num_outputs);
     for (int i = 0; i < num_outputs; ++i) {
       PyObject *output;
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index 7b3e618e84..11eb9ce947 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -825,18 +825,16 @@ PyObject* IsNamedtuple(PyObject* o, bool strict) {
 }
 
 PyObject* SameNamedtuples(PyObject* o1, PyObject* o2) {
-  PyObject* f1 = PyObject_GetAttrString(o1, "_fields");
-  PyObject* f2 = PyObject_GetAttrString(o2, "_fields");
+  Safe_PyObjectPtr f1 = make_safe(PyObject_GetAttrString(o1, "_fields"));
+  Safe_PyObjectPtr f2 = make_safe(PyObject_GetAttrString(o2, "_fields"));
   if (f1 == nullptr || f2 == nullptr) {
-    Py_XDECREF(f1);
-    Py_XDECREF(f2);
     PyErr_SetString(
         PyExc_RuntimeError,
         "Expected namedtuple-like objects (that have _fields attr)");
     return nullptr;
   }
 
-  if (PyObject_RichCompareBool(f1, f2, Py_NE)) {
+  if (PyObject_RichCompareBool(f1.get(), f2.get(), Py_NE)) {
     Py_RETURN_FALSE;
   }
 
-- 
GitLab


From 9b558126e31d25ec4e82cb4f50033d6eca44349a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 12:58:29 -0700
Subject: [PATCH 0518/1085] Add timeout mechanism to Grappler meta optimizer.
 This is only a best-effort mechanism, since the meta optimizer only checks if
 it has been cancelled before running each sub-optimizer. We can add
 cancellation to each sub-optimizer if necessary.

PiperOrigin-RevId: 216234262
---
 .../grappler/optimizers/graph_optimizer.h     | 21 ++++++
 .../grappler/optimizers/meta_optimizer.cc     | 68 ++++++++++++++++++-
 .../core/grappler/optimizers/meta_optimizer.h | 15 +++-
 .../optimizers/meta_optimizer_test.cc         | 62 +++++++++++++++++
 .../core/protobuf/rewriter_config.proto       |  4 ++
 5 files changed, 165 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer.h b/tensorflow/core/grappler/optimizers/graph_optimizer.h
index 765dd13263..bd6bf9f860 100644
--- a/tensorflow/core/grappler/optimizers/graph_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/graph_optimizer.h
@@ -16,8 +16,11 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_
 
+#include <atomic>
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/thread_annotations.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -29,6 +32,7 @@ struct GrapplerItem;
 // optimization of a GrapplerItem for running on a cluster.
 class GraphOptimizer {
  public:
+  GraphOptimizer() : is_cancelled_(false) {}
   virtual ~GraphOptimizer() {}
 
   virtual string name() const = 0;
@@ -45,8 +49,25 @@ class GraphOptimizer {
   // call to Optimize) performed.  Lower "result" scores are better.
   virtual void Feedback(Cluster* cluster, const GrapplerItem& item,
                         const GraphDef& optimized_graph, double result) = 0;
+
+  // Best effort cancellation. Sets is_cancelled to true and requests that the
+  // optimizer returns as soon as possible from active calls to Optimize() or
+  // FeedBack().
+  void Cancel() { is_cancelled_ = true; }
+
+  bool is_cancelled() const { return is_cancelled_; }
+
+ private:
+  std::atomic<bool> is_cancelled_;
 };
 
+#define GRAPPLER_RETURN_IF_CANCELLED()                                  \
+  do {                                                                  \
+    if (is_cancelled()) {                                               \
+      return errors::DeadlineExceeded(this->name(), " was cancelled."); \
+    }                                                                   \
+  } while (0)
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 3f33b16ba8..7488cedec5 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -14,6 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/meta_optimizer.h"
+
+#include <memory>
+
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/versions.pb.h"
@@ -37,7 +40,11 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/functions.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/platform/notification.h"
+#include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
@@ -115,6 +122,21 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
 
 #undef MK_OPT
 
+MetaOptimizer::MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg)
+    : cpu_device_(cpu_device), cfg_(cfg) {
+  // TODO(rmlarsen): Increase kNumThreads to, say, port::NumSchedulableCPUs()
+  // if we want to the threadpool for parallelizing Grappler
+  const int kNumThreads = 1;
+  thread_pool_ = absl::make_unique<thread::ThreadPool>(
+      Env::Default(), "MetaOptimizerThreadPool", kNumThreads);
+}
+
+MetaOptimizer::~MetaOptimizer() {
+  // The ThreadPool destructor waits for threads to finish, so we don't
+  // pull the rug out from under them.
+  thread_pool_.reset();
+}
+
 Status MetaOptimizer::InitializeOptimizers(
     std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const {
   if (cfg_.disable_meta_optimizer()) {
@@ -310,6 +332,7 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
 
     VLOG(4) << "Starting optimization iteration " << iteration;
     for (const auto& optimizer : optimizers) {
+      GRAPPLER_RETURN_IF_CANCELLED();
       // Some optimizers can run only once.
       if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue;
       // Some must run only on the last iteration.
@@ -368,6 +391,7 @@ Status MetaOptimizer::RunOptimizer(
   // resets optimized_graph to an empty graph.
   optimized_graph->Swap(&optimized_item->graph);
   *optimized_graph = GraphDef();
+  // TODO(rmlarsen): Add timeout for individual optimizers.
   Status status =
       optimizer->Optimize(cluster, *optimized_item, optimized_graph);
   uint64 end_us = Env::Default()->NowMicros();
@@ -389,14 +413,15 @@ Status MetaOptimizer::RunOptimizer(
   return status;
 }
 
-Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
-                               GraphDef* optimized_graph) {
+Status MetaOptimizer::OptimizeMainGraphAndFunctionLibrary(
+    Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) {
   VLOG(1) << "Starting optimization for grappler item: " << item.id;
   optimization_results_.clear();
 
   // 1. Optimize main graph
   TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph));
   VLOG(1) << "Optimized main graph.";
+  GRAPPLER_RETURN_IF_CANCELLED();
 
   // Skip optimizing functions if this is a TPU graph. Currently, Grappler
   // passes do not handle TPU functions correctly in a variety of ways (Note
@@ -432,6 +457,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     optimize_function_library = false;
 
     for (const FunctionDef& func : optimized_graph->library().function()) {
+      GRAPPLER_RETURN_IF_CANCELLED();
+
       const string& func_name = func.signature().name();
 
       // Skip already optimized functions.
@@ -506,6 +533,43 @@ void MetaOptimizer::PrintResult() {
   }
 }
 
+Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
+                               GraphDef* optimized_graph) {
+  const int64 kFiveMinutesInUsec = 5 * 60 * 1000 * 1000;
+  const int64 timeout_usec = (cfg_.meta_optimizer_timeout_ms() == 0
+                                  ? kFiveMinutesInUsec
+                                  : cfg_.meta_optimizer_timeout_ms() * 1000);
+  if (timeout_usec < 0) {
+    return OptimizeMainGraphAndFunctionLibrary(cluster, item, optimized_graph);
+  }
+
+  GraphDef optimized_with_timeout;
+  Status status;
+  Notification done;
+  thread_pool_->Schedule(
+      [this, cluster, &done, &optimized_with_timeout, &item, &status]() {
+        status = this->OptimizeMainGraphAndFunctionLibrary(
+            cluster, item, &optimized_with_timeout);
+        done.Notify();
+      });
+
+  const bool notified = WaitForNotificationWithTimeout(&done, timeout_usec);
+  if (notified && status.ok()) {
+    optimized_graph->Swap(&optimized_with_timeout);
+  } else {
+    *optimized_graph = item.graph;
+    if (!notified) {
+      this->Cancel();
+      done.WaitForNotification();
+      status = errors::DeadlineExceeded(
+          "Grappler MetaOptimizer timed out after ",
+          static_cast<float>(timeout_usec) / (1000 * 1000), " seconds");
+      LOG(WARNING) << status.error_message();
+    }
+  }
+  return status;
+}
+
 void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item,
                              const GraphDef& pruned_graph, double result) {
   // Nothing to do for MetaOptimizer.
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h
index 99a0a33ffa..35d6a4559b 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
 
 namespace tensorflow {
@@ -28,9 +29,8 @@ namespace grappler {
 // Run the other grappler optimizers based on the specified rewriter config.
 class MetaOptimizer : public GraphOptimizer {
  public:
-  MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg)
-      : cpu_device_(cpu_device), cfg_(cfg) {}
-  ~MetaOptimizer() override = default;
+  MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg);
+  ~MetaOptimizer();
 
   string name() const override { return "meta_optimizer"; };
 
@@ -65,9 +65,18 @@ class MetaOptimizer : public GraphOptimizer {
   Status OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
                        GraphDef* optimized_graph);
 
+  // Run optimization passes over the main graph and for functions in the
+  // function library.
+  Status OptimizeMainGraphAndFunctionLibrary(Cluster* cluster,
+                                             const GrapplerItem& item,
+                                             GraphDef* optimized_graph);
+
   DeviceBase* const cpu_device_;  // may be NULL
   RewriterConfig cfg_;
 
+  // Thread pool used for launching optimizers asynchronously.
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
+
   struct OptimizerResult {
     string optimizer_name;
     string result;
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
index 3f3f43382f..7f1dd91f09 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
@@ -461,6 +461,68 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryWithRestrictions) {
   EXPECT_FALSE(allowed_optimizations_my_mul_2->non_differentiable_rewrites);
 }
 
+class SleepingOptimizer : public CustomGraphOptimizer {
+ public:
+  SleepingOptimizer() {}
+  string name() const override { return "test_optimizer"; }
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* optimized_graph) override {
+    *optimized_graph = item.graph;
+    optimized_graph->add_node();
+    sleep(1);
+    return Status::OK();
+  }
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimized_graph, double result) override {}
+};
+
+REGISTER_GRAPH_OPTIMIZER(SleepingOptimizer);
+
+TEST_F(MetaOptimizerTest, OptimizerTimesOut) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+
+  RewriterConfig rewriter_config;
+  rewriter_config.add_optimizers("SleepingOptimizer");
+  rewriter_config.set_min_graph_nodes(-1);
+  rewriter_config.set_meta_optimizer_timeout_ms(1500);
+  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO);
+
+  MetaOptimizer optimizer(nullptr, rewriter_config);
+  GraphDef output;
+  const Status status = optimizer.Optimize(nullptr, item, &output);
+  EXPECT_EQ(status.error_message(),
+            "Grappler MetaOptimizer timed out after 1.5 seconds");
+  // Make sure the graph was reverted to the original regardless of when the
+  // optimizer timed out.
+  CompareGraphs(item.graph, output);
+}
+
+TEST_F(MetaOptimizerTest, OptimizerDoesNotTimeOut) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+
+  RewriterConfig rewriter_config;
+  rewriter_config.add_optimizers("SleepingOptimizer");
+  rewriter_config.set_min_graph_nodes(-1);
+  rewriter_config.set_meta_optimizer_timeout_ms(1500);
+  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::ONE);
+  MetaOptimizer optimizer(nullptr, rewriter_config);
+  GraphDef output;
+  const Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  EXPECT_EQ(item.graph.node_size() + 1, output.node_size());
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 8c31468ff5..7ccd54b818 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -83,6 +83,10 @@ message RewriterConfig {
   // Controls how many times we run the optimizers in meta optimizer (default
   // is once).
   NumIterationsType meta_optimizer_iterations = 12;
+  // Maximum number of milliseconds to spend optimizing a single graph before
+  // timing out. If equal to 0 the system picks a default (currently 5 minutes).
+  // If less than 0 the optimizer will never time out.
+  int64 meta_optimizer_timeout_ms = 20;
 
   // The minimum number of nodes in a graph to optimizer. For smaller graphs,
   // optimization is skipped.
-- 
GitLab


From 76ab96c8a5b2d77dfc191c94ff54fd5e52c561f2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 13:31:52 -0700
Subject: [PATCH 0519/1085] Changed Adam algorithm variant formula from
 sqrt(max(v, epsilon**2)) to sqrt(v + epsilon**2) and changed flag name
 accordingly.

PiperOrigin-RevId: 216240045
---
 tensorflow/contrib/tpu/proto/optimization_parameters.proto | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index 8529b48c15..c2e3be03db 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -62,9 +62,9 @@ message FtrlParameters {
 // (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/AdamOptimizer). If
 // use_non_lazy_adam is enabled, use_gradient_accumulation is also required in
 // order to get correct results; a warning will be printed otherwise (which may
-// change to an error in the future). If use_max_with_epsilon is set, the Adam
+// change to an error in the future). If use_sum_inside_sqrt is set, the Adam
 // variable update formula will be changed from m / (sqrt(v) + epsilon) to
-// m / max(sqrt(v), abs(epsilon)); this option improves the performance of TPU
+// m / sqrt(v + epsilon**2); this option improves the performance of TPU
 // training and is not expected to harm model quality.
 message AdamParameters {
   float beta1 = 3;
@@ -73,7 +73,7 @@ message AdamParameters {
   float initial_m = 6;
   float initial_v = 7;
   bool use_non_lazy_adam = 8;
-  bool use_max_with_epsilon = 9;
+  bool use_sum_inside_sqrt = 10;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
-- 
GitLab


From b052c51374f558c25a29c70918d79205dfec808b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 13:46:12 -0700
Subject: [PATCH 0520/1085] Add tf.BenchmarkConfig that returns a session
 config appropriate for benchmarking. At the moment, it returns a default
 config with only Grappler dependency optimizer disabled. Many benchmarks wrap
 the subgraph they want to time in control_flow_ops.group() to avoid including
 the overhead of copying the output back to the Python client in the
 measurement. In the graph, this only adds a control dependency between the
 subgraph output and the fetch node, which in turn (often) causes the
 dependency optimizer to turn all nodes in the graph into no-ops.

PiperOrigin-RevId: 216242463
---
 .../python/kernel_tests/benchmark_test.py     |  2 +-
 .../python/kernel_tests/cholesky_op_test.py   |  7 ++-
 .../kernel_tests/determinant_op_test.py       |  9 +--
 .../kernel_tests/matrix_band_part_op_test.py  |  5 +-
 .../matrix_exponential_op_test.py             |  5 +-
 .../kernel_tests/matrix_inverse_op_test.py    |  5 +-
 .../kernel_tests/matrix_logarithm_op_test.py  |  3 +-
 .../kernel_tests/matrix_solve_ls_op_test.py   |  5 +-
 .../kernel_tests/matrix_solve_op_test.py      |  5 +-
 .../sparse_tensors_map_ops_test.py            |  3 +-
 .../python/kernel_tests/where_op_test.py      |  5 +-
 tensorflow/python/ops/image_ops_test.py       | 62 +++++++++----------
 tensorflow/python/platform/benchmark.py       | 14 +++++
 .../tools/api/golden/v1/tensorflow.test.pbtxt |  4 ++
 .../tools/api/golden/v2/tensorflow.test.pbtxt |  4 ++
 15 files changed, 84 insertions(+), 54 deletions(-)

diff --git a/tensorflow/python/kernel_tests/benchmark_test.py b/tensorflow/python/kernel_tests/benchmark_test.py
index 78b6e38d94..5777a5d097 100644
--- a/tensorflow/python/kernel_tests/benchmark_test.py
+++ b/tensorflow/python/kernel_tests/benchmark_test.py
@@ -64,7 +64,7 @@ class TestReportingBenchmark(test.Benchmark):
                 "other_key": "string"})
 
   def benchmark_times_an_op(self):
-    with session.Session() as sess:
+    with session.Session(config=benchmark.benchmark_config()) as sess:
       a = constant_op.constant(0.0)
       a_plus_a = a + a
       return self.run_op_benchmark(
diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py
index 782e6b5068..2ebf74a4d7 100644
--- a/tensorflow/python/kernel_tests/cholesky_op_test.py
+++ b/tensorflow/python/kernel_tests/cholesky_op_test.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.linalg import linalg
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 
@@ -327,7 +328,7 @@ class CholeskyBenchmark(test.Benchmark):
   def benchmarkCholeskyOp(self):
     for shape in self.shapes:
       with ops.Graph().as_default(), \
-          session.Session() as sess, \
+          session.Session(config=benchmark.benchmark_config()) as sess, \
           ops.device("/cpu:0"):
         matrix = variables.Variable(self._GenerateMatrix(shape))
         l = linalg_ops.cholesky(matrix)
@@ -341,7 +342,7 @@ class CholeskyBenchmark(test.Benchmark):
 
       if test.is_gpu_available(True):
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/device:GPU:0"):
           matrix = variables.Variable(self._GenerateMatrix(shape))
           l = linalg_ops.cholesky(matrix)
@@ -359,7 +360,7 @@ class CholeskyBenchmark(test.Benchmark):
       for shape in self.shapes:
         matrix = self._GenerateMatrix(shape)
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device(device):
           l = variables.Variable(np.linalg.cholesky(matrix))
           grad_matrix = variables.Variable(
diff --git a/tensorflow/python/kernel_tests/determinant_op_test.py b/tensorflow/python/kernel_tests/determinant_op_test.py
index a52b2c0dc3..fb114f9f24 100644
--- a/tensorflow/python/kernel_tests/determinant_op_test.py
+++ b/tensorflow/python/kernel_tests/determinant_op_test.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import gen_linalg_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -185,8 +186,8 @@ class MatrixDeterminantBenchmark(test.Benchmark):
 
   def benchmarkMatrixDeterminantOp(self):
     for shape in self.shapes:
-      with ops.Graph().as_default(), session.Session() as sess, ops.device(
-          "/cpu:0"):
+      with ops.Graph().as_default(), session.Session(
+          config=benchmark.benchmark_config()) as sess, ops.device("/cpu:0"):
         matrix = self._GenerateMatrix(shape)
         d = linalg_ops.matrix_determinant(matrix)
         variables.global_variables_initializer().run()
@@ -198,8 +199,8 @@ class MatrixDeterminantBenchmark(test.Benchmark):
             name="matrix_determinant_cpu_{shape}".format(shape=shape))
 
       if test.is_gpu_available(True):
-        with ops.Graph().as_default(), session.Session() as sess, ops.device(
-            "/gpu:0"):
+        with ops.Graph().as_default(), session.Session(
+            config=benchmark.benchmark_config()) as sess, ops.device("/gpu:0"):
           matrix = self._GenerateMatrix(shape)
           d = linalg_ops.matrix_determinant(matrix)
           variables.global_variables_initializer().run()
diff --git a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
index 68d626de2c..a0ef3a607e 100644
--- a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test as test_lib
 
 
@@ -109,7 +110,7 @@ class MatrixBandPartBenchmark(test_lib.Benchmark):
     for shape_ in self.shapes:
       for limits in (-1, -1), (-1, 0), (0, -1), (2, 2):
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/cpu:0"):
           matrix = variables.Variable(array_ops.ones(shape_))
           band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
@@ -123,7 +124,7 @@ class MatrixBandPartBenchmark(test_lib.Benchmark):
 
         if test_lib.is_gpu_available(True):
           with ops.Graph().as_default(), \
-              session.Session() as sess, \
+              session.Session(config=benchmark.benchmark_config()) as sess, \
               ops.device("/gpu:0"):
             matrix = variables.Variable(array_ops.ones(shape_))
             band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
index 0386e91276..9630c052b8 100644
--- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.linalg import linalg_impl
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -181,7 +182,7 @@ class MatrixExponentialBenchmark(test.Benchmark):
   def benchmarkMatrixExponentialOp(self):
     for shape in self.shapes:
       with ops.Graph().as_default(), \
-          session.Session() as sess, \
+          session.Session(config=benchmark.benchmark_config()) as sess, \
           ops.device("/cpu:0"):
         matrix = self._GenerateMatrix(shape)
         expm = linalg_impl.matrix_exponential(matrix)
@@ -195,7 +196,7 @@ class MatrixExponentialBenchmark(test.Benchmark):
 
       if test.is_gpu_available(True):
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/gpu:0"):
           matrix = self._GenerateMatrix(shape)
           expm = linalg_impl.matrix_exponential(matrix)
diff --git a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
index 720ba806e9..8bda04b53d 100644
--- a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -179,7 +180,7 @@ class MatrixInverseBenchmark(test.Benchmark):
     for adjoint in False, True:
       for shape in self.shapes:
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/cpu:0"):
           matrix = self._GenerateMatrix(shape)
           inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
@@ -193,7 +194,7 @@ class MatrixInverseBenchmark(test.Benchmark):
 
         if test.is_gpu_available(True):
           with ops.Graph().as_default(), \
-              session.Session() as sess, \
+              session.Session(config=benchmark.benchmark_config()) as sess, \
               ops.device("/gpu:0"):
             matrix = self._GenerateMatrix(shape)
             inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
index 723a15fbd1..3205e211d9 100644
--- a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
@@ -31,6 +31,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.linalg import linalg_impl
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -159,7 +160,7 @@ class MatrixLogarithmBenchmark(test.Benchmark):
   def benchmarkMatrixLogarithmOp(self):
     for shape in self.shapes:
       with ops.Graph().as_default(), \
-          session.Session() as sess, \
+          session.Session(config=benchmark.benchmark_config()) as sess, \
           ops.device("/cpu:0"):
         matrix = self._GenerateMatrix(shape)
         logm = gen_linalg_ops.matrix_logarithm(matrix)
diff --git a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
index de495968a7..225a10e117 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test as test_lib
 
 
@@ -313,7 +314,7 @@ class MatrixSolveLsBenchmark(test_lib.Benchmark):
       for num_rhs in 1, 2, matrix_shape[-1]:
 
         with ops.Graph().as_default(), \
-            session.Session() as sess, \
+            session.Session(config=benchmark.benchmark_config()) as sess, \
             ops.device("/cpu:0"):
           matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
           x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
@@ -328,7 +329,7 @@ class MatrixSolveLsBenchmark(test_lib.Benchmark):
 
         if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513):
           with ops.Graph().as_default(), \
-                session.Session() as sess, \
+                session.Session(config=benchmark.benchmark_config()) as sess, \
                 ops.device("/gpu:0"):
             matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
             x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
diff --git a/tensorflow/python/kernel_tests/matrix_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
index b8f2736b7b..264df2565c 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -167,7 +168,7 @@ class MatrixSolveBenchmark(test.Benchmark):
         for num_rhs in 1, 2, matrix_shape[-1]:
 
           with ops.Graph().as_default(), \
-              session.Session() as sess, \
+              session.Session(config=benchmark.benchmark_config()) as sess, \
               ops.device("/cpu:0"):
             matrix, rhs = self._GenerateTestData(matrix_shape, num_rhs)
             x = linalg_ops.matrix_solve(matrix, rhs, adjoint=adjoint)
@@ -185,7 +186,7 @@ class MatrixSolveBenchmark(test.Benchmark):
 
           if run_gpu_test:
             with ops.Graph().as_default(), \
-                session.Session() as sess, \
+                session.Session(config=benchmark.benchmark_config()) as sess, \
                 ops.device("/gpu:0"):
               matrix, rhs = self._GenerateTestData(matrix_shape, num_rhs)
               x = linalg_ops.matrix_solve(matrix, rhs, adjoint=adjoint)
diff --git a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
index 31e84341ae..fdfe1001b8 100644
--- a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import variables
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 # pylint: disable=protected-access
@@ -192,7 +193,7 @@ class BenchmarkSparseTensorsMapVsSerialization(test.Benchmark):
         sorted(zip(indices_batch, indices_value)), dtype=np.int64)
     values = ["feature_value_for_embedding_lookup"] * num_elements
     shape = np.asarray([batch_size, num_elements], dtype=np.int64)
-    with session.Session() as sess:
+    with session.Session(config=benchmark.benchmark_config()) as sess:
       with ops.device("/cpu:0"):
         indices = variables.Variable(indices)
         values = variables.Variable(values)
diff --git a/tensorflow/python/kernel_tests/where_op_test.py b/tensorflow/python/kernel_tests/where_op_test.py
index 29fb002ef4..04ac589432 100644
--- a/tensorflow/python/kernel_tests/where_op_test.py
+++ b/tensorflow/python/kernel_tests/where_op_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.platform import benchmark
 from tensorflow.python.platform import test
 
 
@@ -160,7 +161,7 @@ class WhereBenchmark(test.Benchmark):
           x = random_ops.random_uniform((m, n), dtype=dtypes.float32) <= p
           v = resource_variable_ops.ResourceVariable(x)
           op = array_ops.where(v)
-        with session.Session() as sess:
+        with session.Session(config=benchmark.benchmark_config()) as sess:
           v.initializer.run()
           r = self.run_op_benchmark(sess, op, min_iters=100, name=name)
           gb_processed_input = m * n / 1.0e9
@@ -186,7 +187,7 @@ class WhereBenchmark(test.Benchmark):
           y = resource_variable_ops.ResourceVariable(y_gen)
           c = resource_variable_ops.ResourceVariable(c_gen)
           op = array_ops.where(c, x, y)
-        with session.Session() as sess:
+        with session.Session(config=benchmark.benchmark_config()) as sess:
           x.initializer.run()
           y.initializer.run()
           c.initializer.run()
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 35fdee4fad..ff86df6346 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -602,20 +602,19 @@ class AdjustHueBenchmark(test.Benchmark):
     if cpu_count is not None:
       config.inter_op_parallelism_threads = 1
       config.intra_op_parallelism_threads = cpu_count
-    with session.Session("", graph=ops.Graph(), config=config) as sess:
-      with ops.device(device):
-        inputs = variables.Variable(
-            random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255,
-            trainable=False,
-            dtype=dtypes.float32)
-        delta = constant_op.constant(0.1, dtype=dtypes.float32)
-        outputs = image_ops.adjust_hue(inputs, delta)
-        run_op = control_flow_ops.group(outputs)
-        sess.run(variables.global_variables_initializer())
-        for i in xrange(warmup_rounds + benchmark_rounds):
-          if i == warmup_rounds:
-            start = time.time()
-          sess.run(run_op)
+    with self.benchmark_session(config=config, device=device) as sess:
+      inputs = variables.Variable(
+          random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255,
+          trainable=False,
+          dtype=dtypes.float32)
+      delta = constant_op.constant(0.1, dtype=dtypes.float32)
+      outputs = image_ops.adjust_hue(inputs, delta)
+      run_op = control_flow_ops.group(outputs)
+      sess.run(variables.global_variables_initializer())
+      for i in xrange(warmup_rounds + benchmark_rounds):
+        if i == warmup_rounds:
+          start = time.time()
+        sess.run(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
     tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
@@ -646,21 +645,20 @@ class AdjustSaturationBenchmark(test.Benchmark):
     if cpu_count is not None:
       config.inter_op_parallelism_threads = 1
       config.intra_op_parallelism_threads = cpu_count
-    with session.Session("", graph=ops.Graph(), config=config) as sess:
-      with ops.device(device):
-        inputs = variables.Variable(
-            random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255,
-            trainable=False,
-            dtype=dtypes.float32)
-        delta = constant_op.constant(0.1, dtype=dtypes.float32)
-        outputs = image_ops.adjust_saturation(inputs, delta)
-        run_op = control_flow_ops.group(outputs)
-        sess.run(variables.global_variables_initializer())
-        for _ in xrange(warmup_rounds):
-          sess.run(run_op)
-        start = time.time()
-        for _ in xrange(benchmark_rounds):
-          sess.run(run_op)
+    with self.benchmark_session(config=config, device=device) as sess:
+      inputs = variables.Variable(
+          random_ops.random_uniform(image_shape, dtype=dtypes.float32) * 255,
+          trainable=False,
+          dtype=dtypes.float32)
+      delta = constant_op.constant(0.1, dtype=dtypes.float32)
+      outputs = image_ops.adjust_saturation(inputs, delta)
+      run_op = control_flow_ops.group(outputs)
+      sess.run(variables.global_variables_initializer())
+      for _ in xrange(warmup_rounds):
+        sess.run(run_op)
+      start = time.time()
+      for _ in xrange(benchmark_rounds):
+        sess.run(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
     tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
@@ -699,7 +697,7 @@ class ResizeBilinearBenchmark(test.Benchmark):
         deps = [resize_op]
       benchmark_op = control_flow_ops.group(*deps)
 
-    with session.Session() as sess:
+    with self.benchmark_session() as sess:
       sess.run(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess,
@@ -747,7 +745,7 @@ class ResizeBicubicBenchmark(test.Benchmark):
         deps = [resize_op]
       benchmark_op = control_flow_ops.group(*deps)
 
-    with session.Session() as sess:
+    with self.benchmark_session() as sess:
       sess.run(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess,
@@ -804,7 +802,7 @@ class ResizeAreaBenchmark(test.Benchmark):
         deps = [resize_op]
       benchmark_op = control_flow_ops.group(*deps)
 
-    with session.Session() as sess:
+    with self.benchmark_session() as sess:
       sess.run(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess,
diff --git a/tensorflow/python/platform/benchmark.py b/tensorflow/python/platform/benchmark.py
index fa17b17d10..4f7abb311a 100644
--- a/tensorflow/python/platform/benchmark.py
+++ b/tensorflow/python/platform/benchmark.py
@@ -27,6 +27,7 @@ import time
 import six
 
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.core.util import test_log_pb2
 from tensorflow.python.client import timeline
 from tensorflow.python.platform import app
@@ -182,6 +183,19 @@ class Benchmark(six.with_metaclass(_BenchmarkRegistrar, object)):
         throughput=throughput, extras=extras)
 
 
+@tf_export("test.benchmark_config")
+def benchmark_config():
+  """Returns a tf.ConfigProto for disabling the dependency optimizer.
+
+    Returns:
+      A TensorFlow ConfigProto object.
+  """
+  config = config_pb2.ConfigProto()
+  config.graph_options.rewrite_options.dependency_optimization = (
+      rewriter_config_pb2.RewriterConfig.OFF)
+  return config
+
+
 @tf_export("test.Benchmark")
 class TensorFlowBenchmark(Benchmark):
   """Abstract class that provides helpers for TensorFlow benchmarks."""
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt
index abe9b068ae..984c584c9e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.test.pbtxt
@@ -20,6 +20,10 @@ tf_module {
     name: "assert_equal_graph_def"
     argspec: "args=[\'actual\', \'expected\', \'checkpoint_v2\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
+  member_method {
+    name: "benchmark_config"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "compute_gradient"
     argspec: "args=[\'x\', \'x_shape\', \'y\', \'y_shape\', \'x_init_value\', \'delta\', \'init_targets\', \'extra_feed_dict\'], varargs=None, keywords=None, defaults=[\'None\', \'0.001\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt
index abe9b068ae..984c584c9e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt
@@ -20,6 +20,10 @@ tf_module {
     name: "assert_equal_graph_def"
     argspec: "args=[\'actual\', \'expected\', \'checkpoint_v2\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
+  member_method {
+    name: "benchmark_config"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "compute_gradient"
     argspec: "args=[\'x\', \'x_shape\', \'y\', \'y_shape\', \'x_init_value\', \'delta\', \'init_targets\', \'extra_feed_dict\'], varargs=None, keywords=None, defaults=[\'None\', \'0.001\', \'None\', \'None\'], "
-- 
GitLab


From df0753ae15be34003df8af09a643917734b1fe16 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 2 Sep 2018 23:15:22 +0000
Subject: [PATCH 0521/1085] Support 3/4/5 dimensional input for bias_add with
 NCHW data format

This fix tries to address part of the issue raised in 20527 where
biad_add only support 4 dimensional input with NCHW data format,
and is causing tf.layers.conv3d to not suppport dynamic shapes
with `channel_first` data format.

This fix add the 3/4/5 dimensional input for bias_add with NCHW
data format as the first step to address the issue. Follow up PR
will be added to fix tf.layers.conv3d issue.
This fix also only adds bias_add support, BiasGradOp will be worked
on later (TODO).

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/bias_op.cc | 74 ++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 24 deletions(-)

diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc
index e15ea82e7d..7c66558554 100644
--- a/tensorflow/core/kernels/bias_op.cc
+++ b/tensorflow/core/kernels/bias_op.cc
@@ -43,11 +43,12 @@ typedef Eigen::SyclDevice SYCLDevice;
 namespace {
 
 void GetBiasValueDims(const Tensor& value_tensor, TensorFormat data_format,
-                      int32* batch, int32* height, int32* width,
+                      int32* batch, int32* height, int32* width, int32* depth,
                       int32* channel) {
   *batch = 1;
-  *width = 1;
   *height = 1;
+  *width = 1;
+  *depth = 1;
   *channel = 1;
   if (data_format == FORMAT_NHWC) {
     int32 channel_dim = value_tensor.dims() - 1;
@@ -56,14 +57,14 @@ void GetBiasValueDims(const Tensor& value_tensor, TensorFormat data_format,
       *batch *= static_cast<int32>(value_tensor.dim_size(i));
     }
   } else if (data_format == FORMAT_NCHW) {
-    int32 channel_dim = value_tensor.dims() - 3;
-    int32 height_dim = value_tensor.dims() - 2;
-    int32 width_dim = value_tensor.dims() - 1;
-    *channel = static_cast<int32>(value_tensor.dim_size(channel_dim));
-    *height = static_cast<int32>(value_tensor.dim_size(height_dim));
-    *width = static_cast<int32>(value_tensor.dim_size(width_dim));
-    for (int32 i = 0; i < channel_dim; i++) {
-      *batch *= static_cast<int32>(value_tensor.dim_size(i));
+    *batch = static_cast<int32>(value_tensor.dim_size(0));
+    *channel = static_cast<int32>(value_tensor.dim_size(1));
+    *height = static_cast<int32>(value_tensor.dim_size(2));
+    if (value_tensor.dims() > 3) {
+      *width = static_cast<int32>(value_tensor.dim_size(3));
+    }
+    if (value_tensor.dims() > 4) {
+      *depth = static_cast<int32>(value_tensor.dim_size(4));
     }
   }
 }
@@ -109,10 +110,7 @@ class BiasOp : public BinaryOp<T> {
     // Added by intel_tf to support NCHW on CPU regardless of MKL used or not.
     size_t channel_dim;
     if (data_format_ == FORMAT_NCHW) {
-      OP_REQUIRES(context, input.dims() == 4,
-                  errors::InvalidArgument(
-                      "NCHW format supports only 4D input tensor."));
-      channel_dim = 1;
+      channel_dim = 1; // NCHW always have channel dim in 1 (with 3, 4, 5 dimensions data).
     } else {
       channel_dim = input.shape().dims() - 1;  // End of code by intel_tf.
     }
@@ -132,14 +130,41 @@ class BiasOp : public BinaryOp<T> {
 
     // Added by intel_tf to support NCHW on CPU regardless of MKL used or not.
     if (data_format_ == FORMAT_NCHW) {
-      int32 batch, height, width, channel;
-      GetBiasValueDims(input, data_format_, &batch, &height, &width, &channel);
-      Eigen::DSizes<Eigen::Index, 4> four_dims(1, channel, 1, 1);
-      Eigen::DSizes<Eigen::Index, 4> broad_cast_dims(batch, 1, height, width);
-      const Device& d = context->eigen_device<Device>();
-      output->tensor<T, 4>().device(d) =
-          input.tensor<T, 4>() +
-          bias.tensor<T, 1>().reshape(four_dims).broadcast(broad_cast_dims);
+      int32 batch, height, width, depth, channel;
+      GetBiasValueDims(input, data_format_, &batch, &height, &width, &depth, &channel);
+      switch (input.shape().dims()) {
+        case 3: {
+          Eigen::DSizes<int32, 3> three_dims(1, channel, 1);
+          Eigen::DSizes<int32, 3> broad_cast_dims(batch, 1, height);
+          const Device& d = context->eigen_device<Device>();
+          output->tensor<T, 3>().device(d) =
+              input.tensor<T, 3>() +
+              bias.tensor<T, 1>().reshape(three_dims).broadcast(broad_cast_dims);
+          }
+          break;
+      case 4: {
+          Eigen::DSizes<int32, 4> four_dims(1, channel, 1, 1);
+          Eigen::DSizes<int32, 4> broad_cast_dims(batch, 1, height, width);
+          const Device& d = context->eigen_device<Device>();
+          output->tensor<T, 4>().device(d) =
+              input.tensor<T, 4>() +
+              bias.tensor<T, 1>().reshape(four_dims).broadcast(broad_cast_dims);
+        }
+        break;
+      case 5: {
+          Eigen::DSizes<int32, 5> four_dims(1, channel, 1, 1, 1);
+          Eigen::DSizes<int32, 5> broad_cast_dims(batch, 1, height, width, depth);
+          const Device& d = context->eigen_device<Device>();
+          output->tensor<T, 5>().device(d) =
+              input.tensor<T, 5>() +
+              bias.tensor<T, 1>().reshape(four_dims).broadcast(broad_cast_dims);
+        }
+        break;
+      default:
+        OP_REQUIRES(context, false,
+                    errors::InvalidArgument("Only ranks up to 5 supported: ",
+                                            input.shape().DebugString()));
+      }
       return;
     }  // End of code by intel_tf.
 
@@ -229,8 +254,8 @@ class BiasGradOp : public OpKernel {
                         std::numeric_limits<int32>::max()),
         errors::InvalidArgument("BiasGrad requires tensor size <= int32 max"));
 
-    int32 batch, height, width, channel;
-    GetBiasValueDims(output_backprop, data_format_, &batch, &height, &width,
+    int32 batch, height, width, depth, channel;
+    GetBiasValueDims(output_backprop, data_format_, &batch, &height, &width, &depth,
                      &channel);
     Tensor* output = nullptr;
     TensorShape output_shape{channel};
@@ -243,6 +268,7 @@ class BiasGradOp : public OpKernel {
       output->template flat<T>().setZero();
     } else {
       // Added by intel_tf to support NCHW on CPU regardless of MKL used or not.
+      // TODO (yongtang): Add 3/4/5 dimensional data support for NCHW format.
       if (data_format_ == FORMAT_NCHW) {
         OP_REQUIRES(context, output_backprop.dims() == 4,
                     errors::InvalidArgument(
-- 
GitLab


From 4be1bc2847a66704e23bde9437e52aa9afe7987c Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 2 Sep 2018 23:25:23 +0000
Subject: [PATCH 0522/1085] Fix format issue with clang-format

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/bias_op.cc | 41 +++++++++++++++---------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc
index 7c66558554..a1b965dc1a 100644
--- a/tensorflow/core/kernels/bias_op.cc
+++ b/tensorflow/core/kernels/bias_op.cc
@@ -110,7 +110,8 @@ class BiasOp : public BinaryOp<T> {
     // Added by intel_tf to support NCHW on CPU regardless of MKL used or not.
     size_t channel_dim;
     if (data_format_ == FORMAT_NCHW) {
-      channel_dim = 1; // NCHW always have channel dim in 1 (with 3, 4, 5 dimensions data).
+      channel_dim = 1;  // NCHW always have channel dim in 1 (with 3, 4, 5
+                        // dimensions data).
     } else {
       channel_dim = input.shape().dims() - 1;  // End of code by intel_tf.
     }
@@ -131,39 +132,39 @@ class BiasOp : public BinaryOp<T> {
     // Added by intel_tf to support NCHW on CPU regardless of MKL used or not.
     if (data_format_ == FORMAT_NCHW) {
       int32 batch, height, width, depth, channel;
-      GetBiasValueDims(input, data_format_, &batch, &height, &width, &depth, &channel);
+      GetBiasValueDims(input, data_format_, &batch, &height, &width, &depth,
+                       &channel);
       switch (input.shape().dims()) {
         case 3: {
           Eigen::DSizes<int32, 3> three_dims(1, channel, 1);
           Eigen::DSizes<int32, 3> broad_cast_dims(batch, 1, height);
           const Device& d = context->eigen_device<Device>();
-          output->tensor<T, 3>().device(d) =
-              input.tensor<T, 3>() +
-              bias.tensor<T, 1>().reshape(three_dims).broadcast(broad_cast_dims);
-          }
-          break;
-      case 4: {
+          output->tensor<T, 3>().device(d) = input.tensor<T, 3>() +
+                                             bias.tensor<T, 1>()
+                                                 .reshape(three_dims)
+                                                 .broadcast(broad_cast_dims);
+        } break;
+        case 4: {
           Eigen::DSizes<int32, 4> four_dims(1, channel, 1, 1);
           Eigen::DSizes<int32, 4> broad_cast_dims(batch, 1, height, width);
           const Device& d = context->eigen_device<Device>();
           output->tensor<T, 4>().device(d) =
               input.tensor<T, 4>() +
               bias.tensor<T, 1>().reshape(four_dims).broadcast(broad_cast_dims);
-        }
-        break;
-      case 5: {
+        } break;
+        case 5: {
           Eigen::DSizes<int32, 5> four_dims(1, channel, 1, 1, 1);
-          Eigen::DSizes<int32, 5> broad_cast_dims(batch, 1, height, width, depth);
+          Eigen::DSizes<int32, 5> broad_cast_dims(batch, 1, height, width,
+                                                  depth);
           const Device& d = context->eigen_device<Device>();
           output->tensor<T, 5>().device(d) =
               input.tensor<T, 5>() +
               bias.tensor<T, 1>().reshape(four_dims).broadcast(broad_cast_dims);
-        }
-        break;
-      default:
-        OP_REQUIRES(context, false,
-                    errors::InvalidArgument("Only ranks up to 5 supported: ",
-                                            input.shape().DebugString()));
+        } break;
+        default:
+          OP_REQUIRES(context, false,
+                      errors::InvalidArgument("Only ranks up to 5 supported: ",
+                                              input.shape().DebugString()));
       }
       return;
     }  // End of code by intel_tf.
@@ -255,8 +256,8 @@ class BiasGradOp : public OpKernel {
         errors::InvalidArgument("BiasGrad requires tensor size <= int32 max"));
 
     int32 batch, height, width, depth, channel;
-    GetBiasValueDims(output_backprop, data_format_, &batch, &height, &width, &depth,
-                     &channel);
+    GetBiasValueDims(output_backprop, data_format_, &batch, &height, &width,
+                     &depth, &channel);
     Tensor* output = nullptr;
     TensorShape output_shape{channel};
     OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
-- 
GitLab


From 44e88008ad4b5704357816b726ff86cf7d2e4dd3 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 2 Sep 2018 23:25:47 +0000
Subject: [PATCH 0523/1085] Update shape function to support 3/4/5 NCHW
 dataformat for bias_add

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/framework/common_shape_fns.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index 50403b4004..f3243ae8e7 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -228,12 +228,12 @@ Status BiasAddShape(shape_inference::InferenceContext* c) {
   if (s.ok() && data_format == "NCHW") {
     // Merge the length of bias_shape into the third to last dimension
     ShapeHandle first;
-    TF_RETURN_IF_ERROR(c->Subshape(input_shape, 0, -3, &first));
+    TF_RETURN_IF_ERROR(c->Subshape(input_shape, 0, 1, &first));
 
     ShapeHandle last;
-    TF_RETURN_IF_ERROR(c->Subshape(input_shape, -2, &last));
+    TF_RETURN_IF_ERROR(c->Subshape(input_shape, 2, &last));
 
-    DimensionHandle input_bias_dim = c->Dim(input_shape, -3);
+    DimensionHandle input_bias_dim = c->Dim(input_shape, 1);
     DimensionHandle merged_bias_dim;
     TF_RETURN_IF_ERROR(c->Merge(input_bias_dim, bias_dim, &merged_bias_dim));
     ShapeHandle merged_bias = c->Vector(merged_bias_dim);
-- 
GitLab


From 593f2522b69555b73546cfc3d3aa08680826cda1 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 5 Sep 2018 09:04:43 +0000
Subject: [PATCH 0524/1085] Enable GPU support for bias_add with 3/4/5
 dimensional NCHW format

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/bias_op.cc        | 10 +++++-----
 tensorflow/core/kernels/bias_op_gpu.cu.cc |  4 ++--
 tensorflow/core/kernels/bias_op_gpu.h     |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc
index a1b965dc1a..0ce8c1c445 100644
--- a/tensorflow/core/kernels/bias_op.cc
+++ b/tensorflow/core/kernels/bias_op.cc
@@ -357,8 +357,8 @@ class BiasOp<GPUDevice, T> : public BinaryOp<T> {
     OP_REQUIRES(context, TensorShapeUtils::IsVector(bias.shape()),
                 errors::InvalidArgument("Biases must be 1D: ",
                                         bias.shape().DebugString()));
-    int32 batch, height, width, channel;
-    GetBiasValueDims(input, data_format_, &batch, &height, &width, &channel);
+    int32 batch, height, width, depth, channel;
+    GetBiasValueDims(input, data_format_, &batch, &height, &width, &depth, &channel);
     OP_REQUIRES(context, bias.shape().dim_size(0) == channel,
                 errors::InvalidArgument(
                     "Must provide as many biases as the channel dimension "
@@ -371,7 +371,7 @@ class BiasOp<GPUDevice, T> : public BinaryOp<T> {
     if (input.NumElements() > 0) {
       BiasGPU<T>::compute(context->template eigen_device<Device>(),
                           input.flat<T>().data(), bias.flat<T>().data(),
-                          output->flat<T>().data(), batch, width, height,
+                          output->flat<T>().data(), batch, width, height, depth,
                           channel, data_format_);
     }
   }
@@ -543,8 +543,8 @@ class BiasGradOp<GPUDevice, T> : public OpKernel {
                 TensorShapeUtils::IsMatrixOrHigher(output_backprop.shape()),
                 errors::InvalidArgument("Input tensor must be at least 2D: ",
                                         output_backprop.shape().DebugString()));
-    int32 batch, height, width, channel;
-    GetBiasValueDims(output_backprop, data_format_, &batch, &height, &width,
+    int32 batch, height, width, depth, channel;
+    GetBiasValueDims(output_backprop, data_format_, &batch, &height, &width, &depth,
                      &channel);
     Tensor* output = nullptr;
     TensorShape output_shape{channel};
diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc
index 1a7211a7cb..4d0137e4c7 100644
--- a/tensorflow/core/kernels/bias_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc
@@ -75,10 +75,10 @@ __global__ void BiasNCHWKernel(int32 nthreads, const T* input, const T* bias,
 // dimension.
 template <typename T>
 void BiasGPU<T>::compute(const GPUDevice& d, const T* input, const T* bias,
-                         T* output, int32 batch, int32 height, int32 width,
+                         T* output, int32 batch, int32 height, int32 width, int depth,
                          int32 channel, TensorFormat data_format) {
   const int32 bias_size = channel;
-  const int32 image_size = height * width;
+  const int32 image_size = height * width * depth;
   const int32 total_count = batch * bias_size * image_size;
   if (total_count == 0) {
     return;
diff --git a/tensorflow/core/kernels/bias_op_gpu.h b/tensorflow/core/kernels/bias_op_gpu.h
index c1051f43c9..ecdd88f8ff 100644
--- a/tensorflow/core/kernels/bias_op_gpu.h
+++ b/tensorflow/core/kernels/bias_op_gpu.h
@@ -31,7 +31,7 @@ typedef Eigen::GpuDevice GPUDevice;
 template <typename T>
 struct BiasGPU {
   static void compute(const GPUDevice& d, const T* input, const T* bias,
-                      T* output, int32 batch, int32 height, int32 width,
+                      T* output, int32 batch, int32 height, int32 width, int32 depth,
                       int32 channel, TensorFormat data_format);
 };
 
-- 
GitLab


From 08a8a7218b698ef0b1d8a66ce8b521f687600abc Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 22:54:20 +0000
Subject: [PATCH 0525/1085] Fix BiasAddGradShape for NCHW, channel should be
 second from the left,

not third-from-right as we could have 3/4/5 dimensional cases

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/framework/common_shape_fns.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index f3243ae8e7..14befdfa36 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -266,7 +266,7 @@ Status BiasAddGradShape(shape_inference::InferenceContext* c) {
 
   if (s.ok() && data_format == "NCHW") {
     TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 3, &input_shape));
-    c->set_output(0, c->Vector(c->Dim(input_shape, -3)));
+    c->set_output(0, c->Vector(c->Dim(input_shape, 1)));
   } else {
     TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 2, &input_shape));
     c->set_output(0, c->Vector(c->Dim(input_shape, -1)));
-- 
GitLab


From f2e1ac3e1aa93f8fd4dc853fb5c5b01f665b0a45 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 23:45:49 +0000
Subject: [PATCH 0526/1085] Adjust _NHWCToNCHW and _NCHWToNHWC in
 bias_op_test.py

The _NHWCToNCHW and _NCHWToNHWC in bias_op_test.py
only took into considerations about 4D situation,
as it assume that third-to-last dim is the channel
dimension.

This fix adjust to use second-to-first dim as the channel
dimension so that 3D/4D/5D data could be handled by NCHW format.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/bias_op_test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/bias_op_test.py b/tensorflow/python/kernel_tests/bias_op_test.py
index 42ba665725..7479c9ddf8 100644
--- a/tensorflow/python/kernel_tests/bias_op_test.py
+++ b/tensorflow/python/kernel_tests/bias_op_test.py
@@ -74,16 +74,16 @@ class BiasAddTest(test.TestCase):
   def _NHWCToNCHW(self, np_value):
     # fill the input value to at least 3-dimension
     np_value = self._AtLeast3d(np_value)
-    # move the last dimension to third-to-last
+    # move the last dimension to second
     np_dim = list(range(np_value.ndim))
-    np_dim_new = list(np_dim[0:-3]) + list(np_dim[-1:]) + list(np_dim[-3:-1])
+    np_dim_new = list(np_dim[0:1]) + list(np_dim[-1:]) + list(np_dim[1:-1])
     return np.transpose(np_value, np_dim_new)
 
   def _NCHWToNHWC(self, np_value):
     assert len(np_value.shape) >= 3
     np_dim = list(range(np_value.ndim))
-    # move the third-to-last dimension to the last
-    np_dim_new = list(np_dim[0:-3]) + list(np_dim[-2:]) + list(np_dim[-3:-2])
+    # move the second dimension to the last
+    np_dim_new = list(np_dim[0:1]) + list(np_dim[2:]) + list(np_dim[1:2])
     return np.transpose(np_value, np_dim_new)
 
   def _testBiasNCHW(self, np_inputs, np_bias, use_gpu):
-- 
GitLab


From 9fe498311f7494e04fad573b49689eb251e7b701 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 23:49:11 +0000
Subject: [PATCH 0527/1085] Enable all test cases for bias_add with both NCHW
 and NHWC, and add 3D/4D/5D cases

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/bias_op_test.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/kernel_tests/bias_op_test.py b/tensorflow/python/kernel_tests/bias_op_test.py
index 7479c9ddf8..b4f4f286a8 100644
--- a/tensorflow/python/kernel_tests/bias_op_test.py
+++ b/tensorflow/python/kernel_tests/bias_op_test.py
@@ -96,10 +96,11 @@ class BiasAddTest(test.TestCase):
 
   def _testAll(self, np_inputs, np_bias):
     self._testBias(np_inputs, np_bias, use_gpu=False)
+    self._testBiasNCHW(np_inputs, np_bias, use_gpu=False)
     if np_inputs.dtype in [np.float16, np.float32, np.float64]:
       self._testBias(np_inputs, np_bias, use_gpu=True)
-      if test.is_gpu_available(cuda_only=True):
-        self._testBiasNCHW(np_inputs, np_bias, use_gpu=True)
+      self._testBiasNCHW(np_inputs, np_bias, use_gpu=True)
+
 
   def testInputDims(self):
     with self.assertRaises(ValueError):
@@ -132,6 +133,16 @@ class BiasAddTest(test.TestCase):
       self._testAll(
           np.random.rand(4, 3, 3).astype(t), np.random.rand(3).astype(t))
 
+  def test4DFloatTypes(self):
+    for t in [np.float16, np.float32, np.float64]:
+      self._testAll(
+          np.random.rand(4, 3, 2, 3).astype(t), np.random.rand(3).astype(t))
+
+  def test5DFloatTypes(self):
+    for t in [np.float16, np.float32, np.float64]:
+      self._testAll(
+          np.random.rand(4, 3, 2, 3, 4).astype(t), np.random.rand(4).astype(t))
+
   def _testGradient(self, np_input, bias, dtype, data_format, use_gpu):
     with self.test_session(use_gpu=use_gpu):
       if data_format == "NCHW":
-- 
GitLab


From fa80cc6102b46fbb8bf8fde619c077fb301a5e9b Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Sep 2018 23:58:10 +0000
Subject: [PATCH 0528/1085] Enable gradient tests for bias_add with NCHW and 4D

3D/5D does not work for BiadAddGrad with NCHW so disable.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../python/kernel_tests/bias_op_test.py       | 25 +++++++------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/kernel_tests/bias_op_test.py b/tensorflow/python/kernel_tests/bias_op_test.py
index b4f4f286a8..fe988234d9 100644
--- a/tensorflow/python/kernel_tests/bias_op_test.py
+++ b/tensorflow/python/kernel_tests/bias_op_test.py
@@ -30,19 +30,6 @@ import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
 from tensorflow.python.platform import test
 
 
-def GetTestConfigs():
-  """Get all the valid tests configs to run.
-
-  Returns:
-    all the valid test configs as tuples of data_format and use_gpu.
-  """
-  test_configs = [("NHWC", False), ("NHWC", True)]
-  if test.is_gpu_available(cuda_only=True):
-    # "NCHW" format is currently only supported on CUDA.
-    test_configs += [("NCHW", True)]
-  return test_configs
-
-
 class BiasAddTest(test.TestCase):
 
   def _npBias(self, inputs, bias):
@@ -199,7 +186,9 @@ class BiasAddTest(test.TestCase):
       self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold)
 
   def testGradientTensor(self):
-    for (data_format, use_gpu) in GetTestConfigs():
+    # TODO (yongtang): BiasAddGrad with NCHW only works 4D. Reenable once
+    # all dimensions are supported.
+    for (data_format, use_gpu) in ("NHWC", False), ("NHWC", True):
       for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
         np_input = np.array(
             [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
@@ -208,7 +197,9 @@ class BiasAddTest(test.TestCase):
         self._testGradient(np_input, bias, dtype, data_format, use_gpu)
 
   def testGradientTensor4D(self):
-    for (data_format, use_gpu) in GetTestConfigs():
+    # BiasAddGrad with NCHW support 4D so all are enabled.
+    for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True),
+                                   ("NCHW", False), ("NCHW", True)]:
       for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
         np_input = np.arange(
             1.0, 49.0, dtype=dtype.as_numpy_dtype).reshape(
@@ -222,7 +213,9 @@ class BiasAddTest(test.TestCase):
       self._testAll(np.random.randn(*shape), np.random.randn(shape[-1]))
 
   def testEmptyGradient(self):
-    for data_format, use_gpu in GetTestConfigs():
+    # TODO (yongtang): BiasAddGrad with NCHW only works 4D. Reenable once
+    # all dimensions are supported.
+    for (data_format, use_gpu) in ("NHWC", False), ("NHWC", True):
       for shape in (0, 0), (2, 0), (0, 2), (4, 3, 0), (4, 0, 3), (0, 4, 3):
         self._testGradient(
             np.random.randn(*shape),
-- 
GitLab


From 5f5dae3edd0ab466e02ef58018ba3823f14143f8 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 10 Sep 2018 21:39:16 +0000
Subject: [PATCH 0529/1085] Fix incorrect shape test in
 common_shape_fns_test.cc

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/framework/common_shape_fns_test.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc
index 919e0967c0..14fb550e1e 100644
--- a/tensorflow/core/framework/common_shape_fns_test.cc
+++ b/tensorflow/core/framework/common_shape_fns_test.cc
@@ -278,9 +278,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
                     .Finalize(&def));
     InferenceContext c(TF_GRAPH_DEF_VERSION, &def, op_def,
                        {S({8, 6, 4, 2, 3, 4, 5}), S({3})}, {}, {}, {});
-    TF_EXPECT_OK(BiasAddShape(&c));
-    ShapeHandle output = c.output(0);
-    EXPECT_EQ("[8,6,4,2,3,4,5]", c.DebugString(output));
+    EXPECT_FALSE(BiasAddShape(&c).ok());
   }
 
   {
-- 
GitLab


From c5562864c3c6b1d28d91eb38b5ec0e013678f3a2 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 10 Sep 2018 21:40:00 +0000
Subject: [PATCH 0530/1085] Fix incorrect shape test

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/framework/common_shape_fns_test.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc
index 14fb550e1e..7c395679d3 100644
--- a/tensorflow/core/framework/common_shape_fns_test.cc
+++ b/tensorflow/core/framework/common_shape_fns_test.cc
@@ -289,7 +289,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) {
                     .Attr("data_format", "NCHW")
                     .Finalize(&def));
     InferenceContext c(TF_GRAPH_DEF_VERSION, &def, op_def,
-                       {S({10, 11, 12}), S({10})}, {}, {}, {});
+                       {S({10, 11, 12}), S({11})}, {}, {}, {});
     TF_EXPECT_OK(BiasAddShape(&c));
     ShapeHandle output = c.output(0);
     EXPECT_EQ("[10,11,12]", c.DebugString(output));
@@ -369,7 +369,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
                        {S({8, 6, 4, 2, 3, 4, 5})}, {}, {}, {});
     TF_EXPECT_OK(BiasAddGradShape(&c));
     ShapeHandle output = c.output(0);
-    EXPECT_EQ(3, c.Value(c.Dim(output, 0)));
+    EXPECT_EQ(6, c.Value(c.Dim(output, 0)));
   }
 
   {
@@ -382,7 +382,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
                        {}, {}, {});
     TF_EXPECT_OK(BiasAddGradShape(&c));
     ShapeHandle output = c.output(0);
-    EXPECT_EQ(10, c.Value(c.Dim(output, 0)));
+    EXPECT_EQ(11, c.Value(c.Dim(output, 0)));
   }
 
   {
-- 
GitLab


From 8d5b6f7d6ad796562be38fc4e4e2b57e0087be75 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 11 Sep 2018 02:11:27 +0000
Subject: [PATCH 0531/1085] Fix `Experimental clang-format Check` failure

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/bias_op.cc        | 17 ++++----
 tensorflow/core/kernels/bias_op_gpu.cu.cc | 47 +++++++++++------------
 tensorflow/core/kernels/bias_op_gpu.h     |  4 +-
 3 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc
index 0ce8c1c445..6e6325f8ae 100644
--- a/tensorflow/core/kernels/bias_op.cc
+++ b/tensorflow/core/kernels/bias_op.cc
@@ -250,9 +250,8 @@ class BiasGradOp : public OpKernel {
                                         output_backprop.shape().DebugString()));
 
     OP_REQUIRES(
-        context,
-        FastBoundsCheck(output_backprop.NumElements(),
-                        std::numeric_limits<int32>::max()),
+        context, FastBoundsCheck(output_backprop.NumElements(),
+                                 std::numeric_limits<int32>::max()),
         errors::InvalidArgument("BiasGrad requires tensor size <= int32 max"));
 
     int32 batch, height, width, depth, channel;
@@ -358,7 +357,8 @@ class BiasOp<GPUDevice, T> : public BinaryOp<T> {
                 errors::InvalidArgument("Biases must be 1D: ",
                                         bias.shape().DebugString()));
     int32 batch, height, width, depth, channel;
-    GetBiasValueDims(input, data_format_, &batch, &height, &width, &depth, &channel);
+    GetBiasValueDims(input, data_format_, &batch, &height, &width, &depth,
+                     &channel);
     OP_REQUIRES(context, bias.shape().dim_size(0) == channel,
                 errors::InvalidArgument(
                     "Must provide as many biases as the channel dimension "
@@ -544,8 +544,8 @@ class BiasGradOp<GPUDevice, T> : public OpKernel {
                 errors::InvalidArgument("Input tensor must be at least 2D: ",
                                         output_backprop.shape().DebugString()));
     int32 batch, height, width, depth, channel;
-    GetBiasValueDims(output_backprop, data_format_, &batch, &height, &width, &depth,
-                     &channel);
+    GetBiasValueDims(output_backprop, data_format_, &batch, &height, &width,
+                     &depth, &channel);
     Tensor* output = nullptr;
     TensorShape output_shape{channel};
     OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
@@ -560,10 +560,7 @@ class BiasGradOp<GPUDevice, T> : public OpKernel {
     int device_id = stream->parent()->device_ordinal();
     DataType dtype = output_backprop.dtype();
     BiasAddParams bias_parameters = {
-        {batch, height * width, channel},
-        data_format_,
-        dtype,
-        device_id,
+        {batch, height * width, channel}, data_format_, dtype, device_id,
     };
 
     // Autotune two algorithm: customized
diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc
index 4d0137e4c7..334ba46ca7 100644
--- a/tensorflow/core/kernels/bias_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc
@@ -75,8 +75,8 @@ __global__ void BiasNCHWKernel(int32 nthreads, const T* input, const T* bias,
 // dimension.
 template <typename T>
 void BiasGPU<T>::compute(const GPUDevice& d, const T* input, const T* bias,
-                         T* output, int32 batch, int32 height, int32 width, int depth,
-                         int32 channel, TensorFormat data_format) {
+                         T* output, int32 batch, int32 height, int32 width,
+                         int depth, int32 channel, TensorFormat data_format) {
   const int32 bias_size = channel;
   const int32 image_size = height * width * depth;
   const int32 total_count = batch * bias_size * image_size;
@@ -85,14 +85,14 @@ void BiasGPU<T>::compute(const GPUDevice& d, const T* input, const T* bias,
   }
   CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d);
   if (data_format == FORMAT_NHWC) {
-    BiasNHWCKernel<T>
-        <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-            config.virtual_thread_count, input, bias, output, bias_size);
+    BiasNHWCKernel<
+        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+        config.virtual_thread_count, input, bias, output, bias_size);
   } else {
-    BiasNCHWKernel<T>
-        <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-            config.virtual_thread_count, input, bias, output, bias_size,
-            image_size);
+    BiasNCHWKernel<
+        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+        config.virtual_thread_count, input, bias, output, bias_size,
+        image_size);
   }
 }
 
@@ -214,10 +214,10 @@ void BiasGradGPU<T>::compute(const GPUDevice& d, const T* output_backprop,
   // Check if we have enough shared memory.
   if (shared_memory_size <= max_shared_memory_size) {
     if (data_format == FORMAT_NHWC) {
-      BiasGradNHWC_SharedAtomics<T>
-          <<<config.block_count, config.thread_per_block, shared_memory_size,
-             d.stream()>>>(total_count, output_backprop, bias_backprop,
-                           bias_size);
+      BiasGradNHWC_SharedAtomics<
+          T><<<config.block_count, config.thread_per_block, shared_memory_size,
+               d.stream()>>>(total_count, output_backprop, bias_backprop,
+                             bias_size);
     } else {
       // Round up the block count to multiple of bias_size.
       int group_size = (config.block_count + bias_size - 1) / bias_size;
@@ -225,24 +225,23 @@ void BiasGradGPU<T>::compute(const GPUDevice& d, const T* output_backprop,
       if (config.thread_per_block < kWarpSize) {
         config.thread_per_block = kWarpSize;
       }
-      BiasGradNCHW_SharedAtomics<T>
-          <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-              output_backprop, bias_backprop, batch, bias_size, image_size,
-              group_size);
+      BiasGradNCHW_SharedAtomics<
+          T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+          output_backprop, bias_backprop, batch, bias_size, image_size,
+          group_size);
     }
   } else {
     // Note that even if we don't have enough shared memory to fit the entire
     // output block, it is possible to process one group of elements at a time.
     // But for now, we simply fall back to the naive implementation.
     if (data_format == FORMAT_NHWC) {
-      BiasGradNHWC_Naive<T>
-          <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-              total_count, output_backprop, bias_backprop, bias_size);
+      BiasGradNHWC_Naive<
+          T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+          total_count, output_backprop, bias_backprop, bias_size);
     } else {
-      BiasGradNCHW_Naive<T>
-          <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-              total_count, output_backprop, bias_backprop, bias_size,
-              image_size);
+      BiasGradNCHW_Naive<
+          T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+          total_count, output_backprop, bias_backprop, bias_size, image_size);
     }
   }
 }
diff --git a/tensorflow/core/kernels/bias_op_gpu.h b/tensorflow/core/kernels/bias_op_gpu.h
index ecdd88f8ff..a0b2ce4f9b 100644
--- a/tensorflow/core/kernels/bias_op_gpu.h
+++ b/tensorflow/core/kernels/bias_op_gpu.h
@@ -31,8 +31,8 @@ typedef Eigen::GpuDevice GPUDevice;
 template <typename T>
 struct BiasGPU {
   static void compute(const GPUDevice& d, const T* input, const T* bias,
-                      T* output, int32 batch, int32 height, int32 width, int32 depth,
-                      int32 channel, TensorFormat data_format);
+                      T* output, int32 batch, int32 height, int32 width,
+                      int32 depth, int32 channel, TensorFormat data_format);
 };
 
 template <typename T>
-- 
GitLab


From 494bbdfced3fd8596721d12e73676c4967f452e4 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 8 Oct 2018 13:48:19 -0700
Subject: [PATCH 0532/1085] Allow using more than one converter in the testing
 harness.

PiperOrigin-RevId: 216242862
---
 tensorflow/python/autograph/core/converter_testing.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py
index dc2d419d34..fcdbd0a82c 100644
--- a/tensorflow/python/autograph/core/converter_testing.py
+++ b/tensorflow/python/autograph/core/converter_testing.py
@@ -128,7 +128,13 @@ class TestCase(test.TestCase):
   @contextlib.contextmanager
   def converted(self, entity, converter_module, namespace, *tf_symbols):
     node, ctx = self.prepare(entity, namespace)
-    node = converter_module.transform(node, ctx)
+
+    if not isinstance(converter_module, (list, tuple)):
+      converter_module = (converter_module,)
+    for m in converter_module:
+      node = m.transform(node, ctx)
+      node = converter.standard_analysis(node, ctx, is_initial=True)
+
     with self.compiled(node, namespace, *tf_symbols) as result:
       yield result
 
-- 
GitLab


From eec9ca8f0baccd249a49046fe31b460903e44850 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 8 Oct 2018 13:50:12 -0700
Subject: [PATCH 0533/1085] Partial support tfe.defun in tf.gradients.

Doesn't attempt to deal with cases where we might have already generated
the functiondef for the parent function as in that case we cannot easily
modify the forward pass.

PiperOrigin-RevId: 216243224
---
 .../core/common_runtime/shape_refiner.cc      |  5 ++
 tensorflow/core/framework/shape_inference.cc  |  9 ++
 tensorflow/core/framework/shape_inference.h   |  9 +-
 tensorflow/core/graph/graph.cc                | 13 +++
 tensorflow/core/graph/graph.h                 |  5 ++
 tensorflow/core/graph/node_builder.cc         |  8 +-
 tensorflow/core/ops/resource_variable_ops.cc  |  3 +-
 tensorflow/python/eager/function.py           | 87 ++++++++++---------
 tensorflow/python/eager/function_test.py      | 18 +++-
 tensorflow/python/framework/op_def_library.py |  3 +-
 .../python/kernel_tests/cond_v2_test.py       |  1 +
 tensorflow/python/ops/custom_gradient.py      | 44 ++++++++++
 tensorflow/python/ops/gradients_impl.py       | 30 +++----
 tensorflow/python/ops/while_v2.py             |  3 +-
 14 files changed, 169 insertions(+), 69 deletions(-)

diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index fa4d1eda62..9488a44778 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -288,6 +288,11 @@ Status ShapeRefiner::SetShape(const Node* node, int output_port,
         "output_port '", output_port, "' is out of range, ", "node '",
         node->name(), "' has ", node->num_outputs(), " outputs");
   }
+  // Note: it's possible, if the node's been updated, that the shape inference
+  // context doesn't have the right number of outputs.
+  if (node->num_outputs() > c->num_outputs()) {
+    TF_RETURN_IF_ERROR(c->ExpandOutputs(node->num_outputs()));
+  }
 
   // Check compatibility, and merge the shapes.
   ShapeHandle existing_shape = c->output(output_port);
diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index 3e77028a5f..4dcc80680f 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -239,6 +239,15 @@ void InferenceContext::PreInputInit(
   output_handle_shapes_and_types_.resize(num_outputs);
 }
 
+Status InferenceContext::ExpandOutputs(int new_output_size) {
+  if (new_output_size < outputs_.size()) {
+    return errors::InvalidArgument("Trying to reduce number of outputs of op.");
+  }
+  outputs_.resize(new_output_size, nullptr);
+  output_handle_shapes_and_types_.resize(new_output_size);
+  return Status::OK();
+}
+
 void InferenceContext::PostInputInit(
     std::vector<std::unique_ptr<std::vector<ShapeAndType>>> input_handle_data) {
   int num_inputs_from_node_def = 0;
diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h
index 81258b55b3..e3885b7d9e 100644
--- a/tensorflow/core/framework/shape_inference.h
+++ b/tensorflow/core/framework/shape_inference.h
@@ -323,13 +323,13 @@ class InferenceContext {
     return input_tensors_as_shapes_;
   }
 
-  ShapeHandle output(int64 idx) const { return outputs_[idx]; }
-  void set_output(int idx, ShapeHandle shape) { outputs_[idx] = shape; }
+  ShapeHandle output(int64 idx) const { return outputs_.at(idx); }
+  void set_output(int idx, ShapeHandle shape) { outputs_.at(idx) = shape; }
   Status set_output(StringPiece output_name,
                     const std::vector<ShapeHandle>& shapes);
 
   int num_outputs() const { return outputs_.size(); }
-  ShapeHandle output(int idx) const { return outputs_[idx]; }
+  ShapeHandle output(int idx) const { return outputs_.at(idx); }
   Status output(StringPiece output_name,
                 std::vector<ShapeHandle>* output) const;
 
@@ -645,6 +645,9 @@ class InferenceContext {
     return merged_dims_;
   }
 
+  // Adds new outputs; useful when mutating the graph.
+  Status ExpandOutputs(int new_output_size);
+
  private:
   // Creates and stores shapes for use in InferenceContext.
   class ShapeManager {
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 7a4a0096fa..6f068546d2 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -142,6 +142,19 @@ void Node::Clear() {
   assigned_device_name_index_ = 0;
 }
 
+void Node::UpdateProperties() {
+  DataTypeVector inputs;
+  DataTypeVector outputs;
+  Status status =
+      InOutTypesForNode(props_->node_def, *(props_->op_def), &inputs, &outputs);
+  if (!status.ok()) {
+    LOG(ERROR) << "Failed at updating node: " << status;
+    return;
+  }
+  props_ = std::make_shared<NodeProperties>(props_->op_def, props_->node_def,
+                                            inputs, outputs);
+}
+
 const string& Node::name() const { return props_->node_def.name(); }
 const string& Node::type_string() const { return props_->node_def.op(); }
 const NodeDef& Node::def() const { return props_->node_def; }
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 2944951f82..228b1331d9 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -171,6 +171,7 @@ class Node {
   template <typename T>
   void AddAttr(const string& name, const T& val) {
     SetAttrValue(val, AddAttrHelper(name));
+    UpdateProperties();
   }
 
   void ClearAttr(const string& name);
@@ -211,6 +212,10 @@ class Node {
   // e.g. in AddAttr.
   void MaybeCopyOnWrite();
 
+  // Called after an attr has changed. Decides whether we need to update some
+  // property of the node (stored in props_).
+  void UpdateProperties();
+
   AttrValue* AddAttrHelper(const string& name);
 
   // A set of mutually exclusive classes for different kinds of nodes,
diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc
index d92874909f..68a20fcc5f 100644
--- a/tensorflow/core/graph/node_builder.cc
+++ b/tensorflow/core/graph/node_builder.cc
@@ -140,10 +140,10 @@ void NodeBuilder::AddIndexError(const Node* node, int i) {
         strings::StrCat("Attempt to add nullptr Node to node with type ",
                         def_builder_.op_def().name()));
   } else {
-    errors_.emplace_back(
-        strings::StrCat("Attempt to add output ", i, " of ", node->name(),
-                        " not in range [0, ", node->num_outputs(),
-                        ") to node with type ", def_builder_.op_def().name()));
+    errors_.emplace_back(strings::StrCat(
+        "Attempt to add output ", i, " of ", node->name(), " not in range [0, ",
+        node->num_outputs(), ") to node with type ",
+        def_builder_.op_def().name(), ". Node: ", node->DebugString()));
   }
 }
 
diff --git a/tensorflow/core/ops/resource_variable_ops.cc b/tensorflow/core/ops/resource_variable_ops.cc
index adc9cd1486..65bdde375b 100644
--- a/tensorflow/core/ops/resource_variable_ops.cc
+++ b/tensorflow/core/ops/resource_variable_ops.cc
@@ -216,7 +216,8 @@ REGISTER_OP("VarIsInitializedOp")
 Status VariableShapeShapeFn(InferenceContext* c) {
   auto* handle_data = c->input_handle_shapes_and_types(0);
   if (handle_data == nullptr || handle_data->empty()) {
-    return errors::InvalidArgument("Handle doesn't have shape information.");
+    c->set_output(0, c->Vector(c->UnknownDim()));
+    return Status::OK();
   }
   ShapeHandle var_shape = (*handle_data)[0].shape;
   int64 rank = c->RankKnown(var_shape) ? c->Rank(var_shape)
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 93168826b1..99bf375ea7 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -46,6 +46,7 @@ from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import cond_v2_impl
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import custom_gradient
 from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import resource_variable_ops
@@ -81,49 +82,10 @@ def _create_substitute_placeholder(value, name=None, dtype=None):
   with ops.control_dependencies(None):
     placeholder = graph_placeholder(
         dtype=dtype or value.dtype, shape=value.shape, name=name)
-  _copy_handle_data(value, placeholder)
+  custom_gradient.copy_handle_data(value, placeholder)
   return placeholder
 
 
-def _copy_handle_data(source_t, target_t):
-  """Copies HandleData for variant and resource type tensors if available.
-
-  The CppShapeInferenceResult::HandleData proto contains information about the
-  shapes and types of the element tensors of resource/variant type tensors.
-  We need to copy this across function boundaries, i.e., when capturing a
-  placeholder or when returning a function tensor as output. If we don't do this
-  the element tensors will have unknown shapes, e.g., if a TensorList variant
-  tensor is captured as a placeholder, elements popped from that list would have
-  unknown shape.
-
-  Args:
-    source_t: The tensor to copy HandleData from.
-    target_t: The tensor to copy HandleData to.
-  """
-  if (target_t.dtype == dtypes_module.resource or
-      target_t.dtype == dtypes_module.variant):
-    if isinstance(source_t, ops.EagerTensor):
-      handle_data = source_t._handle_data  # pylint: disable=protected-access
-    else:
-      handle_data = resource_variable_ops.get_resource_handle_data(source_t)
-    if handle_data is not None and handle_data.is_set:
-      # pylint: disable=protected-access
-      pywrap_tensorflow.SetHandleShapeAndType(target_t.graph._c_graph,
-                                              target_t._as_tf_output(),
-                                              handle_data.SerializeToString())
-      # pylint: enable=protected-access
-      # Ensure that shapes and dtypes are propagated.
-      shapes, types = zip(*[(pair.shape, pair.dtype)
-                            for pair in handle_data.shape_and_type])
-      ranks = [len(s.dim) if not s.unknown_rank else -1 for s in shapes]
-      shapes = [[d.size for d in s.dim]
-                if not s.unknown_rank else None for s in shapes]
-      pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper(
-          target_t._op._graph._c_graph,  # pylint: disable=protected-access
-          target_t._as_tf_output(),  # pylint: disable=protected-access
-          shapes, ranks, types)
-
-
 def _get_device_functions(ctx, graph):
   """Returns a tuple of device functions representing the device stack."""
   if ctx.executing_eagerly():
@@ -547,7 +509,7 @@ class _EagerDefinedFunction(object):
       for i, shape in enumerate(self._output_shapes):
         outputs[i].set_shape(shape)
       for i, func_graph_output in enumerate(self._func_graph_outputs):
-        _copy_handle_data(func_graph_output, outputs[i])
+        custom_gradient.copy_handle_data(func_graph_output, outputs[i])
       return outputs
 
 
@@ -658,7 +620,48 @@ class Function(object):
     if tape.should_record(tensor_inputs) or tape.should_record(captures):
       return self._backprop_call(args)
 
-    outputs = self._inference_function.call(ctx, args)
+    # Only need to override the gradient in graph mode and when we have outputs.
+    if context.executing_eagerly() or not self.outputs:
+      outputs = self._inference_function.call(ctx, args)
+    else:
+      name = "PartitionedCall-%s" % ops.uid()
+
+      @ops.RegisterGradient(name)
+      def grad_fn(op, *doutputs):  # pylint: disable=unused-variable
+        """Gradients of this function."""
+        if op.graph is not ops.get_default_graph():
+          # TODO(apassos) this will still emit SymbolicGradient ops when
+          # nested defuns are being differentiated. We need to somehow figure
+          # out a way to update the FunctionDef corresponding to the calling
+          # function when mutating a call to the forward pass.
+          return gradients_impl._SymGrad(op, list(doutputs))  # pylint: disable=protected-access
+        if self._backward_graph_function is None:
+          self._construct_backprop_function()
+        self._forward_function.add_to_graph(op.graph)
+        func = attr_value_pb2.AttrValue(
+            func=attr_value_pb2.NameAttrList(
+                name=self._forward_function.name))
+        # pylint: disable=protected-access
+        op._set_attr("f", func)
+        types = attr_value_pb2.AttrValue.ListValue(
+            type=self._forward_function._output_types)
+        op._set_attr("Tout", attr_value_pb2.AttrValue(list=types))
+        for i in range(
+            len(outputs), len(self._forward_function._output_types)):
+          t = ops.Tensor(op, i, self._forward_function._output_types[i])
+          t.set_shape(self._forward_function._output_shapes[i])
+          func_graph_output = self._forward_function._func_graph_outputs[i]
+          custom_gradient.copy_handle_data(func_graph_output, t)
+          op._outputs.append(t)
+        # pylint: enable=protected-access
+        side_outputs = op.outputs[len(outputs):]
+        return self._backward_graph_function(
+            *(list(doutputs) + list(side_outputs)))
+
+      with ops.get_default_graph().gradient_override_map(
+          {"PartitionedCall": name}):
+        outputs = self._inference_function.call(ctx, args)
+
     return self._build_call_outputs(outputs)
 
   @property
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 57e545be69..e46bde098b 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -286,7 +286,23 @@ class FunctionTest(test.TestCase):
       c = constant_op.constant([[2.]])
       f_c = f(c)
       g, = gradients_impl.gradients(f_c, c)
-      self.assertAllEqual(sess.run(g), [[1.0]])
+      self.assertAllEqual(sess.run(g).values, [[1.0]])
+
+  def testNoSymGradNestedDefun(self):
+
+    @function.defun
+    def outer():
+
+      @function.defun
+      def f(x):
+        return array_ops.gather_nd(x, [[0]])
+
+      c = constant_op.constant([[2.]])
+      f_c = f(c)
+      g, = gradients_impl.gradients(f_c, c)
+      self.assertTrue(isinstance(g, ops.IndexedSlices))
+
+    outer()
 
   def testNestedInputsGraphFunction(self):
     matmul = function.defun(math_ops.matmul)
diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py
index e85bba11cd..9955a9a2cd 100644
--- a/tensorflow/python/framework/op_def_library.py
+++ b/tensorflow/python/framework/op_def_library.py
@@ -482,7 +482,8 @@ class OpDefLibrary(object):
               else:
                 raise TypeError("%s that don't all match." % prefix)
             else:
-              raise TypeError("%s that are invalid." % prefix)
+              raise TypeError(
+                  "%s that are invalid. Tensors: %s" % (prefix, values))
 
           types = [x.dtype for x in values]
           inputs.extend(values)
diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index ec875aae59..a424a0f219 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -153,6 +153,7 @@ class CondV2Test(test.TestCase):
         self.assertIn("foo_cond_1_false", ops.get_default_graph()._functions)
 
   def testDefunInCond(self):
+    self.skipTest("b/117293122")
     x = constant_op.constant(1.0, name="x")
     y = constant_op.constant(2.0, name="y")
 
diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py
index d7834ba350..bfe23834b7 100644
--- a/tensorflow/python/ops/custom_gradient.py
+++ b/tensorflow/python/ops/custom_gradient.py
@@ -18,9 +18,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import tape as tape_lib
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_array_ops
@@ -33,6 +35,45 @@ from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
 
+def copy_handle_data(source_t, target_t):
+  """Copies HandleData for variant and resource type tensors if available.
+
+  The CppShapeInferenceResult::HandleData proto contains information about the
+  shapes and types of the element tensors of resource/variant type tensors.
+  We need to copy this across function boundaries, i.e., when capturing a
+  placeholder or when returning a function tensor as output. If we don't do this
+  the element tensors will have unknown shapes, e.g., if a TensorList variant
+  tensor is captured as a placeholder, elements popped from that list would have
+  unknown shape.
+
+  Args:
+    source_t: The tensor to copy HandleData from.
+    target_t: The tensor to copy HandleData to.
+  """
+  if (target_t.dtype == dtypes.resource or
+      target_t.dtype == dtypes.variant):
+    if isinstance(source_t, ops.EagerTensor):
+      handle_data = source_t._handle_data  # pylint: disable=protected-access
+    else:
+      handle_data = resource_variable_ops.get_resource_handle_data(source_t)
+    if handle_data is not None and handle_data.is_set:
+      # pylint: disable=protected-access
+      pywrap_tensorflow.SetHandleShapeAndType(target_t.graph._c_graph,
+                                              target_t._as_tf_output(),
+                                              handle_data.SerializeToString())
+      # pylint: enable=protected-access
+      # Ensure that shapes and dtypes are propagated.
+      shapes, types = zip(*[(pair.shape, pair.dtype)
+                            for pair in handle_data.shape_and_type])
+      ranks = [len(s.dim) if not s.unknown_rank else -1 for s in shapes]
+      shapes = [[d.size for d in s.dim]
+                if not s.unknown_rank else None for s in shapes]
+      pywrap_tensorflow.TF_GraphSetOutputHandleShapesAndTypes_wrapper(
+          target_t._op._graph._c_graph,  # pylint: disable=protected-access
+          target_t._as_tf_output(),  # pylint: disable=protected-access
+          shapes, ranks, types)
+
+
 @tf_export("custom_gradient")
 def custom_gradient(f):
   """Decorator to define a function with a custom gradient.
@@ -180,8 +221,11 @@ def _graph_mode_decorator(f, *args, **kwargs):
     input_grads = nest.flatten(input_grads)
     return ([None] * len(flat_result)) + input_grads + variable_grads
 
+  original_tensors = all_tensors
   with ops.get_default_graph().gradient_override_map({"IdentityN": name}):
     all_tensors = array_ops.identity_n(all_tensors)
+  for ot, t in zip(original_tensors, all_tensors):
+    copy_handle_data(ot, t)
   return nest.pack_sequence_as(
       structure=result, flat_sequence=all_tensors[:len(flat_result)])
 
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index aac95037dc..6909fcaed5 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -800,23 +800,21 @@ def _GradientsHelper(ys,
         # pylint: enable=protected-access
         has_out_grads = any(isinstance(g, ops.Tensor) or g for g in out_grads)
         if has_out_grads and (op not in stop_ops):
-          if is_func_call:
-            if is_partitioned_call:
-              func_call = src_graph._get_function(  # pylint: disable=protected-access
-                  compat.as_bytes(op.get_attr("f").name))
+          try:
+            grad_fn = ops.get_gradient_function(op)
+          except LookupError:
+            if is_func_call:
+              if is_partitioned_call:
+                func_call = src_graph._get_function(  # pylint: disable=protected-access
+                    compat.as_bytes(op.get_attr("f").name))
+              else:
+                func_call = src_graph._get_function(op.type)  # pylint: disable=protected-access
+              # Note that __defun is not set if the graph is
+              # imported. If it's set, we prefer to access the original
+              # defun.
+              func_call = getattr(op, "__defun", func_call)
+              grad_fn = func_call.python_grad_func
             else:
-              func_call = src_graph._get_function(op.type)  # pylint: disable=protected-access
-            # Note that __defun is not set if the graph is
-            # imported. If it's set, we prefer to access the original
-            # defun.
-            func_call = getattr(op, "__defun", func_call)
-            grad_fn = func_call.python_grad_func
-          else:
-            # A grad_fn must be defined, either as a function or as None
-            # for ops that do not have gradients.
-            try:
-              grad_fn = ops.get_gradient_function(op)
-            except LookupError:
               raise LookupError(
                   "No gradient defined for operation '%s' (op type: %s)" %
                   (op.name, op.type))
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index 8e88a84d60..0419656143 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -37,6 +37,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import cond_v2_impl as cond_v2
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import custom_gradient
 from tensorflow.python.ops import gen_functional_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import list_ops
@@ -580,7 +581,7 @@ def _check_shapes_compat(output_tensors, shape_invariants, input_tensors):
 
 def _copy_handle_data(src_tensors, tgt_tensors):
   for src_t, tgt_t in zip(src_tensors, tgt_tensors):
-    function._copy_handle_data(src_t, tgt_t)
+    custom_gradient.copy_handle_data(src_t, tgt_t)
 
 
 # TODO(srbs): Move to common utils for cond_v2 and while_v2.
-- 
GitLab


From 13b47e6c4f9d7b295948b1057139bf676e394b6f Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 14:16:55 -0700
Subject: [PATCH 0534/1085] Automated rollback of commit
 295b3c80555cc82d8d70faf96a47681e1d904b9c

PiperOrigin-RevId: 216247929
---
 tensorflow/core/kernels/data/iterator_ops.cc  |  4 ---
 .../kernels/data/map_and_batch_dataset_op.cc  |  9 ++++---
 .../core/kernels/data/model_dataset_op.cc     | 10 ++++---
 .../data/parallel_interleave_dataset_op.cc    | 27 +++++++++++--------
 .../kernels/data/parallel_map_iterator.cc     |  9 ++++---
 .../core/kernels/data/prefetch_dataset_op.cc  | 10 ++++---
 tensorflow/core/kernels/data/writer_ops.cc    | 12 ++++-----
 7 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 7a833668ac..8acd6cc724 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -16,10 +16,8 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
-#include "tensorflow/core/common_runtime/threadpool_device.h"
 #include "tensorflow/core/framework/iterator.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
-#include "tensorflow/core/framework/resource_op_kernel.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
@@ -27,13 +25,11 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/optional_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 namespace data {
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index f45a239793..0fb721cd7c 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -445,9 +445,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
-          runner_thread_.reset(ctx->env()->StartThread(
-              {}, "runner_thread",
-              std::bind(&Iterator::RunnerThread, this, ctx_copy)));
+          runner_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+          runner_thread_->Schedule(
+              std::bind(&Iterator::RunnerThread, this, ctx_copy));
         }
       }
 
@@ -703,7 +704,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
       bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index 9aa505f4f1..859df57962 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -126,9 +127,10 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         if (!optimize_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          optimize_thread_.reset(ctx->env()->StartThread(
-              {}, "optimize_thread",
-              [this, new_ctx]() { OptimizeThread(new_ctx); }));
+          optimize_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "optimize_thread");
+          optimize_thread_->Schedule(
+              [this, new_ctx]() { OptimizeThread(new_ctx); });
         }
         return Status::OK();
       }
@@ -167,7 +169,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       condition_variable cond_var_;
       std::shared_ptr<model::Model> model_;
-      std::unique_ptr<Thread> optimize_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<BackgroundWorker> optimize_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 6b6b3d6ab9..9c836b836e 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -481,9 +482,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           worker_threads_.reserve(dataset()->num_threads());
           for (size_t i = 0; i < dataset()->num_threads(); ++i) {
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(ctx->env()->StartThread(
-                {}, "worker_thread",
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
+            worker_threads_.emplace_back(
+                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
+            worker_threads_.back()->Schedule(
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
           }
         }
         return Status::OK();
@@ -580,9 +582,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             }
             workers_[i].SetInputs(s, std::move(args));
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(ctx->env()->StartThread(
-                {}, "worker_thread",
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
+            worker_threads_.emplace_back(
+                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
+            worker_threads_.back()->Schedule(
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
             if (i < dataset()->cycle_length_) {
               interleave_indices_.push_back(i);
             } else {
@@ -1047,7 +1050,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // The worker threads. This must be last to ensure the
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
-      std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
+      std::vector<std::unique_ptr<BackgroundWorker>> worker_threads_
+          GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
@@ -1389,9 +1393,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          runner_thread_.reset(ctx->env()->StartThread(
-              {}, "runner_thread",
-              [this, new_ctx]() { RunnerThread(new_ctx); }));
+          runner_thread_ =
+              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+          runner_thread_->Schedule(
+              [this, new_ctx]() { RunnerThread(new_ctx); });
         }
       }
 
@@ -1645,7 +1650,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(*mu_) = false;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index ebf41925c9..e69274e4f2 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -181,9 +181,10 @@ class ParallelMapIterator : public DatasetBaseIterator {
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
-      runner_thread_.reset(ctx->env()->StartThread(
-          {}, "runner_thread",
-          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
+      runner_thread_ =
+          MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
+      runner_thread_->Schedule(
+          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy));
     }
   }
 
@@ -331,7 +332,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
       GUARDED_BY(*mu_);
-  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+  std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
   bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index 754ed772db..e9c38eb8a0 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -256,10 +257,11 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     Status EnsurePrefetchThreadStarted(IteratorContext* ctx)
         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
       if (!prefetch_thread_) {
+        prefetch_thread_ =
+            MakeUnique<BackgroundWorker>(ctx->env(), "prefetch_thread");
         std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-        prefetch_thread_.reset(ctx->env()->StartThread(
-            {}, "prefetch_thread",
-            [this, new_ctx]() { PrefetchThread(new_ctx); }));
+        prefetch_thread_->Schedule(
+            [this, new_ctx]() { PrefetchThread(new_ctx); });
       }
       return Status::OK();
     }
@@ -363,7 +365,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     string prefix_end_;
     PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_);
     std::deque<BufferElement> buffer_ GUARDED_BY(mu_);
-    std::unique_ptr<Thread> prefetch_thread_ GUARDED_BY(mu_);
+    std::unique_ptr<BackgroundWorker> prefetch_thread_ GUARDED_BY(mu_);
     bool cancelled_ GUARDED_BY(mu_) = false;
     bool prefetch_thread_finished_ GUARDED_BY(mu_) = false;
   };
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index 3f76695bb1..7bb2077b62 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel {
  public:
   explicit ToTFRecordOp(OpKernelConstruction* ctx)
       : AsyncOpKernel(ctx),
-        thread_pool_(new thread::ThreadPool(
-            ctx->env(), ThreadOptions(),
-            strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())),
-            1 /* num_threads */, false /* low_latency_hint */)) {}
+        background_worker_(
+            ctx->env(),
+            strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) {
+  }
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
@@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel {
     // The call to `iterator->GetNext()` may block and depend on an
     // inter-op thread pool thread, so we issue the call from the
     // owned thread pool.
-    thread_pool_->Schedule([this, ctx, done]() {
+    background_worker_.Schedule([this, ctx, done]() {
       string filename;
       OP_REQUIRES_OK_ASYNC(
           ctx, ParseScalarArgument<string>(ctx, "filename", &filename), done);
@@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel {
   }
 
  private:
-  std::unique_ptr<thread::ThreadPool> thread_pool_;
+  BackgroundWorker background_worker_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU),
-- 
GitLab


From 09b0fc199129e0f487a39741bdf674cf09035cbc Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 14:17:24 -0700
Subject: [PATCH 0535/1085] [tf.data] Choose non-deterministic seed once per
 Python-level `Dataset` object.

This changes the behavior of randomness-introducing datasets (`tf.data.Dataset.shuffle()`, `tf.data.experimental.shuffle_and_repeat()`, and `tf.data.experimental.RandomDataset`). Previously, when you used the same `tf.data.Dataset` object multiple times in a pipeline (e.g. by zipping two datasets derived from the same randomness-introducing dataset) *and* you did not specify an explicit `seed`, the implementation would choose different non-deterministic seeds for each use of the `Dataset` object.

With this change, the seed will be chosen once per `Dataset` (technically, once per `Dataset`-`Graph` combination, due to the vagaries of capturing state in `Dataset.make_one_shot_iterator()`), which means that all uses of the same dataset object will observe the same sequence of values.

This change also revealed a small bug in how `Dataset.shuffle(..., reshuffle_each_iteration=False)` is serialized when an explicit seed is specified. The op-level seed was dropped, which could lead to non-deterministic behavior. This change fixes that issue by forwarding the op-level seed to the appropriate place.

PiperOrigin-RevId: 216248013
---
 .../core/kernels/data/shuffle_dataset_op.cc   |  2 +-
 .../data/experimental/kernel_tests/BUILD      | 13 ++++++
 .../kernel_tests/random_dataset_test.py       | 45 +++++++++++++++++++
 .../kernel_tests/shuffle_and_repeat_test.py   | 21 ++++++++-
 .../data/experimental/ops/random_ops.py       | 21 +++++++--
 .../data/experimental/ops/shuffle_ops.py      | 21 +++++++--
 tensorflow/python/data/kernel_tests/BUILD     |  1 +
 .../kernel_tests/shuffle_dataset_op_test.py   | 25 ++++++++++-
 tensorflow/python/data/ops/dataset_ops.py     | 22 +++++++--
 tensorflow/python/data/util/BUILD             |  1 +
 tensorflow/python/data/util/random_seed.py    |  5 ++-
 .../python/data/util/random_seed_test.py      | 13 +++++-
 12 files changed, 174 insertions(+), 16 deletions(-)
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py

diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 66466d6a36..9f54c381a9 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -485,7 +485,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
                      int64 buffer_size, int64 seed, int64 seed2, int64 count)
         : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
-          seed2_(seed) {}
+          seed2_(seed2) {}
 
     string DebugString() const override {
       return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_,
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index 4eef9580ad..a67f6ff031 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -453,6 +453,18 @@ cuda_py_test(
     tags = ["no_windows_gpu"],
 )
 
+py_test(
+    name = "random_dataset_test",
+    srcs = ["random_dataset_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python/data/experimental/ops:random_ops",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
 py_library(
     name = "reader_dataset_ops_test_base",
     testonly = 1,
@@ -562,6 +574,7 @@ py_test(
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py
new file mode 100644
index 0000000000..d403a575ec
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py
@@ -0,0 +1,45 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.RandomDataset()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+
+from tensorflow.python.data.experimental.ops import random_ops
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+
+
+class RandomDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ("NoSeed", None),
+      ("WithSeed", 42),
+  )
+  def testZipRandomDataset(self, seed):
+    dataset = random_ops.RandomDataset(seed=seed).take(30)
+    dataset = dataset_ops.Dataset.zip((dataset, dataset))
+    iterator = dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(30):
+        x, y = sess.run(next_element)
+        self.assertEqual(x, y)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
index c208963a86..883169495f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.experimental.ops import shuffle_ops
@@ -27,7 +28,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 
 
-class ShuffleAndRepeatTest(test_base.DatasetTestBase):
+class ShuffleAndRepeatTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _build_ds(self, seed, count=5, num_elements=20):
     return dataset_ops.Dataset.range(num_elements).apply(
@@ -110,6 +111,24 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
       with self.session(graph=g) as sess:
         sess.run(get_next_op)
 
+  @parameterized.named_parameters(
+      ("NoSeed", None),
+      ("WithSeed", 42),
+  )
+  def testShuffleAndRepeatAndZipDataset(self, seed):
+    dataset = dataset_ops.Dataset.range(10).apply(
+        shuffle_ops.shuffle_and_repeat(10, count=3, seed=seed))
+    dataset = dataset_ops.Dataset.zip((dataset, dataset))
+    iterator = dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(30):
+        x, y = sess.run(next_element)
+        self.assertEqual(x, y)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/ops/random_ops.py b/tensorflow/python/data/experimental/ops/random_ops.py
index e3a2aeab31..25d7fbf691 100644
--- a/tensorflow/python/data/experimental/ops/random_ops.py
+++ b/tensorflow/python/data/experimental/ops/random_ops.py
@@ -33,13 +33,26 @@ class RandomDataset(dataset_ops.DatasetSource):
   def __init__(self, seed=None):
     """A `Dataset` of pseudorandom values."""
     super(RandomDataset, self).__init__()
-    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
+    # is iterated over, and cache it in `self._graph_seed_map`. This supports
+    # two features: iterating over the same `ShuffleDataset` twice in the same
+    # pipeline and observing the same order (by tying the seeds together with
+    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
+    # which requires the stateful RNG op to be created inside the same graph as
+    # the dataset.
+    self._original_seed = seed
+    self._graph_seed_map = {}
 
   def _as_variant_tensor(self):
+    try:
+      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
+    except KeyError:
+      seed, seed2 = random_seed.get_seed(self._original_seed)
+      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
+
     return gen_dataset_ops.random_dataset(
-        seed=self._seed,
-        seed2=self._seed2,
-        **dataset_ops.flat_structure(self))
+        seed=seed, seed2=seed2, **dataset_ops.flat_structure(self))
 
   @property
   def output_classes(self):
diff --git a/tensorflow/python/data/experimental/ops/shuffle_ops.py b/tensorflow/python/data/experimental/ops/shuffle_ops.py
index a4307212da..a82e4b7d09 100644
--- a/tensorflow/python/data/experimental/ops/shuffle_ops.py
+++ b/tensorflow/python/data/experimental/ops/shuffle_ops.py
@@ -39,17 +39,32 @@ class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset):
     else:
       self._count = ops.convert_to_tensor(
           count, dtype=dtypes.int64, name="count")
-    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
+    # is iterated over, and cache it in `self._graph_seed_map`. This supports
+    # two features: iterating over the same `ShuffleDataset` twice in the same
+    # pipeline and observing the same order (by tying the seeds together with
+    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
+    # which requires the stateful RNG op to be created inside the same graph as
+    # the dataset.
+    self._original_seed = seed
+    self._graph_seed_map = {}
 
   def _as_variant_tensor(self):
+    try:
+      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
+    except KeyError:
+      seed, seed2 = random_seed.get_seed(self._original_seed)
+      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
+
     # pylint: disable=protected-access
     input_resource = self._input_dataset._as_variant_tensor()
     return gen_dataset_ops.shuffle_and_repeat_dataset(
         input_resource,
         buffer_size=self._buffer_size,
         count=self._count,
-        seed=self._seed,
-        seed2=self._seed2,
+        seed=seed,
+        seed2=seed2,
         **dataset_ops.flat_structure(self))
     # pylint: enable=protected-access
 
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index c7295d6e69..ecb24103b3 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -443,6 +443,7 @@ tf_py_test(
     srcs = ["shuffle_dataset_op_test.py"],
     additional_deps = [
         ":test_base",
+        "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
index 347af18576..6001721726 100644
--- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import collections
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.kernel_tests import test_base
@@ -31,7 +32,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ShuffleDatasetTest(test_base.DatasetTestBase):
+class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testShuffleDataset(self):
     components = (
@@ -209,5 +210,27 @@ class ShuffleDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
+  @parameterized.named_parameters(
+      ("ReshuffleEachIterationNoSeed", None, True),
+      ("ReshuffleEachIterationWithSeed", 42, True),
+      ("NoReshuffleEachIterationNoSeed", None, False),
+      ("NoReshuffleEachIterationWithSeed", 42, False),
+  )
+  def testShuffleAndZipDataset(self, seed, reshuffle):
+    dataset = (dataset_ops.Dataset.range(10)
+               .shuffle(10, seed=seed, reshuffle_each_iteration=reshuffle)
+               .repeat(3))
+    dataset = dataset_ops.Dataset.zip((dataset, dataset))
+    iterator = dataset.make_one_shot_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      for _ in range(30):
+        x, y = sess.run(next_element)
+        self.assertEqual(x, y)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index b7e19055f2..2d036fd0d6 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -2254,18 +2254,34 @@ class ShuffleDataset(UnaryDataset):
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
-    self._seed, self._seed2 = random_seed.get_seed(seed)
+
+    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
+    # is iterated over, and cache it in `self._graph_seed_map`. This supports
+    # two features: iterating over the same `ShuffleDataset` twice in the same
+    # pipeline and observing the same order (by tying the seeds together with
+    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
+    # which requires the stateful RNG op to be created inside the same graph as
+    # the dataset.
+    self._original_seed = seed
+    self._graph_seed_map = {}
+
     if reshuffle_each_iteration is None:
       self._reshuffle_each_iteration = True
     else:
       self._reshuffle_each_iteration = reshuffle_each_iteration
 
   def _as_variant_tensor(self):
+    try:
+      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
+    except KeyError:
+      seed, seed2 = random_seed.get_seed(self._original_seed)
+      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
+
     return gen_dataset_ops.shuffle_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         buffer_size=self._buffer_size,
-        seed=self._seed,
-        seed2=self._seed2,
+        seed=seed,
+        seed2=seed2,
         reshuffle_each_iteration=self._reshuffle_each_iteration,
         **flat_structure(self))
 
diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD
index 39082ce370..95bf3209d7 100644
--- a/tensorflow/python/data/util/BUILD
+++ b/tensorflow/python/data/util/BUILD
@@ -142,6 +142,7 @@ py_test(
         ":random_seed",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:random_ops",
         "//tensorflow/python:util",
     ],
 )
diff --git a/tensorflow/python/data/util/random_seed.py b/tensorflow/python/data/util/random_seed.py
index d5169f7a53..d24df6d957 100644
--- a/tensorflow/python/data/util/random_seed.py
+++ b/tensorflow/python/data/util/random_seed.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
 
 
 def get_seed(seed):
@@ -37,7 +38,7 @@ def get_seed(seed):
 
   Returns:
     A tuple of two `tf.int64` scalar tensors that should be used for the local
-    seed of the calling dataset.
+    seeds of the calling dataset.
   """
   seed, seed2 = random_seed.get_seed(seed)
   if seed is None:
@@ -45,7 +46,7 @@ def get_seed(seed):
   else:
     seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed")
   if seed2 is None:
-    seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
+    seed2 = random_ops.random_uniform([], 1, 2**63 - 1, dtype=dtypes.int64)
   else:
     with ops.name_scope("seed2") as scope:
       seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64)
diff --git a/tensorflow/python/data/util/random_seed_test.py b/tensorflow/python/data/util/random_seed_test.py
index a809151e6e..5df2e38c62 100644
--- a/tensorflow/python/data/util/random_seed_test.py
+++ b/tensorflow/python/data/util/random_seed_test.py
@@ -41,7 +41,6 @@ class RandomSeedTest(test.TestCase):
         # (input_graph_seed, input_op_seed)
         # and output from get_seed:
         # (output_graph_seed, output_op_seed)
-        ((None, None), (0, 0)),
         ((None, 1), (random_seed.DEFAULT_GRAPH_SEED, 1)),
         ((1, 1), (1, 1)),
         ((0, 0), (0, 2**31 - 1)),  # Avoid nondeterministic (0, 0) output
@@ -78,6 +77,18 @@ class RandomSeedTest(test.TestCase):
       self.assertEqual((g_seed, op_seed), toutput, msg=msg)
       random_seed.set_random_seed(None)
 
+  @test_util.run_in_graph_and_eager_modes
+  def testNondeterministicRandomSeed(self):
+    random_seed.set_random_seed(None)
+    op_seeds = []
+    for _ in range(50):
+      g_seed, op_seed = data_random_seed.get_seed(None)
+      g_seed = self.evaluate(g_seed)
+      op_seed = self.evaluate(op_seed)
+      self.assertEqual(0, g_seed)
+      self.assertNotEqual(0, op_seed)
+      op_seeds.append(op_seed)
+    self.assertGreater(len(set(op_seeds)), 1)
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From bc5635dc3ac78007caee88fabd81d23ad945b637 Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Mon, 8 Oct 2018 14:19:49 -0700
Subject: [PATCH 0536/1085] Update performance documentation.

PiperOrigin-RevId: 216248418
---
 .../performance/model_size_vs_accuracy.png    | Bin 0 -> 18946 bytes
 .../performance/model_size_vs_latency.png     | Bin 0 -> 21380 bytes
 tensorflow/contrib/lite/g3doc/performance.md  |  21 ++++++++++++------
 3 files changed, 14 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png

diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png
new file mode 100644
index 0000000000000000000000000000000000000000..44d0ccd3128dea1c947e57ccbc4e18b2d34cef88
GIT binary patch
literal 18946
zcmeAS@N?(olHy`uVBq!ia0y~yU`l6TV0gyC#=yWZ;h>T$0|Ns~v6E*A2L}g74M$1`
z0|NtRfk$L90|U1Z2s2)~Tla^7fx)uGHKHUqKdq!Zu_%=xH?gE3C%+^oGfAN=wWv5V
zKTp9(&q&W$M<Ju6q`*pFAE7`mzbIW_S;U2vfkA=6)5S5QV$Pepl`)}LfBpD)J~4=e
zaj%7fmR}3AOg~45vBmV@S;BqhzQOy`?#_NT;n@tSn^P}cG7&XpJ+o)gGE3EE&txnf
zwlpz62=#CjNV&D&{>R2{3l6>mg7dz=KI;8FuKNAgIqk|e>$ZNkl5!0cX%TSZP;7BD
z(0a)LW*(Wp#6lP(WS|f<hf}ddz)3<t^Bh=;r$ZziBqPA)(q#;i;b2xeI#a-D{fZSD
zCQ`hTl9H@)uiwA-pKX>qZ}t3*$;XZI@9lYR70#jPrL1|bMT7O#m6gpiXG#_p7Vi9J
zo_&p{*UdR3B;+K^xBqM7_RebKl@8jN<hm+!^)c!EJqH)N_cKInOky=oKc{o?$2>I^
z6_#GN!|C&D!!9l4P&~x}GEw!_wYA<34h_3%|NW_yv#*P35=^x&c_DClxxc!CLW6t1
zT;a=0s%(5R5tkB0Z|y35y&`6((d+B$^}p@oHqO#)(WqTwA-rgcgjr6+I;Nz%yGnWF
z?QE_aa&mI=$lJ})3SIT!!Gi;TettfA>J*cLvGHWj$!dw&+1`7rzW#W*eEz37)>&6H
z-n@CUVf*&<V?B~7X=!2-*H;EF&$zis_3SKD<^@GhPxVL`Hg$<=GkrK;|8KISaT>#g
z&FTI+y1G~Om6etM{Qb+gqvog4T<h{>MNdyzSXyqZ{G7J;FDonS!)MQ?W#6Bq>iy#C
zYH^0$Wp5W1K0X$*CPJ{fx>`&>ZchH2lP6E++}zZ<e*eE&+1J+<{`&GV=B<05%)-#s
zVK;8v2v{AaYkF<U6cM{WADm@mWHxNuw#;|7*^(to1cZc~&dxF|j$Q0G*Xqsv{qhYr
zH>Y1-A8+5rC)+i3s_4Ie|K`|KPMW40os^v1Ja?|Fu$qs8uCA}`|3^o=v*JMU)5OZX
zqx!p^s8)!Aon4&myYKhwmwQdsTC#lk^WRmi0&R|6#)@8?{{H?#!ooXCUJA{eIg^Er
z&8XzXg_z@ezTc}>QBf&)e2kY**6PTwudgF_m+3OB4qKb_{M_7@wzi^QUoyAl+?@3K
zPW`{n^JmPMAs{H|n317T{r%n3-S78l8>|!4joOfYUM}tItfLnfyH|dC;<@kmyBiyu
zmn>14G-=Y0hwbt!;`Um}T9>I<TFyK-*V?$^L&CeeyWJBK6doV%SGTdTId59^?aj%l
z+TlCO-p<;Ue|ecNkDQG}L*Qb!k8ii%|McRbvR=#%hju>MPrd1{udR)Uin_GFzJ6EP
z+f%zrUtifa+dTi%uh;88->ZIK`2Sz+p+ko*yewhxnPt*xoPO@Z?fm^8kIUCTc>TJ2
zV&KENJ8bOipx~9Wt2r@2(b>SvEbZhZ)gwob7QVS*$R}fQ!2Es<^MS?g{V%Sp6u!Hw
zR9Q`}t)qj(s_c!&*H>3Re)@DO^g!N?4ULY?Y>8P}Q7bH31XMi)o%Vc^k&&5F`}>>b
z=Vxbwm-)?I_2%B*=$BVkPM$Px-Z}=al$0ftCQe*9W7aG!as9ZE^U)iVS}*OZt#%C*
zIePncbmYF8owv57pO<3|xODmQ)ZOLpwbIhkPCY)}fAyR1Y%|TcxVWxG8mDe;&7OK~
zZS-^L`|DzNPm!~&(ztf*T32gp>!<bgf4xthI1wP?8tB#|v9RXP$KyNu_7^?v3R>zV
z`t-xY!(EFsyb=-?Tw3NkJLY(gq_Nvf+v;ui{<ZN)E;=*Q*ga6hb!XMrRY{qdD^=9h
zwYwH+Y|XkF6t*T}p^A#illd3^uTMVSck1{1{p<I|MMp=g>gxIiinvB@NND`@Y<B)L
zOLb%8V9oe_dxXTqrY(Q_;9zst-o3T!YAx?uy9Q2)+?=MFdwW}LmXG%|oxtCBw`5LU
zbou2MIdKVz6<e~ddIgHOg516;boH`17KM*iUKewcV3V{IUL+!EmLoCSEcej;`}XIj
zq{YO@ynOl6!op(3Zy6bxj=nxN1vRy{sI6H~vrZR&e&&1W^5u=2Hy=KBj7?fv`d;n#
z*mrk#hp*TXSO2%P?9GjX^7Vfl3knSW{rmT`Z<nfy%7xwK`OnVJUvI})_4%2v-yDmD
z-*S_a4<GB5-dX*9-MwrcIh%;R|MM;{^WF3F+3b)N0gMgu^?xSD?k;=y;DN*MZ*MKj
z-psg{eP>_o>}Ga;qm&Z@3{~IX`DSIUvbtOS`&;g->+9z~-?L+fMNn|?*KMN5;`g&N
zgs+cV>fSFGvOZ2$TU&dX|NM1!KPJZWSbl74Zx@!7Jh`v-_l9lTjvYSycxRH3lZZa3
zTy>R^ky#PFT@O_3<lo=7^Hxr7uA!;v(`RRAzuFhQJufjopTA-5T-ojU_s?COwm58U
z)D-RTz8yO(KxzDNJHMxo&yz<-yU&ZvpEyxa*1Bwob30$mjsivh>aZ}eW_JE#w{Ooj
zOg{GH)&APw-?HxW%iF1FYIZ(7J^l6D<;Bm>y$VToaA>ev%K6~kJHLVggYWO|K0a%H
zKjlCJW7YbNiHF%XZQAtW=4SP)tHYPa?X6m}Y}u;d*;})&KKk|bb>+FuTemK)`T42p
zwvLWYk?oR+GiRRs`}=#b?Ba_VMY;Xb=6!9v(nW7?nF<LDM{Y=9>{_(K@Xn4x=lnlU
zPft%j(7;&p|L^yl8ygxE54XLv_2Wpa0M`+bd#g<M|M|o%CMLG?o3ymFal!!xyMI5D
z<!q~toSkj{^L+ik#rLM0B^+Sb_v@ARyE{7%H?#9cY{{5dwQlZQ+1Onr4-fO(s~8$i
zT(d@JUHtxY3!U4!deki}W*j)+AZJt2ut)<`=sLIaJ$(6c=JOl<_Wx!)d6Kg4&!^Kd
zzpKBzICyn+_{|N8%r9TQT(W$5@vAGEda=8fZ1)ci4*vE1{rM*+C;xohZ~x-XPGg45
z%*>oSJ0`ZYu=x1;Zrrr#)c3dzrLV6UW?$1ebm-8P^>YHZ|M*eS#v^&?-{0Ri_f#57
zN=hC%eq2~gtZm{%L1p*8C-?vVGdDCeOi4>CdUL~Yo=xSU?Dcz>_3c}_e7SM`zdy_G
zHM8IRJ~c8jGBGpr<kQpBi$#~%R((-0GZVA7w+AU-?A~ut_^9QUUCobzKY#z;+*i9>
zB;9F`6F4MS#_zA2w_LPXCw7+zH#hgItE=5-o8?N_R2V3`_pu~=d2zAi?X9WHd}lv;
z^=el7lia_*zAjn1^x}@fWUbIuF8k~LcJ%h@8qD*Ws<koes+Nqb?8Z%-l3rX`=rhkI
za<1N=)eQ{{ee(8xOTDIEQ8zQsyW?@ROZ4XMa{b1}#w#lVn_pdBZJc^)%5pKYW_JFx
z|Ns7ed4FGCSXj99e;cp#is0pZVPRowA~rVlN}E5k{Ql$P<2!pQC;$Jw|Nr8uudiN(
zuaDcCabW=?BO~LLb+NN!cb9!UDjpxOvnaL1N;ducJl2GdkB;uxu>+KHpPrgJ(<qfI
zGBPqEI{Ncj^ZOspS-(%n&+l(-b)C1|{D0oZM@MsRZRzY1)i$d7@*?K+gpf566Kj8e
z+n9WuZ&mpEzQv1`+4<!{Zbxs=>zz1JaNW9fJD06&6=?Ha{G_L!Td_r>)T-pgfd>bh
zE1%CT_p%aEbW!vyRBI7fdSzwsa!^{>o_{~=`cz>j3A>rsM4UQ;GLCczmb|*cX>V_@
zd8Vb}=!p|E!OcI$i4&0<bR3E;j1wREa45D29BS|Yndrok=s-lf@W_$}&R+A@uGKAl
zbw%@~0gqErk&&2~n5t~s!HIibs8^nx|HRGyWOv;p@xLnizg%5i1;xd`-+8`Y=xF3z
zt5S>FUnQRHi2_beP9Hvf(t2sY)Az*H{-pOm74u)7abG5bOt^Ud?Af=i=cZ@+f*PPA
zCoWyubmLdlh6KlTZ^PC`iF)rBR`=7nc73KixXs#Oq@(Sf7@c8aJ6-Nz?mxRKJ=;Uu
zE@W)l<n?ax_WyBn{(ifi?|Sd~%gf6vKRj@p_&rzx(&kMK37dB4#{2UIfA>GPl%MMm
z6D0L~vG9y-Uu}~2+b`bkaC+vcH`mw8A8zMQKQTd3!|Pk&a=*EYLRW{G<lYkD=jWHR
zt#V0AQ=6>rf9&pE*|lM(x8JW@9k)DocUj5%d$Q{4>c{$Iy~D!9f|vWLs;aWi^*cU6
z(Ye9i{jG>=;-@DkEv&5{KYHX85D+kLhLl;(4DDqy)Ai$z9X{-Qx38<~(9xr;4!swz
zU;nO}{^O^}OWSnsX*v&gzu)J)_~L^{j}G11n%&vkn|fh^<B{XXFYl==Hp{uuFmq;P
zU>hjNIBd7g?|tauBGZ4_sQ9hF=lLfewpx_U>0Y?~oZb1#g$ozP?5UWzI(+@7kH_VU
zzq|;vv9Xb`C{S>8blkCf_v9HfI(F`~Tphmt*sWW$f|vV!d~#AaYI~mVbiG&xi^@+b
z4<A0Xu&}tWHrjk!-rZA+T)QK-<wzzbCRYBsyQ@??F)?wDZS}MrJ1p+py_<P`UF^CU
zSyxs#zP`5Bvg(UQ?yW6>=W}mtP|VECba8VF+MXBN)YJsBoZ<8|-NoVS<8Iu%xp&K*
zeH}{Dmfd}Q&sMMB_vp*Z%P~6&8qb{ZVPa+m<+lI7uJ7;Y=vWZ6)N4)L-d!u4KneUv
z@ESXznad4MPn<OGWO=*t*KL&*6#<*me0zF&F1##padj<xdP?-on>PjJ<?IJeo#GM}
z7XJ3`?&)7&UtfG#vL)kU(~ln&>tc5=iv!g+64KI-A3r`kQQ3W-)cc9b?jJsWJb2^A
zjI67xF7B(X_M2-pHQ%S+JmCOC=@#AP7Tukl58LJI9_;;oPdnsXm-9rw?6j1W36mx{
zxw*0R%iAA2cdjqK?q_S=-(QRdNk=%g<=zgPq6o@+O7a^yHP2s-TNh*b_5HD9$5K*K
zo>cR?21e{E(M(KCY-w#3l$2cAHz(6%R?P8#wZ9`p)Rr|evu~>WoVG4*Z<pKRgBLGO
zyl^2vNl6J*WR<?U^6=y1<0csw9_)NvA=$OYV)ORx>6eyxURf6_ZEbD6X6@R{dwVR+
z%*<xYNIAdk+M3A8FJ8P@#N`Nbi+^Hz?mwru^-rHXammS9V>RjPi;K!eMn*4gZPh+K
zU0>bSR<>bv`1(n6=J2HWXS=z%O`1B@H6UPu6>nx%)+)LG;kmcBscL9+oH^t3?*9Js
zpiawcyG>13*2l{yCnrBUJA1ibX4(6Db9a}&7ZMe94G9qebuN^Y7G1cm!{u6b{$?{f
zKL;P5nvs!^w6rv+$~$lWKjy{f@O3dKPfgWk+)@0TPf=0PRpjMrRZUILqJ{0qMThp<
zO3AAVb6?b_Oj`P6@5lQdix;LX75Q44o|YygB;*tkA@T9!$Cj3s2~(!Hl$4lEn>Ovw
zx7+zOUoN_Xx^L?K^8(hzSnm7rh+E1eqoK8x)z8mQqpPX6m-q9tv&u?JP1ob9dFAEh
zlai7yyetV>71B9jg21lQ*KW;hyagpCDaU#wv+hsT3SAVm)XOB}0z<Fc;SNFN9W_6T
z*3DorRja*kY;JBIwkE>S!J*;aw>kT~Cb|9m{g_ApB)|Qi1Ly7kuelezt?qACpRDyU
zkjUNL<%Wia4{vNtJ~PjDwr%w{o3BxMd3l>S-u_qT{`XS7@}mEzN%kjuLB*_Bc3jcC
zYc@5yeN(>Jzqd6uH(j=Nv95NT+3^z^|5qLpKk4n^!O<<QpZ5IR-1DXT6ss@pT6$uc
zY4-JXXFonZUis&TVb`L92L~9X%=3D-Z#R$Lmcy8EcUS3`cXxNgvX+EoW`yAu%kFzc
z2an#9iuN{+d9yb1%QWxC)|nbsu4Zd*E^_7m_4Rf8>ebq6YHB9g*Ln<-k6qYXUCzcU
zb)-kqIOEzH$;EEHTB`&=qYQxz%F4WZOTSf3KcnFl7Z<m*$mCP1#--A8&(6#|Jjb%Q
z=<Vw8^>XU|^K?$-wSw|?T;wNCMr^|pWY=P-qZ{-!W`dITxpgr+oBWms?_aZK&4<sQ
zm34G@{Qdn)tYq`@^5)o7ZUTk-l)`D#rX9U@y@{2(D0h3-)m0w9rA@O$d}o`vuD*I{
zeZ2joNt3#^*ekkx)ZFyw%*m6T8#WjmI&{dQ_!*C=R>%VX`F5bheP)(v_l6Ay&(6&~
z?Ji#%5~9eV=(Tc<PHXPMi(g`@ShoNC{bJ`Aj@wR)Po4_$(pk1&JbnM)Z_yLKZBIEV
zbZ(yQ>7B*TSH$i%J3UQT`_jJ_fuqYKmYMR)SUCLs^|i2|z+v&l2{UGJI5;^mnPy$l
z=o4rWSn7hmUghTGJb3zacirD#7dIq2xA90eElQCue|1GOYHOD3<z>DfKYVy_aj|<!
zTAESir<6N)?_SK>TJ!tu_E)91wr0D3e|NXAs3_^pjg2X(sfMMmLV|*VF1#$^ku+lY
zQGCTOP(;mtUeD&u#(H{sEUc`e62X0^r|Um{_;6vKyQ}NNM~|2qrcRw&^>@k?5fiDi
zr+qdHEMl1{;;!D<*!bhe4~7TNpSxdQ7dz7=Qz$Yr()HhdZ$+1voBrnTxGO!Xm@{o=
zcD+}XTKoE=8z0B;>@EFPd5)Wx_vxdf-Jmw-wYAa34-c_MZO=P<ppluAi|f$cyK|2m
zaXEALET~^`?b@|DR;69){_`&ED$U+i@{;Mm$;s-R{QSoc9&F6JyK7_V>o76hs3V(F
zPjhf{pT7M5+S+JNZtll#Zf-tu^ytHP@BDU`z5Q~m<jae|TU#<0&%3=L(fQJ)OC3Eu
zOY-jSVqs-9EO_9s_3zv5_tUPgi#5r+Gedvhk4H<NgmVOb>e^%B?d_fX``cTcm>mKQ
zPft(ZoN`j=Ue#+|v;2E!Rs=4-vA@24-sdp1a#r@0wj!*Y-G6Du^c~fnJ;kTaEH`cb
z9q!CoS;5<I=8TV%lM@Fw_u-2dCw6pjC@Cqeh}x<Z7#IlZV1=v*XngYINlfLf9fiu~
zd3O%nzc1hL<Vni5oSRK6S8DqC`Q6!7+8w<;@8X(BV<u+ija#>d{ysHvv#f-K$L6%N
z7BxQ%Cd3x3joi$}&E0Kd`1ao3**!fyG3!s9J=@#LEpAltAt7vi+*$ScHH*F(CmrEv
z<C8rV9$$NOj%9Jh->=u>E%q<{X#3g71JtY(=~dD{SN8s1@9NdsS678Hg9^BspHHVV
ze0hI=e)+vh_xF{_$9fXa%rJEL#;>lgzkU(l*QpbXMU{#s?|Bg}_1P%%pWUNLmp<ig
z-)i#s3xhsGPjBzl>-+2f*L{6&**b0aZtH8;u2mMLWM`lL^78V_TU)c&{Z&*{ym8~k
zisScp6gGePQnF{yo}#Z`ukUMaZqB^1Vd1T9RbR7WVq!LI+H^{I^5VtG^CGgYu5$hV
z?=L9%iE4+XytuHiGH&zAm74qZ?W_5AGyUhYr5n5+S3gUg0BR*`O1ajZkA(E00~Wi1
z2HIE-=<om2w0LoHV7o$#hU=R(XX97=T|Z~eoQ~e!r&m@6GuTvp>FDX<0gZ8edV1Q^
z-=F=#tE;P3)YX}9UY<2^;>48HR6!AuC4SDcX3bi4{D0`}ZMmFWTuR2qlYf7I|M~U$
z{m*vwuTl8=`ug!3H)d3Rezr1rR@Wi{LBWOpYJY!wYm#$g!-eZ2P7>Rcbk9wiJo)DK
zeEHhn-$K8yjoRA9Z~te)GT+%owZBSAUj^AnM=CLF%e{T<&d%a)lWF_8xVe+BuZw;4
z?c3Yiky|n@a>rjkeY*Sda)0IvD}&YLY$`q+x~_B9ZR^VJohvuqGb;{GzLy!j{lax4
zV`F6vjgIg4s{5Du&c3v>I6Wvh7}QhBxU<7>-MV!a*4Cfj?S8-Na__r$?|fz$G`jUl
z1#Zua{q^M~b4vfW?5$B>8yjS5e|>RWef83o%wVgM7Y*yz>t9_RUcc<RO~C_)egFSe
zv++nUq==^|yY(D6dX!aMTs$c``R9|#{_|eWa^g6=H1OZQiV2KG#l^w(l~c<iBe`y$
zKepkQhss7y%^Q)kjaT``R++ACoH)@<)@j?8Ejrsud%HI4Hyk<Q^3`)o{r}(hL)J!3
zT^YRmndQ3B)nPgJ_sK>_-%gT@`u_d<^Di$i>(`!S(%07)(~s+!JzM(qwY87W&9w#%
z#OXi1wY2b*;`7Y2hp#{K@cjSzo^f$-@-3@do02k$dwRI)iau5r2@44wdUUk==FZ~h
zo7?`L(&&9ZbGqZFMf0Dy?iXJh{Y03Fr$a|D;D;*Hy12bwJByxf*t(Tf!OU#hG~MV&
zj~*TBku<&{;q2t};^N}=s(US*!dGVRd46tg_wL=+=gyr2jra9lJbk)bGkDp9hYt^C
zuity@;o){qZ|}#G{p}PrG&tt^xd#LY{QLLMXNCcz!u6)p&z?P#u`20!@+8HrPo}fI
zo&D@AQ)LSaiH0dtMA&$xTo$|azPP-cUsF>vt7dQ2S0x372buYkjn1F&INY`HY~+_|
z+<M=x-H4j?Fe(4+qU|wUu`@UoTNJgYEj4j@D3aF6kZt??Wwp%x!-p4#=nEEnc@Y>B
z8+-L_>gj2Qd3P+@_~p-CkFSqiQR6?uz;P~@lv$31b@{s`>F4J$q@<)2e0t(p_VyO2
z__(pZ-oE(xxwEE4(}JHcX4RFwxzV_Oz5d#$tx4C`M3%g`&}f={?ZbzM&NenSpb_2s
zb-#5*wL%_TTIxN=x?J!2>lGm@h5YB)DB9W0tN-&@zO_m=b>hT{ZTGCI_j;#^sOszM
zOR5SuiC9Wn8Y`^scX_6HrfA8M^iOvmdhE@Wlasr$CQ?{gS-HeYHZU+SW@izrwYBw~
zyLTJkT-2bT*YfpPR;}URyA6!YS1xYbFP@%#eVwYBTAN$1)XL+3LUVF-LRW<ZZr6|5
zvB0jvDCfq8<;UheesHjPOU6YeadGjwpHId0;`XfA_VkYW%{`UHdpEu=etXMQ&41pR
zXJ=;{nwqA*zqeN>Y756)zxL_V#aFIe2`b&=>;Hz<KJr_Ab<x$V;@IU`S63Z5aztd6
zzhaBPQvXKf^vK9aP<L_042d&m&lZ-IEz7#P>ZoA!OySuRf;!4B+x$K~ReSlryEkrF
zoVW6<U#h|7JzekWZkGf1m-EM`9Q}Cch)KZ%hhN{{``_PJdojbLa^4+NGqbb{3mkv^
z`t|AC?fmCi#^&bgii!&lRQUS%oH%tVXnSyQ@KyWc{qpBuUS7U3bhX&#^z&i2|4G$5
z{yZ~r%9JAwjLZ>Rvqa0v${fnp)&KeEK4;DxE}Q@BgMxz%&Gr8Xw>xntdQGo!KL;A&
zn_-v?>UM$p_zDUQ`TKsh`OUR@`10k+TU)b>|6UT-_nXF{@kfZUSIRV~pShr{OwGVR
z;6$8*NqciM^TUS^Z7M&twDZdsJvhMl;Jnh=YuBbNS)#Hn=VnmC9#QSECl?pHSKW5&
zlR0@if4}eTZMj!#V|SOOzPz;b)!v+i3l}nM$-ds_JKK!$rgGi$b8~~w2a2@rE`J}i
z+;1+^V?ig5K&z+GbI#5%Y+mTxUi9}@>9;pG4}ZU3e}0;7v_<(lncLg*)fE*Rd8N%n
z($_4V2Pp_ZjjW7}6;VGxrRDnl|D?pl#TPAJeDyIXVj37hen>mez-W?vjfX*6TH38&
zu6Ofh<JjG0q08&%gT{P6d?)~wspk3jQch3PT@$(4tybf~mzS5%%(0yO<x9zyC|%!K
zCW+6^%w*`1Ht$=qL`BLp>xlaN8mG_C&R)%)Ygx?Z@9*#9^Q0_5u|;EbiY6~}&<s_k
zy1!ooCf=FwM@vue!@u9}FK<rww<>)#<$2+|M{)lir&j*|`~BtB)#9?&Wi^`DCm+A_
zS$*e4e{ewu8jCu*ZHk+V3yX6*-_y0*?>)M)G5P%6g(v3g2!6;}=0E@3<Kz9_Rz1^C
z-Msm8sc4!Lhhm)X&vSE^EL#?|ucorJ`q`P8KYsmU0!`}e_15kXSNCQSiQAfW^~KfI
z)9-ByFTBD78qidgZ4z+m5SsS>MCAE-wvqem_U_8f%JK>c5i!rdcjQLI|Jp!s59ZO)
zC((2M{`q`fMNRF}+Gz8-zhA?}^yALVv#tK|zW)F0*O?aJF`SQ|o>;HV%*i>^!YM3a
zmLu``+1ccupPrtXZ$JOn*Vp3dHDJ?Hz4jfw*(t34<Im^whQ`LpzrMVjF?;sskH_Vc
z^YhQ2nQ6Rn<3`PM7T{jgsg|1V&E@xNr^oIt1Euua`TNgaT<p%t!}H|ALT4TsiwU;X
z-$c^qKn!-;cXVcFCud+_po+SBa$4H5clr1C*}lEKU4PmGP$y3D)U2BB&AzkEKEB<4
zKPfG3+Uu3z@j8ycl25wlWMpN3KI+z=SG8%8hW?j_H^62pZLgVr>BF~gX)iA=y|b%S
z`})5vTTDRZid&Dw!Cj@VyKEdlUhmkXa)09H`St&1nr2^n@c6NFN{Wh^nc0$M%O=g7
z={a35_Q!)}{ww?w#lVK@yYD+1`Sa6LN&7k*(9EB$?c5zZEY6%go65-xHce?te&M(9
zWy_Y`xOr2wR0pgnHE!Q`$jo=gBX!XHHmC=vxxpfOSBWOSoK3|u?)aTWtaJUA*F{D~
zK79UsdEDm2!)+#+ms<Mme!aN47;Lr1Qm#GE-_-yAE&uM_yB)iB85KS8*j4s6YQ@&G
zbLPz9;O1slFg6yhuCD&|<>ldn&Fq|fe9zw8+zgsVss8?M&cX6|u(betf2;;|b6Yf2
zxnwM7+t=G&Ul*HvWkukR-@hNfcyZ#(%ga0K{#Grtwk&wS0BU_5JlOd3^mOCWS0SgS
zYO_0BT@^atz6$Kwpk)F6*;yu1tHRg&eSLKmG=^}nnf=S_>;32ET0egMI{Lv+XkVw`
z>#I;cdAl>m<?DTZetH_Yr^4{vyLXl6B6k+4YKO05d2nv7bxTW&&EeBpdnZkrq#|Ag
zZb(1zFIqewH15(}H)*=kB9Q~<&-)jamWJ*sdATU(-kwNLZ|`6c*FYm<<Ed3&Ux}9N
z`t;<a@KgE!KiZ4Fzl(JZ6j|yyS?yE{r|^_DYjmC-=@j;wYgIaB&6+jK;}%zbe#ZJO
zI84T-V#5+=0}t>B^3Tb4UZ__>^D?N>!X;z*`*FYhja{YMe0+R6ze!6<ZY+L&j;VY9
z-QDGnU%m3$nst?7MdW6++TY(?7hinv@L}V`iGqcNg&`}0o?4#v1_#rn#Rsa-foAZu
z!q=_IQ&&?fdUS-7LCQ2MB!Qigk@3|OwF9&B_nkc0%-;A$^W2s#CONse^DPS?dRH8H
zlyhfCVMJ8as@v11P3w_1Ul$<m@8_3xe_!pFx3|OZnioCs$h)(nar0*5W;R~XnuFG=
zU%{XuwnaJ`1yR9)Q(}%zo;XoZQc|*Y*N)a!){h@Q?))}$=FE?Ozu&h{o&ias&W+0H
zUte8)yn6jUC0pCMU%r&=`}4{B)z#JEG8@5*Hl|pzilu9Zt$DEhe%;}F_vXzq%?5d+
zw38d;PL7X7q84i-w`PShF*A27i-E*CjMg7`q+@D2_3G;I;*XD9gMx#v{>{F&M)T4E
zNS0p2wA{3vSNho9-Q_nor}MwPz5V&Qxz-_TA~sqifme)Z6m#jEQ&LizVV*B1CME_N
zI-6hj%QN;lcqYMVPYkPA`m$xq7;fCSQSk5(tB{b;j`H_$6WP^3<_C(2R-C^%!!Wtx
z!vn_8&(ALpUmvHUs%n^jZ_lF+UvT)Gsmv;zHg#(2)TyGN!L-=jWp{QIF5dA-2oxq7
zljm8j%}hyYS-(EN@Td~l$Y-Jz=OdT<&5hVoVYog2{<D7je*ybJGKxn|hd;1QcXV`Q
zsQLT#`YXF$H|1bR+2*?7z#|<aqfHi%z`0I9WRXSm{<^*Q{vK}UuYY+E<PfJRrL1D<
z%l+mC{hv2$)~QBj_JY#VsgJ<ET*M&~F-JFcm&x^ShuirjO)>%|vZ{cr44fwVW6QJq
zkr5FmYBN9%7I0m1VB+R^w$+O+W;DEU%{+Ygu%tnP10pyg=J?Jsd3nCs*QcPMU_sj1
zS*}u3KyGr03{5Bu+g+CX=FJ<8_<c4Enwpv}uC9U0{pK#}gNBsmhk2X-{r#Ps^5uEr
zv17-+yuZKSqZtxTtGRT}85tQF7#jy~KYQkkhqt%(r3D}tDxO-!)V*0Nbk&C1-({uN
z78VlG(YKlOYo4AGEwP&WD>XH>b^iSMxBeX!k6&;xqocdK`rHL@-QhK#se7}icG!l5
zgG^c1*ZID^we{zl&F6o7yPeOyXT#*l!inI)>cVGdB(*|TILx=JW!O>s+pPNgyQO7s
zZ|VQzIt3aLZI`cGk*6QGXU5IV>Cdg)mS27vS^w_G$H$;1=3GDb#csV<ivRrmYiMNj
z=+4gKR~yfrJLlr+YHP}8Z*MPWU-!ppz3co569lThyjZyXZLj&g2M-UoGeks1?fbG1
z<Vxq@gu<}>b+w?W!HNothYufuhIm1P;WMoeMJHoJ{omK|KYsmM<y|r*6x3gHTKsb7
z7mmEo-!B%w6x`P8HhJpOCsylXb{6H_-DP?|@R;{>J<tp@sBh!v7xukh!mw$ocDRt3
z*s)e_@gm*Epv`H%c7MNIPVd^jqHFW=n_IKR&CJY>-dZl;#1S}&OXnP@k#V@4f8&-d
zM=o8O#Lh2gkbJBs{gsM=fq=Zcyh3%<m3#N*E%Tji7k>or@Y&?m4#tLtpiv5)$W1Qq
zw=yv?ftm)fyGj%-EF$jKzrSbu=FOXiH|&;D=6P$jeSLAUxm#Sn=;^7cHrH?OF4xzM
z-ge~b)v5pf{+>K#%8~H+T2~(*o_qK1Ra93$w_NTw*UD$M8Sh-b_Sv(g_y7NA9lN`1
zY2DvnYa%u>F}McKu&FdUckbMd9XkX>M3&5(KVyam%4pom>sQ04)cyPUe8=wHi?3#Z
zD(~Q8-4+3-JziWfUdi9z-PMWM;Ba?c$V#EzWp9)I{rTDYN<mk*_wDWNn+qN~$=TPP
zX=3HRvAaCK@%h@Aos*_&hcgvCKPT(t<ivIL{_01GURJ)oK?Q|D|E^7HoT%0&TP>H6
zo5$slmZlc9c3R+Kw}n@;cGms10(Bn!{`~#>^Uvq=7c)$BqPO)(nPxG3cs4sfXn$R8
ziIwcTckfDGU*k14Hr9*Z=aZA8V_W?#<<XH&pV?-;PoAXo$=jd1xj7xw#gEvMA^7as
zGZq$>5BI9y8zvp$&<bC7=FiX18@Fvc_xZ1fudk}9si?iZ{l0&{vOzNzmzH{adV427
zIWf^B=f(u*cD}+74;(?|wvdpJgn6EfnwlDDWyj;={g2<j_urazb%s$Y*XL(v7uWv&
zwq)6|qAxE3*Q{N8^2`|(6_u9Y<$i@PE-2R2)&`2S_S^rHC@%i2^sjI4tVxq5dEMPV
zce*Q5UA!nTp>W#FnI|W!`)h=+lbNvh+|9}A{#<ttzPr1-u(Wh()Yhyy_Vx2lobV{U
z^5)!J>m9px6@7Z*xhj19yw`gSlaCpsosnR;bLS3dTDD~V`lfl_{RMVb|Ch(bzBzmI
zW1O@?^_A7t-{18}8n@Y2f4i`wFnQ9XNgqCa0!`0dSryvd$jm-z=FF4R<Lf%ZRxd4j
zda9zb(lGfL&$oAXy%Q1?K<P<QQPIWK71Z?HQ~5dL(h^QSKE8;EhzBoUo^0h7@967G
zyR{|Lp{(%krd01ed-i||zO~WYul;uL`1$E6XjJUmw{LfL6f%R>;Z!{4Wl-~*)8V&V
z*}`JR$H&JnZ%lS)`1kw%|D>#}F2Ci6FJGQ~u$i6Npzcq_xw+QUm-{CtC-2#_=f=)r
zb#c9z2l4-ZiBI@@SkcaI-<7@YacvVO2%Pk`Lz;!wlwuW2cW&cZn0&l%&);vi8P>$^
zp7!u?yZEl=h6aXGuggb|wpM?Cw=x$r=l130<<_cSo40iOdQ?99#&7qd;aIQq>hJFy
z%j)#w_pQ<6nlod@iGKTkCth4!-1w%N?Rc;B@fQ~tuMAqs_3`7!S3A|z)M|b_Y_Iut
zGyRpFYanP|pOKOA{GVAxToua7%3t4G-j<Y<^tb)~{=2;0+d3E5z!_$_Qt$WuUboBt
z-*dGVfkj(1KFr&!8NAFO`52F0?5-nAy{FHxELIC%=A(J#=w$d{T<t>#Z)xk-%l!|(
z%swy48@;#6bi&`r<+IK6pZ)py*~QgWP*l{lpkPB(#NRKM{WZM4ZF#vh`?`Ud*}3B4
zzxMxs`hR<W|NP(I-x)uA`SRq&#l<czE(|yCA6^~4o+)5slB=9;)t5W#cjn&S#=^=9
zDl`B7{{Hjp_4tn;J}^9Za&oc@Y|>C5_tut^->pF1*|W1uRa8|O1!QH<Hna23v(^JG
zNthrKF^93_^|juk-Qt@oJ|^kt==|9K|Mz~U*gJ;~fo5vgtX=zco4c#))RTe64S~6@
zlJc_FY(IPF%8c?`*RI|Mt<m|)_VaK%e{z2Q`g_T27uLmEA3A&(G--agjn~4`a^dBd
zprTXSJnzU2i}LwtX=y%lECOXOt_fPo_3z)mA3uI9xSExDbCc?H{dl%b%V+H_eZA~^
zW5XfP@Kn^=Y0G?PUs~wQUQ=6p^<_?ZdAXX;42Q*Ty(^-(uiN#e%Pw|TN#chG2e;(h
z6cQH~|Mm5C_#6L;T#79MuC9xp@T|3Q&B)MTWMs^_w`b<A($|-^=f~fxe!uro2mgtE
z&)<A`dHLwI_`(yFQJ+6-wFtUoeD&|MWyiLZZb?0Q;hBWw)48`M|EvWquJDZegwoB`
zkJ|pE`uxqStHU`tI205V7^0)2+4y8S7HP=XRD9TJEnIJ8bZJ9UMseAk?uWY%PduWw
zBq2TboOSN&OTp33`H7%LTE`{*`;XQn9&X!`dwbek>+)6I=bmjpH?6|X%}uK6^_!P#
zH|xI)05v5N6+Z2Kp7Z+J+8Z}-K79N3?2C(wL3M2F3`S9K(??(7{-ZZ%&F`xi7)$`o
z!MuIjJAc0X_jh-b@9ZdSjdBSCH?wSZGtIxYWy6Mq4-XE$xv{a?y8PXRHIbWRUMGQ@
zxE<4%2Y6>bSrM{QDEU~=!vl@XuA=?m2GY!LKG1%fMigX0hZDg?%%DylS!-U1nJx6`
zdUYs#T}<MG1C6h~^-7!1urAlTc=6(!8ylI8jg4PjUw{7A)@)F3e3bzx_!Q$bUCoM~
zJ$d315g`FuG+|lX#wDtCVY$D&Rr$L!t=!_EA)57jze#b6>*+w|O%>xTj~cD5d~ksA
z)%ErL&CSfQyGjloQK>#~?(*fy*5&U++&PZAExu?_@*<$rT3NYSD|FR`#qRyf=b2Z3
z%K<GYPOtaz;raXPE3<-%N=xSDWqa>5FhZJEF)tp=+t>Mge|I<Y#s)>u2+gZ2EA?&F
z6k7xqt<jqHNC(_hnQ!y^`kKhYuh;L7lTqBW0mn3Ak!*Tm;=xvKaSu;V#)SL(YH!@W
zZC}3MH0w&j>1n$9BGr!bwpuXHFv%2}t{<;vVlrj1d;g~D@9&hl{5TX_G@`rS%-bvj
zTH$topY8cCPnXZH>+0;}bZ+ChSbgu*RBccPm7)CI9YqzDB`ch3SIz#|?^!j;ar!B)
z5Vi`RFH7xj6g<}9`@dp=-1M0<JtwRAe)#%zY3=WCI+2@N#B`$y-rccua&q$U@i}t+
z`t%z&B2G=!KHe^0=kfQ~*UqjkB{j9St=ZQnO_*?Cfn)QFi;LOI-``8UwIvf&{BPJ`
zU}R*(!O5wps@l4K-><F}D>RIZj3f+`SaNS|dH8&O{j=@&>y%ARr_PumacfKF;Ts#1
z87)d*iTwNbZ^`oI!HeB^m-);*v@UjcM_=Eu!-tu(v$Ln^L@wGlZL+%mrtIr_Y3Jwh
zIz&WBI669lrs{g#7T5W&3R?^6e;AmX>o0rn7U${dsrpaO&}Gu}Kc0Ee1ylZ?f?TF3
zZOgrFwzT;9xt-tE#_kUL_~lE;q{)-HOM9Q4oh|N~(faNF{r4KZY45^hmc2Ri`D($j
zywvCyvR@;(xw)Udxw-jNbpGC@OO`Cr@SSZI$|q;DqU8O(*gz52nFfhXM%CYPTmwa(
z9%y9ty1dL+6uSC0YHe8MpC22G{{O38ns#=US4qhhAxX)VCYhI90!3UcD?TiE^z!9O
zjjlyLv(0>OZb)oiq|x>1!9nJu7cT~SdU*w1TNk_f%G&7g${!yVPMS1n(Uo<v(K}0D
zPXim6dwZK#P|&1B8eN+*E-Iy-n`7CvNTaK{xw+{7zrQx?t!-^%SB9>hR(0;i#^kGZ
z#Wocm99BkeUsv?^*VpNj-#jn*@Zg}(+xr1n%1Du)Cn6mj9M)-`onQY?GCn?@Pu9w%
zl}mJ9(XxjTwJMX2XU?p;9dya~=eq)xy`}Yju~}QA7A;!D!NKvM*Zkgz(mxZG-G6*M
zF3<Sl!b0YcA3v(7sWE|8{hm2{*3jJi`0d-XC#(B|mLh-KUH(39TmJoZ_m)qaHS5&c
z=<N(IZfsP(cI_GiXo42B!u-QS=R=1NXWrdqI>C0)%}uGNm+#9H78c%B`ubR(to4iQ
z>*ekLei7ccZ(mlmn;V;*ot@7di-}cVU)g?*0+q5Fa}ZN)_v?PI-Iaf+g%dQWd~U8a
z^G$x%H4z&ZX&kOd1T8YUb8mIHzMAhW7Y`4Pj~_p_zH(oD(WdnGdJ)%$$K~sfoII&{
z{hzS9U%<K;%hKC7Zp`TE;Sq6NSp57P3k!>g)-)N=P{z$osUhoPB+bpuwZhh{*w!m)
z%;xUyZujFsbNa4jpmO<B_!54Wh=>RQdHMByj~HBBTnc`F%PpPB>#To<;mw-)b^25H
zeVeH4Zj^mZCnhGQ=xzS(ZMxFtc`Tde@7uC<>(NuExEdZjNC*lJ?(FYZS5aXpFaKWi
z`^~Mb+TPRkTmv`BdvNgb)@Jq0n>SC!wrWa82ZzG$f_+tAwHRRI<>#~S@3RH9HbJ`$
zE-Y|-^-t0`ZNh{J4fE&U5B?3Rn~rRr=+B|ig4Sp_yLo@w*;zhwEDX>8ySX{N_}LlB
zEm69`%l%UC?kdfi9k%-D?c37+{{Cs_=6Lo>nSScce0FB$lI6=6U(L$Aw1m?*{anqf
z>=jsB5AFwcmA-D<zP)_j=bql)mlqea?=F8IHl2BA@pHcUb-y$pK76=i*RDmEU;cQv
z`~AFI`#}N8<-Yg{&t3@wb8~aQ`F62Mf74Snd;b38X<vVI;-r7i_okyX3G7*a8nw2x
zJb3yvRHt^Ymx}%`tm&SaacSekiEX0Jvo}AE;khr{r(&kaYMottFEe`jyjxO7!$8w%
z0*k&(ezaynPmjr$Yd4=)X@Li>9;HsMJ_oM1aP1I6sa&VLdgOyz5v}N6`7Hh0o13rF
z!~TUrYoSfo%QRnDoj-j3eE;!&dGTLAj`z!7-dkNRWtQVHS<QEbRcY3~<9WBY_3kWw
zzOToCTUAG=Cw6z)%G<ZM=civ><f>m2tFNzL`gexkT&s@{4mOML1Es-4jkuGKaI6U}
zTE9FSRyUpe_vUo%?{A6u|NnR=f>v<oMsM2_@P@VW+9RHXOG`Xs_SMY1a3P>i&i2&1
zySu;2J%9cB^pB5^<K1Soe)|4sP5Amae_grx$2TM%{_$*fe%ve5RspAuNncKhrdzK5
z>i_?J<vDkCf$39@{d~c)EZ6q@jrZrDXkNHi@tC*n*GqK<CMKp2A3p}({`ccC|Gk>e
zzFVSn=gpfp$GY4v*Sh+f4ydPo@#4hL)nSYRqN1&vHy6*FF=_H-Ny{P?IXOAd0;>lP
z5*Ysc{rmFvcKy4%N<j-S`t5#s*rva~w-?mx3=a<nO^XEu3B}ib6*V+81oiQMYAgi}
ztN;7^d-ZKg<#JK2kbr(uW8=eDuTHftfA^&G<EyKyr)Y=sMMp<7C@Cw0n!kE_dS_-B
zGJAWUj=6r;F!2xzXg?FEZS(HkI}2-T<FqppUyE(`?%4zC$jZsd?fG^qTgI+t#+EH6
zc0Zp8H#Id~cv-^2%KGsA`|~ZF!VDK)mb{w2bEjoHpX@2~eoakH9x0O!w_d4>%l+lI
z-e`N%^gh!5Oe}|Di$Tizn8Iy;v8}!FS@*Y@jaSLQV8WFvA?M~=yLXG}UfCA8ISsUs
z<lJ2A<4vsG5_UB^Zk4^eV+opH0IjOKv$MFf^JMMa{lC6k_J4U{A+x&wyeAJ1Hv7!8
znfc^NibC}l4GoQs&(F^bi-;U~dwY9lcX#r?KR@5x-#`C;{eM|=bMt?n=l?e-eB_dK
zWyQgR&Fq#H9}?EtS(Uyz(kZNd<&cVohQ@XFiOe=NKPJdpmtEMHeB8!R&bCS<JUqPg
z_uudL)9>skoMT@vXKlT^CjNH3?+k;+;N^ZR7f-Y<e;0Dw$jAt^xVEmY?#xVMc4K2>
z(26b4LLzSN<Oc^DcdncI`QZ1@&(0n`d$u=hZPe4*$Df{_9=y<r71Y_AV_9rb{H&*H
zuGds8&~jZNAtA1>QlRYW{QZ<@I?4?5iJ0G8qI3fT13`lt$E5S0WLXzI;W*sJ%e=t3
zo$tu;<C8%Py58U03#!YmT@%aC&-a^WGc(_(`un@oi;G-elvwTAv&X~N_vy8@(F`ST
zZU|;)XP=s`@BjE%@6O84YRCKK)opC%Tw5Ex`0~q$?Rm21`S)sGy*4v6To}DQPsX~e
zr>BPplm)83zgxR4JtgJA<Hv`ObPBHwUmrK`b8>R>%AloPyK29@2rLybRA*<ktoWdy
z8@1)Y(W9-PCA8h*`m1tle}7|LQ2F_pi>s^Xt18fB$)_hLbMEbt3=Y0r^ZCc?_4`+S
z=gZm}wIwQd-D}YLiXT6IIDORwZFJlf{_gSYB;lZWJNJgKj{|MznlNF4(|@7s87BUB
z|7p7hf@=1{qM}n<{~zfTo}wM@ch}6!Oifvt8Qd-ZZ0vuWSIVU0=H~Rv%Y3Ep+_|$N
zV&kG)W@%?6KvP~1n)$!X%g)Q2mtNv|KikOAP*Gdk`>vm_Z|Hv+`#PKD{_~fGt&KV}
z&o(-j`O5W~pQX4b!Ka+3S`|%?m*bu^Yt}67^G+O!M^?X?Ds8!WvswK!>^0{5t(yqd
zn4#|va-r9kzvpd5>aH*OzUSN9+r?jBg@&z-nwsrZe5^-OMMdSo^XJp^=YyuLe=}#?
z-DTR?*vOE@#~>pk!@<x0{N!YHP%>v`=VPcB%iOkYn~JKc=&OSr%lA&3sO%1!cfY(X
zH@Y;wt}^HPI@#G~xl9iZwQ}!##~UI7o}k`a^%Xoptr*9;KBmxYm2d1B-`=m=X3m!S
z?00)t<+tl=A}4nWt3SJGU;QoT+#Jj1u+^!ruB==Wz1=VOa@N+0>gvZYU!FWO(|Bdz
zVz+&UXVIsY{cCwcSA`tBe_vi+UVclIu5R?UC!e04cKF5-u|4nZCmR8Vn~Q^Vqqn7e
ze|Ptl@9*+=cOG8$w^y~XkqHkEFR_xnU;90FUh(a%+3w(}aL_`rn!jJKgN8n3W!K7u
zuMX2?<B@3CyxEwUjb}mR<}}cJhe6esj5%}XzPz>db#V7pnS1x{wY0ad=4W}bqw=#_
z^tPPDv$ITJIWsQzpMTE0|M9V2Wlhaaw_d4_C)MX)*i%{jH7fnwoP(Dy3nwT4eGlsV
zJ_=nNSX37+4Qc|E#;%^T+>}4QTUqt*`Rem$e0wurXPoX@c-GM9R6W<HP;nulrs>nK
z7tj6u4>38u{OqMmla?%5BHpv^$KL<{et&s&l{-24@$+-unctM&Pru}c7(EpK`!8y{
zO@DaalQSo<FSYTiP5icKng9HMXR=pFTBWC_@AY2}8qiq3D*Rm|!wJ0$;K70Ff2^se
z7lBsEuMXGWo4fwe3dsX6E-v=AVo_`nc(k<YsT-sb;bjcoyTPHzb3*&0Ljz(r$sy1-
zE?qV4P2kNS$dirYfA!+`{c+mPy;tb_oTEpNPA%bJd+a)&L31sZ)=pkto_@(bpKOo|
zk;er-zPY(MeR_$&qnm$9TWdb5gZ8PQ^rIHt?|$-hYK4LW2e{`u0kQ`Lya}3dckOSp
zd-v{vn(k`8vpzhX9?x`B{zhJY{`$I%AM0Xwds$q16u#U~HYO$pRLcb$*8lm)KG(YZ
z*u{$z&&{>|{NP}-P~WDC*queLwf-+ktRQQDwI{!yK2g~{XlqvJoVjza*8bdUFXEc`
z=tw7M75Un*)9?5Hj|)l^x2&z4X;W#$D{bbo`s$_S{_=8mH5?8hAtKM7J$v;#?ahsi
zs-k5RFHfJo{O|6_9R-TKQYHsZp6s-({x-$s#8IdCI5s|+70;IV&9!<_VznS=Z~Uj<
zcmD*{AJaILaP-I#jr4PK0!3T{g@lDg-QC?mYg$jOiQGKpy#4<@EsM1y`S|!wfnw(E
zY@5nWn+hH}xdw`)K0MTV3bdwJXrW|FPY(}hgxM=3BxGjIei^G056j|bJ(rev3YW&$
zz3&Ij5x%^(cJ`#HQ<utEm-&4B@IhnZjPB-rd*0mK?C$CBA1>k=X!q-dvZ?;Wi4%AB
zX`lV_RCRy7IcQhwmZ-IpX3bg!+P}9leEqyh6DJ1Fc_=1bTlsS}X!+c!IhMsDuA0&B
zr~j!l-+yMlp;1xV0nj#&(o)t1Nk_X(vaWQzc=1B0?^FRRJ3DBdC<AD@!M5DnUKWcc
z6~hNpoITrvlt8Oro}QXo{Py(wzk)I{XFfbUd@;kMqN?iChlkEd$;sWe&mJw%*N2X~
zob>kh_kVY1XRy%1sT#GFC;bZw44T<^7sc!>y0SKUIvcN4!KWuDmHIZM*y!qaE!t4|
zIjyFqrpt2Gr0MtHKb!v3*sW`GxJyWgh)3$>vkVNJ?x1r47A{-}nhtA7Jw0vV)htQN
zqLi7l_c33{Fe!O?srA{}*_(@=dNKU_zW;w>dOEwq*H>46-v9qEe3Ig{6$cI+h_N?0
z{p&aDr}}yqclY4cVY**;%recMcJn{W+Q`jqpv7-@`+9noL~c&ol7D~R;dcJ%2`Q@1
z_xDRz?oY9<`C)K%Rp{Z17X$Bxi{00+|9kh3QT;KG!f#w2US5yZZohZv{CR$DZS7;d
z(#OBPzRt<befZqDK7RW@8<sfhl)3lGJp6vYKK=W<yP&f2&#%|(ciu9U;+5)M*0M-D
z5;C4M_ut>&%qM=k_HX+0@5Fh;$PVKsclNxyyFkOD|9_sZ2W=GW?CeyPN&Psn-rQeY
zKw!da`-aqye?NV{f42VXBKzbe7RCiHU%uS)^;$HKq*2Sh+TTn8YoknidV39xjFP^*
zxVW^;<8S7hw{IC&L~Yfo{q^PHlarG}R)_h@YW4N@F3q~SN>wtc^XA@a^L6Xig{%(a
zo$%ee@6SE?|LgZwb9b)Fe$4PsJot3hnIN7mliQY@>xoJe%v`DXNK;#h^`tYq=ZXby
zmMQqQ8F6wcy;9Or^}ej5s_h%7qx|7OMwvtK=1m8-_~;s!UaMBjn|1y0_ivT6zWm=^
zbnRC5&ofD9@10*`Tb=Li?Y*b|f84Eihuirt{h6h=E_QcY_4jv*s;aJ53nU~Z6Aw1A
zGW>dfJtaB0dDg61CwYpUc)#y_Si@LVT|IH;%uZSBGLO{Dx%+s^bL3Xn@=P|CRckr0
zE_Qc9dV0HaJD=y%{qtAvD}LU0ZK}!TgWuoZzj*PY;@`Q`r?c;j={s=10W{*yFlo}H
z4V9mto#ZLL^yjzq+miYE%x$yf8yg!v{QTMkn9k3$ZSL&sEXhi;s`&9iQAk*L;@r8s
zpcdMn&(F`lc>S80;mY;vi??kPyLRo`WYK3W((j6eB_ukoTnQ;DFJHWHA>-DqTMJ4{
z=dLkOcde_l3tJPxsI9HNGRNxd^oM<ClXvw?PWP6~@Uf};6Tu+B)#_wnWyR(0?mlty
zWMwC3=hhfKajsS;78aI{etCPpvK7Ls)~)l?nCi7KM5}f0-n{`UL!3AiqoSf(mM>o(
zv9D(5hfkk`)~;PUS?1-D*Kgj4e13jjJU2Ht=w`_r{euS&COtgVnsls3^3bJAK{5Mk
zDsTK;crnAJq-4v6ZQI0(cE(JZHqEc-%j5JFg&Q|+6buUsyJV-PrRBw;=;G$qwqg5r
z@xH#kLpN_mrlhC)L)1yWEM6S6vSsVmtsAy(75)DH{`?~+j$aRWtkfcqn3(wJ(b4Xx
z+kVS`8r|7gsq@{i#)&~tP;esO!%SZ#B_$=<Lr;1-IyyYs4J?(Fl$0(h`kd?N=;$cn
zm~0G}J~nsWJUJ(4=cU*8?%iu(Zr;9s|9|<fn@e5>ZP~J=r)oCi@qT&ty1IYzM-8Uz
zE`P6NXD7!XATG{sYiqlw^0V72`|@`(U%q?^2n$m?x*|a1!ILK}txk<=*61+&^)D|d
zFL%$+f4?GGY0-Z1$;L`j<(t#byOoxj?%K7BfuUc{_EP8WRmYDXPrkg&ckw%QLqoxf
z7cXAQZgpzBawX(tX;qb#jEu~dTOwj&PHV%Im6e&<`D7RvB&4Jcoj%Q-nVGq`ue0M{
zoX@$A45uHzekrM|vmbb{T%zXZCss#C#{jLV5xdKDSFKvbz_51h+7I8pwKX?0gVyKY
z&p%!9_gCqTUAqpQJPBHTcIocP_Dk2Vw}1FhaN=o^h6vXj+iEce0Xez8o*telQ>Pw0
zdD1iLq*O&qU~H^xO-;>)tgBi(cJ6%m_4W1UU+Y$`1g-u#bLPyFO$!z%WSXv7w~kLu
zO)cu{{_Onx_LVDFhTWbrWy(bM$sNl~)Y4r{6q}lxC(fGHHQ&B|(W+Hk+1J)ge3g`(
z{P4}2o>i-~=GasUS(m?KVVGl4$n^U9`s3Z=`WsSDi|wiY&d2cK)m7~!OP4NOvV<jT
ztJJSwzm(NXvbIY3`ueU|x9-^I=jR0l1s8_CfAolHYZUL+D9~oIBS(%fFnoJ=xBK(+
z^NaWFkx4%{=VFwsbU;uLXn{f2)RQSjpcS=OuU_@=^}Slf_K<;}pZ~<uqMREW7`3K)
zX-z-<R6|$yZ~S3Tm*5*cC;1vWma#7l>fD}x|IxFvvp;<OdUaLxrzf0w_xJHKeE9Ok
zB|BUD*6rJYF)=-7&iF7i%%4Ah%b}2ffQC<>iq^#Km14MX^{VNk=67q?t__Tj@BjPz
z`{DEF`S0Dk=bSfv+O)Q=F0Lm}pDx_IS=fKR-O+DvZ!i7z?%g}bT)Wy|EXBp29iK9v
z%*e~r<Kp51tu0&o>(C*m88c@tT(^!dE-voIjzZ;UXJ#_j*49c$NiABun7R1*IoWTv
zf)}qGebUpR=97@1kd%}Z5E9aI?b@~fyX6&G4jnpF@aoFS|6BI1T&bz0tvzw}>~8Pr
zdJnIyjowl8wCkGL)vH%Ky1ShV3k|<~{krgChC%*4n-y1I3I3jEnk}}k_P5!JJNNI)
zyZ6a7c6W297~S0PHa90HAu+LWx_-PD|Alw&-W@wL)A;}8YUM>M+n1YmW*l;Hb5k-i
z6XR-iT7Bp2SzjyN$dC{gV`JkJDMmYX?|yu&SDK-LonP+3>+9<o81(h^Q&LhE?A$5o
z@IU{Wl(cmF-~GC}x)(A`Zfwhy4$%_T($d=U>h`T$0zyJfPoAV?iu$NcPQ1A()x+PP
z{la{Ku&^+`SGUid^Lu^j4CAq5$1G}pne@q8n?0IgVr~v9KE0>uFkbj?ZEb99EFmG$
zbEB_gncliNf)`shZrU_yozI`xm>3x)W#xq-T8TM1Jxi9TF#O%`%FW5y*xbx~ZEbY>
z(Qa{OhBt5DE(}`v;O$%9UN>fDX6Be31&&??|JJTt$tfr(cp<}t!64(PuC_LJZ*T97
z&FTFi%fePqoHVJa_V+i(r9lgqFK0h^@L)kn$(5|v@80ztZs&LAP>l63P_VG5xUuoW
zhYt<=_SsccRZW;Mfq|Qwo8j+r>%V{gwB@?H1ka!M<L@VRC8b}<?EG>r4h{!q+?kgA
z`r29#508emYu`$^xVh9T&%36gB(-$0d;g>3{ql?7J-@y_o`Z`kDap;`Y~fQ#30VN8
i4RDu~$OY+t1_pnZqV0LozZe)87(8A5T-G@yGywoWIUg1P

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png
new file mode 100644
index 0000000000000000000000000000000000000000..94a6310612828db2370d19a094795341478e90f8
GIT binary patch
literal 21380
zcmeAS@N?(olHy`uVBq!ia0y~yV9H})V0gyC#=yXkbA_{?fq{Xg*vT`5gM)*kh9jke
zfq{Xuz$3Dlfq`2Hgc&d0t^32kz+hS88c`CQpH@<ySd_|;n^;nilV6gPnWRvbT2!2w
zpQm7?XQXGWqmWTjQedU8k5HhOUzDz|EaJk-z@Wh3>EaktG3U+Q%9zlrzkYl?|8PND
zXP2eo1Yr)12Z3y+>L&z@56dJKED8wBPn#AvD|1%bjkU9v#r1Al5_B-T?&v8--xFKr
z9OYzx)1)~;v#aS;#kKeR_3yM9o75g$eEzrUW7pnma`X0vKf4@Ny?$x1mu8EA6NloF
z+Vg96F@snw0;0;Dv%##+2}gW66k7xixp+KM11U>XQYjR6;!tcc5S+AwQ?W(BX$FVq
zofd(qN~WfwbN$>C6BBp-){EZOlXrL5-K(X?`eaY9iQN3|RfJ-TfhkBw#>GXfrrFn?
z9BSo0ddj-wMT6gRWmD6wRZPGCPt%L-nmk!}+O%mVnU`Aj)&8C|cP{UNNvhs6%yOf)
z9t)qf+t*E4NNB^>t!rC4S_Jr+Ko&D>$+<acs&;tR?RI|oXScRyUzNDXYybC)aQ@z}
zV%p*BRIIIMCm-(<l#qzHo5-o?vK8bb#seQ79{%}i_4+MuSFF%*>yZe|nSXk^zH!nK
z4k^<tm(S16&a|yIb6YIfuxOD|T>altDYG1p)nRLYJZzWulCds&;<0A!+Kn4GK770V
z{<0M-R@~X~?afW+>3Xp*E-Ykb;FU5_P*Gv=_V#9ot9ZzIukLp)!@SyWk*}_-T>N*&
z%$XZ=ZW=N4$=mm>U$4(AWpd!+#fhP-!=_g4oiRhg#>S>=YVDto$AA3(&Ag%RZ`G}B
zx!p^bs+yUZ)%<wa{`B7A_}ZUOr(0NCCue4Qf}Hf_vOnXFs;^r9_J2)YzI=JhcFFSP
z$tF^r9UTiwUthENb|aaEg{9!*Bi9G@&X)oM18x5Ocx;k$W5V}))&9G~0s{q$pPgA)
z`ubW#RMew)@6OFMPOo^mbovaF%%F`s{`~#>^T*@<_dn`d1VTMH73HeV&NdI9ZJr;N
zduvOer>Ezt4I2V(-ng;i%9Rk-6;q~&ES(Z`xSc=z-@kvaPEFO;QdM=ew6Iul-u{1#
z=F%&Vj&@I7ym;}izu)hNrll?W^zB<%gO-LyfS2aiJH_W$ZrETjb@JrJ=jK{3x3RUo
zI!V=gN{|=Gv#YYNuUmR-TkdVMuP2oISG>HueD&Mg+gIPYb0_JmqM~Bft1By2`=a;P
z?cKC(TiE{p|8}1`c`|U@wr#Jv_4fs^va)9V|M&OR#^Z9q?)`GQg?Fy54iBAcReI&*
zWc95^{QUe|i=KKdof70b+bs0<w%lMZ&C-vLT({=m-xu_F-(B0<|9`)~y0S9(REEi_
zmKK)O#KeVj)$Q!!Zr->da=ZTCp2}6h%l%%RnQ5$LZXVv++PZSV0)?ehf;2TXxdIlu
z^#)yE7aQE!*|{q1>?|!!O;63GSJ>ri7R1EFT>1F;_|%ITA)TF_uMV|xXI)<xd+OG$
zC{a;S&81VU>i_N8v|&R){hyESD-F37-|>U;&Xyy&*VaU4eR^^->u?+ItCh>=1+lWS
zMm-9Ph**(zWksOQ&B#qDov&_gULNG7DK9U-wc=ya+Purle4`$HJ1$?pVwPz(C@eF-
zYKN@}xOeZK*UH=KmR@%~4*mN2dikrXtC!2wehK{gYW4b6hYmR{4SCslyHi*_WU*WC
zmGk!h*Gvi0va*UQeSK~14ZFm|#H?dIl1t<AUtCz2_3X?{FU_TX^K4dvlK8r~y{o1K
zojP~UPd9RtOYFXyop0Eq8mo7O{{Q#)>YmEaQ-YSN=lS{hWu2a;d#kLfYL}6@c{tb~
zWzWt?uDy5pa<I3_?Jb#$udR!XepNbm&YX~#m^ndSD_Ob4RxF=ix6A1xm*T2^o~c}l
zCWqVk(~owE>O^l_bHw0fE4R3YhQ@-*&(ESFKR-L`>Fv#|U~4ODZM{3G)O@a8?X0FI
zrZsEVu8iMr_x1Jl^fNOI|NMMDU$pD>%jNT*?S8-S@uQ>NM~)ralylR_?*E_9LErb*
z{51Oe`+NGgH#ct`XI$<-KkfRu*jrm)UtfRz)6>&GANSjP*&d&u=)C3o->@|i2aC_!
zy1%`(mGQ^7+xe0E>uk@?Hea58e%_y7uh)z2YLCvnv7zzYT<gmV9GPETUw{7B*Vo=P
z5=E;oFZVZ2IKZG=mY$yerONjCxw)6WW!~Dd@>})WTU$?_JjwXr%uM4gd3SeR`P|mh
z^5LxceFhL=Q1j!1^*(V$m#dSG_;M`Fytl_vH+oyj-(O#OWGpsB#2#+rT^YETZMpya
zYp3e||1HnS$zj;AZQHXmGmVeF%G<Pg^Ww0zQ87CT8rQ7R*|cfXj-sbr?)`Fm7ymeO
z=FE$$tHu58eyX_l$!G@Vn@C9-rEu8U*|oH`itesEKhJh+c6w^+#L1HnKRVj&y%$uD
zJwHER-OzC2RPAu1loJAL&t1Me`NoZie}6vv&#QiC$;8C;?uFc^PoE@AG6dG||7Z35
z++1aC?e2ZGzZXqma#((Os>QK$=lXc1%|yEG&CSEBzim%WPd}c&|8H2<-3^I{J9>Mc
zJ~-GcV_nwsd|vgtzIU!1fnPxtk;vwfmqFX|?w(rQZx^*DX8m<x0f7ToRtBFuckbDj
zmzV#%j{l!@V?!bX3k%DKlj`#ezP*Xmi{0ho;?h#KX6shdz182-E-mql*;&N8_~MED
z{eO@7+y4#8v)Q<Dql>$H@yko9Q-a>y+dF%H-LKAbbFHV}D1UWjWz62HsXsqI7uSA&
zeZ9Q8|GX!!*Y97pU_nDu6VtahHxF-2KJK+Qtyjvl;@{8boc#RHA06%f@#`1Ug!%J9
ziDqZbPb1T;D+}`O?wT=k>fiZ+OGS3>+<CO_)9248PoG{KxY%vR?Ae<W53@ab^k_@+
zER#&9v^2Hp`tjFJf$GvLD}%Q#`T6JP=Of3CP5b?Z!L3*7=$6dO9)5mqnnxEpx1Tt3
zhULJuYhwQX{%!p7=N33NZ`iP5#Rbb&fvfI9=87(cYooX8eGGl-%Ppq!;G(<y($Ljm
zC9kjbDmu3bNJ()WcyV#@$IqV^*Z%%i^5(`ym;Srq@wKjXb#}S8w;eros_S4g`{aoe
z1M_Uc!oq64-AuQzun5?m7u&`w?N(7?;oQ!ryYc<0soFPp7ORJah209Dd3w73@k^H`
z?bu<lXz}9AYilHZXPZ4eYkprvUw{3X*L;#jExOU$KK%K7J~=OM-PHQOU$1Y;xY)F-
z^!28clS0$<<Kv!L*8O}cuA`&l^{3|3Np%($mW0&Qqg|rfTlB*i=ggVI!N<2ObamL3
z_3`t+d?|^md@B0q@86eqcbiX}Hm&5{otd$_%clN*!w^^f)|82f>BZgM=B2N%r9L?^
zF=l^V>}q~a&W9f!9yapPYU#YNa`xZPP8^DIR=2n1Cg0eQILEGb*OkO5#Up=RtM0T2
zJW>n|2srTP=VwXNtSKtF(00!w#gaES3}0Pe@9*ZuCamta=3POHfT)V|QcxR7K+OrH
z(}_dTr$r?XDk5YKYBg~PE5e#sE4CcbH8&U6*Vn(bJ>P$K+1rZebITbj9=3{S+}xzf
z*38&^&><;FsUW%SBd4%hLVkY#?Ag-4zrVkJPE=I1wYiy@kB`sA)ipHi1lWe}rk_8R
z@3h{Ubv5brwY67P2DdjgG3_dSeeANoJ?{bg|3CfN_~l}9{_{$kO_(#sr>MwiciG#d
zJ39*B+}hgREv|1={LJUuySv?f%MahZD_dV*zh=!EE$tGJqdR^FE&aOVWr@|RD=V27
ztmUoP%_*0hoGfXW#9}qK@7!GL$y2Agy1B8<Hp@M9{rYq^Ua5rC)YkRu^`*`8)~wmk
z3Tjfmi=O(cFFG(#kgd7#Sg-Ws*RQ>o`^{~T=62k@bLYobtJh~`vo2e@G%-DWxomcJ
zws%xi)Xb+~kMYm;lGb9q@cOHYn%bnPQ;#+<GFucq;dsCQzn$-FGgW*0`5QJEJUch{
z^p}^H|9{)6q^#W8)5D^mskt&@le*UQiy24G`0Oh-6ulI;%>CAxFCMdJ&U=-!`OFy~
zy_g*V3v0Da3wN)q`}^z1j~^QfAG?8CeGiUV7wGT%p(J1TLy^JE%xsQjv77a?H#axm
zxN##PIoa9C$!TLze*Sx*>m1zN$~rnc4E+53-`?Ggt~6Ka^7Qb~*y?-aujj4PAiLx;
zjV}k!&3*RpVc^4BZQZRl_5W-lBO@7N_SellJKOy6lP4~2ZfpgQwJ&YWzW(UZBc_J^
z`|aavzlwTkmc6^<xwq=;hHcx9oj>2dV~2%a?5-nAy{C6{cOO1~e*LCHVxaKZkslh>
zAMNJGrl_bWA}X3_BE`wUu^@hboz2fDljqpi%PkdQYi?Bap2l!tdw%@2_3{0C_u6jF
zzW(gA{(hDPj?HX7v(0$@{rwr1`Oo)jX5-b;&Vj^v*izNJyu3Uai-H9%Kd-Hg_P!gx
z{PM<(i;IF@J2^XZ^78Io{t?m>kg6;WLhdbeWR(`*yS~aG{oI_S+x|8mU8csAA8O(J
z{=`ld+=)2i{CIumZT<a!f?i&~v%6e>x?b!q_2;1KOtD4ad+Y4rA|5##33YY#Ha^*|
zAg%`6>Te&82>V~yS6jVj&z=+K&MjN9qT|Dd0vlV~!s6n`m;LRpp1Zu*oqt~CGf5d)
zSxM_M9k<03Pfk`Z{`;$xhmF~}osae3_1nv*DB0M^h>MHI#Kv}Zc50e0n+<A>PoF-0
z+uT{Rx>m2&ZV-FSE?-mNKQD0VnaALMPoN+>pUi_77Z;mkU+Y;HyL(gealSo!_Hgj<
zsHmy2`TP5?i`m)K+RExZU2kdJ-l{h@H@ENEW3#L5t=Ic|dl_%+tv2`c^fbx8H^;8_
z*M_vSQc6ln5jzSLJ3Bj7*X_=|Z6+ivY+3O^L0m89z~#%6<!q~_NCt1uySpWM?wmO%
z&YxFTRc)Q78!aR(?3|FGz`!l8cjW$k`Rwf1)t_pX90zyNzVqtq>*wU=8X6lPPB54-
zYu2ewVf78$wq0AswmN#d-m_=VjvPJu@oIQ{qJhMVD=UR(&YY>DuAcoo^Xe+ms=f0b
zJV@A<f4}bbYdOoJmIn_KzEs)z&N5k;etzDYo12%Pnek6cLBZkm(%mz6EM30*^ZfsR
z+Na0Y{hVhj59)+DaTMNP5xRQXx7~5;mv7jx;Y{t+Af1Q}4M&c+2nh>&dU{^0x>I5m
zyRr8FuG;m<Ts%A}S67A3v8fcA>(}1g%nS<JD=UL<+`fHzkt_GQ*xhY=_x_DN&IoGr
zwg^13d;a2uhnrj5HRfq^=lUilD%RE2>BQ}s(bUAm;5X0a<ki*Tjnd9bH8nLQdfk$e
zlC~^7D`lFM@%#R(XVtl`uC8<J>*wX(-X>a|zt(1L^me!P*NwBU>EzD%GBbVN!+!gJ
z3Z|x0uUrYqy1wr0%HZYyu5F%Beh1XoEsS3sxmnH7&~VS6Pp4z{*U55ob8pMJd1yu8
zVh>;6rOC(pG?r#uSs|ExeH|+();>NyUi|8cW=&0vgh2vBL2_=fxw*Mj*_#ueo}S*C
zUGo0k+&6D>Zf(!+_gntBncr?f=H+EF_H}y>6|%9h3CYR%d3l}MTg$Nh!pi-1QXd&6
zxISAwO=nq7)RMhE{aa?foM^T>j+2uU)C;Wn|Mz=&iBGAXQT)X{u5Di6g4L%5)FN&X
zc+~jl?c2WxzEybjqxa!Mq8?3~J-d7Qbn&;hw=a*~UAE`*IctU`OO{+%8La;1?ORSB
z9u<3g`G!4vY?zptH*Vj)Jb1a^QMK*W|Nnm9xPANaix($?dP%T~W~-O?ovf>?j$T_E
z4NAM6ot-&%c1)aWUH<9W?EFu!R<AF5bwzW{nl%x-N;Da~r|W@Q5W&m+K!rR=1=EBn
zQ(Q_)Ox*kBQXe1d{qtD<|AG}ORzQlgBhKeCLe|($IJTL4+hfCO<MVnq&fQ|pl<2)`
zFX+BhHP33UU-XWxkXBcTm2C9(ywnd54l=Z~v}Al<d*s+LAt52A0}l>1|NMAdKF~{0
zTG~4xKwxXu)kUeNr`@=Hdvo>occ-|bL4_5+;o?BUZ?kLne64S{oKrTF+w!N3-|jP0
zOxq=YpWT<`e^2YH?Dj>Alz7;fd!@~fUB5p4+S=&g{dKix&gR#g$pTkVQVf}ynKy3U
zWK77*^V?hX^~9MoI${5=CxcRv;=84#?3ZuWy<)olsk{GOtiHyh=TEq6$~E@RQrf*R
zXgTvm<QnZ=#<z#oqLPxHDJd!p>F4LEDk?6#v`)3_^vRPGpXWNk(}J<B`RzA{=3Dj@
z>%5-2#_8OA8waPbqN&HqR%^{Xz7|rAeR#co|D>r?T@w=(pFMjv$F^EbT3VVRCnv|E
z_!-Z(ZQDe|#EO1?N|mdAW4QLuvgrPup%D=hiHV5^dL=vLul$UD=WgV@BXi<6Bil=9
z;41S@&gSds`OlpE>Sq2nT@yTcFXO#X@f-0Ww*p@N{QUgnsZ&f3etmtNb8nAidU|?I
zO-;n!DpQ8Qz`!r>@5?jP*Vivvw1@#D2I^W^&E*4$EM9!_^l4>P)z&9Zo>)hH2?fQ-
zqD5<7NB&v+>r_~#ZJwm0WJ^m+KuAc-#S9e%g@(Gnzb<aejc#gcs`&kO`^yUpnU&r9
zSQ4I`n8+h<CsSQrEoGA7P*P&T@b=bLWd((XZMnCT^776ta_zqG`s-BDZJ;)YT$1wc
zttqEw9gfl0u#CwHbUwf0o$hMIRsUCRIdb;w*<NOLz9|~{S679ujBmC3bVB*hbNm0A
zUt@xo`=!3Rvhur)8@OVO{<%H>*ssHqf9BLIjQErm=<HwK-e&Xq+8U|Flea?hq)A$u
z)ij>s=ewO>*8D5xlAOLiIre;0;N9p?Yod+MoGG~~H6Pk8+-_ie`QrTq=X&Rp{fet+
zuL;!xr5#+OZ3J4Bpdl_|2JJw`xGYu8OFuW~;PK<_+j4JTS{G~m?EHNG13i+)8#Zn{
zm|&1#AQ7>@Zg1I{-|zQ>S`2-CeL1(bcxvC@QK;P4*LUOgZDB#dz(_@IP?+4gen$K4
zk!R0pzFc%Kd2?gpn>RUk?%w_Q^(*UxtKsoazg~~m-&P+KBy{oOMH$<wDPO*n%&U5(
z={L`Y^MJSh-XqV>&fd6n>(R@Xh3o6<Z*9wsj$8;acDj~s;pT1Ij@`N?#o+DjJ!kG*
zPj7GS;{P{pL^L%utq5H$rW?KO$h~{>?(QyUPRP&qUmdoVfrE=n$;>Pa(%!ZyS@lGA
z`;{vpd@>de%l+mufNI~oyu6mSwxXApR2eQ^y0l^Q=F5E>w{BI{)m?k02;`lPcHgC6
zcif#adGg@}j?FX7^W{LDhdVn8K~1H(e#f_DUY=o+DHLDx@#y`1ekmy`6DLk&m}6T#
zt)-=9f307jp{1qer#-$MTc*F9Xl4w`MaM&KtqBs|-qggz#w&H?!a`?Bivk4(-&rOX
zU%B_o`R=d#%dlbN#)oHS8b{r|xjB8~#*K^`oBxC*CMLGDwlY3=_^@&N_V2d)<pX!M
zw6Ij(Q*%KyE|ax#!!n(hdTF}#$#ibsY`lK&w^?EHCQe-V_h0@0@AW%&>|l8C{=I)m
ziAiN;<&q^!Cd`@BGkv=Fym|9f)YT_Xn&ebhXLs@9#UH<ahlkw&H7EtXul0Iecza_q
zdwl(0({=0CN${{SJUcV<@TsZVo<2S<%P(JC<jQUL_e(Itt*zPa4h{^UUgDiQb6mT{
z7G8hdDE<D(&!0a7Lqc3KGBn=a+PWIv19+;I=eBraGdusJdGq>OTUqt=^i1;Z?6@Ob
z{`wm4(vZ$i`)Yqrt2}dMW$;X^(yW!o@4T~sM2F+d&fB0~`kkG{H#enn^YQVSWL@dF
zb0@}co{i_*TU#ZK(|8mW6=%#m`G_0rh0uj(j=X)l{eIe=9fdW&UM_!8vMZ^y{@+h^
z?XWcuUa#M;rlPXsQnLtH?RL*QVe4WdpE>qQo2NZG(m7RV0;sTP(TLsoc+UTy=j%_L
zJI9uAZA~PPjKze%zrTl9C4luU+EFCDJ#uqeVrJ&aZ*OmdY8*++B9+I-dRKSd0OexE
zBf6)2@8tQN{CJq(UO-IjnDKcV=eoMOrR?CQrDB)1dEw@b8xzmWFg$YP2&i@b>C-0z
zsoqdQNCm#bb!X@8RiUdteEyt#d71B(_3`t&#q~d(um87MD+xRpFR)#soax@Jt=W$s
zKR!IuIDJdMkd#?Y#LGr-yS(GkDc?KGd}klqlzKYj?yjw7^Jn`lPkw!E?NqrrU?2XN
zUnIOebamLr+xhzgH>aIl)!_~*hn=P@yi<NBCMM>@sZ*=EA|PJLopSrJS?VbftCAND
z-QC<<v#wq`b$xAg_(~+ZoSd90KA$x&EG?bN`V`zaSryp2PiwA0qEqM*C9pYpi|^=d
zd3SfWaq20Ny1!q;zrDY|{_m9MXJ#7z`0?Yyw%q7jTeFY<`T3b~N5w~_ef#!VSXexG
z{rdD$@97KyUWSQ>Sl+ySTUcDIUDmr|g@%lbOhG|`f|k~+y!YT5<j|6-w;z{1IKaqI
z_U_Kgsi7%W0>#C}cE4UITa~@>C@V92_wL<~U%x;DeN|sy9er|ga%V?}f|63x&f@2r
z5)ZRYnmpOl&kr=%*CTD-7rVQx@Z%%bv~zP#-rHM!@ny-5-Mb&Zf8RfK>ec&uN<k@;
z<8n~mBQbgT`f&Aa=*5(>nc1|K7M2Iceiz@nckc{n6zN8UTaQGe-}1%9&(Ec#rcRtV
z(b36?Y2UtmCK(qP7~<pO&GPPe6cl`jJ*o-{ti)BPWVHT1o1LGxTkRz%`G0$RTf1Z;
zqA;^4e>Z2^1=HMHQ;MITW1KK;THD;YbIW?xuGP)V%uJmIE<<f{PQ58Sedf#(=XO4j
z``^9uTkO_*;_TU{S5^l9`EuDGG$+u}(NUy70j0Rp4O@Qs<(8`w|JI#2;i06g%xF>l
zZOygS>RR!BetkCi!b)#btCpMvw@?kInrRysRzIB@E@5A118R~T?~|3WD&gSf=AJWm
z?$2MZ*H`}ec>J_&rK2NbGaGNxiwg^RWGn<4)O=@6m^iU<>sC`qNlBBuI}!}i($eSV
zST2^W{`Mv^Cnsma#*Lca6wc4H?cTn<yfeGCwN+3|jIE))-F@Znj#CpSOi(y~yTzk=
zCa9CGoSRnVv-E7<@4ne_>yKZ*E*>9$-`Z|tP}f9d_eXEuoXK9lciGh6zkdshih?7)
zt7}pH|9??G54Cc0a&aYHUl)62bvS=^cJ`k?e=1(D-M(eL|KmrGD*pfd{@y|k9NEF)
z?~ZepXCLpAt@-=)`i@<@k_;pmW*DdQO;-0;Q&4DF>OK9^p335|wNb1N_xII;MuNY6
z`*vkr>}>u0e}o#?c%=+-Z<*NG+A_|ttKIc&VG}459yz?^lF;_NySo-n(c1d%xO}}%
z;IhjvBerA&uKeBJ)+Qt+RptL!*oouggO$O{`7A9hLyOj07C-xNEjmB*w!h6smi>P|
zc`uF4e{y2to4dQkmx|=x+LCy4Q|gtK!OJhLQ-A0G)bBuaNJxliAt-CBrJkE(d32k$
zaVGPRACLP%&G&Xb*+(Cb%cnm))H=1wwy<8yys)^SK*7dl&XOfdASJ?&+j;vk>?%Ga
z%$YMs!m4D28P}Hd^K$bl9&v(tHPfey-@JKKL{wBzPOfkHa`n8tJW!E)tFr3Li-X4J
zZJHM^R!%<NcQnJqsPa?Fy7>Kl2NM!rTv*5u^?rI#m#X))2ag^#tzNBdXlVHKSjpv&
zkB@)+^y$*J;NalG(o)t7X=wpog@1p2tv1tcHqd>_H&xE2V#13T8E(B&trs&?OiZRM
zSg=4SxEkcW?~|F?`3k<g2%Ke_?RI@#?9GkI>~V2%ENpB-!otnVm#c@bi#d4cP}5TH
z=|St`Y(bNEadC37yUUiAzrV)-8u4ASL}iwFzTbSi+KA0*yn=#)Ix#yY9654C=z8qt
zG~VTYb00l8*c`K?fYIOIzv|12z%-w`yGk$jl@=8#+1t;bGDRe6TaKrX4^LxbW7OYn
z{e35XeSMvIdz-F&?Uz7Rd&fe1t?kFpUb-~t;$rvDr}g(6l)sBfJ2$6u>sC`mMa4N*
zrK_}(K(*4l8PlhWZ`-zQ#>|<C1`-~=zE6*I3P(goXUp%ox;9$8xVZSyqenkptzK^+
z!Bg_;O6Qp~K9eR--uih<)>W>(asCAb1v@97&%eKKuKN5Mp}leblhu4*+}x~Q{q2pW
z@v{9~92^&_UjCc412oIFWcl*PZ{DnVt1PD{T6H}4?ygcE34?}CVRfU-ODg|<-~a#V
z&F1r$w&&kp)e*>{*s>#eYu43Eb=O~i6%iAgwsU<@7b~~egc&nDl9H4(H8rF9Z^yT*
z{68m<>Y9Y!-*?_vwKvW`AVA=L-S4-t#}$3TTkjQi_x7sV*vw&O=lgIgd;P^Fp2BSW
zaz69zYF8<Nn%j@&6_k{yC@VX^W;}iF+_DWD1nlkY#q{HR932@M)cxl@d3AO5f!>9e
zrc4p(aZ@%lG`w{IGzP!rZ~oQ2)#Z;KKi1UJdi3Y#=bYQy*50VjewulAmuc1Bd6Apb
zGWX_sY35Gx);zka^mRc|(XQX@iaudG-|vvMDp_&ne6Rh_CzGq?Kuv+b$gFE?K#{7y
z|Buo2_3_6K9ct2z-e!<>MI$IU_~ng_%3hjMrddaBZOvx<@a0R&pC2FRSe0_g%geLz
zNHE-q*m(Tx+1@@`>rFL3i{45xv+*QcT@|_||GwN@zvJxkH4c}T`C8Wfsc3!7!^5M}
z_h!lz*jfM^n>kaah&*uHBqk(uXsUL2fY-%+wbf$!aW(w&dkjRaLa(J{|Bv_Z;Q0Lf
z{PMcLzZhb6m2`SeR{MBVJU+y;q5SaS!_9lEzlW7e@bd;os=3@z02kV;{GFVbKx3Qt
z>;M0il~>PAi(0k$>^qfw&GHprE-ZA8y2J(=Y;R;{e`jX}F8K<>Px;>Iaa+92PvgMR
zZt?uzP0IzGI6f};@a@~a$M?(>wSsrP-{CvUq*E_;*M~o!&o7>m^!3$MpSf0B5e*gd
zLTSS!mUr*o9XWoS*<4L5=k~T-cuQ#Y1*Ok^etr>=k(tg*_W%E9-EaS|VksM<ofN$x
zfzdeq+!D{pYA?Q4O$mBYV#OzG6(R~Af&4DJIB?^;1S{|D%O@26w7T|HN8;vM)qihi
zC#I=ocXXW!es`Sn^Yio1k8}!yx*yK%e1$(gJmgZ%UAS<eN!}d^adGjrImdb=LBqQS
z5(W}HYvT9Et*%BH{m6VOZJu}J;zhyg>S`q=rFUQcwFo$MRG-QS>G_)`pa1uMBH!}Z
zXHI^0?U(h8Q_m(ItJ}=K_~W+CIh-H2Pno^r=FOXd>*MxX+kNEL-*eztueA4+{l8wV
zzBNh7eEJzLFE1r4t67WP`!^LncKi3Szdj%e9LP=D_iA!)N51x?M)soLR_rc5b0
zz7br`o!Zs>GRMBOG5e?M(NpWiMLrgPV%=vwCC+zZXS?H0=cQ|-wjR2Ddv^M~%4IjC
zve)nZb}P64^~J^RE4!ahQ!_K0wqSum*40(6r>E&|+`c{i`@6dgph37*q1p^hO-&uW
zy;J*@TLhd!W-q-R?4^=x4jR1pJU@8Glc=MPo$YQrZS(r=|5@zYx9`dG=ac8oUHkXW
z+E#AyN6((QWn`={vyX|2k}4}JyJfd++qNZ3mrjk;0~t2G=k)deVS5GY-FMpNz2Ebh
zkKy{d*u`aUZ`Hp}dVH)m`PP=q7bRB9d}o*WPe0ey)1zW!B-F4u{rsc}6AJv#X`h-t
zd9pIBy5Ic!^bNDh7=4Ys$9WGdYEx2I_rG=K$c$6-Hx~(SkFWnbb)vHSq{)*Hzu*7=
z+`(q{pI<Kf|NOrHf9-Mc>`O~Jzx{r3aq-LR>*ZmC`>U<1zv-BnnK7hJ`wt%sG2Nfi
zyJMAd_NSR*nfjNNbIa;aT1_}M#kAeq_nuDHi%y9rUUS!^WEu-6Yt^4SC95^pqEKnJ
zS?;5a$K{w8)c*dKb9>v{b$Zj_J+WKj%m4lR=i=@@dHQsBFE6f>Cr?&XRz7_B(zB?@
zNK;eu$@AyJva-GV_y3QL1&vVc5dU-AeDQ8)XPvAU-BY8lT)2F9q2l>nrmL5oIC6Nw
zog=&&-{wg+#I4s)KL4MSk8hdxbiFwig-p@W(bM!|Pf6$RX<WTpJ1;Nq$noQ!KOUE7
zzOXLVnu&=i<MuY)+FxH1FD`O@@wMvA*|U*53P2;P7S`6n0s;=Z%ie0$6kE;hOFZ0`
zdAqf>)g<eR$Lp8cr+Xxge|$L1?>%Mt^5w=A9}*t8rN!N=e0OY5<>wt`Z=+hj?=O2R
zb>8MPkDs64mh9_&Cnu{<-}(O5*6hmfcgx?q&Jdk6V@5>Isp+M!uic%j8M|li-kZD2
z^%?wazlLnwTGA@8Xw%G@FC&c3%<XVox8v58#ffug?YMPCIcb8nciYa>HhGJJrdo$&
zrKca)-~Xq{Z~5W{3mSfYe*XB)o0xZ<;p<`|7v(%TF){U>ed((!LDwDL-rm06Ece9R
zsxKGaEsLITK#T3Ov(2YlUWpPsJIgfr>#M8fwKcA1W}EjX9&WpP@plVohT6`~?#c7#
zmsf}Do8{a%aM|BJHfcY<-H!&4vriY~Ya1F)+*|!U>+SRR@9WQ3`?T}RpWFZcZ+@hG
zI7i^pGiOS^nBTZ?zu<fQa*jgtQ*R5SH>GfPi|H!q>helUOA84Lzkl_2+pA04^W*D&
zJY;|1W+W*iv*v2-o{EXj=T-B8#*NG0#pEPCY-Z=bwA5Q%Mn>kxo6YALJbZn1%@%&}
z4PPIpYGNXyudi=a^1>k}M~C6`G~MI}2O7V;yv)wd&VH}rG4I<MJ9b#?DtYO2yid0B
z-_PeKPo8`@+22ml)>c+tUcRQLrYko2@v&Z;|9?K;`gT@s|E;ar*}t!?iJaUgYhCp1
z%}pW0c{Y`kqPAvT+>q$Z5EK;j;?h#?b?esc`TOlQs8xBRGGEHR&gS$qUFHXO%kP7R
zZN$aJ4;?xb@amY#WMOr`Cuhy?A31%x+bs9ihtKEjC(oaM{>#hDot>Qr6AT1|g%58^
zJq_w8G?;O2KA3Re(4nUJb-z}gd0z4NS7}pIQ$bl-+JOefnEn6u>u>w>_pkTg`7DYp
z0*@R+@*b5vI>I?kKmOd#;^#j;pSS<{Y<9jy*_(*aBW3<9tgM2vvc7qFdhL9&i-MQ?
zMcuW2zsI?qPu8O3g+TGMGl@q!1f%XQbZ)<~t5kdczhBz#_x;v;`7+gi#gBW{?>|17
z?7yk{d)}kRk3n<y;8sQ4@)<KE_~h+;e0+G)&&_#wsFnN7Jlom2(c3oE|F5&LvH5X*
z|G%RxoWdE`)<}N*`0>c`<KnxI1^e3`T^GB1%jaW9jtGc|gyhU$vqop%?|0T!UtfhP
z9h0>#yRb3YJ@@z2@VLY~I|^m&Y9z9=Uvt@>{`mO#=JfM&YsEkRe!u_vD=u#Cn|rIx
zKR-L0{OrukD{CSrm*20Qe&B$EkgzZ(506gSy4jNr&d)OC=I7_1SMjLRXg+8e%gWf@
zX2<(vL0aD2-tKSRKXs~TWo2bbT3Xl(IZ&rEI4tkcw@;__!&khid;08|TUM5qcKA9S
z!za6Hf0woK%kv$W>~H6pmZlcIE{1VI{r`Uv5fK+QBs$mB)NJ8w^cEHucXx4NVQXg0
z&(AL^F8=xV`+de2S67RFeRcKmM0dG`xwp5?v8$am*SdVtlqoLP*Tou2@VL0T7XJG3
z^7g&<ojWaSe|>ql`~AMf8#YXkulv#X@9*#IudS`EEbQ#YiHBHn4?T)bO-Z@1B2YQE
z`gosg)ceP0(*x%H_)%e1`l@C1YVGLlc~4(lTpV@xKqE6_L3#Q54@XX)JbALEy?t}m
zRV~nJsAt7@w&l+Lwr<hv+0uD=c}LG(-ck7Yw%Rq&TF2%^XUgw@#vMSb3(n2AKfnLq
zSN*krPEFMo5uIvVfA;T*`}gNxkFTHG($d29{mhG!UAOb@dublse!s4J@#5r-pJI2H
ziALu-t-l@|5z!IE1?pqUJ>ur#N_u#x71WF8=jYea)!n*l!v+Hx8JQUtg-I)qM@2*w
z{QUH^=>5q%cVre{T#@_y*|V(we~#P#J9uwz^~TMclaKXCM#(O?`}}ORj=!HDA0s1U
z&9|HBQT_3e=QrBy+_^I%BI3?PL(r&`rq<@;y7u<-=gg7e;^yvNJ0oCw-d(SH$NzEM
zyu456mft&gb?Xl)mAkvk(|>+?dZ4#_x0G3q$K73}jnbzJo<4gvY358%Cnu(P^X8?_
zyMO+CKP$Ib#`_vsi-HH+K7!Uw$Xb^byt<<K;Fxuhy8k?u1D7ssIx|(X>f4)>|Nj2|
z`DXL^F5b!e{(ie1v#(}m<>zNpD}B?`)8AGx#O^LrwX%|OsAgk0JKJ1cQ?ql=9-B>@
zHvRZ`T)y$YY5}jbnTopla;N`%cizQ0IXhQ=JSzV3-rn6|Vb_Y^TwKhq9kzzy!HJ2=
zNAJ0YczrxBUw>hdYj@FOLC{cj>`dD{+v;yi($CM^^WhM;N&dY(TFJYf=j7(jG)Q#n
z{3L0dCLkr%HG8)7*6izQT3TMKi;Ig7A3xsCEv^UZanCZ%4tpl>05ocAQK*!cmnYHd
z7PT!W^3C7%`?IdB5Ij9iSJ}d1#-2Sk3~A@*`OY@W<>2OC>_6Y`*6zIRmtKGU^7?vz
z>gj2j8|T?pPy2qanqOUA{jKb_kB^TVXI)v5bo|?ki_M;s)fP^1T7KEE{$EWSkK~~{
zJBzz{ciy^rQ&2zvG+w3~y$v)1!vGq@ySUi>@uNphmzVkG&pvVQ$>Yb1d29V<8o6rQ
z*ZwkD9lrkAlarHgosyhi|F3f9`NBIp3iteY)NNsH{rJtBp5425m#wS){q5q;;`B3T
zPycm@;!tccsI)5GT)mj*`T6<JoB8b?ytugdXxp)Ed3U2;9=xl|&(EJbPyeI+`aPex
z&fERg0oAyjyR1rIO_)5nIQ;qAb2pd8L)OoD|5b3FaxNr|UHm_2G-b(IAOF&AH|IXl
zeOfoipyWlsoH=tmJUt)IEx)(WwOdT|Rj%Oo`P=VRz3x-{Zpx~sr#EQ}r(#P*#+2DR
zQq$6mik^66U0LCHexB{mN8S4GUf(kd+@|I`Yl<b~ySv}Eg>JvN*gd?Q!-+%DWvb@)
zqYI==vltR`bI<bI|8dxyc6LX_$0T_7K-;`<_s^q`+~sSJoYvoeW~z30-tVuAK*J|3
zD`cLk<(W^nUlqQ7-nXfXRr0RpUtZ=Lv%9R9k(n)|TmzJ8RwzB)_c&};wE7d#P<><L
z$>MPp3lDjRf(A(+9V`g%jJ7oH=<xb>D|@}5goMZQb8}Z)i-7t)Aq#hV-O1bj=dv$S
zN8t@*UDu-r4;t352Q6J-%2R0)R`WS<@1C5wxjAS+Q&t$hK<B91iQ0(VJ3A)k-ri<d
z{q4;wK37l!K6K5QBcNe`U8S$5{n)r=%bE*K_w@ICV3Ib^(+Lw36m(o4x7WkZZ&~>I
zI2RWeh7ItsNi`#*NrjJ(F`5hQooSrDDf_x!+53BE&&)L5n({tz%G|kUZ){8kwRTqq
zFZU~L2Q}h5w8GLJO`ALS>{RV=uRkg2>ERiIALVSTE-Y|tPD<UlabrtM3xj}=(4kvf
zv$uW{RCZf%`Q?i{JB=eFBU4gRHe_B_tEsKcys&^VEG+EDuV2$V*yPnf)5FJ)x8K`a
z9S#j)wsO;us_mdPyn6%gZ^;ZkH`kio0oo0ygLMP`8T@)WJw9S@l__XAq_kbBMW8-9
zv`^~HOylJ@z6tx=1g^X;ZCRwE?A~|e?AhLf&Fs@H5B`KUA+=4XMXRc*ZL0fQm0Nvx
zSLv-2*Y7`k`*yAF^)FX~{cW!(Cx<S)7541f%M#J5@O6uz%YgE9_RO0wq2T<v*t?%T
zf9B-oPChfkP{yic#jeAeit6gm8>{Z|TV?8B{;aSqGB7Z3>fOt0A~$!bW*0v@v+_zd
zzl_BMMrO8v<$khax=}~6*Y7<xLDAXk&G&D&^FQCKejj?X_{NSxWp%$f3(DT!Vz{z4
zdiuG!*8ktWPESqUn1A1{sHo_}hYz5!G*An(xnuRTlYcCOr)sFGx^B(B?sq%y^0Hnp
zfB$eV&8hR|t-G})bMcfQEgc=7wGkT^9jODY)=f-IjQSb>UAuet?%#WNwf;V|CG+x?
z;`6r4d)$&s+k;m9`0?WvpW_`yh!<`y?zdYtWmjRoZS}X1W;WiSzP`S!6B86~)xR!#
ze{b!sl9!9_+_?j4!S5=1+NGtV6S98)zg?d`d<a;#Ze7;NNvgLhca^_i_v_o+)gZ?l
z>ycdC)zx)ncK*JVXU?1n`Tgzf)d`BuU9q<_Ojccwua5<JV&{$>D_*>KQIx#v<&BNW
zr!HR(zU}Af8K|eHcdM+XW{;6nZ`PX|8?}^_79IIJ-=ff|vbs9^_W!@{>qDQPn>)3;
zyL*;ZY1YQ)pU)Vd&-lIf%O&qur}g)TcxjpzKl6F|@ZrK+TeFwD^~<e2dSi3C|JP&E
z`5ONI{!bq~Sg<Yk_Oef(K24cBcW%~=4GXjG@7vqe*B2IF_j760%caw&1bJ=EzaRIz
z^2tQ^5GSW5(Bg3YeLog``tl`&t=VyH?CvnlrB}9QUk^!0SWr|{6m;J9`<zdoJ_S{O
zdlLv=41P4J{{8Gpul^L5saJYW<(vBJ_wVfZHaAz-i~H;A*Zx_(exH-((Q|XHSB9^b
zyLRo`kDosU#l?>=^`5??<fTwyVd0OTKSNVbTzz+8p|eiprWV(3vAdV$ycAn@#LHL|
zG*o_mHgU?7BlZ7($Ctgi;aFE^*Vx#2X1@LWW_JEfsi(zsbaX%qD89bBntXnqEofoU
zdAr{|Q>KV8fO32jE4PQ2*Q4$C>yppTGIeorNk~j|OiEHZJzZbF?5=6{wFxt3te9(W
zV>73xr^hIH*VM_Ag@6A1d24HOQW6Ib&y`ot&&~CQ3_Mxr@BdTOS$%I~GP|sG*_Cae
z%HUY9^zny>+qG@npa1yy_~zzxe)m3^hrhnQ1}#B}icL&Z++F@Y?cScsGjl8_&oa&a
z^ltZiAqfeOq@<*c5f6_UWF;jjZB9FT=<wlY^?4Od-|toTtIw~Qba!{Tu)Mr~MTJH9
z`nc4OkB)){$CHou8G_axzP-JC?(tKny7ug`F)}vRF5!Q4E;Q<qn2e0ibiG)M!bdFI
zwr$gi-8JRpWcAIJpVMSyWNvIq<({nW&$b}{{yxyiXLWV8Tff{{P=Aa;$|U2!mdwi^
zzI-`y`SRqKmzPif@!+9jSVY8$6BCtZ8mIG}on^ZET|tY$qM(Y(%EavK?wK<s*_s=#
zTnX`;V*#3zm^W|Uf!;)0!egxlx3}l-e_Oge|Gr;AfkD;Yd1}71E^JQs|Muo)aGFoO
zsDOZjj}K3PQDODxv*w=u{>N|LjJ*9k3^eDFb;aX0DBuqsJjlQ!Z@0(kysn+yJiobC
z4`01Hb!~05W!0AzVde!792oB0z3bh(*94`h>lhOwBPb{+!Nay^@7|MV&p!Qrzdrr;
zw%l6*D`q3LeYwusDZcYty7%$6qNk^VPS^kWcw8veYHpuvx7gHO^%)r&3l}c@ZekV;
zYP;y@>PlJ`rIdoo=BdlV)<)fWU$yt&He+#(r4btv96K*gZ)s@}S}atbK4p=TAuAi(
zr$3+18y7rqD6x{|<KsK}>T8v)qoZTZhlA{(R?E>>IyyQ#YJL{&{5Wg&>|_(Et#9w#
ziFrH!|BvJLH+Glnmz9;BnPKR>dbjDnPxGGpT3T9yR&3u)pPy-4=cT#t@3&|fS=rOJ
z|H9yn;6;yCm8=K}xdWOH`~80Z`Skg<YR*fXI@<ddFIJY6l#JrPadzd3KY#v2?5Qwh
zWo2cUV^cY4b@+NF^ER*GpdbUOUeH|Y+GulztSc)PzO7&V?(S}3VPWUMK*8td=YvA8
zqOvm7;@Gvd(TlIYo@tsr?b_{SQyyJe8GP&g>uYNdpFFwp$i{Q$&fThmwRsoZn2-nB
zb>QWtq_5w<W{u9;&yUk4UE7f8TvSvPvA3#p=f|k1s7Z6@#%9H4WN3gEmfqc6e*F~4
zjT;ggw?$oQWM*%WzFfODVk1+7vU{J%?+ic9qw@8C9J|GIPxZRrk@dXu&e==z(4j*!
zj8eJQMsHsSPPnJ8u8EvHV}?Xr-Op6eM9lj2`a5^-?2$6<0<GxSnCu=C6C+`q)^ly~
zlb0_~{`mN~u&iuZ>gj1b5(W$n8#Wl+xpPOtsw87$Hb}tS-2B$|{PUCj?M_0+q~B|t
zJ8-~Z-_K{#OiWA-(*OVRadIk}n23ajhZ`9g&9JMj>U<2E;%Q`N-?Aj*-=Cj4QCm2|
z!>=b@j=KqO_cAgvmb|#Y*zoJ?YvZaf88I<2A3lG++%_fu`0?ZIOO~kcusLshc>D5l
z|IJljv-bUX#LdIT%+}nvxB9zL?ky88F0O*&;>EGM%Y0^=@ir7bJ~nak<mSzrjV~|v
zKYsUato8MMwZB1)m1obcnq?ksem}2Xs!-nG>v}Ky|9^@>wFeg$*DC8NT$KX*E2<}W
zR?n2n-2Cb9*Vm_Cg-tY@ZIIY>YfokI%2M<FTBfGLYCZwc+j3UEGkDik6RP#jtE$Q>
zc$rV)%S%f?)$2HMd}OG8y>>fjtgE!W(0}KS9T!%IuYbk&xKVB9!CDv1qo9>6GYpfx
zYL+Uu2rSCEckf<Bb#-{TL^){C`p56z)1$(#WpRJs4QcoC+j!o2r+mNex9;(N`Rj23
zqM(NAj;Shn-#bBzu@eumgsqQ@ePaw+fIa((?DpSBmUvE<Fv$p5s$ZuA>dIeMwaxq9
zcy_kAu)O^Fhlkt2t=5VK61`oe?3W{V*|zqazP@o?`oR-<x*zlJ^B5b?R1TgY=XqyU
zq@N#OVq&6B<R%tx@6)gN+&L8GB-|D!y)plN1F~L?VaJ5Y=ckyqN7h?3s~*0iS}CmN
z-sZLX=9bLMFYfKNuKo38<+}jTn!`=+5~?;XdAsa2tLvXRHI482PFBr2^u?n;+Vs}W
zVs+4ZAIqX8yPO4`I@r%$daJ2AY1`~|;vye^p4a?hej}tX#OaUirN?2upe-V%St9cF
ze+>I=zeSX`E3azsTb_JriRZ0v?ecXedL)gz-X<GJaBy*Hg>5T-?gttTIGAwY=FLd!
zw40k!XUv=_8m)fe=+V|ISFYSXwz2B#E3cVfj1?6d>;C?l8g+epe*C6Qn;2FEFF$9y
zf1AM4lRIku=3nJ5w4Zcj?W{fj|NY*Qb=50+Mug3etgEYz&N9v3QSi`d<=5kVvWr)&
z=$K#sFEZLhT0upn<-`dO(30JvCmq|=lp5Bq)qTJB`#ssCfsviJ->!+@KkwJq*WoWs
zTQ{sr|G52C>kfI(M`65uGM0xHxpx2fzW;x(pSu1rJDoFU&vJ5eKYrYAueY`C>8Yta
z@^*9X?X4D;mR>#e|Ig?1Bch|37hDHT9Ig$!zCPZ*PtF#!s@H3dzMfv6UhFQ=2-3Xz
zf0ac=ML(WQ_CImv%#q{A+2!Tu3mpgTqtFaqmS7<9;lqc7|Ns8}`1$kVuF~vjI+09w
zA|5KkS3q~`hSfejB{~mz&2*r3ekSK@&|U%P((1Y8_g0=r7Fl%2tt`9KmA7_fshH`P
zo>%uabtwxydcS(wk+rVfVmo&2%Br_K(94-oTU%RLP;lV-_37Tz^%%|nO`2hl$n@>o
zw;eloZmj?R?-j?-d74^UNmo_`o|$JW9UXo96`y;^#_4fYolj3sFaG)IsZgn*p&@7}
zPe$g<sj1pqKZVEFin_bIv++nASU$hbD=%+dSbAF8G3oq02j^H8cg=l`b5O40&Bo&$
z{r&28cKfbK$k+cV6cQGGd2g>ZsGGGqe0|#Ub91ATwx*qx0>x*=2ZiWuIf~%s5)YOJ
zR;pe1jvW@0CQk+p$#r&izPY`<Klk>w&_9V>fqehJoqh4{!Uh-QwScpfgde>NjqZ=G
zsj&g6zq?z!p`(MN>gy}lq9UWcRbQ2ql$aV2dod<XoY*66zHXP(K6Npjhy%*~HcGE|
z+Aeu@ZSCm}L1j=RcTIhNclY!*Ug=Li9`}RBr}<<oK$CHx#ROK>-`_1Qe}8WcsNW!d
z!nutn@%g#AOO`CjNVP71r=qFZxh{5hl7U1=SJ$I2FE5LBeSCJSbKSaiC9ke<dV70=
z_P(x-+Ir&DsifcE-m<W>FTVad^U@Md+v;yA=jYinR8&@8TotPQrONi;!Gk&X_w7CN
z`pD6vg5u)t?R>H?ZfsOO-Y>6iX}QzMkSkCSvh-0sX@d4Tv#%Ku%hlGIeeIBVq7~f8
zF}d`9O{kQ#w6~KJQ^Di8a{2lB&(6*D&dJeHcI$Ciei<~Vv1!w$6DLkwXzTOw^z!1`
zoPPe<_4xXuXMEgN%7+Q7`z=X6-Un(}MBO|!Rok-ekA-g37KS_?%evohw?{SI-&L9o
zYP!C-xH$953c;B(XWrWQ?AbG)Stgx3i=StudTG|w)^>lr6ME&W>h&{c&UESSPfJU4
zOH6#2+yD3Vh5v8kSOdHaQ%{K;>ya!zZpalFn3iVsOFHMt>%fl(R!n>2xcu_P-R1dr
z?%oA$F?n!jXK|F#HMHLDMo4crwDv;%<e;t%8w@~W@-i|qtE^=Ld!<Y=92^`#i=tT%
zWSDHqx~c^lU(U+XTDfxNkz>cQJg@WH|7i%~YIyP_<x5p<r}*FaiifPA#pgdiKW7Bh
zbk=2W9$Z-&%mCVrHd)=DDPVV5?xxL~-&_5|){tRDYxQh5I{*0j^W)<2HI9CMd<Bos
z{{oFNT`%NRJaTwJRMnkr`S<&JdwD@C<v@$CEG<P>Uw>Ir{^wF%O3IRHEc0xu!(Q**
z9iMY!L*wP;{*T|itGj>nTlv!`PizXlhe&pHb!~ZmWoL1^-One&LBYX4pG@}Ouw~1W
zCnqOI6{VU;dHVP~xt+g1_2MGe9$D*YQCqVXUVmNrxYzv0&*$?&k(rs9dCPKs-LFhB
zy_gf{=2~ao+M)@H^!WXCA|fIT0#Z_5*Vn~<{Q7li`uTaFEzYWTQ%;;cn|i!Y_R8w;
z^+$f@UyF>4OiWE}-MiP;YHr_z2?Blc_I^o8O1ZbTC|X#|`0}NMhmAQbEKEg3Ma263
z+2;Y9(|q3^W?|sw=Dt_;S~n+WN&1uZPtMN(FWg*qDb1?%bN$oRCwnA~R|GHTn_v4)
zl9Q7&B075V#EFgDx0|c`%}F>j!?2>dT3AMgr(yBp#oOW|ZES3|ls|vRUjOGKJ81sx
z=+Q(2iB;Bfs`V5V8T0eszv6QanXm=EHgvhw?Kgg(rfJD*?XCK{%FL(u>#NX5j~*2i
z7N&pND9XSfbP{wfz`^_X=dW0y!LVr2q8In~+ZQ}`eYt+$FRrB`*5&V(#O<w8`(AhZ
z$H&K#rdd~*XKn!vZ7q5g@-yJ&&f@2wT?q`Wt*tzgMlE)=zaG4NxpMFK%X=UzmF~<a
zto{4z>y6vDgLjwZ?)&rU^eVoe`H`E`c-#NSSAUlixmsyg@Mm{XV1S~bfR;j_a<6C^
ztIH9;3kuT&J~(xl7!(#cdf!SjdCMX>`-(&F_K4`2^F2#?e0|usO=ir`;O$yumLwfm
zxU=P!1d~=mvP<JJ*6FREe>clVhv_jomQA;rKl9JV=Wq5sPn`ee$C>JLzwMSTU9x=n
zY{&>)#7@Qeg|lrcjgB5YDk3ia`RDWbpKoQaXZ&!0nIE*V@@TiXWz`pr$H#h^Z_O{S
z{{43Q$`vaXY}s;U$NSl)J1Re`X@{*zxV|p7N6Pfoy5oT>Lv&(yUHNe<!QjEGSGSU@
z7xJxKxiaVGrlqGp-ntb9sv^JKTb7}4>C&ZNTdnu|Emzjk>RK7R{L$mbhhJS?&2Fm(
zT3vnX<UEn@7gmL??&#}NQ&CBAc-$jrJ8RP>qgk_V{eC5DS(M_yU$A@k?#XlK+Af%}
z&%2)6ykW@_6}^}p2Y!BjZdv+DWarME-!(uhD-KzCJ}m18O`=bqe*9RkG<)iv6DK_Q
z<ZL*$y#IN1V+8Z9-F`0a?u##GaPaY&Wm?6>#GH9^bMwb<-=5tqzt6g0)hf^ogx>WX
zrLV;}IXPKaSs%WCpP#dPOZE4>q@<*dj*bJjZ_j@6B;|4Xr%#_i`}=oo30<{z?b~f@
zFD-O#@96C{-S=+B$KsL_lYQ6k-J93Yz_4odYW81SLFwmkmq^}s&@l~$h7;f3-v0Q>
z6BCW(V@HopoH^68xY$@#RrSm))6*4ts;aE@_4Qs}UN<%*GM~5mz2;+#rzhv}KH10e
z|9`N9_V91pw(WQ}sC88N`B`8@#EFlOk4J9Jn!54k&al<0+S=WhE(Jk$B6n?Eduf_(
z^sX(TMdju1x2?VO=jUh3+Fx4=@AR4Z&%Jv6dhvq;jE4>#V*9hVR(|VN%{SKZhY#hl
zvaGW{bSRhgjdi?p^z2CS3EYY;5-ehuEfXy&|9-vB$;YRrrImHz*p&>EkkC-js>_s=
zB`0?_H8FjAcbEIXp32WFR<B;Xaid_v(Qfg_4<0l$H#09>xKKn)?AwlAruzE*e}8|M
zp5L9Fm-p<`)6)zst*sB=yx{?jE`SDlzJC2WW%~5ZNk_Sk^+-0ytp{xy-}_H?+qP{k
zE-nQR4lqvBi9B?$nf>OT%FQ45986HKvzs?nJKU)9)05plmhHQL>y}8}yt=+7{q>74
zZ)Ym_8*RJ)=Z-1<Uh6j{zvMA?*Ju$)Qdm+TosxcTj-svY+)I}(Im%aGT;!^vs#^Hy
z2<PN!?t5S6b1&Lk{r%Gk<^CBm_O-vZc)nJamX_vWYu>mv=y+Upt*E&8@#y@$M|TuH
z=HTH;d2?gqvEt6o&Wh^l&#UAAt~&p?^Z%{*nCze}r5Rb7PoErpEu*pW=uMxEoQf?H
z7CS?&I=)odo}Q-rxWE37^15~Fn(gYGoR~B<HCJ5Cn*3N%QSr^q&FmTVX=jceJzCLi
zrmo)p>{;5YSFb>WHpc1a-W2{hE?++<_x3i)+uC#O>-Sx+u)3vHx<%pjov_R|H#XXC
z3!P<MuIK9Ny65-1-D=xC>wn+Af9b*nhp4Ei_18she+N6JPG7XvIoMl!w_jLUwI7FK
zOX-9|xeZa#(I-!zW`1xMJl}je<>7&ZPhY;IJUulvwr}~eWkv7qSei)jt`1*smY2fw
zc6)@gvoj|jU)tGOraN};{%-MamT7iIW~Srfiw_<@K0M1byQ=s2)vHtQ?k@j)OgewT
z#f%xY)n?*)F$}lT&nYP>?b7jU_$Ij<wA^j>?Cd*TOLpy=_4W1j;J7$H2M30_y1F;F
zwyyTS-~D6#YF=MvEBzLMLz`yqeR_4T!4%!~US3`m&t|5dICaYCoQm%ZgG7&YxtdG1
zc0Y~Sv9{sTtTwsxt8M30y5>%(iIn`=8!3Ns=GTeS!tV=*uZuZ&_;B;n)6+MXy^Sid
zl3ly@?)_DpN;XMd{H0r6BE(l_c6VO!hRunUCzdGod2lEmNtkpfw_(-l)tNatOSW#E
z+SJ7K?EL)u&s&+NElD<gwEBOvSEeuX_5D|aqb?uFx6^z2{N?i{6`O^+m+cmQ^!C!y
zisB8M6_4C*ZJjkQe=4_P%Z8dwtG3L)bxY)%`1McAo4nSUnFN+U;h63({^*Ixw4a}!
zZ!UiB=hiQm`_AC^TK(06;=k|KZf7$zaq9`#nEaB5Z_1TTuTy^iU;KEkY=oRqXRB1X
zThEKi(2W949H-S=qwPz!yI;FLeczF?IqVNlX72guwdLx+!#Ad1?FiZt*)FIYqVw(Z
zgc?r8BL*rU#pc>i#kR{eOmdT6J#Xrys~w<D^V7-ccYY;Wyq{=#?`ERI^Hu>Tj>7km
zz7NY@Tu_v&d?L8aXXc@0zO&B=9WFrV+1zz#?bfYRL0ykKcVbSRK7I1Ui3NN1)SP%M
zac*<aCMnQT#|<@MfjdOS#Eeo;iKzL{>)E^4wy?19$&)8Ml`@Ab!&W7mK3cZ?$_m>#
zg_6nrw+_$r^<Q~&V@capy<MQ0sW#!&5px29gOe}yf;I_WUhdCdDbu}d_h-+Sp?-0D
z{DM!{&o7nMgIksp7P#XpXgC?Pi=IJTFGj(@;KRJw)vni_{%+r%nVEa|TpG)^WVfIv
z8SWSU-`VUPwIzSwPqy&z@ZxsGUml-QYgN*(xfi9?o|(CM)v8r?`>$(pne+4WuZ!84
zR1+H;+uPp$a!o)|l2T`9r?c#=gpBILU5%4#AW3abq<r?tnV~k1&5ED-=*8{vNKI9(
z+B@&e86Pp-sFZgbK*J4+ZBs(e?6`UB)~9RH`5!@@kXyIBoSc~K?ChTLd4p!Pk~|_^
zAF>I6cd;$md061?gy<{RToEo?qnfVe8YoiC4w_u!P~0~=)aEf{iQkd};lCf&*Z<|V
zx3_;>@8{>YWa-k4Sy#1|`OWQeTm0~(`urvP_T9U)u<QTz;B!`g9`e^aEDb9B`>WL0
zP1EYNE@;>;`PG$`KYsj}FlSEAGY{AHc}2^vW^wZI9Xo%%|G)tUE^h9#va4RCyuP;f
z$B!QmUcdIPsj)eG_H0O4m{ImM9Z(v%>~G&2x8A&bMy#gO9oFW93wG?>sTmp?dg|ff
z_NmjRiMa-fs9IV^diwd96_+fso?E<3qpRrWr&LuNo0yNEJ_UhB%8cFBcU`tjKi(%B
zeCW`jDbJpzxdw_neZ77^RLi%?^Z&Sjn#Cfnfm52<`L&9lpEJ!bIJ9EduBe$7g^Lzl
zehHGfaU<gC-|zRslaiCC_Vn<8%s46@A9AnqxomOUP2HJ=!sVcS@9pw+Guq{<KwF`6
za&KSX$->Sa92=Ya?1f9Y)pMQ)*Z2Q3oo$})wscZi+O$>0=eFhFKlkeDYDvo?6$XC0
z9}8wK7QXv(j`374*W_en23}s?9lN8;%jZ5kmSAw<%9WOb2@cCI8`k})=##ZRHc8c+
zq2k@n=Qp<J&!4&YjTe`BVYrd8ac6)3`=UR4zu$Y@sXouZ+2zshl5?QZB{@0J8s58S
z0z2=OoVzI=Uvu!}Wc8V**<!`d&pkae(>SfquVi`gIV(BUnFfhWo}QjLcXnJnbFq_8
zPEJlmRrTT1r$-^<m1)O%B=3~puRYeu;qULCm6dfyCNn%}M`zvT*I(b<-#`DxjflyQ
zUteE;{Mxl?>GLX&`AqDbS8{Hq-}2<cZM<t@cVD}c*co@{r?s&J&z&8G&7dii`~Uxa
z|MO+Jz3W6@Mx9f0EpP9uwFa#!IW<)~=kBhlsi&vSv~;@o@Y&hfPo6vph>Y~i&DGtW
zclXhSh0f3TR{MjJ>a^Ljy&W7F^78VICGReO|7?BzU+wB|Zze|Pi>)X=r=_K}V$GVA
zUteDO%(0mG>QxpK6BB6gC-<F!t9tprb2hfNot>SIOM?VOM2>U_DyIZ?w(c}em#_K2
zXlQ82aOBvrQ;%4<i@v-FRP&t$TJus?R(9mb5zxAfSFc|iO7J**e`~7SD3Z76zLAlU
zPu^rXtCEhhv&}cx|F4sgl|3sl^M&X0IV-Pbt&83578)uV8X8(rRTUHxa)iI`1GBNa
zrq%Ax<zlk3XFonZ-uUhIvZ}pu;^N}Pt?Ii9)6d<xd-vz<`~TPuY)n4>;oCQ}@Qg=%
zadB~=^(Ze&tVCQD?d|7hUS1|BCFNCCW_BU2LO*WLjWZL=RHZ+3)$J^OEe2Bj^=i0<
zwKcOr>M4=D`}@u^^V=|N(GR$>F}dCHu*lq!>6_1~`OP_Tb94GqjtvTTpZcu7K6%1~
zhOpH~@7<fH8@<gS|DKJpyOveS^vz98OrYUy9yaF1ZoQA@mfuS})+3qL7gFNk>8Yr#
z%^e*b{pauBn_IKRO-)UY96eh2_!#e>zkfab{mu0_Hi-m5&MJ%G+zH)Ssk)><7*uI&
z5Q0l-f;W9Du4xg09Muu##AUA7aul_*aH@%w`<LmtBb~zgiznaOw`cj`*X#G6ySUi>
zx2SXZx3{;`-^5IuHLJ^O>7&#7`%UKgdwV-C4J!QhCNjAvYpayKz5SIeQ%g%rhu>m~
z#m~+d=1FL^Nc;o2C1TFS#qPmjVQ#^}!VK<xG8;Q&&s<*@d-~7M&+Inmo}Hcj^QnG)
zX3g5L)hExKG1>R;#p3=CA3qxYJoEAK@rzkoQ_|Bn7d`c20PW)D=2q6$&OTtssko*Y
z<d<#{Ny(E3o7o%JSzo%pqp<nKiwqMf-h&4ZPMI==As{^5y`sXx&dyFJe&3xX)6=V}
ztQ;L3fBgLU@lm(_rCYadJ>D7}9SvGQ`0?vk*R^4rOJ9dEfCe2mZQg8I{Y|H?uI`xR
zhCZ`=QBhGB7Z-!%V?0hyPM~qq=^OX|c*O12CzHA9VPQ$hmO`<$ixx4Zr$3i7cgf4i
zdGhb~d-I=f=Gj(P{WoeAINSzttGKv(TAJF~vuCGFnPQNAP3Q5>DN{uL{Qaw`shN1V
zjo0S)o6QV=-j(lve9>M0>F4wI&rhn)e*$Wc|M@)szlyeYckpt*g;%pSZru2C>y3+9
zTd!QX5)c;VmYJ!kzvn~K$)5)fIBbd1jaoacql078q)7}OK0Yb8x8=^UF7MNg-ZtaR
zs#1&6S0apzjB3Z{%$WlkxJ*q=O-W1p^yRXD`Olb)42^yJ_JOu>mA<|v+5h|3ucBvX
zX1@Pr)hckfW$jL80jG|4875MzR;@a5;)H^c(WNIbj*g7g-`}w&+}l%m<?7XoH*TC*
zKELkNart^4hmepdg=t#$_VbIMp94*PGR&GatK!2!_Ub*~o}8Tg^VRD05gU_O0|NtF
z+S|*2?_0A*r>m>$#Oc$IZ|CoS`o8|Z^~Q}GkA2>rfB)U5nXALsKYI1**3%02g;hU3
zB!2q*x$^(t@4n~c?dw2?U1(`(ad2}#-gsONwDbYA#63U%eEq-A^2hpQZ$G}T(jp-T
zPKRq;yTw2g(w)NUD^{=G{kl-Z_2JIv^FT{QYpeeM{eIuSu+XrbUq0=4pX{Hf`t=X@
ze!pj2=^q&>xpL)7(1xEIH*Q?GcI{a0_q*+3tKXL9u(PvENlERJiQkaGxH@dDl7WGM
zy83cC`~M#f^FMy}%&oH0($Ue;p}Nr`>&gndzqw8vuhq7DwF-!7D=IclnIdu}YwO96
z6D9~eUVBbh-4C=$X!2y?;%8?bp0EFB%&>Fk&O3W5g+cRJn>KIWS@u@y!oQw7OTDL8
z?Roe0_4Ul`?8hsY&r7<yt5if(bmpA(>({H>+0Em(|8v2j`q`5wpt+PEFBbPVd`sH6
zW5<ruH|&31?Dq+qP*zY<0$RWNWb5^~*I%tc=1!eD)#l3uXNTX#wR(DbH*Vhi`pZ)B
z$Oi@EJSUFsr!QY-#yH-L%*xW@<mAjSk&=;>{rRAozu{Z@{YQ@<fBg9I;c@$ahH-Im
z3_i2XdP`ql6O@zli;k8qD=Pyns6F=Rxu2=2>7EaVxVLQG%2+aU<>`|rA6{DO{pU@(
zy|LWotgWD#|M)+T#MeY`@0&JF?9rn~pxt99)1RN2d02b>o~A__$6R@3A3b_h^+(Ui
z*?Hr}jSH79ZS|dP_VC#=wk`Ladb_$_eJpnBc<8#cB(6-gMc~oA>Yq=iNA4(ixFAPY
zNLcvrGT+%h9`e^)a7^S>)R}unOVp`D@!PA_>la;q$!60Rw_e@KDyqv&ai4q2G4P2a
xhrCYoKz683op1;$;_(pNkW0i<z8?9{*qp7ev-Q5CBm)BjgQu&X%Q~loCIDfAeCYrH

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/performance.md b/tensorflow/contrib/lite/g3doc/performance.md
index 6b7943caf8..ed11452716 100644
--- a/tensorflow/contrib/lite/g3doc/performance.md
+++ b/tensorflow/contrib/lite/g3doc/performance.md
@@ -3,8 +3,15 @@
 
 Mobile and embedded devices have limited computational resources and it is important to keep your application resource efficient. We have compiled a list of best practices and strategies you can use to optimize your model and application when using Tensorflow Lite.
 
-## Choose the most efficient model for the problem
-Some models may be too large to run on embedded devices. Instead of large models it is better to use a slightly less precise but smaller model for embedded devices. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices.
+## Choose the best model for the task
+Depending on the task you will need to make a tradeoff between model complexity and size. If your task requires high accuracy then you may need a large and complex model. Some tasks may work with a less precise model, for these tasks it is better to use a smaller but less precise model. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. For example, graphs below show accuracy and latency tradeoff for some common image classification models.
+
+![accuracy vs model size](images/performance/model_size_vs_accuracy.png "Accuracy vs Model size")
+
+
+![latency vs model size](images/performance/model_size_vs_latency.png "Latency vs Model size")
+
+One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices.
 
 You can retrain the listed models on your own dataset by using transfer learning. Check out our transfer learning tutorial for
 [image classification](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and
@@ -12,25 +19,25 @@ You can retrain the listed models on your own dataset by using transfer learning
 
 
 ## Profile your model
-Before starting any optimization, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time.
+Once you have selected a candidate model that is right for your task, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time.
 
 ## Profile and optimize operators in the graph
 If a particular operator appears frequently in the model and based on profiling you find the operator consuming the most amount of time, you can look into optimizing the operator.
  This scenario should be rare as Tensorflow Lite has optimized versions for most ops. However you may be able to write a faster version of a custom op, if you know the constraints in which the operator is executed. Check out our [custom operator documentation](custom_operators.md).
 
 ## Quantize your model
-If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. Fully quantized models can be remarkably power efficient as well.
+If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. 
 
 ## Tweak the number of threads
-Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](https://github.com/tensorflow/tensorflow/blob/1084594657a5d139102ac794f84d1427a710e39a/tensorflow/contrib/lite/interpreter.h#L337) threads.
+Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](https://github.com/tensorflow/tensorflow/blob/1084594657a5d139102ac794f84d1427a710e39a/tensorflow/contrib/lite/interpreter.h#L337) threads. Multi-threaded execution however comes at the cost of increased performance variability depending on what else is been executed concurrently. This is particularly the case for mobile apps. For example, isolated tests may show 2x speed up vs single-threaded but if another app is executing at the same time may result in worst performance than single-threaded.
 
 ## Eliminate redundant copies
-Tensorflow Lite is optimized to reduce redundant copies. The APIs allow user to [mmap a model file](https://github.com/tensorflow/tensorflow/blob/9982fd6c8831cbd2f58954f79ea71f26660393bc/tensorflow/contrib/lite/model.h#L152) and avoid copies. If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151).
+If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151).
 
 ## Profile your application with platform specific tools
 Platform specific tools like [Android profiler](https://developer.android.com/studio/profile/android-profiler) and [Instruments](https://help.apple.com/instruments/mac/current/) provide a wealth of profiling information that can be used to debug your app. Sometimes the performance bug may be not in the model but in parts of application code that interact with the model. Make sure to familiarize yourself with platform specific profiling tools and best practices for your platform.
 
-## Use hardware accelerators available on the device
+## Evaluate whether your model benefits from using hardware accelerators available on the device
 Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [Neural Networks API](https://developer.android.com/ndk/guides/neuralnetworks/) on Android.
 You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable Neural Networks API call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance.
 
-- 
GitLab


From 396a8a4105edd409d0821c4d5d0b920b315ffb72 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Mon, 8 Oct 2018 14:26:43 -0700
Subject: [PATCH 0537/1085] Add custom call with layout constraints. Add a
 variant of CustomCall which specifies arbitrary layout constraints on the
 operands and result. The existing non-layout-constrained CustomCall is
 changed to have no layout preference and can now be assigned arbitrary
 layouts by layout assignment.

PiperOrigin-RevId: 216249615
---
 .../compiler/tf2xla/kernels/index_ops_cpu.cc  |  22 +-
 tensorflow/compiler/xla/client/xla_builder.cc |  43 +++-
 tensorflow/compiler/xla/client/xla_builder.h  |  22 +-
 tensorflow/compiler/xla/layout_util.cc        |   6 +
 tensorflow/compiler/xla/layout_util.h         |   4 +
 .../xla/service/gpu/gpu_layout_assignment.cc  |  10 -
 .../xla/service/gpu/gpu_layout_assignment.h   |   2 -
 tensorflow/compiler/xla/service/hlo.proto     |   9 +-
 .../compiler/xla/service/hlo_instruction.cc   |  28 ++-
 .../compiler/xla/service/hlo_instruction.h    |  10 +
 .../compiler/xla/service/hlo_instructions.cc  |  33 ++-
 .../compiler/xla/service/hlo_instructions.h   |  32 ++-
 tensorflow/compiler/xla/service/hlo_parser.cc | 101 ++++++++--
 .../compiler/xla/service/hlo_parser_test.cc   |  67 ++++++
 .../compiler/xla/service/hlo_verifier.cc      |  22 +-
 .../compiler/xla/service/layout_assignment.cc | 108 +++++-----
 .../compiler/xla/service/layout_assignment.h  |  13 --
 .../xla/service/layout_assignment_test.cc     | 190 ++++++++++++++++++
 tensorflow/compiler/xla/shape_util.cc         |   2 +-
 .../compiler/xla/tests/custom_call_test.cc    |  50 ++++-
 20 files changed, 650 insertions(+), 124 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc
index 3d81ae9eb8..f210bfbd88 100644
--- a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc
+++ b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc
@@ -88,20 +88,30 @@ class ArgMaxCustomCallOp : public XlaOpKernel {
           xla::ConstantLiteral(&b, xla::LiteralUtil::CreateR0<int32>(dim)));
     }
 
-    xla::Shape xla_shape =
-        xla::ShapeUtil::MakeShape(xla::S64, output_shape.dim_sizes());
+    // The argmax function expects row-major layout.
+    xla::Shape xla_shape = xla::ShapeUtil::MakeShapeWithDescendingLayout(
+        xla::S64, output_shape.dim_sizes());
+    std::vector<xla::Shape> arg_shapes;
+    for (const xla::XlaOp& arg : args) {
+      auto shape_status = b.GetShape(arg);
+      OP_REQUIRES_OK(ctx, shape_status.status());
+      xla::Shape arg_shape = shape_status.ConsumeValueOrDie();
+      *arg_shape.mutable_layout() = xla::LayoutUtil::MakeDescendingLayout(
+          xla::ShapeUtil::Rank(arg_shape));
+      arg_shapes.push_back(std::move(arg_shape));
+    }
 
     // Tell XLA to call the custom code, defined in
     // index_ops_kernel_argmax_float_1d.cc.
     xla::XlaOp output;
     switch (input_shape.dims()) {
       case 1:
-        output =
-            xla::CustomCall(&b, "argmax_float_1d_xla_impl", args, xla_shape);
+        output = xla::CustomCallWithLayout(&b, "argmax_float_1d_xla_impl", args,
+                                           xla_shape, arg_shapes);
         break;
       case 2:
-        output =
-            xla::CustomCall(&b, "argmax_float_2d_xla_impl", args, xla_shape);
+        output = xla::CustomCallWithLayout(&b, "argmax_float_2d_xla_impl", args,
+                                           xla_shape, arg_shapes);
         break;
       default:
         OP_REQUIRES(ctx, false,
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 6b31831010..e7cf9ae363 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -1279,9 +1279,10 @@ XlaOp XlaBuilder::AfterAll(absl::Span<const XlaOp> tokens) {
   });
 }
 
-XlaOp XlaBuilder::CustomCall(const string& call_target_name,
-                             absl::Span<const XlaOp> operands,
-                             const Shape& shape, const string& opaque) {
+XlaOp XlaBuilder::CustomCall(
+    const string& call_target_name, absl::Span<const XlaOp> operands,
+    const Shape& shape, const string& opaque,
+    absl::optional<absl::Span<const Shape>> operand_shapes_with_layout) {
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     HloInstructionProto instr;
     if (absl::StartsWith(call_target_name, "$")) {
@@ -1293,6 +1294,31 @@ XlaOp XlaBuilder::CustomCall(const string& call_target_name,
     *instr.mutable_shape() = shape;
     instr.set_custom_call_target(call_target_name);
     instr.set_custom_call_opaque(opaque);
+    if (operand_shapes_with_layout.has_value()) {
+      if (!LayoutUtil::HasLayout(shape)) {
+        return InvalidArgument(
+            "Result shape must have layout for custom call with constrained "
+            "layout.");
+      }
+      if (operands.size() != operand_shapes_with_layout->size()) {
+        return InvalidArgument(
+            "Must specify a shape with layout for each operand for custom call "
+            "with constrained layout; given %d shapes, expected %d",
+            operand_shapes_with_layout->size(), operands.size());
+      }
+      instr.set_constrain_layout(true);
+      int64 operand_num = 0;
+      for (const Shape& operand_shape : *operand_shapes_with_layout) {
+        if (!LayoutUtil::HasLayout(operand_shape)) {
+          return InvalidArgument(
+              "No layout specified for operand %d for custom call with "
+              "constrained layout.",
+              operand_num);
+        }
+        *instr.add_operand_shapes_with_layout() = operand_shape;
+        ++operand_num;
+      }
+    }
     return AddInstruction(std::move(instr), HloOpcode::kCustomCall, operands);
   });
 }
@@ -2690,7 +2716,16 @@ XlaOp Call(XlaBuilder* builder, const XlaComputation& computation,
 XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name,
                  absl::Span<const XlaOp> operands, const Shape& shape,
                  const string& opaque) {
-  return builder->CustomCall(call_target_name, operands, shape, opaque);
+  return builder->CustomCall(call_target_name, operands, shape, opaque,
+                             /*operand_shapes_with_layout=*/absl::nullopt);
+}
+
+XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name,
+                           absl::Span<const XlaOp> operands, const Shape& shape,
+                           absl::Span<const Shape> operand_shapes_with_layout,
+                           const string& opaque) {
+  return builder->CustomCall(call_target_name, operands, shape, opaque,
+                             operand_shapes_with_layout);
 }
 
 XlaOp Complex(const XlaOp& real, const XlaOp& imag,
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 2e14e47a35..9ceede7a79 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -577,9 +577,10 @@ class XlaBuilder {
              absl::Span<const XlaOp> operands);
 
   // Enqueues a custom call instruction onto the computation.
-  XlaOp CustomCall(const string& call_target_name,
-                   absl::Span<const XlaOp> operands, const Shape& shape,
-                   const string& opaque);
+  XlaOp CustomCall(
+      const string& call_target_name, absl::Span<const XlaOp> operands,
+      const Shape& shape_with_layout, const string& opaque,
+      absl::optional<absl::Span<const Shape>> operand_shapes_with_layout);
 
   // The following methods enqueue element-wise binary arithmetic operations
   // onto the computation. The shapes of the operands have to match unless one
@@ -1197,6 +1198,10 @@ class XlaBuilder {
   friend XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name,
                           absl::Span<const XlaOp> operands, const Shape& shape,
                           const string& opaque);
+  friend XlaOp CustomCallWithLayout(
+      XlaBuilder* builder, const string& call_target_name,
+      absl::Span<const XlaOp> operands, const Shape& shape_with_layout,
+      absl::Span<const Shape> operand_shapes_with_layout, const string& opaque);
   friend XlaOp Complex(const XlaOp& real, const XlaOp& imag,
                        absl::Span<const int64> broadcast_dimensions);
   friend XlaOp Conj(const XlaOp& operand);
@@ -1732,6 +1737,17 @@ XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name,
                  absl::Span<const XlaOp> operands, const Shape& shape,
                  const string& opaque = "");
 
+// Overload which constructs a custom call with fixed layouts. The operands will
+// have the layouts specified by |operand_shapes_with_layout| when provided to
+// external code, and the external code is expected to produce a result with the
+// layout specified by |shape_with_layout|. All shapes in |shape_with_layout|
+// and |operand_shapes_with_layout| must have layouts.
+XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name,
+                           absl::Span<const XlaOp> operands,
+                           const Shape& shape_with_layout,
+                           absl::Span<const Shape> operand_shapes_with_layout,
+                           const string& opaque = "");
+
 // The following methods enqueue element-wise binary arithmetic operations
 // onto the computation. The shapes of the operands have to match unless one
 // of the operands is a scalar, or an explicit broadcast dimension is given
diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index d310335618..3c8db9aa45 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -65,6 +65,12 @@ void SetDefaultLayoutToContainer(
   return layout;
 }
 
+/* static */ Layout LayoutUtil::MakeDescendingLayout(int64 rank) {
+  std::vector<int64> layout(rank);
+  std::iota(layout.rbegin(), layout.rend(), static_cast<int64>(0));
+  return MakeLayout(layout);
+}
+
 /* static */ Layout LayoutUtil::MakeLayoutFromMajorToMinor(
     absl::Span<const int64> major_to_minor) {
   Layout layout;
diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h
index b78883c2d8..af032b1cae 100644
--- a/tensorflow/compiler/xla/layout_util.h
+++ b/tensorflow/compiler/xla/layout_util.h
@@ -40,6 +40,10 @@ class LayoutUtil {
   static Layout MakeLayoutFromMajorToMinor(
       absl::Span<const int64> major_to_minor);
 
+  // Returns a layout with descending ((i.e. {n, n-1, ..., 0}) minor-to-major
+  // dimensions.
+  static Layout MakeDescendingLayout(int64 rank);
+
   // Creates a sparse layout with the given maximum number of elements. (This is
   // a convenience function for protobuf construction.)
   static Layout MakeSparseLayout(int64 max_sparse_elements);
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index 1ffe855750..8c9a8adc61 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -213,16 +213,6 @@ Status GpuLayoutAssignment::AddBackendConstraints(
   return Status::OK();
 }
 
-bool GpuLayoutAssignment::CustomCallRequiresMajorFirstLayout(
-    const HloInstruction* instruction) {
-  // - Inputs to cudnn batchnorm custom calls don't need the major-first layout
-  //   (i.e. {n, n-1, ...0}) -- we can handle any layout.
-  // - Inputs to cudnn convolution require custom layouts handled in
-  //   AddBackendConstraints.
-  return !IsCustomCallToDnnBatchNorm(*instruction) &&
-         !IsCustomCallToDnnConvolution(*instruction);
-}
-
 Status GpuLayoutAssignment::PropagateOperandConstraint(
     const OperandLayoutConstraint& layout_constraint,
     LayoutConstraints* constraints) {
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
index 4ba7989e9c..6a48e55fd2 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
@@ -46,8 +46,6 @@ class GpuLayoutAssignment : public LayoutAssignment {
   Status PropagateBufferConstraint(
       const BufferLayoutConstraint& buffer_constraint,
       LayoutConstraints* constraints) override;
-  bool CustomCallRequiresMajorFirstLayout(
-      const HloInstruction* instruction) override;
 
  private:
   Status AddBackendConstraintsToDnnConvCustomCall(
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 1ea26ddd5b..a0eb9e6ddc 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto";
 option cc_enable_arenas = true;
 
 // Serialization of HloInstruction.
-// Next ID: 56
+// Next ID: 58
 message HloInstructionProto {
   reserved 10;
   reserved "parameter_name";
@@ -184,6 +184,13 @@ message HloInstructionProto {
   // Sharding for kDomain instructions.
   xla.OpSharding domain_entry_sharding = 54;
   xla.OpSharding domain_exit_sharding = 55;
+
+  // For custom call this indicates that the layouts are constrained. If
+  // constrain_layout is true then the 'shape' field must contain a layout, and
+  // 'operand_shapes_with_layout' must contain a shape with layout for each
+  // operand.
+  bool constrain_layout = 56;
+  repeated Shape operand_shapes_with_layout = 57;
 }
 
 // Serialization of HloComputation.
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 2f6db7cd7c..5c3908a9a4 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -396,9 +396,22 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
           operands(1), operands(2), computations(1));
       break;
     case HloOpcode::kCustomCall:
-      instruction = CreateCustomCall(proto.shape(), all_operands(),
-                                     proto.custom_call_target(),
-                                     proto.custom_call_opaque());
+      if (proto.constrain_layout()) {
+        // A proto RepeatedPtrField cannot be converted to a Span (it is a
+        // vector of pointers essentially) so create a vector of shapes to pass
+        // in.
+        std::vector<Shape> operand_shapes;
+        for (const Shape& shape : proto.operand_shapes_with_layout()) {
+          operand_shapes.push_back(shape);
+        }
+        instruction = CreateCustomCall(
+            proto.shape(), all_operands(), proto.custom_call_target(),
+            operand_shapes, proto.custom_call_opaque());
+      } else {
+        instruction = CreateCustomCall(proto.shape(), all_operands(),
+                                       proto.custom_call_target(),
+                                       proto.custom_call_opaque());
+      }
       if (proto.has_window()) {
         static_cast<HloCustomCallInstruction*>(instruction.get())
             ->set_window(proto.window());
@@ -1142,6 +1155,15 @@ bool HloInstruction::HasSideEffect() const {
       shape, operands, custom_call_target, opaque);
 }
 
+/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateCustomCall(
+    const Shape& shape, absl::Span<HloInstruction* const> operands,
+    absl::string_view custom_call_target,
+    absl::Span<const Shape> operand_shapes_with_layout,
+    absl::string_view opaque) {
+  return absl::make_unique<HloCustomCallInstruction>(
+      shape, operands, custom_call_target, opaque, operand_shapes_with_layout);
+}
+
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateTuple(
     absl::Span<HloInstruction* const> elements) {
   std::vector<Shape> element_shapes;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 374862c4b6..44f776ebac 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -734,6 +734,16 @@ class HloInstruction {
       const Shape& shape, absl::Span<HloInstruction* const> operands,
       absl::string_view custom_call_target, absl::string_view opaque = "");
 
+  // Overload which constrains the layouts of the operand and result. 'shape'
+  // and 'operand_shapes_with_layout' must have layouts.
+  // 'operand_shapes_with_layout' must have a compatible element for each
+  // operand.
+  static std::unique_ptr<HloInstruction> CreateCustomCall(
+      const Shape& shape, absl::Span<HloInstruction* const> operands,
+      absl::string_view custom_call_target,
+      absl::Span<const Shape> operand_shapes_with_layout,
+      absl::string_view opaque = "");
+
   // Creates a tuple instruction with the given elements. This is a convenience
   // wrapper around CreateVariadic.
   static std::unique_ptr<HloInstruction> CreateTuple(
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 152d8eacdb..2ec233eaec 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -1825,7 +1825,24 @@ HloCustomCallInstruction::HloCustomCallInstruction(
     : HloInstruction(HloOpcode::kCustomCall, shape),
       custom_call_target_(custom_call_target.begin(), custom_call_target.end()),
       opaque_(opaque.begin(), opaque.end()),
-      feature_group_count_(1) {
+      feature_group_count_(1),
+      layout_constrained_(false) {
+  for (auto operand : operands) {
+    AppendOperand(operand);
+  }
+}
+
+HloCustomCallInstruction::HloCustomCallInstruction(
+    const Shape& shape, absl::Span<HloInstruction* const> operands,
+    absl::string_view custom_call_target, absl::string_view opaque,
+    absl::Span<const Shape> operand_shapes_with_layout)
+    : HloInstruction(HloOpcode::kCustomCall, shape),
+      custom_call_target_(custom_call_target.begin(), custom_call_target.end()),
+      opaque_(opaque.begin(), opaque.end()),
+      feature_group_count_(1),
+      layout_constrained_(true),
+      operand_shapes_with_layout_(operand_shapes_with_layout.begin(),
+                                  operand_shapes_with_layout.end()) {
   for (auto operand : operands) {
     AppendOperand(operand);
   }
@@ -1843,6 +1860,12 @@ HloInstructionProto HloCustomCallInstruction::ToProto() const {
   proto.set_custom_call_target(custom_call_target_);
   proto.set_custom_call_opaque(opaque_);
   proto.set_feature_group_count(feature_group_count_);
+  if (layout_constrained()) {
+    proto.set_constrain_layout(true);
+    for (const Shape& shape : operand_shapes_with_layout_) {
+      *proto.add_operand_shapes_with_layout() = shape;
+    }
+  }
   return proto;
 }
 
@@ -1870,6 +1893,14 @@ std::vector<string> HloCustomCallInstruction::ExtraAttributesToStringImpl(
   if (!opaque_.empty()) {
     extra.push_back(StrCat("opaque=\"", CEscape(opaque_), "\""));
   }
+  if (layout_constrained()) {
+    std::vector<string> shape_strings;
+    for (const Shape& shape : operand_shapes_with_layout_) {
+      shape_strings.push_back(ShapeUtil::HumanStringWithLayout(shape));
+    }
+    extra.push_back(StrCat("operand_layout_constraints={",
+                           StrJoin(shape_strings, ", "), "}"));
+  }
   return extra;
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index e169604072..4c5fc759a3 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -1053,10 +1053,19 @@ class HloSelectAndScatterInstruction : public HloInstruction {
 
 class HloCustomCallInstruction : public HloInstruction {
  public:
-  explicit HloCustomCallInstruction(const Shape& shape,
-                                    absl::Span<HloInstruction* const> operands,
-                                    absl::string_view custom_call_target,
-                                    absl::string_view opaque);
+  HloCustomCallInstruction(const Shape& shape,
+                           absl::Span<HloInstruction* const> operands,
+                           absl::string_view custom_call_target,
+                           absl::string_view opaque);
+
+  // Constructor for a custom call with constrained layout. 'shape' and
+  // 'operands_with_layout' must all have layouts.
+  HloCustomCallInstruction(const Shape& shape,
+                           absl::Span<HloInstruction* const> operands,
+                           absl::string_view custom_call_target,
+                           absl::string_view opaque,
+                           absl::Span<const Shape> operand_shapes_with_layout);
+
   const Window& window() const override {
     CHECK(window_ != nullptr);
     return *window_;
@@ -1085,6 +1094,16 @@ class HloCustomCallInstruction : public HloInstruction {
   // Returns a serialized representation of this instruction.
   HloInstructionProto ToProto() const override;
 
+  // Returns whether the result and operand layouts are constrained.
+  bool layout_constrained() const { return layout_constrained_; }
+
+  // Returns the shapes (with layout) of the operands. CHECKs if this custom
+  // call does not have constrained layouts.
+  const std::vector<Shape>& operand_shapes_with_layout() const {
+    CHECK(layout_constrained());
+    return operand_shapes_with_layout_;
+  }
+
  private:
   std::vector<string> ExtraAttributesToStringImpl(
       const HloPrintOptions& options) const override;
@@ -1106,6 +1125,11 @@ class HloCustomCallInstruction : public HloInstruction {
   std::unique_ptr<ConvolutionDimensionNumbers> convolution_dimension_numbers_;
   // The number of feature groups. This is used for grouped convolutions.
   int64 feature_group_count_;
+  // Whether the result and operand layouts are constrained.
+  bool layout_constrained_;
+  // For layout-constrained custom calls, this vector holds the shape with
+  // layout for each operand.
+  std::vector<Shape> operand_shapes_with_layout_;
 };
 
 class HloPadInstruction : public HloInstruction {
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index dd62988bcc..96f9ff6654 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -174,6 +174,7 @@ class HloParser {
     kDistribution,
     kDomain,
     kPrecisionList,
+    kShapeList
   };
 
   struct AttrConfig {
@@ -240,6 +241,7 @@ class HloParser {
 
   bool ParseSliceRanges(SliceRanges* result);
   bool ParsePrecisionList(std::vector<PrecisionConfig::Precision>* result);
+  bool ParseShapeList(std::vector<Shape>* result);
   bool ParseInt64List(const TokKind start, const TokKind end,
                       const TokKind delim,
                       std::vector<tensorflow::int64>* result);
@@ -1341,6 +1343,7 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
       optional<Window> window;
       optional<ConvolutionDimensionNumbers> dnums;
       optional<int64> feature_group_count;
+      optional<std::vector<Shape>> operand_layout_constraints;
       attrs["custom_call_target"] = {/*required=*/true, AttrTy::kString,
                                      &custom_call_target};
       attrs["opaque"] = {/*required=*/false, AttrTy::kString, &opaque};
@@ -1349,12 +1352,52 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
                              AttrTy::kConvolutionDimensionNumbers, &dnums};
       attrs["feature_group_count"] = {/*required=*/false, AttrTy::kInt64,
                                       &feature_group_count};
+      attrs["operand_layout_constraints"] = {
+          /*required=*/false, AttrTy::kShapeList, &operand_layout_constraints};
       if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
         return false;
       }
-      instruction = builder->AddInstruction(
-          HloInstruction::CreateCustomCall(shape, operands, *custom_call_target,
-                                           opaque.has_value() ? *opaque : ""));
+      if (operand_layout_constraints.has_value()) {
+        if (!LayoutUtil::HasLayout(shape)) {
+          return Error(lexer_.GetLoc(),
+                       "Layout must be set on layout-constrained custom call");
+        }
+        if (operands.size() != operand_layout_constraints->size()) {
+          return Error(lexer_.GetLoc(),
+                       StrCat("Expected ", operands.size(),
+                              " operand layout constraints, ",
+                              operand_layout_constraints->size(), " given"));
+        }
+        for (int64 i = 0; i < operands.size(); ++i) {
+          const Shape& operand_shape_with_layout =
+              (*operand_layout_constraints)[i];
+          if (!LayoutUtil::HasLayout(operand_shape_with_layout)) {
+            return Error(lexer_.GetLoc(),
+                         StrCat("Operand layout constraint shape ",
+                                ShapeUtil::HumanStringWithLayout(
+                                    operand_shape_with_layout),
+                                " for operand ", i, " does not have a layout"));
+          }
+          if (!ShapeUtil::Compatible(operand_shape_with_layout,
+                                     operands[i]->shape())) {
+            return Error(
+                lexer_.GetLoc(),
+                StrCat(
+                    "Operand layout constraint shape ",
+                    ShapeUtil::HumanStringWithLayout(operand_shape_with_layout),
+                    " for operand ", i,
+                    " is not compatible with operand shape ",
+                    ShapeUtil::HumanStringWithLayout(operands[i]->shape())));
+          }
+        }
+        instruction = builder->AddInstruction(HloInstruction::CreateCustomCall(
+            shape, operands, *custom_call_target, *operand_layout_constraints,
+            opaque.has_value() ? *opaque : ""));
+      } else {
+        instruction = builder->AddInstruction(HloInstruction::CreateCustomCall(
+            shape, operands, *custom_call_target,
+            opaque.has_value() ? *opaque : ""));
+      }
       if (window.has_value()) {
         instruction->set_window(*window);
       }
@@ -2533,6 +2576,15 @@ bool HloParser::ParseAttributeHelper(
             ->emplace(result);
         return true;
       }
+      case AttrTy::kShapeList: {
+        std::vector<Shape> result;
+        if (!ParseShapeList(&result)) {
+          return false;
+        }
+        static_cast<optional<std::vector<Shape>>*>(attr_out_ptr)
+            ->emplace(result);
+        return true;
+      }
     }
   }();
   if (!success) {
@@ -2825,6 +2877,23 @@ bool HloParser::ParsePrecisionList(
                    parse_and_add_item);
 }
 
+// shapelist ::= '{' shapes '}'
+// precision_elements
+//   ::= /*empty*/
+//   ::= shape (',' shape)*
+bool HloParser::ParseShapeList(std::vector<Shape>* result) {
+  auto parse_and_add_item = [&]() {
+    Shape shape;
+    if (!ParseShape(&shape)) {
+      return false;
+    }
+    result->push_back(std::move(shape));
+    return true;
+  };
+  return ParseList(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma,
+                   parse_and_add_item);
+}
+
 // int64list ::= start int64_elements end
 // int64_elements
 //   ::= /*empty*/
@@ -2832,23 +2901,15 @@ bool HloParser::ParsePrecisionList(
 bool HloParser::ParseInt64List(const TokKind start, const TokKind end,
                                const TokKind delim,
                                std::vector<tensorflow::int64>* result) {
-  if (!ParseToken(start, StrCat("expects an int64 list starting with ",
-                                TokKindToString(start)))) {
-    return false;
-  }
-  if (lexer_.GetKind() == end) {
-    // empty
-  } else {
-    do {
-      tensorflow::int64 i;
-      if (!ParseInt64(&i)) {
-        return false;
-      }
-      result->push_back(i);
-    } while (EatIfPresent(delim));
-  }
-  return ParseToken(
-      end, StrCat("expects an int64 list to end with ", TokKindToString(end)));
+  auto parse_and_add_item = [&]() {
+    tensorflow::int64 i;
+    if (!ParseInt64(&i)) {
+      return false;
+    }
+    result->push_back(i);
+    return true;
+  };
+  return ParseList(start, end, delim, parse_and_add_item);
 }
 
 bool HloParser::ParseList(const TokKind start, const TokKind end,
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index 255123d331..17538c05bc 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -802,6 +802,43 @@ ENTRY %ConstantUnsignedNoOverflow () -> u64[] {
   ROOT %constant = u64[] constant(9223372036854775807)
 }
 
+)"
+},
+// CustomCallWithLayoutConstraints
+{
+"CustomCallWithLayoutConstraints",
+R"(HloModule CustomCallWithLayoutConstraints
+
+ENTRY %CustomCallWithLayoutConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] {
+  %p0 = f32[42,2,3]{0,1,2} parameter(0)
+  %p1 = f32[123,4]{0,1} parameter(1)
+  ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[123,4]{1,0}}
+}
+
+)"
+},
+// CustomCallWithLayoutConstraintsNoOperands
+{
+"CustomCallWithLayoutConstraintsNoOperands",
+R"(HloModule CustomCallWithLayoutConstraintsNoOperands
+
+ENTRY %CustomCallWithLayoutConstraints () -> f32[1,2,3] {
+  ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(), custom_call_target="baz", operand_layout_constraints={}
+}
+
+)"
+},
+// CustomCallWithLayoutConstraintsTupleShapes
+{
+"CustomCallWithLayoutConstraintsTupleShapes",
+R"(HloModule CustomCallWithLayoutConstraintsTupleShapes
+
+ENTRY %CustomCallWithLayoutConstraints (p0: (f32[2,2], f32[42,2,3]), p1: f32[123,4]) -> (f32[1,2,3], f32[1,2,3]) {
+  %p0 = (f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) parameter(0)
+  %p1 = f32[123,4]{0,1} parameter(1)
+  ROOT %custom-call = (f32[1,2,3]{0,2,1}, f32[1,2,3]{1,2,0}) custom-call((f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={(f32[2,2]{1,0}, f32[42,2,3]{2,0,1}), f32[123,4]{1,0}}
+}
+
 )"
 },
   });
@@ -2069,5 +2106,35 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] {
                              op::Broadcast(), op::Multiply(), op::Add()));
 }
 
+TEST_F(HloParserTest, CustomCallWrongNumberofOperandConstraints) {
+  const string original = R"(HloModule CustomCallWrongNumberofOperandConstraints
+
+ENTRY %CustomCallWrongNumberofOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] {
+  %p0 = f32[42,2,3]{0,1,2} parameter(0)
+  %p1 = f32[123,4]{0,1} parameter(1)
+  ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}}
+}
+
+)";
+  ExpectHasSubstr(ParseHloString(original).status().error_message(),
+                  "Expected 2 operand layout constraints, 1 given");
+}
+
+TEST_F(HloParserTest, CustomCallIncompatibleOperandConstraints) {
+  const string original = R"(HloModule CustomCallIncompatibleOperandConstraints
+
+ENTRY %CustomCallIncompatibleOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] {
+  %p0 = f32[42,2,3]{0,1,2} parameter(0)
+  %p1 = f32[123,4]{0,1} parameter(1)
+  ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[555,5]{1,0}}
+}
+
+)";
+  ExpectHasSubstr(ParseHloString(original).status().error_message(),
+                  "operand 1 is not compatible with operand shape");
+}
+
+// custom call incompatible shape.
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 496fe1795d..be3bee5975 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -360,7 +360,27 @@ Status ShapeVerifier::HandleCall(HloInstruction* call) {
   return CheckShape(call, call->to_apply()->root_instruction()->shape());
 }
 
-Status ShapeVerifier::HandleCustomCall(HloInstruction*) { return Status::OK(); }
+Status ShapeVerifier::HandleCustomCall(HloInstruction* instruction) {
+  const HloCustomCallInstruction* custom_call =
+      DynCast<const HloCustomCallInstruction>(instruction);
+  TF_RET_CHECK(custom_call != nullptr);
+  if (custom_call->layout_constrained()) {
+    // If the layout is constrained, verify all the respective shapes have
+    // layouts and that the constrained operand shapes match the shapes of the
+    // operands.
+    TF_RET_CHECK(LayoutUtil::HasLayout(custom_call->shape()));
+    TF_RET_CHECK(custom_call->operand_count() ==
+                 custom_call->operand_shapes_with_layout().size());
+    for (int64 i = 0; i < custom_call->operand_count(); ++i) {
+      const Shape& operand_shape_with_layout =
+          custom_call->operand_shapes_with_layout()[i];
+      TF_RET_CHECK(ShapeUtil::Compatible(custom_call->operand(i)->shape(),
+                                         operand_shape_with_layout));
+      TF_RET_CHECK(LayoutUtil::HasLayout(operand_shape_with_layout));
+    }
+  }
+  return Status::OK();
+}
 
 Status ShapeVerifier::HandleSlice(HloInstruction* slice) {
   return CheckShape(slice,
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index cc4a342e9d..ad65b147c1 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -419,6 +419,16 @@ Status LayoutAssignment::BuildHostChannelConstraints(
   return Status::OK();
 }
 
+namespace {
+
+bool IsLayoutConstrainedCustomCall(HloInstruction* instruction) {
+  const HloCustomCallInstruction* custom_call =
+      DynCast<HloCustomCallInstruction>(instruction);
+  return custom_call != nullptr && custom_call->layout_constrained();
+}
+
+}  // namespace
+
 Status LayoutAssignment::AddMandatoryConstraints(
     const ComputationLayout* computation_layout,
     ChannelLayoutConstraints* channel_constraints, HloComputation* computation,
@@ -434,7 +444,6 @@ Status LayoutAssignment::AddMandatoryConstraints(
   // Constrain layouts of instructions which define values with pre-existing
   // layouts.
   for (auto* instruction : computation->instructions()) {
-    Shape const* shape_with_layout = nullptr;
     if (instruction->opcode() == HloOpcode::kInfeed) {
       // Infeed layouts must match the layout of the original inserted
       // instruction.
@@ -456,17 +465,21 @@ Status LayoutAssignment::AddMandatoryConstraints(
         if (parameter_layout.LayoutIsSet()) {
           // Parameter layouts must match the respective layout in
           // ComputationLayout, if there is one.
-          shape_with_layout = &parameter_layout.shape();
+          TF_RETURN_IF_ERROR(constraints->SetInstructionLayout(
+              parameter_layout.shape(), instruction));
         }
       }
-    }
-    if (shape_with_layout != nullptr) {
+    } else if (IsLayoutConstrainedCustomCall(instruction)) {
+      const HloCustomCallInstruction* custom_call =
+          DynCast<HloCustomCallInstruction>(instruction);
       TF_RETURN_IF_ERROR(
-          constraints->SetInstructionLayout(*shape_with_layout, instruction));
-    }
-
-    if (instruction->opcode() == HloOpcode::kSend ||
-        instruction->opcode() == HloOpcode::kRecv) {
+          constraints->SetInstructionLayout(custom_call->shape(), custom_call));
+      for (int64 i = 0; i < custom_call->operand_count(); ++i) {
+        TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
+            custom_call->operand_shapes_with_layout()[i], custom_call, i));
+      }
+    } else if (instruction->opcode() == HloOpcode::kSend ||
+               instruction->opcode() == HloOpcode::kRecv) {
       CHECK(get_channel_constraints(instruction))
           << "Multi-module layout assignment requires ChannelLayoutConstraints";
       int64 channel_id = instruction->channel_id();
@@ -621,31 +634,6 @@ Status LayoutAssignment::AddMandatoryConstraints(
       TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
           false_computation_layout.parameter_shape(0), instruction, 2,
           /*mandatory=*/true));
-    } else if (instruction->opcode() == HloOpcode::kCustomCall) {
-      if (!CustomCallRequiresMajorFirstLayout(instruction)) {
-        continue;
-      }
-      // Add constraints for kCustomCall instruction operands and instructions.
-      // For now we only support major-first layouts for all inputs and outputs.
-      Shape result_shape = ShapeUtil::MakeShapeWithDescendingLayout(
-          instruction->shape().element_type(),
-          AsInt64Slice(instruction->shape().dimensions()));
-      TF_RETURN_IF_ERROR(
-          constraints->SetInstructionLayout(result_shape, instruction));
-      for (int64 i = 0; i < instruction->operand_count(); ++i) {
-        const Shape& operand_shape = instruction->operand(i)->shape();
-        // Opaque operands don't get a layout constraint.
-        if (ShapeUtil::IsOpaque(operand_shape)) {
-          continue;
-        }
-
-        Shape row_major_operand_shape =
-            ShapeUtil::MakeShapeWithDescendingLayout(
-                operand_shape.element_type(),
-                AsInt64Slice(operand_shape.dimensions()));
-        TF_RETURN_IF_ERROR(constraints->SetOperandLayout(
-            row_major_operand_shape, instruction, i));
-      }
     }
   }
   // Finally set the result layout to match ComputationLayout, if there is one.
@@ -676,16 +664,18 @@ Status CheckCallLayout(HloInstruction* call,
   return Status::OK();
 }
 
-// Custom calls have fixed input and output layouts.
-Status CheckCustomCallLayout(HloInstruction* custom_call) {
-  for (const HloInstruction* operand : custom_call->operands()) {
-    TF_RET_CHECK(
-        ShapeUtil::IsOpaque(operand->shape()) ||
-        LayoutUtil::IsMonotonicWithDim0Major(operand->shape().layout()));
+// Operands of layout-constrained custom calls must match the expected
+// constrained layouts.
+Status CheckCustomCallLayout(HloInstruction* instruction) {
+  if (IsLayoutConstrainedCustomCall(instruction)) {
+    const HloCustomCallInstruction* custom_call =
+        DynCast<HloCustomCallInstruction>(instruction);
+    for (int64 i = 0; i < custom_call->operand_count(); ++i) {
+      TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(
+          custom_call->operand(i)->shape(),
+          custom_call->operand_shapes_with_layout()[i]));
+    }
   }
-  TF_RET_CHECK(
-      ShapeUtil::IsOpaque(custom_call->shape()) ||
-      LayoutUtil::IsMonotonicWithDim0Major(custom_call->shape().layout()));
   return Status::OK();
 }
 
@@ -932,9 +922,7 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) {
               FindOrDie(computation_layouts_, instruction->to_apply())));
           break;
         case HloOpcode::kCustomCall:
-          if (CustomCallRequiresMajorFirstLayout(instruction)) {
-            TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction));
-          }
+          TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction));
           break;
         case HloOpcode::kFusion:
           TF_RETURN_IF_ERROR(CheckFusionLayout(instruction));
@@ -1554,11 +1542,11 @@ Status LayoutAssignment::CalculateComputationLayout(
 
 Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) {
   // Clear existing layouts of the instructions.  All layouts must be assigned
-  // by the LayoutAssignment pass, except for those on infeeds, parameters,
-  // and the computation result. The latter two are specified in
-  // computation_layout, so we only need to keep the existing layouts for
-  // infeeds.  Clearing the layouts here avoids hiding potential bugs in the
-  // layout assignment pass that may accidentally use the existing layout.
+  // by the LayoutAssignment pass, except for those on parameters, the
+  // computation result, and a couple special cases. The former two are
+  // specified in computation_layout.  Clearing the layouts here avoids hiding
+  // potential bugs in the layout assignment pass that may accidentally use the
+  // existing layout.
   for (HloInstruction* instruction : computation->instructions()) {
     if (instruction->opcode() == HloOpcode::kBitcast) {
       // bitcasts are inherently layout sensitive and so a bitcast instruction
@@ -1567,7 +1555,9 @@ Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) {
           "Unexpected bitcast operation seen during layout assignment: %s.",
           instruction->ToString());
     }
-    if (instruction->opcode() != HloOpcode::kInfeed) {
+    // Some instructions carry mandatory layouts in their shape.
+    if (instruction->opcode() != HloOpcode::kInfeed &&
+        !IsLayoutConstrainedCustomCall(instruction)) {
       LayoutUtil::ClearLayout(instruction->mutable_shape());
     }
   }
@@ -1802,6 +1792,18 @@ StatusOr<bool> LayoutAssignment::Run(HloModule* module) {
   }
   TF_RETURN_IF_ERROR(Init());
 
+  // Verify computation layout is sane.
+  const HloComputation* entry = module->entry_computation();
+  TF_RET_CHECK(entry_computation_layout_->parameter_count() ==
+               entry->num_parameters());
+  for (int64 i = 0; i < entry->num_parameters(); ++i) {
+    TF_RET_CHECK(
+        ShapeUtil::Compatible(entry_computation_layout_->parameter_shape(i),
+                              entry->parameter_instruction(i)->shape()));
+  }
+  TF_RET_CHECK(ShapeUtil::Compatible(entry_computation_layout_->result_shape(),
+                                     entry->root_instruction()->shape()));
+
   // We do two passes. The first one we pass a nullptr ComputationLayout to
   // the RunOnComputation() calls (for non entry computations), and we register
   // the ComputationLayout which are naturally flowing in DFS fashion to the
@@ -1873,7 +1875,6 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kCrossReplicaSum:
     case HloOpcode::kAllToAll:
     case HloOpcode::kCollectivePermute:
-    case HloOpcode::kCustomCall:
     case HloOpcode::kDivide:
     case HloOpcode::kDynamicSlice:
     case HloOpcode::kDynamicUpdateSlice:
@@ -1930,6 +1931,7 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kConstant:
     case HloOpcode::kConvolution:
     case HloOpcode::kCopy:
+    case HloOpcode::kCustomCall:
     case HloOpcode::kDomain:
     case HloOpcode::kDot:
     case HloOpcode::kFusion:
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index 2d48e12263..cb56f4cd19 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -333,19 +333,6 @@ class LayoutAssignment : public HloModulePass {
       const ResultLayoutConstraint& layout_constraint,
       LayoutConstraints* constraints);
 
-  // By default LayoutAssignment ensures that inputs and outputs of CustomCalls
-  // have the "major-first" layout (i.e.  {n, n-1, ..., 0}).
-  //
-  // If this function returns true, LayoutAssignment does not set a layout for
-  // the given CustomCall.  It's up to the backend to set one in
-  // AddBackendConstraints, if necessary.
-  //
-  // Precondition: instruction->opcode() == HloOpcode::kCustomCall.
-  virtual bool CustomCallRequiresMajorFirstLayout(
-      const HloInstruction* /*instruction*/) {
-    return true;
-  }
-
   // Called after layouts of an instruction have been finalized to allow
   // subclasses to check for platform specific assumptions.
   virtual Status Verify(const HloInstruction* instruction) {
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 2c549cd872..ff6fdb5e4a 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -65,6 +65,27 @@ class LayoutAssignmentTest : public HloVerifiedTestBase {
         FindInstruction(module, name)->shape().layout().minor_to_major();
     return std::vector<int64>(minor_to_major.begin(), minor_to_major.end());
   }
+
+  void ExpectLayoutIs(const Shape& shape,
+                      absl::Span<const int64> minor_to_major) {
+    const Layout expected = LayoutUtil::MakeLayout(minor_to_major);
+    EXPECT_TRUE(LayoutUtil::Equal(shape.layout(), expected))
+        << "Expected layout " << expected << ", actual " << shape.layout();
+  }
+
+  void ExpectTupleLayoutIs(
+      const Shape& shape,
+      std::initializer_list<absl::Span<const int64>> minor_to_majors) {
+    int i = 0;
+    for (const absl::Span<const int64> minor_to_major : minor_to_majors) {
+      const Layout expected = LayoutUtil::MakeLayout(minor_to_major);
+      const Layout& actual = ShapeUtil::GetTupleElementShape(shape, i).layout();
+      EXPECT_TRUE(LayoutUtil::Equal(actual, expected))
+          << "Expected tuple element " << i << " layout " << expected
+          << ", actual " << actual;
+      ++i;
+    }
+  }
 };
 
 TEST_F(LayoutAssignmentTest, ComputationLayout) {
@@ -1102,5 +1123,174 @@ TEST_F(LayoutAssignmentTest, TupleCopyOnLayoutMismatch) {
   EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0));
 }
 
+TEST_F(LayoutAssignmentTest, CustomCallNotLayoutConstrained) {
+  const char* module_str = R"(
+HloModule CustomCallNotLayoutConstrained
+
+ENTRY %CustomCallWithNotLayoutConstrained (p: f32[42,2,3]) -> f32[1,2,3,4] {
+  %p = f32[42,2,3] parameter(0)
+  ROOT %custom-call = f32[1,2,3,4] custom-call(f32[42,2,3] %p), custom_call_target="baz"
+}
+)";
+  // Try with a couple different layouts. In each case the custom calls operand
+  // and result layout should match that of the computation.
+  {
+    TF_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<VerifiedHloModule> module,
+        ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+    ComputationLayout computation_layout = module->entry_computation_layout();
+    *computation_layout.mutable_parameter_layout(0) =
+        ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 2, 1}));
+    *computation_layout.mutable_result_layout() = ShapeLayout(
+        ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {3, 2, 0, 1}));
+    AssignLayouts(module.get(), &computation_layout);
+
+    HloInstruction* root = module->entry_computation()->root_instruction();
+    ASSERT_THAT(root, op::CustomCall(op::Parameter()));
+    ExpectLayoutIs(root->shape(), {3, 2, 0, 1});
+    ExpectLayoutIs(root->operand(0)->shape(), {0, 2, 1});
+  }
+  {
+    TF_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<VerifiedHloModule> module,
+        ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+    ComputationLayout computation_layout = module->entry_computation_layout();
+    *computation_layout.mutable_parameter_layout(0) =
+        ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 1, 2}));
+    *computation_layout.mutable_result_layout() = ShapeLayout(
+        ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {0, 2, 3, 1}));
+    AssignLayouts(module.get(), &computation_layout);
+
+    HloInstruction* root = module->entry_computation()->root_instruction();
+    ASSERT_THAT(root, op::CustomCall(op::Parameter()));
+    ExpectLayoutIs(root->shape(), {0, 2, 3, 1});
+    ExpectLayoutIs(root->operand(0)->shape(), {0, 1, 2});
+  }
+}
+
+TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrained) {
+  const char* module_str = R"(
+HloModule CustomCallLayoutConstrained
+
+ENTRY %CustomCallWithLayoutConstraints (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] {
+  %p0 = f32[4,4] parameter(0)
+  %p1 = f32[2,3] parameter(1)
+  ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(f32[4,4] %p0, f32[2,3] %p1), custom_call_target="baz", operand_layout_constraints={f32[4,4]{0,1}, f32[2,3]{1,0}}
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<VerifiedHloModule> module,
+      ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+  ComputationLayout computation_layout = module->entry_computation_layout();
+  *computation_layout.mutable_parameter_layout(0) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}));
+  *computation_layout.mutable_parameter_layout(1) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0}));
+  *computation_layout.mutable_result_layout() = ShapeLayout(
+      ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3}));
+  AssignLayouts(module.get(), &computation_layout);
+
+  // The custom call should be partially encapsulated in kCopy instructions
+  // because of the layout mismatches.
+  ASSERT_THAT(module->entry_computation()->root_instruction(),
+              op::Copy(op::CustomCall(op::Copy(), op::Parameter())));
+
+  const HloInstruction* custom_call =
+      module->entry_computation()->root_instruction()->operand(0);
+  ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1});
+  ExpectLayoutIs(custom_call->operand(0)->shape(), {0, 1});
+  ExpectLayoutIs(custom_call->operand(1)->shape(), {1, 0});
+}
+
+TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedZeroOperands) {
+  const char* module_str = R"(
+HloModule CustomCallLayoutConstrainedZeroOperands
+
+ENTRY %CustomCallLayoutConstrainedZeroOperands () -> f32[1,2,3,4] {
+  ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(), custom_call_target="baz", operand_layout_constraints={}
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<VerifiedHloModule> module,
+      ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+  ComputationLayout computation_layout = module->entry_computation_layout();
+  *computation_layout.mutable_result_layout() = ShapeLayout(
+      ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3}));
+  AssignLayouts(module.get(), &computation_layout);
+
+  ASSERT_THAT(module->entry_computation()->root_instruction(),
+              op::Copy(op::CustomCall()));
+
+  const HloInstruction* custom_call =
+      module->entry_computation()->root_instruction()->operand(0);
+  ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1});
+}
+
+TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleOperand) {
+  const char* module_str = R"(
+HloModule CustomCallLayoutConstrainedTupleOperand
+
+ENTRY %CustomCallLayoutConstrainedTupleOperand (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] {
+  %p0 = f32[4,4] parameter(0)
+  %p1 = f32[2,3] parameter(1)
+  %tuple = (f32[4,4], f32[2,3]) tuple(%p0, %p1)
+  ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(%tuple), custom_call_target="baz", operand_layout_constraints={(f32[4,4]{1,0}, f32[2,3]{0,1})}
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<VerifiedHloModule> module,
+      ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+  ComputationLayout computation_layout = module->entry_computation_layout();
+  *computation_layout.mutable_parameter_layout(0) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}));
+  *computation_layout.mutable_parameter_layout(1) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0}));
+  *computation_layout.mutable_result_layout() = ShapeLayout(
+      ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3}));
+  AssignLayouts(module.get(), &computation_layout);
+
+  HloInstruction* root = module->entry_computation()->root_instruction();
+  ExpectLayoutIs(root->shape(), {2, 1, 0, 3});
+
+  ASSERT_THAT(module->entry_computation()->root_instruction(),
+              op::Copy(op::CustomCall(op::Tuple())));
+
+  const HloInstruction* custom_call =
+      module->entry_computation()->root_instruction()->operand(0);
+  ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1});
+  ExpectTupleLayoutIs(custom_call->operand(0)->shape(), {{1, 0}, {0, 1}});
+}
+
+TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleResult) {
+  const char* module_str = R"(
+HloModule CustomCallLayoutConstrainedTupleResult
+
+ENTRY %CustomCallLayoutConstrainedTupleResult (p0: f32[4,4]) -> (f32[4,4]{1,0}, f32[2,3]{0,1}) {
+  %p0 = f32[4,4] parameter(0)
+  ROOT %custom-call = (f32[4,4]{1,0}, f32[2,3]{0,1}) custom-call(%p0), custom_call_target="baz", operand_layout_constraints={f32[4,4]{1,0}}
+}
+)";
+  // Try with a couple different layouts. In each case the custom calls operand
+  // and result layout should match that of the computation.
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<VerifiedHloModule> module,
+      ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest()));
+  ComputationLayout computation_layout = module->entry_computation_layout();
+  *computation_layout.mutable_parameter_layout(0) =
+      ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}));
+  *computation_layout.mutable_result_layout() =
+      ShapeLayout(ShapeUtil::MakeTupleShape(
+          {ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}),
+           ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})}));
+  AssignLayouts(module.get(), &computation_layout);
+
+  ExpectTupleLayoutIs(module->entry_computation()->root_instruction()->shape(),
+                      {{1, 0}, {1, 0}});
+
+  const HloInstruction* custom_call =
+      FindInstruction(module.get(), "custom-call");
+  ExpectTupleLayoutIs(custom_call->shape(), {{1, 0}, {0, 1}});
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index d244923532..7f0201942b 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -1645,7 +1645,7 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape,
 }
 
 std::ostream& operator<<(std::ostream& out, const Shape& shape) {
-  out << ShapeUtil::HumanString(shape);
+  out << ShapeUtil::HumanStringWithLayout(shape);
   return out;
 }
 
diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc
index a693fa3595..001490c6a8 100644
--- a/tensorflow/compiler/xla/tests/custom_call_test.cc
+++ b/tensorflow/compiler/xla/tests/custom_call_test.cc
@@ -105,8 +105,7 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR2F32Reduce)) {
   LiteralTestUtil::ExpectR0Near<float>(10.0f, result, error_spec_);
 }
 
-XLA_TEST_F(CustomCallTest,
-           DISABLED_ON_GPU(CustomCall_UsedInOtherComputations)) {
+XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(UsedInOtherComputations)) {
   auto module = CreateNewModule();
   auto b = HloComputation::Builder(TestName());
 
@@ -130,6 +129,53 @@ XLA_TEST_F(CustomCallTest,
       Array3D<float>{{{2, 3}, {4, 5}}, {{3, 4}, {5, 6}}}, result);
 }
 
+XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(InputAndOutputLayoutDiffer)) {
+  auto module = CreateNewModule();
+  auto b = HloComputation::Builder(TestName());
+
+  auto input =
+      b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p"));
+  b.AddInstruction(
+      HloInstruction::CreateCustomCall(r2f32_, {input}, "Add1ToValues"));
+
+  module->AddEntryComputation(b.Build());
+  ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0}));
+  ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1}));
+
+  Literal argument = LiteralUtil::CreateR2<float>({{1.f, 2.f}, {3.f, 4.f}});
+
+  // Note, the expected result is transposed! This is because the input and
+  // output layouts of the custom call differ and the called function just
+  // blindly adds one to each element.
+  Literal result = ExecuteAndTransfer(std::move(module), {&argument});
+  LiteralTestUtil::ExpectR2Equal<float>({{2.f, 4.f}, {3.f, 5.f}}, result);
+}
+
+XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(LayoutConstrained)) {
+  // The argument and result of the computation are set to different layouts,
+  // but the custom call is layout constrained to a fixed operand and result
+  // layout, so the correct result should be produced.
+  auto module = CreateNewModule();
+  auto b = HloComputation::Builder(TestName());
+
+  auto input =
+      b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p"));
+
+  const Shape& r2f32_dim0_major =
+      ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0});
+  b.AddInstruction(HloInstruction::CreateCustomCall(
+      r2f32_dim0_major, {input}, "Add1ToValues", {r2f32_dim0_major}));
+
+  module->AddEntryComputation(b.Build());
+  ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0}));
+  ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1}));
+
+  Literal argument = LiteralUtil::CreateR2<float>({{1.f, 2.f}, {3.f, 4.f}});
+
+  Literal result = ExecuteAndTransfer(std::move(module), {&argument});
+  LiteralTestUtil::ExpectR2Equal<float>({{2.f, 3.f}, {4.f, 5.f}}, result);
+}
+
 class CustomCallClientAPITest : public ClientLibraryTestBase {};
 
 // When using the client API, CustomCall targets can't begin with '$' -- these
-- 
GitLab


From af5b714179ff5e279ba27c024f453e2d75636ac9 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 8 Oct 2018 14:43:55 -0700
Subject: [PATCH 0538/1085] Add more logging to the convolution
 transformations.

PiperOrigin-RevId: 216252980
---
 .../xla/service/gpu/cudnn_convolution_algorithm_picker.cc      | 3 +++
 .../compiler/xla/service/gpu/cudnn_convolution_rewriter.cc     | 3 +++
 .../xla/service/gpu/cudnn_fused_convolution_rewriter.cc        | 3 ++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index 590c0a7d54..6d4a72038f 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -360,6 +360,9 @@ StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnInstruction(
   HloInstruction* new_call = computation->AddInstruction(
       instr->CloneWithNewOperands(new_call_shape, instr->operands()));
 
+  VLOG(1) << "Replacing convolution " << instr->ToString() << " with "
+          << new_call->ToString();
+
   TF_RETURN_IF_ERROR(new_call->set_backend_config(backend_config));
 
   // Repackage new_call so it has the same shape as the original call, namely
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
index ef29237301..437d25727e 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
@@ -525,6 +525,9 @@ StatusOr<bool> RunOnInstruction(HloInstruction* conv) {
   TF_RETURN_IF_ERROR(
       custom_call->set_backend_config(GetDefaultBackendConfig()));
 
+  VLOG(1) << "Replacing convolution " << conv->ToString() << " with "
+          << custom_call->ToString();
+
   // The CustomCall returns a tuple (conv_result, scratch_memory).  Extract out
   // the conv result and replace `conv` with it.
   TF_RETURN_IF_ERROR(conv->parent()->ReplaceWithNewInstruction(
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
index 3761c19cfc..d508cbc2e1 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
@@ -234,7 +234,8 @@ StatusOr<std::unique_ptr<HloInstruction>> TryRewriteToCudnnForwardRelu(
   config.set_side_input_scale(alpha_side_input);
   TF_RETURN_IF_ERROR(new_conv->set_backend_config(config));
 
-  VLOG(1) << "Rewriting " << conv->name() << " to " << new_conv->name();
+  VLOG(1) << "Replacing convolution " << conv->ToString() << " with "
+          << new_conv->ToString();
   return HloInstruction::CreateGetTupleElement(conv->shape().tuple_shapes(0),
                                                new_conv, 0);
 }
-- 
GitLab


From b3bd7b378d00190fef831092836a5df62e39e7ed Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Mon, 8 Oct 2018 14:44:37 -0700
Subject: [PATCH 0539/1085] Ignore args and kwargs for defun's get_concrete_fn
 if `PolymorphicFunction` was created with an input_signature.

PiperOrigin-RevId: 216253122
---
 tensorflow/python/eager/function.py      | 14 ++++++++++++++
 tensorflow/python/eager/function_test.py |  9 ++++-----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 99bf375ea7..ff138cad1e 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -664,6 +664,11 @@ class Function(object):
 
     return self._build_call_outputs(outputs)
 
+  @property
+  def name(self):
+    """Function name."""
+    return self._inference_function.name
+
   @property
   def graph(self):
     """Returns the graph from which this function was constructed."""
@@ -721,6 +726,10 @@ class Function(object):
     return nest.map_structure(lambda x: x.dtype if x is not None else None,
                               self._func_graph.structured_outputs)
 
+  def add_to_graph(self, g):
+    """Adds this function into the graph g."""
+    return self._inference_function.add_to_graph(g)
+
   def _construct_backprop_function(self):
     """Constructs the backprop function object for this function."""
     backwards_graph = FuncGraph(_backward_name(self._func_graph.name))
@@ -1133,6 +1142,8 @@ class PolymorphicFunction(object):
       *args: inputs to specialize on.
       **kwargs: inputs to specialize on.
     """
+    if self._input_signature:
+      args, kwargs = None, None
     graph_function, _ = self._maybe_define_function(args, kwargs)
     return graph_function
 
@@ -1322,6 +1333,9 @@ def register(func, *args, **kwargs):
   function definition into graph. Register function with different input param
   will result into multiple version of functions registered in graph.
 
+  Also, `args` and `kwargs` are ignored if this `PolymorphicFunction` was
+  created with an `input_signature`.
+
   Args:
     func: the PolymorphicFunction instance that generated by a @defun
     *args: input arguments for the Python function.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index e46bde098b..953f4300cf 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1841,11 +1841,10 @@ class FunctionTest(test.TestCase):
         # pylint: disable=protected-access
         self.assertEqual(len(graph._functions), 3)
 
-        # Test input param shape mismatch
-        t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
-        with self.assertRaisesRegexp(
-            ValueError, 'Python inputs incompatible with input_signature'):
-          function.register(defun_matmul, t2, t2)
+        # Test register function with cache, note inputs are ignored.
+        function.register(defun_matmul)
+        graph = ops.get_default_graph()
+        self.assertEqual(len(graph._functions), 3)
 
   def testRegisterFunctionWithCache(self):
     def matmul(x, y):
-- 
GitLab


From 4e6045b06ca1d80e7c86a92ebbe839d849d5ae4a Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 19 Sep 2018 13:25:49 -0700
Subject: [PATCH 0540/1085] Add MatchingFilesDatasetOp

---
 tensorflow/core/kernels/data/BUILD            |  15 +
 .../kernels/data/matching_files_dataset_op.cc | 330 ++++++++++++++++++
 tensorflow/core/ops/dataset_ops.cc            |  12 +
 tensorflow/python/data/kernel_tests/BUILD     |  22 ++
 .../matching_files_dataset_op_test.py         | 240 +++++++++++++
 tensorflow/python/data/ops/dataset_ops.py     |  37 +-
 6 files changed, 650 insertions(+), 6 deletions(-)
 create mode 100644 tensorflow/core/kernels/data/matching_files_dataset_op.cc
 create mode 100644 tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 37c1c54786..23ddf32be7 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -750,6 +750,7 @@ tf_kernel_library(
         ":map_and_batch_dataset_op",
         ":map_dataset_op",
         ":map_defun_op",
+        ":matching_files_dataset_op",
         ":model_dataset_op",
         ":multi_device_iterator_ops",
         ":optimize_dataset_op",
@@ -808,3 +809,17 @@ tf_kernel_library(
         "//tensorflow/core:lib_internal",
     ],
 )
+
+
+tf_kernel_library(
+    name = "matching_files_dataset_op",
+    srcs = ["matching_files_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
new file mode 100644
index 0000000000..b43517d0f5
--- /dev/null
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -0,0 +1,330 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <queue>
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/lib/io/buffered_inputstream.h"
+#include "tensorflow/core/lib/io/inputbuffer.h"
+#include "tensorflow/core/lib/io/random_inputstream.h"
+#include "tensorflow/core/lib/io/record_reader.h"
+#include "tensorflow/core/lib/io/zlib_compression_options.h"
+#include "tensorflow/core/lib/io/zlib_inputstream.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+
+namespace tensorflow {
+namespace data {
+
+namespace {
+
+constexpr int kNumThreads = 8;
+
+// Run a function in parallel using a ThreadPool, but skip the ThreadPool
+// on the iOS platform due to its problems with more than a few threads.
+void ForEach(int first, int last, const std::function<void(int)>& f) {
+#if TARGET_OS_IPHONE
+  for (int i = first; i < last; i++) {
+    f(i);
+  }
+#else
+  int num_threads = std::min(kNumThreads, last - first);
+  thread::ThreadPool threads(Env::Default(), "ForEach", num_threads);
+  for (int i = first; i < last; i++) {
+    threads.Schedule([f, i] { f(i); });
+  }
+#endif
+}
+
+}  // namespace
+
+namespace {
+
+class MatchingFilesDatasetOp : public DatasetOpKernel {
+ public:
+  using DatasetOpKernel::DatasetOpKernel;
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override {
+    const Tensor* patterns_t;
+    // NOTE(originally from ringwalt): Changing the input name "pattern" to
+    // "patterns" would break existing graphs.
+    OP_REQUIRES_OK(ctx, ctx->input("pattern", &patterns_t));
+    OP_REQUIRES(
+        ctx,
+        TensorShapeUtils::IsScalar(patterns_t->shape()) ||
+            TensorShapeUtils::IsVector(patterns_t->shape()),
+        errors::InvalidArgument(
+            "Input patterns tensor must be scalar or vector, but had shape: ",
+            patterns_t->shape().DebugString()));
+    const auto patterns = patterns_t->flat<string>();
+    size_t num_patterns = static_cast<size_t >(patterns.size());
+    std::vector<string> pattern_strs;
+    pattern_strs.reserve(num_patterns);
+
+    for (int i = 0; i < num_patterns; ++i) {
+      pattern_strs.push_back(patterns(i));
+    }
+
+    // keep the elements in the descending order
+    std::sort(pattern_strs.begin(), pattern_strs.end(), std::greater<string>());
+    *output = new Dataset(ctx, std::move(pattern_strs));
+  }
+
+ private:
+  class Dataset : public DatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, std::vector<string> patterns)
+        : DatasetBase(DatasetContext(ctx)),
+          pattern_(std::move(patterns)) {}
+
+    std::unique_ptr<IteratorBase> MakeIteratorInternal(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::FileName")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      static DataTypeVector* dtypes = new DataTypeVector({DT_STRING});
+      return *dtypes;
+    }
+
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      static std::vector<PartialTensorShape>* shapes =
+          new std::vector<PartialTensorShape>({{}});
+      return *shapes;
+    }
+
+    string DebugString() const override {
+      return "MatchingFilesDatasetOp::Dataset";
+    }
+
+   protected:
+    Status AsGraphDefInternal(SerializationContext* ctx,
+                              DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* pattern = nullptr;
+      TF_RETURN_IF_ERROR(b->AddVector(pattern_, &pattern));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {pattern}, output));
+      return Status::OK();
+    }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params) {}
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        mutex_lock l(mu_);
+        Status ret;
+
+        while (!filepath_queue_.empty() ||
+            current_pattern_index_ < dataset()->pattern_.size()) {
+          // all the elements in the heap will be the matched file name or the
+          // potential directory
+          if (!filepath_queue_.empty()) {
+            string cur_file = filepath_queue_.top();
+            filepath_queue_.pop();
+
+            // we can also use isDectory() here. But IsDirectory call can be
+            // expensive for some FS
+            if (ctx->env()->MatchPath(cur_file, current_pattern_)){
+              Tensor filepath_tensor(ctx->allocator({}), DT_STRING, {});
+              filepath_tensor.scalar<string>()() = cur_file;
+              out_tensors->emplace_back(std::move(filepath_tensor));
+              *end_of_sequence = false;
+              return Status::OK();
+            }
+
+            // in this case, cur_file is a directory. Then create a sub-pattern
+            // to continue the search
+            size_t pos = current_pattern_.find_first_of("*?[\\");
+            size_t len = current_pattern_.size() - pos;
+            string cur_pattern_suffix = current_pattern_.substr(pos, len);
+            string sub_pattern = strings::StrCat(cur_file,
+                                                 "/",
+                                                 cur_pattern_suffix);
+            Status s = UpdateIterator(ctx->env(), sub_pattern);
+            ret.Update(s);
+          } else {
+            // search a new pattern
+            current_pattern_ = dataset()->pattern_[current_pattern_index_];
+            Status s = UpdateIterator(ctx->env(), current_pattern_);
+            ret.Update(s);
+            ++current_pattern_index_;
+          }
+        }
+
+        *end_of_sequence = true;
+        return Status::OK();
+      }
+
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(writer->WriteScalar(
+            full_name("current_pattern_index"),
+            current_pattern_index_));
+
+        TF_RETURN_IF_ERROR(writer->WriteScalar(
+            full_name("current_pattern"),
+            current_pattern_));
+
+        if (!filepath_queue_.empty()) {
+          TF_RETURN_IF_ERROR(writer->WriteScalar(
+              full_name("queue_size"), filepath_queue_.size()));
+          for (int i = 0; i < filepath_queue_.size(); ++i) {
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                full_name(strings::StrCat("queue_element_", i)),
+                filepath_queue_.top()));
+            filepath_queue_.pop();
+          }
+        }
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(mu_);
+        int64 current_pattern_index;
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_pattern_index"),
+                                              &current_pattern_index));
+        current_pattern_index_ = size_t(current_pattern_index);
+
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_pattern"),
+                                              &current_pattern_));
+
+        int64 queue_size;
+        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("queue_size"),
+                                              &queue_size));
+        for (int i = static_cast<int>(queue_size - 1); i >= 0; --i) {
+          string element;
+          TF_RETURN_IF_ERROR(reader->ReadScalar(
+              full_name(strings::StrCat("queue_element_", i)), &element));
+          filepath_queue_.push(element);
+        }
+        return Status::OK();
+      }
+
+     private:
+      Status UpdateIterator(Env *env, const string &pattern)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        string fixed_prefix = pattern.substr(0, pattern.find_first_of("*?[\\"));
+        string eval_pattern = pattern;
+        string dir(io::Dirname(fixed_prefix));
+
+        // If dir is empty then we need to fix up fixed_prefix and eval_pattern to
+        // include . as the top level directory.
+        if (dir.empty()) {
+          dir = ".";
+          fixed_prefix = io::JoinPath(dir, fixed_prefix);
+          eval_pattern = io::JoinPath(dir, pattern);
+        }
+
+        FileSystem* fs;
+        TF_RETURN_IF_ERROR(env->GetFileSystemForFile(dir, &fs));
+
+        filepath_queue_.push(dir);
+        Status ret;  //Status to return
+        // children_dir_status holds is_dir status for children. It can have three
+        // possible values: OK for true; FAILED_PRECONDITION for false; CANCELLED
+        // if we don't calculate IsDirectory (we might do that because there isn't
+        // any point in exploring that child path).
+
+        // DFS to find the first element in the iterator
+        while (!filepath_queue_.empty()) {
+          string cur_dir = filepath_queue_.top();
+          filepath_queue_.pop();
+          std::vector<string> children;
+          Status s = fs->GetChildren(cur_dir, &children);
+          ret.Update(s);
+
+          // if cur_dir has no children, there will two possible situations: 1)
+          // the cur_dir is an empty dir; 2) the cur_dir is actual a file
+          // instead of a director. For the first one, continue to search the
+          // heap; For the second one, if the file matches the pattern, add
+          // it to the heap and finish the search; otherwise, continue the next
+          // search
+          if (children.empty()) {
+            if (env->MatchPath(cur_dir, current_pattern_)) {
+              filepath_queue_.push(cur_dir);
+              return ret;
+            } else {
+              continue;
+            }
+          }
+
+          std::map<string, Status> children_dir_status;
+          // This IsDirectory call can be expensive for some FS. Parallelizing it.
+          ForEach(0, children.size(),
+                  [fs, &cur_dir, &children, &fixed_prefix,
+                      &children_dir_status] (int i) {
+                    const string child_path = io::JoinPath(cur_dir, children[i]);
+                    // In case the child_path doesn't start with the fixed_prefix then
+                    // we don't need to explore this path.
+                    if (!str_util::StartsWith(child_path, fixed_prefix)) {
+                      children_dir_status[child_path] =
+                          Status(tensorflow::error::CANCELLED,
+                                 "Operation not needed");
+                    } else {
+                      children_dir_status[child_path] = fs->IsDirectory(child_path);
+                    }
+                  });
+
+          for (const auto &child : children) {
+            const string child_dir_path = io::JoinPath(cur_dir, child);
+            const Status child_dir_status = children_dir_status[child];
+            // If the IsDirectory call was cancelled we bail.
+            if (child_dir_status.code() == tensorflow::error::CANCELLED) {
+              continue;
+            }
+
+            if (child_dir_status.ok()) {
+              //push the child dir for next search
+              filepath_queue_.push(child_dir_path);
+            } else {
+              // this case will be a file; if the file match the pattern, push
+              // it to the heap; otherwise, ignore it
+              if (env->MatchPath(child_dir_path, eval_pattern)) {
+                filepath_queue_.push(child_dir_path);
+              }
+            }
+          }
+        }
+        return ret;
+      }
+
+      mutex mu_;
+      //std::unique_ptr<std::priority_queue<string>> filepath_queue_ GUARDED_BY(mu_);
+      std::priority_queue<string> filepath_queue_ GUARDED_BY(mu_); // = new std::priority_queue<string>;
+      size_t current_pattern_index_ GUARDED_BY(mu_) = 0;
+      string current_pattern_ GUARDED_BY(mu_);
+    };
+
+    const std::vector<string> pattern_;
+  };
+};
+
+REGISTER_KERNEL_BUILDER(Name("MatchingFilesDataset").Device(DEVICE_CPU),
+                        MatchingFilesDatasetOp);
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
\ No newline at end of file
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index ec22eee874..26c2756836 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -619,6 +619,18 @@ REGISTER_OP("TextLineDataset")
       return shape_inference::ScalarShape(c);
     });
 
+REGISTER_OP("MatchingFilesDataset")
+    .Input("pattern: string")
+    .Output("handle: variant")
+    .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
+        // stateful to inhibit constant folding.
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      shape_inference::ShapeHandle unused;
+      // `patterns` must be a scalar or a vector.
+      TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(0), 1, &unused));
+      return shape_inference::ScalarShape(c);
+    });
+
 REGISTER_OP("SqlDataset")
     .Input("driver_name: string")
     .Input("data_source_name: string")
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index ecb24103b3..89b5fde727 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -331,6 +331,28 @@ cuda_py_test(
     ],
 )
 
+tf_py_test(
+    name = "matching_files_dataset_op_test",
+    size = "small",
+    srcs = ["matching_files_dataset_op_test.py"],
+    additional_deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:io_ops",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:iterator_ops",
+        "//tensorflow/python/data/ops:readers",
+    ],
+)
+
 tf_py_test(
     name = "prefetch_dataset_op_test",
     size = "small",
diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
new file mode 100644
index 0000000000..ec2f165364
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -0,0 +1,240 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from os import path
+import shutil
+import tempfile
+
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.data.ops.dataset_ops import MatchingFilesDataset
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.ops.gen_io_ops import matching_files
+from tensorflow.python.framework import errors
+
+import os
+import time
+from functools import partial
+
+
+try:
+  import psutil  # pylint: disable=g-import-not-at-top
+
+  psutil_import_succeeded = True
+except ImportError:
+  psutil_import_succeeded = False
+
+
+def timeit(fn, msg, N=0):
+  start = time.time()
+  res = fn()
+  end = time.time()
+  runtime = (end - start) * 1000
+  msg = '{}: time: {:.2f} ms'.format(msg, runtime)
+  if N:
+    msg += ' ({:.2f} ms per iteration)'.format(runtime / N)
+  print(msg)
+  return res
+
+
+width = 10
+depth = 2
+
+
+class MatchingFilesDatasetTest(test.TestCase):
+
+  def setUp(self):
+    self.tmp_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    shutil.rmtree(self.tmp_dir, ignore_errors=True)
+
+  def _touchTempFiles(self, filenames):
+    for filename in filenames:
+      open(path.join(self.tmp_dir, filename), 'a').close()
+
+  def testEmptyDirectory(self):
+    dataset = MatchingFilesDataset(path.join(self.tmp_dir, '*'))
+    with self.cached_session() as sess:
+      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
+      init_op = itr.make_initializer(dataset)
+      next_element = itr.get_next()
+      sess.run(init_op)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+  def testSimpleDirectory(self):
+    filenames = ['a', 'b', 'c']
+    self._touchTempFiles(filenames)
+
+    dataset = MatchingFilesDataset(path.join(self.tmp_dir, '*'))
+    with self.cached_session() as sess:
+      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
+      init_op = itr.make_initializer(dataset)
+      next_element = itr.get_next()
+      sess.run(init_op)
+
+      full_filenames = []
+      produced_filenames = []
+      for filename in filenames:
+        full_filenames.append(
+          compat.as_bytes(path.join(self.tmp_dir, filename)))
+        produced_filenames.append(compat.as_bytes(sess.run(next_element)))
+      self.assertItemsEqual(full_filenames, produced_filenames)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(itr.get_next())
+
+  def testSimpleDirectoryInitializer(self):
+    filenames = ['a', 'b', 'c']
+    self._touchTempFiles(filenames)
+
+    filename_placeholder = array_ops.placeholder(dtypes.string, shape=[])
+    dataset = MatchingFilesDataset(filename_placeholder)
+
+    with self.cached_session() as sess:
+      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
+      init_op = itr.make_initializer(dataset)
+      next_element = itr.get_next()
+      sess.run(
+        init_op,
+        feed_dict={filename_placeholder: path.join(self.tmp_dir, '*')})
+
+      full_filenames = []
+      produced_filenames = []
+      for filename in filenames:
+        full_filenames.append(
+          compat.as_bytes(path.join(self.tmp_dir, filename)))
+        produced_filenames.append(compat.as_bytes(sess.run(next_element)))
+
+      self.assertItemsEqual(full_filenames, produced_filenames)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(itr.get_next())
+
+  def testFileSuffixes(self):
+    filenames = ['a.txt', 'b.py', 'c.py', 'd.pyc']
+    self._touchTempFiles(filenames)
+
+    filename_placeholder = array_ops.placeholder(dtypes.string, shape=[])
+    dataset = MatchingFilesDataset(filename_placeholder)
+
+    with self.cached_session() as sess:
+      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
+      init_op = itr.make_initializer(dataset)
+      next_element = itr.get_next()
+      sess.run(
+        init_op,
+        feed_dict={filename_placeholder: path.join(self.tmp_dir, '*.py')})
+
+      full_filenames = []
+      produced_filenames = []
+      for filename in filenames[1:-1]:
+        full_filenames.append(
+          compat.as_bytes(path.join(self.tmp_dir, filename)))
+        produced_filenames.append(compat.as_bytes(sess.run(next_element)))
+      self.assertItemsEqual(full_filenames, produced_filenames)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(itr.get_next())
+
+  def testFileMiddles(self):
+    filenames = ['a.txt', 'b.py', 'c.pyc']
+    self._touchTempFiles(filenames)
+
+    filename_placeholder = array_ops.placeholder(dtypes.string, shape=[])
+    dataset = MatchingFilesDataset(filename_placeholder)
+
+    with self.cached_session() as sess:
+      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
+      init_op = itr.make_initializer(dataset)
+      next_element = itr.get_next()
+      sess.run(
+        init_op,
+        feed_dict={filename_placeholder: path.join(self.tmp_dir, '*.py*')})
+
+      full_filenames = []
+      produced_filenames = []
+      for filename in filenames[1:]:
+        full_filenames.append(
+          compat.as_bytes(path.join(self.tmp_dir, filename)))
+        produced_filenames.append(compat.as_bytes(sess.run(next_element)))
+
+      self.assertItemsEqual(full_filenames, produced_filenames)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(itr.get_next())
+
+  def _load_data(self):
+    new_files = []
+    dir = "/tmp/test/"
+    if not os.path.exists(dir):
+      os.makedirs(dir)
+    base = tempfile.mkdtemp(prefix=dir)
+    print('saving files to dir: {}'.format(base))
+    for i in range(width):
+      new_base = os.path.join(base, str(i), *[str(j) for j in range(depth - 1)])
+      if not os.path.exists(new_base):
+        os.makedirs(new_base)
+      f = os.path.join(new_base, 'stuff.txt')
+      new_files.append(compat.as_bytes(f))
+      open(f, 'w').close()
+    return base, new_files
+
+  def _read_data(self, data, sess, N=1):
+    for _ in range(N):
+      sess.run(data)
+
+  def _read_data_with_result(self, data, sess, N=1):
+    result = []
+    for _ in range(N):
+      result.append(sess.run(data))
+    return result
+
+  def testPerformance(self):
+    base, test_filenames = self._load_data()
+    test_filenames.sort(reverse=True)
+    patterns = array_ops.placeholder(dtypes.string, shape=[None])
+    dataset = MatchingFilesDataset(patterns)
+    iterator = iterator_ops.Iterator.from_structure(dataset.output_types)
+    init_op = iterator.make_initializer(dataset)
+    get_next = iterator.get_next()
+    result = []
+    with self.cached_session() as sess:
+      search_patterns = [base + "/*/*/*.txt"]
+      sess.run(init_op, feed_dict={patterns: search_patterns})
+      result.extend(timeit(partial(self._read_data_with_result, get_next, sess),
+        "read first filename"))
+      result.extend(timeit(partial(self._read_data_with_result, get_next, sess),
+        "read second filename"))
+      N = width * len(search_patterns) - 2
+      filename = timeit(partial(self._read_data_with_result, get_next, sess, N),
+        'read {} more filenames'.format(N), N)
+      result.extend(filename)
+
+    matched_filenames = [compat.as_bytes(x) for x in result]
+    for file in matched_filenames:
+      print(file)
+    self.assertItemsEqual(matched_filenames, test_filenames)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 2d036fd0d6..5150c7fb9a 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -2624,7 +2624,7 @@ class MapDataset(UnaryDataset):
     self._use_inter_op_parallelism = use_inter_op_parallelism
 
     wrapped_func = StructuredFunctionWrapper(
-        map_func, "Dataset.map()", input_dataset)
+      map_func, "Dataset.map()", input_dataset)
     self._output_classes = wrapped_func.output_classes
     self._output_shapes = wrapped_func.output_shapes
     self._output_types = wrapped_func.output_types
@@ -2633,11 +2633,11 @@ class MapDataset(UnaryDataset):
   def _as_variant_tensor(self):
     input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
     return gen_dataset_ops.map_dataset(
-        input_t,
-        self._map_func.captured_inputs,
-        f=self._map_func,
-        use_inter_op_parallelism=self._use_inter_op_parallelism,
-        **flat_structure(self))
+      input_t,
+      self._map_func.captured_inputs,
+      f=self._map_func,
+      use_inter_op_parallelism=self._use_inter_op_parallelism,
+      **flat_structure(self))
 
   @property
   def output_classes(self):
@@ -2652,6 +2652,31 @@ class MapDataset(UnaryDataset):
     return self._output_types
 
 
+class MatchingFilesDataset(Dataset):
+  """A `Dataset` that list the files according to the input patterns"""
+
+  def __init__(self, pattern):
+    super(MatchingFilesDataset, self).__init__()
+    self._pattern = ops.convert_to_tensor(
+      pattern, dtype=dtypes.string, name="pattern")
+
+
+  def _as_variant_tensor(self):
+    return gen_dataset_ops.matching_files_dataset(self._pattern)
+
+  @property
+  def output_classes(self):
+    return ops.Tensor
+
+  @property
+  def output_shapes(self):
+    return tensor_shape.scalar()
+
+  @property
+  def output_types(self):
+    return dtypes.string
+
+
 class ParallelMapDataset(MapDataset):
   """A `Dataset` that maps a function over elements in its input in parallel."""
 
-- 
GitLab


From a74b6598e919e06221bd793c2031182250cdcdff Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 19 Sep 2018 14:15:01 -0700
Subject: [PATCH 0541/1085] Update the performance test case

---
 .../kernel_tests/matching_files_dataset_op_test.py     | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index ec2f165364..73e5d3e4d1 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -56,8 +56,8 @@ def timeit(fn, msg, N=0):
   return res
 
 
-width = 10
-depth = 2
+width = 1000
+depth = 20
 
 
 class MatchingFilesDatasetTest(test.TestCase):
@@ -219,7 +219,9 @@ class MatchingFilesDatasetTest(test.TestCase):
     get_next = iterator.get_next()
     result = []
     with self.cached_session() as sess:
-      search_patterns = [base + "/*/*/*.txt"]
+      pattern = '{}/{}/*.txt'\
+        .format(base, os.path.join(*['**' for _ in range(depth)]))
+      search_patterns = [pattern]
       sess.run(init_op, feed_dict={patterns: search_patterns})
       result.extend(timeit(partial(self._read_data_with_result, get_next, sess),
         "read first filename"))
@@ -231,8 +233,6 @@ class MatchingFilesDatasetTest(test.TestCase):
       result.extend(filename)
 
     matched_filenames = [compat.as_bytes(x) for x in result]
-    for file in matched_filenames:
-      print(file)
     self.assertItemsEqual(matched_filenames, test_filenames)
 
 
-- 
GitLab


From 4259ac37aa126ce1bdd9c92e9b3b1434a2dfc2c4 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Thu, 20 Sep 2018 16:07:07 -0700
Subject: [PATCH 0542/1085] Resovle conflicts in BUILD file

---
 tensorflow/core/kernels/data/BUILD | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 23ddf32be7..402ebd9e2b 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -703,6 +703,18 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "matching_files_dataset_op",
+    srcs = ["matching_files_dataset_op.cc"],
+    deps = [
+        ":dataset",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
 tf_kernel_library(
     name = "model_dataset_op",
     srcs = ["model_dataset_op.cc"],
@@ -808,18 +820,4 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
     ],
-)
-
-
-tf_kernel_library(
-    name = "matching_files_dataset_op",
-    srcs = ["matching_files_dataset_op.cc"],
-    deps = [
-        ":dataset",
-        "//tensorflow/core:dataset_ops_op_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-    ],
-)
-
+)
\ No newline at end of file
-- 
GitLab


From 058cf57db6cd584b4c30e2f8ce80e61dc6190e7d Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 21 Sep 2018 21:57:51 -0700
Subject: [PATCH 0543/1085] Add a test case for shuffling

---
 .../matching_files_dataset_op_test.py         | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index 73e5d3e4d1..a098ca6cb0 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -233,6 +233,33 @@ class MatchingFilesDatasetTest(test.TestCase):
       result.extend(filename)
 
     matched_filenames = [compat.as_bytes(x) for x in result]
+    self.assertEqual(matched_filenames, test_filenames)
+
+  def testShuffle(self):
+    self.maxDiff = None
+    base, test_filenames = self._load_data()
+    test_filenames.sort(reverse=True)
+    patterns = array_ops.placeholder(dtypes.string, shape=[None])
+    dataset = MatchingFilesDataset(patterns)
+    dataset = dataset.shuffle(buffer_size=10, reshuffle_each_iteration=False)
+    iterator = iterator_ops.Iterator.from_structure(dataset.output_types)
+    init_op = iterator.make_initializer(dataset)
+    get_next = iterator.get_next()
+    result = []
+    with self.cached_session() as sess:
+      pattern = '{}/{}/*.txt' \
+        .format(base, os.path.join(*['**' for _ in range(depth)]))
+      search_patterns = [pattern]
+      sess.run(init_op, feed_dict={patterns: search_patterns})
+      result.extend(timeit(partial(self._read_data_with_result, get_next, sess),
+        "read first filename"))
+      result.extend(timeit(partial(self._read_data_with_result, get_next, sess),
+        "read second filename"))
+      N = width * len(search_patterns) - 2
+      filename = timeit(partial(self._read_data_with_result, get_next, sess, N),
+        'read {} more filenames'.format(N), N)
+      result.extend(filename)
+    matched_filenames = [compat.as_bytes(x) for x in result]
     self.assertItemsEqual(matched_filenames, test_filenames)
 
 
-- 
GitLab


From 7496aa779843a94f1ad9868d69ec4adaea84295b Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 26 Sep 2018 08:06:06 -0700
Subject: [PATCH 0544/1085] Update the iterator construction and add the test
 for nested directories and a benchmark test

---
 tensorflow/python/data/kernel_tests/BUILD     |   1 +
 .../matching_files_dataset_op_test.py         | 325 ++++++++----------
 2 files changed, 143 insertions(+), 183 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 89b5fde727..863d848c98 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -336,6 +336,7 @@ tf_py_test(
     size = "small",
     srcs = ["matching_files_dataset_op_test.py"],
     additional_deps = [
+        "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index a098ca6cb0..4a62a31144 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,47 +17,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from os import path
+import os
 import shutil
 import tempfile
+import time
+
+import numpy as np
 
-from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.client import session
 from tensorflow.python.data.ops.dataset_ops import MatchingFilesDataset
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 from tensorflow.python.util import compat
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.ops.gen_io_ops import matching_files
-from tensorflow.python.framework import errors
-
-import os
-import time
-from functools import partial
-
-
-try:
-  import psutil  # pylint: disable=g-import-not-at-top
-
-  psutil_import_succeeded = True
-except ImportError:
-  psutil_import_succeeded = False
-
-
-def timeit(fn, msg, N=0):
-  start = time.time()
-  res = fn()
-  end = time.time()
-  runtime = (end - start) * 1000
-  msg = '{}: time: {:.2f} ms'.format(msg, runtime)
-  if N:
-    msg += ' ({:.2f} ms per iteration)'.format(runtime / N)
-  print(msg)
-  return res
-
-
-width = 1000
-depth = 20
 
 
 class MatchingFilesDatasetTest(test.TestCase):
@@ -70,15 +42,12 @@ class MatchingFilesDatasetTest(test.TestCase):
 
   def _touchTempFiles(self, filenames):
     for filename in filenames:
-      open(path.join(self.tmp_dir, filename), 'a').close()
+      open(os.path.join(self.tmp_dir, filename), 'a').close()
 
   def testEmptyDirectory(self):
-    dataset = MatchingFilesDataset(path.join(self.tmp_dir, '*'))
+    dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
     with self.cached_session() as sess:
-      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
-      init_op = itr.make_initializer(dataset)
-      next_element = itr.get_next()
-      sess.run(init_op)
+      next_element = dataset.make_one_shot_iterator().get_next()
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
@@ -86,181 +55,171 @@ class MatchingFilesDatasetTest(test.TestCase):
     filenames = ['a', 'b', 'c']
     self._touchTempFiles(filenames)
 
-    dataset = MatchingFilesDataset(path.join(self.tmp_dir, '*'))
+    dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
     with self.cached_session() as sess:
-      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
-      init_op = itr.make_initializer(dataset)
-      next_element = itr.get_next()
-      sess.run(init_op)
+      next_element = dataset.make_one_shot_iterator().get_next()
 
-      full_filenames = []
-      produced_filenames = []
+      expected_filenames = []
+      actual_filenames = []
       for filename in filenames:
-        full_filenames.append(
-          compat.as_bytes(path.join(self.tmp_dir, filename)))
-        produced_filenames.append(compat.as_bytes(sess.run(next_element)))
-      self.assertItemsEqual(full_filenames, produced_filenames)
+        expected_filenames.append(
+          compat.as_bytes(os.path.join(self.tmp_dir, filename)))
+        actual_filenames.append(compat.as_bytes(sess.run(next_element)))
+
+      self.assertItemsEqual(expected_filenames, actual_filenames)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(itr.get_next())
+        sess.run(next_element)
 
   def testSimpleDirectoryInitializer(self):
     filenames = ['a', 'b', 'c']
     self._touchTempFiles(filenames)
 
-    filename_placeholder = array_ops.placeholder(dtypes.string, shape=[])
-    dataset = MatchingFilesDataset(filename_placeholder)
-
+    dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
     with self.cached_session() as sess:
-      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
-      init_op = itr.make_initializer(dataset)
-      next_element = itr.get_next()
-      sess.run(
-        init_op,
-        feed_dict={filename_placeholder: path.join(self.tmp_dir, '*')})
-
-      full_filenames = []
-      produced_filenames = []
+      next_element = dataset.make_one_shot_iterator().get_next()
+      expected_filenames = []
+      actual_filenames = []
       for filename in filenames:
-        full_filenames.append(
-          compat.as_bytes(path.join(self.tmp_dir, filename)))
-        produced_filenames.append(compat.as_bytes(sess.run(next_element)))
-
-      self.assertItemsEqual(full_filenames, produced_filenames)
+        expected_filenames.append(
+          compat.as_bytes(os.path.join(self.tmp_dir, filename)))
+        actual_filenames.append(compat.as_bytes(sess.run(next_element)))
 
+      self.assertItemsEqual(expected_filenames, actual_filenames)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(itr.get_next())
+        sess.run(next_element)
 
   def testFileSuffixes(self):
     filenames = ['a.txt', 'b.py', 'c.py', 'd.pyc']
     self._touchTempFiles(filenames)
 
-    filename_placeholder = array_ops.placeholder(dtypes.string, shape=[])
-    dataset = MatchingFilesDataset(filename_placeholder)
-
+    dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*.py'))
     with self.cached_session() as sess:
-      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
-      init_op = itr.make_initializer(dataset)
-      next_element = itr.get_next()
-      sess.run(
-        init_op,
-        feed_dict={filename_placeholder: path.join(self.tmp_dir, '*.py')})
-
-      full_filenames = []
-      produced_filenames = []
+      next_element = dataset.make_one_shot_iterator().get_next()
+      expected_filenames = []
+      actual_filenames = []
       for filename in filenames[1:-1]:
-        full_filenames.append(
-          compat.as_bytes(path.join(self.tmp_dir, filename)))
-        produced_filenames.append(compat.as_bytes(sess.run(next_element)))
-      self.assertItemsEqual(full_filenames, produced_filenames)
+        expected_filenames.append(
+          compat.as_bytes(os.path.join(self.tmp_dir, filename)))
+        actual_filenames.append(compat.as_bytes(sess.run(next_element)))
 
+      self.assertItemsEqual(expected_filenames, actual_filenames)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(itr.get_next())
+        sess.run(next_element)
 
   def testFileMiddles(self):
     filenames = ['a.txt', 'b.py', 'c.pyc']
     self._touchTempFiles(filenames)
 
-    filename_placeholder = array_ops.placeholder(dtypes.string, shape=[])
-    dataset = MatchingFilesDataset(filename_placeholder)
-
+    dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*.py*'))
     with self.cached_session() as sess:
-      itr = iterator_ops.Iterator.from_structure(dataset.output_types)
-      init_op = itr.make_initializer(dataset)
-      next_element = itr.get_next()
-      sess.run(
-        init_op,
-        feed_dict={filename_placeholder: path.join(self.tmp_dir, '*.py*')})
-
-      full_filenames = []
-      produced_filenames = []
+      next_element = dataset.make_one_shot_iterator().get_next()
+      expected_filenames = []
+      actual_filenames = []
       for filename in filenames[1:]:
-        full_filenames.append(
-          compat.as_bytes(path.join(self.tmp_dir, filename)))
-        produced_filenames.append(compat.as_bytes(sess.run(next_element)))
-
-      self.assertItemsEqual(full_filenames, produced_filenames)
+        expected_filenames.append(
+          compat.as_bytes(os.path.join(self.tmp_dir, filename)))
+        actual_filenames.append(compat.as_bytes(sess.run(next_element)))
 
+      self.assertItemsEqual(expected_filenames, actual_filenames)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(itr.get_next())
-
-  def _load_data(self):
-    new_files = []
-    dir = "/tmp/test/"
-    if not os.path.exists(dir):
-      os.makedirs(dir)
-    base = tempfile.mkdtemp(prefix=dir)
-    print('saving files to dir: {}'.format(base))
+        sess.run(next_element)
+
+  def testNestedDirectories(self):
+    filenames = []
+    width = 8
+    depth = 4
     for i in range(width):
-      new_base = os.path.join(base, str(i), *[str(j) for j in range(depth - 1)])
-      if not os.path.exists(new_base):
-        os.makedirs(new_base)
-      f = os.path.join(new_base, 'stuff.txt')
-      new_files.append(compat.as_bytes(f))
-      open(f, 'w').close()
-    return base, new_files
-
-  def _read_data(self, data, sess, N=1):
-    for _ in range(N):
-      sess.run(data)
-
-  def _read_data_with_result(self, data, sess, N=1):
-    result = []
-    for _ in range(N):
-      result.append(sess.run(data))
-    return result
-
-  def testPerformance(self):
-    base, test_filenames = self._load_data()
-    test_filenames.sort(reverse=True)
-    patterns = array_ops.placeholder(dtypes.string, shape=[None])
-    dataset = MatchingFilesDataset(patterns)
-    iterator = iterator_ops.Iterator.from_structure(dataset.output_types)
-    init_op = iterator.make_initializer(dataset)
-    get_next = iterator.get_next()
-    result = []
-    with self.cached_session() as sess:
-      pattern = '{}/{}/*.txt'\
-        .format(base, os.path.join(*['**' for _ in range(depth)]))
-      search_patterns = [pattern]
-      sess.run(init_op, feed_dict={patterns: search_patterns})
-      result.extend(timeit(partial(self._read_data_with_result, get_next, sess),
-        "read first filename"))
-      result.extend(timeit(partial(self._read_data_with_result, get_next, sess),
-        "read second filename"))
-      N = width * len(search_patterns) - 2
-      filename = timeit(partial(self._read_data_with_result, get_next, sess, N),
-        'read {} more filenames'.format(N), N)
-      result.extend(filename)
-
-    matched_filenames = [compat.as_bytes(x) for x in result]
-    self.assertEqual(matched_filenames, test_filenames)
-
-  def testShuffle(self):
-    self.maxDiff = None
-    base, test_filenames = self._load_data()
-    test_filenames.sort(reverse=True)
-    patterns = array_ops.placeholder(dtypes.string, shape=[None])
+      for j in range(depth):
+        new_base = os.path.join(self.tmp_dir, str(i),
+          *[str(dir_name) for dir_name in range(j)])
+        os.makedirs(new_base, exist_ok=True)
+        for f in ['a.txt', 'b.py', 'c.pyc']:
+          filename = os.path.join(new_base, f)
+          filenames.append(filename)
+          open(filename, 'w').close()
+
+    patterns = []
+    for i in range(depth):
+      pattern = '{}/{}/*.txt'.format(
+        self.tmp_dir, os.path.join(*['**' for _ in range(i + 1)]))
+      patterns.append(pattern)
+
     dataset = MatchingFilesDataset(patterns)
-    dataset = dataset.shuffle(buffer_size=10, reshuffle_each_iteration=False)
-    iterator = iterator_ops.Iterator.from_structure(dataset.output_types)
-    init_op = iterator.make_initializer(dataset)
-    get_next = iterator.get_next()
-    result = []
     with self.cached_session() as sess:
-      pattern = '{}/{}/*.txt' \
-        .format(base, os.path.join(*['**' for _ in range(depth)]))
-      search_patterns = [pattern]
-      sess.run(init_op, feed_dict={patterns: search_patterns})
-      result.extend(timeit(partial(self._read_data_with_result, get_next, sess),
-        "read first filename"))
-      result.extend(timeit(partial(self._read_data_with_result, get_next, sess),
-        "read second filename"))
-      N = width * len(search_patterns) - 2
-      filename = timeit(partial(self._read_data_with_result, get_next, sess, N),
-        'read {} more filenames'.format(N), N)
-      result.extend(filename)
-    matched_filenames = [compat.as_bytes(x) for x in result]
-    self.assertItemsEqual(matched_filenames, test_filenames)
+      next_element = dataset.make_one_shot_iterator().get_next()
+      expected_filenames = [compat.as_bytes(file)
+                            for file in filenames if file.endswith('.txt')]
+      actual_filenames = []
+      while True:
+        try:
+          actual_filenames.append(compat.as_bytes(sess.run(next_element)))
+        except errors.OutOfRangeError:
+          break
+
+      self.assertItemsEqual(expected_filenames, actual_filenames)
+
+
+class MatchingFilesDatasetBenchmark(test.Benchmark):
+
+  def benchmarkNestedDirectories(self):
+    tmp_dir = tempfile.mkdtemp()
+    width = 1000
+    depth = 10
+    for i in range(width):
+      for j in range(depth):
+        new_base = os.path.join(tmp_dir, str(i),
+          *[str(dir_name) for dir_name in range(j)])
+        if not os.path.exists(new_base):
+          os.makedirs(new_base)
+        for f in ['a.txt', 'b.py', 'c.pyc']:
+          filename = os.path.join(new_base, f)
+          open(filename, 'w').close()
+
+    patterns = []
+    for i in range(depth):
+      pattern = '{}/{}/*.txt'.format(tmp_dir,
+        os.path.join(*['**' for _ in range(i + 1)]))
+      patterns.append(pattern)
+
+    deltas = []
+    iters = 3
+    for _ in range(iters):
+      with ops.Graph().as_default():
+        dataset = MatchingFilesDataset(patterns)
+        next_element = dataset.make_one_shot_iterator().get_next()
+
+        with session.Session() as sess:
+          sub_deltas = []
+          while True:
+            try:
+              start = time.time()
+              sess.run(next_element)
+              end = time.time()
+              sub_deltas.append(end - start)
+            except errors.OutOfRangeError:
+              break
+          deltas.append(sub_deltas)
+
+    median_deltas = np.median(deltas, axis=0)
+    print("Nested directory size (width*depth): %d*%d Median wall time: "
+          "%fs (read first filename), %fs (read second filename), avg %fs"
+          " (read %d more filenames)" % (width, depth,
+                                         median_deltas[0],
+                                         median_deltas[1],
+                                         np.average(median_deltas[2:]),
+                                         len(median_deltas) - 2))
+    self.report_benchmark(
+        iters=iters,
+        wall_time=np.sum(median_deltas),
+        extras={"read first file:": median_deltas[0],
+                "read second file:": median_deltas[1],
+                "avg time for reading %d more filenames:" %
+                (len(median_deltas) - 2):
+                np.average(median_deltas[2:])},
+        name="benchmark_matching_files_dataset_nesteddirectory(%d*%d)" %
+             (width, depth))
+
+    shutil.rmtree(tmp_dir, ignore_errors=True)
 
 
 if __name__ == "__main__":
-- 
GitLab


From 453bed8e4b91431765c138e7d09c1419ea9588a8 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 26 Sep 2018 08:09:46 -0700
Subject: [PATCH 0545/1085] Improve the kernel implementation (missing newline,
 ForEach, comment format)

---
 tensorflow/core/kernels/data/BUILD            |   2 +-
 .../kernels/data/matching_files_dataset_op.cc | 119 ++++++++----------
 2 files changed, 50 insertions(+), 71 deletions(-)

diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 402ebd9e2b..59c20033b0 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -820,4 +820,4 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
     ],
-)
\ No newline at end of file
+)
diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index b43517d0f5..d608d9d43c 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -31,30 +31,10 @@ limitations under the License.
 
 namespace tensorflow {
 namespace data {
-
 namespace {
 
-constexpr int kNumThreads = 8;
-
-// Run a function in parallel using a ThreadPool, but skip the ThreadPool
-// on the iOS platform due to its problems with more than a few threads.
-void ForEach(int first, int last, const std::function<void(int)>& f) {
-#if TARGET_OS_IPHONE
-  for (int i = first; i < last; i++) {
-    f(i);
-  }
-#else
-  int num_threads = std::min(kNumThreads, last - first);
-  thread::ThreadPool threads(Env::Default(), "ForEach", num_threads);
-  for (int i = first; i < last; i++) {
-    threads.Schedule([f, i] { f(i); });
-  }
-#endif
-}
-
-}  // namespace
-
-namespace {
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
 
 class MatchingFilesDatasetOp : public DatasetOpKernel {
  public:
@@ -62,16 +42,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override {
     const Tensor* patterns_t;
-    // NOTE(originally from ringwalt): Changing the input name "pattern" to
-    // "patterns" would break existing graphs.
-    OP_REQUIRES_OK(ctx, ctx->input("pattern", &patterns_t));
-    OP_REQUIRES(
-        ctx,
-        TensorShapeUtils::IsScalar(patterns_t->shape()) ||
-            TensorShapeUtils::IsVector(patterns_t->shape()),
-        errors::InvalidArgument(
-            "Input patterns tensor must be scalar or vector, but had shape: ",
-            patterns_t->shape().DebugString()));
+    OP_REQUIRES_OK(ctx, ctx->input("patterns", &patterns_t));
     const auto patterns = patterns_t->flat<string>();
     size_t num_patterns = static_cast<size_t >(patterns.size());
     std::vector<string> pattern_strs;
@@ -81,8 +52,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       pattern_strs.push_back(patterns(i));
     }
 
-    // keep the elements in the descending order
-    std::sort(pattern_strs.begin(), pattern_strs.end(), std::greater<string>());
+    // keep the elements in the ascending order
+    std::sort(pattern_strs.begin(), pattern_strs.end());
     *output = new Dataset(ctx, std::move(pattern_strs));
   }
 
@@ -96,7 +67,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
       return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::FileName")}));
+          new Iterator({this, strings::StrCat(prefix, "::MatchingFiles")}));
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -118,9 +89,9 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      Node* pattern = nullptr;
-      TF_RETURN_IF_ERROR(b->AddVector(pattern_, &pattern));
-      TF_RETURN_IF_ERROR(b->AddDataset(this, {pattern}, output));
+      Node* patterns_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddVector(pattern_, &patterns_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(this, {patterns_node}, output));
       return Status::OK();
     }
 
@@ -138,14 +109,14 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
         while (!filepath_queue_.empty() ||
             current_pattern_index_ < dataset()->pattern_.size()) {
-          // all the elements in the heap will be the matched file name or the
-          // potential directory
+          // All the elements in the heap will be the matched filename or the
+          // potential directory.
           if (!filepath_queue_.empty()) {
             string cur_file = filepath_queue_.top();
             filepath_queue_.pop();
 
-            // we can also use isDectory() here. But IsDirectory call can be
-            // expensive for some FS
+            // We can also use isDectory() here. But IsDirectory call can be
+            // expensive for some FS.
             if (ctx->env()->MatchPath(cur_file, current_pattern_)){
               Tensor filepath_tensor(ctx->allocator({}), DT_STRING, {});
               filepath_tensor.scalar<string>()() = cur_file;
@@ -154,20 +125,20 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
               return Status::OK();
             }
 
-            // in this case, cur_file is a directory. Then create a sub-pattern
-            // to continue the search
+            // In this case, cur_file is a directory. Then create a sub-pattern
+            // to continue the search.
             size_t pos = current_pattern_.find_first_of("*?[\\");
             size_t len = current_pattern_.size() - pos;
             string cur_pattern_suffix = current_pattern_.substr(pos, len);
             string sub_pattern = strings::StrCat(cur_file,
                                                  "/",
                                                  cur_pattern_suffix);
-            Status s = UpdateIterator(ctx->env(), sub_pattern);
+            Status s = UpdateIterator(ctx, sub_pattern);
             ret.Update(s);
           } else {
             // search a new pattern
             current_pattern_ = dataset()->pattern_[current_pattern_index_];
-            Status s = UpdateIterator(ctx->env(), current_pattern_);
+            Status s = UpdateIterator(ctx, current_pattern_);
             ret.Update(s);
             ++current_pattern_index_;
           }
@@ -224,14 +195,14 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       }
 
      private:
-      Status UpdateIterator(Env *env, const string &pattern)
+      Status UpdateIterator(IteratorContext* ctx, const string &pattern)
       EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         string fixed_prefix = pattern.substr(0, pattern.find_first_of("*?[\\"));
         string eval_pattern = pattern;
         string dir(io::Dirname(fixed_prefix));
 
-        // If dir is empty then we need to fix up fixed_prefix and eval_pattern to
-        // include . as the top level directory.
+        // If dir is empty then we need to fix up fixed_prefix and eval_pattern
+        // to include . as the top level directory.
         if (dir.empty()) {
           dir = ".";
           fixed_prefix = io::JoinPath(dir, fixed_prefix);
@@ -239,16 +210,16 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         }
 
         FileSystem* fs;
-        TF_RETURN_IF_ERROR(env->GetFileSystemForFile(dir, &fs));
+        TF_RETURN_IF_ERROR(ctx->env()->GetFileSystemForFile(dir, &fs));
 
         filepath_queue_.push(dir);
         Status ret;  //Status to return
-        // children_dir_status holds is_dir status for children. It can have three
-        // possible values: OK for true; FAILED_PRECONDITION for false; CANCELLED
-        // if we don't calculate IsDirectory (we might do that because there isn't
-        // any point in exploring that child path).
+        // children_dir_status holds is_dir status for children. It can have
+        // three possible values: OK for true; FAILED_PRECONDITION for false;
+        // CANCELLED if we don't calculate IsDirectory (we might do that because
+        // there isn't any point in exploring that child path).
 
-        // DFS to find the first element in the iterator
+        // DFS to find the first element in the iterator.
         while (!filepath_queue_.empty()) {
           string cur_dir = filepath_queue_.top();
           filepath_queue_.pop();
@@ -256,14 +227,14 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           Status s = fs->GetChildren(cur_dir, &children);
           ret.Update(s);
 
-          // if cur_dir has no children, there will two possible situations: 1)
+          // If cur_dir has no children, there will two possible situations: 1)
           // the cur_dir is an empty dir; 2) the cur_dir is actual a file
           // instead of a director. For the first one, continue to search the
-          // heap; For the second one, if the file matches the pattern, add
+          // heap. For the second one, if the file matches the pattern, add
           // it to the heap and finish the search; otherwise, continue the next
-          // search
+          // search.
           if (children.empty()) {
-            if (env->MatchPath(cur_dir, current_pattern_)) {
+            if (ctx->env()->MatchPath(cur_dir, eval_pattern)) {
               filepath_queue_.push(cur_dir);
               return ret;
             } else {
@@ -272,13 +243,14 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           }
 
           std::map<string, Status> children_dir_status;
-          // This IsDirectory call can be expensive for some FS. Parallelizing it.
-          ForEach(0, children.size(),
+          // This IsDirectory call can be expensive for some FS. Parallelizing
+          // it.
+          ForEach(ctx, 0, children.size(),
                   [fs, &cur_dir, &children, &fixed_prefix,
                       &children_dir_status] (int i) {
                     const string child_path = io::JoinPath(cur_dir, children[i]);
-                    // In case the child_path doesn't start with the fixed_prefix then
-                    // we don't need to explore this path.
+                    // In case the child_path doesn't start with the
+                    // fixed_prefix, then we don't need to explore this path.
                     if (!str_util::StartsWith(child_path, fixed_prefix)) {
                       children_dir_status[child_path] =
                           Status(tensorflow::error::CANCELLED,
@@ -300,9 +272,9 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
               //push the child dir for next search
               filepath_queue_.push(child_dir_path);
             } else {
-              // this case will be a file; if the file match the pattern, push
-              // it to the heap; otherwise, ignore it
-              if (env->MatchPath(child_dir_path, eval_pattern)) {
+              // This case will be a file: if the file matches the pattern, push
+              // it to the heap; otherwise, ignore it.
+              if (ctx->env()->MatchPath(child_dir_path, eval_pattern)) {
                 filepath_queue_.push(child_dir_path);
               }
             }
@@ -311,9 +283,16 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         return ret;
       }
 
+      static void ForEach(IteratorContext* ctx, int first, int last,
+          const std::function<void(int)>& f) {
+        for (int i = first; i < last ; i++) {
+          (*ctx->runner())([f, i] {std::bind(f, i);});
+        }
+      }
+
       mutex mu_;
-      //std::unique_ptr<std::priority_queue<string>> filepath_queue_ GUARDED_BY(mu_);
-      std::priority_queue<string> filepath_queue_ GUARDED_BY(mu_); // = new std::priority_queue<string>;
+      std::priority_queue<string, std::vector<string>, std::less<string>>
+          filepath_queue_ GUARDED_BY(mu_);
       size_t current_pattern_index_ GUARDED_BY(mu_) = 0;
       string current_pattern_ GUARDED_BY(mu_);
     };
@@ -327,4 +306,4 @@ REGISTER_KERNEL_BUILDER(Name("MatchingFilesDataset").Device(DEVICE_CPU),
 
 }  // namespace
 }  // namespace data
-}  // namespace tensorflow
\ No newline at end of file
+}  // namespace tensorflow
-- 
GitLab


From 7ef0ddfd06a799220dfd4989ed8067036efcdcb5 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 26 Sep 2018 08:11:02 -0700
Subject: [PATCH 0546/1085] change pattern to be patterns in Python API

---
 tensorflow/core/ops/dataset_ops.cc        |  2 +-
 tensorflow/python/data/ops/dataset_ops.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 26c2756836..14596e7f4e 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -620,7 +620,7 @@ REGISTER_OP("TextLineDataset")
     });
 
 REGISTER_OP("MatchingFilesDataset")
-    .Input("pattern: string")
+    .Input("patterns: string")
     .Output("handle: variant")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
         // stateful to inhibit constant folding.
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 5150c7fb9a..063700e335 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -2653,16 +2653,16 @@ class MapDataset(UnaryDataset):
 
 
 class MatchingFilesDataset(Dataset):
-  """A `Dataset` that list the files according to the input patterns"""
+  """A `Dataset` that list the files according to the input patterns."""
 
-  def __init__(self, pattern):
+  def __init__(self, patterns):
     super(MatchingFilesDataset, self).__init__()
-    self._pattern = ops.convert_to_tensor(
-      pattern, dtype=dtypes.string, name="pattern")
+    self._patterns = ops.convert_to_tensor(
+      patterns, dtype=dtypes.string, name="patterns")
 
 
   def _as_variant_tensor(self):
-    return gen_dataset_ops.matching_files_dataset(self._pattern)
+    return gen_dataset_ops.matching_files_dataset(self._patterns)
 
   @property
   def output_classes(self):
-- 
GitLab


From 17e093dffe0d48563c40ce3a1bbfc998b0777689 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 26 Sep 2018 10:48:41 -0700
Subject: [PATCH 0547/1085] Address the coding style issues

---
 .../kernels/data/matching_files_dataset_op.cc | 90 +++++++++----------
 .../matching_files_dataset_op_test.py         | 45 ++++------
 2 files changed, 62 insertions(+), 73 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index d608d9d43c..ced8b304d7 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -13,21 +13,21 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include <queue>
+#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/io/buffered_inputstream.h"
 #include "tensorflow/core/lib/io/inputbuffer.h"
+#include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/io/random_inputstream.h"
 #include "tensorflow/core/lib/io/record_reader.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
 #include "tensorflow/core/lib/io/zlib_inputstream.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/lib/core/threadpool.h"
 
 namespace tensorflow {
 namespace data {
@@ -44,7 +44,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
     const Tensor* patterns_t;
     OP_REQUIRES_OK(ctx, ctx->input("patterns", &patterns_t));
     const auto patterns = patterns_t->flat<string>();
-    size_t num_patterns = static_cast<size_t >(patterns.size());
+    size_t num_patterns = static_cast<size_t>(patterns.size());
     std::vector<string> pattern_strs;
     pattern_strs.reserve(num_patterns);
 
@@ -61,8 +61,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
   class Dataset : public DatasetBase {
    public:
     Dataset(OpKernelContext* ctx, std::vector<string> patterns)
-        : DatasetBase(DatasetContext(ctx)),
-          pattern_(std::move(patterns)) {}
+        : DatasetBase(DatasetContext(ctx)), pattern_(std::move(patterns)) {}
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
@@ -108,7 +107,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         Status ret;
 
         while (!filepath_queue_.empty() ||
-            current_pattern_index_ < dataset()->pattern_.size()) {
+               current_pattern_index_ < dataset()->pattern_.size()) {
           // All the elements in the heap will be the matched filename or the
           // potential directory.
           if (!filepath_queue_.empty()) {
@@ -117,7 +116,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
             // We can also use isDectory() here. But IsDirectory call can be
             // expensive for some FS.
-            if (ctx->env()->MatchPath(cur_file, current_pattern_)){
+            if (ctx->env()->MatchPath(cur_file, current_pattern_)) {
               Tensor filepath_tensor(ctx->allocator({}), DT_STRING, {});
               filepath_tensor.scalar<string>()() = cur_file;
               out_tensors->emplace_back(std::move(filepath_tensor));
@@ -130,9 +129,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             size_t pos = current_pattern_.find_first_of("*?[\\");
             size_t len = current_pattern_.size() - pos;
             string cur_pattern_suffix = current_pattern_.substr(pos, len);
-            string sub_pattern = strings::StrCat(cur_file,
-                                                 "/",
-                                                 cur_pattern_suffix);
+            string sub_pattern =
+                strings::StrCat(cur_file, "/", cur_pattern_suffix);
             Status s = UpdateIterator(ctx, sub_pattern);
             ret.Update(s);
           } else {
@@ -152,16 +150,14 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       Status SaveInternal(IteratorStateWriter* writer) override {
         mutex_lock l(mu_);
         TF_RETURN_IF_ERROR(writer->WriteScalar(
-            full_name("current_pattern_index"),
-            current_pattern_index_));
+            full_name("current_pattern_index"), current_pattern_index_));
 
-        TF_RETURN_IF_ERROR(writer->WriteScalar(
-            full_name("current_pattern"),
-            current_pattern_));
+        TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("current_pattern"),
+                                               current_pattern_));
 
         if (!filepath_queue_.empty()) {
-          TF_RETURN_IF_ERROR(writer->WriteScalar(
-              full_name("queue_size"), filepath_queue_.size()));
+          TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("queue_size"),
+                                                 filepath_queue_.size()));
           for (int i = 0; i < filepath_queue_.size(); ++i) {
             TF_RETURN_IF_ERROR(writer->WriteScalar(
                 full_name(strings::StrCat("queue_element_", i)),
@@ -175,16 +171,16 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                              IteratorStateReader* reader) override {
         mutex_lock l(mu_);
         int64 current_pattern_index;
-        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_pattern_index"),
-                                              &current_pattern_index));
+        TF_RETURN_IF_ERROR(reader->ReadScalar(
+            full_name("current_pattern_index"), &current_pattern_index));
         current_pattern_index_ = size_t(current_pattern_index);
 
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_pattern"),
                                               &current_pattern_));
 
         int64 queue_size;
-        TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("queue_size"),
-                                              &queue_size));
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("queue_size"), &queue_size));
         for (int i = static_cast<int>(queue_size - 1); i >= 0; --i) {
           string element;
           TF_RETURN_IF_ERROR(reader->ReadScalar(
@@ -195,8 +191,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       }
 
      private:
-      Status UpdateIterator(IteratorContext* ctx, const string &pattern)
-      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+      Status UpdateIterator(IteratorContext* ctx, const string& pattern)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         string fixed_prefix = pattern.substr(0, pattern.find_first_of("*?[\\"));
         string eval_pattern = pattern;
         string dir(io::Dirname(fixed_prefix));
@@ -213,7 +209,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         TF_RETURN_IF_ERROR(ctx->env()->GetFileSystemForFile(dir, &fs));
 
         filepath_queue_.push(dir);
-        Status ret;  //Status to return
+        Status ret;  // Status to return
         // children_dir_status holds is_dir status for children. It can have
         // three possible values: OK for true; FAILED_PRECONDITION for false;
         // CANCELLED if we don't calculate IsDirectory (we might do that because
@@ -245,22 +241,22 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           std::map<string, Status> children_dir_status;
           // This IsDirectory call can be expensive for some FS. Parallelizing
           // it.
-          ForEach(ctx, 0, children.size(),
-                  [fs, &cur_dir, &children, &fixed_prefix,
-                      &children_dir_status] (int i) {
-                    const string child_path = io::JoinPath(cur_dir, children[i]);
-                    // In case the child_path doesn't start with the
-                    // fixed_prefix, then we don't need to explore this path.
-                    if (!str_util::StartsWith(child_path, fixed_prefix)) {
-                      children_dir_status[child_path] =
-                          Status(tensorflow::error::CANCELLED,
-                                 "Operation not needed");
-                    } else {
-                      children_dir_status[child_path] = fs->IsDirectory(child_path);
-                    }
-                  });
-
-          for (const auto &child : children) {
+          ForEach(
+              ctx, 0, children.size(),
+              [fs, &cur_dir, &children, &fixed_prefix,
+               &children_dir_status](int i) {
+                const string child_path = io::JoinPath(cur_dir, children[i]);
+                // In case the child_path doesn't start with the fixed_prefix,
+                // then we don't need to explore this path.
+                if (!str_util::StartsWith(child_path, fixed_prefix)) {
+                  children_dir_status[child_path] = Status(
+                      tensorflow::error::CANCELLED, "Operation not needed");
+                } else {
+                  children_dir_status[child_path] = fs->IsDirectory(child_path);
+                }
+              });
+
+          for (const auto& child : children) {
             const string child_dir_path = io::JoinPath(cur_dir, child);
             const Status child_dir_status = children_dir_status[child];
             // If the IsDirectory call was cancelled we bail.
@@ -269,7 +265,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             }
 
             if (child_dir_status.ok()) {
-              //push the child dir for next search
+              // push the child dir for next search
               filepath_queue_.push(child_dir_path);
             } else {
               // This case will be a file: if the file matches the pattern, push
@@ -284,9 +280,9 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       }
 
       static void ForEach(IteratorContext* ctx, int first, int last,
-          const std::function<void(int)>& f) {
-        for (int i = first; i < last ; i++) {
-          (*ctx->runner())([f, i] {std::bind(f, i);});
+                          const std::function<void(int)>& f) {
+        for (int i = first; i < last; i++) {
+          (*ctx->runner())([f, i] { std::bind(f, i); });
         }
       }
 
diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index 4a62a31144..a2c6b78256 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -45,6 +45,8 @@ class MatchingFilesDatasetTest(test.TestCase):
       open(os.path.join(self.tmp_dir, filename), 'a').close()
 
   def testEmptyDirectory(self):
+    """Test the matchingfiles dataset with an empty directory"""
+
     dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
     with self.cached_session() as sess:
       next_element = dataset.make_one_shot_iterator().get_next()
@@ -52,36 +54,20 @@ class MatchingFilesDatasetTest(test.TestCase):
         sess.run(next_element)
 
   def testSimpleDirectory(self):
-    filenames = ['a', 'b', 'c']
-    self._touchTempFiles(filenames)
+    """Test the matchingfiles dataset with a simple directory"""
 
-    dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
-    with self.cached_session() as sess:
-      next_element = dataset.make_one_shot_iterator().get_next()
-
-      expected_filenames = []
-      actual_filenames = []
-      for filename in filenames:
-        expected_filenames.append(
-          compat.as_bytes(os.path.join(self.tmp_dir, filename)))
-        actual_filenames.append(compat.as_bytes(sess.run(next_element)))
-
-      self.assertItemsEqual(expected_filenames, actual_filenames)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testSimpleDirectoryInitializer(self):
     filenames = ['a', 'b', 'c']
     self._touchTempFiles(filenames)
 
     dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
     with self.cached_session() as sess:
       next_element = dataset.make_one_shot_iterator().get_next()
+
       expected_filenames = []
       actual_filenames = []
       for filename in filenames:
         expected_filenames.append(
-          compat.as_bytes(os.path.join(self.tmp_dir, filename)))
+            compat.as_bytes(os.path.join(self.tmp_dir, filename)))
         actual_filenames.append(compat.as_bytes(sess.run(next_element)))
 
       self.assertItemsEqual(expected_filenames, actual_filenames)
@@ -89,6 +75,8 @@ class MatchingFilesDatasetTest(test.TestCase):
         sess.run(next_element)
 
   def testFileSuffixes(self):
+    """Test the matchingfiles dataset using the suffixes of filename"""
+
     filenames = ['a.txt', 'b.py', 'c.py', 'd.pyc']
     self._touchTempFiles(filenames)
 
@@ -99,7 +87,7 @@ class MatchingFilesDatasetTest(test.TestCase):
       actual_filenames = []
       for filename in filenames[1:-1]:
         expected_filenames.append(
-          compat.as_bytes(os.path.join(self.tmp_dir, filename)))
+            compat.as_bytes(os.path.join(self.tmp_dir, filename)))
         actual_filenames.append(compat.as_bytes(sess.run(next_element)))
 
       self.assertItemsEqual(expected_filenames, actual_filenames)
@@ -107,6 +95,8 @@ class MatchingFilesDatasetTest(test.TestCase):
         sess.run(next_element)
 
   def testFileMiddles(self):
+    """Test the matchingfiles dataset using the middles of filename"""
+
     filenames = ['a.txt', 'b.py', 'c.pyc']
     self._touchTempFiles(filenames)
 
@@ -117,7 +107,7 @@ class MatchingFilesDatasetTest(test.TestCase):
       actual_filenames = []
       for filename in filenames[1:]:
         expected_filenames.append(
-          compat.as_bytes(os.path.join(self.tmp_dir, filename)))
+            compat.as_bytes(os.path.join(self.tmp_dir, filename)))
         actual_filenames.append(compat.as_bytes(sess.run(next_element)))
 
       self.assertItemsEqual(expected_filenames, actual_filenames)
@@ -125,13 +115,15 @@ class MatchingFilesDatasetTest(test.TestCase):
         sess.run(next_element)
 
   def testNestedDirectories(self):
+    """Test the matchingfiles dataset with nested directories"""
+
     filenames = []
     width = 8
     depth = 4
     for i in range(width):
       for j in range(depth):
         new_base = os.path.join(self.tmp_dir, str(i),
-          *[str(dir_name) for dir_name in range(j)])
+                                *[str(dir_name) for dir_name in range(j)])
         os.makedirs(new_base, exist_ok=True)
         for f in ['a.txt', 'b.py', 'c.pyc']:
           filename = os.path.join(new_base, f)
@@ -141,7 +133,7 @@ class MatchingFilesDatasetTest(test.TestCase):
     patterns = []
     for i in range(depth):
       pattern = '{}/{}/*.txt'.format(
-        self.tmp_dir, os.path.join(*['**' for _ in range(i + 1)]))
+          self.tmp_dir, os.path.join(*['**' for _ in range(i + 1)]))
       patterns.append(pattern)
 
     dataset = MatchingFilesDataset(patterns)
@@ -168,7 +160,7 @@ class MatchingFilesDatasetBenchmark(test.Benchmark):
     for i in range(width):
       for j in range(depth):
         new_base = os.path.join(tmp_dir, str(i),
-          *[str(dir_name) for dir_name in range(j)])
+                                *[str(dir_name) for dir_name in range(j)])
         if not os.path.exists(new_base):
           os.makedirs(new_base)
         for f in ['a.txt', 'b.py', 'c.pyc']:
@@ -178,7 +170,8 @@ class MatchingFilesDatasetBenchmark(test.Benchmark):
     patterns = []
     for i in range(depth):
       pattern = '{}/{}/*.txt'.format(tmp_dir,
-        os.path.join(*['**' for _ in range(i + 1)]))
+                                     os.path.join(
+                                         *['**' for _ in range(i + 1)]))
       patterns.append(pattern)
 
     deltas = []
@@ -217,7 +210,7 @@ class MatchingFilesDatasetBenchmark(test.Benchmark):
                 (len(median_deltas) - 2):
                 np.average(median_deltas[2:])},
         name="benchmark_matching_files_dataset_nesteddirectory(%d*%d)" %
-             (width, depth))
+        (width, depth))
 
     shutil.rmtree(tmp_dir, ignore_errors=True)
 
-- 
GitLab


From 1f25931abcd036f2d0d94c368cd82ab02ffc5449 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 26 Sep 2018 11:28:03 -0700
Subject: [PATCH 0548/1085] Change pattern_ to patterns_

---
 .../core/kernels/data/matching_files_dataset_op.cc     | 10 +++++-----
 tensorflow/python/data/ops/dataset_ops.py              |  1 -
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index ced8b304d7..bfb893e856 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -61,7 +61,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
   class Dataset : public DatasetBase {
    public:
     Dataset(OpKernelContext* ctx, std::vector<string> patterns)
-        : DatasetBase(DatasetContext(ctx)), pattern_(std::move(patterns)) {}
+        : DatasetBase(DatasetContext(ctx)), patterns_(std::move(patterns)) {}
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
@@ -89,7 +89,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
       Node* patterns_node = nullptr;
-      TF_RETURN_IF_ERROR(b->AddVector(pattern_, &patterns_node));
+      TF_RETURN_IF_ERROR(b->AddVector(patterns_, &patterns_node));
       TF_RETURN_IF_ERROR(b->AddDataset(this, {patterns_node}, output));
       return Status::OK();
     }
@@ -107,7 +107,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         Status ret;
 
         while (!filepath_queue_.empty() ||
-               current_pattern_index_ < dataset()->pattern_.size()) {
+               current_pattern_index_ < dataset()->patterns_.size()) {
           // All the elements in the heap will be the matched filename or the
           // potential directory.
           if (!filepath_queue_.empty()) {
@@ -135,7 +135,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             ret.Update(s);
           } else {
             // search a new pattern
-            current_pattern_ = dataset()->pattern_[current_pattern_index_];
+            current_pattern_ = dataset()->patterns_[current_pattern_index_];
             Status s = UpdateIterator(ctx, current_pattern_);
             ret.Update(s);
             ++current_pattern_index_;
@@ -293,7 +293,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       string current_pattern_ GUARDED_BY(mu_);
     };
 
-    const std::vector<string> pattern_;
+    const std::vector<string> patterns_;
   };
 };
 
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 063700e335..d053902fc7 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -2660,7 +2660,6 @@ class MatchingFilesDataset(Dataset):
     self._patterns = ops.convert_to_tensor(
       patterns, dtype=dtypes.string, name="patterns")
 
-
   def _as_variant_tensor(self):
     return gen_dataset_ops.matching_files_dataset(self._patterns)
 
-- 
GitLab


From f6b2a32657a54e2faeefa34b9516ed5a43cb5530 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 26 Sep 2018 20:06:00 -0700
Subject: [PATCH 0549/1085] Update the RestoreInternal function since Max Heap
 is changed to Min Heap

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index bfb893e856..7bb8481762 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -181,7 +181,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         int64 queue_size;
         TF_RETURN_IF_ERROR(
             reader->ReadScalar(full_name("queue_size"), &queue_size));
-        for (int i = static_cast<int>(queue_size - 1); i >= 0; --i) {
+        for (int i = 0; i < queue_size; i++) {
           string element;
           TF_RETURN_IF_ERROR(reader->ReadScalar(
               full_name(strings::StrCat("queue_element_", i)), &element));
-- 
GitLab


From 9972054a906bba9b1042410de224b570514cd9e7 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Thu, 27 Sep 2018 16:31:36 -0700
Subject: [PATCH 0550/1085] Update the test according to the recent refactoring
 of tf.data.tests

---
 tensorflow/python/data/kernel_tests/BUILD     | 39 ++++++++-----------
 .../matching_files_dataset_op_test.py         |  3 +-
 2 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 863d848c98..5a77538383 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -289,6 +289,22 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "matching_files_dataset_op_test",
+    size = "small",
+    srcs = ["matching_files_dataset_op_test.py"],
+    additional_deps = [
+        ":test_base",
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 cuda_py_test(
     name = "multi_device_iterator_test",
     size = "medium",
@@ -331,29 +347,6 @@ cuda_py_test(
     ],
 )
 
-tf_py_test(
-    name = "matching_files_dataset_op_test",
-    size = "small",
-    srcs = ["matching_files_dataset_op_test.py"],
-    additional_deps = [
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dataset_ops_gen",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:lib",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:iterator_ops",
-        "//tensorflow/python/data/ops:readers",
-    ],
-)
-
 tf_py_test(
     name = "prefetch_dataset_op_test",
     size = "small",
diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index a2c6b78256..37626355c2 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -25,6 +25,7 @@ import time
 import numpy as np
 
 from tensorflow.python.client import session
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops.dataset_ops import MatchingFilesDataset
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -32,7 +33,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class MatchingFilesDatasetTest(test.TestCase):
+class MatchingFilesDatasetTest(test_base.DatasetTestBase):
 
   def setUp(self):
     self.tmp_dir = tempfile.mkdtemp()
-- 
GitLab


From 28836116a400a65eed9494390956033eee64c18d Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 28 Sep 2018 09:08:01 -0700
Subject: [PATCH 0551/1085] Minor updates on comments

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 7bb8481762..f052cdac52 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../ops/dataset_ops.cc for a high-level
+// See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
 class MatchingFilesDatasetOp : public DatasetOpKernel {
@@ -48,7 +48,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
     std::vector<string> pattern_strs;
     pattern_strs.reserve(num_patterns);
 
-    for (int i = 0; i < num_patterns; ++i) {
+    for (int i = 0; i < num_patterns; i++) {
       pattern_strs.push_back(patterns(i));
     }
 
-- 
GitLab


From 3fac3225211b337a4691c2498cc9b743f26dded8 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 28 Sep 2018 22:02:59 -0700
Subject: [PATCH 0552/1085] Fix the issues when parallelizing IsDirectory call

---
 .../kernels/data/matching_files_dataset_op.cc | 55 ++++++++++---------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index f052cdac52..9f2b210871 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
 #include "tensorflow/core/lib/io/zlib_inputstream.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/lib/core/blocking_counter.h"
 
 namespace tensorflow {
 namespace data {
@@ -238,27 +239,36 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             }
           }
 
-          std::map<string, Status> children_dir_status;
+          std::vector<Status> children_dir_status;
+          children_dir_status.resize(children.size());
+
           // This IsDirectory call can be expensive for some FS. Parallelizing
           // it.
-          ForEach(
-              ctx, 0, children.size(),
-              [fs, &cur_dir, &children, &fixed_prefix,
-               &children_dir_status](int i) {
-                const string child_path = io::JoinPath(cur_dir, children[i]);
-                // In case the child_path doesn't start with the fixed_prefix,
-                // then we don't need to explore this path.
-                if (!str_util::StartsWith(child_path, fixed_prefix)) {
-                  children_dir_status[child_path] = Status(
-                      tensorflow::error::CANCELLED, "Operation not needed");
-                } else {
-                  children_dir_status[child_path] = fs->IsDirectory(child_path);
-                }
-              });
-
-          for (const auto& child : children) {
-            const string child_dir_path = io::JoinPath(cur_dir, child);
-            const Status child_dir_status = children_dir_status[child];
+          auto is_directory_fn = [fs, &cur_dir, &children, &fixed_prefix,
+              &children_dir_status](int i) {
+            const string child_path = io::JoinPath(cur_dir, children[i]);
+            // In case the child_path doesn't start with the fixed_prefix, then
+            // we don't need to explore this path.
+            if (!str_util::StartsWith(child_path, fixed_prefix)) {
+              children_dir_status[i] = Status(
+                  tensorflow::error::CANCELLED, "Operation not needed");
+            } else {
+              children_dir_status[i] = fs->IsDirectory(child_path);
+            }
+          };
+
+          BlockingCounter counter(children.size());
+          for (int i = 0; i < children.size(); i++) {
+            (*ctx->runner())([&is_directory_fn, &counter, i] {
+              is_directory_fn(i);
+              counter.DecrementCount();
+            });
+          }
+          counter.Wait();
+
+          for (int i = 0; i < children.size(); i++) {
+            const string child_dir_path = io::JoinPath(cur_dir, children[i]);
+            const Status child_dir_status = children_dir_status[i];
             // If the IsDirectory call was cancelled we bail.
             if (child_dir_status.code() == tensorflow::error::CANCELLED) {
               continue;
@@ -279,13 +289,6 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         return ret;
       }
 
-      static void ForEach(IteratorContext* ctx, int first, int last,
-                          const std::function<void(int)>& f) {
-        for (int i = first; i < last; i++) {
-          (*ctx->runner())([f, i] { std::bind(f, i); });
-        }
-      }
-
       mutex mu_;
       std::priority_queue<string, std::vector<string>, std::less<string>>
           filepath_queue_ GUARDED_BY(mu_);
-- 
GitLab


From 5fd998c8876a5aac4eef71801280d3fc1c8f39a2 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 28 Sep 2018 22:06:02 -0700
Subject: [PATCH 0553/1085] Remvoe the comments that are no longer accurate

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 9f2b210871..4afd84f9c7 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -34,9 +34,6 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// See documentation in ../../ops/dataset_ops.cc for a high-level
-// description of the following op.
-
 class MatchingFilesDatasetOp : public DatasetOpKernel {
  public:
   using DatasetOpKernel::DatasetOpKernel;
-- 
GitLab


From 0238bcdb51f68560ec1c3ca6469fe97953f36903 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 28 Sep 2018 22:20:32 -0700
Subject: [PATCH 0554/1085] Update the coding style

---
 .../kernel_tests/matching_files_dataset_op_test.py | 10 +++++-----
 tensorflow/python/data/ops/dataset_ops.py          | 14 +++++++-------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index 37626355c2..a000b9b816 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -46,7 +46,7 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
       open(os.path.join(self.tmp_dir, filename), 'a').close()
 
   def testEmptyDirectory(self):
-    """Test the matchingfiles dataset with an empty directory"""
+    """Test the MatchingFiles dataset with an empty directory"""
 
     dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
     with self.cached_session() as sess:
@@ -55,7 +55,7 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
         sess.run(next_element)
 
   def testSimpleDirectory(self):
-    """Test the matchingfiles dataset with a simple directory"""
+    """Test the MatchingFiles dataset with a simple directory"""
 
     filenames = ['a', 'b', 'c']
     self._touchTempFiles(filenames)
@@ -76,7 +76,7 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
         sess.run(next_element)
 
   def testFileSuffixes(self):
-    """Test the matchingfiles dataset using the suffixes of filename"""
+    """Test the MatchingFiles dataset using the suffixes of filename"""
 
     filenames = ['a.txt', 'b.py', 'c.py', 'd.pyc']
     self._touchTempFiles(filenames)
@@ -96,7 +96,7 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
         sess.run(next_element)
 
   def testFileMiddles(self):
-    """Test the matchingfiles dataset using the middles of filename"""
+    """Test the MatchingFiles dataset using the middles of filename"""
 
     filenames = ['a.txt', 'b.py', 'c.pyc']
     self._touchTempFiles(filenames)
@@ -116,7 +116,7 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
         sess.run(next_element)
 
   def testNestedDirectories(self):
-    """Test the matchingfiles dataset with nested directories"""
+    """Test the MatchingFiles dataset with nested directories"""
 
     filenames = []
     width = 8
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index d053902fc7..05a7f5d41c 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -2624,7 +2624,7 @@ class MapDataset(UnaryDataset):
     self._use_inter_op_parallelism = use_inter_op_parallelism
 
     wrapped_func = StructuredFunctionWrapper(
-      map_func, "Dataset.map()", input_dataset)
+        map_func, "Dataset.map()", input_dataset)
     self._output_classes = wrapped_func.output_classes
     self._output_shapes = wrapped_func.output_shapes
     self._output_types = wrapped_func.output_types
@@ -2633,11 +2633,11 @@ class MapDataset(UnaryDataset):
   def _as_variant_tensor(self):
     input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
     return gen_dataset_ops.map_dataset(
-      input_t,
-      self._map_func.captured_inputs,
-      f=self._map_func,
-      use_inter_op_parallelism=self._use_inter_op_parallelism,
-      **flat_structure(self))
+        input_t,
+        self._map_func.captured_inputs,
+        f=self._map_func,
+        use_inter_op_parallelism=self._use_inter_op_parallelism,
+        **flat_structure(self))
 
   @property
   def output_classes(self):
@@ -2658,7 +2658,7 @@ class MatchingFilesDataset(Dataset):
   def __init__(self, patterns):
     super(MatchingFilesDataset, self).__init__()
     self._patterns = ops.convert_to_tensor(
-      patterns, dtype=dtypes.string, name="patterns")
+        patterns, dtype=dtypes.string, name="patterns")
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.matching_files_dataset(self._patterns)
-- 
GitLab


From d87024b2e94e9dfab751ddf543c192d1a4f0f3c7 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 28 Sep 2018 22:30:23 -0700
Subject: [PATCH 0555/1085] Add the API defination pbtxt

---
 .../core/api_def/base_api/api_def_MatchingFilesDataset.pbtxt  | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_MatchingFilesDataset.pbtxt

diff --git a/tensorflow/core/api_def/base_api/api_def_MatchingFilesDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatchingFilesDataset.pbtxt
new file mode 100644
index 0000000000..ab2a33108d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_MatchingFilesDataset.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "MatchingFilesDataset"
+  visibility: HIDDEN
+}
-- 
GitLab


From c03ab93355af474cba2eeec95be355b923185d20 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Sun, 30 Sep 2018 12:58:34 -0700
Subject: [PATCH 0556/1085] Add the serialization test

---
 ...tching_files_dataset_serialization_test.py | 65 +++++++++++++++++++
 .../kernel_tests/serialization/BUILD          | 12 ++++
 2 files changed, 77 insertions(+)
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/serialization/matching_files_dataset_serialization_test.py

diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/matching_files_dataset_serialization_test.py b/tensorflow/contrib/data/python/kernel_tests/serialization/matching_files_dataset_serialization_test.py
new file mode 100644
index 0000000000..41926f7ae1
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/serialization/matching_files_dataset_serialization_test.py
@@ -0,0 +1,65 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the MatchingFilesDataset serialization."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import shutil
+import tempfile
+
+from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.ops.dataset_ops import MatchingFilesDataset
+from tensorflow.python.platform import test
+
+
+class MatchingFilesDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def _build_iterator_graph(self, test_patterns):
+    return MatchingFilesDataset(test_patterns)
+
+  def testMatchingFilesCore(self):
+    tmp_dir = tempfile.mkdtemp()
+    width = 16
+    depth = 8
+    for i in range(width):
+      for j in range(depth):
+        new_base = os.path.join(tmp_dir, str(i),
+                                *[str(dir_name) for dir_name in range(j)])
+        if not os.path.exists(new_base):
+          os.makedirs(new_base)
+        for f in ['a.txt', 'b.py', 'c.pyc']:
+          filename = os.path.join(new_base, f)
+          open(filename, 'w').close()
+
+    patterns = []
+    for i in range(depth):
+      pattern = '{}/{}/*.txt'.format(tmp_dir,
+                                     os.path.join(
+                                         *['**' for _ in range(i + 1)]))
+      patterns.append(pattern)
+
+    num_outputs = width * depth
+    self.run_core_tests(
+        lambda: self._build_iterator_graph(patterns),
+        lambda: self._build_iterator_graph(patterns[0:depth // 2]), num_outputs)
+
+    shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index e556b65b7c..e8101c038a 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -332,6 +332,18 @@ py_test(
     ],
 )
 
+py_test(
+    name = "matching_files_dataset_serialization_test",
+    size = "small",
+    srcs = ["matching_files_dataset_serialization_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":dataset_serialization_test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "optimize_dataset_serialization_test",
     size = "small",
-- 
GitLab


From 89d05bfa83f0adc1563c035c99e7f8d0ce58627f Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Sun, 30 Sep 2018 13:20:56 -0700
Subject: [PATCH 0557/1085] Resolve several issues in MatchingFiles Dataset
 kernel

1) Resolve the issue in the parallel call of IsDirectory by adding the wait of all the scheduled function to finish and revising the data structure to be thread-safe.
2) Resolve a bug in serialization.
3) Refactor the logic to be more straightforward and easy to understand.
---
 .../kernels/data/matching_files_dataset_op.cc | 107 +++++++++---------
 1 file changed, 56 insertions(+), 51 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 4afd84f9c7..f3b6769bb8 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/io/buffered_inputstream.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
 #include "tensorflow/core/lib/io/zlib_inputstream.h"
 #include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/lib/core/blocking_counter.h"
 
 namespace tensorflow {
 namespace data {
@@ -50,8 +50,6 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       pattern_strs.push_back(patterns(i));
     }
 
-    // keep the elements in the ascending order
-    std::sort(pattern_strs.begin(), pattern_strs.end());
     *output = new Dataset(ctx, std::move(pattern_strs));
   }
 
@@ -109,32 +107,39 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           // All the elements in the heap will be the matched filename or the
           // potential directory.
           if (!filepath_queue_.empty()) {
-            string cur_file = filepath_queue_.top();
+            string current_file = filepath_queue_.top();
             filepath_queue_.pop();
 
             // We can also use isDectory() here. But IsDirectory call can be
             // expensive for some FS.
-            if (ctx->env()->MatchPath(cur_file, current_pattern_)) {
+            if (ctx->env()->MatchPath(current_file, current_pattern_)) {
               Tensor filepath_tensor(ctx->allocator({}), DT_STRING, {});
-              filepath_tensor.scalar<string>()() = cur_file;
+              filepath_tensor.scalar<string>()() = current_file;
               out_tensors->emplace_back(std::move(filepath_tensor));
               *end_of_sequence = false;
               return Status::OK();
             }
 
-            // In this case, cur_file is a directory. Then create a sub-pattern
-            // to continue the search.
-            size_t pos = current_pattern_.find_first_of("*?[\\");
-            size_t len = current_pattern_.size() - pos;
-            string cur_pattern_suffix = current_pattern_.substr(pos, len);
-            string sub_pattern =
-                strings::StrCat(cur_file, "/", cur_pattern_suffix);
-            Status s = UpdateIterator(ctx, sub_pattern);
+            // In this case, current_file is a directory. Then continue the
+            // search.
+            const string& current_dir = current_file;
+            Status s = UpdateIterator(ctx, current_dir, current_pattern_);
             ret.Update(s);
           } else {
             // search a new pattern
             current_pattern_ = dataset()->patterns_[current_pattern_index_];
-            Status s = UpdateIterator(ctx, current_pattern_);
+            string fixed_prefix = current_pattern_.substr(
+                0, current_pattern_.find_first_of("*?[\\"));
+            string current_dir(io::Dirname(fixed_prefix));
+
+            // If dir is empty then we need to fix up fixed_prefix and
+            // current_pattern_ to include . as the top level directory.
+            if (current_dir.empty()) {
+              current_dir = ".";
+              current_pattern_ = io::JoinPath(current_dir, current_pattern_);
+            }
+
+            Status s = UpdateIterator(ctx, current_dir, current_pattern_);
             ret.Update(s);
             ++current_pattern_index_;
           }
@@ -156,13 +161,17 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         if (!filepath_queue_.empty()) {
           TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("queue_size"),
                                                  filepath_queue_.size()));
-          for (int i = 0; i < filepath_queue_.size(); ++i) {
+          int i = 0;
+          while (!filepath_queue_.empty()) {
             TF_RETURN_IF_ERROR(writer->WriteScalar(
                 full_name(strings::StrCat("queue_element_", i)),
                 filepath_queue_.top()));
             filepath_queue_.pop();
+            i++;
           }
         }
+
+        return Status::OK();
       }
 
       Status RestoreInternal(IteratorContext* ctx,
@@ -176,32 +185,27 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_pattern"),
                                               &current_pattern_));
 
-        int64 queue_size;
-        TF_RETURN_IF_ERROR(
-            reader->ReadScalar(full_name("queue_size"), &queue_size));
-        for (int i = 0; i < queue_size; i++) {
-          string element;
-          TF_RETURN_IF_ERROR(reader->ReadScalar(
-              full_name(strings::StrCat("queue_element_", i)), &element));
-          filepath_queue_.push(element);
+        if (reader->Contains(full_name("queue_size"))) {
+          int64 queue_size;
+          TF_RETURN_IF_ERROR(
+              reader->ReadScalar(full_name("queue_size"), &queue_size));
+          for (int i = 0; i < queue_size; i++) {
+            string element;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                full_name(strings::StrCat("queue_element_", i)), &element));
+            filepath_queue_.push(element);
+          }
         }
+
         return Status::OK();
       }
 
      private:
-      Status UpdateIterator(IteratorContext* ctx, const string& pattern)
+      Status UpdateIterator(IteratorContext* ctx, const string& dir,
+                            const string& eval_pattern)
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        string fixed_prefix = pattern.substr(0, pattern.find_first_of("*?[\\"));
-        string eval_pattern = pattern;
-        string dir(io::Dirname(fixed_prefix));
-
-        // If dir is empty then we need to fix up fixed_prefix and eval_pattern
-        // to include . as the top level directory.
-        if (dir.empty()) {
-          dir = ".";
-          fixed_prefix = io::JoinPath(dir, fixed_prefix);
-          eval_pattern = io::JoinPath(dir, pattern);
-        }
+        string fixed_prefix =
+            eval_pattern.substr(0, eval_pattern.find_first_of("*?[\\"));
 
         FileSystem* fs;
         TF_RETURN_IF_ERROR(ctx->env()->GetFileSystemForFile(dir, &fs));
@@ -215,21 +219,21 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
         // DFS to find the first element in the iterator.
         while (!filepath_queue_.empty()) {
-          string cur_dir = filepath_queue_.top();
+          string current_dir = filepath_queue_.top();
           filepath_queue_.pop();
           std::vector<string> children;
-          Status s = fs->GetChildren(cur_dir, &children);
+          Status s = fs->GetChildren(current_dir, &children);
           ret.Update(s);
 
-          // If cur_dir has no children, there will two possible situations: 1)
-          // the cur_dir is an empty dir; 2) the cur_dir is actual a file
-          // instead of a director. For the first one, continue to search the
-          // heap. For the second one, if the file matches the pattern, add
+          // If current_dir has no children, there will two possible situations:
+          // 1) the current_dir is an empty dir; 2) the current_dir is actual a
+          // file instead of a director. For the first one, continue to search
+          // the heap. For the second one, if the file matches the pattern, add
           // it to the heap and finish the search; otherwise, continue the next
           // search.
           if (children.empty()) {
-            if (ctx->env()->MatchPath(cur_dir, eval_pattern)) {
-              filepath_queue_.push(cur_dir);
+            if (ctx->env()->MatchPath(current_dir, eval_pattern)) {
+              filepath_queue_.push(current_dir);
               return ret;
             } else {
               continue;
@@ -241,14 +245,14 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
           // This IsDirectory call can be expensive for some FS. Parallelizing
           // it.
-          auto is_directory_fn = [fs, &cur_dir, &children, &fixed_prefix,
-              &children_dir_status](int i) {
-            const string child_path = io::JoinPath(cur_dir, children[i]);
+          auto is_directory_fn = [fs, &current_dir, &children, &fixed_prefix,
+                                  &children_dir_status](int i) {
+            const string child_path = io::JoinPath(current_dir, children[i]);
             // In case the child_path doesn't start with the fixed_prefix, then
             // we don't need to explore this path.
             if (!str_util::StartsWith(child_path, fixed_prefix)) {
-              children_dir_status[i] = Status(
-                  tensorflow::error::CANCELLED, "Operation not needed");
+              children_dir_status[i] =
+                  Status(tensorflow::error::CANCELLED, "Operation not needed");
             } else {
               children_dir_status[i] = fs->IsDirectory(child_path);
             }
@@ -264,7 +268,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           counter.Wait();
 
           for (int i = 0; i < children.size(); i++) {
-            const string child_dir_path = io::JoinPath(cur_dir, children[i]);
+            const string child_dir_path =
+                io::JoinPath(current_dir, children[i]);
             const Status child_dir_status = children_dir_status[i];
             // If the IsDirectory call was cancelled we bail.
             if (child_dir_status.code() == tensorflow::error::CANCELLED) {
@@ -287,7 +292,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       }
 
       mutex mu_;
-      std::priority_queue<string, std::vector<string>, std::less<string>>
+      std::priority_queue<string, std::vector<string>, std::greater<string>>
           filepath_queue_ GUARDED_BY(mu_);
       size_t current_pattern_index_ GUARDED_BY(mu_) = 0;
       string current_pattern_ GUARDED_BY(mu_);
-- 
GitLab


From 68a4cee2d99672d6e7f45a2d9659f8cab54a2d42 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Sun, 30 Sep 2018 13:33:07 -0700
Subject: [PATCH 0558/1085] Adjust the size of nested directories to make it
 finish in 60s for different hardwares

---
 .../python/data/kernel_tests/matching_files_dataset_op_test.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index a000b9b816..7c72b8043f 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -156,7 +156,7 @@ class MatchingFilesDatasetBenchmark(test.Benchmark):
 
   def benchmarkNestedDirectories(self):
     tmp_dir = tempfile.mkdtemp()
-    width = 1000
+    width = 500
     depth = 10
     for i in range(width):
       for j in range(depth):
-- 
GitLab


From 4cf6f3c379fd3f0c5c8edc08ff216022a4749c8f Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Mon, 1 Oct 2018 22:55:01 -0700
Subject: [PATCH 0559/1085] Fix the coding style issue in
 tensorflow/core/ops/dataset_ops.cc

---
 tensorflow/core/ops/dataset_ops.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 14596e7f4e..38a97ae653 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -623,7 +623,7 @@ REGISTER_OP("MatchingFilesDataset")
     .Input("patterns: string")
     .Output("handle: variant")
     .SetIsStateful()  // TODO(b/65524810): Source dataset ops must be marked
-        // stateful to inhibit constant folding.
+                      // stateful to inhibit constant folding.
     .SetShapeFn([](shape_inference::InferenceContext* c) {
       shape_inference::ShapeHandle unused;
       // `patterns` must be a scalar or a vector.
-- 
GitLab


From e851cdcc7e95d501b07af35dd40e4f938c7aa38c Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Mon, 1 Oct 2018 23:11:13 -0700
Subject: [PATCH 0560/1085] Add dependencies for
 matching_files_dataset_serialization_test.py

---
 .../python/data/experimental/kernel_tests/serialization/BUILD    | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index e8101c038a..a5bfd3afd5 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -339,6 +339,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_serialization_test_base",
+        "//tensorflow/python:client_testlib",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
     ],
-- 
GitLab


From 847ddaedf2cdd59c2d6bf49576466ef440a903f7 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Tue, 2 Oct 2018 00:07:44 -0700
Subject: [PATCH 0561/1085] Move matching_files_dataset_serialization_test.py
 to tf.data.experimental

---
 .../serialization/matching_files_dataset_serialization_test.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename tensorflow/{contrib/data/python => python/data/experimental}/kernel_tests/serialization/matching_files_dataset_serialization_test.py (95%)

diff --git a/tensorflow/contrib/data/python/kernel_tests/serialization/matching_files_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py
similarity index 95%
rename from tensorflow/contrib/data/python/kernel_tests/serialization/matching_files_dataset_serialization_test.py
rename to tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py
index 41926f7ae1..d2e6f92726 100644
--- a/tensorflow/contrib/data/python/kernel_tests/serialization/matching_files_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py
@@ -21,7 +21,7 @@ import os
 import shutil
 import tempfile
 
-from tensorflow.contrib.data.python.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.ops.dataset_ops import MatchingFilesDataset
 from tensorflow.python.platform import test
 
-- 
GitLab


From c45d576f8d332c386507fab0ecc443624e1e5f5b Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Tue, 2 Oct 2018 09:49:19 -0700
Subject: [PATCH 0562/1085] Add the implementation of _inputs() function

---
 tensorflow/python/data/ops/dataset_ops.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 05a7f5d41c..8f3190d3bb 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -2663,6 +2663,9 @@ class MatchingFilesDataset(Dataset):
   def _as_variant_tensor(self):
     return gen_dataset_ops.matching_files_dataset(self._patterns)
 
+  def _inputs(self):
+    return []
+
   @property
   def output_classes(self):
     return ops.Tensor
-- 
GitLab


From c69e84b5fb1ae7d0d272c37a59a6c94839211af1 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Tue, 2 Oct 2018 09:59:45 -0700
Subject: [PATCH 0563/1085] Make the os.makedirs compatable with Python2

---
 .../python/data/kernel_tests/matching_files_dataset_op_test.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index 7c72b8043f..6e5d845922 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -125,7 +125,7 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
       for j in range(depth):
         new_base = os.path.join(self.tmp_dir, str(i),
                                 *[str(dir_name) for dir_name in range(j)])
-        os.makedirs(new_base, exist_ok=True)
+        os.makedirs(new_base)
         for f in ['a.txt', 'b.py', 'c.pyc']:
           filename = os.path.join(new_base, f)
           filenames.append(filename)
-- 
GitLab


From ce2e925493cead88de6546ad2754d953694d91c3 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Tue, 2 Oct 2018 11:26:45 -0700
Subject: [PATCH 0564/1085] Rename the variable name from  to  to avoid the
 built-in

---
 .../data/kernel_tests/matching_files_dataset_op_test.py       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index 6e5d845922..ac8beaf9e7 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -140,8 +140,8 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
     dataset = MatchingFilesDataset(patterns)
     with self.cached_session() as sess:
       next_element = dataset.make_one_shot_iterator().get_next()
-      expected_filenames = [compat.as_bytes(file)
-                            for file in filenames if file.endswith('.txt')]
+      expected_filenames = [compat.as_bytes(filename)
+                            for filename in filenames if filename.endswith('.txt')]
       actual_filenames = []
       while True:
         try:
-- 
GitLab


From 1a35352d7368adefdca0cf8eaa58ea589d4c48c8 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Tue, 2 Oct 2018 12:21:17 -0700
Subject: [PATCH 0565/1085] short the line to be inside 80 characters

---
 .../python/data/kernel_tests/matching_files_dataset_op_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index ac8beaf9e7..cd2bb118fb 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -141,7 +141,8 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
     with self.cached_session() as sess:
       next_element = dataset.make_one_shot_iterator().get_next()
       expected_filenames = [compat.as_bytes(filename)
-                            for filename in filenames if filename.endswith('.txt')]
+                            for filename in filenames
+                            if filename.endswith('.txt')]
       actual_filenames = []
       while True:
         try:
-- 
GitLab


From 3dcb7ebf144fce8d8825e2f12f3707c3e5ca5995 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Thu, 4 Oct 2018 23:29:00 -0700
Subject: [PATCH 0566/1085] Address the comments from the internal review

---
 .../kernels/data/matching_files_dataset_op.cc | 110 ++++++++++--------
 ...tching_files_dataset_serialization_test.py |   6 +-
 .../matching_files_dataset_op_test.py         |  27 +++--
 3 files changed, 83 insertions(+), 60 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index f3b6769bb8..f11dd238df 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -46,7 +46,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
     std::vector<string> pattern_strs;
     pattern_strs.reserve(num_patterns);
 
-    for (int i = 0; i < num_patterns; i++) {
+    for (size_t i = 0; i < num_patterns; i++) {
       pattern_strs.push_back(patterns(i));
     }
 
@@ -100,39 +100,39 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
         mutex_lock l(mu_);
-        Status ret;
+        Status ret;  // Status to return
 
         while (!filepath_queue_.empty() ||
                current_pattern_index_ < dataset()->patterns_.size()) {
-          // All the elements in the heap will be the matched filename or the
-          // potential directory.
+          // All the elements in the heap will be the matched filenames or the
+          // potential directories.
           if (!filepath_queue_.empty()) {
-            string current_file = filepath_queue_.top();
+            const PathStatus current_path = filepath_queue_.top();
             filepath_queue_.pop();
 
-            // We can also use isDectory() here. But IsDirectory call can be
-            // expensive for some FS.
-            if (ctx->env()->MatchPath(current_file, current_pattern_)) {
+            if (!current_path.second) {
               Tensor filepath_tensor(ctx->allocator({}), DT_STRING, {});
-              filepath_tensor.scalar<string>()() = current_file;
+              filepath_tensor.scalar<string>()() =
+                  std::move(current_path.first);
               out_tensors->emplace_back(std::move(filepath_tensor));
               *end_of_sequence = false;
               return Status::OK();
             }
 
-            // In this case, current_file is a directory. Then continue the
+            // In this case, current_path is a directory. Then continue the
             // search.
-            const string& current_dir = current_file;
-            Status s = UpdateIterator(ctx, current_dir, current_pattern_);
+            Status s =
+                UpdateIterator(ctx, current_path.first, current_pattern_);
             ret.Update(s);
           } else {
             // search a new pattern
             current_pattern_ = dataset()->patterns_[current_pattern_index_];
-            string fixed_prefix = current_pattern_.substr(
-                0, current_pattern_.find_first_of("*?[\\"));
+            StringPiece fixed_prefix =
+                StringPiece(current_pattern_)
+                    .substr(0, current_pattern_.find_first_of("*?[\\"));
             string current_dir(io::Dirname(fixed_prefix));
 
-            // If dir is empty then we need to fix up fixed_prefix and
+            // If current_dir is empty then we need to fix up fixed_prefix and
             // current_pattern_ to include . as the top level directory.
             if (current_dir.empty()) {
               current_dir = ".";
@@ -146,7 +146,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         }
 
         *end_of_sequence = true;
-        return Status::OK();
+        return ret;
       }
 
      protected:
@@ -163,9 +163,12 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                                                  filepath_queue_.size()));
           int i = 0;
           while (!filepath_queue_.empty()) {
+            TF_RETURN_IF_ERROR(
+                writer->WriteScalar(full_name(strings::StrCat("path_", i)),
+                                    filepath_queue_.top().first));
             TF_RETURN_IF_ERROR(writer->WriteScalar(
-                full_name(strings::StrCat("queue_element_", i)),
-                filepath_queue_.top()));
+                full_name(strings::StrCat("path_status_", i)),
+                filepath_queue_.top().second));
             filepath_queue_.pop();
             i++;
           }
@@ -190,10 +193,13 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           TF_RETURN_IF_ERROR(
               reader->ReadScalar(full_name("queue_size"), &queue_size));
           for (int i = 0; i < queue_size; i++) {
-            string element;
+            string path;
+            int64 path_status;
             TF_RETURN_IF_ERROR(reader->ReadScalar(
-                full_name(strings::StrCat("queue_element_", i)), &element));
-            filepath_queue_.push(element);
+                full_name(strings::StrCat("path_", i)), &path));
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                full_name(strings::StrCat("path_status_", i)), &path_status));
+            filepath_queue_.push(PathStatus(path, path_status));
           }
         }
 
@@ -204,55 +210,56 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       Status UpdateIterator(IteratorContext* ctx, const string& dir,
                             const string& eval_pattern)
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        string fixed_prefix =
-            eval_pattern.substr(0, eval_pattern.find_first_of("*?[\\"));
+        StringPiece fixed_prefix =
+            StringPiece(eval_pattern)
+                .substr(0, eval_pattern.find_first_of("*?[\\"));
 
         FileSystem* fs;
         TF_RETURN_IF_ERROR(ctx->env()->GetFileSystemForFile(dir, &fs));
 
-        filepath_queue_.push(dir);
+        filepath_queue_.push(PathStatus(dir, true));
         Status ret;  // Status to return
-        // children_dir_status holds is_dir status for children. It can have
-        // three possible values: OK for true; FAILED_PRECONDITION for false;
-        // CANCELLED if we don't calculate IsDirectory (we might do that because
-        // there isn't any point in exploring that child path).
 
         // DFS to find the first element in the iterator.
         while (!filepath_queue_.empty()) {
-          string current_dir = filepath_queue_.top();
+          const PathStatus current_path = filepath_queue_.top();
+
+          // All the files in the heap are matched with the pattern, so finish
+          // the search if current_path is a file.
+          if (!current_path.second) {
+            return Status::OK();
+          }
+
           filepath_queue_.pop();
+
+          // If current_path is a directory, search its children.
+          const string& current_dir = current_path.first;
           std::vector<string> children;
           Status s = fs->GetChildren(current_dir, &children);
           ret.Update(s);
 
-          // If current_dir has no children, there will two possible situations:
-          // 1) the current_dir is an empty dir; 2) the current_dir is actual a
-          // file instead of a director. For the first one, continue to search
-          // the heap. For the second one, if the file matches the pattern, add
-          // it to the heap and finish the search; otherwise, continue the next
-          // search.
-          if (children.empty()) {
-            if (ctx->env()->MatchPath(current_dir, eval_pattern)) {
-              filepath_queue_.push(current_dir);
-              return ret;
-            } else {
-              continue;
-            }
+          // If GetChildren() fails, continue the next search.
+          if (!s.ok()) {
+            continue;
           }
 
+          // children_dir_status holds is_dir status for children. It can have
+          // three possible values: OK for true; FAILED_PRECONDITION for false;
+          // CANCELLED if we don't calculate IsDirectory (we might do that
+          // because there isn't any point in exploring that child path).
           std::vector<Status> children_dir_status;
           children_dir_status.resize(children.size());
 
           // This IsDirectory call can be expensive for some FS. Parallelizing
           // it.
-          auto is_directory_fn = [fs, &current_dir, &children, &fixed_prefix,
+          auto is_directory_fn = [fs, current_dir, &children, &fixed_prefix,
                                   &children_dir_status](int i) {
             const string child_path = io::JoinPath(current_dir, children[i]);
             // In case the child_path doesn't start with the fixed_prefix, then
             // we don't need to explore this path.
             if (!str_util::StartsWith(child_path, fixed_prefix)) {
               children_dir_status[i] =
-                  Status(tensorflow::error::CANCELLED, "Operation not needed");
+                  errors::Cancelled("Operation not needed");
             } else {
               children_dir_status[i] = fs->IsDirectory(child_path);
             }
@@ -268,22 +275,24 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           counter.Wait();
 
           for (int i = 0; i < children.size(); i++) {
-            const string child_dir_path =
+            const string& child_dir_path =
                 io::JoinPath(current_dir, children[i]);
-            const Status child_dir_status = children_dir_status[i];
+            const Status& child_dir_status = children_dir_status[i];
+
             // If the IsDirectory call was cancelled we bail.
             if (child_dir_status.code() == tensorflow::error::CANCELLED) {
+              ret.Update(child_dir_status);
               continue;
             }
 
             if (child_dir_status.ok()) {
               // push the child dir for next search
-              filepath_queue_.push(child_dir_path);
+              filepath_queue_.push(PathStatus(child_dir_path, true));
             } else {
               // This case will be a file: if the file matches the pattern, push
               // it to the heap; otherwise, ignore it.
               if (ctx->env()->MatchPath(child_dir_path, eval_pattern)) {
-                filepath_queue_.push(child_dir_path);
+                filepath_queue_.push(PathStatus(child_dir_path, false));
               }
             }
           }
@@ -292,7 +301,10 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       }
 
       mutex mu_;
-      std::priority_queue<string, std::vector<string>, std::greater<string>>
+      // True means the path is a directory; False means the path is a filename.
+      typedef std::pair<string, bool> PathStatus;
+      std::priority_queue<PathStatus, std::vector<PathStatus>,
+                          std::greater<PathStatus>>
           filepath_queue_ GUARDED_BY(mu_);
       size_t current_pattern_index_ GUARDED_BY(mu_) = 0;
       string current_pattern_ GUARDED_BY(mu_);
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py
index d2e6f92726..41146c9786 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py
@@ -48,9 +48,9 @@ class MatchingFilesDatasetSerializationTest(
 
     patterns = []
     for i in range(depth):
-      pattern = '{}/{}/*.txt'.format(tmp_dir,
-                                     os.path.join(
-                                         *['**' for _ in range(i + 1)]))
+      pattern = os.path.join(tmp_dir,
+                             os.path.join(*['**' for _ in range(i + 1)]),
+                             '*.txt')
       patterns.append(pattern)
 
     num_outputs = width * depth
diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index cd2bb118fb..4b59500bd4 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -45,6 +45,16 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
     for filename in filenames:
       open(os.path.join(self.tmp_dir, filename), 'a').close()
 
+  def testNonExistingDirectory(self):
+    """Test the MatchingFiles dataset with a non-existing directory"""
+
+    self.tearDown()
+    dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
+    with self.cached_session() as sess:
+      next_element = dataset.make_one_shot_iterator().get_next()
+      with self.assertRaises(errors.NotFoundError):
+        sess.run(next_element)
+
   def testEmptyDirectory(self):
     """Test the MatchingFiles dataset with an empty directory"""
 
@@ -98,15 +108,15 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
   def testFileMiddles(self):
     """Test the MatchingFiles dataset using the middles of filename"""
 
-    filenames = ['a.txt', 'b.py', 'c.pyc']
+    filenames = ['aa.txt', 'bb.py', 'bbc.pyc', 'cc.pyc']
     self._touchTempFiles(filenames)
 
-    dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*.py*'))
+    dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, 'b*.py*'))
     with self.cached_session() as sess:
       next_element = dataset.make_one_shot_iterator().get_next()
       expected_filenames = []
       actual_filenames = []
-      for filename in filenames[1:]:
+      for filename in filenames[1:3]:
         expected_filenames.append(
             compat.as_bytes(os.path.join(self.tmp_dir, filename)))
         actual_filenames.append(compat.as_bytes(sess.run(next_element)))
@@ -133,8 +143,8 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
 
     patterns = []
     for i in range(depth):
-      pattern = '{}/{}/*.txt'.format(
-          self.tmp_dir, os.path.join(*['**' for _ in range(i + 1)]))
+      pattern = os.path.join(
+          self.tmp_dir, os.path.join(*['**' for _ in range(i + 1)]), '*.txt')
       patterns.append(pattern)
 
     dataset = MatchingFilesDataset(patterns)
@@ -171,9 +181,10 @@ class MatchingFilesDatasetBenchmark(test.Benchmark):
 
     patterns = []
     for i in range(depth):
-      pattern = '{}/{}/*.txt'.format(tmp_dir,
-                                     os.path.join(
-                                         *['**' for _ in range(i + 1)]))
+      pattern = os.path.join(tmp_dir,
+                             os.path.join(*['**' for _ in range(i + 1)]),
+                             '*.txt')
+
       patterns.append(pattern)
 
     deltas = []
-- 
GitLab


From efcefef05d317e94b107bf3c56906c653c217a31 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 5 Oct 2018 00:46:44 -0700
Subject: [PATCH 0567/1085] Bail the error

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index f11dd238df..7dd40c9e44 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -281,7 +281,6 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
             // If the IsDirectory call was cancelled we bail.
             if (child_dir_status.code() == tensorflow::error::CANCELLED) {
-              ret.Update(child_dir_status);
               continue;
             }
 
-- 
GitLab


From a79bf6cd55588d9122e7ec4fdf0ff9e1d50a43d6 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 5 Oct 2018 14:46:02 -0700
Subject: [PATCH 0568/1085] Minor change on coding style

---
 .../core/kernels/data/matching_files_dataset_op.cc     | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 7dd40c9e44..e2aa45b38a 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -100,7 +100,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
         mutex_lock l(mu_);
-        Status ret;  // Status to return
+        Status ret;
 
         while (!filepath_queue_.empty() ||
                current_pattern_index_ < dataset()->patterns_.size()) {
@@ -121,9 +121,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
             // In this case, current_path is a directory. Then continue the
             // search.
-            Status s =
-                UpdateIterator(ctx, current_path.first, current_pattern_);
-            ret.Update(s);
+            ret.Update(
+                UpdateIterator(ctx, current_path.first, current_pattern_));
           } else {
             // search a new pattern
             current_pattern_ = dataset()->patterns_[current_pattern_index_];
@@ -139,8 +138,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
               current_pattern_ = io::JoinPath(current_dir, current_pattern_);
             }
 
-            Status s = UpdateIterator(ctx, current_dir, current_pattern_);
-            ret.Update(s);
+            ret.Update(UpdateIterator(ctx, current_dir, current_pattern_));
             ++current_pattern_index_;
           }
         }
-- 
GitLab


From 220c0f90af05ed1ca86831258888cc80757654fd Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Mon, 8 Oct 2018 15:00:36 -0700
Subject: [PATCH 0569/1085] [XLA] Simplify loop nesting in HandleConvolution

The calculation of a spatial coordinate in the kernel and activations is not
dependent on which part of the contracted dimension (input feature) we are in.

Rather than nesting the loops, the loops can be siblings:
- One loop over spatial dimensions
- One loop over the input feature group

This reduces the nesting depth which makes the code a little more readable and
might be slightly faster due work invariant in the spatial loop getting hoisted
out.

PiperOrigin-RevId: 216255839
---
 .../xla/service/hlo_evaluator_typed_visitor.h | 96 +++++++++----------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index a450dc6ff5..84fbbd3e0c 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -1072,66 +1072,66 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
 
       // Convolve input feature with kernel.
       do {
+        // Find corresponding spatial dimension index for input (lhs).
+        int64 lhs_linear_spatial_index = 0;
+        int64 rhs_linear_spatial_index = 0;
+        for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) {
+          // Spatial dimension number for input (lhs) and output.
+          const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki);
+          const int64 output_spatial_dim = dnums.output_spatial_dimensions(ki);
+
+          // Calculate lhs (input) index without taking base dilation into
+          // account.
+          const auto& window_dim = window.dimensions(ki);
+          const int64 undilated_index =
+              out_index[output_spatial_dim] * window_dim.stride() -
+              window_dim.padding_low() +
+              rhs_spatial_index[ki] * window_dim.window_dilation();
+          // Skip if the lhs (input) index is to be dilated.  As an
+          // optimization, skip this mod if there's no dilation.
+          if (window_dim.base_dilation() > 1 &&
+              undilated_index % window_dim.base_dilation() != 0) {
+            goto cnt;
+          }
+
+          // Calculate the actual lhs (input) index after dilation.  As an
+          // optimization, skip this integer divide if there's no dilation.
+          int64 lhs_spatial_index;
+          if (window_dim.base_dilation() > 1) {
+            lhs_spatial_index = undilated_index / window_dim.base_dilation();
+          } else {
+            lhs_spatial_index = undilated_index;
+          }
+
+          // Skip if input index is not in bounds.
+          if (!(lhs_spatial_index >= 0 &&
+                lhs_spatial_index < lhs_shape.dimensions(input_spatial_dim))) {
+            goto cnt;
+          }
+
+          lhs_linear_spatial_index +=
+              lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim];
+          rhs_linear_spatial_index +=
+              (window_dim.window_reversal()
+                   ? ((window_dim.size() - 1) - rhs_spatial_index[ki])
+                   : rhs_spatial_index[ki]) *
+              rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)];
+        }
+
         for (int64 rhs_iz = 0; rhs_iz < input_feature_group_size; ++rhs_iz) {
           const int64 iz =
               feature_group_index * input_feature_group_size + rhs_iz;
 
-          int64 lhs_linear_index = 0;
+          int64 lhs_linear_index = lhs_linear_spatial_index;
           lhs_linear_index += out_index[output_batch_dim] *
                               lhs_dim_multipliers[input_batch_dim];
           lhs_linear_index += iz * lhs_dim_multipliers[input_z_dim];
 
-          int64 rhs_linear_index = 0;
+          int64 rhs_linear_index = rhs_linear_spatial_index;
           rhs_linear_index += out_index[output_z_dim] *
                               rhs_dim_multipliers[kernel_output_z_dim];
           rhs_linear_index += rhs_iz * rhs_dim_multipliers[kernel_input_z_dim];
 
-          // Find corresponding spatial dimension index for input (lhs).
-          for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) {
-            // Spatial dimension number for input (lhs) and output.
-            const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki);
-            const int64 output_spatial_dim =
-                dnums.output_spatial_dimensions(ki);
-
-            // Calculate lhs (input) index without taking base dilation into
-            // account.
-            const auto& window_dim = window.dimensions(ki);
-            const int64 undilated_index =
-                out_index[output_spatial_dim] * window_dim.stride() -
-                window_dim.padding_low() +
-                rhs_spatial_index[ki] * window_dim.window_dilation();
-            // Skip if the lhs (input) index is to be dilated.  As an
-            // optimization, skip this mod if there's no dilation.
-            if (window_dim.base_dilation() > 1 &&
-                undilated_index % window_dim.base_dilation() != 0) {
-              goto cnt;
-            }
-
-            // Calculate the actual lhs (input) index after dilation.  As an
-            // optimization, skip this integer divide if there's no dilation.
-            int64 lhs_spatial_index;
-            if (window_dim.base_dilation() > 1) {
-              lhs_spatial_index = undilated_index / window_dim.base_dilation();
-            } else {
-              lhs_spatial_index = undilated_index;
-            }
-            lhs_linear_index +=
-                lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim];
-
-            // Skip if input index is not in bounds.
-            if (!(lhs_spatial_index >= 0 &&
-                  lhs_spatial_index <
-                      lhs_shape.dimensions(input_spatial_dim))) {
-              goto cnt;
-            }
-
-            rhs_linear_index +=
-                (window_dim.window_reversal()
-                     ? ((window_dim.size() - 1) - rhs_spatial_index[ki])
-                     : rhs_spatial_index[ki]) *
-                rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)];
-          }
-
           result_val +=
               static_cast<ElementwiseT>(lhs_literal_data[lhs_linear_index]) *
               static_cast<ElementwiseT>(rhs_literal_data[rhs_linear_index]);
-- 
GitLab


From 5da3cebe00111aa43e34b5a3fc12d1a97b838ba7 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 15:02:13 -0700
Subject: [PATCH 0570/1085] Automated rollback of commit
 09b0fc199129e0f487a39741bdf674cf09035cbc

PiperOrigin-RevId: 216256115
---
 .../core/kernels/data/shuffle_dataset_op.cc   |  2 +-
 .../data/experimental/kernel_tests/BUILD      | 13 ------
 .../kernel_tests/random_dataset_test.py       | 45 -------------------
 .../kernel_tests/shuffle_and_repeat_test.py   | 21 +--------
 .../data/experimental/ops/random_ops.py       | 21 ++-------
 .../data/experimental/ops/shuffle_ops.py      | 21 ++-------
 tensorflow/python/data/kernel_tests/BUILD     |  1 -
 .../kernel_tests/shuffle_dataset_op_test.py   | 25 +----------
 tensorflow/python/data/ops/dataset_ops.py     | 22 ++-------
 tensorflow/python/data/util/BUILD             |  1 -
 tensorflow/python/data/util/random_seed.py    |  5 +--
 .../python/data/util/random_seed_test.py      | 13 +-----
 12 files changed, 16 insertions(+), 174 deletions(-)
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py

diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 9f54c381a9..66466d6a36 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -485,7 +485,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
                      int64 buffer_size, int64 seed, int64 seed2, int64 count)
         : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
-          seed2_(seed2) {}
+          seed2_(seed) {}
 
     string DebugString() const override {
       return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_,
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index a67f6ff031..4eef9580ad 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -453,18 +453,6 @@ cuda_py_test(
     tags = ["no_windows_gpu"],
 )
 
-py_test(
-    name = "random_dataset_test",
-    srcs = ["random_dataset_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python/data/experimental/ops:random_ops",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
 py_library(
     name = "reader_dataset_ops_test_base",
     testonly = 1,
@@ -574,7 +562,6 @@ py_test(
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
     ],
 )
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py
deleted file mode 100644
index d403a575ec..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/random_dataset_test.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for `tf.data.experimental.RandomDataset()`."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from absl.testing import parameterized
-
-from tensorflow.python.data.experimental.ops import random_ops
-from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import errors
-
-
-class RandomDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      ("NoSeed", None),
-      ("WithSeed", 42),
-  )
-  def testZipRandomDataset(self, seed):
-    dataset = random_ops.RandomDataset(seed=seed).take(30)
-    dataset = dataset_ops.Dataset.zip((dataset, dataset))
-    iterator = dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(30):
-        x, y = sess.run(next_element)
-        self.assertEqual(x, y)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
index 883169495f..c208963a86 100644
--- a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
@@ -17,7 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.experimental.ops import shuffle_ops
@@ -28,7 +27,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 
 
-class ShuffleAndRepeatTest(test_base.DatasetTestBase, parameterized.TestCase):
+class ShuffleAndRepeatTest(test_base.DatasetTestBase):
 
   def _build_ds(self, seed, count=5, num_elements=20):
     return dataset_ops.Dataset.range(num_elements).apply(
@@ -111,24 +110,6 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.session(graph=g) as sess:
         sess.run(get_next_op)
 
-  @parameterized.named_parameters(
-      ("NoSeed", None),
-      ("WithSeed", 42),
-  )
-  def testShuffleAndRepeatAndZipDataset(self, seed):
-    dataset = dataset_ops.Dataset.range(10).apply(
-        shuffle_ops.shuffle_and_repeat(10, count=3, seed=seed))
-    dataset = dataset_ops.Dataset.zip((dataset, dataset))
-    iterator = dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(30):
-        x, y = sess.run(next_element)
-        self.assertEqual(x, y)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/ops/random_ops.py b/tensorflow/python/data/experimental/ops/random_ops.py
index 25d7fbf691..e3a2aeab31 100644
--- a/tensorflow/python/data/experimental/ops/random_ops.py
+++ b/tensorflow/python/data/experimental/ops/random_ops.py
@@ -33,26 +33,13 @@ class RandomDataset(dataset_ops.DatasetSource):
   def __init__(self, seed=None):
     """A `Dataset` of pseudorandom values."""
     super(RandomDataset, self).__init__()
-
-    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
-    # is iterated over, and cache it in `self._graph_seed_map`. This supports
-    # two features: iterating over the same `ShuffleDataset` twice in the same
-    # pipeline and observing the same order (by tying the seeds together with
-    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
-    # which requires the stateful RNG op to be created inside the same graph as
-    # the dataset.
-    self._original_seed = seed
-    self._graph_seed_map = {}
+    self._seed, self._seed2 = random_seed.get_seed(seed)
 
   def _as_variant_tensor(self):
-    try:
-      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
-    except KeyError:
-      seed, seed2 = random_seed.get_seed(self._original_seed)
-      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
-
     return gen_dataset_ops.random_dataset(
-        seed=seed, seed2=seed2, **dataset_ops.flat_structure(self))
+        seed=self._seed,
+        seed2=self._seed2,
+        **dataset_ops.flat_structure(self))
 
   @property
   def output_classes(self):
diff --git a/tensorflow/python/data/experimental/ops/shuffle_ops.py b/tensorflow/python/data/experimental/ops/shuffle_ops.py
index a82e4b7d09..a4307212da 100644
--- a/tensorflow/python/data/experimental/ops/shuffle_ops.py
+++ b/tensorflow/python/data/experimental/ops/shuffle_ops.py
@@ -39,32 +39,17 @@ class _ShuffleAndRepeatDataset(dataset_ops.UnaryDataset):
     else:
       self._count = ops.convert_to_tensor(
           count, dtype=dtypes.int64, name="count")
-
-    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
-    # is iterated over, and cache it in `self._graph_seed_map`. This supports
-    # two features: iterating over the same `ShuffleDataset` twice in the same
-    # pipeline and observing the same order (by tying the seeds together with
-    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
-    # which requires the stateful RNG op to be created inside the same graph as
-    # the dataset.
-    self._original_seed = seed
-    self._graph_seed_map = {}
+    self._seed, self._seed2 = random_seed.get_seed(seed)
 
   def _as_variant_tensor(self):
-    try:
-      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
-    except KeyError:
-      seed, seed2 = random_seed.get_seed(self._original_seed)
-      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
-
     # pylint: disable=protected-access
     input_resource = self._input_dataset._as_variant_tensor()
     return gen_dataset_ops.shuffle_and_repeat_dataset(
         input_resource,
         buffer_size=self._buffer_size,
         count=self._count,
-        seed=seed,
-        seed2=seed2,
+        seed=self._seed,
+        seed2=self._seed2,
         **dataset_ops.flat_structure(self))
     # pylint: enable=protected-access
 
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index ecb24103b3..c7295d6e69 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -443,7 +443,6 @@ tf_py_test(
     srcs = ["shuffle_dataset_op_test.py"],
     additional_deps = [
         ":test_base",
-        "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
index 6001721726..347af18576 100644
--- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
@@ -19,7 +19,6 @@ from __future__ import print_function
 
 import collections
 
-from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.kernel_tests import test_base
@@ -32,7 +31,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
+class ShuffleDatasetTest(test_base.DatasetTestBase):
 
   def testShuffleDataset(self):
     components = (
@@ -210,27 +209,5 @@ class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
-  @parameterized.named_parameters(
-      ("ReshuffleEachIterationNoSeed", None, True),
-      ("ReshuffleEachIterationWithSeed", 42, True),
-      ("NoReshuffleEachIterationNoSeed", None, False),
-      ("NoReshuffleEachIterationWithSeed", 42, False),
-  )
-  def testShuffleAndZipDataset(self, seed, reshuffle):
-    dataset = (dataset_ops.Dataset.range(10)
-               .shuffle(10, seed=seed, reshuffle_each_iteration=reshuffle)
-               .repeat(3))
-    dataset = dataset_ops.Dataset.zip((dataset, dataset))
-    iterator = dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(30):
-        x, y = sess.run(next_element)
-        self.assertEqual(x, y)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 2d036fd0d6..b7e19055f2 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -2254,34 +2254,18 @@ class ShuffleDataset(UnaryDataset):
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
-
-    # NOTE(mrry): We generate the seed-pair once per graph in which the dataset
-    # is iterated over, and cache it in `self._graph_seed_map`. This supports
-    # two features: iterating over the same `ShuffleDataset` twice in the same
-    # pipeline and observing the same order (by tying the seeds together with
-    # a randomly-generated seed), and using `Dataset.make_one_shot_iterator()`,
-    # which requires the stateful RNG op to be created inside the same graph as
-    # the dataset.
-    self._original_seed = seed
-    self._graph_seed_map = {}
-
+    self._seed, self._seed2 = random_seed.get_seed(seed)
     if reshuffle_each_iteration is None:
       self._reshuffle_each_iteration = True
     else:
       self._reshuffle_each_iteration = reshuffle_each_iteration
 
   def _as_variant_tensor(self):
-    try:
-      seed, seed2 = self._graph_seed_map[ops.get_default_graph()]
-    except KeyError:
-      seed, seed2 = random_seed.get_seed(self._original_seed)
-      self._graph_seed_map[ops.get_default_graph()] = (seed, seed2)
-
     return gen_dataset_ops.shuffle_dataset(
         self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
         buffer_size=self._buffer_size,
-        seed=seed,
-        seed2=seed2,
+        seed=self._seed,
+        seed2=self._seed2,
         reshuffle_each_iteration=self._reshuffle_each_iteration,
         **flat_structure(self))
 
diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD
index 95bf3209d7..39082ce370 100644
--- a/tensorflow/python/data/util/BUILD
+++ b/tensorflow/python/data/util/BUILD
@@ -142,7 +142,6 @@ py_test(
         ":random_seed",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:random_ops",
         "//tensorflow/python:util",
     ],
 )
diff --git a/tensorflow/python/data/util/random_seed.py b/tensorflow/python/data/util/random_seed.py
index d24df6d957..d5169f7a53 100644
--- a/tensorflow/python/data/util/random_seed.py
+++ b/tensorflow/python/data/util/random_seed.py
@@ -24,7 +24,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
 
 
 def get_seed(seed):
@@ -38,7 +37,7 @@ def get_seed(seed):
 
   Returns:
     A tuple of two `tf.int64` scalar tensors that should be used for the local
-    seeds of the calling dataset.
+    seed of the calling dataset.
   """
   seed, seed2 = random_seed.get_seed(seed)
   if seed is None:
@@ -46,7 +45,7 @@ def get_seed(seed):
   else:
     seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed")
   if seed2 is None:
-    seed2 = random_ops.random_uniform([], 1, 2**63 - 1, dtype=dtypes.int64)
+    seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
   else:
     with ops.name_scope("seed2") as scope:
       seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64)
diff --git a/tensorflow/python/data/util/random_seed_test.py b/tensorflow/python/data/util/random_seed_test.py
index 5df2e38c62..a809151e6e 100644
--- a/tensorflow/python/data/util/random_seed_test.py
+++ b/tensorflow/python/data/util/random_seed_test.py
@@ -41,6 +41,7 @@ class RandomSeedTest(test.TestCase):
         # (input_graph_seed, input_op_seed)
         # and output from get_seed:
         # (output_graph_seed, output_op_seed)
+        ((None, None), (0, 0)),
         ((None, 1), (random_seed.DEFAULT_GRAPH_SEED, 1)),
         ((1, 1), (1, 1)),
         ((0, 0), (0, 2**31 - 1)),  # Avoid nondeterministic (0, 0) output
@@ -77,18 +78,6 @@ class RandomSeedTest(test.TestCase):
       self.assertEqual((g_seed, op_seed), toutput, msg=msg)
       random_seed.set_random_seed(None)
 
-  @test_util.run_in_graph_and_eager_modes
-  def testNondeterministicRandomSeed(self):
-    random_seed.set_random_seed(None)
-    op_seeds = []
-    for _ in range(50):
-      g_seed, op_seed = data_random_seed.get_seed(None)
-      g_seed = self.evaluate(g_seed)
-      op_seed = self.evaluate(op_seed)
-      self.assertEqual(0, g_seed)
-      self.assertNotEqual(0, op_seed)
-      op_seeds.append(op_seed)
-    self.assertGreater(len(set(op_seeds)), 1)
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From b055d78b0edbf117ec5f7f2662d3bb2781ae02b3 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Mon, 8 Oct 2018 15:09:57 -0700
Subject: [PATCH 0571/1085] Fix issue with type inference for ops with fixed
 output types

Use the ArgDef::type field when available for propagating
the output types from a given unsupported operator.

PiperOrigin-RevId: 216257741
---
 tensorflow/contrib/lite/toco/import_tensorflow.cc |  7 +++++--
 .../contrib/lite/toco/import_tensorflow_test.cc   | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 133ef79a34..32f22e1ea0 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1151,11 +1151,14 @@ tensorflow::Status ConvertUnsupportedOperator(
     op->output_data_types.push_back(ConvertDataType(output_type));
   } else if (op_def != nullptr) {
     for (const auto& output_arg : op_def->output_arg()) {
-      if (HasAttr(node, output_arg.type_attr())) {
+      if (output_arg.type() != tensorflow::DT_INVALID) {
+        op->output_data_types.push_back(ConvertDataType(output_arg.type()));
+      } else if (HasAttr(node, output_arg.type_attr())) {
         op->output_data_types.push_back(
             ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr())));
       } else {
-        LOG(INFO) << "Op node missing output type attribute: " << node.name();
+        LOG(WARNING) << "Op node missing output type attribute: "
+                     << node.name();
         op->output_data_types.clear();
         break;
       }
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
index 8a236d4444..cd9a144b52 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
@@ -235,6 +235,21 @@ TEST_P(TypeImportTest, BasicTypeInference) {
 INSTANTIATE_TEST_CASE_P(BasicTypeInference, TypeImportTest,
                         ::testing::ValuesIn(UnaryTestTypes()));
 
+TEST(ImportTest, TypeInferenceWithFixedOutputType) {
+  // Create an op that has a fixed output type (bool).
+  Model model;
+  EXPECT_TRUE(ImportNode(BuildNode("IsFinite", {{1, 2}, {2, 3}}), &model).ok());
+  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
+  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
+  const TensorFlowUnsupportedOperator* op =
+      static_cast<const TensorFlowUnsupportedOperator*>(
+          model.operators[0].get());
+
+  // The static output type should be indicated in the imported op.
+  ASSERT_THAT(op->output_data_types,
+              ::testing::ElementsAre(ArrayDataType::kBool));
+}
+
 TEST(ImportTest, FailedTypeInference) {
   // Create a unary op with no Type ("T") annotation.
   NodeDef node;
-- 
GitLab


From 0b13d0806b061deaec0e96cfdca1ae4509174f89 Mon Sep 17 00:00:00 2001
From: Ruoxin Sang <rxsang@google.com>
Date: Mon, 8 Oct 2018 15:24:56 -0700
Subject: [PATCH 0572/1085] Simple comment fix in CheckpointInputPipelineHook.

PiperOrigin-RevId: 216260216
---
 tensorflow/python/data/experimental/ops/iterator_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/experimental/ops/iterator_ops.py b/tensorflow/python/data/experimental/ops/iterator_ops.py
index 72d7d58f06..5eb2563977 100644
--- a/tensorflow/python/data/experimental/ops/iterator_ops.py
+++ b/tensorflow/python/data/experimental/ops/iterator_ops.py
@@ -198,7 +198,7 @@ class CheckpointInputPipelineHook(session_run_hook.SessionRunHook):
     # is run *after* this hook. That is troublesome because
     # 1. If a checkpoint exists and this hook restores it, the initializer hook
     #    will override it.
-    # 2. If no checkpoint exists, this hook will try to save an initialized
+    # 2. If no checkpoint exists, this hook will try to save an uninitialized
     #    iterator which will result in an exception.
     #
     # As a temporary fix we enter the following implicit contract between this
-- 
GitLab


From a991acba07ce6c5903ee84e4a72d3d59e22b77fc Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Mon, 8 Oct 2018 15:26:34 -0700
Subject: [PATCH 0573/1085] Internal Change.

PiperOrigin-RevId: 216260437
---
 tensorflow/contrib/__init__.py | 8 --------
 tensorflow/python/__init__.py  | 7 -------
 2 files changed, 15 deletions(-)

diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py
index e71b0e0ae3..f52a1a7bab 100644
--- a/tensorflow/contrib/__init__.py
+++ b/tensorflow/contrib/__init__.py
@@ -21,14 +21,6 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.python.tools import component_api_helper
-component_api_helper.package_hook(
-    parent_package_str=(
-        "tensorflow.contrib"),
-    child_package_str=(
-        "tensorflow_estimator.contrib.estimator"))
-del component_api_helper
-
 # Add projects here, they will show up under tf.contrib.
 from tensorflow.contrib import autograph
 from tensorflow.contrib import batching
diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py
index 4921ecc43c..a2ab63bb48 100644
--- a/tensorflow/python/__init__.py
+++ b/tensorflow/python/__init__.py
@@ -48,13 +48,6 @@ import numpy as np
 
 from tensorflow.python import pywrap_tensorflow
 
-from tensorflow.python.tools import component_api_helper
-component_api_helper.package_hook(
-    parent_package_str='tensorflow.python',
-    child_package_str=(
-        'tensorflow_estimator.python.estimator'))
-del component_api_helper
-
 # Protocol buffers
 from tensorflow.core.framework.graph_pb2 import *
 from tensorflow.core.framework.node_def_pb2 import *
-- 
GitLab


From eb0f862ba60f41e8d0f06ceb6fc65f7f9905a25a Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 15:27:40 -0700
Subject: [PATCH 0574/1085] Automated rollback of commit
 13b47e6c4f9d7b295948b1057139bf676e394b6f

PiperOrigin-RevId: 216260575
---
 tensorflow/core/kernels/data/iterator_ops.cc  |  4 +++
 .../kernels/data/map_and_batch_dataset_op.cc  |  9 +++----
 .../core/kernels/data/model_dataset_op.cc     | 10 +++----
 .../data/parallel_interleave_dataset_op.cc    | 27 ++++++++-----------
 .../kernels/data/parallel_map_iterator.cc     |  9 +++----
 .../core/kernels/data/prefetch_dataset_op.cc  | 10 +++----
 tensorflow/core/kernels/data/writer_ops.cc    | 12 ++++-----
 7 files changed, 37 insertions(+), 44 deletions(-)

diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 8acd6cc724..7a833668ac 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -16,8 +16,10 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
+#include "tensorflow/core/common_runtime/threadpool_device.h"
 #include "tensorflow/core/framework/iterator.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/resource_op_kernel.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
@@ -25,11 +27,13 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset_utils.h"
 #include "tensorflow/core/kernels/data/optional_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 namespace data {
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 0fb721cd7c..f45a239793 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -445,10 +445,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
-          runner_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-          runner_thread_->Schedule(
-              std::bind(&Iterator::RunnerThread, this, ctx_copy));
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "runner_thread",
+              std::bind(&Iterator::RunnerThread, this, ctx_copy)));
         }
       }
 
@@ -704,7 +703,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       std::unique_ptr<IteratorBase> input_impl_;
       // Buffer for storing the (intermediate) batch results.
       std::deque<std::shared_ptr<BatchResult>> batch_results_ GUARDED_BY(*mu_);
-      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
       bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc
index 859df57962..9aa505f4f1 100644
--- a/tensorflow/core/kernels/data/model_dataset_op.cc
+++ b/tensorflow/core/kernels/data/model_dataset_op.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -127,10 +126,9 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         if (!optimize_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          optimize_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "optimize_thread");
-          optimize_thread_->Schedule(
-              [this, new_ctx]() { OptimizeThread(new_ctx); });
+          optimize_thread_.reset(ctx->env()->StartThread(
+              {}, "optimize_thread",
+              [this, new_ctx]() { OptimizeThread(new_ctx); }));
         }
         return Status::OK();
       }
@@ -169,7 +167,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel {
       mutex mu_;
       condition_variable cond_var_;
       std::shared_ptr<model::Model> model_;
-      std::unique_ptr<BackgroundWorker> optimize_thread_ GUARDED_BY(mu_);
+      std::unique_ptr<Thread> optimize_thread_ GUARDED_BY(mu_);
       bool cancelled_ GUARDED_BY(mu_) = false;
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
     };
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 9c836b836e..6b6b3d6ab9 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -26,7 +26,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -482,10 +481,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
           worker_threads_.reserve(dataset()->num_threads());
           for (size_t i = 0; i < dataset()->num_threads(); ++i) {
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(
-                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
-            worker_threads_.back()->Schedule(
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
+            worker_threads_.emplace_back(ctx->env()->StartThread(
+                {}, "worker_thread",
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
           }
         }
         return Status::OK();
@@ -582,10 +580,9 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
             }
             workers_[i].SetInputs(s, std::move(args));
             std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-            worker_threads_.emplace_back(
-                MakeUnique<BackgroundWorker>(ctx->env(), "worker_thread"));
-            worker_threads_.back()->Schedule(
-                [this, new_ctx, i]() { WorkerThread(new_ctx, i); });
+            worker_threads_.emplace_back(ctx->env()->StartThread(
+                {}, "worker_thread",
+                [this, new_ctx, i]() { WorkerThread(new_ctx, i); }));
             if (i < dataset()->cycle_length_) {
               interleave_indices_.push_back(i);
             } else {
@@ -1050,8 +1047,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       // The worker threads. This must be last to ensure the
       // threads have exited before any other members are deallocated.
       // TODO(b/65178177): Avoid allocating additional threads.
-      std::vector<std::unique_ptr<BackgroundWorker>> worker_threads_
-          GUARDED_BY(mu_);
+      std::vector<std::unique_ptr<Thread>> worker_threads_ GUARDED_BY(mu_);
     };
 
     const DatasetBase* const input_;
@@ -1393,10 +1389,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
           std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-          runner_thread_ =
-              MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-          runner_thread_->Schedule(
-              [this, new_ctx]() { RunnerThread(new_ctx); });
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "runner_thread",
+              [this, new_ctx]() { RunnerThread(new_ctx); }));
         }
       }
 
@@ -1650,7 +1645,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       int64 num_calls_ GUARDED_BY(*mu_) = 0;
 
       std::unique_ptr<thread::ThreadPool> thread_pool_;
-      std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
 
       // Identifies whether background activity should be cancelled.
       bool cancelled_ GUARDED_BY(*mu_) = false;
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index e69274e4f2..ebf41925c9 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -181,10 +181,9 @@ class ParallelMapIterator : public DatasetBaseIterator {
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
-      runner_thread_ =
-          MakeUnique<BackgroundWorker>(ctx->env(), "runner_thread");
-      runner_thread_->Schedule(
-          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy));
+      runner_thread_.reset(ctx->env()->StartThread(
+          {}, "runner_thread",
+          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
     }
   }
 
@@ -332,7 +331,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
   // Buffer for storing the invocation results.
   std::deque<std::shared_ptr<InvocationResult>> invocation_results_
       GUARDED_BY(*mu_);
-  std::unique_ptr<BackgroundWorker> runner_thread_ GUARDED_BY(*mu_);
+  std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
   bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
index e9c38eb8a0..754ed772db 100644
--- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -257,11 +256,10 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     Status EnsurePrefetchThreadStarted(IteratorContext* ctx)
         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
       if (!prefetch_thread_) {
-        prefetch_thread_ =
-            MakeUnique<BackgroundWorker>(ctx->env(), "prefetch_thread");
         std::shared_ptr<IteratorContext> new_ctx(new IteratorContext(*ctx));
-        prefetch_thread_->Schedule(
-            [this, new_ctx]() { PrefetchThread(new_ctx); });
+        prefetch_thread_.reset(ctx->env()->StartThread(
+            {}, "prefetch_thread",
+            [this, new_ctx]() { PrefetchThread(new_ctx); }));
       }
       return Status::OK();
     }
@@ -365,7 +363,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     string prefix_end_;
     PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_);
     std::deque<BufferElement> buffer_ GUARDED_BY(mu_);
-    std::unique_ptr<BackgroundWorker> prefetch_thread_ GUARDED_BY(mu_);
+    std::unique_ptr<Thread> prefetch_thread_ GUARDED_BY(mu_);
     bool cancelled_ GUARDED_BY(mu_) = false;
     bool prefetch_thread_finished_ GUARDED_BY(mu_) = false;
   };
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index 7bb2077b62..3f76695bb1 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -29,10 +29,10 @@ class ToTFRecordOp : public AsyncOpKernel {
  public:
   explicit ToTFRecordOp(OpKernelConstruction* ctx)
       : AsyncOpKernel(ctx),
-        background_worker_(
-            ctx->env(),
-            strings::StrCat("to_tf_record_op_", SanitizeThreadSuffix(name()))) {
-  }
+        thread_pool_(new thread::ThreadPool(
+            ctx->env(), ThreadOptions(),
+            strings::StrCat("to_tf_record__op_", SanitizeThreadSuffix(name())),
+            1 /* num_threads */, false /* low_latency_hint */)) {}
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
@@ -50,7 +50,7 @@ class ToTFRecordOp : public AsyncOpKernel {
     // The call to `iterator->GetNext()` may block and depend on an
     // inter-op thread pool thread, so we issue the call from the
     // owned thread pool.
-    background_worker_.Schedule([this, ctx, done]() {
+    thread_pool_->Schedule([this, ctx, done]() {
       string filename;
       OP_REQUIRES_OK_ASYNC(
           ctx, ParseScalarArgument<string>(ctx, "filename", &filename), done);
@@ -97,7 +97,7 @@ class ToTFRecordOp : public AsyncOpKernel {
   }
 
  private:
-  BackgroundWorker background_worker_;
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("DatasetToTFRecord").Device(DEVICE_CPU),
-- 
GitLab


From cb057ea64032e551027c8f9058a9d28a258c9d6b Mon Sep 17 00:00:00 2001
From: Chris Leary <leary@google.com>
Date: Mon, 8 Oct 2018 15:42:17 -0700
Subject: [PATCH 0575/1085] [XLA] Make overly-specific ShapeUtil predicate a
 little more general.

PiperOrigin-RevId: 216263039
---
 tensorflow/compiler/xla/service/hlo_instruction_test.cc | 3 ++-
 tensorflow/compiler/xla/service/hlo_query.cc            | 2 +-
 tensorflow/compiler/xla/shape_util.cc                   | 5 +++--
 tensorflow/compiler/xla/shape_util.h                    | 5 ++++-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
index c1b7c3832b..d93351fe04 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc
@@ -135,7 +135,8 @@ TEST_F(HloInstructionTest, BasicProperties) {
   auto parameter = HloInstruction::CreateParameter(1, r0f32_, "foo");
 
   EXPECT_EQ(HloOpcode::kParameter, parameter->opcode());
-  EXPECT_TRUE(ShapeUtil::IsScalarF32(parameter->shape()));
+  EXPECT_TRUE(ShapeUtil::IsScalarWithElementType(parameter->shape(), F32));
+  EXPECT_FALSE(ShapeUtil::IsScalarWithElementType(parameter->shape(), S32));
   EXPECT_EQ(0, parameter->operand_count());
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_query.cc b/tensorflow/compiler/xla/service/hlo_query.cc
index 2a07b6fcbc..2d5197be9e 100644
--- a/tensorflow/compiler/xla/service/hlo_query.cc
+++ b/tensorflow/compiler/xla/service/hlo_query.cc
@@ -24,7 +24,7 @@ namespace hlo_query {
 
 bool IsConstantR0F32(HloInstruction* instruction, float* out) {
   if (instruction->opcode() == HloOpcode::kConstant &&
-      ShapeUtil::IsScalarF32(instruction->shape())) {
+      ShapeUtil::IsScalarWithElementType(instruction->shape(), F32)) {
     *out = instruction->literal().Get<float>({});
     return true;
   }
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 7f0201942b..9267de3cfc 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -461,8 +461,9 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
   return ShapeUtil::IsArray(shape) && ElementsIn(shape) == 0;
 }
 
-/* static */ bool ShapeUtil::IsScalarF32(const Shape& shape) {
-  return shape.element_type() == F32 && Rank(shape) == 0;
+/* static */ bool ShapeUtil::IsScalarWithElementType(
+    const Shape& shape, PrimitiveType element_type) {
+  return IsScalar(shape) && shape.element_type() == element_type;
 }
 
 namespace {
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index d8bb27beae..73f541d505 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -312,7 +312,10 @@ class ShapeUtil {
   static bool IsEffectiveScalar(const Shape& shape) {
     return IsArray(shape) && TrueRank(shape) == 0;
   }
-  static bool IsScalarF32(const Shape& shape);
+
+  // Returns whether "shape" is a scalar (array) with the given element_type.
+  static bool IsScalarWithElementType(const Shape& shape,
+                                      PrimitiveType element_type);
 
   // Extracts the size of the shape's dimension at dimension number
   // GetDimensionNumber(dimension_number).
-- 
GitLab


From 783627bf63cdfa467e7811f2bf8330555d66f313 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 15:55:18 -0700
Subject: [PATCH 0576/1085] Convert TensorFlow's aws dependency to new third
 party import method.

PiperOrigin-RevId: 216265275
---
 tensorflow/workspace.bzl                   | 14 +++-----------
 third_party/aws/BUILD                      |  1 +
 third_party/{aws.BUILD => aws/BUILD.bazel} |  0
 third_party/aws/workspace.bzl              | 15 +++++++++++++++
 4 files changed, 19 insertions(+), 11 deletions(-)
 create mode 100644 third_party/aws/BUILD
 rename third_party/{aws.BUILD => aws/BUILD.bazel} (100%)
 create mode 100644 third_party/aws/workspace.bzl

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index adeac62e43..40c226a861 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -20,12 +20,15 @@ load(
     "//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl",
     "def_file_filter_configure",
 )
+load("//third_party/aws:workspace.bzl", aws = "repo")
 load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
 load("//third_party/icu:workspace.bzl", icu = "repo")
 load("//third_party/jpeg:workspace.bzl", jpeg = "repo")
 load("//third_party/nasm:workspace.bzl", nasm = "repo")
 
 def initialize_third_party():
+    """ Load third party repositories.  See above load() statements. """
+    aws()
     flatbuffers()
     icu()
     jpeg()
@@ -585,17 +588,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
     )
 
-    tf_http_archive(
-        name = "aws",
-        build_file = clean_dep("//third_party:aws.BUILD"),
-        sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c",
-        strip_prefix = "aws-sdk-cpp-1.3.15",
-        urls = [
-            "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
-            "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
-        ],
-    )
-
     java_import_external(
         name = "junit",
         jar_sha256 = "59721f0805e223d84b90677887d9ff567dc534d7c502ca903c0c2b17f05c116a",
diff --git a/third_party/aws/BUILD b/third_party/aws/BUILD
new file mode 100644
index 0000000000..2f5d02becb
--- /dev/null
+++ b/third_party/aws/BUILD
@@ -0,0 +1 @@
+# Dummy BUILD file to make this directory a package.
diff --git a/third_party/aws.BUILD b/third_party/aws/BUILD.bazel
similarity index 100%
rename from third_party/aws.BUILD
rename to third_party/aws/BUILD.bazel
diff --git a/third_party/aws/workspace.bzl b/third_party/aws/workspace.bzl
new file mode 100644
index 0000000000..c216638154
--- /dev/null
+++ b/third_party/aws/workspace.bzl
@@ -0,0 +1,15 @@
+"""loads the aws library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "aws",
+        urls = [
+            "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
+            "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz",
+        ],
+        sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c",
+        strip_prefix = "aws-sdk-cpp-1.3.15",
+        build_file = "//third_party/aws:BUILD.bazel",
+    )
-- 
GitLab


From 46d296b2d03ddbb6f0723d213fdfa9c5226e1e2a Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Mon, 8 Oct 2018 16:24:49 -0700
Subject: [PATCH 0577/1085] Internal change

PiperOrigin-RevId: 216270385
---
 tensorflow/contrib/lite/build_def.bzl | 40 +++++++++++++++++++++++----
 tensorflow/contrib/lite/testing/BUILD |  4 +--
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 7ef26de69f..b9e933a8b6 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -212,7 +212,8 @@ def json_to_tflite(name, src, out):
 
 # This is the master list of generated examples that will be made into tests. A
 # function called make_XXX_tests() must also appear in generate_examples.py.
-# Disable a test by commenting it out. If you do, add a link to a bug or issue.
+# Disable a test by adding it to the blacklists specified in
+# generated_test_models_failing().
 def generated_test_models():
     return [
         "add",
@@ -291,12 +292,38 @@ def generated_test_models():
         "tile",
         "topk",
         "transpose",
-        #"transpose_conv",   # disabled due to b/111213074
+        "transpose_conv",
         "unpack",
         "where",
         "zeros_like",
     ]
 
+# List of models that fail generated tests for the conversion mode.
+# If you have to disable a test, please add here with a link to the appropriate
+# bug or issue.
+def generated_test_models_failing(conversion_mode):
+    if not conversion_mode:
+        return [
+            "transpose_conv",  # disabled due to b/111213074
+        ]
+
+    if conversion_mode == "toco-flex":
+        # TODO(b/117328698): Fix and enable the known flex failures.
+        return [
+            "arg_min_max",
+            "div",
+            "floor_div",
+            "gather ",
+            "lstm ",
+            "resize_bilinear",
+            "space_to_batch_nd",
+            "split",
+            "transpose",
+            "unpack",
+        ]
+
+    return []
+
 def generated_test_conversion_modes():
     """Returns a list of conversion modes."""
 
@@ -313,10 +340,14 @@ def generated_test_models_all():
     tests = generated_test_models()
     options = []
     for conversion_mode in conversion_modes:
+        failing_tests = generated_test_models_failing(conversion_mode)
         for test in tests:
+            tags = []
+            if test in failing_tests:
+                tags.append("notap")
             if conversion_mode:
                 test += "_%s" % conversion_mode
-            options.append((conversion_mode, test))
+            options.append((conversion_mode, test, tags))
     return options
 
 def gen_zip_test(name, test_name, conversion_mode, **kwargs):
@@ -336,9 +367,6 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs):
         # if conversion_mode == "pb2lite":
         #     toco = "//tensorflow/contrib/lite/experimental/pb2lite:pb2lite"
         flags = "--ignore_toco_errors --run_with_flex"
-        kwargs["tags"].append("skip_already_failing")
-        kwargs["tags"].append("no_oss")
-        kwargs["tags"].append("notap")
 
     gen_zipped_test_file(
         name = "zip_%s" % test_name,
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index f0bfec2338..45baad782a 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -35,7 +35,7 @@ load(
         ":zip_%s" % test_name,
     ],
     shard_count = 20,
-    tags = [
+    tags = tags + [
         "gen_zip_test",
         "no_oss",
         "tflite_not_portable_intentional",
@@ -61,7 +61,7 @@ load(
             "//tensorflow/core:android_tensorflow_test_lib",
         ],
     }),
-) for conversion_mode, test_name in generated_test_models_all()]
+) for conversion_mode, test_name, tags in generated_test_models_all()]
 
 test_suite(
     name = "generated_zip_tests",
-- 
GitLab


From 8815f34385eb28f1cfcb53bebd526c11573f3027 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 8 Oct 2018 16:25:40 -0700
Subject: [PATCH 0578/1085] Avoid calling get_default_graph() during
 tf.enable_eager_execution()

PiperOrigin-RevId: 216270497
---
 tensorflow/python/framework/ops.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 77c2bc930e..140bd098a6 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -5457,8 +5457,7 @@ def enable_eager_execution_internal(config=None,
         "tf.contrib.eager.ASYNC")
   if context.default_execution_mode == context.GRAPH_MODE:
     graph_mode_has_been_used = (
-        _default_session_stack.stack
-        or len(get_default_graph().get_operations()) > 0)  # pylint: disable=g-explicit-length-test
+        _default_graph_stack._global_default_graph is not None) # pylint: disable=protected-access
     if graph_mode_has_been_used:
       raise ValueError(
           "tf.enable_eager_execution must be called at program startup.")
-- 
GitLab


From 49643265c3f1f279a93bd8bc3a126e11e979bc44 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 8 Oct 2018 17:14:47 -0700
Subject: [PATCH 0579/1085] Remove deprecations for some of the endpoints in
 ApiDef files. These changes are made according to
 https://github.com/tensorflow/community/pull/16.

I am keeping a few symbols deprecated not mentioned in the doc:
tf.diag - it seems best to keep it next to tf.linalg.diag, so that the two are easy to compare and decide which one to use. The plan is to rename tf.diag to tf.tensor_diag.
tf.is_nan - similar to tf.is_inf, tf.is_finite, tf.is_numeric_tensor which are all getting deprecated and replaced by symbols in tf.debugging.
tf.string_to_number - other string endpoints in root namespace are getting deprecated: for e.g. tf.substr, tf.string_join.
tf.dequantize - all quantization ops should be under tf.quantize. I probably missed this one.
tf.check_numerics - similar to other debugging ops that are getting moved to tf.debugging.
tf.squared_difference - moved to tf.math namespace and not as popular as some other math ops such as tf.add to justify keeping endpoint in root.
tf.decode_raw - similar to other ops such as tf.decode_csv that are getting moved to tf.io.decode_csv.

PiperOrigin-RevId: 216278010
---
 tensorflow/core/api_def/python_api/api_def_Acos.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Add.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_AsString.pbtxt     | 1 -
 tensorflow/core/api_def/python_api/api_def_Asin.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Atan.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Cos.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Equal.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Exp.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_Floor.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_Greater.pbtxt      | 1 -
 tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt | 1 -
 tensorflow/core/api_def/python_api/api_def_Less.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt    | 1 -
 tensorflow/core/api_def/python_api/api_def_Log.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt        | 1 -
 tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt   | 1 -
 tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt   | 1 -
 tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt    | 1 -
 tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt      | 1 -
 tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt      | 1 -
 tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt     | 1 -
 tensorflow/core/api_def/python_api/api_def_Sin.pbtxt          | 1 -
 tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt         | 1 -
 tensorflow/core/api_def/python_api/api_def_Tan.pbtxt          | 1 -
 29 files changed, 29 deletions(-)

diff --git a/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt b/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt
index 1fd8baf05f..f4d7f498b2 100644
--- a/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Acos.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "acos"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt
index f7946652ef..e921f26d1e 100644
--- a/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Acosh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "acosh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Add.pbtxt b/tensorflow/core/api_def/python_api/api_def_Add.pbtxt
index fb505a91ac..4c6f387ebd 100644
--- a/tensorflow/core/api_def/python_api/api_def_Add.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Add.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "add"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt b/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt
index ea65543a76..d51defc376 100644
--- a/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_AsString.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "as_string"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt b/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt
index eedf4553c6..b13f5c398f 100644
--- a/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Asin.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "asin"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt
index 10c2fb356e..89a3f9da44 100644
--- a/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Asinh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "asinh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt b/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt
index 03dd5dc848..4403a2379c 100644
--- a/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Atan.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "atan"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt b/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt
index 85b27bd881..56eed0f0fb 100644
--- a/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Atan2.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "atan2"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt
index ee7c0600d6..a8f5e792f0 100644
--- a/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Atanh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "atanh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt
index 1af8c0c2c9..db52d25ff2 100644
--- a/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Cos.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "cos"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt
index 2de87df40d..74bf573565 100644
--- a/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Cosh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "cosh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt b/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt
index 78aa1b3bc5..34717e74bc 100644
--- a/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Equal.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "equal"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt b/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt
index 70323fe5b4..38a9078d9f 100644
--- a/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Exp.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "exp"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt b/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt
index 9b93caa0b1..14accd2b20 100644
--- a/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Floor.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "floor"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt b/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt
index 7de60d44c4..7926deaa3b 100644
--- a/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Greater.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "greater"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt
index 9c8975c2a9..21bbb1b094 100644
--- a/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_GreaterEqual.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "greater_equal"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Less.pbtxt b/tensorflow/core/api_def/python_api/api_def_Less.pbtxt
index 055df2922a..0b5f06e99f 100644
--- a/tensorflow/core/api_def/python_api/api_def_Less.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Less.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "less"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt
index d2803ddb69..afc4f2a8c9 100644
--- a/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_LessEqual.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "less_equal"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Log.pbtxt b/tensorflow/core/api_def/python_api/api_def_Log.pbtxt
index 26d2473b9c..ac4a4454c7 100644
--- a/tensorflow/core/api_def/python_api/api_def_Log.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Log.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "log"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt b/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt
index d85b6dccec..5a2d77a417 100644
--- a/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Log1p.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "log1p"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt b/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt
index 80bd98b740..d4e6a7a380 100644
--- a/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_LogicalAnd.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "logical_and"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt b/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt
index b2244c44b1..49068738a4 100644
--- a/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_LogicalNot.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "logical_not"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt b/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt
index cf78b52e07..a5133962dc 100644
--- a/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_LogicalOr.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "logical_or"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt b/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt
index bcff379b71..130729ece1 100644
--- a/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Maximum.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "maximum"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt b/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt
index 9aae74226a..8aded1f154 100644
--- a/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Minimum.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "minimum"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt
index f37317854f..07fe3b6af1 100644
--- a/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_NotEqual.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "not_equal"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt b/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt
index 9c19a1a177..a2b776ee0c 100644
--- a/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Sin.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "sin"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt b/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt
index 155e58e6d5..38c7c729bf 100644
--- a/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Sinh.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "sinh"
-    deprecated: true
   }
 }
diff --git a/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt b/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt
index ffa92f5580..20cfac05fd 100644
--- a/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_Tan.pbtxt
@@ -5,6 +5,5 @@ op {
   }
   endpoint {
     name: "tan"
-    deprecated: true
   }
 }
-- 
GitLab


From 03d097bc96080981098ffdbaf1b3465e6e153a6a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 17:33:22 -0700
Subject: [PATCH 0580/1085] Consolidate device parameter arguments into a
 shared DeviceInfo struct

PiperOrigin-RevId: 216280197
---
 tensorflow/core/grappler/costs/cost_estimator.h           | 5 +++++
 tensorflow/core/grappler/costs/op_level_cost_estimator.cc | 2 +-
 tensorflow/core/grappler/costs/op_level_cost_estimator.h  | 6 ------
 tensorflow/python/grappler/cluster.i                      | 2 +-
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h
index e91f0cc9da..569d9da683 100644
--- a/tensorflow/core/grappler/costs/cost_estimator.h
+++ b/tensorflow/core/grappler/costs/cost_estimator.h
@@ -30,6 +30,11 @@ struct GrapplerItem;
 constexpr int64 kMemoryUnknown = -1ll;
 constexpr int64 kZeroMemory = 0ll;
 
+struct DeviceInfo {
+  double gigaops;     // Billions of operations executed per second.
+  double gb_per_sec;  // Bandwidth to main memory in GB per second.
+};
+
 // Holds the set of things we might want to estimate or measure in Grappler.
 // Always produce execution time. Other fields are optional depending on the
 // estimator being used.
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index 71f4d9fd05..f363f2915f 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -372,7 +372,7 @@ Costs OpLevelCostEstimator::PredictCosts(const OpContext& op_context) const {
   return costs;
 }
 
-OpLevelCostEstimator::DeviceInfo OpLevelCostEstimator::GetDeviceInfo(
+DeviceInfo OpLevelCostEstimator::GetDeviceInfo(
     const DeviceProperties& device) const {
   double gflops = -1;
   double gb_per_sec = -1;
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
index a277dfdf65..dd1ee39cb2 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
@@ -40,12 +40,6 @@ class OpLevelCostEstimator {
 
   virtual Costs PredictCosts(const OpContext& op_context) const;
 
-  // Basic device performance info, sufficient for roofline estimate.
-  struct DeviceInfo {
-    double gigaops;     // Billions of operations executed per second.
-    double gb_per_sec;  // Bandwidth to main memory in GB per second.
-  };
-
   // Returns basic device performance info.
   virtual DeviceInfo GetDeviceInfo(const DeviceProperties& device) const;
 
diff --git a/tensorflow/python/grappler/cluster.i b/tensorflow/python/grappler/cluster.i
index 6816e20407..87795ffcfb 100644
--- a/tensorflow/python/grappler/cluster.i
+++ b/tensorflow/python/grappler/cluster.i
@@ -308,7 +308,7 @@ static PyObject* TF_GetSupportedDevices(GCluster cluster, GItem item) {
 
 static double TF_EstimatePerformance(const tensorflow::NamedDevice& device) {
   tensorflow::grappler::OpLevelCostEstimator estimator;
-  tensorflow::grappler::OpLevelCostEstimator::DeviceInfo info =
+  tensorflow::grappler::DeviceInfo info =
       estimator.GetDeviceInfo(device.properties());
   return info.gigaops;
 }
-- 
GitLab


From 4ff7b81514ea1b86295bc74b620e3c1d3e127e6f Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Mon, 8 Oct 2018 17:37:44 -0700
Subject: [PATCH 0581/1085] Fix the seeding for `Dataset.shuffle(...,
 reshuffle_each_iteration=False)`.

Previously, we were passing the first (graph-level) seed for both the
graph-level and op-level seeds when creating a C++ dataset. This
change passes the op-level seed to the appropriate point, and adds a test
for the behavior with graph-but-not-op-level seeds.

PiperOrigin-RevId: 216280641
---
 .../core/kernels/data/shuffle_dataset_op.cc   |  2 +-
 tensorflow/python/data/kernel_tests/BUILD     |  3 ++
 .../kernel_tests/shuffle_dataset_op_test.py   | 35 ++++++++++++++++++-
 3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 66466d6a36..9f54c381a9 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -485,7 +485,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
                      int64 buffer_size, int64 seed, int64 seed2, int64 count)
         : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
-          seed2_(seed) {}
+          seed2_(seed2) {}
 
     string DebugString() const override {
       return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_,
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index c7295d6e69..671b7ca1bb 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -443,12 +443,15 @@ tf_py_test(
     srcs = ["shuffle_dataset_op_test.py"],
     additional_deps = [
         ":test_base",
+        "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:random_seed",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:iterator_ops",
     ],
diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
index 347af18576..8694f58a24 100644
--- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import collections
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.kernel_tests import test_base
@@ -27,11 +28,13 @@ from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-class ShuffleDatasetTest(test_base.DatasetTestBase):
+class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testShuffleDataset(self):
     components = (
@@ -209,5 +212,35 @@ class ShuffleDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
+  @parameterized.named_parameters(
+      ("ReshuffleGraphLevelSeed", True, 38, None),
+      ("ReshuffleOpLevelSeed", True, None, 42),
+      ("ReshuffleGraphAndOpLevelSeed", True, 38, 42),
+      ("NoReshuffleGraphLevelSeed", False, 38, None),
+      ("NoReshuffleOpLevelSeed", False, None, 42),
+      ("NoReshuffleGraphAndOpLevelSeed", False, 38, 42),
+  )
+  def testShuffleSeed(self, reshuffle, graph_level_seed, op_level_seed):
+    results = []
+    for _ in range(2):
+      with ops.Graph().as_default() as g:
+        random_seed.set_random_seed(graph_level_seed)
+        dataset = dataset_ops.Dataset.range(10).shuffle(
+            10, seed=op_level_seed, reshuffle_each_iteration=reshuffle).repeat(
+                3)
+        iterator = dataset.make_one_shot_iterator()
+        next_element = iterator.get_next()
+
+        run_results = []
+        with self.session(graph=g) as sess:
+          for _ in range(30):
+            run_results.append(sess.run(next_element))
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(next_element)
+        results.append(run_results)
+
+    self.assertAllEqual(results[0], results[1])
+
+
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 934fde5b8c60987db36438ab4f70f8a91bce306b Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Mon, 8 Oct 2018 17:40:07 -0700
Subject: [PATCH 0582/1085] Register int64 SUM GPU kernel.

PiperOrigin-RevId: 216280913
---
 tensorflow/core/kernels/reduction_ops_sum.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc
index 5318d8c133..cf0d0f5c71 100644
--- a/tensorflow/core/kernels/reduction_ops_sum.cc
+++ b/tensorflow/core/kernels/reduction_ops_sum.cc
@@ -51,6 +51,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
           .HostMemory("reduction_indices"),                                    \
       ReductionOp<GPUDevice, type, int64, Eigen::internal::SumReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+TF_CALL_int64(REGISTER_GPU_KERNELS);
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
-- 
GitLab


From d58712b7fc8de0e1f87fe2ea5221bc3c85230ed3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 18:12:42 -0700
Subject: [PATCH 0583/1085] Add a tracing::ScopedActivity event to track the
 duration of a Session::Run() call for better xprof tracing. Also annotate
 synchronous op execution with the session-run id (or step_id) as metadata
 leveraging the support introduced in cl/215985561. This should enable
 highlighting the duration of a Session::Run and all the ops that ran in it
 for visualizing latency regressions in the case of CPU inference.

PiperOrigin-RevId: 216284682
---
 tensorflow/core/common_runtime/direct_session.cc |  4 ++++
 tensorflow/core/common_runtime/executor.cc       | 12 ++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 458e133b68..52c1cd2691 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -64,6 +64,7 @@ limitations under the License.
 #include "tensorflow/core/platform/device_tracer.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/tracing.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/device_name_utils.h"
 #include "tensorflow/core/util/env_var.h"
@@ -453,6 +454,9 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options,
                                   CallFrameInterface* call_frame,
                                   ExecutorsAndKeys* executors_and_keys,
                                   RunMetadata* run_metadata) {
+  string session_id_meta = strings::StrCat("SessionRun #id=", step_id, "#");
+  tracing::ScopedActivity activity(session_id_meta);
+
   const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1);
 
   std::unique_ptr<DebuggerStateInterface> debugger_state;
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index 40ec1502da..eb69d1991c 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -1771,14 +1771,18 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
             // The OpKernel may create child activities (such as GPU kernel
             // launches), so use a `ScopedAnnotation` to relate these activities
             // in the trace.
-            tracing::ScopedAnnotation activity(op_name,
-                                               op_kernel->type_string());
+            tracing::ScopedAnnotation activity(
+                op_name, strings::StrCat(op_kernel->type_string(),
+                                         "#id=", step_id_, "#"));
             device->Compute(op_kernel, &ctx);
           } else {
             // Use the cheaper `ScopedActivity` to trace just the OpKernel
             // execution.
-            tracing::ScopedActivity activity(op_name, op_kernel->type_string(),
-                                             item.kernel_is_expensive);
+            tracing::ScopedActivity activity(
+                op_name,
+                strings::StrCat(op_kernel->type_string(), "#id=", step_id_,
+                                "#"),
+                item.kernel_is_expensive);
             device->Compute(op_kernel, &ctx);
           }
         } else {
-- 
GitLab


From 3a0434e6ff6bc8c68dd15933e005352f4cdf9a6e Mon Sep 17 00:00:00 2001
From: "Li, Yiqiang" <yiqiang.li@intel.com>
Date: Tue, 9 Oct 2018 09:58:50 +0800
Subject: [PATCH 0584/1085] Fix bug in MklSlice op when allocating output
 tensor.

Wrongly "+1" for output shape, that will cause CopyFrom failure in MklToTf op because of tensor size and shape mismatch.
---
 tensorflow/core/kernels/mkl_slice_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index d63e14adf6..85cabeb92b 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -327,7 +327,7 @@ class MklDnnSliceOp : public OpKernel {
       output_mkl_shape->SetTfLayout(input_mkl_shape.GetDimension(), output_dims,
                                     input_mkl_shape.GetTfDataFormat());
 
-      output_tf_shape.AddDim((output_pd->get_size() / sizeof(T)) + 1);
+      output_tf_shape.AddDim(output_pd->get_size() / sizeof(T));
     } else {
       // If input is not in Mkl layout, then output won't be in Mkl layout.
       output_mkl_shape->SetMklTensor(false);
-- 
GitLab


From 6605eb19bd1ee64d7e58ca982ee560346809e2be Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Mon, 8 Oct 2018 21:17:20 -0700
Subject: [PATCH 0585/1085] Add the temporary logs

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index e2aa45b38a..2abac92e5d 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -137,6 +137,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
               current_dir = ".";
               current_pattern_ = io::JoinPath(current_dir, current_pattern_);
             }
+            std::cout << "Input pattern: " << current_pattern_
+                      << "; Current dir: " << current_dir << std::endl;
 
             ret.Update(UpdateIterator(ctx, current_dir, current_pattern_));
             ++current_pattern_index_;
@@ -213,6 +215,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                 .substr(0, eval_pattern.find_first_of("*?[\\"));
 
         FileSystem* fs;
+        Status fs_status = ctx->env()->GetFileSystemForFile(dir, &fs);
+        std::cout << "GetFileSystemForFile status: " << fs_status << std::endl;
         TF_RETURN_IF_ERROR(ctx->env()->GetFileSystemForFile(dir, &fs));
 
         filepath_queue_.push(PathStatus(dir, true));
@@ -234,6 +238,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           const string& current_dir = current_path.first;
           std::vector<string> children;
           Status s = fs->GetChildren(current_dir, &children);
+          std::cout << "GetChildren status: " << s.ToString()
+                    << "; Children size: " << children.size() << std::endl;
           ret.Update(s);
 
           // If GetChildren() fails, continue the next search.
-- 
GitLab


From 375c109659d2d0e6265447dffdeb460693b3cccf Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Mon, 8 Oct 2018 21:18:36 -0700
Subject: [PATCH 0586/1085] [XLA] Introduce input/output alias config.

- This CL intruduces input/output alias config in HLO module that allows any HLO pass to configure it. Once the alias_config is set, each backend needs to follow the contract during execution time to make sure the input and output are indeed aliased.

- Copy insertion / buffer assignment and alias analysis has been updated to correctly honor the config and avoid any possible liveness interference.

PiperOrigin-RevId: 216299501
---
 tensorflow/compiler/xla/service/BUILD         |  21 ++
 .../compiler/xla/service/buffer_assignment.cc |  34 ++--
 .../compiler/xla/service/buffer_value.h       |   3 +
 .../compiler/xla/service/copy_insertion.cc    |  85 +++++++-
 .../xla/service/copy_insertion_test.cc        | 183 +++++++++++++++++
 tensorflow/compiler/xla/service/hlo.proto     |  29 +++
 .../xla/service/hlo_alias_analysis.cc         |  46 ++++-
 .../xla/service/hlo_alias_analysis_test.cc    | 175 +++++++++++++++++
 .../xla/service/hlo_dataflow_analysis.cc      |   2 +-
 .../service/hlo_input_output_alias_config.cc  | 172 ++++++++++++++++
 .../service/hlo_input_output_alias_config.h   | 101 ++++++++++
 .../hlo_input_output_alias_config_test.cc     | 184 ++++++++++++++++++
 tensorflow/compiler/xla/service/hlo_module.cc |   9 +
 tensorflow/compiler/xla/service/hlo_module.h  |  14 ++
 .../compiler/xla/service/hlo_verifier.cc      |   2 +
 tensorflow/compiler/xla/shape_util.h          |   2 +-
 16 files changed, 1037 insertions(+), 25 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
 create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
 create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 2b292ed053..26ebb88e96 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -294,6 +294,7 @@ cc_library(
     srcs = [
         "dfs_hlo_visitor.cc",
         "hlo_computation.cc",
+        "hlo_input_output_alias_config.cc",
         "hlo_instruction.cc",
         "hlo_instructions.cc",
         "hlo_module.cc",
@@ -308,6 +309,7 @@ cc_library(
         "hlo_clone_context.h",
         "hlo_computation.h",
         "hlo_domain_metadata.h",
+        "hlo_input_output_alias_config.h",
         "hlo_instruction.h",
         "hlo_instructions.h",
         "hlo_module.h",
@@ -1268,6 +1270,25 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "hlo_input_output_alias_config_test",
+    srcs = ["hlo_input_output_alias_config_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_dce",
+        ":hlo_memory_scheduler",
+        ":hlo_ordering",
+        ":hlo_parser",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+        "@com_google_absl//absl/algorithm:container",
+    ],
+)
+
 cc_library(
     name = "hlo_memory_scheduler",
     srcs = ["hlo_memory_scheduler.cc"],
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 2c2d1626c2..d5d6a044a8 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -239,7 +239,7 @@ BufferAllocation::Slice BufferAllocation::GetSlice(
 
 void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset,
                                      int64 size) {
-  VLOG(4) << "Trying to add " << buffer << " to " << this;
+  VLOG(4) << "Trying to add " << buffer << " to allocation #" << index();
   CHECK(assigned_buffers_.count(&buffer) == 0)
       << "LogicalBuffer " << buffer << " already assigned to allocation "
       << index_;
@@ -784,21 +784,6 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
     }
   }
 
-  if (allow_input_output_aliasing_ && allocation->maybe_live_out()) {
-    const HloComputation* entry_computation =
-        assignment->module_->entry_computation();
-    for (auto param : entry_computation->parameter_instructions()) {
-      for (auto& param_buffer :
-           assignment->points_to_analysis().GetBuffersDefinedByInstruction(
-               param)) {
-        if (assignment->liveness().MayInterfere(*param_buffer, buffer)) {
-          VLOG(4) << "Can't assign: Parameter interference with result";
-          return false;
-        }
-      }
-    }
-  }
-
   // If the buffer is live out of the computation then it should only be
   // assigned a buffer which exactly fits the result to avoid wasting memory
   // (result buffers can have arbitrary lifetimes).
@@ -1434,13 +1419,28 @@ BufferAssigner::MergeColocatedBufferSets(
 
 // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated
 // in the same allocation (currently just supports kWhile, kCall, and
-// kConditional).
+// kConditional and input output aliasing).
 void BufferAssigner::BuildColocatedBufferSets(
     const HloModule* module, const BufferLiveness& buffer_liveness,
     const LogicalBuffer::SizeFunction& buffer_size,
     std::vector<ColocatedBufferSet>* colocated_buffer_sets) {
   const TuplePointsToAnalysis& points_to_analysis =
       buffer_liveness.points_to_analysis();
+
+  // Set up colocated buffer set for input and output.
+  module->input_output_alias_config().ForEachAlias(
+      [&](const ShapeIndex& output_index, int64 param_number,
+          const ShapeIndex& param_index) {
+        std::vector<const LogicalBuffer*> colocated_set;
+        AddBufferToColocatedSet(module->entry_computation()->root_instruction(),
+                                output_index, points_to_analysis,
+                                &colocated_set);
+        AddBufferToColocatedSet(
+            module->entry_computation()->parameter_instruction(param_number),
+            param_index, points_to_analysis, &colocated_set);
+        AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets);
+      });
+
   for (const HloComputation* computation : module->MakeComputationPostOrder()) {
     if (computation->IsFusionComputation()) {
       continue;
diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h
index 69b3646356..11d8abc5ba 100644
--- a/tensorflow/compiler/xla/service/buffer_value.h
+++ b/tensorflow/compiler/xla/service/buffer_value.h
@@ -141,6 +141,9 @@ class BufferValue {
   // operator< is required for std::set.
   bool operator<(const BufferValue& other) const { return id_ < other.id_; }
 
+  bool operator==(const BufferValue& other) const { return id_ == other.id_; }
+  bool operator!=(const BufferValue& other) const { return id_ != other.id_; }
+
   virtual string ToString() const = 0;
 
   // TODO(lauj) rename LogicalBufferProto to BufferValueProto.
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index f35324aa35..cfe025fdd1 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -40,10 +40,12 @@ namespace {
 
 using absl::StrAppend;
 
-bool IsEntryParameterValue(const HloValue& value) {
+bool IsReadonlyEntryParameterValue(const HloValue& value) {
   const HloComputation* computation = value.defining_instruction()->parent();
   return value.defining_instruction()->opcode() == HloOpcode::kParameter &&
-         computation == computation->parent()->entry_computation();
+         computation == computation->parent()->entry_computation() &&
+         !computation->parent()->input_output_alias_config().ParameterHasAlias(
+             value.defining_instruction()->parameter_number());
 }
 
 bool IsConstantValue(const HloValue& value) {
@@ -51,7 +53,7 @@ bool IsConstantValue(const HloValue& value) {
 }
 
 bool ValueIsReadOnly(const HloValue& value) {
-  return IsConstantValue(value) || IsEntryParameterValue(value);
+  return IsConstantValue(value) || IsReadonlyEntryParameterValue(value);
 }
 
 // Data structure describing the action which should be taken on parts of a
@@ -332,6 +334,81 @@ Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis,
   return Status::OK();
 }
 
+// Conservatively adds copies before root instruction of entry computation and
+// each aliased parameter to resolve interference of aliased input and output
+// buffer. We later rely on the CopyRemover to drop the unnecessary ones.
+Status AddCopiesForAliasedInputOutputs(HloModule* module) {
+  HloComputation* entry = module->entry_computation();
+  HloInstruction* root = entry->root_instruction();
+
+  ShapeTree<bool> output_indices_to_copy(root->shape());
+  std::vector<ShapeTree<HloInstruction*>> copied_parameters;
+  bool has_alias = false;
+  for (auto* param : entry->parameter_instructions()) {
+    bool param_has_alias = false;
+    ShapeTree<bool> param_indices_to_copy(param->shape());
+
+    module->input_output_alias_config().ForEachAlias(
+        [&](const ShapeIndex& output_index, int64 param_number,
+            const ShapeIndex& param_index) {
+          if (param_number == param->parameter_number()) {
+            param_has_alias = true;
+            *(param_indices_to_copy.mutable_element(param_index)) = true;
+            *(output_indices_to_copy.mutable_element(output_index)) = true;
+          }
+        });
+
+    if (!param_has_alias) {
+      continue;
+    }
+
+    has_alias = true;
+    // Store a snapshot of users before DeepCopyInstruction, as
+    // DeepCopyInstruction introduces new users of the instruction.
+    std::vector<HloInstruction*> users = param->users();
+    ShapeTree<HloInstruction*> param_copy_tree(param->shape(),
+                                               /*init_value=*/nullptr);
+    TF_ASSIGN_OR_RETURN(HloInstruction * copied,
+                        entry->DeepCopyInstruction(
+                            param, &param_indices_to_copy, &param_copy_tree));
+    for (HloInstruction* user : users) {
+      TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, copied));
+    }
+
+    copied_parameters.push_back(param_copy_tree);
+  }
+
+  if (!has_alias) {
+    return Status::OK();
+  }
+
+  // Add copies before root instruction.
+  ShapeTree<HloInstruction*> output_copy_tree(root->shape(),
+                                              /*init_value=*/nullptr);
+
+  TF_ASSIGN_OR_RETURN(HloInstruction * root_copied,
+                      root->parent()->DeepCopyInstruction(
+                          root, &output_indices_to_copy, &output_copy_tree));
+
+  // Add control dependencies between the input/output copies.
+  TF_RETURN_IF_ERROR(module->input_output_alias_config().ForEachAliasWithStatus(
+      [&](const ShapeIndex& output_index, int64 param_number,
+          const ShapeIndex& input_index) -> Status {
+        HloInstruction* from =
+            copied_parameters[param_number].element(input_index);
+        HloInstruction* to = output_copy_tree.element(output_index);
+
+        TF_RET_CHECK(from != nullptr);
+        TF_RET_CHECK(to != nullptr);
+        TF_RETURN_IF_ERROR(from->AddControlDependencyTo(to));
+        return Status::OK();
+      }));
+
+  entry->set_root_instruction(root_copied);
+
+  return Status::OK();
+}
+
 // Removes any control dependencies to or from the given instruction.
 Status StripControlDependenciesFrom(HloInstruction* instruction) {
   while (!instruction->control_successors().empty()) {
@@ -953,6 +1030,8 @@ Status CopyInsertion::AddCopiesToResolveInterference(HloModule* module) {
       }
     }
   }
+
+  TF_RETURN_IF_ERROR(AddCopiesForAliasedInputOutputs(module));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 892d0d7b54..3096206c34 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -1351,6 +1351,189 @@ TEST_F(CopyInsertionTest, SwizzlingWhile) {
   EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy()));
 }
 
+TEST_F(CopyInsertionTest, CrossingParameters) {
+  // Test a case where two parameters' dataflow cross with each other while
+  // input and output are aliased with same index:
+  //
+  //  (p0 ,  p1)
+  //   | \   /|
+  //   |  \ / |
+  // alias X  alias
+  //   |  / \ |
+  //   | /   \|
+  //  (p1  ,  p0)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte1, gte0}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 4);
+}
+
+TEST_F(CopyInsertionTest, ParametersAliasing) {
+  // Test a case where two parameters' dataflow don't interfere with each other
+  // while aliased.
+  //
+  //  (p0 ,  p1)
+  //   |      |
+  //   |      |
+  // alias   alias
+  //   |      |
+  //   |      |
+  //  (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+  InsertCopies(module.get());
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::Copy(op::GetTupleElement(param, 0)),
+                        op::Copy(op::GetTupleElement(param, 1))));
+
+  EXPECT_EQ(CountCopies(*module), 2);
+}
+
+TEST_F(CopyInsertionTest, ParameterWithPartialAliasing) {
+  // Test a case where one parameter is aliased with result while another one
+  // isn't.
+  //
+  //  (p0 ,  p1)
+  //   |      |
+  //   |      |
+  // alias    |
+  //   |      |
+  //   |      |
+  //  (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  InsertCopies(module.get());
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::Copy(op::GetTupleElement(param, 0)),
+                        op::Copy(op::GetTupleElement(param, 1))));
+
+  EXPECT_EQ(CountCopies(*module), 2);
+}
+
+TEST_F(CopyInsertionTest, ParameterAndParallelOpsWithPartialAliasing) {
+  // Test a case where one parameter is aliased with result while another one
+  // isn't.
+  //
+  //   +-- (p0 ,  p1)
+  //   |    |      |
+  //   |    |      |
+  // alias Negate  Negate
+  //   |    |      |
+  //   |    |      |
+  //   +-- (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+
+  auto negate0 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
+
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
+  builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 0);
+}
+
+TEST_F(CopyInsertionTest, ParameterAndOpsWithPartialAliasing) {
+  // Test a case where one parameter is aliased with result while another one
+  // isn't.
+  //
+  //   +-- (p0 ,  p1)
+  //   |    |      |
+  //   |    |      |
+  // alias Negate  Negate
+  //   |    |      |
+  //   |    Add----+
+  //   |    |      |
+  //   +-- (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+
+  auto negate0 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
+
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
+
+  auto add = builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kAdd, negate0, negate1));
+  builder.AddInstruction(HloInstruction::CreateTuple({add, negate1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 0);
+}
+
 TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) {
   // Test a while instruction with a body which permutes its tuple parameter
   // elements and applies one operation to one of the elements. The addition of
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index a0eb9e6ddc..82c8fb1904 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -225,6 +225,32 @@ message HloScheduleProto {
   map<int64, InstructionSequence> sequences = 1;
 }
 
+message HloInputOutputAliasProto {
+  // The following proto describes a pair of aliased an input
+  // (described by parameter number and a ShapeIndex of the parameter)
+  // and an output (described by a ShapeIndex of the root
+  // instruction). For example:
+  //
+  // entry = {
+  //  output_shape_index={1},
+  //  parameter_number=0,
+  //  parameter_shape_index={1, 2},
+  // }
+  //
+  // This entry indicates that the first paremter's {1, 2} element is
+  // aliased with the {1} element of the root instruction.
+  message AliasEntryProto {
+    // ShapeIndex of the root hlo.
+    repeated int64 output_shape_index = 1;
+    // Number of the parameter in entry computation.
+    int64 parameter_number = 2;
+    // ShapeIndex of the parameter instruction.
+    repeated int64 parameter_shape_index = 3;
+  }
+
+  repeated AliasEntryProto entries = 1;
+}
+
 // Serialization of HloModule.
 message HloModuleProto {
   string name = 1;
@@ -243,6 +269,9 @@ message HloModuleProto {
 
   // The schedule for this module.
   HloScheduleProto schedule = 7;
+
+  // Describes alias information between inputs and outputs.
+  HloInputOutputAliasProto input_output_alias = 8;
 }
 
 // Serialization of LogicalBuffer.
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index c3da12e273..cf8e6594cb 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -59,8 +59,9 @@ class BufferValueMap {
   // construction process.
   using BufferNumber = int64;
 
-  explicit BufferValueMap(const HloDataflowAnalysis& dataflow)
-      : dataflow_(dataflow) {
+  explicit BufferValueMap(HloModule* module,
+                          const HloDataflowAnalysis& dataflow)
+      : module_(module), dataflow_(dataflow) {
     buffers_.reserve(dataflow_.values().size());
     value_to_buffer_number_.reserve(dataflow_.values().size());
     for (const HloValue* value : dataflow_.values()) {
@@ -171,6 +172,42 @@ class BufferValueMap {
     return value_to_buffer_number_.at(&value);
   }
 
+  void ComputeInputOutputAliasedBuffers(
+      const HloValue& value, std::vector<BufferNumber>* aliased_buffers) {
+    // Get parameter value from an aliased_input object.
+    const auto get_parameter_value =
+        [this](const std::pair<int64, ShapeIndex>& aliased_input)
+        -> const HloValue& {
+      int64 param_number = aliased_input.first;
+      const ShapeIndex& param_index = aliased_input.second;
+      return dataflow_.GetUniqueValueAt(
+          module_->entry_computation()->parameter_instruction(param_number),
+          param_index);
+    };
+
+    // If the value shows up in a root instruction, alias it with parameter
+    // intruction.
+    for (const HloPosition& pos : value.positions()) {
+      if (pos.instruction == module_->entry_computation()->root_instruction()) {
+        ShapeIndex output_index = pos.index;
+
+        auto aliased_input =
+            module_->input_output_alias_config().GetAliasedParameter(
+                output_index);
+        if (aliased_input) {
+          aliased_buffers->push_back(
+              GetBufferForValue(get_parameter_value(*aliased_input)));
+        }
+      }
+    }
+
+    // If the value is parameter instruction itself, alias it with itself.
+    if (value.instruction()->opcode() == HloOpcode::kParameter &&
+        value.instruction()->parent() == module_->entry_computation()) {
+      aliased_buffers->push_back(GetBufferForValue(value));
+    }
+  }
+
   void ComputeWhileAliasedBuffers(const HloValue& value,
                                   std::vector<BufferNumber>* aliased_buffers) {
     VLOG(3) << "Compute kWhile aliases";
@@ -278,6 +315,7 @@ class BufferValueMap {
       VLOG(2) << "Use of value " << value.ToShortString() << ": " << use;
     }
     std::vector<BufferNumber> aliased_buffers;
+    ComputeInputOutputAliasedBuffers(value, &aliased_buffers);
     ComputeWhileAliasedBuffers(value, &aliased_buffers);
     ComputeConditionalAliasedBuffers(value, &aliased_buffers);
     // Uniquify aliased buffers.
@@ -288,6 +326,8 @@ class BufferValueMap {
     return aliased_buffers;
   }
 
+  HloModule* module_;
+
   // Dataflow analysis used to construct the buffer map.
   const HloDataflowAnalysis& dataflow_;
 
@@ -461,7 +501,7 @@ StatusOr<std::unique_ptr<HloAliasAnalysis>> HloAliasAnalysis::Run(
                                                /*bitcast_defines_value=*/false,
                                                fusion_can_share_buffer));
 
-  BufferValueMap buffer_map(alias_analysis->dataflow_analysis());
+  BufferValueMap buffer_map(module, alias_analysis->dataflow_analysis());
   buffer_map.MergeAliasedBuffers();
 
   // Create a vector of HloBuffers, one for each set of values in the
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
index 0cd0ab36fc..5c8d97b2d1 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
@@ -217,6 +217,181 @@ TEST_F(HloAliasAnalysisTest, NondistinctTuple) {
   EXPECT_FALSE(AnyValuesInSameBufferInterfere());
 }
 
+TEST_F(HloAliasAnalysisTest, ParametersWithAliasing) {
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+
+  auto negate0 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
+
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
+  module_->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+
+  // Cannot alias an output twice.
+  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
+
+  const HloAliasAnalysis& analysis = RunAnalysis();
+
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
+
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
+}
+
+TEST_F(HloAliasAnalysisTest, ParametersWithCrossAliasing) {
+  // parameter 0 aliased with output 1 and parameter 1 aliased with output 0.
+  //
+  //  (p0 ,  p1)
+  //     \   /
+  //      \ /
+  // alias X
+  //      / \
+  //     /   \
+  //  (p0  ,  p1)
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module_->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{1}));
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
+
+  // Cannot alias an output twice.
+  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+
+  const HloAliasAnalysis& analysis = RunAnalysis();
+
+  // Every Ops in this graph are aliased with each other.
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
+
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
+}
+
+TEST_F(HloAliasAnalysisTest, InputOutputAliasingWithWhile) {
+  // Test a simple single while instruction can be aliased with input and output
+  // of the computation.
+  //
+  // body((F32[], F32[]) %tuple_param):
+  //   %add = Add(%tuple_param{0}, %tuple_param{1})
+  //   return Tuple(%tuple_param{0}, %add)
+  //
+  // condition((F32[], F32[]) %tuple_param):
+  //   return Constant(false)
+  //
+  // entry:
+  //   %param1 = param1
+  //   %while = While(%param1, body, condition)
+  //   %while_1 = GTE(%while, 0)
+  //   %while_2 = GTE(%while, 1)
+  //   %negate_1 = Negate(%while_1)
+  //   %negate_2 = Negate(%while_2)
+  //   return Tuple(negate_1, negate_2)
+  //
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  // Element 0 passes transparently through the body.
+  auto body_builder = HloComputation::Builder("body");
+  auto body_param = body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "param"));
+  auto body_element_0 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0));
+  auto body_element_1 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1));
+  auto add = body_builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1));
+  auto body_tuple = body_builder.AddInstruction(
+      HloInstruction::CreateTuple({body_element_0, add}));
+  HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build());
+
+  // Condition computation trivially returns a constant "false".
+  auto cond_builder = HloComputation::Builder("condition");
+  auto cond_param = cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "param"));
+  cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<bool>(false)));
+  HloComputation* condition =
+      module_->AddEmbeddedComputation(cond_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+
+  auto xla_while = builder.AddInstruction(
+      HloInstruction::CreateWhile(tuple_shape, condition, body, param));
+  auto while_element_1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 0));
+  auto while_element_2 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 1));
+  auto negate_1 = builder.AddInstruction(HloInstruction::CreateUnary(
+      scalar_shape_, HloOpcode::kNegate, while_element_1));
+  auto negate_2 = builder.AddInstruction(HloInstruction::CreateUnary(
+      scalar_shape_, HloOpcode::kNegate, while_element_2));
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({negate_1, negate_2}));
+  module_->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+
+  const HloAliasAnalysis& analysis = RunAnalysis();
+
+  EXPECT_THAT(
+      GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})),
+      UnorderedElementsAre(GetValueDefinedAt(param, {1}),
+                           GetValueDefinedAt(xla_while, /*index=*/{1}),
+                           GetValueDefinedAt(body_param, {1}),
+                           GetValueDefinedAt(cond_param, {1}),
+                           GetValueDefinedAt(add),
+                           GetValueDefinedAt(negate_2)));
+
+  EXPECT_THAT(
+      analysis.GetUniqueBufferAt(xla_while, /*index=*/{1}).ComputePositions(),
+      UnorderedElementsAre(
+          HloPosition{param, {1}}, HloPosition{xla_while, {1}},
+          HloPosition{while_element_2, {}}, HloPosition{body_param, {1}},
+          HloPosition{body_element_1, {}}, HloPosition{add, {}},
+          HloPosition{body_tuple, {1}}, HloPosition{tuple, {1}},
+          HloPosition{cond_param, {1}}, HloPosition{negate_2, {}}));
+
+  EXPECT_FALSE(AnyValuesInSameBufferInterfere());
+}
+
 TEST_F(HloAliasAnalysisTest, SingleCall) {
   // Test a single call of a subcomputation. The subcomputation adds its two
   // array-shaped parameters.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index c22adcdd8d..f401eac016 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -126,7 +126,7 @@ bool HloDataflowAnalysis::ValueIsDefinedAt(const HloInstruction* instruction,
 
 const HloValue& HloDataflowAnalysis::GetValueDefinedAt(
     const HloInstruction* instruction, const ShapeIndex& index) const {
-  CHECK(ValueIsDefinedAt(instruction, index));
+  CHECK(ValueIsDefinedAt(instruction, index)) << instruction->ToString();
   return GetUniqueValueAt(instruction, index);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
new file mode 100644
index 0000000000..9ad98e5038
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
@@ -0,0 +1,172 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+
+namespace xla {
+Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index,
+                                             int64 param_number,
+                                             const ShapeIndex& param_index) {
+  // Output can't be aliased with multiple parameters.
+  TF_RET_CHECK(!alias_.element(output_index));
+  (*alias_.mutable_element(output_index)) =
+      std::make_pair(param_number, param_index);
+  return Status::OK();
+}
+
+HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const {
+  HloInputOutputAliasProto result;
+  alias_.ForEachElement(
+      [&](const ShapeIndex& index,
+          const absl::optional<std::pair<int64, ShapeIndex>>& data) {
+        if (data) {
+          HloInputOutputAliasProto::AliasEntryProto entry;
+          for (int64 i : index) {
+            entry.add_output_shape_index(i);
+          }
+          entry.set_parameter_number(data->first);
+          for (int64 i : data->second) {
+            entry.add_parameter_shape_index(i);
+          }
+          result.add_entries()->Swap(&entry);
+        }
+      });
+  return result;
+}
+
+StatusOr<HloInputOutputAliasConfig> HloInputOutputAliasConfig::CreateFromProto(
+    const HloModule* module, const HloInputOutputAliasProto& proto) {
+  HloInputOutputAliasConfig result(
+      module->entry_computation()->root_instruction()->shape());
+  for (const HloInputOutputAliasProto::AliasEntryProto& entry :
+       proto.entries()) {
+    ShapeIndex output_index(entry.output_shape_index().begin(),
+                            entry.output_shape_index().end());
+
+    int64 param_number = entry.parameter_number();
+    ShapeIndex param_index(entry.parameter_shape_index().begin(),
+                           entry.parameter_shape_index().end());
+    TF_RETURN_IF_ERROR(
+        result.SetUpAlias(output_index, param_number, param_index));
+  }
+
+  return result;
+}
+
+string HloInputOutputAliasConfig::ToString() const {
+  std::vector<string> pieces;
+  pieces.push_back("HloInputOutputAliasConfig");
+
+  ForEachAlias([&](const ShapeIndex& output_index, int64 param_number,
+                   const ShapeIndex& param_index) {
+    pieces.push_back(absl::StrFormat(
+        "  OutputIndex %s is aliased with parameter %lld at %s:",
+        output_index.ToString(), param_number, param_index.ToString()));
+  });
+
+  return absl::StrJoin(pieces, "\n");
+}
+
+bool HloInputOutputAliasConfig::ParameterHasAlias(int64 param_number) const {
+  bool output = false;
+  alias_.ForEachElement(
+      [&](const xla::ShapeIndex&,
+          absl::optional<std::pair<int64, ShapeIndex>> alias) {
+        if (alias && alias->first == param_number) {
+          output = true;
+        }
+      });
+  return output;
+}
+
+absl::optional<ShapeIndex> HloInputOutputAliasConfig::GetAliasedOutput(
+    int64 param_number, const ShapeIndex& param_index) const {
+  absl::optional<ShapeIndex> output;
+  alias_.ForEachElement(
+      [&](const xla::ShapeIndex& output_index,
+          absl::optional<std::pair<int64, ShapeIndex>> alias) {
+        if (alias && alias->first == param_number &&
+            alias->second == param_index) {
+          output = output_index;
+        }
+      });
+  return output;
+}
+
+absl::optional<std::pair<int64, ShapeIndex>>
+HloInputOutputAliasConfig::GetAliasedParameter(
+    const ShapeIndex& output_index) const {
+  CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index));
+  return alias_.element(output_index);
+}
+
+void HloInputOutputAliasConfig::ForEachAlias(AliasFn fn) const {
+  alias_.ForEachElement(
+      [&](const ShapeIndex& output_index,
+          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
+        if (aliased) {
+          fn(output_index, aliased->first, aliased->second);
+        }
+      });
+}
+
+Status HloInputOutputAliasConfig::ForEachAliasWithStatus(
+    AliasFnWithStatus fn) const {
+  return alias_.ForEachElementWithStatus(
+      [&](const ShapeIndex& output_index,
+          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
+        if (aliased) {
+          TF_RETURN_IF_ERROR(fn(output_index, aliased->first, aliased->second));
+        }
+        return Status::OK();
+      });
+}
+
+Status HloInputOutputAliasConfig::Verify(const HloModule& module) const {
+  std::vector<ShapeTree<bool>> param_has_seen;
+  const HloComputation* entry = module.entry_computation();
+  for (int64 i = 0; i < entry->num_parameters(); ++i) {
+    HloInstruction* param = entry->parameter_instruction(i);
+    param_has_seen.emplace_back(param->shape());
+  }
+  return ForEachAliasWithStatus([&](const ShapeIndex& output_index,
+                                    int64 param_number,
+                                    const ShapeIndex& param_index) -> Status {
+    const HloInstruction* root = entry->root_instruction();
+
+    const Shape& param_shape =
+        entry->parameter_instruction(param_number)->shape();
+    const Shape& output_shape = root->shape();
+    TF_RET_CHECK(entry->num_parameters() > param_number);
+    TF_RET_CHECK(ShapeUtil::IndexIsValid(param_shape, param_index));
+    TF_RET_CHECK(ShapeUtil::IndexIsValid(output_shape, output_index));
+
+    // Check each param_number and param_index pair only show up once. No
+    // input can be aliased with output buffers.
+    TF_RET_CHECK(param_has_seen[param_number].element(param_index) == false);
+
+    *(param_has_seen[param_number].mutable_element(param_index)) = true;
+
+    return Status::OK();
+  });
+}
+
+std::ostream& operator<<(std::ostream& out,
+                         const HloInputOutputAliasConfig& config) {
+  out << config.ToString();
+  return out;
+}
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
new file mode 100644
index 0000000000..02c46f65c8
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
@@ -0,0 +1,101 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
+
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/shape_tree.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+
+namespace xla {
+
+class HloModule;
+
+// This class specifies the alias map from output index to parameter number and
+// parameter index in the entry computation.
+class HloInputOutputAliasConfig {
+ public:
+  HloInputOutputAliasConfig() = default;
+
+  explicit HloInputOutputAliasConfig(Shape shape) : alias_(shape) {}
+
+  virtual ~HloInputOutputAliasConfig() = default;
+
+  // Sets up alias config from `output_index` to `param_index` at
+  // `param_number`.
+  Status SetUpAlias(const ShapeIndex& output_index, int64 param_number,
+                    const ShapeIndex& param_index);
+
+  // Returns true if the given parameter is aliased with one of the output
+  // buffers.
+  bool ParameterHasAlias(int64 param_number) const;
+
+  // (De)Serializes an HloInputOutoutAliasConfig to/from an
+  // HloInputOutoutAliasProto.
+  HloInputOutputAliasProto ToProto() const;
+
+  static StatusOr<HloInputOutputAliasConfig> CreateFromProto(
+      const HloModule* module, const HloInputOutputAliasProto& proto);
+
+  // Returns the output index that the given parameter and parameter index is
+  // aliased with. A nullopt is returned if there is no output that is aliased
+  // with the parameter number and index.
+  absl::optional<ShapeIndex> GetAliasedOutput(
+      int64 param_number, const ShapeIndex& param_index) const;
+
+  // Returns the number of parameter and index of the parameter buffer that the
+  // given output buffer index is aliased with. A nullopt is returned if there
+  // is no parameter is aliased with the specific output.
+  absl::optional<std::pair<int64, ShapeIndex>> GetAliasedParameter(
+      const ShapeIndex& output_index) const;
+
+  using AliasFn =
+      std::function<void(const ShapeIndex& output_index, int64 param_number,
+                         const ShapeIndex& param_index)>;
+
+  // Iterates through each aliased output and input.
+  void ForEachAlias(AliasFn fn) const;
+
+  using AliasFnWithStatus =
+      std::function<Status(const ShapeIndex& output_index, int64 param_number,
+                           const ShapeIndex& param_index)>;
+
+  // Verifies that the given config is valid for the given module.
+  // Specifically, the config's input and output should be in-bound and size of
+  // the aliased buffers should match.
+  Status Verify(const HloModule& module) const;
+
+  Status ForEachAliasWithStatus(AliasFnWithStatus fn) const;
+
+  string ToString() const;
+
+ private:
+  // A ShapeTree which indicates the list of buffers that's expected to be
+  // aliased. The key on this shape tree represents the output index. The value
+  // is a pair of parameter number and index into the buffer. If the value is
+  // nullopt, it means there is no parameter aliasing for this output.
+  ShapeTree<absl::optional<std::pair<int64, ShapeIndex>>> alias_;
+};
+
+std::ostream& operator<<(std::ostream& out,
+                         const HloInputOutputAliasConfig& config);
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
new file mode 100644
index 0000000000..3b61ff04e6
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
@@ -0,0 +1,184 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
+
+#include <memory>
+#include <string>
+
+#include "absl/algorithm/container.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_dce.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_ordering.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+class HloInputOutputAliasConfigTest : public HloTestBase {
+ protected:
+  void expect_aliased(const ShapeIndex& output_index, int64 param_number,
+                      const ShapeIndex& param_index,
+                      const HloInputOutputAliasConfig& config) {
+    absl::optional<ShapeIndex> aliased_output =
+        config.GetAliasedOutput(param_number, param_index);
+
+    EXPECT_TRUE(aliased_output);
+    EXPECT_EQ(aliased_output.value(), output_index);
+
+    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
+        config.GetAliasedParameter(output_index);
+
+    EXPECT_TRUE(aliased_param);
+    EXPECT_EQ(aliased_param.value(), std::make_pair(param_number, param_index));
+  }
+
+  void expect_not_aliased(const ShapeIndex& output_index, int64 param_number,
+                          const ShapeIndex& param_index,
+                          const HloInputOutputAliasConfig& config) {
+    absl::optional<ShapeIndex> aliased_output =
+        config.GetAliasedOutput(param_number, param_index);
+
+    EXPECT_FALSE(aliased_output && aliased_output == output_index);
+
+    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
+        config.GetAliasedParameter(output_index);
+
+    EXPECT_FALSE(aliased_param && aliased_param->first == param_number &&
+                 aliased_param->second == param_index);
+  }
+};
+
+TEST_F(HloInputOutputAliasConfigTest, SimpleAliasing) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT root = (f32[], f32[]) tuple(%a, %b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
+                                 /*param_index=*/{}));
+
+  expect_aliased(/*output_index=*/{0}, /*param_number=*/1,
+                 /*param_index=*/{}, config);
+
+  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
+                     /*param_index=*/{}, config);
+
+  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
+                     /*param_index=*/{}, config);
+}
+
+TEST_F(HloInputOutputAliasConfigTest, SimpleAliasingWithTupleInput) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  param = (f32[], f32[]) parameter(0)
+  gte1 = f32[] get-tuple-element(%param), index=0
+  gte2 = f32[] get-tuple-element(%param), index=1
+  ROOT root = (f32[], f32[]) tuple(%gte1, %gte2)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
+                                 /*param_index=*/{0}));
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
+                                 /*param_index=*/{1}));
+
+  expect_aliased(/*output_index=*/{0}, /*param_number=*/0,
+                 /*param_index=*/{0}, config);
+
+  expect_aliased(/*output_index=*/{1}, /*param_number=*/0,
+                 /*param_index=*/{1}, config);
+
+  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
+                     /*param_index=*/{}, config);
+
+  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
+                     /*param_index=*/{}, config);
+}
+
+TEST_F(HloInputOutputAliasConfigTest, InputDoNotAliasTwice) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT root = (f32[], f32[]) tuple(%a, %b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
+                                 /*param_index=*/{}));
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
+                                 /*param_index=*/{}));
+
+  ASSERT_IS_NOT_OK(config.Verify(*module));
+}
+
+TEST_F(HloInputOutputAliasConfigTest, OutputDoNotAliasTwice) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT root = (f32[], f32[]) tuple(%a, %b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
+                                 /*param_index=*/{}));
+
+  ASSERT_IS_NOT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
+                                     /*param_index=*/{}));
+}
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 93e04eb3db..547f74a0ed 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -73,6 +73,8 @@ HloComputation* HloModule::AddComputationInternal(
       config_.SetDefaultComputationLayout(
           entry_computation_->ComputeProgramShape());
     }
+    input_output_alias_config_ = HloInputOutputAliasConfig(
+        entry_computation_->root_instruction()->shape());
   }
 
   if (uniquify_identifiers) {
@@ -252,6 +254,9 @@ HloModuleProto HloModule::ToProto() const {
   if (has_schedule()) {
     *proto.mutable_schedule() = schedule().ToProto().ValueOrDie();
   }
+
+  *proto.mutable_input_output_alias() = input_output_alias_config().ToProto();
+
   return proto;
 }
 
@@ -328,6 +333,10 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
   }
   TF_RET_CHECK(module->entry_computation_ != nullptr);
 
+  TF_ASSIGN_OR_RETURN(module->input_output_alias_config_,
+                      HloInputOutputAliasConfig::CreateFromProto(
+                          module.get(), proto.input_output_alias()));
+
   // Because we didn't uniquify the names or the ids, double-check that the
   // instruction and computation names and ids are unique from the proto.
   absl::flat_hash_set<string> computation_names;
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 735804e827..9b9dc3ba9f 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_clone_context.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
@@ -212,6 +213,15 @@ class HloModule {
     return result;
   }
 
+  // input_output_alias_config indicates the list of aliased buffers that are
+  // expected from the module.
+  HloInputOutputAliasConfig& input_output_alias_config() {
+    return input_output_alias_config_;
+  }
+  const HloInputOutputAliasConfig& input_output_alias_config() const {
+    return input_output_alias_config_;
+  }
+
   // Returns the number of unique intruction ids given out.  All ids up to
   // this point are guaranteed to be in the range [0..NumUniqueInstructionIds())
   int NumUniqueInstructionIds() const { return next_unique_id_; }
@@ -284,6 +294,10 @@ class HloModule {
   // sequential order of instructions for each non-fusion computation in the
   // module.
   absl::optional<HloSchedule> schedule_;
+
+  // alias_config indicates the alias information of input/output buffers that
+  // are expected from the module.
+  HloInputOutputAliasConfig input_output_alias_config_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index be3bee5975..2902a11a42 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -1220,6 +1220,8 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     TF_RETURN_IF_ERROR(module->schedule().Verify());
   }
 
+  TF_RETURN_IF_ERROR(module->input_output_alias_config().Verify(*module));
+
   return false;
 }
 
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 73f541d505..51cedce7f0 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -72,7 +72,7 @@ class ShapeIndex {
   void push_back(int64 value) { indices_.push_back(value); }
   void pop_back() { indices_.pop_back(); }
 
-  // push_front is O(n^2), but shapes don't usually have a ton of dimensions.
+  // push_front is O(n), but shapes don't usually have a ton of dimensions.
   void push_front(int64 value) { indices_.insert(indices_.begin(), value); }
 
   using container_type = absl::InlinedVector<int64, 2>;
-- 
GitLab


From a593c6885bec8c545665ec2f25d794777be55ba9 Mon Sep 17 00:00:00 2001
From: Reed Wanderman-Milne <reedwm@google.com>
Date: Mon, 8 Oct 2018 21:23:08 -0700
Subject: [PATCH 0587/1085] Automated rollback of commit
 07df147ab20c4a5329148e5fb5f7f6b187cb73a4

PiperOrigin-RevId: 216299809
---
 tensorflow/core/grappler/optimizers/meta_optimizer.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 7488cedec5..225c0a91e3 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -114,8 +114,7 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
   MK_OPT("scoped_allocator",
          new ScopedAllocatorOptimizer(cfg_.scoped_allocator_optimization(),
                                       cfg_.scoped_allocator_opts()));
-  MK_OPT("pin_to_host",
-         new PinToHostOptimizer(cfg_.pin_to_host_optimization()));
+  MK_OPT("small_op", new PinToHostOptimizer(cfg_.pin_to_host_optimization()));
 
   return std::unique_ptr<GraphOptimizer>();
 }
@@ -162,7 +161,7 @@ Status MetaOptimizer::InitializeOptimizers(
   if (cfg_.remapping() != RewriterConfig::OFF) {
     optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
   }
-  if (cfg_.pin_to_host_optimization() != RewriterConfig::OFF) {
+  if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) {
     optimizers->push_back(MakeUnique<PinToHostOptimizer>());
   }
   if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
@@ -592,7 +591,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
          cfg.scoped_allocator_optimization() == RewriterConfig::ON ||
-         cfg.pin_to_host_optimization() != RewriterConfig::OFF ||
+         cfg.pin_to_host_optimization() == RewriterConfig::ON ||
          !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }
 
-- 
GitLab


From d1f0494b89a31298df7743018c0a3fa388ac16a2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 22:13:04 -0700
Subject: [PATCH 0588/1085] Add Floor_mod to schema.

PiperOrigin-RevId: 216303340
---
 tensorflow/contrib/lite/builtin_ops.h         |   1 +
 .../lite/core/api/flatbuffer_conversions.cc   |   1 +
 tensorflow/contrib/lite/nnapi_delegate.cc     |   1 +
 tensorflow/contrib/lite/schema/schema.fbs     |   5 +
 .../contrib/lite/schema/schema_generated.h    | 124 +++++++++++++++++-
 5 files changed, 126 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h
index 7809d114e2..6117cbf9f1 100644
--- a/tensorflow/contrib/lite/builtin_ops.h
+++ b/tensorflow/contrib/lite/builtin_ops.h
@@ -120,6 +120,7 @@ typedef enum {
   kTfLiteBuiltinSquare = 92,
   kTfLiteBuiltinZerosLike = 93,
   kTfLiteBuiltinFill = 94,
+  kTfLiteBuiltinFloorMod = 95,
 } TfLiteBuiltinOperator;
 
 #ifdef __cplusplus
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index b092e5ee54..890d9c04bb 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -651,6 +651,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_SQUARE:
     case BuiltinOperator_ZEROS_LIKE:
     case BuiltinOperator_FILL:
+    case BuiltinOperator_FLOOR_MOD:
       break;
   }
   return kTfLiteOk;
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index f23a0ccb80..c7005eb53e 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -679,6 +679,7 @@ TfLiteStatus AddOpsAndParams(
       case tflite::BuiltinOperator_SQUARE:
       case tflite::BuiltinOperator_ZEROS_LIKE:
       case tflite::BuiltinOperator_FILL:
+      case tflite::BuiltinOperator_FLOOR_MOD:
         logError("Op code %d is currently not delegated to NNAPI", builtin);
         return kTfLiteError;
         break;
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index cb7a282743..2b36209e5f 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -176,6 +176,7 @@ enum BuiltinOperator : byte {
   SQUARE = 92,
   ZEROS_LIKE = 93,
   FILL = 94,
+  FLOOR_MOD = 95,
 }
 
 // Options for the builtin operators.
@@ -251,6 +252,7 @@ union BuiltinOptions {
   BidirectionalSequenceLSTMOptions,
   BidirectionalSequenceRNNOptions,
   UnidirectionalSequenceLSTMOptions,
+  FloorModOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -618,6 +620,9 @@ table ZerosLikeOptions {
 table FillOptions {
 }
 
+table FloorModOptions {
+}
+
 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
 // builtin, or a string if the operator is custom.
 table OperatorCode {
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index e7b7a59def..3aaa99ec55 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -241,6 +241,9 @@ struct ZerosLikeOptionsT;
 struct FillOptions;
 struct FillOptionsT;
 
+struct FloorModOptions;
+struct FloorModOptionsT;
+
 struct OperatorCode;
 struct OperatorCodeT;
 
@@ -401,11 +404,12 @@ enum BuiltinOperator {
   BuiltinOperator_SQUARE = 92,
   BuiltinOperator_ZEROS_LIKE = 93,
   BuiltinOperator_FILL = 94,
+  BuiltinOperator_FLOOR_MOD = 95,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_FILL
+  BuiltinOperator_MAX = BuiltinOperator_FLOOR_MOD
 };
 
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[94] {
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[95] {
   static const BuiltinOperator values[] = {
     BuiltinOperator_ADD,
     BuiltinOperator_AVERAGE_POOL_2D,
@@ -500,7 +504,8 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[94] {
     BuiltinOperator_REDUCE_ANY,
     BuiltinOperator_SQUARE,
     BuiltinOperator_ZEROS_LIKE,
-    BuiltinOperator_FILL
+    BuiltinOperator_FILL,
+    BuiltinOperator_FLOOR_MOD
   };
   return values;
 }
@@ -602,6 +607,7 @@ inline const char * const *EnumNamesBuiltinOperator() {
     "SQUARE",
     "ZEROS_LIKE",
     "FILL",
+    "FLOOR_MOD",
     nullptr
   };
   return names;
@@ -685,11 +691,12 @@ enum BuiltinOptions {
   BuiltinOptions_BidirectionalSequenceLSTMOptions = 69,
   BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
   BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
+  BuiltinOptions_FloorModOptions = 72,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_UnidirectionalSequenceLSTMOptions
+  BuiltinOptions_MAX = BuiltinOptions_FloorModOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[72] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[73] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -762,7 +769,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[72] {
     BuiltinOptions_FillOptions,
     BuiltinOptions_BidirectionalSequenceLSTMOptions,
     BuiltinOptions_BidirectionalSequenceRNNOptions,
-    BuiltinOptions_UnidirectionalSequenceLSTMOptions
+    BuiltinOptions_UnidirectionalSequenceLSTMOptions,
+    BuiltinOptions_FloorModOptions
   };
   return values;
 }
@@ -841,6 +849,7 @@ inline const char * const *EnumNamesBuiltinOptions() {
     "BidirectionalSequenceLSTMOptions",
     "BidirectionalSequenceRNNOptions",
     "UnidirectionalSequenceLSTMOptions",
+    "FloorModOptions",
     nullptr
   };
   return names;
@@ -1139,6 +1148,10 @@ template<> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
 };
 
+template<> struct BuiltinOptionsTraits<FloorModOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -1738,6 +1751,14 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ?
       reinterpret_cast<const UnidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
   }
+  FloorModOptionsT *AsFloorModOptions() {
+    return type == BuiltinOptions_FloorModOptions ?
+      reinterpret_cast<FloorModOptionsT *>(value) : nullptr;
+  }
+  const FloorModOptionsT *AsFloorModOptions() const {
+    return type == BuiltinOptions_FloorModOptions ?
+      reinterpret_cast<const FloorModOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -6241,6 +6262,46 @@ inline flatbuffers::Offset<FillOptions> CreateFillOptions(
 
 flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct FloorModOptionsT : public flatbuffers::NativeTable {
+  typedef FloorModOptions TableType;
+  FloorModOptionsT() {
+  }
+};
+
+struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef FloorModOptionsT NativeTableType;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  FloorModOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<FloorModOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FloorModOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
+  flatbuffers::Offset<FloorModOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FloorModOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  FloorModOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct OperatorCodeT : public flatbuffers::NativeTable {
   typedef OperatorCode TableType;
   BuiltinOperator builtin_code;
@@ -6587,6 +6648,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const {
     return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
   }
+  const FloorModOptions *builtin_options_as_FloorModOptions() const {
+    return builtin_options_type() == BuiltinOptions_FloorModOptions ? static_cast<const FloorModOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -6902,6 +6966,10 @@ template<> inline const UnidirectionalSequenceLSTMOptions *Operator::builtin_opt
   return builtin_options_as_UnidirectionalSequenceLSTMOptions();
 }
 
+template<> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const {
+  return builtin_options_as_FloorModOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -9286,6 +9354,29 @@ inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBuffe
       _fbb);
 }
 
+inline FloorModOptionsT *FloorModOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new FloorModOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void FloorModOptions::UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<FloorModOptions> FloorModOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateFloorModOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FloorModOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateFloorModOptions(
+      _fbb);
+}
+
 inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorCodeT();
   UnPackTo(_o, _resolver);
@@ -9759,6 +9850,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -10061,6 +10156,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -10351,6 +10450,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptionsT *>(value);
       return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<const FloorModOptionsT *>(value);
+      return CreateFloorModOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -10641,6 +10744,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast<UnidirectionalSequenceLSTMOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_FloorModOptions: {
+      value = new FloorModOptionsT(*reinterpret_cast<FloorModOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -11003,6 +11110,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<FloorModOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
-- 
GitLab


From e27ee15fa45a5f4e43e10ed1fe0eb3a1feb4253a Mon Sep 17 00:00:00 2001
From: Peter Ma <pcma@google.com>
Date: Mon, 8 Oct 2018 23:12:08 -0700
Subject: [PATCH 0589/1085] Refactor CalculateOutputSize() from
 VirtualScheduler protected member function to utils; Refactor EstimateSize()
 from memory_optimizer.cc to utils; some small changes for readability
 improvement

PiperOrigin-RevId: 216307257
---
 tensorflow/core/grappler/costs/BUILD          |   1 +
 tensorflow/core/grappler/costs/utils.cc       |  40 ++++++-
 tensorflow/core/grappler/costs/utils.h        |  11 ++
 tensorflow/core/grappler/costs/utils_test.cc  | 112 +++++++++++++-----
 .../core/grappler/costs/virtual_scheduler.cc  |  48 ++------
 .../core/grappler/costs/virtual_scheduler.h   |  22 ++--
 .../grappler/costs/virtual_scheduler_test.cc  |  48 +-------
 tensorflow/core/grappler/optimizers/BUILD     |   1 +
 .../grappler/optimizers/memory_optimizer.cc   |  26 +---
 9 files changed, 161 insertions(+), 148 deletions(-)

diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD
index f3dc2c2091..46eacd3a06 100644
--- a/tensorflow/core/grappler/costs/BUILD
+++ b/tensorflow/core/grappler/costs/BUILD
@@ -236,6 +236,7 @@ tf_cc_test(
     name = "virtual_scheduler_test",
     srcs = ["virtual_scheduler_test.cc"],
     deps = [
+        ":utils",
         ":virtual_placer",
         ":virtual_scheduler",
         "//tensorflow/cc:cc_ops",
diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index 5415324b48..2fcadf1de3 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -74,7 +74,8 @@ static std::vector<TensorProto> ExtractTensors(const AttrValue& attr_value) {
       }
       break;
     }
-    default: {}
+    default: {
+    }
   }
   return tensors;
 }
@@ -201,6 +202,43 @@ std::vector<OpInfo::TensorProperties> FindInputFeatures(
   return inputs;
 }
 
+int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) {
+  int64 size = DataTypeSize(BaseType(prop.dtype()));
+  TensorShapeProto shape = prop.shape();
+
+  // Can't infer the size if the rank is unknown. It has to be at least a
+  // scalar though.
+  if (shape.unknown_rank()) {
+    LOG(WARNING) << "CalculateTensorSize() -- unknown rank";
+    return size;
+  }
+
+  // If one of the dimensions is unknown statically, assume it's at least one.
+  for (int i = 0; i < shape.dim_size(); ++i) {
+    if (shape.dim(i).size() < 0) {
+      shape.mutable_dim(i)->set_size(1);
+      LOG(WARNING) << "CalculateTensorSize() -- unknown dim: " << i;
+    }
+  }
+
+  int64 num_elems = TensorShape(shape).num_elements();
+  return num_elems * size;
+}
+
+int64 CalculateOutputSize(
+    const std::vector<OpInfo::TensorProperties>& output_properties,
+    const int port_num) {
+  if (port_num < 0) return 4;  // 4B for control dependency.
+
+  if (port_num >= output_properties.size()) {
+    LOG(ERROR) << "CalculateOutputSize() -- port_num: " << port_num
+               << " >= output_properties.size(): " << output_properties.size();
+    return 0;
+  }
+
+  return CalculateTensorSize(output_properties[port_num]);
+}
+
 DeviceProperties GetDeviceInfo(const string& device_str) {
   DeviceProperties unknown;
   unknown.set_type("UNKNOWN");
diff --git a/tensorflow/core/grappler/costs/utils.h b/tensorflow/core/grappler/costs/utils.h
index 5fd6717712..ea64e5a41d 100644
--- a/tensorflow/core/grappler/costs/utils.h
+++ b/tensorflow/core/grappler/costs/utils.h
@@ -43,6 +43,17 @@ std::vector<OpInfo::TensorProperties> FindInputFeatures(
     const std::unordered_map<string, const CostGraphDef::Node*>& name_to_cost,
     const std::unordered_map<string, const NodeDef*>& name_to_node);
 
+// Returns the size of tensor (unit: bytes). For tensor shape with unknown rank,
+// it assumes the tensor to be scalar. For any unknown dimension, it assumes
+// size one.
+int64 CalculateTensorSize(const OpInfo::TensorProperties& prop);
+
+// Returns the size of output at port_num (unit: bytes). A special case is
+// port_num -1, which is for control dependency and assumed to be 4 bytes.
+int64 CalculateOutputSize(
+    const std::vector<OpInfo::TensorProperties>& output_properties,
+    int port_num);
+
 // Returns the DeviceProperties of the device on which 'node' runs.
 DeviceProperties GetDeviceInfo(const CostGraphDef::Node& node);
 DeviceProperties GetDeviceInfo(const string& device_str);
diff --git a/tensorflow/core/grappler/costs/utils_test.cc b/tensorflow/core/grappler/costs/utils_test.cc
index baa654f475..db5c11f0fe 100644
--- a/tensorflow/core/grappler/costs/utils_test.cc
+++ b/tensorflow/core/grappler/costs/utils_test.cc
@@ -26,36 +26,42 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-class UtilsTest : public ::testing::Test {
- public:
-  void CreateConstOp(const string& name, std::initializer_list<int64> dims,
-                     NodeDef* node) {
-    Tensor tensor(DT_FLOAT, TensorShape(dims));
-    for (int64 i = 0; i < tensor.NumElements(); ++i) {
-      tensor.flat<float>()(i) = i / 10.0f;
-    }
-    TF_CHECK_OK(NodeDefBuilder(name, "Const")
-                    .Attr("dtype", DT_FLOAT)
-                    .Attr("value", tensor)
-                    .Finalize(node));
-  }
+namespace {
 
-  void CreateConstSizesOp(const string& name, const std::vector<int32>& sizes,
-                          NodeDef* node) {
-    TensorShape shape;
-    shape.AddDim(sizes.size());
-    Tensor tensor(DT_INT32, shape);
-    for (int64 i = 0; i < tensor.NumElements(); ++i) {
-      tensor.flat<int32>()(i) = sizes[i];
-    }
-    TF_CHECK_OK(NodeDefBuilder(name, "Const")
-                    .Attr("dtype", DT_INT32)
-                    .Attr("value", tensor)
-                    .Finalize(node));
-  }
-};
+void CreateConstOp(const string& name, std::initializer_list<int64> dims,
+                   NodeDef* node) {
+  Tensor tensor(DT_FLOAT, TensorShape(dims));
+  for (int64 i = 0; i < tensor.NumElements(); ++i)
+    tensor.flat<float>()(i) = i / 10.0f;
+  TF_CHECK_OK(NodeDefBuilder(name, "Const")
+                  .Attr("dtype", DT_FLOAT)
+                  .Attr("value", tensor)
+                  .Finalize(node));
+}
 
-TEST_F(UtilsTest, ConvOpInfo) {
+void CreateConstSizesOp(const string& name, const std::vector<int32>& sizes,
+                        NodeDef* node) {
+  TensorShape shape;
+  shape.AddDim(sizes.size());
+  Tensor tensor(DT_INT32, shape);
+  for (int64 i = 0; i < tensor.NumElements(); ++i)
+    tensor.flat<int32>()(i) = sizes[i];
+  TF_CHECK_OK(NodeDefBuilder(name, "Const")
+                  .Attr("dtype", DT_INT32)
+                  .Attr("value", tensor)
+                  .Finalize(node));
+}
+
+// Helper method for converting shapes vector to TensorProperty.
+OpInfo::TensorProperties ShapeToTensorProperty(const std::vector<int>& shapes,
+                                               const DataType& data_type) {
+  OpInfo::TensorProperties prop;
+  prop.set_dtype(data_type);
+  for (int shape : shapes) prop.mutable_shape()->add_dim()->set_size(shape);
+  return prop;
+}
+
+TEST(UtilsTest, ConvOpInfo) {
   int batch = 32;
   int rows = 7;
   int cols = 9;
@@ -146,7 +152,7 @@ TEST_F(UtilsTest, ConvOpInfo) {
   }
 }
 
-TEST_F(UtilsTest, TestSkipControlInput) {
+TEST(UtilsTest, TestSkipControlInput) {
   GraphDef graph;
   TF_CHECK_OK(NodeDefBuilder("constant", "Const")
                   .Attr("dtype", DT_INT32)
@@ -172,6 +178,52 @@ TEST_F(UtilsTest, TestSkipControlInput) {
   EXPECT_TRUE(node_found);
 }
 
+TEST(UtilsTest, CalculateTensorSize) {
+  // Test normal usage.
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 1,
+            CalculateTensorSize(ShapeToTensorProperty({1}, DT_FLOAT)));
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 4 * 4,
+            CalculateTensorSize(ShapeToTensorProperty({4, 4}, DT_FLOAT)));
+  EXPECT_EQ(DataTypeSize(DT_HALF) * 10 * 10 * 10,
+            CalculateTensorSize(ShapeToTensorProperty({10, 10, 10}, DT_HALF)));
+  EXPECT_EQ(
+      DataTypeSize(DT_FLOAT) * 100 * 7 * 8 * 99,
+      CalculateTensorSize(ShapeToTensorProperty({100, 7, 8, 99}, DT_FLOAT)));
+
+  // Test unknown rank: assumes the tensor to be a scalar.
+  OpInfo::TensorProperties t = ShapeToTensorProperty({100, 7, 8, 99}, DT_FLOAT);
+  t.mutable_shape()->set_unknown_rank(true);
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 1, CalculateTensorSize(t));
+
+  // Test unknown shape: assumes unknown shape (-1) to have size 1.
+  EXPECT_EQ(
+      DataTypeSize(DT_FLOAT) * 1 * 7 * 8 * 99,
+      CalculateTensorSize(ShapeToTensorProperty({-1, 7, 8, 99}, DT_FLOAT)));
+  EXPECT_EQ(
+      DataTypeSize(DT_FLOAT) * 1 * 7 * 1 * 99,
+      CalculateTensorSize(ShapeToTensorProperty({-1, 7, -1, 99}, DT_FLOAT)));
+}
+
+TEST(UtilsTest, CalculateOutputSize) {
+  // Create a set of tensor properties.
+  std::vector<OpInfo::TensorProperties> output = {
+      ShapeToTensorProperty({4, 4}, DT_FLOAT),          // 0
+      ShapeToTensorProperty({-1, 7, -1, 99}, DT_FLOAT)  // 1
+  };
+
+  // Test valid outputs.
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 4 * 4, CalculateOutputSize(output, 0));
+  EXPECT_EQ(DataTypeSize(DT_FLOAT) * 1 * 7 * 1 * 99,
+            CalculateOutputSize(output, 1));
+
+  // port_num -1 is for control dependency: hard coded 4B.
+  EXPECT_EQ(4, CalculateOutputSize(output, -1));
+
+  // Invalid port_num (though it may be an error) shall yield zero
+  // output size.
+  EXPECT_EQ(0, CalculateOutputSize(output, 2));
+}
+
 // Class for testing TensorSizeHistogram.
 class TestTensorSizeHistogram : public TensorSizeHistogram {
  public:
@@ -285,5 +337,7 @@ TEST(DeviceClassTest, GetDeviceClassForNonChannelDevice) {
   EXPECT_EQ("//GPU", GetDeviceClassForNonChannelDevice("/device:GPU:7"));
 }
 
+}  // namespace
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 037a823096..5b93fb128f 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -473,6 +473,7 @@ Status VirtualScheduler::Init() {
     VLOG(1) << "Some feed nodes were not consumed by the fetch fanin: "
             << str_util::Join(feed_nodes, ",");
   }
+
   initialized_ = true;
   return Status::OK();
 }
@@ -695,38 +696,6 @@ NodeState& VirtualScheduler::GetNodeStateOrCreateIt(const NodeDef* node) {
   return it->second;
 }
 
-int64 VirtualScheduler::CalculateOutputSize(
-    const std::vector<OpInfo::TensorProperties>& output_properties,
-    const int port_num) const {
-  if (port_num < 0) {
-    return 4;  // 4B for control dependency.
-  }
-
-  if (port_num >= output_properties.size()) {
-    VLOG(3) << "VirtualScheduler::CalculateOutputSize() -- "
-            << "port_num: " << port_num
-            << " >= output_properties.size(): " << output_properties.size();
-    return 0;
-  }
-
-  const auto& output = output_properties[port_num];
-  int64 output_size = DataTypeSize(BaseType(output.dtype()));
-
-  for (const auto& dim : output.shape().dim()) {
-    auto dim_size = dim.size();
-    if (dim_size < 0) {
-      // Zero output size if there's any unknown dim.
-      output_size = 0;
-      VLOG(3) << "VirtualScheduler::CalculateOutputSize() -- "
-              << "unknown dim: " << output_size;
-      break;
-    }
-    output_size *= dim_size;
-  }
-
-  return output_size;
-}
-
 Costs& VirtualScheduler::FindOrCreateZero(const string& op_name,
                                           std::map<string, Costs>* op_cost) {
   auto it = op_cost->find(op_name);
@@ -744,7 +713,10 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
   const NodeDef* node = ready_nodes_->GetCurrNode();
   const string& op_name = node->op();
 
-  // Also keep track of op counts and times per op (with their shapes).
+  auto& op_cost = FindOrCreateZero(op_name, &op_to_cost_);
+  op_cost = CombineCosts(op_cost, node_costs);
+
+  // Also keep track of op counts and costs per op (with their shapes).
   OpContext op_context = GetCurrNode();
   string node_description = GetOpDescription(op_context.op_info);
   op_counts_[node_description] += 1;
@@ -752,9 +724,6 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
       std::make_pair(node_costs.execution_time.asMicroSeconds().count(),
                      !node_costs.inaccurate);
 
-  auto& op_cost = FindOrCreateZero(op_name, &op_to_cost_);
-  op_cost = CombineCosts(op_cost, node_costs);
-
   // Update node and device states.
   auto& node_state = node_map_[node];
   auto& device = device_[node_state.device_name];
@@ -795,7 +764,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
           << ", scheduled: " << node_state.time_scheduled.count()
           << ", finished: " << node_state.time_finished.count();
 
-  // Increment num_inputs_ready of the output nodes
+  // Increment num_inputs_ready of the output nodes and maybe add to ready nodes
   for (const auto& port_num_output_pair : node_state.outputs) {
     for (auto* output_node : port_num_output_pair.second) {
       auto& output_state = node_map_[output_node];
@@ -812,7 +781,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
     }
   }
 
-  // Increment num_outputs_executed of the input nodes.
+  // Increment num_outputs_executed of the input nodes and maybe update memory.
   for (const auto& input_port : node_state.inputs) {
     auto* input = input_port.first;
     auto port = input_port.second;
@@ -841,7 +810,6 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
     }
   }
 
-  // Remove the current node; assume FIFO.
   ready_nodes_->RemoveCurrNode();
 
   return !ready_nodes_->Empty();
@@ -1007,7 +975,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
     return Summary();
   }
 
-  // Fill RunMetadata.
+  // Fill RunMetadata's step_stats and partition_graphs fields.
   StepStats* stepstats = metadata->mutable_step_stats();
   for (const auto& device : device_) {
     GraphDef* device_partition_graph = metadata->add_partition_graphs();
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h
index 0e66e8a463..bead84af29 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.h
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.h
@@ -107,10 +107,10 @@ struct DeviceState {
       mem_usage_snapshot_at_peak;
 
   Costs device_costs;
-  std::map<string, Costs> op_to_cost;    // Per-op cost.
-  std::map<string, int64> op_to_memory;  // Per-op memory usage at peak usage.
-  int64 memory_usage;
-  int64 max_memory_usage;
+  std::map<string, Costs> op_to_cost;  // Per-op cost.
+
+  int64 memory_usage;      // Current temporary memory usage
+  int64 max_memory_usage;  // Max temporary memory usage
 
   DeviceState() {
     device_costs = Costs::ZeroCosts();
@@ -283,13 +283,6 @@ class VirtualScheduler {
     return &node_map_;
   }
 
- protected:
-  // Returns the size of output at port_num (unit: bytes). A special case is
-  // port_num -1, which is for control dependency and assumed to be 4 bytes.
-  int64 CalculateOutputSize(
-      const std::vector<OpInfo::TensorProperties>& output_properties,
-      const int port_num) const;
-
  private:
   // Constants.
   const string kAttrInputSrc = "input_source_";
@@ -321,8 +314,11 @@ class VirtualScheduler {
   std::vector<std::unique_ptr<NodeDef>> additional_nodes_;
 
   // Stats:
-  std::map<string, int> op_counts_;  // Op counts with key with input shape.
-  // Individual op costs (with input shapes).
+  // Op counts with key with input shape.
+  // Example key: "[Op=AssignSub, input_shapes=[[7,1,160,160][7,1,160,160]]"
+  std::map<string, int> op_counts_;
+  // Individual op costs with key with input shape.
+  // Integer field for execution time in micro seconds.
   // Boolean field for whether the cost is accurate.
   std::map<string, std::pair<int, bool>> op_costs_;
 
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
index 80889afc86..99272dd7e9 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
@@ -19,12 +19,14 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_description.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/grappler/clusters/virtual_cluster.h"
+#include "tensorflow/core/grappler/costs/utils.h"
 #include "tensorflow/core/grappler/costs/virtual_placer.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
 namespace grappler {
+
 // Class for testing virtual scheduler.
 class TestVirtualScheduler : public VirtualScheduler {
  public:
@@ -33,7 +35,6 @@ class TestVirtualScheduler : public VirtualScheduler {
       : VirtualScheduler(grappler_item, use_static_shapes, cluster,
                          &ready_node_manager_) {}
 
-  FRIEND_TEST(VirtualSchedulerTest, CalculateOutputSize);
   FRIEND_TEST(VirtualSchedulerTest, MemoryUsage);
   FRIEND_TEST(VirtualSchedulerTest, ControlDependency);
   FRIEND_TEST(VirtualSchedulerTest, ComplexDependency);
@@ -1034,17 +1035,6 @@ versions {
     }
   }
 
-  // Helper method for converting shape vector to TensorProperty.
-  OpInfo::TensorProperties ShapeToTensorProperty(
-      const std::vector<int> shape, const DataType& data_type) const {
-    OpInfo::TensorProperties tensor_property;
-    tensor_property.set_dtype(data_type);
-    for (const auto& x : shape) {
-      tensor_property.mutable_shape()->add_dim()->set_size(x);
-    }
-    return tensor_property;
-  }
-
   // SetUp() inits cluster_ and placer_.
   std::unique_ptr<VirtualCluster> cluster_;
   std::unique_ptr<VirtualPlacer> placer_;
@@ -1729,38 +1719,6 @@ TEST_F(VirtualSchedulerTest, InitAndBasicScheduling) {
   EXPECT_EQ(2, ops_executed["c1"].op_info.inputs_size());
 }
 
-TEST_F(VirtualSchedulerTest, CalculateOutputSize) {
-  // Init.
-  CreateGrapplerItemWithAddN();
-  InitScheduler();
-
-  // Create a set of tensor properties.
-  std::vector<OpInfo::TensorProperties> output;
-  output.push_back(ShapeToTensorProperty({4, 4}, DT_FLOAT));           // 0
-  output.push_back(ShapeToTensorProperty({1}, DT_FLOAT));              // 1
-  output.push_back(ShapeToTensorProperty({10, 10, 10}, DT_HALF));      // 2
-  output.push_back(ShapeToTensorProperty({100, 7, 8, 99}, DT_FLOAT));  // 3
-  output.push_back(ShapeToTensorProperty({-1, 7, 8, 99}, DT_FLOAT));   // 4
-  output.push_back(ShapeToTensorProperty({-1, 7, -1, 99}, DT_FLOAT));  // 4
-
-  // port_num -1 is for control dependency: hard coded 4B.
-  EXPECT_EQ(4, scheduler_->CalculateOutputSize(output, -1));
-
-  // Test valid outputs.
-  EXPECT_EQ(4 * 4 * 4, scheduler_->CalculateOutputSize(output, 0));
-  EXPECT_EQ(4 * 1, scheduler_->CalculateOutputSize(output, 1));
-  EXPECT_EQ(2 * 10 * 10 * 10, scheduler_->CalculateOutputSize(output, 2));
-  EXPECT_EQ(4 * 100 * 7 * 8 * 99, scheduler_->CalculateOutputSize(output, 3));
-
-  // Any unknown shape (-1) shall yield zero output size.
-  EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 4));
-  EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 5));
-
-  // Invalid port_num (though it may be an error) shall yield zero
-  // output size.
-  EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 6));
-}
-
 TEST_F(VirtualSchedulerTest, MemoryUsage) {
   // Init.
   CreateGrapplerItemWithAddN();
@@ -2041,7 +1999,7 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) {
     for (const auto& output_property : output_properties_) {
       output_properties.push_back(output_property);
     }
-    return scheduler_->CalculateOutputSize(output_properties, 0);
+    return CalculateOutputSize(output_properties, 0);
   };
 
   // Validate transfer size.
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index c708f84948..e898377ded 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -423,6 +423,7 @@ cc_library(
         "//tensorflow/core/grappler/clusters:virtual_cluster",
         "//tensorflow/core/grappler/costs:graph_memory",
         "//tensorflow/core/grappler/costs:graph_properties",
+        "//tensorflow/core/grappler/costs:utils",
         "//tensorflow/core/grappler/utils:topological_sort",
         "//tensorflow/core/grappler/utils:traversal",
     ],
diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
index c775a26914..73f0977242 100644
--- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/clusters/virtual_cluster.h"
 #include "tensorflow/core/grappler/costs/graph_memory.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
+#include "tensorflow/core/grappler/costs/utils.h"
 #include "tensorflow/core/grappler/graph_view.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
@@ -43,6 +44,8 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
+namespace {
+
 // Prefix added to nodes which are recomputed.
 const char* kRecomputedNodePrefix = "Recomputed";
 const char* kRecomputeTriggerNodePrefix = "RecomputeTrigger";
@@ -744,25 +747,6 @@ Status BuildSwapPair(NodeDef* node, int input_to_swap,
   return Status::OK();
 }
 
-static int64 EstimateSize(const OpInfo::TensorProperties& t) {
-  DataType dtype = t.dtype();
-  int64 size = DataTypeSize(dtype);
-  TensorShapeProto shape = t.shape();
-  if (shape.unknown_rank()) {
-    // Can't infer the size if the rank is unknown. It has to be at least a
-    // scalar though.
-    return size;
-  }
-  // If one of the dimensions is unknown statically, assume it's at least one.
-  for (int i = 0; i < shape.dim_size(); ++i) {
-    if (shape.dim(i).size() < 0) {
-      shape.mutable_dim(i)->set_size(1);
-    }
-  }
-  int64 num_elems = TensorShape(shape).num_elements();
-  return num_elems * size;
-}
-
 struct SwapInfo {
   std::vector<int> inputs_to_swap;
   Costs::NanoSeconds time_to_swap = 0;
@@ -1149,7 +1133,7 @@ bool SwappingPass(RewriterConfig::MemOptType optimization_level,
     int64 bytes_to_swap = 0;
     for (int64 input_id : swap_info.inputs_to_swap) {
       const OpInfo::TensorProperties& t = props[input_id];
-      bytes_to_swap += EstimateSize(t);
+      bytes_to_swap += CalculateTensorSize(t);
     }
     // Let's assume we're going to swap over PCIe running at 16 GBps.
     swap_info.time_to_swap = bytes_to_swap / 16;
@@ -1299,6 +1283,8 @@ Status RelaxAllocatorConstraints(GraphDef* optimized_graph) {
   return Status::OK();
 }
 
+}  // namespace
+
 Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
                                  GraphDef* optimized_graph) {
   *optimized_graph = item.graph;
-- 
GitLab


From 129bb5e845ccb2ab6339e85d39545800dac6ca33 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 8 Oct 2018 23:42:02 -0700
Subject: [PATCH 0590/1085] Automated rollback of commit
 5f308cb408eb46ec9af0546be6b9ae1d5166b185

PiperOrigin-RevId: 216309111
---
 tensorflow/core/grappler/op_types.cc          |  22 +--
 .../optimizers/pin_to_host_optimizer.cc       | 162 ++++++------------
 .../optimizers/pin_to_host_optimizer.h        |   4 +-
 .../optimizers/pin_to_host_optimizer_test.cc  |  76 +++-----
 4 files changed, 85 insertions(+), 179 deletions(-)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index cbf5c8e038..1b5a215987 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -102,19 +102,15 @@ bool IsConjugateTranspose(const NodeDef& node) {
 }
 
 bool IsControlFlow(const NodeDef& node) {
-  // TODO(williamchan): Add a microbenchmark to compare FlatSet vs. iterative
-  // string comparison.
-  static const gtl::FlatSet<string>* const kControFlowOps =
-      CHECK_NOTNULL((new gtl::FlatSet<string>{
-          "ControlTrigger",
-          "Enter",
-          "Exit",
-          "LoopCond",
-          "Merge",
-          "NextIteration",
-          "Switch",
-      }));
-  return kControFlowOps->count(node.op()) > 0;
+  // clang-format off
+  return node.op() == "ControlTrigger" ||
+         node.op() == "Enter" ||
+         node.op() == "Exit" ||
+         node.op() == "LoopCond" ||
+         node.op() == "Merge" ||
+         node.op() == "NextIteration" ||
+         node.op() == "Switch";
+  // clang-format on
 }
 
 bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 29a3b2b74c..8ed4271fa4 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -25,29 +25,16 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
 namespace grappler {
-
 namespace internal {
 
-namespace {
 // TODO(williamchan): Change this constant to be something smarter, maybe
 // dynamically determined.
 constexpr int64 kTensorMaxSize = 64;
 
-struct OpDevicePortHasher {
-  std::size_t operator()(const std::tuple<string, string, int>& x) const {
-    uint64 code = Hash64Combine(Hash64(std::get<0>(x)), Hash64(std::get<1>(x)));
-
-    return Hash64Combine(code, hash<int>()(std::get<2>(x)));
-  }
-};
-using OpDevicePortOnHostMap =
-    gtl::FlatMap<std::tuple<string, string, int>, bool, OpDevicePortHasher>;
-
 // All the nodes that should be blacklisted and not swapped.
 bool IsBlacklisted(const NodeDef& node) {
   return
@@ -95,10 +82,10 @@ Status TryFindKernelDef(const std::vector<DeviceType>& devices,
 
 // Checks if a node's output port is host friendly.
 // Roughly this means checking if the output port is on Host memory.
-Status IsNodeOutputPortHostFriendly(
-    const GraphView& graph, GraphProperties* properties, const NodeDef& node,
-    int port_id, OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache,
-    bool* is_candidate) {
+Status IsNodeOutputPortHostFriendly(const GraphView& graph,
+                                    GraphProperties* properties,
+                                    const NodeDef& node, int port_id,
+                                    bool* is_candidate) {
   *is_candidate = false;
 
   // Make sure we are not a blacklisted op.
@@ -130,8 +117,7 @@ Status IsNodeOutputPortHostFriendly(
     for (const auto& fanin : graph.GetFanins(node, false)) {
       bool fanin_candidate = false;
       TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
-          graph, properties, *fanin.node, fanin.port_id,
-          op_device_outport_pinned_to_host_cache, &fanin_candidate));
+          graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
       if (!fanin_candidate) {
         return Status::OK();
       }
@@ -146,22 +132,11 @@ Status IsNodeOutputPortHostFriendly(
     return Status::OK();
   }
 
-  // Check `op_device_outport_pinned_to_host_cache` for our
-  // {op, device, port_id} combo to see if the arg is pinned on Host.
-  const std::tuple<string, string, int> cache_key(node.op(), node.device(),
-                                                  port_id);
-  auto it = op_device_outport_pinned_to_host_cache->find(cache_key);
-  if (it != op_device_outport_pinned_to_host_cache->end()) {
-    *is_candidate = it->second;
-    return Status::OK();
-  }
-
   // Check if op's output port is pinned to HostMemory.
   const OpDef* op = nullptr;
   Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
   if (!s.ok()) {
     LOG(WARNING) << "Could not find OpDef for : " << node.op();
-    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -171,7 +146,6 @@ Status IsNodeOutputPortHostFriendly(
     LOG(WARNING) << "Invalid port: " << port_id << "!\n"
                  << node.DebugString() << "\n"
                  << op->DebugString();
-    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -181,7 +155,6 @@ Status IsNodeOutputPortHostFriendly(
                        &kernel);
   if (!s.ok()) {
     LOG(INFO) << "Could not find KernelDef for: " << node.op();
-    op_device_outport_pinned_to_host_cache->emplace(cache_key, false);
     return Status::OK();
   }
 
@@ -193,35 +166,22 @@ Status IsNodeOutputPortHostFriendly(
     }
   }
 
-  op_device_outport_pinned_to_host_cache->emplace(cache_key, *is_candidate);
-
   return Status::OK();
 }
 
 // Checks if a node's input port is Host friendly.
 // Roughly this means checking if the input port is on Host memory.
-bool IsNodeInputPortHostFriendly(
-    const NodeDef& node, int port_id,
-    OpDevicePortOnHostMap* op_device_inport_pinned_to_host_cache) {
+bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) {
   // If node is on Host, assume its inputs are Host friendly.
   if (str_util::StrContains(node.device(), DEVICE_CPU)) {
     return true;
   }
 
-  // Check `op_device_inport_pinned_to_host_cache` for our
-  // {op, device, port_id} combo to see if the arg is pinned on Host.
-  std::tuple<string, string, int> cache_key(node.op(), node.device(), port_id);
-  auto it = op_device_inport_pinned_to_host_cache->find(cache_key);
-  if (it != op_device_inport_pinned_to_host_cache->end()) {
-    return it->second;
-  }
-
   // Check if op's input port is pinned to HostMemory.
   const OpDef* op = nullptr;
   Status s = OpRegistry::Global()->LookUpOpDef(node.op(), &op);
   if (!s.ok()) {
     LOG(WARNING) << "Could not find OpDef for : " << node.op();
-    op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
     return false;
   }
   const int input_arg_id = OpInputPortIdToArgId(node, *op, port_id);
@@ -232,20 +192,16 @@ bool IsNodeInputPortHostFriendly(
       {node.device().c_str(), DEVICE_GPU, DEVICE_CPU}, node, &kernel);
   if (!s.ok()) {
     LOG(INFO) << "Could not find KernelDef for: " << node.op();
-    op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
     return false;
   }
 
   // Check if the input_arg is pinned to Host.
   for (const string& host_memory_arg : kernel->host_memory_arg()) {
     if (op->input_arg(input_arg_id).name() == host_memory_arg) {
-      op_device_inport_pinned_to_host_cache->emplace(cache_key, true);
       return true;
     }
   }
 
-  op_device_inport_pinned_to_host_cache->emplace(cache_key, false);
-
   return false;
 }
 
@@ -255,29 +211,38 @@ bool IsNodeInputPortHostFriendly(
 // 2] Check if node can run on Host.
 // 3] Check all input/outputs are Host "friendly" (atm, friendly means small,
 //    ints, and pinned to Host).
-Status IsNodeHostCandidate(
-    const GraphView& graph, GraphProperties* properties, const NodeDef& node,
-    OpDevicePortOnHostMap* op_device_outport_pinned_to_host_cache,
-    bool* is_candidate) {
+Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties,
+                           const NodeDef& node, bool* is_candidate) {
   *is_candidate = false;
 
-  // Skip these node types.
-  if (IsBlacklisted(node)) {
-    return Status::OK();
-  }
-
   // Check if node already on CPU.
   if (str_util::StrContains(node.device(), DEVICE_CPU)) {
     *is_candidate = true;
     return Status::OK();
   }
 
+  // Skip these node types.
+  if (IsBlacklisted(node)) {
+    return Status::OK();
+  }
+
   // Check the node can be run on CPU.
   Status s = TryFindKernelDef({DEVICE_CPU}, node, nullptr);
   if (!s.ok()) {
     return Status::OK();
   }
 
+  // Check all inputs are Host friendly.
+  for (const GraphView::OutputPort& fanin :
+       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
+    bool fanin_candidate = false;
+    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
+        graph, properties, *fanin.node, fanin.port_id, &fanin_candidate));
+    if (!fanin_candidate) {
+      return Status::OK();
+    }
+  }
+
   // Check all outputs are Host friendly.
   if (!properties->has_properties()) {
     // This is an expensive call, call it lazily.
@@ -290,42 +255,16 @@ Status IsNodeHostCandidate(
     }
   }
 
-  // Check all inputs are Host friendly.
-  for (const GraphView::OutputPort& fanin :
-       graph.GetFanins(node, /*include_controlling_nodes=*/false)) {
-    bool fanin_candidate = false;
-    TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
-        graph, properties, *fanin.node, fanin.port_id,
-        op_device_outport_pinned_to_host_cache, &fanin_candidate));
-    if (!fanin_candidate) {
-      return Status::OK();
-    }
-  }
-
   *is_candidate = true;
   return Status::OK();
 }
 
-bool IsTPUGraphDef(const GraphDef& def) {
-  for (const auto& node : def.node()) {
-    if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
-        node.op() == "TPUPartitionedCall") {
-      return true;
-    }
-  }
-  return false;
-}
-}  // end namespace
-
-// Tries to swap `device` to a Host device from `devices`. Returns true iff
-// there was a swap.
-bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
-                         bool has_device_cpu, string* device) {
+string TryFindHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, const string& device) {
   // Force this node onto the CPU.
-  if (device->empty() && has_device_cpu) {
-    *device = "/device:CPU:0";
-    return true;
-  } else if (str_util::StrContains(*device, DEVICE_GPU)) {
+  if (device.empty() && has_device_cpu) {
+    return "/device:CPU:0";
+  } else if (str_util::StrContains(device, DEVICE_GPU)) {
     // Sometimes the cluster can have:
     //   devices = {"/device:CPU:0", "/device:XLA_GPU:0"}
     // and we need to handle them properly.
@@ -333,19 +272,27 @@ bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
          {std::pair<string, string>("GPU", "CPU:0"),
           std::pair<string, string>("/device", "/device:CPU:0")}) {
       const string device_host =
-          strings::StrCat(device->substr(0, device->rfind(device_match.first)),
+          strings::StrCat(device.substr(0, device.rfind(device_match.first)),
                           device_match.second);
       if (devices.find(device_host) != devices.end()) {
-        *device = device_host;
-        return true;
+        return device_host;
       }
     }
   }
 
-  // We couldn't find an appropriate Host device, return false.
-  return false;
+  // We couldn't find an appropriate Host device, return original device.
+  return device;
 }
 
+bool IsTPUGraphDef(const GraphDef& def) {
+  for (const auto& node : def.node()) {
+    if (node.op() == "TPUCompile" || node.op() == "TPUExecute" ||
+        node.op() == "TPUPartitionedCall") {
+      return true;
+    }
+  }
+  return false;
+}
 }  // end namespace internal
 
 Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
@@ -377,26 +324,20 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   // All the Const nodes, and their original devices in topological order.
   std::vector<std::pair<NodeDef*, string>> const_nodes;
 
-  // Cache to map {op, device, port} -> bool on whether it is pinned to host.
-  internal::OpDevicePortOnHostMap op_device_outport_pinned_to_host_cache;
-  internal::OpDevicePortOnHostMap op_device_inport_pinned_to_host_cache;
-
   for (auto& node : *optimized_graph->mutable_node()) {
     bool is_candidate = false;
-    TF_RETURN_IF_ERROR(internal::IsNodeHostCandidate(
-        graph, &properties, node, &op_device_outport_pinned_to_host_cache,
-        &is_candidate));
+    TF_RETURN_IF_ERROR(
+        internal::IsNodeHostCandidate(graph, &properties, node, &is_candidate));
     if (!is_candidate) {
       continue;
     }
 
-    const string original_device = node.device();
-    const bool swapped = internal::TrySwapToHostDevice(devices, has_device_cpu,
-                                                       node.mutable_device());
-    // Keep track of all Const nodes that we swapped.
-    if (swapped && IsConstant(node)) {
-      const_nodes.emplace_back(&node, original_device);
+    if (IsConstant(node)) {
+      const_nodes.emplace_back(&node, node.device());
     }
+    // Try and swap the device to Host.
+    node.set_device(
+        internal::TryFindHostDevice(devices, has_device_cpu, node.device()));
   }
 
   // Traverse all `const_nodes`, and map them back to GPU greedily.
@@ -408,9 +349,8 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     // this node back onto the original device.
     for (const GraphView::InputPort& fanout : graph.GetFanouts(*node, false)) {
       // The consumer is not Host friendly, swap it back to the original device.
-      if (!internal::IsNodeInputPortHostFriendly(
-              *fanout.node, fanout.port_id,
-              &op_device_inport_pinned_to_host_cache)) {
+      if (!internal::IsNodeInputPortHostFriendly(*fanout.node,
+                                                 fanout.port_id)) {
         node->set_device(device);
         break;
       }
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
index bed4a9ef95..d557a03463 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.h
@@ -26,8 +26,8 @@ namespace tensorflow {
 namespace grappler {
 namespace internal {
 // Try and find an appropriate Host device in `devices` given `device`.
-bool TrySwapToHostDevice(const gtl::FlatSet<string>& devices,
-                         bool has_device_cpu, string* device);
+string TryFindHostDevice(const gtl::FlatSet<string>& devices,
+                         bool has_device_cpu, const string& device);
 }  // end namespace internal
 
 // Optimize TensorFlow ops that should be swapped into the CPU to avoid
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
index 9bb030b220..7c64529441 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc
@@ -28,60 +28,30 @@ namespace {
 
 class PinToHostOptimizerTest : public GrapplerTest {};
 
-TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceNoDevices) {
+TEST_F(PinToHostOptimizerTest, TryFindHostDevice) {
   gtl::FlatSet<string> devices = {};
-
-  string device = "ABC";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "ABC");
-}
-
-TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceCpuXlaGpu) {
-  gtl::FlatSet<string> devices = {"/device:CPU:0", "/device:XLA_GPU:0"};
-
-  string device = "";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
-  EXPECT_EQ(device, "/device:CPU:0");
-
-  device = "/device:XLA_GPU:0";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
-  EXPECT_EQ(device, "/device:CPU:0");
-
-  device = "/device:XLA_GPU:*";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, true, &device));
-  EXPECT_EQ(device, "/device:CPU:0");
-}
-
-TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaCpuXlaGpu) {
-  gtl::FlatSet<string> devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"};
-
-  string device = "";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_TRUE(device.empty());
-
-  device = "/device:XLA_GPU:0";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "/device:XLA_CPU:0");
-
-  device = "/device:XLA_GPU:*";
-  EXPECT_TRUE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "/device:XLA_CPU:0");
-}
-
-TEST_F(PinToHostOptimizerTest, TrySwapToHostDeviceXlaGpu) {
-  gtl::FlatSet<string> devices = {"/device:XLA_GPU:0"};
-
-  string device = "";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_TRUE(device.empty());
-
-  device = "/device:XLA_GPU:0";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "/device:XLA_GPU:0");
-
-  device = "/device:XLA_GPU:*";
-  EXPECT_FALSE(internal::TrySwapToHostDevice(devices, false, &device));
-  EXPECT_EQ(device, "/device:XLA_GPU:*");
+  EXPECT_EQ("ABC", internal::TryFindHostDevice(devices, false, "ABC"));
+
+  devices = {"/device:CPU:0", "/device:XLA_GPU:0"};
+  EXPECT_EQ(internal::TryFindHostDevice(devices, true, ""), "/device:CPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:0"),
+            "/device:CPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, true, "/device:XLA_GPU:*"),
+            "/device:CPU:0");
+
+  devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"};
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
+            "/device:XLA_CPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
+            "/device:XLA_CPU:0");
+
+  devices = {"/device:XLA_GPU:0"};
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), "");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"),
+            "/device:XLA_GPU:0");
+  EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"),
+            "/device:XLA_GPU:*");
 }
 
 TEST_F(PinToHostOptimizerTest, OptimizeSmallOpsToHost) {
-- 
GitLab


From 597f04e949285f7e72682c7c3a6ed656a5aedb1e Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Tue, 9 Oct 2018 00:38:47 -0700
Subject: [PATCH 0591/1085] Add mroe logging infor

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 2abac92e5d..74494a302c 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -239,7 +239,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           std::vector<string> children;
           Status s = fs->GetChildren(current_dir, &children);
           std::cout << "GetChildren status: " << s.ToString()
-                    << "; Children size: " << children.size() << std::endl;
+                    << "; Children size: " << children.size()
+                    << "; Heap size: " << filepath_queue_.size() << std::endl;
           ret.Update(s);
 
           // If GetChildren() fails, continue the next search.
@@ -282,6 +283,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             const string& child_dir_path =
                 io::JoinPath(current_dir, children[i]);
             const Status& child_dir_status = children_dir_status[i];
+            std::cout << "Child dir path: " << child_dir_path << std::endl;
 
             // If the IsDirectory call was cancelled we bail.
             if (child_dir_status.code() == tensorflow::error::CANCELLED) {
-- 
GitLab


From a198ca7d9bbc752a322c59b9a30519eab1b6730c Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Tue, 9 Oct 2018 00:56:23 -0700
Subject: [PATCH 0592/1085] Enable support for PRED values in KeyValueSort for
 the HloEvaluator.

PiperOrigin-RevId: 216315110
---
 tensorflow/compiler/xla/service/hlo_evaluator.cc | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index eec8d242fa..6cba46135c 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/index_util.h"
@@ -1279,7 +1280,9 @@ StatusOr<Literal> EvaluateSortInternal(HloInstruction* sort,
                     return SafeLess<KeyType>(a.first, b.first);
                   });
         std::vector<KeyType> result_keys;
-        std::vector<ValueType> result_values;
+        // We use a InlinedVector here because we need to convert it to an
+        // absl::Span later, and this would not work with std::vector<bool>.
+        absl::InlinedVector<ValueType, 10> result_values;
         for (const auto& key_value : key_value_vector) {
           result_keys.push_back(key_value.first);
           result_values.push_back(key_value.second);
@@ -1316,6 +1319,9 @@ StatusOr<Literal> EvaluateSortCurried(HloInstruction* sort,
                                       const Literal& keys_literal,
                                       const Literal& values_literal) {
   switch (sort->operand(1)->shape().element_type()) {
+    case PRED:
+      return EvaluateSortInternal<KeyType, bool>(sort, keys_literal,
+                                                 values_literal);
     case F32:
       return EvaluateSortInternal<KeyType, float>(sort, keys_literal,
                                                   values_literal);
-- 
GitLab


From 69f60d4c8cb5edb6fdc63b837b6db29562d28744 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 02:09:06 -0700
Subject: [PATCH 0593/1085] compat: Update forward compatibility horizon to
 2018-10-09

PiperOrigin-RevId: 216323343
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 349c84e13c..0e14c0e044 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 8)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 9)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From d5a7e27a1f7d2be65edc2b82c737d82ffe40ecde Mon Sep 17 00:00:00 2001
From: knight <badgangkiller@gmail.com>
Date: Tue, 9 Oct 2018 19:10:43 +0800
Subject: [PATCH 0594/1085] improve
 contrib/kafka/python/kernel_tests/kafka_test.sh

1. add `docker pull` step
2. add some print message
---
 .../kafka/python/kernel_tests/kafka_test.sh   | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh
index adf027b8e7..def41c670f 100644
--- a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh
+++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh
@@ -22,23 +22,25 @@ if [ "$#" -ne 2 ]; then
   exit 1
 fi
 
+action=$1
 container=$2
-if [ "$1" == "start" ]; then
+if [ "$action" == "start" ]; then
+    echo "pull spotify/kafka"
+    docker pull spotify/kafka
+    echo "pull spotify/kafka successfully"
     docker run -d --rm --net=host --name=$container spotify/kafka
-    echo Wait 5 secs until kafka is up and running
+    echo "Wait 5 secs until kafka is up and running"
     sleep 5
-    echo Create test topic
+    echo "Create test topic"
     docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test'
-    echo Create test message
+    echo "Create test message"
     docker exec $container bash -c 'echo -e "D0\nD1\nD2\nD3\nD4\nD5\nD6\nD7\nD8\nD9" > /test'
-    echo Produce test message
+    echo "Produce test message"
     docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-console-producer.sh --topic test --broker-list 127.0.0.1:9092 < /test'
-
-    echo Container $container started successfully
-elif [ "$1" == "stop" ]; then
+    echo "Container $container started successfully"
+elif [ "$action" == "stop" ]; then
     docker rm -f $container
-
-    echo Container $container stopped successfully
+    echo "Container $container removed successfully"
 else
   echo "Usage: $0 start|stop <kafka container name>" >&2
   exit 1
-- 
GitLab


From e730b261f9028b2f3430461b82c30c86b9ece22f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 06:58:06 -0700
Subject: [PATCH 0595/1085] Automated rollback of commit
 375c109659d2d0e6265447dffdeb460693b3cccf

PiperOrigin-RevId: 216350134
---
 tensorflow/compiler/xla/service/BUILD         |  21 --
 .../compiler/xla/service/buffer_assignment.cc |  34 ++--
 .../compiler/xla/service/buffer_value.h       |   3 -
 .../compiler/xla/service/copy_insertion.cc    |  85 +-------
 .../xla/service/copy_insertion_test.cc        | 183 -----------------
 tensorflow/compiler/xla/service/hlo.proto     |  29 ---
 .../xla/service/hlo_alias_analysis.cc         |  46 +----
 .../xla/service/hlo_alias_analysis_test.cc    | 175 -----------------
 .../xla/service/hlo_dataflow_analysis.cc      |   2 +-
 .../service/hlo_input_output_alias_config.cc  | 172 ----------------
 .../service/hlo_input_output_alias_config.h   | 101 ----------
 .../hlo_input_output_alias_config_test.cc     | 184 ------------------
 tensorflow/compiler/xla/service/hlo_module.cc |   9 -
 tensorflow/compiler/xla/service/hlo_module.h  |  14 --
 .../compiler/xla/service/hlo_verifier.cc      |   2 -
 tensorflow/compiler/xla/shape_util.h          |   2 +-
 16 files changed, 25 insertions(+), 1037 deletions(-)
 delete mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
 delete mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
 delete mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 26ebb88e96..2b292ed053 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -294,7 +294,6 @@ cc_library(
     srcs = [
         "dfs_hlo_visitor.cc",
         "hlo_computation.cc",
-        "hlo_input_output_alias_config.cc",
         "hlo_instruction.cc",
         "hlo_instructions.cc",
         "hlo_module.cc",
@@ -309,7 +308,6 @@ cc_library(
         "hlo_clone_context.h",
         "hlo_computation.h",
         "hlo_domain_metadata.h",
-        "hlo_input_output_alias_config.h",
         "hlo_instruction.h",
         "hlo_instructions.h",
         "hlo_module.h",
@@ -1270,25 +1268,6 @@ tf_cc_test(
     ],
 )
 
-tf_cc_test(
-    name = "hlo_input_output_alias_config_test",
-    srcs = ["hlo_input_output_alias_config_test.cc"],
-    deps = [
-        ":hlo",
-        ":hlo_dce",
-        ":hlo_memory_scheduler",
-        ":hlo_ordering",
-        ":hlo_parser",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:types",
-        "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
-        "//tensorflow/core:test",
-        "@com_google_absl//absl/algorithm:container",
-    ],
-)
-
 cc_library(
     name = "hlo_memory_scheduler",
     srcs = ["hlo_memory_scheduler.cc"],
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index d5d6a044a8..2c2d1626c2 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -239,7 +239,7 @@ BufferAllocation::Slice BufferAllocation::GetSlice(
 
 void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset,
                                      int64 size) {
-  VLOG(4) << "Trying to add " << buffer << " to allocation #" << index();
+  VLOG(4) << "Trying to add " << buffer << " to " << this;
   CHECK(assigned_buffers_.count(&buffer) == 0)
       << "LogicalBuffer " << buffer << " already assigned to allocation "
       << index_;
@@ -784,6 +784,21 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
     }
   }
 
+  if (allow_input_output_aliasing_ && allocation->maybe_live_out()) {
+    const HloComputation* entry_computation =
+        assignment->module_->entry_computation();
+    for (auto param : entry_computation->parameter_instructions()) {
+      for (auto& param_buffer :
+           assignment->points_to_analysis().GetBuffersDefinedByInstruction(
+               param)) {
+        if (assignment->liveness().MayInterfere(*param_buffer, buffer)) {
+          VLOG(4) << "Can't assign: Parameter interference with result";
+          return false;
+        }
+      }
+    }
+  }
+
   // If the buffer is live out of the computation then it should only be
   // assigned a buffer which exactly fits the result to avoid wasting memory
   // (result buffers can have arbitrary lifetimes).
@@ -1419,28 +1434,13 @@ BufferAssigner::MergeColocatedBufferSets(
 
 // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated
 // in the same allocation (currently just supports kWhile, kCall, and
-// kConditional and input output aliasing).
+// kConditional).
 void BufferAssigner::BuildColocatedBufferSets(
     const HloModule* module, const BufferLiveness& buffer_liveness,
     const LogicalBuffer::SizeFunction& buffer_size,
     std::vector<ColocatedBufferSet>* colocated_buffer_sets) {
   const TuplePointsToAnalysis& points_to_analysis =
       buffer_liveness.points_to_analysis();
-
-  // Set up colocated buffer set for input and output.
-  module->input_output_alias_config().ForEachAlias(
-      [&](const ShapeIndex& output_index, int64 param_number,
-          const ShapeIndex& param_index) {
-        std::vector<const LogicalBuffer*> colocated_set;
-        AddBufferToColocatedSet(module->entry_computation()->root_instruction(),
-                                output_index, points_to_analysis,
-                                &colocated_set);
-        AddBufferToColocatedSet(
-            module->entry_computation()->parameter_instruction(param_number),
-            param_index, points_to_analysis, &colocated_set);
-        AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets);
-      });
-
   for (const HloComputation* computation : module->MakeComputationPostOrder()) {
     if (computation->IsFusionComputation()) {
       continue;
diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h
index 11d8abc5ba..69b3646356 100644
--- a/tensorflow/compiler/xla/service/buffer_value.h
+++ b/tensorflow/compiler/xla/service/buffer_value.h
@@ -141,9 +141,6 @@ class BufferValue {
   // operator< is required for std::set.
   bool operator<(const BufferValue& other) const { return id_ < other.id_; }
 
-  bool operator==(const BufferValue& other) const { return id_ == other.id_; }
-  bool operator!=(const BufferValue& other) const { return id_ != other.id_; }
-
   virtual string ToString() const = 0;
 
   // TODO(lauj) rename LogicalBufferProto to BufferValueProto.
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index cfe025fdd1..f35324aa35 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -40,12 +40,10 @@ namespace {
 
 using absl::StrAppend;
 
-bool IsReadonlyEntryParameterValue(const HloValue& value) {
+bool IsEntryParameterValue(const HloValue& value) {
   const HloComputation* computation = value.defining_instruction()->parent();
   return value.defining_instruction()->opcode() == HloOpcode::kParameter &&
-         computation == computation->parent()->entry_computation() &&
-         !computation->parent()->input_output_alias_config().ParameterHasAlias(
-             value.defining_instruction()->parameter_number());
+         computation == computation->parent()->entry_computation();
 }
 
 bool IsConstantValue(const HloValue& value) {
@@ -53,7 +51,7 @@ bool IsConstantValue(const HloValue& value) {
 }
 
 bool ValueIsReadOnly(const HloValue& value) {
-  return IsConstantValue(value) || IsReadonlyEntryParameterValue(value);
+  return IsConstantValue(value) || IsEntryParameterValue(value);
 }
 
 // Data structure describing the action which should be taken on parts of a
@@ -334,81 +332,6 @@ Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis,
   return Status::OK();
 }
 
-// Conservatively adds copies before root instruction of entry computation and
-// each aliased parameter to resolve interference of aliased input and output
-// buffer. We later rely on the CopyRemover to drop the unnecessary ones.
-Status AddCopiesForAliasedInputOutputs(HloModule* module) {
-  HloComputation* entry = module->entry_computation();
-  HloInstruction* root = entry->root_instruction();
-
-  ShapeTree<bool> output_indices_to_copy(root->shape());
-  std::vector<ShapeTree<HloInstruction*>> copied_parameters;
-  bool has_alias = false;
-  for (auto* param : entry->parameter_instructions()) {
-    bool param_has_alias = false;
-    ShapeTree<bool> param_indices_to_copy(param->shape());
-
-    module->input_output_alias_config().ForEachAlias(
-        [&](const ShapeIndex& output_index, int64 param_number,
-            const ShapeIndex& param_index) {
-          if (param_number == param->parameter_number()) {
-            param_has_alias = true;
-            *(param_indices_to_copy.mutable_element(param_index)) = true;
-            *(output_indices_to_copy.mutable_element(output_index)) = true;
-          }
-        });
-
-    if (!param_has_alias) {
-      continue;
-    }
-
-    has_alias = true;
-    // Store a snapshot of users before DeepCopyInstruction, as
-    // DeepCopyInstruction introduces new users of the instruction.
-    std::vector<HloInstruction*> users = param->users();
-    ShapeTree<HloInstruction*> param_copy_tree(param->shape(),
-                                               /*init_value=*/nullptr);
-    TF_ASSIGN_OR_RETURN(HloInstruction * copied,
-                        entry->DeepCopyInstruction(
-                            param, &param_indices_to_copy, &param_copy_tree));
-    for (HloInstruction* user : users) {
-      TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, copied));
-    }
-
-    copied_parameters.push_back(param_copy_tree);
-  }
-
-  if (!has_alias) {
-    return Status::OK();
-  }
-
-  // Add copies before root instruction.
-  ShapeTree<HloInstruction*> output_copy_tree(root->shape(),
-                                              /*init_value=*/nullptr);
-
-  TF_ASSIGN_OR_RETURN(HloInstruction * root_copied,
-                      root->parent()->DeepCopyInstruction(
-                          root, &output_indices_to_copy, &output_copy_tree));
-
-  // Add control dependencies between the input/output copies.
-  TF_RETURN_IF_ERROR(module->input_output_alias_config().ForEachAliasWithStatus(
-      [&](const ShapeIndex& output_index, int64 param_number,
-          const ShapeIndex& input_index) -> Status {
-        HloInstruction* from =
-            copied_parameters[param_number].element(input_index);
-        HloInstruction* to = output_copy_tree.element(output_index);
-
-        TF_RET_CHECK(from != nullptr);
-        TF_RET_CHECK(to != nullptr);
-        TF_RETURN_IF_ERROR(from->AddControlDependencyTo(to));
-        return Status::OK();
-      }));
-
-  entry->set_root_instruction(root_copied);
-
-  return Status::OK();
-}
-
 // Removes any control dependencies to or from the given instruction.
 Status StripControlDependenciesFrom(HloInstruction* instruction) {
   while (!instruction->control_successors().empty()) {
@@ -1030,8 +953,6 @@ Status CopyInsertion::AddCopiesToResolveInterference(HloModule* module) {
       }
     }
   }
-
-  TF_RETURN_IF_ERROR(AddCopiesForAliasedInputOutputs(module));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 3096206c34..892d0d7b54 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -1351,189 +1351,6 @@ TEST_F(CopyInsertionTest, SwizzlingWhile) {
   EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy()));
 }
 
-TEST_F(CopyInsertionTest, CrossingParameters) {
-  // Test a case where two parameters' dataflow cross with each other while
-  // input and output are aliased with same index:
-  //
-  //  (p0 ,  p1)
-  //   | \   /|
-  //   |  \ / |
-  // alias X  alias
-  //   |  / \ |
-  //   | /   \|
-  //  (p1  ,  p0)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-  builder.AddInstruction(HloInstruction::CreateTuple({gte1, gte0}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-  InsertCopies(module.get());
-
-  EXPECT_EQ(CountCopies(*module), 4);
-}
-
-TEST_F(CopyInsertionTest, ParametersAliasing) {
-  // Test a case where two parameters' dataflow don't interfere with each other
-  // while aliased.
-  //
-  //  (p0 ,  p1)
-  //   |      |
-  //   |      |
-  // alias   alias
-  //   |      |
-  //   |      |
-  //  (p0 ,  p1)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-  InsertCopies(module.get());
-
-  EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::Copy(op::GetTupleElement(param, 0)),
-                        op::Copy(op::GetTupleElement(param, 1))));
-
-  EXPECT_EQ(CountCopies(*module), 2);
-}
-
-TEST_F(CopyInsertionTest, ParameterWithPartialAliasing) {
-  // Test a case where one parameter is aliased with result while another one
-  // isn't.
-  //
-  //  (p0 ,  p1)
-  //   |      |
-  //   |      |
-  // alias    |
-  //   |      |
-  //   |      |
-  //  (p0 ,  p1)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  InsertCopies(module.get());
-
-  EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::Copy(op::GetTupleElement(param, 0)),
-                        op::Copy(op::GetTupleElement(param, 1))));
-
-  EXPECT_EQ(CountCopies(*module), 2);
-}
-
-TEST_F(CopyInsertionTest, ParameterAndParallelOpsWithPartialAliasing) {
-  // Test a case where one parameter is aliased with result while another one
-  // isn't.
-  //
-  //   +-- (p0 ,  p1)
-  //   |    |      |
-  //   |    |      |
-  // alias Negate  Negate
-  //   |    |      |
-  //   |    |      |
-  //   +-- (p0 ,  p1)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-
-  auto negate0 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
-
-  auto negate1 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
-  builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  InsertCopies(module.get());
-
-  EXPECT_EQ(CountCopies(*module), 0);
-}
-
-TEST_F(CopyInsertionTest, ParameterAndOpsWithPartialAliasing) {
-  // Test a case where one parameter is aliased with result while another one
-  // isn't.
-  //
-  //   +-- (p0 ,  p1)
-  //   |    |      |
-  //   |    |      |
-  // alias Negate  Negate
-  //   |    |      |
-  //   |    Add----+
-  //   |    |      |
-  //   +-- (p0 ,  p1)
-  auto module = CreateNewModule();
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-
-  auto negate0 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
-
-  auto negate1 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
-
-  auto add = builder.AddInstruction(HloInstruction::CreateBinary(
-      scalar_shape_, HloOpcode::kAdd, negate0, negate1));
-  builder.AddInstruction(HloInstruction::CreateTuple({add, negate1}));
-  module->AddEntryComputation(builder.Build());
-  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  InsertCopies(module.get());
-
-  EXPECT_EQ(CountCopies(*module), 0);
-}
-
 TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) {
   // Test a while instruction with a body which permutes its tuple parameter
   // elements and applies one operation to one of the elements. The addition of
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 82c8fb1904..a0eb9e6ddc 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -225,32 +225,6 @@ message HloScheduleProto {
   map<int64, InstructionSequence> sequences = 1;
 }
 
-message HloInputOutputAliasProto {
-  // The following proto describes a pair of aliased an input
-  // (described by parameter number and a ShapeIndex of the parameter)
-  // and an output (described by a ShapeIndex of the root
-  // instruction). For example:
-  //
-  // entry = {
-  //  output_shape_index={1},
-  //  parameter_number=0,
-  //  parameter_shape_index={1, 2},
-  // }
-  //
-  // This entry indicates that the first paremter's {1, 2} element is
-  // aliased with the {1} element of the root instruction.
-  message AliasEntryProto {
-    // ShapeIndex of the root hlo.
-    repeated int64 output_shape_index = 1;
-    // Number of the parameter in entry computation.
-    int64 parameter_number = 2;
-    // ShapeIndex of the parameter instruction.
-    repeated int64 parameter_shape_index = 3;
-  }
-
-  repeated AliasEntryProto entries = 1;
-}
-
 // Serialization of HloModule.
 message HloModuleProto {
   string name = 1;
@@ -269,9 +243,6 @@ message HloModuleProto {
 
   // The schedule for this module.
   HloScheduleProto schedule = 7;
-
-  // Describes alias information between inputs and outputs.
-  HloInputOutputAliasProto input_output_alias = 8;
 }
 
 // Serialization of LogicalBuffer.
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index cf8e6594cb..c3da12e273 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -59,9 +59,8 @@ class BufferValueMap {
   // construction process.
   using BufferNumber = int64;
 
-  explicit BufferValueMap(HloModule* module,
-                          const HloDataflowAnalysis& dataflow)
-      : module_(module), dataflow_(dataflow) {
+  explicit BufferValueMap(const HloDataflowAnalysis& dataflow)
+      : dataflow_(dataflow) {
     buffers_.reserve(dataflow_.values().size());
     value_to_buffer_number_.reserve(dataflow_.values().size());
     for (const HloValue* value : dataflow_.values()) {
@@ -172,42 +171,6 @@ class BufferValueMap {
     return value_to_buffer_number_.at(&value);
   }
 
-  void ComputeInputOutputAliasedBuffers(
-      const HloValue& value, std::vector<BufferNumber>* aliased_buffers) {
-    // Get parameter value from an aliased_input object.
-    const auto get_parameter_value =
-        [this](const std::pair<int64, ShapeIndex>& aliased_input)
-        -> const HloValue& {
-      int64 param_number = aliased_input.first;
-      const ShapeIndex& param_index = aliased_input.second;
-      return dataflow_.GetUniqueValueAt(
-          module_->entry_computation()->parameter_instruction(param_number),
-          param_index);
-    };
-
-    // If the value shows up in a root instruction, alias it with parameter
-    // intruction.
-    for (const HloPosition& pos : value.positions()) {
-      if (pos.instruction == module_->entry_computation()->root_instruction()) {
-        ShapeIndex output_index = pos.index;
-
-        auto aliased_input =
-            module_->input_output_alias_config().GetAliasedParameter(
-                output_index);
-        if (aliased_input) {
-          aliased_buffers->push_back(
-              GetBufferForValue(get_parameter_value(*aliased_input)));
-        }
-      }
-    }
-
-    // If the value is parameter instruction itself, alias it with itself.
-    if (value.instruction()->opcode() == HloOpcode::kParameter &&
-        value.instruction()->parent() == module_->entry_computation()) {
-      aliased_buffers->push_back(GetBufferForValue(value));
-    }
-  }
-
   void ComputeWhileAliasedBuffers(const HloValue& value,
                                   std::vector<BufferNumber>* aliased_buffers) {
     VLOG(3) << "Compute kWhile aliases";
@@ -315,7 +278,6 @@ class BufferValueMap {
       VLOG(2) << "Use of value " << value.ToShortString() << ": " << use;
     }
     std::vector<BufferNumber> aliased_buffers;
-    ComputeInputOutputAliasedBuffers(value, &aliased_buffers);
     ComputeWhileAliasedBuffers(value, &aliased_buffers);
     ComputeConditionalAliasedBuffers(value, &aliased_buffers);
     // Uniquify aliased buffers.
@@ -326,8 +288,6 @@ class BufferValueMap {
     return aliased_buffers;
   }
 
-  HloModule* module_;
-
   // Dataflow analysis used to construct the buffer map.
   const HloDataflowAnalysis& dataflow_;
 
@@ -501,7 +461,7 @@ StatusOr<std::unique_ptr<HloAliasAnalysis>> HloAliasAnalysis::Run(
                                                /*bitcast_defines_value=*/false,
                                                fusion_can_share_buffer));
 
-  BufferValueMap buffer_map(module, alias_analysis->dataflow_analysis());
+  BufferValueMap buffer_map(alias_analysis->dataflow_analysis());
   buffer_map.MergeAliasedBuffers();
 
   // Create a vector of HloBuffers, one for each set of values in the
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
index 5c8d97b2d1..0cd0ab36fc 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
@@ -217,181 +217,6 @@ TEST_F(HloAliasAnalysisTest, NondistinctTuple) {
   EXPECT_FALSE(AnyValuesInSameBufferInterfere());
 }
 
-TEST_F(HloAliasAnalysisTest, ParametersWithAliasing) {
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-
-  auto negate0 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
-  auto negate1 = builder.AddInstruction(
-      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
-
-  auto tuple =
-      builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
-  module_->AddEntryComputation(builder.Build());
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-
-  // Cannot alias an output twice.
-  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
-
-  const HloAliasAnalysis& analysis = RunAnalysis();
-
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
-
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
-}
-
-TEST_F(HloAliasAnalysisTest, ParametersWithCrossAliasing) {
-  // parameter 0 aliased with output 1 and parameter 1 aliased with output 0.
-  //
-  //  (p0 ,  p1)
-  //     \   /
-  //      \ /
-  // alias X
-  //      / \
-  //     /   \
-  //  (p0  ,  p1)
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-  auto gte0 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
-  auto gte1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
-  auto tuple =
-      builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
-  module_->AddEntryComputation(builder.Build());
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{1}));
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
-
-  // Cannot alias an output twice.
-  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-
-  const HloAliasAnalysis& analysis = RunAnalysis();
-
-  // Every Ops in this graph are aliased with each other.
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
-
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
-  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
-            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
-}
-
-TEST_F(HloAliasAnalysisTest, InputOutputAliasingWithWhile) {
-  // Test a simple single while instruction can be aliased with input and output
-  // of the computation.
-  //
-  // body((F32[], F32[]) %tuple_param):
-  //   %add = Add(%tuple_param{0}, %tuple_param{1})
-  //   return Tuple(%tuple_param{0}, %add)
-  //
-  // condition((F32[], F32[]) %tuple_param):
-  //   return Constant(false)
-  //
-  // entry:
-  //   %param1 = param1
-  //   %while = While(%param1, body, condition)
-  //   %while_1 = GTE(%while, 0)
-  //   %while_2 = GTE(%while, 1)
-  //   %negate_1 = Negate(%while_1)
-  //   %negate_2 = Negate(%while_2)
-  //   return Tuple(negate_1, negate_2)
-  //
-  const Shape tuple_shape =
-      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
-
-  // Element 0 passes transparently through the body.
-  auto body_builder = HloComputation::Builder("body");
-  auto body_param = body_builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "param"));
-  auto body_element_0 = body_builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0));
-  auto body_element_1 = body_builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1));
-  auto add = body_builder.AddInstruction(HloInstruction::CreateBinary(
-      scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1));
-  auto body_tuple = body_builder.AddInstruction(
-      HloInstruction::CreateTuple({body_element_0, add}));
-  HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build());
-
-  // Condition computation trivially returns a constant "false".
-  auto cond_builder = HloComputation::Builder("condition");
-  auto cond_param = cond_builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "param"));
-  cond_builder.AddInstruction(
-      HloInstruction::CreateConstant(LiteralUtil::CreateR0<bool>(false)));
-  HloComputation* condition =
-      module_->AddEmbeddedComputation(cond_builder.Build());
-
-  auto builder = HloComputation::Builder(TestName());
-  auto param = builder.AddInstruction(
-      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
-
-  auto xla_while = builder.AddInstruction(
-      HloInstruction::CreateWhile(tuple_shape, condition, body, param));
-  auto while_element_1 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 0));
-  auto while_element_2 = builder.AddInstruction(
-      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 1));
-  auto negate_1 = builder.AddInstruction(HloInstruction::CreateUnary(
-      scalar_shape_, HloOpcode::kNegate, while_element_1));
-  auto negate_2 = builder.AddInstruction(HloInstruction::CreateUnary(
-      scalar_shape_, HloOpcode::kNegate, while_element_2));
-  auto tuple =
-      builder.AddInstruction(HloInstruction::CreateTuple({negate_1, negate_2}));
-  module_->AddEntryComputation(builder.Build());
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
-  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
-      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
-
-  const HloAliasAnalysis& analysis = RunAnalysis();
-
-  EXPECT_THAT(
-      GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})),
-      UnorderedElementsAre(GetValueDefinedAt(param, {1}),
-                           GetValueDefinedAt(xla_while, /*index=*/{1}),
-                           GetValueDefinedAt(body_param, {1}),
-                           GetValueDefinedAt(cond_param, {1}),
-                           GetValueDefinedAt(add),
-                           GetValueDefinedAt(negate_2)));
-
-  EXPECT_THAT(
-      analysis.GetUniqueBufferAt(xla_while, /*index=*/{1}).ComputePositions(),
-      UnorderedElementsAre(
-          HloPosition{param, {1}}, HloPosition{xla_while, {1}},
-          HloPosition{while_element_2, {}}, HloPosition{body_param, {1}},
-          HloPosition{body_element_1, {}}, HloPosition{add, {}},
-          HloPosition{body_tuple, {1}}, HloPosition{tuple, {1}},
-          HloPosition{cond_param, {1}}, HloPosition{negate_2, {}}));
-
-  EXPECT_FALSE(AnyValuesInSameBufferInterfere());
-}
-
 TEST_F(HloAliasAnalysisTest, SingleCall) {
   // Test a single call of a subcomputation. The subcomputation adds its two
   // array-shaped parameters.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index f401eac016..c22adcdd8d 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -126,7 +126,7 @@ bool HloDataflowAnalysis::ValueIsDefinedAt(const HloInstruction* instruction,
 
 const HloValue& HloDataflowAnalysis::GetValueDefinedAt(
     const HloInstruction* instruction, const ShapeIndex& index) const {
-  CHECK(ValueIsDefinedAt(instruction, index)) << instruction->ToString();
+  CHECK(ValueIsDefinedAt(instruction, index));
   return GetUniqueValueAt(instruction, index);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
deleted file mode 100644
index 9ad98e5038..0000000000
--- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
-#include "tensorflow/compiler/xla/service/hlo_module.h"
-
-namespace xla {
-Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index,
-                                             int64 param_number,
-                                             const ShapeIndex& param_index) {
-  // Output can't be aliased with multiple parameters.
-  TF_RET_CHECK(!alias_.element(output_index));
-  (*alias_.mutable_element(output_index)) =
-      std::make_pair(param_number, param_index);
-  return Status::OK();
-}
-
-HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const {
-  HloInputOutputAliasProto result;
-  alias_.ForEachElement(
-      [&](const ShapeIndex& index,
-          const absl::optional<std::pair<int64, ShapeIndex>>& data) {
-        if (data) {
-          HloInputOutputAliasProto::AliasEntryProto entry;
-          for (int64 i : index) {
-            entry.add_output_shape_index(i);
-          }
-          entry.set_parameter_number(data->first);
-          for (int64 i : data->second) {
-            entry.add_parameter_shape_index(i);
-          }
-          result.add_entries()->Swap(&entry);
-        }
-      });
-  return result;
-}
-
-StatusOr<HloInputOutputAliasConfig> HloInputOutputAliasConfig::CreateFromProto(
-    const HloModule* module, const HloInputOutputAliasProto& proto) {
-  HloInputOutputAliasConfig result(
-      module->entry_computation()->root_instruction()->shape());
-  for (const HloInputOutputAliasProto::AliasEntryProto& entry :
-       proto.entries()) {
-    ShapeIndex output_index(entry.output_shape_index().begin(),
-                            entry.output_shape_index().end());
-
-    int64 param_number = entry.parameter_number();
-    ShapeIndex param_index(entry.parameter_shape_index().begin(),
-                           entry.parameter_shape_index().end());
-    TF_RETURN_IF_ERROR(
-        result.SetUpAlias(output_index, param_number, param_index));
-  }
-
-  return result;
-}
-
-string HloInputOutputAliasConfig::ToString() const {
-  std::vector<string> pieces;
-  pieces.push_back("HloInputOutputAliasConfig");
-
-  ForEachAlias([&](const ShapeIndex& output_index, int64 param_number,
-                   const ShapeIndex& param_index) {
-    pieces.push_back(absl::StrFormat(
-        "  OutputIndex %s is aliased with parameter %lld at %s:",
-        output_index.ToString(), param_number, param_index.ToString()));
-  });
-
-  return absl::StrJoin(pieces, "\n");
-}
-
-bool HloInputOutputAliasConfig::ParameterHasAlias(int64 param_number) const {
-  bool output = false;
-  alias_.ForEachElement(
-      [&](const xla::ShapeIndex&,
-          absl::optional<std::pair<int64, ShapeIndex>> alias) {
-        if (alias && alias->first == param_number) {
-          output = true;
-        }
-      });
-  return output;
-}
-
-absl::optional<ShapeIndex> HloInputOutputAliasConfig::GetAliasedOutput(
-    int64 param_number, const ShapeIndex& param_index) const {
-  absl::optional<ShapeIndex> output;
-  alias_.ForEachElement(
-      [&](const xla::ShapeIndex& output_index,
-          absl::optional<std::pair<int64, ShapeIndex>> alias) {
-        if (alias && alias->first == param_number &&
-            alias->second == param_index) {
-          output = output_index;
-        }
-      });
-  return output;
-}
-
-absl::optional<std::pair<int64, ShapeIndex>>
-HloInputOutputAliasConfig::GetAliasedParameter(
-    const ShapeIndex& output_index) const {
-  CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index));
-  return alias_.element(output_index);
-}
-
-void HloInputOutputAliasConfig::ForEachAlias(AliasFn fn) const {
-  alias_.ForEachElement(
-      [&](const ShapeIndex& output_index,
-          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
-        if (aliased) {
-          fn(output_index, aliased->first, aliased->second);
-        }
-      });
-}
-
-Status HloInputOutputAliasConfig::ForEachAliasWithStatus(
-    AliasFnWithStatus fn) const {
-  return alias_.ForEachElementWithStatus(
-      [&](const ShapeIndex& output_index,
-          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
-        if (aliased) {
-          TF_RETURN_IF_ERROR(fn(output_index, aliased->first, aliased->second));
-        }
-        return Status::OK();
-      });
-}
-
-Status HloInputOutputAliasConfig::Verify(const HloModule& module) const {
-  std::vector<ShapeTree<bool>> param_has_seen;
-  const HloComputation* entry = module.entry_computation();
-  for (int64 i = 0; i < entry->num_parameters(); ++i) {
-    HloInstruction* param = entry->parameter_instruction(i);
-    param_has_seen.emplace_back(param->shape());
-  }
-  return ForEachAliasWithStatus([&](const ShapeIndex& output_index,
-                                    int64 param_number,
-                                    const ShapeIndex& param_index) -> Status {
-    const HloInstruction* root = entry->root_instruction();
-
-    const Shape& param_shape =
-        entry->parameter_instruction(param_number)->shape();
-    const Shape& output_shape = root->shape();
-    TF_RET_CHECK(entry->num_parameters() > param_number);
-    TF_RET_CHECK(ShapeUtil::IndexIsValid(param_shape, param_index));
-    TF_RET_CHECK(ShapeUtil::IndexIsValid(output_shape, output_index));
-
-    // Check each param_number and param_index pair only show up once. No
-    // input can be aliased with output buffers.
-    TF_RET_CHECK(param_has_seen[param_number].element(param_index) == false);
-
-    *(param_has_seen[param_number].mutable_element(param_index)) = true;
-
-    return Status::OK();
-  });
-}
-
-std::ostream& operator<<(std::ostream& out,
-                         const HloInputOutputAliasConfig& config) {
-  out << config.ToString();
-  return out;
-}
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
deleted file mode 100644
index 02c46f65c8..0000000000
--- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
-
-#include <utility>
-
-#include "absl/types/optional.h"
-#include "tensorflow/compiler/xla/service/hlo.pb.h"
-#include "tensorflow/compiler/xla/shape_tree.h"
-#include "tensorflow/compiler/xla/shape_util.h"
-
-namespace xla {
-
-class HloModule;
-
-// This class specifies the alias map from output index to parameter number and
-// parameter index in the entry computation.
-class HloInputOutputAliasConfig {
- public:
-  HloInputOutputAliasConfig() = default;
-
-  explicit HloInputOutputAliasConfig(Shape shape) : alias_(shape) {}
-
-  virtual ~HloInputOutputAliasConfig() = default;
-
-  // Sets up alias config from `output_index` to `param_index` at
-  // `param_number`.
-  Status SetUpAlias(const ShapeIndex& output_index, int64 param_number,
-                    const ShapeIndex& param_index);
-
-  // Returns true if the given parameter is aliased with one of the output
-  // buffers.
-  bool ParameterHasAlias(int64 param_number) const;
-
-  // (De)Serializes an HloInputOutoutAliasConfig to/from an
-  // HloInputOutoutAliasProto.
-  HloInputOutputAliasProto ToProto() const;
-
-  static StatusOr<HloInputOutputAliasConfig> CreateFromProto(
-      const HloModule* module, const HloInputOutputAliasProto& proto);
-
-  // Returns the output index that the given parameter and parameter index is
-  // aliased with. A nullopt is returned if there is no output that is aliased
-  // with the parameter number and index.
-  absl::optional<ShapeIndex> GetAliasedOutput(
-      int64 param_number, const ShapeIndex& param_index) const;
-
-  // Returns the number of parameter and index of the parameter buffer that the
-  // given output buffer index is aliased with. A nullopt is returned if there
-  // is no parameter is aliased with the specific output.
-  absl::optional<std::pair<int64, ShapeIndex>> GetAliasedParameter(
-      const ShapeIndex& output_index) const;
-
-  using AliasFn =
-      std::function<void(const ShapeIndex& output_index, int64 param_number,
-                         const ShapeIndex& param_index)>;
-
-  // Iterates through each aliased output and input.
-  void ForEachAlias(AliasFn fn) const;
-
-  using AliasFnWithStatus =
-      std::function<Status(const ShapeIndex& output_index, int64 param_number,
-                           const ShapeIndex& param_index)>;
-
-  // Verifies that the given config is valid for the given module.
-  // Specifically, the config's input and output should be in-bound and size of
-  // the aliased buffers should match.
-  Status Verify(const HloModule& module) const;
-
-  Status ForEachAliasWithStatus(AliasFnWithStatus fn) const;
-
-  string ToString() const;
-
- private:
-  // A ShapeTree which indicates the list of buffers that's expected to be
-  // aliased. The key on this shape tree represents the output index. The value
-  // is a pair of parameter number and index into the buffer. If the value is
-  // nullopt, it means there is no parameter aliasing for this output.
-  ShapeTree<absl::optional<std::pair<int64, ShapeIndex>>> alias_;
-};
-
-std::ostream& operator<<(std::ostream& out,
-                         const HloInputOutputAliasConfig& config);
-
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
deleted file mode 100644
index 3b61ff04e6..0000000000
--- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
-
-#include <memory>
-#include <string>
-
-#include "absl/algorithm/container.h"
-#include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/service/hlo_dce.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h"
-#include "tensorflow/compiler/xla/service/hlo_opcode.h"
-#include "tensorflow/compiler/xla/service/hlo_ordering.h"
-#include "tensorflow/compiler/xla/service/hlo_parser.h"
-#include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
-#include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/core/status_test_util.h"
-
-namespace xla {
-namespace {
-class HloInputOutputAliasConfigTest : public HloTestBase {
- protected:
-  void expect_aliased(const ShapeIndex& output_index, int64 param_number,
-                      const ShapeIndex& param_index,
-                      const HloInputOutputAliasConfig& config) {
-    absl::optional<ShapeIndex> aliased_output =
-        config.GetAliasedOutput(param_number, param_index);
-
-    EXPECT_TRUE(aliased_output);
-    EXPECT_EQ(aliased_output.value(), output_index);
-
-    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
-        config.GetAliasedParameter(output_index);
-
-    EXPECT_TRUE(aliased_param);
-    EXPECT_EQ(aliased_param.value(), std::make_pair(param_number, param_index));
-  }
-
-  void expect_not_aliased(const ShapeIndex& output_index, int64 param_number,
-                          const ShapeIndex& param_index,
-                          const HloInputOutputAliasConfig& config) {
-    absl::optional<ShapeIndex> aliased_output =
-        config.GetAliasedOutput(param_number, param_index);
-
-    EXPECT_FALSE(aliased_output && aliased_output == output_index);
-
-    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
-        config.GetAliasedParameter(output_index);
-
-    EXPECT_FALSE(aliased_param && aliased_param->first == param_number &&
-                 aliased_param->second == param_index);
-  }
-};
-
-TEST_F(HloInputOutputAliasConfigTest, SimpleAliasing) {
-  const string module_str = R"(
-HloModule TEST
-
-ENTRY main {
-  a = f32[] parameter(0)
-  b = f32[] parameter(1)
-  ROOT root = (f32[], f32[]) tuple(%a, %b)
-}
-)";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
-
-  HloInputOutputAliasConfig config(
-      module->entry_computation()->root_instruction()->shape());
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
-                                 /*param_index=*/{}));
-
-  expect_aliased(/*output_index=*/{0}, /*param_number=*/1,
-                 /*param_index=*/{}, config);
-
-  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
-                     /*param_index=*/{}, config);
-
-  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
-                     /*param_index=*/{}, config);
-}
-
-TEST_F(HloInputOutputAliasConfigTest, SimpleAliasingWithTupleInput) {
-  const string module_str = R"(
-HloModule TEST
-
-ENTRY main {
-  param = (f32[], f32[]) parameter(0)
-  gte1 = f32[] get-tuple-element(%param), index=0
-  gte2 = f32[] get-tuple-element(%param), index=1
-  ROOT root = (f32[], f32[]) tuple(%gte1, %gte2)
-}
-)";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
-
-  HloInputOutputAliasConfig config(
-      module->entry_computation()->root_instruction()->shape());
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
-                                 /*param_index=*/{0}));
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
-                                 /*param_index=*/{1}));
-
-  expect_aliased(/*output_index=*/{0}, /*param_number=*/0,
-                 /*param_index=*/{0}, config);
-
-  expect_aliased(/*output_index=*/{1}, /*param_number=*/0,
-                 /*param_index=*/{1}, config);
-
-  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
-                     /*param_index=*/{}, config);
-
-  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
-                     /*param_index=*/{}, config);
-}
-
-TEST_F(HloInputOutputAliasConfigTest, InputDoNotAliasTwice) {
-  const string module_str = R"(
-HloModule TEST
-
-ENTRY main {
-  a = f32[] parameter(0)
-  b = f32[] parameter(1)
-  ROOT root = (f32[], f32[]) tuple(%a, %b)
-}
-)";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
-
-  HloInputOutputAliasConfig config(
-      module->entry_computation()->root_instruction()->shape());
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
-                                 /*param_index=*/{}));
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
-                                 /*param_index=*/{}));
-
-  ASSERT_IS_NOT_OK(config.Verify(*module));
-}
-
-TEST_F(HloInputOutputAliasConfigTest, OutputDoNotAliasTwice) {
-  const string module_str = R"(
-HloModule TEST
-
-ENTRY main {
-  a = f32[] parameter(0)
-  b = f32[] parameter(1)
-  ROOT root = (f32[], f32[]) tuple(%a, %b)
-}
-)";
-  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
-                          ParseHloString(module_str));
-
-  HloInputOutputAliasConfig config(
-      module->entry_computation()->root_instruction()->shape());
-
-  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
-                                 /*param_index=*/{}));
-
-  ASSERT_IS_NOT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
-                                     /*param_index=*/{}));
-}
-}  // namespace
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 547f74a0ed..93e04eb3db 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -73,8 +73,6 @@ HloComputation* HloModule::AddComputationInternal(
       config_.SetDefaultComputationLayout(
           entry_computation_->ComputeProgramShape());
     }
-    input_output_alias_config_ = HloInputOutputAliasConfig(
-        entry_computation_->root_instruction()->shape());
   }
 
   if (uniquify_identifiers) {
@@ -254,9 +252,6 @@ HloModuleProto HloModule::ToProto() const {
   if (has_schedule()) {
     *proto.mutable_schedule() = schedule().ToProto().ValueOrDie();
   }
-
-  *proto.mutable_input_output_alias() = input_output_alias_config().ToProto();
-
   return proto;
 }
 
@@ -333,10 +328,6 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
   }
   TF_RET_CHECK(module->entry_computation_ != nullptr);
 
-  TF_ASSIGN_OR_RETURN(module->input_output_alias_config_,
-                      HloInputOutputAliasConfig::CreateFromProto(
-                          module.get(), proto.input_output_alias()));
-
   // Because we didn't uniquify the names or the ids, double-check that the
   // instruction and computation names and ids are unique from the proto.
   absl::flat_hash_set<string> computation_names;
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 9b9dc3ba9f..735804e827 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -31,7 +31,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_clone_context.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
@@ -213,15 +212,6 @@ class HloModule {
     return result;
   }
 
-  // input_output_alias_config indicates the list of aliased buffers that are
-  // expected from the module.
-  HloInputOutputAliasConfig& input_output_alias_config() {
-    return input_output_alias_config_;
-  }
-  const HloInputOutputAliasConfig& input_output_alias_config() const {
-    return input_output_alias_config_;
-  }
-
   // Returns the number of unique intruction ids given out.  All ids up to
   // this point are guaranteed to be in the range [0..NumUniqueInstructionIds())
   int NumUniqueInstructionIds() const { return next_unique_id_; }
@@ -294,10 +284,6 @@ class HloModule {
   // sequential order of instructions for each non-fusion computation in the
   // module.
   absl::optional<HloSchedule> schedule_;
-
-  // alias_config indicates the alias information of input/output buffers that
-  // are expected from the module.
-  HloInputOutputAliasConfig input_output_alias_config_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 2902a11a42..be3bee5975 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -1220,8 +1220,6 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     TF_RETURN_IF_ERROR(module->schedule().Verify());
   }
 
-  TF_RETURN_IF_ERROR(module->input_output_alias_config().Verify(*module));
-
   return false;
 }
 
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 51cedce7f0..73f541d505 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -72,7 +72,7 @@ class ShapeIndex {
   void push_back(int64 value) { indices_.push_back(value); }
   void pop_back() { indices_.pop_back(); }
 
-  // push_front is O(n), but shapes don't usually have a ton of dimensions.
+  // push_front is O(n^2), but shapes don't usually have a ton of dimensions.
   void push_front(int64 value) { indices_.insert(indices_.begin(), value); }
 
   using container_type = absl::InlinedVector<int64, 2>;
-- 
GitLab


From ec82efd4ceb433e409ce518bd20c500076d79d10 Mon Sep 17 00:00:00 2001
From: Grzegorz Pawelczak <grzegorzp@graphcore.ai>
Date: Tue, 9 Oct 2018 15:03:46 +0100
Subject: [PATCH 0596/1085] [XLA] Query whether to enable XLA support on MacOS
 with no as a default

---
 configure.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure.py b/configure.py
index 89dc79b6b6..08e3c17b18 100644
--- a/configure.py
+++ b/configure.py
@@ -1566,7 +1566,6 @@ def main():
 
   if is_macos():
     environ_cp['TF_NEED_TENSORRT'] = '0'
-    environ_cp['TF_ENABLE_XLA'] = '0'
 
   # The numpy package on ppc64le uses OpenBLAS which has multi-threading
   # issues that lead to incorrect answers.  Set OMP_NUM_THREADS=1 at
@@ -1577,8 +1576,9 @@ def main():
 
   set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite',
                 'with_ignite_support', True, 'ignite')
+  xla_enabled_by_default = is_linux()
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
-                True, 'xla')
+                xla_enabled_by_default, 'xla')
 
   set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
   if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
-- 
GitLab


From a9a44b070bf639ee9bd60f0fd21157a297cd7f82 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 07:41:42 -0700
Subject: [PATCH 0597/1085] Removed unused load statements from the core BUILD.

PiperOrigin-RevId: 216354906
---
 tensorflow/core/BUILD | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 900a0e11c4..acea8e2217 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -73,12 +73,10 @@ licenses(["notice"])  # Apache 2.0
 load(
     "//tensorflow:tensorflow.bzl",
     "cc_header_only_library",
-    "full_path",
     "if_android",
     "if_ios",
     "if_linux_x86_64",
     "if_mobile",
-    "if_not_mobile",
     "if_not_windows",
     "if_windows",
     "tf_cc_test",
-- 
GitLab


From a0ed9452d5c7f897e26788d8dca5164cb6fba023 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 07:54:16 -0700
Subject: [PATCH 0598/1085] Fixing Toco for exporting graphs with strings

If the graph contains not constant array with strings it fails because the
array's size can't be estimated.

PiperOrigin-RevId: 216356162
---
 tensorflow/contrib/lite/toco/tooling_util.cc | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index e3f27e9e2a..083a96ad9d 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -1237,11 +1237,15 @@ void DedupeConstantArrays(Model* model, size_t min_size) {
         lhs_array.final_data_type != ArrayDataType::kNone
             ? lhs_array.final_data_type
             : lhs_array.data_type;
-    size_t array_byte_size =
-        lhs_array.buffer->Length() * ElementSize(final_data_type);
-    if (array_byte_size < min_size) {
-      // Too small; skip.
-      continue;
+    // Ignore small arrays, don't check string arrays because it is not possible
+    // to estimate its size.
+    if (final_data_type != ArrayDataType::kString) {
+      size_t array_byte_size =
+          lhs_array.buffer->Length() * ElementSize(final_data_type);
+      if (array_byte_size < min_size) {
+        // Too small; skip.
+        continue;
+      }
     }
 
     auto next_lhs_array_it = lhs_array_it;
-- 
GitLab


From cadcacc6224bcbb8a05bf3b70d625d9024a9c0f3 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 9 Oct 2018 08:16:49 -0700
Subject: [PATCH 0599/1085] Allowing for mixture of V1 and V2 feature columns
 usage in canned estimators. This is required for TF hub use cases where users
 might send in new feature columns to old model code. Implemented this support
 by making V2 feature columns support the V1 API. This is needed temporarily
 and would definitely be removed by TF 2.0, possibly earlier depending on what
 guarantees are provided by TF hub.

The only case we don't allow here is mixing in V2 shared embedding columns with V1 Feature columns. V2 Shared FC's depend on a SharedEmbeddingState manager that would have to be passed in to the various API's and there wasn't really a very clean way to make that work.

Mixing V2 feature columns with V1 shared embedding columns is fine though and along with all other combinations

PiperOrigin-RevId: 216359041
---
 .../canned/dnn_linear_combined_test.py        |  107 +-
 .../estimator/canned/dnn_testing_utils.py     |  109 +
 .../estimator/canned/linear_testing_utils.py  |   64 +
 tensorflow/python/feature_column/BUILD        |    1 +
 .../python/feature_column/feature_column.py   |    4 +
 .../feature_column/feature_column_v2.py       |  869 ++++-
 .../feature_column/feature_column_v2_test.py  | 3294 ++++++++++++++---
 7 files changed, 3772 insertions(+), 676 deletions(-)

diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
index ae968e717a..ab945d7b1a 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
@@ -317,16 +317,10 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
       writer_cache.FileWriterCache.clear()
       shutil.rmtree(self._model_dir)
 
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, batch_size,
-                          fc_impl):
-    linear_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    dnn_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    feature_columns = linear_feature_columns + dnn_feature_columns
+  def _test_complete_flow_helper(
+      self, linear_feature_columns, dnn_feature_columns, feature_spec,
+      train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
     est = dnn_linear_combined.DNNLinearCombinedRegressor(
         linear_feature_columns=linear_feature_columns,
         dnn_hidden_units=(2, 2),
@@ -351,14 +345,63 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
     self.assertAllEqual((batch_size, label_dimension), predictions.shape)
 
     # EXPORT
-    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
-  def test_numpy_input_fn(self, fc_impl):
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, batch_size,
+                          fc_impl):
+    linear_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_complete_flow_mix1(self, train_input_fn, eval_input_fn,
+                               predict_input_fn, input_dimension,
+                               label_dimension, batch_size, fc_impl):
+    del fc_impl
+    linear_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        feature_column_v2.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn,
+                               predict_input_fn, input_dimension,
+                               label_dimension, batch_size, fc_impl):
+    del fc_impl
+    linear_feature_columns = [
+        feature_column_v2.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_numpy_input_fn_helper(self, fc_impl, fn_to_run):
     """Tests complete flow with numpy_input_fn."""
     label_dimension = 2
     batch_size = 10
@@ -381,7 +424,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         shuffle=False)
 
-    self._test_complete_flow(
+    fn_to_run(
         train_input_fn=train_input_fn,
         eval_input_fn=eval_input_fn,
         predict_input_fn=predict_input_fn,
@@ -390,7 +433,16 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         fc_impl=fc_impl)
 
-  def test_pandas_input_fn(self, fc_impl):
+  def test_numpy_input_fn_basic(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow)
+
+  def test_numpy_input_fn_mix1(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
+
+  def test_numpy_input_fn_mix2(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
+
+  def _test_pandas_input_fn_helper(self, fc_impl, fn_to_run):
     """Tests complete flow with pandas_input_fn."""
     if not HAS_PANDAS:
       return
@@ -415,7 +467,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         shuffle=False)
 
-    self._test_complete_flow(
+    fn_to_run(
         train_input_fn=train_input_fn,
         eval_input_fn=eval_input_fn,
         predict_input_fn=predict_input_fn,
@@ -424,7 +476,16 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         fc_impl=fc_impl)
 
-  def test_input_fn_from_parse_example(self, fc_impl):
+  def test_pandas_input_fn_basic(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow)
+
+  def test_pandas_input_fn_mix1(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
+
+  def test_pandas_input_fn_mix2(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
+
+  def _test_input_fn_from_parse_example_helper(self, fc_impl, fn_to_run):
     """Tests complete flow with input_fn constructed from parse_example."""
     label_dimension = 2
     batch_size = 10
@@ -466,7 +527,7 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
       features.pop('y')
       return features, None
 
-    self._test_complete_flow(
+    fn_to_run(
         train_input_fn=_train_input_fn,
         eval_input_fn=_eval_input_fn,
         predict_input_fn=_predict_input_fn,
@@ -475,6 +536,18 @@ class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size,
         fc_impl=fc_impl)
 
+  def test_input_fn_from_parse_example_basic(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow)
+
+  def test_input_fn_from_parse_example_mix1(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow_mix1)
+
+  def test_input_fn_from_parse_example_mix2(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow_mix2)
+
 
 # A function to mimic dnn-classifier init reuse same tests.
 def _dnn_classifier_fn(hidden_units,
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index cd66d0a3bd..71d7e54783 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -34,6 +34,7 @@ from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.canned import prediction_keys
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -479,6 +480,60 @@ class BaseDNNModelFnTest(object):
           else:
             self.fail('Invalid mode: {}'.format(mode))
 
+  def test_multi_feature_column_mix_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((
+        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
+        ([[1., .8], [-.8, -1.]], [.2, -.2]),
+        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
+    ), base_global_step, self._model_dir)
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        training_util.create_global_step()
+        head = mock_head(
+            self,
+            hidden_units=hidden_units,
+            logits_dimension=logits_dimension,
+            expected_logits=expected_logits)
+        estimator_spec = self._dnn_model_fn(
+            features={
+                'age': constant_op.constant(inputs[0]),
+                'height': constant_op.constant(inputs[1])
+            },
+            labels=constant_op.constant([[1]]),
+            mode=mode,
+            head=head,
+            hidden_units=hidden_units,
+            feature_columns=[
+                feature_column.numeric_column('age'),
+                feature_column_v2.numeric_column('height')
+            ],
+            optimizer=mock_optimizer(self, hidden_units))
+        with monitored_session.MonitoredTrainingSession(
+            checkpoint_dir=self._model_dir) as sess:
+          if mode == model_fn.ModeKeys.TRAIN:
+            sess.run(estimator_spec.train_op)
+          elif mode == model_fn.ModeKeys.EVAL:
+            sess.run(estimator_spec.loss)
+          elif mode == model_fn.ModeKeys.PREDICT:
+            sess.run(estimator_spec.predictions)
+          else:
+            self.fail('Invalid mode: {}'.format(mode))
+
   def test_features_tensor_raises_value_error(self):
     """Tests that passing a Tensor for features raises a ValueError."""
     hidden_units = (2, 2)
@@ -806,6 +861,60 @@ class BaseDNNLogitFnTest(object):
               checkpoint_dir=self._model_dir) as sess:
             self.assertAllClose(expected_logits, sess.run(logits))
 
+  def test_multi_feature_column_mix_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((
+        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
+        ([[1., .8], [-.8, -1.]], [.2, -.2]),
+        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
+    ), base_global_step, self._model_dir)
+
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        # Global step needed for MonitoredSession, which is in turn used to
+        # explicitly set variable weights through a checkpoint.
+        training_util.create_global_step()
+        # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
+        # the checkpoint naming is shared.
+        with variable_scope.variable_scope('dnn'):
+          input_layer_partitioner = (
+              partitioned_variables.min_max_variable_partitioner(
+                  max_partitions=0, min_slice_size=64 << 20))
+          logit_fn = self._dnn_logit_fn_builder(
+              units=logits_dimension,
+              hidden_units=hidden_units,
+              feature_columns=[
+                  feature_column.numeric_column('age'),
+                  feature_column_v2.numeric_column('height')
+              ],
+              activation_fn=nn.relu,
+              dropout=None,
+              input_layer_partitioner=input_layer_partitioner,
+              batch_norm=False)
+          logits = logit_fn(
+              features={
+                  'age': constant_op.constant(inputs[0]),
+                  'height': constant_op.constant(inputs[1])
+              },
+              mode=mode)
+          with monitored_session.MonitoredTrainingSession(
+              checkpoint_dir=self._model_dir) as sess:
+            self.assertAllClose(expected_logits, sess.run(logits))
+
 
 class BaseDNNWarmStartingTest(object):
 
diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py
index 827352a70b..2cfa2a8e15 100644
--- a/tensorflow/python/estimator/canned/linear_testing_utils.py
+++ b/tensorflow/python/estimator/canned/linear_testing_utils.py
@@ -400,6 +400,45 @@ class BaseLinearRegressorEvaluationTest(object):
     # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
     self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
 
+  def test_evaluation_for_multiple_feature_columns_mix(self):
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
+      variables_lib.Variable([5.0], name=BIAS_NAME)
+      variables_lib.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    batch_size = 2
+    feature_columns = [
+        feature_column.numeric_column('age'),
+        feature_column_v2.numeric_column('height')
+    ]
+
+    def _input_fn():
+      features_ds = dataset_ops.Dataset.from_tensor_slices({
+          'age': np.array([20, 40]),
+          'height': np.array([4, 8])
+      })
+      labels_ds = dataset_ops.Dataset.from_tensor_slices(
+          np.array([[213.], [421.]]))
+      return (dataset_ops.Dataset.zip((features_ds, labels_ds))
+              .batch(batch_size).repeat(None))
+
+    est = self._linear_regressor_fn(
+        feature_columns=feature_columns, model_dir=self._model_dir)
+
+    eval_metrics = est.evaluate(input_fn=_input_fn, steps=1)
+    self.assertItemsEqual(
+        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
+         metric_keys.MetricKeys.PREDICTION_MEAN,
+         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
+        eval_metrics.keys())
+
+    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
+    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
+    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
+
 
 class BaseLinearRegressorPredictTest(object):
 
@@ -497,6 +536,31 @@ class BaseLinearRegressorPredictTest(object):
     # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
     self.assertAllClose([[80.2]], predicted_scores)
 
+  def testTwoFeatureColumnsMix(self):
+    """Tests predict with two feature columns."""
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.]], name='linear/linear_model/x0/weights')
+      variables_lib.Variable([[20.]], name='linear/linear_model/x1/weights')
+      variables_lib.Variable([.2], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(feature_column.numeric_column('x0'),
+                         feature_column_v2.numeric_column('x1')),
+        model_dir=self._model_dir)
+
+    def _predict_input_fn():
+      return dataset_ops.Dataset.from_tensor_slices({
+          'x0': np.array([[2.]]),
+          'x1': np.array([[3.]])
+      }).batch(1)
+
+    predictions = linear_regressor.predict(input_fn=_predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
+    self.assertAllClose([[80.2]], predicted_scores)
+
   def testSparseCombiner(self):
     w_a = 2.0
     w_b = 3.0
diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index ac53a84eef..82acde584e 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -54,6 +54,7 @@ py_library(
     srcs = ["feature_column_v2.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":feature_column",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:check_ops",
         "//tensorflow/python:control_flow_ops",
diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 28a8286544..8a11ca142c 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -121,6 +121,10 @@ Example of building model using FeatureColumns, this can be used in a
 
 NOTE: Functions prefixed with "_" indicate experimental or private parts of
 the API subject to change, and should not be relied upon!
+
+NOTE: The new feature columns are being developed in feature_column_v2.py and
+are a somewhat duplicate of the code here. Please make sure to update logic
+in both places.
 """
 
 from __future__ import absolute_import
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index b79373c475..6d089de991 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -136,6 +136,7 @@ import six
 
 
 from tensorflow.python.eager import context
+from tensorflow.python.feature_column import feature_column as fc_old
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
@@ -157,9 +158,16 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.util import deprecation
 from tensorflow.python.util import nest
 
 
+_FEATURE_COLUMN_DEPRECATION_DATE = '2018-11-30'
+_FEATURE_COLUMN_DEPRECATION = ('The old _FeatureColumn APIs are being '
+                               'deprecated. Please use the new FeatureColumn '
+                               'APIs instead.')
+
+
 class StateManager(object):
   """Manages the state associated with FeatureColumns.
 
@@ -440,10 +448,6 @@ class FeatureLayer(Layer):
     return (input_shape[0], total_elements)
 
 
-def _strip_leading_slashes(name):
-  return name.rsplit('/', 1)[-1]
-
-
 class LinearModel(Layer):
   """Produces a linear prediction `Tensor` based on given `feature_columns`.
 
@@ -775,12 +779,12 @@ def embedding_column(
     categorical_column, dimension, combiner='mean', initializer=None,
     ckpt_to_load_from=None, tensor_name_in_ckpt=None, max_norm=None,
     trainable=True):
-  """`_DenseColumn` that converts from sparse, categorical input.
+  """`DenseColumn` that converts from sparse, categorical input.
 
   Use this when your inputs are sparse, but you want to convert them to a dense
   representation (e.g., to feed to a DNN).
 
-  Inputs must be a `_CategoricalColumn` created by any of the
+  Inputs must be a `CategoricalColumn` created by any of the
   `categorical_column_*` function. Here is an example of using
   `embedding_column` with `DNNClassifier`:
 
@@ -814,12 +818,12 @@ def embedding_column(
   ```
 
   Args:
-    categorical_column: A `_CategoricalColumn` created by a
+    categorical_column: A `CategoricalColumn` created by a
       `categorical_column_with_*` function. This column produces the sparse IDs
       that are inputs to the embedding lookup.
     dimension: An integer specifying dimension of the embedding, must be > 0.
-    combiner: A string specifying how to reduce if there are multiple entries
-      in a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with
+    combiner: A string specifying how to reduce if there are multiple entries in
+      a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with
       'mean' the default. 'sqrtn' often achieves good accuracy, in particular
       with bag-of-words columns. Each of this can be thought as example level
       normalizations on the column. For more information, see
@@ -830,14 +834,14 @@ def embedding_column(
       `1/sqrt(dimension)`.
     ckpt_to_load_from: String representing checkpoint name/pattern from which to
       restore column weights. Required if `tensor_name_in_ckpt` is not `None`.
-    tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from
-      which to restore the column weights. Required if `ckpt_to_load_from` is
-      not `None`.
+    tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from which
+      to restore the column weights. Required if `ckpt_to_load_from` is not
+      `None`.
     max_norm: If not `None`, embedding values are l2-normalized to this value.
     trainable: Whether or not the embedding is trainable. Default is True.
 
   Returns:
-    `_DenseColumn` that converts from sparse input.
+    `DenseColumn` that converts from sparse input.
 
   Raises:
     ValueError: if `dimension` not > 0.
@@ -1181,7 +1185,7 @@ def bucketized_column(source_column, boundaries):
       one-dimensional.
     ValueError: If `boundaries` is not a sorted list or tuple.
   """
-  if not isinstance(source_column, NumericColumn):
+  if not isinstance(source_column, (NumericColumn, fc_old._NumericColumn)):  # pylint: disable=protected-access
     raise ValueError(
         'source_column must be a column generated with numeric_column(). '
         'Given: {}'.format(source_column))
@@ -1390,7 +1394,7 @@ def categorical_column_with_vocabulary_file(key,
 
 def categorical_column_with_vocabulary_list(
     key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0):
-  """A `_CategoricalColumn` with in-memory vocabulary.
+  """A `CategoricalColumn` with in-memory vocabulary.
 
   Use this when your inputs are in string or integer format, and you have an
   in-memory vocabulary mapping each value to an integer ID. By default,
@@ -1439,14 +1443,14 @@ def categorical_column_with_vocabulary_list(
   ```
 
   Args:
-    key: A unique string identifying the input feature. It is used as the
-      column name and the dictionary key for feature parsing configs, feature
-      `Tensor` objects, and feature columns.
+    key: A unique string identifying the input feature. It is used as the column
+      name and the dictionary key for feature parsing configs, feature `Tensor`
+      objects, and feature columns.
     vocabulary_list: An ordered iterable defining the vocabulary. Each feature
       is mapped to the index of its value (if present) in `vocabulary_list`.
       Must be castable to `dtype`.
-    dtype: The type of features. Only string and integer types are supported.
-      If `None`, it will be inferred from `vocabulary_list`.
+    dtype: The type of features. Only string and integer types are supported. If
+      `None`, it will be inferred from `vocabulary_list`.
     default_value: The integer ID value to return for out-of-vocabulary feature
       values, defaults to `-1`. This can not be specified with a positive
       `num_oov_buckets`.
@@ -1604,7 +1608,7 @@ def indicator_column(categorical_column):
 
 def weighted_categorical_column(
     categorical_column, weight_feature_key, dtype=dtypes.float32):
-  """Applies weight values to a `_CategoricalColumn`.
+  """Applies weight values to a `CategoricalColumn`.
 
   Use this when each of your sparse inputs has both an ID and a value. For
   example, if you're representing text documents as a collection of word
@@ -1655,7 +1659,7 @@ def weighted_categorical_column(
   the same indices and dense shape.
 
   Args:
-    categorical_column: A `_CategoricalColumn` created by
+    categorical_column: A `CategoricalColumn` created by
       `categorical_column_with_*` functions.
     weight_feature_key: String key for weight values.
     dtype: Type of weights, such as `tf.float32`. Only float and integer weights
@@ -1788,12 +1792,13 @@ def crossed_column(keys, hash_bucket_size, hash_key=None):
         'keys must be a list with length > 1. Given: {}'.format(keys))
   for key in keys:
     if (not isinstance(key, six.string_types) and
-        not isinstance(key, CategoricalColumn)):
+        not isinstance(key, (CategoricalColumn, fc_old._CategoricalColumn))):  # pylint: disable=protected-access
       raise ValueError(
           'Unsupported key type. All keys must be either string, or '
           'categorical column except HashedCategoricalColumn. '
           'Given: {}'.format(key))
-    if isinstance(key, HashedCategoricalColumn):
+    if isinstance(key,
+                  (HashedCategoricalColumn, fc_old._HashedCategoricalColumn)):  # pylint: disable=protected-access
       raise ValueError(
           'categorical_column_with_hash_bucket is not supported for crossing. '
           'Hashing before crossing will increase probability of collision. '
@@ -1882,6 +1887,16 @@ class FeatureColumn(object):
     """
     pass
 
+  @abc.abstractproperty
+  def _is_v2_column(self):
+    """Returns whether this FeatureColumn is fully conformant to the new API.
+
+    This is needed for composition type cases where an EmbeddingColumn etc.
+    might take in old categorical columns as input and then we want to use the
+    old API.
+    """
+    pass
+
 
 class DenseColumn(FeatureColumn):
   """Represents a column which can be represented as `Tensor`.
@@ -1927,6 +1942,8 @@ def is_feature_column_v2(feature_columns):
   for feature_column in feature_columns:
     if not isinstance(feature_column, FeatureColumn):
       return False
+    if not feature_column._is_v2_column:  # pylint: disable=protected-access
+      return False
   return True
 
 
@@ -2201,19 +2218,6 @@ class FeatureTransformationCache(object):
           lambda: feature_tensor)
 
 
-# TODO(ptucker): Move to third_party/tensorflow/python/ops/sparse_ops.py
-def _shape_offsets(shape):
-  """Returns moving offset for each dimension given shape."""
-  offsets = []
-  for dim in reversed(shape):
-    if offsets:
-      offsets.append(dim * offsets[-1])
-    else:
-      offsets.append(dim)
-  offsets.reverse()
-  return offsets
-
-
 # TODO(ptucker): Move to third_party/tensorflow/python/ops/sparse_ops.py
 def _to_sparse_input_and_drop_ignore_values(input_tensor, ignore_value=None):
   """Converts a `Tensor` to a `SparseTensor`, dropping ignore_value cells.
@@ -2306,11 +2310,16 @@ def _normalize_feature_columns(feature_columns):
 
 class NumericColumn(
     DenseColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'NumericColumn',
         ('key', 'shape', 'default_value', 'dtype', 'normalizer_fn'))):
   """see `numeric_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2325,6 +2334,27 @@ class NumericColumn(
                                         self.default_value)
     }
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
+
+  def _transform_input_tensor(self, input_tensor):
+    if isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
+      raise ValueError(
+          'The corresponding Tensor of numerical column must be a Tensor. '
+          'SparseTensor is not supported. key: {}'.format(self.key))
+    if self.normalizer_fn is not None:
+      input_tensor = self.normalizer_fn(input_tensor)
+    return math_ops.to_float(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = inputs.get(self.key)
+    return self._transform_input_tensor(input_tensor)
+
   def transform_feature(self, transformation_cache, state_manager):
     """See `FeatureColumn` base class.
 
@@ -2342,19 +2372,19 @@ class NumericColumn(
       ValueError: If a SparseTensor is passed in.
     """
     input_tensor = transformation_cache.get(self.key, state_manager)
-    if isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
-      raise ValueError(
-          'The corresponding Tensor of numerical column must be a Tensor. '
-          'SparseTensor is not supported. key: {}'.format(self.key))
-    if self.normalizer_fn is not None:
-      input_tensor = self.normalizer_fn(input_tensor)
-    return math_ops.to_float(input_tensor)
+    return self._transform_input_tensor(input_tensor)
 
   @property
   def variable_shape(self):
     """See `DenseColumn` base class."""
     return tensor_shape.TensorShape(self.shape)
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _variable_shape(self):
+    return self.variable_shape
+
   def get_dense_tensor(self, transformation_cache, state_manager):
     """Returns dense `Tensor` representing numeric feature.
 
@@ -2371,12 +2401,28 @@ class NumericColumn(
     # representation created by _transform_feature.
     return transformation_cache.get(self, state_manager)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    del weight_collections
+    del trainable
+    return inputs.get(self)
+
 
-class BucketizedColumn(DenseColumn, CategoricalColumn,
-                       collections.namedtuple('BucketizedColumn',
-                                              ('source_column', 'boundaries'))):
+class BucketizedColumn(
+    DenseColumn,
+    CategoricalColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
+    collections.namedtuple('BucketizedColumn',
+                           ('source_column', 'boundaries'))):
   """See `bucketized_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.source_column, FeatureColumn) and
+            self.source_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2387,6 +2433,21 @@ class BucketizedColumn(DenseColumn, CategoricalColumn,
     """See `FeatureColumn` base class."""
     return self.source_column.parse_example_spec
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.source_column._parse_example_spec  # pylint: disable=protected-access
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    """Returns bucketized categorical `source_column` tensor."""
+    source_tensor = inputs.get(self.source_column)
+    return math_ops._bucketize(  # pylint: disable=protected-access
+        source_tensor,
+        boundaries=self.boundaries)
+
   def transform_feature(self, transformation_cache, state_manager):
     """Returns bucketized categorical `source_column` tensor."""
     source_tensor = transformation_cache.get(self.source_column, state_manager)
@@ -2400,24 +2461,45 @@ class BucketizedColumn(DenseColumn, CategoricalColumn,
     return tensor_shape.TensorShape(
         tuple(self.source_column.shape) + (len(self.boundaries) + 1,))
 
-  def get_dense_tensor(self, transformation_cache, state_manager):
-    """Returns one hot encoded dense `Tensor`."""
-    input_tensor = transformation_cache.get(self, state_manager)
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _variable_shape(self):
+    return self.variable_shape
+
+  def _get_dense_tensor_for_input_tensor(self, input_tensor):
     return array_ops.one_hot(
         indices=math_ops.to_int64(input_tensor),
         depth=len(self.boundaries) + 1,
         on_value=1.,
         off_value=0.)
 
+  def get_dense_tensor(self, transformation_cache, state_manager):
+    """Returns one hot encoded dense `Tensor`."""
+    input_tensor = transformation_cache.get(self, state_manager)
+    return self._get_dense_tensor_for_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    del weight_collections
+    del trainable
+    input_tensor = inputs.get(self)
+    return self._get_dense_tensor_for_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """See `CategoricalColumn` base class."""
     # By construction, source_column is always one-dimensional.
     return (len(self.boundaries) + 1) * self.source_column.shape[0]
 
-  def get_sparse_tensors(self, transformation_cache, state_manager):
-    """Converts dense inputs to SparseTensor so downstream code can use it."""
-    input_tensor = transformation_cache.get(self, state_manager)
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
+  def _get_sparse_tensors_for_input_tensor(self, input_tensor):
     batch_size = array_ops.shape(input_tensor)[0]
     # By construction, source_column is always one-dimensional.
     source_dimension = self.source_column.shape[0]
@@ -2443,15 +2525,38 @@ class BucketizedColumn(DenseColumn, CategoricalColumn,
         dense_shape=dense_shape)
     return CategoricalColumn.IdWeightPair(sparse_tensor, None)
 
+  def get_sparse_tensors(self, transformation_cache, state_manager):
+    """Converts dense inputs to SparseTensor so downstream code can use it."""
+    input_tensor = transformation_cache.get(self, state_manager)
+    return self._get_sparse_tensors_for_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    """Converts dense inputs to SparseTensor so downstream code can use it."""
+    del weight_collections
+    del trainable
+    input_tensor = inputs.get(self)
+    return self._get_sparse_tensors_for_input_tensor(input_tensor)
+
 
 class EmbeddingColumn(
-    DenseColumn, SequenceDenseColumn,
+    DenseColumn,
+    SequenceDenseColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
+    fc_old._SequenceDenseColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'EmbeddingColumn',
         ('categorical_column', 'dimension', 'combiner', 'initializer',
          'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
   """See `embedding_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.categorical_column, FeatureColumn) and
+            self.categorical_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2462,18 +2567,35 @@ class EmbeddingColumn(
     """See `FeatureColumn` base class."""
     return self.categorical_column.parse_example_spec
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+
   def transform_feature(self, transformation_cache, state_manager):
     """Transforms underlying `categorical_column`."""
     return transformation_cache.get(self.categorical_column, state_manager)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    return inputs.get(self.categorical_column)
+
   @property
   def variable_shape(self):
     """See `DenseColumn` base class."""
     return tensor_shape.vector(self.dimension)
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _variable_shape(self):
+    return self.variable_shape
+
   def create_state(self, state_manager):
     """Creates the embedding lookup variable."""
-    embedding_shape = (self.categorical_column.num_buckets, self.dimension)
+    embedding_shape = (self.categorical_column._num_buckets, self.dimension)  # pylint: disable=protected-access
     state_manager.create_variable(
         self,
         name='embedding_weights',
@@ -2482,17 +2604,11 @@ class EmbeddingColumn(
         trainable=self.trainable,
         initializer=self.initializer)
 
-  def _get_dense_tensor_internal(self, transformation_cache, state_manager):
-    """Private method that follows the signature of _get_dense_tensor."""
-    # Get sparse IDs and weights.
-    sparse_tensors = self.categorical_column.get_sparse_tensors(
-        transformation_cache, state_manager)
+  def _get_dense_tensor_internal_helper(self, sparse_tensors,
+                                        embedding_weights):
     sparse_ids = sparse_tensors.id_tensor
     sparse_weights = sparse_tensors.weight_tensor
 
-    embedding_weights = state_manager.get_variable(
-        self, name='embedding_weights')
-
     if self.ckpt_to_load_from is not None:
       to_restore = embedding_weights
       if isinstance(to_restore, variables.PartitionedVariable):
@@ -2510,6 +2626,30 @@ class EmbeddingColumn(
         name='%s_weights' % self.name,
         max_norm=self.max_norm)
 
+  def _get_dense_tensor_internal(self, sparse_tensors, state_manager):
+    """Private method that follows the signature of get_dense_tensor."""
+    embedding_weights = state_manager.get_variable(
+        self, name='embedding_weights')
+    return self._get_dense_tensor_internal_helper(sparse_tensors,
+                                                  embedding_weights)
+
+  def _old_get_dense_tensor_internal(self, sparse_tensors, weight_collections,
+                                     trainable):
+    """Private method that follows the signature of _get_dense_tensor."""
+    embedding_shape = (self.categorical_column._num_buckets, self.dimension)  # pylint: disable=protected-access
+    if (weight_collections and
+        ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections):
+      weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
+    embedding_weights = variable_scope.get_variable(
+        name='embedding_weights',
+        shape=embedding_shape,
+        dtype=dtypes.float32,
+        initializer=self.initializer,
+        trainable=self.trainable and trainable,
+        collections=weight_collections)
+    return self._get_dense_tensor_internal_helper(sparse_tensors,
+                                                  embedding_weights)
+
   def get_dense_tensor(self, transformation_cache, state_manager):
     """Returns tensor after doing the embedding lookup.
 
@@ -2535,7 +2675,30 @@ class EmbeddingColumn(
           'sequence_input_layer instead of input_layer. '
           'Given (type {}): {}'.format(self.name, type(self.categorical_column),
                                        self.categorical_column))
-    return self._get_dense_tensor_internal(transformation_cache, state_manager)
+    # Get sparse IDs and weights.
+    sparse_tensors = self.categorical_column.get_sparse_tensors(
+        transformation_cache, state_manager)
+    return self._get_dense_tensor_internal(sparse_tensors, state_manager)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    if isinstance(
+        self.categorical_column,
+        (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
+      raise ValueError(
+          'In embedding_column: {}. '
+          'categorical_column must not be of type _SequenceCategoricalColumn. '
+          'Suggested fix A: If you wish to use input_layer, use a '
+          'non-sequence categorical_column_with_*. '
+          'Suggested fix B: If you wish to create sequence input, use '
+          'sequence_input_layer instead of input_layer. '
+          'Given (type {}): {}'.format(self.name, type(self.categorical_column),
+                                       self.categorical_column))
+    sparse_tensors = self.categorical_column._get_sparse_tensors(  # pylint: disable=protected-access
+        inputs, weight_collections, trainable)
+    return self._old_get_dense_tensor_internal(sparse_tensors,
+                                               weight_collections, trainable)
 
   def get_sequence_dense_tensor(self, transformation_cache, state_manager):
     """See `SequenceDenseColumn` base class."""
@@ -2547,21 +2710,40 @@ class EmbeddingColumn(
           'Suggested fix: Use one of sequence_categorical_column_with_*. '
           'Given (type {}): {}'.format(self.name, type(self.categorical_column),
                                        self.categorical_column))
-    dense_tensor = self._get_dense_tensor_internal(  # pylint: disable=protected-access
+    sparse_tensors = self.categorical_column.get_sequence_sparse_tensors(
         transformation_cache, state_manager)
-    sparse_tensors = self.categorical_column.get_sparse_tensors(
-        transformation_cache, state_manager)
-    sequence_length = _sequence_length_from_sparse_tensor(
+    dense_tensor = self._get_dense_tensor_internal(sparse_tensors,
+                                                   state_manager)
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
 
-
-def _get_graph_for_variable(var):
-  if isinstance(var, variables.PartitionedVariable):
-    return list(var)[0].graph
-  else:
-    return var.graph
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sequence_dense_tensor(self,
+                                 inputs,
+                                 weight_collections=None,
+                                 trainable=None):
+    if not isinstance(
+        self.categorical_column,
+        (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
+      raise ValueError(
+          'In embedding_column: {}. '
+          'categorical_column must be of type _SequenceCategoricalColumn '
+          'to use sequence_input_layer. '
+          'Suggested fix: Use one of sequence_categorical_column_with_*. '
+          'Given (type {}): {}'.format(self.name, type(self.categorical_column),
+                                       self.categorical_column))
+    sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+    dense_tensor = self._old_get_dense_tensor_internal(
+        sparse_tensors,
+        weight_collections=weight_collections,
+        trainable=trainable)
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
+        sparse_tensors.id_tensor)
+    return SequenceDenseColumn.TensorSequenceLengthPair(
+        dense_tensor=dense_tensor, sequence_length=sequence_length)
 
 
 class SharedEmbeddingStateManager(Layer):
@@ -2633,8 +2815,17 @@ def maybe_create_shared_state_manager(feature_columns):
   return None
 
 
+def _raise_shared_embedding_column_error():
+  raise ValueError('SharedEmbeddingColumns are not supported in '
+                   '`linear_model` or `input_layer`. Please use '
+                   '`FeatureLayer` or `LinearModel` instead.')
+
+
 class SharedEmbeddingColumn(
-    DenseColumn, SequenceDenseColumn,
+    DenseColumn,
+    SequenceDenseColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
+    fc_old._SequenceDenseColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'SharedEmbeddingColumn',
         ('categorical_column', 'dimension', 'combiner', 'initializer',
@@ -2642,6 +2833,10 @@ class SharedEmbeddingColumn(
          'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
   """See `embedding_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2662,15 +2857,26 @@ class SharedEmbeddingColumn(
     """See `FeatureColumn` base class."""
     return self.categorical_column.parse_example_spec
 
+  @property
+  def _parse_example_spec(self):
+    return _raise_shared_embedding_column_error()
+
   def transform_feature(self, transformation_cache, state_manager):
     """See `FeatureColumn` base class."""
     return transformation_cache.get(self.categorical_column, state_manager)
 
+  def _transform_feature(self, inputs):
+    return _raise_shared_embedding_column_error()
+
   @property
   def variable_shape(self):
     """See `DenseColumn` base class."""
     return tensor_shape.vector(self.dimension)
 
+  @property
+  def _variable_shape(self):
+    return _raise_shared_embedding_column_error()
+
   def create_state(self, state_manager):
     """Creates the shared embedding lookup variable."""
     if not isinstance(state_manager, SharedEmbeddingStateManager):
@@ -2731,6 +2937,9 @@ class SharedEmbeddingColumn(
                                        self.categorical_column))
     return self._get_dense_tensor_internal(transformation_cache, state_manager)
 
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    return _raise_shared_embedding_column_error()
+
   def get_sequence_dense_tensor(self, transformation_cache, state_manager):
     """See `SequenceDenseColumn` base class."""
     if not isinstance(self.categorical_column, SequenceCategoricalColumn):
@@ -2745,11 +2954,17 @@ class SharedEmbeddingColumn(
                                                   state_manager)
     sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
-    sequence_length = _sequence_length_from_sparse_tensor(
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
 
+  def _get_sequence_dense_tensor(self,
+                                 inputs,
+                                 weight_collections=None,
+                                 trainable=None):
+    return _raise_shared_embedding_column_error()
+
 
 def _create_tuple(shape, value):
   """Returns a tuple with given shape and filled with value."""
@@ -2858,10 +3073,15 @@ def _check_default_value(shape, default_value, dtype, key):
 
 class HashedCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple('HashedCategoricalColumn',
                            ('key', 'hash_bucket_size', 'dtype'))):
   """see `categorical_column_with_hash_bucket`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2872,10 +3092,14 @@ class HashedCategoricalColumn(
     """See `FeatureColumn` base class."""
     return {self.key: parsing_ops.VarLenFeature(self.dtype)}
 
-  def transform_feature(self, transformation_cache, state_manager):
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
+
+  def _transform_input_tensor(self, input_tensor):
     """Hashes the values in the feature_column."""
-    input_tensor = _to_sparse_input_and_drop_ignore_values(
-        transformation_cache.get(self.key, state_manager))
     if not isinstance(input_tensor, sparse_tensor_lib.SparseTensor):
       raise ValueError('SparseColumn input must be a SparseTensor.')
 
@@ -2899,24 +3123,55 @@ class HashedCategoricalColumn(
     return sparse_tensor_lib.SparseTensor(
         input_tensor.indices, sparse_id_values, input_tensor.dense_shape)
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Hashes the values in the feature_column."""
+    input_tensor = _to_sparse_input_and_drop_ignore_values(
+        transformation_cache.get(self.key, state_manager))
+    return self._transform_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key))
+    return self._transform_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.hash_bucket_size
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 class VocabularyFileCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple('VocabularyFileCategoricalColumn',
                            ('key', 'vocabulary_file', 'vocabulary_size',
                             'num_oov_buckets', 'dtype', 'default_value'))):
   """See `categorical_column_with_vocabulary_file`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2927,11 +3182,14 @@ class VocabularyFileCategoricalColumn(
     """See `FeatureColumn` base class."""
     return {self.key: parsing_ops.VarLenFeature(self.dtype)}
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Creates a lookup table for the vocabulary."""
-    input_tensor = _to_sparse_input_and_drop_ignore_values(
-        transformation_cache.get(self.key, state_manager))
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
 
+  def _transform_input_tensor(self, input_tensor):
+    """Creates a lookup table for the vocabulary."""
     if self.dtype.is_integer != input_tensor.dtype.is_integer:
       raise ValueError(
           'Column dtype and SparseTensors dtype must be compatible. '
@@ -2957,25 +3215,56 @@ class VocabularyFileCategoricalColumn(
         key_dtype=key_dtype,
         name='{}_lookup'.format(self.key)).lookup(input_tensor)
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Creates a lookup table for the vocabulary."""
+    input_tensor = _to_sparse_input_and_drop_ignore_values(
+        transformation_cache.get(self.key, state_manager))
+    return self._transform_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key))
+    return self._transform_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.vocabulary_size + self.num_oov_buckets
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 class VocabularyListCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'VocabularyListCategoricalColumn',
         ('key', 'vocabulary_list', 'dtype', 'default_value', 'num_oov_buckets'))
 ):
   """See `categorical_column_with_vocabulary_list`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -2986,11 +3275,14 @@ class VocabularyListCategoricalColumn(
     """See `FeatureColumn` base class."""
     return {self.key: parsing_ops.VarLenFeature(self.dtype)}
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Creates a lookup table for the vocabulary list."""
-    input_tensor = _to_sparse_input_and_drop_ignore_values(
-        transformation_cache.get(self.key, state_manager))
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
 
+  def _transform_input_tensor(self, input_tensor):
+    """Creates a lookup table for the vocabulary list."""
     if self.dtype.is_integer != input_tensor.dtype.is_integer:
       raise ValueError(
           'Column dtype and SparseTensors dtype must be compatible. '
@@ -3015,24 +3307,55 @@ class VocabularyListCategoricalColumn(
         dtype=key_dtype,
         name='{}_lookup'.format(self.key)).lookup(input_tensor)
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Creates a lookup table for the vocabulary list."""
+    input_tensor = _to_sparse_input_and_drop_ignore_values(
+        transformation_cache.get(self.key, state_manager))
+    return self._transform_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key))
+    return self._transform_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return len(self.vocabulary_list) + self.num_oov_buckets
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 class IdentityCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple('IdentityCategoricalColumn',
                            ('key', 'number_buckets', 'default_value'))):
 
   """See `categorical_column_with_identity`."""
 
+  @property
+  def _is_v2_column(self):
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -3043,11 +3366,14 @@ class IdentityCategoricalColumn(
     """See `FeatureColumn` base class."""
     return {self.key: parsing_ops.VarLenFeature(dtypes.int64)}
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Returns a SparseTensor with identity values."""
-    input_tensor = _to_sparse_input_and_drop_ignore_values(
-        transformation_cache.get(self.key, state_manager))
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
 
+  def _transform_input_tensor(self, input_tensor):
+    """Returns a SparseTensor with identity values."""
     if not input_tensor.dtype.is_integer:
       raise ValueError(
           'Invalid input, not integer. key: {} dtype: {}'.format(
@@ -3082,24 +3408,56 @@ class IdentityCategoricalColumn(
         values=values,
         dense_shape=input_tensor.dense_shape)
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Returns a SparseTensor with identity values."""
+    input_tensor = _to_sparse_input_and_drop_ignore_values(
+        transformation_cache.get(self.key, state_manager))
+    return self._transform_input_tensor(input_tensor)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    input_tensor = _to_sparse_input_and_drop_ignore_values(inputs.get(self.key))
+    return self._transform_input_tensor(input_tensor)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.number_buckets
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 class WeightedCategoricalColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple(
         'WeightedCategoricalColumn',
         ('categorical_column', 'weight_feature_key', 'dtype'))):
   """See `weighted_categorical_column`."""
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.categorical_column, FeatureColumn) and
+            self.categorical_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -3116,15 +3474,29 @@ class WeightedCategoricalColumn(
     config[self.weight_feature_key] = parsing_ops.VarLenFeature(self.dtype)
     return config
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    config = self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+    if self.weight_feature_key in config:
+      raise ValueError('Parse config {} already exists for {}.'.format(
+          config[self.weight_feature_key], self.weight_feature_key))
+    config[self.weight_feature_key] = parsing_ops.VarLenFeature(self.dtype)
+    return config
+
   @property
   def num_buckets(self):
     """See `DenseColumn` base class."""
     return self.categorical_column.num_buckets
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Applies weights to tensor generated from `categorical_column`'."""
-    weight_tensor = transformation_cache.get(self.weight_feature_key,
-                                             state_manager)
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.categorical_column._num_buckets  # pylint: disable=protected-access
+
+  def _transform_weight_tensor(self, weight_tensor):
     if weight_tensor is None:
       raise ValueError('Missing weights {}.'.format(self.weight_feature_key))
     weight_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(
@@ -3138,27 +3510,63 @@ class WeightedCategoricalColumn(
           weight_tensor, ignore_value=0.0)
     if not weight_tensor.dtype.is_floating:
       weight_tensor = math_ops.to_float(weight_tensor)
+    return weight_tensor
+
+  def transform_feature(self, transformation_cache, state_manager):
+    """Applies weights to tensor generated from `categorical_column`'."""
+    weight_tensor = transformation_cache.get(self.weight_feature_key,
+                                             state_manager)
+    weight_tensor = self._transform_weight_tensor(weight_tensor)
     return (transformation_cache.get(self.categorical_column, state_manager),
             weight_tensor)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    """Applies weights to tensor generated from `categorical_column`'."""
+    weight_tensor = inputs.get(self.weight_feature_key)
+    weight_tensor = self._transform_weight_tensor(weight_tensor)
+    return (inputs.get(self.categorical_column), weight_tensor)
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     tensors = transformation_cache.get(self, state_manager)
     return CategoricalColumn.IdWeightPair(tensors[0], tensors[1])
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    del weight_collections
+    del trainable
+    tensors = inputs.get(self)
+    return CategoricalColumn.IdWeightPair(tensors[0], tensors[1])
+
 
 class CrossedColumn(
     CategoricalColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple('CrossedColumn',
                            ('keys', 'hash_bucket_size', 'hash_key'))):
   """See `crossed_column`."""
 
+  @property
+  def _is_v2_column(self):
+    for key in _collect_leaf_level_keys(self):
+      if isinstance(key, six.string_types):
+        continue
+      if not isinstance(key, FeatureColumn):
+        return False
+      if not key._is_v2_column:  # pylint: disable=protected-access
+        return False
+    return True
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
     feature_names = []
     for key in _collect_leaf_level_keys(self):
-      if isinstance(key, FeatureColumn):
+      if isinstance(key, (FeatureColumn, fc_old._FeatureColumn)):  # pylint: disable=protected-access
         feature_names.append(key.name)
       else:  # key must be a string
         feature_names.append(key)
@@ -3171,17 +3579,25 @@ class CrossedColumn(
     for key in self.keys:
       if isinstance(key, FeatureColumn):
         config.update(key.parse_example_spec)
+      elif isinstance(key, fc_old._FeatureColumn):  # pylint: disable=protected-access
+        config.update(key._parse_example_spec)  # pylint: disable=protected-access
       else:  # key must be a string
         config.update({key: parsing_ops.VarLenFeature(dtypes.string)})
     return config
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.parse_example_spec
+
   def transform_feature(self, transformation_cache, state_manager):
     """Generates a hashed sparse cross from the input tensors."""
     feature_tensors = []
     for key in _collect_leaf_level_keys(self):
       if isinstance(key, six.string_types):
         feature_tensors.append(transformation_cache.get(key, state_manager))
-      elif isinstance(key, CategoricalColumn):
+      elif isinstance(key, (fc_old._CategoricalColumn, CategoricalColumn)):  # pylint: disable=protected-access
         ids_and_weights = key.get_sparse_tensors(transformation_cache,
                                                  state_manager)
         if ids_and_weights.weight_tensor is not None:
@@ -3197,16 +3613,54 @@ class CrossedColumn(
         num_buckets=self.hash_bucket_size,
         hash_key=self.hash_key)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    """Generates a hashed sparse cross from the input tensors."""
+    feature_tensors = []
+    for key in _collect_leaf_level_keys(self):
+      if isinstance(key, six.string_types):
+        feature_tensors.append(inputs.get(key))
+      elif isinstance(key, (CategoricalColumn, fc_old._CategoricalColumn)):  # pylint: disable=protected-access
+        ids_and_weights = key._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+        if ids_and_weights.weight_tensor is not None:
+          raise ValueError(
+              'crossed_column does not support weight_tensor, but the given '
+              'column populates weight_tensor. '
+              'Given column: {}'.format(key.name))
+        feature_tensors.append(ids_and_weights.id_tensor)
+      else:
+        raise ValueError('Unsupported column type. Given: {}'.format(key))
+    return sparse_ops.sparse_cross_hashed(
+        inputs=feature_tensors,
+        num_buckets=self.hash_bucket_size,
+        hash_key=self.hash_key)
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.hash_bucket_size
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.num_buckets
+
   def get_sparse_tensors(self, transformation_cache, state_manager):
     """See `CategoricalColumn` base class."""
     return CategoricalColumn.IdWeightPair(
         transformation_cache.get(self, state_manager), None)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    """See `CategoricalColumn` base class."""
+    del weight_collections
+    del trainable
+    return CategoricalColumn.IdWeightPair(inputs.get(self), None)
+
 
 def _collect_leaf_level_keys(cross):
   """Collects base keys by expanding all nested crosses.
@@ -3382,9 +3836,12 @@ def _prune_invalid_weights(sparse_ids, sparse_weights):
   return sparse_ids, sparse_weights
 
 
-class IndicatorColumn(DenseColumn, SequenceDenseColumn,
-                      collections.namedtuple('IndicatorColumn',
-                                             ('categorical_column'))):
+class IndicatorColumn(
+    DenseColumn,
+    SequenceDenseColumn,
+    fc_old._DenseColumn,  # pylint: disable=protected-access
+    fc_old._SequenceDenseColumn,  # pylint: disable=protected-access
+    collections.namedtuple('IndicatorColumn', ('categorical_column'))):
   """Represents a one-hot column for use in deep networks.
 
   Args:
@@ -3392,28 +3849,17 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
       `categorical_column_with_*` function.
   """
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.categorical_column, FeatureColumn) and
+            self.categorical_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
     return '{}_indicator'.format(self.categorical_column.name)
 
-  def transform_feature(self, transformation_cache, state_manager):
-    """Returns dense `Tensor` representing feature.
-
-    Args:
-      transformation_cache: A `FeatureTransformationCache` object to access
-        features.
-      state_manager: A `StateManager` to create / access resources such as
-        lookup tables.
-
-    Returns:
-      Transformed feature `Tensor`.
-
-    Raises:
-      ValueError: if input rank is not known at graph building time.
-    """
-    id_weight_pair = self.categorical_column.get_sparse_tensors(
-        transformation_cache, state_manager)
+  def _transform_id_weight_pair(self, id_weight_pair):
     id_tensor = id_weight_pair.id_tensor
     weight_tensor = id_weight_pair.weight_tensor
 
@@ -3422,7 +3868,7 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
       weighted_column = sparse_ops.sparse_merge(
           sp_ids=id_tensor,
           sp_values=weight_tensor,
-          vocab_size=int(self.variable_shape[-1]))
+          vocab_size=int(self._variable_shape[-1]))
       # Remove (?, -1) index
       weighted_column = sparse_ops.sparse_slice(weighted_column, [0, 0],
                                                 weighted_column.dense_shape)
@@ -3435,22 +3881,62 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
     # input_layer are float32.
     one_hot_id_tensor = array_ops.one_hot(
         dense_id_tensor,
-        depth=self.variable_shape[-1],
+        depth=self._variable_shape[-1],
         on_value=1.0,
         off_value=0.0)
 
     # Reduce to get a multi-hot per example.
     return math_ops.reduce_sum(one_hot_id_tensor, axis=[-2])
 
+  def transform_feature(self, transformation_cache, state_manager):
+    """Returns dense `Tensor` representing feature.
+
+    Args:
+      transformation_cache: A `FeatureTransformationCache` object to access
+        features.
+      state_manager: A `StateManager` to create / access resources such as
+        lookup tables.
+
+    Returns:
+      Transformed feature `Tensor`.
+
+    Raises:
+      ValueError: if input rank is not known at graph building time.
+    """
+    id_weight_pair = self.categorical_column.get_sparse_tensors(
+        transformation_cache, state_manager)
+    return self._transform_id_weight_pair(id_weight_pair)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    id_weight_pair = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+    return self._transform_id_weight_pair(id_weight_pair)
+
   @property
   def parse_example_spec(self):
     """See `FeatureColumn` base class."""
     return self.categorical_column.parse_example_spec
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+
   @property
   def variable_shape(self):
     """Returns a `TensorShape` representing the shape of the dense `Tensor`."""
-    return tensor_shape.TensorShape([1, self.categorical_column.num_buckets])
+    if isinstance(self.categorical_column, FeatureColumn):
+      return tensor_shape.TensorShape([1, self.categorical_column.num_buckets])
+    else:
+      return tensor_shape.TensorShape([1, self.categorical_column._num_buckets])  # pylint: disable=protected-access
+
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _variable_shape(self):
+    return tensor_shape.TensorShape([1, self.categorical_column._num_buckets])  # pylint: disable=protected-access
 
   def get_dense_tensor(self, transformation_cache, state_manager):
     """Returns dense `Tensor` representing feature.
@@ -3481,6 +3967,27 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
     # representation created by transform_feature.
     return transformation_cache.get(self, state_manager)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    del weight_collections
+    del trainable
+    if isinstance(
+        self.categorical_column,
+        (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
+      raise ValueError(
+          'In indicator_column: {}. '
+          'categorical_column must not be of type _SequenceCategoricalColumn. '
+          'Suggested fix A: If you wish to use input_layer, use a '
+          'non-sequence categorical_column_with_*. '
+          'Suggested fix B: If you wish to create sequence input, use '
+          'sequence_input_layer instead of input_layer. '
+          'Given (type {}): {}'.format(self.name, type(self.categorical_column),
+                                       self.categorical_column))
+    # Feature has been already transformed. Return the intermediate
+    # representation created by transform_feature.
+    return inputs.get(self)
+
   def get_sequence_dense_tensor(self, transformation_cache, state_manager):
     """See `SequenceDenseColumn` base class."""
     if not isinstance(self.categorical_column, SequenceCategoricalColumn):
@@ -3496,7 +4003,36 @@ class IndicatorColumn(DenseColumn, SequenceDenseColumn,
     dense_tensor = transformation_cache.get(self, state_manager)
     sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
-    sequence_length = _sequence_length_from_sparse_tensor(
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
+        sparse_tensors.id_tensor)
+    return SequenceDenseColumn.TensorSequenceLengthPair(
+        dense_tensor=dense_tensor, sequence_length=sequence_length)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sequence_dense_tensor(self,
+                                 inputs,
+                                 weight_collections=None,
+                                 trainable=None):
+    # Do nothing with weight_collections and trainable since no variables are
+    # created in this function.
+    del weight_collections
+    del trainable
+    if not isinstance(
+        self.categorical_column,
+        (SequenceCategoricalColumn, fc_old._SequenceCategoricalColumn)):  # pylint: disable=protected-access
+      raise ValueError(
+          'In indicator_column: {}. '
+          'categorical_column must be of type _SequenceCategoricalColumn '
+          'to use sequence_input_layer. '
+          'Suggested fix: Use one of sequence_categorical_column_with_*. '
+          'Given (type {}): {}'.format(self.name, type(self.categorical_column),
+                                       self.categorical_column))
+    # Feature has been already transformed. Return the intermediate
+    # representation created by _transform_feature.
+    dense_tensor = inputs.get(self)
+    sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+    sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
         sparse_tensors.id_tensor)
     return SequenceDenseColumn.TensorSequenceLengthPair(
         dense_tensor=dense_tensor, sequence_length=sequence_length)
@@ -3518,27 +4054,18 @@ def _verify_static_batch_size_equality(tensors, columns):
                 expected_batch_size, tensors[i].shape[0]))
 
 
-def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1):
-  """Returns a [batch_size] Tensor with per-example sequence length."""
-  with ops.name_scope(None, 'sequence_length') as name_scope:
-    row_ids = sp_tensor.indices[:, 0]
-    column_ids = sp_tensor.indices[:, 1]
-    column_ids += array_ops.ones_like(column_ids)
-    seq_length = math_ops.to_int64(
-        math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements)
-    # If the last n rows do not have ids, seq_length will have shape
-    # [batch_size - n]. Pad the remaining values with zeros.
-    n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1]
-    padding = array_ops.zeros(n_pad, dtype=seq_length.dtype)
-    return array_ops.concat([seq_length, padding], axis=0, name=name_scope)
-
-
-class SequenceCategoricalColumn(FeatureColumn,
-                                collections.namedtuple(
-                                    'SequenceCategoricalColumn',
-                                    ('categorical_column'))):
+class SequenceCategoricalColumn(
+    FeatureColumn,
+    fc_old._CategoricalColumn,  # pylint: disable=protected-access
+    collections.namedtuple('SequenceCategoricalColumn',
+                           ('categorical_column'))):
   """Represents sequences of categorical data."""
 
+  @property
+  def _is_v2_column(self):
+    return (isinstance(self.categorical_column, FeatureColumn) and
+            self.categorical_column._is_v2_column)  # pylint: disable=protected-access
+
   @property
   def name(self):
     """See `FeatureColumn` base class."""
@@ -3549,16 +4076,46 @@ class SequenceCategoricalColumn(FeatureColumn,
     """See `FeatureColumn` base class."""
     return self.categorical_column.parse_example_spec
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _parse_example_spec(self):
+    return self.categorical_column._parse_example_spec  # pylint: disable=protected-access
+
   def transform_feature(self, transformation_cache, state_manager):
     """See `FeatureColumn` base class."""
     return self.categorical_column.transform_feature(transformation_cache,
                                                      state_manager)
 
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _transform_feature(self, inputs):
+    return self.categorical_column._transform_feature(inputs)  # pylint: disable=protected-access
+
   @property
   def num_buckets(self):
     """Returns number of buckets in this sparse feature."""
     return self.categorical_column.num_buckets
 
+  @property
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _num_buckets(self):
+    return self.categorical_column._num_buckets  # pylint: disable=protected-access
+
+  def _get_sparse_tensors_helper(self, sparse_tensors):
+    id_tensor = sparse_tensors.id_tensor
+    weight_tensor = sparse_tensors.weight_tensor
+    # Expands third dimension, if necessary so that embeddings are not
+    # combined during embedding lookup. If the tensor is already 3D, leave
+    # as-is.
+    shape = array_ops.shape(id_tensor)
+    target_shape = [shape[0], shape[1], -1]
+    id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape)
+    if weight_tensor is not None:
+      weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape)
+    return CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
+
   def get_sequence_sparse_tensors(self, transformation_cache, state_manager):
     """Returns an IdWeightPair.
 
@@ -3580,27 +4137,11 @@ class SequenceCategoricalColumn(FeatureColumn,
     """
     sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
-    id_tensor = sparse_tensors.id_tensor
-    weight_tensor = sparse_tensors.weight_tensor
-    # Expands final dimension, so that embeddings are not combined during
-    # embedding lookup.
-    check_id_rank = check_ops.assert_equal(
-        array_ops.rank(id_tensor), 2,
-        data=[
-            'Column {} expected ID tensor of rank 2. '.format(self.name),
-            'id_tensor shape: ', array_ops.shape(id_tensor)])
-    with ops.control_dependencies([check_id_rank]):
-      id_tensor = sparse_ops.sparse_reshape(
-          id_tensor,
-          shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0))
-    if weight_tensor is not None:
-      check_weight_rank = check_ops.assert_equal(
-          array_ops.rank(weight_tensor), 2,
-          data=[
-              'Column {} expected weight tensor of rank 2.'.format(self.name),
-              'weight_tensor shape:', array_ops.shape(weight_tensor)])
-      with ops.control_dependencies([check_weight_rank]):
-        weight_tensor = sparse_ops.sparse_reshape(
-            weight_tensor,
-            shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0))
-    return CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
+    return self._get_sparse_tensors_helper(sparse_tensors)
+
+  @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE,
+                          _FEATURE_COLUMN_DEPRECATION)
+  def _get_sparse_tensors(self, inputs, weight_collections=None,
+                          trainable=None):
+    sparse_tensors = self.categorical_column._get_sparse_tensors(inputs)  # pylint: disable=protected-access
+    return self._get_sparse_tensors_helper(sparse_tensors)
diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py
index d3787146ed..31bc0485ef 100644
--- a/tensorflow/python/feature_column/feature_column_v2_test.py
+++ b/tensorflow/python/feature_column/feature_column_v2_test.py
@@ -31,12 +31,8 @@ from tensorflow.python.client import session
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column as fc_old
 from tensorflow.python.feature_column import feature_column_v2 as fc
-from tensorflow.python.feature_column.feature_column_v2 import _transform_features
-from tensorflow.python.feature_column.feature_column_v2 import FeatureColumn
-from tensorflow.python.feature_column.feature_column_v2 import FeatureLayer
-from tensorflow.python.feature_column.feature_column_v2 import FeatureTransformationCache
-from tensorflow.python.feature_column.feature_column_v2 import StateManager
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -46,6 +42,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
@@ -60,15 +57,29 @@ def _initialized_session(config=None):
   return sess
 
 
+def get_linear_model_bias(name='linear_model'):
+  with variable_scope.variable_scope(name, reuse=True):
+    return variable_scope.get_variable('bias_weights')
+
+
+def get_linear_model_column_var(column, name='linear_model'):
+  return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                            name + '/' + column.name)[0]
+
+
 class LazyColumnTest(test.TestCase):
 
   def test_transformations_called_once(self):
 
-    class TransformCounter(FeatureColumn):
+    class TransformCounter(fc.FeatureColumn):
 
       def __init__(self):
         self.num_transform = 0
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return 'TransformCounter'
@@ -81,7 +92,7 @@ class LazyColumnTest(test.TestCase):
       def parse_example_spec(self):
         pass
 
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     column = TransformCounter()
     self.assertEqual(0, column.num_transform)
@@ -92,7 +103,11 @@ class LazyColumnTest(test.TestCase):
 
   def test_returns_transform_output(self):
 
-    class Transformer(FeatureColumn):
+    class Transformer(fc.FeatureColumn):
+
+      @property
+      def _is_v2_column(self):
+        return True
 
       @property
       def name(self):
@@ -105,7 +120,7 @@ class LazyColumnTest(test.TestCase):
       def parse_example_spec(self):
         pass
 
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     column = Transformer()
     self.assertEqual('Output', transformation_cache.get(column, None))
@@ -113,7 +128,11 @@ class LazyColumnTest(test.TestCase):
 
   def test_does_not_pollute_given_features_dict(self):
 
-    class Transformer(FeatureColumn):
+    class Transformer(fc.FeatureColumn):
+
+      @property
+      def _is_v2_column(self):
+        return True
 
       @property
       def name(self):
@@ -127,12 +146,12 @@ class LazyColumnTest(test.TestCase):
         pass
 
     features = {'a': [[2], [3.]]}
-    transformation_cache = FeatureTransformationCache(features=features)
+    transformation_cache = fc.FeatureTransformationCache(features=features)
     transformation_cache.get(Transformer(), None)
     self.assertEqual(['a'], list(features.keys()))
 
   def test_error_if_feature_is_not_found(self):
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     with self.assertRaisesRegexp(ValueError,
                                  'bbb is not in features dictionary'):
@@ -143,7 +162,11 @@ class LazyColumnTest(test.TestCase):
 
   def test_not_supported_feature_column(self):
 
-    class NotAProperColumn(FeatureColumn):
+    class NotAProperColumn(fc.FeatureColumn):
+
+      @property
+      def _is_v2_column(self):
+        return True
 
       @property
       def name(self):
@@ -157,7 +180,7 @@ class LazyColumnTest(test.TestCase):
       def parse_example_spec(self):
         pass
 
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     with self.assertRaisesRegexp(ValueError,
                                  'NotAProperColumn is not supported'):
@@ -168,7 +191,7 @@ class LazyColumnTest(test.TestCase):
     class NotAFeatureColumn(object):
       pass
 
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={'a': [[2], [3.]]})
     with self.assertRaisesRegexp(
         TypeError, '"key" must be either a "str" or "FeatureColumn".'):
@@ -176,7 +199,7 @@ class LazyColumnTest(test.TestCase):
 
   def test_expand_dim_rank_1_sparse_tensor_empty_batch(self):
     # empty 1-D sparse tensor:
-    transformation_cache = FeatureTransformationCache(
+    transformation_cache = fc.FeatureTransformationCache(
         features={
             'a':
                 sparse_tensor.SparseTensor(
@@ -201,6 +224,7 @@ class NumericColumnTest(test.TestCase):
     self.assertIsNone(a.default_value)
     self.assertEqual(dtypes.float32, a.dtype)
     self.assertIsNone(a.normalizer_fn)
+    self.assertTrue(a._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -317,7 +341,9 @@ class NumericColumnTest(test.TestCase):
       return input_tensor + 2.
 
     price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two)
-    output = _transform_features({'price': [[1., 2.], [5., 6.]]}, [price], None)
+    output = fc._transform_features({
+        'price': [[1., 2.], [5., 6.]]
+    }, [price], None)
     with self.cached_session():
       self.assertAllEqual([[3., 4.], [7., 8.]], output[price].eval())
 
@@ -327,7 +353,7 @@ class NumericColumnTest(test.TestCase):
       return input_tensor + 2.
 
     price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two)
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'price': [[1., 2.], [5., 6.]]
     })
     self.assertEqual(
@@ -336,7 +362,7 @@ class NumericColumnTest(test.TestCase):
 
   def test_sparse_tensor_not_supported(self):
     price = fc.numeric_column('price')
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'price':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0]], values=[0.3], dense_shape=[1, 1])
@@ -370,6 +396,20 @@ class NumericColumnTest(test.TestCase):
         sess.run(price_var.assign([[10.]]))
         self.assertAllClose([[10.], [50.]], predictions.eval())
 
+  def test_old_linear_model(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      predictions = fc_old.linear_model(features, [price])
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        self.assertAllClose([[0.]], price_var.eval())
+        self.assertAllClose([[0.], [0.]], predictions.eval())
+        sess.run(price_var.assign([[10.]]))
+        self.assertAllClose([[10.], [50.]], predictions.eval())
+
 
 class BucketizedColumnTest(test.TestCase):
 
@@ -404,6 +444,13 @@ class BucketizedColumnTest(test.TestCase):
   def test_name(self):
     a = fc.numeric_column('aaa', dtype=dtypes.int32)
     b = fc.bucketized_column(a, boundaries=[0, 1])
+    self.assertTrue(b._is_v2_column)
+    self.assertEqual('aaa_bucketized', b.name)
+
+  def test_is_v2_column_old_numeric(self):
+    a = fc_old.numeric_column('aaa', dtype=dtypes.int32)
+    b = fc.bucketized_column(a, boundaries=[0, 1])
+    self.assertFalse(b._is_v2_column)
     self.assertEqual('aaa_bucketized', b.name)
 
   def test_parse_spec(self):
@@ -445,7 +492,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[2])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformed_tensor = _transform_features({
+      transformed_tensor = fc._transform_features({
           'price': [[-1., 1.], [5., 6.]]
       }, [bucketized_price], None)
       with _initialized_session():
@@ -457,7 +504,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[1])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'price': [[-1.], [1.], [5.], [6.]]
       })
       with _initialized_session():
@@ -476,7 +523,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[2])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'price': [[-1., 1.], [5., 6.]]
       })
       with _initialized_session():
@@ -493,7 +540,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[1])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'price': [[-1.], [1.], [5.], [6.]]
       })
       with _initialized_session() as sess:
@@ -511,7 +558,7 @@ class BucketizedColumnTest(test.TestCase):
     price = fc.numeric_column('price', shape=[2])
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'price': [[-1., 1.], [5., 6.]]
       })
       with _initialized_session() as sess:
@@ -529,7 +576,7 @@ class BucketizedColumnTest(test.TestCase):
   def test_sparse_tensor_input_not_supported(self):
     price = fc.numeric_column('price')
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 1])
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'price':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0]], values=[0.3], dense_shape=[1, 1])
@@ -599,6 +646,85 @@ class BucketizedColumnTest(test.TestCase):
         sess.run(bias.assign([1.]))
         self.assertAllClose([[81.], [141.]], predictions.eval())
 
+  def test_old_linear_model_one_input_value(self):
+    """Tests linear_model() for input with shape=[1]."""
+    price = fc.numeric_column('price', shape=[1])
+    bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
+    with ops.Graph().as_default():
+      features = {'price': [[-1.], [1.], [5.], [6.]]}
+      predictions = fc_old.linear_model(features, [bucketized_price])
+      bias = get_linear_model_bias()
+      bucketized_price_var = get_linear_model_column_var(bucketized_price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        # One weight variable per bucket, all initialized to zero.
+        self.assertAllClose([[0.], [0.], [0.], [0.], [0.]],
+                            bucketized_price_var.eval())
+        self.assertAllClose([[0.], [0.], [0.], [0.]], predictions.eval())
+        sess.run(
+            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]]))
+        # price -1. is in the 0th bucket, whose weight is 10.
+        # price 1. is in the 1st bucket, whose weight is 20.
+        # price 5. is in the 3rd bucket, whose weight is 40.
+        # price 6. is in the 4th bucket, whose weight is 50.
+        self.assertAllClose([[10.], [20.], [40.], [50.]], predictions.eval())
+        sess.run(bias.assign([1.]))
+        self.assertAllClose([[11.], [21.], [41.], [51.]], predictions.eval())
+
+  def test_old_linear_model_two_input_values(self):
+    """Tests linear_model() for input with shape=[2]."""
+    price = fc.numeric_column('price', shape=[2])
+    bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
+    with ops.Graph().as_default():
+      features = {'price': [[-1., 1.], [5., 6.]]}
+      predictions = fc_old.linear_model(features, [bucketized_price])
+      bias = get_linear_model_bias()
+      bucketized_price_var = get_linear_model_column_var(bucketized_price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        # One weight per bucket per input column, all initialized to zero.
+        self.assertAllClose(
+            [[0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]],
+            bucketized_price_var.eval())
+        self.assertAllClose([[0.], [0.]], predictions.eval())
+        sess.run(
+            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.],
+                                         [60.], [70.], [80.], [90.], [100.]]))
+        # 1st example:
+        #   price -1. is in the 0th bucket, whose weight is 10.
+        #   price 1. is in the 6th bucket, whose weight is 70.
+        # 2nd example:
+        #   price 5. is in the 3rd bucket, whose weight is 40.
+        #   price 6. is in the 9th bucket, whose weight is 100.
+        self.assertAllClose([[80.], [140.]], predictions.eval())
+        sess.run(bias.assign([1.]))
+        self.assertAllClose([[81.], [141.]], predictions.eval())
+
+  def test_old_linear_model_one_input_value_old_numeric(self):
+    """Tests linear_model() for input with shape=[1]."""
+    price = fc_old.numeric_column('price', shape=[1])
+    bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
+    with ops.Graph().as_default():
+      features = {'price': [[-1.], [1.], [5.], [6.]]}
+      predictions = fc_old.linear_model(features, [bucketized_price])
+      bias = get_linear_model_bias()
+      bucketized_price_var = get_linear_model_column_var(bucketized_price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        # One weight variable per bucket, all initialized to zero.
+        self.assertAllClose([[0.], [0.], [0.], [0.], [0.]],
+                            bucketized_price_var.eval())
+        self.assertAllClose([[0.], [0.], [0.], [0.]], predictions.eval())
+        sess.run(
+            bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]]))
+        # price -1. is in the 0th bucket, whose weight is 10.
+        # price 1. is in the 1st bucket, whose weight is 20.
+        # price 5. is in the 3rd bucket, whose weight is 40.
+        # price 6. is in the 4th bucket, whose weight is 50.
+        self.assertAllClose([[10.], [20.], [40.], [50.]], predictions.eval())
+        sess.run(bias.assign([1.]))
+        self.assertAllClose([[11.], [21.], [41.], [51.]], predictions.eval())
+
 
 class HashedCategoricalColumnTest(test.TestCase):
 
@@ -608,6 +734,7 @@ class HashedCategoricalColumnTest(test.TestCase):
     self.assertEqual('aaa', a.key)
     self.assertEqual(10, a.hash_bucket_size)
     self.assertEqual(dtypes.string, a.dtype)
+    self.assertTrue(a._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -675,7 +802,9 @@ class HashedCategoricalColumnTest(test.TestCase):
         values=['omar', 'stringer', 'marlo'],
         indices=[[0, 0], [1, 0], [1, 1]],
         dense_shape=[2, 2])
-    outputs = _transform_features({'wire': wire_tensor}, [hashed_sparse], None)
+    outputs = fc._transform_features({
+        'wire': wire_tensor
+    }, [hashed_sparse], None)
     output = outputs[hashed_sparse]
     # Check exact hashed output. If hashing changes this test will break.
     expected_values = [6, 4, 1]
@@ -705,7 +834,7 @@ class HashedCategoricalColumnTest(test.TestCase):
         values=[101.],
         indices=[[0, 0]],
         dense_shape=[1, 1])
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'a_int': int_tensor,
         'a_string': string_tensor,
         'a_float': float_tensor
@@ -720,7 +849,7 @@ class HashedCategoricalColumnTest(test.TestCase):
         'wire', 10, dtype=dtypes.int64)
     wire_tensor = sparse_tensor.SparseTensor(
         values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
-    transformation_cache = FeatureTransformationCache({'wire': wire_tensor})
+    transformation_cache = fc.FeatureTransformationCache({'wire': wire_tensor})
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       transformation_cache.get(hashed_sparse, None)
 
@@ -731,7 +860,7 @@ class HashedCategoricalColumnTest(test.TestCase):
         values=[101, 201, 301],
         indices=[[0, 0], [1, 0], [1, 1]],
         dense_shape=[2, 2])
-    transformation_cache = FeatureTransformationCache({'wire': wire_tensor})
+    transformation_cache = fc.FeatureTransformationCache({'wire': wire_tensor})
     output = transformation_cache.get(hashed_sparse, None)
     # Check exact hashed output. If hashing changes this test will break.
     expected_values = [3, 7, 5]
@@ -745,7 +874,7 @@ class HashedCategoricalColumnTest(test.TestCase):
         values=constant_op.constant([101, 201, 301], dtype=dtypes.int32),
         indices=[[0, 0], [1, 0], [1, 1]],
         dense_shape=[2, 2])
-    transformation_cache = FeatureTransformationCache({'wire': wire_tensor})
+    transformation_cache = fc.FeatureTransformationCache({'wire': wire_tensor})
     output = transformation_cache.get(hashed_sparse, None)
     # Check exact hashed output. If hashing changes this test will break.
     expected_values = [3, 7, 5]
@@ -754,7 +883,7 @@ class HashedCategoricalColumnTest(test.TestCase):
 
   def test_get_sparse_tensors(self):
     hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10)
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'wire':
             sparse_tensor.SparseTensor(
                 values=['omar', 'stringer', 'marlo'],
@@ -769,7 +898,7 @@ class HashedCategoricalColumnTest(test.TestCase):
 
   def test_get_sparse_tensors_dense_input(self):
     hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10)
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'wire': (('omar', ''), ('stringer', 'marlo'))
     })
     id_weight_pair = hashed_sparse.get_sparse_tensors(transformation_cache,
@@ -800,6 +929,28 @@ class HashedCategoricalColumnTest(test.TestCase):
         # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6
         self.assertAllClose(((4.,), (6.,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    wire_column = fc.categorical_column_with_hash_bucket('wire', 4)
+    self.assertEqual(4, wire_column.num_buckets)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          wire_column.name:
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=('marlo', 'skywalker', 'omar'),
+                  dense_shape=(2, 2))
+      }, (wire_column,))
+      bias = get_linear_model_bias()
+      wire_var = get_linear_model_column_var(wire_column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval()
+        # 'marlo' -> 3: wire_var[3] = 4
+        # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6
+        self.assertAllClose(((4.,), (6.,)), predictions.eval())
+
 
 class CrossedColumnTest(test.TestCase):
 
@@ -841,8 +992,20 @@ class CrossedColumnTest(test.TestCase):
     a = fc.numeric_column('a', dtype=dtypes.int32)
     b = fc.bucketized_column(a, boundaries=[0, 1])
     crossed1 = fc.crossed_column(['d1', 'd2'], 10)
+    self.assertTrue(crossed1._is_v2_column)
+
+    crossed2 = fc.crossed_column([b, 'c', crossed1], 10)
+    self.assertTrue(crossed2._is_v2_column)
+    self.assertEqual('a_bucketized_X_c_X_d1_X_d2', crossed2.name)
+
+  def test_is_v2_column(self):
+    a = fc_old.numeric_column('a', dtype=dtypes.int32)
+    b = fc.bucketized_column(a, boundaries=[0, 1])
+    crossed1 = fc.crossed_column(['d1', 'd2'], 10)
+    self.assertTrue(crossed1._is_v2_column)
 
     crossed2 = fc.crossed_column([b, 'c', crossed1], 10)
+    self.assertFalse(crossed2._is_v2_column)
     self.assertEqual('a_bucketized_X_c_X_d1_X_d2', crossed2.name)
 
   def test_name_ordered_alphabetically(self):
@@ -927,7 +1090,7 @@ class CrossedColumnTest(test.TestCase):
             indices=[[0, 0], [1, 0], [1, 1]],
             dense_shape=[2, 2]),
     }
-    outputs = _transform_features(features, [price_cross_wire], None)
+    outputs = fc._transform_features(features, [price_cross_wire], None)
     output = outputs[price_cross_wire]
     with self.cached_session() as sess:
       output_val = sess.run(output)
@@ -943,7 +1106,7 @@ class CrossedColumnTest(test.TestCase):
     crossed1 = fc.crossed_column(['d1', 'd2'], 10)
     crossed2 = fc.crossed_column([b, 'c', crossed1], 15, hash_key=5)
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'a':
               constant_op.constant(((-1., .5), (.5, 1.))),
           'c':
@@ -983,7 +1146,7 @@ class CrossedColumnTest(test.TestCase):
     b = fc.bucketized_column(a, boundaries=(0, 1))
     crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5)
     with ops.Graph().as_default():
-      transformation_cache = FeatureTransformationCache({
+      transformation_cache = fc.FeatureTransformationCache({
           'a':
               constant_op.constant(((-1., .5), (.5, 1.))),
           'c':
@@ -1040,6 +1203,10 @@ class CrossedColumnTest(test.TestCase):
     class _TestColumnWithWeights(fc.CategoricalColumn):
       """Produces sparse IDs and sparse weights."""
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return 'test_column'
@@ -1092,6 +1259,146 @@ class CrossedColumnTest(test.TestCase):
                     dense_shape=(2, 2)),
         })
 
+  def test_old_linear_model(self):
+    """Tests linear_model.
+
+    Uses data from test_get_sparse_tesnsors_simple.
+    """
+    a = fc.numeric_column('a', dtype=dtypes.int32, shape=(2,))
+    b = fc.bucketized_column(a, boundaries=(0, 1))
+    crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'a':
+              constant_op.constant(((-1., .5), (.5, 1.))),
+          'c':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=['cA', 'cB', 'cC'],
+                  dense_shape=(2, 2)),
+      }, (crossed,))
+      bias = get_linear_model_bias()
+      crossed_var = get_linear_model_column_var(crossed)
+      with _initialized_session() as sess:
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)),
+                            crossed_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,))))
+        # Expected ids after cross = (1, 0, 1, 3, 4, 2)
+        self.assertAllClose(((3.,), (14.,)), predictions.eval())
+        sess.run(bias.assign((.1,)))
+        self.assertAllClose(((3.1,), (14.1,)), predictions.eval())
+
+  def test_old_linear_model_with_weights(self):
+
+    class _TestColumnWithWeights(fc.CategoricalColumn,
+                                 fc_old._CategoricalColumn):
+      """Produces sparse IDs and sparse weights."""
+
+      @property
+      def _is_v2_column(self):
+        return True
+
+      @property
+      def name(self):
+        return 'test_column'
+
+      @property
+      def parse_example_spec(self):
+        return {
+            self.name:
+                parsing_ops.VarLenFeature(dtypes.int32),
+            '{}_weights'.format(self.name):
+                parsing_ops.VarLenFeature(dtypes.float32),
+        }
+
+      @property
+      def _parse_example_spec(self):
+        return self.parse_example_spec
+
+      @property
+      def num_buckets(self):
+        return 5
+
+      @property
+      def _num_buckets(self):
+        return self.num_buckets
+
+      def transform_feature(self, transformation_cache, state_manager):
+        raise ValueError('Should not be called.')
+
+      def _transform_feature(self, inputs):
+        return (inputs.get(self.name),
+                inputs.get('{}_weights'.format(self.name)))
+
+      def get_sparse_tensors(self, transformation_cache, state_manager):
+        raise ValueError('Should not be called.')
+
+      def _get_sparse_tensors(self,
+                              inputs,
+                              weight_collections=None,
+                              trainable=None):
+        """Populates both id_tensor and weight_tensor."""
+        ids_and_weights = inputs.get(self)
+        return fc.CategoricalColumn.IdWeightPair(
+            id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1])
+
+    t = _TestColumnWithWeights()
+    crossed = fc.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5)
+    with ops.Graph().as_default():
+      with self.assertRaisesRegexp(
+          ValueError,
+          'crossed_column does not support weight_tensor.*{}'.format(t.name)):
+        fc_old.linear_model({
+            t.name:
+                sparse_tensor.SparseTensor(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=[0, 1, 2],
+                    dense_shape=(2, 2)),
+            '{}_weights'.format(t.name):
+                sparse_tensor.SparseTensor(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=[1., 10., 2.],
+                    dense_shape=(2, 2)),
+            'c':
+                sparse_tensor.SparseTensor(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=['cA', 'cB', 'cC'],
+                    dense_shape=(2, 2)),
+        }, (crossed,))
+
+  def test_old_linear_model_old_numeric(self):
+    """Tests linear_model.
+
+    Uses data from test_get_sparse_tesnsors_simple.
+    """
+    a = fc_old.numeric_column('a', dtype=dtypes.int32, shape=(2,))
+    b = fc.bucketized_column(a, boundaries=(0, 1))
+    crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'a':
+              constant_op.constant(((-1., .5), (.5, 1.))),
+          'c':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=['cA', 'cB', 'cC'],
+                  dense_shape=(2, 2)),
+      }, (crossed,))
+      bias = get_linear_model_bias()
+      crossed_var = get_linear_model_column_var(crossed)
+      with _initialized_session() as sess:
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)),
+                            crossed_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,))))
+        # Expected ids after cross = (1, 0, 1, 3, 4, 2)
+        self.assertAllClose(((3.,), (14.,)), predictions.eval())
+        sess.run(bias.assign((.1,)))
+        self.assertAllClose(((3.1,), (14.1,)), predictions.eval())
+
 
 class LinearModelTest(test.TestCase):
 
@@ -1108,6 +1415,10 @@ class LinearModelTest(test.TestCase):
 
     class NotSupportedColumn(fc.FeatureColumn):
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return 'NotSupportedColumn'
@@ -1189,6 +1500,10 @@ class LinearModelTest(test.TestCase):
 
     class _DenseAndSparseColumn(fc.DenseColumn, fc.CategoricalColumn):
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return 'dense_and_sparse_column'
@@ -1735,60 +2050,1519 @@ class LinearModelTest(test.TestCase):
         self.assertAllClose([[25.], [105.]], predictions2.eval())
 
 
-class FeatureLayerTest(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes()
-  def test_retrieving_input(self):
-    features = {'a': [0.]}
-    feature_layer = FeatureLayer(fc.numeric_column('a'))
-    inputs = self.evaluate(feature_layer(features))
-    self.assertAllClose([[0.]], inputs)
+class OldLinearModelTest(test.TestCase):
 
-  def test_reuses_variables(self):
-    with context.eager_mode():
-      sparse_input = sparse_tensor.SparseTensor(
-          indices=((0, 0), (1, 0), (2, 0)),
-          values=(0, 1, 2),
-          dense_shape=(3, 3))
+  def test_raises_if_empty_feature_columns(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'feature_columns must not be empty'):
+      fc_old.linear_model(features={}, feature_columns=[])
 
-      # Create feature columns (categorical and embedding).
-      categorical_column = fc.categorical_column_with_identity(
-          key='a', num_buckets=3)
-      embedding_dimension = 2
-      def _embedding_column_initializer(shape, dtype, partition_info):
-        del shape  # unused
-        del dtype  # unused
-        del partition_info  # unused
-        embedding_values = (
-            (1, 0),  # id 0
-            (0, 1),  # id 1
-            (1, 1))  # id 2
-        return embedding_values
+  def test_should_be_feature_column(self):
+    with self.assertRaisesRegexp(ValueError, 'must be a _FeatureColumn'):
+      fc_old.linear_model(features={'a': [[0]]}, feature_columns='NotSupported')
 
-      embedding_column = fc.embedding_column(
-          categorical_column,
-          dimension=embedding_dimension,
-          initializer=_embedding_column_initializer)
+  def test_should_be_dense_or_categorical_column(self):
 
-      feature_layer = FeatureLayer([embedding_column])
-      features = {'a': sparse_input}
+    class NotSupportedColumn(fc.FeatureColumn, fc_old._FeatureColumn):
 
-      inputs = feature_layer(features)
-      variables = feature_layer.variables
+      @property
+      def _is_v2_column(self):
+        return True
 
-      # Sanity check: test that the inputs are correct.
-      self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)
+      @property
+      def name(self):
+        return 'NotSupportedColumn'
+
+      def transform_feature(self, transformation_cache, state_manager):
+        pass
+
+      def _transform_feature(self, inputs):
+        pass
+
+      @property
+      def parse_example_spec(self):
+        pass
+
+      @property
+      def _parse_example_spec(self):
+        pass
+
+    with self.assertRaisesRegexp(
+        ValueError, 'must be either a _DenseColumn or _CategoricalColumn'):
+      fc_old.linear_model(
+          features={'a': [[0]]}, feature_columns=[NotSupportedColumn()])
+
+  def test_does_not_support_dict_columns(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'Expected feature_columns to be iterable, found dict.'):
+      fc_old.linear_model(
+          features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')})
+
+  def test_raises_if_duplicate_name(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'Duplicate feature column name found for columns'):
+      fc_old.linear_model(
+          features={'a': [[0]]},
+          feature_columns=[fc.numeric_column('a'),
+                           fc.numeric_column('a')])
+
+  def test_dense_bias(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      predictions = fc_old.linear_model(features, [price])
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        sess.run(price_var.assign([[10.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[15.], [55.]], predictions.eval())
+
+  def test_sparse_bias(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {'wire_cast': wire_tensor}
+      predictions = fc_old.linear_model(features, [wire_cast])
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        self.assertAllClose([[0.], [0.], [0.], [0.]], wire_cast_var.eval())
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[1005.], [10015.]], predictions.eval())
+
+  def test_dense_and_sparse_bias(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]}
+      predictions = fc_old.linear_model(features, [wire_cast, price])
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        sess.run(bias.assign([5.]))
+        sess.run(price_var.assign([[10.]]))
+        self.assertAllClose([[1015.], [10065.]], predictions.eval())
+
+  def test_dense_and_sparse_column(self):
+    """When the column is both dense and sparse, uses sparse tensors."""
+
+    class _DenseAndSparseColumn(fc.DenseColumn, fc.CategoricalColumn,
+                                fc_old._DenseColumn, fc_old._CategoricalColumn):
+
+      @property
+      def _is_v2_column(self):
+        return True
+
+      @property
+      def name(self):
+        return 'dense_and_sparse_column'
+
+      @property
+      def parse_example_spec(self):
+        return {self.name: parsing_ops.VarLenFeature(self.dtype)}
+
+      @property
+      def _parse_example_spec(self):
+        return self.parse_example_spec
+
+      def transform_feature(self, transformation_cache, state_manager):
+        raise ValueError('Should not use this method.')
+
+      def _transform_feature(self, inputs):
+        return inputs.get(self.name)
+
+      @property
+      def variable_shape(self):
+        return self.variable_shape
+
+      @property
+      def _variable_shape(self):
+        return self.variable_shape
+
+      def get_dense_tensor(self, transformation_cache, state_manager):
+        raise ValueError('Should not use this method.')
+
+      def _get_dense_tensor(self, inputs):
+        raise ValueError('Should not use this method.')
+
+      @property
+      def num_buckets(self):
+        return 4
+
+      @property
+      def _num_buckets(self):
+        return self.num_buckets
+
+      def get_sparse_tensors(self, transformation_cache, state_manager):
+        raise ValueError('Should not use this method.')
+
+      def _get_sparse_tensors(self,
+                              inputs,
+                              weight_collections=None,
+                              trainable=None):
+        sp_tensor = sparse_tensor.SparseTensor(
+            indices=[[0, 0], [1, 0], [1, 1]],
+            values=[2, 0, 3],
+            dense_shape=[2, 2])
+        return fc.CategoricalColumn.IdWeightPair(sp_tensor, None)
+
+    dense_and_sparse_column = _DenseAndSparseColumn()
+    with ops.Graph().as_default():
+      sp_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {dense_and_sparse_column.name: sp_tensor}
+      predictions = fc_old.linear_model(features, [dense_and_sparse_column])
+      bias = get_linear_model_bias()
+      dense_and_sparse_column_var = get_linear_model_column_var(
+          dense_and_sparse_column)
+      with _initialized_session() as sess:
+        sess.run(
+            dense_and_sparse_column_var.assign([[10.], [100.], [1000.],
+                                                [10000.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[1005.], [10015.]], predictions.eval())
+
+  def test_dense_multi_output(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      predictions = fc_old.linear_model(features, [price], units=3)
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose(np.zeros((3,)), bias.eval())
+        self.assertAllClose(np.zeros((1, 3)), price_var.eval())
+        sess.run(price_var.assign([[10., 100., 1000.]]))
+        sess.run(bias.assign([5., 6., 7.]))
+        self.assertAllClose([[15., 106., 1007.], [55., 506., 5007.]],
+                            predictions.eval())
+
+  def test_sparse_multi_output(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {'wire_cast': wire_tensor}
+      predictions = fc_old.linear_model(features, [wire_cast], units=3)
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        self.assertAllClose(np.zeros((3,)), bias.eval())
+        self.assertAllClose(np.zeros((4, 3)), wire_cast_var.eval())
+        sess.run(
+            wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.],
+                                  [1000., 1100., 1200.],
+                                  [10000., 11000., 12000.]]))
+        sess.run(bias.assign([5., 6., 7.]))
+        self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]],
+                            predictions.eval())
+
+  def test_dense_multi_dimension(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1., 2.], [5., 6.]]}
+      predictions = fc_old.linear_model(features, [price])
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose([[0.], [0.]], price_var.eval())
+        sess.run(price_var.assign([[10.], [100.]]))
+        self.assertAllClose([[210.], [650.]], predictions.eval())
+
+  def test_sparse_multi_rank(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default():
+      wire_tensor = array_ops.sparse_placeholder(dtypes.string)
+      wire_value = sparse_tensor.SparseTensorValue(
+          values=['omar', 'stringer', 'marlo', 'omar'],  # hashed = [2, 0, 3, 2]
+          indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]],
+          dense_shape=[2, 2, 2])
+      features = {'wire_cast': wire_tensor}
+      predictions = fc_old.linear_model(features, [wire_cast])
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        self.assertAllClose(np.zeros((4, 1)), wire_cast_var.eval())
+        self.assertAllClose(
+            np.zeros((2, 1)),
+            predictions.eval(feed_dict={wire_tensor: wire_value}))
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        self.assertAllClose(
+            [[1010.], [11000.]],
+            predictions.eval(feed_dict={wire_tensor: wire_value}))
+
+  def test_sparse_combiner(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {'wire_cast': wire_tensor}
+      predictions = fc_old.linear_model(
+          features, [wire_cast], sparse_combiner='mean')
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[1005.], [5010.]], predictions.eval())
+
+  def test_sparse_combiner_with_negative_weights(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    wire_cast_weights = fc.weighted_categorical_column(wire_cast, 'weights')
+
+    with ops.Graph().as_default():
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar', 'stringer', 'marlo'],  # hashed to = [2, 0, 3]
+          indices=[[0, 0], [1, 0], [1, 1]],
+          dense_shape=[2, 2])
+      features = {
+          'wire_cast': wire_tensor,
+          'weights': constant_op.constant([[1., 1., -1.0]])
+      }
+      predictions = fc_old.linear_model(
+          features, [wire_cast_weights], sparse_combiner='sum')
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      with _initialized_session() as sess:
+        sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]]))
+        sess.run(bias.assign([5.]))
+        self.assertAllClose([[1005.], [-9985.]], predictions.eval())
+
+  def test_dense_multi_dimension_multi_output(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1., 2.], [5., 6.]]}
+      predictions = fc_old.linear_model(features, [price], units=3)
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose(np.zeros((3,)), bias.eval())
+        self.assertAllClose(np.zeros((2, 3)), price_var.eval())
+        sess.run(price_var.assign([[1., 2., 3.], [10., 100., 1000.]]))
+        sess.run(bias.assign([2., 3., 4.]))
+        self.assertAllClose([[23., 205., 2007.], [67., 613., 6019.]],
+                            predictions.eval())
+
+  def test_raises_if_shape_mismatch(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      with self.assertRaisesRegexp(
+          Exception,
+          r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
+        fc_old.linear_model(features, [price])
+
+  def test_dense_reshaping(self):
+    price = fc.numeric_column('price', shape=[1, 2])
+    with ops.Graph().as_default():
+      features = {'price': [[[1., 2.]], [[5., 6.]]]}
+      predictions = fc_old.linear_model(features, [price])
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        self.assertAllClose([[0.], [0.]], price_var.eval())
+        self.assertAllClose([[0.], [0.]], predictions.eval())
+        sess.run(price_var.assign([[10.], [100.]]))
+        self.assertAllClose([[210.], [650.]], predictions.eval())
+
+  def test_dense_multi_column(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
+      predictions = fc_old.linear_model(features, [price1, price2])
+      bias = get_linear_model_bias()
+      price1_var = get_linear_model_column_var(price1)
+      price2_var = get_linear_model_column_var(price2)
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias.eval())
+        self.assertAllClose([[0.], [0.]], price1_var.eval())
+        self.assertAllClose([[0.]], price2_var.eval())
+        self.assertAllClose([[0.], [0.]], predictions.eval())
+        sess.run(price1_var.assign([[10.], [100.]]))
+        sess.run(price2_var.assign([[1000.]]))
+        sess.run(bias.assign([7.]))
+        self.assertAllClose([[3217.], [4657.]], predictions.eval())
+
+  def test_fills_cols_to_vars(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
+      cols_to_vars = {}
+      fc_old.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars)
+      bias = get_linear_model_bias()
+      price1_var = get_linear_model_column_var(price1)
+      price2_var = get_linear_model_column_var(price2)
+      self.assertAllEqual(cols_to_vars['bias'], [bias])
+      self.assertAllEqual(cols_to_vars[price1], [price1_var])
+      self.assertAllEqual(cols_to_vars[price2], [price2_var])
+
+  def test_fills_cols_to_vars_partitioned_variables(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2', shape=3)
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1., 2.], [6., 7.]],
+          'price2': [[3., 4., 5.], [8., 9., 10.]]
+      }
+      cols_to_vars = {}
+      with variable_scope.variable_scope(
+          'linear',
+          partitioner=partitioned_variables.fixed_size_partitioner(2, axis=0)):
+        fc_old.linear_model(
+            features, [price1, price2], cols_to_vars=cols_to_vars)
+      with _initialized_session():
+        self.assertEqual([0.], cols_to_vars['bias'][0].eval())
+        # Partitioning shards the [2, 1] price1 var into 2 [1, 1] Variables.
+        self.assertAllEqual([[0.]], cols_to_vars[price1][0].eval())
+        self.assertAllEqual([[0.]], cols_to_vars[price1][1].eval())
+        # Partitioning shards the [3, 1] price2 var into a [2, 1] Variable and
+        # a [1, 1] Variable.
+        self.assertAllEqual([[0.], [0.]], cols_to_vars[price2][0].eval())
+        self.assertAllEqual([[0.]], cols_to_vars[price2][1].eval())
+
+  def test_fills_cols_to_output_tensors(self):
+    # Provide three _DenseColumn's to input_layer: a _NumericColumn, a
+    # _BucketizedColumn, and an _EmbeddingColumn.  Only the _EmbeddingColumn
+    # creates a Variable.
+    apple_numeric_column = fc.numeric_column('apple_numeric_column')
+    banana_dense_feature = fc.numeric_column('banana_dense_feature')
+    banana_dense_feature_bucketized = fc.bucketized_column(
+        banana_dense_feature, boundaries=[0.])
+    cherry_sparse_column = fc.categorical_column_with_hash_bucket(
+        'cherry_sparse_feature', hash_bucket_size=5)
+    dragonfruit_embedding_column = fc.embedding_column(
+        cherry_sparse_column, dimension=10)
+    with ops.Graph().as_default():
+      features = {
+          'apple_numeric_column': [[3.], [4.]],
+          'banana_dense_feature': [[-1.], [4.]],
+          'cherry_sparse_feature': [['a'], ['x']],
+      }
+      cols_to_output_tensors = {}
+      all_cols = [
+          apple_numeric_column, banana_dense_feature_bucketized,
+          dragonfruit_embedding_column
+      ]
+      input_layer = fc_old.input_layer(
+          features, all_cols, cols_to_output_tensors=cols_to_output_tensors)
+
+      # We check the mapping by checking that we have the right keys,
+      # and that the values (output_tensors) were indeed the ones used to
+      # form the input layer.
+      self.assertItemsEqual(all_cols, cols_to_output_tensors.keys())
+      input_layer_inputs = [tensor for tensor in input_layer.op.inputs[:-1]]
+      output_tensors = [tensor for tensor in cols_to_output_tensors.values()]
+      self.assertItemsEqual(input_layer_inputs, output_tensors)
+
+  def test_dense_collection(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default() as g:
+      features = {'price': [[1.], [5.]]}
+      fc_old.linear_model(features, [price], weight_collections=['my-vars'])
+      my_vars = g.get_collection('my-vars')
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      self.assertIn(bias, my_vars)
+      self.assertIn(price_var, my_vars)
+
+  def test_sparse_collection(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default() as g:
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      features = {'wire_cast': wire_tensor}
+      fc_old.linear_model(features, [wire_cast], weight_collections=['my-vars'])
+      my_vars = g.get_collection('my-vars')
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      self.assertIn(bias, my_vars)
+      self.assertIn(wire_cast_var, my_vars)
+
+  def test_dense_trainable_default(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default() as g:
+      features = {'price': [[1.], [5.]]}
+      fc_old.linear_model(features, [price])
+      bias = get_linear_model_bias()
+      price_var = get_linear_model_column_var(price)
+      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertIn(bias, trainable_vars)
+      self.assertIn(price_var, trainable_vars)
+
+  def test_sparse_trainable_default(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default() as g:
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      features = {'wire_cast': wire_tensor}
+      fc_old.linear_model(features, [wire_cast])
+      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      bias = get_linear_model_bias()
+      wire_cast_var = get_linear_model_column_var(wire_cast)
+      self.assertIn(bias, trainable_vars)
+      self.assertIn(wire_cast_var, trainable_vars)
+
+  def test_dense_trainable_false(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default() as g:
+      features = {'price': [[1.], [5.]]}
+      fc_old.linear_model(features, [price], trainable=False)
+      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertEqual([], trainable_vars)
+
+  def test_sparse_trainable_false(self):
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default() as g:
+      wire_tensor = sparse_tensor.SparseTensor(
+          values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      features = {'wire_cast': wire_tensor}
+      fc_old.linear_model(features, [wire_cast], trainable=False)
+      trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertEqual([], trainable_vars)
+
+  def test_column_order(self):
+    price_a = fc.numeric_column('price_a')
+    price_b = fc.numeric_column('price_b')
+    wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4)
+    with ops.Graph().as_default() as g:
+      features = {
+          'price_a': [[1.]],
+          'price_b': [[3.]],
+          'wire_cast':
+              sparse_tensor.SparseTensor(
+                  values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      }
+      fc_old.linear_model(
+          features, [price_a, wire_cast, price_b],
+          weight_collections=['my-vars'])
+      my_vars = g.get_collection('my-vars')
+      self.assertIn('price_a', my_vars[0].name)
+      self.assertIn('price_b', my_vars[1].name)
+      self.assertIn('wire_cast', my_vars[2].name)
+
+    with ops.Graph().as_default() as g:
+      features = {
+          'price_a': [[1.]],
+          'price_b': [[3.]],
+          'wire_cast':
+              sparse_tensor.SparseTensor(
+                  values=['omar'], indices=[[0, 0]], dense_shape=[1, 1])
+      }
+      fc_old.linear_model(
+          features, [wire_cast, price_b, price_a],
+          weight_collections=['my-vars'])
+      my_vars = g.get_collection('my-vars')
+      self.assertIn('price_a', my_vars[0].name)
+      self.assertIn('price_b', my_vars[1].name)
+      self.assertIn('wire_cast', my_vars[2].name)
+
+  def test_static_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1.], [5.], [7.]],  # batchsize = 3
+          'price2': [[3.], [4.]]  # batchsize = 2
+      }
+    with self.assertRaisesRegexp(
+        ValueError,
+        'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
+      fc_old.linear_model(features, [price1, price2])
+
+  def test_subset_of_static_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    price3 = fc.numeric_column('price3')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
+          'price2': [[3.], [4.]],  # batchsize = 2
+          'price3': [[3.], [4.], [5.]]  # batchsize = 3
+      }
+      with self.assertRaisesRegexp(
+          ValueError,
+          'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
+        fc_old.linear_model(features, [price1, price2, price3])
+
+  def test_runtime_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
+          'price2': [[3.], [4.]]  # batchsize = 2
+      }
+      predictions = fc_old.linear_model(features, [price1, price2])
+      with _initialized_session() as sess:
+        with self.assertRaisesRegexp(errors.OpError,
+                                     'must have the same size and shape'):
+          sess.run(
+              predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]})
+
+  def test_runtime_batch_size_matches(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
+          'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
+      }
+      predictions = fc_old.linear_model(features, [price1, price2])
+      with _initialized_session() as sess:
+        sess.run(
+            predictions,
+            feed_dict={
+                features['price1']: [[1.], [5.]],
+                features['price2']: [[1.], [5.]],
+            })
+
+  def test_with_1d_sparse_tensor(self):
+    price = fc.numeric_column('price')
+    price_buckets = fc.bucketized_column(
+        price, boundaries=[
+            0.,
+            10.,
+            100.,
+        ])
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+
+    # Provides 1-dim tensor and dense tensor.
+    features = {
+        'price':
+            constant_op.constant([
+                -1.,
+                12.,
+            ]),
+        'body-style':
+            sparse_tensor.SparseTensor(
+                indices=((0,), (1,)),
+                values=('sedan', 'hardtop'),
+                dense_shape=(2,)),
+    }
+    self.assertEqual(1, features['price'].shape.ndims)
+    self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
+
+    net = fc_old.linear_model(features, [price_buckets, body_style])
+    with _initialized_session() as sess:
+      bias = get_linear_model_bias()
+      price_buckets_var = get_linear_model_column_var(price_buckets)
+      body_style_var = get_linear_model_column_var(body_style)
+
+      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
+      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
+      sess.run(bias.assign([5.]))
+
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net))
+
+  def test_with_1d_unknown_shape_sparse_tensor(self):
+    price = fc.numeric_column('price')
+    price_buckets = fc.bucketized_column(
+        price, boundaries=[
+            0.,
+            10.,
+            100.,
+        ])
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+    country = fc.categorical_column_with_vocabulary_list(
+        'country', vocabulary_list=['US', 'JP', 'CA'])
+
+    # Provides 1-dim tensor and dense tensor.
+    features = {
+        'price': array_ops.placeholder(dtypes.float32),
+        'body-style': array_ops.sparse_placeholder(dtypes.string),
+        'country': array_ops.placeholder(dtypes.string),
+    }
+    self.assertIsNone(features['price'].shape.ndims)
+    self.assertIsNone(features['body-style'].get_shape().ndims)
+
+    price_data = np.array([-1., 12.])
+    body_style_data = sparse_tensor.SparseTensorValue(
+        indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,))
+    country_data = np.array(['US', 'CA'])
+
+    net = fc_old.linear_model(features, [price_buckets, body_style, country])
+    bias = get_linear_model_bias()
+    price_buckets_var = get_linear_model_column_var(price_buckets)
+    body_style_var = get_linear_model_column_var(body_style)
+    with _initialized_session() as sess:
+      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
+      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
+      sess.run(bias.assign([5.]))
+
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]],
+                          sess.run(
+                              net,
+                              feed_dict={
+                                  features['price']: price_data,
+                                  features['body-style']: body_style_data,
+                                  features['country']: country_data
+                              }))
+
+  def test_with_rank_0_feature(self):
+    price = fc.numeric_column('price')
+    features = {
+        'price': constant_op.constant(0),
+    }
+    self.assertEqual(0, features['price'].shape.ndims)
+
+    # Static rank 0 should fail
+    with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
+      fc_old.linear_model(features, [price])
+
+    # Dynamic rank 0 should fail
+    features = {
+        'price': array_ops.placeholder(dtypes.float32),
+    }
+    net = fc_old.linear_model(features, [price])
+    self.assertEqual(1, net.shape[1])
+    with _initialized_session() as sess:
+      with self.assertRaisesOpError('Feature .* cannot have rank 0'):
+        sess.run(net, feed_dict={features['price']: np.array(1)})
+
+  def test_multiple_linear_models(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features1 = {'price': [[1.], [5.]]}
+      features2 = {'price': [[2.], [10.]]}
+      predictions1 = fc_old.linear_model(features1, [price])
+      predictions2 = fc_old.linear_model(features2, [price])
+      bias1 = get_linear_model_bias(name='linear_model')
+      bias2 = get_linear_model_bias(name='linear_model_1')
+      price_var1 = get_linear_model_column_var(price, name='linear_model')
+      price_var2 = get_linear_model_column_var(price, name='linear_model_1')
+      with _initialized_session() as sess:
+        self.assertAllClose([0.], bias1.eval())
+        sess.run(price_var1.assign([[10.]]))
+        sess.run(bias1.assign([5.]))
+        self.assertAllClose([[15.], [55.]], predictions1.eval())
+        self.assertAllClose([0.], bias2.eval())
+        sess.run(price_var2.assign([[10.]]))
+        sess.run(bias2.assign([5.]))
+        self.assertAllClose([[25.], [105.]], predictions2.eval())
+
+  def test_linear_model_v1_shared_embedding_all_other_v2(self):
+    price = fc.numeric_column('price')  # v2
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)  # v2
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)  # v2
+    categorical_column_a = fc_old.categorical_column_with_identity(
+        key='aaa', num_buckets=3)  # v2
+    categorical_column_b = fc_old.categorical_column_with_identity(
+        key='bbb', num_buckets=3)  # v2
+    shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b], dimension=2)  # v1
+    all_cols = [
+        price, some_embedding_column, shared_embedding_a, shared_embedding_b
+    ]
+
+    with ops.Graph().as_default():
+      features = {
+          'price': [[3.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      fc_old.linear_model(features, all_cols)
+      bias = get_linear_model_bias()
+      with _initialized_session():
+        self.assertAllClose([0.], bias.eval())
+
+  def test_linear_model_v1_shared_embedding_with_v2_cat_all_other_v2(self):
+    price = fc.numeric_column('price')  # v2
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)  # v2
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)  # v2
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)  # v2
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)  # v2
+    shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b], dimension=2)  # v1
+    all_cols = [
+        price, some_embedding_column, shared_embedding_a, shared_embedding_b
+    ]
+
+    with ops.Graph().as_default():
+      features = {
+          'price': [[3.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      fc_old.linear_model(features, all_cols)
+      bias = get_linear_model_bias()
+      with _initialized_session():
+        self.assertAllClose([0.], bias.eval())
+
+  def test_linear_model_v1_v2_mix(self):
+    price = fc.numeric_column('price')  # v2
+    some_sparse_column = fc_old.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)  # v1
+    some_embedding_column = fc_old.embedding_column(
+        some_sparse_column, dimension=10)  # v1
+    categorical_column_a = fc_old.categorical_column_with_identity(
+        key='aaa', num_buckets=3)  # v2
+    categorical_column_b = fc_old.categorical_column_with_identity(
+        key='bbb', num_buckets=3)  # v2
+    shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b], dimension=2)  # v1
+    all_cols = [
+        price, some_embedding_column, shared_embedding_a, shared_embedding_b
+    ]
+
+    with ops.Graph().as_default():
+      features = {
+          'price': [[3.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      fc_old.linear_model(features, all_cols)
+      bias = get_linear_model_bias()
+      with _initialized_session():
+        self.assertAllClose([0.], bias.eval())
+
+  def test_linear_model_v2_shared_embedding_all_other_v1(self):
+    price = fc_old.numeric_column('price')  # v1
+    some_sparse_column = fc_old.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)  # v1
+    some_embedding_column = fc_old.embedding_column(
+        some_sparse_column, dimension=10)  # v1
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)  # v2
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)  # v2
+    shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns_v2(
+        [categorical_column_a, categorical_column_b], dimension=2)  # v2
+    all_cols = [
+        price, some_embedding_column, shared_embedding_a, shared_embedding_b
+    ]
+
+    with ops.Graph().as_default():
+      features = {
+          'price': [[3.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      with self.assertRaisesRegexp(ValueError,
+                                   'SharedEmbeddingColumns are not supported'):
+        fc_old.linear_model(features, all_cols)
+
+
+class FeatureLayerTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes()
+  def test_retrieving_input(self):
+    features = {'a': [0.]}
+    feature_layer = fc.FeatureLayer(fc.numeric_column('a'))
+    inputs = self.evaluate(feature_layer(features))
+    self.assertAllClose([[0.]], inputs)
+
+  def test_reuses_variables(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(
+          key='a', num_buckets=3)
+      embedding_dimension = 2
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      feature_layer = fc.FeatureLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      inputs = feature_layer(features)
+      variables = feature_layer.variables
+
+      # Sanity check: test that the inputs are correct.
+      self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)
+
+      # Check that only one variable was created.
+      self.assertEqual(1, len(variables))
+
+      # Check that invoking feature_layer on the same features does not create
+      # additional variables
+      _ = feature_layer(features)
+      self.assertEqual(1, len(variables))
+      self.assertEqual(variables[0], feature_layer.variables[0])
+
+  def test_feature_column_feature_layer_gradient(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(
+          key='a', num_buckets=3)
+      embedding_dimension = 2
+
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      feature_layer = fc.FeatureLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      def scale_matrix():
+        matrix = feature_layer(features)
+        return 2 * matrix
+
+      # Sanity check: Verify that scale_matrix returns the correct output.
+      self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix())
+
+      # Check that the returned gradient is correct.
+      grad_function = backprop.implicit_grad(scale_matrix)
+      grads_and_vars = grad_function()
+      indexed_slice = grads_and_vars[0][0]
+      gradient = grads_and_vars[0][0].values
+
+      self.assertAllEqual([0, 1, 2], indexed_slice.indices)
+      self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)
+
+  def test_raises_if_empty_feature_columns(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'feature_columns must not be empty'):
+      fc.FeatureLayer(feature_columns=[])(features={})
+
+  def test_should_be_dense_column(self):
+    with self.assertRaisesRegexp(ValueError, 'must be a DenseColumn'):
+      fc.FeatureLayer(feature_columns=[
+          fc.categorical_column_with_hash_bucket('wire_cast', 4)
+      ])(
+          features={
+              'a': [[0]]
+          })
+
+  def test_does_not_support_dict_columns(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'Expected feature_columns to be iterable, found dict.'):
+      fc.FeatureLayer(feature_columns={'a': fc.numeric_column('a')})(
+          features={
+              'a': [[0]]
+          })
+
+  def test_bare_column(self):
+    with ops.Graph().as_default():
+      features = features = {'a': [0.]}
+      net = fc.FeatureLayer(fc.numeric_column('a'))(features)
+      with _initialized_session():
+        self.assertAllClose([[0.]], net.eval())
+
+  def test_column_generator(self):
+    with ops.Graph().as_default():
+      features = features = {'a': [0.], 'b': [1.]}
+      columns = (fc.numeric_column(key) for key in features)
+      net = fc.FeatureLayer(columns)(features)
+      with _initialized_session():
+        self.assertAllClose([[0., 1.]], net.eval())
+
+  def test_raises_if_duplicate_name(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'Duplicate feature column name found for columns'):
+      fc.FeatureLayer(
+          feature_columns=[fc.numeric_column('a'),
+                           fc.numeric_column('a')])(
+                               features={
+                                   'a': [[0]]
+                               })
+
+  def test_one_column(self):
+    price = fc.numeric_column('price')
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      net = fc.FeatureLayer([price])(features)
+      with _initialized_session():
+        self.assertAllClose([[1.], [5.]], net.eval())
+
+  def test_multi_dimension(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1., 2.], [5., 6.]]}
+      net = fc.FeatureLayer([price])(features)
+      with _initialized_session():
+        self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
+
+  def test_compute_output_shape(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2', shape=4)
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1., 2.], [5., 6.]],
+          'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]]
+      }
+      feature_layer = fc.FeatureLayer([price1, price2])
+      self.assertEqual((None, 6), feature_layer.compute_output_shape((None,)))
+      net = feature_layer(features)
+      with _initialized_session():
+        self.assertAllClose(
+            [[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]], net.eval())
+
+  def test_raises_if_shape_mismatch(self):
+    price = fc.numeric_column('price', shape=2)
+    with ops.Graph().as_default():
+      features = {'price': [[1.], [5.]]}
+      with self.assertRaisesRegexp(
+          Exception,
+          r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
+        fc.FeatureLayer([price])(features)
+
+  def test_reshaping(self):
+    price = fc.numeric_column('price', shape=[1, 2])
+    with ops.Graph().as_default():
+      features = {'price': [[[1., 2.]], [[5., 6.]]]}
+      net = fc.FeatureLayer([price])(features)
+      with _initialized_session():
+        self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
+
+  def test_multi_column(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1., 2.], [5., 6.]],
+          'price2': [[3.], [4.]]
+      }
+      net = fc.FeatureLayer([price1, price2])(features)
+      with _initialized_session():
+        self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
+
+  def test_cols_to_output_tensors(self):
+    price1 = fc.numeric_column('price1', shape=2)
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      cols_dict = {}
+      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
+      feature_layer = fc.FeatureLayer([price1, price2])
+      net = feature_layer(features, cols_dict)
+      with _initialized_session():
+        self.assertAllClose([[1., 2.], [5., 6.]], cols_dict[price1].eval())
+        self.assertAllClose([[3.], [4.]], cols_dict[price2].eval())
+        self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
+
+  def test_column_order(self):
+    price_a = fc.numeric_column('price_a')
+    price_b = fc.numeric_column('price_b')
+    with ops.Graph().as_default():
+      features = {
+          'price_a': [[1.]],
+          'price_b': [[3.]],
+      }
+      net1 = fc.FeatureLayer([price_a, price_b])(features)
+      net2 = fc.FeatureLayer([price_b, price_a])(features)
+      with _initialized_session():
+        self.assertAllClose([[1., 3.]], net1.eval())
+        self.assertAllClose([[1., 3.]], net2.eval())
+
+  def test_fails_for_categorical_column(self):
+    animal = fc.categorical_column_with_identity('animal', num_buckets=4)
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+      with self.assertRaisesRegexp(Exception, 'must be a DenseColumn'):
+        fc.FeatureLayer([animal])(features)
+
+  def test_static_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[1.], [5.], [7.]],  # batchsize = 3
+          'price2': [[3.], [4.]]  # batchsize = 2
+      }
+      with self.assertRaisesRegexp(
+          ValueError,
+          'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
+        fc.FeatureLayer([price1, price2])(features)
+
+  def test_subset_of_static_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    price3 = fc.numeric_column('price3')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
+          'price2': [[3.], [4.]],  # batchsize = 2
+          'price3': [[3.], [4.], [5.]]  # batchsize = 3
+      }
+      with self.assertRaisesRegexp(
+          ValueError,
+          'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
+        fc.FeatureLayer([price1, price2, price3])(features)
+
+  def test_runtime_batch_size_mismatch(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
+          'price2': [[3.], [4.]]  # batchsize = 2
+      }
+      net = fc.FeatureLayer([price1, price2])(features)
+      with _initialized_session() as sess:
+        with self.assertRaisesRegexp(errors.OpError,
+                                     'Dimensions of inputs should match'):
+          sess.run(net, feed_dict={features['price1']: [[1.], [5.], [7.]]})
+
+  def test_runtime_batch_size_matches(self):
+    price1 = fc.numeric_column('price1')
+    price2 = fc.numeric_column('price2')
+    with ops.Graph().as_default():
+      features = {
+          'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
+          'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
+      }
+      net = fc.FeatureLayer([price1, price2])(features)
+      with _initialized_session() as sess:
+        sess.run(
+            net,
+            feed_dict={
+                features['price1']: [[1.], [5.]],
+                features['price2']: [[1.], [5.]],
+            })
+
+  def test_multiple_layers_with_same_embedding_column(self):
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)
+
+    with ops.Graph().as_default():
+      features = {
+          'sparse_feature': [['a'], ['x']],
+      }
+      all_cols = [some_embedding_column]
+      fc.FeatureLayer(all_cols)(features)
+      fc.FeatureLayer(all_cols)(features)
+      # Make sure that 2 variables get created in this case.
+      self.assertEqual(2, len(
+          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+      expected_var_names = [
+          'feature_layer/sparse_feature_embedding/embedding_weights:0',
+          'feature_layer_1/sparse_feature_embedding/embedding_weights:0'
+      ]
+      self.assertItemsEqual(
+          expected_var_names,
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+
+  def test_multiple_layers_with_same_shared_embedding_column(self):
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
+        [categorical_column_b, categorical_column_a],
+        dimension=embedding_dimension)
+    shared_state_manager = fc.SharedEmbeddingStateManager(
+        name='shared_feature_layer')
+
+    with ops.Graph().as_default():
+      features = {
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      all_cols = [embedding_column_a, embedding_column_b]
+      fc.FeatureLayer(
+          all_cols, shared_state_manager=shared_state_manager)(
+              features)
+      fc.FeatureLayer(
+          all_cols, shared_state_manager=shared_state_manager)(
+              features)
+      # Make sure that only 1 variable gets created in this case.
+      self.assertEqual(1, len(
+          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+      self.assertItemsEqual(
+          ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+
+  def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self):
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
+        [categorical_column_b, categorical_column_a],
+        dimension=embedding_dimension)
+    all_cols = [embedding_column_a, embedding_column_b]
+
+    with ops.Graph().as_default():
+      shared_state_manager1 = fc.SharedEmbeddingStateManager(
+          name='shared_feature_layer')
+      features = {
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      fc.FeatureLayer(
+          all_cols, shared_state_manager=shared_state_manager1)(
+              features)
+      # Make sure that only 1 variable gets created in this case.
+      self.assertEqual(1, len(
+          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+
+    with ops.Graph().as_default():
+      shared_state_manager2 = fc.SharedEmbeddingStateManager(
+          name='shared_feature_layer')
+      features1 = {
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+
+      fc.FeatureLayer(
+          all_cols, shared_state_manager=shared_state_manager2)(
+              features1)
+      # Make sure that only 1 variable gets created in this case.
+      self.assertEqual(1, len(
+          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+      self.assertItemsEqual(
+          ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+
+  def test_with_numpy_input_fn(self):
+    embedding_values = (
+        (1., 2., 3., 4., 5.),  # id 0
+        (6., 7., 8., 9., 10.),  # id 1
+        (11., 12., 13., 14., 15.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      del shape, dtype, partition_info
+      return embedding_values
+
+    # price has 1 dimension in feature_layer
+    price = fc.numeric_column('price')
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+    # one_hot_body_style has 3 dims in feature_layer.
+    one_hot_body_style = fc.indicator_column(body_style)
+    # embedded_body_style has 5 dims in feature_layer.
+    embedded_body_style = fc.embedding_column(
+        body_style, dimension=5, initializer=_initializer)
+
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'price': np.array([11., 12., 13., 14.]),
+            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
+        },
+        batch_size=2,
+        shuffle=False)
+    features = input_fn()
+    net = fc.FeatureLayer([price, one_hot_body_style, embedded_body_style])(
+        features)
+    self.assertEqual(1 + 3 + 5, net.shape[1])
+    with _initialized_session() as sess:
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
+
+      # Each row is formed by concatenating `embedded_body_style`,
+      # `one_hot_body_style`, and `price` in order.
+      self.assertAllEqual(
+          [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
+           [1., 2., 3., 4., 5., 1., 0., 0., 12]],
+          sess.run(net))
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def test_with_1d_sparse_tensor(self):
+    embedding_values = (
+        (1., 2., 3., 4., 5.),  # id 0
+        (6., 7., 8., 9., 10.),  # id 1
+        (11., 12., 13., 14., 15.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      del shape, dtype, partition_info
+      return embedding_values
+
+    # price has 1 dimension in feature_layer
+    price = fc.numeric_column('price')
+
+    # one_hot_body_style has 3 dims in feature_layer.
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+    one_hot_body_style = fc.indicator_column(body_style)
+
+    # embedded_body_style has 5 dims in feature_layer.
+    country = fc.categorical_column_with_vocabulary_list(
+        'country', vocabulary_list=['US', 'JP', 'CA'])
+    embedded_country = fc.embedding_column(
+        country, dimension=5, initializer=_initializer)
+
+    # Provides 1-dim tensor and dense tensor.
+    features = {
+        'price': constant_op.constant([11., 12.,]),
+        'body-style': sparse_tensor.SparseTensor(
+            indices=((0,), (1,)),
+            values=('sedan', 'hardtop'),
+            dense_shape=(2,)),
+        # This is dense tensor for the categorical_column.
+        'country': constant_op.constant(['CA', 'US']),
+    }
+    self.assertEqual(1, features['price'].shape.ndims)
+    self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
+    self.assertEqual(1, features['country'].shape.ndims)
+
+    net = fc.FeatureLayer([price, one_hot_body_style, embedded_country])(
+        features)
+    self.assertEqual(1 + 3 + 5, net.shape[1])
+    with _initialized_session() as sess:
+
+      # Each row is formed by concatenating `embedded_body_style`,
+      # `one_hot_body_style`, and `price` in order.
+      self.assertAllEqual(
+          [[0., 0., 1., 11., 12., 13., 14., 15., 11.],
+           [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
+          sess.run(net))
+
+  def test_with_1d_unknown_shape_sparse_tensor(self):
+    embedding_values = (
+        (1., 2.),  # id 0
+        (6., 7.),  # id 1
+        (11., 12.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      del shape, dtype, partition_info
+      return embedding_values
+
+    # price has 1 dimension in feature_layer
+    price = fc.numeric_column('price')
+
+    # one_hot_body_style has 3 dims in feature_layer.
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+    one_hot_body_style = fc.indicator_column(body_style)
+
+    # embedded_body_style has 5 dims in feature_layer.
+    country = fc.categorical_column_with_vocabulary_list(
+        'country', vocabulary_list=['US', 'JP', 'CA'])
+    embedded_country = fc.embedding_column(
+        country, dimension=2, initializer=_initializer)
+
+    # Provides 1-dim tensor and dense tensor.
+    features = {
+        'price': array_ops.placeholder(dtypes.float32),
+        'body-style': array_ops.sparse_placeholder(dtypes.string),
+        # This is dense tensor for the categorical_column.
+        'country': array_ops.placeholder(dtypes.string),
+    }
+    self.assertIsNone(features['price'].shape.ndims)
+    self.assertIsNone(features['body-style'].get_shape().ndims)
+    self.assertIsNone(features['country'].shape.ndims)
+
+    price_data = np.array([11., 12.])
+    body_style_data = sparse_tensor.SparseTensorValue(
+        indices=((0,), (1,)),
+        values=('sedan', 'hardtop'),
+        dense_shape=(2,))
+    country_data = np.array([['US'], ['CA']])
+
+    net = fc.FeatureLayer([price, one_hot_body_style, embedded_country])(
+        features)
+    self.assertEqual(1 + 3 + 2, net.shape[1])
+    with _initialized_session() as sess:
+
+      # Each row is formed by concatenating `embedded_body_style`,
+      # `one_hot_body_style`, and `price` in order.
+      self.assertAllEqual(
+          [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]],
+          sess.run(
+              net,
+              feed_dict={
+                  features['price']: price_data,
+                  features['body-style']: body_style_data,
+                  features['country']: country_data
+              }))
+
+  def test_with_rank_0_feature(self):
+    # price has 1 dimension in feature_layer
+    price = fc.numeric_column('price')
+    features = {
+        'price': constant_op.constant(0),
+    }
+    self.assertEqual(0, features['price'].shape.ndims)
+
+    # Static rank 0 should fail
+    with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
+      fc.FeatureLayer([price])(features)
+
+    # Dynamic rank 0 should fail
+    features = {
+        'price': array_ops.placeholder(dtypes.float32),
+    }
+    net = fc.FeatureLayer([price])(features)
+    self.assertEqual(1, net.shape[1])
+    with _initialized_session() as sess:
+      with self.assertRaisesOpError('Feature .* cannot have rank 0'):
+        sess.run(net, feed_dict={features['price']: np.array(1)})
+
+
+class InputLayerTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_retrieving_input(self):
+    features = {'a': [0.]}
+    input_layer = fc_old.InputLayer(fc.numeric_column('a'))
+    inputs = self.evaluate(input_layer(features))
+    self.assertAllClose([[0.]], inputs)
+
+  def test_reuses_variables(self):
+    with context.eager_mode():
+      sparse_input = sparse_tensor.SparseTensor(
+          indices=((0, 0), (1, 0), (2, 0)),
+          values=(0, 1, 2),
+          dense_shape=(3, 3))
+
+      # Create feature columns (categorical and embedding).
+      categorical_column = fc.categorical_column_with_identity(
+          key='a', num_buckets=3)
+      embedding_dimension = 2
+
+      def _embedding_column_initializer(shape, dtype, partition_info):
+        del shape  # unused
+        del dtype  # unused
+        del partition_info  # unused
+        embedding_values = (
+            (1, 0),  # id 0
+            (0, 1),  # id 1
+            (1, 1))  # id 2
+        return embedding_values
+
+      embedding_column = fc.embedding_column(
+          categorical_column,
+          dimension=embedding_dimension,
+          initializer=_embedding_column_initializer)
+
+      input_layer = fc_old.InputLayer([embedding_column])
+      features = {'a': sparse_input}
+
+      inputs = input_layer(features)
+      variables = input_layer.variables
+
+      # Sanity check: test that the inputs are correct.
+      self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)
 
       # Check that only one variable was created.
       self.assertEqual(1, len(variables))
 
-      # Check that invoking feature_layer on the same features does not create
+      # Check that invoking input_layer on the same features does not create
       # additional variables
-      _ = feature_layer(features)
+      _ = input_layer(features)
       self.assertEqual(1, len(variables))
-      self.assertEqual(variables[0], feature_layer.variables[0])
+      self.assertEqual(variables[0], input_layer.variables[0])
 
-  def test_feature_column_feature_layer_gradient(self):
+  def test_feature_column_input_layer_gradient(self):
     with context.eager_mode():
       sparse_input = sparse_tensor.SparseTensor(
           indices=((0, 0), (1, 0), (2, 0)),
@@ -1815,11 +3589,11 @@ class FeatureLayerTest(test.TestCase):
           dimension=embedding_dimension,
           initializer=_embedding_column_initializer)
 
-      feature_layer = FeatureLayer([embedding_column])
+      input_layer = fc_old.InputLayer([embedding_column])
       features = {'a': sparse_input}
 
       def scale_matrix():
-        matrix = feature_layer(features)
+        matrix = input_layer(features)
         return 2 * matrix
 
       # Sanity check: Verify that scale_matrix returns the correct output.
@@ -1834,32 +3608,32 @@ class FeatureLayerTest(test.TestCase):
       self.assertAllEqual([0, 1, 2], indexed_slice.indices)
       self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)
 
+
+class FunctionalInputLayerTest(test.TestCase):
+
   def test_raises_if_empty_feature_columns(self):
     with self.assertRaisesRegexp(ValueError,
                                  'feature_columns must not be empty'):
-      FeatureLayer(feature_columns=[])(features={})
+      fc_old.input_layer(features={}, feature_columns=[])
 
   def test_should_be_dense_column(self):
-    with self.assertRaisesRegexp(ValueError, 'must be a DenseColumn'):
-      FeatureLayer(feature_columns=[
-          fc.categorical_column_with_hash_bucket('wire_cast', 4)
-      ])(
-          features={
-              'a': [[0]]
-          })
+    with self.assertRaisesRegexp(ValueError, 'must be a _DenseColumn'):
+      fc_old.input_layer(
+          features={'a': [[0]]},
+          feature_columns=[
+              fc.categorical_column_with_hash_bucket('wire_cast', 4)
+          ])
 
   def test_does_not_support_dict_columns(self):
     with self.assertRaisesRegexp(
         ValueError, 'Expected feature_columns to be iterable, found dict.'):
-      FeatureLayer(feature_columns={'a': fc.numeric_column('a')})(
-          features={
-              'a': [[0]]
-          })
+      fc_old.input_layer(
+          features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')})
 
   def test_bare_column(self):
     with ops.Graph().as_default():
       features = features = {'a': [0.]}
-      net = FeatureLayer(fc.numeric_column('a'))(features)
+      net = fc_old.input_layer(features, fc.numeric_column('a'))
       with _initialized_session():
         self.assertAllClose([[0.]], net.eval())
 
@@ -1867,25 +3641,23 @@ class FeatureLayerTest(test.TestCase):
     with ops.Graph().as_default():
       features = features = {'a': [0.], 'b': [1.]}
       columns = (fc.numeric_column(key) for key in features)
-      net = FeatureLayer(columns)(features)
+      net = fc_old.input_layer(features, columns)
       with _initialized_session():
         self.assertAllClose([[0., 1.]], net.eval())
 
   def test_raises_if_duplicate_name(self):
     with self.assertRaisesRegexp(
         ValueError, 'Duplicate feature column name found for columns'):
-      FeatureLayer(
+      fc_old.input_layer(
+          features={'a': [[0]]},
           feature_columns=[fc.numeric_column('a'),
-                           fc.numeric_column('a')])(
-                               features={
-                                   'a': [[0]]
-                               })
+                           fc.numeric_column('a')])
 
   def test_one_column(self):
     price = fc.numeric_column('price')
     with ops.Graph().as_default():
       features = {'price': [[1.], [5.]]}
-      net = FeatureLayer([price])(features)
+      net = fc_old.input_layer(features, [price])
       with _initialized_session():
         self.assertAllClose([[1.], [5.]], net.eval())
 
@@ -1893,25 +3665,10 @@ class FeatureLayerTest(test.TestCase):
     price = fc.numeric_column('price', shape=2)
     with ops.Graph().as_default():
       features = {'price': [[1., 2.], [5., 6.]]}
-      net = FeatureLayer([price])(features)
+      net = fc_old.input_layer(features, [price])
       with _initialized_session():
         self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
 
-  def test_compute_output_shape(self):
-    price1 = fc.numeric_column('price1', shape=2)
-    price2 = fc.numeric_column('price2', shape=4)
-    with ops.Graph().as_default():
-      features = {
-          'price1': [[1., 2.], [5., 6.]],
-          'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]]
-      }
-      feature_layer = FeatureLayer([price1, price2])
-      self.assertEqual((None, 6), feature_layer.compute_output_shape((None,)))
-      net = feature_layer(features)
-      with _initialized_session():
-        self.assertAllClose(
-            [[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]], net.eval())
-
   def test_raises_if_shape_mismatch(self):
     price = fc.numeric_column('price', shape=2)
     with ops.Graph().as_default():
@@ -1919,13 +3676,13 @@ class FeatureLayerTest(test.TestCase):
       with self.assertRaisesRegexp(
           Exception,
           r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
-        FeatureLayer([price])(features)
+        fc_old.input_layer(features, [price])
 
   def test_reshaping(self):
     price = fc.numeric_column('price', shape=[1, 2])
     with ops.Graph().as_default():
       features = {'price': [[[1., 2.]], [[5., 6.]]]}
-      net = FeatureLayer([price])(features)
+      net = fc_old.input_layer(features, [price])
       with _initialized_session():
         self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
 
@@ -1933,26 +3690,128 @@ class FeatureLayerTest(test.TestCase):
     price1 = fc.numeric_column('price1', shape=2)
     price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
-      features = {
-          'price1': [[1., 2.], [5., 6.]],
-          'price2': [[3.], [4.]]
-      }
-      net = FeatureLayer([price1, price2])(features)
+      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
+      net = fc_old.input_layer(features, [price1, price2])
       with _initialized_session():
         self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
 
-  def test_cols_to_output_tensors(self):
-    price1 = fc.numeric_column('price1', shape=2)
-    price2 = fc.numeric_column('price2')
+  def test_fills_cols_to_vars(self):
+    # Provide three _DenseColumn's to input_layer: a _NumericColumn, a
+    # _BucketizedColumn, and an _EmbeddingColumn.  Only the _EmbeddingColumn
+    # creates a Variable.
+    price1 = fc.numeric_column('price1')
+    dense_feature = fc.numeric_column('dense_feature')
+    dense_feature_bucketized = fc.bucketized_column(
+        dense_feature, boundaries=[0.])
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)
     with ops.Graph().as_default():
-      cols_dict = {}
-      features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
-      feature_layer = FeatureLayer([price1, price2])
-      net = feature_layer(features, cols_dict)
-      with _initialized_session():
-        self.assertAllClose([[1., 2.], [5., 6.]], cols_dict[price1].eval())
-        self.assertAllClose([[3.], [4.]], cols_dict[price2].eval())
-        self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
+      features = {
+          'price1': [[3.], [4.]],
+          'dense_feature': [[-1.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+      }
+      cols_to_vars = {}
+      all_cols = [price1, dense_feature_bucketized, some_embedding_column]
+      fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
+      self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
+      self.assertEqual(0, len(cols_to_vars[price1]))
+      self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
+      self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
+      self.assertIsInstance(cols_to_vars[some_embedding_column][0],
+                            variables_lib.Variable)
+      self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10])
+
+  def test_fills_cols_to_vars_shared_embedding(self):
+    # Provide 5 DenseColumn's to input_layer: a NumericColumn, a
+    # BucketizedColumn, an EmbeddingColumn, two SharedEmbeddingColumns. The
+    # EmbeddingColumn creates a Variable and the two SharedEmbeddingColumns
+    # shared one variable.
+    price1 = fc.numeric_column('price1')
+    dense_feature = fc.numeric_column('dense_feature')
+    dense_feature_bucketized = fc.bucketized_column(
+        dense_feature, boundaries=[0.])
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)
+    categorical_column_a = fc_old.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc_old.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    shared_embedding_a, shared_embedding_b = fc_old.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b], dimension=2)
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[3.], [4.]],
+          'dense_feature': [[-1.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+          'aaa':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 1, 0),
+                  dense_shape=(2, 2)),
+          'bbb':
+              sparse_tensor.SparseTensor(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(1, 2, 1),
+                  dense_shape=(2, 2)),
+      }
+      cols_to_vars = {}
+      all_cols = [
+          price1, dense_feature_bucketized, some_embedding_column,
+          shared_embedding_a, shared_embedding_b
+      ]
+      fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
+      self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
+      self.assertEqual(0, len(cols_to_vars[price1]))
+      self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
+      self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
+      self.assertEqual(1, len(cols_to_vars[shared_embedding_a]))
+      # This is a bug in the current implementation and should be fixed in the
+      # new one.
+      self.assertEqual(0, len(cols_to_vars[shared_embedding_b]))
+      self.assertIsInstance(cols_to_vars[some_embedding_column][0],
+                            variables_lib.Variable)
+      self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10])
+      self.assertIsInstance(cols_to_vars[shared_embedding_a][0],
+                            variables_lib.Variable)
+      self.assertAllEqual(cols_to_vars[shared_embedding_a][0].shape, [3, 2])
+
+  def test_fills_cols_to_vars_partitioned_variables(self):
+    price1 = fc.numeric_column('price1')
+    dense_feature = fc.numeric_column('dense_feature')
+    dense_feature_bucketized = fc.bucketized_column(
+        dense_feature, boundaries=[0.])
+    some_sparse_column = fc.categorical_column_with_hash_bucket(
+        'sparse_feature', hash_bucket_size=5)
+    some_embedding_column = fc.embedding_column(
+        some_sparse_column, dimension=10)
+    with ops.Graph().as_default():
+      features = {
+          'price1': [[3.], [4.]],
+          'dense_feature': [[-1.], [4.]],
+          'sparse_feature': [['a'], ['x']],
+      }
+      cols_to_vars = {}
+      all_cols = [price1, dense_feature_bucketized, some_embedding_column]
+      with variable_scope.variable_scope(
+          'input_from_feature_columns',
+          partitioner=partitioned_variables.fixed_size_partitioner(3, axis=0)):
+        fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
+      self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
+      self.assertEqual(0, len(cols_to_vars[price1]))
+      self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
+      self.assertEqual(3, len(cols_to_vars[some_embedding_column]))
+      self.assertEqual(
+          'input_from_feature_columns/input_layer/sparse_feature_embedding/'
+          'embedding_weights/part_0:0',
+          cols_to_vars[some_embedding_column][0].name)
+      self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [2, 10])
+      self.assertAllEqual(cols_to_vars[some_embedding_column][1].shape, [2, 10])
+      self.assertAllEqual(cols_to_vars[some_embedding_column][2].shape, [1, 10])
 
   def test_column_order(self):
     price_a = fc.numeric_column('price_a')
@@ -1962,8 +3821,8 @@ class FeatureLayerTest(test.TestCase):
           'price_a': [[1.]],
           'price_b': [[3.]],
       }
-      net1 = FeatureLayer([price_a, price_b])(features)
-      net2 = FeatureLayer([price_b, price_a])(features)
+      net1 = fc_old.input_layer(features, [price_a, price_b])
+      net2 = fc_old.input_layer(features, [price_b, price_a])
       with _initialized_session():
         self.assertAllClose([[1., 3.]], net1.eval())
         self.assertAllClose([[1., 3.]], net2.eval())
@@ -1976,8 +3835,8 @@ class FeatureLayerTest(test.TestCase):
               sparse_tensor.SparseTensor(
                   indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
       }
-      with self.assertRaisesRegexp(Exception, 'must be a DenseColumn'):
-        FeatureLayer([animal])(features)
+      with self.assertRaisesRegexp(Exception, 'must be a _DenseColumn'):
+        fc_old.input_layer(features, [animal])
 
   def test_static_batch_size_mismatch(self):
     price1 = fc.numeric_column('price1')
@@ -1990,7 +3849,7 @@ class FeatureLayerTest(test.TestCase):
       with self.assertRaisesRegexp(
           ValueError,
           'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
-        FeatureLayer([price1, price2])(features)
+        fc_old.input_layer(features, [price1, price2])
 
   def test_subset_of_static_batch_size_mismatch(self):
     price1 = fc.numeric_column('price1')
@@ -2005,7 +3864,7 @@ class FeatureLayerTest(test.TestCase):
       with self.assertRaisesRegexp(
           ValueError,
           'Batch size \(first dimension\) of each feature must be same.'):  # pylint: disable=anomalous-backslash-in-string
-        FeatureLayer([price1, price2, price3])(features)
+        fc_old.input_layer(features, [price1, price2, price3])
 
   def test_runtime_batch_size_mismatch(self):
     price1 = fc.numeric_column('price1')
@@ -2015,7 +3874,7 @@ class FeatureLayerTest(test.TestCase):
           'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
           'price2': [[3.], [4.]]  # batchsize = 2
       }
-      net = FeatureLayer([price1, price2])(features)
+      net = fc_old.input_layer(features, [price1, price2])
       with _initialized_session() as sess:
         with self.assertRaisesRegexp(errors.OpError,
                                      'Dimensions of inputs should match'):
@@ -2029,7 +3888,7 @@ class FeatureLayerTest(test.TestCase):
           'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
           'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
       }
-      net = FeatureLayer([price1, price2])(features)
+      net = fc_old.input_layer(features, [price1, price2])
       with _initialized_session() as sess:
         sess.run(
             net,
@@ -2049,181 +3908,39 @@ class FeatureLayerTest(test.TestCase):
           'sparse_feature': [['a'], ['x']],
       }
       all_cols = [some_embedding_column]
-      FeatureLayer(all_cols)(features)
-      FeatureLayer(all_cols)(features)
-      # Make sure that 2 variables get created in this case.
-      self.assertEqual(2, len(
-          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
-      expected_var_names = [
-          'feature_layer/sparse_feature_embedding/embedding_weights:0',
-          'feature_layer_1/sparse_feature_embedding/embedding_weights:0'
-      ]
-      self.assertItemsEqual(
-          expected_var_names,
-          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
-
-  def test_multiple_layers_with_same_shared_embedding_column(self):
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    categorical_column_b = fc.categorical_column_with_identity(
-        key='bbb', num_buckets=3)
-    embedding_dimension = 2
-    embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
-        [categorical_column_b, categorical_column_a],
-        dimension=embedding_dimension)
-    shared_state_manager = fc.SharedEmbeddingStateManager(
-        name='shared_feature_layer')
-
-    with ops.Graph().as_default():
-      features = {
-          'aaa':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(0, 1, 0),
-                  dense_shape=(2, 2)),
-          'bbb':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(1, 2, 1),
-                  dense_shape=(2, 2)),
-      }
-      all_cols = [embedding_column_a, embedding_column_b]
-      FeatureLayer(
-          all_cols, shared_state_manager=shared_state_manager)(
-              features)
-      FeatureLayer(
-          all_cols, shared_state_manager=shared_state_manager)(
-              features)
-      # Make sure that only 1 variable gets created in this case.
-      self.assertEqual(1, len(
-          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
-      self.assertItemsEqual(
-          ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
-          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
-
-  def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self):
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    categorical_column_b = fc.categorical_column_with_identity(
-        key='bbb', num_buckets=3)
-    embedding_dimension = 2
-    embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
-        [categorical_column_b, categorical_column_a],
-        dimension=embedding_dimension)
-    all_cols = [embedding_column_a, embedding_column_b]
-
-    with ops.Graph().as_default():
-      shared_state_manager1 = fc.SharedEmbeddingStateManager(
-          name='shared_feature_layer')
-      features = {
-          'aaa':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(0, 1, 0),
-                  dense_shape=(2, 2)),
-          'bbb':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(1, 2, 1),
-                  dense_shape=(2, 2)),
-      }
-      FeatureLayer(
-          all_cols, shared_state_manager=shared_state_manager1)(
-              features)
-      # Make sure that only 1 variable gets created in this case.
-      self.assertEqual(1, len(
-          ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
-
-    with ops.Graph().as_default():
-      shared_state_manager2 = fc.SharedEmbeddingStateManager(
-          name='shared_feature_layer')
-      features1 = {
-          'aaa':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(0, 1, 0),
-                  dense_shape=(2, 2)),
-          'bbb':
-              sparse_tensor.SparseTensor(
-                  indices=((0, 0), (1, 0), (1, 1)),
-                  values=(1, 2, 1),
-                  dense_shape=(2, 2)),
-      }
-
-      FeatureLayer(
-          all_cols, shared_state_manager=shared_state_manager2)(
-              features1)
-      # Make sure that only 1 variable gets created in this case.
-      self.assertEqual(1, len(
+      fc_old.input_layer(features, all_cols)
+      fc_old.input_layer(features, all_cols)
+      # Make sure that 2 variables get created in this case.
+      self.assertEqual(2, len(
           ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
+      expected_var_names = [
+          'input_layer/sparse_feature_embedding/embedding_weights:0',
+          'input_layer_1/sparse_feature_embedding/embedding_weights:0'
+      ]
       self.assertItemsEqual(
-          ['shared_feature_layer/aaa_bbb_shared_embedding:0'],
+          expected_var_names,
           [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
 
-  def test_with_numpy_input_fn(self):
-    embedding_values = (
-        (1., 2., 3., 4., 5.),  # id 0
-        (6., 7., 8., 9., 10.),  # id 1
-        (11., 12., 13., 14., 15.)  # id 2
-    )
-    def _initializer(shape, dtype, partition_info):
-      del shape, dtype, partition_info
-      return embedding_values
-
-    # price has 1 dimension in feature_layer
-    price = fc.numeric_column('price')
-    body_style = fc.categorical_column_with_vocabulary_list(
-        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
-    # one_hot_body_style has 3 dims in feature_layer.
-    one_hot_body_style = fc.indicator_column(body_style)
-    # embedded_body_style has 5 dims in feature_layer.
-    embedded_body_style = fc.embedding_column(
-        body_style, dimension=5, initializer=_initializer)
-
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'price': np.array([11., 12., 13., 14.]),
-            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
-        },
-        batch_size=2,
-        shuffle=False)
-    features = input_fn()
-    net = FeatureLayer([price, one_hot_body_style, embedded_body_style])(
-        features)
-    self.assertEqual(1 + 3 + 5, net.shape[1])
-    with _initialized_session() as sess:
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-
-      # Each row is formed by concatenating `embedded_body_style`,
-      # `one_hot_body_style`, and `price` in order.
-      self.assertAllEqual(
-          [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
-           [1., 2., 3., 4., 5., 1., 0., 0., 12]],
-          sess.run(net))
-
-      coord.request_stop()
-      coord.join(threads)
-
   def test_with_1d_sparse_tensor(self):
     embedding_values = (
         (1., 2., 3., 4., 5.),  # id 0
         (6., 7., 8., 9., 10.),  # id 1
         (11., 12., 13., 14., 15.)  # id 2
     )
+
     def _initializer(shape, dtype, partition_info):
       del shape, dtype, partition_info
       return embedding_values
 
-    # price has 1 dimension in feature_layer
+    # price has 1 dimension in input_layer
     price = fc.numeric_column('price')
 
-    # one_hot_body_style has 3 dims in feature_layer.
+    # one_hot_body_style has 3 dims in input_layer.
     body_style = fc.categorical_column_with_vocabulary_list(
         'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
     one_hot_body_style = fc.indicator_column(body_style)
 
-    # embedded_body_style has 5 dims in feature_layer.
+    # embedded_body_style has 5 dims in input_layer.
     country = fc.categorical_column_with_vocabulary_list(
         'country', vocabulary_list=['US', 'JP', 'CA'])
     embedded_country = fc.embedding_column(
@@ -2231,28 +3948,34 @@ class FeatureLayerTest(test.TestCase):
 
     # Provides 1-dim tensor and dense tensor.
     features = {
-        'price': constant_op.constant([11., 12.,]),
-        'body-style': sparse_tensor.SparseTensor(
-            indices=((0,), (1,)),
-            values=('sedan', 'hardtop'),
-            dense_shape=(2,)),
+        'price':
+            constant_op.constant([
+                11.,
+                12.,
+            ]),
+        'body-style':
+            sparse_tensor.SparseTensor(
+                indices=((0,), (1,)),
+                values=('sedan', 'hardtop'),
+                dense_shape=(2,)),
         # This is dense tensor for the categorical_column.
-        'country': constant_op.constant(['CA', 'US']),
+        'country':
+            constant_op.constant(['CA', 'US']),
     }
     self.assertEqual(1, features['price'].shape.ndims)
     self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
     self.assertEqual(1, features['country'].shape.ndims)
 
-    net = FeatureLayer([price, one_hot_body_style, embedded_country])(features)
+    net = fc_old.input_layer(features,
+                             [price, one_hot_body_style, embedded_country])
     self.assertEqual(1 + 3 + 5, net.shape[1])
     with _initialized_session() as sess:
 
       # Each row is formed by concatenating `embedded_body_style`,
       # `one_hot_body_style`, and `price` in order.
-      self.assertAllEqual(
-          [[0., 0., 1., 11., 12., 13., 14., 15., 11.],
-           [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
-          sess.run(net))
+      self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.],
+                           [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
+                          sess.run(net))
 
   def test_with_1d_unknown_shape_sparse_tensor(self):
     embedding_values = (
@@ -2260,19 +3983,20 @@ class FeatureLayerTest(test.TestCase):
         (6., 7.),  # id 1
         (11., 12.)  # id 2
     )
+
     def _initializer(shape, dtype, partition_info):
       del shape, dtype, partition_info
       return embedding_values
 
-    # price has 1 dimension in feature_layer
+    # price has 1 dimension in input_layer
     price = fc.numeric_column('price')
 
-    # one_hot_body_style has 3 dims in feature_layer.
+    # one_hot_body_style has 3 dims in input_layer.
     body_style = fc.categorical_column_with_vocabulary_list(
         'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
     one_hot_body_style = fc.indicator_column(body_style)
 
-    # embedded_body_style has 5 dims in feature_layer.
+    # embedded_body_style has 5 dims in input_layer.
     country = fc.categorical_column_with_vocabulary_list(
         'country', vocabulary_list=['US', 'JP', 'CA'])
     embedded_country = fc.embedding_column(
@@ -2291,12 +4015,11 @@ class FeatureLayerTest(test.TestCase):
 
     price_data = np.array([11., 12.])
     body_style_data = sparse_tensor.SparseTensorValue(
-        indices=((0,), (1,)),
-        values=('sedan', 'hardtop'),
-        dense_shape=(2,))
+        indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,))
     country_data = np.array([['US'], ['CA']])
 
-    net = FeatureLayer([price, one_hot_body_style, embedded_country])(features)
+    net = fc_old.input_layer(features,
+                             [price, one_hot_body_style, embedded_country])
     self.assertEqual(1 + 3 + 2, net.shape[1])
     with _initialized_session() as sess:
 
@@ -2313,7 +4036,7 @@ class FeatureLayerTest(test.TestCase):
               }))
 
   def test_with_rank_0_feature(self):
-    # price has 1 dimension in feature_layer
+    # price has 1 dimension in input_layer
     price = fc.numeric_column('price')
     features = {
         'price': constant_op.constant(0),
@@ -2322,13 +4045,13 @@ class FeatureLayerTest(test.TestCase):
 
     # Static rank 0 should fail
     with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
-      FeatureLayer([price])(features)
+      fc_old.input_layer(features, [price])
 
     # Dynamic rank 0 should fail
     features = {
         'price': array_ops.placeholder(dtypes.float32),
     }
-    net = FeatureLayer([price])(features)
+    net = fc_old.input_layer(features, [price])
     self.assertEqual(1, net.shape[1])
     with _initialized_session() as sess:
       with self.assertRaisesOpError('Feature .* cannot have rank 0'):
@@ -2337,10 +4060,14 @@ class FeatureLayerTest(test.TestCase):
 
 class MakeParseExampleSpecTest(test.TestCase):
 
-  class _TestFeatureColumn(FeatureColumn,
+  class _TestFeatureColumn(fc.FeatureColumn,
                            collections.namedtuple('_TestFeatureColumn',
                                                   ('parse_spec'))):
 
+    @property
+    def _is_v2_column(self):
+      return True
+
     @property
     def name(self):
       return '_TestFeatureColumn'
@@ -2458,6 +4185,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
     self.assertEqual({
         'aaa': parsing_ops.VarLenFeature(dtypes.string)
     }, column.parse_example_spec)
+    self.assertTrue(column._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -2501,7 +4229,10 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
-    column.get_sparse_tensors(FeatureTransformationCache({'aaa': inputs}), None)
+    column.get_sparse_tensors(
+        fc.FeatureTransformationCache({
+            'aaa': inputs
+        }), None)
     with self.assertRaisesRegexp(errors.OpError, 'file_does_not_exist'):
       with self.cached_session():
         lookup_ops.tables_initializer().run()
@@ -2525,7 +4256,10 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
-    column.get_sparse_tensors(FeatureTransformationCache({'aaa': inputs}), None)
+    column.get_sparse_tensors(
+        fc.FeatureTransformationCache({
+            'aaa': inputs
+        }), None)
     with self.assertRaisesRegexp(errors.OpError, 'Invalid vocab_size'):
       with self.cached_session():
         lookup_ops.tables_initializer().run()
@@ -2564,7 +4298,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -2580,7 +4314,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -2616,7 +4350,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2637,7 +4371,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2659,7 +4393,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
-    id_tensor = _transform_features({'aaa': inputs}, [column], None)[column]
+    id_tensor = fc._transform_features({'aaa': inputs}, [column], None)[column]
     with _initialized_session():
       _assert_sparse_tensor_value(self,
                                   sparse_tensor.SparseTensorValue(
@@ -2675,7 +4409,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         vocabulary_file=self._wire_vocabulary_file_name,
         vocabulary_size=self._wire_vocabulary_size)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': (('marlo', ''), ('skywalker', 'omar'))
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2699,7 +4433,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2723,7 +4457,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar', 'heisenberg'),
         dense_shape=(2, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2749,7 +4483,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2773,7 +4507,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=(11, 100, 30, 22),
         dense_shape=(3, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2795,7 +4529,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         dtype=dtypes.int32,
         default_value=default_value)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': ((11, -1, -1), (100, 30, -1), (-1, -1, 22))
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2820,7 +4554,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         values=(11, 100, 30, 22),
         dense_shape=(3, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -2859,6 +4593,32 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
         # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
         self.assertAllClose(((3.,), (5.,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    wire_column = fc.categorical_column_with_vocabulary_file(
+        key='wire',
+        vocabulary_file=self._wire_vocabulary_file_name,
+        vocabulary_size=self._wire_vocabulary_size,
+        num_oov_buckets=1)
+    self.assertEqual(4, wire_column.num_buckets)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          wire_column.name:
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=('marlo', 'skywalker', 'omar'),
+                  dense_shape=(2, 2))
+      }, (wire_column,))
+      bias = get_linear_model_bias()
+      wire_var = get_linear_model_column_var(wire_column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval()
+        # 'marlo' -> 2: wire_var[2] = 3
+        # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
+        self.assertAllClose(((3.,), (5.,)), predictions.eval())
+
 
 class VocabularyListCategoricalColumnTest(test.TestCase):
 
@@ -2871,6 +4631,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
     self.assertEqual({
         'aaa': parsing_ops.VarLenFeature(dtypes.string)
     }, column.parse_example_spec)
+    self.assertTrue(column._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -2973,7 +4734,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -2987,7 +4748,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -3044,7 +4805,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3065,7 +4826,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
-    id_tensor = _transform_features({'aaa': inputs}, [column], None)[column]
+    id_tensor = fc._transform_features({'aaa': inputs}, [column], None)[column]
     with _initialized_session():
       _assert_sparse_tensor_value(
           self,
@@ -3080,7 +4841,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         key='aaa',
         vocabulary_list=('omar', 'stringer', 'marlo'))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': (('marlo', ''), ('skywalker', 'omar'))
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3103,7 +4864,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar'),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3126,7 +4887,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=('marlo', 'skywalker', 'omar', 'heisenberg'),
         dense_shape=(2, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3149,7 +4910,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=np.array((11, 100, 30, 22), dtype=np.int32),
         dense_shape=(3, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3170,10 +4931,10 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         dtype=dtypes.int32,
         default_value=default_value)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa':
-                np.array(
-                    ((11, -1, -1), (100, 30, -1), (-1, -1, 22)), dtype=np.int32)
+                np.array(((11, -1, -1), (100, 30, -1), (-1, -1, 22)),
+                         dtype=np.int32)
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
     with _initialized_session():
@@ -3196,7 +4957,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         values=(11, 100, 30, 22),
         dense_shape=(3, 3))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3234,6 +4995,31 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
         # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
         self.assertAllClose(((3.,), (5.,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    wire_column = fc.categorical_column_with_vocabulary_list(
+        key='aaa',
+        vocabulary_list=('omar', 'stringer', 'marlo'),
+        num_oov_buckets=1)
+    self.assertEqual(4, wire_column.num_buckets)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          wire_column.name:
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=('marlo', 'skywalker', 'omar'),
+                  dense_shape=(2, 2))
+      }, (wire_column,))
+      bias = get_linear_model_bias()
+      wire_var = get_linear_model_column_var(wire_column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval()
+        # 'marlo' -> 2: wire_var[2] = 3
+        # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5
+        self.assertAllClose(((3.,), (5.,)), predictions.eval())
+
 
 class IdentityCategoricalColumnTest(test.TestCase):
 
@@ -3245,6 +5031,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
     self.assertEqual({
         'aaa': parsing_ops.VarLenFeature(dtypes.int64)
     }, column.parse_example_spec)
+    self.assertTrue(column._is_v2_column)
 
   def test_key_should_be_string(self):
     with self.assertRaisesRegexp(ValueError, 'key must be a string.'):
@@ -3285,7 +5072,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'Invalid input, not integer'):
       column.get_sparse_tensors(
-          FeatureTransformationCache({
+          fc.FeatureTransformationCache({
               'aaa': inputs
           }), None)
 
@@ -3317,7 +5104,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=(0, 1, 0),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3336,7 +5123,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=(0, 1, 0),
         dense_shape=(2, 2))
-    id_tensor = _transform_features({'aaa': inputs}, [column], None)[column]
+    id_tensor = fc._transform_features({'aaa': inputs}, [column], None)[column]
     with _initialized_session():
       _assert_sparse_tensor_value(
           self,
@@ -3349,7 +5136,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
   def test_get_sparse_tensors_dense_input(self):
     column = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': ((0, -1), (1, 0))
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3369,7 +5156,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=(1, -1, 0),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3385,7 +5172,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=(1, 99, 0),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3402,7 +5189,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=(1, -1, 99),
         dense_shape=(2, 2))
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3426,7 +5213,7 @@ class IdentityCategoricalColumnTest(test.TestCase):
         values=input_values,
         dense_shape=input_shape)
     id_weight_pair = column.get_sparse_tensors(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': inputs
         }), None)
     self.assertIsNone(id_weight_pair.weight_tensor)
@@ -3465,6 +5252,28 @@ class IdentityCategoricalColumnTest(test.TestCase):
         # weight_var[2] + weight_var[1] = 3+2 = 5
         self.assertAllClose(((1.,), (5.,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    column = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
+    self.assertEqual(3, column.num_buckets)
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          column.name:
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2))
+      }, (column,))
+      bias = get_linear_model_bias()
+      weight_var = get_linear_model_column_var(column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        weight_var.assign(((1.,), (2.,), (3.,))).eval()
+        # weight_var[0] = 1
+        # weight_var[2] + weight_var[1] = 3+2 = 5
+        self.assertAllClose(((1.,), (5.,)), predictions.eval())
+
 
 class TransformFeaturesTest(test.TestCase):
 
@@ -3483,8 +5292,8 @@ class TransformFeaturesTest(test.TestCase):
                   indices=[[0, 0], [1, 0], [1, 1]],
                   dense_shape=[2, 2])
       }
-      transformed = _transform_features(features,
-                                        [bucketized_price, hashed_sparse], None)
+      transformed = fc._transform_features(
+          features, [bucketized_price, hashed_sparse], None)
       with _initialized_session():
         self.assertIn(bucketized_price.name, transformed[bucketized_price].name)
         self.assertAllEqual([[0], [3]], transformed[bucketized_price].eval())
@@ -3494,11 +5303,15 @@ class TransformFeaturesTest(test.TestCase):
   def test_column_order(self):
     """When the column is both dense and sparse, uses sparse tensors."""
 
-    class _LoggerColumn(FeatureColumn):
+    class _LoggerColumn(fc.FeatureColumn):
 
       def __init__(self, name):
         self._name = name
 
+      @property
+      def _is_v2_column(self):
+        return True
+
       @property
       def name(self):
         return self._name
@@ -3516,12 +5329,12 @@ class TransformFeaturesTest(test.TestCase):
       column1 = _LoggerColumn('1')
       column2 = _LoggerColumn('2')
       call_logger = {'count': 0}
-      _transform_features({}, [column1, column2], None)
+      fc._transform_features({}, [column1, column2], None)
       self.assertEqual(0, column1.call_order)
       self.assertEqual(1, column2.call_order)
 
       call_logger = {'count': 0}
-      _transform_features({}, [column2, column1], None)
+      fc._transform_features({}, [column2, column1], None)
       self.assertEqual(0, column1.call_order)
       self.assertEqual(1, column2.call_order)
 
@@ -3534,17 +5347,19 @@ class IndicatorColumnTest(test.TestCase):
     self.assertEqual(indicator_a.categorical_column.name, 'a')
     self.assertEqual(indicator_a.name, 'a_indicator')
     self.assertEqual(indicator_a.variable_shape, [1, 4])
+    self.assertTrue(indicator_a._is_v2_column)
 
-    b = fc.categorical_column_with_hash_bucket('b', hash_bucket_size=100)
+    b = fc_old.categorical_column_with_hash_bucket('b', hash_bucket_size=100)
     indicator_b = fc.indicator_column(b)
     self.assertEqual(indicator_b.categorical_column.name, 'b')
     self.assertEqual(indicator_b.name, 'b_indicator')
     self.assertEqual(indicator_b.variable_shape, [1, 100])
+    self.assertFalse(indicator_b._is_v2_column)
 
   def test_1D_shape_succeeds(self):
     animal = fc.indicator_column(
         fc.categorical_column_with_hash_bucket('animal', 4))
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'animal': ['fox', 'fox']
     })
     output = transformation_cache.get(animal, None)
@@ -3555,7 +5370,7 @@ class IndicatorColumnTest(test.TestCase):
     # TODO(ispir/cassandrax): Swith to categorical_column_with_keys when ready.
     animal = fc.indicator_column(
         fc.categorical_column_with_hash_bucket('animal', 4))
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'animal':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0], [1, 0]],
@@ -3570,7 +5385,7 @@ class IndicatorColumnTest(test.TestCase):
     animal = fc.indicator_column(
         fc.categorical_column_with_identity('animal', num_buckets=4))
 
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'animal':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0], [0, 1]], values=[1, 1], dense_shape=[1, 2])
@@ -3582,7 +5397,7 @@ class IndicatorColumnTest(test.TestCase):
   def test_multi_hot2(self):
     animal = fc.indicator_column(
         fc.categorical_column_with_identity('animal', num_buckets=4))
-    transformation_cache = FeatureTransformationCache({
+    transformation_cache = fc.FeatureTransformationCache({
         'animal':
             sparse_tensor.SparseTensor(
                 indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
@@ -3632,8 +5447,8 @@ class IndicatorColumnTest(test.TestCase):
             values=('marlo', 'skywalker', 'omar'),
             dense_shape=(2, 2))
     }
-    indicator_tensor = _transform_features(features, [a_indicator],
-                                           None)[a_indicator]
+    indicator_tensor = fc._transform_features(features, [a_indicator],
+                                              None)[a_indicator]
     with _initialized_session():
       self.assertAllEqual([[0, 0, 1], [1, 0, 0]], indicator_tensor.eval())
 
@@ -3647,8 +5462,8 @@ class IndicatorColumnTest(test.TestCase):
         'ids': constant_op.constant([['c', 'b', 'a']]),
         'weights': constant_op.constant([[2., 4., 6.]])
     }
-    indicator_tensor = _transform_features(features, [indicator],
-                                           None)[indicator]
+    indicator_tensor = fc._transform_features(features, [indicator],
+                                              None)[indicator]
     with _initialized_session():
       self.assertAllEqual([[6., 4., 2.]], indicator_tensor.eval())
 
@@ -3662,8 +5477,8 @@ class IndicatorColumnTest(test.TestCase):
         'ids': constant_op.constant([['c', 'b', 'unknown']]),
         'weights': constant_op.constant([[2., 4., 6.]])
     }
-    indicator_tensor = _transform_features(features, [indicator],
-                                           None)[indicator]
+    indicator_tensor = fc._transform_features(features, [indicator],
+                                              None)[indicator]
     with _initialized_session():
       self.assertAllEqual([[0., 4., 2.]], indicator_tensor.eval())
 
@@ -3675,8 +5490,8 @@ class IndicatorColumnTest(test.TestCase):
     features = {
         'ids': constant_op.constant([['c', 'b', 'unknown']]),
     }
-    indicator_tensor = _transform_features(features, [indicator],
-                                           None)[indicator]
+    indicator_tensor = fc._transform_features(features, [indicator],
+                                              None)[indicator]
     with _initialized_session():
       self.assertAllEqual([[0., 1., 1.]], indicator_tensor.eval())
 
@@ -3700,6 +5515,44 @@ class IndicatorColumnTest(test.TestCase):
         weight_var.assign([[1.], [2.], [3.], [4.]]).eval()
         self.assertAllClose([[2. + 3.]], predictions.eval())
 
+  def test_old_linear_model(self):
+    animal = fc.indicator_column(
+        fc.categorical_column_with_identity('animal', num_buckets=4))
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+
+      predictions = fc_old.linear_model(features, [animal])
+      weight_var = get_linear_model_column_var(animal)
+      with _initialized_session():
+        # All should be zero-initialized.
+        self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval())
+        self.assertAllClose([[0.]], predictions.eval())
+        weight_var.assign([[1.], [2.], [3.], [4.]]).eval()
+        self.assertAllClose([[2. + 3.]], predictions.eval())
+
+  def test_old_linear_model_old_categorical(self):
+    animal = fc.indicator_column(
+        fc_old.categorical_column_with_identity('animal', num_buckets=4))
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+
+      predictions = fc_old.linear_model(features, [animal])
+      weight_var = get_linear_model_column_var(animal)
+      with _initialized_session():
+        # All should be zero-initialized.
+        self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval())
+        self.assertAllClose([[0.]], predictions.eval())
+        weight_var.assign([[1.], [2.], [3.], [4.]]).eval()
+        self.assertAllClose([[2. + 3.]], predictions.eval())
+
   def test_feature_layer(self):
     animal = fc.indicator_column(
         fc.categorical_column_with_identity('animal', num_buckets=4))
@@ -3709,12 +5562,38 @@ class IndicatorColumnTest(test.TestCase):
               sparse_tensor.SparseTensor(
                   indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
       }
-      net = FeatureLayer([animal])(features)
+      net = fc.FeatureLayer([animal])(features)
+      with _initialized_session():
+        self.assertAllClose([[0., 1., 1., 0.]], net.eval())
+
+  def test_input_layer(self):
+    animal = fc.indicator_column(
+        fc.categorical_column_with_identity('animal', num_buckets=4))
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+      net = fc_old.input_layer(features, [animal])
+      with _initialized_session():
+        self.assertAllClose([[0., 1., 1., 0.]], net.eval())
+
+  def test_input_layer_old_categorical(self):
+    animal = fc.indicator_column(
+        fc_old.categorical_column_with_identity('animal', num_buckets=4))
+    with ops.Graph().as_default():
+      features = {
+          'animal':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
+      }
+      net = fc_old.input_layer(features, [animal])
       with _initialized_session():
         self.assertAllClose([[0., 1., 1., 0.]], net.eval())
 
 
-class _TestStateManager(StateManager):
+class _TestStateManager(fc.StateManager):
 
   def __init__(self, trainable=True):
     # Dict of feature_column to a dict of variables.
@@ -3771,6 +5650,15 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertEqual({
         'aaa': parsing_ops.VarLenFeature(dtypes.int64)
     }, embedding_column.parse_example_spec)
+    self.assertTrue(embedding_column._is_v2_column)
+
+  def test_is_v2_column(self):
+    categorical_column = fc_old.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=embedding_dimension)
+    self.assertFalse(embedding_column._is_v2_column)
 
   def test_all_constructor_args(self):
     categorical_column = fc.categorical_column_with_identity(
@@ -3860,7 +5748,7 @@ class EmbeddingColumnTest(test.TestCase):
             values=(0, 1, 0),
             dense_shape=(2, 2))
     }
-    outputs = _transform_features(features, [a, a_embedded], None)
+    outputs = fc._transform_features(features, [a, a_embedded], None)
     output_a = outputs[a]
     output_embedded = outputs[a_embedded]
     with _initialized_session():
@@ -3905,19 +5793,79 @@ class EmbeddingColumnTest(test.TestCase):
     )
 
     # Build columns.
-    categorical_column = fc.categorical_column_with_identity(
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=embedding_dimension,
+        initializer=_initializer)
+    state_manager = _TestStateManager()
+    embedding_column.create_state(state_manager)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup = embedding_column.get_dense_tensor(
+        fc.FeatureTransformationCache({
+            'aaa': sparse_input
+        }), state_manager)
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, global_vars[0].eval())
+      self.assertAllEqual(expected_lookups, embedding_lookup.eval())
+
+  def test_get_dense_tensor_old_categorical(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc_old.categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     embedding_column = fc.embedding_column(
-        categorical_column, dimension=embedding_dimension,
+        categorical_column,
+        dimension=embedding_dimension,
         initializer=_initializer)
-    state_manager = _TestStateManager()
-    embedding_column.create_state(state_manager)
 
     # Provide sparse input and get dense result.
-    embedding_lookup = embedding_column.get_dense_tensor(
-        FeatureTransformationCache({
+    embedding_lookup = embedding_column._get_dense_tensor(
+        fc_old._LazyBuilder({
             'aaa': sparse_input
-        }), state_manager)
+        }))
 
     # Assert expected embedding variable and lookups.
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
@@ -3977,7 +5925,7 @@ class EmbeddingColumnTest(test.TestCase):
 
     # Provide sparse input and get dense result.
     embedding_lookup = embedding_column.get_dense_tensor(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': sparse_input
         }), state_manager)
 
@@ -4040,7 +5988,7 @@ class EmbeddingColumnTest(test.TestCase):
     input_values = array_ops.placeholder(dtype=dtypes.int64)
     input_shape = array_ops.placeholder(dtype=dtypes.int64)
     embedding_lookup = embedding_column.get_dense_tensor(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa':
                 sparse_tensor.SparseTensorValue(
                     indices=input_indices,
@@ -4108,7 +6056,7 @@ class EmbeddingColumnTest(test.TestCase):
 
     # Provide sparse input and get dense result.
     embedding_lookup = embedding_column.get_dense_tensor(
-        FeatureTransformationCache({
+        fc.FeatureTransformationCache({
             'aaa': sparse_input
         }), state_manager)
 
@@ -4120,7 +6068,263 @@ class EmbeddingColumnTest(test.TestCase):
       self.assertAllEqual(embedding_values, global_vars[0].eval())
       self.assertAllEqual(expected_lookups, embedding_lookup.eval())
 
-  def test_linear_model(self):
+  def test_linear_model(self):
+    # Inputs.
+    batch_size = 4
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(batch_size, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_shape = (vocabulary_size, embedding_dimension)
+    zeros_embedding_values = np.zeros(embedding_shape)
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual(embedding_shape, shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return zeros_embedding_values
+
+    # Build columns.
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    with ops.Graph().as_default():
+      model = fc.LinearModel((embedding_column,))
+      predictions = model({categorical_column.name: sparse_input})
+      expected_var_names = (
+          'linear_model/bias_weights:0',
+          'linear_model/aaa_embedding/weights:0',
+          'linear_model/aaa_embedding/embedding_weights:0',
+      )
+      self.assertItemsEqual(
+          expected_var_names,
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+      trainable_vars = {
+          v.name: v for v in ops.get_collection(
+              ops.GraphKeys.TRAINABLE_VARIABLES)
+      }
+      self.assertItemsEqual(expected_var_names, trainable_vars.keys())
+      bias = trainable_vars['linear_model/bias_weights:0']
+      embedding_weights = trainable_vars[
+          'linear_model/aaa_embedding/embedding_weights:0']
+      linear_weights = trainable_vars[
+          'linear_model/aaa_embedding/weights:0']
+      with _initialized_session():
+        # Predictions with all zero weights.
+        self.assertAllClose(np.zeros((1,)), bias.eval())
+        self.assertAllClose(zeros_embedding_values, embedding_weights.eval())
+        self.assertAllClose(
+            np.zeros((embedding_dimension, 1)), linear_weights.eval())
+        self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval())
+
+        # Predictions with all non-zero weights.
+        embedding_weights.assign((
+            (1., 2.),  # id 0
+            (3., 5.),  # id 1
+            (7., 11.)  # id 2
+        )).eval()
+        linear_weights.assign(((4.,), (6.,))).eval()
+        # example 0, ids [2], embedding[0] = [7, 11]
+        # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5]
+        # example 2, ids [], embedding[2] = [0, 0]
+        # example 3, ids [1], embedding[3] = [3, 5]
+        # sum(embeddings * linear_weights)
+        # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42]
+        self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval())
+
+  def test_feature_layer(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    l = fc.FeatureLayer((embedding_column,))
+    feature_layer = l({'aaa': sparse_input})
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in trainable_vars]))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, trainable_vars[0].eval())
+      self.assertAllEqual(expected_lookups, feature_layer.eval())
+
+  def test_feature_layer_not_trainable(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer,
+        trainable=False)
+
+    # Provide sparse input and get dense result.
+    feature_layer = fc.FeatureLayer((embedding_column,))({'aaa': sparse_input})
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    self.assertItemsEqual(
+        [], ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, global_vars[0].eval())
+      self.assertAllEqual(expected_lookups, feature_layer.eval())
+
+  def test_input_layer(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    feature_layer = fc_old.input_layer({
+        'aaa': sparse_input
+    }, (embedding_column,))
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('input_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+    self.assertItemsEqual(('input_layer/aaa_embedding/embedding_weights:0',),
+                          tuple([v.name for v in trainable_vars]))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, trainable_vars[0].eval())
+      self.assertAllEqual(expected_lookups, feature_layer.eval())
+
+  def test_old_linear_model(self):
     # Inputs.
     batch_size = 4
     vocabulary_size = 3
@@ -4137,6 +6341,7 @@ class EmbeddingColumnTest(test.TestCase):
     embedding_dimension = 2
     embedding_shape = (vocabulary_size, embedding_dimension)
     zeros_embedding_values = np.zeros(embedding_shape)
+
     def _initializer(shape, dtype, partition_info):
       self.assertAllEqual(embedding_shape, shape)
       self.assertEqual(dtypes.float32, dtype)
@@ -4152,8 +6357,9 @@ class EmbeddingColumnTest(test.TestCase):
         initializer=_initializer)
 
     with ops.Graph().as_default():
-      model = fc.LinearModel((embedding_column,))
-      predictions = model({categorical_column.name: sparse_input})
+      predictions = fc_old.linear_model({
+          categorical_column.name: sparse_input
+      }, (embedding_column,))
       expected_var_names = (
           'linear_model/bias_weights:0',
           'linear_model/aaa_embedding/weights:0',
@@ -4163,15 +6369,14 @@ class EmbeddingColumnTest(test.TestCase):
           expected_var_names,
           [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
       trainable_vars = {
-          v.name: v for v in ops.get_collection(
-              ops.GraphKeys.TRAINABLE_VARIABLES)
+          v.name: v
+          for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
       }
       self.assertItemsEqual(expected_var_names, trainable_vars.keys())
       bias = trainable_vars['linear_model/bias_weights:0']
       embedding_weights = trainable_vars[
           'linear_model/aaa_embedding/embedding_weights:0']
-      linear_weights = trainable_vars[
-          'linear_model/aaa_embedding/weights:0']
+      linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0']
       with _initialized_session():
         # Predictions with all zero weights.
         self.assertAllClose(np.zeros((1,)), bias.eval())
@@ -4195,8 +6400,9 @@ class EmbeddingColumnTest(test.TestCase):
         # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42]
         self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval())
 
-  def test_feature_layer(self):
+  def test_old_linear_model_old_categorical(self):
     # Inputs.
+    batch_size = 4
     vocabulary_size = 3
     sparse_input = sparse_tensor.SparseTensorValue(
         # example 0, ids [2]
@@ -4205,114 +6411,70 @@ class EmbeddingColumnTest(test.TestCase):
         # example 3, ids [1]
         indices=((0, 0), (1, 0), (1, 4), (3, 0)),
         values=(2, 0, 1, 1),
-        dense_shape=(4, 5))
+        dense_shape=(batch_size, 5))
 
     # Embedding variable.
     embedding_dimension = 2
-    embedding_values = (
-        (1., 2.),  # id 0
-        (3., 5.),  # id 1
-        (7., 11.)  # id 2
-    )
-    def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-      self.assertEqual(dtypes.float32, dtype)
-      self.assertIsNone(partition_info)
-      return embedding_values
-
-    # Expected lookup result, using combiner='mean'.
-    expected_lookups = (
-        # example 0, ids [2], embedding = [7, 11]
-        (7., 11.),
-        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
-        (2., 3.5),
-        # example 2, ids [], embedding = [0, 0]
-        (0., 0.),
-        # example 3, ids [1], embedding = [3, 5]
-        (3., 5.),
-    )
-
-    # Build columns.
-    categorical_column = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc.embedding_column(
-        categorical_column,
-        dimension=embedding_dimension,
-        initializer=_initializer)
-
-    # Provide sparse input and get dense result.
-    l = FeatureLayer((embedding_column,))
-    feature_layer = l({'aaa': sparse_input})
-
-    # Assert expected embedding variable and lookups.
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
-                          tuple([v.name for v in global_vars]))
-    trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
-                          tuple([v.name for v in trainable_vars]))
-    with _initialized_session():
-      self.assertAllEqual(embedding_values, trainable_vars[0].eval())
-      self.assertAllEqual(expected_lookups, feature_layer.eval())
-
-  def test_feature_layer_not_trainable(self):
-    # Inputs.
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        # example 2, ids []
-        # example 3, ids [1]
-        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
-        values=(2, 0, 1, 1),
-        dense_shape=(4, 5))
+    embedding_shape = (vocabulary_size, embedding_dimension)
+    zeros_embedding_values = np.zeros(embedding_shape)
 
-    # Embedding variable.
-    embedding_dimension = 2
-    embedding_values = (
-        (1., 2.),  # id 0
-        (3., 5.),  # id 1
-        (7., 11.)  # id 2
-    )
     def _initializer(shape, dtype, partition_info):
-      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertAllEqual(embedding_shape, shape)
       self.assertEqual(dtypes.float32, dtype)
       self.assertIsNone(partition_info)
-      return embedding_values
-
-    # Expected lookup result, using combiner='mean'.
-    expected_lookups = (
-        # example 0, ids [2], embedding = [7, 11]
-        (7., 11.),
-        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
-        (2., 3.5),
-        # example 2, ids [], embedding = [0, 0]
-        (0., 0.),
-        # example 3, ids [1], embedding = [3, 5]
-        (3., 5.),
-    )
+      return zeros_embedding_values
 
     # Build columns.
-    categorical_column = fc.categorical_column_with_identity(
+    categorical_column = fc_old.categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     embedding_column = fc.embedding_column(
         categorical_column,
         dimension=embedding_dimension,
-        initializer=_initializer,
-        trainable=False)
+        initializer=_initializer)
 
-    # Provide sparse input and get dense result.
-    feature_layer = FeatureLayer((embedding_column,))({'aaa': sparse_input})
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          categorical_column.name: sparse_input
+      }, (embedding_column,))
+      expected_var_names = (
+          'linear_model/bias_weights:0',
+          'linear_model/aaa_embedding/weights:0',
+          'linear_model/aaa_embedding/embedding_weights:0',
+      )
+      self.assertItemsEqual(
+          expected_var_names,
+          [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
+      trainable_vars = {
+          v.name: v
+          for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      }
+      self.assertItemsEqual(expected_var_names, trainable_vars.keys())
+      bias = trainable_vars['linear_model/bias_weights:0']
+      embedding_weights = trainable_vars[
+          'linear_model/aaa_embedding/embedding_weights:0']
+      linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0']
+      with _initialized_session():
+        # Predictions with all zero weights.
+        self.assertAllClose(np.zeros((1,)), bias.eval())
+        self.assertAllClose(zeros_embedding_values, embedding_weights.eval())
+        self.assertAllClose(
+            np.zeros((embedding_dimension, 1)), linear_weights.eval())
+        self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval())
 
-    # Assert expected embedding variable and lookups.
-    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(('feature_layer/aaa_embedding/embedding_weights:0',),
-                          tuple([v.name for v in global_vars]))
-    self.assertItemsEqual(
-        [], ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
-    with _initialized_session():
-      self.assertAllEqual(embedding_values, global_vars[0].eval())
-      self.assertAllEqual(expected_lookups, feature_layer.eval())
+        # Predictions with all non-zero weights.
+        embedding_weights.assign((
+            (1., 2.),  # id 0
+            (3., 5.),  # id 1
+            (7., 11.)  # id 2
+        )).eval()
+        linear_weights.assign(((4.,), (6.,))).eval()
+        # example 0, ids [2], embedding[0] = [7, 11]
+        # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5]
+        # example 2, ids [], embedding[2] = [0, 0]
+        # example 3, ids [1], embedding[3] = [3, 5]
+        # sum(embeddings * linear_weights)
+        # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42]
+        self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval())
 
 
 class SharedEmbeddingColumnTest(test.TestCase):
@@ -4530,8 +6692,8 @@ class SharedEmbeddingColumnTest(test.TestCase):
             values=(1, 2, 1),
             dense_shape=(2, 2)),
     }
-    outputs = _transform_features(features, [a, a_embedded, b, b_embedded],
-                                  None)
+    outputs = fc._transform_features(features, [a, a_embedded, b, b_embedded],
+                                     None)
     output_a = outputs[a]
     output_a_embedded = outputs[a_embedded]
     output_b = outputs[b]
@@ -4599,9 +6761,9 @@ class SharedEmbeddingColumnTest(test.TestCase):
 
     # Provide sparse input and get dense result.
     embedding_lookup_a = embedding_column_a.get_dense_tensor(
-        FeatureTransformationCache(input_features), state_manager)
+        fc.FeatureTransformationCache(input_features), state_manager)
     embedding_lookup_b = embedding_column_b.get_dense_tensor(
-        FeatureTransformationCache(input_features), state_manager)
+        fc.FeatureTransformationCache(input_features), state_manager)
 
     # Assert expected embedding variable and lookups.
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
@@ -4665,9 +6827,9 @@ class SharedEmbeddingColumnTest(test.TestCase):
 
     # Provide sparse input and get dense result.
     embedding_lookup_a = embedding_column_a.get_dense_tensor(
-        FeatureTransformationCache(input_features), state_manager)
+        fc.FeatureTransformationCache(input_features), state_manager)
     embedding_lookup_b = embedding_column_b.get_dense_tensor(
-        FeatureTransformationCache(input_features), state_manager)
+        fc.FeatureTransformationCache(input_features), state_manager)
 
     with _initialized_session() as sess:
       sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict)
@@ -4852,7 +7014,7 @@ class SharedEmbeddingColumnTest(test.TestCase):
     }
 
     # Provide sparse input and get dense result.
-    feature_layer = FeatureLayer(
+    feature_layer = fc.FeatureLayer(
         feature_columns=(embedding_column_b, embedding_column_a,
                          embedding_column_c, embedding_column_d),
         shared_state_manager=shared_state_manager)(
@@ -4946,6 +7108,14 @@ class WeightedCategoricalColumnTest(test.TestCase):
         'ids': parsing_ops.VarLenFeature(dtypes.int64),
         'values': parsing_ops.VarLenFeature(dtypes.float32)
     }, column.parse_example_spec)
+    self.assertTrue(column._is_v2_column)
+
+  def test_is_v2_column(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc_old.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    self.assertFalse(column._is_v2_column)
 
   def test_deep_copy(self):
     """Tests deepcopy of categorical_column_with_hash_bucket."""
@@ -4987,7 +7157,10 @@ class WeightedCategoricalColumnTest(test.TestCase):
         values=('omar', 'stringer', 'marlo'),
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(ValueError, 'Bad dtype'):
-      _transform_features({'ids': strings, 'values': strings}, (column,), None)
+      fc._transform_features({
+          'ids': strings,
+          'values': strings
+      }, (column,), None)
 
   def test_column_name_collision(self):
     with self.assertRaisesRegexp(ValueError, r'Parse config.*already exists'):
@@ -5007,7 +7180,7 @@ class WeightedCategoricalColumnTest(test.TestCase):
         dense_shape=(2, 2))
     with self.assertRaisesRegexp(
         ValueError, 'values is not in features dictionary'):
-      _transform_features({'ids': inputs}, (column,), None)
+      fc._transform_features({'ids': inputs}, (column,), None)
 
   def test_parse_example(self):
     a = fc.categorical_column_with_vocabulary_list(
@@ -5056,7 +7229,7 @@ class WeightedCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=(0.5, 1.0, 0.1),
         dense_shape=(2, 2))
-    id_tensor, weight_tensor = _transform_features({
+    id_tensor, weight_tensor = fc._transform_features({
         'ids': inputs,
         'values': weights,
     }, (column,), None)[column]
@@ -5085,7 +7258,7 @@ class WeightedCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=(0.5, 1.0, 0.1),
         dense_shape=(2, 2))
-    id_tensor, weight_tensor = _transform_features({
+    id_tensor, weight_tensor = fc._transform_features({
         'ids': ((0, -1), (1, 0)),
         'values': weights,
     }, (column,), None)[column]
@@ -5114,7 +7287,7 @@ class WeightedCategoricalColumnTest(test.TestCase):
         indices=((0, 0), (1, 0), (1, 1)),
         values=(2, 1, 0),
         dense_shape=(2, 2))
-    id_tensor, weight_tensor = _transform_features({
+    id_tensor, weight_tensor = fc._transform_features({
         'ids': inputs,
         'values': ((.5, 0.), (1., .1)),
     }, (column,), None)[column]
@@ -5236,6 +7409,137 @@ class WeightedCategoricalColumnTest(test.TestCase):
         # = 3*1 + 2*.1 = 3+.2 = 3.2
         self.assertAllClose(((.5,), (3.2,)), predictions.eval())
 
+  def test_old_linear_model(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'ids':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2)),
+          'values':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(.5, 1., .1),
+                  dense_shape=(2, 2))
+      }, (column,))
+      bias = get_linear_model_bias()
+      weight_var = get_linear_model_column_var(column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        weight_var.assign(((1.,), (2.,), (3.,))).eval()
+        # weight_var[0] * weights[0, 0] = 1 * .5 = .5
+        # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1]
+        # = 3*1 + 2*.1 = 3+.2 = 3.2
+        self.assertAllClose(((.5,), (3.2,)), predictions.eval())
+
+  def test_old_linear_model_mismatched_shape(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      with self.assertRaisesRegexp(ValueError,
+                                   r'Dimensions.*are not compatible'):
+        fc_old.linear_model({
+            'ids':
+                sparse_tensor.SparseTensorValue(
+                    indices=((0, 0), (1, 0), (1, 1)),
+                    values=(0, 2, 1),
+                    dense_shape=(2, 2)),
+            'values':
+                sparse_tensor.SparseTensorValue(
+                    indices=((0, 0), (0, 1), (1, 0), (1, 1)),
+                    values=(.5, 11., 1., .1),
+                    dense_shape=(2, 2))
+        }, (column,))
+
+  def test_old_linear_model_mismatched_dense_values(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'ids':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2)),
+          'values': ((.5,), (1.,))
+      }, (column,),
+                                        sparse_combiner='mean')
+      # Disabling the constant folding optimizer here since it changes the
+      # error message differently on CPU and GPU.
+      config = config_pb2.ConfigProto()
+      config.graph_options.rewrite_options.constant_folding = (
+          rewriter_config_pb2.RewriterConfig.OFF)
+      with _initialized_session(config):
+        with self.assertRaisesRegexp(errors.OpError, 'Incompatible shapes'):
+          predictions.eval()
+
+  def test_old_linear_model_mismatched_dense_shape(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'ids':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2)),
+          'values': ((.5,), (1.,), (.1,))
+      }, (column,))
+      bias = get_linear_model_bias()
+      weight_var = get_linear_model_column_var(column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        weight_var.assign(((1.,), (2.,), (3.,))).eval()
+        # weight_var[0] * weights[0, 0] = 1 * .5 = .5
+        # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1]
+        # = 3*1 + 2*.1 = 3+.2 = 3.2
+        self.assertAllClose(((.5,), (3.2,)), predictions.eval())
+
+  def test_old_linear_model_old_categorical(self):
+    column = fc.weighted_categorical_column(
+        categorical_column=fc_old.categorical_column_with_identity(
+            key='ids', num_buckets=3),
+        weight_feature_key='values')
+    with ops.Graph().as_default():
+      predictions = fc_old.linear_model({
+          'ids':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(0, 2, 1),
+                  dense_shape=(2, 2)),
+          'values':
+              sparse_tensor.SparseTensorValue(
+                  indices=((0, 0), (1, 0), (1, 1)),
+                  values=(.5, 1., .1),
+                  dense_shape=(2, 2))
+      }, (column,))
+      bias = get_linear_model_bias()
+      weight_var = get_linear_model_column_var(column)
+      with _initialized_session():
+        self.assertAllClose((0.,), bias.eval())
+        self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval())
+        self.assertAllClose(((0.,), (0.,)), predictions.eval())
+        weight_var.assign(((1.,), (2.,), (3.,))).eval()
+        # weight_var[0] * weights[0, 0] = 1 * .5 = .5
+        # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1]
+        # = 3*1 + 2*.1 = 3+.2 = 3.2
+        self.assertAllClose(((.5,), (3.2,)), predictions.eval())
+
   # TODO(ptucker): Add test with embedding of weighted categorical.
 
 if __name__ == '__main__':
-- 
GitLab


From 45fb1429f86b5ee6589fd50d8325843b49f78409 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 08:48:02 -0700
Subject: [PATCH 0600/1085] Avoid extra calls to set_random_seed, as it is
 already called in tensorflowtestcase.

PiperOrigin-RevId: 216363450
---
 .../python/kernel_tests/linalg/linear_operator_addition_test.py | 2 --
 .../kernel_tests/linalg/linear_operator_block_diag_test.py      | 2 --
 .../kernel_tests/linalg/linear_operator_composition_test.py     | 2 --
 .../python/kernel_tests/linalg/linear_operator_diag_test.py     | 2 --
 .../kernel_tests/linalg/linear_operator_full_matrix_test.py     | 2 --
 .../python/kernel_tests/linalg/linear_operator_identity_test.py | 2 --
 .../kernel_tests/linalg/linear_operator_kronecker_test.py       | 2 --
 .../kernel_tests/linalg/linear_operator_low_rank_update_test.py | 2 --
 .../linalg/linear_operator_lower_triangular_test.py             | 2 --
 .../python/kernel_tests/linalg/linear_operator_util_test.py     | 2 --
 .../python/kernel_tests/linalg/linear_operator_zeros_test.py    | 2 --
 11 files changed, 22 deletions(-)

diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py
index cf56168d63..628ed998c5 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py
@@ -19,14 +19,12 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_addition
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 add_operators = linear_operator_addition.add_operators
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py
index 3ede2aceaa..30951b1b0e 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_block_diag_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_block_diag as block_diag
@@ -29,7 +28,6 @@ from tensorflow.python.ops.linalg import linear_operator_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py
index 99497914f2..02f56db596 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py
@@ -21,7 +21,6 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
@@ -29,7 +28,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py
index 52861ae84a..0758349531 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py
@@ -17,7 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
@@ -27,7 +26,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 
 
 class LinearOperatorDiagTest(
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py
index 8373b5263f..8c2d2cf077 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
@@ -28,7 +27,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 
 
 class SquareLinearOperatorFullMatrixTest(
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py
index 0c3c6b390f..465a8194dd 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import random_ops
@@ -29,7 +28,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(2016)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py
index 7e81c9c6c4..f039b60f64 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_kronecker_test.py
@@ -21,7 +21,6 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_kronecker as kronecker
@@ -30,7 +29,6 @@ from tensorflow.python.ops.linalg import linear_operator_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py
index 61268607a4..207e5edf81 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
@@ -28,7 +27,6 @@ from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py
index eb4bff915b..e3c8f5cb68 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py
@@ -17,14 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 linalg = linalg_lib
-random_seed.set_random_seed(23)
 
 
 class LinearOperatorLowerTriangularTest(
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py
index 86847d38c2..13218787e2 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py
@@ -21,14 +21,12 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.linalg import linear_operator_util
 from tensorflow.python.platform import test
 
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(0)
 
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py
index f0556304ad..ad97d1a93e 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_zeros_test.py
@@ -20,14 +20,12 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.linalg import linalg as linalg_lib
 from tensorflow.python.ops.linalg import linear_operator_test_util
 from tensorflow.python.platform import test
 
 
-random_seed.set_random_seed(23)
 rng = np.random.RandomState(2016)
 
 
-- 
GitLab


From 32b9901c0e20f82831a5cf0a42b016e7ff5197d0 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 9 Oct 2018 09:17:04 -0700
Subject: [PATCH 0601/1085] Internal change

PiperOrigin-RevId: 216367867
---
 tensorflow/contrib/lite/build_def.bzl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index b9e933a8b6..b3607a761c 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -313,8 +313,8 @@ def generated_test_models_failing(conversion_mode):
             "arg_min_max",
             "div",
             "floor_div",
-            "gather ",
-            "lstm ",
+            "gather",
+            "lstm",
             "resize_bilinear",
             "space_to_batch_nd",
             "split",
-- 
GitLab


From df11cce2e600581087f29ef0b85286f7e582572d Mon Sep 17 00:00:00 2001
From: Tamara Norman <tamaranorman@google.com>
Date: Tue, 9 Oct 2018 09:18:53 -0700
Subject: [PATCH 0602/1085] Throw error when evaluating have variable target in
 GradientTape.

PiperOrigin-RevId: 216368178
---
 tensorflow/python/eager/backprop.py      |  9 ++++++++-
 tensorflow/python/eager/backprop_test.py | 12 +++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index deac29111f..44ce69ee60 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -868,6 +868,7 @@ class GradientTape(object):
     Raises:
       RuntimeError: if called inside the context of the tape, or if called more
        than once on a non-persistent tape.
+      ValueError: if called on variable target.
     """
     if self._tape is None:
       raise RuntimeError("GradientTape.gradient can only be called once on "
@@ -887,6 +888,12 @@ class GradientTape(object):
                             "gradient in order to compute higher order "
                             "derrivatives.", 1)
 
+    flat_targets = nest.flatten(target)
+    for t in flat_targets:
+      if resource_variable_ops.is_resource_variable(t):
+        raise ValueError("GradientTape.gradient is not supported for variable "
+                         "targets.")
+
     flat_sources = nest.flatten(sources)
     flat_sources = [_handle_or_self(x) for x in flat_sources]
 
@@ -896,7 +903,7 @@ class GradientTape(object):
 
     flat_grad = imperative_grad.imperative_grad(
         self._tape,
-        nest.flatten(target),
+        flat_targets,
         flat_sources,
         output_gradients=output_gradients)
 
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 32731747b7..7e5c9f3cb6 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -548,6 +548,17 @@ class BackpropTest(test.TestCase):
     grad = g.gradient(y, [x])[0]
     self.assertEqual(self.evaluate(grad), 6.0)
 
+  @test_util.assert_no_new_tensors
+  @test_util.run_in_graph_and_eager_modes
+  def testGadientTapeCalledOnConstantTarget(self):
+    with backprop.GradientTape() as g:
+      x = variables.Variable([3.0])
+      y = variables.Variable([2.0])
+    with self.assertRaisesRegexp(
+        ValueError,
+        'GradientTape.gradient is not supported for variable targets.'):
+      g.gradient(x, y)
+
   @test_util.run_in_graph_and_eager_modes
   def testGradientTapeWithCond(self):
     x = constant_op.constant(3.0)
@@ -982,7 +993,6 @@ class BackpropTest(test.TestCase):
     self.assertIsNone(dy)
     self.assertEqual(self.evaluate(dz), 3.0)
 
-
   @test_util.run_in_graph_and_eager_modes
   def testDifferentiatingScalarCache(self):
     # In the following test, if x2 = x1 (i.e the objects are the exact same),
-- 
GitLab


From 92d533d19c44ab838a1f7954350fdafd62cfa889 Mon Sep 17 00:00:00 2001
From: Peter Ma <pcma@google.com>
Date: Tue, 9 Oct 2018 09:24:57 -0700
Subject: [PATCH 0603/1085] Change LOG(WARNING) to VLOG(1) in utils

PiperOrigin-RevId: 216369081
---
 tensorflow/core/grappler/costs/utils.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index 2fcadf1de3..87b74e2952 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -209,7 +209,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) {
   // Can't infer the size if the rank is unknown. It has to be at least a
   // scalar though.
   if (shape.unknown_rank()) {
-    LOG(WARNING) << "CalculateTensorSize() -- unknown rank";
+    VLOG(1) << "CalculateTensorSize() -- unknown rank";
     return size;
   }
 
@@ -217,7 +217,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) {
   for (int i = 0; i < shape.dim_size(); ++i) {
     if (shape.dim(i).size() < 0) {
       shape.mutable_dim(i)->set_size(1);
-      LOG(WARNING) << "CalculateTensorSize() -- unknown dim: " << i;
+      VLOG(1) << "CalculateTensorSize() -- unknown dim: " << i;
     }
   }
 
-- 
GitLab


From 87d8055c74a65ec9fb2a13f38e6e2c5d30b7e2e4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 09:30:32 -0700
Subject: [PATCH 0604/1085] Correctly pre-reserve visit state in
 HloInstruction::PostOrderDFS

Previously we pre-reserverd the visit state based on the number of
instructions but then started to index it with the instruction unique ID
what can be larger then the instruction count. This resulted in some
very expensive re-allocations what can be eliminated by reserving the
correctly sized buffer.

PiperOrigin-RevId: 216369849
---
 tensorflow/compiler/xla/service/hlo_instruction.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 5c3908a9a4..050d28b289 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2474,7 +2474,7 @@ template <typename Visitor>
 static Status PostOrderDFS(HloInstruction* root, Visitor* visitor,
                            const InternalCompareFunction* operand_order,
                            bool ignore_control_predecessors) {
-  visitor->ReserveVisitStates(root->GetModule()->instruction_count());
+  visitor->ReserveVisitStates(root->GetModule()->NumUniqueInstructionIds());
 
   // dfs_stack holds pairs of <HloInstruction*->unique_id(), HloInstruction*>.
   //
-- 
GitLab


From 3e1a0792fb593953860162d57320c8602fd199eb Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Tue, 9 Oct 2018 09:32:50 -0700
Subject: [PATCH 0605/1085] Create SDCAOptimizerV2 op to fix the "adaptative"
 typo.

PiperOrigin-RevId: 216370193
---
 .../linear_optimizer/python/ops/sdca_ops.py   |  57 ++++--
 .../base_api/api_def_SdcaOptimizerV2.pbtxt    | 171 ++++++++++++++++++
 tensorflow/core/kernels/sdca_ops.cc           |   8 +-
 tensorflow/core/ops/sdca_ops.cc               |  28 +++
 tensorflow/python/ops/sdca_ops.py             |   1 +
 5 files changed, 246 insertions(+), 19 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt

diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index b98adf862b..48ac429701 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -22,6 +22,7 @@ import collections
 from six.moves import range
 
 from tensorflow.contrib.linear_optimizer.python.ops.sharded_mutable_dense_hashtable import ShardedMutableDenseHashTable
+from tensorflow.python.compat import compat
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -485,24 +486,44 @@ class SdcaModel(object):
         sparse_weights.append(batch_gathered_weights)
 
       # pylint: disable=protected-access
-      esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
-          sparse_example_indices,
-          sparse_feature_indices,
-          sparse_features_values,
-          self._convert_n_to_tensor(self._examples['dense_features']),
-          internal_convert_to_tensor(self._examples['example_weights']),
-          internal_convert_to_tensor(self._examples['example_labels']),
-          sparse_indices,
-          sparse_weights,
-          self._convert_n_to_tensor(self._slots[
-              'unshrinked_dense_features_weights']),
-          example_state_data,
-          loss_type=self._options['loss_type'],
-          l1=self._options['symmetric_l1_regularization'],
-          l2=self._symmetric_l2_regularization(),
-          num_loss_partitions=self._num_loss_partitions(),
-          num_inner_iterations=1,
-          adaptative=self._adaptive())
+      if compat.forward_compatible(year=2018, month=10, day=30):
+        esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2(
+            sparse_example_indices,
+            sparse_feature_indices,
+            sparse_features_values,
+            self._convert_n_to_tensor(self._examples['dense_features']),
+            internal_convert_to_tensor(self._examples['example_weights']),
+            internal_convert_to_tensor(self._examples['example_labels']),
+            sparse_indices,
+            sparse_weights,
+            self._convert_n_to_tensor(self._slots[
+                'unshrinked_dense_features_weights']),
+            example_state_data,
+            loss_type=self._options['loss_type'],
+            l1=self._options['symmetric_l1_regularization'],
+            l2=self._symmetric_l2_regularization(),
+            num_loss_partitions=self._num_loss_partitions(),
+            num_inner_iterations=1,
+            adaptive=self._adaptive())
+      else:
+        esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
+            sparse_example_indices,
+            sparse_feature_indices,
+            sparse_features_values,
+            self._convert_n_to_tensor(self._examples['dense_features']),
+            internal_convert_to_tensor(self._examples['example_weights']),
+            internal_convert_to_tensor(self._examples['example_labels']),
+            sparse_indices,
+            sparse_weights,
+            self._convert_n_to_tensor(self._slots[
+                'unshrinked_dense_features_weights']),
+            example_state_data,
+            loss_type=self._options['loss_type'],
+            l1=self._options['symmetric_l1_regularization'],
+            l2=self._symmetric_l2_regularization(),
+            num_loss_partitions=self._num_loss_partitions(),
+            num_inner_iterations=1,
+            adaptative=self._adaptive())
       # pylint: enable=protected-access
 
       with ops.control_dependencies([esu]):
diff --git a/tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt
new file mode 100644
index 0000000000..c615dee8c7
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt
@@ -0,0 +1,171 @@
+op {
+  graph_op_name: "SdcaOptimizerV2"
+  visibility: HIDDEN
+  in_arg {
+    name: "sparse_example_indices"
+    description: <<END
+a list of vectors which contain example indices.
+END
+  }
+  in_arg {
+    name: "sparse_feature_indices"
+    description: <<END
+a list of vectors which contain feature indices.
+END
+  }
+  in_arg {
+    name: "sparse_feature_values"
+    description: <<END
+a list of vectors which contains feature value
+associated with each feature group.
+END
+  }
+  in_arg {
+    name: "dense_features"
+    description: <<END
+a list of matrices which contains the dense feature values.
+END
+  }
+  in_arg {
+    name: "example_weights"
+    description: <<END
+a vector which contains the weight associated with each
+example.
+END
+  }
+  in_arg {
+    name: "example_labels"
+    description: <<END
+a vector which contains the label/target associated with each
+example.
+END
+  }
+  in_arg {
+    name: "sparse_indices"
+    description: <<END
+a list of vectors where each value is the indices which has
+corresponding weights in sparse_weights. This field maybe omitted for the
+dense approach.
+END
+  }
+  in_arg {
+    name: "sparse_weights"
+    description: <<END
+a list of vectors where each value is the weight associated with
+a sparse feature group.
+END
+  }
+  in_arg {
+    name: "dense_weights"
+    description: <<END
+a list of vectors where the values are the weights associated
+with a dense feature group.
+END
+  }
+  in_arg {
+    name: "example_state_data"
+    description: <<END
+a list of vectors containing the example state data.
+END
+  }
+  out_arg {
+    name: "out_example_state_data"
+    description: <<END
+a list of vectors containing the updated example state
+data.
+END
+  }
+  out_arg {
+    name: "out_delta_sparse_weights"
+    description: <<END
+a list of vectors where each value is the delta
+weights associated with a sparse feature group.
+END
+  }
+  out_arg {
+    name: "out_delta_dense_weights"
+    description: <<END
+a list of vectors where the values are the delta
+weights associated with a dense feature group.
+END
+  }
+  attr {
+    name: "loss_type"
+    description: <<END
+Type of the primal loss. Currently SdcaSolver supports logistic,
+squared and hinge losses.
+END
+  }
+  attr {
+    name: "adaptive"
+    default_value {
+      b: True
+    }
+    description: <<END
+Whether to use Adaptive SDCA for the inner loop.
+END
+  }
+  attr {
+    name: "num_sparse_features"
+    description: <<END
+Number of sparse feature groups to train on.
+END
+  }
+  attr {
+    name: "num_sparse_features_with_values"
+    description: <<END
+Number of sparse feature groups with values
+associated with it, otherwise implicitly treats values as 1.0.
+END
+  }
+  attr {
+    name: "num_dense_features"
+    description: <<END
+Number of dense feature groups to train on.
+END
+  }
+  attr {
+    name: "l1"
+    description: <<END
+Symmetric l1 regularization strength.
+END
+  }
+  attr {
+    name: "l2"
+    description: <<END
+Symmetric l2 regularization strength.
+END
+  }
+  attr {
+    name: "num_loss_partitions"
+    description: <<END
+Number of partitions of the global loss function.
+END
+  }
+  attr {
+    name: "num_inner_iterations"
+    description: <<END
+Number of iterations per mini-batch.
+END
+  }
+  summary: "Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for"
+  description: <<END
+linear models with L1 + L2 regularization. As global optimization objective is
+strongly-convex, the optimizer optimizes the dual objective at each step. The
+optimizer applies each update one example at a time. Examples are sampled
+uniformly, and the optimizer is learning rate free and enjoys linear convergence
+rate.
+
+[Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
+Shai Shalev-Shwartz, Tong Zhang. 2012
+
+$$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$
+
+[Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
+Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
+Peter Richtarik, Martin Takac. 2015
+
+[Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
+Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
+END
+}
diff --git a/tensorflow/core/kernels/sdca_ops.cc b/tensorflow/core/kernels/sdca_ops.cc
index 3bd4168dc7..d0e0b15da7 100644
--- a/tensorflow/core/kernels/sdca_ops.cc
+++ b/tensorflow/core/kernels/sdca_ops.cc
@@ -83,7 +83,11 @@ struct ComputeOptions {
           context, false,
           errors::InvalidArgument("Unsupported loss type: ", loss_type));
     }
-    OP_REQUIRES_OK(context, context->GetAttr("adaptative", &adaptive));
+    auto s = context->GetAttr("adaptative", &adaptive);
+    if (!s.ok()) {
+      s = context->GetAttr("adaptive", &adaptive);
+    }
+    OP_REQUIRES_OK(context, s);
     OP_REQUIRES_OK(
         context, context->GetAttr("num_sparse_features", &num_sparse_features));
     OP_REQUIRES_OK(context, context->GetAttr("num_sparse_features_with_values",
@@ -245,6 +249,8 @@ class SdcaOptimizer : public OpKernel {
 };
 REGISTER_KERNEL_BUILDER(Name("SdcaOptimizer").Device(DEVICE_CPU),
                         SdcaOptimizer);
+REGISTER_KERNEL_BUILDER(Name("SdcaOptimizerV2").Device(DEVICE_CPU),
+                        SdcaOptimizer);
 
 class SdcaShrinkL1 : public OpKernel {
  public:
diff --git a/tensorflow/core/ops/sdca_ops.cc b/tensorflow/core/ops/sdca_ops.cc
index fdf53a55dd..51d248f2d6 100644
--- a/tensorflow/core/ops/sdca_ops.cc
+++ b/tensorflow/core/ops/sdca_ops.cc
@@ -65,6 +65,34 @@ REGISTER_OP("SdcaOptimizer")
     .Output("out_delta_dense_weights: num_dense_features * float")
     .SetShapeFn(ApplySdcaOptimizerShapeFn);
 
+// The SdcaOptimizerV2 op fixes the "adaptative" typo in v1.
+REGISTER_OP("SdcaOptimizerV2")
+    .Attr(
+        "loss_type: {'logistic_loss', 'squared_loss', 'hinge_loss',"
+        "'smooth_hinge_loss', 'poisson_loss'}")
+    .Attr("adaptive : bool=false")
+    .Attr("num_sparse_features: int >= 0")
+    .Attr("num_sparse_features_with_values: int >= 0")
+    .Attr("num_dense_features: int >= 0")
+    .Attr("l1: float")
+    .Attr("l2: float")
+    .Attr("num_loss_partitions: int >= 1")
+    .Attr("num_inner_iterations: int >= 1")
+    .Input("sparse_example_indices: num_sparse_features * int64")
+    .Input("sparse_feature_indices: num_sparse_features * int64")
+    .Input("sparse_feature_values: num_sparse_features_with_values * float")
+    .Input("dense_features: num_dense_features * float")
+    .Input("example_weights: float")
+    .Input("example_labels: float")
+    .Input("sparse_indices: num_sparse_features * int64")
+    .Input("sparse_weights: num_sparse_features * float")
+    .Input("dense_weights: num_dense_features * float")
+    .Input("example_state_data: float")
+    .Output("out_example_state_data: float")
+    .Output("out_delta_sparse_weights: num_sparse_features * float")
+    .Output("out_delta_dense_weights: num_dense_features * float")
+    .SetShapeFn(ApplySdcaOptimizerShapeFn);
+
 REGISTER_OP("SdcaShrinkL1")
     .Attr("num_features: int >= 0")
     .Attr("l1: float")
diff --git a/tensorflow/python/ops/sdca_ops.py b/tensorflow/python/ops/sdca_ops.py
index 4d5aeec591..a1c68343ed 100644
--- a/tensorflow/python/ops/sdca_ops.py
+++ b/tensorflow/python/ops/sdca_ops.py
@@ -29,4 +29,5 @@ from tensorflow.python.ops.gen_sdca_ops import *
 
 ops.NotDifferentiable("SdcaFprint")
 ops.NotDifferentiable("SdcaOptimizer")
+ops.NotDifferentiable("SdcaOptimizerV2")
 ops.NotDifferentiable("SdcaShrinkL1")
-- 
GitLab


From 5d6adc910b8323b73a61d3089f3a3028be411e90 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 09:33:59 -0700
Subject: [PATCH 0606/1085] Improve docstring for tf.data.Dataset.shuffle()

PiperOrigin-RevId: 216370329
---
 tensorflow/python/data/ops/dataset_ops.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index b7e19055f2..cf52f7529a 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -733,6 +733,11 @@ class Dataset(object):
   def shuffle(self, buffer_size, seed=None, reshuffle_each_iteration=None):
     """Randomly shuffles the elements of this dataset.
 
+    This dataset fills a buffer with `buffer_size` elements, then randomly
+    samples elements from this buffer, replacing the selected elements with new
+    elements. For perfect shuffling, a buffer size greater than or equal to the
+    full size of the dataset is required.
+
     Args:
       buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the
         number of elements from this dataset from which the new
-- 
GitLab


From 3ef35b81fd753401e3d69989b3bd1146749cc3b3 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 9 Oct 2018 09:34:47 -0700
Subject: [PATCH 0607/1085] Include live-in symbols in liveness analysis. These
 are required for control flow conversion.

PiperOrigin-RevId: 216370439
---
 tensorflow/python/autograph/pyct/anno.py      |  1 +
 tensorflow/python/autograph/pyct/cfg.py       | 10 ++-
 .../pyct/static_analysis/liveness.py          | 36 +++++---
 .../pyct/static_analysis/liveness_test.py     | 86 +++++++++++++++++--
 4 files changed, 112 insertions(+), 21 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py
index 1a52110ef3..5392e6ea03 100644
--- a/tensorflow/python/autograph/pyct/anno.py
+++ b/tensorflow/python/autograph/pyct/anno.py
@@ -91,6 +91,7 @@ class Static(NoValue):
   DEFINED_VARS_IN = (
       'Symbols defined when entering the node. See reaching_definitions.py.')
   LIVE_VARS_OUT = ('Symbols live when exiting the node. See liveness.py.')
+  LIVE_VARS_IN = ('Symbols live when entering the node. See liveness.py.')
 
 
 FAIL = object()
diff --git a/tensorflow/python/autograph/pyct/cfg.py b/tensorflow/python/autograph/pyct/cfg.py
index fca0eb62e4..ec733ea38f 100644
--- a/tensorflow/python/autograph/pyct/cfg.py
+++ b/tensorflow/python/autograph/pyct/cfg.py
@@ -22,6 +22,10 @@ Once built, the CFG itself is immutable, but the values it holds need not be;
 they are usually annotated with information extracted by walking the graph.
 """
 
+# TODO(mdan): The notion of 'statements' below is inaccurate.
+# They should rather be called 'block statements', because they include
+# statements that may have a body, e.g. if and while.
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -763,9 +767,9 @@ class AstToCfg(gast.NodeVisitor):
 
     self.builder.enter_section(node)
 
-    # TODO(mdan): Strictly speaking, this should be node.target + node.iter.
-    # A blind dataflow analysis would have to process both node.target and
-    # node.iter to properly process read and write access.
+    # Note: Strictly speaking, this should be node.target + node.iter.
+    # However, the activity analysis accounts for this inconsistency,
+    # so dataflow analysis produces the correct values.
     self.builder.enter_loop_section(node, node.iter)
     for stmt in node.body:
       self.visit(stmt)
diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness.py b/tensorflow/python/autograph/pyct/static_analysis/liveness.py
index 41c903beb9..36960d0103 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/liveness.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/liveness.py
@@ -14,8 +14,13 @@
 # ==============================================================================
 """Live variable analysis.
 
-This analysis attaches a set containing the live symbols that are live at the
-exit of control flow statements.
+See https://en.wikipedia.org/wiki/Live_variable_analysis for a definition of
+the following idioms: live variable, live in, live out, which are used
+throughout this file.
+
+This analysis attaches the following:
+ * symbols that are live at the exit of control flow statements
+ * symbols that are live at the entry of control flow statements
 
 Requires activity analysis.
 """
@@ -164,23 +169,34 @@ class Annotator(transformer.Base):
     self.current_analyzer = parent_analyzer
     return node
 
-  def _aggregate_successors_live_in(self, node):
+  def _block_statement_live_out(self, node):
     successors = self.current_analyzer.graph.stmt_next[node]
-    node_live_out = set()
+    stmt_live_out = set()
     for s in successors:
-      node_live_out.update(self.current_analyzer.in_[s])
-    anno.setanno(node, anno.Static.LIVE_VARS_OUT, frozenset(node_live_out))
-    node = self.generic_visit(node)
+      stmt_live_out.update(self.current_analyzer.in_[s])
+    anno.setanno(node, anno.Static.LIVE_VARS_OUT, frozenset(stmt_live_out))
+    return node
+
+  def _block_statement_live_in(self, node, entry_node):
+    cfg_node = self.current_analyzer.graph.index[entry_node]
+    stmt_live_in = frozenset(self.current_analyzer.in_[cfg_node])
+    anno.setanno(node, anno.Static.LIVE_VARS_IN, stmt_live_in)
     return node
 
   def visit_If(self, node):
-    return self._aggregate_successors_live_in(node)
+    node = self.generic_visit(node)
+    node = self._block_statement_live_out(node)
+    return self._block_statement_live_in(node, node.test)
 
   def visit_For(self, node):
-    return self._aggregate_successors_live_in(node)
+    node = self.generic_visit(node)
+    node = self._block_statement_live_out(node)
+    return self._block_statement_live_in(node, node.iter)
 
   def visit_While(self, node):
-    return self._aggregate_successors_live_in(node)
+    node = self.generic_visit(node)
+    node = self._block_statement_live_out(node)
+    return self._block_statement_live_in(node, node.test)
 
 
 def resolve(node, source_info, graphs):
diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py
index 0d5f369e92..7b67f8f608 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py
@@ -47,14 +47,23 @@ class LivenessTest(test.TestCase):
 
   def assertHasLiveOut(self, node, expected):
     live_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT)
-    live_out_str = set(str(v) for v in live_out)
+    live_out_strs = set(str(v) for v in live_out)
     if not expected:
       expected = ()
     if not isinstance(expected, tuple):
       expected = (expected,)
-    self.assertSetEqual(live_out_str, set(expected))
+    self.assertSetEqual(live_out_strs, set(expected))
 
-  def test_stacked_if(self):
+  def assertHasLiveIn(self, node, expected):
+    live_in = anno.getanno(node, anno.Static.LIVE_VARS_IN)
+    live_in_strs = set(str(v) for v in live_in)
+    if not expected:
+      expected = ()
+    if not isinstance(expected, tuple):
+      expected = (expected,)
+    self.assertSetEqual(live_in_strs, set(expected))
+
+  def test_live_out_stacked_if(self):
 
     def test_fn(x, a):
       if a > 0:
@@ -69,7 +78,7 @@ class LivenessTest(test.TestCase):
     self.assertHasLiveOut(fn_body[0], ('a', 'x'))
     self.assertHasLiveOut(fn_body[1], 'x')
 
-  def test_stacked_if_else(self):
+  def test_live_out_stacked_if_else(self):
 
     def test_fn(x, a):
       if a > 0:
@@ -86,7 +95,7 @@ class LivenessTest(test.TestCase):
     self.assertHasLiveOut(fn_body[0], 'a')
     self.assertHasLiveOut(fn_body[1], 'x')
 
-  def test_for_basic(self):
+  def test_live_out_for_basic(self):
 
     def test_fn(x, a):
       for i in range(a):
@@ -98,7 +107,7 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveOut(fn_body[0], 'x')
 
-  def test_attributes(self):
+  def test_live_out_attributes(self):
 
     def test_fn(x, a):
       if a > 0:
@@ -110,7 +119,7 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveOut(fn_body[0], ('x.y', 'x'))
 
-  def test_nested_functions(self):
+  def test_live_out_nested_functions(self):
 
     def test_fn(a, b):
       if b:
@@ -126,7 +135,7 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveOut(fn_body[0], 'a')
 
-  def test_nested_functions_isolation(self):
+  def test_live_out_nested_functions_isolation(self):
 
     def test_fn(b):
       if b:
@@ -144,6 +153,67 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveOut(fn_body[0], 'max')
 
+  def test_live_in_stacked_if(self):
+
+    def test_fn(x, a, b, c):
+      if a > 0:
+        x = b
+      if c > 1:
+        x = 0
+      return x
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveIn(fn_body[0], ('a', 'b', 'c', 'x'))
+    self.assertHasLiveIn(fn_body[1], ('c', 'x'))
+
+  def test_live_in_stacked_if_else(self):
+
+    def test_fn(x, a, b, c, d):
+      if a > 1:
+        x = b
+      else:
+        x = c
+      if d > 0:
+        x = 0
+      return x
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveIn(fn_body[0], ('a', 'b', 'c', 'd'))
+    self.assertHasLiveIn(fn_body[1], ('d', 'x'))
+
+  def test_live_in_for_basic(self):
+
+    def test_fn(x, y, a):
+      for i in a:
+        x = i
+        y += x
+        z = 0
+      return y, z
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveIn(fn_body[0], ('a', 'y', 'z'))
+
+  def test_live_in_for_nested(self):
+
+    def test_fn(x, y, a):
+      for i in a:
+        for j in i:
+          x = i
+          y += x
+          z = j
+      return y, z
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveIn(fn_body[0], ('a', 'y', 'z'))
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 37146b89788c2a0796ca6b863bde9c4c0dc4068e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 09:46:04 -0700
Subject: [PATCH 0608/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216372144

---
 tensorflow/go/op/wrappers.go | 222 +++++++++++++++++------------------
 1 file changed, 111 insertions(+), 111 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index fe99915a6c..eb6df2af46 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -10415,6 +10415,79 @@ func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) {
 	return op.Output(0)
 }
 
+// Transforms a serialized tensorflow.TensorProto proto into a Tensor.
+//
+// Arguments:
+//	serialized: A scalar string containing a serialized TensorProto proto.
+//	out_type: The type of the serialized tensor.  The provided type must match the
+// type of the serialized tensor and no implicit conversion will take place.
+//
+// Returns A Tensor of type `out_type`.
+func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	opspec := tf.OpSpec{
+		Type: "ParseTensor",
+		Input: []tf.Input{
+			serialized,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax.
+type MaxPoolWithArgmaxAttr func(optionalAttr)
+
+// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value.
+// If not specified, defaults to DT_INT64
+func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr {
+	return func(m optionalAttr) {
+		m["Targmax"] = value
+	}
+}
+
+// Performs max pooling on the input and outputs both max values and indices.
+//
+// The indices in `argmax` are flattened, so that a maximum value at position
+// `[b, y, x, c]` becomes flattened index
+// `((b * height + y) * width + x) * channels + c`.
+//
+// The indices returned are always in `[0, height) x [0, width)` before flattening,
+// even if padding is involved and the mathematically correct answer is outside
+// (either negative or too large).  This is a bug, but fixing it is difficult to do
+// in a safe backwards compatible way, especially due to flattening.
+//
+// Arguments:
+//	input: 4-D with shape `[batch, height, width, channels]`.  Input to pool over.
+//	ksize: The size of the window for each dimension of the input tensor.
+//	strides: The stride of the sliding window for each dimension of the
+// input tensor.
+//	padding: The type of padding algorithm to use.
+//
+// Returns The max pooled output tensor.4-D.  The flattened indices of the max values chosen for each output.
+func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "MaxPoolWithArgmax",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
 // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
 type ResourceSparseApplyFtrlV2Attr func(optionalAttr)
 
@@ -14202,44 +14275,6 @@ func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Outpu
 	return op.Output(0), op.Output(1)
 }
 
-// Computes the mean along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// Computes a tensor such that
-// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is
-// over `j` such that `segment_ids[j] == i` and `N` is the total number of
-// values summed.
-//
-// If the mean is empty for a given segment ID `i`, `output[i] = 0`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMean.png" alt>
-// </div>
-//
-// Arguments:
-//
-//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SegmentMean",
-		Input: []tf.Input{
-			data, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp.
 type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr)
 
@@ -15941,79 +15976,6 @@ func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, label
 	return op.Output(0), op.Output(1)
 }
 
-// Transforms a serialized tensorflow.TensorProto proto into a Tensor.
-//
-// Arguments:
-//	serialized: A scalar string containing a serialized TensorProto proto.
-//	out_type: The type of the serialized tensor.  The provided type must match the
-// type of the serialized tensor and no implicit conversion will take place.
-//
-// Returns A Tensor of type `out_type`.
-func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"out_type": out_type}
-	opspec := tf.OpSpec{
-		Type: "ParseTensor",
-		Input: []tf.Input{
-			serialized,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax.
-type MaxPoolWithArgmaxAttr func(optionalAttr)
-
-// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value.
-// If not specified, defaults to DT_INT64
-func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr {
-	return func(m optionalAttr) {
-		m["Targmax"] = value
-	}
-}
-
-// Performs max pooling on the input and outputs both max values and indices.
-//
-// The indices in `argmax` are flattened, so that a maximum value at position
-// `[b, y, x, c]` becomes flattened index
-// `((b * height + y) * width + x) * channels + c`.
-//
-// The indices returned are always in `[0, height) x [0, width)` before flattening,
-// even if padding is involved and the mathematically correct answer is outside
-// (either negative or too large).  This is a bug, but fixing it is difficult to do
-// in a safe backwards compatible way, especially due to flattening.
-//
-// Arguments:
-//	input: 4-D with shape `[batch, height, width, channels]`.  Input to pool over.
-//	ksize: The size of the window for each dimension of the input tensor.
-//	strides: The stride of the sliding window for each dimension of the
-// input tensor.
-//	padding: The type of padding algorithm to use.
-//
-// Returns The max pooled output tensor.4-D.  The flattened indices of the max values chosen for each output.
-func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MaxPoolWithArgmax",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
 // Returns the truth value of NOT x element-wise.
 func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
@@ -21926,6 +21888,44 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Computes the mean along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// Computes a tensor such that
+// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is
+// over `j` such that `segment_ids[j] == i` and `N` is the total number of
+// values summed.
+//
+// If the mean is empty for a given segment ID `i`, `output[i] = 0`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentMean.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SegmentMean",
+		Input: []tf.Input{
+			data, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the minimum along segments of a tensor.
 //
 // Read
-- 
GitLab


From 1b4402137a76c8085c160edfcc0c3be3cfa8fa3a Mon Sep 17 00:00:00 2001
From: Tayo Oguntebi <tayo@google.com>
Date: Tue, 9 Oct 2018 10:05:11 -0700
Subject: [PATCH 0609/1085]   Fixes typo in Sort description.

PiperOrigin-RevId: 216375421
---
 tensorflow/compiler/xla/client/xla_builder.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 9ceede7a79..933c0e7b44 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -2002,7 +2002,7 @@ XlaOp Rev(const XlaOp& operand, absl::Span<const int64> dimensions);
 // the last dimension is chosen by default.
 //
 // If both keys and values are provided:
-// * The keys and the values must tensors with the same dimensions. The
+// * The keys and the values must be tensors with the same dimensions. The
 // element types of the tensors may be different.
 // * The result is a tuple that consists of a sorted tensor of keys (along the
 // provided dimension, as above) as the first element, and a tensor with their
-- 
GitLab


From 11f32ebbdcd4eaf5e9e09fe27571e26ec0bd9dd8 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Tue, 9 Oct 2018 10:40:23 -0700
Subject: [PATCH 0610/1085] [tf.data vectorization] Handle captured inputs in
 MapVectorization optimization

PiperOrigin-RevId: 216381943
---
 .../optimizers/data/map_vectorization.cc      | 31 +++++++++-------
 .../optimizers/data/vectorization_utils.cc    | 35 ++++++++++++++++---
 .../optimization/map_vectorization_test.py    |  9 ++---
 3 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
index a9254ed58b..0576d075c2 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc
@@ -60,14 +60,24 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node,
     graph_utils::CopyAttribute(k, map_node, map_defun_node);
   }
 
+  // Note that the inputs to the function are either regular arguments (for
+  // which the function is mapped across their 0th dimension) or captured inputs
+  // (for which the function takes the argument wholesale). We can infer
+  // the split between these arguments from the `map_node`'s attrs.
+  // The Targuments attr on `map_node` corresponds to a list of types of
+  // MapDataset's captured inputs.
+  auto t_captured = map_node.attr().at("Targuments");
+
   // Get types of input arguments from original map function
-  AttrValue t_args;
+  DataTypeVector t_args;  // Regular arguments
   for (const auto& input : vectorized_func->signature().input_arg()) {
-    t_args.mutable_list()->add_type(input.type());
+    t_args.push_back(input.type());
     map_defun_node->add_input(input.name());
   }
-  (*map_defun_node->mutable_attr())["Targuments"] = t_args;
-  AddNodeAttr("Tcaptured", DataTypeVector(), map_defun_node);
+  // Erase the captured arguments from Targuments
+  t_args.erase(t_args.end() - t_captured.list().type_size(), t_args.end());
+  AddNodeAttr("Targuments", t_args, map_defun_node);
+  AddNodeAttr("Tcaptured", t_captured, map_defun_node);
 
   // Set return values to match output names
   string output_prefix = strings::StrCat(map_defun_node->name(), ":output:");
@@ -96,7 +106,9 @@ FunctionDef* AddVectorizedFunction(const NodeDef& map_node,
       *vectorized_func, map_defun_node, library, &result);
 
   if (!s.ok()) {
-    LOG(ERROR) << "VectorizeMapDefun failed: " << s;
+    LOG(WARNING) << "VectorizeMapDefun failed. The function will only be "
+                    "naively vectorized with MapDefun. Reason: "
+                 << s;
     return vectorized_func;
   }
   return result;
@@ -129,10 +141,6 @@ bool IsStatefulFn(const FunctionLibraryDefinition& library,
   return false;
 }
 
-bool HasCapturedInputs(const NodeDef& map_node) {
-  return map_node.attr().at("Targuments").list().type_size() > 0;
-}
-
 NodeDef MakeNewBatchNode(const NodeDef& old_batch_node,
                          const NodeDef& input_node,
                          const FunctionDef& vectorized_func,
@@ -239,15 +247,12 @@ Status MapVectorization::Optimize(Cluster* cluster, const GrapplerItem& item,
     // Check that this is a valid optimization.
     if (!IsOutputShapesFullyDefined(*input_node) ||
         !IsOutputShapesFullyDefined(*map_node) ||
-        IsStatefulFn(function_library, *orig_func) ||
-        HasCapturedInputs(*map_node)) {
+        IsStatefulFn(function_library, *orig_func)) {
       // 1. If any of the inputs have an unknown shape, don't optimize, since
       // inputs might not be batchable.
       // 2. If any of the map func outputs have an unknown shape, don't
       // optimize, so that batching errors surface as before.
       // 3. If the function is stateful, don't vectorize it.
-      // 4. TODO(rachelim): Make this work for MapDataset with captured inputs
-      // by tiling inputs or modifying the signature of MapDefun.
       continue;
     }
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index ba857ab5d9..d977ff3198 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -253,8 +253,13 @@ Status Vectorization::AddConversionMapping(Node* op_node) {
     }
   }
 
-  TF_RETURN_IF_ERROR(vectorizer->Vectorize(*op_node, outer_scope_.get(),
-                                           std::move(inputs), &outputs));
+  Status s = vectorizer->Vectorize(*op_node, outer_scope_.get(),
+                                   std::move(inputs), &outputs);
+  if (!s.ok()) {
+    VLOG(2) << "Vectorizer for op \"" << op_node->type_string()
+            << "\" failed with error: " << s;
+    return s;
+  }
 
   if (op_node->num_outputs() != outputs.size()) {
     return errors::Internal(
@@ -481,17 +486,37 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked,
 }
 
 Status Vectorization::AddArgNodeMappings() {
-  for (auto arg_node : map_defun_fn_->arg_nodes) {
+  // Note that inputs to map_defun_fn_ are either regular arguments (for which
+  // the operations are mapped across their 0th dimension) or captured inputs
+  // (for which the operations apply to the argument wholesale).
+  int num_args =
+      map_defun_node_->attrs().Find("Targuments")->list().type_size();
+
+  auto add_conversion = [this](Node* arg_node, bool stacked) {
     Node* input_node;
     TF_RETURN_IF_ERROR(map_defun_node_->input_node(
         arg_node->attrs().Find("index")->i(), &input_node));
 
-    conversion_map_.insert({{arg_node, 0}, {input_node, 0, true}});
+    conversion_map_.insert({{arg_node, 0}, {input_node, 0, stacked}});
 
     // Control inputs
     conversion_map_.insert({{arg_node, Graph::kControlSlot},
-                            {input_node, Graph::kControlSlot, true}});
+                            {input_node, Graph::kControlSlot, stacked}});
+
+    return Status::OK();
+  };
+
+  // Regular arguments
+  for (int i = 0; i < num_args; ++i) {
+    TF_RETURN_IF_ERROR(add_conversion(map_defun_fn_->arg_nodes[i], true));
+  }
+
+  // Captured inputs. These are applied (without slicing) to every iteration of
+  // the map function, hence are mapped to unstacked nodes.
+  for (int i = num_args; i < map_defun_fn_->arg_nodes.size(); ++i) {
+    TF_RETURN_IF_ERROR(add_conversion(map_defun_fn_->arg_nodes[i], false));
   }
+
   return Status::OK();
 }
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index 971a2d94b9..803ff87924 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -105,15 +105,16 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testOptimizationWithCapturedInputs(self):
     # Tests that vectorization works with captured inputs
+    y = constant_op.constant(1, shape=(2,))
+    z = constant_op.constant(2, shape=(2,))
+
     def map_fn(x):
-      return x + y
+      return x, y, z
 
-    y = constant_op.constant(1, shape=(2,))
     base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2],
                                                            [3, 4]]).repeat(5)
-    # TODO(rachelim): when this optimization works, turn on expect_optimized
     unoptimized, optimized = self._get_test_datasets(
-        base_dataset, map_fn, expect_optimized=False)
+        base_dataset, map_fn, expect_optimized=True)
     self.assertDatasetsEqual(optimized, unoptimized)
 
   def testOptimizationIgnoreStateful(self):
-- 
GitLab


From aa8f428a9310b3fd8371bddf612e480b27618b2e Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 9 Oct 2018 10:47:19 -0700
Subject: [PATCH 0611/1085] Removing the _SHOULD_RECORD_SUMMARIES_NAME and
 _SUMMARY_WRITER_INIT_COLLECTION_NAME collections from the summaryV2
 implementation. Replacing them with global variables.

PiperOrigin-RevId: 216383152
---
 tensorflow/python/ops/summary_ops_v2.py | 56 +++++++++++++------------
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py
index a404507627..18cefb8e1c 100644
--- a/tensorflow/python/ops/summary_ops_v2.py
+++ b/tensorflow/python/ops/summary_ops_v2.py
@@ -43,11 +43,12 @@ from tensorflow.python.training import training_util
 from tensorflow.python.util import tf_contextlib
 
 
-# Name for a collection which is expected to have at most a single boolean
-# Tensor. If this tensor is True the summary ops will record summaries.
-_SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries"
+# A global dictionary mapping graph keys to boolean values indicating whether
+# we should record summaries for this particular graph or not.
+_SHOULD_RECORD_SUMMARIES = {}
 
-_SUMMARY_WRITER_INIT_COLLECTION_NAME = "_SUMMARY_WRITER_V2"
+# A global dictionary mapping graph keys to a list of summary writer init ops.
+_SUMMARY_WRITER_INIT_OP = {}
 
 _EXPERIMENT_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,256}$")
 _RUN_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,512}$")
@@ -56,14 +57,9 @@ _USER_NAME_PATTERNS = re.compile(r"^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$", re.I)
 
 def should_record_summaries():
   """Returns boolean Tensor which is true if summaries should be recorded."""
-  should_record_collection = ops.get_collection(_SHOULD_RECORD_SUMMARIES_NAME)
-  if not should_record_collection:
-    return False
-  if len(should_record_collection) != 1:
-    raise ValueError(
-        "More than one tensor specified for whether summaries "
-        "should be recorded: %s" % should_record_collection)
-  return should_record_collection[0]
+  global _SHOULD_RECORD_SUMMARIES
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  return _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
 
 
 # TODO(apassos) consider how to handle local step here.
@@ -72,38 +68,41 @@ def record_summaries_every_n_global_steps(n, global_step=None):
   """Sets the should_record_summaries Tensor to true if global_step % n == 0."""
   if global_step is None:
     global_step = training_util.get_or_create_global_step()
-  collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME)
-  old = collection_ref[:]
+  global _SHOULD_RECORD_SUMMARIES
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
   try:
     with ops.device("cpu:0"):
-      collection_ref[:] = [math_ops.equal(global_step % n, 0)]
+      _SHOULD_RECORD_SUMMARIES[key] = math_ops.equal(global_step % n, 0)
     yield
   finally:
-    collection_ref[:] = old
+    _SHOULD_RECORD_SUMMARIES[key] = old
 
 
 @tf_contextlib.contextmanager
 def always_record_summaries():
   """Sets the should_record_summaries Tensor to always true."""
-  collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME)
-  old = collection_ref[:]
+  global _SHOULD_RECORD_SUMMARIES
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
   try:
-    collection_ref[:] = [True]
+    _SHOULD_RECORD_SUMMARIES[key] = True
     yield
   finally:
-    collection_ref[:] = old
+    _SHOULD_RECORD_SUMMARIES[key] = old
 
 
 @tf_contextlib.contextmanager
 def never_record_summaries():
   """Sets the should_record_summaries Tensor to always false."""
-  collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME)
-  old = collection_ref[:]
+  global _SHOULD_RECORD_SUMMARIES
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
   try:
-    collection_ref[:] = [False]
+    _SHOULD_RECORD_SUMMARIES[key] = False
     yield
   finally:
-    collection_ref[:] = old
+    _SHOULD_RECORD_SUMMARIES[key] = old
 
 
 class SummaryWriter(object):
@@ -143,7 +142,6 @@ class SummaryWriter(object):
       finally:
         context.context().summary_writer_resource = old
 
-
   def init(self):
     """Operation to initialize the summary writer resource."""
     if self._resource is not None:
@@ -311,7 +309,9 @@ def _make_summary_writer(name, factory, **kwargs):
   if not context.executing_eagerly():
     # TODO(apassos): Consider doing this instead.
     #   ops.get_default_session().run(init_op)
-    ops.add_to_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME, init_op)
+    global _SUMMARY_WRITER_INIT_OP
+    key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+    _SUMMARY_WRITER_INIT_OP.setdefault(key, []).append(init_op)
   return SummaryWriter(resource, init_op_fn)
 
 
@@ -352,7 +352,9 @@ def summary_writer_initializer_op():
     raise RuntimeError(
         "tf.contrib.summary.summary_writer_initializer_op is only "
         "supported in graph mode.")
-  return ops.get_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME)
+  global _SUMMARY_WRITER_INIT_OP
+  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
+  return _SUMMARY_WRITER_INIT_OP.setdefault(key, [])
 
 
 def summary_writer_function(name, tensor, function, family=None):
-- 
GitLab


From 3e8af7ea6b70104b05be22797451d0218c9e5262 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Tue, 9 Oct 2018 10:58:03 -0700
Subject: [PATCH 0612/1085] Internal change.

PiperOrigin-RevId: 216385202
---
 .../lite/testing/model_coverage/model_coverage_lib.py  | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
index 72029ed03c..ab29f71138 100644
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py
@@ -297,7 +297,7 @@ def test_saved_model(directory, tag_set=None, signature_key=None, **kwargs):
   compare_models_random_data(tflite_model, tf_eval_func)
 
 
-def test_keras_model(filename, **kwargs):
+def test_keras_model(filename, input_arrays=None, input_shapes=None, **kwargs):
   """Validates the tf.keras model converts to a TFLite model.
 
   Converts the tf.keras model to TFLite and checks the accuracy of the model on
@@ -305,9 +305,15 @@ def test_keras_model(filename, **kwargs):
 
   Args:
     filename: Full filepath of HDF5 file containing the tf.keras model.
+    input_arrays: List of input tensors to freeze graph with.
+    input_shapes: Dict of strings representing input tensor names to list of
+      integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}).
+      Automatically determined when input shapes is None (e.g., {"foo" : None}).
+        (default None)
     **kwargs: Additional arguments to be passed into the converter.
   """
-  converter = _lite.TFLiteConverter.from_keras_model_file(filename)
+  converter = _lite.TFLiteConverter.from_keras_model_file(
+      filename, input_arrays=input_arrays, input_shapes=input_shapes)
   tflite_model = _convert(converter, **kwargs)
 
   tf_eval_func = evaluate_keras_model(filename)
-- 
GitLab


From 1e4a3baad388b5d5250efdb19f91d5b670816fbe Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Tue, 9 Oct 2018 11:03:57 -0700
Subject: [PATCH 0613/1085] Update TFLite Converter documentation.

PiperOrigin-RevId: 216386450
---
 tensorflow/contrib/lite/toco/README.md        |  9 +-
 .../lite/toco/g3doc/cmdline_examples.md       | 66 ++++++-------
 .../lite/toco/g3doc/cmdline_reference.md      |  8 +-
 .../contrib/lite/toco/g3doc/python_api.md     | 95 ++++++++++---------
 4 files changed, 93 insertions(+), 85 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/README.md b/tensorflow/contrib/lite/toco/README.md
index 2db6a627ab..91f6f618a3 100644
--- a/tensorflow/contrib/lite/toco/README.md
+++ b/tensorflow/contrib/lite/toco/README.md
@@ -1,6 +1,6 @@
-# TOCO: TensorFlow Lite Optimizing Converter
+# TensorFlow Lite Converter
 
-The TensorFlow Lite Optimizing Converter converts TensorFlow graphs into
+The TensorFlow Lite Converter converts TensorFlow graphs into
 TensorFlow Lite graphs. There are additional usages that are also detailed in
 the usage documentation.
 
@@ -14,9 +14,10 @@ Usage information is given in these documents:
 
 ## Where the converter fits in the TensorFlow landscape
 
-Once an application developer has a trained TensorFlow model, TOCO will accept
+Once an application developer has a trained TensorFlow model, the TensorFlow
+Lite Converter will accept
 that model and generate a TensorFlow Lite
-[FlatBuffer](https://google.github.io/flatbuffers/) file. TOCO currently supports
+[FlatBuffer](https://google.github.io/flatbuffers/) file. The converter currently supports
 [SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators),
 frozen graphs (models generated via
 [freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)),
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
index aba7536cbd..e3c46eb377 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
+++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
@@ -1,7 +1,7 @@
-# TensorFlow Lite Optimizing Converter command-line examples
+# TensorFlow Lite Converter command-line examples
 
-This page provides examples on how to use TOCO via command line. It is
-complemented by the following documents:
+This page shows how to use the TensorFlow Lite Converter in the command line. It
+is complemented by the following documents:
 
 *   [README](../README.md)
 *   [Command-line glossary](cmdline_reference.md)
@@ -10,7 +10,7 @@ complemented by the following documents:
 Table of contents:
 
 *   [Command-line tools](#tools)
-    *   [Converting models prior to TensorFlow 1.9.](#pre-tensorflow-1.9)
+    *   [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9)
 *   [Basic examples](#basic)
     *   [Convert a TensorFlow GraphDef](#graphdef)
     *   [Convert a TensorFlow SavedModel](#savedmodel)
@@ -31,27 +31,28 @@ Table of contents:
 
 ## Command-line tools <a name="tools"></a>
 
-There are two approaches to running TOCO via command line.
+There are two approaches to running the converter in the command line.
 
 *   `tflite_convert`: Starting from TensorFlow 1.9, the command-line tool
-    `tflite_convert` will be installed as part of the Python package. All of the
+    `tflite_convert` is installed as part of the Python package. All of the
     examples below use `tflite_convert` for simplicity.
     *   Example: `tflite_convert --output_file=...`
-*   `bazel`: In order to run the latest version of TOCO, [clone the TensorFlow
-    repository](https://www.tensorflow.org/install/source)
-    and use `bazel`. This is the recommended approach for converting models that
-    utilize new features that were not supported by TOCO in TensorFlow 1.9.
+*   `bazel`: In order to run the latest version of the TensorFlow Lite Converter
+    either install the nightly build using
+    [pip](https://www.tensorflow.org/install/pip) or
+    [clone the TensorFlow repository](https://www.tensorflow.org/install/source)
+    and use `bazel`.
     *   Example: `bazel run
         //tensorflow/contrib/lite/python:tflite_convert --
         --output_file=...`
 
-### Converting models prior to TensorFlow 1.9. <a name="pre-tensorflow-1.9"></a>
+### Converting models prior to TensorFlow 1.9 <a name="pre-tensorflow-1.9"></a>
 
-The recommended approach for using TOCO prior to TensorFlow 1.9 is the [Python
-API](python_api.md#pre-tensorflow-1.9). If a command line tool is desired, the
-`toco` command line tool was available in TensorFlow 1.7. Enter `toco --help` in
-Terminal for additional details on the command-line flags available. There were
-no command line tools in TensorFlow 1.8.
+The recommended approach for using the converter prior to TensorFlow 1.9 is the
+[Python API](python_api.md#pre-tensorflow-1.9). If a command line tool is
+desired, the `toco` command line tool was available in TensorFlow 1.7. Enter
+`toco --help` in Terminal for additional details on the command-line flags
+available. There were no command line tools in TensorFlow 1.8.
 
 ## Basic examples <a name="basic"></a>
 
@@ -117,9 +118,9 @@ tflite_convert \
 
 ### Convert a TensorFlow GraphDef for quantized inference <a name="graphdef-quant"></a>
 
-TOCO is compatible with fixed point quantization models described
-[here](https://www.tensorflow.org/performance/quantization). These are float
-models with
+The TensorFlow Lite Converter is compatible with fixed point quantization models
+described [here](https://www.tensorflow.org/performance/quantization). These are
+float models with
 [`FakeQuant*`](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization)
 ops inserted at the boundaries of fused layers to record min-max range
 information. This generates a quantized inference workload that reproduces the
@@ -141,12 +142,12 @@ tflite_convert \
 
 ### Use \"dummy-quantization\" to try out quantized inference on a float graph <a name="dummy-quant"></a>
 
-In order to evaluate the possible benefit of generating a quantized graph, TOCO
-allows "dummy-quantization" on float graphs. The flags `--default_ranges_min`
-and `--default_ranges_max` accept plausible values for the min-max ranges of the
-values in all arrays that do not have min-max information. "Dummy-quantization"
-will produce lower accuracy but will emulate the performance of a correctly
-quantized model.
+In order to evaluate the possible benefit of generating a quantized graph, the
+converter allows "dummy-quantization" on float graphs. The flags
+`--default_ranges_min` and `--default_ranges_max` accept plausible values for
+the min-max ranges of the values in all arrays that do not have min-max
+information. "Dummy-quantization" will produce lower accuracy but will emulate
+the performance of a correctly quantized model.
 
 The example below contains a model using Relu6 activation functions. Therefore,
 a reasonable guess is that most activation ranges should be contained in [0, 6].
@@ -207,10 +208,10 @@ tflite_convert \
 ### Specifying subgraphs
 
 Any array in the input file can be specified as an input or output array in
-order to extract subgraphs out of an input graph file. TOCO discards the parts
-of the graph outside of the specific subgraph. Use [graph
-visualizations](#graph-visualizations) to identify the input and output arrays
-that make up the desired subgraph.
+order to extract subgraphs out of an input graph file. The TensorFlow Lite
+Converter discards the parts of the graph outside of the specific subgraph. Use
+[graph visualizations](#graph-visualizations) to identify the input and output
+arrays that make up the desired subgraph.
 
 The follow command shows how to extract a single fused layer out of a TensorFlow
 GraphDef.
@@ -247,9 +248,10 @@ function tends to get fused).
 
 ## Graph visualizations
 
-TOCO can export a graph to the Graphviz Dot format for easy visualization via
-either the `--output_format` flag or the `--dump_graphviz_dir` flag. The
-subsections below outline the use cases for each.
+The converter can export a graph to the Graphviz Dot format for easy
+visualization using either the `--output_format` flag or the
+`--dump_graphviz_dir` flag. The subsections below outline the use cases for
+each.
 
 ### Using `--output_format=GRAPHVIZ_DOT` <a name="using-output-format-graphviz-dot"></a>
 
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
index 00bc8d4ccb..31200fd657 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
+++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
@@ -1,8 +1,8 @@
-# TensorFlow Lite Optimizing Converter command-line glossary
+# TensorFlow Lite Converter command-line glossary
 
-This page is complete reference of command-line flags used by TOCO's command
-line starting from TensorFlow 1.9 up until the most recent build of TensorFlow.
-It is complemented by the following other documents:
+This page is complete reference of command-line flags used by the TensorFlow
+Lite Converter's command line starting from TensorFlow 1.9 up until the most
+recent build of TensorFlow. It is complemented by the following other documents:
 
 *   [README](../README.md)
 *   [Command-line examples](cmdline_examples.md)
diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md
index 8c31c3dca8..1f741360c6 100644
--- a/tensorflow/contrib/lite/toco/g3doc/python_api.md
+++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md
@@ -1,7 +1,8 @@
-# TensorFlow Lite Optimizing Converter & Interpreter Python API reference
+# TensorFlow Lite Converter & Interpreter Python API reference
 
-This page provides examples on how to use TOCO and the TensorFlow Lite
-interpreter via the Python API. It is complemented by the following documents:
+This page provides examples on how to use the TensorFlow Lite Converter and the
+TensorFlow Lite interpreter using the Python API. It is complemented by the
+following documents:
 
 *   [README](../README.md)
 *   [Command-line examples](cmdline_examples.md)
@@ -23,39 +24,35 @@ Table of contents:
     *   [Using the interpreter from model data](#interpreter-data)
 *   [Additional instructions](#additional-instructions)
     *   [Build from source code](#latest-package)
-    *   [Converting models prior to TensorFlow 1.9.](#pre-tensorflow-1.9)
+    *   [Converting models in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11)
+    *   [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9)
 
 ## High-level overview
 
-While the TensorFlow Lite Optimizing Converter can be used from the command
-line, it is often convenient to use it as part of a Python model build and
-training script. This is so that conversion can be part of your model
-development pipeline. This allows you to know early and often that you are
-designing a model that can be targeted to devices with mobile.
+While the TensorFlow Lite Converter can be used from the command line, it is
+often convenient to use in a Python script as part of the model development
+pipeline. This allows you to know early that you are designing a model that can
+be targeted to devices with mobile.
 
 ## API
 
 The API for converting TensorFlow models to TensorFlow Lite as of TensorFlow 1.9
-is `tf.contrib.lite.TocoConverter`. The API for calling the Python intepreter is
-`tf.contrib.lite.Interpreter`.
-
-**NOTE**: As of TensorFlow 1.12, the API for converting TensorFlow models to
-TFLite will be renamed to `TFLiteConverter`. `TFLiteConverter` is semantically
-identically to `TocoConverter`. The API is available at
-`tf.contrib.lite.TFLiteConverter` as of the Sept 26 `tf-nightly`.
-
-`TocoConverter` provides class methods based on the original format of the
-model. `TocoConverter.from_session()` is available for GraphDefs.
-`TocoConverter.from_saved_model()` is available for SavedModels.
-`TocoConverter.from_keras_model_file()` is available for `tf.Keras` files.
+is `tf.contrib.lite.TFLiteConverter`. The API for calling the Python intepreter
+is `tf.contrib.lite.Interpreter`.
+
+Note: Reference "Additional Instructions" sections for converting TensorFlow
+models to TensorFlow Lite
+[in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11) and
+[prior to TensorFlow 1.9](#pre-tensorflow-1.9)
+
+`TFLiteConverter` provides class methods based on the original format of the
+model. `TFLiteConverter.from_session()` is available for GraphDefs.
+`TFLiteConverter.from_saved_model()` is available for SavedModels.
+`TFLiteConverter.from_keras_model_file()` is available for `tf.Keras` files.
 Example usages for simple float-point models are shown in
 [Basic Examples](#basic). Examples usages for more complex models is shown in
 [Complex Examples](#complex).
 
-**NOTE**: Currently, `TocoConverter` will cause a fatal error to the Python
-interpreter when the conversion fails. This will be remedied as soon as
-possible.
-
 ## Basic examples <a name="basic"></a>
 
 The following section shows examples of how to convert a basic float-point model
@@ -76,7 +73,7 @@ out = tf.identity(val, name="out")
 
 with tf.Session() as sess:
   sess.run(tf.global_variables_initializer())
-  converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out])
+  converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out])
   tflite_model = converter.convert()
   open("converted_model.tflite", "wb").write(tflite_model)
 ```
@@ -89,7 +86,7 @@ TensorFlow Lite FlatBuffer when the GraphDef is stored in a file. Both `.pb` and
 
 The example uses
 [Mobilenet_1.0_224](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz).
-The function only supports GraphDefs frozen via
+The function only supports GraphDefs frozen using
 [freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py).
 
 ```python
@@ -99,7 +96,7 @@ graph_def_file = "/path/to/Downloads/mobilenet_v1_1.0_224/frozen_graph.pb"
 input_arrays = ["input"]
 output_arrays = ["MobilenetV1/Predictions/Softmax"]
 
-converter = tf.contrib.lite.TocoConverter.from_frozen_graph(
+converter = tf.contrib.lite.TFLiteConverter.from_frozen_graph(
   graph_def_file, input_arrays, output_arrays)
 tflite_model = converter.convert()
 open("converted_model.tflite", "wb").write(tflite_model)
@@ -113,25 +110,26 @@ FlatBuffer.
 ```python
 import tensorflow as tf
 
-converter = tf.contrib.lite.TocoConverter.from_saved_model(saved_model_dir)
+converter = tf.contrib.lite.TFLiteConverter.from_saved_model(saved_model_dir)
 tflite_model = converter.convert()
 open("converted_model.tflite", "wb").write(tflite_model)
 ```
 
 For more complex SavedModels, the optional parameters that can be passed into
-`TocoConverter.from_saved_model()` are `input_arrays`, `input_shapes`,
+`TFLiteConverter.from_saved_model()` are `input_arrays`, `input_shapes`,
 `output_arrays`, `tag_set` and `signature_key`. Details of each parameter are
-available by running `help(tf.contrib.lite.TocoConverter)`.
+available by running `help(tf.contrib.lite.TFLiteConverter)`.
 
 ### Exporting a tf.keras File <a name="basic-keras-file"></a>
 
 The following example shows how to convert a `tf.keras` model into a TensorFlow
-Lite FlatBuffer.
+Lite FlatBuffer. This example requires
+[`h5py`](http://docs.h5py.org/en/latest/build.html) to be installed.
 
 ```python
 import tensorflow as tf
 
-converter = tf.contrib.lite.TocoConverter.from_keras_model_file("keras_model.h5")
+converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file("keras_model.h5")
 tflite_model = converter.convert()
 open("converted_model.tflite", "wb").write(tflite_model)
 ```
@@ -163,7 +161,7 @@ keras_file = "keras_model.h5"
 tf.keras.models.save_model(model, keras_file)
 
 # Convert to TensorFlow Lite model.
-converter = tf.contrib.lite.TocoConverter.from_keras_model_file(keras_file)
+converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file(keras_file)
 tflite_model = converter.convert()
 open("converted_model.tflite", "wb").write(tflite_model)
 ```
@@ -173,7 +171,7 @@ open("converted_model.tflite", "wb").write(tflite_model)
 For models where the default value of the attributes is not sufficient, the
 attribute's values should be set before calling `convert()`. In order to call
 any constants use `tf.contrib.lite.constants.<CONSTANT_NAME>` as seen below with
-`QUANTIZED_UINT8`. Run `help(tf.contrib.lite.TocoConverter)` in the Python
+`QUANTIZED_UINT8`. Run `help(tf.contrib.lite.TFLiteConverter)` in the Python
 terminal for detailed documentation on the attributes.
 
 Although the examples are demonstrated on GraphDefs containing only constants.
@@ -193,7 +191,7 @@ val = img + const
 out = tf.fake_quant_with_min_max_args(val, min=0., max=1., name="output")
 
 with tf.Session() as sess:
-  converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out])
+  converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out])
   converter.inference_type = tf.contrib.lite.constants.QUANTIZED_UINT8
   input_arrays = converter.get_input_arrays()
   converter.quantized_input_stats = {input_arrays[0] : (0., 1.)}  # mean, std_dev
@@ -250,7 +248,7 @@ val = img + const
 out = tf.identity(val, name="out")
 
 with tf.Session() as sess:
-  converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out])
+  converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out])
   tflite_model = converter.convert()
 
 # Load TFLite model and allocate tensors.
@@ -262,13 +260,20 @@ interpreter.allocate_tensors()
 
 ### Build from source code <a name="latest-package"></a>
 
-In order to run the latest version of the TOCO Python API, clone the TensorFlow
-repository, configure the installation, and build and install the pip package.
-Detailed instructions are available
-[here](https://www.tensorflow.org/install/source).
+In order to run the latest version of the TensorFlow Lite Converter Python API,
+either install the nightly build with
+[pip](https://www.tensorflow.org/install/pip) (recommended) or
+[Docker](https://www.tensorflow.org/install/docker), or
+[build the pip package from source](https://www.tensorflow.org/install/source).
+
+### Converting models in TensorFlow 1.9 to TensorFlow 1.11 <a name="#pre-tensorflow-1.11"></a>
+
+To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.9 through
+TensorFlow 1.11, use `TocoConverter`. `TocoConverter` is semantically
+identically to `TFLiteConverter`.
 
-### Converting models prior to TensorFlow 1.9. <a name="pre-tensorflow-1.9"></a>
+### Converting models prior to TensorFlow 1.9 <a name="pre-tensorflow-1.9"></a>
 
-To use TOCO in TensorFlow 1.7 and TensorFlow 1.8, use the `toco_convert`
-function. Run `help(tf.contrib.lite.toco_convert)` to get details about accepted
-parameters.
+To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.7 and TensorFlow
+1.8, use the `toco_convert` function. Run `help(tf.contrib.lite.toco_convert)`
+to get details about accepted parameters.
-- 
GitLab


From 84ace0358526bb51c04a3bef4b3072b93b9d1bec Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Tue, 9 Oct 2018 11:16:32 -0700
Subject: [PATCH 0614/1085] Improves tf.function prototype.

Specifically:
 - renames from def_function
 - returns an object with well-defined methods
 - doesn't force-retrace twice
 - uses the python descriptor API ( https://docs.python.org/3/howto/descriptor.html )
   to remove the need for a tf.method
PiperOrigin-RevId: 216388957
---
 tensorflow/python/eager/def_function.py      | 188 +++++++++++++++----
 tensorflow/python/eager/def_function_test.py |  32 +++-
 2 files changed, 179 insertions(+), 41 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 8dcacd5c99..b23891d394 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -19,8 +19,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+import weakref
+
 from tensorflow.python.eager import context
-from tensorflow.python.eager import function
+from tensorflow.python.eager import function as function_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -165,71 +168,184 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
     self._cached_shape_as_list = None
 
 
-def _defun_with_scope(scope, fn):
+def _defun_with_scope(scope, fn, input_signature):
 
   def wrapped_fn(*args, **kwds):
     with variable_scope.variable_creator_scope(scope):
       return fn(*args, **kwds)
 
-  return function.defun(wrapped_fn)
+  return function_lib.defun(wrapped_fn, input_signature=input_signature)
 
 
-def def_function(fn):
-  """Defines a function as per the "functions, not sessions" document."""
+def _call_concrete(fn, args, unused_kwargs):
+  """Calls the given concrete function with only the tensor arguments."""
+
+  def inner():
+    # TODO(apassos) figure out what to do with kwargs and concrete functions.
+    return fn(*[x for x in args if isinstance(x, ops.Tensor)])
+
+  return inner
+
+
+class PolymorphicFunction(object):
+  """Wrapper class for the graph functions defined for a Python function.
+
+  See the documentation for `tf.function` for more information on the semantics
+  of defined functions.
 
-  # Wrapping the values in lists to bypass python's lack of way to mutate
-  # symbols from an outer scope.
-  first_call = [True]
-  function_to_call = []
+  PolymorphicFunction is thread-compatible.
+  """
+
+  def __init__(self,
+               python_function,
+               input_signature=None,):
+    """Initializes a polymorphic function.
+
+    Args:
+      python_function: the function to be wrapped.
+      input_signature: a possibly nested sequence of `TensorSpec` objects
+        specifying the input signature of this function. If `None`, a separate
+        function is instantiated for each inferred input signature.
+
+    Raises:
+      ValueError: if `input_signature` is not None and the `python_function`'s
+        argspec has keyword arguments.
+    """
+    self._python_function = python_function
+    self._input_signature = input_signature
+    self._created_variables = None
+    self._stateful_fn = None
+    self._descriptor_cache = weakref.WeakKeyDictionary()
 
-  # TODO(apassos) represent this as an object and not as a closure.
-  def decorated_fn(*args, **kwds):
-    """Graph function for fn."""
-    if not first_call[0]:
-      return function_to_call[0](*args, **kwds)
+  def _initialize(self, args, kwds):
+    """Initializes, on the first call."""
 
-    first_call[0] = False
-    created_variables = []
+    self._created_variables = []
 
-    def variable_creator_scope(unused_next_creator, **kwds):
+    def variable_capturing_scope(unused_next_creator, **kwds):
       """Creates UnliftedInitializerVariables and saves references to them."""
       v = UnliftedInitializerVariable(**kwds)
-      created_variables.append(v)
+      self._created_variables.append(v)
       return v
 
-    first_graph_function = _defun_with_scope(variable_creator_scope, fn)
+    self._stateful_fn = _defun_with_scope(
+        variable_capturing_scope, self._python_function, self._input_signature)
 
     # Force the definition of the function for these arguments
-    first_concrete = first_graph_function.get_concrete_function(*args, **kwds)
+    self._concrete_stateful_fn = self._stateful_fn.get_concrete_function(
+        *args, **kwds)
 
     def invalid_creator_scope(*unused_args, **unused_kwds):
       """Disables variable creation."""
       raise ValueError(
-          "def_function-decorated function tried to create "
-          "variables on second call.")
+          "tf.function-decorated function tried to create "
+          "variables on non-first call.")
 
-    second_graph_function = _defun_with_scope(invalid_creator_scope, fn)
+    self._stateless_fn = _defun_with_scope(
+        invalid_creator_scope, self._python_function, self._input_signature)
 
-    function_to_call.append(second_graph_function)
-    if not created_variables:
-      # Note: this retracing might be unnecessary, but running the function
-      # forever in the scope which disallows variable creation is safer than not
-      # doing so.
-      return second_graph_function(*args, **kwds)
+  def __call__(self, *args, **kwds):
+    """Calls the graph function."""
+    if self._created_variables:
+      # In this case we have created variables on the first call, so we run the
+      # defunned version which is guaranteed to never create variables.
+      return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
+    elif self._stateful_fn is not None:
+      # In this case we have not created variables on the first call. So we can
+      # run the first trace but we should fail if variables are created.
+      results = self._first_trace(*args, **kwds)
+      if self._created_variables:
+        raise ValueError("Creating variables on a non-first call to a function"
+                         " decorated with tf.function.")
+      return results
+
+    self._initialize(args, kwds)
+
+    if not self._created_variables:
+      # If we did not create any variables the trace we have is good enough.
+      return _call_concrete(self._concrete_stateful_fn, args, kwds)()
 
     def fn_with_cond(*inner_args, **inner_kwds):
       """Conditionally runs initialization if it's needed."""
       condition = True
-      for variable in created_variables:
+      for variable in self._created_variables:
         condition = condition and resource_variable_ops.var_is_initialized_op(
             variable.handle)
-      # We want to call second_graph_function if possible because it avoids
-      # recomputing potentially expensive initializers.
+      # We want to call stateless_fn if possible because it avoids recomputing
+      # potentially expensive initializers.
       return control_flow_ops.cond(
           condition,
-          lambda: second_graph_function(*inner_args, **inner_kwds),
-          lambda: first_concrete(*inner_args, **inner_kwds))
+          lambda: self._stateless_fn(*inner_args, **inner_kwds),
+          _call_concrete(self._concrete_stateful_fn, inner_args, inner_kwds))
+
+    return function_lib.defun(fn_with_cond)(*args, **kwds)
+
+  @property
+  def python_function(self):
+    """The python function wrapped in this tf.function."""
+    return self._python_function
+
+  def get_concrete_function(self, *args, **kwargs):
+    """Returns a `Function` object specialized to inputs and execution context.
+
+    `args` and `kwargs` are ignored if this `PolymorphicFunction` was created
+    with an `input_signature`.
+
+    Args:
+      *args: inputs to specialize on.
+      **kwargs: inputs to specialize on.
 
-    return function.defun(fn_with_cond)(*args, **kwds)
+    Raises:
+      ValueError: if this object has not yet been called on concrete values.
+    """
+    # TODO(apassos) figure out how to handle this case (what should we return
+    # here?)
+    if self._stateful_fn is None:
+      raise ValueError(
+          "Call this function with concrete values before asking for a"
+          " concrete function. Calling the function will ensure that, in"
+          " case this function creates variables, that those are properly"
+          " initialized.")
+    if self._created_variables:
+      # In this case we have created variables on the first call, so we run the
+      # defunned version which is guaranteed to never create variables.
+      return self._stateless_fn.get_concrete_function(*args, **kwargs)
+    elif self._stateful_fn is not None:
+      # In this case we have not created variables on the first call. So we can
+      # run the first trace but we should fail if variables are created.
+      concrete = self._first_trace.get_concrete_function(*args, **kwargs)
+      if self._created_variables:
+        raise ValueError("Creating variables on a non-first call to a function"
+                         " decorated with tf.function.")
+      return concrete
 
-  return decorated_fn
+  def __get__(self, instance, owner):
+    """Makes it possible to defun instance methods."""
+    del owner
+    # `instance` here is the instance that this `PolymorphicFunction` was
+    # accessed through; e.g., for
+    #
+    #   class Foo(object):
+    #
+    #     @function.defun
+    #     def bar(self):
+    #       ...
+    #
+    #   foo = Foo()
+    #   foo.bar()  # `foo.bar` is a `PolymorphicFunction` instance
+    #
+    # then `instance` will be `foo` (and `owner` will be `Foo`).  We create a
+    # new instance of PolymorphicFunction here to allow different instances each
+    # to create variables once, thereby allowing methods to be decorated with
+    # tf.function. Keeps a cache to avoid retracing the function every time the
+    # descriptor is accessed.
+    if instance not in self._descriptor_cache:
+      self._descriptor_cache[instance] = PolymorphicFunction(
+          functools.partial(self.python_function, instance),
+          self._input_signature)
+    return self._descriptor_cache[instance]
+
+
+def function(fn=None, input_signature=None):
+  """Defines a function as per the "functions, not sessions" document."""
+  return PolymorphicFunction(fn, input_signature)
diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py
index 804436c4bb..39bad726d0 100644
--- a/tensorflow/python/eager/def_function_test.py
+++ b/tensorflow/python/eager/def_function_test.py
@@ -29,7 +29,7 @@ class DefFunctionTest(test.TestCase):
 
   def testNoVariables(self):
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       return 2 * x
 
@@ -37,7 +37,7 @@ class DefFunctionTest(test.TestCase):
 
   def testFailIfVariablesAreCreatedMoreThanOnce(self):
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       return variables.Variable(1.0) + x
 
@@ -47,7 +47,7 @@ class DefFunctionTest(test.TestCase):
   def testFailIfVariablesAreCreatedMoreThanOnceNoWeakRef(self):
     state = []
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       state.append(variables.Variable(1.0))
       return state[-1] + x
@@ -59,7 +59,7 @@ class DefFunctionTest(test.TestCase):
 
     state = []
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       if not state:
         state.append(variables.Variable(2.0))
@@ -72,7 +72,7 @@ class DefFunctionTest(test.TestCase):
 
     state = []
 
-    @def_function.def_function
+    @def_function.function
     def fn(x):
       if not state:
         state.append(variables.Variable(2.0 * x))
@@ -81,6 +81,28 @@ class DefFunctionTest(test.TestCase):
     self.assertAllEqual(fn(constant_op.constant(1.0)), 2.0)
     self.assertAllEqual(fn(constant_op.constant(3.0)), 6.0)
 
+  def testMethod(self):
+
+    class MyModel(object):
+
+      def __init__(self):
+        self.var = None
+
+      @def_function.function
+      def apply(self, x):
+        if self.var is None:
+          self.var = variables.Variable(2.0)
+        return self.var * x
+
+    m0 = MyModel()
+    self.assertAllEqual(m0.apply(3.0), 6.0)
+    # Calling twice to exercise that we do not recreate variables.
+    m0.var.assign(3.0)
+    self.assertAllEqual(m0.apply(3.0), 9.0)
+
+    m1 = MyModel()
+    self.assertAllEqual(m1.apply(3.0), 6.0)
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
-- 
GitLab


From 931353c5f79c2d419afb3a5ecac59184c5558351 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 11:37:29 -0700
Subject: [PATCH 0615/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 216392772
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 119 ++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 119 ++++++++++++++++++
 2 files changed, 238 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 9df0ece69b..dcea70dffb 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -58500,6 +58500,125 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "SdcaOptimizerV2"
+  input_arg {
+    name: "sparse_example_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_values"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features_with_values"
+  }
+  input_arg {
+    name: "dense_features"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_weights"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "example_labels"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_delta_sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  output_arg {
+    name: "out_delta_dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  attr {
+    name: "loss_type"
+    type: "string"
+    allowed_values {
+      list {
+        s: "logistic_loss"
+        s: "squared_loss"
+        s: "hinge_loss"
+        s: "smooth_hinge_loss"
+        s: "poisson_loss"
+      }
+    }
+  }
+  attr {
+    name: "adaptive"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "num_sparse_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_sparse_features_with_values"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_dense_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "l1"
+    type: "float"
+  }
+  attr {
+    name: "l2"
+    type: "float"
+  }
+  attr {
+    name: "num_loss_partitions"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_inner_iterations"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "SdcaShrinkL1"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 2048ad26ac..93a297458f 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -28140,6 +28140,125 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "SdcaOptimizerV2"
+  input_arg {
+    name: "sparse_example_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_feature_values"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features_with_values"
+  }
+  input_arg {
+    name: "dense_features"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_weights"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "example_labels"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "sparse_indices"
+    type: DT_INT64
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  input_arg {
+    name: "dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  input_arg {
+    name: "example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_example_state_data"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "out_delta_sparse_weights"
+    type: DT_FLOAT
+    number_attr: "num_sparse_features"
+  }
+  output_arg {
+    name: "out_delta_dense_weights"
+    type: DT_FLOAT
+    number_attr: "num_dense_features"
+  }
+  attr {
+    name: "loss_type"
+    type: "string"
+    allowed_values {
+      list {
+        s: "logistic_loss"
+        s: "squared_loss"
+        s: "hinge_loss"
+        s: "smooth_hinge_loss"
+        s: "poisson_loss"
+      }
+    }
+  }
+  attr {
+    name: "adaptive"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "num_sparse_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_sparse_features_with_values"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "num_dense_features"
+    type: "int"
+    has_minimum: true
+  }
+  attr {
+    name: "l1"
+    type: "float"
+  }
+  attr {
+    name: "l2"
+    type: "float"
+  }
+  attr {
+    name: "num_loss_partitions"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "num_inner_iterations"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "SdcaShrinkL1"
   input_arg {
-- 
GitLab


From 12e164d1e7c0b197f06d5d3c2ed26318b89b5e4c Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Tue, 9 Oct 2018 11:38:15 -0700
Subject: [PATCH 0616/1085] Return ::tensorflow::Status in Toco Graph
 Transformations. PiperOrigin-RevId: 216392908

---
 .../convert_expanddims_to_reshape.cc          | 16 +++++----
 .../convert_pure_conv_to_depthwise.cc         | 24 +++++++------
 .../convert_reorder_axes.cc                   | 15 +++++---
 .../convert_squeeze_to_reshape.cc             | 18 ++++++----
 .../convert_trivial_addn_to_add.cc            | 12 ++++---
 .../convert_trivial_pack_to_reshape.cc        | 16 +++++----
 .../convert_trivial_tile_to_concat.cc         | 16 +++++----
 .../convert_trivial_transpose_to_reshape.cc   | 16 +++++----
 .../create_im2col_arrays.cc                   | 12 ++++---
 .../toco/graph_transformations/dequantize.cc  | 14 +++++---
 .../graph_transformations/drop_fake_quant.cc  | 13 ++++---
 .../drop_im2col_arrays.cc                     | 11 +++---
 .../ensure_bias_vectors.cc                    |  9 +++--
 ...int8_weights_safe_for_fast_int8_kernels.cc | 14 ++++----
 .../fuse_activation_functions.cc              | 22 +++++++-----
 .../fuse_binary_into_following_affine.cc      | 32 +++++++++--------
 .../fuse_binary_into_preceding_affine.cc      | 36 ++++++++++---------
 .../fuse_broadcast_into_following_binary.cc   | 16 +++++----
 .../graph_transformations.cc                  |  2 +-
 .../graph_transformations.h                   | 29 +++++++++------
 .../graph_transformations/hardcode_min_max.cc |  7 ++--
 .../identify_dilated_conv.cc                  | 16 +++++----
 .../identify_l2_normalization.cc              | 22 +++++++-----
 .../graph_transformations/identify_l2_pool.cc | 15 ++++----
 .../graph_transformations/identify_lstm.cc    | 33 +++++++++--------
 .../identify_lstm_merge_inputs.cc             | 16 +++++----
 .../identify_lstm_split_inputs.cc             | 16 +++++----
 .../graph_transformations/identify_prelu.cc   | 19 +++++-----
 .../graph_transformations/identify_relu1.cc   | 17 +++++----
 .../make_initial_dequantize_operator.cc       |  8 +++--
 .../merge_reshape_into_preceding_transpose.cc | 26 +++++++-------
 .../move_binary_operator_before_reshape.cc    | 30 +++++++++-------
 ...gate_activation_function_into_constants.cc | 20 ++++++-----
 .../propagate_array_data_types.cc             | 18 ++++++----
 .../propagate_default_min_max.cc              |  8 +++--
 .../propagate_fake_quant_num_bits.cc          | 12 ++++---
 .../propagate_fixed_sizes.cc                  | 12 ++++---
 .../toco/graph_transformations/quantize.cc    | 13 ++++---
 ...minmax_and_narrow_range_from_fake_quant.cc | 12 ++++---
 .../remove_final_dequantize_op.cc             | 12 ++++---
 .../remove_tensorflow_assert.cc               | 10 ++++--
 .../remove_tensorflow_identity.cc             | 10 ++++--
 .../remove_trivial_binary.cc                  | 22 +++++++-----
 .../remove_trivial_concatenation.cc           | 12 ++++---
 .../remove_trivial_concatenation_input.cc     | 12 ++++---
 .../remove_trivial_fake_quant.cc              | 12 ++++---
 ...emove_trivial_quantized_activation_func.cc | 15 ++++----
 .../remove_trivial_quantized_min_max.cc       | 12 ++++---
 .../remove_trivial_reshape.cc                 | 12 ++++---
 .../remove_trivial_slice.cc                   | 11 +++---
 .../graph_transformations/remove_unused_op.cc | 15 ++++----
 .../reorder_elementwise_unary.cc              | 18 ++++++----
 .../reorder_reshape_transpose.cc              | 24 +++++++------
 .../resolve_batch_normalization.cc            | 12 ++++---
 .../resolve_batch_to_space_nd_attributes.cc   | 21 ++++++-----
 .../resolve_constant_binary.cc                | 16 +++++----
 .../resolve_constant_concatenation.cc         | 24 ++++++++-----
 .../resolve_constant_fake_quant.cc            | 16 +++++----
 .../resolve_constant_fill.cc                  | 26 ++++++++------
 .../resolve_constant_gather.cc                | 20 ++++++-----
 .../resolve_constant_pack.cc                  | 16 +++++----
 .../resolve_constant_random_uniform.cc        | 18 ++++++----
 .../resolve_constant_range.cc                 | 20 ++++++-----
 .../resolve_constant_reshape.cc               | 20 ++++++-----
 .../resolve_constant_select.cc                | 21 ++++++-----
 .../resolve_constant_shape_or_rank.cc         | 16 +++++----
 .../resolve_constant_slice.cc                 | 28 ++++++++-------
 .../resolve_constant_strided_slice.cc         | 20 ++++++-----
 .../resolve_constant_tile.cc                  | 16 +++++----
 .../resolve_constant_transpose.cc             | 18 ++++++----
 .../resolve_constant_unary.cc                 | 28 ++++++++-------
 .../resolve_fake_quant_args_from_vars.cc      | 14 +++++---
 .../resolve_gather_attributes.cc              | 20 +++++++----
 .../resolve_multiply_by_zero.cc               | 30 +++++++++-------
 .../resolve_pad_attributes.cc                 | 17 +++++----
 .../resolve_padv2_attributes.cc               | 17 +++++----
 .../resolve_reduce_attributes.cc              | 30 +++++++++++-----
 .../resolve_reorder_axes.cc                   | 13 ++++---
 .../resolve_reshape_attributes.cc             | 14 +++++---
 .../resolve_slice_attributes.cc               | 22 +++++++-----
 .../resolve_space_to_batch_nd_attributes.cc   | 21 ++++++-----
 .../resolve_squeeze_attributes.cc             | 12 ++++---
 .../resolve_strided_slice_attributes.cc       | 32 ++++++++++-------
 .../resolve_tensorflow_concat.cc              | 12 ++++---
 .../resolve_tensorflow_matmul.cc              | 12 ++++---
 .../resolve_tensorflow_merge.cc               | 12 ++++---
 .../resolve_tensorflow_switch.cc              | 12 ++++---
 .../resolve_transpose_attributes.cc           | 18 ++++++----
 .../shuffle_fc_weights.cc                     | 27 +++++++-------
 .../resolve_constant_concatenation_test.cc    | 15 ++++++--
 .../tests/resolve_constant_unary_test.cc      |  3 +-
 .../unfuse_activation_functions.cc            | 12 ++++---
 .../unpartition_embedding_lookup.cc           | 24 +++++++------
 .../unroll_batch_matmul.cc                    | 15 +++++---
 94 files changed, 1003 insertions(+), 617 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
index 310a88484c..8a945ac435 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc
@@ -25,10 +25,13 @@ limitations under the License.
 
 namespace toco {
 
-bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertExpandDimsToReshape::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   auto expand_it = model->operators.begin() + op_index;
   if (expand_it->get()->type != OperatorType::kExpandDims) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   ExpandDimsOperator* expand_op =
       static_cast<ExpandDimsOperator*>(expand_it->get());
@@ -38,18 +41,18 @@ bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) {
   const auto& input_array = model->GetArray(expand_op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& axis_array = model->GetArray(expand_op->inputs[1]);
   if (!axis_array.has_shape()) {
     // Yield until input axis array shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(RequiredBufferSizeForShape(axis_array.shape()), 1);
   if (!axis_array.buffer) {
     // Yield until the input axis array is constant
-    return false;
+    return ::tensorflow::Status::OK();
   }
   int axis = axis_array.GetBuffer<ArrayDataType::kInt32>().data[0];
   std::vector<int> reshape_dims(input_array.shape().dims());
@@ -90,7 +93,8 @@ bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(expand_it->get(), expand_op);
   model->operators.erase(expand_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc
index e88839be5d..a151012891 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc
@@ -24,29 +24,32 @@ limitations under the License.
 
 namespace toco {
 
-bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertPureConvToDepthwise::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   auto conv_it = model->operators.begin() + op_index;
   if (conv_it->get()->type != OperatorType::kConv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* conv_op = static_cast<ConvOperator*>(conv_it->get());
   if (conv_op->stride_width != conv_op->stride_height) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if ((conv_op->dilation_width_factor != 1) ||
       (conv_op->dilation_height_factor != 1)) {
     // Depthwise conv does not support dilation
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& input_array = model->GetArray(conv_op->inputs[0]);
   if (!input_array.has_shape()) {
     // Shapes not propagated yet
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_array.shape().dims(3) != 1) {
     // Not a pure convolution: Conv does accumulation across the depth
     // dimension.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& weights_name = conv_op->inputs[1];
@@ -56,15 +59,15 @@ bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) {
         "Not changing %s to DepthwiseConv because the weights is consumed by "
         "another op.",
         LogName(*conv_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& weights_array = model->GetArray(weights_name);
   if (!weights_array.buffer) {
     // Yield until the weights are resolved as a constant array.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (weights_array.data_type != ArrayDataType::kFloat) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // At this point we know we have a pure conv. Rewrite it as DepthwiseConv.
   AddMessageF(
@@ -112,7 +115,8 @@ bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) {
   }
   *weights_array.mutable_shape()->mutable_dims() = {1, width, height, depth};
   weights_buffer.data = depthwise_conv_weights_data;
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc
index 0d274fc687..4a264e1cf1 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc
@@ -86,9 +86,12 @@ TransposeOperator* CreateTransposeFromReorderAxes(
 
 // Converts ReorderAxes into Transpose and Reshape which are compatible with the
 // TFLite interpreter.
-bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertReorderAxes::Run(Model* model, std::size_t op_index,
+                                             bool* modified) {
+  *modified = false;
   auto reorder_it = model->operators.begin() + op_index;
-  if (reorder_it->get()->type != OperatorType::kReorderAxes) return false;
+  if (reorder_it->get()->type != OperatorType::kReorderAxes)
+    return ::tensorflow::Status::OK();
 
   auto* reorder_op = static_cast<ReorderAxesOperator*>(reorder_it->get());
   CHECK_EQ(reorder_op->inputs.size(), 1);
@@ -113,8 +116,9 @@ bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) {
   // Yield if input array contains constants or if output array size has not
   // been adjusted to reflect the permutations in ReorderAxes. ReorderAxes will
   // be merged into a constant array when possible.
-  if (IsConstantParameterArray(*model, constant_input_array_name)) return false;
-  if (!output_array.has_shape()) return false;
+  if (IsConstantParameterArray(*model, constant_input_array_name))
+    return ::tensorflow::Status::OK();
+  if (!output_array.has_shape()) return ::tensorflow::Status::OK();
 
   const auto input_axes_order = reorder_op->input_axes_order;
   const auto output_axes_order = reorder_op->output_axes_order;
@@ -143,7 +147,8 @@ bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(reorder_it->get(), reorder_op);
   model->operators.erase(reorder_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc
index 81cedb5dad..a0bd1ed4a4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc
@@ -30,10 +30,13 @@ namespace toco {
 // means that the data layout will never change with this op, just the shape.
 // By converting these to reshapes once we have run shape propagation we allow
 // standard reshape optimization transforms to do their magic.
-bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertSqueezeToReshape::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto squeeze_it = model->operators.begin() + op_index;
   if (squeeze_it->get()->type != OperatorType::kSqueeze) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto squeeze_op = static_cast<SqueezeOperator*>(squeeze_it->get());
   CHECK_EQ(squeeze_op->inputs.size(), 1);
@@ -42,16 +45,16 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) {
   const auto& input_array = model->GetArray(squeeze_op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_array.shape().dimensions_count() == 0) {
     // Input array cannot be 0-D.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!model->HasArray(squeeze_op->outputs[0]) ||
       !model->GetArray(squeeze_op->outputs[0]).has_shape()) {
     // Yield until shape propagation has set the output shape for us.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We use the output shape that has been calculated by shape propagation.
@@ -59,7 +62,7 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) {
 
   // Empty shapes will not work as empty data arrays.
   if (output_shape.dimensions_count() == 0) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto* reshape_op = new TensorFlowReshapeOperator;
@@ -79,7 +82,8 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(squeeze_it->get(), squeeze_op);
   model->operators.erase(squeeze_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc
index dcaaddbf3b..d7cacf77f4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc
@@ -20,10 +20,13 @@ namespace toco {
 
 // This pass will convert an AddN operator with only 2 inputs into a regular Add
 // operator, to which more optimizations may apply.
-bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertTrivialAddNToAdd::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto addn_it = model->operators.begin() + op_index;
   if (addn_it->get()->type != OperatorType::kAddN) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   AddNOperator* addn_op = static_cast<AddNOperator*>(addn_it->get());
   CHECK_GE(addn_op->inputs.size(), 2);
@@ -31,7 +34,7 @@ bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) {
 
   // We only reduce AddN with N=2 to a regular Add.
   if (addn_op->inputs.size() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Copy inputs & outputs to regular Add.
@@ -45,7 +48,8 @@ bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) {
   addn_it = add_it + 1;
   CHECK_EQ(addn_it->get(), addn_op);
   model->operators.erase(addn_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc
index 75113a2a8c..78779243a9 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc
@@ -25,27 +25,30 @@ limitations under the License.
 
 namespace toco {
 
-bool ConvertTrivialPackToReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertTrivialPackToReshape::Run(Model* model,
+                                                      std::size_t op_index,
+                                                      bool* modified) {
+  *modified = false;
   auto pack_it = model->operators.begin() + op_index;
   if (pack_it->get()->type != OperatorType::kPack) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* pack_op = static_cast<PackOperator*>(pack_it->get());
   if (pack_op->inputs.size() > 1) {
     // Not trivial.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(pack_op->outputs.size(), 1);
 
   const auto& input_array = model->GetArray(pack_op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_array.shape().dimensions_count() == 0) {
     // Input array cannot be 0-D.
     // (Unsure if this is TF behavior, but was required to get a test to pass.)
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Converting trivial %s to a reshape", LogName(*pack_op));
@@ -75,7 +78,8 @@ bool ConvertTrivialPackToReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(pack_it->get(), pack_op);
   model->operators.erase(pack_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc
index b689be0792..b6d712ca44 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc
@@ -21,10 +21,13 @@ limitations under the License.
 
 namespace toco {
 
-bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertTrivialTileToConcat::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   auto tile_it = model->operators.begin() + op_index;
   if (tile_it->get()->type != OperatorType::kTile) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* tile_op = static_cast<TransposeOperator*>(tile_it->get());
 
@@ -34,13 +37,13 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) {
   if (!input_array.has_shape() || !multiples_array.has_shape() ||
       !output_array.has_shape()) {
     // Yield until PropagateFixedSizes has been run on this op.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Note: We can assume we have error checked inputs in PropagateFixedSizes.
 
   if (!multiples_array.buffer) {
     // Yield until the multiples is constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   std::vector<int32> const& multiples =
       multiples_array.GetBuffer<ArrayDataType::kInt32>().data;
@@ -59,7 +62,7 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) {
     // The tile is non-trivial. Good luck.
     AddMessageF("Tile %s is non-trivial (has more than one multiply dimension)",
                 LogName(*tile_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // The tile is like a concat.
@@ -88,7 +91,8 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(tile_it->get(), tile_op);
   model->operators.erase(tile_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc
index 5a36a90b38..e5a96d4335 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc
@@ -48,10 +48,13 @@ bool TransposeAffectsMemoryOrder(std::vector<int> perm,
 
 }  // namespace
 
-bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ConvertTrivialTransposeToReshape::Run(Model* model,
+                                                           std::size_t op_index,
+                                                           bool* modified) {
+  *modified = false;
   auto transpose_it = model->operators.begin() + op_index;
   if (transpose_it->get()->type != OperatorType::kTranspose) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   TransposeOperator* transpose_op =
       static_cast<TransposeOperator*>(transpose_it->get());
@@ -60,14 +63,14 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) {
   const auto& output_array = model->GetArray(transpose_op->outputs[0]);
   if (!input_array.has_shape() || !output_array.has_shape()) {
     // Yield until PropagateFixedSizes has been run on this op.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Note: We can assume we have error checked inputs in PropagateFixedSizes.
 
   // Check that the permutation has propogated.
   std::vector<int> const& perm = transpose_op->perm;
   if (perm.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // This transpose is trivial if non-unitary dimensions remain in the same
@@ -76,7 +79,7 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) {
   std::vector<int> const& output_dims = output_array.shape().dims();
 
   if (TransposeAffectsMemoryOrder(perm, input_dims)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // This transpose is trivial. Replace it with a Reshape op.
@@ -109,7 +112,8 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(transpose_it->get(), transpose_op);
   model->operators.erase(transpose_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc
index 1e68cd678b..ebc0e9afca 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc
@@ -73,18 +73,22 @@ bool ProcessTransposeConvOperator(Model* model, TransposeConvOperator* op) {
   return true;
 }
 
-bool CreateIm2colArrays::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status CreateIm2colArrays::Run(Model* model, std::size_t op_index,
+                                             bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
 
   switch (op->type) {
     case OperatorType::kConv:
-      return ProcessConvOperator(model, static_cast<ConvOperator*>(op));
+      *modified = ProcessConvOperator(model, static_cast<ConvOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kTransposeConv:
-      return ProcessTransposeConvOperator(
+      *modified = ProcessTransposeConvOperator(
           model, static_cast<TransposeConvOperator*>(op));
+      return ::tensorflow::Status::OK();
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc
index 1688586733..2119174950 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc
@@ -186,24 +186,27 @@ bool DequantizeArray(const string& array_name,
 
 }  // namespace
 
-bool Dequantize::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status Dequantize::Run(Model* model, std::size_t op_index,
+                                     bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
   auto* op = op_it->get();
 
   if (op->type == OperatorType::kDequantize) {
     auto& input_array = model->GetArray(op->inputs[0]);
     if (input_array.data_type == ArrayDataType::kFloat) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (input_array.final_data_type != ArrayDataType::kFloat) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     input_array.data_type = ArrayDataType::kFloat;
     input_array.quantization_params = nullptr;
     auto& output_array = model->GetArray(op->outputs[0]);
     output_array.data_type = ArrayDataType::kFloat;
     output_array.quantization_params = nullptr;
-    return RemoveTrivialPassthroughOp(this, model, op_index);
+    *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+    return ::tensorflow::Status::OK();
   }
 
   std::vector<string> arrays;
@@ -220,7 +223,8 @@ bool Dequantize::Run(Model* model, std::size_t op_index) {
     }
   }
 
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc
index 95558ef5ec..1555cf60a1 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc
@@ -25,21 +25,23 @@ limitations under the License.
 
 namespace toco {
 
-bool DropFakeQuant::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status DropFakeQuant::Run(Model* model, std::size_t op_index,
+                                        bool* modified) {
+  *modified = false;
   const auto fakequant_it = model->operators.begin() + op_index;
   auto* fakequant_base_op = fakequant_it->get();
   if (fakequant_base_op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fakequant_op = static_cast<FakeQuantOperator*>(fakequant_base_op);
 
   if (!fakequant_op->minmax) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& output_array = model->GetArray(fakequant_op->outputs[0]);
   if (!output_array.minmax) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Drop min/max inputs
@@ -50,7 +52,8 @@ bool DropFakeQuant::Run(Model* model, std::size_t op_index) {
   }
   fakequant_op->inputs.resize(1);
 
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc
index f7fd878b7e..7d66ea5dd2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc
@@ -19,15 +19,17 @@ limitations under the License.
 
 namespace toco {
 
-bool DropIm2colArrays::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status DropIm2colArrays::Run(Model* model, std::size_t op_index,
+                                           bool* modified) {
+  *modified = false;
   auto conv_it = model->operators.begin() + op_index;
   if (conv_it->get()->type != OperatorType::kConv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* conv_op = static_cast<ConvOperator*>(conv_it->get());
   if (conv_op->outputs.size() < 2) {
     // Conv op does not have im2col.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Drop the im2col array.
@@ -36,7 +38,8 @@ bool DropIm2colArrays::Run(Model* model, std::size_t op_index) {
   conv_op->outputs.resize(1);
   AddMessageF("Dropped an im2col array for %s", LogName(*conv_op));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc
index e80ed036b3..72b1dda3be 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc
@@ -62,17 +62,20 @@ bool ProcessLinearOperator(Model* model, Operator* op) {
 }
 }  // namespace
 
-bool EnsureBiasVectors::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status EnsureBiasVectors::Run(Model* model, std::size_t op_index,
+                                            bool* modified) {
+  *modified = false;
   auto* op = model->operators[op_index].get();
   if (op->type == OperatorType::kConv ||
       op->type == OperatorType::kDepthwiseConv ||
       op->type == OperatorType::kFullyConnected) {
     if (ProcessLinearOperator(model, op)) {
       AddMessageF("Added bias vector to %s as %s", LogName(*op), op->inputs[2]);
-      return true;
+      *modified = true;
+      return ::tensorflow::Status::OK();
     }
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
index c13fc0de75..60dcd52684 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
@@ -108,8 +108,9 @@ namespace toco {
 // we can foresee these 'fast int8 kernels' to remain important to have into
 // the 2020s.
 //
-bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model,
-                                                   std::size_t op_index) {
+::tensorflow::Status EnsureUint8WeightsSafeForFastInt8Kernels::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   const auto& op = *model->operators[op_index];
   int weights_index = 0;
   switch (op.type) {
@@ -148,16 +149,16 @@ bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model,
       // That's why at the moment we only handle operators that use a GEMM
       // (Conv, fully-connected --- note that LSTM merely wraps a
       // fully-connected operator).
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   const string& name = op.inputs[weights_index];
   auto& array = model->GetArray(name);
   if (!array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (array.data_type != ArrayDataType::kUint8) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& buffer_data = array.GetMutableBuffer<ArrayDataType::kUint8>().data;
 
@@ -212,7 +213,8 @@ bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model,
     AddMessageF("Tweaked weights values for %s", LogName(op));
   }
 
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
index c5ce3fcd95..88511a7d3c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc
@@ -25,27 +25,30 @@ limitations under the License.
 
 namespace toco {
 
-bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status FuseActivationFunctions::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto ac_it = model->operators.begin() + op_index;
   const auto* ac_op = ac_it->get();
 
   if (ac_op->type != OperatorType::kRelu6 &&
       ac_op->type != OperatorType::kRelu1 &&
       ac_op->type != OperatorType::kRelu) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Find the op producing the array passed to this activation function
   Operator* op = GetOpWithOutput(*model, ac_op->inputs[0]);
 
-  if (!op) return false;
+  if (!op) return ::tensorflow::Status::OK();
 
   if (CountTrueOutputs(*model, *op) > 1) {
     AddMessageF(
         "Not fusing activation function %s into %s because it has more than "
         "one  consumed output",
         LogName(*ac_op), LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(op->outputs[0], ac_op->inputs[0]);
@@ -57,7 +60,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
         "Not fusing activation function into %s because it is consumed by more "
         "than 1 other operator",
         LogName(*ac_op), LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!IsDiscardableArray(*model, op->outputs[0])) {
@@ -65,7 +68,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
         "Not fusing activation function %s into %s because output %s it is not "
         "discardable",
         LogName(*ac_op), LogName(*op), op->outputs[0]);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (op->fused_activation_function != FusedActivationFunctionType::kNone) {
@@ -73,7 +76,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
         "Not fusing activation function %s into %s because it already has a "
         "fused activation function",
         LogName(*ac_op), LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!OperatorSupportsFusedActivation(op->type)) {
@@ -81,7 +84,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
         "Not fusing activation function %s because the %s op doesn't support "
         "it",
         LogName(*ac_op), LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Fusing activation function %s into the preceding %s",
@@ -98,7 +101,8 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) {
   model->EraseArray(ac_op->inputs[0]);
   op->outputs[0] = ac_op->outputs[0];
   model->operators.erase(ac_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc
index dcbbead517..0de22b8ff4 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc
@@ -150,14 +150,17 @@ void FuseMulOrDivParamsIntoFollowingAffine(Model* model, Operator* following_op,
 
 }  // namespace
 
-bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status FuseBinaryIntoFollowingAffine::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   auto* binary_op = binary_it->get();
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
       binary_op->type != OperatorType::kSub &&
       binary_op->type != OperatorType::kDiv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(binary_op->inputs.size(), 2);
@@ -175,12 +178,12 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
   };
   if (!is_input_constant[0] && !is_input_constant[1]) {
     // Neither input is constant, so nothing we can fuse into a constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (is_input_constant[0] && is_input_constant[1]) {
     // Both inputs are constants. That's a job for constants
     // propagation, not for us to handle here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int index_of_constant_input = is_input_constant[0] ? 0 : 1;
   const int index_of_variable_input = is_input_constant[0] ? 1 : 0;
@@ -192,7 +195,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
     if (index_of_constant_input != 1) {
       AddMessageF("Not fusing %s because the denominator is not constant",
                   LogName(*binary_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -204,7 +207,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
           "Not fusing %s into the following affine op, because we only know "
           "how to do so when the constant operand is a scalar",
           LogName(*binary_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -212,7 +215,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
       FusedActivationFunctionType::kNone) {
     AddMessageF("Not fusing %s because it has a fused activation function",
                 LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   Operator* following_op = GetOpWithInput(*model, binary_op->outputs[0]);
@@ -221,7 +224,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Not fusing %s because it is not consumed by exactly one other op",
         LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (following_op->type != OperatorType::kConv &&
@@ -231,14 +234,14 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the following %s is not of one of the supported "
         "types",
         LogName(*binary_op), LogName(*following_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (following_op->inputs.size() < 3) {
     AddMessageF(
         "Not fusing %s because the following %s does not have a bias vector",
         LogName(*following_op), LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& weights = model->GetArray(following_op->inputs[1]);
@@ -248,7 +251,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the following %s has non-constant weights or "
         "bias arrays",
         LogName(*binary_op), LogName(*following_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Try to fuse the binary params into the following op's params
@@ -260,7 +263,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
         AddMessageF(
             "Not fusing %s because the following %s does not use VALID padding",
             LogName(*binary_op), LogName(*following_op));
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
     if (following_op->type == OperatorType::kDepthwiseConv) {
@@ -269,7 +272,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
         AddMessageF(
             "Not fusing %s because the following %s does not use VALID padding",
             LogName(*binary_op), LogName(*following_op));
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
     FuseAddOrSubParamsIntoFollowingAffine(model, following_op, binary_op,
@@ -294,7 +297,8 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) {
     model->EraseArray(old_constant_param_name);
   }
   model->operators.erase(binary_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc
index b324631579..b8da756d85 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc
@@ -188,14 +188,17 @@ void FuseMulOrDivParamsIntoPrecedingAffine(Model* model, Operator* preceding_op,
 }
 }  // namespace
 
-bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status FuseBinaryIntoPrecedingAffine::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   const auto* binary_op = binary_it->get();
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
       binary_op->type != OperatorType::kSub &&
       binary_op->type != OperatorType::kDiv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(binary_op->inputs.size(), 2);
@@ -213,12 +216,12 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
   };
   if (!is_input_constant[0] && !is_input_constant[1]) {
     // Neither input is constant, so nothing we can fuse into a constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (is_input_constant[0] && is_input_constant[1]) {
     // Both inputs are constants. That's a job for constants
     // propagation, not for us to handle here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int index_of_constant_input = is_input_constant[0] ? 0 : 1;
   const int index_of_variable_input = is_input_constant[0] ? 1 : 0;
@@ -230,7 +233,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
     if (index_of_constant_input != 1) {
       AddMessageF("Not fusing %s because the denominator is not constant",
                   LogName(*binary_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -239,12 +242,12 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
   if (!preceding_op) {
     AddMessageF("Not fusing %s because it is not the output of another op",
                 LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   for (const string& output_array : model->flags.output_arrays()) {
     if (preceding_op->outputs[0] == output_array) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -255,7 +258,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the preceding %s is not of one of the supported "
         "types",
         LogName(*binary_op), LogName(*preceding_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (preceding_op->fused_activation_function !=
@@ -264,14 +267,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the preceding %s has a fused activation "
         "function",
         LogName(*binary_op), LogName(*preceding_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (preceding_op->inputs.size() < 3) {
     AddMessageF(
         "Not fusing %s because the preceding %s does not have a bias vector",
         LogName(*binary_op), LogName(*preceding_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& weights_name = preceding_op->inputs[1];
@@ -289,14 +292,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
           "Not fusing %s because the preceding %s has a non-constant bias "
           "array",
           LogName(*binary_op), LogName(*preceding_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (count_ops_consuming_bias > 1) {
       AddMessageF(
           "Not fusing %s because the bias of the preceding %s is consumed by "
           "another op",
           LogName(*binary_op), LogName(*preceding_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   } else {
     if (!weights.buffer || !bias.buffer) {
@@ -304,14 +307,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
           "Not fusing %s because the preceding %s has non-constant weights or "
           "bias arrays",
           LogName(*binary_op), LogName(*preceding_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (count_ops_consuming_weights > 1 || count_ops_consuming_bias > 1) {
       AddMessageF(
           "Not fusing %s because the weights or bias of the preceding %s is "
           "consumed by another op",
           LogName(*binary_op), LogName(*preceding_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -323,7 +326,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
         "Not fusing %s because the output of the preceding %s is consumed by "
         "another op",
         LogName(*binary_op), LogName(*preceding_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Fusing %s into the preceding %s", LogName(*binary_op),
@@ -352,7 +355,8 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) {
     model->EraseArray(old_constant_param_name);
   }
   model->operators.erase(binary_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc
index 874d8def57..4848867b9a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc
@@ -51,19 +51,22 @@ bool IsBroadcastingOp(const Model& model, Operator* op) {
 // Finds an operation that looks like a broadcast (concat of the same sources
 // along the last dimension) and drops it by relying on the ability of certain
 // binary ops to perform an implicit broadcast.
-bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status FuseBroadcastIntoFollowingBinary::Run(Model* model,
+                                                           std::size_t op_index,
+                                                           bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   auto* binary_op = binary_it->get();
 
   // Test for binary ops of types that we know how to resolve
   if (binary_op->inputs.size() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
       binary_op->type != OperatorType::kSub &&
       binary_op->type != OperatorType::kDiv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // NOTE: either of these ops may be nullptr if the input array is constant.
@@ -78,14 +81,14 @@ bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) {
   if (!is_op_0_broadcast && !is_op_1_broadcast) {
     // Neither input is a broadcast-looking thing.
     AddMessageF("Neither input looks broadcasty");
-    return false;
+    return ::tensorflow::Status::OK();
   } else if (is_op_0_broadcast && is_op_1_broadcast) {
     AddMessageF(
         "Unable to fuse broadcast into %s as both inputs (%s, %s) are "
         "broadcasts",
         LogName(*binary_op), op[0] ? LogName(*op[0]) : "(?)",
         op[1] ? LogName(*op[1]) : "(?)");
-    return false;
+    return ::tensorflow::Status::OK();
   }
   int broadcast_index = is_op_0_broadcast ? 0 : 1;
 
@@ -96,7 +99,8 @@ bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) {
   binary_op->inputs[broadcast_index] = op[broadcast_index]->inputs[0];
 
   // We leave the broadcast op in; it'll get cleaned up if it's not used later.
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
index 6961e23690..8b0bc2d865 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc
@@ -142,7 +142,7 @@ bool GraphTransformationsPass(int increment, Model* model,
     for (const auto& transformation : transformations) {
       CHECK(!changed_now);
       CHECK(transformation->Messages().empty());
-      changed_now = transformation->Run(model, op_index);
+      CHECK(transformation->Run(model, op_index, &changed_now).ok());
       const char* made_a_change_msg =
           changed_now ? "made a change" : "did NOT make a change";
       const int log_level =
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
index 4d213b3f9c..a89db320ea 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
+++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h
@@ -27,7 +27,8 @@ namespace toco {
 
 class GraphTransformation {
  public:
-  virtual bool Run(Model* model, std::size_t op_index) = 0;
+  virtual ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                                   bool* modified) = 0;
   virtual const char* Name() const = 0;
   virtual ~GraphTransformation() {}
   // Returns the list of messages that this graph transformation
@@ -104,11 +105,12 @@ class GraphTransformationsSet {
 void RunGraphTransformations(Model* model, const string& message,
                              const GraphTransformationsSet& transformations);
 
-#define DECLARE_GRAPH_TRANSFORMATION(GTName)               \
-  class GTName : public GraphTransformation {              \
-   public:                                                 \
-    bool Run(Model* model, std::size_t op_index) override; \
-    const char* Name() const override { return #GTName; }  \
+#define DECLARE_GRAPH_TRANSFORMATION(GTName)                     \
+  class GTName : public GraphTransformation {                    \
+   public:                                                       \
+    ::tensorflow::Status Run(Model* model, std::size_t op_index, \
+                             bool* modified) override;           \
+    const char* Name() const override { return #GTName; }        \
   };
 
 // List of all graph transformations
@@ -200,7 +202,8 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveGatherAttributes)
 
 class PropagateDefaultMinMax : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override { return "PropagateDefaultMinMax"; }
 
   bool has_any_ranges_defined() const { return !type_ranges_.empty(); }
@@ -218,7 +221,8 @@ class PropagateDefaultMinMax : public GraphTransformation {
 
 class RemoveTrivialReshape : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override { return "RemoveTrivialReshape"; }
   bool treat_expand_dims_as_trivial() const {
     return treat_expand_dims_as_trivial_;
@@ -233,7 +237,8 @@ class RemoveTrivialReshape : public GraphTransformation {
 
 class ResolveConstantFakeQuant : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override { return "ResolveConstantFakeQuant"; }
 
   // True if the num_bits should adjust the final data type.
@@ -250,7 +255,8 @@ class ResolveConstantFakeQuant : public GraphTransformation {
 
 class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override {
     return "EnsureUint8WeightsSafeForFastInt8Kernels";
   }
@@ -267,7 +273,8 @@ class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation {
 
 class IdentifyDilatedConv : public GraphTransformation {
  public:
-  bool Run(Model* model, std::size_t op_index) override;
+  ::tensorflow::Status Run(Model* model, std::size_t op_index,
+                           bool* modified) override;
   const char* Name() const override { return "IdentifyDilatedConv"; }
   bool identify_depthwise_conv() const { return identify_depthwise_conv_; }
   void set_identify_depthwise_conv(bool val) { identify_depthwise_conv_ = val; }
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
index 3114fa93e8..72df53548b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc
@@ -372,7 +372,9 @@ bool HardcodeMinMaxForLstmCell(Model* model, Operator* op) {
 }
 }  // namespace
 
-bool HardcodeMinMax::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status HardcodeMinMax::Run(Model* model, std::size_t op_index,
+                                         bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   bool changed = false;
@@ -467,7 +469,8 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) {
   if (changed) {
     AddMessageF("Hardcoded min-max through %s", LogName(*op));
   }
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc
index aac77eb39e..9e4a3005a1 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc
@@ -168,7 +168,10 @@ bool ResolveDilatedConv(Model* model, Operator* conv_base_op, Operator* stb_op,
   return true;
 }
 
-bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyDilatedConv::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* stb_op = it->get();
 
@@ -176,17 +179,17 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
   // ***************************************************************************
   // SpaceToBatch Op.
   if (stb_op->type != OperatorType::kSpaceToBatchND) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (stb_op->inputs.size() != 3) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(stb_op->outputs.size(), 1);
   // Extract the dilation factor from Input[1] of SpaceToBatch
   // TODO(mjmatthews): Support 2D dilation factors.
   const auto& block_shape_array = model->GetArray(stb_op->inputs[1]);
   if (!block_shape_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(block_shape_array.shape().dimensions_count(), 1);
   int dilation_factor =
@@ -195,7 +198,7 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
   // Expand Op
   auto* post_stb_op = GetOpWithInput(*model, stb_op->outputs[0]);
   if (!post_stb_op) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   bool has_expand_op = false;
   if (post_stb_op->type == OperatorType::kExpandDims) {
@@ -229,7 +232,8 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) {
     }
   }
 
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc
index b78efd7fc3..78f60f52fb 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc
@@ -39,7 +39,10 @@ std::vector<std::unique_ptr<Operator>>::iterator FindOperator(
 }
 }  // namespace
 
-bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyL2Normalization::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto div_it = model->operators.begin() + op_index;
   const auto* div_or_mul_op = div_it->get();
   OperatorType expected_op_type_producing_div_or_mul_input;
@@ -48,7 +51,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
   } else if (div_or_mul_op->type == OperatorType::kMul) {
     expected_op_type_producing_div_or_mul_input = OperatorType::kRsqrt;
   } else {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(div_or_mul_op->inputs.size(), 2);
   Operator* op_producing_div_or_mul_input[2] = {
@@ -58,14 +61,14 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
   if (!op_producing_div_or_mul_input[1] ||
       op_producing_div_or_mul_input[1]->type !=
           expected_op_type_producing_div_or_mul_input) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   Operator* sqrt_or_rsqrt_op = op_producing_div_or_mul_input[1];
   CHECK_EQ(sqrt_or_rsqrt_op->inputs.size(), 1);
   Operator* op_producing_sqrt_or_rsqrt_input =
       GetOpWithOutput(*model, sqrt_or_rsqrt_op->inputs[0]);
   if (!op_producing_sqrt_or_rsqrt_input) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // There may be an Add or a Maximum here, adding or clamping to a "small"
@@ -105,7 +108,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
           " because the operator producing the input to the square root, %s,"
           ", does not match the expected pattern",
           LogName(*op_producing_sqrt_or_rsqrt_input));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -116,7 +119,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Normalization subgraph: "
         "expected Sum op, got %s",
         LogName(*sum_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   Operator* square_op = GetOpWithOutput(*model, sum_op->inputs[0]);
@@ -125,7 +128,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Normalization subgraph: "
         "expected Square op, got %s",
         LogName(*square_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(square_op->inputs.size(), 1);
@@ -135,7 +138,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Normalization subgraph: %s does not "
         "take the same input as the Mul/Div node",
         LogName(*square_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Create and emplace the new L2Normalization
@@ -162,7 +165,8 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) {
   model->operators.erase(FindOperator(model, sqrt_or_rsqrt_op));
   model->EraseArray(div_or_mul_op->inputs[1]);
   model->operators.erase(FindOperator(model, div_or_mul_op));
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc
index 705e73779b..13664bb344 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc
@@ -38,11 +38,13 @@ std::vector<std::unique_ptr<Operator>>::iterator FindOperator(
 }
 }  // namespace
 
-bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyL2Pool::Run(Model* model, std::size_t op_index,
+                                         bool* modified) {
+  *modified = false;
   const auto sqrt_it = model->operators.begin() + op_index;
   const auto* sqrt_op = sqrt_it->get();
   if (sqrt_op->type != OperatorType::kSqrt) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(sqrt_op->inputs.size(), 1);
@@ -56,7 +58,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Giving up trying to identify L2Pool subgraph: "
         "expected AveragePool op, but Sqrt op has no preceding op");
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (prev_to_sqrt_op->type != OperatorType::kAveragePool) {
@@ -64,7 +66,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Pool subgraph: "
         "expected AveragePool op, got %s",
         LogName(*prev_to_sqrt_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   avpool_op = static_cast<const AveragePoolOperator*>(prev_to_sqrt_op);
@@ -77,7 +79,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
         "Giving up trying to identify L2Pool subgraph: "
         "expected Square op, got %s",
         LogName(*square_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Create and emplace L2Pool node.
@@ -107,7 +109,8 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) {
   model->operators.erase(FindOperator(model, avpool_op));
   model->operators.erase(FindOperator(model, sqrt_op));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc
index c0b014b45e..7fd8f906e2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc
@@ -132,7 +132,9 @@ bool MatchOperatorInputs(const Operator& op, const Model& model,
 
 }  // namespace
 
-bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyLstmCell::Run(Model* model, std::size_t op_index,
+                                           bool* modified) {
+  *modified = false;
   // This LSTM cell identification method is not invariant to commutation of
   // commutative operator inputs. For example, if input[0] and input[1] of the
   // final output multiplication were swapped, this method would not identify it
@@ -143,13 +145,13 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   auto op_it = model->operators.begin() + op_index;
   Operator* final_output_mul = op_it->get();
   if (final_output_mul->type != OperatorType::kMul) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   Operator *state_output_tanh, *fc_output_sig;
   if (!MatchOperatorInputs(*final_output_mul, *model, OperatorType::kTanh,
                            &state_output_tanh, OperatorType::kLogistic,
                            &fc_output_sig)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // State output TanH
@@ -158,7 +160,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   Operator* state_combine_add;
   if (!MatchOperatorInputs(*state_output_tanh, *model, OperatorType::kAdd,
                            &state_combine_add)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // State forget & remember addition
@@ -166,7 +168,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   if (!MatchOperatorInputs(*state_combine_add, *model, OperatorType::kMul,
                            &state_forget_mul, OperatorType::kMul,
                            &state_remember_mul)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const string prev_state = state_forget_mul->inputs[0];
 
@@ -175,7 +177,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   if (!MatchOperatorInputs(*state_forget_mul, *model, OperatorType::kNone,
                            nullptr, OperatorType::kLogistic,
                            &state_forget_sig)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // State remember gate
@@ -183,40 +185,40 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   if (!MatchOperatorInputs(*state_remember_mul, *model, OperatorType::kLogistic,
                            &state_remember_sig, OperatorType::kTanh,
                            &state_info_tanh)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // State remember "information" activation function
   Operator* fc_output_split;
   if (!MatchOperatorInputs(*state_info_tanh, *model, OperatorType::kSplit,
                            &fc_output_split)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // State remember gate activation function
   Operator* tmp;
   if (!MatchOperatorInputs(*state_remember_sig, *model, OperatorType::kSplit,
                            &tmp) ||
       (tmp != fc_output_split)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // State forget gate activation function
   if (!MatchOperatorInputs(*state_forget_sig, *model, OperatorType::kSplit,
                            &tmp) ||
       (tmp != fc_output_split)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Fully connected output activation function
   if (!MatchOperatorInputs(*fc_output_sig, *model, OperatorType::kSplit,
                            &tmp) ||
       (tmp != fc_output_split)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Fully connected output split
   Operator* fully_connected;
   if (!MatchOperatorInputs(*fc_output_split, *model, OperatorType::kNone,
                            nullptr, OperatorType::kFullyConnected,
                            &fully_connected)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Fully connected op
@@ -225,13 +227,13 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
                            OperatorType::kConcatenation, &concat_inputs,
                            OperatorType::kNone, nullptr, OperatorType::kNone,
                            nullptr)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (static_cast<FullyConnectedOperator*>(fully_connected)->weights_format !=
       FullyConnectedWeightsFormat::kDefault) {
     // Not yet implemented: experimental shuffled weights in fused LSTM cell.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Emplace a new LSTM cell operator
@@ -300,7 +302,8 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) {
   model->operators.erase(FindOperator(model, *fully_connected));
   DeleteArrayIfUnused(concat_inputs->outputs[0], model);
   model->operators.erase(FindOperator(model, *concat_inputs));
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc
index 5b6a984ee1..6ccce923f3 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc
@@ -25,19 +25,22 @@ limitations under the License.
 
 namespace toco {
 
-bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status MergeLstmCellInputs::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   // Find lstm cell.
   auto op_it = model->operators.begin() + op_index;
   auto src_op = op_it->get();
   if (src_op->type != OperatorType::kLstmCell) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Already a compact LstmCell. Do not need to merge cell inputs.
   const auto* src_lstm_op = static_cast<LstmCellOperator*>(src_op);
   if (src_lstm_op->kernel_type != LstmCellOperator::KERNEL_FULL ||
       src_lstm_op->inputs.size() != kExtendedLstmInputCount) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Identify prev_activ_input, prev_state_input as required Op inputs,
@@ -45,12 +48,12 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) {
   string prev_activ_input;
   if (!GetMatchingRnnArray(model, src_op->outputs[kOutputTensor],
                            &prev_activ_input)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   string prev_state_input;
   if (!GetMatchingRnnArray(model, src_op->outputs[kCellStateTensor],
                            &prev_state_input)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get LstmCell's cell, input, output size.
@@ -184,7 +187,8 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) {
   DeleteArrayIfUnused(src_op->inputs[kOutputGateBiasTensor], model);
   model->operators.erase(FindOp(*model, src_op));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc
index 46d1fce50e..ad5120e2aa 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc
@@ -25,19 +25,22 @@ limitations under the License.
 
 namespace toco {
 
-bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status SplitLstmCellInputs::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   // Find lstm cell.
   auto op_it = model->operators.begin() + op_index;
   auto curr_op = op_it->get();
   if (curr_op->type != OperatorType::kLstmCell) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto* curr_lstm_op = static_cast<LstmCellOperator*>(curr_op);
   // Already an extended LstmCell. Do not need to split cell inputs.
   if (curr_lstm_op->kernel_type != LstmCellOperator::KERNEL_BASIC ||
       curr_lstm_op->inputs.size() != LstmCellOperator::NUM_INPUTS) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Make sure the WEIGHTS_INPUT and BIASES_INPUT are constant arrays,
@@ -46,13 +49,13 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) {
           *model, curr_op->inputs[LstmCellOperator::WEIGHTS_INPUT]) ||
       !IsConstantParameterArray(
           *model, curr_op->inputs[LstmCellOperator::BIASES_INPUT])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Make sure propagate_fixed_sizes has defined the size of the output.
   if (!model->GetArray(curr_op->outputs[LstmCellOperator::ACTIV_OUTPUT])
            .has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Emplace a new LstmCell operator with extended inputs (kernel/lstm.cc).
@@ -168,7 +171,8 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) {
   DeleteArrayIfUnused(curr_op->inputs[LstmCellOperator::BIASES_INPUT], model);
   model->operators.erase(FindOp(*model, curr_op));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc
index b90a156a0d..c11fee4dc9 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc
@@ -43,13 +43,15 @@ limitations under the License.
 
 namespace toco {
 
-bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyPRelu::Run(Model* model, std::size_t op_index,
+                                        bool* modified) {
+  *modified = false;
   const auto add_op_it = model->operators.begin() + op_index;
   const auto* add_op = add_op_it->get();
   if (add_op == nullptr || add_op->type != OperatorType::kAdd ||
       add_op->inputs.size() != 2 ||
       add_op->fused_activation_function != FusedActivationFunctionType::kNone) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto* relu_input_op = GetOpWithOutput(*model, add_op->inputs[0]);
@@ -57,7 +59,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
       relu_input_op->inputs.size() != 1 ||
       relu_input_op->fused_activation_function !=
           FusedActivationFunctionType::kNone) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // TODO(ycling): Both Add and Mul are commutative. Support the case where
@@ -66,7 +68,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
   if (mul_op == nullptr || mul_op->type != OperatorType::kMul ||
       mul_op->inputs.size() != 2 ||
       mul_op->fused_activation_function != FusedActivationFunctionType::kNone) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto neg_alpha_tensor_name = mul_op->inputs[0];
@@ -75,7 +77,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
 
   if (relu_neg_input_op == nullptr ||
       relu_neg_input_op->inputs.size() != 1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const Operator* final_input_op;
@@ -92,13 +94,13 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
         relu_neg_input_op->type != OperatorType::kRelu ||
         relu_neg_input_op->fused_activation_function !=
             FusedActivationFunctionType::kNone) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     final_input_op = neg_input_op;
   }
 
   if (relu_input_op->inputs[0] != final_input_op->inputs[0]) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto input_tensor_name = relu_input_op->inputs[0];
@@ -128,7 +130,8 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) {
   // intermediate tensors aren't used by other ops, those will be removed by
   // other graph transformation rules.
   model->operators.erase(FindOp(*model, add_op));
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc
index 94820a0166..51d0629362 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc
@@ -56,13 +56,15 @@ int GetSingleScalarInputIndexOfBinaryOp(Model* model, const Operator* op,
 }
 }  // namespace
 
-bool IdentifyRelu1::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status IdentifyRelu1::Run(Model* model, std::size_t op_index,
+                                        bool* modified) {
+  *modified = false;
   // Follow sequences of min+max and max+min. First get the leading op.
   const auto op_it = model->operators.begin() + op_index;
   const auto* op_0 = op_it->get();
   if (op_0->type != OperatorType::kMinimum &&
       op_0->type != OperatorType::kMaximum) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get the paired op and ensure it's the counter to the first.
@@ -71,17 +73,17 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) {
       (op_1->type != OperatorType::kMinimum &&
        op_1->type != OperatorType::kMaximum) ||
       op_0->type == op_1->type) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto* min_op = op_0->type == OperatorType::kMinimum ? op_0 : op_1;
   const auto* max_op = op_0->type == OperatorType::kMaximum ? op_0 : op_1;
 
   if (min_op->inputs.size() != 2 || max_op->inputs.size() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (min_op->outputs.size() != 1 || max_op->outputs.size() != 1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get the original input to the min+max pair.
@@ -90,7 +92,7 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) {
   int max_scalar_input_index =
       GetSingleScalarInputIndexOfBinaryOp(model, max_op, -1.0f);
   if (min_scalar_input_index == -1 || max_scalar_input_index == -1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   int op_0_scalar_input_index =
       op_0 == min_op ? min_scalar_input_index : max_scalar_input_index;
@@ -111,7 +113,8 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) {
   model->operators.erase(FindOperator(model, op_0));
   model->operators.erase(FindOperator(model, op_1));
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc
index f684de08ab..5bf17d5b4c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc
@@ -97,7 +97,10 @@ bool AddDequantizeOperatorToInput(const string& input_name, const Operator* op,
   return true;
 }
 
-bool MakeInitialDequantizeOperator::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status MakeInitialDequantizeOperator::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   // This is effectively a transformation applied to edges.  We iterate over the
   // specified node (op) and proceed for input edges.
   const auto it = model->operators.begin() + op_index;
@@ -114,7 +117,8 @@ bool MakeInitialDequantizeOperator::Run(Model* model, std::size_t op_index) {
       }
     }
   }
-  return change_made;
+  *modified = change_made;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc
index 95bc7f7d4b..06de9b1cd8 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc
@@ -102,18 +102,19 @@ std::vector<int32> ReshapeToTranspose(const Model& model,
 // to be merged if the reshape does not affect memory ordering and does not
 // affects the number of dimensions. This only occurs when only unary dimensions
 // are shifting position.
-bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
-                                             std::size_t op_index) {
+::tensorflow::Status MergeReshapeIntoPrecedingTranspose::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* reshape_op = ConvertOperator<TensorFlowReshapeOperator*>(
       it->get(), OperatorType::kReshape);
 
   if (reshape_op == nullptr) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const string intermediate_name = reshape_op->inputs[0];
@@ -121,13 +122,13 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
 
   // Guarantee the input is only consume by the reshape.
   if (CountOpsWithInput(*model, intermediate_name) != 1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check for the parent operator.
   const auto& transpose_it = FindOpWithOutput(*model, intermediate_name);
   if (transpose_it == model->operators.end()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Find the parent operator and guarantee it is a transpose.
@@ -135,16 +136,16 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
       transpose_it->get(), OperatorType::kTranspose);
 
   if (transpose_op == nullptr) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!ReshapeIsEquivalentToTranspose(*model, reshape_op,
                                       false /*allow_extra_unary_dimensions*/)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check that the intermediate is not an output array.
@@ -153,7 +154,7 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
         "Cannot fuse %s and %s as it would invalidate the transpose "
         "output array.",
         LogName(*transpose_op), LogName(*reshape_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Merging operations %s and %s", LogName(*transpose_op),
@@ -172,7 +173,7 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
 
   // Remove the reshape as passthrough operation.
   if (!RemoveTrivialPassthroughOp(this, model, op_index)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Update transpose_op's constant buffer to contain the new permutation.
@@ -184,7 +185,8 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model,
   // transpose_ops's shape will likely has changed.
   model->GetArray(transpose_op->outputs[0]).clear_shape();
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc
index 7f44c65285..f0d8d924ad 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc
@@ -54,7 +54,10 @@ bool IsTailOfShape(const Shape& tail, const Shape& shape) {
 //
 // Note we are testing for one particular case of a broader set of possible
 // binary-reshape op transformations. This transformation could be generalized.
-bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status MoveBinaryOperatorBeforeReshape::Run(Model* model,
+                                                          std::size_t op_index,
+                                                          bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   Operator* binary_op = binary_it->get();
   if (binary_op->type != OperatorType::kAdd &&
@@ -69,7 +72,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
       binary_op->type != OperatorType::kLessEqual &&
       binary_op->type != OperatorType::kGreater &&
       binary_op->type != OperatorType::kGreaterEqual) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // BINARY OP INPUT CHECKS
@@ -81,11 +84,11 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
   if (!input_is_const[0] && !input_is_const[1]) {
     // To limit our scope, we require one constant input. Though there's no
     // reason this transformation wouldn't work with all variable inputs.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_is_const[0] && input_is_const[1]) {
     // Both inputs are constants. Leave this for constants propagation.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int constant_input_idx = input_is_const[0] ? 0 : 1;
   const int variable_input_idx = input_is_const[0] ? 1 : 0;
@@ -98,13 +101,13 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Not moving %s because it's non-constant input shape is not resolved.",
         LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsTailOfShape(
           model->GetArray(binary_op->inputs[constant_input_idx]).shape(),
           model->GetArray(binary_op->inputs[variable_input_idx]).shape())) {
     // Constant array shape must be the latter part of the variable shape.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // RESHAPE OP CHECKS
@@ -113,13 +116,13 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
   if (reshape_it == model->operators.end()) {
     AddMessageF("Not moving %s because it's variable input is not connected.",
                 LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   Operator* reshape_op = reshape_it->get();
   if (reshape_op->type != OperatorType::kReshape) {
     AddMessageF("Not moving %s because the preceding %s is not a reshape op",
                 LogName(*binary_op), LogName(*reshape_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& reshape_input_array = model->GetArray(reshape_op->inputs[0]);
   if (!reshape_input_array.has_shape()) {
@@ -127,14 +130,14 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
         "Not moving %s because it's non-constant input shape is not resolved "
         "yet",
         LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsTailOfShape(
           model->GetArray(binary_op->inputs[constant_input_idx]).shape(),
           model->GetArray(reshape_op->outputs[0]).shape())) {
     // Constant array shape must be the latter part of the binary op output
     // shape.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // EXTRA CHECKS ON CONNECTING ARRAY
@@ -143,7 +146,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
       AddMessageF(
           "Not moving %s because the output of reshape op %s is an output op.",
           LogName(*binary_op), LogName(*reshape_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
   int count_ops_consuming_output =
@@ -154,7 +157,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
         "Not moving %s because the output of reshape op %s is consumed by "
         "another op",
         LogName(*binary_op), LogName(*reshape_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // SWAP ORDER OF BINARY AND RESHAPE OPS
@@ -172,7 +175,8 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) {
   // Clear binary output shape so it will be re-propagated
   model->GetArray(binary_op->outputs[0]).clear_shape();
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc
index cf17c49b10..9c1ed2b732 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc
@@ -26,20 +26,21 @@ limitations under the License.
 
 namespace toco {
 
-bool PropagateActivationFunctionIntoConstants::Run(Model* model,
-                                                   std::size_t op_index) {
+::tensorflow::Status PropagateActivationFunctionIntoConstants::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   const auto ac_it = model->operators.begin() + op_index;
   const auto* ac_op = ac_it->get();
   if (ac_op->type != OperatorType::kRelu6 &&
       ac_op->type != OperatorType::kRelu1 &&
       ac_op->type != OperatorType::kRelu) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Find the op producing the array passed to this activation function.
   auto* src_op = GetOpWithOutput(*model, ac_op->inputs[0]);
   if (!src_op) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Ensure the src_op is not used without the activation function applied.
@@ -57,7 +58,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model,
       src_op_input = src_op->inputs[0];
       break;
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
   CHECK_EQ(src_op->outputs[0], ac_op->inputs[0]);
 
@@ -69,7 +70,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model,
         "Not propagating activation function %s into %s:%s because it is not "
         "constant",
         LogName(*ac_op), LogName(*src_op), src_op_input);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get the array we'll be working with and ensure it's a compatible type.
@@ -79,7 +80,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model,
         "Not propagating activation function %s into %s:%s because it is "
         "non-float data",
         LogName(*ac_op), LogName(*src_op), src_op_input);
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& const_array_data =
       const_array.GetMutableBuffer<ArrayDataType::kFloat>().data;
@@ -108,14 +109,15 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model,
       }
       default:
         LOG(FATAL) << "Unsupported activation function " << LogName(*ac_op);
-        return false;
+        return ::tensorflow::Status::OK();
     }
     const_array_data[i] = new_value;
   }
 
   AddMessageF("Propagated activation function %s into %s:%s", LogName(*ac_op),
               LogName(*src_op), src_op_input);
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
index 323eefcd3a..40cd6dea82 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
@@ -32,7 +32,10 @@ void SetDataTypeForAllOutputs(Model* model, Operator* op,
 }
 }  // namespace
 
-bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status PropagateArrayDataTypes::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
 
@@ -40,7 +43,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
   for (const auto& input : op->inputs) {
     if (!model->IsOptionalArray(input) &&
         model->GetArray(input).data_type == ArrayDataType::kNone) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
   // Record data types of output before processing, so we can see at the
@@ -131,7 +134,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
       auto* rand_op = static_cast<RandomUniformOperator*>(op);
       // The output type of RandomUniform is specified with an attribute
       if (rand_op->dtype == ArrayDataType::kNone) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       CHECK_EQ(op->outputs.size(), 1);
       SetDataTypeForAllOutputs(model, op, rand_op->dtype);
@@ -153,7 +156,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
       // This can make unsupported_op->output_data_types have more elements than
       // op->outputs.
       if (unsupported_op->output_data_types.size() < op->outputs.size()) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       for (int i = 0; i < op->outputs.size(); ++i) {
         const string& output = op->outputs[i];
@@ -164,7 +167,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
     }
     case OperatorType::kExpandDims: {
       // Yield on ExpandDim until it is converted to Reshape
-      return false;
+      return ::tensorflow::Status::OK();
     }
     case OperatorType::kSelect: {
       // Select produces outputs with the same type as their 2nd input
@@ -248,10 +251,11 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
   // Return true if any output data type changed, false if none changed.
   for (const auto& output : op->outputs) {
     if (old_output_data_types[output] != model->GetArray(output).data_type) {
-      return true;
+      *modified = true;
+      return ::tensorflow::Status::OK();
     }
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc
index cd078ef189..3cf191436d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc
@@ -39,7 +39,10 @@ bool SupportsMinMax(const Array& array) {
 // When provided a set of min/max values for uint8 arrays this will rescale
 // the values for other data types as required and preserving the floating point
 // range within the new type.
-bool PropagateDefaultMinMax::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status PropagateDefaultMinMax::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* op = it->get();
 
@@ -61,7 +64,8 @@ bool PropagateDefaultMinMax::Run(Model* model, std::size_t op_index) {
     }
   }
 
-  return did_change;
+  *modified = did_change;
+  return ::tensorflow::Status::OK();
 }
 
 // Sets the min/max on the given array, adjusting the reference_minmax for the
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc
index 3ad6b0ec6f..d0113237ce 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc
@@ -277,11 +277,14 @@ bool RecursivelyForwardPropagateDataType(GraphTransformation* transformation,
 // nice logging and integration with the graphviz video dumping mode.
 // In general you should not copy this style of transformation and stick to
 // local-only changes as seen in the other transformations.
-bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status PropagateFakeQuantNumBits::Run(Model* model,
+                                                    std::size_t op_index,
+                                                    bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   if (op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fakequant_op = static_cast<FakeQuantOperator*>(op);
 
@@ -290,7 +293,7 @@ bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) {
                                            &quantized_data_type)) {
     AddMessageF("FakeQuant op %s num_bits=%d is out of range, ignoring",
                 LogName(*op), fakequant_op->num_bits);
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& final_minmax = *fakequant_op->minmax;
 
@@ -311,7 +314,8 @@ bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) {
   did_change |=
       RecursivelyForwardPropagateDataType(this, model, op, quantized_data_type);
 
-  return did_change;
+  *modified = did_change;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index d056a8add7..5496e2093e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -1622,7 +1622,10 @@ void ProcessUnpackOperator(Model* model, UnpackOperator* op) {
 
 }  // namespace
 
-bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status PropagateFixedSizes::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   std::unordered_map<string, std::vector<int>> old_output_dims;
@@ -1836,7 +1839,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
           static_cast<TensorFlowUnsupportedOperator*>(op);
       // Attribute can be not specified, ignore it.
       if (unsupported_op->output_shapes.size() < op->outputs.size()) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       for (int i = 0; i < op->outputs.size(); ++i) {
         const string& output = op->outputs[i];
@@ -1886,10 +1889,11 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) {
         (old_output_dims[output] != model->GetArray(output).shape().dims())) {
       AddMessageF("Set shape of %s to [%s]", output,
                   absl::StrJoin(model->GetArray(output).shape().dims(), ","));
-      return true;
+      *modified = true;
+      return ::tensorflow::Status::OK();
     }
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
index fb299c31b7..29ea17dc61 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
@@ -439,7 +439,9 @@ void FixMinMaxPostQuantization(GraphTransformation* transformation,
 
 }  // namespace
 
-bool Quantize::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status Quantize::Run(Model* model, std::size_t op_index,
+                                   bool* modified) {
+  *modified = false;
   // Our general "quantization" graph transformation consists in replacing
   //   QuantizedInputArrays[] ->
   //     DequantizeOperators[] ->
@@ -460,7 +462,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) {
   auto& op = *model->operators[op_index];
   if (op.type == OperatorType::kDequantize ||
       op.type == OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Our assumption here is that the input arrays are already quantized -
@@ -497,7 +499,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) {
       if (!array.minmax && !array.buffer) {
         LOG(ERROR) << "Can't quantize input array " << input
                    << " because it lacks min/max info";
-        return false;
+        return ::tensorflow::Status::OK();
       }
       const auto* other_op = GetOpWithOutput(*model, input);
       if (other_op && other_op->type != OperatorType::kDequantize) {
@@ -507,7 +509,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) {
             "which means that we should yield and let other ops "
             "get quantized first",
             LogName(op), input);
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
   }
@@ -672,7 +674,8 @@ bool Quantize::Run(Model* model, std::size_t op_index) {
     }
   }
 
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
index eaa9d3bcda..0c32218ff2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc
@@ -51,18 +51,19 @@ bool ApplyAttrsToArray(GraphTransformation* transformation, Model* model,
 
 }  // end namespace
 
-bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model,
-                                                     std::size_t op_index) {
+::tensorflow::Status ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   const auto fakequant_it = model->operators.begin() + op_index;
   auto* fakequant_base_op = fakequant_it->get();
   if (fakequant_base_op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fq_op = static_cast<FakeQuantOperator*>(fakequant_base_op);
 
   if (!fq_op->minmax) {
     // Need to be resolved first by ResolveFakeQuantArgsFromVars.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // At this point, this FakeQuantOperator should have a MinMax
@@ -74,7 +75,8 @@ bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model,
   bool changed = false;
   changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->inputs[0]);
   changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->outputs[0]);
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc
index c3b2709a33..fe8023ab8f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc
@@ -25,11 +25,14 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveFinalDequantizeOp::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto dequantize_it = model->operators.begin() + op_index;
   const auto* dequantize_op = dequantize_it->get();
   if (dequantize_op->type != OperatorType::kDequantize) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& output = dequantize_op->outputs[0];
   // We can remove any dequantize op whose output is not consumed by
@@ -38,7 +41,7 @@ bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) {
   // in the middle of the graph might be designated as an output
   // array.
   if (CountOpsWithInput(*model, output)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // If one of the model's output arrays was actually the Dequantize op's
@@ -53,7 +56,8 @@ bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) {
   AddMessageF("Removed final %s", LogName(*dequantize_op));
   model->EraseArray(output);
   model->operators.erase(dequantize_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc
index 73ad326299..be8c0acc7b 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc
@@ -23,11 +23,14 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveTensorFlowAssert::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTensorFlowAssert::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto assert_it = model->operators.begin() + op_index;
   const auto* assert_op = assert_it->get();
   if (assert_op->type != OperatorType::kAssert) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   bool changed = false;
@@ -54,7 +57,8 @@ bool RemoveTensorFlowAssert::Run(Model* model, std::size_t op_index) {
 
   // That's it. We can stop here, no need to duplicate the work that
   // RemoveUnusedOp will do removing this now-unused node.
-  return changed;
+  *modified = changed;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc
index 7ec7752f25..37fe5fa3d7 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc
@@ -25,14 +25,18 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveTensorFlowIdentity::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTensorFlowIdentity::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   const auto passthru_it = model->operators.begin() + op_index;
   const auto* passthru_op = passthru_it->get();
   if (passthru_op->type != OperatorType::kIdentity) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc
index 0dfdc40e4c..68c6fb65c5 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc
@@ -46,14 +46,17 @@ bool AreAllBufferElementsEqualTo(const std::vector<Scalar>& buffer_data,
 // For example, an Add operator is trivial if
 // one of its operands is constant 0, a Mul operator is trivial
 // if one of its operands is constant 1, etc.
-bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialBinaryOperator::Run(Model* model,
+                                                      std::size_t op_index,
+                                                      bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   auto* binary_op = binary_it->get();
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
       binary_op->type != OperatorType::kSub &&
       binary_op->type != OperatorType::kDiv) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(binary_op->inputs.size(), 2);
@@ -66,12 +69,12 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
   };
   if (!is_input_constant[0] && !is_input_constant[1]) {
     // Neither input is constant, so nothing we can resolve here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (is_input_constant[0] && is_input_constant[1]) {
     // Both inputs are constants. That's a job for constants
     // propagation, not for us to handle here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int index_of_constant_input = is_input_constant[0] ? 0 : 1;
   const int index_of_variable_input = is_input_constant[0] ? 1 : 0;
@@ -84,7 +87,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
   const auto& input_array_1 = model->GetArray(binary_op->inputs[1]);
   if (!input_array_0.has_shape() || !input_array_1.has_shape()) {
     // Both input shapes must be known.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (input_array_0.shape().dimensions_count() ==
           input_array_1.shape().dimensions_count() &&
@@ -94,7 +97,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
         "(lhs %s, rhs %s)",
         LogName(*binary_op), ShapeToString(input_array_0.shape()),
         ShapeToString(input_array_1.shape()));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Now check if the constant operand makes this binary
@@ -103,7 +106,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
       model->GetArray(binary_op->inputs[index_of_constant_input]);
   // For now, we only handle floats here.
   if (constant_input_array.data_type != ArrayDataType::kFloat) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& constant_input_float_data =
       constant_input_array.GetBuffer<ArrayDataType::kFloat>().data;
@@ -121,12 +124,13 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) {
   }
 
   if (!is_trivial) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Now we know that this node is trivial, so we can remove it.
   AddMessageF("Removing trivial %s", LogName(*binary_op));
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc
index 3ceb93d8ee..faaa2a828e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc
@@ -25,16 +25,20 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveTrivialConcatenation::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialConcatenation::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   const auto concat_it = model->operators.begin() + op_index;
   auto* concat_op = concat_it->get();
   if (concat_op->type != OperatorType::kConcatenation) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (concat_op->inputs.size() != 1) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
index 936854a04f..ccfc181fe0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc
@@ -25,7 +25,10 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialConcatenationInput::Run(Model* model,
+                                                          std::size_t op_index,
+                                                          bool* modified) {
+  *modified = false;
   // TensorFlow allows Concatenation nodes to have 0-D inputs,
   // and they are then treated as empty i.e. omitted from concatenation,
   // in violation of the notion that 0-D is equivalent to 1x1x1x1.
@@ -36,7 +39,7 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
   const auto concat_it = model->operators.begin() + op_index;
   auto* concat_op = concat_it->get();
   if (concat_op->type != OperatorType::kConcatenation) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   std::vector<string> trivial_inputs;
   std::vector<string> nontrivial_inputs;
@@ -52,7 +55,7 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
   }
 
   if (trivial_inputs.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Drop trivial inputs.
@@ -63,7 +66,8 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) {
     }
   }
   concat_op->inputs = nontrivial_inputs;
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc
index 2c8d04440f..5448a816bc 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc
@@ -64,23 +64,27 @@ bool IsFakeQuantTrivial(GraphTransformation* transformation, const Model& model,
 }  // namespace
 
 // Removes FakeQuant ops that are trivial (have no effect, are redundant, etc).
-bool RemoveTrivialFakeQuant::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialFakeQuant::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
   auto* op = op_it->get();
   if (op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fakequant_op = static_cast<FakeQuantOperator*>(op);
 
   if (!IsFakeQuantTrivial(this, *model, *fakequant_op)) {
     AddMessageF("%s is not trivial", LogName(*fakequant_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Removing trivial %s", LogName(*fakequant_op));
 
   CHECK_EQ(fakequant_op->inputs.size(), 1);
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc
index 752560e075..4133815285 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc
@@ -94,12 +94,13 @@ bool IsTrivialFusedActivationFunc(
 // Attempts to remove both fused and unfused activation functions if the
 // quantization params indicate that the representable values fall inside the
 // activation range.
-bool RemoveTrivialQuantizedActivationFunc::Run(Model* model,
-                                               std::size_t op_index) {
+::tensorflow::Status RemoveTrivialQuantizedActivationFunc::Run(
+    Model* model, std::size_t op_index, bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   if (op->inputs.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (IsTrivialUnfusedActivationFunc(this, *model, op->type, op->inputs[0])) {
@@ -107,7 +108,8 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model,
         "Removing trivial unfused activation function %s because the input "
         "minmax imply at least as tight a clamp anyway.",
         LogName(*op));
-    return RemoveTrivialPassthroughOp(this, model, op_index);
+    *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+    return ::tensorflow::Status::OK();
   }
   if (IsTrivialFusedActivationFunc(this, *model, op->fused_activation_function,
                                    op->outputs[0])) {
@@ -117,9 +119,10 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model,
         "because the output quantization parameters imply at least as tight "
         "a clamp anyway.",
         LogName(*op));
-    return true;
+    *modified = true;
+    return ::tensorflow::Status::OK();
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc
index 142c876b15..0f0ae4af69 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc
@@ -69,22 +69,26 @@ bool IsTrivialMinMax(GraphTransformation* transformation, const Model& model,
 
 // Attempts to remove min/max functions if the quantization params indicate that
 // the representable values fall inside the clip range.
-bool RemoveTrivialQuantizedMinMax::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialQuantizedMinMax::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   if ((op->type != OperatorType::kMinimum &&
        op->type != OperatorType::kMaximum) ||
       op->inputs.size() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (IsTrivialMinMax(this, *model, op->type, op->inputs[0], op->inputs[1])) {
     AddMessageF(
         "Removing trivial min/max %s because the quantization parameters imply "
         "at least as tight a clamp anyway.",
         LogName(*op));
-    return RemoveTrivialPassthroughOp(this, model, op_index);
+    *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+    return ::tensorflow::Status::OK();
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc
index 5295eeccec..1caf944879 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc
@@ -81,22 +81,26 @@ bool IsReshapeTrivial(const Model& model, const Operator& op,
 
 }  // namespace
 
-bool RemoveTrivialReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialReshape::Run(Model* model,
+                                               std::size_t op_index,
+                                               bool* modified) {
+  *modified = false;
   const auto reshape_it = model->operators.begin() + op_index;
   auto* reshape_op = reshape_it->get();
   if (reshape_op->type != OperatorType::kReshape) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!IsReshapeTrivial(*model, *reshape_op, this)) {
     AddMessageF("%s is not trivial", LogName(*reshape_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Removing trivial %s", LogName(*reshape_op));
 
   CHECK_EQ(reshape_op->inputs.size(), 2);
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc
index 0cbbcd7c81..dcb0148d58 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc
@@ -49,21 +49,24 @@ bool IsSliceTrivial(const Model& model, const Operator& op,
 
 }  // namespace
 
-bool RemoveTrivialSlice::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveTrivialSlice::Run(Model* model, std::size_t op_index,
+                                             bool* modified) {
+  *modified = false;
   const auto reshape_it = model->operators.begin() + op_index;
   auto* slice_op = reshape_it->get();
   if (slice_op->type != OperatorType::kSlice) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!IsSliceTrivial(*model, *slice_op, this)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Removing trivial %s", LogName(*slice_op));
 
   CHECK_EQ(slice_op->inputs.size(), 3);
-  return RemoveTrivialPassthroughOp(this, model, op_index);
+  *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
index dde91234a8..3cd5d06bae 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc
@@ -25,7 +25,9 @@ limitations under the License.
 
 namespace toco {
 
-bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status RemoveUnusedOp::Run(Model* model, std::size_t op_index,
+                                         bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* op = it->get();
 
@@ -58,7 +60,7 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
     }
     for (const string& output_array : model->flags.output_arrays()) {
       if (output == output_array) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
     for (const auto& rnn_state : model->flags.rnn_states()) {
@@ -67,19 +69,19 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
         if (!IsDiscardableArray(*model, rnn_state.back_edge_source_array()) ||
             !IsDiscardableArray(*model, rnn_state.state_array()) ||
             CountOpsWithInput(*model, rnn_state.state_array())) {
-          return false;
+          return ::tensorflow::Status::OK();
         }
       }
     }
     if (CountOpsWithInput(*model, output)) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
   if (op->unresolved_outputs) {
     AddMessageF("Not discarding %s because it has unresolved outputs.",
                 LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Discarding %s because none of its outputs is used.",
@@ -105,7 +107,8 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) {
     }
   }
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc
index 550de83018..3c8d411089 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc
@@ -63,29 +63,32 @@ bool IsMoveOperator(OperatorType optype) {
 
 // Swap elementwise operators such that all value operators occur before all
 // element move operators, e.g. negation then transpose.
-bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ReorderElementwiseUnary::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto element_op_it = model->operators.begin() + op_index;
   std::unique_ptr<Operator>& element_op = *element_op_it;
   if (!IsElementwiseOperator(element_op->type)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const string intermediate_name = element_op->inputs[0];
   auto it = FindOpWithOutput(*model, intermediate_name);
   if (it == model->operators.end()) {
     AddMessageF("No preceding operator");
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   std::unique_ptr<Operator>& move_op = *it;
   if (!IsMoveOperator(move_op->type)) {
     AddMessageF("Preceding operator is not a move operator");
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (CountOpsWithInput(*model, intermediate_name) != 1) {
     AddMessageF("Input %s used elsewhere", intermediate_name);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check that the intermediate is discardable.
@@ -94,7 +97,7 @@ bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) {
         "Cannot swap elementwise as it would invalidate %s which is "
         "an output array.",
         intermediate_name);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // op->inputs may change so we need to keep a value by copy.
@@ -147,7 +150,8 @@ bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) {
   // Swap the order of the operators.
   element_op.swap(move_op);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc
index c907a597cb..a2c06e71e8 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc
@@ -101,37 +101,40 @@ std::vector<int> ComputeNewPerm(std::vector<int> input_dims,
 
 // Swaps reshape-transpose to transpose-reshape whenever possible. This is
 // possible when the reshape does not affect memory ordering.
-bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ReorderReshapeTranspose::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto transpose_it = model->operators.begin() + op_index;
 
   TransposeOperator* transpose_op = ConvertOperator<TransposeOperator*>(
       transpose_it->get(), OperatorType::kTranspose);
 
   if (transpose_op == nullptr) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) {
     // Wait for values to propagate.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Find the operator that produces the transpose op.
   auto reshape_it = FindOpWithOutput(*model, transpose_op->inputs[0]);
   if (reshape_it == model->operators.end()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   TensorFlowReshapeOperator* reshape_op =
       ConvertOperator<TensorFlowReshapeOperator*>(reshape_it->get(),
                                                   OperatorType::kReshape);
   if (reshape_op == nullptr) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Ignore if the reshape is uninitialized.
   if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Need to copy to keep static if permutated.
@@ -142,7 +145,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
   // Intermediate should not be consumed by any other operators.
   if (CountOpsWithInput(*model, intermediate_name) != 1) {
     AddMessageF("Input %s used elsewhere", intermediate_name);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check that the intermediate is not an output array.
@@ -151,7 +154,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
         "Cannot reorder reshape-transpose as it would invalidate %s which is "
         "an output array.",
         intermediate_name);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Get the arrays.
@@ -173,7 +176,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
   // dimensions then it can be moved between the transpose.
   if (!ReshapeIsEquivalentToTranspose(*model, reshape_op,
                                       true /*allow_extra_unary_dims*/)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!IsDiscardableArray(*model, output_name)) {
@@ -242,7 +245,8 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) {
   // Swap the order of the operators.
   transpose_it->swap(*reshape_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc
index 8f2c1f8162..a79779f55d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc
@@ -25,10 +25,13 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveBatchNormalization::Run(Model* model,
+                                                    std::size_t op_index,
+                                                    bool* modified) {
+  *modified = false;
   auto bn_it = model->operators.begin() + op_index;
   if (bn_it->get()->type != OperatorType::kBatchNormalization) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* bn_op =
       static_cast<const BatchNormalizationOperator*>(bn_it->get());
@@ -53,7 +56,7 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) {
   // so we need to exit early if these buffers don't exist (i.e. if the params
   // haven't yet been resolved as constants).
   if (!mean_array.buffer || !multiplier_array.buffer || !offset_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Create the new Mul, Add operators
@@ -142,7 +145,8 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) {
   DCHECK_EQ(bn_it->get(), bn_op);
   model->operators.erase(bn_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
index b8b35161d7..d039d7d690 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc
@@ -24,31 +24,35 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveBatchToSpaceNDAttributes::Run(Model* model,
+                                                          std::size_t op_index,
+                                                          bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
-  if (op_it->get()->type != OperatorType::kBatchToSpaceND) return false;
+  if (op_it->get()->type != OperatorType::kBatchToSpaceND)
+    return ::tensorflow::Status::OK();
 
   auto* op = static_cast<BatchToSpaceNDOperator*>(op_it->get());
 
   // The attributes are resolved only when the 3 attributes (block_shape,
   // before_crops, after_crops) are all constant.
   if (!op->block_shape.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(op->inputs.size(), 3);
   if (!IsConstantParameterArray(*model, op->inputs[1]) ||
       !IsConstantParameterArray(*model, op->inputs[2]))
-    return false;
+    return ::tensorflow::Status::OK();
 
   // Handle crops
   const auto& crops_array = model->GetArray(op->inputs[2]);
-  if (!crops_array.has_shape()) return false;
+  if (!crops_array.has_shape()) return ::tensorflow::Status::OK();
   const std::vector<int>& crops_dims = crops_array.shape().dims();
   if (crops_dims.size() != 2) {
     // Code only handles crops of 2 dimensions. Perhaps another transformation
     // will delete this op.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const std::vector<int>& crops_buffer =
       crops_array.GetBuffer<ArrayDataType::kInt32>().data;
@@ -59,7 +63,7 @@ bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) {
 
   // Handle block_shape
   const auto& block_shape_array = model->GetArray(op->inputs[1]);
-  if (!block_shape_array.has_shape()) return false;
+  if (!block_shape_array.has_shape()) return ::tensorflow::Status::OK();
   const std::vector<int>& block_shape_dims = block_shape_array.shape().dims();
   CHECK_EQ(block_shape_dims.size(), 1);
   const std::vector<int>& block_shape_buffer =
@@ -68,7 +72,8 @@ bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) {
     op->block_shape.push_back(block_shape_buffer[i]);
   }
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
index f7e5aa6609..586f546a30 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc
@@ -188,7 +188,10 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model,
 }
 }  // namespace
 
-bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantBinaryOperator::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   const auto binary_it = model->operators.begin() + op_index;
   const auto* binary_op = binary_it->get();
   // Test for binary ops of types that we know how to resolve
@@ -204,7 +207,7 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
       binary_op->type != OperatorType::kLessEqual &&
       binary_op->type != OperatorType::kGreater &&
       binary_op->type != OperatorType::kGreaterEqual) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(binary_op->inputs.size(), 2);
 
@@ -212,13 +215,13 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
   const auto& input1_array = model->GetArray(binary_op->inputs[1]);
   // Check if both inputs are constant parameters.
   if (!input0_array.buffer || !input1_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto& output_array = model->GetArray(binary_op->outputs[0]);
   // Yield until the output array dims have been resolved.
   if (!output_array.has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // At the moment we don't want to care about fused activation functions.
@@ -229,7 +232,7 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Not resolving constant %s because it has a fused activation function",
         LogName(*binary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check that input data types agree.
@@ -253,7 +256,8 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
   AddMessageF("Resolved constant %s to the equivalent constant array",
               LogName(*binary_op));
   model->operators.erase(binary_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
index d916ae0ddf..0c60fdfeb3 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc
@@ -135,11 +135,14 @@ void SetMinMaxForConcatenedArray(GraphTransformation* transformation,
 }  // namespace
 
 // Resolves the concatenation operator if all its inputs are constant arrays.
-bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantConcatenation::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto concat_it = model->operators.begin() + op_index;
   const auto* concat_base_op = concat_it->get();
   if (concat_base_op->type != OperatorType::kConcatenation) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* concat_op =
       static_cast<const ConcatenationOperator*>(concat_base_op);
@@ -149,11 +152,15 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
     // We  also make sure the shapes of the input arrays are known and they are
     // all discardable.
     const Operator* input_op = GetOpWithOutput(*model, input_name);
-    if (input_op) return false;
-    if (!IsConstantParameterArray(*model, input_name)) return false;
-    if (!model->GetArray(input_name).has_shape()) return false;
-    if (model->GetArray(input_name).quantization_params) return false;
-    if (!IsDiscardableArray(*model, input_name)) return false;
+    if (input_op) return ::tensorflow::Status::OK();
+    if (!IsConstantParameterArray(*model, input_name))
+      return ::tensorflow::Status::OK();
+    if (!model->GetArray(input_name).has_shape())
+      return ::tensorflow::Status::OK();
+    if (model->GetArray(input_name).quantization_params)
+      return ::tensorflow::Status::OK();
+    if (!IsDiscardableArray(*model, input_name))
+      return ::tensorflow::Status::OK();
   }
 
   const int concatenation_axis = concat_op->axis;
@@ -205,7 +212,8 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
 
   // Remove concatenate operator.
   model->operators.erase(concat_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
index f5f2f77460..4f330fdd84 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc
@@ -59,11 +59,14 @@ void GetBoundsForQuantizedDataType(ArrayDataType quantized_data_type,
   }
 }
 
-bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantFakeQuant::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   const auto fakequant_it = model->operators.begin() + op_index;
   const auto* fakequant_base_op = fakequant_it->get();
   if (fakequant_base_op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto* fakequant_op =
@@ -71,12 +74,12 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
 
   // Yield until the fakequant MinMax has been resolved.
   if (!fakequant_op->minmax) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // This transformation only applies when the input array is constant.
   if (!IsConstantParameterArray(*model, fakequant_op->inputs[0])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& input_array = model->GetArray(fakequant_op->inputs[0]);
@@ -87,7 +90,7 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
   if (!InferQuantizedDataTypeFromFakeQuant(*fakequant_op,
                                            &quantized_data_type)) {
     AddMessageF("Unsupported FakeQuant num_bits=%d", fakequant_op->num_bits);
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Resolving constant %s", LogName(*fakequant_op));
@@ -136,7 +139,8 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
   }
   model->operators.erase(fakequant_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
index f6f95481b5..5400d395ff 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc
@@ -41,11 +41,14 @@ bool ComputeFillArray(Model* model, FillOperator* op) {
   return true;
 }
 
-bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantFill::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   const auto fill_it = model->operators.begin() + op_index;
   auto* base_op = fill_it->get();
   if (base_op->type != OperatorType::kFill) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* op = static_cast<FillOperator*>(base_op);
 
@@ -55,44 +58,44 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& val_array = model->GetArray(op->inputs[1]);
   if (!val_array.has_shape()) {
     // Yield until the value shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsConstantParameterArray(*model, op->inputs[1])) {
     // Yield until the value is constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(RequiredBufferSizeForShape(val_array.shape()), 1);
 
   switch (output_array.data_type) {
     case ArrayDataType::kFloat:
       if (!ComputeFillArray<ArrayDataType::kFloat>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kUint8:
       if (!ComputeFillArray<ArrayDataType::kUint8>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kInt32:
       if (!ComputeFillArray<ArrayDataType::kInt32>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kInt64:
       if (!ComputeFillArray<ArrayDataType::kInt64>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     default:
@@ -114,7 +117,8 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
   // Erase the operator
   model->operators.erase(fill_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
index 36d7dad0ce..6e3a6a69c2 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc
@@ -61,11 +61,14 @@ inline void Gather(const Array& input_array, int input_rank,
 // Resolves a constant Gather operation.
 // This simply performs the gather and produces the output array with the
 // appropriate values.
-bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantGather::Run(Model* model,
+                                                std::size_t op_index,
+                                                bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kGather) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const GatherOperator*>(base_op);
 
@@ -74,28 +77,28 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!op->axis) {
     // Yield until axis has been set by ResolveGatherAttributes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (op->axis.value() != 0) {
     // Only handling axis=0 for now.
     AddMessageF("%s has axis %d; only axis=0 is supported", LogName(*op),
                 op->axis.value());
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& input_array = model->GetArray(op->inputs[0]);
   const Array& coords_array = model->GetArray(op->inputs[1]);
@@ -142,7 +145,8 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator.
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
index e86616574d..e257ec37e8 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc
@@ -49,11 +49,14 @@ void Pack(Model* model, PackOperator const& op) {
 
 }  // namespace
 
-bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantPack::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kPack) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const PackOperator*>(base_op);
 
@@ -62,18 +65,18 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   for (const auto& input : op->inputs) {
     if (!IsConstantParameterArray(*model, input)) {
       // Yield if any input is mutable
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -111,7 +114,8 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
index 88d06d7dc7..db0fbba528 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc
@@ -59,11 +59,14 @@ bool ComputeRandomUniformArray(Model* model, RandomUniformOperator* op) {
   return true;
 }
 
-bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantRandomUniform::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* base_op = it->get();
   if (base_op->type != OperatorType::kRandomUniform) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* op = static_cast<RandomUniformOperator*>(base_op);
 
@@ -73,12 +76,12 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if ((op->seed == 0) && (op->seed2 == 0)) {
@@ -86,13 +89,13 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
                  << "\" is truly random (using /dev/random system entropy). "
                     "Therefore, cannot resolve as constant. Set \"seed\" or "
                     "\"seed2\" attr non-zero to fix this";
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   switch (output_array.data_type) {
     case ArrayDataType::kFloat:
       if (!ComputeRandomUniformArray<ArrayDataType::kFloat>(model, op)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     // For future support of double or half.
@@ -110,7 +113,8 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
   // Erase the operator
   model->operators.erase(it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
index 1a0ba9e2bc..069d4dafaa 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc
@@ -19,11 +19,14 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantRange::Run(Model* model,
+                                               std::size_t op_index,
+                                               bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* base_op = it->get();
   if (base_op->type != OperatorType::kRange) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* op = static_cast<RangeOperator*>(base_op);
 
@@ -31,23 +34,23 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   const auto& start_array = model->GetArray(op->inputs[0]);
   if (!start_array.has_shape()) {
     // Yield until all input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& limit_array = model->GetArray(op->inputs[1]);
   if (!limit_array.has_shape()) {
     // Yield until all input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& delta_array = model->GetArray(op->inputs[2]);
   if (!delta_array.has_shape()) {
     // Yield until all input dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   for (const auto& input : op->inputs) {
     if (!IsConstantParameterArray(*model, input)) {
       // yield if any input is mutable
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -55,7 +58,7 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(RequiredBufferSizeForShape(start_array.shape()), 1)
@@ -101,7 +104,8 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   // Delete the operator
   model->operators.erase(it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
index a6f665b5f0..fccecef600 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc
@@ -22,11 +22,14 @@ limitations under the License.
 namespace toco {
 
 // Resolves a constant reshape operation by copying the buffer.
-bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantReshape::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kReshape) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const TensorFlowReshapeOperator*>(base_op);
 
@@ -36,17 +39,17 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const Array& input_array = model->GetArray(op->inputs[0]);
@@ -54,7 +57,7 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
     AddMessageF("Constant reshape is non-trivial (%s -> %s)",
                 ShapeToString(input_array.shape()),
                 ShapeToString(output_array.shape()));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK(!output_array.buffer);
@@ -95,7 +98,7 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
     default:
       LOG(FATAL) << "Unsupported data type: "
                  << ArrayDataTypeName(input_array.data_type);
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   AddMessageF("Resolving constant reshape of %s", LogName(*op));
@@ -112,7 +115,8 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator.
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
index e880a3f44d..ab1e0bd7a0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc
@@ -27,11 +27,14 @@ namespace toco {
 // This implementation is looking strictly for all-or-nothing on the select
 // condition. It's possible to enhance this by looking per-element and possibly
 // producing a Mul op.
-bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantSelect::Run(Model* model,
+                                                std::size_t op_index,
+                                                bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kSelect) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const SelectOperator*>(base_op);
 
@@ -40,23 +43,23 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We require the cond input to be constant.
   if (!IsConstantParameterArray(*model, op->inputs[0])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& cond_array = model->GetArray(op->inputs[0]);
   CHECK(cond_array.data_type == ArrayDataType::kBool)
       << "Only bool conditions are supported";
   const auto& cond_data = cond_array.GetBuffer<ArrayDataType::kBool>().data;
   if (cond_data.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Check if the condition is the same for all elements.
@@ -67,12 +70,14 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
           "Cannot resolve %s as constant; cond_array has differing "
           "per-element values",
           LogName(*op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
   // Pass-through the selected input.
-  return RemoveTrivialPassthroughOp(this, model, op_index, cond_value ? 1 : 2);
+  *modified =
+      RemoveTrivialPassthroughOp(this, model, op_index, cond_value ? 1 : 2);
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
index 8a0e3e8995..a1756a8207 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc
@@ -19,29 +19,32 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantShapeOrRank::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* op = it->get();
   if (!(op->type == OperatorType::kShape || op->type == OperatorType::kRank)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(op->outputs.size(), 1);
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been resolved
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& input_array = model->GetArray(op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until the input array's shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Compute the output
@@ -65,7 +68,8 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
   }
 
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
index b35c3e19c4..869dfae98e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc
@@ -86,11 +86,14 @@ bool Slice(SliceOperator const& op, Array const& input_array,
 
 }  // namespace
 
-bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantSlice::Run(Model* model,
+                                               std::size_t op_index,
+                                               bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kSlice) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const SliceOperator* op = static_cast<const SliceOperator*>(base_op);
@@ -99,49 +102,49 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (op->begin.empty() || op->size.empty()) {
     // Attributes have not resolved yet.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& input_array = model->GetArray(op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until the value shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsConstantParameterArray(*model, op->inputs[0])) {
     // Yield until the value is constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK(!output_array.buffer);
   switch (output_array.data_type) {
     case ArrayDataType::kFloat:
       if (!Slice<ArrayDataType::kFloat>(*op, input_array, &output_array)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kUint8:
       if (!Slice<ArrayDataType::kUint8>(*op, input_array, &output_array)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kInt32:
       if (!Slice<ArrayDataType::kInt32>(*op, input_array, &output_array)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     case ArrayDataType::kInt64:
       if (!Slice<ArrayDataType::kInt64>(*op, input_array, &output_array)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       break;
     default:
@@ -159,7 +162,8 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
   // Erase the operator
   model->operators.erase(it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
index 8853ed87e6..99c5a64662 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc
@@ -103,11 +103,14 @@ void StridedSlice(StridedSliceOperator const& op, Array const& input_array,
 
 }  // anonymous namespace
 
-bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantStridedSlice::Run(Model* model,
+                                                      std::size_t op_index,
+                                                      bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kStridedSlice) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const StridedSliceOperator* op =
@@ -117,28 +120,28 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (op->start_indices.empty() || op->stop_indices.empty() ||
       op->strides.empty()) {
     // Attributes have not resolved yet.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const auto& input_array = model->GetArray(op->inputs[0]);
   if (!input_array.has_shape()) {
     // Yield until the value shape has been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!IsConstantParameterArray(*model, op->inputs[0])) {
     // Yield until the value is constant.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK(!output_array.buffer);
@@ -164,7 +167,8 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
 
   DeleteOpAndArraysIfUnused(model, it->get());
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
index 5cfa1a5582..c5e93c9bad 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc
@@ -97,11 +97,14 @@ inline void Tile(const Array& input_array, const Array& multiples_array,
 }  // namespace
 
 // Resolves a constant Tile operation.
-bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantTile::Run(Model* model,
+                                              std::size_t op_index,
+                                              bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kTile) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const TensorFlowTileOperator*>(base_op);
 
@@ -110,17 +113,17 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& input_array = model->GetArray(op->inputs[0]);
   const Array& multiples_array = model->GetArray(op->inputs[1]);
@@ -159,7 +162,8 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator.
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
index fe15dfa06f..b759c4d6dd 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc
@@ -101,11 +101,14 @@ void Transpose(Model* model, const Array& input_array,
 
 }  // namespace
 
-bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantTranspose::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   const auto* base_op = it->get();
   if (base_op->type != OperatorType::kTranspose) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* op = static_cast<const TransposeOperator*>(base_op);
 
@@ -114,17 +117,17 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (!output_array.has_shape()) {
     // Yield until the output shape has been set by PropagateFixedShapes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& input_array = model->GetArray(op->inputs[0]);
 
@@ -132,7 +135,7 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
 
   if (op->perm.empty()) {
     // Yield until perm has been populated by ResolveTransposeAttributes.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We currently only support 1-4 dimensions.
@@ -174,7 +177,8 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
 
   // Erase the operator.
   model->operators.erase(it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
index 5364eebbc9..3034c1b1eb 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc
@@ -112,7 +112,10 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) {
   return true;
 }
 
-bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveConstantUnaryOperator::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto unary_it = model->operators.begin() + op_index;
   const auto* unary_op = unary_it->get();
   // Test for unary ops of types that we know how to resolve.
@@ -133,28 +136,28 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     case OperatorType::kRelu:
       break;
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   // Check if the input is a constant parameter.
   if (!IsConstantParameterArray(*model, unary_op->inputs[0])) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // if the unary op involves a tensor required by a rnn state, ignore it
   for (const auto& rnn_state : model->flags.rnn_states()) {
     if (unary_op->inputs[0] == rnn_state.back_edge_source_array()) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (unary_op->inputs[0] == rnn_state.state_array()) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
   auto& output_array = model->GetArray(unary_op->outputs[0]);
   if (!output_array.has_shape()) {
     // Yield until the output array dims have been resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // At the moment we don't want to care about fused activation functions.
@@ -166,7 +169,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
         "Not resolving constant %s "
         " because it has a fused activation function",
         LogName(*unary_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // The min-max is only copied for ops that copy data without arithmetic.
@@ -187,7 +190,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
           "Not resolving constant %s because we currently only support casting "
           "to float",
           LogName(*unary_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (cast_op->src_data_type != input_array.buffer->type) {
       AddMessageF(
@@ -197,7 +200,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     }
   } else {
     if (input_array.buffer->type != ArrayDataType::kFloat) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
     input_float_data = &(input_array.GetBuffer<ArrayDataType::kFloat>().data);
   }
@@ -239,7 +242,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
     CHECK_EQ(unary_op->inputs.size(), 2) << "Sum needs 2 inputs";
     if (!IsConstantParameterArray(*model, unary_op->inputs[1])) {
       AddMessageF("Axis input is non-constant");
-      return false;
+      return ::tensorflow::Status::OK();
     }
     auto& axis_array = model->GetArray(unary_op->inputs[1]);
     CHECK(axis_array.data_type == ArrayDataType::kInt32);
@@ -336,7 +339,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
         default:
           LOG(FATAL) << "Unsupported activation function "
                      << LogName(*unary_op);
-          return false;
+          return ::tensorflow::Status::OK();
       }
       output_float_data[i] = new_value;
     }
@@ -351,7 +354,8 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   AddMessageF("Resolved constant %s to the equivalent constant array",
               LogName(*unary_op));
   model->operators.erase(unary_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc
index 0dda1fd0b3..eed971c1d5 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc
@@ -25,17 +25,20 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveFakeQuantArgsFromVars::Run(Model* model,
+                                                       std::size_t op_index,
+                                                       bool* modified) {
+  *modified = false;
   const auto fakequant_it = model->operators.begin() + op_index;
   auto* fakequant_base_op = fakequant_it->get();
   if (fakequant_base_op->type != OperatorType::kFakeQuant) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* fakequant_op = static_cast<FakeQuantOperator*>(fakequant_base_op);
 
   if (fakequant_op->minmax) {
     // Already resolved.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(fakequant_op->inputs.size(), 3);
@@ -43,7 +46,7 @@ bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) {
   // resolved to constant arrays.
   for (int i = 1; i <= 2; i++) {
     if (!IsConstantParameterArray(*model, fakequant_op->inputs[i])) {
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
 
@@ -74,7 +77,8 @@ bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) {
     DeleteArrayIfUsedOnce(fakequant_op->inputs[i], model);
   }
   fakequant_op->inputs.resize(1);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc
index ce825c91af..69209b8dec 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc
@@ -24,20 +24,25 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveGatherAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveGatherAttributes::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto* gather_op = model->operators[op_index].get();
-  if (gather_op->type != OperatorType::kGather) return false;
+  if (gather_op->type != OperatorType::kGather)
+    return ::tensorflow::Status::OK();
   auto* op = static_cast<GatherOperator*>(gather_op);
 
   if (op->axis) {
     // Attributes already resolved
-    return false;
+    return ::tensorflow::Status::OK();
   }
-  if (op->inputs.size() != 3) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[2])) return false;
+  if (op->inputs.size() != 3) return ::tensorflow::Status::OK();
+  if (!IsConstantParameterArray(*model, op->inputs[2]))
+    return ::tensorflow::Status::OK();
 
   const auto& indices_array = model->GetArray(op->inputs[2]);
-  if (!indices_array.has_shape()) return false;
+  if (!indices_array.has_shape()) return ::tensorflow::Status::OK();
   const auto& axis_data = indices_array.GetBuffer<ArrayDataType::kInt32>().data;
   CHECK_EQ(axis_data.size(), 1)
       << "Multidimensional gather not supported on " << LogName(*op);
@@ -47,7 +52,8 @@ bool ResolveGatherAttributes::Run(Model* model, std::size_t op_index) {
   DeleteArrayIfUsedOnce(op->inputs[2], model);
   op->inputs.resize(2);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
index b2b2ea151b..ac94f45321 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc
@@ -51,27 +51,30 @@ void FillArrayWithZeros(Array* array) {
 // Removes a multiplication by array of constant zeros by making the output
 // array an array of constant zeros and removing the input arrays if they are no
 // longer needed.
-bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveMultiplyByZero::Run(Model* model,
+                                                std::size_t op_index,
+                                                bool* modified) {
+  *modified = false;
   const auto mul_it = model->operators.begin() + op_index;
   auto* mul_op = mul_it->get();
   if (mul_op->type != OperatorType::kMul) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto& output_array_name = mul_op->outputs[0];
   auto& output_array = model->GetArray(output_array_name);
 
   if (!IsDiscardableArray(*model, output_array_name)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Yield if the output shape is not known yet.
   if (!output_array.has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // This transformation only handles the case where one operand is all 0's and
@@ -83,12 +86,12 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
   };
   if (!is_input_constant[0] && !is_input_constant[1]) {
     // Neither input is constant, so nothing we can resolve here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   if (is_input_constant[0] && is_input_constant[1]) {
     // Both inputs are constants. That's a job for constants propagation, not
     // for us to handle here.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int index_of_constant_input = is_input_constant[0] ? 0 : 1;
   const int index_of_variable_input = is_input_constant[0] ? 1 : 0;
@@ -105,7 +108,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
           constant_input_array.GetBuffer<ArrayDataType::kFloat>().data;
       if (!AreAllBufferElementsZero<DataType<ArrayDataType::kFloat>>(
               constant_input_data)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       FillArrayWithZeros<ArrayDataType::kFloat>(&output_array);
     } break;
@@ -114,7 +117,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
           constant_input_array.GetBuffer<ArrayDataType::kUint8>().data;
       if (!AreAllBufferElementsZero<DataType<ArrayDataType::kUint8>>(
               constant_input_data)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       FillArrayWithZeros<ArrayDataType::kUint8>(&output_array);
     } break;
@@ -123,7 +126,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
           constant_input_array.GetBuffer<ArrayDataType::kInt32>().data;
       if (!AreAllBufferElementsZero<DataType<ArrayDataType::kInt32>>(
               constant_input_data)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       FillArrayWithZeros<ArrayDataType::kInt32>(&output_array);
     } break;
@@ -132,14 +135,14 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
           constant_input_array.GetBuffer<ArrayDataType::kInt64>().data;
       if (!AreAllBufferElementsZero<DataType<ArrayDataType::kInt64>>(
               constant_input_data)) {
-        return false;
+        return ::tensorflow::Status::OK();
       }
       FillArrayWithZeros<ArrayDataType::kInt64>(&output_array);
     } break;
     default:
       AddMessageF(
           "Cannot resolve multiply by 0 because of unsupported data type\n");
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   // Erase input arrays to the multiply if no longer used
@@ -149,7 +152,8 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) {
   // Erase the multiply operator.
   model->operators.erase(mul_it);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc
index 8a8e723cf7..adc87753bc 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc
@@ -24,19 +24,23 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolvePadAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolvePadAttributes::Run(Model* model,
+                                               std::size_t op_index,
+                                               bool* modified) {
+  *modified = false;
   const auto pad_it = model->operators.begin() + op_index;
   auto* pad_op = pad_it->get();
-  if (pad_op->type != OperatorType::kPad) return false;
+  if (pad_op->type != OperatorType::kPad) return ::tensorflow::Status::OK();
 
   auto* op = static_cast<PadOperator*>(pad_op);
-  if (!op->left_padding.empty()) return false;
+  if (!op->left_padding.empty()) return ::tensorflow::Status::OK();
 
   CHECK_EQ(op->inputs.size(), 2);
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
 
   const auto& array = model->GetArray(op->inputs[1]);
-  if (!array.has_shape()) return false;
+  if (!array.has_shape()) return ::tensorflow::Status::OK();
 
   const std::vector<int>& dims = array.shape().dims();
   CHECK_EQ(dims.size(), 2);
@@ -50,6 +54,7 @@ bool ResolvePadAttributes::Run(Model* model, std::size_t op_index) {
 
   // TODO(dkalenichenko): Delete the extra input?
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc
index ebb023e342..1f0f17a37a 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc
@@ -24,19 +24,23 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolvePadV2Attributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolvePadV2Attributes::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto pad_it = model->operators.begin() + op_index;
   auto* pad_op = pad_it->get();
-  if (pad_op->type != OperatorType::kPadV2) return false;
+  if (pad_op->type != OperatorType::kPadV2) return ::tensorflow::Status::OK();
 
   auto* op = static_cast<PadV2Operator*>(pad_op);
-  if (!op->left_padding.empty()) return false;
+  if (!op->left_padding.empty()) return ::tensorflow::Status::OK();
 
   CHECK_EQ(op->inputs.size(), 3);
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
 
   const auto& array = model->GetArray(op->inputs[1]);
-  if (!array.has_shape()) return false;
+  if (!array.has_shape()) return ::tensorflow::Status::OK();
 
   const std::vector<int>& dims = array.shape().dims();
   CHECK_EQ(dims.size(), 2);
@@ -50,6 +54,7 @@ bool ResolvePadV2Attributes::Run(Model* model, std::size_t op_index) {
 
   // TODO(dkalenichenko): Delete the extra input?
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc
index 73198ac7c0..c3246ab90f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc
@@ -39,23 +39,37 @@ bool ResolveAttributes(Model* model, T* op) {
   return true;
 }
 
-bool ResolveReduceAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveReduceAttributes::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   Operator* op = model->operators[op_index].get();
   switch (op->type) {
     case OperatorType::kMean:
-      return ResolveAttributes(model, static_cast<MeanOperator*>(op));
+      *modified = ResolveAttributes(model, static_cast<MeanOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kSum:
-      return ResolveAttributes(model, static_cast<TensorFlowSumOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowSumOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kReduceProd:
-      return ResolveAttributes(model, static_cast<TensorFlowProdOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowProdOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kReduceMin:
-      return ResolveAttributes(model, static_cast<TensorFlowMinOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowMinOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kReduceMax:
-      return ResolveAttributes(model, static_cast<TensorFlowMaxOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowMaxOperator*>(op));
+      return ::tensorflow::Status::OK();
     case OperatorType::kAny:
-      return ResolveAttributes(model, static_cast<TensorFlowMaxOperator*>(op));
+      *modified =
+          ResolveAttributes(model, static_cast<TensorFlowMaxOperator*>(op));
+      return ::tensorflow::Status::OK();
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
 }
 
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
index 8e150db6fa..ee5c4810e6 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc
@@ -78,11 +78,13 @@ void ReorderAxes(AxesOrder input_axes_order, AxesOrder output_axes_order,
   }
 }
 
-bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveReorderAxes::Run(Model* model, std::size_t op_index,
+                                             bool* modified) {
+  *modified = false;
   auto it = model->operators.begin() + op_index;
   auto* op = it->get();
   if (op->type != OperatorType::kReorderAxes) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* reorder_op = static_cast<ReorderAxesOperator*>(op);
 
@@ -93,11 +95,11 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
   auto& input_array = model->GetArray(input_array_name);
   auto& output_array = model->GetArray(output_array_name);
   if (!input_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Yield until output dims have been resolved.
   if (!output_array.has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Reorder the input array dims and buffer data
   if (input_array.buffer->type == ArrayDataType::kFloat) {
@@ -120,7 +122,8 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) {
   DeleteOpAndArraysIfUnused(model, op);
   RenameArray(model, output_array_name, input_array_name);
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc
index b615c9a545..7b7a59264f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc
@@ -25,25 +25,29 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveReshapeAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveReshapeAttributes::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   const auto reshape_it = model->operators.begin() + op_index;
   auto* reshape_op = reshape_it->get();
   if (reshape_op->type != OperatorType::kReshape) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto* op = static_cast<TensorFlowReshapeOperator*>(reshape_op);
 
-  if (!op->shape.empty()) return false;
+  if (!op->shape.empty()) return ::tensorflow::Status::OK();
 
   if (IsConstantParameterArray(*model, reshape_op->inputs[1])) {
     const auto& constant_input_array = model->GetArray(reshape_op->inputs[1]);
     op->shape = constant_input_array.GetBuffer<ArrayDataType::kInt32>().data;
   }
 
-  if (op->shape.empty()) return false;
+  if (op->shape.empty()) return ::tensorflow::Status::OK();
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc
index e760d08e5a..5a838168de 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc
@@ -24,29 +24,35 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveSliceAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveSliceAttributes::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto slice_it = model->operators.begin() + op_index;
   auto* slice_op = slice_it->get();
-  if (slice_op->type != OperatorType::kSlice) return false;
+  if (slice_op->type != OperatorType::kSlice) return ::tensorflow::Status::OK();
 
   auto* op = static_cast<SliceOperator*>(slice_op);
-  if (!op->begin.empty()) return false;
+  if (!op->begin.empty()) return ::tensorflow::Status::OK();
 
   CHECK_EQ(op->inputs.size(), 3);
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[2])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
+  if (!IsConstantParameterArray(*model, op->inputs[2]))
+    return ::tensorflow::Status::OK();
 
   const auto& begin_array = model->GetArray(op->inputs[1]);
-  if (!begin_array.has_shape()) return false;
+  if (!begin_array.has_shape()) return ::tensorflow::Status::OK();
 
   const auto& size_array = model->GetArray(op->inputs[2]);
-  if (!size_array.has_shape()) return false;
+  if (!size_array.has_shape()) return ::tensorflow::Status::OK();
 
   op->begin = begin_array.GetBuffer<ArrayDataType::kInt32>().data;
   op->size = size_array.GetBuffer<ArrayDataType::kInt32>().data;
 
   // TODO(dkalenichenko): Delete the extra inputs?
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc
index fab50bec1f..3804145c4f 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc
@@ -24,16 +24,20 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveSpaceToBatchNDAttributes::Run(Model* model,
+                                                          std::size_t op_index,
+                                                          bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
-  if (op_it->get()->type != OperatorType::kSpaceToBatchND) return false;
+  if (op_it->get()->type != OperatorType::kSpaceToBatchND)
+    return ::tensorflow::Status::OK();
 
   auto* op = static_cast<SpaceToBatchNDOperator*>(op_it->get());
 
   // The attributes are resolved only when the 3 attributes (block_shape,
   // before_paddings, after_paddings) are all constant.
   if (!op->block_shape.empty()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   const int block_shape_index = 1;
@@ -42,16 +46,16 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 3);
   if (!IsConstantParameterArray(*model, op->inputs[block_shape_index]) ||
       !IsConstantParameterArray(*model, op->inputs[paddings_index]))
-    return false;
+    return ::tensorflow::Status::OK();
 
   // Handle paddings.
   const auto& paddings_array = model->GetArray(op->inputs[paddings_index]);
-  if (!paddings_array.has_shape()) return false;
+  if (!paddings_array.has_shape()) return ::tensorflow::Status::OK();
   const std::vector<int>& paddings_dims = paddings_array.shape().dims();
   if (paddings_dims.size() != 2) {
     // Code only handles padding of 2 dimensions. Perhaps another transformation
     // will delete this op.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const std::vector<int>& paddings_buffer =
       paddings_array.GetBuffer<ArrayDataType::kInt32>().data;
@@ -63,7 +67,7 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) {
   // Handle block_shape.
   const auto& block_shape_array =
       model->GetArray(op->inputs[block_shape_index]);
-  if (!block_shape_array.has_shape()) return false;
+  if (!block_shape_array.has_shape()) return ::tensorflow::Status::OK();
   const std::vector<int>& block_shape_dims = block_shape_array.shape().dims();
   CHECK_EQ(block_shape_dims.size(), 1);
   const std::vector<int>& block_shape_buffer =
@@ -72,7 +76,8 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) {
     op->block_shape.push_back(block_shape_buffer[i]);
   }
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc
index e8bb85704e..c601b0774e 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc
@@ -25,10 +25,13 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveSqueezeAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveSqueezeAttributes::Run(Model* model,
+                                                   std::size_t op_index,
+                                                   bool* modified) {
+  *modified = false;
   auto* squeeze_op = model->operators[op_index].get();
   if (squeeze_op->type != OperatorType::kSqueeze) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   DCHECK_EQ(squeeze_op->inputs.size(), 1);
   DCHECK_EQ(squeeze_op->outputs.size(), 1);
@@ -42,10 +45,11 @@ bool ResolveSqueezeAttributes::Run(Model* model, std::size_t op_index) {
           "Reshape op",
           LogName(*squeeze_op));
 
-      return RemoveTrivialPassthroughOp(this, model, op_index);
+      *modified = RemoveTrivialPassthroughOp(this, model, op_index);
+      return ::tensorflow::Status::OK();
     }
   }
-  return false;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
index 65132d7d1e..f54f5b42a1 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc
@@ -37,40 +37,47 @@ int PadAttributeArray(Array* attribute_array, std::vector<int> pad_values,
   return mask;
 }
 
-bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveStridedSliceAttributes::Run(Model* model,
+                                                        std::size_t op_index,
+                                                        bool* modified) {
+  *modified = false;
   const auto slice_it = model->operators.begin() + op_index;
   auto* slice_op = slice_it->get();
-  if (slice_op->type != OperatorType::kStridedSlice) return false;
+  if (slice_op->type != OperatorType::kStridedSlice)
+    return ::tensorflow::Status::OK();
 
   auto* op = static_cast<StridedSliceOperator*>(slice_op);
   if (!op->start_indices.empty()) {
     // We have already resolved these attributes
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(op->inputs.size(), 4);
   const auto& input_array = model->GetArray(op->inputs[0]);
   if (!input_array.has_shape()) {
     // We require the dimensionality of the input to pad the indices
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto& start_array = model->GetArray(op->inputs[1]);
-  if (!start_array.has_shape()) return false;
+  if (!start_array.has_shape()) return ::tensorflow::Status::OK();
   if (toco::RequiredBufferSizeForShape(start_array.shape()) > 4) {
     // Only 1-4D arrays are supported for now.
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   auto& stop_array = model->GetArray(op->inputs[2]);
-  if (!stop_array.has_shape()) return false;
+  if (!stop_array.has_shape()) return ::tensorflow::Status::OK();
 
   auto& stride_array = model->GetArray(op->inputs[3]);
-  if (!stride_array.has_shape()) return false;
+  if (!stride_array.has_shape()) return ::tensorflow::Status::OK();
 
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[2])) return false;
-  if (!IsConstantParameterArray(*model, op->inputs[3])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
+  if (!IsConstantParameterArray(*model, op->inputs[2]))
+    return ::tensorflow::Status::OK();
+  if (!IsConstantParameterArray(*model, op->inputs[3]))
+    return ::tensorflow::Status::OK();
 
   int num_input_axes = input_array.shape().dimensions_count();
   int start_indices_size = start_array.shape().dims(0);
@@ -112,6 +119,7 @@ bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) {
   op->stop_indices = stop_array.GetBuffer<ArrayDataType::kInt32>().data;
   op->strides = stride_array.GetBuffer<ArrayDataType::kInt32>().data;
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
index fa5ee89933..4927ccd95d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc
@@ -25,12 +25,15 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTensorFlowConcat::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto concat_it = model->operators.begin() + op_index;
   const auto* tf_concat_op = concat_it->get();
   if (tf_concat_op->type != OperatorType::kConcat &&
       tf_concat_op->type != OperatorType::kConcatV2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_GE(tf_concat_op->inputs.size(), 2);
@@ -54,7 +57,7 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
   if (!axis_array.buffer) {
     AddMessageF("Waiting for the axis of %s to be resolved to a constant",
                 LogName(*tf_concat_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK(axis_array.data_type == ArrayDataType::kInt32);
@@ -79,7 +82,8 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) {
   }
   // Remove the TensorFlowConcat op
   model->operators.erase(concat_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc
index 65346c4fe4..da039da546 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc
@@ -55,10 +55,13 @@ TransposeOperator* FindTransposeOpWithInput(const Model& model,
 
 }  // namespace
 
-bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTensorFlowMatMul::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   auto matmul_it = model->operators.begin() + op_index;
   if (matmul_it->get()->type != OperatorType::kMatMul) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* matmul_op =
       static_cast<const TensorFlowMatMulOperator*>(matmul_it->get());
@@ -73,7 +76,7 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) {
         "Not replacing %s by a FullyConnected operator, because it has "
         "the transpose_a attribute",
         LogName(*matmul_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Reorder the axes on the second input. TensorFlow uses row-major ordering
@@ -198,7 +201,8 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) {
 
   // erase the MatMul operator
   model->operators.erase(matmul_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc
index 4edffe3d48..9beea3e937 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc
@@ -24,11 +24,14 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTensorFlowMerge::Run(Model* model,
+                                                 std::size_t op_index,
+                                                 bool* modified) {
+  *modified = false;
   const auto merge_it = model->operators.begin() + op_index;
   const auto* merge_op = merge_it->get();
   if (merge_op->type != OperatorType::kMerge) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // We need to yield until this Merge node has only 1 input, which will mean
@@ -37,7 +40,7 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) {
   // non-selected inputs, so that at some point there will be only 1 input left.
   if (merge_op->inputs.size() > 1) {
     AddMessageF("Waiting for %s to be resolved", LogName(*merge_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // Now that the merge node has 1 input exactly, it is the same as an Identity
@@ -57,7 +60,8 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) {
   AddMessageF("Removing already-resolved %s", LogName(*merge_op));
   model->EraseArray(merge_op->outputs[0]);
   model->operators.erase(merge_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
index 8bef440afd..e215981b42 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc
@@ -24,11 +24,14 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTensorFlowSwitch::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
   const auto switch_it = model->operators.begin() + op_index;
   const auto* switch_op = switch_it->get();
   if (switch_op->type != OperatorType::kSwitch) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   CHECK_EQ(switch_op->inputs.size(), 2);
@@ -40,7 +43,7 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) {
     AddMessageF(
         "Waiting for the boolean predicate of %s to be resolved to a constant",
         LogName(*switch_op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   // The predicate should be boolean, and should consist of a single value.
@@ -119,7 +122,8 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) {
   // Remove the switch node itself.
   AddMessageF("Removing already-resolved %s", LogName(*switch_op));
   model->operators.erase(switch_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc
index a657ee00af..aa7945391c 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc
@@ -24,19 +24,24 @@ limitations under the License.
 
 namespace toco {
 
-bool ResolveTransposeAttributes::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ResolveTransposeAttributes::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   const auto op_it = model->operators.begin() + op_index;
-  if (op_it->get()->type != OperatorType::kTranspose) return false;
+  if (op_it->get()->type != OperatorType::kTranspose)
+    return ::tensorflow::Status::OK();
 
   auto* op = static_cast<TransposeOperator*>(op_it->get());
-  if (!op->perm.empty()) return false;
+  if (!op->perm.empty()) return ::tensorflow::Status::OK();
 
   CHECK_EQ(op->inputs.size(), 2);
-  if (!IsConstantParameterArray(*model, op->inputs[1])) return false;
+  if (!IsConstantParameterArray(*model, op->inputs[1]))
+    return ::tensorflow::Status::OK();
 
   // Handling perm.
   const auto& perm_array = model->GetArray(op->inputs[1]);
-  if (!perm_array.has_shape()) return false;
+  if (!perm_array.has_shape()) return ::tensorflow::Status::OK();
 
   const std::vector<int>& perm_dims = perm_array.shape().dims();
   CHECK_EQ(perm_dims.size(), 1);
@@ -47,7 +52,8 @@ bool ResolveTransposeAttributes::Run(Model* model, std::size_t op_index) {
     op->perm.push_back(perm_buffer[i]);
   }
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc b/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc
index 22c258cec5..e9f24a29ab 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc
@@ -24,15 +24,17 @@ limitations under the License.
 
 namespace toco {
 
-bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status ShuffleFCWeights::Run(Model* model, std::size_t op_index,
+                                           bool* modified) {
+  *modified = false;
   Operator* op = model->operators[op_index].get();
   if (op->type != OperatorType::kFullyConnected) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   FullyConnectedOperator* fc_op = static_cast<FullyConnectedOperator*>(op);
   // Exit if this FC op already has shuffled weights
   if (fc_op->weights_format != FullyConnectedWeightsFormat::kDefault) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const Array& input_array = model->GetArray(fc_op->inputs[0]);
   const string& weights_name = fc_op->inputs[1];
@@ -46,11 +48,11 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
       output_array.data_type != ArrayDataType::kInt16 ||
       !input_array.quantization_params || !weights_array.quantization_params ||
       !output_array.quantization_params) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if the shapes aren't known
   if (!input_array.has_shape() || !weights_array.has_shape()) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if, based on the known shapes, this FC op is not a GEMV.
   // The shuffling of FC weights is only useful to enable fast GEMV paths.
@@ -64,7 +66,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
           "the input shape is not 1D or 2D (possibly with additional inner "
           "dimensions of size 1)",
           LogName(*op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
   }
   if (input_shape.dims(0) != 1 && input_shape.dims(0) != 4) {
@@ -73,7 +75,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
         "the input shape's leading dimension, i.e. the 'batch size', is not "
         "equal to 1 or 4",
         LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if the weights shape isn't an integral multiple of the shuffled
   // block shape, 4x16. We don't want to have to write code dealing with
@@ -88,7 +90,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
   // two.
   const Shape& weights_shape = weights_array.shape();
   if (weights_shape.dimensions_count() != 2) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const int rows = weights_shape.dims(0);
   const int cols = weights_shape.dims(1);
@@ -97,11 +99,11 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
         "Not applying experimental shuffling to the weights of %s because its "
         "shape isn't a multiple of the shuffling block shape, 4x16",
         LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if the weights aren't already a constant array.
   if (!weights_array.buffer) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Exit if the weights are used by more than one op.
   if (CountOpsWithInput(*model, weights_name) != 1) {
@@ -109,7 +111,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
         "Not applying experimental shuffling to the weights of %s because that "
         "array is consumed by other operators",
         LogName(*op));
-    return false;
+    return ::tensorflow::Status::OK();
   }
   // Compute the shuffled weights
   auto& weights_data =
@@ -152,7 +154,8 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) {
   shuffled_input_workspace_array.GetOrCreateQuantizationParams() =
       input_array.GetQuantizationParams();
 
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
index 66cfed4ac2..e2a6f12481 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc
@@ -166,7 +166,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis0) {
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
   EXPECT_THAT(model.GetArrayMap().size(), 5);
-  (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0);
+  bool modified;
+  ASSERT_TRUE((*graph_transformation_set.begin())
+                  ->Run(&model, /*op_index=*/0, &modified)
+                  .ok());
   EXPECT_THAT(model.GetArrayMap().size(), 1);
 
   auto& concatenated_array = (*model.GetArrayMap().begin()).second;
@@ -185,7 +188,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis1) {
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
   EXPECT_THAT(model.GetArrayMap().size(), 5);
-  (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0);
+  bool modified;
+  ASSERT_TRUE((*graph_transformation_set.begin())
+                  ->Run(&model, /*op_index=*/0, &modified)
+                  .ok());
   EXPECT_THAT(model.GetArrayMap().size(), 1);
 
   auto& concatenated_array = (*model.GetArrayMap().begin()).second;
@@ -204,7 +210,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis2) {
   GraphTransformationsSet graph_transformation_set;
   graph_transformation_set.Add(new toco::ResolveConstantConcatenation);
   EXPECT_THAT(model.GetArrayMap().size(), 5);
-  (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0);
+  bool modified;
+  ASSERT_TRUE((*graph_transformation_set.begin())
+                  ->Run(&model, /*op_index=*/0, &modified)
+                  .ok());
   EXPECT_THAT(model.GetArrayMap().size(), 1);
 
   auto& concatenated_array = (*model.GetArrayMap().begin()).second;
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
index a53abc9941..57d85a0435 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc
@@ -50,7 +50,8 @@ void RunResolveSum(const std::vector<float>& input,
   sum_op->inputs = {"input0", "input1"};
   sum_op->outputs = {"output"};
   model.operators.push_back(std::move(sum_op));
-  ResolveConstantUnaryOperator().Run(&model, 0);
+  bool modified;
+  ASSERT_TRUE(ResolveConstantUnaryOperator().Run(&model, 0, &modified).ok());
   EXPECT_EQ(model.GetArray("output").GetBuffer<ArrayDataType::kFloat>().data,
             expected_output);
   EXPECT_EQ(model.GetArray("output").shape().dims(), output_shape);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc
index 69bad2fa89..4ada5c3fd0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc
@@ -25,13 +25,16 @@ limitations under the License.
 
 namespace toco {
 
-bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status UnfuseActivationFunctions::Run(Model* model,
+                                                    std::size_t op_index,
+                                                    bool* modified) {
+  *modified = false;
   const auto it = model->operators.begin() + op_index;
   auto* op = it->get();
 
   // If a conv operation has an im2col array, yield: it should be dropped first.
   if ((op->type == OperatorType::kConv) && (op->outputs.size() == 2)) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
 
   Operator* ac_op = nullptr;
@@ -46,7 +49,7 @@ bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) {
       ac_op = new Relu1Operator;
       break;
     default:
-      return false;
+      return ::tensorflow::Status::OK();
   }
 
   // At this point we know that the op has a fused activation function. At the
@@ -74,7 +77,8 @@ bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) {
 
   ac_op->inputs = {tmp_array_name};
   op->outputs = {tmp_array_name};
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc
index dd9e26e68b..e19527968d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc
@@ -22,7 +22,10 @@ limitations under the License.
 
 namespace toco {
 
-bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status UnpartitionEmbeddingLookup::Run(Model* model,
+                                                     std::size_t op_index,
+                                                     bool* modified) {
+  *modified = false;
   // Collapses a partitioned tf.nn.embedding_lookup back into a single Gather.
   // https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup
   // This transform attempts to identify the len(params) > 1 case and collapse
@@ -47,7 +50,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
   // First look for the final DynamicStitch.
   auto op_it = model->operators.begin() + op_index;
   if (op_it->get()->type != OperatorType::kDynamicStitch) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto* stitch_op = static_cast<DynamicStitchOperator*>(op_it->get());
 
@@ -72,7 +75,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
           "Skipping because indices input %s into "
           "%s is unexpected",
           LogName(*op), LogName(*stitch_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (!indices_partition_op) {
       indices_partition_op = static_cast<DynamicPartitionOperator*>(op);
@@ -83,7 +86,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
             "Skipping because indices input %s into "
             "%s is from a different source op than others",
             LogName(*op), LogName(*stitch_op));
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
   }
@@ -92,12 +95,12 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
   // The data for the indices must be a constant range of the array shape.
   if (!IsConstantParameterArray(*model, indices_partition_op->inputs[0])) {
     AddMessageF("Skipping because indices partition data is non-constant");
-    return false;
+    return ::tensorflow::Status::OK();
   }
   auto& indices_data_array = model->GetArray(indices_partition_op->inputs[0]);
   if (indices_data_array.data_type == ArrayDataType::kNone) {
     // Yield until data types are propagated.
-    return false;
+    return ::tensorflow::Status::OK();
   }
   CHECK(indices_data_array.data_type == ArrayDataType::kInt32)
       << "Indices partition inputs must be int32";
@@ -117,7 +120,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
           "Skipping because data input %s into %s "
           "is unexpected",
           LogName(*op), LogName(*stitch_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     gather_ops.push_back(static_cast<GatherOperator*>(op));
   }
@@ -132,7 +135,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
           "Skipping because data input %s into "
           "%s is unexpected",
           LogName(*op), LogName(*gather_op));
-      return false;
+      return ::tensorflow::Status::OK();
     }
     if (!data_partition_op) {
       data_partition_op = static_cast<DynamicPartitionOperator*>(op);
@@ -143,7 +146,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
             "Skipping because data input %s into "
             "%s is from a different source op than others",
             LogName(*op), LogName(*gather_op));
-        return false;
+        return ::tensorflow::Status::OK();
       }
     }
   }
@@ -236,7 +239,8 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) {
   DeleteOpAndArraysIfUnused(model, indices_partition_op);
   DeleteOpAndArraysIfUnused(model, data_partition_op);
   DeleteOpAndArraysIfUnused(model, stitch_op);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc
index fedf4441e2..5ff39aa313 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc
@@ -36,10 +36,12 @@ namespace toco {
 //    slice_c = tf.matmul(slice_a, slice_b)
 //    result_slices[bat] = slice_c
 //  result = tf.stack(result_slices)
-bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) {
+::tensorflow::Status UnrollBatchMatMul::Run(Model* model, std::size_t op_index,
+                                            bool* modified) {
+  *modified = false;
   auto batch_op_it = model->operators.begin() + op_index;
   if (batch_op_it->get()->type != OperatorType::kBatchMatMul) {
-    return false;
+    return ::tensorflow::Status::OK();
   }
   const auto* batch_op =
       static_cast<const BatchMatMulOperator*>(batch_op_it->get());
@@ -47,7 +49,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) {
   // We must have the shape of at least one input to know our batch size.
   const auto& input_array_a = model->GetArray(batch_op->inputs[0]);
   const auto& input_array_b = model->GetArray(batch_op->inputs[1]);
-  if (!input_array_a.has_shape() || !input_array_b.has_shape()) return false;
+  if (!input_array_a.has_shape() || !input_array_b.has_shape())
+    return ::tensorflow::Status::OK();
 
   // We only support the rank 3 case. If you are batching on rank > 3 you'll
   // have to figure that out.
@@ -66,7 +69,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) {
     batch_op_it = matmul_op_it + 1;
     CHECK_EQ(batch_op_it->get(), batch_op);
     model->operators.erase(batch_op_it);
-    return true;
+    *modified = true;
+    return ::tensorflow::Status::OK();
   }
   CHECK_EQ(input_array_a.shape().dimensions_count(), 3)
       << "Input arrays must have rank 3";
@@ -167,7 +171,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) {
   CHECK(batch_op_it != model->operators.end());
   CHECK(batch_op_it->get() == batch_op);
   model->operators.erase(batch_op_it);
-  return true;
+  *modified = true;
+  return ::tensorflow::Status::OK();
 }
 
 }  // namespace toco
-- 
GitLab


From 072fcb995a3fd658ee2461b59b159498c710513d Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Tue, 9 Oct 2018 11:54:20 -0700
Subject: [PATCH 0617/1085] [tf.data] NUMA-aware MapAndBatch dataset.

PiperOrigin-RevId: 216395709
---
 ...f_ExperimentalNumaMapAndBatchDataset.pbtxt |   58 +
 tensorflow/core/framework/model.h             |    2 +-
 .../core/grappler/optimizers/data/BUILD       |   35 +
 .../optimizers/data/graph_test_utils.cc       |   16 +
 .../optimizers/data/graph_test_utils.h        |    6 +
 .../map_and_batch_numa_aware_replacement.cc   |   62 +
 .../map_and_batch_numa_aware_replacement.h    |   48 +
 ...p_and_batch_numa_aware_replacement_test.cc |  112 ++
 .../core/kernels/data/experimental/BUILD      |   17 +
 .../numa_map_and_batch_dataset_op.cc          | 1135 +++++++++++++++++
 .../kernels/data/map_and_batch_dataset_op.cc  |   38 +-
 .../core/ops/experimental_dataset_ops.cc      |   26 +
 .../kernel_tests/map_and_batch_test.py        |  280 +++-
 .../kernel_tests/optimization/BUILD           |    2 +
 .../optimization/model_dataset_op_test.py     |   11 +-
 .../optimization/optimize_dataset_op_test.py  |   16 +
 .../kernel_tests/serialization/BUILD          |   15 +
 ...ap_and_batch_dataset_serialization_test.py |   95 ++
 tensorflow/python/data/experimental/ops/BUILD |    1 +
 tensorflow/python/data/ops/dataset_ops.py     |    7 +-
 .../golden/v1/tensorflow.data.-options.pbtxt  |    4 +
 .../golden/v2/tensorflow.data.-options.pbtxt  |    4 +
 22 files changed, 1909 insertions(+), 81 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt
 create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc
 create mode 100644 tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py

diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt
new file mode 100644
index 0000000000..243922d969
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt
@@ -0,0 +1,58 @@
+op {
+  graph_op_name: "ExperimentalNumaMapAndBatchDataset"
+  visibility: HIDDEN
+  in_arg {
+    name: "input_dataset"
+    description: <<END
+A variant tensor representing the input dataset.
+END
+  }
+  in_arg {
+    name: "other_arguments"
+    description: <<END
+A list of tensors, typically values that were captured when building a closure
+for `f`.
+END
+  }
+  in_arg {
+    name: "batch_size"
+    description: <<END
+A scalar representing the number of elements to accumulate in a
+batch. It determines the number of concurrent invocations of `f` that process
+elements from `input_dataset` in parallel.
+END
+  }
+  in_arg {
+    name: "num_parallel_calls"
+    description: <<END
+A scalar representing the maximum number of parallel invocations of the `map_fn`
+function. Applying the `map_fn` on consecutive input elements in parallel has
+the potential to improve input pipeline throughput.
+END
+  }
+  in_arg {
+    name: "drop_remainder"
+    description: <<END
+A scalar representing whether the last batch should be dropped in case its size
+is smaller than desired.
+END
+  }
+  attr {
+    name: "f"
+    description: <<END
+A function to apply to the outputs of `input_dataset`.
+END
+  }
+  summary: "Creates a dataset that fuses mapping with batching."
+  description: <<END
+Creates a dataset that applies `f` to the outputs of `input_dataset` and then
+batches `batch_size` of them.
+
+Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up
+to `batch_size * num_parallel_batches` copies of `f` in parallel.
+
+Unlike "MapAndBatchDatasetV2", this dataset uses a NUMA-aware thread scheduling
+policy. Because it uses the single-threaded executor, it only supports the
+function-based control flow ops.
+END
+}
diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h
index eae0fa70e8..9596252664 100644
--- a/tensorflow/core/framework/model.h
+++ b/tensorflow/core/framework/model.h
@@ -335,7 +335,7 @@ class Model {
       if (name_ == "Map") {
         return Type::MAP;
       }
-      if (name_ == "MapAndBatch") {
+      if (name_ == "MapAndBatch" || name_ == "NumaMapAndBatch") {
         return Type::MAP_AND_BATCH;
       }
       if (name_ == "PaddedBatch") {
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index ee7c14e3ab..1c553044a8 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -414,6 +414,40 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "map_and_batch_numa_aware_replacement",
+    srcs = ["map_and_batch_numa_aware_replacement.cc"],
+    hdrs = ["map_and_batch_numa_aware_replacement.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":graph_utils",
+        "//tensorflow/core/grappler:mutable_graph_view",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:op_types",
+        "//tensorflow/core/grappler:utils",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+    ] + tf_protos_all(),
+)
+
+tf_cc_test(
+    name = "map_and_batch_numa_aware_replacement_test",
+    srcs = ["map_and_batch_numa_aware_replacement_test.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":graph_test_utils",
+        ":graph_utils",
+        ":map_and_batch_numa_aware_replacement",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/grappler:grappler_item",
+    ],
+)
+
 cc_library(
     name = "noop_elimination",
     srcs = ["noop_elimination.cc"],
@@ -490,6 +524,7 @@ cc_library(
         ":hoist_random_uniform",
         ":latency_all_edges",
         ":map_and_batch_fusion",
+        ":map_and_batch_numa_aware_replacement",
         ":map_and_filter_fusion",
         ":map_fusion",
         ":map_parallelization",
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
index b2eec7220e..1f03c6515c 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -44,6 +45,21 @@ NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
        {"output_types", gtl::ArraySlice<TensorShape>{}}});
 }
 
+NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
+                            StringPiece batch_size_node_name,
+                            StringPiece num_parallel_calls_node_name,
+                            StringPiece drop_remainder_node_name,
+                            StringPiece function_name) {
+  return test::function::NDef(
+      name, "MapAndBatchDatasetV2",
+      {string(input_node_name), "", string(batch_size_node_name),
+       string(num_parallel_calls_node_name), string(drop_remainder_node_name)},
+      {{"predicate", FunctionDefHelper::FunctionRef(string(function_name))},
+       {"Targuments", {}},
+       {"output_shapes", gtl::ArraySlice<TensorShape>{}},
+       {"output_types", gtl::ArraySlice<TensorShape>{}}});
+}
+
 }  // end namespace graph_tests_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
index ca0fde997d..f7891d5e1f 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
@@ -29,6 +29,12 @@ NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
 NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
                        StringPiece function_name = "IsZero");
 
+NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
+                            StringPiece batch_size_node_name,
+                            StringPiece num_parallel_calls_node_name,
+                            StringPiece drop_remainder_node_name,
+                            StringPiece function_name = "XTimesTwo");
+
 }  // end namespace graph_tests_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc
new file mode 100644
index 0000000000..452089eb67
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.cc
@@ -0,0 +1,62 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h"
+
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+NodeDef MakeNumaAware(const NodeDef& node, MutableGraphView* graph) {
+  NodeDef numa_aware_node = node;
+  graph_utils::SetUniqueGraphNodeName("map_and_batch_numa_aware",
+                                      graph->GetGraph(), &numa_aware_node);
+  numa_aware_node.set_op("ExperimentalNumaMapAndBatchDataset");
+  return numa_aware_node;
+}
+
+}  // namespace
+
+Status MapAndBatchNumaAwareReplacement::Optimize(Cluster* cluster,
+                                                 const GrapplerItem& item,
+                                                 GraphDef* output) {
+  *output = item.graph;
+  MutableGraphView graph(output);
+  std::set<string> nodes_to_delete;
+
+  for (const NodeDef& node : item.graph.node()) {
+    if (node.op() != "MapAndBatchDatasetV2") continue;
+
+    auto* numa_node = graph.AddNode(MakeNumaAware(node, &graph));
+    graph.ReplaceInput(node, *numa_node);
+    nodes_to_delete.insert(node.name());
+  }
+  graph.DeleteNodes(nodes_to_delete);
+  return Status::OK();
+}
+
+REGISTER_GRAPH_OPTIMIZER_AS(MapAndBatchNumaAwareReplacement,
+                            "map_and_batch_numa_aware_replacement");
+
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h
new file mode 100644
index 0000000000..3b2acd288b
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h
@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_NUMA_AWARE_REPLACEMENT_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_NUMA_AWARE_REPLACEMENT_H_
+
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+
+namespace tensorflow {
+namespace grappler {
+
+class MapAndBatchNumaAwareReplacement : public CustomGraphOptimizer {
+ public:
+  MapAndBatchNumaAwareReplacement() = default;
+  ~MapAndBatchNumaAwareReplacement() override = default;
+
+  string name() const override {
+    return "map_and_batch_numa_aware_replacement";
+  }
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* output) override;
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimize_output, double result) override {}
+};
+
+}  // namespace grappler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_AND_BATCH_NUMA_AWARE_REPLACEMENT_H_
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc
new file mode 100644
index 0000000000..3c5c61d1c2
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement_test.cc
@@ -0,0 +1,112 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/map_and_batch_numa_aware_replacement.h"
+
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+TEST(MapAndBatchNumaAwareReplacementTest, ReplaceSimple) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {
+          NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+          NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+          NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+          NDef("range", "RangeDataset", {"start", "stop", "step"}, {}),
+          NDef("batch_size", "Const", {}, {{"value", 3}, {"dtype", DT_INT32}}),
+          NDef("num_parallel_calls", "Const", {},
+               {{"value", 5}, {"dtype", DT_INT32}}),
+          NDef("drop_remainder", "Const", {},
+               {{"value", 0}, {"dtype", DT_BOOL}}),
+          graph_tests_utils::MakeMapAndBatchNode(
+              "map_and_batch", "range", "batch_size", "num_parallel_calls",
+              "drop_remainder"),
+      },
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  MapAndBatchNumaAwareReplacement optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map_and_batch", output));
+  EXPECT_FALSE(graph_utils::ContainsNodeWithOp("MapAndBatchDatasetV2", output));
+  EXPECT_TRUE(graph_utils::ContainsNodeWithOp(
+      "ExperimentalNumaMapAndBatchDataset", output));
+}
+
+TEST(MapAndBatchNumaAawareReplacementTest, ReplaceWithExtraChild) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {
+          NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+          NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+          NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+          NDef("range", "RangeDataset", {"start", "stop", "step"}, {}),
+          NDef("batch_size", "Const", {}, {{"value", 3}, {"dtype", DT_INT32}}),
+          NDef("num_parallel_calls", "Const", {},
+               {{"value", 5}, {"dtype", DT_INT32}}),
+          NDef("drop_remainder", "Const", {},
+               {{"value", 0}, {"dtype", DT_BOOL}}),
+          graph_tests_utils::MakeMapAndBatchNode(
+              "map_and_batch", "range", "batch_size", "num_parallel_calls",
+              "drop_remainder"),
+          NDef("cache", "CacheDataset", {"map_and_batch"}, {}),
+      },
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  MapAndBatchNumaAwareReplacement optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map_and_batch", output));
+  EXPECT_FALSE(graph_utils::ContainsNodeWithOp("MapAndBatchDatasetV2", output));
+  EXPECT_TRUE(graph_utils::ContainsNodeWithOp(
+      "ExperimentalNumaMapAndBatchDataset", output));
+  EXPECT_TRUE(graph_utils::ContainsNodeWithOp("CacheDataset", output));
+
+  int numa_map_and_batch_component_id = graph_utils::FindGraphNodeWithOp(
+      "ExperimentalNumaMapAndBatchDataset", output);
+  auto& numa_map_and_batch_component =
+      output.node(numa_map_and_batch_component_id);
+  EXPECT_EQ(numa_map_and_batch_component.input(0), "range");
+
+  int cache_id = graph_utils::FindGraphNodeWithOp("CacheDataset", output);
+  auto& cache_node = output.node(cache_id);
+  EXPECT_EQ(cache_node.input(0), numa_map_and_batch_component.name());
+}
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD
index 43406db3ed..4cf5643bc0 100644
--- a/tensorflow/core/kernels/data/experimental/BUILD
+++ b/tensorflow/core/kernels/data/experimental/BUILD
@@ -102,6 +102,22 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "numa_map_and_batch_dataset_op",
+    srcs = ["numa_map_and_batch_dataset_op.cc"],
+    deps = [
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/kernels:inplace_ops",
+        "//tensorflow/core/kernels/data:captured_function",
+        "//tensorflow/core/kernels/data:dataset",
+        "@com_google_absl//absl/memory",
+    ],
+)
+
 tf_kernel_library(
     name = "unique_dataset_op",
     srcs = ["unique_dataset_op.cc"],
@@ -132,6 +148,7 @@ tf_kernel_library(
         ":ignore_errors_dataset_op",
         ":indexed_dataset",
         ":lmdb_dataset_op",
+        ":numa_map_and_batch_dataset_op",
         ":prefetching_kernels",
         ":threadpool_dataset_op",
         ":unique_dataset_op",
diff --git a/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
new file mode 100644
index 0000000000..d83edb9667
--- /dev/null
+++ b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
@@ -0,0 +1,1135 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#define EIGEN_USE_THREADS
+
+#include <atomic>
+#include <utility>
+
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/data/captured_function.h"
+#include "tensorflow/core/kernels/data/dataset.h"
+#include "tensorflow/core/kernels/inplace_ops_functor.h"
+#include "tensorflow/core/lib/core/blocking_counter.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/platform/numa.h"
+#include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+namespace data {
+namespace {
+
+// kWindowSize is the fixed constant controlling the number of batch outputs
+// each NumaWorkerBlock may be processing at a time. This is currently a
+// constant and not user configurable to enable future performance optimizations
+// in the implementation.
+const int64 kWindowSize = 10;
+
+// Define a helper for more consistent logging.
+#define WORKER_VLOG(verbose_level)                                           \
+  VLOG(verbose_level) << "WorkerThread (" << numa_node << ", " << thread_num \
+                      << "): "
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
+
+class NumaMapAndBatchDatasetOp : public UnaryDatasetOpKernel {
+ public:
+  explicit NumaMapAndBatchDatasetOp(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+  }
+
+ protected:
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    int64 batch_size;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "batch_size", &batch_size));
+    OP_REQUIRES(
+        ctx, batch_size > 0,
+        errors::InvalidArgument("batch_size must be greater than zero."));
+
+    int64 num_parallel_calls;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
+                                            &num_parallel_calls));
+    OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune,
+                errors::InvalidArgument(
+                    "num_parallel_calls must be greater than zero."));
+
+    bool drop_remainder;
+    OP_REQUIRES_OK(ctx,
+                   ParseScalarArgument(ctx, "drop_remainder", &drop_remainder));
+
+    std::unique_ptr<CapturedFunction> captured_func;
+    OP_REQUIRES_OK(
+        ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
+                                      /* use_inter_op_parallelism = */ false,
+                                      &captured_func));
+
+    *output = new Dataset(ctx, input, batch_size, num_parallel_calls,
+                          drop_remainder, output_types_, output_shapes_, func_,
+                          std::move(captured_func));
+  }
+
+ private:
+  class Dataset : public DatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 batch_size,
+            int64 num_parallel_calls, bool drop_remainder,
+            const DataTypeVector& output_types,
+            const std::vector<PartialTensorShape>& output_shapes,
+            const NameAttrList& func,
+            std::unique_ptr<CapturedFunction> captured_func)
+        : DatasetBase(DatasetContext(ctx)),
+          input_(input),
+          batch_size_(batch_size),
+          num_parallel_calls_(num_parallel_calls),
+          drop_remainder_(drop_remainder),
+          output_types_(output_types),
+          output_shapes_(output_shapes),
+          func_(func),
+          captured_func_(std::move(captured_func)) {
+      input_->Ref();
+    }
+
+    ~Dataset() override { input_->Unref(); }
+
+    std::unique_ptr<IteratorBase> MakeIteratorInternal(
+        const string& prefix) const override {
+      return std::unique_ptr<IteratorBase>(
+          new Iterator({this, strings::StrCat(prefix, "::NumaMapAndBatch")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return output_types_;
+    }
+
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return output_shapes_;
+    }
+
+    string DebugString() const override {
+      return "NumaMapAndBatchDatasetOp::Dataset";
+    }
+
+   protected:
+    Status AsGraphDefInternal(SerializationContext* ctx,
+                              DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
+      Node* batch_size_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(batch_size_, &batch_size_node));
+      Node* num_parallel_calls_node;
+      TF_RETURN_IF_ERROR(
+          b->AddScalar(num_parallel_calls_, &num_parallel_calls_node));
+      Node* drop_remainder_node;
+      TF_RETURN_IF_ERROR(b->AddScalar(drop_remainder_, &drop_remainder_node));
+
+      DataTypeVector other_arguments_types;
+      other_arguments_types.reserve(captured_func_->captured_inputs().size());
+      std::vector<Node*> other_arguments;
+      other_arguments.reserve(captured_func_->captured_inputs().size());
+      for (const Tensor& t : captured_func_->captured_inputs()) {
+        Node* node;
+        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        other_arguments.emplace_back(node);
+        other_arguments_types.emplace_back(t.dtype());
+      }
+      AttrValue f;
+      b->BuildAttrValue(func_, &f);
+      AttrValue other_arguments_types_attr;
+      b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
+
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this,
+          {std::make_pair(0, input_graph_node),
+           std::make_pair(2, batch_size_node),
+           std::make_pair(3, num_parallel_calls_node),
+           std::make_pair(4, drop_remainder_node)},  // Single tensor inputs.
+          {std::make_pair(1, other_arguments)},      // Tensor list inputs.
+          {std::make_pair("f", f),
+           std::make_pair("Targuments", other_arguments_types_attr)},  // Attrs
+          output));
+      return Status::OK();
+    }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params),
+            mu_(std::make_shared<mutex>()),
+            autotune_cond_var_(std::make_shared<condition_variable>()),
+            num_parallel_calls_(std::make_shared<model::SharedState>(
+                params.dataset->num_parallel_calls_, mu_, autotune_cond_var_)) {
+      }
+
+      ~Iterator() override {
+        mutex_lock l(*mu_);
+        cancelled_ = true;
+        VLOG(3) << "NumaMapAndBatchIterator::~Iterator: cancelling operations.";
+        for (size_t i = 0; i < workers_.size(); ++i) {
+          workers_[i]->manager.Cancel();
+        }
+        VLOG(3) << "NumaMapAndBatchIterator::~Iterator: waiting for threads to "
+                   "shut down.";
+      }
+
+      Status Initialize(IteratorContext* ctx) override {
+        mutex_lock l(*mu_);
+        AddConstantParameter(ctx, "batch_size", dataset()->batch_size_);
+        if (num_parallel_calls_->value == kAutoTune) {
+          num_parallel_calls_->value = std::max(1, port::NUMANumNodes());
+          AddTunableParameter(ctx,
+                              /* name = */ "parallelism",
+                              /* state = */ num_parallel_calls_,
+                              /* min = */ num_parallel_calls_->value,
+                              /* max = */ port::NumSchedulableCPUs());
+        } else {
+          AddConstantParameter(ctx, "parallelism", num_parallel_calls_->value);
+        }
+        TF_RETURN_IF_ERROR(
+            dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
+        TF_RETURN_IF_ERROR(dataset()->captured_func_->Instantiate(ctx));
+        return Status::OK();
+      }
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        auto cleanup = gtl::MakeCleanup(
+            [] { VLOG(3) << "GetNextInternal call returning."; });
+        NumaWorkerBlock* worker = nullptr;
+        {
+          mutex_lock l(*mu_);
+          VLOG(3) << "GetNextInternal call; current block: " << cur_block_;
+          if (global_end_of_input_) {
+            *end_of_sequence = true;
+            return Status::OK();
+          }
+          TF_RETURN_IF_ERROR(EnsureBackgroundThreadsStarted(ctx));
+          worker = workers_[cur_block_].get();
+          cur_block_ = (cur_block_ + 1) % workers_.size();
+        }
+        TF_RETURN_IF_ERROR(worker->manager.GetBatch(
+            ctx, dataset()->drop_remainder_, &global_end_of_input_, out_tensors,
+            end_of_sequence));
+        return Status::OK();
+      }
+
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        mutex_lock l(*mu_);
+        for (size_t i = 0; i < workers_.size(); ++i) {
+          if (!workers_[i]->manager.Quiesce()) {
+            return errors::Cancelled(
+                "The iterator was deleted before it could reach a "
+                "checkpointable state.");
+          }
+        }
+
+        TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("num_workers"), workers_.size()));
+
+        for (size_t i = 0; i < workers_.size(); ++i) {
+          size_t index = (cur_block_ + i) % workers_.size();
+          TF_RETURN_IF_ERROR(workers_[index]->manager.Save(writer, this, i));
+        }
+        return Status::OK();
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        mutex_lock l(*mu_);
+        TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_));
+        int64 num_workers = -1;
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("num_workers"), &num_workers));
+        // Note: num_workers can be 0 if the iterator wasn't started when
+        // first checkpointed.
+        if (num_workers < 0) {
+          return errors::DataLoss(
+              "When restoring from checkpoint, we encountered a data "
+              "consistency error: num_workers has an invalid value: ",
+              num_workers);
+        }
+        if (port::NUMAEnabled()) {
+          int actual_numa_domains = port::NUMANumNodes();
+          if (actual_numa_domains != num_workers && num_workers > 0) {
+            LOG(WARNING) << "# NUMA domains mismatch when restoring from "
+                            "checkpoint: checkpoint has "
+                         << num_workers
+                         << " NUMA domains, while this host has: "
+                         << actual_numa_domains << " NUMA domains.";
+          }
+        }
+        if (num_workers > 1 && !port::NUMAEnabled()) {
+          LOG(WARNING) << "NUMA is not enabled for this process, but restoring "
+                          "a checkpoint that assumes "
+                       << num_workers << " NUMA domains.";
+        }
+        workers_.resize(num_workers);
+        for (size_t i = 0; i < num_workers; ++i) {
+          workers_[i] = MakeUnique<NumaWorkerBlock>(this);
+          TF_RETURN_IF_ERROR(
+              workers_[i]->manager.Restore(ctx, reader, this, i));
+        }
+        cur_block_ = 0;
+        return Status::OK();
+      }
+
+     private:
+      // NumaBlockManager manages all the state for a set of threads pinned to a
+      // single NUMA domain.
+      //
+      // The methods can be divided into 3 categories based on who should call
+      // them:
+      //
+      //  (1) RunnerThread: WaitForInputSpace, PushInputs, SetEndOfInput.
+      //  (2) WorkerThread: RetrieveInput, GetBatchTensors.
+      //      RecordBatchEntryComplete
+      //  (3) Client threads: GetBatch, Cancel, Save, Restore.
+      //
+      // Internally, we manage state in a circular buffer of size `kWindowSize`.
+      // There are 3 pointers into the circular buffer, and must maintain the
+      // following order: (1) next_input_batch_ (corresponding to the next input
+      // batch to be pulled from the input iterator), (2) next_input_
+      // (corresponding to the batch the WorkerThreads should pull from for
+      // their next inputs), and (3) next_output_ corresponding to the next
+      // value to be consumed by the output iterator.
+      //
+      // Methods return errors::Cancelled if the iteration is cancelled before
+      // completing.
+      //
+      // NumaBlockManager is thread safe.
+      class NumaBlockManager {
+       public:
+        explicit NumaBlockManager(Iterator* itr) : itr_(itr) {}
+
+        // WaitForInputSpace blocks until there is space in the circular buffer
+        // to begin processing a new batch of elements.
+        //
+        // Returns true when there is space, false if the Iterator is cancelled.
+        bool WaitForInputSpace(IteratorContext* ctx) {
+          mutex_lock l(mu_);
+
+          size_t next = (next_input_batch_ + 1) % kWindowSize;
+          DCHECK(next < kWindowSize) << next;
+
+          // Wait for space in the circular buffer.
+          while (!cancelled_ && batches_[next].state != BatchState::kEmpty) {
+            VLOG(3) << "Waiting for input space; next: " << next
+                    << ", next_output_: " << next_output_
+                    << ", next_input_batch_: " << next_input_batch_;
+            itr_->RecordStop(ctx);
+            runner_cond_var_.wait(l);
+            itr_->RecordStart(ctx);
+          }
+          if (cancelled_) {
+            VLOG(3) << "WaitForInputSpace cancelled.";
+            return false;
+          }
+
+          DCHECK(batches_[next].state == BatchState::kEmpty);
+
+          next_input_batch_ = next;
+          return true;
+        }
+
+        // PushInputs sets the inputs for the next batch as retrieved from the
+        // input iterator.
+        void PushInputs(const Status& status,
+                        std::vector<std::vector<Tensor>> inputs) {
+          mutex_lock l(mu_);
+
+          DCHECK(next_input_ < kWindowSize) << next_input_;
+          DCHECK(batches_[next_input_batch_].state == BatchState::kEmpty);
+          DCHECK(batches_[next_input_batch_].next_input_to_process == 0)
+              << batches_[next_input_batch_].next_input_to_process;
+          DCHECK(batches_[next_input_batch_].status.ok())
+              << batches_[next_input_batch_].status;
+
+          batches_[next_input_batch_].inputs.swap(inputs);
+          batches_[next_input_batch_].state = BatchState::kInputsFilled;
+          batches_[next_input_batch_].status.Update(status);
+          if (batches_[next_input_batch_].status.ok()) {
+            worker_cond_var_.notify_all();
+          } else {
+            client_cond_var_.notify_all();
+            batches_[next_input_batch_].error_index = 0;
+          }
+        }
+
+        // SetEndOfInput records the fact that we have reached the end of the
+        // input iterator, and that we should return end_of_sequence = true when
+        // we have exhaused all buffered batches.
+        void SetEndOfInput() {
+          mutex_lock l(mu_);
+          reached_eof_ = true;
+          worker_cond_var_.notify_all();
+          client_cond_var_.notify_all();
+        }
+
+        // RetrieveInput gets the next input tuple to be mapped by a worker
+        // thread.
+        //
+        // Returns true if an input was retrieved, false if the iterator has
+        // been cancelled.
+        bool RetrieveInput(IteratorContext* ctx, std::vector<Tensor>* input,
+                           uint64* index, size_t* sequence_number) {
+          mutex_lock l(mu_);
+
+          // Wait for inputs to be ready.
+          while (!cancelled_ &&
+                 batches_[next_input_].state != BatchState::kInputsFilled) {
+            itr_->RecordStop(ctx);
+            worker_cond_var_.wait(l);
+            itr_->RecordStart(ctx);
+          }
+
+          if (cancelled_) {
+            return false;
+          }
+
+          DCHECK(batches_[next_input_].next_input_to_process <
+                 batches_[next_input_].inputs.size())
+              << "next_input_: " << next_input_ << ", next_input_to_process: "
+              << batches_[next_input_].next_input_to_process
+              << ", inputs.size(): " << batches_[next_input_].inputs.size()
+              << ", state: " << static_cast<int32>(batches_[next_input_].state)
+              << ", this: " << this;
+          *index = batches_[next_input_].next_input_to_process;
+          *sequence_number = next_input_;
+          input->swap(batches_[next_input_]
+                          .inputs[batches_[next_input_].next_input_to_process]);
+          // Increment pointers.
+          batches_[next_input_].next_input_to_process++;
+
+          if (batches_[next_input_].next_input_to_process ==
+              batches_[next_input_].inputs.size()) {
+            batches_[next_input_].state = BatchState::kAllMapsStarted;
+            next_input_ = (next_input_ + 1) % kWindowSize;
+          }
+          return true;
+        }
+
+        // GetBatchTensors returns a pointer to the output batch tensors for the
+        // worker thread to copy into.
+        //
+        // allocate_output is a function taking a batch size, and a pointer to
+        // the output tuple of Tensors to allocate them. The allocate_output
+        // function is called at most once per output batch.
+        std::vector<Tensor>* GetBatchTensors(
+            size_t sequence_number,
+            std::function<void(size_t, std::vector<Tensor>*)> allocate_output) {
+          mutex_lock l(mu_);
+          DCHECK(sequence_number < kWindowSize) << sequence_number;
+          DCHECK(batches_[sequence_number].state == BatchState::kInputsFilled ||
+                 batches_[sequence_number].state == BatchState::kAllMapsStarted)
+              << sequence_number;
+
+          if (batches_[sequence_number].outputs.empty()) {
+            allocate_output(batches_[sequence_number].inputs.size(),
+                            &batches_[sequence_number].outputs);
+          }
+          return &batches_[sequence_number].outputs;
+        }
+
+        // RecordBatchEntryComplete records an element of the batch has finished
+        // copying into the output tensors.
+        void RecordBatchEntryComplete(size_t sequence_number, uint64 index,
+                                      Status s) {
+          mutex_lock l(mu_);
+          DCHECK(sequence_number < kWindowSize) << sequence_number;
+          DCHECK(batches_[sequence_number].state == BatchState::kInputsFilled ||
+                 batches_[sequence_number].state == BatchState::kAllMapsStarted)
+              << sequence_number;
+
+          batches_[sequence_number].num_outputs_complete++;
+          if (!s.ok() && batches_[sequence_number].error_index > index) {
+            batches_[sequence_number].status = s;
+            batches_[sequence_number].error_index = index;
+          }
+
+          if (batches_[sequence_number].num_outputs_complete ==
+              batches_[sequence_number].inputs.size()) {
+            DCHECK(batches_[sequence_number].state ==
+                   BatchState::kAllMapsStarted);
+            batches_[sequence_number].state = BatchState::kOutputsComplete;
+            batches_[sequence_number].inputs.clear();  // Eagerly save memory.
+            batches_[sequence_number].inputs.shrink_to_fit();
+            client_cond_var_.notify_all();
+          }
+        }
+
+        // GetBatch retrieves the next output batch tensors.
+        Status GetBatch(IteratorContext* ctx, bool drop_remainder,
+                        bool* global_eof, std::vector<Tensor>* out_tensor,
+                        bool* end_of_sequence) {
+          mutex_lock l(mu_);
+          // Wait until one of 3 conditions occurs:
+          //  (1) we're cancelled.
+          //  (2) the state becomes kOutputsComplete
+          //  (3) state is empty && reached_eof.
+          while (!cancelled_ &&
+                 batches_[next_output_].state != BatchState::kOutputsComplete &&
+                 !(reached_eof_ &&
+                   batches_[next_output_].state == BatchState::kEmpty)) {
+            VLOG(3) << "Waiting in GetBatch.";
+            itr_->RecordStop(ctx);
+            client_cond_var_.wait(l);
+            itr_->RecordStart(ctx);
+          }
+
+          if (cancelled_) {
+            return errors::Cancelled(
+                "Cancelled in NumaMapAndBatch::GetNext call.");
+          }
+
+          if (reached_eof_ &&
+              batches_[next_output_].state == BatchState::kEmpty) {
+            VLOG(4) << "GetBatch returning end of sequence.";
+            *end_of_sequence = true;
+            *global_eof = true;
+            return Status::OK();
+          }
+
+          VLOG(3) << "Returning output index: " << next_output_
+                  << ", this: " << this;
+
+          *end_of_sequence = false;
+          Status s = batches_[next_output_].status;
+          if (s.ok()) {
+            out_tensor->swap(batches_[next_output_].outputs);
+          }
+          // Handle early termination.
+          if (errors::IsOutOfRange(s)) {
+            *global_eof = true;
+            s = Status::OK();
+            if (drop_remainder || batches_[next_output_].error_index == 0) {
+              *end_of_sequence = true;
+            } else {
+              std::vector<Tensor> true_outputs;
+              for (size_t i = 0; i < batches_[next_output_].outputs.size();
+                   ++i) {
+                TensorShape component_shape(
+                    batches_[next_output_].outputs[i].shape());
+                component_shape.set_dim(0, batches_[next_output_].error_index);
+                AllocatorAttributes attr;
+                attr.set_gpu_compatible(true);
+                Tensor component(ctx->allocator(attr),
+                                 batches_[next_output_].outputs[i].dtype(),
+                                 component_shape);
+                TF_RETURN_IF_ERROR(CopyPartialBatch(
+                    &component, batches_[next_output_].outputs[i],
+                    batches_[next_output_].error_index));
+                true_outputs.emplace_back(std::move(component));
+              }
+              out_tensor->swap(true_outputs);
+            }
+          }
+
+          batches_[next_output_].Reset();
+          next_output_ = (next_output_ + 1) % kWindowSize;
+          runner_cond_var_.notify_all();
+
+          return s;
+        }
+
+        void Cancel() {
+          mutex_lock l(mu_);
+          VLOG(3) << "Cancelling NUMA block.";
+          cancelled_ = true;
+          runner_cond_var_.notify_all();
+          worker_cond_var_.notify_all();
+          client_cond_var_.notify_all();
+        }
+
+        // Waits until all the worker threads have completed their work and all
+        // internal state has reached a "safe-point" where we can safely
+        // checkpoint.
+        //
+        // Returns true if completed successfully, false if cancelled while
+        // waiting.
+        bool Quiesce() {
+          mutex_lock l(mu_);
+          VLOG(3) << "Waiting until the operations have quiesced.";
+          while (!cancelled_ && !AllMapOperationsFinished()) {
+            client_cond_var_.wait(l);
+          }
+          if (cancelled_) {
+            return false;
+          }
+          return true;
+        }
+
+        Status Save(IteratorStateWriter* writer, Iterator* itr, size_t index) {
+          mutex_lock l(mu_);
+          string prefix = itr->full_name(strings::StrCat("numa_block_", index));
+          if (reached_eof_) {
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                strings::StrCat(prefix, "_end_of_input"), ""));
+          }
+          for (size_t i = 0; i < kWindowSize; ++i) {
+            size_t index = (next_output_ + i) % kWindowSize;
+            if (batches_[index].state == BatchState::kEmpty) {
+              break;
+            }
+            string batch_prefix = strings::StrCat(prefix, "_batch_", i);
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                strings::StrCat(batch_prefix, "_code"),
+                static_cast<int64>(batches_[index].status.code())));
+            if (!batches_[index].status.ok()) {
+              TF_RETURN_IF_ERROR(
+                  writer->WriteScalar(strings::StrCat(batch_prefix, "_msg"),
+                                      batches_[index].status.error_message()));
+              TF_RETURN_IF_ERROR(writer->WriteScalar(
+                  strings::StrCat(batch_prefix, "_error_index"),
+                  batches_[index].error_index));
+            }
+
+            TF_RETURN_IF_ERROR(writer->WriteScalar(
+                strings::StrCat(batch_prefix, "_output_size"),
+                batches_[index].outputs.size()));
+            for (size_t j = 0; j < batches_[index].outputs.size(); ++j) {
+              string tensor_prefix =
+                  strings::StrCat(batch_prefix, "_output_", j);
+              if (!batches_[index].status.ok()) {
+                DCHECK(batches_[index].error_index >= 0 &&
+                       batches_[index].error_index <
+                           itr_->dataset()->batch_size_);
+                // If the batch is not full, we only store the first
+                // `error_index` values. The rest of the batch tensor might not
+                // be initialized, and accessing that will raise msan errors.
+                TF_RETURN_IF_ERROR(writer->WriteTensor(
+                    tensor_prefix, batches_[index].outputs[j].Slice(
+                                       0, batches_[index].error_index)));
+              } else {
+                TF_RETURN_IF_ERROR(writer->WriteTensor(
+                    tensor_prefix, batches_[index].outputs[j]));
+              }
+            }
+          }
+          return Status::OK();
+        }
+
+        Status Restore(IteratorContext* ctx, IteratorStateReader* reader,
+                       Iterator* itr, size_t index) {
+          mutex_lock l(mu_);
+          if (reached_eof_) {
+            return errors::FailedPrecondition(
+                "Already reached the end of the sequence.");
+          }
+          string prefix = itr->full_name(strings::StrCat("numa_block_", index));
+          reached_eof_ =
+              reader->Contains(strings::StrCat(prefix, "_end_of_input"));
+          for (size_t i = 0; i < kWindowSize; ++i) {
+            string batch_prefix = strings::StrCat(prefix, "_batch_", i);
+            if (!reader->Contains(strings::StrCat(batch_prefix, "_code"))) {
+              break;
+            }
+            Batch batch;
+            batch.state = BatchState::kOutputsComplete;
+            int64 code_int;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                strings::StrCat(batch_prefix, "_code"), &code_int));
+            error::Code code = static_cast<error::Code>(code_int);
+            if (code != error::Code::OK) {
+              string error_message;
+              TF_RETURN_IF_ERROR(reader->ReadScalar(
+                  strings::StrCat(batch_prefix, "_msg"), &error_message));
+              batch.status = Status(code, error_message);
+              int64 error_index_int = -1;
+              TF_RETURN_IF_ERROR(reader->ReadScalar(
+                  strings::StrCat(batch_prefix, "_error_index"),
+                  &error_index_int));
+              if (error_index_int < 0 ||
+                  error_index_int > itr->dataset()->batch_size_) {
+                return errors::FailedPrecondition(
+                    "Error index out of bounds when restoring from checkpoint; "
+                    "error index: ",
+                    error_index_int);
+              }
+              batch.error_index = static_cast<size_t>(error_index_int);
+            }
+            int64 output_size = -1;
+            TF_RETURN_IF_ERROR(reader->ReadScalar(
+                strings::StrCat(batch_prefix, "_output_size"), &output_size));
+            batch.outputs.reserve(output_size);
+            for (size_t j = 0; j < output_size; ++j) {
+              string tensor_name = strings::StrCat(batch_prefix, "_output_", j);
+              Tensor t;
+              TF_RETURN_IF_ERROR(reader->ReadTensor(tensor_name, &t));
+              batch.outputs.emplace_back(std::move(t));
+            }
+            batches_[i] = std::move(batch);
+          }
+          return Status::OK();
+        }
+
+       private:
+        bool AllMapOperationsFinished() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+          for (size_t i = 0; i < kWindowSize; ++i) {
+            if (batches_[i].state == BatchState::kInputsFilled ||
+                batches_[i].state == BatchState::kAllMapsStarted) {
+              return false;
+            }
+            if (batches_[i].state != BatchState::kOutputsComplete &&
+                !reached_eof_) {
+              return false;
+            }
+          }
+          return true;
+        }
+
+        // Batches begin in the `kEmpty` state. Once the RunnerThread has
+        // filled the `inputs` to a `Batch`, it transitions to the
+        // `kInputsFilled` state. At this point, the Worker threads run the map
+        // function and copy the outputs appropriately. Once all worker threads
+        // have started, it transitions to `kAllMapsStarted`. After the outputs
+        // are complete, the GetNext call can consume the outputs, and return
+        // the batch to the kEmpty state.
+        enum class BatchState {
+          kEmpty,
+          kInputsFilled,
+          kAllMapsStarted,
+          kOutputsComplete,
+        };
+
+        // Batch captures all the state of an output batch as it progresses
+        // through the machinery. Once the RunnerThread fills inputs, it
+        // transitions to `kInputsFilled`. At this point, the worker threads can
+        // work on it, incrementing outputs_complete for every element of the
+        // input set that is copied into the output Tensors. Once all the input
+        // tuples have been processed (i.e. num_outputs_complete ==
+        // inputs.size()), it transitions to the `kOutputsComplete` stage, where
+        // it is ready to be returned by a `GetBatch` call (called from
+        // `GetNextInternal`).
+        struct Batch {
+          BatchState state;
+          // Aggregates the Status of the input iterator's GetNext
+          // calls, in addition to the Status of the map function invocations.
+          //
+          // In the case where multiple non-OK statuses are encountered, we
+          // return the first one encountered.
+          Status status;
+          // In order to return the correct error status, we keep track of the
+          // error_index.
+          size_t error_index;
+          // The batch_size input tuples (or fewer in the case of the last
+          // batch).
+          // TODO(saeta): Avoid re-allocating vectors all the time!
+          std::vector<std::vector<Tensor>> inputs;
+          std::vector<Tensor> outputs;
+          size_t next_input_to_process;
+          size_t num_outputs_complete;
+
+          Batch() { Reset(); }
+
+          // Resets the Batch state (e.g. after consuming the outputs).
+          void Reset() {
+            state = BatchState::kEmpty;
+            status = Status::OK();
+            inputs.clear();
+            inputs.shrink_to_fit();
+            outputs.clear();
+            outputs.shrink_to_fit();
+            next_input_to_process = 0;
+            num_outputs_complete = 0;
+            error_index = -1;
+          }
+        };
+
+        Iterator* itr_;  // Not owned.
+        mutex mu_;
+        Batch batches_[kWindowSize] GUARDED_BY(mu_);
+        size_t next_input_batch_ GUARDED_BY(mu_) = -1;
+        size_t next_input_ GUARDED_BY(mu_) = 0;
+        size_t next_output_ GUARDED_BY(mu_) = 0;
+        bool cancelled_ GUARDED_BY(mu_) = false;
+        bool reached_eof_ GUARDED_BY(mu_) = false;
+
+        // The runner thread waits on this condition variable for space to be
+        // available. When the client thread takes a value out of the circular
+        // buffer, it notifies this condition variable that space is now
+        // available.
+        condition_variable runner_cond_var_ GUARDED_BY(mu_);
+        // The worker threads wait on this condition variable for available
+        // inputs. When the runner thread makes new inputs available, it
+        // notifies this condition variable.
+        condition_variable worker_cond_var_ GUARDED_BY(mu_);
+        // The client threads wait on this condition variable for avaiable
+        // batched outputs. When worker threads complete a batch, they notify
+        // this condition variable.
+        condition_variable client_cond_var_ GUARDED_BY(mu_);
+      };
+      // Mark NumaBlockManager as a friend of Iterator in order to call
+      // protected Iterator methods during checkpointing.
+      friend NumaBlockManager;
+
+      struct NumaWorkerBlock {
+        NumaBlockManager manager;
+        // TODO(saeta): Migrate to BackgroundWorker.
+        std::vector<std::unique_ptr<Thread>> threads;
+
+        explicit NumaWorkerBlock(Iterator* itr) : manager(itr) {}
+      };
+
+      static void CustomNumaWorkerBlockDeleter(NumaWorkerBlock* ptr) {
+        ptr->~NumaWorkerBlock();
+        port::NUMAFree(ptr, sizeof(NumaWorkerBlock));
+      }
+      static void DefaultNumaWorkerBlockDeleter(NumaWorkerBlock* ptr) {
+        delete ptr;
+      }
+
+      static Status CopyPartialBatch(Tensor* output, const Tensor& value,
+                                     int64 num_elements) {
+        switch (value.dtype()) {
+#define HANDLE_TYPE(type)                                         \
+  case DataTypeToEnum<type>::value: {                             \
+    auto output_t = output->flat_outer_dims<type>();              \
+    auto value_t = value.flat_outer_dims<type>();                 \
+    for (size_t i = 0; i < num_elements; i++) {                   \
+      output_t.template chip<0>(i) = value_t.template chip<0>(i); \
+    }                                                             \
+    return Status::OK();                                          \
+  }
+          TF_CALL_DATASET_TYPES(HANDLE_TYPE);
+#undef HANDLE_TYPE
+          default:
+            return errors::InvalidArgument("Unsupported data type: ",
+                                           DataTypeString(value.dtype()));
+        }
+        return Status::OK();
+      }
+
+      Status EnsureBackgroundThreadsStarted(IteratorContext* ctx)
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
+        if (curr_num_parallel_calls_ >= num_parallel_calls_->value) {
+          // All necessary threads have been started.
+          curr_num_parallel_calls_ = num_parallel_calls_->value;
+          return Status::OK();
+        }
+
+        VLOG(4) << "Starting workers";
+        bool numa_enabled = port::NUMAEnabled();
+
+        if (!numa_enabled) {
+          LOG(INFO) << "NUMA not enabled on this host.";
+        }
+
+        int num_numa_nodes = port::NUMANumNodes();
+        if (num_numa_nodes < 1) {
+          return errors::Internal("The number of NUMA nodes is invalid: ",
+                                  num_numa_nodes);
+        }
+
+        // Only resize when empty to support restoring from checkpoints.
+        if (workers_.empty()) {
+          VLOG(3) << "# NUMA Nodes: " << num_numa_nodes
+                  << ", # Parallel Calls: " << num_parallel_calls_->value;
+          workers_.resize(num_numa_nodes);
+        } else {
+          num_numa_nodes = workers_.size();
+        }
+
+        // Round up num_parallel_calls, with a minimum of 1.
+        const size_t num_threads_per_block =
+            std::max(1LL, (num_parallel_calls_->value + num_numa_nodes - 1) /
+                              num_numa_nodes);
+
+        VLOG(3) << "Starting " << num_threads_per_block * num_numa_nodes
+                << " worker threads, with " << num_threads_per_block
+                << " threads per block.";
+
+        // Only allocate new_ctx if required.
+        std::shared_ptr<IteratorContext> new_ctx;
+
+        for (int i = 0; i < num_numa_nodes; ++i) {
+          if (!workers_[i]) {
+            if (numa_enabled) {
+              // Allocate in appropriate NUMA domain.
+              // 4k page align.
+              void* ptr = port::NUMAMalloc(i, sizeof(NumaWorkerBlock), 0);
+              if (ptr != nullptr) {
+                NumaWorkerBlock* block = new (ptr) NumaWorkerBlock(this);
+                workers_[i] =
+                    std::unique_ptr<NumaWorkerBlock,
+                                    std::function<void(NumaWorkerBlock*)>>(
+                        block, CustomNumaWorkerBlockDeleter);
+              } else {
+                LOG(ERROR) << "Could not NUMA-allocate worker block: " << i;
+              }
+            }
+            // If the NUMA allocation fails, or NUMA is not enabled.
+            if (!workers_[i]) {
+              workers_[i] =
+                  std::unique_ptr<NumaWorkerBlock,
+                                  std::function<void(NumaWorkerBlock*)>>(
+                      new NumaWorkerBlock(this), DefaultNumaWorkerBlockDeleter);
+            }
+          }
+          // Be sure to start threads if num_parallel_calls_ has changed.
+          for (size_t j = workers_[i]->threads.size();
+               j < num_threads_per_block; ++j) {
+            VLOG(3) << "Starting worker " << i << ", " << j;
+            if (!new_ctx) {
+              new_ctx = std::make_shared<IteratorContext>(*ctx);
+            }
+            workers_[i]->threads.emplace_back(ctx->env()->StartThread(
+                {},
+                strings::StrCat("numa_map_and_batch_block_", i, "_thread_", j),
+                [this, new_ctx, i, j]() { WorkerThread(new_ctx, i, j); }));
+            VLOG(3) << "Worker " << i << ", " << j << " successfully started.";
+          }
+        }
+        if (!runner_thread_) {
+          if (!new_ctx) {
+            new_ctx = std::make_shared<IteratorContext>(*ctx);
+          }
+          runner_thread_.reset(ctx->env()->StartThread(
+              {}, "numa_map_runner_thread",
+              [this, new_ctx] { RunnerThread(new_ctx); }));
+        }
+        VLOG(3) << "All workers & runner thread started.";
+        return Status::OK();
+      }
+
+      void AllocateOutput(IteratorContext* ctx, size_t batch_size,
+                          const std::vector<Tensor>& map_fn_outputs,
+                          std::vector<Tensor>* batch_outputs) {
+        DCHECK(dataset()->output_dtypes().size() ==
+               dataset()->output_shapes().size());
+        DCHECK(map_fn_outputs.size() == dataset()->output_dtypes().size());
+        for (size_t i = 0; i < dataset()->output_dtypes().size(); ++i) {
+          TensorShape component_shape({static_cast<uint32>(batch_size)});
+          component_shape.AppendShape(map_fn_outputs.at(i).shape());
+          AllocatorAttributes attr;
+          attr.set_gpu_compatible(true);
+          Tensor component(ctx->allocator(attr), map_fn_outputs.at(i).dtype(),
+                           component_shape);
+          batch_outputs->emplace_back(std::move(component));
+        }
+      }
+
+      void RunnerThread(std::shared_ptr<IteratorContext> ctx)
+          LOCKS_EXCLUDED(mu_) {
+        RecordStart(ctx.get());
+        auto cleanup = gtl::MakeCleanup([this, &ctx] {
+          // Set end of input on all the managers in order to clean up in an
+          // orderly fashion.
+          VLOG(3) << "Setting End of Input on workers_[*]->manager";
+          for (size_t i = 0; i < workers_.size(); ++i) {
+            workers_[i]->manager.SetEndOfInput();
+          }
+          RecordStop(ctx.get());
+        });
+
+        const size_t num_blocks = workers_.size();
+
+        while (true) {
+          for (size_t block = 0; block < num_blocks; ++block) {
+            VLOG(4) << "RunnerThread waiting for input space in block: "
+                    << block;
+            if (TF_PREDICT_FALSE(
+                    !workers_[block]->manager.WaitForInputSpace(ctx.get()))) {
+              VLOG(3) << "RunnerThread exiting due to cancellation.";
+              return;
+            }
+            VLOG(4) << "RunnerThread has space; pulling on upstream for block "
+                    << block;
+
+            Status s;
+            std::vector<std::vector<Tensor>> inputs;
+            bool end_of_sequence = false;
+            for (size_t i = 0; i < dataset()->batch_size_; ++i) {
+              std::vector<Tensor> tuple;
+              s.Update(
+                  input_impl_->GetNext(ctx.get(), &tuple, &end_of_sequence));
+              if (!s.ok()) {
+                break;
+              }
+              if (end_of_sequence) {
+                VLOG(4) << "Runner thread encountered end of sequence.";
+                if (dataset()->drop_remainder_) {
+                  return;
+                }
+                break;
+              }
+              inputs.push_back(std::move(tuple));
+            }
+
+            VLOG(4) << "Moving inputs to block " << block
+                    << ", which has size: " << inputs.size();
+            if (!s.ok() || !inputs.empty()) {
+              workers_[block]->manager.PushInputs(s, std::move(inputs));
+              VLOG(4) << "Inputs moved into block " << block;
+            }
+            if (end_of_sequence) {
+              return;
+            }
+          }
+        }
+      }
+
+      void WorkerThread(std::shared_ptr<IteratorContext> ctx,
+                        const int numa_node, const int thread_num) {
+        RecordStart(ctx.get());
+        WORKER_VLOG(3) << "started.";
+        auto stop_cleanup =
+            gtl::MakeCleanup([this, numa_node, thread_num, &ctx]() {
+              RecordStop(ctx.get());
+              WORKER_VLOG(3) << "exiting.";
+            });
+
+        NumaWorkerBlock* block = workers_[numa_node].get();
+        port::NUMASetThreadNodeAffinity(numa_node);
+        const int num_numa_nodes = port::NUMANumNodes();
+        const int minimum_num_parallel_calls = thread_num * num_numa_nodes;
+
+        while (true) {
+          // Put threads to sleep based on autotuner.
+          {
+            mutex_lock l(*mu_);
+            while (minimum_num_parallel_calls >= num_parallel_calls_->value &&
+                   !cancelled_) {
+              RecordStop(ctx.get());
+              autotune_cond_var_->wait(l);
+              RecordStart(ctx.get());
+            }
+            if (cancelled_) {
+              return;
+            }
+          }
+
+          std::vector<Tensor> input;
+          uint64 index = 0;
+          size_t sequence_number = 0;
+          WORKER_VLOG(4) << "retrieving input.";
+          {
+            tracing::ScopedActivity trace(
+                "NumaMapAndBatch::Iterator::Worker::RetrieveInput");
+            if (!block->manager.RetrieveInput(ctx.get(), &input, &index,
+                                              &sequence_number)) {
+              return;
+            }
+          }
+
+          WORKER_VLOG(4) << "retrieved input; index: " << index
+                         << ", sequence_number: " << sequence_number;
+
+          std::vector<Tensor> return_values;
+          Status s;
+          {
+            tracing::ScopedActivity trace(
+                "NumaMapAndBatch::Iterator::Worker::FunctionExecution");
+            s = dataset()->captured_func_->Run(ctx.get(), std::move(input),
+                                               &return_values);
+          }
+          WORKER_VLOG(4) << "ran function for index: " << index
+                         << ", sequence_number: " << sequence_number;
+
+          if (s.ok()) {
+            std::vector<Tensor>* output = block->manager.GetBatchTensors(
+                sequence_number,
+                [this, ctx, &return_values](size_t batch_size,
+                                            std::vector<Tensor>* output) {
+                  AllocateOutput(ctx.get(), batch_size, return_values, output);
+                });
+            WORKER_VLOG(4) << "copying tensors to batch output.";
+            {
+              tracing::ScopedActivity trace(
+                  "NumaMapAndBatch::Iterator::Worker::BatchCopy");
+              for (size_t i = 0; i < return_values.size() && s.ok(); ++i) {
+                Tensor& tensor = return_values.at(i);
+                Tensor* batch = &output->at(i);
+                if (tensor.NumElements() !=
+                    (batch->NumElements() / batch->dim_size(0))) {
+                  s.Update(errors::InvalidArgument(
+                      "Cannot add tensor to the batch: number of elements does "
+                      "not match. Shapes are: [tensor]: ",
+                      tensor.shape().DebugString(),
+                      ", [batch]: ", batch->shape().DebugString()));
+                  break;
+                }
+                s.Update(batch_util::CopyElementToSlice(std::move(tensor),
+                                                        batch, index));
+              }
+            }
+          }
+
+          block->manager.RecordBatchEntryComplete(sequence_number, index, s);
+          WORKER_VLOG(4) << "finished index: " << index
+                         << ", sequence_number: " << sequence_number;
+        }
+      }
+
+      // mu_ protects shared internal state and is used to coordinate between
+      // the auto-tuner, client threads, worker threads, and the runner thread.
+      const std::shared_ptr<mutex> mu_;
+      const std::shared_ptr<condition_variable> autotune_cond_var_;
+      // The maximum number of parallel calls (can be auto-tuned).
+      const std::shared_ptr<model::SharedState> num_parallel_calls_;
+
+      // Caches the last-seen value of num_parallel_calls_->value to
+      // short-circuit starting workers.
+      int64 curr_num_parallel_calls_ GUARDED_BY(*mu_) = 0;
+
+      std::unique_ptr<IteratorBase> input_impl_;
+      int64 cur_block_ GUARDED_BY(*mu_) = 0;
+      bool global_end_of_input_ GUARDED_BY(*mu_) = false;
+      bool cancelled_ GUARDED_BY(*mu_) = false;
+      std::vector<std::unique_ptr<NumaWorkerBlock,
+                                  std::function<void(NumaWorkerBlock*)>>>
+          workers_;  // Const after initialization.
+      std::unique_ptr<Thread> runner_thread_ GUARDED_BY(*mu_);
+    };
+
+    const DatasetBase* const input_;
+    const int64 batch_size_;
+    const int64 num_parallel_calls_;
+    const bool drop_remainder_;
+    const DataTypeVector output_types_;
+    const std::vector<PartialTensorShape> output_shapes_;
+    const NameAttrList func_;
+    const std::unique_ptr<CapturedFunction> captured_func_;
+  };
+
+  DataTypeVector output_types_;
+  std::vector<PartialTensorShape> output_shapes_;
+  NameAttrList func_;
+};
+
+REGISTER_KERNEL_BUILDER(
+    Name("ExperimentalNumaMapAndBatchDataset").Device(DEVICE_CPU),
+    NumaMapAndBatchDatasetOp);
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index f45a239793..bae56828dc 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -324,6 +324,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       }
 
      private:
+      // BatchResult encapsulates the output batch, as well as anciliary
+      // metadata required to execute the fused map-and-batch operation.
       struct BatchResult {
         explicit BatchResult(int64 batch_size) {
           end_of_input = false;
@@ -331,11 +333,23 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           num_elements = 0;
           output_allocated = false;
           status = Status::OK();
+          status_offset = -1;
         }
 
-        void UpdateStatus(const Status& s) {
-          mutex_lock l(mu);
-          status.Update(s);
+        // UpdateStatus updates the batch's aggregate Status.
+        //
+        // In order to ensure that exactly the first non-OK status is returned
+        // (required to make the behavior is observably identical to a
+        // sequential execution of map followed by batch), we must also keep
+        // track of the offset into the batch that produced `s`.
+        void UpdateStatus(const Status& s, int64 offset) {
+          if (TF_PREDICT_FALSE(!s.ok())) {
+            mutex_lock l(mu);
+            if (status.ok() || offset < status_offset) {
+              status = s;
+              status_offset = offset;
+            }
+          }
         }
 
         mutex mu;
@@ -344,6 +358,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         std::vector<Tensor> output;
         bool output_allocated GUARDED_BY(mu);
         Status status GUARDED_BY(mu);
+        int64 status_offset GUARDED_BY(mu);
         // Counts the number of outstanding calls for this batch.
         int64 num_calls;  // access guarded by owner's mutex
       };
@@ -379,7 +394,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
         std::shared_ptr<std::vector<Tensor>> return_values =
             std::make_shared<std::vector<Tensor>>();
         auto done = [this, ctx, result, return_values, offset](Status status) {
-          result->UpdateStatus(status);
+          result->UpdateStatus(status, offset);
           if (status.ok()) {
             EnsureOutputAllocated(ctx, result, return_values);
             for (size_t i = 0; i < return_values->size(); ++i) {
@@ -389,11 +404,14 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                   (batch->NumElements() / batch->dim_size(0))) {
                 TensorShape batch_shape = batch->shape();
                 batch_shape.RemoveDim(0);
-                result->UpdateStatus(errors::InvalidArgument(
-                    "Cannot add tensor to the batch: number of elements does "
-                    "not match. Shapes are: [tensor]: ",
-                    tensor.shape().DebugString(),
-                    ", [batch]: ", batch_shape.DebugString()));
+                result->UpdateStatus(
+                    errors::InvalidArgument(
+                        "Cannot add tensor to the batch: number of elements "
+                        "does "
+                        "not match. Shapes are: [tensor]: ",
+                        tensor.shape().DebugString(),
+                        ", [batch]: ", batch_shape.DebugString()),
+                    offset);
                 break;
               }
               // TODO(mrry): Add a version of DoParallelConcat that allows us to
@@ -402,7 +420,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
               Status copy_status = ::tensorflow::functor::DoParallelConcat(
                   *dataset()->device_, tensor, offset, batch);
               if (!copy_status.ok()) {
-                result->UpdateStatus(copy_status);
+                result->UpdateStatus(copy_status, offset);
                 break;
               }
             }
diff --git a/tensorflow/core/ops/experimental_dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc
index f6bd5dce26..bbbecc50f8 100644
--- a/tensorflow/core/ops/experimental_dataset_ops.cc
+++ b/tensorflow/core/ops/experimental_dataset_ops.cc
@@ -138,6 +138,32 @@ REGISTER_OP("ExperimentalAssertNextDataset")
       return shape_inference::ScalarShape(c);
     });
 
+REGISTER_OP("ExperimentalNumaMapAndBatchDataset")
+    .Input("input_dataset: variant")
+    .Input("other_arguments: Targuments")
+    .Input("batch_size: int64")
+    .Input("num_parallel_calls: int64")
+    .Input("drop_remainder: bool")
+    .Output("handle: variant")
+    .Attr("f: func")
+    .Attr("Targuments: list(type) >= 0")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      // Use index from the end to retrieve the Input shapes,
+      // so that to avoid guessing the length of "other_arguments".
+      // batch_size, num_parallel_batches, and drop_remainder are 0-D scalars.
+      shape_inference::ShapeHandle unused;
+      TF_RETURN_IF_ERROR(
+          c->WithRank(c->input(c->num_inputs() - 3), 0, &unused));
+      TF_RETURN_IF_ERROR(
+          c->WithRank(c->input(c->num_inputs() - 2), 0, &unused));
+      TF_RETURN_IF_ERROR(
+          c->WithRank(c->input(c->num_inputs() - 1), 0, &unused));
+
+      return shape_inference::ScalarShape(c);
+    });
+
 REGISTER_OP("ExperimentalLMDBDataset")
     .Input("filenames: string")
     .Output("handle: variant")
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index d444c4082e..5ead6d1c75 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
@@ -38,12 +39,17 @@ from tensorflow.python.platform import test
 class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
-      ("Default", None, None),
-      ("SequentialCalls", 1, None),
-      ("ParallelCalls", 2, None),
-      ("ParallelBatches", None, 10),
+      ("Default", None, None, False),
+      ("SequentialCalls", 1, None, False),
+      ("ParallelCalls", 2, None, False),
+      ("ParallelBatches", None, 10, False),
+      ("DefaultNUMA", None, None, True),
+      ("SequentialCallsNUMA", 1, None, True),
+      ("ParallelCallsNUMA", 2, None, True),
+      ("ParallelBatchesNUMA", None, 10, True),
   )
-  def testMapAndBatch(self, num_parallel_calls, num_parallel_batches):
+  def testMapAndBatch(self, num_parallel_calls, num_parallel_batches,
+                      numa_aware):
     """Test a dataset that maps a TF function across its input elements."""
     # The pipeline is TensorSliceDataset ->
     # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size).
@@ -57,14 +63,20 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     def _map_fn(x, y, z):
       return math_ops.square(x), math_ops.square(y), math_ops.square(z)
 
-    iterator = (
+    dataset = (
         dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
             batching.map_and_batch(
                 map_func=_map_fn,
                 batch_size=batch_size,
                 num_parallel_calls=num_parallel_calls,
-                num_parallel_batches=num_parallel_batches))
-        .make_initializable_iterator())
+                num_parallel_batches=num_parallel_batches)))
+
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+
+    iterator = dataset.make_initializable_iterator()
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -115,16 +127,25 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         sess.run(init_op, feed_dict={count: 14, batch_size: 0})
 
   @parameterized.named_parameters(
-      ("Even", False),
-      ("Uneven", True),
+      ("Even", False, False),
+      ("Uneven", True, False),
+      ("EvenNUMA", False, True),
+      ("UnevenNUMA", True, True),
   )
-  def testMapAndBatchPartialBatch(self, drop_remainder):
-    iterator = (
+  def testMapAndBatchPartialBatch(self, drop_remainder, numa_aware):
+    dataset = (
         dataset_ops.Dataset.range(10).apply(
             batching.map_and_batch(
                 lambda x: array_ops.reshape(x * x, [1]),
                 batch_size=4,
-                drop_remainder=drop_remainder)).make_one_shot_iterator())
+                drop_remainder=drop_remainder)))
+
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+
     if drop_remainder:
       self.assertEqual([4, 1], iterator.output_shapes.as_list())
     else:
@@ -138,11 +159,21 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
-  def testMapAndBatchYieldsPartialBatch(self):
-    iterator = (dataset_ops.Dataset.range(10)
-                .apply(batching.map_and_batch(
-                    lambda x: array_ops.reshape(x * x, [1]), 4))
-                .make_one_shot_iterator())
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchYieldsPartialBatch(self, numa_aware):
+    dataset = (
+        dataset_ops.Dataset.range(10).apply(
+            batching.map_and_batch(lambda x: array_ops.reshape(x * x, [1]), 4)))
+
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+
+    iterator = dataset.make_one_shot_iterator()
     self.assertEqual([None, 1], iterator.output_shapes.as_list())
     next_element = iterator.get_next()
     with self.cached_session() as sess:
@@ -152,10 +183,19 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(next_element)
 
-  def testMapAndBatchParallelGetNext(self):
-    iterator = (dataset_ops.Dataset.range(50000)
-                .apply(batching.map_and_batch(lambda x: x, batch_size=100))
-                .make_one_shot_iterator())
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchParallelGetNext(self, numa_aware):
+    dataset = dataset_ops.Dataset.range(50000).apply(
+        batching.map_and_batch(lambda x: x, batch_size=100))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+
     elements = []
     for _ in range(100):
       elements.append(iterator.get_next())
@@ -165,17 +205,26 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         got.sort(key=lambda x: x[0])
         expected = []
         for j in range(100):
-          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
+          expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
         self.assertAllEqual(got, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(elements)
 
-  def testMapAndBatchParallelGetNextDropRemainder(self):
-    iterator = (
-        dataset_ops.Dataset.range(49999).apply(
-            batching.map_and_batch(
-                lambda x: x, batch_size=100, drop_remainder=True))
-        .make_one_shot_iterator())
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchParallelGetNextDropRemainder(self, numa_aware):
+    dataset = dataset_ops.Dataset.range(49999).apply(
+        batching.map_and_batch(
+            lambda x: x, batch_size=100, drop_remainder=True))
+
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+
     elements = []
     for _ in range(100):
       elements.append(iterator.get_next())
@@ -185,19 +234,29 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         got.sort(key=lambda x: x[0])
         expected = []
         for j in range(100):
-          expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
+          expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
         self.assertAllEqual(got, expected)
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(elements)
 
-  def testMapAndBatchSparse(self):
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchSparse(self, numa_aware):
 
     def _sparse(i):
       return sparse_tensor.SparseTensorValue(
           indices=[[0]], values=(i * [1]), dense_shape=[1])
 
-    iterator = dataset_ops.Dataset.range(10).apply(
-        batching.map_and_batch(_sparse, 5)).make_initializable_iterator()
+    dataset = dataset_ops.Dataset.range(10).apply(
+        batching.map_and_batch(_sparse, 5))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_initializable_iterator()
+
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -214,21 +273,33 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testMapAndBatchFails(self):
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchFails(self, numa_aware):
     """Test a dataset that maps a TF function across its input elements."""
     dataset = dataset_ops.Dataset.from_tensors(
         array_ops.check_numerics(
             constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
     batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-    iterator = (
-        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
-        .make_initializable_iterator())
+    dataset = dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_initializable_iterator()
+
     init_op = iterator.initializer
     with self.cached_session() as sess:
       with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
         sess.run(init_op, feed_dict={batch_size: 14})
 
-  def testMapAndBatchShapeMismatch(self):
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchShapeMismatch(self, numa_aware):
     """Test a dataset that maps a TF function across its input elements."""
 
     def generator():
@@ -240,9 +311,13 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = dataset_ops.Dataset.from_generator(
         generator, output_types=dtypes.int32)
     batch_size = 4
-    iterator = (
-        dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
-        .make_initializable_iterator())
+    dataset = dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_initializable_iterator()
+
     init_op = iterator.initializer
     get_next = iterator.get_next()
     with self.cached_session() as sess:
@@ -251,7 +326,11 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
                                    "number of elements does not match"):
         sess.run(get_next)
 
-  def testMapAndBatchImplicitDispose(self):
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchImplicitDispose(self, numa_aware):
     # Tests whether a map and batch dataset will be cleaned up correctly when
     # the pipeline does not run it until exhaustion.
     # The pipeline is TensorSliceDataset -> RepeatDataset(1000) ->
@@ -266,6 +345,10 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
         1000).apply(batching.map_and_batch(_map_fn, batch_size=100))
     dataset = dataset.prefetch(5)
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
     iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
@@ -274,26 +357,38 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         sess.run(get_next)
 
   @parameterized.named_parameters(
-      ("1", 0),
-      ("2", 5),
-      ("3", 10),
-      ("4", 90),
-      ("5", 95),
-      ("6", 99),
+      ("1", 0, False),
+      ("2", 5, False),
+      ("3", 10, False),
+      ("4", 90, False),
+      ("5", 95, False),
+      ("6", 99, False),
+      ("1NUMA", 0, True),
+      ("2NUMA", 5, True),
+      ("3NUMA", 10, True),
+      ("4NUMA", 90, True),
+      ("5NUMA", 95, True),
+      ("6NUMA", 99, True),
   )
-  def testMapAndBatchOutOfRangeError(self, threshold):
+  def testMapAndBatchOutOfRangeError(self, threshold, numa_aware):
 
     def raising_py_fn(i):
-      if i >= threshold:
+      if i == threshold:
         raise StopIteration()
+      elif i > threshold:
+        raise RuntimeError("Alternate error; you shouldn't see me! (i: %s)" % i)
       else:
         return i
 
-    iterator = (
-        dataset_ops.Dataset.range(100).apply(
-            batching.map_and_batch(
-                lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
-                batch_size=10)).make_one_shot_iterator())
+    dataset = dataset_ops.Dataset.range(100).apply(
+        batching.map_and_batch(
+            lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
+            batch_size=10))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
@@ -307,25 +402,42 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         sess.run(get_next)
 
   @parameterized.named_parameters(
-      ("1", False, dtypes.bool),
-      ("2", -42, dtypes.int8),
-      ("3", -42, dtypes.int16),
-      ("4", -42, dtypes.int32),
-      ("5", -42, dtypes.int64),
-      ("6", 42, dtypes.uint8),
-      ("7", 42, dtypes.uint16),
-      ("8", 42.0, dtypes.float16),
-      ("9", 42.0, dtypes.float32),
-      ("10", 42.0, dtypes.float64),
-      ("11", b"hello", dtypes.string),
+      ("1", False, dtypes.bool, False),
+      ("2", -42, dtypes.int8, False),
+      ("3", -42, dtypes.int16, False),
+      ("4", -42, dtypes.int32, False),
+      ("5", -42, dtypes.int64, False),
+      ("6", 42, dtypes.uint8, False),
+      ("7", 42, dtypes.uint16, False),
+      ("8", 42.0, dtypes.float16, False),
+      ("9", 42.0, dtypes.float32, False),
+      ("10", 42.0, dtypes.float64, False),
+      ("11", b"hello", dtypes.string, False),
+      ("1NUMA", False, dtypes.bool, True),
+      ("2NUMA", -42, dtypes.int8, True),
+      ("3NUMA", -42, dtypes.int16, True),
+      ("4NUMA", -42, dtypes.int32, True),
+      ("5NUMA", -42, dtypes.int64, True),
+      ("6NUMA", 42, dtypes.uint8, True),
+      ("7NUMA", 42, dtypes.uint16, True),
+      ("8NUMA", 42.0, dtypes.float16, True),
+      ("9NUMA", 42.0, dtypes.float32, True),
+      ("10NUMA", 42.0, dtypes.float64, True),
+      ("11NUMA", b"hello", dtypes.string, True),
   )
-  def testMapAndBatchTypes(self, element, dtype):
+  def testMapAndBatchTypes(self, element, dtype, numa_aware):
+
     def gen():
       yield element
 
     dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply(
         batching.map_and_batch(lambda x: x, batch_size=10))
 
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+
     get_next = dataset.make_one_shot_iterator().get_next()
 
     with self.cached_session() as sess:
@@ -363,6 +475,40 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(iterator.initializer, feed_dict={captured_t: 42})
       self.assertAllEqual([42] * 10, sess.run(get_next))
 
+  @parameterized.named_parameters(
+      ("Normal", False),
+      ("NUMA", True),
+  )
+  def testMapAndBatchControlFlow(self, numa_aware):
+
+    def map_fn(x):
+      previous_cond_v2_value = control_flow_ops.ENABLE_COND_V2
+      control_flow_ops.ENABLE_COND_V2 = True
+      return_value = control_flow_ops.cond(x < 50, lambda: x + 1, lambda: x * x)
+      control_flow_ops.ENABLE_COND_V2 = previous_cond_v2_value
+      return return_value
+
+    dataset = dataset_ops.Dataset.range(100).apply(
+        batching.map_and_batch(map_fn, batch_size=10))
+    if numa_aware:
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+    with self.cached_session() as sess:
+      for i in range(10):
+        print("Case %d" % i)
+        if i < 5:
+          self.assertAllEqual([i * 10 + j + 1 for j in range(10)],
+                              sess.run(get_next))
+        else:
+          self.assertAllEqual(
+              [((i * 10) + j) * ((i * 10) + j) for j in range(10)],
+              sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
index c92bb8b9bc..5a0a73fd83 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
@@ -161,6 +161,7 @@ py_test(
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
         "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
@@ -199,6 +200,7 @@ py_test(
     deps = [
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
+        "//tensorflow/python/data/experimental/ops:batching",
         "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
index 82516356df..d38255a6ea 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import time
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.data.experimental.ops import batching
@@ -29,7 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class ModelDatasetTest(test_base.DatasetTestBase):
+class ModelDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def testModelMap(self):
     k = 1024 * 1024
@@ -82,7 +83,11 @@ class ModelDatasetTest(test_base.DatasetTestBase):
           (np.median(deltas), np.mean(deltas), np.std(deltas), np.min(deltas),
            np.max(deltas)))
 
-  def testModelMapAndBatch(self):
+  @parameterized.named_parameters(
+      ("Default", False),
+      ("NUMA", True),
+  )
+  def testModelMapAndBatch(self, numa_aware):
     batch_size = 16
     k = 1024 * 1024
     dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k),
@@ -95,6 +100,8 @@ class ModelDatasetTest(test_base.DatasetTestBase):
             batch_size=batch_size))
     options = dataset_ops.Options()
     options.experimental_autotune = True
+    if numa_aware:
+      options.experimental_numa_aware = True
     iterator = dataset.with_options(options).make_one_shot_iterator()
     get_next = iterator.get_next()
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
index 760cd8cc4e..2ef29796ab 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_op_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
@@ -59,6 +60,21 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testNumaAwareRewrite(self):
+    dataset = dataset_ops.Dataset.range(10).apply(
+        optimization.assert_next(["NumaMapAndBatch"])).apply(
+            batching.map_and_batch(lambda x: x * x, 10))
+    options = dataset_ops.Options()
+    options.experimental_numa_aware = True
+    dataset = dataset.with_options(options)
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      self.assertAllEqual([x * x for x in range(10)], sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
   def testOptimizationStatefulFunction(self):
     dataset = dataset_ops.Dataset.range(10).map(
         lambda _: random_ops.random_uniform([])).batch(10)
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
index e556b65b7c..a97cff9fbb 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD
@@ -306,6 +306,21 @@ py_test(
     ],
 )
 
+py_test(
+    name = "numa_map_and_batch_dataset_serialization_test",
+    size = "medium",
+    srcs = ["numa_map_and_batch_dataset_serialization_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":dataset_serialization_test_base",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python/data/experimental/ops:batching",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 py_test(
     name = "map_dataset_serialization_test",
     size = "medium",
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py
new file mode 100644
index 0000000000..04aab329cd
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/numa_map_and_batch_dataset_serialization_test.py
@@ -0,0 +1,95 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the MapAndBatchDataset serialization."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
+from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class MapAndBatchDatasetSerializationTest(
+    dataset_serialization_test_base.DatasetSerializationTestBase):
+
+  def testNumParallelBatches(self):
+    range_size = 11
+    num_repeats = 2
+    batch_size = 5
+    total_outputs = range_size * num_repeats
+    num_outputs_drop_remainder = total_outputs // batch_size
+    num_outputs_keep_remainder = int(math.ceil(total_outputs / batch_size))
+    num_parallel_batches = 2
+
+    def build_ds(range_start, drop_remainder=False):
+
+      def _map_fn(x):
+        return math_ops.square(x)
+
+      ds = dataset_ops.Dataset.range(
+          range_start, range_start + range_size).repeat(num_repeats).apply(
+              batching.map_and_batch(
+                  map_func=_map_fn,
+                  batch_size=batch_size,
+                  num_parallel_batches=num_parallel_batches,
+                  drop_remainder=drop_remainder))
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      return ds.with_options(options)
+
+    self.run_core_tests(lambda: build_ds(10), lambda: build_ds(15),
+                        num_outputs_keep_remainder)
+    self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True),
+                        num_outputs_drop_remainder)
+
+  def testNumParallelCalls(self):
+    range_size = 11
+    num_repeats = 2
+    batch_size = 5
+    total_outputs = range_size * num_repeats
+    num_outputs_drop_remainder = total_outputs // batch_size
+    num_outputs_keep_remainder = int(math.ceil(total_outputs / batch_size))
+    num_parallel_calls = 7
+
+    def build_ds(range_start, drop_remainder=False):
+
+      def _map_fn(x):
+        return math_ops.square(x)
+
+      ds = dataset_ops.Dataset.range(
+          range_start, range_start + range_size).repeat(num_repeats).apply(
+              batching.map_and_batch(
+                  map_func=_map_fn,
+                  batch_size=batch_size,
+                  num_parallel_calls=num_parallel_calls,
+                  drop_remainder=drop_remainder))
+      options = dataset_ops.Options()
+      options.experimental_numa_aware = True
+      return ds.with_options(options)
+
+    self.run_core_tests(lambda: build_ds(10), lambda: build_ds(15),
+                        num_outputs_keep_remainder)
+    self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True),
+                        num_outputs_drop_remainder)
+
+
+if __name__ == "__main__":
+  test.main()
+
diff --git a/tensorflow/python/data/experimental/ops/BUILD b/tensorflow/python/data/experimental/ops/BUILD
index 915d399f1b..46a9552b61 100644
--- a/tensorflow/python/data/experimental/ops/BUILD
+++ b/tensorflow/python/data/experimental/ops/BUILD
@@ -122,6 +122,7 @@ py_library(
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dataset_ops_gen",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:experimental_dataset_ops_gen",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:tensor_shape",
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index cf52f7529a..6195747671 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1410,6 +1410,8 @@ class Options(object):
        "Whether to eliminate no-op transformations."),
       ("experimental_shuffle_and_repeat_fusion", bool,
        "Whether to fuse shuffle and repeat transformations."),
+      ("experimental_numa_aware", bool,
+       "Whether to use NUMA-aware operations."),
   ]:
 
     def _make_getter(name):  # pylint: disable=no-self-argument
@@ -1458,6 +1460,9 @@ class Options(object):
     for exp_opt in experimental_optimizations:
       if getattr(self, "experimental_" + exp_opt):
         result.append(exp_opt)
+
+    if getattr(self, "experimental_numa_aware"):
+      result.append("map_and_batch_numa_aware_replacement")
     return result
 
   def merge(self, options):
@@ -1485,7 +1490,7 @@ class Options(object):
           "experimental_map_and_filter_fusion", "experimental_map_fusion",
           "experimental_map_parallelization", "experimental_map_vectorization",
           "experimental_noop_elimination",
-          "experimental_shuffle_and_repeat_fusion"
+          "experimental_shuffle_and_repeat_fusion", "experimental_numa_aware",
       ]:
         this = getattr(result, name)
         that = getattr(other, name)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
index d15dccc173..22256996d3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
@@ -42,6 +42,10 @@ tf_class {
     name: "experimental_noop_elimination"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "experimental_numa_aware"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "experimental_shuffle_and_repeat_fusion"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
index d15dccc173..22256996d3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
@@ -42,6 +42,10 @@ tf_class {
     name: "experimental_noop_elimination"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "experimental_numa_aware"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "experimental_shuffle_and_repeat_fusion"
     mtype: "<type \'property\'>"
-- 
GitLab


From 8c2a52b26f21167ed0fcec7859850e38d0c216f9 Mon Sep 17 00:00:00 2001
From: Pavel Sountsov <siege@google.com>
Date: Tue, 9 Oct 2018 11:56:25 -0700
Subject: [PATCH 0618/1085] Silence tf.distributions deprecation messages
 caused by internal global function calls.

E.g. register_kl calls would trigger such warnings. This spam was exacerbated
by the fact that it happens before logging is initialized, so it is dumped
prominently to STDERR. Worse yet it also happened no matter whether the user
imported any symbols from tf.distributions or not as the relevant code is
executed when you import TensorFlow.

PiperOrigin-RevId: 216396036
---
 tensorflow/contrib/distributions/__init__.py  | 128 +++++++++---------
 .../python/ops/distributions/distributions.py |  35 ++---
 2 files changed, 85 insertions(+), 78 deletions(-)

diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py
index 5cec93c4df..343eae3440 100644
--- a/tensorflow/contrib/distributions/__init__.py
+++ b/tensorflow/contrib/distributions/__init__.py
@@ -18,69 +18,73 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member
+from tensorflow.python.util import deprecation
 
-from tensorflow.contrib.distributions.python.ops import bijectors
-from tensorflow.contrib.distributions.python.ops.autoregressive import *
-from tensorflow.contrib.distributions.python.ops.batch_reshape import *
-from tensorflow.contrib.distributions.python.ops.binomial import *
-from tensorflow.contrib.distributions.python.ops.cauchy import *
-from tensorflow.contrib.distributions.python.ops.chi2 import *
-from tensorflow.contrib.distributions.python.ops.conditional_distribution import *
-from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import *
-from tensorflow.contrib.distributions.python.ops.deterministic import *
-from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular
-from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular_inverse
-from tensorflow.contrib.distributions.python.ops.distribution_util import matrix_diag_transform
-from tensorflow.contrib.distributions.python.ops.distribution_util import reduce_weighted_logsumexp
-from tensorflow.contrib.distributions.python.ops.distribution_util import softplus_inverse
-from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag
-from tensorflow.contrib.distributions.python.ops.estimator import *
-from tensorflow.contrib.distributions.python.ops.geometric import *
-from tensorflow.contrib.distributions.python.ops.half_normal import *
-from tensorflow.contrib.distributions.python.ops.independent import *
-from tensorflow.contrib.distributions.python.ops.inverse_gamma import *
-from tensorflow.contrib.distributions.python.ops.kumaraswamy import *
-from tensorflow.contrib.distributions.python.ops.logistic import *
-from tensorflow.contrib.distributions.python.ops.mixture import *
-from tensorflow.contrib.distributions.python.ops.mixture_same_family import *
-from tensorflow.contrib.distributions.python.ops.moving_stats import *
-from tensorflow.contrib.distributions.python.ops.mvn_diag import *
-from tensorflow.contrib.distributions.python.ops.mvn_diag_plus_low_rank import *
-from tensorflow.contrib.distributions.python.ops.mvn_full_covariance import *
-from tensorflow.contrib.distributions.python.ops.mvn_tril import *
-from tensorflow.contrib.distributions.python.ops.negative_binomial import *
-from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors import *
-from tensorflow.contrib.distributions.python.ops.onehot_categorical import *
-from tensorflow.contrib.distributions.python.ops.poisson import *
-from tensorflow.contrib.distributions.python.ops.poisson_lognormal import *
-from tensorflow.contrib.distributions.python.ops.quantized_distribution import *
-from tensorflow.contrib.distributions.python.ops.relaxed_bernoulli import *
-from tensorflow.contrib.distributions.python.ops.relaxed_onehot_categorical import *
-from tensorflow.contrib.distributions.python.ops.sample_stats import *
-from tensorflow.contrib.distributions.python.ops.seed_stream import *
-from tensorflow.contrib.distributions.python.ops.sinh_arcsinh import *
-from tensorflow.contrib.distributions.python.ops.test_util import *
-from tensorflow.contrib.distributions.python.ops.vector_diffeomixture import *
-from tensorflow.contrib.distributions.python.ops.vector_exponential_diag import *
-from tensorflow.contrib.distributions.python.ops.vector_laplace_diag import *
-from tensorflow.contrib.distributions.python.ops.vector_sinh_arcsinh_diag import *
-from tensorflow.contrib.distributions.python.ops.wishart import *
-from tensorflow.python.ops.distributions.bernoulli import *
-from tensorflow.python.ops.distributions.beta import *
-from tensorflow.python.ops.distributions.categorical import *
-from tensorflow.python.ops.distributions.dirichlet import *
-from tensorflow.python.ops.distributions.dirichlet_multinomial import *
-from tensorflow.python.ops.distributions.distribution import *
-from tensorflow.python.ops.distributions.exponential import *
-from tensorflow.python.ops.distributions.gamma import *
-from tensorflow.python.ops.distributions.kullback_leibler import *
-from tensorflow.python.ops.distributions.laplace import *
-from tensorflow.python.ops.distributions.multinomial import *
-from tensorflow.python.ops.distributions.normal import *
-from tensorflow.python.ops.distributions.student_t import *
-from tensorflow.python.ops.distributions.transformed_distribution import *
-from tensorflow.python.ops.distributions.uniform import *
+
+# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member,g-import-not-at-top
+
+with deprecation.silence():
+  from tensorflow.contrib.distributions.python.ops import bijectors
+  from tensorflow.contrib.distributions.python.ops.autoregressive import *
+  from tensorflow.contrib.distributions.python.ops.batch_reshape import *
+  from tensorflow.contrib.distributions.python.ops.binomial import *
+  from tensorflow.contrib.distributions.python.ops.cauchy import *
+  from tensorflow.contrib.distributions.python.ops.chi2 import *
+  from tensorflow.contrib.distributions.python.ops.conditional_distribution import *
+  from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import *
+  from tensorflow.contrib.distributions.python.ops.deterministic import *
+  from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular
+  from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular_inverse
+  from tensorflow.contrib.distributions.python.ops.distribution_util import matrix_diag_transform
+  from tensorflow.contrib.distributions.python.ops.distribution_util import reduce_weighted_logsumexp
+  from tensorflow.contrib.distributions.python.ops.distribution_util import softplus_inverse
+  from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag
+  from tensorflow.contrib.distributions.python.ops.estimator import *
+  from tensorflow.contrib.distributions.python.ops.geometric import *
+  from tensorflow.contrib.distributions.python.ops.half_normal import *
+  from tensorflow.contrib.distributions.python.ops.independent import *
+  from tensorflow.contrib.distributions.python.ops.inverse_gamma import *
+  from tensorflow.contrib.distributions.python.ops.kumaraswamy import *
+  from tensorflow.contrib.distributions.python.ops.logistic import *
+  from tensorflow.contrib.distributions.python.ops.mixture import *
+  from tensorflow.contrib.distributions.python.ops.mixture_same_family import *
+  from tensorflow.contrib.distributions.python.ops.moving_stats import *
+  from tensorflow.contrib.distributions.python.ops.mvn_diag import *
+  from tensorflow.contrib.distributions.python.ops.mvn_diag_plus_low_rank import *
+  from tensorflow.contrib.distributions.python.ops.mvn_full_covariance import *
+  from tensorflow.contrib.distributions.python.ops.mvn_tril import *
+  from tensorflow.contrib.distributions.python.ops.negative_binomial import *
+  from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors import *
+  from tensorflow.contrib.distributions.python.ops.onehot_categorical import *
+  from tensorflow.contrib.distributions.python.ops.poisson import *
+  from tensorflow.contrib.distributions.python.ops.poisson_lognormal import *
+  from tensorflow.contrib.distributions.python.ops.quantized_distribution import *
+  from tensorflow.contrib.distributions.python.ops.relaxed_bernoulli import *
+  from tensorflow.contrib.distributions.python.ops.relaxed_onehot_categorical import *
+  from tensorflow.contrib.distributions.python.ops.sample_stats import *
+  from tensorflow.contrib.distributions.python.ops.seed_stream import *
+  from tensorflow.contrib.distributions.python.ops.sinh_arcsinh import *
+  from tensorflow.contrib.distributions.python.ops.test_util import *
+  from tensorflow.contrib.distributions.python.ops.vector_diffeomixture import *
+  from tensorflow.contrib.distributions.python.ops.vector_exponential_diag import *
+  from tensorflow.contrib.distributions.python.ops.vector_laplace_diag import *
+  from tensorflow.contrib.distributions.python.ops.vector_sinh_arcsinh_diag import *
+  from tensorflow.contrib.distributions.python.ops.wishart import *
+  from tensorflow.python.ops.distributions.bernoulli import *
+  from tensorflow.python.ops.distributions.beta import *
+  from tensorflow.python.ops.distributions.categorical import *
+  from tensorflow.python.ops.distributions.dirichlet import *
+  from tensorflow.python.ops.distributions.dirichlet_multinomial import *
+  from tensorflow.python.ops.distributions.distribution import *
+  from tensorflow.python.ops.distributions.exponential import *
+  from tensorflow.python.ops.distributions.gamma import *
+  from tensorflow.python.ops.distributions.kullback_leibler import *
+  from tensorflow.python.ops.distributions.laplace import *
+  from tensorflow.python.ops.distributions.multinomial import *
+  from tensorflow.python.ops.distributions.normal import *
+  from tensorflow.python.ops.distributions.student_t import *
+  from tensorflow.python.ops.distributions.transformed_distribution import *
+  from tensorflow.python.ops.distributions.uniform import *
 
 # pylint: enable=unused-import,wildcard-import,line-too-long,g-importing-member
 
diff --git a/tensorflow/python/ops/distributions/distributions.py b/tensorflow/python/ops/distributions/distributions.py
index 59ed455e43..b18caa5b2e 100644
--- a/tensorflow/python/ops/distributions/distributions.py
+++ b/tensorflow/python/ops/distributions/distributions.py
@@ -17,21 +17,24 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.util import deprecation
 
-# pylint: disable=wildcard-import,unused-import
-from tensorflow.python.ops.distributions.bernoulli import Bernoulli
-from tensorflow.python.ops.distributions.beta import Beta
-from tensorflow.python.ops.distributions.categorical import Categorical
-from tensorflow.python.ops.distributions.dirichlet import Dirichlet
-from tensorflow.python.ops.distributions.dirichlet_multinomial import DirichletMultinomial
-from tensorflow.python.ops.distributions.distribution import *
-from tensorflow.python.ops.distributions.exponential import Exponential
-from tensorflow.python.ops.distributions.gamma import Gamma
-from tensorflow.python.ops.distributions.kullback_leibler import *
-from tensorflow.python.ops.distributions.laplace import Laplace
-from tensorflow.python.ops.distributions.multinomial import Multinomial
-from tensorflow.python.ops.distributions.normal import Normal
-from tensorflow.python.ops.distributions.student_t import StudentT
-from tensorflow.python.ops.distributions.uniform import Uniform
-# pylint: enable=wildcard-import,unused-import
 
+# pylint: disable=wildcard-import,unused-import,g-import-not-at-top
+with deprecation.silence():
+  from tensorflow.python.ops.distributions.bernoulli import Bernoulli
+  from tensorflow.python.ops.distributions.beta import Beta
+  from tensorflow.python.ops.distributions.categorical import Categorical
+  from tensorflow.python.ops.distributions.dirichlet import Dirichlet
+  from tensorflow.python.ops.distributions.dirichlet_multinomial import DirichletMultinomial
+  from tensorflow.python.ops.distributions.distribution import *
+  from tensorflow.python.ops.distributions.exponential import Exponential
+  from tensorflow.python.ops.distributions.gamma import Gamma
+  from tensorflow.python.ops.distributions.kullback_leibler import *
+  from tensorflow.python.ops.distributions.laplace import Laplace
+  from tensorflow.python.ops.distributions.multinomial import Multinomial
+  from tensorflow.python.ops.distributions.normal import Normal
+  from tensorflow.python.ops.distributions.student_t import StudentT
+  from tensorflow.python.ops.distributions.uniform import Uniform
+# pylint: enable=wildcard-import,unused-import
+del deprecation
-- 
GitLab


From 0c6baae5af46bb22ea52db724e2194845d3bbf8c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 12:24:05 -0700
Subject: [PATCH 0619/1085] Add RaggedTensors to tf.core. Moving the
 RaggedGather op kernel.

PiperOrigin-RevId: 216400726
---
 tensorflow/core/BUILD                         |  15 +
 .../base_api/api_def_RaggedGather.pbtxt       |  81 +++++
 tensorflow/core/kernels/BUILD                 |  31 ++
 tensorflow/core/kernels/ragged_gather_op.cc   | 292 ++++++++++++++++++
 .../core/kernels/ragged_gather_op_test.cc     | 281 +++++++++++++++++
 tensorflow/core/ops/ragged_array_ops.cc       |  85 +++++
 6 files changed, 785 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt
 create mode 100644 tensorflow/core/kernels/ragged_gather_op.cc
 create mode 100644 tensorflow/core/kernels/ragged_gather_op_test.cc
 create mode 100644 tensorflow/core/ops/ragged_array_ops.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index acea8e2217..9e7806342a 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1154,6 +1154,19 @@ tf_gen_op_libs(
     ],
 )
 
+cc_library(
+    name = "ragged_ops",
+    deps = [
+        ":ragged_array_ops_op_lib",
+    ],
+)
+
+tf_gen_op_libs(
+    op_lib_names = [
+        "ragged_array_ops",
+    ],
+)
+
 cc_library(
     name = "ops",
     visibility = ["//visibility:public"],
@@ -1187,6 +1200,7 @@ cc_library(
         ":nn_ops_op_lib",
         ":no_op_op_lib",
         ":parsing_ops_op_lib",
+        ":ragged_ops",
         ":random_ops_op_lib",
         ":remote_fused_graph_ops_op_lib",
         ":resource_variable_ops_op_lib",
@@ -1340,6 +1354,7 @@ cc_library(
         "//tensorflow/core/kernels:parameterized_truncated_normal_op",
         "//tensorflow/core/kernels:parsing",
         "//tensorflow/core/kernels:partitioned_function_ops",
+        "//tensorflow/core/kernels:ragged_ops",
         "//tensorflow/core/kernels:random_ops",
         "//tensorflow/core/kernels:random_poisson_op",
         "//tensorflow/core/kernels:remote_fused_graph_ops",
diff --git a/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt b/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt
new file mode 100644
index 0000000000..240c987dda
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt
@@ -0,0 +1,81 @@
+op {
+  graph_op_name: "RaggedGather"
+  visibility: HIDDEN
+  in_arg {
+    name: "params_nested_splits"
+    description: <<END
+The `nested_row_splits` tensors that define the row-partitioning for the
+`params` RaggedTensor input.
+END
+  }
+  in_arg {
+    name: "params_dense_values"
+    description: <<END
+The `inner_values` for the `params` RaggedTensor. There was a terminology change
+at the python level from dense_values to inner_values, so dense_values is the
+deprecated name.
+END
+  }
+  in_arg {
+    name: "indices"
+    description: <<END
+Indices in the outermost dimension of `params` of the values that should be
+gathered.
+END
+  }
+  out_arg {
+    name: "output_nested_splits"
+    description: <<END
+The `nested_row_splits` tensors that define the row-partitioning for the
+returned RaggedTensor.
+END
+  }
+  out_arg {
+    name: "output_dense_values"
+    description: "The `inner_values` for the returned RaggedTensor."
+  }
+  attr {
+    name: "PARAMS_RAGGED_RANK"
+    description: <<END
+The ragged rank of the `params` RaggedTensor. `params_nested_splits` should
+contain this number of `row_splits` tensors. This value should equal
+`params.ragged_rank`.
+END
+  }
+  attr {
+    name: "OUTPUT_RAGGED_RANK"
+    description: <<END
+The ragged rank of the output RaggedTensor. `output_nested_splits` will contain
+this number of `row_splits` tensors. This value should equal
+`indices.shape.ndims + params.ragged_rank - 1`.
+END
+  }
+  summary: <<END
+Gather ragged slices from `params` axis `0` according to `indices`.
+END
+  description: <<END
+Outputs a `RaggedTensor` output composed from `output_dense_values` and
+`output_nested_splits`, such that:
+
+```python
+output.shape = indices.shape + params.shape[1:]
+output.ragged_rank = indices.shape.ndims + params.ragged_rank
+output[i...j, d0...dn] = params[indices[i...j], d0...dn]
+```
+
+where
+
+* `params =
+   ragged.from_nested_row_splits(params_dense_values, params_nested_splits)`
+   provides the values that should be gathered.
+* `indices` ia a dense tensor with dtype `int32` or `int64`, indicating which
+   values should be gathered.
+* `output =
+   ragged.from_nested_row_splits(output_dense_values, output_nested_splits)`
+   is the output tensor.
+
+(Note: This c++ op is used to implement the higher-level python
+`tf.ragged.gather` op, which also supports ragged indices.)
+
+END
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 3a920f26f3..1ca9c7b7f5 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -958,6 +958,37 @@ tf_kernel_library(
     ]) + ARRAY_DEPS,
 )
 
+cc_library(
+    name = "ragged_ops",
+    deps = [
+        ":ragged_gather_op",
+    ],
+)
+
+tf_kernel_library(
+    name = "ragged_gather_op",
+    srcs = ["ragged_gather_op.cc"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:ragged_array_ops_op_lib",
+    ],
+)
+
+tf_cc_test(
+    name = "ragged_gather_op_test",
+    size = "small",
+    srcs = ["ragged_gather_op_test.cc"],
+    deps = [
+        ":ragged_gather_op",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:ragged_array_ops_op_lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:ops_testutil",
+    ],
+)
+
 tf_kernel_library(
     name = "cudnn_rnn_kernels",
     srcs = ["cudnn_rnn_ops.cc"],
diff --git a/tensorflow/core/kernels/ragged_gather_op.cc b/tensorflow/core/kernels/ragged_gather_op.cc
new file mode 100644
index 0000000000..b2a342f637
--- /dev/null
+++ b/tensorflow/core/kernels/ragged_gather_op.cc
@@ -0,0 +1,292 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/util/util.h"
+
+namespace tensorflow {
+
+namespace {
+
+// For each slice in `(start, limit)` in `value_slices`, append
+// `params_dense_values_in[start:limit] to `values_out`.  `value_size` indicates
+// the number of scalars contained in each value params_dense_values_in[i].
+template <typename VALUE_TYPE>
+void WriteValueSlices(const Tensor& params_dense_values_in,
+                      const std::vector<std::pair<int64, int64>>& value_slices,
+                      int64 value_size, Tensor* values_out) {
+  const auto& params_dense_values =
+      params_dense_values_in.flat_outer_dims<VALUE_TYPE, 2>();
+  auto values = values_out->flat_outer_dims<VALUE_TYPE, 2>();
+  int out_pos = 0;
+  for (const auto& slice : value_slices) {
+    for (int i = slice.first; i < slice.second; ++i) {
+      for (int j = 0; j < value_size; ++j) {
+        values(out_pos, j) = params_dense_values(i, j);
+      }
+      ++out_pos;
+    }
+  }
+}
+
+}  // namespace
+
+template <typename INDEX_TYPE>
+class RaggedGatherOpBase : public OpKernel {
+ public:
+  using OpKernel::OpKernel;
+
+  void Compute(OpKernelContext* context) override {
+    // Get the input Tensors.
+    OpInputList params_nested_splits_in;
+    OP_REQUIRES_OK(context, context->input_list("params_nested_splits",
+                                                &params_nested_splits_in));
+    const Tensor& params_dense_values_in =
+        context->input(params_nested_splits_in.size());
+    const Tensor& indices_in =
+        context->input(params_nested_splits_in.size() + 1);
+
+    DCHECK_GT(params_nested_splits_in.size(), 0);  // Enforced by REGISTER_OP.
+    int64 num_params = params_nested_splits_in[0].dim_size(0) - 1;
+    OP_REQUIRES_OK(context, ValidateIndices(indices_in, num_params));
+
+    OP_REQUIRES(context, params_dense_values_in.dims() > 0,
+                errors::InvalidArgument("params.rank must be nonzero"));
+    int64 num_params_dense_values = params_dense_values_in.dim_size(0);
+
+    // Calculate the `splits`, and store the value slices that we need to
+    // copy in `value_slices`.
+    std::vector<std::pair<int64, int64>> value_slices;
+    int64 num_values = 0;
+    std::vector<std::vector<int64>> out_splits;
+    OP_REQUIRES_OK(context, MakeSplits(indices_in, params_nested_splits_in,
+                                       num_params_dense_values, &out_splits,
+                                       &value_slices, &num_values));
+
+    // Write the output tensors.
+    OP_REQUIRES_OK(context, WriteSplits(out_splits, context));
+    OP_REQUIRES_OK(context,
+                   WriteValues(params_dense_values_in, value_slices,
+                               out_splits.size(), num_values, context));
+  }
+
+ private:
+  // Check if any indices are out-of-bounds.
+  ::tensorflow::Status ValidateIndices(const Tensor& indices_in,
+                                       int64 num_params) {
+    const auto& indices = indices_in.flat<INDEX_TYPE>();
+    for (int64 i = 0; i < indices.size(); ++i) {
+      int64 index = indices(i);
+      if (index < 0 || index >= num_params) {
+        return errors::InvalidArgument(
+            "indices", SliceDebugString(indices_in.shape(), i), " = ", index,
+            " is not in [0, ", num_params, ")");
+      }
+    }
+    return ::tensorflow::Status::OK();
+  }
+
+  // Construct the `splits` output tensors, encoded using a nested vector.
+  // Also find the slices of values that need to be copied, and store them
+  // in `value_slices`.  The total number of values that will be copied (which
+  // we need for allocating the output values tensor) is stored in `num_values`.
+  ::tensorflow::Status MakeSplits(
+      const Tensor& indices_in, const OpInputList& params_nested_splits_in,
+      int64 num_params_dense_values,
+      std::vector<std::vector<int64>>* out_splits,
+      std::vector<std::pair<int64, int64>>* value_slices, int64* num_values) {
+    *num_values = 0;
+    value_slices->clear();
+
+    int num_splits = indices_in.dims() - 1 + params_nested_splits_in.size();
+    out_splits->assign(num_splits, {0});
+
+    // Get Eigen tensors.
+    const auto& indices = indices_in.flat<INDEX_TYPE>();
+    std::vector<TTypes<int64>::ConstFlat> params_nested_splits;
+    params_nested_splits.reserve(params_nested_splits_in.size());
+    for (const auto& splits_in : params_nested_splits_in) {
+      params_nested_splits.push_back(splits_in.flat<int64>());
+    }
+
+    TF_RETURN_IF_ERROR(
+        ValidateSplits(params_nested_splits, num_params_dense_values));
+
+    // Add `splits` that come from all but the last dimension of the dense
+    // Tensor `indices`.  In particular, for each dimension D, we add a
+    // splits tensor whose values are:
+    //   range(splits.shape[D]*splits.shape[D+1] + 1, step=splits.shape[D+1])
+    // E.g., if indices.shape=[5, 3] then we will add a splits tensor
+    // [0, 3, 6, 9, 12, 15], since the outermost dimension has 5 elements,
+    // each of which contains 3 values.
+    for (int dim = 0; dim < indices_in.dims() - 1; ++dim) {
+      int stride = indices_in.dim_size(dim + 1);
+      int index = stride;
+      for (int i = 0; i < indices_in.dim_size(dim); ++i) {
+        out_splits->at(dim).push_back(index);
+        index += stride;
+      }
+    }
+
+    // Add `splits` that come from `params_nested_splits`.  Starting with the
+    // outermost ragged dimension (i.e., the first `splits` tensor), we work
+    // our way in, finding the range of values that should be copied.  As we
+    // go, we update the output `splits` for each dimension with the appropriate
+    // values.  In particular, the *lengths* of the slices from `param_splits`
+    // should be copied to generate corresponding slice lengths in the output
+    // splits.  E.g., if we are copying a ragged row with length 4, then we
+    // should add a new split point to out_splits that is 4 greater than the
+    // previous split point in out_splits.
+    for (int i = 0; i < indices.size(); ++i) {
+      int start = indices(i);
+      int limit = indices(i) + 1;
+
+      // Copy splits.
+      for (int dim = 0; dim < params_nested_splits.size(); ++dim) {
+        const auto& splits = params_nested_splits[dim];
+        int out_dim = dim + indices_in.dims() - 1;
+        if (out_dim >= 0) {
+          int64 delta = out_splits->at(out_dim).back() - splits(start);
+          for (int j = start; j < limit; ++j) {
+            out_splits->at(out_dim).push_back(splits(j + 1) + delta);
+          }
+        }
+        start = splits(start);
+        limit = splits(limit);
+      }
+      if (limit != start) {
+        value_slices->emplace_back(start, limit);
+        *num_values += limit - start;
+      }
+    }
+    return ::tensorflow::Status::OK();
+  }
+
+  ::tensorflow::Status ValidateSplits(
+      const std::vector<TTypes<int64>::ConstFlat>& params_nested_splits,
+      int64 num_params_dense_values) {
+    // Validate
+    for (int dim = 0; dim < params_nested_splits.size(); ++dim) {
+      const auto& splits = params_nested_splits[dim];
+      int64 last_split = (dim == params_nested_splits.size() - 1)
+                             ? num_params_dense_values
+                             : params_nested_splits[dim + 1].size();
+      if (splits.size() == 0) {
+        return errors::InvalidArgument("Ragged splits may not be empty");
+      }
+      if (splits(0) < 0) {
+        return errors::InvalidArgument("Ragged splits must be non-negative");
+      }
+      if (splits(splits.size() - 1) > last_split) {
+        return errors::InvalidArgument(
+            "Ragged splits must not point past values");
+      }
+      for (int i = 1; i < splits.size(); ++i) {
+        if (splits(i - 1) > splits(i)) {
+          return errors::InvalidArgument("Ragged splits must be sorted");
+        }
+      }
+    }
+    return ::tensorflow::Status::OK();
+  }
+
+  ::tensorflow::Status WriteSplits(
+      const std::vector<std::vector<int64>>& out_splits,
+      OpKernelContext* context) {
+    OpOutputList splits_out;
+    TF_RETURN_IF_ERROR(
+        context->output_list("output_nested_splits", &splits_out));
+    for (int i = 0; i < out_splits.size(); ++i) {
+      Tensor* splits;
+      int64 num_splits = out_splits[i].size();
+      TF_RETURN_IF_ERROR(
+          splits_out.allocate(i, TensorShape({num_splits}), &splits));
+      auto splits_flat = splits->flat<int64>();
+      std::copy_n(out_splits[i].data(), out_splits[i].size(),
+                  splits_flat.data());
+    }
+    return ::tensorflow::Status::OK();
+  }
+
+  ::tensorflow::Status WriteValues(
+      const Tensor& params_dense_values_in,
+      const std::vector<std::pair<int64, int64>>& value_slices,
+      int values_index, int64 num_values, OpKernelContext* context) const {
+    Tensor* values_out = nullptr;
+    TensorShape values_shape = params_dense_values_in.shape();
+    values_shape.set_dim(0, num_values);
+    TF_RETURN_IF_ERROR(
+        context->allocate_output(values_index, values_shape, &values_out));
+    int64 value_size = params_dense_values_in.NumElements() /
+                       params_dense_values_in.dim_size(0);
+    CallWriteValueSlices(params_dense_values_in, value_slices, value_size,
+                         values_out);
+    return ::tensorflow::Status::OK();
+  }
+
+ protected:
+  // Call WriteValueSlices() using the appropriate VALUE_TYPE template
+  // parameter.  This pattern is used to reduce binary size.  In particular,
+  // this allows us to have two instantiations of this class (one for each
+  // index type), rather than 14 (one for each index type and value type),
+  // which cuts the binary size of this op from ~300k to <90k.
+  virtual void CallWriteValueSlices(
+      const Tensor& params_dense_values_in,
+      const std::vector<std::pair<int64, int64>>& value_slices,
+      int64 value_size, Tensor* values_out) const = 0;
+};
+
+template <typename INDEX_TYPE, typename VALUE_TYPE>
+class RaggedGatherOp : public RaggedGatherOpBase<INDEX_TYPE> {
+ public:
+  using RaggedGatherOpBase<INDEX_TYPE>::RaggedGatherOpBase;
+
+ private:
+  void CallWriteValueSlices(
+      const Tensor& params_dense_values_in,
+      const std::vector<std::pair<int64, int64>>& value_slices,
+      int64 value_size, Tensor* values_out) const override {
+    WriteValueSlices<VALUE_TYPE>(params_dense_values_in, value_slices,
+                                 value_size, values_out);
+  }
+};
+
+#define REGISTER_CPU_KERNEL_WITH_INDEX_TYPE(index_type, value_type)   \
+  REGISTER_KERNEL_BUILDER(Name("RaggedGather")                        \
+                              .Device(DEVICE_CPU)                     \
+                              .TypeConstraint<index_type>("Tindices") \
+                              .TypeConstraint<value_type>("Tvalues"), \
+                          RaggedGatherOp<index_type, value_type>);
+#define REGISTER_CPU_KERNEL(value_type)                  \
+  REGISTER_CPU_KERNEL_WITH_INDEX_TYPE(int32, value_type) \
+  REGISTER_CPU_KERNEL_WITH_INDEX_TYPE(int64, value_type)
+TF_CALL_POD_TYPES(REGISTER_CPU_KERNEL);
+TF_CALL_string(REGISTER_CPU_KERNEL);
+TF_CALL_QUANTIZED_TYPES(REGISTER_CPU_KERNEL);
+TF_CALL_quint16(REGISTER_CPU_KERNEL);
+TF_CALL_qint16(REGISTER_CPU_KERNEL);
+TF_CALL_uint32(REGISTER_CPU_KERNEL);
+TF_CALL_uint64(REGISTER_CPU_KERNEL);
+#undef REGISTER_CPU_KERNEL
+#undef REGISTER_CPU_KERNEL_WITH_INDEX_TYPE
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/ragged_gather_op_test.cc b/tensorflow/core/kernels/ragged_gather_op_test.cc
new file mode 100644
index 0000000000..47be788151
--- /dev/null
+++ b/tensorflow/core/kernels/ragged_gather_op_test.cc
@@ -0,0 +1,281 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/shape_inference_testutil.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+class RaggedGatherOpTest : public ::tensorflow::OpsTestBase {
+ protected:
+  // Builds the tensorflow test graph for RaggedGather.
+  template <typename VALUE_TYPE, typename INDEX_TYPE>
+  void BuildRaggedGatherGraph(
+      const TensorShape& indices_shape, const std::vector<INDEX_TYPE>& indices,
+      const std::vector<std::vector<int64>>& params_nested_splits,
+      const TensorShape& params_dense_values_shape,
+      const gtl::ArraySlice<VALUE_TYPE> params_dense_values) {
+    const auto& value_dtype = DataTypeToEnum<VALUE_TYPE>::v();
+    const auto& index_dtype = DataTypeToEnum<INDEX_TYPE>::v();
+    int64 PARAMS_RAGGED_RANK = params_nested_splits.size();
+    int64 num_splits = PARAMS_RAGGED_RANK + indices_shape.dims() - 1;
+    TF_ASSERT_OK(
+        NodeDefBuilder("tested_op", "RaggedGather")
+            .Input(FakeInput(PARAMS_RAGGED_RANK))  // params_nested_splits
+            .Input(FakeInput(value_dtype))         // params_dense_values
+            .Input(FakeInput(index_dtype))         // indices
+            .Attr("PARAMS_RAGGED_RANK", PARAMS_RAGGED_RANK)
+            .Attr("OUTPUT_RAGGED_RANK", num_splits)
+            .Attr("Tvalues", value_dtype)
+            .Attr("Tindices", index_dtype)
+            .Finalize(node_def()));
+    TF_ASSERT_OK(InitOp());
+    for (const auto& splits : params_nested_splits) {
+      int64 splits_size = splits.size();
+      AddInputFromArray<int64>(TensorShape({splits_size}), splits);
+    }
+    AddInputFromArray<VALUE_TYPE>(params_dense_values_shape,
+                                  params_dense_values);
+    AddInputFromArray<INDEX_TYPE>(indices_shape, indices);
+  }
+};
+
+TEST_F(RaggedGatherOpTest, RaggedGather) {
+  // indices = [2, 1, 0, 3]
+  // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]]
+  // params.shape = [4, None]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({4}),                     // indices.shape
+      {2, 1, 0, 3},                         // indices
+      {{0, 3, 3, 7, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[.4, .5, .6, .7], [.1, .2, .3], [], [.8, .9]]
+  test::ExpectTensorEqual<int64>(*GetOutput(0),
+                                 test::AsTensor<int64>({0, 4, 4, 7, 9}));
+  test::ExpectTensorNear<float>(
+      *GetOutput(1),
+      test::AsTensor<float>({.4, .5, .6, .7, .1, .2, .3, .8, .9}), 0.1);
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_3DParams) {
+  // indices = [2, 1, 0, 2, 3]
+  // params = [[[]], [[.1, 2], [.3]], [], [[.4, .5], [.6, .7, .8]], [[.9]]]
+  // params.shape = [5, None, None]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({5}),                             // indices.shape
+      {2, 1, 0, 2, 3},                              // indices
+      {{0, 1, 3, 3, 5, 6}, {0, 0, 2, 3, 5, 8, 9}},  // params_nested_splits
+      TensorShape({9}),                             // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}          // params_dense_values
+  );
+
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[], [[.1, 2], [.3]], [[]], [], [[.4, .5], [.6, .7, .8]]]
+  test::ExpectTensorEqual<int64>(*GetOutput(0),
+                                 test::AsTensor<int64>({0, 0, 2, 3, 3, 5}));
+  test::ExpectTensorEqual<int64>(*GetOutput(1),
+                                 test::AsTensor<int64>({0, 2, 3, 3, 5, 8}));
+  test::ExpectTensorNear<float>(
+      *GetOutput(2), test::AsTensor<float>({.1, .2, .3, .4, .5, .6, .7, .8}),
+      0.1);
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_4DParams) {
+  // indices = [2, 1, 0, 2]
+  // params = [[[]], [[[1, 2], [3, 4], [5, 6]], [[7, 8]]], []]
+  // params.shape = [4, None, None, 2]
+  BuildRaggedGatherGraph<int32, int32>(
+      TensorShape({4}),              // indices.shape
+      {2, 1, 0, 2},                  // indices
+      {{0, 1, 3, 3}, {0, 0, 3, 4}},  // params_nested_splits
+      TensorShape({4, 2}),           // params_dense_values.shape
+      {1, 2, 3, 4, 5, 6, 7, 8}       // params_dense_values
+  );
+
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[],
+  //            [[[1, 2], [3, 4], [5, 6]], [[7, 8]]],
+  //            [[]],
+  //            []]
+  test::ExpectTensorEqual<int64>(*GetOutput(0),
+                                 test::AsTensor<int64>({0, 0, 2, 3, 3}));
+  test::ExpectTensorEqual<int64>(*GetOutput(1),
+                                 test::AsTensor<int64>({0, 3, 4, 4}));
+  test::ExpectTensorEqual<int32>(
+      *GetOutput(2),
+      test::AsTensor<int32>({1, 2, 3, 4, 5, 6, 7, 8}, TensorShape({4, 2})));
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_2DIndices) {
+  // indices = [[2, 1], [0, 3]]
+  // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2, 2}),                  // indices.shape
+      {2, 1, 0, 3},                         // indices
+      {{0, 3, 3, 7, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [ [ [.4, .5, .6, .7], [.1, .2, .3] ],
+  //             [ [],               [.8, .9]     ] ]
+  test::ExpectTensorEqual<int64>(*GetOutput(0),
+                                 test::AsTensor<int64>({0, 2, 4}));
+  test::ExpectTensorEqual<int64>(*GetOutput(1),
+                                 test::AsTensor<int64>({0, 4, 4, 7, 9}));
+  test::ExpectTensorNear<float>(
+      *GetOutput(2),
+      test::AsTensor<float>({.4, .5, .6, .7, .1, .2, .3, .8, .9}), 0.1);
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_ScalarIndices) {
+  // indices = 2
+  // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({}),                      // indices.shape
+      {2},                                  // indices
+      {{0, 3, 3, 7, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [.4, .5, .6, .7]
+  test::ExpectTensorNear<float>(*GetOutput(0),
+                                test::AsTensor<float>({.4, .5, .6, .7}), 0.1);
+}
+
+TEST_F(RaggedGatherOpTest, RaggedGather_OutOfBounds) {
+  // indices = [2, 10]
+  // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]]
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2}),                     // indices.shape
+      {2, 10},                              // indices
+      {{0, 3, 3, 7, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  EXPECT_EQ("indices[1] = 10 is not in [0, 4)", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, InvalidSplitsNotSorted) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2}),                     // indices.shape
+      {0, 2},                               // indices
+      {{0, 3, 5, 2, 9}},                    // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  EXPECT_EQ("Ragged splits must be sorted", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, InvalidSplitsNegative) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2}),                     // indices.shape
+      {0, 2},                               // indices
+      {{-1, 3, 2, 7, 9}},                   // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  EXPECT_EQ("Ragged splits must be non-negative",
+            RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, InvalidSplitsEmpty) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({0}),  // indices.shape
+      {},                // indices
+      {{}},              // params_nested_splits
+      TensorShape({0}),  // params_dense_values.shape
+      {}                 // params_dense_values
+  );
+  EXPECT_EQ("Ragged splits may not be empty", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, InvalidSplitsTooBig) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({2}),                     // indices.shape
+      {0, 2},                               // indices
+      {{0, 20, 40, 80, 100}},               // params_nested_splits
+      TensorShape({9}),                     // params_dense_values.shape
+      {.1, .2, .3, .4, .5, .6, .7, .8, .9}  // params_dense_values
+  );
+  EXPECT_EQ("Ragged splits must not point past values",
+            RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, BadValuesShape) {
+  BuildRaggedGatherGraph<float, int32>(
+      TensorShape({0}),  // indices.shape
+      {},                // indices
+      {{0}},             // params_nested_splits
+      TensorShape({}),   // params_dense_values.shape
+      {.1}               // params_dense_values
+  );
+  EXPECT_EQ("params.rank must be nonzero", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedGatherOpTest, ShapeFn) {
+  // RaggedGather(param_splits+, param_values, indices) -> [splits+, values]
+  ShapeInferenceTestOp op("RaggedGather");
+
+  (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(1);
+  (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(1);
+  INFER_OK(op, "?;?;?", "[?];?");
+  INFER_OK(op, "[?];[?];[?]", "[?];[?]");
+  INFER_OK(op, "[?];[?,?,?];[?]", "[?];[?,d1_1,d1_2]");
+  INFER_OK(op, "[5];[10];[15]", "[?];[?]");
+  INFER_OK(op, "[5];[10,2];[15]", "[?];[?,d1_1]");
+  INFER_ERROR("Shape must be rank 1 but is rank 0", op, "[5];[];[]");
+  INFER_ERROR("Shape must be rank 1 but is rank 2", op, "[1,2];[];[5]");
+
+  (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(2);
+  (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(2);
+  INFER_OK(op, "?;?;?;?", "[?];[?];?");
+  INFER_OK(op, "[?];[?];[?];[?]", "[?];[?];[?]");
+  INFER_OK(op, "[?];[?];[?,?,?];[?]", "[?];[?];[?,d2_1,d2_2]");
+  INFER_OK(op, "[5];[10];[15];[20]", "[?];[?];[?]");
+
+  (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(1);
+  (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(2);
+  INFER_OK(op, "?;?;?", "[?];[?];?");
+  INFER_OK(op, "[?];[?];[?,?]", "[?];[?];[?]");
+  INFER_OK(op, "[?];[?,?,?];[?,?]", "[?];[?];[?,d1_1,d1_2]");
+  INFER_OK(op, "[15];[20];[5,10]", "[?];[?];[?]");
+  INFER_OK(op, "[15];[20,2];[5,10]", "[?];[?];[?,d1_1]");
+
+  (*op.node_def.mutable_attr())["PARAMS_RAGGED_RANK"].set_i(1);
+  (*op.node_def.mutable_attr())["OUTPUT_RAGGED_RANK"].set_i(0);
+  INFER_OK(op, "[?];[?];[]", "[?]");
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/ragged_array_ops.cc b/tensorflow/core/ops/ragged_array_ops.cc
new file mode 100644
index 0000000000..4642579939
--- /dev/null
+++ b/tensorflow/core/ops/ragged_array_ops.cc
@@ -0,0 +1,85 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+Status RaggedGatherShapeFn(InferenceContext* c);
+
+//==============================================================================
+// Registered Ops
+//==============================================================================
+
+REGISTER_OP("RaggedGather")
+    .Input("params_nested_splits: PARAMS_RAGGED_RANK * int64")
+    .Input("params_dense_values: Tvalues")
+    .Input("indices: Tindices")
+    .Output("output_nested_splits: OUTPUT_RAGGED_RANK * int64")
+    .Output("output_dense_values: Tvalues")
+    .Attr("Tvalues: type")
+    .Attr("Tindices: {int32, int64}")
+    .Attr("PARAMS_RAGGED_RANK: int >= 1")
+    .Attr("OUTPUT_RAGGED_RANK: int >= 0")
+    .SetShapeFn(RaggedGatherShapeFn);
+
+//==============================================================================
+// Shape Functions
+//==============================================================================
+
+Status RaggedGatherShapeFn(InferenceContext* c) {
+  int num_splits;
+  int64 PARAMS_RAGGED_RANK;
+  TF_RETURN_IF_ERROR(
+      c->GetAttr<int64>("PARAMS_RAGGED_RANK", &PARAMS_RAGGED_RANK));
+  TF_RETURN_IF_ERROR(c->GetAttr<int>("OUTPUT_RAGGED_RANK", &num_splits));
+
+  // Check rank of `indices`.
+  ShapeHandle indices = c->input(PARAMS_RAGGED_RANK + 1);
+  TF_RETURN_IF_ERROR(
+      c->WithRank(indices, num_splits - PARAMS_RAGGED_RANK + 1, &indices));
+
+  // Check that all params_nested_splits have rank 1.
+  for (int64 i = 0; i < PARAMS_RAGGED_RANK; ++i) {
+    ShapeHandle splits = c->input(i);
+    TF_RETURN_IF_ERROR(c->WithRank(splits, 1, &splits));
+  }
+
+  // Check that `params_dense_values` has rank>=1.
+  ShapeHandle params_dense_values = c->input(PARAMS_RAGGED_RANK);
+  TF_RETURN_IF_ERROR(
+      c->WithRankAtLeast(params_dense_values, 1, &params_dense_values));
+
+  // Set the rank for the `splits` outputs.
+  for (int i = 0; i < num_splits; ++i) {
+    c->set_output(i, c->UnknownShapeOfRank(1));
+  }
+
+  // Calculate the `values` shape.
+  ShapeHandle value = c->UnknownShape();
+  ShapeHandle values = c->UnknownShape();
+  TF_RETURN_IF_ERROR(c->Subshape(params_dense_values, 1, &value));
+  TF_RETURN_IF_ERROR(c->Concatenate(c->UnknownShapeOfRank(1), value, &values));
+  c->set_output(num_splits, values);
+
+  return Status::OK();
+}
+
+}  // namespace tensorflow
-- 
GitLab


From 1e13c38980ec17d9f26c041f4b251ecb3a791a2c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 13:23:52 -0700
Subject: [PATCH 0620/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 216410913
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 98 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 98 +++++++++++++++++++
 2 files changed, 196 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index dcea70dffb..cfb1055d3c 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -21858,6 +21858,54 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "ExperimentalNumaMapAndBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "drop_remainder"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "ExperimentalThreadPoolDataset"
   input_arg {
@@ -43915,6 +43963,56 @@ op {
     }
   }
 }
+op {
+  name: "RaggedGather"
+  input_arg {
+    name: "params_nested_splits"
+    type: DT_INT64
+    number_attr: "PARAMS_RAGGED_RANK"
+  }
+  input_arg {
+    name: "params_dense_values"
+    type_attr: "Tvalues"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "output_nested_splits"
+    type: DT_INT64
+    number_attr: "OUTPUT_RAGGED_RANK"
+  }
+  output_arg {
+    name: "output_dense_values"
+    type_attr: "Tvalues"
+  }
+  attr {
+    name: "Tvalues"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "PARAMS_RAGGED_RANK"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "OUTPUT_RAGGED_RANK"
+    type: "int"
+    has_minimum: true
+  }
+}
 op {
   name: "RandomCrop"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 93a297458f..05b97bffad 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -10365,6 +10365,54 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "ExperimentalNumaMapAndBatchDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "batch_size"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "drop_remainder"
+    type: DT_BOOL
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "ExperimentalThreadPoolDataset"
   input_arg {
@@ -22288,6 +22336,56 @@ op {
     }
   }
 }
+op {
+  name: "RaggedGather"
+  input_arg {
+    name: "params_nested_splits"
+    type: DT_INT64
+    number_attr: "PARAMS_RAGGED_RANK"
+  }
+  input_arg {
+    name: "params_dense_values"
+    type_attr: "Tvalues"
+  }
+  input_arg {
+    name: "indices"
+    type_attr: "Tindices"
+  }
+  output_arg {
+    name: "output_nested_splits"
+    type: DT_INT64
+    number_attr: "OUTPUT_RAGGED_RANK"
+  }
+  output_arg {
+    name: "output_dense_values"
+    type_attr: "Tvalues"
+  }
+  attr {
+    name: "Tvalues"
+    type: "type"
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "PARAMS_RAGGED_RANK"
+    type: "int"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "OUTPUT_RAGGED_RANK"
+    type: "int"
+    has_minimum: true
+  }
+}
 op {
   name: "RandomCrop"
   input_arg {
-- 
GitLab


From 9989788be25c846d087ac70b76cf78759a209a3e Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 13:31:58 -0700
Subject: [PATCH 0621/1085] Small cleanup in function_test.

PiperOrigin-RevId: 216412380
---
 tensorflow/python/framework/function_test.py | 27 ++++++--------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 87f567db0e..16d4903d79 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -1639,29 +1639,18 @@ class FunctionInlineControlTest(test.TestCase):
       self.assertEqual(MetadataHasCell(run_metadata), noinline)
 
 
-@function.Defun(*[dtypes.float32] * 3)
-def Linear(w, b, x):
-  return nn_ops.relu(math_ops.matmul(x, w) + b)
-
-
-@function.Defun(*[dtypes.float32] * 5)
-def Linear2(w1, b1, w2, b2, x):
-  return Linear(w2, b2, Linear(w1, b1, x))
-
-
-@function.Defun(*[dtypes.float32] * 3)
-def LinearWithCApi(w, b, x):
-  return nn_ops.relu(math_ops.matmul(x, w) + b)
-
+class ModuleFunctionTest(test.TestCase):
 
-@function.Defun(*[dtypes.float32] * 5)
-def Linear2WithCApi(w1, b1, w2, b2, x):
-  return LinearWithCApi(w2, b2, LinearWithCApi(w1, b1, x))
+  def testBasic(self):
 
+    @function.Defun(*[dtypes.float32] * 3)
+    def LinearWithCApi(w, b, x):
+      return nn_ops.relu(math_ops.matmul(x, w) + b)
 
-class ModuleFunctionTest(test.TestCase):
+    @function.Defun(*[dtypes.float32] * 5)
+    def Linear2WithCApi(w1, b1, w2, b2, x):
+      return LinearWithCApi(w2, b2, LinearWithCApi(w1, b1, x))
 
-  def testBasic(self):
     with ops.Graph().as_default():
       a, b, c, d, e = [
           constant_op.constant([[_]], dtype=dtypes.float32) for _ in range(5)
-- 
GitLab


From 761298537adab7196d4f24fa07384f4cd6ffae91 Mon Sep 17 00:00:00 2001
From: Scott Leishman <scott.leishman@intel.com>
Date: Tue, 9 Oct 2018 20:39:01 +0000
Subject: [PATCH 0622/1085] Ensure all bazel options are incorporated during
 Intel mkl builds.

---
 tensorflow/tools/docker/Dockerfile.devel-mkl         | 4 +++-
 tensorflow/tools/docker/Dockerfile.devel-mkl-horovod | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
index e433e9ebb2..e664b6066b 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -115,6 +115,7 @@ RUN export TAG_PREFIX="v" && \
     fi
 
 RUN yes "" | ${PYTHON} configure.py
+RUN cp .bazelrc /root/.bazelrc
 
 ENV CI_BUILD_PYTHON ${PYTHON}
 
@@ -125,7 +126,8 @@ ENV CI_BUILD_PYTHON ${PYTHON}
 # --copt=-march="avx" \
 # For haswell, broadwell, or skylake
 # --copt=-march="avx2" \
-COPY .bazelrc /root/.bazelrc
+COPY .bazelrc /root/.mkl.bazelrc
+RUN echo "import /root/.mkl.bazelrc" >>/root/.bazelrc
 
 RUN tensorflow/tools/ci_build/builds/configured CPU \
     bazel --bazelrc=/root/.bazelrc build -c opt \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
index 48f2400569..136c775d6c 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
@@ -106,6 +106,7 @@ RUN export TAG_PREFIX="v" && \
     fi
 
 RUN yes "" | ${PYTHON} configure.py
+RUN cp .bazelrc /root/.bazelrc
 
 ENV CI_BUILD_PYTHON ${PYTHON}
 
@@ -116,7 +117,8 @@ ENV CI_BUILD_PYTHON ${PYTHON}
 # --copt=-march="avx" \
 # For haswell, broadwell, or skylake
 # --copt=-march="avx2" \
-COPY .bazelrc /root/.bazelrc
+COPY .bazelrc /root/.mkl.bazelrc
+RUN echo "import /root/.mkl.bazelrc" >>/root/.bazelrc
 
 RUN tensorflow/tools/ci_build/builds/configured CPU \
     bazel --bazelrc=/root/.bazelrc build -c opt \
-- 
GitLab


From 5d9a7fdf4f02c2db487a03e7ad2d520f8847c4e3 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 9 Oct 2018 13:32:24 -0700
Subject: [PATCH 0623/1085] [XLA:GPU] Add an implementation of scatter for GPU

This simple has a kernel that runs on every element of the updates tensor,
figure out the right indices to perform the update, and applies it with an
atomic operation.

Currently we emit a CAS for plain (i.e. non-add) updates, which is inefficient.
Also TuplePointsToAnalysis doesn't know that it should alias the operand and
output buffers of a scatter, which would avoid a copy.

PiperOrigin-RevId: 216412467
---
 tensorflow/compiler/xla/service/gpu/BUILD     |   1 -
 .../xla/service/gpu/ir_emitter_unnested.cc    | 141 ++++++++++++++++++
 .../xla/service/gpu/ir_emitter_unnested.h     |   1 +
 .../xla/service/gpu/nvptx_compiler.cc         |   3 -
 .../compiler/xla/service/layout_assignment.cc |   2 +-
 5 files changed, 143 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 350fd32537..0144d59097 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -705,7 +705,6 @@ cc_library(
         "//tensorflow/compiler/xla/service:llvm_compiler",
         "//tensorflow/compiler/xla/service:reduce_precision_insertion",
         "//tensorflow/compiler/xla/service:reshape_mover",
-        "//tensorflow/compiler/xla/service:scatter_expander",
         "//tensorflow/compiler/xla/service:transpose_folding",
         "//tensorflow/compiler/xla/service:tuple_simplifier",
         "//tensorflow/compiler/xla/service:while_loop_constant_sinking",
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index c792dd2ddb..bef7a55301 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -1958,6 +1958,147 @@ Status IrEmitterUnnested::HandleRng(HloInstruction* rng) {
   return Status::OK();
 }
 
+Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
+  const HloInstruction* operand = scatter->operand(0);
+  const HloInstruction* scatter_indices = scatter->operand(1);
+  const HloInstruction* updates = scatter->operand(2);
+  const ScatterDimensionNumbers& dim_numbers =
+      scatter->scatter_dimension_numbers();
+  CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape()));
+
+  std::vector<std::unique_ptr<Thunk>> thunks;
+
+  // Copy the operand into the output if it's not the same buffer already.
+  auto operand_buffer = GetAllocationSlice(*operand);
+  auto destination_buffer = GetAllocationSlice(*scatter);
+  if (operand_buffer != destination_buffer) {
+    thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
+        /*source_address=*/operand_buffer,
+        /*destination_buffer=*/destination_buffer,
+        /*mem_size=*/ShapeUtil::ByteSizeOf(operand->shape()), scatter));
+  }
+
+  auto loop_body_emitter = [&](const IrArray::Index& index) -> Status {
+    std::vector<llvm::Value*> raw_window_multidim;
+    std::vector<llvm::Value*> input_scatter_multidim;
+    std::vector<int64> raw_window_bounds;
+
+    // Partition the index into window indices and scatter indices.
+    for (int64 i = 0, e = index.size(); i != e; ++i) {
+      // For window indices also remember the window size, this comes in handy
+      // later.
+      if (absl::c_binary_search(dim_numbers.update_window_dims(), i)) {
+        raw_window_multidim.push_back(index[i]);
+        raw_window_bounds.push_back(updates->shape().dimensions(i));
+      } else {
+        input_scatter_multidim.push_back(index[i]);
+      }
+    }
+    DCHECK_EQ(raw_window_multidim.size(),
+              dim_numbers.update_window_dims_size());
+
+    // Apply inserted_window_dims to the window dimensions.
+    int64 raw_window_multidim_idx = 0;
+    std::vector<llvm::Value*> input_window_multidim;
+    std::vector<int64> input_window_bounds;
+    for (int64 i = 0, e = ShapeUtil::Rank(operand->shape()); i != e; ++i) {
+      if (absl::c_binary_search(dim_numbers.inserted_window_dims(), i)) {
+        input_window_bounds.push_back(1);  // Trivial dimension.
+        input_window_multidim.push_back(index.GetConstantWithIndexType(0));
+      } else {
+        input_window_bounds.push_back(
+            raw_window_bounds[raw_window_multidim_idx]);
+        input_window_multidim.push_back(
+            raw_window_multidim[raw_window_multidim_idx]);
+        ++raw_window_multidim_idx;
+      }
+    }
+    DCHECK_EQ(input_window_multidim.size(), ShapeUtil::Rank(operand->shape()));
+
+    // Insert a 1 dimension at the end if index_vector_dim requests one.
+    Shape scatter_indices_shape = scatter_indices->shape();
+    if (dim_numbers.index_vector_dim() ==
+        ShapeUtil::Rank(scatter_indices_shape)) {
+      scatter_indices_shape.add_dimensions(1);
+      scatter_indices_shape.mutable_layout()->add_minor_to_major(
+          dim_numbers.index_vector_dim());
+    }
+    llvm_ir::IrArray scatter_indices_reshaped =
+        GetIrArray(*scatter_indices, *scatter)
+            .CastToShape(scatter_indices_shape, &b_);
+
+    // Now load the indices corresponding to the current window from
+    // scatter_indices.
+    llvm_ir::IrArray::Index raw_scatter_index_index(input_scatter_multidim,
+                                                    index.GetType());
+    raw_scatter_index_index.InsertAt(dim_numbers.index_vector_dim(), nullptr);
+    llvm::Value* is_in_bounds = b_.getTrue();
+    for (int64 i = 0, e = dim_numbers.scatter_dims_to_operand_dims_size();
+         i != e; ++i) {
+      // Our index is stored along index_vector_dim, insert that into the lookup
+      // index into scatter_indices.
+      raw_scatter_index_index[dim_numbers.index_vector_dim()] =
+          raw_scatter_index_index.GetConstantWithIndexType(i);
+
+      int64 operand_dim = dim_numbers.scatter_dims_to_operand_dims(i);
+      llvm::Value* loaded_scatter_index =
+          scatter_indices_reshaped.EmitReadArrayElement(raw_scatter_index_index,
+                                                        &b_, "scatter_index");
+      // And add the index to our window index. This yields the output index.
+      llvm::Value* dim_offset =
+          Add(input_window_multidim[operand_dim],
+              IntCast(loaded_scatter_index, index.GetType(),
+                      /*isSigned=*/true));
+      input_window_multidim[operand_dim] = dim_offset;
+
+      // Also do the bounds check now.
+      int64 max_index = operand->shape().dimensions(operand_dim) -
+                        input_window_bounds[operand_dim] + 1;
+      // is_in_bounds = dim_offset >= 0 && dim_offset < dim_size-window_size+1
+      //   --> dim_offset u< dim_size-window_size+1
+      is_in_bounds =
+          And(is_in_bounds,
+              ICmpULT(dim_offset, index.GetConstantWithIndexType(max_index)));
+    }
+
+    llvm_ir::LlvmIfData if_window_in_bounds_data = llvm_ir::EmitIfThenElse(
+        is_in_bounds, "scatter.in_bounds", &b_, /*emit_else=*/false);
+    llvm_ir::SetToFirstInsertPoint(if_window_in_bounds_data.true_block, &b_);
+    // All done, now just read from the calculated input from the window, and do
+    // an atomic store to the calculated location in the output.
+    llvm_ir::IrArray::Index input_window_index(input_window_multidim,
+                                               index.GetType());
+    llvm::Value* input_address =
+        GetIrArray(*updates, *scatter).EmitArrayElementAddress(index, &b_);
+    llvm::Value* output_address =
+        GetIrArray(*scatter, *scatter)
+            .EmitArrayElementAddress(input_window_index, &b_);
+    return EmitAtomicOperationForNestedComputation(
+        *scatter->to_apply(), output_address, input_address);
+  };
+
+  // Launch a kernel that reads every element in the updates tensor. We could
+  // also do one kernel per window instead if bounds checks turn out to be a
+  // bottleneck.
+  thunks.push_back(BuildKernelThunk(
+      scatter,
+      /*implements_whole_instruction=*/operand_buffer == destination_buffer));
+
+  LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
+      updates->shape(), ir_emitter_context_->device_description());
+  UpdateLaunchDimensions(launch_dimensions,
+                         static_cast<KernelThunk*>(thunks.back().get()),
+                         ir_emitter_context_->llvm_module());
+
+  thunk_sequence_->emplace_back(
+      absl::make_unique<SequentialThunk>(std::move(thunks), scatter));
+  return ParallelLoopEmitter(loop_body_emitter, updates->shape(),
+                             launch_dimensions, &b_)
+      .EmitLoop(IrName(scatter),
+                GetIndexTypeForKernel(scatter, launch_dimensions.launch_bound(),
+                                      &b_));
+}
+
 Status IrEmitterUnnested::HandleSelect(HloInstruction* select) {
   thunk_sequence_->push_back(
       BuildKernelThunk(select, /*implements_whole_instruction=*/true));
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index bd5db72051..2e36e7235b 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -76,6 +76,7 @@ class IrEmitterUnnested : public IrEmitter {
   Status HandleInfeed(HloInstruction* xla_infeed) override;
   Status HandleOutfeed(HloInstruction* outfeed) override;
   Status HandleRng(HloInstruction* random) override;
+  Status HandleScatter(HloInstruction* scatter) override;
   Status HandleSelect(HloInstruction* select) override;
   Status HandleSort(HloInstruction* sort) override;
   Status HandleTupleSelect(HloInstruction* tuple_select) override;
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index ac6c2c5565..5409f65589 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -75,7 +75,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
-#include "tensorflow/compiler/xla/service/scatter_expander.h"
 #include "tensorflow/compiler/xla/service/transpose_folding.h"
 #include "tensorflow/compiler/xla/service/tuple_simplifier.h"
 #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h"
@@ -176,8 +175,6 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
       // elimination has to come after that pass.
       pipeline.AddPass<ZeroSizedHloElimination>();
 
-      pipeline.AddPass<ScatterExpander>();
-
       pass.AddPass<AlgebraicSimplifier>(
           /*is_layout_sensitive=*/false,
           [](const Shape&, const Shape&) { return false; });
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index ad65b147c1..2cf5fc94ac 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -1908,6 +1908,7 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kRemainder:
     case HloOpcode::kReverse:
     case HloOpcode::kRoundNearestAfz:
+    case HloOpcode::kScatter:
     case HloOpcode::kSelect:
     case HloOpcode::kSelectAndScatter:
     case HloOpcode::kShiftLeft:
@@ -1946,7 +1947,6 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kReduce:
     case HloOpcode::kReshape:
     case HloOpcode::kRng:
-    case HloOpcode::kScatter:
     case HloOpcode::kSend:
     case HloOpcode::kSendDone:
     case HloOpcode::kAfterAll:
-- 
GitLab


From eaaa3cebc72766dc55e5db5c8cb53fca0d1d0215 Mon Sep 17 00:00:00 2001
From: Muhammad Wildan <wildanrgssport@gmail.com>
Date: Wed, 10 Oct 2018 03:41:38 +0700
Subject: [PATCH 0624/1085] Update README.md

---
 README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 57efb876c9..c582cf873c 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,12 @@ subscribing to
 [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce).
 
 ## Installation
-*See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions on how to install our release binaries or how to build from source.*
+for install current release for CPU-only:
+pip install tensorflow 
+
+GPU package for CUDA-enabled GPU cards:
+pip install tensorflow-gpu
+
 
 People who are a little more adventurous can also try our nightly binaries:
 
-- 
GitLab


From 7e11278b5905fd2252e7c0ec245cde4af5c67c51 Mon Sep 17 00:00:00 2001
From: Muhammad Wildan <wildanrgssport@gmail.com>
Date: Wed, 10 Oct 2018 03:42:18 +0700
Subject: [PATCH 0625/1085] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index c582cf873c..62edc4c0b2 100644
--- a/README.md
+++ b/README.md
@@ -30,9 +30,11 @@ subscribing to
 
 ## Installation
 for install current release for CPU-only:
+
 pip install tensorflow 
 
 GPU package for CUDA-enabled GPU cards:
+
 pip install tensorflow-gpu
 
 
-- 
GitLab


From 56a14850210374491a09506b987b02038ae2b03e Mon Sep 17 00:00:00 2001
From: Muhammad Wildan <wildanrgssport@gmail.com>
Date: Wed, 10 Oct 2018 03:43:24 +0700
Subject: [PATCH 0626/1085] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 62edc4c0b2..9f3431fc4b 100644
--- a/README.md
+++ b/README.md
@@ -31,11 +31,11 @@ subscribing to
 ## Installation
 for install current release for CPU-only:
 
-pip install tensorflow 
+*pip install tensorflow*
 
 GPU package for CUDA-enabled GPU cards:
 
-pip install tensorflow-gpu
+*pip install tensorflow-gpu*
 
 
 People who are a little more adventurous can also try our nightly binaries:
-- 
GitLab


From 2499e8d4e0d960b96fe049c0f299c2d034305edf Mon Sep 17 00:00:00 2001
From: Muhammad Wildan <wildanrgssport@gmail.com>
Date: Wed, 10 Oct 2018 03:44:06 +0700
Subject: [PATCH 0627/1085] Update README.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 9f3431fc4b..34406f4ed7 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,9 @@ GPU package for CUDA-enabled GPU cards:
 *pip install tensorflow-gpu*
 
 
+
+
+
 People who are a little more adventurous can also try our nightly binaries:
 
 **Nightly pip packages**
-- 
GitLab


From 7b2f26280df8dee266d66e01a7ffac7a7eb25247 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 13:51:27 -0700
Subject: [PATCH 0628/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216416117

---
 tensorflow/go/op/wrappers.go | 728 +++++++++++++++++------------------
 1 file changed, 364 insertions(+), 364 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index eb6df2af46..f35117084a 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -4396,6 +4396,172 @@ func Snapshot(scope *Scope, input tf.Output) (output tf.Output) {
 	return op.Output(0)
 }
 
+// Forwards `data` to the output port determined by `pred`.
+//
+// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
+// the data goes to `output_false`.
+//
+// See also `RefSwitch` and `Merge`.
+//
+// Arguments:
+//	data: The tensor to be forwarded to the appropriate output.
+//	pred: A scalar that specifies which output port will receive data.
+//
+// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output.
+func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Switch",
+		Input: []tf.Input{
+			data, pred,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
+// AudioSpectrogramAttr is an optional argument to AudioSpectrogram.
+type AudioSpectrogramAttr func(optionalAttr)
+
+// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value.
+//
+// value: Whether to return the squared magnitude or just the
+// magnitude. Using squared magnitude can avoid extra calculations.
+// If not specified, defaults to false
+func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr {
+	return func(m optionalAttr) {
+		m["magnitude_squared"] = value
+	}
+}
+
+// Produces a visualization of audio data over time.
+//
+// Spectrograms are a standard way of representing audio information as a series of
+// slices of frequency information, one slice for each window of time. By joining
+// these together into a sequence, they form a distinctive fingerprint of the sound
+// over time.
+//
+// This op expects to receive audio data as an input, stored as floats in the range
+// -1 to 1, together with a window width in samples, and a stride specifying how
+// far to move the window between slices. From this it generates a three
+// dimensional output. The lowest dimension has an amplitude value for each
+// frequency during that time slice. The next dimension is time, with successive
+// frequency slices. The final dimension is for the channels in the input, so a
+// stereo audio input would have two here for example.
+//
+// This means the layout when converted and saved as an image is rotated 90 degrees
+// clockwise from a typical spectrogram. Time is descending down the Y axis, and
+// the frequency decreases from left to right.
+//
+// Each value in the result represents the square root of the sum of the real and
+// imaginary parts of an FFT on the current window of samples. In this way, the
+// lowest dimension represents the power of each frequency in the current window,
+// and adjacent windows are concatenated in the next dimension.
+//
+// To get a more intuitive and visual look at what this operation does, you can run
+// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
+// resulting spectrogram as a PNG image.
+//
+// Arguments:
+//	input: Float representation of audio data.
+//	window_size: How wide the input window is in samples. For the highest efficiency
+// this should be a power of two, but other values are accepted.
+//	stride: How widely apart the center of adjacent sample windows should be.
+//
+// Returns 3D representation of the audio frequencies as an image.
+func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"window_size": window_size, "stride": stride}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "AudioSpectrogram",
+		Input: []tf.Input{
+			input,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder.
+type CTCBeamSearchDecoderAttr func(optionalAttr)
+
+// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value.
+//
+// value: If true, merge repeated classes in output.
+// If not specified, defaults to true
+func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr {
+	return func(m optionalAttr) {
+		m["merge_repeated"] = value
+	}
+}
+
+// Performs beam search decoding on the logits given in input.
+//
+// A note about the attribute merge_repeated: For the beam search decoder,
+// this means that if consecutive entries in a beam are the same, only
+// the first of these is emitted.  That is, when the top path is "A B B B B",
+// "A B" is returned if merge_repeated = True but "A B B B B" is
+// returned if merge_repeated = False.
+//
+// Arguments:
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	sequence_length: A vector containing sequence lengths, size `(batch)`.
+//	beam_width: A scalar >= 0 (beam search beam width).
+//	top_paths: A scalar >= 0, <= beam_width (controls output size).
+//
+// Returns A list (length: top_paths) of indices matrices.  Matrix j,
+// size `(total_decoded_outputs[j] x 2)`, has indices of a
+// `SparseTensor<int64, 2>`.  The rows store: [batch, time].A list (length: top_paths) of values vectors.  Vector j,
+// size `(length total_decoded_outputs[j])`, has the values of a
+// `SparseTensor<int64, 2>`.  The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector.  Vector j,
+// size `(2)`, stores the shape of the decoded `SparseTensor[j]`.
+// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`.  The
+// sequence log-probabilities.
+func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CTCBeamSearchDecoder",
+		Input: []tf.Input{
+			inputs, sequence_length,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil {
+		scope.UpdateErr("CTCBeamSearchDecoder", err)
+		return
+	}
+	if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil {
+		scope.UpdateErr("CTCBeamSearchDecoder", err)
+		return
+	}
+	if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil {
+		scope.UpdateErr("CTCBeamSearchDecoder", err)
+		return
+	}
+	log_probability = op.Output(idx)
+	return decoded_indices, decoded_values, decoded_shape, log_probability
+}
+
 // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
 type ResourceStridedSliceAssignAttr func(optionalAttr)
 
@@ -5662,90 +5828,6 @@ func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_i
 	return op.Output(0)
 }
 
-// Computes natural logarithm of (1 + x) element-wise.
-//
-// I.e., \\(y = \log_e (1 + x)\\).
-func Log1p(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Log1p",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes rectified linear 6 gradients for a Relu6 operation.
-//
-// Arguments:
-//	gradients: The backpropagated gradients to the corresponding Relu6 operation.
-//	features: The features passed as input to the corresponding Relu6 operation, or
-// its output; using either one produces the same result.
-//
-// Returns The gradients:
-// `gradients * (features > 0) * (features < 6)`.
-func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Relu6Grad",
-		Input: []tf.Input{
-			gradients, features,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResizeBicubicAttr is an optional argument to ResizeBicubic.
-type ResizeBicubicAttr func(optionalAttr)
-
-// ResizeBicubicAlignCorners sets the optional align_corners attribute to value.
-//
-// value: If true, the centers of the 4 corner pixels of the input and output tensors are
-// aligned, preserving the values at the corner pixels. Defaults to false.
-// If not specified, defaults to false
-func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr {
-	return func(m optionalAttr) {
-		m["align_corners"] = value
-	}
-}
-
-// Resize `images` to `size` using bicubic interpolation.
-//
-// Input images can be of different types but output images are always float.
-//
-// Arguments:
-//	images: 4-D with shape `[batch, height, width, channels]`.
-//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
-// new size for the images.
-//
-// Returns 4-D with shape
-// `[batch, new_height, new_width, channels]`.
-func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ResizeBicubic",
-		Input: []tf.Input{
-			images, size,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Computes natural logarithm of x element-wise.
 //
 // I.e., \\(y = \log_e x\\).
@@ -5886,146 +5968,6 @@ func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
-// AudioSpectrogramAttr is an optional argument to AudioSpectrogram.
-type AudioSpectrogramAttr func(optionalAttr)
-
-// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value.
-//
-// value: Whether to return the squared magnitude or just the
-// magnitude. Using squared magnitude can avoid extra calculations.
-// If not specified, defaults to false
-func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr {
-	return func(m optionalAttr) {
-		m["magnitude_squared"] = value
-	}
-}
-
-// Produces a visualization of audio data over time.
-//
-// Spectrograms are a standard way of representing audio information as a series of
-// slices of frequency information, one slice for each window of time. By joining
-// these together into a sequence, they form a distinctive fingerprint of the sound
-// over time.
-//
-// This op expects to receive audio data as an input, stored as floats in the range
-// -1 to 1, together with a window width in samples, and a stride specifying how
-// far to move the window between slices. From this it generates a three
-// dimensional output. The lowest dimension has an amplitude value for each
-// frequency during that time slice. The next dimension is time, with successive
-// frequency slices. The final dimension is for the channels in the input, so a
-// stereo audio input would have two here for example.
-//
-// This means the layout when converted and saved as an image is rotated 90 degrees
-// clockwise from a typical spectrogram. Time is descending down the Y axis, and
-// the frequency decreases from left to right.
-//
-// Each value in the result represents the square root of the sum of the real and
-// imaginary parts of an FFT on the current window of samples. In this way, the
-// lowest dimension represents the power of each frequency in the current window,
-// and adjacent windows are concatenated in the next dimension.
-//
-// To get a more intuitive and visual look at what this operation does, you can run
-// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
-// resulting spectrogram as a PNG image.
-//
-// Arguments:
-//	input: Float representation of audio data.
-//	window_size: How wide the input window is in samples. For the highest efficiency
-// this should be a power of two, but other values are accepted.
-//	stride: How widely apart the center of adjacent sample windows should be.
-//
-// Returns 3D representation of the audio frequencies as an image.
-func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"window_size": window_size, "stride": stride}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "AudioSpectrogram",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder.
-type CTCBeamSearchDecoderAttr func(optionalAttr)
-
-// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value.
-//
-// value: If true, merge repeated classes in output.
-// If not specified, defaults to true
-func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr {
-	return func(m optionalAttr) {
-		m["merge_repeated"] = value
-	}
-}
-
-// Performs beam search decoding on the logits given in input.
-//
-// A note about the attribute merge_repeated: For the beam search decoder,
-// this means that if consecutive entries in a beam are the same, only
-// the first of these is emitted.  That is, when the top path is "A B B B B",
-// "A B" is returned if merge_repeated = True but "A B B B B" is
-// returned if merge_repeated = False.
-//
-// Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	sequence_length: A vector containing sequence lengths, size `(batch)`.
-//	beam_width: A scalar >= 0 (beam search beam width).
-//	top_paths: A scalar >= 0, <= beam_width (controls output size).
-//
-// Returns A list (length: top_paths) of indices matrices.  Matrix j,
-// size `(total_decoded_outputs[j] x 2)`, has indices of a
-// `SparseTensor<int64, 2>`.  The rows store: [batch, time].A list (length: top_paths) of values vectors.  Vector j,
-// size `(length total_decoded_outputs[j])`, has the values of a
-// `SparseTensor<int64, 2>`.  The vector stores the decoded classes for beam j.A list (length: top_paths) of shape vector.  Vector j,
-// size `(2)`, stores the shape of the decoded `SparseTensor[j]`.
-// Its values are: `[batch_size, max_decoded_length[j]]`.A matrix, shaped: `(batch_size x top_paths)`.  The
-// sequence log-probabilities.
-func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CTCBeamSearchDecoder",
-		Input: []tf.Input{
-			inputs, sequence_length,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil {
-		scope.UpdateErr("CTCBeamSearchDecoder", err)
-		return
-	}
-	if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil {
-		scope.UpdateErr("CTCBeamSearchDecoder", err)
-		return
-	}
-	if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil {
-		scope.UpdateErr("CTCBeamSearchDecoder", err)
-		return
-	}
-	log_probability = op.Output(idx)
-	return decoded_indices, decoded_values, decoded_shape, log_probability
-}
-
 // MatrixInverseAttr is an optional argument to MatrixInverse.
 type MatrixInverseAttr func(optionalAttr)
 
@@ -9615,25 +9557,109 @@ func DecodeRawLittleEndian(value bool) DecodeRawAttr {
 
 // Reinterpret the bytes of a string as a vector of numbers.
 //
-// Arguments:
-//	bytes: All the elements must have the same length.
+// Arguments:
+//	bytes: All the elements must have the same length.
+//
+//
+// Returns A Tensor with one more dimension than the input `bytes`.  The
+// added dimension will have size equal to the length of the elements
+// of `bytes` divided by the number of bytes to represent `out_type`.
+func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DecodeRaw",
+		Input: []tf.Input{
+			bytes,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes natural logarithm of (1 + x) element-wise.
+//
+// I.e., \\(y = \log_e (1 + x)\\).
+func Log1p(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Log1p",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes rectified linear 6 gradients for a Relu6 operation.
+//
+// Arguments:
+//	gradients: The backpropagated gradients to the corresponding Relu6 operation.
+//	features: The features passed as input to the corresponding Relu6 operation, or
+// its output; using either one produces the same result.
+//
+// Returns The gradients:
+// `gradients * (features > 0) * (features < 6)`.
+func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Relu6Grad",
+		Input: []tf.Input{
+			gradients, features,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResizeBicubicAttr is an optional argument to ResizeBicubic.
+type ResizeBicubicAttr func(optionalAttr)
+
+// ResizeBicubicAlignCorners sets the optional align_corners attribute to value.
+//
+// value: If true, the centers of the 4 corner pixels of the input and output tensors are
+// aligned, preserving the values at the corner pixels. Defaults to false.
+// If not specified, defaults to false
+func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr {
+	return func(m optionalAttr) {
+		m["align_corners"] = value
+	}
+}
+
+// Resize `images` to `size` using bicubic interpolation.
+//
+// Input images can be of different types but output images are always float.
 //
+// Arguments:
+//	images: 4-D with shape `[batch, height, width, channels]`.
+//	size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+// new size for the images.
 //
-// Returns A Tensor with one more dimension than the input `bytes`.  The
-// added dimension will have size equal to the length of the elements
-// of `bytes` divided by the number of bytes to represent `out_type`.
-func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) {
+// Returns 4-D with shape
+// `[batch, new_height, new_width, channels]`.
+func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"out_type": out_type}
+	attrs := map[string]interface{}{}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "DecodeRaw",
+		Type: "ResizeBicubic",
 		Input: []tf.Input{
-			bytes,
+			images, size,
 		},
 		Attrs: attrs,
 	}
@@ -9641,6 +9667,52 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...
 	return op.Output(0)
 }
 
+// Greedily selects a subset of bounding boxes in descending order of score,
+//
+// pruning away boxes that have high intersection-over-union (IOU) overlap
+// with previously selected boxes.  Bounding boxes are supplied as
+// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+// diagonal pair of box corners and the coordinates can be provided as normalized
+// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+// is agnostic to where the origin is in the coordinate system.  Note that this
+// algorithm is invariant to orthogonal transformations and translations
+// of the coordinate system; thus translating or reflections of the coordinate
+// system result in the same boxes being selected by the algorithm.
+//
+// The output of this operation is a set of integers indexing into the input
+// collection of bounding boxes representing the selected boxes.  The bounding
+// box coordinates corresponding to the selected indices can then be obtained
+// using the `tf.gather operation`.  For example:
+//
+//   selected_indices = tf.image.non_max_suppression_v2(
+//       boxes, scores, max_output_size, iou_threshold)
+//   selected_boxes = tf.gather(boxes, selected_indices)
+//
+// Arguments:
+//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
+//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
+// score corresponding to each box (each row of boxes).
+//	max_output_size: A scalar integer tensor representing the maximum number of
+// boxes to be selected by non max suppression.
+//	iou_threshold: A 0-D float tensor representing the threshold for deciding whether
+// boxes overlap too much with respect to IOU.
+//
+// Returns A 1-D integer tensor of shape `[M]` representing the selected
+// indices from the boxes tensor, where `M <= max_output_size`.
+func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "NonMaxSuppressionV2",
+		Input: []tf.Input{
+			boxes, scores, max_output_size, iou_threshold,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // RandomShuffleAttr is an optional argument to RandomShuffle.
 type RandomShuffleAttr func(optionalAttr)
 
@@ -19332,65 +19404,6 @@ func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_
 	return op.Output(0)
 }
 
-// Computes the sum along segments of a tensor.
-//
-// Read
-// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
-// for an explanation of segments.
-//
-// Computes a tensor such that
-// \\(output_i = \sum_j data_j\\) where sum is over `j` such
-// that `segment_ids[j] == i`.
-//
-// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentSum.png" alt>
-// </div>
-//
-// Arguments:
-//
-//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
-// first dimension.  Values should be sorted and can be repeated.
-//
-// Returns Has same shape as data, except for dimension 0 which
-// has size `k`, the number of segments.
-func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SegmentSum",
-		Input: []tf.Input{
-			data, segment_ids,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Creates a dataset that emits the lines of one or more text files.
-//
-// Arguments:
-//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
-// read.
-//	compression_type: A scalar containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-//	buffer_size: A scalar containing the number of bytes to buffer.
-func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TextLineDataset",
-		Input: []tf.Input{
-			filenames, compression_type, buffer_size,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Returns the set of files matching one or more glob patterns.
 //
 // Note that this routine only supports wildcard characters in the
@@ -21888,6 +21901,65 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Creates a dataset that emits the lines of one or more text files.
+//
+// Arguments:
+//	filenames: A scalar or a vector containing the name(s) of the file(s) to be
+// read.
+//	compression_type: A scalar containing either (i) the empty string (no
+// compression), (ii) "ZLIB", or (iii) "GZIP".
+//	buffer_size: A scalar containing the number of bytes to buffer.
+func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "TextLineDataset",
+		Input: []tf.Input{
+			filenames, compression_type, buffer_size,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the sum along segments of a tensor.
+//
+// Read
+// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation)
+// for an explanation of segments.
+//
+// Computes a tensor such that
+// \\(output_i = \sum_j data_j\\) where sum is over `j` such
+// that `segment_ids[j] == i`.
+//
+// If the sum is empty for a given segment ID `i`, `output[i] = 0`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src="https://www.tensorflow.org/images/SegmentSum.png" alt>
+// </div>
+//
+// Arguments:
+//
+//	segment_ids: A 1-D tensor whose size is equal to the size of `data`'s
+// first dimension.  Values should be sorted and can be repeated.
+//
+// Returns Has same shape as data, except for dimension 0 which
+// has size `k`, the number of segments.
+func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SegmentSum",
+		Input: []tf.Input{
+			data, segment_ids,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes the mean along segments of a tensor.
 //
 // Read
@@ -27977,52 +28049,6 @@ func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr)
 	return op.Output(0)
 }
 
-// Greedily selects a subset of bounding boxes in descending order of score,
-//
-// pruning away boxes that have high intersection-over-union (IOU) overlap
-// with previously selected boxes.  Bounding boxes are supplied as
-// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
-// diagonal pair of box corners and the coordinates can be provided as normalized
-// (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
-// is agnostic to where the origin is in the coordinate system.  Note that this
-// algorithm is invariant to orthogonal transformations and translations
-// of the coordinate system; thus translating or reflections of the coordinate
-// system result in the same boxes being selected by the algorithm.
-//
-// The output of this operation is a set of integers indexing into the input
-// collection of bounding boxes representing the selected boxes.  The bounding
-// box coordinates corresponding to the selected indices can then be obtained
-// using the `tf.gather operation`.  For example:
-//
-//   selected_indices = tf.image.non_max_suppression_v2(
-//       boxes, scores, max_output_size, iou_threshold)
-//   selected_boxes = tf.gather(boxes, selected_indices)
-//
-// Arguments:
-//	boxes: A 2-D float tensor of shape `[num_boxes, 4]`.
-//	scores: A 1-D float tensor of shape `[num_boxes]` representing a single
-// score corresponding to each box (each row of boxes).
-//	max_output_size: A scalar integer tensor representing the maximum number of
-// boxes to be selected by non max suppression.
-//	iou_threshold: A 0-D float tensor representing the threshold for deciding whether
-// boxes overlap too much with respect to IOU.
-//
-// Returns A 1-D integer tensor of shape `[M]` representing the selected
-// indices from the boxes tensor, where `M <= max_output_size`.
-func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "NonMaxSuppressionV2",
-		Input: []tf.Input{
-			boxes, scores, max_output_size, iou_threshold,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Greedily selects a subset of bounding boxes in descending order of score,
 //
 // pruning away boxes that have high intersection-over-union (IOU) overlap
@@ -33131,29 +33157,3 @@ func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output,
 	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
-
-// Forwards `data` to the output port determined by `pred`.
-//
-// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise,
-// the data goes to `output_false`.
-//
-// See also `RefSwitch` and `Merge`.
-//
-// Arguments:
-//	data: The tensor to be forwarded to the appropriate output.
-//	pred: A scalar that specifies which output port will receive data.
-//
-// Returns If `pred` is false, data will be forwarded to this output.If `pred` is true, data will be forwarded to this output.
-func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Switch",
-		Input: []tf.Input{
-			data, pred,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-- 
GitLab


From 1f556d3a4172c30cf461e7e66334b70ffad2d559 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 14:03:23 -0700
Subject: [PATCH 0629/1085] Do not create a graph as a global variable in
 tests.

PiperOrigin-RevId: 216418324
---
 .../copy_graph/python/util/copy_test.py       | 31 ++++++++++++-------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/copy_graph/python/util/copy_test.py b/tensorflow/contrib/copy_graph/python/util/copy_test.py
index ba97c78456..4d8651a79f 100644
--- a/tensorflow/contrib/copy_graph/python/util/copy_test.py
+++ b/tensorflow/contrib/copy_graph/python/util/copy_test.py
@@ -26,15 +26,16 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
-graph1 = ops.Graph()
-graph2 = ops.Graph()
-
 
 class CopyVariablesTest(test.TestCase):
 
+  def setUp(self):
+    self.graph1 = ops.Graph()
+    self.graph2 = ops.Graph()
+
   def testVariableCopy(self):
 
-    with graph1.as_default():
+    with self.graph1.as_default():
       #Define a Variable in graph1
       some_var = variables.VariableV1(2)
       #Initialize session
@@ -43,13 +44,15 @@ class CopyVariablesTest(test.TestCase):
       variables.global_variables_initializer().run(session=sess1)
 
     #Make a copy of some_var in the defsult scope in graph2
-    copy1 = copy_elements.copy_variable_to_graph(some_var, graph2)
+    copy1 = copy_elements.copy_variable_to_graph(some_var, self.graph2)
 
     #Make another copy with different scope
-    copy2 = copy_elements.copy_variable_to_graph(some_var, graph2, "test_scope")
+    copy2 = copy_elements.copy_variable_to_graph(some_var,
+                                                 self.graph2,
+                                                 "test_scope")
 
     #Initialize both the copies
-    with graph2.as_default():
+    with self.graph2.as_default():
       #Initialize Session
       sess2 = session_lib.Session()
       #Initialize the Variables
@@ -67,9 +70,13 @@ class CopyVariablesTest(test.TestCase):
 
 class CopyOpsTest(test.TestCase):
 
+  def setUp(self):
+    self.graph1 = ops.Graph()
+    self.graph2 = ops.Graph()
+
   def testOpsCopy(self):
 
-    with graph1.as_default():
+    with self.graph1.as_default():
       #Initialize a basic expression y = ax + b
       x = array_ops.placeholder("float")
       a = variables.VariableV1(3.0)
@@ -82,21 +89,21 @@ class CopyOpsTest(test.TestCase):
       variables.global_variables_initializer().run(session=sess1)
 
     #First, initialize a as a Variable in graph2
-    a1 = copy_elements.copy_variable_to_graph(a, graph2)
+    a1 = copy_elements.copy_variable_to_graph(a, self.graph2)
 
     #Initialize a1 in graph2
-    with graph2.as_default():
+    with self.graph2.as_default():
       #Initialize session
       sess2 = session_lib.Session()
       #Initialize the Variable
       variables.global_variables_initializer().run(session=sess2)
 
     #Initialize a copy of y in graph2
-    y1 = copy_elements.copy_op_to_graph(y, graph2, [a1])
+    y1 = copy_elements.copy_op_to_graph(y, self.graph2, [a1])
 
     #Now that y has been copied, x must be copied too.
     #Get that instance
-    x1 = copy_elements.get_copied_op(x, graph2)
+    x1 = copy_elements.get_copied_op(x, self.graph2)
 
     #Compare values of y & y1 for a sample input
     #and check if they match
-- 
GitLab


From 5785c0202f4f84c464ef22d0ff180730813f59f3 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 9 Oct 2018 14:04:23 -0700
Subject: [PATCH 0630/1085] Improve the control flow conversion for loops by
 using dataflow analysis to construct the state. This is part of a larger
 refactoring which removes the reliance on the deprecated Scope.created field.

PiperOrigin-RevId: 216418556
---
 .../autograph/converters/control_flow.py      | 162 ++++++++++--------
 .../autograph/converters/control_flow_test.py |   4 +-
 .../python/autograph/pyct/qual_names.py       |   3 +
 3 files changed, 93 insertions(+), 76 deletions(-)

diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py
index 416a60d2ee..70879f6c97 100644
--- a/tensorflow/python/autograph/converters/control_flow.py
+++ b/tensorflow/python/autograph/converters/control_flow.py
@@ -90,23 +90,11 @@ class ControlFlowTransformer(converter.Base):
       return templates.replace(
           template, test=test, body_name=body_name, orelse_name=orelse_name)
 
-  def _fmt_symbol_list(self, symbol_set):
+  def _fmt_symbols(self, symbol_set):
     if not symbol_set:
       return 'no variables'
     return ', '.join(map(str, symbol_set))
 
-  def _validate_no_live_vars_created(self, node):
-    body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE)
-    live_vars_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT)
-    live_vars_created_in_body = live_vars_out & body_scope.created
-    if live_vars_created_in_body:
-      raise ValueError(
-          'The following variables are created inside the loop and used later:'
-          '\n%s\n'
-          'Variables must be declared outside loops because loops may not'
-          ' necessarily execute.' % self._fmt_symbol_list(
-              live_vars_created_in_body))
-
   def visit_If(self, node):
     node = self.generic_visit(node)
 
@@ -138,8 +126,8 @@ class ControlFlowTransformer(converter.Base):
           ' creates %s, while the false branch creates %s. Make sure all'
           ' these variables are initialized either in both'
           ' branches or before the if statement.' %
-          (self._fmt_symbol_list(created_in_body),
-           self._fmt_symbol_list(created_in_orelse)))
+          (self._fmt_symbols(created_in_body),
+           self._fmt_symbols(created_in_orelse)))
 
     # Alias the closure variables inside the conditional functions, to allow
     # the functions access to the respective variables.
@@ -206,51 +194,97 @@ class ControlFlowTransformer(converter.Base):
 
     return body_def + orelse_def + cond_expr
 
-  def visit_While(self, node):
-    self.generic_visit(node)
-
-    self._validate_no_live_vars_created(node)
-
+  def _get_loop_state(self, node):
     body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE)
-    body_closure = body_scope.modified - body_scope.created
-    all_referenced = body_scope.referenced
-
-    cond_scope = anno.getanno(node, annos.NodeAnno.COND_SCOPE)
-    cond_closure = set()
-    for s in cond_scope.used:
-      for root in s.support_set:
-        if root not in body_scope.created:
-          cond_closure.add(root)
-
-    state = list(body_closure)
-    if not state:
+    defined_in = anno.getanno(node, anno.Static.DEFINED_VARS_IN)
+    live_in = anno.getanno(node, anno.Static.LIVE_VARS_IN)
+    live_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT)
+    reserved_symbols = body_scope.referenced
+
+    # Note that it doesn't matter whether the variables are live after the loop.
+    # If the loop modifies them nonlocally (e.g. the result of an iteration
+    # depends on the previous iteration), then they need to be included in
+    # the loop state, regardless of whether they are later used or not.
+    loop_state = body_scope.modified & live_in
+
+    undefined_lives = loop_state - defined_in
+    # Only simple variables must be defined. The composite ones will be
+    # implicitly checked at runtime.
+    undefined_simple_lives = {v for v in undefined_lives if v.is_simple()}
+    if undefined_simple_lives:
+      raise NameError(
+          'cannot convert loop: it includes symbols that are undefined'
+          ' when entering the loop: {}'.format(
+              self._fmt_symbols(undefined_simple_lives)))
+
+    live_defs_in_loop = (body_scope.modified - live_in) & live_out
+    if live_defs_in_loop:
+      # TODO(mdan): Include reference to explanation why.
+      raise NotImplementedError(
+          'cannot convert loop: it includes symbols that are defined'
+          ' inside the loop, but used later: {}. To fix, initialize'
+          ' these symbols before the loop'.format(
+              self._fmt_symbols(live_defs_in_loop)))
+
+    if not loop_state:
       # TODO(mdan): Implement this properly.
-      # To complete this statement, we need to check whether any variable
-      # created inside the body scope is used before being modified outside the
-      # scope. This should be done during activity analysis, and in general
-      # should cover the case where variables may not be initialized.
-      raise ValueError('cannot convert while loop: no outputs')
+      # We need to check whether any variable created inside the body scope
+      # is used before being modified outside the scope. This should be done
+      # during activity analysis, and in general should cover the case where
+      # variables may not be initialized.
+      raise ValueError('cannot convert loop: no outputs')
+
+    return loop_state, reserved_symbols
 
+  def _state_constructs(self, loop_state, reserved_symbols):
+    loop_state = list(loop_state)
     state_ssf = [
-        self.ctx.namer.new_symbol(s.ssf(), all_referenced) for s in state
+        self.ctx.namer.new_symbol(s.ssf(), reserved_symbols) for s in loop_state
     ]
     ssf_map = {
         name: ssf
-        for name, ssf in zip(state, state_ssf)
+        for name, ssf in zip(loop_state, state_ssf)
         if str(name) != ssf
     }
 
-    if len(state) == 1:
-      state = state[0]
+    if len(loop_state) == 1:
+      loop_state = loop_state[0]
       state_ssf = state_ssf[0]
-      state_ast_tuple = state
+      state_ast_tuple = loop_state
     else:
-      state_ast_tuple = gast.Tuple([n.ast() for n in state], None)
+      state_ast_tuple = gast.Tuple([n.ast() for n in loop_state], None)
+
+    return loop_state, state_ssf, state_ast_tuple, ssf_map
+
+  def visit_While(self, node):
+    self.generic_visit(node)
 
+    loop_state, reserved_symbols = self._get_loop_state(node)
+
+    # Note: one might expect we can dispatch based on the loop condition.
+    # But because that is dependent on the state, it cannot be evaluated ahead
+    # of time - doing that would risk duplicating any effects the condition has.
+    # Furthermore, we cannot evaluate slices and attributes, because they might
+    # trigger __getitem__ or __getattribute__.
+    #
+    # A case where this fails includes ops with side effects on a stateful
+    # resource captured in an object:
+    #
+    #   while self.v.read() > 0:
+    #     self.v.assign(1)
+    #
+    # TODO(mdan): Handle the case above.
+    cond_scope = anno.getanno(node, annos.NodeAnno.COND_SCOPE)
+    cond_closure = set()
+    for s in cond_scope.used:
+      cond_closure.update(s.support_set)
+    cond_closure -= loop_state
+
+    loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs(
+        loop_state, reserved_symbols)
     node_body = ast_util.rename_symbols(node.body, ssf_map)
     test = ast_util.rename_symbols(node.test, ssf_map)
 
-    # TODO(b/113118541) investigate the need-for and correctness-of extra_deps
     template = """
       def test_name(state_ssf):
         return test
@@ -262,12 +296,12 @@ class ControlFlowTransformer(converter.Base):
     """
     node = templates.replace(
         template,
-        state=state,
+        state=loop_state,
         state_ssf=state_ssf,
         state_ast_tuple=state_ast_tuple,
-        test_name=self.ctx.namer.new_symbol('loop_test', body_scope.referenced),
+        test_name=self.ctx.namer.new_symbol('loop_test', reserved_symbols),
         test=test,
-        body_name=self.ctx.namer.new_symbol('loop_body', body_scope.referenced),
+        body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
         body=node_body,
         extra_deps=tuple(s.ast() for s in cond_closure),
     )
@@ -277,30 +311,9 @@ class ControlFlowTransformer(converter.Base):
   def visit_For(self, node):
     self.generic_visit(node)
 
-    self._validate_no_live_vars_created(node)
-
-    body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE)
-    body_closure = body_scope.modified - body_scope.created
-    all_referenced = body_scope.referenced
-
-    state = list(body_closure)
-
-    state_ssf = [
-        self.ctx.namer.new_symbol(s.ssf(), all_referenced) for s in state
-    ]
-    ssf_map = {
-        name: ssf
-        for name, ssf in zip(state, state_ssf)
-        if str(name) != ssf
-    }
-
-    if len(state) == 1:
-      state = state[0]
-      state_ssf = state_ssf[0]
-      state_ast_tuple = state
-    else:
-      state_ast_tuple = gast.Tuple([n.ast() for n in state], None)
-
+    loop_state, reserved_symbols = self._get_loop_state(node)
+    loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs(
+        loop_state, reserved_symbols)
     node_body = ast_util.rename_symbols(node.body, ssf_map)
     if anno.hasanno(node, 'extra_test'):
       extra_test = anno.getanno(node, 'extra_test')
@@ -321,14 +334,15 @@ class ControlFlowTransformer(converter.Base):
     """
     node = templates.replace(
         template,
-        state=state,
+        state=loop_state,
         state_ssf=state_ssf,
         state_ast_tuple=state_ast_tuple,
         iter_=node.iter,
         iterate=node.target,
-        extra_test_name=self.ctx.namer.new_symbol('extra_test', all_referenced),
+        extra_test_name=self.ctx.namer.new_symbol('extra_test',
+                                                  reserved_symbols),
         extra_test_expr=extra_test,
-        body_name=self.ctx.namer.new_symbol('loop_body', all_referenced),
+        body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
         body=node_body)
 
     return node
diff --git a/tensorflow/python/autograph/converters/control_flow_test.py b/tensorflow/python/autograph/converters/control_flow_test.py
index cfa0ea920c..03fdfc804e 100644
--- a/tensorflow/python/autograph/converters/control_flow_test.py
+++ b/tensorflow/python/autograph/converters/control_flow_test.py
@@ -83,7 +83,7 @@ class ControlFlowTest(converter_testing.TestCase):
       return s
 
     node, ctx = self.prepare(bad_while_loop, {})
-    with self.assertRaises(transformer.AutographParseError):
+    with self.assertRaises(NameError):
       control_flow.transform(node, ctx)
 
   def test_if_basic(self):
@@ -232,7 +232,7 @@ class ControlFlowTest(converter_testing.TestCase):
       return s
 
     node, ctx = self.prepare(bad_for_loop, {})
-    with self.assertRaises(transformer.AutographParseError):
+    with self.assertRaises(NameError):
       control_flow.transform(node, ctx)
 
   def test_for_tuple_unpacking(self):
diff --git a/tensorflow/python/autograph/pyct/qual_names.py b/tensorflow/python/autograph/pyct/qual_names.py
index 334cbd7d38..6ad6199acf 100644
--- a/tensorflow/python/autograph/pyct/qual_names.py
+++ b/tensorflow/python/autograph/pyct/qual_names.py
@@ -99,6 +99,9 @@ class QN(object):
   def is_symbol(self):
     return isinstance(self.qn[0], str)
 
+  def is_simple(self):
+    return len(self.qn) <= 1
+
   def is_composite(self):
     return len(self.qn) > 1
 
-- 
GitLab


From 5c6ea51834ee410586233d67d43bdb4f1729261f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 14:07:03 -0700
Subject: [PATCH 0631/1085] Internal Change

PiperOrigin-RevId: 216419037
---
 tensorflow/contrib/lite/build_def.bzl | 2 ++
 tensorflow/contrib/lite/testing/BUILD | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index b3607a761c..05efee18e7 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -345,6 +345,7 @@ def generated_test_models_all():
             tags = []
             if test in failing_tests:
                 tags.append("notap")
+                tags.append("manual")
             if conversion_mode:
                 test += "_%s" % conversion_mode
             options.append((conversion_mode, test, tags))
@@ -450,6 +451,7 @@ def gen_full_model_test(conversion_modes, models, data, test_suite_tag):
                 "no_oss",
                 "no_windows",
                 "notap",
+                "manual",
             ] + [test_suite_tag],
             deps = [
                 "//tensorflow/contrib/lite/testing:model_coverage_lib",
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 45baad782a..2edd420fea 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -380,6 +380,7 @@ py_test(
     srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "manual",
         "no_oss",
         "no_pip",
         "no_windows",
-- 
GitLab


From 4fa59ef694c19dc63d574b2d6a349cd753d9cdbd Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Tue, 9 Oct 2018 14:11:06 -0700
Subject: [PATCH 0632/1085] [tf.data] Lift parameterized test parameters into
 lambdas if they create TF ops.

The existing code triggers parts of the TensorFlow runtime that may not have been fully
initialized at the time the parameters are evaluated. Lifting into a lambda and invoking
the lambda inside the test method will achieve the proper order.

PiperOrigin-RevId: 216419757
---
 tensorflow/python/data/util/structure_test.py | 61 ++++++++++---------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/tensorflow/python/data/util/structure_test.py b/tensorflow/python/data/util/structure_test.py
index 2982763181..630a0c912b 100644
--- a/tensorflow/python/data/util/structure_test.py
+++ b/tensorflow/python/data/util/structure_test.py
@@ -34,52 +34,56 @@ from tensorflow.python.platform import test
 
 
 class StructureTest(test.TestCase, parameterized.TestCase):
-  # pylint disable=protected-access
 
+  # NOTE(mrry): The arguments must be lifted into lambdas because otherwise they
+  # will be executed before the (eager- or graph-mode) test environment has been
+  # set up.
+  # pylint: disable=g-long-lambda,protected-access
   @parameterized.parameters(
-      (constant_op.constant(37.0), structure.TensorStructure, [dtypes.float32],
-       [[]]), (sparse_tensor.SparseTensor(
-           indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
-               structure.SparseTensorStructure, [dtypes.variant], [[3]]),
-      ((constant_op.constant(37.0), constant_op.constant([1, 2, 3])),
-       structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]), ({
-           "a": constant_op.constant(37.0),
-           "b": constant_op.constant([1, 2, 3])
-       }, structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]),
-      ({
-          "a":
-              constant_op.constant(37.0),
+      (lambda: constant_op.constant(37.0), structure.TensorStructure,
+       [dtypes.float32], [[]]),
+      (lambda: sparse_tensor.SparseTensor(
+          indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
+       structure.SparseTensorStructure, [dtypes.variant], [[3]]),
+      (lambda: (constant_op.constant(37.0), constant_op.constant([1, 2, 3])),
+       structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]),
+      (lambda: {
+          "a": constant_op.constant(37.0),
+          "b": constant_op.constant([1, 2, 3])
+      }, structure.NestedStructure, [dtypes.float32, dtypes.int32], [[], [3]]),
+      (lambda: {
+          "a": constant_op.constant(37.0),
           "b": (sparse_tensor.SparseTensor(
               indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
                 sparse_tensor.SparseTensor(
                     indices=[[3, 4]], values=[-1], dense_shape=[4, 5]))
       }, structure.NestedStructure,
        [dtypes.float32, dtypes.variant, dtypes.variant], [[], [3], [3]]))
-  def testFlatStructure(self, value, expected_structure, expected_types,
+  def testFlatStructure(self, value_fn, expected_structure, expected_types,
                         expected_shapes):
+    value = value_fn()
     s = structure.Structure.from_value(value)
     self.assertIsInstance(s, expected_structure)
     self.assertEqual(expected_types, s._flat_types)
     self.assertEqual(expected_shapes, s._flat_shapes)
 
   @parameterized.parameters(
-      (constant_op.constant(37.0), [
+      (lambda: constant_op.constant(37.0), lambda: [
           constant_op.constant(38.0),
           array_ops.placeholder(dtypes.float32),
           variables.Variable(100.0), 42.0,
           np.array(42.0, dtype=np.float32)
-      ], [constant_op.constant([1.0, 2.0]),
-          constant_op.constant(37)]),
-      (sparse_tensor.SparseTensor(
+      ], lambda: [constant_op.constant([1.0, 2.0]), constant_op.constant(37)]),
+      (lambda: sparse_tensor.SparseTensor(
           indices=[[3, 4]], values=[-1], dense_shape=[4, 5]),
-       [
+       lambda: [
            sparse_tensor.SparseTensor(
                indices=[[1, 1], [3, 4]], values=[10, -1], dense_shape=[4, 5]),
            sparse_tensor.SparseTensorValue(
                indices=[[1, 1], [3, 4]], values=[10, -1], dense_shape=[4, 5]),
            array_ops.sparse_placeholder(dtype=dtypes.int32),
            array_ops.sparse_placeholder(dtype=dtypes.int32, shape=[None, None])
-       ], [
+       ], lambda: [
            constant_op.constant(37, shape=[4, 5]),
            sparse_tensor.SparseTensor(
                indices=[[3, 4]], values=[-1], dense_shape=[5, 6]),
@@ -88,13 +92,13 @@ class StructureTest(test.TestCase, parameterized.TestCase):
            sparse_tensor.SparseTensor(
                indices=[[3, 4]], values=[-1.0], dense_shape=[4, 5])
        ]),
-      ({
+      (lambda: {
           "a": constant_op.constant(37.0),
           "b": constant_op.constant([1, 2, 3])
-      }, [{
+      }, lambda: [{
           "a": constant_op.constant(15.0),
           "b": constant_op.constant([4, 5, 6])
-      }], [{
+      }], lambda: [{
           "a": constant_op.constant(15.0),
           "b": constant_op.constant([4, 5, 6, 7])
       }, {
@@ -108,8 +112,11 @@ class StructureTest(test.TestCase, parameterized.TestCase):
                   indices=[[0], [1], [2]], values=[4, 5, 6], dense_shape=[3])
       }, (constant_op.constant(15.0), constant_op.constant([4, 5, 6]))]),
   )
-  def testIsCompatibleWithStructure(self, original_value, compatible_values,
-                                    incompatible_values):
+  def testIsCompatibleWithStructure(
+      self, original_value_fn, compatible_values_fn, incompatible_values_fn):
+    original_value = original_value_fn()
+    compatible_values = compatible_values_fn()
+    incompatible_values = incompatible_values_fn()
     s = structure.Structure.from_value(original_value)
     for compatible_value in compatible_values:
       self.assertTrue(
@@ -120,10 +127,6 @@ class StructureTest(test.TestCase, parameterized.TestCase):
           s.is_compatible_with(
               structure.Structure.from_value(incompatible_value)))
 
-  # NOTE(mrry): The arguments must be lifted into lambdas because otherwise they
-  # will be executed before the (eager- or graph-mode) test environment has been
-  # set up.
-  # pylint: disable=g-long-lambda
   @parameterized.parameters(
       (lambda: constant_op.constant(37.0),),
       (lambda: sparse_tensor.SparseTensor(
-- 
GitLab


From b145f46b735fe1e383be6629cafaa5269b07b7fb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 14:12:25 -0700
Subject: [PATCH 0633/1085] Add support for time-major input in the
 bidirectional RNN Op.

PiperOrigin-RevId: 216419983
---
 .../kernels/bidirectional_sequence_rnn.cc     | 251 ++++++++++++------
 .../bidirectional_sequence_rnn_test.cc        |  94 +++++--
 2 files changed, 247 insertions(+), 98 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
index c22a457a71..f544dd5ffa 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
@@ -114,8 +114,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
 
   TF_LITE_ENSURE_EQ(context, input->dims->size, 3);
-  const int batch_size = input->dims->data[0];
-  const int max_time = input->dims->data[1];
+  const bool time_major = params->time_major;
+  const int batch_size =
+      (time_major) ? input->dims->data[1] : input->dims->data[0];
+  const int max_time =
+      (time_major) ? input->dims->data[0] : input->dims->data[1];
   const int fw_num_units = fw_input_weights->dims->data[0];
   const int bw_num_units = bw_input_weights->dims->data[0];
   TF_LITE_ASSERT_EQ(input->dims->data[2], fw_input_weights->dims->data[1]);
@@ -237,8 +240,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // Resize outputs.
   TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor);
   TfLiteIntArray* fw_output_size_array = TfLiteIntArrayCreate(3);
-  fw_output_size_array->data[0] = batch_size;
-  fw_output_size_array->data[1] = max_time;
+  fw_output_size_array->data[0] = (time_major) ? max_time : batch_size;
+  fw_output_size_array->data[1] = (time_major) ? batch_size : max_time;
   fw_output_size_array->data[2] =
       params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
   TF_LITE_ENSURE_OK(
@@ -266,8 +269,11 @@ TfLiteStatus EvalFloat(
     const TfLiteBidirectionalSequenceRNNParams* params,
     TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output,
     TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) {
-  const int batch_size = input->dims->data[0];
-  const int max_time = input->dims->data[1];
+  const bool time_major = params->time_major;
+  const int batch_size =
+      (time_major) ? input->dims->data[1] : input->dims->data[0];
+  const int max_time =
+      (time_major) ? input->dims->data[0] : input->dims->data[1];
   const int input_size = input->dims->data[2];
   const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0;
 
@@ -292,48 +298,91 @@ TfLiteStatus EvalFloat(
       params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
   const int bw_output_step =
       params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
-  for (int b = 0; b < batch_size; b++) {
+  if (time_major) {
+    // TODO(mirkov): add merge_outputs support for time_major inputs.
+    TF_LITE_ASSERT_EQ(params->merge_outputs, false);
+
     // Forward cell.
-    float* fw_hidden_state_ptr_batch =
-        fw_hidden_state->data.f + b * fw_num_units;
-    float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time;
+    float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f;
     for (int s = 0; s < max_time; s++) {
       const float* input_ptr_batch =
-          input->data.f + b * input_size * max_time + s * input_size;
+          input->data.f + s * input_size * batch_size;
       const float* aux_input_ptr_batch =
           (aux_input != nullptr)
-              ? aux_input->data.f + b * input_size * max_time + s * input_size
+              ? aux_input->data.f + s * input_size * batch_size
               : nullptr;
-      float* output_ptr_batch = fw_output_offset + s * fw_output_step;
+      float* output_ptr_batch =
+          fw_output->data.f + s * fw_num_units * batch_size;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch,
           fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr,
-          input_size, aux_input_size, fw_num_units, /*batch_size=*/1,
+          input_size, aux_input_size, fw_num_units, batch_size,
           params->activation, fw_hidden_state_ptr_batch, output_ptr_batch);
     }
     // Backward cell.
-    float* bw_hidden_state_ptr_batch =
-        bw_hidden_state->data.f + b * bw_num_units;
-    float* bw_output_offset =
-        params->merge_outputs
-            ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units
-            : bw_output->data.f + b * bw_output_step * max_time;
+    float* bw_hidden_state_ptr_batch = bw_hidden_state->data.f;
     for (int s = max_time - 1; s >= 0; s--) {
       const float* input_ptr_batch =
-          input->data.f + b * input_size * max_time + s * input_size;
+          input->data.f + s * input_size * batch_size;
       const float* aux_input_ptr_batch =
           (aux_input != nullptr)
-              ? aux_input->data.f + b * input_size * max_time + s * input_size
+              ? aux_input->data.f + s * input_size * batch_size
               : nullptr;
-      float* output_ptr_batch = bw_output_offset + s * bw_output_step;
+      float* output_ptr_batch =
+          bw_output->data.f + s * bw_num_units * batch_size;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch,
           bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr,
-          input_size, aux_input_size, bw_num_units, /*batch_size=*/1,
+          input_size, aux_input_size, bw_num_units, batch_size,
           params->activation, bw_hidden_state_ptr_batch, output_ptr_batch);
     }
+  } else {
+    for (int b = 0; b < batch_size; b++) {
+      // Forward cell.
+      float* fw_hidden_state_ptr_batch =
+          fw_hidden_state->data.f + b * fw_num_units;
+      float* fw_output_offset =
+          fw_output->data.f + b * fw_output_step * max_time;
+      for (int s = 0; s < max_time; s++) {
+        const float* input_ptr_batch =
+            input->data.f + b * input_size * max_time + s * input_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + b * input_size * max_time + s * input_size
+                : nullptr;
+        float* output_ptr_batch = fw_output_offset + s * fw_output_step;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch,
+            fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr,
+            input_size, aux_input_size, fw_num_units, /*batch_size=*/1,
+            params->activation, fw_hidden_state_ptr_batch, output_ptr_batch);
+      }
+      // Backward cell.
+      float* bw_hidden_state_ptr_batch =
+          bw_hidden_state->data.f + b * bw_num_units;
+      float* bw_output_offset =
+          params->merge_outputs
+              ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units
+              : bw_output->data.f + b * bw_output_step * max_time;
+      for (int s = max_time - 1; s >= 0; s--) {
+        const float* input_ptr_batch =
+            input->data.f + b * input_size * max_time + s * input_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + b * input_size * max_time + s * input_size
+                : nullptr;
+        float* output_ptr_batch = bw_output_offset + s * bw_output_step;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch,
+            bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr,
+            input_size, aux_input_size, bw_num_units, /*batch_size=*/1,
+            params->activation, bw_hidden_state_ptr_batch, output_ptr_batch);
+      }
+    }
   }
   return kTfLiteOk;
 }
@@ -351,8 +400,11 @@ TfLiteStatus EvalHybrid(
     TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output,
     TfLiteTensor* bw_hidden_state_quantized, TfLiteTensor* bw_hidden_state,
     TfLiteTensor* bw_output) {
-  const int batch_size = input->dims->data[0];
-  const int max_time = input->dims->data[1];
+  const bool time_major = params->time_major;
+  const int batch_size =
+      (time_major) ? input->dims->data[1] : input->dims->data[0];
+  const int max_time =
+      (time_major) ? input->dims->data[0] : input->dims->data[1];
   const int input_size = input->dims->data[2];
   const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0;
 
@@ -403,55 +455,106 @@ TfLiteStatus EvalHybrid(
       params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units;
   const int bw_output_step =
       params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
-  for (int b = 0; b < batch_size; b++) {
-    // Forward cell.
-    float* fw_hidden_state_ptr_batch =
-        fw_hidden_state->data.f + b * fw_num_units;
-    float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time;
-    for (int s = 0; s < max_time; s++) {
-      const float* input_ptr_batch =
-          input->data.f + b * input_size * max_time + s * input_size;
-      const float* aux_input_ptr_batch =
-          (aux_input != nullptr)
-              ? aux_input->data.f + b * input_size * max_time + s * input_size
-              : nullptr;
-      float* output_ptr_batch = fw_output_offset + s * fw_output_step;
-
-      kernel_utils::RnnBatchStep(
-          input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale,
-          aux_input_ptr_batch, aux_fw_input_weights_ptr,
-          aux_fw_input_weights_scale, fw_recurrent_weights_ptr,
-          fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size,
-          fw_num_units, /*batch_size=*/1, params->activation,
-          quantized_input_ptr, aux_quantized_input_ptr,
-          fw_quantized_hidden_state_ptr, scaling_factors_ptr,
-          fw_hidden_state_ptr_batch, output_ptr_batch);
+  if (time_major) {
+    for (int t = 0; t < max_time; t++) {
+      // TODO(mirkov): add merge_outputs support for time_major inputs.
+      TF_LITE_ASSERT_EQ(params->merge_outputs, false);
+
+      // Forward cell.
+      float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f;
+      for (int s = 0; s < max_time; s++) {
+        const float* input_ptr_batch =
+            input->data.f + s * input_size * batch_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + s * input_size * batch_size
+                : nullptr;
+        float* output_ptr_batch =
+            fw_output->data.f + s * fw_num_units * batch_size;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale,
+            aux_input_ptr_batch, aux_fw_input_weights_ptr,
+            aux_fw_input_weights_scale, fw_recurrent_weights_ptr,
+            fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size,
+            fw_num_units, batch_size, params->activation, quantized_input_ptr,
+            aux_quantized_input_ptr, fw_quantized_hidden_state_ptr,
+            scaling_factors_ptr, fw_hidden_state_ptr_batch, output_ptr_batch);
+      }
+      // Backward cell.
+      float* bw_hidden_state_ptr_batch = bw_hidden_state->data.f;
+      for (int s = max_time - 1; s >= 0; s--) {
+        const float* input_ptr_batch =
+            input->data.f + s * input_size * batch_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + s * input_size * batch_size
+                : nullptr;
+        float* output_ptr_batch =
+            bw_output->data.f + s * bw_num_units * batch_size;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale,
+            aux_input_ptr_batch, aux_bw_input_weights_ptr,
+            aux_bw_input_weights_scale, bw_recurrent_weights_ptr,
+            bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size,
+            bw_num_units, batch_size, params->activation, quantized_input_ptr,
+            aux_quantized_input_ptr, bw_quantized_hidden_state_ptr,
+            scaling_factors_ptr, bw_hidden_state_ptr_batch, output_ptr_batch);
+      }
     }
-    // Backward cell.
-    float* bw_hidden_state_ptr_batch =
-        bw_hidden_state->data.f + b * bw_num_units;
-    float* bw_output_offset =
-        params->merge_outputs
-            ? fw_output->data.f + b * bw_output_step * max_time
-            : bw_output->data.f + b * bw_output_step * max_time;
-    for (int s = max_time - 1; s >= 0; s--) {
-      const float* input_ptr_batch =
-          input->data.f + b * input_size * max_time + s * input_size;
-      const float* aux_input_ptr_batch =
-          (aux_input != nullptr)
-              ? aux_input->data.f + b * input_size * max_time + s * input_size
-              : nullptr;
-      float* output_ptr_batch = bw_output_offset + s * bw_output_step;
-
-      kernel_utils::RnnBatchStep(
-          input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale,
-          aux_input_ptr_batch, aux_bw_input_weights_ptr,
-          aux_bw_input_weights_scale, bw_recurrent_weights_ptr,
-          bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size,
-          bw_num_units, /*batch_size=*/1, params->activation,
-          quantized_input_ptr, aux_quantized_input_ptr,
-          bw_quantized_hidden_state_ptr, scaling_factors_ptr,
-          bw_hidden_state_ptr_batch, output_ptr_batch);
+  } else {
+    for (int b = 0; b < batch_size; b++) {
+      // Forward cell.
+      float* fw_hidden_state_ptr_batch =
+          fw_hidden_state->data.f + b * fw_num_units;
+      float* fw_output_offset =
+          fw_output->data.f + b * fw_output_step * max_time;
+      for (int s = 0; s < max_time; s++) {
+        const float* input_ptr_batch =
+            input->data.f + b * input_size * max_time + s * input_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + b * input_size * max_time + s * input_size
+                : nullptr;
+        float* output_ptr_batch = fw_output_offset + s * fw_output_step;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale,
+            aux_input_ptr_batch, aux_fw_input_weights_ptr,
+            aux_fw_input_weights_scale, fw_recurrent_weights_ptr,
+            fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size,
+            fw_num_units, /*batch_size=*/1, params->activation,
+            quantized_input_ptr, aux_quantized_input_ptr,
+            fw_quantized_hidden_state_ptr, scaling_factors_ptr,
+            fw_hidden_state_ptr_batch, output_ptr_batch);
+      }
+      // Backward cell.
+      float* bw_hidden_state_ptr_batch =
+          bw_hidden_state->data.f + b * bw_num_units;
+      float* bw_output_offset =
+          params->merge_outputs
+              ? fw_output->data.f + b * bw_output_step * max_time
+              : bw_output->data.f + b * bw_output_step * max_time;
+      for (int s = max_time - 1; s >= 0; s--) {
+        const float* input_ptr_batch =
+            input->data.f + b * input_size * max_time + s * input_size;
+        const float* aux_input_ptr_batch =
+            (aux_input != nullptr)
+                ? aux_input->data.f + b * input_size * max_time + s * input_size
+                : nullptr;
+        float* output_ptr_batch = bw_output_offset + s * bw_output_step;
+
+        kernel_utils::RnnBatchStep(
+            input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale,
+            aux_input_ptr_batch, aux_bw_input_weights_ptr,
+            aux_bw_input_weights_scale, bw_recurrent_weights_ptr,
+            bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size,
+            bw_num_units, /*batch_size=*/1, params->activation,
+            quantized_input_ptr, aux_quantized_input_ptr,
+            bw_quantized_hidden_state_ptr, scaling_factors_ptr,
+            bw_hidden_state_ptr_batch, output_ptr_batch);
+      }
     }
   }
   return kTfLiteOk;
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
index f555c472f5..6c179ca05d 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
@@ -654,7 +654,8 @@ const std::initializer_list<float> recurrent_weights = {
 class BidirectionalRNNOpModel : public SingleOpModel {
  public:
   BidirectionalRNNOpModel(int batches, int sequence_len, int fw_units,
-                          int bw_units, int input_size, bool merge_outputs)
+                          int bw_units, int input_size, bool time_major,
+                          bool merge_outputs)
       : batches_(batches),
         sequence_len_(sequence_len),
         fw_units_(fw_units),
@@ -679,25 +680,29 @@ class BidirectionalRNNOpModel : public SingleOpModel {
       bw_output_ = AddOutput(TensorType_FLOAT32);
     }
 
-    SetBuiltinOp(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
-                 BuiltinOptions_BidirectionalSequenceRNNOptions,
-                 CreateBidirectionalSequenceRNNOptions(
-                     builder_, /*time_major=*/false,
-                     ActivationFunctionType_RELU, merge_outputs)
-                     .Union());
+    SetBuiltinOp(
+        BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
+        BuiltinOptions_BidirectionalSequenceRNNOptions,
+        CreateBidirectionalSequenceRNNOptions(
+            builder_, time_major, ActivationFunctionType_RELU, merge_outputs)
+            .Union());
+    const auto input_shape =
+        (time_major) ? std::vector<int>({sequence_len_, batches_, input_size_})
+                     : std::vector<int>({batches_, sequence_len_, input_size_});
+
     BuildInterpreter({
-        {batches_, sequence_len_, input_size_},  // input
-        {fw_units_, input_size_},                // fw_weights
-        {fw_units_, fw_units_},                  // fw_recurrent_weights
-        {fw_units_},                             // fw_bias
-        {batches_, fw_units_},                   // fw_hidden_state
-        {bw_units_, input_size_},                // bw_weights
-        {bw_units_, bw_units_},                  // bw_recurrent_weights
-        {bw_units_},                             // bw_bias
-        {batches_, bw_units_},                   // bw_hidden_state
-        {batches_, sequence_len_, 0},            // aux_input
-        {fw_units_, 0},                          // aux_fw_weights
-        {bw_units_, 0},                          // aux_bw_weights
+        input_shape,                   // input
+        {fw_units_, input_size_},      // fw_weights
+        {fw_units_, fw_units_},        // fw_recurrent_weights
+        {fw_units_},                   // fw_bias
+        {batches_, fw_units_},         // fw_hidden_state
+        {bw_units_, input_size_},      // bw_weights
+        {bw_units_, bw_units_},        // bw_recurrent_weights
+        {bw_units_},                   // bw_bias
+        {batches_, bw_units_},         // bw_hidden_state
+        {batches_, sequence_len_, 0},  // aux_input
+        {fw_units_, 0},                // aux_fw_weights
+        {bw_units_, 0},                // aux_bw_weights
     });
   }
 
@@ -770,7 +775,8 @@ class BidirectionalRNNOpModel : public SingleOpModel {
 TEST(BidirectionalRNNOpTest, BlackBoxTest) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8, /*merge_outputs=*/false);
+                              /*input_size=*/8, /*time_major=*/false,
+                              /*merge_outputs=*/false);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -803,11 +809,49 @@ TEST(BidirectionalRNNOpTest, BlackBoxTest) {
   EXPECT_THAT(rnn.GetBwOutput(), ElementsAreArray(ArrayFloatNear(bw_expected)));
 }
 
-// Same as the previous test, yet with merged outputs.
+// Same as BlackBox test, but input is reshuffled to time_major format.
+TEST(BidirectionalRNNOpTest, BlackBoxTestTimeMajor) {
+  BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
+                              /*fw_units=*/16, /*bw_units=*/16,
+                              /*input_size=*/8, /*time_major=*/true,
+                              /*merge_outputs=*/false);
+  rnn.SetFwWeights(weights);
+  rnn.SetBwWeights(weights);
+  rnn.SetFwBias(biases);
+  rnn.SetBwBias(biases);
+  rnn.SetFwRecurrentWeights(recurrent_weights);
+  rnn.SetBwRecurrentWeights(recurrent_weights);
+
+  // const int input_sequence_size = rnn.input_size() * rnn.sequence_len();
+  // Insert the inputs in time_major format. The batch_major format is:
+  // [b0t0, b0t1, ..., b0t15, b1t0, b1t1, ..., b1t15]. This is reshuffled as:
+  // [b0t0, b1t0, b0t1, b1t1, ..., b0t15, b1t15].
+  for (int i = 0; i < rnn.sequence_len(); i++) {
+    float* batch_start = rnn_input + i * rnn.input_size();
+    float* batch_end = batch_start + rnn.input_size();
+    // The two batches are identical.
+    rnn.SetInput(2 * i * rnn.input_size(), batch_start, batch_end);
+    rnn.SetInput((2 * i + 1) * rnn.input_size(), batch_start, batch_end);
+  }
+
+  rnn.Invoke();
+
+  std::vector<float> fw_expected;
+  for (int i = 0; i < rnn.sequence_len(); i++) {
+    float* golden_fw_start = rnn_golden_fw_output + i * rnn.num_fw_units();
+    float* golden_fw_end = golden_fw_start + rnn.num_fw_units();
+    fw_expected.insert(fw_expected.end(), golden_fw_start, golden_fw_end);
+    fw_expected.insert(fw_expected.end(), golden_fw_start, golden_fw_end);
+  }
+  EXPECT_THAT(rnn.GetFwOutput(), ElementsAreArray(ArrayFloatNear(fw_expected)));
+}
+
+// Same as BlackBox test, yet with merged outputs.
 TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8, /*merge_outputs=*/true);
+                              /*input_size=*/8, /*time_major=*/false,
+                              /*merge_outputs=*/true);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -845,7 +889,8 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) {
 TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) {
   BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8, /*merge_outputs=*/false);
+                              /*input_size=*/8, /*time_major=*/false,
+                              /*merge_outputs=*/false);
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
   rnn.SetFwBias(biases);
@@ -891,7 +936,8 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) {
 TEST(BidirectionalRNNOpTest, EndToEndTest) {
   BidirectionalRNNOpModel rnn(/*batches=*/1, /*sequence_len=*/4,
                               /*fw_units=*/16, /*bw_units=*/16,
-                              /*input_size=*/8, /*merge_outputs=*/false);
+                              /*input_size=*/8, /*time_major=*/false,
+                              /*merge_outputs=*/false);
   const int output_size = 4;
   float dnn_weights[] = {
       -0.5782342,  -0.052212059, 0.73036242,  -0.81216097, -0.80088139,
-- 
GitLab


From fa1542234857acf56af6e7f0dbe8d2084a18fa00 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 9 Oct 2018 14:19:07 -0700
Subject: [PATCH 0634/1085] [XLA:GPU] Pattern match atomic "apply" into an
 atomic store

Otherwise we'd emit a CAS loop.

PiperOrigin-RevId: 216421161
---
 .../compiler/xla/service/gpu/ir_emitter.cc    | 15 +++++
 .../compiler/xla/service/gpu/tests/BUILD      | 12 ++++
 .../xla/service/gpu/tests/gpu_atomic_test.cc  | 58 +++++++++++++++++++
 3 files changed, 85 insertions(+)
 create mode 100644 tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index b7c37bcf3c..47102347cb 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -179,6 +179,21 @@ bool IrEmitter::MaybeEmitDirectAtomicOperation(
   bool is_atomic_integral = element_type == S32 || element_type == U32 ||
                             element_type == S64 || element_type == U64;
   llvm::Value* source = Load(source_address, "source");
+
+  // kCopy of RHS -> atomic store.
+  if (root_opcode == HloOpcode::kCopy &&
+      (element_type == F32 || is_atomic_integral) &&
+      computation.root_instruction()->operand(0)->opcode() ==
+          HloOpcode::kParameter &&
+      computation.root_instruction()->operand(0)->parameter_number() == 1) {
+    llvm::StoreInst* store = Store(source, output_address);
+    store->setAtomic(llvm::AtomicOrdering::Unordered);
+    // Derive a minimum alignment from the type. The optimizer can increase it
+    // later.
+    store->setAlignment(ShapeUtil::ByteSizeOfPrimitiveType(element_type));
+    return true;
+  }
+
   if (root_opcode == HloOpcode::kAdd) {
     // NVPTX supports atomicAdd on F32 and integer types.
     if (element_type == F32) {
diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD
index a725533567..1f0436278c 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD
@@ -223,3 +223,15 @@ tf_cc_test(
         "@com_google_absl//absl/strings",
     ],
 )
+
+tf_cc_test(
+    name = "gpu_atomic_test",
+    srcs = ["gpu_atomic_test.cc"],
+    tags = tf_cuda_tests_tags(),
+    deps = [
+        ":gpu_codegen_test",
+        "//tensorflow/compiler/xla/tests:filecheck",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc
new file mode 100644
index 0000000000..6b18c4c637
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc
@@ -0,0 +1,58 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <memory>
+#include <utility>
+
+#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h"
+#include "tensorflow/compiler/xla/tests/filecheck.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace gpu {
+namespace {
+
+class GpuAtomicTest : public GpuCodegenTest {};
+
+TEST_F(GpuAtomicTest, TestStore) {
+  const char* hlo_string = R"(
+    HloModule TensorFlowScatterV1
+
+    update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+      lhs = s32[] parameter(0)
+      ROOT rhs = s32[] parameter(1)
+    }
+
+    ENTRY main {
+      operand = s32[3,3] parameter(0)
+      indices = s32[2] parameter(1)
+      updates = s32[2,3] parameter(2)
+      ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=update_s32,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+    }
+)";
+
+  CompileAndVerifyIr(hlo_string, R"(
+CHECK: store atomic{{.*}}unordered, align 4
+)");
+}
+
+}  // namespace
+}  // namespace gpu
+}  // namespace xla
-- 
GitLab


From 35caff957424a60bd7d7e4e92a1ec87f617781c6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 14:25:27 -0700
Subject: [PATCH 0635/1085] Export feature importance for oblivious tree nodes.

PiperOrigin-RevId: 216422334
---
 .../estimator_batch/custom_export_strategy.py         | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
index 48f12a64f9..a3df272e69 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
@@ -196,6 +196,10 @@ def convert_to_universal_format(dtec, sorted_feature_names,
           matching_id = categorical_test.value.add()
           matching_id.int64_value = split.feature_id
           node.custom_left_child_test.Pack(categorical_test)
+        elif (node_type == "oblivious_dense_float_binary_split" or
+              node_type == "oblivious_categorical_id_binary_split"):
+          raise ValueError("Universal tree format doesn't support oblivious "
+                           "trees")
         else:
           raise ValueError("Unexpected node type %s" % node_type)
         node.left_child_id.value = split.left_id
@@ -229,6 +233,13 @@ def _get_feature_importances(dtec, feature_names, num_dense_floats,
         split = tree_node.categorical_id_binary_split
         split_column = feature_names[split.feature_column + num_dense_floats +
                                      num_sparse_float]
+      elif node_type == "oblivious_dense_float_binary_split":
+        split = tree_node.oblivious_dense_float_binary_split
+        split_column = feature_names[split.feature_column]
+      elif node_type == "oblivious_categorical_id_binary_split":
+        split = tree_node.oblivious_categorical_id_binary_split
+        split_column = feature_names[split.feature_column + num_dense_floats +
+                                     num_sparse_float]
       elif node_type == "categorical_id_set_membership_binary_split":
         split = tree_node.categorical_id_set_membership_binary_split
         split_column = feature_names[split.feature_column + num_dense_floats +
-- 
GitLab


From 950cf87104bfee28e2165fe368f66337b8a1336d Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Tue, 9 Oct 2018 14:36:33 -0700
Subject: [PATCH 0636/1085] [tf.data vectorization] Add vectorizer for `Add` op

PiperOrigin-RevId: 216424512
---
 tensorflow/core/graph/graph.cc                |   2 +-
 .../optimizers/data/vectorization/BUILD       |  34 ++--
 .../data/vectorization/add_vectorizer.cc      | 150 ++++++++++++++++++
 .../optimizers/data/vectorization_utils.cc    |  21 +--
 .../data/vectorization_utils_test.cc          | 103 ++++++++++--
 .../optimization/map_vectorization_test.py    |   1 +
 6 files changed, 280 insertions(+), 31 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc

diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 6f068546d2..a17491d4f7 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -34,7 +34,7 @@ namespace tensorflow {
 
 const int Graph::kControlSlot = -1;
 
-class NodeProperties {
+struct NodeProperties {
  public:
   NodeProperties(const OpDef* op_def, const NodeDef& node_def,
                  const DataTypeSlice inputs, const DataTypeSlice outputs)
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
index 985d6c6c3a..09018d0124 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -9,7 +9,11 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all")
 
 VECTORIZER_DEPS = [
     ":vectorizer_registry",
+    "//tensorflow/cc:ops",
     "//tensorflow/core/grappler/optimizers/data:graph_utils",
+    "//tensorflow/core:core_cpu",
+    "//tensorflow/cc:scope_internal",
+    "//tensorflow/cc:cc_ops",
 ] + tf_protos_all()
 
 cc_library(
@@ -42,6 +46,24 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "vectorizer_registry_test",
+    srcs = ["vectorizer_registry_test.cc"],
+    deps = [
+        ":vectorizer_registry",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ] + tf_protos_all(),
+)
+
+cc_library(
+    name = "add_vectorizer",
+    srcs = ["add_vectorizer.cc"],
+    deps = VECTORIZER_DEPS,
+    alwayslink = 1,
+)
+
 cc_library(
     name = "cast_vectorizer",
     srcs = ["cast_vectorizer.cc"],
@@ -61,20 +83,10 @@ cc_library(
     hdrs = ["vectorizer_registry.h"],
     visibility = ["//visibility:public"],
     deps = [
+        ":add_vectorizer",
         ":cast_vectorizer",
         ":unpack_vectorizer",
         ":vectorizer",
         ":vectorizer_registry",
     ],
 )
-
-tf_cc_test(
-    name = "vectorizer_registry_test",
-    srcs = ["vectorizer_registry_test.cc"],
-    deps = [
-        ":vectorizer_registry",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ] + tf_protos_all(),
-)
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc
new file mode 100644
index 0000000000..d90a51b01a
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc
@@ -0,0 +1,150 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope_internal.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/math_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+
+namespace tensorflow {
+namespace grappler {
+
+namespace {
+
+const char* const kExpandDimsPrefix = "vectorized/expanddims/";
+
+// Reshapes stacked inputs for broadcast. Stacked inputs have an extra leading
+// dimension, which may cause automatic broadcasting rules to expand the
+// input dimensions wrongly when the unstacked shapes have different ranks.
+// To avoid that, we reshape stacked inputs to the maximum rank they need
+// to be broadcasted to.
+//
+// For example, suppose we have inputs A and B, where A is a stacked tensor with
+// shape [n, 5] (where n is the stack size) and B is an unstacked tensor with
+// shape [12, 7, 5]. If we added them directly, tensorflow broadcasting rules
+// would expand the dimensions of A to [1, n, 5], then (incorrectly) check that
+// the dimensions n and 7 are compatible, and if so, create an output of shape
+// [12, 7, 5]. However, correct addition of these inputs would create an output
+// with shape [n, 12, 7, 5]: we need to manually expand the dimensions of A
+// *after* the leading dimension, i.e. expand A to the shape [n, 1, 1, 5] before
+// broadcasting.
+Status ExpandDimsForBroadcast(std::vector<WrappedTensor>* inputs, Graph* g) {
+  Status status;
+  Scope parent = NewInternalScope(g, &status, nullptr);
+  Scope s = parent.NewSubScope(kExpandDimsPrefix);
+
+  // TODO(rachelim): We can potentially get rid of all these ops if shapes are
+  // known statically
+
+  Output const_0 = ops::Const(s, 0);
+  Output const_1 = ops::Const(s, 1);
+
+  std::vector<Output> ranks;
+  ranks.reserve(inputs->size());
+
+  // Get the stacked rank of each input
+  for (const auto& input : *inputs) {
+    Output rank = ops::Rank(s, Output(input.node, input.output_index));
+
+    if (!input.stacked) {
+      // If the input is unstacked, add 1
+      rank = ops::Add(s, rank, const_1);
+    }
+
+    ranks.push_back(rank);
+  }
+
+  // Pack the ranks into one tensor to get the max
+  Output packed_ranks = ops::Stack(s, ranks);
+
+  Output max_rank =
+      ops::Max(s, packed_ranks, const_0, ops::Max::Attrs().KeepDims(true));
+
+  std::vector<WrappedTensor> expanded_inputs;
+  expanded_inputs.reserve(inputs->size());
+
+  // For all inputs that are stacked, expand dimensions after dim 0.
+  for (size_t i = 0; i < inputs->size(); ++i) {
+    if (!inputs->at(i).stacked) {
+      expanded_inputs.push_back(inputs->at(i));
+      continue;
+    }
+
+    Output input(inputs->at(i).node, inputs->at(i).output_index);
+
+    // Number of dimensions to expand
+    Output rank_diff = ops::Sub(s, max_rank, ranks[i]);
+
+    // [1] * rank_diff
+    Output ones = ops::Tile(s, ops::Const(s, {1}), rank_diff);
+
+    Output const_vec_1 = ops::Const(s, {1});
+
+    Output shape = ops::Shape(s, input);
+
+    // shape[:1]
+    Output concat_pre =
+        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
+                          ops::StridedSlice::Attrs().BeginMask(1));
+
+    // shape[1:]
+    Output concat_post =
+        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
+                          ops::StridedSlice::Attrs().EndMask(1));
+
+    // tf.concat([shape[:1], ones, shape[1:]], 0)
+    Output new_shape = ops::Concat(s, {concat_pre, ones, concat_post}, const_0);
+
+    Output result = ops::Reshape(s, input, new_shape);
+
+    expanded_inputs.push_back({result.node(), 0, true});
+  }
+
+  inputs->swap(expanded_inputs);
+  return status;
+}
+
+class AddVectorizer : public Vectorizer {
+ public:
+  Status Vectorize(const Node& node, Graph* outer_scope,
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
+    if (node.num_inputs() != 2) {
+      return errors::Internal("Add op should only have two inputs.");
+    }
+
+    TF_RETURN_IF_ERROR(ExpandDimsForBroadcast(&inputs, outer_scope));
+
+    // Add new Add node with the same op and attrs as the original node
+    Node* new_add_node;
+    TF_RETURN_IF_ERROR(NodeBuilder("Add", "Add")
+                           .Input(inputs[0].node, inputs[0].output_index)
+                           .Input(inputs[1].node, inputs[1].output_index)
+                           .Finalize(outer_scope, &new_add_node));
+
+    // Add output mappings
+    outputs->push_back({new_add_node, 0, true});
+    return Status::OK();
+  }
+};
+
+REGISTER_VECTORIZER("Add", AddVectorizer);
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index d977ff3198..8b93b1f2b8 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -64,9 +64,18 @@ void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src,
   }
 }
 
+// Update node attrs to keep its properties consistent with the function
+void UpdateMapDefunAttrs(FunctionBody* map_defun_fn, Node* map_defun_node) {
+  map_defun_node->AddAttr("output_types", map_defun_fn->ret_types);
+
+  // TODO(rachelim): Propagate precise shapes if they're known, which may enable
+  // subsequent optimizations.
+  map_defun_node->AddAttr("output_shapes", std::vector<PartialTensorShape>(
+                                               map_defun_fn->ret_types.size()));
+}
+
 Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node,
                          const TensorDesc& output) {
-  // Note that we don't update MapDefun attrs as we go, only when we are done
   DataType type = output.first->output_type(output.second);
   int index = map_defun_fn->ret_nodes.size();
 
@@ -83,13 +92,13 @@ Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node,
   map_defun_fn->graph->AddEdge(output.first, output.second, ret_node, 0);
   map_defun_fn->ret_nodes.push_back(ret_node);
   map_defun_fn->ret_types.push_back(type);
+  UpdateMapDefunAttrs(map_defun_fn, map_defun_node);
 
   return s;
 }
 
 void RemoveMapDefunOutput(int output_position, Graph* outer_scope,
                           FunctionBody* map_defun_fn, Node* map_defun_node) {
-  // Note that we don't update MapDefun attrs as we go, only when we are done
   DCHECK_LT(output_position, map_defun_fn->ret_nodes.size())
       << "Trying to remove output that doesn't exist. Output number: "
       << output_position;
@@ -102,6 +111,7 @@ void RemoveMapDefunOutput(int output_position, Graph* outer_scope,
                                 output_position);
   map_defun_fn->ret_types.erase(map_defun_fn->ret_types.begin() +
                                 output_position);
+  UpdateMapDefunAttrs(map_defun_fn, map_defun_node);
 
   // Renumber the nodes and edges that come after
   for (int i = 0; i < num_later_outputs; ++i) {
@@ -342,13 +352,6 @@ void Vectorization::VectorizeHelper() {
   // need the MapDefun node and can delete it.
   if (map_defun_fn_->ret_nodes.empty()) {
     outer_scope_->RemoveNode(map_defun_node_);
-  } else {
-    // Update MapDefun node attrs accordingly
-    DCHECK_EQ(map_defun_fn_->ret_types.size(), map_defun_fn_->ret_nodes.size());
-    map_defun_node_->AddAttr(
-        "output_shapes",
-        std::vector<PartialTensorShape>(map_defun_fn_->ret_types.size()));
-    map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types);
   }
 }
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index a6020e36bb..be498d150b 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -145,7 +145,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
   FunctionDef* vectorized;
   Status s = VectorizeMapDefun(outer, *map_defun, &lib, &vectorized);
   LOG(ERROR) << s;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   EXPECT_EQ(GetRetval(*vectorized, 0), "ret0");
@@ -237,7 +237,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
 
   auto map_defun_node = vectorized->node_def(
       function_utils::FindFunctionNodeWithOp("MapDefun", *vectorized));
@@ -311,7 +311,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -389,7 +389,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -475,7 +475,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& unpack_node = vectorized->node_def(
@@ -574,7 +574,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -654,7 +654,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   // They should be unchanged
   // We check this somewhat manually as the names of nodes may have changed
   EXPECT_EQ(vectorized->node_def_size(), 1);
@@ -738,7 +738,7 @@ TEST(VectorizeMapDefunTest, VectorizeConst) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized));
@@ -817,7 +817,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   auto const_node = vectorized->node_def(
@@ -902,7 +902,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
   *lib.add_function() = inner;
 
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
 
   auto find_const = [vectorized](int val) -> const NodeDef* {
     for (const auto& n : vectorized->node_def()) {
@@ -924,6 +924,89 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
   EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name()));
 }
 
+// Before:
+//
+//                   +------+
+// +-----------------+ Arg0 +----------------------+
+// |                 +---+--+                      |
+// |                     |                         |
+// |                 +---v--+                      |
+// |   +-------------+ Arg0 +------------------+   |
+// |   |             +---+--+                  |   |
+// |   |                 |                     |   |
+// |   |                 |          +-----+    |   |
+// |   |                 |          |Const|    |   |
+// |   |                 |          +-+---+    |   |
+// |   |                 |            |        |   |
+// |   |                 |   +--------+        |   |
+// |   |                 |   |                 |   |
+// |   |               +-v---v-+               |   |
+// |   |               |  Add  |               |   |
+// |   |               +-+-----+               |   |
+// |   |                 |                     |   |
+// |   |                 |                     |   |
+// |   | MapDefun      +-v----+                |   |
+// |   +---------------| Ret  |----------------+   |
+// |                   +--v---+                    |
+// |                      |                        |
+// |                      |                        |
+// |                   +--v----                    |
+// +-------------------| Ret  |--------------------+
+//                     +------+
+//
+//
+//  After:
+//
+//              +------+
+// +------------+ Arg0 +----------------------+
+// |            +---+--+                      |
+// |                |                         |
+// |                |              +-----+    |
+// |                |              |Const|    |
+// |              +-v---------+    +--+--+    |
+// |              |ExpandDims*|       |       |
+// |              +-----+-----+       |       |
+// |                    |             |       |
+// |                    +-----+ +-----+       |
+// |                          | |             |
+// |                        +-v-v-+           |
+// |                        | Add |           |
+// |                        +--+--+           |
+// |                           |              |
+// |                       +---v--+           |
+// +-----------------------+ Ret  +-----------+
+//                         +------+
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunAdd) {
+  // Note that this checks that the "Add" vectorizer is successful, but does not
+  // check that the transformed function is correct (i.e. produces the same
+  // output as the unvectorized map defun). For the latter, the tests are in
+  // tensorflow/python/data/experimental/kernel_tests/optimization/
+  // map_vectorization_test.py
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const", 2),
+       {{"Add"}, "Add", {"arg0", "Const:output:0"}, {{"T", DT_INT32}}}},
+      {{"ret0", "Add:z:0"}});
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}},
+                      inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+}
+
 // TODO(rachelim): More test cases when we get around to implementing them:
 // [] A badly defined converter, e.g. doesn't produce nodes that have the
 //    same number of outputs/inputs as the nodes to be converted
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index 803ff87924..d1d6cf28ab 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -80,6 +80,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("Basic", lambda x: (x, x + 1), None),
       ("Const", lambda x: 2, 12),
       ("Parallel", lambda x: (x, x + 1), 12),
+      ("Broadcast", lambda x: x + np.random.rand(5, 4, 3, 2), None),
       ("Gather", lambda x: array_ops.gather(x, 0), 12),
   )
   def testOptimization(self, map_fn, num_parallel_calls):
-- 
GitLab


From a6fcb9d3d81e9207650eda1c899051ccbb97dec7 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 14:38:55 -0700
Subject: [PATCH 0637/1085] Avoid creating sparse tensor objects before library
 is initialized.

PiperOrigin-RevId: 216425002
---
 .../sequence_feature_column_test.py           | 482 +++++++++---------
 1 file changed, 255 insertions(+), 227 deletions(-)

diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
index 929e83523a..707f93b2da 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
@@ -39,18 +39,18 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input_a': sparse_tensor.SparseTensorValue(
+       'sparse_input_args_a': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2, 0, 1),
-           dense_shape=(2, 2)),
-       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
+       'sparse_input_args_b': {
            # example 0, ids [1]
            # example 1, ids [2, 0]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(1, 2, 0),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (1, 2, 0),
+           'dense_shape': (2, 2)},
        'expected_input_layer': [
            # example 0, ids_a [2], ids_b [1]
            [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
@@ -58,20 +58,20 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
            [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],],
        'expected_sequence_length': [1, 2]},
       {'testcase_name': '3D',
-       'sparse_input_a': sparse_tensor.SparseTensorValue(
+       'sparse_input_args_a': {
            # feature 0, ids [[2], [0, 1]]
            # feature 1, ids [[0, 0], [1]]
-           indices=(
+           'indices': (
                (0, 0, 0), (0, 1, 0), (0, 1, 1),
                (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2, 0, 1, 0, 0, 1),
-           dense_shape=(2, 2, 2)),
-       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           'values': (2, 0, 1, 0, 0, 1),
+           'dense_shape': (2, 2, 2)},
+       'sparse_input_args_b': {
            # feature 0, ids [[1, 1], [1]]
            # feature 1, ids [[2], [0]]
-           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           values=(1, 1, 1, 2, 0),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (1, 1, 1, 2, 0),
+           'dense_shape': (2, 2, 2)},
        'expected_input_layer': [
            # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
            [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]],
@@ -80,9 +80,11 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
        'expected_sequence_length': [2, 2]},
       )
   def test_embedding_column(
-      self, sparse_input_a, sparse_input_b, expected_input_layer,
+      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
       expected_sequence_length):
 
+    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
+    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
     vocabulary_size = 3
     embedding_dimension_a = 2
     embedding_values_a = (
@@ -261,18 +263,18 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input_a': sparse_tensor.SparseTensorValue(
+       'sparse_input_args_a': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2, 0, 1),
-           dense_shape=(2, 2)),
-       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
+       'sparse_input_args_b': {
            # example 0, ids [1]
            # example 1, ids [1, 0]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(1, 1, 0),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (1, 1, 0),
+           'dense_shape': (2, 2)},
        'expected_input_layer': [
            # example 0, ids_a [2], ids_b [1]
            [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
@@ -280,20 +282,20 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
            [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
        'expected_sequence_length': [1, 2]},
       {'testcase_name': '3D',
-       'sparse_input_a': sparse_tensor.SparseTensorValue(
+       'sparse_input_args_a': {
            # feature 0, ids [[2], [0, 1]]
            # feature 1, ids [[0, 0], [1]]
-           indices=(
+           'indices': (
                (0, 0, 0), (0, 1, 0), (0, 1, 1),
                (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2, 0, 1, 0, 0, 1),
-           dense_shape=(2, 2, 2)),
-       'sparse_input_b': sparse_tensor.SparseTensorValue(
+           'values': (2, 0, 1, 0, 0, 1),
+           'dense_shape': (2, 2, 2)},
+       'sparse_input_args_b': {
            # feature 0, ids [[1, 1], [1]]
            # feature 1, ids [[1], [0]]
-           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           values=(1, 1, 1, 1, 0),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (1, 1, 1, 1, 0),
+           'dense_shape': (2, 2, 2)},
        'expected_input_layer': [
            # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
            [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]],
@@ -302,8 +304,11 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
        'expected_sequence_length': [2, 2]},
       )
   def test_indicator_column(
-      self, sparse_input_a, sparse_input_b, expected_input_layer,
+      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
       expected_sequence_length):
+    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
+    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
+
     vocabulary_size_a = 3
     vocabulary_size_b = 2
 
@@ -350,30 +355,32 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [0., 1]
            # example 1, [10.]
-           indices=((0, 0), (0, 1), (1, 0)),
-           values=(0., 1., 10.),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (0, 1), (1, 0)),
+           'values': (0., 1., 10.),
+           'dense_shape': (2, 2)},
        'expected_input_layer': [
            [[0.], [1.]],
            [[10.], [0.]]],
        'expected_sequence_length': [2, 1]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # feature 0, ids [[20, 3], [5]]
            # feature 1, ids [[3], [8]]
-           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           values=(20, 3, 5., 3., 8.),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (20, 3, 5., 3., 8.),
+           'dense_shape': (2, 2, 2)},
        'expected_input_layer': [
            [[20.], [3.], [5.], [0.]],
            [[3.], [0.], [8.], [0.]]],
        'expected_sequence_length': [2, 2]},
       )
   def test_numeric_column(
-      self, sparse_input, expected_input_layer, expected_sequence_length):
+      self, sparse_input_args, expected_input_layer, expected_sequence_length):
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
+
     numeric_column = sfc.sequence_numeric_column('aaa')
 
     input_layer, sequence_length = sfc.sequence_input_layer(
@@ -387,27 +394,27 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [0., 1.,  2., 3., 4., 5., 6., 7.]
            # example 1, [10., 11., 12., 13.]
-           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 8)),
+           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 8)},
        'expected_input_layer': [
            # The output of numeric_column._get_dense_tensor should be flattened.
            [[0., 1., 2., 3.], [4., 5., 6., 7.]],
            [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
        'expected_sequence_length': [2, 1]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
            # example 1, [[10., 11., 12., 13.], []]
-           indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
-                    (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
-                    (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 2, 4)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
+                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 2, 4)},
        'expected_input_layer': [
            # The output of numeric_column._get_dense_tensor should be flattened.
            [[0., 1., 2., 3.], [4., 5., 6., 7.]],
@@ -415,8 +422,10 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
        'expected_sequence_length': [2, 1]},
       )
   def test_numeric_column_multi_dim(
-      self, sparse_input, expected_input_layer, expected_sequence_length):
+      self, sparse_input_args, expected_input_layer, expected_sequence_length):
     """Tests sequence_input_layer for multi-dimensional numeric_column."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
+
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     input_layer, sequence_length = sfc.sequence_input_layer(
@@ -460,28 +469,29 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
            # example 1, [[[10., 11.],  [12., 13.]]]
-           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 8)),
+           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 8)},
        'expected_shape': [2, 2, 4]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
            # example 1, [[10., 11., 12., 13.], []]
-           indices=((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
-                    (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2),
-                    (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 2, 4)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
+                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2),
+                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 2, 4)},
        'expected_shape': [2, 2, 4]},
       )
   def test_static_shape_from_tensors_numeric(
-      self, sparse_input, expected_shape):
+      self, sparse_input_args, expected_shape):
     """Tests that we return a known static shape when we have one."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     input_layer, _ = sfc.sequence_input_layer(
@@ -492,30 +502,31 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
-           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-           values=(2, 0, 1, 1),
-           dense_shape=(4, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
+           'values': (2, 0, 1, 1),
+           'dense_shape': (4, 2)},
        'expected_shape': [4, 2, 3]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
            # example 2, ids []
            # example 3, ids [[1], [0, 2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           values=(2, 0, 1, 2, 1, 0, 2),
-           dense_shape=(4, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           'values': (2, 0, 1, 2, 1, 0, 2),
+           'dense_shape': (4, 2, 2)},
        'expected_shape': [4, 2, 3]}
       )
   def test_static_shape_from_tensors_indicator(
-      self, sparse_input, expected_shape):
+      self, sparse_input_args, expected_shape):
     """Tests that we return a known static shape when we have one."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=3)
     indicator_column = fc.indicator_column(categorical_column)
@@ -546,11 +557,12 @@ class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': 'rank_lt_3',
-       'seq_input': ops.convert_to_tensor(np.arange(100).reshape(10, 10))},
+       'seq_input_arg': np.arange(100).reshape(10, 10)},
       {'testcase_name': 'rank_gt_3',
-       'seq_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 2, 2))}
+       'seq_input_arg': np.arange(100).reshape(5, 5, 2, 2)}
       )
-  def test_sequence_input_throws_error(self, seq_input):
+  def test_sequence_input_throws_error(self, seq_input_arg):
+    seq_input = ops.convert_to_tensor(seq_input_arg)
     context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10))
     seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
     context_input = math_ops.cast(context_input, dtype=dtypes.float32)
@@ -559,11 +571,12 @@ class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': 'rank_lt_2',
-       'context_input': ops.convert_to_tensor(np.arange(100))},
+       'context_input_arg': np.arange(100)},
       {'testcase_name': 'rank_gt_2',
-       'context_input': ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))}
+       'context_input_arg': np.arange(100).reshape(5, 5, 4)}
       )
-  def test_context_input_throws_error(self, context_input):
+  def test_context_input_throws_error(self, context_input_arg):
+    context_input = ops.convert_to_tensor(context_input_arg)
     seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4))
     seq_input = math_ops.cast(seq_input, dtype=dtypes.float32)
     context_input = math_ops.cast(context_input, dtype=dtypes.float32)
@@ -657,25 +670,27 @@ class SequenceCategoricalColumnWithIdentityTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(1, 2, 0),
-           dense_shape=(2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           values=np.array((1, 2, 0), dtype=np.int64),
-           dense_shape=(2, 2, 1))},
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (1, 2, 0),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           'values': np.array((1, 2, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=(6, 7, 8),
-           dense_shape=(2, 2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=(6, 7, 8),
-           dense_shape=(2, 2, 2))}
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': (6, 7, 8),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': (6, 7, 8),
+           'dense_shape': (2, 2, 2)}}
       )
-  def test_get_sparse_tensors(self, inputs, expected):
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
     column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9)
 
     id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
@@ -691,27 +706,29 @@ class SequenceCategoricalColumnWithHashBucketTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=('omar', 'stringer', 'marlo'),
-           dense_shape=(2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': ('omar', 'stringer', 'marlo'),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
            # Ignored to avoid hash dependence in test.
-           values=np.array((0, 0, 0), dtype=np.int64),
-           dense_shape=(2, 2, 1))},
+           'values': np.array((0, 0, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=('omar', 'stringer', 'marlo'),
-           dense_shape=(2, 2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': ('omar', 'stringer', 'marlo'),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
            # Ignored to avoid hash dependence in test.
-           values=np.array((0, 0, 0), dtype=np.int64),
-           dense_shape=(2, 2, 2))}
+           'values': np.array((0, 0, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 2)}}
       )
-  def test_get_sparse_tensors(self, inputs, expected):
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
     column = sfc.sequence_categorical_column_with_hash_bucket(
         'aaa', hash_bucket_size=10)
 
@@ -742,25 +759,27 @@ class SequenceCategoricalColumnWithVocabularyFileTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=('marlo', 'skywalker', 'omar'),
-           dense_shape=(2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           values=np.array((2, -1, 0), dtype=np.int64),
-           dense_shape=(2, 2, 1))},
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': ('marlo', 'skywalker', 'omar'),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           'values': np.array((2, -1, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=('omar', 'skywalker', 'marlo'),
-           dense_shape=(2, 2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=np.array((0, -1, 2), dtype=np.int64),
-           dense_shape=(2, 2, 2))}
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': ('omar', 'skywalker', 'marlo'),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': np.array((0, -1, 2), dtype=np.int64),
+           'dense_shape': (2, 2, 2)}}
       )
-  def test_get_sparse_tensors(self, inputs, expected):
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
     column = sfc.sequence_categorical_column_with_vocabulary_file(
         key='aaa',
         vocabulary_file=self._wire_vocabulary_file_name,
@@ -779,25 +798,27 @@ class SequenceCategoricalColumnWithVocabularyListTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=('marlo', 'skywalker', 'omar'),
-           dense_shape=(2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
-           values=np.array((2, -1, 0), dtype=np.int64),
-           dense_shape=(2, 2, 1))},
+       'inputs_args': {
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': ('marlo', 'skywalker', 'omar'),
+           'dense_shape': (2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)),
+           'values': np.array((2, -1, 0), dtype=np.int64),
+           'dense_shape': (2, 2, 1)}},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=('omar', 'skywalker', 'marlo'),
-           dense_shape=(2, 2, 2)),
-       'expected': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 2), (1, 0, 0), (1, 2, 0)),
-           values=np.array((0, -1, 2), dtype=np.int64),
-           dense_shape=(2, 2, 2))}
+       'inputs_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': ('omar', 'skywalker', 'marlo'),
+           'dense_shape': (2, 2, 2)},
+       'expected_args': {
+           'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)),
+           'values': np.array((0, -1, 2), dtype=np.int64),
+           'dense_shape': (2, 2, 2)}}
       )
-  def test_get_sparse_tensors(self, inputs, expected):
+  def test_get_sparse_tensors(self, inputs_args, expected_args):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
+    expected = sparse_tensor.SparseTensorValue(**expected_args)
     column = sfc.sequence_categorical_column_with_vocabulary_list(
         key='aaa',
         vocabulary_list=('omar', 'stringer', 'marlo'))
@@ -815,14 +836,14 @@ class SequenceEmbeddingColumnTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
-           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-           values=(2, 0, 1, 1),
-           dense_shape=(4, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
+           'values': (2, 0, 1, 1),
+           'dense_shape': (4, 2)},
        'expected': [
            # example 0, ids [2]
            [[7., 11.], [0., 0.]],
@@ -833,15 +854,15 @@ class SequenceEmbeddingColumnTest(
            # example 3, ids [1]
            [[3., 5.], [0., 0.]]]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
            # example 2, ids []
            # example 3, ids [[1], [0, 2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           values=(2, 0, 1, 2, 1, 0, 2),
-           dense_shape=(4, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           'values': (2, 0, 1, 2, 1, 0, 2),
+           'dense_shape': (4, 2, 2)},
        'expected': [
            # example 0, ids [[2]]
            [[7., 11.], [0., 0.]],
@@ -852,7 +873,8 @@ class SequenceEmbeddingColumnTest(
            # example 3, ids [[1], [0, 2]]
            [[3., 5.], [4., 6.5]]]}
       )
-  def test_get_sequence_dense_tensor(self, inputs, expected):
+  def test_get_sequence_dense_tensor(self, inputs_args, expected):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     vocabulary_size = 3
     embedding_dimension = 2
     embedding_values = (
@@ -884,23 +906,24 @@ class SequenceEmbeddingColumnTest(
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2, 0, 1),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
        'expected_sequence_length': [1, 2]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2, 0, 1, 2),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2, 0, 1, 2),
+           'dense_shape': (2, 2, 2)},
        'expected_sequence_length': [1, 2]}
       )
-  def test_sequence_length(self, inputs, expected_sequence_length):
+  def test_sequence_length(self, inputs_args, expected_sequence_length):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     vocabulary_size = 3
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
@@ -1124,14 +1147,14 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
-           indices=((0, 0), (1, 0), (1, 1), (3, 0)),
-           values=(2, 0, 1, 1),
-           dense_shape=(4, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
+           'values': (2, 0, 1, 1),
+           'dense_shape': (4, 2)},
        'expected': [
            # example 0, ids [2]
            [[0., 0., 1.], [0., 0., 0.]],
@@ -1142,15 +1165,15 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
            # example 3, ids [1]
            [[0., 1., 0.], [0., 0., 0.]]]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
            # example 2, ids []
            # example 3, ids [[1], [2, 2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                    (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           values=(2, 0, 1, 2, 1, 2, 2),
-           dense_shape=(4, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
+                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
+           'values': (2, 0, 1, 2, 1, 2, 2),
+           'dense_shape': (4, 2, 2)},
        'expected': [
            # example 0, ids [[2]]
            [[0., 0., 1.], [0., 0., 0.]],
@@ -1161,7 +1184,8 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
            # example 3, ids [[1], [2, 2]]
            [[0., 1., 0.], [0., 0., 2.]]]}
       )
-  def test_get_sequence_dense_tensor(self, inputs, expected):
+  def test_get_sequence_dense_tensor(self, inputs_args, expected):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     vocabulary_size = 3
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
@@ -1176,23 +1200,24 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2, 0, 1),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2, 0, 1),
+           'dense_shape': (2, 2)},
        'expected_sequence_length': [1, 2]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2, 0, 1, 2),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2, 0, 1, 2),
+           'dense_shape': (2, 2, 2)},
        'expected_sequence_length': [1, 2]}
       )
-  def test_sequence_length(self, inputs, expected_sequence_length):
+  def test_sequence_length(self, inputs_args, expected_sequence_length):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     vocabulary_size = 3
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
@@ -1269,27 +1294,28 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, values [0., 1]
            # example 1, [10.]
-           indices=((0, 0), (0, 1), (1, 0)),
-           values=(0., 1., 10.),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (0, 1), (1, 0)),
+           'values': (0., 1., 10.),
+           'dense_shape': (2, 2)},
        'expected': [
            [[0.], [1.]],
            [[10.], [0.]]]},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # feature 0, ids [[20, 3], [5]]
            # feature 1, ids [[3], [8]]
-           indices=((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           values=(20, 3, 5., 3., 8.),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
+           'values': (20, 3, 5., 3., 8.),
+           'dense_shape': (2, 2, 2)},
        'expected': [
            [[20.], [3.], [5.], [0.]],
            [[3.], [0.], [8.], [0.]]]},
       )
-  def test_get_sequence_dense_tensor(self, inputs, expected):
+  def test_get_sequence_dense_tensor(self, inputs_args, expected):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     numeric_column = sfc.sequence_numeric_column('aaa')
 
     dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
@@ -1335,23 +1361,23 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
+       'sparse_input_args': {
            # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
            # example 1, [[[10., 11.],  [12., 13.]]]
-           indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                    (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 8)),
+           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
+                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 8)},
        'expected_dense_tensor': [
            [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
            [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]},
       {'testcase_name': '3D',
-       'sparse_input': sparse_tensor.SparseTensorValue(
-           indices=((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
-                    (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
-                    (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
-           values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           dense_shape=(2, 2, 8)),
+       'sparse_input_args': {
+           'indices': ((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6),
+                       (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6),
+                       (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)),
+           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
+           'dense_shape': (2, 2, 8)},
        'expected_dense_tensor': [
            [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]],
             [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]],
@@ -1359,8 +1385,9 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
             [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]},
       )
   def test_get_dense_tensor_multi_dim(
-      self, sparse_input, expected_dense_tensor):
+      self, sparse_input_args, expected_dense_tensor):
     """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
+    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
     numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
 
     dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
@@ -1372,43 +1399,44 @@ class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       {'testcase_name': '2D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2., 0., 1.),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2., 0., 1.),
+           'dense_shape': (2, 2)},
        'expected_sequence_length': [1, 2],
        'shape': (1,)},
       {'testcase_name': '3D',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2., 0., 1., 2.),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2., 0., 1., 2.),
+           'dense_shape': (2, 2, 2)},
        'expected_sequence_length': [1, 2],
        'shape': (1,)},
       {'testcase_name': '2D_with_shape',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [2]
            # example 1, ids [0, 1]
-           indices=((0, 0), (1, 0), (1, 1)),
-           values=(2., 0., 1.),
-           dense_shape=(2, 2)),
+           'indices': ((0, 0), (1, 0), (1, 1)),
+           'values': (2., 0., 1.),
+           'dense_shape': (2, 2)},
        'expected_sequence_length': [1, 1],
        'shape': (2,)},
       {'testcase_name': '3D_with_shape',
-       'inputs': sparse_tensor.SparseTensorValue(
+       'inputs_args': {
            # example 0, ids [[2]]
            # example 1, ids [[0, 1], [2]]
-           indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           values=(2., 0., 1., 2.),
-           dense_shape=(2, 2, 2)),
+           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
+           'values': (2., 0., 1., 2.),
+           'dense_shape': (2, 2, 2)},
        'expected_sequence_length': [1, 2],
        'shape': (2,)},
       )
-  def test_sequence_length(self, inputs, expected_sequence_length, shape):
+  def test_sequence_length(self, inputs_args, expected_sequence_length, shape):
+    inputs = sparse_tensor.SparseTensorValue(**inputs_args)
     numeric_column = sfc.sequence_numeric_column('aaa', shape=shape)
 
     _, sequence_length = numeric_column._get_sequence_dense_tensor(
-- 
GitLab


From c1093a3757224257fed0f7a1959d0fc99d5c757f Mon Sep 17 00:00:00 2001
From: Ruoxin Sang <rxsang@google.com>
Date: Tue, 9 Oct 2018 15:02:51 -0700
Subject: [PATCH 0638/1085] In TPUMirroredVariable, when setting
 _initializer_op and _initial_value attributes, set the attributes of all the
 contained variables. This fixes a bug that tf.train.init_from_checkpoint
 doesn't overwrite the initialization values correctly for
 TPUMirroredVariable.

PiperOrigin-RevId: 216429476
---
 tensorflow/contrib/distribute/python/values.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index 0dd78ba185..472cb4230c 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -475,6 +475,11 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
     self._aggregation = aggregation
     # Needed for GradientTape
     self._trainable = self._primary_var.trainable
+    # Typically like `DistributedVariable`, a `TPUMirroredVariable`'s
+    # initializer is composed of the initializers of the components variables.
+    # However, in some cases, such as when restoring from a checkpoint, we may
+    # set the _initializer_op property on the entire `TPUMirroredVariable`.
+    self._initializer_op = None
 
   def _get(self, device=None):
     """Returns the value for the current device or raises a ValueError."""
@@ -704,8 +709,12 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
 
   @property
   def initializer(self):
-    return control_flow_ops.group(
-        [v.initializer for v in nest.flatten(self._index)])
+    if self._initializer_op:
+      init_op = self._initializer_op
+    else:
+      init_op = control_flow_ops.group(
+          [v.initializer for v in self._index.values()])
+    return init_op
 
   @property
   def graph(self):
-- 
GitLab


From 5f69248a692f7b47ea11930621f4f19d0397fe8c Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Tue, 9 Oct 2018 15:07:47 -0700
Subject: [PATCH 0639/1085] Make defun work under distributed strategies.

The core of the change is have the gradient tape capture
distributed variables instead of plain ResourceVariables.
In other words, we move the distribution awareness from defun
down to tape and rely on distributed variable magic to provide us
with the right variable at runtime.

In tower context, we always watch the container (e.g. MirroredVariable).
In cross tower context, we always watch all the components.

PiperOrigin-RevId: 216430530
---
 .../distribute/python/mirrored_strategy.py    | 23 +++++---
 .../python/mirrored_strategy_multigpu_test.py | 58 +++++++++++++++++++
 tensorflow/python/eager/backprop_test.py      | 24 ++++++++
 tensorflow/python/eager/function.py           | 53 ++---------------
 tensorflow/python/eager/tape.py               | 31 +++++++++-
 5 files changed, 128 insertions(+), 61 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index a32424b316..0f82508428 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -293,7 +293,8 @@ def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs):
       collections.append(ops.GraphKeys.TRAINABLE_VARIABLES)
       l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES)
       for v in index.values():
-        l.remove(v)
+        if v in l:
+          l.remove(v)
     g.add_to_collections(collections, result)
   elif ops.GraphKeys.GLOBAL_STEP in collections:
     ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, result)
@@ -461,16 +462,20 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
             # name as the absolute name of the variable.
             kwargs["name"] = "%s/replica_%d/" % (var0name, i)
             # Initialize replicas with the same value:
-            if context.executing_eagerly():
-              kwargs["initial_value"] = array_ops.identity(
-                  index[devices[0]].value())
-            else:
-              def initial_value_fn(device=d):
+            def initial_value_fn(device=d):
+              if context.executing_eagerly():
+                init_value = index[devices[0]].value()
+                return array_ops.identity(init_value)
+              else:
                 with ops.device(device):
-                  return array_ops.identity(index[devices[0]].initial_value)
-              kwargs["initial_value"] = initial_value_fn
+                  init_value = index[devices[0]].initial_value
+                  return array_ops.identity(init_value)
+            kwargs["initial_value"] = initial_value_fn
           with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
-            v = next_creator(*args, **kwargs)
+            # Don't record operations (e.g. other variable reads) during
+            # variable creation.
+            with tape.stop_recording():
+              v = next_creator(*args, **kwargs)
           assert not isinstance(v, values.DistributedVariable)
           index[d] = v
       return index
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index eeac528329..ed36639ce8 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -20,6 +20,8 @@ from __future__ import print_function
 
 import sys
 
+import numpy as np
+
 from tensorflow.contrib.distribute.python import mirrored_strategy
 from tensorflow.contrib.distribute.python import multi_worker_test_base
 from tensorflow.contrib.distribute.python import strategy_test_lib
@@ -34,7 +36,10 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import training as keras_training
+from tensorflow.python.keras.layers import core as keras_core
 from tensorflow.python.layers import core
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import rnn
 from tensorflow.python.ops import rnn_cell_impl
@@ -43,6 +48,8 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.training import device_util
 from tensorflow.python.training import distribution_strategy_context
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import optimizer as optimizer_lib
 from tensorflow.python.training import server_lib
 
 
@@ -1245,6 +1252,22 @@ class MockModel(object):
     return x
 
 
+class MiniModel(keras_training.Model):
+  """Minimal model for mnist.
+
+  Useful for testing and debugging on slow TPU simulators.
+  """
+
+  def __init__(self):
+    super(MiniModel, self).__init__(name="")
+    self.fc = keras_core.Dense(1, name="fc", kernel_initializer="ones",
+                               bias_initializer="ones")
+
+  def call(self, inputs, training=True):
+    inputs = array_ops.ones([1, 10])
+    return self.fc(inputs)
+
+
 class MirroredStrategyDefunTest(test.TestCase):
 
   def _skip_eager_if_gpus_less_than(self, num_gpus):
@@ -1365,6 +1388,41 @@ class MirroredStrategyDefunTest(test.TestCase):
                                         "GPU:0": 3.0 * 1.25})
     self._call_and_check(fn1, [factors], expected_result, [fn1])
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testTrain(self):
+    self._skip_eager_if_gpus_less_than(1)
+
+    cpu_dev = device_util.canonicalize("CPU:0")
+    gpu_dev = device_util.canonicalize("GPU:0")
+    devices = [cpu_dev, gpu_dev]
+    dist = mirrored_strategy.MirroredStrategy(devices)
+
+    with dist.scope():
+      mock_model = MiniModel()
+      mock_model.call = function.defun(mock_model.call)
+
+      def loss_fn(ctx):
+        del ctx
+        return mock_model(array_ops.ones([1, 10]))
+
+      gradients_fn = backprop.implicit_grad(loss_fn)
+      gradients_fn = optimizer_lib.get_filtered_grad_fn(gradients_fn)
+      grads_and_vars = dist.call_for_each_tower(
+          gradients_fn, None, run_concurrently=False)
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.25)
+      update_ops = optimizer._distributed_apply(dist, grads_and_vars)  # pylint: disable=protected-access
+
+      if not context.executing_eagerly():
+        self.evaluate(variables.global_variables_initializer())
+        self.evaluate(update_ops)
+
+      updated_var_values = self.evaluate(mock_model.variables)
+      # All variables start at 1.0 and get two updates of 0.25.
+      self.assertAllEqual(0.5 * np.ones([10, 1]), updated_var_values[0])
+      self.assertAllEqual([0.5], updated_var_values[1])
+
+
 
 class MultiWorkerMirroredStrategyTest(
     multi_worker_test_base.MultiWorkerTestBase,
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 7e5c9f3cb6..b1b20fafd2 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -258,6 +258,30 @@ class BackpropTest(test.TestCase):
       loss += v * v
     self.assertAllEqual(t.gradient(loss, v), 2.0)
 
+  def testAutomaticWatchedVariables(self):
+    with backprop.GradientTape() as t:
+      self.assertEqual(0, len(t.watched_variables()))
+      v = resource_variable_ops.ResourceVariable(1.0)
+      loss = v * v
+      self.assertAllEqual([v], t.watched_variables())
+
+      t.reset()
+      self.assertEqual(0, len(t.watched_variables()))
+      loss += v * v
+      self.assertAllEqual([v], t.watched_variables())
+
+  def testExplicitWatchedVariables(self):
+    with backprop.GradientTape() as t:
+      self.assertEqual(0, len(t.watched_variables()))
+      v = resource_variable_ops.ResourceVariable(1.0)
+      t.watch(v)
+      self.assertAllEqual([v], t.watched_variables())
+
+      t.reset()
+      self.assertEqual(0, len(t.watched_variables()))
+      t.watch(v)
+      self.assertAllEqual([v], t.watched_variables())
+
   @test_util.assert_no_new_tensors
   def testGradientNone(self):
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index ff138cad1e..f1a63adce1 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -51,7 +51,6 @@ from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
@@ -202,6 +201,7 @@ class FuncGraph(ops.Graph):
     # from the default graph even in eager mode. Maybe it should be part of the
     # eager context?
     self._distribution_strategy_stack = graph._distribution_strategy_stack
+    self._variable_creator_stack = graph._variable_creator_stack
     # Inherit the graph key, since this is used for matching variables in
     # optimizers.
     self._graph_key = graph._graph_key
@@ -563,17 +563,6 @@ class Function(object):
         self._func_graph.inputs, self._func_graph.outputs, self._attrs)
     self._backward_graph_function = None
 
-    # Map holding distributed variables, keyed by resource handle tensors.
-    self._distributed_variables = {}
-    strategy = distribution_strategy_context.get_distribution_strategy()
-    for variable in self._func_graph.variables:
-      # If variable is not distributed, unwrap returns [variable].
-      component_variables = strategy.unwrap(variable)
-      # Only update the dictionary when the variable is actually distributed.
-      if (len(component_variables) > 1 or component_variables[0] != variable):
-        for component_variable in component_variables:
-          self._distributed_variables[component_variable.handle] = variable
-
   def __call__(self, *args):
     """Executes the wrapped function.
 
@@ -602,7 +591,6 @@ class Function(object):
       if v.trainable:
         tape.variable_accessed(v)
 
-    captures = self._resolve_captured_inputs()
     tensor_inputs = []
     for i, arg in enumerate(nest.flatten(args)):
       if isinstance(arg, resource_variable_ops.ResourceVariable):
@@ -615,9 +603,10 @@ class Function(object):
         raise ValueError("All inputs to `Function`s must be Tensors; "
                          "on invocation of %s, the %d-th input (%s) was not a "
                          "Tensor." % (self._func_graph.name, i, str(arg)))
-    args = tensor_inputs + captures
+    args = tensor_inputs + self._captured_inputs
 
-    if tape.should_record(tensor_inputs) or tape.should_record(captures):
+    if (tape.should_record(tensor_inputs) or
+        tape.should_record(self._captured_inputs)):
       return self._backprop_call(args)
 
     # Only need to override the gradient in graph mode and when we have outputs.
@@ -804,32 +793,6 @@ class Function(object):
                           args, backward_function)
     return self._build_call_outputs(real_outputs)
 
-  def _resolve_captured_inputs(self):
-    """Resolve captured distributed variables to their current values.
-
-    Some inputs can be distributed variables. Such variables yield a different
-    component (i.e. actual tf.Variable) variables depending on the context of
-    execution.
-
-    Returns:
-      a list of resolved captured input tensors.
-    """
-    if self._distributed_variables:
-      # Loop over each captured input and check if it corresponds to something
-      # distributed. If so, get its _distributed_container and fetch the
-      # component appropriate for the current execution context.
-      resolved_captured_inputs = self._captured_inputs[:]
-      for i, captured_input in enumerate(self._captured_inputs):
-        distributed_var = self._distributed_variables.get(captured_input, None)
-        if distributed_var is not None:
-          # distributed variables override __getattr__ and substitute the
-          # right component variable. In here, `distributed_var.handle`
-          # actually does the equivalent of
-          # distributed_var.get_current_component_var().handle.
-          resolved_captured_inputs[i] = distributed_var.handle
-      return resolved_captured_inputs
-    return self._captured_inputs
-
   def _build_call_outputs(self, result):
     """Maps the fdef output list to actual output structure.
 
@@ -1010,14 +973,6 @@ def func_graph_from_py_func(name,
         for x in _flatten(func_graph.structured_outputs)
         if x is not None)
 
-    # Some captured variables might be components of DistributedValues.
-    # Instead of storing non-distributed component variables, we
-    # store their distributed containers so we can retrieve the correct
-    # component variables at call-time.
-    strategy = distribution_strategy_context.get_distribution_strategy()
-    for i, variable in enumerate(variables):
-      # If variable is not distributed value_container returns itself.
-      variables[i] = strategy.value_container(variable)
     func_graph.variables = variables
 
   # Register any other functions defined in the graph.
diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py
index 399d90223c..ade945f874 100644
--- a/tensorflow/python/eager/tape.py
+++ b/tensorflow/python/eager/tape.py
@@ -21,6 +21,15 @@ from __future__ import print_function
 import contextlib
 
 from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.util.lazy_loader import LazyLoader
+
+# There is a circular dependency between this, ops.py, and
+# distribution_strategy_context.
+# TODO(b/117329403): Remove this circular dependency.
+distribution_strategy_context = LazyLoader(
+    "distribute_lib", globals(),
+    "tensorflow.python.training."
+    "distribution_strategy_context")
 
 
 class Tape(object):
@@ -52,12 +61,28 @@ def watch(tape, tensor):
 
 def watch_variable(tape, variable):
   """Marks this variable to be watched by the given tape."""
-  pywrap_tensorflow.TFE_Py_TapeWatchVariable(tape._tape, variable)  # pylint: disable=protected-access
+  strategy = distribution_strategy_context.get_distribution_strategy()
+  if distribution_strategy_context.get_tower_context():
+    variables = [strategy.value_container(variable)]
+  else:
+    variables = strategy.unwrap(variable)
+  for var in variables:
+    pywrap_tensorflow.TFE_Py_TapeWatchVariable(tape._tape, var)  # pylint: disable=protected-access
 
 
 def variable_accessed(variable):
-  """Notifies all tapes in the stack that a variable has been accessed."""
-  pywrap_tensorflow.TFE_Py_TapeVariableAccessed(variable)
+  """Notifies all tapes in the stack that a variable has been accessed.
+
+  Args:
+    variable: variable to be watched.
+  """
+  strategy = distribution_strategy_context.get_distribution_strategy()
+  if distribution_strategy_context.get_tower_context():
+    variables = [strategy.value_container(variable)]
+  else:
+    variables = strategy.unwrap(variable)
+  for var in variables:
+    pywrap_tensorflow.TFE_Py_TapeVariableAccessed(var)
 
 
 def pop_tape(tape):
-- 
GitLab


From 771955e2b8be98a0b38fada41bd67f663397c87d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 15:18:21 -0700
Subject: [PATCH 0640/1085] Raises an appropriate error if `add_weight` is
 called on a Keras network.

PiperOrigin-RevId: 216432358
---
 tensorflow/python/keras/engine/network.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 5969fea2b2..266c48d304 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -432,6 +432,27 @@ class Network(base_layer.Layer):
           'assign variables to attributes and they will show up in the weights '
           'and variables properties.')
 
+  def add_weight(self,
+                 name,
+                 shape,
+                 dtype=None,
+                 initializer=None,
+                 regularizer=None,
+                 trainable=None,
+                 constraint=None,
+                 partitioner=None,
+                 use_resource=None,
+                 synchronization=variables.VariableSynchronization.AUTO,
+                 aggregation=variables.VariableAggregation.NONE,
+                 **kwargs):
+    if self._is_graph_network:
+      raise NotImplementedError('`add_weight` is not supported on Networks.')
+    else:
+      raise NotImplementedError(
+          '`add_weight` is not supported on Networks. However, you may '
+          'assign variables to attributes and they will show up in the weights '
+          'and variables properties.')
+
   def add_loss(self, *args, **kwargs):
     if context.executing_eagerly():
       raise NotImplementedError('`add_loss` is not supported on Networks '
-- 
GitLab


From f7d468552877cc014f144061cd59b6691ef951c4 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Tue, 9 Oct 2018 15:41:24 -0700
Subject: [PATCH 0641/1085] Made gold standard string smaller

---
 .../python/kernel_tests/check_ops_test.py     | 22 ++++---------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 5d953a3a38..971d4ee997 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -303,24 +303,10 @@ class AssertNoneEqualTest(test.TestCase):
       assert x is None
 
   def test_error_message_eager(self):
-    expected_error_msg_full = r"""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.'
-b'Condition x != y did not hold for every single element:'
-b'x \(shape=\(2, 3\) dtype=float32\) = '
-0.0, 1.0, 2.0, 3.0, 4.0, 5.0
-b'y \(shape=\(2, 3\) dtype=float32\) = '
-0.0, 1.0, 2.0, 3.0, 4.0, 5.0"""
-    expected_error_msg_default = r"""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.'
-b'Condition x != y did not hold for every single element:'
-b'x \(shape=\(2, 3\) dtype=float32\) = '
-0.0, 1.0, 2.0, ...
-b'y \(shape=\(2, 3\) dtype=float32\) = '
-0.0, 1.0, 2.0, ..."""
-    expected_error_msg_short = r"""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.'
-b'Condition x != y did not hold for every single element:'
-b'x \(shape=\(2, 3\) dtype=float32\) = '
-0.0, 1.0, ...
-b'y \(shape=\(2, 3\) dtype=float32\) = '
-0.0, 1.0, ..."""
+    # Note that the following three strings are regexes
+    expected_error_msg_full = r"""0.0, 1.0, 2.0, 3.0, 4.0, 5.0"""
+    expected_error_msg_default = r"""0.0, 1.0, 2.0, \.\.\."""
+    expected_error_msg_short = r"""0.0, 1.0, .\.\."""
     with context.eager_mode():
       t = constant_op.constant(np.array(range(6)), shape=[2, 3], 
                                dtype=np.float32)
-- 
GitLab


From 496626332a95865af39c209b588eb7fa13caba51 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Tue, 9 Oct 2018 15:43:30 -0700
Subject: [PATCH 0642/1085] Missed a backslash

---
 tensorflow/python/kernel_tests/check_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py
index 971d4ee997..5cdaf86299 100644
--- a/tensorflow/python/kernel_tests/check_ops_test.py
+++ b/tensorflow/python/kernel_tests/check_ops_test.py
@@ -306,7 +306,7 @@ class AssertNoneEqualTest(test.TestCase):
     # Note that the following three strings are regexes
     expected_error_msg_full = r"""0.0, 1.0, 2.0, 3.0, 4.0, 5.0"""
     expected_error_msg_default = r"""0.0, 1.0, 2.0, \.\.\."""
-    expected_error_msg_short = r"""0.0, 1.0, .\.\."""
+    expected_error_msg_short = r"""0.0, 1.0, \.\.\."""
     with context.eager_mode():
       t = constant_op.constant(np.array(range(6)), shape=[2, 3], 
                                dtype=np.float32)
-- 
GitLab


From 69c4a426fc4a3afd83c8190467b07c17b8b2ed60 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 9 Oct 2018 15:47:56 -0700
Subject: [PATCH 0643/1085] [XLA] Allow scatter to share the operand buffer
 with the output

This avoids a copy.

PiperOrigin-RevId: 216437329
---
 .../xla/service/hlo_dataflow_analysis.cc      |  1 +
 .../xla/service/hlo_dataflow_analysis_test.cc | 38 +++++++++++++++++++
 .../xla/service/tuple_points_to_analysis.cc   |  1 +
 .../service/tuple_points_to_analysis_test.cc  | 38 +++++++++++++++++++
 4 files changed, 78 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index c22adcdd8d..71122e73b1 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -1048,6 +1048,7 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser(
   }
 
   if (user->opcode() == HloOpcode::kDynamicUpdateSlice ||
+      user->opcode() == HloOpcode::kScatter ||
       user->opcode() == HloOpcode::kWhile) {
     // We eliminated other users in BufferLiveness::live_range_strictly_before,
     // so here we just need to check that the use is at operand index 0.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
index 510d6360a1..d27786d160 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
@@ -2283,6 +2283,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) {
       dataflow_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {}));
 }
 
+TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) {
+  const char* hlo_text = R"(
+    HloModule TensorFlowScatterV1
+
+    update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+      lhs = s32[] parameter(0)
+      ROOT rhs = s32[] parameter(1)
+    }
+
+    ENTRY main {
+      operand = s32[3,3] parameter(0)
+      indices = s32[2] parameter(1)
+      updates = s32[2,3] parameter(2)
+      ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=update_s32,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text));
+  computation_ = module_->entry_computation();
+  RunAnalysis();
+
+  HloInstruction* operand_param = computation_->parameter_instruction(0);
+  HloInstruction* indices_param = computation_->parameter_instruction(1);
+  HloInstruction* updates_param = computation_->parameter_instruction(2);
+  HloInstruction* scatter = computation_->root_instruction();
+
+  EXPECT_TRUE(dataflow_analysis_->CanShareOperandBufferWithUser(
+      operand_param, {}, scatter, {}));
+  EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(
+      indices_param, {}, scatter, {}));
+  EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(
+      updates_param, {}, scatter, {}));
+}
+
 TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) {
   auto builder = HloComputation::Builder(TestName());
 
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
index 811ac55e2d..ef4e69180d 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
@@ -756,6 +756,7 @@ bool TuplePointsToAnalysis::CanShareOperandBufferWithUser(
     }
   }
   if (user->opcode() == HloOpcode::kDynamicUpdateSlice ||
+      user->opcode() == HloOpcode::kScatter ||
       user->opcode() == HloOpcode::kWhile) {
     // We eliminated other users in BufferLiveness::live_range_strictly_before,
     // so here we just need to check that the use is at operand index 0.
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
index e9a07b14ed..a571bd571b 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
@@ -1010,6 +1010,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) {
       points_to_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {}));
 }
 
+TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) {
+  const char* hlo_text = R"(
+    HloModule TensorFlowScatterV1
+
+    update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+      lhs = s32[] parameter(0)
+      ROOT rhs = s32[] parameter(1)
+    }
+
+    ENTRY main {
+      operand = s32[3,3] parameter(0)
+      indices = s32[2] parameter(1)
+      updates = s32[2,3] parameter(2)
+      ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=update_s32,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text));
+  computation_ = module_->entry_computation();
+  RunAnalysis();
+
+  HloInstruction* operand_param = computation_->parameter_instruction(0);
+  HloInstruction* indices_param = computation_->parameter_instruction(1);
+  HloInstruction* updates_param = computation_->parameter_instruction(2);
+  HloInstruction* scatter = computation_->root_instruction();
+
+  EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser(
+      operand_param, {}, scatter, {}));
+  EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(
+      indices_param, {}, scatter, {}));
+  EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(
+      updates_param, {}, scatter, {}));
+}
+
 TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) {
   auto builder = HloComputation::Builder(TestName());
 
-- 
GitLab


From c98ffffcb4e0cc668c0ff7b73d51677a7eb7dcf4 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Tue, 9 Oct 2018 16:19:46 -0700
Subject: [PATCH 0644/1085] Part 2/3 of the update of tf.keras to the Keras
 2.2.4 API.

PiperOrigin-RevId: 216442569
---
 tensorflow/python/keras/backend.py            |  64 ++++++----
 tensorflow/python/keras/callbacks.py          | 101 +++++++++++----
 tensorflow/python/keras/callbacks_test.py     | 118 ++++++++++++++++--
 .../python/keras/layers/convolutional.py      |  14 ++-
 .../python/keras/layers/convolutional_test.py |  36 ++++++
 tensorflow/python/kernel_tests/rnn_test.py    |   4 +-
 .../golden/v1/tensorflow.keras.backend.pbtxt  |   2 +-
 ...flow.keras.callbacks.-early-stopping.pbtxt |   6 +-
 ...orflow.keras.callbacks.-tensor-board.pbtxt |   2 +-
 ...sorflow.keras.layers.-up-sampling2-d.pbtxt |   2 +-
 .../golden/v2/tensorflow.keras.backend.pbtxt  |   2 +-
 ...flow.keras.callbacks.-early-stopping.pbtxt |   6 +-
 ...orflow.keras.callbacks.-tensor-board.pbtxt |   2 +-
 ...sorflow.keras.layers.-up-sampling2-d.pbtxt |   2 +-
 14 files changed, 296 insertions(+), 65 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 13f52fbae7..7509ef9c59 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -2338,7 +2338,8 @@ def permute_dimensions(x, pattern):
 
 
 @tf_export('keras.backend.resize_images')
-def resize_images(x, height_factor, width_factor, data_format):
+def resize_images(x, height_factor, width_factor, data_format,
+                  interpolation='nearest'):
   """Resizes the images contained in a 4D tensor.
 
   Arguments:
@@ -2346,40 +2347,55 @@ def resize_images(x, height_factor, width_factor, data_format):
       height_factor: Positive integer.
       width_factor: Positive integer.
       data_format: One of `"channels_first"`, `"channels_last"`.
+      interpolation: A string, one of `nearest` or `bilinear`.
 
   Returns:
       A tensor.
 
   Raises:
-      ValueError: if `data_format` is neither
-          `channels_last` or `channels_first`.
+      ValueError: in case of incorrect value for
+        `data_format` or `interpolation`.
   """
   if data_format == 'channels_first':
-    original_shape = int_shape(x)
-    new_shape = array_ops.shape(x)[2:]
-    new_shape *= constant_op.constant(
-        np.array([height_factor, width_factor]).astype('int32'))
+    rows, cols = 2, 3
+  elif data_format == 'channels_last':
+    rows, cols = 1, 2
+  else:
+    raise ValueError('Invalid `data_format` argument: %s' % (data_format,))
+
+  original_shape = int_shape(x)
+  new_shape = array_ops.shape(x)[rows:cols + 1]
+  new_shape *= constant_op.constant(
+      np.array([height_factor, width_factor], dtype='int32'))
+
+  if data_format == 'channels_first':
     x = permute_dimensions(x, [0, 2, 3, 1])
+  if interpolation == 'nearest':
     x = image_ops.resize_nearest_neighbor(x, new_shape)
+  elif interpolation == 'bilinear':
+    x = image_ops.resize_bilinear(x, new_shape)
+  else:
+    raise ValueError('interpolation should be one '
+                     'of "nearest" or "bilinear".')
+  if data_format == 'channels_first':
     x = permute_dimensions(x, [0, 3, 1, 2])
-    x.set_shape((None, None, original_shape[2] * height_factor
-                 if original_shape[2] is not None else None,
-                 original_shape[3] * width_factor
-                 if original_shape[3] is not None else None))
-    return x
-  elif data_format == 'channels_last':
-    original_shape = int_shape(x)
-    new_shape = array_ops.shape(x)[1:3]
-    new_shape *= constant_op.constant(
-        np.array([height_factor, width_factor]).astype('int32'))
-    x = image_ops.resize_nearest_neighbor(x, new_shape)
-    x.set_shape((None, original_shape[1] * height_factor
-                 if original_shape[1] is not None else None,
-                 original_shape[2] * width_factor
-                 if original_shape[2] is not None else None, None))
-    return x
+
+  if original_shape[rows] is None:
+    new_height = None
   else:
-    raise ValueError('Invalid data_format: ' + str(data_format))
+    new_height = original_shape[rows] * height_factor
+
+  if original_shape[cols] is None:
+    new_width = None
+  else:
+    new_width = original_shape[cols] * width_factor
+
+  if data_format == 'channels_first':
+    output_shape = (None, None, new_height, new_width)
+  else:
+    output_shape = (None, new_height, new_width, None)
+  x.set_shape(output_shape)
+  return x
 
 
 @tf_export('keras.backend.resize_volumes')
diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index 3d6000f223..4c12c83a4c 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -24,6 +24,7 @@ from collections import Iterable
 from collections import OrderedDict
 import copy
 import csv
+import io
 import json
 import math
 import os
@@ -606,24 +607,28 @@ class EarlyStopping(Callback):
   """Stop training when a monitored quantity has stopped improving.
 
   Arguments:
-      monitor: quantity to be monitored.
-      min_delta: minimum change in the monitored quantity
+      monitor: Quantity to be monitored.
+      min_delta: Minimum change in the monitored quantity
           to qualify as an improvement, i.e. an absolute
           change of less than min_delta, will count as no
           improvement.
-      patience: number of epochs with no improvement
+      patience: Number of epochs with no improvement
           after which training will be stopped.
       verbose: verbosity mode.
-      mode: one of {auto, min, max}. In `min` mode,
+      mode: One of `{"auto", "min", "max"}`. In `min` mode,
           training will stop when the quantity
           monitored has stopped decreasing; in `max`
           mode it will stop when the quantity
           monitored has stopped increasing; in `auto`
           mode, the direction is automatically inferred
           from the name of the monitored quantity.
-      baseline: baseline value for the monitored quantity.
+      baseline: Baseline value for the monitored quantity.
           Training will stop if the model doesn't show improvement over the
           baseline.
+      restore_best_weights: Whether to restore model weights from
+          the epoch with the best value of the monitored quantity.
+          If False, the model weights obtained at the last step of
+          training are used.
   """
 
   def __init__(self,
@@ -632,7 +637,8 @@ class EarlyStopping(Callback):
                patience=0,
                verbose=0,
                mode='auto',
-               baseline=None):
+               baseline=None,
+               restore_best_weights=False):
     super(EarlyStopping, self).__init__()
 
     self.monitor = monitor
@@ -642,6 +648,8 @@ class EarlyStopping(Callback):
     self.min_delta = abs(min_delta)
     self.wait = 0
     self.stopped_epoch = 0
+    self.restore_best_weights = restore_best_weights
+    self.best_weights = None
 
     if mode not in ['auto', 'min', 'max']:
       logging.warning('EarlyStopping mode %s is unknown, '
@@ -673,25 +681,37 @@ class EarlyStopping(Callback):
       self.best = np.Inf if self.monitor_op == np.less else -np.Inf
 
   def on_epoch_end(self, epoch, logs=None):
-    current = logs.get(self.monitor)
+    current = self.get_monitor_value(logs)
     if current is None:
-      logging.warning('Early stopping conditioned on metric `%s` '
-                      'which is not available. Available metrics are: %s',
-                      self.monitor, ','.join(list(logs.keys())))
       return
     if self.monitor_op(current - self.min_delta, self.best):
       self.best = current
       self.wait = 0
+      if self.restore_best_weights:
+        self.best_weights = self.model.get_weights()
     else:
       self.wait += 1
       if self.wait >= self.patience:
         self.stopped_epoch = epoch
         self.model.stop_training = True
+        if self.restore_best_weights:
+          if self.verbose > 0:
+            print('Restoring model weights from the end of the best epoch.')
+          self.model.set_weights(self.best_weights)
 
   def on_train_end(self, logs=None):
     if self.stopped_epoch > 0 and self.verbose > 0:
       print('Epoch %05d: early stopping' % (self.stopped_epoch + 1))
 
+  def get_monitor_value(self, logs):
+    logs = logs or {}
+    monitor_value = logs.get(self.monitor)
+    if monitor_value is None:
+      logging.warning('Early stopping conditioned on metric `%s` '
+                      'which is not available. Available metrics are: %s',
+                      self.monitor, ','.join(list(logs.keys())))
+    return monitor_value
+
 
 @tf_export('keras.callbacks.RemoteMonitor')
 class RemoteMonitor(Callback):
@@ -839,6 +859,12 @@ class TensorBoard(Callback):
           `embeddings_layer_names`. Numpy array (if the model has a single
           input) or list of Numpy arrays (if the model has multiple inputs).
           Learn [more about embeddings](https://www.tensorflow.org/programmers_guide/embedding)
+      update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`,
+          writes the losses and metrics to TensorBoard after each batch.
+          The same applies for `'epoch'`. If using an integer, let's say `1000`,
+          the callback will write the metrics and losses to TensorBoard every
+          1000 samples. Note that writing too frequently to TensorBoard
+          can slow down your training.
 
   Raises:
       ValueError: If histogram_freq is set and no validation data is provided.
@@ -862,7 +888,8 @@ class TensorBoard(Callback):
                embeddings_freq=0,
                embeddings_layer_names=None,
                embeddings_metadata=None,
-               embeddings_data=None):
+               embeddings_data=None,
+               update_freq='epoch'):
     super(TensorBoard, self).__init__()
     self.log_dir = log_dir
     self.histogram_freq = histogram_freq
@@ -882,6 +909,12 @@ class TensorBoard(Callback):
     self.embeddings_layer_names = embeddings_layer_names
     self.embeddings_metadata = embeddings_metadata
     self.embeddings_data = embeddings_data
+    if update_freq == 'batch':
+      self.update_freq = 1
+    else:
+      self.update_freq = update_freq
+    self._samples_seen = 0
+    self._samples_seen_at_last_write = 0
 
   def _init_writer(self):
     """Sets file writer."""
@@ -1045,13 +1078,17 @@ class TensorBoard(Callback):
       # use v2 summary ops
       with self.writer.as_default(), summary_ops_v2.always_record_summaries():
         for name, value in logs.items():
-          summary_ops_v2.scalar(name, value.item(), step=step)
+          if isinstance(value, np.ndarray):
+            value = value.item()
+          summary_ops_v2.scalar(name, value, step=step)
     else:
       # use FileWriter from v1 summary
       for name, value in logs.items():
+        if isinstance(value, np.ndarray):
+          value = value.item()
         summary = tf_summary.Summary()
         summary_value = summary.value.add()
-        summary_value.simple_value = value.item()
+        summary_value.simple_value = value
         summary_value.tag = name
         self.writer.add_summary(summary, step)
     self.writer.flush()
@@ -1076,10 +1113,14 @@ class TensorBoard(Callback):
     """Writes scalar summaries for metrics on every training batch."""
     # Don't output batch_size and batch number as Tensorboard summaries
     logs = logs or {}
-    batch_logs = {('batch_' + k): v
-                  for k, v in logs.items()
-                  if k not in ['batch', 'size', 'num_steps']}
-    self._write_custom_summaries(self._total_batches_seen, batch_logs)
+    self._samples_seen += logs.get('size', 1)
+    samples_seen_since = self._samples_seen - self._samples_seen_at_last_write
+    if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq:
+      batch_logs = {('batch_' + k): v
+                    for k, v in logs.items()
+                    if k not in ['batch', 'size', 'num_steps']}
+      self._write_custom_summaries(self._total_batches_seen, batch_logs)
+      self._samples_seen_at_last_write = self._samples_seen
     self._total_batches_seen += 1
 
   def on_epoch_begin(self, epoch, logs=None):
@@ -1103,7 +1144,11 @@ class TensorBoard(Callback):
     logs = {('epoch_' + k): v
             for k, v in logs.items()
             if k not in ['batch', 'size', 'num_steps']}
-    self._write_custom_summaries(epoch, logs)
+    if self.update_freq == 'epoch':
+      step = epoch
+    else:
+      step = self._samples_seen
+    self._write_custom_summaries(step, logs)
 
     # pop the histogram summary op after each epoch
     if self.histogram_freq:
@@ -1309,7 +1354,12 @@ class CSVLogger(Callback):
     self.writer = None
     self.keys = None
     self.append_header = True
-    self.file_flags = 'b' if six.PY2 and os.name == 'nt' else ''
+    if six.PY2:
+      self.file_flags = 'b'
+      self._open_args = {}
+    else:
+      self.file_flags = ''
+      self._open_args = {'newline': '\n'}
     super(CSVLogger, self).__init__()
 
   def on_train_begin(self, logs=None):
@@ -1317,9 +1367,12 @@ class CSVLogger(Callback):
       if os.path.exists(self.filename):
         with open(self.filename, 'r' + self.file_flags) as f:
           self.append_header = not bool(len(f.readline()))
-      self.csv_file = open(self.filename, 'a' + self.file_flags)
+      mode = 'a'
     else:
-      self.csv_file = open(self.filename, 'w' + self.file_flags)
+      mode = 'w'
+    self.csv_file = io.open(self.filename,
+                            mode + self.file_flags,
+                            **self._open_args)
 
   def on_epoch_end(self, epoch, logs=None):
     logs = logs or {}
@@ -1345,9 +1398,13 @@ class CSVLogger(Callback):
       class CustomDialect(csv.excel):
         delimiter = self.sep
 
+      fieldnames = ['epoch'] + self.keys
+      if six.PY2:
+        fieldnames = [unicode(x) for x in fieldnames]
+
       self.writer = csv.DictWriter(
           self.csv_file,
-          fieldnames=['epoch'] + self.keys,
+          fieldnames=fieldnames,
           dialect=CustomDialect)
       if self.append_header:
         self.writer.writeheader()
diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py
index 467bc4cdc4..bb85347033 100644
--- a/tensorflow/python/keras/callbacks_test.py
+++ b/tensorflow/python/keras/callbacks_test.py
@@ -313,6 +313,42 @@ class KerasCallbacksTest(test.TestCase):
       hist = model.fit(data, labels, callbacks=[stopper], verbose=0, epochs=20)
       assert len(hist.epoch) >= patience
 
+  def test_EarlyStopping_final_weights_when_restoring_model_weights(self):
+
+    class DummyModel(object):
+
+      def __init__(self):
+        self.stop_training = False
+        self.weights = -1
+
+      def get_weights(self):
+        return self.weights
+
+      def set_weights(self, weights):
+        self.weights = weights
+
+      def set_weight_to_epoch(self, epoch):
+        self.weights = epoch
+
+    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
+                                               patience=2,
+                                               restore_best_weights=True)
+    early_stop.model = DummyModel()
+    losses = [0.2, 0.15, 0.1, 0.11, 0.12]
+    # The best configuration is in the epoch 2 (loss = 0.1000).
+    epochs_trained = 0
+    early_stop.on_train_begin()
+    for epoch in range(len(losses)):
+      epochs_trained += 1
+      early_stop.model.set_weight_to_epoch(epoch=epoch)
+      early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]})
+      if early_stop.model.stop_training:
+        break
+    # The best configuration is in epoch 2 (loss = 0.1000),
+    # and while patience = 2, we're restoring the best weights,
+    # so we end up at the epoch with the best weights, i.e. epoch 2
+    self.assertEqual(early_stop.model.get_weights(), 2)
+
   def test_RemoteMonitor(self):
     if requests is None:
       return
@@ -534,11 +570,15 @@ class KerasCallbacksTest(test.TestCase):
           batch_size=BATCH_SIZE,
           validation_data=(x_test, y_test),
           callbacks=cbks,
-          epochs=1,
+          epochs=2,
           verbose=0)
 
       with open(filepath) as csvfile:
-        output = ' '.join(csvfile.readlines())
+        list_lines = csvfile.readlines()
+        for line in list_lines:
+          assert line.count(sep) == 4
+        assert len(list_lines) == 5
+        output = ' '.join(list_lines)
         assert len(re.findall('epoch', output)) == 1
 
       os.remove(filepath)
@@ -1115,11 +1155,11 @@ class KerasCallbacksTest(test.TestCase):
     temp_dir = self.get_temp_dir()
     self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
 
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir)
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch')
     tb_cbk.writer = FileWriterStub(temp_dir)
 
     for batch in range(5):
-      tb_cbk.on_batch_end(batch, {'acc': np.float32(batch)})
+      tb_cbk.on_batch_end(batch, {'acc': batch})
     self.assertEqual(tb_cbk.writer.batches_logged, [0, 1, 2, 3, 4])
     self.assertEqual(tb_cbk.writer.summary_values, [0., 1., 2., 3., 4.])
     self.assertEqual(tb_cbk.writer.summary_tags, ['batch_acc'] * 5)
@@ -1147,14 +1187,17 @@ class KerasCallbacksTest(test.TestCase):
     temp_dir = self.get_temp_dir()
     self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
 
-    tb_cbk = keras.callbacks.TensorBoard(temp_dir)
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch')
     tb_cbk.writer = FileWriterStub(temp_dir)
 
-    tb_cbk.on_batch_end(0, {'acc': np.float32(5.0)})
-    tb_cbk.on_epoch_end(0, {'acc': np.float32(10.0)})
+    tb_cbk.on_batch_end(0, {'acc': 5.0})
     batch_step, batch_summary = tb_cbk.writer.batch_summary
     self.assertEqual(batch_step, 0)
     self.assertEqual(batch_summary.value[0].simple_value, 5.0)
+
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='epoch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+    tb_cbk.on_epoch_end(0, {'acc': 10.0})
     epoch_step, epoch_summary = tb_cbk.writer.epoch_summary
     self.assertEqual(epoch_step, 0)
     self.assertEqual(epoch_summary.value[0].simple_value, 10.0)
@@ -1192,6 +1235,66 @@ class KerasCallbacksTest(test.TestCase):
 
     self.assertTrue(os.path.exists(temp_dir))
 
+  def test_TensorBoard_update_freq(self):
+
+    class FileWriterStub(object):
+
+      def __init__(self, logdir, graph=None):
+        self.logdir = logdir
+        self.graph = graph
+        self.batch_summaries = []
+        self.epoch_summaries = []
+
+      def add_summary(self, summary, step):
+        if 'batch_' in summary.value[0].tag:
+          self.batch_summaries.append((step, summary))
+        elif 'epoch_' in summary.value[0].tag:
+          self.epoch_summaries.append((step, summary))
+
+      def flush(self):
+        pass
+
+      def close(self):
+        pass
+
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+
+    # Epoch mode
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='epoch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
+    self.assertEqual(tb_cbk.writer.batch_summaries, [])
+    tb_cbk.on_epoch_end(0, {'acc': 10.0, 'size': 1})
+    self.assertEqual(len(tb_cbk.writer.epoch_summaries), 1)
+
+    # Batch mode
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq='batch')
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 1})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
+    self.assertFalse(tb_cbk.writer.epoch_summaries)
+
+    # Integer mode
+    tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq=20)
+    tb_cbk.writer = FileWriterStub(temp_dir)
+
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertFalse(tb_cbk.writer.batch_summaries)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 1)
+    tb_cbk.on_batch_end(0, {'acc': 5.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
+    tb_cbk.on_batch_end(0, {'acc': 10.0, 'size': 10})
+    self.assertEqual(len(tb_cbk.writer.batch_summaries), 2)
+    self.assertFalse(tb_cbk.writer.epoch_summaries)
+
   def test_RemoteMonitorWithJsonPayload(self):
     if requests is None:
       self.skipTest('`requests` required to run this test')
@@ -1226,6 +1329,7 @@ class KerasCallbacksTest(test.TestCase):
   def test_fit_generator_with_callback(self):
 
     class TestCallback(keras.callbacks.Callback):
+
       def set_model(self, model):
         # Check the model operations for the optimizer operations that
         # the _make_train_function adds under a named scope for the
diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py
index 8f5872385c..58024677ee 100644
--- a/tensorflow/python/keras/layers/convolutional.py
+++ b/tensorflow/python/keras/layers/convolutional.py
@@ -1951,6 +1951,7 @@ class UpSampling2D(Layer):
           It defaults to the `image_data_format` value found in your
           Keras config file at `~/.keras/keras.json`.
           If you never set it, then it will be "channels_last".
+      interpolation: A string, one of `nearest` or `bilinear`.
 
   Input shape:
       4D tensor with shape:
@@ -1967,10 +1968,18 @@ class UpSampling2D(Layer):
           `(batch, channels, upsampled_rows, upsampled_cols)`
   """
 
-  def __init__(self, size=(2, 2), data_format=None, **kwargs):
+  def __init__(self,
+               size=(2, 2),
+               data_format=None,
+               interpolation='nearest',
+               **kwargs):
     super(UpSampling2D, self).__init__(**kwargs)
     self.data_format = conv_utils.normalize_data_format(data_format)
     self.size = conv_utils.normalize_tuple(size, 2, 'size')
+    if interpolation not in {'nearest', 'bilinear'}:
+      raise ValueError('`interpolation` argument should be one of `"nearest"` '
+                       'or `"bilinear"`.')
+    self.interpolation = interpolation
     self.input_spec = InputSpec(ndim=4)
 
   def compute_output_shape(self, input_shape):
@@ -1992,7 +2001,8 @@ class UpSampling2D(Layer):
 
   def call(self, inputs):
     return backend.resize_images(
-        inputs, self.size[0], self.size[1], self.data_format)
+        inputs, self.size[0], self.size[1], self.data_format,
+        interpolation=self.interpolation)
 
   def get_config(self):
     config = {'size': self.size, 'data_format': self.data_format}
diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index f88d632ab5..bdc175b8b9 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -789,6 +789,42 @@ class UpSamplingTest(test.TestCase):
 
             np.testing.assert_allclose(np_output, expected_out)
 
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_upsampling_2d_bilinear(self):
+    num_samples = 2
+    stack_size = 2
+    input_num_row = 11
+    input_num_col = 12
+    for data_format in ['channels_first', 'channels_last']:
+      if data_format == 'channels_first':
+        inputs = np.random.rand(num_samples, stack_size, input_num_row,
+                                input_num_col)
+      else:
+        inputs = np.random.rand(num_samples, input_num_row, input_num_col,
+                                stack_size)
+
+      testing_utils.layer_test(keras.layers.UpSampling2D,
+                               kwargs={'size': (2, 2),
+                                       'data_format': data_format,
+                                       'interpolation': 'bilinear'},
+                               input_shape=inputs.shape)
+
+      if not context.executing_eagerly():
+        for length_row in [2]:
+          for length_col in [2, 3]:
+            layer = keras.layers.UpSampling2D(
+                size=(length_row, length_col),
+                data_format=data_format)
+            layer.build(inputs.shape)
+            outputs = layer(keras.backend.variable(inputs))
+            np_output = keras.backend.eval(outputs)
+            if data_format == 'channels_first':
+              self.assertEqual(np_output.shape[2], length_row * input_num_row)
+              self.assertEqual(np_output.shape[3], length_col * input_num_col)
+            else:
+              self.assertEqual(np_output.shape[1], length_row * input_num_row)
+              self.assertEqual(np_output.shape[2], length_col * input_num_col)
+
   @tf_test_util.run_in_graph_and_eager_modes
   def test_upsampling_3d(self):
     num_samples = 2
diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index 2f6963f6b8..907e1277a9 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -571,8 +571,8 @@ class RNNTest(test.TestCase):
       cell.set_weights(tf_weights)
       [tf_out, tf_state] = sess.run([tf_out, tf_state], {inputs: x_train})
 
-    self.assertAllClose(tf_out, k_out)
-    self.assertAllClose(tf_state, k_state)
+    self.assertAllClose(tf_out, k_out, atol=1e-5)
+    self.assertAllClose(tf_state, k_state, atol=1e-5)
 
   def testBasicLSTMCellInterchangeWithLSTMCell(self):
     with self.session(graph=ops_lib.Graph()) as sess:
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
index 9feb7c09b8..5f0dfd7ae7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.backend.pbtxt
@@ -386,7 +386,7 @@ tf_module {
   }
   member_method {
     name: "resize_images"
-    argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'nearest\'], "
   }
   member_method {
     name: "resize_volumes"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
index f71292856c..ed0f37647f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
@@ -5,7 +5,11 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\'], "
+    argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\'], "
+  }
+  member_method {
+    name: "get_monitor_value"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "on_batch_begin"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
index e58ba18c1c..e9d53b7225 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -5,7 +5,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\', \'update_freq\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\', \'epoch\'], "
   }
   member_method {
     name: "on_batch_begin"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
index 40a56a0c94..b05e5ec84d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
@@ -82,7 +82,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'size\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\'], "
+    argspec: "args=[\'self\', \'size\', \'data_format\', \'interpolation\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\', \'nearest\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
index 9feb7c09b8..5f0dfd7ae7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.backend.pbtxt
@@ -386,7 +386,7 @@ tf_module {
   }
   member_method {
     name: "resize_images"
-    argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'x\', \'height_factor\', \'width_factor\', \'data_format\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'nearest\'], "
   }
   member_method {
     name: "resize_volumes"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
index f71292856c..ed0f37647f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
@@ -5,7 +5,11 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\'], "
+    argspec: "args=[\'self\', \'monitor\', \'min_delta\', \'patience\', \'verbose\', \'mode\', \'baseline\', \'restore_best_weights\'], varargs=None, keywords=None, defaults=[\'val_loss\', \'0\', \'0\', \'0\', \'auto\', \'None\', \'False\'], "
+  }
+  member_method {
+    name: "get_monitor_value"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "on_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
index e58ba18c1c..e9d53b7225 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -5,7 +5,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'log_dir\', \'histogram_freq\', \'batch_size\', \'write_graph\', \'write_grads\', \'write_images\', \'embeddings_freq\', \'embeddings_layer_names\', \'embeddings_metadata\', \'embeddings_data\', \'update_freq\'], varargs=None, keywords=None, defaults=[\'./logs\', \'0\', \'32\', \'True\', \'False\', \'False\', \'0\', \'None\', \'None\', \'None\', \'epoch\'], "
   }
   member_method {
     name: "on_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
index 40a56a0c94..b05e5ec84d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
@@ -82,7 +82,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'size\', \'data_format\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\'], "
+    argspec: "args=[\'self\', \'size\', \'data_format\', \'interpolation\'], varargs=None, keywords=kwargs, defaults=[\'(2, 2)\', \'None\', \'nearest\'], "
   }
   member_method {
     name: "add_loss"
-- 
GitLab


From 86777950480e10bc43b36facc478e2d706f23852 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 9 Oct 2018 16:21:56 -0700
Subject: [PATCH 0645/1085] Internal change

PiperOrigin-RevId: 216442906
---
 tensorflow/contrib/lite/build_def.bzl | 18 +++++++++---------
 tensorflow/contrib/lite/testing/BUILD |  5 ++---
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 05efee18e7..f962a138f7 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -310,15 +310,8 @@ def generated_test_models_failing(conversion_mode):
     if conversion_mode == "toco-flex":
         # TODO(b/117328698): Fix and enable the known flex failures.
         return [
-            "arg_min_max",
-            "div",
-            "floor_div",
-            "gather",
             "lstm",
-            "resize_bilinear",
-            "space_to_batch_nd",
             "split",
-            "transpose",
             "unpack",
         ]
 
@@ -334,7 +327,8 @@ def generated_test_models_all():
     """Generates a list of all tests with the different converters.
 
     Returns:
-      List of tuples representing (conversion mode, name of test).
+      List of tuples representing:
+            (conversion mode, name of test, test tags, test args).
     """
     conversion_modes = generated_test_conversion_modes()
     tests = generated_test_models()
@@ -343,12 +337,18 @@ def generated_test_models_all():
         failing_tests = generated_test_models_failing(conversion_mode)
         for test in tests:
             tags = []
+            args = []
             if test in failing_tests:
                 tags.append("notap")
                 tags.append("manual")
             if conversion_mode:
                 test += "_%s" % conversion_mode
-            options.append((conversion_mode, test, tags))
+
+            # Flex conversion shouldn't suffer from the same conversion bugs
+            # listed for the default TFLite kernel backend.
+            if conversion_mode == "toco-flex":
+                args.append("--ignore_known_bugs=false")
+            options.append((conversion_mode, test, tags, args))
     return options
 
 def gen_zip_test(name, test_name, conversion_mode, **kwargs):
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 2edd420fea..3dc666f631 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -20,8 +20,7 @@ load(
     name = "zip_test_%s" % test_name,
     size = "large",
     srcs = ["generated_examples_zip_test.cc"],
-    args = [
-    ] + select({
+    args = args + select({
         "//tensorflow:android": [],
         "//conditions:default": [
             "--zip_file_path=$(location :zip_%s)" % test_name,
@@ -61,7 +60,7 @@ load(
             "//tensorflow/core:android_tensorflow_test_lib",
         ],
     }),
-) for conversion_mode, test_name, tags in generated_test_models_all()]
+) for conversion_mode, test_name, tags, args in generated_test_models_all()]
 
 test_suite(
     name = "generated_zip_tests",
-- 
GitLab


From 2f5ebc0ea5e6d500ea8cd925234c569d6b32fd4e Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 9 Oct 2018 16:22:22 -0700
Subject: [PATCH 0646/1085] [TF:XLA] Bump open source abseil revision to
 445998d7ac4e5d3c50411d377e3b50e960d2d6c2

PiperOrigin-RevId: 216442983
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 40c226a861..b03af53cff 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -117,11 +117,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "f186bf5d9fce3037c602a21f86facbdd317adecef36e1726ec7bc7b496943a82",
-        strip_prefix = "abseil-cpp-e821380d69a549dc64900693942789d21aa4df5e",
+        sha256 = "cd1650daecfdd5591502bb017c70777c959cf604a962352bd5312bef8d78a8c6",
+        strip_prefix = "abseil-cpp-445998d7ac4e5d3c50411d377e3b50e960d2d6c2",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/e821380d69a549dc64900693942789d21aa4df5e.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz",
         ],
     )
 
-- 
GitLab


From 6c391166b8b6ba43d2b0151e6fb9cf14864131a2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 16:23:35 -0700
Subject: [PATCH 0647/1085] Add 'remove' operation to MutableHashTable and
 MutableDenseHashTable.

PiperOrigin-RevId: 216443201
---
 .../linear_optimizer/python/ops/sdca_ops.py   |   3 +-
 .../ops/sharded_mutable_dense_hashtable.py    |   2 +
 .../sharded_mutable_dense_hashtable_test.py   |   6 +
 tensorflow/contrib/lookup/lookup_ops.py       |  81 ++++-
 tensorflow/contrib/lookup/lookup_ops_test.py  | 336 +++++++++++++++---
 .../python/timeseries/math_utils.py           |  19 +-
 .../python/timeseries/math_utils_test.py      |   8 +-
 .../python/timeseries/state_management.py     |   1 +
 .../api_def_LookupTableRemoveV2.pbtxt         |  24 ++
 tensorflow/core/framework/lookup_interface.cc |   8 +
 tensorflow/core/framework/lookup_interface.h  |  17 +
 .../core/kernels/initializable_lookup_table.h |   6 +
 tensorflow/core/kernels/lookup_table_op.cc    | 184 +++++++++-
 .../core/ops/compat/ops_history.v1.pbtxt      |  20 ++
 tensorflow/core/ops/lookup_ops.cc             |  14 +
 15 files changed, 643 insertions(+), 86 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt

diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index 48ac429701..b5099a0bf6 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -152,7 +152,8 @@ class SdcaModel(object):
         default_value=[0.0, 0.0, 0.0, 0.0],
         # SdcaFprint never returns 0 or 1 for the low64 bits, so this a safe
         # empty_key (that will never collide with actual payloads).
-        empty_key=[0, 0])
+        empty_key=[0, 0],
+        deleted_key=[1, 1])
 
     summary.scalar('approximate_duality_gap', self.approximate_duality_gap())
     summary.scalar('examples_seen', self._hashtable.size())
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
index 5015fb0848..44a869f7c2 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
@@ -48,6 +48,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface):
                value_dtype,
                default_value,
                empty_key,
+               deleted_key,
                num_shards=1,
                checkpoint=True,
                name='ShardedMutableHashTable'):
@@ -62,6 +63,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface):
                 value_dtype=value_dtype,
                 default_value=default_value,
                 empty_key=empty_key,
+                deleted_key=deleted_key,
                 checkpoint=checkpoint,
                 name='%s-%d-of-%d' % (name, i + 1, num_shards)))
       self._table_shards = table_shards
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py
index 553b116a3b..2b56d0fa3a 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py
@@ -33,6 +33,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
       with self.cached_session():
         default_val = -1
         empty_key = 0
+        deleted_key = -1
         keys = constant_op.constant([11, 12, 13], dtypes.int64)
         values = constant_op.constant([0, 1, 2], dtypes.int64)
         table = ShardedMutableDenseHashTable(
@@ -40,6 +41,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
             dtypes.int64,
             default_val,
             empty_key,
+            deleted_key,
             num_shards=num_shards)
         self.assertAllEqual(0, table.size().eval())
 
@@ -56,6 +58,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
       with self.cached_session():
         default_val = [-0.1, 0.2]
         empty_key = [0, 1]
+        deleted_key = [1, 0]
         keys = constant_op.constant([[11, 12], [13, 14], [15, 16]],
                                     dtypes.int64)
         values = constant_op.constant([[0.5, 0.6], [1.5, 1.6], [2.5, 2.6]],
@@ -65,6 +68,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
             dtypes.float32,
             default_val,
             empty_key,
+            deleted_key,
             num_shards=num_shards)
         self.assertAllEqual(0, table.size().eval())
 
@@ -81,6 +85,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
   def testExportSharded(self):
     with self.cached_session():
       empty_key = -2
+      deleted_key = -3
       default_val = -1
       num_shards = 2
       keys = constant_op.constant([10, 11, 12], dtypes.int64)
@@ -90,6 +95,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase):
           dtypes.int64,
           default_val,
           empty_key,
+          deleted_key,
           num_shards=num_shards)
       self.assertAllEqual(0, table.size().eval())
 
diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py
index f83765a48d..5abef822e8 100644
--- a/tensorflow/contrib/lookup/lookup_ops.py
+++ b/tensorflow/contrib/lookup/lookup_ops.py
@@ -292,8 +292,8 @@ def index_to_string(tensor, mapping, default_value="UNK", name=None):
 class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase):
   """A generic mutable hash table implementation.
 
-  Data can be inserted by calling the insert method. It does not support
-  initialization via the init method.
+  Data can be inserted by calling the insert method and removed by calling the
+  remove method. It does not support initialization via the init method.
 
   Example usage:
 
@@ -391,6 +391,34 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase):
       with ops.colocate_with(self._table_ref):
         return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=name)
 
+  def remove(self, keys, name=None):
+    """Removes `keys` and its associated values from the table.
+
+    If a key is not present in the table, it is silently ignored.
+
+    Args:
+      keys: Keys to remove. Can be a tensor of any shape. Must match the table's
+        key type.
+      name: A name for the operation (optional).
+
+    Returns:
+      The created Operation.
+
+    Raises:
+      TypeError: when `keys` do not match the table data types.
+    """
+    if keys.dtype != self._key_dtype:
+      raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." %
+                      (self._key_dtype, keys.dtype))
+
+    with ops.name_scope(name, "%s_lookup_table_remove" % self._name,
+                        (self._table_ref, keys, self._default_value)) as name:
+      # pylint: disable=protected-access
+      op = gen_lookup_ops.lookup_table_remove_v2(
+          self._table_ref, keys, name=name)
+
+    return op
+
   def lookup(self, keys, name=None):
     """Looks up `keys` in a table, outputs the corresponding values.
 
@@ -487,11 +515,11 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase):
 class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
   """A generic mutable hash table implementation using tensors as backing store.
 
-  Data can be inserted by calling the insert method. It does not support
-  initialization via the init method.
+  Data can be inserted by calling the insert method and removed by calling the
+  remove method. It does not support initialization via the init method.
 
   It uses "open addressing" with quadratic reprobing to resolve collisions.
-  Compared to `MutableHashTable` the insert and lookup operations in a
+  Compared to `MutableHashTable` the insert, remove and lookup operations in a
   `MutableDenseHashTable` are typically faster, but memory usage can be higher.
   However, `MutableDenseHashTable` does not require additional memory for
   temporary tensors created during checkpointing and restore operations.
@@ -502,7 +530,9 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
   table = tf.contrib.lookup.MutableDenseHashTable(key_dtype=tf.int64,
                                                   value_dtype=tf.int64,
                                                   default_value=-1,
-                                                  empty_key=0)
+                                                  empty_key=0,
+                                                  deleted_key=-1)
+
   sess.run(table.insert(keys, values))
   out = table.lookup(query_keys)
   print(out.eval())
@@ -516,6 +546,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
                value_dtype,
                default_value,
                empty_key,
+               deleted_key,
                initial_num_buckets=None,
                shared_name=None,
                name="MutableDenseHashTable",
@@ -530,7 +561,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
       value_dtype: the type of the value tensors.
       default_value: The value to use if a key is missing in the table.
       empty_key: the key to use to represent empty buckets internally. Must not
-        be used in insert or lookup operations.
+        be used in insert, remove or lookup operations.
       initial_num_buckets: the initial number of buckets.
       shared_name: If non-empty, this table will be shared under
         the given name across multiple sessions.
@@ -538,9 +569,12 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
       checkpoint: if True, the contents of the table are saved to and restored
         from checkpoints. If `shared_name` is empty for a checkpointed table, it
         is shared using the table node name.
+      deleted_key: the key to use to represent deleted buckets internally. Must
+        not be used in insert, remove or lookup operations and be different from
+        the empty_key.
 
     Returns:
-      A `MutableHashTable` object.
+      A `MutableDenseHashTable` object.
 
     Raises:
       ValueError: If checkpoint is True and no name was specified.
@@ -555,6 +589,8 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
     use_node_name_sharing = checkpoint and shared_name is None
     empty_key = ops.convert_to_tensor(
         empty_key, dtype=key_dtype, name="empty_key")
+    deleted_key = ops.convert_to_tensor(
+        deleted_key, dtype=key_dtype, name="deleted_key")
     executing_eagerly = context.executing_eagerly()
     if executing_eagerly and shared_name is None:
       # TODO(allenl): This will leak memory due to kernel caching by the
@@ -564,6 +600,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
       shared_name = "table_%d" % (ops.uid(),)
     self._table_ref = gen_lookup_ops.mutable_dense_hash_table_v2(
         empty_key=empty_key,
+        deleted_key=deleted_key,
         shared_name=shared_name,
         use_node_name_sharing=use_node_name_sharing,
         value_dtype=value_dtype,
@@ -648,6 +685,34 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase):
             self._table_ref, keys, values, name=name)
       return op
 
+  def remove(self, keys, name=None):
+    """Removes `keys` and its associated values from the table.
+
+    If a key is not present in the table, it is silently ignored.
+
+    Args:
+      keys: Keys to remove. Can be a tensor of any shape. Must match the table's
+        key type.
+      name: A name for the operation (optional).
+
+    Returns:
+      The created Operation.
+
+    Raises:
+      TypeError: when `keys` do not match the table data types.
+    """
+    if keys.dtype != self._key_dtype:
+      raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." %
+                      (self._key_dtype, keys.dtype))
+
+    with ops.name_scope(name, "%s_lookup_table_remove" % self._name,
+                        (self._table_ref, keys, self._default_value)) as name:
+      # pylint: disable=protected-access
+      op = gen_lookup_ops.lookup_table_remove_v2(
+          self._table_ref, keys, name=name)
+
+    return op
+
   def export(self, name=None):
     """Returns tensors of all keys and values in the table.
 
diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py
index 9e9345e875..35b0d1bc44 100644
--- a/tensorflow/contrib/lookup/lookup_ops_test.py
+++ b/tensorflow/contrib/lookup/lookup_ops_test.py
@@ -303,13 +303,17 @@ class MutableHashTableOpTest(test.TestCase):
   def testMutableHashTable(self):
     with self.cached_session():
       default_val = -1
-      keys = constant_op.constant(["brain", "salad", "surgery"])
-      values = constant_op.constant([0, 1, 2], dtypes.int64)
+      keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"])
+      values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
       table = lookup.MutableHashTable(dtypes.string, dtypes.int64,
                                       default_val)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant(["tarkus", "tank"])
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
       input_string = constant_op.constant(["brain", "salad", "tank"])
@@ -472,13 +476,18 @@ class MutableHashTableOpTest(test.TestCase):
   def testMutableHashTableOfTensors(self):
     with self.cached_session():
       default_val = constant_op.constant([-1, -1], dtypes.int64)
-      keys = constant_op.constant(["brain", "salad", "surgery"])
-      values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64)
+      keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"])
+      values = constant_op.constant([[0, 1], [2, 3], [4, 5], [6, 7]],
+                                    dtypes.int64)
       table = lookup.MutableHashTable(dtypes.string, dtypes.int64,
                                       default_val)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant(["tarkus", "tank"])
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
       input_string = constant_op.constant(["brain", "salad", "tank"])
@@ -624,6 +633,26 @@ class MutableHashTableOpTest(test.TestCase):
       result = output.eval()
       self.assertAllEqual([0, 1, 3, -1], result)
 
+  def testMutableHashTableRemoveHighRank(self):
+    with self.test_session():
+      default_val = -1
+      keys = constant_op.constant([["brain", "salad"], ["surgery", "tank"]])
+      values = constant_op.constant([[0, 1], [2, 3]], dtypes.int64)
+      table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val)
+
+      table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant(["salad", "tarkus"])
+      table.remove(remove_string).run()
+      self.assertAllEqual(3, table.size().eval())
+
+      input_string = constant_op.constant(["brain", "salad", "tank", "tarkus"])
+      output = table.lookup(input_string)
+
+      result = output.eval()
+      self.assertAllEqual([0, -1, 3, -1], result)
+
   def testMutableHashTableOfTensorsFindHighRank(self):
     with self.cached_session():
       default_val = constant_op.constant([-1, -1, -1], dtypes.int64)
@@ -645,6 +674,30 @@ class MutableHashTableOpTest(test.TestCase):
       self.assertAllEqual(
           [[[0, 1, 2], [2, 3, 4]], [[-1, -1, -1], [-1, -1, -1]]], result)
 
+  def testMutableHashTableOfTensorsRemoveHighRank(self):
+    with self.test_session():
+      default_val = constant_op.constant([-1, -1, -1], dtypes.int64)
+      keys = constant_op.constant(["brain", "salad", "surgery"])
+      values = constant_op.constant([[0, 1, 2], [2, 3, 4], [4, 5, 6]],
+                                    dtypes.int64)
+      table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val)
+
+      table.insert(keys, values).run()
+      self.assertAllEqual(3, table.size().eval())
+
+      remove_string = constant_op.constant([["brain", "tank"]])
+      table.remove(remove_string).run()
+      self.assertAllEqual(2, table.size().eval())
+
+      input_string = constant_op.constant([["brain", "salad"],
+                                           ["surgery", "tank"]])
+      output = table.lookup(input_string)
+      self.assertAllEqual([2, 2, 3], output.get_shape())
+
+      result = output.eval()
+      self.assertAllEqual(
+          [[[-1, -1, -1], [2, 3, 4]], [[4, 5, 6], [-1, -1, -1]]], result)
+
   def testMultipleMutableHashTables(self):
     with self.cached_session() as sess:
       default_val = -1
@@ -792,13 +845,22 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
   def testBasic(self):
     with self.cached_session():
-      keys = constant_op.constant([11, 12, 13], dtypes.int64)
-      values = constant_op.constant([0, 1, 2], dtypes.int64)
+
+      keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+      values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.int64, default_value=-1, empty_key=0)
+          dtypes.int64,
+          dtypes.int64,
+          default_value=-1,
+          empty_key=0,
+          deleted_key=-1)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant([12, 15], dtypes.int64)
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
       input_string = constant_op.constant([11, 12, 15], dtypes.int64)
@@ -806,17 +868,26 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual([3], output.get_shape())
 
       result = output.eval()
-      self.assertAllEqual([0, 1, -1], result)
+      self.assertAllEqual([0, -1, -1], result)
 
   def testBasicBool(self):
     with self.cached_session():
-      keys = constant_op.constant([11, 12, 13], dtypes.int64)
-      values = constant_op.constant([True, True, True], dtypes.bool)
+
+      keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+      values = constant_op.constant([True, True, True, True], dtypes.bool)
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.bool, default_value=False, empty_key=0)
+          dtypes.int64,
+          dtypes.bool,
+          default_value=False,
+          empty_key=0,
+          deleted_key=-1)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant([11, 15], dtypes.int64)
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
       input_string = constant_op.constant([11, 12, 15], dtypes.int64)
@@ -824,14 +895,30 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual([3], output.get_shape())
 
       result = output.eval()
-      self.assertAllEqual([True, True, False], result)
+      self.assertAllEqual([False, True, False], result)
+
+  def testSameEmptyAndDeletedKey(self):
+    with self.cached_session():
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "deleted_key"):
+        table = lookup.MutableDenseHashTable(
+            dtypes.int64,
+            dtypes.int64,
+            default_value=-1,
+            empty_key=42,
+            deleted_key=42)
+        self.assertAllEqual(0, table.size().eval())
 
   def testLookupUnknownShape(self):
     with self.cached_session():
       keys = constant_op.constant([11, 12, 13], dtypes.int64)
       values = constant_op.constant([0, 1, 2], dtypes.int64)
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.int64, default_value=-1, empty_key=0)
+          dtypes.int64,
+          dtypes.int64,
+          default_value=-1,
+          empty_key=0,
+          deleted_key=-1)
 
       table.insert(keys, values).run()
       self.assertAllEqual(3, table.size().eval())
@@ -844,45 +931,60 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
   def testMapStringToFloat(self):
     with self.cached_session():
-      keys = constant_op.constant(["a", "b", "c"], dtypes.string)
-      values = constant_op.constant([0.0, 1.1, 2.2], dtypes.float32)
+
+      keys = constant_op.constant(["a", "b", "c", "d"], dtypes.string)
+      values = constant_op.constant([0.0, 1.1, 2.2, 3.3], dtypes.float32)
       default_value = constant_op.constant(-1.5, dtypes.float32)
       table = lookup.MutableDenseHashTable(
           dtypes.string,
           dtypes.float32,
           default_value=default_value,
-          empty_key="")
+          empty_key="",
+          deleted_key="$")
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      remove_string = constant_op.constant(["b", "e"])
+      table.remove(remove_string).run()
       self.assertAllEqual(3, table.size().eval())
 
-      input_string = constant_op.constant(["a", "b", "d"], dtypes.string)
+      input_string = constant_op.constant(["a", "b", "d", "e"], dtypes.string)
       output = table.lookup(input_string)
-      self.assertAllEqual([3], output.get_shape())
+      self.assertAllEqual([4], output.get_shape())
 
       result = output.eval()
-      self.assertAllClose([0, 1.1, -1.5], result)
+      self.assertAllClose([0, -1.5, 3.3, -1.5], result)
 
   def testMapInt64ToFloat(self):
     for float_dtype in [dtypes.float32, dtypes.float64]:
       with self.cached_session():
-        keys = constant_op.constant([11, 12, 13], dtypes.int64)
-        values = constant_op.constant([0.0, 1.1, 2.2], float_dtype)
+
+        keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+        values = constant_op.constant([0.0, 1.1, 2.2, 3.3], float_dtype)
         default_value = constant_op.constant(-1.5, float_dtype)
         table = lookup.MutableDenseHashTable(
-            dtypes.int64, float_dtype, default_value=default_value, empty_key=0)
+            dtypes.int64,
+            float_dtype,
+            default_value=default_value,
+            empty_key=0,
+            deleted_key=-1)
         self.assertAllEqual(0, table.size().eval())
 
         table.insert(keys, values).run()
+        self.assertAllEqual(4, table.size().eval())
+
+        remove_string = constant_op.constant([12, 15], dtypes.int64)
+        table.remove(remove_string).run()
         self.assertAllEqual(3, table.size().eval())
 
-        input_string = constant_op.constant([11, 12, 15], dtypes.int64)
+        input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64)
         output = table.lookup(input_string)
-        self.assertAllEqual([3], output.get_shape())
+        self.assertAllEqual([4], output.get_shape())
 
         result = output.eval()
-        self.assertAllClose([0, 1.1, -1.5], result)
+        self.assertAllClose([0, -1.5, 3.3, -1.5], result)
 
   def testVectorValues(self):
     with self.cached_session():
@@ -895,6 +997,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=default_value,
           empty_key=0,
+          deleted_key=-1,
           initial_num_buckets=4)
       self.assertAllEqual(0, table.size().eval())
 
@@ -908,26 +1011,35 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual(4, table.size().eval())
       self.assertAllEqual(8, len(table.export()[0].eval()))
 
-      input_string = constant_op.constant([11, 12, 15], dtypes.int64)
+      remove_string = constant_op.constant([12, 16], dtypes.int64)
+      table.remove(remove_string).run()
+      self.assertAllEqual(3, table.size().eval())
+      self.assertAllEqual(8, len(table.export()[0].eval()))
+
+      input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64)
       output = table.lookup(input_string)
-      self.assertAllEqual(
-          [3, 4], output.shape, msg="Saw shape: %s" % output.shape)
+      self.assertAllEqual([4, 4],
+                          output.shape,
+                          msg="Saw shape: %s" % output.shape)
 
       result = output.eval()
-      self.assertAllEqual([[0, 1, 2, 3], [3, 4, 5, 6], [-1, -2, -3, -4]],
-                          result)
+      self.assertAllEqual(
+          [[0, 1, 2, 3], [-1, -2, -3, -4], [2, 3, 4, 5], [-1, -2, -3, -4]],
+          result)
 
   def testVectorKeys(self):
     with self.cached_session():
       keys = constant_op.constant([[0, 1], [1, 2], [1, 3]], dtypes.int64)
       values = constant_op.constant([10, 11, 12], dtypes.int64)
       empty_key = constant_op.constant([0, 3], dtypes.int64)
+      deleted_key = constant_op.constant([-1, -1], dtypes.int64)
       default_value = constant_op.constant(-1, dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           initial_num_buckets=8)
       self.assertAllEqual(0, table.size().eval())
 
@@ -940,13 +1052,18 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual(4, table.size().eval())
       self.assertAllEqual(8, len(table.export()[0].eval()))
 
-      input_string = constant_op.constant([[0, 1], [1, 2], [0, 2]],
+      remove_string = constant_op.constant([[1, 2], [7, 8]], dtypes.int64)
+      table.remove(remove_string).run()
+      self.assertAllEqual(3, table.size().eval())
+      self.assertAllEqual(8, len(table.export()[0].eval()))
+
+      input_string = constant_op.constant([[0, 1], [1, 2], [1, 3], [0, 2]],
                                           dtypes.int64)
       output = table.lookup(input_string)
-      self.assertAllEqual([3], output.get_shape())
+      self.assertAllEqual([4], output.get_shape())
 
       result = output.eval()
-      self.assertAllEqual([10, 11, -1], result)
+      self.assertAllEqual([10, -1, 12, -1], result)
 
   def testResize(self):
     with self.cached_session():
@@ -957,6 +1074,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=-1,
           empty_key=0,
+          deleted_key=-1,
           initial_num_buckets=4)
       self.assertAllEqual(0, table.size().eval())
 
@@ -964,31 +1082,42 @@ class MutableDenseHashTableOpTest(test.TestCase):
       self.assertAllEqual(3, table.size().eval())
       self.assertAllEqual(4, len(table.export()[0].eval()))
 
-      keys2 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64)
-      values2 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64)
+      keys2 = constant_op.constant([12, 99], dtypes.int64)
+      table.remove(keys2).run()
+      self.assertAllEqual(2, table.size().eval())
+      self.assertAllEqual(4, len(table.export()[0].eval()))
+
+      keys3 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64)
+      values3 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64)
 
-      table.insert(keys2, values2).run()
-      self.assertAllEqual(7, table.size().eval())
+      table.insert(keys3, values3).run()
+      self.assertAllEqual(6, table.size().eval())
       self.assertAllEqual(16, len(table.export()[0].eval()))
 
-      keys3 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18],
+      keys4 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18],
                                    dtypes.int64)
-      output = table.lookup(keys3)
-      self.assertAllEqual([-1, 0, 1, 3, 4, 5, 6, 7, -1], output.eval())
+      output = table.lookup(keys4)
+      self.assertAllEqual([-1, 0, -1, 3, 4, 5, 6, 7, -1], output.eval())
 
   def testExport(self):
     with self.cached_session():
-      keys = constant_op.constant([11, 12, 13], dtypes.int64)
-      values = constant_op.constant([1, 2, 3], dtypes.int64)
+
+      keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+      values = constant_op.constant([1, 2, 3, 4], dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=-1,
           empty_key=100,
+          deleted_key=200,
           initial_num_buckets=8)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+
+      keys2 = constant_op.constant([12, 15], dtypes.int64)
+      table.remove(keys2).run()
       self.assertAllEqual(3, table.size().eval())
 
       exported_keys, exported_values = table.export()
@@ -1005,8 +1134,8 @@ class MutableDenseHashTableOpTest(test.TestCase):
       pairs = np.dstack((np_keys.flatten(), np_values.flatten()))[0]
       # sort by key
       pairs = pairs[pairs[:, 0].argsort()]
-      self.assertAllEqual([[11, 1], [12, 2], [13, 3], [100, 0], [100, 0],
-                           [100, 0], [100, 0], [100, 0]], pairs)
+      self.assertAllEqual([[11, 1], [13, 3], [14, 4], [100, 0], [100, 0],
+                           [100, 0], [100, 0], [200, 2]], pairs)
 
   def testSaveRestore(self):
     save_dir = os.path.join(self.get_temp_dir(), "save_restore")
@@ -1015,13 +1144,15 @@ class MutableDenseHashTableOpTest(test.TestCase):
     with self.session(graph=ops.Graph()) as sess:
       default_value = -1
       empty_key = 0
-      keys = constant_op.constant([11, 12, 13], dtypes.int64)
-      values = constant_op.constant([0, 1, 2], dtypes.int64)
+      deleted_key = -1
+      keys = constant_op.constant([11, 12, 13, 14], dtypes.int64)
+      values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t1",
           checkpoint=True,
           initial_num_buckets=32)
@@ -1030,6 +1161,11 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
       self.assertAllEqual(0, table.size().eval())
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+      self.assertAllEqual(32, len(table.export()[0].eval()))
+
+      keys2 = constant_op.constant([12, 15], dtypes.int64)
+      table.remove(keys2).run()
       self.assertAllEqual(3, table.size().eval())
       self.assertAllEqual(32, len(table.export()[0].eval()))
 
@@ -1043,6 +1179,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t1",
           checkpoint=True,
           initial_num_buckets=64)
@@ -1062,7 +1199,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
       input_string = constant_op.constant([10, 11, 12, 13, 14], dtypes.int64)
       output = table.lookup(input_string)
-      self.assertAllEqual([-1, 0, 1, 2, -1], output.eval())
+      self.assertAllEqual([-1, 0, -1, 2, 3], output.eval())
 
   @test_util.run_in_graph_and_eager_modes
   def testObjectSaveRestore(self):
@@ -1071,6 +1208,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     default_value = -1
     empty_key = 0
+    deleted_key = -1
     keys = constant_op.constant([11, 12, 13], dtypes.int64)
     values = constant_op.constant([0, 1, 2], dtypes.int64)
     save_table = lookup.MutableDenseHashTable(
@@ -1078,6 +1216,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
         dtypes.int64,
         default_value=default_value,
         empty_key=empty_key,
+        deleted_key=deleted_key,
         name="t1",
         checkpoint=True,
         initial_num_buckets=32)
@@ -1097,6 +1236,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
         dtypes.int64,
         default_value=default_value,
         empty_key=empty_key,
+        deleted_key=deleted_key,
         name="t1",
         checkpoint=True,
         initial_num_buckets=64)
@@ -1124,14 +1264,18 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       empty_key = constant_op.constant([11, 13], dtypes.int64)
+      deleted_key = constant_op.constant([-2, -3], dtypes.int64)
       default_value = constant_op.constant([-1, -2], dtypes.int64)
-      keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64)
-      values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64)
+      keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]],
+                                  dtypes.int64)
+      values = constant_op.constant([[0, 1], [2, 3], [2, 4], [4, 5]],
+                                    dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t1",
           checkpoint=True,
           initial_num_buckets=32)
@@ -1140,6 +1284,11 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
       self.assertAllEqual(0, table.size().eval())
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+      self.assertAllEqual(32, len(table.export()[0].eval()))
+
+      keys2 = constant_op.constant([[12, 13], [16, 17]], dtypes.int64)
+      table.remove(keys2).run()
       self.assertAllEqual(3, table.size().eval())
       self.assertAllEqual(32, len(table.export()[0].eval()))
 
@@ -1149,12 +1298,14 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       empty_key = constant_op.constant([11, 13], dtypes.int64)
+      deleted_key = constant_op.constant([-2, -3], dtypes.int64)
       default_value = constant_op.constant([-1, -2], dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t1",
           checkpoint=True,
           initial_num_buckets=64)
@@ -1184,14 +1335,17 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       empty_key = constant_op.constant([11, 13], dtypes.int64)
+      deleted_key = constant_op.constant([-1, -1], dtypes.int64)
       default_value = constant_op.constant(-1, dtypes.int64)
-      keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64)
-      values = constant_op.constant([0, 1, 2], dtypes.int64)
+      keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]],
+                                  dtypes.int64)
+      values = constant_op.constant([0, 1, 2, 3], dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t2",
           checkpoint=True,
           initial_num_buckets=32)
@@ -1200,6 +1354,11 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
       self.assertAllEqual(0, table.size().eval())
       table.insert(keys, values).run()
+      self.assertAllEqual(4, table.size().eval())
+      self.assertAllEqual(32, len(table.export()[0].eval()))
+
+      keys2 = constant_op.constant([[12, 13], [15, 16]], dtypes.int64)
+      table.remove(keys2).run()
       self.assertAllEqual(3, table.size().eval())
       self.assertAllEqual(32, len(table.export()[0].eval()))
 
@@ -1209,12 +1368,14 @@ class MutableDenseHashTableOpTest(test.TestCase):
 
     with self.session(graph=ops.Graph()) as sess:
       empty_key = constant_op.constant([11, 13], dtypes.int64)
+      deleted_key = constant_op.constant([-1, -1], dtypes.int64)
       default_value = constant_op.constant(-1, dtypes.int64)
       table = lookup.MutableDenseHashTable(
           dtypes.int64,
           dtypes.int64,
           default_value=default_value,
           empty_key=empty_key,
+          deleted_key=deleted_key,
           name="t2",
           checkpoint=True,
           initial_num_buckets=64)
@@ -1235,7 +1396,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
       input_string = constant_op.constant(
           [[11, 12], [11, 14], [11, 15], [13, 14], [13, 15]], dtypes.int64)
       output = table.lookup(input_string)
-      self.assertAllEqual([0, 1, -1, 2, -1], output.eval())
+      self.assertAllEqual([0, 1, -1, 3, -1], output.eval())
 
   def testReprobe(self):
     with self.cached_session():
@@ -1248,6 +1409,7 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=-1,
           empty_key=0,
+          deleted_key=-1,
           initial_num_buckets=8)
       self.assertAllEqual(0, table.size().eval())
 
@@ -1267,7 +1429,11 @@ class MutableDenseHashTableOpTest(test.TestCase):
       keys = constant_op.constant([11, 0, 13], dtypes.int64)
       values = constant_op.constant([0, 1, 2], dtypes.int64)
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.int64, default_value=-1, empty_key=12)
+          dtypes.int64,
+          dtypes.int64,
+          default_value=-1,
+          empty_key=12,
+          deleted_key=-1)
       self.assertAllEqual(0, table.size().eval())
 
       table.insert(keys, values).run()
@@ -1283,19 +1449,35 @@ class MutableDenseHashTableOpTest(test.TestCase):
   def testErrors(self):
     with self.cached_session():
       table = lookup.MutableDenseHashTable(
-          dtypes.int64, dtypes.int64, default_value=-1, empty_key=0)
+          dtypes.int64,
+          dtypes.int64,
+          default_value=-1,
+          empty_key=0,
+          deleted_key=-1)
 
       # Inserting the empty key returns an error
-      keys = constant_op.constant([11, 0], dtypes.int64)
-      values = constant_op.constant([0, 1], dtypes.int64)
+      keys1 = constant_op.constant([11, 0], dtypes.int64)
+      values1 = constant_op.constant([0, 1], dtypes.int64)
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "empty_key"):
-        table.insert(keys, values).run()
+        table.insert(keys1, values1).run()
 
       # Looking up the empty key returns an error
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "empty_key"):
-        table.lookup(keys).eval()
+        table.lookup(keys1).eval()
+
+      # Inserting the deleted key returns an error
+      keys2 = constant_op.constant([11, -1], dtypes.int64)
+      values2 = constant_op.constant([0, 1], dtypes.int64)
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "deleted_key"):
+        table.insert(keys2, values2).run()
+
+      # Looking up the empty key returns an error
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "deleted_key"):
+        table.lookup(keys2).eval()
 
       # Arbitrary tensors of keys are not supported
       keys = constant_op.constant([[11, 0], [12, 1]], dtypes.int64)
@@ -1312,11 +1494,43 @@ class MutableDenseHashTableOpTest(test.TestCase):
           dtypes.int64,
           default_value=-1,
           empty_key=17,
+          deleted_key=-1,
           initial_num_buckets=12)
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "Number of buckets must be"):
         self.assertAllEqual(0, table2.size().eval())
 
+      with self.assertRaisesRegexp(
+          errors_impl.InvalidArgumentError,
+          "Empty and deleted keys must have same shape"):
+        table3 = lookup.MutableDenseHashTable(
+            dtypes.int64,
+            dtypes.int64,
+            default_value=-1,
+            empty_key=42,
+            deleted_key=[1, 2])
+        self.assertAllEqual(0, table3.size().eval())
+
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "Empty and deleted keys cannot be equal"):
+        table4 = lookup.MutableDenseHashTable(
+            dtypes.int64,
+            dtypes.int64,
+            default_value=-1,
+            empty_key=42,
+            deleted_key=42)
+        self.assertAllEqual(0, table4.size().eval())
+
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   "Empty and deleted keys cannot be equal"):
+        table5 = lookup.MutableDenseHashTable(
+            dtypes.int64,
+            dtypes.int64,
+            default_value=-1,
+            empty_key=[1, 2, 3],
+            deleted_key=[1, 2, 3])
+        self.assertAllEqual(0, table5.size().eval())
+
 
 class IndexTableFromFile(test.TestCase):
 
@@ -2558,7 +2772,11 @@ class MutableDenseHashTableBenchmark(MutableHashTableBenchmark):
 
   def _create_table(self):
     return lookup.MutableDenseHashTable(
-        dtypes.int64, dtypes.float32, default_value=0.0, empty_key=-1)
+        dtypes.int64,
+        dtypes.float32,
+        default_value=0.0,
+        empty_key=-1,
+        deleted_key=-2)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py
index 03da2b82e5..9c585fe6a7 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py
@@ -543,20 +543,25 @@ class TupleOfTensorsLookup(lookup.LookupInterface):
   overhead.
   """
 
-  def __init__(
-      self, key_dtype, default_values, empty_key, name, checkpoint=True):
+  def __init__(self,
+               key_dtype,
+               default_values,
+               empty_key,
+               deleted_key,
+               name,
+               checkpoint=True):
     default_values_flat = nest.flatten(default_values)
-    self._hash_tables = nest.pack_sequence_as(
-        default_values,
-        [TensorValuedMutableDenseHashTable(
+    self._hash_tables = nest.pack_sequence_as(default_values, [
+        TensorValuedMutableDenseHashTable(
             key_dtype=key_dtype,
             value_dtype=default_value.dtype.base_dtype,
             default_value=default_value,
             empty_key=empty_key,
+            deleted_key=deleted_key,
             name=name + "_{}".format(table_number),
             checkpoint=checkpoint)
-         for table_number, default_value
-         in enumerate(default_values_flat)])
+        for table_number, default_value in enumerate(default_values_flat)
+    ])
     self._name = name
 
   def lookup(self, keys):
diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py
index c0de42b15b..91265b9b2e 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py
@@ -223,10 +223,12 @@ class TestLookupTable(test.TestCase):
     hash_table = math_utils.TupleOfTensorsLookup(
         key_dtype=dtypes.int64,
         default_values=[[
-            array_ops.ones([3, 2], dtype=dtypes.float32), array_ops.zeros(
-                [5], dtype=dtypes.float64)
-        ], array_ops.ones([7, 7], dtype=dtypes.int64)],
+            array_ops.ones([3, 2], dtype=dtypes.float32),
+            array_ops.zeros([5], dtype=dtypes.float64)
+        ],
+                        array_ops.ones([7, 7], dtype=dtypes.int64)],
         empty_key=-1,
+        deleted_key=-2,
         name="test_lookup")
     def stack_tensor(base_tensor):
       return array_ops.stack([base_tensor + 1, base_tensor + 2])
diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_management.py b/tensorflow/contrib/timeseries/python/timeseries/state_management.py
index 13eecd4d82..138406c616 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/state_management.py
+++ b/tensorflow/contrib/timeseries/python/timeseries/state_management.py
@@ -149,6 +149,7 @@ class ChainingStateManager(_OverridableStateManager):
         key_dtype=dtypes.int64,
         default_values=self._start_state,
         empty_key=-1,
+        deleted_key=-2,
         name="cached_states",
         checkpoint=self._checkpoint_state)
 
diff --git a/tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt
new file mode 100644
index 0000000000..333fe6f4b2
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_LookupTableRemoveV2.pbtxt
@@ -0,0 +1,24 @@
+op {
+  graph_op_name: "LookupTableRemoveV2"
+  visibility: HIDDEN
+  endpoint {
+    name: "LookupTableRemove"
+  }
+  in_arg {
+    name: "table_handle"
+    description: <<END
+Handle to the table.
+END
+  }
+  in_arg {
+    name: "keys"
+    description: <<END
+Any shape.  Keys of the elements to remove.
+END
+  }
+  summary: "Removes keys and its associated values from a table."
+  description: <<END
+The tensor `keys` must of the same type as the keys of the table. Keys not
+already in the table are silently ignored.
+END
+}
diff --git a/tensorflow/core/framework/lookup_interface.cc b/tensorflow/core/framework/lookup_interface.cc
index bf3204ea6e..117adbf65c 100644
--- a/tensorflow/core/framework/lookup_interface.cc
+++ b/tensorflow/core/framework/lookup_interface.cc
@@ -71,6 +71,14 @@ Status LookupInterface::CheckKeyAndValueTensorsForImport(const Tensor& keys,
   return CheckKeyAndValueTensorsHelper(keys, values);
 }
 
+Status LookupInterface::CheckKeyTensorForRemove(const Tensor& keys) {
+  if (keys.dtype() != key_dtype()) {
+    return errors::InvalidArgument("Key must be type ", key_dtype(),
+                                   " but got ", keys.dtype());
+  }
+  return CheckKeyShape(keys.shape());
+}
+
 Status LookupInterface::CheckFindArguments(const Tensor& key,
                                            const Tensor& default_value) {
   TF_RETURN_IF_ERROR(CheckKeyAndValueTypes(key, default_value));
diff --git a/tensorflow/core/framework/lookup_interface.h b/tensorflow/core/framework/lookup_interface.h
index 0622dd06cb..d33945fd1b 100644
--- a/tensorflow/core/framework/lookup_interface.h
+++ b/tensorflow/core/framework/lookup_interface.h
@@ -64,6 +64,17 @@ class LookupInterface : public ResourceBase {
   virtual Status Insert(OpKernelContext* ctx, const Tensor& keys,
                         const Tensor& values) = 0;
 
+  // Removes elements from the table.
+  // This method is only implemented in mutable tables that can be updated over
+  // the execution of the graph. It returns Status::NotImplemented for read-only
+  // tables that are initialized once before they can be looked up.
+
+  // Returns the following statuses:
+  // - OK: when the remove finishes successfully.
+  // - InvalidArgument: if any of the preconditions on the lookup key fails.
+  // - Unimplemented: if the table does not support removals.
+  virtual Status Remove(OpKernelContext* ctx, const Tensor& keys) = 0;
+
   // Returns the number of elements in the table.
   virtual size_t size() const = 0;
 
@@ -107,6 +118,12 @@ class LookupInterface : public ResourceBase {
   virtual Status CheckKeyAndValueTensorsForImport(const Tensor& keys,
                                                   const Tensor& values);
 
+  // Check format of the key tensor for the Remove function.
+  // Returns OK if all the following requirements are satisfied, otherwise it
+  // returns InvalidArgument:
+  // - DataType of the tensor keys equals to the table key_dtype
+  virtual Status CheckKeyTensorForRemove(const Tensor& keys);
+
   // Check the arguments of a find operation. Returns OK if all the following
   // requirements are satisfied, otherwise it returns InvalidArgument:
   // - DataType of the tensor keys equals to the table key_dtype
diff --git a/tensorflow/core/kernels/initializable_lookup_table.h b/tensorflow/core/kernels/initializable_lookup_table.h
index 424fe5df3c..a14d4967a5 100644
--- a/tensorflow/core/kernels/initializable_lookup_table.h
+++ b/tensorflow/core/kernels/initializable_lookup_table.h
@@ -51,6 +51,12 @@ class InitializableLookupTable : public LookupInterface {
         "Insert not supported by InitializableLookupTable implementations");
   }
 
+  // Returns errors::Unimplemented.
+  Status Remove(OpKernelContext* ctx, const Tensor& keys) final {
+    return errors::Unimplemented(
+        "Remove not supported by InitializableLookupTable implementations");
+  }
+
   Status ExportValues(OpKernelContext* context) override {
     return errors::Unimplemented(
         "ExportValues not supported by InitializableLookupTable "
diff --git a/tensorflow/core/kernels/lookup_table_op.cc b/tensorflow/core/kernels/lookup_table_op.cc
index a495758861..0bc1ea77d6 100644
--- a/tensorflow/core/kernels/lookup_table_op.cc
+++ b/tensorflow/core/kernels/lookup_table_op.cc
@@ -89,6 +89,16 @@ class MutableHashTableOfScalars final : public LookupInterface {
     return DoInsert(false, keys, values);
   }
 
+  Status Remove(OpKernelContext* ctx, const Tensor& keys) override {
+    const auto key_values = keys.flat<K>();
+
+    mutex_lock l(mu_);
+    for (int64 i = 0; i < key_values.size(); ++i) {
+      table_.erase(SubtleMustCopyIfIntegral(key_values(i)));
+    }
+    return Status::OK();
+  }
+
   Status ImportValues(OpKernelContext* ctx, const Tensor& keys,
                       const Tensor& values) override {
     return DoInsert(true, keys, values);
@@ -212,6 +222,16 @@ class MutableHashTableOfTensors final : public LookupInterface {
     return DoInsert(false, keys, values);
   }
 
+  Status Remove(OpKernelContext* ctx, const Tensor& keys) override {
+    const auto key_values = keys.flat<K>();
+
+    mutex_lock l(mu_);
+    for (int64 i = 0; i < key_values.size(); ++i) {
+      table_.erase(SubtleMustCopyIfIntegral(key_values(i)));
+    }
+    return Status::OK();
+  }
+
   Status ImportValues(OpKernelContext* ctx, const Tensor& keys,
                       const Tensor& values) override {
     return DoInsert(true, keys, values);
@@ -326,6 +346,29 @@ class MutableDenseHashTable final : public LookupInterface {
         empty_key_input->template shaped<K, 2>({1, key_shape_.num_elements()}),
         0);
 
+    const Tensor* deleted_key_input;
+    OP_REQUIRES_OK(ctx, ctx->input("deleted_key", &deleted_key_input));
+    OP_REQUIRES(ctx, key_shape_.IsSameSize(deleted_key_input->shape()),
+                errors::InvalidArgument(
+                    "Empty and deleted keys must have same shape, got shapes: ",
+                    key_shape_.DebugString(), " and ",
+                    deleted_key_input->shape().DebugString()));
+    deleted_key_ = PersistentTensor(*deleted_key_input);
+    deleted_key_hash_ = HashKey(deleted_key_input->template shaped<K, 2>(
+                                    {1, key_shape_.num_elements()}),
+                                0);
+
+    if (empty_key_hash_ == deleted_key_hash_) {
+      const int64 key_size = key_shape_.num_elements();
+      const auto empty_key_matrix =
+          empty_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+      const auto deleted_key_matrix =
+          deleted_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+      OP_REQUIRES(
+          ctx, !IsEqualKey(empty_key_matrix, 0, deleted_key_matrix, 0),
+          errors::InvalidArgument("Empty and deleted keys cannot be equal"));
+    }
+
     int64 initial_num_buckets;
     OP_REQUIRES_OK(ctx, GetNodeAttr(kernel->def(), "initial_num_buckets",
                                     &initial_num_buckets));
@@ -360,6 +403,8 @@ class MutableDenseHashTable final : public LookupInterface {
         value_buckets_.AccessTensor(ctx)->template matrix<V>();
     const auto empty_key_matrix =
         empty_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+    const auto deleted_key_matrix =
+        deleted_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
     const int64 bit_mask = num_buckets_ - 1;
     // TODO(andreasst): parallelize using work_sharder
     for (int64 i = 0; i < num_elements; ++i) {
@@ -369,6 +414,11 @@ class MutableDenseHashTable final : public LookupInterface {
         return errors::InvalidArgument(
             "Using the empty_key as a table key is not allowed");
       }
+      if (deleted_key_hash_ == key_hash &&
+          IsEqualKey(deleted_key_matrix, 0, key_matrix, i)) {
+        return errors::InvalidArgument(
+            "Using the deleted_key as a table key is not allowed");
+      }
       int64 bucket_index = key_hash & bit_mask;
       int64 num_probes = 0;
       while (true) {
@@ -425,23 +475,40 @@ class MutableDenseHashTable final : public LookupInterface {
     return DoInsert(ctx, key, value, false);
   }
 
+  Status Remove(OpKernelContext* ctx, const Tensor& key) override
+      LOCKS_EXCLUDED(mu_) {
+    if (key.NumElements() != key.dim_size(0) * key_shape_.num_elements()) {
+      TensorShape expected_shape({key.dim_size(0)});
+      expected_shape.AppendShape(key_shape_);
+      return errors::InvalidArgument("Expected key shape ",
+                                     expected_shape.DebugString(), " got ",
+                                     key.shape().DebugString());
+    }
+    mutex_lock l(mu_);
+    return DoRemove(ctx, key);
+  }
+
   Status ImportValues(OpKernelContext* ctx, const Tensor& keys,
                       const Tensor& values) override LOCKS_EXCLUDED(mu_) {
     mutex_lock l(mu_);
     num_buckets_ = keys.dim_size(0);
     key_buckets_ = PersistentTensor(keys);
     value_buckets_ = PersistentTensor(values);
-    // Count the number of keys that are not the empty_key. This requires
-    // iterating through the whole table but that is OK as we only execute it
-    // during checkpoint restore.
+    // Count the number of keys that are not the empty_key or deleted_key.
+    // This requires iterating through the whole table but that is OK as we
+    // only execute it during checkpoint restore.
     num_entries_ = 0;
     const auto empty_key_tensor =
         empty_key_.AccessTensor(ctx)->template shaped<K, 2>(
             {1, key_shape_.num_elements()});
+    const auto deleted_key_tensor =
+        deleted_key_.AccessTensor(ctx)->template shaped<K, 2>(
+            {1, key_shape_.num_elements()});
     const auto key_buckets_tensor =
         key_buckets_.AccessTensor(ctx)->template matrix<K>();
     for (int64 i = 0; i < num_buckets_; ++i) {
-      if (!IsEqualKey(key_buckets_tensor, i, empty_key_tensor, 0)) {
+      if (!IsEqualKey(key_buckets_tensor, i, empty_key_tensor, 0) &&
+          !IsEqualKey(key_buckets_tensor, i, deleted_key_tensor, 0)) {
         ++num_entries_;
       }
     }
@@ -498,7 +565,8 @@ class MutableDenseHashTable final : public LookupInterface {
 
  private:
   Status DoInsert(OpKernelContext* ctx, const Tensor& key, const Tensor& value,
-                  bool ignore_empty_key) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                  bool ignore_empty_and_deleted_key)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     const int64 num_elements = (key.dims() == 0) ? 1 : key.dim_size(0);
     const int64 value_size = value_shape_.num_elements();
     const int64 key_size = key_shape_.num_elements();
@@ -511,17 +579,27 @@ class MutableDenseHashTable final : public LookupInterface {
         value_buckets_.AccessTensor(ctx)->template matrix<V>();
     const auto empty_key_tensor =
         empty_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+    const auto deleted_key_tensor =
+        deleted_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
     const int64 bit_mask = num_buckets_ - 1;
     for (int64 i = 0; i < num_elements; ++i) {
       const uint64 key_hash = HashKey(key_matrix, i);
       if (empty_key_hash_ == key_hash &&
           IsEqualKey(empty_key_tensor, 0, key_matrix, i)) {
-        if (ignore_empty_key) {
+        if (ignore_empty_and_deleted_key) {
           continue;
         }
         return errors::InvalidArgument(
             "Using the empty_key as a table key is not allowed");
       }
+      if (deleted_key_hash_ == key_hash &&
+          IsEqualKey(deleted_key_tensor, 0, key_matrix, i)) {
+        if (ignore_empty_and_deleted_key) {
+          continue;
+        }
+        return errors::InvalidArgument(
+            "Using the deleted_key as a table key is not allowed");
+      }
       int64 bucket_index = key_hash & bit_mask;
       int64 num_probes = 0;
       while (true) {
@@ -532,7 +610,9 @@ class MutableDenseHashTable final : public LookupInterface {
           }
           break;
         }
-        if (IsEqualKey(key_buckets_matrix, bucket_index, empty_key_tensor, 0)) {
+        if (IsEqualKey(key_buckets_matrix, bucket_index, empty_key_tensor, 0) ||
+            IsEqualKey(key_buckets_matrix, bucket_index, deleted_key_tensor,
+                       0)) {
           ++num_entries_;
           for (int64 j = 0; j < key_size; ++j) {
             key_buckets_matrix(bucket_index, j) =
@@ -556,6 +636,59 @@ class MutableDenseHashTable final : public LookupInterface {
     return Status::OK();
   }
 
+  Status DoRemove(OpKernelContext* ctx, const Tensor& key)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    const int64 num_elements = key.dim_size(0);
+    const int64 key_size = key_shape_.num_elements();
+    const auto key_matrix = key.shaped<K, 2>({num_elements, key_size});
+
+    auto key_buckets_matrix =
+        key_buckets_.AccessTensor(ctx)->template matrix<K>();
+    const auto empty_key_tensor =
+        empty_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+    const auto deleted_key_tensor =
+        deleted_key_.AccessTensor(ctx)->template shaped<K, 2>({1, key_size});
+    const auto deleted_key_flat =
+        deleted_key_.AccessTensor(ctx)->template flat<K>();
+    const int64 bit_mask = num_buckets_ - 1;
+    for (int64 i = 0; i < num_elements; ++i) {
+      const uint64 key_hash = HashKey(key_matrix, i);
+      if (empty_key_hash_ == key_hash &&
+          IsEqualKey(empty_key_tensor, 0, key_matrix, i)) {
+        return errors::InvalidArgument(
+            "Using the empty_key as a table key is not allowed");
+      }
+      if (deleted_key_hash_ == key_hash &&
+          IsEqualKey(deleted_key_tensor, 0, key_matrix, i)) {
+        return errors::InvalidArgument(
+            "Using the deleted_key as a table key is not allowed");
+      }
+      int64 bucket_index = key_hash & bit_mask;
+      int64 num_probes = 0;
+      while (true) {
+        if (IsEqualKey(key_buckets_matrix, bucket_index, key_matrix, i)) {
+          --num_entries_;
+          for (int64 j = 0; j < key_size; ++j) {
+            key_buckets_matrix(bucket_index, j) =
+                SubtleMustCopyIfIntegral(deleted_key_flat(j));
+          }
+          break;
+        }
+        if (IsEqualKey(key_buckets_matrix, bucket_index, empty_key_tensor, 0)) {
+          break;
+        }
+        ++num_probes;
+        bucket_index =
+            (bucket_index + num_probes) & bit_mask;  // quadratic probing
+        if (num_probes >= num_buckets_) {
+          return errors::Internal(
+              "Internal error in MutableDenseHashTable remove");
+        }
+      }
+    }
+    return Status::OK();
+  }
+
   Status AllocateBuckets(OpKernelContext* ctx, int64 new_num_buckets)
       EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     if (new_num_buckets < 4 ||
@@ -639,7 +772,9 @@ class MutableDenseHashTable final : public LookupInterface {
   PersistentTensor value_buckets_ GUARDED_BY(mu_);
   PersistentTensor empty_key_;
   uint64 empty_key_hash_;
-};
+  PersistentTensor deleted_key_;
+  uint64 deleted_key_hash_;
+};  // namespace lookup
 
 }  // namespace lookup
 
@@ -717,6 +852,39 @@ REGISTER_KERNEL_BUILDER(Name("LookupTableInsert").Device(DEVICE_CPU),
 REGISTER_KERNEL_BUILDER(Name("LookupTableInsertV2").Device(DEVICE_CPU),
                         LookupTableInsertOp);
 
+// Table remove op.
+class LookupTableRemoveOp : public OpKernel {
+ public:
+  explicit LookupTableRemoveOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    lookup::LookupInterface* table;
+    OP_REQUIRES_OK(ctx, GetLookupTable("table_handle", ctx, &table));
+    core::ScopedUnref unref_me(table);
+
+    DataType expected_input_0 =
+        (ctx->input_dtype(0) == DT_RESOURCE) ? DT_RESOURCE : DT_STRING_REF;
+    DataTypeVector expected_inputs = {expected_input_0, table->key_dtype()};
+    OP_REQUIRES_OK(ctx, ctx->MatchSignature(expected_inputs, {}));
+
+    const Tensor& key = ctx->input(1);
+    OP_REQUIRES_OK(ctx, table->CheckKeyTensorForRemove(key));
+
+    int64 memory_used_before = 0;
+    if (ctx->track_allocations()) {
+      memory_used_before = table->MemoryUsed();
+    }
+    OP_REQUIRES_OK(ctx, table->Remove(ctx, key));
+    if (ctx->track_allocations()) {
+      ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+                                               memory_used_before);
+    }
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("LookupTableRemoveV2").Device(DEVICE_CPU),
+                        LookupTableRemoveOp);
+
 // Op that returns the size of the given table.
 class LookupTableSizeOp : public OpKernel {
  public:
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index cfb1055d3c..415e15b720 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -30320,6 +30320,22 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "LookupTableRemoveV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  is_stateful: true
+}
 op {
   name: "LookupTableSize"
   input_arg {
@@ -36706,6 +36722,10 @@ op {
     name: "empty_key"
     type_attr: "key_dtype"
   }
+  input_arg {
+    name: "deleted_key"
+    type_attr: "key_dtype"
+  }
   output_arg {
     name: "table_handle"
     type: DT_RESOURCE
diff --git a/tensorflow/core/ops/lookup_ops.cc b/tensorflow/core/ops/lookup_ops.cc
index 72a77be70d..a0987cd982 100644
--- a/tensorflow/core/ops/lookup_ops.cc
+++ b/tensorflow/core/ops/lookup_ops.cc
@@ -214,6 +214,19 @@ REGISTER_OP("LookupTableInsertV2")
       return Status::OK();
     });
 
+REGISTER_OP("LookupTableRemoveV2")
+    .Input("table_handle: resource")
+    .Input("keys: Tin")
+    .Attr("Tin: type")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle handle;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle));
+      TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &handle));
+
+      // TODO(turboale): Validate keys shape.
+      return Status::OK();
+    });
+
 REGISTER_OP("LookupTableSize")
     .Input("table_handle: Ref(string)")
     .Output("size: int64")
@@ -407,6 +420,7 @@ REGISTER_OP("MutableDenseHashTable")
 
 REGISTER_OP("MutableDenseHashTableV2")
     .Input("empty_key: key_dtype")
+    .Input("deleted_key: key_dtype")
     .Output("table_handle: resource")
     .Attr("container: string = ''")
     .Attr("shared_name: string = ''")
-- 
GitLab


From d78c747e9177fc93d43a580acef2b62eb1420859 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Tue, 9 Oct 2018 16:39:33 -0700
Subject: [PATCH 0648/1085] Make lite_test.py run in open source.

PiperOrigin-RevId: 216445964
---
 tensorflow/contrib/lite/python/BUILD        |  2 --
 tensorflow/contrib/lite/python/lite_test.py | 14 +++++++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index 916788f215..be6c44d306 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -73,7 +73,6 @@ py_test(
     data = ["@tflite_mobilenet_ssd_quant_protobuf//:tflite_graph.pb"],
     srcs_version = "PY2AND3",
     tags = [
-        "no_oss",
         "no_windows",
     ],
     deps = [
@@ -172,7 +171,6 @@ py_test(
     srcs = ["convert_saved_model_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-        "no_oss",
         "no_windows",
     ],
     visibility = ["//visibility:public"],
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index d243a494f6..ef9bbded2a 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -591,11 +591,19 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
         'Unable to parse input file \'{}\'.'.format(graph_def_file),
         str(error.exception))
 
-  # TODO(nupurgarg): Test model loading in open source.
   def _initObjectDetectionArgs(self):
     # Initializes the arguments required for the object detection model.
-    self._graph_def_file = resource_loader.get_path_to_datafile(
-        'testdata/tflite_graph.pb')
+    # Looks for the model file which is saved in a different location interally
+    # and externally.
+    filename = resource_loader.get_path_to_datafile('testdata/tflite_graph.pb')
+    if not os.path.exists(filename):
+      filename = os.path.join(
+          resource_loader.get_root_dir_with_all_resources(),
+          '../tflite_mobilenet_ssd_quant_protobuf/tflite_graph.pb')
+      if not os.path.exists(filename):
+        raise IOError("File '{0}' does not exist.".format(filename))
+
+    self._graph_def_file = filename
     self._input_arrays = ['normalized_input_image_tensor']
     self._output_arrays = [
         'TFLite_Detection_PostProcess', 'TFLite_Detection_PostProcess:1',
-- 
GitLab


From ef9d2e7be9ae9fbcd4720d46e1f8a8cac902a1cd Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 9 Oct 2018 16:44:25 -0700
Subject: [PATCH 0649/1085] Remove the deprecated created and IS_LOCAL
 abstractions from activity analysis.

PiperOrigin-RevId: 216446750
---
 tensorflow/python/autograph/pyct/anno.py      |   2 -
 .../pyct/static_analysis/activity.py          |  82 ++----
 .../pyct/static_analysis/activity_test.py     | 268 +++++++-----------
 .../pyct/static_analysis/live_values.py       |   5 +-
 4 files changed, 121 insertions(+), 236 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py
index 5392e6ea03..e1f4af46cd 100644
--- a/tensorflow/python/autograph/pyct/anno.py
+++ b/tensorflow/python/autograph/pyct/anno.py
@@ -63,10 +63,8 @@ class Static(NoValue):
   The enum values are used strictly for documentation purposes.
   """
 
-  # Deprecated - use reaching definitions instead.
   # Symbols
   # These flags are boolean.
-  IS_LOCAL = 'Symbol is local to the function scope being analyzed.'
   IS_PARAM = 'Symbol is a parameter to the function being analyzed.'
 
   # Scopes
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py
index 086eda7574..cc159031ff 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py
@@ -44,7 +44,6 @@ class Scope(object):
 
   Attributes:
     modified: identifiers modified in this scope
-    created: identifiers created in this scope
     used: identifiers referenced in this scope
   """
 
@@ -54,7 +53,8 @@ class Scope(object):
     Args:
       parent: A Scope or None.
       isolated: Whether the scope is isolated, that is, whether variables
-          created in this scope should be visible to the parent scope.
+          modified in this scope should be considered modified in the parent
+          scope.
       add_unknown_symbols: Whether to handle attributed and subscripts
           without having first seen the base name.
           E.g., analyzing the statement 'x.y = z' without first having seen 'x'.
@@ -63,13 +63,11 @@ class Scope(object):
     self.parent = parent
     self.add_unknown_symbols = add_unknown_symbols
     self.modified = set()
-    # TODO(mdan): Completely remove this.
-    self.created = set()
     self.used = set()
     self.params = {}
     self.returned = set()
 
-  # TODO(mdan): Rename to `locals`
+  # TODO(mdan): Rename to `reserved`
   @property
   def referenced(self):
     if not self.isolated and self.parent is not None:
@@ -77,8 +75,7 @@ class Scope(object):
     return self.used
 
   def __repr__(self):
-    return 'Scope{r=%s, c=%s, w=%s}' % (tuple(self.used), tuple(self.created),
-                                        tuple(self.modified))
+    return 'Scope{r=%s, w=%s}' % (tuple(self.used), tuple(self.modified))
 
   def copy_from(self, other):
     """Recursively copies the contents of this scope from another scope."""
@@ -88,7 +85,6 @@ class Scope(object):
       self.parent.copy_from(other.parent)
     self.isolated = other.isolated
     self.modified = copy.copy(other.modified)
-    self.created = copy.copy(other.created)
     self.used = copy.copy(other.used)
     self.params = copy.copy(other.params)
     self.returned = copy.copy(other.returned)
@@ -109,56 +105,28 @@ class Scope(object):
     if other.parent is not None:
       self.parent.merge_from(other.parent)
     self.modified |= other.modified
-    self.created |= other.created
     self.used |= other.used
     self.params.update(other.params)
     self.returned |= other.returned
 
-  def has(self, name):
-    if name in self.modified:
-      return True
-    elif self.parent is not None:
-      return self.parent.has(name)
-    return False
-
   def mark_read(self, name):
     self.used.add(name)
-    if self.parent is not None and name not in self.created:
+    if self.parent is not None and name not in self.params:
       self.parent.mark_read(name)
 
+  def mark_modified(self, name):
+    """Marks the given symbol as modified in the current scope."""
+    self.modified.add(name)
+    if not self.isolated:
+      if self.parent is not None:
+        self.parent.mark_modified(name)
+
   def mark_param(self, name, owner):
     # Assumption: all AST nodes have the same life span. This lets us use
     # a weak reference to mark the connection between a symbol node and the
     # function node whose argument that symbol is.
     self.params[name] = weakref.ref(owner)
 
-  def mark_creation(self, name, writes_create_symbol=False):
-    """Mark a qualified name as created."""
-    if name.is_composite():
-      parent = name.parent
-      if not writes_create_symbol:
-        return
-      else:
-        if not self.has(parent):
-          if self.add_unknown_symbols:
-            self.mark_read(parent)
-          else:
-            raise ValueError('Unknown symbol "%s".' % parent)
-    self.created.add(name)
-
-  def mark_write(self, name):
-    """Marks the given symbol as modified in the current scope."""
-    self.modified.add(name)
-    if self.isolated:
-      self.mark_creation(name)
-    else:
-      if self.parent is None:
-        self.mark_creation(name)
-      else:
-        if not self.parent.has(name):
-          self.mark_creation(name)
-        self.parent.mark_write(name)
-
   def mark_returned(self, name):
     self.returned.add(name)
     if not self.isolated and self.parent is not None:
@@ -197,10 +165,7 @@ class ActivityAnalyzer(transformer.Base):
         return True
     return False
 
-  def _track_symbol(self,
-                    node,
-                    composite_writes_alter_parent=False,
-                    writes_create_symbol=False):
+  def _track_symbol(self, node, composite_writes_alter_parent=False):
     # A QN may be missing when we have an attribute (or subscript) on a function
     # call. Example: a().b
     if not anno.hasanno(node, anno.Basic.QN):
@@ -208,11 +173,9 @@ class ActivityAnalyzer(transformer.Base):
     qn = anno.getanno(node, anno.Basic.QN)
 
     if isinstance(node.ctx, gast.Store):
-      self.scope.mark_write(qn)
+      self.scope.mark_modified(qn)
       if qn.is_composite and composite_writes_alter_parent:
-        self.scope.mark_write(qn.parent)
-      if writes_create_symbol:
-        self.scope.mark_creation(qn, writes_create_symbol=True)
+        self.scope.mark_modified(qn.parent)
       if self._in_aug_assign:
         self.scope.mark_read(qn)
     elif isinstance(node.ctx, gast.Load):
@@ -220,13 +183,11 @@ class ActivityAnalyzer(transformer.Base):
     elif isinstance(node.ctx, gast.Param):
       # Param contexts appear in function defs, so they have the meaning of
       # defining a variable.
-      self.scope.mark_write(qn)
+      self.scope.mark_modified(qn)
       self.scope.mark_param(qn, self.enclosing_entities[-1])
     else:
       raise ValueError('Unknown context %s for node %s.' % (type(node.ctx), qn))
 
-    anno.setanno(node, NodeAnno.IS_LOCAL, self.scope.has(qn))
-
     if self._in_return_statement:
       self.scope.mark_returned(qn)
 
@@ -243,6 +204,12 @@ class ActivityAnalyzer(transformer.Base):
     self._exit_scope()
     return node
 
+  def visit_nonlocal(self, node):
+    raise NotImplementedError()
+
+  def visit_global(self, node):
+    raise NotImplementedError()
+
   def visit_Expr(self, node):
     return self._process_statement(node)
 
@@ -271,8 +238,7 @@ class ActivityAnalyzer(transformer.Base):
   def visit_Attribute(self, node):
     node = self.generic_visit(node)
     if self._in_constructor and self._node_sets_self_attribute(node):
-      self._track_symbol(
-          node, composite_writes_alter_parent=True, writes_create_symbol=True)
+      self._track_symbol(node, composite_writes_alter_parent=True)
     else:
       self._track_symbol(node)
     return node
@@ -336,7 +302,7 @@ class ActivityAnalyzer(transformer.Base):
     # of its name, along with the usage of any decorator accompany it.
     self._enter_scope(False)
     node.decorator_list = self.visit_block(node.decorator_list)
-    self.scope.mark_write(qual_names.QN(node.name))
+    self.scope.mark_modified(qual_names.QN(node.name))
     anno.setanno(node, anno.Static.SCOPE, self.scope)
     self._exit_scope()
 
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
index d4a6ce8ac3..9a4f1bf09b 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
@@ -32,62 +32,63 @@ from tensorflow.python.platform import test
 
 class ScopeTest(test.TestCase):
 
+  def assertMissing(self, qn, scope):
+    self.assertNotIn(qn, scope.used)
+    self.assertNotIn(qn, scope.modified)
+
+  def assertReadOnly(self, qn, scope):
+    self.assertIn(qn, scope.used)
+    self.assertNotIn(qn, scope.modified)
+
+  def assertWriteOnly(self, qn, scope):
+    self.assertNotIn(qn, scope.used)
+    self.assertIn(qn, scope.modified)
+
+  def assertReadWrite(self, qn, scope):
+    self.assertIn(qn, scope.used)
+    self.assertIn(qn, scope.modified)
+
   def test_basic(self):
     scope = activity.Scope(None)
-    self.assertFalse(scope.has(QN('foo')))
+    self.assertMissing(QN('foo'), scope)
 
     scope.mark_read(QN('foo'))
-    self.assertFalse(scope.has(QN('foo')))
-
-    scope.mark_write(QN('foo'))
-    self.assertTrue(scope.has(QN('foo')))
+    self.assertReadOnly(QN('foo'), scope)
 
-    scope.mark_read(QN('bar'))
-    self.assertFalse(scope.has(QN('bar')))
+    scope.mark_modified(QN('foo'))
+    self.assertReadWrite(QN('foo'), scope)
 
   def test_copy_from(self):
     scope = activity.Scope(None)
-    scope.mark_write(QN('foo'))
-
+    scope.mark_modified(QN('foo'))
     other = activity.Scope(None)
     other.copy_from(scope)
 
-    self.assertTrue(QN('foo') in other.modified)
+    self.assertWriteOnly(QN('foo'), other)
 
-    scope.mark_write(QN('bar'))
+    scope.mark_modified(QN('bar'))
     scope.copy_from(other)
 
-    self.assertFalse(QN('bar') in scope.modified)
+    self.assertMissing(QN('bar'), scope)
 
-    scope.mark_write(QN('bar'))
+    scope.mark_modified(QN('bar'))
     scope.merge_from(other)
 
-    self.assertTrue(QN('bar') in scope.modified)
-    self.assertFalse(QN('bar') in other.modified)
+    self.assertWriteOnly(QN('bar'), scope)
+    self.assertMissing(QN('bar'), other)
 
   def test_copy_of(self):
     scope = activity.Scope(None)
     scope.mark_read(QN('foo'))
+    other = activity.Scope.copy_of(scope)
 
-    self.assertTrue(QN('foo') in activity.Scope.copy_of(scope).used)
+    self.assertReadOnly(QN('foo'), other)
 
     child_scope = activity.Scope(scope)
     child_scope.mark_read(QN('bar'))
+    other = activity.Scope.copy_of(child_scope)
 
-    self.assertTrue(QN('bar') in activity.Scope.copy_of(child_scope).used)
-
-  def test_nesting(self):
-    scope = activity.Scope(None)
-    scope.mark_write(QN('foo'))
-    scope.mark_read(QN('bar'))
-
-    child = activity.Scope(scope)
-    self.assertTrue(child.has(QN('foo')))
-    self.assertTrue(scope.has(QN('foo')))
-
-    child.mark_write(QN('bar'))
-    self.assertTrue(child.has(QN('bar')))
-    self.assertFalse(scope.has(QN('bar')))
+    self.assertReadOnly(QN('bar'), other)
 
   def test_referenced(self):
     scope = activity.Scope(None)
@@ -123,25 +124,6 @@ class ActivityAnalyzerTest(test.TestCase):
     node = activity.resolve(node, entity_info)
     return node, entity_info
 
-  def test_local_markers(self):
-
-    def test_fn(a):  # pylint:disable=unused-argument
-      b = c  # pylint:disable=undefined-variable
-      while b > 0:
-        b -= 1
-      return b
-
-    node, _ = self._parse_and_analyze(test_fn)
-    self.assertFalse(
-        anno.getanno(node.body[0].body[0].value,
-                     NodeAnno.IS_LOCAL))  # c in b = c
-    self.assertTrue(
-        anno.getanno(node.body[0].body[1].test.left,
-                     NodeAnno.IS_LOCAL))  # b in b > 0
-    self.assertTrue(
-        anno.getanno(node.body[0].body[2].value,
-                     NodeAnno.IS_LOCAL))  # b in return b
-
   def assertSymbolSetsAre(self, expected, actual, name):
     expected = set(expected)
     actual = set(str(s) for s in actual)
@@ -153,12 +135,10 @@ class ActivityAnalyzerTest(test.TestCase):
         '  Extra:    %s\n' % (name.upper(), expected, actual,
                               expected - actual, actual - expected))
 
-  def assertScopeIsRmc(self, scope, used, modified, created):
+  def assertScopeIs(self, scope, used, modified):
     """Assert the scope contains specific used, modified & created variables."""
     self.assertSymbolSetsAre(used, scope.used, 'read')
     self.assertSymbolSetsAre(modified, scope.modified, 'modified')
-    # Created is deprecated, we're no longer verifying it.
-    # self.assertSymbolSetsAre(created, scope.created, 'created')
 
   def test_print_statement(self):
 
@@ -181,7 +161,7 @@ class ActivityAnalyzerTest(test.TestCase):
       print_args_scope = anno.getanno(print_node, NodeAnno.ARGS_SCOPE)
     # We basically need to detect which variables are captured by the call
     # arguments.
-    self.assertScopeIsRmc(print_args_scope, ('a', 'b'), (), ())
+    self.assertScopeIs(print_args_scope, ('a', 'b'), ())
 
   def test_call_args(self):
 
@@ -195,8 +175,8 @@ class ActivityAnalyzerTest(test.TestCase):
     call_node = node.body[0].body[2].value
     # We basically need to detect which variables are captured by the call
     # arguments.
-    self.assertScopeIsRmc(
-        anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'b'), (), ())
+    self.assertScopeIs(
+        anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'b'), ())
 
   def test_call_args_attributes(self):
 
@@ -210,12 +190,8 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     call_node = node.body[0].body[1].value
-    self.assertScopeIsRmc(
-        anno.getanno(call_node, NodeAnno.ARGS_SCOPE),
-        ('a', 'a.b', 'a.c'),
-        (),
-        (),
-    )
+    self.assertScopeIs(
+        anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'a.b', 'a.c'), ())
 
   def test_call_args_subscripts(self):
 
@@ -230,12 +206,9 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     call_node = node.body[0].body[2].value
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
         anno.getanno(call_node, NodeAnno.ARGS_SCOPE),
-        ('a', 'a[0]', 'a[b]', 'b'),
-        (),
-        (),
-    )
+        ('a', 'a[0]', 'a[b]', 'b'), ())
 
   def test_while(self):
 
@@ -248,14 +221,13 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     while_node = node.body[0].body[1]
-    self.assertScopeIsRmc(
-        anno.getanno(while_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'),
-        ('c',))
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
+        anno.getanno(while_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'))
+    self.assertScopeIs(
         anno.getanno(while_node, NodeAnno.BODY_SCOPE).parent, ('a', 'b', 'c'),
-        ('b', 'c'), ('a', 'b', 'c'))
-    self.assertScopeIsRmc(
-        anno.getanno(while_node, NodeAnno.COND_SCOPE), ('b',), (), ())
+        ('b', 'c'))
+    self.assertScopeIs(
+        anno.getanno(while_node, NodeAnno.COND_SCOPE), ('b',), ())
 
   def test_for(self):
 
@@ -268,11 +240,11 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     for_node = node.body[0].body[1]
-    self.assertScopeIsRmc(
-        anno.getanno(for_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'), ('c',))
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
+        anno.getanno(for_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'))
+    self.assertScopeIs(
         anno.getanno(for_node, NodeAnno.BODY_SCOPE).parent, ('a', 'b', 'c'),
-        ('b', 'c', '_'), ('a', 'b', 'c', '_'))
+        ('b', 'c', '_'))
 
   def test_if(self):
 
@@ -289,18 +261,16 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     if_node = node.body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('x', 'y', 'z'),
-        ('y', 'z'))
-    # TODO(mdan): Double check: is it ok to not mark a local symbol as not read?
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('x', 'z', 'u'),
-        ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u'))
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('x', 'y', 'z'))
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('x', 'y', 'z', 'u'),
+        ('x', 'y', 'z', 'u'))
+    self.assertScopeIs(
         anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('x', 'y'),
-        ('x', 'y', 'u'), ('y', 'u'))
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent, ('x', 'z', 'u'),
+        ('x', 'y', 'u'))
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent,
         ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u'))
 
   def test_if_attributes(self):
@@ -316,24 +286,14 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     if_node = node.body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE),
-        ('a', 'a.c'),
-        ('a.b', 'd'),
-        ('d',),
-    )
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE),
-        ('a', 'a.c'),
-        ('a.b', 'd'),
-        ('d',),
-    )
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent,
-        ('a', 'a.c', 'd'),
-        ('a.b', 'd'),
-        ('a', 'd'),
-    )
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('a', 'a.c'), ('a.b', 'd'))
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('a', 'a.c'),
+        ('a.b', 'd'))
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('a', 'a.c', 'd'),
+        ('a.b', 'd'))
 
   def test_if_subscripts(self):
 
@@ -348,25 +308,15 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     if_node = node.body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.BODY_SCOPE),
-        ('a', 'b', 'c', 'a[c]'),
-        ('a[b]', 'd'),
-        ('d',),
-    )
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('a', 'b', 'c', 'a[c]'),
+        ('a[b]', 'd'))
     # TODO(mdan): Should subscript writes (a[0] = 1) be considered to read "a"?
-    self.assertScopeIsRmc(
-        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE),
-        ('a', 'e'),
-        ('a[0]', 'd'),
-        ('d',),
-    )
-    self.assertScopeIsRmc(
+    self.assertScopeIs(
+        anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('a', 'e'), ('a[0]', 'd'))
+    self.assertScopeIs(
         anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent,
-        ('a', 'b', 'c', 'd', 'e', 'a[c]'),
-        ('d', 'a[b]', 'a[0]'),
-        ('a', 'b', 'c', 'd', 'e'),
-    )
+        ('a', 'b', 'c', 'd', 'e', 'a[c]'), ('d', 'a[b]', 'a[0]'))
 
   def test_nested_if(self):
 
@@ -380,12 +330,10 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     inner_if_node = node.body[0].body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(inner_if_node, NodeAnno.BODY_SCOPE), ('b',), ('a',),
-        ('a',))
-    self.assertScopeIsRmc(
-        anno.getanno(inner_if_node, NodeAnno.ORELSE_SCOPE), ('b',), ('a',),
-        ('a',))
+    self.assertScopeIs(
+        anno.getanno(inner_if_node, NodeAnno.BODY_SCOPE), ('b',), ('a',))
+    self.assertScopeIs(
+        anno.getanno(inner_if_node, NodeAnno.ORELSE_SCOPE), ('b',), ('a',))
 
   def test_nested_function(self):
 
@@ -404,11 +352,8 @@ class ActivityAnalyzerTest(test.TestCase):
     node, _ = self._parse_and_analyze(test_fn)
     fn_def_node = node.body[0].body[0]
 
-    self.assertScopeIsRmc(
-        anno.getanno(fn_def_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('y',), (
-            'x',
-            'y',
-        ))
+    self.assertScopeIs(
+        anno.getanno(fn_def_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('y',))
 
   def test_constructor_attributes(self):
 
@@ -420,12 +365,9 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(TestClass)
     init_node = node.body[0].body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(init_node, NodeAnno.BODY_SCOPE),
-        ('self', 'a', 'self.b'),
-        ('self', 'self.b', 'self.b.c'),
-        ('self', 'a', 'self.b'),
-    )
+    self.assertScopeIs(
+        anno.getanno(init_node, NodeAnno.BODY_SCOPE), ('self', 'a', 'self.b'),
+        ('self', 'self.b', 'self.b.c'))
 
   def test_aug_assign_subscripts(self):
 
@@ -434,12 +376,8 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE),
-        ('a', 'a[0]'),
-        ('a[0]',),
-        ('a',),
-    )
+    self.assertScopeIs(
+        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('a', 'a[0]'), ('a[0]',))
 
   def test_return_vars_are_read(self):
 
@@ -448,16 +386,7 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE),
-        ('c',),
-        (),
-        (
-            'a',
-            'b',
-            'c',
-        ),
-    )
+    self.assertScopeIs(anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('c',), ())
 
   def test_aug_assign(self):
 
@@ -466,12 +395,8 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE),
-        ('a', 'b'),
-        ('a'),
-        ('a', 'b'),
-    )
+    self.assertScopeIs(
+        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('a', 'b'), ('a'))
 
   def test_aug_assign_rvalues(self):
 
@@ -485,23 +410,22 @@ class ActivityAnalyzerTest(test.TestCase):
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE),
-        ('foo', 'x'),
-        (),
-        ('x',),
-    )
+    self.assertScopeIs(
+        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('foo', 'x'), ())
 
-  def test_params_created(self):
+  def test_params(self):
 
     def test_fn(a, b):  # pylint: disable=unused-argument
       return b
 
     node, _ = self._parse_and_analyze(test_fn)
     fn_node = node.body[0]
-    self.assertScopeIsRmc(
-        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('b',), (('')),
-        (('a', 'b')))
+    body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE)
+    self.assertScopeIs(body_scope, ('b',), ())
+    self.assertScopeIs(body_scope.parent, ('b',), ('a', 'b'))
+
+    args_scope = anno.getanno(fn_node.args, anno.Static.SCOPE)
+    self.assertSymbolSetsAre(('a', 'b'), args_scope.params.keys(), 'params')
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/pyct/static_analysis/live_values.py b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
index 4ceddce53b..dc363f9a47 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/live_values.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
@@ -28,7 +28,6 @@ import six
 
 from tensorflow.python.autograph.pyct import anno
 from tensorflow.python.autograph.pyct import transformer
-from tensorflow.python.autograph.pyct.static_analysis.annos import NodeAnno
 
 
 # TODO(aqj): Do we need this? Do other builtins fail in similar ways
@@ -133,11 +132,9 @@ class LiveValueResolver(transformer.Base):
         anno.setanno(node, 'fqn',
                      anno.getanno(node.value, 'type_fqn') + (node.attr,))
     elif isinstance(node.value, gast.Name):
-      stem_name = node.value
-      # All nonlocal symbols should be fully resolved.
-      assert anno.hasanno(stem_name, NodeAnno.IS_LOCAL), stem_name
       # TODO(mdan): Figure out what to do when calling attribute on local object
       # Maybe just leave as-is?
+      pass
     return node
 
 
-- 
GitLab


From c770568935b85d506dc1a1f671822a7e122b5056 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 16:48:34 -0700
Subject: [PATCH 0650/1085] Internal change.

PiperOrigin-RevId: 216447412
---
 .../contrib/lite/kernels/sparse_output_fully_connected.cc  | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
index 843ed0768c..226bba2d47 100644
--- a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
+++ b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
@@ -88,6 +88,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const bool is_hybrid_op =
       (weights->type == kTfLiteUInt8 && input->type == kTfLiteFloat32);
 
+  // Resize output.
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(1);
+  output_size_array->data[0] = 1;
+  TF_LITE_ENSURE_OK(context,
+                    context->ResizeTensor(context, output, output_size_array));
+
   if (is_hybrid_op) {
     TfLiteIntArrayFree(node->temporaries);
     node->temporaries = TfLiteIntArrayCreate(kNumTemporaryTensors);
-- 
GitLab


From d4526cf9d1d58cbe480e7d2b8199620e0e9f0572 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 16:52:15 -0700
Subject: [PATCH 0651/1085] [XLA] Added xla::CreateModuleFromProto(...)
 combining loading module from proto and verifying it with HloVerifier.

PiperOrigin-RevId: 216447947
---
 tensorflow/compiler/xla/layout_util.cc        |   2 +-
 tensorflow/compiler/xla/service/BUILD         |   1 +
 .../compiler/xla/service/hlo_instruction.cc   |  14 ++-
 .../compiler/xla/service/hlo_proto_util.cc    |  12 ++
 .../compiler/xla/service/hlo_proto_util.h     |   6 +
 .../compiler/xla/service/hlo_verifier.cc      | 104 +++++++++++++++++-
 6 files changed, 132 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index 3c8db9aa45..19667b7ed9 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -205,7 +205,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
     return Status::OK();
   }
 
-  if (layout.format() == INVALID_FORMAT) {
+  if (layout.format() == INVALID_FORMAT || !Format_IsValid(layout.format())) {
     return InvalidArgument(
         "Layout does not have a valid format: layout {%s}, shape {%s}",
         layout.ShortDebugString(), shape.ShortDebugString());
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 2b292ed053..f9f741aaee 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -3127,6 +3127,7 @@ cc_library(
         ":buffer_assignment",
         ":hlo",
         ":hlo_proto",
+        ":hlo_verifier",
         "//tensorflow/compiler/xla:status",
         "//tensorflow/compiler/xla:util",
     ],
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 050d28b289..09bcf8a9e7 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -305,6 +305,9 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
                                           proto.tuple_index());
       break;
     case HloOpcode::kReducePrecision:
+      TF_RET_CHECK(proto.operand_ids_size() == 1)
+          << "ReducePrecision instruction should have 1 operand but sees "
+          << proto.operand_ids_size();
       instruction =
           CreateReducePrecision(proto.shape(), operands(0),
                                 proto.exponent_bits(), proto.mantissa_bits());
@@ -312,12 +315,16 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
     case HloOpcode::kInfeed: {
       const Shape& data_shape =
           ShapeUtil::GetTupleElementShape(proto.shape(), 0);
-      TF_RET_CHECK(proto.operand_ids_size() == 1);
+      TF_RET_CHECK(proto.operand_ids_size() == 1)
+          << "Infeed instruction should have 1 operand but sees "
+          << proto.operand_ids_size();
       instruction =
           CreateInfeed(data_shape, operands(0), proto.infeed_config());
     } break;
     case HloOpcode::kOutfeed:
-      TF_RET_CHECK(proto.operand_ids_size() == 2);
+      TF_RET_CHECK(proto.operand_ids_size() == 2)
+          << "Outfeed instruction should have 2 operands but sees "
+          << proto.operand_ids_size();
       TF_RETURN_IF_ERROR(
           ShapeUtil::ValidateShapeWithOptionalLayout(proto.outfeed_shape()));
       instruction = CreateOutfeed(proto.outfeed_shape(), operands(0),
@@ -349,6 +356,9 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       break;
     }
     case HloOpcode::kCollectivePermute: {
+      TF_RET_CHECK(proto.operand_ids_size() == 1)
+          << "CollectivePermute instruction should have 1 operand but sees "
+          << proto.operand_ids_size();
       std::vector<std::pair<int64, int64>> source_target_pairs(
           proto.source_target_pairs_size());
       for (int i = 0; i < source_target_pairs.size(); i++) {
diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc
index b9c0b0c4ee..026a0e8fba 100644
--- a/tensorflow/compiler/xla/service/hlo_proto_util.cc
+++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/xla/service/hlo_proto_util.h"
+#include "tensorflow/compiler/xla/service/hlo_verifier.h"
 
 #include <string>
 
@@ -36,6 +37,17 @@ HloProto MakeHloProto(const HloModule& module) {
   return proto;
 }
 
+StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
+    const HloModuleProto& proto, const HloModuleConfig& module_config) {
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
+                      HloModule::CreateFromProto(proto, module_config));
+  TF_RETURN_IF_ERROR(
+      HloVerifier(/*layout_sensitive=*/true, /*allow_mixed_precision=*/false)
+          .Run(module.get())
+          .status());
+  return std::move(module);
+}
+
 StatusOr<std::vector<const Shape*>> EntryComputationParameterShapes(
     const HloProto& hlo_proto) {
   if (!hlo_proto.has_hlo_module()) {
diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.h b/tensorflow/compiler/xla/service/hlo_proto_util.h
index 3d9c375cd5..1db82dd6fc 100644
--- a/tensorflow/compiler/xla/service/hlo_proto_util.h
+++ b/tensorflow/compiler/xla/service/hlo_proto_util.h
@@ -35,6 +35,12 @@ HloProto MakeHloProto(const HloModule& module,
 // will not be included in the output.
 HloProto MakeHloProto(const HloModule& module);
 
+// Create an HLO state from serialized representation. In addition to
+// creating the proto with HloModule::CreateFromProto(...) it also
+// uses HloVerifier to ensure basic invariants are held.
+StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
+    const HloModuleProto& proto, const HloModuleConfig& module_config);
+
 // Returns the shapes of the parameters of the entry computation. Shape pointers
 // refer to shapes inside of the given HloProto.
 StatusOr<std::vector<const Shape*>> EntryComputationParameterShapes(
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index be3bee5975..620458855f 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -27,6 +27,15 @@ limitations under the License.
 
 namespace xla {
 
+static Status CheckOperandCount(const HloInstruction* hlo, int expected) {
+  if (hlo->operand_count() != expected) {
+    return InternalError("Expected %d operands for %s instruction: %s",
+                         expected, HloOpcodeString(hlo->opcode()),
+                         hlo->ToString());
+  }
+  return Status::OK();
+}
+
 Status ShapeVerifier::HandleElementwiseUnary(HloInstruction* hlo) {
   return CheckUnaryShape(hlo);
 }
@@ -58,12 +67,14 @@ Status ShapeVerifier::HandleConcatenate(HloInstruction* concatenate) {
 }
 
 Status ShapeVerifier::HandleConvert(HloInstruction* convert) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(convert, 1));
   return CheckShape(convert, ShapeInference::InferConvertShape(
                                  convert->operand(0)->shape(),
                                  convert->shape().element_type()));
 }
 
 Status ShapeVerifier::HandleBitcastConvert(HloInstruction* convert) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(convert, 1));
   return CheckShape(convert, ShapeInference::InferBitcastConvertShape(
                                  convert->operand(0)->shape(),
                                  convert->shape().element_type()));
@@ -74,6 +85,7 @@ Status ShapeVerifier::HandleCopy(HloInstruction* copy) {
 }
 
 Status ShapeVerifier::HandleDot(HloInstruction* dot) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(dot, 2));
   TF_ASSIGN_OR_RETURN(const Shape expected,
                       ShapeInference::InferDotOpShape(
                           dot->operand(0)->shape(), dot->operand(1)->shape(),
@@ -82,6 +94,7 @@ Status ShapeVerifier::HandleDot(HloInstruction* dot) {
 }
 
 Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(convolution, 2));
   TF_ASSIGN_OR_RETURN(
       const Shape expected,
       ShapeInference::InferConvolveShape(
@@ -92,6 +105,7 @@ Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) {
 }
 
 Status ShapeVerifier::HandleFft(HloInstruction* fft) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(fft, 1));
   TF_ASSIGN_OR_RETURN(
       const Shape expected,
       ShapeInference::InferFftShape(fft->operand(0)->shape(), fft->fft_type(),
@@ -118,11 +132,13 @@ Status ShapeVerifier::HandleAllToAll(HloInstruction* hlo) {
 }
 
 Status ShapeVerifier::HandleCollectivePermute(HloInstruction* hlo) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(hlo, 1));
   return CheckShape(hlo, ShapeInference::InferCollectivePermuteShape(
                              hlo->operand(0)->shape()));
 }
 
 Status ShapeVerifier::HandleReducePrecision(HloInstruction* reduce_precision) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(reduce_precision, 1));
   return CheckShape(reduce_precision, ShapeInference::InferReducePrecisionShape(
                                           reduce_precision->operand(0)->shape(),
                                           reduce_precision->exponent_bits(),
@@ -156,6 +172,7 @@ Status ShapeVerifier::CheckOperandAndParameter(
 }
 
 Status ShapeVerifier::HandleInfeed(HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 1));
   HloInfeedInstruction* infeed = Cast<HloInfeedInstruction>(instruction);
   TF_RETURN_IF_ERROR(CheckIsTokenOperand(instruction, 0));
 
@@ -166,6 +183,7 @@ Status ShapeVerifier::HandleInfeed(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleOutfeed(HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2));
   HloOutfeedInstruction* outfeed = Cast<HloOutfeedInstruction>(instruction);
   TF_RETURN_IF_ERROR(CheckIsTokenOperand(instruction, 1));
 
@@ -192,10 +210,7 @@ bool ShapeVerifier::HasCompatibleElementTypes(const Shape& shape_0,
 }
 
 Status ShapeVerifier::HandleRng(HloInstruction* instruction) {
-  if (instruction->operand_count() != 2) {
-    return InternalError("Expected two operands for Rng instruction: %s",
-                         instruction->ToString());
-  }
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2));
 
   const Shape& shape_0 = instruction->operand(0)->shape();
   const Shape& shape_1 = instruction->operand(1)->shape();
@@ -244,12 +259,17 @@ Status ShapeVerifier::HandleRng(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleReverse(HloInstruction* reverse) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(reverse, 1));
   return CheckShape(
       reverse, ShapeInference::InferReverseShape(reverse->operand(0)->shape(),
                                                  reverse->dimensions()));
 }
 
 Status ShapeVerifier::HandleSort(HloInstruction* sort) {
+  if (sort->operand_count() < 1 || sort->operand_count() > 2) {
+    return InternalError("Expected 1 or 2 operands for %s instruction: %s",
+                         HloOpcodeString(sort->opcode()), sort->ToString());
+  }
   if (sort->operand_count() == 2 &&
       !ShapeUtil::SameDimensions(sort->operand(0)->shape(),
                                  sort->operand(1)->shape())) {
@@ -263,10 +283,12 @@ Status ShapeVerifier::HandleSort(HloInstruction* sort) {
 }
 
 Status ShapeVerifier::HandleConstant(HloInstruction* constant) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(constant, 0));
   return CheckShape(constant, constant->literal().shape());
 }
 
 Status ShapeVerifier::HandleIota(HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 0));
   auto* iota = Cast<HloIotaInstruction>(instruction);
   const int64 rank = ShapeUtil::Rank(iota->shape());
   if (rank == 0) {
@@ -281,6 +303,7 @@ Status ShapeVerifier::HandleIota(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleGetTupleElement(HloInstruction* get_tuple_element) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(get_tuple_element, 1));
   return CheckShape(get_tuple_element,
                     ShapeInference::InferGetTupleElementShape(
                         get_tuple_element->operand(0)->shape(),
@@ -288,6 +311,12 @@ Status ShapeVerifier::HandleGetTupleElement(HloInstruction* get_tuple_element) {
 }
 
 Status ShapeVerifier::HandleReduce(HloInstruction* reduce) {
+  if (reduce->operand_count() % 2 != 0) {
+    return InternalError(
+        "Expected an even number of operands for %s instruction: %s",
+        HloOpcodeString(reduce->opcode()), reduce->ToString());
+  }
+
   std::vector<const Shape*> operand_shapes;
   for (const HloInstruction* operand : reduce->operands()) {
     operand_shapes.push_back(&operand->shape());
@@ -298,10 +327,12 @@ Status ShapeVerifier::HandleReduce(HloInstruction* reduce) {
 }
 
 Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(bitcast, 1));
   return Status::OK();
 }
 
 Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(broadcast, 1));
   // HLO broadcast has no exact analog at the proto level so there is no
   // ShapeInference method. Check the output shape explicitly.
   const Shape& operand_shape = broadcast->operand(0)->shape();
@@ -322,6 +353,7 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) {
 }
 
 Status ShapeVerifier::HandleReshape(HloInstruction* reshape) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(reshape, 1));
   // Check for mixed precision.
   TF_RETURN_IF_ERROR(CheckShape(reshape, reshape->shape()));
   TF_RET_CHECK(ShapeUtil::ElementsIn(reshape->shape()) ==
@@ -330,12 +362,14 @@ Status ShapeVerifier::HandleReshape(HloInstruction* reshape) {
 }
 
 Status ShapeVerifier::HandleTranspose(HloInstruction* transpose) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(transpose, 1));
   return CheckShape(
       transpose, ShapeInference::InferTransposeShape(
                      transpose->operand(0)->shape(), transpose->dimensions()));
 }
 
 Status ShapeVerifier::HandleParameter(HloInstruction* hlo) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(hlo, 0));
   return Status::OK();
 }
 
@@ -383,6 +417,7 @@ Status ShapeVerifier::HandleCustomCall(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleSlice(HloInstruction* slice) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(slice, 1));
   return CheckShape(slice,
                     ShapeInference::InferSliceShape(
                         slice->operand(0)->shape(), slice->slice_starts(),
@@ -390,6 +425,7 @@ Status ShapeVerifier::HandleSlice(HloInstruction* slice) {
 }
 
 Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_slice, 2));
   return CheckShape(dynamic_slice, ShapeInference::InferDynamicSliceShape(
                                        dynamic_slice->operand(0)->shape(),
                                        dynamic_slice->operand(1)->shape(),
@@ -398,6 +434,7 @@ Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) {
 
 Status ShapeVerifier::HandleDynamicUpdateSlice(
     HloInstruction* dynamic_update_slice) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_update_slice, 3));
   return CheckShape(dynamic_update_slice,
                     ShapeInference::InferDynamicUpdateSliceShape(
                         dynamic_update_slice->operand(0)->shape(),
@@ -427,6 +464,7 @@ Status ShapeVerifier::HandleMap(HloInstruction* map) {
 }
 
 Status ShapeVerifier::HandleReduceWindow(HloInstruction* reduce_window) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(reduce_window, 2));
   return CheckShape(
       reduce_window,
       ShapeInference::InferReduceWindowShape(
@@ -436,6 +474,7 @@ Status ShapeVerifier::HandleReduceWindow(HloInstruction* reduce_window) {
 }
 
 Status ShapeVerifier::HandleSelectAndScatter(HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 3));
   return CheckShape(
       instruction,
       ShapeInference::InferSelectAndScatterShape(
@@ -446,6 +485,7 @@ Status ShapeVerifier::HandleSelectAndScatter(HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::HandleWhile(HloInstruction* xla_while) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(xla_while, 1));
   TF_RETURN_IF_ERROR(
       CheckOperandAndParameter(xla_while, 0, xla_while->while_body(), 0));
   TF_RETURN_IF_ERROR(
@@ -465,6 +505,7 @@ Status ShapeVerifier::HandleWhile(HloInstruction* xla_while) {
 }
 
 Status ShapeVerifier::HandleConditional(HloInstruction* conditional) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(conditional, 3));
   TF_RETURN_IF_ERROR(CheckOperandAndParameter(
       conditional, 1, conditional->true_computation(), 0));
   TF_RETURN_IF_ERROR(CheckOperandAndParameter(
@@ -479,12 +520,14 @@ Status ShapeVerifier::HandleConditional(HloInstruction* conditional) {
 }
 
 Status ShapeVerifier::HandlePad(HloInstruction* pad) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(pad, 2));
   return CheckShape(pad, ShapeInference::InferPadShape(pad->operand(0)->shape(),
                                                        pad->operand(1)->shape(),
                                                        pad->padding_config()));
 }
 
 Status ShapeVerifier::HandleSend(HloInstruction* send) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(send, 2));
   return CheckShape(send,
                     ShapeUtil::MakeTupleShape({send->operand(0)->shape(),
                                                ShapeUtil::MakeShape(U32, {}),
@@ -492,10 +535,12 @@ Status ShapeVerifier::HandleSend(HloInstruction* send) {
 }
 
 Status ShapeVerifier::HandleSendDone(HloInstruction* send_done) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(send_done, 1));
   return CheckShape(send_done, ShapeUtil::MakeTokenShape());
 }
 
 Status ShapeVerifier::HandleRecv(HloInstruction* recv) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(recv, 1));
   return CheckShape(
       recv, ShapeUtil::MakeTupleShape(
                 {ShapeUtil::GetTupleElementShape(recv->shape(), 0),
@@ -503,6 +548,7 @@ Status ShapeVerifier::HandleRecv(HloInstruction* recv) {
 }
 
 Status ShapeVerifier::HandleRecvDone(HloInstruction* recv_done) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(recv_done, 1));
   return CheckShape(
       recv_done,
       ShapeUtil::MakeTupleShape(
@@ -512,6 +558,7 @@ Status ShapeVerifier::HandleRecvDone(HloInstruction* recv_done) {
 
 Status ShapeVerifier::HandleBatchNormTraining(
     HloInstruction* batch_norm_training) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_training, 3));
   return CheckShape(batch_norm_training,
                     ShapeInference::InferBatchNormTrainingShape(
                         batch_norm_training->operand(0)->shape(),
@@ -522,6 +569,7 @@ Status ShapeVerifier::HandleBatchNormTraining(
 
 Status ShapeVerifier::HandleBatchNormInference(
     HloInstruction* batch_norm_inference) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_inference, 5));
   return CheckShape(batch_norm_inference,
                     ShapeInference::InferBatchNormInferenceShape(
                         batch_norm_inference->operand(0)->shape(),
@@ -533,6 +581,7 @@ Status ShapeVerifier::HandleBatchNormInference(
 }
 
 Status ShapeVerifier::HandleBatchNormGrad(HloInstruction* batch_norm_grad) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_grad, 5));
   return CheckShape(batch_norm_grad, ShapeInference::InferBatchNormGradShape(
                                          batch_norm_grad->operand(0)->shape(),
                                          batch_norm_grad->operand(1)->shape(),
@@ -601,6 +650,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) {
 }  // namespace
 
 Status ShapeVerifier::HandleGather(HloInstruction* gather) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(gather, 2));
   return CheckShape(
       gather,
       ShapeInference::InferGatherShape(
@@ -609,6 +659,7 @@ Status ShapeVerifier::HandleGather(HloInstruction* gather) {
 }
 
 Status ShapeVerifier::HandleScatter(HloInstruction* scatter) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(scatter, 3));
   return CheckShape(
       scatter, ShapeInference::InferScatterShape(
                    scatter->operand(0)->shape(), scatter->operand(1)->shape(),
@@ -696,12 +747,14 @@ Status ShapeVerifier::CheckShape(const HloInstruction* instruction,
 }
 
 Status ShapeVerifier::CheckUnaryShape(const HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 1));
   return CheckShape(instruction,
                     ShapeInference::InferUnaryOpShape(instruction->opcode(),
                                                       instruction->operand(0)));
 }
 
 Status ShapeVerifier::CheckBinaryShape(const HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2));
   return CheckShape(
       instruction, ShapeInference::InferBinaryOpShape(instruction->opcode(),
                                                       instruction->operand(0),
@@ -709,6 +762,7 @@ Status ShapeVerifier::CheckBinaryShape(const HloInstruction* instruction) {
 }
 
 Status ShapeVerifier::CheckTernaryShape(const HloInstruction* instruction) {
+  TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 3));
   return CheckShape(instruction,
                     ShapeInference::InferTernaryOpShape(
                         instruction->opcode(), instruction->operand(0),
@@ -816,6 +870,47 @@ Status VerifyEntryAndExitShapes(const HloModule& module) {
   return Status::OK();
 }
 
+// Verifies that entry computation layout matches characteristics of
+// entry computation.
+Status CheckEntryComputationLayout(const HloModule& module) {
+  const HloComputation* computation = module.entry_computation();
+  const auto& layout = module.entry_computation_layout();
+
+  // TODO(117498192): Change into a call to Compatible(...).
+  if (!ShapeUtil::CompatibleIgnoringFpPrecision(
+          computation->root_instruction()->shape(),
+          layout.result_layout().shape())) {
+    return InternalError(
+        "Shape of the root instruction of entry computation (%s) should be "
+        "compatible to one specified in module's entry computation layout (%s)",
+        ShapeUtil::HumanString(computation->root_instruction()->shape()),
+        ShapeUtil::HumanString(layout.result_layout().shape()));
+  }
+
+  if (computation->num_parameters() != layout.parameter_count()) {
+    return InternalError(
+        "Number of parameters in entry computation layout (%d) must be same "
+        "as number of parameters of entry computation computation (%d)",
+        layout.parameter_count(), computation->num_parameters());
+  }
+
+  for (int i = 0; i < computation->num_parameters(); ++i) {
+    if (!ShapeUtil::Compatible(computation->parameter_instruction(i)->shape(),
+                               layout.parameter_shape(i))) {
+      return InternalError(
+          "Shape of the entry computation parameter %d is %s should be "
+          "compatible to the one specified in module's entry computation "
+          "layout %s",
+          i,
+          ShapeUtil::HumanString(
+              computation->parameter_instruction(i)->shape()),
+          ShapeUtil::HumanString(layout.parameter_shape(i)));
+    }
+  }
+
+  return Status::OK();
+}
+
 // Checks if the given two instructions share the same channel id.
 Status CheckSameChannel(const HloInstruction* instr1,
                         const HloInstruction* instr2) {
@@ -1213,6 +1308,7 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier));
   }
 
+  TF_RETURN_IF_ERROR(CheckEntryComputationLayout(*module));
   TF_RETURN_IF_ERROR(VerifyEntryAndExitShapes(*module));
 
   // If the module has a schedule, it must be valid.
-- 
GitLab


From 65b7d0b2f84c334327a295bf41bc06c7f6b8ffe5 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 9 Oct 2018 16:52:56 -0700
Subject: [PATCH 0652/1085] [XLA:GPU] Elide the SequentialThunk when emitting
 scatter with no copy

We have a 1-element thunk sequence if we're not copying. That's still two
thunks and hlo profiling gets confused if it sees two thunks for the same
instruction and one of them claims to be the whole instruction.

PiperOrigin-RevId: 216448063
---
 .../xla/service/gpu/ir_emitter_unnested.cc         | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index bef7a55301..09486d291a 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -2080,9 +2080,9 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
   // Launch a kernel that reads every element in the updates tensor. We could
   // also do one kernel per window instead if bounds checks turn out to be a
   // bottleneck.
-  thunks.push_back(BuildKernelThunk(
-      scatter,
-      /*implements_whole_instruction=*/operand_buffer == destination_buffer));
+  thunks.push_back(
+      BuildKernelThunk(scatter,
+                       /*implements_whole_instruction=*/thunks.empty()));
 
   LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
       updates->shape(), ir_emitter_context_->device_description());
@@ -2090,8 +2090,12 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
                          static_cast<KernelThunk*>(thunks.back().get()),
                          ir_emitter_context_->llvm_module());
 
-  thunk_sequence_->emplace_back(
-      absl::make_unique<SequentialThunk>(std::move(thunks), scatter));
+  if (thunks.size() == 1) {
+    thunk_sequence_->push_back(std::move(thunks[0]));
+  } else {
+    thunk_sequence_->emplace_back(
+        absl::make_unique<SequentialThunk>(std::move(thunks), scatter));
+  }
   return ParallelLoopEmitter(loop_body_emitter, updates->shape(),
                              launch_dimensions, &b_)
       .EmitLoop(IrName(scatter),
-- 
GitLab


From bb5fc614a4a358b350ef8dd19cb7010760fa9b29 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 16:53:29 -0700
Subject: [PATCH 0653/1085] [XLA] Cleanup: Make AllocationTracker::Resolve
 const.

So that when resolving some global data, we don't have to worry whether
"Resolve" is going to mutate the real data.

PiperOrigin-RevId: 216448145
---
 tensorflow/compiler/xla/service/allocation_tracker.cc | 6 +++---
 tensorflow/compiler/xla/service/allocation_tracker.h  | 8 ++++----
 tensorflow/compiler/xla/service/service.cc            | 4 ++--
 tensorflow/compiler/xla/service/service.h             | 4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc
index 1ed6142dce..ef5e211646 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.cc
+++ b/tensorflow/compiler/xla/service/allocation_tracker.cc
@@ -176,13 +176,13 @@ StatusOr<std::vector<GlobalDataHandle>> AllocationTracker::DeconstructTuple(
 }
 
 StatusOr<std::vector<const ShapedBuffer*>> AllocationTracker::Resolve(
-    const GlobalDataHandle& data) {
+    const GlobalDataHandle& data) const {
   tensorflow::mutex_lock lock(mutex_);
   return AllocationTracker::ResolveInternal(data);
 }
 
 StatusOr<const ShapedBuffer*> AllocationTracker::ResolveForReplica(
-    const GlobalDataHandle& data, int replica_id) {
+    const GlobalDataHandle& data, int replica_id) const {
   tensorflow::mutex_lock lock(mutex_);
   TF_ASSIGN_OR_RETURN(std::vector<const ShapedBuffer*> replicated_buffers,
                       ResolveInternal(data));
@@ -196,7 +196,7 @@ StatusOr<const ShapedBuffer*> AllocationTracker::ResolveForReplica(
 }
 
 StatusOr<std::vector<const ShapedBuffer*>> AllocationTracker::ResolveInternal(
-    const GlobalDataHandle& data) {
+    const GlobalDataHandle& data) const {
   VLOG(2) << "resolve:" << data.handle();
   auto it = handle_to_shaped_buffers_.find(data.handle());
   if (it == handle_to_shaped_buffers_.end()) {
diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h
index 43feccee3c..98d1a302a9 100644
--- a/tensorflow/compiler/xla/service/allocation_tracker.h
+++ b/tensorflow/compiler/xla/service/allocation_tracker.h
@@ -65,13 +65,13 @@ class AllocationTracker {
   // replica, or provide an error status to say whether any of those buffers
   // were not found (or found, but found deallocated).
   StatusOr<std::vector<const ShapedBuffer*>> Resolve(
-      const GlobalDataHandle& data);
+      const GlobalDataHandle& data) const;
 
   // Resolves a handle from an XLA client and replica id to a shaped buffer, or
   // provide an error status to say whether it was not found (or found, but
   // found deallocated).
   StatusOr<const ShapedBuffer*> ResolveForReplica(const GlobalDataHandle& data,
-                                                  int replica_id);
+                                                  int replica_id) const;
 
  private:
   // Data structure encapsulating single memory allocation on the device.
@@ -87,7 +87,7 @@ class AllocationTracker {
   // Internal helper which resolves the given GlobalDataHandle to a
   // list of ScopedShapedBuffers.
   StatusOr<std::vector<const ShapedBuffer*>> ResolveInternal(
-      const GlobalDataHandle& data) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+      const GlobalDataHandle& data) const EXCLUSIVE_LOCKS_REQUIRED(mutex_);
 
   // Internal helper which registers a vector of shaped buffers, one per
   // replica.  ShapedBufferTy is either ScopedShapedBuffer or ShapedBuffer.  If
@@ -113,7 +113,7 @@ class AllocationTracker {
   // maintained per device ordinal.
   using AllocationMap = absl::flat_hash_map<const void*, Allocation>;
 
-  tensorflow::mutex mutex_;
+  mutable tensorflow::mutex mutex_;
 
   // Backend to use with this tracker. The backend supplies the memory allocator
   // to use when deallocating memory.
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index b27a92f2a0..084df17951 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -207,7 +207,7 @@ Status Service::ValidateResultShape(const Shape& client_shape,
 StatusOr<std::vector<std::vector<const ShapedBuffer*>>>
 Service::ResolveAndValidateArguments(
     absl::Span<const GlobalDataHandle* const> arguments,
-    absl::Span<se::StreamExecutor* const> stream_executors) {
+    absl::Span<se::StreamExecutor* const> stream_executors) const {
   CHECK_EQ(options_.number_of_replicas(), stream_executors.size());
   std::vector<std::vector<const ShapedBuffer*>> replicated_arguments;
   replicated_arguments.resize(options_.number_of_replicas());
@@ -590,7 +590,7 @@ StatusOr<std::vector<se::StreamExecutor*>> Service::GetExecutors(
 
 StatusOr<std::vector<std::vector<const ShapedBuffer*>>> Service::GetArguments(
     const ExecutionOptions& execution_options,
-    absl::Span<const GlobalDataHandle* const> arguments) {
+    absl::Span<const GlobalDataHandle* const> arguments) const {
   // Resolve the allocations for the arguments of the computation, and create
   // a vector of device memory offsets for the arguments from the allocations.
   // In the case of partitioned computations, assume all arguments go on the
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 1f62fad4c8..8cf1a7b9f0 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -191,7 +191,7 @@ class Service : public ServiceInterface {
   // Prepare the arguments for executing parallel.
   StatusOr<std::vector<std::vector<const ShapedBuffer*>>> GetArguments(
       const ExecutionOptions& execution_options,
-      absl::Span<const GlobalDataHandle* const> arguments);
+      absl::Span<const GlobalDataHandle* const> arguments) const;
 
  protected:
   friend class LocalExecutable;
@@ -208,7 +208,7 @@ class Service : public ServiceInterface {
   StatusOr<std::vector<std::vector<const ShapedBuffer*>>>
   ResolveAndValidateArguments(
       absl::Span<const GlobalDataHandle* const> arguments,
-      absl::Span<se::StreamExecutor* const> stream_executors);
+      absl::Span<se::StreamExecutor* const> stream_executors) const;
 
   // Create a Hlo module config for the given program shape and arguments.
   // execution_options is optional; if not given a default is used.
-- 
GitLab


From 9bd459e4ceba14f9bb1af98d52a109325de952e8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 17:14:39 -0700
Subject: [PATCH 0654/1085] Adds an Objective-C API to TensorFlow Lite
 experimental.

PiperOrigin-RevId: 216451263
---
 .../contrib/lite/experimental/objc/BUILD      |  94 ++++
 .../contrib/lite/experimental/objc/README.md  |  10 +
 .../Configs/TensorFlowLiteObjc.tulsigen       |  60 +++
 .../project.tulsiconf                         |  17 +
 .../experimental/objc/apis/TFLInterpreter.h   | 188 ++++++++
 .../objc/apis/TFLInterpreterOptions.h         |  37 ++
 .../objc/apis/TFLQuantizationParameters.h     |  36 ++
 .../lite/experimental/objc/apis/TFLTensor.h   |  77 +++
 .../experimental/objc/sources/TFLErrorUtil.h  |  51 ++
 .../experimental/objc/sources/TFLErrorUtil.m  |  45 ++
 .../objc/sources/TFLInterpreter.mm            | 440 ++++++++++++++++++
 .../objc/sources/TFLInterpreterOptions.m      |  30 ++
 .../objc/sources/TFLQuantizationParameters.m  |  23 +
 .../objc/sources/TFLTensor+Internal.h         |  42 ++
 .../experimental/objc/sources/TFLTensor.m     |  54 +++
 .../objc/tests/TFLInterpreterOptionsTests.m   |  49 ++
 .../objc/tests/TFLInterpreterTests.m          | 266 +++++++++++
 .../tools/pip_package/pip_smoke_test.py       |   1 +
 18 files changed, 1520 insertions(+)
 create mode 100644 tensorflow/contrib/lite/experimental/objc/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/objc/README.md
 create mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
 create mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
 create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
 create mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
 create mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m

diff --git a/tensorflow/contrib/lite/experimental/objc/BUILD b/tensorflow/contrib/lite/experimental/objc/BUILD
new file mode 100644
index 0000000000..236b96adb5
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/BUILD
@@ -0,0 +1,94 @@
+# TensorFlow Lite Objective-C API.
+
+package(default_visibility = ["//visibility:private"])
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tools/build_defs/apple:ios.bzl", "ios_unit_test")
+
+SOURCES = glob([
+    "sources/*.h",
+    "sources/*.m",
+    "sources/*.mm",
+])
+
+API_HEADERS = glob([
+    "apis/*.h",
+])
+
+MINIMUM_OS_VERSION = "8.0"
+
+# Compiler flags for building regular non-test libraries.
+RELEASE_COPTS = [
+    # Enables language-specific warnings for Objective-C, Objective-C++, C, and C++.
+    "-Wall",
+    # Warns if functions, variables, and types marked with the deprecated attribute are being used.
+    "-Wdeprecated-declarations",
+    # Warns for errors in documentation.
+    "-Wdocumentation",
+    # Turns all warnings into errors.
+    "-Werror",
+    # Enables extra warning flags that are not enabled by -Wall.
+    "-Wextra",
+    # Warns if a global function is defined without a previous prototype declaration.
+    "-Wmissing-prototypes",
+    # From -Wextra. Disables warning when signed value is converted to unsigned value during comparison.
+    "-Wno-sign-compare",
+    # From -Wextra. Disables warning for unused parameters, which are common in delegate methods and block callbacks.
+    "-Wno-unused-parameter",
+    # Warns if a global or local variable or type declaration shadows another variable, parameter, type, class member, or instance variable.
+    "-Wshadow",
+    # Warns if a function is declared or defined without specifying the argument types. For a block with no args, use (void) instead of ().
+    "-Wstrict-prototypes",
+    # Warns if an @selector() expression is encountered with a method name that hasn't been defined yet.
+    "-Wundeclared-selector",
+
+    # Turn off warnings for headers not part of TensorFlow Lite Objective-C API.
+    "--system-header-prefix=third_party/tensorflow/contrib/lite/experimental/c/",
+]
+
+# Compiler flags for building test libraries.
+TEST_COPTS = RELEASE_COPTS + [
+    # From -Wall. Disables warning when passing nil to a callee that requires a non-null argument.
+    "-Wno-nonnull",
+    # Disables warning when a global or local variable or type declaration shadows another.
+    "-Wno-shadow",
+]
+
+objc_library(
+    name = "TensorFlowLiteObjCLib",
+    srcs = SOURCES,
+    hdrs = API_HEADERS,
+    copts = RELEASE_COPTS,
+    deps = [
+        "//tensorflow/contrib/lite/experimental/c:c_api",
+    ],
+    alwayslink = 1,
+)
+
+ios_unit_test(
+    name = "TensorFlowLiteObjCTests",
+    size = "small",
+    minimum_os_version = MINIMUM_OS_VERSION,
+    deps = [":TensorFlowLiteObjCTestLib"],
+)
+
+objc_library(
+    name = "TensorFlowLiteObjCTestLib",
+    testonly = 1,
+    srcs = glob([
+        "tests/*.m",
+    ]),
+    hdrs = glob([
+        "apis/*.h",
+        "sources/*.h",
+        "tests/*.h",
+    ]),
+    copts = TEST_COPTS,
+    resources = [
+        "//tensorflow/contrib/lite:testdata/add.bin",
+    ],
+    deps = [
+        ":TensorFlowLiteObjCLib",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/objc/README.md b/tensorflow/contrib/lite/experimental/objc/README.md
new file mode 100644
index 0000000000..e8f150b1e8
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/README.md
@@ -0,0 +1,10 @@
+# TensorFlow Lite Objective-C API
+
+## TensorFlowLiteObjc Tulsi Project
+
+Open the `TensorFlowLiteObjc.tulsiproj` using the Tulsi application on Mac or by
+running the following command in Terminal from the root source directory:
+
+```shell
+generate_xcodeproj.sh --genconfig tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj:TensorFlowLiteObjC --outputfolder ~/path/to/xcodeproj
+```
diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
new file mode 100644
index 0000000000..babb5902d3
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
@@ -0,0 +1,60 @@
+{
+  "sourceFilters" : [
+    "third_party/tensorflow/contrib/lite",
+    "third_party/tensorflow/contrib/lite/experimental/c",
+    "third_party/tensorflow/contrib/lite/experimental/objc",
+    "third_party/tensorflow/contrib/lite/experimental/objc/apis",
+    "third_party/tensorflow/contrib/lite/experimental/objc/sources",
+    "third_party/tensorflow/contrib/lite/experimental/objc/tests",
+    "third_party/tensorflow/contrib/lite/kernels",
+    "third_party/tensorflow/contrib/lite/kernels/internal",
+    "third_party/tensorflow/contrib/lite/nnapi",
+    "third_party/tensorflow/contrib/lite/schema",
+  ],
+  "buildTargets" : [
+    "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCLib",
+    "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCTests",
+  ],
+  "projectName" : "TensorFlowLiteObjC",
+  "optionSet" : {
+    "LaunchActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildStartupOptionsRelease" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildOptionsRelease" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildOptionsDebug" : {
+      "p" : "$(inherited)"
+    },
+    "EnvironmentVariables" : {
+      "p" : "$(inherited)"
+    },
+    "BuildActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "CommandlineArguments" : {
+      "p" : "$(inherited)"
+    },
+    "TestActionPreActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "BazelBuildStartupOptionsDebug" : {
+      "p" : "$(inherited)"
+    },
+    "BuildActionPostActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "TestActionPostActionScript" : {
+      "p" : "$(inherited)"
+    },
+    "LaunchActionPostActionScript" : {
+      "p" : "$(inherited)"
+    }
+  },
+  "additionalFilePaths" : [
+    "third_party/tensorflow/contrib/lite/experimental/objc/BUILD",
+  ]
+}
diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
new file mode 100644
index 0000000000..00299cd4cf
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
@@ -0,0 +1,17 @@
+{
+  "configDefaults" : {
+    "optionSet" : {
+      "BazelBuildOptionsDebug" : {
+        "p" : "--ios_minimum_os=8.0"
+      },
+      "BazelBuildOptionsRelease" : {
+        "p" : "--ios_minimum_os=8.0"
+      },
+    }
+  },
+  "projectName" : "TensorFlowLiteObjC",
+  "packages" : [
+    "third_party/tensorflow/contrib/lite/experimental/objc"
+  ],
+  "workspaceRoot" : "../../../../../../.."
+}
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
new file mode 100644
index 0000000000..c07ffc06ff
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
@@ -0,0 +1,188 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+@class TFLInterpreterOptions;
+@class TFLTensor;
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @enum TFLInterpreterErrorCode
+ * This enum specifies various error codes related to `TFLInterpreter`.
+ */
+typedef NS_ENUM(NSUInteger, TFLInterpreterErrorCode) {
+  /** Provided tensor index is invalid. */
+  TFLInterpreterErrorCodeInvalidTensorIndex,
+
+  /** Input data has invalid byte size. */
+  TFLInterpreterErrorCodeInvalidInputByteSize,
+
+  /** Provided shape is invalid. It must be a non-empty array of positive unsigned integers. */
+  TFLInterpreterErrorCodeInvalidShape,
+
+  /** Provided model cannot be loaded. */
+  TFLInterpreterErrorCodeFailedToLoadModel,
+
+  /** Failed to create `TFLInterpreter`. */
+  TFLInterpreterErrorCodeFailedToCreateInterpreter,
+
+  /** Failed to invoke `TFLInterpreter`. */
+  TFLInterpreterErrorCodeFailedToInvoke,
+
+  /** Failed to retrieve a tensor. */
+  TFLInterpreterErrorCodeFailedToGetTensor,
+
+  /** Failed to resize an input tensor. */
+  TFLInterpreterErrorCodeFailedToResizeInputTensor,
+
+  /** Failed to copy data into an input tensor. */
+  TFLInterpreterErrorCodeFailedToCopyDataToInputTensor,
+
+  /** Failed to get data from an output tensor. */
+  TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor,
+
+  /** Failed to allocate memory for tensors. */
+  TFLInterpreterErrorCodeFailedToAllocateTensors,
+
+  /** Operaton not allowed without allocating memory for tensors first. */
+  TFLInterpreterErrorCodeAllocateTensorsRequired,
+
+  /** Operaton not allowed without invoking the interpreter first. */
+  TFLInterpreterErrorCodeInvokeInterpreterRequired,
+};
+
+/**
+ * A TensorFlow Lite model interpreter.
+ */
+@interface TFLInterpreter : NSObject
+
+/** The total number of input tensors. 0 if the interpreter creation failed. */
+@property(nonatomic, readonly) NSUInteger inputTensorCount;
+
+/** The total number of output tensors. 0 if the interpreter creation failed. */
+@property(nonatomic, readonly) NSUInteger outputTensorCount;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+/**
+ * Initializes a new TensorFlow Lite interpreter instance with the given model file path and the
+ * default interpreter options.
+ *
+ * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
+ *
+ * @return A new instance of `TFLInterpreter` with the given model and the default interpreter
+ *     options.
+ */
+- (instancetype)initWithModelPath:(NSString *)modelPath;
+
+/**
+ * Initializes a new TensorFlow Lite interpreter instance with the given model file path and
+ * options.
+ *
+ * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
+ * @param options Options to use for configuring the TensorFlow Lite interpreter.
+ *
+ * @return A new instance of `TFLInterpreter` with the given model and options.
+ */
+- (instancetype)initWithModelPath:(NSString *)modelPath
+                          options:(TFLInterpreterOptions *)options NS_DESIGNATED_INITIALIZER;
+
+/**
+ * Invokes the interpreter to run inference.
+ *
+ * @param error An optional error parameter populated when there is an error in invoking the
+ *     interpreter.
+ *
+ * @return Whether the invocation is successful. Returns NO if an error occurred.
+ */
+- (BOOL)invokeWithError:(NSError **)error;
+
+/**
+ * Returns the input tensor at the given index.
+ *
+ * @param index The index of an input tensor.
+ * @param error An optional error parameter populated when there is an error in looking up the input
+ *     tensor.
+ *
+ * @return The input tensor at the given index. `nil` if there is an error.
+ */
+- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Returns the output tensor at the given index.
+ *
+ * @param index The index of an output tensor.
+ * @param error An optional error parameter populated when there is an error in looking up the
+ *     output tensor.
+ *
+ * @return The output tensor at the given index. `nil` if there is an error.
+ */
+- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Resizes the input tensor at the given index to the specified shape (an array of positive unsigned
+ * integers).
+ *
+ * @param index The index of an input tensor.
+ * @param shape Shape that the given input tensor should be resized to. It should be an array of
+ *     positive unsigned integer(s) containing the size of each dimension.
+ * @param error An optional error parameter populated when there is an error in resizing the input
+ *     tensor.
+ *
+ * @return Whether the input tensor was resized successfully. Returns NO if an error occurred.
+ */
+- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
+                         toShape:(NSArray<NSNumber *> *)shape
+                           error:(NSError **)error;
+
+/**
+ * Copies the given data into the input tensor at the given index. This is allowed only before the
+ * interpreter is invoked.
+ *
+ * @param data The data to set. The byte size of the data must match what's required by the given
+ *     input tensor.
+ * @param index The index of an input tensor.
+ * @param error An optional error parameter populated when there is an error in setting the data.
+ *
+ * @return Whether the data was set into the input tensor successfully. Returns NO if an error
+ *     occurred.
+ */
+- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Gets the data from the output tensor at the given index. The interpreter invocation has to
+ * complete before the data can be retrieved from an output tensor.
+ *
+ * @param index The index of an output tensor.
+ * @param error An optional error parameter populated when there is an error in getting the data.
+ *
+ * @return The data of the output tensor at the given index. `nil` if there is an error.
+ */
+- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
+
+/**
+ * Allocates memory for tensors.
+ *
+ * @param error An optional error parameter populated when there is an error in allocating memory.
+ *
+ * @return Whether memory allocation is successful. Returns NO if an error occurred.
+ */
+- (BOOL)allocateTensorsWithError:(NSError **)error;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
new file mode 100644
index 0000000000..6461fbf017
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
@@ -0,0 +1,37 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Custom configuration options for a TensorFlow Lite interpreter. */
+@interface TFLInterpreterOptions : NSObject
+
+/**
+ * Maximum number of threads that the interpreter should run on. Defaults to 0 (unspecified, letting
+ * TensorFlow Lite to optimize the threading decision).
+ */
+@property(nonatomic) NSUInteger numberOfThreads;
+
+/**
+ * Initializes a new instance of `TFLInterpreterOptions`.
+ *
+ * @return A new instance of `TFLInterpreterOptions`.
+ */
+- (instancetype)init NS_DESIGNATED_INITIALIZER;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
new file mode 100644
index 0000000000..3d5cf793c5
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
@@ -0,0 +1,36 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * Parameters for asymmetric quantization. Quantized values can be converted to float values using:
+ * `realValue = scale * (quantizedValue - zeroPoint)`.
+ */
+@interface TFLQuantizationParameters : NSObject
+
+/** Scale of asymmetric quantization. */
+@property(nonatomic, readonly) float scale;
+
+/** Zero point of asymmetric quantization. */
+@property(nonatomic, readonly) int32_t zeroPoint;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
new file mode 100644
index 0000000000..d08b8fc0e9
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
@@ -0,0 +1,77 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+@class TFLQuantizationParameters;
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @enum TFLTensorDataType
+ * This enum specifies supported TensorFlow Lite tensor data types.
+ */
+typedef NS_ENUM(NSUInteger, TFLTensorDataType) {
+  /** Tensor data type not available. This indicates an error with the model. */
+  TFLTensorDataTypeNoType,
+
+  /** 32-bit single precision floating point. */
+  TFLTensorDataTypeFloat32,
+
+  /** 32-bit signed integer. */
+  TFLTensorDataTypeInt32,
+
+  /** 8-bit unsigned integer. */
+  TFLTensorDataTypeUInt8,
+
+  /** 64-bit signed integer. */
+  TFLTensorDataTypeInt64,
+
+  /** Boolean. */
+  TFLTensorDataTypeBool,
+
+  /** 16-bit signed integer. */
+  TFLTensorDataTypeInt16,
+};
+
+/**
+ * An input or output tensor in a TensorFlow Lite model.
+ */
+@interface TFLTensor : NSObject
+
+/** Name of the tensor. */
+@property(nonatomic, readonly, copy) NSString *name;
+
+/** Data type of the tensor. */
+@property(nonatomic, readonly) TFLTensorDataType dataType;
+
+/**
+ * Shape of the tensor, an array of positive unsigned integer(s) containing the size of each
+ * dimension. For example: the shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is
+ * [2, 2, 3].
+ */
+@property(nonatomic, readonly, copy) NSArray<NSNumber *> *shape;
+
+/** Number of bytes for the tensor data. */
+@property(nonatomic, readonly) NSUInteger byteSize;
+
+/** Parameters for asymmetric quantization. `nil` if the tensor does not use quantization. */
+@property(nonatomic, readonly, nullable) TFLQuantizationParameters *quantizationParameters;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
new file mode 100644
index 0000000000..b6fd4763d6
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
@@ -0,0 +1,51 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Helper utility for error reporting. */
+@interface TFLErrorUtil : NSObject
+
+/**
+ * Creates and returns an interpreter error with the given error code and description.
+ *
+ * @param code Error code.
+ * @param description Error description.
+ *
+ * @return The created interpreter error with the given error code and description.
+ */
++ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                          description:(NSString *)description;
+
+/**
+ * Creates and saves an interpreter error with the given error code and description.
+ *
+ * @param code Error code.
+ * @param description Error description.
+ * @param error Pointer to where to save the created error. If `nil`, no error will be saved.
+ */
++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                         description:(NSString *)description
+                               error:(NSError **)error;
+
+/** Unavailable. */
+- (instancetype)init NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
new file mode 100644
index 0000000000..756d69481c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
@@ -0,0 +1,45 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "TFLErrorUtil.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Error domain of TensorFlow Lite interpreter related errors. */
+static NSString *const TFLInterpreterErrorDomain = @"org.tensorflow.lite.interpreter";
+
+@implementation TFLErrorUtil
+
+#pragma mark - Public
+
++ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                          description:(NSString *)description {
+  return [NSError errorWithDomain:TFLInterpreterErrorDomain
+                             code:code
+                         userInfo:@{NSLocalizedDescriptionKey : description}];
+}
+
++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
+                         description:(NSString *)description
+                               error:(NSError **)error {
+  if (error) {
+    *error = [NSError errorWithDomain:TFLInterpreterErrorDomain
+                                 code:code
+                             userInfo:@{NSLocalizedDescriptionKey : description}];
+  }
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
new file mode 100644
index 0000000000..0f940a5cf3
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
@@ -0,0 +1,440 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
+
+#import "TFLErrorUtil.h"
+#import "TFLTensor+Internal.h"
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
+
+#include "third_party/tensorflow/contrib/lite/experimental/c/c_api.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @enum TFLTensorType
+ * This enum specifies input or output tensor types.
+ */
+typedef NS_ENUM(NSUInteger, TFLTensorType) {
+  /** Input tensor type. */
+  TFLTensorTypeInput,
+
+  /** Output tensor type. */
+  TFLTensorTypeOutput,
+};
+
+// Names used for indicating input or output in error messages.
+static NSString *const kTFLInputDirection = @"input";
+static NSString *const kTFLOutputDirection = @"output";
+
+/**
+ * Error reporter for TFLInterpreter.
+ *
+ * @param user_data User data. Not used.
+ * @param format Error message which may contain argument formatting specifiers.
+ * @param args Values of the arguments in the error message.
+ */
+static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_list args) {
+  NSLog(@"%@", [[NSString alloc] initWithFormat:@(format) arguments:args]);
+}
+
+@interface TFLInterpreter ()
+
+/** TFL_Interpreter backed by C API. */
+@property(nonatomic, nullable) TFL_Interpreter *interpreter;
+
+/**
+ * An error in initializing the interpreter. If not `nil`, this error will be reported when the
+ * interpreter is used.
+ */
+@property(nonatomic, nullable) NSError *initializationError;
+
+@end
+
+@implementation TFLInterpreter
+
+#pragma mark - NSObject
+
+- (void)dealloc {
+  TFL_DeleteInterpreter(_interpreter);
+}
+
+#pragma mark - Public
+
+- (instancetype)initWithModelPath:(NSString *)modelPath {
+  return [self initWithModelPath:modelPath options:[[TFLInterpreterOptions alloc] init]];
+}
+
+- (instancetype)initWithModelPath:(NSString *)modelPath options:(TFLInterpreterOptions *)options {
+  self = [super init];
+
+  if (self != nil) {
+    const char *modelPathCString = modelPath.UTF8String;
+    NSString *pathErrorString =
+        [NSString stringWithFormat:@"Cannot load model from path (%@).", modelPath];
+    if (modelPathCString == nullptr) {
+      _initializationError =
+          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
+                                     description:pathErrorString];
+      return self;
+    }
+
+    TFL_Model *model = TFL_NewModelFromFile(modelPathCString);
+    if (model == nullptr) {
+      _initializationError =
+          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
+                                     description:pathErrorString];
+      return self;
+    }
+
+    TFL_InterpreterOptions *cOptions = TFL_NewInterpreterOptions();
+    if (cOptions == nullptr) {
+      _initializationError =
+          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
+                                     description:@"Failed to create the interpreter."];
+      TFL_DeleteModel(model);
+      return self;
+    }
+
+    if (options.numberOfThreads > 0) {
+      TFL_InterpreterOptionsSetNumThreads(cOptions, (int32_t)options.numberOfThreads);
+    }
+    TFL_InterpreterOptionsSetErrorReporter(cOptions, TFLInterpreterErrorReporter, nullptr);
+
+    _interpreter = TFL_NewInterpreter(model, cOptions);
+    if (_interpreter == nullptr) {
+      _initializationError =
+          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
+                                     description:@"Failed to create the interpreter."];
+    } else {
+      _inputTensorCount = (NSUInteger)TFL_InterpreterGetInputTensorCount(_interpreter);
+      _outputTensorCount = (NSUInteger)TFL_InterpreterGetOutputTensorCount(_interpreter);
+      if (_inputTensorCount <= 0 || _outputTensorCount <= 0) {
+        _initializationError =
+            [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
+                                       description:@"Failed to create the interpreter."];
+      }
+    }
+    TFL_DeleteInterpreterOptions(cOptions);
+    TFL_DeleteModel(model);
+  }
+
+  return self;
+}
+
+- (BOOL)invokeWithError:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return NO;
+  }
+
+  if (TFL_InterpreterInvoke(self.interpreter) != kTfLiteOk) {
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToInvoke
+                                   description:@"Failed to invoke the interpreter."
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return nil;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
+    return nil;
+  }
+
+  return [self tensorOfType:TFLTensorTypeInput atIndex:index error:error];
+}
+
+- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return nil;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) {
+    return nil;
+  }
+
+  return [self tensorOfType:TFLTensorTypeOutput atIndex:index error:error];
+}
+
+- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
+                         toShape:(NSArray<NSNumber *> *)shape
+                           error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return NO;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
+    return NO;
+  }
+
+  if (shape.count == 0) {
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
+                                   description:@"Invalid shape. Must not be empty."
+                                         error:error];
+    return NO;
+  }
+
+  int cDimensions[self.inputTensorCount];
+  for (int d = 0; d < shape.count; ++d) {
+    int dimension = shape[d].intValue;
+    if (dimension <= 0) {
+      NSString *errorDescription = @"Invalid shape. Dimensions must be positive integers.";
+      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
+                                     description:errorDescription
+                                           error:error];
+      return NO;
+    }
+    cDimensions[d] = dimension;
+  }
+
+  if (TFL_InterpreterResizeInputTensor(self.interpreter, (int32_t)index, cDimensions,
+                                       (int32_t)shape.count) != kTfLiteOk) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Failed to resize input tensor at index (%lu).", (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToResizeInputTensor
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return NO;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
+    return NO;
+  }
+
+  TFL_Tensor *tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index);
+  if (tensor == nullptr) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Failed to get input tensor at index (%lu).", (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor);
+  if (data.length != byteSize) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Input tensor at index (%lu) expects data size (%lu), but got (%lu).",
+                         (unsigned long)index, byteSize, (unsigned long)data.length];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidInputByteSize
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  if (TFL_TensorCopyFromBuffer(tensor, data.bytes, data.length) != kTfLiteOk) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to copy data into input tensor at index (%lu).",
+                                   (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return nil;
+  }
+
+  if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) {
+    return nil;
+  }
+
+  const TFL_Tensor *tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index);
+  if (tensor == nullptr) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Failed to get output tensor at index (%lu).", (unsigned long)index];
+    [TFLErrorUtil
+        saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor
+                         description:errorDescription
+                               error:error];
+    return nil;
+  }
+
+  void *bytes = TFL_TensorData(tensor);
+  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor);
+  if (bytes == nullptr || byteSize == 0) {
+    NSString *errorDescription = [NSString
+        stringWithFormat:@"Failed to get output tensor data at index (%lu).", (unsigned long)index];
+    [TFLErrorUtil
+        saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor
+                         description:errorDescription
+                               error:error];
+    return nil;
+  }
+
+  return [NSData dataWithBytes:bytes length:byteSize];
+}
+
+- (BOOL)allocateTensorsWithError:(NSError **)error {
+  if (self.initializationError != nil) {
+    [self saveInitializationErrorToDestination:error];
+    return NO;
+  }
+
+  if (TFL_InterpreterAllocateTensors(self.interpreter) != kTfLiteOk) {
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToAllocateTensors
+                                   description:@"Failed to allocate memory for tensors."
+                                         error:error];
+    return NO;
+  }
+  return YES;
+}
+
+#pragma mark - Private
+
+- (nullable TFLTensor *)tensorOfType:(TFLTensorType)type
+                             atIndex:(NSUInteger)index
+                               error:(NSError **)error {
+  const TFL_Tensor *tensor = nullptr;
+  NSString *tensorType;
+  switch (type) {
+    case TFLTensorTypeInput:
+      tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index);
+      tensorType = kTFLInputDirection;
+      break;
+    case TFLTensorTypeOutput:
+      tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index);
+      tensorType = kTFLOutputDirection;
+      break;
+  }
+
+  if (tensor == nullptr) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to get %@ tensor at index (%lu).", tensorType,
+                                   (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
+                                   description:errorDescription
+                                         error:error];
+    return nil;
+  }
+
+  const char *cName = TFL_TensorName(tensor);
+  if (cName == nullptr) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Failed to get name of %@ tensor at index (%lu).", tensorType,
+                                   (unsigned long)index];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
+                                   description:errorDescription
+                                         error:error];
+    return nil;
+  }
+  NSString *name = [NSString stringWithUTF8String:cName];
+
+  TFLTensorDataType dataType = [self tensorDataTypeFromCTensorType:TFL_TensorType(tensor)];
+
+  int32_t rank = TFL_TensorNumDims(tensor);
+  if (rank <= 0) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid rank (%d).", tensorType,
+                                   (unsigned long)index, rank];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
+                                   description:errorDescription
+                                         error:error];
+    return nil;
+  }
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:rank];
+  for (int32_t d = 0; d < rank; d++) {
+    int32_t dimension = TFL_TensorDim(tensor, d);
+    if (dimension <= 0) {
+      NSString *errorDescription =
+          [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid %d-th dimension (%d).",
+                                     tensorType, (unsigned long)index, d, dimension];
+      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
+                                     description:errorDescription
+                                           error:error];
+      return nil;
+    }
+    shape[d] = @((NSUInteger)dimension);
+  }
+
+  // TODO: Set quantization parameters when C API supports it.
+  return [[TFLTensor alloc] initWithName:name
+                                dataType:dataType
+                                   shape:shape
+                                byteSize:(NSUInteger)TFL_TensorByteSize(tensor)
+                  quantizationParameters:nil];
+}
+
+- (TFLTensorDataType)tensorDataTypeFromCTensorType:(TFL_Type)cTensorType {
+  switch (cTensorType) {
+    case kTfLiteFloat32:
+      return TFLTensorDataTypeFloat32;
+    case kTfLiteInt32:
+      return TFLTensorDataTypeInt32;
+    case kTfLiteUInt8:
+      return TFLTensorDataTypeUInt8;
+    case kTfLiteInt64:
+      return TFLTensorDataTypeInt64;
+    case kTfLiteBool:
+      return TFLTensorDataTypeBool;
+    case kTfLiteInt16:
+      return TFLTensorDataTypeInt16;
+    case kTfLiteNoType:
+    case kTfLiteString:
+    case kTfLiteComplex64:
+      // kTfLiteString and kTfLiteComplex64 are not supported in TensorFlow Lite Objc API.
+      return TFLTensorDataTypeNoType;
+  }
+}
+
+- (void)saveInitializationErrorToDestination:(NSError **)destination {
+  if (destination != NULL) {
+    *destination = self.initializationError;
+  }
+}
+
+- (BOOL)isValidTensorIndex:(NSUInteger)index
+                belowLimit:(NSUInteger)totalTensorCount
+                     error:(NSError **)error {
+  if (index >= totalTensorCount) {
+    NSString *errorDescription =
+        [NSString stringWithFormat:@"Invalid tensor index (%lu) exceeds max (%lu).",
+                                   (unsigned long)index, (unsigned long)(totalTensorCount - 1)];
+    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensorIndex
+                                   description:errorDescription
+                                         error:error];
+    return NO;
+  }
+
+  return YES;
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
new file mode 100644
index 0000000000..1776688288
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
@@ -0,0 +1,30 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@implementation TFLInterpreterOptions
+
+#pragma mark - Public
+
+- (instancetype)init {
+  self = [super init];
+  return self;
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
new file mode 100644
index 0000000000..190f0479ce
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
@@ -0,0 +1,23 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@implementation TFLQuantizationParameters
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
new file mode 100644
index 0000000000..f2f13e5e5f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
@@ -0,0 +1,42 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface TFLTensor (Internal)
+
+/**
+ * Initializes a `TFLTensor` with the given name, data type, shape, and quantization parameters.
+ *
+ * @param name Name of the tensor.
+ * @param dataType Data type of the tensor.
+ * @param shape Shape of the tensor.
+ * @param byteSize Size of the tensor data in number of bytes.
+ * @param quantizationParameters Quantization parameters of the tensor. `nil` if the tensor does not
+ *     use quantization.
+ *
+ * @return A new instance of `TFLTensor` with the given name, data type, shape, and quantization
+ *     parameters.
+ */
+- (instancetype)initWithName:(NSString *)name
+                    dataType:(TFLTensorDataType)dataType
+                       shape:(NSArray<NSNumber *> *)shape
+                    byteSize:(NSUInteger)byteSize
+      quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
new file mode 100644
index 0000000000..adb1c5ad2c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
@@ -0,0 +1,54 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
+
+#import "TFLTensor+Internal.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface TFLTensor ()
+
+// Redefines readonly properties.
+@property(nonatomic, copy) NSString *name;
+@property(nonatomic) TFLTensorDataType dataType;
+@property(nonatomic, copy) NSArray<NSNumber *> *shape;
+@property(nonatomic) NSUInteger byteSize;
+@property(nonatomic, nullable) TFLQuantizationParameters *quantizationParameters;
+
+@end
+
+@implementation TFLTensor
+
+#pragma mark - TFLTensor (Internal)
+
+- (instancetype)initWithName:(NSString *)name
+                    dataType:(TFLTensorDataType)dataType
+                       shape:(NSArray<NSNumber *> *)shape
+                    byteSize:(NSUInteger)byteSize
+      quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters {
+  self = [super init];
+  if (self != nil) {
+    _name = [name copy];
+    _dataType = dataType;
+    _shape = [shape copy];
+    _byteSize = byteSize;
+    _quantizationParameters = quantizationParameters;
+  }
+  return self;
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
new file mode 100644
index 0000000000..17c495fa18
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
@@ -0,0 +1,49 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+
+#import <XCTest/XCTest.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * Unit tests for TFLInterpreterOptions.
+ */
+@interface TFLInterpreterOptionsTests : XCTestCase
+@end
+
+@implementation TFLInterpreterOptionsTests
+
+#pragma mark - Tests
+
+- (void)testInit {
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  XCTAssertNotNil(options);
+  XCTAssertEqual(options.numberOfThreads, 0);
+}
+
+- (void)testSetNumberOfThread {
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  options.numberOfThreads = 2;
+  XCTAssertEqual(options.numberOfThreads, 2);
+  options.numberOfThreads = 0;
+  XCTAssertEqual(options.numberOfThreads, 0);
+  options.numberOfThreads = 3;
+  XCTAssertEqual(options.numberOfThreads, 3);
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m
new file mode 100644
index 0000000000..9e6319a732
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m
@@ -0,0 +1,266 @@
+// Copyright 2018 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
+
+#import <XCTest/XCTest.h>
+
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
+#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Model resource name. */
+static NSString *const kAddModelResourceName = @"add";
+
+/** Model resource type. */
+static NSString *const kAddModelResourceType = @"bin";
+
+/** Rank of the input and output tensor in the Add model. */
+static const NSUInteger kAddModelTensorRank = 1U;
+
+/** Size of the first (and only) dimension of the input and output tensor in the Add model. */
+static const NSUInteger kAddModelTensorFirstDimensionSize = 2U;
+
+/** Invalid input tensor index. */
+static const NSUInteger kInvalidInputTensorIndex = 1U;
+
+/** Invalid output tensor index. */
+static const NSUInteger kInvalidOutputTensorIndex = 1U;
+
+/** Accurary used in comparing floating numbers. */
+static const float kTestAccuracy = 1E-5F;
+
+/**
+ * Unit tests for TFLInterpreter.
+ */
+@interface TFLInterpreterTests : XCTestCase
+
+/** Absolute path of the Add model resource. */
+@property(nonatomic, nullable) NSString *modelPath;
+
+/** Default interpreter using the Add model. */
+@property(nonatomic, nullable) TFLInterpreter *interpreter;
+
+@end
+
+@implementation TFLInterpreterTests
+
+#pragma mark - XCTestCase
+
+- (void)setUp {
+  [super setUp];
+
+  NSBundle *bundle = [NSBundle bundleForClass:[self class]];
+  self.modelPath = [bundle pathForResource:kAddModelResourceName ofType:kAddModelResourceType];
+  self.interpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath];
+  XCTAssertNotNil(self.interpreter);
+  XCTAssertTrue([self.interpreter allocateTensorsWithError:nil]);
+}
+
+- (void)tearDown {
+  self.modelPath = nil;
+  self.interpreter = nil;
+
+  [super tearDown];
+}
+
+#pragma mark - Tests
+
+- (void)testSuccessfulFullRun {
+  // Shape for both input and output tensor.
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
+
+  // Creates the interpreter options.
+  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
+  XCTAssertNotNil(options);
+  options.numberOfThreads = 2;
+
+  // Creates the interpreter.
+  TFLInterpreter *customInterpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath
+                                                                        options:options];
+  XCTAssertNotNil(customInterpreter);
+
+  // Allocates memory for tensors.
+  NSError *error;
+  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies input and output tensor counts.
+  XCTAssertEqual(customInterpreter.inputTensorCount, 1);
+  XCTAssertEqual(customInterpreter.outputTensorCount, 1);
+
+  // Resizes the intput tensor.
+  XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
+  XCTAssertNil(error);
+
+  // Re-allocates memory for tensors.
+  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies the input tensor.
+  TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(inputTensor);
+  XCTAssertNil(error);
+  XCTAssertTrue([inputTensor.name isEqualToString:@"input"]);
+  XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeFloat32);
+  XCTAssertTrue([shape isEqualToArray:inputTensor.shape]);
+  XCTAssertEqual(inputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize);
+
+  // Copies the input data.
+  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
+  float one = 1.f;
+  float three = 3.f;
+  [inputData appendBytes:&one length:sizeof(float)];
+  [inputData appendBytes:&three length:sizeof(float)];
+  XCTAssertTrue([customInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
+  XCTAssertNil(error);
+
+  // Invokes the interpreter.
+  XCTAssertTrue([customInterpreter invokeWithError:&error]);
+  XCTAssertNil(error);
+
+  // Verifies the output tensor.
+  TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(outputTensor);
+  XCTAssertNil(error);
+  XCTAssertTrue([outputTensor.name isEqualToString:@"output"]);
+  XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeFloat32);
+  XCTAssertTrue([shape isEqualToArray:outputTensor.shape]);
+  XCTAssertEqual(outputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize);
+
+  // Tries to query an invalid output tensor index.
+  TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex
+                                                                    error:&error];
+  XCTAssertNil(invalidOutputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+
+  // Gets the output tensor data.
+  error = nil;
+  NSData *outputData = [customInterpreter dataFromOutputTensorAtIndex:0 error:&error];
+  XCTAssertNotNil(outputData);
+  XCTAssertNil(error);
+  float output[kAddModelTensorFirstDimensionSize];
+  [outputData getBytes:output length:(sizeof(float) * kAddModelTensorFirstDimensionSize)];
+  XCTAssertEqualWithAccuracy(output[0], 3.f, kTestAccuracy);
+  XCTAssertEqualWithAccuracy(output[1], 9.f, kTestAccuracy);
+}
+
+- (void)testInitWithModelPath_invalidPath {
+  // Shape for both input and output tensor.
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
+
+  // Creates the interpreter.
+  TFLInterpreter *brokenInterpreter = [[TFLInterpreter alloc] initWithModelPath:@"InvalidPath"];
+  XCTAssertNotNil(brokenInterpreter);
+  XCTAssertEqual(brokenInterpreter.inputTensorCount, 0);
+  XCTAssertEqual(brokenInterpreter.outputTensorCount, 0);
+
+  // Allocates memory for tensors.
+  NSError *error;
+  XCTAssertFalse([brokenInterpreter allocateTensorsWithError:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Resizes the intput tensor.
+  XCTAssertFalse([brokenInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Verifies the input tensor.
+  TFLTensor *inputTensor = [brokenInterpreter inputTensorAtIndex:0 error:&error];
+  XCTAssertNil(inputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Copies the input data.
+  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
+  float one = 1.f;
+  float three = 3.f;
+  [inputData appendBytes:&one length:sizeof(float)];
+  [inputData appendBytes:&three length:sizeof(float)];
+  XCTAssertFalse([brokenInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Invokes the interpreter.
+  XCTAssertFalse([brokenInterpreter invokeWithError:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Verifies the output tensor.
+  TFLTensor *outputTensor = [brokenInterpreter outputTensorAtIndex:0 error:&error];
+  XCTAssertNil(outputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+
+  // Gets the output tensor data.
+  NSData *outputData = [brokenInterpreter dataFromOutputTensorAtIndex:0 error:&error];
+  XCTAssertNil(outputData);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
+}
+
+- (void)testInvoke_beforeAllocation {
+  TFLInterpreter *interpreterWithoutAllocation =
+      [[TFLInterpreter alloc] initWithModelPath:self.modelPath];
+  XCTAssertNotNil(interpreterWithoutAllocation);
+
+  NSError *error;
+  XCTAssertFalse([interpreterWithoutAllocation invokeWithError:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToInvoke);
+}
+
+- (void)testInputTensorAtIndex_invalidIndex {
+  NSError *error;
+  TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:kInvalidInputTensorIndex
+                                                          error:&error];
+  XCTAssertNil(inputTensor);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+}
+
+- (void)testResizeInputTensorAtIndex_invalidIndex {
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
+  NSError *error;
+  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:kInvalidInputTensorIndex
+                                                    toShape:shape
+                                                      error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
+}
+
+- (void)testResizeInputTensorAtIndex_emptyShape {
+  NSMutableArray *emptyShape = [NSMutableArray arrayWithCapacity:0];
+  NSError *error;
+  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:emptyShape error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
+}
+
+- (void)testResizeInputTensorAtIndex_zeroDimensionSize {
+  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
+  shape[0] = [NSNumber numberWithUnsignedInteger:0];
+  NSError *error;
+  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
+}
+
+- (void)testCopyDataToInputTensorAtIndex_invalidInputDataByteSize {
+  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
+  float one = 1.f;
+  float three = 3.f;
+  [inputData appendBytes:&one length:sizeof(float)];
+  [inputData appendBytes:&three length:(sizeof(float) - 1)];
+  NSError *error;
+  XCTAssertFalse([self.interpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
+  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidInputByteSize);
+}
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index c6ef82ccdc..31b68c8f00 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -105,6 +105,7 @@ BLACKLIST = [
     "//tensorflow/contrib/timeseries/python/timeseries:test_utils",
     "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils",  # pylint:disable=line-too-long
     "//tensorflow/contrib/image:sparse_image_warp_test_data",
+    "//tools/build_defs/apple:ios.bzl",
 ]
 
 
-- 
GitLab


From 5be479930d3dcfa3edb863703b1d73b89d45f03c Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Tue, 9 Oct 2018 17:19:24 -0700
Subject: [PATCH 0655/1085] [XLA:GPU] Use CudnnConvKind in more places.

No functional change.

PiperOrigin-RevId: 216451881
---
 tensorflow/compiler/xla/service/gpu/BUILD     |  1 +
 .../service/gpu/cudnn_convolution_runner.cc   | 99 ++++++++++---------
 .../xla/service/gpu/pad_for_tensor_cores.cc   | 84 +++++++++-------
 .../compiler/xla/service/gpu/pad_insertion.cc | 31 +++---
 4 files changed, 116 insertions(+), 99 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 0144d59097..62da43d68a 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -591,6 +591,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:window_util",
         "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service:hlo_casting_utils",
         "//tensorflow/compiler/xla/service:hlo_creation_utils",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/compiler/xla/service:shape_inference",
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
index 89dd1bb272..a809c22b33 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
@@ -312,11 +312,12 @@ StatusOr<CudnnConvParams> GetCudnnConvParams(
 
   TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config,
                       conv->backend_config<CudnnConvBackendConfig>());
-  const auto& target = conv->custom_call_target();
+  TF_ASSIGN_OR_RETURN(CudnnConvKind kind, GetCudnnConvKind(conv));
   const auto& lhs_shape = conv->operand(0)->shape();
   const auto& rhs_shape = conv->operand(1)->shape();
   const auto& conv_result_shape = conv->shape().tuple_shapes(0);
 
+  params.kind = kind;
   params.window = &conv->window();
   params.dnums = &conv->convolution_dimension_numbers();
   params.feature_group_count = conv->feature_group_count();
@@ -324,55 +325,55 @@ StatusOr<CudnnConvParams> GetCudnnConvParams(
       backend_config.algorithm(), backend_config.tensor_ops_enabled()));
   params.conv_result_scale = backend_config.conv_result_scale();
 
-  if (target == kCudnnConvForwardCallTarget) {
-    params.kind = CudnnConvKind::kForward;
-    params.input_shape = &lhs_shape;
-    params.filter_shape = &rhs_shape;
-    params.output_shape = &conv_result_shape;
-    params.input_buf = operand_buffers[0];
-    params.filter_buf = operand_buffers[1];
-    params.output_buf = result_buffer;
-  } else if (target == kCudnnConvBackwardInputCallTarget) {
-    params.kind = CudnnConvKind::kBackwardInput;
-    params.input_shape = &conv_result_shape;
-    params.filter_shape = &rhs_shape;
-    params.output_shape = &lhs_shape;
-    params.input_buf = result_buffer;
-    params.filter_buf = operand_buffers[1];
-    params.output_buf = operand_buffers[0];
-  } else if (target == kCudnnConvBackwardFilterCallTarget) {
-    params.kind = CudnnConvKind::kBackwardFilter;
-    params.input_shape = &lhs_shape;
-    params.filter_shape = &conv_result_shape;
-    params.output_shape = &rhs_shape;
-    params.input_buf = operand_buffers[0];
-    params.filter_buf = result_buffer;
-    params.output_buf = operand_buffers[1];
-  } else if (target == kCudnnConvBiasActivationForwardCallTarget) {
-    params.kind = CudnnConvKind::kForwardActivation;
-    params.input_shape = &lhs_shape;
-    params.filter_shape = &rhs_shape;
-    params.output_shape = &conv_result_shape;
-    params.fusion.emplace();
-    auto& fusion = *params.fusion;
-    if (backend_config.activation_mode() <
-        static_cast<int64>(se::dnn::ActivationMode::kNumActivationModes)) {
-      fusion.mode = static_cast<se::dnn::ActivationMode>(
-          backend_config.activation_mode());
-    } else {
-      return InternalError("Bad activation mode: %s",
-                           backend_config.ShortDebugString());
-    }
-    fusion.side_input_scale = backend_config.side_input_scale();
-    params.input_buf = operand_buffers[0];
-    params.filter_buf = operand_buffers[1];
-    params.output_buf = result_buffer;
-    params.fusion->bias_buf = operand_buffers[2];
-    if (operand_buffers.size() >= 4) {
-      params.fusion->side_input_buf = operand_buffers[3];
+  switch (kind) {
+    case CudnnConvKind::kForward:
+      params.input_shape = &lhs_shape;
+      params.filter_shape = &rhs_shape;
+      params.output_shape = &conv_result_shape;
+      params.input_buf = operand_buffers[0];
+      params.filter_buf = operand_buffers[1];
+      params.output_buf = result_buffer;
+      break;
+    case CudnnConvKind::kBackwardInput:
+      params.input_shape = &conv_result_shape;
+      params.filter_shape = &rhs_shape;
+      params.output_shape = &lhs_shape;
+      params.input_buf = result_buffer;
+      params.filter_buf = operand_buffers[1];
+      params.output_buf = operand_buffers[0];
+      break;
+    case CudnnConvKind::kBackwardFilter:
+      params.input_shape = &lhs_shape;
+      params.filter_shape = &conv_result_shape;
+      params.output_shape = &rhs_shape;
+      params.input_buf = operand_buffers[0];
+      params.filter_buf = result_buffer;
+      params.output_buf = operand_buffers[1];
+      break;
+    case CudnnConvKind::kForwardActivation: {
+      params.kind = CudnnConvKind::kForwardActivation;
+      params.input_shape = &lhs_shape;
+      params.filter_shape = &rhs_shape;
+      params.output_shape = &conv_result_shape;
+      params.fusion.emplace();
+      auto& fusion = *params.fusion;
+      if (backend_config.activation_mode() <
+          static_cast<int64>(se::dnn::ActivationMode::kNumActivationModes)) {
+        fusion.mode = static_cast<se::dnn::ActivationMode>(
+            backend_config.activation_mode());
+      } else {
+        return InternalError("Bad activation mode: %s",
+                             backend_config.ShortDebugString());
+      }
+      fusion.side_input_scale = backend_config.side_input_scale();
+      params.input_buf = operand_buffers[0];
+      params.filter_buf = operand_buffers[1];
+      params.output_buf = result_buffer;
+      params.fusion->bias_buf = operand_buffers[2];
+      if (operand_buffers.size() >= 4) {
+        params.fusion->side_input_buf = operand_buffers[3];
+      }
     }
-  } else {
-    return InternalError("Unexpected custom call target: %s", target);
   }
   return params;
 }
diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
index e3869b5c36..8f1f5a7bf5 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
@@ -105,38 +105,45 @@ static HloInstruction* PadInstruction(HloInstruction* instr,
 
 // Pads the input/output feature dimensions of the given cudnn convolution
 // custom-call to be multiples of kDesiredNumFeaturesFactor.
-static StatusOr<bool> PadFeaturesDims(HloInstruction* conv) {
+static StatusOr<bool> PadFeaturesDims(HloCustomCallInstruction* conv) {
   CHECK_EQ(0, conv->shape().tuple_shapes(1).dimensions(0))
       << "conv must use 0 scratch bytes, i.e. this pass must be run "
          "before CudnnConvolutionAlgorithmPicker.";
 
-  const auto& target = conv->custom_call_target();
+  TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv));
   const auto& dnums = conv->convolution_dimension_numbers();
   auto* lhs = conv->mutable_operand(0);
   auto* rhs = conv->mutable_operand(1);
   const Shape& result_shape = conv->shape().tuple_shapes(0);
 
   Shape new_lhs_shape = [&] {
-    if (target == kCudnnConvForwardCallTarget ||
-        target == kCudnnConvBackwardFilterCallTarget) {
-      // LHS is "input".
-      return PadShape(lhs->shape(), {dnums.input_feature_dimension()});
+    switch (kind) {
+      case CudnnConvKind::kForward:
+      case CudnnConvKind::kBackwardFilter:
+        // LHS is "input".
+        return PadShape(lhs->shape(), {dnums.input_feature_dimension()});
+      case CudnnConvKind::kBackwardInput:
+        // LHS is "output".
+        return PadShape(lhs->shape(), {dnums.output_feature_dimension()});
+      case CudnnConvKind::kForwardActivation:
+        LOG(FATAL) << "Not yet implemented.";
     }
-    CHECK_EQ(target, kCudnnConvBackwardInputCallTarget);
-    // LHS is "output".
-    return PadShape(lhs->shape(), {dnums.output_feature_dimension()});
   }();
 
   Shape new_rhs_shape = [&] {
-    if (target == kCudnnConvForwardCallTarget ||
-        target == kCudnnConvBackwardInputCallTarget) {
-      // RHS is "filter".
-      return PadShape(rhs->shape(), {dnums.kernel_input_feature_dimension(),
-                                     dnums.kernel_output_feature_dimension()});
+    switch (kind) {
+      case CudnnConvKind::kForward:
+      case CudnnConvKind::kBackwardInput:
+        // RHS is "filter".
+        return PadShape(rhs->shape(),
+                        {dnums.kernel_input_feature_dimension(),
+                         dnums.kernel_output_feature_dimension()});
+      case CudnnConvKind::kBackwardFilter:
+        // RHS is "output".
+        return PadShape(rhs->shape(), {dnums.output_feature_dimension()});
+      case CudnnConvKind::kForwardActivation:
+        LOG(FATAL) << "Not yet implemented.";
     }
-    CHECK_EQ(target, kCudnnConvBackwardFilterCallTarget);
-    // RHS is "output".
-    return PadShape(rhs->shape(), {dnums.output_feature_dimension()});
   }();
 
   if (ShapeUtil::Equal(lhs->shape(), new_lhs_shape) &&
@@ -146,18 +153,21 @@ static StatusOr<bool> PadFeaturesDims(HloInstruction* conv) {
   }
 
   Shape new_result_shape = [&] {
-    if (target == kCudnnConvForwardCallTarget) {
-      // Result is "output".
-      return PadShape(result_shape, {dnums.output_feature_dimension()});
+    switch (kind) {
+      case CudnnConvKind::kForward:
+        // Result is "output".
+        return PadShape(result_shape, {dnums.output_feature_dimension()});
+      case CudnnConvKind::kBackwardInput:
+        // Result is "input".
+        return PadShape(result_shape, {dnums.input_feature_dimension()});
+      case CudnnConvKind::kBackwardFilter:
+        // Result is "filter".
+        return PadShape(result_shape,
+                        {dnums.kernel_input_feature_dimension(),
+                         dnums.kernel_output_feature_dimension()});
+      case CudnnConvKind::kForwardActivation:
+        LOG(FATAL) << "Not yet implemented.";
     }
-    if (target == kCudnnConvBackwardInputCallTarget) {
-      // Result is "input".
-      return PadShape(result_shape, {dnums.input_feature_dimension()});
-    }
-    CHECK_EQ(target, kCudnnConvBackwardFilterCallTarget);
-    // Result is "filter".
-    return PadShape(result_shape, {dnums.kernel_input_feature_dimension(),
-                                   dnums.kernel_output_feature_dimension()});
   }();
 
   // Check that padding wouldn't increase the total bytes read/written by this
@@ -223,16 +233,20 @@ static StatusOr<bool> PadFeaturesDims(HloInstruction* conv) {
   return true;
 }
 
-static std::vector<HloInstruction*> GetRelevantConvs(HloComputation* comp) {
-  std::vector<HloInstruction*> convs;
+static std::vector<HloCustomCallInstruction*> GetRelevantConvs(
+    HloComputation* comp) {
+  std::vector<HloCustomCallInstruction*> convs;
   for (HloInstruction* instr : comp->instructions()) {
-    if (IsCustomCallToDnnConvolution(*instr) &&
-        instr->operand(0)->shape().element_type() == F16 &&
+    if (!IsCustomCallToDnnConvolution(*instr)) {
+      continue;
+    }
+    auto* custom_call = Cast<HloCustomCallInstruction>(instr);
+    if (custom_call->operand(0)->shape().element_type() == F16 &&
         // TODO(timshen): Disable for fused conv for now. Implement it if it's
         // needed.
-        Cast<HloCustomCallInstruction>(instr)->custom_call_target() !=
+        custom_call->custom_call_target() !=
             kCudnnConvBiasActivationForwardCallTarget) {
-      convs.push_back(instr);
+      convs.push_back(custom_call);
     }
   }
   return convs;
@@ -241,7 +255,7 @@ static std::vector<HloInstruction*> GetRelevantConvs(HloComputation* comp) {
 StatusOr<bool> PadForTensorCores::Run(HloModule* module) {
   bool changed = false;
   for (HloComputation* comp : module->MakeNonfusionComputations()) {
-    for (HloInstruction* conv : GetRelevantConvs(comp)) {
+    for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) {
       TF_ASSIGN_OR_RETURN(bool result, PadFeaturesDims(conv));
       changed |= result;
     }
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
index b42a19e3a2..ae7abca7c6 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_creation_utils.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -378,25 +379,25 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution(
 
 StatusOr<bool> PadInsertion::RunOnComputation(HloComputation* computation) {
   bool changed = false;
-  std::vector<HloInstruction*> convs;
+  std::vector<HloCustomCallInstruction*> convs;
   for (auto* instr : computation->instructions()) {
     if (IsCustomCallToDnnConvolution(*instr)) {
-      convs.push_back(instr);
+      convs.push_back(Cast<HloCustomCallInstruction>(instr));
     }
   }
-  for (HloInstruction* instruction : convs) {
-    const auto& target = instruction->custom_call_target();
-    if (target == kCudnnConvForwardCallTarget ||
-        target == kCudnnConvBiasActivationForwardCallTarget) {
-      changed |= CanonicalizeForwardConvolution(instruction);
-    } else if (target == kCudnnConvBackwardFilterCallTarget) {
-      changed |= CanonicalizeBackwardFilterConvolution(instruction);
-    } else if (target == kCudnnConvBackwardInputCallTarget) {
-      changed |= CanonicalizeBackwardInputConvolution(instruction);
-    } else {
-      LOG(FATAL) << "Unknown custom call target for cudnn conv: "
-                 << instruction->ToString();
-    }
+  for (HloCustomCallInstruction* instruction : convs) {
+    TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(instruction));
+    changed |= [&] {
+      switch (kind) {
+        case CudnnConvKind::kForward:
+        case CudnnConvKind::kForwardActivation:
+          return CanonicalizeForwardConvolution(instruction);
+        case CudnnConvKind::kBackwardInput:
+          return CanonicalizeBackwardInputConvolution(instruction);
+        case CudnnConvKind::kBackwardFilter:
+          return CanonicalizeBackwardFilterConvolution(instruction);
+      }
+    }();
   }
   return changed;
 }
-- 
GitLab


From ee1cb110360b12d752c9cb4ebbb76d33930f67d7 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Tue, 9 Oct 2018 17:23:45 -0700
Subject: [PATCH 0656/1085] Move tflite_convert g3docs, so they will be pulled
 into the site.

PiperOrigin-RevId: 216452447
---
 tensorflow/contrib/lite/g3doc/_book.yaml      |  9 ++++
 .../tflite_convert}/cmdline_examples.md       | 54 ++++++++-----------
 .../tflite_convert}/cmdline_reference.md      | 17 ++----
 .../lite/g3doc/tflite_convert/index.md        | 22 ++++++++
 .../tflite_convert}/python_api.md             | 29 ++--------
 .../tflite_convert}/toco_landscape.svg        |  0
 tensorflow/contrib/lite/toco/g3doc/README.md  |  3 ++
 7 files changed, 63 insertions(+), 71 deletions(-)
 rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/cmdline_examples.md (90%)
 rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/cmdline_reference.md (93%)
 create mode 100644 tensorflow/contrib/lite/g3doc/tflite_convert/index.md
 rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/python_api.md (89%)
 rename tensorflow/contrib/lite/{toco/g3doc => g3doc/tflite_convert}/toco_landscape.svg (100%)
 create mode 100644 tensorflow/contrib/lite/toco/g3doc/README.md

diff --git a/tensorflow/contrib/lite/g3doc/_book.yaml b/tensorflow/contrib/lite/g3doc/_book.yaml
index de6914e536..f6ec387ad2 100644
--- a/tensorflow/contrib/lite/g3doc/_book.yaml
+++ b/tensorflow/contrib/lite/g3doc/_book.yaml
@@ -38,6 +38,15 @@ upper_tabs:
         path: /lite/ios
       - title: TensorFlow Lite for Raspberry Pi
         path: /lite/rpi
+      - heading: TFLite Converter
+      - title: Overview
+        path: /lite/tflite_convert/
+      - title: Python API
+        path: /lite/tflite_convert/python_api
+      - title: Command Line Examples
+        path: /lite/tflite_convert/cmdline_examples
+      - title: Command Line Reference
+        path: /lite/tflite_convert/cmdline_reference
 
       - title: TF Mobile
         style: accordion
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
similarity index 90%
rename from tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
rename to tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
index e3c46eb377..d88acfae80 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
@@ -1,33 +1,8 @@
 # TensorFlow Lite Converter command-line examples
 
-This page shows how to use the TensorFlow Lite Converter in the command line. It
-is complemented by the following documents:
-
-*   [README](../README.md)
-*   [Command-line glossary](cmdline_reference.md)
-*   [Python API examples](python_api.md)
-
-Table of contents:
-
-*   [Command-line tools](#tools)
-    *   [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9)
-*   [Basic examples](#basic)
-    *   [Convert a TensorFlow GraphDef](#graphdef)
-    *   [Convert a TensorFlow SavedModel](#savedmodel)
-    *   [Convert a tf.keras model](#keras)
-*   [Quantization](#quantization)
-    *   [Convert a TensorFlow GraphDef for quantized inference](#graphdef-quant)
-    *   [Use "dummy-quantization" to try out quantized inference on a float
-        graph](#dummy-quant)
-*   [Specifying input and output arrays](#specifying-input-and-output-arrays)
-    *   [Multiple input arrays](#multiple-input-arrays)
-    *   [Multiple output arrays](#multiple-output-arrays)
-    *   [Specifying subgraphs](#specifying-subgraphs)
-*   [Graph visualizations](#graph-visualizations)
-    *   [Using --output_format=GRAPHVIZ_DOT](#using-output-format-graphviz-dot)
-    *   [Using --dump_graphviz_dir](#using-dump-graphviz-dir)
-    *   [Graph "video" logging](#graph-video-logging)
-    *   [Legend for the graph visualizations](#graphviz-legend)
+This page shows how to use the TensorFlow Lite Converter in the command line.
+
+[TOC]
 
 ## Command-line tools <a name="tools"></a>
 
@@ -325,10 +300,23 @@ As before, these can be rendered to PDFs:
 dot -Tpdf -O /tmp/toco_*.dot
 ```
 
-Sample output files can be seen here:
-
-*   [toco_AT_IMPORT.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf)
-*   [toco_AFTER_TRANSFORMATIONS.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf).
+Sample output files can be seen here below. Note that it is the same
+`AveragePool` node in the top right of each image.
+
+<table><tr>
+  <td>
+    <a target="_blank" href="https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf">
+      <img src="https://www.tensorflow.org/images/tflite_convert/tflite_convert_before.png"/>
+    </a>
+  </td>
+  <td>
+    <a target="_blank" href="https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf">
+      <img src="https://www.tensorflow.org/images/tflite_convert/tflite_convert_after.png"/>
+    </a>
+  </td>
+</tr>
+<tr><td>before</td><td>after</td></tr>
+</table>
 
 ### Graph "video" logging
 
@@ -347,7 +335,7 @@ change was introduced in the graph.
     *   Some typically heavy operators (e.g. Conv) are rendered in a
         <span style="background-color:#c53929;color:white;border:1px;border-style:solid;border-color:black;padding:1px">darker
         red</span>.
-*   Arrays are octogons with the following colors:
+*   Arrays are octagons with the following colors:
     *   Constant arrays are
         <span style="background-color:#4285f4;color:white;border:1px;border-style:solid;border-color:black;padding:1px">blue</span>.
     *   Activation arrays are gray:
diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
similarity index 93%
rename from tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
rename to tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
index 31200fd657..d65912fea6 100644
--- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
@@ -2,18 +2,9 @@
 
 This page is complete reference of command-line flags used by the TensorFlow
 Lite Converter's command line starting from TensorFlow 1.9 up until the most
-recent build of TensorFlow. It is complemented by the following other documents:
+recent build of TensorFlow.
 
-*   [README](../README.md)
-*   [Command-line examples](cmdline_examples.md)
-*   [Python API examples](python_api.md)
-
-Table of contents:
-
-*   [High-level flags](#high-level-flags)
-*   [Model flags](#model-flags)
-*   [Transformation flags](#transformation-flags)
-*   [Logging flags](#logging-flags)
+[TOC]
 
 ## High-level flags
 
@@ -32,7 +23,7 @@ files. The flag `--output_file` is always required. Additionally, either
 *   `--output_format`. Type: string. Default: `TFLITE`. Specifies the format of
     the output file. Allowed values:
     *   `TFLITE`: TensorFlow Lite FlatBuffer format.
-    *   `GRAPHVIZ_DOT`: GraphViz `.dot` format containg a visualization of the
+    *   `GRAPHVIZ_DOT`: GraphViz `.dot` format containing a visualization of the
         graph after graph transformations.
         *   Note that passing `GRAPHVIZ_DOT` to `--output_format` leads to loss
             of TFLite specific transformations. Therefore, the resulting
@@ -68,7 +59,7 @@ based on index.
 *   `--input_shapes`. Type: colon-separated list of comma-separated lists of
     integers. Each comma-separated list of integers gives the shape of one of
     the input arrays specified in
-    [TensorFlow convention](https://www.tensorflow.org/versions/r1.2/programmers_guide/dims_types#shape).
+    [TensorFlow convention](https://www.tensorflow.org/guide/dims_types#shape).
     *   Example: `--input_shapes=1,60,80,3` for a typical vision model means a
         batch size of 1, an input image height of 60, an input image width of
         80, and an input image depth of 3 (representing RGB channels).
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/index.md b/tensorflow/contrib/lite/g3doc/tflite_convert/index.md
new file mode 100644
index 0000000000..12ba0225f6
--- /dev/null
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/index.md
@@ -0,0 +1,22 @@
+# TensorFlow Lite Converter
+
+The TensorFlow Lite Converter converts TensorFlow graphs into
+TensorFlow Lite graphs. There are additional usages that are also detailed in
+the usage documentation.
+
+
+## Where the converter fits in the TensorFlow landscape
+
+Once an application developer has a trained TensorFlow model, the TensorFlow
+Lite Converter will accept
+that model and generate a TensorFlow Lite
+[FlatBuffer](https://google.github.io/flatbuffers/) file. The converter currently supports
+[SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators),
+frozen graphs (models generated via
+[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)),
+and `tf.Keras` model files.  The TensorFlow Lite FlatBuffer file can be shipped
+to client devices, generally mobile devices, where the TensorFlow Lite
+interpreter handles them on-device.  This flow is represented in the diagram
+below.
+
+![drawing](toco_landscape.svg)
diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
similarity index 89%
rename from tensorflow/contrib/lite/toco/g3doc/python_api.md
rename to tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
index 1f741360c6..e1c0e0c240 100644
--- a/tensorflow/contrib/lite/toco/g3doc/python_api.md
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
@@ -1,31 +1,10 @@
 # TensorFlow Lite Converter & Interpreter Python API reference
 
 This page provides examples on how to use the TensorFlow Lite Converter and the
-TensorFlow Lite interpreter using the Python API. It is complemented by the
-following documents:
-
-*   [README](../README.md)
-*   [Command-line examples](cmdline_examples.md)
-*   [Command-line glossary](cmdline_reference.md)
-
-Table of contents:
-
-*   [High-level overview](#high-level-overview)
-*   [API](#api)
-*   [Basic examples](#basic)
-    *   [Exporting a GraphDef from tf.Session](#basic-graphdef-sess)
-    *   [Exporting a GraphDef from file](#basic-graphdef-file)
-    *   [Exporting a SavedModel](#basic-savedmodel)
-    *   [Exporting a tf.keras File](#basic-keras-file)
-*   [Complex examples](#complex)
-    *   [Exporting a quantized GraphDef](#complex-quant)
-*   [TensorFlow Lite Python interpreter](#interpreter)
-    *   [Using the interpreter from a model file](#interpreter-file)
-    *   [Using the interpreter from model data](#interpreter-data)
-*   [Additional instructions](#additional-instructions)
-    *   [Build from source code](#latest-package)
-    *   [Converting models in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11)
-    *   [Converting models prior to TensorFlow 1.9](#pre-tensorflow-1.9)
+TensorFlow Lite interpreter using the Python API. 
+
+[TOC]
+
 
 ## High-level overview
 
diff --git a/tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg b/tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg
similarity index 100%
rename from tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg
rename to tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg
diff --git a/tensorflow/contrib/lite/toco/g3doc/README.md b/tensorflow/contrib/lite/toco/g3doc/README.md
new file mode 100644
index 0000000000..2153b6cc63
--- /dev/null
+++ b/tensorflow/contrib/lite/toco/g3doc/README.md
@@ -0,0 +1,3 @@
+# TOCO
+
+These files have moved to [../../g3doc/tflite_convert](../../g3doc/tflite_convert)
-- 
GitLab


From eaebeb1d4d939fb9fd0b75e32a76151cb517bfb6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 17:24:11 -0700
Subject: [PATCH 0657/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 216452496
---
 tensorflow/core/ops/ops.pbtxt | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 05b97bffad..a8da95dea3 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -15116,6 +15116,22 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "LookupTableRemoveV2"
+  input_arg {
+    name: "table_handle"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "keys"
+    type_attr: "Tin"
+  }
+  attr {
+    name: "Tin"
+    type: "type"
+  }
+  is_stateful: true
+}
 op {
   name: "LookupTableSize"
   input_arg {
@@ -17701,6 +17717,10 @@ op {
     name: "empty_key"
     type_attr: "key_dtype"
   }
+  input_arg {
+    name: "deleted_key"
+    type_attr: "key_dtype"
+  }
   output_arg {
     name: "table_handle"
     type: DT_RESOURCE
-- 
GitLab


From f0784e69761ef5b78480e9e8b1fd1aa558186646 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 17:35:41 -0700
Subject: [PATCH 0658/1085] Add support for modeling fast memory close to the
 processor/gpu

PiperOrigin-RevId: 216453979
---
 .../core/grappler/costs/cost_estimator.h      | 38 +++++++++-
 .../grappler/costs/op_level_cost_estimator.cc | 76 +++++++++++++------
 .../grappler/costs/op_level_cost_estimator.h  |  3 +-
 .../core/grappler/costs/virtual_scheduler.cc  | 31 +++++---
 4 files changed, 112 insertions(+), 36 deletions(-)

diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h
index 569d9da683..811e923b87 100644
--- a/tensorflow/core/grappler/costs/cost_estimator.h
+++ b/tensorflow/core/grappler/costs/cost_estimator.h
@@ -31,8 +31,37 @@ constexpr int64 kMemoryUnknown = -1ll;
 constexpr int64 kZeroMemory = 0ll;
 
 struct DeviceInfo {
-  double gigaops;     // Billions of operations executed per second.
-  double gb_per_sec;  // Bandwidth to main memory in GB per second.
+  // Billions of operations executed per second.
+  double gigaops;
+
+  // Bandwidth to main memory in GB per second.
+  double gb_per_sec;
+
+  // Read bandwidth to intermediate memory in GB per second.
+  double intermediate_read_gb_per_sec;
+
+  // Read bandwidth to intermediate memory in GB per second.
+  double intermediate_write_gb_per_sec;
+
+  DeviceInfo()
+      : gigaops(INFINITY),
+        gb_per_sec(INFINITY),
+        intermediate_read_gb_per_sec(INFINITY),
+        intermediate_write_gb_per_sec(INFINITY) {}
+
+  DeviceInfo(const DeviceInfo& input)
+      : gigaops(input.gigaops),
+        gb_per_sec(input.gb_per_sec),
+        intermediate_read_gb_per_sec(input.intermediate_read_gb_per_sec),
+        intermediate_write_gb_per_sec(input.intermediate_write_gb_per_sec) {}
+
+  DeviceInfo(double gigaops, double gb_per_sec,
+             double intermediate_read_gb_per_sec = INFINITY,
+             double intermediate_write_gb_per_sec = INFINITY)
+      : gigaops(gigaops),
+        gb_per_sec(gb_per_sec),
+        intermediate_read_gb_per_sec(intermediate_read_gb_per_sec),
+        intermediate_write_gb_per_sec(intermediate_write_gb_per_sec) {}
 };
 
 // Holds the set of things we might want to estimate or measure in Grappler.
@@ -101,6 +130,9 @@ struct Costs {
   // Memory access cost of running the graph.
   Duration memory_time;
 
+  // Intermediate memory access cost of running the graph
+  Duration intermediate_memory_time;
+
   // This field can be a very pessimistic estimate of the main memory
   // requirements of a graph. For example, it might assume that all activations
   // are live for all of a graph's execution.
@@ -146,6 +178,7 @@ Costs::Costs() {
   execution_time = Duration::zero();
   compute_time = Duration::zero();
   memory_time = Duration::zero();
+  intermediate_memory_time = Duration::zero();
   max_memory = kMemoryUnknown;
   persistent_memory = kMemoryUnknown;
   temporary_memory = kMemoryUnknown;
@@ -158,6 +191,7 @@ Costs Costs::ZeroCosts() {
   costs.execution_time = Duration::zero();
   costs.compute_time = Duration::zero();
   costs.memory_time = Duration::zero();
+  costs.intermediate_memory_time = Duration::zero();
   costs.max_memory = kZeroMemory;
   costs.persistent_memory = kZeroMemory;
   costs.temporary_memory = kZeroMemory;
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index f363f2915f..76e5c989fc 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -420,7 +420,7 @@ DeviceInfo OpLevelCostEstimator::GetDeviceInfo(
   DCHECK_LT(0, gflops) << device.DebugString();
   DCHECK_LT(0, gb_per_sec) << device.DebugString();
 
-  return {gflops, gb_per_sec};
+  return DeviceInfo(gflops, gb_per_sec);
 }
 
 Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const {
@@ -478,8 +478,8 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
   bool unknown_shapes = false;
   const double input_size = CalculateInputSize(op_info, &unknown_shapes);
   const double output_size = CalculateOutputSize(op_info, &unknown_shapes);
-  const double total_io_bytes = input_size + output_size;
-  Costs costs = PredictOpCountBasedCost(operations, total_io_bytes, op_info);
+  Costs costs =
+      PredictOpCountBasedCost(operations, input_size, output_size, op_info);
   costs.inaccurate = unknown_shapes;
   costs.num_ops_with_unknown_shapes = unknown_shapes;
   costs.max_memory = output_size;
@@ -487,9 +487,13 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
 }
 
 Costs OpLevelCostEstimator::PredictOpCountBasedCost(
-    double operations, double total_io_bytes, const OpInfo& op_info) const {
+    double operations, double input_io_bytes, double output_io_bytes,
+    const OpInfo& op_info) const {
+  double total_io_bytes = input_io_bytes + output_io_bytes;
   const DeviceInfo device_info = GetDeviceInfo(op_info.device());
-  if (device_info.gigaops <= 0 || device_info.gb_per_sec <= 0) {
+  if (device_info.gigaops <= 0 || device_info.gb_per_sec <= 0 ||
+      device_info.intermediate_read_gb_per_sec <= 0 ||
+      device_info.intermediate_write_gb_per_sec <= 0) {
     VLOG(1) << "BAD DEVICE. Op:" << op_info.op()
             << " device type:" << op_info.device().type()
             << " device model:" << op_info.device().model();
@@ -504,9 +508,29 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
   VLOG(1) << "Op:" << op_info.op() << " Size (KB):" << (total_io_bytes) / 1e3
           << " Memory Time (ns):" << memory_cost.count();
 
+  // Check if bytes > 0.  If it's not and the bandwidth is set to infinity
+  // then the result would be undefined.
+  double intermediate_read_time =
+      (input_io_bytes > 0)
+          ? std::ceil(input_io_bytes / device_info.intermediate_read_gb_per_sec)
+          : 0;
+
+  double intermediate_write_time =
+      (output_io_bytes > 0)
+          ? std::ceil(output_io_bytes /
+                      device_info.intermediate_write_gb_per_sec)
+          : 0;
+
+  Costs::NanoSeconds intermediate_memory_cost(intermediate_read_time +
+                                              intermediate_write_time);
+  VLOG(1) << "Op:" << op_info.op() << " Size (KB):" << (total_io_bytes) / 1e3
+          << " Intermediate Memory Time (ns):"
+          << intermediate_memory_cost.count();
+
   Costs costs;
   costs.compute_time = compute_cost;
   costs.memory_time = memory_cost;
+  costs.intermediate_memory_time = intermediate_memory_cost;
   CombineCostsAndUpdateExecutionTime(&costs);
   return costs;
 }
@@ -1273,8 +1297,8 @@ Costs OpLevelCostEstimator::PredictGatherOrSlice(
         CalculateTensorElementCount(op_info.inputs(1), &unknown_shapes);
   }
 
-  const double total_io = input_size + output_size;
-  Costs costs = PredictOpCountBasedCost(op_count, total_io, op_info);
+  Costs costs =
+      PredictOpCountBasedCost(op_count, input_size, output_size, op_info);
   costs.inaccurate = unknown_shapes;
   costs.num_ops_with_unknown_shapes = unknown_shapes;
   costs.max_memory = output_size;
@@ -1291,12 +1315,15 @@ Costs OpLevelCostEstimator::PredictFusedOp(
   // operations here; so we simply add the compute times of each component
   // operation, then update the execution time.
   Costs fused_cost = PredictOpCountBasedCost(0, op_context.op_info);
+
   fused_cost.compute_time = 0;
   fused_cost.inaccurate = false;
   for (auto& fused_op : fused_op_contexts) {
     auto op_cost = PredictCosts(fused_op);
+
     fused_cost.compute_time += op_cost.compute_time;
     fused_cost.inaccurate |= op_cost.inaccurate;
+    fused_cost.intermediate_memory_time += op_cost.intermediate_memory_time;
   }
 
   CombineCostsAndUpdateExecutionTime(&fused_cost);
@@ -1415,8 +1442,8 @@ Costs OpLevelCostEstimator::PredictMaxPool(const OpContext& op_context) const {
   const double total_output_size =
       CalculateOutputSize(op_info, &found_unknown_shapes);
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size, op_info);
+  Costs costs = PredictOpCountBasedCost(ops, total_input_size,
+                                        total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1458,8 +1485,8 @@ Costs OpLevelCostEstimator::PredictMaxPoolGrad(
   const double total_output_size =
       CalculateTensorSize(op_info.inputs(0), &found_unknown_shapes);
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size, op_info);
+  Costs costs = PredictOpCountBasedCost(ops, total_input_size,
+                                        total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1491,8 +1518,8 @@ Costs OpLevelCostEstimator::PredictAvgPool(const OpContext& op_context) const {
   const double total_output_size =
       CalculateOutputSize(op_info, &found_unknown_shapes);
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size, op_info);
+  Costs costs = PredictOpCountBasedCost(ops, total_input_size,
+                                        total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1544,8 +1571,8 @@ Costs OpLevelCostEstimator::PredictAvgPoolGrad(
   const double total_output_size =
       CalculateOutputSize(op_info, &found_unknown_shapes);
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size, op_info);
+  Costs costs = PredictOpCountBasedCost(ops, total_input_size,
+                                        total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1590,9 +1617,9 @@ Costs OpLevelCostEstimator::PredictFusedBatchNorm(
     total_output_size = size_nhwc;
   }
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size + total_internal_read_size,
-      op_info);
+  Costs costs =
+      PredictOpCountBasedCost(ops, total_input_size + total_internal_read_size,
+                              total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1624,9 +1651,9 @@ Costs OpLevelCostEstimator::PredictFusedBatchNormGrad(
   double total_internal_read_size = size_nhwc;
   double total_output_size = size_nhwc * 1 + size_c * 2;
 
-  Costs costs = PredictOpCountBasedCost(
-      ops, total_input_size + total_output_size + total_internal_read_size,
-      op_info);
+  Costs costs =
+      PredictOpCountBasedCost(ops, total_input_size + total_internal_read_size,
+                              total_output_size, op_info);
   costs.inaccurate = found_unknown_shapes;
   costs.num_ops_with_unknown_shapes = found_unknown_shapes;
   costs.max_memory = total_output_size;
@@ -1637,9 +1664,12 @@ Costs OpLevelCostEstimator::PredictFusedBatchNormGrad(
 void OpLevelCostEstimator::CombineCostsAndUpdateExecutionTime(
     Costs* costs) const {
   if (compute_memory_overlap_) {
-    costs->execution_time = std::max(costs->compute_time, costs->memory_time);
+    costs->execution_time =
+        std::max(costs->intermediate_memory_time,
+                 std::max(costs->compute_time, costs->memory_time));
   } else {
-    costs->execution_time = costs->compute_time + costs->memory_time;
+    costs->execution_time = costs->compute_time + costs->memory_time +
+                            costs->intermediate_memory_time;
   }
 }
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
index dd1ee39cb2..84dd9213f7 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
@@ -54,7 +54,8 @@ class OpLevelCostEstimator {
   // Naive cost estimate based on the given operations count and the given total
   // io size in bytes. Sizes of op_info inputs and outputs are not taken into
   // consideration.
-  Costs PredictOpCountBasedCost(double operations, double total_io_bytes,
+  Costs PredictOpCountBasedCost(double operations, double input_io_bytes,
+                                double output_io_bytes,
                                 const OpInfo& op_info) const;
 
   // This family of routines counts the number of operations to perform the
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 5b93fb128f..5c5bdad1cb 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -47,6 +47,7 @@ Costs CombineCosts(const Costs& left, const Costs& right) {
   result.execution_time += right.execution_time;
   result.compute_time += right.compute_time;
   result.memory_time += right.memory_time;
+  result.intermediate_memory_time += right.intermediate_memory_time;
 
   result.num_ops_total += right.num_ops_total;
   if (right.inaccurate) result.inaccurate = true;
@@ -825,23 +826,29 @@ Costs VirtualScheduler::Summary() const {
   VLOG(1) << "Expected execution time: " << graph_costs_.execution_time.count();
   VLOG(1) << "Expected compute time: " << graph_costs_.compute_time.count();
   VLOG(1) << "Expected memory time: " << graph_costs_.memory_time.count();
+  VLOG(1) << "Expected intermediate memory time: "
+          << graph_costs_.intermediate_memory_time.count();
   VLOG(1) << "Expected max memory: " << graph_costs_.max_memory;
   VLOG(1) << "Expected max per-op buffers: " << graph_costs_.max_per_op_buffers;
   VLOG(1) << "Expected max per-op streaming buffers: "
           << graph_costs_.max_per_op_streaming;
 
-  VLOG(1) << "Per-op execution time / compute time / memory time:";
+  VLOG(1) << "Per-op execution time / compute time / memory time"
+          << " / intermediate memory time:";
   for (const auto& op_cost_pair : op_to_cost_) {
     const auto& op = op_cost_pair.first;
     const auto& cost = op_cost_pair.second.execution_time.count();
     const auto& compute_cost = op_cost_pair.second.compute_time.count();
     const auto& memory_cost = op_cost_pair.second.memory_time.count();
+    const auto& intermediate_memory_cost =
+        op_cost_pair.second.intermediate_memory_time.count();
     const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate;
     if (cost) {  // Skip printing out zero-cost ops.
       VLOG(1) << strings::Printf(
-          " + %30s : %c %10lld / %10lld / %10lld", op.c_str(),
+          " + %30s : %c %10lld / %10lld / %10lld / %10lld", op.c_str(),
           (is_op_cost_accurate ? ' ' : '~'), static_cast<int64>(cost),
-          static_cast<int64>(compute_cost), static_cast<int64>(memory_cost));
+          static_cast<int64>(compute_cost), static_cast<int64>(memory_cost),
+          static_cast<int64>(intermediate_memory_cost));
     }
   }
 
@@ -894,7 +901,8 @@ Costs VirtualScheduler::Summary() const {
             << " having unknown shapes";
 
     VLOG(1) << "Per-op execution time / compute time / memory time "
-               "(and memory usage at peak memory usage):";
+            << " / intermediate memory time"
+            << " (and memory usage at peak memory usage):";
 
     // Profile non-persistent op memory usage.
     for (const auto& node_port : state.mem_usage_snapshot_at_peak) {
@@ -910,6 +918,8 @@ Costs VirtualScheduler::Summary() const {
       const auto& cost = op_cost_pair.second.execution_time.count();
       const auto& compute_cost = op_cost_pair.second.compute_time.count();
       const auto& memory_cost = op_cost_pair.second.memory_time.count();
+      const auto& intermediate_memory_cost =
+          op_cost_pair.second.intermediate_memory_time.count();
       total_compute_time_ns += op_cost_pair.second.execution_time;
       const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate;
       if (!is_op_cost_accurate) {
@@ -927,12 +937,13 @@ Costs VirtualScheduler::Summary() const {
                                : 0.0;
       if (cost || mem_usage_percent > 1.0) {
         // Print out only non-zero cost ops or ops with > 1% memory usage.
-        VLOG(1) << strings::Printf(" + %30s : %c %10lld / %10lld / %10lld",
-                                   op.c_str(),
-                                   (is_op_cost_accurate ? ' ' : '~'),
-                                   static_cast<int64>(cost),
-                                   static_cast<int64>(compute_cost),
-                                   static_cast<int64>(memory_cost))
+        VLOG(1) << strings::Printf(
+                       " + %30s : %c %10lld / %10lld / %10lld / %10lld",
+                       op.c_str(), (is_op_cost_accurate ? ' ' : '~'),
+                       static_cast<int64>(cost),
+                       static_cast<int64>(compute_cost),
+                       static_cast<int64>(memory_cost),
+                       static_cast<int64>(intermediate_memory_cost))
                 << " (" << strings::HumanReadableNumBytes(op_mem_usage) << " ["
                 << mem_usage_percent << "%] "
                 << (persisent_ops.count(op) > 0 ? ": persistent op)" : ")");
-- 
GitLab


From ff1b492e75877c684b81335495b46b9503e89172 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Tue, 9 Oct 2018 17:40:24 -0700
Subject: [PATCH 0659/1085] Temporary code for fixing the Windows backslash
 issue

---
 .../kernels/data/matching_files_dataset_op.cc | 41 ++++++++++++++-----
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 74494a302c..0158b31d92 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -101,17 +101,29 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                              bool* end_of_sequence) override {
         mutex_lock l(mu_);
         Status ret;
+        FileSystem* fs;
 
         while (!filepath_queue_.empty() ||
                current_pattern_index_ < dataset()->patterns_.size()) {
           // All the elements in the heap will be the matched filenames or the
           // potential directories.
           if (!filepath_queue_.empty()) {
-            const PathStatus current_path = filepath_queue_.top();
+            PathStatus current_path = filepath_queue_.top();
             filepath_queue_.pop();
 
+            TF_RETURN_IF_ERROR(
+                ctx->env()->GetFileSystemForFile(current_path.first, &fs));
+
             if (!current_path.second) {
               Tensor filepath_tensor(ctx->allocator({}), DT_STRING, {});
+
+              // Replace the forward slash by the backslash for Windows path
+              if (dataset()->patterns_[current_pattern_index_ - 1].find('\\') !=
+                  std::string::npos) {
+                std::replace(current_path.first.begin(),
+                             current_path.first.end(), '/', '\\');
+              }
+
               filepath_tensor.scalar<string>()() =
                   std::move(current_path.first);
               out_tensors->emplace_back(std::move(filepath_tensor));
@@ -122,10 +134,24 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             // In this case, current_path is a directory. Then continue the
             // search.
             ret.Update(
-                UpdateIterator(ctx, current_path.first, current_pattern_));
+                UpdateIterator(ctx, fs, current_path.first, current_pattern_));
           } else {
             // search a new pattern
             current_pattern_ = dataset()->patterns_[current_pattern_index_];
+            TF_RETURN_IF_ERROR(
+                ctx->env()->GetFileSystemForFile(current_pattern_, &fs));
+
+            // Windows paths contain backslashes and Windows APIs accept forward
+            // and backslashes equivalently, so we convert the pattern to use
+            // forward slashes exclusively. The backslash is used as the
+            // indicator of Windows paths. Note that this is not ideal, since
+            // the API expects backslash as an escape character, but no code
+            // appears to rely on this behavior
+            if (current_pattern_.find('\\') != std::string::npos) {
+              std::replace(current_pattern_.begin(), current_pattern_.end(),
+                           '\\', '/');
+            }
+
             StringPiece fixed_prefix =
                 StringPiece(current_pattern_)
                     .substr(0, current_pattern_.find_first_of("*?[\\"));
@@ -140,7 +166,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             std::cout << "Input pattern: " << current_pattern_
                       << "; Current dir: " << current_dir << std::endl;
 
-            ret.Update(UpdateIterator(ctx, current_dir, current_pattern_));
+            ret.Update(UpdateIterator(ctx, fs, current_dir, current_pattern_));
             ++current_pattern_index_;
           }
         }
@@ -207,18 +233,13 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       }
 
      private:
-      Status UpdateIterator(IteratorContext* ctx, const string& dir,
-                            const string& eval_pattern)
+      Status UpdateIterator(IteratorContext* ctx, FileSystem* fs,
+                            const string& dir, const string& eval_pattern)
           EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         StringPiece fixed_prefix =
             StringPiece(eval_pattern)
                 .substr(0, eval_pattern.find_first_of("*?[\\"));
 
-        FileSystem* fs;
-        Status fs_status = ctx->env()->GetFileSystemForFile(dir, &fs);
-        std::cout << "GetFileSystemForFile status: " << fs_status << std::endl;
-        TF_RETURN_IF_ERROR(ctx->env()->GetFileSystemForFile(dir, &fs));
-
         filepath_queue_.push(PathStatus(dir, true));
         Status ret;  // Status to return
 
-- 
GitLab


From 75ee5ee51314feef5654ef315960c26d27d657a5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 17:46:22 -0700
Subject: [PATCH 0660/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216455250

---
 tensorflow/go/op/wrappers.go | 111 ++++++++++++++++++-----------------
 1 file changed, 56 insertions(+), 55 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index f35117084a..c6ecd75587 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -4562,6 +4562,59 @@ func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Out
 	return decoded_indices, decoded_values, decoded_shape, log_probability
 }
 
+// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder.
+type CTCGreedyDecoderAttr func(optionalAttr)
+
+// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value.
+//
+// value: If True, merge repeated classes in output.
+// If not specified, defaults to false
+func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr {
+	return func(m optionalAttr) {
+		m["merge_repeated"] = value
+	}
+}
+
+// Performs greedy decoding on the logits given in inputs.
+//
+// A note about the attribute merge_repeated: if enabled, when
+// consecutive logits' maximum indices are the same, only the first of
+// these is emitted.  Labeling the blank '*', the sequence "A B B * B B"
+// becomes "A B B" if merge_repeated = True and "A B B B B" if
+// merge_repeated = False.
+//
+// Regardless of the value of merge_repeated, if the maximum index of a given
+// time and batch corresponds to the blank, index `(num_classes - 1)`, no new
+// element is emitted.
+//
+// Arguments:
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	sequence_length: A vector containing sequence lengths, size `(batch_size)`.
+//
+// Returns Indices matrix, size `(total_decoded_outputs x 2)`,
+// of a `SparseTensor<int64, 2>`.  The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`,
+// of a `SparseTensor<int64, 2>`.  The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor.
+// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence
+// log-probabilities.
+func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CTCGreedyDecoder",
+		Input: []tf.Input{
+			inputs, sequence_length,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
 // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
 type ResourceStridedSliceAssignAttr func(optionalAttr)
 
@@ -18904,10 +18957,11 @@ func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2
 // Arguments:
 //	empty_key: The key used to represent empty key buckets internally. Must not
 // be used in insert or lookup operations.
+//
 //	value_dtype: Type of the table values.
 //
 // Returns Handle to a table.
-func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) {
+func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, deleted_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -18918,7 +18972,7 @@ func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.D
 	opspec := tf.OpSpec{
 		Type: "MutableDenseHashTableV2",
 		Input: []tf.Input{
-			empty_key,
+			empty_key, deleted_key,
 		},
 		Attrs: attrs,
 	}
@@ -33104,56 +33158,3 @@ func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_va
 	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1)
 }
-
-// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder.
-type CTCGreedyDecoderAttr func(optionalAttr)
-
-// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value.
-//
-// value: If True, merge repeated classes in output.
-// If not specified, defaults to false
-func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr {
-	return func(m optionalAttr) {
-		m["merge_repeated"] = value
-	}
-}
-
-// Performs greedy decoding on the logits given in inputs.
-//
-// A note about the attribute merge_repeated: if enabled, when
-// consecutive logits' maximum indices are the same, only the first of
-// these is emitted.  Labeling the blank '*', the sequence "A B B * B B"
-// becomes "A B B" if merge_repeated = True and "A B B B B" if
-// merge_repeated = False.
-//
-// Regardless of the value of merge_repeated, if the maximum index of a given
-// time and batch corresponds to the blank, index `(num_classes - 1)`, no new
-// element is emitted.
-//
-// Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	sequence_length: A vector containing sequence lengths, size `(batch_size)`.
-//
-// Returns Indices matrix, size `(total_decoded_outputs x 2)`,
-// of a `SparseTensor<int64, 2>`.  The rows store: [batch, time].Values vector, size: `(total_decoded_outputs)`,
-// of a `SparseTensor<int64, 2>`.  The vector stores the decoded classes.Shape vector, size `(2)`, of the decoded SparseTensor.
-// Values are: `[batch_size, max_decoded_length]`.Matrix, size `(batch_size x 1)`, containing sequence
-// log-probabilities.
-func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CTCGreedyDecoder",
-		Input: []tf.Input{
-			inputs, sequence_length,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
-}
-- 
GitLab


From a8cc3cbdeb1563c05d75043c9901135f8b9be65a Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Tue, 9 Oct 2018 17:50:47 -0700
Subject: [PATCH 0661/1085] Fix lite/kernels:add_test for Clang 8.0.0

PiperOrigin-RevId: 216455772
---
 tensorflow/contrib/lite/kernels/add_test.cc | 36 ++++++++++-----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/add_test.cc b/tensorflow/contrib/lite/kernels/add_test.cc
index 0b58443211..261dd36ef0 100644
--- a/tensorflow/contrib/lite/kernels/add_test.cc
+++ b/tensorflow/contrib/lite/kernels/add_test.cc
@@ -108,7 +108,7 @@ TEST(FloatAddOpModel, ActivationRELU_N1_TO_1) {
 }
 
 TEST(FloatAddOpModel, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -124,7 +124,7 @@ TEST(FloatAddOpModel, VariousInputShapes) {
 }
 
 TEST(FloatAddOpModel, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -161,7 +161,7 @@ TEST(IntegerAddOpModel, ActivationRELU_N1_TO_1) {
 }
 
 TEST(IntegerAddOpModel, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerAddOpModel m({TensorType_INT32, test_shapes[i]},
@@ -176,7 +176,7 @@ TEST(IntegerAddOpModel, VariousInputShapes) {
 }
 
 TEST(IntegerAddOpModel, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerAddOpModel m({TensorType_INT32, test_shapes[i]},
@@ -193,11 +193,11 @@ TEST(IntegerAddOpModel, WithBroadcast) {
 
 TEST(QuantizedAddOpModel, QuantizedTestsNoActivation) {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
-  std::vector<std::initializer_list<float>> inputs1 = {
+  std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::initializer_list<float>> inputs2 = {
+  std::vector<std::vector<float>> inputs2 = {
       {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::initializer_list<float>> results = {
+  std::vector<std::vector<float>> results = {
       {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
@@ -217,11 +217,11 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
   const float kMin = -1.f;
   const float kMax = 32767.f / 32768.f;
   float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
-  std::vector<std::initializer_list<float>> inputs1 = {
+  std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::initializer_list<float>> inputs2 = {
+  std::vector<std::vector<float>> inputs2 = {
       {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::initializer_list<float>> results = {
+  std::vector<std::vector<float>> results = {
       {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
@@ -240,12 +240,12 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
 
 TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
-  std::vector<std::initializer_list<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
-                                                       {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::initializer_list<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
-                                                       {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::initializer_list<float>> results = {{-0.2, 0.6, 1.0, -0.1},
-                                                       {-0.2, 0.6, -0.1, 0.8}};
+  std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
+                                             {-0.8, 0.2, 0.7, 0.3}};
+  std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
+                                             {0.6, 0.4, -0.8, 0.5}};
+  std::vector<std::vector<float>> results = {{-0.2, 0.6, 1.0, -0.1},
+                                             {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
                           {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
@@ -262,7 +262,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) {
 
 TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
@@ -281,7 +281,7 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
 
 TEST(QuantizedAddOpModel, QuantizedWithBroadcast) {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-- 
GitLab


From 2db20be49c660a0c475cb57fe0935791d66433ed Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 9 Oct 2018 17:59:06 -0700
Subject: [PATCH 0662/1085] Enable support for lambda functions in static
 analyses.

The CFG treats lambdas as ordinary expressions. The activity analysis ensures that variables masked by the lambda's arguments are not being tracked.

Note: lambdas do not allow direct modification (we exclude indirect mutation via function or methods).
PiperOrigin-RevId: 216456682
---
 tensorflow/python/autograph/pyct/cfg.py       |  4 --
 tensorflow/python/autograph/pyct/cfg_test.py  | 16 +++++++
 .../pyct/static_analysis/activity.py          | 44 ++++++++++++++++---
 .../pyct/static_analysis/activity_test.py     | 34 ++++++++++++++
 4 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/cfg.py b/tensorflow/python/autograph/pyct/cfg.py
index ec733ea38f..fdfcd4dcc1 100644
--- a/tensorflow/python/autograph/pyct/cfg.py
+++ b/tensorflow/python/autograph/pyct/cfg.py
@@ -679,10 +679,6 @@ class AstToCfg(gast.NodeVisitor):
     self.cfgs[node] = self.builder.build()
     self.builder = self.builder_stack.pop()
 
-  def visit_Lambda(self, node):
-    # TODO(mdan): Treat like FunctionDef? That would be a separate CFG.
-    raise NotImplementedError()
-
   def visit_Return(self, node):
     self._process_exit_statement(node, gast.FunctionDef)
 
diff --git a/tensorflow/python/autograph/pyct/cfg_test.py b/tensorflow/python/autograph/pyct/cfg_test.py
index bd82e70f7d..d5870124bc 100644
--- a/tensorflow/python/autograph/pyct/cfg_test.py
+++ b/tensorflow/python/autograph/pyct/cfg_test.py
@@ -964,6 +964,22 @@ class AstToCfgTest(test.TestCase):
         ),
     )
 
+  def test_lambda_basic(self):
+
+    def test_fn(a):
+      a = lambda b: a + b
+      return a
+
+    graph, = self._build_cfg(test_fn).values()
+
+    self.assertGraphMatches(
+        graph,
+        (
+            ('a', 'a = lambda b: a + b', 'return a'),
+            ('a = lambda b: a + b', 'return a', None),
+        ),
+    )
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py
index cc159031ff..0ce410d522 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py
@@ -146,8 +146,15 @@ class ActivityAnalyzer(transformer.Base):
   def __init__(self, context, parent_scope=None, add_unknown_symbols=False):
     super(ActivityAnalyzer, self).__init__(context)
     self.scope = Scope(parent_scope, None, add_unknown_symbols)
+
+    # Note: all these flags crucially rely on the respective nodes are
+    # leaves in the AST, that is, they cannot contain other statements.
     self._in_return_statement = False
     self._in_aug_assign = False
+    self._in_lambda = False
+    self._in_function_def_args = False
+
+    self._untracked_symbols = None
 
   @property
   def _in_constructor(self):
@@ -172,6 +179,13 @@ class ActivityAnalyzer(transformer.Base):
       return
     qn = anno.getanno(node, anno.Basic.QN)
 
+    # Ignore any untracked symbols.
+    if self._untracked_symbols:
+      if qn in self._untracked_symbols:
+        return
+      if qn.owner_set & set(self._untracked_symbols):
+        return
+
     if isinstance(node.ctx, gast.Store):
       self.scope.mark_modified(qn)
       if qn.is_composite and composite_writes_alter_parent:
@@ -181,12 +195,20 @@ class ActivityAnalyzer(transformer.Base):
     elif isinstance(node.ctx, gast.Load):
       self.scope.mark_read(qn)
     elif isinstance(node.ctx, gast.Param):
-      # Param contexts appear in function defs, so they have the meaning of
-      # defining a variable.
-      self.scope.mark_modified(qn)
-      self.scope.mark_param(qn, self.enclosing_entities[-1])
+      if self._in_function_def_args:
+        # In function defs have the meaning of defining a variable.
+        self.scope.mark_modified(qn)
+        self.scope.mark_param(qn, self.enclosing_entities[-1])
+      elif self._in_lambda:
+        assert isinstance(self._untracked_symbols, set)
+        self._untracked_symbols.add(qn)
+      else:
+        # TODO(mdan): Is this case even possible?
+        raise NotImplementedError(
+            'Param "{}" outside a function arguments or lambda.'.format(qn))
     else:
-      raise ValueError('Unknown context %s for node %s.' % (type(node.ctx), qn))
+      raise ValueError('Unknown context {} for node "{}".'.format(
+          type(node.ctx), qn))
 
     if self._in_return_statement:
       self.scope.mark_returned(qn)
@@ -294,6 +316,15 @@ class ActivityAnalyzer(transformer.Base):
       self.scope.merge_from(after_child)
     return parent
 
+  def visit_Lambda(self, node):
+    assert not self._in_lambda or self._in_function_def_args
+    self._in_lambda = True
+    self._untracked_symbols = set()
+    node = self.generic_visit(node)
+    self._untracked_symbols = None
+    self._in_lambda = False
+    return node
+
   def visit_arguments(self, node):
     return self._process_statement(node)
 
@@ -308,7 +339,10 @@ class ActivityAnalyzer(transformer.Base):
 
     # A separate Scope tracks the actual function definition.
     self._enter_scope(True)
+    assert not self._in_function_def_args
+    self._in_function_def_args = True
     node.args = self.visit(node.args)
+    self._in_function_def_args = False
 
     # Track the body separately. This is for compatibility reasons, it may not
     # be strictly needed.
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
index 9a4f1bf09b..678199970c 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
@@ -427,6 +427,40 @@ class ActivityAnalyzerTest(test.TestCase):
     args_scope = anno.getanno(fn_node.args, anno.Static.SCOPE)
     self.assertSymbolSetsAre(('a', 'b'), args_scope.params.keys(), 'params')
 
+  def test_lambda_captures_reads(self):
+
+    def test_fn(a, b):
+      return lambda: a + b
+
+    node, _ = self._parse_and_analyze(test_fn)
+    fn_node = node.body[0]
+    body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE)
+    self.assertScopeIs(body_scope, ('a', 'b'), ())
+    # Nothing local to the lambda is tracked.
+    self.assertSymbolSetsAre((), body_scope.params.keys(), 'params')
+
+  def test_lambda_params_are_isolated(self):
+
+    def test_fn(a, b):  # pylint: disable=unused-argument
+      return lambda a: a + b
+
+    node, _ = self._parse_and_analyze(test_fn)
+    fn_node = node.body[0]
+    body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE)
+    self.assertScopeIs(body_scope, ('b',), ())
+    self.assertSymbolSetsAre((), body_scope.params.keys(), 'params')
+
+  def test_lambda_complex(self):
+
+    def test_fn(a, b, c, d):  # pylint: disable=unused-argument
+      a = (lambda a, b, c: a + b + c)(d, 1, 2) + b
+
+    node, _ = self._parse_and_analyze(test_fn)
+    fn_node = node.body[0]
+    body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE)
+    self.assertScopeIs(body_scope, ('b', 'd'), ('a',))
+    self.assertSymbolSetsAre((), body_scope.params.keys(), 'params')
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 48b24214dd5da842bd00414b46f3e46319c777ee Mon Sep 17 00:00:00 2001
From: Pavithra Vijay <psv@google.com>
Date: Tue, 9 Oct 2018 18:47:55 -0700
Subject: [PATCH 0663/1085] Update model in keras dist strat learning phase
 test to return consistent values.

PiperOrigin-RevId: 216461637
---
 .../contrib/distribute/python/keras_test.py   | 34 +++++++++++--------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 3511b7761f..6553642ad3 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -592,33 +592,37 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
     # meaningful values. Currently we don't pass the learning phase if the
     # Lambda layer uses the learning phase.
     with self.cached_session():
-      x = keras.layers.Input(shape=(16,), name='input')
-      y = keras.layers.Dense(16)(x)
+      x = keras.layers.Input(shape=(1,), name='input')
+      y = keras.layers.Dense(1, kernel_initializer='ones')(x)
       z = keras.layers.Dropout(0.9999)(y)
       model = keras.Model(x, z)
+      initial_weights = model.get_weights()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.005)
       loss = 'mse'
       metrics = ['acc']
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
-                                                     '/device:CPU:0'])
+      strategy = mirrored_strategy.MirroredStrategy(
+          ['/device:GPU:0', '/device:GPU:1'])
 
       model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
 
-      inputs = np.random.rand(10, 16)
-      targets = np.ones((10, 16), dtype=np.float32)
+      inputs = np.ones((10, 1), dtype=np.float32)
+      targets = np.ones((10, 1), dtype=np.float32)
       dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
-      dataset = dataset.repeat(100)
-      dataset = dataset.batch(8)
-
-      hist = model.fit(dataset, epochs=5, steps_per_epoch=20, verbose=1)
-      self.assertEqual(hist.history['acc'][0], 1)
+      dataset = dataset.repeat().batch(8)
+      hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1)
+      self.assertAlmostEqual(hist.history['acc'][0], 0, 0)
 
+      model.set_weights(initial_weights)
       evaluate_output = model.evaluate(dataset, steps=20)
-      self.assertEqual(evaluate_output[1], 0)
-
-      predict_output = model.predict(dataset, steps=1)
-      self.assertNotEqual(np.mean(predict_output), 0)
+      self.assertAlmostEqual(evaluate_output[1], 1, 0)
+
+      inputs = np.ones((10, 1), dtype=np.float32)
+      predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
+      predict_dataset = predict_dataset.repeat().batch(5)
+      output = model.predict(predict_dataset, steps=10)
+      ref_output = np.ones((50, 1), dtype=np.float32)
+      self.assertArrayNear(output[0], ref_output, 1e-1)
 
 
 class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
-- 
GitLab


From 9369994b4b2c4fe822d67a9f65384532cc09c99d Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Tue, 9 Oct 2018 19:06:55 -0700
Subject: [PATCH 0664/1085] Automated rollback of commit
 d78c747e9177fc93d43a580acef2b62eb1420859

PiperOrigin-RevId: 216463443
---
 tensorflow/contrib/lite/python/BUILD        |  2 ++
 tensorflow/contrib/lite/python/lite_test.py | 14 +++-----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD
index be6c44d306..916788f215 100644
--- a/tensorflow/contrib/lite/python/BUILD
+++ b/tensorflow/contrib/lite/python/BUILD
@@ -73,6 +73,7 @@ py_test(
     data = ["@tflite_mobilenet_ssd_quant_protobuf//:tflite_graph.pb"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
         "no_windows",
     ],
     deps = [
@@ -171,6 +172,7 @@ py_test(
     srcs = ["convert_saved_model_test.py"],
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
         "no_windows",
     ],
     visibility = ["//visibility:public"],
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index ef9bbded2a..d243a494f6 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -591,19 +591,11 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
         'Unable to parse input file \'{}\'.'.format(graph_def_file),
         str(error.exception))
 
+  # TODO(nupurgarg): Test model loading in open source.
   def _initObjectDetectionArgs(self):
     # Initializes the arguments required for the object detection model.
-    # Looks for the model file which is saved in a different location interally
-    # and externally.
-    filename = resource_loader.get_path_to_datafile('testdata/tflite_graph.pb')
-    if not os.path.exists(filename):
-      filename = os.path.join(
-          resource_loader.get_root_dir_with_all_resources(),
-          '../tflite_mobilenet_ssd_quant_protobuf/tflite_graph.pb')
-      if not os.path.exists(filename):
-        raise IOError("File '{0}' does not exist.".format(filename))
-
-    self._graph_def_file = filename
+    self._graph_def_file = resource_loader.get_path_to_datafile(
+        'testdata/tflite_graph.pb')
     self._input_arrays = ['normalized_input_image_tensor']
     self._output_arrays = [
         'TFLite_Detection_PostProcess', 'TFLite_Detection_PostProcess:1',
-- 
GitLab


From 93eef55c4d04af24a6c8080f34629db179634f07 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 19:07:52 -0700
Subject: [PATCH 0665/1085] Automated rollback of commit
 9bd459e4ceba14f9bb1af98d52a109325de952e8

PiperOrigin-RevId: 216463491
---
 .../contrib/lite/experimental/objc/BUILD      |  94 ----
 .../contrib/lite/experimental/objc/README.md  |  10 -
 .../Configs/TensorFlowLiteObjc.tulsigen       |  60 ---
 .../project.tulsiconf                         |  17 -
 .../experimental/objc/apis/TFLInterpreter.h   | 188 --------
 .../objc/apis/TFLInterpreterOptions.h         |  37 --
 .../objc/apis/TFLQuantizationParameters.h     |  36 --
 .../lite/experimental/objc/apis/TFLTensor.h   |  77 ---
 .../experimental/objc/sources/TFLErrorUtil.h  |  51 --
 .../experimental/objc/sources/TFLErrorUtil.m  |  45 --
 .../objc/sources/TFLInterpreter.mm            | 440 ------------------
 .../objc/sources/TFLInterpreterOptions.m      |  30 --
 .../objc/sources/TFLQuantizationParameters.m  |  23 -
 .../objc/sources/TFLTensor+Internal.h         |  42 --
 .../experimental/objc/sources/TFLTensor.m     |  54 ---
 .../objc/tests/TFLInterpreterOptionsTests.m   |  49 --
 .../objc/tests/TFLInterpreterTests.m          | 266 -----------
 .../tools/pip_package/pip_smoke_test.py       |   1 -
 18 files changed, 1520 deletions(-)
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/BUILD
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/README.md
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
 delete mode 100644 tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m

diff --git a/tensorflow/contrib/lite/experimental/objc/BUILD b/tensorflow/contrib/lite/experimental/objc/BUILD
deleted file mode 100644
index 236b96adb5..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/BUILD
+++ /dev/null
@@ -1,94 +0,0 @@
-# TensorFlow Lite Objective-C API.
-
-package(default_visibility = ["//visibility:private"])
-
-licenses(["notice"])  # Apache 2.0
-
-load("//tools/build_defs/apple:ios.bzl", "ios_unit_test")
-
-SOURCES = glob([
-    "sources/*.h",
-    "sources/*.m",
-    "sources/*.mm",
-])
-
-API_HEADERS = glob([
-    "apis/*.h",
-])
-
-MINIMUM_OS_VERSION = "8.0"
-
-# Compiler flags for building regular non-test libraries.
-RELEASE_COPTS = [
-    # Enables language-specific warnings for Objective-C, Objective-C++, C, and C++.
-    "-Wall",
-    # Warns if functions, variables, and types marked with the deprecated attribute are being used.
-    "-Wdeprecated-declarations",
-    # Warns for errors in documentation.
-    "-Wdocumentation",
-    # Turns all warnings into errors.
-    "-Werror",
-    # Enables extra warning flags that are not enabled by -Wall.
-    "-Wextra",
-    # Warns if a global function is defined without a previous prototype declaration.
-    "-Wmissing-prototypes",
-    # From -Wextra. Disables warning when signed value is converted to unsigned value during comparison.
-    "-Wno-sign-compare",
-    # From -Wextra. Disables warning for unused parameters, which are common in delegate methods and block callbacks.
-    "-Wno-unused-parameter",
-    # Warns if a global or local variable or type declaration shadows another variable, parameter, type, class member, or instance variable.
-    "-Wshadow",
-    # Warns if a function is declared or defined without specifying the argument types. For a block with no args, use (void) instead of ().
-    "-Wstrict-prototypes",
-    # Warns if an @selector() expression is encountered with a method name that hasn't been defined yet.
-    "-Wundeclared-selector",
-
-    # Turn off warnings for headers not part of TensorFlow Lite Objective-C API.
-    "--system-header-prefix=third_party/tensorflow/contrib/lite/experimental/c/",
-]
-
-# Compiler flags for building test libraries.
-TEST_COPTS = RELEASE_COPTS + [
-    # From -Wall. Disables warning when passing nil to a callee that requires a non-null argument.
-    "-Wno-nonnull",
-    # Disables warning when a global or local variable or type declaration shadows another.
-    "-Wno-shadow",
-]
-
-objc_library(
-    name = "TensorFlowLiteObjCLib",
-    srcs = SOURCES,
-    hdrs = API_HEADERS,
-    copts = RELEASE_COPTS,
-    deps = [
-        "//tensorflow/contrib/lite/experimental/c:c_api",
-    ],
-    alwayslink = 1,
-)
-
-ios_unit_test(
-    name = "TensorFlowLiteObjCTests",
-    size = "small",
-    minimum_os_version = MINIMUM_OS_VERSION,
-    deps = [":TensorFlowLiteObjCTestLib"],
-)
-
-objc_library(
-    name = "TensorFlowLiteObjCTestLib",
-    testonly = 1,
-    srcs = glob([
-        "tests/*.m",
-    ]),
-    hdrs = glob([
-        "apis/*.h",
-        "sources/*.h",
-        "tests/*.h",
-    ]),
-    copts = TEST_COPTS,
-    resources = [
-        "//tensorflow/contrib/lite:testdata/add.bin",
-    ],
-    deps = [
-        ":TensorFlowLiteObjCLib",
-    ],
-)
diff --git a/tensorflow/contrib/lite/experimental/objc/README.md b/tensorflow/contrib/lite/experimental/objc/README.md
deleted file mode 100644
index e8f150b1e8..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# TensorFlow Lite Objective-C API
-
-## TensorFlowLiteObjc Tulsi Project
-
-Open the `TensorFlowLiteObjc.tulsiproj` using the Tulsi application on Mac or by
-running the following command in Terminal from the root source directory:
-
-```shell
-generate_xcodeproj.sh --genconfig tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj:TensorFlowLiteObjC --outputfolder ~/path/to/xcodeproj
-```
diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
deleted file mode 100644
index babb5902d3..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen
+++ /dev/null
@@ -1,60 +0,0 @@
-{
-  "sourceFilters" : [
-    "third_party/tensorflow/contrib/lite",
-    "third_party/tensorflow/contrib/lite/experimental/c",
-    "third_party/tensorflow/contrib/lite/experimental/objc",
-    "third_party/tensorflow/contrib/lite/experimental/objc/apis",
-    "third_party/tensorflow/contrib/lite/experimental/objc/sources",
-    "third_party/tensorflow/contrib/lite/experimental/objc/tests",
-    "third_party/tensorflow/contrib/lite/kernels",
-    "third_party/tensorflow/contrib/lite/kernels/internal",
-    "third_party/tensorflow/contrib/lite/nnapi",
-    "third_party/tensorflow/contrib/lite/schema",
-  ],
-  "buildTargets" : [
-    "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCLib",
-    "//third_party/tensorflow/contrib/lite/experimental/objc:TensorFlowLiteObjCTests",
-  ],
-  "projectName" : "TensorFlowLiteObjC",
-  "optionSet" : {
-    "LaunchActionPreActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "BazelBuildStartupOptionsRelease" : {
-      "p" : "$(inherited)"
-    },
-    "BazelBuildOptionsRelease" : {
-      "p" : "$(inherited)"
-    },
-    "BazelBuildOptionsDebug" : {
-      "p" : "$(inherited)"
-    },
-    "EnvironmentVariables" : {
-      "p" : "$(inherited)"
-    },
-    "BuildActionPreActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "CommandlineArguments" : {
-      "p" : "$(inherited)"
-    },
-    "TestActionPreActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "BazelBuildStartupOptionsDebug" : {
-      "p" : "$(inherited)"
-    },
-    "BuildActionPostActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "TestActionPostActionScript" : {
-      "p" : "$(inherited)"
-    },
-    "LaunchActionPostActionScript" : {
-      "p" : "$(inherited)"
-    }
-  },
-  "additionalFilePaths" : [
-    "third_party/tensorflow/contrib/lite/experimental/objc/BUILD",
-  ]
-}
diff --git a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf b/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
deleted file mode 100644
index 00299cd4cf..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "configDefaults" : {
-    "optionSet" : {
-      "BazelBuildOptionsDebug" : {
-        "p" : "--ios_minimum_os=8.0"
-      },
-      "BazelBuildOptionsRelease" : {
-        "p" : "--ios_minimum_os=8.0"
-      },
-    }
-  },
-  "projectName" : "TensorFlowLiteObjC",
-  "packages" : [
-    "third_party/tensorflow/contrib/lite/experimental/objc"
-  ],
-  "workspaceRoot" : "../../../../../../.."
-}
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
deleted file mode 100644
index c07ffc06ff..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h
+++ /dev/null
@@ -1,188 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-@class TFLInterpreterOptions;
-@class TFLTensor;
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * @enum TFLInterpreterErrorCode
- * This enum specifies various error codes related to `TFLInterpreter`.
- */
-typedef NS_ENUM(NSUInteger, TFLInterpreterErrorCode) {
-  /** Provided tensor index is invalid. */
-  TFLInterpreterErrorCodeInvalidTensorIndex,
-
-  /** Input data has invalid byte size. */
-  TFLInterpreterErrorCodeInvalidInputByteSize,
-
-  /** Provided shape is invalid. It must be a non-empty array of positive unsigned integers. */
-  TFLInterpreterErrorCodeInvalidShape,
-
-  /** Provided model cannot be loaded. */
-  TFLInterpreterErrorCodeFailedToLoadModel,
-
-  /** Failed to create `TFLInterpreter`. */
-  TFLInterpreterErrorCodeFailedToCreateInterpreter,
-
-  /** Failed to invoke `TFLInterpreter`. */
-  TFLInterpreterErrorCodeFailedToInvoke,
-
-  /** Failed to retrieve a tensor. */
-  TFLInterpreterErrorCodeFailedToGetTensor,
-
-  /** Failed to resize an input tensor. */
-  TFLInterpreterErrorCodeFailedToResizeInputTensor,
-
-  /** Failed to copy data into an input tensor. */
-  TFLInterpreterErrorCodeFailedToCopyDataToInputTensor,
-
-  /** Failed to get data from an output tensor. */
-  TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor,
-
-  /** Failed to allocate memory for tensors. */
-  TFLInterpreterErrorCodeFailedToAllocateTensors,
-
-  /** Operaton not allowed without allocating memory for tensors first. */
-  TFLInterpreterErrorCodeAllocateTensorsRequired,
-
-  /** Operaton not allowed without invoking the interpreter first. */
-  TFLInterpreterErrorCodeInvokeInterpreterRequired,
-};
-
-/**
- * A TensorFlow Lite model interpreter.
- */
-@interface TFLInterpreter : NSObject
-
-/** The total number of input tensors. 0 if the interpreter creation failed. */
-@property(nonatomic, readonly) NSUInteger inputTensorCount;
-
-/** The total number of output tensors. 0 if the interpreter creation failed. */
-@property(nonatomic, readonly) NSUInteger outputTensorCount;
-
-/** Unavailable. */
-- (instancetype)init NS_UNAVAILABLE;
-
-/**
- * Initializes a new TensorFlow Lite interpreter instance with the given model file path and the
- * default interpreter options.
- *
- * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
- *
- * @return A new instance of `TFLInterpreter` with the given model and the default interpreter
- *     options.
- */
-- (instancetype)initWithModelPath:(NSString *)modelPath;
-
-/**
- * Initializes a new TensorFlow Lite interpreter instance with the given model file path and
- * options.
- *
- * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
- * @param options Options to use for configuring the TensorFlow Lite interpreter.
- *
- * @return A new instance of `TFLInterpreter` with the given model and options.
- */
-- (instancetype)initWithModelPath:(NSString *)modelPath
-                          options:(TFLInterpreterOptions *)options NS_DESIGNATED_INITIALIZER;
-
-/**
- * Invokes the interpreter to run inference.
- *
- * @param error An optional error parameter populated when there is an error in invoking the
- *     interpreter.
- *
- * @return Whether the invocation is successful. Returns NO if an error occurred.
- */
-- (BOOL)invokeWithError:(NSError **)error;
-
-/**
- * Returns the input tensor at the given index.
- *
- * @param index The index of an input tensor.
- * @param error An optional error parameter populated when there is an error in looking up the input
- *     tensor.
- *
- * @return The input tensor at the given index. `nil` if there is an error.
- */
-- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
-
-/**
- * Returns the output tensor at the given index.
- *
- * @param index The index of an output tensor.
- * @param error An optional error parameter populated when there is an error in looking up the
- *     output tensor.
- *
- * @return The output tensor at the given index. `nil` if there is an error.
- */
-- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
-
-/**
- * Resizes the input tensor at the given index to the specified shape (an array of positive unsigned
- * integers).
- *
- * @param index The index of an input tensor.
- * @param shape Shape that the given input tensor should be resized to. It should be an array of
- *     positive unsigned integer(s) containing the size of each dimension.
- * @param error An optional error parameter populated when there is an error in resizing the input
- *     tensor.
- *
- * @return Whether the input tensor was resized successfully. Returns NO if an error occurred.
- */
-- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
-                         toShape:(NSArray<NSNumber *> *)shape
-                           error:(NSError **)error;
-
-/**
- * Copies the given data into the input tensor at the given index. This is allowed only before the
- * interpreter is invoked.
- *
- * @param data The data to set. The byte size of the data must match what's required by the given
- *     input tensor.
- * @param index The index of an input tensor.
- * @param error An optional error parameter populated when there is an error in setting the data.
- *
- * @return Whether the data was set into the input tensor successfully. Returns NO if an error
- *     occurred.
- */
-- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
-
-/**
- * Gets the data from the output tensor at the given index. The interpreter invocation has to
- * complete before the data can be retrieved from an output tensor.
- *
- * @param index The index of an output tensor.
- * @param error An optional error parameter populated when there is an error in getting the data.
- *
- * @return The data of the output tensor at the given index. `nil` if there is an error.
- */
-- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error;
-
-/**
- * Allocates memory for tensors.
- *
- * @param error An optional error parameter populated when there is an error in allocating memory.
- *
- * @return Whether memory allocation is successful. Returns NO if an error occurred.
- */
-- (BOOL)allocateTensorsWithError:(NSError **)error;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
deleted file mode 100644
index 6461fbf017..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-/** Custom configuration options for a TensorFlow Lite interpreter. */
-@interface TFLInterpreterOptions : NSObject
-
-/**
- * Maximum number of threads that the interpreter should run on. Defaults to 0 (unspecified, letting
- * TensorFlow Lite to optimize the threading decision).
- */
-@property(nonatomic) NSUInteger numberOfThreads;
-
-/**
- * Initializes a new instance of `TFLInterpreterOptions`.
- *
- * @return A new instance of `TFLInterpreterOptions`.
- */
-- (instancetype)init NS_DESIGNATED_INITIALIZER;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
deleted file mode 100644
index 3d5cf793c5..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * Parameters for asymmetric quantization. Quantized values can be converted to float values using:
- * `realValue = scale * (quantizedValue - zeroPoint)`.
- */
-@interface TFLQuantizationParameters : NSObject
-
-/** Scale of asymmetric quantization. */
-@property(nonatomic, readonly) float scale;
-
-/** Zero point of asymmetric quantization. */
-@property(nonatomic, readonly) int32_t zeroPoint;
-
-/** Unavailable. */
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h b/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
deleted file mode 100644
index d08b8fc0e9..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-@class TFLQuantizationParameters;
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * @enum TFLTensorDataType
- * This enum specifies supported TensorFlow Lite tensor data types.
- */
-typedef NS_ENUM(NSUInteger, TFLTensorDataType) {
-  /** Tensor data type not available. This indicates an error with the model. */
-  TFLTensorDataTypeNoType,
-
-  /** 32-bit single precision floating point. */
-  TFLTensorDataTypeFloat32,
-
-  /** 32-bit signed integer. */
-  TFLTensorDataTypeInt32,
-
-  /** 8-bit unsigned integer. */
-  TFLTensorDataTypeUInt8,
-
-  /** 64-bit signed integer. */
-  TFLTensorDataTypeInt64,
-
-  /** Boolean. */
-  TFLTensorDataTypeBool,
-
-  /** 16-bit signed integer. */
-  TFLTensorDataTypeInt16,
-};
-
-/**
- * An input or output tensor in a TensorFlow Lite model.
- */
-@interface TFLTensor : NSObject
-
-/** Name of the tensor. */
-@property(nonatomic, readonly, copy) NSString *name;
-
-/** Data type of the tensor. */
-@property(nonatomic, readonly) TFLTensorDataType dataType;
-
-/**
- * Shape of the tensor, an array of positive unsigned integer(s) containing the size of each
- * dimension. For example: the shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is
- * [2, 2, 3].
- */
-@property(nonatomic, readonly, copy) NSArray<NSNumber *> *shape;
-
-/** Number of bytes for the tensor data. */
-@property(nonatomic, readonly) NSUInteger byteSize;
-
-/** Parameters for asymmetric quantization. `nil` if the tensor does not use quantization. */
-@property(nonatomic, readonly, nullable) TFLQuantizationParameters *quantizationParameters;
-
-/** Unavailable. */
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
deleted file mode 100644
index b6fd4763d6..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import <Foundation/Foundation.h>
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-/** Helper utility for error reporting. */
-@interface TFLErrorUtil : NSObject
-
-/**
- * Creates and returns an interpreter error with the given error code and description.
- *
- * @param code Error code.
- * @param description Error description.
- *
- * @return The created interpreter error with the given error code and description.
- */
-+ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code
-                          description:(NSString *)description;
-
-/**
- * Creates and saves an interpreter error with the given error code and description.
- *
- * @param code Error code.
- * @param description Error description.
- * @param error Pointer to where to save the created error. If `nil`, no error will be saved.
- */
-+ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
-                         description:(NSString *)description
-                               error:(NSError **)error;
-
-/** Unavailable. */
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
deleted file mode 100644
index 756d69481c..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLErrorUtil.m
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "TFLErrorUtil.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-/** Error domain of TensorFlow Lite interpreter related errors. */
-static NSString *const TFLInterpreterErrorDomain = @"org.tensorflow.lite.interpreter";
-
-@implementation TFLErrorUtil
-
-#pragma mark - Public
-
-+ (NSError *)interpreterErrorWithCode:(TFLInterpreterErrorCode)code
-                          description:(NSString *)description {
-  return [NSError errorWithDomain:TFLInterpreterErrorDomain
-                             code:code
-                         userInfo:@{NSLocalizedDescriptionKey : description}];
-}
-
-+ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code
-                         description:(NSString *)description
-                               error:(NSError **)error {
-  if (error) {
-    *error = [NSError errorWithDomain:TFLInterpreterErrorDomain
-                                 code:code
-                             userInfo:@{NSLocalizedDescriptionKey : description}];
-  }
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
deleted file mode 100644
index 0f940a5cf3..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreter.mm
+++ /dev/null
@@ -1,440 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
-
-#import "TFLErrorUtil.h"
-#import "TFLTensor+Internal.h"
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
-
-#include "third_party/tensorflow/contrib/lite/experimental/c/c_api.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * @enum TFLTensorType
- * This enum specifies input or output tensor types.
- */
-typedef NS_ENUM(NSUInteger, TFLTensorType) {
-  /** Input tensor type. */
-  TFLTensorTypeInput,
-
-  /** Output tensor type. */
-  TFLTensorTypeOutput,
-};
-
-// Names used for indicating input or output in error messages.
-static NSString *const kTFLInputDirection = @"input";
-static NSString *const kTFLOutputDirection = @"output";
-
-/**
- * Error reporter for TFLInterpreter.
- *
- * @param user_data User data. Not used.
- * @param format Error message which may contain argument formatting specifiers.
- * @param args Values of the arguments in the error message.
- */
-static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_list args) {
-  NSLog(@"%@", [[NSString alloc] initWithFormat:@(format) arguments:args]);
-}
-
-@interface TFLInterpreter ()
-
-/** TFL_Interpreter backed by C API. */
-@property(nonatomic, nullable) TFL_Interpreter *interpreter;
-
-/**
- * An error in initializing the interpreter. If not `nil`, this error will be reported when the
- * interpreter is used.
- */
-@property(nonatomic, nullable) NSError *initializationError;
-
-@end
-
-@implementation TFLInterpreter
-
-#pragma mark - NSObject
-
-- (void)dealloc {
-  TFL_DeleteInterpreter(_interpreter);
-}
-
-#pragma mark - Public
-
-- (instancetype)initWithModelPath:(NSString *)modelPath {
-  return [self initWithModelPath:modelPath options:[[TFLInterpreterOptions alloc] init]];
-}
-
-- (instancetype)initWithModelPath:(NSString *)modelPath options:(TFLInterpreterOptions *)options {
-  self = [super init];
-
-  if (self != nil) {
-    const char *modelPathCString = modelPath.UTF8String;
-    NSString *pathErrorString =
-        [NSString stringWithFormat:@"Cannot load model from path (%@).", modelPath];
-    if (modelPathCString == nullptr) {
-      _initializationError =
-          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
-                                     description:pathErrorString];
-      return self;
-    }
-
-    TFL_Model *model = TFL_NewModelFromFile(modelPathCString);
-    if (model == nullptr) {
-      _initializationError =
-          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel
-                                     description:pathErrorString];
-      return self;
-    }
-
-    TFL_InterpreterOptions *cOptions = TFL_NewInterpreterOptions();
-    if (cOptions == nullptr) {
-      _initializationError =
-          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
-                                     description:@"Failed to create the interpreter."];
-      TFL_DeleteModel(model);
-      return self;
-    }
-
-    if (options.numberOfThreads > 0) {
-      TFL_InterpreterOptionsSetNumThreads(cOptions, (int32_t)options.numberOfThreads);
-    }
-    TFL_InterpreterOptionsSetErrorReporter(cOptions, TFLInterpreterErrorReporter, nullptr);
-
-    _interpreter = TFL_NewInterpreter(model, cOptions);
-    if (_interpreter == nullptr) {
-      _initializationError =
-          [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
-                                     description:@"Failed to create the interpreter."];
-    } else {
-      _inputTensorCount = (NSUInteger)TFL_InterpreterGetInputTensorCount(_interpreter);
-      _outputTensorCount = (NSUInteger)TFL_InterpreterGetOutputTensorCount(_interpreter);
-      if (_inputTensorCount <= 0 || _outputTensorCount <= 0) {
-        _initializationError =
-            [TFLErrorUtil interpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter
-                                       description:@"Failed to create the interpreter."];
-      }
-    }
-    TFL_DeleteInterpreterOptions(cOptions);
-    TFL_DeleteModel(model);
-  }
-
-  return self;
-}
-
-- (BOOL)invokeWithError:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return NO;
-  }
-
-  if (TFL_InterpreterInvoke(self.interpreter) != kTfLiteOk) {
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToInvoke
-                                   description:@"Failed to invoke the interpreter."
-                                         error:error];
-    return NO;
-  }
-
-  return YES;
-}
-
-- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return nil;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
-    return nil;
-  }
-
-  return [self tensorOfType:TFLTensorTypeInput atIndex:index error:error];
-}
-
-- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return nil;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) {
-    return nil;
-  }
-
-  return [self tensorOfType:TFLTensorTypeOutput atIndex:index error:error];
-}
-
-- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index
-                         toShape:(NSArray<NSNumber *> *)shape
-                           error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return NO;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
-    return NO;
-  }
-
-  if (shape.count == 0) {
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
-                                   description:@"Invalid shape. Must not be empty."
-                                         error:error];
-    return NO;
-  }
-
-  int cDimensions[self.inputTensorCount];
-  for (int d = 0; d < shape.count; ++d) {
-    int dimension = shape[d].intValue;
-    if (dimension <= 0) {
-      NSString *errorDescription = @"Invalid shape. Dimensions must be positive integers.";
-      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape
-                                     description:errorDescription
-                                           error:error];
-      return NO;
-    }
-    cDimensions[d] = dimension;
-  }
-
-  if (TFL_InterpreterResizeInputTensor(self.interpreter, (int32_t)index, cDimensions,
-                                       (int32_t)shape.count) != kTfLiteOk) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Failed to resize input tensor at index (%lu).", (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToResizeInputTensor
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  return YES;
-}
-
-- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return NO;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) {
-    return NO;
-  }
-
-  TFL_Tensor *tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index);
-  if (tensor == nullptr) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Failed to get input tensor at index (%lu).", (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor);
-  if (data.length != byteSize) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Input tensor at index (%lu) expects data size (%lu), but got (%lu).",
-                         (unsigned long)index, byteSize, (unsigned long)data.length];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidInputByteSize
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  if (TFL_TensorCopyFromBuffer(tensor, data.bytes, data.length) != kTfLiteOk) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"Failed to copy data into input tensor at index (%lu).",
-                                   (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  return YES;
-}
-
-- (nullable NSData *)dataFromOutputTensorAtIndex:(NSUInteger)index error:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return nil;
-  }
-
-  if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) {
-    return nil;
-  }
-
-  const TFL_Tensor *tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index);
-  if (tensor == nullptr) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Failed to get output tensor at index (%lu).", (unsigned long)index];
-    [TFLErrorUtil
-        saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor
-                         description:errorDescription
-                               error:error];
-    return nil;
-  }
-
-  void *bytes = TFL_TensorData(tensor);
-  NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(tensor);
-  if (bytes == nullptr || byteSize == 0) {
-    NSString *errorDescription = [NSString
-        stringWithFormat:@"Failed to get output tensor data at index (%lu).", (unsigned long)index];
-    [TFLErrorUtil
-        saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromOutputTensor
-                         description:errorDescription
-                               error:error];
-    return nil;
-  }
-
-  return [NSData dataWithBytes:bytes length:byteSize];
-}
-
-- (BOOL)allocateTensorsWithError:(NSError **)error {
-  if (self.initializationError != nil) {
-    [self saveInitializationErrorToDestination:error];
-    return NO;
-  }
-
-  if (TFL_InterpreterAllocateTensors(self.interpreter) != kTfLiteOk) {
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToAllocateTensors
-                                   description:@"Failed to allocate memory for tensors."
-                                         error:error];
-    return NO;
-  }
-  return YES;
-}
-
-#pragma mark - Private
-
-- (nullable TFLTensor *)tensorOfType:(TFLTensorType)type
-                             atIndex:(NSUInteger)index
-                               error:(NSError **)error {
-  const TFL_Tensor *tensor = nullptr;
-  NSString *tensorType;
-  switch (type) {
-    case TFLTensorTypeInput:
-      tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index);
-      tensorType = kTFLInputDirection;
-      break;
-    case TFLTensorTypeOutput:
-      tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index);
-      tensorType = kTFLOutputDirection;
-      break;
-  }
-
-  if (tensor == nullptr) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"Failed to get %@ tensor at index (%lu).", tensorType,
-                                   (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
-                                   description:errorDescription
-                                         error:error];
-    return nil;
-  }
-
-  const char *cName = TFL_TensorName(tensor);
-  if (cName == nullptr) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"Failed to get name of %@ tensor at index (%lu).", tensorType,
-                                   (unsigned long)index];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
-                                   description:errorDescription
-                                         error:error];
-    return nil;
-  }
-  NSString *name = [NSString stringWithUTF8String:cName];
-
-  TFLTensorDataType dataType = [self tensorDataTypeFromCTensorType:TFL_TensorType(tensor)];
-
-  int32_t rank = TFL_TensorNumDims(tensor);
-  if (rank <= 0) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid rank (%d).", tensorType,
-                                   (unsigned long)index, rank];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
-                                   description:errorDescription
-                                         error:error];
-    return nil;
-  }
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:rank];
-  for (int32_t d = 0; d < rank; d++) {
-    int32_t dimension = TFL_TensorDim(tensor, d);
-    if (dimension <= 0) {
-      NSString *errorDescription =
-          [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid %d-th dimension (%d).",
-                                     tensorType, (unsigned long)index, d, dimension];
-      [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor
-                                     description:errorDescription
-                                           error:error];
-      return nil;
-    }
-    shape[d] = @((NSUInteger)dimension);
-  }
-
-  // TODO: Set quantization parameters when C API supports it.
-  return [[TFLTensor alloc] initWithName:name
-                                dataType:dataType
-                                   shape:shape
-                                byteSize:(NSUInteger)TFL_TensorByteSize(tensor)
-                  quantizationParameters:nil];
-}
-
-- (TFLTensorDataType)tensorDataTypeFromCTensorType:(TFL_Type)cTensorType {
-  switch (cTensorType) {
-    case kTfLiteFloat32:
-      return TFLTensorDataTypeFloat32;
-    case kTfLiteInt32:
-      return TFLTensorDataTypeInt32;
-    case kTfLiteUInt8:
-      return TFLTensorDataTypeUInt8;
-    case kTfLiteInt64:
-      return TFLTensorDataTypeInt64;
-    case kTfLiteBool:
-      return TFLTensorDataTypeBool;
-    case kTfLiteInt16:
-      return TFLTensorDataTypeInt16;
-    case kTfLiteNoType:
-    case kTfLiteString:
-    case kTfLiteComplex64:
-      // kTfLiteString and kTfLiteComplex64 are not supported in TensorFlow Lite Objc API.
-      return TFLTensorDataTypeNoType;
-  }
-}
-
-- (void)saveInitializationErrorToDestination:(NSError **)destination {
-  if (destination != NULL) {
-    *destination = self.initializationError;
-  }
-}
-
-- (BOOL)isValidTensorIndex:(NSUInteger)index
-                belowLimit:(NSUInteger)totalTensorCount
-                     error:(NSError **)error {
-  if (index >= totalTensorCount) {
-    NSString *errorDescription =
-        [NSString stringWithFormat:@"Invalid tensor index (%lu) exceeds max (%lu).",
-                                   (unsigned long)index, (unsigned long)(totalTensorCount - 1)];
-    [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensorIndex
-                                   description:errorDescription
-                                         error:error];
-    return NO;
-  }
-
-  return YES;
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
deleted file mode 100644
index 1776688288..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLInterpreterOptions.m
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-@implementation TFLInterpreterOptions
-
-#pragma mark - Public
-
-- (instancetype)init {
-  self = [super init];
-  return self;
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
deleted file mode 100644
index 190f0479ce..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLQuantizationParameters.m
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLQuantizationParameters.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-@implementation TFLQuantizationParameters
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
deleted file mode 100644
index f2f13e5e5f..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor+Internal.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-@interface TFLTensor (Internal)
-
-/**
- * Initializes a `TFLTensor` with the given name, data type, shape, and quantization parameters.
- *
- * @param name Name of the tensor.
- * @param dataType Data type of the tensor.
- * @param shape Shape of the tensor.
- * @param byteSize Size of the tensor data in number of bytes.
- * @param quantizationParameters Quantization parameters of the tensor. `nil` if the tensor does not
- *     use quantization.
- *
- * @return A new instance of `TFLTensor` with the given name, data type, shape, and quantization
- *     parameters.
- */
-- (instancetype)initWithName:(NSString *)name
-                    dataType:(TFLTensorDataType)dataType
-                       shape:(NSArray<NSNumber *> *)shape
-                    byteSize:(NSUInteger)byteSize
-      quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m b/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
deleted file mode 100644
index adb1c5ad2c..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/sources/TFLTensor.m
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
-
-#import "TFLTensor+Internal.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-@interface TFLTensor ()
-
-// Redefines readonly properties.
-@property(nonatomic, copy) NSString *name;
-@property(nonatomic) TFLTensorDataType dataType;
-@property(nonatomic, copy) NSArray<NSNumber *> *shape;
-@property(nonatomic) NSUInteger byteSize;
-@property(nonatomic, nullable) TFLQuantizationParameters *quantizationParameters;
-
-@end
-
-@implementation TFLTensor
-
-#pragma mark - TFLTensor (Internal)
-
-- (instancetype)initWithName:(NSString *)name
-                    dataType:(TFLTensorDataType)dataType
-                       shape:(NSArray<NSNumber *> *)shape
-                    byteSize:(NSUInteger)byteSize
-      quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters {
-  self = [super init];
-  if (self != nil) {
-    _name = [name copy];
-    _dataType = dataType;
-    _shape = [shape copy];
-    _byteSize = byteSize;
-    _quantizationParameters = quantizationParameters;
-  }
-  return self;
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
deleted file mode 100644
index 17c495fa18..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
-
-#import <XCTest/XCTest.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-/**
- * Unit tests for TFLInterpreterOptions.
- */
-@interface TFLInterpreterOptionsTests : XCTestCase
-@end
-
-@implementation TFLInterpreterOptionsTests
-
-#pragma mark - Tests
-
-- (void)testInit {
-  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
-  XCTAssertNotNil(options);
-  XCTAssertEqual(options.numberOfThreads, 0);
-}
-
-- (void)testSetNumberOfThread {
-  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
-  options.numberOfThreads = 2;
-  XCTAssertEqual(options.numberOfThreads, 2);
-  options.numberOfThreads = 0;
-  XCTAssertEqual(options.numberOfThreads, 0);
-  options.numberOfThreads = 3;
-  XCTAssertEqual(options.numberOfThreads, 3);
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m b/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m
deleted file mode 100644
index 9e6319a732..0000000000
--- a/tensorflow/contrib/lite/experimental/objc/tests/TFLInterpreterTests.m
+++ /dev/null
@@ -1,266 +0,0 @@
-// Copyright 2018 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at:
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreter.h"
-
-#import <XCTest/XCTest.h>
-
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLInterpreterOptions.h"
-#import "third_party/tensorflow/contrib/lite/experimental/objc/apis/TFLTensor.h"
-
-NS_ASSUME_NONNULL_BEGIN
-
-/** Model resource name. */
-static NSString *const kAddModelResourceName = @"add";
-
-/** Model resource type. */
-static NSString *const kAddModelResourceType = @"bin";
-
-/** Rank of the input and output tensor in the Add model. */
-static const NSUInteger kAddModelTensorRank = 1U;
-
-/** Size of the first (and only) dimension of the input and output tensor in the Add model. */
-static const NSUInteger kAddModelTensorFirstDimensionSize = 2U;
-
-/** Invalid input tensor index. */
-static const NSUInteger kInvalidInputTensorIndex = 1U;
-
-/** Invalid output tensor index. */
-static const NSUInteger kInvalidOutputTensorIndex = 1U;
-
-/** Accurary used in comparing floating numbers. */
-static const float kTestAccuracy = 1E-5F;
-
-/**
- * Unit tests for TFLInterpreter.
- */
-@interface TFLInterpreterTests : XCTestCase
-
-/** Absolute path of the Add model resource. */
-@property(nonatomic, nullable) NSString *modelPath;
-
-/** Default interpreter using the Add model. */
-@property(nonatomic, nullable) TFLInterpreter *interpreter;
-
-@end
-
-@implementation TFLInterpreterTests
-
-#pragma mark - XCTestCase
-
-- (void)setUp {
-  [super setUp];
-
-  NSBundle *bundle = [NSBundle bundleForClass:[self class]];
-  self.modelPath = [bundle pathForResource:kAddModelResourceName ofType:kAddModelResourceType];
-  self.interpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath];
-  XCTAssertNotNil(self.interpreter);
-  XCTAssertTrue([self.interpreter allocateTensorsWithError:nil]);
-}
-
-- (void)tearDown {
-  self.modelPath = nil;
-  self.interpreter = nil;
-
-  [super tearDown];
-}
-
-#pragma mark - Tests
-
-- (void)testSuccessfulFullRun {
-  // Shape for both input and output tensor.
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
-  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
-
-  // Creates the interpreter options.
-  TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init];
-  XCTAssertNotNil(options);
-  options.numberOfThreads = 2;
-
-  // Creates the interpreter.
-  TFLInterpreter *customInterpreter = [[TFLInterpreter alloc] initWithModelPath:self.modelPath
-                                                                        options:options];
-  XCTAssertNotNil(customInterpreter);
-
-  // Allocates memory for tensors.
-  NSError *error;
-  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
-  XCTAssertNil(error);
-
-  // Verifies input and output tensor counts.
-  XCTAssertEqual(customInterpreter.inputTensorCount, 1);
-  XCTAssertEqual(customInterpreter.outputTensorCount, 1);
-
-  // Resizes the intput tensor.
-  XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
-  XCTAssertNil(error);
-
-  // Re-allocates memory for tensors.
-  XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]);
-  XCTAssertNil(error);
-
-  // Verifies the input tensor.
-  TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error];
-  XCTAssertNotNil(inputTensor);
-  XCTAssertNil(error);
-  XCTAssertTrue([inputTensor.name isEqualToString:@"input"]);
-  XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeFloat32);
-  XCTAssertTrue([shape isEqualToArray:inputTensor.shape]);
-  XCTAssertEqual(inputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize);
-
-  // Copies the input data.
-  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
-  float one = 1.f;
-  float three = 3.f;
-  [inputData appendBytes:&one length:sizeof(float)];
-  [inputData appendBytes:&three length:sizeof(float)];
-  XCTAssertTrue([customInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
-  XCTAssertNil(error);
-
-  // Invokes the interpreter.
-  XCTAssertTrue([customInterpreter invokeWithError:&error]);
-  XCTAssertNil(error);
-
-  // Verifies the output tensor.
-  TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error];
-  XCTAssertNotNil(outputTensor);
-  XCTAssertNil(error);
-  XCTAssertTrue([outputTensor.name isEqualToString:@"output"]);
-  XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeFloat32);
-  XCTAssertTrue([shape isEqualToArray:outputTensor.shape]);
-  XCTAssertEqual(outputTensor.byteSize, sizeof(float) * kAddModelTensorFirstDimensionSize);
-
-  // Tries to query an invalid output tensor index.
-  TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex
-                                                                    error:&error];
-  XCTAssertNil(invalidOutputTensor);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
-
-  // Gets the output tensor data.
-  error = nil;
-  NSData *outputData = [customInterpreter dataFromOutputTensorAtIndex:0 error:&error];
-  XCTAssertNotNil(outputData);
-  XCTAssertNil(error);
-  float output[kAddModelTensorFirstDimensionSize];
-  [outputData getBytes:output length:(sizeof(float) * kAddModelTensorFirstDimensionSize)];
-  XCTAssertEqualWithAccuracy(output[0], 3.f, kTestAccuracy);
-  XCTAssertEqualWithAccuracy(output[1], 9.f, kTestAccuracy);
-}
-
-- (void)testInitWithModelPath_invalidPath {
-  // Shape for both input and output tensor.
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
-  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
-
-  // Creates the interpreter.
-  TFLInterpreter *brokenInterpreter = [[TFLInterpreter alloc] initWithModelPath:@"InvalidPath"];
-  XCTAssertNotNil(brokenInterpreter);
-  XCTAssertEqual(brokenInterpreter.inputTensorCount, 0);
-  XCTAssertEqual(brokenInterpreter.outputTensorCount, 0);
-
-  // Allocates memory for tensors.
-  NSError *error;
-  XCTAssertFalse([brokenInterpreter allocateTensorsWithError:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Resizes the intput tensor.
-  XCTAssertFalse([brokenInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Verifies the input tensor.
-  TFLTensor *inputTensor = [brokenInterpreter inputTensorAtIndex:0 error:&error];
-  XCTAssertNil(inputTensor);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Copies the input data.
-  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
-  float one = 1.f;
-  float three = 3.f;
-  [inputData appendBytes:&one length:sizeof(float)];
-  [inputData appendBytes:&three length:sizeof(float)];
-  XCTAssertFalse([brokenInterpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Invokes the interpreter.
-  XCTAssertFalse([brokenInterpreter invokeWithError:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Verifies the output tensor.
-  TFLTensor *outputTensor = [brokenInterpreter outputTensorAtIndex:0 error:&error];
-  XCTAssertNil(outputTensor);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-
-  // Gets the output tensor data.
-  NSData *outputData = [brokenInterpreter dataFromOutputTensorAtIndex:0 error:&error];
-  XCTAssertNil(outputData);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel);
-}
-
-- (void)testInvoke_beforeAllocation {
-  TFLInterpreter *interpreterWithoutAllocation =
-      [[TFLInterpreter alloc] initWithModelPath:self.modelPath];
-  XCTAssertNotNil(interpreterWithoutAllocation);
-
-  NSError *error;
-  XCTAssertFalse([interpreterWithoutAllocation invokeWithError:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToInvoke);
-}
-
-- (void)testInputTensorAtIndex_invalidIndex {
-  NSError *error;
-  TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:kInvalidInputTensorIndex
-                                                          error:&error];
-  XCTAssertNil(inputTensor);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
-}
-
-- (void)testResizeInputTensorAtIndex_invalidIndex {
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
-  shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize];
-  NSError *error;
-  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:kInvalidInputTensorIndex
-                                                    toShape:shape
-                                                      error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex);
-}
-
-- (void)testResizeInputTensorAtIndex_emptyShape {
-  NSMutableArray *emptyShape = [NSMutableArray arrayWithCapacity:0];
-  NSError *error;
-  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:emptyShape error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
-}
-
-- (void)testResizeInputTensorAtIndex_zeroDimensionSize {
-  NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank];
-  shape[0] = [NSNumber numberWithUnsignedInteger:0];
-  NSError *error;
-  XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape);
-}
-
-- (void)testCopyDataToInputTensorAtIndex_invalidInputDataByteSize {
-  NSMutableData *inputData = [NSMutableData dataWithCapacity:0];
-  float one = 1.f;
-  float three = 3.f;
-  [inputData appendBytes:&one length:sizeof(float)];
-  [inputData appendBytes:&three length:(sizeof(float) - 1)];
-  NSError *error;
-  XCTAssertFalse([self.interpreter copyData:inputData toInputTensorAtIndex:0 error:&error]);
-  XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidInputByteSize);
-}
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index 31b68c8f00..c6ef82ccdc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -105,7 +105,6 @@ BLACKLIST = [
     "//tensorflow/contrib/timeseries/python/timeseries:test_utils",
     "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils",  # pylint:disable=line-too-long
     "//tensorflow/contrib/image:sparse_image_warp_test_data",
-    "//tools/build_defs/apple:ios.bzl",
 ]
 
 
-- 
GitLab


From 58fcfc98cd59ae3952399fc55380b8733df08df9 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Tue, 9 Oct 2018 19:41:35 -0700
Subject: [PATCH 0666/1085] [XLA] Add documentation and HLO-level support for
 multi-value sort.

No support in any of the backends, and not yet exposed through XlaBuilder.

PiperOrigin-RevId: 216465753
---
 .../xla/service/algebraic_simplifier.cc       |  2 +-
 .../xla/service/algebraic_simplifier_test.cc  | 12 ++++++---
 .../service/bfloat16_normalization_test.cc    |  2 +-
 .../xla/service/hlo_dataflow_analysis_test.cc |  3 ++-
 .../compiler/xla/service/hlo_instruction.cc   | 17 ++++++------
 .../compiler/xla/service/hlo_instruction.h    |  4 +--
 .../compiler/xla/service/hlo_instructions.cc  |  9 +++----
 .../compiler/xla/service/hlo_instructions.h   |  2 +-
 tensorflow/compiler/xla/service/hlo_parser.cc | 20 +++-----------
 .../compiler/xla/service/hlo_parser_test.cc   | 15 +++++++++++
 .../compiler/xla/service/hlo_verifier.cc      | 22 +++++++++-------
 .../compiler/xla/service/shape_inference.cc   | 25 +++++++++++-------
 .../xla/service/shape_inference_test.cc       | 26 ++++++++++++++++++-
 .../service/tuple_points_to_analysis_test.cc  |  3 ++-
 tensorflow/compiler/xla/tests/test_utils.cc   |  6 +++--
 15 files changed, 104 insertions(+), 64 deletions(-)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 86d9dbea90..ca71f2cc12 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -2209,7 +2209,7 @@ Status AlgebraicSimplifierVisitor::HandleSort(HloInstruction* sort) {
     }
     // If it is key/value sort, the output of sort is a tuple.
     return ReplaceWithNewInstruction(
-        sort, HloInstruction::CreateTuple({operand, sort->mutable_operand(1)}));
+        sort, HloInstruction::CreateTuple(sort->operands()));
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 2047f894b4..42d1f337dc 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -2133,16 +2133,20 @@ TEST_F(AlgebraicSimplifierTest, ReplaceEffectiveScalarKeyValueSortWithTuple) {
   Shape values_shape = ShapeUtil::MakeShape(S32, {5, 0});
   auto keys = builder.AddInstruction(
       HloInstruction::CreateParameter(0, keys_shape, "keys"));
-  auto values = builder.AddInstruction(
-      HloInstruction::CreateParameter(1, values_shape, "values"));
+  auto values0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, values_shape, "values0"));
+  auto values1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(2, values_shape, "values1"));
   builder.AddInstruction(HloInstruction::CreateSort(
-      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values));
+      ShapeUtil::MakeTupleShape({keys_shape, values_shape, values_shape}), 0,
+      keys, {values0, values1}));
   auto module = CreateNewModule();
   HloComputation* computation = module->AddEntryComputation(builder.Build());
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
   ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
-  EXPECT_THAT(computation->root_instruction(), op::Tuple(keys, values));
+  EXPECT_THAT(computation->root_instruction(),
+              op::Tuple(keys, values0, values1));
 }
 
 // Used for TEST_Ps that test merging (or not) of a kPad instruction into a
diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
index cef0eba14e..2411fdcb20 100644
--- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
@@ -284,7 +284,7 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSort) {
       HloInstruction::CreateParameter(1, s32_shape, "value"));
 
   HloInstruction* sort = builder.AddInstruction(HloInstruction::CreateSort(
-      ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}), 0, key, value));
+      ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}), 0, key, {value}));
   HloInstruction* gte = builder.AddInstruction(
       HloInstruction::CreateGetTupleElement(bf16_shape, sort, 0));
 
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
index d27786d160..909853106d 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
@@ -2346,7 +2346,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) {
   auto values = builder.AddInstruction(
       HloInstruction::CreateParameter(1, values_shape, "values"));
   auto sort = builder.AddInstruction(HloInstruction::CreateSort(
-      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values));
+      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys,
+      {values}));
 
   BuildModuleAndRunAnalysis(builder.Build());
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 09bcf8a9e7..c317e9e3b4 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -195,17 +195,16 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       }
       break;
     case HloOpcode::kSort: {
-      TF_RET_CHECK(proto.operand_ids_size() == 1 ||
-                   proto.operand_ids_size() == 2)
-          << "Sort instruction should have 1 or 2 operands but has "
+      TF_RET_CHECK(proto.operand_ids_size() >= 1)
+          << "Sort instruction should have at least 1 operand but has "
           << proto.operand_ids_size();
       TF_RET_CHECK(proto.dimensions().size() == 1)
           << "Sort instruction should have 1 dimension";
-      HloInstruction* keys = operands(0);
-      HloInstruction* values =
-          proto.operand_ids_size() == 2 ? operands(1) : nullptr;
-      instruction =
-          CreateSort(proto.shape(), proto.dimensions(0), keys, values);
+      auto sort_operands = all_operands();
+      HloInstruction* keys = sort_operands[0];
+      instruction = CreateSort(
+          proto.shape(), proto.dimensions(0), keys,
+          absl::Span<HloInstruction* const>(sort_operands).subspan(1));
       break;
     }
     case HloOpcode::kTranspose:
@@ -1078,7 +1077,7 @@ HloInstruction::CreateBroadcastSequence(
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateSort(
     const Shape& shape, int64 dimension, HloInstruction* keys,
-    HloInstruction* values) {
+    absl::Span<HloInstruction* const> values) {
   return absl::make_unique<HloSortInstruction>(shape, dimension, keys, values);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 44f776ebac..93ff04b1e4 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -670,10 +670,10 @@ class HloInstruction {
       const Shape& shape, HloInstruction* operand,
       absl::Span<const int64> dimensions);
 
-  // Creates a sort op, with a keys operand, and an optional values operand.
+  // Creates a sort op, with a keys operand, and optional values operands.
   static std::unique_ptr<HloInstruction> CreateSort(
       const Shape& shape, int64 dimension, HloInstruction* keys,
-      HloInstruction* values = nullptr);
+      absl::Span<HloInstruction* const> values = {});
 
   // Creates a while instruction, given a condition computation, a body
   // computation, and the initial value for the input of the computations. For
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 2ec233eaec..179ace2cdb 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -600,11 +600,11 @@ std::unique_ptr<HloInstruction> HloReduceInstruction::CloneWithNewOperandsImpl(
 
 HloSortInstruction::HloSortInstruction(const Shape& shape, int64 dimension,
                                        HloInstruction* keys,
-                                       HloInstruction* values)
+                                       absl::Span<HloInstruction* const> values)
     : HloInstruction(HloOpcode::kSort, shape), dimensions_({dimension}) {
   AppendOperand(keys);
-  if (values) {
-    AppendOperand(values);
+  for (auto* value : values) {
+    AppendOperand(value);
   }
 }
 
@@ -633,9 +633,8 @@ std::unique_ptr<HloInstruction> HloSortInstruction::CloneWithNewOperandsImpl(
     const Shape& shape, absl::Span<HloInstruction* const> new_operands,
     HloCloneContext* context) const {
   HloInstruction* keys = new_operands[0];
-  HloInstruction* values = new_operands.size() == 2 ? new_operands[1] : nullptr;
   return absl::make_unique<HloSortInstruction>(shape, dimensions(0), keys,
-                                               values);
+                                               new_operands.subspan(1));
 }
 
 HloTransposeInstruction::HloTransposeInstruction(
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 4c5fc759a3..3a0b7490dc 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -418,7 +418,7 @@ class HloSortInstruction : public HloInstruction {
  public:
   explicit HloSortInstruction(const Shape& shape, int64 dimension,
                               HloInstruction* keys,
-                              HloInstruction* values = nullptr);
+                              absl::Span<HloInstruction* const> values = {});
   // Returns the dimension sizes or numbers associated with this instruction.
   const std::vector<int64>& dimensions() const override { return dimensions_; }
   int64 dimensions(int64 index) const override { return dimensions()[index]; }
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 96f9ff6654..128113f7a5 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -839,8 +839,6 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
       break;
     }
     case HloOpcode::kSort: {
-      auto loc = lexer_.GetLoc();
-
       optional<std::vector<tensorflow::int64>> dimensions;
       attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List,
                              &dimensions};
@@ -848,20 +846,10 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder,
           dimensions->size() != 1) {
         return false;
       }
-      switch (operands.size()) {
-        case 1:
-          instruction = builder->AddInstruction(HloInstruction::CreateSort(
-              shape, dimensions->at(0), /*keys=*/operands[0]));
-          break;
-        case 2:
-          instruction = builder->AddInstruction(HloInstruction::CreateSort(
-              shape, dimensions->at(0),
-              /*keys=*/operands[0], /*values=*/operands[1]));
-          break;
-        default:
-          return Error(loc, StrCat("expects either 1 or 2 operands, but has ",
-                                   operands.size(), " operands"));
-      }
+      instruction = builder->AddInstruction(HloInstruction::CreateSort(
+          shape, dimensions->at(0),
+          /*keys=*/operands[0],
+          /*values=*/absl::Span<HloInstruction* const>(operands).subspan(1)));
       break;
     }
     case HloOpcode::kTuple: {
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index 17538c05bc..ef2e74588c 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1003,6 +1003,21 @@ ENTRY Sort {
   ROOT sorted = (f32[1024,16]{0,1}, s32[1024,16]{0,1}) sort(keys, values), dimensions={0}
 }
 
+)"
+},
+// Sort (Key, Value, Value, Value)
+{
+"SortManyValues",
+R"(HloModule sort
+
+ENTRY Sort {
+  keys = f32[1024,16]{0,1} parameter(0)
+  values.0 = s32[1024,16]{0,1} parameter(1)
+  values.1 = u32[1024,16]{0,1} parameter(2)
+  values.2 = f32[1024,16]{0,1} parameter(3)
+  ROOT sorted = (f32[1024,16]{0,1}, s32[1024,16]{0,1}, u32[1024,16]{0,1}, f32[1024,16]{0,1}) sort(keys, values.0, values.1, values.2), dimensions={0}
+}
+
 )"
 },
 // Conditional
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 620458855f..a1f668921d 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -266,18 +266,20 @@ Status ShapeVerifier::HandleReverse(HloInstruction* reverse) {
 }
 
 Status ShapeVerifier::HandleSort(HloInstruction* sort) {
-  if (sort->operand_count() < 1 || sort->operand_count() > 2) {
-    return InternalError("Expected 1 or 2 operands for %s instruction: %s",
+  if (sort->operand_count() < 1) {
+    return InternalError("Expected at least 1 operand for %s instruction: %s",
                          HloOpcodeString(sort->opcode()), sort->ToString());
   }
-  if (sort->operand_count() == 2 &&
-      !ShapeUtil::SameDimensions(sort->operand(0)->shape(),
-                                 sort->operand(1)->shape())) {
-    return InternalError(
-        "Expected sort to have to have the same dimensions for the keys and "
-        "the values. Keys shape is: %s\n, Values shape is: %s",
-        StringifyShape(sort->operand(0)->shape()),
-        StringifyShape(sort->operand(1)->shape()));
+  for (int64 operand = 1; operand < sort->operand_count(); ++operand) {
+    if (!ShapeUtil::SameDimensions(sort->operand(0)->shape(),
+                                   sort->operand(operand)->shape())) {
+      return InternalError(
+          "Expected sort to have to have the same dimensions for the keys "
+          "and the values. Keys shape is: %s\n, Values shape (operand index "
+          "%lld) is: %s",
+          StringifyShape(sort->operand(0)->shape()), operand,
+          StringifyShape(sort->operand(operand)->shape()));
+    }
   }
   return CheckVariadicShape(sort);
 }
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index e379911462..aa49f98bcf 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -1029,17 +1029,22 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
     case HloOpcode::kSort: {
       if (operand_shapes.size() == 1) {
         return *operand_shapes[0];
-      } else if (operand_shapes.size() == 2) {
-        if (!ShapeUtil::SameDimensions(*operand_shapes[0],
-                                       *operand_shapes[1])) {
-          return InvalidArgument(
-              "Sort keys and values dimensions must match. "
-              "Keys shape is: %s\n, Values shape is: %s",
-              ShapeUtil::HumanString(*operand_shapes[0]),
-              ShapeUtil::HumanString(*operand_shapes[1]));
+      } else {
+        for (int64 operand = 1; operand < operand_shapes.size(); ++operand) {
+          if (!ShapeUtil::SameDimensions(*operand_shapes[0],
+                                         *operand_shapes[operand])) {
+            return InvalidArgument(
+                "Sort keys and values dimensions must match. "
+                "Keys shape is: %s\n, Values shape (operand index %lld) is: %s",
+                ShapeUtil::HumanString(*operand_shapes[0]), operand,
+                ShapeUtil::HumanString(*operand_shapes[operand]));
+          }
+        }
+        std::vector<Shape> operand_shape_values;
+        for (const Shape* operand_shape : operand_shapes) {
+          operand_shape_values.push_back(*operand_shape);
         }
-        return ShapeUtil::MakeTupleShape(
-            {*operand_shapes[0], *operand_shapes[1]});
+        return ShapeUtil::MakeTupleShape(operand_shape_values);
       }
       return InvalidArgument("Unexpected number of operands for sort");
     }
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index 864ed43118..7b65e8c1c9 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -1618,13 +1618,37 @@ TEST_F(ShapeInferenceTest, BadSort) {
   auto values = ShapeUtil::MakeShape(F32, {5});
   StatusOr<Shape> statusor =
       ShapeInference::InferVariadicOpShape(HloOpcode::kSort, {&keys, &values});
-  ASSERT_FALSE(statusor.ok());
+  EXPECT_FALSE(statusor.ok());
+  EXPECT_THAT(statusor.status().error_message(),
+              HasSubstr("dimensions must match"))
+      << statusor.status();
+}
 
+TEST_F(ShapeInferenceTest, BadSortValuesMismatch) {
+  auto keys = ShapeUtil::MakeShape(F32, {4});
+  auto values_good = ShapeUtil::MakeShape(F32, {4});
+  auto values_bad = ShapeUtil::MakeShape(F32, {5});
+  StatusOr<Shape> statusor = ShapeInference::InferVariadicOpShape(
+      HloOpcode::kSort, {&keys, &values_good, &values_bad});
+  EXPECT_FALSE(statusor.ok());
   EXPECT_THAT(statusor.status().error_message(),
               HasSubstr("dimensions must match"))
       << statusor.status();
 }
 
+TEST_F(ShapeInferenceTest, SortManyValues) {
+  auto keys = ShapeUtil::MakeShape(F32, {4});
+  auto values_s32 = ShapeUtil::MakeShape(S32, {4});
+  auto values_u32 = ShapeUtil::MakeShape(U32, {4});
+  StatusOr<Shape> statusor = ShapeInference::InferVariadicOpShape(
+      HloOpcode::kSort, {&keys, &values_s32, &values_u32});
+  EXPECT_IS_OK(statusor);
+  Shape inferred_shape = statusor.ValueOrDie();
+  EXPECT_TRUE(ShapeUtil::Compatible(
+      inferred_shape,
+      ShapeUtil::MakeTupleShape({keys, values_s32, values_u32})));
+}
+
 class ScatterGatherShapeInferenceTest : public ShapeInferenceTest {
  protected:
   const Shape s64_scalar_ = ShapeUtil::MakeShape(S64, {});
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
index a571bd571b..d9ebebf74e 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
@@ -1073,7 +1073,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) {
   auto values = builder.AddInstruction(
       HloInstruction::CreateParameter(1, values_shape, "values"));
   auto sort = builder.AddInstruction(HloInstruction::CreateSort(
-      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values));
+      ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys,
+      {values}));
 
   BuildModuleAndRunAnalysis(builder.Build());
 
diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc
index 5155f0c652..2f18036ff4 100644
--- a/tensorflow/compiler/xla/tests/test_utils.cc
+++ b/tensorflow/compiler/xla/tests/test_utils.cc
@@ -272,9 +272,11 @@ std::vector<HloInstruction*> FindConstrainedUses(
         constrained_uses.insert(constrained_uses.end(), converted_uses.begin(),
                                 converted_uses.end());
       } else if (opcode == HloOpcode::kSort &&
-                 instruction->operand_count() == 2 && op_num == 0) {
+                 instruction->operand_count() >= 2 && op_num == 0) {
         // Operand 0 of sort is the array of keys used for key/value
-        // (two-operand) kSort instructions.
+        // (two-operand) kSort instructions. Since sort stability is not
+        // guaranteed, constrain keys of key-value sort not to have duplicates,
+        // since otherwise the value order may legitimately differ.
         constrained_uses.push_back(instruction);
       }
     }
-- 
GitLab


From 854ae599743a1e92a31ad49cfe42c6454cefd3b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 20:05:22 -0700
Subject: [PATCH 0667/1085] Use Ophints to support TfLite
 UnidirectionaSequenceLstm and add an e2e test.

Support peephole and num_proj as well.

PiperOrigin-RevId: 216467578
---
 .../lite/experimental/examples/lstm/BUILD     |  40 ++
 .../experimental/examples/lstm/tflite_lstm.py | 396 ++++++++++++++++++
 .../lstm/unidirectional_sequence_lstm_test.py | 226 ++++++++++
 .../propagate_array_data_types.cc             |   6 +
 .../propagate_fixed_sizes.cc                  |  47 +++
 .../contrib/lite/toco/import_tensorflow.cc    |  44 ++
 tensorflow/contrib/lite/toco/model.h          |   6 +
 .../contrib/lite/toco/tflite/operator.cc      |  39 ++
 tensorflow/contrib/lite/toco/tooling_util.cc  |   5 +-
 .../tools/pip_package/pip_smoke_test.py       |   4 +
 10 files changed, 811 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/contrib/lite/experimental/examples/lstm/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py
 create mode 100644 tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py

diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/BUILD b/tensorflow/contrib/lite/experimental/examples/lstm/BUILD
new file mode 100644
index 0000000000..2125f218ca
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/examples/lstm/BUILD
@@ -0,0 +1,40 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//tensorflow:internal"])
+
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+py_library(
+    name = "tflite_lstm",
+    srcs = ["tflite_lstm.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/lite/python:lite",
+        "//tensorflow/python:framework",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "unidirectional_sequence_lstm_test",
+    size = "large",
+    srcs = ["unidirectional_sequence_lstm_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",
+        "no_pip",
+    ],
+    deps = [
+        ":tflite_lstm",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/lite/python:lite",
+        "//tensorflow/examples/tutorials/mnist:input_data",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform",
+        "//tensorflow/python/tools:optimize_for_inference",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py b/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py
new file mode 100644
index 0000000000..2357743266
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py
@@ -0,0 +1,396 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TfLite LSTMCell wrapper.
+
+TODO(renjieliu): Find a better home for this one.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import tensorflow as tf
+
+from tensorflow.contrib.lite.python import lite
+from tensorflow.python.keras import activations
+from tensorflow.python.keras import initializers
+from tensorflow.python.layers import base as base_layer
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import rnn_cell_impl
+from tensorflow.python.platform import tf_logging as logging
+
+
+class TFLiteLSTMCell(rnn_cell_impl.LayerRNNCell):
+  """Long short-term memory unit (LSTM) recurrent network cell.
+
+  This is used only for TfLite, it provides hints and it also makes the
+  variables in the desired for the tflite ops  (transposed and seaparated).
+
+  The default non-peephole implementation is based on:
+
+    https://pdfs.semanticscholar.org/1154/0131eae85b2e11d53df7f1360eeb6476e7f4.pdf
+
+  Felix Gers, Jurgen Schmidhuber, and Fred Cummins.
+  "Learning to forget: Continual prediction with LSTM." IET, 850-855, 1999.
+
+  The peephole implementation is based on:
+
+    https://research.google.com/pubs/archive/43905.pdf
+
+  Hasim Sak, Andrew Senior, and Francoise Beaufays.
+  "Long short-term memory recurrent neural network architectures for
+   large scale acoustic modeling." INTERSPEECH, 2014.
+
+  The class uses optional peep-hole connections, optional cell clipping, and
+  an optional projection layer.
+
+  Note that this cell is not optimized for performance. Please use
+  `tf.contrib.cudnn_rnn.CudnnLSTM` for better performance on GPU, or
+  `tf.contrib.rnn.LSTMBlockCell` and `tf.contrib.rnn.LSTMBlockFusedCell` for
+  better performance on CPU.
+  """
+
+  def __init__(self,
+               num_units,
+               use_peepholes=False,
+               cell_clip=None,
+               initializer=None,
+               num_proj=None,
+               proj_clip=None,
+               num_unit_shards=None,
+               num_proj_shards=None,
+               forget_bias=1.0,
+               state_is_tuple=True,
+               activation=None,
+               reuse=None,
+               name=None,
+               dtype=None):
+    """Initialize the parameters for an LSTM cell.
+
+    Args:
+      num_units: int, The number of units in the LSTM cell.
+      use_peepholes: bool, set True to enable diagonal/peephole connections.
+      cell_clip: (optional) A float value, if provided the cell state is clipped
+        by this value prior to the cell output activation.
+      initializer: (optional) The initializer to use for the weight and
+        projection matrices.
+      num_proj: (optional) int, The output dimensionality for the projection
+        matrices.  If None, no projection is performed.
+      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
+        provided, then the projected values are clipped elementwise to within
+        `[-proj_clip, proj_clip]`.
+      num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a
+        variable_scope partitioner instead.
+      num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a
+        variable_scope partitioner instead.
+      forget_bias: Biases of the forget gate are initialized by default to 1 in
+        order to reduce the scale of forgetting at the beginning of the
+        training. Must set it manually to `0.0` when restoring from CudnnLSTM
+        trained checkpoints.
+      state_is_tuple: If True, accepted and returned states are 2-tuples of the
+        `c_state` and `m_state`.  If False, they are concatenated along the
+        column axis.  This latter behavior will soon be deprecated.
+      activation: Activation function of the inner states.  Default: `tanh`.
+      reuse: (optional) Python boolean describing whether to reuse variables in
+        an existing scope.  If not `True`, and the existing scope already has
+        the given variables, an error is raised.
+      name: String, the name of the layer. Layers with the same name will share
+        weights, but to avoid mistakes we require reuse=True in such cases.
+      dtype: Default dtype of the layer (default of `None` means use the type of
+        the first input). Required when `build` is called before `call`.  When
+        restoring from CudnnLSTM-trained checkpoints, use
+        `CudnnCompatibleLSTMCell` instead.
+    """
+    super(TFLiteLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype)
+    # TODO(raziel): decide if we want to just support tuples (yes please!).
+    if not state_is_tuple:
+      logging.warn(
+          "%s: Using a concatenated state is slower and will soon be "
+          "deprecated.  Use state_is_tuple=True.", self)
+    if num_unit_shards is not None or num_proj_shards is not None:
+      logging.warn(
+          "%s: The num_unit_shards and proj_unit_shards parameters are "
+          "deprecated and will be removed in Jan 2017.  "
+          "Use a variable scope with a partitioner instead.", self)
+
+    # Inputs must be 2-dimensional.
+    # TODO(raziel): layers stuff -- chop if un-layerizing Op.
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+    self._tflite_wrapper = lite.OpHint("UnidirectionalSequenceLstm")
+
+    self._num_units = num_units
+    self._use_peepholes = use_peepholes
+    self._cell_clip = cell_clip
+    self._initializer = initializer
+    self._num_proj = num_proj
+    self._proj_clip = proj_clip
+    self._num_unit_shards = num_unit_shards
+    self._num_proj_shards = num_proj_shards
+    self._forget_bias = forget_bias
+    self._state_is_tuple = state_is_tuple
+    self._activation = activation or math_ops.tanh
+
+    self._output_size = num_proj if num_proj else num_units
+    self._state_size = (
+        tf.nn.rnn_cell.LSTMStateTuple(num_units, self._output_size)
+        if state_is_tuple else num_units + self._output_size)
+
+  @property
+  def state_size(self):
+    return self._state_size
+
+  @property
+  def output_size(self):
+    return self._output_size
+
+  def build(self, inputs_shape):
+    """Build TfLite LSTM cell graph.
+
+    Args:
+      inputs_shape: The inputs_shape must be known, and is [batch_size,
+        input_size] shape.
+
+    Raises:
+      ValueError: if the inputs_shape is invalid.
+    """
+    if len(inputs_shape) != 2 or inputs_shape[1].value is None:
+      raise ValueError("Invalid inputs_shape, saw shape: %s" % inputs_shape)
+
+    input_depth = inputs_shape[1].value
+    maybe_partitioner = (
+        partitioned_variables.fixed_size_partitioner(self._num_unit_shards)
+        if self._num_unit_shards is not None else None)
+    input_weight_shape = [self._num_units, input_depth]
+    cell_weight_shape = [self._num_units, self._output_size]
+    bias_shape = [self._num_units]
+
+    def add_variable_wrapped(name, shape, initializer, index, partitioner):
+      var = self.add_variable(
+          name, shape=shape, initializer=initializer, partitioner=partitioner)
+      return self._tflite_wrapper.add_input(
+          var, name="name", index_override=index)
+
+    weight_initializer = self._initializer
+    if self.dtype is None:
+      bias_initializer = init_ops.zeros_initializer
+    else:
+      bias_initializer = init_ops.zeros_initializer(dtype=self.dtype)
+
+    self.input_to_input_w = add_variable_wrapped(
+        "input_to_input_w", input_weight_shape, weight_initializer, 1,
+        maybe_partitioner)
+    self.input_to_forget_w = add_variable_wrapped(
+        "input_to_forget_w", input_weight_shape, weight_initializer, 2,
+        maybe_partitioner)
+    self.input_to_cell_w = add_variable_wrapped(
+        "input_to_cell_w", input_weight_shape, weight_initializer, 3,
+        maybe_partitioner)
+    self.input_to_output_w = add_variable_wrapped(
+        "input_to_output_w", input_weight_shape, weight_initializer, 4,
+        maybe_partitioner)
+    self.cell_to_input_w = add_variable_wrapped(
+        "cell_to_input_w", cell_weight_shape, weight_initializer, 5,
+        maybe_partitioner)
+    self.cell_to_forget_w = add_variable_wrapped(
+        "cell_to_forget_w", cell_weight_shape, weight_initializer, 6,
+        maybe_partitioner)
+    self.cell_to_cell_w = add_variable_wrapped(
+        "cell_to_cell_w", cell_weight_shape, weight_initializer, 7,
+        maybe_partitioner)
+    self.cell_to_output_w = add_variable_wrapped(
+        "cell_to_output_w", cell_weight_shape, weight_initializer, 8,
+        maybe_partitioner)
+
+    self.input_bias = add_variable_wrapped(
+        "input_bias", bias_shape, bias_initializer, 12, maybe_partitioner)
+    self.forget_bias = add_variable_wrapped(
+        "forget_bias", bias_shape, bias_initializer, 13, maybe_partitioner)
+    self.cell_bias = add_variable_wrapped(
+        "cell_bias", bias_shape, bias_initializer, 14, maybe_partitioner)
+    self.output_bias = add_variable_wrapped(
+        "output_bias", bias_shape, bias_initializer, 15, maybe_partitioner)
+
+    # index 9, 10, 11.
+    # f stands for forget, i stands for input and o stands for output.
+    if self._use_peepholes:
+      self._w_f_diag = add_variable_wrapped("w_f_diag", [self._num_units],
+                                            self._initializer, 9,
+                                            maybe_partitioner)
+      self._w_i_diag = add_variable_wrapped("w_i_diag", [self._num_units],
+                                            self._initializer, 10,
+                                            maybe_partitioner)
+      self._w_o_diag = add_variable_wrapped("w_o_diag", [self._num_units],
+                                            self._initializer, 11,
+                                            maybe_partitioner)
+
+    # index 16 for proj kernel.
+    if self._num_proj is not None:
+      maybe_proj_partitioner = (
+          partitioned_variables.fixed_size_partitioner(self._num_proj_shards)
+          if self._num_proj_shards is not None else None)
+      self._proj_kernel = add_variable_wrapped(
+          "projection/kernel", [self._num_proj, self._num_units],
+          self._initializer,
+          16,
+          partitioner=maybe_proj_partitioner)
+
+    self.built = True
+
+  def call(self, inputs, state):
+    """Run one step of LSTM.
+
+    Args:
+      inputs: input Tensor, 2D, `[batch, num_units]`.
+      state: if `state_is_tuple` is False, this must be a state Tensor, `2-D,
+        [batch, state_size]`.  If `state_is_tuple` is True, this must be a tuple
+        of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`.
+
+    Returns:
+      A tuple containing:
+
+      - A `2-D, [batch, output_dim]`, Tensor representing the output of the
+        LSTM after reading `inputs` when previous state was `state`.
+        Here output_dim is:
+           num_proj if num_proj was set,
+           num_units otherwise.
+      - Tensor(s) representing the new state of LSTM after reading `inputs` when
+        the previous state was `state`.  Same type and shape(s) as `state`.
+
+    Raises:
+      ValueError: If input size cannot be inferred from inputs via
+        static shape inference.
+    """
+    inputs = self._tflite_wrapper.add_input(
+        inputs, tag="input", name="input", aggregate="stack", index_override=0)
+
+    # Make sure inputs and bias_initializer has the same type.
+    assert inputs.dtype == self.input_to_input_w.dtype
+
+    num_proj = self._num_units if self._num_proj is None else self._num_proj
+    sigmoid = math_ops.sigmoid
+
+    if self._state_is_tuple:
+      (c_prev, m_prev) = state
+    else:
+      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
+      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])
+
+    # Note: For TfLite, cell_state is at index 19 while activation state at
+    # index 18.
+    c_prev = self._tflite_wrapper.add_input(
+        c_prev,
+        tag="c_prev",
+        name="c_prev",
+        aggregate="first",
+        index_override=19)
+    m_prev = self._tflite_wrapper.add_input(
+        m_prev,
+        tag="m_prev",
+        name="m_prev",
+        aggregate="first",
+        index_override=18)
+
+    input_size = inputs.get_shape().with_rank(2)[1]
+    if input_size.value is None:
+      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
+
+    inputs_and_m_prev = array_ops.concat([inputs, m_prev], axis=1)
+
+    # i stands for input gate.
+    # f stands for forget gate activation.
+    # o outputs.
+    # j output of LSTM unit.
+    # c is the final state.
+    # m is the output.
+    i = nn_ops.bias_add(
+        tf.matmul(
+            inputs_and_m_prev,
+            tf.concat([self.input_to_input_w, self.cell_to_input_w], axis=1),
+            transpose_b=True), self.input_bias)
+    f = nn_ops.bias_add(
+        tf.matmul(
+            inputs_and_m_prev,
+            tf.concat([self.input_to_forget_w, self.cell_to_forget_w], axis=1),
+            transpose_b=True), self.forget_bias)
+    o = nn_ops.bias_add(
+        tf.matmul(
+            inputs_and_m_prev,
+            tf.concat([self.input_to_output_w, self.cell_to_output_w], axis=1),
+            transpose_b=True), self.output_bias)
+    j = nn_ops.bias_add(
+        tf.matmul(
+            inputs_and_m_prev,
+            tf.concat([self.input_to_cell_w, self.cell_to_cell_w], axis=1),
+            transpose_b=True), self.cell_bias)
+
+    # Diagonal connections
+    if self._use_peepholes:
+      c = (
+          sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
+          sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
+    else:
+      c = (
+          sigmoid(f + self._forget_bias) * c_prev +
+          sigmoid(i) * self._activation(j))
+
+    if self._cell_clip is not None:
+      # pylint: disable=invalid-unary-operand-type
+      c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
+      # pylint: enable=invalid-unary-operand-type
+    if self._use_peepholes:
+      m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
+    else:
+      m = sigmoid(o) * self._activation(c)
+
+    if self._num_proj is not None:
+      transposed_proj_kernel = tf.transpose(self._proj_kernel)
+      m = math_ops.matmul(m, transposed_proj_kernel)
+
+      if self._proj_clip is not None:
+        # pylint: disable=invalid-unary-operand-type
+        m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
+        # pylint: enable=invalid-unary-operand-type
+
+    c = self._tflite_wrapper.add_output(
+        c, tag="c", name="c", aggregate="last", index_override=1)
+    m = self._tflite_wrapper.add_output(
+        m, tag="m", name="m", index_override=2, aggregate="stack")
+
+    new_state = (
+        tf.nn.rnn_cell.LSTMStateTuple(c, m)
+        if self._state_is_tuple else array_ops.concat([c, m], 1))
+    return m, new_state
+
+  def get_config(self):
+    config = {
+        "num_units": self._num_units,
+        "use_peepholes": self._use_peepholes,
+        "cell_clip": self._cell_clip,
+        "initializer": initializers.serialize(self._initializer),
+        "num_proj": self._num_proj,
+        "proj_clip": self._proj_clip,
+        "num_unit_shards": self._num_unit_shards,
+        "num_proj_shards": self._num_proj_shards,
+        "forget_bias": self._forget_bias,
+        "state_is_tuple": self._state_is_tuple,
+        "activation": activations.serialize(self._activation),
+        "reuse": self._reuse,
+    }
+    base_config = super(TFLiteLSTMCell, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py b/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
new file mode 100644
index 0000000000..2ca977518c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
@@ -0,0 +1,226 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import tempfile
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.contrib.lite.experimental.examples.lstm.tflite_lstm import TFLiteLSTMCell
+from tensorflow.examples.tutorials.mnist import input_data
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import test
+from tensorflow.python.tools import optimize_for_inference_lib
+
+# Number of steps to train model.
+TRAIN_STEPS = 1
+
+CONFIG = tf.ConfigProto(device_count={"GPU": 0})
+
+
+class UnidirectionalSequenceLstmTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    tf.reset_default_graph()
+    # Import MNIST dataset
+    self.mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
+
+    # Define constants
+    # Unrolled through 28 time steps
+    self.time_steps = 28
+    # Rows of 28 pixels
+    self.n_input = 28
+    # Learning rate for Adam optimizer
+    self.learning_rate = 0.001
+    # MNIST is meant to be classified in 10 classes(0-9).
+    self.n_classes = 10
+    # Batch size
+    self.batch_size = 16
+    # Lstm Units.
+    self.num_units = 64
+
+  def buildLstmLayer(self):
+    return tf.nn.rnn_cell.MultiRNNCell([
+        TFLiteLSTMCell(
+            self.num_units, use_peepholes=True, forget_bias=0, name="rnn1"),
+        TFLiteLSTMCell(self.num_units, num_proj=64, forget_bias=0, name="rnn2"),
+        TFLiteLSTMCell(
+            self.num_units // 2,
+            use_peepholes=True,
+            num_proj=64,
+            forget_bias=0,
+            name="rnn3"),
+        TFLiteLSTMCell(self.num_units, forget_bias=0, name="rnn4")
+    ])
+
+  def buildModel(self, lstm_layer, is_dynamic_rnn, is_train):
+    # Weights and biases for output softmax layer.
+    out_weights = tf.Variable(
+        tf.random_normal([self.num_units, self.n_classes]))
+    out_bias = tf.Variable(tf.random_normal([self.n_classes]))
+
+    # input image placeholder
+    x = tf.placeholder(
+        "float", [None, self.time_steps, self.n_input], name="INPUT_IMAGE")
+
+    # For dynamic_rnn, train with dynamic_rnn and inference with static_rnn.
+    # x is shaped [batch_size,time_steps,num_inputs]
+    if is_dynamic_rnn:
+      if is_train:
+        lstm_input = x
+        outputs, _ = tf.nn.dynamic_rnn(lstm_layer, lstm_input, dtype="float32")
+        outputs = tf.unstack(outputs, axis=1)
+      else:
+        lstm_input = tf.unstack(x, self.time_steps, 1)
+        outputs, _ = tf.nn.static_rnn(lstm_layer, lstm_input, dtype="float32")
+    else:
+      lstm_input = tf.unstack(x, self.time_steps, 1)
+      outputs, _ = tf.nn.static_rnn(lstm_layer, lstm_input, dtype="float32")
+
+    # Compute logits by multiplying outputs[-1] of shape [batch_size,num_units]
+    # by the softmax layer's out_weight of shape [num_units,n_classes]
+    # plus out_bias
+    prediction = tf.matmul(outputs[-1], out_weights) + out_bias
+    output_class = tf.nn.softmax(prediction, name="OUTPUT_CLASS")
+
+    return x, prediction, output_class
+
+  def trainModel(self, x, prediction, output_class, sess):
+    # input label placeholder
+    y = tf.placeholder("float", [None, self.n_classes])
+    # Loss function
+    loss = tf.reduce_mean(
+        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
+    # Optimization
+    opt = tf.train.AdamOptimizer(
+        learning_rate=self.learning_rate).minimize(loss)
+
+    # Initialize variables
+    init = tf.global_variables_initializer()
+    sess.run(init)
+    for _ in range(TRAIN_STEPS):
+      batch_x, batch_y = self.mnist.train.next_batch(
+          batch_size=self.batch_size, shuffle=False)
+
+      batch_x = batch_x.reshape((self.batch_size, self.time_steps,
+                                 self.n_input))
+      sess.run(opt, feed_dict={x: batch_x, y: batch_y})
+
+  def saveAndRestoreModel(self, lstm_layer, sess, saver, is_dynamic_rnn):
+    model_dir = tempfile.mkdtemp()
+    saver.save(sess, model_dir)
+
+    # Reset the graph.
+    tf.reset_default_graph()
+    x, prediction, output_class = self.buildModel(
+        lstm_layer, is_dynamic_rnn, is_train=False)
+
+    new_sess = tf.Session(config=CONFIG)
+    saver = tf.train.Saver()
+    saver.restore(new_sess, model_dir)
+    return x, prediction, output_class, new_sess
+
+  def getInferenceResult(self, x, output_class, sess):
+    b1, _ = self.mnist.train.next_batch(batch_size=1)
+    sample_input = np.reshape(b1, (1, self.time_steps, self.n_input))
+
+    expected_output = sess.run(output_class, feed_dict={x: sample_input})
+    frozen_graph = tf.graph_util.convert_variables_to_constants(
+        sess, sess.graph_def, [output_class.op.name])
+    return sample_input, expected_output, frozen_graph
+
+  def tfliteInvoke(self, graph, test_inputs, outputs):
+    tf.reset_default_graph()
+    # Turn the input into placeholder of shape 1
+    tflite_input = tf.placeholder(
+        "float", [1, self.time_steps, self.n_input], name="INPUT_IMAGE_LITE")
+    tf.import_graph_def(graph, name="", input_map={"INPUT_IMAGE": tflite_input})
+    with tf.Session() as sess:
+      curr = sess.graph_def
+      curr = tf.contrib.lite.convert_op_hints_to_stubs(graph_def=curr)
+
+    curr = optimize_for_inference_lib.optimize_for_inference(
+        curr, ["INPUT_IMAGE_LITE"], ["OUTPUT_CLASS"],
+        [tf.float32.as_datatype_enum])
+
+    tflite = tf.contrib.lite.toco_convert(
+        curr, [tflite_input], [outputs], allow_custom_ops=False)
+    interpreter = tf.contrib.lite.Interpreter(model_content=tflite)
+
+    try:
+      interpreter.allocate_tensors()
+    except ValueError:
+      assert False
+
+    input_index = (interpreter.get_input_details()[0]["index"])
+    interpreter.set_tensor(input_index, test_inputs)
+    interpreter.invoke()
+    output_index = (interpreter.get_output_details()[0]["index"])
+    result = interpreter.get_tensor(output_index)
+    # Reset all variables so it will not pollute other inferences.
+    interpreter.reset_all_variables()
+    return result
+
+  def testStaticRnnMultiRnnCell(self):
+    sess = tf.Session(config=CONFIG)
+
+    x, prediction, output_class = self.buildModel(
+        self.buildLstmLayer(), is_dynamic_rnn=False, is_train=True)
+    self.trainModel(x, prediction, output_class, sess)
+
+    saver = tf.train.Saver()
+    x, prediction, output_class, new_sess = self.saveAndRestoreModel(
+        self.buildLstmLayer(), sess, saver, is_dynamic_rnn=False)
+
+    test_inputs, expected_output, frozen_graph = self.getInferenceResult(
+        x, output_class, new_sess)
+
+    result = self.tfliteInvoke(frozen_graph, test_inputs, output_class)
+    self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-3))
+
+  def testDynamicRnnMultiRnnCell(self):
+    sess = tf.Session(config=CONFIG)
+
+    x, prediction, output_class = self.buildModel(
+        self.buildLstmLayer(), is_dynamic_rnn=True, is_train=True)
+    self.trainModel(x, prediction, output_class, sess)
+
+    # Since we don't yet support OpHints for dynamic, we will load the model
+    # back in as a static model. This requires the variables to have the same
+    # names as if they were trained as a static. Thus, we get rid of while/rnn
+    # names.
+    variables_to_save = {}
+    for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
+      op_name = i.name
+      if op_name.startswith("while/rnn/"):
+        op_name = op_name.split("while/rnn/")[1]
+      if op_name.endswith(":0"):
+        op_name = op_name.split(":0")[0]
+      variables_to_save[op_name] = i
+    saver = tf.train.Saver(variables_to_save)
+
+    x, prediction, output_class, new_sess = self.saveAndRestoreModel(
+        self.buildLstmLayer(), sess, saver, is_dynamic_rnn=True)
+
+    test_inputs, expected_output, frozen_graph = self.getInferenceResult(
+        x, output_class, new_sess)
+
+    result = self.tfliteInvoke(frozen_graph, test_inputs, output_class)
+    self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-3))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
index 40cd6dea82..47faa20a29 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc
@@ -239,6 +239,12 @@ void SetDataTypeForAllOutputs(Model* model, Operator* op,
       }
       break;
     }
+    case OperatorType::kUnidirectionalSequenceLstm: {
+      const ArrayDataType data_type = model->GetArray(op->inputs[0]).data_type;
+      if (data_type != ArrayDataType::kFloat) return ::tensorflow::Status::OK();
+      SetDataTypeForAllOutputs(model, op, data_type);
+      break;
+    }
     default: {
       // These operators produce outputs with the same type as their 1st input
       CHECK_GT(op->inputs.size(), 0);
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
index 5496e2093e..e861df2b3d 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc
@@ -946,6 +946,49 @@ void ProcessLstmCellOperator(Model* model, LstmCellOperator* op) {
       .copy_shape(activ_temp_shape);
 }
 
+void ProcessUnidirectionalSequenceLstmOperator(
+    Model* model, UnidirectionalSequenceLstmOperator* op) {
+  auto& output_array = model->GetArray(op->outputs[0]);
+  if (output_array.has_shape()) {
+    // Shape already propagated
+    return;
+  }
+
+  if (output_array.data_type == ArrayDataType::kNone) {
+    // Yield until the output type has been set by PropagateArrayDataTypes
+    return;
+  }
+
+  // TODO(renjieliu): check the inputs, as well as all kinds of weights.
+  const auto& input_array = model->GetArray(op->inputs[0]);
+  // Yield until input dims have been resolved.
+  if (!input_array.has_shape()) {
+    return;
+  }
+  const auto& input_shape = input_array.shape();
+  const int batch_size = input_shape.dims(1);
+  const int timestamp = input_shape.dims(0);
+
+  const auto& recurrent_to_output_weights_array =
+      model->GetArray(op->inputs[8]);
+  // Yield until input dims have been resolved.
+  if (!recurrent_to_output_weights_array.has_shape()) {
+    return;
+  }
+
+  constexpr int kInputActivationStateTensor = 18;
+  constexpr int kInputCellStateTensor = 19;
+  // b(115961645): This is a hack to work around.
+  model->GetArray(op->inputs[kInputActivationStateTensor]).buffer.reset();
+  model->GetArray(op->inputs[kInputCellStateTensor]).buffer.reset();
+
+  const auto& output_weights_shape = recurrent_to_output_weights_array.shape();
+  const int output_size = output_weights_shape.dims(1);
+
+  Shape* output_shape = output_array.mutable_shape();
+  output_shape->ReplaceDims({timestamp, batch_size, output_size});
+}
+
 void ProcessSpaceToBatchNDOperator(Model* model, SpaceToBatchNDOperator* op) {
   const auto& input_array = model->GetArray(op->inputs[0]);
   // Yield until input dims have been resolved.
@@ -1800,6 +1843,10 @@ void ProcessUnpackOperator(Model* model, UnpackOperator* op) {
       ProcessResizeBilinearOperator(model,
                                     static_cast<ResizeBilinearOperator*>(op));
       break;
+    case OperatorType::kUnidirectionalSequenceLstm:
+      ProcessUnidirectionalSequenceLstmOperator(
+          model, static_cast<UnidirectionalSequenceLstmOperator*>(op));
+      break;
     case OperatorType::kLstmCell:
       ProcessLstmCellOperator(model, static_cast<LstmCellOperator*>(op));
       break;
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 32f22e1ea0..6b195cc992 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -43,6 +43,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/public/session_options.h"
@@ -2002,6 +2003,48 @@ tensorflow::Status ConvertCTCBeamSearchDecoderOperator(
   return tensorflow::Status::OK();
 }
 
+// This isn't a TensorFlow builtin op. Currently this node can only be generated
+// with TfLite OpHint API.
+tensorflow::Status ConvertUnidirectionalSequenceLstm(
+    const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
+    Model* model) {
+  DCHECK_EQ(node.op(), "UnidirectionalSequenceLstm");
+
+  auto* op = new UnidirectionalSequenceLstmOperator();
+  const auto& indices = GetListAttr(node, "_tflite_input_indices");
+  if (indices.i_size() != node.input().size()) {
+    return tensorflow::errors::InvalidArgument("Input size does not match.");
+  }
+
+  // The input size needs to be the same as the TfLite UniDirectionalSequence
+  // Lstm implementation.
+  const int kInputsSize = 20;
+
+  op->inputs.resize(kInputsSize);
+  std::vector<bool> done(kInputsSize);
+  int idx = 0;
+  for (const string& input : node.input()) {
+    int real_index = indices.i(idx);
+    op->inputs[real_index] = (input);
+    done[real_index] = true;
+    idx++;
+  }
+
+  for (int idx = 0; idx < done.size(); idx++) {
+    if (!done[idx]) {
+      string optional_name = node.name() + "_" + std::to_string(idx);
+      model->CreateOptionalArray(optional_name);
+      op->inputs[idx] = optional_name;
+    }
+  }
+
+  // There're three outputs, only the last one is required.
+  op->outputs.push_back(node.name() + ":2");
+  model->operators.emplace_back(op);
+
+  return tensorflow::Status::OK();
+}
+
 }  // namespace
 
 namespace internal {
@@ -2121,6 +2164,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() {
       {"Transpose", ConvertSimpleOperator<TransposeOperator, 2>},
       {"Unpack", ConvertUnpackOperator},
       {"ZerosLike", ConvertSimpleOperator<TensorFlowZerosLikeOperator, 1>},
+      {"UnidirectionalSequenceLstm", ConvertUnidirectionalSequenceLstm},
   });
 }
 
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 61f1f095e9..f3b84430db 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -58,6 +58,7 @@ enum class OperatorType : uint8 {
   kL2Normalization,
   kL2Pool,
   kLstmCell,
+  kUnidirectionalSequenceLstm,
   kLocalResponseNormalization,
   kLog,
   kLogistic,
@@ -635,6 +636,11 @@ struct LstmCellOperator : Operator {
   KernelType kernel_type;
 };
 
+struct UnidirectionalSequenceLstmOperator : Operator {
+  UnidirectionalSequenceLstmOperator()
+      : Operator(OperatorType::kUnidirectionalSequenceLstm) {}
+};
+
 // Element-wise multiplication operator.
 //
 // Inputs:
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index ed37535fe0..e08a61d357 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -741,6 +741,42 @@ class Lstm : public BuiltinOperator<LstmCellOperator, ::tflite::LSTMOptions,
   }
 };
 
+class UnidirectionalSequenceLstm
+    : public BuiltinOperator<
+          UnidirectionalSequenceLstmOperator,
+          ::tflite::UnidirectionalSequenceLSTMOptions,
+          ::tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    // Current toco converter only supports tanh, no clip.
+    return ::tflite::CreateUnidirectionalSequenceLSTMOptions(
+        *builder, /*fused_activation_function=*/
+        ::tflite::ActivationFunctionType_TANH,
+        /*cell_clip=*/0.0,
+        /*proj_clip=*/0.0);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {
+    // Only support tanh activation, so check that tflite type is tanh.
+    DCHECK(options.fused_activation_function() ==
+           ::tflite::ActivationFunctionType_TANH);
+  }
+
+  int GetVersion(const Operator& op) const override { return 1; }
+
+  std::vector<bool> GetMutatingInputVariables(
+      const Operator& op) const override {
+    std::vector<bool> mutating_input_variables(op.inputs.size(), false);
+    mutating_input_variables[kInputActivationStateTensor] = true;
+    mutating_input_variables[kInputCellStateTensor] = true;
+    return mutating_input_variables;
+  }
+};
+
 class Mean : public BuiltinOperator<MeanOperator, ::tflite::ReducerOptions,
                                     ::tflite::BuiltinOptions_ReducerOptions> {
  public:
@@ -1435,6 +1471,9 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
                                       OperatorType::kFakeQuant));
   ops.push_back(
       MakeUnique<Pack>(::tflite::BuiltinOperator_PACK, OperatorType::kPack));
+  ops.emplace_back(MakeUnique<UnidirectionalSequenceLstm>(
+      ::tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+      OperatorType::kUnidirectionalSequenceLstm));
   ops.push_back(MakeUnique<OneHot>(::tflite::BuiltinOperator_ONE_HOT,
                                    OperatorType::kOneHot));
   ops.push_back(MakeUnique<Unpack>(::tflite::BuiltinOperator_UNPACK,
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 083a96ad9d..61aa311212 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -407,6 +407,7 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(CTCBeamSearchDecoder)
     HANDLE_OPERATORTYPENAME_CASE(Unpack)
     HANDLE_OPERATORTYPENAME_CASE(ZerosLike)
+    HANDLE_OPERATORTYPENAME_CASE(UnidirectionalSequenceLstm)
     default:
       LOG(FATAL) << "Unhandled op type";
 #undef HANDLE_OPERATORTYPENAME_CASE
@@ -898,12 +899,12 @@ void CheckNoMissingArray(const Model& model) {
 void FixNoMissingArray(Model* model) {
   for (const auto& op : model->operators) {
     for (const auto& input : op->inputs) {
-      if (!model->HasArray(input)) {
+      if (!model->HasArray(input) && !model->IsOptionalArray(input)) {
         model->GetOrCreateArray(input);
       }
     }
     for (const auto& output : op->outputs) {
-      if (!model->HasArray(output)) {
+      if (!model->HasArray(output) && !model->IsOptionalArray(output)) {
         model->GetOrCreateArray(output);
       }
     }
diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py
index c6ef82ccdc..45106b35fc 100644
--- a/tensorflow/tools/pip_package/pip_smoke_test.py
+++ b/tensorflow/tools/pip_package/pip_smoke_test.py
@@ -85,6 +85,10 @@ BLACKLIST = [
     # contrib
     "//tensorflow/contrib/session_bundle:session_bundle_half_plus_two",
     "//tensorflow/contrib/keras:testing_utils",
+    "//tensorflow/contrib/lite/experimental/examples/lstm:tflite_lstm",
+    "//tensorflow/contrib/lite/experimental/examples/lstm:tflite_lstm.py",
+    "//tensorflow/contrib/lite/experimental/examples/lstm:unidirectional_sequence_lstm_test",  # pylint:disable=line-too-long
+    "//tensorflow/contrib/lite/experimental/examples/lstm:unidirectional_sequence_lstm_test.py",  # pylint:disable=line-too-long
     "//tensorflow/contrib/lite/python:interpreter",
     "//tensorflow/contrib/lite/python:interpreter_test",
     "//tensorflow/contrib/lite/python:interpreter.py",
-- 
GitLab


From 5d670479c6ea20c510fa46ae1bb45123df75e067 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 20:51:50 -0700
Subject: [PATCH 0668/1085] Add a more verbose error message.

PiperOrigin-RevId: 216471178
---
 tensorflow/contrib/lite/kernels/embedding_lookup.cc    | 10 ++++++++--
 .../contrib/lite/kernels/embedding_lookup_sparse.cc    |  4 +++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup.cc b/tensorflow/contrib/lite/kernels/embedding_lookup.cc
index fe33f98eb0..1d0c71ad48 100644
--- a/tensorflow/contrib/lite/kernels/embedding_lookup.cc
+++ b/tensorflow/contrib/lite/kernels/embedding_lookup.cc
@@ -78,7 +78,10 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
   for (int i = 0; i < SizeOfDimension(lookup, 0); i++) {
     int idx = lookup->data.i32[i];
     if (idx >= row_size || idx < 0) {
-      context->ReportError(context, "Embedding Lookup: index out of bounds.");
+      context->ReportError(context,
+                           "Embedding Lookup: index out of bounds. "
+                           "Got %d, and bounds are [0, %d]",
+                           idx, row_size - 1);
       return kTfLiteError;
     } else {
       memcpy(output->data.raw + i * row_bytes,
@@ -104,7 +107,10 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
   for (int i = 0; i < SizeOfDimension(lookup, 0); i++) {
     int idx = lookup->data.i32[i];
     if (idx >= row_size || idx < 0) {
-      context->ReportError(context, "Embedding Lookup: index out of bounds.");
+      context->ReportError(context,
+                           "Embedding Lookup: index out of bounds. "
+                           "Got %d, and bounds are [0, %d]",
+                           idx, row_size - 1);
       return kTfLiteError;
     } else {
       // Dequantize embedding values.
diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc
index aa75b03990..0b076941ea 100644
--- a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc
+++ b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc
@@ -188,7 +188,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     int idx = ids->data.i32[i];
     if (idx >= num_rows || idx < 0) {
       context->ReportError(context,
-                           "Embedding Lookup Sparse: index out of bounds.");
+                           "Embedding Lookup Sparse: index out of bounds. "
+                           "Got %d, and bounds are [0, %d]",
+                           idx, num_rows - 1);
       return kTfLiteError;
     }
 
-- 
GitLab


From 513de7eaeffe5deb1d1a8c42d24028045f8046e5 Mon Sep 17 00:00:00 2001
From: Hoeseong Kim <hsgkim@snu.ac.kr>
Date: Wed, 10 Oct 2018 13:55:21 +0900
Subject: [PATCH 0669/1085] fixed documentation formatting

---
 .../core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt
index 3c8a455983..9c4015eaa4 100644
--- a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt
@@ -42,8 +42,5 @@ We specify the size-related attributes as:
 ```
 END
   }
-  summary: <<END
-Extract `patches` from `input` and put them in the "depth" output
-dimension. 3D extension of `extract_image_patches`.
-END
+  summary: "Extract `patches` from `input` and put them in the \"depth\" output dimension. 3D extension of `extract_image_patches`."
 }
-- 
GitLab


From 91d625c6f0377bb629b2509bb4f5cb040d870244 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Tue, 9 Oct 2018 21:54:32 -0700
Subject: [PATCH 0670/1085] Fix lstm_test&layer_norm_lstm_test w/ Clang 8.0.0

PiperOrigin-RevId: 216475683
---
 .../lite/kernels/layer_norm_lstm_test.cc      | 116 +++++++++---------
 tensorflow/contrib/lite/kernels/lstm_test.cc  |  92 +++++++-------
 2 files changed, 102 insertions(+), 106 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
index 479f6a7d3c..1535f750f9 100644
--- a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
@@ -129,87 +129,85 @@ class LayerNormLSTMOpModel : public SingleOpModel {
     BuildInterpreter(input_shapes);
   }
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     PopulateTensor(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     PopulateTensor(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     PopulateTensor(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     PopulateTensor(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     PopulateTensor(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_output_weights_, f);
   }
 
-  void SetInputLayerNormWeights(std::initializer_list<float> f) {
+  void SetInputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(input_layer_norm_weights_, f);
   }
 
-  void SetForgetLayerNormWeights(std::initializer_list<float> f) {
+  void SetForgetLayerNormWeights(std::vector<float> f) {
     PopulateTensor(forget_layer_norm_weights_, f);
   }
 
-  void SetCellLayerNormWeights(std::initializer_list<float> f) {
+  void SetCellLayerNormWeights(std::vector<float> f) {
     PopulateTensor(cell_layer_norm_weights_, f);
   }
 
-  void SetOutputLayerNormWeights(std::initializer_list<float> f) {
+  void SetOutputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(output_layer_norm_weights_, f);
   }
 
-  void SetInputGateBias(std::initializer_list<float> f) {
+  void SetInputGateBias(std::vector<float> f) {
     PopulateTensor(input_gate_bias_, f);
   }
 
-  void SetForgetGateBias(std::initializer_list<float> f) {
+  void SetForgetGateBias(std::vector<float> f) {
     PopulateTensor(forget_gate_bias_, f);
   }
 
-  void SetCellBias(std::initializer_list<float> f) {
-    PopulateTensor(cell_bias_, f);
-  }
+  void SetCellBias(std::vector<float> f) { PopulateTensor(cell_bias_, f); }
 
-  void SetOutputGateBias(std::initializer_list<float> f) {
+  void SetOutputGateBias(std::vector<float> f) {
     PopulateTensor(output_gate_bias_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     PopulateTensor(projection_weights_, f);
   }
 
-  void SetProjectionBias(std::initializer_list<float> f) {
+  void SetProjectionBias(std::vector<float> f) {
     PopulateTensor(projection_bias_, f);
   }
 
@@ -278,67 +276,67 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel {
                              use_projection_bias, cell_clip, proj_clip,
                              input_shapes, TensorType_UINT8) {}
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_output_weights_, f);
   }
 
-  void SetInputLayerNormWeights(std::initializer_list<float> f) {
+  void SetInputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(input_layer_norm_weights_, f);
   }
 
-  void SetForgetLayerNormWeights(std::initializer_list<float> f) {
+  void SetForgetLayerNormWeights(std::vector<float> f) {
     PopulateTensor(forget_layer_norm_weights_, f);
   }
 
-  void SetCellLayerNormWeights(std::initializer_list<float> f) {
+  void SetCellLayerNormWeights(std::vector<float> f) {
     PopulateTensor(cell_layer_norm_weights_, f);
   }
 
-  void SetOutputLayerNormWeights(std::initializer_list<float> f) {
+  void SetOutputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(output_layer_norm_weights_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(projection_weights_, f);
   }
 };
@@ -346,26 +344,26 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel {
 class BaseLayerNormLstmTest : public ::testing::Test {
  protected:
   // Weights of the Layer Norm LSTM model. Some are optional.
-  std::initializer_list<float> input_to_input_weights_;
-  std::initializer_list<float> input_to_cell_weights_;
-  std::initializer_list<float> input_to_forget_weights_;
-  std::initializer_list<float> input_to_output_weights_;
-  std::initializer_list<float> input_gate_bias_;
-  std::initializer_list<float> cell_gate_bias_;
-  std::initializer_list<float> forget_gate_bias_;
-  std::initializer_list<float> output_gate_bias_;
-  std::initializer_list<float> recurrent_to_input_weights_;
-  std::initializer_list<float> recurrent_to_cell_weights_;
-  std::initializer_list<float> recurrent_to_forget_weights_;
-  std::initializer_list<float> recurrent_to_output_weights_;
-  std::initializer_list<float> cell_to_input_weights_;
-  std::initializer_list<float> cell_to_forget_weights_;
-  std::initializer_list<float> cell_to_output_weights_;
-  std::initializer_list<float> input_layer_norm_weights_;
-  std::initializer_list<float> forget_layer_norm_weights_;
-  std::initializer_list<float> cell_layer_norm_weights_;
-  std::initializer_list<float> output_layer_norm_weights_;
-  std::initializer_list<float> projection_weights_;
+  std::vector<float> input_to_input_weights_;
+  std::vector<float> input_to_cell_weights_;
+  std::vector<float> input_to_forget_weights_;
+  std::vector<float> input_to_output_weights_;
+  std::vector<float> input_gate_bias_;
+  std::vector<float> cell_gate_bias_;
+  std::vector<float> forget_gate_bias_;
+  std::vector<float> output_gate_bias_;
+  std::vector<float> recurrent_to_input_weights_;
+  std::vector<float> recurrent_to_cell_weights_;
+  std::vector<float> recurrent_to_forget_weights_;
+  std::vector<float> recurrent_to_output_weights_;
+  std::vector<float> cell_to_input_weights_;
+  std::vector<float> cell_to_forget_weights_;
+  std::vector<float> cell_to_output_weights_;
+  std::vector<float> input_layer_norm_weights_;
+  std::vector<float> forget_layer_norm_weights_;
+  std::vector<float> cell_layer_norm_weights_;
+  std::vector<float> output_layer_norm_weights_;
+  std::vector<float> projection_weights_;
 
   // Layer Norm LSTM input is stored as num_batch x num_inputs vector.
   std::vector<std::vector<float>> layer_norm_lstm_input_;
diff --git a/tensorflow/contrib/lite/kernels/lstm_test.cc b/tensorflow/contrib/lite/kernels/lstm_test.cc
index e7ddfceb45..f8947db724 100644
--- a/tensorflow/contrib/lite/kernels/lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_test.cc
@@ -116,71 +116,69 @@ class LSTMOpModel : public SingleOpModel {
     BuildInterpreter(input_shapes);
   }
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     PopulateTensor(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     PopulateTensor(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     PopulateTensor(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     PopulateTensor(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     PopulateTensor(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_output_weights_, f);
   }
 
-  void SetInputGateBias(std::initializer_list<float> f) {
+  void SetInputGateBias(std::vector<float> f) {
     PopulateTensor(input_gate_bias_, f);
   }
 
-  void SetForgetGateBias(std::initializer_list<float> f) {
+  void SetForgetGateBias(std::vector<float> f) {
     PopulateTensor(forget_gate_bias_, f);
   }
 
-  void SetCellBias(std::initializer_list<float> f) {
-    PopulateTensor(cell_bias_, f);
-  }
+  void SetCellBias(std::vector<float> f) { PopulateTensor(cell_bias_, f); }
 
-  void SetOutputGateBias(std::initializer_list<float> f) {
+  void SetOutputGateBias(std::vector<float> f) {
     PopulateTensor(output_gate_bias_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     PopulateTensor(projection_weights_, f);
   }
 
-  void SetProjectionBias(std::initializer_list<float> f) {
+  void SetProjectionBias(std::vector<float> f) {
     PopulateTensor(projection_bias_, f);
   }
 
@@ -243,51 +241,51 @@ class HybridLSTMOpModel : public LSTMOpModel {
                     use_projection_weights, use_projection_bias, cell_clip,
                     proj_clip, input_shapes, TensorType_UINT8) {}
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_output_weights_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(projection_weights_, f);
   }
 };
@@ -295,22 +293,22 @@ class HybridLSTMOpModel : public LSTMOpModel {
 class BaseLstmTest : public ::testing::Test {
  protected:
   // Weights of the LSTM model. Some are optional.
-  std::initializer_list<float> input_to_input_weights_;
-  std::initializer_list<float> input_to_cell_weights_;
-  std::initializer_list<float> input_to_forget_weights_;
-  std::initializer_list<float> input_to_output_weights_;
-  std::initializer_list<float> input_gate_bias_;
-  std::initializer_list<float> cell_gate_bias_;
-  std::initializer_list<float> forget_gate_bias_;
-  std::initializer_list<float> output_gate_bias_;
-  std::initializer_list<float> recurrent_to_input_weights_;
-  std::initializer_list<float> recurrent_to_cell_weights_;
-  std::initializer_list<float> recurrent_to_forget_weights_;
-  std::initializer_list<float> recurrent_to_output_weights_;
-  std::initializer_list<float> cell_to_input_weights_;
-  std::initializer_list<float> cell_to_forget_weights_;
-  std::initializer_list<float> cell_to_output_weights_;
-  std::initializer_list<float> projection_weights_;
+  std::vector<float> input_to_input_weights_;
+  std::vector<float> input_to_cell_weights_;
+  std::vector<float> input_to_forget_weights_;
+  std::vector<float> input_to_output_weights_;
+  std::vector<float> input_gate_bias_;
+  std::vector<float> cell_gate_bias_;
+  std::vector<float> forget_gate_bias_;
+  std::vector<float> output_gate_bias_;
+  std::vector<float> recurrent_to_input_weights_;
+  std::vector<float> recurrent_to_cell_weights_;
+  std::vector<float> recurrent_to_forget_weights_;
+  std::vector<float> recurrent_to_output_weights_;
+  std::vector<float> cell_to_input_weights_;
+  std::vector<float> cell_to_forget_weights_;
+  std::vector<float> cell_to_output_weights_;
+  std::vector<float> projection_weights_;
 
   // LSTM input is stored as num_batch x num_inputs vector.
   std::vector<std::vector<float>> lstm_input_;
-- 
GitLab


From dcf641daac0f2fee74eafbb0de1d32f6c8c4c6fd Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 9 Oct 2018 22:57:45 -0700
Subject: [PATCH 0671/1085] Remove python shebang line from gen_git_source.

PiperOrigin-RevId: 216479972
---
 tensorflow/tensorflow.bzl              | 4 ++--
 tensorflow/tools/git/BUILD             | 6 ++++--
 tensorflow/tools/git/gen_git_source.py | 1 -
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index cad5de1b0c..df15914233 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1967,9 +1967,9 @@ def tf_version_info_genrule():
         ],
         outs = ["util/version_info.cc"],
         cmd =
-            "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\" --git_tag_override=$${GIT_TAG_OVERRIDE:-}",
+            "$(location //tensorflow/tools/git:gen_git_source) --generate $(SRCS) \"$@\" --git_tag_override=$${GIT_TAG_OVERRIDE:-}",
         local = 1,
-        tools = [clean_dep("//tensorflow/tools/git:gen_git_source.py")],
+        tools = [clean_dep("//tensorflow/tools/git:gen_git_source")],
     )
 
 def tf_py_build_info_genrule():
diff --git a/tensorflow/tools/git/BUILD b/tensorflow/tools/git/BUILD
index daa17fbd50..34a5167948 100644
--- a/tensorflow/tools/git/BUILD
+++ b/tensorflow/tools/git/BUILD
@@ -6,6 +6,8 @@ package(default_visibility = ["//tensorflow:internal"])
 
 licenses(["notice"])  # Apache 2.0
 
-exports_files(
-    ["gen_git_source.py"],
+py_binary(
+    name = "gen_git_source",
+    srcs = ["gen_git_source.py"],
+    srcs_version = "PY2AND3",
 )
diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py
index cc2288a7fa..8e7cd9b104 100755
--- a/tensorflow/tools/git/gen_git_source.py
+++ b/tensorflow/tools/git/gen_git_source.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
-- 
GitLab


From 5a2d98f7f7cf6f52eb0496bf27be07d9e1f29040 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 23:57:17 -0700
Subject: [PATCH 0672/1085] Run while loop test that was not being run before.

PiperOrigin-RevId: 216483744
---
 tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index baea5c0f6d..a5f85b97f7 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -1116,8 +1116,8 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(10.0, r.eval())
 
   def testWhile_Gpu_2(self):
-    self._testWhile_Gpu_1(use_gpu=False)
-    self._testWhile_Gpu_1(use_gpu=True)
+    self._testWhile_Gpu_2(use_gpu=False)
+    self._testWhile_Gpu_2(use_gpu=True)
 
   def testWhileShape(self):
     with self.cached_session():
-- 
GitLab


From 1409ea9dbd8275dcbd394451d2cb878e0e873d45 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 9 Oct 2018 23:57:18 -0700
Subject: [PATCH 0673/1085] Delete dead code in batch_scatter_ops_test.

PiperOrigin-RevId: 216483746
---
 .../python/kernel_tests/batch_scatter_ops_test.py      | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tensorflow/python/kernel_tests/batch_scatter_ops_test.py b/tensorflow/python/kernel_tests/batch_scatter_ops_test.py
index 0d41a7e3b3..498e5f05a3 100644
--- a/tensorflow/python/kernel_tests/batch_scatter_ops_test.py
+++ b/tensorflow/python/kernel_tests/batch_scatter_ops_test.py
@@ -73,16 +73,6 @@ class ScatterTest(test.TestCase):
           tf_scatter(ref, indices, updates).eval()
           self.assertAllClose(ref.eval(), new)
 
-  def _VariableRankTests(self,
-                         tf_scatter):
-    vtypes = [np.float32, np.float64]
-    if tf_scatter != state_ops.scatter_div:
-      vtypes.append(np.int32)
-
-    for vtype in vtypes:
-      for itype in (np.int32, np.int64):
-        self._VariableRankTest(tf_scatter, vtype, itype)
-
   def testVariableRankUpdate(self):
     vtypes = [np.float32, np.float64]
     for vtype in vtypes:
-- 
GitLab


From 7575e0949703a4dd0ec19e51e568e9abba037728 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 02:01:57 -0700
Subject: [PATCH 0674/1085] compat: Update forward compatibility horizon to
 2018-10-10

PiperOrigin-RevId: 216495091
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 0e14c0e044..b7a1fce586 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 9)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 10)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From ee7c9597f4ab8e586e921f9fe3e3c1383417169c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 02:22:32 -0700
Subject: [PATCH 0675/1085] Emit xla::Or in TensorArrayScatterV3 for PRED types
 instead of xla::Add

Previosuly we emitted xla::Add what isn't supported by some XLA backend
on PRED types.

PiperOrigin-RevId: 216497939
---
 .../compiler/tests/tensor_array_ops_test.py   | 37 +++++++++++++++++--
 .../tf2xla/kernels/tensor_array_ops.cc        | 26 +++++++++----
 2 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/tests/tensor_array_ops_test.py b/tensorflow/compiler/tests/tensor_array_ops_test.py
index 78244d0b36..46ca371c8a 100644
--- a/tensorflow/compiler/tests/tensor_array_ops_test.py
+++ b/tensorflow/compiler/tests/tensor_array_ops_test.py
@@ -920,6 +920,34 @@ class TensorArrayTest(xla_test.XLATestCase):
   def testTensorArrayEvalEmptyWithDefault(self):
     self._testTensorArrayEvalEmptyWithDefault()
 
+  def _testTensorArrayScatterRead(self, tf_dtype):
+    with self.cached_session() as session, self.test_scope():
+      convert = _make_converter(tf_dtype)
+
+      ta = tensor_array_ops.TensorArray(
+          dtype=tf_dtype,
+          tensor_array_name="foo",
+          size=10)
+
+      indices = constant_op.constant([1, 8])
+      value = constant_op.constant(convert([[1.0, -1.0], [10.0, -10.0]]))
+      id0 = array_ops.placeholder(dtypes.int32)
+      id1 = array_ops.placeholder(dtypes.int32)
+
+      w = ta.scatter(indices, value)
+      r0 = w.read(id0)
+      r1 = w.read(id1)
+
+      # Test aggregation of read
+      read_vals = session.run([r0, r1], feed_dict={id0: 1, id1: 8})
+      self.assertAllEqual(convert([1.0, -1.0]), read_vals[0])
+      self.assertAllEqual(convert([10.0, -10.0]), read_vals[1])
+
+  def testTensorArrayScatterRead(self):
+    for dtype in self.numeric_tf_types:
+      self._testTensorArrayScatterRead(dtype)
+    self._testTensorArrayScatterRead(dtypes.bool)
+
   def testTensorArrayScatterReadAndGradients(self):
     with self.cached_session() as session, self.test_scope():
       ta = tensor_array_ops.TensorArray(
@@ -929,15 +957,18 @@ class TensorArrayTest(xla_test.XLATestCase):
 
       indices = constant_op.constant([1, 8])
       value = constant_op.constant([[1.0, -1.0], [10.0, -10.0]])
+      id0 = array_ops.placeholder(dtypes.int32)
+      id1 = array_ops.placeholder(dtypes.int32)
 
       w = ta.scatter(indices, value)
-      r0 = w.read(1)
-      r1 = w.read(8)
+      r0 = w.read(id0)
+      r1 = w.read(id1)
 
       # Test combined gradients + aggregation of read(0).
       grad = gradients_impl.gradients(
           ys=[r0, r1], xs=[value], grad_ys=[[2.0, 3.0], [4.0, 5.0]])
-      read_vals, grad_vals = session.run([[r0, r1], grad])
+      read_vals, grad_vals = session.run([[r0, r1], grad],
+                                         feed_dict={id0: 1, id1: 8})
 
       self.assertEqual(len(read_vals), 2)
       self.assertEqual(len(grad_vals), 1)
diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
index 94108b764f..6cdfaf4d97 100644
--- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
@@ -123,9 +123,10 @@ Status GetTensorArrayShape(const XlaResource* resource,
 xla::XlaOp DynamicAddSlice(xla::XlaBuilder* builder, const xla::XlaOp& operand,
                            const xla::XlaOp& update,
                            absl::Span<const int64> update_dims,
-                           const xla::XlaOp& start_indices) {
+                           const xla::XlaOp& start_indices, DataType dtype) {
   xla::XlaOp current = xla::DynamicSlice(operand, start_indices, update_dims);
-  xla::XlaOp sum = xla::Add(current, update);
+  xla::XlaOp sum =
+      dtype == DT_BOOL ? xla::Or(current, update) : xla::Add(current, update);
   return xla::DynamicUpdateSlice(operand, sum, start_indices);
 }
 
@@ -222,8 +223,8 @@ class TensorArrayWriteOp : public XlaOpKernel {
     slice_shape.InsertDim(0, 1LL);
     auto update = xla::Reshape(value, slice_shape.dim_sizes());
 
-    xla::XlaOp written =
-        DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(), start_indices);
+    xla::XlaOp written = DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(),
+                                         start_indices, dtype_);
 
     OP_REQUIRES_OK(ctx, resource->SetValue(written));
     ctx->SetOutput(0, flow);
@@ -391,7 +392,11 @@ class TensorArrayScatterOp : public XlaOpKernel {
     }
 
     if (scatter_all_elements_in_order) {
-      ta = xla::Add(ta, value);
+      if (dtype_ == DT_BOOL) {
+        ta = xla::Or(ta, value);
+      } else {
+        ta = xla::Add(ta, value);
+      }
     } else {
       auto slice_dims = value_shape.dim_sizes();
       slice_dims[0] = 1LL;
@@ -414,7 +419,7 @@ class TensorArrayScatterOp : public XlaOpKernel {
         auto start_indices =
             xla::Pad(xla::Reshape(index, {1}), xla::ConstantR0<int32>(b, 0),
                      xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}}));
-        ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices);
+        ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices, dtype_);
       }
     }
 
@@ -522,8 +527,13 @@ class TensorArraySplitOp : public XlaOpKernel {
                                         value_shape.DebugString(), " vs. ",
                                         ta_shape.DebugString()));
 
-    OP_REQUIRES_OK(ctx, resource->SetValue(xla::Add(
-                            ta, xla::Reshape(value, ta_shape.dim_sizes()))));
+    const xla::XlaOp reshape = xla::Reshape(value, ta_shape.dim_sizes());
+    if (dtype_ == DT_BOOL) {
+      ta = xla::Or(ta, reshape);
+    } else {
+      ta = xla::Add(ta, reshape);
+    }
+    OP_REQUIRES_OK(ctx, resource->SetValue(ta));
 
     ctx->SetOutput(0, flow);
   }
-- 
GitLab


From dd7d31fa7bfa357e58987c2f3881d99c8050b6de Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 02:29:11 -0700
Subject: [PATCH 0676/1085] Change user_set to an absl::flat_hash_set in
 HloInstruction.

absl::flat_hash_set have better performance than a std::unordered_set, which can improve overall compile time.
PiperOrigin-RevId: 216498767
---
 tensorflow/compiler/xla/service/hlo_instruction.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 93ff04b1e4..81fe1d0a9a 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -28,11 +28,10 @@ limitations under the License.
 #include <set>
 #include <string>
 #include <tuple>
-#include <unordered_map>
-#include <unordered_set>
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
@@ -1645,7 +1644,7 @@ class HloInstruction {
   // members. The set enables fast membership testing and the vector enables
   // fast, stable iteration.
   std::vector<HloInstruction*> users_;
-  std::unordered_set<const HloInstruction*> user_set_;
+  absl::flat_hash_set<const HloInstruction*> user_set_;
 
   // The set of control successors of this instruction.
   std::vector<HloInstruction*> control_successors_;
-- 
GitLab


From d6a3d6a8295359364c86aecc479e6392bcde0ce4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 02:42:39 -0700
Subject: [PATCH 0677/1085] Automated rollback of commit
 950cf87104bfee28e2165fe368f66337b8a1336d

PiperOrigin-RevId: 216500702
---
 tensorflow/core/graph/graph.cc                |   2 +-
 .../optimizers/data/vectorization/BUILD       |  34 ++--
 .../data/vectorization/add_vectorizer.cc      | 150 ------------------
 .../optimizers/data/vectorization_utils.cc    |  21 ++-
 .../data/vectorization_utils_test.cc          | 103 ++----------
 .../optimization/map_vectorization_test.py    |   1 -
 6 files changed, 31 insertions(+), 280 deletions(-)
 delete mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc

diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index a17491d4f7..6f068546d2 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -34,7 +34,7 @@ namespace tensorflow {
 
 const int Graph::kControlSlot = -1;
 
-struct NodeProperties {
+class NodeProperties {
  public:
   NodeProperties(const OpDef* op_def, const NodeDef& node_def,
                  const DataTypeSlice inputs, const DataTypeSlice outputs)
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
index 09018d0124..985d6c6c3a 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -9,11 +9,7 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all")
 
 VECTORIZER_DEPS = [
     ":vectorizer_registry",
-    "//tensorflow/cc:ops",
     "//tensorflow/core/grappler/optimizers/data:graph_utils",
-    "//tensorflow/core:core_cpu",
-    "//tensorflow/cc:scope_internal",
-    "//tensorflow/cc:cc_ops",
 ] + tf_protos_all()
 
 cc_library(
@@ -46,24 +42,6 @@ cc_library(
     ],
 )
 
-tf_cc_test(
-    name = "vectorizer_registry_test",
-    srcs = ["vectorizer_registry_test.cc"],
-    deps = [
-        ":vectorizer_registry",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ] + tf_protos_all(),
-)
-
-cc_library(
-    name = "add_vectorizer",
-    srcs = ["add_vectorizer.cc"],
-    deps = VECTORIZER_DEPS,
-    alwayslink = 1,
-)
-
 cc_library(
     name = "cast_vectorizer",
     srcs = ["cast_vectorizer.cc"],
@@ -83,10 +61,20 @@ cc_library(
     hdrs = ["vectorizer_registry.h"],
     visibility = ["//visibility:public"],
     deps = [
-        ":add_vectorizer",
         ":cast_vectorizer",
         ":unpack_vectorizer",
         ":vectorizer",
         ":vectorizer_registry",
     ],
 )
+
+tf_cc_test(
+    name = "vectorizer_registry_test",
+    srcs = ["vectorizer_registry_test.cc"],
+    deps = [
+        ":vectorizer_registry",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ] + tf_protos_all(),
+)
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc
deleted file mode 100644
index d90a51b01a..0000000000
--- a/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc
+++ /dev/null
@@ -1,150 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/cc/framework/ops.h"
-#include "tensorflow/cc/framework/scope_internal.h"
-#include "tensorflow/cc/ops/array_ops.h"
-#include "tensorflow/cc/ops/math_ops.h"
-#include "tensorflow/cc/ops/standard_ops.h"
-#include "tensorflow/core/graph/node_builder.h"
-#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
-
-namespace tensorflow {
-namespace grappler {
-
-namespace {
-
-const char* const kExpandDimsPrefix = "vectorized/expanddims/";
-
-// Reshapes stacked inputs for broadcast. Stacked inputs have an extra leading
-// dimension, which may cause automatic broadcasting rules to expand the
-// input dimensions wrongly when the unstacked shapes have different ranks.
-// To avoid that, we reshape stacked inputs to the maximum rank they need
-// to be broadcasted to.
-//
-// For example, suppose we have inputs A and B, where A is a stacked tensor with
-// shape [n, 5] (where n is the stack size) and B is an unstacked tensor with
-// shape [12, 7, 5]. If we added them directly, tensorflow broadcasting rules
-// would expand the dimensions of A to [1, n, 5], then (incorrectly) check that
-// the dimensions n and 7 are compatible, and if so, create an output of shape
-// [12, 7, 5]. However, correct addition of these inputs would create an output
-// with shape [n, 12, 7, 5]: we need to manually expand the dimensions of A
-// *after* the leading dimension, i.e. expand A to the shape [n, 1, 1, 5] before
-// broadcasting.
-Status ExpandDimsForBroadcast(std::vector<WrappedTensor>* inputs, Graph* g) {
-  Status status;
-  Scope parent = NewInternalScope(g, &status, nullptr);
-  Scope s = parent.NewSubScope(kExpandDimsPrefix);
-
-  // TODO(rachelim): We can potentially get rid of all these ops if shapes are
-  // known statically
-
-  Output const_0 = ops::Const(s, 0);
-  Output const_1 = ops::Const(s, 1);
-
-  std::vector<Output> ranks;
-  ranks.reserve(inputs->size());
-
-  // Get the stacked rank of each input
-  for (const auto& input : *inputs) {
-    Output rank = ops::Rank(s, Output(input.node, input.output_index));
-
-    if (!input.stacked) {
-      // If the input is unstacked, add 1
-      rank = ops::Add(s, rank, const_1);
-    }
-
-    ranks.push_back(rank);
-  }
-
-  // Pack the ranks into one tensor to get the max
-  Output packed_ranks = ops::Stack(s, ranks);
-
-  Output max_rank =
-      ops::Max(s, packed_ranks, const_0, ops::Max::Attrs().KeepDims(true));
-
-  std::vector<WrappedTensor> expanded_inputs;
-  expanded_inputs.reserve(inputs->size());
-
-  // For all inputs that are stacked, expand dimensions after dim 0.
-  for (size_t i = 0; i < inputs->size(); ++i) {
-    if (!inputs->at(i).stacked) {
-      expanded_inputs.push_back(inputs->at(i));
-      continue;
-    }
-
-    Output input(inputs->at(i).node, inputs->at(i).output_index);
-
-    // Number of dimensions to expand
-    Output rank_diff = ops::Sub(s, max_rank, ranks[i]);
-
-    // [1] * rank_diff
-    Output ones = ops::Tile(s, ops::Const(s, {1}), rank_diff);
-
-    Output const_vec_1 = ops::Const(s, {1});
-
-    Output shape = ops::Shape(s, input);
-
-    // shape[:1]
-    Output concat_pre =
-        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
-                          ops::StridedSlice::Attrs().BeginMask(1));
-
-    // shape[1:]
-    Output concat_post =
-        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
-                          ops::StridedSlice::Attrs().EndMask(1));
-
-    // tf.concat([shape[:1], ones, shape[1:]], 0)
-    Output new_shape = ops::Concat(s, {concat_pre, ones, concat_post}, const_0);
-
-    Output result = ops::Reshape(s, input, new_shape);
-
-    expanded_inputs.push_back({result.node(), 0, true});
-  }
-
-  inputs->swap(expanded_inputs);
-  return status;
-}
-
-class AddVectorizer : public Vectorizer {
- public:
-  Status Vectorize(const Node& node, Graph* outer_scope,
-                   std::vector<WrappedTensor>&& inputs,
-                   std::vector<WrappedTensor>* outputs) override {
-    if (node.num_inputs() != 2) {
-      return errors::Internal("Add op should only have two inputs.");
-    }
-
-    TF_RETURN_IF_ERROR(ExpandDimsForBroadcast(&inputs, outer_scope));
-
-    // Add new Add node with the same op and attrs as the original node
-    Node* new_add_node;
-    TF_RETURN_IF_ERROR(NodeBuilder("Add", "Add")
-                           .Input(inputs[0].node, inputs[0].output_index)
-                           .Input(inputs[1].node, inputs[1].output_index)
-                           .Finalize(outer_scope, &new_add_node));
-
-    // Add output mappings
-    outputs->push_back({new_add_node, 0, true});
-    return Status::OK();
-  }
-};
-
-REGISTER_VECTORIZER("Add", AddVectorizer);
-
-}  // namespace
-}  // namespace grappler
-}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index 8b93b1f2b8..d977ff3198 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -64,18 +64,9 @@ void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src,
   }
 }
 
-// Update node attrs to keep its properties consistent with the function
-void UpdateMapDefunAttrs(FunctionBody* map_defun_fn, Node* map_defun_node) {
-  map_defun_node->AddAttr("output_types", map_defun_fn->ret_types);
-
-  // TODO(rachelim): Propagate precise shapes if they're known, which may enable
-  // subsequent optimizations.
-  map_defun_node->AddAttr("output_shapes", std::vector<PartialTensorShape>(
-                                               map_defun_fn->ret_types.size()));
-}
-
 Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node,
                          const TensorDesc& output) {
+  // Note that we don't update MapDefun attrs as we go, only when we are done
   DataType type = output.first->output_type(output.second);
   int index = map_defun_fn->ret_nodes.size();
 
@@ -92,13 +83,13 @@ Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node,
   map_defun_fn->graph->AddEdge(output.first, output.second, ret_node, 0);
   map_defun_fn->ret_nodes.push_back(ret_node);
   map_defun_fn->ret_types.push_back(type);
-  UpdateMapDefunAttrs(map_defun_fn, map_defun_node);
 
   return s;
 }
 
 void RemoveMapDefunOutput(int output_position, Graph* outer_scope,
                           FunctionBody* map_defun_fn, Node* map_defun_node) {
+  // Note that we don't update MapDefun attrs as we go, only when we are done
   DCHECK_LT(output_position, map_defun_fn->ret_nodes.size())
       << "Trying to remove output that doesn't exist. Output number: "
       << output_position;
@@ -111,7 +102,6 @@ void RemoveMapDefunOutput(int output_position, Graph* outer_scope,
                                 output_position);
   map_defun_fn->ret_types.erase(map_defun_fn->ret_types.begin() +
                                 output_position);
-  UpdateMapDefunAttrs(map_defun_fn, map_defun_node);
 
   // Renumber the nodes and edges that come after
   for (int i = 0; i < num_later_outputs; ++i) {
@@ -352,6 +342,13 @@ void Vectorization::VectorizeHelper() {
   // need the MapDefun node and can delete it.
   if (map_defun_fn_->ret_nodes.empty()) {
     outer_scope_->RemoveNode(map_defun_node_);
+  } else {
+    // Update MapDefun node attrs accordingly
+    DCHECK_EQ(map_defun_fn_->ret_types.size(), map_defun_fn_->ret_nodes.size());
+    map_defun_node_->AddAttr(
+        "output_shapes",
+        std::vector<PartialTensorShape>(map_defun_fn_->ret_types.size()));
+    map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types);
   }
 }
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index be498d150b..a6020e36bb 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -145,7 +145,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
   FunctionDef* vectorized;
   Status s = VectorizeMapDefun(outer, *map_defun, &lib, &vectorized);
   LOG(ERROR) << s;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   EXPECT_EQ(GetRetval(*vectorized, 0), "ret0");
@@ -237,7 +237,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
 
   auto map_defun_node = vectorized->node_def(
       function_utils::FindFunctionNodeWithOp("MapDefun", *vectorized));
@@ -311,7 +311,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -389,7 +389,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -475,7 +475,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& unpack_node = vectorized->node_def(
@@ -574,7 +574,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -654,7 +654,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   // They should be unchanged
   // We check this somewhat manually as the names of nodes may have changed
   EXPECT_EQ(vectorized->node_def_size(), 1);
@@ -738,7 +738,7 @@ TEST(VectorizeMapDefunTest, VectorizeConst) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized));
@@ -817,7 +817,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   auto const_node = vectorized->node_def(
@@ -902,7 +902,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
   *lib.add_function() = inner;
 
   FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
 
   auto find_const = [vectorized](int val) -> const NodeDef* {
     for (const auto& n : vectorized->node_def()) {
@@ -924,89 +924,6 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
   EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name()));
 }
 
-// Before:
-//
-//                   +------+
-// +-----------------+ Arg0 +----------------------+
-// |                 +---+--+                      |
-// |                     |                         |
-// |                 +---v--+                      |
-// |   +-------------+ Arg0 +------------------+   |
-// |   |             +---+--+                  |   |
-// |   |                 |                     |   |
-// |   |                 |          +-----+    |   |
-// |   |                 |          |Const|    |   |
-// |   |                 |          +-+---+    |   |
-// |   |                 |            |        |   |
-// |   |                 |   +--------+        |   |
-// |   |                 |   |                 |   |
-// |   |               +-v---v-+               |   |
-// |   |               |  Add  |               |   |
-// |   |               +-+-----+               |   |
-// |   |                 |                     |   |
-// |   |                 |                     |   |
-// |   | MapDefun      +-v----+                |   |
-// |   +---------------| Ret  |----------------+   |
-// |                   +--v---+                    |
-// |                      |                        |
-// |                      |                        |
-// |                   +--v----                    |
-// +-------------------| Ret  |--------------------+
-//                     +------+
-//
-//
-//  After:
-//
-//              +------+
-// +------------+ Arg0 +----------------------+
-// |            +---+--+                      |
-// |                |                         |
-// |                |              +-----+    |
-// |                |              |Const|    |
-// |              +-v---------+    +--+--+    |
-// |              |ExpandDims*|       |       |
-// |              +-----+-----+       |       |
-// |                    |             |       |
-// |                    +-----+ +-----+       |
-// |                          | |             |
-// |                        +-v-v-+           |
-// |                        | Add |           |
-// |                        +--+--+           |
-// |                           |              |
-// |                       +---v--+           |
-// +-----------------------+ Ret  +-----------+
-//                         +------+
-//
-TEST(VectorizeMapDefunTest, VectorizeDefunAdd) {
-  // Note that this checks that the "Add" vectorizer is successful, but does not
-  // check that the transformed function is correct (i.e. produces the same
-  // output as the unvectorized map defun). For the latter, the tests are in
-  // tensorflow/python/data/experimental/kernel_tests/optimization/
-  // map_vectorization_test.py
-  FunctionDef inner = FunctionDefHelper::Create(
-      "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */},
-      {/* nodes */ FunctionDefHelper::Const("Const", 2),
-       {{"Add"}, "Add", {"arg0", "Const:output:0"}, {{"T", DT_INT32}}}},
-      {{"ret0", "Add:z:0"}});
-
-  FunctionDef outer = FunctionDefHelper::Create(
-      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"},
-      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
-
-  NodeDef* map_defun =
-      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}},
-                      inner.signature().name(), &outer);
-  CHECK_NOTNULL(map_defun);
-
-  FunctionDefLibrary lib;
-  *lib.add_function() = outer;
-  *lib.add_function() = inner;
-  FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
-  EXPECT_TRUE(
-      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
-}
-
 // TODO(rachelim): More test cases when we get around to implementing them:
 // [] A badly defined converter, e.g. doesn't produce nodes that have the
 //    same number of outputs/inputs as the nodes to be converted
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index d1d6cf28ab..803ff87924 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -80,7 +80,6 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("Basic", lambda x: (x, x + 1), None),
       ("Const", lambda x: 2, 12),
       ("Parallel", lambda x: (x, x + 1), 12),
-      ("Broadcast", lambda x: x + np.random.rand(5, 4, 3, 2), None),
       ("Gather", lambda x: array_ops.gather(x, 0), 12),
   )
   def testOptimization(self, map_fn, num_parallel_calls):
-- 
GitLab


From 028ca321cb7b476868dcb39585d5cd361d81f05f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 03:29:54 -0700
Subject: [PATCH 0678/1085] Support removing side effecting instructions with
 RemoveInstructionAndUnusedOperands

If the caller explicitly asks to remove a side effceting instruction
(e.g. all-reduce) then we should respect it instead of silently ignoring
the request.

PiperOrigin-RevId: 216505133
---
 tensorflow/compiler/xla/service/hlo_computation.cc | 2 +-
 tensorflow/compiler/xla/service/hlo_computation.h  | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index c2041c4667..b0f7cd91ad 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -215,7 +215,7 @@ Status HloComputation::RemoveInstructionAndUnusedOperands(
 
     if (removed.count(item) != 0 || item->user_count() != 0 ||
         item == root_instruction() || !IsRemovable(item) ||
-        item->HasSideEffect()) {
+        (item->HasSideEffect() && item != instruction)) {
       continue;
     }
     for (int i = 0; i < item->operand_count(); ++i) {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index d87ab4bda1..dec96d11a9 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -128,9 +128,10 @@ class HloComputation {
   // users. Instruction is deallocated with this call.
   Status RemoveInstruction(HloInstruction* instruction);
 
-  // Remove an instruction from the computation and also transitively any
-  // operand that has no users post removing an instruction. The instruction
-  // must have no users. Instruction is deallocated with this call.
+  // Remove an instruction (including side effecting ones) from the computation
+  // and also transitively any operand that has no side effect and no users post
+  // removing an instruction. The instruction must have no users. Instruction is
+  // deallocated with this call.
   Status RemoveInstructionAndUnusedOperands(HloInstruction* instruction);
 
   // Set the root of the computation to the given instruction. The instruction
-- 
GitLab


From 58c566ddd4fc604028375f97e39ac9732d0c46d2 Mon Sep 17 00:00:00 2001
From: Michael Gielda <mgielda@antmicro.com>
Date: Fri, 5 Oct 2018 14:35:03 +0200
Subject: [PATCH 0679/1085] Create a Robot-based test for Bluepill on Renode

---
 .../experimental/micro/testing/bluepill.resc  |  3 ---
 .../experimental/micro/testing/bluepill.robot | 23 +++++++++++++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/contrib/lite/experimental/micro/testing/bluepill.robot

diff --git a/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
index 9333dc42bf..c46b33e3fb 100644
--- a/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
+++ b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
@@ -31,6 +31,3 @@ macro reset
 
 runMacro $reset
 
-emulation RunFor @1
-
-quit
\ No newline at end of file
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/bluepill.robot b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.robot
new file mode 100644
index 0000000000..f09c3a0cc0
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/testing/bluepill.robot
@@ -0,0 +1,23 @@
+*** Settings ***
+Suite Setup                   Setup
+Suite Teardown                Teardown
+Test Setup                    Reset Emulation
+Resource                      /opt/renode/tests/renode-keywords.robot
+
+*** Variables ***
+${UART}                       sysbus.cpu.uartSemihosting
+
+*** Test Cases ***
+Should Run Bluepill Test
+    [Documentation]           Runs a Bluepill test and waits for a specific string on the semihosting UART
+    [Tags]                    bluepill  uart  tensorflow  arm
+    ${BIN} =                  Get Environment Variable    BIN
+    ${SCRIPT} =               Get Environment Variable    SCRIPT
+    ${EXPECTED} =             Get Environment Variable    EXPECTED
+    Execute Command           $bin = @${BIN}
+    Execute Script            ${SCRIPT}
+
+    Create Terminal Tester    ${UART}  timeout=3
+    Start Emulation
+
+    Wait For Line On Uart     ${EXPECTED}
-- 
GitLab


From 85baa1449799b2a09bfa8b3341bfa4ac8267df20 Mon Sep 17 00:00:00 2001
From: Michael Gielda <mgielda@antmicro.com>
Date: Fri, 5 Oct 2018 14:40:45 +0200
Subject: [PATCH 0680/1085] Change the Bluepill tests to use Robot on Renode

---
 .../micro/testing/test_bluepill_binary.sh     | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh b/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
index 07742a8262..a470dc52f8 100755
--- a/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
+++ b/tensorflow/contrib/lite/experimental/micro/testing/test_bluepill_binary.sh
@@ -30,25 +30,27 @@ docker build -t renode_bluepill \
   -f ${ROOT_DIR}/tensorflow/contrib/lite/experimental/micro/testing/Dockerfile.bluepill \
   ${ROOT_DIR}/tensorflow/contrib/lite/experimental/micro/testing/
 
-docker run \
+exit_code=0
+# running in `if` to avoid setting +e
+if ! docker run \
   --log-driver=none -a stdout -a stderr \
   -v ${ROOT_DIR}:/workspace \
   -v /tmp:/tmp \
+  -e BIN=/workspace/$1 \
+  -e SCRIPT=/workspace/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc \
+  -e EXPECTED="$2" \
   -it renode_bluepill \
-  /bin/bash -c "renode -P 5000 --disable-xwt -e '
-\$bin?=@/workspace/$1
-s @/workspace/tensorflow/contrib/lite/experimental/micro/testing/bluepill.resc
-' 2>&1 >${MICRO_LOG_FILENAME}"
+  /bin/bash -c "/opt/renode/tests/test.sh /workspace/tensorflow/contrib/lite/experimental/micro/testing/bluepill.robot 2>&1 >${MICRO_LOG_FILENAME}"
+then
+  exit_code=1
+fi
 
 echo "LOGS:"
 cat ${MICRO_LOG_FILENAME}
-
-if grep -q "$2" ${MICRO_LOG_FILENAME}
+if [ $exit_code -eq 0 ]
 then
   echo "$1: PASS"
-  exit 0
 else
   echo "$1: FAIL - '$2' not found in logs."
-  exit 1
 fi
-
+exit $exit_code
-- 
GitLab


From e851764c24e5ac5f527a7ce2ce12050edddeb209 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 07:17:04 -0700
Subject: [PATCH 0681/1085] Support kDomain instructions in the HloMatcher
 framework

PiperOrigin-RevId: 216525613
---
 tensorflow/compiler/xla/service/hlo_matchers.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h
index 5502e565b6..b05a012b4a 100644
--- a/tensorflow/compiler/xla/service/hlo_matchers.h
+++ b/tensorflow/compiler/xla/service/hlo_matchers.h
@@ -179,6 +179,7 @@ HLO_MATCHER(Convolution);
 HLO_MATCHER(Copy);
 HLO_MATCHER(CrossReplicaSum);
 HLO_MATCHER(Divide);
+HLO_MATCHER(Domain);
 HLO_MATCHER(DynamicSlice);
 HLO_MATCHER(DynamicUpdateSlice);
 HLO_MATCHER(Eq);
-- 
GitLab


From 93226f635c5c108b3b501d8bbcf27e64dec49fb9 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 10 Oct 2018 07:38:42 -0700
Subject: [PATCH 0682/1085] Use overloaded operators for the assert statement.
 This should remove the reliance on importing tensorflow in the generated
 code.

PiperOrigin-RevId: 216528047
---
 .../python/autograph/converters/asserts.py    |  2 +-
 .../autograph/converters/asserts_test.py      | 24 +++--
 tensorflow/python/autograph/operators/BUILD   | 11 +++
 .../python/autograph/operators/__init__.py    |  1 +
 .../python/autograph/operators/exceptions.py  | 86 ++++++++++++++++++
 .../autograph/operators/exceptions_test.py    | 87 +++++++++++++++++++
 6 files changed, 201 insertions(+), 10 deletions(-)
 create mode 100644 tensorflow/python/autograph/operators/exceptions.py
 create mode 100644 tensorflow/python/autograph/operators/exceptions_test.py

diff --git a/tensorflow/python/autograph/converters/asserts.py b/tensorflow/python/autograph/converters/asserts.py
index 56a97534c4..4ba827c35f 100644
--- a/tensorflow/python/autograph/converters/asserts.py
+++ b/tensorflow/python/autograph/converters/asserts.py
@@ -33,7 +33,7 @@ class AssertTransformer(converter.Base):
     # Note: The lone tf.Assert call will be wrapped with control_dependencies
     # by side_effect_guards.
     template = """
-      tf.Assert(test, (msg,))
+      ag__.assert_stmt(test, lambda: msg)
     """
 
     if node.msg is None:
diff --git a/tensorflow/python/autograph/converters/asserts_test.py b/tensorflow/python/autograph/converters/asserts_test.py
index 01282f9e62..eef628aeb6 100644
--- a/tensorflow/python/autograph/converters/asserts_test.py
+++ b/tensorflow/python/autograph/converters/asserts_test.py
@@ -18,24 +18,30 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import gast
-
 from tensorflow.python.autograph.converters import asserts
+from tensorflow.python.autograph.converters import side_effect_guards
 from tensorflow.python.autograph.core import converter_testing
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.ops import gen_control_flow_ops
 from tensorflow.python.platform import test
 
 
 class AssertsTest(converter_testing.TestCase):
 
-  def test_transform(self):
+  def test_basic(self):
 
     def test_fn(a):
-      assert a > 0
-
-    node, ctx = self.prepare(test_fn, {})
-    node = asserts.transform(node, ctx)
-
-    self.assertTrue(isinstance(node.body[0].value, gast.Call))
+      assert a, 'test message'
+      return tf.no_op()  # pylint:disable=undefined-variable
+
+    with self.converted(test_fn, (asserts, side_effect_guards), {},
+                        gen_control_flow_ops.no_op) as result:
+      with self.cached_session() as sess:
+        op = result.test_fn(constant_op.constant(False))
+        with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                     'test message'):
+          sess.run(op)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/operators/BUILD b/tensorflow/python/autograph/operators/BUILD
index a116611b64..f422911377 100644
--- a/tensorflow/python/autograph/operators/BUILD
+++ b/tensorflow/python/autograph/operators/BUILD
@@ -22,6 +22,7 @@ py_library(
         "__init__.py",
         "control_flow.py",
         "data_structures.py",
+        "exceptions.py",
         "py_builtins.py",
         "slices.py",
     ],
@@ -62,6 +63,16 @@ py_test(
     ],
 )
 
+py_test(
+    name = "exceptions_test",
+    srcs = ["exceptions_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":operators",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 py_test(
     name = "py_builtins_test",
     srcs = ["py_builtins_test.py"],
diff --git a/tensorflow/python/autograph/operators/__init__.py b/tensorflow/python/autograph/operators/__init__.py
index 0d3b44b6c4..53f4b0ddc8 100644
--- a/tensorflow/python/autograph/operators/__init__.py
+++ b/tensorflow/python/autograph/operators/__init__.py
@@ -45,6 +45,7 @@ from tensorflow.python.autograph.operators.data_structures import list_stack
 from tensorflow.python.autograph.operators.data_structures import ListPopOpts
 from tensorflow.python.autograph.operators.data_structures import ListStackOpts
 from tensorflow.python.autograph.operators.data_structures import new_list
+from tensorflow.python.autograph.operators.exceptions import assert_stmt
 from tensorflow.python.autograph.operators.py_builtins import float_
 from tensorflow.python.autograph.operators.py_builtins import int_
 from tensorflow.python.autograph.operators.py_builtins import len_
diff --git a/tensorflow/python/autograph/operators/exceptions.py b/tensorflow/python/autograph/operators/exceptions.py
new file mode 100644
index 0000000000..6078160f68
--- /dev/null
+++ b/tensorflow/python/autograph/operators/exceptions.py
@@ -0,0 +1,86 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Exception handling statements: assert, etc."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.util import tf_inspect
+
+
+def assert_stmt(expression1, expression2):
+  """Functional form of an assert statement.
+
+  This follows the semantics of the Python assert statement, however the
+  concrete implementations may deviate from it. See the respective
+  implementation for details.
+
+  In general, the assert statement should not be used for control flow.
+  Furthermore, it is encouraged that the assertion expressions should not have
+  side effects.
+
+  Args:
+    expression1: Any
+    expression2: Callable[[], Any], returns the expression to include in the
+        error message when expression1 evaluates to False. When expression1 is
+        True, the result of expression2 will not be evaluated, however,
+        expression2 itself may be evaluated in some implementations.
+
+  Returns:
+    Any, implementation-dependent.
+
+  Raises:
+    ValueError: if any arguments are illegal.
+  """
+  if not callable(expression2):
+    raise ValueError('{} must be a callable'.format(expression2))
+  args, _, keywords, _ = tf_inspect.getargspec(expression2)
+  if args or keywords:
+    raise ValueError('{} may not have any arguments'.format(expression2))
+
+  if tensor_util.is_tensor(expression1):
+    return _tf_assert_stmt(expression1, expression2)
+  else:
+    return _py_assert_stmt(expression1, expression2)
+
+
+def _tf_assert_stmt(expression1, expression2):
+  """Overload of assert_stmt that stages a TF Assert.
+
+  This implementation deviates from Python semantics as follows:
+    (1) the assertion is verified regardless of the state of __debug__
+    (2) on assertion failure, the graph execution will fail with
+        tensorflow.errors.ValueError, rather than AssertionError.
+
+  Args:
+    expression1: tensorflow.Tensor, must evaluate to a tf.bool scalar
+    expression2: Callable[[], Union[tensorflow.Tensor, List[tensorflow.Tensor]]]
+
+  Returns:
+    tensorflow.Operation
+  """
+  expression2_tensors = expression2()
+  if not isinstance(expression2_tensors, list):
+    expression2_tensors = [expression2_tensors]
+  return control_flow_ops.Assert(expression1, expression2_tensors)
+
+
+def _py_assert_stmt(expression1, expression2):
+  """Overload of assert_stmt that executes a Python assert statement."""
+  assert expression1, expression2()
+  return None
diff --git a/tensorflow/python/autograph/operators/exceptions_test.py b/tensorflow/python/autograph/operators/exceptions_test.py
new file mode 100644
index 0000000000..186535d05b
--- /dev/null
+++ b/tensorflow/python/autograph/operators/exceptions_test.py
@@ -0,0 +1,87 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for exceptions module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.autograph.operators import exceptions
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.platform import test
+
+
+class ExceptionsTest(test.TestCase):
+
+  def test_assert_tf_untriggered(self):
+    with self.cached_session() as sess:
+      t = exceptions.assert_stmt(
+          constant_op.constant(True), lambda: constant_op.constant('ignored'))
+      sess.run(t)
+
+  def test_assert_tf_triggered(self):
+    with self.cached_session() as sess:
+      t = exceptions.assert_stmt(
+          constant_op.constant(False),
+          lambda: constant_op.constant('test message'))
+
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   'test message'):
+        sess.run(t)
+
+  def test_assert_tf_multiple_printed_values(self):
+    two_tensors = [
+        constant_op.constant('test message'),
+        constant_op.constant('another message')
+    ]
+    with self.cached_session() as sess:
+      t = exceptions.assert_stmt(
+          constant_op.constant(False), lambda: two_tensors)
+
+      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                   'test message.*another message'):
+        sess.run(t)
+
+  def test_assert_python_untriggered(self):
+    side_effect_trace = []
+
+    def expression_with_side_effects():
+      side_effect_trace.append(object())
+      return 'test message'
+
+    exceptions.assert_stmt(True, expression_with_side_effects)
+
+    self.assertListEqual(side_effect_trace, [])
+
+  def test_assert_python_triggered(self):
+    if not __debug__:
+      # Python assertions only be tested when in debug mode.
+      return
+
+    side_effect_trace = []
+    tracer = object()
+
+    def expression_with_side_effects():
+      side_effect_trace.append(tracer)
+      return 'test message'
+
+    with self.assertRaisesRegexp(AssertionError, 'test message'):
+      exceptions.assert_stmt(False, expression_with_side_effects)
+    self.assertListEqual(side_effect_trace, [tracer])
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 0bb68afa38cf5c45232e85fb09186e01055e4d11 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 08:01:45 -0700
Subject: [PATCH 0683/1085] Fix number of outputs when importing tensorflow
 GraphDef.

Sometimes the actual number of outputs is dictated by one of the attributes of the NodeDef.

PiperOrigin-RevId: 216530696
---
 .../contrib/lite/toco/import_tensorflow.cc    | 22 ++++++++++---
 .../lite/toco/import_tensorflow_test.cc       | 31 +++++++++++++++++++
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 6b195cc992..ff67b306e0 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1122,13 +1122,27 @@ tensorflow::Status ConvertUnsupportedOperator(
     op->inputs.push_back(node.input(i));
   }
 
-  // Parse outputs.
-  op->outputs.push_back(node.name());  // Implicit :0.
+  // Parse outputs. Name them after the node's name, plus an ordinal suffix.
+  // Note that some outputs are to be multipled by a named attribute.
   const tensorflow::OpDef* op_def = nullptr;
   if (tensorflow::OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
-    for (int i = 1; i < op_def->output_arg_size(); ++i) {
-      op->outputs.push_back(absl::StrCat(node.name(), ":", i));
+    int next_output = 0;
+    for (int i = 0; i < op_def->output_arg_size(); ++i) {
+      string multiples = op_def->output_arg(i).number_attr();
+      int num_outputs = multiples.empty() ? 1 : GetIntAttr(node, multiples);
+      LOG(INFO) << "dddddddd " << num_outputs;
+      for (int j = 0; j < num_outputs; ++j) {
+        if (next_output == 0) {
+          op->outputs.push_back(node.name());  // Implicit :0.
+        } else {
+          op->outputs.push_back(absl::StrCat(node.name(), ":", next_output));
+        }
+        ++next_output;
+      }
     }
+  } else {
+    LOG(INFO) << "nodef!!!!!!!!!!! ";
+    op->outputs.push_back(node.name());  // Implicit :0.
   }
 
   // Parse if the op supports quantization
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
index cd9a144b52..0767221b83 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
@@ -55,6 +55,13 @@ Status ImportNode(const NodeDef& node, Model* model) {
                                         converter);
 }
 
+Status ImportFlexNode(const NodeDef& node, Model* model) {
+  // Empty converter => all nodes are flex nodes.
+  const auto converter = internal::ConverterMapType();
+  return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), model,
+                                        converter);
+}
+
 Status ImportNode(const NodeDef& node) {
   Model model;
   return ImportNode(node, &model);
@@ -299,5 +306,29 @@ TEST(ImportTest, UnsupportedOpWithWildcardOutputShapes) {
   ASSERT_TRUE(op->output_shapes.empty());
 }
 
+TEST(ImportTest, UnsupportedOpWithMultipleOutputs) {
+  NodeDef node = BuildNode("Unpack", {});
+
+  // Unpack's OpDef has a single output which gets multiplied based on the
+  // "num" attribute of the NodeDef.
+  AttrValue value_attr;
+  SetAttrValue(3, &value_attr);  // 3 outputs.
+  (*node.mutable_attr())["num"] = value_attr;
+
+  Model model;
+  EXPECT_TRUE(ImportFlexNode(node, &model).ok());
+
+  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
+  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
+  const TensorFlowUnsupportedOperator* op =
+      static_cast<const TensorFlowUnsupportedOperator*>(
+          model.operators[0].get());
+
+  ASSERT_EQ(op->outputs.size(), 3);
+  ASSERT_EQ(op->outputs[0], "Node1");
+  ASSERT_EQ(op->outputs[1], "Node1:1");
+  ASSERT_EQ(op->outputs[2], "Node1:2");
+}
+
 }  // namespace
 }  // namespace toco
-- 
GitLab


From 1ae0a45a5de65ab4ae6def232da016e7ee32773c Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 10 Oct 2018 08:12:24 -0700
Subject: [PATCH 0684/1085] [tf.data] `Dataset.make_one_shot_iterator()`
 inherits the random seed from the calling graph.

This change makes a subtle difference to the behavior of existing
programs that create multiple iterators. Previously, one-shot
iterators would not inherit the graph seed, and so their values would
be non-deterministic (unless explicit seeds were set). After this
change, an iterator will inherit its seed from the outer
graph. Multiple one-shot iterators created from the same dataset will
inherit different seeds, matching the semantics of creating multiple
ops with the same graph seed.

PiperOrigin-RevId: 216532256
---
 .../kernel_tests/shuffle_dataset_op_test.py   | 32 +++++++++++++++++++
 tensorflow/python/data/ops/dataset_ops.py     | 13 ++++++++
 2 files changed, 45 insertions(+)

diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
index 8694f58a24..cad28f860e 100644
--- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py
@@ -241,6 +241,38 @@ class ShuffleDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     self.assertAllEqual(results[0], results[1])
 
+  @parameterized.named_parameters(
+      ("ReshuffleOneShot", True, False),
+      ("ReshuffleInitializable", True, True),
+      ("NoReshuffleOneShot", False, False),
+      ("NoReshuffleInitializable", False, True),
+  )
+  def testMultipleIterators(self, reshuffle, initializable):
+    with ops.Graph().as_default() as g:
+      dataset = dataset_ops.Dataset.range(100).shuffle(
+          10, reshuffle_each_iteration=reshuffle).repeat(3)
+
+      if initializable:
+        iterators = [dataset.make_initializable_iterator() for _ in range(2)]
+      else:
+        iterators = [dataset.make_one_shot_iterator() for _ in range(2)]
+
+      results = []
+      with self.session(graph=g) as sess:
+        for iterator in iterators:
+          if initializable:
+            sess.run(iterator.initializer)
+          next_element = iterator.get_next()
+          run_results = []
+          for _ in range(300):
+            run_results.append(sess.run(next_element))
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(next_element)
+
+          results.append(run_results)
+
+        self.assertNotEqual(results[0], results[1])
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 6195747671..cdb883cac9 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -34,6 +34,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed as core_random_seed
 from tensorflow.python.framework import smart_cond
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
@@ -178,10 +179,21 @@ class Dataset(object):
     """
     if context.executing_eagerly():
       return iterator_ops.EagerIterator(self)
+
+    graph_level_seed, op_level_seed = core_random_seed.get_seed(None)
+
     # NOTE(mrry): We capture by value here to ensure that `_make_dataset()` is
     # a 0-argument function.
     @function.Defun(capture_by_value=True)
     def _make_dataset():
+      # NOTE(mrry): `Defun` does not capture the graph-level seed from the
+      # enclosing graph, so if a graph-level seed is present we set the local
+      # graph seed based on a combination of the graph- and op-level seeds.
+      if graph_level_seed is not None:
+        assert op_level_seed is not None
+        core_random_seed.set_random_seed(
+            (graph_level_seed + 87654321 * op_level_seed) % (2 ** 63 - 1))
+
       dataset = self
       options = self.options()
       static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
@@ -2265,6 +2277,7 @@ class ShuffleDataset(UnaryDataset):
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
     self._seed, self._seed2 = random_seed.get_seed(seed)
+
     if reshuffle_each_iteration is None:
       self._reshuffle_each_iteration = True
     else:
-- 
GitLab


From afcc1a4452de7898391683f7cbb16ff548f839a1 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 10 Oct 2018 08:17:09 -0700
Subject: [PATCH 0685/1085] Allow the executor type for a function to be
 specified as an attr on a function.

This change complements the existing `InstantiateOptions::executor_type`
option, which takes precedence over the attr if both are provided. It
enables the choice of executor to be separated from both the calling
op implementation and the function definition, which simplifies the
use of custom executors in operations that take a function as an attr
(e.g.) `tf.data` and the functional control-flow ops.

PiperOrigin-RevId: 216532778
---
 tensorflow/core/common_runtime/function.cc    |  2 +-
 .../core/common_runtime/function_test.cc      | 38 +++++++++++++++++--
 tensorflow/core/framework/function.cc         | 24 ++++++++++--
 tensorflow/core/framework/function.h          |  7 ++++
 4 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 472865ca43..e0e5f4a215 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -551,7 +551,7 @@ Status FunctionLibraryRuntimeImpl::Instantiate(
       item->func_graph = fbody;
       item->overlay_lib = options.overlay_lib;
       item->instantiation_counter = 1;
-      item->executor_type = options.executor_type;
+      item->executor_type = ExecutorType(options, attrs);
       items_.emplace(next_handle_, std::unique_ptr<Item>(item));
       next_handle_++;
     }
diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc
index 7bab9be9a6..716167132b 100644
--- a/tensorflow/core/common_runtime/function_test.cc
+++ b/tensorflow/core/common_runtime/function_test.cc
@@ -584,7 +584,28 @@ TEST_F(FunctionLibraryRuntimeTest, ExecutorFactory) {
              "Internal: This is a dummy.");
   }
 
-  // Test that non-existent exector types trigger an error.
+  // Test that a non-default executor factory can be invoked via an attr.
+  {
+    FunctionLibraryRuntime::InstantiateOptions options;
+    HasError(InstantiateAndRun(flr0_, "XTimesTwo",
+                               {{"T", DT_FLOAT}, {"_executor", "DUMMY"}},
+                               options, {x}, {&y}),
+             "Internal: This is a dummy.");
+  }
+
+  // Test that a non-default executor factory specified via an
+  // `InstantiateOptions` supersedes the attr when both are present.
+  {
+    FunctionLibraryRuntime::InstantiateOptions options;
+    options.executor_type = "DUMMY";
+    HasError(
+        InstantiateAndRun(flr0_, "XTimesTwo",
+                          {{"T", DT_FLOAT}, {"_executor", "UNKNOWN_EXECUTOR"}},
+                          options, {x}, {&y}),
+        "Internal: This is a dummy.");
+  }
+
+  // Test that non-existent executor types trigger an error.
   {
     FunctionLibraryRuntime::InstantiateOptions options;
     options.executor_type = "UNKNOWN_EXECUTOR";
@@ -593,6 +614,15 @@ TEST_F(FunctionLibraryRuntimeTest, ExecutorFactory) {
              "Not found: No executor factory registered for the given executor "
              "type: UNKNOWN_EXECUTOR");
   }
+  {
+    FunctionLibraryRuntime::InstantiateOptions options;
+    HasError(
+        InstantiateAndRun(flr0_, "XTimesTwo",
+                          {{"T", DT_FLOAT}, {"_executor", "UNKNOWN_EXECUTOR"}},
+                          options, {x}, {&y}),
+        "Not found: No executor factory registered for the given executor "
+        "type: UNKNOWN_EXECUTOR");
+  }
 }
 
 TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctions) {
@@ -869,7 +899,7 @@ TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) {
     Scope s = Scope::NewRootScope();
     auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0);
     auto x4_x2_scale = ops::Const<float>(
-        s.WithOpName("x4/x2/scale/_12__cf__10")
+        s.WithOpName("x4/x2/scale/_12__cf__13")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         2.0f);
     auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), x, x4_x2_scale);
@@ -1076,13 +1106,13 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) {
     auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0);
     auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1);
     auto scale = ops::Const(
-        s.WithOpName("scale/_6__cf__15")
+        s.WithOpName("scale/_6__cf__18")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         2.0f);
     auto func1_gx = ops::Mul(s.WithOpName("Func/_1/gx"), func0, scale);
     auto func1_sx = ops::Shape(s.WithOpName("Func/_1/sx"), x);
     auto const0 = ops::Const(
-        s.WithOpName("Func/_1/sy/_5__cf__14")
+        s.WithOpName("Func/_1/sy/_5__cf__17")
             .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"),
         0, {0});
     auto func1_rx = ops::internal::BroadcastGradientArgs(
diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index 20f957190b..aa2f274752 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -796,12 +796,28 @@ uint64 FunctionDefHash(const FunctionDef& fdef) {
   return h;
 }
 
+static constexpr const char* const kExecutorAttr = "_executor";
+
+/* static */
+string FunctionLibraryRuntime::ExecutorType(const InstantiateOptions& options,
+                                            AttrSlice attrs) {
+  if (!options.executor_type.empty()) {
+    return options.executor_type;
+  } else if (const AttrValue* executor_attr = attrs.Find(kExecutorAttr)) {
+    return executor_attr->s();
+  } else {
+    return string();
+  }
+}
+
 string Canonicalize(const string& funcname, AttrSlice attrs,
                     const FunctionLibraryRuntime::InstantiateOptions& options) {
   std::vector<string> entries;
   entries.reserve(options.target.empty() ? attrs.size() : (attrs.size() + 1));
   for (auto p : attrs) {
-    entries.push_back(strings::StrCat(p.first, "=", Print(p.second)));
+    if (p.first != kExecutorAttr) {
+      entries.push_back(strings::StrCat(p.first, "=", Print(p.second)));
+    }
   }
   if (!options.target.empty()) {
     entries.push_back(
@@ -815,9 +831,9 @@ string Canonicalize(const string& funcname, AttrSlice attrs,
     entries.push_back(
         strings::StrCat("_state_handle", "=", options.state_handle));
   }
-  if (!options.executor_type.empty()) {
-    entries.push_back(
-        strings::StrCat("_executor_type", "=", options.executor_type));
+  string executor_type = FunctionLibraryRuntime::ExecutorType(options, attrs);
+  if (!executor_type.empty()) {
+    entries.push_back(strings::StrCat(kExecutorAttr, "=", executor_type));
   }
   std::sort(entries.begin(), entries.end());
   return strings::StrCat(funcname, "[", str_util::Join(entries, ","), "]");
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index 4d6d68e214..d4beca7e11 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -609,6 +609,13 @@ class FunctionLibraryRuntime {
   virtual Status Clone(std::unique_ptr<FunctionLibraryDefinition>* out_lib_def,
                        std::unique_ptr<ProcessFunctionLibraryRuntime>* out_pflr,
                        FunctionLibraryRuntime** out_flr) = 0;
+
+  // Returns the name of the executor class (in the sense of
+  // `ExecutorFactory::GetFactory()`) that will be used based on the given
+  // dynamic `options` and static `attrs`. If none is specified, this method
+  // will return an empty string, which leaves the decision up to the runtime.
+  static string ExecutorType(const InstantiateOptions& options,
+                             AttrSlice attrs);
 };
 
 // Returns a canonicalized string for the instantiation of the
-- 
GitLab


From f146d586bf93b918d6f3e014b230abee49170a52 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 10 Oct 2018 08:24:25 -0700
Subject: [PATCH 0686/1085] Use lambdas when converting ifexps, since they are
 now supported.

PiperOrigin-RevId: 216533613
---
 .../converters/conditional_expressions.py     | 97 +------------------
 .../python/autograph/operators/__init__.py    |  1 +
 2 files changed, 4 insertions(+), 94 deletions(-)

diff --git a/tensorflow/python/autograph/converters/conditional_expressions.py b/tensorflow/python/autograph/converters/conditional_expressions.py
index 40728f555d..a4eef7e6a1 100644
--- a/tensorflow/python/autograph/converters/conditional_expressions.py
+++ b/tensorflow/python/autograph/converters/conditional_expressions.py
@@ -19,109 +19,18 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.autograph.core import converter
-from tensorflow.python.autograph.pyct import anno
 from tensorflow.python.autograph.pyct import templates
-from tensorflow.python.autograph.pyct.static_analysis.annos import NodeAnno
-
-
-class _FunctionDefs(object):
-
-  def __init__(self):
-    self.nodes = []
-
-
-class _Statement(object):
-
-  def __init__(self):
-    self.scope = None
 
 
 class ConditionalExpressionTransformer(converter.Base):
   """Converts contitional expressions to functional form."""
 
-  def _postprocess_statement(self, node):
-    """Inserts any separate functions that node may use."""
-    replacements = []
-    for def_node in self.state[_FunctionDefs].nodes:
-      replacements.extend(def_node)
-    replacements.append(node)
-    node = replacements
-    # The corresponding enter is called by self.visit_block (see _process_block)
-    self.state[_FunctionDefs].exit()
-    return node, None
-
-  def _create_branch(self, expr, name_stem):
-    scope = self.state[_Statement].scope
-    name = self.ctx.namer.new_symbol(name_stem, scope.referenced)
-    template = """
-      def name():
-        return expr,
-    """
-    node = templates.replace(template, name=name, expr=expr)
-    self.state[_FunctionDefs].nodes.append(node)
-    return name
-
   def visit_IfExp(self, node):
-    if anno.hasanno(node.test, anno.Basic.QN):
-      name_root = anno.getanno(node.test, anno.Basic.QN).ssf()
-    else:
-      name_root = 'ifexp'
-
-    true_fn_name = self._create_branch(node.body, '%s_true' % name_root)
-    false_fn_name = self._create_branch(node.orelse, '%s_false' % name_root)
-
     return templates.replace_as_expression(
-        'ag__.utils.run_cond(test, true_fn_name, false_fn_name)',
+        'ag__.if_stmt(test, lambda: true_expr, lambda: false_expr)',
         test=node.test,
-        true_fn_name=true_fn_name,
-        false_fn_name=false_fn_name)
-
-  def _process_block(self, scope, block):
-    self.state[_Statement].enter()
-    self.state[_Statement].scope = scope
-    block = self.visit_block(
-        block,
-        before_visit=self.state[_FunctionDefs].enter,
-        after_visit=self._postprocess_statement)
-    self.state[_Statement].exit()
-    return block
-
-  def visit_FunctionDef(self, node):
-    node.args = self.generic_visit(node.args)
-    node.decorator_list = self.visit_block(node.decorator_list)
-    node.body = self._process_block(
-        anno.getanno(node, anno.Static.SCOPE), node.body)
-    return node
-
-  def visit_For(self, node):
-    node.target = self.visit(node.target)
-    node.body = self._process_block(
-        anno.getanno(node, NodeAnno.BODY_SCOPE), node.body)
-    node.orelse = self._process_block(
-        anno.getanno(node, NodeAnno.ORELSE_SCOPE), node.orelse)
-    return node
-
-  def visit_While(self, node):
-    node.test = self.visit(node.test)
-    node.body = self._process_block(
-        anno.getanno(node, NodeAnno.BODY_SCOPE), node.body)
-    node.orelse = self._process_block(
-        anno.getanno(node, NodeAnno.ORELSE_SCOPE), node.orelse)
-    return node
-
-  def visit_If(self, node):
-    node.test = self.visit(node.test)
-    node.body = self._process_block(
-        anno.getanno(node, NodeAnno.BODY_SCOPE), node.body)
-    node.orelse = self._process_block(
-        anno.getanno(node, NodeAnno.ORELSE_SCOPE), node.orelse)
-    return node
-
-  def visit_With(self, node):
-    node.items = self.visit_block(node.items)
-    node.body = self._process_block(
-        anno.getanno(node, NodeAnno.BODY_SCOPE), node.body)
-    return node
+        true_expr=node.body,
+        false_expr=node.orelse)
 
 
 def transform(node, ctx):
diff --git a/tensorflow/python/autograph/operators/__init__.py b/tensorflow/python/autograph/operators/__init__.py
index 53f4b0ddc8..8ba2558ac2 100644
--- a/tensorflow/python/autograph/operators/__init__.py
+++ b/tensorflow/python/autograph/operators/__init__.py
@@ -38,6 +38,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.autograph.operators.control_flow import for_stmt
+from tensorflow.python.autograph.operators.control_flow import if_stmt
 from tensorflow.python.autograph.operators.control_flow import while_stmt
 from tensorflow.python.autograph.operators.data_structures import list_append
 from tensorflow.python.autograph.operators.data_structures import list_pop
-- 
GitLab


From 131f6f8429ffa0511a3d5a6a595843d3d96ec942 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 10 Oct 2018 08:28:08 -0700
Subject: [PATCH 0687/1085] cond_v2: raise an error if pred is a Python bool.

This is to match the existing behavior of tf.cond.

PiperOrigin-RevId: 216534084
---
 .../python/kernel_tests/cond_v2_test.py       | 34 +++++++++++--------
 .../kernel_tests/control_flow_ops_py_test.py  |  1 -
 tensorflow/python/ops/cond_v2_impl.py         |  3 ++
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index a424a0f219..0e7c2f8ae6 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -670,7 +670,7 @@ class CondV2CollectionTest(test.TestCase):
           y_const = constant_op.constant(ops.get_collection("y")[0])
           return math_ops.add(x_const, y_const)
 
-        cnd = cond_v2.cond_v2(True, fn, fn)
+        cnd = cond_v2.cond_v2(constant_op.constant(True), fn, fn)
         self.assertEquals(cnd.eval(), 7)
 
   def testCollectionTensorValueAccessInCond(self):
@@ -705,9 +705,7 @@ class CondV2CollectionTest(test.TestCase):
           z = math_ops.add(x, y)
           return math_ops.mul(x, z)
 
-        cnd = cond_v2.cond_v2(
-            True, true_fn,
-            false_fn)
+        cnd = cond_v2.cond_v2(constant_op.constant(True), true_fn, false_fn)
         self.assertEquals(cnd.eval(), 14)
 
         read_z_collection = ops.get_collection("z")
@@ -780,10 +778,12 @@ class CondV2ContainerTest(test.TestCase):
           return constant_op.constant(6.0)
 
         with ops.container("l1"):
-          cnd_true = cond_v2.cond_v2(True, true_fn, false_fn)
+          cnd_true = cond_v2.cond_v2(
+              constant_op.constant(True), true_fn, false_fn)
           self.assertEquals(cnd_true.eval(), 2)
 
-          cnd_false = cond_v2.cond_v2(False, true_fn, false_fn)
+          cnd_false = cond_v2.cond_v2(
+              constant_op.constant(False), true_fn, false_fn)
           self.assertEquals(cnd_false.eval(), 6)
 
           v4 = variables.Variable([3])
@@ -812,7 +812,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
           return c
 
         with ops.colocate_with(a.op):
-          self.assertEquals(cond_v2.cond_v2(True, fn, fn).eval(), 3)
+          self.assertEquals(
+              cond_v2.cond_v2(constant_op.constant(True), fn, fn).eval(), 3)
 
         def fn2():
           c = constant_op.constant(3.0)
@@ -821,7 +822,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
         with ops.colocate_with(a.op):
           with ops.colocate_with(b.op):
-            self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3)
+            self.assertEquals(
+                cond_v2.cond_v2(constant_op.constant(True), fn2, fn2).eval(), 3)
 
   def testColocateWithInAndOutOfCond(self):
     with ops.Graph().as_default() as g:
@@ -837,7 +839,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
             return c
 
         with ops.colocate_with(a.op):
-          self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3)
+          self.assertEquals(
+              cond_v2.cond_v2(constant_op.constant(True), fn2, fn2).eval(), 3)
 
           d = constant_op.constant([2.0], name="d")
           self.assertEqual([b"loc:@a"], d.op.colocation_groups())
@@ -858,7 +861,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
           with ops.colocate_with(b.op):
             c = math_ops.add(a, a, name="c")
           return c
-        out_cond_2 = cond_v2.cond_v2(True, fn, fn)
+        out_cond_2 = cond_v2.cond_v2(constant_op.constant(True), fn, fn)
 
         run_options = config_pb2.RunOptions(output_partition_graphs=True)
         run_metadata = config_pb2.RunMetadata()
@@ -880,7 +883,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
           return c
 
         with ops.device("/device:CPU:0"):
-          self.assertEquals(cond_v2.cond_v2(True, fn, fn).eval(), 3)
+          self.assertEquals(
+              cond_v2.cond_v2(constant_op.constant(True), fn, fn).eval(), 3)
 
         def fn2():
           c = constant_op.constant(3.0)
@@ -888,7 +892,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
           return c
 
         with ops.device("/device:GPU:0"):
-          self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3)
+          self.assertEquals(
+              cond_v2.cond_v2(constant_op.constant(True), fn2, fn2).eval(), 3)
 
   def testDeviceInAndOutOfCond(self):
     with ops.Graph().as_default() as g:
@@ -902,7 +907,8 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
             return c
 
         with ops.device("/device:CPU:0"):
-          self.assertEquals(cond_v2.cond_v2(True, fn2, fn2).eval(), 3)
+          self.assertEquals(
+              cond_v2.cond_v2(constant_op.constant(True), fn2, fn2).eval(), 3)
 
           d = constant_op.constant(4.0)
           self.assertEqual("/device:CPU:0", d.op.device)
@@ -921,7 +927,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
         with ops.device("/device:CPU:0"):
           a = constant_op.constant([2.0], name="a")
-          out_cond_2 = cond_v2.cond_v2(True, fn, fn)
+          out_cond_2 = cond_v2.cond_v2(constant_op.constant(True), fn, fn)
 
         run_options = config_pb2.RunOptions(output_partition_graphs=True)
         run_metadata = config_pb2.RunMetadata()
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index a5f85b97f7..46b8b10e90 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -333,7 +333,6 @@ class ControlFlowTest(test.TestCase):
       with self.assertRaisesOpError("has inputs from different frames"):
         res.eval(feed_dict={data: 1.0})
 
-  @test_util.disable_control_flow_v2("b/113294340")
   def testCondBool(self):
     values = constant_op.constant(10)
     fn1 = lambda: math_ops.add(values, 1)
diff --git a/tensorflow/python/ops/cond_v2_impl.py b/tensorflow/python/ops/cond_v2_impl.py
index c9aa4d4889..81d9cba042 100644
--- a/tensorflow/python/ops/cond_v2_impl.py
+++ b/tensorflow/python/ops/cond_v2_impl.py
@@ -52,6 +52,9 @@ _gradients_impl = None
 
 def cond_v2(pred, true_fn, false_fn, name="cond"):
   """Like tf.cond, except emits a single If op."""
+  if isinstance(pred, bool):
+    raise TypeError("pred must not be a Python bool", pred)
+
   if not name:
     name = "cond"
 
-- 
GitLab


From 79af30d357fbe0869e163e1d9dce0cb869b3724f Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Wed, 10 Oct 2018 08:36:36 -0700
Subject: [PATCH 0688/1085] [Grappler] Add RemoveStackStridedSliceSameAxis
 optimizer.

// Replace operations of the form:
//    x = stack((a_0, a_1, ..., a_{n-1}), axis=k)[:,...,i,...]
// with
//    a_i
// when the strided slice index `i` is applied in the k'th axis.
//
// Similarly, replace operations of the form:
//    x = stack((a_0, a_1, ..., a_{n-1}), axis=k)[:,...,i:i+1,...]
// with
//    expand_dims(a_i, axis=k)
//

PiperOrigin-RevId: 216535346
---
 .../optimizers/arithmetic_optimizer.cc        | 295 +++++++++++++++++-
 .../optimizers/arithmetic_optimizer.h         |   3 +
 .../optimizers/arithmetic_optimizer_test.cc   | 211 +++++++++++++
 .../optimizers/graph_optimizer_stage.h        |   4 +
 .../optimizers/graph_optimizer_stage_test.cc  |   3 +
 5 files changed, 515 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 7d5014ee0a..0c2686a419 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -46,6 +46,7 @@ limitations under the License.
 #include "tensorflow/core/platform/tensor_coding.h"
 #include "tensorflow/core/util/device_name_utils.h"
 #include "tensorflow/core/util/saved_tensor_slice_util.h"
+#include "tensorflow/core/util/strided_slice_op.h"
 
 using tensorflow::strings::StrCat;
 
@@ -157,6 +158,14 @@ void SetSourceDataType(DataType dtype, NodeDef* node) {
   SetDataTypeToAttr(dtype, SourceDataTypeAttrName(*node), node);
 }
 
+Status CheckAttrExists(const NodeDef& node, const string& key) {
+  if (node.attr().count(key) == 0) {
+    return errors::InvalidArgument("Node '", node.name(), "'lacks '", key,
+                                   "' attr: ", node.DebugString());
+  }
+  return Status::OK();
+}
+
 NodeDef* GetTailOfValuePreservingChain(
     const NodeDef& node, const NodeMap& node_map,
     const std::unordered_set<string>& nodes_to_preserve) {
@@ -2902,6 +2911,284 @@ class UnaryOpsComposition : public ArithmeticOptimizerStage {
   std::unordered_set<string> fused_nodes_;
 };
 
+// Replace operations of the form:
+//    x = stack((a_0, a_1, ..., a_{n-1}), axis=k)[:,...,i,...]
+// with
+//    a_i
+// when the strided slice index `i` is applied in the k'th axis.
+//
+// Similarly, replace operations of the form:
+//    x = stack((a_0, a_1, ..., a_{n-1}), axis=k)[:,...,i:i+1,...]
+// with
+//    expand_dims(a_i, axis=k)
+//
+// TODO(ebrevdo): Extend to also replace operations of the form
+//    concat((a_0, a_1, ..., ), axis=k)[:, ..., s_i:s_{i+1}, ...]
+// with
+//    a_i,
+// when
+//    s_i = cumsum(shape(a)[k] for a in (a_0, ...,))[i]
+// and slicing is in the k'th axis.
+class RemoveStackStridedSliceSameAxis : public ArithmeticOptimizerStage {
+ public:
+  explicit RemoveStackStridedSliceSameAxis(
+      const GraphOptimizerContext& ctx,
+      const ArithmeticOptimizerContext& ctx_ext)
+      : ArithmeticOptimizerStage("RemoveStackStridedSliceSameAxis", ctx,
+                                 ctx_ext) {}
+  ~RemoveStackStridedSliceSameAxis() override = default;
+
+  bool IsSupported(const NodeDef* node) const override {
+    return IsStridedSlice(*node);
+  }
+
+  Status TrySimplify(NodeDef* node, string* simplified_node_name) override {
+    // *node is a StridedSlice NodeDef.
+    NodeDef* pack;
+
+    // Get the input and see if it's a Pack op.
+    TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &pack));
+    if (!IsPack(*pack)) return Status::OK();
+
+    bool return_early;
+    PartialTensorShape pack_output_shape;
+    int pack_axis;
+    TF_RETURN_IF_ERROR(
+        CheckInputs(node, pack, &pack_output_shape, &pack_axis, &return_early));
+    if (return_early) return Status::OK();
+
+    int slice_start_value;
+    bool found;
+    TF_RETURN_IF_ERROR(GetSliceAxis(node, pack, pack_output_shape, pack_axis,
+                                    &slice_start_value, &found));
+    if (!found) return Status::OK();
+
+    return RewriteGraph(node, pack, slice_start_value, pack_axis,
+                        simplified_node_name);
+  }
+
+ protected:
+  bool IsReallyConstant(const NodeDef& node) const {
+    if (!IsConstant(node)) {
+      return false;
+    }
+    // If the node is fed it's not constant anymore.
+    return ctx().feed_nodes->find(node.name()) == ctx().feed_nodes->end();
+  }
+
+  bool GetConstantAsInt64(const NodeDef& node, DataType dtype,
+                          std::vector<int64>* values) {
+    if (dtype == DT_INT32) {
+      std::vector<int32> values_int32;
+      if (!ValuesFromConstNode(node, &values_int32)) {
+        return false;
+      }
+      std::copy(values_int32.begin(), values_int32.end(),
+                std::inserter(*values, values->begin()));
+      return true;
+    } else {
+      return ValuesFromConstNode(node, values);
+    }
+  }
+
+  Status CheckInputs(const NodeDef* node, const NodeDef* pack,
+                     PartialTensorShape* pack_output_shape, int* pack_axis,
+                     bool* return_early) {
+    *return_early = true;
+    TF_RETURN_IF_ERROR(CheckAttrExists(*pack, "axis"));
+
+    *pack_axis = pack->attr().at("axis").i();
+    auto slice_properties =
+        ctx().graph_properties->GetInputProperties(node->name());
+    *pack_output_shape = slice_properties[0].shape();
+    if (pack_output_shape->unknown_rank()) {
+      return Status::OK();
+    }
+    const int pack_input_rank = pack_output_shape->dims() - 1;
+    if (*pack_axis < 0) {
+      // The ndims of any input into Pack op is its output ndims - 1.
+      *pack_axis += pack_input_rank;
+    }
+    if (*pack_axis < 0 || *pack_axis >= pack_input_rank) {
+      return errors::InvalidArgument(
+          "Pack node (", pack->name(),
+          ") axis attribute is out of bounds: ", pack->attr().at("axis").i());
+    }
+    *return_early = false;
+    return Status::OK();
+  }
+
+  Status GetSliceAxis(const NodeDef* node, const NodeDef* pack,
+                      const PartialTensorShape& pack_output_shape,
+                      int pack_axis, int* slice_start_value, bool* found) {
+    *found = false;
+    for (auto key : {"begin_mask", "end_mask", "ellipsis_mask", "new_axis_mask",
+                     "shrink_axis_mask"}) {
+      TF_RETURN_IF_ERROR(CheckAttrExists(*node, key));
+    }
+
+    const int begin_mask = node->attr().at("begin_mask").i();
+    const int end_mask = node->attr().at("end_mask").i();
+    const int ellipsis_mask = node->attr().at("ellipsis_mask").i();
+    const int new_axis_mask = node->attr().at("new_axis_mask").i();
+    const int shrink_axis_mask = node->attr().at("shrink_axis_mask").i();
+
+    // Check that the StridedSlice is one of these at pack_axis:
+    //   [..., i, ...]
+    //   [..., i:i+1, ...]
+    //   [..., :1, ...]
+    //   [..., -1:, ...]
+    ///  [..., s_{pack_axis}-1:, ...]
+    NodeDef* slice_begin;
+    NodeDef* slice_end;
+    NodeDef* slice_strides;
+    TF_RETURN_IF_ERROR(GetInputNode(node->input(1), &slice_begin));
+    TF_RETURN_IF_ERROR(GetInputNode(node->input(2), &slice_end));
+    TF_RETURN_IF_ERROR(GetInputNode(node->input(3), &slice_strides));
+
+    for (const auto* n : {slice_begin, slice_end, slice_strides}) {
+      if (!IsReallyConstant(*n)) return Status::OK();
+    }
+
+    Tensor slice_begin_t;
+    Tensor slice_end_t;
+    Tensor slice_strides_t;
+
+    TF_RETURN_IF_ERROR(CheckAttrExists(*slice_begin, "value"));
+    TF_RETURN_IF_ERROR(CheckAttrExists(*slice_end, "value"));
+
+    if (!slice_begin_t.FromProto(slice_begin->attr().at("value").tensor())) {
+      return Status::OK();
+    }
+    if (!slice_end_t.FromProto(slice_end->attr().at("value").tensor())) {
+      return Status::OK();
+    }
+    if (!slice_strides_t.FromProto(
+            slice_strides->attr().at("value").tensor())) {
+      return Status::OK();
+    }
+    TensorShape processing_shape;
+    TensorShape final_shape;
+    bool is_identity;
+    bool is_simple_slice;
+    bool slice_dim0;
+    gtl::InlinedVector<int64, 4> slice_begin_vec;
+    gtl::InlinedVector<int64, 4> slice_end_vec;
+    gtl::InlinedVector<int64, 4> slice_strides_vec;
+    TF_RETURN_IF_ERROR(ValidateStridedSliceOp(
+        &slice_begin_t, &slice_end_t, slice_strides_t, pack_output_shape,
+        begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask,
+        &processing_shape, &final_shape, &is_identity, &is_simple_slice,
+        &slice_dim0, &slice_begin_vec, &slice_end_vec, &slice_strides_vec));
+
+    if (!is_simple_slice) return Status::OK();
+
+    int begin_index = -1;
+    int64 begin_value = 0;
+    for (int i = 0; i < slice_begin_vec.size(); ++i) {
+      const int64 v = slice_begin_vec[i];
+      if (v != 0) {
+        if (begin_index != -1) {
+          // At least two start values that are nonzero.
+          return Status::OK();
+        }
+        begin_index = i;
+        begin_value = v;
+      }
+    }
+
+    int end_index = -1;
+    int64 end_value = 0;
+    for (int i = 0; i < slice_end_vec.size(); ++i) {
+      const int64 v = slice_end_vec[i];
+      if (v != pack_output_shape.dim_size(i)) {
+        if (end_index != -1) {
+          // At least two end values that are nonzero.
+          return Status::OK();
+        }
+        end_index = i;
+        end_value = v;
+      }
+    }
+
+    if (begin_index == -1 && end_index == -1) return Status::OK();
+    if (begin_index != -1 && end_index != -1 && begin_index != end_index) {
+      // Somehow received different axes for begin/end slicing
+      return Status::OK();
+    }
+    const int slice_axis = (begin_index == -1) ? end_index : begin_index;
+    if (slice_axis != pack_axis) {
+      // Not slicing on the same axis as the Pack op.
+      return Status::OK();
+    }
+    *slice_start_value = (begin_index == -1) ? 0 : begin_value;
+    const int64 slice_end_value =
+        (end_index == -1) ? pack_output_shape.dim_size(slice_axis) : end_value;
+    if (slice_end_value != *slice_start_value + 1) {
+      // Not slicing a single value out.
+      return Status::OK();
+    }
+
+    if (*slice_start_value < 0 || *slice_start_value >= pack->input_size()) {
+      return errors::InvalidArgument(
+          "Node ", node->name(), " requested invalid slice index ",
+          *slice_start_value, " on axis ", slice_axis,
+          " from tensor of shape: ", pack_output_shape.DebugString());
+    }
+
+    *found = true;  // slice_start_value is valid.
+    return Status::OK();
+  }
+
+  Status RewriteGraph(const NodeDef* node, const NodeDef* pack,
+                      int slice_start_value, int pack_axis,
+                      string* simplified_node_name) {
+    OpInfo::TensorProperties input_slice_properties;
+    NodeDef* input_slice;
+    TF_RETURN_IF_ERROR(
+        GetInputNode(pack->input(slice_start_value), &input_slice));
+    TF_RETURN_IF_ERROR(GetTensorProperties(pack->input(slice_start_value),
+                                           &input_slice_properties));
+    PartialTensorShape input_slice_shape(input_slice_properties.shape());
+
+    OpInfo::TensorProperties output_properties;
+    TF_RETURN_IF_ERROR(GetTensorProperties(
+        strings::StrCat(node->name(), ":", 0), &output_properties));
+    PartialTensorShape output_shape(output_properties.shape());
+    NodeDef* output =
+        AddEmptyNode(OptimizedNodeName(ParseNodeScopeAndName(node->name())));
+    if (input_slice_shape.IsCompatibleWith(output_shape)) {
+      output->set_op("Identity");
+      output->set_device(node->device());
+      SetDataTypeToAttr(output_properties.dtype(), "T", output);
+      output->add_input(input_slice->name());
+    } else {
+      NodeDef* axis = AddEmptyNode(
+          OptimizedNodeName(ParseNodeScopeAndName(node->name()), "Axis"));
+      axis->set_op("Const");
+      axis->set_device(node->device());
+      auto axis_attr = axis->mutable_attr();
+      SetDataTypeToAttr(DT_INT32, "dtype", axis);
+      auto* axis_t = (*axis_attr)["value"].mutable_tensor();
+      axis_t->set_dtype(DT_INT32);
+      axis_t->add_int_val(pack_axis);
+      AddToOptimizationQueue(axis);
+      output->set_op("ExpandDims");
+      output->set_device(node->device());
+      SetDataTypeToAttr(output_properties.dtype(), "T", output);
+      output->add_input(input_slice->name());
+      output->add_input(axis->name());
+    }
+
+    // Copy dependencies over.
+    ForwardControlDependencies(output, {node, pack});
+    AddToOptimizationQueue(output);
+    *simplified_node_name = output->name();
+
+    return Status::OK();
+  }
+};
+
 }  // namespace
 
 class UniqueNodes {
@@ -3132,7 +3419,7 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) {
 
   const GraphOptimizerContext ctx(&nodes_to_preserve_, optimized_graph_,
                                   graph_properties_.get(), node_map_.get(),
-                                  opt_level_);
+                                  &feed_nodes_, opt_level_);
   const ArithmeticOptimizerContext ctx_ext(&nodes_to_simplify);
 
   // Stop pipeline after first stage returning non-empty simplified tensor name.
@@ -3186,6 +3473,8 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) {
     pipeline.AddStage<ConvertExpm1Stage>(ctx, ctx_ext);
   if (options_.unary_ops_composition)
     pipeline.AddStage<UnaryOpsComposition>(ctx, ctx_ext);
+  if (options_.remove_stack_strided_slice_same_axis)
+    pipeline.AddStage<RemoveStackStridedSliceSameAxis>(ctx, ctx_ext);
 
   VLOG(1) << "Run " << pipeline.NumStages() << " arithmetic optimizer stages: "
           << str_util::Join(pipeline.StageNames(), ", ");
@@ -3249,6 +3538,10 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
   optimized_graph_ = &optimized_item.graph;
   node_map_.reset(new NodeMap(optimized_graph_));
 
+  for (const auto& feed : item.feed) {
+    feed_nodes_.insert(NodeName(feed.first));
+  }
+
   // Disable restricted graph rewrites.
   options_.unary_ops_composition &=
       item.allowed_optimizations.non_differentiable_rewrites;
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
index d457eb6d21..bb56f61e30 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
 
 namespace tensorflow {
@@ -79,6 +80,7 @@ class ArithmeticOptimizer : public GraphOptimizer {
     bool convert_log1p = true;
     bool convert_expm1 = true;
     bool unary_ops_composition = true;
+    bool remove_stack_strided_slice_same_axis = false;
 
     // Choose which arithmetic optimizer stages will be enabled for a given
     // optimization level by default.
@@ -128,6 +130,7 @@ class ArithmeticOptimizer : public GraphOptimizer {
   std::unique_ptr<NodeMap> node_map_;
   std::unique_ptr<GraphProperties> graph_properties_;
   GraphDef* optimized_graph_ = nullptr;  // Not owned.
+  gtl::FlatSet<string> feed_nodes_;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 77f3c64c65..d091b26b65 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -288,6 +288,12 @@ class ArithmeticOptimizerTest : public GrapplerTest {
     DisableAllStages(optimizer);
     optimizer->options_.unary_ops_composition = true;
   }
+
+  void EnableOnlyRemoveStackStridedSliceSameAxis(
+      ArithmeticOptimizer* optimizer) {
+    DisableAllStages(optimizer);
+    optimizer->options_.remove_stack_strided_slice_same_axis = true;
+  }
 };
 
 TEST_F(ArithmeticOptimizerTest, NoOp) {
@@ -3364,5 +3370,210 @@ TEST_F(ArithmeticOptimizerTest, UnaryOpsComposition) {
   test::ExpectTensorNear<float>(tensors_expected[0], tensors[0], 1e-6);
 }
 
+TEST_F(ArithmeticOptimizerTest, RemoveStackStridedSliceSameAxis) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto a_in =
+      ops::Const(s.WithOpName("a_in"), {1.0f, 2.0f, 3.0f, 4.0f}, {2, 2});
+  auto b_in =
+      ops::Const(s.WithOpName("b_in"), {-1.0f, -2.0f, -3.0f, -4.0f}, {2, 2});
+  auto c_in =
+      ops::Const(s.WithOpName("c_in"), {5.0f, 6.0f, 7.0f, 8.0f}, {2, 2});
+  auto a = ops::PlaceholderWithDefault(s.WithOpName("a"), a_in,
+                                       PartialTensorShape({-1, -1}));
+  auto b = ops::PlaceholderWithDefault(s.WithOpName("b"), b_in,
+                                       PartialTensorShape({-1, -1}));
+  auto c = ops::PlaceholderWithDefault(s.WithOpName("c"), c_in,
+                                       PartialTensorShape({-1, -1}));
+  // stacked = tf.stack((a, b, c), axis=1).
+  // stacked.shape == [2, 3, 2] (a, b, c are stacked along new axis 1)
+  auto stacked =
+      ops::Stack(s.WithOpName("stacked"), {a.output, b.output, c.output},
+                 ops::Stack::Axis(1));
+  auto expanded_a = ops::ExpandDims(s.WithOpName("expanded_a"), a, {1});
+  auto expanded_b = ops::ExpandDims(s.WithOpName("expanded_b"), b, {1});
+  auto expanded_c = ops::ExpandDims(s.WithOpName("expanded_c"), c, {1});
+  auto begin_a = ops::Const(s.WithOpName("begin_a"), {0, 0, 0}, {3});
+  auto end_a = ops::Const(s.WithOpName("end_a"), {0, 1, 0}, {3});
+  auto begin_b = ops::Const(s.WithOpName("begin_b"), {0, 1, 0}, {3});
+  auto end_b = ops::Const(s.WithOpName("end_b"), {0, 2, 0}, {3});
+  auto begin_c = ops::Const(s.WithOpName("begin_c"), {0, 2, 0}, {3});
+  auto end_c = ops::Const(s.WithOpName("end_c"), {0, 3, 0}, {3});
+  auto end_c_1to = ops::Const(s.WithOpName("begin_c_2to"), {0, 0, 0}, {3});
+  auto strides = ops::Const(s.WithOpName("strides"), {1, 1, 1}, {3});
+
+  // stacked[:, 0]
+  using SS = ops::StridedSlice;
+  auto pa_slice = ops::Identity(
+      s.WithOpName("pa_slice_out"),
+      SS(s.WithOpName("pa_slice"), stacked, begin_a, end_a, strides,
+         SS::BeginMask(0b0101)  // 5
+             .EllipsisMask(0)
+             .EndMask(0b0101)  // 5
+             .NewAxisMask(0)
+             .ShrinkAxisMask(0b0010)));  // 2
+
+  // stacked[:, 1]
+  auto pb_slice = ops::Identity(
+      s.WithOpName("pb_slice_out"),
+      SS(s.WithOpName("pb_slice"), stacked, begin_b, end_b, strides,
+         SS::BeginMask(0b0101)  // 5
+             .EllipsisMask(0)
+             .EndMask(0b0101)  // 5
+             .NewAxisMask(0)
+             .ShrinkAxisMask(0b0010)));  // 2
+
+  // stacked[:, 2]
+  auto pc_slice = ops::Identity(
+      s.WithOpName("pc_slice_out"),
+      SS(s.WithOpName("pc_slice"), stacked, begin_c, end_c, strides,
+         SS::BeginMask(0b0101)  // 5
+             .EllipsisMask(0)
+             .EndMask(0b0101)  // 5
+             .NewAxisMask(0)
+             .ShrinkAxisMask(0b0010)));  // 2
+
+  // stacked[:, 0:1, :]
+  auto pa_slice_01 = ops::Identity(
+      s.WithOpName("pa_slice_01_out"),
+      SS(s.WithOpName("pa_slice_01"), stacked, begin_a, end_a, strides,
+         SS::BeginMask(0b0101)  // 5
+             .EllipsisMask(0)
+             .EndMask(0b0101)  // 5
+             .NewAxisMask(0)
+             .ShrinkAxisMask(0)));
+
+  // stacked[:, :1, :]
+  auto pa_slice_to1 = ops::Identity(
+      s.WithOpName("pa_slice_to1_out"),
+      SS(s.WithOpName("pa_slice_to1"), stacked, begin_a, end_a, strides,
+         SS::BeginMask(0b0111)  // 7
+             .EllipsisMask(0)
+             .EndMask(0b0101)  // 5
+             .NewAxisMask(0)
+             .ShrinkAxisMask(0)));
+
+  // stacked[:, 1:2, :]
+  auto pb_slice_12 = ops::Identity(
+      s.WithOpName("pb_slice_12_out"),
+      SS(s.WithOpName("pb_slice_12"), stacked, begin_b, end_b, strides,
+         SS::BeginMask(0b0101)  // 5
+             .EllipsisMask(0)
+             .EndMask(0b0101)  // 5
+             .NewAxisMask(0)
+             .ShrinkAxisMask(0)));
+
+  // stacked[:, 2:, :].
+  auto pc_slice_2to = ops::Identity(
+      s.WithOpName("pc_slice_2to_out"),
+      SS(s.WithOpName("pc_slice_2to"), stacked, begin_c, end_c_1to, strides,
+         SS::BeginMask(0b0101)  // 5
+             .EllipsisMask(0)
+             .EndMask(0b0111)  // 7
+             .NewAxisMask(0)
+             .ShrinkAxisMask(0)));
+
+  GrapplerItem item;
+  item.fetch = {"a",
+                "b",
+                "c",
+                "pa_slice_out",
+                "pb_slice_out",
+                "pc_slice_out",
+                "expanded_a",
+                "expanded_b",
+                "expanded_c",
+                "pa_slice_01_out",
+                "pa_slice_to1_out",
+                "pb_slice_12_out",
+                "pc_slice_2to_out"};
+  enum FetchItem {
+    fA,
+    fB,
+    fC,
+    fASliceOut,
+    fBSliceOut,
+    fCSliceOut,
+    fExpandedA,
+    fExpandedB,
+    fExpandedC,
+    fASlice01Out,
+    fASliceTo1Out,
+    fBSlice12Out,
+    fCSlice2ToOut,
+  };
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
+
+  // stacked[:, 0, :] == a.
+  test::ExpectTensorEqual<float>(tensors_expected[fA],
+                                 tensors_expected[fASliceOut]);
+  // stacked[:, 1, :] == b.
+  test::ExpectTensorEqual<float>(tensors_expected[fB],
+                                 tensors_expected[fBSliceOut]);
+  // stacked[:, 2, :] == c.
+  test::ExpectTensorEqual<float>(tensors_expected[fC],
+                                 tensors_expected[fCSliceOut]);
+
+  // stacked[:, 0:1, :] == expand_dims(a, 1).
+  test::ExpectTensorEqual<float>(tensors_expected[fExpandedA],
+                                 tensors_expected[fASlice01Out]);
+
+  // stacked[:, :1, :] == expand_dims(a, 1).
+  test::ExpectTensorEqual<float>(tensors_expected[fExpandedA],
+                                 tensors_expected[fASliceTo1Out]);
+
+  // stacked[:, 1:2, :] == expand_dims(b, 1).
+  test::ExpectTensorEqual<float>(tensors_expected[fExpandedB],
+                                 tensors_expected[fBSlice12Out]);
+  // stacked[:, 2:, :] == expand_dims(c, 1).
+  test::ExpectTensorEqual<float>(tensors_expected[fExpandedC],
+                                 tensors_expected[fCSlice2ToOut]);
+
+  GraphDef output;
+  ArithmeticOptimizer optimizer;
+  EnableOnlyRemoveStackStridedSliceSameAxis(&optimizer);
+  OptimizeAndPrune(&optimizer, &item, &output);
+
+  for (const auto& node : output.node()) {
+    if (node.name() == "pa_slice_out") {
+      EXPECT_EQ(node.input(0), "a");
+    } else if (node.name() == "pb_slice_out") {
+      EXPECT_EQ(node.input(0), "b");
+    } else if (node.name() == "pc_slice_out") {
+      EXPECT_EQ(node.input(0), "c");
+    } else if (str_util::EndsWith(node.name(), "_out")) {
+      EXPECT_EQ(strings::StrCat(node.input(0), "_out"),
+                strings::StrCat(
+                    "ArithmeticOptimizer/RemoveStackStridedSliceSameAxis_",
+                    node.name()));
+    }
+  }
+
+  auto tensors = EvaluateNodes(output, item.fetch);
+
+  // stacked[:, 0, :] == a.
+  test::ExpectTensorEqual<float>(tensors_expected[fA], tensors[fASliceOut]);
+
+  // stacked[:, 1, :] == b.
+  test::ExpectTensorEqual<float>(tensors_expected[fB], tensors[fBSliceOut]);
+  // stacked[:, 2, :] == c.
+  test::ExpectTensorEqual<float>(tensors_expected[fC], tensors[fCSliceOut]);
+
+  // stacked[:, 0:1, :] == expand_dims(a, 1).
+  test::ExpectTensorEqual<float>(tensors_expected[fExpandedA],
+                                 tensors[fASlice01Out]);
+
+  // stacked[:, :1, :] == expand_dims(a, 1).
+  test::ExpectTensorEqual<float>(tensors_expected[fExpandedA],
+                                 tensors[fASliceTo1Out]);
+
+  // stacked[:, 1:2, :] == expand_dims(b, 1).
+  test::ExpectTensorEqual<float>(tensors_expected[fExpandedB],
+                                 tensors[fBSlice12Out]);
+  // stacked[:, 2:, :] == expand_dims(c, 1).
+  test::ExpectTensorEqual<float>(tensors_expected[fExpandedC],
+                                 tensors[fCSlice2ToOut]);
+}
+
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h
index 2afb5df431..f31a30ec0e 100644
--- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h
+++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
 
 namespace tensorflow {
@@ -46,17 +47,20 @@ struct GraphOptimizerContext {
   GraphOptimizerContext(const std::unordered_set<string>* nodes_to_preserve,
                         GraphDef* optimized_graph,
                         GraphProperties* graph_properties, NodeMap* node_map,
+                        gtl::FlatSet<string>* feed_nodes,
                         RewriterConfig::Toggle opt_level)
       : nodes_to_preserve(nodes_to_preserve),
         optimized_graph(optimized_graph),
         graph_properties(graph_properties),
         node_map(node_map),
+        feed_nodes(feed_nodes),
         opt_level(opt_level) {}
 
   const std::unordered_set<string>* nodes_to_preserve;
   GraphDef* optimized_graph;
   GraphProperties* graph_properties;
   NodeMap* node_map;
+  gtl::FlatSet<string>* feed_nodes;
   RewriterConfig::Toggle opt_level;
 };
 
diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc b/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc
index 34f28c7c27..799c40c67b 100644
--- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc
+++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc
@@ -61,6 +61,7 @@ TEST_F(GraphOptimizerStageTest, OptimizedNodeName) {
                             /*optimized_graph*/ nullptr,
                             /*graph_properties*/ nullptr,
                             /*node_name*/ nullptr,
+                            /*feed_nodes*/ nullptr,
                             /*opt_level*/ RewriterConfig::ON);
   FakeOptimizerStage stage("my_opt", "my_stg", ctx);
 
@@ -97,6 +98,7 @@ TEST_F(GraphOptimizerStageTest, GetInputNodeAndProperties) {
                             /*optimized_graph*/ &item.graph,
                             /*graph_properties*/ &properties,
                             /*node_name*/ &node_map,
+                            /*feed_nodes*/ nullptr,
                             /*opt_level*/ RewriterConfig::ON);
   FakeOptimizerStage stage("my_opt", "my_stg", ctx);
 
@@ -137,6 +139,7 @@ TEST_F(GraphOptimizerStageTest, AddNodes) {
                             /*optimized_graph*/ &item.graph,
                             /*graph_properties*/ &properties,
                             /*node_name*/ &node_map,
+                            /*feed_nodes*/ nullptr,
                             /*opt_level*/ RewriterConfig::ON);
   FakeOptimizerStage stage("my_opt", "my_stg", ctx);
 
-- 
GitLab


From 828e374bfbe788a1c5ddbdbbd36149785ad6d0e6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 08:43:35 -0700
Subject: [PATCH 0689/1085] remove debug statements

PiperOrigin-RevId: 216536298
---
 tensorflow/contrib/lite/toco/import_tensorflow.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index ff67b306e0..41d02df5f0 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -1130,7 +1130,6 @@ tensorflow::Status ConvertUnsupportedOperator(
     for (int i = 0; i < op_def->output_arg_size(); ++i) {
       string multiples = op_def->output_arg(i).number_attr();
       int num_outputs = multiples.empty() ? 1 : GetIntAttr(node, multiples);
-      LOG(INFO) << "dddddddd " << num_outputs;
       for (int j = 0; j < num_outputs; ++j) {
         if (next_output == 0) {
           op->outputs.push_back(node.name());  // Implicit :0.
@@ -1141,7 +1140,6 @@ tensorflow::Status ConvertUnsupportedOperator(
       }
     }
   } else {
-    LOG(INFO) << "nodef!!!!!!!!!!! ";
     op->outputs.push_back(node.name());  // Implicit :0.
   }
 
-- 
GitLab


From ac53355550c994cf22485d977125b23153162593 Mon Sep 17 00:00:00 2001
From: Jason Furmanek <furmanek@us.ibm.com>
Date: Wed, 10 Oct 2018 16:34:16 +0000
Subject: [PATCH 0690/1085] Make nccl2 bazel configuration platform independent

---
 third_party/nccl/build_defs.bzl.tpl | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl
index ede1d3dad5..886f6ddcd4 100644
--- a/third_party/nccl/build_defs.bzl.tpl
+++ b/third_party/nccl/build_defs.bzl.tpl
@@ -152,6 +152,14 @@ def device_link(name, srcs):
         suffix = ".pic.a",
     )
 
+    cpu_arch = "X86_64"
+    native.genrule(
+        name = "get_cpu_gen",
+        outs = [cpu_arch],
+        cmd = "uname -m",
+    )
+    cpu_arch = cpu_arch.upper()
+
     # Device-link to cubins for each architecture.
     images = []
     cubins = []
@@ -159,7 +167,7 @@ def device_link(name, srcs):
         cubin = "%s_%s.cubin" % (name, arch)
         register_hdr = "%s_%s.h" % (name, arch)
         nvlink = "@local_config_nccl//:nvlink"
-        cmd = ("$(location %s) --cpu-arch=X86_64 " % nvlink +
+        cmd = ("$(location %s) --cpu-arch=%s " % (nvlink, cpu_arch) +
             "--arch=%s $(SRCS) " % arch +
             "--register-link-binaries=$(location %s) " % register_hdr +
             "--output-file=$(location %s)" % cubin)
@@ -207,4 +215,5 @@ def device_link(name, srcs):
             "@local_config_cuda//cuda:cuda_headers",
             "@local_config_cuda//cuda:cudart_static",
         ],
+        defines = ["__NV_EXTRA_INITIALIZATION=", "__NV_EXTRA_FINALIZATION="]
     )
-- 
GitLab


From f02251190f5908d2078e9fc11b92375dfc3a3054 Mon Sep 17 00:00:00 2001
From: Benjamin Barenblat <bbaren@google.com>
Date: Wed, 10 Oct 2018 09:47:43 -0700
Subject: [PATCH 0691/1085] Correct a couple of format strings

Change a couple of fscanf-style format strings to use the format macro
constants defined in cinttypes. This quashes -Wformat.

PiperOrigin-RevId: 216545604
---
 tensorflow/core/lib/strings/numbers.cc     | 3 ++-
 tensorflow/core/util/command_line_flags.cc | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc
index 87aa5915ff..fff6f1fedc 100644
--- a/tensorflow/core/lib/strings/numbers.cc
+++ b/tensorflow/core/lib/strings/numbers.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <stdio.h>
 #include <stdlib.h>
 #include <algorithm>
+#include <cinttypes>
 #include <cmath>
 #include <locale>
 #include <unordered_map>
@@ -391,7 +392,7 @@ string FpToString(Fprint fp) {
 bool StringToFp(const string& s, Fprint* fp) {
   char junk;
   uint64_t result;
-  if (sscanf(s.c_str(), "%lx%c", &result, &junk) == 1) {
+  if (sscanf(s.c_str(), "%" SCNx64 "%c", &result, &junk) == 1) {
     *fp = result;
     return true;
   } else {
diff --git a/tensorflow/core/util/command_line_flags.cc b/tensorflow/core/util/command_line_flags.cc
index 55f1e30880..f1196fdfec 100644
--- a/tensorflow/core/util/command_line_flags.cc
+++ b/tensorflow/core/util/command_line_flags.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <cinttypes>
 #include <string>
 #include <vector>
 
@@ -70,7 +71,7 @@ bool ParseInt64Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
       str_util::ConsumePrefix(&arg, "=")) {
     char extra;
     int64_t parsed_int64;
-    if (sscanf(arg.data(), "%ld%c", &parsed_int64, &extra) != 1) {
+    if (sscanf(arg.data(), "%" SCNd64 "%c", &parsed_int64, &extra) != 1) {
       LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
                  << ".";
       *value_parsing_ok = false;
-- 
GitLab


From 24593b1c6bd7b05115cbc4e61b08cf6953541a5c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 09:54:44 -0700
Subject: [PATCH 0692/1085] Adds `get_config` and `from_config` to Optimizers
 V2.

PiperOrigin-RevId: 216546565
---
 .../optimizer_v2/checkpointable_utils_test.py | 10 +++---
 .../python/keras/optimizer_v2/adadelta.py     | 10 ++++++
 .../keras/optimizer_v2/adadelta_test.py       | 17 +++++++++
 .../python/keras/optimizer_v2/adagrad.py      |  8 +++++
 .../python/keras/optimizer_v2/adagrad_test.py | 13 +++++++
 tensorflow/python/keras/optimizer_v2/adam.py  | 10 ++++++
 .../python/keras/optimizer_v2/adam_test.py    | 11 ++++++
 .../optimizer_v2/checkpointable_utils_test.py | 10 +++---
 .../python/keras/optimizer_v2/optimizer_v2.py | 36 +++++++++++++++++++
 .../python/keras/optimizer_v2/rmsprop.py      | 11 ++++++
 .../python/keras/optimizer_v2/rmsprop_test.py | 22 ++++++++++++
 tensorflow/python/keras/optimizer_v2/sgd.py   | 14 ++++++++
 .../python/keras/optimizer_v2/sgd_test.py     | 14 ++++++++
 13 files changed, 178 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
index 3c68ef995a..3e225ff0dd 100644
--- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
+++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
@@ -143,10 +143,12 @@ class CheckpointingTests(test.TestCase):
     suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
     expected_checkpoint_names = [
         name + suffix for name in expected_checkpoint_names]
-    # The Dense layers also save get_config() JSON
-    expected_checkpoint_names.extend(
-        ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
-         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"])
+    # The optimizer and Dense layers also save get_config() JSON
+    expected_checkpoint_names.extend([
+        "optimizer/.ATTRIBUTES/OBJECT_CONFIG_JSON",
+        "model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
+        "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"
+    ])
     named_variables = {v.name: v for v in named_variables}
     six.assertCountEqual(self, expected_checkpoint_names,
                          named_variables.keys())
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py b/tensorflow/python/keras/optimizer_v2/adadelta.py
index d3b3c9c12e..2ae930fe35 100644
--- a/tensorflow/python/keras/optimizer_v2/adadelta.py
+++ b/tensorflow/python/keras/optimizer_v2/adadelta.py
@@ -37,6 +37,7 @@ class Adadelta(optimizer_v2.OptimizerV2):
   Tensor or a Python value.
 
   Arguments:
+
       learning_rate: float hyperparameter >= 0. Learning rate. It is recommended
         to leave it at the default value.
       rho: float hyperparameter >= 0. The decay rate.
@@ -114,3 +115,12 @@ class Adadelta(optimizer_v2.OptimizerV2):
         grad,
         indices,
         use_locking=self._use_locking)
+
+  def get_config(self):
+    config = super(Adadelta, self).get_config()
+    config.update({
+        "learning_rate": self._serialize_hyperparameter("learning_rate"),
+        "rho": self._serialize_hyperparameter("rho"),
+        "epsilon": self._serialize_hyperparameter("epsilon")
+    })
+    return config
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta_test.py b/tensorflow/python/keras/optimizer_v2/adadelta_test.py
index 6e48f92e4f..6c8fa874e1 100644
--- a/tensorflow/python/keras/optimizer_v2/adadelta_test.py
+++ b/tensorflow/python/keras/optimizer_v2/adadelta_test.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.keras.optimizer_v2 import adadelta
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import math_ops
@@ -161,6 +162,22 @@ class AdadeltaOptimizerTest(test.TestCase):
         self.assertAllCloseAccordingToType(
             [[-111, -138]], var0.eval())
 
+  def testConfig(self):
+
+    def rho():
+      return ops.convert_to_tensor(1.0)
+
+    epsilon = ops.convert_to_tensor(1.0)
+
+    opt = adadelta.Adadelta(learning_rate=1.0, rho=rho, epsilon=epsilon)
+    config = opt.get_config()
+    opt2 = adadelta.Adadelta.from_config(config)
+    self.assertEqual(opt._hyper["learning_rate"][1],
+                     opt2._hyper["learning_rate"][1])
+    self.assertEqual(opt._hyper["rho"][1].__name__,
+                     opt2._hyper["rho"][1].__name__)
+    self.assertEqual(opt._hyper["epsilon"][1], opt2._hyper["epsilon"][1])
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad.py b/tensorflow/python/keras/optimizer_v2/adagrad.py
index 2d8cec2300..fe3e8799ef 100644
--- a/tensorflow/python/keras/optimizer_v2/adagrad.py
+++ b/tensorflow/python/keras/optimizer_v2/adagrad.py
@@ -117,3 +117,11 @@ class Adagrad(optimizer_v2.OptimizerV2):
         grad,
         indices,
         use_locking=self._use_locking)
+
+  def get_config(self):
+    config = super(Adagrad, self).get_config()
+    config.update({
+        "learning_rate": self._serialize_hyperparameter("learning_rate"),
+        "initial_accumulator_value": self._initial_accumulator_value
+    })
+    return config
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad_test.py b/tensorflow/python/keras/optimizer_v2/adagrad_test.py
index fc4ef5c399..81a0993897 100644
--- a/tensorflow/python/keras/optimizer_v2/adagrad_test.py
+++ b/tensorflow/python/keras/optimizer_v2/adagrad_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import types as python_types
+
 import numpy as np
 
 from tensorflow.python.framework import constant_op
@@ -271,6 +273,17 @@ class AdagradOptimizerTest(test.TestCase):
       # Creating optimizer should cause no exception.
       adagrad.Adagrad(3.0, initial_accumulator_value=0.1)
 
+  def testConfig(self):
+    opt = adagrad.Adagrad(
+        learning_rate=lambda: ops.convert_to_tensor(1.0),
+        initial_accumulator_value=2.0)
+    config = opt.get_config()
+    opt2 = adagrad.Adagrad.from_config(config)
+    self.assertIsInstance(opt2._hyper["learning_rate"][1],
+                          python_types.LambdaType)
+    self.assertEqual(opt._initial_accumulator_value,
+                     opt2._initial_accumulator_value)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py
index 8367228d7a..9f1b3330e5 100644
--- a/tensorflow/python/keras/optimizer_v2/adam.py
+++ b/tensorflow/python/keras/optimizer_v2/adam.py
@@ -201,3 +201,13 @@ class Adam(optimizer_v2.OptimizerV2):
     update_beta_2 = beta_2_power.assign(
         beta_2_power * state.get_hyper("beta_2"), use_locking=self._use_locking)
     return control_flow_ops.group(update_beta_1, update_beta_2)
+
+  def get_config(self):
+    config = super(Adam, self).get_config()
+    config.update({
+        "learning_rate": self._serialize_hyperparameter("learning_rate"),
+        "beta_1": self._serialize_hyperparameter("beta_1"),
+        "beta_2": self._serialize_hyperparameter("beta_2"),
+        "epsilon": self._serialize_hyperparameter("epsilon")
+    })
+    return config
diff --git a/tensorflow/python/keras/optimizer_v2/adam_test.py b/tensorflow/python/keras/optimizer_v2/adam_test.py
index 77796317a1..ff328cf925 100644
--- a/tensorflow/python/keras/optimizer_v2/adam_test.py
+++ b/tensorflow/python/keras/optimizer_v2/adam_test.py
@@ -329,5 +329,16 @@ class AdamOptimizerTest(test.TestCase):
       # for v1 and v2 respectively.
       self.assertEqual(6, len(set(opt.variables())))
 
+  def testConfig(self):
+    opt = adam.Adam(learning_rate=1.0, beta_1=2.0, beta_2=3.0, epsilon=4.0)
+    config = opt.get_config()
+    opt2 = adam.Adam.from_config(config)
+    self.assertEqual(opt._hyper["learning_rate"][1],
+                     opt2._hyper["learning_rate"][1])
+    self.assertEqual(opt._hyper["beta_1"][1], opt2._hyper["beta_1"][1])
+    self.assertEqual(opt._hyper["beta_2"][1], opt2._hyper["beta_2"][1])
+    self.assertEqual(opt._hyper["epsilon"][1], opt2._hyper["epsilon"][1])
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py b/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py
index 338c04148b..9e66eca9b0 100644
--- a/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py
+++ b/tensorflow/python/keras/optimizer_v2/checkpointable_utils_test.py
@@ -143,10 +143,12 @@ class CheckpointingTests(test.TestCase):
     suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
     expected_checkpoint_names = [
         name + suffix for name in expected_checkpoint_names]
-    # The Dense layers also save get_config() JSON
-    expected_checkpoint_names.extend(
-        ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
-         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"])
+    # The optimizer and Dense layers also save get_config() JSON
+    expected_checkpoint_names.extend([
+        "optimizer/.ATTRIBUTES/OBJECT_CONFIG_JSON",
+        "model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
+        "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"
+    ])
     named_variables = {v.name: v for v in named_variables}
     six.assertCountEqual(self, expected_checkpoint_names,
                          named_variables.keys())
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index bd5557f4fd..7d05be694e 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -1319,6 +1319,42 @@ class OptimizerV2(optimizer_v1.Optimizer):
         variable=variable,
         optional_op_name=self._name)
 
+  def get_config(self):
+    """Returns the config of the optimimizer.
+
+    An optimizer config is a Python dictionary (serializable)
+    containing the configuration of an optimizer.
+    The same optimizer can be reinstantiated later
+    (without any saved state) from this configuration.
+
+    Returns:
+        Python dictionary.
+    """
+    return {"name": self._name}
+
+  @classmethod
+  def from_config(cls, config, custom_objects=None):
+    """Creates an optimizer from its config.
+
+    This method is the reverse of `get_config`,
+    capable of instantiating the same optimizer from the config
+    dictionary.
+
+    Arguments:
+        config: A Python dictionary, typically the output of get_config.
+        custom_objects: A Python dictionary mapping names to additional Python
+          objects used to create this optimizer, such as a function used for a
+          hyperparameter.
+
+    Returns:
+        An optimizer instance.
+    """
+    return cls(**config)
+
+  def _serialize_hyperparameter(self, hyperparameter_name):
+    """Serialize a hyperparameter that can be a float, callable, or Tensor."""
+    return self._hyper[hyperparameter_name][1]
+
   # --------------
   # Unsupported parent methods
   # --------------
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py
index 2748d8eff7..9b9d0c12d5 100644
--- a/tensorflow/python/keras/optimizer_v2/rmsprop.py
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py
@@ -237,3 +237,14 @@ class RMSProp(optimizer_v2.OptimizerV2):
           grad,
           indices,
           use_locking=self._use_locking)
+
+  def get_config(self):
+    config = super(RMSProp, self).get_config()
+    config.update({
+        "learning_rate": self._serialize_hyperparameter("learning_rate"),
+        "rho": self._serialize_hyperparameter("rho"),
+        "momentum": self._serialize_hyperparameter("momentum"),
+        "epsilon": self._serialize_hyperparameter("epsilon"),
+        "centered": self._centered
+    })
+    return config
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
index 2c5eccdc5b..851c6b283e 100644
--- a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import copy
 import math
+import types as python_types
 
 from absl.testing import parameterized
 import numpy as np
@@ -439,6 +440,27 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
                (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5)))
           ]), var1.eval())
 
+  def testConfig(self):
+
+    def momentum():
+      return ops.convert_to_tensor(3.0)
+
+    opt = rmsprop.RMSProp(
+        learning_rate=1.0,
+        rho=2.0,
+        momentum=momentum,
+        epsilon=lambda: ops.convert_to_tensor(4.0),
+        centered=True)
+    config = opt.get_config()
+    opt2 = rmsprop.RMSProp.from_config(config)
+    self.assertEqual(opt._hyper["learning_rate"][1],
+                     opt2._hyper["learning_rate"][1])
+    self.assertEqual(opt._hyper["rho"][1], opt2._hyper["rho"][1])
+    self.assertEqual(opt._hyper["momentum"][1].__name__,
+                     opt2._hyper["momentum"][1].__name__)
+    self.assertIsInstance(opt2._hyper["epsilon"][1], python_types.LambdaType)
+    self.assertEqual(True, opt2._centered)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/sgd.py b/tensorflow/python/keras/optimizer_v2/sgd.py
index f5583691f7..4bd0671735 100644
--- a/tensorflow/python/keras/optimizer_v2/sgd.py
+++ b/tensorflow/python/keras/optimizer_v2/sgd.py
@@ -168,3 +168,17 @@ class SGD(optimizer_v2.OptimizerV2):
           grad.values * state.get_hyper("learning_rate", var.dtype.base_dtype),
           grad.indices, grad.dense_shape)
       return var.scatter_sub(delta, use_locking=self._use_locking)
+
+  def get_config(self):
+    config = super(SGD, self).get_config()
+    # Control whether momentum variables are created.
+    if not self._use_momentum:
+      momentum = None
+    else:
+      momentum = self._serializer_hyperparameter("momentum")
+    config.update({
+        "learning_rate": self._serialize_hyperparameter("learning_rate"),
+        "momentum": momentum,
+        "nesterov": self._use_nesterov
+    })
+    return config
diff --git a/tensorflow/python/keras/optimizer_v2/sgd_test.py b/tensorflow/python/keras/optimizer_v2/sgd_test.py
index eb39aac283..b1dc4fab61 100644
--- a/tensorflow/python/keras/optimizer_v2/sgd_test.py
+++ b/tensorflow/python/keras/optimizer_v2/sgd_test.py
@@ -754,6 +754,20 @@ class MomentumOptimizerTest(test.TestCase):
                     (0.9 * 0.01 + 0.01) * 2.0)
             ]), var1.eval())
 
+  def testConfig(self):
+    opt = sgd.SGD(learning_rate=1.0, momentum=2.0, nesterov=True)
+    config = opt.get_config()
+    opt2 = sgd.SGD.from_config(config)
+    self.assertEqual(opt._hyper["learning_rate"][1],
+                     opt2._hyper["learning_rate"][1])
+    self.assertEqual(opt._hyper["momentum"][1], opt2._hyper["momentum"][1])
+    self.assertEqual(opt2._use_nesterov, True)
+
+    opt = sgd.SGD(momentum=None)
+    config = opt.get_config()
+    opt2 = sgd.SGD.from_config(config)
+    self.assertEqual(False, opt2._use_momentum)
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 9fe6fe02a1f9ae89bdf395a4a092b62602eec36e Mon Sep 17 00:00:00 2001
From: Guangda Lai <laigd@google.com>
Date: Wed, 10 Oct 2018 09:58:31 -0700
Subject: [PATCH 0693/1085] Add comment about adding
 --define=with_xla_support=true build option for mnist_softmax_xla example.

PiperOrigin-RevId: 216547149
---
 tensorflow/examples/tutorials/mnist/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD
index 99da44d6d5..a3dd927509 100644
--- a/tensorflow/examples/tutorials/mnist/BUILD
+++ b/tensorflow/examples/tutorials/mnist/BUILD
@@ -83,6 +83,8 @@ py_binary(
     ],
 )
 
+# Note: we need to add --define=with_xla_support=true to the build command in
+# order to run the mnist_softmax_xla example with xla.
 py_binary(
     name = "mnist_softmax_xla",
     srcs = [
-- 
GitLab


From e09ddb4290f74f053c6420d7bc140486b237404b Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 10 Oct 2018 10:05:49 -0700
Subject: [PATCH 0694/1085] Port the conditional control flow conversion to the
 new operators.

PiperOrigin-RevId: 216548561
---
 .../autograph/converters/control_flow.py      | 31 ++++++----
 .../autograph/operators/control_flow_test.py  | 34 +++++++----
 tensorflow/python/autograph/utils/BUILD       | 11 ----
 tensorflow/python/autograph/utils/__init__.py |  1 -
 .../autograph/utils/multiple_dispatch.py      | 56 -------------------
 .../autograph/utils/multiple_dispatch_test.py | 46 ---------------
 6 files changed, 45 insertions(+), 134 deletions(-)
 delete mode 100644 tensorflow/python/autograph/utils/multiple_dispatch.py
 delete mode 100644 tensorflow/python/autograph/utils/multiple_dispatch_test.py

diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py
index 70879f6c97..e52e98f42a 100644
--- a/tensorflow/python/autograph/converters/control_flow.py
+++ b/tensorflow/python/autograph/converters/control_flow.py
@@ -49,12 +49,23 @@ class ControlFlowTransformer(converter.Base):
 
   def _create_cond_branch(self, body_name, aliased_orig_names,
                           aliased_new_names, body, returns):
+    if len(returns) == 1:
+      template = """
+        return retval
+      """
+      return_stmt = templates.replace(template, retval=returns[0])
+    else:
+      template = """
+        return (retvals,)
+      """
+      return_stmt = templates.replace(template, retvals=returns)
+
     if aliased_orig_names:
       template = """
         def body_name():
           aliased_new_names, = aliased_orig_names,
           body
-          return (returns,)
+          return_stmt
       """
       return templates.replace(
           template,
@@ -62,20 +73,20 @@ class ControlFlowTransformer(converter.Base):
           body=body,
           aliased_orig_names=aliased_orig_names,
           aliased_new_names=aliased_new_names,
-          returns=returns)
+          return_stmt=return_stmt)
     else:
       template = """
         def body_name():
           body
-          return (returns,)
+          return_stmt
       """
       return templates.replace(
-          template, body_name=body_name, body=body, returns=returns)
+          template, body_name=body_name, body=body, return_stmt=return_stmt)
 
   def _create_cond_expr(self, results, test, body_name, orelse_name):
     if results is not None:
       template = """
-        results = ag__.utils.run_cond(test, body_name, orelse_name)
+        results = ag__.if_stmt(test, body_name, orelse_name)
       """
       return templates.replace(
           template,
@@ -85,7 +96,7 @@ class ControlFlowTransformer(converter.Base):
           orelse_name=orelse_name)
     else:
       template = """
-        ag__.utils.run_cond(test, body_name, orelse_name)
+        ag__.if_stmt(test, body_name, orelse_name)
       """
       return templates.replace(
           template, test=test, body_name=body_name, orelse_name=orelse_name)
@@ -111,7 +122,7 @@ class ControlFlowTransformer(converter.Base):
       elif s.is_composite():
         # Special treatment for compound objects: if any of their owner entities
         # are live, then they are outputs as well.
-        if any(owner in live_out for owner in s.owner_set):
+        if live_out & s.owner_set:
           returned_from_cond.add(s)
 
     need_alias_in_body = body_scope.modified & defined_in
@@ -152,7 +163,6 @@ class ControlFlowTransformer(converter.Base):
     returned_from_cond = tuple(returned_from_cond)
     if returned_from_cond:
       if len(returned_from_cond) == 1:
-        # TODO(mdan): Move this quirk into the operator implementation.
         cond_results = returned_from_cond[0]
       else:
         cond_results = gast.Tuple([s.ast() for s in returned_from_cond], None)
@@ -171,8 +181,9 @@ class ControlFlowTransformer(converter.Base):
       # actually has some return value as well.
       cond_results = None
       # TODO(mdan): This doesn't belong here; it's specific to the operator.
-      returned_from_body = templates.replace_as_expression('tf.constant(1)')
-      returned_from_orelse = templates.replace_as_expression('tf.constant(1)')
+      returned_from_body = (templates.replace_as_expression('tf.constant(1)'),)
+      returned_from_orelse = (
+          templates.replace_as_expression('tf.constant(1)'),)
 
     body_name = self.ctx.namer.new_symbol('if_true', body_scope.referenced)
     orelse_name = self.ctx.namer.new_symbol('if_false', orelse_scope.referenced)
diff --git a/tensorflow/python/autograph/operators/control_flow_test.py b/tensorflow/python/autograph/operators/control_flow_test.py
index bb214b6f16..2dea18dc5f 100644
--- a/tensorflow/python/autograph/operators/control_flow_test.py
+++ b/tensorflow/python/autograph/operators/control_flow_test.py
@@ -80,20 +80,34 @@ class WhileLoopTest(test.TestCase):
 
 class IfStmtTest(test.TestCase):
 
-  def test_tensor(self):
-    def test_if_stmt(cond):
-      return control_flow.if_stmt(
-          cond=cond,
-          body=lambda: 1,
-          orelse=lambda: -1)
+  def single_return_if_stmt(self, cond):
+    return control_flow.if_stmt(cond=cond, body=lambda: 1, orelse=lambda: -1)
+
+  def multi_return_if_stmt(self, cond):
+    return control_flow.if_stmt(
+        cond=cond, body=lambda: (1, 2), orelse=lambda: (-1, -2))
 
+  def test_tensor(self):
     with self.cached_session() as sess:
-      self.assertEqual(1, sess.run(test_if_stmt(constant_op.constant(True))))
-      self.assertEqual(-1, sess.run(test_if_stmt(constant_op.constant(False))))
+      t = self.single_return_if_stmt(constant_op.constant(True))
+      self.assertEqual(1, sess.run(t))
+      t = self.single_return_if_stmt(constant_op.constant(False))
+      self.assertEqual(-1, sess.run(t))
 
   def test_python(self):
-    self.assertEqual(1, control_flow.if_stmt(True, lambda: 1, lambda: -1))
-    self.assertEqual(-1, control_flow.if_stmt(False, lambda: 1, lambda: -1))
+    self.assertEqual(1, self.single_return_if_stmt(True))
+    self.assertEqual(-1, self.single_return_if_stmt(False))
+
+  def test_tensor_multiple_returns(self):
+    with self.cached_session() as sess:
+      t = self.multi_return_if_stmt(constant_op.constant(True))
+      self.assertAllEqual([1, 2], sess.run(t))
+      t = self.multi_return_if_stmt(constant_op.constant(False))
+      self.assertAllEqual([-1, -2], sess.run(t))
+
+  def test_python_multiple_returns(self):
+    self.assertEqual((1, 2), self.multi_return_if_stmt(True))
+    self.assertEqual((-1, -2), self.multi_return_if_stmt(False))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/utils/BUILD b/tensorflow/python/autograph/utils/BUILD
index 22451d4f3f..790c661661 100644
--- a/tensorflow/python/autograph/utils/BUILD
+++ b/tensorflow/python/autograph/utils/BUILD
@@ -22,7 +22,6 @@ py_library(
         "__init__.py",
         "context_managers.py",
         "misc.py",
-        "multiple_dispatch.py",
         "py_func.py",
         "tensor_list.py",
         "tensors.py",
@@ -61,16 +60,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "multiple_dispatch_test",
-    srcs = ["multiple_dispatch_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":utils",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
 py_test(
     name = "py_func_test",
     srcs = ["py_func_test.py"],
diff --git a/tensorflow/python/autograph/utils/__init__.py b/tensorflow/python/autograph/utils/__init__.py
index c781958481..d9031159b1 100644
--- a/tensorflow/python/autograph/utils/__init__.py
+++ b/tensorflow/python/autograph/utils/__init__.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 from tensorflow.python.autograph.utils.context_managers import control_dependency_on_returns
 from tensorflow.python.autograph.utils.misc import alias_tensors
-from tensorflow.python.autograph.utils.multiple_dispatch import run_cond
 from tensorflow.python.autograph.utils.py_func import wrap_py_func
 from tensorflow.python.autograph.utils.tensor_list import dynamic_list_append
 from tensorflow.python.autograph.utils.testing import fake_tf
diff --git a/tensorflow/python/autograph/utils/multiple_dispatch.py b/tensorflow/python/autograph/utils/multiple_dispatch.py
deleted file mode 100644
index 107c8f7a68..0000000000
--- a/tensorflow/python/autograph/utils/multiple_dispatch.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Utilities for type-dependent behavior used in autograph-generated code."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.autograph.utils.type_check import is_tensor
-from tensorflow.python.ops import control_flow_ops
-
-
-def run_cond(condition, true_fn, false_fn):
-  """Type-dependent functional conditional.
-
-  Args:
-    condition: A Tensor or Python bool.
-    true_fn: A Python callable implementing the true branch of the conditional.
-    false_fn: A Python callable implementing the false branch of the
-      conditional.
-
-  Returns:
-    result: The result of calling the appropriate branch. If condition is a
-    Tensor, tf.cond will be used. Otherwise, a standard Python if statement will
-    be ran.
-  """
-  if is_tensor(condition):
-    return control_flow_ops.cond(condition, true_fn, false_fn)
-  else:
-    return py_cond(condition, true_fn, false_fn)
-
-
-def py_cond(condition, true_fn, false_fn):
-  """Functional version of Python's conditional."""
-  if condition:
-    results = true_fn()
-  else:
-    results = false_fn()
-
-  # The contract for the branch functions is to return tuples, but they should
-  # be collapsed to a single element when there is only one output.
-  if len(results) == 1:
-    return results[0]
-  return results
diff --git a/tensorflow/python/autograph/utils/multiple_dispatch_test.py b/tensorflow/python/autograph/utils/multiple_dispatch_test.py
deleted file mode 100644
index 2a77c895ce..0000000000
--- a/tensorflow/python/autograph/utils/multiple_dispatch_test.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for multiple_dispatch."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.autograph.utils import multiple_dispatch
-from tensorflow.python.client.session import Session
-from tensorflow.python.framework.constant_op import constant
-from tensorflow.python.platform import test
-
-
-class MultipleDispatchTest(test.TestCase):
-
-  def test_run_cond_python(self):
-    true_fn = lambda: (2,)
-    false_fn = lambda: (3,)
-    self.assertEqual(multiple_dispatch.run_cond(True, true_fn, false_fn), 2)
-    self.assertEqual(multiple_dispatch.run_cond(False, true_fn, false_fn), 3)
-
-  def test_run_cond_tf(self):
-    true_fn = lambda: (constant(2),)
-    false_fn = lambda: (constant(3),)
-    with Session() as sess:
-      out = multiple_dispatch.run_cond(constant(True), true_fn, false_fn)
-      self.assertEqual(sess.run(out), 2)
-      out = multiple_dispatch.run_cond(constant(False), true_fn, false_fn)
-      self.assertEqual(sess.run(out), 3)
-
-
-if __name__ == '__main__':
-  test.main()
-- 
GitLab


From c602fc061ae817ba09cd9aed35f955f45955206f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 10:16:38 -0700
Subject: [PATCH 0695/1085] Improve shape inference for tf.eye.

PiperOrigin-RevId: 216550243
---
 tensorflow/python/kernel_tests/BUILD          |   1 +
 .../python/kernel_tests/linalg_ops_test.py    | 173 ++++++++++++------
 tensorflow/python/ops/linalg_ops_impl.py      |  27 ++-
 3 files changed, 137 insertions(+), 64 deletions(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 4e8639dfc8..cc6fbf26c2 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1785,6 +1785,7 @@ cuda_py_test(
     size = "medium",
     srcs = ["linalg_ops_test.py"],
     additional_deps = [
+        "@absl_py//absl/testing:parameterized",
         "//third_party/py/numpy",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py
index aa17f727d0..ccb3feeaf6 100644
--- a/tensorflow/python/kernel_tests/linalg_ops_test.py
+++ b/tensorflow/python/kernel_tests/linalg_ops_test.py
@@ -18,6 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import itertools
+
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.framework import dtypes
@@ -52,7 +55,7 @@ class CholeskySolveTest(test.TestCase):
   def test_works_with_five_different_random_pos_def_matrices(self):
     for n in range(1, 6):
       for np_type, atol in [(np.float32, 0.05), (np.float64, 1e-5)]:
-        with self.test_session(use_gpu=True):
+        with self.session(use_gpu=True):
           # Create 2 x n x n matrix
           array = np.array(
               [_RandomPDMatrix(n, self.rng),
@@ -76,7 +79,7 @@ class LogdetTest(test.TestCase):
                              (np.complex64, 0.05), (np.complex128, 1e-5)]:
         matrix = _RandomPDMatrix(n, self.rng, np_dtype)
         _, logdet_np = np.linalg.slogdet(matrix)
-        with self.test_session(use_gpu=True):
+        with self.session(use_gpu=True):
           # Create 2 x n x n matrix
           # matrix = np.array(
           #     [_RandomPDMatrix(n, self.rng, np_dtype),
@@ -89,7 +92,7 @@ class LogdetTest(test.TestCase):
                            (np.complex64, 0.05), (np.complex128, 1e-5)]:
       matrix = (np.eye(20) * 1e-6).astype(np_dtype)
       _, logdet_np = np.linalg.slogdet(matrix)
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         logdet_tf = linalg.logdet(matrix)
         self.assertAllClose(logdet_np, logdet_tf.eval(), atol=atol)
 
@@ -105,7 +108,7 @@ class SlogdetTest(test.TestCase):
                              (np.complex64, 0.05), (np.complex128, 1e-5)]:
         matrix = _RandomPDMatrix(n, self.rng, np_dtype)
         sign_np, log_abs_det_np = np.linalg.slogdet(matrix)
-        with self.test_session(use_gpu=True):
+        with self.session(use_gpu=True):
           sign_tf, log_abs_det_tf = linalg.slogdet(matrix)
           self.assertAllClose(log_abs_det_np, log_abs_det_tf.eval(), atol=atol)
           self.assertAllClose(sign_np, sign_tf.eval(), atol=atol)
@@ -115,7 +118,7 @@ class SlogdetTest(test.TestCase):
                            (np.complex64, 0.05), (np.complex128, 1e-5)]:
       matrix = (np.eye(20) * 1e-6).astype(np_dtype)
       sign_np, log_abs_det_np = np.linalg.slogdet(matrix)
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         sign_tf, log_abs_det_tf = linalg.slogdet(matrix)
         self.assertAllClose(log_abs_det_np, log_abs_det_tf.eval(), atol=atol)
         self.assertAllClose(sign_np, sign_tf.eval(), atol=atol)
@@ -128,66 +131,126 @@ class AdjointTest(test.TestCase):
       matrix_np = np.array([[1 + 1j, 2 + 2j, 3 + 3j], [4 + 4j, 5 + 5j,
                                                        6 + 6j]]).astype(dtype)
       expected_transposed = np.conj(matrix_np.T)
-      with self.cached_session():
+      with self.session():
         matrix = ops.convert_to_tensor(matrix_np)
         transposed = linalg.adjoint(matrix)
         self.assertEqual((3, 2), transposed.get_shape())
         self.assertAllEqual(expected_transposed, transposed.eval())
 
 
-class EyeTest(test.TestCase):
-  pass  # Will be filled in below
-
-
-def _GetEyeTest(num_rows, num_columns, batch_shape, dtype):
-
-  def Test(self):
+class EyeTest(parameterized.TestCase, test.TestCase):
+
+  def testShapeInferenceNoBatch(self):
+    self.assertEqual((2, 2), linalg_ops.eye(num_rows=2).shape)
+    self.assertEqual((2, 3), linalg_ops.eye(num_rows=2, num_columns=3).shape)
+
+  def testShapeInferenceStaticBatch(self):
+    batch_shape = (2, 3)
+    self.assertEqual(
+        (2, 3, 2, 2),
+        linalg_ops.eye(num_rows=2, batch_shape=batch_shape).shape)
+    self.assertEqual(
+        (2, 3, 2, 3),
+        linalg_ops.eye(
+            num_rows=2, num_columns=3, batch_shape=batch_shape).shape)
+
+  @parameterized.named_parameters(
+      ("DynamicRow", array_ops.placeholder_with_default(2, shape=None), None),
+      ("DynamicRowStaticColumn",
+       array_ops.placeholder_with_default(2, shape=None),
+       3),
+      ("StaticRowDynamicColumn",
+       2,
+       array_ops.placeholder_with_default(3, shape=None)),
+      ("DynamicRowDynamicColumn",
+       array_ops.placeholder_with_default(2, shape=None),
+       array_ops.placeholder_with_default(3, shape=None)))
+  def testShapeInferenceStaticBatchWith(self, num_rows, num_columns):
+    batch_shape = (2, 3)
+    identity_matrix = linalg_ops.eye(
+        num_rows=num_rows,
+        num_columns=num_columns,
+        batch_shape=batch_shape)
+    self.assertEqual(4, identity_matrix.shape.ndims)
+    self.assertEqual((2, 3), identity_matrix.shape[:2])
+    if num_rows is not None and not isinstance(num_rows, ops.Tensor):
+      self.assertEqual(2, identity_matrix.shape[-2])
+
+    if num_columns is not None and not isinstance(num_columns, ops.Tensor):
+      self.assertEqual(3, identity_matrix.shape[-1])
+
+  @parameterized.parameters(
+      itertools.product(
+          # num_rows
+          [0, 1, 2, 5],
+          # num_columns
+          [None, 0, 1, 2, 5],
+          # batch_shape
+          [None, [], [2], [2, 3]],
+          # dtype
+          [
+              dtypes.int32,
+              dtypes.int64,
+              dtypes.float32,
+              dtypes.float64,
+              dtypes.complex64,
+              dtypes.complex128
+          ])
+      )
+  def test_eye_no_placeholder(self, num_rows, num_columns, batch_shape, dtype):
     eye_np = np.eye(num_rows, M=num_columns, dtype=dtype.as_numpy_dtype)
     if batch_shape is not None:
       eye_np = np.tile(eye_np, batch_shape + [1, 1])
-    for use_placeholder in False, True:
-      if use_placeholder and (num_columns is None or batch_shape is None):
-        return
-      with self.test_session(use_gpu=True) as sess:
-        if use_placeholder:
-          num_rows_placeholder = array_ops.placeholder(
-              dtypes.int32, name="num_rows")
-          num_columns_placeholder = array_ops.placeholder(
-              dtypes.int32, name="num_columns")
-          batch_shape_placeholder = array_ops.placeholder(
-              dtypes.int32, name="batch_shape")
-          eye = linalg_ops.eye(
-              num_rows_placeholder,
-              num_columns=num_columns_placeholder,
-              batch_shape=batch_shape_placeholder,
-              dtype=dtype)
-          eye_tf = sess.run(
-              eye,
-              feed_dict={
-                  num_rows_placeholder: num_rows,
-                  num_columns_placeholder: num_columns,
-                  batch_shape_placeholder: batch_shape
-              })
-        else:
-          eye_tf = linalg_ops.eye(
-              num_rows,
-              num_columns=num_columns,
-              batch_shape=batch_shape,
-              dtype=dtype).eval()
-        self.assertAllEqual(eye_np, eye_tf)
-
-  return Test
+    eye_tf = self.evaluate(linalg_ops.eye(
+        num_rows,
+        num_columns=num_columns,
+        batch_shape=batch_shape,
+        dtype=dtype))
+    self.assertAllEqual(eye_np, eye_tf)
+
+  @parameterized.parameters(
+      itertools.product(
+          # num_rows
+          [0, 1, 2, 5],
+          # num_columns
+          [0, 1, 2, 5],
+          # batch_shape
+          [[], [2], [2, 3]],
+          # dtype
+          [
+              dtypes.int32,
+              dtypes.int64,
+              dtypes.float32,
+              dtypes.float64,
+              dtypes.complex64,
+              dtypes.complex128
+          ])
+      )
+  def test_eye_with_placeholder(
+      self, num_rows, num_columns, batch_shape, dtype):
+    eye_np = np.eye(num_rows, M=num_columns, dtype=dtype.as_numpy_dtype)
+    eye_np = np.tile(eye_np, batch_shape + [1, 1])
+    num_rows_placeholder = array_ops.placeholder(
+        dtypes.int32, name="num_rows")
+    num_columns_placeholder = array_ops.placeholder(
+        dtypes.int32, name="num_columns")
+    batch_shape_placeholder = array_ops.placeholder(
+        dtypes.int32, name="batch_shape")
+    eye = linalg_ops.eye(
+        num_rows_placeholder,
+        num_columns=num_columns_placeholder,
+        batch_shape=batch_shape_placeholder,
+        dtype=dtype)
+    with self.session(use_gpu=True) as sess:
+      eye_tf = sess.run(
+          eye,
+          feed_dict={
+              num_rows_placeholder: num_rows,
+              num_columns_placeholder: num_columns,
+              batch_shape_placeholder: batch_shape
+          })
+    self.assertAllEqual(eye_np, eye_tf)
 
 
 if __name__ == "__main__":
-  for _num_rows in 0, 1, 2, 5:
-    for _num_columns in None, 0, 1, 2, 5:
-      for _batch_shape in None, [], [2], [2, 3]:
-        for _dtype in (dtypes.int32, dtypes.int64, dtypes.float32,
-                       dtypes.float64, dtypes.complex64, dtypes.complex128):
-          name = "dtype_%s_num_rows_%s_num_column_%s_batch_shape_%s_" % (
-              _dtype.name, _num_rows, _num_columns, _batch_shape)
-          _AddTest(EyeTest, "EyeTest", name,
-                   _GetEyeTest(_num_rows, _num_columns, _batch_shape, _dtype))
-
   test.main()
diff --git a/tensorflow/python/ops/linalg_ops_impl.py b/tensorflow/python/ops/linalg_ops_impl.py
index e7c89f6ae3..37c724e032 100644
--- a/tensorflow/python/ops/linalg_ops_impl.py
+++ b/tensorflow/python/ops/linalg_ops_impl.py
@@ -44,22 +44,31 @@ def eye(num_rows,
     is_square = num_columns is None
     batch_shape = [] if batch_shape is None else batch_shape
     num_columns = num_rows if num_columns is None else num_columns
-    if isinstance(num_rows, ops.Tensor) or isinstance(
-        num_columns, ops.Tensor) or isinstance(batch_shape, ops.Tensor):
-      batch_shape = ops.convert_to_tensor(
-          batch_shape, name='shape', dtype=dtypes.int32)
+
+    # We cannot statically infer what the diagonal size should be:
+    if (isinstance(num_rows, ops.Tensor) or
+        isinstance(num_columns, ops.Tensor)):
       diag_size = math_ops.minimum(num_rows, num_columns)
-      diag_shape = array_ops.concat((batch_shape, [diag_size]), 0)
-      if not is_square:
-        shape = array_ops.concat((batch_shape, [num_rows, num_columns]), 0)
     else:
+      # We can statically infer the diagonal size, and whether it is square.
       if not isinstance(num_rows, compat.integral_types) or not isinstance(
           num_columns, compat.integral_types):
         raise TypeError(
             'num_rows and num_columns must be positive integer values.')
-      batch_shape = [dim for dim in batch_shape]
       is_square = num_rows == num_columns
-      diag_shape = batch_shape + [np.minimum(num_rows, num_columns)]
+      diag_size = np.minimum(num_rows, num_columns)
+
+    # We can not statically infer the shape of the tensor.
+    if isinstance(batch_shape, ops.Tensor) or isinstance(diag_size, ops.Tensor):
+      batch_shape = ops.convert_to_tensor(
+          batch_shape, name='shape', dtype=dtypes.int32)
+      diag_shape = array_ops.concat((batch_shape, [diag_size]), axis=0)
+      if not is_square:
+        shape = array_ops.concat((batch_shape, [num_rows, num_columns]), axis=0)
+    # We can statically infer everything.
+    else:
+      batch_shape = list(batch_shape)
+      diag_shape = batch_shape + [diag_size]
       if not is_square:
         shape = batch_shape + [num_rows, num_columns]
 
-- 
GitLab


From 60a0bfeb389e490e80d2effd1e518c7953783ac7 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 10 Oct 2018 10:20:21 -0700
Subject: [PATCH 0696/1085] Remove the tensorflow import from generated code.

PiperOrigin-RevId: 216550899
---
 tensorflow/python/autograph/core/config.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/autograph/core/config.py b/tensorflow/python/autograph/core/config.py
index 4fa8489af5..574f819504 100644
--- a/tensorflow/python/autograph/core/config.py
+++ b/tensorflow/python/autograph/core/config.py
@@ -45,5 +45,4 @@ NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',))
 # TODO(mdan); Consolidate all internal imports into a single __ag module.
 COMPILED_IMPORT_STATEMENTS = (
     'from __future__ import print_function',
-    'import tensorflow as tf',
 )
-- 
GitLab


From 4f0caab261e26178c8b53080055fdddd046c0d6a Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Wed, 10 Oct 2018 10:26:57 -0700
Subject: [PATCH 0697/1085] Switch to code style, keep link to tensorflow.org

---
 README.md | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 34406f4ed7..44511c9510 100644
--- a/README.md
+++ b/README.md
@@ -31,15 +31,17 @@ subscribing to
 ## Installation
 for install current release for CPU-only:
 
-*pip install tensorflow*
+```
+pip install tensorflow
+```
 
 GPU package for CUDA-enabled GPU cards:
 
-*pip install tensorflow-gpu*
-
-
-
+```
+pip install tensorflow-gpu
+```
 
+*See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions on how to install our release binaries or how to build from source.*
 
 People who are a little more adventurous can also try our nightly binaries:
 
-- 
GitLab


From 83976f270ceb1cbe17a1fbf3a8e945748537886e Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Wed, 10 Oct 2018 10:28:54 -0700
Subject: [PATCH 0698/1085] Wording.

---
 README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 44511c9510..99321b294e 100644
--- a/README.md
+++ b/README.md
@@ -29,19 +29,20 @@ subscribing to
 [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce).
 
 ## Installation
-for install current release for CPU-only:
+
+To install the current release for CPU-only:
 
 ```
 pip install tensorflow
 ```
 
-GPU package for CUDA-enabled GPU cards:
+Use the GPU package for CUDA-enabled GPU cards:
 
 ```
 pip install tensorflow-gpu
 ```
 
-*See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions on how to install our release binaries or how to build from source.*
+*See [Installing TensorFlow](https://www.tensorflow.org/install) for detailed instructions, and how to build from source.*
 
 People who are a little more adventurous can also try our nightly binaries:
 
-- 
GitLab


From 1e69efe803df50dc00174da37cda9b8147d886f3 Mon Sep 17 00:00:00 2001
From: Tamara Norman <tamaranorman@google.com>
Date: Wed, 10 Oct 2018 10:21:56 -0700
Subject: [PATCH 0699/1085] Remove unused build dependency in gradients_impl on
 backprop in order to allow reverse dependency.

PiperOrigin-RevId: 216551226
---
 tensorflow/python/BUILD | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 822d596995..18ade384f5 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2178,7 +2178,6 @@ py_library(
         ":util",
         ":variable_scope",
         "//tensorflow/core:protos_all_py",
-        "//tensorflow/python/eager:backprop",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:tape",
         "//third_party/py/numpy",
-- 
GitLab


From dbac4acb330663c4a3b8a167b9f83c5b9acc95fe Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Wed, 10 Oct 2018 10:31:56 -0700
Subject: [PATCH 0700/1085] [XLA] Make sure that the multi-output path of BF16
 normalization updates roots.

PiperOrigin-RevId: 216553003
---
 .../xla/service/bfloat16_normalization.cc     |  4 ++++
 .../service/bfloat16_normalization_test.cc    | 24 +++++++++++++++++++
 .../compiler/xla/service/hlo_verifier.cc      |  6 ++---
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc
index d5b1148058..1251f0258f 100644
--- a/tensorflow/compiler/xla/service/bfloat16_normalization.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc
@@ -231,6 +231,10 @@ Status BFloat16NormalizationVisitor::HandleMultipleOutputs(
   for (auto* user : materialized_users) {
     TF_RETURN_IF_ERROR(hlo->ReplaceUseWith(user, tuple));
   }
+  bool is_root = computation_->root_instruction() == hlo;
+  if (is_root) {
+    computation_->set_root_instruction(tuple);
+  }
   *tuple->mutable_shape() = original_shape;
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
index 2411fdcb20..cb075a5e38 100644
--- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
@@ -298,6 +298,30 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSort) {
   EXPECT_EQ(ShapeUtil::GetSubshape(sort->shape(), {0}).element_type(), F32);
 }
 
+TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSortRoot) {
+  auto module = CreateNewModule();
+  auto builder = HloComputation::Builder(TestName());
+  Shape f32_shape = ShapeUtil::MakeShape(F32, {1024});
+  Shape bf16_shape = ShapeUtil::MakeShape(BF16, {1024});
+
+  HloInstruction* key = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, f32_shape, "key"));
+  HloInstruction* value = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, bf16_shape, "value"));
+
+  HloInstruction* sort = builder.AddInstruction(HloInstruction::CreateSort(
+      ShapeUtil::MakeTupleShape({bf16_shape, bf16_shape}), 0, key, {value}));
+
+  auto computation = module->AddEntryComputation(builder.Build());
+
+  EXPECT_TRUE(Normalize(module));
+
+  EXPECT_EQ(sort->operand(0)->shape().element_type(), F32);
+  EXPECT_EQ(ShapeUtil::GetSubshape(sort->shape(), {0}).element_type(), F32);
+  EXPECT_NE(computation->root_instruction(), sort);
+  EXPECT_EQ(computation->root_instruction()->opcode(), HloOpcode::kTuple);
+}
+
 // Tests that the normalization should not cause unsupported mixed precision due
 // to resolving unsupported BF16 operand.
 TEST_F(BFloat16NormalizationTest, DoNotAddUnsupportedMixedPrecision) {
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index a1f668921d..912d2dbe75 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -878,10 +878,8 @@ Status CheckEntryComputationLayout(const HloModule& module) {
   const HloComputation* computation = module.entry_computation();
   const auto& layout = module.entry_computation_layout();
 
-  // TODO(117498192): Change into a call to Compatible(...).
-  if (!ShapeUtil::CompatibleIgnoringFpPrecision(
-          computation->root_instruction()->shape(),
-          layout.result_layout().shape())) {
+  if (!ShapeUtil::Compatible(computation->root_instruction()->shape(),
+                             layout.result_layout().shape())) {
     return InternalError(
         "Shape of the root instruction of entry computation (%s) should be "
         "compatible to one specified in module's entry computation layout (%s)",
-- 
GitLab


From 0445f420981524a52b87ce7de74d7c0c39177cd6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 11:12:34 -0700
Subject: [PATCH 0701/1085] Call InitMain in tflite_diff_example_test.

PiperOrigin-RevId: 216560608
---
 tensorflow/contrib/lite/testing/BUILD                       | 2 ++
 tensorflow/contrib/lite/testing/tflite_diff_example_test.cc | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 3dc666f631..b476445b3a 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -350,6 +350,7 @@ tf_cc_test(
     deps = [
         ":tflite_diff_flags",
         ":tflite_diff_util",
+        "//tensorflow/core:lib",
     ],
 )
 
@@ -359,6 +360,7 @@ cc_binary(
     deps = [
         ":tflite_diff_flags",
         ":tflite_diff_util",
+        "//tensorflow/core:lib",
     ],
 )
 
diff --git a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc
index f2c49fe389..e85d9c525a 100644
--- a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc
+++ b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc
@@ -15,12 +15,15 @@ limitations under the License.
 
 #include "tensorflow/contrib/lite/testing/tflite_diff_flags.h"
 #include "tensorflow/contrib/lite/testing/tflite_diff_util.h"
+#include "tensorflow/core/platform/init_main.h"
 
 int main(int argc, char** argv) {
   ::tflite::testing::DiffOptions options =
       ::tflite::testing::ParseTfliteDiffFlags(&argc, argv);
   if (options.tensorflow_model.empty()) return 1;
 
+  ::tensorflow::port::InitMain("usage", &argc, &argv);
+
   int failure_count = 0;
   for (int i = 0; i < options.num_runs_per_pass; i++) {
     if (!tflite::testing::RunDiffTest(options, /*num_invocations=*/1)) {
-- 
GitLab


From 4a433a5b273a32fc7f87a32a7245dc1a708dfc33 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 10 Oct 2018 11:13:31 -0700
Subject: [PATCH 0702/1085] Automated rollback of commit
 6c40bc717442d56f0b6a60658b05f0549afd69ee.

PiperOrigin-RevId: 216560788
---
 .../contrib/losses/python/losses/loss_ops.py  | 14 ++++--
 .../contrib/metrics/python/ops/metric_ops.py  | 48 ++++++++++++-------
 tensorflow/contrib/rate/rate.py               | 11 ++---
 tensorflow/python/keras/metrics.py            | 28 +++++++----
 tensorflow/python/kernel_tests/losses_test.py | 15 ------
 tensorflow/python/ops/losses/losses_impl.py   | 22 ++++++---
 tensorflow/python/ops/metrics_impl.py         | 42 ++++++++++------
 7 files changed, 104 insertions(+), 76 deletions(-)

diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py
index 651de4e2f4..619294b518 100644
--- a/tensorflow/contrib/losses/python/losses/loss_ops.py
+++ b/tensorflow/contrib/losses/python/losses/loss_ops.py
@@ -22,6 +22,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.contrib.framework.python.ops import add_arg_scope
+from tensorflow.python.compat import compat
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
@@ -82,6 +83,8 @@ def _safe_div(numerator, denominator, name="value"):
   Returns:
     The element-wise value of the numerator divided by the denominator.
   """
+  if compat.forward_compatible(2018, 11, 1):
+    return math_ops.div_no_nan(numerator, denominator, name=name)
   return array_ops.where(
       math_ops.greater(denominator, 0),
       math_ops.div(numerator,
@@ -104,7 +107,7 @@ def _safe_mean(losses, num_present):
       then zero is returned.
   """
   total_loss = math_ops.reduce_sum(losses)
-  return _safe_div(total_loss, num_present)
+  return _safe_div(total_loss, num_present, name="value")
 
 
 @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.")
@@ -609,11 +612,14 @@ def mean_pairwise_squared_error(predictions,
         math_ops.square(diffs), reduction_indices=reduction_indices)
     num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
-    term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, num_present_per_batch)
+    term1 = 2.0 * _safe_div(sum_squares_diff_per_batch,
+                            num_present_per_batch,
+                            name="value")
 
     sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices)
-    term2 = 2.0 * _safe_div(
-        math_ops.square(sum_diff), math_ops.square(num_present_per_batch))
+    term2 = 2.0 * _safe_div(math_ops.square(sum_diff),
+                            math_ops.square(num_present_per_batch),
+                            name="value")
 
     loss = _scale_losses(term1 - term2, weights)
 
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index bbf5d3f30c..d6932f6e4b 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -24,6 +24,7 @@ from __future__ import print_function
 
 import collections as collections_lib
 
+from tensorflow.python.compat import compat
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -45,22 +46,30 @@ from tensorflow.python.util.deprecation import deprecated
 _EPSILON = 1e-7
 
 
-def _safe_div(numerator, denominator, name):
-  """Divides two values, returning 0 if the denominator is <= 0.
+def _safe_div(numerator, denominator):
+  """Computes a safe divide which returns 0 if the denominator is zero.
+
+  Note that the function contains an additional conditional check that is
+  necessary for avoiding situations where the loss is zero causing NaNs to
+  creep into the gradient computation.
 
   Args:
-    numerator: A real `Tensor`.
-    denominator: A real `Tensor`, with dtype matching `numerator`.
-    name: Name for the returned op.
+    numerator: An arbitrary `Tensor`.
+    denominator: A `Tensor` whose shape matches `numerator` and whose values are
+      assumed to be non-negative.
 
   Returns:
-    0 if `denominator` <= 0, else `numerator` / `denominator`
+    The element-wise value of the numerator divided by the denominator.
   """
+  if compat.forward_compatible(2018, 11, 1):
+    return math_ops.div_no_nan(numerator, denominator)
   return array_ops.where(
       math_ops.greater(denominator, 0),
-      math_ops.truediv(numerator, denominator),
-      0,
-      name=name)
+      math_ops.div(numerator,
+                   array_ops.where(
+                       math_ops.equal(denominator, 0),
+                       array_ops.ones_like(denominator), denominator)),
+      array_ops.zeros_like(numerator))
 
 
 @deprecated(None, 'Please switch to tf.metrics.true_positives. Note that the '
@@ -3239,11 +3248,11 @@ def streaming_covariance(predictions,
     # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount)
     # batch_mean_prediction is E[x_B] in the update equation
     batch_mean_prediction = _safe_div(
-        math_ops.reduce_sum(weighted_predictions), batch_count,
-        'batch_mean_prediction')
+        math_ops.reduce_sum(weighted_predictions),
+        batch_count)
     delta_mean_prediction = _safe_div(
-        (batch_mean_prediction - mean_prediction) * batch_count, update_count,
-        'delta_mean_prediction')
+        (batch_mean_prediction - mean_prediction) * batch_count,
+        update_count)
     update_mean_prediction = state_ops.assign_add(mean_prediction,
                                                   delta_mean_prediction)
     # prev_mean_prediction is E[x_A] in the update equation
@@ -3251,9 +3260,11 @@ def streaming_covariance(predictions,
 
     # batch_mean_label is E[y_B] in the update equation
     batch_mean_label = _safe_div(
-        math_ops.reduce_sum(weighted_labels), batch_count, 'batch_mean_label')
-    delta_mean_label = _safe_div((batch_mean_label - mean_label) * batch_count,
-                                 update_count, 'delta_mean_label')
+        math_ops.reduce_sum(weighted_labels),
+        batch_count)
+    delta_mean_label = _safe_div(
+        (batch_mean_label - mean_label) * batch_count,
+        update_count)
     update_mean_label = state_ops.assign_add(mean_label, delta_mean_label)
     # prev_mean_label is E[y_A] in the update equation
     prev_mean_label = update_mean_label - delta_mean_label
@@ -3915,8 +3926,9 @@ def cohen_kappa(labels,
       po_sum = math_ops.reduce_sum(po)
       total = math_ops.reduce_sum(pe_row)
       pe_sum = math_ops.reduce_sum(
-          metrics_impl._safe_div(  # pylint: disable=protected-access
-              pe_row * pe_col, total, None))
+          _safe_div(
+              math_ops.to_double(pe_row * pe_col),
+              math_ops.to_double(total)))
       po_sum, pe_sum, total = (math_ops.to_double(po_sum),
                                math_ops.to_double(pe_sum),
                                math_ops.to_double(total))
diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py
index 24d586479a..d948066b36 100644
--- a/tensorflow/contrib/rate/rate.py
+++ b/tensorflow/contrib/rate/rate.py
@@ -108,13 +108,6 @@ class Rate(object):
   def variables(self):
     return self._vars
 
-  def _safe_div(self, numerator, denominator, name):
-    t = math_ops.truediv(numerator, denominator)
-    zero = array_ops.zeros_like(t, dtype=denominator.dtype)
-    condition = math_ops.greater(denominator, zero)
-    zero = math_ops.cast(zero, t.dtype)
-    return array_ops.where(condition, t, zero, name=name)
-
   def _add_variable(self, name, shape=None, dtype=None):
     """Private method for adding variables to the graph."""
     if self._built:
@@ -148,4 +141,6 @@ class Rate(object):
     state_ops.assign(self.prev_values, values)
     state_ops.assign(self.prev_denominator, denominator)
 
-    return self._safe_div(self.numer, self.denom, name="safe_rate")
+    return math_ops.div_no_nan(self.numer,
+                               math_ops.maximum(self.denom, 0),
+                               name="safe_rate")
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index d217244e2f..920eaf5596 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -28,6 +28,7 @@ import types
 import weakref
 import six
 
+from tensorflow.python.compat import compat
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function
 from tensorflow.python.framework import dtypes
@@ -172,20 +173,29 @@ def weakmethod(method):
 
 
 def safe_div(numerator, denominator):
-  """Divides two tensors element-wise, returning 0 if the denominator is <= 0.
+  """Computes a safe divide which returns 0 if the denominator is zero.
+
+  Note that the function contains an additional conditional check that is
+  necessary for avoiding situations where the loss is zero causing NaNs to
+  creep into the gradient computation.
 
   Args:
-    numerator: A `Tensor`.
-    denominator: A `Tensor`, with dtype matching `numerator`.
+    numerator: An arbitrary `Tensor`.
+    denominator: A `Tensor` whose shape matches `numerator` and whose values are
+      assumed to be non-negative.
 
   Returns:
-    0 if `denominator` <= 0, else `numerator` / `denominator`
+    The element-wise value of the numerator divided by the denominator.
   """
-  t = math_ops.truediv(numerator, denominator)
-  zero = array_ops.zeros_like(t, dtype=denominator.dtype)
-  condition = math_ops.greater(denominator, zero)
-  zero = math_ops.cast(zero, t.dtype)
-  return array_ops.where(condition, t, zero)
+  if compat.forward_compatible(2018, 11, 1):
+    return math_ops.div_no_nan(numerator, denominator)
+  return array_ops.where(
+      math_ops.greater(denominator, 0),
+      math_ops.div(numerator,
+                   array_ops.where(
+                       math_ops.equal(denominator, 0),
+                       array_ops.ones_like(denominator), denominator)),
+      array_ops.zeros_like(numerator))
 
 
 def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index 3ce0b74263..fb0b5f1137 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -34,25 +33,11 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.losses import losses
-from tensorflow.python.ops.losses import losses_impl
 from tensorflow.python.ops.losses import util
 from tensorflow.python.platform import test
 from tensorflow.python.training import momentum as momentum_lib
 
 
-safe_div = losses_impl._safe_div  # pylint: disable=protected-access
-
-
-class SafeDivTest(test.TestCase):
-
-  def testEager(self):
-    with context.eager_mode():
-      self.assertAllEqual(safe_div(constant_op.constant(1.0),
-                                   constant_op.constant(0.0)), 0.0)
-      self.assertAllEqual(safe_div(constant_op.constant(1.0),
-                                   0.0), 0.0)
-
-
 class AbsoluteDifferenceLossTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 806539747e..8a8a81ab5c 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.compat import compat
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
@@ -83,18 +84,21 @@ def _safe_div(numerator, denominator, name="value"):
 
   Args:
     numerator: An arbitrary `Tensor`.
-    denominator: `Tensor` whose shape matches `numerator` and whose values are
+    denominator: A `Tensor` whose shape matches `numerator` and whose values are
       assumed to be non-negative.
     name: An optional name for the returned op.
 
   Returns:
     The element-wise value of the numerator divided by the denominator.
   """
+  if compat.forward_compatible(2018, 11, 1):
+    return math_ops.div_no_nan(numerator, denominator, name=name)
   return array_ops.where(
       math_ops.greater(denominator, 0),
-      math_ops.div(numerator, array_ops.where(
-          math_ops.equal(denominator, 0),
-          array_ops.ones_like(denominator), denominator)),
+      math_ops.div(numerator,
+                   array_ops.where(
+                       math_ops.equal(denominator, 0),
+                       array_ops.ones_like(denominator), denominator)),
       array_ops.zeros_like(numerator),
       name=name)
 
@@ -599,14 +603,18 @@ def mean_pairwise_squared_error(
           keepdims=True)
       num_present_per_batch = _num_present(diffs, weights, per_batch=True)
 
-      term1 = 2.0 * _safe_div(sum_squares_diff_per_batch,
-                              num_present_per_batch - 1)
+      term1 = 2.0 * _safe_div(
+          sum_squares_diff_per_batch,
+          math_ops.maximum(num_present_per_batch - 1, 0))
 
       sum_diff = math_ops.reduce_sum(
           diffs, reduction_indices=reduction_indices, keepdims=True)
       term2 = 2.0 * _safe_div(
           math_ops.square(sum_diff),
-          math_ops.multiply(num_present_per_batch, num_present_per_batch - 1))
+          math_ops.maximum(
+              math_ops.multiply(num_present_per_batch,
+                                num_present_per_batch - 1),
+              0))
 
       weighted_losses = math_ops.multiply(term1 - term2, weights)
       loss = math_ops.reduce_sum(weighted_losses)
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index 763877c2d2..b8d96b4a6e 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.compat import compat
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -224,6 +225,8 @@ def _safe_div(numerator, denominator, name):
   Returns:
     0 if `denominator` <= 0, else `numerator` / `denominator`
   """
+  if compat.forward_compatible(2018, 11, 1):
+    return math_ops.div_no_nan(numerator, denominator)
   t = math_ops.truediv(numerator, denominator)
   zero = array_ops.zeros_like(t, dtype=denominator.dtype)
   condition = math_ops.greater(denominator, zero)
@@ -244,12 +247,7 @@ def _safe_scalar_div(numerator, denominator, name):
   """
   numerator.get_shape().with_rank_at_most(1)
   denominator.get_shape().with_rank_at_most(1)
-  return control_flow_ops.cond(
-      math_ops.equal(
-          array_ops.constant(0.0, dtype=dtypes.float64), denominator),
-      lambda: array_ops.constant(0.0, dtype=dtypes.float64),
-      lambda: math_ops.div(numerator, denominator),
-      name=name)
+  return _safe_div(numerator, denominator, name=name)
 
 
 def _streaming_confusion_matrix(labels, predictions, num_classes, weights=None):
@@ -402,11 +400,14 @@ def mean(values,
     with ops.control_dependencies([values]):
       update_count_op = state_ops.assign_add(count, num_values)
 
-    compute_mean = lambda _, t, c: _safe_div(t, c, 'value')
+    def compute_mean(_, t, c):
+      return _safe_div(t, math_ops.maximum(c, 0), name='value')
 
     mean_t = _aggregate_across_towers(
         metrics_collections, compute_mean, total, count)
-    update_op = _safe_div(update_total_op, update_count_op, 'update_op')
+    update_op = _safe_div(update_total_op,
+                          math_ops.maximum(update_count_op, 0),
+                          name='update_op')
 
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
@@ -778,16 +779,21 @@ def auc(labels,
       """
       dtp = tp[:num_thresholds - 1] - tp[1:]
       p = tp + fp
-      prec_slope = _safe_div(dtp, p[:num_thresholds - 1] - p[1:], 'prec_slope')
+      prec_slope = _safe_div(
+          dtp,
+          math_ops.maximum(p[:num_thresholds - 1] - p[1:], 0),
+          name='prec_slope')
       intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:])
       safe_p_ratio = array_ops.where(
           math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0),
-          _safe_div(p[:num_thresholds - 1], p[1:], 'recall_relative_ratio'),
+          _safe_div(p[:num_thresholds - 1],
+                    math_ops.maximum(p[1:], 0),
+                    name='recall_relative_ratio'),
           array_ops.ones_like(p[1:]))
       return math_ops.reduce_sum(
           _safe_div(
               prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)),
-              tp[1:] + fn[1:],
+              math_ops.maximum(tp[1:] + fn[1:], 0),
               name='pr_auc_increment'),
           name='interpolate_pr_auc')
 
@@ -1068,7 +1074,8 @@ def mean_per_class_accuracy(labels,
     update_count_op = state_ops.scatter_add(count, labels, is_correct)
 
     def compute_mean_accuracy(_, count, total):
-      per_class_accuracy = _safe_div(count, total, None)
+      per_class_accuracy = _safe_div(
+          count, math_ops.maximum(total, 0), name=None)
       mean_accuracy_v = math_ops.reduce_mean(
           per_class_accuracy, name='mean_accuracy')
       return mean_accuracy_v
@@ -1076,7 +1083,9 @@ def mean_per_class_accuracy(labels,
     mean_accuracy_v = _aggregate_across_towers(
         metrics_collections, compute_mean_accuracy, count, total)
 
-    update_op = _safe_div(update_count_op, update_total_op, name='update_op')
+    update_op = _safe_div(update_count_op,
+                          math_ops.maximum(update_total_op, 0),
+                          name='update_op')
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
 
@@ -1385,12 +1394,15 @@ def mean_tensor(values,
     with ops.control_dependencies([values]):
       update_count_op = state_ops.assign_add(count, num_values)
 
-    compute_mean = lambda _, t, c: _safe_div(t, c, 'value')
+    compute_mean = lambda _, t, c: _safe_div(
+        t, math_ops.maximum(c, 0), name='value')
 
     mean_t = _aggregate_across_towers(
         metrics_collections, compute_mean, total, count)
 
-    update_op = _safe_div(update_total_op, update_count_op, 'update_op')
+    update_op = _safe_div(update_total_op,
+                          math_ops.maximum(update_count_op, 0),
+                          name='update_op')
     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)
 
-- 
GitLab


From 217ad9a568d85b36834090d8a7a17fffeaa0ec89 Mon Sep 17 00:00:00 2001
From: Raghuraman Krishnamoorthi <raghuramank@google.com>
Date: Wed, 10 Oct 2018 11:15:27 -0700
Subject: [PATCH 0703/1085]  Support for shared weights in quantization
 rewriter.

PiperOrigin-RevId: 216561137
---
 .../quantize/python/fold_batch_norms.py       |  58 ++-
 .../quantize/python/fold_batch_norms_test.py  | 400 ++++++++++++------
 .../contrib/quantize/python/quantize.py       | 157 +++++--
 .../quantize/python/quantize_graph_test.py    | 109 +++++
 4 files changed, 538 insertions(+), 186 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py
index 7575b1b6cd..e0c6da00d8 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py
@@ -145,7 +145,7 @@ def _FindFusedBatchNorms(graph):
   Args:
     graph: Graph to inspect.
 
-  Yields:
+  Returns:
     _FusedBatchNormMatches.
   """
   input_pattern = graph_matcher.OpTypePattern('*')
@@ -169,8 +169,15 @@ def _FindFusedBatchNorms(graph):
           graph_matcher.OpTypePattern('*'),
           graph_matcher.OpTypePattern('*')
       ])
+  # Identity between conv/matmul and bn
+  layer_pattern_with_identity = graph_matcher.OpTypePattern(
+      'Identity',
+      inputs=[
+          graph_matcher.OneofPattern([batch_to_space_pattern, layer_pattern])
+      ])
   layer_output_pattern = graph_matcher.OneofPattern(
-      [layer_pattern, batch_to_space_pattern])
+      [layer_pattern_with_identity, layer_pattern, batch_to_space_pattern])
+
   # MatMul has a Reshape between it and FusedBatchNorm.
   matmul_reshape_pattern = graph_matcher.OpTypePattern(
       'Reshape',
@@ -188,6 +195,11 @@ def _FindFusedBatchNorms(graph):
       'Reshape', inputs=[batch_norm_pattern,
                          graph_matcher.OpTypePattern('*')])
 
+  batch_norm_identity_pattern = graph_matcher.OpTypePattern(
+      'Identity', inputs=[batch_norm_pattern, matmul_bn_output_reshape_pattern])
+
+  bn_identity_matcher = graph_matcher.GraphMatcher(batch_norm_identity_pattern)
+
   bn_matcher = graph_matcher.GraphMatcher(
       graph_matcher.OneofPattern(
           [matmul_bn_output_reshape_pattern, batch_norm_pattern]))
@@ -200,7 +212,17 @@ def _FindFusedBatchNorms(graph):
   moving_avg_mul_matcher = graph_matcher.GraphMatcher(
       moving_average_mul_pattern)
 
-  for match_result in bn_matcher.match_graph(graph):
+  def _GetLayerMatch(match_result):
+    """Populates a layer match object containing ops/tensors for folding BNs.
+
+    Args:
+      match_result: Matched result from graph matcher
+
+    Returns:
+      layer_op: Matching conv/fc op prior to batch norm
+      BatchNormMatch: _BatchNormMatch containing all required batch norm
+      parameters.
+    """
     moving_mean_tensor = None
     moving_variance_tensor = None
     bn_decay_mean_tensor = None
@@ -208,7 +230,11 @@ def _FindFusedBatchNorms(graph):
     batch_to_space_op = None
     layer_op = match_result.get_op(layer_pattern)
     layer_tensor = match_result.get_tensor(layer_pattern)
+    bn_id_op = match_result.get_op(batch_norm_identity_pattern)
     bn_op = match_result.get_op(batch_norm_pattern)
+    if bn_id_op is None:
+      bn_id_op = bn_op
+
     batch_epsilon = bn_op.get_attr('epsilon')
 
     # In the MatMul case, the output of batch norm is reshaped back into a
@@ -219,13 +245,13 @@ def _FindFusedBatchNorms(graph):
       # If the matcher didn't match matmul_bn_output_reshape, there will be
       # another match for this 'MatMul' later, so we can skip this one.
       if output_reshape_op is None:
-        continue
+        return None, None
       output_tensor = output_reshape_op.outputs[0]
 
     # Ensure that the output tensor has consumers, otherwise this is a dangling
     # node and not a match.
     if not output_tensor.consumers():
-      continue
+      return None, None
 
     batch_to_space_op = match_result.get_op(batch_to_space_pattern)
     input_tensor = match_result.get_tensor(input_pattern)
@@ -277,7 +303,7 @@ def _FindFusedBatchNorms(graph):
       mean_tensor = match_result.get_tensor(mean_pattern)
       variance_tensor = match_result.get_tensor(variance_pattern)
 
-    yield _BatchNormMatch(
+    return layer_op, _BatchNormMatch(
         layer_op=layer_op,
         bn_op=bn_op,
         output_tensor=output_tensor,
@@ -294,6 +320,26 @@ def _FindFusedBatchNorms(graph):
         batch_epsilon=batch_epsilon,
         batch_to_space_op=batch_to_space_op)
 
+  layer_matches = []
+  # We use matched_layer_set to ensure that layers aren't matched multiple
+  # times.
+  matched_layer_set = set()
+  for match_result in bn_identity_matcher.match_graph(graph):
+    layer_op, layer_match = _GetLayerMatch(match_result)
+    if layer_op is not None:
+      if layer_op not in matched_layer_set:
+        matched_layer_set.add(layer_op)
+        layer_matches.append(layer_match)
+
+  for match_result in bn_matcher.match_graph(graph):
+    layer_op, layer_match = _GetLayerMatch(match_result)
+    if layer_op is not None:
+      if layer_op not in matched_layer_set:
+        matched_layer_set.add(layer_op)
+        layer_matches.append(layer_match)
+
+  return layer_matches
+
 
 def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay):
   """Computes batch norm correction params.
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
index 3f8063cc02..77b3f62e9d 100644
--- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
+++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py
@@ -48,26 +48,32 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
   def _RunTestOverParameters(self, test_fn):
     parameters_list = [
         # (relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm,
-        # freeze_batch_norm_delay)
-        (nn_ops.relu6, 'Relu6', False, False, False, 100),
-        (nn_ops.relu, 'Relu', False, False, False, None),
-        (nn_ops.relu6, 'Relu6', True, False, False, 100),
-        (nn_ops.relu, 'Relu', True, False, False, None),
-        (nn_ops.relu6, 'Relu6', False, True, False, 100),
-        (nn_ops.relu, 'Relu', False, True, False, None),
-        (nn_ops.relu6, 'Relu6', True, True, False, 100),
-        (nn_ops.relu, 'Relu', True, True, False, None),
+        # freeze_batch_norm_delay, insert identity node)
+        (nn_ops.relu6, 'Relu6', False, False, False, 100, False),
+        (nn_ops.relu, 'Relu', False, False, False, None, False),
+        (nn_ops.relu6, 'Relu6', True, False, False, 100, False),
+        (nn_ops.relu, 'Relu', True, False, False, None, False),
+        (nn_ops.relu6, 'Relu6', False, True, False, 100, False),
+        (nn_ops.relu, 'Relu', False, True, False, None, False),
+        (nn_ops.relu6, 'Relu6', True, True, False, 100, False),
+        (nn_ops.relu, 'Relu', True, True, False, None, False),
         # Fused batch norm always has scaling enabled.
-        (nn_ops.relu6, 'Relu6', False, True, True, None),
-        (nn_ops.relu, 'Relu', False, True, True, 100),
-        (nn_ops.relu6, 'Relu6', True, True, True, None),
-        (nn_ops.relu, 'Relu', True, True, True, 100),
+        (nn_ops.relu6, 'Relu6', False, True, True, None, False),
+        (nn_ops.relu, 'Relu', False, True, True, 100, False),
+        (nn_ops.relu6, 'Relu6', True, True, True, None, False),
+        (nn_ops.relu, 'Relu', True, True, True, 100, False),
+        (nn_ops.relu6, 'Relu6', False, True, True, None, True),
+        (nn_ops.relu, 'Relu', False, True, True, 100, True),
+        (nn_ops.relu6, 'Relu6', True, True, True, None, True),
+        (nn_ops.relu, 'Relu', True, True, True, 100, True),
     ]
     for params in parameters_list:
-      test_fn(params[0], params[1], params[2], params[3], params[4], params[5])
+      test_fn(params[0], params[1], params[2], params[3], params[4], params[5],
+              params[6])
 
   def _TestFoldConv2d(self, relu, relu_op_name, with_bypass, has_scaling,
-                      fused_batch_norm, freeze_batch_norm_delay):
+                      fused_batch_norm, freeze_batch_norm_delay,
+                      insert_identity_node):
     """Tests folding cases: inputs -> Conv2d with batch norm -> Relu*.
 
     Args:
@@ -79,6 +85,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       fused_batch_norm: Bool, when true the batch norm is fused.
       freeze_batch_norm_delay: None or the number of steps after which training
       switches to using frozen mean and variance
+      insert_identity_node: Bool, insert identity node between conv and batch
+      norm
     """
     g = ops.Graph()
     with g.as_default():
@@ -87,18 +95,42 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       out_depth = 3 if with_bypass else 32
       stride = 1 if with_bypass else 2
       activation_fn = None if with_bypass else relu
-      scope = 'test/test2' if with_bypass else 'test'
-      node = conv2d(
-          inputs,
-          out_depth, [5, 5],
-          stride=stride,
-          padding='SAME',
-          weights_initializer=self._WeightInit(0.09),
-          activation_fn=activation_fn,
-          normalizer_fn=batch_norm,
-          normalizer_params=self._BatchNormParams(
-              scale=has_scaling, fused=fused_batch_norm),
-          scope=scope)
+      name = 'test/test2' if with_bypass else 'test'
+      if insert_identity_node:
+        with g.name_scope(name):
+          node = conv2d(
+              inputs,
+              out_depth, [5, 5],
+              stride=stride,
+              padding='SAME',
+              weights_initializer=self._WeightInit(0.09),
+              activation_fn=None,
+              normalizer_fn=None,
+              biases_initializer=None)
+          conv_out = array_ops.identity(node, name='conv_out')
+
+          node = batch_norm(
+              conv_out,
+              center=True,
+              scale=has_scaling,
+              decay=1.0 - 0.003,
+              fused=fused_batch_norm)
+          if activation_fn is not None:
+            node = activation_fn(node)
+          conv_name = name + '/Conv'
+      else:
+        node = conv2d(
+            inputs,
+            out_depth, [5, 5],
+            stride=stride,
+            padding='SAME',
+            weights_initializer=self._WeightInit(0.09),
+            activation_fn=activation_fn,
+            normalizer_fn=batch_norm,
+            normalizer_params=self._BatchNormParams(
+                scale=has_scaling, fused=fused_batch_norm),
+            scope=name)
+        conv_name = name
       if with_bypass:
         node = math_ops.add(inputs, node, name='test/Add')
         relu(node, name='test/' + relu_op_name)
@@ -106,31 +138,30 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       fold_batch_norms.FoldBatchNorms(
           g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay)
 
-    folded_mul = g.get_operation_by_name(scope + '/mul_fold')
+    folded_mul = g.get_operation_by_name(conv_name + '/mul_fold')
     self.assertEqual(folded_mul.type, 'Mul')
     self._AssertInputOpsAre(folded_mul, [
-        scope + '/correction_mult',
-        self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm)
+        conv_name + '/correction_mult',
+        self._BatchNormMultiplierName(conv_name, has_scaling, fused_batch_norm)
     ])
-    self._AssertOutputGoesToOps(folded_mul, g, [scope + '/Conv2D_Fold'])
+    self._AssertOutputGoesToOps(folded_mul, g, [conv_name + '/Conv2D_Fold'])
 
-    folded_conv = g.get_operation_by_name(scope + '/Conv2D_Fold')
+    folded_conv = g.get_operation_by_name(conv_name + '/Conv2D_Fold')
     self.assertEqual(folded_conv.type, 'Conv2D')
     self._AssertInputOpsAre(folded_conv,
-                            [scope + '/mul_fold', inputs.op.name])
-    self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul'])
+                            [conv_name + '/mul_fold', inputs.op.name])
+    self._AssertOutputGoesToOps(folded_conv, g, [conv_name + '/post_conv_mul'])
 
-    folded_add = g.get_operation_by_name(scope + '/add_fold')
+    folded_add = g.get_operation_by_name(conv_name + '/add_fold')
     self.assertEqual(folded_add.type, 'Add')
     self._AssertInputOpsAre(folded_add, [
-        scope + '/correction_add',
-        self._BathNormBiasName(scope, fused_batch_norm)
+        conv_name + '/correction_add',
+        self._BathNormBiasName(conv_name, fused_batch_norm)
     ])
     output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name]
     self._AssertOutputGoesToOps(folded_add, g, output_op_names)
     if freeze_batch_norm_delay is not None:
-      self._AssertMovingAveragesAreFrozen(g, scope)
-
+      self._AssertMovingAveragesAreFrozen(g, name)
 
     for op in g.get_operations():
       self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name)
@@ -143,7 +174,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
                               relu_op_name='Relu',
                               has_scaling=True,
                               fused_batch_norm=False,
-                              freeze_batch_norm_delay=None):
+                              freeze_batch_norm_delay=None,
+                              insert_identity_node=False):
     """Tests folding cases for a network with multiple layers.
 
     Args:
@@ -153,6 +185,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       fused_batch_norm: Bool, when true the batch norm is fused.
       freeze_batch_norm_delay: None or the number of steps after which training
       switches to using frozen mean and variance
+      insert_identity_node: Bool, insert identity node between conv and batch
+      norm
     """
     g = ops.Graph()
     with g.as_default():
@@ -225,9 +259,14 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
     for op in g.get_operations():
       self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name)
 
-  def _TestFoldConv2dUnknownShape(self, relu, relu_op_name, with_bypass,
-                                  has_scaling, fused_batch_norm,
-                                  freeze_batch_norm_delay):
+  def _TestFoldConv2dUnknownShape(self,
+                                  relu,
+                                  relu_op_name,
+                                  with_bypass,
+                                  has_scaling,
+                                  fused_batch_norm,
+                                  freeze_batch_norm_delay,
+                                  insert_identity_node=False):
     """Tests folding cases: inputs -> Conv2d with batch norm -> Relu*.
 
     Tests that folding works even with an input shape where some dimensions are
@@ -242,6 +281,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       fused_batch_norm: Bool, when true the batch norm is fused.
       freeze_batch_norm_delay: None or the number of steps after which training
       switches to using frozen mean and variance
+      insert_identity_node: Bool, insert identity node between conv and batch
+      norm
     """
     g = ops.Graph()
     with g.as_default():
@@ -298,9 +339,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
   def testFoldConv2dUnknownShape(self):
     self._RunTestOverParameters(self._TestFoldConv2dUnknownShape)
 
-  def _TestFoldFullyConnectedLayer(self, relu, relu_op_name, with_bypass,
-                                   has_scaling, fused_batch_norm,
-                                   freeze_batch_norm_delay):
+  def _TestFoldFullyConnectedLayer(
+      self, relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm,
+      freeze_batch_norm_delay, insert_identity_node):
     """Tests folding cases: inputs -> FC with batch norm -> Relu*.
 
     Args:
@@ -312,6 +353,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       fused_batch_norm: Bool, when true the batch norm is fused.
       freeze_batch_norm_delay: None or the number of steps after which training
       switches to using frozen mean and variance
+      insert_identity_node: Bool, insert identity node between conv and batch
+      norm
     """
     g = ops.Graph()
     with g.as_default():
@@ -319,16 +362,40 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       inputs = array_ops.zeros((batch_size, depth))
       out_depth = 256 if with_bypass else 128
       activation_fn = None if with_bypass else relu
-      scope = 'test/test2' if with_bypass else 'test'
-      node = fully_connected(
-          inputs,
-          out_depth,
-          weights_initializer=self._WeightInit(0.03),
-          activation_fn=activation_fn,
-          normalizer_fn=batch_norm,
-          normalizer_params=self._BatchNormParams(
-              scale=has_scaling, fused=fused_batch_norm),
-          scope=scope)
+      name = 'test/test2' if with_bypass else 'test'
+      insert_identity_node = fused_batch_norm
+      if insert_identity_node:
+        with g.name_scope(name):
+          node = fully_connected(
+              inputs,
+              out_depth,
+              weights_initializer=self._WeightInit(0.03),
+              activation_fn=None,
+              normalizer_fn=None,
+              biases_initializer=None)
+          node = array_ops.identity(node, name='fc_out')
+
+          node = batch_norm(
+              node,
+              center=True,
+              scale=has_scaling,
+              decay=1.0 - 0.003,
+              fused=fused_batch_norm)
+          if activation_fn is not None:
+            node = activation_fn(node)
+          fc_name = name + '/fully_connected'
+      else:
+
+        node = fully_connected(
+            inputs,
+            out_depth,
+            weights_initializer=self._WeightInit(0.03),
+            activation_fn=activation_fn,
+            normalizer_fn=batch_norm,
+            normalizer_params=self._BatchNormParams(
+                scale=has_scaling, fused=fused_batch_norm),
+            scope=name)
+        fc_name = name
       if with_bypass:
         node = math_ops.add(inputs, node, name='test/Add')
         relu(node, name='test/' + relu_op_name)
@@ -336,30 +403,30 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       fold_batch_norms.FoldBatchNorms(
           g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay)
 
-    folded_mul = g.get_operation_by_name(scope + '/mul_fold')
+    folded_mul = g.get_operation_by_name(fc_name + '/mul_fold')
     self.assertEqual(folded_mul.type, 'Mul')
     self._AssertInputOpsAre(folded_mul, [
-        scope + '/correction_mult',
-        self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm)
+        fc_name + '/correction_mult',
+        self._BatchNormMultiplierName(fc_name, has_scaling, fused_batch_norm)
     ])
-    self._AssertOutputGoesToOps(folded_mul, g, [scope + '/MatMul_Fold'])
+    self._AssertOutputGoesToOps(folded_mul, g, [fc_name + '/MatMul_Fold'])
 
-    folded_conv = g.get_operation_by_name(scope + '/MatMul_Fold')
+    folded_conv = g.get_operation_by_name(fc_name + '/MatMul_Fold')
     self.assertEqual(folded_conv.type, 'MatMul')
     self._AssertInputOpsAre(folded_conv,
-                            [scope + '/mul_fold', inputs.op.name])
-    self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul'])
+                            [fc_name + '/mul_fold', inputs.op.name])
+    self._AssertOutputGoesToOps(folded_conv, g, [fc_name + '/post_conv_mul'])
 
-    folded_add = g.get_operation_by_name(scope + '/add_fold')
+    folded_add = g.get_operation_by_name(fc_name + '/add_fold')
     self.assertEqual(folded_add.type, 'Add')
     self._AssertInputOpsAre(folded_add, [
-        scope + '/correction_add',
-        self._BathNormBiasName(scope, fused_batch_norm)
+        fc_name + '/correction_add',
+        self._BathNormBiasName(fc_name, fused_batch_norm)
     ])
     output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name]
     self._AssertOutputGoesToOps(folded_add, g, output_op_names)
     if freeze_batch_norm_delay is not None:
-      self._AssertMovingAveragesAreFrozen(g, scope)
+      self._AssertMovingAveragesAreFrozen(g, name)
 
     for op in g.get_operations():
       self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name)
@@ -369,7 +436,7 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
 
   def _TestFoldDepthwiseConv2d(self, relu, relu_op_name, with_bypass,
                                has_scaling, fused_batch_norm,
-                               freeze_batch_norm_delay):
+                               freeze_batch_norm_delay, insert_identity_node):
     """Tests folding: inputs -> DepthwiseConv2d with batch norm -> Relu*.
 
     Args:
@@ -380,7 +447,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       has_scaling: Bool, when true the batch norm has scaling.
       fused_batch_norm: Bool, when true the batch norm is fused.
       freeze_batch_norm_delay: None or the number of steps after which training
-      switches to using frozen mean and variance
+      insert_identity_node: Bool, insert identity node between conv and batch
+        norm switches to using frozen mean and variance
     """
     g = ops.Graph()
     with g.as_default():
@@ -388,19 +456,44 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       inputs = array_ops.zeros((batch_size, height, width, 3))
       stride = 1 if with_bypass else 2
       activation_fn = None if with_bypass else relu
-      scope = 'test/test2' if with_bypass else 'test'
-      node = separable_conv2d(
-          inputs,
-          None, [5, 5],
-          stride=stride,
-          depth_multiplier=1.0,
-          padding='SAME',
-          weights_initializer=self._WeightInit(0.09),
-          activation_fn=activation_fn,
-          normalizer_fn=batch_norm,
-          normalizer_params=self._BatchNormParams(
-              scale=has_scaling, fused=fused_batch_norm),
-          scope=scope)
+      name = 'test/test2' if with_bypass else 'test'
+      if insert_identity_node:
+        with g.name_scope(name):
+          node = separable_conv2d(
+              inputs,
+              None, [5, 5],
+              stride=stride,
+              depth_multiplier=1.0,
+              padding='SAME',
+              weights_initializer=self._WeightInit(0.09),
+              activation_fn=None,
+              normalizer_fn=None,
+              biases_initializer=None)
+          node = array_ops.identity(node, name='sep_conv_out')
+
+          node = batch_norm(
+              node,
+              center=True,
+              scale=has_scaling,
+              decay=1.0 - 0.003,
+              fused=fused_batch_norm)
+          if activation_fn is not None:
+            node = activation_fn(node)
+          sep_conv_name = name + '/SeparableConv2d'
+      else:
+        node = separable_conv2d(
+            inputs,
+            None, [5, 5],
+            stride=stride,
+            depth_multiplier=1.0,
+            padding='SAME',
+            weights_initializer=self._WeightInit(0.09),
+            activation_fn=activation_fn,
+            normalizer_fn=batch_norm,
+            normalizer_params=self._BatchNormParams(
+                scale=has_scaling, fused=fused_batch_norm),
+            scope=name)
+        sep_conv_name = name
       if with_bypass:
         node = math_ops.add(inputs, node, name='test/Add')
         relu(node, name='test/' + relu_op_name)
@@ -408,40 +501,43 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       fold_batch_norms.FoldBatchNorms(
           g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay)
 
-    folded_mul = g.get_operation_by_name(scope + '/mul_fold')
+    folded_mul = g.get_operation_by_name(sep_conv_name + '/mul_fold')
     self.assertEqual(folded_mul.type, 'Mul')
     if fused_batch_norm:
-      scale_reshape_op_name = scope + '/BatchNorm_Fold/scale_reshape'
+      scale_reshape_op_name = sep_conv_name + '/BatchNorm_Fold/scale_reshape'
     else:
-      scale_reshape_op_name = scope + '/scale_reshape'
-    self._AssertInputOpsAre(folded_mul,
-                            [scope + '/correction_mult', scale_reshape_op_name])
-    self._AssertOutputGoesToOps(folded_mul, g, [scope + '/depthwise_Fold'])
+      scale_reshape_op_name = sep_conv_name + '/scale_reshape'
+    self._AssertInputOpsAre(
+        folded_mul, [sep_conv_name + '/correction_mult', scale_reshape_op_name])
+    self._AssertOutputGoesToOps(folded_mul, g,
+                                [sep_conv_name + '/depthwise_Fold'])
 
     scale_reshape = g.get_operation_by_name(scale_reshape_op_name)
     self.assertEqual(scale_reshape.type, 'Reshape')
     self._AssertInputOpsAre(scale_reshape, [
-        self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm),
+        self._BatchNormMultiplierName(sep_conv_name, has_scaling,
+                                      fused_batch_norm),
         scale_reshape_op_name + '/shape'
     ])
-    self._AssertOutputGoesToOps(scale_reshape, g, [scope + '/mul_fold'])
+    self._AssertOutputGoesToOps(scale_reshape, g, [sep_conv_name + '/mul_fold'])
 
-    folded_conv = g.get_operation_by_name(scope + '/depthwise_Fold')
+    folded_conv = g.get_operation_by_name(sep_conv_name + '/depthwise_Fold')
     self.assertEqual(folded_conv.type, 'DepthwiseConv2dNative')
     self._AssertInputOpsAre(folded_conv,
-                            [scope + '/mul_fold', inputs.op.name])
-    self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul'])
+                            [sep_conv_name + '/mul_fold', inputs.op.name])
+    self._AssertOutputGoesToOps(folded_conv, g,
+                                [sep_conv_name + '/post_conv_mul'])
 
-    folded_add = g.get_operation_by_name(scope + '/add_fold')
+    folded_add = g.get_operation_by_name(sep_conv_name + '/add_fold')
     self.assertEqual(folded_add.type, 'Add')
     self._AssertInputOpsAre(folded_add, [
-        scope + '/correction_add',
-        self._BathNormBiasName(scope, fused_batch_norm)
+        sep_conv_name + '/correction_add',
+        self._BathNormBiasName(sep_conv_name, fused_batch_norm)
     ])
     output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name]
     self._AssertOutputGoesToOps(folded_add, g, output_op_names)
     if freeze_batch_norm_delay is not None:
-      self._AssertMovingAveragesAreFrozen(g, scope)
+      self._AssertMovingAveragesAreFrozen(g, name)
 
     for op in g.get_operations():
       self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name)
@@ -450,7 +546,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
     self._RunTestOverParameters(self._TestFoldDepthwiseConv2d)
 
   def _TestFoldAtrousConv2d(self, relu, relu_op_name, with_bypass, has_scaling,
-                            fused_batch_norm, freeze_batch_norm_delay):
+                            fused_batch_norm, freeze_batch_norm_delay,
+                            insert_identity_node):
     """Tests folding: inputs -> AtrousConv2d with batch norm -> Relu*.
 
     Args:
@@ -461,7 +558,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       has_scaling: Bool, when true the batch norm has scaling.
       fused_batch_norm: Bool, when true the batch norm is fused.
       freeze_batch_norm_delay: None or the number of steps after which training
-      switches to using frozen mean and variance
+        switches to using frozen mean and variance
+      insert_identity_node: Bool, insert identity node between conv and batch
+        norm
     """
     g = ops.Graph()
     with g.as_default():
@@ -469,19 +568,44 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       inputs = array_ops.zeros((batch_size, height, width, 3))
       dilation_rate = 2
       activation_fn = None if with_bypass else relu
-      scope = 'test/test2' if with_bypass else 'test'
-      node = separable_conv2d(
-          inputs,
-          None, [3, 3],
-          rate=dilation_rate,
-          depth_multiplier=1.0,
-          padding='SAME',
-          weights_initializer=self._WeightInit(0.09),
-          activation_fn=activation_fn,
-          normalizer_fn=batch_norm,
-          normalizer_params=self._BatchNormParams(
-              scale=has_scaling, fused=fused_batch_norm),
-          scope=scope)
+      name = 'test/test2' if with_bypass else 'test'
+      if insert_identity_node:
+        with g.name_scope(name):
+          node = separable_conv2d(
+              inputs,
+              None, [3, 3],
+              rate=dilation_rate,
+              depth_multiplier=1.0,
+              padding='SAME',
+              weights_initializer=self._WeightInit(0.09),
+              activation_fn=None,
+              normalizer_fn=None,
+              biases_initializer=None)
+          node = array_ops.identity(node, name='sep_conv_out')
+
+          node = batch_norm(
+              node,
+              center=True,
+              scale=has_scaling,
+              decay=1.0 - 0.003,
+              fused=fused_batch_norm)
+          if activation_fn is not None:
+            node = activation_fn(node)
+          sep_conv_name = name + '/SeparableConv2d'
+      else:
+        node = separable_conv2d(
+            inputs,
+            None, [3, 3],
+            rate=dilation_rate,
+            depth_multiplier=1.0,
+            padding='SAME',
+            weights_initializer=self._WeightInit(0.09),
+            activation_fn=activation_fn,
+            normalizer_fn=batch_norm,
+            normalizer_params=self._BatchNormParams(
+                scale=has_scaling, fused=fused_batch_norm),
+            scope=name)
+        sep_conv_name = name
       if with_bypass:
         node = math_ops.add(inputs, node, name='test/Add')
         relu(node, name='test/' + relu_op_name)
@@ -489,45 +613,48 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       fold_batch_norms.FoldBatchNorms(
           g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay)
 
-    folded_mul = g.get_operation_by_name(scope + '/mul_fold')
+    folded_mul = g.get_operation_by_name(sep_conv_name + '/mul_fold')
     self.assertEqual(folded_mul.type, 'Mul')
     if fused_batch_norm:
-      scale_reshape_op_name = scope + '/BatchNorm_Fold/scale_reshape'
+      scale_reshape_op_name = sep_conv_name + '/BatchNorm_Fold/scale_reshape'
     else:
-      scale_reshape_op_name = scope + '/scale_reshape'
-    self._AssertInputOpsAre(folded_mul,
-                            [scope + '/correction_mult', scale_reshape_op_name])
-    self._AssertOutputGoesToOps(folded_mul, g, [scope + '/depthwise_Fold'])
+      scale_reshape_op_name = sep_conv_name + '/scale_reshape'
+    self._AssertInputOpsAre(
+        folded_mul, [sep_conv_name + '/correction_mult', scale_reshape_op_name])
+    self._AssertOutputGoesToOps(folded_mul, g,
+                                [sep_conv_name + '/depthwise_Fold'])
 
     scale_reshape = g.get_operation_by_name(scale_reshape_op_name)
     self.assertEqual(scale_reshape.type, 'Reshape')
     self._AssertInputOpsAre(scale_reshape, [
-        self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm),
+        self._BatchNormMultiplierName(sep_conv_name, has_scaling,
+                                      fused_batch_norm),
         scale_reshape_op_name + '/shape'
     ])
-    self._AssertOutputGoesToOps(scale_reshape, g, [scope + '/mul_fold'])
+    self._AssertOutputGoesToOps(scale_reshape, g, [sep_conv_name + '/mul_fold'])
 
-    folded_conv = g.get_operation_by_name(scope + '/depthwise_Fold')
+    folded_conv = g.get_operation_by_name(sep_conv_name + '/depthwise_Fold')
     self.assertEqual(folded_conv.type, 'DepthwiseConv2dNative')
-    self._AssertInputOpsAre(
-        folded_conv, [scope + '/mul_fold', scope + '/depthwise/SpaceToBatchND'])
+    self._AssertInputOpsAre(folded_conv, [
+        sep_conv_name + '/mul_fold', sep_conv_name + '/depthwise/SpaceToBatchND'
+    ])
     if fused_batch_norm:
       self._AssertOutputGoesToOps(folded_conv, g,
-                                  [scope + '/BatchToSpaceND_Fold'])
+                                  [sep_conv_name + '/BatchToSpaceND_Fold'])
     else:
-      self._AssertOutputGoesToOps(folded_conv, g,
-                                  [scope + '/depthwise/BatchToSpaceND_Fold'])
+      self._AssertOutputGoesToOps(
+          folded_conv, g, [sep_conv_name + '/depthwise/BatchToSpaceND_Fold'])
 
-    folded_add = g.get_operation_by_name(scope + '/add_fold')
+    folded_add = g.get_operation_by_name(sep_conv_name + '/add_fold')
     self.assertEqual(folded_add.type, 'Add')
     self._AssertInputOpsAre(folded_add, [
-        scope + '/correction_add',
-        self._BathNormBiasName(scope, fused_batch_norm)
+        sep_conv_name + '/correction_add',
+        self._BathNormBiasName(sep_conv_name, fused_batch_norm)
     ])
     output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name]
     self._AssertOutputGoesToOps(folded_add, g, output_op_names)
     if freeze_batch_norm_delay is not None:
-      self._AssertMovingAveragesAreFrozen(g, scope)
+      self._AssertMovingAveragesAreFrozen(g, name)
 
     for op in g.get_operations():
       self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name)
@@ -535,9 +662,14 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
   def testFoldAtrousConv2d(self):
     self._RunTestOverParameters(self._TestFoldAtrousConv2d)
 
-  def _TestCompareFoldAndUnfolded(self, relu, relu_op_name, with_bypass,
-                                  has_scaling, fused_batch_norm,
-                                  freeze_batch_norm_delay):
+  def _TestCompareFoldAndUnfolded(self,
+                                  relu,
+                                  relu_op_name,
+                                  with_bypass,
+                                  has_scaling,
+                                  fused_batch_norm,
+                                  freeze_batch_norm_delay,
+                                  insert_identity_node=False):
     """Tests that running folded and unfolded BN returns the same results.
 
     Args:
@@ -549,6 +681,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase):
       fused_batch_norm: Bool, when true the batch norm is fused.
       freeze_batch_norm_delay: None or the number of steps after which training
       switches to using frozen mean and variance
+      insert_identity_node: Bool, insert identity node between conv and batch
+      norm
     """
     random_seed.set_random_seed(1234)
     unfolded_g = ops.Graph()
diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index 5e63d33db8..fd86a96905 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -32,7 +32,9 @@ from tensorflow.python.platform import tf_logging as logging
 _QUANTIZABLE_TYPES = {'Conv2D', 'MatMul', 'DepthwiseConv2dNative'}
 
 # Activations that are supported by the quantization rewrite.
-_ACTIVATION_TYPES = {'Relu', 'Relu6'}
+_ACTIVATION_TYPES = {'Relu', 'Relu6', 'Identity'}
+
+_RELU_TYPES = {'Relu', 'Relu6'}
 
 
 def Quantize(graph,
@@ -172,7 +174,7 @@ def Quantize(graph,
       # Add at inference time.
       consumers = input_to_ops_map.ConsumerOperations(
           layer_match.post_activation_bypass_op)
-      if any([consumer.type in _ACTIVATION_TYPES for consumer in consumers]):
+      if any([consumer.type in _RELU_TYPES for consumer in consumers]):
         logging.info('Skipping %s, because its followed by an activation.',
                      layer_match.post_activation_bypass_op.name)
       else:
@@ -384,10 +386,11 @@ def _FindLayersToQuantize(graph):
       bias_add_op = match_result.get_op(folded_bias_add_pattern)
     bypass_op = match_result.get_op(bypass_pattern)
     if layer_op not in matched_layer_set:
-      matched_layer_set.add(layer_op)
-      layer_matches.append(
-          _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op, None,
-                      bias_add_op))
+      if not _IsSkipLayer(activation_op):
+        matched_layer_set.add(layer_op)
+        layer_matches.append(
+            _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op, None,
+                        bias_add_op))
 
   # Match the final layer, where there may not be an activation and instead
   # the output of the final BiasAdd must be quantized. So we treat the BiasAdd
@@ -424,6 +427,32 @@ def _FindLayersToQuantize(graph):
   return layer_matches
 
 
+def _IsSkipLayer(activation_op):
+  """Skip quantizing conv->identity->Batch norm layers.
+
+  Args:
+    activation_op: Activation op detected by layer matching pattern
+
+  Returns:
+    skip_layer: boolean, true when conv->identity->batch norm is detected.
+  """
+
+  # Exclude quantization of conv->identity->BN,
+  # After folding, this part corresponds to estimation of mean and variance
+  # and should not be quantized.
+  skip_layer = False
+  if activation_op.type == 'Identity' and len(activation_op.outputs) == 1:
+    if len(activation_op.outputs[0].consumers()) == 1:
+      consumer = activation_op.outputs[0].consumers()[0]
+      if consumer.type == 'FusedBatchNorm':
+        skip_layer = True
+        logging.info(
+            'Skipping quantizing %s, because it is the output of a conv/fc'
+            'followed by a identity, feeding a fused batch norm.',
+            activation_op.name)
+  return skip_layer
+
+
 class _LayerMatch(object):
   """Contains all information related to a matched Layer."""
 
@@ -461,8 +490,8 @@ class _LayerMatch(object):
     return self._bias_add_op
 
 
-def _FollowedByFakeQuant(tensor):
-  """Returns True if the tensor is followed by a FakeQuant."""
+def _GetFollowingFakeQuantOp(tensor):
+  """Returns the following FakeQuant op if it exists else None."""
   fake_quant_ops = set([
       'FakeQuantWithMinMaxVars', 'FakeQuantWithMinMaxArgs',
       'FakeQuantWithMinMaxVarsPerChannel'
@@ -472,11 +501,11 @@ def _FollowedByFakeQuant(tensor):
   while consumers:
     c = consumers.pop()
     if c.type in fake_quant_ops:
-      return True
+      return c
     elif c.type in pass_through_ops:
       for output in c.outputs:
         consumers.extend(output.consumers())
-  return False
+  return None
 
 
 def _InsertQuantOp(context,
@@ -559,44 +588,78 @@ def _InsertQuantOp(context,
   # Prevent ops from being quantized multiple times. Bypass ops can sometimes
   # overlap between multiple matches, so we need to ensure that we don't
   # add duplicate FakeQuant operations.
-  if _FollowedByFakeQuant(inputs):
-    return
-
-  if moving_avg:
-    quant = (
-        quant_ops.MovingAvgQuantize(
-            inputs,
-            init_min=init_min,
-            init_max=init_max,
-            ema_decay=ema_decay,
-            is_training=is_training,
-            num_bits=bits,
-            narrow_range=narrow_range,
-            vars_collection=vars_collection,
-            name_prefix=name_prefix))
+  fake_quant_op = _GetFollowingFakeQuantOp(inputs)
+
+  # If we find that we are attempting to insert a fake quant op following
+  # a fake quant, we skip inserting a fake quant op
+
+  if fake_quant_op is None:
+    if moving_avg:
+      quant = (
+          quant_ops.MovingAvgQuantize(
+              inputs,
+              init_min=init_min,
+              init_max=init_max,
+              ema_decay=ema_decay,
+              is_training=is_training,
+              num_bits=bits,
+              narrow_range=narrow_range,
+              vars_collection=vars_collection,
+              name_prefix=name_prefix))
+    else:
+      quant = (
+          quant_ops.LastValueQuantize(
+              inputs,
+              init_min=init_min,
+              init_max=init_max,
+              is_training=is_training,
+              num_bits=bits,
+              narrow_range=narrow_range,
+              vars_collection=vars_collection,
+              name_prefix=name_prefix))
+
+    if quant_delay and quant_delay > 0:
+      activate_quant = math_ops.greater_equal(
+          common.CreateOrGetQuantizationStep(),
+          quant_delay,
+          name=name_prefix + '/activate_quant')
+      quant = control_flow_ops.cond(
+          activate_quant,
+          lambda: quant,
+          lambda: inputs,
+          name=name_prefix + '/delayed_quant')
   else:
-    quant = (
-        quant_ops.LastValueQuantize(
-            inputs,
-            init_min=init_min,
-            init_max=init_max,
-            is_training=is_training,
-            num_bits=bits,
-            narrow_range=narrow_range,
-            vars_collection=vars_collection,
-            name_prefix=name_prefix))
-
-  if quant_delay and quant_delay > 0:
-    activate_quant = math_ops.greater_equal(
-        common.CreateOrGetQuantizationStep(),
-        quant_delay,
-        name=name_prefix + '/activate_quant')
-    quant = control_flow_ops.cond(
-        activate_quant,
-        lambda: quant,
-        lambda: inputs,
-        name=name_prefix + '/delayed_quant')
-
+    #  return
+    # If a fake quant op is present already, make sure that
+    # any downstream use of the tensor reroutes to the appropriate quantized
+    # tensor. If there is no quant_delay, this is simply the output of the
+    # fake quant op. If there is a quant delay, we reroute to the output
+    # of the delayed quant operation, which inserts quantization only after
+    # a specified quant_delay
+
+    quant = fake_quant_op.outputs[0]
+    if quant_delay and quant_delay > 0:
+      name_prefix = '/'.join(quant.name.split('/')[:-1])
+      quant = quant.graph.get_tensor_by_name(name_prefix +
+                                             '/delayed_quant/Merge:0')
+    pruned_consumer_set = set()
+    for consumer in consumers:
+      fake_quant_dest_op = _GetFollowingFakeQuantOp(consumer.outputs[0])
+      if (fake_quant_dest_op is None or
+          fake_quant_dest_op.name != fake_quant_op.name):
+        pruned_consumer_set.add(consumer)
+    consumers = pruned_consumer_set
+
+    # If we have
+    # input->pass_through->fake_quant
+    # there is nothing to reroute.
+    #
+    # If we have
+    #  input-> pass_through->fake_quant
+    #                |-> consumer
+    # Then we reroute such that:
+    # input-> pass_through->fake_quant
+    #                            |-> consumer
   if consumers:
     tensors_modified_count = common.RerouteTensor(
         quant, inputs, can_modify=consumers)
diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py
index e80d2183a6..f0fd0949dd 100644
--- a/tensorflow/contrib/quantize/python/quantize_graph_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py
@@ -26,6 +26,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import template
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.platform import googletest
 
@@ -267,6 +268,27 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
       graph_def_after = str(g.as_graph_def())
       self.assertEqual(graph_def_before, graph_def_after)
 
+  def testIdentityNode(self):
+    self._RunTestOverAllRewrites(self._TestIdentityNode)
+
+  def _TestIdentityNode(self, rewrite_fn):
+    graph = ops.Graph()
+    with graph.as_default():
+      self._LayerWithIdentity()
+
+    rewrite_fn(graph)
+    op_names = [op.name for op in graph.get_operations()]
+    self.assertTrue(any('test/Conv/weights_quant' in name for name in op_names))
+    self.assertTrue(any('test/Conv/act_quant' in name for name in op_names))
+    bn_out_identity = graph.get_operation_by_name('test/bn_out')
+    self._AssertInputOpsAre(bn_out_identity, [
+        'test/Conv/add_fold',
+    ])
+
+    conv_out_identity = graph.get_operation_by_name('test/conv_out')
+    self._AssertOutputGoesToOps(conv_out_identity, graph,
+                                ['test/BatchNorm/FusedBatchNorm'])
+
   def testRewriteWithScope(self):
     self._RunTestOverExperimentalRewritesWithScope(
         self._TestRewriteWithScope, 'scope1')
@@ -306,6 +328,42 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
     # No ops should be inserted or removed.
     self.assertEqual(op_names_before_rewrite, op_names_after_rewrite)
 
+  def testWithSharedWeights(self):
+
+    self._RunTestOverAllRewrites(self._TestWithSharedWeights)
+    self._RunTestOverTrainingRewrites(self._TestRewriteWithSharedWeights)
+
+  def _TestRewriteWithSharedWeights(self, rewrite_fn, quant_delay=1):
+    self._TestWithSharedWeights(rewrite_fn, quant_delay)
+
+  def _TestWithSharedWeights(self, rewrite_fn, quant_delay=None):
+    with ops.Graph().as_default() as g:
+      conv = template.make_template('shared_weights_conv', self._ConvLayer)
+      conv()
+      conv()
+      if quant_delay is None:
+        rewrite_fn()
+      else:
+        rewrite_fn(quant_delay=quant_delay)
+
+    conv_ops = [op for op in g.get_operations() if op.type == 'Conv2D']
+    weights_quants = [
+        op for op in g.get_operations()
+        if 'weights_quant' in op.name and op.type == 'FakeQuantWithMinMaxVars'
+    ]
+    # Check that the shared weights variable is not quantized multiple times
+    self.assertTrue(len(weights_quants) == 1)
+    weights_quant_tensor = weights_quants[0].outputs[0]
+    if quant_delay:
+      delayed_weights_quants = [
+          op for op in g.get_operations()
+          if 'weights_quant' in op.name and op.type == 'Merge'
+      ]
+      self.assertTrue(len(delayed_weights_quants) == 1)
+      weights_quant_tensor = delayed_weights_quants[0].outputs[0]
+    # Check that the Conv2D operations get the quantized weights
+    self.assertTrue(all(weights_quant_tensor in op.inputs for op in conv_ops))
+
   def _ConvLayer(
       self, input_tensor=None, scope='test', pre_activation_bypass=False,
       post_activation_bypass=False):
@@ -328,6 +386,57 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
         output += input_tensor
     return output
 
+  def _LayerWithIdentity(self,
+                         input_tensor=None,
+                         scope='test',
+                         post_activation_bypass=False):
+    """Add a basic conv, identity, batch norm with skip to the default graph."""
+    batch_size, height, width, depth = 5, 128, 128, 3
+    if input_tensor is None:
+      input_tensor = array_ops.zeros((batch_size, height, width, depth))
+    weight_init = init_ops.truncated_normal_initializer
+    with ops.name_scope(scope):
+      output = layers.conv2d(
+          input_tensor,
+          depth, [5, 5],
+          padding='SAME',
+          weights_initializer=weight_init(0.09),
+          activation_fn=None,
+          normalizer_fn=None,
+          biases_initializer=None)
+      output = array_ops.identity(output, name='conv_out')
+
+      output = layers.batch_norm(
+          output, center=True, scale=True, decay=1.0 - 0.003, fused=True)
+
+      output = array_ops.identity(output, name='bn_out')
+      if post_activation_bypass:
+        output += input_tensor
+    return output
+
+  def _AssertInputOpsAre(self, op, in_op_names):
+    """Asserts that all inputs to op come from in_op_names (disregarding order).
+
+    Args:
+      op: Operation to check inputs for.
+      in_op_names: List of strings, operations where all op's inputs should come
+        from.
+    """
+    expected_inputs = [in_op_name + ':0' for in_op_name in in_op_names]
+    self.assertItemsEqual([t.name for t in op.inputs], expected_inputs)
+
+  def _AssertOutputGoesToOps(self, op, graph, out_op_names):
+    """Asserts that outputs from op go to out_op_names (and perhaps others).
+
+    Args:
+      op: Operation to check outputs for.
+      graph: Graph where output operations are located.
+      out_op_names: List of strings, operations where op's outputs should go.
+    """
+    for out_op_name in out_op_names:
+      out_op = graph.get_operation_by_name(out_op_name)
+      self.assertIn(op.outputs[0].name, [str(t.name) for t in out_op.inputs])
+
 
 if __name__ == '__main__':
   googletest.main()
-- 
GitLab


From 881c11a0771c25875453deaa5937cb681675b4d5 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 10 Oct 2018 11:20:21 -0700
Subject: [PATCH 0704/1085] Don't decluster ops not supported by TF

There are ops that don't have a TensorFlow kernel and must be run via XLA.
Don't decluster these ops.  If declustering these ops become important in the
future we could perhaps put these on XLA_* devices or put them in a single-node
cluster but YAGNI probably.

PiperOrigin-RevId: 216562037
---
 tensorflow/compiler/jit/BUILD                 |  1 +
 .../compiler/jit/partially_decluster_pass.cc  | 22 +++++++++----
 .../jit/partially_decluster_pass_test.cc      | 32 +++++++++++++++++++
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 661b444a42..64adc885bc 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -510,6 +510,7 @@ tf_cc_test(
         "//tensorflow/compiler/tf2xla:test_util",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/cc:xla_jit_ops",
+        "//tensorflow/compiler/tf2xla/cc:xla_ops",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
diff --git a/tensorflow/compiler/jit/partially_decluster_pass.cc b/tensorflow/compiler/jit/partially_decluster_pass.cc
index b1f9e9088f..5b96103223 100644
--- a/tensorflow/compiler/jit/partially_decluster_pass.cc
+++ b/tensorflow/compiler/jit/partially_decluster_pass.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/framework/memory_types.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op_kernel.h"
 
 namespace tensorflow {
 namespace {
@@ -206,18 +207,27 @@ bool IsIntraClusterEdge(const Edge& edge) {
   return src_cluster_name.has_value() && src_cluster_name == dst_cluster_name;
 }
 
-Status MustCompileNode(const Node* n, bool* result) {
+bool IsMustCompileDevice(const DeviceType& device_type) {
+  const XlaOpRegistry::DeviceRegistration* registration;
+  if (XlaOpRegistry::GetCompilationDevice(device_type.type(), &registration)) {
+    return registration->requires_compilation;
+  }
+
+  return false;
+}
+
+Status MustCompileNode(const Node* n, bool* must_compile) {
   DeviceType device_type("");
   TF_RETURN_IF_ERROR(
       DeviceToDeviceType(n->assigned_device_name(), &device_type));
 
-  const XlaOpRegistry::DeviceRegistration* registration;
-  if (!XlaOpRegistry::GetCompilationDevice(device_type.type(), &registration)) {
-    *result = false;
-  } else {
-    *result = registration->requires_compilation;
+  if (IsMustCompileDevice(device_type)) {
+    *must_compile = true;
+    return Status::OK();
   }
 
+  // We must compile `n` if it does not have a TensorFlow kernel.
+  *must_compile = !FindKernelDef(device_type, n->def(), nullptr, nullptr).ok();
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/jit/partially_decluster_pass_test.cc b/tensorflow/compiler/jit/partially_decluster_pass_test.cc
index 0feb73a89e..74d5ef5718 100644
--- a/tensorflow/compiler/jit/partially_decluster_pass_test.cc
+++ b/tensorflow/compiler/jit/partially_decluster_pass_test.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/jit/xla_cluster_util.h"
+#include "tensorflow/compiler/tf2xla/cc/ops/xla_ops.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -405,5 +406,36 @@ TEST(PartiallyDeclusterPassTest, DontDeclusterXlaDeviceOps) {
   }
 }
 
+TEST(PartiallyDeclusterPassTest, DontDeclusterNonTensorFlowOps) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output dynamic_slice_operand =
+      ops::Placeholder(s.WithOpName("dynamic_slice_operand"), DT_INT32,
+                       ops::Placeholder::Attrs{});
+  Output dynamic_slice_begin = ops::Placeholder(
+      s.WithOpName("dynamic_slice_begin"), DT_INT32, ops::Placeholder::Attrs{});
+  Output dynamic_slice_size = ops::Placeholder(
+      s.WithOpName("dynamic_slice_size"), DT_INT32, ops::Placeholder::Attrs{});
+  Output dynamic_slice =
+      ops::XlaDynamicSlice(s.WithOpName("dynamic_slice"), dynamic_slice_operand,
+                           dynamic_slice_begin, dynamic_slice_size);
+
+  Output reshape_input = ops::Placeholder(s.WithOpName("reshape_input"),
+                                          DT_FLOAT, ops::Placeholder::Attrs{});
+  Output reshape =
+      ops::Reshape(s.WithOpName("reshape"), reshape_input, dynamic_slice);
+
+  AddToCluster({dynamic_slice.node(), reshape.node()}, "cluster_0");
+
+  std::unique_ptr<Graph> graph = absl::make_unique<Graph>(OpRegistry::Global());
+  TF_ASSERT_OK(s.ToGraph(graph.get()));
+
+  Node* n = FindNodeByName(*graph, "dynamic_slice");
+  ASSERT_NE(n, nullptr);
+
+  TF_ASSERT_OK(PartiallyDecluster(&graph));
+
+  EXPECT_EQ(GetXlaClusterForNode(*n), "cluster_0");
+}
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From b95a4b41941b2d4b672df7ddbc30792beb7e1e14 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 11:33:09 -0700
Subject: [PATCH 0705/1085] Internal change.

PiperOrigin-RevId: 216564327
---
 .../kernels/bidirectional_sequence_lstm.cc    | 59 ++++++++++---------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index a326827b1e..1137f05fa6 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -129,14 +129,14 @@ enum TemporaryTensor {
   kBwScratchBuffer = 1,
   // Quantized tensors needed for the hybrid kernel.
   kInputQuantized = 2,
-  kAuxInputQuantized = 3,  // Quantized tensor needed for auxiliary input.
-  kFwActivationStateQuantized = 4,
-  kBwActivationStateQuantized = 5,
-  kFwCellStateQuantized = 6,
-  kBwCellStateQuantized = 7,
-  kScalingFactors = 8,
-  kProductScalingFactors = 9,
-  kRecoveredCellWeights = 10,
+  kFwActivationStateQuantized = 3,
+  kBwActivationStateQuantized = 4,
+  kFwCellStateQuantized = 5,
+  kBwCellStateQuantized = 6,
+  kScalingFactors = 7,
+  kProductScalingFactors = 8,
+  kRecoveredCellWeights = 9,
+  kAuxInputQuantized = 10,  // Optional, quantized tensor for auxiliary input.
   kNumTemporaryTensors = 11
 };
 
@@ -469,7 +469,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   TfLiteIntArrayFree(node->temporaries);
   if (is_hybrid_op) {
-    node->temporaries = TfLiteIntArrayCreate(kNumTemporaryTensors);
+    node->temporaries = TfLiteIntArrayCreate(
+        has_aux_input ? kNumTemporaryTensors : kNumTemporaryTensors - 1);
   } else {
     node->temporaries = TfLiteIntArrayCreate(2);  // the two scratch buffers.
   }
@@ -570,22 +571,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                                                        input_quantized_size));
     }
 
-    if (has_aux_input) {
-      node->temporaries->data[kAuxInputQuantized] =
-          *scratch_tensor_index + kAuxInputQuantized;
-      TfLiteTensor* aux_input_quantized =
-          GetTemporary(context, node, kAuxInputQuantized);
-      aux_input_quantized->type = kTfLiteUInt8;
-      aux_input_quantized->allocation_type = kTfLiteArenaRw;
-      if (!TfLiteIntArrayEqual(aux_input_quantized->dims, aux_input->dims)) {
-        TfLiteIntArray* aux_input_quantized_size =
-            TfLiteIntArrayCopy(aux_input->dims);
-        TF_LITE_ENSURE_OK(context,
-                          context->ResizeTensor(context, aux_input_quantized,
-                                                aux_input_quantized_size));
-      }
-    }
-
     node->temporaries->data[kFwActivationStateQuantized] =
         *scratch_tensor_index + kFwActivationStateQuantized;
     TfLiteTensor* fw_activation_state_quantized =
@@ -691,6 +676,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                         context->ResizeTensor(context, recovered_cell_weights,
                                               recovered_cell_weights_size));
     }
+
+    // Only allocate a temporary tensor for quantized auxiliary input if we are
+    // actually going to use it.
+    if (has_aux_input) {
+      node->temporaries->data[kAuxInputQuantized] =
+          *scratch_tensor_index + kAuxInputQuantized;
+      TfLiteTensor* aux_input_quantized =
+          GetTemporary(context, node, kAuxInputQuantized);
+      aux_input_quantized->type = kTfLiteUInt8;
+      aux_input_quantized->allocation_type = kTfLiteArenaRw;
+      if (!TfLiteIntArrayEqual(aux_input_quantized->dims, aux_input->dims)) {
+        TfLiteIntArray* aux_input_quantized_size =
+            TfLiteIntArrayCopy(aux_input->dims);
+        TF_LITE_ENSURE_OK(context,
+                          context->ResizeTensor(context, aux_input_quantized,
+                                                aux_input_quantized_size));
+      }
+    }
   }
   return kTfLiteOk;
 }
@@ -868,8 +871,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     case kTfLiteUInt8: {
       TfLiteTensor* input_quantized =
           GetTemporary(context, node, kInputQuantized);
-      TfLiteTensor* aux_input_quantized =
-          GetTemporary(context, node, kAuxInputQuantized);
       TfLiteTensor* fw_activation_state_quantized =
           GetTemporary(context, node, kFwActivationStateQuantized);
       TfLiteTensor* bw_activation_state_quantized =
@@ -884,6 +885,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           GetTemporary(context, node, kProductScalingFactors);
       TfLiteTensor* recovered_cell_weights =
           GetTemporary(context, node, kRecoveredCellWeights);
+      TfLiteTensor* aux_input_quantized =
+          (aux_input == nullptr)
+              ? nullptr
+              : GetTemporary(context, node, kAuxInputQuantized);
 
       TfLiteStatus fw_pass_status = lstm_eval::EvalHybrid(
           input, fw_input_to_input_weights, fw_input_to_forget_weights,
-- 
GitLab


From 0f1634bb5e1f056c560a6df93b7367fa4ddd62be Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Wed, 10 Oct 2018 11:53:16 -0700
Subject: [PATCH 0706/1085] Add convenient method result_shape() to hlo module

PiperOrigin-RevId: 216567812
---
 tensorflow/compiler/xla/service/hlo_module.h           | 10 +++++++++-
 .../compiler/xla/service/hlo_rematerialization.cc      |  2 +-
 tensorflow/compiler/xla/service/layout_assignment.cc   |  5 ++---
 .../compiler/xla/service/layout_assignment_test.cc     |  3 +--
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 735804e827..509b82c08a 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -99,7 +99,7 @@ class HloModule {
   HloComputation* DeepCloneComputation(HloComputation* computation,
                                        HloCloneContext* context = nullptr);
 
-  // Return a pointer to the entry computation of the module..
+  // Return a pointer to the entry computation of the module.
   const HloComputation* entry_computation() const {
     CHECK_NE(nullptr, entry_computation_);
     return entry_computation_;
@@ -109,6 +109,14 @@ class HloModule {
     return entry_computation_;
   }
 
+  // Returns the root instruction shape of entry computation.
+  //
+  // Precondition: entry_computation_ is not nullptr.
+  const Shape& result_shape() const {
+    CHECK_NE(nullptr, entry_computation_);
+    return entry_computation()->root_instruction()->shape();
+  }
+
   // Creates the ComputationLayout which describes the current status of the HLO
   // module entry computation.
   ComputationLayout compute_computation_layout() const {
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index 5ac43808ee..49e46ecd00 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -1215,7 +1215,7 @@ StatusOr<bool> HloRematerialization::Run(HloModule* module) {
   // by the caller.
   int64 module_output_size = 0;
   ShapeUtil::ForEachSubshape(
-      module->entry_computation()->root_instruction()->shape(),
+      module->result_shape(),
       [&module_output_size, this](const Shape& subshape,
                                   const ShapeIndex& /*index*/) {
         module_output_size += size_function_(subshape);
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 2cf5fc94ac..be0351fa6b 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -959,9 +959,8 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) {
       FindOrDie(computation_layouts_, module->entry_computation())
           .result_layout();
   if (result_layout.LayoutIsSet()) {
-    TF_RET_CHECK(ShapeUtil::Equal(
-        module->entry_computation()->root_instruction()->shape(),
-        result_layout.shape()));
+    TF_RET_CHECK(
+        ShapeUtil::Equal(module->result_shape(), result_layout.shape()));
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index ff6fdb5e4a..a831751fa9 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -1284,8 +1284,7 @@ ENTRY %CustomCallLayoutConstrainedTupleResult (p0: f32[4,4]) -> (f32[4,4]{1,0},
            ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})}));
   AssignLayouts(module.get(), &computation_layout);
 
-  ExpectTupleLayoutIs(module->entry_computation()->root_instruction()->shape(),
-                      {{1, 0}, {1, 0}});
+  ExpectTupleLayoutIs(module->result_shape(), {{1, 0}, {1, 0}});
 
   const HloInstruction* custom_call =
       FindInstruction(module.get(), "custom-call");
-- 
GitLab


From 069ff62df8f84285fac88fcd1718b34f91aeaa18 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Wed, 10 Oct 2018 12:07:56 -0700
Subject: [PATCH 0707/1085] Fix mul_test with Clang 8.0.0

PiperOrigin-RevId: 216570443
---
 tensorflow/contrib/lite/kernels/mul_test.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/mul_test.cc b/tensorflow/contrib/lite/kernels/mul_test.cc
index 2807550a6b..0f9c0c2eee 100644
--- a/tensorflow/contrib/lite/kernels/mul_test.cc
+++ b/tensorflow/contrib/lite/kernels/mul_test.cc
@@ -107,7 +107,7 @@ TEST(FloatMulOpTest, ActivationRELU_N1_TO_1) {
 }
 
 TEST(FloatMulOpTest, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatMulOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -124,7 +124,7 @@ TEST(FloatMulOpTest, VariousInputShapes) {
 }
 
 TEST(FloatMulOpTest, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatMulOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -161,7 +161,7 @@ TEST(IntegerMulOpTest, ActivationRELU_N1_TO_1) {
 }
 
 TEST(IntegerMulOpTest, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
@@ -176,7 +176,7 @@ TEST(IntegerMulOpTest, VariousInputShapes) {
 }
 
 TEST(IntegerMulOpTest, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
@@ -245,7 +245,7 @@ float GetTolerance(int min, int max) {
 
 TEST(QuantizedMulOpTest, WithBroadcast) {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedMulOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-- 
GitLab


From 7cf8899b2194c3049e65dc136751b2bfa6d79f5d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 12:09:01 -0700
Subject: [PATCH 0708/1085] Move from deprecated self.test_session() to
 self.session() or self.cached_session().

Move to cached_session() if the session is create more than once per test. Move to session() otherwise.

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to session() instead which slightly changes the semantic of the function:
* the session is not cached anymore (a new session is created).
* the session is closed when exiting the "with" scope.

PiperOrigin-RevId: 216570649
---
 .../features/python/clip_weights_test.py      |  4 +-
 .../python/random_tensor_pool_test.py         | 10 ++--
 .../features/python/virtual_batchnorm_test.py | 14 +++---
 .../opt/python/training/adamax_test.py        |  2 +-
 .../opt/python/training/addsign_test.py       |  4 +-
 .../training/lazy_adam_optimizer_test.py      |  2 +-
 .../opt/python/training/powersign_test.py     |  4 +-
 tensorflow/contrib/optimizer_v2/adam_test.py  |  2 +-
 .../optimizer_v2/checkpointable_utils_test.py | 10 ++--
 .../contrib/optimizer_v2/rmsprop_test.py      |  8 ++--
 .../python/kernel_tests/core_rnn_cell_test.py |  2 +-
 .../rnn/python/kernel_tests/core_rnn_test.py  | 46 +++++++++----------
 .../rnn/python/kernel_tests/gru_ops_test.py   | 12 ++---
 .../rnn/python/kernel_tests/lstm_ops_test.py  | 20 ++++----
 .../rnn/python/kernel_tests/rnn_test.py       | 10 ++--
 .../kernel_tests/attention_wrapper_test.py    |  6 +--
 .../python/kernel_tests/basic_decoder_test.py | 14 +++---
 .../kernel_tests/beam_search_ops_test.py      |  6 +--
 .../python/kernel_tests/decoder_test.py       |  4 +-
 .../seq2seq/python/kernel_tests/loss_test.py  |  2 +-
 20 files changed, 91 insertions(+), 91 deletions(-)

diff --git a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py
index 2b7bb5f14e..e4fac1976d 100644
--- a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py
+++ b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py
@@ -47,13 +47,13 @@ class ClipWeightsTest(test.TestCase):
     train_op1 = opt.minimize(loss, var_list=self.variables)
     train_op2 = opt_clip.minimize(loss, var_list=self.variables)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
       self.assertEqual(2.0, self.variables[0].eval())
       sess.run(train_op1)
       self.assertLess(0.1, self.variables[0].eval())
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
       self.assertEqual(2.0, self.variables[0].eval())
       sess.run(train_op2)
diff --git a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py
index 08584dcd65..3c9dfd6de0 100644
--- a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py
+++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py
@@ -37,7 +37,7 @@ class TensorPoolTest(test.TestCase):
     output_value = tensor_pool(input_value, pool_size=10)
     self.assertEqual(output_value.shape.as_list(), [None, None, 3])
 
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       for i in range(10):
         session.run(output_value, {input_value: [[[i] * 3]]})
         session.run(output_value, {input_value: [[[i] * 3] * 2]})
@@ -49,7 +49,7 @@ class TensorPoolTest(test.TestCase):
     output_value = tensor_pool(input_value, pool_size=10)
     self.assertEqual(output_value.shape.as_list(), [])
 
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       outs = []
       for i in range(50):
         out = session.run(output_value, {input_value: i})
@@ -67,7 +67,7 @@ class TensorPoolTest(test.TestCase):
         input_value, pool_size=10, pooling_probability=0.0)
     self.assertEqual(output_value.shape.as_list(), [])
 
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       for i in range(50):
         out = session.run(output_value, {input_value: i})
         self.assertEqual(out, i)
@@ -83,7 +83,7 @@ class TensorPoolTest(test.TestCase):
         pooling_probability=pooling_probability)
     self.assertEqual(output_value.shape.as_list(), [])
 
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       not_pooled = 0
       total = 1000
       for i in range(total):
@@ -104,7 +104,7 @@ class TensorPoolTest(test.TestCase):
     for output_value in output_values:
       self.assertEqual(output_value.shape.as_list(), [])
 
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       for i in range(10):
         outs = session.run(output_values, {
             input_values[0]: i,
diff --git a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py
index 2fe06a2872..ecfbb8a432 100644
--- a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py
+++ b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py
@@ -59,7 +59,7 @@ class VirtualBatchnormTest(test.TestCase):
       mom_mean, mom_var = nn.moments(tensors, axes)
       vb_var = mean_sq - math_ops.square(vb_mean)
 
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         vb_mean_np, vb_var_np, mom_mean_np, mom_var_np = sess.run([
             vb_mean, vb_var, mom_mean, mom_var])
 
@@ -93,7 +93,7 @@ class VirtualBatchnormTest(test.TestCase):
       vb_mean = array_ops.squeeze(vb_mean, batch_axis)
       vb_variance = array_ops.squeeze(vb_variance, batch_axis)
 
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         vb_mean_np, vb_var_np, mom_mean_np, mom_var_np = sess.run([
             vb_mean, vb_variance, mom_mean, mom_variance])
 
@@ -116,7 +116,7 @@ class VirtualBatchnormTest(test.TestCase):
       vbn = virtual_batchnorm.VBN(batch, axis, batch_axis=batch_axis)
       vbn_normalized = vbn.reference_batch_normalization()
 
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         variables_lib.global_variables_initializer().run()
 
         bn_normalized_np, vbn_normalized_np = sess.run(
@@ -142,7 +142,7 @@ class VirtualBatchnormTest(test.TestCase):
       vb_normed = array_ops.squeeze(
           vbn(array_ops.expand_dims(examples[i], [0])), [0])
 
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         variables_lib.global_variables_initializer().run()
         bn_np, vb_np = sess.run([batch_normalized, vb_normed])
       self.assertAllClose(bn_np[i, ...], vb_np)
@@ -167,7 +167,7 @@ class VirtualBatchnormTest(test.TestCase):
     vbn = virtual_batchnorm.VBN(reference_batch)
     vbn_fixed_example = array_ops.squeeze(
         vbn(array_ops.expand_dims(fixed_example, 0)), 0)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       variables_lib.global_variables_initializer().run()
       vbn_fixed_example_np = vbn_fixed_example.eval()
 
@@ -180,7 +180,7 @@ class VirtualBatchnormTest(test.TestCase):
       minibatch = array_ops.stack([fixed_example] + examples)
       vbn_minibatch = vbn(minibatch)
       cur_vbn_fixed_example = vbn_minibatch[0, ...]
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         variables_lib.global_variables_initializer().run()
         cur_vbn_fixed_example_np = cur_vbn_fixed_example.eval()
       self.assertAllClose(vbn_fixed_example_np, cur_vbn_fixed_example_np)
@@ -219,7 +219,7 @@ class VirtualBatchnormTest(test.TestCase):
 
     self.assertEqual(4, len(contrib_variables_lib.get_variables()))
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       variables_lib.global_variables_initializer().run()
       sess.run(to_fetch)
 
diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py
index 61d8b94eca..a1e220924f 100644
--- a/tensorflow/contrib/opt/python/training/adamax_test.py
+++ b/tensorflow/contrib/opt/python/training/adamax_test.py
@@ -129,7 +129,7 @@ class AdaMaxOptimizerTest(test.TestCase):
 
   def testSparseDevicePlacement(self):
     for index_dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session(force_gpu=test.is_gpu_available()):
+      with self.cached_session(force_gpu=test.is_gpu_available()):
         # If a GPU is available, tests that all optimizer ops can be placed on
         # it (i.e. they have GPU kernels).
         var = variables.Variable([[1.0], [2.0]])
diff --git a/tensorflow/contrib/opt/python/training/addsign_test.py b/tensorflow/contrib/opt/python/training/addsign_test.py
index 6150fa117f..2c74acd9ff 100644
--- a/tensorflow/contrib/opt/python/training/addsign_test.py
+++ b/tensorflow/contrib/opt/python/training/addsign_test.py
@@ -66,7 +66,7 @@ class AddSignTest(test.TestCase):
                  alpha=1.0,
                  beta=0.9):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, m1 = 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -169,7 +169,7 @@ class AddSignTest(test.TestCase):
                   alpha=1.0,
                   beta=0.9):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, m1 = 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
diff --git a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py
index 089ecf597d..65ad724b3c 100644
--- a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py
+++ b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py
@@ -107,7 +107,7 @@ class AdamOptimizerTest(test.TestCase, parameterized.TestCase):
   @parameterized.parameters([False, True])
   def testSparseDevicePlacement(self, use_resource):
     for index_dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session(force_gpu=test.is_gpu_available()):
+      with self.cached_session(force_gpu=test.is_gpu_available()):
         # If a GPU is available, tests that all optimizer ops can be placed on
         # it (i.e. they have GPU kernels).
         if use_resource:
diff --git a/tensorflow/contrib/opt/python/training/powersign_test.py b/tensorflow/contrib/opt/python/training/powersign_test.py
index 1cf9901dc0..f2c87b5883 100644
--- a/tensorflow/contrib/opt/python/training/powersign_test.py
+++ b/tensorflow/contrib/opt/python/training/powersign_test.py
@@ -67,7 +67,7 @@ class PowerSignTest(test.TestCase):
                  base=math.e,
                  beta=0.9):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, m1 = 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -173,7 +173,7 @@ class PowerSignTest(test.TestCase):
                   py_sign_decay_fn=None,
                   base=math.e,
                   beta=0.9):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         # Initialize variables for numpy implementation.
         m0, m1 = 0.0, 0.0
diff --git a/tensorflow/contrib/optimizer_v2/adam_test.py b/tensorflow/contrib/optimizer_v2/adam_test.py
index b1ad0ade42..b55739f788 100644
--- a/tensorflow/contrib/optimizer_v2/adam_test.py
+++ b/tensorflow/contrib/optimizer_v2/adam_test.py
@@ -109,7 +109,7 @@ class AdamOptimizerTest(test.TestCase):
 
   def testSparseDevicePlacement(self):
     for index_dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session(force_gpu=test.is_gpu_available()):
+      with self.cached_session(force_gpu=test.is_gpu_available()):
         # If a GPU is available, tests that all optimizer ops can be placed on
         # it (i.e. they have GPU kernels).
         var = variables.Variable([[1.0], [2.0]])
diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
index 3e225ff0dd..6362d424ed 100644
--- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
+++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py
@@ -337,7 +337,7 @@ class CheckpointingTests(test.TestCase):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     for training_continuation in range(3):
-      with ops.Graph().as_default(), self.test_session(
+      with ops.Graph().as_default(), self.session(
           graph=ops.get_default_graph()), test_util.device(use_gpu=True):
         model = MyModel()
         optimizer = adam.AdamOptimizer(0.001)
@@ -370,7 +370,7 @@ class CheckpointingTests(test.TestCase):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     for training_continuation in range(3):
-      with ops.Graph().as_default(), self.test_session(
+      with ops.Graph().as_default(), self.session(
           graph=ops.get_default_graph()), test_util.device(use_gpu=True):
         model = MyModel()
         # Don't actually train so we can test variable values
@@ -688,7 +688,7 @@ class CheckpointCompatibilityTests(test.TestCase):
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     with context.graph_mode():
       save_graph = ops.Graph()
-      with save_graph.as_default(), self.test_session(
+      with save_graph.as_default(), self.session(
           graph=save_graph) as session:
         root = self._initialized_model()
         name_saver = core_saver.Saver()
@@ -733,7 +733,7 @@ class CheckpointCompatibilityTests(test.TestCase):
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     with context.graph_mode():
       save_graph = ops.Graph()
-      with save_graph.as_default(), self.test_session(
+      with save_graph.as_default(), self.session(
           graph=save_graph) as session:
         root = self._initialized_model()
         save_path = root.save(
@@ -752,7 +752,7 @@ class CheckpointCompatibilityTests(test.TestCase):
       save_path = root.save(file_prefix=checkpoint_prefix)
     with context.graph_mode():
       save_graph = ops.Graph()
-      with save_graph.as_default(), self.test_session(
+      with save_graph.as_default(), self.session(
           graph=save_graph):
         root = self._initialized_model()
         self._set_sentinels(root)
diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
index 83f5971039..202c1e9afc 100644
--- a/tensorflow/contrib/optimizer_v2/rmsprop_test.py
+++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
@@ -89,7 +89,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
   def testDense(self, dtype, param_value):
     (learning_rate, decay, momentum, epsilon, centered, use_resource) = tuple(
         param_value)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # Initialize variables for numpy implementation.
       var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
       grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
@@ -213,7 +213,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
   def testSparse(self, dtype, param_value):
     (learning_rate, decay, momentum, epsilon, centered, _) = tuple(
         param_value)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # Initialize variables for numpy implementation.
       var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
       grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
@@ -287,7 +287,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(_DATA_TYPES)
   def testWithoutMomentum(self, dtype):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       var0 = variables.Variable([1.0, 2.0], dtype=dtype)
       var1 = variables.Variable([3.0, 4.0], dtype=dtype)
       grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
@@ -353,7 +353,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(_DATA_TYPES)
   def testWithMomentum(self, dtype):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       var0 = variables.Variable([1.0, 2.0], dtype=dtype)
       var1 = variables.Variable([3.0, 4.0], dtype=dtype)
       grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index be0306cb07..572df58e52 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -695,7 +695,7 @@ class RNNCellTest(test.TestCase):
       return
 
     gpu_dev = test.gpu_device_name()
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 1, 3])
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
index f31ad53d3c..5cba54dd3d 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py
@@ -211,7 +211,7 @@ class RNNTest(test.TestCase):
       self.assertEqual(out.get_shape(), inp.get_shape())
       self.assertEqual(out.dtype, inp.dtype)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       input_value = np.random.randn(batch_size, input_size)
       values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value})
 
@@ -247,7 +247,7 @@ class RNNTest(test.TestCase):
       self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list())
       self.assertEqual(out.dtype, inp.dtype)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       input_value = np.random.randn(batch_size, input_size)
       values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value})
       full_dropout_values = sess.run(
@@ -274,7 +274,7 @@ class RNNTest(test.TestCase):
           cell, inputs, sequence_length=sequence_length, dtype=dtypes.float32)
     self.assertEqual(len(dynamic_outputs), len(inputs))
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       input_value = np.random.randn(batch_size, input_size)
       dynamic_values = sess.run(
           dynamic_outputs,
@@ -310,7 +310,7 @@ class RNNTest(test.TestCase):
                                      1.0 * (2 + 1) * np.ones((input_size)))))
 
   def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()):
+    with self.session(use_gpu=True, graph=ops_lib.Graph()):
       if use_outer_scope:
         with variable_scope.variable_scope(prefix) as scope:
           factory(scope)
@@ -372,7 +372,7 @@ class LSTMTest(test.TestCase):
     input_size = 5
     batch_size = 2
     max_length = 8
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       initializer = init_ops.random_uniform_initializer(
           -0.01, 0.01, seed=self._seed)
       cell = rnn_cell.LSTMCell(
@@ -394,7 +394,7 @@ class LSTMTest(test.TestCase):
     input_size = 5
     batch_size = 2
     max_length = 8
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       initializer = init_ops.random_uniform_initializer(
           -0.01, 0.01, seed=self._seed)
       cell = rnn_cell.LSTMCell(
@@ -424,7 +424,7 @@ class LSTMTest(test.TestCase):
     input_size = 5
     batch_size = 2
     max_length = 8
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       initializer = init_ops.random_uniform_initializer(
           -0.01, 0.01, seed=self._seed)
       state_saver = TestStateSaver(batch_size, 2 * num_units)
@@ -562,7 +562,7 @@ class LSTMTest(test.TestCase):
     batch_size = 2
     num_proj = 4
     max_length = 8
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       initializer = init_ops.random_uniform_initializer(
           -0.01, 0.01, seed=self._seed)
       inputs = max_length * [
@@ -659,7 +659,7 @@ class LSTMTest(test.TestCase):
     num_proj_shards = 3
     num_unit_shards = 2
     max_length = 8
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       initializer = init_ops.random_uniform_initializer(
           -0.01, 0.01, seed=self._seed)
 
@@ -692,7 +692,7 @@ class LSTMTest(test.TestCase):
     num_proj_shards = 3
     num_unit_shards = 2
     max_length = 8
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       initializer = init_ops.random_uniform_initializer(-1, 1, seed=self._seed)
       inputs = max_length * [
           array_ops.placeholder(dtypes.float64, shape=(None, input_size))
@@ -728,7 +728,7 @@ class LSTMTest(test.TestCase):
     num_proj_shards = 3
     num_unit_shards = 2
     max_length = 8
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       inputs = max_length * [
           array_ops.placeholder(dtypes.float32, shape=(None, input_size))
       ]
@@ -784,7 +784,7 @@ class LSTMTest(test.TestCase):
     num_proj_shards = 3
     num_unit_shards = 2
     max_length = 8
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       sequence_length = array_ops.placeholder(dtypes.int64)
       initializer = init_ops.random_uniform_initializer(
           -0.01, 0.01, seed=self._seed)
@@ -1117,7 +1117,7 @@ class LSTMTest(test.TestCase):
           state_is_tuple=False)
 
     ########### Step 1: Run static graph and generate readouts
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       if in_graph_mode:
         concat_inputs = array_ops.placeholder(
             dtypes.float32, shape=(time_steps, batch_size, input_size))
@@ -1177,7 +1177,7 @@ class LSTMTest(test.TestCase):
             static_individual_variable_gradients, feed_dict=feeds)
 
     ########## Step 2: Run dynamic graph and generate readouts
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       if in_graph_mode:
         concat_inputs = array_ops.placeholder(
             dtypes.float32, shape=(time_steps, batch_size, input_size))
@@ -1337,7 +1337,7 @@ class BidirectionalRNNTest(test.TestCase):
     return input_value, inputs, outputs, state_fw, state_bw, sequence_length
 
   def _testBidirectionalRNN(self, use_shape):
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       input_value, inputs, outputs, state_fw, state_bw, sequence_length = (
           self._createBidirectionalRNN(use_shape, True))
       variables_lib.global_variables_initializer().run()
@@ -1384,7 +1384,7 @@ class BidirectionalRNNTest(test.TestCase):
       self.assertAllClose(s_fw, s_bw)
 
   def _testBidirectionalRNNWithoutSequenceLength(self, use_shape):
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       input_value, inputs, outputs, state_fw, state_bw, _ = (
           self._createBidirectionalRNN(use_shape, False))
       variables_lib.global_variables_initializer().run()
@@ -1472,7 +1472,7 @@ class BidirectionalRNNTest(test.TestCase):
 
   def _testBidirectionalDynamicRNN(self, use_shape, use_state_tuple,
                                    use_time_major, use_sequence_length):
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       input_value, inputs, outputs, state_fw, state_bw, sequence_length = (
           self._createBidirectionalDynamicRNN(
               use_shape, use_state_tuple, use_time_major, use_sequence_length))
@@ -1549,7 +1549,7 @@ class BidirectionalRNNTest(test.TestCase):
     # REMARKS: factory(scope) is a function accepting a scope
     #          as an argument, such scope can be None, a string
     #          or a VariableScope instance.
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()):
+    with self.session(use_gpu=True, graph=ops_lib.Graph()):
       if use_outer_scope:
         with variable_scope.variable_scope(prefix) as scope:
           factory(scope)
@@ -1868,7 +1868,7 @@ class StateSaverRNNTest(test.TestCase):
     batch_size = 2
     state_saver = TestStateSaver(batch_size, 2 * num_units)
 
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()):
+    with self.session(use_gpu=True, graph=ops_lib.Graph()):
       if use_outer_scope:
         with variable_scope.variable_scope(prefix) as scope:
           self._factory(scope=scope, state_saver=state_saver)
@@ -1945,7 +1945,7 @@ class GRUTest(test.TestCase):
 
     sequence_length = np.random.randint(0, time_steps, size=batch_size)
 
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess:
       concat_inputs = array_ops.placeholder(
           dtypes.float32, shape=(time_steps, batch_size, input_size))
 
@@ -1967,7 +1967,7 @@ class GRUTest(test.TestCase):
       sess.run([outputs_dynamic, state_dynamic], feed_dict=feeds)
 
   def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()):
+    with self.session(use_gpu=True, graph=ops_lib.Graph()):
       if use_outer_scope:
         with variable_scope.variable_scope(prefix) as scope:
           factory(scope)
@@ -2253,7 +2253,7 @@ class RawRNNTest(test.TestCase):
           np.ones((max_time, batch_size, 1), np.int64), output_vals[1])
 
   def _testScope(self, factory, prefix="prefix", use_outer_scope=True):
-    with self.test_session(use_gpu=True, graph=ops_lib.Graph()):
+    with self.session(use_gpu=True, graph=ops_lib.Graph()):
       if use_outer_scope:
         with variable_scope.variable_scope(prefix) as scope:
           factory(scope)
@@ -2370,7 +2370,7 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
           sequence_length=sequence_length,
           dtype=dtypes.float32)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       opts = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
       run_metadata = config_pb2.RunMetadata()
       variables_lib.global_variables_initializer().run()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py
index b865466cc7..50d0da6eaf 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py
@@ -41,7 +41,7 @@ from tensorflow.python.training import gradient_descent
 class GRUBlockCellTest(test.TestCase):
 
   def testNoneDimsWithDynamicRNN(self):
-    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
       batch_size = 4
       cell_size = 5
       input_size = 6
@@ -58,7 +58,7 @@ class GRUBlockCellTest(test.TestCase):
       sess.run(output, feed)
 
   def testBlockGRUToGRUCellSingleStep(self):
-    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
       batch_size = 4
       cell_size = 5
       input_size = 6
@@ -91,7 +91,7 @@ class GRUBlockCellTest(test.TestCase):
         self.assertAllClose(block, basic)
 
   def testBlockGRUToGRUCellMultiStep(self):
-    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
       batch_size = 2
       cell_size = 3
       input_size = 3
@@ -150,7 +150,7 @@ class GRUBlockCellTest(test.TestCase):
       self.assertAllClose(block_res[1], block_res[1])
 
   def testDerivativeOfBlockGRUToGRUCellSingleStep(self):
-    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
       batch_size = 2
       cell_size = 3
       input_size = 4
@@ -220,7 +220,7 @@ class GRUBlockCellTest(test.TestCase):
     cell_size = 3
     input_size = 4
     time_steps = 2
-    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
       # Random initializers.
       seed = 1994
       initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=seed)
@@ -287,7 +287,7 @@ class GRUBlockCellTest(test.TestCase):
       self.assertAllClose(block, basic)
 
   def testGradient(self):
-    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
       batch_size = 1
       cell_size = 3
       input_size = 2
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
index ffd2421894..9ce0b399ba 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py
@@ -130,7 +130,7 @@ def blocks_match(sess, use_peephole):
 class LSTMBlockCellTest(test.TestCase):
 
   def testNoneDimsWithDynamicRNN(self):
-    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
       batch_size = 4
       num_steps = 5
       input_dim = 6
@@ -147,7 +147,7 @@ class LSTMBlockCellTest(test.TestCase):
       sess.run(output, feed)
 
   def testLSTMBlockCell(self):
-    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 2])
@@ -175,7 +175,7 @@ class LSTMBlockCellTest(test.TestCase):
         self.assertAllClose(res[4], [[0.24024698, 0.24024698]])
 
   def testCompatibleNames(self):
-    with self.test_session(use_gpu=True, graph=ops.Graph()):
+    with self.session(use_gpu=True, graph=ops.Graph()):
       cell = rnn_cell.LSTMCell(10)
       pcell = rnn_cell.LSTMCell(10, use_peepholes=True)
       inputs = [array_ops.zeros([4, 5])] * 6
@@ -186,7 +186,7 @@ class LSTMBlockCellTest(test.TestCase):
           for v in variables.trainable_variables()
       }
 
-    with self.test_session(use_gpu=True, graph=ops.Graph()):
+    with self.session(use_gpu=True, graph=ops.Graph()):
       cell = lstm_ops.LSTMBlockCell(10)
       pcell = lstm_ops.LSTMBlockCell(10, use_peephole=True)
       inputs = [array_ops.zeros([4, 5])] * 6
@@ -197,7 +197,7 @@ class LSTMBlockCellTest(test.TestCase):
           for v in variables.trainable_variables()
       }
 
-    with self.test_session(use_gpu=True, graph=ops.Graph()):
+    with self.session(use_gpu=True, graph=ops.Graph()):
       cell = lstm_ops.LSTMBlockFusedCell(10)
       pcell = lstm_ops.LSTMBlockFusedCell(10, use_peephole=True)
       inputs = array_ops.stack([array_ops.zeros([4, 5])] * 6)
@@ -212,7 +212,7 @@ class LSTMBlockCellTest(test.TestCase):
     self.assertEqual(basic_names, fused_names)
 
   def testLSTMBasicToBlockCell(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       x = array_ops.zeros([1, 2])
       x_values = np.random.randn(1, 2)
 
@@ -262,7 +262,7 @@ class LSTMBlockCellTest(test.TestCase):
         self.assertAllClose(basic, block)
 
   def testLSTMBasicToBlockCellPeeping(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       x = array_ops.zeros([1, 2])
       x_values = np.random.randn(1, 2)
 
@@ -315,7 +315,7 @@ class LSTMBlockCellTest(test.TestCase):
         self.assertAllClose(basic, block)
 
   def testLSTMBasicToBlock(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       (basic_state, fused_state, basic_outputs, block_outputs, fused_outputs,
        basic_grads, block_grads, fused_grads, basic_wgrads, block_wgrads,
        fused_wgrads) = blocks_match(
@@ -333,7 +333,7 @@ class LSTMBlockCellTest(test.TestCase):
         self.assertAllClose(basic, fused, rtol=1e-6, atol=1e-6)
 
   def testLSTMBasicToBlockPeeping(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       (basic_state, fused_state, basic_outputs, block_outputs, fused_outputs,
        basic_grads, block_grads, fused_grads, basic_wgrads, block_wgrads,
        fused_wgrads) = blocks_match(
@@ -352,7 +352,7 @@ class LSTMBlockCellTest(test.TestCase):
 
   def testLSTMFusedSequenceLengths(self):
     """Verify proper support for sequence lengths in LSTMBlockFusedCell."""
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       batch_size = 3
       input_size = 4
       cell_size = 5
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py
index eda8cb3c12..32df1db964 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py
@@ -99,7 +99,7 @@ class StackBidirectionalRNNTest(test.TestCase):
     return input_value, inputs, outputs, state_fw, state_bw, sequence_length
 
   def _testStackBidirectionalRNN(self, use_gpu, use_shape):
-    with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
       input_value, inputs, outputs, state_fw, state_bw, sequence_length = (
           self._createStackBidirectionalRNN(use_gpu, use_shape, True))
       variables.global_variables_initializer().run()
@@ -159,7 +159,7 @@ class StackBidirectionalRNNTest(test.TestCase):
     # - Check that the state_5 and state_5' (forward and backward) are the
     #   same for the first layer (it does not apply for the second layer since
     #   it has forward-backward dependencies).
-    with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
       batch_size = 2
       # Create states placeholders.
       initial_states_fw = [
@@ -281,7 +281,7 @@ class StackBidirectionalRNNTest(test.TestCase):
 
   def _testStackBidirectionalDynamicRNN(self, use_gpu, use_shape,
                                         use_state_tuple):
-    with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
       input_value, inputs, outputs, state_fw, state_bw, sequence_length = (
           self._createStackBidirectionalDynamicRNN(use_gpu, use_shape,
                                                    use_state_tuple))
@@ -343,7 +343,7 @@ class StackBidirectionalRNNTest(test.TestCase):
     # - Check that the state_5 and state_5' (forward and backward) are the
     #   same for the first layer (it does not apply for the second layer since
     #   it has forward-backward dependencies).
-    with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
       batch_size = 2
       # Create states placeholders.
       initial_states_fw = [
@@ -414,7 +414,7 @@ class StackBidirectionalRNNTest(test.TestCase):
     # REMARKS: factory(scope) is a function accepting a scope
     #          as an argument, such scope can be None, a string
     #          or a VariableScope instance.
-    with self.test_session(use_gpu=True, graph=ops.Graph()):
+    with self.session(use_gpu=True, graph=ops.Graph()):
       if use_outer_scope:
         with variable_scope.variable_scope(prefix) as scope:
           factory(scope)
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
index 1f3b533de9..c1e36b2ea3 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
@@ -181,7 +181,7 @@ class AttentionWrapperTest(test.TestCase):
         for creator, depth in zip(create_attention_mechanisms,
                                   attention_mechanism_depths)]
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with vs.variable_scope(
           'root',
           initializer=init_ops.random_normal_initializer(stddev=0.01, seed=3)):
@@ -724,7 +724,7 @@ class AttentionWrapperTest(test.TestCase):
   def testBahdanauMonotonicHard(self):
     # Run attention mechanism with mode='hard', make sure probabilities are hard
     b, t, u, d = 10, 20, 30, 40
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       a = wrapper.BahdanauMonotonicAttention(
           d,
           random_ops.random_normal((b, t, u)),
@@ -898,7 +898,7 @@ class AttentionWrapperTest(test.TestCase):
   def testLuongMonotonicHard(self):
     # Run attention mechanism with mode='hard', make sure probabilities are hard
     b, t, u, d = 10, 20, 30, 40
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       a = wrapper.LuongMonotonicAttention(
           d,
           random_ops.random_normal((b, t, u)),
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py
index fa3f074c67..b7f9f3fb09 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py
@@ -50,7 +50,7 @@ class BasicDecoderTest(test.TestCase):
     cell_depth = 10
     output_layer_depth = 3
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       inputs = np.random.randn(batch_size, max_time,
                                input_depth).astype(np.float32)
       cell = rnn_cell.LSTMCell(cell_depth)
@@ -136,7 +136,7 @@ class BasicDecoderTest(test.TestCase):
     start_tokens = np.random.randint(0, vocabulary_size, size=batch_size)
     end_token = 1
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       embeddings = np.random.randn(vocabulary_size,
                                    input_depth).astype(np.float32)
       cell = rnn_cell.LSTMCell(vocabulary_size)
@@ -209,7 +209,7 @@ class BasicDecoderTest(test.TestCase):
     start_tokens = np.random.randint(0, vocabulary_size, size=batch_size)
     end_token = 1
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with variable_scope.variable_scope(
           "testStepWithSampleEmbeddingHelper",
           initializer=init_ops.constant_initializer(0.01)):
@@ -278,7 +278,7 @@ class BasicDecoderTest(test.TestCase):
     input_depth = 7
     vocabulary_size = 10
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       inputs = np.random.randn(
           batch_size, max_time, input_depth).astype(np.float32)
       embeddings = np.random.randn(
@@ -371,7 +371,7 @@ class BasicDecoderTest(test.TestCase):
     else:
       auxiliary_inputs = None
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       inputs = np.random.randn(batch_size, max_time,
                                input_depth).astype(np.float32)
       cell = rnn_cell.LSTMCell(cell_depth)
@@ -523,7 +523,7 @@ class BasicDecoderTest(test.TestCase):
         lambda x: array_ops.one_hot(x, vocabulary_size, dtype=dtypes.float32))
     end_fn = lambda sample_ids: math_ops.equal(sample_ids, end_token)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with variable_scope.variable_scope(
           "testStepWithInferenceHelper",
           initializer=init_ops.constant_initializer(0.01)):
@@ -604,7 +604,7 @@ class BasicDecoderTest(test.TestCase):
     next_inputs_fn = math_ops.to_float
     end_fn = lambda sample_ids: sample_ids[:, end_token]
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with variable_scope.variable_scope(
           "testStepWithInferenceHelper",
           initializer=init_ops.constant_initializer(0.01)):
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py
index 9662a5780a..b41734d214 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py
@@ -49,7 +49,7 @@ class GatherTreeTest(test.TestCase):
         parent_ids=parent_ids,
         max_sequence_lengths=max_sequence_lengths,
         end_token=end_token)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(expected_result, beams.eval())
 
   def testBadParentValuesOnCPU(self):
@@ -93,7 +93,7 @@ class GatherTreeTest(test.TestCase):
           parent_ids=parent_ids,
           max_sequence_lengths=max_sequence_lengths,
           end_token=end_token)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(expected_result, beams.eval())
 
   def testGatherTreeBatch(self):
@@ -103,7 +103,7 @@ class GatherTreeTest(test.TestCase):
     max_sequence_lengths = [0, 1, 2, 4, 7, 8, 9, 10, 11, 0]
     end_token = 5
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       step_ids = np.random.randint(
           0, high=end_token + 1, size=(max_time, batch_size, beam_width))
       parent_ids = np.random.randint(
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py
index b549cbf568..4c25489fad 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py
@@ -44,7 +44,7 @@ class DynamicDecodeRNNTest(test.TestCase):
     cell_depth = 10
     max_out = max(sequence_length)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       if time_major:
         inputs = np.random.randn(max_time, batch_size,
                                  input_depth).astype(np.float32)
@@ -126,7 +126,7 @@ class DynamicDecodeRNNTest(test.TestCase):
     cell_depth = 10
     max_out = max(sequence_length)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       inputs = np.random.randn(batch_size, max_time,
                                input_depth).astype(np.float32)
 
diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py
index 35c601a4bc..5aa32b532f 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py
@@ -34,7 +34,7 @@ from tensorflow.python.platform import test
 class LossTest(test.TestCase):
 
   def testSequenceLoss(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with variable_scope.variable_scope(
           'root', initializer=init_ops.constant_initializer(0.5)):
         batch_size = 2
-- 
GitLab


From b77648b3418f0cc66f249226c77a79d5d34f7618 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 10 Oct 2018 12:15:48 -0700
Subject: [PATCH 0709/1085] Modify getqualifiedname to iterate over a copy of
 the namespace, to avoid "dictionary changed size during iteration" errors.

PiperOrigin-RevId: 216571661
---
 tensorflow/python/autograph/pyct/inspect_utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index 29c406c248..1fc3c6006d 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -92,7 +92,10 @@ def getqualifiedname(namespace, object_, max_depth=2):
 
   # TODO(mdan): Use breadth-first search and avoid visiting modules twice.
   if max_depth:
-    for name, value in namespace.items():
+    # Iterating over a copy prevents "changed size due to iteration" errors.
+    # It's unclear why those occur - suspecting new modules may load during
+    # iteration.
+    for name, value in namespace.copy().items():
       if tf_inspect.ismodule(value):
         name_in_module = getqualifiedname(value.__dict__, object_,
                                           max_depth - 1)
-- 
GitLab


From b6335dfe51ac1ac6c947c71577f41a24a13fe547 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 12:32:43 -0700
Subject: [PATCH 0710/1085] Internal change.

PiperOrigin-RevId: 216574118
---
 .../kernels/bidirectional_sequence_lstm.cc    | 57 ++++++++++++++++---
 1 file changed, 48 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index 1137f05fa6..0d9863ae8d 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -151,7 +151,7 @@ void Free(TfLiteContext* context, void* buffer) {
 }
 
 // Check that input tensor dimensions matches with each other.
-TfLiteStatus CheckLstmTensorDimensions(
+TfLiteStatus CheckLstmTensorDimensionsAndTypes(
     TfLiteContext* context, TfLiteNode* node, int n_input, int n_output,
     int n_cell, int input_to_input_weights_tensor,
     int input_to_forget_weights_tensor, int input_to_cell_weights_tensor,
@@ -172,25 +172,39 @@ TfLiteStatus CheckLstmTensorDimensions(
   TF_LITE_ENSURE(context, params->cell_clip >= 0);
   TF_LITE_ENSURE(context, params->proj_clip >= 0);
 
+  const TfLiteTensor* input_to_forget_weights =
+      GetInput(context, node, input_to_forget_weights_tensor);
+  TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2);
+  TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell);
+  TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input);
+  TF_LITE_ENSURE(context, (input_to_forget_weights->type == kTfLiteFloat32) ||
+                              (input_to_forget_weights->type == kTfLiteUInt8));
+
   const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, input_to_input_weights_tensor);
   if (input_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input);
+    TF_LITE_ENSURE_EQ(context, input_to_input_weights->type,
+                      input_to_forget_weights->type);
   }
 
-  const TfLiteTensor* input_to_forget_weights =
-      GetInput(context, node, input_to_forget_weights_tensor);
-  TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2);
-  TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell);
-  TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input);
-
   const TfLiteTensor* input_to_cell_weights =
       GetInput(context, node, input_to_cell_weights_tensor);
   TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->size, 2);
   TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell);
   TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input);
+  TF_LITE_ENSURE_EQ(context, input_to_cell_weights->type,
+                    input_to_forget_weights->type);
+
+  const TfLiteTensor* input_to_output_weights =
+      GetInput(context, node, input_to_output_weights_tensor);
+  TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->size, 2);
+  TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[0], n_cell);
+  TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[1], n_input);
+  TF_LITE_ENSURE_EQ(context, input_to_output_weights->type,
+                    input_to_forget_weights->type);
 
   const TfLiteTensor* recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, recurrent_to_input_weights_tensor);
@@ -200,6 +214,8 @@ TfLiteStatus CheckLstmTensorDimensions(
                       n_cell);
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[1],
                       n_output);
+    TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->type,
+                      input_to_forget_weights->type);
   }
 
   const TfLiteTensor* recurrent_to_forget_weights =
@@ -209,6 +225,8 @@ TfLiteStatus CheckLstmTensorDimensions(
                     n_cell);
   TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[1],
                     n_output);
+  TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->type,
+                    input_to_forget_weights->type);
 
   const TfLiteTensor* recurrent_to_cell_weights =
       GetInput(context, node, recurrent_to_cell_weights_tensor);
@@ -216,6 +234,8 @@ TfLiteStatus CheckLstmTensorDimensions(
   TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[0], n_cell);
   TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[1],
                     n_output);
+  TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->type,
+                    input_to_forget_weights->type);
 
   // We make sure the input-gate's parameters are either both present (regular
   // LSTM) or not at all (CIFG-LSTM).
@@ -231,6 +251,8 @@ TfLiteStatus CheckLstmTensorDimensions(
   if (cell_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell);
+    TF_LITE_ENSURE_EQ(context, cell_to_input_weights->type,
+                      input_to_forget_weights->type);
   }
 
   const TfLiteTensor* cell_to_forget_weights =
@@ -238,6 +260,8 @@ TfLiteStatus CheckLstmTensorDimensions(
   if (cell_to_forget_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell);
+    TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->type,
+                      input_to_forget_weights->type);
   }
 
   const TfLiteTensor* cell_to_output_weights =
@@ -245,6 +269,8 @@ TfLiteStatus CheckLstmTensorDimensions(
   if (cell_to_output_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell);
+    TF_LITE_ENSURE_EQ(context, cell_to_output_weights->type,
+                      input_to_forget_weights->type);
   }
 
   // Making sure the peephole weights are there all or none.
@@ -266,22 +292,26 @@ TfLiteStatus CheckLstmTensorDimensions(
   } else {
     TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->data[0], n_cell);
+    TF_LITE_ENSURE_EQ(context, input_gate_bias->type, kTfLiteFloat32);
   }
 
   const TfLiteTensor* forget_gate_bias =
       GetInput(context, node, forget_gate_bias_tensor);
   TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->size, 1);
   TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->data[0], n_cell);
+  TF_LITE_ENSURE_EQ(context, forget_gate_bias->type, kTfLiteFloat32);
 
   const TfLiteTensor* cell_bias =
       GetInput(context, node, cell_gate_bias_tensor);
   TF_LITE_ENSURE_EQ(context, cell_bias->dims->size, 1);
   TF_LITE_ENSURE_EQ(context, cell_bias->dims->data[0], n_cell);
+  TF_LITE_ENSURE_EQ(context, cell_bias->type, kTfLiteFloat32);
 
   const TfLiteTensor* output_gate_bias =
       GetInput(context, node, output_gate_bias_tensor);
   TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1);
   TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell);
+  TF_LITE_ENSURE_EQ(context, output_gate_bias->type, kTfLiteFloat32);
 
   const TfLiteTensor* projection_weights =
       GetOptionalInputTensor(context, node, projection_weights_tensor);
@@ -289,6 +319,8 @@ TfLiteStatus CheckLstmTensorDimensions(
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[0], n_output);
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell);
+    TF_LITE_ENSURE_EQ(context, projection_weights->type,
+                      input_to_forget_weights->type);
   }
 
   const TfLiteTensor* projection_bias =
@@ -296,6 +328,7 @@ TfLiteStatus CheckLstmTensorDimensions(
   if (projection_bias) {
     TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output);
+    TF_LITE_ENSURE_EQ(context, projection_bias->type, kTfLiteFloat32);
   }
 
   // Making sure the projection tensors are consistent:
@@ -315,7 +348,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
                                         int n_output, int n_cell) {
   TF_LITE_ENSURE_OK(
       context,
-      CheckLstmTensorDimensions(
+      CheckLstmTensorDimensionsAndTypes(
           context, node, n_input, n_output, n_cell,
           kFwInputToInputWeightsTensor, kFwInputToForgetWeightsTensor,
           kFwInputToCellWeightsTensor, kFwInputToOutputWeightsTensor,
@@ -329,7 +362,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   TF_LITE_ENSURE_OK(
       context,
-      CheckLstmTensorDimensions(
+      CheckLstmTensorDimensionsAndTypes(
           context, node, n_input, n_output, n_cell,
           kBwInputToInputWeightsTensor, kBwInputToForgetWeightsTensor,
           kBwInputToCellWeightsTensor, kBwInputToOutputWeightsTensor,
@@ -379,12 +412,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->size, 2);
   TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->data[1],
                     n_input);
+  TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->type,
+                    fw_input_to_output_weights->type);
 
   const TfLiteTensor* fw_recurrent_to_output_weights =
       GetInput(context, node, kFwRecurrentToOutputWeightsTensor);
   TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->dims->size, 2);
   TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->dims->data[0],
                     n_fw_cell);
+  TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->type,
+                    fw_input_to_output_weights->type);
   const int n_fw_output = fw_recurrent_to_output_weights->dims->data[1];
 
   const TfLiteTensor* bw_recurrent_to_output_weights =
@@ -392,6 +429,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->size, 2);
   TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->data[0],
                     n_bw_cell);
+  TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->type,
+                    fw_input_to_output_weights->type);
   const int n_bw_output = bw_recurrent_to_output_weights->dims->data[1];
 
   // Check that input tensor dimensions matches with each other.
-- 
GitLab


From dbbb44ec54ea4d8ae028eed6cae9240112d8340c Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Wed, 10 Oct 2018 13:00:46 -0700
Subject: [PATCH 0711/1085] [XLA] Finish NumUniqueInstructionIds() cleanup.

NumUniqueInstructionIds() is no longer a good proxy for the number of instructions. Remove the API, and use the real number of instructions instead of the remaining uses.

PiperOrigin-RevId: 216578465
---
 .../compiler/xla/service/dfs_hlo_visitor.cc   |  4 +-
 .../compiler/xla/service/dfs_hlo_visitor.h    | 56 ++++---------------
 .../compiler/xla/service/hlo_instruction.cc   |  2 +-
 tensorflow/compiler/xla/service/hlo_module.h  |  4 --
 .../xla/service/tuple_points_to_analysis.cc   |  2 +-
 .../xla/service/tuple_points_to_analysis.h    | 20 +++++--
 6 files changed, 31 insertions(+), 57 deletions(-)

diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc
index 3e7373adc5..c54f81e691 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc
@@ -50,7 +50,7 @@ void DfsHloVisitorBase<HloInstructionPtr>::SetVisiting(
     const HloInstruction& instruction) {
   VLOG(3) << "marking HLO " << &instruction << " as visiting: ";
   DCHECK(NotVisited(instruction));
-  visit_state_.SetState(instruction.unique_id(), VisitState::kVisiting);
+  visit_state_[instruction.unique_id()] = VisitState::kVisiting;
 }
 
 template <typename HloInstructionPtr>
@@ -58,7 +58,7 @@ void DfsHloVisitorBase<HloInstructionPtr>::SetVisited(
     const HloInstruction& instruction) {
   VLOG(3) << "marking HLO " << &instruction << " as visited: ";
   DCHECK(NotVisited(instruction) || IsVisiting(instruction));
-  visit_state_.SetState(instruction.unique_id(), VisitState::kVisited);
+  visit_state_[instruction.unique_id()] = VisitState::kVisited;
 }
 
 template <typename HloInstructionPtr>
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index 68d01d75a2..4159aa281f 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <type_traits>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/literal.h"
@@ -263,21 +264,25 @@ class DfsHloVisitorBase {
     kVisited = 2,
   };
 
-  VisitState GetVisitState(int id) { return visit_state_.GetState(id); }
+  VisitState GetVisitState(int id) {
+    auto iter = visit_state_.find(id);
+    if (iter == visit_state_.end()) {
+      return VisitState::kNotVisited;
+    }
+    return iter->second;
+  }
   VisitState GetVisitState(const HloInstruction& instruction);
 
   // Resize internal state if necessary to hold state for ids <= num.
   // This call is purely a performance hint and can be omitted without
   // affecting correctness.
-  void ReserveVisitStates(int num) { visit_state_.Reserve(num); }
+  void ReserveVisitStates(int num) { visit_state_.reserve(num); }
 
   // Useful when we want to visit the same computation more than once with the
   // same visitor.
-  void ResetVisitStates() { visit_state_.Reset(); }
+  void ResetVisitStates() { visit_state_.clear(); }
 
-  void SetVisitState(int id, VisitState state) {
-    visit_state_.SetState(id, state);
-  }
+  void SetVisitState(int id, VisitState state) { visit_state_[id] = state; }
 
   // Sets the visitation state of the given instruction as kVisiting.
   //
@@ -326,44 +331,7 @@ class DfsHloVisitorBase {
   virtual Status Postprocess(HloInstructionPtr hlo);
 
  private:
-  class DFSVisitStates {
-   public:
-    DFSVisitStates() {}
-    void Reserve(uint64 num) {
-      states_.reserve((num + kStatesPerWord - 1) / kStatesPerWord);
-    }
-    VisitState GetState(uint64 id) {
-      uint64 word_index = id / kStatesPerWord;
-      if (word_index >= states_.size()) {
-        return VisitState::kNotVisited;
-      }
-      static_assert(static_cast<int>(VisitState::kVisited) < 3,
-                    "VisitState must fit in two bits");
-      uint64 w = states_[word_index];
-      uint32 shift = 2 * (id % kStatesPerWord);  // 2 bits per state
-      return static_cast<VisitState>((w >> shift) & 0x3);
-    }
-    void SetState(uint64 id, VisitState state) {
-      uint64 word_index = id / kStatesPerWord;
-      if (word_index >= states_.size()) {
-        states_.resize(word_index + 1, 0);
-      }
-      uint64* w = &states_[word_index];
-      uint32 shift = 2 * (id % kStatesPerWord);  // 2 bits per state
-      uint64 mask = 0x3ull << shift;
-      *w = (*w & ~mask) | (static_cast<uint64>(state) << shift);
-      DCHECK_EQ(GetState(id), state);
-    }
-    void Reset() { states_.clear(); }
-
-   private:
-    static const uint32 kStatesPerWord = sizeof(uint64) / 2 /*bits per entry*/;
-    // Map from id to two-bit states.  We store 32 such states per 64-bit
-    // value
-    std::vector<uint64> states_;
-  };
-
-  DFSVisitStates visit_state_;
+  absl::flat_hash_map<int, VisitState> visit_state_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(DfsHloVisitorBase);
 };
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index c317e9e3b4..306d29a2ae 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2483,7 +2483,7 @@ template <typename Visitor>
 static Status PostOrderDFS(HloInstruction* root, Visitor* visitor,
                            const InternalCompareFunction* operand_order,
                            bool ignore_control_predecessors) {
-  visitor->ReserveVisitStates(root->GetModule()->NumUniqueInstructionIds());
+  visitor->ReserveVisitStates(root->GetModule()->instruction_count());
 
   // dfs_stack holds pairs of <HloInstruction*->unique_id(), HloInstruction*>.
   //
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 509b82c08a..a1ffdc4023 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -220,10 +220,6 @@ class HloModule {
     return result;
   }
 
-  // Returns the number of unique intruction ids given out.  All ids up to
-  // this point are guaranteed to be in the range [0..NumUniqueInstructionIds())
-  int NumUniqueInstructionIds() const { return next_unique_id_; }
-
   // Returns an id that is unique to this module across all modules created over
   // the lifetime of this process.
   int unique_id() const { return unique_id_; }
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
index ef4e69180d..96f3055c98 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
@@ -148,7 +148,7 @@ TuplePointsToAnalysis::Run(const HloModule* module) {
 
 Status TuplePointsToAnalysis::Analyze() {
   per_instruction_.clear();
-  per_instruction_.resize(module_->NumUniqueInstructionIds());
+  per_instruction_.reserve(module_->instruction_count());
 
   logical_buffer_aliases_.clear();
   logical_buffer_aliases_.resize(
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
index 30c365053c..bcfcb388f9 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
@@ -315,14 +316,23 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault {
   const PerInstruction* PerInst(const HloInstruction* inst) const {
     int id = inst->unique_id();
     DCHECK_GE(id, 0);
-    DCHECK_LT(id, per_instruction_.size());
-    return &per_instruction_[id];
+    auto iter = per_instruction_.find(id);
+    if (iter == per_instruction_.end()) {
+      LOG(FATAL) << "Expected per-instruction information to already exist";
+    } else {
+      return iter->second.get();
+    }
   }
   PerInstruction* PerInst(const HloInstruction* inst) {
     int id = inst->unique_id();
     DCHECK_GE(id, 0);
-    DCHECK_LT(id, per_instruction_.size());
-    return &per_instruction_[id];
+    auto iter = per_instruction_.find(id);
+    if (iter == per_instruction_.end()) {
+      return per_instruction_.emplace(id, absl::make_unique<PerInstruction>())
+          .first->second.get();
+    } else {
+      return iter->second.get();
+    }
   }
 
   std::vector<std::pair<HloInstruction*, int64>> GetAllUsesOfInstructionAtIndex(
@@ -339,7 +349,7 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault {
   const std::unique_ptr<LogicalBufferAnalysis> logical_buffer_analysis_;
 
   // A map from instruction->unique_id() to
-  std::vector<PerInstruction> per_instruction_;
+  absl::flat_hash_map<int, std::unique_ptr<PerInstruction>> per_instruction_;
 
   // A map from LogicalBuffer->id() to alias information about that logical
   // buffer
-- 
GitLab


From 7ea8eca41a309b98355c4ed0dae0f5b176590dcd Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Wed, 10 Oct 2018 13:11:14 -0700
Subject: [PATCH 0712/1085] Pass rewriter config into PartitionedCallOp and run
 the grappler optimization pass

Grappler is disabled by default. Can be controlled through the config argument to enable_eager_execution(), by explicitly setting graph_options.rewrite_options.disable_meta_optimizer to False.

Benchmarks (with grappler enabled):
benchmark_defun_matmul_100_by_784_CPU
  Before: 353.947162628
  After: 322.019815445

benchmark_defun_matmul_2_by_2_CPU
  Before: 76.9423325857
  After: 72.8042999903

benchmark_defun_matmul_2_by_2_CPU_async
  Before: 38.6896689733
  After: 39.6879593531

benchmark_defun_matmul_forward_backward_2_by_2_CPU
  Before: 189.12516435
  After: 182.339771589

benchmark_defun_matmul_forward_backward_2_by_2_CPU_async
  Before: 222.475560506
  After: 245.545005798

benchmark_defun_with_signature
  Before: 120.702934265
  After: 122.84330527

benchmark_defun_with_signature_and_kwargs
  Before: 129.103032748
  After: 140.212361018

benchmark_defun_without_signature
  Before: 63.2988293966
  After: 62.6370668411

benchmark_defun_without_signature_and_with_kwargs
  Before: 73.5487699509
  After: 73.1824000676

PiperOrigin-RevId: 216580246
---
 .../grappler/optimizers/meta_optimizer.cc     |  2 +
 tensorflow/core/kernels/BUILD                 |  4 +
 .../core/kernels/partitioned_function_ops.cc  | 85 ++++++++++++++++---
 tensorflow/core/ops/functional_ops.cc         |  2 +
 tensorflow/python/BUILD                       |  1 +
 tensorflow/python/eager/context.py            | 46 +++++++++-
 tensorflow/python/eager/function.py           |  3 +-
 tensorflow/python/eager/function_test.py      | 14 +++
 tensorflow/python/ops/functional_ops.py       | 37 +++++++-
 9 files changed, 174 insertions(+), 20 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 225c0a91e3..b8f4599f56 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -123,6 +123,8 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
 
 MetaOptimizer::MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg)
     : cpu_device_(cpu_device), cfg_(cfg) {
+  DCHECK(cpu_device_ == nullptr ||
+         cpu_device_->attributes().device_type() == "CPU");
   // TODO(rmlarsen): Increase kNumThreads to, say, port::NumSchedulableCPUs()
   // if we want to the threadpool for parallelizing Grappler
   const int kNumThreads = 1;
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 1ca9c7b7f5..f0a2924378 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2127,6 +2127,10 @@ tf_kernel_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:functional_ops_op_lib",
         "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler/clusters:virtual_cluster",
+        "//tensorflow/core/grappler/optimizers:meta_optimizer",
     ],
 )
 
diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index 3979e4b53a..b2b3cef59b 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -24,6 +24,10 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/graph/graph_partition.h"
+#include "tensorflow/core/grappler/clusters/virtual_cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/meta_optimizer.h"
+#include "tensorflow/core/protobuf/rewriter_config.pb.h"
 #include "tensorflow/core/util/ptr_util.h"
 #include "tensorflow/core/util/reffed_status_callback.h"
 
@@ -35,7 +39,6 @@ namespace tensorflow {
 typedef FunctionLibraryRuntime::Handle FHandle;
 
 namespace {
-
 // A `PartitionedCallOp` asynchronously executes a function, potentially across
 // multiple devices but within a single process. The kernel places and
 // partitions a given function's underlying graph, and executes each of the
@@ -46,6 +49,12 @@ class PartitionedCallOp : public AsyncOpKernel {
  public:
   explicit PartitionedCallOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
+    string rewriter_config_serialized;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("config", &rewriter_config_serialized));
+    OP_REQUIRES(
+        ctx, rewriter_config_.ParseFromString(rewriter_config_serialized),
+        errors::InvalidArgument("Unable to parse rewriter_config string as "
+                                "tensorflow::RewriterConfig proto."));
   }
 
   ~PartitionedCallOp() override {}
@@ -109,8 +118,7 @@ class PartitionedCallOp : public AsyncOpKernel {
         // by name.
         auto graph = tensorflow::MakeUnique<Graph>(fbody->graph->flib_def());
         FunctionLibraryDefinition global_flib(OpRegistry::Global(), {});
-        TF_CHECK_OK(
-                    graph.get()->AddFunctionLibrary(global_flib.ToProto()));
+        TF_CHECK_OK(graph->AddFunctionLibrary(global_flib.ToProto()));
         CopyGraph(*fbody->graph, graph.get());
         OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done);
 
@@ -159,6 +167,17 @@ class PartitionedCallOp : public AsyncOpKernel {
                 optimization_options),
             done);
 
+        Device* cpu_device;
+        OP_REQUIRES_OK_ASYNC(
+            ctx, lib->device_mgr()->LookupDevice("CPU:0", &cpu_device), done);
+
+        // Run grappler passes on the graph. It is possible that these are
+        // optimized by the graph executor already.
+        OP_REQUIRES_OK_ASYNC(ctx,
+                             OptimizeGraph(ctx, fbody->ret_nodes, overlay_lib,
+                                           device_set, cpu_device, &graph),
+                             done);
+
         std::unordered_map<string, std::unique_ptr<Graph>> subgraphs;
         OP_REQUIRES_OK_ASYNC(
             ctx, PartitionHelper(device_set, std::move(graph), &subgraphs),
@@ -266,8 +285,7 @@ class PartitionedCallOp : public AsyncOpKernel {
     for (const auto& partition : partitions) {
       std::unique_ptr<Graph> subgraph(new Graph(graph->flib_def()));
       FunctionLibraryDefinition global_flib(OpRegistry::Global(), {});
-      TF_CHECK_OK(
-                subgraph.get()->AddFunctionLibrary(global_flib.ToProto()));
+      TF_CHECK_OK(subgraph->AddFunctionLibrary(global_flib.ToProto()));
       GraphConstructorOptions opts;
       opts.allow_internal_ops = true;
       opts.expect_device_spec = true;
@@ -317,14 +335,6 @@ class PartitionedCallOp : public AsyncOpKernel {
       }
     }
 
-    // Rewrite the indices of the Arg and Retval nodes for this function
-    // to range from 0 to the number of Arg nodes, Retval nodes, respectively.
-    auto sort_by_index = [](std::pair<Node*, int> one,
-                            std::pair<Node*, int> two) -> bool {
-      return one.second < two.second;
-    };
-    std::sort(arg_nodes.begin(), arg_nodes.end(), sort_by_index);
-    std::sort(ret_nodes.begin(), ret_nodes.end(), sort_by_index);
     for (int i = 0; i < arg_nodes.size(); ++i) {
       Node* arg = arg_nodes[i].first;
       arg->AddAttr("index", i);
@@ -470,7 +480,56 @@ class PartitionedCallOp : public AsyncOpKernel {
     }
   }
 
+  Status OptimizeGraph(OpKernelContext* ctx,
+                       const gtl::InlinedVector<Node*, 4>& ret_nodes,
+                       FunctionLibraryDefinition* flib,
+                       const DeviceSet& device_set, Device* cpu_device,
+                       std::unique_ptr<Graph>* graph) {
+    if (!tensorflow::grappler::MetaOptimizerEnabled(rewriter_config_)) {
+      return Status::OK();
+    }
+
+    tensorflow::grappler::GrapplerItem item;
+
+    // Add fetches so that the graph can be pruned.
+    for (Node* node : ret_nodes) {
+      item.fetch.push_back(node->name());
+    }
+
+    (*graph)->ToGraphDef(&item.graph);
+
+    if (flib) {
+      *item.graph.mutable_library() = flib->ToProto();
+    }
+
+    tensorflow::GraphDef out_graph;
+
+    tensorflow::grappler::VirtualCluster cluster(&device_set);
+
+    // TODO(nareshmodi): Consider adding and using the more generic GraphOptions
+    // proto (which also contain the OptimizerOptions).
+    TF_RETURN_IF_ERROR(tensorflow::grappler::RunMetaOptimizer(
+        item, rewriter_config_, cpu_device, &cluster, &out_graph));
+
+    std::unique_ptr<Graph> optimized_graph(new Graph(OpRegistry::Global()));
+    TF_RETURN_IF_ERROR(ConvertGraphDefToGraph(
+        GraphConstructorOptions(), out_graph, optimized_graph.get()));
+
+    *graph = std::move(optimized_graph);
+
+    // The graph conversion sets the requested device names but not the
+    // assigned device names. However, since at this point the graph is
+    // placed TF expects an assigned device name for every node. Therefore
+    // we copy the requested device into the assigned device field.
+    for (Node* node : graph->get()->nodes()) {
+      node->set_assigned_device_name(node->requested_device());
+    }
+
+    return Status::OK();
+  }
+
   NameAttrList func_;
+  RewriterConfig rewriter_config_;
   string local_device_name_;
   // Contains maps from device names to handles of function partitions, keyed by
   // FunctionLibraryRuntime pointers. (Because this kernel may be instantiated
diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc
index 22b4b07eff..17b15108a9 100644
--- a/tensorflow/core/ops/functional_ops.cc
+++ b/tensorflow/core/ops/functional_ops.cc
@@ -225,6 +225,7 @@ REGISTER_OP("PartitionedCall")
     .Attr("Tin: list(type) >= 0")
     .Attr("Tout: list(type) >= 0")
     .Attr("f: func")
+    .Attr("config: string = ''")
     .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("StatefulPartitionedCall")
@@ -233,6 +234,7 @@ REGISTER_OP("StatefulPartitionedCall")
     .Attr("Tin: list(type) >= 0")
     .Attr("Tout: list(type) >= 0")
     .Attr("f: func")
+    .Attr("config: string = ''")
     .SetIsStateful()
     .SetShapeFn(shape_inference::UnknownShape);
 
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 18ade384f5..c4b5be7a34 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1154,6 +1154,7 @@ py_library(
         ":tensor_shape",
         ":util",
         ":variable_scope",
+        "//tensorflow/core:protos_all_py",
         "//tensorflow/python/eager:context",
     ],
 )
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index 778ff85342..74e648ee6f 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -25,6 +25,7 @@ import random
 import threading
 
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.framework import c_api_util
 from tensorflow.python.framework import device as pydev
@@ -84,7 +85,7 @@ class _EagerTensorCache(object):
 class _EagerContext(threading.local):
   """Thread local eager context."""
 
-  def __init__(self):
+  def __init__(self, config=None):
     super(_EagerContext, self).__init__()
     self.device_spec = pydev.DeviceSpec.from_string("")
     self.device_name = self.device_spec.to_string()
@@ -97,6 +98,12 @@ class _EagerContext(threading.local):
     self.ones_rank_cache = _EagerTensorCache()
     self.zeros_cache = _EagerTensorCache()
     self.execution_mode = None
+    self.rewriter_config = None
+    if config is not None and config.HasField(
+        "graph_options") and config.graph_options.HasField("rewrite_options"):
+      self.rewriter_config = (
+          config.graph_options.rewrite_options.SerializeToString())
+
 
 
 ContextSwitch = collections.namedtuple(
@@ -191,7 +198,7 @@ class Context(object):
     Raises:
      ValueError: If execution_mode is not valid.
     """
-    self._eager_context = _EagerContext()
+    self._eager_context = _EagerContext(config)
     self._context_switches = _ContextSwitchStack(self.executing_eagerly())
     self._context_handle = None
     self._context_devices = None
@@ -361,6 +368,36 @@ class Context(object):
       if mode == EAGER_MODE:
         self.context_switches.pop()
 
+  @tf_contextlib.contextmanager
+  def rewriter_config(self, rewriter_config_=None):
+    """A context manager to allow setting the grappler rewrite options.
+
+    Args:
+      rewriter_config_: A tensorflow.RewriterConfig proto object.
+
+    Yields:
+      Nothing.
+
+    Raises:
+      ValueError: if rewriter_config is not a tensorflow.RewriterConfig proto.
+    """
+    if rewriter_config_ is None or not isinstance(
+        rewriter_config_, rewriter_config_pb2.RewriterConfig):
+      raise ValueError("Must pass a rewriter_config proto")
+
+    ctx = self._eager_context
+    old_rewriter_config = ctx.rewriter_config
+    ctx.rewriter_config = rewriter_config_.SerializeToString()
+    try:
+      yield
+    finally:
+      ctx.rewriter_config = old_rewriter_config
+
+  @property
+  def rewriter_config_string(self):
+    """Returns the serialized rewriter_config for the current thread."""
+    return self._eager_context.rewriter_config
+
   def executing_eagerly(self):
     """Returns True if current thread has eager executing enabled."""
     return self._eager_context.is_eager
@@ -783,6 +820,11 @@ def export_run_metadata():
   return context().export_run_metadata()
 
 
+def rewriter_config(rewriter_config_):
+  """Context manager for setting the grappler rewrite config."""
+  return context().rewriter_config(rewriter_config_)
+
+
 def set_server_def(server_def):
   context().set_server_def(server_def)
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f1a63adce1..191279abae 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -501,7 +501,8 @@ class _EagerDefinedFunction(object):
           args=args,
           f=self,
           tout=self._output_types,
-          executing_eagerly=executing_eagerly)
+          executing_eagerly=executing_eagerly,
+          config=ctx.rewriter_config_string)  # pylint: disable=protected-access
 
     if executing_eagerly:
       return outputs
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 953f4300cf..7b708622f1 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -95,6 +95,20 @@ class FunctionTest(test.TestCase):
     self.assertAllEqual(sq.numpy().reshape(-1), [10, 14, 14, 20])
     self.assertAllEqual(sq2.numpy().reshape(-1), [52, 76, 74, 108])
 
+  def testWastedAdd(self):
+
+    @function.defun()
+    def add(x, y):
+      _ = x * y
+      return x + y
+
+    # The default config allows everything.
+    rewrites = rewriter_config_pb2.RewriterConfig()
+
+    with context.rewriter_config(rewrites):
+      t = constant_op.constant(1.0)
+      self.assertAllEqual(add(t, t).numpy(), 2.0)
+
   def testBasicGraphMode(self):
     matmul = function.defun(math_ops.matmul)
 
diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py
index 119d9522bd..f8a95f5e62 100644
--- a/tensorflow/python/ops/functional_ops.py
+++ b/tensorflow/python/ops/functional_ops.py
@@ -23,6 +23,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
@@ -979,8 +980,19 @@ def For(start,
   return ret
 # pylint: enable=invalid-name,protected-access
 
+_rewriter_config_optimizer_disabled = None
 
-def partitioned_call(args, f, tout=None, executing_eagerly=None):
+
+def _get_disabled_rewriter_config():
+  global _rewriter_config_optimizer_disabled
+  if _rewriter_config_optimizer_disabled is None:
+    rewriter_config = rewriter_config_pb2.RewriterConfig()
+    rewriter_config.disable_meta_optimizer = True
+    _rewriter_config_optimizer_disabled = rewriter_config.SerializeToString()
+  return _rewriter_config_optimizer_disabled
+
+
+def partitioned_call(args, f, tout=None, executing_eagerly=None, config=None):
   """Executes a function while respecting device annotations.
 
   Currently, only those functions that execute within the same address space
@@ -994,6 +1006,9 @@ def partitioned_call(args, f, tout=None, executing_eagerly=None):
       the signature of `f`.
     executing_eagerly: (Optional) A boolean indicating whether the context is
       executing eagerly. If `None`, fetched from the global context.
+    config: (Optional) A tensorflow::RewriterConfig proto, serialized. If
+      `None`, all optimizations are disabled. Currently only handled for eager
+      defined functions.
 
   Returns:
     The list of `Tensor`s returned by invoking `f(args)`. If the function does
@@ -1007,12 +1022,16 @@ def partitioned_call(args, f, tout=None, executing_eagerly=None):
   if executing_eagerly is None:
     executing_eagerly = context.executing_eagerly()
 
+  if config is None:
+    config = _get_disabled_rewriter_config()
+
   if executing_eagerly or len(tout):
     if f.stateful_ops:
       outputs = gen_functional_ops.stateful_partitioned_call(
-          args=args, Tout=tout, f=f)
+          args=args, Tout=tout, f=f, config=config)
     else:
-      outputs = gen_functional_ops.partitioned_call(args=args, Tout=tout, f=f)
+      outputs = gen_functional_ops.partitioned_call(
+          args=args, Tout=tout, f=f, config=config)
     return outputs if outputs else None
 
   # The generated binding returns an empty list for functions that don't
@@ -1026,6 +1045,11 @@ def partitioned_call(args, f, tout=None, executing_eagerly=None):
   func_attr = attr_value_pb2.AttrValue(
       func=attr_value_pb2.NameAttrList(name=f.name))
 
+  # When running in graph mode, the graph and function graphs are optimized
+  # (i.e. run through grappler) per the session options, so we can disable any
+  # eager-specific rewriting.
+  rewriter_config = attr_value_pb2.AttrValue(s=_get_disabled_rewriter_config())
+
   graph = ops.get_default_graph()
   f.add_to_graph(graph)
   op_name = "StatefulPartitionedCall" if f.stateful_ops else "PartitionedCall"
@@ -1035,6 +1059,11 @@ def partitioned_call(args, f, tout=None, executing_eagerly=None):
       tout,
       compute_shapes=False,
       name="PartitionedFunctionCall",
-      attrs={"Tin": tin_attr, "Tout": tout_attr, "f": func_attr})
+      attrs={
+          "Tin": tin_attr,
+          "Tout": tout_attr,
+          "f": func_attr,
+          "config": rewriter_config
+      })
   outputs = op.outputs
   return outputs if outputs else op
-- 
GitLab


From 07ab34180b746f767614ccdafbb1a8e9bb7b1641 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 13:21:12 -0700
Subject: [PATCH 0713/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 216581784
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 65 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 14 ++++
 2 files changed, 79 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 415e15b720..713ca5a651 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -39601,6 +39601,38 @@ op {
     type: "func"
   }
 }
+op {
+  name: "PartitionedCall"
+  input_arg {
+    name: "args"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "config"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+}
 op {
   name: "Placeholder"
   output_arg {
@@ -71002,6 +71034,39 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "StatefulPartitionedCall"
+  input_arg {
+    name: "args"
+    type_list_attr: "Tin"
+  }
+  output_arg {
+    name: "output"
+    type_list_attr: "Tout"
+  }
+  attr {
+    name: "Tin"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "Tout"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "config"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "StatelessIf"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index a8da95dea3..78f796fb7f 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -20050,6 +20050,13 @@ op {
     name: "f"
     type: "func"
   }
+  attr {
+    name: "config"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
 }
 op {
   name: "Placeholder"
@@ -33156,6 +33163,13 @@ op {
     name: "f"
     type: "func"
   }
+  attr {
+    name: "config"
+    type: "string"
+    default_value {
+      s: ""
+    }
+  }
   is_stateful: true
 }
 op {
-- 
GitLab


From 93ee9924edfe6d012d6df76b94c290d3d0974848 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Wed, 10 Oct 2018 13:21:43 -0700
Subject: [PATCH 0714/1085] patch the "read the toco guide" button in the
 index.

PiperOrigin-RevId: 216581868
---
 tensorflow/contrib/lite/g3doc/_index.yaml     |  8 +++---
 .../g3doc/tflite_convert/cmdline_examples.md  | 14 +++++-----
 .../g3doc/tflite_convert/cmdline_reference.md |  2 +-
 .../lite/g3doc/tflite_convert/python_api.md   | 26 +++++++++----------
 4 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tensorflow/contrib/lite/g3doc/_index.yaml b/tensorflow/contrib/lite/g3doc/_index.yaml
index bc66cc5dc1..eb32d3e94a 100644
--- a/tensorflow/contrib/lite/g3doc/_index.yaml
+++ b/tensorflow/contrib/lite/g3doc/_index.yaml
@@ -97,7 +97,7 @@ landing_page:
       path: https://www.shazam.com/
     - custom_image:
         path: ./images/landing-page/nest_logo.png
-      path: https://nest.com/    
+      path: https://nest.com/
     - custom_image:
         path: ./images/landing-page/loseit_logo.png
       path: https://www.loseit.com/
@@ -129,10 +129,10 @@ landing_page:
         icon_name: autorenew
       description: >
         Convert a TensorFlow model into a compressed flat buffer with the
-        TensorFlow Lite Optimizing Converter (TOCO).
+        TensorFlow Lite Converter.
       buttons:
-      - label: Read the TOCO guide
-        path: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/toco/g3doc/python_api.md
+      - label: Read the converter guide
+        path: /lite/tflite_convert/python_api
         classname: button button-primary tfo-button-primary
     - heading: Deploy
       icon:
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
index d88acfae80..ffb73b77b8 100644
--- a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
@@ -21,10 +21,10 @@ There are two approaches to running the converter in the command line.
         //tensorflow/contrib/lite/python:tflite_convert --
         --output_file=...`
 
-### Converting models prior to TensorFlow 1.9 <a name="pre-tensorflow-1.9"></a>
+### Converting models prior to TensorFlow 1.9 <a name="pre_tensorflow_1.9"></a>
 
 The recommended approach for using the converter prior to TensorFlow 1.9 is the
-[Python API](python_api.md#pre-tensorflow-1.9). If a command line tool is
+[Python API](python_api.md#pre_tensorflow_1.9). If a command line tool is
 desired, the `toco` command line tool was available in TensorFlow 1.7. Enter
 `toco --help` in Terminal for additional details on the command-line flags
 available. There were no command line tools in TensorFlow 1.8.
@@ -91,7 +91,7 @@ tflite_convert \
 
 ## Quantization
 
-### Convert a TensorFlow GraphDef for quantized inference <a name="graphdef-quant"></a>
+### Convert a TensorFlow GraphDef for quantized inference <a name="graphdef_quant"></a>
 
 The TensorFlow Lite Converter is compatible with fixed point quantization models
 described [here](https://www.tensorflow.org/performance/quantization). These are
@@ -115,7 +115,7 @@ tflite_convert \
   --std_dev_values=127
 ```
 
-### Use \"dummy-quantization\" to try out quantized inference on a float graph <a name="dummy-quant"></a>
+### Use \"dummy-quantization\" to try out quantized inference on a float graph <a name="dummy_quant"></a>
 
 In order to evaluate the possible benefit of generating a quantized graph, the
 converter allows "dummy-quantization" on float graphs. The flags
@@ -185,7 +185,7 @@ tflite_convert \
 Any array in the input file can be specified as an input or output array in
 order to extract subgraphs out of an input graph file. The TensorFlow Lite
 Converter discards the parts of the graph outside of the specific subgraph. Use
-[graph visualizations](#graph-visualizations) to identify the input and output
+[graph visualizations](#graph_visualizations) to identify the input and output
 arrays that make up the desired subgraph.
 
 The follow command shows how to extract a single fused layer out of a TensorFlow
@@ -228,7 +228,7 @@ visualization using either the `--output_format` flag or the
 `--dump_graphviz_dir` flag. The subsections below outline the use cases for
 each.
 
-### Using `--output_format=GRAPHVIZ_DOT` <a name="using-output-format-graphviz-dot"></a>
+### Using `--output_format=GRAPHVIZ_DOT` <a name="using_output_format_graphviz_dot"></a>
 
 The first way to get a Graphviz rendering is to pass `GRAPHVIZ_DOT` into
 `--output_format`. This results in a plausible visualization of the graph. This
@@ -326,7 +326,7 @@ each individual graph transformation, resulting in thousands of files.
 Typically, one would then bisect into these files to understand when a given
 change was introduced in the graph.
 
-### Legend for the graph visualizations <a name="graphviz-legend"></a>
+### Legend for the graph visualizations <a name="graphviz_legend"></a>
 
 *   Operators are red square boxes with the following hues of red:
     *   Most operators are
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
index d65912fea6..eab26f5cb2 100644
--- a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
@@ -59,7 +59,7 @@ based on index.
 *   `--input_shapes`. Type: colon-separated list of comma-separated lists of
     integers. Each comma-separated list of integers gives the shape of one of
     the input arrays specified in
-    [TensorFlow convention](https://www.tensorflow.org/guide/dims_types#shape).
+    [TensorFlow convention](https://www.tensorflow.org/guide/tensors#shape).
     *   Example: `--input_shapes=1,60,80,3` for a typical vision model means a
         batch size of 1, an input image height of 60, an input image width of
         80, and an input image depth of 3 (representing RGB channels).
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
index e1c0e0c240..71a38c7bea 100644
--- a/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
+++ b/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
@@ -1,7 +1,7 @@
 # TensorFlow Lite Converter & Interpreter Python API reference
 
 This page provides examples on how to use the TensorFlow Lite Converter and the
-TensorFlow Lite interpreter using the Python API. 
+TensorFlow Lite interpreter using the Python API.
 
 [TOC]
 
@@ -21,8 +21,8 @@ is `tf.contrib.lite.Interpreter`.
 
 Note: Reference "Additional Instructions" sections for converting TensorFlow
 models to TensorFlow Lite
-[in TensorFlow 1.9 to TensorFlow 1.11](#pre-tensorflow-1.11) and
-[prior to TensorFlow 1.9](#pre-tensorflow-1.9)
+[in TensorFlow 1.9 to TensorFlow 1.11](#pre_tensorflow_1.11) and
+[prior to TensorFlow 1.9](#pre_tensorflow_1.9)
 
 `TFLiteConverter` provides class methods based on the original format of the
 model. `TFLiteConverter.from_session()` is available for GraphDefs.
@@ -37,7 +37,7 @@ Example usages for simple float-point models are shown in
 The following section shows examples of how to convert a basic float-point model
 from each of the supported data formats into a TensorFlow Lite FlatBuffers.
 
-### Exporting a GraphDef from tf.Session <a name="basic-graphdef-sess"></a>
+### Exporting a GraphDef from tf.Session <a name="basic_graphdef_sess"></a>
 
 The following example shows how to convert a TensorFlow GraphDef into a
 TensorFlow Lite FlatBuffer from a `tf.Session` object.
@@ -57,7 +57,7 @@ with tf.Session() as sess:
   open("converted_model.tflite", "wb").write(tflite_model)
 ```
 
-### Exporting a GraphDef from file <a name="basic-graphdef-file"></a>
+### Exporting a GraphDef from file <a name="basic_graphdef_file"></a>
 
 The following example shows how to convert a TensorFlow GraphDef into a
 TensorFlow Lite FlatBuffer when the GraphDef is stored in a file. Both `.pb` and
@@ -81,7 +81,7 @@ tflite_model = converter.convert()
 open("converted_model.tflite", "wb").write(tflite_model)
 ```
 
-### Exporting a SavedModel <a name="basic-savedmodel"></a>
+### Exporting a SavedModel <a name="basic_savedmodel"></a>
 
 The following example shows how to convert a SavedModel into a TensorFlow Lite
 FlatBuffer.
@@ -99,7 +99,7 @@ For more complex SavedModels, the optional parameters that can be passed into
 `output_arrays`, `tag_set` and `signature_key`. Details of each parameter are
 available by running `help(tf.contrib.lite.TFLiteConverter)`.
 
-### Exporting a tf.keras File <a name="basic-keras-file"></a>
+### Exporting a tf.keras File <a name="basic_keras_file"></a>
 
 The following example shows how to convert a `tf.keras` model into a TensorFlow
 Lite FlatBuffer. This example requires
@@ -156,7 +156,7 @@ terminal for detailed documentation on the attributes.
 Although the examples are demonstrated on GraphDefs containing only constants.
 The same logic can be applied irrespective of the input data format.
 
-### Exporting a quantized GraphDef <a name="complex-quant"></a>
+### Exporting a quantized GraphDef <a name="complex_quant"></a>
 
 The following example shows how to convert a quantized model into a TensorFlow
 Lite FlatBuffer.
@@ -180,7 +180,7 @@ with tf.Session() as sess:
 
 ## TensorFlow Lite Python interpreter <a name="interpreter"></a>
 
-### Using the interpreter from a model file <a name="interpreter-file"></a>
+### Using the interpreter from a model file <a name="interpreter_file"></a>
 
 The following example shows how to use the TensorFlow Lite Python interpreter
 when provided a TensorFlow Lite FlatBuffer file. The example also demonstrates
@@ -210,7 +210,7 @@ output_data = interpreter.get_tensor(output_details[0]['index'])
 print(output_data)
 ```
 
-### Using the interpreter from model data <a name="interpreter-data"></a>
+### Using the interpreter from model data <a name="interpreter_data"></a>
 
 The following example shows how to use the TensorFlow Lite Python interpreter
 when starting with the TensorFlow Lite Flatbuffer model previously loaded. This
@@ -237,7 +237,7 @@ interpreter.allocate_tensors()
 
 ## Additional instructions
 
-### Build from source code <a name="latest-package"></a>
+### Build from source code <a name="latest_package"></a>
 
 In order to run the latest version of the TensorFlow Lite Converter Python API,
 either install the nightly build with
@@ -245,13 +245,13 @@ either install the nightly build with
 [Docker](https://www.tensorflow.org/install/docker), or
 [build the pip package from source](https://www.tensorflow.org/install/source).
 
-### Converting models in TensorFlow 1.9 to TensorFlow 1.11 <a name="#pre-tensorflow-1.11"></a>
+### Converting models in TensorFlow 1.9 to TensorFlow 1.11 <a name="pre_tensorflow_1.11"></a>
 
 To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.9 through
 TensorFlow 1.11, use `TocoConverter`. `TocoConverter` is semantically
 identically to `TFLiteConverter`.
 
-### Converting models prior to TensorFlow 1.9 <a name="pre-tensorflow-1.9"></a>
+### Converting models prior to TensorFlow 1.9 <a name="pre_tensorflow_1.9"></a>
 
 To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.7 and TensorFlow
 1.8, use the `toco_convert` function. Run `help(tf.contrib.lite.toco_convert)`
-- 
GitLab


From 95bffd3a923d7c433560d89aa63fdc36221e3a32 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 13:23:26 -0700
Subject: [PATCH 0715/1085] Don't use wildcard reshaping when dimensions can be
 unknown.

PiperOrigin-RevId: 216582137
---
 tensorflow/python/feature_column/feature_column.py    | 5 ++++-
 tensorflow/python/feature_column/feature_column_v2.py | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py
index 8a11ca142c..b1f47ebec2 100644
--- a/tensorflow/python/feature_column/feature_column.py
+++ b/tensorflow/python/feature_column/feature_column.py
@@ -3468,7 +3468,10 @@ class _SequenceCategoricalColumn(
     # combined during embedding lookup. If the tensor is already 3D, leave
     # as-is.
     shape = array_ops.shape(id_tensor)
-    target_shape = [shape[0], shape[1], -1]
+    # Compute the third dimension explicitly instead of setting it to -1, as
+    # that doesn't work for dynamically shaped tensors with 0-length at runtime.
+    # This happens for empty sequences.
+    target_shape = [shape[0], shape[1], math_ops.reduce_prod(shape[2:])]
     id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape)
     if weight_tensor is not None:
       weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape)
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 6d089de991..67de174a67 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -4110,7 +4110,10 @@ class SequenceCategoricalColumn(
     # combined during embedding lookup. If the tensor is already 3D, leave
     # as-is.
     shape = array_ops.shape(id_tensor)
-    target_shape = [shape[0], shape[1], -1]
+    # Compute the third dimension explicitly instead of setting it to -1, as
+    # that doesn't work for dynamically shaped tensors with 0-length at runtime.
+    # This happens for empty sequences.
+    target_shape = [shape[0], shape[1], math_ops.reduce_prod(shape[2:])]
     id_tensor = sparse_ops.sparse_reshape(id_tensor, target_shape)
     if weight_tensor is not None:
       weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape)
-- 
GitLab


From 0e1bc5100d72dd75e7b148f0cf1d422ac0c6469b Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 10 Oct 2018 13:38:06 -0700
Subject: [PATCH 0716/1085] Revise the NestedDirectories test working for both
 Windows and Linux

---
 ...tching_files_dataset_serialization_test.py | 16 +++++-----
 .../matching_files_dataset_op_test.py         | 30 ++++++++-----------
 2 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py
index 41146c9786..b120c0a626 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/matching_files_dataset_serialization_test.py
@@ -42,21 +42,19 @@ class MatchingFilesDatasetSerializationTest(
                                 *[str(dir_name) for dir_name in range(j)])
         if not os.path.exists(new_base):
           os.makedirs(new_base)
-        for f in ['a.txt', 'b.py', 'c.pyc']:
+        child_files = ['a.py', 'b.pyc'] if j < depth - 1 else ['c.txt', 'd.log']
+        for f in child_files:
           filename = os.path.join(new_base, f)
           open(filename, 'w').close()
 
-    patterns = []
-    for i in range(depth):
-      pattern = os.path.join(tmp_dir,
-                             os.path.join(*['**' for _ in range(i + 1)]),
-                             '*.txt')
-      patterns.append(pattern)
+    patterns = [
+        os.path.join(tmp_dir, os.path.join(*['**' for _ in range(depth)]),
+                     suffix) for suffix in ['*.txt', '*.log']]
 
-    num_outputs = width * depth
+    num_outputs = width * len(patterns)
     self.run_core_tests(
         lambda: self._build_iterator_graph(patterns),
-        lambda: self._build_iterator_graph(patterns[0:depth // 2]), num_outputs)
+        lambda: self._build_iterator_graph(patterns[0:1]), num_outputs)
 
     shutil.rmtree(tmp_dir, ignore_errors=True)
 
diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index 4b59500bd4..d811844cae 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -136,23 +136,23 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
         new_base = os.path.join(self.tmp_dir, str(i),
                                 *[str(dir_name) for dir_name in range(j)])
         os.makedirs(new_base)
-        for f in ['a.txt', 'b.py', 'c.pyc']:
+        child_files = ['a.py', 'b.pyc'] if j < depth - 1 else ['c.txt', 'd.log']
+        for f in child_files:
           filename = os.path.join(new_base, f)
           filenames.append(filename)
           open(filename, 'w').close()
 
-    patterns = []
-    for i in range(depth):
-      pattern = os.path.join(
-          self.tmp_dir, os.path.join(*['**' for _ in range(i + 1)]), '*.txt')
-      patterns.append(pattern)
+    patterns = [
+        os.path.join(self.tmp_dir, os.path.join(*['**' for _ in range(depth)]),
+                     suffix) for suffix in ['*.txt', '*.log']]
 
     dataset = MatchingFilesDataset(patterns)
     with self.cached_session() as sess:
       next_element = dataset.make_one_shot_iterator().get_next()
       expected_filenames = [compat.as_bytes(filename)
                             for filename in filenames
-                            if filename.endswith('.txt')]
+                            if filename.endswith('.txt')
+                            or filename.endswith('.log')]
       actual_filenames = []
       while True:
         try:
@@ -173,19 +173,15 @@ class MatchingFilesDatasetBenchmark(test.Benchmark):
       for j in range(depth):
         new_base = os.path.join(tmp_dir, str(i),
                                 *[str(dir_name) for dir_name in range(j)])
-        if not os.path.exists(new_base):
-          os.makedirs(new_base)
-        for f in ['a.txt', 'b.py', 'c.pyc']:
+        os.makedirs(new_base)
+        child_files = ['a.py', 'b.pyc'] if j < depth - 1 else ['c.txt', 'd.log']
+        for f in child_files:
           filename = os.path.join(new_base, f)
           open(filename, 'w').close()
 
-    patterns = []
-    for i in range(depth):
-      pattern = os.path.join(tmp_dir,
-                             os.path.join(*['**' for _ in range(i + 1)]),
-                             '*.txt')
-
-      patterns.append(pattern)
+    patterns = [
+        os.path.join(tmp_dir, os.path.join(*['**' for _ in range(depth)]),
+                     suffix) for suffix in ['*.txt', '*.log']]
 
     deltas = []
     iters = 3
-- 
GitLab


From 59b2b0392976f48f7eb8b50afa63f9f4da82af58 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Wed, 10 Oct 2018 13:23:36 -0700
Subject: [PATCH 0717/1085] Update renames_v2.py file.

PiperOrigin-RevId: 216582173
---
 tensorflow/tools/compatibility/renames_v2.py  | 149 +++++++++++++-----
 .../tools/compatibility/tf_upgrade_v2_test.py |  17 +-
 .../update/generate_v2_renames_map.py         |   1 +
 3 files changed, 120 insertions(+), 47 deletions(-)

diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index 7e66ad816a..c3f20d0337 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -25,35 +25,53 @@ from __future__ import division
 from __future__ import print_function
 
 renames = {
-    'tf.acos': 'tf.math.acos',
-    'tf.acosh': 'tf.math.acosh',
-    'tf.add': 'tf.math.add',
-    'tf.as_string': 'tf.dtypes.as_string',
-    'tf.asin': 'tf.math.asin',
-    'tf.asinh': 'tf.math.asinh',
-    'tf.atan': 'tf.math.atan',
-    'tf.atan2': 'tf.math.atan2',
-    'tf.atanh': 'tf.math.atanh',
-    'tf.batch_to_space_nd': 'tf.manip.batch_to_space_nd',
+    'tf.OpError': 'tf.errors.OpError',
+    'tf.PaddingFIFOQueue': 'tf.io.PaddingFIFOQueue',
+    'tf.PriorityQueue': 'tf.io.PriorityQueue',
+    'tf.QueueBase': 'tf.io.QueueBase',
+    'tf.RandomShuffleQueue': 'tf.io.RandomShuffleQueue',
+    'tf.SparseConditionalAccumulator': 'tf.sparse.SparseConditionalAccumulator',
+    'tf.accumulate_n': 'tf.math.accumulate_n',
+    'tf.angle': 'tf.math.angle',
+    'tf.assert_greater_equal': 'tf.debugging.assert_greater_equal',
+    'tf.assert_integer': 'tf.debugging.assert_integer',
+    'tf.assert_less_equal': 'tf.debugging.assert_less_equal',
+    'tf.assert_near': 'tf.debugging.assert_near',
+    'tf.assert_negative': 'tf.debugging.assert_negative',
+    'tf.assert_non_negative': 'tf.debugging.assert_non_negative',
+    'tf.assert_non_positive': 'tf.debugging.assert_non_positive',
+    'tf.assert_none_equal': 'tf.debugging.assert_none_equal',
+    'tf.assert_positive': 'tf.debugging.assert_positive',
+    'tf.assert_proper_iterable': 'tf.debugging.assert_proper_iterable',
+    'tf.assert_rank_at_least': 'tf.debugging.assert_rank_at_least',
+    'tf.assert_rank_in': 'tf.debugging.assert_rank_in',
+    'tf.assert_same_float_dtype': 'tf.debugging.assert_same_float_dtype',
+    'tf.assert_scalar': 'tf.debugging.assert_scalar',
+    'tf.assert_type': 'tf.debugging.assert_type',
     'tf.betainc': 'tf.math.betainc',
+    'tf.bincount': 'tf.math.bincount',
     'tf.ceil': 'tf.math.ceil',
     'tf.check_numerics': 'tf.debugging.check_numerics',
     'tf.cholesky': 'tf.linalg.cholesky',
-    'tf.cos': 'tf.math.cos',
-    'tf.cosh': 'tf.math.cosh',
+    'tf.cholesky_solve': 'tf.linalg.cholesky_solve',
+    'tf.confusion_matrix': 'tf.train.confusion_matrix',
+    'tf.conj': 'tf.math.conj',
     'tf.cross': 'tf.linalg.cross',
+    'tf.cumprod': 'tf.math.cumprod',
     'tf.decode_base64': 'tf.io.decode_base64',
     'tf.decode_compressed': 'tf.io.decode_compressed',
+    'tf.decode_csv': 'tf.io.decode_csv',
     'tf.decode_json_example': 'tf.io.decode_json_example',
     'tf.decode_raw': 'tf.io.decode_raw',
+    'tf.depth_to_space': 'tf.nn.depth_to_space',
     'tf.dequantize': 'tf.quantization.dequantize',
+    'tf.deserialize_many_sparse': 'tf.io.deserialize_many_sparse',
     'tf.diag': 'tf.linalg.tensor_diag',
     'tf.diag_part': 'tf.linalg.tensor_diag_part',
     'tf.digamma': 'tf.math.digamma',
     'tf.encode_base64': 'tf.io.encode_base64',
-    'tf.equal': 'tf.math.equal',
+    'tf.erf': 'tf.math.erf',
     'tf.erfc': 'tf.math.erfc',
-    'tf.exp': 'tf.math.exp',
     'tf.expm1': 'tf.math.expm1',
     'tf.extract_image_patches': 'tf.image.extract_image_patches',
     'tf.fake_quant_with_min_max_args': 'tf.quantization.fake_quant_with_min_max_args',
@@ -63,26 +81,33 @@ renames = {
     'tf.fake_quant_with_min_max_vars_per_channel': 'tf.quantization.fake_quant_with_min_max_vars_per_channel',
     'tf.fake_quant_with_min_max_vars_per_channel_gradient': 'tf.quantization.fake_quant_with_min_max_vars_per_channel_gradient',
     'tf.fft': 'tf.spectral.fft',
-    'tf.floor': 'tf.math.floor',
-    'tf.gather_nd': 'tf.manip.gather_nd',
-    'tf.GraphKeys.VARIABLES': 'tf.GraphKeys.GLOBAL_VARIABLES',
-    'tf.greater': 'tf.math.greater',
-    'tf.greater_equal': 'tf.math.greater_equal',
+    'tf.floordiv': 'tf.math.floordiv',
+    'tf.get_seed': 'tf.random.get_seed',
+    'tf.global_norm': 'tf.linalg.global_norm',
+    'tf.glorot_normal_initializer': 'tf.keras.initializers.glorot_normal',
     'tf.ifft': 'tf.spectral.ifft',
     'tf.igamma': 'tf.math.igamma',
     'tf.igammac': 'tf.math.igammac',
+    'tf.imag': 'tf.math.imag',
     'tf.invert_permutation': 'tf.math.invert_permutation',
     'tf.is_finite': 'tf.debugging.is_finite',
     'tf.is_inf': 'tf.debugging.is_inf',
     'tf.is_nan': 'tf.debugging.is_nan',
-    'tf.less': 'tf.math.less',
-    'tf.less_equal': 'tf.math.less_equal',
+    'tf.is_non_decreasing': 'tf.debugging.is_non_decreasing',
+    'tf.is_numeric_tensor': 'tf.debugging.is_numeric_tensor',
+    'tf.is_strictly_increasing': 'tf.debugging.is_strictly_increasing',
+    'tf.lbeta': 'tf.math.lbeta',
     'tf.lgamma': 'tf.math.lgamma',
-    'tf.log': 'tf.math.log',
-    'tf.log1p': 'tf.math.log1p',
-    'tf.logical_and': 'tf.math.logical_and',
-    'tf.logical_not': 'tf.math.logical_not',
-    'tf.logical_or': 'tf.math.logical_or',
+    'tf.log_sigmoid': 'tf.math.log_sigmoid',
+    'tf.logical_xor': 'tf.math.logical_xor',
+    'tf.manip.batch_to_space_nd': 'tf.batch_to_space_nd',
+    'tf.manip.gather_nd': 'tf.gather_nd',
+    'tf.manip.reshape': 'tf.reshape',
+    'tf.manip.reverse': 'tf.reverse',
+    'tf.manip.roll': 'tf.roll',
+    'tf.manip.scatter_nd': 'tf.scatter_nd',
+    'tf.manip.space_to_batch_nd': 'tf.space_to_batch_nd',
+    'tf.manip.tile': 'tf.tile',
     'tf.matching_files': 'tf.io.matching_files',
     'tf.matrix_band_part': 'tf.linalg.band_part',
     'tf.matrix_determinant': 'tf.linalg.det',
@@ -91,31 +116,72 @@ renames = {
     'tf.matrix_inverse': 'tf.linalg.inv',
     'tf.matrix_set_diag': 'tf.linalg.set_diag',
     'tf.matrix_solve': 'tf.linalg.solve',
+    'tf.matrix_solve_ls': 'tf.linalg.lstsq',
+    'tf.matrix_transpose': 'tf.linalg.transpose',
     'tf.matrix_triangular_solve': 'tf.linalg.triangular_solve',
-    'tf.maximum': 'tf.math.maximum',
-    'tf.minimum': 'tf.math.minimum',
-    'tf.not_equal': 'tf.math.not_equal',
+    'tf.nn.log_uniform_candidate_sampler': 'tf.random.log_uniform_candidate_sampler',
+    'tf.nn.uniform_candidate_sampler': 'tf.random.uniform_candidate_sampler',
+    'tf.orthogonal_initializer': 'tf.keras.initializers.Orthogonal',
     'tf.parse_tensor': 'tf.io.parse_tensor',
     'tf.polygamma': 'tf.math.polygamma',
+    'tf.python_io.TFRecordCompressionType': 'tf.io.TFRecordCompressionType',
+    'tf.python_io.TFRecordOptions': 'tf.io.TFRecordOptions',
+    'tf.python_io.TFRecordWriter': 'tf.io.TFRecordWriter',
+    'tf.python_io.tf_record_iterator': 'tf.io.tf_record_iterator',
     'tf.qr': 'tf.linalg.qr',
+    'tf.quantize': 'tf.quantization.quantize',
     'tf.quantized_concat': 'tf.quantization.quantized_concat',
+    'tf.random_gamma': 'tf.random.gamma',
+    'tf.random_poisson': 'tf.random.poisson',
     'tf.read_file': 'tf.io.read_file',
+    'tf.real': 'tf.math.real',
     'tf.reciprocal': 'tf.math.reciprocal',
+    'tf.reduce_join': 'tf.strings.reduce_join',
     'tf.regex_replace': 'tf.strings.regex_replace',
-    'tf.reshape': 'tf.manip.reshape',
-    'tf.reverse': 'tf.manip.reverse',
-    'tf.reverse_v2': 'tf.manip.reverse',
+    'tf.reverse_v2': 'tf.reverse',
     'tf.rint': 'tf.math.rint',
     'tf.rsqrt': 'tf.math.rsqrt',
-    'tf.scatter_nd': 'tf.manip.scatter_nd',
+    'tf.saved_model.builder.SavedModelBuilder': 'tf.saved_model.Builder',
+    'tf.saved_model.loader.maybe_saved_model_directory': 'tf.saved_model.maybe_saved_model_directory',
+    'tf.saved_model.main_op.main_op_with_restore': 'tf.saved_model.main_op_with_restore',
+    'tf.saved_model.signature_def_utils.build_signature_def': 'tf.saved_model.build_signature_def',
+    'tf.saved_model.signature_def_utils.classification_signature_def': 'tf.saved_model.classification_signature_def',
+    'tf.saved_model.signature_def_utils.is_valid_signature': 'tf.saved_model.is_valid_signature',
+    'tf.saved_model.signature_def_utils.predict_signature_def': 'tf.saved_model.predict_signature_def',
+    'tf.saved_model.signature_def_utils.regression_signature_def': 'tf.saved_model.regression_signature_def',
+    'tf.saved_model.utils.build_tensor_info': 'tf.saved_model.build_tensor_info',
+    'tf.saved_model.utils.get_tensor_from_tensor_info': 'tf.saved_model.get_tensor_from_tensor_info',
     'tf.segment_max': 'tf.math.segment_max',
     'tf.segment_mean': 'tf.math.segment_mean',
     'tf.segment_min': 'tf.math.segment_min',
     'tf.segment_prod': 'tf.math.segment_prod',
     'tf.segment_sum': 'tf.math.segment_sum',
-    'tf.sin': 'tf.math.sin',
-    'tf.sinh': 'tf.math.sinh',
-    'tf.space_to_batch_nd': 'tf.manip.space_to_batch_nd',
+    'tf.self_adjoint_eig': 'tf.linalg.eigh',
+    'tf.self_adjoint_eigvals': 'tf.linalg.eigvalsh',
+    'tf.serialize_many_sparse': 'tf.io.serialize_many_sparse',
+    'tf.serialize_sparse': 'tf.io.serialize_sparse',
+    'tf.space_to_batch': 'tf.nn.space_to_batch',
+    'tf.space_to_depth': 'tf.nn.space_to_depth',
+    'tf.sparse_add': 'tf.sparse.add',
+    'tf.sparse_fill_empty_rows': 'tf.sparse.fill_empty_rows',
+    'tf.sparse_mask': 'tf.sparse.mask',
+    'tf.sparse_maximum': 'tf.sparse.maximum',
+    'tf.sparse_merge': 'tf.sparse.merge',
+    'tf.sparse_minimum': 'tf.sparse.minimum',
+    'tf.sparse_placeholder': 'tf.sparse.placeholder',
+    'tf.sparse_reorder': 'tf.sparse.reorder',
+    'tf.sparse_reset_shape': 'tf.sparse.reset_shape',
+    'tf.sparse_reshape': 'tf.sparse.reshape',
+    'tf.sparse_retain': 'tf.sparse.retain',
+    'tf.sparse_segment_mean': 'tf.sparse.segment_mean',
+    'tf.sparse_segment_sqrt_n': 'tf.sparse.segment_sqrt_n',
+    'tf.sparse_segment_sum': 'tf.sparse.segment_sum',
+    'tf.sparse_slice': 'tf.sparse.slice',
+    'tf.sparse_softmax': 'tf.sparse.softmax',
+    'tf.sparse_tensor_dense_matmul': 'tf.sparse.matmul',
+    'tf.sparse_tensor_to_dense': 'tf.sparse.to_dense',
+    'tf.sparse_to_indicator': 'tf.sparse.to_indicator',
+    'tf.sparse_transpose': 'tf.sparse.transpose',
     'tf.squared_difference': 'tf.math.squared_difference',
     'tf.string_join': 'tf.strings.join',
     'tf.string_strip': 'tf.strings.strip',
@@ -123,13 +189,18 @@ renames = {
     'tf.string_to_hash_bucket_fast': 'tf.strings.to_hash_bucket_fast',
     'tf.string_to_hash_bucket_strong': 'tf.strings.to_hash_bucket_strong',
     'tf.string_to_number': 'tf.strings.to_number',
-    'tf.substr': 'tf.strings.substr',
-    'tf.tan': 'tf.math.tan',
-    'tf.tile': 'tf.manip.tile',
+    'tf.svd': 'tf.linalg.svd',
+    'tf.trace': 'tf.linalg.trace',
+    'tf.train.match_filenames_once': 'tf.io.match_filenames_once',
+    'tf.uniform_unit_scaling_initializer': 'tf.initializers.uniform_unit_scaling',
     'tf.unsorted_segment_max': 'tf.math.unsorted_segment_max',
+    'tf.unsorted_segment_mean': 'tf.math.unsorted_segment_mean',
     'tf.unsorted_segment_min': 'tf.math.unsorted_segment_min',
     'tf.unsorted_segment_prod': 'tf.math.unsorted_segment_prod',
+    'tf.unsorted_segment_sqrt_n': 'tf.math.unsorted_segment_sqrt_n',
     'tf.unsorted_segment_sum': 'tf.math.unsorted_segment_sum',
+    'tf.variance_scaling_initializer': 'tf.keras.initializers.VarianceScaling',
+    'tf.verify_tensor_all_finite': 'tf.debugging.assert_all_finite',
     'tf.write_file': 'tf.io.write_file',
     'tf.zeta': 'tf.math.zeta'
 }
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index 3886c1e8b9..f606d202a6 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -49,19 +49,20 @@ class TestUpgrade(test_util.TensorFlowTestCase):
     self.assertTrue(report.find("Failed to parse") != -1)
 
   def testReport(self):
-    text = "tf.acos(a)\n"
+    text = "tf.assert_near(a)\n"
     _, report, unused_errors, unused_new_text = self._upgrade(text)
     # This is not a complete test, but it is a sanity test that a report
     # is generating information.
-    self.assertTrue(report.find("Renamed function `tf.acos` to `tf.math.acos`"))
+    self.assertTrue(report.find("Renamed function `tf.assert_near` to "
+                                "`tf.debugging.assert_near`"))
 
   def testRename(self):
-    text = "tf.acos(a)\n"
+    text = "tf.conj(a)\n"
     _, unused_report, unused_errors, new_text = self._upgrade(text)
-    self.assertEqual(new_text, "tf.math.acos(a)\n")
-    text = "tf.rsqrt(tf.log(3.8))\n"
+    self.assertEqual(new_text, "tf.math.conj(a)\n")
+    text = "tf.rsqrt(tf.log_sigmoid(3.8))\n"
     _, unused_report, unused_errors, new_text = self._upgrade(text)
-    self.assertEqual(new_text, "tf.math.rsqrt(tf.math.log(3.8))\n")
+    self.assertEqual(new_text, "tf.math.rsqrt(tf.math.log_sigmoid(3.8))\n")
 
   def testLearningRateDecay(self):
     for decay in ["tf.train.exponential_decay", "tf.train.piecewise_constant",
@@ -82,8 +83,8 @@ class TestUpgradeFiles(test_util.TensorFlowTestCase):
   def testInplace(self):
     """Check to make sure we don't have a file system race."""
     temp_file = tempfile.NamedTemporaryFile("w", delete=False)
-    original = "tf.acos(a, b)\n"
-    upgraded = "tf.math.acos(a, b)\n"
+    original = "tf.conj(a)\n"
+    upgraded = "tf.math.conj(a)\n"
     temp_file.write(original)
     temp_file.close()
     upgrader = ast_edits.ASTCodeUpgrader(tf_upgrade_v2.TFAPIChangeSpec())
diff --git a/tensorflow/tools/compatibility/update/generate_v2_renames_map.py b/tensorflow/tools/compatibility/update/generate_v2_renames_map.py
index 567eceb0b6..7d6beca358 100644
--- a/tensorflow/tools/compatibility/update/generate_v2_renames_map.py
+++ b/tensorflow/tools/compatibility/update/generate_v2_renames_map.py
@@ -88,6 +88,7 @@ def update_renames_v2(output_file_path):
 
   visitor = public_api.PublicAPIVisitor(visit)
   visitor.do_not_descend_map['tf'].append('contrib')
+  visitor.do_not_descend_map['tf.compat'] = ['v1', 'v2']
   traverse.traverse(tf, visitor)
 
   renames_file_text = '%srenames = {\n%s\n}\n' % (
-- 
GitLab


From 2ed698735d465872e16cb6b292bd747ca431c083 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Wed, 10 Oct 2018 13:31:12 -0700
Subject: [PATCH 0718/1085] Support lowering of nested If and While ops.
 Removes the LowerIfOpPass and LowerWhileOpPass and adds a unified
 LowerIfWhilePass. This change is required because the PRE_PLACEMENT graph
 optimization
 pass(https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/common_runtime/graph_execution_state.cc#L531)
 is run only once and hence can not handle nesting. Enable the test for nested
 cond and while in control_flow_ops_py_test.

PiperOrigin-RevId: 216583372
---
 tensorflow/core/BUILD                         |  25 ++
 tensorflow/core/common_runtime/lower_if_op.cc |  47 +--
 tensorflow/core/common_runtime/lower_if_op.h  |  14 +-
 .../core/common_runtime/lower_if_op_test.cc   |   9 +-
 .../core/common_runtime/lower_if_while.cc     |  95 +++++
 .../core/common_runtime/lower_if_while.h      |  38 ++
 .../common_runtime/lower_if_while_test.cc     | 337 ++++++++++++++++++
 .../core/common_runtime/lower_while_op.cc     |  61 +---
 .../core/common_runtime/lower_while_op.h      |  14 +-
 .../common_runtime/lower_while_op_test.cc     |  19 +-
 .../kernel_tests/control_flow_ops_py_test.py  |  11 +-
 11 files changed, 533 insertions(+), 137 deletions(-)
 create mode 100644 tensorflow/core/common_runtime/lower_if_while.cc
 create mode 100644 tensorflow/core/common_runtime/lower_if_while.h
 create mode 100644 tensorflow/core/common_runtime/lower_if_while_test.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 9e7806342a..25651252a7 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2823,6 +2823,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
     "common_runtime/graph_optimizer.h",
     "common_runtime/local_device.h",
     "common_runtime/lower_if_op.h",
+    "common_runtime/lower_if_while.h",
     "common_runtime/lower_while_op.h",
     "common_runtime/memory_types.h",
     "common_runtime/mkl_cpu_allocator.h",
@@ -2878,6 +2879,7 @@ tf_cuda_library(
         "common_runtime/hierarchical_tree_broadcaster.cc",
         "common_runtime/local_device.cc",
         "common_runtime/lower_if_op.cc",
+        "common_runtime/lower_if_while.cc",
         "common_runtime/lower_while_op.cc",
         "common_runtime/memory_types.cc",
         "common_runtime/mkl_cpu_allocator.cc",
@@ -4723,6 +4725,29 @@ tf_cc_tests(
     ],
 )
 
+tf_cc_tests(
+    name = "common_runtime_lower_if_while_test",
+    size = "small",
+    srcs = ["common_runtime/lower_if_while_test.cc"],
+    deps = [
+        ":all_kernels",
+        ":core_cpu",
+        ":core_cpu_internal",
+        ":direct_session",
+        ":framework",
+        ":framework_internal",
+        ":lib",
+        ":test",
+        ":test_main",
+        ":testlib",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:cc_ops_internal",
+        "//tensorflow/cc:client_session",
+        "//tensorflow/cc:function_ops",
+        "//tensorflow/cc:ops",
+    ],
+)
+
 # Test data
 filegroup(
     name = "image_testdata",
diff --git a/tensorflow/core/common_runtime/lower_if_op.cc b/tensorflow/core/common_runtime/lower_if_op.cc
index 9306386117..44a2478e3f 100644
--- a/tensorflow/core/common_runtime/lower_if_op.cc
+++ b/tensorflow/core/common_runtime/lower_if_op.cc
@@ -22,10 +22,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-// TODO(jpienaar): Consider making it a public attribute.
-const char* const LowerIfOpPass::kLowerUsingSwitchMergeAttr =
-    "_lower_using_switch_merge";
-
 namespace {
 
 using NodeOut = NodeBuilder::NodeOut;
@@ -237,45 +233,7 @@ Status CondBuilder::InlineCallNodes() {
 
 }  // namespace
 
-Status LowerIfOpPass::Run(const GraphOptimizationPassOptions& options) {
-  if (options.partition_graphs != nullptr) {
-    return errors::Internal(
-        "Lowering If op should happen before partitioning.");
-  }
-  if (options.graph == nullptr) {
-    return Status::OK();
-  }
-
-  Graph* g = options.graph->get();
-  if (g == nullptr) {
-    return errors::Internal("Lowering If op requires a graph to be available.");
-  }
-
-  FunctionLibraryDefinition* flib = options.flib_def;
-  if (flib == nullptr) {
-    return errors::Internal(
-        "Lowering If op requires a FunctionLibraryDefinition to be available.");
-  }
-
-  // Match all the nodes that need to be rewritten.
-  gtl::InlinedVector<Node*, 2> matches;
-  for (Node* n : g->op_nodes()) {
-    if (n->type_string() == "If") {
-      // Only rewrite if the If op is marked as needing to be lowered.
-      bool match;
-      Status s = GetNodeAttr(n->attrs(), kLowerUsingSwitchMergeAttr, &match);
-      if (s.ok() && match) matches.push_back(n);
-    }
-  }
-  for (Node* n : matches) {
-    TF_RETURN_IF_ERROR(RewriteNode(n, *flib, g));
-  }
-  return Status::OK();
-}
-
-Status LowerIfOpPass::RewriteNode(Node* n,
-                                  const FunctionLibraryDefinition& flib,
-                                  Graph* g) {
+Status RewriteIfNode(Node* n, Graph* g, const FunctionLibraryDefinition& flib) {
   const AttrValue* then_attr = n->attrs().Find("then_branch");
   if (then_attr == nullptr) {
     return errors::InvalidArgument("Then branch function missing");
@@ -296,7 +254,4 @@ Status LowerIfOpPass::RewriteNode(Node* n,
   return Status::OK();
 }
 
-REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0,
-                      LowerIfOpPass);
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/lower_if_op.h b/tensorflow/core/common_runtime/lower_if_op.h
index 5ab1123e3f..fc52e597be 100644
--- a/tensorflow/core/common_runtime/lower_if_op.h
+++ b/tensorflow/core/common_runtime/lower_if_op.h
@@ -21,18 +21,8 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Rewrite If ops to use switch and merge nodes instead.
-class LowerIfOpPass : public GraphOptimizationPass {
- public:
-  static const char* const kLowerUsingSwitchMergeAttr;
-
-  Status Run(const GraphOptimizationPassOptions& options) override;
-
- private:
-  // Rewrite the given If node `n` in graph `g` to use the switch-merge
-  // form. `flib` should contain the branch functions referenced by `n`.
-  Status RewriteNode(Node* n, const FunctionLibraryDefinition& flib, Graph* g);
-};
+// Replaces If node `n` with its lowered form that uses Switch and Merge nodes.
+Status RewriteIfNode(Node* n, Graph* g, const FunctionLibraryDefinition& flib);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/common_runtime/lower_if_op_test.cc b/tensorflow/core/common_runtime/lower_if_op_test.cc
index 044a355d06..5765e3e367 100644
--- a/tensorflow/core/common_runtime/lower_if_op_test.cc
+++ b/tensorflow/core/common_runtime/lower_if_op_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/common_runtime/lower_if_op.h"
+#include "tensorflow/core/common_runtime/lower_if_while.h"
 
 #include "tensorflow/cc/client/client_session.h"
 #include "tensorflow/cc/framework/ops.h"
@@ -40,7 +40,7 @@ Status Rewrite(std::unique_ptr<Graph>* graph) {
   GraphOptimizationPassOptions opt_options;
   opt_options.graph = graph;
   opt_options.flib_def = &flib_def;
-  LowerIfOpPass pass;
+  LowerIfWhilePass pass;
   return pass.Run(opt_options);
 }
 
@@ -51,7 +51,6 @@ TEST(LowerIfOpTest, Simple) {
   FunctionDefLibrary f_lib_proto;
   *(f_lib_proto.add_function()) = test::function::XTimesTwo();
   *(f_lib_proto.add_function()) = test::function::XTimesFour();
-  FunctionLibraryDefinition f_lib(OpRegistry::Global(), f_lib_proto);
 
   // Construct simple conditional that switches on `pred` and operates only on
   // single input `A`.
@@ -65,12 +64,12 @@ TEST(LowerIfOpTest, Simple) {
   tb.mutable_func()->set_name("XTimesTwo");
   AttrValue eb;
   eb.mutable_func()->set_name("XTimesFour");
-  TF_ASSERT_OK(NodeBuilder("if", "If", &f_lib)
+  TF_ASSERT_OK(NodeBuilder("if", "If", &root.graph()->flib_def())
                    .Input(pred.node())
                    .Input(inputs)
                    .Attr("then_branch", tb)
                    .Attr("else_branch", eb)
-                   .Attr(LowerIfOpPass::kLowerUsingSwitchMergeAttr, true)
+                   .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true)
                    .Attr("Tout", {DT_INT32})
                    .Finalize(root.graph(), &written_if));
   TF_ASSERT_OK(root.DoShapeInference(written_if));
diff --git a/tensorflow/core/common_runtime/lower_if_while.cc b/tensorflow/core/common_runtime/lower_if_while.cc
new file mode 100644
index 0000000000..c1db575b04
--- /dev/null
+++ b/tensorflow/core/common_runtime/lower_if_while.cc
@@ -0,0 +1,95 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/lower_if_while.h"
+#include "tensorflow/core/common_runtime/lower_if_op.h"
+#include "tensorflow/core/common_runtime/lower_while_op.h"
+
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/node_builder.h"
+
+namespace tensorflow {
+
+#if defined(_MSC_VER)
+constexpr char* LowerIfWhilePass::kLowerUsingSwitchMergeAttr;
+#else
+constexpr char LowerIfWhilePass::kLowerUsingSwitchMergeAttr[];
+#endif
+
+namespace {
+
+bool HasLoweringAttr(const AttrSlice& attrs) {
+  bool match;
+  Status s =
+      GetNodeAttr(attrs, LowerIfWhilePass::kLowerUsingSwitchMergeAttr, &match);
+  return s.ok() && match;
+}
+
+}  // namespace
+
+Status LowerIfWhilePass::Run(const GraphOptimizationPassOptions& options) {
+  if (options.partition_graphs != nullptr) {
+    return errors::Internal(
+        "Lowering If/While ops should happen before partitioning.");
+  }
+  if (options.graph == nullptr) {
+    return Status::OK();
+  }
+
+  Graph* g = options.graph->get();
+  if (g == nullptr) {
+    return errors::Internal(
+        "Lowering While op requires a graph to be available.");
+  }
+
+  FunctionLibraryDefinition* flib = options.flib_def;
+  if (flib == nullptr) {
+    return errors::Internal(
+        "Lowering If op requires a FunctionLibraryDefinition to be available.");
+  }
+
+  // Lower all If and While ops that have the `kLowerUsingSwitchMergeAttr` attr
+  // set.
+  // We start at `i` = 2 to skip the source and sink nodes.
+  // Note that `g->num_node_ids()` may change in the for body if a matching If
+  // or While node is lowered. Since new graph nodes are always added to the
+  // end of the list of nodes it is ensured that nested If/While nodes will be
+  // lowered as well.
+  for (int i = 2; i < g->num_node_ids(); ++i) {
+    Node* n = g->FindNodeId(i);
+    if (n == nullptr) continue;  // deleted node
+    if (HasLoweringAttr(n->attrs())) {
+      if (n->type_string() == "If") {
+        TF_RETURN_IF_ERROR(RewriteIfNode(n, g, *flib));
+      } else if (n->type_string() == "While") {
+        TF_RETURN_IF_ERROR(RewriteWhileNode(n, g, *flib));
+      } else {
+        return errors::Internal(
+            "Node:", n->name(), " of type ", n->type_string(), " has '",
+            LowerIfWhilePass::kLowerUsingSwitchMergeAttr,
+            "' attr set but it does not support lowering.\n", n->DebugString());
+      }
+    }
+  }
+
+  return Status::OK();
+}
+
+REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0,
+                      LowerIfWhilePass);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/lower_if_while.h b/tensorflow/core/common_runtime/lower_if_while.h
new file mode 100644
index 0000000000..efa3945bca
--- /dev/null
+++ b/tensorflow/core/common_runtime/lower_if_while.h
@@ -0,0 +1,38 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_LOWER_IF_WHILE_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_LOWER_IF_WHILE_H_
+
+#include "tensorflow/core/common_runtime/optimization_registry.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+// Rewrite If and While ops to use lower level control flow primitives instead.
+class LowerIfWhilePass : public GraphOptimizationPass {
+ public:
+  Status Run(const GraphOptimizationPassOptions& options) override;
+#if defined(_MSC_VER)
+  static constexpr char* kLowerUsingSwitchMergeAttr =
+#else
+  static constexpr char kLowerUsingSwitchMergeAttr[] =
+#endif
+      "_lower_using_switch_merge";
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_LOWER_IF_WHILE_H_
diff --git a/tensorflow/core/common_runtime/lower_if_while_test.cc b/tensorflow/core/common_runtime/lower_if_while_test.cc
new file mode 100644
index 0000000000..07bcecf168
--- /dev/null
+++ b/tensorflow/core/common_runtime/lower_if_while_test.cc
@@ -0,0 +1,337 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/common_runtime/lower_if_while.h"
+
+#include "tensorflow/cc/client/client_session.h"
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/control_flow_ops_internal.h"
+#include "tensorflow/cc/ops/function_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/common_runtime/graph_runner.h"
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/graph/graph_def_builder.h"
+#include "tensorflow/core/graph/graph_def_builder_util.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+typedef FunctionDefHelper FDH;
+
+static void AssertHasSubstr(StringPiece s, StringPiece expected) {
+  ASSERT_TRUE(str_util::StrContains(s, expected))
+      << "'" << s << "' does not contain '" << expected << "'";
+}
+
+Status Rewrite(std::unique_ptr<Graph>* graph) {
+  FunctionLibraryDefinition flib_def((*graph)->flib_def());
+  GraphOptimizationPassOptions opt_options;
+  opt_options.graph = graph;
+  opt_options.flib_def = &flib_def;
+  LowerIfWhilePass pass;
+  return pass.Run(opt_options);
+}
+
+// (counter:int32, pred:bool, x:int32) -> counter < N
+FunctionDef WhileWithIfCond(int32 N) {
+  const Tensor kN = test::AsScalar<int32>(N);
+  return FDH::Define(
+      // Name
+      "WhileWithIfCond",
+      // Args
+      {"counter: int32", "pred: bool", "x: int32"},
+      // Return values
+      {"z: bool"},
+      // Attr def
+      {},
+      // Nodes
+      {
+          {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT32}}},
+          {{"z"}, "Less", {"counter", "N"}, {{"T", DT_INT32}}},
+      });
+}
+
+// (counter:int32, pred:bool, x:int32) ->
+//   (counter+1, pred, if pred: x * 2 else: x * 4)
+FunctionDef WhileWithIfBody() {
+  NameAttrList then_func;
+  then_func.set_name("XTimesTwo");
+  NameAttrList else_func;
+  else_func.set_name("XTimesFour");
+  const Tensor kOne = test::AsScalar<int32>(1);
+  std::vector<DataType> input_types = {DT_INT32};
+  std::vector<DataType> output_types = {DT_INT32};
+  return FDH::Define(
+      // Name
+      "WhileWithIfBody",
+      // Args
+      {"counter: int32", "pred: bool", "x: int32"},
+      // Return values
+      {"updated_counter: int32", "pred: bool", "if: int32"},
+      // Attr def
+      {},
+      // Nodes
+      {
+          {{"if"},
+           "If",
+           {"pred", "x"},
+           {{"then_branch", then_func},
+            {"else_branch", else_func},
+            {"Tcond", DT_BOOL},
+            {"Tin", input_types},
+            {"Tout", output_types},
+            {LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true}}},
+          {{"one"}, "Const", {}, {{"value", kOne}, {"dtype", DT_INT32}}},
+          {{"updated_counter"}, "Add", {"counter", "one"}, {{"T", DT_INT32}}},
+      });
+}
+
+TEST(LowerIfWhileTest, CondInWhile) {
+  // Tests the value of `a` for different values of args after the following
+  // program:
+  //
+  // Args:
+  // counter = Arg(type = int32)
+  // pred = Arg(type = bool)
+  // a = Arg(type = int32)
+  // N = 3
+  // while (counter < N) {
+  //   counter += 1;
+  //   if (pred) {
+  //     a *= 2;
+  //   } else {
+  //     a *= 4;
+  //   }
+  // }
+
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+
+  FunctionDefLibrary f_lib_proto;
+  // Cond functions.
+  *f_lib_proto.add_function() = test::function::XTimesTwo();
+  *f_lib_proto.add_function() = test::function::XTimesFour();
+  // While functions.
+  *f_lib_proto.add_function() = WhileWithIfCond(3);
+  *f_lib_proto.add_function() = WhileWithIfBody();
+
+  Scope root = Scope::NewRootScope().ExitOnError();
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto));
+  auto counter = ops::_Arg(root.WithOpName("counter"), DT_INT32, 0);
+  auto pred = ops::_Arg(root.WithOpName("pred"), DT_BOOL, 0);
+  auto a = ops::_Arg(root.WithOpName("A"), DT_INT32, 0);
+  std::vector<NodeBuilder::NodeOut> inputs(
+      {NodeBuilder::NodeOut(counter.node()), NodeBuilder::NodeOut(pred.node()),
+       NodeBuilder::NodeOut(a.node())});
+  Node* while_node;
+  AttrValue cond_func;
+  cond_func.mutable_func()->set_name("WhileWithIfCond");
+  AttrValue body_func;
+  body_func.mutable_func()->set_name("WhileWithIfBody");
+  TF_ASSERT_OK(NodeBuilder("while", "While", &root.graph()->flib_def())
+                   .Input(inputs)
+                   .Attr("T", {DT_INT32, DT_BOOL, DT_INT32})
+                   .Attr("cond", cond_func)
+                   .Attr("body", body_func)
+                   .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true)
+                   .Finalize(root.graph(), &while_node));
+  TF_ASSERT_OK(root.DoShapeInference(while_node));
+  TF_ASSERT_OK(root.ToGraph(graph.get()));
+  TF_ASSERT_OK(Rewrite(&graph));
+
+  // Lowered graph has no While and If ops.
+  for (const auto* op : graph->op_nodes()) {
+    ASSERT_NE(op->type_string(), "While");
+    ASSERT_NE(op->type_string(), "If");
+  }
+
+  // Verify execution.
+  ClientSession session(root);
+  {
+    ClientSession::FeedType feeds;
+    feeds.emplace(Output(counter.node()), Input::Initializer(0));
+    feeds.emplace(Output(pred.node()), Input::Initializer(true));
+    feeds.emplace(Output(a.node()), Input::Initializer(1));
+    std::vector<Tensor> out_tensors;
+    TF_ASSERT_OK(session.Run(feeds, {Output(while_node, 2)}, &out_tensors));
+    ASSERT_EQ(out_tensors.size(), 1);
+    EXPECT_EQ(out_tensors[0].scalar<int>()(), 8);
+  }
+  {
+    ClientSession::FeedType feeds;
+    feeds.emplace(Output(counter.node()), Input::Initializer(0));
+    feeds.emplace(Output(pred.node()), Input::Initializer(false));
+    feeds.emplace(Output(a.node()), Input::Initializer(1));
+    std::vector<Tensor> out_tensors;
+    TF_ASSERT_OK(session.Run(feeds, {Output(while_node, 2)}, &out_tensors));
+    ASSERT_EQ(out_tensors.size(), 1);
+    EXPECT_EQ(out_tensors[0].scalar<int>()(), 64);  // a
+  }
+}
+
+// x:int32 ->
+//    while x <= N:
+//      x*=2;
+//    return x;
+FunctionDef IfWithWhileThen() {
+  NameAttrList cond_func;
+  cond_func.set_name("LessThanOrEqualToN");
+  NameAttrList body_func;
+  body_func.set_name("XTimesTwo");
+  std::vector<DataType> input_and_output_types = {DT_INT32};
+  std::vector<TensorShape> output_shapes = {TensorShape()};
+  return FDH::Define(
+      // Name
+      "IfWithWhileThen",
+      // Args
+      {"x: int32"},
+      // Return values
+      {"while: int32"},
+      // Attr def
+      {},
+      // Nodes
+      {
+          {{"while"},
+           "While",
+           {"x"},
+           {{"cond", cond_func},
+            {"body", body_func},
+            {"T", input_and_output_types},
+            {"output_shapes", output_shapes},
+            {LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true}}},
+      });
+}
+
+TEST(LowerIfWhileTest, WhileInCond) {
+  // Tests the value of `a` for different values of args after the following
+  // program:
+  //
+  // Args:
+  // pred = Arg(type = bool)
+  // a = Arg(type = int32)
+  // N = 8
+  // if (pred) {
+  //   while (a <= N) {
+  //     a *= 2;
+  //   }
+  // }
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+
+  // Add test functions for cond and body.
+  FunctionDefLibrary f_lib_proto;
+  *f_lib_proto.add_function() = test::function::XTimesTwo();
+  *f_lib_proto.add_function() = test::function::LessThanOrEqualToN(8);
+  *f_lib_proto.add_function() = IfWithWhileThen();
+
+  Scope root = Scope::NewRootScope().ExitOnError();
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto));
+  auto pred = ops::_Arg(root.WithOpName("pred"), DT_BOOL, 0);
+  auto a = ops::_Arg(root.WithOpName("A"), DT_INT32, 1);
+  std::vector<NodeBuilder::NodeOut> inputs({NodeBuilder::NodeOut(a.node())});
+  AttrValue then_func;
+  then_func.mutable_func()->set_name("IfWithWhileThen");
+  AttrValue else_func;
+  else_func.mutable_func()->set_name("XTimesTwo");
+  Node* if_node;
+  TF_ASSERT_OK(NodeBuilder("if", "If", &root.graph()->flib_def())
+                   .Input(pred.node())
+                   .Input(inputs)
+                   .Attr("then_branch", then_func)
+                   .Attr("else_branch", else_func)
+                   .Attr("Tout", {DT_INT32})
+                   .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true)
+                   .Finalize(root.graph(), &if_node));
+  TF_ASSERT_OK(root.DoShapeInference(if_node));
+  TF_ASSERT_OK(root.ToGraph(graph.get()));
+
+  // The input graph has no lower level control flow primitives.
+  int node_called_if_count = 0;
+  for (const auto* op : graph->op_nodes()) {
+    ASSERT_FALSE(op->IsEnter());
+    ASSERT_FALSE(op->IsExit());
+    ASSERT_FALSE(op->IsSwitch());
+    ASSERT_FALSE(op->IsMerge());
+    ASSERT_FALSE(op->IsNextIteration());
+    ASSERT_FALSE(op->IsLoopCond());
+    if (op->name() == "if") {
+      node_called_if_count++;
+    }
+  }
+  ASSERT_EQ(node_called_if_count, 1);
+
+  TF_ASSERT_OK(Rewrite(&graph));
+
+  node_called_if_count = 0;
+  for (const auto* op : graph->op_nodes()) {
+    if (op->name() == "if") {
+      node_called_if_count++;
+    }
+    ASSERT_NE(op->type_string(), "While");
+    ASSERT_NE(op->type_string(), "If");
+  }
+  // One node per loop input.
+  ASSERT_EQ(node_called_if_count, 1);
+
+  // Verify execution.
+  ClientSession session(root);
+  {
+    ClientSession::FeedType feeds;
+    feeds.emplace(Output(pred.node()), Input::Initializer(true));
+    feeds.emplace(Output(a.node()), Input::Initializer(1));
+    std::vector<Tensor> out_tensors;
+    TF_ASSERT_OK(session.Run(feeds, {Output(if_node)}, &out_tensors));
+    ASSERT_EQ(out_tensors.size(), 1);
+    EXPECT_EQ(out_tensors[0].scalar<int>()(), 16);
+  }
+  {
+    ClientSession::FeedType feeds;
+    feeds.emplace(Output(pred.node()), Input::Initializer(false));
+    feeds.emplace(Output(a.node()), Input::Initializer(1));
+    std::vector<Tensor> out_tensors;
+    TF_ASSERT_OK(session.Run(feeds, {Output(if_node)}, &out_tensors));
+    ASSERT_EQ(out_tensors.size(), 1);
+    EXPECT_EQ(out_tensors[0].scalar<int>()(), 2);
+  }
+}
+
+TEST(LowerIfWhileTest, RaisesWhenLoweringUnhandledOpType) {
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+
+  Scope root = Scope::NewRootScope().ExitOnError();
+  Node* const_node;
+  Tensor const_val(DT_INT32, TensorShape({}));
+  const_val.scalar<int32>()() = 1;
+  TF_ASSERT_OK(NodeBuilder("const", "Const")
+                   .Attr("value", const_val)
+                   .Attr("dtype", const_val.dtype())
+                   .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true)
+                   .Finalize(root.graph(), &const_node));
+  TF_ASSERT_OK(root.DoShapeInference(const_node));
+  TF_ASSERT_OK(root.ToGraph(graph.get()));
+
+  Status s = Rewrite(&graph);
+  ASSERT_EQ(s.code(), error::INTERNAL);
+  AssertHasSubstr(s.error_message(), "does not support lowering");
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/lower_while_op.cc b/tensorflow/core/common_runtime/lower_while_op.cc
index 1f5da133e9..6f9921a796 100644
--- a/tensorflow/core/common_runtime/lower_while_op.cc
+++ b/tensorflow/core/common_runtime/lower_while_op.cc
@@ -53,8 +53,9 @@ using NodeOut = NodeBuilder::NodeOut;
 class LowerWhileHelper {
  public:
   static Status Run(Node* while_op, const string& cond_fn_name,
-                    const string& body_fn_name, Graph* graph) {
-    LowerWhileHelper helper(while_op, cond_fn_name, body_fn_name, graph);
+                    const string& body_fn_name, Graph* graph,
+                    const FunctionLibraryDefinition& flib) {
+    LowerWhileHelper helper(while_op, cond_fn_name, body_fn_name, graph, flib);
     return helper.RunInternal();
   }
 
@@ -63,7 +64,8 @@ class LowerWhileHelper {
   // and body functions named `cond_fn_name` and `body_fn_name` respectively in
   // the given graph.
   LowerWhileHelper(Node* while_op, const string& cond_fn_name,
-                   const string& body_fn_name, Graph* graph);
+                   const string& body_fn_name, Graph* graph,
+                   const FunctionLibraryDefinition& flib);
 
   Status RunInternal();
 
@@ -127,6 +129,7 @@ class LowerWhileHelper {
   // The IdentityN node with the same outputs as the original While op.
   Node* lowered_while_output_;
   Graph* graph_;
+  const FunctionLibraryDefinition& flib_;
   // Name of the `while_op_`.
   string name_;
 
@@ -143,9 +146,11 @@ class LowerWhileHelper {
 };
 
 LowerWhileHelper::LowerWhileHelper(Node* while_op, const string& cond_fn_name,
-                                   const string& body_fn_name, Graph* graph)
+                                   const string& body_fn_name, Graph* graph,
+                                   const FunctionLibraryDefinition& flib)
     : while_op_(while_op),
       graph_(graph),
+      flib_(flib),
       name_(while_op->name()),
       cond_call_builder_(NewName("cond"), cond_fn_name, graph->op_registry()),
       body_call_builder_(NewName("body"), body_fn_name, graph->op_registry()),
@@ -346,8 +351,8 @@ string LowerWhileHelper::NewName(const string& infix) {
   return graph_->NewName(strings::StrCat(name_, "/", infix));
 }
 
-Status InlineCallInGraph(Node* n, Graph* g) {
-  const auto& lib = g->flib_def();
+Status InlineCallInGraph(Node* n, Graph* g,
+                         const FunctionLibraryDefinition& lib) {
   const FunctionDef* fdef = lib.Find(n->type_string());
   CHECK(fdef != nullptr);
   FunctionBody* fbody;
@@ -365,46 +370,15 @@ Status InlineCallInGraph(Node* n, Graph* g) {
 }
 
 Status LowerWhileHelper::InlineCallNodes() {
-  TF_RETURN_IF_ERROR(InlineCallInGraph(cond_call_node_, graph_));
-  TF_RETURN_IF_ERROR(InlineCallInGraph(body_call_node_, graph_));
+  TF_RETURN_IF_ERROR(InlineCallInGraph(cond_call_node_, graph_, flib_));
+  TF_RETURN_IF_ERROR(InlineCallInGraph(body_call_node_, graph_, flib_));
   return Status::OK();
 }
 
 }  // namespace
 
-Status LowerWhileOpPass::Run(const GraphOptimizationPassOptions& options) {
-  if (options.partition_graphs != nullptr) {
-    return errors::Internal(
-        "Lowering While op should happen before partitioning.");
-  }
-  if (options.graph == nullptr) {
-    return Status::OK();
-  }
-
-  Graph* g = options.graph->get();
-  if (g == nullptr) {
-    return errors::Internal(
-        "Lowering While op requires a graph to be available.");
-  }
-
-  // Match all the nodes that need to be rewritten.
-  gtl::InlinedVector<Node*, 2> matches;
-  for (Node* n : g->op_nodes()) {
-    if (n->type_string() == "While") {
-      // Only rewrite if the While op is marked as needing to be lowered.
-      bool match;
-      Status s = GetNodeAttr(n->attrs(),
-                             LowerIfOpPass::kLowerUsingSwitchMergeAttr, &match);
-      if (s.ok() && match) matches.push_back(n);
-    }
-  }
-  for (Node* n : matches) {
-    TF_RETURN_IF_ERROR(RewriteNode(n, g));
-  }
-  return Status::OK();
-}
-
-Status LowerWhileOpPass::RewriteNode(Node* n, Graph* g) {
+Status RewriteWhileNode(Node* n, Graph* g,
+                        const FunctionLibraryDefinition& flib) {
   const AttrValue* cond_attr = n->attrs().Find("cond");
   if (cond_attr == nullptr) {
     return errors::InvalidArgument("While cond function missing");
@@ -415,13 +389,10 @@ Status LowerWhileOpPass::RewriteNode(Node* n, Graph* g) {
   }
 
   TF_RETURN_IF_ERROR(LowerWhileHelper::Run(n, cond_attr->func().name(),
-                                           body_attr->func().name(), g));
+                                           body_attr->func().name(), g, flib));
   g->RemoveNode(n);
 
   return Status::OK();
 }
 
-REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0,
-                      LowerWhileOpPass);
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/lower_while_op.h b/tensorflow/core/common_runtime/lower_while_op.h
index eadafbeb91..4b640bafba 100644
--- a/tensorflow/core/common_runtime/lower_while_op.h
+++ b/tensorflow/core/common_runtime/lower_while_op.h
@@ -21,16 +21,10 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Rewrite While ops to use lower level control flow primitives instead.
-class LowerWhileOpPass : public GraphOptimizationPass {
- public:
-  Status Run(const GraphOptimizationPassOptions& options) override;
-
- private:
-  // Rewrite the given While node `n` in graph `g` to use the lower level
-  // primitives Enter, Exit, Switch, Merge and NextIteration.
-  Status RewriteNode(Node* n, Graph* g);
-};
+// Replaces While node `n` with its lowered form that uses Enter, Exit, Switch,
+// Merge, NextIteration and LoopCond nodes.
+Status RewriteWhileNode(Node* n, Graph* g,
+                        const FunctionLibraryDefinition& flib);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/common_runtime/lower_while_op_test.cc b/tensorflow/core/common_runtime/lower_while_op_test.cc
index 27cbada004..24fd4ed5bb 100644
--- a/tensorflow/core/common_runtime/lower_while_op_test.cc
+++ b/tensorflow/core/common_runtime/lower_while_op_test.cc
@@ -13,8 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/common_runtime/lower_while_op.h"
-#include "tensorflow/core/common_runtime/lower_if_op.h"
+#include "tensorflow/core/common_runtime/lower_if_while.h"
 
 #include "tensorflow/cc/client/client_session.h"
 #include "tensorflow/cc/framework/ops.h"
@@ -37,13 +36,11 @@ namespace tensorflow {
 namespace {
 
 Status Rewrite(std::unique_ptr<Graph>* graph) {
-  FunctionDefLibrary flib;
-  FunctionLibraryDefinition flib_def((*graph)->op_registry(), flib);
-
+  FunctionLibraryDefinition flib_def((*graph)->flib_def());
   GraphOptimizationPassOptions opt_options;
   opt_options.graph = graph;
   opt_options.flib_def = &flib_def;
-  LowerWhileOpPass pass;
+  LowerIfWhilePass pass;
   return pass.Run(opt_options);
 }
 
@@ -54,7 +51,6 @@ TEST(LowerWhileOpTest, Simple) {
   FunctionDefLibrary f_lib_proto;
   *f_lib_proto.add_function() = test::function::XTimesTwo();
   *f_lib_proto.add_function() = test::function::LessThanOrEqualToN(8);
-  FunctionLibraryDefinition f_lib(OpRegistry::Global(), f_lib_proto);
 
   Scope root = Scope::NewRootScope().ExitOnError();
   TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto));
@@ -65,12 +61,12 @@ TEST(LowerWhileOpTest, Simple) {
   cond_func.mutable_func()->set_name("LessThanOrEqualToN");
   AttrValue body_func;
   body_func.mutable_func()->set_name("XTimesTwo");
-  TF_ASSERT_OK(NodeBuilder("while", "While", &f_lib)
+  TF_ASSERT_OK(NodeBuilder("while", "While", &root.graph()->flib_def())
                    .Input(inputs)
                    .Attr("T", {DT_INT32})
                    .Attr("cond", cond_func)
                    .Attr("body", body_func)
-                   .Attr(LowerIfOpPass::kLowerUsingSwitchMergeAttr, true)
+                   .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true)
                    .Finalize(root.graph(), &while_node));
   TF_ASSERT_OK(root.DoShapeInference(while_node));
   TF_ASSERT_OK(root.ToGraph(graph.get()));
@@ -154,7 +150,6 @@ TEST(LowerWhileOpTest, MultipleInputs) {
   FunctionDefLibrary f_lib_proto;
   *(f_lib_proto.add_function()) = test::function::XPlusOneXTimesY();
   *(f_lib_proto.add_function()) = test::function::XYXLessThanOrEqualToN(4);
-  FunctionLibraryDefinition f_lib(OpRegistry::Global(), f_lib_proto);
 
   Scope root = Scope::NewRootScope().ExitOnError();
   TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto));
@@ -167,12 +162,12 @@ TEST(LowerWhileOpTest, MultipleInputs) {
   cond_func.mutable_func()->set_name("XYXLessThanOrEqualToN");
   AttrValue body_func;
   body_func.mutable_func()->set_name("XPlusOneXTimesY");
-  TF_ASSERT_OK(NodeBuilder("while", "While", &f_lib)
+  TF_ASSERT_OK(NodeBuilder("while", "While", &root.graph()->flib_def())
                    .Input(inputs)
                    .Attr("T", {DT_INT32, DT_INT32})
                    .Attr("cond", cond_func)
                    .Attr("body", body_func)
-                   .Attr(LowerIfOpPass::kLowerUsingSwitchMergeAttr, true)
+                   .Attr(LowerIfWhilePass::kLowerUsingSwitchMergeAttr, true)
                    .Finalize(root.graph(), &while_node));
   TF_ASSERT_OK(root.DoShapeInference(while_node));
   TF_ASSERT_OK(root.ToGraph(graph.get()));
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 46b8b10e90..f7bab691cd 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -1366,7 +1366,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(lambda x: x < 10, body, [x0])
       self.assertEqual(10, sess.run(r, {b: True}))
 
-  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCondWithControl(self):
     # Ensure that no control edges by an outer control dependency context are
     # added to nodes inside cond/while contexts.
@@ -1478,7 +1477,6 @@ class ControlFlowTest(test.TestCase):
     self._testCondWhile_3(use_gpu=False)
     self._testCondWhile_3(use_gpu=True)
 
-  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCond_1(self):
 
     with self.cached_session():
@@ -1495,7 +1493,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [i])
       self.assertAllEqual(10, r.eval())
 
-  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCond_2(self):
 
     with self.cached_session():
@@ -1505,7 +1502,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n])
       self.assertAllEqual(10, r.eval())
 
-  @test_util.disable_control_flow_v2("b/116134862 (cond output shape)")
   def testWhileCond_3(self):
 
     with self.cached_session():
@@ -2271,12 +2267,13 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(1024.0, r.eval())
 
-  @test_util.disable_control_flow_v2("b/116272044 (cond_in_while)")
   def testWhileCondGrad_Simple(self):
     self._testWhileCondGrad_Simple(use_gpu=False)
-    self._testWhileCondGrad_Simple(use_gpu=True)
+    if not control_flow_ops.ENABLE_WHILE_V2:
+      # TODO(b/117519152): Enable.
+      self._testWhileCondGrad_Simple(use_gpu=True)
 
-  @test_util.disable_control_flow_v2("b/116272044 (cond_in_while)")
+  @test_util.disable_control_flow_v2("b/117276490")
   def testWhileCondGrad_UnknownShape(self):
     with self.cached_session() as sess:
       v = array_ops.placeholder(dtypes.float32)
-- 
GitLab


From 6ae53a0915e5ce3bbd85e76e5c075f0ec60f23b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 13:46:54 -0700
Subject: [PATCH 0719/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216586210

---
 tensorflow/go/op/wrappers.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 28cd642739..c6ecd75587 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -19685,7 +19685,7 @@ func QuantizeV2RoundMode(value string) QuantizeV2Attr {
 //
 // ```
 // out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
-// if T == qint8: out[i] -= (range(T) + 1) / 2.0
+// if T == qint8, out[i] -= (range(T) + 1) / 2.0
 // ```
 //
 // here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
@@ -24277,7 +24277,7 @@ func DequantizeMode(value string) DequantizeAttr {
 // In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
 //
 // ```
-// if T == qint8: in[i] += (range(T) + 1)/ 2.0
+// if T == qint8, in[i] += (range(T) + 1)/ 2.0
 // out[i] = min_range + (in[i]* (max_range - min_range) / range(T))
 // ```
 // here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
-- 
GitLab


From 402e5862fa772282d673d5b1a95f4373db3be1b0 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 10 Oct 2018 13:52:11 -0700
Subject: [PATCH 0720/1085] Use the conversion options object throughout the
 process (including during the deferred runtime conversions), and remove its
 arg_types field.

This is necessary for the correct behavior when converting dynamic functions - converted_call must operate in a manner consistent with the original parameters.

PiperOrigin-RevId: 216587135
---
 tensorflow/python/autograph/__init__.py       |   6 +-
 .../python/autograph/converters/call_trees.py |  15 +--
 .../python/autograph/converters/decorators.py |   3 +-
 .../autograph/converters/decorators_test.py   |   7 +-
 tensorflow/python/autograph/core/converter.py | 111 ++++++++++++++----
 .../autograph/core/converter_testing.py       |  22 ++--
 tensorflow/python/autograph/impl/api.py       |  82 ++++---------
 tensorflow/python/autograph/impl/api_test.py  |  33 ++----
 .../python/autograph/impl/conversion.py       |   4 +-
 .../python/autograph/impl/conversion_test.py  |   3 +-
 tensorflow/python/autograph/utils/testing.py  |   2 +
 tensorflow/python/eager/function.py           |   7 +-
 12 files changed, 153 insertions(+), 142 deletions(-)

diff --git a/tensorflow/python/autograph/__init__.py b/tensorflow/python/autograph/__init__.py
index 5ed5e85158..e02fb36269 100644
--- a/tensorflow/python/autograph/__init__.py
+++ b/tensorflow/python/autograph/__init__.py
@@ -24,14 +24,14 @@ from __future__ import print_function
 # TODO(mdan): Bring only the relevant symbols to the top level.
 from tensorflow.python.autograph import operators
 from tensorflow.python.autograph import utils
+from tensorflow.python.autograph.core.converter import ConversionOptions
 from tensorflow.python.autograph.core.errors import GraphConstructionError
-from tensorflow.python.autograph.core.errors import TfRuntimeError
 from tensorflow.python.autograph.core.errors import improved_errors
-from tensorflow.python.autograph.impl.api import ConversionOptions
-from tensorflow.python.autograph.impl.api import RunMode
+from tensorflow.python.autograph.core.errors import TfRuntimeError
 from tensorflow.python.autograph.impl.api import convert
 from tensorflow.python.autograph.impl.api import converted_call
 from tensorflow.python.autograph.impl.api import do_not_convert
+from tensorflow.python.autograph.impl.api import RunMode
 from tensorflow.python.autograph.impl.api import to_code
 from tensorflow.python.autograph.impl.api import to_graph
 from tensorflow.python.autograph.lang.directives import set_element_type
diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index fc2075b781..a7926266d5 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -141,7 +141,7 @@ class CallTreeTransformer(converter.Base):
       if hasattr(target_entity, '__pyct_is_compile_decorator'):
         return False
 
-      if target_entity in self.ctx.program.autograph_decorators:
+      if target_entity in self.ctx.program.options.strip_decorators:
         return False
 
       # Inspect the target function decorators. If any include a @convert
@@ -160,7 +160,7 @@ class CallTreeTransformer(converter.Base):
       for dec in target_node.decorator_list:
         decorator_fn = self._resolve_name(dec)
         if (decorator_fn is not None and
-            decorator_fn in self.ctx.program.autograph_decorators):
+            decorator_fn in self.ctx.program.options.strip_decorators):
           return False
 
     return True
@@ -238,15 +238,12 @@ class CallTreeTransformer(converter.Base):
     # Before we could convert all the time though, we'd need a reasonable
     # caching mechanism.
     template = """
-      ag__.converted_call(
-          func,
-          ag__.ConversionOptions.new(recursive=recursive_val),
-          args)
+      ag__.converted_call(func, options, args)
     """
     call_expr = templates.replace(
         template,
         func=node.func,
-        recursive_val=parser.parse_expression(str(self.ctx.program.recursive)),
+        options=self.ctx.program.options.to_ast(self.ctx.info.namespace),
         args=node.args)
     new_call = call_expr[0].value
     # TODO(mdan): Improve the template mechanism to better support this.
@@ -276,7 +273,7 @@ class CallTreeTransformer(converter.Base):
     # consider it graph ready.
     if anno.hasanno(node.func, 'live_val'):
       target_entity = anno.getanno(node.func, 'live_val')
-      if target_entity in self.ctx.program.autograph_decorators:
+      if target_entity in self.ctx.program.options.strip_decorators:
         if len(node.args) < 1:
           raise ValueError(
               'Found call to decorator function "%s", but it had no arguments. '
@@ -318,7 +315,7 @@ class CallTreeTransformer(converter.Base):
         # ensure that they return the correct value.
         return node
 
-      if self.ctx.program.recursive:
+      if self.ctx.program.options.recursive:
         node = self._insert_dynamic_conversion(node)
     return node
 
diff --git a/tensorflow/python/autograph/converters/decorators.py b/tensorflow/python/autograph/converters/decorators.py
index 724f0fe5ed..d41893063b 100644
--- a/tensorflow/python/autograph/converters/decorators.py
+++ b/tensorflow/python/autograph/converters/decorators.py
@@ -56,8 +56,7 @@ class DecoratorsTransformer(converter.Base):
       original_dec = anno.getanno(dec_func, anno.Basic.QN)
       dec_value = anno.getanno(dec_func, 'live_val')
 
-      if dec_value in self.ctx.program.autograph_decorators:
-        # AutoGraph decorators do not need to be preserved.
+      if dec_value in self.ctx.program.options.strip_decorators:
         continue
 
       # When using foo.bar.baz, we only really need to grab foo and import
diff --git a/tensorflow/python/autograph/converters/decorators_test.py b/tensorflow/python/autograph/converters/decorators_test.py
index fb31c8d583..abd76849d6 100644
--- a/tensorflow/python/autograph/converters/decorators_test.py
+++ b/tensorflow/python/autograph/converters/decorators_test.py
@@ -57,17 +57,14 @@ def self_transform_decorator(transform):
 
 class DecoratorsTest(converter_testing.TestCase):
 
-  def _transform(self, f, autograph_decorators):
+  def _transform(self, f, strip_decorators):
     namespace = {
         'self_transform_decorator': self_transform_decorator,
         'simple_decorator': simple_decorator,
         'converter_testing': converter_testing,
     }
     node, ctx = self.prepare(
-        f,
-        namespace,
-        recursive=False,
-        autograph_decorators=autograph_decorators)
+        f, namespace, recursive=False, strip_decorators=strip_decorators)
     node = decorators.transform(node, ctx)
     import_line = '\n'.join(ctx.program.additional_imports)
     result, _ = compiler.ast_to_object(node, source_prefix=import_line)
diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index 408a573ad0..1b07bed495 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -40,7 +40,7 @@ converter.ProgramContext contains mutable state across related entities. For
 example, when converting several functions that call one another, the
 ProgramContext should be shared across these entities.
 
-Below is the overal flow at conversion:
+Below is the overall flow at conversion:
 
     program_ctx = ProgramContext(<entities to convert>, <global settings>, ...)
     while <program_ctx has more entities to convert>:
@@ -71,7 +71,10 @@ from tensorflow.python.autograph.pyct import anno
 from tensorflow.python.autograph.pyct import ast_util
 from tensorflow.python.autograph.pyct import cfg
 from tensorflow.python.autograph.pyct import compiler
+from tensorflow.python.autograph.pyct import inspect_utils
+from tensorflow.python.autograph.pyct import parser
 from tensorflow.python.autograph.pyct import qual_names
+from tensorflow.python.autograph.pyct import templates
 from tensorflow.python.autograph.pyct import transformer
 from tensorflow.python.autograph.pyct.static_analysis import activity
 from tensorflow.python.autograph.pyct.static_analysis import live_values
@@ -86,43 +89,107 @@ from tensorflow.python.autograph.pyct.static_analysis import type_info
 # TODO(mdan): Add a test specific to this converter.
 
 
+class ConversionOptions(object):
+  """Immutable container for global conversion flags.
+
+  Attributes:
+    recursive: bool, whether to recursively convert any user functions or
+      classes that the converted function may use.
+    verbose: bool, whether to log the converted code.
+    strip_decorators: Tuple[Callable], contains decorators that should be in
+      excluded from the compiled output. By default, when converting a function
+      before the decorators are applied, the compiled output will include those
+      decorators.
+    force_conversion: bool, whether to force convertinng the target entity. When
+      force_conversion is turned off, the converter may decide to return the
+      function as-is.
+  """
+
+  def __init__(self,
+               recursive=False,
+               verbose=False,
+               strip_decorators=None,
+               force_conversion=False):
+    self.recursive = recursive
+    self.verbose = verbose
+    self.strip_decorators = strip_decorators or ()
+    self.force_conversion = force_conversion
+
+  def to_ast(self, namespace):
+    """Returns a representation of this object as an AST node.
+
+    The AST node encodes a constructor that would create an object with the
+    same contents.
+
+    Args:
+      namespace: Dict[str, Any], the namespace to use when serializing values to
+        names.
+
+    Returns:
+      ast.Node
+    """
+    template = """
+      constructor_name(
+          recursive=recursive_val,
+          verbose=verbose_val,
+          strip_decorators=strip_decorator_names,
+          force_conversion=force_conversion_val)
+    """
+
+    def as_qualified_name(o):
+      name = inspect_utils.getqualifiedname(namespace, o)
+      if not name:
+        raise ValueError('Could not locate entity {} in {}'.format(
+            o, namespace))
+      return name
+
+    strip_decorators_code = '({})'.format(', '.join(
+        tuple(as_qualified_name(o) for o in self.strip_decorators)))
+
+    expr_ast = templates.replace(
+        template,
+        constructor_name=parser.parse_expression(
+            as_qualified_name(ConversionOptions)),
+        recursive_val=parser.parse_expression(str(self.recursive)),
+        verbose_val=parser.parse_expression(str(self.verbose)),
+        strip_decorator_names=parser.parse_expression(strip_decorators_code),
+        force_conversion_val=parser.parse_expression(
+            str(self.force_conversion)))
+    return expr_ast[0].value
+
+
 class ProgramContext(object):
   """ProgramContext keeps track of converting function hierarchies.
 
   This object is mutable, and is updated during conversion. Not thread safe.
 
   Attributes:
-    recursive: bool, whether to recursively convert any functions that the
-        decorator function may call.
-    autograph_decorators: Tuple[Callable, ...], decorator functions that belong
-        to AutoGraph. These require special treatment.
+    options: ConversionOptions
     dependency_cache: Dict[Any, ast.AST], the original entities mapped to their
-        converted AST
+      converted AST
     additional_imports: Set[Any], additional entities which for any reason
-        cannot be attached after loading and need to be explicitly imported
-        in the generated code
-    name_map: Dict[str, str], map of original entity name to the name of
-        their converted counterparts
-    autograph_module: Module, a reference to the autograph module. This
-        needs to be specified by the caller to avoid circular dependencies.
+      cannot be attached after loading and need to be explicitly imported in the
+      generated code
+    name_map: Dict[str, str], map of original entity name to the name of their
+      converted counterparts
+    autograph_module: Module, a reference to the autograph module. This needs to
+      be specified by the caller to avoid circular dependencies.
     uncompiled_modules: Set[Tuple[str, ...]], with each tuple representing the
-        fully qualified name of a package containing functions that will not be
-        compiled.
+      fully qualified name of a package containing functions that will not be
+      compiled.
     required_imports: str, containing an import statement on each line. These
-        are all the imports necessary for the compiled code to run, in addition
-        to the closures of each entity, which are attached dynamically.
+      are all the imports necessary for the compiled code to run, in addition to
+      the closures of each entity, which are attached dynamically.
   """
 
   def __init__(
       self,
-      recursive,
-      autograph_decorators,
+      options,
       partial_types,
       autograph_module,
       uncompiled_modules,
   ):
-    self.recursive = recursive
-    self.autograph_decorators = autograph_decorators
+    self.options = options
     self.partial_types = partial_types if partial_types else ()
     self.autograph_module = autograph_module
     self.uncompiled_modules = uncompiled_modules
@@ -140,7 +207,7 @@ class ProgramContext(object):
                      tuple(self.additional_imports))
 
   def new_namer(self, namespace):
-    return naming.Namer(namespace, self.recursive, self.name_map,
+    return naming.Namer(namespace, self.options.recursive, self.name_map,
                         self.partial_types)
 
   def update_name_map(self, namer):
@@ -294,7 +361,7 @@ def standard_analysis(node, context, is_initial=False):
     node: ast.AST
     context: converter.EntityContext
     is_initial: bool, whether this is the initial analysis done on the input
-        source code
+      source code
 
   Returns:
     ast.AST, same as node, with the static analysis annotations added
diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py
index fcdbd0a82c..c701053124 100644
--- a/tensorflow/python/autograph/core/converter_testing.py
+++ b/tensorflow/python/autograph/core/converter_testing.py
@@ -94,21 +94,13 @@ class TestCase(test.TestCase):
       self.dynamic_calls.append(args)
       return 7
 
-    class ConversionOptions(object):
-      """Mock version of api.ConversionOptions."""
-
-      def __init__(self, recursive):
-        self.recursive = recursive
-
-      @classmethod
-      def new(cls, recursive):
-        cls(recursive)
-
     try:
       result, source = compiler.ast_to_object(node, include_source_map=True)
 
+      # TODO(mdan): Move this into self.prepare()
       result.tf = self.make_fake_mod('fake_tf', *symbols)
-      fake_ag = self.make_fake_mod('fake_ag', converted_call, ConversionOptions)
+      fake_ag = self.make_fake_mod('fake_ag', converted_call,
+                                   converter.ConversionOptions)
       fake_ag.__dict__.update(operators.__dict__)
       fake_ag.__dict__['utils'] = utils
       fake_ag.__dict__['rewrite_graph_construction_error'] = (
@@ -161,14 +153,16 @@ class TestCase(test.TestCase):
               arg_types=None,
               owner_type=None,
               recursive=True,
-              autograph_decorators=()):
+              strip_decorators=()):
+    namespace['ConversionOptions'] = converter.ConversionOptions
+
     node, source = parser.parse_entity(test_fn)
     node = node.body[0]
     if namer is None:
       namer = FakeNamer()
     program_ctx = converter.ProgramContext(
-        recursive=recursive,
-        autograph_decorators=autograph_decorators,
+        options=converter.ConversionOptions(
+            recursive=recursive, strip_decorators=strip_decorators),
         partial_types=None,
         autograph_module=None,
         uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index 1dc97d2331..3c31762cab 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -18,9 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
 import functools
-
 from enum import Enum
 
 from tensorflow.python.autograph.core import config
@@ -39,41 +37,6 @@ from tensorflow.python.util import tf_inspect
 # (currently we require (module + class name, type))
 
 
-class ConversionOptions(
-    collections.namedtuple('ConversionOptions',
-                           ('recursive', 'verbose', 'strip_decorators',
-                            'force_conversion', 'arg_types'))):
-  """Container for conversion flags.
-
-  Attributes:
-    recursive: bool, whether to recursively convert any user functions or
-        classes that the converted function may use.
-    verbose: bool, whether to log the compiled code.
-    strip_decorators: Tuple[Callable], contains decorators that should be in
-        excluded from the compiled output. By default, when converting a
-        function before the decorators are applied, the compiled output will
-        include those decorators.
-    force_conversion: bool, whether to force convertinng the target entity.
-        When force_conversion is turned off, the converter may decide to
-        return the function as-is.
-    arg_types: Optional[Dict[Text, Type]], type hints for symbols including
-        function arguments.
-  """
-
-  @classmethod
-  def new(cls,
-          recursive=False,
-          verbose=False,
-          strip_decorators=None,
-          force_conversion=False,
-          arg_types=None):
-    return cls(recursive=recursive,
-               verbose=verbose,
-               strip_decorators=strip_decorators or (),
-               force_conversion=force_conversion,
-               arg_types=arg_types or {})
-
-
 # TODO(mdan): This should behave like to_graph (e.g. convert statically).
 def convert(recursive=False, verbose=False):
   """Decorator that compiles a function to use TensorFlow ops.
@@ -85,13 +48,14 @@ def convert(recursive=False, verbose=False):
 
   Args:
     recursive: bool, whether to recursively convert any functions or classes
-        that the converted function may use.
+      that the converted function may use.
     verbose: bool, whether to output the compiled code in the logs.
 
   Returns:
     Callable, a decorator that converts the given function into an equivalent
     function that uses TensorFlow ops.
   """
+
   def decorator(f):
     """Decorator implementation."""
 
@@ -99,7 +63,7 @@ def convert(recursive=False, verbose=False):
     def wrapper(*args, **kwargs):
       return converted_call(
           f,
-          ConversionOptions.new(
+          converter.ConversionOptions(
               recursive=recursive,
               verbose=verbose,
               force_conversion=True,
@@ -137,10 +101,10 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None):
 
   Args:
     run_as: RunMode, specifies how to use the function in TensorFlow.
-    return_dtypes: Optional[Iterable[
-        Union[tf.DType, utils.py_func.MatchDType]]], the return data types of
-        the converted function, if run_as is RunMode.PY_FUNC. Ignored otherwise.
-        May be set to None if the function has no return values.
+    return_dtypes: Optional[Iterable[ Union[tf.DType,
+      utils.py_func.MatchDType]]], the return data types of the converted
+      function, if run_as is RunMode.PY_FUNC. Ignored otherwise. May be set to
+      None if the function has no return values.
 
   Returns:
     Callable, a decorator that wraps the original function.
@@ -219,13 +183,12 @@ def converted_call(f, options, *args, **kwargs):
     NotImplementedError('unknown callable type "%s"' % type(f))
 
   arg_values = tf_inspect.getcallargs(arg_map_target, *args, **kwargs)
+  arg_types = {}
   for name, arg in arg_values.items():
     if arg is unknown_arg_value:
       continue
     arg_class = arg.__class__
-    # If arg_value_hints specifies any name, use that instead.
-    if name not in options.arg_types:
-      options.arg_types[name] = (arg_class.__name__, arg_class)
+    arg_types[name] = (arg_class.__name__, arg_class)
 
   # When called from within a decorator, this is the only indication that
   # the function is a method - it appears that the decorator is applied
@@ -243,7 +206,7 @@ def converted_call(f, options, *args, **kwargs):
       recursive=options.recursive,
       verbose=options.verbose,
       arg_values=arg_values,
-      arg_types=options.arg_types,
+      arg_types=arg_types,
       partial_types=partial_types,
       strip_decorators=options.strip_decorators)
   return converted_f(*effective_args, **kwargs)
@@ -270,15 +233,15 @@ def to_graph(e,
   Args:
     e: Union[Callable, Type], the Python entity to convert.
     recursive: bool, whether to recursively convert any functions that the
-        converted function may call.
+      converted function may call.
     verbose: bool, whether to output the compiled code in the logs.
     arg_values: Optional[Dict[Text, Any]], value hints for symbols including
-        function arguments.
+      function arguments.
     arg_types: Optional[Dict[Text, Type]], type hints for symbols including
-        function arguments.
+      function arguments.
     partial_types: Set[Type], reserved for internal use.
     strip_decorators: Tuple[Callable], same as
-        ConversionOptions.strip_decorators.
+      ConversionOptions.strip_decorators.
 
   Returns:
     Union[Callable, Type], the converted entity, which is the same kind as e
@@ -293,8 +256,10 @@ def to_graph(e,
   strip_decorators += (convert, do_not_convert, converted_call)
 
   program_ctx = converter.ProgramContext(
-      recursive=recursive,
-      autograph_decorators=strip_decorators,
+      options=converter.ConversionOptions(
+          recursive=recursive,
+          verbose=verbose,
+          strip_decorators=strip_decorators),
       partial_types=partial_types,
       autograph_module=tf_inspect.getmodule(to_graph),
       uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
@@ -352,11 +317,11 @@ def to_code(e,
   Args:
     e: Union[Callable, Type], the Python entity to convert.
     recursive: bool, whether to recursively convert any functions that the
-        converted function may call.
+      converted function may call.
     arg_values: Optional[Dict[Text, Any]], value hints for symbols including
-        function arguments.
+      function arguments.
     arg_types: Optional[Dict[Text, Type]], type hints for symbols including
-        function arguments.
+      function arguments.
     partial_types: Set[Type], reserved for internal use.
     indentation: Text, when to use for each level of indentation.
 
@@ -364,8 +329,9 @@ def to_code(e,
     Text, the converted code.
   """
   program_ctx = converter.ProgramContext(
-      recursive=recursive,
-      autograph_decorators=(convert, do_not_convert, converted_call),
+      options=converter.ConversionOptions(
+          recursive=recursive,
+          strip_decorators=(convert, do_not_convert, converted_call)),
       partial_types=partial_types,
       autograph_module=tf_inspect.getmodule(to_graph),
       uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py
index 8ce5022c0a..8567c66bf1 100644
--- a/tensorflow/python/autograph/impl/api_test.py
+++ b/tensorflow/python/autograph/impl/api_test.py
@@ -21,7 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.autograph import utils
-from tensorflow.python.autograph.core import config
+from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.impl import api
 from tensorflow.python.autograph.pyct import parser
 from tensorflow.python.autograph.utils import py_func
@@ -29,15 +29,10 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.platform import test
 from tensorflow.python.util import tf_inspect
 
-
 tf = utils.fake_tf()
 
-class ApiTest(test.TestCase):
 
-  def setUp(self):
-    config.COMPILED_IMPORT_STATEMENTS = (
-        'from __future__ import print_function',
-    )
+class ApiTest(test.TestCase):
 
   def test_decorator_recurses(self):
 
@@ -179,9 +174,8 @@ class ApiTest(test.TestCase):
       @api.convert(recursive=True)
       def test_method(self, x, s, a):
         while tf.reduce_sum(x) > s:
-          x //= api.converted_call(
-              self.called_member,
-              api.ConversionOptions.new(), self, a)
+          x //= api.converted_call(self.called_member,
+                                   converter.ConversionOptions(), self, a)
         return x
 
     tc = TestClass()
@@ -192,7 +186,7 @@ class ApiTest(test.TestCase):
       self.assertListEqual([0, 1], sess.run(x).tolist())
 
   def test_converted_call_builtin(self):
-    x = api.converted_call(range, api.ConversionOptions.new(), 3)
+    x = api.converted_call(range, converter.ConversionOptions(), 3)
     self.assertEqual((0, 1, 2), tuple(x))
 
   def test_converted_call_function(self):
@@ -203,7 +197,7 @@ class ApiTest(test.TestCase):
       return x
 
     with self.cached_session() as sess:
-      x = api.converted_call(test_fn, api.ConversionOptions.new(),
+      x = api.converted_call(test_fn, converter.ConversionOptions(),
                              constant_op.constant(-1))
       self.assertEqual(1, sess.run(x))
 
@@ -221,7 +215,7 @@ class ApiTest(test.TestCase):
 
     with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(tc.test_method, api.ConversionOptions.new(), tc)
+      x = api.converted_call(tc.test_method, converter.ConversionOptions(), tc)
       self.assertEqual(1, sess.run(x))
 
   def test_converted_call_method_by_class(self):
@@ -238,9 +232,8 @@ class ApiTest(test.TestCase):
 
     with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(
-          TestClass.test_method,
-          api.ConversionOptions.new(), tc)
+      x = api.converted_call(TestClass.test_method,
+                             converter.ConversionOptions(), tc)
       self.assertEqual(1, sess.run(x))
 
   def test_converted_call_callable_object(self):
@@ -257,7 +250,7 @@ class ApiTest(test.TestCase):
 
     with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(tc, api.ConversionOptions.new())
+      x = api.converted_call(tc, converter.ConversionOptions())
       self.assertEqual(1, sess.run(x))
 
   def test_converted_call_constructor(self):
@@ -273,7 +266,7 @@ class ApiTest(test.TestCase):
         return self.x
 
     with self.cached_session() as sess:
-      tc = api.converted_call(TestClass, api.ConversionOptions.new(),
+      tc = api.converted_call(TestClass, converter.ConversionOptions(),
                               constant_op.constant(-1))
       # tc is now a converted object.
       x = tc.test_method()
@@ -285,12 +278,12 @@ class ApiTest(test.TestCase):
       return x == 0
 
     with self.cached_session() as sess:
-      x = api.converted_call(f, api.ConversionOptions.new(),
+      x = api.converted_call(f, converter.ConversionOptions(),
                              constant_op.constant(0))
       self.assertTrue(sess.run(x))
 
       converted_f = api.to_graph(f)
-      x = api.converted_call(converted_f, api.ConversionOptions.new(),
+      x = api.converted_call(converted_f, converter.ConversionOptions(),
                              constant_op.constant(0))
       self.assertTrue(sess.run(x))
 
diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index 52abd40626..0374406ff2 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -145,7 +145,7 @@ def entity_to_graph(o, program_ctx, arg_values, arg_types):
 
   program_ctx.add_to_cache(o, node)
 
-  if program_ctx.recursive:
+  if program_ctx.options.recursive:
     while True:
       candidate = None
       for obj in program_ctx.name_map.keys():
@@ -256,7 +256,7 @@ def _add_self_references(namespace, autograph_module):
     # internal modules.
     ag_internal = imp.new_module('autograph')
     ag_internal.converted_call = autograph_module.converted_call
-    ag_internal.ConversionOptions = autograph_module.ConversionOptions
+    ag_internal.ConversionOptions = converter.ConversionOptions
     ag_internal.utils = utils
     ag_internal.function_scope = function_wrapping.function_scope
     ag_internal.rewrite_graph_construction_error = (
diff --git a/tensorflow/python/autograph/impl/conversion_test.py b/tensorflow/python/autograph/impl/conversion_test.py
index 07d0f75129..34550d0112 100644
--- a/tensorflow/python/autograph/impl/conversion_test.py
+++ b/tensorflow/python/autograph/impl/conversion_test.py
@@ -34,8 +34,7 @@ class ConversionTest(test.TestCase):
 
   def _simple_program_ctx(self):
     return converter.ProgramContext(
-        recursive=True,
-        autograph_decorators=(),
+        options=converter.ConversionOptions(recursive=True),
         partial_types=(),
         autograph_module=api,
         uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
diff --git a/tensorflow/python/autograph/utils/testing.py b/tensorflow/python/autograph/utils/testing.py
index cb4785d0dc..dd6bdc8931 100644
--- a/tensorflow/python/autograph/utils/testing.py
+++ b/tensorflow/python/autograph/utils/testing.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import imp
 
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import math_ops
 
 
@@ -28,6 +29,7 @@ def fake_tf():
   """Creates a fake module that looks like TensorFlow, for testing."""
   mod = imp.new_module('tensorflow')
   mod_contents = dict()
+  mod_contents.update(gen_math_ops.__dict__)
   mod_contents.update(math_ops.__dict__)
   mod_contents.update(ops.__dict__)
   mod_contents.update(mod.__dict__)
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 191279abae..e399a4abb5 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -915,11 +915,8 @@ def func_graph_from_py_func(name,
         func_outputs = autograph.converted_call(
             python_func,
             autograph.ConversionOptions(
-                verbose=True,
-                recursive=True,
-                force_conversion=False,
-                strip_decorators=(defun,),
-                arg_types={}), *func_args, **func_kwargs)
+                verbose=True, recursive=True, strip_decorators=(defun,)),
+            *func_args, **func_kwargs)
       else:
         func_outputs = python_func(*func_args, **func_kwargs)
       # invariant: `func_outputs` contains only Tensors and `None`s.
-- 
GitLab


From 3fb4383c772ed8c5dc25bc903d6be694b70bf70a Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Wed, 10 Oct 2018 13:54:48 -0700
Subject: [PATCH 0721/1085] Don't force random ops (that generate data) to be
 on the CPU.

It can be slower to generate the data (specially for larger pieces of data) on the host, and then copy to GPU.

Before:
entry {
  name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_64_channels_first"
  iters: 1000
  wall_time: 0.00114285802841
  extras {
    key: "examples_per_sec"
    value {
      double_value: 55999.9566079
    }
  }
}

entry {
  name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_128_channels_first"
  iters: 1000
  wall_time: 0.00138294219971
  extras {
    key: "examples_per_sec"
    value {
      double_value: 92556.2905139
    }
  }
}

entry {
  name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_256_channels_first"
  iters: 1000
  wall_time: 0.00216886901855
  extras {
    key: "examples_per_sec"
    value {
      double_value: 118033.868256
    }
  }
}

After:
entry {
  name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_64_channels_first"
  iters: 1000
  wall_time: 0.000907440900803
  extras {
    key: "examples_per_sec"
    value {
      double_value: 70528.0089793
    }
  }
}

entry {
  name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_128_channels_first"
  iters: 1000
  wall_time: 0.000939697980881
  extras {
    key: "examples_per_sec"
    value {
      double_value: 136213.9779
    }
  }
}

entry {
  name: "MnistEagerGanBenchmark.eager_generate_gpu_batch_256_channels_first"
  iters: 1000
  wall_time: 0.00147917103767
  extras {
    key: "examples_per_sec"
    value {
      double_value: 173069.911105
    }
  }
}

PiperOrigin-RevId: 216587562
---
 .../core/common_runtime/eager/execute.cc      | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index a52f933d75..0f46483ce5 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -32,6 +32,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/env.h"
@@ -580,6 +581,22 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals,
 #endif
 }
 
+// These ops are not pinnable since they generate data. It can be slower to
+// generate and then copy the data instead of just generating the data on the
+// device directly.
+bool IsPinnableOp(const string& op_type) {
+  static const gtl::FlatSet<string>* unpinnable_ops = new gtl::FlatSet<string>({
+      "RandomUniform",
+      "RandomUniformInt",
+      "RandomNormal",
+      "StatelessRandomUniform",
+      "StatelessRandomUniformInt",
+      "StatelessRandomNormal",
+  });
+
+  return unpinnable_ops->find(op_type) == unpinnable_ops->end();
+}
+
 // The Op device may be updated if:
 // - A resource touching input is specified: all resource-touching ops run in
 // the device the resource is, regardless of anything else that has been
@@ -591,7 +608,8 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals,
 Status MaybeUpdateOpDevice(EagerOperation* op) {
   EagerContext* ctx = op->EagerContext();
   bool device_set_for_resource_variable = false;
-  bool all_inputs_eligible_for_cpu_pinning = ctx->PinSmallOpsToCPU();
+  bool all_inputs_eligible_for_cpu_pinning =
+      ctx->PinSmallOpsToCPU() && IsPinnableOp(op->Name());
 
   for (int i = 0; i < op->Inputs().size(); ++i) {
     Device* input_op_device = nullptr;
-- 
GitLab


From ef64a86fe85972acda3a5195b78486c143b51b97 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Wed, 10 Oct 2018 14:09:00 -0700
Subject: [PATCH 0722/1085] Update to new toolchain with Clang 8.0.0 r340178. 
 - Also update bazel to 0.16.1 because the new toolchain image requires it.

PiperOrigin-RevId: 216590129
---
 tensorflow/workspace.bzl     | 8 ++++----
 third_party/toolchains/BUILD | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b03af53cff..7238a74b73 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -694,11 +694,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "bazel_toolchains",
-        sha256 = "3b604699685c5c65dd3f6f17425570a4b2f00ddba2f750db15acc72e55bb098b",
-        strip_prefix = "bazel-toolchains-37acf1841ab1475c98a152cb9e446460c8ae29e1",
+        sha256 = "07dfbe80638eb1fe681f7c07e61b34b579c6710c691e49ee90ccdc6e9e75ebbb",
+        strip_prefix = "bazel-toolchains-9a111bd82161c1fbe8ed17a593ca1023fd941c70",
         urls = [
-            "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz",
-            "https://github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz",
+            "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/9a111bd82161c1fbe8ed17a593ca1023fd941c70.tar.gz",
+            "https://github.com/bazelbuild/bazel-toolchains/archive/9a111bd82161c1fbe8ed17a593ca1023fd941c70.tar.gz",
         ],
     )
 
diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD
index bcbc4dda11..6e1416ced1 100644
--- a/third_party/toolchains/BUILD
+++ b/third_party/toolchains/BUILD
@@ -17,7 +17,7 @@ platform(
     remote_execution_properties = """
         properties: {
             name: "container-image"
-            value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:495a025ed5e273cfa5d53357ef93ac20500c008994e0be106c509f51555fb93c"
+            value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:63a0e981a4e7ce5da2a851cf063e430f72947fd999d9336b7e54e2eebe8e0bf5"
         }""",
 )
 
-- 
GitLab


From 8e468c33d22b8da61b02254d648759860f9426bb Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Wed, 10 Oct 2018 14:21:01 -0700
Subject: [PATCH 0723/1085] [TF CTC] Add ctc_beam_search_decoder_v2, which
 disables merge_repeated.

Followup of PRs #15586 and #21826.

PiperOrigin-RevId: 216592105
---
 tensorflow/python/ops/ctc_ops.py              | 40 ++++++++++++++++++-
 .../tools/api/golden/v1/tensorflow.nn.pbtxt   |  4 ++
 .../tools/api/golden/v2/tensorflow.nn.pbtxt   |  2 +-
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py
index 32d455bdad..4b0f528dfb 100644
--- a/tensorflow/python/ops/ctc_ops.py
+++ b/tensorflow/python/ops/ctc_ops.py
@@ -231,7 +231,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
           log_probabilities)
 
 
-@tf_export("nn.ctc_beam_search_decoder")
+@tf_export(v1=["nn.ctc_beam_search_decoder"])
 def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100,
                             top_paths=1, merge_repeated=True):
   """Performs beam search decoding on the logits given in input.
@@ -282,6 +282,44 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100,
       log_probabilities)
 
 
+@tf_export("nn.ctc_beam_search_decoder", v1=["nn.ctc_beam_search_decoder_v2"])
+def ctc_beam_search_decoder_v2(inputs, sequence_length, beam_width=100,
+                               top_paths=1):
+  """Performs beam search decoding on the logits given in input.
+
+  **Note** The `ctc_greedy_decoder` is a special case of the
+  `ctc_beam_search_decoder` with `top_paths=1` and `beam_width=1` (but
+  that decoder is faster for this special case).
+
+  Args:
+    inputs: 3-D `float` `Tensor`, size
+      `[max_time, batch_size, num_classes]`.  The logits.
+    sequence_length: 1-D `int32` vector containing sequence lengths,
+      having size `[batch_size]`.
+    beam_width: An int scalar >= 0 (beam search beam width).
+    top_paths: An int scalar >= 0, <= beam_width (controls output size).
+
+  Returns:
+    A tuple `(decoded, log_probabilities)` where
+    decoded: A list of length top_paths, where `decoded[j]`
+      is a `SparseTensor` containing the decoded outputs:
+      `decoded[j].indices`: Indices matrix `[total_decoded_outputs[j], 2]`;
+        The rows store: `[batch, time]`.
+      `decoded[j].values`: Values vector, size `[total_decoded_outputs[j]]`.
+        The vector stores the decoded classes for beam `j`.
+      `decoded[j].dense_shape`: Shape vector, size `(2)`.
+        The shape values are: `[batch_size, max_decoded_length[j]]`.
+    log_probability: A `float` matrix `[batch_size, top_paths]` containing
+        sequence log-probabilities.
+  """
+
+  # Note, merge_repeated is an invalid optimization that is removed from the
+  # public API: it returns low probability paths.
+  return ctc_beam_search_decoder(inputs, sequence_length=sequence_length,
+                                 beam_width=beam_width, top_paths=top_paths,
+                                 merge_repeated=False)
+
+
 ops.NotDifferentiable("CTCGreedyDecoder")
 
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
index 9b28ce5746..b7f5d88fd4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt
@@ -92,6 +92,10 @@ tf_module {
     name: "ctc_beam_search_decoder"
     argspec: "args=[\'inputs\', \'sequence_length\', \'beam_width\', \'top_paths\', \'merge_repeated\'], varargs=None, keywords=None, defaults=[\'100\', \'1\', \'True\'], "
   }
+  member_method {
+    name: "ctc_beam_search_decoder_v2"
+    argspec: "args=[\'inputs\', \'sequence_length\', \'beam_width\', \'top_paths\'], varargs=None, keywords=None, defaults=[\'100\', \'1\'], "
+  }
   member_method {
     name: "ctc_greedy_decoder"
     argspec: "args=[\'inputs\', \'sequence_length\', \'merge_repeated\'], varargs=None, keywords=None, defaults=[\'True\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
index 9b28ce5746..f6c5e42034 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
@@ -90,7 +90,7 @@ tf_module {
   }
   member_method {
     name: "ctc_beam_search_decoder"
-    argspec: "args=[\'inputs\', \'sequence_length\', \'beam_width\', \'top_paths\', \'merge_repeated\'], varargs=None, keywords=None, defaults=[\'100\', \'1\', \'True\'], "
+    argspec: "args=[\'inputs\', \'sequence_length\', \'beam_width\', \'top_paths\'], varargs=None, keywords=None, defaults=[\'100\', \'1\'], "
   }
   member_method {
     name: "ctc_greedy_decoder"
-- 
GitLab


From ab69b3450ff9469448b0b1c3e365e860d9ba1600 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 10 Oct 2018 14:42:08 -0700
Subject: [PATCH 0724/1085] Add conversion, tests for Reshape and Transpose

---
 tensorflow/contrib/tensorrt/BUILD             |   1 +
 .../contrib/tensorrt/convert/convert_graph.cc |   2 +
 .../contrib/tensorrt/convert/convert_nodes.cc |  57 ++++++
 .../tensorrt/test/reshape_transpose_test.py   | 188 ++++++++++++++++++
 4 files changed, 248 insertions(+)
 create mode 100644 tensorflow/contrib/tensorrt/test/reshape_transpose_test.py

diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 5c16fcb760..d756857f18 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -455,6 +455,7 @@ cuda_py_tests(
         "test/multi_connection_neighbor_engine_test.py",
         "test/neighboring_engine_test.py",
         "test/rank_two_test.py",
+        "test/reshape_transpose_test.py",
         "test/vgg_block_nchw_test.py",
         "test/vgg_block_test.py",
     ],
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 7ad9bf22d3..4d41761fdb 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -115,6 +115,8 @@ bool IsTensorRTCandidate(const tensorflow::Node* node) {
     "Sqrt",
     "Abs",
     "Neg",
+    "Transpose",
+    "Reshape",
 #if NV_TENSORRT_MAJOR > 3
     "MatMul",
     "BatchMatMul",
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 0ce891782e..e2ed372f12 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -1467,6 +1467,59 @@ tensorflow::Status ConvertPlugin(Converter& ctx,
   return tensorflow::Status::OK();
 }
 
+tensorflow::Status ConvertTranspose(
+    Converter& ctx, const tensorflow::NodeDef& node_def,
+    const std::vector<TRT_TensorOrWeights>& inputs,
+    std::vector<TRT_TensorOrWeights>* outputs) {
+  if (inputs.size() != 2 || !inputs.at(0).is_tensor() ||
+      !inputs.at(1).is_weights()) {
+    return tensorflow::errors::InvalidArgument(
+        "Input expects tensor and weights, at", node_def.name());
+  }
+  nvinfer1::ITensor* input_tensor = const_cast<nvinfer1::ITensor*>(
+      inputs.at(0).tensor());
+
+  TRT_ShapedWeights weights = inputs.at(1).weights();
+  const int* weights_ptr = static_cast<int*>(const_cast<void*>(
+      weights.GetValues()));
+  std::vector<int> perm(weights.count());
+  for (int i = 0; i < weights.count(); i++) {
+    perm[i] = weights_ptr[i];
+  }
+
+  nvinfer1::ITensor* output_tensor = ctx.TransposeTensor(input_tensor, perm);
+  outputs->push_back(TRT_TensorOrWeights(output_tensor));
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status ConvertReshape(
+    Converter& ctx, const tensorflow::NodeDef& node_def,
+    const std::vector<TRT_TensorOrWeights>& inputs,
+    std::vector<TRT_TensorOrWeights>* outputs) {
+  if (inputs.size() != 2 || !inputs.at(1).is_weights()) {
+    return tensorflow::errors::InvalidArgument(
+        "Input expects weights for shape, at", node_def.name());
+  }
+
+  TRT_ShapedWeights weights = inputs.at(1).weights();
+  const int* weights_ptr = static_cast<int*>(const_cast<void*>(
+      weights.GetValues()));
+  nvinfer1::Dims new_shape;
+  // Ignore first (batch) dimension because TRT abstracts batch away
+  new_shape.nbDims = weights.count() - 1;
+  for (int i = 1; i < weights.count(); i++) {
+    new_shape.d[i-1] = weights_ptr[i];
+  }
+
+  const nvinfer1::ITensor* output_tensor;
+  TFTRT_RETURN_ERROR_IF_FALSE(
+      PrepareTensorForShape(ctx, inputs.at(0), new_shape, &output_tensor),
+      node_def.name());
+  outputs->push_back(TRT_TensorOrWeights(
+      const_cast<nvinfer1::ITensor*>(output_tensor)));
+  return tensorflow::Status::OK();
+}
+
 tensorflow::Status ConvertConv2D(Converter& ctx,
                                  const tensorflow::NodeDef& node_def,
                                  const std::vector<TRT_TensorOrWeights>& inputs,
@@ -2666,6 +2719,10 @@ void Converter::register_op_converters() {
   op_registry_["Sqrt"] = ConvertUnary;
   op_registry_["Abs"] = ConvertUnary;
   op_registry_["Neg"] = ConvertUnary;
+
+  op_registry_["Transpose"] = ConvertTranspose;
+  op_registry_["Reshape"] = ConvertReshape;
+
 #if NV_TENSORRT_MAJOR == 3
   op_registry_["Mean"] = ConvertReducePool;
 #endif
diff --git a/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py b/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
new file mode 100644
index 0000000000..a81e3c7bc6
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
@@ -0,0 +1,188 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Basic tests for TF-TensorRT integration."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.tensorrt.python import trt_convert
+from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.platform import test
+
+
+class SimpleReshapeTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    # TODO(aaroey): test graph with different dtypes.
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [100, 24, 24, 2]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device("/GPU:0"):
+        reshape = array_ops.reshape(inp, [-1, 24*24*2])
+        print('RESHAPE SHAPE', reshape.get_shape().as_list())
+        identity = array_ops.identity(reshape, "identity")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(100, 24*24*2)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return ["my_trt_op_0"]
+
+class ReshapeInverseTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    # TODO(aaroey): test graph with different dtypes.
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [100, 24, 24, 2]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device("/GPU:0"):
+        reshape = array_ops.reshape(inp, [-1, 24*24*2])
+        reshape = array_ops.reshape(reshape, [-1, 24, 24, 2])
+        identity = array_ops.identity(reshape, "identity")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(100, 24, 24, 2)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return ["my_trt_op_0"]
+
+class ManyReshapeTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    # TODO(aaroey): test graph with different dtypes.
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [100, 24, 24, 2]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device("/GPU:0"):
+        reshape = array_ops.reshape(inp, [-1, 24*24, 2])
+        reshape = array_ops.reshape(reshape, [-1, 24*2, 24])
+        reshape = array_ops.reshape(reshape, [-1, 24, 24*2])
+        reshape = array_ops.reshape(reshape, [-1, 6, 4, 24, 2])
+        reshape = array_ops.reshape(reshape, [-1, 6, 4, 6, 4, 2])
+        reshape = array_ops.reshape(reshape, [-1, 6, 4, 6, 4, 2, 1])
+        reshape = array_ops.reshape(reshape, [-1, 24, 24, 2])
+        identity = array_ops.identity(reshape, "identity")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(100, 24, 24, 2)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return ["my_trt_op_0"]
+
+class SimpleTransposeTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    # TODO(aaroey): test graph with different dtypes.
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [100, 24, 24, 2]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device("/GPU:0"):
+        # to NCHW
+        transpose = array_ops.transpose(inp, [0, 3, 1, 2])
+        identity = array_ops.identity(transpose, "identity")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(100, 2, 24, 24)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return ["my_trt_op_0"]
+
+class TransposeInverseTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    # TODO(aaroey): test graph with different dtypes.
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [100, 24, 24, 2]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device("/GPU:0"):
+        # to NCHW
+        transpose = array_ops.transpose(inp, [0, 3, 1, 2])
+        # back to NHWC
+        transpose = array_ops.transpose(transpose, [0, 2, 3, 1])
+        identity = array_ops.identity(transpose, "identity")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(100, 24, 24, 2)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return ["my_trt_op_0"]
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 307ef84f3c53bb35222182afaf40385fae88ec73 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 10 Oct 2018 14:44:05 -0700
Subject: [PATCH 0725/1085] Remove leftover todo comments

---
 tensorflow/contrib/tensorrt/test/reshape_transpose_test.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py b/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
index a81e3c7bc6..81dad5e1a4 100644
--- a/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
+++ b/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
@@ -36,7 +36,6 @@ class SimpleReshapeTest(trt_test.TfTrtIntegrationTestBase):
 
   def GetParams(self):
     """Create a graph containing single segment."""
-    # TODO(aaroey): test graph with different dtypes.
     dtype = dtypes.float32
     input_name = "input"
     input_dims = [100, 24, 24, 2]
@@ -65,7 +64,6 @@ class ReshapeInverseTest(trt_test.TfTrtIntegrationTestBase):
 
   def GetParams(self):
     """Create a graph containing single segment."""
-    # TODO(aaroey): test graph with different dtypes.
     dtype = dtypes.float32
     input_name = "input"
     input_dims = [100, 24, 24, 2]
@@ -94,7 +92,6 @@ class ManyReshapeTest(trt_test.TfTrtIntegrationTestBase):
 
   def GetParams(self):
     """Create a graph containing single segment."""
-    # TODO(aaroey): test graph with different dtypes.
     dtype = dtypes.float32
     input_name = "input"
     input_dims = [100, 24, 24, 2]
@@ -128,7 +125,6 @@ class SimpleTransposeTest(trt_test.TfTrtIntegrationTestBase):
 
   def GetParams(self):
     """Create a graph containing single segment."""
-    # TODO(aaroey): test graph with different dtypes.
     dtype = dtypes.float32
     input_name = "input"
     input_dims = [100, 24, 24, 2]
@@ -157,7 +153,6 @@ class TransposeInverseTest(trt_test.TfTrtIntegrationTestBase):
 
   def GetParams(self):
     """Create a graph containing single segment."""
-    # TODO(aaroey): test graph with different dtypes.
     dtype = dtypes.float32
     input_name = "input"
     input_dims = [100, 24, 24, 2]
-- 
GitLab


From 5275bd0ae0306ac5881c5325656a9e6e4a963df8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 14:53:37 -0700
Subject: [PATCH 0726/1085] create a reusable function for initializing
 tensorflow from tf lite.

PiperOrigin-RevId: 216597926
---
 tensorflow/contrib/lite/testing/BUILD         | 21 +++++++++++--
 .../contrib/lite/testing/init_tensorflow.cc   | 31 +++++++++++++++++++
 .../contrib/lite/testing/init_tensorflow.h    | 26 ++++++++++++++++
 .../lite/testing/tflite_diff_example_test.cc  |  6 ++--
 4 files changed, 79 insertions(+), 5 deletions(-)
 create mode 100644 tensorflow/contrib/lite/testing/init_tensorflow.cc
 create mode 100644 tensorflow/contrib/lite/testing/init_tensorflow.h

diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index b476445b3a..f7f812343b 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -296,6 +296,23 @@ cc_test(
     ],
 )
 
+cc_library(
+    name = "init_tensorflow",
+    srcs = [
+        "init_tensorflow.cc",
+    ],
+    hdrs = [
+        "init_tensorflow.h",
+    ],
+    visibility = [
+        "//tensorflow/contrib/lite/java/src/main/native:__subpackages__",
+        "//tensorflow/contrib/lite/testing:__subpackages__",
+    ],
+    deps = [
+        "//tensorflow/core:lib",
+    ],
+)
+
 cc_library(
     name = "tflite_diff_util",
     srcs = ["tflite_diff_util.cc"],
@@ -348,9 +365,9 @@ tf_cc_test(
         "tflite_not_portable",
     ],
     deps = [
+        ":init_tensorflow",
         ":tflite_diff_flags",
         ":tflite_diff_util",
-        "//tensorflow/core:lib",
     ],
 )
 
@@ -358,9 +375,9 @@ cc_binary(
     name = "tflite_diff",
     srcs = ["tflite_diff_example_test.cc"],
     deps = [
+        ":init_tensorflow",
         ":tflite_diff_flags",
         ":tflite_diff_util",
-        "//tensorflow/core:lib",
     ],
 )
 
diff --git a/tensorflow/contrib/lite/testing/init_tensorflow.cc b/tensorflow/contrib/lite/testing/init_tensorflow.cc
new file mode 100644
index 0000000000..f3dcf620a2
--- /dev/null
+++ b/tensorflow/contrib/lite/testing/init_tensorflow.cc
@@ -0,0 +1,31 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/testing/init_tensorflow.h"
+
+#include <cstdlib>
+#include <cstring>
+
+#include "tensorflow/core/platform/init_main.h"
+
+namespace tflite {
+void InitTensorFlow() {
+  static const char* kFakeName = "fake program name";
+  int argc = 1;
+  char* fake_name_copy = strdup(kFakeName);
+  char** argv = &fake_name_copy;
+  ::tensorflow::port::InitMain(kFakeName, &argc, &argv);
+  free(fake_name_copy);
+}
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/testing/init_tensorflow.h b/tensorflow/contrib/lite/testing/init_tensorflow.h
new file mode 100644
index 0000000000..2cc89bbbca
--- /dev/null
+++ b/tensorflow/contrib/lite/testing/init_tensorflow.h
@@ -0,0 +1,26 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_TESTING_INIT_TENSORFLOW_H_
+#define TENSORFLOW_CONTRIB_LITE_TESTING_INIT_TENSORFLOW_H_
+
+namespace tflite {
+
+// Initializes tensorflow's libraries. Note that this simulates an empty
+// command line, so flags are not initialized.
+void InitTensorFlow();
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_CONTRIB_LITE_TESTING_INIT_TENSORFLOW_H_
diff --git a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc
index e85d9c525a..49696ac76b 100644
--- a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc
+++ b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc
@@ -13,17 +13,17 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/contrib/lite/testing/init_tensorflow.h"
 #include "tensorflow/contrib/lite/testing/tflite_diff_flags.h"
 #include "tensorflow/contrib/lite/testing/tflite_diff_util.h"
-#include "tensorflow/core/platform/init_main.h"
 
 int main(int argc, char** argv) {
+  ::tflite::InitTensorFlow();  // For Flex support.
+
   ::tflite::testing::DiffOptions options =
       ::tflite::testing::ParseTfliteDiffFlags(&argc, argv);
   if (options.tensorflow_model.empty()) return 1;
 
-  ::tensorflow::port::InitMain("usage", &argc, &argv);
-
   int failure_count = 0;
   for (int i = 0; i < options.num_runs_per_pass; i++) {
     if (!tflite::testing::RunDiffTest(options, /*num_invocations=*/1)) {
-- 
GitLab


From a584dc9020e41031c13f65625c55b715cc5ca5dd Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <jhseu@google.com>
Date: Wed, 10 Oct 2018 14:55:22 -0700
Subject: [PATCH 0727/1085] Automated rollback of commit
 6aebb0866718cae2c921e875f3fd74573ee9acc8

PiperOrigin-RevId: 216598193
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 8d15c857f8..367606ef27 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -1794,19 +1794,18 @@ class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook):
         summary_writer=summary_writer)
 
   def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
-    global_steps_per_sec = elapsed_steps / elapsed_time
-    examples_per_sec = self._batch_size * global_steps_per_sec
+    global_step_per_sec = elapsed_steps / elapsed_time
+    examples_per_sec = self._batch_size * global_step_per_sec
     if self._summary_writer is not None:
       global_step_summary = Summary(value=[
-          Summary.Value(tag='global_steps/sec',
-                        simple_value=global_steps_per_sec)
+          Summary.Value(tag='global_step/sec', simple_value=global_step_per_sec)
       ])
       example_summary = Summary(value=[
           Summary.Value(tag='examples/sec', simple_value=examples_per_sec)
       ])
       self._summary_writer.add_summary(global_step_summary, global_step)
       self._summary_writer.add_summary(example_summary, global_step)
-    logging.info('global_steps/sec: %g', global_steps_per_sec)
+    logging.info('global_step/sec: %g', global_step_per_sec)
     logging.info('examples/sec: %g', examples_per_sec)
 
 
-- 
GitLab


From 4fe9033756aca1f31e087c7fee0849120d6629bc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 15:04:08 -0700
Subject: [PATCH 0728/1085] Bump open source abseil-cpp revision to
 f340f773edab951656b19b6f1a77c964a78ec4c2

PiperOrigin-RevId: 216599799
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 7238a74b73..6229e01bbe 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -117,11 +117,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "cd1650daecfdd5591502bb017c70777c959cf604a962352bd5312bef8d78a8c6",
-        strip_prefix = "abseil-cpp-445998d7ac4e5d3c50411d377e3b50e960d2d6c2",
+        sha256 = "2809f7d97d126ad341c3126711df7bae6336278d959144db293a9b2756b726a8",
+        strip_prefix = "abseil-cpp-f340f773edab951656b19b6f1a77c964a78ec4c2",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/445998d7ac4e5d3c50411d377e3b50e960d2d6c2.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f340f773edab951656b19b6f1a77c964a78ec4c2.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/f340f773edab951656b19b6f1a77c964a78ec4c2.tar.gz",
         ],
     )
 
-- 
GitLab


From 6905ffd1cf2bea18af3486ef831dd174c3d95b7a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 15:05:57 -0700
Subject: [PATCH 0729/1085] [XLA] Update TODO references related to all-reduce.

PiperOrigin-RevId: 216600146
---
 tensorflow/compiler/xla/client/xla_builder.h      | 4 ++--
 tensorflow/compiler/xla/service/hlo_instruction.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 933c0e7b44..2916ac1b2a 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -699,7 +699,7 @@ class XlaBuilder {
   // the same channel_id, they will be 'Allreduce'd. If empty, Allreduce will
   // not be applied cross modules.
   //
-  // TODO(b/79737069): Rename this to AllReduce when it's ready to use.
+  // TODO(b/117564385): Rename this to AllReduce when it's ready to use.
   XlaOp CrossReplicaSum(
       const XlaOp& operand, const XlaComputation& computation,
       absl::Span<const ReplicaGroup> replica_groups = {},
@@ -1864,7 +1864,7 @@ XlaOp CrossReplicaSum(const XlaOp& operand,
 // same channel_id, they will be 'Allreduce'd. If empty, Allreduce will not be
 // applied cross modules.
 //
-// TODO(b/79737069): Rename this to AllReduce when it's ready to use.
+// TODO(b/117564385): Rename this to AllReduce when it's ready to use.
 XlaOp CrossReplicaSum(
     const XlaOp& operand, const XlaComputation& computation,
     absl::Span<const ReplicaGroup> replica_groups = {},
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 81fe1d0a9a..15a4da8dbe 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -463,7 +463,7 @@ class HloInstruction {
   // the same all_reduce_id, they will be 'Allreduce'd. If empty, Allreduce will
   // not be applied cross modules.
   //
-  // TODO(b/79737069): Rename this to AllReduce.
+  // TODO(b/117564385): Rename this to AllReduce.
   static std::unique_ptr<HloInstruction> CreateCrossReplicaSum(
       const Shape& shape, absl::Span<HloInstruction* const> operands,
       HloComputation* reduce_computation,
-- 
GitLab


From ea38b380b945c5a0462c9125787439deda7f6327 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 10 Oct 2018 15:18:22 -0700
Subject: [PATCH 0730/1085] First draft of tf.compat.v1.wrap_function.

PiperOrigin-RevId: 216602214
---
 tensorflow/python/eager/BUILD                 | 26 ++++++
 tensorflow/python/eager/function.py           | 53 +++++++----
 tensorflow/python/eager/wrap_function.py      | 93 +++++++++++++++++++
 tensorflow/python/eager/wrap_function_test.py | 59 ++++++++++++
 tensorflow/python/framework/ops.py            |  6 +-
 5 files changed, 215 insertions(+), 22 deletions(-)
 create mode 100644 tensorflow/python/eager/wrap_function.py
 create mode 100644 tensorflow/python/eager/wrap_function_test.py

diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index cae809a7c3..72cf97dca3 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -56,6 +56,7 @@ py_library(
         ":graph_only_ops",
         ":tape",
         ":test",
+        ":wrap_function",
         "//tensorflow/python:pywrap_tensorflow",
     ],
 )
@@ -413,3 +414,28 @@ py_test(
         "//tensorflow/python:framework_ops",
     ],
 )
+
+py_library(
+    name = "wrap_function",
+    srcs = ["wrap_function.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":context",
+        ":function",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:template",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/training/checkpointable:base",
+    ],
+)
+
+py_test(
+    name = "wrap_function_test",
+    srcs = ["wrap_function_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":wrap_function",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_ops",
+    ],
+)
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index e399a4abb5..6ef07b71a9 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -537,7 +537,7 @@ class Function(object):
   is differentiable under `tf.GradientTape` objects.
   """
 
-  def __init__(self, func_graph, attrs=None):
+  def __init__(self, func_graph, attrs=None, signature=None):
     """Initialize a Function.
 
     Args:
@@ -563,6 +563,7 @@ class Function(object):
         _inference_name(self._func_graph.name), self._func_graph,
         self._func_graph.inputs, self._func_graph.outputs, self._attrs)
     self._backward_graph_function = None
+    self._signature = signature
 
   def __call__(self, *args):
     """Executes the wrapped function.
@@ -600,6 +601,10 @@ class Function(object):
         tensor_inputs.append(arg.handle)
       elif isinstance(arg, ops.Tensor):
         tensor_inputs.append(arg)
+      elif (self._signature is not None and
+            isinstance(self._signature[i], tensor_spec.TensorSpec)):
+        tensor_inputs.append(
+            ops.convert_to_tensor(arg, self._signature[i].dtype))
       else:
         raise ValueError("All inputs to `Function`s must be Tensors; "
                          "on invocation of %s, the %d-th input (%s) was not a "
@@ -841,13 +846,30 @@ def _get_defun_inputs_from_args(args):
   return nest.pack_sequence_as(args, function_inputs)
 
 
+def check_mutation(n1, n2):
+  """Check if two list of arguments are exactly the same."""
+  errmsg = ("Function to be traced should not modify structure of input "
+            "arguments. Check if your function has list and dictionary "
+            "operations that alter input arguments, "
+            "such as `list.pop`, `list.append`")
+  try:
+    nest.assert_same_structure(n1, n2)
+  except ValueError:
+    raise ValueError(errmsg)
+
+  for arg1, arg2 in zip(nest.flatten(n1), nest.flatten(n2)):
+    if arg1 is not arg2:
+      raise ValueError(errmsg)
+
+
 def func_graph_from_py_func(name,
                             python_func,
                             args,
                             kwargs,
                             signature=None,
                             func_graph=None,
-                            experimental_autograph=False):
+                            experimental_autograph=False,
+                            add_control_dependencies=True):
   """Returns a `FuncGraph` generated from `python_func`.
 
   Args:
@@ -866,6 +888,9 @@ def func_graph_from_py_func(name,
       this graph else a new one is built and returned.
     experimental_autograph: whether to use autograph to compile `python_func`.
       See https://www.tensorflow.org/guide/autograph for more information.
+    add_control_dependencies: If True, automatically adds control dependencies
+      to ensure program order matches execution order and stateful ops always
+      execute.
 
   Returns:
     A FuncGraph.
@@ -877,7 +902,11 @@ def func_graph_from_py_func(name,
   if func_graph is None:
     func_graph = FuncGraph(name)
   assert isinstance(func_graph, FuncGraph)
-  with func_graph.as_default(), AutomaticControlDependencies() as a:
+  if add_control_dependencies:
+    control_manager = AutomaticControlDependencies
+  else:
+    control_manager = ops.NullContextmanager
+  with func_graph.as_default(), control_manager() as a:
     variable_scope.get_variable_scope().set_use_resource(True)
 
     if signature is not None:
@@ -906,7 +935,8 @@ def func_graph_from_py_func(name,
             "must return zero or more Tensors; in compilation of %s, found "
             "return value of type %s, which is not a Tensor." %
             (str(python_func), type(x)))
-      x = a.mark_as_return(x)
+      if add_control_dependencies:
+        x = a.mark_as_return(x)
       return x
 
     this_tape = tape.push_new_tape()
@@ -922,21 +952,6 @@ def func_graph_from_py_func(name,
       # invariant: `func_outputs` contains only Tensors and `None`s.
       func_outputs = nest.map_structure(convert, func_outputs)
 
-      def check_mutation(n1, n2):
-        """Check if two list of arguments are exactly the same."""
-        errmsg = ("Function to be traced should not modify structure of input "
-                  "arguments. Check if your function has list and dictionary "
-                  "operations that alter input arguments, "
-                  "such as `list.pop`, `list.append`")
-        try:
-          nest.assert_same_structure(n1, n2)
-        except ValueError:
-          raise ValueError(errmsg)
-
-        for arg1, arg2 in zip(nest.flatten(n1), nest.flatten(n2)):
-          if arg1 is not arg2:
-            raise ValueError(errmsg)
-
       check_mutation(func_args_before, func_args)
       check_mutation(func_kwargs_before, func_kwargs)
     finally:
diff --git a/tensorflow/python/eager/wrap_function.py b/tensorflow/python/eager/wrap_function.py
new file mode 100644
index 0000000000..2a9a164709
--- /dev/null
+++ b/tensorflow/python/eager/wrap_function.py
@@ -0,0 +1,93 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# pylint: disable=unidiomatic-typecheck
+"""Prototype decorator for defining legacy-graph-mode functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.eager import function
+from tensorflow.python.ops import variable_scope
+
+
+class VariableHolder(object):
+  """Holds variables for a python function."""
+
+  def __init__(self, fn):
+    self._fn = fn
+    self._variables = []
+
+  def variable_creator_scope(self, next_creator, **kwargs):
+    v = next_creator(**kwargs)
+    self._variables.append(v)
+    return v
+
+  def __call__(self, *args, **kwargs):
+    with variable_scope.variable_creator_scope(self.variable_creator_scope):
+      return self._fn(*args, **kwargs)
+
+
+def wrap_function(fn, signature, name=None):
+  """Wraps the TF 1.x function fn into a graph function.
+
+  The python function `fn` will be called once with symbolic arguments specified
+  in the `signature`, traced, and turned into a graph function. Any variables
+  created by `fn` will be owned by the object returned by `wrap_function`. The
+  resulting graph function can be called with tensors which match the
+  signature.
+
+  ```python
+  def f(x, do_add):
+    v = tf.Variable(5.0)
+    if do_add:
+      op = v.assign_add(x)
+    else:
+      op = v.assign_sub(x)
+    with tf.control_dependencies([op]):
+      return v.read_value()
+
+  f_add = tf.compat.v1.wrap_function(f, [tf.TensorSpec((), tf.float32), True])
+
+  assert float(f_add(1.0)) == 6.0
+  assert float(f_add(1.0)) == 7.0
+
+  # Can call tf.compat.v1.wrap_function again to get a new trace, a new set
+  # of variables, and possibly different non-template arguments.
+  f_sub= tf.compat.v1.wrap_function(f, [tf.TensorSpec((), tf.float32), False])
+
+  assert float(f_sub(1.0)) == 4.0
+  assert float(f_sub(1.0)) == 3.0
+  ```
+
+  Args:
+    fn: python function to be wrapped
+    signature: the placeholder and python arguments to be passed to the
+      wrapped function
+    name: Optional. The name of the function.
+
+  Returns:
+    the wrapped graph function.
+  """
+  holder = VariableHolder(fn)
+  fn = function.Function(
+      function.func_graph_from_py_func(
+          name,
+          holder,
+          args=None, kwargs=None, signature=signature,
+          add_control_dependencies=False),
+      signature=signature)
+  fn._variable_holder = holder
+  return fn
diff --git a/tensorflow/python/eager/wrap_function_test.py b/tensorflow/python/eager/wrap_function_test.py
new file mode 100644
index 0000000000..0690358491
--- /dev/null
+++ b/tensorflow/python/eager/wrap_function_test.py
@@ -0,0 +1,59 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+from tensorflow.python.eager import wrap_function
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_spec
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+class WrapFunctionTest(test.TestCase):
+
+  def testDocString(self):
+
+    def f(x, do_add):
+      v = variables.Variable(5.0)
+      if do_add:
+        op = v.assign_add(x)
+      else:
+        op = v.assign_sub(x)
+      with ops.control_dependencies([op]):
+        return v.read_value()
+
+    f_add = wrap_function.wrap_function(
+        f, [tensor_spec.TensorSpec((), dtypes.float32), True])
+
+    self.assertAllEqual(f_add(1.0), 6.0)
+    self.assertAllEqual(f_add(1.0), 7.0)
+
+    # Can call tf.compat.v1.wrap_function again to get a new trace, a new set
+    # of variables, and possibly different non-template arguments.
+    f_sub = wrap_function.wrap_function(
+        f, [tensor_spec.TensorSpec((), dtypes.float32), False])
+
+    self.assertAllEqual(f_sub(1.0), 4.0)
+    self.assertAllEqual(f_sub(1.0), 3.0)
+
+
+if __name__ == '__main__':
+  ops.enable_eager_execution()
+  test.main()
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 140bd098a6..50ab118fd6 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -101,7 +101,7 @@ class _UserDeviceSpec(object):
       self.function = pydev.merge_device(self._device_name_or_function)
 
 
-class _NullContextmanager(object):
+class NullContextmanager(object):
 
   def __enter__(self):
     pass
@@ -4951,7 +4951,7 @@ def _colocate_with_for_gradient(op, gradient_uid, ignore_existing=False):
     if op is not None:
       return device(op.device)
     else:
-      return _NullContextmanager()
+      return NullContextmanager()
   else:
     default_graph = get_default_graph()
     if isinstance(op, EagerTensor):
@@ -4996,7 +4996,7 @@ def control_dependencies(control_inputs):
       for control in control_inputs:
         if callable(control):
           control()
-    return _NullContextmanager()
+    return NullContextmanager()
   else:
     return get_default_graph().control_dependencies(control_inputs)
 
-- 
GitLab


From 20c47de2eabec47391e19f8fe4da5d83f0f8ab85 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Wed, 10 Oct 2018 15:19:12 -0700
Subject: [PATCH 0731/1085] Internal change

PiperOrigin-RevId: 216602362
---
 tensorflow/contrib/lite/delegates/flex/BUILD | 38 +++++++++++++++++---
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD
index 9b89ed4f84..2f866eaecb 100644
--- a/tensorflow/contrib/lite/delegates/flex/BUILD
+++ b/tensorflow/contrib/lite/delegates/flex/BUILD
@@ -42,8 +42,31 @@ tf_cc_test(
     ],
 )
 
+# Delegate implementation that pulls in the standard set of TensorFlow ops and
+# kernels.
 cc_library(
     name = "delegate",
+    hdrs = [
+        "delegate.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":delegate_only_runtime",
+    ] + select({
+        "//tensorflow:android": [
+            "//tensorflow/core:android_tensorflow_lib",
+        ],
+        "//conditions:default": [
+            "//tensorflow/core:tensorflow",
+        ],
+    }),
+    alwayslink = 1,
+)
+
+# Delegate implementation that does *not* pull in the standard set of TensorFlow
+# ops and kernels.
+cc_library(
+    name = "delegate_only_runtime",
     srcs = [
         "delegate.cc",
     ],
@@ -134,12 +157,12 @@ cc_library(
         # set of core TensorFlow kernels. We may want to revisit this dependency
         # to allow selective registration via build targets.
         "//tensorflow:android": [
-            "//tensorflow/core:android_tensorflow_lib",
+            "//tensorflow/core:android_tensorflow_lib_lite",
         ],
         "//conditions:default": [
+            "//tensorflow/core:lib",
             "//tensorflow/core:protos_all_cc",
             "//tensorflow/core:framework",
-            "//tensorflow/core:tensorflow",
         ],
     }),
 )
@@ -153,7 +176,14 @@ tf_cc_test(
         ":kernel",
         ":test_util",
         "@com_google_googletest//:gtest",
-    ],
+    ] + select({
+        "//tensorflow:android": [
+            "//tensorflow/core:android_tensorflow_lib",
+        ],
+        "//conditions:default": [
+            "//tensorflow/core:tensorflow",
+        ],
+    }),
 )
 
 cc_library(
@@ -180,7 +210,7 @@ cc_library(
         "//tensorflow/contrib/lite:kernel_api",
     ] + select({
         "//tensorflow:android": [
-            "//tensorflow/core:android_tensorflow_lib",
+            "//tensorflow/core:android_tensorflow_lib_lite",
         ],
         "//conditions:default": [
             "//tensorflow/core:lib",
-- 
GitLab


From 0c284043cbdaed1f88c43be3a222d3bc1c235e89 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 10 Oct 2018 15:35:14 -0700
Subject: [PATCH 0732/1085] [TF:XLA] Don't aggregate writes to TensorArrays
 that aren't gradient TensorArrays. In the non-XLA case it is an error to
 write the same entry multiple times. This saves reading the existing value
 from memory in cases where it should not be meaningful.

PiperOrigin-RevId: 216604960
---
 .../compiler/tf2xla/kernels/tensor_array_ops.cc     | 13 ++++++++++---
 tensorflow/compiler/tf2xla/xla_context.cc           |  3 ++-
 tensorflow/compiler/tf2xla/xla_resource.cc          | 13 +++++++++----
 tensorflow/compiler/tf2xla/xla_resource.h           | 10 +++++++++-
 4 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
index 6cdfaf4d97..06a560d947 100644
--- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc
@@ -223,9 +223,16 @@ class TensorArrayWriteOp : public XlaOpKernel {
     slice_shape.InsertDim(0, 1LL);
     auto update = xla::Reshape(value, slice_shape.dim_sizes());
 
-    xla::XlaOp written = DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(),
-                                         start_indices, dtype_);
-
+    xla::XlaOp written;
+    if (resource->tensor_array_multiple_writes_aggregate()) {
+      written = DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(),
+                                start_indices, dtype_);
+    } else {
+      // TODO(b/117569591): Ideally we would report an error in the case that we
+      // see multiple writes to the same offset. Unfortunately there is no way
+      // to report errors at the moment, so we silently overwrite.
+      written = xla::DynamicUpdateSlice(ta, update, start_indices);
+    }
     OP_REQUIRES_OK(ctx, resource->SetValue(written));
     ctx->SetOutput(0, flow);
   }
diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc
index f247570d72..2095a6b809 100644
--- a/tensorflow/compiler/tf2xla/xla_context.cc
+++ b/tensorflow/compiler/tf2xla/xla_context.cc
@@ -138,7 +138,8 @@ Status XlaContext::CreateResource(
     const std::set<string>& tensor_array_gradients, XlaResource** resource) {
   resources_.emplace_back(
       new XlaResource(kind, arg_num, std::move(name), type, std::move(shape),
-                      handle, tensor_array_size, tensor_array_gradients));
+                      handle, tensor_array_size, tensor_array_gradients,
+                      /*tensor_array_multiple_writes_aggregate=*/false));
   *resource = resources_.back().get();
   return Status::OK();
 }
diff --git a/tensorflow/compiler/tf2xla/xla_resource.cc b/tensorflow/compiler/tf2xla/xla_resource.cc
index 56c2e01055..63b09c8f02 100644
--- a/tensorflow/compiler/tf2xla/xla_resource.cc
+++ b/tensorflow/compiler/tf2xla/xla_resource.cc
@@ -29,7 +29,8 @@ namespace tensorflow {
 XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type,
                          TensorShape shape, const xla::XlaOp& initial_value,
                          int64 tensor_array_size,
-                         const std::set<string>& tensor_array_gradients)
+                         const std::set<string>& tensor_array_gradients,
+                         bool tensor_array_multiple_writes_aggregate)
     : kind_(kind),
       arg_num_(arg_num),
       name_(std::move(name)),
@@ -37,14 +38,17 @@ XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type,
       shape_(std::move(shape)),
       value_(initial_value),
       initial_value_(initial_value),
-      tensor_array_size_(tensor_array_size) {
+      tensor_array_size_(tensor_array_size),
+      tensor_array_multiple_writes_aggregate_(
+          tensor_array_multiple_writes_aggregate) {
   CHECK(kind_ != kInvalid);
 
   for (const string& gradient : tensor_array_gradients) {
     tensor_array_gradients_[gradient].reset(new XlaResource(
         /*kind=*/kTensorArray, /*arg_num=*/-1,
         /*name=*/absl::StrCat("TensorArrayGrad: ", name_), type_, shape_,
-        xla::XlaOp(), tensor_array_size_, /*tensor_array_gradients=*/{}));
+        xla::XlaOp(), tensor_array_size_, /*tensor_array_gradients=*/{},
+        /*tensor_array_multiple_writes_aggregate=*/true));
   }
 }
 
@@ -137,7 +141,8 @@ Status XlaResource::GetOrCreateTensorArrayGradient(const string& source,
         new XlaResource(/*kind=*/kTensorArray, /*arg_num=*/-1,
                         /*name=*/absl::StrCat("TensorArrayGrad: ", name_),
                         type_, shape_, gradient_value, tensor_array_size_,
-                        /*tensor_array_gradients=*/{}));
+                        /*tensor_array_gradients=*/{},
+                        /*tensor_array_multiple_writes_aggregate=*/true));
   }
   *gradient_out = gradient.get();
   return Status::OK();
diff --git a/tensorflow/compiler/tf2xla/xla_resource.h b/tensorflow/compiler/tf2xla/xla_resource.h
index 2438490be1..aa9ce1b171 100644
--- a/tensorflow/compiler/tf2xla/xla_resource.h
+++ b/tensorflow/compiler/tf2xla/xla_resource.h
@@ -39,7 +39,8 @@ class XlaResource {
   XlaResource(Kind kind, int arg_num, string name, DataType type,
               TensorShape shape, const xla::XlaOp& initial_value,
               int64 tensor_array_size,
-              const std::set<string>& tensor_array_gradients);
+              const std::set<string>& tensor_array_gradients,
+              bool tensor_array_multiple_writes_aggregate);
 
   XlaResource(const XlaResource&) = delete;
   XlaResource(XlaResource&&) = delete;
@@ -113,6 +114,8 @@ class XlaResource {
                      const xla::XlaOp& pack, xla::XlaBuilder* builder);
 
   // TensorArray and Stack specific fields
+  // TODO(phawkins): refactor this code to use subclasses, rather than putting
+  // kind-specific fields in XlaResource.
 
   // 'tensor_array_size' stores the expected size of the TensorArray or Stack.
   // We need to store this since sometimes TensorArrays must be initialized
@@ -121,6 +124,10 @@ class XlaResource {
   int64 tensor_array_size() const { return tensor_array_size_; }
   void set_tensor_array_size(int64 size) { tensor_array_size_ = size; }
 
+  bool tensor_array_multiple_writes_aggregate() const {
+    return tensor_array_multiple_writes_aggregate_;
+  }
+
   // 'tensor_array_gradient' is a map from TensorArrayGradV3 'source' attributes
   // to an XlaResource containing the gradient TensorArrays. We store a pointer
   // here since there should only be one gradient TensorArray per 'source'
@@ -143,6 +150,7 @@ class XlaResource {
   xla::XlaOp initial_value_;
 
   int64 tensor_array_size_ = -1;
+  bool tensor_array_multiple_writes_aggregate_ = false;
 
   std::map<string, std::unique_ptr<XlaResource>> tensor_array_gradients_;
 };
-- 
GitLab


From a67fda82206665d195398f68c2b0fad49a4cfccd Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Wed, 10 Oct 2018 15:40:39 -0700
Subject: [PATCH 0733/1085] Only acquire the flex delegate when flex ops
 present

PiperOrigin-RevId: 216605828
---
 tensorflow/contrib/lite/model.cc | 37 +++++++++++++++++++++++++-------
 tensorflow/contrib/lite/model.h  |  1 +
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc
index d7b109ac1a..a8a010be1a 100644
--- a/tensorflow/contrib/lite/model.cc
+++ b/tensorflow/contrib/lite/model.cc
@@ -384,6 +384,33 @@ TfLiteStatus InterpreterBuilder::ParseTensors(
   return status;
 }
 
+TfLiteStatus InterpreterBuilder::ApplyDelegates(Interpreter* interpreter) {
+  // TODO(b/117561550): Move flex delegate application to the OpResolver.
+  if (AcquireFlexDelegate == nullptr) {
+    return kTfLiteOk;
+  }
+
+  bool has_flex_op = false;
+  for (const auto* registration : flatbuffer_op_index_to_registration_) {
+    if ((registration->builtin_code == BuiltinOperator_CUSTOM) &&
+        IsFlexOp(registration->custom_name)) {
+      has_flex_op = true;
+      break;
+    }
+  }
+
+  if (!has_flex_op) {
+    return kTfLiteOk;
+  }
+
+  if (auto flex_delegate = AcquireFlexDelegate()) {
+    return interpreter->ModifyGraphWithDelegate(std::move(flex_delegate),
+                                                /*allow_dynamic_tensors=*/true);
+  }
+
+  return kTfLiteOk;
+}
+
 TfLiteStatus InterpreterBuilder::operator()(
     std::unique_ptr<Interpreter>* interpreter) {
   return operator()(interpreter, /*num_threads=*/-1);
@@ -466,14 +493,8 @@ TfLiteStatus InterpreterBuilder::operator()(
   }
   (**interpreter).SetVariables(std::move(variables));
 
-  // TODO(b/116667551): Only create the flex delegate if the model has flex ops.
-  if (AcquireFlexDelegate != nullptr) {
-    if (auto flex_delegate = AcquireFlexDelegate()) {
-      (**interpreter)
-          .ModifyGraphWithDelegate(std::move(flex_delegate),
-                                   /*allow_dynamic_tensors=*/true);
-    }
-  }
+  if (ApplyDelegates(interpreter->get()) != kTfLiteOk)
+    return cleanup_and_error();
 
   return kTfLiteOk;
 }
diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h
index 6abdfcd079..9505824dcc 100644
--- a/tensorflow/contrib/lite/model.h
+++ b/tensorflow/contrib/lite/model.h
@@ -173,6 +173,7 @@ class InterpreterBuilder {
       const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
       const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors,
       Interpreter* interpreter);
+  TfLiteStatus ApplyDelegates(Interpreter* interpreter);
 
   const ::tflite::Model* model_;
   const OpResolver& op_resolver_;
-- 
GitLab


From dd03b7d2a55b5501f3fcabc4ff0701ac2e9b3364 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 15:54:35 -0700
Subject: [PATCH 0734/1085] Don't crash an XRT server if a client leaks a
 compilation reference.

PiperOrigin-RevId: 216608167
---
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 21 +++++++++++++++++++
 .../compiler/xrt/xrt_compilation_cache.cc     | 11 +++++++---
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc
index f590fbf0d9..9fc01e6304 100644
--- a/tensorflow/compiler/xrt/tests/raw_api_test.cc
+++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc
@@ -437,6 +437,27 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) {
   EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
 }
 
+TEST(RawApiTest, LeakCompilationReference) {
+  xrt::XLAComputation c;
+  auto config = c.mutable_config();
+  auto shapes = config->mutable_program_shape();
+  *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {2});
+  *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {2});
+  *shapes->mutable_result() = xla::ShapeUtil::MakeTupleShape(
+      {xla::ShapeUtil::MakeShape(xla::F32, {2})});
+  StoreComputationSnapshot(AddAndTuple(), c.mutable_hlo_snapshot());
+
+  Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
+  auto computation =
+      ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
+  auto c_handle = ops::XRTCompile(root, computation);
+  TF_ASSERT_OK(root.status());
+
+  ClientSession session(root);
+  std::vector<Tensor> outputs;
+  TF_EXPECT_OK(session.Run({c_handle}, &outputs));
+}
+
 }  // namespace
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/xrt/xrt_compilation_cache.cc b/tensorflow/compiler/xrt/xrt_compilation_cache.cc
index 4844c7fb71..31bb476895 100644
--- a/tensorflow/compiler/xrt/xrt_compilation_cache.cc
+++ b/tensorflow/compiler/xrt/xrt_compilation_cache.cc
@@ -46,12 +46,17 @@ XRTCompilationCache::XRTCompilationCache(int max_number_of_entries)
 
 XRTCompilationCache::~XRTCompilationCache() {
   VLOG(1) << "XRTCompilationCache::~XRTCompilationCache()";
+  // A buggy client may be holding onto a reference, or a client might have
+  // crashed while holding onto a reference. In either case, discard all
+  // outstanding client references to avoid leaking storage.
+  for (const auto& entry : entries_by_uid_) {
+    while (!entry.second->RefCountIsOne()) {
+      entry.second->Unref();
+    }
+  }
   while (!entries_by_last_use_.empty()) {
     MarkOldestEntryForEviction();
   }
-  // By the time the cache is deleted all reference holders should have already
-  // been deleted, since they were holding references to the cache. So all
-  // entries should be gone at this point.
   CHECK_EQ(cache_.size(), 0);
   CHECK_EQ(entries_by_uid_.size(), 0);
   CHECK_EQ(cache_entries_, 0);
-- 
GitLab


From 883083715e6cfe400d0c0c08dff73b924cc1b72f Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Wed, 10 Oct 2018 16:27:58 -0700
Subject: [PATCH 0735/1085] [XLA] Don't overestimate the amount of bytes a
 gather reads

PiperOrigin-RevId: 216613367
---
 .../compiler/xla/service/hlo_cost_analysis.cc |  5 ++++
 .../xla/service/hlo_cost_analysis_test.cc     | 25 +++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
index a502fff9a0..23ab4cda93 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
@@ -664,6 +664,11 @@ Status HloCostAnalysis::HandleConditional(const HloInstruction* conditional) {
 }
 
 Status HloCostAnalysis::HandleGather(const HloInstruction* gather) {
+  // Gather doesn't read the whole input buffer, it's equivalent to a copy the
+  // size of the output shape and a read of the gather indices.
+  current_properties_[kBytesAccessedKey] =
+      GetShapeSize(gather->shape()) * 2 +
+      GetShapeSize(gather->operand(1)->shape());
   // Gather does not issue any flops.
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc
index d76ce9ecbc..802cdfc9e4 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc
@@ -556,5 +556,30 @@ TEST_F(HloCostAnalysisTest, DynamicUpdateSlice) {
   EXPECT_EQ(analysis.bytes_accessed(), 8);
 }
 
+TEST_F(HloCostAnalysisTest, Gather) {
+  // Test the analysis on a gather.
+  XlaBuilder builder("gather");
+  Shape operand_shape = ShapeUtil::MakeShape(S32, {3, 3});
+  Shape indices_shape = ShapeUtil::MakeShape(S32, {2});
+
+  auto operand = Parameter(&builder, 0, operand_shape, "operand");
+  auto indices = Parameter(&builder, 1, indices_shape, "indices");
+  GatherDimensionNumbers dim_numbers;
+  dim_numbers.add_offset_dims(1);
+  dim_numbers.add_collapsed_slice_dims(0);
+  dim_numbers.add_start_index_map(0);
+  dim_numbers.set_index_vector_dim(1);
+  Gather(operand, indices, dim_numbers, {1, 3});
+
+  auto hlo_module = BuildHloGraph(&builder);
+
+  // Run HLO cost analysis.
+  HloCostAnalysis analysis(ShapeSize);
+  ASSERT_IS_OK(
+      hlo_module->entry_computation()->root_instruction()->Accept(&analysis));
+
+  EXPECT_EQ(analysis.bytes_accessed(), 56);
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 9b093e3428c9a24b7c23d7136f45d925eec13258 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 10 Oct 2018 16:45:11 -0700
Subject: [PATCH 0736/1085] Better handle the non-ok status and clean code

---
 .../kernels/data/matching_files_dataset_op.cc | 31 +++++++++----------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 0158b31d92..23587a2d00 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -100,9 +100,14 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                              std::vector<Tensor>* out_tensors,
                              bool* end_of_sequence) override {
         mutex_lock l(mu_);
-        Status ret;
         FileSystem* fs;
 
+        TF_RETURN_IF_ERROR(ctx->env()->GetFileSystemForFile(
+            dataset()->patterns_[(current_pattern_index_ > 0)
+                                     ? current_pattern_index_ - 1
+                                     : 0],
+            &fs));
+
         while (!filepath_queue_.empty() ||
                current_pattern_index_ < dataset()->patterns_.size()) {
           // All the elements in the heap will be the matched filenames or the
@@ -111,13 +116,10 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             PathStatus current_path = filepath_queue_.top();
             filepath_queue_.pop();
 
-            TF_RETURN_IF_ERROR(
-                ctx->env()->GetFileSystemForFile(current_path.first, &fs));
-
             if (!current_path.second) {
               Tensor filepath_tensor(ctx->allocator({}), DT_STRING, {});
 
-              // Replace the forward slash by the backslash for Windows path
+              // Replace the forward slash with the backslash for Windows path
               if (dataset()->patterns_[current_pattern_index_ - 1].find('\\') !=
                   std::string::npos) {
                 std::replace(current_path.first.begin(),
@@ -133,13 +135,11 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
             // In this case, current_path is a directory. Then continue the
             // search.
-            ret.Update(
+            TF_RETURN_IF_ERROR(
                 UpdateIterator(ctx, fs, current_path.first, current_pattern_));
           } else {
             // search a new pattern
             current_pattern_ = dataset()->patterns_[current_pattern_index_];
-            TF_RETURN_IF_ERROR(
-                ctx->env()->GetFileSystemForFile(current_pattern_, &fs));
 
             // Windows paths contain backslashes and Windows APIs accept forward
             // and backslashes equivalently, so we convert the pattern to use
@@ -163,16 +163,15 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
               current_dir = ".";
               current_pattern_ = io::JoinPath(current_dir, current_pattern_);
             }
-            std::cout << "Input pattern: " << current_pattern_
-                      << "; Current dir: " << current_dir << std::endl;
 
-            ret.Update(UpdateIterator(ctx, fs, current_dir, current_pattern_));
+            TF_RETURN_IF_ERROR(
+                UpdateIterator(ctx, fs, current_dir, current_pattern_));
             ++current_pattern_index_;
           }
         }
 
         *end_of_sequence = true;
-        return ret;
+        return Status::OK();
       }
 
      protected:
@@ -259,14 +258,13 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           const string& current_dir = current_path.first;
           std::vector<string> children;
           Status s = fs->GetChildren(current_dir, &children);
-          std::cout << "GetChildren status: " << s.ToString()
-                    << "; Children size: " << children.size()
-                    << "; Heap size: " << filepath_queue_.size() << std::endl;
           ret.Update(s);
 
           // If GetChildren() fails, continue the next search.
-          if (!s.ok()) {
+          if (ret.code() == error::NOT_FOUND) {
             continue;
+          } else if (!ret.ok()) {
+            return ret;
           }
 
           // children_dir_status holds is_dir status for children. It can have
@@ -304,7 +302,6 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             const string& child_dir_path =
                 io::JoinPath(current_dir, children[i]);
             const Status& child_dir_status = children_dir_status[i];
-            std::cout << "Child dir path: " << child_dir_path << std::endl;
 
             // If the IsDirectory call was cancelled we bail.
             if (child_dir_status.code() == tensorflow::error::CANCELLED) {
-- 
GitLab


From 09e098e5057ca1a781573a199726da750e90e4f3 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Wed, 10 Oct 2018 16:50:47 -0700
Subject: [PATCH 0737/1085] Automated rollback of commit
 d6a3d6a8295359364c86aecc479e6392bcde0ce4

PiperOrigin-RevId: 216617037
---
 tensorflow/cc/BUILD                           |  36 ++++-
 tensorflow/core/graph/graph.cc                |   2 +-
 tensorflow/core/grappler/optimizers/BUILD     |   1 +
 .../optimizers/data/vectorization/BUILD       |  38 +++--
 .../data/vectorization/add_vectorizer.cc      | 150 ++++++++++++++++++
 .../optimizers/data/vectorization_utils.cc    |  21 +--
 .../data/vectorization_utils_test.cc          | 103 ++++++++++--
 .../optimization/map_vectorization_test.py    |   1 +
 tensorflow/tensorflow.bzl                     |   9 +-
 9 files changed, 325 insertions(+), 36 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc

diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index 9d2208d84d..c18b07603a 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -454,11 +454,33 @@ tf_cc_test(
     ],
 )
 
+# Generates separate libraries for array_ops and math_ops to reduce the dependency count of targets that depend on only these
 tf_gen_op_wrappers_cc(
-    name = "cc_ops",
+    name = "math_ops",
+    api_def_srcs = ["//tensorflow/core/api_def:base_api_def"],
+    op_lib_names = [
+        "math_ops",
+    ],
+    pkg = "//tensorflow/core",
+)
+
+tf_gen_op_wrappers_cc(
+    name = "array_ops",
     api_def_srcs = ["//tensorflow/core/api_def:base_api_def"],
     op_lib_names = [
         "array_ops",
+    ],
+    pkg = "//tensorflow/core",
+)
+
+tf_gen_op_wrappers_cc(
+    name = "cc_ops",
+    api_def_srcs = ["//tensorflow/core/api_def:base_api_def"],
+    deps_internal = [
+        ":array_ops_internal",
+        ":math_ops_internal",
+    ],
+    op_lib_names = [
         "audio_ops",
         "candidate_sampling_ops",
         "control_flow_ops",
@@ -469,7 +491,6 @@ tf_gen_op_wrappers_cc(
         "logging_ops",
         "lookup_ops",
         "manip_ops",
-        "math_ops",
         "nn_ops",
         "no_op",
         "parsing_ops",
@@ -481,10 +502,21 @@ tf_gen_op_wrappers_cc(
         "user_ops",
     ],
     other_hdrs = [
+        "ops/array_ops.h",
         "ops/const_op.h",
+        "ops/math_ops.h",
         "ops/standard_ops.h",
     ],
+    other_hdrs_internal = [
+        "ops/array_ops_internal.h",
+        "ops/math_ops_internal.h",
+    ],
     pkg = "//tensorflow/core",
+    deps = [
+        ":array_ops",
+        ":const_op",
+        ":math_ops",
+    ],
 )
 
 tf_cc_test(
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 6f068546d2..a17491d4f7 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -34,7 +34,7 @@ namespace tensorflow {
 
 const int Graph::kControlSlot = -1;
 
-class NodeProperties {
+struct NodeProperties {
  public:
   NodeProperties(const OpDef* op_def, const NodeDef& node_def,
                  const DataTypeSlice inputs, const DataTypeSlice outputs)
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index e898377ded..43a7d6a70b 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -113,6 +113,7 @@ tf_cc_test(
     deps = [
         ":constant_folding",
         ":dependency_optimizer",
+        "//tensorflow/cc:array_ops_internal",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:cc_ops_internal",
         "//tensorflow/core:all_kernels",
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
index 985d6c6c3a..cf84ac710a 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -9,7 +9,13 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_all")
 
 VECTORIZER_DEPS = [
     ":vectorizer_registry",
+    "//tensorflow/cc:ops",
     "//tensorflow/core/grappler/optimizers/data:graph_utils",
+    "//tensorflow/core:core_cpu",
+    "//tensorflow/cc:scope_internal",
+    "//tensorflow/cc:math_ops",
+    "//tensorflow/cc:array_ops",
+    "//tensorflow/cc:const_op",
 ] + tf_protos_all()
 
 cc_library(
@@ -42,6 +48,26 @@ cc_library(
     ],
 )
 
+tf_cc_test(
+    name = "vectorizer_registry_test",
+    srcs = ["vectorizer_registry_test.cc"],
+    deps = [
+        ":vectorizer_registry",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ] + tf_protos_all(),
+)
+
+cc_library(
+    name = "add_vectorizer",
+    srcs = [
+        "add_vectorizer.cc",
+    ],
+    deps = VECTORIZER_DEPS,
+    alwayslink = 1,
+)
+
 cc_library(
     name = "cast_vectorizer",
     srcs = ["cast_vectorizer.cc"],
@@ -61,20 +87,10 @@ cc_library(
     hdrs = ["vectorizer_registry.h"],
     visibility = ["//visibility:public"],
     deps = [
+        ":add_vectorizer",
         ":cast_vectorizer",
         ":unpack_vectorizer",
         ":vectorizer",
         ":vectorizer_registry",
     ],
 )
-
-tf_cc_test(
-    name = "vectorizer_registry_test",
-    srcs = ["vectorizer_registry_test.cc"],
-    deps = [
-        ":vectorizer_registry",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/core:testlib",
-    ] + tf_protos_all(),
-)
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc
new file mode 100644
index 0000000000..9f2679a5ef
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/add_vectorizer.cc
@@ -0,0 +1,150 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope_internal.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/cc/ops/math_ops.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+
+namespace tensorflow {
+namespace grappler {
+
+namespace {
+
+const char* const kExpandDimsPrefix = "vectorized/expanddims/";
+
+// Reshapes stacked inputs for broadcast. Stacked inputs have an extra leading
+// dimension, which may cause automatic broadcasting rules to expand the
+// input dimensions wrongly when the unstacked shapes have different ranks.
+// To avoid that, we reshape stacked inputs to the maximum rank they need
+// to be broadcasted to.
+//
+// For example, suppose we have inputs A and B, where A is a stacked tensor with
+// shape [n, 5] (where n is the stack size) and B is an unstacked tensor with
+// shape [12, 7, 5]. If we added them directly, tensorflow broadcasting rules
+// would expand the dimensions of A to [1, n, 5], then (incorrectly) check that
+// the dimensions n and 7 are compatible, and if so, create an output of shape
+// [12, 7, 5]. However, correct addition of these inputs would create an output
+// with shape [n, 12, 7, 5]: we need to manually expand the dimensions of A
+// *after* the leading dimension, i.e. expand A to the shape [n, 1, 1, 5] before
+// broadcasting.
+Status ExpandDimsForBroadcast(std::vector<WrappedTensor>* inputs, Graph* g) {
+  Status status;
+  Scope parent = NewInternalScope(g, &status, nullptr);
+  Scope s = parent.NewSubScope(kExpandDimsPrefix);
+
+  // TODO(rachelim): We can potentially get rid of all these ops if shapes are
+  // known statically
+
+  Output const_0 = ops::Const(s, 0);
+  Output const_1 = ops::Const(s, 1);
+
+  std::vector<Output> ranks;
+  ranks.reserve(inputs->size());
+
+  // Get the stacked rank of each input
+  for (const auto& input : *inputs) {
+    Output rank = ops::Rank(s, Output(input.node, input.output_index));
+
+    if (!input.stacked) {
+      // If the input is unstacked, add 1
+      rank = ops::Add(s, rank, const_1);
+    }
+
+    ranks.push_back(rank);
+  }
+
+  // Pack the ranks into one tensor to get the max
+  Output packed_ranks = ops::Stack(s, ranks);
+
+  Output max_rank =
+      ops::Max(s, packed_ranks, const_0, ops::Max::Attrs().KeepDims(true));
+
+  std::vector<WrappedTensor> expanded_inputs;
+  expanded_inputs.reserve(inputs->size());
+
+  // For all inputs that are stacked, expand dimensions after dim 0.
+  for (size_t i = 0; i < inputs->size(); ++i) {
+    if (!inputs->at(i).stacked) {
+      expanded_inputs.push_back(inputs->at(i));
+      continue;
+    }
+
+    Output input(inputs->at(i).node, inputs->at(i).output_index);
+
+    // Number of dimensions to expand
+    Output rank_diff = ops::Sub(s, max_rank, ranks[i]);
+
+    // [1] * rank_diff
+    Output ones = ops::Tile(s, ops::Const(s, {1}), rank_diff);
+
+    Output const_vec_1 = ops::Const(s, {1});
+
+    Output shape = ops::Shape(s, input);
+
+    // shape[:1]
+    Output concat_pre =
+        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
+                          ops::StridedSlice::Attrs().BeginMask(1));
+
+    // shape[1:]
+    Output concat_post =
+        ops::StridedSlice(s, shape, const_vec_1, const_vec_1, const_vec_1,
+                          ops::StridedSlice::Attrs().EndMask(1));
+
+    // tf.concat([shape[:1], ones, shape[1:]], 0)
+    Output new_shape = ops::Concat(s, {concat_pre, ones, concat_post}, const_0);
+
+    Output result = ops::Reshape(s, input, new_shape);
+
+    expanded_inputs.push_back({result.node(), 0, true});
+  }
+
+  inputs->swap(expanded_inputs);
+  return status;
+}
+
+class AddVectorizer : public Vectorizer {
+ public:
+  Status Vectorize(const Node& node, Graph* outer_scope,
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
+    if (node.num_inputs() != 2) {
+      return errors::Internal("Add op should only have two inputs.");
+    }
+
+    TF_RETURN_IF_ERROR(ExpandDimsForBroadcast(&inputs, outer_scope));
+
+    // Add new Add node with the same op and attrs as the original node
+    Node* new_add_node;
+    TF_RETURN_IF_ERROR(NodeBuilder("Add", "Add")
+                           .Input(inputs[0].node, inputs[0].output_index)
+                           .Input(inputs[1].node, inputs[1].output_index)
+                           .Finalize(outer_scope, &new_add_node));
+
+    // Add output mappings
+    outputs->push_back({new_add_node, 0, true});
+    return Status::OK();
+  }
+};
+
+REGISTER_VECTORIZER("Add", AddVectorizer);
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index d977ff3198..8b93b1f2b8 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -64,9 +64,18 @@ void ReplaceEdgeSources(const TensorDesc& old_src, const TensorDesc& new_src,
   }
 }
 
+// Update node attrs to keep its properties consistent with the function
+void UpdateMapDefunAttrs(FunctionBody* map_defun_fn, Node* map_defun_node) {
+  map_defun_node->AddAttr("output_types", map_defun_fn->ret_types);
+
+  // TODO(rachelim): Propagate precise shapes if they're known, which may enable
+  // subsequent optimizations.
+  map_defun_node->AddAttr("output_shapes", std::vector<PartialTensorShape>(
+                                               map_defun_fn->ret_types.size()));
+}
+
 Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node,
                          const TensorDesc& output) {
-  // Note that we don't update MapDefun attrs as we go, only when we are done
   DataType type = output.first->output_type(output.second);
   int index = map_defun_fn->ret_nodes.size();
 
@@ -83,13 +92,13 @@ Status AddMapDefunOutput(FunctionBody* map_defun_fn, Node* map_defun_node,
   map_defun_fn->graph->AddEdge(output.first, output.second, ret_node, 0);
   map_defun_fn->ret_nodes.push_back(ret_node);
   map_defun_fn->ret_types.push_back(type);
+  UpdateMapDefunAttrs(map_defun_fn, map_defun_node);
 
   return s;
 }
 
 void RemoveMapDefunOutput(int output_position, Graph* outer_scope,
                           FunctionBody* map_defun_fn, Node* map_defun_node) {
-  // Note that we don't update MapDefun attrs as we go, only when we are done
   DCHECK_LT(output_position, map_defun_fn->ret_nodes.size())
       << "Trying to remove output that doesn't exist. Output number: "
       << output_position;
@@ -102,6 +111,7 @@ void RemoveMapDefunOutput(int output_position, Graph* outer_scope,
                                 output_position);
   map_defun_fn->ret_types.erase(map_defun_fn->ret_types.begin() +
                                 output_position);
+  UpdateMapDefunAttrs(map_defun_fn, map_defun_node);
 
   // Renumber the nodes and edges that come after
   for (int i = 0; i < num_later_outputs; ++i) {
@@ -342,13 +352,6 @@ void Vectorization::VectorizeHelper() {
   // need the MapDefun node and can delete it.
   if (map_defun_fn_->ret_nodes.empty()) {
     outer_scope_->RemoveNode(map_defun_node_);
-  } else {
-    // Update MapDefun node attrs accordingly
-    DCHECK_EQ(map_defun_fn_->ret_types.size(), map_defun_fn_->ret_nodes.size());
-    map_defun_node_->AddAttr(
-        "output_shapes",
-        std::vector<PartialTensorShape>(map_defun_fn_->ret_types.size()));
-    map_defun_node_->AddAttr("output_types", map_defun_fn_->ret_types);
   }
 }
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index a6020e36bb..be498d150b 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -145,7 +145,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
   FunctionDef* vectorized;
   Status s = VectorizeMapDefun(outer, *map_defun, &lib, &vectorized);
   LOG(ERROR) << s;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   EXPECT_EQ(GetRetval(*vectorized, 0), "ret0");
@@ -237,7 +237,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
 
   auto map_defun_node = vectorized->node_def(
       function_utils::FindFunctionNodeWithOp("MapDefun", *vectorized));
@@ -311,7 +311,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -389,7 +389,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -475,7 +475,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& unpack_node = vectorized->node_def(
@@ -574,7 +574,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   const NodeDef& cast_node = vectorized->node_def(
@@ -654,7 +654,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   // They should be unchanged
   // We check this somewhat manually as the names of nodes may have changed
   EXPECT_EQ(vectorized->node_def_size(), 1);
@@ -738,7 +738,7 @@ TEST(VectorizeMapDefunTest, VectorizeConst) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized));
@@ -817,7 +817,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) {
   *lib.add_function() = outer;
   *lib.add_function() = inner;
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
   EXPECT_TRUE(
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
   auto const_node = vectorized->node_def(
@@ -902,7 +902,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
   *lib.add_function() = inner;
 
   FunctionDef* vectorized;
-  EXPECT_TRUE(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized).ok());
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
 
   auto find_const = [vectorized](int val) -> const NodeDef* {
     for (const auto& n : vectorized->node_def()) {
@@ -924,6 +924,89 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
   EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name()));
 }
 
+// Before:
+//
+//                   +------+
+// +-----------------+ Arg0 +----------------------+
+// |                 +---+--+                      |
+// |                     |                         |
+// |                 +---v--+                      |
+// |   +-------------+ Arg0 +------------------+   |
+// |   |             +---+--+                  |   |
+// |   |                 |                     |   |
+// |   |                 |          +-----+    |   |
+// |   |                 |          |Const|    |   |
+// |   |                 |          +-+---+    |   |
+// |   |                 |            |        |   |
+// |   |                 |   +--------+        |   |
+// |   |                 |   |                 |   |
+// |   |               +-v---v-+               |   |
+// |   |               |  Add  |               |   |
+// |   |               +-+-----+               |   |
+// |   |                 |                     |   |
+// |   |                 |                     |   |
+// |   | MapDefun      +-v----+                |   |
+// |   +---------------| Ret  |----------------+   |
+// |                   +--v---+                    |
+// |                      |                        |
+// |                      |                        |
+// |                   +--v----                    |
+// +-------------------| Ret  |--------------------+
+//                     +------+
+//
+//
+//  After:
+//
+//              +------+
+// +------------+ Arg0 +----------------------+
+// |            +---+--+                      |
+// |                |                         |
+// |                |              +-----+    |
+// |                |              |Const|    |
+// |              +-v---------+    +--+--+    |
+// |              |ExpandDims*|       |       |
+// |              +-----+-----+       |       |
+// |                    |             |       |
+// |                    +-----+ +-----+       |
+// |                          | |             |
+// |                        +-v-v-+           |
+// |                        | Add |           |
+// |                        +--+--+           |
+// |                           |              |
+// |                       +---v--+           |
+// +-----------------------+ Ret  +-----------+
+//                         +------+
+//
+TEST(VectorizeMapDefunTest, VectorizeDefunAdd) {
+  // Note that this checks that the "Add" vectorizer is successful, but does not
+  // check that the transformed function is correct (i.e. produces the same
+  // output as the unvectorized map defun). For the latter, the tests are in
+  // tensorflow/python/data/experimental/kernel_tests/optimization/
+  // map_vectorization_test.py
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const", 2),
+       {{"Add"}, "Add", {"arg0", "Const:output:0"}, {{"T", DT_INT32}}}},
+      {{"ret0", "Add:z:0"}});
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}},
+                      inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+}
+
 // TODO(rachelim): More test cases when we get around to implementing them:
 // [] A badly defined converter, e.g. doesn't produce nodes that have the
 //    same number of outputs/inputs as the nodes to be converted
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index 803ff87924..d1d6cf28ab 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -80,6 +80,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("Basic", lambda x: (x, x + 1), None),
       ("Const", lambda x: 2, 12),
       ("Parallel", lambda x: (x, x + 1), 12),
+      ("Broadcast", lambda x: x + np.random.rand(5, 4, 3, 2), None),
       ("Gather", lambda x: array_ops.gather(x, 0), 12),
   )
   def testOptimization(self, map_fn, num_parallel_calls):
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index df15914233..53a382bd49 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -528,12 +528,15 @@ def tf_gen_op_wrappers_cc(
         op_lib_names = [],
         other_srcs = [],
         other_hdrs = [],
+        other_srcs_internal = [],
+        other_hdrs_internal = [],
         pkg = "",
         deps = [
             clean_dep("//tensorflow/cc:ops"),
             clean_dep("//tensorflow/cc:scope"),
             clean_dep("//tensorflow/cc:const_op"),
         ],
+        deps_internal = [],
         op_gen = clean_dep("//tensorflow/cc:cc_op_gen_main"),
         include_internal_ops = 0,
         visibility = None,
@@ -541,8 +544,8 @@ def tf_gen_op_wrappers_cc(
         api_def_srcs = []):
     subsrcs = other_srcs[:]
     subhdrs = other_hdrs[:]
-    internalsrcs = []
-    internalhdrs = []
+    internalsrcs = other_srcs_internal[:]
+    internalhdrs = other_hdrs_internal[:]
     for n in op_lib_names:
         tf_gen_op_wrapper_cc(
             n,
@@ -577,7 +580,7 @@ def tf_gen_op_wrappers_cc(
         name = name + "_internal",
         srcs = internalsrcs,
         hdrs = internalhdrs,
-        deps = deps + if_not_android([
+        deps = deps + deps_internal + if_not_android([
             clean_dep("//tensorflow/core:core_cpu"),
             clean_dep("//tensorflow/core:framework"),
             clean_dep("//tensorflow/core:lib"),
-- 
GitLab


From 84f81b91a7bf938560ef40974dd672b95803db42 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Wed, 10 Oct 2018 16:55:25 -0700
Subject: [PATCH 0738/1085] Add HloModule::Clone overload which takes an
 HloModuleConfig.

PiperOrigin-RevId: 216617647
---
 tensorflow/compiler/xla/service/hlo_module.cc | 7 ++++++-
 tensorflow/compiler/xla/service/hlo_module.h  | 2 ++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 93e04eb3db..4b0612b368 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -558,8 +558,13 @@ std::vector<HloComputation*> HloModule::MakeNonfusionComputations() const {
 }
 
 std::unique_ptr<HloModule> HloModule::Clone(const string& suffix) const {
+  return Clone(config(), suffix);
+}
+
+std::unique_ptr<HloModule> HloModule::Clone(const HloModuleConfig& config,
+                                            const string& suffix) const {
   VLOG(1) << "Cloning module :" << name_ << " --> " << suffix << "\n";
-  auto module = absl::make_unique<HloModule>(name_ + "-" + suffix, config_);
+  auto module = absl::make_unique<HloModule>(name_ + "-" + suffix, config);
 
   HloCloneContext context(module.get(), suffix);
   auto cloned_computation = entry_computation_->Clone(suffix, &context);
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index a1ffdc4023..de6d3a13bf 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -92,6 +92,8 @@ class HloModule {
 
   // Returns a deep copy of this module including all computations.
   std::unique_ptr<HloModule> Clone(const string& suffix = "clone") const;
+  std::unique_ptr<HloModule> Clone(const HloModuleConfig& config,
+                                   const string& suffix = "clone") const;
 
   // Performs a deep clone of the computation, by recursively cloning all
   // the called computations as well. If the clone context is specified, it
-- 
GitLab


From f5b79a0a545c74bfef15e86fa8c3fa019b4da7b8 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 10 Oct 2018 17:04:17 -0700
Subject: [PATCH 0739/1085] Minor change on coding style

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 23587a2d00..6ee0bcf6f5 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -103,9 +103,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         FileSystem* fs;
 
         TF_RETURN_IF_ERROR(ctx->env()->GetFileSystemForFile(
-            dataset()->patterns_[(current_pattern_index_ > 0)
-                                     ? current_pattern_index_ - 1
-                                     : 0],
+            dataset()
+                ->patterns_[std::max(size_t(0), current_pattern_index_ - 1)],
             &fs));
 
         while (!filepath_queue_.empty() ||
-- 
GitLab


From 9bad98c61f27b60152119bb1c2cfd402c3bf7f3d Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 10 Oct 2018 16:59:33 -0700
Subject: [PATCH 0740/1085] [tf.data] Changing the background performance
 modeling to be on by default.

PiperOrigin-RevId: 216618194
---
 tensorflow/core/framework/model.cc            | 32 +++++++++++++------
 .../core/kernels/data/cache_dataset_ops.cc    | 14 ++++----
 .../assert_next_dataset_op_test.py            | 12 -------
 .../data/experimental/ops/prefetching_ops.py  |  9 ++++++
 tensorflow/python/data/ops/dataset_ops.py     | 29 ++++++++---------
 .../data/ops/multi_device_iterator_ops.py     |  4 +++
 6 files changed, 57 insertions(+), 43 deletions(-)

diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index bfdb3a6658..9684b736a7 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -59,9 +59,15 @@ int64 Model::Node::ProcessingTimeLocked() {
       return NanosPerElementLocked() + batch_size * ProcessingTimeForInputs();
     }
     case Type::FILTER: {
+      if (inputs_.size() <= 1) {
+        return NanosPerElementLocked();
+      }
       std::shared_ptr<Node> input = inputs_.front();
-      double ratio = static_cast<double>(input->num_elements()) /
-                     static_cast<double>(num_elements_);
+      double ratio = 0.0L;
+      if (num_elements_ > 0) {
+        ratio = static_cast<double>(input->num_elements()) /
+                static_cast<double>(num_elements_);
+      }
       return NanosPerElementLocked() +
              static_cast<int64>(ratio *
                                 static_cast<double>(ProcessingTimeForInputs()));
@@ -115,15 +121,21 @@ int64 Model::Node::OutputTimeLocked(std::vector<int64>* input_times) {
              batch_size * OutputTimeForInputs(input_times);
     }
     case Type::FILTER: {
+      if (inputs_.size() <= 1) {
+        return NanosPerElementLocked();
+      }
       std::shared_ptr<Node> input = inputs_.front();
-      int64 old_value = (*input_times)[input_times->size() - 1];
-      double ratio = static_cast<double>(input->num_elements()) /
-                     static_cast<double>(num_elements_);
-      (*input_times)[input_times->size() - 1] = static_cast<int64>(
-          static_cast<double>(old_value + NanosPerElementLocked()) / ratio);
-      auto cleanup = gtl::MakeCleanup([input_times, old_value]() {
-        (*input_times)[input_times->size() - 1] = old_value;
-      });
+      double ratio = 0.0L;
+      if (num_elements_ > 0) {
+        ratio = static_cast<double>(input->num_elements()) /
+                static_cast<double>(num_elements_);
+        int64 old_value = (*input_times)[input_times->size() - 1];
+        (*input_times)[input_times->size() - 1] = static_cast<int64>(
+            static_cast<double>(old_value + NanosPerElementLocked()) / ratio);
+        auto cleanup = gtl::MakeCleanup([input_times, old_value]() {
+          (*input_times)[input_times->size() - 1] = old_value;
+        });
+      }
       return NanosPerElementLocked() +
              static_cast<int64>(
                  static_cast<double>(OutputTimeForInputs(input_times)) * ratio);
diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc
index 34c6c86538..f2419db3dc 100644
--- a/tensorflow/core/kernels/data/cache_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc
@@ -516,10 +516,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
         // `FileReaderIterator` and seek to the `cur_index`.
         switch (mode_) {
           case Mode::read:
-            iterator_.reset(new FileReaderIterator({dataset(), prefix()}));
+            iterator_.reset(new FileReaderIterator(
+                {dataset(), strings::StrCat(prefix(), "Impl")}));
             break;
           case Mode::write:
-            iterator_.reset(new FileWriterIterator({dataset(), prefix()}));
+            iterator_.reset(new FileWriterIterator(
+                {dataset(), strings::StrCat(prefix(), "Impl")}));
         }
       }
 
@@ -866,12 +868,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
       void InitializeIterator() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         switch (mode_) {
           case Mode::read:
-            iterator_.reset(
-                new MemoryReaderIterator({dataset(), prefix()}, cache_));
+            iterator_.reset(new MemoryReaderIterator(
+                {dataset(), strings::StrCat(prefix(), "Impl")}, cache_));
             break;
           case Mode::write:
-            iterator_.reset(
-                new MemoryWriterIterator({dataset(), prefix()}, cache_));
+            iterator_.reset(new MemoryWriterIterator(
+                {dataset(), strings::StrCat(prefix(), "Impl")}, cache_));
         }
       }
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
index 45b77b5c20..a138436fff 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
@@ -48,18 +48,6 @@ class AssertNextDatasetTest(test_base.DatasetTestBase):
           "Map transformation instead."):
         sess.run(get_next)
 
-  def testAssertNextShort(self):
-    dataset = dataset_ops.Dataset.from_tensors(0).apply(
-        optimization.assert_next(["Map", "Whoops"])).map(lambda x: x)
-    iterator = dataset.make_one_shot_iterator()
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          "Asserted next 2 transformations but encountered only 1."):
-        sess.run(get_next)
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py
index 48d7136f95..df082e9e35 100644
--- a/tensorflow/python/data/experimental/ops/prefetching_ops.py
+++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py
@@ -506,6 +506,15 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
     else:
       return super(_CopyToDeviceDataset, self).make_one_shot_iterator()
 
+  def make_initializable_iterator(self):
+    if self._is_gpu_target:
+      # TODO(b/116140813) : Enable dynamic optimizations.
+      options = dataset_ops.Options()
+      options.experimental_autotune = False
+      return self.with_options(options).make_initializable_iterator()
+    else:
+      return super(_CopyToDeviceDataset, self).make_initializable_iterator()
+
   def _as_variant_tensor(self):
     with ops.device(self._target_device):
       return gen_dataset_ops.generator_dataset(
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index cdb883cac9..d7e37da48b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -99,6 +99,16 @@ class Dataset(object):
         return options
     return Options()
 
+  def _apply_options(self):
+    dataset = self
+    options = self.options()
+    static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
+    if static_optimizations:
+      dataset = _OptimizeDataset(dataset, static_optimizations)
+    if options.experimental_autotune is not False:
+      dataset = _ModelDataset(dataset)
+    return dataset
+
   def make_initializable_iterator(self, shared_name=None):
     """Creates an `Iterator` for enumerating the elements of this dataset.
 
@@ -127,13 +137,7 @@ class Dataset(object):
       raise RuntimeError(
           "dataset.make_initializable_iterator is not supported when eager "
           "execution is enabled.")
-    dataset = self
-    options = self.options()
-    static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
-    if static_optimizations:
-      dataset = _OptimizeDataset(dataset, static_optimizations)
-    if options.experimental_autotune:
-      dataset = _ModelDataset(dataset)
+    dataset = self._apply_options()
     if shared_name is None:
       shared_name = ""
     if compat.forward_compatible(2018, 8, 3):
@@ -163,7 +167,8 @@ class Dataset(object):
       RuntimeError: If eager execution is not enabled.
     """
     if context.executing_eagerly():
-      return iterator_ops.EagerIterator(self)
+      dataset = self._apply_options()
+      return iterator_ops.EagerIterator(dataset)
     else:
       raise RuntimeError("dataset.__iter__() is only supported when eager "
                          "execution is enabled.")
@@ -194,13 +199,7 @@ class Dataset(object):
         core_random_seed.set_random_seed(
             (graph_level_seed + 87654321 * op_level_seed) % (2 ** 63 - 1))
 
-      dataset = self
-      options = self.options()
-      static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
-      if static_optimizations:
-        dataset = _OptimizeDataset(dataset, static_optimizations)
-      if options.experimental_autotune:
-        dataset = _ModelDataset(dataset)
+      dataset = self._apply_options()
       return dataset._as_variant_tensor()  # pylint: disable=protected-access
 
     try:
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index b7d3aac206..3bcc20b333 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -206,6 +206,10 @@ class MultiDeviceIterator(object):
           i, self._multi_device_iterator_resource, self._incarnation_id,
           self._source_device_tensor, device, self._dataset.output_shapes,
           self._dataset.output_types, self._dataset.output_classes)
+      # TODO(b/116140813) : Enable dynamic optimizations.
+      options = dataset_ops.Options()
+      options.experimental_autotune = False
+      ds = ds.with_options(options)
       if prefetch_buffer_size > 0:
         ds = ds.prefetch(prefetch_buffer_size)
       with ops.device(device):
-- 
GitLab


From 128903381b93289b1d19fef255b939d30339727c Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Wed, 10 Oct 2018 17:00:49 -0700
Subject: [PATCH 0741/1085] Turn on worker watchdog in TPUEstimator.

PiperOrigin-RevId: 216618378
---
 .../contrib/tpu/python/tpu/session_support.py  | 18 ++++++++++++++++++
 .../contrib/tpu/python/tpu/tpu_estimator.py    |  6 ++++++
 2 files changed, 24 insertions(+)

diff --git a/tensorflow/contrib/tpu/python/tpu/session_support.py b/tensorflow/contrib/tpu/python/tpu/session_support.py
index 05264f5a46..8248256373 100644
--- a/tensorflow/contrib/tpu/python/tpu/session_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/session_support.py
@@ -35,6 +35,8 @@ from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.training import training_util
 
+_WATCHDOG = None
+
 
 class CoordinatorShutdownException(Exception):
   """Raised when the coordinator needs to shutdown."""
@@ -256,6 +258,22 @@ class WatchdogManager(threading.Thread):
       time.sleep(self.ping_interval)
 
 
+def start_worker_watchdog(session,
+                          devices=None,
+                          ping_interval=60,
+                          shutdown_timeout=3600):
+  """Start global worker watchdog to shutdown workers on coordinator exit."""
+  global _WATCHDOG
+  if _WATCHDOG is None:
+    # Ensure we can send a few pings before we timeout!
+    ping_interval = min(shutdown_timeout / 10., ping_interval)
+    logging.info('Enabling watchdog timer with %d second timeout',
+                 shutdown_timeout)
+    _WATCHDOG = WatchdogManager(session, devices, ping_interval,
+                                shutdown_timeout)
+    _WATCHDOG.configure_and_run()
+
+
 class GracefulShutdownHook(session_run_hook.SessionRunHook):
   """Session hook that watches for shutdown events.
 
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 367606ef27..23c30e3f06 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -480,6 +480,12 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
     self._outfeed_controller = _OpQueueContext(
         name='OutfeedController', target=self._run_outfeed, args=(session,))
 
+    # Enable the worker watchdog to terminate workers on coordinator exit.
+    watchdog_timeout = int(os.environ.get('TF_TPU_WATCHDOG_TIMEOUT', '0'))
+    if watchdog_timeout > 0:
+      session_support.start_worker_watchdog(session,
+                                            shutdown_timeout=watchdog_timeout)
+
   def before_run(self, run_context):
     self._feed_error = None
 
-- 
GitLab


From 725a6c8ee178ec7802ddaa9912cace3eb964b1ad Mon Sep 17 00:00:00 2001
From: rachellj218 <42055825+rachellj218@users.noreply.github.com>
Date: Wed, 10 Oct 2018 17:16:36 -0700
Subject: [PATCH 0742/1085] - Don't set tpu optimizer parameter variable during
 weight initialization if the optimizer isn't set, e.g. loading weights and
 then predict. (#22786)

- Add load_weights for `KerasTpuModel`.

PiperOrigin-RevId: 215920993
---
 tensorflow/contrib/tpu/python/tpu/keras_support.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index a3a7fd8bb0..af183b3232 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -1998,6 +1998,9 @@ class KerasTPUModel(models.Model):
 
     logging.info('Setting weights on TPU model.')
     cloned_model.set_weights(weights)
+    if self._tpu_model.optimizer is None:
+      # tpu_model may not be compiled, e.g., loading weights and then predict.
+      return
     for k, v in six.iteritems(cpu_optimizer_config):
       opt_var = getattr(self._tpu_model.optimizer, k)
       if isinstance(opt_var, variables.Variable):
@@ -2052,6 +2055,10 @@ class KerasTPUModel(models.Model):
     self._cpu_model.set_weights(weights)
     self._tpu_weights_initialized = False
 
+  def load_weights(self, filepath, by_name=False):
+    self._cpu_model.load_weights(filepath, by_name)
+    self._tpu_weights_initialized = False
+
 
 # pylint: disable=bad-continuation
 def _validate_shapes(model):
-- 
GitLab


From 0be7b32fa4db37fe7e4a12ab12f87796ec07e54f Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Wed, 10 Oct 2018 17:03:27 -0700
Subject: [PATCH 0743/1085] [XLA:GPU] Rename cudnn convolution passes.

Make them shorter and more consistent.

- CudnnConvolutionFoo -> CudnnConvFoo
- PadInsertion -> CudnnConvPaddingLegalization
- PadForTensorCores -> CudnnConvPadForSpeed (padding channel dimensions from
  3 -> 4 is not a tensor-cores-related optimization and ideally should be run
  on P100s as well).

PiperOrigin-RevId: 216618934
---
 tensorflow/compiler/xla/service/gpu/BUILD     | 64 +++++++++----------
 .../xla/service/gpu/convolution_thunk.cc      |  8 +--
 .../xla/service/gpu/convolution_thunk.h       |  2 +-
 ...cker.cc => cudnn_conv_algorithm_picker.cc} | 21 +++---
 ...picker.h => cudnn_conv_algorithm_picker.h} | 17 +++--
 ...r_cores.cc => cudnn_conv_pad_for_speed.cc} |  6 +-
 ...sor_cores.h => cudnn_conv_pad_for_speed.h} | 13 ++--
 ...st.cc => cudnn_conv_pad_for_speed_test.cc} | 28 ++++----
 ....cc => cudnn_conv_padding_legalization.cc} | 14 ++--
 ...on.h => cudnn_conv_padding_legalization.h} | 12 ++--
 ...ion_rewriter.cc => cudnn_conv_rewriter.cc} | 30 +++++----
 ...ution_rewriter.h => cudnn_conv_rewriter.h} | 12 ++--
 ...er_test.cc => cudnn_conv_rewriter_test.cc} | 45 ++++++-------
 ...olution_runner.cc => cudnn_conv_runner.cc} | 47 +++++++-------
 ...nvolution_runner.h => cudnn_conv_runner.h} | 29 ++++-----
 ...writer.cc => cudnn_fused_conv_rewriter.cc} |  4 +-
 ...rewriter.h => cudnn_fused_conv_rewriter.h} |  8 +--
 .../xla/service/gpu/ir_emission_utils.h       |  4 +-
 .../xla/service/gpu/ir_emitter_unnested.cc    |  2 +-
 .../xla/service/gpu/nvptx_compiler.cc         | 35 +++++-----
 .../compiler/xla/service/gpu/tests/BUILD      |  4 +-
 ...t.cc => cudnn_fused_conv_rewriter_test.cc} | 23 ++++---
 22 files changed, 214 insertions(+), 214 deletions(-)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_algorithm_picker.cc => cudnn_conv_algorithm_picker.cc} (95%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_algorithm_picker.h => cudnn_conv_algorithm_picker.h} (78%)
 rename tensorflow/compiler/xla/service/gpu/{pad_for_tensor_cores.cc => cudnn_conv_pad_for_speed.cc} (98%)
 rename tensorflow/compiler/xla/service/gpu/{pad_for_tensor_cores.h => cudnn_conv_pad_for_speed.h} (72%)
 rename tensorflow/compiler/xla/service/gpu/{pad_for_tensor_cores_test.cc => cudnn_conv_pad_for_speed_test.cc} (86%)
 rename tensorflow/compiler/xla/service/gpu/{pad_insertion.cc => cudnn_conv_padding_legalization.cc} (97%)
 rename tensorflow/compiler/xla/service/gpu/{pad_insertion.h => cudnn_conv_padding_legalization.h} (78%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_rewriter.cc => cudnn_conv_rewriter.cc} (95%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_rewriter.h => cudnn_conv_rewriter.h} (74%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_rewriter_test.cc => cudnn_conv_rewriter_test.cc} (95%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_runner.cc => cudnn_conv_runner.cc} (90%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_convolution_runner.h => cudnn_conv_runner.h} (67%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_fused_convolution_rewriter.cc => cudnn_fused_conv_rewriter.cc} (98%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_fused_convolution_rewriter.h => cudnn_fused_conv_rewriter.h} (77%)
 rename tensorflow/compiler/xla/service/gpu/tests/{cudnn_fused_convolution_rewriter_test.cc => cudnn_fused_conv_rewriter_test.cc} (93%)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 62da43d68a..ea285994be 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -154,7 +154,7 @@ cc_library(
     deps = [
         ":backend_configs",
         ":buffer_allocations",
-        ":cudnn_convolution_runner",
+        ":cudnn_conv_runner",
         ":elemental_ir_emitter",
         ":gpu_constants",
         ":gpu_executable",
@@ -323,7 +323,7 @@ cc_library(
     ],
     deps = [
         ":buffer_allocations",
-        ":cudnn_convolution_runner",
+        ":cudnn_conv_runner",
         ":hlo_execution_profiler",
         ":infeed_manager",
         ":ir_emission_utils",
@@ -385,13 +385,13 @@ cc_library(
 )
 
 cc_library(
-    name = "cudnn_convolution_algorithm_picker",
-    srcs = ["cudnn_convolution_algorithm_picker.cc"],
-    hdrs = ["cudnn_convolution_algorithm_picker.h"],
+    name = "cudnn_conv_algorithm_picker",
+    srcs = ["cudnn_conv_algorithm_picker.cc"],
+    hdrs = ["cudnn_conv_algorithm_picker.h"],
     deps = [
         ":backend_configs",
         ":buffer_comparator",
-        ":cudnn_convolution_runner",
+        ":cudnn_conv_runner",
         ":gpu_executable",
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:literal_util",
@@ -410,9 +410,9 @@ cc_library(
 )
 
 cc_library(
-    name = "cudnn_convolution_runner",
-    srcs = ["cudnn_convolution_runner.cc"],
-    hdrs = ["cudnn_convolution_runner.h"],
+    name = "cudnn_conv_runner",
+    srcs = ["cudnn_conv_runner.cc"],
+    hdrs = ["cudnn_conv_runner.h"],
     deps = [
         ":backend_configs",
         ":ir_emission_utils",
@@ -432,9 +432,9 @@ cc_library(
 )
 
 cc_library(
-    name = "cudnn_convolution_rewriter",
-    srcs = ["cudnn_convolution_rewriter.cc"],
-    hdrs = ["cudnn_convolution_rewriter.h"],
+    name = "cudnn_conv_rewriter",
+    srcs = ["cudnn_conv_rewriter.cc"],
+    hdrs = ["cudnn_conv_rewriter.h"],
     deps = [
         ":backend_configs",
         ":ir_emission_utils",
@@ -449,10 +449,10 @@ cc_library(
 )
 
 tf_cc_test(
-    name = "cudnn_convolution_rewriter_test",
-    srcs = ["cudnn_convolution_rewriter_test.cc"],
+    name = "cudnn_conv_rewriter_test",
+    srcs = ["cudnn_conv_rewriter_test.cc"],
     deps = [
-        ":cudnn_convolution_rewriter",
+        ":cudnn_conv_rewriter",
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:test_helpers",
@@ -581,9 +581,9 @@ tf_cc_test(
 )
 
 cc_library(
-    name = "pad_insertion",
-    srcs = ["pad_insertion.cc"],
-    hdrs = ["pad_insertion.h"],
+    name = "cudnn_conv_padding_legalization",
+    srcs = ["cudnn_conv_padding_legalization.cc"],
+    hdrs = ["cudnn_conv_padding_legalization.h"],
     deps = [
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:literal",
@@ -600,9 +600,9 @@ cc_library(
 )
 
 cc_library(
-    name = "pad_for_tensor_cores",
-    srcs = ["pad_for_tensor_cores.cc"],
-    hdrs = ["pad_for_tensor_cores.h"],
+    name = "cudnn_conv_pad_for_speed",
+    srcs = ["cudnn_conv_pad_for_speed.cc"],
+    hdrs = ["cudnn_conv_pad_for_speed.h"],
     deps = [
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:literal_util",
@@ -614,11 +614,11 @@ cc_library(
 )
 
 tf_cc_test(
-    name = "pad_for_tensor_cores_test",
-    srcs = ["pad_for_tensor_cores_test.cc"],
+    name = "cudnn_conv_pad_for_speed_test",
+    srcs = ["cudnn_conv_pad_for_speed_test.cc"],
     deps = [
+        ":cudnn_conv_pad_for_speed",
         ":ir_emission_utils",
-        ":pad_for_tensor_cores",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla/service:hlo_matchers",
@@ -660,9 +660,11 @@ cc_library(
     srcs = ["nvptx_compiler.cc"],
     hdrs = ["nvptx_compiler.h"],
     deps = [
-        ":cudnn_convolution_algorithm_picker",
-        ":cudnn_convolution_rewriter",
-        ":cudnn_fused_convolution_rewriter",
+        ":cudnn_conv_algorithm_picker",
+        ":cudnn_conv_pad_for_speed",
+        ":cudnn_conv_padding_legalization",
+        ":cudnn_conv_rewriter",
+        ":cudnn_fused_conv_rewriter",
         ":fusion_merger",
         ":gpu_constants",
         ":gpu_copy_insertion",
@@ -674,8 +676,6 @@ cc_library(
         ":ir_emission_utils",
         ":ir_emitter",
         ":multi_output_fusion",
-        ":pad_for_tensor_cores",
-        ":pad_insertion",
         ":partition_assignment",
         ":stream_assignment",
         ":stream_executor_util",
@@ -966,9 +966,9 @@ tf_cc_test(
 )
 
 cc_library(
-    name = "cudnn_fused_convolution_rewriter",
-    srcs = ["cudnn_fused_convolution_rewriter.cc"],
-    hdrs = ["cudnn_fused_convolution_rewriter.h"],
+    name = "cudnn_fused_conv_rewriter",
+    srcs = ["cudnn_fused_conv_rewriter.cc"],
+    hdrs = ["cudnn_fused_conv_rewriter.h"],
     deps = [
         ":backend_configs",
         ":ir_emission_utils",
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
index 4effea637d..e1dffad304 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/strings/str_cat.h"
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h"
 #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -56,9 +56,9 @@ Status ConvolutionThunk::ExecuteOnStream(
       buffer_allocations.GetDeviceAddress(scratch_buffer_);
 
   auto op_profiler = profiler->MakeScopedInstructionProfiler(hlo_instruction());
-  TF_RETURN_IF_ERROR(RunCudnnConvolution(cudnn_call_,
-                                         absl::MakeSpan(operand_se_buffers),
-                                         result_buffer, scratch, stream));
+  TF_RETURN_IF_ERROR(RunCudnnConv(cudnn_call_,
+                                  absl::MakeSpan(operand_se_buffers),
+                                  result_buffer, scratch, stream));
 
   void* ptrs[] = {result_buffer.opaque(), scratch.opaque()};
   se::DeviceMemory<void*> tuple_addr(
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
index f53bc54198..c71515490c 100644
--- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
 #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
 #include "tensorflow/compiler/xla/service/gpu/thunk.h"
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc
similarity index 95%
rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc
index 6d4a72038f..6d6780fa1c 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "absl/types/optional.h"
@@ -145,9 +145,8 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) {
 // cache misses and doing extra work.  Overall, caching doesn't seem worth the
 // trouble, but we may want to revisit this if we ever find a model where
 // caching would speed up compilation a lot.
-StatusOr<CudnnConvolutionAlgorithmPicker::AutotuneResult>
-CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
-    HloCustomCallInstruction* instr) {
+StatusOr<CudnnConvAlgorithmPicker::AutotuneResult>
+CudnnConvAlgorithmPicker::PickBestAlgorithm(HloCustomCallInstruction* instr) {
   // TODO(timshen): for now only check fp16. It can be expanded to other types,
   // with some work on the HLO routines.
   const bool cross_check_enabled =
@@ -253,10 +252,10 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
     backend_config.set_algorithm(alg.algo_id());
     backend_config.set_tensor_ops_enabled(alg.tensor_ops_enabled());
     TF_RETURN_IF_ERROR(instr->set_backend_config(backend_config));
-    bool launch_ok = RunCudnnConvolution(instr, absl::MakeSpan(operand_buffers),
-                                         result_buffer, &scratch_allocator,
-                                         &stream, &profile_result)
-                         .ok();
+    bool launch_ok =
+        RunCudnnConv(instr, absl::MakeSpan(operand_buffers), result_buffer,
+                     &scratch_allocator, &stream, &profile_result)
+            .ok();
 
     if (launch_ok && profile_result.is_valid()) {
       const bool crash_on_checking_failure =
@@ -328,7 +327,7 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
       instr->ToString());
 }
 
-StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnInstruction(
+StatusOr<bool> CudnnConvAlgorithmPicker::RunOnInstruction(
     HloInstruction* instr) {
   CHECK(IsCustomCallToDnnConvolution(*instr));
 
@@ -378,7 +377,7 @@ StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnInstruction(
   return true;
 }
 
-StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnComputation(
+StatusOr<bool> CudnnConvAlgorithmPicker::RunOnComputation(
     HloComputation* computation) {
   std::vector<HloInstruction*> convs;
   for (auto* instr : computation->instructions()) {
@@ -395,7 +394,7 @@ StatusOr<bool> CudnnConvolutionAlgorithmPicker::RunOnComputation(
   return changed;
 }
 
-StatusOr<bool> CudnnConvolutionAlgorithmPicker::Run(HloModule* module) {
+StatusOr<bool> CudnnConvAlgorithmPicker::Run(HloModule* module) {
   bool changed = false;
   for (HloComputation* computation : module->MakeNonfusionComputations()) {
     TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation));
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h
similarity index 78%
rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h
index 136c32210a..642af787af 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h
@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_ALGORITHM_PICKER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_ALGORITHM_PICKER_H_
 
 #include "absl/time/time.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/service/device_memory_allocator.h"
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
@@ -31,18 +31,17 @@ namespace gpu {
 
 // Modifies CustomCalls to cudnn convolutions, choosing the best algorithm for
 // each and adding explicit scratch space to the CustomCalls.
-class CudnnConvolutionAlgorithmPicker : public HloModulePass {
+class CudnnConvAlgorithmPicker : public HloModulePass {
  public:
   // If the `allocator` parameter is not null, we will use it to allocate temp
   // memory while timing the various convolution algorithms.  If it's null,
   // we'll use the default allocator on the StreamExecutor.
-  CudnnConvolutionAlgorithmPicker(se::StreamExecutor* stream_exec,
-                                  DeviceMemoryAllocator* allocator,
-                                  Compiler* compiler)
+  CudnnConvAlgorithmPicker(se::StreamExecutor* stream_exec,
+                           DeviceMemoryAllocator* allocator, Compiler* compiler)
       : stream_exec_(stream_exec), allocator_(allocator), compiler_(compiler) {}
 
   absl::string_view name() const override {
-    return "cudnn-convolution-algorithm-picker";
+    return "cudnn-conv-algorithm-picker";
   }
 
   StatusOr<bool> Run(HloModule* module) override;
@@ -67,4 +66,4 @@ class CudnnConvolutionAlgorithmPicker : public HloModulePass {
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_ALGORITHM_PICKER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc
similarity index 98%
rename from tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc
index 8f1f5a7bf5..24b1f1af27 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h"
 
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
@@ -108,7 +108,7 @@ static HloInstruction* PadInstruction(HloInstruction* instr,
 static StatusOr<bool> PadFeaturesDims(HloCustomCallInstruction* conv) {
   CHECK_EQ(0, conv->shape().tuple_shapes(1).dimensions(0))
       << "conv must use 0 scratch bytes, i.e. this pass must be run "
-         "before CudnnConvolutionAlgorithmPicker.";
+         "before CudnnConvAlgorithmPicker.";
 
   TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv));
   const auto& dnums = conv->convolution_dimension_numbers();
@@ -252,7 +252,7 @@ static std::vector<HloCustomCallInstruction*> GetRelevantConvs(
   return convs;
 }
 
-StatusOr<bool> PadForTensorCores::Run(HloModule* module) {
+StatusOr<bool> CudnnConvPadForSpeed::Run(HloModule* module) {
   bool changed = false;
   for (HloComputation* comp : module->MakeNonfusionComputations()) {
     for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) {
diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h
similarity index 72%
rename from tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h
index e592a3774e..89a894e9d3 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_FOR_TENSOR_CORES_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_FOR_TENSOR_CORES_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_
 
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 
@@ -29,10 +29,13 @@ namespace gpu {
 // opposite of useful on other GPUs, so you should check what GPU you're
 // targeting before running this pass.
 //
+// TODO(jlebar): Rework this.  For one thing, it should not be Volta-only.
+// Padding input channels 3 to 4 is (we think) applicable to Pascal as well.
+//
 // TODO(jlebar): Also pad dots.
-class PadForTensorCores : public HloModulePass {
+class CudnnConvPadForSpeed : public HloModulePass {
  public:
-  absl::string_view name() const override { return "pad for tensor cores"; }
+  absl::string_view name() const override { return "cudnn-conv-pad-for-speed"; }
 
   StatusOr<bool> Run(HloModule* module) override;
 };
@@ -40,4 +43,4 @@ class PadForTensorCores : public HloModulePass {
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_FOR_TENSOR_CORES_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_
diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc
similarity index 86%
rename from tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores_test.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc
index 5c92b0dcb8..ec403021e6 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h"
 
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_matchers.h"
@@ -29,9 +29,9 @@ namespace {
 namespace op = xla::testing::opcode_matchers;
 using ::testing::_;
 
-class PadForTensorCoresTest : public HloVerifiedTestBase {};
+class CudnnConvPadForSpeedTest : public HloVerifiedTestBase {};
 
-TEST_F(PadForTensorCoresTest, PadF16ForwardConvInputChannels) {
+TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvInputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -42,7 +42,7 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvInputChannels) {
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convForward"
   })");
-  EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
 
   SCOPED_TRACE(module().ToString());
@@ -55,7 +55,7 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvInputChannels) {
                                ShapeUtil::MakeShape(F16, {2, 2, 48, 40})));
 }
 
-TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) {
+TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvOutputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -66,7 +66,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) {
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convBackwardInput"
   })");
-  EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::CustomCall(kCudnnConvBackwardInputCallTarget,
                                    op::Pad(op::Parameter(0), _),
@@ -77,7 +77,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) {
                                ShapeUtil::MakeShape(F16, {2, 2, 40, 48})));
 }
 
-TEST_F(PadForTensorCoresTest, PadF16ForwardConvOutputChannels) {
+TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvOutputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -88,7 +88,7 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvOutputChannels) {
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convForward"
   })");
-  EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::Tuple(op::Slice(op::GetTupleElement(op::CustomCall(
                                   kCudnnConvForwardCallTarget, op::Parameter(0),
@@ -96,7 +96,7 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvOutputChannels) {
                               _));
 }
 
-TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvInputChannels) {
+TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvInputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -108,7 +108,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvInputChannels) {
               custom_call_target="__cudnn$convBackwardInput"
     ROOT gte = f16[10,20,30,41] get-tuple-element(result), index=0
   })");
-  EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::GetTupleElement(op::Tuple(
                         op::Slice(op::GetTupleElement(op::CustomCall(
@@ -117,7 +117,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvInputChannels) {
                         _)));
 }
 
-TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) {
+TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvInputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -129,7 +129,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) {
               custom_call_target="__cudnn$convBackwardFilter"
     ROOT gte = f16[2,2,41,40] get-tuple-element(result), index=0
   })");
-  EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::GetTupleElement(op::Tuple(
                         op::Slice(op::GetTupleElement(op::CustomCall(
@@ -138,7 +138,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) {
                         _)));
 }
 
-TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) {
+TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvOutputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -150,7 +150,7 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) {
               custom_call_target="__cudnn$convBackwardFilter"
     ROOT gte = f16[2,2,40,41] get-tuple-element(result), index=0
   })");
-  EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::GetTupleElement(op::Tuple(
                         op::Slice(op::GetTupleElement(op::CustomCall(
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.cc
similarity index 97%
rename from tensorflow/compiler/xla/service/gpu/pad_insertion.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.cc
index ae7abca7c6..d7829045cc 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/pad_insertion.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h"
 
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/literal.h"
@@ -132,7 +132,8 @@ HloInstruction* MaybePaddedKernel(const Window& conv_window,
 }
 }  // namespace
 
-bool PadInsertion::CanonicalizeForwardConvolution(HloInstruction* conv) {
+bool CudnnConvPaddingLegalization::CanonicalizeForwardConvolution(
+    HloInstruction* conv) {
   if (IsForwardConvolutionCanonical(*conv)) {
     return false;
   }
@@ -187,7 +188,7 @@ void IncreasePaddingHighBy(int64 delta, WindowDimension* window_dim) {
 }
 }  // namespace
 
-bool PadInsertion::CanonicalizeBackwardFilterConvolution(
+bool CudnnConvPaddingLegalization::CanonicalizeBackwardFilterConvolution(
     HloInstruction* backward_conv) {
   CHECK_EQ(backward_conv->custom_call_target(),
            kCudnnConvBackwardFilterCallTarget);
@@ -260,7 +261,7 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution(
   return true;
 }
 
-bool PadInsertion::CanonicalizeBackwardInputConvolution(
+bool CudnnConvPaddingLegalization::CanonicalizeBackwardInputConvolution(
     HloInstruction* backward_conv) {
   if (window_util::HasSymmetricPadding(backward_conv->window())) {
     return false;
@@ -377,7 +378,8 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution(
   return true;
 }
 
-StatusOr<bool> PadInsertion::RunOnComputation(HloComputation* computation) {
+StatusOr<bool> CudnnConvPaddingLegalization::RunOnComputation(
+    HloComputation* computation) {
   bool changed = false;
   std::vector<HloCustomCallInstruction*> convs;
   for (auto* instr : computation->instructions()) {
@@ -402,7 +404,7 @@ StatusOr<bool> PadInsertion::RunOnComputation(HloComputation* computation) {
   return changed;
 }
 
-StatusOr<bool> PadInsertion::Run(HloModule* module) {
+StatusOr<bool> CudnnConvPaddingLegalization::Run(HloModule* module) {
   bool changed = false;
   for (HloComputation* computation : module->MakeNonfusionComputations()) {
     TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation));
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h
similarity index 78%
rename from tensorflow/compiler/xla/service/gpu/pad_insertion.h
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h
index 25cdf64c4c..7d1b075517 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_INSERTION_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_INSERTION_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PADDING_LEGALIZATION_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PADDING_LEGALIZATION_H_
 
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 
@@ -24,9 +24,11 @@ namespace gpu {
 // An HLO pass that canonicalizes convolution instructions for GPU codegen. It
 // inserts Pad instructions before Convolution instructions with uncanonicalized
 // padding, so that they can be lowered to cuDNN convolution.
-class PadInsertion : public HloModulePass {
+class CudnnConvPaddingLegalization : public HloModulePass {
  public:
-  absl::string_view name() const override { return "pad insertion"; }
+  absl::string_view name() const override {
+    return "cudnn-conv-padding-legalization";
+  }
 
   StatusOr<bool> Run(HloModule* module) override;
 
@@ -41,4 +43,4 @@ class PadInsertion : public HloModulePass {
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_INSERTION_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PADDING_LEGALIZATION_H_
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
similarity index 95%
rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
index 437d25727e..5cea66de38 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h"
 
 #include <cstdlib>
 #include <numeric>
@@ -188,9 +188,9 @@ std::tuple<bool, Window, ConvolutionDimensionNumbers> MatchBackwardFilter(
     // the amount of high padding the same as the amount of low padding as long
     // as it is between min_padding_high and max_padding_high. If it is not in
     // that range, we pick the one that's closest to dim->padding_low() and let
-    // PadInsertion canonicalize the resultant backward convolution later.
-    // Picking the closest one minimizes the cost of the kPad instruction to be
-    // inserted by PadInsertion.
+    // CudnnConvPaddingLegalization canonicalize the resultant backward
+    // convolution later. Picking the closest one minimizes the cost of the kPad
+    // instruction to be inserted by CudnnConvPaddingLegalization.
     if (dim->padding_low() >= min_padding_high &&
         dim->padding_low() <= max_padding_high) {
       dim->set_padding_high(dim->padding_low());
@@ -207,7 +207,8 @@ std::tuple<bool, Window, ConvolutionDimensionNumbers> MatchBackwardFilter(
              "negative padding ("
           << dim->padding_high()
           << ") on right/bottom of the weight gradients, which is not "
-             "supported by PadInsertion (b/32744257). Falling back to "
+             "supported by CudnnConvPaddingLegalization (b/32744257). "
+             "Falling back to "
              "unfused convolution for instruction: "
           << conv->ToString();
       return no_match_result;
@@ -342,7 +343,8 @@ MatchBackwardInput(HloInstruction* conv) {
       LOG(ERROR)
           << "The low padding of the backward convolution would be negative ("
           << backward_padding_low
-          << "), which isn't supported by PadInsertion for now (b/32744257).";
+          << "), which isn't supported by CudnnConvPaddingLegalization "
+             "for now (b/32744257).";
       return no_match_result;
     }
     dim->set_padding_low(backward_padding_low);
@@ -371,8 +373,8 @@ MatchBackwardInput(HloInstruction* conv) {
       dim->set_padding_high(backward_padding_low);
     } else {
       // Otherwise, we choose the amount that's closest to backward_padding_low,
-      // and PadInsertion will later insert kSlice instructions to enforce even
-      // padding.
+      // and CudnnConvPaddingLegalization will later insert kSlice
+      // instructions to enforce even padding.
       //
       // For example, consider the backward convolution pattern
       //
@@ -398,9 +400,9 @@ MatchBackwardInput(HloInstruction* conv) {
         dim->set_padding_high(max_padding_high);
       }
     }
-    // PadInsertion doesn't handle backward input convolution with negative
-    // padding for now. So fall back to unfused convolution in case of negative
-    // padding. For example,
+    // CudnnConvPaddingLegalization doesn't handle backward input
+    // convolution with negative padding for now. So fall back to unfused
+    // convolution in case of negative padding. For example,
     //   ABCD = Conv(abc, reverse(xy), padding_high=2)
     // could be fused to
     //   ABCD = BackwardInputConv(abc, xy, padding_low=1, padding_high=-1)
@@ -410,8 +412,8 @@ MatchBackwardInput(HloInstruction* conv) {
                     "negative padding ("
                  << dim->padding_high()
                  << ") on right/bottom of the activations, which is not "
-                    "supported by PadInsertion (b/32744257). Falling back to "
-                    "unfused convolution for instruction: "
+                    "supported by CudnnConvPaddingLegalization (b/32744257). "
+                    "Falling back to unfused convolution for instruction: "
                  << conv->ToString();
       return no_match_result;
     }
@@ -555,7 +557,7 @@ StatusOr<bool> RunOnComputation(HloComputation* computation) {
 }
 }  // namespace
 
-StatusOr<bool> CudnnConvolutionRewriter::Run(HloModule* module) {
+StatusOr<bool> CudnnConvRewriter::Run(HloModule* module) {
   bool changed = false;
   for (HloComputation* computation : module->MakeNonfusionComputations()) {
     TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation));
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h
similarity index 74%
rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h
index 8d7c6fdab5..d8ec72c27b 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_REWRITER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_REWRITER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_REWRITER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_REWRITER_H_
 
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
@@ -24,11 +24,9 @@ namespace gpu {
 
 // Rewrites plain convolutions, backwards-filter convolutions, and
 // backwards-input convolutions into CustomCall HLOs that call into cuDNN.
-class CudnnConvolutionRewriter : public HloModulePass {
+class CudnnConvRewriter : public HloModulePass {
  public:
-  absl::string_view name() const override {
-    return "cudnn-convolution-rewriter";
-  }
+  absl::string_view name() const override { return "cudnn-conv-rewriter"; }
 
   StatusOr<bool> Run(HloModule* module) override;
 };
@@ -36,4 +34,4 @@ class CudnnConvolutionRewriter : public HloModulePass {
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_REWRITER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_REWRITER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
similarity index 95%
rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter_test.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
index d237f8930b..543160df8b 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h"
 
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -34,9 +34,9 @@ namespace {
 namespace op = xla::testing::opcode_matchers;
 using ::testing::_;
 
-class CudnnConvolutionRewriterTest : public HloVerifiedTestBase {
+class CudnnConvRewriterTest : public HloVerifiedTestBase {
  public:
-  CudnnConvolutionRewriterTest()
+  CudnnConvRewriterTest()
       : HloVerifiedTestBase(/*layout_sensitive=*/true,
                             /*allow_mixed_precision=*/false) {
     for (int i = 0; i < 2; ++i) {
@@ -85,7 +85,7 @@ class CudnnConvolutionRewriterTest : public HloVerifiedTestBase {
 
  protected:
   bool RunPass(HloModule* module) {
-    return CudnnConvolutionRewriter().Run(module).ValueOrDie();
+    return CudnnConvRewriter().Run(module).ValueOrDie();
   }
 
   // A convolution window with stride 1 and zero padding. The size fields are
@@ -95,7 +95,7 @@ class CudnnConvolutionRewriterTest : public HloVerifiedTestBase {
   ConvolutionDimensionNumbers tf_default_dnums_for_backward_input_;
 };
 
-TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolve) {
+TEST_F(CudnnConvRewriterTest, BackwardFilterConvolve) {
   HloComputation::Builder builder(TestName());
   HloInstruction* activations =
       builder.AddInstruction(HloInstruction::CreateParameter(
@@ -123,7 +123,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolve) {
                   op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0));
 }
 
-TEST_F(CudnnConvolutionRewriterTest,
+TEST_F(CudnnConvRewriterTest,
        BackwardFilterConvolveEquivalentToForwardConvolution) {
   HloComputation::Builder builder(TestName());
   HloInstruction* activations =
@@ -152,8 +152,7 @@ TEST_F(CudnnConvolutionRewriterTest,
 }
 
 // Extracted from block35 training.
-TEST_F(CudnnConvolutionRewriterTest,
-       BackwardFilterConvolveWithPaddedActivations) {
+TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithPaddedActivations) {
   auto builder = HloComputation::Builder(TestName());
   HloInstruction* activations =
       builder.AddInstruction(HloInstruction::CreateParameter(
@@ -183,8 +182,7 @@ TEST_F(CudnnConvolutionRewriterTest,
 }
 
 // Extracted from inception v3 training.
-TEST_F(CudnnConvolutionRewriterTest,
-       BackwardFilterConvolveWithPaddedGradients) {
+TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithPaddedGradients) {
   auto builder = HloComputation::Builder(TestName());
   HloInstruction* activations =
       builder.AddInstruction(HloInstruction::CreateParameter(
@@ -213,7 +211,7 @@ TEST_F(CudnnConvolutionRewriterTest,
                   op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0));
 }
 
-TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolveWithUnevenPadding) {
+TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithUnevenPadding) {
   auto builder = HloComputation::Builder(TestName());
   HloInstruction* activations =
       builder.AddInstruction(HloInstruction::CreateParameter(
@@ -242,7 +240,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolveWithUnevenPadding) {
                   op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0));
 }
 
-TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveEvenPadding) {
+TEST_F(CudnnConvRewriterTest, BackwardInputConvolveEvenPadding) {
   auto builder = HloComputation::Builder(TestName());
   HloInstruction* output =
       builder.AddInstruction(HloInstruction::CreateParameter(
@@ -307,7 +305,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveEvenPadding) {
 // Convolve([abc], [x], base_dilation=2)
 //   = Convolve([abc], Reverse([x]), base_dilation=2)
 //   = BackwardInputConvolve([abc], [x], stride=2)
-TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolve1x1Filter) {
+TEST_F(CudnnConvRewriterTest, BackwardInputConvolve1x1Filter) {
   auto builder = HloComputation::Builder(TestName());
   // NHWC dimension order.
   HloInstruction* output =
@@ -341,7 +339,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolve1x1Filter) {
 // BackwardInputConvolve([abc], [x], stride=1) is equivalent to
 // ForwardConvolve([abc], [x], stride=1). No need to fold it into backward input
 // convolution.
-TEST_F(CudnnConvolutionRewriterTest,
+TEST_F(CudnnConvRewriterTest,
        BackwardInputConvolve1x1FilterEquivalentToForwardConvolve) {
   auto builder = HloComputation::Builder(TestName());
   // NHWC dimension order.
@@ -385,8 +383,7 @@ TEST_F(CudnnConvolutionRewriterTest,
 //                     20x10x10x192
 //
 // Gradients are padded unevenly.
-TEST_F(CudnnConvolutionRewriterTest,
-       BackwardInputConvolveUnevenPaddingOnGradients) {
+TEST_F(CudnnConvRewriterTest, BackwardInputConvolveUnevenPaddingOnGradients) {
   auto builder = HloComputation::Builder(TestName());
   HloInstruction* output =
       builder.AddInstruction(HloInstruction::CreateParameter(
@@ -436,7 +433,7 @@ TEST_F(CudnnConvolutionRewriterTest,
 
 // Similar to BackwardInputConvolveUnevenPadding, but the low padding of the
 // gradients exceeds kernel_size - 1. Therefore, this pattern cannot be fused.
-TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveLowPaddingTooLarge) {
+TEST_F(CudnnConvRewriterTest, BackwardInputConvolveLowPaddingTooLarge) {
   auto builder = HloComputation::Builder(TestName());
   HloInstruction* output =
       builder.AddInstruction(HloInstruction::CreateParameter(
@@ -488,9 +485,8 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveLowPaddingTooLarge) {
 //               padding_low=2, padding_high=1, base_dilation=2)
 //
 // We should fuse BC even though padding on activations is uneven, because
-// PadInsertion will canonicalize the fusion HLO.
-TEST_F(CudnnConvolutionRewriterTest,
-       BackwardInputConvolveUnevenPaddingOnActivations) {
+// CudnnConvPaddingLegalization will canonicalize the fusion HLO.
+TEST_F(CudnnConvRewriterTest, BackwardInputConvolveUnevenPaddingOnActivations) {
   auto builder = HloComputation::Builder(TestName());
   // The gradients are in NCHW layout.
   HloInstruction* output =
@@ -543,9 +539,10 @@ TEST_F(CudnnConvolutionRewriterTest,
 // BC = BackwardInput(FC) does:
 //   [4] = conv([3], reverse([2]), padding_high=2)
 //
-// We currently don't fuse BC because PadInsertion doesn't support negative
-// padding on the gradients of backward convolution (b/32744257).
-TEST_F(CudnnConvolutionRewriterTest,
+// We currently don't fuse BC because CudnnConvPaddingLegalization
+// doesn't support negative padding on the gradients of backward convolution
+// (b/32744257).
+TEST_F(CudnnConvRewriterTest,
        BackwardInputConvolveNegativePaddingHighOnActivations) {
   auto builder = HloComputation::Builder(TestName());
   // The gradients are in NCHW layout.
@@ -586,7 +583,7 @@ TEST_F(CudnnConvolutionRewriterTest,
 
 // Check that we will materialize a reversed version of a constant in order to
 // pattern-match a backwards input convolution.
-TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveConstantFilter) {
+TEST_F(CudnnConvRewriterTest, BackwardInputConvolveConstantFilter) {
   Array4D<float> constant_arr(4, 4, 2, 2);
   constant_arr.FillIota(0);
   string constant_str =
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.cc
similarity index 90%
rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.cc
index a809c22b33..0b4fdf7162 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"
@@ -110,10 +110,10 @@ class ScratchBufAllocator : public se::ScratchAllocator {
 };
 
 template <typename T>
-Status RunCudnnConvolutionImpl(CudnnConvParams params,
-                               se::ScratchAllocator* scratch_allocator,
-                               se::Stream* stream,
-                               se::dnn::ProfileResult* profile_result) {
+Status RunCudnnConvImpl(CudnnConvParams params,
+                        se::ScratchAllocator* scratch_allocator,
+                        se::Stream* stream,
+                        se::dnn::ProfileResult* profile_result) {
   CudnnConvKind kind = params.kind;
   const Shape& input_shape = *params.input_shape;
   const Shape& filter_shape = *params.filter_shape;
@@ -380,22 +380,21 @@ StatusOr<CudnnConvParams> GetCudnnConvParams(
 
 }  // anonymous namespace
 
-Status RunCudnnConvolution(const HloCustomCallInstruction* conv,
-                           absl::Span<se::DeviceMemoryBase> operand_buffers,
-                           se::DeviceMemoryBase result_buffer,
-                           se::DeviceMemoryBase scratch_buf, se::Stream* stream,
-                           se::dnn::ProfileResult* profile_result) {
+Status RunCudnnConv(const HloCustomCallInstruction* conv,
+                    absl::Span<se::DeviceMemoryBase> operand_buffers,
+                    se::DeviceMemoryBase result_buffer,
+                    se::DeviceMemoryBase scratch_buf, se::Stream* stream,
+                    se::dnn::ProfileResult* profile_result) {
   ScratchBufAllocator scratch_allocator(scratch_buf);
-  return RunCudnnConvolution(conv, operand_buffers, result_buffer,
-                             &scratch_allocator, stream, profile_result);
+  return RunCudnnConv(conv, operand_buffers, result_buffer, &scratch_allocator,
+                      stream, profile_result);
 }
 
-Status RunCudnnConvolution(const HloCustomCallInstruction* conv,
-                           absl::Span<se::DeviceMemoryBase> operand_buffers,
-                           se::DeviceMemoryBase result_buffer,
-                           se::ScratchAllocator* scratch_allocator,
-                           se::Stream* stream,
-                           se::dnn::ProfileResult* profile_result) {
+Status RunCudnnConv(const HloCustomCallInstruction* conv,
+                    absl::Span<se::DeviceMemoryBase> operand_buffers,
+                    se::DeviceMemoryBase result_buffer,
+                    se::ScratchAllocator* scratch_allocator, se::Stream* stream,
+                    se::dnn::ProfileResult* profile_result) {
   TF_ASSIGN_OR_RETURN(CudnnConvParams params,
                       GetCudnnConvParams(conv, operand_buffers, result_buffer));
 
@@ -403,14 +402,14 @@ Status RunCudnnConvolution(const HloCustomCallInstruction* conv,
       conv->shape().tuple_shapes(0).element_type();
   switch (output_primitive_type) {
     case F16:
-      return RunCudnnConvolutionImpl<Eigen::half>(params, scratch_allocator,
-                                                  stream, profile_result);
+      return RunCudnnConvImpl<Eigen::half>(params, scratch_allocator, stream,
+                                           profile_result);
     case F32:
-      return RunCudnnConvolutionImpl<float>(params, scratch_allocator, stream,
-                                            profile_result);
+      return RunCudnnConvImpl<float>(params, scratch_allocator, stream,
+                                     profile_result);
     case F64:
-      return RunCudnnConvolutionImpl<double>(params, scratch_allocator, stream,
-                                             profile_result);
+      return RunCudnnConvImpl<double>(params, scratch_allocator, stream,
+                                      profile_result);
     default:
       LOG(FATAL) << ShapeUtil::HumanString(*params.output_shape);
   }
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h
similarity index 67%
rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h
index 61aec1cecc..edbc75a94a 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_
 
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -42,20 +42,19 @@ namespace gpu {
 // allocator and take note of how much memory is used.  The next time you call
 // the same conv, you can provide an explicitly preallocated scratch buffer of
 // that size, if you like.
-Status RunCudnnConvolution(const HloCustomCallInstruction* conv,
-                           absl::Span<se::DeviceMemoryBase> operand_buffers,
-                           se::DeviceMemoryBase result_buffer,
-                           se::DeviceMemoryBase scratch_buf, se::Stream* stream,
-                           se::dnn::ProfileResult* profile_result = nullptr);
-
-Status RunCudnnConvolution(const HloCustomCallInstruction* conv,
-                           absl::Span<se::DeviceMemoryBase> operand_buffers,
-                           se::DeviceMemoryBase result_buffer,
-                           se::ScratchAllocator* scratch_allocator,
-                           se::Stream* stream,
-                           se::dnn::ProfileResult* profile_result = nullptr);
+Status RunCudnnConv(const HloCustomCallInstruction* conv,
+                    absl::Span<se::DeviceMemoryBase> operand_buffers,
+                    se::DeviceMemoryBase result_buffer,
+                    se::DeviceMemoryBase scratch_buf, se::Stream* stream,
+                    se::dnn::ProfileResult* profile_result = nullptr);
+
+Status RunCudnnConv(const HloCustomCallInstruction* conv,
+                    absl::Span<se::DeviceMemoryBase> operand_buffers,
+                    se::DeviceMemoryBase result_buffer,
+                    se::ScratchAllocator* scratch_allocator, se::Stream* stream,
+                    se::dnn::ProfileResult* profile_result = nullptr);
 
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc
similarity index 98%
rename from tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc
index d508cbc2e1..8ac11bcf65 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h"
 
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"
@@ -242,7 +242,7 @@ StatusOr<std::unique_ptr<HloInstruction>> TryRewriteToCudnnForwardRelu(
 
 }  // namespace
 
-StatusOr<bool> CudnnFusedConvolutionRewriter::Run(HloModule* module) {
+StatusOr<bool> CudnnFusedConvRewriter::Run(HloModule* module) {
   bool changed = false;
   for (HloComputation* computation : module->MakeNonfusionComputations()) {
     std::vector<ConvWithRelu> matches;
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h
similarity index 77%
rename from tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h
rename to tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h
index bd12aadded..613ed8dbdc 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONV_REWRITER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONV_REWRITER_H_
 
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
@@ -22,7 +22,7 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
-class CudnnFusedConvolutionRewriter : public HloModulePass {
+class CudnnFusedConvRewriter : public HloModulePass {
  public:
   absl::string_view name() const override {
     return "cudnn-fused-convolution-rewriter";
@@ -34,4 +34,4 @@ class CudnnFusedConvolutionRewriter : public HloModulePass {
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONV_REWRITER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
index a64a616ab1..f373d4a839 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h
@@ -108,9 +108,9 @@ bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo);
 // memory used by cudnn.  Callers shouldn't inspect scratch_memory, as its value
 // is not well-defined.
 //
-// CudnnConvolutionRewriter lowers kConvolution HLOs to these custom calls.
+// CudnnConvRewriter lowers kConvolution HLOs to these custom calls.
 // When it does so, it chooses algorithm -1 and 0 bytes of scratch space.  Later
-// on in the pipeline, CudnnConvolutionAlgorithmChooser chooses an explicit
+// on in the pipeline, CudnnConvAlgorithmChooser chooses an explicit
 // algorithm for each conv and sets the amount of scratch space needed.
 //
 // (Representing the scratch memory as an output may seem strange at first, but
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 09486d291a..851060da6e 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -43,7 +43,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/convolution_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/copy_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h"
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h"
 #include "tensorflow/compiler/xla/service/gpu/fft_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/for_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/gemm_thunk.h"
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 5409f65589..b394784fde 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -38,9 +38,11 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/conditional_simplifier.h"
 #include "tensorflow/compiler/xla/service/flatten_call_graph.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h"
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h"
-#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h"
-#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h"
 #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h"
@@ -54,8 +56,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h"
 #include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.h"
 #include "tensorflow/compiler/xla/service/gpu/multi_output_fusion.h"
-#include "tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h"
-#include "tensorflow/compiler/xla/service/gpu/pad_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h"
 #include "tensorflow/compiler/xla/service/gpu/stream_assignment.h"
 #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h"
@@ -201,21 +201,22 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
 
   {
     // Convert convolutions into CustomCalls to cudnn, then canonicalize them
-    // (PadInsertion).
+    // (CudnnConvPaddingLegalization).
     HloPassPipeline pipeline("conv_canonicalization");
     pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/false,
                                               /*allow_mixed_precision=*/false);
-    pipeline.AddPass<CudnnConvolutionRewriter>();
-    pipeline.AddPass<CudnnFusedConvolutionRewriter>();
-    pipeline.AddPass<PadInsertion>();
+    pipeline.AddPass<CudnnConvRewriter>();
+    pipeline.AddPass<CudnnFusedConvRewriter>();
+    pipeline.AddPass<CudnnConvPaddingLegalization>();
     if (IsVoltaOrLater(*stream_exec)) {
-      pipeline.AddPass<PadForTensorCores>();
-      // PadForTensorCores leaves behind unnecessary tuple/get-tuple-element
+      pipeline.AddPass<CudnnConvPadForSpeed>();
+      // CudnnConvPadForSpeed leaves behind unnecessary tuple/get-tuple-element
       // pairs that TupleSimplifier fixes.
       pipeline.AddPass<TupleSimplifier>();
     }
-    // CudnnConvolutionRewriter, PadInsertion and PadForTensorCores may add
-    // instructions which can be simplified by constant folding.
+    // CudnnConvRewriter, CudnnConvPaddingLegalization and
+    // CudnnConvPadForSpeed may add instructions which can be simplified by
+    // constant folding.
     pipeline.AddPass<HloConstantFolding>();
     TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status());
   }
@@ -252,7 +253,7 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     // Choose the fastest algorithm for each conv.
     //
     // We pick the algorithm before fusion so we can generate better HLO. After
-    // CudnnConvolutionRewriter, our convolutions are CustomCalls which return a
+    // CudnnConvRewriter, our convolutions are CustomCalls which return a
     // tuple (conv_result, scratch_memory), and the each conv uses 0 bytes of
     // scratch:
     //
@@ -270,12 +271,12 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     // The new tuple and gte instructions then be simplified away, because
     // nobody is expected to use the scratch value.
     //
-    // However, if we were to run CudnnConvolutionAlgorithmPicker after fusion
+    // However, if we were to run CudnnConvAlgorithmPicker after fusion
     // the gte(customcall, 0) would probably already be into a fusion node.  We
     // can't simplify across HloComputation boundaries, so in this case we
     // wouldn't be able to simplify away the new_tuple bits.
-    pipeline.AddPass<CudnnConvolutionAlgorithmPicker>(
-        stream_exec, device_allocator, compiler);
+    pipeline.AddPass<CudnnConvAlgorithmPicker>(stream_exec, device_allocator,
+                                               compiler);
     // Clean up new_tuple described above.
     pipeline.AddPass<TupleSimplifier>();
 
diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD
index 1f0436278c..d22ffc1754 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD
@@ -211,8 +211,8 @@ tf_cc_test(
 )
 
 tf_cc_test(
-    name = "cudnn_fused_convolution_rewriter_test",
-    srcs = ["cudnn_fused_convolution_rewriter_test.cc"],
+    name = "cudnn_fused_conv_rewriter_test",
+    srcs = ["cudnn_fused_conv_rewriter_test.cc"],
     tags = tf_cuda_tests_tags(),
     deps = [
         ":gpu_codegen_test",
diff --git a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
similarity index 93%
rename from tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc
rename to tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
index 5632cac186..8bdb4c8080 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
@@ -22,7 +22,7 @@ namespace xla {
 namespace gpu {
 namespace {
 
-class CudnnFusedConvolutionRewriterTest : public HloTestBase {
+class CudnnFusedConvRewriterTest : public HloTestBase {
  protected:
   string GetOptimizedHlo(absl::string_view hlo_string) {
     return backend()
@@ -66,7 +66,7 @@ class CudnnFusedConvolutionRewriterTest : public HloTestBase {
   }
 };
 
-TEST_F(CudnnFusedConvolutionRewriterTest, TestConvOnly) {
+TEST_F(CudnnFusedConvRewriterTest, TestConvOnly) {
   // max(0, conv(x, w));
   TestMatchWithAllTypes(R"(
     HloModule Test
@@ -83,7 +83,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestConvOnly) {
     })");
 }
 
-TEST_F(CudnnFusedConvolutionRewriterTest, TestBias) {
+TEST_F(CudnnFusedConvRewriterTest, TestBias) {
   // max(0, conv(x, w) + bias);
   TestMatchWithAllTypes(R"(
     HloModule Test
@@ -103,7 +103,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestBias) {
     })");
 }
 
-TEST_F(CudnnFusedConvolutionRewriterTest, TestSideInputOnly) {
+TEST_F(CudnnFusedConvRewriterTest, TestSideInputOnly) {
   // max(0, conv(x, w) + side_input);
   TestMatchWithAllTypes(R"(
     HloModule Test
@@ -122,7 +122,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestSideInputOnly) {
     })");
 }
 
-TEST_F(CudnnFusedConvolutionRewriterTest, TestBiasAndSideInput) {
+TEST_F(CudnnFusedConvRewriterTest, TestBiasAndSideInput) {
   // max(0, conv(x, w) + side_input + bias);
   TestMatchWithAllTypes(R"(
     HloModule Test
@@ -144,7 +144,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestBiasAndSideInput) {
     })");
 }
 
-TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConv) {
+TEST_F(CudnnFusedConvRewriterTest, TestScaledConv) {
   // max(0, 0.999994934 * conv(x, w));
   TestMatchWithAllTypes(R"(
     HloModule Test
@@ -164,7 +164,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConv) {
     })");
 }
 
-TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndSideInput) {
+TEST_F(CudnnFusedConvRewriterTest, TestScaledConvAndSideInput) {
   // max(0, conv(x, w) + 0.899994934 * side_input);
   TestMatchWithAllTypes(R"(
     HloModule Test
@@ -186,7 +186,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndSideInput) {
     })");
 }
 
-TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndScaledSideInput) {
+TEST_F(CudnnFusedConvRewriterTest, TestScaledConvAndScaledSideInput) {
   // max(0, 0.999994934 * conv(x, w) + 0.899994934 * side_input);
   TestMatchWithAllTypes(R"(
     HloModule Test
@@ -211,8 +211,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndScaledSideInput) {
     })");
 }
 
-TEST_F(CudnnFusedConvolutionRewriterTest,
-       TestScaledConvAndScaledSideInputWithBias) {
+TEST_F(CudnnFusedConvRewriterTest, TestScaledConvAndScaledSideInputWithBias) {
   // max(0, 0.999994934 * conv(x, w) + 0.899994934 * side_input + bias);
   TestMatchWithAllTypes(R"(
     HloModule Test
@@ -240,7 +239,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest,
     })");
 }
 
-TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchMaxZeroOnly) {
+TEST_F(CudnnFusedConvRewriterTest, TestMatchMaxZeroOnly) {
   // max(0.1, conv(x, w)) shouldn't match.
   TestNotMatchWithAllTypes(R"(
     HloModule Test
@@ -257,7 +256,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchMaxZeroOnly) {
     })");
 }
 
-TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchBroadcastedBiasOnly) {
+TEST_F(CudnnFusedConvRewriterTest, TestMatchBroadcastedBiasOnly) {
   // max(0, conv(x, w) + side_input1 + side_input2) shouldn't match.
   TestNotMatchWithAllTypes(R"(
     HloModule Test
-- 
GitLab


From 3abfe2cd9befa263de57edfae7d4c0d29c9c9182 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 17:07:19 -0700
Subject: [PATCH 0744/1085] Allow the XRTCompile op to return the ProgramShape
 resulted form the XLA compilation.

PiperOrigin-RevId: 216619617
---
 .../xla/service/compile_only_service.cc       |   2 +
 .../compiler/xrt/kernels/xrt_compile_ops.cc   |  19 +++-
 .../compiler/xrt/kernels/xrt_execute_op.cc    |   8 --
 .../compiler/xrt/ops/xrt_compile_ops.cc       |   7 +-
 tensorflow/compiler/xrt/tests/BUILD           |  13 ++-
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 106 +++++++++++++++++-
 6 files changed, 135 insertions(+), 20 deletions(-)

diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc
index 96bd2616f5..bd5045b9b9 100644
--- a/tensorflow/compiler/xla/service/compile_only_service.cc
+++ b/tensorflow/compiler/xla/service/compile_only_service.cc
@@ -89,6 +89,8 @@ CompileOnlyService::CompileAheadOfTime(
     const auto& program_shape = instance.computation.program_shape();
     ExecutionOptions execution_options;
     *execution_options.mutable_debug_options() = debug_options;
+    *execution_options.mutable_shape_with_output_layout() =
+        *instance.result_layout;
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<HloModuleConfig> module_config,
         CreateModuleConfig(program_shape, instance.argument_layouts,
diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
index 1d4f8d97f2..1ab836a496 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
@@ -166,10 +166,21 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) {
                  VLOG(1) << "Compiling XLA executable";
                  return Compile(ctx, computation_proto, program);
                }));
-
-  Tensor output(DT_INT64, TensorShape({}));
-  output.scalar<int64>()() = uid;
-  ctx->set_output(0, output);
+  std::unique_ptr<XRTCompilationCacheEntryRef> entry;
+  OP_REQUIRES_OK(ctx, cache->Lookup(uid, &entry));
+
+  Tensor handle_output(DT_INT64, TensorShape({}));
+  handle_output.scalar<int64>()() = uid;
+  ctx->set_output(0, handle_output);
+
+  xla::LocalExecutable* executable = entry->get().get_executable();
+  xla::ProgramShape program_shape = executable->executable()
+                                        ->module()
+                                        .entry_computation()
+                                        ->ComputeProgramShape();
+  Tensor program_shape_output(DT_STRING, TensorShape({1}));
+  program_shape_output.vec<string>()(0) = program_shape.SerializeAsString();
+  ctx->set_output(1, program_shape_output);
 }
 
 XRTCompileOp::~XRTCompileOp() = default;
diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
index 257b054f16..3a1e03280a 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
@@ -64,14 +64,6 @@ uint32 GetXLARandomSeed() {
   return counter.fetch_add(2);
 }
 
-// Looks up the input `key` in the compilation cache.
-Status GetComputationCacheEntry(
-    XRTCompilationCache* cache, int64 key,
-    std::unique_ptr<XRTCompilationCacheEntryRef>* entry) {
-  TF_RETURN_IF_ERROR(cache->Lookup(key, entry));
-  return Status::OK();
-}
-
 // Populates `inputs` with the input tensors to the computation.
 Status GetComputationInputs(OpKernelContext* context, ResourceMgr* rm,
                             bool release_inputs,
diff --git a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc
index 5cfc8711f9..7b3b50c695 100644
--- a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc
+++ b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc
@@ -23,7 +23,12 @@ namespace tensorflow {
 REGISTER_OP("XRTCompile")
     .Input("computation: string")
     .Output("handle: int64")
-    .SetShapeFn(tensorflow::shape_inference::ScalarShape)
+    .Output("program_shape: string")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      c->set_output(1, c->UnknownShapeOfRank(1));
+      return Status::OK();
+    })
     .Doc(
         R"(
 Reads a computation proto, compiles it, and places it in the global compilation
diff --git a/tensorflow/compiler/xrt/tests/BUILD b/tensorflow/compiler/xrt/tests/BUILD
index b6dcfc4eb9..be44a3474a 100644
--- a/tensorflow/compiler/xrt/tests/BUILD
+++ b/tensorflow/compiler/xrt/tests/BUILD
@@ -29,8 +29,11 @@ cc_library(
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:client_library",
+        "//tensorflow/compiler/xla/client:local_client",
         "//tensorflow/compiler/xla/client:xla_builder",
         "//tensorflow/compiler/xla/client:xla_computation",
+        "//tensorflow/compiler/xla/service:platform_util",
         "//tensorflow/compiler/xrt:xrt_proto",
         "//tensorflow/compiler/xrt:xrt_server",
         "//tensorflow/compiler/xrt/cc:xrt_ops",
@@ -49,7 +52,10 @@ tf_cc_test(
     name = "raw_api_test_cpu",
     size = "medium",
     srcs = [],
-    args = ["--xla_test_device=XLA_CPU"],
+    args = [
+        "--xla_test_device=XLA_CPU",
+        "--xla_platform=CPU",
+    ],
     deps = [
         ":raw_api_test_lib",
         "//tensorflow/compiler/jit:xla_cpu_device",
@@ -60,7 +66,10 @@ tf_cuda_cc_test(
     name = "raw_api_test_gpu",
     size = "medium",
     srcs = [],
-    args = ["--xla_test_device=XLA_GPU"],
+    args = [
+        "--xla_test_device=XLA_GPU",
+        "--xla_platform=GPU",
+    ],
     tags = tf_cuda_tests_tags(),
     deps = [
         ":raw_api_test_lib",
diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc
index 9fc01e6304..ee6734020d 100644
--- a/tensorflow/compiler/xrt/tests/raw_api_test.cc
+++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc
@@ -22,10 +22,13 @@ limitations under the License.
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/framework/scope.h"
 #include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/compiler/xla/client/client_library.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/client/xla_computation.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/compiler/xrt/cc/ops/xrt_compile_ops.h"
@@ -43,6 +46,7 @@ namespace tensorflow {
 namespace {
 
 string* xla_test_device_ptr;  // initial value set in main()
+string* xla_platform_ptr;     // initial value set in main()
 
 string DeviceFromFlag() {
   string xla_test_device = *xla_test_device_ptr;
@@ -145,6 +149,28 @@ void StoreComputationSnapshot(const xla::XlaComputation& computation,
   *dst = *snapshot;
 }
 
+xla::ProgramShape XlaCompiledProgramShape(
+    const xla::XlaComputation& computation,
+    const xla::ProgramShape& input_program_shape) {
+  se::Platform* platform =
+      xla::PlatformUtil::GetPlatform(*xla_platform_ptr).ValueOrDie();
+  xla::LocalClient* client =
+      xla::ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie();
+  xla::ExecutableBuildOptions exec_options;
+  exec_options.set_result_layout(input_program_shape.result());
+  std::vector<const xla::Shape*> parameters_shapes;
+  for (int64 i = 0; i < input_program_shape.parameters_size(); ++i) {
+    parameters_shapes.push_back(&input_program_shape.parameters(i));
+  }
+  auto local_executable =
+      client->Compile(computation, parameters_shapes, exec_options)
+          .ValueOrDie();
+  return local_executable->executable()
+      ->module()
+      .entry_computation()
+      ->ComputeProgramShape();
+}
+
 TEST(RawApiTest, ReadAndWriteState) {
   xrt::XLAAllocation alloc;
   alloc.set_device_ordinal(0);
@@ -338,20 +364,87 @@ TEST(RawApiTest, CompileAndExecute) {
   auto p1_value =
       ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString());
   auto p1_handle = ops::XRTAllocate(root, p1_value);
-  auto result = ops::XRTExecute(root, c_handle, e_config,
+  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
                                 {Output(p0_handle), Output(p1_handle)});
   auto read_back = ops::XRTReadLiteralAndRelease(root, result);
   TF_ASSERT_OK(root.status());
 
   ClientSession session(root);
   std::vector<Tensor> outputs;
-  TF_EXPECT_OK(session.Run({read_back}, &outputs));
+  TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs));
 
   xla::LiteralProto response;
   EXPECT_TRUE(response.ParseFromString(outputs[0].scalar<string>()()));
 
   auto expected = xla::LiteralUtil::CreateR1<float>({27.0f, 21.0f});
   EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
+
+  xla::ProgramShape program_shape;
+  EXPECT_TRUE(program_shape.ParseFromString(outputs[1].vec<string>()(0)));
+  EXPECT_EQ(program_shape.parameters_size(), 2);
+}
+
+TEST(RawApiTest, CompileWithXlaReturnShapes) {
+  xla::XlaBuilder builder("XrtXlaShapes");
+  auto input_shape = xla::ShapeUtil::MakeShape(xla::BF16, {32, 3, 128, 128});
+  auto kernel_shape = xla::ShapeUtil::MakeShape(xla::BF16, {3, 3, 5, 5});
+  // Clear layouts to signal XLA we are ready to get whatever are coming out of
+  // the compilation process.
+  xla::LayoutUtil::ClearLayout(&input_shape);
+  xla::LayoutUtil::ClearLayout(&kernel_shape);
+  auto param_shape =
+      xla::ShapeUtil::MakeTupleShape({input_shape, kernel_shape});
+  auto param = xla::Parameter(&builder, 0, param_shape, "param");
+  auto input = xla::GetTupleElement(param, 0);
+  auto kernel = xla::GetTupleElement(param, 1);
+  xla::Conv(input, kernel, {1, 1}, xla::Padding::kSame);
+  TF_ASSERT_OK_AND_ASSIGN(xla::XlaComputation xla_computation, builder.Build());
+
+  auto result_shape = xla_computation.GetProgramShape().ValueOrDie().result();
+  // Clear the result shape layout to tell XLA we are accepting whatever are
+  // coming out of the compilation process.
+  xla::LayoutUtil::ClearLayout(&result_shape);
+
+  xrt::XLAComputation c;
+  auto config = c.mutable_config();
+  auto shapes = config->mutable_program_shape();
+  *shapes->add_parameters() = param_shape;
+  *shapes->mutable_result() = result_shape;
+  StoreComputationSnapshot(xla_computation, c.mutable_hlo_snapshot());
+
+  Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
+  auto computation =
+      ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
+  auto c_handle = ops::XRTCompile(root, computation);
+  auto release = ops::XRTReleaseCompilationHandle(root, c_handle.handle);
+  TF_ASSERT_OK(root.status());
+
+  ClientSession session(root);
+  std::vector<Tensor> outputs;
+  TF_EXPECT_OK(session.Run(tensorflow::ClientSession::FeedType(),
+                           {c_handle.program_shape}, {release}, &outputs));
+
+  xla::ProgramShape program_shape;
+  EXPECT_TRUE(program_shape.ParseFromString(outputs[0].vec<string>()(0)));
+  EXPECT_EQ(program_shape.parameters_size(), 1);
+
+  VLOG(2) << "Param: "
+          << xla::ShapeUtil::HumanStringWithLayout(program_shape.parameters(0));
+  VLOG(2) << "Result: "
+          << xla::ShapeUtil::HumanStringWithLayout(program_shape.result());
+
+  xla::ProgramShape xla_program_shape =
+      XlaCompiledProgramShape(xla_computation, *shapes);
+  EXPECT_TRUE(xla::LayoutUtil::Equal(
+      xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {0}).layout(),
+      xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {0})
+          .layout()));
+  EXPECT_TRUE(xla::LayoutUtil::Equal(
+      xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {1}).layout(),
+      xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {1})
+          .layout()));
+  EXPECT_TRUE(xla::LayoutUtil::Equal(program_shape.result().layout(),
+                                     xla_program_shape.result().layout()));
 }
 
 TEST(RawApiTest, CompileAndExecuteZeroArg) {
@@ -371,7 +464,7 @@ TEST(RawApiTest, CompileAndExecuteZeroArg) {
   auto computation =
       ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
   auto c_handle = ops::XRTCompile(root, computation);
-  auto result = ops::XRTExecute(root, c_handle, e_config,
+  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
                                 std::initializer_list<Input>({}));
   auto read_back = ops::XRTReadLiteralAndRelease(root, result);
   TF_ASSERT_OK(root.status());
@@ -420,7 +513,7 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) {
   auto p1_value =
       ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString());
   auto p1_handle = ops::XRTAllocate(root, p1_value);
-  auto result = ops::XRTExecute(root, c_handle, e_config,
+  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
                                 {Output(p0_handle), Output(p1_handle)});
   auto read_back = ops::XRTReadLiteralAndRelease(root, result);
   TF_ASSERT_OK(root.status());
@@ -455,7 +548,7 @@ TEST(RawApiTest, LeakCompilationReference) {
 
   ClientSession session(root);
   std::vector<Tensor> outputs;
-  TF_EXPECT_OK(session.Run({c_handle}, &outputs));
+  TF_EXPECT_OK(session.Run({c_handle.handle}, &outputs));
 }
 
 }  // namespace
@@ -464,9 +557,12 @@ TEST(RawApiTest, LeakCompilationReference) {
 
 int main(int argc, char** argv) {
   tensorflow::xla_test_device_ptr = new tensorflow::string("XLA_CPU");
+  tensorflow::xla_platform_ptr = new tensorflow::string("CPU");
   std::vector<tensorflow::Flag> flag_list = {
       tensorflow::Flag("xla_test_device", tensorflow::xla_test_device_ptr,
                        "Tensorflow device type to use for test, e.g., XLA_CPU"),
+      tensorflow::Flag("xla_platform", tensorflow::xla_platform_ptr,
+                       "The XLA platform to select for the device"),
   };
   tensorflow::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
   const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
-- 
GitLab


From 331683cb22246d116778c850fcbf1cc2cc74c9ce Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Wed, 10 Oct 2018 17:25:25 -0700
Subject: [PATCH 0745/1085] Include <cmath> in cost_estimator.h

INFINITY define requires the cmath header.

PiperOrigin-RevId: 216621867
---
 tensorflow/core/grappler/costs/cost_estimator.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h
index 811e923b87..e3b3a36b09 100644
--- a/tensorflow/core/grappler/costs/cost_estimator.h
+++ b/tensorflow/core/grappler/costs/cost_estimator.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPPLER_COSTS_COST_ESTIMATOR_H_
 
 #include <chrono>
+#include <cmath>
 #include <unordered_map>
 #include "tensorflow/core/lib/core/status.h"
 
-- 
GitLab


From 2b010f2e48c4da512a87ab568600c8befe7147a9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 17:30:27 -0700
Subject: [PATCH 0746/1085] Adds a simple util to build a GrapplerItem from a
 MetaGraphDef stored in a file.

PiperOrigin-RevId: 216622520
---
 .../core/grappler/grappler_item_builder.cc    |  9 ++++
 .../core/grappler/grappler_item_builder.h     |  6 +++
 tensorflow/core/grappler/inputs/utils.cc      | 16 ++++--
 tensorflow/core/grappler/inputs/utils.h       | 10 +++-
 tensorflow/core/grappler/inputs/utils_test.cc | 49 +++++++++++++++++++
 5 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index 369046666d..24245a41c3 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -630,5 +630,14 @@ std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
   return new_item;
 }
 
+std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDefFile(
+    const string& id, const string& meta_graph_file, const ItemConfig& cfg) {
+  MetaGraphDef meta_graph;
+  if (!ReadMetaGraphDefFromFile(meta_graph_file, &meta_graph).ok()) {
+    return nullptr;
+  }
+  return GrapplerItemFromMetaGraphDef(id, meta_graph, cfg);
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h
index 1698587f8c..7102cf94c6 100644
--- a/tensorflow/core/grappler/grappler_item_builder.h
+++ b/tensorflow/core/grappler/grappler_item_builder.h
@@ -58,6 +58,12 @@ struct ItemConfig {
 std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDef(
     const string& id, const MetaGraphDef& meta_graph, const ItemConfig& cfg);
 
+// Factory method for creating a GrapplerItem from a file
+// containing a MetaGraphDef in either binary or text format.
+// Returns nullptr if the given meta_graph cannot be converted.
+std::unique_ptr<GrapplerItem> GrapplerItemFromMetaGraphDefFile(
+    const string& id, const string& meta_graph_file, const ItemConfig& cfg);
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/inputs/utils.cc b/tensorflow/core/grappler/inputs/utils.cc
index def9198a69..03f59701ce 100644
--- a/tensorflow/core/grappler/inputs/utils.cc
+++ b/tensorflow/core/grappler/inputs/utils.cc
@@ -35,11 +35,19 @@ bool FileExists(const string& file, Status* status) {
   return status->ok();
 }
 
-Status ReadGraphDefFromFile(const string& graph_def_pbtxt_path,
-                            GraphDef* result) {
+Status ReadGraphDefFromFile(const string& graph_def_path, GraphDef* result) {
   Status status;
-  if (FileExists(graph_def_pbtxt_path, &status)) {
-    return ReadTextProto(Env::Default(), graph_def_pbtxt_path, result);
+  if (!ReadBinaryProto(Env::Default(), graph_def_path, result).ok()) {
+    return ReadTextProto(Env::Default(), graph_def_path, result);
+  }
+  return status;
+}
+
+Status ReadMetaGraphDefFromFile(const string& graph_def_path,
+                                MetaGraphDef* result) {
+  Status status;
+  if (!ReadBinaryProto(Env::Default(), graph_def_path, result).ok()) {
+    return ReadTextProto(Env::Default(), graph_def_path, result);
   }
   return status;
 }
diff --git a/tensorflow/core/grappler/inputs/utils.h b/tensorflow/core/grappler/inputs/utils.h
index 4b9cb0a9ad..2588e380fe 100644
--- a/tensorflow/core/grappler/inputs/utils.h
+++ b/tensorflow/core/grappler/inputs/utils.h
@@ -20,7 +20,9 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/protobuf/meta_graph.pb.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -31,8 +33,12 @@ bool FilesExist(const std::set<string>& files);
 
 bool FileExists(const string& file, Status* status);
 
-Status ReadGraphDefFromFile(const string& graph_def_pbtxt_path,
-                            GraphDef* result);
+// Reads GraphDef from file in either text or raw serialized format.
+Status ReadGraphDefFromFile(const string& graph_def_path, GraphDef* result);
+
+// Reads MetaGraphDef from file in either text or raw serialized format.
+Status ReadMetaGraphDefFromFile(const string& meta_graph_def_path,
+                                MetaGraphDef* result);
 
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/inputs/utils_test.cc b/tensorflow/core/grappler/inputs/utils_test.cc
index 694a855280..c8af2aa738 100644
--- a/tensorflow/core/grappler/inputs/utils_test.cc
+++ b/tensorflow/core/grappler/inputs/utils_test.cc
@@ -31,6 +31,25 @@ class UtilsTest : public ::testing::Test {
     non_existent_file_ = io::JoinPath(BaseDir(), "non_existent_file.txt");
     actual_file_ = io::JoinPath(BaseDir(), "test_file.txt");
     TF_CHECK_OK(WriteStringToFile(env_, actual_file_, "Some test data"));
+
+    text_graph_def_file_ = io::JoinPath(BaseDir(), "text_graph_def_file.txt");
+    binary_graph_def_file_ =
+        io::JoinPath(BaseDir(), "binary_graph_def_file.txt");
+    text_meta_graph_def_file_ =
+        io::JoinPath(BaseDir(), "text_meta_graph_def_file.txt");
+    binary_meta_graph_def_file_ =
+        io::JoinPath(BaseDir(), "binary_meta_graph_def_file.txt");
+
+    auto node = graph_def_.add_node();
+    node->set_name("foo");
+    node->set_op("bar");
+    TF_CHECK_OK(WriteTextProto(env_, text_graph_def_file_, graph_def_));
+    TF_CHECK_OK(WriteBinaryProto(env_, binary_graph_def_file_, graph_def_));
+    *meta_graph_def_.mutable_graph_def() = graph_def_;
+    TF_CHECK_OK(
+        WriteTextProto(env_, text_meta_graph_def_file_, meta_graph_def_));
+    TF_CHECK_OK(
+        WriteBinaryProto(env_, binary_meta_graph_def_file_, meta_graph_def_));
   }
 
   void TearDown() override {
@@ -39,8 +58,14 @@ class UtilsTest : public ::testing::Test {
         env_->DeleteRecursively(BaseDir(), &undeleted_files, &undeleted_dirs));
   }
 
+  GraphDef graph_def_;
+  MetaGraphDef meta_graph_def_;
   string non_existent_file_;
   string actual_file_;
+  string text_graph_def_file_;
+  string binary_graph_def_file_;
+  string text_meta_graph_def_file_;
+  string binary_meta_graph_def_file_;
   Env* env_ = Env::Default();
 };
 
@@ -58,6 +83,30 @@ TEST_F(UtilsTest, FilesExist) {
   EXPECT_TRUE(status[1].ok());
 }
 
+TEST_F(UtilsTest, ReadGraphDefFromFile_Text) {
+  GraphDef result;
+  TF_CHECK_OK(ReadGraphDefFromFile(text_graph_def_file_, &result));
+  EXPECT_EQ(result.DebugString(), graph_def_.DebugString());
+}
+
+TEST_F(UtilsTest, ReadGraphDefFromFile_Binary) {
+  GraphDef result;
+  TF_CHECK_OK(ReadGraphDefFromFile(binary_graph_def_file_, &result));
+  EXPECT_EQ(result.DebugString(), graph_def_.DebugString());
+}
+
+TEST_F(UtilsTest, ReadMetaGraphDefFromFile_Text) {
+  MetaGraphDef result;
+  TF_CHECK_OK(ReadMetaGraphDefFromFile(text_meta_graph_def_file_, &result));
+  EXPECT_EQ(result.DebugString(), meta_graph_def_.DebugString());
+}
+
+TEST_F(UtilsTest, ReadReadMetaGraphDefFromFile_Binary) {
+  MetaGraphDef result;
+  TF_CHECK_OK(ReadMetaGraphDefFromFile(binary_meta_graph_def_file_, &result));
+  EXPECT_EQ(result.DebugString(), meta_graph_def_.DebugString());
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 0dab0f78293e3c11e7428083ec0afe94e3ba7439 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Wed, 10 Oct 2018 17:39:16 -0700
Subject: [PATCH 0747/1085] Changed Adam algorithm variant formula from
 sqrt(max(v, epsilon**2)) to sqrt(v + epsilon**2) and changed flag name
 accordingly. (#22876)

PiperOrigin-RevId: 216240045
---
 tensorflow/contrib/tpu/proto/optimization_parameters.proto | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index 8529b48c15..c2e3be03db 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -62,9 +62,9 @@ message FtrlParameters {
 // (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/AdamOptimizer). If
 // use_non_lazy_adam is enabled, use_gradient_accumulation is also required in
 // order to get correct results; a warning will be printed otherwise (which may
-// change to an error in the future). If use_max_with_epsilon is set, the Adam
+// change to an error in the future). If use_sum_inside_sqrt is set, the Adam
 // variable update formula will be changed from m / (sqrt(v) + epsilon) to
-// m / max(sqrt(v), abs(epsilon)); this option improves the performance of TPU
+// m / sqrt(v + epsilon**2); this option improves the performance of TPU
 // training and is not expected to harm model quality.
 message AdamParameters {
   float beta1 = 3;
@@ -73,7 +73,7 @@ message AdamParameters {
   float initial_m = 6;
   float initial_v = 7;
   bool use_non_lazy_adam = 8;
-  bool use_max_with_epsilon = 9;
+  bool use_sum_inside_sqrt = 10;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
-- 
GitLab


From e5537748753491f803fbddebdcb1cdb710631db9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 17:35:46 -0700
Subject: [PATCH 0748/1085] Batched per_image_standardization

PiperOrigin-RevId: 216623201
---
 tensorflow/python/ops/image_ops_impl.py | 14 +++++++++-----
 tensorflow/python/ops/image_ops_test.py | 10 ++++++++++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 1c75aab578..a5c800ed9f 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1184,7 +1184,8 @@ def per_image_standardization(image):
   away from zero to protect against division by 0 when handling uniform images.
 
   Args:
-    image: 3-D tensor of shape `[height, width, channels]`.
+    image: 4-D Tensor of shape `[batch, height, width, channels]` or
+           3-D Tensor of shape `[height, width, channels]`.
 
   Returns:
     The standardized image with same shape as `image`.
@@ -1194,14 +1195,17 @@ def per_image_standardization(image):
   """
   with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
     image = ops.convert_to_tensor(image, name='image')
-    image = _Assert3DImage(image)
-    num_pixels = math_ops.reduce_prod(array_ops.shape(image))
+    image = _AssertAtLeast3DImage(image)
+    if image.get_shape().ndims != 3 and image.get_shape().ndims != 4:
+      raise ValueError('`image` must have either 3 or 4 dimensions.')
+    num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-1:-4:-1])
 
     image = math_ops.cast(image, dtype=dtypes.float32)
-    image_mean = math_ops.reduce_mean(image)
+    image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True)
 
     variance = (
-        math_ops.reduce_mean(math_ops.square(image)) -
+        math_ops.reduce_mean(
+            math_ops.square(image), axis=[-1, -2, -3], keepdims=True) -
         math_ops.square(image_mean))
     variance = gen_nn_ops.relu(variance)
     stddev = math_ops.sqrt(variance)
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index ff86df6346..9ed3b4ff5d 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -1491,6 +1491,16 @@ class PerImageWhiteningTest(test_util.TensorFlowTestCase):
       whiten_np = whiten.eval()
       self.assertFalse(np.any(np.isnan(whiten_np)))
 
+  def testBatchWhitening(self):
+    imgs_np = np.random.uniform(0., 255., [4, 24, 24, 3])
+    whiten_np = [self._NumpyPerImageWhitening(img) for img in imgs_np]
+    with self.test_session(use_gpu=True):
+      imgs = constant_op.constant(imgs_np)
+      whiten = image_ops.per_image_standardization(imgs)
+      whiten_tf = whiten.eval()
+      for w_tf, w_np in zip(whiten_tf, whiten_np):
+        self.assertAllClose(w_tf, w_np, atol=1e-4)
+
 
 class CropToBoundingBoxTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From ed68320998bb13d7802b8cd1a2d02cf80a089052 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Wed, 10 Oct 2018 17:41:22 -0700
Subject: [PATCH 0749/1085] [XLA:GPU] Rework CudnnConvPadForSpeed.

 - Make the channel-dims-equal-to-3 heuristic more specific.  The relevant
   cudnn optimization only occurs with input-channels equal to 3 and
   output-channels equal to 32 or 64.

 - Simplify the logic that decided which shapes' dimensions to pad by using a
   clever (if I do say so myself) trick. Really all we need is a new layer of
   indirection which allows us to say "modify the input shape's feature dim"
   and have that be reflected in the lhs shape's feature dim.

 - Rename it to CudnnConvPadForTensorCores.  I initially thought that the
   3-input-channels optimization applied to Pascal, but experimentally this
   seems to be incorrect.

PiperOrigin-RevId: 216623854
---
 tensorflow/compiler/xla/service/gpu/BUILD     |  14 +-
 ....cc => cudnn_conv_pad_for_tensor_cores.cc} | 248 ++++++++----------
 ...ed.h => cudnn_conv_pad_for_tensor_cores.h} |  25 +-
 ...> cudnn_conv_pad_for_tensor_cores_test.cc} |  52 +++-
 .../xla/service/gpu/nvptx_compiler.cc         |  12 +-
 5 files changed, 176 insertions(+), 175 deletions(-)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_conv_pad_for_speed.cc => cudnn_conv_pad_for_tensor_cores.cc} (55%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_conv_pad_for_speed.h => cudnn_conv_pad_for_tensor_cores.h} (58%)
 rename tensorflow/compiler/xla/service/gpu/{cudnn_conv_pad_for_speed_test.cc => cudnn_conv_pad_for_tensor_cores_test.cc} (75%)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index ea285994be..4eb5739fe2 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -600,9 +600,9 @@ cc_library(
 )
 
 cc_library(
-    name = "cudnn_conv_pad_for_speed",
-    srcs = ["cudnn_conv_pad_for_speed.cc"],
-    hdrs = ["cudnn_conv_pad_for_speed.h"],
+    name = "cudnn_conv_pad_for_tensor_cores",
+    srcs = ["cudnn_conv_pad_for_tensor_cores.cc"],
+    hdrs = ["cudnn_conv_pad_for_tensor_cores.h"],
     deps = [
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:literal_util",
@@ -614,10 +614,10 @@ cc_library(
 )
 
 tf_cc_test(
-    name = "cudnn_conv_pad_for_speed_test",
-    srcs = ["cudnn_conv_pad_for_speed_test.cc"],
+    name = "cudnn_conv_pad_for_tensor_cores_test",
+    srcs = ["cudnn_conv_pad_for_tensor_cores_test.cc"],
     deps = [
-        ":cudnn_conv_pad_for_speed",
+        ":cudnn_conv_pad_for_tensor_cores",
         ":ir_emission_utils",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla:util",
@@ -661,7 +661,7 @@ cc_library(
     hdrs = ["nvptx_compiler.h"],
     deps = [
         ":cudnn_conv_algorithm_picker",
-        ":cudnn_conv_pad_for_speed",
+        ":cudnn_conv_pad_for_tensor_cores",
         ":cudnn_conv_padding_legalization",
         ":cudnn_conv_rewriter",
         ":cudnn_fused_conv_rewriter",
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.cc
similarity index 55%
rename from tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.cc
index 24b1f1af27..5aa4f839f4 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h"
 
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
@@ -24,50 +24,17 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
-// We want the input/output feature counts of an f16 conv to be factors of 8,
-// because without this cudnn can't use tensor cores on the conv.
-static constexpr int64 kDesiredNumFeaturesFactor = 8;
-
 // We won't pad a conv if doing so increases the total number of bytes in the
 // lhs, rhs, or result by more than this amount.
 //
 // TODO(jlebar): This number was tuned experimentally.  It represents a
 // compromise on our current benchmarks; it speeds some up significantly, and
 // doesn't slow any down.  But we can observe by changing this value that
-// there's additional room for speedups.  Achieving those speedups without also
-// slowing other things down will likely require a more sophisticated heuristic,
-// possibly some form of auto-tuning.
-//
-// This value should be >= 4/3, otherwise the "dims of size 3 padded up to 4"
-// special case inside PadShape won't fire.
+// there's additional room for speedups.  Achieving those speedups without
+// also slowing other things down will likely require a more sophisticated
+// heuristic, possibly some form of auto-tuning.
 static constexpr double kMaxBytesTouchedIncrease = 1.35;
 
-// Pads the given dimensions in the given shape up to a multiple of
-// kDesiredNumFeaturesFactor.
-static Shape PadShape(Shape s, absl::Span<const int64> dims) {
-  for (int64 dim : dims) {
-    int64 dim_to_pad_size = s.dimensions(dim);
-
-    // Round dim_to_pad_size up to the next multiple of
-    // kDesiredNumFeaturesFactor.
-    //
-    // Special case: dims of size 3 are rounded up to 4, not
-    // kDesiredNumFeaturesFactor.  Empirically (and on the advice of nvidia),
-    // this helps, but as of writing, it's not supported by anything in the
-    // cudnn docs.
-    int64 new_dim_to_pad_size;
-    if (dim_to_pad_size == 3) {
-      new_dim_to_pad_size = 4;
-    } else {
-      new_dim_to_pad_size =
-          RoundUpToNearest(dim_to_pad_size, kDesiredNumFeaturesFactor);
-    }
-
-    s.set_dimensions(dim, new_dim_to_pad_size);
-  }
-  return s;
-}
-
 // Creates and returns an HLO that zero-pads one or more dimensions in the given
 // instruction so that its shape is equal to the given shape.
 //
@@ -103,100 +70,19 @@ static HloInstruction* PadInstruction(HloInstruction* instr,
       HloInstruction::CreatePad(new_shape, instr, zero, pad_config));
 }
 
-// Pads the input/output feature dimensions of the given cudnn convolution
-// custom-call to be multiples of kDesiredNumFeaturesFactor.
-static StatusOr<bool> PadFeaturesDims(HloCustomCallInstruction* conv) {
+// Modifies the given convolution to have the given LHS/RHS/result shapes.
+static Status PadConv(HloCustomCallInstruction* conv,
+                      const Shape& new_lhs_shape, const Shape& new_rhs_shape,
+                      const Shape& new_result_shape) {
   CHECK_EQ(0, conv->shape().tuple_shapes(1).dimensions(0))
       << "conv must use 0 scratch bytes, i.e. this pass must be run "
          "before CudnnConvAlgorithmPicker.";
 
-  TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv));
-  const auto& dnums = conv->convolution_dimension_numbers();
   auto* lhs = conv->mutable_operand(0);
   auto* rhs = conv->mutable_operand(1);
-  const Shape& result_shape = conv->shape().tuple_shapes(0);
-
-  Shape new_lhs_shape = [&] {
-    switch (kind) {
-      case CudnnConvKind::kForward:
-      case CudnnConvKind::kBackwardFilter:
-        // LHS is "input".
-        return PadShape(lhs->shape(), {dnums.input_feature_dimension()});
-      case CudnnConvKind::kBackwardInput:
-        // LHS is "output".
-        return PadShape(lhs->shape(), {dnums.output_feature_dimension()});
-      case CudnnConvKind::kForwardActivation:
-        LOG(FATAL) << "Not yet implemented.";
-    }
-  }();
-
-  Shape new_rhs_shape = [&] {
-    switch (kind) {
-      case CudnnConvKind::kForward:
-      case CudnnConvKind::kBackwardInput:
-        // RHS is "filter".
-        return PadShape(rhs->shape(),
-                        {dnums.kernel_input_feature_dimension(),
-                         dnums.kernel_output_feature_dimension()});
-      case CudnnConvKind::kBackwardFilter:
-        // RHS is "output".
-        return PadShape(rhs->shape(), {dnums.output_feature_dimension()});
-      case CudnnConvKind::kForwardActivation:
-        LOG(FATAL) << "Not yet implemented.";
-    }
-  }();
-
-  if (ShapeUtil::Equal(lhs->shape(), new_lhs_shape) &&
-      ShapeUtil::Equal(rhs->shape(), new_rhs_shape)) {
-    VLOG(3) << "No need to pad features of " << conv->ToString();
-    return false;
-  }
-
-  Shape new_result_shape = [&] {
-    switch (kind) {
-      case CudnnConvKind::kForward:
-        // Result is "output".
-        return PadShape(result_shape, {dnums.output_feature_dimension()});
-      case CudnnConvKind::kBackwardInput:
-        // Result is "input".
-        return PadShape(result_shape, {dnums.input_feature_dimension()});
-      case CudnnConvKind::kBackwardFilter:
-        // Result is "filter".
-        return PadShape(result_shape,
-                        {dnums.kernel_input_feature_dimension(),
-                         dnums.kernel_output_feature_dimension()});
-      case CudnnConvKind::kForwardActivation:
-        LOG(FATAL) << "Not yet implemented.";
-    }
-  }();
-
-  // Check that padding wouldn't increase the total bytes read/written by this
-  // operation too much.
-  auto check_size_increase = [&](const Shape& old_shape,
-                                 const Shape& new_shape) {
-    int64 old_bytes = ShapeUtil::ByteSizeOf(old_shape);
-    int64 new_bytes = ShapeUtil::ByteSizeOf(new_shape);
-    if (new_bytes <= old_bytes * kMaxBytesTouchedIncrease) {
-      return true;
-    }
-    VLOG(3) << "Not padding convolution; doing so would change input / result "
-               "shape from "
-            << ShapeUtil::HumanString(old_shape) << " to "
-            << ShapeUtil::HumanString(new_shape) << ", a size increase of "
-            << new_bytes / static_cast<double>(old_bytes) << "x > "
-            << kMaxBytesTouchedIncrease << "x: " << conv->ToString();
-    return false;
-  };
-  if (!check_size_increase(lhs->shape(), new_lhs_shape) ||
-      !check_size_increase(rhs->shape(), new_rhs_shape) ||
-      !check_size_increase(result_shape, new_result_shape)) {
-    return false;
-  }
-
-  // OK, let's do the transformation!
-
   auto* new_lhs = PadInstruction(lhs, new_lhs_shape);
   auto* new_rhs = PadInstruction(rhs, new_rhs_shape);
+  const Shape& result_shape = conv->shape().tuple_shapes(0);
   CHECK(new_lhs != lhs || new_rhs != rhs)
       << "We should have had to pad either LHS or RHS.";
 
@@ -229,7 +115,105 @@ static StatusOr<bool> PadFeaturesDims(HloCustomCallInstruction* conv) {
 
   VLOG(2) << "Padded features of " << conv->ToString() << ", replaced with "
           << new_conv->ToString();
-  TF_RETURN_IF_ERROR(conv->parent()->ReplaceInstruction(conv, new_conv));
+  return conv->parent()->ReplaceInstruction(conv, new_conv);
+}
+
+static StatusOr<bool> PadForTensorCores(HloCustomCallInstruction* conv) {
+  TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv));
+  const auto& dnums = conv->convolution_dimension_numbers();
+  auto* lhs = conv->mutable_operand(0);
+  auto* rhs = conv->mutable_operand(1);
+  const Shape& result_shape = conv->shape().tuple_shapes(0);
+
+  // Nothing to do on non-f16 convolutions.
+  if (result_shape.element_type() != PrimitiveType::F16) {
+    return false;
+  }
+
+  // TODO(timshen): Don't skip forward-activation convs if we find a benchmark
+  // where there's a speedup.
+  if (kind == CudnnConvKind::kForwardActivation) {
+    return false;
+  }
+
+  Shape new_lhs_shape = lhs->shape();
+  Shape new_rhs_shape = rhs->shape();
+  Shape new_result_shape = conv->shape().tuple_shapes(0);
+
+  // new_{input,filter_output}_shape points to the appropriate one of
+  // new_{lhs,rhs,result}_shape.
+  Shape* new_input_shape;
+  Shape* new_filter_shape;
+  Shape* new_output_shape;
+  std::tie(new_input_shape, new_filter_shape, new_output_shape) = [&] {
+    switch (kind) {
+      case CudnnConvKind::kForward:
+      case CudnnConvKind::kForwardActivation:
+        return std::make_tuple(&new_lhs_shape, &new_rhs_shape,
+                               &new_result_shape);
+      case CudnnConvKind::kBackwardInput:
+        return std::make_tuple(&new_result_shape, &new_rhs_shape,
+                               &new_lhs_shape);
+      case CudnnConvKind::kBackwardFilter:
+        return std::make_tuple(&new_lhs_shape, &new_result_shape,
+                               &new_rhs_shape);
+    }
+  }();
+
+  // If there are 3 input features and 32 or 64 output features, pad the input
+  // features to 4.  Otherwise, try padding to multiples of 8 and check that
+  // this doesn't make any of the conv buffers too much larger.
+  auto input_features =
+      new_input_shape->dimensions(dnums.input_feature_dimension());
+  auto output_features =
+      new_output_shape->dimensions(dnums.output_feature_dimension());
+  if (input_features == 3 && (output_features == 32 || output_features == 64)) {
+    new_input_shape->set_dimensions(dnums.input_feature_dimension(), 4);
+    new_filter_shape->set_dimensions(dnums.kernel_input_feature_dimension(), 4);
+  } else {
+    auto pad_dim = [](Shape* s, int64 dim) {
+      s->set_dimensions(dim, RoundUpToNearest<int64>(s->dimensions(dim), 8));
+    };
+    pad_dim(new_input_shape, dnums.input_feature_dimension());
+    pad_dim(new_filter_shape, dnums.kernel_input_feature_dimension());
+    pad_dim(new_filter_shape, dnums.kernel_output_feature_dimension());
+    pad_dim(new_output_shape, dnums.output_feature_dimension());
+
+    // Check that padding wouldn't increase the total bytes read/written by this
+    // operation too much.
+    auto check_size_increase = [&](const Shape& old_shape,
+                                   const Shape& new_shape) {
+      int64 old_bytes = ShapeUtil::ByteSizeOf(old_shape);
+      int64 new_bytes = ShapeUtil::ByteSizeOf(new_shape);
+      if (new_bytes <= old_bytes * kMaxBytesTouchedIncrease) {
+        return true;
+      }
+      VLOG(3)
+          << "Not padding convolution; doing so would change input / result "
+             "shape from "
+          << ShapeUtil::HumanString(old_shape) << " to "
+          << ShapeUtil::HumanString(new_shape) << ", a size increase of "
+          << new_bytes / static_cast<double>(old_bytes) << "x > "
+          << kMaxBytesTouchedIncrease << "x: " << conv->ToString();
+      return false;
+    };
+
+    if (!check_size_increase(lhs->shape(), new_lhs_shape) ||
+        !check_size_increase(rhs->shape(), new_rhs_shape) ||
+        !check_size_increase(result_shape, new_result_shape)) {
+      return false;
+    }
+  }
+
+  if (ShapeUtil::Equal(lhs->shape(), new_lhs_shape) &&
+      ShapeUtil::Equal(rhs->shape(), new_rhs_shape)) {
+    VLOG(3) << "No need to pad features of " << conv->ToString();
+    return false;
+  }
+
+  // OK, let's do the transformation!
+  TF_RETURN_IF_ERROR(
+      PadConv(conv, new_lhs_shape, new_rhs_shape, new_result_shape));
   return true;
 }
 
@@ -237,26 +221,18 @@ static std::vector<HloCustomCallInstruction*> GetRelevantConvs(
     HloComputation* comp) {
   std::vector<HloCustomCallInstruction*> convs;
   for (HloInstruction* instr : comp->instructions()) {
-    if (!IsCustomCallToDnnConvolution(*instr)) {
-      continue;
-    }
-    auto* custom_call = Cast<HloCustomCallInstruction>(instr);
-    if (custom_call->operand(0)->shape().element_type() == F16 &&
-        // TODO(timshen): Disable for fused conv for now. Implement it if it's
-        // needed.
-        custom_call->custom_call_target() !=
-            kCudnnConvBiasActivationForwardCallTarget) {
-      convs.push_back(custom_call);
+    if (IsCustomCallToDnnConvolution(*instr)) {
+      convs.push_back(Cast<HloCustomCallInstruction>(instr));
     }
   }
   return convs;
 }
 
-StatusOr<bool> CudnnConvPadForSpeed::Run(HloModule* module) {
+StatusOr<bool> CudnnConvPadForTensorCores::Run(HloModule* module) {
   bool changed = false;
   for (HloComputation* comp : module->MakeNonfusionComputations()) {
     for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) {
-      TF_ASSIGN_OR_RETURN(bool result, PadFeaturesDims(conv));
+      TF_ASSIGN_OR_RETURN(bool result, PadForTensorCores(conv));
       changed |= result;
     }
   }
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h
similarity index 58%
rename from tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h
index 89a894e9d3..d4e51e86c1 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h
@@ -13,27 +13,28 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_TENSOR_CORES_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_TENSOR_CORES_H_
 
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 
 namespace xla {
 namespace gpu {
 
-// Ensures that f16 cudnn convolutions have input/output channel dimensions that
-// are multiples of 8, inserting pads/slices as necessary.
+// Adds padding to cudnn convolutions to make them run faster on GPUs with
+// tensor cores.
 //
-// This is useful primarily for Volta and newer GPUs, where tensor cores can
-// only be used if the channel dims are multiples of 8.  It's probably the
-// opposite of useful on other GPUs, so you should check what GPU you're
-// targeting before running this pass.
+//  - f16 convolutions are padded to have input/output channel dimensions that
+//    are multiples of 8, so that we can use tensor cores.
 //
-// TODO(jlebar): Rework this.  For one thing, it should not be Volta-only.
-// Padding input channels 3 to 4 is (we think) applicable to Pascal as well.
+//  - f16 convolutions with 3 input channels and 32 or 64 output channels are
+//    padded to 4 input channels.  There's a special-cased cudnn algorithm just
+//    for this.
+//
+// Don't run this pass on GPUs without tensor cores -- it will make them slower!
 //
 // TODO(jlebar): Also pad dots.
-class CudnnConvPadForSpeed : public HloModulePass {
+class CudnnConvPadForTensorCores : public HloModulePass {
  public:
   absl::string_view name() const override { return "cudnn-conv-pad-for-speed"; }
 
@@ -43,4 +44,4 @@ class CudnnConvPadForSpeed : public HloModulePass {
 }  // namespace gpu
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_SPEED_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_TENSOR_CORES_H_
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc
similarity index 75%
rename from tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc
index ec403021e6..074f95698e 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h"
 
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_matchers.h"
@@ -29,9 +29,9 @@ namespace {
 namespace op = xla::testing::opcode_matchers;
 using ::testing::_;
 
-class CudnnConvPadForSpeedTest : public HloVerifiedTestBase {};
+class CudnnConvPadForTensorCoresTest : public HloVerifiedTestBase {};
 
-TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvInputChannels) {
+TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvInputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -42,7 +42,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvInputChannels) {
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convForward"
   })");
-  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
 
   SCOPED_TRACE(module().ToString());
@@ -55,7 +55,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvInputChannels) {
                                ShapeUtil::MakeShape(F16, {2, 2, 48, 40})));
 }
 
-TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvOutputChannels) {
+TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -66,7 +66,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvOutputChannels) {
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convBackwardInput"
   })");
-  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::CustomCall(kCudnnConvBackwardInputCallTarget,
                                    op::Pad(op::Parameter(0), _),
@@ -77,7 +77,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvOutputChannels) {
                                ShapeUtil::MakeShape(F16, {2, 2, 40, 48})));
 }
 
-TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvOutputChannels) {
+TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvOutputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -88,7 +88,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvOutputChannels) {
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convForward"
   })");
-  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::Tuple(op::Slice(op::GetTupleElement(op::CustomCall(
                                   kCudnnConvForwardCallTarget, op::Parameter(0),
@@ -96,7 +96,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16ForwardConvOutputChannels) {
                               _));
 }
 
-TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvInputChannels) {
+TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvInputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -108,7 +108,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvInputChannels) {
               custom_call_target="__cudnn$convBackwardInput"
     ROOT gte = f16[10,20,30,41] get-tuple-element(result), index=0
   })");
-  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::GetTupleElement(op::Tuple(
                         op::Slice(op::GetTupleElement(op::CustomCall(
@@ -117,7 +117,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardInputConvInputChannels) {
                         _)));
 }
 
-TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvInputChannels) {
+TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -129,7 +129,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvInputChannels) {
               custom_call_target="__cudnn$convBackwardFilter"
     ROOT gte = f16[2,2,41,40] get-tuple-element(result), index=0
   })");
-  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::GetTupleElement(op::Tuple(
                         op::Slice(op::GetTupleElement(op::CustomCall(
@@ -138,7 +138,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvInputChannels) {
                         _)));
 }
 
-TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvOutputChannels) {
+TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) {
   ParseAndVerifyModule(R"(
   HloModule TestModule
 
@@ -150,7 +150,7 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvOutputChannels) {
               custom_call_target="__cudnn$convBackwardFilter"
     ROOT gte = f16[2,2,40,41] get-tuple-element(result), index=0
   })");
-  EXPECT_TRUE(CudnnConvPadForSpeed().Run(&module()).ValueOrDie());
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
   auto* root = module().entry_computation()->root_instruction();
   EXPECT_THAT(root, op::GetTupleElement(op::Tuple(
                         op::Slice(op::GetTupleElement(op::CustomCall(
@@ -159,6 +159,30 @@ TEST_F(CudnnConvPadForSpeedTest, PadF16BackwardFilterConvOutputChannels) {
                         _)));
 }
 
+TEST_F(CudnnConvPadForTensorCoresTest, PadInputFeatures3To4) {
+  ParseAndVerifyModule(R"(
+  HloModule TestModule
+
+  ENTRY TestComputation {
+    input = f16[10,20,30,3] parameter(0)
+    filter = f16[2,2,3,32] parameter(1)
+    ROOT result = (f16[10,20,30,32], u8[0]) custom-call(input, filter),
+                  window={size=2x2}, dim_labels=b01f_01io->b01f,
+                  custom_call_target="__cudnn$convForward"
+  })");
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
+  auto* root = module().entry_computation()->root_instruction();
+
+  SCOPED_TRACE(module().ToString());
+  EXPECT_THAT(root, op::CustomCall(kCudnnConvForwardCallTarget,
+                                   op::Pad(op::Parameter(0), _),
+                                   op::Pad(op::Parameter(1), _)));
+  EXPECT_TRUE(ShapeUtil::Equal(root->operand(0)->shape(),
+                               ShapeUtil::MakeShape(F16, {10, 20, 30, 4})));
+  EXPECT_TRUE(ShapeUtil::Equal(root->operand(1)->shape(),
+                               ShapeUtil::MakeShape(F16, {2, 2, 4, 32})));
+}
+
 }  // anonymous namespace
 }  // namespace gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index b394784fde..829d1499bc 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -39,7 +39,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/flatten_call_graph.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h"
-#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_speed.h"
+#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h"
@@ -209,14 +209,14 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     pipeline.AddPass<CudnnFusedConvRewriter>();
     pipeline.AddPass<CudnnConvPaddingLegalization>();
     if (IsVoltaOrLater(*stream_exec)) {
-      pipeline.AddPass<CudnnConvPadForSpeed>();
-      // CudnnConvPadForSpeed leaves behind unnecessary tuple/get-tuple-element
-      // pairs that TupleSimplifier fixes.
+      pipeline.AddPass<CudnnConvPadForTensorCores>();
+      // CudnnConvPadForTensorCores leaves behind unnecessary
+      // tuple/get-tuple-element pairs that TupleSimplifier fixes.
       pipeline.AddPass<TupleSimplifier>();
     }
     // CudnnConvRewriter, CudnnConvPaddingLegalization and
-    // CudnnConvPadForSpeed may add instructions which can be simplified by
-    // constant folding.
+    // CudnnConvPadForTensorCores may add instructions which can be simplified
+    // by constant folding.
     pipeline.AddPass<HloConstantFolding>();
     TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status());
   }
-- 
GitLab


From 787f16c1bff954b1385e92cba00a54df1951b6f9 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Wed, 10 Oct 2018 17:44:26 -0700
Subject: [PATCH 0750/1085] Getting rid of MOVING_AVERAGE_VARIABLES collection
 usage in quantize_ops.

PiperOrigin-RevId: 216624182
---
 tensorflow/contrib/quantize/python/quant_ops.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py
index d9dc7fa62e..c7c099e1c6 100644
--- a/tensorflow/contrib/quantize/python/quant_ops.py
+++ b/tensorflow/contrib/quantize/python/quant_ops.py
@@ -49,7 +49,7 @@ def _ModelVariable(name,
                    collections=None,
                    trainable=None):
   collections = list(collections or [])
-  collections += [ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES]
+  collections += [ops.GraphKeys.GLOBAL_VARIABLES]
   return variable_scope.get_variable(
       name,
       shape=shape,
@@ -62,7 +62,7 @@ def LastValueQuantize(inputs,
                       per_channel=False,
                       init_min=-6.0,
                       init_max=6.0,
-                      vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES,
+                      vars_collection=None,
                       name_prefix='LastValueQuant',
                       reuse=None,
                       is_training=True,
@@ -104,17 +104,18 @@ def LastValueQuantize(inputs,
     else:
       min_max_shape = []
 
+    vars_collections = [vars_collection] if vars_collection else []
     min_var = _ModelVariable(
         'min',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_min),
-        collections=[vars_collection],
+        collections=vars_collections,
         trainable=False)
     max_var = _ModelVariable(
         'max',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_max),
-        collections=[vars_collection],
+        collections=vars_collections,
         trainable=False)
     if not is_training:
       return _FakeQuantWithMinMaxVars(
@@ -212,17 +213,18 @@ def MovingAvgQuantize(inputs,
     else:
       min_max_shape = []
 
+    vars_collections = [vars_collection] if vars_collection else []
     min_var = _ModelVariable(
         'min',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_min),
-        collections=[vars_collection],
+        collections=vars_collections,
         trainable=False)
     max_var = _ModelVariable(
         'max',
         shape=min_max_shape,
         initializer=init_ops.constant_initializer(init_max),
-        collections=[vars_collection],
+        collections=vars_collections,
         trainable=False)
     if not is_training:
       return _FakeQuantWithMinMaxVars(
-- 
GitLab


From 566cc7d1a6869c1ecb5e3de6e7b96ba292b6a8db Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Wed, 10 Oct 2018 17:52:38 -0700
Subject: [PATCH 0751/1085] Add mode_override to the TPU embedding enqueue ops.
 This allows the mode to be (#22877)

overridden at runtime allowing dynamic switching between inference and training
modes. Not fully implemented yet.

PiperOrigin-RevId: 215325071
---
 tensorflow/contrib/tpu/BUILD                  |   3 +
 .../contrib/tpu/ops/tpu_embedding_ops.cc      |  52 ++++--
 tensorflow/contrib/tpu/python/ops/tpu_ops.py  | 148 ++++++++++++++++++
 3 files changed, 186 insertions(+), 17 deletions(-)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index c22c385d9c..8c36d5a297 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -135,6 +135,9 @@ tf_gen_op_wrapper_py(
     name = "tpu_ops",
     hidden = [
         "SendTPUEmbeddingGradients",
+        "EnqueueTPUEmbeddingIntegerBatch",
+        "EnqueueTPUEmbeddingSparseBatch",
+        "EnqueueTPUEmbeddingSparseTensorBatch",
     ],
     deps = [
         ":cross_replica_ops_op_lib",
diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
index ef2f8dd36d..0ef29bdf73 100644
--- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
+++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc
@@ -335,7 +335,6 @@ void RegisterPerTableLoadAndRetrieveOps() {
     tpu::GradientAccumulationSupport grad_accum_support;
     TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
     if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
-      // TODO(gkurian): Condition this on being used internally within Google.
       OpRegistry::Global()->Register(
           [alg](OpRegistrationData* op_reg_data) -> Status {
             return RegisterPerTableLoadOpsForAlgorithmBody(alg, true,
@@ -353,7 +352,6 @@ void RegisterPerTableLoadAndRetrieveOps() {
     tpu::GradientAccumulationSupport grad_accum_support;
     TF_CHECK_OK(GetGradientAccumulationSupport(alg, &grad_accum_support));
     if (grad_accum_support == tpu::GradientAccumulationSupport::kSupported) {
-      // TODO(gkurian): Condition this on being used internally within Google.
       OpRegistry::Global()->Register(
           [alg](OpRegistrationData* op_reg_data) -> Status {
             return RegisterPerTableRetrieveOpsForAlgorithmBody(alg, true,
@@ -366,7 +364,7 @@ void RegisterPerTableLoadAndRetrieveOps() {
 }  // namespace
 
 REGISTER_OP("RecvTPUEmbeddingActivations")
-    .Output("outputs: num_outputs * float")
+    .Output("outputs: num_outputs * float32")
     .Attr("num_outputs: int >= 1")
     .Attr("config: string")
     .SetIsStateful()
@@ -476,7 +474,8 @@ config: Serialized TPUEmbeddingConfiguration proto.
 
 REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch")
     .Input("batch: N * int32")
-    .Attr("N: int")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
     .Attr("device_ordinal: int = -1")
     .SetIsStateful()
     .SetShapeFn(shape_inference::UnknownShape)
@@ -485,6 +484,10 @@ An op that enqueues a list of input batch tensors to TPUEmbedding.
 
 batch: A list of 1D tensors, one for each embedding table, containing the
     indices into the tables.
+mode_override: A string input that overrides the mode specified in the
+    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
 device_ordinal: The TPU device to use. Should be >= 0 and less than the number
     of TPU cores in the task on which the node is placed.
 )doc");
@@ -493,7 +496,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseBatch")
     .Input("sample_indices: N * int32")
     .Input("embedding_indices: N * int32")
     .Input("aggregation_weights: N * float32")
-    .Attr("N: int")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
     .Attr("device_ordinal: int = -1")
     .Attr("combiners: list(string) = []")
     .SetIsStateful()
@@ -523,14 +527,18 @@ The tensors at corresponding positions in the three input lists
 must have the same shape, i.e. rank 1 with dim_size() equal to the total
 number of lookups into the table described by the corresponding table_id.
 
-sample_indices: A list of Rank 1 Tensors specifying the training example and
+sample_indices: A list of rank 1 Tensors specifying the training example and
     feature to which the corresponding embedding_indices and aggregation_weights
     values belong. sample_indices[i] must equal b * nf + f, where nf is the
     number of features from the corresponding table, f is in [0, nf), and
     b is in [0, batch size).
-embedding_indices: A list of Rank 1 Tensors, indices into the embedding tables.
-aggregation_weights: A list of Rank 1 Tensors containing per sample -- i.e. per
+embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per
     (training example, feature) -- aggregation weights.
+mode_override: A string input that overrides the mode specified in the
+    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
 device_ordinal: The TPU device to use. Should be >= 0 and less than the number
     of TPU cores in the task on which the node is placed.
 combiners: A list of string scalars, one for each embedding table that specify
@@ -545,7 +553,8 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
     .Input("sample_indices: N * int32")
     .Input("embedding_indices: N * int32")
     .Input("aggregation_weights: N * float32")
-    .Attr("N: int")
+    .Input("mode_override: string")
+    .Attr("N: int >= 1")
     .Attr("device_ordinal: int = -1")
     .Attr("combiners: list(string) = []")
     .Attr("table_ids: list(int)")
@@ -555,7 +564,7 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch")
 This Op eases the porting of code that uses tf.nn.embedding_lookup_sparse().
 
 sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond
-to ith feature. table_ids[i] indicates which embedding table to look up ith
+to the ith feature. table_ids[i] indicates which embedding table to look up ith
 feature.
 
 The tensors at corresponding positions in the three input lists (sample_indices,
@@ -563,12 +572,18 @@ embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1
 with dim_size() equal to the total number of lookups into the table described by
 the corresponding feature.
 
-sample_indices: A list of Rank 1 Tensors, corresponds to sp_ids.indices[:,0] in
+sample_indices: A list of rank 1 Tensors specifying the training example to
+    which the corresponding embedding_indices and aggregation_weights values
+    belong. It corresponds to sp_ids.indices[:,0] in  embedding_lookup_sparse().
+embedding_indices: A list of rank 1 Tensors, indices into the embedding tables.
+    It corresponds to sp_ids.values in embedding_lookup_sparse().
+aggregation_weights: A list of rank 1 Tensors containing per training example
+    aggregation weights. It corresponds to sp_weights.values in
     embedding_lookup_sparse().
-embedding_indices: A list of Rank 1 Tensors, corresponds to sp_ids.values
-    in embedding_lookup_sparse().
-aggregation_weights: A list of Rank 1 Tensors, corresponds to sp_weights.values
-    in embedding_lookup_sparse().
+mode_override: A string input that overrides the mode specified in the
+    TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference',
+    'training', 'backward_pass_only'}. When set to 'unspecified', the mode set
+    in TPUEmbeddingConfiguration is used, otherwise mode_override is used.
 device_ordinal: The TPU device to use. Should be >= 0 and less than the number
     of TPU cores in the task on which the node is placed.
 combiners: A list of string scalars, one for each embedding table that specify
@@ -577,8 +592,11 @@ combiners: A list of string scalars, one for each embedding table that specify
     the sum of the weights be 0 for 'mean' or the sum of the squared weights be
     0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for
     all tables.
-table_ids: A list of int. table_ids[i] indicates which embedding table to look
-    up ith feature in the list.
+table_ids: A list of integers specifying the identifier of the embedding table
+    (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the
+    corresponding input. The ith input is looked up using table_ids[i]. The size
+    of the table_ids list must be equal to that of sample_indices,
+    embedding_indices and aggregation_weights.
 )doc");
 
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
index e2e4acadab..968adccf2b 100644
--- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py
+++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py
@@ -227,6 +227,154 @@ if platform.system() != "Windows":
         inputs=inputs, learning_rates=learning_rates, config=config, name=name)
 
 
+  send_tpu_embedding_gradients.__doc__ = (
+      gen_tpu_ops._send_tpu_embedding_gradients.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_integer_batch(batch,
+                                          device_ordinal,
+                                          mode_override=None,
+                                          name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      batch: A list of 1D tensors, one for each embedding table, containing the
+        indices into the tables.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingIntegerBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops._enqueue_tpu_embedding_integer_batch(
+        batch=batch,
+        device_ordinal=device_ordinal,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_integer_batch.__doc__ = (
+      gen_tpu_ops._enqueue_tpu_embedding_integer_batch.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_sparse_batch(sample_indices,
+                                         embedding_indices,
+                                         aggregation_weights,
+                                         device_ordinal,
+                                         combiners=None,
+                                         mode_override=None,
+                                         name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      sample_indices: A list of rank 1 Tensors specifying the training example
+        and feature to which the corresponding embedding_indices and
+        aggregation_weights values belong. sample_indices[i] must equal b * nf +
+        f, where nf is the number of features from the corresponding table, f is
+        in [0, nf), and b is in [0, batch size).
+      embedding_indices: A list of rank 1 Tensors, indices into the embedding
+        tables.
+      aggregation_weights: A list of rank 1 Tensors containing per sample --
+        i.e. per (training example, feature) -- aggregation weights.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      combiners: A list of string scalars, one for each embedding table that
+        specify how to normalize the embedding activations after weighted
+        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+        is to use 'sum' for all tables (optional).
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingSparseBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops._enqueue_tpu_embedding_sparse_batch(
+        sample_indices=sample_indices,
+        embedding_indices=embedding_indices,
+        aggregation_weights=aggregation_weights,
+        device_ordinal=device_ordinal,
+        combiners=combiners,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_sparse_batch.__doc__ = (
+      gen_tpu_ops._enqueue_tpu_embedding_sparse_batch.__doc__)
+
+  # pylint: disable=protected-access
+  def enqueue_tpu_embedding_sparse_tensor_batch(sample_indices,
+                                                embedding_indices,
+                                                aggregation_weights,
+                                                table_ids,
+                                                device_ordinal,
+                                                combiners=None,
+                                                mode_override=None,
+                                                name=None):
+    """A placeholder op for enqueueing embedding IDs to the TPU.
+
+    Args:
+      sample_indices: A list of rank 1 Tensors specifying the training example
+        to which the corresponding embedding_indices and aggregation_weights
+        values
+        belong. It corresponds to sp_ids.indices[:,0] in
+          embedding_lookup_sparse().
+      embedding_indices: A list of rank 1 Tensors, indices into the embedding
+        tables. It corresponds to sp_ids.values in embedding_lookup_sparse().
+      aggregation_weights: A list of rank 1 Tensors containing per training
+        example aggregation weights. It corresponds to sp_weights.values in
+        embedding_lookup_sparse().
+      table_ids: A list of integers specifying the identifier of the embedding
+        table (offset of TableDescriptor in the TPUEmbeddingConfiguration) to
+        lookup the corresponding input. The ith input is looked up using
+        table_ids[i]. The size of the table_ids list must be equal to that of
+        sample_indices, embedding_indices and aggregation_weights.
+      device_ordinal: The TPU device to use. Should be >= 0 and less than the
+        number of TPU cores in the task on which the node is placed.
+      combiners: A list of string scalars, one for each embedding table that
+        specify how to normalize the embedding activations after weighted
+        summation. Supported combiners are 'mean', 'sum', or 'sqrtn'. It is
+        invalid to have the sum of the weights be 0 for 'mean' or the sum of the
+        squared weights be 0 for 'sqrtn'. If combiners isn't passed, the default
+        is to use 'sum' for all tables (optional).
+      mode_override: A string input that overrides the mode specified in the
+        TPUEmbeddingConfiguration. Supported values are {'unspecified',
+        'inference', 'training', 'backward_pass_only'}. When set to
+        'unspecified', the mode set in TPUEmbeddingConfiguration is used,
+        otherwise mode_override is used (optional).
+      name: A name for the operation (optional).
+
+    Returns:
+      An EnqueueTPUEmbeddingSparseTensorBatch operation.
+    """
+    if mode_override is None:
+      mode_override = "unspecified"
+    return gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch(
+        sample_indices=sample_indices,
+        embedding_indices=embedding_indices,
+        aggregation_weights=aggregation_weights,
+        table_ids=table_ids,
+        device_ordinal=device_ordinal,
+        combiners=combiners,
+        mode_override=mode_override,
+        name=name)
+
+  enqueue_tpu_embedding_sparse_tensor_batch.__doc__ = (
+      gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch.__doc__)
+
 else:
   # We have already built the appropriate libraries into the binary via CMake
   # if we have built contrib, so we don't need this
-- 
GitLab


From d45c30fa4cf35a0cc71c8b18c2b710d1a3559aee Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Wed, 10 Oct 2018 17:44:49 -0700
Subject: [PATCH 0752/1085] [XLA:GPU] Allow input fusion into scatter

We fuse everything into the scatter now, and emit two kernels. The first kernel
fills the output buffer with the computation fused into the scatter operand.
The second kernel is a regular scatter, which also contains the fused
operations from the updates and scatter_indices inputs.

PiperOrigin-RevId: 216624225
---
 .../xla/service/gpu/instruction_fusion.cc     |   9 +-
 .../service/gpu/instruction_fusion_test.cc    |  39 +++++
 .../xla/service/gpu/ir_emitter_unnested.cc    | 135 ++++++++++++++----
 .../xla/service/gpu/ir_emitter_unnested.h     |   8 ++
 .../compiler/xla/service/hlo_matchers.h       |   1 +
 tensorflow/compiler/xla/tests/scatter_test.cc |  31 ++++
 6 files changed, 195 insertions(+), 28 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
index b61f038739..1d66787d89 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
@@ -47,6 +47,7 @@ bool IsFusible(const HloInstruction& hlo) {
          hlo.opcode() == HloOpcode::kReduce ||
          hlo.opcode() == HloOpcode::kReduceWindow ||
          hlo.opcode() == HloOpcode::kReshape ||
+         hlo.opcode() == HloOpcode::kScatter ||
          hlo.opcode() == HloOpcode::kSlice ||
          hlo.opcode() == HloOpcode::kTranspose;
 }
@@ -223,6 +224,11 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
     return false;
   }
 
+  // Scatter is only supported at the root of a kInput fusion.
+  if (producer->opcode() == HloOpcode::kScatter) {
+    return false;
+  }
+
   // Do not fuse into reduce input fusions if the resulting kernel would suffer
   // from poor data locality (due to unfriendly input layouts).
   if (IsInputFusibleReduction(*consumer) &&
@@ -285,7 +291,8 @@ bool GpuInstructionFusion::ShouldFuseIntoMultiOutput(HloInstruction* consumer,
 
 HloInstruction::FusionKind GpuInstructionFusion::ChooseKind(
     const HloInstruction* producer, const HloInstruction* consumer) {
-  if (IsReductionToVector(*consumer)) {
+  if (IsReductionToVector(*consumer) ||
+      consumer->opcode() == HloOpcode::kScatter) {
     return HloInstruction::FusionKind::kInput;
   }
   if (producer->opcode() == HloOpcode::kDot ||
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc
index 96bfe0c12e..fd9b7cee80 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc
@@ -709,5 +709,44 @@ TEST_F(InstructionFusionTest, AvoidsLargeFusion) {
   }
 }
 
+TEST_F(InstructionFusionTest, FuseIntoScatter) {
+  auto module = ParseHloString(R"(
+    HloModule test_module
+
+    add {
+      lhs = f32[] parameter(0)
+      rhs = f32[] parameter(1)
+      ROOT add = f32[] add(lhs, rhs)
+    }
+
+    ENTRY FuseIntoScatter {
+      p0 = s32[3,3] parameter(0)
+      operand = s32[3,3] add(p0, p0)
+      p1 = s32[2] parameter(1)
+      indices = s32[2] add(p1, p1)
+      p2 = s32[2,3] parameter(2)
+      updates = s32[2,3] add(p2, p2)
+      scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=add,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+      ROOT add = s32[3,3] add(scatter, scatter)
+    })")
+                    .ValueOrDie();
+
+  EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true)
+                  .Run(module.get())
+                  .ValueOrDie());
+
+  HloInstruction* root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, op::Add(op::Fusion(), op::Fusion()));
+  EXPECT_EQ(root->operand(0)->fusion_kind(),
+            HloInstruction::FusionKind::kInput);
+  EXPECT_THAT(root->operand(0)->fused_expression_root(),
+              op::Scatter(op::Add(), op::Add(), op::Add()));
+}
+
 }  // namespace gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 851060da6e..d8ae5b46fe 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -493,13 +493,68 @@ Status IrEmitterUnnested::HandleFft(HloInstruction* fft) {
 
 Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
   HloInstruction* root = fusion->fused_expression_root();
-  // HandleFusion specializes reduction from a multi-dimensional array to a 1D
-  // array. The specialized version requires a initializer thunk that
-  // initializes the output array to the initial value of the reduce.
   if (HloInstruction::FusionKind::kInput == fusion->fusion_kind()) {
     switch (root->opcode()) {
+      case HloOpcode::kScatter: {
+        std::vector<std::unique_ptr<Thunk>> thunks;
+        // The initialization from 'operand' is using different loop bounds, so
+        // emit it in a separate kernel. Treat it like a loop fusion, writing to
+        // the output buffer.
+        {
+          int unroll_factor = ComputeMaxUnrollFactor(fusion);
+          thunks.push_back(BuildKernelThunk(
+              fusion, /*implements_whole_instruction=*/false, unroll_factor));
+
+          std::vector<IrArray> operand_parameter_arrays;
+          for (HloInstruction* operand : fusion->operands()) {
+            operand_parameter_arrays.push_back(GetIrArray(*operand, *fusion));
+          }
+          GpuElementalIrEmitter operand_elemental_emitter(
+              hlo_module_config_, ir_emitter_context_->llvm_module(), &b_,
+              GetNestedComputer());
+          FusedIrEmitter operand_fused_emitter(operand_parameter_arrays,
+                                               &operand_elemental_emitter);
+          TF_RETURN_IF_ERROR(
+              root->mutable_operand(0)->Accept(&operand_fused_emitter));
+
+          TF_RETURN_IF_ERROR(EmitTargetElementLoopInThunk(
+              *fusion, operand_fused_emitter.GetGenerator(root->operand(0)),
+              static_cast<KernelThunk*>(thunks.back().get())));
+        }
+
+        // Now build the actual scatter, reading and writing to the freshly
+        // filled output buffer.
+        {
+          thunks.push_back(
+              BuildKernelThunk(fusion,
+                               /*implements_whole_instruction=*/false));
+          // Spin up a new fused emitter for the scatter kernel and emit it.
+          std::vector<IrArray> scatter_parameter_arrays;
+          for (HloInstruction* operand : fusion->operands()) {
+            scatter_parameter_arrays.push_back(GetIrArray(*operand, *fusion));
+          }
+          GpuElementalIrEmitter scatter_elemental_emitter(
+              hlo_module_config_, ir_emitter_context_->llvm_module(), &b_,
+              GetNestedComputer());
+          FusedIrEmitter scatter_fused_emitter(scatter_parameter_arrays,
+                                               &scatter_elemental_emitter);
+          TF_RETURN_IF_ERROR(root->Accept(&scatter_fused_emitter));
+          TF_RETURN_IF_ERROR(EmitScatter(
+              thunks.back().get(), root,
+              /*scatter_indices_gen=*/
+              scatter_fused_emitter.GetGenerator(root->operand(1)),
+              /*updates_gen=*/
+              scatter_fused_emitter.GetGenerator(root->operand(2))));
+        }
+        thunk_sequence_->emplace_back(
+            absl::make_unique<SequentialThunk>(std::move(thunks), fusion));
+        return Status::OK();
+      }
       case HloOpcode::kTuple:
       case HloOpcode::kReduce: {
+        // HandleFusion specializes reduction from a multi-dimensional array to
+        // a 1D array. The specialized version requires a initializer thunk that
+        // initializes the output array to the initial value of the reduce.
         if (root->opcode() == HloOpcode::kReduce &&
             ShapeUtil::IsTuple(root->shape())) {
           // TODO(b/112040122): Support variadic reduce.
@@ -1962,9 +2017,6 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
   const HloInstruction* operand = scatter->operand(0);
   const HloInstruction* scatter_indices = scatter->operand(1);
   const HloInstruction* updates = scatter->operand(2);
-  const ScatterDimensionNumbers& dim_numbers =
-      scatter->scatter_dimension_numbers();
-  CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape()));
 
   std::vector<std::unique_ptr<Thunk>> thunks;
 
@@ -1978,6 +2030,44 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
         /*mem_size=*/ShapeUtil::ByteSizeOf(operand->shape()), scatter));
   }
 
+  thunks.push_back(
+      BuildKernelThunk(scatter,
+                       /*implements_whole_instruction=*/thunks.empty()));
+
+  TF_RETURN_IF_ERROR(
+      EmitScatter(thunks.back().get(), scatter,
+                  /*scatter_indices_gen=*/
+                  [=](const IrArray::Index& index) {
+                    return GetIrArray(*scatter_indices, *scatter)
+                        .EmitReadArrayElement(index, &b_, "scatter_index");
+                  },
+                  /*updates_gen=*/
+                  [=](const IrArray::Index& index) {
+                    return GetIrArray(*updates, *scatter)
+                        .EmitReadArrayElement(index, &b_, "update");
+                  }));
+
+  // Elide the sequential thunk if there's no copy.
+  if (thunks.size() == 1) {
+    thunk_sequence_->push_back(std::move(thunks[0]));
+  } else {
+    thunk_sequence_->emplace_back(
+        absl::make_unique<SequentialThunk>(std::move(thunks), scatter));
+  }
+  return Status::OK();
+}
+
+Status IrEmitterUnnested::EmitScatter(
+    Thunk* thunk, HloInstruction* scatter,
+    const llvm_ir::ElementGenerator& scatter_indices_gen,
+    const llvm_ir::ElementGenerator& updates_gen) {
+  const HloInstruction* operand = scatter->operand(0);
+  const HloInstruction* scatter_indices = scatter->operand(1);
+  const HloInstruction* updates = scatter->operand(2);
+  const ScatterDimensionNumbers& dim_numbers =
+      scatter->scatter_dimension_numbers();
+  CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape()));
+
   auto loop_body_emitter = [&](const IrArray::Index& index) -> Status {
     std::vector<llvm::Value*> raw_window_multidim;
     std::vector<llvm::Value*> input_scatter_multidim;
@@ -2023,9 +2113,6 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
       scatter_indices_shape.mutable_layout()->add_minor_to_major(
           dim_numbers.index_vector_dim());
     }
-    llvm_ir::IrArray scatter_indices_reshaped =
-        GetIrArray(*scatter_indices, *scatter)
-            .CastToShape(scatter_indices_shape, &b_);
 
     // Now load the indices corresponding to the current window from
     // scatter_indices.
@@ -2041,9 +2128,10 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
           raw_scatter_index_index.GetConstantWithIndexType(i);
 
       int64 operand_dim = dim_numbers.scatter_dims_to_operand_dims(i);
-      llvm::Value* loaded_scatter_index =
-          scatter_indices_reshaped.EmitReadArrayElement(raw_scatter_index_index,
-                                                        &b_, "scatter_index");
+      TF_ASSIGN_OR_RETURN(
+          llvm::Value* const loaded_scatter_index,
+          scatter_indices_gen(raw_scatter_index_index.SourceIndexOfReshape(
+              scatter_indices_shape, scatter_indices->shape(), &b_)));
       // And add the index to our window index. This yields the output index.
       llvm::Value* dim_offset =
           Add(input_window_multidim[operand_dim],
@@ -2068,11 +2156,15 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
     // an atomic store to the calculated location in the output.
     llvm_ir::IrArray::Index input_window_index(input_window_multidim,
                                                index.GetType());
-    llvm::Value* input_address =
-        GetIrArray(*updates, *scatter).EmitArrayElementAddress(index, &b_);
+    HloInstruction* output_hlo =
+        scatter->IsFused() ? scatter->parent()->FusionInstruction() : scatter;
     llvm::Value* output_address =
-        GetIrArray(*scatter, *scatter)
+        GetIrArray(*output_hlo, *output_hlo)
             .EmitArrayElementAddress(input_window_index, &b_);
+    llvm::Value* input_address = Alloca(llvm_ir::PrimitiveTypeToIrType(
+        updates->shape().element_type(), module_));
+    TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, updates_gen(index));
+    Store(input_ir_value, input_address);
     return EmitAtomicOperationForNestedComputation(
         *scatter->to_apply(), output_address, input_address);
   };
@@ -2080,22 +2172,11 @@ Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
   // Launch a kernel that reads every element in the updates tensor. We could
   // also do one kernel per window instead if bounds checks turn out to be a
   // bottleneck.
-  thunks.push_back(
-      BuildKernelThunk(scatter,
-                       /*implements_whole_instruction=*/thunks.empty()));
-
   LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
       updates->shape(), ir_emitter_context_->device_description());
-  UpdateLaunchDimensions(launch_dimensions,
-                         static_cast<KernelThunk*>(thunks.back().get()),
+  UpdateLaunchDimensions(launch_dimensions, thunk,
                          ir_emitter_context_->llvm_module());
 
-  if (thunks.size() == 1) {
-    thunk_sequence_->push_back(std::move(thunks[0]));
-  } else {
-    thunk_sequence_->emplace_back(
-        absl::make_unique<SequentialThunk>(std::move(thunks), scatter));
-  }
   return ParallelLoopEmitter(loop_body_emitter, updates->shape(),
                              launch_dimensions, &b_)
       .EmitLoop(IrName(scatter),
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index 2e36e7235b..93f11c069a 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -185,6 +185,14 @@ class IrEmitterUnnested : public IrEmitter {
       absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
           extra_output_gens);
 
+  // Emits code for an in-place scatter, modifying `thunk`s launch dimensions in
+  // the process. `scatter` may be fused, scatter indices are taken from
+  // `scatter_indices_gen`, updates from`updates_gen`. The output buffer is
+  // expected to have the operand values in it already.
+  Status EmitScatter(Thunk* thunk, HloInstruction* scatter,
+                     const llvm_ir::ElementGenerator& scatter_indices_gen,
+                     const llvm_ir::ElementGenerator& updates_gen);
+
   // Returns true if a 0-2-1 tiling algorithm is already used to emit the kernel
   // for the hlo instruction.
   bool CheckAndEmitHloWithTile021(HloInstruction* hlo);
diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h
index b05a012b4a..1717770301 100644
--- a/tensorflow/compiler/xla/service/hlo_matchers.h
+++ b/tensorflow/compiler/xla/service/hlo_matchers.h
@@ -217,6 +217,7 @@ HLO_MATCHER(Remainder);
 HLO_MATCHER(Reshape);
 HLO_MATCHER(Reverse);
 HLO_MATCHER(Rng);
+HLO_MATCHER(Scatter);
 HLO_MATCHER(Select);
 HLO_MATCHER(SelectAndScatter);
 HLO_MATCHER(Send);
diff --git a/tensorflow/compiler/xla/tests/scatter_test.cc b/tensorflow/compiler/xla/tests/scatter_test.cc
index b21dd56045..d0cb93befa 100644
--- a/tensorflow/compiler/xla/tests/scatter_test.cc
+++ b/tensorflow/compiler/xla/tests/scatter_test.cc
@@ -69,6 +69,37 @@ ENTRY main {
   RunTest(hlo_text, &operand, &scatter_indices, &updates);
 }
 
+XLA_TEST_F(ScatterTest, TensorFlowScatterV1_WithFusedAdds) {
+  const string hlo_text = R"(
+HloModule TensorFlowScatterV1
+
+update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+  lhs = s32[] parameter(0)
+  ROOT rhs = s32[] parameter(1)
+}
+
+ENTRY main {
+  p0 = s32[3,3] parameter(0)
+  operand = s32[3,3] add(p0, p0)
+  p1 = s32[2] parameter(1)
+  indices = s32[2] add(p1, p1)
+  p2 = s32[2,3] parameter(2)
+  updates = s32[2,3] add(p2, p2)
+  ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+      to_apply=update_s32,
+      update_window_dims={1},
+      inserted_window_dims={0},
+      scatter_dims_to_operand_dims={0},
+      index_vector_dim=1
+}
+)";
+  Literal operand =
+      LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+  Literal scatter_indices = LiteralUtil::CreateR1<int32>({0, 1});
+  Literal updates = LiteralUtil::CreateR2<int32>({{10, 20, 30}, {70, 80, 90}});
+  RunTest(hlo_text, &operand, &scatter_indices, &updates);
+}
+
 XLA_TEST_F(ScatterTest, TensorFlowScatterV2_Update) {
   const char* hlo_text = R"(
 HloModule TensorFlowScatterV2
-- 
GitLab


From f043aab73ceaa51ea12867c87ea6f856c3c02089 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 18:08:11 -0700
Subject: [PATCH 0753/1085] Fixes in CompareConstantArrays:  - Array fields
 minmax and quantization_params are pointers.       (so the current check
 could have false negatives as identical objects        have different
 addresses)  - also compare narrow_range.       (so the current check could
 have false positives --- my bad, I added        narrow_range later and forgot
 to update this code).

PiperOrigin-RevId: 216626868
---
 .../contrib/lite/kernels/internal/types.h     |  5 +++
 tensorflow/contrib/lite/toco/tooling_util.cc  | 38 ++++++++++++++++---
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index c6bc6074d4..a5913143b9 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -107,6 +107,11 @@ struct QuantizationParams {
   double scale = 0.0;
 };
 
+inline bool operator==(const QuantizationParams& qp1,
+                       const QuantizationParams& qp2) {
+  return qp1.zero_point == qp2.zero_point && qp1.scale == qp2.scale;
+}
+
 template <int N>
 struct Dims {
   int sizes[N];
diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index 61aa311212..a770ff8544 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -738,15 +738,41 @@ bool CompareArrayBuffers(const Array& lhs_array, const Array& rhs_array) {
   }
   return true;
 }
+
+bool HaveSameMinMax(const Array& lhs_array, const Array& rhs_array) {
+  if (lhs_array.minmax || rhs_array.minmax) {
+    if (!lhs_array.minmax || !rhs_array.minmax) {
+      return false;
+    }
+    if (!(*lhs_array.minmax == *rhs_array.minmax)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool HaveSameQuantizationParams(const Array& lhs_array,
+                                const Array& rhs_array) {
+  if (lhs_array.quantization_params || rhs_array.quantization_params) {
+    if (!lhs_array.quantization_params || !rhs_array.quantization_params) {
+      return false;
+    }
+    if (!(*lhs_array.quantization_params == *rhs_array.quantization_params)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace
 
 bool CompareConstantArrays(const Array& lhs_array, const Array& rhs_array) {
-  bool attrs_equal =
-      lhs_array.shape() == rhs_array.shape() &&
-      lhs_array.data_type == rhs_array.data_type &&
-      lhs_array.final_data_type == rhs_array.final_data_type &&
-      lhs_array.minmax == rhs_array.minmax &&
-      lhs_array.quantization_params == rhs_array.quantization_params;
+  bool attrs_equal = lhs_array.shape() == rhs_array.shape() &&
+                     lhs_array.data_type == rhs_array.data_type &&
+                     lhs_array.final_data_type == rhs_array.final_data_type &&
+                     HaveSameMinMax(lhs_array, rhs_array) &&
+                     HaveSameQuantizationParams(lhs_array, rhs_array) &&
+                     lhs_array.narrow_range == rhs_array.narrow_range;
   if (!attrs_equal) {
     return false;
   }
-- 
GitLab


From 9b21439cda871f4253f5658f6664abdb8cc6d632 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Wed, 10 Oct 2018 18:15:12 -0700
Subject: [PATCH 0754/1085] 1.12-rc1 cherry-pick request: Revert constant
 folding changes (#22875)

* Roll forward change "Skip control flow functionalization if there is no Switch or Merge node.".

PiperOrigin-RevId: 215772272

* Revert constant folding to previous state.

PiperOrigin-RevId: 215946205
---
 .../tf2xla/functionalize_control_flow.cc      | 157 +++++++++---------
 .../core/common_runtime/constant_folding.cc   |  28 +---
 2 files changed, 88 insertions(+), 97 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 36c6f5d316..0362682bd6 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -79,7 +79,10 @@ Status FunctionalizeControlFlowForFunction(
     const string& func_name, const string& new_func_name,
     const protobuf::Map<string, tensorflow::AttrValue>& attrs,
     FunctionLibraryDefinition* fld, FunctionLibraryRuntime* flr,
-    std::map<string, string>* canonicalized_name_to_new_name) {
+    std::map<string, absl::optional<string>>* canonicalized_name_to_new_name,
+    bool* modified) {
+  *modified = false;
+
   // Convert the function to Graph.
   FunctionLibraryRuntime::Handle handle;
   TF_RETURN_IF_ERROR(flr->Instantiate(func_name, AttrSlice(&attrs), &handle));
@@ -91,44 +94,20 @@ Status FunctionalizeControlFlowForFunction(
     }
   });
   const FunctionBody* body = flr->GetFunctionBody(handle);
+  Graph* g = body->graph;
 
-  // Call graph optimizer. The most important optimization we need is constant
-  // folding, which will replace ops like Shape/BroadcastGradientArgs with
-  // constant shape input. Without this optimization, those ops might become
-  // dynamic input for then/else body function and XLA will complain that input
-  // is not compile time constant. We enable function inlining as well, because
-  // otherwise we won't be able to infer shape for any node depending on
-  // function call nodes.
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_before_opt_", func_name),
-        *body->graph, fld);
-  }
-  // Optimizer accepts std::unique_ptr<Graph>* as input and might change
-  // underlying pointer, thus we create a new Graph and copy from body->graph.
-  std::unique_ptr<Graph> optimized_graph(new Graph(fld));
-  CopyGraph(*body->graph, optimized_graph.get());
-  OptimizerOptions opts;
-  opts.set_opt_level(OptimizerOptions::L0);
-  opts.set_do_function_inlining(true);
-  opts.set_do_constant_folding(true);
-  GraphOptimizer optimizer(opts);
-  auto cf_consider_fn = [](const Node* n) {
-    // Skip SymbolicGradient op when doing constant folding.
-    // Enabling SymbolicGradient op in constant folding requires
-    // flr->device() to be non-null, and here we have not constructed
-    // proper Device object yet (it will be constructed in XlaCompiler).
-    return n->type_string() != FunctionLibraryDefinition::kGradientOp;
-  };
-  optimizer.Optimize(flr, flr->env(),
-                     /*device=*/nullptr, &optimized_graph,
-                     /*shape_map=*/nullptr, /*cse_consider_fn=*/nullptr,
-                     cf_consider_fn);
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_after_opt_", func_name),
-        *optimized_graph, fld);
+  // Check if the graph has Switch or Merge node.
+  bool has_switch_or_merge = false;
+  for (Node* n : body->graph->nodes()) {
+    if (n->type_string() == "Switch" || n->type_string() == "Merge") {
+      has_switch_or_merge = true;
+      break;
+    }
   }
+  // We cannot return here directly if the graph has no Switch/Merge.
+  // It might contain function call nodes, or If/While nodes with Switch/Merge
+  // in function body. We still need to rewrite those functions and modify
+  // corresponding nodes.
 
   // If any node has associated functions, functionalize them first.
   // Gather nodes with associated functions first, because rewriting those nodes
@@ -136,7 +115,7 @@ Status FunctionalizeControlFlowForFunction(
   // it.
   std::vector<std::pair<Node*, std::vector<AssociatedFunctionInfo>>>
       nodes_to_associated_functions;
-  for (auto* n : optimized_graph->nodes()) {
+  for (auto* n : g->nodes()) {
     auto associated_functions = GetAssociatedFunctions(*n, flr);
     if (!associated_functions.empty()) {
       nodes_to_associated_functions.push_back({n, associated_functions});
@@ -151,10 +130,15 @@ Status FunctionalizeControlFlowForFunction(
           Canonicalize(name, AttrSlice(&associated_function.attrs()));
       auto iter = canonicalized_name_to_new_name->find(canonicalized_name);
       string new_name;
+      bool function_modified;
       if (iter != canonicalized_name_to_new_name->end()) {
-        // If we already functionalized this function, skip functionalization
-        // but still rewrite the node.
-        new_name = iter->second;
+        // If we already processed this function, check if it was rewritten. If
+        // the function was rewritten, the entry will be non-empty. Otherwise
+        // the entry will be empty.
+        function_modified = iter->second.has_value();
+        if (function_modified) {
+          new_name = iter->second.value();
+        }
       } else {
         if (associated_function.type() ==
             AssociatedFunctionInfo::AssociatedFunctionType::kSymbolicGradient) {
@@ -166,42 +150,62 @@ Status FunctionalizeControlFlowForFunction(
         }
         TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
             name, new_name, associated_function.attrs(), fld, flr,
-            canonicalized_name_to_new_name));
-        (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+            canonicalized_name_to_new_name, &function_modified));
+        if (function_modified) {
+          // If the function was rewritten, add an non-empty entry. So later we
+          // know we have processed this function, and it was rewritten into
+          // another function.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = new_name;
+        } else {
+          // If the function was not rewritten, add an empty entry. So later
+          // we know we have processed this function, and it does not need to be
+          // rewritten.
+          (*canonicalized_name_to_new_name)[canonicalized_name] = absl::nullopt;
+        }
+      }
+      if (function_modified) {
+        *modified = true;
+
+        // Notice that if "n" is a function call, RewriteAssociatedFunction()
+        // will delete it and create a new node instead, making "n" an invalid
+        // pointer. That's fine because in that case, associated_functions will
+        // only have one member and the loop will only run once.
+        TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
+            g, n, fld, associated_function, new_name));
       }
-      // Notice that if "n" is a function call, RewriteAssociatedFunction() will
-      // delete it and create a new node instead, making "n" an invalid pointer.
-      // That's fine because in that case, associated_functions will only have
-      // one member and the loop will only run once.
-      TF_RETURN_IF_ERROR(RewriteAssociatedFunction(
-          optimized_graph.get(), n, fld, associated_function, new_name));
     }
   }
 
-  // Functionalize the function body.
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
-        *optimized_graph, fld);
-  }
-  TF_RETURN_IF_ERROR(FunctionalizeControlFlow(optimized_graph.get(), fld));
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile(
-        absl::StrCat("functionalize_control_flow_after_fdef_", func_name),
-        *optimized_graph, fld);
+  if (has_switch_or_merge) {
+    *modified = true;
+
+    // Functionalize the function body.
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_before_fdef_", func_name),
+          *g, fld);
+    }
+    TF_RETURN_IF_ERROR(FunctionalizeControlFlow(g, fld));
+    if (VLOG_IS_ON(4)) {
+      dump_graph::DumpGraphToFile(
+          absl::StrCat("functionalize_control_flow_after_fdef_", func_name), *g,
+          fld);
+    }
   }
-  FunctionDef functionalized_fdef;
-  TF_RETURN_IF_ERROR(GraphToFunctionDef(*optimized_graph, new_func_name,
-                                        &functionalized_fdef));
 
-  // Add rewritten FunctionDef into library.
-  if (func_name == new_func_name) {
-    VLOG(2) << "Replacing function " << func_name;
+  if (*modified) {
+    // Add rewritten FunctionDef into library.
+    FunctionDef functionalized_fdef;
     TF_RETURN_IF_ERROR(
-        fld->ReplaceFunction(new_func_name, functionalized_fdef));
-  } else {
-    VLOG(2) << "Adding function " << new_func_name;
-    TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+        GraphToFunctionDef(*g, new_func_name, &functionalized_fdef));
+    if (func_name == new_func_name) {
+      VLOG(2) << "Replacing function " << func_name;
+      TF_RETURN_IF_ERROR(
+          fld->ReplaceFunction(new_func_name, functionalized_fdef));
+    } else {
+      VLOG(2) << "Adding function " << new_func_name;
+      TF_RETURN_IF_ERROR(fld->AddFunctionDef(functionalized_fdef));
+    }
   }
 
   return ret_status;
@@ -227,7 +231,7 @@ Status FunctionalizeControlFlowPass::Run(
           {"TPUCompile", "function"},
           {"XlaLaunch", "function"},
       };
-  std::map<string, string> canonicalized_name_to_new_name;
+  std::map<string, absl::optional<string>> canonicalized_name_to_new_name;
   for (Node* n : graph->nodes()) {
     auto it = kNodeTypeToFunctionAttrMapping->find(n->type_string());
     if (it == kNodeTypeToFunctionAttrMapping->end()) {
@@ -242,12 +246,15 @@ Status FunctionalizeControlFlowPass::Run(
               << ". Corresponding function: " << func.name();
       string new_func_name = options.flib_def->UniqueFunctionName(
           absl::StrCat(func.name(), "_f15n_"));
+      bool modified;
       TF_RETURN_IF_ERROR(FunctionalizeControlFlowForFunction(
           func.name(), new_func_name, func.attr(), options.flib_def, flr,
-          &canonicalized_name_to_new_name));
-      n->ClearAttr(func_attr);
-      func.set_name(new_func_name);
-      n->AddAttr(func_attr, func);
+          &canonicalized_name_to_new_name, &modified));
+      if (modified) {
+        n->ClearAttr(func_attr);
+        func.set_name(new_func_name);
+        n->AddAttr(func_attr, func);
+      }
     }
   }
 
diff --git a/tensorflow/core/common_runtime/constant_folding.cc b/tensorflow/core/common_runtime/constant_folding.cc
index 419867ff58..e81e61b633 100644
--- a/tensorflow/core/common_runtime/constant_folding.cc
+++ b/tensorflow/core/common_runtime/constant_folding.cc
@@ -473,16 +473,16 @@ bool ReplaceTensorWithConstant(
   // 1) Do not replace another constant.
   // 2) If the destination tensor is not an int32 tensor, and has HOST_MEMORY
   // constraint, do not replace it.
-  // 3) If the size of the constant in bytes is too large (>
+  // 3) If the destination tensor is an int32 tensor, and has DEVICE_MEMORY
+  // constraint, do not replace it.
+  // 4) If the size of the constant in bytes is too large (>
   // max_constant_in_bytes), do not replace it. This prevents the size of the
   // Graph from growing too large.
-  // 4) If the constant op created does not have a kernel implementation
+  // 5) If the constant op created does not have a kernel implementation
   // for the device, do not use it.
   // TODO(keveman): Consider adding a new constant op that has a kernel
   // implementation for all types, but with HostMemory constraint on it's
   // output.
-  // 5) If the constant op for the device has different output memory type
-  // from the original op output memory type, do not replace it.
   if (tensor.first->IsConstant()) {
     return false;
   }
@@ -497,7 +497,8 @@ bool ReplaceTensorWithConstant(
       return false;
     }
     bool is_int32 = tensor.first->output_type(tensor.second) == DT_INT32;
-    if (memory_type == HOST_MEMORY && !is_int32) {
+    if ((memory_type == HOST_MEMORY && !is_int32) ||
+        (memory_type == DEVICE_MEMORY && is_int32)) {
       return false;
     }
   }
@@ -535,23 +536,6 @@ bool ReplaceTensorWithConstant(
   if (!NodeBuilder(builder).Finalize(graph, &constant_node).ok()) {
     return false;
   }
-  if (partition_device && device_type != DEVICE_CPU) {
-    MemoryType original_output_memory_type;
-    if (!MemoryTypeForOutput(device_type, graph, tensor.first, tensor.second,
-                             &original_output_memory_type)
-             .ok()) {
-      return false;
-    }
-    MemoryType const_output_memory_type;
-    if (!MemoryTypeForOutput(device_type, graph, constant_node, 0,
-                             &const_output_memory_type)
-             .ok()) {
-      return false;
-    }
-    if (original_output_memory_type != const_output_memory_type) {
-      return false;
-    }
-  }
   for (auto edge : edges_to_remove) {
     graph->AddEdge(constant_node, 0, edge->dst(), edge->dst_input());
     graph->RemoveEdge(edge);
-- 
GitLab


From 4e97b77576f89103073afd378e85c3c4b5dc17db Mon Sep 17 00:00:00 2001
From: Kay Zhu <kayzhu@google.com>
Date: Wed, 10 Oct 2018 18:10:32 -0700
Subject: [PATCH 0755/1085] [XLA::GPU] Explicitly use
 {lhs,rhs}_contracting_dimension from DotDimensionNumbers as the reduction
 dimension for Dot.

PiperOrigin-RevId: 216627100
---
 .../compiler/xla/service/gpu/ir_emitter.cc    | 22 ++++++++-----------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index 47102347cb..a3821e077e 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -495,18 +495,10 @@ Status IrEmitter::HandleDot(HloInstruction* dot) {
   TF_RET_CHECK(!ShapeUtil::IsScalar(lhs_shape) &&
                !ShapeUtil::IsScalar(rhs_shape));
 
-  // Reduce along the last dimension of the LHS and the second-to-last dimension
-  // of the RHS. Vectors are a special case where the reduction dimension is 0
-  // for both LHS and RHS. This results in a vector dot product producing a
-  // scalar.
-  const int64 lhs_reduction_dimension =
-      ShapeUtil::GetDimensionNumber(lhs_shape, -1);
-  const int64 rhs_reduction_dimension =
-      ShapeUtil::Rank(rhs_shape) >= 2 + dnums.lhs_batch_dimensions_size()
-          ? ShapeUtil::GetDimensionNumber(rhs_shape, -2)
-          : dnums.lhs_batch_dimensions_size();
-
-  // Check that the batch dims don't cover the last two dims.
+  const int64 lhs_reduction_dimension = dnums.lhs_contracting_dimensions(0);
+  const int64 rhs_reduction_dimension = dnums.rhs_contracting_dimensions(0);
+
+  // Check that the batch dims don't cover the reduction dimensions.
   for (int64 batch_dim : dnums.lhs_batch_dimensions()) {
     CHECK_NE(lhs_reduction_dimension, batch_dim);
     CHECK_NE(rhs_reduction_dimension, batch_dim);
@@ -514,7 +506,11 @@ Status IrEmitter::HandleDot(HloInstruction* dot) {
 
   // Verify the reduction dimension in the two operands are the same size.
   TF_RET_CHECK(lhs_shape.dimensions(lhs_reduction_dimension) ==
-               rhs_shape.dimensions(rhs_reduction_dimension));
+               rhs_shape.dimensions(rhs_reduction_dimension))
+      << "lhs_shape.dimensions(" << lhs_reduction_dimension
+      << ") = " << lhs_shape.dimensions(lhs_reduction_dimension)
+      << ", and rhs_shape.dimensions(" << rhs_reduction_dimension
+      << ") = " << rhs_shape.dimensions(rhs_reduction_dimension);
 
   // Create loop nests which loop through the LHS operand dimensions and the RHS
   // operand dimensions. The reduction dimension of the LHS and RHS are handled
-- 
GitLab


From ba2d5c3a7c415205ac796740e8b52c7fec2c8ec7 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 10 Oct 2018 18:11:45 -0700
Subject: [PATCH 0756/1085] Automated rollback of commit
 9bad98c61f27b60152119bb1c2cfd402c3bf7f3d

PiperOrigin-RevId: 216627219
---
 tensorflow/core/framework/model.cc            | 32 ++++++-------------
 .../core/kernels/data/cache_dataset_ops.cc    | 14 ++++----
 .../assert_next_dataset_op_test.py            | 12 +++++++
 .../data/experimental/ops/prefetching_ops.py  |  9 ------
 tensorflow/python/data/ops/dataset_ops.py     | 29 +++++++++--------
 .../data/ops/multi_device_iterator_ops.py     |  4 ---
 6 files changed, 43 insertions(+), 57 deletions(-)

diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index 9684b736a7..bfdb3a6658 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -59,15 +59,9 @@ int64 Model::Node::ProcessingTimeLocked() {
       return NanosPerElementLocked() + batch_size * ProcessingTimeForInputs();
     }
     case Type::FILTER: {
-      if (inputs_.size() <= 1) {
-        return NanosPerElementLocked();
-      }
       std::shared_ptr<Node> input = inputs_.front();
-      double ratio = 0.0L;
-      if (num_elements_ > 0) {
-        ratio = static_cast<double>(input->num_elements()) /
-                static_cast<double>(num_elements_);
-      }
+      double ratio = static_cast<double>(input->num_elements()) /
+                     static_cast<double>(num_elements_);
       return NanosPerElementLocked() +
              static_cast<int64>(ratio *
                                 static_cast<double>(ProcessingTimeForInputs()));
@@ -121,21 +115,15 @@ int64 Model::Node::OutputTimeLocked(std::vector<int64>* input_times) {
              batch_size * OutputTimeForInputs(input_times);
     }
     case Type::FILTER: {
-      if (inputs_.size() <= 1) {
-        return NanosPerElementLocked();
-      }
       std::shared_ptr<Node> input = inputs_.front();
-      double ratio = 0.0L;
-      if (num_elements_ > 0) {
-        ratio = static_cast<double>(input->num_elements()) /
-                static_cast<double>(num_elements_);
-        int64 old_value = (*input_times)[input_times->size() - 1];
-        (*input_times)[input_times->size() - 1] = static_cast<int64>(
-            static_cast<double>(old_value + NanosPerElementLocked()) / ratio);
-        auto cleanup = gtl::MakeCleanup([input_times, old_value]() {
-          (*input_times)[input_times->size() - 1] = old_value;
-        });
-      }
+      int64 old_value = (*input_times)[input_times->size() - 1];
+      double ratio = static_cast<double>(input->num_elements()) /
+                     static_cast<double>(num_elements_);
+      (*input_times)[input_times->size() - 1] = static_cast<int64>(
+          static_cast<double>(old_value + NanosPerElementLocked()) / ratio);
+      auto cleanup = gtl::MakeCleanup([input_times, old_value]() {
+        (*input_times)[input_times->size() - 1] = old_value;
+      });
       return NanosPerElementLocked() +
              static_cast<int64>(
                  static_cast<double>(OutputTimeForInputs(input_times)) * ratio);
diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc
index f2419db3dc..34c6c86538 100644
--- a/tensorflow/core/kernels/data/cache_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc
@@ -516,12 +516,10 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
         // `FileReaderIterator` and seek to the `cur_index`.
         switch (mode_) {
           case Mode::read:
-            iterator_.reset(new FileReaderIterator(
-                {dataset(), strings::StrCat(prefix(), "Impl")}));
+            iterator_.reset(new FileReaderIterator({dataset(), prefix()}));
             break;
           case Mode::write:
-            iterator_.reset(new FileWriterIterator(
-                {dataset(), strings::StrCat(prefix(), "Impl")}));
+            iterator_.reset(new FileWriterIterator({dataset(), prefix()}));
         }
       }
 
@@ -868,12 +866,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
       void InitializeIterator() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         switch (mode_) {
           case Mode::read:
-            iterator_.reset(new MemoryReaderIterator(
-                {dataset(), strings::StrCat(prefix(), "Impl")}, cache_));
+            iterator_.reset(
+                new MemoryReaderIterator({dataset(), prefix()}, cache_));
             break;
           case Mode::write:
-            iterator_.reset(new MemoryWriterIterator(
-                {dataset(), strings::StrCat(prefix(), "Impl")}, cache_));
+            iterator_.reset(
+                new MemoryWriterIterator({dataset(), prefix()}, cache_));
         }
       }
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
index a138436fff..45b77b5c20 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/assert_next_dataset_op_test.py
@@ -48,6 +48,18 @@ class AssertNextDatasetTest(test_base.DatasetTestBase):
           "Map transformation instead."):
         sess.run(get_next)
 
+  def testAssertNextShort(self):
+    dataset = dataset_ops.Dataset.from_tensors(0).apply(
+        optimization.assert_next(["Map", "Whoops"])).map(lambda x: x)
+    iterator = dataset.make_one_shot_iterator()
+    get_next = iterator.get_next()
+
+    with self.cached_session() as sess:
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          "Asserted next 2 transformations but encountered only 1."):
+        sess.run(get_next)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py
index df082e9e35..48d7136f95 100644
--- a/tensorflow/python/data/experimental/ops/prefetching_ops.py
+++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py
@@ -506,15 +506,6 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset):
     else:
       return super(_CopyToDeviceDataset, self).make_one_shot_iterator()
 
-  def make_initializable_iterator(self):
-    if self._is_gpu_target:
-      # TODO(b/116140813) : Enable dynamic optimizations.
-      options = dataset_ops.Options()
-      options.experimental_autotune = False
-      return self.with_options(options).make_initializable_iterator()
-    else:
-      return super(_CopyToDeviceDataset, self).make_initializable_iterator()
-
   def _as_variant_tensor(self):
     with ops.device(self._target_device):
       return gen_dataset_ops.generator_dataset(
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index d7e37da48b..cdb883cac9 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -99,16 +99,6 @@ class Dataset(object):
         return options
     return Options()
 
-  def _apply_options(self):
-    dataset = self
-    options = self.options()
-    static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
-    if static_optimizations:
-      dataset = _OptimizeDataset(dataset, static_optimizations)
-    if options.experimental_autotune is not False:
-      dataset = _ModelDataset(dataset)
-    return dataset
-
   def make_initializable_iterator(self, shared_name=None):
     """Creates an `Iterator` for enumerating the elements of this dataset.
 
@@ -137,7 +127,13 @@ class Dataset(object):
       raise RuntimeError(
           "dataset.make_initializable_iterator is not supported when eager "
           "execution is enabled.")
-    dataset = self._apply_options()
+    dataset = self
+    options = self.options()
+    static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
+    if static_optimizations:
+      dataset = _OptimizeDataset(dataset, static_optimizations)
+    if options.experimental_autotune:
+      dataset = _ModelDataset(dataset)
     if shared_name is None:
       shared_name = ""
     if compat.forward_compatible(2018, 8, 3):
@@ -167,8 +163,7 @@ class Dataset(object):
       RuntimeError: If eager execution is not enabled.
     """
     if context.executing_eagerly():
-      dataset = self._apply_options()
-      return iterator_ops.EagerIterator(dataset)
+      return iterator_ops.EagerIterator(self)
     else:
       raise RuntimeError("dataset.__iter__() is only supported when eager "
                          "execution is enabled.")
@@ -199,7 +194,13 @@ class Dataset(object):
         core_random_seed.set_random_seed(
             (graph_level_seed + 87654321 * op_level_seed) % (2 ** 63 - 1))
 
-      dataset = self._apply_options()
+      dataset = self
+      options = self.options()
+      static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
+      if static_optimizations:
+        dataset = _OptimizeDataset(dataset, static_optimizations)
+      if options.experimental_autotune:
+        dataset = _ModelDataset(dataset)
       return dataset._as_variant_tensor()  # pylint: disable=protected-access
 
     try:
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index 3bcc20b333..b7d3aac206 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -206,10 +206,6 @@ class MultiDeviceIterator(object):
           i, self._multi_device_iterator_resource, self._incarnation_id,
           self._source_device_tensor, device, self._dataset.output_shapes,
           self._dataset.output_types, self._dataset.output_classes)
-      # TODO(b/116140813) : Enable dynamic optimizations.
-      options = dataset_ops.Options()
-      options.experimental_autotune = False
-      ds = ds.with_options(options)
       if prefetch_buffer_size > 0:
         ds = ds.prefetch(prefetch_buffer_size)
       with ops.device(device):
-- 
GitLab


From 073c727871c6d4c8e696cb4b071e131f5550bb62 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Wed, 10 Oct 2018 18:41:23 -0700
Subject: [PATCH 0757/1085] [XLA:GPU] Use HloVerifiedModule in
 cudnn_conv_pad_for_tensor_cores_test.

No functional change.

PiperOrigin-RevId: 216629980
---
 .../cudnn_conv_pad_for_tensor_cores_test.cc   | 67 ++++++++++---------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc
index 074f95698e..fa3afa6a5d 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc
@@ -32,7 +32,7 @@ using ::testing::_;
 class CudnnConvPadForTensorCoresTest : public HloVerifiedTestBase {};
 
 TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvInputChannels) {
-  ParseAndVerifyModule(R"(
+  auto module = ParseAndReturnVerifiedModule(R"(
   HloModule TestModule
 
   ENTRY TestComputation {
@@ -41,11 +41,12 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvInputChannels) {
     ROOT result = (f16[10,20,30,40], u8[0]) custom-call(input, filter),
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convForward"
-  })");
-  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
-  auto* root = module().entry_computation()->root_instruction();
+  })")
+                    .ValueOrDie();
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie());
+  auto* root = module->entry_computation()->root_instruction();
 
-  SCOPED_TRACE(module().ToString());
+  SCOPED_TRACE(module->ToString());
   EXPECT_THAT(root, op::CustomCall(kCudnnConvForwardCallTarget,
                                    op::Pad(op::Parameter(0), _),
                                    op::Pad(op::Parameter(1), _)));
@@ -56,7 +57,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvInputChannels) {
 }
 
 TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) {
-  ParseAndVerifyModule(R"(
+  auto module = ParseAndReturnVerifiedModule(R"(
   HloModule TestModule
 
   ENTRY TestComputation {
@@ -65,9 +66,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) {
     ROOT result = (f16[10,20,30,40], u8[0]) custom-call(output, filter),
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convBackwardInput"
-  })");
-  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
-  auto* root = module().entry_computation()->root_instruction();
+  })")
+                    .ValueOrDie();
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie());
+  auto* root = module->entry_computation()->root_instruction();
   EXPECT_THAT(root, op::CustomCall(kCudnnConvBackwardInputCallTarget,
                                    op::Pad(op::Parameter(0), _),
                                    op::Pad(op::Parameter(1), _)));
@@ -78,7 +80,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) {
 }
 
 TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvOutputChannels) {
-  ParseAndVerifyModule(R"(
+  auto module = ParseAndReturnVerifiedModule(R"(
   HloModule TestModule
 
   ENTRY TestComputation {
@@ -87,9 +89,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvOutputChannels) {
     ROOT result = (f16[10,20,30,41], u8[0]) custom-call(input, filter),
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convForward"
-  })");
-  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
-  auto* root = module().entry_computation()->root_instruction();
+  })")
+                    .ValueOrDie();
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie());
+  auto* root = module->entry_computation()->root_instruction();
   EXPECT_THAT(root, op::Tuple(op::Slice(op::GetTupleElement(op::CustomCall(
                                   kCudnnConvForwardCallTarget, op::Parameter(0),
                                   op::Pad(op::Parameter(1), _)))),
@@ -97,7 +100,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvOutputChannels) {
 }
 
 TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvInputChannels) {
-  ParseAndVerifyModule(R"(
+  auto module = ParseAndReturnVerifiedModule(R"(
   HloModule TestModule
 
   ENTRY TestComputation {
@@ -107,9 +110,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvInputChannels) {
               window={size=2x2}, dim_labels=b01f_01io->b01f,
               custom_call_target="__cudnn$convBackwardInput"
     ROOT gte = f16[10,20,30,41] get-tuple-element(result), index=0
-  })");
-  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
-  auto* root = module().entry_computation()->root_instruction();
+  })")
+                    .ValueOrDie();
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie());
+  auto* root = module->entry_computation()->root_instruction();
   EXPECT_THAT(root, op::GetTupleElement(op::Tuple(
                         op::Slice(op::GetTupleElement(op::CustomCall(
                             kCudnnConvBackwardInputCallTarget, op::Parameter(0),
@@ -118,7 +122,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvInputChannels) {
 }
 
 TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) {
-  ParseAndVerifyModule(R"(
+  auto module = ParseAndReturnVerifiedModule(R"(
   HloModule TestModule
 
   ENTRY TestComputation {
@@ -128,9 +132,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) {
               window={size=2x2}, dim_labels=b01f_01io->b01f,
               custom_call_target="__cudnn$convBackwardFilter"
     ROOT gte = f16[2,2,41,40] get-tuple-element(result), index=0
-  })");
-  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
-  auto* root = module().entry_computation()->root_instruction();
+  })")
+                    .ValueOrDie();
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie());
+  auto* root = module->entry_computation()->root_instruction();
   EXPECT_THAT(root, op::GetTupleElement(op::Tuple(
                         op::Slice(op::GetTupleElement(op::CustomCall(
                             kCudnnConvBackwardFilterCallTarget,
@@ -139,7 +144,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) {
 }
 
 TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) {
-  ParseAndVerifyModule(R"(
+  auto module = ParseAndReturnVerifiedModule(R"(
   HloModule TestModule
 
   ENTRY TestComputation {
@@ -149,9 +154,10 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) {
               window={size=2x2}, dim_labels=b01f_01io->b01f,
               custom_call_target="__cudnn$convBackwardFilter"
     ROOT gte = f16[2,2,40,41] get-tuple-element(result), index=0
-  })");
-  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
-  auto* root = module().entry_computation()->root_instruction();
+  })")
+                    .ValueOrDie();
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie());
+  auto* root = module->entry_computation()->root_instruction();
   EXPECT_THAT(root, op::GetTupleElement(op::Tuple(
                         op::Slice(op::GetTupleElement(op::CustomCall(
                             kCudnnConvBackwardFilterCallTarget,
@@ -160,7 +166,7 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) {
 }
 
 TEST_F(CudnnConvPadForTensorCoresTest, PadInputFeatures3To4) {
-  ParseAndVerifyModule(R"(
+  auto module = ParseAndReturnVerifiedModule(R"(
   HloModule TestModule
 
   ENTRY TestComputation {
@@ -169,11 +175,12 @@ TEST_F(CudnnConvPadForTensorCoresTest, PadInputFeatures3To4) {
     ROOT result = (f16[10,20,30,32], u8[0]) custom-call(input, filter),
                   window={size=2x2}, dim_labels=b01f_01io->b01f,
                   custom_call_target="__cudnn$convForward"
-  })");
-  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(&module()).ValueOrDie());
-  auto* root = module().entry_computation()->root_instruction();
+  })")
+                    .ValueOrDie();
+  EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie());
+  auto* root = module->entry_computation()->root_instruction();
 
-  SCOPED_TRACE(module().ToString());
+  SCOPED_TRACE(module->ToString());
   EXPECT_THAT(root, op::CustomCall(kCudnnConvForwardCallTarget,
                                    op::Pad(op::Parameter(0), _),
                                    op::Pad(op::Parameter(1), _)));
-- 
GitLab


From 9ef04f539932c286bcf6c9b05c06b5c3981bd892 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Wed, 10 Oct 2018 18:52:45 -0700
Subject: [PATCH 0758/1085] Disable signed-compare warnings in TensorFlow
 default build.

We have a lot of such warnings and don't make an attempt to clean them up
internally.

PiperOrigin-RevId: 216631010
---
 configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 89dc79b6b6..07c6e4108e 100644
--- a/configure.py
+++ b/configure.py
@@ -497,7 +497,7 @@ def set_cc_opt_flags(environ_cp):
   elif is_windows():
     default_cc_opt_flags = '/arch:AVX'
   else:
-    default_cc_opt_flags = '-march=native'
+    default_cc_opt_flags = '-march=native -Wno-sign-compare'
   question = ('Please specify optimization flags to use during compilation when'
               ' bazel option "--config=opt" is specified [Default is %s]: '
              ) % default_cc_opt_flags
-- 
GitLab


From 9153b897c4dfb8685d78397e22c1acd5ff24d40a Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Wed, 10 Oct 2018 19:09:31 -0700
Subject: [PATCH 0759/1085] Fp16 LSTMBlocKCell and LSTMBlockFusedCell

PiperOrigin-RevId: 216632480
---
 tensorflow/contrib/rnn/kernels/blas_gemm.cc   |   7 +-
 tensorflow/contrib/rnn/kernels/blas_gemm.h    |   9 +-
 tensorflow/contrib/rnn/kernels/lstm_ops.cc    | 163 +++++++++---------
 tensorflow/contrib/rnn/kernels/lstm_ops.h     |  34 ++--
 .../contrib/rnn/kernels/lstm_ops_gpu.cu.cc    |  80 +++++++--
 tensorflow/contrib/rnn/ops/lstm_ops.cc        |   8 +-
 tensorflow/contrib/rnn/python/ops/lstm_ops.py |   5 +-
 7 files changed, 185 insertions(+), 121 deletions(-)

diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.cc b/tensorflow/contrib/rnn/kernels/blas_gemm.cc
index 45d22b739b..56ec86418d 100644
--- a/tensorflow/contrib/rnn/kernels/blas_gemm.cc
+++ b/tensorflow/contrib/rnn/kernels/blas_gemm.cc
@@ -38,8 +38,9 @@ namespace functor {
 template <typename T>
 void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx, bool transa,
                                      bool transb, uint64 m, uint64 n, uint64 k,
-                                     T alpha, const T* a, int lda, const T* b,
-                                     int ldb, T beta, T* c, int ldc) {
+                                     float alpha, const T* a, int lda,
+                                     const T* b, int ldb, float beta, T* c,
+                                     int ldc) {
 #if GOOGLE_CUDA
   se::blas::Transpose trans[] = {se::blas::Transpose::kNoTranspose,
                                  se::blas::Transpose::kTranspose};
@@ -60,8 +61,8 @@ void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx, bool transa,
 #endif
 }
 
+template struct TensorCuBlasGemm<Eigen::half>;
 template struct TensorCuBlasGemm<float>;
-template struct TensorCuBlasGemm<double>;
 
 }  // end namespace functor
 }  // end namespace tensorflow
diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.h b/tensorflow/contrib/rnn/kernels/blas_gemm.h
index a52c934233..9535a76566 100644
--- a/tensorflow/contrib/rnn/kernels/blas_gemm.h
+++ b/tensorflow/contrib/rnn/kernels/blas_gemm.h
@@ -28,8 +28,8 @@ namespace functor {
 template <typename T>
 struct TensorCuBlasGemm {
   void operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m,
-                  uint64 n, uint64 k, T alpha, const T* a, int lda, const T* b,
-                  int ldb, T beta, T* c, int ldc);
+                  uint64 n, uint64 k, float alpha, const T* a, int lda,
+                  const T* b, int ldb, float beta, T* c, int ldc);
 };
 
 template <typename Device, typename T, bool USE_CUBLAS>
@@ -38,8 +38,9 @@ struct TensorBlasGemm;
 template <typename Device, typename T>
 struct TensorBlasGemm<Device, T, true /* USE_CUBLAS */> {
   static void compute(OpKernelContext* ctx, const Device& d, bool transa,
-                      bool transb, T alpha, typename TTypes<T>::ConstMatrix a,
-                      typename TTypes<T>::ConstMatrix b, T beta,
+                      bool transb, float alpha,
+                      typename TTypes<T>::ConstMatrix a,
+                      typename TTypes<T>::ConstMatrix b, float beta,
                       typename TTypes<T>::Matrix c) {
     int64 m = c.dimensions()[0];
     int64 n = c.dimensions()[1];
diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.cc b/tensorflow/contrib/rnn/kernels/lstm_ops.cc
index 5e7cf0ce84..ee08d306f8 100644
--- a/tensorflow/contrib/rnn/kernels/lstm_ops.cc
+++ b/tensorflow/contrib/rnn/kernels/lstm_ops.cc
@@ -44,7 +44,7 @@ namespace functor {
 template <typename T>
 void LSTMBlockCellFpropWithEigen(
     const LSTMBlockCell& cell, OpKernelContext* ctx, const CPUDevice& d,
-    const T forget_bias, const T cell_clip, bool use_peephole,
+    const float forget_bias, const float cell_clip, bool use_peephole,
     typename TTypes<T>::ConstMatrix x, typename TTypes<T>::ConstMatrix cs_prev,
     typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
     typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
@@ -177,50 +177,51 @@ void LSTMBlockCellBpropWithEigen(
   }
 }
 
-#define DEFINE_CPU_SPECS(T)                                                    \
-  template <>                                                                  \
-  void LSTMBlockCellFprop<CPUDevice, T, false /* USE_CUBLAS */>::operator()(   \
-      OpKernelContext* ctx, const CPUDevice& d, const T forget_bias,           \
-      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x, \
-      typename TTypes<T>::ConstMatrix cs_prev,                                 \
-      typename TTypes<T>::ConstMatrix h_prev,                                  \
-      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,     \
-      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,      \
-      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,           \
-      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,             \
-      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,              \
-      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,            \
-      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h) {         \
-    LSTMBlockCellFpropWithEigen<T>(                                            \
-        *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev,       \
-        h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h);        \
-  }                                                                            \
-  template <>                                                                  \
-  void LSTMBlockCellBprop<CPUDevice, T, false /* USE_CUBLAS */>::operator()(   \
-      OpKernelContext* ctx, const CPUDevice& d, bool use_peephole,             \
-      typename TTypes<T>::ConstMatrix x,                                       \
-      typename TTypes<T>::ConstMatrix cs_prev,                                 \
-      typename TTypes<T>::ConstMatrix h_prev,                                  \
-      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,     \
-      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,      \
-      typename TTypes<T>::ConstVec b, typename TTypes<T>::ConstMatrix i,       \
-      typename TTypes<T>::ConstMatrix cs, typename TTypes<T>::ConstMatrix f,   \
-      typename TTypes<T>::ConstMatrix o, typename TTypes<T>::ConstMatrix ci,   \
-      typename TTypes<T>::ConstMatrix co,                                      \
-      typename TTypes<T>::ConstMatrix cs_grad,                                 \
-      typename TTypes<T>::ConstMatrix h_grad, typename TTypes<T>::Matrix do_,  \
-      typename TTypes<T>::Matrix dcs, typename TTypes<T>::Matrix dci,          \
-      typename TTypes<T>::Matrix df, typename TTypes<T>::Matrix di,            \
-      typename TTypes<T>::Matrix dicfo,                                        \
-      typename TTypes<T>::Matrix cs_prev_grad,                                 \
-      typename TTypes<T>::Vec wci_grad, typename TTypes<T>::Vec wcf_grad,      \
-      typename TTypes<T>::Vec wco_grad) {                                      \
-    LSTMBlockCellBpropWithEigen<CPUDevice, T, false /* USE_CUBLAS */>(         \
-        *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b,  \
-        i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo,    \
-        cs_prev_grad, wci_grad, wcf_grad, wco_grad);                           \
-  }                                                                            \
-  template struct LSTMBlockCellFprop<CPUDevice, T, false /* USE_CUBLAS */>;    \
+#define DEFINE_CPU_SPECS(T)                                                   \
+  template <>                                                                 \
+  void LSTMBlockCellFprop<CPUDevice, T, false /* USE_CUBLAS */>::operator()(  \
+      OpKernelContext* ctx, const CPUDevice& d, const float forget_bias,      \
+      const float cell_clip, bool use_peephole,                               \
+      typename TTypes<T>::ConstMatrix x,                                      \
+      typename TTypes<T>::ConstMatrix cs_prev,                                \
+      typename TTypes<T>::ConstMatrix h_prev,                                 \
+      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,    \
+      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,     \
+      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,          \
+      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,            \
+      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,             \
+      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,           \
+      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h) {        \
+    LSTMBlockCellFpropWithEigen<T>(                                           \
+        *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev,      \
+        h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h);       \
+  }                                                                           \
+  template <>                                                                 \
+  void LSTMBlockCellBprop<CPUDevice, T, false /* USE_CUBLAS */>::operator()(  \
+      OpKernelContext* ctx, const CPUDevice& d, bool use_peephole,            \
+      typename TTypes<T>::ConstMatrix x,                                      \
+      typename TTypes<T>::ConstMatrix cs_prev,                                \
+      typename TTypes<T>::ConstMatrix h_prev,                                 \
+      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,    \
+      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,     \
+      typename TTypes<T>::ConstVec b, typename TTypes<T>::ConstMatrix i,      \
+      typename TTypes<T>::ConstMatrix cs, typename TTypes<T>::ConstMatrix f,  \
+      typename TTypes<T>::ConstMatrix o, typename TTypes<T>::ConstMatrix ci,  \
+      typename TTypes<T>::ConstMatrix co,                                     \
+      typename TTypes<T>::ConstMatrix cs_grad,                                \
+      typename TTypes<T>::ConstMatrix h_grad, typename TTypes<T>::Matrix do_, \
+      typename TTypes<T>::Matrix dcs, typename TTypes<T>::Matrix dci,         \
+      typename TTypes<T>::Matrix df, typename TTypes<T>::Matrix di,           \
+      typename TTypes<T>::Matrix dicfo,                                       \
+      typename TTypes<T>::Matrix cs_prev_grad,                                \
+      typename TTypes<T>::Vec wci_grad, typename TTypes<T>::Vec wcf_grad,     \
+      typename TTypes<T>::Vec wco_grad) {                                     \
+    LSTMBlockCellBpropWithEigen<CPUDevice, T, false /* USE_CUBLAS */>(        \
+        *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b, \
+        i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo,   \
+        cs_prev_grad, wci_grad, wcf_grad, wco_grad);                          \
+  }                                                                           \
+  template struct LSTMBlockCellFprop<CPUDevice, T, false /* USE_CUBLAS */>;   \
   template struct LSTMBlockCellBprop<CPUDevice, T, false /* USE_CUBLAS */>;
 
 DEFINE_CPU_SPECS(float);
@@ -377,24 +378,26 @@ REGISTER_KERNEL(float);
 
 #if GOOGLE_CUDA
 namespace functor {
-#define DECLARE_GPU_SPEC(T)                                                    \
-  template <>                                                                  \
-  void LSTMBlockCellFprop<GPUDevice, T, true>::operator()(                     \
-      OpKernelContext* ctx, const GPUDevice& d, const T forget_bias,           \
-      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x, \
-      typename TTypes<T>::ConstMatrix cs_prev,                                 \
-      typename TTypes<T>::ConstMatrix h_prev,                                  \
-      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,     \
-      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,      \
-      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,           \
-      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,             \
-      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,              \
-      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,            \
-      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h);          \
-                                                                               \
+#define DECLARE_GPU_SPEC(T)                                                \
+  template <>                                                              \
+  void LSTMBlockCellFprop<GPUDevice, T, true>::operator()(                 \
+      OpKernelContext* ctx, const GPUDevice& d, const float forget_bias,   \
+      const float cell_clip, bool use_peephole,                            \
+      typename TTypes<T>::ConstMatrix x,                                   \
+      typename TTypes<T>::ConstMatrix cs_prev,                             \
+      typename TTypes<T>::ConstMatrix h_prev,                              \
+      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci, \
+      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,  \
+      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,       \
+      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,         \
+      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,          \
+      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,        \
+      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h);      \
+                                                                           \
   extern template struct LSTMBlockCellFprop<GPUDevice, T, true>;
 
 DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(Eigen::half);
 // DECLARE_GPU_SPEC(double);
 #undef DECLARE_GPU_SPEC
 }  // end namespace functor
@@ -405,6 +408,7 @@ DECLARE_GPU_SPEC(float);
       LSTMBlockCellOp<GPUDevice, T, true>);
 
 REGISTER_GPU_KERNEL(float);
+REGISTER_GPU_KERNEL(Eigen::half);
 // REGISTER_GPU_KERNEL(double);
 #undef REGISTER_GPU_KERNEL
 #endif  // GOOGLE_CUDA
@@ -629,9 +633,9 @@ class LSTMBlockCellGradOp : public OpKernel {
 
     const Device& device = ctx->eigen_device<Device>();
 
-    functor::TensorZero<Device, T>()(device, wci_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wcf_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wco_grad_tensor->flat<float>());
+    functor::TensorZero<Device, T>()(device, wci_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wcf_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wco_grad_tensor->flat<T>());
 
     functor::LSTMBlockCellBprop<Device, T, USE_CUBLAS>(batch_size, input_size,
                                                        cell_size)(
@@ -688,6 +692,7 @@ namespace functor {
                                             true /* USE_CUBLAS */>;
 
 DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(Eigen::half);
 // DECLARE_GPU_SPEC(double);
 #undef DECLARE_GPU_SPEC
 }  // namespace functor
@@ -698,6 +703,7 @@ DECLARE_GPU_SPEC(float);
       LSTMBlockCellGradOp<GPUDevice, T, true>);
 
 REGISTER_GPU_KERNEL(float);
+REGISTER_GPU_KERNEL(Eigen::half);
 // REGISTER_GPU_KERNEL(double);
 #undef REGISTER_GPU_KERNEL
 #endif  // GOOGLE_CUDA
@@ -984,10 +990,10 @@ class BlockLSTMOp : public OpKernel {
       Tensor cs_tensor = cs_out->Slice(seq_len_max, timelen);
       Tensor h_tensor = h_out->Slice(seq_len_max, timelen);
 
-      functor::TensorUnalignedZero<Device, T>()(
-          device, cs_tensor.unaligned_flat<float>());
-      functor::TensorUnalignedZero<Device, T>()(
-          device, h_tensor.unaligned_flat<float>());
+      functor::TensorUnalignedZero<Device, T>()(device,
+                                                cs_tensor.unaligned_flat<T>());
+      functor::TensorUnalignedZero<Device, T>()(device,
+                                                h_tensor.unaligned_flat<T>());
     }
   }
 
@@ -1021,6 +1027,7 @@ namespace functor {
   extern template struct TensorUnalignedZero<GPUDevice, T>;
 
 DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(Eigen::half);
 // DECLARE_GPU_SPEC(double);
 #undef DECLARE_GPU_SPEC
 }  // end namespace functor
@@ -1033,6 +1040,7 @@ DECLARE_GPU_SPEC(float);
                           BlockLSTMOp<GPUDevice, T, true>);
 
 REGISTER_GPU_KERNEL(float);
+REGISTER_GPU_KERNEL(Eigen::half);
 // REGISTER_GPU_KERNEL(double);
 #undef REGISTER_GPU_KERNEL
 #endif  // GOOGLE_CUDA
@@ -1195,16 +1203,15 @@ class BlockLSTMGradOp : public OpKernel {
 
     const Device& device = ctx->eigen_device<Device>();
 
-    functor::TensorZero<Device, T>()(device, cs_grad_tensor.flat<float>());
-    functor::TensorZero<Device, T>()(device,
-                                     cs_prev_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, h_grad_tensor.flat<float>());
-    functor::TensorZero<Device, T>()(device, h_prev_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, w_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wci_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wcf_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wco_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, b_grad_tensor->flat<float>());
+    functor::TensorZero<Device, T>()(device, cs_grad_tensor.flat<T>());
+    functor::TensorZero<Device, T>()(device, cs_prev_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, h_grad_tensor.flat<T>());
+    functor::TensorZero<Device, T>()(device, h_prev_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, w_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wci_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wcf_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wco_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, b_grad_tensor->flat<T>());
 
     const int64 seq_len_max = seq_len_max_tensor->scalar<int64>()();
     SliceHelper<Device, T> slicer(ctx);
@@ -1331,6 +1338,7 @@ namespace functor {
   extern template struct BlockLSTMBprop<GPUDevice, T, true>;
 
 DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(Eigen::half);
 // DECLARE_GPU_SPEC(double);
 #undef DECLARE_GPU_SPEC
 }  // end namespace functor
@@ -1343,6 +1351,7 @@ DECLARE_GPU_SPEC(float);
                           BlockLSTMGradOp<GPUDevice, T, true>);
 
 REGISTER_GPU_KERNEL(float);
+REGISTER_GPU_KERNEL(Eigen::half);
 // REGISTER_GPU_KERNEL(double);
 #undef REGISTER_GPU_KERNEL
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.h b/tensorflow/contrib/rnn/kernels/lstm_ops.h
index d23cedc234..5ca1dad655 100644
--- a/tensorflow/contrib/rnn/kernels/lstm_ops.h
+++ b/tensorflow/contrib/rnn/kernels/lstm_ops.h
@@ -77,8 +77,7 @@ template <typename Device, typename T>
 struct TensorZeroPadding {
   void operator()(const Device& d, const int64 time_idx,
                   typename TTypes<int64>::ConstVec seq_len,
-                  typename TTypes<float>::Vec mask,
-                  typename TTypes<float>::Matrix m) {
+                  typename TTypes<T>::Vec mask, typename TTypes<T>::Matrix m) {
     // mask is shape [batch_size].
     mask.device(d) = seq_len.constant(time_idx) < seq_len;
 
@@ -154,18 +153,21 @@ struct LSTMBlockCellFprop : public LSTMBlockCell {
                      const int cell_size)
       : LSTMBlockCell(batch_size, input_size, cell_size) {}
 
-  void operator()(
-      OpKernelContext* ctx, const Device& d, const T forget_bias,
-      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x,
-      typename TTypes<T>::ConstMatrix cs_prev,
-      typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
-      typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
-      typename TTypes<T>::ConstVec wco, typename TTypes<T>::ConstVec b,
-      typename TTypes<T>::Matrix xh, typename TTypes<T>::Matrix i,
-      typename TTypes<T>::Matrix cs, typename TTypes<T>::Matrix f,
-      typename TTypes<T>::Matrix o, typename TTypes<T>::Matrix ci,
-      typename TTypes<T>::Matrix co, typename TTypes<T>::Matrix icfo,
-      typename TTypes<T>::Matrix h);
+  void operator()(OpKernelContext* ctx, const Device& d,
+                  const float forget_bias, const float cell_clip,
+                  bool use_peephole, typename TTypes<T>::ConstMatrix x,
+                  typename TTypes<T>::ConstMatrix cs_prev,
+                  typename TTypes<T>::ConstMatrix h_prev,
+                  typename TTypes<T>::ConstMatrix w,
+                  typename TTypes<T>::ConstVec wci,
+                  typename TTypes<T>::ConstVec wcf,
+                  typename TTypes<T>::ConstVec wco,
+                  typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,
+                  typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,
+                  typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,
+                  typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,
+                  typename TTypes<T>::Matrix icfo,
+                  typename TTypes<T>::Matrix h);
 };
 
 // See lstm_ops.cc for CPUDevice implementation and lstm_ops_gpu.cu.cc for
@@ -261,7 +263,7 @@ struct BlockLSTMBprop : public LSTMBlockCell {
     typename TTypes<T>::ConstMatrix const_dicfo(dicfo.data(),
                                                 dicfo.dimensions());
     TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
-        ctx, d, false, true, T(1), const_dicfo, w, T(0), xh_grad);
+        ctx, d, false, true, 1.f, const_dicfo, w, 0.f, xh_grad);
 
     // xh.
     xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x;
@@ -274,7 +276,7 @@ struct BlockLSTMBprop : public LSTMBlockCell {
 
     // w_grad.
     TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
-        ctx, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad);
+        ctx, d, true, false, 1.f, const_xh, const_dicfo, 1.f, w_grad);
 
     // b_grad.
     b_grad.device(d) += dicfo.sum(Eigen::array<int, 1>({0}));
diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc
index 6d3758fef1..b664b0f45e 100644
--- a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc
+++ b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc
@@ -31,6 +31,49 @@ typedef Eigen::GpuDevice GPUDevice;
 
 namespace {
 
+struct FloatToHalf {
+  __host__ __device__ EIGEN_STRONG_INLINE Eigen::half operator()(
+      const float& x) const {
+    return Eigen::half_impl::float_to_half_rtne(x);
+  }
+};
+
+template <typename U, typename T>
+__host__ __device__ EIGEN_STRONG_INLINE
+    typename std::enable_if<!std::is_same<T, U>::value, U>::type
+    strict_cast(T t);
+
+template <typename U, typename T>
+__host__ __device__ EIGEN_STRONG_INLINE
+    typename std::enable_if<std::is_same<T, U>::value, U>::type
+    strict_cast(T t) {
+  return t;
+}
+
+template <>
+__host__ __device__ EIGEN_STRONG_INLINE Eigen::half
+strict_cast<Eigen::half, float>(float t) {
+  return FloatToHalf()(t);
+}
+
+}  // namespace
+
+template <typename T>
+struct TensorZero<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::Flat t) {
+    t.device(d) = t.constant(strict_cast<T>(0.f));
+  }
+};
+
+template <typename T>
+struct TensorUnalignedZero<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::UnalignedFlat t) {
+    t.device(d) = t.constant(strict_cast<T>(0.f));
+  }
+};
+
+namespace {
+
 // Adds bias, applies non-linearities and gates.
 //
 // Launch with a 2D setup such that there is one thread per (example,
@@ -42,12 +85,15 @@ namespace {
 template <typename T, bool use_peephole>
 __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev,
                            const T* wci, const T* wcf, const T* wco, T* o, T* h,
-                           T* ci, T* cs, T* co, T* i, T* f, const T forget_bias,
-                           const T cell_clip, const int batch_size,
-                           const int cell_size) {
+                           T* ci, T* cs, T* co, T* i, T* f,
+                           const float forget_bias, const float cell_clip,
+                           const int batch_size, const int cell_size) {
   const int batch_id = blockIdx.x * blockDim.x + threadIdx.x;
   const int act_id = blockIdx.y * blockDim.y + threadIdx.y;
 
+  T forget_bias_t = strict_cast<T>(forget_bias);
+  T cell_clip_t = strict_cast<T>(cell_clip);
+
   if (batch_id >= batch_size || act_id >= cell_size) return;
 
   // The following code assumes the input arrays are of the following
@@ -115,16 +161,16 @@ __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev,
   T f_local;
   if (use_peephole) {
     f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] +
-                         forget_bias + cs_prev[cid] * wcf[act_id]);
+                         forget_bias_t + cs_prev[cid] * wcf[act_id]);
   } else {
     f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] +
-                         forget_bias);
+                         forget_bias_t);
   }
   f[cid] = f_local;
 
   T cs_local = i_local * ci_local + f_local * cs_prev[cid];
-  if (cell_clip > 0.0) {
-    cs_local = clip_op(cs_local, cell_clip);
+  if (cell_clip_t > strict_cast<T>(0.0f)) {
+    cs_local = clip_op(cs_local, cell_clip_t);
   }
   cs[cid] = cs_local;
 
@@ -174,8 +220,8 @@ __global__ void concat_xh(T* xh, const T* x, const T* h_prev,
 
 template <typename T>
 void LSTMBlockCellFpropWithCUDA(
-    OpKernelContext* ctx, const GPUDevice& d, const T forget_bias,
-    const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x,
+    OpKernelContext* ctx, const GPUDevice& d, const float forget_bias,
+    const float cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x,
     typename TTypes<T>::ConstMatrix cs_prev,
     typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
     typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
@@ -202,7 +248,7 @@ void LSTMBlockCellFpropWithCUDA(
   // states1 = xh * w
   typename TTypes<T>::ConstMatrix const_xh(xh.data(), xh.dimensions());
   TensorBlasGemm<GPUDevice, T, true /* USE_CUBLAS */>::compute(
-      ctx, d, false, false, T(1), const_xh, w, T(0), icfo);
+      ctx, d, false, false, 1.f, const_xh, w, 0.f, icfo);
 
   // Add bias, apply non-linearities and gating.
   //
@@ -357,8 +403,9 @@ void LSTMBlockCellBpropWithCUDA(
   template struct TensorAdd<GPUDevice, T>;                                     \
   template <>                                                                  \
   void LSTMBlockCellFprop<GPUDevice, T, true /* USE_CUBLAS */>::operator()(    \
-      OpKernelContext* ctx, const GPUDevice& d, const T forget_bias,           \
-      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x, \
+      OpKernelContext* ctx, const GPUDevice& d, const float forget_bias,       \
+      const float cell_clip, bool use_peephole,                                \
+      typename TTypes<T>::ConstMatrix x,                                       \
       typename TTypes<T>::ConstMatrix cs_prev,                                 \
       typename TTypes<T>::ConstMatrix h_prev,                                  \
       typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,     \
@@ -368,10 +415,10 @@ void LSTMBlockCellBpropWithCUDA(
       typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,              \
       typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,            \
       typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h) {         \
-    LSTMBlockCellFpropWithCUDA(ctx, d, forget_bias, cell_clip, use_peephole,   \
-                               x, cs_prev, h_prev, w, wci, wcf, wco, b, xh, i, \
-                               cs, f, o, ci, co, icfo, h, batch_size_,         \
-                               cell_size_, input_size_);                       \
+    LSTMBlockCellFpropWithCUDA<T>(ctx, d, forget_bias, cell_clip,              \
+                                  use_peephole, x, cs_prev, h_prev, w, wci,    \
+                                  wcf, wco, b, xh, i, cs, f, o, ci, co, icfo,  \
+                                  h, batch_size_, cell_size_, input_size_);    \
   }                                                                            \
   template <>                                                                  \
   void LSTMBlockCellBprop<GPUDevice, T, true /* USE_CUBLAS */>::operator()(    \
@@ -403,6 +450,7 @@ void LSTMBlockCellBpropWithCUDA(
   template struct BlockLSTMBprop<GPUDevice, T, true /* USE_CUBLAS */>;
 
 DEFINE_GPU_SPECS(float);
+DEFINE_GPU_SPECS(Eigen::half);
 // DEFINE_GPU_SPECS(double);
 #undef DEFINE_GPU_SPECS
 
diff --git a/tensorflow/contrib/rnn/ops/lstm_ops.cc b/tensorflow/contrib/rnn/ops/lstm_ops.cc
index 699cc6c88a..1679e35518 100644
--- a/tensorflow/contrib/rnn/ops/lstm_ops.cc
+++ b/tensorflow/contrib/rnn/ops/lstm_ops.cc
@@ -41,7 +41,7 @@ REGISTER_OP("LSTMBlockCell")
     .Attr("forget_bias: float = 1.0")
     .Attr("cell_clip: float = 3.0")
     .Attr("use_peephole: bool = false")
-    .Attr("T: {float}")
+    .Attr("T: {half, float}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle x, cs_prev;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x));
@@ -128,7 +128,7 @@ REGISTER_OP("LSTMBlockCellGrad")
     .Output("wcf_grad: T")
     .Output("wco_grad: T")
     .Attr("use_peephole: bool")
-    .Attr("T: {float}")
+    .Attr("T: {half, float}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle x, cs_prev;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x));
@@ -196,7 +196,7 @@ REGISTER_OP("BlockLSTM")
     .Attr("forget_bias: float = 1.0")
     .Attr("cell_clip: float = 3.0")
     .Attr("use_peephole: bool = false")
-    .Attr("T: {float}")
+    .Attr("T: {half, float}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle x, b;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x));
@@ -288,7 +288,7 @@ REGISTER_OP("BlockLSTMGrad")
     .Output("wco_grad: T")
     .Output("b_grad: T")
     .Attr("use_peephole: bool")
-    .Attr("T: {float}")
+    .Attr("T: {half, float}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle x, cs_prev, h_prev, w, wci, wco, wcf, b;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x));
diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
index 9e61fc54d1..f645165efe 100644
--- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
@@ -596,6 +596,7 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
                cell_clip=None,
                use_peephole=False,
                reuse=None,
+               dtype=None,
                name="lstm_fused_cell"):
     """Initialize the LSTM cell.
 
@@ -607,12 +608,14 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
       reuse: (optional) boolean describing whether to reuse variables in an
         existing scope.  If not `True`, and the existing scope already has the
         given variables, an error is raised.
+      dtype: the dtype of variables of this layer.
       name: String, the name of the layer. Layers with the same name will
         share weights, but to avoid mistakes we require reuse=True in such
         cases.  By default this is "lstm_cell", for variable-name compatibility
         with `tf.nn.rnn_cell.LSTMCell`.
     """
-    super(LSTMBlockFusedCell, self).__init__(_reuse=reuse, name=name)
+    super(LSTMBlockFusedCell, self).__init__(
+        _reuse=reuse, name=name, dtype=dtype)
     self._num_units = num_units
     self._forget_bias = forget_bias
     self._cell_clip = cell_clip if cell_clip is not None else -1
-- 
GitLab


From 905deeaadd41d529461d8a6666e9cf46f0097a8f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 19:19:07 -0700
Subject: [PATCH 0760/1085] Automated rollback of commit
 3abfe2cd9befa263de57edfae7d4c0d29c9c9182

PiperOrigin-RevId: 216633097
---
 .../xla/service/compile_only_service.cc       |   2 -
 .../compiler/xrt/kernels/xrt_compile_ops.cc   |  19 +---
 .../compiler/xrt/kernels/xrt_execute_op.cc    |   8 ++
 .../compiler/xrt/ops/xrt_compile_ops.cc       |   7 +-
 tensorflow/compiler/xrt/tests/BUILD           |  13 +--
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 106 +-----------------
 6 files changed, 20 insertions(+), 135 deletions(-)

diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc
index bd5045b9b9..96bd2616f5 100644
--- a/tensorflow/compiler/xla/service/compile_only_service.cc
+++ b/tensorflow/compiler/xla/service/compile_only_service.cc
@@ -89,8 +89,6 @@ CompileOnlyService::CompileAheadOfTime(
     const auto& program_shape = instance.computation.program_shape();
     ExecutionOptions execution_options;
     *execution_options.mutable_debug_options() = debug_options;
-    *execution_options.mutable_shape_with_output_layout() =
-        *instance.result_layout;
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<HloModuleConfig> module_config,
         CreateModuleConfig(program_shape, instance.argument_layouts,
diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
index 1ab836a496..1d4f8d97f2 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
@@ -166,21 +166,10 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) {
                  VLOG(1) << "Compiling XLA executable";
                  return Compile(ctx, computation_proto, program);
                }));
-  std::unique_ptr<XRTCompilationCacheEntryRef> entry;
-  OP_REQUIRES_OK(ctx, cache->Lookup(uid, &entry));
-
-  Tensor handle_output(DT_INT64, TensorShape({}));
-  handle_output.scalar<int64>()() = uid;
-  ctx->set_output(0, handle_output);
-
-  xla::LocalExecutable* executable = entry->get().get_executable();
-  xla::ProgramShape program_shape = executable->executable()
-                                        ->module()
-                                        .entry_computation()
-                                        ->ComputeProgramShape();
-  Tensor program_shape_output(DT_STRING, TensorShape({1}));
-  program_shape_output.vec<string>()(0) = program_shape.SerializeAsString();
-  ctx->set_output(1, program_shape_output);
+
+  Tensor output(DT_INT64, TensorShape({}));
+  output.scalar<int64>()() = uid;
+  ctx->set_output(0, output);
 }
 
 XRTCompileOp::~XRTCompileOp() = default;
diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
index 3a1e03280a..257b054f16 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
@@ -64,6 +64,14 @@ uint32 GetXLARandomSeed() {
   return counter.fetch_add(2);
 }
 
+// Looks up the input `key` in the compilation cache.
+Status GetComputationCacheEntry(
+    XRTCompilationCache* cache, int64 key,
+    std::unique_ptr<XRTCompilationCacheEntryRef>* entry) {
+  TF_RETURN_IF_ERROR(cache->Lookup(key, entry));
+  return Status::OK();
+}
+
 // Populates `inputs` with the input tensors to the computation.
 Status GetComputationInputs(OpKernelContext* context, ResourceMgr* rm,
                             bool release_inputs,
diff --git a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc
index 7b3b50c695..5cfc8711f9 100644
--- a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc
+++ b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc
@@ -23,12 +23,7 @@ namespace tensorflow {
 REGISTER_OP("XRTCompile")
     .Input("computation: string")
     .Output("handle: int64")
-    .Output("program_shape: string")
-    .SetShapeFn([](shape_inference::InferenceContext* c) {
-      c->set_output(0, c->Scalar());
-      c->set_output(1, c->UnknownShapeOfRank(1));
-      return Status::OK();
-    })
+    .SetShapeFn(tensorflow::shape_inference::ScalarShape)
     .Doc(
         R"(
 Reads a computation proto, compiles it, and places it in the global compilation
diff --git a/tensorflow/compiler/xrt/tests/BUILD b/tensorflow/compiler/xrt/tests/BUILD
index be44a3474a..b6dcfc4eb9 100644
--- a/tensorflow/compiler/xrt/tests/BUILD
+++ b/tensorflow/compiler/xrt/tests/BUILD
@@ -29,11 +29,8 @@ cc_library(
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/client:client_library",
-        "//tensorflow/compiler/xla/client:local_client",
         "//tensorflow/compiler/xla/client:xla_builder",
         "//tensorflow/compiler/xla/client:xla_computation",
-        "//tensorflow/compiler/xla/service:platform_util",
         "//tensorflow/compiler/xrt:xrt_proto",
         "//tensorflow/compiler/xrt:xrt_server",
         "//tensorflow/compiler/xrt/cc:xrt_ops",
@@ -52,10 +49,7 @@ tf_cc_test(
     name = "raw_api_test_cpu",
     size = "medium",
     srcs = [],
-    args = [
-        "--xla_test_device=XLA_CPU",
-        "--xla_platform=CPU",
-    ],
+    args = ["--xla_test_device=XLA_CPU"],
     deps = [
         ":raw_api_test_lib",
         "//tensorflow/compiler/jit:xla_cpu_device",
@@ -66,10 +60,7 @@ tf_cuda_cc_test(
     name = "raw_api_test_gpu",
     size = "medium",
     srcs = [],
-    args = [
-        "--xla_test_device=XLA_GPU",
-        "--xla_platform=GPU",
-    ],
+    args = ["--xla_test_device=XLA_GPU"],
     tags = tf_cuda_tests_tags(),
     deps = [
         ":raw_api_test_lib",
diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc
index ee6734020d..9fc01e6304 100644
--- a/tensorflow/compiler/xrt/tests/raw_api_test.cc
+++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc
@@ -22,13 +22,10 @@ limitations under the License.
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/framework/scope.h"
 #include "tensorflow/cc/ops/standard_ops.h"
-#include "tensorflow/compiler/xla/client/client_library.h"
-#include "tensorflow/compiler/xla/client/local_client.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/client/xla_computation.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
-#include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/compiler/xrt/cc/ops/xrt_compile_ops.h"
@@ -46,7 +43,6 @@ namespace tensorflow {
 namespace {
 
 string* xla_test_device_ptr;  // initial value set in main()
-string* xla_platform_ptr;     // initial value set in main()
 
 string DeviceFromFlag() {
   string xla_test_device = *xla_test_device_ptr;
@@ -149,28 +145,6 @@ void StoreComputationSnapshot(const xla::XlaComputation& computation,
   *dst = *snapshot;
 }
 
-xla::ProgramShape XlaCompiledProgramShape(
-    const xla::XlaComputation& computation,
-    const xla::ProgramShape& input_program_shape) {
-  se::Platform* platform =
-      xla::PlatformUtil::GetPlatform(*xla_platform_ptr).ValueOrDie();
-  xla::LocalClient* client =
-      xla::ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie();
-  xla::ExecutableBuildOptions exec_options;
-  exec_options.set_result_layout(input_program_shape.result());
-  std::vector<const xla::Shape*> parameters_shapes;
-  for (int64 i = 0; i < input_program_shape.parameters_size(); ++i) {
-    parameters_shapes.push_back(&input_program_shape.parameters(i));
-  }
-  auto local_executable =
-      client->Compile(computation, parameters_shapes, exec_options)
-          .ValueOrDie();
-  return local_executable->executable()
-      ->module()
-      .entry_computation()
-      ->ComputeProgramShape();
-}
-
 TEST(RawApiTest, ReadAndWriteState) {
   xrt::XLAAllocation alloc;
   alloc.set_device_ordinal(0);
@@ -364,87 +338,20 @@ TEST(RawApiTest, CompileAndExecute) {
   auto p1_value =
       ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString());
   auto p1_handle = ops::XRTAllocate(root, p1_value);
-  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
+  auto result = ops::XRTExecute(root, c_handle, e_config,
                                 {Output(p0_handle), Output(p1_handle)});
   auto read_back = ops::XRTReadLiteralAndRelease(root, result);
   TF_ASSERT_OK(root.status());
 
   ClientSession session(root);
   std::vector<Tensor> outputs;
-  TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs));
+  TF_EXPECT_OK(session.Run({read_back}, &outputs));
 
   xla::LiteralProto response;
   EXPECT_TRUE(response.ParseFromString(outputs[0].scalar<string>()()));
 
   auto expected = xla::LiteralUtil::CreateR1<float>({27.0f, 21.0f});
   EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
-
-  xla::ProgramShape program_shape;
-  EXPECT_TRUE(program_shape.ParseFromString(outputs[1].vec<string>()(0)));
-  EXPECT_EQ(program_shape.parameters_size(), 2);
-}
-
-TEST(RawApiTest, CompileWithXlaReturnShapes) {
-  xla::XlaBuilder builder("XrtXlaShapes");
-  auto input_shape = xla::ShapeUtil::MakeShape(xla::BF16, {32, 3, 128, 128});
-  auto kernel_shape = xla::ShapeUtil::MakeShape(xla::BF16, {3, 3, 5, 5});
-  // Clear layouts to signal XLA we are ready to get whatever are coming out of
-  // the compilation process.
-  xla::LayoutUtil::ClearLayout(&input_shape);
-  xla::LayoutUtil::ClearLayout(&kernel_shape);
-  auto param_shape =
-      xla::ShapeUtil::MakeTupleShape({input_shape, kernel_shape});
-  auto param = xla::Parameter(&builder, 0, param_shape, "param");
-  auto input = xla::GetTupleElement(param, 0);
-  auto kernel = xla::GetTupleElement(param, 1);
-  xla::Conv(input, kernel, {1, 1}, xla::Padding::kSame);
-  TF_ASSERT_OK_AND_ASSIGN(xla::XlaComputation xla_computation, builder.Build());
-
-  auto result_shape = xla_computation.GetProgramShape().ValueOrDie().result();
-  // Clear the result shape layout to tell XLA we are accepting whatever are
-  // coming out of the compilation process.
-  xla::LayoutUtil::ClearLayout(&result_shape);
-
-  xrt::XLAComputation c;
-  auto config = c.mutable_config();
-  auto shapes = config->mutable_program_shape();
-  *shapes->add_parameters() = param_shape;
-  *shapes->mutable_result() = result_shape;
-  StoreComputationSnapshot(xla_computation, c.mutable_hlo_snapshot());
-
-  Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
-  auto computation =
-      ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
-  auto c_handle = ops::XRTCompile(root, computation);
-  auto release = ops::XRTReleaseCompilationHandle(root, c_handle.handle);
-  TF_ASSERT_OK(root.status());
-
-  ClientSession session(root);
-  std::vector<Tensor> outputs;
-  TF_EXPECT_OK(session.Run(tensorflow::ClientSession::FeedType(),
-                           {c_handle.program_shape}, {release}, &outputs));
-
-  xla::ProgramShape program_shape;
-  EXPECT_TRUE(program_shape.ParseFromString(outputs[0].vec<string>()(0)));
-  EXPECT_EQ(program_shape.parameters_size(), 1);
-
-  VLOG(2) << "Param: "
-          << xla::ShapeUtil::HumanStringWithLayout(program_shape.parameters(0));
-  VLOG(2) << "Result: "
-          << xla::ShapeUtil::HumanStringWithLayout(program_shape.result());
-
-  xla::ProgramShape xla_program_shape =
-      XlaCompiledProgramShape(xla_computation, *shapes);
-  EXPECT_TRUE(xla::LayoutUtil::Equal(
-      xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {0}).layout(),
-      xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {0})
-          .layout()));
-  EXPECT_TRUE(xla::LayoutUtil::Equal(
-      xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {1}).layout(),
-      xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {1})
-          .layout()));
-  EXPECT_TRUE(xla::LayoutUtil::Equal(program_shape.result().layout(),
-                                     xla_program_shape.result().layout()));
 }
 
 TEST(RawApiTest, CompileAndExecuteZeroArg) {
@@ -464,7 +371,7 @@ TEST(RawApiTest, CompileAndExecuteZeroArg) {
   auto computation =
       ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
   auto c_handle = ops::XRTCompile(root, computation);
-  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
+  auto result = ops::XRTExecute(root, c_handle, e_config,
                                 std::initializer_list<Input>({}));
   auto read_back = ops::XRTReadLiteralAndRelease(root, result);
   TF_ASSERT_OK(root.status());
@@ -513,7 +420,7 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) {
   auto p1_value =
       ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString());
   auto p1_handle = ops::XRTAllocate(root, p1_value);
-  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
+  auto result = ops::XRTExecute(root, c_handle, e_config,
                                 {Output(p0_handle), Output(p1_handle)});
   auto read_back = ops::XRTReadLiteralAndRelease(root, result);
   TF_ASSERT_OK(root.status());
@@ -548,7 +455,7 @@ TEST(RawApiTest, LeakCompilationReference) {
 
   ClientSession session(root);
   std::vector<Tensor> outputs;
-  TF_EXPECT_OK(session.Run({c_handle.handle}, &outputs));
+  TF_EXPECT_OK(session.Run({c_handle}, &outputs));
 }
 
 }  // namespace
@@ -557,12 +464,9 @@ TEST(RawApiTest, LeakCompilationReference) {
 
 int main(int argc, char** argv) {
   tensorflow::xla_test_device_ptr = new tensorflow::string("XLA_CPU");
-  tensorflow::xla_platform_ptr = new tensorflow::string("CPU");
   std::vector<tensorflow::Flag> flag_list = {
       tensorflow::Flag("xla_test_device", tensorflow::xla_test_device_ptr,
                        "Tensorflow device type to use for test, e.g., XLA_CPU"),
-      tensorflow::Flag("xla_platform", tensorflow::xla_platform_ptr,
-                       "The XLA platform to select for the device"),
   };
   tensorflow::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
   const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
-- 
GitLab


From 0f79b5ff0d557abe5f6b21da10ae5a2570fd21d9 Mon Sep 17 00:00:00 2001
From: Andy Craze <accraze@gmail.com>
Date: Wed, 10 Oct 2018 19:41:32 -0700
Subject: [PATCH 0761/1085] Update momentum.py

fix docstring line lengths
---
 tensorflow/python/training/momentum.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/training/momentum.py b/tensorflow/python/training/momentum.py
index 34c74cda4e..4a280e7c51 100644
--- a/tensorflow/python/training/momentum.py
+++ b/tensorflow/python/training/momentum.py
@@ -60,9 +60,10 @@ class MomentumOptimizer(optimizer.Optimizer):
         variable(s) passed to the optimizer. Using Nesterov Momentum makes the
         variable(s) track the values called `theta_t + mu*v_t` in the paper.
         This implementation is an approximation of the original formula, valid 
-        for high values of momentum. It will compute the "adjusted gradient" in NAG 
-        by assuming that the new gradient will be estimated by the current 
-        average gradient plus the product of momentum and the change in the average gradient.
+        for high values of momentum. It will compute the "adjusted gradient" 
+        in NAG by assuming that the new gradient will be estimated by the 
+        current average gradient plus the product of momentum and the change 
+        in the average gradient.
 
     @compatibility(eager)
     When eager execution is enabled, `learning_rate` and `momentum` can each be
-- 
GitLab


From 2b8f59243e412d58ec88f12399478b6ff022b3a9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 10 Oct 2018 20:32:01 -0700
Subject: [PATCH 0762/1085] Add Range to schema.

PiperOrigin-RevId: 216638239
---
 tensorflow/contrib/lite/builtin_ops.h         |   1 +
 .../lite/core/api/flatbuffer_conversions.cc   |   1 +
 tensorflow/contrib/lite/nnapi_delegate.cc     |   1 +
 tensorflow/contrib/lite/schema/schema.fbs     |   5 +
 .../contrib/lite/schema/schema_generated.h    | 124 +++++++++++++++++-
 5 files changed, 126 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h
index 6117cbf9f1..eb26c2dbdb 100644
--- a/tensorflow/contrib/lite/builtin_ops.h
+++ b/tensorflow/contrib/lite/builtin_ops.h
@@ -121,6 +121,7 @@ typedef enum {
   kTfLiteBuiltinZerosLike = 93,
   kTfLiteBuiltinFill = 94,
   kTfLiteBuiltinFloorMod = 95,
+  kTfLiteBuiltinRange = 96,
 } TfLiteBuiltinOperator;
 
 #ifdef __cplusplus
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index 890d9c04bb..348ce54dd7 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -652,6 +652,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_ZEROS_LIKE:
     case BuiltinOperator_FILL:
     case BuiltinOperator_FLOOR_MOD:
+    case BuiltinOperator_RANGE:
       break;
   }
   return kTfLiteOk;
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index c7005eb53e..31f2333520 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -680,6 +680,7 @@ TfLiteStatus AddOpsAndParams(
       case tflite::BuiltinOperator_ZEROS_LIKE:
       case tflite::BuiltinOperator_FILL:
       case tflite::BuiltinOperator_FLOOR_MOD:
+      case tflite::BuiltinOperator_RANGE:
         logError("Op code %d is currently not delegated to NNAPI", builtin);
         return kTfLiteError;
         break;
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index 2b36209e5f..73805b7618 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -177,6 +177,7 @@ enum BuiltinOperator : byte {
   ZEROS_LIKE = 93,
   FILL = 94,
   FLOOR_MOD = 95,
+  RANGE = 96,
 }
 
 // Options for the builtin operators.
@@ -253,6 +254,7 @@ union BuiltinOptions {
   BidirectionalSequenceRNNOptions,
   UnidirectionalSequenceLSTMOptions,
   FloorModOptions,
+  RangeOptions,
 }
 
 enum Padding : byte { SAME, VALID }
@@ -623,6 +625,9 @@ table FillOptions {
 table FloorModOptions {
 }
 
+table RangeOptions {
+}
+
 // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
 // builtin, or a string if the operator is custom.
 table OperatorCode {
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index 3aaa99ec55..4426b7d407 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -244,6 +244,9 @@ struct FillOptionsT;
 struct FloorModOptions;
 struct FloorModOptionsT;
 
+struct RangeOptions;
+struct RangeOptionsT;
+
 struct OperatorCode;
 struct OperatorCodeT;
 
@@ -405,11 +408,12 @@ enum BuiltinOperator {
   BuiltinOperator_ZEROS_LIKE = 93,
   BuiltinOperator_FILL = 94,
   BuiltinOperator_FLOOR_MOD = 95,
+  BuiltinOperator_RANGE = 96,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_FLOOR_MOD
+  BuiltinOperator_MAX = BuiltinOperator_RANGE
 };
 
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[95] {
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[96] {
   static const BuiltinOperator values[] = {
     BuiltinOperator_ADD,
     BuiltinOperator_AVERAGE_POOL_2D,
@@ -505,7 +509,8 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[95] {
     BuiltinOperator_SQUARE,
     BuiltinOperator_ZEROS_LIKE,
     BuiltinOperator_FILL,
-    BuiltinOperator_FLOOR_MOD
+    BuiltinOperator_FLOOR_MOD,
+    BuiltinOperator_RANGE
   };
   return values;
 }
@@ -608,6 +613,7 @@ inline const char * const *EnumNamesBuiltinOperator() {
     "ZEROS_LIKE",
     "FILL",
     "FLOOR_MOD",
+    "RANGE",
     nullptr
   };
   return names;
@@ -692,11 +698,12 @@ enum BuiltinOptions {
   BuiltinOptions_BidirectionalSequenceRNNOptions = 70,
   BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71,
   BuiltinOptions_FloorModOptions = 72,
+  BuiltinOptions_RangeOptions = 73,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_FloorModOptions
+  BuiltinOptions_MAX = BuiltinOptions_RangeOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[73] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[74] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -770,7 +777,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[73] {
     BuiltinOptions_BidirectionalSequenceLSTMOptions,
     BuiltinOptions_BidirectionalSequenceRNNOptions,
     BuiltinOptions_UnidirectionalSequenceLSTMOptions,
-    BuiltinOptions_FloorModOptions
+    BuiltinOptions_FloorModOptions,
+    BuiltinOptions_RangeOptions
   };
   return values;
 }
@@ -850,6 +858,7 @@ inline const char * const *EnumNamesBuiltinOptions() {
     "BidirectionalSequenceRNNOptions",
     "UnidirectionalSequenceLSTMOptions",
     "FloorModOptions",
+    "RangeOptions",
     nullptr
   };
   return names;
@@ -1152,6 +1161,10 @@ template<> struct BuiltinOptionsTraits<FloorModOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
 };
 
+template<> struct BuiltinOptionsTraits<RangeOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
+};
+
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -1759,6 +1772,14 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_FloorModOptions ?
       reinterpret_cast<const FloorModOptionsT *>(value) : nullptr;
   }
+  RangeOptionsT *AsRangeOptions() {
+    return type == BuiltinOptions_RangeOptions ?
+      reinterpret_cast<RangeOptionsT *>(value) : nullptr;
+  }
+  const RangeOptionsT *AsRangeOptions() const {
+    return type == BuiltinOptions_RangeOptions ?
+      reinterpret_cast<const RangeOptionsT *>(value) : nullptr;
+  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -6302,6 +6323,46 @@ inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(
 
 flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
+struct RangeOptionsT : public flatbuffers::NativeTable {
+  typedef RangeOptions TableType;
+  RangeOptionsT() {
+  }
+};
+
+struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef RangeOptionsT NativeTableType;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  RangeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<RangeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RangeOptionsBuilder {
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
+  flatbuffers::Offset<RangeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RangeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  RangeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
 struct OperatorCodeT : public flatbuffers::NativeTable {
   typedef OperatorCode TableType;
   BuiltinOperator builtin_code;
@@ -6651,6 +6712,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const FloorModOptions *builtin_options_as_FloorModOptions() const {
     return builtin_options_type() == BuiltinOptions_FloorModOptions ? static_cast<const FloorModOptions *>(builtin_options()) : nullptr;
   }
+  const RangeOptions *builtin_options_as_RangeOptions() const {
+    return builtin_options_type() == BuiltinOptions_RangeOptions ? static_cast<const RangeOptions *>(builtin_options()) : nullptr;
+  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -6970,6 +7034,10 @@ template<> inline const FloorModOptions *Operator::builtin_options_as<FloorModOp
   return builtin_options_as_FloorModOptions();
 }
 
+template<> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const {
+  return builtin_options_as_RangeOptions();
+}
+
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -9377,6 +9445,29 @@ inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::F
       _fbb);
 }
 
+inline RangeOptionsT *RangeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = new RangeOptionsT();
+  UnPackTo(_o, _resolver);
+  return _o;
+}
+
+inline void RangeOptions::UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<RangeOptions> RangeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateRangeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RangeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateRangeOptions(
+      _fbb);
+}
+
 inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorCodeT();
   UnPackTo(_o, _resolver);
@@ -9854,6 +9945,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_RangeOptions: {
+      auto ptr = reinterpret_cast<const RangeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     default: return false;
   }
 }
@@ -10160,6 +10255,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
       return ptr->UnPack(resolver);
     }
+    case BuiltinOptions_RangeOptions: {
+      auto ptr = reinterpret_cast<const RangeOptions *>(obj);
+      return ptr->UnPack(resolver);
+    }
     default: return nullptr;
   }
 }
@@ -10454,6 +10553,10 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const FloorModOptionsT *>(value);
       return CreateFloorModOptions(_fbb, ptr, _rehasher).Union();
     }
+    case BuiltinOptions_RangeOptions: {
+      auto ptr = reinterpret_cast<const RangeOptionsT *>(value);
+      return CreateRangeOptions(_fbb, ptr, _rehasher).Union();
+    }
     default: return 0;
   }
 }
@@ -10748,6 +10851,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new FloorModOptionsT(*reinterpret_cast<FloorModOptionsT *>(u.value));
       break;
     }
+    case BuiltinOptions_RangeOptions: {
+      value = new RangeOptionsT(*reinterpret_cast<RangeOptionsT *>(u.value));
+      break;
+    }
     default:
       break;
   }
@@ -11115,6 +11222,11 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
+    case BuiltinOptions_RangeOptions: {
+      auto ptr = reinterpret_cast<RangeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
     default: break;
   }
   value = nullptr;
-- 
GitLab


From 96a633367ecd5ae9b31e128c2436b1a3f81b27fd Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Wed, 10 Oct 2018 20:50:21 -0700
Subject: [PATCH 0763/1085] Part 3/4 of the update of tf.keras to the 2.2.4
 API.

PiperOrigin-RevId: 216639755
---
 tensorflow/python/keras/BUILD                 |  13 +
 .../python/keras/engine/training_generator.py |  19 +-
 .../keras/engine/training_generator_test.py   | 307 +++++++++++++
 .../python/keras/engine/training_test.py      | 275 -----------
 tensorflow/python/keras/utils/data_utils.py   | 428 ++++++++----------
 .../python/keras/utils/data_utils_test.py     |   4 +-
 ...flow.keras.utils.-generator-enqueuer.pbtxt |   2 +-
 ...rflow.keras.utils.-sequence-enqueuer.pbtxt |   1 +
 ...flow.keras.utils.-generator-enqueuer.pbtxt |   2 +-
 ...rflow.keras.utils.-sequence-enqueuer.pbtxt |   1 +
 tensorflow/tools/ci_build/ci_sanity.sh        |   3 +-
 11 files changed, 517 insertions(+), 538 deletions(-)
 create mode 100644 tensorflow/python/keras/engine/training_generator_test.py

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index c4d23f117f..a566c9acab 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -718,6 +718,19 @@ py_test(
     ],
 )
 
+py_test(
+    name = "training_generator_test",
+    size = "enormous",
+    srcs = ["engine/training_generator_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["notsan"],
+    deps = [
+        ":keras",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "feature_columns_integration_test",
     size = "small",
diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py
index 2e074699da..21f44423ec 100644
--- a/tensorflow/python/keras/engine/training_generator.py
+++ b/tensorflow/python/keras/engine/training_generator.py
@@ -24,6 +24,7 @@ import numpy as np
 from tensorflow.python.eager import context
 from tensorflow.python.keras import callbacks as cbks
 from tensorflow.python.keras.utils.data_utils import GeneratorEnqueuer
+from tensorflow.python.keras.utils.data_utils import iter_sequence_infinite
 from tensorflow.python.keras.utils.data_utils import OrderedEnqueuer
 from tensorflow.python.keras.utils.data_utils import Sequence
 from tensorflow.python.keras.utils.generic_utils import Progbar
@@ -45,7 +46,6 @@ def fit_generator(model,
                   shuffle=True,
                   initial_epoch=0):
   """See docstring for `Model.fit_generator`."""
-  wait_time = 0.01  # in seconds
   epoch = initial_epoch
 
   do_validation = bool(validation_data)
@@ -124,13 +124,12 @@ def fit_generator(model,
       else:
         enqueuer = GeneratorEnqueuer(
             generator,
-            use_multiprocessing=use_multiprocessing,
-            wait_time=wait_time)
+            use_multiprocessing=use_multiprocessing)
       enqueuer.start(workers=workers, max_queue_size=max_queue_size)
       output_generator = enqueuer.get()
     else:
       if is_sequence:
-        output_generator = iter(generator)
+        output_generator = iter_sequence_infinite(generator)
       else:
         output_generator = generator
 
@@ -251,7 +250,6 @@ def evaluate_generator(model,
     stateful_metric_indices = []
 
   steps_done = 0
-  wait_time = 0.01
   all_outs = []
   batch_sizes = []
   is_sequence = isinstance(generator, Sequence)
@@ -279,13 +277,12 @@ def evaluate_generator(model,
       else:
         enqueuer = GeneratorEnqueuer(
             generator,
-            use_multiprocessing=use_multiprocessing,
-            wait_time=wait_time)
+            use_multiprocessing=use_multiprocessing)
       enqueuer.start(workers=workers, max_queue_size=max_queue_size)
       output_generator = enqueuer.get()
     else:
       if is_sequence:
-        output_generator = iter(generator)
+        output_generator = iter_sequence_infinite(generator)
       else:
         output_generator = generator
 
@@ -354,7 +351,6 @@ def predict_generator(model,
     model._make_test_function()
 
   steps_done = 0
-  wait_time = 0.01
   all_outs = []
   is_sequence = isinstance(generator, Sequence)
   if not is_sequence and use_multiprocessing and workers > 1:
@@ -381,13 +377,12 @@ def predict_generator(model,
       else:
         enqueuer = GeneratorEnqueuer(
             generator,
-            use_multiprocessing=use_multiprocessing,
-            wait_time=wait_time)
+            use_multiprocessing=use_multiprocessing)
       enqueuer.start(workers=workers, max_queue_size=max_queue_size)
       output_generator = enqueuer.get()
     else:
       if is_sequence:
-        output_generator = iter(generator)
+        output_generator = iter_sequence_infinite(generator)
       else:
         output_generator = generator
 
diff --git a/tensorflow/python/keras/engine/training_generator_test.py b/tensorflow/python/keras/engine/training_generator_test.py
new file mode 100644
index 0000000000..88e8943424
--- /dev/null
+++ b/tensorflow/python/keras/engine/training_generator_test.py
@@ -0,0 +1,307 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for training routines."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import unittest
+
+import numpy as np
+
+from tensorflow.python import keras
+from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import metrics as metrics_module
+from tensorflow.python.platform import test
+from tensorflow.python.training.rmsprop import RMSPropOptimizer
+
+
+class TestGeneratorMethods(test.TestCase):
+
+  @unittest.skipIf(
+      os.name == 'nt',
+      'use_multiprocessing=True does not work on windows properly.')
+  def test_generator_methods(self):
+    arr_data = np.random.random((50, 2))
+    arr_labels = np.random.random((50,))
+
+    def custom_generator():
+      batch_size = 10
+      num_samples = 50
+      while True:
+        batch_index = np.random.randint(0, num_samples - batch_size)
+        start = batch_index
+        end = start + batch_size
+        x = arr_data[start: end]
+        y = arr_labels[start: end]
+        yield x, y
+
+    with self.cached_session():
+      x = keras.Input((2,))
+      y = keras.layers.Dense(1)(x)
+      fn_model = keras.models.Model(x, y)
+      fn_model.compile(
+          loss='mse',
+          optimizer='sgd',
+          metrics=['mae', metrics_module.CategoricalAccuracy()])
+
+      seq_model = keras.models.Sequential()
+      seq_model.add(keras.layers.Dense(1, input_shape=(2,)))
+      seq_model.compile(loss='mse', optimizer='sgd')
+
+      for model in [fn_model, seq_model]:
+        model.fit_generator(custom_generator(),
+                            steps_per_epoch=5,
+                            epochs=1,
+                            verbose=1,
+                            max_queue_size=10,
+                            workers=4,
+                            use_multiprocessing=True)
+        model.fit_generator(custom_generator(),
+                            steps_per_epoch=5,
+                            epochs=1,
+                            verbose=1,
+                            max_queue_size=10,
+                            use_multiprocessing=False)
+        model.fit_generator(custom_generator(),
+                            steps_per_epoch=5,
+                            epochs=1,
+                            verbose=1,
+                            max_queue_size=10,
+                            use_multiprocessing=False,
+                            validation_data=custom_generator(),
+                            validation_steps=10)
+        model.fit_generator(custom_generator(),
+                            steps_per_epoch=5,
+                            validation_data=custom_generator(),
+                            validation_steps=1,
+                            workers=0)
+        model.predict_generator(custom_generator(),
+                                steps=5,
+                                max_queue_size=10,
+                                workers=2,
+                                use_multiprocessing=True)
+        model.predict_generator(custom_generator(),
+                                steps=5,
+                                max_queue_size=10,
+                                use_multiprocessing=False)
+        model.predict_generator(custom_generator(),
+                                steps=5,
+                                max_queue_size=10,
+                                workers=0)
+        model.evaluate_generator(custom_generator(),
+                                 steps=5,
+                                 max_queue_size=10,
+                                 workers=2,
+                                 verbose=1,
+                                 use_multiprocessing=True)
+        model.evaluate_generator(custom_generator(),
+                                 steps=5,
+                                 max_queue_size=10,
+                                 use_multiprocessing=False)
+        model.evaluate_generator(custom_generator(),
+                                 steps=5,
+                                 max_queue_size=10,
+                                 use_multiprocessing=False,
+                                 workers=0)
+
+  def test_generator_methods_with_sample_weights(self):
+    arr_data = np.random.random((50, 2))
+    arr_labels = np.random.random((50,))
+    arr_sample_weights = np.random.random((50,))
+
+    def custom_generator():
+      batch_size = 10
+      num_samples = 50
+      while True:
+        batch_index = np.random.randint(0, num_samples - batch_size)
+        start = batch_index
+        end = start + batch_size
+        x = arr_data[start: end]
+        y = arr_labels[start: end]
+        w = arr_sample_weights[start: end]
+        yield x, y, w
+
+    with self.cached_session():
+      model = keras.models.Sequential()
+      model.add(keras.layers.Dense(1, input_shape=(2,)))
+      model.compile(
+          loss='mse',
+          optimizer='sgd',
+          metrics=['mae', metrics_module.CategoricalAccuracy()])
+
+      model.fit_generator(custom_generator(),
+                          steps_per_epoch=5,
+                          epochs=1,
+                          verbose=1,
+                          max_queue_size=10,
+                          use_multiprocessing=False)
+      model.fit_generator(custom_generator(),
+                          steps_per_epoch=5,
+                          epochs=1,
+                          verbose=1,
+                          max_queue_size=10,
+                          use_multiprocessing=False,
+                          validation_data=custom_generator(),
+                          validation_steps=10)
+      model.predict_generator(custom_generator(),
+                              steps=5,
+                              max_queue_size=10,
+                              use_multiprocessing=False)
+      model.evaluate_generator(custom_generator(),
+                               steps=5,
+                               max_queue_size=10,
+                               use_multiprocessing=False)
+
+  def test_generator_methods_invalid_use_case(self):
+
+    def custom_generator():
+      while 1:
+        yield 0
+
+    with self.cached_session():
+      model = keras.models.Sequential()
+      model.add(keras.layers.Dense(1, input_shape=(2,)))
+      model.compile(loss='mse', optimizer='sgd')
+
+      with self.assertRaises(ValueError):
+        model.fit_generator(custom_generator(),
+                            steps_per_epoch=5,
+                            epochs=1,
+                            verbose=1,
+                            max_queue_size=10,
+                            use_multiprocessing=False)
+      with self.assertRaises(ValueError):
+        model.fit_generator(custom_generator(),
+                            steps_per_epoch=5,
+                            epochs=1,
+                            verbose=1,
+                            max_queue_size=10,
+                            use_multiprocessing=False,
+                            validation_data=custom_generator(),
+                            validation_steps=10)
+      with self.assertRaises(AttributeError):
+        model.predict_generator(custom_generator(),
+                                steps=5,
+                                max_queue_size=10,
+                                use_multiprocessing=False)
+      with self.assertRaises(ValueError):
+        model.evaluate_generator(custom_generator(),
+                                 steps=5,
+                                 max_queue_size=10,
+                                 use_multiprocessing=False)
+
+  def test_training_with_sequences(self):
+
+    class DummySequence(keras.utils.Sequence):
+
+      def __getitem__(self, idx):
+        return np.zeros([10, 2]), np.ones([10])
+
+      def __len__(self):
+        return 10
+
+    arr_data = np.random.random((50, 2))
+    arr_labels = np.random.random((50,))
+    arr_sample_weights = np.random.random((50,))
+
+    def custom_generator():
+      batch_size = 10
+      num_samples = 50
+      while True:
+        batch_index = np.random.randint(0, num_samples - batch_size)
+        start = batch_index
+        end = start + batch_size
+        x = arr_data[start: end]
+        y = arr_labels[start: end]
+        w = arr_sample_weights[start: end]
+        yield x, y, w
+
+    with self.cached_session():
+      model = keras.models.Sequential()
+      model.add(keras.layers.Dense(1, input_shape=(2,)))
+      model.compile(loss='mse', optimizer='sgd')
+
+    model.fit_generator(DummySequence(),
+                        steps_per_epoch=10,
+                        validation_data=custom_generator(),
+                        validation_steps=1,
+                        max_queue_size=10,
+                        workers=0,
+                        use_multiprocessing=True)
+    model.fit_generator(DummySequence(),
+                        steps_per_epoch=10,
+                        validation_data=custom_generator(),
+                        validation_steps=1,
+                        max_queue_size=10,
+                        workers=0,
+                        use_multiprocessing=False)
+
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_generator_input_to_fit_eval_predict(self):
+    val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
+
+    def custom_generator():
+      while True:
+        yield np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
+
+    inputs = keras.layers.Input(shape=(10,))
+    x = keras.layers.Dense(10, activation='relu')(inputs)
+    outputs = keras.layers.Dense(1, activation='sigmoid')(x)
+    model = keras.Model(inputs, outputs)
+
+    model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy')
+    model.fit(
+        custom_generator(),
+        steps_per_epoch=2,
+        validation_data=val_data,
+        epochs=2)
+    model.evaluate(custom_generator(), steps=2)
+    model.predict(custom_generator(), steps=2)
+
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_sequence_input_to_fit_eval_predict(self):
+    val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
+
+    class CustomSequence(keras.utils.Sequence):
+
+      def __getitem__(self, idx):
+        return np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
+
+      def __len__(self):
+        return 2
+
+    inputs = keras.layers.Input(shape=(10,))
+    x = keras.layers.Dense(10, activation='relu')(inputs)
+    outputs = keras.layers.Dense(1, activation='sigmoid')(x)
+    model = keras.Model(inputs, outputs)
+
+    model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy')
+    model.fit(CustomSequence(), validation_data=val_data, epochs=2)
+    model.evaluate(CustomSequence())
+    model.predict(CustomSequence())
+
+    with self.assertRaisesRegexp(ValueError, '`y` argument is not supported'):
+      model.fit(CustomSequence(), y=np.ones([10, 1]))
+
+    with self.assertRaisesRegexp(ValueError,
+                                 '`sample_weight` argument is not supported'):
+      model.fit(CustomSequence(), sample_weight=np.ones([10, 1]))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 868fd1dc69..bd6b0e1aa1 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -19,8 +19,6 @@ from __future__ import division
 from __future__ import print_function
 
 import logging
-import os
-import unittest
 
 import numpy as np
 
@@ -1102,279 +1100,6 @@ class TestDynamicTrainability(test.TestCase):
       self.assertListEqual(outer_model.trainable_weights, [])
 
 
-class TestGeneratorMethods(test.TestCase):
-
-  @unittest.skipIf(
-      os.name == 'nt',
-      'use_multiprocessing=True does not work on windows properly.')
-  def test_generator_methods(self):
-    arr_data = np.random.random((50, 2))
-    arr_labels = np.random.random((50,))
-
-    def custom_generator():
-      batch_size = 10
-      num_samples = 50
-      while True:
-        batch_index = np.random.randint(0, num_samples - batch_size)
-        start = batch_index
-        end = start + batch_size
-        x = arr_data[start: end]
-        y = arr_labels[start: end]
-        yield x, y
-
-    with self.cached_session():
-      x = keras.Input((2,))
-      y = keras.layers.Dense(1)(x)
-      fn_model = keras.models.Model(x, y)
-      fn_model.compile(
-          loss='mse',
-          optimizer='sgd',
-          metrics=['mae', metrics_module.CategoricalAccuracy()])
-
-      seq_model = keras.models.Sequential()
-      seq_model.add(keras.layers.Dense(1, input_shape=(2,)))
-      seq_model.compile(loss='mse', optimizer='sgd')
-
-      for model in [fn_model, seq_model]:
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=5,
-                            epochs=1,
-                            verbose=1,
-                            max_queue_size=10,
-                            workers=4,
-                            use_multiprocessing=True)
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=5,
-                            epochs=1,
-                            verbose=1,
-                            max_queue_size=10,
-                            use_multiprocessing=False)
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=5,
-                            epochs=1,
-                            verbose=1,
-                            max_queue_size=10,
-                            use_multiprocessing=False,
-                            validation_data=custom_generator(),
-                            validation_steps=10)
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=5,
-                            validation_data=custom_generator(),
-                            validation_steps=1,
-                            workers=0)
-        model.predict_generator(custom_generator(),
-                                steps=5,
-                                max_queue_size=10,
-                                workers=2,
-                                use_multiprocessing=True)
-        model.predict_generator(custom_generator(),
-                                steps=5,
-                                max_queue_size=10,
-                                use_multiprocessing=False)
-        model.predict_generator(custom_generator(),
-                                steps=5,
-                                max_queue_size=10,
-                                workers=0)
-        model.evaluate_generator(custom_generator(),
-                                 steps=5,
-                                 max_queue_size=10,
-                                 workers=2,
-                                 verbose=1,
-                                 use_multiprocessing=True)
-        model.evaluate_generator(custom_generator(),
-                                 steps=5,
-                                 max_queue_size=10,
-                                 use_multiprocessing=False)
-        model.evaluate_generator(custom_generator(),
-                                 steps=5,
-                                 max_queue_size=10,
-                                 use_multiprocessing=False,
-                                 workers=0)
-
-  def test_generator_methods_with_sample_weights(self):
-    arr_data = np.random.random((50, 2))
-    arr_labels = np.random.random((50,))
-    arr_sample_weights = np.random.random((50,))
-
-    def custom_generator():
-      batch_size = 10
-      num_samples = 50
-      while True:
-        batch_index = np.random.randint(0, num_samples - batch_size)
-        start = batch_index
-        end = start + batch_size
-        x = arr_data[start: end]
-        y = arr_labels[start: end]
-        w = arr_sample_weights[start: end]
-        yield x, y, w
-
-    with self.cached_session():
-      model = keras.models.Sequential()
-      model.add(keras.layers.Dense(1, input_shape=(2,)))
-      model.compile(
-          loss='mse',
-          optimizer='sgd',
-          metrics=['mae', metrics_module.CategoricalAccuracy()])
-
-      model.fit_generator(custom_generator(),
-                          steps_per_epoch=5,
-                          epochs=1,
-                          verbose=1,
-                          max_queue_size=10,
-                          use_multiprocessing=False)
-      model.fit_generator(custom_generator(),
-                          steps_per_epoch=5,
-                          epochs=1,
-                          verbose=1,
-                          max_queue_size=10,
-                          use_multiprocessing=False,
-                          validation_data=custom_generator(),
-                          validation_steps=10)
-      model.predict_generator(custom_generator(),
-                              steps=5,
-                              max_queue_size=10,
-                              use_multiprocessing=False)
-      model.evaluate_generator(custom_generator(),
-                               steps=5,
-                               max_queue_size=10,
-                               use_multiprocessing=False)
-
-  def test_generator_methods_invalid_use_case(self):
-
-    def custom_generator():
-      while 1:
-        yield 0
-
-    with self.cached_session():
-      model = keras.models.Sequential()
-      model.add(keras.layers.Dense(1, input_shape=(2,)))
-      model.compile(loss='mse', optimizer='sgd')
-
-      with self.assertRaises(ValueError):
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=5,
-                            epochs=1,
-                            verbose=1,
-                            max_queue_size=10,
-                            use_multiprocessing=False)
-      with self.assertRaises(ValueError):
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=5,
-                            epochs=1,
-                            verbose=1,
-                            max_queue_size=10,
-                            use_multiprocessing=False,
-                            validation_data=custom_generator(),
-                            validation_steps=10)
-      with self.assertRaises(AttributeError):
-        model.predict_generator(custom_generator(),
-                                steps=5,
-                                max_queue_size=10,
-                                use_multiprocessing=False)
-      with self.assertRaises(ValueError):
-        model.evaluate_generator(custom_generator(),
-                                 steps=5,
-                                 max_queue_size=10,
-                                 use_multiprocessing=False)
-
-  def test_training_with_sequences(self):
-
-    class DummySequence(keras.utils.Sequence):
-
-      def __getitem__(self, idx):
-        return np.zeros([10, 2]), np.ones([10])
-
-      def __len__(self):
-        return 10
-
-    arr_data = np.random.random((50, 2))
-    arr_labels = np.random.random((50,))
-    arr_sample_weights = np.random.random((50,))
-
-    def custom_generator():
-      batch_size = 10
-      num_samples = 50
-      while True:
-        batch_index = np.random.randint(0, num_samples - batch_size)
-        start = batch_index
-        end = start + batch_size
-        x = arr_data[start: end]
-        y = arr_labels[start: end]
-        w = arr_sample_weights[start: end]
-        yield x, y, w
-
-    with self.cached_session():
-      model = keras.models.Sequential()
-      model.add(keras.layers.Dense(1, input_shape=(2,)))
-      model.compile(loss='mse', optimizer='sgd')
-
-    model.fit_generator(DummySequence(),
-                        steps_per_epoch=10,
-                        validation_data=custom_generator(),
-                        validation_steps=1,
-                        max_queue_size=10,
-                        workers=0,
-                        use_multiprocessing=True)
-    model.fit_generator(DummySequence(),
-                        steps_per_epoch=10,
-                        validation_data=custom_generator(),
-                        validation_steps=1,
-                        max_queue_size=10,
-                        workers=0,
-                        use_multiprocessing=False)
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_generator_input_to_fit_eval_predict(self):
-    val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
-
-    def custom_generator():
-      while True:
-        yield np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
-
-    inputs = keras.layers.Input(shape=(10,))
-    x = keras.layers.Dense(10, activation='relu')(inputs)
-    outputs = keras.layers.Dense(1, activation='sigmoid')(x)
-    model = keras.Model(inputs, outputs)
-
-    model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy')
-    model.fit(
-        custom_generator(),
-        steps_per_epoch=2,
-        validation_data=val_data,
-        epochs=2)
-    model.evaluate(custom_generator(), steps=2)
-    model.predict(custom_generator(), steps=2)
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_sequence_input_to_fit_eval_predict(self):
-    val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
-
-    class CustomSequence(keras.utils.Sequence):
-
-      def __getitem__(self, idx):
-        return np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
-
-      def __len__(self):
-        return 2
-
-    inputs = keras.layers.Input(shape=(10,))
-    x = keras.layers.Dense(10, activation='relu')(inputs)
-    outputs = keras.layers.Dense(1, activation='sigmoid')(x)
-    model = keras.Model(inputs, outputs)
-
-    model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy')
-    model.fit(CustomSequence(), validation_data=val_data, epochs=2)
-    model.evaluate(CustomSequence())
-    model.predict(CustomSequence())
-
-    with self.assertRaisesRegexp(ValueError, '`y` argument is not supported'):
-      model.fit(CustomSequence(), y=np.ones([10, 1]))
-
-    with self.assertRaisesRegexp(ValueError,
-                                 '`sample_weight` argument is not supported'):
-      model.fit(CustomSequence(), sample_weight=np.ones([10, 1]))
-
-
 class TestTrainingUtils(test.TestCase):
 
   def test_check_array_lengths(self):
diff --git a/tensorflow/python/keras/utils/data_utils.py b/tensorflow/python/keras/utils/data_utils.py
index b736daa46d..01a9d61a84 100644
--- a/tensorflow/python/keras/utils/data_utils.py
+++ b/tensorflow/python/keras/utils/data_utils.py
@@ -30,7 +30,6 @@ import sys
 import tarfile
 import threading
 import time
-import traceback
 import zipfile
 
 import numpy as np
@@ -117,16 +116,16 @@ def _extract_archive(file_path, path='.', archive_format='auto'):
   """
   if archive_format is None:
     return False
-  if archive_format is 'auto':
+  if archive_format == 'auto':
     archive_format = ['tar', 'zip']
   if isinstance(archive_format, six.string_types):
     archive_format = [archive_format]
 
   for archive_type in archive_format:
-    if archive_type is 'tar':
+    if archive_type == 'tar':
       open_fn = tarfile.open
       is_match_fn = tarfile.is_tarfile
-    if archive_type is 'zip':
+    if archive_type == 'zip':
       open_fn = zipfile.ZipFile
       is_match_fn = zipfile.is_zipfile
 
@@ -237,7 +236,7 @@ def get_file(fname,
 
     def dl_progress(count, block_size, total_size):
       if ProgressTracker.progbar is None:
-        if total_size is -1:
+        if total_size == -1:
           total_size = None
         ProgressTracker.progbar = Progbar(total_size)
       else:
@@ -288,7 +287,7 @@ def _hash_file(fpath, algorithm='sha256', chunk_size=65535):
   Returns:
       The file hash
   """
-  if (algorithm is 'sha256') or (algorithm is 'auto' and len(hash) is 64):
+  if (algorithm == 'sha256') or (algorithm == 'auto' and len(hash) == 64):
     hasher = hashlib.sha256()
   else:
     hasher = hashlib.md5()
@@ -314,8 +313,7 @@ def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535):
   Returns:
       Whether the file is valid
   """
-  if ((algorithm is 'sha256') or
-      (algorithm is 'auto' and len(file_hash) is 64)):
+  if (algorithm == 'sha256') or (algorithm == 'auto' and len(file_hash) == 64):
     hasher = 'sha256'
   else:
     hasher = 'md5'
@@ -400,14 +398,23 @@ class Sequence(object):
     pass
 
   def __iter__(self):
-    """Creates an infinite generator that iterate over the Sequence.
+    """Create a generator that iterate over the Sequence."""
+    for item in (self[i] for i in range(len(self))):
+      yield item
 
-    Yields:
-      Sequence items.
-    """
-    while True:
-      for item in (self[i] for i in range(len(self))):
-        yield item
+
+def iter_sequence_infinite(seq):
+  """Iterates indefinitely over a Sequence.
+
+  Arguments:
+    seq: Sequence instance.
+
+  Yields:
+    Batches of data from the Sequence.
+  """
+  while True:
+    for item in seq:
+      yield item
 
 
 # Global variables to be shared across processes
@@ -445,7 +452,7 @@ class SequenceEnqueuer(object):
   The task of an Enqueuer is to use parallelism to speed up preprocessing.
   This is done with processes or threads.
 
-  Examples:
+  Example:
 
   ```python
       enqueuer = SequenceEnqueuer(...)
@@ -458,61 +465,10 @@ class SequenceEnqueuer(object):
   ```
 
   The `enqueuer.get()` should be an infinite stream of datas.
-
   """
 
-  @abstractmethod
-  def is_running(self):
-    raise NotImplementedError
-
-  @abstractmethod
-  def start(self, workers=1, max_queue_size=10):
-    """Starts the handler's workers.
-
-    Arguments:
-        workers: number of worker threads
-        max_queue_size: queue size
-            (when full, threads could block on `put()`).
-    """
-    raise NotImplementedError
-
-  @abstractmethod
-  def stop(self, timeout=None):
-    """Stop running threads and wait for them to exit, if necessary.
-
-    Should be called by the same thread which called start().
-
-    Arguments:
-        timeout: maximum time to wait on thread.join()
-    """
-    raise NotImplementedError
-
-  @abstractmethod
-  def get(self):
-    """Creates a generator to extract data from the queue.
-
-    Skip the data if it is `None`.
-
-    Returns:
-        Generator yielding tuples `(inputs, targets)`
-            or `(inputs, targets, sample_weights)`.
-    """
-    raise NotImplementedError
-
-
-@tf_export('keras.utils.OrderedEnqueuer')
-class OrderedEnqueuer(SequenceEnqueuer):
-  """Builds a Enqueuer from a Sequence.
-
-  Used in `fit_generator`, `evaluate_generator`, `predict_generator`.
-
-  Arguments:
-      sequence: A `keras.utils.data_utils.Sequence` object.
-      use_multiprocessing: use multiprocessing if True, otherwise threading
-      shuffle: whether to shuffle the data at the beginning of each epoch
-  """
-
-  def __init__(self, sequence, use_multiprocessing=False, shuffle=False):
+  def __init__(self, sequence,
+               use_multiprocessing=False):
     self.sequence = sequence
     self.use_multiprocessing = use_multiprocessing
 
@@ -535,7 +491,6 @@ class OrderedEnqueuer(SequenceEnqueuer):
         self.uid = _SEQUENCE_COUNTER.value
         _SEQUENCE_COUNTER.value += 1
 
-    self.shuffle = shuffle
     self.workers = 0
     self.executor_fn = None
     self.queue = None
@@ -546,16 +501,15 @@ class OrderedEnqueuer(SequenceEnqueuer):
     return self.stop_signal is not None and not self.stop_signal.is_set()
 
   def start(self, workers=1, max_queue_size=10):
-    """Start the handler's workers.
+    """Starts the handler's workers.
 
     Arguments:
-        workers: number of worker threads
+        workers: Number of workers.
         max_queue_size: queue size
             (when full, workers could block on `put()`)
     """
     if self.use_multiprocessing:
-      self.executor_fn = lambda seqs: multiprocessing.Pool(  # pylint: disable=g-long-lambda
-          workers, initializer=init_pool, initargs=(seqs,))
+      self.executor_fn = self._get_executor_init(workers)
     else:
       # We do not need the init since it's threads.
       self.executor_fn = lambda _: ThreadPool(workers)
@@ -566,6 +520,87 @@ class OrderedEnqueuer(SequenceEnqueuer):
     self.run_thread.daemon = True
     self.run_thread.start()
 
+  def _send_sequence(self):
+    """Sends current Iterable to all workers."""
+    # For new processes that may spawn
+    _SHARED_SEQUENCES[self.uid] = self.sequence
+
+  def stop(self, timeout=None):
+    """Stops running threads and wait for them to exit, if necessary.
+
+    Should be called by the same thread which called `start()`.
+
+    Arguments:
+        timeout: maximum time to wait on `thread.join()`
+    """
+    self.stop_signal.set()
+    with self.queue.mutex:
+      self.queue.queue.clear()
+      self.queue.unfinished_tasks = 0
+      self.queue.not_full.notify()
+    self.run_thread.join(timeout)
+    _SHARED_SEQUENCES[self.uid] = None
+
+  @abstractmethod
+  def _run(self):
+    """Submits request to the executor and queue the `Future` objects."""
+    raise NotImplementedError
+
+  @abstractmethod
+  def _get_executor_init(self, workers):
+    """Gets the Pool initializer for multiprocessing.
+
+    Arguments:
+        workers: Number of workers.
+
+    Returns:
+        Function, a Function to initialize the pool
+    """
+    raise NotImplementedError
+
+  @abstractmethod
+  def get(self):
+    """Creates a generator to extract data from the queue.
+
+    Skip the data if it is `None`.
+    # Returns
+        Generator yielding tuples `(inputs, targets)`
+            or `(inputs, targets, sample_weights)`.
+    """
+    raise NotImplementedError
+
+
+@tf_export('keras.utils.OrderedEnqueuer')
+class OrderedEnqueuer(SequenceEnqueuer):
+  """Builds a Enqueuer from a Sequence.
+
+  Used in `fit_generator`, `evaluate_generator`, `predict_generator`.
+
+  Arguments:
+      sequence: A `tf.keras.utils.data_utils.Sequence` object.
+      use_multiprocessing: use multiprocessing if True, otherwise threading
+      shuffle: whether to shuffle the data at the beginning of each epoch
+  """
+
+  def __init__(self, sequence, use_multiprocessing=False, shuffle=False):
+    super(OrderedEnqueuer, self).__init__(sequence, use_multiprocessing)
+    self.shuffle = shuffle
+
+  def _get_executor_init(self, workers):
+    """Gets the Pool initializer for multiprocessing.
+
+    Arguments:
+        workers: Number of workers.
+
+    Returns:
+        Function, a Function to initialize the pool
+    """
+    def pool_fn(seqs):
+      return multiprocessing.Pool(workers,
+                                  initializer=init_pool_generator,
+                                  initargs=(seqs, self.random_seed))
+    return pool_fn
+
   def _wait_queue(self):
     """Wait for the queue to be empty."""
     while True:
@@ -615,30 +650,34 @@ class OrderedEnqueuer(SequenceEnqueuer):
         self.queue.task_done()
         if inputs is not None:
           yield inputs
-    except Exception as e:  # pylint: disable=broad-except
+    except Exception:  # pylint: disable=broad-except
       self.stop()
-      six.raise_from(StopIteration(e), e)
+      six.reraise(*sys.exc_info())
 
-  def _send_sequence(self):
-    """Send current Sequence to all workers."""
-    # For new processes that may spawn
-    _SHARED_SEQUENCES[self.uid] = self.sequence
 
-  def stop(self, timeout=None):
-    """Stops running threads and wait for them to exit, if necessary.
+def init_pool_generator(gens, random_seed=None):
+  global _SHARED_SEQUENCES
+  _SHARED_SEQUENCES = gens
 
-    Should be called by the same thread which called `start()`.
+  if random_seed is not None:
+    ident = multiprocessing.current_process().ident
+    np.random.seed(random_seed + ident)
 
-    Arguments:
-        timeout: maximum time to wait on `thread.join()`
-    """
-    self.stop_signal.set()
-    with self.queue.mutex:
-      self.queue.queue.clear()
-      self.queue.unfinished_tasks = 0
-      self.queue.not_full.notify()
-    self.run_thread.join(timeout)
-    _SHARED_SEQUENCES[self.uid] = None
+
+def next_sample(uid):
+  """Gets the next value from the generator `uid`.
+
+  To allow multiple generators to be used at the same time, we use `uid` to
+  get a specific one. A single generator would cause the validation to
+  overwrite the training generator.
+
+  Arguments:
+      uid: int, generator identifier
+
+  Returns:
+      The next value of generator `uid`.
+  """
+  return six.next(_SHARED_SEQUENCES[uid])
 
 
 @tf_export('keras.utils.GeneratorEnqueuer')
@@ -658,145 +697,36 @@ class GeneratorEnqueuer(SequenceEnqueuer):
           will be incremented by one for each worker.
   """
 
-  def __init__(self,
-               generator,
+  def __init__(self, sequence,
                use_multiprocessing=False,
-               wait_time=0.05,
-               seed=None):
-    self.wait_time = wait_time
-    self._generator = generator
-    if os.name is 'nt' and use_multiprocessing is True:
-      # On Windows, avoid **SYSTEMATIC** error in `multiprocessing`:
-      # `TypeError: can't pickle generator objects`
-      # => Suggest multithreading instead of multiprocessing on Windows
-      raise ValueError('Using a generator with `use_multiprocessing=True`'
-                       ' is not supported on Windows (no marshalling of'
-                       ' generators across process boundaries). Instead,'
-                       ' use single thread/process or multithreading.')
-    else:
-      self._use_multiprocessing = use_multiprocessing
-    self._threads = []
-    self._stop_event = None
-    self._manager = None
-    self.queue = None
-    self.seed = seed
-
-  def _data_generator_task(self):
-    if self._use_multiprocessing is False:
-      while not self._stop_event.is_set():
-        with self.genlock:
-          try:
-            if (self.queue is not None and
-                self.queue.qsize() < self.max_queue_size):
-              # On all OSes, avoid **SYSTEMATIC** error
-              # in multithreading mode:
-              # `ValueError: generator already executing`
-              # => Serialize calls to
-              # infinite iterator/generator's next() function
-              generator_output = next(self._generator)
-              self.queue.put((True, generator_output))
-            else:
-              time.sleep(self.wait_time)
-          except StopIteration:
-            break
-          except Exception as e:  # pylint: disable=broad-except
-            # Can't pickle tracebacks.
-            # As a compromise, print the traceback and pickle None instead.
-            if not hasattr(e, '__traceback__'):
-              setattr(e, '__traceback__', sys.exc_info()[2])
-            self.queue.put((False, e))
-            self._stop_event.set()
-            break
-    else:
-      while not self._stop_event.is_set():
-        try:
-          if (self.queue is not None and
-              self.queue.qsize() < self.max_queue_size):
-            generator_output = next(self._generator)
-            self.queue.put((True, generator_output))
-          else:
-            time.sleep(self.wait_time)
-        except StopIteration:
-          break
-        except Exception as e:  # pylint: disable=broad-except
-          # Can't pickle tracebacks.
-          # As a compromise, print the traceback and pickle None instead.
-          traceback.print_exc()
-          setattr(e, '__traceback__', None)
-          self.queue.put((False, e))
-          self._stop_event.set()
-          break
+               random_seed=None):
+    super(GeneratorEnqueuer, self).__init__(sequence, use_multiprocessing)
+    self.random_seed = random_seed
 
-  def start(self, workers=1, max_queue_size=10):
-    """Kicks off threads which add data from the generator into the queue.
+  def _get_executor_init(self, workers):
+    """Gets the Pool initializer for multiprocessing.
 
     Arguments:
-        workers: number of worker threads
-        max_queue_size: queue size
-            (when full, threads could block on `put()`)
-    """
-    try:
-      self.max_queue_size = max_queue_size
-      if self._use_multiprocessing:
-        self._manager = multiprocessing.Manager()
-        self.queue = self._manager.Queue(maxsize=max_queue_size)
-        self._stop_event = multiprocessing.Event()
-      else:
-        # On all OSes, avoid **SYSTEMATIC** error in multithreading mode:
-        # `ValueError: generator already executing`
-        # => Serialize calls to infinite iterator/generator's next() function
-        self.genlock = threading.Lock()
-        self.queue = queue.Queue(maxsize=max_queue_size)
-        self._stop_event = threading.Event()
-
-      for _ in range(workers):
-        if self._use_multiprocessing:
-          # Reset random seed else all children processes
-          # share the same seed
-          np.random.seed(self.seed)
-          thread = multiprocessing.Process(target=self._data_generator_task)
-          thread.daemon = True
-          if self.seed is not None:
-            self.seed += 1
-        else:
-          thread = threading.Thread(target=self._data_generator_task)
-        self._threads.append(thread)
-        thread.start()
-    except:
-      self.stop()
-      raise
-
-  def is_running(self):
-    return self._stop_event is not None and not self._stop_event.is_set()
-
-  def stop(self, timeout=None):
-    """Stops running threads and wait for them to exit, if necessary.
-
-    Should be called by the same thread which called `start()`.
+      workers: Number of works.
 
-    Arguments:
-        timeout: maximum time to wait on `thread.join()`.
+    Returns:
+        A Function to initialize the pool
     """
-    if self.is_running():
-      self._stop_event.set()
-
-    for thread in self._threads:
-      if self._use_multiprocessing:
-        if thread.is_alive():
-          thread.terminate()
-      else:
-        # The thread.is_alive() test is subject to a race condition:
-        # the thread could terminate right after the test and before the
-        # join, rendering this test meaningless -> Call thread.join()
-        # always, which is ok no matter what the status of the thread.
-        thread.join(timeout)
-
-    if self._manager:
-      self._manager.shutdown()
+    def pool_fn(seqs):
+      return multiprocessing.Pool(workers,
+                                  initializer=init_pool_generator,
+                                  initargs=(seqs, self.random_seed))
+    return pool_fn
 
-    self._threads = []
-    self._stop_event = None
-    self.queue = None
+  def _run(self):
+    """Submits request to the executor and queue the `Future` objects."""
+    self._send_sequence()  # Share the initial generator
+    with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
+      while True:
+        if self.stop_signal.is_set():
+          return
+        self.queue.put(
+            executor.apply_async(next_sample, (self.uid,)), block=True)
 
   def get(self):
     """Creates a generator to extract data from the queue.
@@ -808,24 +738,30 @@ class GeneratorEnqueuer(SequenceEnqueuer):
         `(inputs, targets)` or
         `(inputs, targets, sample_weights)`.
     """
-    while self.is_running():
-      if not self.queue.empty():
-        success, value = self.queue.get()
-        # Rethrow any exceptions found in the queue
-        if not success:
-          six.reraise(value.__class__, value, value.__traceback__)
-        # Yield regular values
-        if value is not None:
-          yield value
-      else:
-        all_finished = all([not thread.is_alive() for thread in self._threads])
-        if all_finished and self.queue.empty():
-          raise StopIteration()
-        else:
-          time.sleep(self.wait_time)
-
-    # Make sure to rethrow the first exception in the queue, if any
-    while not self.queue.empty():
-      success, value = self.queue.get()
-      if not success:
-        six.reraise(value.__class__, value, value.__traceback__)
+    try:
+      while self.is_running():
+        inputs = self.queue.get(block=True).get()
+        self.queue.task_done()
+        if inputs is not None:
+          yield inputs
+    except StopIteration:
+      # Special case for finite generators
+      last_ones = []
+      while self.queue.qsize() > 0:
+        last_ones.append(self.queue.get(block=True))
+      # Wait for them to complete
+      for f in last_ones:
+        f.wait()
+      # Keep the good ones
+      last_ones = [future.get() for future in last_ones if future.successful()]
+      for inputs in last_ones:
+        if inputs is not None:
+          yield inputs
+    except Exception as e:  # pylint: disable=broad-except
+      self.stop()
+      if 'generator already executing' in str(e):
+        raise RuntimeError(
+            'Your generator is NOT thread-safe. '
+            'Keras requires a thread-safe generator when '
+            '`use_multiprocessing=False, workers > 1`. ')
+      six.reraise(*sys.exc_info())
diff --git a/tensorflow/python/keras/utils/data_utils_test.py b/tensorflow/python/keras/utils/data_utils_test.py
index 395df7e0e7..cc95803d6d 100644
--- a/tensorflow/python/keras/utils/data_utils_test.py
+++ b/tensorflow/python/keras/utils/data_utils_test.py
@@ -228,7 +228,7 @@ class TestEnqueuers(test.TestCase):
         FaultSequence(), use_multiprocessing=False)
     enqueuer.start(3, 10)
     gen_output = enqueuer.get()
-    with self.assertRaises(StopIteration):
+    with self.assertRaises(IndexError):
       next(gen_output)
 
   def test_ordered_enqueuer_fail_processes(self):
@@ -236,7 +236,7 @@ class TestEnqueuers(test.TestCase):
         FaultSequence(), use_multiprocessing=True)
     enqueuer.start(3, 10)
     gen_output = enqueuer.get()
-    with self.assertRaises(StopIteration):
+    with self.assertRaises(IndexError):
       next(gen_output)
 
   def test_on_epoch_end_processes(self):
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-generator-enqueuer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-generator-enqueuer.pbtxt
index 939fd547d0..6f5ad2dc96 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-generator-enqueuer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-generator-enqueuer.pbtxt
@@ -5,7 +5,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'generator\', \'use_multiprocessing\', \'wait_time\', \'seed\'], varargs=None, keywords=None, defaults=[\'False\', \'0.05\', \'None\'], "
+    argspec: "args=[\'self\', \'sequence\', \'use_multiprocessing\', \'random_seed\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
   member_method {
     name: "get"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-sequence-enqueuer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-sequence-enqueuer.pbtxt
index a9e499d100..aa36d66f92 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-sequence-enqueuer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-sequence-enqueuer.pbtxt
@@ -4,6 +4,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
+    argspec: "args=[\'self\', \'sequence\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
   member_method {
     name: "get"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-generator-enqueuer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-generator-enqueuer.pbtxt
index 939fd547d0..6f5ad2dc96 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-generator-enqueuer.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-generator-enqueuer.pbtxt
@@ -5,7 +5,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'generator\', \'use_multiprocessing\', \'wait_time\', \'seed\'], varargs=None, keywords=None, defaults=[\'False\', \'0.05\', \'None\'], "
+    argspec: "args=[\'self\', \'sequence\', \'use_multiprocessing\', \'random_seed\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
   member_method {
     name: "get"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-sequence-enqueuer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-sequence-enqueuer.pbtxt
index a9e499d100..aa36d66f92 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-sequence-enqueuer.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-sequence-enqueuer.pbtxt
@@ -4,6 +4,7 @@ tf_class {
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
+    argspec: "args=[\'self\', \'sequence\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'False\'], "
   }
   member_method {
     name: "get"
diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh
index a98c15d961..503e602198 100755
--- a/tensorflow/tools/ci_build/ci_sanity.sh
+++ b/tensorflow/tools/ci_build/ci_sanity.sh
@@ -104,7 +104,8 @@ do_pylint() {
 "^tensorflow/python/keras/callbacks\.py.*\[E1133.*not-an-iterable "\
 "^tensorflow/python/keras/engine/base_layer.py.*\[E0203.*access-member-before-definition "\
 "^tensorflow/python/keras/layers/recurrent\.py.*\[E0203.*access-member-before-definition "\
-"^tensorflow/python/kernel_tests/constant_op_eager_test.py.*\[E0303.*invalid-length-returned"
+"^tensorflow/python/kernel_tests/constant_op_eager_test.py.*\[E0303.*invalid-length-returned "\
+"^tensorflow/python/keras/utils/data_utils.py.*\[E1102.*not-callable"
 
   echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\""
 
-- 
GitLab


From 579155d915bd1fe2cfcff9927ca9af996aca1b72 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Wed, 10 Oct 2018 21:29:36 -0700
Subject: [PATCH 0764/1085] Solve the size_t issue in computing pattern index

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 6ee0bcf6f5..23587a2d00 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -103,8 +103,9 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         FileSystem* fs;
 
         TF_RETURN_IF_ERROR(ctx->env()->GetFileSystemForFile(
-            dataset()
-                ->patterns_[std::max(size_t(0), current_pattern_index_ - 1)],
+            dataset()->patterns_[(current_pattern_index_ > 0)
+                                     ? current_pattern_index_ - 1
+                                     : 0],
             &fs));
 
         while (!filepath_queue_.empty() ||
-- 
GitLab


From 2e75c6321fbcc1225c28de1dc1fc852bc2085881 Mon Sep 17 00:00:00 2001
From: Grzegorz Pawelczak <grzegorzp@graphcore.ai>
Date: Thu, 11 Oct 2018 08:40:21 +0100
Subject: [PATCH 0765/1085] Query on Windows as well

---
 configure.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/configure.py b/configure.py
index 08e3c17b18..bac689aa08 100644
--- a/configure.py
+++ b/configure.py
@@ -1560,7 +1560,6 @@ def main():
     # TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on
     # Windows.
     environ_cp['TF_DOWNLOAD_CLANG'] = '0'
-    environ_cp['TF_ENABLE_XLA'] = '0'
     environ_cp['TF_NEED_MPI'] = '0'
     environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0'
 
-- 
GitLab


From 100f9dee3115d892be1a39ae023bd80e24bf70eb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 02:02:24 -0700
Subject: [PATCH 0766/1085] compat: Update forward compatibility horizon to
 2018-10-11

PiperOrigin-RevId: 216663386
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index b7a1fce586..292b9a8480 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 10)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 11)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 6280f4167056e77f6fa4ac986cf15ac50d75991a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 05:53:19 -0700
Subject: [PATCH 0767/1085] Only file changed was the tensorcore outfeed
 manager test, which was passing wrong shape. Wonder how that could have ever
 let this CL to go in in the first place.

Automated rollback of commit 905deeaadd41d529461d8a6666e9cf46f0097a8f

PiperOrigin-RevId: 216683052
---
 .../xla/service/compile_only_service.cc       |   2 +
 .../compiler/xrt/kernels/xrt_compile_ops.cc   |  19 +++-
 .../compiler/xrt/kernels/xrt_execute_op.cc    |   8 --
 .../compiler/xrt/ops/xrt_compile_ops.cc       |   7 +-
 tensorflow/compiler/xrt/tests/BUILD           |  13 ++-
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 106 +++++++++++++++++-
 6 files changed, 135 insertions(+), 20 deletions(-)

diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc
index 96bd2616f5..bd5045b9b9 100644
--- a/tensorflow/compiler/xla/service/compile_only_service.cc
+++ b/tensorflow/compiler/xla/service/compile_only_service.cc
@@ -89,6 +89,8 @@ CompileOnlyService::CompileAheadOfTime(
     const auto& program_shape = instance.computation.program_shape();
     ExecutionOptions execution_options;
     *execution_options.mutable_debug_options() = debug_options;
+    *execution_options.mutable_shape_with_output_layout() =
+        *instance.result_layout;
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<HloModuleConfig> module_config,
         CreateModuleConfig(program_shape, instance.argument_layouts,
diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
index 1d4f8d97f2..1ab836a496 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
@@ -166,10 +166,21 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) {
                  VLOG(1) << "Compiling XLA executable";
                  return Compile(ctx, computation_proto, program);
                }));
-
-  Tensor output(DT_INT64, TensorShape({}));
-  output.scalar<int64>()() = uid;
-  ctx->set_output(0, output);
+  std::unique_ptr<XRTCompilationCacheEntryRef> entry;
+  OP_REQUIRES_OK(ctx, cache->Lookup(uid, &entry));
+
+  Tensor handle_output(DT_INT64, TensorShape({}));
+  handle_output.scalar<int64>()() = uid;
+  ctx->set_output(0, handle_output);
+
+  xla::LocalExecutable* executable = entry->get().get_executable();
+  xla::ProgramShape program_shape = executable->executable()
+                                        ->module()
+                                        .entry_computation()
+                                        ->ComputeProgramShape();
+  Tensor program_shape_output(DT_STRING, TensorShape({1}));
+  program_shape_output.vec<string>()(0) = program_shape.SerializeAsString();
+  ctx->set_output(1, program_shape_output);
 }
 
 XRTCompileOp::~XRTCompileOp() = default;
diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
index 257b054f16..3a1e03280a 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc
@@ -64,14 +64,6 @@ uint32 GetXLARandomSeed() {
   return counter.fetch_add(2);
 }
 
-// Looks up the input `key` in the compilation cache.
-Status GetComputationCacheEntry(
-    XRTCompilationCache* cache, int64 key,
-    std::unique_ptr<XRTCompilationCacheEntryRef>* entry) {
-  TF_RETURN_IF_ERROR(cache->Lookup(key, entry));
-  return Status::OK();
-}
-
 // Populates `inputs` with the input tensors to the computation.
 Status GetComputationInputs(OpKernelContext* context, ResourceMgr* rm,
                             bool release_inputs,
diff --git a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc
index 5cfc8711f9..7b3b50c695 100644
--- a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc
+++ b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc
@@ -23,7 +23,12 @@ namespace tensorflow {
 REGISTER_OP("XRTCompile")
     .Input("computation: string")
     .Output("handle: int64")
-    .SetShapeFn(tensorflow::shape_inference::ScalarShape)
+    .Output("program_shape: string")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->Scalar());
+      c->set_output(1, c->UnknownShapeOfRank(1));
+      return Status::OK();
+    })
     .Doc(
         R"(
 Reads a computation proto, compiles it, and places it in the global compilation
diff --git a/tensorflow/compiler/xrt/tests/BUILD b/tensorflow/compiler/xrt/tests/BUILD
index b6dcfc4eb9..be44a3474a 100644
--- a/tensorflow/compiler/xrt/tests/BUILD
+++ b/tensorflow/compiler/xrt/tests/BUILD
@@ -29,8 +29,11 @@ cc_library(
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:client_library",
+        "//tensorflow/compiler/xla/client:local_client",
         "//tensorflow/compiler/xla/client:xla_builder",
         "//tensorflow/compiler/xla/client:xla_computation",
+        "//tensorflow/compiler/xla/service:platform_util",
         "//tensorflow/compiler/xrt:xrt_proto",
         "//tensorflow/compiler/xrt:xrt_server",
         "//tensorflow/compiler/xrt/cc:xrt_ops",
@@ -49,7 +52,10 @@ tf_cc_test(
     name = "raw_api_test_cpu",
     size = "medium",
     srcs = [],
-    args = ["--xla_test_device=XLA_CPU"],
+    args = [
+        "--xla_test_device=XLA_CPU",
+        "--xla_platform=CPU",
+    ],
     deps = [
         ":raw_api_test_lib",
         "//tensorflow/compiler/jit:xla_cpu_device",
@@ -60,7 +66,10 @@ tf_cuda_cc_test(
     name = "raw_api_test_gpu",
     size = "medium",
     srcs = [],
-    args = ["--xla_test_device=XLA_GPU"],
+    args = [
+        "--xla_test_device=XLA_GPU",
+        "--xla_platform=GPU",
+    ],
     tags = tf_cuda_tests_tags(),
     deps = [
         ":raw_api_test_lib",
diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc
index 9fc01e6304..ee6734020d 100644
--- a/tensorflow/compiler/xrt/tests/raw_api_test.cc
+++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc
@@ -22,10 +22,13 @@ limitations under the License.
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/framework/scope.h"
 #include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/compiler/xla/client/client_library.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/client/xla_computation.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/compiler/xrt/cc/ops/xrt_compile_ops.h"
@@ -43,6 +46,7 @@ namespace tensorflow {
 namespace {
 
 string* xla_test_device_ptr;  // initial value set in main()
+string* xla_platform_ptr;     // initial value set in main()
 
 string DeviceFromFlag() {
   string xla_test_device = *xla_test_device_ptr;
@@ -145,6 +149,28 @@ void StoreComputationSnapshot(const xla::XlaComputation& computation,
   *dst = *snapshot;
 }
 
+xla::ProgramShape XlaCompiledProgramShape(
+    const xla::XlaComputation& computation,
+    const xla::ProgramShape& input_program_shape) {
+  se::Platform* platform =
+      xla::PlatformUtil::GetPlatform(*xla_platform_ptr).ValueOrDie();
+  xla::LocalClient* client =
+      xla::ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie();
+  xla::ExecutableBuildOptions exec_options;
+  exec_options.set_result_layout(input_program_shape.result());
+  std::vector<const xla::Shape*> parameters_shapes;
+  for (int64 i = 0; i < input_program_shape.parameters_size(); ++i) {
+    parameters_shapes.push_back(&input_program_shape.parameters(i));
+  }
+  auto local_executable =
+      client->Compile(computation, parameters_shapes, exec_options)
+          .ValueOrDie();
+  return local_executable->executable()
+      ->module()
+      .entry_computation()
+      ->ComputeProgramShape();
+}
+
 TEST(RawApiTest, ReadAndWriteState) {
   xrt::XLAAllocation alloc;
   alloc.set_device_ordinal(0);
@@ -338,20 +364,87 @@ TEST(RawApiTest, CompileAndExecute) {
   auto p1_value =
       ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString());
   auto p1_handle = ops::XRTAllocate(root, p1_value);
-  auto result = ops::XRTExecute(root, c_handle, e_config,
+  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
                                 {Output(p0_handle), Output(p1_handle)});
   auto read_back = ops::XRTReadLiteralAndRelease(root, result);
   TF_ASSERT_OK(root.status());
 
   ClientSession session(root);
   std::vector<Tensor> outputs;
-  TF_EXPECT_OK(session.Run({read_back}, &outputs));
+  TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs));
 
   xla::LiteralProto response;
   EXPECT_TRUE(response.ParseFromString(outputs[0].scalar<string>()()));
 
   auto expected = xla::LiteralUtil::CreateR1<float>({27.0f, 21.0f});
   EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
+
+  xla::ProgramShape program_shape;
+  EXPECT_TRUE(program_shape.ParseFromString(outputs[1].vec<string>()(0)));
+  EXPECT_EQ(program_shape.parameters_size(), 2);
+}
+
+TEST(RawApiTest, CompileWithXlaReturnShapes) {
+  xla::XlaBuilder builder("XrtXlaShapes");
+  auto input_shape = xla::ShapeUtil::MakeShape(xla::BF16, {32, 3, 128, 128});
+  auto kernel_shape = xla::ShapeUtil::MakeShape(xla::BF16, {3, 3, 5, 5});
+  // Clear layouts to signal XLA we are ready to get whatever are coming out of
+  // the compilation process.
+  xla::LayoutUtil::ClearLayout(&input_shape);
+  xla::LayoutUtil::ClearLayout(&kernel_shape);
+  auto param_shape =
+      xla::ShapeUtil::MakeTupleShape({input_shape, kernel_shape});
+  auto param = xla::Parameter(&builder, 0, param_shape, "param");
+  auto input = xla::GetTupleElement(param, 0);
+  auto kernel = xla::GetTupleElement(param, 1);
+  xla::Conv(input, kernel, {1, 1}, xla::Padding::kSame);
+  TF_ASSERT_OK_AND_ASSIGN(xla::XlaComputation xla_computation, builder.Build());
+
+  auto result_shape = xla_computation.GetProgramShape().ValueOrDie().result();
+  // Clear the result shape layout to tell XLA we are accepting whatever are
+  // coming out of the compilation process.
+  xla::LayoutUtil::ClearLayout(&result_shape);
+
+  xrt::XLAComputation c;
+  auto config = c.mutable_config();
+  auto shapes = config->mutable_program_shape();
+  *shapes->add_parameters() = param_shape;
+  *shapes->mutable_result() = result_shape;
+  StoreComputationSnapshot(xla_computation, c.mutable_hlo_snapshot());
+
+  Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
+  auto computation =
+      ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
+  auto c_handle = ops::XRTCompile(root, computation);
+  auto release = ops::XRTReleaseCompilationHandle(root, c_handle.handle);
+  TF_ASSERT_OK(root.status());
+
+  ClientSession session(root);
+  std::vector<Tensor> outputs;
+  TF_EXPECT_OK(session.Run(tensorflow::ClientSession::FeedType(),
+                           {c_handle.program_shape}, {release}, &outputs));
+
+  xla::ProgramShape program_shape;
+  EXPECT_TRUE(program_shape.ParseFromString(outputs[0].vec<string>()(0)));
+  EXPECT_EQ(program_shape.parameters_size(), 1);
+
+  VLOG(2) << "Param: "
+          << xla::ShapeUtil::HumanStringWithLayout(program_shape.parameters(0));
+  VLOG(2) << "Result: "
+          << xla::ShapeUtil::HumanStringWithLayout(program_shape.result());
+
+  xla::ProgramShape xla_program_shape =
+      XlaCompiledProgramShape(xla_computation, *shapes);
+  EXPECT_TRUE(xla::LayoutUtil::Equal(
+      xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {0}).layout(),
+      xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {0})
+          .layout()));
+  EXPECT_TRUE(xla::LayoutUtil::Equal(
+      xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {1}).layout(),
+      xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {1})
+          .layout()));
+  EXPECT_TRUE(xla::LayoutUtil::Equal(program_shape.result().layout(),
+                                     xla_program_shape.result().layout()));
 }
 
 TEST(RawApiTest, CompileAndExecuteZeroArg) {
@@ -371,7 +464,7 @@ TEST(RawApiTest, CompileAndExecuteZeroArg) {
   auto computation =
       ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
   auto c_handle = ops::XRTCompile(root, computation);
-  auto result = ops::XRTExecute(root, c_handle, e_config,
+  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
                                 std::initializer_list<Input>({}));
   auto read_back = ops::XRTReadLiteralAndRelease(root, result);
   TF_ASSERT_OK(root.status());
@@ -420,7 +513,7 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) {
   auto p1_value =
       ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString());
   auto p1_handle = ops::XRTAllocate(root, p1_value);
-  auto result = ops::XRTExecute(root, c_handle, e_config,
+  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
                                 {Output(p0_handle), Output(p1_handle)});
   auto read_back = ops::XRTReadLiteralAndRelease(root, result);
   TF_ASSERT_OK(root.status());
@@ -455,7 +548,7 @@ TEST(RawApiTest, LeakCompilationReference) {
 
   ClientSession session(root);
   std::vector<Tensor> outputs;
-  TF_EXPECT_OK(session.Run({c_handle}, &outputs));
+  TF_EXPECT_OK(session.Run({c_handle.handle}, &outputs));
 }
 
 }  // namespace
@@ -464,9 +557,12 @@ TEST(RawApiTest, LeakCompilationReference) {
 
 int main(int argc, char** argv) {
   tensorflow::xla_test_device_ptr = new tensorflow::string("XLA_CPU");
+  tensorflow::xla_platform_ptr = new tensorflow::string("CPU");
   std::vector<tensorflow::Flag> flag_list = {
       tensorflow::Flag("xla_test_device", tensorflow::xla_test_device_ptr,
                        "Tensorflow device type to use for test, e.g., XLA_CPU"),
+      tensorflow::Flag("xla_platform", tensorflow::xla_platform_ptr,
+                       "The XLA platform to select for the device"),
   };
   tensorflow::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
   const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
-- 
GitLab


From c77588f44043fca23328d9d680fca29a50a9df48 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 06:12:51 -0700
Subject: [PATCH 0768/1085] Don't use xla::Add on PRED types in tf-xla bridge
 for TileOp

We are working on disallowing xla::Add on PRED types because they can be
confusing. This is a prerequiste for that to not fail over in the new
shape check.

PiperOrigin-RevId: 216685015
---
 tensorflow/compiler/tf2xla/kernels/tile_ops.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc
index 93d5996b5e..52f2b36e19 100644
--- a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc
@@ -96,7 +96,11 @@ class TileOp : public XlaOpKernel {
       // operation broadcast semantics.
       auto broadcasted_zero = xla::Broadcast(
           XlaHelpers::Zero(ctx->builder(), ctx->input_type(0)), output_shape);
-      ctx->SetOutput(0, xla::Add(broadcasted_zero, input));
+      if (ctx->input_type(0) == DT_BOOL) {
+        ctx->SetOutput(0, xla::Or(broadcasted_zero, input));
+      } else {
+        ctx->SetOutput(0, xla::Add(broadcasted_zero, input));
+      }
       return;
     }
 
-- 
GitLab


From fe18d063a9e277583329ee017485ccb2196d2ce9 Mon Sep 17 00:00:00 2001
From: Pete Warden <petewarden@google.com>
Date: Thu, 11 Oct 2018 06:25:35 -0700
Subject: [PATCH 0769/1085] Updated model and test data for microcontroller
 speech example

PiperOrigin-RevId: 216686136
---
 .../micro/examples/micro_speech/BUILD         |    4 +
 .../micro/examples/micro_speech/README.md     |  103 +
 .../micro_speech/micro_speech_test.cc         |   82 +
 .../examples/micro_speech/no_features_data.cc |  152 +
 .../examples/micro_speech/no_features_data.h  |   23 +
 .../micro_speech/tiny_conv_model_data.cc      | 3235 +++++++++--------
 .../micro_speech/yes_features_data.cc         |  158 +
 .../examples/micro_speech/yes_features_data.h |   23 +
 .../experimental/micro/testing/micro_test.h   |   18 +
 .../experimental/micro/tools/make/Makefile    |    4 +-
 10 files changed, 2184 insertions(+), 1618 deletions(-)
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
 create mode 100644 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h

diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
index dad58b6c1c..626f733540 100644
--- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD
@@ -14,8 +14,12 @@ tflite_micro_cc_test(
     name = "micro_speech_test",
     srcs = [
         "micro_speech_test.cc",
+        "no_features_data.cc",
+        "no_features_data.h",
         "tiny_conv_model_data.cc",
         "tiny_conv_model_data.h",
+        "yes_features_data.cc",
+        "yes_features_data.h",
     ],
     tags = [
         "nomsan",
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md
new file mode 100644
index 0000000000..438a432356
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md
@@ -0,0 +1,103 @@
+# Micro Speech Example
+
+This examples shows how you can use TensorFlow Lite to run a 20 kilobyte neural network model to recognize keywords in speech. It's designed to run on systems with very small amounts of memory such as microcontrollers and DSPs. The code itself also has a small footprint (for example around 22 kilobytes on a Cortex M3) and only uses about 10 kilobytes of RAM for working memory, so it's able to run on systems like an STM32F103 with only 20 kilobytes of total SRAM and 64 kilobytes of Flash.
+
+## Table of Contents
+
+    * [Getting Started](#getting-started)
+    * [Getting Started on a Microcontroller](#getting-started-on-a-microcontroller)
+    * [Calculating the Input to the Neural Network](#calculating-the-input-to-the-neural-network)
+    * [Creating Your Own Model](#creating-your-own-model)
+
+## Getting Started
+
+To compile and test this example on a desktop Linux or MacOS machine, download [the TensorFlow source code](https://github.com/tensorflow/tensorflow), `cd` into the source directory from a terminal, and then retrieve the support libraries you need by running:
+
+```
+tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh
+```
+
+This will take a few minutes, and downloads frameworks the code uses like [CMSIS](https://developer.arm.com/embedded/cmsis) and [flatbuffers](https://google.github.io/flatbuffers/). Once that process has finished, run:
+
+```
+make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile test_micro_speech
+```
+
+You should see a series of files get compiled, followed by some logging output from a test, which should conclude with "~~~ALL TESTS PASSED~~~". If you see this, it means that a small program has been built and run that loads a trained TensorFlow model, runs some example inputs through it, and got the expected outputs. This particular test runs spectrograms generated from recordings of people saying "Yes" and "No", and checks that the network correctly identifies them.
+
+To understand how TensorFlow Lite does this, you can look at the `TestInvoke()` function in [micro_speech_test.cc](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc). It's a fairly small amount of code, creating an interpreter, getting a handle to a model that's been compiled into the program, and then invoking the interpreter with the model and sample inputs.
+
+## Getting Started on a Microcontroller
+
+Once you have downloaded the dependencies and got the x86/Linux build working, you can try building a version for the STM32F103 'bluepill' device. The following command will build the test and then run it on an emulator, assuming you have Docker installed:
+
+```
+make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile TARGET=bluepill test_micro_speech
+```
+
+If you have a real device [(see here for how to set one up)](https://github.com/google/stm32_bare_lib/tree/master/README.md) you can then convert the ELF file into a  a `.bin` format executable to load onto it by running:
+
+```
+arm-none-eabi-objcopy \
+tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/micro_speech_test \
+tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/micro_speech_test.bin \
+--output binary
+```
+
+## Calculating the Input to the Neural Network
+
+The TensorFlow Lite model doesn't take in raw audio sample data. Instead it works with spectrograms, which are two dimensional arrays that are made up of slices of frequency information, each taken from a different time window. This test uses spectrograms that have been pre-calculated from one-second WAV files in the test data set. In a complete application these spectrograms would be calculated at runtime from microphone inputs, but the code for doing that is not yet included in this sample code.
+
+The recipe for creating the spectrogram data is that each frequency slice is created by running an FFT across a 30ms section of the audio sample data. The input samples are treated as being between -1 and +1 as real values (encoded as -32,768 and 32,767 in 16-bit signed integer samples). This results in an FFT with 256 entries. Every sequence of six entries is averaged together, giving a total of 43 frequency buckets in the final slice. The results are stored as unsigned eight-bit values, where 0 represents a real number of zero, and 255 represents 127.5 as a real number. Each adjacent frequency entry is stored in ascending memory order (frequency bucket 0 at data[0], bucket 1 at data [1], etc). The window for the frequency analysis is then moved forward by 20ms, and the process repeated, storing the results in the next memory row (for example bucket 0 in this moved window would be in data[43 + 0], etc). This process happens 49 times in total, producing a single channel image that is 43 pixels wide, and 49 rows high. Here's an illustration of the process:
+
+![spectrogram diagram](https://storage.googleapis.com/download.tensorflow.org/example_images/spectrogram_diagram.png)
+
+
+The test data files have been generated by running the following commands:
+
+```
+bazel run tensorflow/examples/speech_commands:wav_to_features -- \
+--input_wav=${HOME}/speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav \
+--output_c_file=yes_features_data.cc \
+--window_stride=20 --preprocess=average --quantize=1
+
+bazel run tensorflow/examples/speech_commands:wav_to_features -- \
+--input_wav=${HOME}/speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav \
+--output_c_file=no_features_data.cc \
+--window_stride=20 --preprocess=average --quantize=1
+```
+
+## Creating Your Own Model
+
+The neural network model used in this example was built using the [TensorFlow speech commands tutorial](https://www.tensorflow.org/tutorials/sequences/audio_recognition). If you would like to create your own, you can start by training a model with this command:
+
+```
+bazel run -c opt --copt=-mavx2 --copt=-mfma \
+tensorflow/examples/speech_commands:train -- \
+--model_architecture=tiny_conv --window_stride=20 --preprocess=average \
+--wanted_words="yes,no" --silence_percentage=25 --unknown_percentage=25 --quantize=1
+```
+
+If you see a compiling error on older machines, try leaving out the `--copt` arguments, they are just there to accelerate training on chips that support the extensions. The training process is likely to take a couple of hours. Once it has completed, the next step is to freeze the variables:
+
+```
+bazel run tensorflow/examples/speech_commands:freeze -- \
+--model_architecture=tiny_conv --window_stride=20 --preprocess=average \
+--wanted_words="yes,no" --quantize=1 --output_file=/tmp/tiny_conv.pb
+```
+
+The next step is to create a TensorFlow Lite file from the frozen graph:
+
+```
+bazel run tensorflow/contrib/lite/toco:toco -- \
+--input_file=/tmp/tiny_conv.pb --output_file=/tmp/tiny_conv.tflite \
+--input_shapes=1,49,43,1 --input_arrays=Reshape_1 --output_arrays='labels_softmax' \
+--inference_type=QUANTIZED_UINT8 --mean_values=0 --std_values=2 \
+--change_concat_input_ranges=false
+```
+
+Finally, convert the file into a C source file that can be compiled into an embedded system:
+
+```
+xxd -i /tmp/tiny_conv.tflite > /tmp/tiny_conv_model_data.cc
+```
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
index 86cd056a72..0f4731fd4b 100644
--- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h"
 #include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
+#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h"
 #include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h"
 #include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h"
 #include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h"
@@ -24,9 +26,12 @@ limitations under the License.
 TF_LITE_MICRO_TESTS_BEGIN
 
 TF_LITE_MICRO_TEST(TestInvoke) {
+  // Set up logging.
   tflite::MicroErrorReporter micro_error_reporter;
   tflite::ErrorReporter* error_reporter = &micro_error_reporter;
 
+  // Map the model into a usable data structure. This doesn't involve any
+  // copying or parsing, it's a very lightweight operation.
   const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data);
   if (model->version() != TFLITE_SCHEMA_VERSION) {
     error_reporter->Report(
@@ -34,21 +39,98 @@ TF_LITE_MICRO_TEST(TestInvoke) {
         "to supported version %d.\n",
         model->version(), TFLITE_SCHEMA_VERSION);
   }
+
+  // This pulls in all the operation implementations we need.
   tflite::ops::micro::AllOpsResolver resolver;
 
+  // Create an area of memory to use for input, output, and intermediate arrays.
   const int tensor_arena_size = 10 * 1024;
   uint8_t tensor_arena[tensor_arena_size];
   tflite::SimpleTensorAllocator tensor_allocator(tensor_arena,
                                                  tensor_arena_size);
 
+  // Build an interpreter to run the model with.
   tflite::MicroInterpreter interpreter(model, resolver, &tensor_allocator,
                                        error_reporter);
+
+  // Get information about the memory area to use for the model's input.
+  TfLiteTensor* input = interpreter.input(0);
+
+  // Make sure the input has the properties we expect.
+  TF_LITE_MICRO_EXPECT_NE(nullptr, input);
+  TF_LITE_MICRO_EXPECT_EQ(4, input->dims->size);
+  TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]);
+  TF_LITE_MICRO_EXPECT_EQ(49, input->dims->data[1]);
+  TF_LITE_MICRO_EXPECT_EQ(43, input->dims->data[2]);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, input->type);
+
+  // Copy a spectrogram created from a .wav audio file of someone saying "Yes",
+  // into the memory area used for the input.
+  const uint8_t* yes_features_data = g_yes_f2e59fea_nohash_1_data;
+  for (int i = 0; i < input->bytes; ++i) {
+    input->data.uint8[i] = yes_features_data[i];
+  }
+
+  // Run the model on this input and make sure it succeeds.
   TfLiteStatus invoke_status = interpreter.Invoke();
   if (invoke_status != kTfLiteOk) {
     error_reporter->Report("Invoke failed\n");
   }
   TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status);
 
+  // Get the output from the model, and make sure it's the expected size and
+  // type.
+  TfLiteTensor* output = interpreter.output(0);
+  TF_LITE_MICRO_EXPECT_EQ(2, output->dims->size);
+  TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]);
+  TF_LITE_MICRO_EXPECT_EQ(4, output->dims->data[1]);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, output->type);
+
+  // There are four possible classes in the output, each with a score.
+  const int kSilenceIndex = 0;
+  const int kUnknownIndex = 1;
+  const int kYesIndex = 2;
+  const int kNoIndex = 3;
+
+  // Make sure that the expected "Yes" score is higher than the other classes.
+  uint8_t silence_score = output->data.uint8[kSilenceIndex];
+  uint8_t unknown_score = output->data.uint8[kUnknownIndex];
+  uint8_t yes_score = output->data.uint8[kYesIndex];
+  uint8_t no_score = output->data.uint8[kNoIndex];
+  TF_LITE_MICRO_EXPECT_GT(yes_score, silence_score);
+  TF_LITE_MICRO_EXPECT_GT(yes_score, unknown_score);
+  TF_LITE_MICRO_EXPECT_GT(yes_score, no_score);
+
+  // Now test with a different input, from a recording of "No".
+  const uint8_t* no_features_data = g_no_f9643d42_nohash_4_data;
+  for (int i = 0; i < input->bytes; ++i) {
+    input->data.uint8[i] = no_features_data[i];
+  }
+
+  // Run the model on this "No" input.
+  invoke_status = interpreter.Invoke();
+  if (invoke_status != kTfLiteOk) {
+    error_reporter->Report("Invoke failed\n");
+  }
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status);
+
+  // Get the output from the model, and make sure it's the expected size and
+  // type.
+  output = interpreter.output(0);
+  TF_LITE_MICRO_EXPECT_EQ(2, output->dims->size);
+  TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]);
+  TF_LITE_MICRO_EXPECT_EQ(4, output->dims->data[1]);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, output->type);
+
+  // Make sure that the expected "No" score is higher than the other classes.
+  silence_score = output->data.uint8[kSilenceIndex];
+  unknown_score = output->data.uint8[kUnknownIndex];
+  yes_score = output->data.uint8[kYesIndex];
+  no_score = output->data.uint8[kNoIndex];
+  TF_LITE_MICRO_EXPECT_GT(no_score, silence_score);
+  TF_LITE_MICRO_EXPECT_GT(no_score, unknown_score);
+  TF_LITE_MICRO_EXPECT_GT(no_score, yes_score);
+
   error_reporter->Report("Ran successfully\n");
 }
 
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc
new file mode 100644
index 0000000000..3615deb26c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc
@@ -0,0 +1,152 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h"
+
+/* File automatically created by
+ * tensorflow/examples/speech_commands/wav_to_features.py \
+ * --sample_rate=16000 \
+ * --clip_duration_ms=1000 \
+ * --window_size_ms=30 \
+ * --window_stride_ms=20 \
+ * --feature_bin_count=40 \
+ * --quantize \
+ * --preprocess="average" \
+ * --input_wav="speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav" \
+ * --output_c_file="no_features_data.cc" \
+ */
+
+const int g_no_f9643d42_nohash_4_width = 43;
+const int g_no_f9643d42_nohash_4_height = 49;
+const unsigned char g_no_f9643d42_nohash_4_data[] = {
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   5,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   67, 2,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   139, 2,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   195, 2,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   230, 2,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  255, 7,
+    6, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 255, 7,  16, 1,   1,   0,  2, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 255, 7,   22, 0,  1,   0,
+    1, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 238, 5,   20, 3, 4,   1,  1,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  144, 4,   19, 3, 5,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  42, 6,   3,
+    1, 3,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  3, 1,   5,  0,  1,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  5, 1,   3,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    1, 0,   1,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0, 0,   0,   0,  0, 0,   0,  0,  0,   0,   0,  0, 0,   0,   0,  0,  0,   0,
+    0,
+};
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h
new file mode 100644
index 0000000000..b53d0a202b
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_
+
+extern const int g_no_f9643d42_nohash_4_width;
+extern const int g_no_f9643d42_nohash_4_height;
+extern const unsigned char g_no_f9643d42_nohash_4_data[];
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
index f1f9e0e219..f0769a1237 100644
--- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 // Automatically created from a TensorFlow Lite flatbuffer using the command:
 // xxd -i tiny_conv.tflite > tiny_conv_model_data.cc
+// See the README for a full description of the creation process.
 
 #include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h"
 
@@ -26,1643 +27,1643 @@ const unsigned char g_tiny_conv_model_data[] = {
     0x01, 0x00, 0x00, 0x00, 0xf4, 0x47, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
     0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74,
     0x65, 0x64, 0x2e, 0x00, 0x09, 0x00, 0x00, 0x00, 0xd4, 0x47, 0x00, 0x00,
-    0x04, 0x03, 0x00, 0x00, 0xfc, 0x02, 0x00, 0x00, 0xf4, 0x02, 0x00, 0x00,
-    0x64, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+    0xb4, 0x47, 0x00, 0x00, 0xe4, 0x02, 0x00, 0x00, 0xb4, 0x02, 0x00, 0x00,
+    0xac, 0x02, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
     0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb8, 0xb3, 0xff, 0xff,
-    0x16, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0xd7, 0x02, 0x00, 0x00, 0x2f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0xb3, 0xff, 0xff,
-    0x46, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-    0xab, 0x00, 0x00, 0x00, 0x1e, 0xff, 0xff, 0xff, 0xed, 0xff, 0xff, 0xff,
-    0x4a, 0x00, 0x00, 0x00, 0x62, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
-    0x80, 0x02, 0x00, 0x00, 0xce, 0xad, 0xaf, 0x3c, 0xc8, 0xe9, 0xb0, 0x83,
-    0xa1, 0xbf, 0xb2, 0xb1, 0xab, 0xd0, 0xa7, 0x53, 0xa5, 0xe9, 0xb5, 0xac,
-    0xa2, 0xd3, 0xc4, 0x9e, 0x8b, 0xb2, 0x64, 0xb3, 0x9d, 0xa2, 0xae, 0xa6,
-    0xd5, 0xbe, 0x43, 0x9f, 0x9c, 0x54, 0xb5, 0xa8, 0x49, 0x78, 0x86, 0xa2,
-    0xa3, 0x55, 0x35, 0x96, 0x3d, 0x7f, 0xe2, 0xb5, 0xb0, 0x47, 0x28, 0xa9,
-    0x9d, 0xbb, 0xd6, 0xff, 0xb7, 0x79, 0x63, 0xb5, 0xaf, 0xa7, 0xab, 0x7e,
-    0xbc, 0xc7, 0xa0, 0xc3, 0xb1, 0xb6, 0xb2, 0xa1, 0xc2, 0xbb, 0x79, 0x57,
-    0xbe, 0xc1, 0xb7, 0xb0, 0x6b, 0xb7, 0xa5, 0x75, 0x97, 0xb8, 0xe7, 0xac,
-    0xad, 0x7e, 0xb1, 0x9b, 0xc3, 0xba, 0x6b, 0xa2, 0x7f, 0x58, 0xb9, 0x7a,
-    0x4c, 0x91, 0x74, 0x9e, 0xa7, 0x3d, 0xc2, 0x94, 0x75, 0xa1, 0xa4, 0xac,
-    0xab, 0x45, 0x2e, 0xb4, 0xb6, 0xbf, 0xc1, 0xdb, 0xaf, 0x6c, 0x67, 0xb1,
-    0xa9, 0xa6, 0xa8, 0xca, 0xc2, 0xc4, 0xb9, 0xbf, 0xb4, 0xb9, 0xaa, 0x9d,
-    0x9f, 0xb9, 0xb2, 0x71, 0xb2, 0xca, 0xbe, 0xaf, 0x5f, 0xbc, 0xa0, 0x5b,
-    0xa8, 0xb4, 0xa4, 0xa8, 0xd8, 0x69, 0xb7, 0x8a, 0xbc, 0xb8, 0xaf, 0x9c,
-    0x7c, 0x5d, 0xb3, 0x6b, 0x49, 0x95, 0x64, 0xa0, 0xa2, 0x49, 0xcb, 0x87,
-    0xa5, 0xb5, 0xa1, 0xb2, 0xa3, 0x40, 0x6d, 0x9f, 0xc5, 0xb6, 0xbb, 0xd4,
-    0x9c, 0x6d, 0x69, 0xa9, 0xa8, 0x91, 0xad, 0xb8, 0xd2, 0xc6, 0xaf, 0xb8,
-    0xac, 0xa9, 0xa2, 0xa7, 0x60, 0xa6, 0xa1, 0xc9, 0xb8, 0xd6, 0xcf, 0xb1,
-    0x56, 0xb4, 0xac, 0x40, 0xae, 0xbd, 0xbf, 0xa2, 0x54, 0x72, 0x9b, 0x8c,
-    0xc2, 0xb5, 0xc2, 0x9b, 0x64, 0x6d, 0xb4, 0x62, 0x4e, 0x9b, 0x6c, 0xa6,
-    0x8f, 0x4c, 0xca, 0x95, 0xb6, 0xbf, 0x92, 0xae, 0x9c, 0x49, 0xae, 0xb2,
-    0xc0, 0xb6, 0xbc, 0xd1, 0xa4, 0x7b, 0x64, 0xa0, 0xa6, 0x81, 0xac, 0xa6,
-    0xbd, 0xc8, 0xbc, 0xae, 0xaa, 0x9e, 0x61, 0xb1, 0x57, 0xac, 0xbf, 0xbf,
-    0xbb, 0xe0, 0xa6, 0xae, 0x47, 0xc9, 0xbc, 0x57, 0xb0, 0xb5, 0xc7, 0x98,
-    0xf4, 0x93, 0xb6, 0x70, 0xc3, 0xb3, 0xca, 0xab, 0x77, 0x9a, 0xac, 0x45,
-    0x5c, 0x9e, 0x9a, 0xa9, 0x9b, 0x35, 0xc0, 0x6f, 0xc6, 0xc7, 0x91, 0xb4,
-    0xa8, 0x3c, 0xce, 0xb8, 0xad, 0xb9, 0xb5, 0xdd, 0x9c, 0x6d, 0xbf, 0x91,
-    0xb2, 0x7d, 0xa0, 0xaf, 0x9f, 0xbd, 0xb9, 0xcf, 0x9b, 0x5d, 0x3f, 0xac,
-    0x64, 0xae, 0xaf, 0xb8, 0xbc, 0xb8, 0x86, 0xb5, 0x36, 0xcf, 0xb4, 0xa9,
-    0xad, 0xcd, 0xdb, 0xa4, 0x68, 0xa6, 0xa4, 0x67, 0xc8, 0xb7, 0xe5, 0xa4,
-    0x76, 0xb8, 0xa8, 0x28, 0x6b, 0xa5, 0xba, 0xad, 0x9f, 0x3a, 0xa5, 0x42,
-    0xc5, 0xb0, 0x88, 0xad, 0xa5, 0x4d, 0xea, 0x8a, 0xb8, 0xb5, 0xb3, 0xd9,
-    0xa0, 0x77, 0xbb, 0x92, 0x9e, 0x80, 0xbd, 0xbd, 0x6d, 0xcc, 0xab, 0x99,
-    0x88, 0x58, 0x4d, 0xb0, 0x6c, 0xbc, 0x96, 0xbd, 0xae, 0xab, 0x5b, 0xac,
-    0x2f, 0xc3, 0x9a, 0xbe, 0xac, 0xb3, 0x84, 0x9b, 0xe3, 0xaf, 0x95, 0x6b,
-    0xc2, 0xb5, 0xca, 0xb7, 0x4e, 0xbc, 0x9d, 0x24, 0x75, 0xa9, 0xd2, 0xae,
-    0xa0, 0x2b, 0x90, 0x34, 0xd1, 0xb5, 0x96, 0xae, 0xaa, 0x4d, 0xc1, 0xa3,
-    0xb1, 0xb4, 0xaa, 0xd2, 0x9c, 0x7d, 0xc0, 0x91, 0x91, 0x7a, 0xb8, 0x83,
-    0x44, 0xcb, 0xaf, 0x9b, 0x6b, 0x5b, 0x75, 0xb2, 0x62, 0xb6, 0xaa, 0xcb,
-    0x99, 0xa8, 0x63, 0xae, 0x24, 0xc7, 0x8a, 0xbe, 0xa9, 0xb6, 0xa0, 0xa1,
-    0x41, 0xac, 0x84, 0xb5, 0xb9, 0xb3, 0x9b, 0xad, 0x77, 0xbf, 0xa8, 0x7e,
-    0x82, 0xb9, 0xbe, 0xaa, 0xa3, 0x47, 0x6d, 0xb5, 0xc3, 0xb1, 0xbf, 0xa7,
-    0xb1, 0x57, 0x75, 0xb5, 0xb0, 0xb6, 0xb9, 0xce, 0xa4, 0x86, 0xb0, 0xa4,
-    0x98, 0x80, 0xc5, 0x3e, 0x90, 0xca, 0x9b, 0xa2, 0x5a, 0x50, 0xc5, 0xa5,
-    0xad, 0xc1, 0x9c, 0x91, 0x83, 0x8f, 0x21, 0xab, 0xac, 0xba, 0x70, 0xb4,
-    0xae, 0x85, 0x7e, 0xa7, 0xbd, 0xba, 0x7c, 0xb2, 0xb5, 0xb2, 0x7e, 0xb3,
-    0xc3, 0xcd, 0x82, 0xac, 0x9b, 0xb3, 0xa6, 0xb0, 0xbc, 0x6f, 0x52, 0xb9,
-    0xbf, 0xb1, 0xa6, 0xa4, 0xc1, 0x7a, 0x90, 0xc0, 0xae, 0xab, 0x94, 0xd8,
-    0xab, 0xa4, 0x98, 0xbb, 0x8b, 0x86, 0x94, 0x01, 0xad, 0xe7, 0xb1, 0x9b,
-    0x57, 0x48, 0xc1, 0x88, 0xbf, 0xcc, 0xb4, 0x4b, 0x62, 0x8b, 0x48, 0xa7,
-    0xbe, 0xe1, 0x80, 0xa6, 0xb3, 0x64, 0xaa, 0xa4, 0xcf, 0xba, 0x6d, 0xa6,
-    0xb8, 0xa0, 0x8f, 0xb3, 0xce, 0xc3, 0x87, 0xb2, 0xa0, 0xc0, 0x78, 0xb0,
-    0xb9, 0xaa, 0x40, 0xb8, 0xd8, 0xa3, 0x9a, 0xaa, 0xcc, 0xa2, 0x9f, 0xb9,
-    0xbe, 0xc2, 0x89, 0xd6, 0xc6, 0x9c, 0xa3, 0xc7, 0x94, 0xb6, 0xff, 0xff,
-    0x98, 0xb6, 0xff, 0xff, 0xf6, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
-    0xc0, 0x44, 0x00, 0x00, 0x4a, 0x4d, 0x59, 0x60, 0x5a, 0x45, 0x3d, 0x50,
-    0x4a, 0x43, 0x3d, 0x59, 0x3e, 0x49, 0x4a, 0x59, 0x45, 0x44, 0x41, 0x5d,
-    0x50, 0x2f, 0x4e, 0x34, 0x46, 0x48, 0x41, 0x4a, 0x4c, 0x3b, 0x4b, 0x3e,
-    0x49, 0x49, 0x43, 0x4b, 0x3e, 0x49, 0x47, 0x41, 0x3e, 0x4a, 0x46, 0x43,
-    0x41, 0x43, 0x47, 0x49, 0x4a, 0x4c, 0x46, 0x58, 0x3f, 0x4c, 0x4b, 0x4c,
-    0x4d, 0x4b, 0x45, 0x52, 0x45, 0x42, 0x52, 0x52, 0x48, 0x40, 0x46, 0x5f,
-    0x4c, 0x41, 0x47, 0x48, 0x48, 0x4c, 0x43, 0x61, 0x50, 0x4b, 0x49, 0x49,
-    0x46, 0x3f, 0x40, 0x67, 0x40, 0x4d, 0x45, 0x40, 0x40, 0x45, 0x47, 0x56,
-    0x44, 0x3a, 0x4a, 0x4c, 0x52, 0x48, 0x46, 0x50, 0x4b, 0x44, 0x51, 0x45,
-    0x40, 0x45, 0x45, 0x48, 0x4e, 0x4e, 0x43, 0x48, 0x44, 0x4b, 0x45, 0x4a,
-    0x53, 0x45, 0x4a, 0x4b, 0x3f, 0x43, 0x45, 0x53, 0x4d, 0x43, 0x46, 0x3f,
-    0x47, 0x4e, 0x51, 0x50, 0x48, 0x4f, 0x4f, 0x4a, 0x4a, 0x4e, 0x45, 0x4e,
-    0x46, 0x41, 0x4a, 0x46, 0x45, 0x47, 0x45, 0x4b, 0x50, 0x4c, 0x46, 0x45,
-    0x41, 0x47, 0x41, 0x47, 0x46, 0x4f, 0x3f, 0x4f, 0x4a, 0x51, 0x4f, 0x53,
-    0x54, 0x48, 0x51, 0x43, 0x4b, 0x48, 0x4d, 0x46, 0x48, 0x4f, 0x49, 0x44,
-    0x43, 0x53, 0x50, 0x59, 0x56, 0x3d, 0x45, 0x44, 0x48, 0x38, 0x3b, 0x5f,
-    0x39, 0x43, 0x43, 0x52, 0x46, 0x3e, 0x43, 0x58, 0x43, 0x1e, 0x50, 0x3c,
-    0x46, 0x4b, 0x46, 0x50, 0x3c, 0x37, 0x4c, 0x47, 0x47, 0x4b, 0x47, 0x54,
-    0x43, 0x3e, 0x47, 0x4f, 0x4b, 0x41, 0x53, 0x50, 0x42, 0x46, 0x4f, 0x4b,
-    0x4e, 0x3f, 0x49, 0x52, 0x4a, 0x4a, 0x49, 0x53, 0x52, 0x47, 0x52, 0x5a,
-    0x40, 0x42, 0x4d, 0x4b, 0x50, 0x43, 0x49, 0x59, 0x47, 0x4c, 0x4d, 0x50,
-    0x4e, 0x3c, 0x44, 0x61, 0x51, 0x49, 0x49, 0x46, 0x49, 0x47, 0x4b, 0x5a,
-    0x45, 0x4b, 0x43, 0x40, 0x44, 0x52, 0x4d, 0x54, 0x49, 0x47, 0x44, 0x48,
-    0x46, 0x48, 0x3e, 0x40, 0x45, 0x4f, 0x4d, 0x4b, 0x4c, 0x40, 0x3d, 0x40,
-    0x3e, 0x48, 0x50, 0x4e, 0x4c, 0x42, 0x48, 0x4b, 0x3d, 0x48, 0x4b, 0x44,
-    0x52, 0x4b, 0x49, 0x4f, 0x49, 0x3f, 0x47, 0x43, 0x4d, 0x3f, 0x53, 0x4e,
-    0x4a, 0x4f, 0x4e, 0x4e, 0x53, 0x42, 0x46, 0x4c, 0x44, 0x4c, 0x46, 0x51,
-    0x45, 0x48, 0x4a, 0x50, 0x47, 0x41, 0x45, 0x54, 0x4a, 0x44, 0x50, 0x49,
-    0x48, 0x50, 0x51, 0x4b, 0x50, 0x4c, 0x4a, 0x49, 0x43, 0x47, 0x50, 0x4a,
-    0x4d, 0x4c, 0x4e, 0x49, 0x42, 0x50, 0x52, 0x48, 0x45, 0x5a, 0x4e, 0x55,
-    0x51, 0x3d, 0x3d, 0x4d, 0x42, 0x32, 0x36, 0x64, 0x39, 0x4c, 0x41, 0x48,
-    0x44, 0x35, 0x43, 0x56, 0x47, 0x1e, 0x4b, 0x3e, 0x47, 0x3f, 0x43, 0x52,
-    0x51, 0x34, 0x41, 0x4d, 0x3e, 0x41, 0x41, 0x48, 0x3c, 0x4b, 0x45, 0x3b,
-    0x40, 0x43, 0x4c, 0x46, 0x46, 0x47, 0x3e, 0x4f, 0x4b, 0x48, 0x42, 0x47,
-    0x4e, 0x3e, 0x49, 0x47, 0x43, 0x43, 0x4e, 0x52, 0x51, 0x45, 0x3f, 0x54,
-    0x46, 0x44, 0x48, 0x5d, 0x3e, 0x4a, 0x47, 0x52, 0x53, 0x3a, 0x4f, 0x5d,
-    0x41, 0x4c, 0x48, 0x51, 0x43, 0x4b, 0x4b, 0x67, 0x48, 0x4b, 0x45, 0x4d,
-    0x4b, 0x43, 0x4a, 0x54, 0x4c, 0x46, 0x43, 0x4a, 0x4d, 0x43, 0x4c, 0x47,
-    0x4a, 0x48, 0x4d, 0x42, 0x4d, 0x48, 0x3f, 0x43, 0x4c, 0x44, 0x4e, 0x4c,
-    0x40, 0x45, 0x4b, 0x48, 0x47, 0x47, 0x3e, 0x4c, 0x52, 0x41, 0x44, 0x4e,
-    0x4d, 0x44, 0x49, 0x4d, 0x3d, 0x45, 0x48, 0x4f, 0x4c, 0x4a, 0x55, 0x51,
-    0x4d, 0x4c, 0x45, 0x4e, 0x46, 0x45, 0x44, 0x49, 0x4e, 0x44, 0x40, 0x48,
-    0x49, 0x44, 0x53, 0x51, 0x42, 0x41, 0x51, 0x49, 0x51, 0x45, 0x51, 0x3f,
-    0x4b, 0x3f, 0x52, 0x3c, 0x50, 0x4d, 0x4f, 0x4b, 0x44, 0x4f, 0x40, 0x52,
-    0x49, 0x4a, 0x50, 0x3f, 0x3d, 0x54, 0x4c, 0x53, 0x52, 0x45, 0x41, 0x43,
-    0x47, 0x2d, 0x40, 0x63, 0x3a, 0x51, 0x43, 0x4e, 0x40, 0x2b, 0x36, 0x5b,
-    0x4b, 0x12, 0x4d, 0x35, 0x4b, 0x3f, 0x44, 0x4a, 0x46, 0x31, 0x54, 0x48,
-    0x43, 0x42, 0x3d, 0x51, 0x41, 0x45, 0x49, 0x4b, 0x47, 0x49, 0x3d, 0x3e,
-    0x46, 0x3d, 0x4d, 0x48, 0x3d, 0x45, 0x48, 0x4b, 0x49, 0x52, 0x44, 0x4c,
-    0x45, 0x44, 0x45, 0x49, 0x50, 0x48, 0x45, 0x46, 0x45, 0x44, 0x52, 0x55,
-    0x46, 0x45, 0x4b, 0x3d, 0x42, 0x4a, 0x3e, 0x57, 0x48, 0x4b, 0x3c, 0x42,
-    0x4a, 0x46, 0x47, 0x6c, 0x54, 0x4b, 0x41, 0x49, 0x49, 0x50, 0x43, 0x56,
-    0x44, 0x43, 0x4d, 0x3e, 0x44, 0x41, 0x47, 0x40, 0x4a, 0x4b, 0x4d, 0x4d,
-    0x3e, 0x46, 0x45, 0x47, 0x3e, 0x42, 0x4a, 0x45, 0x49, 0x3d, 0x3f, 0x43,
-    0x40, 0x44, 0x47, 0x4a, 0x45, 0x4d, 0x4b, 0x4c, 0x43, 0x40, 0x3d, 0x3e,
-    0x4c, 0x4c, 0x42, 0x4d, 0x48, 0x4d, 0x49, 0x42, 0x51, 0x51, 0x4c, 0x4b,
-    0x53, 0x4f, 0x48, 0x4d, 0x40, 0x46, 0x45, 0x4b, 0x47, 0x47, 0x4b, 0x46,
-    0x54, 0x42, 0x42, 0x46, 0x46, 0x4a, 0x4c, 0x55, 0x3f, 0x3c, 0x52, 0x4b,
-    0x4b, 0x4d, 0x4e, 0x48, 0x53, 0x4c, 0x4b, 0x42, 0x52, 0x54, 0x50, 0x4b,
-    0x40, 0x5f, 0x58, 0x53, 0x50, 0x42, 0x35, 0x48, 0x39, 0x24, 0x3c, 0x5e,
-    0x41, 0x50, 0x3c, 0x51, 0x42, 0x26, 0x42, 0x56, 0x41, 0x0c, 0x3e, 0x3d,
-    0x48, 0x3e, 0x50, 0x4b, 0x3a, 0x2c, 0x43, 0x3d, 0x48, 0x3e, 0x43, 0x48,
-    0x4c, 0x3f, 0x4a, 0x3e, 0x51, 0x4a, 0x4f, 0x40, 0x47, 0x43, 0x50, 0x4c,
-    0x43, 0x4d, 0x3f, 0x45, 0x4d, 0x3e, 0x4c, 0x44, 0x51, 0x47, 0x4b, 0x51,
-    0x45, 0x49, 0x44, 0x3f, 0x46, 0x46, 0x46, 0x57, 0x49, 0x4c, 0x49, 0x4e,
-    0x47, 0x4c, 0x47, 0x5e, 0x43, 0x46, 0x45, 0x4b, 0x52, 0x49, 0x45, 0x5f,
-    0x47, 0x41, 0x46, 0x43, 0x4f, 0x3b, 0x43, 0x51, 0x46, 0x53, 0x4a, 0x4e,
-    0x4b, 0x43, 0x4e, 0x40, 0x48, 0x49, 0x46, 0x3f, 0x48, 0x50, 0x4b, 0x41,
-    0x4a, 0x47, 0x4b, 0x3d, 0x46, 0x49, 0x4b, 0x43, 0x43, 0x42, 0x3e, 0x47,
-    0x47, 0x4a, 0x45, 0x46, 0x51, 0x48, 0x51, 0x4e, 0x3f, 0x50, 0x44, 0x4b,
-    0x4d, 0x4e, 0x44, 0x4d, 0x3d, 0x49, 0x4a, 0x4e, 0x42, 0x51, 0x43, 0x42,
-    0x46, 0x3e, 0x48, 0x4b, 0x4f, 0x50, 0x3d, 0x48, 0x4c, 0x4f, 0x46, 0x44,
-    0x44, 0x48, 0x42, 0x4b, 0x48, 0x41, 0x43, 0x46, 0x4d, 0x49, 0x4f, 0x43,
-    0x41, 0x44, 0x3f, 0x3d, 0x45, 0x4f, 0x45, 0x41, 0x40, 0x58, 0x4f, 0x54,
-    0x5b, 0x4b, 0x3a, 0x47, 0x3d, 0x28, 0x3d, 0x57, 0x3e, 0x51, 0x3f, 0x47,
-    0x3f, 0x2e, 0x3e, 0x54, 0x4e, 0x0b, 0x41, 0x3d, 0x3b, 0x3d, 0x43, 0x47,
-    0x47, 0x28, 0x4d, 0x43, 0x43, 0x3b, 0x4e, 0x4a, 0x4d, 0x42, 0x51, 0x46,
-    0x4f, 0x3d, 0x4c, 0x3a, 0x49, 0x49, 0x4a, 0x43, 0x42, 0x4b, 0x47, 0x42,
-    0x42, 0x49, 0x3f, 0x4d, 0x46, 0x4a, 0x49, 0x4e, 0x42, 0x3c, 0x4a, 0x41,
-    0x4c, 0x40, 0x4d, 0x5a, 0x49, 0x46, 0x51, 0x46, 0x4b, 0x4c, 0x46, 0x62,
-    0x45, 0x42, 0x51, 0x4e, 0x4d, 0x3e, 0x4d, 0x5b, 0x4d, 0x43, 0x45, 0x50,
-    0x4b, 0x40, 0x50, 0x53, 0x4f, 0x4f, 0x51, 0x53, 0x46, 0x41, 0x4e, 0x3a,
-    0x4b, 0x47, 0x3f, 0x3e, 0x4d, 0x48, 0x53, 0x3f, 0x45, 0x42, 0x4c, 0x45,
-    0x55, 0x4c, 0x4b, 0x39, 0x4a, 0x45, 0x48, 0x4d, 0x47, 0x40, 0x48, 0x4f,
-    0x4d, 0x49, 0x3e, 0x41, 0x46, 0x4e, 0x40, 0x49, 0x4b, 0x47, 0x4c, 0x45,
-    0x44, 0x51, 0x4f, 0x4b, 0x48, 0x49, 0x44, 0x41, 0x43, 0x46, 0x51, 0x45,
-    0x40, 0x48, 0x4b, 0x42, 0x44, 0x4f, 0x53, 0x4d, 0x44, 0x46, 0x4e, 0x4c,
-    0x48, 0x50, 0x41, 0x45, 0x42, 0x48, 0x4d, 0x4d, 0x47, 0x45, 0x41, 0x45,
-    0x48, 0x58, 0x4e, 0x46, 0x43, 0x53, 0x57, 0x52, 0x5e, 0x42, 0x45, 0x4e,
-    0x39, 0x24, 0x32, 0x56, 0x47, 0x56, 0x49, 0x52, 0x46, 0x26, 0x3a, 0x51,
-    0x4b, 0x05, 0x3e, 0x43, 0x3f, 0x38, 0x4d, 0x4b, 0x4f, 0x27, 0x51, 0x46,
-    0x47, 0x41, 0x4a, 0x47, 0x4a, 0x3e, 0x44, 0x51, 0x3f, 0x3a, 0x43, 0x46,
-    0x4d, 0x49, 0x46, 0x52, 0x43, 0x48, 0x49, 0x3e, 0x47, 0x46, 0x4a, 0x4d,
-    0x47, 0x46, 0x52, 0x50, 0x44, 0x48, 0x4c, 0x47, 0x45, 0x41, 0x49, 0x5b,
-    0x4d, 0x4b, 0x47, 0x4c, 0x4a, 0x47, 0x45, 0x5b, 0x49, 0x46, 0x52, 0x47,
-    0x47, 0x3d, 0x55, 0x59, 0x40, 0x4b, 0x3e, 0x50, 0x42, 0x43, 0x40, 0x4f,
-    0x48, 0x3f, 0x47, 0x53, 0x4d, 0x44, 0x4e, 0x37, 0x4c, 0x43, 0x51, 0x4d,
-    0x46, 0x4e, 0x40, 0x41, 0x52, 0x44, 0x43, 0x4a, 0x50, 0x48, 0x47, 0x42,
-    0x48, 0x45, 0x50, 0x4d, 0x42, 0x52, 0x44, 0x43, 0x45, 0x43, 0x4c, 0x4d,
-    0x44, 0x51, 0x47, 0x48, 0x51, 0x4f, 0x48, 0x45, 0x49, 0x4a, 0x3e, 0x43,
-    0x4d, 0x4e, 0x4e, 0x46, 0x54, 0x4d, 0x49, 0x4d, 0x47, 0x46, 0x4b, 0x41,
-    0x4a, 0x49, 0x44, 0x45, 0x4d, 0x3e, 0x53, 0x50, 0x47, 0x4d, 0x4e, 0x43,
-    0x4f, 0x45, 0x4e, 0x4a, 0x47, 0x49, 0x4c, 0x4c, 0x4d, 0x54, 0x42, 0x4c,
-    0x43, 0x5d, 0x59, 0x50, 0x5e, 0x4b, 0x44, 0x43, 0x3c, 0x25, 0x31, 0x5b,
-    0x46, 0x5a, 0x50, 0x4d, 0x41, 0x2a, 0x41, 0x4f, 0x44, 0x00, 0x41, 0x3d,
-    0x43, 0x4b, 0x47, 0x45, 0x4e, 0x2e, 0x44, 0x46, 0x53, 0x3d, 0x43, 0x41,
-    0x44, 0x46, 0x49, 0x42, 0x45, 0x4f, 0x4d, 0x3a, 0x43, 0x3c, 0x47, 0x53,
-    0x43, 0x4e, 0x3f, 0x41, 0x4d, 0x50, 0x4b, 0x4c, 0x51, 0x47, 0x53, 0x4f,
-    0x45, 0x4a, 0x44, 0x45, 0x41, 0x46, 0x47, 0x50, 0x51, 0x3f, 0x3e, 0x41,
-    0x48, 0x45, 0x46, 0x5d, 0x45, 0x4a, 0x4c, 0x46, 0x4a, 0x49, 0x50, 0x51,
-    0x51, 0x4c, 0x4f, 0x47, 0x47, 0x42, 0x45, 0x47, 0x4e, 0x48, 0x46, 0x40,
-    0x45, 0x46, 0x4d, 0x3b, 0x4d, 0x52, 0x4c, 0x51, 0x49, 0x51, 0x47, 0x3d,
-    0x4d, 0x42, 0x4f, 0x4e, 0x43, 0x43, 0x45, 0x3a, 0x42, 0x50, 0x4c, 0x4a,
-    0x41, 0x53, 0x4c, 0x45, 0x51, 0x3f, 0x54, 0x43, 0x4b, 0x54, 0x56, 0x4d,
-    0x4f, 0x4a, 0x50, 0x4b, 0x44, 0x45, 0x4f, 0x4f, 0x47, 0x3e, 0x50, 0x4f,
-    0x4b, 0x48, 0x4d, 0x49, 0x55, 0x4d, 0x45, 0x4d, 0x4a, 0x53, 0x43, 0x46,
-    0x4c, 0x45, 0x41, 0x46, 0x49, 0x49, 0x4f, 0x4b, 0x49, 0x50, 0x52, 0x49,
-    0x41, 0x54, 0x44, 0x4c, 0x44, 0x63, 0x4a, 0x49, 0x40, 0x59, 0x52, 0x52,
-    0x59, 0x3f, 0x3e, 0x3e, 0x40, 0x25, 0x3c, 0x5c, 0x4f, 0x57, 0x44, 0x50,
-    0x41, 0x2a, 0x48, 0x4f, 0x43, 0x08, 0x47, 0x43, 0x49, 0x48, 0x4d, 0x49,
-    0x46, 0x2b, 0x48, 0x44, 0x4e, 0x47, 0x47, 0x43, 0x44, 0x3e, 0x4a, 0x52,
-    0x3f, 0x4a, 0x53, 0x42, 0x49, 0x47, 0x4c, 0x50, 0x43, 0x46, 0x46, 0x3c,
-    0x4c, 0x47, 0x4e, 0x4d, 0x42, 0x41, 0x53, 0x52, 0x4f, 0x40, 0x54, 0x50,
-    0x46, 0x43, 0x50, 0x56, 0x51, 0x48, 0x48, 0x48, 0x49, 0x39, 0x47, 0x5e,
-    0x4e, 0x4b, 0x4f, 0x4e, 0x43, 0x45, 0x42, 0x58, 0x4a, 0x3b, 0x48, 0x4d,
-    0x43, 0x3e, 0x4b, 0x43, 0x3c, 0x45, 0x46, 0x4b, 0x42, 0x42, 0x4e, 0x3d,
-    0x4b, 0x4e, 0x51, 0x52, 0x48, 0x3e, 0x4b, 0x3f, 0x4c, 0x4a, 0x4b, 0x4c,
-    0x46, 0x48, 0x3e, 0x48, 0x47, 0x4d, 0x4a, 0x46, 0x49, 0x4d, 0x4a, 0x48,
-    0x50, 0x4b, 0x40, 0x48, 0x4b, 0x52, 0x46, 0x50, 0x4f, 0x3e, 0x42, 0x44,
-    0x44, 0x42, 0x43, 0x49, 0x4f, 0x4f, 0x46, 0x42, 0x4a, 0x54, 0x42, 0x48,
-    0x50, 0x4f, 0x4f, 0x4c, 0x4c, 0x47, 0x52, 0x49, 0x4c, 0x45, 0x4a, 0x4d,
-    0x4a, 0x41, 0x47, 0x4a, 0x4d, 0x4a, 0x4c, 0x46, 0x51, 0x44, 0x4b, 0x49,
-    0x53, 0x5e, 0x45, 0x4a, 0x3b, 0x57, 0x5a, 0x4c, 0x59, 0x43, 0x3e, 0x4a,
-    0x3e, 0x20, 0x36, 0x5d, 0x47, 0x5b, 0x3f, 0x55, 0x3e, 0x24, 0x41, 0x52,
-    0x3f, 0x01, 0x49, 0x41, 0x40, 0x45, 0x42, 0x46, 0x49, 0x2a, 0x47, 0x40,
-    0x44, 0x3f, 0x42, 0x47, 0x4e, 0x42, 0x4b, 0x3d, 0x45, 0x4c, 0x47, 0x3d,
-    0x4c, 0x44, 0x48, 0x43, 0x43, 0x41, 0x4a, 0x3d, 0x48, 0x4b, 0x46, 0x4e,
-    0x4c, 0x45, 0x48, 0x4d, 0x54, 0x4d, 0x3e, 0x46, 0x3e, 0x47, 0x44, 0x4e,
-    0x48, 0x49, 0x53, 0x4b, 0x41, 0x45, 0x4c, 0x57, 0x52, 0x4e, 0x40, 0x48,
-    0x4d, 0x43, 0x44, 0x5a, 0x4a, 0x4c, 0x48, 0x4d, 0x3f, 0x52, 0x41, 0x50,
-    0x4a, 0x47, 0x3e, 0x43, 0x4c, 0x42, 0x48, 0x3e, 0x4f, 0x4b, 0x41, 0x43,
-    0x49, 0x40, 0x43, 0x36, 0x3f, 0x4b, 0x49, 0x49, 0x51, 0x43, 0x48, 0x40,
-    0x4c, 0x51, 0x4d, 0x4a, 0x49, 0x3f, 0x4b, 0x3d, 0x4f, 0x4b, 0x43, 0x4d,
-    0x46, 0x40, 0x46, 0x4d, 0x49, 0x48, 0x4d, 0x4c, 0x52, 0x4c, 0x49, 0x4f,
-    0x53, 0x40, 0x49, 0x53, 0x47, 0x43, 0x4c, 0x45, 0x42, 0x48, 0x42, 0x4e,
-    0x49, 0x43, 0x42, 0x40, 0x4f, 0x46, 0x50, 0x47, 0x51, 0x4a, 0x52, 0x45,
-    0x4c, 0x51, 0x48, 0x47, 0x40, 0x41, 0x52, 0x4f, 0x41, 0x5a, 0x53, 0x47,
-    0x42, 0x5f, 0x55, 0x4f, 0x53, 0x3e, 0x41, 0x49, 0x3d, 0x20, 0x3f, 0x54,
-    0x42, 0x5b, 0x49, 0x4d, 0x3d, 0x22, 0x3e, 0x48, 0x41, 0x01, 0x4c, 0x3d,
-    0x43, 0x4a, 0x46, 0x43, 0x4f, 0x2b, 0x49, 0x46, 0x47, 0x4a, 0x51, 0x3d,
-    0x4b, 0x44, 0x49, 0x41, 0x47, 0x47, 0x45, 0x3a, 0x44, 0x42, 0x40, 0x52,
-    0x46, 0x51, 0x4a, 0x41, 0x4a, 0x52, 0x44, 0x52, 0x4a, 0x40, 0x46, 0x45,
-    0x52, 0x4c, 0x4e, 0x42, 0x42, 0x48, 0x40, 0x4f, 0x4b, 0x4f, 0x51, 0x4c,
-    0x4e, 0x48, 0x4a, 0x5a, 0x46, 0x3d, 0x41, 0x50, 0x52, 0x4c, 0x44, 0x53,
-    0x4b, 0x4d, 0x4f, 0x49, 0x47, 0x4c, 0x48, 0x45, 0x48, 0x4a, 0x44, 0x4e,
-    0x4c, 0x40, 0x4d, 0x35, 0x40, 0x49, 0x4a, 0x51, 0x49, 0x4a, 0x46, 0x36,
-    0x46, 0x47, 0x4a, 0x4c, 0x40, 0x4e, 0x42, 0x38, 0x48, 0x45, 0x42, 0x49,
-    0x54, 0x4c, 0x3f, 0x49, 0x4c, 0x39, 0x47, 0x45, 0x4e, 0x4a, 0x42, 0x44,
-    0x4b, 0x53, 0x43, 0x40, 0x46, 0x51, 0x3d, 0x50, 0x4b, 0x43, 0x4a, 0x4c,
-    0x55, 0x54, 0x4a, 0x43, 0x48, 0x40, 0x44, 0x3f, 0x47, 0x45, 0x3e, 0x41,
-    0x49, 0x44, 0x4d, 0x49, 0x44, 0x41, 0x4a, 0x50, 0x44, 0x49, 0x4d, 0x47,
-    0x4a, 0x49, 0x46, 0x49, 0x40, 0x5b, 0x4d, 0x51, 0x47, 0x57, 0x49, 0x4f,
-    0x56, 0x46, 0x3a, 0x4a, 0x3e, 0x22, 0x36, 0x5c, 0x44, 0x56, 0x46, 0x48,
-    0x3a, 0x2d, 0x4a, 0x48, 0x44, 0x17, 0x41, 0x42, 0x40, 0x3d, 0x4e, 0x45,
-    0x40, 0x26, 0x43, 0x52, 0x41, 0x40, 0x44, 0x4a, 0x48, 0x42, 0x4f, 0x47,
-    0x46, 0x4c, 0x4a, 0x3b, 0x42, 0x3e, 0x3e, 0x49, 0x4e, 0x44, 0x4e, 0x49,
-    0x47, 0x41, 0x47, 0x44, 0x4c, 0x45, 0x4d, 0x49, 0x49, 0x48, 0x55, 0x3d,
-    0x4a, 0x45, 0x50, 0x4f, 0x46, 0x4c, 0x46, 0x45, 0x3c, 0x51, 0x4b, 0x5a,
-    0x46, 0x47, 0x54, 0x41, 0x44, 0x40, 0x4f, 0x53, 0x49, 0x46, 0x46, 0x48,
-    0x44, 0x40, 0x50, 0x49, 0x49, 0x43, 0x50, 0x41, 0x52, 0x4b, 0x46, 0x3e,
-    0x44, 0x44, 0x46, 0x4e, 0x47, 0x48, 0x3e, 0x38, 0x4c, 0x4c, 0x48, 0x43,
-    0x48, 0x3e, 0x50, 0x42, 0x51, 0x50, 0x4a, 0x48, 0x4a, 0x42, 0x44, 0x3d,
-    0x4a, 0x46, 0x46, 0x3d, 0x4e, 0x47, 0x3d, 0x48, 0x4c, 0x46, 0x50, 0x4d,
-    0x49, 0x45, 0x4a, 0x4c, 0x4c, 0x47, 0x4a, 0x42, 0x4a, 0x45, 0x50, 0x52,
-    0x4b, 0x4d, 0x4c, 0x43, 0x42, 0x53, 0x41, 0x45, 0x49, 0x41, 0x4b, 0x4c,
-    0x52, 0x54, 0x4b, 0x41, 0x48, 0x4c, 0x47, 0x4c, 0x41, 0x49, 0x4a, 0x47,
-    0x50, 0x59, 0x4e, 0x45, 0x3c, 0x5d, 0x53, 0x4c, 0x5a, 0x3e, 0x3a, 0x51,
-    0x3a, 0x22, 0x35, 0x59, 0x40, 0x5a, 0x43, 0x46, 0x41, 0x32, 0x44, 0x4b,
-    0x47, 0x04, 0x4c, 0x3a, 0x4a, 0x49, 0x48, 0x3d, 0x45, 0x2b, 0x50, 0x41,
-    0x3e, 0x44, 0x4f, 0x43, 0x4a, 0x3f, 0x48, 0x4b, 0x53, 0x49, 0x4b, 0x38,
-    0x44, 0x40, 0x48, 0x4c, 0x41, 0x3f, 0x47, 0x3e, 0x47, 0x49, 0x45, 0x42,
-    0x43, 0x3e, 0x46, 0x44, 0x53, 0x4d, 0x48, 0x44, 0x45, 0x42, 0x43, 0x53,
-    0x55, 0x49, 0x4d, 0x4b, 0x45, 0x44, 0x47, 0x5f, 0x48, 0x44, 0x4a, 0x48,
-    0x45, 0x4d, 0x4f, 0x5e, 0x4e, 0x46, 0x49, 0x49, 0x4d, 0x49, 0x44, 0x48,
-    0x4d, 0x41, 0x50, 0x48, 0x3d, 0x3f, 0x4d, 0x38, 0x46, 0x4a, 0x50, 0x4a,
-    0x45, 0x3e, 0x43, 0x36, 0x42, 0x48, 0x53, 0x54, 0x49, 0x43, 0x4b, 0x3a,
-    0x45, 0x48, 0x50, 0x45, 0x4a, 0x4c, 0x4a, 0x4d, 0x43, 0x4c, 0x55, 0x4e,
-    0x4c, 0x42, 0x45, 0x52, 0x52, 0x45, 0x46, 0x40, 0x54, 0x4c, 0x3d, 0x4e,
-    0x49, 0x4e, 0x44, 0x47, 0x45, 0x48, 0x4b, 0x50, 0x49, 0x4b, 0x44, 0x4b,
-    0x4f, 0x49, 0x47, 0x47, 0x53, 0x3f, 0x4b, 0x42, 0x45, 0x3e, 0x4d, 0x4d,
-    0x48, 0x51, 0x45, 0x40, 0x43, 0x43, 0x4e, 0x44, 0x51, 0x55, 0x4a, 0x3e,
-    0x45, 0x55, 0x58, 0x50, 0x50, 0x38, 0x44, 0x4f, 0x3b, 0x23, 0x3c, 0x55,
-    0x3c, 0x54, 0x49, 0x42, 0x44, 0x2f, 0x3e, 0x47, 0x42, 0x01, 0x42, 0x37,
-    0x3f, 0x42, 0x45, 0x45, 0x47, 0x2a, 0x52, 0x4b, 0x45, 0x3c, 0x47, 0x44,
-    0x44, 0x40, 0x50, 0x53, 0x48, 0x42, 0x4d, 0x36, 0x50, 0x3d, 0x49, 0x44,
-    0x4f, 0x4c, 0x4a, 0x42, 0x4d, 0x3e, 0x3d, 0x3f, 0x4e, 0x44, 0x4d, 0x4e,
-    0x54, 0x3d, 0x42, 0x46, 0x49, 0x47, 0x4b, 0x53, 0x45, 0x46, 0x47, 0x4a,
-    0x45, 0x3d, 0x4a, 0x5f, 0x51, 0x3e, 0x45, 0x45, 0x44, 0x3a, 0x4d, 0x57,
-    0x45, 0x47, 0x4d, 0x45, 0x4e, 0x4b, 0x51, 0x48, 0x4b, 0x4a, 0x3c, 0x4e,
-    0x51, 0x41, 0x4d, 0x36, 0x47, 0x4a, 0x46, 0x51, 0x4e, 0x4c, 0x52, 0x41,
-    0x55, 0x47, 0x41, 0x47, 0x4d, 0x47, 0x4b, 0x3d, 0x4a, 0x4a, 0x46, 0x49,
-    0x4d, 0x48, 0x46, 0x46, 0x4d, 0x52, 0x52, 0x48, 0x49, 0x3f, 0x4b, 0x4e,
-    0x4c, 0x49, 0x45, 0x47, 0x41, 0x4b, 0x44, 0x48, 0x52, 0x4b, 0x53, 0x44,
-    0x46, 0x4e, 0x44, 0x49, 0x52, 0x50, 0x46, 0x4b, 0x44, 0x43, 0x50, 0x49,
-    0x4a, 0x53, 0x45, 0x49, 0x52, 0x3f, 0x4a, 0x4e, 0x49, 0x4c, 0x4d, 0x4d,
-    0x40, 0x40, 0x3f, 0x4a, 0x47, 0x56, 0x51, 0x43, 0x40, 0x5a, 0x58, 0x52,
-    0x4f, 0x3d, 0x3d, 0x45, 0x38, 0x29, 0x33, 0x59, 0x45, 0x54, 0x3c, 0x42,
-    0x3f, 0x27, 0x3e, 0x49, 0x48, 0x06, 0x4a, 0x3f, 0x41, 0x49, 0x4c, 0x48,
-    0x46, 0x2b, 0x4a, 0x4f, 0x44, 0x46, 0x4c, 0x46, 0x4a, 0x3b, 0x4d, 0x4a,
-    0x40, 0x41, 0x45, 0x38, 0x51, 0x39, 0x46, 0x46, 0x41, 0x51, 0x4e, 0x41,
-    0x49, 0x44, 0x48, 0x4a, 0x4b, 0x46, 0x47, 0x46, 0x4a, 0x4c, 0x47, 0x48,
-    0x3d, 0x42, 0x50, 0x4f, 0x50, 0x4a, 0x4a, 0x48, 0x4a, 0x45, 0x45, 0x61,
-    0x4a, 0x4c, 0x49, 0x3d, 0x4b, 0x4a, 0x4a, 0x5a, 0x48, 0x49, 0x50, 0x4f,
-    0x42, 0x48, 0x3e, 0x44, 0x43, 0x3b, 0x4f, 0x54, 0x4b, 0x4a, 0x47, 0x31,
-    0x4a, 0x49, 0x47, 0x4e, 0x48, 0x48, 0x46, 0x42, 0x4a, 0x45, 0x4c, 0x49,
-    0x4b, 0x4e, 0x53, 0x43, 0x4c, 0x49, 0x4f, 0x4b, 0x46, 0x4c, 0x4b, 0x4e,
-    0x51, 0x4b, 0x49, 0x52, 0x44, 0x55, 0x45, 0x49, 0x4b, 0x4a, 0x50, 0x4c,
-    0x4d, 0x4a, 0x4b, 0x48, 0x41, 0x46, 0x47, 0x43, 0x4b, 0x3f, 0x54, 0x4a,
-    0x46, 0x49, 0x51, 0x48, 0x4e, 0x4a, 0x41, 0x52, 0x52, 0x4e, 0x53, 0x47,
-    0x42, 0x48, 0x43, 0x44, 0x54, 0x51, 0x40, 0x49, 0x4c, 0x48, 0x49, 0x44,
-    0x4c, 0x56, 0x52, 0x49, 0x3d, 0x59, 0x4f, 0x56, 0x56, 0x42, 0x46, 0x45,
-    0x3e, 0x28, 0x3f, 0x5b, 0x3f, 0x5a, 0x4c, 0x42, 0x44, 0x22, 0x3f, 0x46,
-    0x47, 0x0d, 0x3e, 0x41, 0x45, 0x49, 0x4a, 0x3b, 0x45, 0x2d, 0x4d, 0x4a,
-    0x44, 0x43, 0x49, 0x46, 0x4b, 0x47, 0x49, 0x45, 0x4e, 0x40, 0x4c, 0x3c,
-    0x42, 0x3e, 0x4b, 0x50, 0x48, 0x49, 0x4c, 0x42, 0x3c, 0x43, 0x50, 0x43,
-    0x49, 0x4e, 0x4e, 0x43, 0x46, 0x4c, 0x48, 0x4a, 0x43, 0x4c, 0x49, 0x4e,
-    0x47, 0x44, 0x50, 0x4c, 0x4a, 0x48, 0x47, 0x5f, 0x3f, 0x3e, 0x48, 0x4f,
-    0x4f, 0x49, 0x4a, 0x5f, 0x4e, 0x40, 0x4e, 0x48, 0x47, 0x44, 0x40, 0x4d,
-    0x3f, 0x4a, 0x53, 0x45, 0x3e, 0x50, 0x3f, 0x39, 0x50, 0x45, 0x45, 0x4b,
-    0x43, 0x41, 0x46, 0x41, 0x49, 0x47, 0x4b, 0x41, 0x3c, 0x4b, 0x46, 0x3f,
-    0x41, 0x4a, 0x4e, 0x4c, 0x49, 0x4c, 0x3f, 0x44, 0x53, 0x4c, 0x45, 0x49,
-    0x48, 0x4d, 0x48, 0x4a, 0x48, 0x4f, 0x45, 0x4d, 0x48, 0x4c, 0x41, 0x49,
-    0x42, 0x48, 0x53, 0x46, 0x4a, 0x46, 0x4b, 0x4f, 0x4c, 0x52, 0x4c, 0x51,
-    0x41, 0x4d, 0x49, 0x41, 0x49, 0x4f, 0x49, 0x42, 0x4a, 0x48, 0x51, 0x4a,
-    0x44, 0x4d, 0x55, 0x48, 0x47, 0x4d, 0x4d, 0x45, 0x42, 0x60, 0x4a, 0x51,
-    0x42, 0x54, 0x56, 0x56, 0x50, 0x4a, 0x3f, 0x4a, 0x40, 0x25, 0x3a, 0x59,
-    0x46, 0x58, 0x52, 0x46, 0x41, 0x28, 0x3d, 0x3e, 0x45, 0x13, 0x47, 0x41,
-    0x3d, 0x44, 0x48, 0x45, 0x49, 0x26, 0x46, 0x4c, 0x3b, 0x4a, 0x42, 0x47,
-    0x46, 0x41, 0x44, 0x52, 0x50, 0x4a, 0x4f, 0x40, 0x4b, 0x39, 0x42, 0x45,
-    0x4a, 0x4d, 0x4f, 0x3f, 0x42, 0x4f, 0x49, 0x45, 0x42, 0x4a, 0x46, 0x47,
-    0x48, 0x40, 0x4a, 0x46, 0x41, 0x3b, 0x48, 0x55, 0x4b, 0x4e, 0x4e, 0x48,
-    0x4b, 0x44, 0x46, 0x53, 0x48, 0x45, 0x4b, 0x53, 0x49, 0x43, 0x4a, 0x5c,
-    0x46, 0x45, 0x45, 0x49, 0x49, 0x49, 0x4c, 0x43, 0x4e, 0x4a, 0x41, 0x4a,
-    0x42, 0x43, 0x4a, 0x38, 0x44, 0x4a, 0x4b, 0x3f, 0x45, 0x49, 0x45, 0x38,
-    0x43, 0x40, 0x45, 0x4c, 0x47, 0x42, 0x3f, 0x42, 0x3e, 0x4a, 0x43, 0x50,
-    0x4a, 0x4e, 0x4f, 0x47, 0x4d, 0x49, 0x49, 0x47, 0x4a, 0x4d, 0x46, 0x4c,
-    0x4f, 0x3d, 0x52, 0x4a, 0x41, 0x44, 0x4b, 0x50, 0x4c, 0x52, 0x49, 0x50,
-    0x4b, 0x45, 0x49, 0x4d, 0x48, 0x55, 0x50, 0x47, 0x4e, 0x50, 0x4f, 0x48,
-    0x46, 0x4d, 0x4d, 0x41, 0x48, 0x51, 0x4b, 0x4c, 0x47, 0x51, 0x42, 0x42,
-    0x4d, 0x47, 0x43, 0x4c, 0x4c, 0x5a, 0x4e, 0x47, 0x3b, 0x59, 0x51, 0x57,
-    0x4c, 0x40, 0x46, 0x4c, 0x37, 0x2a, 0x35, 0x58, 0x44, 0x5b, 0x4c, 0x44,
-    0x3e, 0x2e, 0x3f, 0x43, 0x46, 0x23, 0x49, 0x3e, 0x41, 0x3f, 0x4b, 0x3e,
-    0x4e, 0x2f, 0x4d, 0x4a, 0x4e, 0x40, 0x4e, 0x41, 0x40, 0x3f, 0x4a, 0x42,
-    0x4d, 0x4c, 0x44, 0x47, 0x4e, 0x44, 0x40, 0x43, 0x4d, 0x49, 0x4f, 0x3d,
-    0x49, 0x3f, 0x51, 0x48, 0x42, 0x4a, 0x49, 0x47, 0x49, 0x46, 0x4a, 0x45,
-    0x45, 0x49, 0x53, 0x4d, 0x4c, 0x4e, 0x44, 0x50, 0x4b, 0x43, 0x4e, 0x5f,
-    0x3c, 0x40, 0x44, 0x46, 0x48, 0x4b, 0x42, 0x62, 0x4e, 0x50, 0x4c, 0x49,
-    0x4a, 0x4f, 0x44, 0x53, 0x42, 0x43, 0x49, 0x48, 0x4b, 0x3c, 0x4a, 0x37,
-    0x4c, 0x41, 0x49, 0x46, 0x46, 0x47, 0x43, 0x40, 0x4d, 0x4d, 0x4a, 0x48,
-    0x50, 0x4b, 0x50, 0x41, 0x44, 0x3e, 0x51, 0x47, 0x44, 0x4a, 0x44, 0x45,
-    0x48, 0x4d, 0x52, 0x4e, 0x44, 0x48, 0x4d, 0x43, 0x42, 0x45, 0x48, 0x52,
-    0x44, 0x42, 0x50, 0x42, 0x4d, 0x45, 0x48, 0x4d, 0x4f, 0x4e, 0x45, 0x49,
-    0x51, 0x48, 0x4f, 0x53, 0x4d, 0x4c, 0x48, 0x50, 0x4e, 0x4d, 0x50, 0x48,
-    0x49, 0x42, 0x4c, 0x42, 0x4b, 0x4b, 0x49, 0x48, 0x48, 0x49, 0x4a, 0x54,
-    0x44, 0x57, 0x4d, 0x4b, 0x3f, 0x56, 0x53, 0x5c, 0x50, 0x4e, 0x46, 0x49,
-    0x40, 0x24, 0x44, 0x58, 0x49, 0x54, 0x48, 0x49, 0x41, 0x22, 0x44, 0x3f,
-    0x48, 0x1c, 0x4d, 0x39, 0x3e, 0x4c, 0x3d, 0x4a, 0x48, 0x2d, 0x48, 0x3e,
-    0x3f, 0x3a, 0x46, 0x4e, 0x44, 0x43, 0x49, 0x51, 0x4d, 0x3c, 0x44, 0x41,
-    0x4e, 0x44, 0x42, 0x4c, 0x45, 0x48, 0x45, 0x46, 0x42, 0x46, 0x47, 0x42,
-    0x4f, 0x45, 0x47, 0x44, 0x48, 0x47, 0x4a, 0x42, 0x4d, 0x48, 0x3e, 0x53,
-    0x47, 0x4b, 0x44, 0x4b, 0x45, 0x4a, 0x50, 0x55, 0x4c, 0x45, 0x48, 0x43,
-    0x53, 0x3d, 0x4e, 0x5f, 0x42, 0x44, 0x4a, 0x4f, 0x3f, 0x48, 0x4e, 0x4b,
-    0x43, 0x48, 0x43, 0x41, 0x4a, 0x4b, 0x51, 0x39, 0x52, 0x46, 0x44, 0x49,
-    0x48, 0x45, 0x4c, 0x40, 0x45, 0x49, 0x51, 0x48, 0x45, 0x42, 0x45, 0x48,
-    0x40, 0x43, 0x3d, 0x47, 0x53, 0x54, 0x4d, 0x4a, 0x4a, 0x47, 0x48, 0x43,
-    0x4c, 0x46, 0x43, 0x4f, 0x49, 0x4c, 0x3f, 0x3d, 0x4b, 0x41, 0x40, 0x48,
-    0x4e, 0x4c, 0x4b, 0x40, 0x4c, 0x43, 0x49, 0x4d, 0x47, 0x4f, 0x47, 0x42,
-    0x47, 0x4a, 0x4d, 0x4f, 0x46, 0x4d, 0x51, 0x49, 0x48, 0x4d, 0x4e, 0x46,
-    0x47, 0x41, 0x44, 0x4d, 0x4b, 0x55, 0x4b, 0x4c, 0x41, 0x5e, 0x50, 0x45,
-    0x40, 0x55, 0x4b, 0x60, 0x55, 0x47, 0x3d, 0x4a, 0x42, 0x22, 0x46, 0x5a,
-    0x47, 0x53, 0x49, 0x44, 0x44, 0x27, 0x41, 0x4f, 0x3e, 0x22, 0x4a, 0x44,
-    0x49, 0x3e, 0x4e, 0x4d, 0x3f, 0x3a, 0x4c, 0x44, 0x4a, 0x44, 0x46, 0x51,
-    0x4f, 0x42, 0x4c, 0x4e, 0x39, 0x4b, 0x42, 0x39, 0x4b, 0x3e, 0x4f, 0x47,
-    0x4a, 0x4f, 0x3f, 0x4d, 0x43, 0x4c, 0x4a, 0x4b, 0x4b, 0x3d, 0x51, 0x46,
-    0x49, 0x4c, 0x47, 0x44, 0x43, 0x3d, 0x3c, 0x54, 0x4a, 0x47, 0x4d, 0x50,
-    0x4a, 0x46, 0x51, 0x62, 0x46, 0x4d, 0x4b, 0x46, 0x49, 0x3c, 0x50, 0x57,
-    0x47, 0x40, 0x3e, 0x4c, 0x4b, 0x3f, 0x55, 0x46, 0x3d, 0x45, 0x42, 0x4e,
-    0x50, 0x49, 0x46, 0x3a, 0x4c, 0x47, 0x4a, 0x49, 0x42, 0x42, 0x4a, 0x44,
-    0x42, 0x40, 0x49, 0x54, 0x46, 0x4b, 0x47, 0x45, 0x51, 0x47, 0x41, 0x42,
-    0x49, 0x50, 0x4e, 0x48, 0x4b, 0x4b, 0x47, 0x4a, 0x47, 0x49, 0x4b, 0x45,
-    0x4b, 0x54, 0x48, 0x54, 0x4b, 0x49, 0x51, 0x4a, 0x4a, 0x40, 0x46, 0x42,
-    0x44, 0x44, 0x4d, 0x4b, 0x47, 0x43, 0x45, 0x41, 0x3e, 0x49, 0x43, 0x51,
-    0x3e, 0x4b, 0x52, 0x46, 0x48, 0x3f, 0x4e, 0x51, 0x51, 0x49, 0x3f, 0x48,
-    0x4c, 0x4c, 0x52, 0x47, 0x43, 0x57, 0x44, 0x42, 0x40, 0x52, 0x50, 0x5d,
-    0x4f, 0x40, 0x42, 0x45, 0x46, 0x26, 0x3c, 0x51, 0x4b, 0x4e, 0x4b, 0x49,
-    0x46, 0x35, 0x49, 0x53, 0x49, 0x2b, 0x4d, 0x3e, 0x50, 0x44, 0x4f, 0x54,
-    0x46, 0x34, 0x49, 0x4d, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x44, 0x52, 0x41,
-    0x4d, 0x4c, 0x52, 0x41, 0x49, 0x3a, 0x4e, 0x49, 0x40, 0x4b, 0x45, 0x4d,
-    0x4b, 0x4a, 0x47, 0x49, 0x45, 0x49, 0x4d, 0x50, 0x3e, 0x47, 0x44, 0x51,
-    0x4c, 0x41, 0x45, 0x50, 0x47, 0x41, 0x4a, 0x52, 0x4b, 0x3d, 0x4b, 0x5b,
-    0x4c, 0x4c, 0x4d, 0x3f, 0x47, 0x44, 0x49, 0x5d, 0x4a, 0x53, 0x44, 0x45,
-    0x45, 0x46, 0x3d, 0x4f, 0x50, 0x3b, 0x44, 0x4e, 0x40, 0x41, 0x4c, 0x3a,
-    0x4a, 0x45, 0x49, 0x48, 0x45, 0x4a, 0x45, 0x36, 0x45, 0x4d, 0x4c, 0x49,
-    0x3f, 0x47, 0x4d, 0x40, 0x53, 0x48, 0x49, 0x4c, 0x47, 0x4f, 0x42, 0x44,
-    0x45, 0x40, 0x4a, 0x4c, 0x49, 0x4f, 0x4b, 0x4d, 0x42, 0x45, 0x3e, 0x4a,
-    0x48, 0x4a, 0x49, 0x50, 0x4c, 0x53, 0x50, 0x45, 0x4b, 0x4c, 0x46, 0x4f,
-    0x44, 0x43, 0x54, 0x50, 0x3f, 0x48, 0x42, 0x4b, 0x43, 0x3f, 0x4d, 0x4c,
-    0x43, 0x49, 0x4a, 0x47, 0x54, 0x4b, 0x4f, 0x4d, 0x44, 0x47, 0x49, 0x4e,
-    0x4e, 0x55, 0x40, 0x46, 0x44, 0x56, 0x4e, 0x65, 0x4f, 0x3f, 0x43, 0x48,
-    0x39, 0x27, 0x43, 0x55, 0x4b, 0x4c, 0x44, 0x46, 0x42, 0x34, 0x44, 0x52,
-    0x43, 0x22, 0x4e, 0x41, 0x49, 0x48, 0x49, 0x51, 0x3b, 0x37, 0x4b, 0x40,
-    0x4f, 0x45, 0x53, 0x4c, 0x47, 0x46, 0x47, 0x4c, 0x3e, 0x44, 0x45, 0x49,
-    0x48, 0x50, 0x45, 0x40, 0x46, 0x4c, 0x47, 0x4d, 0x44, 0x48, 0x49, 0x50,
-    0x4f, 0x4a, 0x46, 0x55, 0x4e, 0x42, 0x4c, 0x4c, 0x50, 0x48, 0x3d, 0x55,
-    0x46, 0x3e, 0x4a, 0x4b, 0x4f, 0x46, 0x46, 0x60, 0x50, 0x3f, 0x55, 0x40,
-    0x42, 0x44, 0x48, 0x63, 0x50, 0x3d, 0x45, 0x4f, 0x4e, 0x41, 0x47, 0x48,
-    0x4a, 0x3c, 0x3d, 0x46, 0x3f, 0x42, 0x43, 0x37, 0x4f, 0x4f, 0x50, 0x47,
-    0x47, 0x4b, 0x52, 0x40, 0x3f, 0x44, 0x4a, 0x40, 0x4d, 0x44, 0x4e, 0x37,
-    0x43, 0x48, 0x47, 0x3f, 0x51, 0x4d, 0x45, 0x42, 0x41, 0x46, 0x3d, 0x53,
-    0x4f, 0x4b, 0x54, 0x45, 0x51, 0x40, 0x4a, 0x4a, 0x48, 0x4f, 0x43, 0x4a,
-    0x4f, 0x4c, 0x4c, 0x4f, 0x48, 0x4c, 0x44, 0x4e, 0x43, 0x46, 0x4f, 0x4a,
-    0x43, 0x41, 0x49, 0x49, 0x47, 0x53, 0x45, 0x49, 0x4e, 0x46, 0x4c, 0x4e,
-    0x3c, 0x49, 0x44, 0x45, 0x4c, 0x42, 0x49, 0x41, 0x48, 0x58, 0x54, 0x4d,
-    0x35, 0x52, 0x4e, 0x5b, 0x4f, 0x40, 0x3e, 0x46, 0x46, 0x36, 0x3d, 0x60,
-    0x4d, 0x49, 0x4a, 0x43, 0x44, 0x36, 0x49, 0x67, 0x4a, 0x2d, 0x4b, 0x40,
-    0x3f, 0x49, 0x43, 0x5f, 0x45, 0x3c, 0x49, 0x4c, 0x4a, 0x43, 0x48, 0x55,
-    0x49, 0x46, 0x49, 0x46, 0x44, 0x4e, 0x42, 0x4e, 0x40, 0x45, 0x42, 0x52,
-    0x4a, 0x40, 0x4a, 0x44, 0x40, 0x45, 0x54, 0x3d, 0x4c, 0x3e, 0x4c, 0x55,
-    0x4d, 0x45, 0x4d, 0x51, 0x4a, 0x4b, 0x44, 0x5b, 0x48, 0x3d, 0x3e, 0x46,
-    0x4f, 0x4d, 0x3f, 0x62, 0x4d, 0x45, 0x3f, 0x47, 0x47, 0x47, 0x44, 0x5b,
-    0x4b, 0x4f, 0x51, 0x4c, 0x4a, 0x47, 0x48, 0x5b, 0x47, 0x40, 0x4a, 0x47,
-    0x42, 0x44, 0x46, 0x46, 0x45, 0x48, 0x4a, 0x3f, 0x40, 0x4f, 0x48, 0x3a,
-    0x49, 0x52, 0x4a, 0x53, 0x43, 0x4c, 0x4b, 0x4a, 0x4a, 0x4a, 0x4e, 0x42,
-    0x4b, 0x46, 0x3d, 0x50, 0x51, 0x4b, 0x4b, 0x4f, 0x50, 0x4c, 0x4f, 0x4c,
-    0x4d, 0x41, 0x41, 0x3c, 0x40, 0x43, 0x54, 0x51, 0x48, 0x3d, 0x48, 0x51,
-    0x42, 0x42, 0x4c, 0x4e, 0x4d, 0x4b, 0x49, 0x43, 0x48, 0x47, 0x4b, 0x49,
-    0x49, 0x4e, 0x4d, 0x46, 0x4c, 0x52, 0x49, 0x49, 0x51, 0x4e, 0x45, 0x47,
-    0x44, 0x47, 0x42, 0x4a, 0x46, 0x59, 0x48, 0x48, 0x4b, 0x4f, 0x4c, 0x5e,
-    0x5c, 0x45, 0x3f, 0x48, 0x3d, 0x3f, 0x37, 0x5a, 0x4b, 0x4b, 0x45, 0x49,
-    0x3e, 0x42, 0x41, 0x6b, 0x49, 0x2d, 0x45, 0x43, 0x47, 0x45, 0x49, 0x61,
-    0x3d, 0x3b, 0x49, 0x43, 0x49, 0x4b, 0x4b, 0x55, 0x4b, 0x47, 0x46, 0x46,
-    0x48, 0x4d, 0x49, 0x4f, 0x4a, 0x4c, 0x42, 0x51, 0x41, 0x44, 0x45, 0x4f,
-    0x4e, 0x44, 0x3f, 0x55, 0x3e, 0x4a, 0x45, 0x50, 0x46, 0x42, 0x41, 0x49,
-    0x49, 0x47, 0x49, 0x61, 0x47, 0x40, 0x41, 0x4e, 0x4d, 0x4b, 0x4a, 0x5e,
-    0x52, 0x49, 0x4b, 0x52, 0x51, 0x55, 0x42, 0x61, 0x53, 0x4c, 0x48, 0x4a,
-    0x4e, 0x48, 0x48, 0x57, 0x4c, 0x40, 0x40, 0x48, 0x45, 0x43, 0x3e, 0x46,
-    0x43, 0x4a, 0x45, 0x45, 0x44, 0x4f, 0x44, 0x40, 0x49, 0x48, 0x4e, 0x49,
-    0x4a, 0x4e, 0x49, 0x51, 0x46, 0x4f, 0x47, 0x44, 0x42, 0x4d, 0x43, 0x4e,
-    0x4f, 0x4d, 0x44, 0x51, 0x47, 0x49, 0x40, 0x57, 0x4b, 0x49, 0x47, 0x4c,
-    0x4d, 0x4d, 0x3e, 0x47, 0x45, 0x41, 0x50, 0x4b, 0x4b, 0x45, 0x42, 0x4e,
-    0x48, 0x47, 0x4e, 0x4b, 0x56, 0x4c, 0x4f, 0x52, 0x51, 0x49, 0x4d, 0x4a,
-    0x4b, 0x52, 0x4d, 0x55, 0x4b, 0x4e, 0x4e, 0x4b, 0x51, 0x57, 0x47, 0x42,
-    0x49, 0x48, 0x56, 0x44, 0x52, 0x56, 0x53, 0x5a, 0x63, 0x53, 0x4c, 0x4c,
-    0x43, 0x56, 0x3c, 0x57, 0x47, 0x47, 0x4d, 0x52, 0x43, 0x48, 0x45, 0x5f,
-    0x45, 0x29, 0x47, 0x45, 0x48, 0x40, 0x41, 0x4b, 0x3f, 0x39, 0x49, 0x4e,
-    0x47, 0x55, 0x42, 0x56, 0x4d, 0x43, 0x48, 0x44, 0x45, 0x53, 0x43, 0x46,
-    0x49, 0x43, 0x49, 0x4a, 0x40, 0x4e, 0x4a, 0x4a, 0x47, 0x43, 0x45, 0x4d,
-    0x4a, 0x47, 0x3f, 0x53, 0x45, 0x43, 0x4b, 0x4c, 0x42, 0x47, 0x47, 0x5f,
-    0x48, 0x48, 0x46, 0x44, 0x50, 0x47, 0x41, 0x64, 0x4e, 0x46, 0x49, 0x4a,
-    0x4d, 0x55, 0x42, 0x55, 0x46, 0x3d, 0x49, 0x43, 0x52, 0x52, 0x47, 0x52,
-    0x4e, 0x46, 0x47, 0x41, 0x49, 0x4d, 0x50, 0x47, 0x42, 0x49, 0x41, 0x42,
-    0x4b, 0x48, 0x49, 0x42, 0x4d, 0x48, 0x51, 0x54, 0x43, 0x56, 0x4c, 0x52,
-    0x53, 0x4d, 0x54, 0x4a, 0x51, 0x50, 0x48, 0x4c, 0x4e, 0x48, 0x4c, 0x4c,
-    0x52, 0x49, 0x4a, 0x4e, 0x4e, 0x41, 0x4f, 0x53, 0x49, 0x52, 0x42, 0x4b,
-    0x50, 0x46, 0x50, 0x4a, 0x53, 0x56, 0x46, 0x4f, 0x4b, 0x49, 0x3d, 0x41,
-    0x4c, 0x52, 0x42, 0x50, 0x4d, 0x45, 0x4e, 0x51, 0x4b, 0x4c, 0x46, 0x42,
-    0x41, 0x4b, 0x40, 0x4a, 0x42, 0x57, 0x4f, 0x43, 0x40, 0x50, 0x4c, 0x51,
-    0x4f, 0x48, 0x3a, 0x4e, 0x51, 0x40, 0x49, 0x66, 0x4b, 0x42, 0x48, 0x3c,
-    0x5b, 0x47, 0x53, 0x40, 0x4a, 0x48, 0x35, 0x44, 0x5f, 0x50, 0x4a, 0x3c,
-    0x41, 0x45, 0x48, 0x3b, 0x42, 0x59, 0x43, 0x4b, 0x48, 0x49, 0x4a, 0x40,
-    0x4f, 0x5c, 0x50, 0x54, 0x53, 0x55, 0x4c, 0x4a, 0x43, 0x46, 0x49, 0x47,
-    0x49, 0x48, 0x4b, 0x43, 0x42, 0x44, 0x42, 0x46, 0x44, 0x3f, 0x4b, 0x42,
-    0x4d, 0x49, 0x41, 0x46, 0x47, 0x51, 0x51, 0x44, 0x4c, 0x54, 0x4e, 0x4b,
-    0x42, 0x52, 0x4e, 0x4c, 0x4b, 0x4a, 0x50, 0x4e, 0x44, 0x4b, 0x4e, 0x4e,
-    0x4f, 0x42, 0x4b, 0x48, 0x46, 0x43, 0x48, 0x54, 0x4b, 0x4e, 0x48, 0x4f,
-    0x4a, 0x4d, 0x43, 0x4e, 0x47, 0x50, 0x4a, 0x44, 0x47, 0x52, 0x46, 0x53,
-    0x4a, 0x40, 0x46, 0x54, 0x50, 0x4a, 0x47, 0x51, 0x49, 0x45, 0x4b, 0x4e,
-    0x4b, 0x46, 0x4c, 0x4c, 0x52, 0x47, 0x45, 0x45, 0x4a, 0x47, 0x4c, 0x52,
-    0x44, 0x51, 0x47, 0x42, 0x47, 0x43, 0x43, 0x49, 0x52, 0x5a, 0x55, 0x3e,
-    0x45, 0x4b, 0x4c, 0x46, 0x4f, 0x4b, 0x45, 0x49, 0x4a, 0x4e, 0x4a, 0x50,
-    0x3e, 0x4e, 0x42, 0x4e, 0x44, 0x55, 0x3d, 0x4a, 0x4d, 0x49, 0x4d, 0x42,
-    0x49, 0x4e, 0x50, 0x44, 0x4b, 0x3c, 0x41, 0x49, 0x51, 0x49, 0x3c, 0x4e,
-    0x4c, 0x39, 0x4c, 0x72, 0x44, 0x4b, 0x49, 0x42, 0x5f, 0x48, 0x4a, 0x48,
-    0x41, 0x4c, 0x43, 0x40, 0x62, 0x5e, 0x47, 0x3c, 0x4a, 0x4c, 0x55, 0x49,
-    0x4b, 0x52, 0x4e, 0x4b, 0x4d, 0x48, 0x4c, 0x3c, 0x3f, 0x4f, 0x4e, 0x48,
-    0x45, 0x55, 0x4a, 0x46, 0x48, 0x3d, 0x45, 0x44, 0x4b, 0x4a, 0x46, 0x3a,
-    0x4e, 0x44, 0x4d, 0x49, 0x49, 0x49, 0x40, 0x3e, 0x40, 0x47, 0x48, 0x43,
-    0x3f, 0x51, 0x46, 0x4c, 0x45, 0x4c, 0x49, 0x44, 0x3e, 0x57, 0x49, 0x4e,
-    0x48, 0x3f, 0x48, 0x47, 0x53, 0x4d, 0x50, 0x51, 0x49, 0x42, 0x45, 0x44,
-    0x49, 0x49, 0x46, 0x4b, 0x45, 0x49, 0x4f, 0x49, 0x46, 0x48, 0x4c, 0x55,
-    0x46, 0x51, 0x48, 0x4a, 0x48, 0x54, 0x4b, 0x5a, 0x4c, 0x47, 0x40, 0x47,
-    0x40, 0x55, 0x50, 0x52, 0x4a, 0x4b, 0x4f, 0x49, 0x4b, 0x50, 0x4b, 0x5b,
-    0x51, 0x53, 0x4f, 0x4e, 0x49, 0x48, 0x44, 0x52, 0x46, 0x4e, 0x47, 0x48,
-    0x44, 0x43, 0x49, 0x55, 0x48, 0x58, 0x4f, 0x46, 0x45, 0x53, 0x45, 0x4a,
-    0x4c, 0x4c, 0x49, 0x46, 0x47, 0x4d, 0x41, 0x4d, 0x4f, 0x59, 0x4a, 0x49,
-    0x46, 0x4e, 0x44, 0x49, 0x4d, 0x48, 0x54, 0x47, 0x48, 0x4e, 0x48, 0x43,
-    0x46, 0x41, 0x46, 0x44, 0x52, 0x46, 0x42, 0x4c, 0x4c, 0x31, 0x4d, 0x6f,
-    0x51, 0x4f, 0x4d, 0x43, 0x5c, 0x48, 0x49, 0x49, 0x46, 0x4c, 0x43, 0x3b,
-    0x5d, 0x63, 0x58, 0x46, 0x49, 0x45, 0x4e, 0x48, 0x49, 0x5d, 0x45, 0x50,
-    0x56, 0x4d, 0x57, 0x37, 0x40, 0x55, 0x43, 0x4b, 0x4e, 0x46, 0x4c, 0x3b,
-    0x3d, 0x4b, 0x49, 0x4b, 0x52, 0x47, 0x4d, 0x34, 0x4c, 0x4c, 0x47, 0x4e,
-    0x4d, 0x4c, 0x3d, 0x3f, 0x4a, 0x49, 0x44, 0x45, 0x4a, 0x54, 0x43, 0x44,
-    0x50, 0x4b, 0x4d, 0x4c, 0x4e, 0x48, 0x46, 0x51, 0x43, 0x48, 0x48, 0x48,
-    0x42, 0x44, 0x4e, 0x48, 0x47, 0x45, 0x48, 0x51, 0x53, 0x4a, 0x4f, 0x58,
-    0x42, 0x4d, 0x48, 0x4f, 0x4c, 0x45, 0x4a, 0x57, 0x4b, 0x43, 0x4d, 0x4b,
-    0x4a, 0x4e, 0x4c, 0x5f, 0x3f, 0x4f, 0x4a, 0x42, 0x4b, 0x48, 0x4d, 0x62,
-    0x4f, 0x4b, 0x50, 0x4c, 0x45, 0x49, 0x44, 0x53, 0x4a, 0x4f, 0x45, 0x56,
-    0x4b, 0x44, 0x41, 0x53, 0x49, 0x48, 0x4d, 0x49, 0x47, 0x4b, 0x46, 0x4c,
-    0x49, 0x4b, 0x4c, 0x54, 0x4f, 0x4b, 0x47, 0x49, 0x44, 0x4a, 0x4e, 0x53,
-    0x4f, 0x49, 0x54, 0x4e, 0x4a, 0x48, 0x42, 0x54, 0x51, 0x46, 0x4b, 0x52,
-    0x45, 0x48, 0x51, 0x4a, 0x40, 0x4a, 0x50, 0x45, 0x4a, 0x46, 0x49, 0x46,
-    0x54, 0x46, 0x42, 0x48, 0x50, 0x36, 0x4a, 0x6b, 0x46, 0x59, 0x51, 0x47,
-    0x5f, 0x4d, 0x43, 0x4d, 0x44, 0x4d, 0x42, 0x3b, 0x65, 0x6a, 0x56, 0x48,
-    0x4d, 0x4c, 0x52, 0x4a, 0x4d, 0x61, 0x52, 0x4b, 0x47, 0x4f, 0x48, 0x49,
-    0x3f, 0x5b, 0x45, 0x51, 0x48, 0x48, 0x4b, 0x3c, 0x3b, 0x4c, 0x54, 0x52,
-    0x4f, 0x51, 0x53, 0x31, 0x47, 0x4c, 0x45, 0x4a, 0x42, 0x4b, 0x47, 0x40,
-    0x41, 0x49, 0x4c, 0x46, 0x4b, 0x53, 0x46, 0x49, 0x44, 0x4b, 0x4e, 0x4b,
-    0x48, 0x51, 0x49, 0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x45, 0x43, 0x46, 0x56,
-    0x42, 0x4b, 0x49, 0x4e, 0x4e, 0x53, 0x42, 0x5c, 0x4b, 0x46, 0x49, 0x46,
-    0x4e, 0x41, 0x42, 0x67, 0x41, 0x49, 0x4d, 0x48, 0x49, 0x4e, 0x3f, 0x61,
-    0x48, 0x4a, 0x40, 0x42, 0x4c, 0x51, 0x50, 0x63, 0x49, 0x44, 0x49, 0x47,
-    0x45, 0x4d, 0x49, 0x61, 0x3f, 0x48, 0x40, 0x41, 0x49, 0x49, 0x45, 0x57,
-    0x45, 0x46, 0x4d, 0x46, 0x4c, 0x4a, 0x4d, 0x4b, 0x43, 0x54, 0x4b, 0x49,
-    0x4c, 0x49, 0x41, 0x49, 0x4b, 0x47, 0x45, 0x4b, 0x44, 0x43, 0x46, 0x3f,
-    0x47, 0x47, 0x43, 0x4c, 0x49, 0x4c, 0x3d, 0x4d, 0x4b, 0x54, 0x4a, 0x4f,
-    0x44, 0x4c, 0x4b, 0x47, 0x4c, 0x45, 0x3d, 0x52, 0x58, 0x4b, 0x45, 0x4e,
-    0x48, 0x39, 0x53, 0x70, 0x4a, 0x5d, 0x4c, 0x4e, 0x5a, 0x4f, 0x46, 0x4b,
-    0x3e, 0x4f, 0x44, 0x3d, 0x66, 0x6b, 0x50, 0x4d, 0x4d, 0x57, 0x52, 0x4a,
-    0x4c, 0x5b, 0x4e, 0x53, 0x4d, 0x54, 0x50, 0x42, 0x3c, 0x5d, 0x4a, 0x4c,
-    0x56, 0x52, 0x50, 0x40, 0x48, 0x4c, 0x4d, 0x49, 0x49, 0x4f, 0x51, 0x38,
-    0x42, 0x49, 0x4d, 0x4f, 0x45, 0x40, 0x4d, 0x41, 0x4b, 0x4a, 0x47, 0x51,
-    0x4b, 0x53, 0x4c, 0x4a, 0x51, 0x4c, 0x42, 0x56, 0x48, 0x4a, 0x47, 0x58,
-    0x49, 0x46, 0x52, 0x4a, 0x45, 0x47, 0x51, 0x54, 0x4f, 0x50, 0x50, 0x53,
-    0x49, 0x4a, 0x4d, 0x56, 0x56, 0x4b, 0x4d, 0x45, 0x40, 0x4d, 0x48, 0x60,
-    0x4e, 0x56, 0x48, 0x4b, 0x47, 0x45, 0x47, 0x62, 0x4e, 0x4f, 0x41, 0x49,
-    0x48, 0x57, 0x44, 0x64, 0x4f, 0x4f, 0x49, 0x44, 0x49, 0x4c, 0x3f, 0x53,
-    0x40, 0x41, 0x4e, 0x4b, 0x4d, 0x54, 0x42, 0x53, 0x4e, 0x41, 0x49, 0x44,
-    0x41, 0x45, 0x4d, 0x4f, 0x47, 0x51, 0x45, 0x4a, 0x42, 0x45, 0x4e, 0x40,
-    0x4b, 0x52, 0x48, 0x47, 0x4e, 0x4f, 0x47, 0x41, 0x48, 0x53, 0x47, 0x47,
-    0x46, 0x42, 0x48, 0x4b, 0x42, 0x4c, 0x49, 0x4c, 0x45, 0x4c, 0x54, 0x45,
-    0x4c, 0x43, 0x4e, 0x49, 0x56, 0x47, 0x45, 0x4f, 0x4d, 0x3a, 0x58, 0x74,
-    0x49, 0x5b, 0x4c, 0x4f, 0x64, 0x4e, 0x45, 0x43, 0x44, 0x5b, 0x43, 0x41,
-    0x63, 0x70, 0x55, 0x45, 0x4a, 0x4a, 0x4d, 0x51, 0x4b, 0x5a, 0x51, 0x57,
-    0x54, 0x5b, 0x55, 0x44, 0x38, 0x57, 0x4e, 0x50, 0x4e, 0x56, 0x57, 0x3a,
-    0x3a, 0x4b, 0x57, 0x4c, 0x51, 0x53, 0x4d, 0x3b, 0x44, 0x43, 0x47, 0x4c,
-    0x48, 0x59, 0x51, 0x41, 0x43, 0x44, 0x51, 0x51, 0x4a, 0x54, 0x51, 0x4b,
-    0x4e, 0x45, 0x51, 0x4a, 0x49, 0x4a, 0x4f, 0x52, 0x4c, 0x3e, 0x4e, 0x55,
-    0x42, 0x46, 0x46, 0x4a, 0x42, 0x52, 0x49, 0x47, 0x4a, 0x56, 0x4f, 0x50,
-    0x46, 0x4f, 0x43, 0x51, 0x53, 0x46, 0x40, 0x60, 0x44, 0x4d, 0x46, 0x54,
-    0x3d, 0x49, 0x43, 0x64, 0x45, 0x4d, 0x50, 0x49, 0x4f, 0x4d, 0x53, 0x60,
-    0x4a, 0x52, 0x49, 0x47, 0x48, 0x5a, 0x48, 0x58, 0x4e, 0x4f, 0x43, 0x4f,
-    0x50, 0x51, 0x41, 0x52, 0x4c, 0x4d, 0x45, 0x42, 0x41, 0x4c, 0x44, 0x54,
-    0x4e, 0x4d, 0x4a, 0x47, 0x40, 0x4a, 0x3e, 0x47, 0x4c, 0x58, 0x46, 0x46,
-    0x55, 0x4c, 0x4d, 0x45, 0x49, 0x51, 0x53, 0x46, 0x46, 0x43, 0x43, 0x48,
-    0x52, 0x3d, 0x4b, 0x4e, 0x49, 0x47, 0x3f, 0x3d, 0x4f, 0x45, 0x44, 0x3f,
-    0x5a, 0x43, 0x4b, 0x4d, 0x51, 0x35, 0x54, 0x76, 0x4f, 0x5e, 0x4c, 0x50,
-    0x5a, 0x51, 0x46, 0x49, 0x44, 0x61, 0x4f, 0x41, 0x67, 0x72, 0x56, 0x4f,
-    0x42, 0x48, 0x4b, 0x52, 0x46, 0x60, 0x50, 0x4e, 0x4a, 0x5b, 0x5f, 0x46,
-    0x31, 0x5b, 0x4a, 0x48, 0x4b, 0x58, 0x51, 0x41, 0x37, 0x4e, 0x4f, 0x55,
-    0x51, 0x5c, 0x4f, 0x42, 0x4b, 0x4e, 0x4f, 0x54, 0x4f, 0x52, 0x43, 0x43,
-    0x48, 0x53, 0x53, 0x41, 0x4b, 0x49, 0x4e, 0x50, 0x46, 0x4c, 0x4f, 0x49,
-    0x42, 0x49, 0x4c, 0x4c, 0x4c, 0x41, 0x4e, 0x48, 0x47, 0x4c, 0x49, 0x53,
-    0x44, 0x46, 0x51, 0x53, 0x45, 0x52, 0x4e, 0x53, 0x50, 0x58, 0x42, 0x45,
-    0x44, 0x42, 0x48, 0x58, 0x4e, 0x4d, 0x54, 0x56, 0x4c, 0x46, 0x4a, 0x58,
-    0x48, 0x4f, 0x47, 0x51, 0x47, 0x4f, 0x4f, 0x5b, 0x41, 0x4e, 0x45, 0x45,
-    0x4a, 0x50, 0x3e, 0x57, 0x48, 0x4e, 0x41, 0x4c, 0x45, 0x51, 0x46, 0x4c,
-    0x46, 0x4f, 0x42, 0x45, 0x4b, 0x4c, 0x49, 0x4c, 0x44, 0x4f, 0x4e, 0x4d,
-    0x48, 0x56, 0x43, 0x48, 0x42, 0x54, 0x48, 0x43, 0x3e, 0x51, 0x43, 0x47,
-    0x47, 0x47, 0x49, 0x4d, 0x46, 0x4e, 0x52, 0x42, 0x48, 0x4e, 0x4c, 0x4a,
-    0x4d, 0x3e, 0x43, 0x40, 0x48, 0x41, 0x47, 0x4f, 0x5e, 0x49, 0x40, 0x4c,
-    0x50, 0x42, 0x56, 0x75, 0x51, 0x5e, 0x51, 0x4e, 0x62, 0x58, 0x49, 0x47,
-    0x51, 0x59, 0x46, 0x46, 0x6c, 0x72, 0x55, 0x44, 0x4c, 0x4a, 0x4d, 0x59,
-    0x53, 0x64, 0x4d, 0x51, 0x55, 0x5e, 0x59, 0x50, 0x30, 0x58, 0x50, 0x4c,
-    0x4c, 0x60, 0x59, 0x42, 0x32, 0x53, 0x50, 0x55, 0x4d, 0x53, 0x59, 0x43,
-    0x3e, 0x49, 0x4f, 0x52, 0x4d, 0x51, 0x47, 0x45, 0x4d, 0x4e, 0x53, 0x4e,
-    0x54, 0x4f, 0x4d, 0x4d, 0x4e, 0x40, 0x47, 0x53, 0x53, 0x49, 0x56, 0x4d,
-    0x4d, 0x3a, 0x4c, 0x4e, 0x45, 0x4a, 0x47, 0x45, 0x53, 0x4a, 0x4e, 0x52,
-    0x4d, 0x4e, 0x48, 0x56, 0x4e, 0x4a, 0x4d, 0x52, 0x49, 0x4e, 0x4e, 0x58,
-    0x47, 0x50, 0x4c, 0x54, 0x49, 0x42, 0x46, 0x54, 0x50, 0x54, 0x54, 0x46,
-    0x40, 0x49, 0x4b, 0x57, 0x4b, 0x59, 0x44, 0x46, 0x52, 0x55, 0x51, 0x55,
-    0x4f, 0x50, 0x4d, 0x4d, 0x48, 0x50, 0x4e, 0x49, 0x4e, 0x42, 0x45, 0x3f,
-    0x4d, 0x4f, 0x51, 0x47, 0x4a, 0x4c, 0x4b, 0x4b, 0x46, 0x4d, 0x44, 0x52,
-    0x4d, 0x44, 0x40, 0x4d, 0x54, 0x46, 0x54, 0x44, 0x4b, 0x46, 0x47, 0x45,
-    0x50, 0x45, 0x45, 0x4b, 0x4c, 0x48, 0x3f, 0x55, 0x4a, 0x45, 0x49, 0x4e,
-    0x40, 0x49, 0x4a, 0x41, 0x56, 0x4b, 0x49, 0x4e, 0x4a, 0x41, 0x50, 0x70,
-    0x56, 0x59, 0x4b, 0x55, 0x58, 0x59, 0x49, 0x47, 0x4a, 0x5a, 0x4c, 0x46,
-    0x62, 0x7b, 0x58, 0x51, 0x44, 0x47, 0x44, 0x57, 0x4f, 0x65, 0x4e, 0x50,
-    0x4d, 0x67, 0x5c, 0x4a, 0x2b, 0x61, 0x48, 0x4b, 0x4b, 0x5d, 0x5c, 0x48,
-    0x39, 0x50, 0x45, 0x4d, 0x53, 0x60, 0x53, 0x46, 0x42, 0x46, 0x50, 0x45,
-    0x4f, 0x4e, 0x46, 0x4a, 0x4d, 0x51, 0x54, 0x47, 0x59, 0x4b, 0x58, 0x4a,
-    0x50, 0x3d, 0x59, 0x48, 0x45, 0x4e, 0x4e, 0x47, 0x4f, 0x47, 0x4d, 0x4b,
-    0x52, 0x42, 0x4c, 0x48, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x4c, 0x4d, 0x51,
-    0x49, 0x4f, 0x4c, 0x47, 0x47, 0x48, 0x47, 0x59, 0x4f, 0x4f, 0x53, 0x49,
-    0x4e, 0x4b, 0x4f, 0x5a, 0x50, 0x42, 0x47, 0x50, 0x4a, 0x54, 0x47, 0x5a,
-    0x43, 0x49, 0x47, 0x4e, 0x49, 0x4d, 0x43, 0x54, 0x4c, 0x53, 0x4e, 0x4e,
-    0x42, 0x43, 0x48, 0x46, 0x4f, 0x43, 0x43, 0x45, 0x51, 0x47, 0x4b, 0x4f,
-    0x56, 0x48, 0x48, 0x49, 0x46, 0x45, 0x4d, 0x52, 0x47, 0x4b, 0x46, 0x50,
-    0x3e, 0x4e, 0x4c, 0x43, 0x45, 0x4d, 0x53, 0x43, 0x46, 0x45, 0x44, 0x52,
-    0x45, 0x49, 0x49, 0x51, 0x3d, 0x4a, 0x4d, 0x46, 0x42, 0x41, 0x4e, 0x48,
-    0x5a, 0x49, 0x49, 0x49, 0x4f, 0x3d, 0x56, 0x68, 0x56, 0x67, 0x4b, 0x57,
-    0x5f, 0x5c, 0x40, 0x4a, 0x4a, 0x54, 0x4c, 0x47, 0x64, 0x7a, 0x54, 0x48,
-    0x46, 0x45, 0x46, 0x57, 0x4e, 0x61, 0x4f, 0x50, 0x4d, 0x64, 0x5b, 0x43,
-    0x2d, 0x60, 0x55, 0x51, 0x4c, 0x54, 0x4f, 0x4e, 0x2f, 0x50, 0x4f, 0x52,
-    0x50, 0x61, 0x54, 0x4b, 0x3d, 0x4c, 0x47, 0x51, 0x4a, 0x54, 0x4b, 0x42,
-    0x3b, 0x55, 0x47, 0x50, 0x4f, 0x49, 0x4a, 0x46, 0x43, 0x44, 0x45, 0x47,
-    0x46, 0x4b, 0x4f, 0x46, 0x43, 0x47, 0x4a, 0x4e, 0x51, 0x43, 0x55, 0x47,
-    0x4d, 0x46, 0x4c, 0x4c, 0x49, 0x4d, 0x43, 0x51, 0x47, 0x51, 0x52, 0x4a,
-    0x46, 0x4f, 0x49, 0x52, 0x50, 0x4a, 0x43, 0x53, 0x46, 0x4e, 0x50, 0x54,
-    0x45, 0x3a, 0x4a, 0x4a, 0x4c, 0x50, 0x4b, 0x54, 0x43, 0x4f, 0x4e, 0x45,
-    0x49, 0x4f, 0x46, 0x53, 0x4d, 0x51, 0x52, 0x53, 0x3d, 0x4a, 0x47, 0x4e,
-    0x43, 0x4a, 0x53, 0x48, 0x4a, 0x4c, 0x4a, 0x4a, 0x42, 0x53, 0x3e, 0x43,
-    0x4f, 0x4c, 0x47, 0x48, 0x54, 0x4d, 0x48, 0x48, 0x4e, 0x4c, 0x43, 0x51,
-    0x42, 0x49, 0x44, 0x3e, 0x49, 0x51, 0x4a, 0x4d, 0x4f, 0x49, 0x45, 0x44,
-    0x4e, 0x41, 0x48, 0x4b, 0x4c, 0x49, 0x46, 0x47, 0x5d, 0x4c, 0x4d, 0x50,
-    0x45, 0x40, 0x4e, 0x6a, 0x4f, 0x62, 0x53, 0x50, 0x5c, 0x5e, 0x4a, 0x4c,
-    0x50, 0x56, 0x52, 0x42, 0x60, 0x7e, 0x5b, 0x4b, 0x43, 0x41, 0x4c, 0x56,
-    0x46, 0x5f, 0x4d, 0x49, 0x43, 0x65, 0x5c, 0x4d, 0x2c, 0x61, 0x48, 0x4c,
-    0x44, 0x55, 0x5c, 0x49, 0x37, 0x54, 0x4e, 0x57, 0x52, 0x5c, 0x50, 0x49,
-    0x3e, 0x4d, 0x4f, 0x4f, 0x51, 0x4c, 0x48, 0x43, 0x4a, 0x5a, 0x4d, 0x4b,
-    0x4e, 0x58, 0x54, 0x49, 0x51, 0x42, 0x49, 0x4f, 0x46, 0x45, 0x52, 0x3d,
-    0x4b, 0x4b, 0x43, 0x54, 0x47, 0x47, 0x4c, 0x42, 0x4b, 0x49, 0x45, 0x46,
-    0x46, 0x4a, 0x51, 0x47, 0x47, 0x4f, 0x48, 0x4a, 0x3f, 0x4c, 0x4b, 0x57,
-    0x4a, 0x3f, 0x52, 0x4a, 0x56, 0x52, 0x4b, 0x54, 0x4c, 0x3e, 0x3f, 0x4f,
-    0x4b, 0x50, 0x4c, 0x53, 0x4a, 0x49, 0x46, 0x4e, 0x50, 0x48, 0x4f, 0x4b,
-    0x4a, 0x4e, 0x3e, 0x49, 0x45, 0x42, 0x42, 0x41, 0x47, 0x4b, 0x4f, 0x42,
-    0x49, 0x4c, 0x55, 0x4c, 0x4e, 0x42, 0x47, 0x42, 0x4b, 0x48, 0x46, 0x41,
-    0x46, 0x4e, 0x4d, 0x3f, 0x4f, 0x46, 0x4f, 0x4b, 0x4b, 0x4d, 0x50, 0x3e,
-    0x42, 0x43, 0x44, 0x4a, 0x49, 0x40, 0x4e, 0x43, 0x3e, 0x52, 0x3e, 0x44,
-    0x49, 0x43, 0x4d, 0x44, 0x62, 0x51, 0x42, 0x53, 0x51, 0x40, 0x4c, 0x64,
-    0x4f, 0x63, 0x4e, 0x5c, 0x5b, 0x5c, 0x48, 0x4d, 0x4a, 0x57, 0x4f, 0x42,
-    0x65, 0xfe, 0x5c, 0x4e, 0x47, 0x43, 0x4a, 0x58, 0x4e, 0x5e, 0x48, 0x4c,
-    0x51, 0x5e, 0x60, 0x56, 0x2f, 0x62, 0x54, 0x58, 0x51, 0x52, 0x55, 0x51,
-    0x36, 0x4b, 0x46, 0x51, 0x53, 0x5f, 0x46, 0x4c, 0x37, 0x4d, 0x4a, 0x45,
-    0x4b, 0x3f, 0x41, 0x42, 0x3f, 0x53, 0x4a, 0x48, 0x49, 0x4a, 0x4a, 0x45,
-    0x52, 0x3f, 0x52, 0x52, 0x45, 0x4d, 0x4f, 0x45, 0x46, 0x4a, 0x51, 0x48,
-    0x56, 0x47, 0x50, 0x3e, 0x46, 0x49, 0x4c, 0x51, 0x49, 0x54, 0x45, 0x4f,
-    0x4b, 0x4b, 0x49, 0x46, 0x4b, 0x4d, 0x49, 0x5c, 0x4d, 0x43, 0x47, 0x49,
-    0x48, 0x52, 0x46, 0x50, 0x51, 0x37, 0x50, 0x52, 0x4c, 0x4d, 0x4f, 0x51,
-    0x4f, 0x42, 0x50, 0x47, 0x48, 0x4e, 0x4d, 0x4c, 0x48, 0x48, 0x4a, 0x51,
-    0x49, 0x42, 0x50, 0x4f, 0x43, 0x4e, 0x47, 0x4b, 0x47, 0x4a, 0x44, 0x44,
-    0x4c, 0x51, 0x49, 0x44, 0x45, 0x45, 0x45, 0x48, 0x3f, 0x4a, 0x43, 0x49,
-    0x46, 0x49, 0x4c, 0x4d, 0x45, 0x50, 0x44, 0x45, 0x44, 0x55, 0x4a, 0x45,
-    0x48, 0x47, 0x4c, 0x43, 0x3f, 0x48, 0x42, 0x43, 0x43, 0x43, 0x48, 0x46,
-    0x5c, 0x51, 0x47, 0x51, 0x48, 0x40, 0x54, 0x66, 0x4e, 0x67, 0x4d, 0x5a,
-    0x60, 0x57, 0x47, 0x4d, 0x4d, 0x58, 0x53, 0x46, 0x66, 0x7e, 0x56, 0x48,
-    0x44, 0x4f, 0x49, 0x5c, 0x4a, 0x63, 0x50, 0x4c, 0x49, 0x56, 0x61, 0x50,
-    0x2c, 0x68, 0x4d, 0x51, 0x46, 0x4e, 0x5b, 0x51, 0x2e, 0x53, 0x54, 0x50,
-    0x46, 0x58, 0x44, 0x4f, 0x37, 0x48, 0x55, 0x50, 0x49, 0x49, 0x4e, 0x46,
-    0x43, 0x56, 0x52, 0x4e, 0x50, 0x4b, 0x50, 0x4c, 0x49, 0x40, 0x4d, 0x4f,
-    0x50, 0x41, 0x44, 0x39, 0x4b, 0x4d, 0x4b, 0x41, 0x51, 0x4d, 0x4c, 0x41,
-    0x3f, 0x52, 0x4e, 0x4b, 0x49, 0x53, 0x45, 0x43, 0x4d, 0x4f, 0x44, 0x4d,
-    0x4b, 0x53, 0x50, 0x4e, 0x45, 0x3f, 0x4e, 0x51, 0x50, 0x55, 0x4f, 0x51,
-    0x4d, 0x3d, 0x58, 0x3f, 0x46, 0x50, 0x50, 0x50, 0x56, 0x42, 0x49, 0x49,
-    0x50, 0x4f, 0x42, 0x4b, 0x4c, 0x45, 0x52, 0x41, 0x46, 0x43, 0x4c, 0x4a,
-    0x4c, 0x51, 0x4d, 0x4d, 0x4a, 0x49, 0x54, 0x49, 0x58, 0x53, 0x49, 0x45,
-    0x47, 0x4c, 0x4c, 0x44, 0x4e, 0x51, 0x4c, 0x4c, 0x47, 0x48, 0x4c, 0x4e,
-    0x49, 0x54, 0x4c, 0x51, 0x49, 0x48, 0x47, 0x45, 0x42, 0x49, 0x42, 0x51,
-    0x4e, 0x3f, 0x49, 0x41, 0x50, 0x3e, 0x4d, 0x50, 0x5c, 0x51, 0x4d, 0x56,
-    0x47, 0x48, 0x58, 0x65, 0x51, 0x6b, 0x56, 0x5b, 0x56, 0x55, 0x46, 0x49,
-    0x4b, 0x58, 0x59, 0x4a, 0x68, 0x79, 0x53, 0x46, 0x45, 0x4b, 0x53, 0x5d,
-    0x4b, 0x6f, 0x4e, 0x4f, 0x4c, 0x53, 0x5b, 0x52, 0x30, 0x63, 0x46, 0x57,
-    0x46, 0x50, 0x4b, 0x48, 0x2e, 0x4c, 0x46, 0x48, 0x44, 0x51, 0x46, 0x4a,
-    0x35, 0x55, 0x43, 0x4c, 0x43, 0x4d, 0x4e, 0x3e, 0x47, 0x56, 0x50, 0x4d,
-    0x44, 0x59, 0x4c, 0x51, 0x46, 0x42, 0x4e, 0x43, 0x4c, 0x44, 0x42, 0x3a,
-    0x40, 0x48, 0x46, 0x44, 0x45, 0x4a, 0x46, 0x3a, 0x53, 0x4c, 0x4d, 0x4c,
-    0x4a, 0x4f, 0x53, 0x40, 0x4b, 0x48, 0x54, 0x4b, 0x44, 0x59, 0x41, 0x50,
-    0x4e, 0x50, 0x55, 0x4d, 0x55, 0x41, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x50,
-    0x52, 0x4c, 0x50, 0x4d, 0x47, 0x42, 0x4f, 0x4b, 0x47, 0x43, 0x41, 0x4a,
-    0x55, 0x3e, 0x50, 0x4b, 0x41, 0x49, 0x47, 0x49, 0x53, 0x4d, 0x48, 0x4b,
-    0x43, 0x43, 0x51, 0x44, 0x4d, 0x4c, 0x44, 0x50, 0x4d, 0x42, 0x49, 0x4e,
-    0x50, 0x50, 0x4c, 0x49, 0x49, 0x51, 0x46, 0x43, 0x4a, 0x4e, 0x53, 0x47,
-    0x43, 0x46, 0x40, 0x49, 0x47, 0x44, 0x44, 0x4d, 0x4b, 0x4b, 0x51, 0x4b,
-    0x45, 0x49, 0x47, 0x43, 0x56, 0x49, 0x4c, 0x54, 0x50, 0x3c, 0x4c, 0x5e,
-    0x51, 0x67, 0x4f, 0x57, 0x57, 0x53, 0x3e, 0x4e, 0x4e, 0x5e, 0x4b, 0x48,
-    0x5a, 0x78, 0x55, 0x4a, 0x3f, 0x4b, 0x4c, 0x5b, 0x53, 0x64, 0x4d, 0x53,
-    0x49, 0x57, 0x57, 0x58, 0x37, 0x62, 0x4f, 0x56, 0x44, 0x4e, 0x58, 0x4a,
-    0x30, 0x4f, 0x40, 0x4e, 0x47, 0x58, 0x52, 0x50, 0x35, 0x4d, 0x49, 0x52,
-    0x4e, 0x42, 0x46, 0x47, 0x44, 0x57, 0x54, 0x43, 0x4e, 0x56, 0x43, 0x49,
-    0x44, 0x40, 0x44, 0x41, 0x50, 0x49, 0x4b, 0x44, 0x4d, 0x52, 0x49, 0x43,
-    0x52, 0x54, 0x49, 0x3f, 0x49, 0x42, 0x49, 0x4a, 0x43, 0x3e, 0x50, 0x40,
-    0x46, 0x4b, 0x50, 0x4b, 0x53, 0x4b, 0x47, 0x52, 0x51, 0x4b, 0x47, 0x3f,
-    0x46, 0x4b, 0x4c, 0x57, 0x49, 0x47, 0x54, 0x49, 0x50, 0x50, 0x4d, 0x4a,
-    0x42, 0x4e, 0x51, 0x4c, 0x47, 0x47, 0x42, 0x43, 0x54, 0x43, 0x46, 0x47,
-    0x4d, 0x43, 0x54, 0x47, 0x43, 0x58, 0x48, 0x45, 0x4b, 0x46, 0x48, 0x3d,
-    0x47, 0x3f, 0x44, 0x4f, 0x4e, 0x46, 0x41, 0x40, 0x4d, 0x4d, 0x4d, 0x52,
-    0x54, 0x47, 0x4f, 0x51, 0x4f, 0x45, 0x45, 0x48, 0x4b, 0x4d, 0x44, 0x52,
-    0x51, 0x4b, 0x48, 0x4f, 0x49, 0x49, 0x46, 0x50, 0x54, 0x42, 0x44, 0x51,
-    0x58, 0x4e, 0x43, 0x58, 0x55, 0x40, 0x53, 0x5a, 0x51, 0x61, 0x51, 0x60,
-    0x53, 0x57, 0x45, 0x4f, 0x45, 0x5e, 0x51, 0x42, 0x61, 0x7a, 0x55, 0x47,
-    0x41, 0x4b, 0x4a, 0x5b, 0x4c, 0x65, 0x4f, 0x55, 0x46, 0x54, 0x65, 0x59,
-    0x36, 0x61, 0x54, 0x55, 0x48, 0x57, 0x52, 0x4e, 0x24, 0x4b, 0x49, 0x4d,
-    0x43, 0x57, 0x44, 0x51, 0x3b, 0x4f, 0x45, 0x40, 0x47, 0x4a, 0x43, 0x47,
-    0x46, 0x58, 0x50, 0x54, 0x4d, 0x50, 0x44, 0x42, 0x4a, 0x46, 0x4b, 0x4d,
-    0x4f, 0x4f, 0x4d, 0x40, 0x48, 0x4a, 0x53, 0x48, 0x49, 0x48, 0x4d, 0x39,
-    0x47, 0x4e, 0x44, 0x4c, 0x4b, 0x49, 0x44, 0x42, 0x4a, 0x45, 0x46, 0x46,
-    0x53, 0x4d, 0x49, 0x4f, 0x4e, 0x48, 0x50, 0x4a, 0x4c, 0x46, 0x56, 0x4b,
-    0x4b, 0x57, 0x4c, 0x49, 0x4a, 0x4a, 0x43, 0x4e, 0x56, 0x45, 0x50, 0x4c,
-    0x47, 0x55, 0x48, 0x46, 0x4e, 0x46, 0x45, 0x3f, 0x4a, 0x4c, 0x4c, 0x47,
-    0x4a, 0x51, 0x4e, 0x50, 0x40, 0x52, 0x45, 0x45, 0x4b, 0x46, 0x4f, 0x44,
-    0x51, 0x4a, 0x4e, 0x4d, 0x4c, 0x46, 0x42, 0x47, 0x4a, 0x4e, 0x46, 0x42,
-    0x4b, 0x4f, 0x4b, 0x4e, 0x4e, 0x46, 0x42, 0x50, 0x53, 0x51, 0x4f, 0x54,
-    0x45, 0x4f, 0x45, 0x42, 0x4c, 0x45, 0x40, 0x48, 0x59, 0x49, 0x49, 0x53,
-    0x4c, 0x43, 0x4b, 0x57, 0x54, 0x64, 0x4e, 0x5f, 0x5c, 0x59, 0x4b, 0x56,
-    0x49, 0x5d, 0x4f, 0x4b, 0x62, 0x73, 0x54, 0x45, 0x49, 0x50, 0x48, 0x5a,
-    0x50, 0x6d, 0x4a, 0x4e, 0x48, 0x55, 0x5d, 0x57, 0x38, 0x68, 0x52, 0x5a,
-    0x46, 0x56, 0x4c, 0x5a, 0x2e, 0x55, 0x49, 0x4f, 0x4a, 0x57, 0x4f, 0x54,
-    0x41, 0x53, 0x46, 0x43, 0x45, 0x47, 0x53, 0x4a, 0x42, 0x4f, 0x4d, 0x48,
-    0x4c, 0x49, 0x47, 0x48, 0x45, 0x49, 0x48, 0x53, 0x48, 0x52, 0x4a, 0x44,
-    0x4c, 0x49, 0x52, 0x4b, 0x47, 0x51, 0x42, 0x47, 0x49, 0x51, 0x3f, 0x45,
-    0x47, 0x4e, 0x53, 0x33, 0x55, 0x51, 0x55, 0x48, 0x4b, 0x51, 0x56, 0x47,
-    0x43, 0x55, 0x47, 0x42, 0x47, 0x4f, 0x47, 0x51, 0x46, 0x55, 0x4a, 0x4b,
-    0x50, 0x52, 0x4f, 0x43, 0x4b, 0x53, 0x4d, 0x3f, 0x4e, 0x56, 0x50, 0x49,
-    0x4d, 0x47, 0x51, 0x49, 0x4a, 0x52, 0x44, 0x43, 0x4d, 0x4e, 0x41, 0x51,
-    0x4c, 0x4d, 0x47, 0x48, 0x4f, 0x40, 0x50, 0x46, 0x43, 0x4d, 0x4e, 0x50,
-    0x43, 0x47, 0x4e, 0x46, 0x4f, 0x4b, 0x51, 0x4b, 0x4a, 0x57, 0x42, 0x51,
-    0x4c, 0x54, 0x52, 0x42, 0x4c, 0x42, 0x47, 0x54, 0x4a, 0x4a, 0x47, 0x4a,
-    0x3f, 0x46, 0x4e, 0x4c, 0x53, 0x50, 0x47, 0x53, 0x49, 0x44, 0x52, 0x5a,
-    0x4b, 0x65, 0x50, 0x5b, 0x57, 0x59, 0x4a, 0x48, 0x48, 0x5f, 0x55, 0x48,
-    0x5c, 0x78, 0x55, 0x48, 0x4a, 0x4b, 0x49, 0x4c, 0x46, 0x6b, 0x54, 0x57,
-    0x55, 0x4b, 0x59, 0x52, 0x38, 0x5b, 0x57, 0x56, 0x4b, 0x4f, 0x48, 0x4e,
-    0x34, 0x5a, 0x4e, 0x4f, 0x43, 0x4e, 0x4b, 0x4e, 0x36, 0x4d, 0x52, 0x48,
-    0x4d, 0x4c, 0x4c, 0x49, 0x51, 0x54, 0x45, 0x54, 0x4a, 0x4e, 0x52, 0x41,
-    0x4c, 0x45, 0x4a, 0x53, 0x55, 0x4b, 0x50, 0x47, 0x4e, 0x4d, 0x43, 0x51,
-    0x4e, 0x4a, 0x51, 0x46, 0x4e, 0x4d, 0x48, 0x3f, 0x43, 0x52, 0x56, 0x38,
-    0x52, 0x46, 0x43, 0x49, 0x40, 0x49, 0x53, 0x41, 0x47, 0x41, 0x41, 0x42,
-    0x4f, 0x4b, 0x46, 0x4b, 0x4a, 0x57, 0x4a, 0x45, 0x4b, 0x46, 0x47, 0x3c,
-    0x43, 0x46, 0x4f, 0x50, 0x4c, 0x53, 0x4f, 0x41, 0x4a, 0x4a, 0x40, 0x4a,
-    0x3e, 0x4e, 0x4d, 0x41, 0x4a, 0x42, 0x49, 0x4c, 0x51, 0x46, 0x4f, 0x43,
-    0x4b, 0x41, 0x50, 0x48, 0x4a, 0x40, 0x52, 0x45, 0x40, 0x40, 0x46, 0x48,
-    0x48, 0x52, 0x52, 0x41, 0x43, 0x49, 0x49, 0x4c, 0x44, 0x48, 0x50, 0x4a,
-    0x47, 0x48, 0x4c, 0x42, 0x49, 0x48, 0x52, 0x56, 0x4b, 0x41, 0x4e, 0x47,
-    0x52, 0x56, 0x4e, 0x56, 0x4b, 0x38, 0x50, 0x55, 0x5a, 0x63, 0x51, 0x5a,
-    0x54, 0x52, 0x44, 0x45, 0x47, 0x5e, 0x4c, 0x4a, 0x5e, 0x71, 0x56, 0x44,
-    0x4c, 0x4b, 0x4c, 0x4e, 0x49, 0x69, 0x50, 0x53, 0x4d, 0x5c, 0x59, 0x50,
-    0x36, 0x5d, 0x46, 0x5b, 0x51, 0x55, 0x55, 0x51, 0x36, 0x5a, 0x53, 0x56,
-    0x54, 0x4a, 0x55, 0x53, 0x3c, 0x52, 0x4a, 0x45, 0x4c, 0x56, 0x49, 0x46,
-    0x4f, 0x5b, 0x43, 0x4b, 0x49, 0x4c, 0x4b, 0x41, 0x44, 0x4b, 0x47, 0x4b,
-    0x4b, 0x54, 0x4a, 0x4c, 0x49, 0x44, 0x46, 0x46, 0x48, 0x49, 0x47, 0x4a,
-    0x40, 0x4e, 0x47, 0x53, 0x4a, 0x47, 0x4a, 0x3b, 0x48, 0x4b, 0x50, 0x51,
-    0x50, 0x44, 0x4d, 0x49, 0x42, 0x4b, 0x43, 0x48, 0x4a, 0x43, 0x4d, 0x4d,
-    0x49, 0x4d, 0x43, 0x4f, 0x50, 0x49, 0x47, 0x48, 0x48, 0x4f, 0x49, 0x41,
-    0x4c, 0x46, 0x47, 0x3e, 0x51, 0x4d, 0x4e, 0x42, 0x3d, 0x53, 0x4d, 0x3b,
-    0x53, 0x52, 0x4c, 0x4c, 0x43, 0x46, 0x43, 0x3d, 0x53, 0x48, 0x43, 0x4e,
-    0x45, 0x52, 0x4d, 0x4a, 0x44, 0x49, 0x47, 0x4c, 0x4e, 0x4c, 0x4a, 0x4e,
-    0x41, 0x48, 0x4b, 0x44, 0x4d, 0x4a, 0x4d, 0x44, 0x4a, 0x45, 0x4f, 0x52,
-    0x45, 0x3f, 0x4b, 0x48, 0x43, 0x41, 0x3d, 0x53, 0x53, 0x50, 0x4a, 0x56,
-    0x4d, 0x3e, 0x55, 0x4e, 0x56, 0x5e, 0x52, 0x52, 0x54, 0x50, 0x42, 0x4a,
-    0x4d, 0x5f, 0x4f, 0x49, 0x5d, 0x6f, 0x55, 0x4a, 0x47, 0x49, 0x4e, 0x4a,
-    0x43, 0x6e, 0x4e, 0x4f, 0x52, 0x59, 0x62, 0x4b, 0x3e, 0x5c, 0x4c, 0x4e,
-    0x45, 0x52, 0x43, 0x4d, 0x3c, 0x58, 0x52, 0x49, 0x48, 0x55, 0x53, 0x4e,
-    0x3d, 0x4e, 0x4c, 0x4b, 0x4b, 0x50, 0x4a, 0x47, 0x45, 0x62, 0x50, 0x49,
-    0x48, 0x4b, 0x55, 0x45, 0x46, 0x51, 0x41, 0x55, 0x54, 0x55, 0x50, 0x47,
-    0x46, 0x4d, 0x46, 0x4b, 0x41, 0x49, 0x4c, 0x40, 0x45, 0x4f, 0x52, 0x54,
-    0x45, 0x4d, 0x53, 0x3a, 0x4c, 0x55, 0x4e, 0x48, 0x44, 0x45, 0x56, 0x3c,
-    0x48, 0x46, 0x4b, 0x51, 0x53, 0x43, 0x41, 0x49, 0x4c, 0x52, 0x48, 0x42,
-    0x48, 0x3f, 0x4c, 0x38, 0x46, 0x50, 0x4a, 0x44, 0x50, 0x54, 0x4e, 0x38,
-    0x48, 0x42, 0x43, 0x4a, 0x4c, 0x44, 0x47, 0x42, 0x42, 0x46, 0x4a, 0x50,
-    0x47, 0x4b, 0x43, 0x40, 0x44, 0x46, 0x46, 0x4d, 0x50, 0x4a, 0x4e, 0x51,
-    0x44, 0x40, 0x50, 0x43, 0x52, 0x4d, 0x42, 0x4c, 0x50, 0x41, 0x4a, 0x4e,
-    0x45, 0x49, 0x4d, 0x40, 0x46, 0x51, 0x43, 0x4b, 0x48, 0x47, 0x42, 0x55,
-    0x4a, 0x41, 0x4f, 0x49, 0x4f, 0x4e, 0x47, 0x4c, 0x4a, 0x48, 0x50, 0x4e,
-    0x50, 0x57, 0x4e, 0x56, 0x56, 0x4e, 0x44, 0x48, 0x4a, 0x5b, 0x55, 0x49,
-    0x59, 0x67, 0x54, 0x46, 0x4f, 0x41, 0x4d, 0x4e, 0x4a, 0x63, 0x4d, 0x44,
-    0x53, 0x5b, 0x59, 0x4f, 0x43, 0x55, 0x56, 0x4e, 0x55, 0x4c, 0x4b, 0x54,
-    0x3c, 0x56, 0x4d, 0x50, 0x4f, 0x4a, 0x5a, 0x47, 0x48, 0x56, 0x4f, 0x4f,
-    0x50, 0x51, 0x48, 0x4e, 0x4d, 0x50, 0x4e, 0x45, 0x4b, 0x48, 0x4e, 0x44,
-    0x46, 0x4d, 0x43, 0x46, 0x41, 0x59, 0x53, 0x4b, 0x4a, 0x3e, 0x51, 0x47,
-    0x43, 0x48, 0x52, 0x3f, 0x43, 0x50, 0x4b, 0x4f, 0x41, 0x48, 0x43, 0x2e,
-    0x4d, 0x4e, 0x4c, 0x45, 0x45, 0x46, 0x4b, 0x43, 0x46, 0x49, 0x46, 0x4d,
-    0x47, 0x4e, 0x4d, 0x3c, 0x47, 0x4a, 0x52, 0x4e, 0x41, 0x50, 0x43, 0x3a,
-    0x50, 0x47, 0x4a, 0x45, 0x52, 0x4a, 0x4c, 0x3f, 0x42, 0x3d, 0x49, 0x48,
-    0x48, 0x4c, 0x42, 0x3a, 0x40, 0x47, 0x46, 0x4e, 0x44, 0x52, 0x46, 0x44,
-    0x4a, 0x44, 0x43, 0x49, 0x42, 0x45, 0x3f, 0x50, 0x4c, 0x44, 0x48, 0x43,
-    0x47, 0x4a, 0x48, 0x48, 0x3e, 0x45, 0x43, 0x48, 0x4a, 0x48, 0x53, 0x4b,
-    0x50, 0x49, 0x43, 0x4d, 0x53, 0x4f, 0x4b, 0x4b, 0x40, 0x42, 0x50, 0x4d,
-    0x53, 0x4e, 0x44, 0x4d, 0x45, 0x3d, 0x51, 0x51, 0x4f, 0x59, 0x4b, 0x51,
-    0x4a, 0x4e, 0x42, 0x40, 0x49, 0x5b, 0x4b, 0x43, 0x53, 0x60, 0x47, 0x49,
-    0x4a, 0x44, 0x44, 0x48, 0x4b, 0x60, 0x51, 0x3f, 0x4b, 0x5b, 0x4f, 0x4a,
-    0x4a, 0x50, 0x49, 0x46, 0x55, 0x50, 0x4b, 0x4c, 0x40, 0x4e, 0x51, 0x4f,
-    0x4b, 0x51, 0x54, 0x50, 0x48, 0x4e, 0x4a, 0x4f, 0x4d, 0x4e, 0x54, 0x4d,
-    0x41, 0x50, 0x4e, 0x47, 0x47, 0x47, 0x54, 0x3b, 0x51, 0x54, 0x50, 0x49,
-    0x48, 0x4c, 0x4e, 0x47, 0x3f, 0x3c, 0x4c, 0x43, 0x45, 0x42, 0x45, 0x37,
-    0x41, 0x52, 0x49, 0x47, 0x4e, 0x4a, 0x4b, 0x37, 0x48, 0x4d, 0x4e, 0x4a,
-    0x42, 0x56, 0x3d, 0x35, 0x48, 0x42, 0x4b, 0x4a, 0x44, 0x52, 0x40, 0x48,
-    0x4f, 0x49, 0x4f, 0x4c, 0x4d, 0x43, 0x49, 0x38, 0x4b, 0x42, 0x48, 0x42,
-    0x45, 0x45, 0x54, 0x3a, 0x47, 0x47, 0x52, 0x45, 0x4a, 0x48, 0x47, 0x39,
-    0x4d, 0x45, 0x54, 0x4b, 0x4e, 0x4f, 0x4e, 0x38, 0x4a, 0x4b, 0x48, 0x45,
-    0x4e, 0x43, 0x4e, 0x4e, 0x46, 0x4e, 0x4e, 0x50, 0x46, 0x4c, 0x42, 0x45,
-    0x4b, 0x46, 0x47, 0x4d, 0x49, 0x3f, 0x4f, 0x50, 0x46, 0x4a, 0x47, 0x4e,
-    0x4a, 0x3e, 0x50, 0x46, 0x47, 0x40, 0x4f, 0x47, 0x51, 0x4b, 0x43, 0x46,
-    0x4a, 0x42, 0x55, 0x4d, 0x46, 0x63, 0x49, 0x4e, 0x4f, 0x4f, 0x42, 0x45,
-    0x50, 0x57, 0x49, 0x3e, 0x57, 0x63, 0x45, 0x4a, 0x49, 0x50, 0x41, 0x4a,
-    0x48, 0x64, 0x4f, 0x42, 0x47, 0x58, 0x4b, 0x45, 0x43, 0x57, 0x49, 0x58,
-    0x51, 0x51, 0x47, 0x43, 0x51, 0x4b, 0x4a, 0x45, 0x50, 0x54, 0x4d, 0x4d,
-    0x3e, 0x4a, 0x50, 0x40, 0x51, 0x4f, 0x52, 0x48, 0x53, 0x49, 0x44, 0x4b,
-    0x51, 0x4b, 0x50, 0x42, 0x4d, 0x49, 0x4a, 0x46, 0x44, 0x50, 0x47, 0x3f,
-    0x48, 0x47, 0x41, 0x4a, 0x42, 0x52, 0x4a, 0x33, 0x50, 0x50, 0x54, 0x3f,
-    0x44, 0x4e, 0x51, 0x3c, 0x4e, 0x51, 0x48, 0x4b, 0x47, 0x49, 0x3f, 0x3d,
-    0x4e, 0x46, 0x4a, 0x41, 0x40, 0x50, 0x49, 0x40, 0x4a, 0x4b, 0x45, 0x50,
-    0x4e, 0x4d, 0x4b, 0x39, 0x4e, 0x4b, 0x48, 0x3c, 0x47, 0x44, 0x4c, 0x42,
-    0x45, 0x50, 0x3e, 0x54, 0x4d, 0x49, 0x48, 0x3c, 0x45, 0x42, 0x55, 0x4a,
-    0x41, 0x4f, 0x40, 0x3f, 0x47, 0x46, 0x46, 0x44, 0x4f, 0x47, 0x46, 0x44,
-    0x41, 0x40, 0x44, 0x48, 0x3e, 0x3c, 0x46, 0x3e, 0x4a, 0x45, 0x4c, 0x52,
-    0x47, 0x42, 0x47, 0x3f, 0x47, 0x4e, 0x4b, 0x53, 0x4a, 0x3d, 0x4d, 0x47,
-    0x4f, 0x3d, 0x4e, 0x43, 0x4f, 0x46, 0x43, 0x43, 0x46, 0x41, 0x4f, 0x42,
-    0x46, 0x57, 0x4d, 0x51, 0x49, 0x51, 0x4c, 0x44, 0x51, 0x4f, 0x46, 0x44,
-    0x54, 0x5d, 0x4f, 0x40, 0x59, 0x46, 0x53, 0x46, 0x48, 0x54, 0x43, 0x45,
-    0x4d, 0x51, 0x4f, 0x44, 0x44, 0x53, 0x49, 0x4e, 0x48, 0x46, 0x44, 0x4a,
-    0x4a, 0x42, 0x4c, 0x46, 0x54, 0x4f, 0x52, 0x47, 0x46, 0x44, 0x4c, 0x4d,
-    0x4c, 0x47, 0x4d, 0x40, 0x55, 0x58, 0x46, 0x46, 0x3f, 0x3e, 0x47, 0x36,
-    0x3f, 0x4d, 0x4b, 0x4d, 0x4f, 0x4f, 0x48, 0x34, 0x4d, 0x46, 0x46, 0x50,
-    0x50, 0x4b, 0x47, 0x45, 0x4e, 0x49, 0x50, 0x4f, 0x4a, 0x48, 0x4f, 0x39,
-    0x53, 0x4c, 0x4b, 0x56, 0x45, 0x4f, 0x55, 0x3a, 0x40, 0x53, 0x43, 0x4b,
-    0x47, 0x3d, 0x4c, 0x34, 0x4b, 0x4e, 0x4a, 0x4b, 0x4d, 0x49, 0x4e, 0x40,
-    0x4d, 0x48, 0x40, 0x4a, 0x4a, 0x4b, 0x4a, 0x42, 0x4c, 0x52, 0x43, 0x42,
-    0x44, 0x3f, 0x4e, 0x42, 0x44, 0x45, 0x40, 0x3d, 0x4b, 0x45, 0x4a, 0x43,
-    0x4b, 0x4b, 0x4e, 0x46, 0x55, 0x43, 0x44, 0x3f, 0x44, 0x43, 0x4b, 0x4b,
-    0x45, 0x51, 0x48, 0x49, 0x3d, 0x44, 0x4a, 0x4a, 0x50, 0x50, 0x47, 0x44,
-    0x4f, 0x3e, 0x3f, 0x43, 0x4c, 0x46, 0x4a, 0x4e, 0x4c, 0x52, 0x48, 0x4e,
-    0x48, 0x46, 0x45, 0x48, 0x41, 0x4f, 0x51, 0x48, 0x40, 0x4d, 0x4a, 0x4b,
-    0x4c, 0x51, 0x49, 0x50, 0x4e, 0x4b, 0x4a, 0x42, 0x49, 0x54, 0x4e, 0x43,
-    0x52, 0x47, 0x4a, 0x41, 0x42, 0x51, 0x48, 0x4a, 0x46, 0x45, 0x4a, 0x43,
-    0x4e, 0x4f, 0x41, 0x49, 0x4b, 0x42, 0x40, 0x4a, 0x50, 0x41, 0x42, 0x3f,
-    0x49, 0x4a, 0x40, 0x3e, 0x3f, 0x42, 0x4d, 0x51, 0x4e, 0x4e, 0x47, 0x41,
-    0x4e, 0x4e, 0x49, 0x4b, 0x41, 0x45, 0x51, 0x40, 0x45, 0x4c, 0x3f, 0x42,
-    0x4c, 0x45, 0x4d, 0x39, 0x46, 0x52, 0x4a, 0x4e, 0x4c, 0x49, 0x4e, 0x43,
-    0x43, 0x4c, 0x48, 0x46, 0x48, 0x49, 0x50, 0x3a, 0x3f, 0x49, 0x42, 0x4f,
-    0x42, 0x4d, 0x4e, 0x3f, 0x51, 0x4b, 0x4e, 0x4b, 0x51, 0x44, 0x43, 0x4a,
-    0x4a, 0x4c, 0x50, 0x48, 0x45, 0x47, 0x4d, 0x41, 0x47, 0x45, 0x51, 0x41,
-    0x42, 0x48, 0x4c, 0x39, 0x51, 0x45, 0x46, 0x53, 0x4b, 0x50, 0x46, 0x45,
-    0x4b, 0x4d, 0x42, 0x4b, 0x3f, 0x45, 0x4b, 0x4e, 0x50, 0x50, 0x47, 0x4a,
-    0x45, 0x40, 0x4b, 0x43, 0x3f, 0x4a, 0x41, 0x42, 0x51, 0x41, 0x4d, 0x42,
-    0x53, 0x48, 0x48, 0x49, 0x4b, 0x40, 0x42, 0x3d, 0x4f, 0x53, 0x49, 0x46,
-    0x46, 0x43, 0x42, 0x44, 0x46, 0x48, 0x3f, 0x46, 0x31, 0x43, 0x4d, 0x4b,
-    0x48, 0x4d, 0x4c, 0x43, 0x45, 0x53, 0x50, 0x40, 0x4a, 0x48, 0x45, 0x3b,
-    0x4f, 0x4d, 0x53, 0x4c, 0x44, 0x54, 0x50, 0x66, 0x3f, 0x45, 0x4c, 0x4c,
-    0x4a, 0x49, 0x49, 0x4a, 0x40, 0x52, 0x3e, 0x4c, 0x49, 0x40, 0x44, 0x49,
-    0x48, 0x3f, 0x45, 0x5b, 0x49, 0x4b, 0x4c, 0x44, 0x50, 0x4e, 0x4a, 0x4a,
-    0x49, 0x4e, 0x4f, 0x47, 0x46, 0x4b, 0x44, 0x3b, 0x4e, 0x4b, 0x48, 0x46,
-    0x45, 0x45, 0x3d, 0x35, 0x4c, 0x49, 0x54, 0x42, 0x51, 0x46, 0x49, 0x2d,
-    0x43, 0x4a, 0x53, 0x49, 0x49, 0x42, 0x4f, 0x40, 0x4e, 0x50, 0x54, 0x51,
-    0x4b, 0x45, 0x48, 0x35, 0x4d, 0x41, 0x51, 0x40, 0x41, 0x49, 0x4a, 0x3b,
-    0x45, 0x50, 0x48, 0x51, 0x51, 0x4d, 0x4c, 0x36, 0x47, 0x4a, 0x44, 0x45,
-    0x4d, 0x47, 0x43, 0x3a, 0x48, 0x40, 0x42, 0x4f, 0x4f, 0x4f, 0x4f, 0x43,
-    0x4a, 0x41, 0x4b, 0x53, 0x43, 0x46, 0x4f, 0x39, 0x46, 0x4a, 0x4d, 0x53,
-    0x41, 0x44, 0x4e, 0x44, 0x3f, 0x47, 0x4c, 0x4d, 0x4d, 0x43, 0x45, 0x3d,
-    0x43, 0x4b, 0x3e, 0x48, 0x42, 0x4c, 0x47, 0x42, 0x42, 0x50, 0x49, 0x4b,
-    0x43, 0x4e, 0x44, 0x44, 0x4c, 0x3d, 0x4c, 0x47, 0x4e, 0x42, 0x4b, 0x44,
-    0x4b, 0x44, 0x3f, 0x49, 0x33, 0x46, 0x4a, 0x4a, 0x42, 0x57, 0x5e, 0x4a,
-    0x46, 0x4f, 0x55, 0x3c, 0x4a, 0x4b, 0x4c, 0x43, 0x51, 0x59, 0x64, 0x51,
-    0x45, 0x60, 0x4b, 0x65, 0x46, 0x4a, 0x4e, 0x49, 0x41, 0x4b, 0x50, 0x5c,
-    0x48, 0x4b, 0x3e, 0x52, 0x4f, 0x2f, 0x4e, 0x4a, 0x45, 0x53, 0x48, 0x59,
-    0x4c, 0x4e, 0x4a, 0x4d, 0x49, 0x40, 0x52, 0x44, 0x49, 0x46, 0x4e, 0x46,
-    0x42, 0x4b, 0x4a, 0x4b, 0x4b, 0x4b, 0x4f, 0x52, 0x46, 0x50, 0x4d, 0x3d,
-    0x46, 0x4b, 0x4b, 0x40, 0x4d, 0x3f, 0x43, 0x33, 0x4e, 0x53, 0x4b, 0x4a,
-    0x45, 0x48, 0x4c, 0x2e, 0x48, 0x4f, 0x49, 0x42, 0x54, 0x4f, 0x4b, 0x2b,
-    0x55, 0x4e, 0x43, 0x4d, 0x4d, 0x47, 0x42, 0x3e, 0x48, 0x48, 0x4d, 0x54,
-    0x52, 0x4f, 0x43, 0x37, 0x4b, 0x42, 0x4b, 0x4e, 0x49, 0x49, 0x4b, 0x2e,
-    0x45, 0x4e, 0x48, 0x4e, 0x44, 0x49, 0x48, 0x30, 0x4c, 0x4b, 0x3f, 0x42,
-    0x4f, 0x4f, 0x4e, 0x38, 0x4f, 0x42, 0x54, 0x49, 0x41, 0x42, 0x45, 0x3a,
-    0x47, 0x43, 0x43, 0x4b, 0x49, 0x40, 0x4d, 0x38, 0x52, 0x4c, 0x3d, 0x4d,
-    0x43, 0x54, 0x4e, 0x41, 0x4a, 0x47, 0x44, 0x51, 0x47, 0x48, 0x41, 0x47,
-    0x4d, 0x41, 0x46, 0x4c, 0x4d, 0x46, 0x51, 0x4a, 0x49, 0x46, 0x4a, 0x42,
-    0x3a, 0x43, 0x4a, 0x4b, 0x43, 0x4c, 0x68, 0x44, 0x4b, 0x52, 0x50, 0x37,
-    0x4d, 0x4c, 0x57, 0x4c, 0x68, 0x62, 0x64, 0x4a, 0x3e, 0x64, 0x4b, 0x66,
-    0x48, 0x4d, 0x54, 0x57, 0x4b, 0x52, 0x49, 0x5c, 0x4d, 0x55, 0x51, 0x57,
-    0x4c, 0x3a, 0x48, 0x43, 0x3b, 0x43, 0x52, 0x5d, 0x45, 0x4e, 0x51, 0x4d,
-    0x4a, 0x55, 0x4e, 0x4c, 0x44, 0x51, 0x4c, 0x4f, 0x41, 0x4f, 0x4a, 0x43,
-    0x53, 0x48, 0x47, 0x49, 0x46, 0x52, 0x48, 0x3e, 0x4b, 0x4e, 0x4a, 0x50,
-    0x4f, 0x47, 0x3e, 0x2e, 0x4b, 0x51, 0x4a, 0x44, 0x4c, 0x49, 0x4f, 0x26,
-    0x48, 0x4f, 0x44, 0x51, 0x48, 0x3f, 0x4c, 0x30, 0x4e, 0x48, 0x4d, 0x48,
-    0x48, 0x44, 0x4b, 0x2f, 0x50, 0x41, 0x4d, 0x50, 0x52, 0x42, 0x45, 0x33,
-    0x4c, 0x48, 0x48, 0x3d, 0x46, 0x41, 0x43, 0x38, 0x45, 0x4f, 0x48, 0x4b,
-    0x41, 0x49, 0x4c, 0x2f, 0x53, 0x4c, 0x48, 0x4a, 0x47, 0x40, 0x4a, 0x31,
-    0x52, 0x40, 0x49, 0x4c, 0x3f, 0x48, 0x48, 0x39, 0x48, 0x3f, 0x45, 0x43,
-    0x40, 0x48, 0x3c, 0x40, 0x4c, 0x48, 0x48, 0x4d, 0x3e, 0x42, 0x4a, 0x3d,
-    0x4c, 0x45, 0x44, 0x46, 0x44, 0x45, 0x4a, 0x47, 0x52, 0x48, 0x4a, 0x4d,
-    0x3f, 0x49, 0x4c, 0x4c, 0x48, 0x44, 0x4c, 0x44, 0x3d, 0x41, 0x47, 0x45,
-    0x43, 0x4a, 0x5a, 0x3f, 0x48, 0x5d, 0x50, 0x35, 0x47, 0x4f, 0x5b, 0x46,
-    0x6e, 0x50, 0x6d, 0x44, 0x49, 0x6a, 0x53, 0x6b, 0x4b, 0x4b, 0x4f, 0x62,
-    0x45, 0x57, 0x48, 0x5b, 0x40, 0x4b, 0x4f, 0x63, 0x48, 0x3a, 0x4b, 0x42,
-    0x43, 0x53, 0x41, 0x5f, 0x54, 0x3e, 0x4d, 0x43, 0x3d, 0x4c, 0x46, 0x46,
-    0x49, 0x56, 0x4b, 0x45, 0x47, 0x45, 0x4e, 0x4f, 0x4c, 0x4d, 0x4f, 0x47,
-    0x49, 0x4b, 0x51, 0x33, 0x4b, 0x45, 0x4d, 0x41, 0x51, 0x4a, 0x43, 0x2a,
-    0x50, 0x4b, 0x4a, 0x4b, 0x4c, 0x52, 0x4c, 0x3b, 0x45, 0x4c, 0x51, 0x44,
-    0x4c, 0x48, 0x43, 0x35, 0x51, 0x50, 0x48, 0x49, 0x3f, 0x48, 0x3d, 0x3b,
-    0x52, 0x3f, 0x42, 0x4b, 0x49, 0x49, 0x47, 0x38, 0x4a, 0x4a, 0x41, 0x52,
-    0x41, 0x3e, 0x4b, 0x2f, 0x46, 0x4d, 0x49, 0x44, 0x46, 0x3b, 0x47, 0x36,
-    0x46, 0x3f, 0x49, 0x48, 0x47, 0x42, 0x42, 0x35, 0x44, 0x4b, 0x4d, 0x56,
-    0x50, 0x49, 0x43, 0x42, 0x4b, 0x3e, 0x53, 0x44, 0x4a, 0x43, 0x47, 0x38,
-    0x4a, 0x45, 0x4d, 0x3f, 0x46, 0x4a, 0x47, 0x3a, 0x4c, 0x3e, 0x47, 0x45,
-    0x46, 0x4b, 0x45, 0x49, 0x4a, 0x4b, 0x54, 0x49, 0x4a, 0x53, 0x4a, 0x4c,
-    0x45, 0x48, 0x53, 0x42, 0x4b, 0x47, 0x4e, 0x50, 0x3d, 0x51, 0x60, 0x3e,
-    0x53, 0x5d, 0x51, 0x30, 0x45, 0x50, 0x59, 0x4e, 0x62, 0x52, 0x68, 0x51,
-    0x45, 0x6c, 0x4c, 0x64, 0x4d, 0x47, 0x55, 0x61, 0x44, 0x57, 0x44, 0x58,
-    0x44, 0x4a, 0x53, 0x58, 0x47, 0x31, 0x3f, 0x4c, 0x43, 0x45, 0x48, 0x5e,
-    0x41, 0x43, 0x3f, 0x43, 0x51, 0x46, 0x48, 0x4b, 0x4d, 0x5b, 0x45, 0x4b,
-    0x48, 0x46, 0x3f, 0x45, 0x47, 0x45, 0x40, 0x4a, 0x51, 0x51, 0x3d, 0x3f,
-    0x43, 0x45, 0x4d, 0x4a, 0x47, 0x50, 0x49, 0x32, 0x4c, 0x5a, 0x55, 0x4f,
-    0x4c, 0x51, 0x43, 0x37, 0x40, 0x59, 0x49, 0x49, 0x4e, 0x4f, 0x47, 0x34,
-    0x40, 0x4c, 0x4a, 0x41, 0x4a, 0x47, 0x4a, 0x42, 0x4e, 0x4a, 0x48, 0x4e,
-    0x4e, 0x4e, 0x45, 0x39, 0x4e, 0x45, 0x45, 0x4e, 0x4c, 0x48, 0x4a, 0x35,
-    0x45, 0x4c, 0x49, 0x4f, 0x51, 0x43, 0x3c, 0x3a, 0x4a, 0x4a, 0x46, 0x48,
-    0x49, 0x42, 0x4e, 0x2f, 0x42, 0x4e, 0x45, 0x50, 0x51, 0x40, 0x45, 0x32,
-    0x4a, 0x4d, 0x44, 0x4e, 0x48, 0x48, 0x47, 0x2f, 0x48, 0x4b, 0x49, 0x44,
-    0x48, 0x4d, 0x46, 0x3b, 0x46, 0x4a, 0x41, 0x4e, 0x4e, 0x47, 0x54, 0x4b,
-    0x45, 0x49, 0x45, 0x44, 0x45, 0x48, 0x4a, 0x46, 0x55, 0x49, 0x47, 0x49,
-    0x4b, 0x42, 0x48, 0x4f, 0x3f, 0x52, 0x60, 0x39, 0x4b, 0x5e, 0x55, 0x2e,
-    0x48, 0x50, 0x59, 0x4f, 0x68, 0x5f, 0x64, 0x4f, 0x3b, 0x71, 0x50, 0x63,
-    0x4f, 0x50, 0x50, 0x6c, 0x4b, 0x55, 0x47, 0x5b, 0x4c, 0x40, 0x48, 0x59,
-    0x4f, 0x2e, 0x4b, 0x4c, 0x4e, 0x4e, 0x46, 0x61, 0x50, 0x41, 0x4c, 0x4a,
-    0x44, 0x3e, 0x3f, 0x47, 0x4b, 0x4f, 0x47, 0x4b, 0x47, 0x3d, 0x41, 0x49,
-    0x49, 0x3f, 0x4d, 0x44, 0x4a, 0x4d, 0x45, 0x41, 0x4d, 0x43, 0x49, 0x3c,
-    0x49, 0x57, 0x49, 0x3b, 0x49, 0x59, 0x3f, 0x4f, 0x4e, 0x49, 0x4e, 0x46,
-    0x52, 0x4e, 0x4c, 0x54, 0x4a, 0x48, 0x48, 0x3a, 0x44, 0x4a, 0x4f, 0x4a,
-    0x44, 0x4b, 0x43, 0x4d, 0x51, 0x42, 0x53, 0x4d, 0x52, 0x41, 0x4d, 0x43,
-    0x4e, 0x54, 0x4b, 0x42, 0x4b, 0x3f, 0x53, 0x45, 0x3f, 0x4a, 0x45, 0x50,
-    0x3f, 0x4c, 0x4f, 0x43, 0x46, 0x42, 0x4b, 0x4d, 0x4c, 0x3b, 0x48, 0x40,
-    0x4e, 0x4e, 0x49, 0x46, 0x4d, 0x4d, 0x52, 0x40, 0x4e, 0x4f, 0x46, 0x4a,
-    0x40, 0x4b, 0x4c, 0x40, 0x4f, 0x4a, 0x44, 0x41, 0x46, 0x3c, 0x40, 0x3d,
-    0x44, 0x48, 0x4a, 0x50, 0x46, 0x53, 0x46, 0x40, 0x44, 0x3e, 0x47, 0x43,
-    0x48, 0x3d, 0x4e, 0x3e, 0x48, 0x49, 0x4b, 0x49, 0x4c, 0x3e, 0x4c, 0x4a,
-    0x46, 0x4e, 0x62, 0x3c, 0x59, 0x60, 0x51, 0x29, 0x47, 0x52, 0x59, 0x4c,
-    0x67, 0x68, 0x68, 0x4e, 0x3b, 0x72, 0x4d, 0x68, 0x44, 0x4f, 0x53, 0x63,
-    0x47, 0x5a, 0x45, 0x4f, 0x4b, 0x37, 0x43, 0x5b, 0x4b, 0x3d, 0x44, 0x41,
-    0x4a, 0x4b, 0x3c, 0x64, 0x48, 0x38, 0x42, 0x3f, 0x48, 0x46, 0x4b, 0x46,
-    0x46, 0x4f, 0x46, 0x46, 0x44, 0x3c, 0x4b, 0x4f, 0x4d, 0x4a, 0x4b, 0x46,
-    0x4d, 0x4f, 0x4f, 0x3f, 0x3a, 0x4b, 0x55, 0x3c, 0x51, 0x56, 0x4d, 0x42,
-    0x52, 0x5a, 0x3e, 0x4b, 0x54, 0x57, 0x4e, 0x4d, 0x4e, 0x5b, 0x4e, 0x49,
-    0x4e, 0x3c, 0x40, 0x41, 0x40, 0x4d, 0x48, 0x42, 0x49, 0x4e, 0x4f, 0x47,
-    0x47, 0x48, 0x50, 0x49, 0x51, 0x46, 0x44, 0x45, 0x49, 0x46, 0x43, 0x48,
-    0x48, 0x49, 0x4d, 0x4c, 0x45, 0x4f, 0x4c, 0x45, 0x44, 0x40, 0x49, 0x45,
-    0x49, 0x51, 0x4b, 0x4b, 0x50, 0x4b, 0x48, 0x3d, 0x4e, 0x52, 0x4a, 0x47,
-    0x49, 0x41, 0x55, 0x3d, 0x48, 0x4d, 0x49, 0x48, 0x4e, 0x4c, 0x48, 0x3d,
-    0x3f, 0x4c, 0x4e, 0x53, 0x3e, 0x48, 0x4a, 0x3f, 0x54, 0x4d, 0x54, 0x4b,
-    0x47, 0x4e, 0x44, 0x48, 0x49, 0x4b, 0x4c, 0x49, 0x4d, 0x42, 0x52, 0x4b,
-    0x40, 0x3e, 0x54, 0x49, 0x55, 0x45, 0x47, 0x4d, 0x45, 0x5c, 0x60, 0x40,
-    0x57, 0x60, 0x5b, 0x27, 0x4a, 0x5a, 0x64, 0x53, 0x6a, 0x5a, 0x5f, 0x52,
-    0x3a, 0x72, 0x4b, 0x5f, 0x45, 0x56, 0x5f, 0x5f, 0x54, 0x5f, 0x39, 0x52,
-    0x51, 0x3e, 0x3b, 0x5a, 0x44, 0x32, 0x46, 0x50, 0x3a, 0x4f, 0x44, 0x5d,
-    0x4c, 0x41, 0x39, 0x3f, 0x45, 0x46, 0x3b, 0x43, 0x46, 0x51, 0x3c, 0x4c,
-    0x4b, 0x43, 0x4b, 0x51, 0x43, 0x48, 0x4d, 0x43, 0x38, 0x46, 0x46, 0x43,
-    0x44, 0x4a, 0x46, 0x49, 0x48, 0x50, 0x4e, 0x4a, 0x4e, 0x58, 0x4a, 0x49,
-    0x48, 0x4f, 0x4a, 0x49, 0x41, 0x57, 0x51, 0x50, 0x4b, 0x48, 0x47, 0x4b,
-    0x53, 0x3d, 0x4b, 0x4c, 0x4b, 0x4b, 0x55, 0x56, 0x45, 0x49, 0x46, 0x4c,
-    0x45, 0x51, 0x47, 0x50, 0x40, 0x4b, 0x4f, 0x4b, 0x4d, 0x4a, 0x4f, 0x50,
-    0x49, 0x53, 0x50, 0x46, 0x40, 0x48, 0x4a, 0x4a, 0x49, 0x4a, 0x42, 0x45,
-    0x4b, 0x45, 0x42, 0x45, 0x4e, 0x4e, 0x44, 0x41, 0x4b, 0x4a, 0x49, 0x3f,
-    0x41, 0x51, 0x48, 0x4c, 0x40, 0x41, 0x51, 0x42, 0x49, 0x49, 0x48, 0x42,
-    0x48, 0x4c, 0x4b, 0x3c, 0x49, 0x45, 0x42, 0x49, 0x4c, 0x46, 0x45, 0x43,
-    0x43, 0x48, 0x48, 0x41, 0x43, 0x42, 0x4c, 0x4b, 0x40, 0x45, 0x44, 0x46,
-    0x4c, 0x4b, 0x4e, 0x4d, 0x3f, 0x59, 0x55, 0x41, 0x56, 0x5a, 0x51, 0x30,
-    0x49, 0x5a, 0x63, 0x4d, 0x61, 0x5b, 0x64, 0x55, 0x34, 0x7a, 0x4c, 0x62,
-    0x3e, 0x5d, 0x56, 0x60, 0x48, 0x61, 0x3f, 0x54, 0x46, 0x40, 0x42, 0x56,
-    0x52, 0x35, 0x4c, 0x59, 0x45, 0x4c, 0x42, 0x60, 0x49, 0x3f, 0x4c, 0x3c,
-    0x52, 0x36, 0x46, 0x3d, 0x58, 0x4b, 0x41, 0x48, 0x3e, 0x45, 0x4e, 0x54,
-    0x4c, 0x56, 0x47, 0x44, 0x39, 0x4a, 0x4a, 0x4a, 0x46, 0x48, 0x4a, 0x48,
-    0x51, 0x4f, 0x4b, 0x49, 0x45, 0x4b, 0x44, 0x4c, 0x3e, 0x4c, 0x42, 0x59,
-    0x47, 0x55, 0x47, 0x47, 0x41, 0x44, 0x44, 0x4a, 0x44, 0x4b, 0x44, 0x46,
-    0x49, 0x5a, 0x48, 0x5d, 0x4f, 0x4a, 0x47, 0x50, 0x48, 0x4e, 0x44, 0x57,
-    0x49, 0x46, 0x42, 0x4d, 0x3d, 0x4a, 0x4a, 0x58, 0x41, 0x4d, 0x3c, 0x47,
-    0x42, 0x4e, 0x4d, 0x49, 0x44, 0x4b, 0x4c, 0x4b, 0x53, 0x42, 0x4a, 0x46,
-    0x4e, 0x56, 0x4b, 0x47, 0x50, 0x43, 0x4f, 0x48, 0x49, 0x50, 0x48, 0x50,
-    0x42, 0x4c, 0x4e, 0x3c, 0x41, 0x4f, 0x4a, 0x41, 0x44, 0x47, 0x4c, 0x42,
-    0x51, 0x4f, 0x53, 0x46, 0x4c, 0x4b, 0x48, 0x51, 0x47, 0x4b, 0x4c, 0x4d,
-    0x4d, 0x49, 0x3d, 0x44, 0x4b, 0x42, 0x43, 0x49, 0x51, 0x47, 0x4c, 0x4b,
-    0x4a, 0x50, 0x5b, 0x43, 0x5b, 0x68, 0x54, 0x31, 0x4c, 0x5d, 0x5c, 0x54,
-    0x63, 0x5a, 0x61, 0x54, 0x3d, 0x7a, 0x51, 0x5b, 0x40, 0x59, 0x5a, 0x62,
-    0x4c, 0x5e, 0x42, 0x58, 0x49, 0x3c, 0x38, 0x50, 0x54, 0x37, 0x42, 0x51,
-    0x4d, 0x4f, 0x42, 0x68, 0x4a, 0x40, 0x4e, 0x40, 0x3f, 0x3e, 0x3f, 0x40,
-    0x54, 0x52, 0x3e, 0x43, 0x46, 0x4a, 0x48, 0x51, 0x4e, 0x4d, 0x42, 0x47,
-    0x3f, 0x51, 0x47, 0x44, 0x3f, 0x4c, 0x46, 0x47, 0x4f, 0x55, 0x4b, 0x4e,
-    0x4c, 0x51, 0x40, 0x51, 0x47, 0x4a, 0x44, 0x5c, 0x48, 0x54, 0x4b, 0x46,
-    0x49, 0x4b, 0x53, 0x59, 0x43, 0x3e, 0x45, 0x4e, 0x4f, 0x58, 0x4b, 0x64,
-    0x41, 0x4b, 0x45, 0x4a, 0x4c, 0x51, 0x47, 0x57, 0x45, 0x46, 0x43, 0x4f,
-    0x4d, 0x4d, 0x49, 0x58, 0x4b, 0x52, 0x43, 0x4b, 0x45, 0x4c, 0x50, 0x4c,
-    0x4e, 0x4b, 0x40, 0x4c, 0x44, 0x4e, 0x4c, 0x47, 0x41, 0x55, 0x45, 0x4a,
-    0x4c, 0x48, 0x46, 0x41, 0x47, 0x52, 0x44, 0x4f, 0x48, 0x49, 0x4b, 0x47,
-    0x50, 0x4f, 0x42, 0x4a, 0x44, 0x4b, 0x52, 0x43, 0x45, 0x4e, 0x46, 0x49,
-    0x45, 0x52, 0x51, 0x45, 0x44, 0x41, 0x4c, 0x46, 0x4c, 0x4b, 0x44, 0x4d,
-    0x4f, 0x48, 0x44, 0x4d, 0x56, 0x48, 0x50, 0x4f, 0x3b, 0x4e, 0x55, 0x43,
-    0x52, 0x62, 0x57, 0x2c, 0x4d, 0x5e, 0x5e, 0x50, 0x64, 0x5b, 0x6a, 0x55,
-    0x39, 0x7d, 0x4b, 0x5e, 0x43, 0x54, 0x5d, 0x5c, 0x4d, 0x5c, 0x42, 0x51,
-    0x4c, 0x3d, 0x46, 0x51, 0x4c, 0x2a, 0x3e, 0x54, 0x47, 0x48, 0x46, 0x64,
-    0x42, 0x3d, 0x47, 0x3f, 0x42, 0x45, 0x49, 0x3b, 0x59, 0x50, 0x4c, 0x46,
-    0x4d, 0x44, 0x47, 0x4d, 0x4a, 0x50, 0x41, 0x48, 0x43, 0x50, 0x3e, 0x44,
-    0x4b, 0x53, 0x48, 0x49, 0x51, 0x51, 0x4d, 0x57, 0x49, 0x4f, 0x53, 0x50,
-    0x46, 0x4f, 0x41, 0x5d, 0x47, 0x46, 0x49, 0x51, 0x45, 0x41, 0x4a, 0x56,
-    0x4f, 0x4e, 0x4d, 0x4a, 0x3e, 0x55, 0x47, 0x65, 0x48, 0x51, 0x4d, 0x4e,
-    0x46, 0x43, 0x48, 0x5b, 0x48, 0x4f, 0x4f, 0x48, 0x4b, 0x4d, 0x4e, 0x5c,
-    0x4f, 0x4c, 0x54, 0x48, 0x4a, 0x4d, 0x4e, 0x4e, 0x44, 0x48, 0x43, 0x52,
-    0x41, 0x52, 0x48, 0x4f, 0x46, 0x4f, 0x51, 0x41, 0x44, 0x45, 0x41, 0x4b,
-    0x43, 0x4e, 0x4e, 0x42, 0x48, 0x41, 0x45, 0x43, 0x44, 0x43, 0x4c, 0x4c,
-    0x51, 0x54, 0x4c, 0x32, 0x46, 0x52, 0x4e, 0x49, 0x40, 0x4d, 0x43, 0x4f,
-    0x4a, 0x4d, 0x4d, 0x49, 0x46, 0x4c, 0x41, 0x4d, 0x41, 0x3a, 0x50, 0x4c,
-    0x5a, 0x4e, 0x49, 0x53, 0x4d, 0x53, 0x53, 0x3d, 0x52, 0x64, 0x55, 0x2a,
-    0x47, 0x5d, 0x61, 0x51, 0x5b, 0x5d, 0x66, 0x52, 0x3f, 0xfd, 0x55, 0x5a,
-    0x4b, 0x54, 0x5b, 0x60, 0x49, 0x5d, 0x43, 0x57, 0x47, 0x41, 0x45, 0x5e,
-    0x4c, 0x28, 0x3e, 0x40, 0x49, 0x4e, 0x40, 0x69, 0x4a, 0x44, 0x45, 0x43,
-    0x45, 0x3d, 0x39, 0x40, 0x4c, 0x53, 0x4b, 0x3d, 0x4e, 0x43, 0x48, 0x55,
-    0x4d, 0x50, 0x4d, 0x49, 0x4f, 0x48, 0x3e, 0x46, 0x47, 0x56, 0x40, 0x48,
-    0x46, 0x53, 0x50, 0x5d, 0x43, 0x54, 0x49, 0x47, 0x49, 0x4c, 0x48, 0x5d,
-    0x49, 0x51, 0x50, 0x3d, 0x41, 0x47, 0x48, 0x64, 0x4b, 0x44, 0x49, 0x41,
-    0x54, 0x48, 0x3d, 0x6b, 0x4c, 0x5a, 0x48, 0x4e, 0x40, 0x4c, 0x52, 0x5f,
-    0x54, 0x4a, 0x3f, 0x48, 0x43, 0x43, 0x44, 0x66, 0x49, 0x47, 0x43, 0x46,
-    0x47, 0x54, 0x42, 0x54, 0x4b, 0x4e, 0x49, 0x49, 0x49, 0x4b, 0x52, 0x4f,
-    0x43, 0x46, 0x4b, 0x49, 0x54, 0x4b, 0x40, 0x48, 0x47, 0x4a, 0x46, 0x47,
-    0x44, 0x47, 0x4c, 0x37, 0x3f, 0x49, 0x45, 0x44, 0x50, 0x49, 0x44, 0x36,
-    0x4d, 0x40, 0x45, 0x49, 0x53, 0x55, 0x44, 0x42, 0x47, 0x48, 0x46, 0x40,
-    0x4f, 0x4c, 0x41, 0x42, 0x52, 0x3a, 0x43, 0x46, 0x55, 0x51, 0x4e, 0x4f,
-    0x48, 0x51, 0x55, 0x48, 0x52, 0x66, 0x4e, 0x33, 0x49, 0x5b, 0x5f, 0x4b,
-    0x5f, 0x5b, 0x66, 0x52, 0x41, 0x7c, 0x4a, 0x59, 0x47, 0x59, 0x58, 0x67,
-    0x49, 0x5e, 0x44, 0x57, 0x49, 0x4c, 0x43, 0x56, 0x41, 0x27, 0x4c, 0x44,
-    0x51, 0x44, 0x42, 0x65, 0x49, 0x44, 0x40, 0x3d, 0x4d, 0x3e, 0x4c, 0x3c,
-    0x4f, 0x4b, 0x45, 0x44, 0x4d, 0x48, 0x47, 0x54, 0x4d, 0x4e, 0x44, 0x42,
-    0x47, 0x44, 0x3d, 0x49, 0x4e, 0x50, 0x49, 0x45, 0x58, 0x4a, 0x54, 0x5c,
-    0x41, 0x49, 0x4f, 0x42, 0x44, 0x4f, 0x4a, 0x62, 0x48, 0x50, 0x48, 0x43,
-    0x51, 0x53, 0x47, 0x6c, 0x40, 0x46, 0x3d, 0x46, 0x4a, 0x50, 0x43, 0x69,
-    0x49, 0x4f, 0x4a, 0x4c, 0x49, 0x46, 0x43, 0x6a, 0x48, 0x50, 0x49, 0x48,
-    0x48, 0x51, 0x4b, 0x65, 0x42, 0x4b, 0x4d, 0x48, 0x44, 0x4e, 0x49, 0x60,
-    0x44, 0x52, 0x42, 0x42, 0x47, 0x48, 0x4b, 0x51, 0x50, 0x4b, 0x3c, 0x4d,
-    0x4c, 0x44, 0x48, 0x55, 0x51, 0x4c, 0x55, 0x4e, 0x52, 0x4c, 0x4b, 0x39,
-    0x48, 0x42, 0x49, 0x49, 0x49, 0x50, 0x49, 0x32, 0x4e, 0x4b, 0x45, 0x4f,
-    0x42, 0x4b, 0x47, 0x50, 0x48, 0x45, 0x54, 0x49, 0x4c, 0x46, 0x40, 0x46,
-    0x43, 0x3d, 0x51, 0x44, 0x53, 0x4f, 0x54, 0x55, 0x43, 0x4f, 0x5b, 0x47,
-    0x53, 0x6c, 0x57, 0x2e, 0x50, 0x55, 0x5a, 0x4d, 0x57, 0x5d, 0x70, 0x50,
-    0x3f, 0x79, 0x4a, 0x5a, 0x4c, 0x58, 0x59, 0x63, 0x45, 0x69, 0x48, 0x58,
-    0x42, 0x4b, 0x43, 0x5c, 0x46, 0x28, 0x48, 0x49, 0x4c, 0x3f, 0x45, 0x58,
-    0x45, 0x44, 0x47, 0x40, 0x4c, 0x42, 0x3e, 0x37, 0x45, 0x54, 0x48, 0x3b,
-    0x4e, 0x48, 0x43, 0x4a, 0x50, 0x4a, 0x49, 0x46, 0x4c, 0x54, 0x3f, 0x4b,
-    0x4e, 0x56, 0x48, 0x49, 0x49, 0x4c, 0x51, 0x5f, 0x4d, 0x4b, 0x43, 0x4d,
-    0x47, 0x51, 0x43, 0x59, 0x45, 0x4e, 0x4f, 0x45, 0x44, 0x54, 0x44, 0x6d,
-    0x47, 0x51, 0x43, 0x4e, 0x4c, 0x4f, 0x43, 0x6d, 0x48, 0x53, 0x4b, 0x47,
-    0x49, 0x48, 0x46, 0x6a, 0x51, 0x4c, 0x4d, 0x45, 0x4e, 0x47, 0x46, 0x62,
-    0x4a, 0x54, 0x51, 0x4c, 0x47, 0x4d, 0x4a, 0x61, 0x3d, 0x50, 0x4c, 0x4c,
-    0x45, 0x3f, 0x3e, 0x54, 0x3d, 0x53, 0x48, 0x47, 0x52, 0x4b, 0x47, 0x51,
-    0x4f, 0x45, 0x4b, 0x4a, 0x4c, 0x46, 0x44, 0x37, 0x42, 0x50, 0x49, 0x4f,
-    0x51, 0x41, 0x44, 0x38, 0x54, 0x40, 0x51, 0x52, 0x3e, 0x43, 0x44, 0x47,
-    0x49, 0x4b, 0x4b, 0x46, 0x53, 0x54, 0x55, 0x4b, 0x4a, 0x37, 0x43, 0x4a,
-    0x51, 0x47, 0x51, 0x54, 0x43, 0x46, 0x56, 0x3d, 0x54, 0x66, 0x4f, 0x30,
-    0x45, 0x52, 0x5a, 0x43, 0x5c, 0x65, 0x5d, 0x52, 0x32, 0x77, 0x53, 0x5f,
-    0x4a, 0x5a, 0x4f, 0x5e, 0x4e, 0x61, 0x4b, 0x5b, 0x4a, 0x53, 0x3e, 0x61,
-    0x47, 0x24, 0x3e, 0x48, 0x4d, 0x43, 0x40, 0x53, 0x4e, 0x41, 0x43, 0x3d,
-    0x50, 0x49, 0x41, 0x3a, 0x4e, 0x4b, 0x48, 0x49, 0x48, 0x49, 0x46, 0x50,
-    0x4f, 0x4b, 0x47, 0x4b, 0x48, 0x52, 0x3e, 0x4d, 0x4d, 0x59, 0x4c, 0x3e,
-    0x52, 0x49, 0x4f, 0x5e, 0x54, 0x59, 0x47, 0x4d, 0x40, 0x4c, 0x4b, 0x64,
-    0x42, 0x4c, 0x53, 0x46, 0x4e, 0x50, 0x46, 0x6a, 0x41, 0x59, 0x44, 0x4b,
-    0x4f, 0x44, 0x52, 0x6c, 0x54, 0x4e, 0x46, 0x48, 0x42, 0x3d, 0x44, 0x67,
-    0x44, 0x4f, 0x47, 0x54, 0x4c, 0x4f, 0x43, 0x61, 0x4c, 0x54, 0x4f, 0x43,
-    0x49, 0x40, 0x4a, 0x5f, 0x4a, 0x52, 0x47, 0x43, 0x4c, 0x43, 0x49, 0x53,
-    0x4c, 0x4b, 0x43, 0x3d, 0x4e, 0x45, 0x49, 0x50, 0x44, 0x53, 0x4f, 0x48,
-    0x4b, 0x46, 0x44, 0x3c, 0x50, 0x42, 0x43, 0x40, 0x47, 0x43, 0x42, 0x34,
-    0x47, 0x42, 0x3f, 0x4a, 0x48, 0x42, 0x48, 0x4c, 0x42, 0x4c, 0x4e, 0x47,
-    0x48, 0x47, 0x51, 0x51, 0x4d, 0x3d, 0x3e, 0x4b, 0x54, 0x4c, 0x4c, 0x59,
-    0x4f, 0x50, 0x57, 0x3c, 0x54, 0x62, 0x54, 0x35, 0x3d, 0x5a, 0x5b, 0x47,
-    0x59, 0x63, 0x66, 0x4d, 0x3c, 0x79, 0x50, 0x5f, 0x45, 0x58, 0x4e, 0x5d,
-    0x48, 0x61, 0x43, 0x54, 0x47, 0x54, 0x4d, 0x54, 0x4b, 0x25, 0x41, 0x44,
-    0x4c, 0x4a, 0x3b, 0x52, 0x47, 0x3c, 0x45, 0x3c, 0x53, 0x44, 0x44, 0x40,
-    0x50, 0x4c, 0x45, 0x3a, 0x4c, 0x51, 0x44, 0x49, 0x4d, 0x52, 0x4d, 0x4b,
-    0x45, 0x52, 0x3d, 0x50, 0x4a, 0x58, 0x4a, 0x47, 0x4d, 0x47, 0x4e, 0x52,
-    0x4f, 0x4d, 0x4f, 0x49, 0x52, 0x52, 0x4c, 0x5e, 0x47, 0x4d, 0x46, 0x4d,
-    0x4c, 0x48, 0x50, 0x70, 0x41, 0x4a, 0x48, 0x3d, 0x45, 0x48, 0x45, 0x74,
-    0x47, 0x4c, 0x43, 0x4f, 0x4a, 0x4a, 0x40, 0x68, 0x52, 0x49, 0x3e, 0x3e,
-    0x4e, 0x4b, 0x4b, 0x69, 0x42, 0x4f, 0x45, 0x47, 0x3f, 0x45, 0x46, 0x56,
-    0x45, 0x4a, 0x47, 0x44, 0x52, 0x4b, 0x53, 0x4e, 0x4e, 0x46, 0x45, 0x40,
-    0x47, 0x4b, 0x53, 0x52, 0x53, 0x51, 0x4f, 0x46, 0x42, 0x43, 0x50, 0x3e,
-    0x48, 0x4e, 0x41, 0x53, 0x4d, 0x48, 0x48, 0x33, 0x40, 0x43, 0x4b, 0x42,
-    0x52, 0x4c, 0x42, 0x4e, 0x41, 0x4e, 0x4f, 0x50, 0x43, 0x49, 0x4d, 0x47,
-    0x4a, 0x3a, 0x3f, 0x51, 0x51, 0x44, 0x4e, 0x54, 0x40, 0x55, 0x59, 0x3c,
-    0x57, 0x67, 0x4e, 0x2e, 0x4c, 0x5b, 0x5b, 0x51, 0x58, 0x63, 0x62, 0x52,
-    0x3c, 0x72, 0x51, 0x5a, 0x4e, 0x53, 0x4a, 0x5c, 0x51, 0x69, 0x42, 0x51,
-    0x48, 0x54, 0x48, 0x57, 0x3e, 0x37, 0x3f, 0x4d, 0x4d, 0x4a, 0x35, 0x57,
-    0x4e, 0x40, 0x45, 0x4a, 0x45, 0x4e, 0x49, 0x40, 0x49, 0x53, 0x51, 0x44,
-    0x4a, 0x50, 0x4b, 0x4b, 0x50, 0x4f, 0x3e, 0x44, 0x45, 0x44, 0x4c, 0x51,
-    0x47, 0x51, 0x46, 0x42, 0x48, 0x50, 0x49, 0x4d, 0x43, 0x54, 0x52, 0x4d,
-    0x4e, 0x4f, 0x3f, 0x63, 0x54, 0x57, 0x41, 0x44, 0x4e, 0x50, 0x4e, 0x66,
-    0x41, 0x53, 0x4b, 0x4d, 0x4e, 0x4f, 0x43, 0x6d, 0x4e, 0x51, 0x49, 0x4f,
-    0x49, 0x4a, 0x4a, 0x6c, 0x4b, 0x4f, 0x3d, 0x47, 0x4d, 0x51, 0x3c, 0x66,
-    0x4b, 0x56, 0x3e, 0x4c, 0x41, 0x46, 0x45, 0x68, 0x47, 0x4b, 0x4a, 0x54,
-    0x53, 0x48, 0x51, 0x59, 0x45, 0x43, 0x50, 0x45, 0x4f, 0x45, 0x42, 0x55,
-    0x48, 0x52, 0x4c, 0x46, 0x52, 0x49, 0x47, 0x3d, 0x55, 0x48, 0x52, 0x52,
-    0x40, 0x4e, 0x47, 0x31, 0x45, 0x4f, 0x42, 0x4a, 0x4e, 0x50, 0x42, 0x4a,
-    0x49, 0x57, 0x46, 0x4b, 0x45, 0x4e, 0x4d, 0x46, 0x47, 0x43, 0x50, 0x4e,
-    0x4f, 0x4c, 0x53, 0x55, 0x45, 0x51, 0x5b, 0x3a, 0x52, 0x64, 0x54, 0x2d,
-    0x42, 0x59, 0x59, 0x45, 0x59, 0x67, 0x69, 0x53, 0x3f, 0x78, 0x50, 0x60,
-    0x4c, 0x4c, 0x5b, 0x53, 0x45, 0x63, 0x49, 0x63, 0x51, 0x4c, 0x41, 0x4e,
-    0x4b, 0x37, 0x45, 0x4e, 0x48, 0x4c, 0x39, 0x55, 0x44, 0x37, 0x3c, 0x49,
-    0x44, 0x56, 0x3e, 0x40, 0x4d, 0x45, 0x4c, 0x43, 0x42, 0x41, 0x40, 0x42,
-    0x57, 0x4f, 0x43, 0x3f, 0x52, 0x53, 0x51, 0x4b, 0x4b, 0x55, 0x46, 0x40,
-    0x49, 0x45, 0x40, 0x4f, 0x47, 0x58, 0x4b, 0x53, 0x4e, 0x52, 0x54, 0x5e,
-    0x4b, 0x51, 0x50, 0x44, 0x50, 0x4b, 0x4f, 0x70, 0x49, 0x4f, 0x4c, 0x50,
-    0x45, 0x56, 0x4b, 0x6b, 0x49, 0x52, 0x4a, 0x3f, 0x44, 0x4b, 0x48, 0x72,
-    0x4c, 0x47, 0x4e, 0x43, 0x46, 0x4c, 0x4f, 0x61, 0x4a, 0x52, 0x52, 0x46,
-    0x4a, 0x4d, 0x46, 0x65, 0x48, 0x4e, 0x4d, 0x4e, 0x46, 0x4e, 0x53, 0x59,
-    0x43, 0x49, 0x43, 0x47, 0x45, 0x47, 0x53, 0x50, 0x3e, 0x4d, 0x41, 0x46,
-    0x4c, 0x4a, 0x4c, 0x35, 0x3f, 0x4f, 0x50, 0x48, 0x47, 0x4d, 0x4c, 0x32,
-    0x45, 0x53, 0x43, 0x4d, 0x4e, 0x4a, 0x3e, 0x4b, 0x55, 0x4f, 0x53, 0x4c,
-    0x4a, 0x4d, 0x48, 0x53, 0x4f, 0x3a, 0x47, 0x4b, 0x4e, 0x4e, 0x51, 0x59,
-    0x41, 0x50, 0x57, 0x38, 0x5d, 0x63, 0x59, 0x2b, 0x45, 0x53, 0x5a, 0x4e,
-    0x5c, 0x60, 0x5e, 0x4c, 0x41, 0x6f, 0x53, 0x5c, 0x48, 0x53, 0x56, 0x54,
-    0x4b, 0x62, 0x46, 0x63, 0x47, 0x4e, 0x40, 0x51, 0x43, 0x36, 0x44, 0x42,
-    0x46, 0x51, 0x41, 0x54, 0x4e, 0x36, 0x40, 0x4b, 0x55, 0x49, 0x40, 0x3f,
-    0x4b, 0x42, 0x4a, 0x4a, 0x48, 0x47, 0x40, 0x43, 0x4d, 0x4f, 0x55, 0x3f,
-    0x53, 0x42, 0x4d, 0x56, 0x49, 0x51, 0x4f, 0x41, 0x3b, 0x48, 0x43, 0x4e,
-    0x4b, 0x5c, 0x4f, 0x45, 0x4a, 0x4c, 0x46, 0x66, 0x43, 0x45, 0x46, 0x48,
-    0x4f, 0x4e, 0x40, 0x71, 0x4b, 0x4e, 0x3e, 0x42, 0x4d, 0x52, 0x42, 0x71,
-    0x4c, 0x54, 0x4f, 0x3f, 0x4c, 0x43, 0x4a, 0x73, 0x48, 0x48, 0x4c, 0x4b,
-    0x4c, 0x4d, 0x40, 0x72, 0x3e, 0x51, 0x49, 0x48, 0x52, 0x53, 0x45, 0x65,
-    0x52, 0x4e, 0x4f, 0x44, 0x4c, 0x43, 0x4a, 0x5e, 0x3e, 0x56, 0x46, 0x55,
-    0x55, 0x43, 0x49, 0x51, 0x4f, 0x52, 0x49, 0x4d, 0x46, 0x47, 0x49, 0x3e,
-    0x51, 0x49, 0x41, 0x53, 0x42, 0x47, 0x46, 0x3b, 0x4d, 0x4e, 0x48, 0x44,
-    0x42, 0x48, 0x4c, 0x47, 0x42, 0x4e, 0x4a, 0x3e, 0x44, 0x54, 0x4a, 0x4d,
-    0x49, 0x41, 0x41, 0x53, 0x52, 0x4c, 0x4c, 0x56, 0x49, 0x4a, 0x5a, 0x3f,
-    0x5b, 0x5c, 0x59, 0x2f, 0x49, 0x52, 0x5a, 0x4e, 0x5a, 0x61, 0x67, 0x4c,
-    0x41, 0x6f, 0x5a, 0x5a, 0x40, 0x5a, 0x54, 0x4e, 0x49, 0x66, 0x45, 0x5a,
-    0x4a, 0x45, 0x44, 0x4b, 0x44, 0x36, 0x41, 0x4c, 0x45, 0x44, 0x3d, 0x51,
-    0x3f, 0x35, 0x3c, 0x46, 0x53, 0x5c, 0x3f, 0x3e, 0x50, 0x43, 0x46, 0x4b,
-    0x40, 0x54, 0x41, 0x47, 0x4b, 0x51, 0x41, 0x46, 0x4a, 0x4d, 0x51, 0x52,
-    0x43, 0x58, 0x45, 0x46, 0x4e, 0x46, 0x4a, 0x4b, 0x44, 0x54, 0x4c, 0x4c,
-    0x43, 0x59, 0x48, 0x61, 0x4e, 0x4f, 0x4d, 0x4d, 0x4a, 0x52, 0x4c, 0x6e,
-    0x49, 0x57, 0x48, 0x4d, 0x46, 0x46, 0x4d, 0x72, 0x4a, 0x4e, 0x47, 0x44,
-    0x49, 0x4f, 0x48, 0x73, 0x42, 0x40, 0x4d, 0x44, 0x4d, 0x57, 0x3e, 0x69,
-    0x50, 0x52, 0x4c, 0x55, 0x46, 0x4c, 0x44, 0x5f, 0x4b, 0x4d, 0x55, 0x4c,
-    0x48, 0x49, 0x4a, 0x5e, 0x47, 0x4b, 0x45, 0x53, 0x55, 0x53, 0x4d, 0x53,
-    0x47, 0x5c, 0x45, 0x4e, 0x4e, 0x52, 0x4c, 0x39, 0x4b, 0x4c, 0x49, 0x46,
-    0x4a, 0x4e, 0x4b, 0x33, 0x46, 0x47, 0x52, 0x41, 0x49, 0x4b, 0x4c, 0x48,
-    0x51, 0x53, 0x44, 0x4c, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x4b, 0x50, 0x47,
-    0x4d, 0x4b, 0x4c, 0x4f, 0x44, 0x45, 0x58, 0x3c, 0x56, 0x5a, 0x56, 0x23,
-    0x4f, 0x4d, 0x5c, 0x4e, 0x59, 0x5a, 0x65, 0x43, 0x45, 0x66, 0x54, 0x5f,
-    0x45, 0x5e, 0x54, 0x4f, 0x48, 0x5f, 0x44, 0x59, 0x48, 0x46, 0x47, 0x49,
-    0x4d, 0x3c, 0x49, 0x54, 0x3e, 0x48, 0x43, 0x5b, 0x4a, 0x35, 0x41, 0x43,
-    0x4b, 0x55, 0x43, 0x38, 0x46, 0x42, 0x4a, 0x4e, 0x54, 0x4b, 0x4d, 0x46,
-    0x43, 0x4e, 0x44, 0x47, 0x56, 0x4c, 0x51, 0x57, 0x41, 0x4d, 0x43, 0x41,
-    0x51, 0x47, 0x41, 0x51, 0x51, 0x4f, 0x46, 0x50, 0x52, 0x4e, 0x4d, 0x60,
-    0x41, 0x49, 0x46, 0x50, 0x48, 0x56, 0x42, 0x6d, 0x40, 0x45, 0x44, 0x55,
-    0x40, 0x4e, 0x40, 0x7c, 0x47, 0x5a, 0x44, 0x44, 0x45, 0x56, 0x55, 0x71,
-    0x47, 0x4b, 0x4b, 0x45, 0x4f, 0x54, 0x4c, 0x73, 0x48, 0x55, 0x44, 0x4d,
-    0x4a, 0x47, 0x49, 0x5e, 0x4d, 0x52, 0x4e, 0x4c, 0x48, 0x52, 0x48, 0x58,
-    0x4c, 0x5a, 0x49, 0x4b, 0x53, 0x46, 0x4d, 0x4b, 0x48, 0x53, 0x41, 0x49,
-    0x4a, 0x56, 0x51, 0x3a, 0x4c, 0x4e, 0x4f, 0x51, 0x4c, 0x59, 0x47, 0x45,
-    0x4f, 0x50, 0x4a, 0x4f, 0x4d, 0x3f, 0x44, 0x4e, 0x42, 0x4a, 0x4a, 0x43,
-    0x46, 0x4e, 0x4c, 0x4f, 0x47, 0x47, 0x4c, 0x4b, 0x52, 0x50, 0x50, 0x4b,
-    0x42, 0x45, 0x54, 0x44, 0x54, 0x59, 0x4c, 0x2b, 0x4d, 0x4c, 0x55, 0x4e,
-    0x5c, 0x5b, 0x5a, 0x42, 0x47, 0x5e, 0x56, 0x59, 0x47, 0x65, 0x55, 0x4c,
-    0x4c, 0x59, 0x42, 0x5a, 0x4e, 0x46, 0x4e, 0x4b, 0x53, 0x46, 0x49, 0x56,
-    0x48, 0x58, 0x4b, 0x4f, 0x45, 0x38, 0x40, 0x44, 0x49, 0x51, 0x4a, 0x3b,
-    0x53, 0x40, 0x40, 0x48, 0x51, 0x49, 0x44, 0x46, 0x52, 0x4b, 0x4e, 0x45,
-    0x48, 0x5a, 0x4e, 0x57, 0x44, 0x53, 0x49, 0x40, 0x4c, 0x47, 0x41, 0x4f,
-    0x49, 0x55, 0x46, 0x50, 0x57, 0x5b, 0x48, 0x66, 0x50, 0x49, 0x51, 0x55,
-    0x55, 0x4f, 0x47, 0x72, 0x49, 0x4f, 0x41, 0x4c, 0x49, 0x42, 0x48, 0x75,
-    0x4a, 0x55, 0x45, 0x4a, 0x41, 0x51, 0x41, 0x70, 0x47, 0x49, 0x42, 0x52,
-    0x4f, 0x47, 0x46, 0x63, 0x4f, 0x53, 0x46, 0x4f, 0x49, 0x53, 0x52, 0x63,
-    0x4c, 0x59, 0x46, 0x41, 0x49, 0x51, 0x3e, 0x53, 0x45, 0x52, 0x51, 0x40,
-    0x4f, 0x4c, 0x41, 0x4c, 0x47, 0x4a, 0x46, 0x47, 0x53, 0x47, 0x48, 0x39,
-    0x53, 0x4b, 0x46, 0x4b, 0x50, 0x4c, 0x41, 0x40, 0x48, 0x4e, 0x49, 0x4e,
-    0x44, 0x53, 0x44, 0x4e, 0x53, 0x49, 0x49, 0x4e, 0x46, 0x3f, 0x45, 0x42,
-    0x4c, 0x47, 0x42, 0x4e, 0x49, 0x4a, 0x49, 0x44, 0x51, 0x48, 0x57, 0x4c,
-    0x4d, 0x60, 0x4e, 0x2d, 0x46, 0x4d, 0x58, 0x53, 0x5c, 0x56, 0x5e, 0x41,
-    0x3e, 0x66, 0x53, 0x5b, 0x49, 0x59, 0x5a, 0x55, 0x4e, 0x59, 0x46, 0x4a,
-    0x44, 0x42, 0x45, 0x3d, 0x4d, 0x45, 0x44, 0x4f, 0x4d, 0x53, 0x42, 0x5a,
-    0x43, 0x3c, 0x48, 0x4f, 0x44, 0x59, 0x3f, 0x33, 0x45, 0x48, 0x43, 0x45,
-    0x4d, 0x56, 0x48, 0x44, 0x3e, 0x48, 0x46, 0x4d, 0x44, 0x53, 0x46, 0x4e,
-    0x45, 0x52, 0x40, 0x46, 0x4c, 0x50, 0x4e, 0x4b, 0x4d, 0x46, 0x48, 0x46,
-    0x50, 0x52, 0x4e, 0x57, 0x3f, 0x4a, 0x49, 0x50, 0x53, 0x4e, 0x41, 0x66,
-    0x49, 0x4f, 0x40, 0x4b, 0x50, 0x4c, 0x4a, 0x70, 0x42, 0x51, 0x41, 0x4c,
-    0x50, 0x4f, 0x46, 0x60, 0x45, 0x47, 0x54, 0x4c, 0x49, 0x59, 0x52, 0x61,
-    0x4a, 0x53, 0x52, 0x4f, 0x4b, 0x4c, 0x46, 0x56, 0x4b, 0x54, 0x4f, 0x47,
-    0x53, 0x49, 0x4f, 0x50, 0x4a, 0x54, 0x45, 0x4e, 0x47, 0x48, 0x47, 0x42,
-    0x49, 0x44, 0x46, 0x46, 0x55, 0x4c, 0x4f, 0x36, 0x4c, 0x49, 0x3f, 0x4e,
-    0x45, 0x4b, 0x4b, 0x36, 0x48, 0x4f, 0x4b, 0x50, 0x45, 0x47, 0x49, 0x3f,
-    0x50, 0x4b, 0x52, 0x48, 0x4c, 0x41, 0x49, 0x43, 0x4e, 0x3c, 0x43, 0x45,
-    0x3e, 0x45, 0x48, 0x44, 0x4d, 0x48, 0x56, 0x47, 0x4b, 0x54, 0x52, 0x2b,
-    0x4d, 0x4e, 0x57, 0x4f, 0x57, 0x4f, 0x56, 0x43, 0x48, 0x5f, 0x4c, 0x51,
-    0x4d, 0x58, 0x4f, 0x4e, 0x50, 0x50, 0x48, 0x4a, 0x4d, 0x3f, 0x47, 0x40,
-    0x4b, 0x4a, 0x4e, 0x4b, 0x4a, 0x58, 0x42, 0x49, 0x3f, 0x42, 0x3d, 0x4d,
-    0x46, 0x53, 0x45, 0x3e, 0x4e, 0x49, 0x4f, 0x4a, 0x47, 0x46, 0x40, 0x3e,
-    0x4c, 0x4d, 0x4d, 0x45, 0x4a, 0x56, 0x40, 0x4a, 0x47, 0x57, 0x4f, 0x48,
-    0x4f, 0x48, 0x47, 0x49, 0x4e, 0x52, 0x50, 0x48, 0x42, 0x52, 0x43, 0x5a,
-    0x49, 0x42, 0x4f, 0x4f, 0x51, 0x51, 0x50, 0x5c, 0x4b, 0x43, 0x4b, 0x48,
-    0x50, 0x51, 0x4b, 0x6d, 0x53, 0x4e, 0x44, 0x4c, 0x4c, 0x51, 0x46, 0x5b,
-    0x44, 0x48, 0x4d, 0x4c, 0x46, 0x4f, 0x54, 0x54, 0x4e, 0x54, 0x42, 0x4e,
-    0x4c, 0x49, 0x49, 0x58, 0x49, 0x53, 0x53, 0x4a, 0x4e, 0x4b, 0x47, 0x53,
-    0x43, 0x55, 0x46, 0x51, 0x3d, 0x3d, 0x4c, 0x47, 0x4e, 0x51, 0x47, 0x48,
-    0x4b, 0x4c, 0x42, 0x3b, 0x43, 0x4f, 0x44, 0x4d, 0x54, 0x4b, 0x4a, 0x47,
-    0x4c, 0x42, 0x4b, 0x43, 0x41, 0x4e, 0x4d, 0x50, 0x45, 0x46, 0x41, 0x4a,
-    0x49, 0x49, 0x54, 0x47, 0x4c, 0x4b, 0x50, 0x4e, 0x3f, 0x43, 0x40, 0x41,
-    0x44, 0x54, 0x51, 0x47, 0x4c, 0x4b, 0x4f, 0x34, 0x4d, 0x4c, 0x4f, 0x49,
-    0x56, 0x4e, 0x4b, 0x3e, 0x48, 0x53, 0x4e, 0x56, 0x49, 0x4e, 0x4c, 0x40,
-    0x55, 0x4a, 0x46, 0x4f, 0x48, 0x4a, 0x55, 0x41, 0x55, 0x3d, 0x47, 0x51,
-    0x50, 0x51, 0x45, 0x51, 0x4b, 0x4e, 0x4a, 0x4f, 0x4b, 0x45, 0x42, 0x3c,
-    0x4e, 0x46, 0x47, 0x49, 0x4a, 0x4c, 0x48, 0x41, 0x4f, 0x4a, 0x44, 0x45,
-    0x4e, 0x4e, 0x43, 0x41, 0x4c, 0x47, 0x48, 0x49, 0x4c, 0x48, 0x4f, 0x4a,
-    0x4f, 0x4a, 0x4b, 0x45, 0x42, 0x40, 0x52, 0x55, 0x4f, 0x49, 0x44, 0x54,
-    0x49, 0x48, 0x51, 0x4d, 0x44, 0x4a, 0x4d, 0x49, 0x4e, 0x4e, 0x51, 0x5d,
-    0x42, 0x4d, 0x49, 0x3f, 0x48, 0x58, 0x40, 0x5e, 0x48, 0x4f, 0x49, 0x53,
-    0x45, 0x47, 0x4f, 0x53, 0x4d, 0x4f, 0x4d, 0x4d, 0x46, 0x55, 0x43, 0x51,
-    0x4f, 0x51, 0x4a, 0x4e, 0x49, 0x42, 0x49, 0x50, 0x47, 0x4d, 0x42, 0x47,
-    0x46, 0x50, 0x55, 0x47, 0x4d, 0x47, 0x3e, 0x51, 0x4d, 0x43, 0x44, 0x39,
-    0x4e, 0x4b, 0x41, 0x48, 0x52, 0x53, 0x4d, 0x39, 0x4d, 0x51, 0x4c, 0x46,
-    0x4e, 0x47, 0x49, 0x41, 0x45, 0x4a, 0x4a, 0x45, 0x50, 0x4a, 0x40, 0x48,
-    0x43, 0x47, 0x44, 0x50, 0x4d, 0x47, 0x4a, 0x47, 0x45, 0x57, 0x41, 0x34,
-    0x51, 0x40, 0x45, 0x44, 0x3c, 0x47, 0x46, 0x47, 0x44, 0x48, 0x42, 0x40,
-    0x37, 0x53, 0x4a, 0x43, 0x49, 0x4b, 0x43, 0x44, 0x4f, 0x4f, 0x48, 0x48,
-    0x53, 0x49, 0x4b, 0x48, 0x4e, 0x4c, 0x42, 0x45, 0x4c, 0x4a, 0x4a, 0x46,
-    0x47, 0x57, 0x3e, 0x46, 0x46, 0x45, 0x4a, 0x43, 0x46, 0x49, 0x43, 0x52,
-    0x3e, 0x48, 0x4a, 0x4b, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4b, 0x4e, 0x44,
-    0x42, 0x44, 0x50, 0x41, 0x49, 0x49, 0x4d, 0x4b, 0x44, 0x46, 0x4a, 0x52,
-    0x4d, 0x47, 0x49, 0x4b, 0x4d, 0x49, 0x41, 0x48, 0x4b, 0x3f, 0x45, 0x4f,
-    0x51, 0x41, 0x55, 0x42, 0x49, 0x4b, 0x4b, 0x51, 0x4f, 0x4f, 0x42, 0x4e,
-    0x4e, 0x4a, 0x52, 0x41, 0x4f, 0x42, 0x48, 0x3d, 0x4a, 0x44, 0x50, 0x4b,
-    0x49, 0x45, 0x51, 0x46, 0x51, 0x44, 0x4d, 0x47, 0x4a, 0x4a, 0x4d, 0x49,
-    0x4d, 0x48, 0x4d, 0x4f, 0x4d, 0x44, 0x48, 0x4e, 0x4a, 0x4b, 0x40, 0x4f,
-    0x47, 0x3a, 0x41, 0x47, 0x4a, 0x4a, 0x4a, 0x48, 0x42, 0x41, 0x4d, 0x56,
-    0x3f, 0x52, 0x4d, 0x4c, 0x44, 0x48, 0x47, 0x4e, 0x51, 0x4c, 0x49, 0x47,
-    0x44, 0x4c, 0x4b, 0x47, 0x48, 0x46, 0x47, 0x4f, 0x43, 0x41, 0x3e, 0x47,
-    0x53, 0x4a, 0x46, 0x42, 0x46, 0x61, 0x43, 0x30, 0x4e, 0x52, 0x43, 0x45,
-    0x32, 0x4a, 0x45, 0x48, 0x51, 0x3e, 0x44, 0x3b, 0x3a, 0x63, 0x4c, 0x46,
-    0x4c, 0x49, 0x3d, 0x41, 0x52, 0x53, 0x43, 0x43, 0x45, 0x3d, 0x48, 0x40,
-    0x4b, 0x4a, 0x49, 0x48, 0x4d, 0x49, 0x4b, 0x4c, 0x3f, 0x4e, 0x4b, 0x47,
-    0x45, 0x4d, 0x3f, 0x4d, 0x43, 0x50, 0x48, 0x4b, 0x54, 0x3e, 0x44, 0x4e,
-    0x3e, 0x4c, 0x43, 0x4b, 0x4c, 0x4b, 0x3e, 0x49, 0x50, 0x52, 0x4a, 0x4a,
-    0x50, 0x50, 0x43, 0x4e, 0x49, 0x48, 0x51, 0x50, 0x47, 0x3d, 0x45, 0x4b,
-    0x47, 0x46, 0x4d, 0x4c, 0x45, 0x4d, 0x4a, 0x4d, 0x42, 0x4d, 0x47, 0x4f,
-    0x40, 0x43, 0x46, 0x51, 0x47, 0x4b, 0x43, 0x49, 0x49, 0x50, 0x4b, 0x4b,
-    0x46, 0x4a, 0x4c, 0x48, 0x49, 0x47, 0x4b, 0x56, 0x55, 0x4f, 0x49, 0x4f,
-    0x4f, 0x4e, 0x4b, 0x49, 0x4a, 0x4a, 0x49, 0x47, 0x44, 0x4b, 0x47, 0x50,
-    0x46, 0x4c, 0x46, 0x4c, 0x4b, 0x4e, 0x49, 0x57, 0x4d, 0x3e, 0x46, 0x47,
-    0x50, 0x45, 0x4f, 0x52, 0x3e, 0x4d, 0x49, 0x4a, 0x40, 0x49, 0x4f, 0x5c,
-    0x3e, 0x4a, 0x47, 0x45, 0x47, 0x41, 0x44, 0x3f, 0x4b, 0x4a, 0x52, 0x43,
-    0x41, 0x43, 0x43, 0x47, 0x55, 0x49, 0x42, 0x4c, 0x58, 0x4b, 0x42, 0x48,
-    0x4b, 0x5a, 0x36, 0x33, 0x53, 0x57, 0x4d, 0x4a, 0x37, 0x4c, 0x3e, 0x48,
-    0x43, 0x46, 0x39, 0x3c, 0x34, 0x65, 0x47, 0x3d, 0x47, 0x42, 0x3c, 0x3e,
-    0x45, 0x5b, 0x44, 0x3e, 0x45, 0x43, 0x46, 0x43, 0x59, 0x4e, 0x48, 0x46,
-    0x43, 0x3f, 0x46, 0x47, 0x4e, 0x53, 0x50, 0x4b, 0x4a, 0x3f, 0x4a, 0x54,
-    0x4c, 0x4a, 0x43, 0x50, 0x4c, 0x42, 0x4d, 0x55, 0x4d, 0x51, 0x51, 0x46,
-    0x49, 0x41, 0x50, 0x44, 0x4a, 0x4b, 0x4b, 0x43, 0x4b, 0x4e, 0x47, 0x4b,
-    0x3e, 0x4e, 0x44, 0x4d, 0x49, 0x41, 0x49, 0x44, 0x50, 0x4d, 0x45, 0x4e,
-    0x4b, 0x50, 0x45, 0x4c, 0x46, 0x4a, 0x46, 0x42, 0x50, 0x45, 0x48, 0x53,
-    0x4d, 0x44, 0x42, 0x50, 0x4c, 0x49, 0x45, 0x55, 0x4d, 0x42, 0x43, 0x41,
-    0x4c, 0x41, 0x4e, 0x4d, 0x42, 0x4e, 0x3f, 0x44, 0x4d, 0x4c, 0x4b, 0x4a,
-    0x47, 0x47, 0x4e, 0x54, 0x43, 0x40, 0x41, 0x55, 0x49, 0x49, 0x4e, 0x49,
-    0x52, 0x4e, 0x46, 0x58, 0x4b, 0x3d, 0x4a, 0x44, 0x4e, 0x47, 0x53, 0x58,
-    0x47, 0x42, 0x52, 0x46, 0x49, 0x4b, 0x47, 0x5a, 0x4c, 0x46, 0x46, 0x49,
-    0x4b, 0x4d, 0x3d, 0x48, 0x40, 0x54, 0x48, 0x4c, 0x4c, 0x44, 0x4c, 0x46,
-    0x47, 0x4b, 0x4d, 0x44, 0x5a, 0x4a, 0x3e, 0x46, 0x48, 0x53, 0x39, 0x30,
-    0x51, 0x60, 0x4d, 0x47, 0x35, 0x4f, 0x45, 0x45, 0x4a, 0x4b, 0x42, 0x3f,
-    0x38, 0x6c, 0x3d, 0x40, 0x44, 0x48, 0x3a, 0x3b, 0x46, 0x5e, 0x45, 0x3b,
-    0x47, 0x47, 0x45, 0x42, 0x53, 0x55, 0x44, 0x45, 0x46, 0x43, 0x48, 0x48,
-    0x52, 0x5d, 0x3e, 0x41, 0x53, 0x42, 0x48, 0x55, 0x49, 0x4d, 0x4a, 0x46,
-    0x52, 0x46, 0x51, 0x48, 0x44, 0x46, 0x48, 0x41, 0x49, 0x49, 0x49, 0x49,
-    0x41, 0x4d, 0x40, 0x4f, 0x45, 0x46, 0x45, 0x3f, 0x53, 0x40, 0x46, 0x43,
-    0x47, 0x4d, 0x50, 0x4c, 0x55, 0x48, 0x45, 0x47, 0x4f, 0x46, 0x42, 0x4d,
-    0x41, 0x48, 0x46, 0x4e, 0x42, 0x48, 0x48, 0x45, 0x41, 0x45, 0x48, 0x4a,
-    0x40, 0x49, 0x43, 0x4b, 0x48, 0x4a, 0x4c, 0x45, 0x4b, 0x48, 0x48, 0x4f,
-    0x40, 0x4b, 0x4a, 0x44, 0x50, 0x4a, 0x43, 0x50, 0x4c, 0x44, 0x46, 0x4c,
-    0x42, 0x44, 0x4e, 0x55, 0x47, 0x49, 0x48, 0x47, 0x52, 0x4e, 0x44, 0x59,
-    0x4e, 0x44, 0x4a, 0x48, 0x49, 0x4a, 0x42, 0x4e, 0x3e, 0x39, 0x51, 0x45,
-    0x4d, 0x49, 0x4f, 0x54, 0x51, 0x4b, 0x50, 0x44, 0x53, 0x4f, 0x4d, 0x48,
-    0x42, 0x45, 0x4e, 0x40, 0x4a, 0x48, 0x43, 0x48, 0x52, 0x54, 0x4d, 0x49,
-    0x5f, 0x53, 0x46, 0x4e, 0x3f, 0x5a, 0x36, 0x31, 0x52, 0x60, 0x4b, 0x4a,
-    0x32, 0x51, 0x40, 0x44, 0x46, 0x52, 0x44, 0x41, 0x3a, 0x6e, 0x41, 0x3e,
-    0x47, 0x3e, 0x3a, 0x2a, 0x44, 0x5a, 0x40, 0x3c, 0x4d, 0x48, 0x46, 0x3b,
-    0x5e, 0x58, 0x4d, 0x47, 0x51, 0x3a, 0x4b, 0x48, 0x5b, 0x5a, 0x54, 0x43,
-    0x50, 0x4c, 0x54, 0x54, 0x49, 0x47, 0x4f, 0x48, 0x50, 0x40, 0x4f, 0x4a,
-    0x42, 0x42, 0x3c, 0x41, 0x43, 0x4e, 0x53, 0x49, 0x4b, 0x4d, 0x49, 0x41,
-    0x4c, 0x3e, 0x40, 0x49, 0x40, 0x44, 0x49, 0x4f, 0x50, 0x4a, 0x42, 0x3a,
-    0x49, 0x4b, 0x47, 0x50, 0x49, 0x41, 0x52, 0x46, 0x3d, 0x44, 0x46, 0x43,
-    0x4b, 0x4b, 0x4d, 0x4b, 0x4e, 0x40, 0x45, 0x43, 0x48, 0x44, 0x55, 0x51,
-    0x4a, 0x46, 0x4e, 0x40, 0x53, 0x4a, 0x45, 0x41, 0x48, 0x48, 0x45, 0x4e,
-    0x4a, 0x48, 0x40, 0x4c, 0x54, 0x44, 0x42, 0x4d, 0x49, 0x43, 0x45, 0x4c,
-    0x43, 0x4f, 0x46, 0x3f, 0x46, 0x4f, 0x4b, 0x59, 0x46, 0x49, 0x54, 0x47,
-    0x49, 0x46, 0x45, 0x53, 0x4a, 0x49, 0x54, 0x45, 0x41, 0x45, 0x4c, 0x5e,
-    0x50, 0x3d, 0x4d, 0x49, 0x55, 0x4b, 0x49, 0x47, 0x4c, 0x4f, 0x43, 0x3d,
-    0x41, 0x4b, 0x43, 0x46, 0x4f, 0x4a, 0x4c, 0x54, 0x5e, 0x4e, 0x40, 0x4d,
-    0x3d, 0x59, 0x40, 0x28, 0x54, 0x5f, 0x4d, 0x4b, 0x36, 0x51, 0x3a, 0x47,
-    0x4a, 0x55, 0x42, 0x43, 0x3b, 0x72, 0x3b, 0x3d, 0x51, 0x42, 0x3f, 0x2d,
-    0x4b, 0x5a, 0x48, 0x44, 0x49, 0x49, 0x3d, 0x39, 0x56, 0x55, 0x46, 0x46,
-    0x4b, 0x43, 0x40, 0x4a, 0x52, 0x56, 0x4d, 0x45, 0x4b, 0x48, 0x40, 0x5a,
-    0x4e, 0x3a, 0x53, 0x48, 0x4c, 0x44, 0x49, 0x4e, 0x42, 0x47, 0x46, 0x40,
-    0x51, 0x42, 0x50, 0x4b, 0x43, 0x53, 0x44, 0x44, 0x46, 0x4c, 0x4c, 0x3c,
-    0x42, 0x45, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x3d, 0x47, 0x4b, 0x4c, 0x4e,
-    0x52, 0x4a, 0x4e, 0x41, 0x3f, 0x46, 0x43, 0x54, 0x44, 0x53, 0x4e, 0x48,
-    0x40, 0x41, 0x4f, 0x45, 0x43, 0x3c, 0x52, 0x49, 0x40, 0x44, 0x4a, 0x3f,
-    0x4d, 0x4c, 0x4f, 0x47, 0x44, 0x47, 0x55, 0x47, 0x50, 0x4d, 0x4a, 0x4c,
-    0x50, 0x48, 0x47, 0x55, 0x4b, 0x4a, 0x52, 0x49, 0x3d, 0x3f, 0x4f, 0x51,
-    0x48, 0x4e, 0x42, 0x4e, 0x42, 0x48, 0x4e, 0x49, 0x4a, 0x50, 0x45, 0x54,
-    0x41, 0x43, 0x45, 0x4d, 0x48, 0x48, 0x48, 0x51, 0x53, 0x3e, 0x55, 0x44,
-    0x52, 0x56, 0x44, 0x4d, 0x4e, 0x48, 0x4b, 0x43, 0x48, 0x53, 0x48, 0x44,
-    0x49, 0x45, 0x4e, 0x50, 0x5d, 0x4a, 0x45, 0x4c, 0x45, 0x55, 0x43, 0x2e,
-    0x59, 0x60, 0x4e, 0x4d, 0x32, 0x53, 0x3e, 0x3f, 0x40, 0x63, 0x41, 0x48,
-    0x38, 0x73, 0x38, 0x46, 0x50, 0x3e, 0x3c, 0x23, 0x48, 0x61, 0x45, 0x3c,
-    0x41, 0x41, 0x36, 0x3b, 0x58, 0x56, 0x4a, 0x40, 0x4f, 0x44, 0x45, 0x4c,
-    0x5a, 0x56, 0x47, 0x3f, 0x4d, 0x4b, 0x46, 0x5d, 0x52, 0x47, 0x45, 0x4c,
-    0x4a, 0x52, 0x4f, 0x4f, 0x4f, 0x43, 0x4f, 0x47, 0x43, 0x46, 0x3c, 0x4c,
-    0x46, 0x55, 0x40, 0x53, 0x43, 0x3e, 0x42, 0x35, 0x51, 0x41, 0x42, 0x3f,
-    0x45, 0x3d, 0x41, 0x31, 0x4e, 0x47, 0x48, 0x42, 0x41, 0x45, 0x43, 0x38,
-    0x42, 0x40, 0x4a, 0x47, 0x4e, 0x43, 0x40, 0x43, 0x48, 0x49, 0x45, 0x4f,
-    0x44, 0x42, 0x4d, 0x42, 0x42, 0x3f, 0x46, 0x52, 0x3c, 0x3c, 0x47, 0x43,
-    0x46, 0x47, 0x45, 0x40, 0x4c, 0x44, 0x43, 0x4a, 0x4b, 0x4d, 0x4e, 0x46,
-    0x51, 0x45, 0x47, 0x4b, 0x45, 0x50, 0x40, 0x42, 0x4c, 0x4c, 0x4c, 0x4f,
-    0x44, 0x3c, 0x49, 0x3c, 0x3f, 0x45, 0x3f, 0x5c, 0x42, 0x3e, 0x4b, 0x4e,
-    0x50, 0x45, 0x42, 0x5c, 0x4c, 0x48, 0x50, 0x52, 0x50, 0x47, 0x4b, 0x44,
-    0x3d, 0x50, 0x55, 0x4c, 0x48, 0x3f, 0x4b, 0x44, 0x4a, 0x51, 0x42, 0x4c,
-    0x60, 0x51, 0x41, 0x4b, 0x46, 0x5c, 0x42, 0x2c, 0x55, 0x61, 0x50, 0x52,
-    0x37, 0x5a, 0x3f, 0x43, 0x43, 0x58, 0x3a, 0x4d, 0x3e, 0x72, 0x35, 0x3f,
-    0x58, 0x41, 0x40, 0x1f, 0x55, 0x63, 0x3f, 0x49, 0x41, 0x3e, 0x35, 0x41,
-    0x65, 0x54, 0x42, 0x45, 0x45, 0x3c, 0x44, 0x45, 0x59, 0x5a, 0x4d, 0x41,
-    0x51, 0x46, 0x49, 0x59, 0x4c, 0x41, 0x42, 0x44, 0x4a, 0x45, 0x3f, 0x4a,
-    0x4a, 0x44, 0x48, 0x48, 0x52, 0x40, 0x4a, 0x4a, 0x4d, 0x54, 0x44, 0x48,
-    0x54, 0x46, 0x49, 0x3b, 0x42, 0x4a, 0x4e, 0x46, 0x4a, 0x45, 0x4f, 0x30,
-    0x46, 0x41, 0x47, 0x46, 0x4b, 0x47, 0x46, 0x38, 0x4c, 0x3a, 0x4b, 0x46,
-    0x52, 0x48, 0x4f, 0x3e, 0x48, 0x4a, 0x48, 0x4b, 0x44, 0x45, 0x4a, 0x46,
-    0x3f, 0x4f, 0x40, 0x44, 0x43, 0x43, 0x4b, 0x39, 0x46, 0x43, 0x49, 0x49,
-    0x49, 0x4a, 0x44, 0x48, 0x4c, 0x41, 0x4d, 0x52, 0x4c, 0x4a, 0x46, 0x3d,
-    0x41, 0x4b, 0x41, 0x48, 0x45, 0x3b, 0x51, 0x54, 0x4a, 0x39, 0x4d, 0x41,
-    0x54, 0x46, 0x4c, 0x53, 0x48, 0x3e, 0x4a, 0x3d, 0x41, 0x52, 0x54, 0x63,
-    0x44, 0x4d, 0x4a, 0x43, 0x52, 0x4b, 0x52, 0x52, 0x4e, 0x41, 0x48, 0x42,
-    0x48, 0x4d, 0x49, 0x45, 0x51, 0x48, 0x3e, 0x47, 0x5a, 0x52, 0x4a, 0x4e,
-    0x3e, 0x59, 0x3c, 0x2e, 0x5c, 0x5b, 0x4c, 0x56, 0x30, 0x59, 0x3a, 0x48,
-    0x3d, 0x5c, 0x44, 0x49, 0x40, 0x7c, 0x3a, 0x48, 0x54, 0x40, 0x41, 0x28,
-    0x4d, 0x64, 0x46, 0x47, 0x49, 0x40, 0x30, 0x3a, 0x5f, 0x5b, 0x42, 0x37,
-    0x49, 0x45, 0x40, 0x43, 0x5b, 0x54, 0x48, 0x4d, 0x4a, 0x47, 0x51, 0x58,
-    0x4b, 0x3c, 0x4d, 0x46, 0x4b, 0x52, 0x4c, 0x58, 0x53, 0x46, 0x42, 0x45,
-    0x4c, 0x4a, 0x4d, 0x4e, 0x52, 0x4d, 0x46, 0x44, 0x46, 0x3f, 0x46, 0x34,
-    0x4f, 0x42, 0x44, 0x46, 0x44, 0x50, 0x47, 0x30, 0x44, 0x3c, 0x42, 0x46,
-    0x4f, 0x4a, 0x52, 0x30, 0x55, 0x4f, 0x45, 0x4a, 0x48, 0x4c, 0x4e, 0x35,
-    0x4e, 0x3c, 0x45, 0x4a, 0x45, 0x4a, 0x44, 0x3c, 0x4e, 0x4a, 0x51, 0x44,
-    0x49, 0x40, 0x4a, 0x40, 0x41, 0x44, 0x4f, 0x4c, 0x43, 0x45, 0x4b, 0x43,
-    0x3e, 0x3e, 0x4c, 0x44, 0x48, 0x48, 0x42, 0x42, 0x4d, 0x43, 0x50, 0x4d,
-    0x49, 0x3c, 0x45, 0x4f, 0x4c, 0x46, 0x4b, 0x48, 0x4d, 0x4d, 0x49, 0x55,
-    0x49, 0x3b, 0x40, 0x44, 0x4a, 0x4b, 0x4e, 0x5e, 0x43, 0x47, 0x45, 0x43,
-    0x4d, 0x4d, 0x49, 0x46, 0x4a, 0x44, 0x4e, 0x3e, 0x52, 0x41, 0x47, 0x47,
-    0x4a, 0x50, 0x48, 0x43, 0x5d, 0x4f, 0x49, 0x48, 0x43, 0x4f, 0x45, 0x3e,
-    0x5a, 0x69, 0x4d, 0x5a, 0x3a, 0x5d, 0x3a, 0x48, 0x42, 0x55, 0x3e, 0x48,
-    0x48, 0x7b, 0x37, 0x40, 0x57, 0x45, 0x48, 0x24, 0x50, 0x61, 0x4c, 0x4a,
-    0x44, 0x41, 0x34, 0x38, 0x65, 0x5b, 0x4f, 0x3c, 0x4d, 0x3a, 0x4a, 0x4c,
-    0x66, 0x55, 0x50, 0x47, 0x4d, 0x46, 0x47, 0x58, 0x4c, 0x48, 0x48, 0x48,
-    0x4e, 0x59, 0x4f, 0x4b, 0x45, 0x45, 0x4b, 0x54, 0x46, 0x51, 0x4f, 0x44,
-    0x42, 0x55, 0x48, 0x44, 0x48, 0x41, 0x53, 0x2e, 0x4d, 0x45, 0x44, 0x54,
-    0x4a, 0x44, 0x53, 0x34, 0x4c, 0x46, 0x47, 0x3f, 0x4c, 0x4b, 0x47, 0x36,
-    0x47, 0x41, 0x43, 0x40, 0x51, 0x46, 0x45, 0x33, 0x46, 0x3e, 0x47, 0x50,
-    0x3f, 0x48, 0x48, 0x37, 0x41, 0x41, 0x42, 0x3e, 0x45, 0x3d, 0x49, 0x3e,
-    0x4f, 0x42, 0x49, 0x4a, 0x46, 0x46, 0x48, 0x44, 0x49, 0x45, 0x46, 0x4a,
-    0x4a, 0x47, 0x48, 0x43, 0x44, 0x45, 0x3f, 0x4c, 0x4c, 0x49, 0x4d, 0x51,
-    0x4a, 0x4a, 0x49, 0x4c, 0x42, 0x4d, 0x4b, 0x4b, 0x4a, 0x42, 0x47, 0x4d,
-    0x3e, 0x4b, 0x47, 0x5c, 0x49, 0x3d, 0x4e, 0x41, 0x44, 0x49, 0x3e, 0x3e,
-    0x4b, 0x47, 0x4e, 0x45, 0x44, 0x4a, 0x4d, 0x4a, 0x4f, 0x46, 0x45, 0x52,
-    0x60, 0x53, 0x49, 0x50, 0x3d, 0x4f, 0x43, 0x3d, 0x52, 0x64, 0x52, 0x58,
-    0x39, 0x5f, 0x36, 0x4c, 0x45, 0x57, 0x42, 0x4b, 0x3f, 0x80, 0x34, 0x47,
-    0x58, 0x41, 0x45, 0x1b, 0x4b, 0x5e, 0x4c, 0x40, 0x44, 0x42, 0x39, 0x3a,
-    0x5e, 0x5b, 0x4b, 0x3a, 0x4b, 0x3f, 0x45, 0x3e, 0x69, 0x57, 0x4b, 0x45,
-    0x4b, 0x3f, 0x45, 0x55, 0x49, 0x49, 0x48, 0x47, 0x41, 0x4f, 0x42, 0x53,
-    0x49, 0x40, 0x42, 0x3e, 0x49, 0x47, 0x53, 0x47, 0x45, 0x51, 0x4a, 0x44,
-    0x44, 0x45, 0x4e, 0x2a, 0x45, 0x42, 0x4a, 0x4b, 0x46, 0x4d, 0x41, 0x30,
-    0x3d, 0x43, 0x3f, 0x48, 0x49, 0x44, 0x4d, 0x2e, 0x48, 0x4a, 0x4c, 0x51,
-    0x50, 0x46, 0x3e, 0x2c, 0x4d, 0x3f, 0x47, 0x46, 0x3c, 0x40, 0x4c, 0x38,
-    0x4f, 0x46, 0x47, 0x53, 0x3b, 0x3c, 0x4e, 0x3e, 0x49, 0x40, 0x43, 0x4c,
-    0x4d, 0x48, 0x45, 0x3c, 0x4d, 0x4c, 0x4d, 0x45, 0x3f, 0x49, 0x4a, 0x43,
-    0x4d, 0x41, 0x4b, 0x50, 0x4e, 0x46, 0x50, 0x44, 0x49, 0x44, 0x4e, 0x42,
-    0x4a, 0x43, 0x4c, 0x4c, 0x49, 0x49, 0x44, 0x4e, 0x4b, 0x3f, 0x4b, 0x5d,
-    0x41, 0x49, 0x4b, 0x46, 0x4e, 0x48, 0x45, 0x51, 0x4d, 0x45, 0x46, 0x45,
-    0x4b, 0x4e, 0x3c, 0x4d, 0x3d, 0x41, 0x47, 0x47, 0x64, 0x54, 0x41, 0x55,
-    0x47, 0x56, 0x44, 0x3b, 0x53, 0x66, 0x4f, 0x5e, 0x40, 0x5d, 0x38, 0x4a,
-    0x41, 0x59, 0x42, 0x48, 0x47, 0xff, 0x36, 0x49, 0x59, 0x41, 0x43, 0x1d,
-    0x4d, 0x5e, 0x44, 0x44, 0x50, 0x3f, 0x39, 0x40, 0x68, 0x5e, 0x4a, 0x41,
-    0x52, 0x41, 0x43, 0x41, 0x68, 0x51, 0x45, 0x48, 0x4c, 0x46, 0x4a, 0x5e,
-    0x4e, 0x40, 0x4d, 0x41, 0x41, 0x5c, 0x3f, 0x4e, 0x4c, 0x37, 0x48, 0x40,
-    0x46, 0x47, 0x4f, 0x43, 0x53, 0x52, 0x3d, 0x44, 0x47, 0x44, 0x3d, 0x34,
-    0x44, 0x42, 0x4a, 0x43, 0x4d, 0x3f, 0x53, 0x2e, 0x42, 0x47, 0x43, 0x4d,
-    0x45, 0x45, 0x47, 0x31, 0x4d, 0x39, 0x41, 0x4a, 0x4a, 0x4d, 0x4b, 0x35,
-    0x47, 0x4e, 0x4c, 0x40, 0x4a, 0x44, 0x44, 0x36, 0x3e, 0x49, 0x3f, 0x45,
-    0x46, 0x43, 0x4e, 0x3c, 0x4d, 0x47, 0x4c, 0x48, 0x4a, 0x4b, 0x48, 0x39,
-    0x46, 0x50, 0x4a, 0x4f, 0x46, 0x41, 0x44, 0x4a, 0x41, 0x4f, 0x4c, 0x4e,
-    0x55, 0x46, 0x43, 0x46, 0x4a, 0x48, 0x4e, 0x46, 0x42, 0x40, 0x4f, 0x56,
-    0x4c, 0x45, 0x4b, 0x46, 0x4a, 0x47, 0x42, 0x5e, 0x49, 0x4e, 0x46, 0x43,
-    0x4e, 0x42, 0x45, 0x48, 0x47, 0x48, 0x4f, 0x45, 0x47, 0x51, 0x4b, 0x4c,
-    0x51, 0x39, 0x4d, 0x48, 0x60, 0x57, 0x49, 0x52, 0x3d, 0x57, 0x46, 0x3d,
-    0x53, 0x68, 0x4b, 0x60, 0x40, 0x5a, 0x41, 0x4b, 0x46, 0x56, 0x46, 0x4c,
-    0x49, 0x7e, 0x2f, 0x48, 0x51, 0x42, 0x40, 0x20, 0x4b, 0x62, 0x4d, 0x41,
-    0x4f, 0x43, 0x3d, 0x35, 0x63, 0x63, 0x46, 0x3e, 0x4e, 0x47, 0x40, 0x40,
-    0x60, 0x52, 0x4c, 0x46, 0x49, 0x48, 0x4f, 0x56, 0x51, 0x47, 0x52, 0x4e,
-    0x4b, 0x59, 0x55, 0x4f, 0x48, 0x3d, 0x48, 0x4a, 0x4d, 0x50, 0x47, 0x47,
-    0x51, 0x52, 0x4d, 0x51, 0x45, 0x45, 0x47, 0x2d, 0x4d, 0x41, 0x43, 0x49,
-    0x4d, 0x40, 0x4a, 0x2f, 0x4f, 0x43, 0x46, 0x4a, 0x3e, 0x4a, 0x4a, 0x2b,
-    0x49, 0x4c, 0x4c, 0x3e, 0x41, 0x4c, 0x4a, 0x2b, 0x40, 0x44, 0x46, 0x4a,
-    0x40, 0x44, 0x42, 0x38, 0x52, 0x42, 0x46, 0x51, 0x53, 0x4e, 0x45, 0x31,
-    0x45, 0x47, 0x4f, 0x46, 0x49, 0x43, 0x45, 0x3b, 0x4b, 0x4b, 0x4b, 0x4c,
-    0x43, 0x4a, 0x4c, 0x43, 0x4e, 0x40, 0x52, 0x44, 0x48, 0x49, 0x47, 0x4b,
-    0x4e, 0x3d, 0x4e, 0x44, 0x48, 0x4d, 0x4f, 0x4f, 0x50, 0x36, 0x47, 0x41,
-    0x4a, 0x44, 0x45, 0x56, 0x4f, 0x4c, 0x50, 0x4b, 0x45, 0x3e, 0x45, 0x4e,
-    0x45, 0x45, 0x43, 0x40, 0x47, 0x4e, 0x45, 0x3e, 0x4a, 0x3f, 0x49, 0x50,
-    0x62, 0x55, 0x48, 0x56, 0x3e, 0x57, 0x4f, 0x3b, 0x55, 0x6c, 0x50, 0x5c,
-    0x3d, 0x54, 0x3d, 0x46, 0x43, 0x59, 0x3e, 0x51, 0x4d, 0x7b, 0x33, 0x47,
-    0x52, 0x43, 0x3f, 0x25, 0x4a, 0x6f, 0x49, 0x3e, 0x50, 0x40, 0x41, 0x30,
-    0x5e, 0x5c, 0x4a, 0x43, 0x4d, 0x42, 0x46, 0x3b, 0x63, 0x53, 0x4f, 0x43,
-    0x58, 0x48, 0x4b, 0x59, 0x50, 0x4e, 0x4b, 0x51, 0x4a, 0x55, 0x44, 0x46,
-    0x4c, 0x3d, 0x4c, 0x52, 0x44, 0x52, 0x4c, 0x41, 0x4f, 0x44, 0x4a, 0x47,
-    0x4e, 0x48, 0x49, 0x2e, 0x3e, 0x45, 0x4c, 0x48, 0x41, 0x47, 0x4d, 0x2e,
-    0x40, 0x4b, 0x4c, 0x42, 0x4d, 0x40, 0x4e, 0x2e, 0x43, 0x45, 0x4b, 0x43,
-    0x3e, 0x49, 0x55, 0x35, 0x43, 0x42, 0x42, 0x40, 0x4e, 0x46, 0x44, 0x37,
-    0x49, 0x41, 0x3f, 0x52, 0x47, 0x4b, 0x43, 0x33, 0x4b, 0x47, 0x4b, 0x4c,
-    0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x40, 0x49, 0x41, 0x42, 0x49, 0x4b, 0x46,
-    0x4e, 0x4e, 0x47, 0x4e, 0x48, 0x48, 0x4b, 0x46, 0x51, 0x4b, 0x46, 0x4d,
-    0x47, 0x4f, 0x3e, 0x51, 0x46, 0x4e, 0x46, 0x4b, 0x47, 0x48, 0x4e, 0x55,
-    0x4c, 0x3d, 0x47, 0x51, 0x42, 0x45, 0x4f, 0x42, 0x52, 0x50, 0x44, 0x4c,
-    0x44, 0x44, 0x43, 0x4d, 0x40, 0x42, 0x4d, 0x4b, 0x5d, 0x4e, 0x47, 0x54,
-    0x47, 0x51, 0x43, 0x39, 0x58, 0x66, 0x4e, 0x5a, 0x41, 0x52, 0x36, 0x47,
-    0x45, 0x5f, 0x34, 0x50, 0x46, 0x79, 0x30, 0x48, 0x50, 0x45, 0x32, 0x22,
-    0x54, 0x64, 0x49, 0x46, 0x45, 0x3c, 0x42, 0x36, 0x65, 0x5c, 0x48, 0x3a,
-    0x4d, 0x4b, 0x47, 0x3e, 0x63, 0x56, 0x4a, 0x48, 0x51, 0x42, 0x4f, 0x5e,
-    0x4c, 0x44, 0x4b, 0x4c, 0x3d, 0x5a, 0x43, 0x4d, 0x42, 0x40, 0x4f, 0x4d,
-    0x3f, 0x3e, 0x46, 0x40, 0x49, 0x42, 0x49, 0x40, 0x49, 0x4c, 0x4a, 0x2e,
-    0x4b, 0x3f, 0x53, 0x4b, 0x48, 0x49, 0x3e, 0x34, 0x47, 0x4a, 0x4b, 0x46,
-    0x3b, 0x49, 0x46, 0x34, 0x4b, 0x48, 0x4c, 0x49, 0x49, 0x43, 0x4f, 0x2e,
-    0x44, 0x46, 0x48, 0x50, 0x46, 0x4e, 0x4a, 0x37, 0x4b, 0x4c, 0x4a, 0x50,
-    0x45, 0x4a, 0x48, 0x3b, 0x48, 0x44, 0x48, 0x4a, 0x41, 0x44, 0x52, 0x3f,
-    0x4c, 0x46, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x36, 0x53, 0x3e, 0x48, 0x47,
-    0x3f, 0x42, 0x41, 0x4c, 0x42, 0x4a, 0x52, 0x46, 0x49, 0x3f, 0x48, 0x5a,
-    0x43, 0x42, 0x3d, 0x43, 0x4f, 0x44, 0x43, 0x65, 0x41, 0x41, 0x44, 0x4b,
-    0x50, 0x44, 0x53, 0x49, 0x41, 0x45, 0x4a, 0x4d, 0x40, 0x45, 0x4a, 0x4e,
-    0x50, 0x40, 0x51, 0x40, 0x5e, 0x50, 0x43, 0x5c, 0x47, 0x5a, 0x44, 0x4c,
-    0x54, 0x64, 0x4f, 0x63, 0x39, 0x58, 0x3c, 0x4a, 0x42, 0x5e, 0x3c, 0x4a,
-    0x48, 0x7b, 0x34, 0x4c, 0x4f, 0x44, 0x30, 0x24, 0x50, 0x65, 0x47, 0x39,
-    0x46, 0x3e, 0x3f, 0x33, 0x65, 0x5a, 0x44, 0x38, 0x50, 0x47, 0x4b, 0x3e,
-    0x5b, 0x53, 0x4a, 0x4d, 0x51, 0x40, 0x47, 0x59, 0x51, 0x42, 0x4f, 0x50,
-    0x45, 0x57, 0x46, 0x50, 0x3f, 0x3c, 0x4c, 0x4f, 0x46, 0x41, 0x4a, 0x3e,
-    0x4d, 0x45, 0x51, 0x48, 0x4e, 0x44, 0x4e, 0x35, 0x44, 0x3f, 0x44, 0x48,
-    0x3c, 0x4c, 0x49, 0x2c, 0x4a, 0x46, 0x48, 0x44, 0x4b, 0x42, 0x4b, 0x2f,
-    0x4e, 0x50, 0x4c, 0x4d, 0x44, 0x46, 0x3f, 0x39, 0x4d, 0x47, 0x45, 0x41,
-    0x42, 0x47, 0x4a, 0x3a, 0x40, 0x3e, 0x4a, 0x51, 0x3f, 0x47, 0x44, 0x37,
-    0x47, 0x4e, 0x47, 0x52, 0x45, 0x42, 0x4a, 0x3d, 0x43, 0x4d, 0x4d, 0x47,
-    0x48, 0x43, 0x44, 0x44, 0x47, 0x4e, 0x52, 0x4b, 0x4e, 0x50, 0x42, 0x47,
-    0x4b, 0x4b, 0x4e, 0x4c, 0x4e, 0x47, 0x50, 0x56, 0x46, 0x47, 0x4d, 0x49,
-    0x4d, 0x46, 0x49, 0x5f, 0x49, 0x42, 0x4d, 0x44, 0x40, 0x4b, 0x52, 0x45,
-    0x46, 0x4a, 0x4b, 0x49, 0x47, 0x4b, 0x42, 0x45, 0x42, 0x44, 0x46, 0x4c,
-    0x62, 0x4a, 0x44, 0x53, 0x43, 0x5a, 0x48, 0x49, 0x59, 0x68, 0x46, 0x61,
-    0x40, 0x5a, 0x3a, 0x4d, 0x45, 0x5e, 0x33, 0x4f, 0x4e, 0x74, 0x3e, 0x3e,
-    0x5a, 0x4b, 0x34, 0x31, 0x52, 0x6c, 0x44, 0x39, 0x4c, 0x3b, 0x39, 0x3a,
-    0x63, 0x65, 0x4b, 0x40, 0x50, 0x4d, 0x53, 0x4a, 0x69, 0x56, 0x54, 0x45,
-    0x4c, 0x4c, 0x50, 0x5b, 0x4d, 0x4f, 0x3d, 0x4b, 0x44, 0x47, 0x43, 0x47,
-    0x49, 0x3c, 0x49, 0x41, 0x41, 0x3f, 0x47, 0x43, 0x48, 0x47, 0x4c, 0x43,
-    0x4a, 0x40, 0x4d, 0x32, 0x4b, 0x4d, 0x44, 0x48, 0x46, 0x44, 0x50, 0x2f,
-    0x4e, 0x49, 0x53, 0x4b, 0x52, 0x47, 0x4b, 0x2b, 0x48, 0x4b, 0x4a, 0x4c,
-    0x4d, 0x4c, 0x43, 0x37, 0x48, 0x3c, 0x4b, 0x42, 0x51, 0x3f, 0x45, 0x3c,
-    0x49, 0x40, 0x42, 0x43, 0x4d, 0x4c, 0x3f, 0x3f, 0x4d, 0x43, 0x45, 0x42,
-    0x48, 0x42, 0x48, 0x39, 0x51, 0x4e, 0x46, 0x4f, 0x3e, 0x4c, 0x45, 0x3e,
-    0x3f, 0x3f, 0x43, 0x41, 0x4b, 0x4b, 0x43, 0x4d, 0x44, 0x3b, 0x48, 0x45,
-    0x3c, 0x4a, 0x48, 0x5b, 0x3c, 0x4b, 0x4c, 0x44, 0x46, 0x3e, 0x45, 0x57,
-    0x43, 0x42, 0x51, 0x4a, 0x46, 0x47, 0x43, 0x49, 0x42, 0x43, 0x50, 0x4e,
-    0x4e, 0x44, 0x41, 0x4e, 0x4e, 0x41, 0x48, 0x47, 0x5c, 0x53, 0x44, 0x54,
-    0x44, 0x5b, 0x45, 0x46, 0x55, 0x67, 0x4d, 0x5d, 0x40, 0x5a, 0x43, 0x4b,
-    0x43, 0x60, 0x3c, 0x4b, 0x41, 0x79, 0x41, 0x41, 0x58, 0x48, 0x40, 0x3b,
-    0x4f, 0x6c, 0x46, 0x3f, 0x53, 0x3a, 0x3d, 0x36, 0x5a, 0x57, 0x44, 0x41,
-    0x4c, 0x47, 0x4e, 0x48, 0x62, 0x60, 0x4a, 0x46, 0x51, 0x3e, 0x52, 0x5f,
-    0x4b, 0x46, 0x48, 0x4c, 0x4c, 0x55, 0x43, 0x46, 0x49, 0x3e, 0x41, 0x40,
-    0x4d, 0x47, 0x46, 0x3b, 0x51, 0x3a, 0x4a, 0x45, 0x50, 0x47, 0x51, 0x38,
-    0x44, 0x41, 0x40, 0x4b, 0x4d, 0x44, 0x4d, 0x28, 0x47, 0x3e, 0x44, 0x40,
-    0x49, 0x49, 0x40, 0x3c, 0x44, 0x4c, 0x48, 0x51, 0x46, 0x3e, 0x47, 0x2a,
-    0x41, 0x44, 0x49, 0x4c, 0x4e, 0x4e, 0x42, 0x3c, 0x49, 0x42, 0x43, 0x45,
-    0x4e, 0x4d, 0x50, 0x39, 0x42, 0x43, 0x48, 0x41, 0x3f, 0x40, 0x4e, 0x3a,
-    0x44, 0x3d, 0x49, 0x4d, 0x47, 0x45, 0x4b, 0x42, 0x4c, 0x4d, 0x3f, 0x3f,
-    0x4e, 0x4d, 0x4d, 0x4d, 0x4d, 0x45, 0x47, 0x43, 0x4c, 0x46, 0x47, 0x57,
-    0x4b, 0x42, 0x4d, 0x46, 0x4b, 0x4b, 0x43, 0x58, 0x48, 0x49, 0x4d, 0x47,
-    0x43, 0x49, 0x4b, 0x48, 0x46, 0x4f, 0x4f, 0x42, 0x4a, 0x43, 0x49, 0x4e,
-    0x4a, 0x47, 0x4c, 0x48, 0x5a, 0x57, 0x4a, 0x58, 0x49, 0x4f, 0x45, 0x47,
-    0x63, 0x66, 0x4d, 0x5e, 0x4b, 0x51, 0x45, 0x4a, 0x43, 0x5d, 0x33, 0x4b,
-    0x4e, 0x70, 0x42, 0x39, 0x57, 0x4a, 0x40, 0x3a, 0x51, 0x68, 0x45, 0x45,
-    0x4c, 0x44, 0x3a, 0x3a, 0x4f, 0x62, 0x49, 0x45, 0x53, 0x4c, 0x4e, 0x41,
-    0x63, 0x5e, 0x44, 0x44, 0x47, 0x43, 0x47, 0x59, 0x4c, 0x4b, 0x4c, 0x49,
-    0x3e, 0x43, 0x4c, 0x46, 0x4c, 0x38, 0x47, 0x46, 0x46, 0x47, 0x40, 0x44,
-    0x51, 0x3e, 0x40, 0x47, 0x3f, 0x45, 0x48, 0x2a, 0x42, 0x3e, 0x43, 0x46,
-    0x50, 0x4c, 0x4a, 0x2c, 0x49, 0x4b, 0x48, 0x48, 0x40, 0x4a, 0x4a, 0x37,
-    0x4e, 0x42, 0x4f, 0x4c, 0x41, 0x43, 0x45, 0x38, 0x4e, 0x3d, 0x41, 0x47,
-    0x42, 0x42, 0x43, 0x3b, 0x4a, 0x40, 0x48, 0x4a, 0x53, 0x44, 0x4d, 0x35,
-    0x51, 0x3c, 0x4e, 0x4e, 0x3e, 0x3f, 0x4b, 0x3c, 0x3e, 0x47, 0x41, 0x48,
-    0x40, 0x46, 0x4e, 0x44, 0x49, 0x42, 0x49, 0x44, 0x4b, 0x46, 0x46, 0x43,
-    0x4c, 0x4b, 0x49, 0x4d, 0x3d, 0x47, 0x43, 0x5c, 0x4a, 0x42, 0x47, 0x4e,
-    0x47, 0x40, 0x4c, 0x55, 0x3f, 0x45, 0x46, 0x49, 0x46, 0x48, 0x49, 0x4d,
-    0x4c, 0x41, 0x49, 0x40, 0x4a, 0x44, 0x42, 0x49, 0x52, 0x41, 0x49, 0x4a,
-    0x5c, 0x53, 0x47, 0x58, 0x49, 0x55, 0x4a, 0x4a, 0x62, 0x61, 0x4b, 0x57,
-    0x3c, 0x50, 0x42, 0x4c, 0x49, 0x5f, 0x3f, 0x4a, 0x42, 0x70, 0x40, 0x40,
-    0x4f, 0x46, 0x43, 0x43, 0x4d, 0x6c, 0x41, 0x3e, 0x4e, 0x49, 0x43, 0x38,
-    0x50, 0x57, 0x43, 0x39, 0x4a, 0x4f, 0x51, 0x3e, 0x5c, 0x57, 0x46, 0x49,
-    0x41, 0x40, 0x42, 0x4f, 0x4c, 0x45, 0x46, 0x4a, 0x4c, 0x4b, 0x43, 0x42,
-    0x4c, 0x3c, 0x47, 0x47, 0x4f, 0x44, 0x45, 0x3a, 0x4d, 0x3d, 0x4d, 0x3f,
-    0x46, 0x4f, 0x41, 0x37, 0x46, 0x45, 0x54, 0x47, 0x4e, 0x46, 0x47, 0x23,
-    0x48, 0x4e, 0x4a, 0x47, 0x45, 0x45, 0x4e, 0x33, 0x49, 0x4a, 0x4d, 0x4e,
-    0x49, 0x46, 0x49, 0x36, 0x48, 0x44, 0x53, 0x44, 0x4a, 0x45, 0x4a, 0x37,
-    0x45, 0x36, 0x4b, 0x4e, 0x50, 0x3f, 0x49, 0x38, 0x40, 0x43, 0x46, 0x4c,
-    0x43, 0x46, 0x4a, 0x3f, 0x45, 0x3d, 0x44, 0x47, 0x44, 0x42, 0x4a, 0x45,
-    0x47, 0x43, 0x4d, 0x4d, 0x44, 0x44, 0x4f, 0x4a, 0x4a, 0x41, 0x50, 0x50,
-    0x4b, 0x44, 0x54, 0x5c, 0x4b, 0x3a, 0x46, 0x4a, 0x4a, 0x43, 0x48, 0x5c,
-    0x4b, 0x43, 0x47, 0x3d, 0x3e, 0x54, 0x42, 0x47, 0x42, 0x4f, 0x4b, 0x4b,
-    0x46, 0x46, 0x46, 0x42, 0x42, 0x4b, 0x48, 0x45, 0x51, 0x4e, 0x49, 0x4d,
-    0x43, 0x56, 0x45, 0x40, 0x5a, 0x58, 0x4c, 0x55, 0x40, 0x4b, 0x4c, 0x51,
-    0x42, 0x59, 0x43, 0x46, 0x46, 0x69, 0x43, 0x3c, 0x54, 0x47, 0x3d, 0x41,
-    0x52, 0x64, 0x44, 0x38, 0x4f, 0x49, 0x3a, 0x3a, 0x55, 0x54, 0x45, 0x3e,
-    0x49, 0x44, 0x4e, 0x3f, 0x57, 0x50, 0x47, 0x43, 0x45, 0x48, 0x53, 0x5b,
-    0x53, 0x4d, 0x48, 0x4e, 0x48, 0x3a, 0x3e, 0x46, 0x42, 0x36, 0x50, 0x4d,
-    0x49, 0x4b, 0x4b, 0x45, 0x4c, 0x44, 0x50, 0x47, 0x3e, 0x49, 0x50, 0x37,
-    0x4c, 0x4b, 0x4a, 0x54, 0x4e, 0x43, 0x40, 0x25, 0x46, 0x42, 0x52, 0x3d,
-    0x44, 0x45, 0x51, 0x2e, 0x4a, 0x3d, 0x46, 0x46, 0x4c, 0x42, 0x48, 0x34,
-    0x44, 0x44, 0x44, 0x4c, 0x4f, 0x4b, 0x42, 0x3d, 0x45, 0x40, 0x47, 0x49,
-    0x43, 0x41, 0x3e, 0x39, 0x47, 0x4b, 0x50, 0x4a, 0x46, 0x47, 0x4e, 0x3b,
-    0x4e, 0x3e, 0x49, 0x4a, 0x50, 0x40, 0x43, 0x49, 0x48, 0x3c, 0x4f, 0x45,
-    0x4a, 0x41, 0x42, 0x48, 0x4b, 0x46, 0x4a, 0x50, 0x40, 0x49, 0x44, 0x54,
-    0x45, 0x45, 0x4a, 0x4b, 0x51, 0x51, 0x48, 0x53, 0x50, 0x3f, 0x50, 0x46,
-    0x44, 0x45, 0x51, 0x43, 0x4f, 0x3e, 0x41, 0x41, 0x46, 0x45, 0x45, 0x4c,
-    0x54, 0x3c, 0x4a, 0x4c, 0x5a, 0x4f, 0x46, 0x4b, 0x47, 0x4a, 0x43, 0x4c,
-    0x56, 0x5a, 0x4a, 0x53, 0x4c, 0x49, 0x46, 0x4c, 0x45, 0x59, 0x40, 0x4b,
-    0x48, 0x60, 0x3d, 0x42, 0x52, 0x3f, 0x42, 0x3d, 0x52, 0x5f, 0x46, 0x42,
-    0x4b, 0x4e, 0x4a, 0x3d, 0x52, 0x55, 0x53, 0x37, 0x47, 0x3e, 0x4a, 0x42,
-    0x51, 0x54, 0x48, 0x48, 0x4b, 0x48, 0x3e, 0x52, 0x41, 0x4e, 0x4c, 0x4f,
-    0x43, 0x3b, 0x4b, 0x4b, 0x4c, 0x40, 0x48, 0x49, 0x4d, 0x3a, 0x45, 0x3c,
-    0x53, 0x44, 0x48, 0x4d, 0x4b, 0x49, 0x46, 0x3c, 0x4d, 0x40, 0x51, 0x3f,
-    0x4c, 0x45, 0x44, 0x2f, 0x49, 0x51, 0x3f, 0x4d, 0x3e, 0x4e, 0x3c, 0x30,
-    0x3d, 0x48, 0x4f, 0x3f, 0x45, 0x45, 0x46, 0x3b, 0x4c, 0x46, 0x4d, 0x50,
-    0x4c, 0x3d, 0x41, 0x37, 0x3e, 0x3e, 0x4f, 0x4b, 0x4d, 0x4f, 0x45, 0x45,
-    0x4a, 0x47, 0x4a, 0x44, 0x43, 0x46, 0x51, 0x41, 0x4e, 0x39, 0x44, 0x4a,
-    0x4e, 0x49, 0x4a, 0x42, 0x49, 0x4b, 0x4e, 0x48, 0x49, 0x4a, 0x45, 0x4a,
-    0x45, 0x41, 0x4a, 0x4b, 0x42, 0x41, 0x48, 0x4a, 0x44, 0x3a, 0x46, 0x49,
-    0x54, 0x45, 0x44, 0x60, 0x4a, 0x4e, 0x45, 0x4a, 0x4a, 0x45, 0x4b, 0x49,
-    0x42, 0x44, 0x46, 0x50, 0x4b, 0x4b, 0x4e, 0x45, 0x48, 0x3e, 0x55, 0x42,
-    0x51, 0x49, 0x49, 0x44, 0x4e, 0x54, 0x53, 0x49, 0x4c, 0x63, 0x48, 0x5a,
-    0x50, 0x4b, 0x45, 0x49, 0x43, 0x57, 0x4c, 0x3f, 0x4d, 0x67, 0x3f, 0x47,
-    0x53, 0x49, 0x43, 0x44, 0x49, 0x61, 0x50, 0x47, 0x49, 0x49, 0x4a, 0x42,
-    0x4a, 0x51, 0x46, 0x43, 0x3f, 0x34, 0x40, 0x3a, 0x45, 0x54, 0x4c, 0x55,
-    0x40, 0x3c, 0x4a, 0x4d, 0x3e, 0x4d, 0x48, 0x51, 0x4c, 0x3e, 0x4c, 0x4f,
-    0x50, 0x47, 0x4d, 0x49, 0x4d, 0x4e, 0x45, 0x43, 0x41, 0x41, 0x40, 0x47,
-    0x43, 0x4a, 0x4a, 0x3c, 0x4c, 0x3d, 0x4e, 0x43, 0x41, 0x42, 0x4a, 0x30,
-    0x45, 0x4c, 0x45, 0x55, 0x46, 0x39, 0x43, 0x39, 0x45, 0x47, 0x48, 0x53,
-    0x4a, 0x48, 0x43, 0x38, 0x4f, 0x51, 0x4d, 0x4c, 0x41, 0x46, 0x40, 0x3d,
-    0x43, 0x4b, 0x40, 0x46, 0x47, 0x50, 0x4a, 0x43, 0x50, 0x4e, 0x45, 0x4f,
-    0x4d, 0x44, 0x4d, 0x3f, 0x4e, 0x48, 0x4a, 0x49, 0x44, 0x3d, 0x4a, 0x44,
-    0x40, 0x45, 0x49, 0x40, 0x4a, 0x44, 0x4f, 0x4a, 0x43, 0x4a, 0x4e, 0x52,
-    0x4d, 0x50, 0x48, 0x4c, 0x43, 0x45, 0x4d, 0x54, 0x4a, 0x49, 0x4c, 0x58,
-    0x4c, 0x48, 0x4c, 0x44, 0x4b, 0x4e, 0x52, 0x44, 0x49, 0x44, 0x47, 0x4e,
-    0x4b, 0x45, 0x49, 0x3e, 0x4c, 0x3b, 0x53, 0x3f, 0x51, 0x41, 0x3f, 0x44,
-    0x43, 0x4a, 0x4b, 0x43, 0x53, 0x57, 0x50, 0x53, 0x4f, 0x4b, 0x48, 0x51,
-    0x47, 0x49, 0x46, 0x4d, 0x4d, 0x5e, 0x44, 0x46, 0x56, 0x3d, 0x3c, 0x3e,
-    0x47, 0x55, 0x54, 0x46, 0x42, 0x49, 0x4f, 0x43, 0x48, 0x54, 0x51, 0x40,
-    0x44, 0x44, 0x47, 0x45, 0x4b, 0x59, 0x4d, 0x47, 0x40, 0x39, 0x48, 0x54,
-    0x43, 0x45, 0x44, 0x42, 0x4c, 0x3c, 0x4d, 0x42, 0x4b, 0x45, 0x42, 0x48,
-    0x51, 0x44, 0x45, 0x3f, 0x3d, 0x49, 0x4b, 0x4a, 0x41, 0x43, 0x4f, 0x3f,
-    0x51, 0x4b, 0x44, 0x46, 0x46, 0x44, 0x53, 0x3d, 0x47, 0x47, 0x43, 0x4b,
-    0x41, 0x43, 0x3c, 0x3b, 0x49, 0x47, 0x47, 0x49, 0x4b, 0x3d, 0x43, 0x43,
-    0x4b, 0x47, 0x45, 0x4e, 0x42, 0x4a, 0x4c, 0x3e, 0x51, 0x3e, 0x46, 0x44,
-    0x46, 0x43, 0x42, 0x42, 0x47, 0x4d, 0x51, 0x4b, 0x49, 0x44, 0x4d, 0x40,
-    0x50, 0x43, 0x41, 0x4c, 0x42, 0x49, 0x49, 0x4c, 0x42, 0x50, 0x48, 0x3f,
-    0x46, 0x42, 0x48, 0x57, 0x49, 0x4d, 0x47, 0x4e, 0x48, 0x4b, 0x46, 0x50,
-    0x47, 0x45, 0x52, 0x45, 0x4b, 0x48, 0x40, 0x5b, 0x4e, 0x43, 0x51, 0x48,
-    0x48, 0x4a, 0x4a, 0x4a, 0x52, 0x51, 0x4c, 0x4b, 0x42, 0x55, 0x4d, 0x46,
-    0x50, 0x40, 0x4a, 0x50, 0x51, 0x3e, 0x42, 0x4c, 0x43, 0x46, 0x4d, 0x46,
-    0x46, 0x4d, 0x4d, 0x52, 0x4e, 0x44, 0x45, 0x47, 0x49, 0x4c, 0x41, 0x44,
-    0x4d, 0x54, 0x4c, 0x4a, 0x54, 0x3e, 0x44, 0x43, 0x53, 0x55, 0x4b, 0x4a,
-    0x47, 0x47, 0x4f, 0x46, 0x4f, 0x4b, 0x51, 0x3f, 0x41, 0x4c, 0x43, 0x46,
-    0x55, 0x51, 0x40, 0x4b, 0x4f, 0x40, 0x47, 0x50, 0x4e, 0x4a, 0x46, 0x4e,
-    0x42, 0x4d, 0x48, 0x49, 0x48, 0x4a, 0x4a, 0x43, 0x49, 0x48, 0x44, 0x3b,
-    0x51, 0x46, 0x3d, 0x43, 0x47, 0x4a, 0x4f, 0x42, 0x4a, 0x50, 0x4f, 0x41,
-    0x45, 0x45, 0x43, 0x3c, 0x4c, 0x4c, 0x46, 0x4b, 0x3e, 0x44, 0x4b, 0x3a,
-    0x45, 0x50, 0x42, 0x48, 0x46, 0x47, 0x44, 0x3a, 0x53, 0x46, 0x4e, 0x4f,
-    0x43, 0x40, 0x46, 0x48, 0x4e, 0x45, 0x3f, 0x47, 0x48, 0x3f, 0x44, 0x4f,
-    0x44, 0x47, 0x4e, 0x47, 0x47, 0x49, 0x42, 0x43, 0x3f, 0x49, 0x4a, 0x53,
-    0x53, 0x4a, 0x4e, 0x4a, 0x49, 0x4d, 0x49, 0x41, 0x48, 0x4d, 0x4d, 0x4e,
-    0x4b, 0x45, 0x4d, 0x4a, 0x46, 0x4a, 0x46, 0x51, 0x4b, 0x47, 0x49, 0x45,
-    0x49, 0x49, 0x4b, 0x5c, 0x48, 0x42, 0x51, 0x4c, 0x41, 0x3f, 0x4c, 0x42,
-    0x4f, 0x45, 0x4b, 0x4a, 0x52, 0x48, 0x53, 0x4f, 0x40, 0x47, 0x41, 0x47,
-    0x68, 0xfb, 0xff, 0xff, 0x4c, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00,
+    0xbc, 0xb3, 0xff, 0xff, 0xc0, 0xb3, 0xff, 0xff, 0x1e, 0xb4, 0xff, 0xff,
+    0x04, 0x00, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0x89, 0xa5, 0xe8, 0xc1,
+    0xb1, 0x89, 0x5b, 0xc6, 0x4f, 0x9b, 0xd3, 0x74, 0x93, 0x88, 0xff, 0xaf,
+    0x89, 0xff, 0xf4, 0x70, 0xcc, 0x75, 0x78, 0xbf, 0x92, 0xcd, 0xa9, 0xa8,
+    0xd6, 0x6a, 0x6f, 0x7b, 0x7f, 0xd8, 0xa8, 0xb1, 0xe6, 0x32, 0x21, 0x70,
+    0xa0, 0x9c, 0x6f, 0xc8, 0xc6, 0x59, 0x67, 0x93, 0x97, 0xca, 0x3f, 0xde,
+    0xcb, 0x74, 0x7c, 0xb5, 0xa4, 0xd9, 0x66, 0xc6, 0x87, 0x98, 0xa5, 0xd0,
+    0xbb, 0xb9, 0xc2, 0xb2, 0xaa, 0x79, 0x25, 0xb9, 0x6d, 0x5a, 0xc8, 0x7f,
+    0x70, 0x85, 0x79, 0xbc, 0x6a, 0x9b, 0xd1, 0x9a, 0x9c, 0x51, 0x53, 0x71,
+    0x89, 0xc0, 0xb4, 0xac, 0xae, 0x47, 0x67, 0x70, 0x79, 0xd2, 0x81, 0xa5,
+    0xd2, 0x09, 0x38, 0x82, 0x74, 0xc9, 0x5d, 0xaf, 0xc1, 0x4f, 0x53, 0x99,
+    0xcb, 0xb7, 0x3a, 0xba, 0xe8, 0x7f, 0x76, 0xb9, 0xb3, 0xd3, 0x60, 0xc0,
+    0x93, 0x9f, 0x87, 0xbd, 0xd0, 0xb8, 0xca, 0xc1, 0xb6, 0x6c, 0x01, 0xc1,
+    0x5c, 0x5d, 0xb2, 0x82, 0x76, 0x77, 0x39, 0xbc, 0x72, 0x6a, 0xc3, 0xb4,
+    0x79, 0x21, 0x48, 0x42, 0x86, 0xa6, 0xbd, 0xaf, 0xae, 0x23, 0x9c, 0x69,
+    0x78, 0xc3, 0x6b, 0xb3, 0xab, 0x43, 0xb2, 0x88, 0x71, 0xc6, 0x6b, 0xbe,
+    0xc3, 0x75, 0xc2, 0xc3, 0xa5, 0xcf, 0x32, 0xbe, 0xcb, 0xb0, 0xb8, 0xc1,
+    0x9c, 0xcf, 0x64, 0xc4, 0xb4, 0x96, 0xa8, 0xb9, 0xcb, 0xc0, 0xc0, 0xb8,
+    0xb8, 0x77, 0x65, 0xc0, 0xc4, 0xb3, 0xc5, 0x77, 0x9b, 0x61, 0xd4, 0xac,
+    0x7e, 0x36, 0xb1, 0xae, 0x36, 0x36, 0xb8, 0x39, 0x6b, 0x70, 0x9c, 0xb5,
+    0x88, 0x5c, 0xb3, 0x6a, 0xad, 0xc5, 0x7b, 0xb4, 0xad, 0xaa, 0xc4, 0x84,
+    0x5e, 0xc4, 0x67, 0xc1, 0xde, 0xba, 0xcf, 0xbd, 0xa0, 0xd3, 0x35, 0xb3,
+    0xe7, 0xc8, 0xb8, 0xb8, 0xaf, 0xb4, 0x59, 0xb8, 0xb4, 0xac, 0xac, 0xaa,
+    0xc7, 0xad, 0xc8, 0xb6, 0xac, 0x99, 0xa0, 0xcb, 0xc1, 0xc8, 0xcb, 0x89,
+    0xc3, 0xac, 0xca, 0x8b, 0x97, 0x1f, 0xbd, 0xbf, 0x13, 0xad, 0xc8, 0x41,
+    0x56, 0x3c, 0x86, 0xb2, 0x61, 0xc4, 0xbb, 0x71, 0xba, 0x92, 0x8d, 0xc3,
+    0x86, 0xcb, 0xc5, 0x8d, 0x88, 0xc8, 0x6a, 0xbf, 0x9c, 0xcd, 0xcd, 0xc0,
+    0x81, 0xb1, 0x47, 0xb5, 0xf0, 0xce, 0xb1, 0xc1, 0xaa, 0xa8, 0x54, 0xcb,
+    0xbc, 0xc7, 0xc5, 0x8e, 0xc3, 0xce, 0xc7, 0xb9, 0xb9, 0xa1, 0xc5, 0xbd,
+    0xb8, 0xb8, 0xb7, 0x81, 0xb6, 0xba, 0xd2, 0x90, 0xbc, 0x96, 0xbe, 0xba,
+    0x53, 0xb5, 0xc7, 0x3c, 0x3c, 0x1f, 0x90, 0xaa, 0x5a, 0xb8, 0xba, 0x7e,
+    0xbc, 0x9e, 0xc2, 0xb1, 0x6e, 0xc0, 0xc4, 0x91, 0xf0, 0xb5, 0x60, 0xad,
+    0x73, 0xba, 0xcd, 0xba, 0x6e, 0x94, 0x39, 0xb5, 0xe4, 0xbe, 0xb4, 0xb5,
+    0xa0, 0xa9, 0x51, 0xac, 0xbc, 0xc2, 0xb3, 0x8a, 0xbd, 0x9a, 0xca, 0xb3,
+    0xbf, 0xaf, 0xb5, 0x9a, 0xb9, 0xc3, 0xb6, 0x92, 0xb5, 0xc1, 0xb0, 0x95,
+    0xd6, 0xcc, 0xbb, 0xbb, 0xa9, 0xb9, 0xac, 0x4a, 0x62, 0x27, 0xa7, 0xa7,
+    0x30, 0xbd, 0xb1, 0x73, 0xa1, 0x74, 0xc2, 0xb7, 0x58, 0xc0, 0xae, 0x8f,
+    0xe1, 0xac, 0x4e, 0xb0, 0x55, 0xc9, 0xc8, 0x9f, 0x83, 0x8e, 0x3e, 0xd5,
+    0xb5, 0xbe, 0xcd, 0xb2, 0xa6, 0xc8, 0x64, 0xac, 0xc0, 0xc8, 0xaf, 0x99,
+    0xc5, 0x9e, 0xb8, 0xbd, 0xa9, 0xc2, 0xb3, 0x81, 0xb4, 0xc2, 0xb4, 0x8f,
+    0xbc, 0xb8, 0x9c, 0x88, 0xbe, 0xc6, 0xbf, 0xba, 0xc8, 0xb4, 0xab, 0x5b,
+    0x92, 0x51, 0xb1, 0x9a, 0x44, 0xb9, 0xab, 0x80, 0xa5, 0x3e, 0xc0, 0xa5,
+    0x5c, 0xb6, 0xa8, 0xa2, 0xb3, 0x9a, 0x6b, 0xb3, 0x34, 0xc6, 0x7e, 0x96,
+    0xcb, 0x88, 0x48, 0xc6, 0xa3, 0xbb, 0xd2, 0xa2, 0xaf, 0xd0, 0x6e, 0xae,
+    0xb4, 0xce, 0xc8, 0x8f, 0xd7, 0xad, 0xc8, 0xb0, 0xae, 0xb7, 0xb2, 0x70,
+    0xb9, 0xad, 0xc1, 0xa0, 0xcb, 0xa2, 0xb0, 0x9b, 0xbe, 0xd3, 0xca, 0xb6,
+    0xbd, 0xaf, 0xa9, 0x82, 0xa1, 0xd7, 0xbc, 0x9b, 0x8b, 0xac, 0xaa, 0xac,
+    0xad, 0x37, 0xb7, 0xb6, 0x46, 0xae, 0xa9, 0xbd, 0x6b, 0x90, 0x5e, 0xcd,
+    0x23, 0xa4, 0x76, 0xa1, 0xc4, 0x96, 0x50, 0xcc, 0x95, 0x99, 0x93, 0xa7,
+    0xb2, 0xe1, 0x7c, 0xbd, 0xbd, 0xb5, 0xbf, 0x9a, 0xca, 0x80, 0xd7, 0xae,
+    0x79, 0xa8, 0xaa, 0xb2, 0xbc, 0x51, 0xda, 0xa3, 0x80, 0x8b, 0xa2, 0xc8,
+    0xd1, 0x94, 0xe1, 0xc4, 0xbd, 0xae, 0xae, 0xcc, 0xb3, 0xca, 0xd5, 0xa1,
+    0xd5, 0xa7, 0xaf, 0xd2, 0xb4, 0x8d, 0xcc, 0xc8, 0x63, 0xa3, 0xa4, 0xdf,
+    0x6f, 0x7e, 0x98, 0xdf, 0x1b, 0x7b, 0x43, 0x99, 0xb0, 0x99, 0x71, 0xdb,
+    0x63, 0x7b, 0x69, 0x9c, 0xba, 0xcd, 0x90, 0xd0, 0xb6, 0xa6, 0x9e, 0x95,
+    0x50, 0xb6, 0xff, 0xff, 0xae, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xc7, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00,
+    0xda, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0xc0, 0x44, 0x00, 0x00,
+    0x2c, 0x30, 0x38, 0x5a, 0x3d, 0x4c, 0x44, 0x3b, 0x48, 0x48, 0x44, 0x57,
+    0x3f, 0x43, 0x45, 0x3a, 0x24, 0x32, 0x21, 0x5c, 0x3f, 0x3a, 0x38, 0x3a,
+    0x35, 0x35, 0x2f, 0x51, 0x3c, 0x3a, 0x45, 0x3a, 0x3b, 0x41, 0x39, 0x55,
+    0x3c, 0x41, 0x39, 0x44, 0x3a, 0x40, 0x37, 0x48, 0x33, 0x47, 0x36, 0x3e,
+    0x3c, 0x41, 0x3f, 0x3e, 0x3e, 0x47, 0x36, 0x3e, 0x41, 0x33, 0x3e, 0x3b,
+    0x3a, 0x46, 0x45, 0x40, 0x48, 0x3a, 0x35, 0x4b, 0x45, 0x4d, 0x3c, 0x49,
+    0x42, 0x44, 0x3c, 0x4c, 0x3e, 0x3c, 0x44, 0x32, 0x33, 0x41, 0x36, 0x4b,
+    0x38, 0x3b, 0x3c, 0x38, 0x3b, 0x45, 0x34, 0x46, 0x40, 0x4e, 0x44, 0x35,
+    0x43, 0x36, 0x3d, 0x40, 0x3e, 0x48, 0x40, 0x34, 0x3a, 0x46, 0x45, 0x43,
+    0x45, 0x3f, 0x47, 0x37, 0x36, 0x35, 0x44, 0x3a, 0x3e, 0x37, 0x39, 0x40,
+    0x3a, 0x3f, 0x3f, 0x4c, 0x3e, 0x41, 0x43, 0x35, 0x3f, 0x3d, 0x3d, 0x4c,
+    0x3c, 0x4a, 0x46, 0x3c, 0x3a, 0x41, 0x40, 0x4e, 0x36, 0x47, 0x40, 0x3b,
+    0x47, 0x42, 0x38, 0x4d, 0x48, 0x47, 0x3c, 0x3c, 0x33, 0x3b, 0x3e, 0x42,
+    0x3f, 0x3e, 0x3a, 0x3d, 0x32, 0x39, 0x41, 0x46, 0x3a, 0x3a, 0x3e, 0x3e,
+    0x47, 0x48, 0x4e, 0x36, 0x44, 0x40, 0x41, 0x45, 0x3a, 0x3c, 0x38, 0x55,
+    0x2e, 0x26, 0x2f, 0x32, 0x3f, 0x41, 0x3e, 0x4c, 0x45, 0x36, 0x40, 0x31,
+    0x17, 0x2e, 0x14, 0x53, 0x34, 0x30, 0x34, 0x3f, 0x2e, 0x44, 0x2b, 0x4e,
+    0x34, 0x3e, 0x34, 0x43, 0x3d, 0x35, 0x3f, 0x46, 0x39, 0x40, 0x38, 0x3e,
+    0x35, 0x3b, 0x35, 0x45, 0x3d, 0x40, 0x38, 0x37, 0x40, 0x3e, 0x32, 0x3e,
+    0x41, 0x39, 0x30, 0x41, 0x3a, 0x32, 0x3e, 0x3d, 0x39, 0x31, 0x33, 0x3e,
+    0x41, 0x47, 0x40, 0x47, 0x35, 0x33, 0x3c, 0x32, 0x40, 0x3c, 0x42, 0x49,
+    0x34, 0x38, 0x39, 0x37, 0x39, 0x35, 0x40, 0x4d, 0x37, 0x43, 0x42, 0x3e,
+    0x3f, 0x3c, 0x3e, 0x51, 0x36, 0x37, 0x42, 0x41, 0x36, 0x31, 0x43, 0x3d,
+    0x46, 0x43, 0x37, 0x46, 0x32, 0x45, 0x42, 0x36, 0x3f, 0x42, 0x42, 0x41,
+    0x3d, 0x46, 0x39, 0x41, 0x3c, 0x3f, 0x38, 0x3c, 0x43, 0x43, 0x3d, 0x3c,
+    0x3d, 0x41, 0x38, 0x42, 0x3a, 0x3d, 0x43, 0x42, 0x41, 0x40, 0x39, 0x36,
+    0x3a, 0x3c, 0x3c, 0x4f, 0x44, 0x36, 0x39, 0x35, 0x46, 0x46, 0x36, 0x4a,
+    0x3a, 0x42, 0x43, 0x39, 0x3f, 0x3d, 0x3c, 0x47, 0x38, 0x3f, 0x43, 0x40,
+    0x36, 0x3c, 0x45, 0x3b, 0x33, 0x36, 0x3b, 0x39, 0x3c, 0x35, 0x40, 0x38,
+    0x40, 0x3e, 0x3f, 0x48, 0x3f, 0x34, 0x40, 0x53, 0x26, 0x2c, 0x29, 0x39,
+    0x2a, 0x38, 0x3f, 0x45, 0x32, 0x31, 0x4a, 0x37, 0x1c, 0x28, 0x09, 0x43,
+    0x35, 0x3b, 0x33, 0x3c, 0x32, 0x3f, 0x28, 0x41, 0x36, 0x35, 0x3a, 0x37,
+    0x41, 0x39, 0x32, 0x3c, 0x40, 0x3c, 0x3c, 0x32, 0x38, 0x39, 0x37, 0x44,
+    0x3a, 0x33, 0x41, 0x36, 0x37, 0x3c, 0x35, 0x3a, 0x3d, 0x30, 0x3d, 0x41,
+    0x37, 0x3c, 0x45, 0x3a, 0x37, 0x2f, 0x36, 0x3c, 0x3a, 0x3d, 0x39, 0x48,
+    0x46, 0x33, 0x3a, 0x3e, 0x40, 0x3d, 0x3b, 0x52, 0x38, 0x45, 0x34, 0x47,
+    0x39, 0x36, 0x37, 0x56, 0x42, 0x3f, 0x33, 0x36, 0x38, 0x3f, 0x40, 0x53,
+    0x3e, 0x37, 0x3d, 0x3c, 0x48, 0x3a, 0x3d, 0x33, 0x39, 0x40, 0x3e, 0x35,
+    0x3d, 0x46, 0x38, 0x36, 0x37, 0x43, 0x3a, 0x3c, 0x40, 0x38, 0x39, 0x3b,
+    0x39, 0x3a, 0x42, 0x3d, 0x34, 0x3f, 0x35, 0x43, 0x3a, 0x35, 0x46, 0x3a,
+    0x48, 0x38, 0x3b, 0x48, 0x3c, 0x35, 0x42, 0x3d, 0x3a, 0x3d, 0x38, 0x42,
+    0x3e, 0x3c, 0x33, 0x39, 0x34, 0x30, 0x42, 0x44, 0x41, 0x3d, 0x3c, 0x39,
+    0x3c, 0x3a, 0x39, 0x41, 0x3d, 0x44, 0x3c, 0x40, 0x3f, 0x3e, 0x42, 0x3f,
+    0x37, 0x40, 0x39, 0x3b, 0x42, 0x43, 0x49, 0x37, 0x39, 0x46, 0x35, 0x3c,
+    0x3e, 0x39, 0x45, 0x52, 0x24, 0x2d, 0x38, 0x35, 0x3a, 0x3a, 0x3c, 0x44,
+    0x39, 0x32, 0x51, 0x3f, 0x16, 0x34, 0x0a, 0x49, 0x39, 0x38, 0x39, 0x3e,
+    0x2f, 0x36, 0x24, 0x3f, 0x37, 0x34, 0x38, 0x3b, 0x34, 0x34, 0x30, 0x3b,
+    0x3d, 0x36, 0x35, 0x42, 0x33, 0x40, 0x37, 0x35, 0x43, 0x3f, 0x3f, 0x39,
+    0x3a, 0x43, 0x36, 0x3e, 0x39, 0x3d, 0x3f, 0x3d, 0x47, 0x3b, 0x39, 0x37,
+    0x35, 0x42, 0x3f, 0x3b, 0x41, 0x3a, 0x42, 0x4b, 0x3d, 0x3f, 0x3d, 0x3e,
+    0x38, 0x3b, 0x34, 0x4e, 0x3f, 0x39, 0x36, 0x43, 0x39, 0x35, 0x41, 0x4d,
+    0x3c, 0x39, 0x43, 0x33, 0x37, 0x3b, 0x41, 0x48, 0x3c, 0x3f, 0x39, 0x32,
+    0x35, 0x3d, 0x42, 0x35, 0x3d, 0x3e, 0x37, 0x3b, 0x38, 0x3a, 0x44, 0x36,
+    0x42, 0x35, 0x48, 0x40, 0x3a, 0x44, 0x44, 0x39, 0x43, 0x41, 0x3c, 0x37,
+    0x47, 0x3b, 0x42, 0x42, 0x45, 0x3a, 0x40, 0x46, 0x35, 0x3f, 0x3a, 0x48,
+    0x35, 0x44, 0x3f, 0x37, 0x33, 0x3e, 0x45, 0x49, 0x39, 0x43, 0x47, 0x37,
+    0x3f, 0x3f, 0x3b, 0x44, 0x38, 0x3d, 0x39, 0x42, 0x37, 0x3e, 0x40, 0x45,
+    0x3b, 0x3f, 0x40, 0x34, 0x42, 0x3f, 0x43, 0x3c, 0x43, 0x41, 0x38, 0x38,
+    0x38, 0x41, 0x55, 0x33, 0x33, 0x39, 0x39, 0x3c, 0x35, 0x39, 0x38, 0x42,
+    0x27, 0x26, 0x32, 0x41, 0x41, 0x32, 0x3f, 0x47, 0x3a, 0x38, 0x48, 0x37,
+    0x11, 0x27, 0x08, 0x49, 0x35, 0x42, 0x3c, 0x2e, 0x34, 0x43, 0x25, 0x3b,
+    0x3a, 0x33, 0x37, 0x30, 0x3c, 0x36, 0x2d, 0x3c, 0x3b, 0x39, 0x3b, 0x40,
+    0x46, 0x3a, 0x30, 0x42, 0x35, 0x32, 0x36, 0x3a, 0x3a, 0x34, 0x34, 0x33,
+    0x3d, 0x30, 0x3b, 0x42, 0x41, 0x3f, 0x3d, 0x3b, 0x44, 0x3d, 0x41, 0x41,
+    0x3d, 0x3f, 0x40, 0x51, 0x42, 0x42, 0x36, 0x45, 0x30, 0x40, 0x32, 0x4f,
+    0x3a, 0x3c, 0x40, 0x39, 0x3d, 0x3b, 0x3e, 0x4b, 0x3d, 0x37, 0x42, 0x46,
+    0x40, 0x40, 0x47, 0x3d, 0x35, 0x3c, 0x3f, 0x46, 0x37, 0x37, 0x3a, 0x2e,
+    0x3d, 0x3c, 0x3a, 0x46, 0x3a, 0x44, 0x3c, 0x3a, 0x32, 0x44, 0x31, 0x41,
+    0x43, 0x36, 0x49, 0x39, 0x3d, 0x37, 0x3f, 0x41, 0x3b, 0x3b, 0x3c, 0x42,
+    0x3c, 0x34, 0x3f, 0x3b, 0x40, 0x3e, 0x48, 0x47, 0x3e, 0x3c, 0x38, 0x39,
+    0x3f, 0x35, 0x39, 0x3f, 0x3e, 0x3e, 0x3b, 0x43, 0x41, 0x40, 0x43, 0x41,
+    0x3f, 0x37, 0x39, 0x41, 0x46, 0x32, 0x3d, 0x41, 0x36, 0x3f, 0x3e, 0x3f,
+    0x36, 0x48, 0x43, 0x3d, 0x43, 0x3f, 0x34, 0x3d, 0x34, 0x35, 0x4f, 0x32,
+    0x3c, 0x3f, 0x3d, 0x3f, 0x39, 0x3c, 0x3d, 0x47, 0x23, 0x36, 0x33, 0x45,
+    0x37, 0x2e, 0x42, 0x42, 0x39, 0x34, 0x4f, 0x3f, 0x19, 0x2b, 0x01, 0x50,
+    0x35, 0x3f, 0x37, 0x3c, 0x33, 0x35, 0x25, 0x32, 0x38, 0x3e, 0x40, 0x40,
+    0x2f, 0x38, 0x35, 0x3d, 0x31, 0x42, 0x44, 0x3c, 0x3a, 0x3d, 0x2d, 0x3e,
+    0x3b, 0x3e, 0x3d, 0x31, 0x3b, 0x37, 0x35, 0x31, 0x36, 0x35, 0x34, 0x31,
+    0x41, 0x3a, 0x33, 0x32, 0x3c, 0x31, 0x3e, 0x3d, 0x40, 0x3b, 0x34, 0x45,
+    0x36, 0x39, 0x3e, 0x3f, 0x3c, 0x45, 0x37, 0x4b, 0x42, 0x3d, 0x33, 0x43,
+    0x3e, 0x40, 0x35, 0x4e, 0x38, 0x36, 0x3a, 0x33, 0x38, 0x44, 0x3f, 0x3c,
+    0x3f, 0x40, 0x3a, 0x3c, 0x3c, 0x3c, 0x44, 0x29, 0x3a, 0x40, 0x35, 0x3a,
+    0x3d, 0x48, 0x3b, 0x30, 0x45, 0x41, 0x45, 0x40, 0x37, 0x32, 0x3a, 0x35,
+    0x3f, 0x38, 0x3b, 0x43, 0x3b, 0x3f, 0x33, 0x40, 0x3b, 0x40, 0x38, 0x33,
+    0x39, 0x3c, 0x3c, 0x3f, 0x43, 0x33, 0x43, 0x40, 0x43, 0x3d, 0x33, 0x42,
+    0x40, 0x32, 0x3e, 0x36, 0x40, 0x38, 0x43, 0x40, 0x44, 0x38, 0x34, 0x3c,
+    0x3e, 0x39, 0x47, 0x43, 0x40, 0x3b, 0x3f, 0x3f, 0x3c, 0x3b, 0x4b, 0x33,
+    0x36, 0x49, 0x32, 0x41, 0x48, 0x45, 0x57, 0x3a, 0x40, 0x42, 0x40, 0x46,
+    0x36, 0x35, 0x3c, 0x46, 0x22, 0x2e, 0x33, 0x3e, 0x3c, 0x39, 0x44, 0x4d,
+    0x3f, 0x41, 0x51, 0x44, 0x15, 0x2e, 0x02, 0x4e, 0x39, 0x3a, 0x3c, 0x35,
+    0x30, 0x38, 0x1e, 0x31, 0x40, 0x3b, 0x39, 0x3d, 0x3a, 0x37, 0x35, 0x36,
+    0x46, 0x36, 0x3c, 0x3e, 0x39, 0x3e, 0x32, 0x40, 0x3b, 0x35, 0x42, 0x41,
+    0x41, 0x38, 0x41, 0x35, 0x42, 0x36, 0x3c, 0x42, 0x3d, 0x41, 0x35, 0x31,
+    0x3f, 0x44, 0x3e, 0x41, 0x3f, 0x35, 0x42, 0x4b, 0x3e, 0x36, 0x37, 0x34,
+    0x36, 0x3d, 0x40, 0x49, 0x41, 0x3e, 0x3d, 0x3b, 0x38, 0x37, 0x40, 0x47,
+    0x35, 0x32, 0x43, 0x38, 0x36, 0x3b, 0x33, 0x47, 0x33, 0x34, 0x3d, 0x47,
+    0x3c, 0x37, 0x3d, 0x2b, 0x3a, 0x36, 0x3b, 0x3d, 0x43, 0x38, 0x35, 0x32,
+    0x32, 0x37, 0x43, 0x36, 0x3f, 0x48, 0x38, 0x30, 0x3a, 0x3c, 0x42, 0x34,
+    0x37, 0x3c, 0x37, 0x40, 0x48, 0x3e, 0x35, 0x3b, 0x3f, 0x38, 0x39, 0x3e,
+    0x37, 0x35, 0x36, 0x3d, 0x3b, 0x3c, 0x40, 0x3d, 0x34, 0x40, 0x46, 0x42,
+    0x3f, 0x3c, 0x3c, 0x3e, 0x40, 0x40, 0x3d, 0x3f, 0x3f, 0x44, 0x46, 0x41,
+    0x32, 0x43, 0x40, 0x41, 0x3c, 0x42, 0x39, 0x38, 0x48, 0x44, 0x3d, 0x38,
+    0x34, 0x40, 0x4e, 0x31, 0x3c, 0x42, 0x39, 0x48, 0x3c, 0x33, 0x3e, 0x40,
+    0x20, 0x27, 0x39, 0x45, 0x45, 0x36, 0x47, 0x4c, 0x35, 0x3e, 0x4a, 0x36,
+    0x16, 0x2f, 0x04, 0x4f, 0x3a, 0x35, 0x36, 0x3a, 0x2d, 0x36, 0x21, 0x34,
+    0x3b, 0x32, 0x3d, 0x3c, 0x3c, 0x3f, 0x3b, 0x3b, 0x41, 0x46, 0x40, 0x3d,
+    0x3b, 0x44, 0x33, 0x42, 0x34, 0x33, 0x3e, 0x45, 0x3f, 0x46, 0x39, 0x33,
+    0x3b, 0x37, 0x37, 0x37, 0x42, 0x47, 0x3c, 0x35, 0x31, 0x41, 0x44, 0x3a,
+    0x3b, 0x33, 0x39, 0x44, 0x42, 0x33, 0x3d, 0x3f, 0x43, 0x33, 0x41, 0x4a,
+    0x35, 0x46, 0x36, 0x3e, 0x39, 0x41, 0x41, 0x4c, 0x34, 0x3d, 0x38, 0x33,
+    0x3c, 0x3f, 0x43, 0x44, 0x37, 0x35, 0x35, 0x3c, 0x43, 0x34, 0x3e, 0x2d,
+    0x3f, 0x35, 0x38, 0x3c, 0x33, 0x35, 0x43, 0x2a, 0x40, 0x33, 0x34, 0x40,
+    0x3d, 0x38, 0x36, 0x2d, 0x36, 0x3c, 0x43, 0x3d, 0x37, 0x3d, 0x39, 0x38,
+    0x3b, 0x3e, 0x3c, 0x46, 0x35, 0x35, 0x43, 0x44, 0x39, 0x40, 0x34, 0x39,
+    0x3d, 0x34, 0x40, 0x45, 0x38, 0x35, 0x3e, 0x39, 0x3c, 0x44, 0x48, 0x44,
+    0x41, 0x3e, 0x3c, 0x45, 0x3a, 0x3c, 0x3c, 0x46, 0x3a, 0x40, 0x39, 0x43,
+    0x35, 0x35, 0x3e, 0x45, 0x3a, 0x34, 0x3c, 0x39, 0x46, 0x3a, 0x4f, 0x35,
+    0x32, 0x3d, 0x36, 0x41, 0x32, 0x38, 0x3f, 0x45, 0x2d, 0x34, 0x2a, 0x35,
+    0x43, 0x3f, 0x41, 0x49, 0x41, 0x3c, 0x4b, 0x3f, 0x17, 0x31, 0x02, 0x4f,
+    0x30, 0x38, 0x39, 0x40, 0x33, 0x3a, 0x25, 0x38, 0x35, 0x3c, 0x39, 0x35,
+    0x34, 0x41, 0x34, 0x43, 0x40, 0x40, 0x46, 0x3d, 0x40, 0x38, 0x3f, 0x3b,
+    0x35, 0x39, 0x3c, 0x39, 0x34, 0x38, 0x3f, 0x36, 0x3a, 0x38, 0x44, 0x3f,
+    0x3f, 0x38, 0x3c, 0x33, 0x41, 0x42, 0x38, 0x33, 0x3c, 0x3b, 0x3c, 0x46,
+    0x38, 0x3b, 0x3f, 0x33, 0x3f, 0x48, 0x3b, 0x49, 0x3f, 0x3a, 0x3d, 0x3f,
+    0x47, 0x3d, 0x30, 0x45, 0x36, 0x42, 0x3d, 0x36, 0x43, 0x38, 0x3b, 0x3d,
+    0x3c, 0x30, 0x3b, 0x43, 0x3d, 0x41, 0x34, 0x2e, 0x43, 0x3d, 0x43, 0x46,
+    0x43, 0x3c, 0x3c, 0x2e, 0x3c, 0x43, 0x34, 0x43, 0x3e, 0x43, 0x3f, 0x2b,
+    0x45, 0x40, 0x3a, 0x43, 0x36, 0x39, 0x3f, 0x3d, 0x3a, 0x3c, 0x35, 0x3b,
+    0x36, 0x3f, 0x45, 0x3e, 0x45, 0x40, 0x3f, 0x36, 0x45, 0x42, 0x35, 0x3e,
+    0x3a, 0x3a, 0x3f, 0x40, 0x3e, 0x3c, 0x39, 0x46, 0x43, 0x3e, 0x3f, 0x3f,
+    0x40, 0x3c, 0x40, 0x4b, 0x41, 0x35, 0x3b, 0x3e, 0x49, 0x32, 0x3e, 0x41,
+    0x31, 0x37, 0x3d, 0x3b, 0x3f, 0x45, 0x50, 0x3a, 0x3f, 0x3c, 0x44, 0x36,
+    0x43, 0x37, 0x3d, 0x4b, 0x29, 0x39, 0x2f, 0x38, 0x45, 0x36, 0x40, 0x4e,
+    0x39, 0x3f, 0x48, 0x43, 0x23, 0x3c, 0x06, 0x51, 0x37, 0x3b, 0x3e, 0x3b,
+    0x28, 0x45, 0x2b, 0x37, 0x3f, 0x33, 0x3f, 0x41, 0x31, 0x36, 0x33, 0x3a,
+    0x3a, 0x35, 0x3b, 0x33, 0x3e, 0x36, 0x35, 0x40, 0x3a, 0x34, 0x3a, 0x38,
+    0x34, 0x3a, 0x3a, 0x34, 0x42, 0x45, 0x40, 0x3e, 0x40, 0x38, 0x39, 0x34,
+    0x38, 0x37, 0x3f, 0x3e, 0x3c, 0x32, 0x3f, 0x46, 0x3f, 0x44, 0x3b, 0x3e,
+    0x44, 0x45, 0x36, 0x3e, 0x36, 0x3f, 0x3b, 0x40, 0x39, 0x34, 0x38, 0x41,
+    0x42, 0x3e, 0x3d, 0x47, 0x3e, 0x45, 0x33, 0x40, 0x3e, 0x3a, 0x44, 0x3d,
+    0x3c, 0x3a, 0x3a, 0x2c, 0x3a, 0x3d, 0x35, 0x45, 0x3c, 0x41, 0x36, 0x30,
+    0x32, 0x32, 0x3a, 0x3b, 0x35, 0x3c, 0x43, 0x2d, 0x35, 0x3f, 0x41, 0x37,
+    0x3f, 0x46, 0x34, 0x39, 0x3c, 0x43, 0x40, 0x3e, 0x3e, 0x36, 0x3e, 0x3c,
+    0x37, 0x3a, 0x3d, 0x3a, 0x3c, 0x38, 0x44, 0x41, 0x3f, 0x3b, 0x3c, 0x47,
+    0x40, 0x3b, 0x41, 0x47, 0x3e, 0x45, 0x39, 0x3e, 0x37, 0x45, 0x4b, 0x4c,
+    0x37, 0x37, 0x37, 0x3c, 0x3c, 0x3d, 0x40, 0x38, 0x39, 0x3e, 0x43, 0x3f,
+    0x38, 0x45, 0x51, 0x3c, 0x31, 0x34, 0x3b, 0x48, 0x46, 0x41, 0x40, 0x40,
+    0x2c, 0x39, 0x32, 0x42, 0x3c, 0x2e, 0x49, 0x4d, 0x3c, 0x3f, 0x45, 0x38,
+    0x20, 0x38, 0x03, 0x55, 0x33, 0x3e, 0x32, 0x39, 0x32, 0x3b, 0x24, 0x2b,
+    0x42, 0x35, 0x45, 0x32, 0x2e, 0x3b, 0x2f, 0x3f, 0x3c, 0x37, 0x39, 0x3b,
+    0x34, 0x34, 0x3d, 0x36, 0x3d, 0x39, 0x3b, 0x30, 0x3c, 0x3e, 0x40, 0x32,
+    0x3d, 0x3c, 0x3c, 0x3e, 0x33, 0x33, 0x3f, 0x3a, 0x33, 0x3e, 0x46, 0x36,
+    0x3a, 0x3d, 0x40, 0x40, 0x3f, 0x41, 0x3a, 0x42, 0x34, 0x32, 0x34, 0x46,
+    0x3b, 0x31, 0x40, 0x37, 0x37, 0x32, 0x3e, 0x47, 0x3f, 0x3b, 0x3e, 0x43,
+    0x49, 0x45, 0x3a, 0x3d, 0x3e, 0x44, 0x40, 0x31, 0x39, 0x3e, 0x3b, 0x2d,
+    0x3b, 0x3a, 0x33, 0x3d, 0x39, 0x37, 0x3e, 0x32, 0x41, 0x3c, 0x3a, 0x37,
+    0x3b, 0x40, 0x39, 0x2f, 0x3e, 0x3f, 0x47, 0x32, 0x3e, 0x3b, 0x3e, 0x3e,
+    0x40, 0x3e, 0x40, 0x3c, 0x41, 0x39, 0x38, 0x46, 0x45, 0x32, 0x47, 0x31,
+    0x36, 0x47, 0x37, 0x49, 0x3a, 0x3f, 0x47, 0x3a, 0x41, 0x3b, 0x3c, 0x4f,
+    0x3e, 0x36, 0x3b, 0x47, 0x35, 0x39, 0x41, 0x4e, 0x3d, 0x3e, 0x3b, 0x46,
+    0x38, 0x39, 0x3b, 0x45, 0x3e, 0x3f, 0x44, 0x42, 0x44, 0x3f, 0x55, 0x3b,
+    0x41, 0x3d, 0x43, 0x43, 0x37, 0x3f, 0x3d, 0x4c, 0x28, 0x3d, 0x36, 0x3c,
+    0x3e, 0x3e, 0x48, 0x50, 0x3e, 0x39, 0x45, 0x41, 0x22, 0x37, 0x07, 0x4f,
+    0x2e, 0x33, 0x38, 0x3f, 0x31, 0x3a, 0x1b, 0x36, 0x34, 0x38, 0x3c, 0x37,
+    0x37, 0x3e, 0x36, 0x35, 0x36, 0x3b, 0x3d, 0x38, 0x42, 0x48, 0x3d, 0x40,
+    0x40, 0x44, 0x3d, 0x39, 0x37, 0x3b, 0x3d, 0x33, 0x3d, 0x35, 0x42, 0x3c,
+    0x39, 0x3e, 0x43, 0x2d, 0x3c, 0x40, 0x43, 0x43, 0x45, 0x35, 0x3c, 0x44,
+    0x34, 0x3c, 0x3d, 0x31, 0x39, 0x40, 0x39, 0x3d, 0x3e, 0x34, 0x3e, 0x3b,
+    0x40, 0x38, 0x42, 0x4a, 0x40, 0x3b, 0x35, 0x3d, 0x36, 0x38, 0x35, 0x42,
+    0x3c, 0x3c, 0x3d, 0x3b, 0x38, 0x39, 0x45, 0x28, 0x3a, 0x37, 0x37, 0x35,
+    0x3a, 0x3d, 0x35, 0x2a, 0x3c, 0x3f, 0x37, 0x34, 0x37, 0x3f, 0x3e, 0x2b,
+    0x39, 0x43, 0x3b, 0x45, 0x35, 0x36, 0x36, 0x42, 0x33, 0x38, 0x3b, 0x35,
+    0x31, 0x3f, 0x41, 0x41, 0x3c, 0x41, 0x45, 0x42, 0x3b, 0x3c, 0x39, 0x46,
+    0x3c, 0x3e, 0x3a, 0x41, 0x39, 0x3d, 0x41, 0x4b, 0x40, 0x3f, 0x43, 0x3d,
+    0x39, 0x39, 0x44, 0x44, 0x37, 0x42, 0x3f, 0x44, 0x3e, 0x37, 0x42, 0x35,
+    0x44, 0x3f, 0x40, 0x42, 0x3f, 0x3a, 0x47, 0x3d, 0x38, 0x3a, 0x3b, 0x3a,
+    0x42, 0x36, 0x3a, 0x97, 0x32, 0x31, 0x30, 0x36, 0x47, 0x3e, 0x46, 0x51,
+    0x42, 0x34, 0x50, 0x34, 0x26, 0x3b, 0x06, 0x55, 0x3c, 0x3b, 0x2d, 0x3a,
+    0x37, 0x37, 0x1b, 0x32, 0x39, 0x3d, 0x36, 0x40, 0x3b, 0x3f, 0x33, 0x33,
+    0x3d, 0x37, 0x35, 0x37, 0x44, 0x3f, 0x35, 0x39, 0x33, 0x3c, 0x43, 0x39,
+    0x3f, 0x42, 0x3e, 0x34, 0x38, 0x38, 0x39, 0x3c, 0x48, 0x3c, 0x2f, 0x30,
+    0x40, 0x3c, 0x41, 0x3e, 0x3f, 0x3e, 0x36, 0x43, 0x40, 0x3c, 0x36, 0x43,
+    0x43, 0x38, 0x3a, 0x47, 0x3e, 0x37, 0x39, 0x3a, 0x43, 0x45, 0x38, 0x43,
+    0x3b, 0x45, 0x37, 0x44, 0x36, 0x45, 0x3a, 0x3e, 0x3e, 0x3e, 0x3d, 0x33,
+    0x39, 0x36, 0x48, 0x33, 0x30, 0x42, 0x33, 0x39, 0x37, 0x3a, 0x3f, 0x34,
+    0x34, 0x40, 0x40, 0x40, 0x3f, 0x3d, 0x3f, 0x33, 0x41, 0x40, 0x3b, 0x43,
+    0x3b, 0x3a, 0x40, 0x3a, 0x38, 0x3e, 0x38, 0x3b, 0x38, 0x42, 0x40, 0x40,
+    0x41, 0x35, 0x37, 0x38, 0x3b, 0x3c, 0x39, 0x4b, 0x32, 0x39, 0x42, 0x3c,
+    0x36, 0x3d, 0x32, 0x52, 0x3a, 0x31, 0x40, 0x40, 0x3a, 0x43, 0x3d, 0x46,
+    0x3c, 0x3e, 0x3e, 0x33, 0x3f, 0x41, 0x4d, 0x37, 0x39, 0x39, 0x3e, 0x3b,
+    0x40, 0x39, 0x53, 0x2d, 0x46, 0x3c, 0x32, 0x42, 0x3d, 0x40, 0x40, 0x4d,
+    0x2e, 0x34, 0x39, 0x3b, 0x46, 0x3b, 0x42, 0x4f, 0x3d, 0x39, 0x4e, 0x36,
+    0x1a, 0x31, 0x0e, 0x56, 0x36, 0x42, 0x38, 0x44, 0x36, 0x3a, 0x20, 0x30,
+    0x36, 0x34, 0x37, 0x38, 0x40, 0x41, 0x2a, 0x35, 0x3b, 0x3b, 0x3a, 0x38,
+    0x33, 0x39, 0x36, 0x41, 0x43, 0x39, 0x35, 0x3d, 0x37, 0x3d, 0x33, 0x31,
+    0x45, 0x33, 0x3f, 0x3b, 0x44, 0x38, 0x39, 0x34, 0x38, 0x39, 0x38, 0x3d,
+    0x3a, 0x3a, 0x41, 0x40, 0x44, 0x3e, 0x3f, 0x45, 0x34, 0x31, 0x34, 0x43,
+    0x3b, 0x34, 0x42, 0x3c, 0x3c, 0x43, 0x35, 0x45, 0x36, 0x38, 0x3d, 0x3c,
+    0x3f, 0x3d, 0x3e, 0x45, 0x41, 0x43, 0x35, 0x3f, 0x40, 0x3f, 0x3a, 0x34,
+    0x3d, 0x32, 0x41, 0x3d, 0x48, 0x42, 0x37, 0x2a, 0x3c, 0x3a, 0x3e, 0x49,
+    0x38, 0x36, 0x38, 0x2e, 0x36, 0x37, 0x34, 0x3e, 0x3c, 0x43, 0x43, 0x39,
+    0x39, 0x3b, 0x44, 0x46, 0x44, 0x43, 0x37, 0x46, 0x43, 0x34, 0x3b, 0x35,
+    0x42, 0x41, 0x3f, 0x3d, 0x3d, 0x3a, 0x42, 0x3e, 0x38, 0x47, 0x3d, 0x49,
+    0x45, 0x49, 0x3a, 0x3c, 0x3e, 0x37, 0x40, 0x46, 0x41, 0x33, 0x45, 0x36,
+    0x37, 0x44, 0x49, 0x3b, 0x44, 0x40, 0x33, 0x46, 0x37, 0x39, 0x4e, 0x3a,
+    0x43, 0x38, 0x3a, 0x42, 0x3a, 0x3d, 0x45, 0x50, 0x26, 0x34, 0x3b, 0x3c,
+    0x46, 0x46, 0x4c, 0x54, 0x3f, 0x35, 0x4e, 0x47, 0x21, 0x39, 0x0e, 0x54,
+    0x3a, 0x3a, 0x2f, 0x40, 0x2d, 0x3a, 0x1f, 0x31, 0x31, 0x42, 0x34, 0x45,
+    0x37, 0x36, 0x30, 0x3b, 0x3a, 0x3a, 0x36, 0x40, 0x32, 0x36, 0x3c, 0x3c,
+    0x37, 0x42, 0x35, 0x3e, 0x39, 0x47, 0x36, 0x32, 0x41, 0x30, 0x42, 0x39,
+    0x39, 0x44, 0x37, 0x30, 0x41, 0x3b, 0x3d, 0x3d, 0x43, 0x3b, 0x38, 0x45,
+    0x3b, 0x3a, 0x39, 0x3a, 0x31, 0x33, 0x43, 0x46, 0x3f, 0x41, 0x44, 0x3f,
+    0x3b, 0x44, 0x3a, 0x4c, 0x33, 0x33, 0x33, 0x3e, 0x37, 0x3e, 0x45, 0x45,
+    0x36, 0x42, 0x3e, 0x43, 0x40, 0x34, 0x36, 0x31, 0x38, 0x34, 0x41, 0x3b,
+    0x32, 0x38, 0x3e, 0x29, 0x47, 0x33, 0x37, 0x45, 0x3c, 0x3d, 0x43, 0x2c,
+    0x36, 0x3a, 0x3c, 0x40, 0x3d, 0x46, 0x3c, 0x37, 0x40, 0x44, 0x37, 0x38,
+    0x3e, 0x41, 0x3c, 0x40, 0x33, 0x3f, 0x44, 0x32, 0x44, 0x3a, 0x43, 0x42,
+    0x3e, 0x38, 0x44, 0x3b, 0x41, 0x48, 0x3f, 0x4e, 0x3f, 0x44, 0x35, 0x45,
+    0x34, 0x3f, 0x42, 0x4b, 0x37, 0x37, 0x3e, 0x45, 0x46, 0x45, 0x46, 0x3d,
+    0x3e, 0x39, 0x3b, 0x3a, 0x46, 0x3a, 0x56, 0x35, 0x46, 0x3d, 0x40, 0x3b,
+    0x36, 0x39, 0x3f, 0x54, 0x27, 0x2b, 0x34, 0x3c, 0x48, 0x3d, 0x49, 0x4c,
+    0x3e, 0x3d, 0x4e, 0x42, 0x25, 0x3b, 0x10, 0x4d, 0x30, 0x36, 0x3e, 0x36,
+    0x2e, 0x31, 0x1d, 0x37, 0x3a, 0x39, 0x33, 0x3f, 0x39, 0x38, 0x2e, 0x36,
+    0x44, 0x3e, 0x41, 0x37, 0x3b, 0x30, 0x3b, 0x48, 0x31, 0x39, 0x41, 0x3e,
+    0x37, 0x37, 0x34, 0x2f, 0x35, 0x3b, 0x3a, 0x3e, 0x45, 0x3e, 0x3f, 0x35,
+    0x39, 0x39, 0x3b, 0x44, 0x43, 0x3c, 0x3e, 0x46, 0x40, 0x3a, 0x36, 0x45,
+    0x41, 0x40, 0x36, 0x44, 0x3a, 0x37, 0x47, 0x47, 0x3d, 0x36, 0x43, 0x4e,
+    0x3b, 0x38, 0x40, 0x48, 0x44, 0x43, 0x45, 0x3f, 0x43, 0x3c, 0x3b, 0x37,
+    0x43, 0x41, 0x39, 0x2f, 0x3d, 0x45, 0x3e, 0x3e, 0x42, 0x40, 0x41, 0x2f,
+    0x47, 0x38, 0x3a, 0x48, 0x3e, 0x35, 0x37, 0x2a, 0x34, 0x38, 0x41, 0x3b,
+    0x3d, 0x37, 0x3b, 0x35, 0x38, 0x3e, 0x41, 0x3c, 0x41, 0x43, 0x3d, 0x46,
+    0x47, 0x47, 0x3d, 0x35, 0x48, 0x41, 0x3d, 0x3e, 0x34, 0x47, 0x38, 0x38,
+    0x39, 0x3e, 0x38, 0x4d, 0x43, 0x36, 0x42, 0x40, 0x3e, 0x41, 0x3f, 0x4c,
+    0x3e, 0x3e, 0x37, 0x44, 0x3e, 0x3b, 0x47, 0x3e, 0x3f, 0x3b, 0x39, 0x3c,
+    0x3c, 0x3c, 0x53, 0x3b, 0x3b, 0x32, 0x3e, 0x3f, 0x32, 0x3c, 0x37, 0x4b,
+    0x33, 0x30, 0x2f, 0x41, 0x47, 0x42, 0x49, 0x4f, 0x3b, 0x42, 0x4c, 0x44,
+    0x1f, 0x37, 0x16, 0x4e, 0x3b, 0x3f, 0x30, 0x36, 0x35, 0x38, 0x26, 0x36,
+    0x32, 0x3b, 0x38, 0x3c, 0x30, 0x3e, 0x34, 0x3e, 0x3d, 0x34, 0x39, 0x3c,
+    0x36, 0x47, 0x34, 0x41, 0x31, 0x39, 0x44, 0x3e, 0x39, 0x41, 0x32, 0x36,
+    0x3b, 0x3f, 0x32, 0x3d, 0x36, 0x3e, 0x40, 0x3d, 0x45, 0x32, 0x45, 0x42,
+    0x38, 0x43, 0x40, 0x42, 0x34, 0x3a, 0x43, 0x38, 0x47, 0x3f, 0x41, 0x47,
+    0x34, 0x44, 0x41, 0x39, 0x3c, 0x46, 0x36, 0x4f, 0x41, 0x3e, 0x38, 0x38,
+    0x3a, 0x3b, 0x43, 0x44, 0x37, 0x3f, 0x35, 0x43, 0x34, 0x3d, 0x40, 0x32,
+    0x3a, 0x3b, 0x3d, 0x34, 0x35, 0x43, 0x31, 0x2c, 0x3b, 0x36, 0x38, 0x41,
+    0x3c, 0x38, 0x3d, 0x31, 0x45, 0x46, 0x42, 0x41, 0x33, 0x3f, 0x3f, 0x3a,
+    0x36, 0x3f, 0x3c, 0x3c, 0x3c, 0x3e, 0x39, 0x3e, 0x40, 0x37, 0x47, 0x3e,
+    0x35, 0x39, 0x3d, 0x3d, 0x37, 0x36, 0x3e, 0x45, 0x38, 0x3d, 0x45, 0x43,
+    0x3a, 0x32, 0x3b, 0x3a, 0x32, 0x3c, 0x3d, 0x43, 0x3d, 0x33, 0x3b, 0x3d,
+    0x46, 0x3a, 0x44, 0x45, 0x3b, 0x3e, 0x3c, 0x42, 0x37, 0x37, 0x52, 0x2a,
+    0x3a, 0x35, 0x35, 0x3f, 0x40, 0x38, 0x40, 0x5b, 0x35, 0x32, 0x2b, 0x3d,
+    0x4a, 0x3c, 0x46, 0x56, 0x44, 0x30, 0x4d, 0x39, 0x20, 0x32, 0x0f, 0x4f,
+    0x33, 0x3c, 0x35, 0x35, 0x3a, 0x45, 0x29, 0x3b, 0x31, 0x38, 0x34, 0x38,
+    0x42, 0x45, 0x37, 0x3e, 0x37, 0x2e, 0x36, 0x43, 0x3f, 0x38, 0x2f, 0x41,
+    0x3f, 0x41, 0x3c, 0x31, 0x37, 0x36, 0x37, 0x39, 0x41, 0x3a, 0x3a, 0x40,
+    0x3e, 0x47, 0x3d, 0x37, 0x3c, 0x38, 0x35, 0x39, 0x3a, 0x43, 0x3f, 0x42,
+    0x42, 0x38, 0x3e, 0x40, 0x3c, 0x3a, 0x45, 0x48, 0x37, 0x3a, 0x3e, 0x35,
+    0x3a, 0x3d, 0x45, 0x4a, 0x3d, 0x37, 0x38, 0x3a, 0x3d, 0x46, 0x46, 0x41,
+    0x37, 0x41, 0x40, 0x48, 0x37, 0x34, 0x3b, 0x2c, 0x39, 0x34, 0x37, 0x35,
+    0x3a, 0x43, 0x39, 0x2e, 0x39, 0x3f, 0x40, 0x3e, 0x40, 0x40, 0x3c, 0x2d,
+    0x3e, 0x3c, 0x37, 0x39, 0x3c, 0x3b, 0x3d, 0x3f, 0x41, 0x48, 0x3b, 0x3d,
+    0x3b, 0x41, 0x45, 0x3e, 0x3a, 0x38, 0x3f, 0x3c, 0x3d, 0x3e, 0x40, 0x42,
+    0x46, 0x38, 0x43, 0x34, 0x35, 0x47, 0x3d, 0x46, 0x3f, 0x3e, 0x32, 0x3f,
+    0x3e, 0x3d, 0x47, 0x46, 0x38, 0x41, 0x45, 0x3f, 0x34, 0x3f, 0x41, 0x43,
+    0x3e, 0x3e, 0x44, 0x3b, 0x3b, 0x36, 0x51, 0x32, 0x37, 0x3c, 0x42, 0x43,
+    0x33, 0x39, 0x42, 0x61, 0x2c, 0x3b, 0x2e, 0x39, 0x42, 0x39, 0x42, 0x54,
+    0x3c, 0x3a, 0x48, 0x35, 0x26, 0x34, 0x15, 0x51, 0x35, 0x40, 0x36, 0x3c,
+    0x2d, 0x37, 0x25, 0x38, 0x33, 0x3d, 0x3d, 0x39, 0x3e, 0x3b, 0x2e, 0x4b,
+    0x3d, 0x3b, 0x42, 0x37, 0x37, 0x40, 0x37, 0x40, 0x35, 0x45, 0x37, 0x37,
+    0x3f, 0x41, 0x36, 0x39, 0x3c, 0x32, 0x3e, 0x38, 0x41, 0x40, 0x3e, 0x3f,
+    0x3b, 0x3c, 0x43, 0x35, 0x3e, 0x3d, 0x44, 0x44, 0x3a, 0x36, 0x39, 0x3f,
+    0x3a, 0x31, 0x42, 0x4d, 0x40, 0x33, 0x40, 0x45, 0x44, 0x3d, 0x40, 0x49,
+    0x41, 0x3f, 0x42, 0x3a, 0x34, 0x46, 0x38, 0x46, 0x42, 0x34, 0x3a, 0x40,
+    0x40, 0x41, 0x3d, 0x32, 0x35, 0x48, 0x35, 0x3e, 0x44, 0x41, 0x40, 0x2c,
+    0x46, 0x38, 0x38, 0x3f, 0x36, 0x40, 0x38, 0x2a, 0x43, 0x41, 0x3e, 0x35,
+    0x46, 0x3a, 0x45, 0x46, 0x46, 0x42, 0x3a, 0x3b, 0x40, 0x38, 0x35, 0x43,
+    0x38, 0x3d, 0x3b, 0x41, 0x36, 0x44, 0x3f, 0x3f, 0x34, 0x3e, 0x3c, 0x3d,
+    0x49, 0x36, 0x37, 0x4b, 0x38, 0x3c, 0x43, 0x37, 0x3a, 0x3f, 0x31, 0x45,
+    0x3b, 0x39, 0x3f, 0x40, 0x37, 0x3c, 0x42, 0x3f, 0x3c, 0x33, 0x40, 0x3b,
+    0x32, 0x3c, 0x52, 0x31, 0x3d, 0x44, 0x3b, 0x31, 0x46, 0x38, 0x40, 0x60,
+    0x2b, 0x3c, 0x37, 0x34, 0x43, 0x38, 0x45, 0x57, 0x37, 0x39, 0x49, 0x33,
+    0x2d, 0x3f, 0x18, 0x4e, 0x39, 0x39, 0x32, 0x3b, 0x34, 0x3b, 0x2c, 0x45,
+    0x33, 0x37, 0x45, 0x42, 0x3d, 0x37, 0x2a, 0x4c, 0x3d, 0x3f, 0x3c, 0x36,
+    0x37, 0x3c, 0x39, 0x47, 0x3d, 0x44, 0x3d, 0x40, 0x3d, 0x41, 0x34, 0x3e,
+    0x40, 0x34, 0x3b, 0x3a, 0x41, 0x36, 0x37, 0x40, 0x3e, 0x3f, 0x3a, 0x36,
+    0x3e, 0x35, 0x3b, 0x48, 0x41, 0x40, 0x3c, 0x42, 0x34, 0x41, 0x3f, 0x44,
+    0x34, 0x39, 0x33, 0x39, 0x39, 0x47, 0x40, 0x48, 0x38, 0x3a, 0x43, 0x43,
+    0x48, 0x3a, 0x3f, 0x46, 0x35, 0x3a, 0x33, 0x36, 0x32, 0x3c, 0x40, 0x34,
+    0x40, 0x3a, 0x42, 0x3a, 0x39, 0x38, 0x41, 0x35, 0x3a, 0x3f, 0x35, 0x40,
+    0x3f, 0x39, 0x39, 0x36, 0x38, 0x40, 0x3e, 0x3e, 0x3a, 0x31, 0x32, 0x44,
+    0x40, 0x47, 0x3a, 0x3c, 0x43, 0x43, 0x46, 0x48, 0x40, 0x35, 0x3d, 0x37,
+    0x44, 0x37, 0x33, 0x44, 0x3b, 0x3e, 0x3f, 0x37, 0x36, 0x3a, 0x38, 0x47,
+    0x3a, 0x44, 0x36, 0x42, 0x3e, 0x44, 0x34, 0x46, 0x33, 0x43, 0x44, 0x3e,
+    0x30, 0x48, 0x37, 0x38, 0x33, 0x3c, 0x46, 0x42, 0x38, 0x3d, 0x50, 0x39,
+    0x33, 0x38, 0x3e, 0x40, 0x3b, 0x2b, 0x3b, 0x5f, 0x2b, 0x32, 0x2f, 0x37,
+    0x3f, 0x3a, 0x40, 0x4e, 0x34, 0x38, 0x47, 0x37, 0x27, 0x2b, 0x1b, 0x4f,
+    0x36, 0x38, 0x3a, 0x3a, 0x3b, 0x38, 0x2e, 0x3f, 0x3f, 0x42, 0x42, 0x42,
+    0x36, 0x3e, 0x3c, 0x55, 0x39, 0x40, 0x44, 0x43, 0x3e, 0x33, 0x3c, 0x43,
+    0x38, 0x44, 0x3b, 0x46, 0x3f, 0x45, 0x34, 0x38, 0x3c, 0x41, 0x42, 0x3d,
+    0x42, 0x36, 0x43, 0x3f, 0x3c, 0x39, 0x3e, 0x39, 0x39, 0x42, 0x33, 0x47,
+    0x36, 0x3d, 0x3f, 0x3b, 0x40, 0x39, 0x3b, 0x49, 0x36, 0x40, 0x3d, 0x41,
+    0x40, 0x34, 0x3b, 0x4e, 0x3b, 0x36, 0x3b, 0x45, 0x40, 0x32, 0x3b, 0x49,
+    0x37, 0x38, 0x3a, 0x47, 0x37, 0x40, 0x3e, 0x38, 0x40, 0x3f, 0x3c, 0x3a,
+    0x47, 0x41, 0x42, 0x30, 0x40, 0x3c, 0x42, 0x3f, 0x31, 0x44, 0x39, 0x38,
+    0x3b, 0x38, 0x42, 0x43, 0x41, 0x35, 0x3a, 0x39, 0x3e, 0x38, 0x39, 0x3e,
+    0x3c, 0x42, 0x3d, 0x49, 0x47, 0x3c, 0x3f, 0x35, 0x41, 0x3a, 0x36, 0x43,
+    0x43, 0x3b, 0x39, 0x3b, 0x36, 0x43, 0x43, 0x4e, 0x3e, 0x35, 0x37, 0x3b,
+    0x3f, 0x37, 0x41, 0x48, 0x32, 0x44, 0x43, 0x32, 0x38, 0x39, 0x45, 0x39,
+    0x3e, 0x3d, 0x35, 0x39, 0x35, 0x39, 0x50, 0x37, 0x39, 0x40, 0x43, 0x47,
+    0x32, 0x2a, 0x40, 0x62, 0x24, 0x30, 0x36, 0x3e, 0x41, 0x32, 0x47, 0x58,
+    0x39, 0x36, 0x44, 0x34, 0x26, 0x34, 0x1e, 0x50, 0x3c, 0x3b, 0x3f, 0x42,
+    0x35, 0x3d, 0x2a, 0x4e, 0x40, 0x38, 0x36, 0x31, 0x3a, 0x30, 0x37, 0x4b,
+    0x3c, 0x3b, 0x3b, 0x41, 0x3b, 0x3c, 0x2e, 0x45, 0x44, 0x3f, 0x3b, 0x35,
+    0x3e, 0x33, 0x37, 0x3d, 0x40, 0x39, 0x39, 0x37, 0x40, 0x3e, 0x3a, 0x3e,
+    0x3c, 0x3c, 0x45, 0x40, 0x3c, 0x3f, 0x3a, 0x51, 0x47, 0x3a, 0x34, 0x39,
+    0x3b, 0x34, 0x44, 0x4c, 0x36, 0x3d, 0x3a, 0x35, 0x34, 0x36, 0x38, 0x4b,
+    0x3f, 0x40, 0x3f, 0x3e, 0x40, 0x41, 0x47, 0x43, 0x32, 0x38, 0x46, 0x44,
+    0x46, 0x43, 0x43, 0x37, 0x39, 0x49, 0x37, 0x36, 0x3e, 0x3d, 0x37, 0x3c,
+    0x39, 0x37, 0x34, 0x43, 0x45, 0x32, 0x3a, 0x3a, 0x38, 0x43, 0x3b, 0x40,
+    0x3b, 0x3f, 0x3d, 0x41, 0x40, 0x3d, 0x3a, 0x3b, 0x48, 0x37, 0x3d, 0x41,
+    0x40, 0x3e, 0x38, 0x41, 0x3d, 0x3a, 0x38, 0x49, 0x40, 0x3c, 0x42, 0x41,
+    0x3a, 0x38, 0x38, 0x4c, 0x3e, 0x41, 0x40, 0x3b, 0x3d, 0x3e, 0x3c, 0x46,
+    0x3e, 0x42, 0x41, 0x38, 0x42, 0x42, 0x41, 0x3e, 0x3e, 0x37, 0x3c, 0x43,
+    0x43, 0x3b, 0x54, 0x2b, 0x45, 0x3b, 0x43, 0x41, 0x41, 0x26, 0x3f, 0x60,
+    0x25, 0x2b, 0x2e, 0x3a, 0x40, 0x31, 0x40, 0x49, 0x40, 0x31, 0x46, 0x3c,
+    0x1e, 0x2a, 0x1a, 0x47, 0x33, 0x37, 0x37, 0x34, 0x31, 0x36, 0x25, 0x41,
+    0x2e, 0x36, 0x35, 0x33, 0x33, 0x34, 0x31, 0x45, 0x3a, 0x3f, 0x3d, 0x40,
+    0x3c, 0x41, 0x30, 0x3c, 0x3f, 0x46, 0x37, 0x3c, 0x3a, 0x3c, 0x36, 0x3a,
+    0x47, 0x3d, 0x31, 0x3f, 0x40, 0x3e, 0x36, 0x44, 0x41, 0x3d, 0x36, 0x3f,
+    0x37, 0x3f, 0x34, 0x4b, 0x31, 0x47, 0x43, 0x3e, 0x3e, 0x3a, 0x3b, 0x4b,
+    0x37, 0x32, 0x38, 0x3d, 0x37, 0x47, 0x46, 0x4d, 0x36, 0x3c, 0x3f, 0x3a,
+    0x41, 0x31, 0x47, 0x43, 0x3d, 0x3d, 0x3e, 0x35, 0x3d, 0x46, 0x49, 0x2a,
+    0x37, 0x3c, 0x39, 0x3d, 0x47, 0x3c, 0x34, 0x2c, 0x3e, 0x38, 0x47, 0x32,
+    0x36, 0x36, 0x41, 0x38, 0x35, 0x44, 0x48, 0x3b, 0x39, 0x3e, 0x38, 0x3e,
+    0x40, 0x36, 0x37, 0x46, 0x39, 0x3b, 0x34, 0x45, 0x40, 0x3b, 0x48, 0x36,
+    0x34, 0x44, 0x37, 0x46, 0x3f, 0x42, 0x33, 0x36, 0x43, 0x3c, 0x41, 0x46,
+    0x31, 0x42, 0x43, 0x44, 0x44, 0x3e, 0x42, 0x3b, 0x3b, 0x3a, 0x3c, 0x37,
+    0x42, 0x41, 0x46, 0x38, 0x41, 0x3b, 0x40, 0x44, 0x37, 0x3c, 0x4c, 0x2e,
+    0x3a, 0x3e, 0x3b, 0x36, 0x33, 0x27, 0x37, 0x5d, 0x27, 0x34, 0x32, 0x41,
+    0x41, 0x3f, 0x40, 0x5d, 0x40, 0x3d, 0x48, 0x39, 0x2e, 0x30, 0x1f, 0x3f,
+    0x38, 0x3f, 0x40, 0x33, 0x40, 0x38, 0x31, 0x3f, 0x42, 0x3e, 0x3b, 0x3a,
+    0x42, 0x36, 0x3a, 0x42, 0x3c, 0x3b, 0x3d, 0x41, 0x3d, 0x40, 0x40, 0x3e,
+    0x36, 0x41, 0x47, 0x3d, 0x33, 0x32, 0x33, 0x44, 0x3e, 0x3a, 0x3e, 0x3d,
+    0x45, 0x3f, 0x38, 0x3f, 0x40, 0x3a, 0x3c, 0x46, 0x32, 0x42, 0x3c, 0x51,
+    0x33, 0x38, 0x3a, 0x38, 0x41, 0x34, 0x45, 0x4e, 0x35, 0x3c, 0x42, 0x3e,
+    0x3f, 0x45, 0x44, 0x4e, 0x39, 0x47, 0x3a, 0x33, 0x3e, 0x3b, 0x45, 0x42,
+    0x37, 0x3a, 0x3e, 0x33, 0x41, 0x48, 0x32, 0x2a, 0x3b, 0x37, 0x3f, 0x3d,
+    0x3a, 0x42, 0x41, 0x2f, 0x34, 0x3e, 0x49, 0x3b, 0x38, 0x3e, 0x3d, 0x3a,
+    0x37, 0x3c, 0x44, 0x41, 0x39, 0x42, 0x3f, 0x39, 0x40, 0x35, 0x3d, 0x41,
+    0x3b, 0x45, 0x44, 0x48, 0x3d, 0x42, 0x36, 0x33, 0x3e, 0x44, 0x3f, 0x41,
+    0x42, 0x40, 0x49, 0x34, 0x48, 0x41, 0x3f, 0x40, 0x3c, 0x45, 0x47, 0x34,
+    0x41, 0x37, 0x47, 0x3e, 0x41, 0x41, 0x39, 0x42, 0x3f, 0x3a, 0x46, 0x33,
+    0x39, 0x41, 0x38, 0x38, 0x3e, 0x42, 0x41, 0x38, 0x35, 0x32, 0x33, 0x38,
+    0x3a, 0x3f, 0x45, 0x66, 0x33, 0x47, 0x38, 0x3c, 0x41, 0x2f, 0x48, 0x55,
+    0x33, 0x3e, 0x49, 0x3b, 0x3c, 0x30, 0x24, 0x45, 0x3c, 0x44, 0x43, 0x32,
+    0x3d, 0x3f, 0x35, 0x3b, 0x3e, 0x36, 0x38, 0x3a, 0x36, 0x37, 0x3b, 0x41,
+    0x38, 0x42, 0x3e, 0x43, 0x39, 0x3f, 0x3c, 0x40, 0x37, 0x43, 0x3e, 0x3b,
+    0x3d, 0x35, 0x35, 0x3d, 0x43, 0x3f, 0x3a, 0x35, 0x37, 0x3c, 0x31, 0x47,
+    0x44, 0x45, 0x40, 0x32, 0x44, 0x36, 0x38, 0x51, 0x3c, 0x41, 0x45, 0x37,
+    0x39, 0x44, 0x3e, 0x4f, 0x3c, 0x3a, 0x38, 0x40, 0x3f, 0x34, 0x39, 0x4e,
+    0x3d, 0x39, 0x45, 0x3f, 0x3e, 0x3c, 0x3b, 0x42, 0x3b, 0x3b, 0x34, 0x3d,
+    0x41, 0x44, 0x39, 0x2e, 0x37, 0x44, 0x45, 0x37, 0x3d, 0x41, 0x3f, 0x33,
+    0x3f, 0x3e, 0x3e, 0x40, 0x44, 0x3f, 0x37, 0x32, 0x35, 0x3e, 0x43, 0x41,
+    0x39, 0x37, 0x35, 0x3f, 0x48, 0x3d, 0x43, 0x49, 0x38, 0x35, 0x3f, 0x48,
+    0x3b, 0x3a, 0x34, 0x3f, 0x3c, 0x44, 0x3a, 0x40, 0x36, 0x35, 0x44, 0x36,
+    0x44, 0x3b, 0x3d, 0x38, 0x3c, 0x44, 0x47, 0x3a, 0x3b, 0x45, 0x41, 0x3a,
+    0x39, 0x35, 0x44, 0x3a, 0x49, 0x36, 0x48, 0x31, 0x42, 0x43, 0x42, 0x34,
+    0x41, 0x40, 0x4d, 0x36, 0x3e, 0x35, 0x39, 0x3b, 0x3f, 0x41, 0x38, 0x39,
+    0x3c, 0x44, 0x3f, 0x39, 0x3a, 0x36, 0x3d, 0x36, 0x3a, 0x3a, 0x34, 0x3b,
+    0x38, 0x2f, 0x40, 0x34, 0x32, 0x4d, 0x43, 0x45, 0x4e, 0x3f, 0x48, 0x35,
+    0x3b, 0x4d, 0x4f, 0x39, 0x42, 0x36, 0x46, 0x36, 0x4a, 0x3c, 0x37, 0x41,
+    0x40, 0x43, 0x50, 0x36, 0x3e, 0x39, 0x44, 0x40, 0x36, 0x47, 0x3f, 0x36,
+    0x45, 0x40, 0x45, 0x41, 0x3b, 0x37, 0x41, 0x39, 0x3b, 0x48, 0x37, 0x34,
+    0x41, 0x45, 0x49, 0x3f, 0x39, 0x49, 0x3f, 0x3a, 0x42, 0x34, 0x38, 0x37,
+    0x44, 0x34, 0x3c, 0x3d, 0x40, 0x47, 0x3a, 0x36, 0x3f, 0x3c, 0x41, 0x3e,
+    0x47, 0x46, 0x46, 0x43, 0x3f, 0x38, 0x3b, 0x40, 0x3f, 0x48, 0x3b, 0x4c,
+    0x3d, 0x4b, 0x34, 0x3b, 0x44, 0x43, 0x3c, 0x49, 0x38, 0x42, 0x41, 0x36,
+    0x33, 0x36, 0x40, 0x46, 0x40, 0x3a, 0x42, 0x3c, 0x3d, 0x35, 0x3c, 0x52,
+    0x3e, 0x40, 0x43, 0x43, 0x41, 0x3b, 0x3e, 0x44, 0x3f, 0x40, 0x40, 0x43,
+    0x3d, 0x3f, 0x36, 0x42, 0x3f, 0x3c, 0x34, 0x3d, 0x33, 0x41, 0x3c, 0x39,
+    0x34, 0x43, 0x3f, 0x34, 0x3c, 0x3a, 0x3a, 0x37, 0x42, 0x41, 0x40, 0x3e,
+    0x3d, 0x3c, 0x41, 0x3c, 0x38, 0x33, 0x49, 0x46, 0x40, 0x40, 0x3a, 0x46,
+    0x38, 0x3c, 0x37, 0x34, 0x3e, 0x3d, 0x32, 0x38, 0x3c, 0x4c, 0x3a, 0x34,
+    0x35, 0x32, 0x39, 0x40, 0x3a, 0x58, 0x40, 0x46, 0x42, 0x33, 0x45, 0x39,
+    0x34, 0x4f, 0x53, 0x45, 0x43, 0x3e, 0x41, 0x36, 0x3e, 0x3f, 0x40, 0x47,
+    0x4e, 0x3d, 0x53, 0x2b, 0x41, 0x36, 0x3e, 0x38, 0x47, 0x41, 0x3f, 0x34,
+    0x47, 0x40, 0x38, 0x39, 0x3d, 0x42, 0x3f, 0x3c, 0x48, 0x3a, 0x35, 0x3c,
+    0x45, 0x49, 0x3c, 0x33, 0x33, 0x3f, 0x3c, 0x46, 0x43, 0x3f, 0x45, 0x31,
+    0x35, 0x43, 0x46, 0x3a, 0x45, 0x3c, 0x37, 0x3a, 0x37, 0x36, 0x35, 0x3f,
+    0x38, 0x49, 0x34, 0x3f, 0x3c, 0x42, 0x49, 0x3e, 0x3e, 0x3c, 0x39, 0x49,
+    0x3e, 0x3c, 0x3b, 0x43, 0x44, 0x45, 0x39, 0x4b, 0x47, 0x47, 0x3e, 0x33,
+    0x3c, 0x31, 0x34, 0x4f, 0x45, 0x43, 0x40, 0x3d, 0x42, 0x3b, 0x43, 0x50,
+    0x3c, 0x3b, 0x37, 0x42, 0x47, 0x42, 0x3e, 0x4a, 0x3f, 0x3a, 0x48, 0x3d,
+    0x48, 0x45, 0x3e, 0x40, 0x3a, 0x3c, 0x3d, 0x39, 0x41, 0x42, 0x3c, 0x42,
+    0x43, 0x3c, 0x3b, 0x3d, 0x47, 0x49, 0x38, 0x3c, 0x46, 0x3a, 0x3c, 0x3f,
+    0x3a, 0x46, 0x3a, 0x3b, 0x3d, 0x3a, 0x49, 0x46, 0x38, 0x40, 0x3e, 0x38,
+    0x37, 0x32, 0x40, 0x3c, 0x42, 0x3d, 0x3b, 0x40, 0x3a, 0x38, 0x49, 0x33,
+    0x40, 0x38, 0x2b, 0x3a, 0x3c, 0x4f, 0x4d, 0x3e, 0x35, 0x3d, 0x3b, 0x40,
+    0x3a, 0x54, 0x3e, 0x3e, 0x43, 0x30, 0x47, 0x3d, 0x3b, 0x53, 0x52, 0x4a,
+    0x43, 0x41, 0x49, 0x37, 0x3b, 0x35, 0x44, 0x3c, 0x45, 0x40, 0x4f, 0x36,
+    0x4b, 0x42, 0x41, 0x3a, 0x41, 0x44, 0x47, 0x32, 0x43, 0x35, 0x3f, 0x37,
+    0x43, 0x41, 0x43, 0x36, 0x3f, 0x3b, 0x3d, 0x38, 0x3d, 0x40, 0x42, 0x36,
+    0x44, 0x3a, 0x39, 0x47, 0x37, 0x34, 0x42, 0x3a, 0x37, 0x38, 0x37, 0x3f,
+    0x36, 0x3b, 0x45, 0x3f, 0x3f, 0x3d, 0x39, 0x3d, 0x39, 0x41, 0x37, 0x3f,
+    0x3f, 0x3d, 0x3f, 0x41, 0x43, 0x41, 0x45, 0x43, 0x41, 0x3c, 0x3e, 0x40,
+    0x40, 0x39, 0x41, 0x4f, 0x47, 0x42, 0x46, 0x48, 0x3b, 0x3b, 0x3c, 0x46,
+    0x47, 0x3e, 0x46, 0x37, 0x38, 0x3d, 0x38, 0x52, 0x36, 0x46, 0x3c, 0x3a,
+    0x3b, 0x37, 0x48, 0x4b, 0x3f, 0x42, 0x3c, 0x36, 0x40, 0x37, 0x33, 0x4c,
+    0x39, 0x34, 0x41, 0x34, 0x3f, 0x3b, 0x35, 0x4b, 0x3b, 0x45, 0x43, 0x31,
+    0x3e, 0x39, 0x30, 0x3d, 0x32, 0x43, 0x44, 0x3c, 0x3e, 0x38, 0x43, 0x41,
+    0x3e, 0x37, 0x41, 0x39, 0x39, 0x44, 0x43, 0x38, 0x3f, 0x37, 0x48, 0x3f,
+    0x3b, 0x44, 0x37, 0x3f, 0x3a, 0x3f, 0x3b, 0x33, 0x42, 0x3e, 0x2f, 0x42,
+    0x44, 0x4f, 0x52, 0x3c, 0x34, 0x33, 0x39, 0x46, 0x31, 0x55, 0x43, 0x4e,
+    0x49, 0x38, 0x4d, 0x48, 0x34, 0x4d, 0x5c, 0x4d, 0x49, 0x37, 0x4f, 0x40,
+    0x3c, 0x3d, 0x41, 0x42, 0x3f, 0x51, 0x4b, 0x2f, 0x46, 0x35, 0x39, 0x3c,
+    0x49, 0x3d, 0x4e, 0x32, 0x43, 0x47, 0x31, 0x3e, 0x42, 0x4a, 0x4c, 0x39,
+    0x43, 0x46, 0x3e, 0x3f, 0x44, 0x3c, 0x42, 0x30, 0x3e, 0x34, 0x3b, 0x3b,
+    0x3a, 0x3c, 0x42, 0x3d, 0x3d, 0x48, 0x48, 0x36, 0x3a, 0x45, 0x38, 0x40,
+    0x3c, 0x41, 0x3f, 0x49, 0x42, 0x41, 0x38, 0x3d, 0x3d, 0x44, 0x3b, 0x3d,
+    0x35, 0x48, 0x43, 0x3b, 0x32, 0x41, 0x3e, 0x3a, 0x46, 0x41, 0x40, 0x54,
+    0x38, 0x3f, 0x3c, 0x36, 0x3b, 0x36, 0x43, 0x50, 0x38, 0x3c, 0x44, 0x3b,
+    0x43, 0x47, 0x32, 0x50, 0x3d, 0x46, 0x3d, 0x3b, 0x39, 0x37, 0x3b, 0x4a,
+    0x47, 0x43, 0x46, 0x3d, 0x3d, 0x41, 0x43, 0x45, 0x3b, 0x3c, 0x39, 0x47,
+    0x43, 0x42, 0x39, 0x4c, 0x34, 0x41, 0x45, 0x3b, 0x38, 0x3e, 0x37, 0x3f,
+    0x45, 0x43, 0x39, 0x42, 0x3c, 0x3d, 0x3d, 0x3c, 0x48, 0x39, 0x3b, 0x3a,
+    0x46, 0x45, 0x3d, 0x3a, 0x3f, 0x3a, 0x45, 0x36, 0x3d, 0x43, 0x36, 0x43,
+    0x42, 0x3d, 0x41, 0x3f, 0x3a, 0x3f, 0x31, 0x37, 0x48, 0x4f, 0x4e, 0x36,
+    0x30, 0x3a, 0x3e, 0x3e, 0x38, 0x57, 0x40, 0x47, 0x47, 0x38, 0x4f, 0x46,
+    0x3d, 0x4a, 0x50, 0x4c, 0x42, 0x3b, 0x4d, 0x3d, 0x3d, 0x33, 0x40, 0x41,
+    0x48, 0x4b, 0x46, 0x39, 0x4d, 0x30, 0x45, 0x38, 0x48, 0x3c, 0x48, 0x3b,
+    0x4d, 0x40, 0x3b, 0x40, 0x46, 0x41, 0x51, 0x34, 0x40, 0x43, 0x3f, 0x42,
+    0x45, 0x42, 0x3e, 0x35, 0x3d, 0x38, 0x37, 0x3a, 0x42, 0x40, 0x43, 0x3c,
+    0x3c, 0x3d, 0x43, 0x40, 0x45, 0x3a, 0x3e, 0x3a, 0x3e, 0x40, 0x43, 0x35,
+    0x37, 0x3f, 0x3f, 0x3e, 0x39, 0x3f, 0x47, 0x38, 0x3e, 0x44, 0x3b, 0x3c,
+    0x3b, 0x32, 0x40, 0x3e, 0x42, 0x45, 0x3a, 0x52, 0x3a, 0x3e, 0x45, 0x40,
+    0x41, 0x48, 0x3f, 0x4e, 0x3e, 0x42, 0x3d, 0x39, 0x3a, 0x33, 0x3f, 0x4b,
+    0x3e, 0x38, 0x36, 0x3e, 0x31, 0x41, 0x3a, 0x40, 0x3b, 0x37, 0x3f, 0x3e,
+    0x3e, 0x3f, 0x35, 0x44, 0x3d, 0x42, 0x3d, 0x44, 0x42, 0x3f, 0x3e, 0x44,
+    0x3e, 0x45, 0x37, 0x3a, 0x3b, 0x42, 0x3f, 0x41, 0x3b, 0x3f, 0x41, 0x41,
+    0x3e, 0x34, 0x47, 0x39, 0x46, 0x46, 0x37, 0x39, 0x3f, 0x45, 0x39, 0x39,
+    0x3a, 0x40, 0x38, 0x3a, 0x31, 0x34, 0x3a, 0x41, 0x38, 0x41, 0x3a, 0x41,
+    0x44, 0x37, 0x2d, 0x41, 0x43, 0x4d, 0x4b, 0x3b, 0x2c, 0x30, 0x42, 0x3b,
+    0x31, 0x56, 0x43, 0x47, 0x47, 0x38, 0x50, 0x44, 0x40, 0x52, 0x5a, 0x50,
+    0x44, 0x3f, 0x4b, 0x35, 0x3a, 0x36, 0x41, 0x44, 0x47, 0x4e, 0x52, 0x36,
+    0x45, 0x39, 0x38, 0x3c, 0x42, 0x44, 0x40, 0x3b, 0x4b, 0x38, 0x35, 0x35,
+    0x3f, 0x40, 0x4f, 0x39, 0x3d, 0x37, 0x34, 0x3e, 0x41, 0x4c, 0x40, 0x37,
+    0x3d, 0x3b, 0x37, 0x37, 0x40, 0x42, 0x35, 0x39, 0x41, 0x42, 0x3d, 0x34,
+    0x3c, 0x37, 0x3a, 0x3d, 0x46, 0x46, 0x46, 0x3f, 0x44, 0x3d, 0x3c, 0x40,
+    0x3c, 0x3a, 0x3d, 0x3b, 0x3b, 0x41, 0x47, 0x3a, 0x43, 0x43, 0x43, 0x3b,
+    0x3e, 0x3e, 0x42, 0x46, 0x36, 0x37, 0x45, 0x35, 0x3c, 0x3b, 0x31, 0x4b,
+    0x3c, 0x3e, 0x3a, 0x3a, 0x42, 0x42, 0x34, 0x47, 0x37, 0x34, 0x41, 0x3d,
+    0x3e, 0x39, 0x43, 0x47, 0x31, 0x3b, 0x40, 0x3b, 0x42, 0x3d, 0x44, 0x44,
+    0x37, 0x39, 0x44, 0x3b, 0x40, 0x3a, 0x3d, 0x44, 0x3c, 0x40, 0x42, 0x3b,
+    0x40, 0x3e, 0x32, 0x3d, 0x3c, 0x3e, 0x44, 0x3e, 0x47, 0x3d, 0x3f, 0x2e,
+    0x3e, 0x3d, 0x3f, 0x3b, 0x3b, 0x43, 0x43, 0x3c, 0x3a, 0x3c, 0x3a, 0x36,
+    0x38, 0x46, 0x30, 0x3e, 0x3f, 0x35, 0x3e, 0x34, 0x3c, 0x34, 0x32, 0x4a,
+    0x41, 0x48, 0x48, 0x3f, 0x34, 0x37, 0x42, 0x43, 0x36, 0x59, 0x42, 0x3f,
+    0x4b, 0x3d, 0x5d, 0x45, 0x3b, 0x51, 0x51, 0x4c, 0x41, 0x40, 0x4d, 0x36,
+    0x3f, 0x34, 0x39, 0x3d, 0x4a, 0x4b, 0x4f, 0x33, 0x48, 0x32, 0x3c, 0x32,
+    0x48, 0x4c, 0x4d, 0x3a, 0x49, 0x3a, 0x3a, 0x2e, 0x4b, 0x44, 0x4f, 0x33,
+    0x3a, 0x48, 0x34, 0x43, 0x38, 0x45, 0x44, 0x35, 0x3b, 0x3f, 0x40, 0x37,
+    0x35, 0x34, 0x38, 0x3e, 0x41, 0x3e, 0x3b, 0x47, 0x41, 0x47, 0x3c, 0x3c,
+    0x39, 0x40, 0x3e, 0x45, 0x36, 0x41, 0x3f, 0x3f, 0x3c, 0x44, 0x3f, 0x43,
+    0x3d, 0x3c, 0x49, 0x42, 0x3e, 0x3f, 0x48, 0x37, 0x43, 0x37, 0x43, 0x3d,
+    0x32, 0x42, 0x44, 0x39, 0x36, 0x37, 0x40, 0x46, 0x47, 0x3d, 0x3a, 0x42,
+    0x3f, 0x38, 0x37, 0x48, 0x39, 0x40, 0x3c, 0x37, 0x33, 0x38, 0x38, 0x40,
+    0x41, 0x3c, 0x3f, 0x3b, 0x40, 0x3a, 0x47, 0x46, 0x3a, 0x37, 0x42, 0x47,
+    0x3b, 0x3f, 0x3b, 0x40, 0x33, 0x3f, 0x3a, 0x3c, 0x38, 0x3a, 0x36, 0x38,
+    0x36, 0x40, 0x48, 0x42, 0x48, 0x3c, 0x43, 0x36, 0x32, 0x3b, 0x34, 0x39,
+    0x38, 0x46, 0x37, 0x3b, 0x44, 0x34, 0x36, 0x38, 0x3c, 0x43, 0x33, 0x3c,
+    0x3b, 0x45, 0x38, 0x38, 0x44, 0x33, 0x36, 0x4a, 0x46, 0x4c, 0x4a, 0x34,
+    0x36, 0x37, 0x43, 0x42, 0x33, 0x58, 0x43, 0x48, 0x44, 0x38, 0x5f, 0x3f,
+    0x3c, 0x4d, 0x53, 0x52, 0x43, 0x47, 0x52, 0x3e, 0x3b, 0x2d, 0x3b, 0x3a,
+    0x4b, 0x49, 0x53, 0x38, 0x4c, 0x2f, 0x38, 0x31, 0x42, 0x40, 0x48, 0x3f,
+    0x44, 0x3c, 0x3c, 0x34, 0x46, 0x3f, 0x49, 0x3a, 0x43, 0x3d, 0x34, 0x42,
+    0x36, 0x47, 0x51, 0x3c, 0x3d, 0x39, 0x39, 0x3a, 0x3b, 0x35, 0x35, 0x41,
+    0x47, 0x3c, 0x3b, 0x43, 0x3f, 0x45, 0x3e, 0x40, 0x3c, 0x3f, 0x3c, 0x42,
+    0x3b, 0x3e, 0x38, 0x3f, 0x3f, 0x41, 0x39, 0x39, 0x3d, 0x43, 0x4f, 0x3d,
+    0x48, 0x3b, 0x44, 0x45, 0x3d, 0x3b, 0x49, 0x43, 0x44, 0x3d, 0x37, 0x3b,
+    0x3c, 0x45, 0x46, 0x44, 0x35, 0x3e, 0x32, 0x35, 0x34, 0x3b, 0x40, 0x43,
+    0x3e, 0x45, 0x37, 0x3d, 0x3f, 0x43, 0x36, 0x3f, 0x3f, 0x43, 0x39, 0x44,
+    0x3e, 0x3e, 0x45, 0x40, 0x3e, 0x44, 0x3b, 0x3e, 0x42, 0x42, 0x3b, 0x3d,
+    0x3a, 0x40, 0x39, 0x3a, 0x32, 0x36, 0x41, 0x30, 0x39, 0x46, 0x33, 0x3f,
+    0x46, 0x40, 0x3c, 0x31, 0x41, 0x3a, 0x3f, 0x3f, 0x3b, 0x36, 0x3f, 0x38,
+    0x36, 0x3e, 0x35, 0x35, 0x3b, 0x3d, 0x3f, 0x39, 0x46, 0x37, 0x3a, 0x47,
+    0x37, 0x39, 0x2c, 0x55, 0x40, 0x4b, 0x4a, 0x39, 0x35, 0x42, 0x3d, 0x40,
+    0x3a, 0x54, 0x41, 0x48, 0x51, 0x3b, 0x61, 0x3e, 0x3e, 0x4d, 0x51, 0x52,
+    0x3e, 0x43, 0x52, 0x41, 0x48, 0x2d, 0x35, 0x35, 0x4b, 0x44, 0x4d, 0x3c,
+    0x54, 0x33, 0x39, 0x27, 0x4a, 0x44, 0x4a, 0x41, 0x3c, 0x3a, 0x31, 0x2f,
+    0x3d, 0x42, 0x48, 0x3f, 0x42, 0x40, 0x44, 0x3b, 0x40, 0x3e, 0x49, 0x3a,
+    0x3c, 0x35, 0x30, 0x3e, 0x3e, 0x3d, 0x36, 0x3a, 0x3e, 0x3a, 0x4a, 0x3e,
+    0x3d, 0x49, 0x40, 0x43, 0x3e, 0x45, 0x3f, 0x3c, 0x3b, 0x42, 0x3a, 0x39,
+    0x3b, 0x47, 0x3f, 0x39, 0x49, 0x46, 0x3d, 0x34, 0x32, 0x44, 0x46, 0x42,
+    0x47, 0x39, 0x49, 0x48, 0x3b, 0x38, 0x45, 0x45, 0x37, 0x38, 0x46, 0x46,
+    0x37, 0x42, 0x35, 0x34, 0x45, 0x42, 0x35, 0x43, 0x3b, 0x3a, 0x43, 0x43,
+    0x40, 0x42, 0x35, 0x3f, 0x38, 0x3f, 0x3a, 0x3a, 0x3b, 0x3f, 0x3e, 0x36,
+    0x3f, 0x3c, 0x48, 0x3b, 0x3a, 0x41, 0x41, 0x35, 0x33, 0x3f, 0x3b, 0x45,
+    0x48, 0x36, 0x40, 0x38, 0x47, 0x3d, 0x35, 0x40, 0x41, 0x42, 0x41, 0x37,
+    0x41, 0x3e, 0x36, 0x48, 0x3e, 0x3c, 0x32, 0x39, 0x41, 0x40, 0x38, 0x3f,
+    0x46, 0x43, 0x33, 0x40, 0x43, 0x43, 0x3a, 0x49, 0x3f, 0x35, 0x2c, 0x5d,
+    0x43, 0x49, 0x52, 0x3b, 0x3c, 0x41, 0x40, 0x4a, 0x33, 0x50, 0x41, 0x46,
+    0x52, 0x41, 0x68, 0x48, 0x44, 0x53, 0x54, 0x55, 0x42, 0x42, 0x57, 0x44,
+    0x47, 0x35, 0x35, 0x3e, 0x4b, 0x44, 0x4e, 0x38, 0x55, 0x2f, 0x36, 0x2d,
+    0x40, 0x48, 0x4b, 0x41, 0x48, 0x36, 0x32, 0x32, 0x44, 0x42, 0x47, 0x42,
+    0x48, 0x3d, 0x3d, 0x39, 0x3e, 0x35, 0x4b, 0x39, 0x38, 0x3a, 0x39, 0x46,
+    0x38, 0x3f, 0x3a, 0x42, 0x4b, 0x45, 0x3e, 0x32, 0x46, 0x43, 0x3b, 0x40,
+    0x45, 0x41, 0x3e, 0x43, 0x37, 0x3d, 0x43, 0x3b, 0x46, 0x48, 0x42, 0x3b,
+    0x3d, 0x48, 0x4a, 0x3c, 0x3b, 0x42, 0x40, 0x3c, 0x3a, 0x42, 0x38, 0x47,
+    0x3b, 0x3b, 0x3d, 0x41, 0x3f, 0x38, 0x3f, 0x4a, 0x44, 0x3f, 0x47, 0x3a,
+    0x47, 0x44, 0x43, 0x43, 0x34, 0x3d, 0x3a, 0x3c, 0x47, 0x3f, 0x3e, 0x39,
+    0x42, 0x4a, 0x40, 0x36, 0x40, 0x41, 0x42, 0x3f, 0x3f, 0x43, 0x39, 0x38,
+    0x3c, 0x3b, 0x4c, 0x2f, 0x41, 0x39, 0x40, 0x42, 0x3f, 0x42, 0x40, 0x36,
+    0x3b, 0x45, 0x41, 0x41, 0x44, 0x45, 0x42, 0x37, 0x3d, 0x3a, 0x33, 0x3e,
+    0x3b, 0x3b, 0x3c, 0x3d, 0x38, 0x49, 0x44, 0x39, 0x3f, 0x48, 0x3d, 0x41,
+    0x42, 0x43, 0x44, 0x3e, 0x41, 0x3d, 0x32, 0x59, 0x45, 0x4b, 0x4b, 0x38,
+    0x37, 0x3d, 0x48, 0x42, 0x3d, 0x52, 0x43, 0x46, 0x54, 0x48, 0x67, 0x4d,
+    0x45, 0x4e, 0x49, 0x52, 0x45, 0x45, 0x58, 0x3b, 0x41, 0x38, 0x3f, 0x3f,
+    0x49, 0x44, 0x4f, 0x48, 0x57, 0x31, 0x3c, 0x2a, 0x3e, 0x4c, 0x41, 0x40,
+    0x47, 0x3f, 0x33, 0x34, 0x3f, 0x42, 0x48, 0x43, 0x4b, 0x38, 0x39, 0x3d,
+    0x3f, 0x3e, 0x4b, 0x3f, 0x35, 0x36, 0x3c, 0x46, 0x3c, 0x45, 0x37, 0x3b,
+    0x3c, 0x39, 0x41, 0x40, 0x41, 0x43, 0x44, 0x41, 0x45, 0x4f, 0x44, 0x43,
+    0x44, 0x3c, 0x45, 0x34, 0x42, 0x45, 0x3f, 0x46, 0x3f, 0x43, 0x3d, 0x3a,
+    0x39, 0x47, 0x45, 0x3d, 0x3f, 0x3b, 0x3d, 0x42, 0x38, 0x48, 0x48, 0x3b,
+    0x3c, 0x3a, 0x3f, 0x41, 0x44, 0x4b, 0x44, 0x48, 0x41, 0x3c, 0x3d, 0x3c,
+    0x3e, 0x3a, 0x4a, 0x3b, 0x49, 0x35, 0x3a, 0x3d, 0x41, 0x3f, 0x49, 0x39,
+    0x44, 0x37, 0x3f, 0x3c, 0x42, 0x40, 0x4a, 0x46, 0x39, 0x38, 0x46, 0x37,
+    0x41, 0x46, 0x41, 0x45, 0x40, 0x3b, 0x3b, 0x33, 0x3b, 0x39, 0x3c, 0x43,
+    0x37, 0x3c, 0x44, 0x3d, 0x46, 0x39, 0x3c, 0x3c, 0x44, 0x48, 0x41, 0x44,
+    0x41, 0x43, 0x46, 0x3b, 0x47, 0x41, 0x31, 0x41, 0x44, 0x40, 0x43, 0x42,
+    0x3e, 0x43, 0x34, 0x65, 0x4f, 0x50, 0x4d, 0x3a, 0x37, 0x43, 0x4d, 0x4a,
+    0x3d, 0x54, 0x40, 0x42, 0x5b, 0x3b, 0x71, 0x49, 0x44, 0x4f, 0x54, 0x56,
+    0x48, 0x40, 0x52, 0x41, 0x42, 0x38, 0x3c, 0x49, 0x4a, 0x45, 0x51, 0x35,
+    0x54, 0x2f, 0x35, 0x25, 0x4d, 0x3f, 0x4d, 0x43, 0x49, 0x33, 0x32, 0x3a,
+    0x46, 0x48, 0x48, 0x3d, 0x43, 0x3a, 0x3c, 0x3a, 0x48, 0x40, 0x4b, 0x3b,
+    0x45, 0x3b, 0x3f, 0x38, 0x37, 0x41, 0x31, 0x3b, 0x41, 0x43, 0x43, 0x37,
+    0x48, 0x3f, 0x48, 0x37, 0x40, 0x4a, 0x43, 0x45, 0x3d, 0x39, 0x37, 0x37,
+    0x3c, 0x3f, 0x47, 0x48, 0x43, 0x3e, 0x41, 0x3f, 0x3e, 0x38, 0x3e, 0x37,
+    0x45, 0x45, 0x35, 0x44, 0x38, 0x3a, 0x49, 0x43, 0x40, 0x41, 0x40, 0x44,
+    0x3c, 0x3e, 0x40, 0x38, 0x42, 0x41, 0x3c, 0x41, 0x3a, 0x3b, 0x3c, 0x3a,
+    0x49, 0x3c, 0x42, 0x44, 0x3f, 0x39, 0x45, 0x32, 0x45, 0x43, 0x45, 0x39,
+    0x43, 0x41, 0x4b, 0x39, 0x32, 0x3c, 0x3c, 0x36, 0x39, 0x3f, 0x46, 0x32,
+    0x39, 0x35, 0x4f, 0x32, 0x3e, 0x40, 0x3d, 0x3e, 0x3a, 0x39, 0x4c, 0x38,
+    0x43, 0x38, 0x49, 0x3b, 0x33, 0x39, 0x3b, 0x36, 0x36, 0x43, 0x3b, 0x3c,
+    0x32, 0x3c, 0x3a, 0x45, 0x31, 0x3d, 0x37, 0x40, 0x3f, 0x3f, 0x35, 0xff,
+    0x49, 0x4e, 0x4c, 0x3c, 0x36, 0x43, 0x46, 0x45, 0x41, 0x59, 0x44, 0x4a,
+    0x53, 0x44, 0x71, 0x4a, 0x39, 0x4f, 0x50, 0x4b, 0x47, 0x42, 0x5a, 0x3c,
+    0x45, 0x38, 0x3e, 0x42, 0x53, 0x43, 0x52, 0x3a, 0x52, 0x34, 0x31, 0x20,
+    0x49, 0x4e, 0x46, 0x43, 0x4b, 0x3d, 0x2b, 0x27, 0x46, 0x46, 0x47, 0x41,
+    0x42, 0x37, 0x39, 0x38, 0x45, 0x3f, 0x51, 0x3d, 0x48, 0x3f, 0x33, 0x3f,
+    0x38, 0x45, 0x31, 0x38, 0x41, 0x3d, 0x47, 0x39, 0x42, 0x40, 0x4c, 0x3f,
+    0x40, 0x42, 0x41, 0x41, 0x41, 0x42, 0x39, 0x35, 0x3f, 0x46, 0x45, 0x36,
+    0x3f, 0x43, 0x3b, 0x39, 0x41, 0x38, 0x43, 0x37, 0x3d, 0x44, 0x3b, 0x40,
+    0x36, 0x3d, 0x42, 0x41, 0x41, 0x3d, 0x38, 0x4a, 0x40, 0x4a, 0x4c, 0x38,
+    0x3f, 0x40, 0x45, 0x3c, 0x3f, 0x4b, 0x43, 0x41, 0x43, 0x3e, 0x43, 0x3f,
+    0x36, 0x40, 0x40, 0x39, 0x3f, 0x3a, 0x3a, 0x30, 0x41, 0x3c, 0x3c, 0x34,
+    0x46, 0x38, 0x43, 0x34, 0x3a, 0x42, 0x43, 0x42, 0x40, 0x41, 0x49, 0x34,
+    0x35, 0x40, 0x47, 0x3d, 0x3d, 0x3e, 0x4c, 0x33, 0x3c, 0x3b, 0x39, 0x43,
+    0x3a, 0x3e, 0x3b, 0x37, 0x3f, 0x42, 0x31, 0x3d, 0x41, 0x3e, 0x32, 0x47,
+    0x34, 0x41, 0x3d, 0x35, 0x39, 0x40, 0x38, 0x69, 0x4f, 0x4a, 0x49, 0x37,
+    0x37, 0x44, 0x43, 0x46, 0x40, 0x58, 0x43, 0x48, 0x54, 0x46, 0x6c, 0x50,
+    0x3a, 0x50, 0x50, 0x57, 0x47, 0x46, 0x5c, 0x40, 0x40, 0x39, 0x3e, 0x46,
+    0x53, 0x46, 0x5c, 0x36, 0x4f, 0x32, 0x30, 0x2d, 0x4a, 0x48, 0x41, 0x45,
+    0x47, 0x2f, 0x32, 0x2b, 0x43, 0x40, 0x43, 0x3c, 0x40, 0x44, 0x3e, 0x37,
+    0x39, 0x3e, 0x48, 0x42, 0x45, 0x36, 0x47, 0x3f, 0x3b, 0x41, 0x35, 0x35,
+    0x3b, 0x3e, 0x35, 0x43, 0x3e, 0x41, 0x3d, 0x36, 0x41, 0x3c, 0x40, 0x44,
+    0x3d, 0x40, 0x35, 0x32, 0x48, 0x3e, 0x39, 0x42, 0x44, 0x3d, 0x39, 0x3b,
+    0x3b, 0x45, 0x40, 0x4a, 0x3f, 0x41, 0x43, 0x39, 0x42, 0x44, 0x4c, 0x3c,
+    0x3f, 0x3e, 0x3f, 0x43, 0x40, 0x42, 0x4c, 0x3b, 0x3e, 0x3d, 0x49, 0x42,
+    0x40, 0x44, 0x40, 0x34, 0x36, 0x40, 0x45, 0x39, 0x42, 0x40, 0x3e, 0x44,
+    0x45, 0x37, 0x3c, 0x38, 0x3e, 0x49, 0x3e, 0x3c, 0x41, 0x3d, 0x42, 0x32,
+    0x40, 0x45, 0x3e, 0x36, 0x44, 0x3a, 0x4e, 0x38, 0x43, 0x38, 0x40, 0x38,
+    0x49, 0x42, 0x40, 0x3d, 0x42, 0x48, 0x48, 0x3d, 0x41, 0x3a, 0x3f, 0x41,
+    0x38, 0x3c, 0x44, 0x39, 0x3a, 0x32, 0x3a, 0x3e, 0x3d, 0x3b, 0x39, 0x38,
+    0x3a, 0x43, 0x3a, 0x6b, 0x45, 0x50, 0x47, 0x33, 0x38, 0x48, 0x4d, 0x4f,
+    0x39, 0x4b, 0x46, 0x4a, 0x4f, 0x42, 0x6f, 0x4b, 0x40, 0x55, 0x54, 0x50,
+    0x42, 0x47, 0x5e, 0x46, 0x40, 0x34, 0x40, 0x47, 0x52, 0x46, 0x55, 0x3b,
+    0x4f, 0x2b, 0x35, 0x33, 0x4c, 0x44, 0x44, 0x48, 0x47, 0x37, 0x35, 0x27,
+    0x4a, 0x3b, 0x41, 0x40, 0x40, 0x3e, 0x36, 0x39, 0x3e, 0x3c, 0x45, 0x3f,
+    0x4d, 0x41, 0x3d, 0x48, 0x47, 0x46, 0x33, 0x3d, 0x3d, 0x3e, 0x34, 0x3f,
+    0x3e, 0x3a, 0x41, 0x35, 0x3b, 0x3e, 0x42, 0x3c, 0x42, 0x42, 0x40, 0x31,
+    0x37, 0x40, 0x36, 0x42, 0x48, 0x39, 0x3d, 0x3c, 0x3a, 0x43, 0x39, 0x3d,
+    0x47, 0x49, 0x43, 0x3d, 0x45, 0x39, 0x44, 0x37, 0x3e, 0x4d, 0x3d, 0x40,
+    0x3d, 0x4c, 0x4d, 0x44, 0x3c, 0x3d, 0x46, 0x41, 0x41, 0x42, 0x40, 0x40,
+    0x41, 0x3a, 0x3c, 0x3b, 0x3c, 0x44, 0x40, 0x34, 0x44, 0x38, 0x3b, 0x33,
+    0x45, 0x45, 0x44, 0x3f, 0x3e, 0x3a, 0x3b, 0x3b, 0x43, 0x39, 0x3a, 0x45,
+    0x3b, 0x3a, 0x4b, 0x39, 0x3d, 0x38, 0x41, 0x39, 0x42, 0x45, 0x43, 0x40,
+    0x3e, 0x35, 0x44, 0x3f, 0x45, 0x41, 0x40, 0x3e, 0x43, 0x42, 0x37, 0x3a,
+    0x38, 0x35, 0x3a, 0x48, 0x3e, 0x3b, 0x40, 0x38, 0x3c, 0x3c, 0x3b, 0x6a,
+    0x48, 0x4d, 0x4d, 0x34, 0x38, 0x40, 0x4a, 0x45, 0x3c, 0x4f, 0x41, 0x4b,
+    0x58, 0x46, 0x71, 0x49, 0x3d, 0x53, 0x44, 0x52, 0x42, 0x3e, 0x57, 0x4c,
+    0x4c, 0x38, 0x40, 0x3b, 0x5c, 0x4c, 0x52, 0x3e, 0x4c, 0x2d, 0x32, 0x37,
+    0x49, 0x3f, 0x41, 0x47, 0x4a, 0x3b, 0x2f, 0x26, 0x45, 0x40, 0x47, 0x42,
+    0x3d, 0x39, 0x2d, 0x2c, 0x3f, 0x45, 0x46, 0x44, 0x48, 0x43, 0x42, 0x48,
+    0x40, 0x41, 0x3b, 0x3b, 0x41, 0x3b, 0x39, 0x40, 0x3b, 0x47, 0x3f, 0x38,
+    0x3f, 0x49, 0x3b, 0x35, 0x40, 0x45, 0x38, 0x35, 0x36, 0x34, 0x3e, 0x3d,
+    0x46, 0x3e, 0x33, 0x38, 0x43, 0x48, 0x3f, 0x45, 0x31, 0x44, 0x38, 0x35,
+    0x3c, 0x41, 0x4b, 0x44, 0x3d, 0x43, 0x38, 0x48, 0x3c, 0x39, 0x4a, 0x42,
+    0x3d, 0x43, 0x3f, 0x49, 0x3e, 0x47, 0x49, 0x41, 0x3b, 0x3c, 0x47, 0x3a,
+    0x3d, 0x40, 0x4a, 0x38, 0x3d, 0x3b, 0x47, 0x3a, 0x36, 0x47, 0x42, 0x46,
+    0x3c, 0x3d, 0x45, 0x3b, 0x48, 0x3f, 0x38, 0x36, 0x39, 0x46, 0x43, 0x3a,
+    0x41, 0x3d, 0x39, 0x39, 0x46, 0x37, 0x3f, 0x3f, 0x3a, 0x46, 0x3f, 0x39,
+    0x49, 0x44, 0x42, 0x3a, 0x3a, 0x43, 0x3e, 0x42, 0x3d, 0x3d, 0x43, 0x40,
+    0x43, 0x3c, 0x3f, 0x43, 0x40, 0x42, 0x3b, 0x57, 0x4a, 0x4f, 0x4a, 0x2d,
+    0x3b, 0x48, 0x45, 0x42, 0x34, 0x4c, 0x3e, 0x4f, 0x4d, 0x40, 0x6c, 0x4b,
+    0x3b, 0x4d, 0x4c, 0x57, 0x49, 0x3d, 0x5d, 0x44, 0x43, 0x29, 0x42, 0x3f,
+    0x5b, 0x47, 0x4f, 0x3e, 0x54, 0x2e, 0x34, 0x34, 0x4b, 0x47, 0x46, 0x46,
+    0x4b, 0x34, 0x36, 0x28, 0x3e, 0x3f, 0x42, 0x40, 0x3b, 0x38, 0x39, 0x42,
+    0x49, 0x3d, 0x49, 0x47, 0x47, 0x3b, 0x43, 0x34, 0x39, 0x36, 0x42, 0x3d,
+    0x37, 0x40, 0x37, 0x38, 0x46, 0x42, 0x49, 0x37, 0x44, 0x3f, 0x38, 0x3e,
+    0x36, 0x32, 0x33, 0x38, 0x40, 0x46, 0x42, 0x34, 0x41, 0x42, 0x3e, 0x38,
+    0x44, 0x3e, 0x3f, 0x43, 0x3f, 0x43, 0x35, 0x3f, 0x4d, 0x3b, 0x43, 0x39,
+    0x40, 0x47, 0x3f, 0x4a, 0x3a, 0x3f, 0x45, 0x45, 0x48, 0x42, 0x3b, 0x47,
+    0x42, 0x4b, 0x47, 0x3e, 0x3c, 0x42, 0x46, 0x39, 0x41, 0x3f, 0x48, 0x33,
+    0x45, 0x34, 0x3d, 0x30, 0x40, 0x4c, 0x40, 0x40, 0x39, 0x37, 0x40, 0x33,
+    0x49, 0x42, 0x45, 0x38, 0x3c, 0x43, 0x45, 0x35, 0x37, 0x33, 0x34, 0x3b,
+    0x3b, 0x38, 0x39, 0x41, 0x42, 0x40, 0x3e, 0x3e, 0x41, 0x33, 0x3a, 0x36,
+    0x40, 0x3a, 0x3c, 0x45, 0x43, 0x3c, 0x40, 0x41, 0x49, 0x47, 0x35, 0x34,
+    0x3a, 0x3d, 0x3a, 0x68, 0x4f, 0x48, 0x43, 0x36, 0x37, 0x3e, 0x45, 0x49,
+    0x3a, 0x4d, 0x41, 0x3d, 0x46, 0x45, 0x65, 0x46, 0x38, 0x4d, 0x4a, 0x53,
+    0x43, 0x41, 0x5d, 0x47, 0x41, 0x34, 0x39, 0x43, 0x4e, 0x48, 0x50, 0x38,
+    0x53, 0x32, 0x30, 0x2e, 0x49, 0x4c, 0x4d, 0x3f, 0x46, 0x38, 0x34, 0x2b,
+    0x44, 0x44, 0x41, 0x41, 0x36, 0x40, 0x3f, 0x32, 0x46, 0x38, 0x50, 0x45,
+    0x3f, 0x3d, 0x3b, 0x36, 0x3b, 0x43, 0x3a, 0x34, 0x36, 0x3f, 0x39, 0x35,
+    0x3c, 0x40, 0x40, 0x37, 0x3c, 0x39, 0x3d, 0x36, 0x48, 0x3d, 0x43, 0x34,
+    0x3b, 0x46, 0x43, 0x41, 0x33, 0x3e, 0x44, 0x3d, 0x44, 0x44, 0x4c, 0x3c,
+    0x37, 0x49, 0x42, 0x35, 0x45, 0x3a, 0x3c, 0x41, 0x3a, 0x45, 0x46, 0x41,
+    0x3c, 0x48, 0x46, 0x36, 0x36, 0x42, 0x3b, 0x46, 0x42, 0x45, 0x44, 0x47,
+    0x3f, 0x44, 0x3a, 0x35, 0x37, 0x46, 0x40, 0x38, 0x40, 0x3d, 0x36, 0x2c,
+    0x34, 0x47, 0x40, 0x38, 0x3f, 0x3f, 0x44, 0x2d, 0x3b, 0x3d, 0x3e, 0x44,
+    0x3c, 0x40, 0x3e, 0x33, 0x3c, 0x3a, 0x49, 0x40, 0x42, 0x42, 0x3a, 0x3b,
+    0x33, 0x3d, 0x3c, 0x43, 0x3e, 0x3d, 0x3a, 0x3a, 0x48, 0x3e, 0x3c, 0x39,
+    0x3f, 0x44, 0x37, 0x40, 0x3f, 0x3c, 0x3e, 0x3d, 0x38, 0x42, 0x34, 0x62,
+    0x51, 0x47, 0x44, 0x3f, 0x32, 0x3c, 0x3f, 0x46, 0x3d, 0x46, 0x3e, 0x45,
+    0x4a, 0x3e, 0x5d, 0x43, 0x45, 0x49, 0x4a, 0x55, 0x41, 0x3c, 0x5a, 0x44,
+    0x43, 0x3b, 0x3c, 0x3a, 0x4b, 0x4e, 0x4d, 0x42, 0x49, 0x30, 0x3b, 0x38,
+    0x42, 0x44, 0x51, 0x40, 0x48, 0x33, 0x3f, 0x2b, 0x3c, 0x41, 0x3c, 0x45,
+    0x35, 0x39, 0x42, 0x37, 0x40, 0x46, 0x46, 0x3f, 0x41, 0x45, 0x42, 0x3d,
+    0x43, 0x38, 0x3e, 0x38, 0x3c, 0x39, 0x40, 0x38, 0x37, 0x36, 0x3d, 0x3d,
+    0x38, 0x47, 0x45, 0x3b, 0x45, 0x44, 0x42, 0x2e, 0x37, 0x40, 0x42, 0x42,
+    0x3c, 0x36, 0x3b, 0x39, 0x44, 0x4d, 0x42, 0x3f, 0x3a, 0x3e, 0x45, 0x34,
+    0x3c, 0x43, 0x47, 0x43, 0x3f, 0x48, 0x3b, 0x44, 0x3d, 0x44, 0x43, 0x3e,
+    0x40, 0x4a, 0x31, 0x42, 0x42, 0x43, 0x48, 0x45, 0x3a, 0x42, 0x36, 0x2f,
+    0x3c, 0x3e, 0x3b, 0x3b, 0x44, 0x3f, 0x3a, 0x2c, 0x47, 0x3f, 0x4a, 0x40,
+    0x40, 0x40, 0x3c, 0x2a, 0x3e, 0x44, 0x40, 0x43, 0x3a, 0x42, 0x39, 0x34,
+    0x49, 0x3e, 0x36, 0x42, 0x3f, 0x42, 0x33, 0x3b, 0x3c, 0x45, 0x39, 0x3f,
+    0x3e, 0x3f, 0x41, 0x3d, 0x32, 0x3b, 0x31, 0x40, 0x3f, 0x44, 0x3c, 0x3f,
+    0x40, 0x46, 0x45, 0x36, 0x36, 0x42, 0x30, 0x57, 0x47, 0x44, 0x48, 0x3f,
+    0x35, 0x37, 0x3f, 0x3f, 0x38, 0x4a, 0x41, 0x46, 0x50, 0x3d, 0x5b, 0x41,
+    0x3e, 0x3c, 0x4a, 0x54, 0x45, 0x41, 0x5b, 0x46, 0x3d, 0x3b, 0x43, 0x33,
+    0x45, 0x4e, 0x43, 0x3b, 0x44, 0x37, 0x37, 0x32, 0x4c, 0x3d, 0x4c, 0x3f,
+    0x49, 0x3b, 0x37, 0x3a, 0x33, 0x43, 0x3f, 0x40, 0x44, 0x36, 0x3b, 0x44,
+    0x45, 0x40, 0x3c, 0x3c, 0x41, 0x44, 0x3b, 0x3d, 0x33, 0x37, 0x3c, 0x35,
+    0x3d, 0x3f, 0x39, 0x38, 0x33, 0x43, 0x3e, 0x39, 0x3b, 0x3e, 0x41, 0x35,
+    0x40, 0x46, 0x43, 0x35, 0x41, 0x3d, 0x32, 0x39, 0x3c, 0x40, 0x3e, 0x3f,
+    0x42, 0x38, 0x3b, 0x45, 0x3a, 0x3d, 0x40, 0x36, 0x3a, 0x40, 0x46, 0x44,
+    0x48, 0x45, 0x3f, 0x3a, 0x45, 0x45, 0x3c, 0x3b, 0x40, 0x4c, 0x39, 0x3a,
+    0x38, 0x39, 0x46, 0x3a, 0x3e, 0x4b, 0x34, 0x39, 0x3d, 0x3f, 0x40, 0x39,
+    0x45, 0x31, 0x45, 0x29, 0x3f, 0x38, 0x3a, 0x3f, 0x38, 0x3b, 0x36, 0x2d,
+    0x43, 0x3d, 0x45, 0x3c, 0x46, 0x3f, 0x40, 0x3c, 0x3a, 0x3e, 0x3d, 0x38,
+    0x3f, 0x3c, 0x3f, 0x42, 0x35, 0x3f, 0x3a, 0x43, 0x3d, 0x43, 0x3d, 0x33,
+    0x3d, 0x48, 0x42, 0x3d, 0x45, 0x46, 0x3d, 0x35, 0x32, 0x44, 0x42, 0x37,
+    0x3d, 0x40, 0x3c, 0x47, 0x4a, 0x45, 0x47, 0x2f, 0x33, 0x36, 0x3f, 0x42,
+    0x38, 0x43, 0x3e, 0x3a, 0x41, 0x3f, 0x5f, 0x3f, 0x48, 0x3a, 0x44, 0x47,
+    0x41, 0x3e, 0x57, 0x42, 0x41, 0x33, 0x34, 0x39, 0x42, 0x44, 0x42, 0x3c,
+    0x49, 0x34, 0x37, 0x33, 0x47, 0x38, 0x43, 0x3d, 0x43, 0x3e, 0x3e, 0x36,
+    0x41, 0x41, 0x37, 0x40, 0x39, 0x3e, 0x3b, 0x3b, 0x3e, 0x41, 0x3d, 0x3b,
+    0x43, 0x3e, 0x39, 0x43, 0x2f, 0x3e, 0x33, 0x40, 0x45, 0x47, 0x30, 0x46,
+    0x3f, 0x3f, 0x37, 0x42, 0x3d, 0x42, 0x43, 0x37, 0x38, 0x3c, 0x35, 0x34,
+    0x41, 0x43, 0x3e, 0x3e, 0x3f, 0x49, 0x35, 0x35, 0x38, 0x36, 0x3a, 0x43,
+    0x38, 0x46, 0x48, 0x36, 0x3f, 0x39, 0x3b, 0x3e, 0x48, 0x47, 0x41, 0x34,
+    0x3b, 0x3c, 0x37, 0x3e, 0x40, 0x41, 0x3b, 0x3d, 0x43, 0x42, 0x3a, 0x39,
+    0x3b, 0x43, 0x38, 0x2b, 0x43, 0x41, 0x48, 0x35, 0x44, 0x44, 0x3e, 0x2c,
+    0x46, 0x40, 0x3e, 0x41, 0x38, 0x34, 0x35, 0x37, 0x34, 0x3f, 0x3d, 0x46,
+    0x33, 0x3c, 0x3c, 0x2e, 0x3b, 0x45, 0x3d, 0x3e, 0x3a, 0x42, 0x3c, 0x36,
+    0x3a, 0x42, 0x39, 0x43, 0x35, 0x39, 0x40, 0x44, 0x47, 0x41, 0x44, 0x3d,
+    0x41, 0x3e, 0x38, 0x39, 0x45, 0x3a, 0x35, 0x43, 0x3f, 0x44, 0x41, 0x49,
+    0x47, 0x3f, 0x44, 0x40, 0x38, 0x43, 0x40, 0x3e, 0x39, 0x42, 0x32, 0x3b,
+    0x42, 0x47, 0x57, 0x37, 0x36, 0x38, 0x43, 0x49, 0x3b, 0x34, 0x54, 0x42,
+    0x3d, 0x3f, 0x3e, 0x3b, 0x38, 0x41, 0x43, 0x3a, 0x44, 0x39, 0x34, 0x2c,
+    0x38, 0x43, 0x4b, 0x3f, 0x40, 0x3e, 0x32, 0x33, 0x3d, 0x44, 0x45, 0x44,
+    0x3e, 0x35, 0x37, 0x39, 0x40, 0x3e, 0x40, 0x3c, 0x34, 0x43, 0x37, 0x40,
+    0x39, 0x3e, 0x3d, 0x43, 0x3a, 0x44, 0x43, 0x44, 0x3d, 0x3b, 0x45, 0x3b,
+    0x3a, 0x3a, 0x3f, 0x37, 0x43, 0x3b, 0x33, 0x35, 0x40, 0x47, 0x3e, 0x3c,
+    0x39, 0x3c, 0x34, 0x29, 0x3c, 0x3e, 0x46, 0x3e, 0x3c, 0x38, 0x3f, 0x2d,
+    0x3d, 0x3d, 0x3f, 0x3f, 0x3d, 0x45, 0x3b, 0x32, 0x39, 0x3f, 0x41, 0x38,
+    0x36, 0x3e, 0x3a, 0x35, 0x40, 0x3f, 0x3b, 0x32, 0x3c, 0x39, 0x3e, 0x35,
+    0x3e, 0x45, 0x34, 0x38, 0x44, 0x39, 0x3f, 0x31, 0x34, 0x39, 0x3f, 0x38,
+    0x44, 0x42, 0x3f, 0x3b, 0x39, 0x3d, 0x39, 0x3b, 0x44, 0x46, 0x38, 0x3d,
+    0x45, 0x37, 0x40, 0x3a, 0x3a, 0x39, 0x35, 0x3c, 0x39, 0x40, 0x47, 0x3e,
+    0x38, 0x42, 0x41, 0x3b, 0x48, 0x3f, 0x3a, 0x3e, 0x3d, 0x3f, 0x32, 0x3b,
+    0x3f, 0x3d, 0x3e, 0x44, 0x43, 0x41, 0x44, 0x47, 0x48, 0x41, 0x41, 0x36,
+    0x3a, 0x33, 0x3c, 0x3c, 0x37, 0x3e, 0x40, 0x34, 0x3f, 0x42, 0x53, 0x40,
+    0x3f, 0x35, 0x3e, 0x46, 0x3a, 0x3e, 0x4b, 0x41, 0x46, 0x32, 0x39, 0x36,
+    0x3b, 0x4f, 0x36, 0x3c, 0x40, 0x3a, 0x40, 0x40, 0x47, 0x3e, 0x49, 0x37,
+    0x3f, 0x31, 0x3e, 0x40, 0x3b, 0x3f, 0x43, 0x44, 0x3a, 0x3d, 0x31, 0x41,
+    0x41, 0x33, 0x43, 0x40, 0x3c, 0x3a, 0x41, 0x40, 0x37, 0x3f, 0x34, 0x3e,
+    0x44, 0x42, 0x3d, 0x3f, 0x3f, 0x34, 0x36, 0x34, 0x31, 0x41, 0x32, 0x39,
+    0x3e, 0x3d, 0x42, 0x35, 0x3e, 0x3a, 0x41, 0x47, 0x3d, 0x42, 0x33, 0x32,
+    0x43, 0x42, 0x36, 0x41, 0x3e, 0x39, 0x46, 0x39, 0x35, 0x3d, 0x3d, 0x40,
+    0x38, 0x44, 0x3d, 0x31, 0x44, 0x39, 0x3a, 0x45, 0x42, 0x41, 0x3d, 0x36,
+    0x3f, 0x3c, 0x39, 0x3d, 0x32, 0x39, 0x42, 0x34, 0x3f, 0x38, 0x44, 0x3c,
+    0x43, 0x45, 0x41, 0x2d, 0x44, 0x42, 0x3d, 0x3f, 0x44, 0x38, 0x3d, 0x35,
+    0x3a, 0x48, 0x40, 0x3b, 0x3d, 0x36, 0x3b, 0x40, 0x3f, 0x3a, 0x3a, 0x3f,
+    0x3c, 0x33, 0x39, 0x3c, 0x3c, 0x38, 0x47, 0x36, 0x3d, 0x41, 0x46, 0x41,
+    0x34, 0x46, 0x48, 0x46, 0x3d, 0x3c, 0x40, 0x43, 0x3d, 0x41, 0x37, 0x3e,
+    0x39, 0x47, 0x3f, 0x39, 0x46, 0x43, 0x3f, 0x41, 0x45, 0x37, 0x40, 0x3a,
+    0x3d, 0x44, 0x3f, 0x3b, 0x3b, 0x40, 0x4f, 0x3d, 0x3d, 0x41, 0x3c, 0x43,
+    0x3e, 0x46, 0x4e, 0x40, 0x3f, 0x34, 0x48, 0x29, 0x45, 0x44, 0x46, 0x41,
+    0x45, 0x32, 0x3e, 0x38, 0x39, 0x3a, 0x3e, 0x3e, 0x4c, 0x34, 0x3c, 0x40,
+    0x4a, 0x44, 0x3d, 0x46, 0x3b, 0x3e, 0x42, 0x42, 0x3a, 0x41, 0x43, 0x41,
+    0x39, 0x3f, 0x3e, 0x3c, 0x36, 0x48, 0x3f, 0x3e, 0x3e, 0x37, 0x3f, 0x3f,
+    0x3b, 0x40, 0x3e, 0x35, 0x32, 0x35, 0x3f, 0x33, 0x3f, 0x38, 0x43, 0x37,
+    0x49, 0x38, 0x37, 0x3c, 0x3c, 0x40, 0x40, 0x3a, 0x3a, 0x46, 0x37, 0x34,
+    0x34, 0x3b, 0x3d, 0x2f, 0x3a, 0x38, 0x3d, 0x46, 0x3d, 0x3b, 0x3d, 0x38,
+    0x35, 0x37, 0x44, 0x3c, 0x3d, 0x3e, 0x40, 0x3a, 0x40, 0x33, 0x3e, 0x38,
+    0x40, 0x3e, 0x45, 0x37, 0x3f, 0x3b, 0x3c, 0x40, 0x3b, 0x3c, 0x3b, 0x33,
+    0x41, 0x3f, 0x3b, 0x42, 0x31, 0x3b, 0x3a, 0x39, 0x3d, 0x41, 0x39, 0x40,
+    0x43, 0x45, 0x39, 0x3b, 0x3a, 0x42, 0x43, 0x3d, 0x3f, 0x40, 0x47, 0x39,
+    0x37, 0x3f, 0x47, 0x3f, 0x45, 0x41, 0x39, 0x3a, 0x41, 0x38, 0x3c, 0x3c,
+    0x39, 0x40, 0x39, 0x3b, 0x3b, 0x3e, 0x38, 0x3b, 0x37, 0x48, 0x41, 0x3f,
+    0x3e, 0x37, 0x3d, 0x44, 0x3c, 0x3e, 0x40, 0x39, 0x41, 0x42, 0x3d, 0x45,
+    0x3b, 0x3e, 0x4c, 0x3b, 0x3a, 0x3a, 0x3e, 0x47, 0x3c, 0x3f, 0x48, 0x3f,
+    0x46, 0x3f, 0x39, 0x25, 0x44, 0x3a, 0x3b, 0x40, 0x41, 0x39, 0x39, 0x47,
+    0x3b, 0x32, 0x49, 0x42, 0x41, 0x3a, 0x43, 0x41, 0x3e, 0x35, 0x37, 0x3d,
+    0x49, 0x40, 0x45, 0x3b, 0x3c, 0x38, 0x48, 0x3c, 0x3c, 0x35, 0x3f, 0x41,
+    0x41, 0x4c, 0x36, 0x39, 0x37, 0x3d, 0x3b, 0x3e, 0x44, 0x32, 0x3d, 0x3f,
+    0x3a, 0x3b, 0x3a, 0x47, 0x38, 0x42, 0x36, 0x34, 0x43, 0x3f, 0x3e, 0x40,
+    0x34, 0x31, 0x36, 0x33, 0x42, 0x37, 0x41, 0x41, 0x40, 0x3d, 0x3d, 0x37,
+    0x43, 0x3a, 0x3e, 0x44, 0x43, 0x3c, 0x35, 0x38, 0x38, 0x3c, 0x43, 0x36,
+    0x3a, 0x38, 0x40, 0x3f, 0x3d, 0x3e, 0x37, 0x3b, 0x41, 0x3a, 0x3b, 0x3d,
+    0x3c, 0x41, 0x3c, 0x41, 0x47, 0x3f, 0x3f, 0x3b, 0x3d, 0x3f, 0x3b, 0x45,
+    0x38, 0x38, 0x40, 0x38, 0x46, 0x42, 0x39, 0x3d, 0x3d, 0x3b, 0x42, 0x36,
+    0x42, 0x41, 0x3e, 0x3e, 0x36, 0x3f, 0x37, 0x3f, 0x36, 0x48, 0x3b, 0x39,
+    0x3d, 0x3f, 0x43, 0x3e, 0x3c, 0x40, 0x48, 0x46, 0x43, 0x36, 0x42, 0x39,
+    0x46, 0x3c, 0x37, 0x38, 0x49, 0x37, 0x36, 0x39, 0x3e, 0x42, 0x48, 0x3a,
+    0x3c, 0x3e, 0x42, 0x30, 0x3e, 0x34, 0x39, 0x3b, 0x46, 0x61, 0x46, 0x1e,
+    0x4c, 0x3b, 0x40, 0x2d, 0x3c, 0x42, 0x32, 0x30, 0x49, 0x3e, 0x39, 0x34,
+    0x30, 0x40, 0x31, 0x38, 0x40, 0x3d, 0x3c, 0x35, 0x3a, 0x36, 0x40, 0x3b,
+    0x41, 0x40, 0x3b, 0x39, 0x37, 0x37, 0x3f, 0x3b, 0x3c, 0x3a, 0x40, 0x3a,
+    0x36, 0x3c, 0x42, 0x39, 0x3e, 0x36, 0x40, 0x42, 0x39, 0x40, 0x3b, 0x34,
+    0x37, 0x33, 0x36, 0x3f, 0x43, 0x33, 0x33, 0x27, 0x3d, 0x46, 0x40, 0x31,
+    0x38, 0x3e, 0x41, 0x20, 0x3f, 0x39, 0x42, 0x35, 0x35, 0x45, 0x40, 0x1e,
+    0x32, 0x35, 0x32, 0x3c, 0x35, 0x44, 0x46, 0x29, 0x3a, 0x3d, 0x37, 0x42,
+    0x3b, 0x45, 0x3a, 0x26, 0x38, 0x40, 0x30, 0x37, 0x41, 0x40, 0x39, 0x2b,
+    0x49, 0x3f, 0x43, 0x43, 0x40, 0x3a, 0x38, 0x29, 0x43, 0x3a, 0x37, 0x40,
+    0x3f, 0x35, 0x3a, 0x28, 0x36, 0x3e, 0x3f, 0x43, 0x3c, 0x39, 0x42, 0x2c,
+    0x38, 0x42, 0x38, 0x3d, 0x42, 0x38, 0x35, 0x2d, 0x34, 0x38, 0x3d, 0x43,
+    0x46, 0x3e, 0x3c, 0x27, 0x3e, 0x40, 0x46, 0x39, 0x35, 0x3d, 0x42, 0x35,
+    0x42, 0x36, 0x40, 0x3e, 0x3a, 0x3e, 0x3c, 0x37, 0x3a, 0x3c, 0x48, 0x48,
+    0x48, 0x37, 0x3d, 0x38, 0x4b, 0x40, 0x43, 0x3b, 0x41, 0x46, 0x3c, 0x34,
+    0x46, 0x3c, 0x3c, 0x3c, 0x4b, 0x64, 0x4a, 0x22, 0x52, 0x41, 0x42, 0x3b,
+    0x42, 0x4a, 0x34, 0x37, 0x4b, 0x44, 0x3b, 0x4a, 0x38, 0x3f, 0x38, 0x3a,
+    0x40, 0x41, 0x42, 0x3c, 0x33, 0x3e, 0x3c, 0x42, 0x2c, 0x4e, 0x47, 0x3f,
+    0x38, 0x33, 0x39, 0x3f, 0x3b, 0x45, 0x37, 0x3a, 0x42, 0x42, 0x44, 0x3f,
+    0x3c, 0x3c, 0x3e, 0x3d, 0x3c, 0x3c, 0x40, 0x2c, 0x3c, 0x3d, 0x42, 0x39,
+    0x3a, 0x37, 0x43, 0x2a, 0x3d, 0x40, 0x41, 0x41, 0x46, 0x46, 0x42, 0x28,
+    0x39, 0x3c, 0x37, 0x44, 0x46, 0x41, 0x47, 0x2b, 0x44, 0x33, 0x39, 0x3f,
+    0x3f, 0x43, 0x3d, 0x23, 0x3a, 0x43, 0x41, 0x3b, 0x41, 0x42, 0x33, 0x1f,
+    0x43, 0x3e, 0x3d, 0x40, 0x37, 0x33, 0x42, 0x28, 0x3b, 0x38, 0x37, 0x3c,
+    0x34, 0x40, 0x44, 0x2a, 0x3c, 0x3a, 0x41, 0x37, 0x45, 0x3f, 0x3e, 0x26,
+    0x41, 0x40, 0x35, 0x3d, 0x45, 0x3e, 0x3d, 0x29, 0x3c, 0x39, 0x3f, 0x3c,
+    0x3d, 0x39, 0x38, 0x2d, 0x39, 0x38, 0x38, 0x44, 0x3c, 0x3e, 0x38, 0x26,
+    0x40, 0x36, 0x39, 0x38, 0x3f, 0x32, 0x39, 0x35, 0x3d, 0x3e, 0x35, 0x3a,
+    0x3f, 0x3f, 0x31, 0x35, 0x34, 0x45, 0x3e, 0x43, 0x48, 0x3b, 0x37, 0x39,
+    0x4d, 0x46, 0x54, 0x40, 0x41, 0x4e, 0x3d, 0x38, 0x4d, 0x38, 0x3a, 0x3b,
+    0x49, 0x5a, 0x4a, 0x1e, 0x5e, 0x39, 0x38, 0x37, 0x3a, 0x51, 0x3a, 0x3c,
+    0x50, 0x3f, 0x40, 0x42, 0x33, 0x3b, 0x2e, 0x4a, 0x3f, 0x4a, 0x3b, 0x43,
+    0x36, 0x3e, 0x3d, 0x42, 0x39, 0x46, 0x4b, 0x3c, 0x3b, 0x3b, 0x35, 0x3e,
+    0x3d, 0x4b, 0x3f, 0x41, 0x3f, 0x3b, 0x42, 0x42, 0x38, 0x3a, 0x41, 0x3d,
+    0x36, 0x41, 0x37, 0x2f, 0x38, 0x37, 0x3f, 0x34, 0x35, 0x35, 0x45, 0x30,
+    0x31, 0x42, 0x31, 0x3a, 0x3a, 0x3e, 0x3d, 0x23, 0x3f, 0x43, 0x3b, 0x41,
+    0x35, 0x3b, 0x40, 0x25, 0x45, 0x3e, 0x42, 0x3b, 0x31, 0x40, 0x36, 0x28,
+    0x43, 0x42, 0x30, 0x42, 0x32, 0x32, 0x36, 0x2c, 0x35, 0x3a, 0x3d, 0x3a,
+    0x3c, 0x36, 0x3e, 0x30, 0x41, 0x42, 0x38, 0x41, 0x41, 0x3e, 0x3c, 0x23,
+    0x37, 0x40, 0x3c, 0x3e, 0x3e, 0x3a, 0x37, 0x2b, 0x36, 0x40, 0x41, 0x42,
+    0x3e, 0x38, 0x44, 0x22, 0x46, 0x38, 0x33, 0x3b, 0x3a, 0x3a, 0x3a, 0x24,
+    0x36, 0x3b, 0x38, 0x44, 0x34, 0x38, 0x40, 0x28, 0x38, 0x3d, 0x36, 0x44,
+    0x31, 0x3e, 0x37, 0x37, 0x36, 0x3f, 0x47, 0x38, 0x3b, 0x3e, 0x2c, 0x4c,
+    0x36, 0x3c, 0x3b, 0x41, 0x4c, 0x3d, 0x3d, 0x40, 0x49, 0x44, 0x52, 0x3f,
+    0x3b, 0x4d, 0x3c, 0x3a, 0x4f, 0x3b, 0x36, 0x3b, 0x4a, 0x5f, 0x4e, 0x1f,
+    0x57, 0x3c, 0x3d, 0x3d, 0x46, 0x59, 0x42, 0x45, 0x52, 0x3d, 0x3a, 0x41,
+    0x31, 0x39, 0x39, 0x4f, 0x43, 0x4e, 0x3e, 0x37, 0x3a, 0x37, 0x33, 0x47,
+    0x32, 0x45, 0x47, 0x43, 0x31, 0x33, 0x38, 0x43, 0x3e, 0x47, 0x3d, 0x32,
+    0x3b, 0x39, 0x3c, 0x42, 0x3d, 0x47, 0x42, 0x40, 0x3d, 0x3f, 0x3c, 0x34,
+    0x3b, 0x3e, 0x42, 0x3d, 0x43, 0x35, 0x42, 0x2c, 0x35, 0x3d, 0x3c, 0x3d,
+    0x3a, 0x3c, 0x46, 0x25, 0x43, 0x35, 0x3d, 0x39, 0x3a, 0x3c, 0x40, 0x2b,
+    0x33, 0x40, 0x3d, 0x46, 0x45, 0x37, 0x3c, 0x36, 0x43, 0x37, 0x3e, 0x3a,
+    0x3c, 0x47, 0x3f, 0x38, 0x36, 0x3e, 0x3a, 0x42, 0x3c, 0x42, 0x33, 0x39,
+    0x3c, 0x3a, 0x3c, 0x40, 0x48, 0x3b, 0x40, 0x32, 0x37, 0x47, 0x34, 0x38,
+    0x33, 0x3d, 0x49, 0x2d, 0x36, 0x42, 0x3d, 0x3e, 0x47, 0x3c, 0x42, 0x2c,
+    0x3b, 0x31, 0x3f, 0x3c, 0x3d, 0x3c, 0x3f, 0x2b, 0x41, 0x35, 0x33, 0x43,
+    0x47, 0x39, 0x34, 0x2a, 0x3a, 0x3a, 0x40, 0x3d, 0x44, 0x3c, 0x39, 0x34,
+    0x43, 0x40, 0x33, 0x3a, 0x3b, 0x42, 0x38, 0x3b, 0x34, 0x35, 0x40, 0x43,
+    0x4b, 0x41, 0x3d, 0x38, 0x49, 0x44, 0x4d, 0x37, 0x3a, 0x4b, 0x40, 0x39,
+    0x4e, 0x3b, 0x30, 0x38, 0x47, 0x5d, 0x50, 0x1f, 0x54, 0x35, 0x3a, 0x39,
+    0x40, 0x4c, 0x46, 0x42, 0x52, 0x39, 0x39, 0x45, 0x41, 0x3c, 0x30, 0x5b,
+    0x43, 0x4d, 0x4a, 0x3e, 0x31, 0x39, 0x41, 0x4c, 0x36, 0x44, 0x4c, 0x39,
+    0x32, 0x41, 0x47, 0x3e, 0x34, 0x49, 0x45, 0x3b, 0x34, 0x3a, 0x3b, 0x47,
+    0x43, 0x3e, 0x43, 0x32, 0x40, 0x3e, 0x3e, 0x38, 0x37, 0x3e, 0x37, 0x3a,
+    0x3a, 0x40, 0x48, 0x2f, 0x3e, 0x3e, 0x46, 0x3a, 0x3e, 0x35, 0x49, 0x30,
+    0x3a, 0x41, 0x3e, 0x39, 0x34, 0x45, 0x3d, 0x34, 0x48, 0x43, 0x43, 0x42,
+    0x33, 0x39, 0x3b, 0x3f, 0x30, 0x46, 0x41, 0x39, 0x48, 0x3a, 0x3c, 0x3e,
+    0x3f, 0x36, 0x40, 0x3d, 0x43, 0x40, 0x3e, 0x39, 0x44, 0x40, 0x44, 0x3b,
+    0x43, 0x42, 0x39, 0x38, 0x3a, 0x3f, 0x3b, 0x3f, 0x38, 0x3d, 0x34, 0x30,
+    0x34, 0x3d, 0x3f, 0x42, 0x44, 0x3e, 0x34, 0x32, 0x37, 0x46, 0x44, 0x38,
+    0x3c, 0x45, 0x39, 0x2b, 0x41, 0x3c, 0x40, 0x40, 0x3a, 0x3a, 0x3c, 0x32,
+    0x45, 0x42, 0x3d, 0x46, 0x38, 0x3b, 0x34, 0x35, 0x38, 0x43, 0x3d, 0x34,
+    0x42, 0x3b, 0x38, 0x3d, 0x37, 0x43, 0x3f, 0x39, 0x4e, 0x39, 0x40, 0x3f,
+    0x4d, 0x43, 0x49, 0x3f, 0x36, 0x41, 0x44, 0x39, 0x48, 0x3a, 0x35, 0x39,
+    0x48, 0x59, 0x4e, 0x25, 0x58, 0x39, 0x42, 0x35, 0x43, 0x4e, 0x42, 0x3f,
+    0x4a, 0x43, 0x3b, 0x3f, 0x3b, 0x37, 0x2b, 0x5a, 0x3d, 0x44, 0x3b, 0x40,
+    0x31, 0x38, 0x37, 0x44, 0x32, 0x3e, 0x41, 0x3d, 0x2c, 0x42, 0x42, 0x3c,
+    0x37, 0x45, 0x41, 0x41, 0x3d, 0x39, 0x41, 0x40, 0x3a, 0x46, 0x41, 0x40,
+    0x40, 0x3d, 0x38, 0x31, 0x37, 0x3f, 0x42, 0x38, 0x3f, 0x3c, 0x48, 0x30,
+    0x3e, 0x39, 0x3f, 0x3d, 0x3d, 0x44, 0x52, 0x35, 0x3b, 0x32, 0x42, 0x32,
+    0x3a, 0x43, 0x39, 0x3b, 0x31, 0x43, 0x36, 0x3c, 0x3c, 0x3c, 0x41, 0x45,
+    0x42, 0x49, 0x41, 0x3b, 0x42, 0x3e, 0x41, 0x44, 0x36, 0x41, 0x3f, 0x3c,
+    0x3e, 0x47, 0x45, 0x41, 0x38, 0x41, 0x3f, 0x43, 0x35, 0x32, 0x41, 0x39,
+    0x36, 0x47, 0x35, 0x42, 0x44, 0x3b, 0x3f, 0x34, 0x48, 0x41, 0x43, 0x42,
+    0x36, 0x3e, 0x3c, 0x3d, 0x3d, 0x3b, 0x42, 0x44, 0x3a, 0x44, 0x36, 0x2a,
+    0x41, 0x39, 0x3a, 0x41, 0x46, 0x3c, 0x44, 0x2f, 0x36, 0x39, 0x3b, 0x3f,
+    0x38, 0x45, 0x3c, 0x3c, 0x3e, 0x41, 0x3c, 0x39, 0x3e, 0x40, 0x2f, 0x45,
+    0x3b, 0x41, 0x40, 0x3c, 0x4e, 0x38, 0x3e, 0x48, 0x46, 0x40, 0x48, 0x44,
+    0x40, 0x4a, 0x45, 0x3c, 0x4f, 0x39, 0x37, 0x3a, 0x4e, 0x59, 0x5c, 0x22,
+    0x58, 0x32, 0x38, 0x34, 0x40, 0x4b, 0x43, 0x43, 0x4f, 0x3e, 0x39, 0x40,
+    0x37, 0x3e, 0x2f, 0x55, 0x3f, 0x40, 0x38, 0x3f, 0x3a, 0x33, 0x37, 0x3d,
+    0x34, 0x4c, 0x37, 0x3f, 0x32, 0x39, 0x45, 0x34, 0x44, 0x4c, 0x3f, 0x3b,
+    0x3c, 0x36, 0x36, 0x43, 0x36, 0x47, 0x41, 0x46, 0x41, 0x3e, 0x41, 0x3a,
+    0x43, 0x3a, 0x48, 0x42, 0x42, 0x3e, 0x4c, 0x36, 0x3d, 0x39, 0x43, 0x46,
+    0x3d, 0x42, 0x42, 0x3b, 0x45, 0x43, 0x3c, 0x40, 0x39, 0x37, 0x34, 0x45,
+    0x3f, 0x40, 0x34, 0x38, 0x43, 0x3f, 0x36, 0x47, 0x3f, 0x3b, 0x49, 0x3c,
+    0x3a, 0x3a, 0x42, 0x4c, 0x37, 0x3e, 0x3b, 0x32, 0x47, 0x40, 0x45, 0x4d,
+    0x39, 0x3b, 0x39, 0x40, 0x3e, 0x3c, 0x3d, 0x3a, 0x3d, 0x3b, 0x3e, 0x43,
+    0x3e, 0x3f, 0x3a, 0x3c, 0x41, 0x40, 0x39, 0x3c, 0x3a, 0x38, 0x39, 0x37,
+    0x36, 0x33, 0x43, 0x45, 0x3f, 0x45, 0x41, 0x30, 0x3b, 0x34, 0x3c, 0x39,
+    0x3b, 0x45, 0x37, 0x2e, 0x36, 0x34, 0x36, 0x44, 0x3d, 0x40, 0x3a, 0x3c,
+    0x3d, 0x3b, 0x38, 0x41, 0x42, 0x3a, 0x32, 0x4b, 0x38, 0x3e, 0x41, 0x46,
+    0x57, 0x3a, 0x44, 0x48, 0x47, 0x45, 0x47, 0x3e, 0x43, 0x42, 0x45, 0x3b,
+    0x50, 0x39, 0x37, 0x3f, 0x47, 0x51, 0x5e, 0x22, 0x59, 0x33, 0x3c, 0x37,
+    0x43, 0x50, 0x49, 0x47, 0x46, 0x42, 0x39, 0x44, 0x44, 0x3d, 0x2f, 0x53,
+    0x35, 0x41, 0x40, 0x3d, 0x2d, 0x35, 0x2f, 0x3e, 0x3f, 0x37, 0x38, 0x3e,
+    0x30, 0x45, 0x46, 0x38, 0x33, 0x3c, 0x3e, 0x3b, 0x44, 0x42, 0x47, 0x49,
+    0x43, 0x40, 0x3d, 0x3c, 0x38, 0x43, 0x3e, 0x38, 0x3d, 0x40, 0x36, 0x43,
+    0x43, 0x3e, 0x40, 0x3c, 0x44, 0x47, 0x43, 0x3d, 0x41, 0x39, 0x3e, 0x45,
+    0x39, 0x3d, 0x39, 0x40, 0x42, 0x40, 0x3b, 0x4a, 0x40, 0x41, 0x3f, 0x37,
+    0x43, 0x41, 0x37, 0x4c, 0x3f, 0x3d, 0x38, 0x3a, 0x42, 0x46, 0x43, 0x4d,
+    0x3c, 0x3a, 0x43, 0x3e, 0x3b, 0x3d, 0x46, 0x4a, 0x38, 0x3d, 0x3d, 0x39,
+    0x3e, 0x3c, 0x3b, 0x3e, 0x3a, 0x40, 0x40, 0x34, 0x41, 0x3f, 0x3e, 0x3f,
+    0x47, 0x3c, 0x32, 0x3a, 0x3c, 0x44, 0x3f, 0x42, 0x41, 0x43, 0x3e, 0x3a,
+    0x3b, 0x42, 0x41, 0x39, 0x39, 0x37, 0x39, 0x3e, 0x3d, 0x33, 0x3e, 0x35,
+    0x44, 0x37, 0x40, 0x35, 0x3f, 0x47, 0x37, 0x41, 0x35, 0x38, 0x47, 0x40,
+    0x43, 0x44, 0x2e, 0x48, 0x35, 0x44, 0x41, 0x3c, 0x47, 0x3d, 0x3d, 0x52,
+    0x48, 0x41, 0x44, 0x41, 0x42, 0x4b, 0x3e, 0x3d, 0x4e, 0x32, 0x34, 0x47,
+    0x55, 0x57, 0x5f, 0x22, 0x57, 0x33, 0x40, 0x37, 0x40, 0x4a, 0x4d, 0x47,
+    0x48, 0x38, 0x3e, 0x46, 0x37, 0x42, 0x28, 0x57, 0x38, 0x42, 0x36, 0x43,
+    0x35, 0x37, 0x39, 0x39, 0x42, 0x39, 0x38, 0x3c, 0x35, 0x3c, 0x3c, 0x3a,
+    0x3c, 0x4c, 0x45, 0x3f, 0x43, 0x3d, 0x45, 0x45, 0x40, 0x47, 0x3e, 0x3e,
+    0x3d, 0x4b, 0x49, 0x35, 0x43, 0x3c, 0x36, 0x46, 0x3c, 0x46, 0x42, 0x44,
+    0x3c, 0x42, 0x3d, 0x42, 0x44, 0x3c, 0x4a, 0x40, 0x40, 0x3c, 0x3b, 0x3c,
+    0x35, 0x34, 0x2e, 0x46, 0x38, 0x3d, 0x38, 0x44, 0x41, 0x40, 0x3c, 0x52,
+    0x3b, 0x3d, 0x3b, 0x3f, 0x42, 0x47, 0x44, 0x52, 0x44, 0x44, 0x39, 0x3f,
+    0x43, 0x35, 0x3c, 0x4d, 0x39, 0x3d, 0x3b, 0x37, 0x3e, 0x38, 0x3e, 0x49,
+    0x3a, 0x37, 0x3c, 0x49, 0x40, 0x41, 0x3c, 0x40, 0x3d, 0x38, 0x39, 0x3f,
+    0x44, 0x3e, 0x42, 0x3e, 0x47, 0x40, 0x34, 0x46, 0x48, 0x37, 0x45, 0x3e,
+    0x46, 0x3f, 0x35, 0x39, 0x38, 0x3f, 0x36, 0x2c, 0x40, 0x38, 0x3e, 0x3c,
+    0x32, 0x3c, 0x46, 0x3a, 0x3f, 0x41, 0x36, 0x49, 0x42, 0x38, 0x36, 0x43,
+    0x3d, 0x41, 0x46, 0x35, 0x4f, 0x3a, 0x41, 0x5c, 0x4a, 0x42, 0x4e, 0x42,
+    0x46, 0x54, 0x3f, 0x45, 0x4c, 0x30, 0x33, 0x44, 0x56, 0x5d, 0x68, 0x26,
+    0x60, 0x33, 0x3e, 0x3a, 0x42, 0x49, 0x52, 0x47, 0x51, 0x46, 0x40, 0x47,
+    0x41, 0x3b, 0x1b, 0x4f, 0x3c, 0x45, 0x3d, 0x3d, 0x32, 0x2f, 0x3e, 0x3c,
+    0x3c, 0x3f, 0x3b, 0x3c, 0x2c, 0x3a, 0x41, 0x3c, 0x35, 0x3e, 0x3e, 0x3c,
+    0x3d, 0x3f, 0x3e, 0x40, 0x40, 0x44, 0x42, 0x3c, 0x3c, 0x3c, 0x41, 0x3c,
+    0x3c, 0x3d, 0x3e, 0x3d, 0x3c, 0x3d, 0x4a, 0x46, 0x3f, 0x35, 0x33, 0x43,
+    0x42, 0x41, 0x4d, 0x48, 0x48, 0x44, 0x3e, 0x41, 0x41, 0x36, 0x3c, 0x4c,
+    0x34, 0x47, 0x42, 0x39, 0x3e, 0x43, 0x3a, 0x53, 0x3b, 0x3b, 0x42, 0x3d,
+    0x41, 0x3c, 0x3e, 0x52, 0x3a, 0x44, 0x34, 0x43, 0x3d, 0x3d, 0x3a, 0x50,
+    0x3e, 0x33, 0x41, 0x40, 0x3f, 0x38, 0x43, 0x42, 0x3b, 0x37, 0x3e, 0x43,
+    0x3f, 0x3c, 0x41, 0x49, 0x40, 0x32, 0x40, 0x3e, 0x3b, 0x3e, 0x44, 0x3c,
+    0x35, 0x37, 0x3d, 0x41, 0x34, 0x3f, 0x3a, 0x3c, 0x47, 0x32, 0x41, 0x3d,
+    0x3c, 0x3a, 0x4a, 0x31, 0x43, 0x38, 0x45, 0x37, 0x49, 0x3c, 0x34, 0x3f,
+    0x3d, 0x3d, 0x3d, 0x45, 0x47, 0x3e, 0x37, 0x48, 0x40, 0x3b, 0x45, 0x3d,
+    0x4e, 0x42, 0x3f, 0x57, 0x4b, 0x43, 0x4b, 0x3d, 0x3f, 0x47, 0x4a, 0x43,
+    0x4e, 0x30, 0x38, 0x45, 0x59, 0x60, 0x64, 0x2d, 0x5a, 0x2d, 0x34, 0x35,
+    0x47, 0x54, 0x4e, 0x3f, 0x44, 0x45, 0x3c, 0x43, 0x3d, 0x40, 0x1c, 0x5a,
+    0x36, 0x3f, 0x3a, 0x39, 0x37, 0x3c, 0x32, 0x3b, 0x2d, 0x4a, 0x42, 0x35,
+    0x30, 0x41, 0x43, 0x3d, 0x3d, 0x45, 0x38, 0x36, 0x3e, 0x40, 0x3a, 0x4a,
+    0x34, 0x3d, 0x44, 0x3c, 0x39, 0x3b, 0x52, 0x38, 0x40, 0x3b, 0x3f, 0x3f,
+    0x35, 0x37, 0x46, 0x48, 0x38, 0x3b, 0x40, 0x36, 0x3d, 0x3a, 0x4f, 0x45,
+    0x35, 0x3a, 0x35, 0x33, 0x37, 0x43, 0x42, 0x52, 0x37, 0x3b, 0x3d, 0x42,
+    0x44, 0x3d, 0x48, 0x58, 0x33, 0x3f, 0x41, 0x44, 0x44, 0x3f, 0x3b, 0x52,
+    0x47, 0x39, 0x32, 0x3b, 0x38, 0x35, 0x48, 0x50, 0x34, 0x30, 0x39, 0x43,
+    0x42, 0x40, 0x3b, 0x4b, 0x43, 0x3d, 0x34, 0x44, 0x33, 0x39, 0x44, 0x4b,
+    0x45, 0x3e, 0x3c, 0x3f, 0x3a, 0x3e, 0x3c, 0x45, 0x36, 0x3e, 0x3d, 0x40,
+    0x43, 0x46, 0x37, 0x3d, 0x3b, 0x42, 0x43, 0x3f, 0x3a, 0x41, 0x48, 0x2f,
+    0x3e, 0x39, 0x3a, 0x39, 0x3f, 0x3a, 0x41, 0x40, 0x40, 0x3c, 0x3b, 0x3b,
+    0x3f, 0x40, 0x3e, 0x42, 0x38, 0x3f, 0x38, 0x3c, 0x49, 0x45, 0x3f, 0x62,
+    0x55, 0x47, 0x4c, 0x3c, 0x3c, 0x4a, 0x4c, 0x46, 0x4f, 0x39, 0x3a, 0x3b,
+    0x5e, 0x58, 0x6f, 0x2b, 0x5a, 0x2f, 0x3a, 0x35, 0x4b, 0x47, 0x4a, 0x46,
+    0x45, 0x3e, 0x38, 0x4f, 0x3b, 0x3d, 0x21, 0x4b, 0x3d, 0x40, 0x37, 0x40,
+    0x2d, 0x2c, 0x43, 0x3f, 0x2b, 0x3e, 0x3d, 0x39, 0x2f, 0x39, 0x44, 0x3c,
+    0x39, 0x39, 0x43, 0x3b, 0x3d, 0x3b, 0x44, 0x39, 0x42, 0x42, 0x3e, 0x40,
+    0x3b, 0x42, 0x53, 0x40, 0x32, 0x3d, 0x35, 0x3f, 0x3d, 0x45, 0x48, 0x46,
+    0x3d, 0x43, 0x3c, 0x36, 0x35, 0x39, 0x3d, 0x4a, 0x39, 0x39, 0x3e, 0x41,
+    0x38, 0x36, 0x3b, 0x53, 0x3c, 0x36, 0x32, 0x3b, 0x43, 0x3d, 0x42, 0x57,
+    0x35, 0x2f, 0x38, 0x40, 0x2f, 0x3d, 0x3c, 0x4c, 0x40, 0x2f, 0x3a, 0x36,
+    0x39, 0x3c, 0x3a, 0x51, 0x3d, 0x37, 0x39, 0x3c, 0x42, 0x40, 0x43, 0x52,
+    0x3e, 0x42, 0x3e, 0x45, 0x36, 0x34, 0x42, 0x4b, 0x3a, 0x38, 0x37, 0x3f,
+    0x36, 0x41, 0x3a, 0x45, 0x3e, 0x38, 0x35, 0x41, 0x35, 0x34, 0x37, 0x3c,
+    0x3f, 0x31, 0x3c, 0x35, 0x33, 0x43, 0x36, 0x28, 0x44, 0x42, 0x3e, 0x42,
+    0x3a, 0x41, 0x43, 0x35, 0x3d, 0x3f, 0x40, 0x3e, 0x3d, 0x33, 0x31, 0x41,
+    0x3d, 0x40, 0x3b, 0x40, 0x51, 0x40, 0x3f, 0xfb, 0x51, 0x49, 0x4c, 0x3d,
+    0x44, 0x4e, 0x47, 0x42, 0x50, 0x39, 0x39, 0x40, 0x59, 0x5d, 0x70, 0x2c,
+    0x59, 0x39, 0x38, 0x2f, 0x46, 0x50, 0x51, 0x47, 0x4c, 0x3c, 0x39, 0x48,
+    0x44, 0x3a, 0x1a, 0x51, 0x35, 0x3e, 0x34, 0x3a, 0x3d, 0x2b, 0x41, 0x39,
+    0x37, 0x4d, 0x3e, 0x43, 0x38, 0x3b, 0x3a, 0x35, 0x36, 0x3a, 0x43, 0x39,
+    0x39, 0x3a, 0x46, 0x3b, 0x39, 0x3c, 0x46, 0x36, 0x3e, 0x3d, 0x4b, 0x3d,
+    0x3b, 0x46, 0x3a, 0x41, 0x31, 0x3c, 0x44, 0x4a, 0x37, 0x42, 0x39, 0x43,
+    0x43, 0x3e, 0x40, 0x47, 0x3c, 0x3e, 0x3b, 0x43, 0x34, 0x3a, 0x43, 0x53,
+    0x3f, 0x37, 0x39, 0x37, 0x3e, 0x3b, 0x46, 0x59, 0x37, 0x37, 0x33, 0x3d,
+    0x38, 0x42, 0x36, 0x58, 0x2e, 0x32, 0x2b, 0x45, 0x32, 0x33, 0x36, 0x50,
+    0x41, 0x3f, 0x37, 0x3d, 0x3f, 0x3d, 0x46, 0x49, 0x41, 0x38, 0x33, 0x3d,
+    0x33, 0x32, 0x3a, 0x49, 0x41, 0x41, 0x3d, 0x33, 0x3b, 0x3b, 0x3a, 0x46,
+    0x34, 0x44, 0x3f, 0x3b, 0x2f, 0x3f, 0x32, 0x3c, 0x3f, 0x43, 0x3e, 0x45,
+    0x3a, 0x3c, 0x43, 0x26, 0x46, 0x37, 0x38, 0x3e, 0x36, 0x31, 0x3e, 0x34,
+    0x39, 0x3a, 0x38, 0x42, 0x38, 0x3e, 0x32, 0x42, 0x37, 0x37, 0x3c, 0x3a,
+    0x48, 0x44, 0x3a, 0x68, 0x56, 0x46, 0x4d, 0x47, 0x40, 0x4e, 0x42, 0x46,
+    0x51, 0x40, 0x38, 0x43, 0x58, 0x5d, 0x6a, 0x31, 0x57, 0x32, 0x3c, 0x36,
+    0x49, 0x56, 0x52, 0x48, 0x4b, 0x41, 0x2f, 0x4d, 0x31, 0x43, 0x1b, 0x4c,
+    0x30, 0x44, 0x33, 0x36, 0x2c, 0x3d, 0x45, 0x3a, 0x35, 0x46, 0x3d, 0x39,
+    0x2e, 0x38, 0x3f, 0x37, 0x41, 0x44, 0x46, 0x31, 0x33, 0x46, 0x37, 0x37,
+    0x3f, 0x41, 0x45, 0x30, 0x46, 0x3b, 0x50, 0x3b, 0x40, 0x39, 0x42, 0x43,
+    0x35, 0x37, 0x40, 0x44, 0x3b, 0x41, 0x3d, 0x37, 0x3a, 0x41, 0x3d, 0x46,
+    0x36, 0x41, 0x38, 0x41, 0x38, 0x3d, 0x45, 0x58, 0x3d, 0x3a, 0x3d, 0x44,
+    0x45, 0x38, 0x48, 0x5c, 0x3d, 0x39, 0x43, 0x45, 0x41, 0x3e, 0x4a, 0x56,
+    0x40, 0x33, 0x30, 0x31, 0x42, 0x39, 0x38, 0x56, 0x30, 0x3a, 0x35, 0x3e,
+    0x3f, 0x38, 0x36, 0x47, 0x3c, 0x3a, 0x3d, 0x3f, 0x37, 0x35, 0x3b, 0x4d,
+    0x43, 0x36, 0x39, 0x37, 0x3e, 0x42, 0x3d, 0x3f, 0x40, 0x3f, 0x34, 0x3b,
+    0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a, 0x3a, 0x3c, 0x34, 0x3f, 0x3c, 0x2a,
+    0x49, 0x3b, 0x36, 0x3c, 0x35, 0x46, 0x38, 0x3b, 0x3c, 0x39, 0x38, 0x42,
+    0x39, 0x36, 0x2e, 0x4a, 0x3d, 0x39, 0x3f, 0x3f, 0x4b, 0x45, 0x3e, 0x67,
+    0x4b, 0x4b, 0x49, 0x3e, 0x3f, 0x53, 0x4c, 0x55, 0x47, 0x32, 0x3b, 0x39,
+    0x54, 0x5b, 0x6f, 0x29, 0x5a, 0x34, 0x3e, 0x26, 0x45, 0x52, 0x59, 0x44,
+    0x59, 0x39, 0x3c, 0x47, 0x36, 0x46, 0x16, 0x50, 0x32, 0x46, 0x34, 0x35,
+    0x35, 0x2d, 0x39, 0x38, 0x2c, 0x42, 0x43, 0x3b, 0x32, 0x3f, 0x37, 0x2f,
+    0x34, 0x43, 0x46, 0x3b, 0x3b, 0x41, 0x3c, 0x37, 0x3e, 0x43, 0x4b, 0x36,
+    0x3e, 0x3c, 0x4c, 0x42, 0x40, 0x3f, 0x49, 0x40, 0x3c, 0x40, 0x3c, 0x48,
+    0x35, 0x42, 0x3f, 0x42, 0x44, 0x40, 0x45, 0x4f, 0x3f, 0x3f, 0x40, 0x42,
+    0x3b, 0x3d, 0x49, 0x55, 0x42, 0x39, 0x41, 0x3b, 0x3f, 0x38, 0x44, 0x60,
+    0x34, 0x40, 0x3b, 0x3b, 0x35, 0x3d, 0x41, 0x4e, 0x35, 0x33, 0x30, 0x3a,
+    0x3a, 0x32, 0x42, 0x4f, 0x33, 0x34, 0x2f, 0x38, 0x49, 0x38, 0x40, 0x4c,
+    0x35, 0x38, 0x3e, 0x46, 0x3f, 0x3a, 0x3a, 0x45, 0x3b, 0x34, 0x2e, 0x39,
+    0x32, 0x3e, 0x40, 0x48, 0x35, 0x44, 0x3a, 0x34, 0x3f, 0x35, 0x3b, 0x32,
+    0x40, 0x43, 0x3e, 0x38, 0x3b, 0x43, 0x3c, 0x2b, 0x46, 0x43, 0x40, 0x32,
+    0x42, 0x3b, 0x49, 0x2e, 0x3b, 0x3a, 0x3e, 0x41, 0x3c, 0x3f, 0x31, 0x3b,
+    0x41, 0x33, 0x41, 0x3c, 0x4d, 0x40, 0x38, 0x68, 0x4c, 0x4c, 0x4e, 0x3f,
+    0x3f, 0x54, 0x4a, 0x3d, 0x4c, 0x33, 0x3b, 0x3a, 0x5d, 0x60, 0x71, 0x2b,
+    0x59, 0x33, 0x3c, 0x2c, 0x47, 0x52, 0x4f, 0x51, 0x56, 0x3d, 0x39, 0x44,
+    0x35, 0x41, 0x1b, 0x4a, 0x35, 0x41, 0x37, 0x35, 0x2c, 0x35, 0x37, 0x35,
+    0x38, 0x41, 0x38, 0x3e, 0x3c, 0x40, 0x3c, 0x2f, 0x38, 0x3e, 0x3f, 0x45,
+    0x40, 0x3d, 0x3c, 0x35, 0x3c, 0x46, 0x43, 0x39, 0x37, 0x42, 0x4e, 0x3c,
+    0x42, 0x46, 0x37, 0x33, 0x43, 0x3f, 0x47, 0x4a, 0x3d, 0x3e, 0x40, 0x40,
+    0x40, 0x3f, 0x4b, 0x54, 0x36, 0x3f, 0x37, 0x40, 0x39, 0x39, 0x47, 0x51,
+    0x3d, 0x39, 0x36, 0x36, 0x40, 0x40, 0x41, 0x5a, 0x38, 0x39, 0x42, 0x38,
+    0x40, 0x39, 0x43, 0x50, 0x3a, 0x3a, 0x32, 0x3c, 0x3c, 0x35, 0x44, 0x4a,
+    0x37, 0x35, 0x36, 0x3c, 0x35, 0x30, 0x48, 0x4b, 0x3c, 0x33, 0x37, 0x3e,
+    0x42, 0x3c, 0x42, 0x4e, 0x41, 0x32, 0x3e, 0x33, 0x49, 0x39, 0x3e, 0x42,
+    0x3d, 0x39, 0x37, 0x36, 0x35, 0x41, 0x3e, 0x37, 0x37, 0x3e, 0x3d, 0x38,
+    0x3a, 0x3c, 0x41, 0x29, 0x3c, 0x3b, 0x39, 0x40, 0x43, 0x3d, 0x3e, 0x33,
+    0x3f, 0x3f, 0x3e, 0x43, 0x43, 0x38, 0x38, 0x41, 0x3b, 0x38, 0x35, 0x3a,
+    0x4b, 0x44, 0x44, 0x55, 0x4e, 0x44, 0x4d, 0x49, 0x3e, 0x53, 0x45, 0x3f,
+    0x45, 0x3d, 0x36, 0x36, 0x4f, 0x5b, 0x6b, 0x28, 0x59, 0x34, 0x39, 0x34,
+    0x4f, 0x4d, 0x52, 0x3e, 0x51, 0x34, 0x35, 0x4a, 0x3b, 0x3f, 0x21, 0x45,
+    0x36, 0x3f, 0x38, 0x33, 0x2c, 0x37, 0x32, 0x2f, 0x2b, 0x44, 0x47, 0x3f,
+    0x38, 0x3a, 0x3f, 0x2e, 0x41, 0x3f, 0x3d, 0x41, 0x35, 0x48, 0x43, 0x40,
+    0x33, 0x44, 0x40, 0x38, 0x47, 0x44, 0x4c, 0x3d, 0x41, 0x3b, 0x39, 0x36,
+    0x3e, 0x44, 0x49, 0x48, 0x3c, 0x3b, 0x34, 0x34, 0x3f, 0x3c, 0x42, 0x52,
+    0x43, 0x41, 0x3c, 0x3c, 0x3d, 0x43, 0x48, 0x54, 0x39, 0x35, 0x39, 0x3c,
+    0x43, 0x3c, 0x44, 0x5f, 0x39, 0x3d, 0x38, 0x3f, 0x36, 0x3d, 0x43, 0x58,
+    0x33, 0x3d, 0x43, 0x33, 0x3f, 0x36, 0x39, 0x54, 0x3a, 0x37, 0x2d, 0x46,
+    0x43, 0x41, 0x47, 0x46, 0x3e, 0x42, 0x34, 0x49, 0x3a, 0x3f, 0x38, 0x50,
+    0x3a, 0x3b, 0x42, 0x3a, 0x3e, 0x3c, 0x3b, 0x40, 0x42, 0x45, 0x37, 0x3b,
+    0x2f, 0x3b, 0x46, 0x30, 0x42, 0x3b, 0x3b, 0x44, 0x3b, 0x3e, 0x40, 0x1e,
+    0x33, 0x40, 0x40, 0x3d, 0x39, 0x3a, 0x41, 0x33, 0x45, 0x3e, 0x3c, 0x3f,
+    0x3f, 0x38, 0x31, 0x46, 0x3b, 0x35, 0x42, 0x39, 0x49, 0x3e, 0x3d, 0x66,
+    0x53, 0x3f, 0x44, 0x40, 0x43, 0x45, 0x48, 0x45, 0x49, 0x2d, 0x3e, 0x3a,
+    0x4f, 0x5a, 0x62, 0x27, 0x54, 0x37, 0x35, 0x34, 0x42, 0x50, 0x54, 0x43,
+    0x4d, 0x38, 0x39, 0x48, 0x38, 0x4c, 0x21, 0x3f, 0x40, 0x3a, 0x3a, 0x2f,
+    0x37, 0x2f, 0x29, 0x2c, 0x36, 0x47, 0x3f, 0x41, 0x31, 0x33, 0x3e, 0x32,
+    0x3e, 0x40, 0x42, 0x40, 0x42, 0x3a, 0x46, 0x33, 0x44, 0x40, 0x3c, 0x43,
+    0x3d, 0x41, 0x4d, 0x3d, 0x3c, 0x47, 0x46, 0x43, 0x42, 0x3e, 0x44, 0x4e,
+    0x41, 0x3a, 0x44, 0x38, 0x45, 0x3b, 0x49, 0x4c, 0x40, 0x3f, 0x37, 0x3e,
+    0x3e, 0x46, 0x41, 0x51, 0x3f, 0x39, 0x30, 0x40, 0x3e, 0x38, 0x43, 0x5b,
+    0x33, 0x3e, 0x31, 0x42, 0x3d, 0x2f, 0x49, 0x57, 0x37, 0x31, 0x46, 0x44,
+    0x3e, 0x35, 0x40, 0x55, 0x36, 0x35, 0x3d, 0x3c, 0x38, 0x33, 0x42, 0x52,
+    0x3b, 0x39, 0x34, 0x31, 0x45, 0x34, 0x3c, 0x51, 0x33, 0x39, 0x3c, 0x40,
+    0x36, 0x36, 0x42, 0x3e, 0x37, 0x3e, 0x3b, 0x40, 0x3d, 0x36, 0x41, 0x30,
+    0x42, 0x45, 0x40, 0x49, 0x3d, 0x32, 0x46, 0x26, 0x40, 0x44, 0x3a, 0x3f,
+    0x3d, 0x46, 0x45, 0x31, 0x33, 0x34, 0x3e, 0x37, 0x46, 0x3b, 0x32, 0x3a,
+    0x3d, 0x31, 0x3c, 0x36, 0x50, 0x41, 0x3b, 0x5d, 0x53, 0x42, 0x44, 0x37,
+    0x3e, 0x4d, 0x41, 0x4b, 0x49, 0x2f, 0x35, 0x3a, 0x4e, 0x59, 0x5d, 0x27,
+    0x5c, 0x30, 0x3d, 0x3a, 0x46, 0x50, 0x57, 0x4a, 0x4c, 0x36, 0x37, 0x46,
+    0x48, 0x41, 0x24, 0x49, 0x36, 0x3e, 0x41, 0x45, 0x37, 0x38, 0x2e, 0x2e,
+    0x34, 0x3c, 0x38, 0x41, 0x36, 0x3d, 0x43, 0x36, 0x3e, 0x3e, 0x41, 0x3b,
+    0x42, 0x3c, 0x43, 0x38, 0x3e, 0x3d, 0x41, 0x48, 0x47, 0x4c, 0x45, 0x3b,
+    0x37, 0x41, 0x38, 0x41, 0x3d, 0x41, 0x46, 0x4e, 0x36, 0x45, 0x38, 0x39,
+    0x42, 0x42, 0x37, 0x4c, 0x34, 0x46, 0x3c, 0x44, 0x4a, 0x39, 0x45, 0x53,
+    0x3c, 0x3f, 0x41, 0x35, 0x3c, 0x45, 0x4c, 0x5a, 0x44, 0x41, 0x30, 0x35,
+    0x40, 0x39, 0x42, 0x5a, 0x36, 0x36, 0x3a, 0x3b, 0x43, 0x35, 0x3c, 0x56,
+    0x35, 0x38, 0x2b, 0x4a, 0x3c, 0x40, 0x45, 0x54, 0x37, 0x37, 0x3a, 0x44,
+    0x42, 0x3b, 0x3d, 0x4a, 0x3f, 0x37, 0x3b, 0x35, 0x34, 0x3f, 0x40, 0x48,
+    0x45, 0x3e, 0x37, 0x38, 0x41, 0x41, 0x3d, 0x37, 0x43, 0x3d, 0x3d, 0x45,
+    0x3a, 0x38, 0x3f, 0x23, 0x4a, 0x37, 0x42, 0x3c, 0x3f, 0x43, 0x42, 0x33,
+    0x37, 0x39, 0x35, 0x3b, 0x41, 0x36, 0x2f, 0x3b, 0x41, 0x3a, 0x44, 0x3d,
+    0x3e, 0x45, 0x44, 0x50, 0x47, 0x47, 0x48, 0x3c, 0x3f, 0x45, 0x43, 0x3f,
+    0x4a, 0x33, 0x3c, 0x3a, 0x52, 0x52, 0x5a, 0x23, 0x58, 0x31, 0x3b, 0x3b,
+    0x47, 0x44, 0x54, 0x45, 0x42, 0x38, 0x38, 0x40, 0x43, 0x3f, 0x2a, 0x46,
+    0x3b, 0x46, 0x3b, 0x46, 0x35, 0x37, 0x29, 0x35, 0x38, 0x41, 0x3a, 0x31,
+    0x44, 0x41, 0x39, 0x36, 0x45, 0x41, 0x40, 0x3e, 0x40, 0x44, 0x47, 0x37,
+    0x3f, 0x42, 0x49, 0x34, 0x46, 0x3d, 0x4b, 0x3d, 0x42, 0x3b, 0x42, 0x3e,
+    0x41, 0x3b, 0x3f, 0x43, 0x47, 0x45, 0x47, 0x41, 0x40, 0x3a, 0x3d, 0x45,
+    0x40, 0x36, 0x3b, 0x3b, 0x44, 0x37, 0x46, 0x55, 0x35, 0x42, 0x3f, 0x3a,
+    0x41, 0x41, 0x44, 0x5c, 0x31, 0x44, 0x3d, 0x46, 0x39, 0x38, 0x46, 0x59,
+    0x41, 0x3b, 0x3d, 0x39, 0x33, 0x3e, 0x41, 0x58, 0x33, 0x44, 0x34, 0x31,
+    0x48, 0x3e, 0x4d, 0x56, 0x36, 0x3c, 0x37, 0x46, 0x46, 0x38, 0x45, 0x53,
+    0x35, 0x3d, 0x3a, 0x31, 0x42, 0x48, 0x45, 0x44, 0x3b, 0x3b, 0x3c, 0x41,
+    0x3d, 0x42, 0x3f, 0x2f, 0x38, 0x3c, 0x3e, 0x41, 0x44, 0x3a, 0x4a, 0x24,
+    0x37, 0x3e, 0x37, 0x48, 0x40, 0x3f, 0x46, 0x3c, 0x47, 0x4a, 0x38, 0x47,
+    0x34, 0x45, 0x31, 0x42, 0x43, 0x44, 0x3f, 0x3f, 0x49, 0x40, 0x3c, 0x41,
+    0x4d, 0x43, 0x42, 0x39, 0x39, 0x48, 0x41, 0x38, 0x47, 0x3c, 0x3c, 0x42,
+    0x44, 0x55, 0x62, 0x2a, 0x5c, 0x32, 0x3a, 0x37, 0x4c, 0x44, 0x4f, 0x3e,
+    0x4e, 0x42, 0x3a, 0x42, 0x41, 0x4a, 0x35, 0x44, 0x45, 0x3b, 0x43, 0x41,
+    0x33, 0x38, 0x28, 0x36, 0x40, 0x47, 0x3e, 0x3e, 0x3e, 0x39, 0x3a, 0x37,
+    0x44, 0x44, 0x3f, 0x3b, 0x41, 0x3c, 0x45, 0x36, 0x38, 0x3a, 0x3c, 0x42,
+    0x42, 0x3f, 0x59, 0x3c, 0x47, 0x3d, 0x38, 0x3a, 0x42, 0x44, 0x41, 0x46,
+    0x3f, 0x43, 0x48, 0x42, 0x44, 0x35, 0x3f, 0x45, 0x36, 0x3f, 0x38, 0x3a,
+    0x44, 0x3d, 0x3d, 0x4e, 0x3e, 0x45, 0x40, 0x42, 0x3c, 0x33, 0x43, 0x5a,
+    0x38, 0x3e, 0x45, 0x3a, 0x3e, 0x42, 0x45, 0x52, 0x3c, 0x42, 0x3a, 0x38,
+    0x3d, 0x3b, 0x4a, 0x57, 0x38, 0x37, 0x47, 0x44, 0x3e, 0x3c, 0x38, 0x48,
+    0x36, 0x41, 0x3f, 0x41, 0x3a, 0x3a, 0x46, 0x47, 0x42, 0x40, 0x32, 0x33,
+    0x43, 0x37, 0x41, 0x43, 0x3e, 0x40, 0x3d, 0x3a, 0x3e, 0x38, 0x42, 0x30,
+    0x3e, 0x40, 0x46, 0x42, 0x40, 0x44, 0x42, 0x23, 0x31, 0x40, 0x3f, 0x3d,
+    0x3b, 0x33, 0x40, 0x33, 0x41, 0x33, 0x43, 0x41, 0x3a, 0x3e, 0x36, 0x40,
+    0x40, 0x45, 0x37, 0x42, 0x46, 0x42, 0x39, 0x48, 0x44, 0x40, 0x40, 0x45,
+    0x3c, 0x49, 0x41, 0x3f, 0x4c, 0x3d, 0x2f, 0x3f, 0x47, 0x52, 0x54, 0x2c,
+    0x55, 0x42, 0x44, 0x3b, 0x46, 0x4f, 0x48, 0x3c, 0x45, 0x39, 0x3f, 0x4b,
+    0x3f, 0x3f, 0x36, 0x42, 0x41, 0x48, 0x44, 0x44, 0x36, 0x3b, 0x37, 0x40,
+    0x39, 0x49, 0x3a, 0x35, 0x3e, 0x48, 0x31, 0x30, 0x44, 0x38, 0x4c, 0x3c,
+    0x41, 0x3e, 0x46, 0x32, 0x44, 0x3b, 0x42, 0x3c, 0x38, 0x3a, 0x47, 0x3f,
+    0x3a, 0x42, 0x3a, 0x43, 0x40, 0x4b, 0x47, 0x3c, 0x42, 0x46, 0x45, 0x42,
+    0x3c, 0x46, 0x3d, 0x3f, 0x3e, 0x36, 0x38, 0x3e, 0x46, 0x3c, 0x4d, 0x43,
+    0x49, 0x41, 0x48, 0x3c, 0x3d, 0x39, 0x43, 0x58, 0x3a, 0x41, 0x3f, 0x38,
+    0x37, 0x3f, 0x46, 0x5d, 0x3c, 0x3c, 0x39, 0x36, 0x3d, 0x46, 0x43, 0x50,
+    0x3a, 0x47, 0x39, 0x36, 0x41, 0x3f, 0x3e, 0x51, 0x31, 0x36, 0x3e, 0x3c,
+    0x3c, 0x3a, 0x48, 0x41, 0x3a, 0x43, 0x49, 0x3e, 0x42, 0x46, 0x3f, 0x41,
+    0x49, 0x33, 0x42, 0x41, 0x45, 0x40, 0x3d, 0x2b, 0x3d, 0x38, 0x40, 0x37,
+    0x3a, 0x31, 0x45, 0x26, 0x33, 0x3d, 0x3f, 0x39, 0x36, 0x3c, 0x38, 0x33,
+    0x34, 0x3f, 0x35, 0x44, 0x3a, 0x39, 0x32, 0x41, 0x35, 0x40, 0x3c, 0x3b,
+    0x4a, 0x3f, 0x3e, 0x3e, 0x4a, 0x3e, 0x42, 0x35, 0x38, 0x43, 0x3c, 0x37,
+    0x3d, 0x3c, 0x39, 0x43, 0x3f, 0x4e, 0x54, 0x33, 0x4b, 0x37, 0x43, 0x3b,
+    0x43, 0x48, 0x43, 0x42, 0x3d, 0x46, 0x45, 0x49, 0x3a, 0x39, 0x36, 0x4a,
+    0x48, 0x48, 0x37, 0x4b, 0x42, 0x47, 0x34, 0x34, 0x43, 0x42, 0x3a, 0x3d,
+    0x3c, 0x46, 0x34, 0x39, 0x40, 0x3b, 0x3e, 0x3e, 0x37, 0x3d, 0x53, 0x3b,
+    0x48, 0x3c, 0x43, 0x44, 0x3b, 0x3f, 0x57, 0x3d, 0x39, 0x3c, 0x39, 0x3a,
+    0x3e, 0x3f, 0x43, 0x3e, 0x41, 0x47, 0x3c, 0x41, 0x40, 0x41, 0x37, 0x3f,
+    0x3b, 0x43, 0x35, 0x3e, 0x45, 0x40, 0x47, 0x59, 0x41, 0x49, 0x3b, 0x3f,
+    0x47, 0x49, 0x4b, 0x61, 0x39, 0x48, 0x39, 0x3e, 0x44, 0x34, 0x3b, 0x59,
+    0x3c, 0x42, 0x45, 0x35, 0x42, 0x41, 0x39, 0x52, 0x42, 0x3c, 0x3d, 0x3e,
+    0x3d, 0x4a, 0x4a, 0x4d, 0x3c, 0x34, 0x44, 0x3c, 0x41, 0x34, 0x38, 0x46,
+    0x38, 0x45, 0x40, 0x45, 0x40, 0x3a, 0x3d, 0x44, 0x3a, 0x37, 0x3a, 0x3a,
+    0x3b, 0x42, 0x40, 0x34, 0x3b, 0x3c, 0x42, 0x40, 0x3d, 0x32, 0x40, 0x27,
+    0x37, 0x39, 0x37, 0x46, 0x48, 0x31, 0x40, 0x30, 0x42, 0x42, 0x3a, 0x40,
+    0x3d, 0x37, 0x2a, 0x40, 0x41, 0x37, 0x3c, 0x4a, 0x46, 0x45, 0x3d, 0x34,
+    0x48, 0x41, 0x42, 0x3e, 0x3f, 0x39, 0x3c, 0x3a, 0x4f, 0x3b, 0x32, 0x3e,
+    0x43, 0x51, 0x4f, 0x2a, 0x46, 0x3a, 0x3d, 0x3b, 0x40, 0x3d, 0x4c, 0x3c,
+    0x48, 0x40, 0x36, 0x4a, 0x3a, 0x38, 0x42, 0x43, 0x4c, 0x3d, 0x47, 0x47,
+    0x33, 0x3f, 0x2d, 0x37, 0x4a, 0x43, 0x38, 0x3e, 0x49, 0x42, 0x42, 0x3d,
+    0x43, 0x47, 0x41, 0x38, 0x46, 0x37, 0x46, 0x38, 0x47, 0x42, 0x49, 0x3d,
+    0x3b, 0x37, 0x4c, 0x3c, 0x3a, 0x45, 0x3f, 0x37, 0x36, 0x3d, 0x3c, 0x40,
+    0x3e, 0x45, 0x46, 0x41, 0x41, 0x40, 0x3c, 0x44, 0x47, 0x43, 0x37, 0x3f,
+    0x3e, 0x3a, 0x3a, 0x4b, 0x3a, 0x36, 0x3d, 0x3f, 0x38, 0x3f, 0x3c, 0x58,
+    0x40, 0x49, 0x3d, 0x42, 0x38, 0x3a, 0x47, 0x50, 0x3b, 0x49, 0x40, 0x44,
+    0x3e, 0x3c, 0x38, 0x52, 0x3a, 0x3e, 0x44, 0x3c, 0x35, 0x44, 0x3a, 0x47,
+    0x3e, 0x49, 0x3f, 0x47, 0x45, 0x39, 0x3b, 0x46, 0x44, 0x3e, 0x41, 0x46,
+    0x40, 0x41, 0x40, 0x40, 0x3a, 0x35, 0x3e, 0x36, 0x3e, 0x3e, 0x3d, 0x35,
+    0x3b, 0x3c, 0x38, 0x46, 0x3b, 0x3c, 0x41, 0x2c, 0x3f, 0x42, 0x38, 0x3b,
+    0x36, 0x3b, 0x39, 0x40, 0x40, 0x38, 0x36, 0x33, 0x34, 0x42, 0x2f, 0x44,
+    0x41, 0x40, 0x39, 0x35, 0x3b, 0x44, 0x42, 0x2c, 0x41, 0x3b, 0x44, 0x41,
+    0x35, 0x44, 0x3b, 0x34, 0x44, 0x49, 0x36, 0x39, 0x3a, 0x52, 0x4d, 0x2b,
+    0x40, 0x40, 0x3e, 0x39, 0x48, 0x42, 0x3c, 0x44, 0x46, 0x49, 0x3f, 0x54,
+    0x43, 0x40, 0x2e, 0x40, 0x4f, 0x36, 0x3e, 0x3f, 0x38, 0x48, 0x44, 0x3c,
+    0x44, 0x43, 0x41, 0x47, 0x40, 0x46, 0x40, 0x37, 0x41, 0x34, 0x3a, 0x41,
+    0x41, 0x3b, 0x49, 0x39, 0x42, 0x38, 0x3d, 0x39, 0x34, 0x35, 0x43, 0x36,
+    0x3e, 0x44, 0x3f, 0x40, 0x43, 0x40, 0x40, 0x3a, 0x47, 0x42, 0x3e, 0x42,
+    0x46, 0x35, 0x3a, 0x46, 0x3c, 0x3c, 0x3c, 0x3d, 0x3f, 0x40, 0x43, 0x4c,
+    0x3a, 0x37, 0x3f, 0x43, 0x47, 0x38, 0x42, 0x58, 0x42, 0x3b, 0x34, 0x37,
+    0x3e, 0x48, 0x3c, 0x57, 0x44, 0x3c, 0x3d, 0x3a, 0x36, 0x48, 0x3c, 0x51,
+    0x3d, 0x48, 0x45, 0x45, 0x38, 0x45, 0x40, 0x3f, 0x3b, 0x35, 0x3d, 0x3f,
+    0x38, 0x47, 0x39, 0x3b, 0x36, 0x49, 0x43, 0x40, 0x3f, 0x46, 0x38, 0x40,
+    0x3f, 0x3e, 0x39, 0x32, 0x47, 0x42, 0x35, 0x33, 0x39, 0x47, 0x3c, 0x36,
+    0x3b, 0x37, 0x43, 0x35, 0x3b, 0x3b, 0x34, 0x3b, 0x38, 0x3d, 0x3e, 0x3a,
+    0x35, 0x49, 0x38, 0x40, 0x3f, 0x3f, 0x3e, 0x37, 0x43, 0x3b, 0x3e, 0x3e,
+    0x3b, 0x40, 0x44, 0x39, 0x3d, 0x3f, 0x31, 0x42, 0x42, 0x3b, 0x41, 0x3d,
+    0x3e, 0x3c, 0x37, 0x34, 0x48, 0x3d, 0x49, 0x4a, 0x47, 0x36, 0x3a, 0x34,
+    0x37, 0x36, 0x3e, 0x38, 0x33, 0x45, 0x39, 0x44, 0x34, 0x49, 0x3a, 0x3d,
+    0x34, 0x31, 0x31, 0x3d, 0x34, 0x3d, 0x41, 0x3e, 0x49, 0x41, 0x34, 0x3f,
+    0x3a, 0x42, 0x3e, 0x40, 0x3f, 0x33, 0x46, 0x3f, 0x34, 0x39, 0x37, 0x46,
+    0x3e, 0x32, 0x3f, 0x45, 0x45, 0x41, 0x3b, 0x4b, 0x35, 0x35, 0x3b, 0x4a,
+    0x3d, 0x43, 0x3b, 0x44, 0x3c, 0x38, 0x31, 0x43, 0x39, 0x35, 0x41, 0x45,
+    0x37, 0x3e, 0x43, 0x47, 0x39, 0x40, 0x41, 0x41, 0x40, 0x32, 0x37, 0x3e,
+    0x3d, 0x39, 0x3b, 0x49, 0x33, 0x35, 0x38, 0x41, 0x45, 0x37, 0x3c, 0x49,
+    0x3b, 0x34, 0x34, 0x41, 0x3a, 0x3f, 0x3e, 0x47, 0x39, 0x3c, 0x34, 0x3a,
+    0x38, 0x44, 0x40, 0x51, 0x3a, 0x37, 0x3b, 0x3f, 0x3d, 0x3a, 0x45, 0x48,
+    0x3f, 0x46, 0x35, 0x43, 0x38, 0x43, 0x35, 0x4c, 0x42, 0x47, 0x44, 0x3d,
+    0x40, 0x3a, 0x39, 0x4e, 0x3d, 0x37, 0x3c, 0x42, 0x40, 0x48, 0x44, 0x4c,
+    0x31, 0x40, 0x42, 0x3b, 0x45, 0x45, 0x3f, 0x3e, 0x3d, 0x44, 0x3f, 0x31,
+    0x3f, 0x44, 0x45, 0x37, 0x3e, 0x3d, 0x35, 0x3b, 0x2d, 0x44, 0x4a, 0x3a,
+    0x2b, 0x37, 0x38, 0x46, 0x41, 0x39, 0x3c, 0x3c, 0x46, 0x33, 0x36, 0x3c,
+    0x4b, 0x34, 0x49, 0x50, 0x30, 0x3c, 0x33, 0x41, 0x44, 0x33, 0x43, 0x39,
+    0x36, 0x45, 0x33, 0x3b, 0x3d, 0x36, 0x47, 0x30, 0x42, 0x37, 0x49, 0x3e,
+    0x3b, 0x49, 0x3d, 0x3b, 0x3a, 0x41, 0x38, 0x44, 0x42, 0x3b, 0x3f, 0x40,
+    0x46, 0x35, 0x38, 0x3c, 0x48, 0x3a, 0x46, 0x41, 0x36, 0x36, 0x41, 0x3e,
+    0x43, 0x3e, 0x32, 0x39, 0x3a, 0x41, 0x30, 0x3e, 0x40, 0x3e, 0x36, 0x3a,
+    0x45, 0x45, 0x3a, 0x3c, 0x31, 0x3b, 0x47, 0x3f, 0x36, 0x3a, 0x3c, 0x41,
+    0x3b, 0x41, 0x39, 0x46, 0x3f, 0x3c, 0x34, 0x3e, 0x41, 0x45, 0x41, 0x42,
+    0x39, 0x40, 0x40, 0x44, 0x45, 0x42, 0x34, 0x3f, 0x3e, 0x31, 0x3b, 0x41,
+    0x33, 0x43, 0x37, 0x44, 0x44, 0x3a, 0x36, 0x36, 0x48, 0x3c, 0x37, 0x47,
+    0x39, 0x3e, 0x3e, 0x3c, 0x3c, 0x41, 0x3c, 0x44, 0x3b, 0x42, 0x3f, 0x3a,
+    0x43, 0x3b, 0x3e, 0x48, 0x36, 0x3f, 0x3d, 0x34, 0x40, 0x43, 0x35, 0x4f,
+    0x34, 0x39, 0x3b, 0x41, 0x40, 0x39, 0x37, 0x4c, 0x39, 0x36, 0x39, 0x39,
+    0x47, 0x41, 0x43, 0x3f, 0x3f, 0x33, 0x42, 0x3f, 0x42, 0x40, 0x37, 0x40,
+    0x3f, 0x34, 0x45, 0x3d, 0x2d, 0x3c, 0x44, 0x3b, 0x43, 0x37, 0x26, 0x50,
+    0x43, 0x44, 0x3d, 0x43, 0x42, 0x2d, 0x3c, 0x33, 0x4a, 0x32, 0x4a, 0x53,
+    0x33, 0x38, 0x27, 0x36, 0x42, 0x30, 0x47, 0x3d, 0x36, 0x45, 0x46, 0x36,
+    0x3b, 0x3b, 0x40, 0x33, 0x37, 0x36, 0x44, 0x46, 0x3d, 0x35, 0x40, 0x38,
+    0x3b, 0x40, 0x36, 0x3c, 0x3d, 0x37, 0x31, 0x41, 0x33, 0x3c, 0x38, 0x3f,
+    0x43, 0x3a, 0x40, 0x49, 0x38, 0x39, 0x38, 0x3d, 0x43, 0x3d, 0x39, 0x3b,
+    0x3d, 0x3f, 0x38, 0x42, 0x34, 0x43, 0x33, 0x3e, 0x43, 0x3e, 0x40, 0x42,
+    0x3b, 0x45, 0x37, 0x44, 0x43, 0x39, 0x3c, 0x3d, 0x37, 0x44, 0x3a, 0x3b,
+    0x47, 0x3f, 0x3a, 0x3c, 0x3a, 0x3b, 0x3f, 0x43, 0x3e, 0x3d, 0x46, 0x3e,
+    0x37, 0x36, 0x3f, 0x40, 0x42, 0x42, 0x37, 0x36, 0x48, 0x35, 0x44, 0x44,
+    0x39, 0x3c, 0x3b, 0x41, 0x44, 0x49, 0x3a, 0x40, 0x41, 0x36, 0x33, 0x3a,
+    0x3c, 0x3d, 0x40, 0x3f, 0x43, 0x36, 0x3c, 0x3a, 0x3f, 0x4b, 0x32, 0x49,
+    0x49, 0x3e, 0x3a, 0x3e, 0x3f, 0x41, 0x3c, 0x47, 0x40, 0x41, 0x45, 0x3e,
+    0x47, 0x47, 0x3f, 0x47, 0x45, 0x3e, 0x31, 0x43, 0x4a, 0x44, 0x36, 0x40,
+    0x41, 0x47, 0x3e, 0x42, 0x37, 0x40, 0x3b, 0x46, 0x37, 0x41, 0x3e, 0x3c,
+    0x27, 0x40, 0x49, 0x42, 0x42, 0x39, 0x30, 0x49, 0x43, 0x38, 0x3d, 0x42,
+    0x43, 0x2f, 0x3b, 0x37, 0x4b, 0x2d, 0x4f, 0x52, 0x30, 0x31, 0x2f, 0x3a,
+    0x49, 0x38, 0x4f, 0x45, 0x2e, 0x47, 0x3a, 0x32, 0x33, 0x3f, 0x4a, 0x2e,
+    0x33, 0x3b, 0x3e, 0x3e, 0x49, 0x45, 0x44, 0x38, 0x3c, 0x35, 0x45, 0x47,
+    0x41, 0x3b, 0x3c, 0x48, 0x46, 0x39, 0x39, 0x3b, 0x3f, 0x41, 0x38, 0x42,
+    0x3d, 0x46, 0x33, 0x41, 0x36, 0x3f, 0x3f, 0x3c, 0x33, 0x3e, 0x3e, 0x40,
+    0x44, 0x40, 0x3c, 0x38, 0x46, 0x3a, 0x40, 0x36, 0x42, 0x35, 0x3f, 0x3b,
+    0x3b, 0x43, 0x3c, 0x40, 0x40, 0x49, 0x2e, 0x39, 0x40, 0x3f, 0x45, 0x41,
+    0x3f, 0x30, 0x42, 0x3d, 0x40, 0x3c, 0x3a, 0x3b, 0x3b, 0x40, 0x39, 0x42,
+    0x3a, 0x3f, 0x3f, 0x3e, 0x35, 0x3b, 0x38, 0x45, 0x47, 0x35, 0x44, 0x3e,
+    0x3b, 0x3f, 0x3f, 0x40, 0x3a, 0x35, 0x30, 0x49, 0x45, 0x35, 0x3b, 0x39,
+    0x3b, 0x48, 0x3f, 0x37, 0x39, 0x40, 0x43, 0x45, 0x3d, 0x40, 0x41, 0x3a,
+    0x33, 0x3d, 0x3a, 0x4b, 0x40, 0x42, 0x40, 0x42, 0x43, 0x39, 0x3c, 0x49,
+    0x3e, 0x47, 0x3e, 0x44, 0x3f, 0x3a, 0x40, 0x41, 0x3f, 0x42, 0x42, 0x37,
+    0x3e, 0x3b, 0x36, 0x3e, 0x3b, 0x3c, 0x48, 0x43, 0x2d, 0x46, 0x4a, 0x38,
+    0x45, 0x3a, 0x29, 0x46, 0x40, 0x3c, 0x40, 0x44, 0x40, 0x33, 0x2f, 0x33,
+    0x48, 0x2e, 0x51, 0x4f, 0x2b, 0x32, 0x2e, 0x2d, 0x45, 0x33, 0x4d, 0x41,
+    0x29, 0x4b, 0x41, 0x39, 0x2f, 0x3a, 0x49, 0x31, 0x37, 0x40, 0x47, 0x4c,
+    0x3e, 0x31, 0x41, 0x3f, 0x43, 0x37, 0x45, 0x4f, 0x41, 0x3c, 0x30, 0x4a,
+    0x37, 0x37, 0x36, 0x39, 0x31, 0x3d, 0x36, 0x4b, 0x37, 0x44, 0x3c, 0x43,
+    0x44, 0x36, 0x3f, 0x3b, 0x34, 0x3e, 0x3a, 0x35, 0x38, 0x3f, 0x33, 0x37,
+    0x3b, 0x3d, 0x46, 0x38, 0x3b, 0x37, 0x38, 0x3b, 0x31, 0x3e, 0x3d, 0x3b,
+    0x3d, 0x39, 0x35, 0x33, 0x33, 0x3c, 0x39, 0x39, 0x48, 0x39, 0x39, 0x3f,
+    0x3e, 0x36, 0x47, 0x3a, 0x44, 0x40, 0x32, 0x3c, 0x37, 0x35, 0x40, 0x3f,
+    0x3a, 0x38, 0x3b, 0x3d, 0x46, 0x45, 0x36, 0x43, 0x40, 0x3d, 0x41, 0x41,
+    0x47, 0x3a, 0x3d, 0x3e, 0x43, 0x42, 0x32, 0x36, 0x41, 0x37, 0x3b, 0x35,
+    0x36, 0x44, 0x36, 0x3c, 0x43, 0x32, 0x3e, 0x3e, 0x42, 0x45, 0x32, 0x3c,
+    0x3a, 0x3b, 0x35, 0x43, 0x41, 0x3d, 0x44, 0x50, 0x43, 0x31, 0x3e, 0x44,
+    0x44, 0x41, 0x3a, 0x44, 0x36, 0x39, 0x3b, 0x3c, 0x32, 0x38, 0x3b, 0x45,
+    0x38, 0x43, 0x40, 0x42, 0x33, 0x3e, 0x4a, 0x42, 0x45, 0x39, 0x2f, 0x42,
+    0x39, 0x35, 0x44, 0x3e, 0x39, 0x2f, 0x34, 0x33, 0x49, 0x29, 0x50, 0x4f,
+    0x2b, 0x36, 0x34, 0x2d, 0x47, 0x33, 0x49, 0x3c, 0x33, 0x51, 0x49, 0x3f,
+    0x34, 0x39, 0x4a, 0x2c, 0x34, 0x45, 0x4f, 0x47, 0x34, 0x42, 0x3a, 0x3d,
+    0x36, 0x4a, 0x3b, 0x43, 0x36, 0x3f, 0x39, 0x4b, 0x38, 0x3a, 0x31, 0x3d,
+    0x32, 0x42, 0x3a, 0x47, 0x48, 0x3e, 0x44, 0x3f, 0x39, 0x3e, 0x44, 0x35,
+    0x41, 0x3c, 0x45, 0x3a, 0x3e, 0x3b, 0x3d, 0x2f, 0x37, 0x40, 0x3e, 0x43,
+    0x39, 0x39, 0x33, 0x3b, 0x37, 0x3b, 0x37, 0x37, 0x37, 0x39, 0x36, 0x31,
+    0x39, 0x3b, 0x41, 0x39, 0x3b, 0x40, 0x36, 0x37, 0x42, 0x39, 0x3a, 0x46,
+    0x3f, 0x30, 0x38, 0x39, 0x35, 0x32, 0x3e, 0x3a, 0x43, 0x43, 0x3e, 0x33,
+    0x42, 0x3f, 0x41, 0x3c, 0x46, 0x34, 0x34, 0x40, 0x43, 0x37, 0x32, 0x43,
+    0x3c, 0x37, 0x36, 0x33, 0x3d, 0x36, 0x3a, 0x40, 0x39, 0x38, 0x32, 0x3e,
+    0x32, 0x3d, 0x37, 0x49, 0x42, 0x47, 0x41, 0x3b, 0x3d, 0x3c, 0x3a, 0x37,
+    0x3c, 0x45, 0x3a, 0x45, 0x36, 0x44, 0x3a, 0x3a, 0x3a, 0x3c, 0x43, 0x3b,
+    0x3b, 0x35, 0x38, 0x47, 0x36, 0x40, 0x32, 0x43, 0x3e, 0x39, 0x42, 0x40,
+    0x2c, 0x3c, 0x4c, 0x4c, 0x43, 0x3b, 0x37, 0x4a, 0x3f, 0x3c, 0x45, 0x44,
+    0x3f, 0x30, 0x36, 0x31, 0x4f, 0x2f, 0x5d, 0x4b, 0x34, 0x34, 0x2d, 0x2b,
+    0x44, 0x31, 0x4e, 0x40, 0x2e, 0x4d, 0x48, 0x3e, 0x37, 0x2b, 0x49, 0x25,
+    0x31, 0x49, 0x44, 0x49, 0x39, 0x39, 0x4b, 0x3a, 0x3a, 0x41, 0x3e, 0x42,
+    0x3c, 0x36, 0x36, 0x4a, 0x32, 0x44, 0x3e, 0x48, 0x3e, 0x3c, 0x37, 0x49,
+    0x3d, 0x34, 0x3f, 0x37, 0x33, 0x36, 0x46, 0x3a, 0x3a, 0x31, 0x45, 0x3f,
+    0x3a, 0x31, 0x3b, 0x33, 0x41, 0x42, 0x35, 0x39, 0x38, 0x44, 0x36, 0x3a,
+    0x3f, 0x3b, 0x37, 0x3e, 0x3b, 0x38, 0x2f, 0x32, 0x44, 0x3d, 0x44, 0x41,
+    0x39, 0x36, 0x3a, 0x34, 0x39, 0x38, 0x34, 0x3f, 0x3b, 0x37, 0x34, 0x34,
+    0x40, 0x3d, 0x34, 0x3a, 0x46, 0x42, 0x3f, 0x34, 0x38, 0x33, 0x39, 0x44,
+    0x3f, 0x41, 0x3c, 0x31, 0x40, 0x32, 0x3f, 0x37, 0x37, 0x41, 0x3e, 0x35,
+    0x37, 0x48, 0x3b, 0x41, 0x3d, 0x3a, 0x3f, 0x35, 0x33, 0x3c, 0x36, 0x3b,
+    0x3a, 0x48, 0x33, 0x42, 0x37, 0x33, 0x39, 0x41, 0x3c, 0x3d, 0x3b, 0x4d,
+    0x39, 0x3a, 0x3e, 0x44, 0x3d, 0x41, 0x3b, 0x38, 0x49, 0x41, 0x3a, 0x38,
+    0x34, 0x38, 0x38, 0x3c, 0x45, 0x3c, 0x37, 0x3b, 0x36, 0x3e, 0x4a, 0x4b,
+    0x42, 0x3f, 0x32, 0x45, 0x46, 0x35, 0x46, 0x41, 0x38, 0x33, 0x39, 0x37,
+    0x44, 0x2b, 0x60, 0x4a, 0x2a, 0x2e, 0x35, 0x2d, 0x43, 0x37, 0x51, 0x47,
+    0x2f, 0x4d, 0x50, 0x3e, 0x3a, 0x33, 0x4f, 0x2a, 0x35, 0x45, 0x4a, 0x4c,
+    0x3b, 0x3d, 0x43, 0x44, 0x3d, 0x3f, 0x4a, 0x3e, 0x49, 0x37, 0x2e, 0x4f,
+    0x39, 0x3f, 0x32, 0x3c, 0x37, 0x3b, 0x39, 0x4d, 0x34, 0x3f, 0x46, 0x44,
+    0x3d, 0x40, 0x3f, 0x40, 0x39, 0x33, 0x39, 0x3e, 0x3d, 0x40, 0x31, 0x30,
+    0x35, 0x3d, 0x3e, 0x3a, 0x3e, 0x32, 0x31, 0x3e, 0x48, 0x3c, 0x40, 0x43,
+    0x3f, 0x3f, 0x34, 0x2e, 0x3a, 0x3e, 0x3b, 0x43, 0x45, 0x32, 0x3a, 0x31,
+    0x37, 0x38, 0x31, 0x35, 0x34, 0x3d, 0x42, 0x36, 0x46, 0x37, 0x32, 0x47,
+    0x41, 0x3c, 0x35, 0x35, 0x36, 0x41, 0x3a, 0x3b, 0x42, 0x44, 0x36, 0x31,
+    0x3c, 0x3d, 0x34, 0x34, 0x3b, 0x40, 0x40, 0x2e, 0x40, 0x46, 0x3b, 0x43,
+    0x3f, 0x40, 0x3b, 0x3a, 0x32, 0x40, 0x46, 0x39, 0x3c, 0x49, 0x2f, 0x3d,
+    0x49, 0x3e, 0x44, 0x3c, 0x3e, 0x35, 0x3f, 0x44, 0x41, 0x40, 0x3e, 0x47,
+    0x3d, 0x40, 0x3f, 0x41, 0x3b, 0x41, 0x41, 0x3f, 0x40, 0x3f, 0x3e, 0x3e,
+    0x3f, 0x43, 0x35, 0x40, 0x2b, 0x42, 0x45, 0x56, 0x40, 0x3c, 0x2f, 0x44,
+    0x44, 0x3d, 0x3e, 0x3d, 0x40, 0x2d, 0x39, 0x31, 0x54, 0x2f, 0x61, 0x48,
+    0x2e, 0x37, 0x37, 0x32, 0x3e, 0x2d, 0x52, 0x4d, 0x2d, 0x4d, 0x4c, 0x3a,
+    0x3a, 0x31, 0x4e, 0x2d, 0x31, 0x48, 0x47, 0x54, 0x45, 0x38, 0x3b, 0x3d,
+    0x42, 0x41, 0x44, 0x4a, 0x48, 0x42, 0x2f, 0x4d, 0x31, 0x34, 0x3a, 0x46,
+    0x37, 0x44, 0x2c, 0x45, 0x46, 0x43, 0x40, 0x3f, 0x34, 0x33, 0x40, 0x39,
+    0x32, 0x35, 0x3a, 0x40, 0x3f, 0x3f, 0x36, 0x32, 0x3f, 0x3d, 0x35, 0x48,
+    0x3c, 0x48, 0x37, 0x39, 0x35, 0x3f, 0x37, 0x3d, 0x44, 0x46, 0x2d, 0x2a,
+    0x47, 0x38, 0x3a, 0x39, 0x45, 0x3b, 0x40, 0x2d, 0x37, 0x33, 0x41, 0x3c,
+    0x40, 0x35, 0x3f, 0x32, 0x3a, 0x36, 0x40, 0x41, 0x3a, 0x3c, 0x33, 0x31,
+    0x42, 0x3f, 0x41, 0x3a, 0x41, 0x46, 0x38, 0x2f, 0x3c, 0x3d, 0x3d, 0x39,
+    0x3b, 0x46, 0x41, 0x31, 0x46, 0x36, 0x40, 0x48, 0x3c, 0x33, 0x42, 0x32,
+    0x3b, 0x40, 0x3f, 0x36, 0x37, 0x44, 0x34, 0x35, 0x32, 0x32, 0x37, 0x38,
+    0x33, 0x3b, 0x37, 0x4a, 0x3f, 0x46, 0x3a, 0x41, 0x32, 0x37, 0x30, 0x3e,
+    0x40, 0x35, 0x41, 0x40, 0x37, 0x41, 0x2b, 0x40, 0x3d, 0x3d, 0x32, 0x38,
+    0x34, 0x3e, 0x47, 0x61, 0x43, 0x3b, 0x3c, 0x42, 0x46, 0x3d, 0x40, 0x4a,
+    0x3c, 0x2d, 0x33, 0x35, 0x55, 0x38, 0x69, 0x4f, 0x33, 0x37, 0x30, 0x39,
+    0x44, 0x2e, 0x58, 0x4b, 0x2a, 0x51, 0x4b, 0x3c, 0x39, 0x2e, 0x51, 0x2d,
+    0x30, 0x4a, 0x42, 0x53, 0x3f, 0x39, 0x3e, 0x44, 0x3b, 0x40, 0x47, 0x44,
+    0x47, 0x3e, 0x39, 0x4b, 0x40, 0x3d, 0x42, 0x39, 0x3b, 0x39, 0x32, 0x42,
+    0x36, 0x36, 0x36, 0x42, 0x44, 0x34, 0x33, 0x40, 0x40, 0x40, 0x3a, 0x3a,
+    0x41, 0x3f, 0x31, 0x30, 0x3f, 0x31, 0x30, 0x39, 0x46, 0x36, 0x35, 0x34,
+    0x40, 0x43, 0x3c, 0x41, 0x31, 0x46, 0x35, 0x26, 0x44, 0x32, 0x3d, 0x35,
+    0x3d, 0x3c, 0x36, 0x32, 0x39, 0x3a, 0x30, 0x40, 0x48, 0x3e, 0x38, 0x37,
+    0x44, 0x3b, 0x3d, 0x42, 0x3d, 0x3c, 0x32, 0x2b, 0x3f, 0x41, 0x39, 0x3d,
+    0x3e, 0x3f, 0x35, 0x2f, 0x46, 0x3d, 0x3d, 0x3b, 0x45, 0x37, 0x31, 0x35,
+    0x44, 0x40, 0x3a, 0x45, 0x3a, 0x3c, 0x39, 0x31, 0x3b, 0x3d, 0x3b, 0x3a,
+    0x43, 0x44, 0x39, 0x47, 0x44, 0x36, 0x3e, 0x39, 0x48, 0x3f, 0x39, 0x4b,
+    0x3c, 0x36, 0x3d, 0x44, 0x44, 0x3f, 0x39, 0x43, 0x3f, 0x37, 0x3f, 0x37,
+    0x3b, 0x3b, 0x38, 0x3b, 0x3f, 0x40, 0x31, 0x44, 0x30, 0x44, 0x46, 0x5b,
+    0x46, 0x3f, 0x39, 0x40, 0x40, 0x37, 0x4a, 0x46, 0x3f, 0x36, 0x40, 0x39,
+    0x59, 0x3e, 0x66, 0x57, 0x32, 0x34, 0x2e, 0x33, 0x46, 0x31, 0x58, 0x44,
+    0x26, 0x4c, 0x4b, 0x3c, 0x39, 0x2e, 0x4d, 0x35, 0x32, 0x46, 0x52, 0x52,
+    0x3e, 0x40, 0x39, 0x3c, 0x39, 0x3d, 0x53, 0x48, 0x41, 0x3c, 0x3b, 0x4d,
+    0x3c, 0x3e, 0x38, 0x44, 0x3a, 0x3a, 0x29, 0x4a, 0x3c, 0x37, 0x36, 0x38,
+    0x3a, 0x31, 0x37, 0x39, 0x3a, 0x40, 0x46, 0x32, 0x42, 0x38, 0x32, 0x2e,
+    0x3a, 0x45, 0x44, 0x34, 0x34, 0x38, 0x32, 0x2e, 0x35, 0x40, 0x3a, 0x41,
+    0x42, 0x3d, 0x37, 0x2c, 0x3f, 0x37, 0x3c, 0x3d, 0x3a, 0x36, 0x33, 0x35,
+    0x3c, 0x34, 0x3c, 0x39, 0x3c, 0x3a, 0x37, 0x30, 0x30, 0x3e, 0x3d, 0x3a,
+    0x44, 0x37, 0x36, 0x32, 0x36, 0x37, 0x36, 0x3a, 0x3c, 0x41, 0x3a, 0x35,
+    0x36, 0x3a, 0x34, 0x40, 0x39, 0x40, 0x3e, 0x32, 0x34, 0x46, 0x33, 0x3f,
+    0x36, 0x45, 0x3e, 0x35, 0x3f, 0x38, 0x3f, 0x3e, 0x3b, 0x3a, 0x36, 0x3b,
+    0x36, 0x38, 0x32, 0x3f, 0x44, 0x3c, 0x35, 0x48, 0x38, 0x39, 0x31, 0x49,
+    0x3d, 0x43, 0x36, 0x3f, 0x31, 0x43, 0x36, 0x3e, 0x3e, 0x41, 0x39, 0x3b,
+    0x40, 0x42, 0x3c, 0x43, 0x36, 0x4a, 0x48, 0x67, 0x4e, 0x43, 0x36, 0x46,
+    0x44, 0x3f, 0x4b, 0x4b, 0x3f, 0x38, 0x3c, 0x3c, 0x5e, 0x38, 0x70, 0x52,
+    0x38, 0x32, 0x3b, 0x36, 0x4a, 0x2c, 0x52, 0x46, 0x29, 0x4f, 0x48, 0x42,
+    0x2d, 0x2e, 0x4f, 0x28, 0x28, 0x45, 0x4d, 0x52, 0x42, 0x3e, 0x3f, 0x41,
+    0x3c, 0x3a, 0x47, 0x50, 0x44, 0x45, 0x33, 0x4b, 0x3e, 0x3f, 0x42, 0x3d,
+    0x43, 0x34, 0x27, 0x3f, 0x42, 0x3e, 0x43, 0x3e, 0x3a, 0x3c, 0x37, 0x3b,
+    0x3f, 0x30, 0x3a, 0x3e, 0x3c, 0x34, 0x37, 0x24, 0x3d, 0x43, 0x40, 0x44,
+    0x40, 0x46, 0x31, 0x2f, 0x43, 0x38, 0x38, 0x39, 0x3c, 0x34, 0x2d, 0x2a,
+    0x38, 0x31, 0x43, 0x3b, 0x39, 0x3b, 0x32, 0x34, 0x3e, 0x39, 0x41, 0x3b,
+    0x3e, 0x33, 0x3a, 0x2a, 0x41, 0x3f, 0x3c, 0x43, 0x3b, 0x3e, 0x35, 0x2c,
+    0x38, 0x41, 0x33, 0x31, 0x3e, 0x3f, 0x3a, 0x3c, 0x3b, 0x35, 0x3f, 0x3d,
+    0x42, 0x3a, 0x3c, 0x35, 0x3f, 0x40, 0x3c, 0x3e, 0x37, 0x41, 0x3d, 0x38,
+    0x34, 0x31, 0x36, 0x3d, 0x3d, 0x47, 0x36, 0x44, 0x3f, 0x45, 0x3c, 0x3c,
+    0x35, 0x36, 0x31, 0x4f, 0x46, 0x3a, 0x41, 0x42, 0x40, 0x32, 0x33, 0x41,
+    0x34, 0x40, 0x3d, 0x43, 0x3b, 0x3a, 0x32, 0x3c, 0x42, 0x42, 0x3d, 0x43,
+    0x37, 0x45, 0x45, 0xff, 0x4b, 0x45, 0x3b, 0x40, 0x43, 0x3e, 0x47, 0x49,
+    0x3d, 0x3b, 0x3e, 0x33, 0x58, 0x35, 0x71, 0x54, 0x2f, 0x38, 0x38, 0x33,
+    0x47, 0x35, 0x5b, 0x46, 0x2c, 0x4c, 0x43, 0x37, 0x36, 0x39, 0x4f, 0x30,
+    0x26, 0x48, 0x51, 0x48, 0x46, 0x45, 0x3b, 0x39, 0x42, 0x50, 0x47, 0x4c,
+    0x4b, 0x3b, 0x3d, 0x4d, 0x41, 0x34, 0x40, 0x44, 0x38, 0x32, 0x2d, 0x43,
+    0x39, 0x36, 0x3b, 0x3b, 0x40, 0x3d, 0x37, 0x3c, 0x44, 0x39, 0x42, 0x37,
+    0x38, 0x38, 0x32, 0x2f, 0x41, 0x40, 0x3f, 0x3a, 0x37, 0x35, 0x3b, 0x2a,
+    0x37, 0x30, 0x3c, 0x37, 0x40, 0x38, 0x3a, 0x27, 0x44, 0x3d, 0x43, 0x40,
+    0x35, 0x3f, 0x3e, 0x32, 0x3e, 0x3c, 0x40, 0x39, 0x39, 0x3a, 0x41, 0x31,
+    0x3b, 0x3f, 0x34, 0x43, 0x3a, 0x38, 0x42, 0x2a, 0x47, 0x46, 0x3b, 0x38,
+    0x47, 0x45, 0x39, 0x31, 0x43, 0x40, 0x37, 0x3a, 0x3d, 0x3e, 0x39, 0x30,
+    0x36, 0x37, 0x3a, 0x43, 0x3f, 0x32, 0x31, 0x41, 0x45, 0x3e, 0x43, 0x38,
+    0x3f, 0x37, 0x3c, 0x49, 0x3b, 0x33, 0x3d, 0x3a, 0x37, 0x44, 0x32, 0x50,
+    0x39, 0x44, 0x3e, 0x3f, 0x3d, 0x41, 0x3e, 0x3e, 0x42, 0x44, 0x45, 0x3f,
+    0x36, 0x3f, 0x37, 0x39, 0x3b, 0x3d, 0x3b, 0x3b, 0x2f, 0x46, 0x40, 0x6d,
+    0x50, 0x45, 0x3b, 0x45, 0x46, 0x3b, 0x42, 0x48, 0x42, 0x3c, 0x39, 0x37,
+    0x57, 0x3b, 0x6c, 0x5b, 0x32, 0x35, 0x3d, 0x39, 0x48, 0x31, 0x5c, 0x46,
+    0x29, 0x4c, 0x3f, 0x3e, 0x37, 0x33, 0x58, 0x32, 0x2a, 0x43, 0x4c, 0x50,
+    0x3b, 0x44, 0x3c, 0x41, 0x39, 0x48, 0x55, 0x4c, 0x42, 0x38, 0x3b, 0x51,
+    0x3f, 0x38, 0x44, 0x46, 0x36, 0x3b, 0x38, 0x4a, 0x3f, 0x37, 0x36, 0x3c,
+    0x31, 0x3d, 0x32, 0x39, 0x3b, 0x3f, 0x3e, 0x35, 0x38, 0x3f, 0x34, 0x2b,
+    0x37, 0x36, 0x39, 0x40, 0x37, 0x41, 0x32, 0x27, 0x36, 0x33, 0x40, 0x3a,
+    0x3f, 0x44, 0x3f, 0x25, 0x38, 0x34, 0x42, 0x3c, 0x3a, 0x40, 0x38, 0x31,
+    0x49, 0x3e, 0x33, 0x3d, 0x31, 0x36, 0x39, 0x2b, 0x44, 0x2f, 0x43, 0x34,
+    0x34, 0x37, 0x39, 0x33, 0x3b, 0x34, 0x42, 0x3c, 0x40, 0x45, 0x36, 0x31,
+    0x43, 0x47, 0x3e, 0x3f, 0x40, 0x3a, 0x33, 0x34, 0x41, 0x44, 0x3a, 0x43,
+    0x3e, 0x38, 0x36, 0x31, 0x42, 0x44, 0x40, 0x41, 0x44, 0x43, 0x33, 0x42,
+    0x3d, 0x41, 0x3d, 0x3e, 0x3c, 0x39, 0x3e, 0x4f, 0x3f, 0x37, 0x31, 0x40,
+    0x3b, 0x38, 0x35, 0x3b, 0x44, 0x41, 0x41, 0x37, 0x40, 0x42, 0x2d, 0x3d,
+    0x39, 0x48, 0x44, 0x3e, 0x34, 0x48, 0x49, 0x6d, 0x45, 0x4b, 0x3a, 0x44,
+    0x49, 0x40, 0x4d, 0x51, 0x3f, 0x34, 0x3b, 0x40, 0x52, 0x34, 0x6f, 0x56,
+    0x33, 0x3e, 0x40, 0x39, 0x41, 0x32, 0x5d, 0x45, 0x2e, 0x51, 0x48, 0x3c,
+    0x2e, 0x2e, 0x51, 0x39, 0x32, 0x45, 0x4a, 0x4c, 0x3b, 0x40, 0x40, 0x3b,
+    0x36, 0x41, 0x54, 0x4e, 0x4a, 0x49, 0x3b, 0x4d, 0x3c, 0x41, 0x38, 0x47,
+    0x3d, 0x3c, 0x37, 0x48, 0x3f, 0x42, 0x3e, 0x36, 0x39, 0x46, 0x37, 0x3e,
+    0x3b, 0x38, 0x40, 0x3b, 0x39, 0x32, 0x3e, 0x29, 0x37, 0x35, 0x3c, 0x3d,
+    0x37, 0x3b, 0x35, 0x2f, 0x32, 0x3b, 0x37, 0x3c, 0x40, 0x3e, 0x39, 0x27,
+    0x3b, 0x38, 0x37, 0x36, 0x39, 0x37, 0x37, 0x35, 0x42, 0x3e, 0x3b, 0x43,
+    0x41, 0x3c, 0x37, 0x2a, 0x3a, 0x3e, 0x38, 0x40, 0x36, 0x3e, 0x44, 0x2e,
+    0x3e, 0x3a, 0x37, 0x3b, 0x3e, 0x41, 0x3d, 0x30, 0x3b, 0x3f, 0x41, 0x45,
+    0x3a, 0x48, 0x37, 0x2f, 0x3a, 0x37, 0x34, 0x43, 0x42, 0x3d, 0x38, 0x41,
+    0x3b, 0x3c, 0x39, 0x3c, 0x39, 0x47, 0x2e, 0x41, 0x42, 0x40, 0x32, 0x36,
+    0x43, 0x40, 0x3d, 0x4c, 0x38, 0x3e, 0x3b, 0x41, 0x3d, 0x3b, 0x34, 0x43,
+    0x43, 0x3f, 0x44, 0x3c, 0x3a, 0x33, 0x39, 0x42, 0x43, 0x3f, 0x33, 0x3d,
+    0x33, 0x3e, 0x48, 0x6b, 0x48, 0x43, 0x36, 0x47, 0x49, 0x44, 0x4a, 0x49,
+    0x3c, 0x31, 0x35, 0x3e, 0x5c, 0x34, 0x73, 0x53, 0x33, 0x3c, 0x32, 0x3b,
+    0x43, 0x27, 0x59, 0x4e, 0x2b, 0x51, 0x4f, 0x37, 0x36, 0x34, 0x56, 0x34,
+    0x32, 0x4f, 0x46, 0x50, 0x40, 0x40, 0x3c, 0x3e, 0x34, 0x37, 0x50, 0x49,
+    0x43, 0x47, 0x3e, 0x52, 0x44, 0x38, 0x3b, 0x4f, 0x3a, 0x3d, 0x2b, 0x4c,
+    0x40, 0x38, 0x3a, 0x35, 0x3a, 0x3a, 0x3d, 0x38, 0x3d, 0x3b, 0x37, 0x48,
+    0x3d, 0x3d, 0x32, 0x30, 0x3a, 0x34, 0x3f, 0x3a, 0x3b, 0x3e, 0x35, 0x2f,
+    0x3b, 0x3a, 0x45, 0x3d, 0x42, 0x33, 0x33, 0x24, 0x44, 0x39, 0x3c, 0x3d,
+    0x41, 0x3c, 0x37, 0x2c, 0x3b, 0x36, 0x34, 0x41, 0x3d, 0x3f, 0x39, 0x32,
+    0x3c, 0x40, 0x44, 0x3d, 0x41, 0x3d, 0x3a, 0x29, 0x3e, 0x3e, 0x43, 0x33,
+    0x3f, 0x3e, 0x3e, 0x31, 0x38, 0x3a, 0x34, 0x3d, 0x3f, 0x3e, 0x3a, 0x3d,
+    0x3e, 0x48, 0x45, 0x3d, 0x44, 0x37, 0x33, 0x3d, 0x45, 0x39, 0x40, 0x40,
+    0x42, 0x3f, 0x3f, 0x3d, 0x3a, 0x3b, 0x41, 0x33, 0x41, 0x3c, 0x32, 0x55,
+    0x43, 0x3a, 0x32, 0x40, 0x3c, 0x3e, 0x40, 0x43, 0x37, 0x3f, 0x40, 0x38,
+    0x43, 0x41, 0x36, 0x42, 0x44, 0x3c, 0x32, 0x3f, 0x38, 0x42, 0x46, 0x59,
+    0x4c, 0x41, 0x39, 0x47, 0x46, 0x46, 0x44, 0x44, 0x35, 0x42, 0x32, 0x39,
+    0x4f, 0x34, 0x6d, 0x55, 0x31, 0x3b, 0x3a, 0x3f, 0x44, 0x2c, 0x5d, 0x43,
+    0x26, 0x4a, 0x4f, 0x40, 0x36, 0x32, 0x4d, 0x33, 0x2f, 0x50, 0x4d, 0x57,
+    0x3b, 0x40, 0x42, 0x44, 0x41, 0x3f, 0x52, 0x4e, 0x35, 0x41, 0x44, 0x52,
+    0x40, 0x35, 0x39, 0x4b, 0x45, 0x34, 0x2c, 0x4a, 0x3b, 0x41, 0x31, 0x33,
+    0x3f, 0x3a, 0x36, 0x3c, 0x3c, 0x33, 0x30, 0x38, 0x43, 0x3f, 0x32, 0x2d,
+    0x3f, 0x3a, 0x38, 0x41, 0x39, 0x45, 0x36, 0x2e, 0x3c, 0x38, 0x45, 0x3f,
+    0x40, 0x3f, 0x3e, 0x26, 0x41, 0x37, 0x3c, 0x44, 0x3f, 0x3f, 0x35, 0x37,
+    0x46, 0x34, 0x37, 0x3e, 0x48, 0x38, 0x36, 0x34, 0x33, 0x39, 0x40, 0x3c,
+    0x42, 0x3d, 0x3b, 0x31, 0x38, 0x3b, 0x44, 0x42, 0x45, 0x38, 0x41, 0x30,
+    0x3d, 0x42, 0x36, 0x3f, 0x3b, 0x45, 0x37, 0x32, 0x3c, 0x37, 0x3d, 0x42,
+    0x38, 0x3d, 0x2f, 0x31, 0x39, 0x40, 0x3f, 0x44, 0x3a, 0x41, 0x44, 0x46,
+    0x3d, 0x3a, 0x32, 0x3b, 0x34, 0x47, 0x36, 0x4c, 0x47, 0x35, 0x3c, 0x33,
+    0x3b, 0x3c, 0x30, 0x43, 0x43, 0x3f, 0x31, 0x40, 0x3a, 0x37, 0x30, 0x46,
+    0x39, 0x3b, 0x42, 0x40, 0x2d, 0x3f, 0x3e, 0x6a, 0x50, 0x3b, 0x31, 0x54,
+    0x47, 0x3d, 0x48, 0x4e, 0x3b, 0x41, 0x3a, 0x39, 0x49, 0x36, 0x64, 0x4e,
+    0x32, 0x39, 0x3d, 0x37, 0x42, 0x2c, 0x5c, 0x43, 0x2a, 0x4b, 0x4b, 0x46,
+    0x30, 0x29, 0x52, 0x31, 0x35, 0x44, 0x4a, 0x4b, 0x3d, 0x3b, 0x4e, 0x42,
+    0x3d, 0x39, 0x42, 0x52, 0x3f, 0x36, 0x3e, 0x50, 0x3f, 0x32, 0x35, 0x3a,
+    0x40, 0x39, 0x35, 0x48, 0x3b, 0x3e, 0x41, 0x43, 0x43, 0x45, 0x2f, 0x36,
+    0x38, 0x34, 0x3f, 0x44, 0x32, 0x3f, 0x37, 0x33, 0x33, 0x35, 0x2e, 0x41,
+    0x37, 0x3e, 0x38, 0x28, 0x49, 0x30, 0x46, 0x39, 0x3b, 0x30, 0x38, 0x28,
+    0x3b, 0x3d, 0x3a, 0x43, 0x3f, 0x34, 0x43, 0x36, 0x39, 0x3c, 0x3e, 0x3e,
+    0x39, 0x3b, 0x39, 0x32, 0x3c, 0x36, 0x3e, 0x38, 0x34, 0x3c, 0x3a, 0x2a,
+    0x46, 0x3d, 0x40, 0x37, 0x3b, 0x39, 0x3b, 0x34, 0x38, 0x31, 0x43, 0x46,
+    0x3b, 0x43, 0x39, 0x2b, 0x38, 0x40, 0x3e, 0x39, 0x35, 0x3d, 0x2c, 0x36,
+    0x37, 0x40, 0x36, 0x40, 0x41, 0x38, 0x32, 0x3f, 0x36, 0x46, 0x34, 0x31,
+    0x40, 0x3e, 0x3c, 0x4e, 0x42, 0x3d, 0x36, 0x3f, 0x42, 0x3f, 0x33, 0x40,
+    0x34, 0x37, 0x3c, 0x3b, 0x31, 0x47, 0x32, 0x3c, 0x34, 0x3d, 0x42, 0x3b,
+    0x37, 0x41, 0x3b, 0x64, 0x52, 0x40, 0x36, 0x4e, 0x46, 0x3f, 0x3f, 0x47,
+    0x3c, 0x3a, 0x3a, 0x41, 0x4a, 0x32, 0x5e, 0x50, 0x2d, 0x39, 0x3a, 0x38,
+    0x3d, 0x2c, 0x5a, 0x3e, 0x2e, 0x47, 0x3e, 0x3e, 0x33, 0x29, 0x4c, 0x35,
+    0x30, 0x4d, 0x4d, 0x4d, 0x38, 0x42, 0x51, 0x47, 0x39, 0x3c, 0x43, 0x4b,
+    0x42, 0x3f, 0x3a, 0x4b, 0x44, 0x3f, 0x3a, 0x44, 0x3e, 0x37, 0x30, 0x45,
+    0x3d, 0x36, 0x34, 0x3f, 0x36, 0x35, 0x37, 0x36, 0x43, 0x3b, 0x37, 0x3e,
+    0x35, 0x3e, 0x32, 0x34, 0x32, 0x38, 0x3c, 0x3a, 0x3a, 0x3c, 0x30, 0x2b,
+    0x31, 0x37, 0x30, 0x42, 0x36, 0x37, 0x36, 0x2c, 0x3c, 0x31, 0x41, 0x37,
+    0x44, 0x41, 0x3b, 0x37, 0x41, 0x3f, 0x38, 0x3b, 0x3a, 0x3a, 0x3c, 0x2f,
+    0x47, 0x41, 0x3e, 0x33, 0x42, 0x3a, 0x32, 0x34, 0x44, 0x40, 0x43, 0x3d,
+    0x34, 0x41, 0x38, 0x35, 0x35, 0x3b, 0x45, 0x38, 0x32, 0x37, 0x3c, 0x2e,
+    0x39, 0x40, 0x30, 0x3e, 0x42, 0x35, 0x3d, 0x36, 0x3e, 0x3d, 0x39, 0x46,
+    0x3f, 0x36, 0x37, 0x49, 0x41, 0x39, 0x3d, 0x3d, 0x33, 0x44, 0x42, 0x50,
+    0x3d, 0x3c, 0x3e, 0x3f, 0x42, 0x42, 0x3b, 0x3d, 0x41, 0x31, 0x39, 0x3a,
+    0x44, 0x34, 0x38, 0x47, 0x44, 0x38, 0x3b, 0x42, 0x30, 0x42, 0x44, 0x57,
+    0x49, 0x3a, 0x39, 0x4f, 0x41, 0x3e, 0x40, 0x43, 0x37, 0x42, 0x3b, 0x48,
+    0x50, 0x29, 0x5b, 0x44, 0x2c, 0x40, 0x3f, 0x3c, 0x46, 0x34, 0x5c, 0x41,
+    0x2c, 0x48, 0x46, 0x46, 0x35, 0x32, 0x4c, 0x35, 0x2f, 0x3b, 0x48, 0x44,
+    0x41, 0x41, 0x49, 0x45, 0x34, 0x37, 0x44, 0x45, 0x43, 0x3b, 0x42, 0x44,
+    0x3a, 0x37, 0x48, 0x49, 0x34, 0x39, 0x33, 0x4a, 0x40, 0x3d, 0x33, 0x39,
+    0x39, 0x3b, 0x30, 0x31, 0x3d, 0x47, 0x3c, 0x3a, 0x34, 0x3c, 0x3a, 0x2b,
+    0x3a, 0x34, 0x41, 0x40, 0x42, 0x36, 0x44, 0x2c, 0x40, 0x47, 0x3b, 0x37,
+    0x38, 0x42, 0x44, 0x29, 0x36, 0x3d, 0x3d, 0x36, 0x42, 0x3b, 0x35, 0x36,
+    0x43, 0x39, 0x41, 0x3d, 0x45, 0x41, 0x31, 0x32, 0x40, 0x3d, 0x3c, 0x41,
+    0x3e, 0x3d, 0x35, 0x34, 0x32, 0x38, 0x36, 0x3f, 0x3b, 0x3d, 0x39, 0x36,
+    0x40, 0x3e, 0x3d, 0x3a, 0x3a, 0x3b, 0x3c, 0x32, 0x40, 0x34, 0x3a, 0x36,
+    0x42, 0x47, 0x3e, 0x33, 0x3a, 0x44, 0x30, 0x39, 0x40, 0x3a, 0x36, 0x44,
+    0x3c, 0x3b, 0x3f, 0x33, 0x3e, 0x3c, 0x35, 0x53, 0x43, 0x3c, 0x3f, 0x43,
+    0x3d, 0x44, 0x33, 0x47, 0x42, 0x40, 0x37, 0x3b, 0x43, 0x3f, 0x33, 0x41,
+    0x38, 0x42, 0x44, 0x3d, 0x2d, 0x3f, 0x46, 0x49, 0x4e, 0x3f, 0x36, 0x45,
+    0x45, 0x39, 0x40, 0x42, 0x39, 0x39, 0x3a, 0x42, 0x45, 0x2c, 0x61, 0x44,
+    0x30, 0x45, 0x38, 0x3a, 0x40, 0x37, 0x58, 0x39, 0x31, 0x3e, 0x3a, 0x3e,
+    0x37, 0x32, 0x4a, 0x39, 0x2e, 0x47, 0x3e, 0x4e, 0x3f, 0x3e, 0x48, 0x45,
+    0x3f, 0x48, 0x3a, 0x3f, 0x40, 0x36, 0x3a, 0x44, 0x36, 0x3e, 0x3d, 0x41,
+    0x45, 0x36, 0x36, 0x4b, 0x3a, 0x3d, 0x45, 0x48, 0x38, 0x45, 0x39, 0x38,
+    0x38, 0x3a, 0x42, 0x34, 0x3f, 0x34, 0x39, 0x34, 0x32, 0x3f, 0x3c, 0x3d,
+    0x3d, 0x47, 0x3a, 0x2f, 0x3c, 0x3e, 0x3f, 0x39, 0x35, 0x42, 0x3c, 0x2a,
+    0x3b, 0x35, 0x42, 0x44, 0x46, 0x39, 0x38, 0x39, 0x43, 0x3a, 0x38, 0x42,
+    0x3d, 0x3a, 0x40, 0x35, 0x34, 0x39, 0x3a, 0x38, 0x43, 0x42, 0x42, 0x2d,
+    0x31, 0x3b, 0x33, 0x40, 0x3b, 0x47, 0x35, 0x30, 0x3a, 0x3c, 0x3b, 0x47,
+    0x3a, 0x3c, 0x38, 0x35, 0x3c, 0x35, 0x3e, 0x3e, 0x39, 0x3d, 0x39, 0x40,
+    0x37, 0x33, 0x49, 0x38, 0x3c, 0x43, 0x34, 0x40, 0x39, 0x42, 0x3c, 0x3b,
+    0x3e, 0x45, 0x3e, 0x51, 0x3d, 0x3f, 0x3b, 0x34, 0x37, 0x3c, 0x40, 0x47,
+    0x3c, 0x41, 0x3f, 0x41, 0x37, 0x3e, 0x36, 0x3c, 0x42, 0x40, 0x3f, 0x3a,
+    0x3b, 0x42, 0x44, 0x4b, 0x4b, 0x37, 0x41, 0x4d, 0x41, 0x45, 0x40, 0x41,
+    0x40, 0x38, 0x37, 0x40, 0x42, 0x2c, 0x57, 0x43, 0x2d, 0x49, 0x3a, 0x3e,
+    0x37, 0x2f, 0x52, 0x37, 0x31, 0x42, 0x3b, 0x3f, 0x39, 0x38, 0x48, 0x3c,
+    0x37, 0x3d, 0x3a, 0x39, 0x3a, 0x45, 0x4b, 0x49, 0x3e, 0x44, 0x48, 0x49,
+    0x3d, 0x39, 0x3c, 0x41, 0x41, 0x38, 0x45, 0x38, 0x33, 0x3d, 0x37, 0x47,
+    0x34, 0x3f, 0x3b, 0x3d, 0x39, 0x34, 0x30, 0x39, 0x44, 0x36, 0x34, 0x3c,
+    0x37, 0x38, 0x45, 0x34, 0x40, 0x33, 0x41, 0x3a, 0x3e, 0x3c, 0x3b, 0x3a,
+    0x40, 0x3f, 0x3b, 0x3d, 0x3b, 0x46, 0x41, 0x2a, 0x3a, 0x3c, 0x42, 0x46,
+    0x33, 0x3f, 0x2d, 0x3a, 0x45, 0x45, 0x38, 0x3b, 0x44, 0x34, 0x35, 0x3f,
+    0x34, 0x43, 0x38, 0x3e, 0x41, 0x3b, 0x42, 0x38, 0x3d, 0x3f, 0x38, 0x45,
+    0x3b, 0x35, 0x39, 0x3c, 0x43, 0x43, 0x38, 0x34, 0x44, 0x43, 0x2e, 0x39,
+    0x39, 0x40, 0x39, 0x41, 0x41, 0x34, 0x3e, 0x44, 0x3d, 0x43, 0x3a, 0x3a,
+    0x3b, 0x3b, 0x36, 0x45, 0x3c, 0x43, 0x3d, 0x48, 0x36, 0x36, 0x39, 0x55,
+    0x35, 0x40, 0x3e, 0x49, 0x40, 0x3a, 0x3d, 0x3d, 0x34, 0x47, 0x40, 0x41,
+    0x40, 0x47, 0x39, 0x3e, 0x3b, 0x38, 0x3c, 0x3a, 0x35, 0x3e, 0x41, 0x4a,
+    0x4b, 0x3f, 0x36, 0x3d, 0x40, 0x3c, 0x39, 0x32, 0x33, 0x36, 0x30, 0x42,
+    0x42, 0x36, 0x54, 0x48, 0x2e, 0x4c, 0x34, 0x3c, 0x39, 0x36, 0x4e, 0x37,
+    0x2f, 0x3e, 0x30, 0x3d, 0x36, 0x3b, 0x45, 0x36, 0x37, 0x3e, 0x41, 0x4b,
+    0x3b, 0x36, 0x45, 0x3b, 0x38, 0x45, 0x3e, 0x43, 0x48, 0x46, 0x44, 0x44,
+    0x3e, 0x3b, 0x37, 0x3b, 0x3a, 0x3f, 0x3d, 0x44, 0x39, 0x38, 0x45, 0x43,
+    0x3d, 0x35, 0x39, 0x2c, 0x44, 0x41, 0x36, 0x40, 0x3d, 0x39, 0x3d, 0x2f,
+    0x3d, 0x39, 0x42, 0x3d, 0x36, 0x46, 0x43, 0x2c, 0x41, 0x3a, 0x30, 0x45,
+    0x3f, 0x41, 0x35, 0x2b, 0x3b, 0x38, 0x3a, 0x44, 0x32, 0x32, 0x39, 0x3c,
+    0x3a, 0x3a, 0x3c, 0x3a, 0x35, 0x40, 0x3b, 0x31, 0x36, 0x33, 0x35, 0x34,
+    0x3c, 0x3b, 0x3d, 0x36, 0x48, 0x3b, 0x3f, 0x42, 0x3e, 0x33, 0x2f, 0x3a,
+    0x49, 0x41, 0x39, 0x3e, 0x3c, 0x44, 0x3c, 0x39, 0x33, 0x39, 0x36, 0x35,
+    0x3d, 0x42, 0x34, 0x3e, 0x38, 0x45, 0x40, 0x45, 0x3d, 0x48, 0x42, 0x4a,
+    0x3f, 0x45, 0x38, 0x42, 0x44, 0x40, 0x34, 0x49, 0x44, 0x3d, 0x3a, 0x39,
+    0x3e, 0x3a, 0x42, 0x3e, 0x48, 0x42, 0x3e, 0x3a, 0x3f, 0x3f, 0x32, 0x3b,
+    0x38, 0x41, 0x3c, 0x39, 0x33, 0x45, 0x44, 0x3c, 0x48, 0x41, 0x41, 0x3d,
+    0x3a, 0x3c, 0x37, 0x33, 0x41, 0x3f, 0x38, 0x3a, 0x3f, 0x37, 0x51, 0x3c,
+    0x37, 0x3a, 0x43, 0x37, 0x40, 0x31, 0x4f, 0x34, 0x3b, 0x44, 0x45, 0x39,
+    0x40, 0x33, 0x49, 0x33, 0x3e, 0x35, 0x44, 0x3d, 0x3b, 0x3f, 0x43, 0x41,
+    0x43, 0x43, 0x48, 0x44, 0x46, 0x3b, 0x43, 0x3f, 0x3c, 0x3f, 0x3e, 0x3d,
+    0x3b, 0x41, 0x3c, 0x43, 0x30, 0x34, 0x39, 0x33, 0x3f, 0x38, 0x36, 0x2e,
+    0x33, 0x3f, 0x3c, 0x40, 0x3d, 0x3b, 0x3b, 0x31, 0x36, 0x41, 0x3b, 0x38,
+    0x46, 0x36, 0x34, 0x31, 0x42, 0x44, 0x33, 0x35, 0x3f, 0x36, 0x3c, 0x30,
+    0x3f, 0x31, 0x39, 0x3e, 0x3f, 0x47, 0x3e, 0x34, 0x36, 0x36, 0x34, 0x39,
+    0x37, 0x46, 0x40, 0x33, 0x3b, 0x3a, 0x3f, 0x41, 0x37, 0x44, 0x3a, 0x3f,
+    0x34, 0x45, 0x37, 0x33, 0x3f, 0x47, 0x41, 0x36, 0x39, 0x3e, 0x40, 0x38,
+    0x41, 0x3d, 0x3d, 0x36, 0x40, 0x3a, 0x3b, 0x3b, 0x41, 0x3b, 0x3a, 0x3f,
+    0x3f, 0x3b, 0x35, 0x42, 0x46, 0x3a, 0x30, 0x45, 0x40, 0x37, 0x39, 0x39,
+    0x3d, 0x38, 0x3f, 0x45, 0x3f, 0x31, 0x32, 0x3b, 0x35, 0x3e, 0x3b, 0x38,
+    0x3b, 0x44, 0x37, 0x39, 0x37, 0x42, 0x3f, 0x44, 0x38, 0x36, 0x37, 0x44,
+    0x45, 0x46, 0x41, 0x3b, 0x46, 0x42, 0x43, 0x43, 0x3a, 0x4b, 0x37, 0x35,
+    0x3b, 0x40, 0x32, 0x38, 0x41, 0x38, 0x4f, 0x3e, 0x36, 0x3f, 0x47, 0x3b,
+    0x47, 0x3b, 0x4a, 0x2e, 0x3d, 0x45, 0x3b, 0x46, 0x3e, 0x38, 0x43, 0x38,
+    0x41, 0x48, 0x3a, 0x39, 0x40, 0x45, 0x3b, 0x43, 0x40, 0x3e, 0x43, 0x41,
+    0x41, 0x3e, 0x39, 0x3f, 0x35, 0x42, 0x33, 0x3f, 0x3d, 0x32, 0x45, 0x3c,
+    0x41, 0x31, 0x45, 0x38, 0x43, 0x45, 0x41, 0x35, 0x35, 0x40, 0x44, 0x36,
+    0x3a, 0x3b, 0x3c, 0x2c, 0x3e, 0x41, 0x33, 0x3d, 0x46, 0x34, 0x3b, 0x30,
+    0x30, 0x42, 0x43, 0x3d, 0x3d, 0x3d, 0x43, 0x31, 0x3f, 0x40, 0x3a, 0x3f,
+    0x48, 0x3e, 0x3b, 0x39, 0x44, 0x43, 0x3b, 0x3a, 0x42, 0x38, 0x38, 0x3b,
+    0x3f, 0x44, 0x37, 0x3e, 0x45, 0x40, 0x41, 0x3b, 0x3c, 0x3a, 0x38, 0x37,
+    0x3b, 0x33, 0x3f, 0x35, 0x43, 0x3d, 0x33, 0x41, 0x3b, 0x46, 0x39, 0x32,
+    0x39, 0x3f, 0x3b, 0x39, 0x47, 0x3c, 0x3f, 0x39, 0x34, 0x3d, 0x3c, 0x46,
+    0x3f, 0x3e, 0x3e, 0x44, 0x34, 0x40, 0x3f, 0x39, 0x3c, 0x38, 0x36, 0x45,
+    0x42, 0x46, 0x3b, 0x44, 0x3a, 0x3d, 0x3b, 0x42, 0x3b, 0x3b, 0x3c, 0x45,
+    0x42, 0x3d, 0x36, 0x37, 0x3d, 0x43, 0x3f, 0x48, 0xa6, 0xfb, 0xff, 0xff,
+    0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xb3, 0x00, 0x00, 0x00,
+    0x39, 0xff, 0xff, 0xff, 0xe5, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00,
+    0x68, 0xfb, 0xff, 0xff, 0xbc, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00,
     0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00,
     0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
-    0x58, 0x01, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00,
-    0x38, 0x02, 0x00, 0x00, 0x9c, 0x02, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00,
-    0x14, 0x03, 0x00, 0x00, 0xfe, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
-    0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
-    0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x70, 0x02, 0x00, 0x00,
+    0x70, 0x03, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0xf0, 0x01, 0x00, 0x00,
+    0x80, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00,
+    0xa4, 0x02, 0x00, 0x00, 0xba, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x24, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65,
+    0x6c, 0x73, 0x5f, 0x73, 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x3c, 0xfd, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x80, 0x3b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0xfd, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x03, 0x1c, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x61, 0x64, 0x64, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xfd, 0xff, 0xff,
+    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x97, 0xf5, 0x3f,
+    0x01, 0x00, 0x00, 0x00, 0x87, 0x35, 0xa0, 0x43, 0x01, 0x00, 0x00, 0x00,
+    0xd6, 0xd7, 0x28, 0xc3, 0x92, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x1c, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75,
+    0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
     0x19, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 0x00, 0x00,
-    0xcc, 0xfc, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
     0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x17, 0xbf, 0xd2, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x58, 0xec, 0xd1, 0x43,
-    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0xfd, 0xff, 0xff,
-    0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-    0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x43, 0x6f, 0x6e, 0x76,
-    0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x34, 0xff, 0xff, 0xff,
+    0x05, 0x80, 0xbf, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x85, 0xc0, 0xbe, 0x43,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x03, 0x3c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
+    0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e,
+    0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57,
+    0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72,
+    0x73, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0xa4, 0xfe, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
     0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a, 0xc2, 0xfd, 0xff, 0xff,
-    0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x1c, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68,
-    0x61, 0x70, 0x65, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x94, 0xfd, 0xff, 0xff,
-    0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xae, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x17, 0xac, 0x6e, 0x3a, 0x01, 0x00, 0x00, 0x00,
+    0x20, 0x4e, 0x97, 0x3d, 0x01, 0x00, 0x00, 0x00, 0xaf, 0x27, 0x21, 0xbe,
+    0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x20, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
+    0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68, 0x61, 0x70, 0x65, 0x5f,
+    0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x1c, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43,
-    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0xfe, 0xff, 0xff,
-    0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
-    0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d,
-    0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x0c, 0x00, 0x0c, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x42,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff, 0xff,
+    0x00, 0x00, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+    0x43, 0x6f, 0x6e, 0x76, 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xfc, 0xfe, 0xff, 0xff,
     0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0xc5, 0x01, 0x2a, 0x3b, 0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
-    0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
-    0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x25, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f,
-    0x71, 0x75, 0x61, 0x6e, 0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75,
-    0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61,
-    0x78, 0x56, 0x61, 0x72, 0x73, 0x00, 0x00, 0x00, 0x84, 0xfe, 0xff, 0xff,
-    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xab, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a,
-    0x01, 0x00, 0x00, 0x00, 0x6e, 0x88, 0xae, 0x3d, 0x01, 0x00, 0x00, 0x00,
-    0xd4, 0x97, 0x30, 0xbe, 0x26, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
-    0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x61, 0x64, 0x64, 0x5f,
-    0x31, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x2f, 0xad, 0x18, 0x40, 0x01, 0x00, 0x00, 0x00,
-    0x02, 0x38, 0xa2, 0x43, 0x01, 0x00, 0x00, 0x00, 0x02, 0xf1, 0x8d, 0xc3,
-    0x8e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73,
-    0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff,
-    0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x17, 0xac, 0xee, 0x39, 0x5a, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03,
+    0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x54, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67,
+    0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x5f, 0x31, 0x2f,
+    0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74,
+    0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, 0x73, 0x2f,
+    0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00,
+    0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+    0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x9d, 0xaf, 0xd0, 0x3a, 0x01, 0x00, 0x00, 0x00,
+    0xe7, 0x29, 0x9e, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x5b, 0x91, 0xc3, 0xbd,
+    0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, 0x07, 0x00, 0x0c, 0x00,
+    0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
+    0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x28, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d,
+    0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
     0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3b,
-    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00,
-    0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x14, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,
-    0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e,
-    0x74, 0x5f, 0x31, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e,
-    0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56,
-    0x61, 0x72, 0x73, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73,
-    0x65, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00,
-    0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x31, 0x83, 0xce, 0x3a, 0x01, 0x00, 0x00, 0x00,
-    0x4d, 0x97, 0x92, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x84, 0x75, 0xec, 0xbd,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x62, 0x1b, 0x1c, 0x3b,
     0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00,
     0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09,
-    0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00,
     0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x14, 0x00, 0x1c, 0x00,
     0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x18, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
-    0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
-    0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
     0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x18, 0x00,
     0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
-    0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
-    0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00,
     0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00,
     0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
     0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
new file mode 100644
index 0000000000..3ad29e53c8
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
@@ -0,0 +1,158 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h"
+
+/* File automatically created by
+ * tensorflow/examples/speech_commands/wav_to_features.py \
+ * --sample_rate=16000 \
+ * --clip_duration_ms=1000 \
+ * --window_size_ms=30 \
+ * --window_stride_ms=20 \
+ * --feature_bin_count=40 \
+ * --quantize \
+ * --preprocess="average" \
+ * --input_wav="speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav" \
+ * --output_c_file="yes_features_data.cc" \
+ */
+
+const int g_yes_f2e59fea_nohash_1_width = 43;
+const int g_yes_f2e59fea_nohash_1_height = 49;
+const unsigned char g_yes_f2e59fea_nohash_1_data[] = {
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  1,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  1,   1,  1,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  4,   5,   1,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  1,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   2,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    1,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   1,  19, 1,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   1,   0,  1,  3,   3,   1,  1,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   8,   89, 8,   0,   0,  0,  0,   0,   0,  0,  0,   4,  13,
+    1,  6,  23,  20,  6,   4,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  19, 177, 42, 1,
+    1,  0,  0,   0,   0,   2,  3,   119, 51, 5,  139, 92,  58, 58, 15,  2,  1,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   13, 165, 176, 3,  1,  1,   0,   0,  1,  1,   32, 214,
+    26, 19, 113, 103, 28,  22, 27,  3,   1,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  12,  55, 128,
+    27, 1,  1,   0,   1,   4,  2,   52,  93, 10, 28,  156, 10, 21, 21,  3,  3,
+    1,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  14,  99,  32, 65, 7,   1,   2,  2,  6,   13, 121,
+    36, 15, 11,  112, 125, 14, 5,   13,  4,  4,  2,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   24, 25,
+    32, 5,  1,   0,   0,   0,  1,   0,   7,  5,  1,   1,   3,  3,  0,   3,  3,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   13,  13, 5,  1,   0,   0,  0,  0,   0,  3,
+    4,  1,  0,   1,   2,   3,  1,   1,   1,  4,  8,   1,   2,  1,  3,   1,  1,
+    0,  1,  1,   3,   1,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  1,
+    8,  2,  1,   0,   0,   0,  0,   0,   1,  1,  0,   0,   1,  1,  2,   0,  2,
+    1,  0,  2,   0,   2,   2,  3,   1,   1,  0,  1,   1,   4,  5,  1,   0,  1,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  1,   1,   1,  0,  1,   2,   1,  0,  1,   3,  1,
+    1,  3,  1,   1,   6,   2,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  2,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  1,   1,   0,  1,  2,   6,   2,  4,  2,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  3,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  1,
+    0,  0,  1,   2,   1,   1,  2,   1,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  4,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   2,  1,  0,   0,   2,  3,  5,   2,  0,
+    1,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   1,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   1,   2,  2,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  1,  0,   0,   0,  0,  1,   2,  3,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   1,  1,   1,   1,  0,  0,   0,   1,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,  0,
+    0,  0,  0,   0,   0,   0,  0,   0,   0,  0,  0,   0,   0,  0,  0,   0,
+};
diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h
new file mode 100644
index 0000000000..33ac230862
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_
+
+extern const int g_yes_f2e59fea_nohash_1_width;
+extern const int g_yes_f2e59fea_nohash_1_height;
+extern const unsigned char g_yes_f2e59fea_nohash_1_data[];
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h
index 104509c9dc..3b6554dea6 100644
--- a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h
+++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h
@@ -135,4 +135,22 @@ extern tflite::ErrorReporter* reporter;
     }                                                                 \
   } while (false)
 
+#define TF_LITE_MICRO_EXPECT_GT(x, y)                                        \
+  do {                                                                       \
+    if ((x) <= (y)) {                                                        \
+      micro_test::reporter->Report(#x " > " #y " failed at %s:%d", __FILE__, \
+                                   __LINE__);                                \
+      micro_test::did_test_fail = true;                                      \
+    }                                                                        \
+  } while (false)
+
+#define TF_LITE_MICRO_EXPECT_LT(x, y)                                        \
+  do {                                                                       \
+    if ((x) >= (y)) {                                                        \
+      micro_test::reporter->Report(#x " < " #y " failed at %s:%d", __FILE__, \
+                                   __LINE__);                                \
+      micro_test::did_test_fail = true;                                      \
+    }                                                                        \
+  } while (false)
+
 #endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_
diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile
index 880bb4763c..3f749e53ef 100644
--- a/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile
+++ b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile
@@ -55,7 +55,9 @@ MICROLITE_LIB_NAME := libtensorflow-microlite.a
 # Test binary for the microcontroller speech model.
 MICRO_SPEECH_TEST_SRCS := \
 tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc \
-tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
+tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc \
+tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc \
+tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
 
 MICROLITE_TEST_SRCS := \
 $(wildcard tensorflow/contrib/lite/experimental/micro/*test.cc) \
-- 
GitLab


From b55ff8747914d41f16fcfe02a7346c574dff1bb7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 06:32:38 -0700
Subject: [PATCH 0770/1085] Internal change.

PiperOrigin-RevId: 216686849
---
 .../internal/optimized/depthwiseconv_float.h  | 27 -------------------
 .../internal/optimized/depthwiseconv_uint8.h  | 27 -------------------
 2 files changed, 54 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index d8dd7bba89..bcadfb2f8c 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -841,33 +841,6 @@ inline void FloatDepthwiseConvAccumRowGeneric(
     int filter_width, const float* filter_data, int out_x_buffer_start,
     int out_x_buffer_end, int output_depth, float* acc_buffer) {
   gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)");
-#ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-  LOG(FATAL)
-      << "\n\n"
-      << "*****************************************************************\n"
-      << "* This tfmini inference code was about to use the slow generic\n"
-      << "* fallback implementation for a DepthwiseConv op, and we want you\n"
-      << "* to be aware of that so that you will know why you get terrible\n"
-      << "* performance.\n"
-      << "*\n"
-      << "* If you would like to carry on with the slow code, compile\n"
-      << "* with this preprocessor token defined:\n"
-      << "* ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK.\n"
-      << "*\n"
-      << "* The right thing to do, if you care about performance, is to add\n"
-      << "* a new DepthwiseConv kernel to tfmini to cover your case.\n"
-      << "* The relevant parameters defining your case are:\n"
-      << "* stride = " << stride << "\n"
-      << "* input_depth = " << input_depth << "\n"
-      << "* depth_multiplier = " << depth_multiplier << "\n"
-      << "* dilation_factor = " << dilation_factor << "\n"
-      << "*\n"
-      << "* Please do not hesitate to contact benoitjacob@ with this\n"
-      << "* information.\n"
-      << "*****************************************************************\n";
-#endif  // ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#endif  // TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
   const float* filter_base_ptr = filter_data;
   for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
     const int out_x_loop_start = std::max(
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index 803eff292a..eff9cab477 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1546,33 +1546,6 @@ inline void QuantizedDepthwiseConvAccumRowGeneric(
     int16 filter_offset, int out_x_buffer_start, int out_x_buffer_end,
     int output_depth, int32* acc_buffer) {
   gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)");
-#ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-  LOG(FATAL)
-      << "\n\n"
-      << "*****************************************************************\n"
-      << "* This tfmini inference code was about to use the slow generic\n"
-      << "* fallback implementation for a DepthwiseConv op, and we want you\n"
-      << "* to be aware of that so that you will know why you get terrible\n"
-      << "* performance.\n"
-      << "*\n"
-      << "* If you would like to carry on with the slow code, compile\n"
-      << "* with this preprocessor token defined:\n"
-      << "* ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK.\n"
-      << "*\n"
-      << "* The right thing to do, if you care about performance, is to add\n"
-      << "* a new DepthwiseConv kernel to tfmini to cover your case.\n"
-      << "* The relevant parameters defining your case are:\n"
-      << "* stride = " << stride << "\n"
-      << "* input_depth = " << input_depth << "\n"
-      << "* depth_multiplier = " << depth_multiplier << "\n"
-      << "* dilation_factor = " << dilation_factor << "\n"
-      << "*\n"
-      << "* Please do not hesitate to contact benoitjacob@ with this\n"
-      << "* information.\n"
-      << "*****************************************************************\n";
-#endif  // ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#endif  // TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
   const uint8* filter_base_ptr = filter_data;
   for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
     const int out_x_loop_start = std::max(
-- 
GitLab


From b2ab2da16f22007e0f4d61d8806ebac6d5d0edd5 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Thu, 11 Oct 2018 06:49:37 -0700
Subject: [PATCH 0771/1085] Support arbitrary many values in KeyValueSort on
 GPU backend.

PiperOrigin-RevId: 216688700
---
 .../xla/service/gpu/ir_emitter_unnested.cc    | 72 +++++++++++--------
 tensorflow/compiler/xla/service/llvm_ir/BUILD |  1 -
 .../compiler/xla/service/llvm_ir/sort_util.cc | 20 +++---
 .../compiler/xla/service/llvm_ir/sort_util.h  |  5 +-
 4 files changed, 54 insertions(+), 44 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index d8ae5b46fe..fd624fda08 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -34,6 +34,7 @@ limitations under the License.
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor.h"
@@ -2192,34 +2193,34 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) {
 
 Status IrEmitterUnnested::HandleSort(HloInstruction* sort) {
   std::vector<std::unique_ptr<Thunk>> thunks;
-  auto keys = sort->operand(0);
-  auto values = sort->operand_count() > 1 ? sort->operand(1) : nullptr;
-  ShapeIndex keys_shape_index({});
-  ShapeIndex values_shape_index({});
-  if (values != nullptr) {
-    keys_shape_index = ShapeIndex({0});
-    values_shape_index = ShapeIndex({1});
-  }
-  auto keys_destination = GetAllocationSlice(*sort, keys_shape_index);
-  auto values_destination = GetAllocationSlice(*sort, values_shape_index);
-
-  if (keys_destination != GetAllocationSlice(*keys)) {
-    thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
-        /*source_address=*/GetAllocationSlice(*keys),
-        /*destination_buffer=*/keys_destination,
-        /*mem_size=*/ShapeUtil::ByteSizeOf(keys->shape()), nullptr));
-  }
-  if (values != nullptr && values_destination != GetAllocationSlice(*values)) {
-    // TODO(b/26783907): Figure out why we never seem to share buffers for
-    // key/value sort.
-    thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
-        /*source_address=*/GetAllocationSlice(*values),
-        /*destination_buffer=*/values_destination,
-        /*mem_size=*/ShapeUtil::ByteSizeOf(values->shape()), nullptr));
+  Shape keys_shape = sort->operand(0)->shape();
+  for (int64 i = 0; i < sort->operand_count(); ++i) {
+    ShapeIndex shape_index =
+        sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({});
+    // We assume that the layout of all involved operands and outputs is the
+    // same.
+    TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape,
+                                                  sort->operand(i)->shape()));
+    TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(
+        keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index)));
+
+    // If possible, we share buffers. If that is not possible, we need to copy
+    // the values, because the emitter does the sorting in-place.
+    auto destination_buffer = GetAllocationSlice(*sort, shape_index);
+    auto source_address = GetAllocationSlice(*sort->operand(i));
+    if (destination_buffer != source_address) {
+      // TODO(b/26783907): Figure out why we never seem to share buffers for
+      // key/value sort.
+      thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
+          /*source_address=*/source_address,
+          /*destination_buffer=*/destination_buffer,
+          /*mem_size=*/ShapeUtil::ByteSizeOf(sort->operand(i)->shape()),
+          nullptr));
+    }
   }
 
   int64 dimension_to_sort = sort->dimensions(0);
-  int64 dimension_to_sort_bound = keys->shape().dimensions(dimension_to_sort);
+  int64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort);
   int64 num_stages = tensorflow::Log2Ceiling(dimension_to_sort_bound);
   auto index_type = b_.getInt64Ty();
 
@@ -2243,7 +2244,7 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) {
       thunks.push_back(
           BuildKernelThunk(sort, /*implements_whole_instruction=*/false));
       LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
-          keys->shape(), ir_emitter_context_->device_description());
+          keys_shape, ir_emitter_context_->device_description());
       UpdateLaunchDimensions(launch_dimensions, thunks.back().get(),
                              ir_emitter_context_->llvm_module());
 
@@ -2254,12 +2255,21 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) {
         xor_mask = llvm::ConstantInt::get(index_type, 1LL << mask);
       }
 
+      IrArray keys_array;
+      std::vector<IrArray> values_arrays;
+      values_arrays.reserve(sort->operand_count() - 1);
+      for (int64 i = 0; i < sort->operand_count(); ++i) {
+        ShapeIndex shape_index =
+            sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({});
+        if (i == 0) {
+          keys_array = GetIrArray(*sort, *sort, shape_index);
+        } else {
+          values_arrays.push_back(GetIrArray(*sort, *sort, shape_index));
+        }
+      }
       TF_RETURN_IF_ERROR(llvm_ir::EmitSortInPlace(
-          dimension_to_sort, GetIrArray(*sort, *sort, keys_shape_index),
-          values != nullptr ? absl::make_optional<IrArray>(
-                                  GetIrArray(*sort, *sort, values_shape_index))
-                            : absl::nullopt,
-          IrName(sort), xor_mask, &b_, &launch_dimensions));
+          dimension_to_sort, keys_array, values_arrays, IrName(sort), xor_mask,
+          &b_, &launch_dimensions));
     }
   }
 
diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD
index 6223a34b12..5f7ad81d82 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/BUILD
+++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -204,7 +204,6 @@ cc_library(
         "//tensorflow/compiler/xla/service/gpu:partition_assignment",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/strings",
-        "@com_google_absl//absl/types:optional",
         "@llvm//:core",
         "@llvm//:support",
     ],
diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
index 944c79580c..05ba4a40da 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
@@ -15,9 +15,10 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/llvm_ir/sort_util.h"
 
+#include <vector>
+
 // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
 #include "absl/strings/string_view.h"
-#include "absl/types/optional.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
@@ -43,7 +44,7 @@ namespace {
 void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index,
                      const IrArray::Index& compare_keys_index,
                      const IrArray& keys_array,
-                     const absl::optional<IrArray>& values_array,
+                     const std::vector<IrArray>& values_arrays,
                      llvm::IRBuilder<>* b) {
   // if (is_smaller_index &&
   //     compare_keys[dimension_to_sort] < dimension_to_sort_bound)
@@ -100,19 +101,18 @@ void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index,
   // Swap key1 with key2.
   keys_array.EmitWriteArrayElement(keys_index, key2, b);
   keys_array.EmitWriteArrayElement(compare_keys_index, key1, b);
-  if (values_array.has_value()) {
+  for (const auto& values_array : values_arrays) {
     // Also swap the values.
-    auto value1 = values_array.value().EmitReadArrayElement(keys_index, b);
-    auto value2 =
-        values_array.value().EmitReadArrayElement(compare_keys_index, b);
-    values_array.value().EmitWriteArrayElement(keys_index, value2, b);
-    values_array.value().EmitWriteArrayElement(compare_keys_index, value1, b);
+    auto value1 = values_array.EmitReadArrayElement(keys_index, b);
+    auto value2 = values_array.EmitReadArrayElement(compare_keys_index, b);
+    values_array.EmitWriteArrayElement(keys_index, value2, b);
+    values_array.EmitWriteArrayElement(compare_keys_index, value1, b);
   }
 }
 }  // namespace
 
 Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
-                       const absl::optional<IrArray>& values_array,
+                       const std::vector<IrArray>& values_arrays,
                        absl::string_view name, llvm::Value* xor_mask,
                        llvm::IRBuilder<>* b,
                        const gpu::LaunchDimensions* launch_dimensions) {
@@ -162,7 +162,7 @@ Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
     compare_keys_index[dimension_to_sort] =
         b->CreateXor(compare_index[0], xor_mask);
     EmitCompareLoop(dimension_to_sort, keys_index, compare_keys_index,
-                    keys_array, values_array, b);
+                    keys_array, values_arrays, b);
     return Status::OK();
   };
   if (launch_dimensions != nullptr) {
diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.h b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h
index 527ed10374..2f3bcda230 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/sort_util.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h
@@ -16,8 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_
 
+#include <vector>
+
 #include "absl/strings/string_view.h"
-#include "absl/types/optional.h"
 #include "llvm/IR/Value.h"
 #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
@@ -31,7 +32,7 @@ namespace llvm_ir {
 // implements the inner loop of BitonicSort. If 'launch_dimensions' is nullptr,
 // the inner compare loop will not be parallelized.
 Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
-                       const absl::optional<IrArray>& values_array,
+                       const std::vector<IrArray>& values_arrays,
                        absl::string_view name, llvm::Value* xor_mask,
                        llvm::IRBuilder<>* b,
                        const gpu::LaunchDimensions* launch_dimensions);
-- 
GitLab


From 5eb3d92fc5d7a0641ad5d1ad2b54870b6e5b5e58 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 07:15:05 -0700
Subject: [PATCH 0772/1085] Disallow PRED as element type for arithmetic
 operations in HloVerifier

PiperOrigin-RevId: 216691593
---
 tensorflow/compiler/xla/service/shape_inference.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index aa49f98bcf..25afc23e5b 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -919,6 +919,9 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
   switch (opcode) {
     case HloOpcode::kMaximum:
     case HloOpcode::kMinimum:
+      return InferElementwiseBinaryOpShape(opcode, lhs, rhs,
+                                           broadcast_dimensions);
+
     case HloOpcode::kSubtract:
     case HloOpcode::kAdd:
     case HloOpcode::kAtan2:
@@ -929,6 +932,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
     case HloOpcode::kShiftLeft:
     case HloOpcode::kShiftRightArithmetic:
     case HloOpcode::kShiftRightLogical:
+      if (lhs.element_type() == PRED || rhs.element_type() == PRED) {
+        return InvalidArgument(
+            "Expected element type in shape to be arithmetic type for "
+            "operation %s; got PRED.",
+            HloOpcodeString(opcode));
+      }
       return InferElementwiseBinaryOpShape(opcode, lhs, rhs,
                                            broadcast_dimensions);
 
-- 
GitLab


From 4c080129c62d625c3f57c7d9ee6d8dab8e91ca1e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 08:06:10 -0700
Subject: [PATCH 0773/1085] Automated rollback of commit
 e5537748753491f803fbddebdcb1cdb710631db9

PiperOrigin-RevId: 216697425
---
 tensorflow/python/ops/image_ops_impl.py | 14 +++++---------
 tensorflow/python/ops/image_ops_test.py | 10 ----------
 2 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index a5c800ed9f..1c75aab578 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1184,8 +1184,7 @@ def per_image_standardization(image):
   away from zero to protect against division by 0 when handling uniform images.
 
   Args:
-    image: 4-D Tensor of shape `[batch, height, width, channels]` or
-           3-D Tensor of shape `[height, width, channels]`.
+    image: 3-D tensor of shape `[height, width, channels]`.
 
   Returns:
     The standardized image with same shape as `image`.
@@ -1195,17 +1194,14 @@ def per_image_standardization(image):
   """
   with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
     image = ops.convert_to_tensor(image, name='image')
-    image = _AssertAtLeast3DImage(image)
-    if image.get_shape().ndims != 3 and image.get_shape().ndims != 4:
-      raise ValueError('`image` must have either 3 or 4 dimensions.')
-    num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-1:-4:-1])
+    image = _Assert3DImage(image)
+    num_pixels = math_ops.reduce_prod(array_ops.shape(image))
 
     image = math_ops.cast(image, dtype=dtypes.float32)
-    image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True)
+    image_mean = math_ops.reduce_mean(image)
 
     variance = (
-        math_ops.reduce_mean(
-            math_ops.square(image), axis=[-1, -2, -3], keepdims=True) -
+        math_ops.reduce_mean(math_ops.square(image)) -
         math_ops.square(image_mean))
     variance = gen_nn_ops.relu(variance)
     stddev = math_ops.sqrt(variance)
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 9ed3b4ff5d..ff86df6346 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -1491,16 +1491,6 @@ class PerImageWhiteningTest(test_util.TensorFlowTestCase):
       whiten_np = whiten.eval()
       self.assertFalse(np.any(np.isnan(whiten_np)))
 
-  def testBatchWhitening(self):
-    imgs_np = np.random.uniform(0., 255., [4, 24, 24, 3])
-    whiten_np = [self._NumpyPerImageWhitening(img) for img in imgs_np]
-    with self.test_session(use_gpu=True):
-      imgs = constant_op.constant(imgs_np)
-      whiten = image_ops.per_image_standardization(imgs)
-      whiten_tf = whiten.eval()
-      for w_tf, w_np in zip(whiten_tf, whiten_np):
-        self.assertAllClose(w_tf, w_np, atol=1e-4)
-
 
 class CropToBoundingBoxTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 085c3e45863d57b10777d9e4903c36a1326b256a Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Thu, 11 Oct 2018 08:09:55 -0700
Subject: [PATCH 0774/1085] Update downloadable clang to r343880

PiperOrigin-RevId: 216697820
---
 third_party/clang_toolchain/download_clang.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/third_party/clang_toolchain/download_clang.bzl b/third_party/clang_toolchain/download_clang.bzl
index e782739661..5fa459caf1 100644
--- a/third_party/clang_toolchain/download_clang.bzl
+++ b/third_party/clang_toolchain/download_clang.bzl
@@ -39,15 +39,15 @@ def download_clang(repo_ctx, out_folder):
 
     # Latest CLANG_REVISION and CLANG_SUB_REVISION of the Chromiums's release
     # can be found in https://chromium.googlesource.com/chromium/src/tools/clang/+/master/scripts/update.py
-    CLANG_REVISION = "340427"
+    CLANG_REVISION = "343880"
     CLANG_SUB_REVISION = 1
 
     package_version = "%s-%s" % (CLANG_REVISION, CLANG_SUB_REVISION)
 
     checksums = {
-        "Linux_x64": "8a8f21fb624fc7be7e91e439a13114847185375bb932db51ba590174ecaf764b",
-        "Mac": "ba894536b7c8d37103a5ddba784f268d55e65bb2ea1200a2cf9f2ef1590eaacd",
-        "Win": "c3f5bd977266dfd011411c94a13e00974b643b70fb0225a5fb030f7f703fa474",
+        "Linux_x64": "3530f53516fd08799e2754601e53a19531e1db5bc73c9ad8d2d1d8efdd9c9c9b",
+        "Mac": "8761b47869089be216324af8c5a93cba2d539a1d252c9c8cad8f2cd6da21f9f4",
+        "Win": "06eb08aa0b1ff7ea65db375a7dc7151cde7c89a44044fb63e5b73ea2f96c6e65",
     }
 
     platform_folder = _get_platform_folder(repo_ctx.os.name)
-- 
GitLab


From 0b1ce4d6f5912c7d749e20207402ee871598b21a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 09:05:09 -0700
Subject: [PATCH 0775/1085] Automated rollback of commit
 9b558126e31d25ec4e82cb4f50033d6eca44349a

PiperOrigin-RevId: 216705354
---
 .../grappler/optimizers/graph_optimizer.h     | 21 ------
 .../grappler/optimizers/meta_optimizer.cc     | 70 +------------------
 .../core/grappler/optimizers/meta_optimizer.h | 15 +---
 .../optimizers/meta_optimizer_test.cc         | 62 ----------------
 .../core/protobuf/rewriter_config.proto       |  4 --
 5 files changed, 5 insertions(+), 167 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer.h b/tensorflow/core/grappler/optimizers/graph_optimizer.h
index bd6bf9f860..765dd13263 100644
--- a/tensorflow/core/grappler/optimizers/graph_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/graph_optimizer.h
@@ -16,11 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_GRAPH_OPTIMIZER_H_
 
-#include <atomic>
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/thread_annotations.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -32,7 +29,6 @@ struct GrapplerItem;
 // optimization of a GrapplerItem for running on a cluster.
 class GraphOptimizer {
  public:
-  GraphOptimizer() : is_cancelled_(false) {}
   virtual ~GraphOptimizer() {}
 
   virtual string name() const = 0;
@@ -49,25 +45,8 @@ class GraphOptimizer {
   // call to Optimize) performed.  Lower "result" scores are better.
   virtual void Feedback(Cluster* cluster, const GrapplerItem& item,
                         const GraphDef& optimized_graph, double result) = 0;
-
-  // Best effort cancellation. Sets is_cancelled to true and requests that the
-  // optimizer returns as soon as possible from active calls to Optimize() or
-  // FeedBack().
-  void Cancel() { is_cancelled_ = true; }
-
-  bool is_cancelled() const { return is_cancelled_; }
-
- private:
-  std::atomic<bool> is_cancelled_;
 };
 
-#define GRAPPLER_RETURN_IF_CANCELLED()                                  \
-  do {                                                                  \
-    if (is_cancelled()) {                                               \
-      return errors::DeadlineExceeded(this->name(), " was cancelled."); \
-    }                                                                   \
-  } while (0)
-
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index b8f4599f56..c3d70a1fdf 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -14,9 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/meta_optimizer.h"
-
-#include <memory>
-
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/versions.pb.h"
@@ -40,11 +37,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/functions.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
-#include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/platform/notification.h"
-#include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
@@ -121,23 +114,6 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
 
 #undef MK_OPT
 
-MetaOptimizer::MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg)
-    : cpu_device_(cpu_device), cfg_(cfg) {
-  DCHECK(cpu_device_ == nullptr ||
-         cpu_device_->attributes().device_type() == "CPU");
-  // TODO(rmlarsen): Increase kNumThreads to, say, port::NumSchedulableCPUs()
-  // if we want to the threadpool for parallelizing Grappler
-  const int kNumThreads = 1;
-  thread_pool_ = absl::make_unique<thread::ThreadPool>(
-      Env::Default(), "MetaOptimizerThreadPool", kNumThreads);
-}
-
-MetaOptimizer::~MetaOptimizer() {
-  // The ThreadPool destructor waits for threads to finish, so we don't
-  // pull the rug out from under them.
-  thread_pool_.reset();
-}
-
 Status MetaOptimizer::InitializeOptimizers(
     std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const {
   if (cfg_.disable_meta_optimizer()) {
@@ -333,7 +309,6 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
 
     VLOG(4) << "Starting optimization iteration " << iteration;
     for (const auto& optimizer : optimizers) {
-      GRAPPLER_RETURN_IF_CANCELLED();
       // Some optimizers can run only once.
       if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue;
       // Some must run only on the last iteration.
@@ -392,7 +367,6 @@ Status MetaOptimizer::RunOptimizer(
   // resets optimized_graph to an empty graph.
   optimized_graph->Swap(&optimized_item->graph);
   *optimized_graph = GraphDef();
-  // TODO(rmlarsen): Add timeout for individual optimizers.
   Status status =
       optimizer->Optimize(cluster, *optimized_item, optimized_graph);
   uint64 end_us = Env::Default()->NowMicros();
@@ -414,15 +388,14 @@ Status MetaOptimizer::RunOptimizer(
   return status;
 }
 
-Status MetaOptimizer::OptimizeMainGraphAndFunctionLibrary(
-    Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) {
+Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
+                               GraphDef* optimized_graph) {
   VLOG(1) << "Starting optimization for grappler item: " << item.id;
   optimization_results_.clear();
 
   // 1. Optimize main graph
   TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph));
   VLOG(1) << "Optimized main graph.";
-  GRAPPLER_RETURN_IF_CANCELLED();
 
   // Skip optimizing functions if this is a TPU graph. Currently, Grappler
   // passes do not handle TPU functions correctly in a variety of ways (Note
@@ -458,8 +431,6 @@ Status MetaOptimizer::OptimizeMainGraphAndFunctionLibrary(
     optimize_function_library = false;
 
     for (const FunctionDef& func : optimized_graph->library().function()) {
-      GRAPPLER_RETURN_IF_CANCELLED();
-
       const string& func_name = func.signature().name();
 
       // Skip already optimized functions.
@@ -534,43 +505,6 @@ void MetaOptimizer::PrintResult() {
   }
 }
 
-Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
-                               GraphDef* optimized_graph) {
-  const int64 kFiveMinutesInUsec = 5 * 60 * 1000 * 1000;
-  const int64 timeout_usec = (cfg_.meta_optimizer_timeout_ms() == 0
-                                  ? kFiveMinutesInUsec
-                                  : cfg_.meta_optimizer_timeout_ms() * 1000);
-  if (timeout_usec < 0) {
-    return OptimizeMainGraphAndFunctionLibrary(cluster, item, optimized_graph);
-  }
-
-  GraphDef optimized_with_timeout;
-  Status status;
-  Notification done;
-  thread_pool_->Schedule(
-      [this, cluster, &done, &optimized_with_timeout, &item, &status]() {
-        status = this->OptimizeMainGraphAndFunctionLibrary(
-            cluster, item, &optimized_with_timeout);
-        done.Notify();
-      });
-
-  const bool notified = WaitForNotificationWithTimeout(&done, timeout_usec);
-  if (notified && status.ok()) {
-    optimized_graph->Swap(&optimized_with_timeout);
-  } else {
-    *optimized_graph = item.graph;
-    if (!notified) {
-      this->Cancel();
-      done.WaitForNotification();
-      status = errors::DeadlineExceeded(
-          "Grappler MetaOptimizer timed out after ",
-          static_cast<float>(timeout_usec) / (1000 * 1000), " seconds");
-      LOG(WARNING) << status.error_message();
-    }
-  }
-  return status;
-}
-
 void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item,
                              const GraphDef& pruned_graph, double result) {
   // Nothing to do for MetaOptimizer.
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h
index 35d6a4559b..99a0a33ffa 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h
@@ -20,7 +20,6 @@ limitations under the License.
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
 
 namespace tensorflow {
@@ -29,8 +28,9 @@ namespace grappler {
 // Run the other grappler optimizers based on the specified rewriter config.
 class MetaOptimizer : public GraphOptimizer {
  public:
-  MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg);
-  ~MetaOptimizer();
+  MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg)
+      : cpu_device_(cpu_device), cfg_(cfg) {}
+  ~MetaOptimizer() override = default;
 
   string name() const override { return "meta_optimizer"; };
 
@@ -65,18 +65,9 @@ class MetaOptimizer : public GraphOptimizer {
   Status OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
                        GraphDef* optimized_graph);
 
-  // Run optimization passes over the main graph and for functions in the
-  // function library.
-  Status OptimizeMainGraphAndFunctionLibrary(Cluster* cluster,
-                                             const GrapplerItem& item,
-                                             GraphDef* optimized_graph);
-
   DeviceBase* const cpu_device_;  // may be NULL
   RewriterConfig cfg_;
 
-  // Thread pool used for launching optimizers asynchronously.
-  std::unique_ptr<thread::ThreadPool> thread_pool_;
-
   struct OptimizerResult {
     string optimizer_name;
     string result;
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
index 7f1dd91f09..3f3f43382f 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
@@ -461,68 +461,6 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryWithRestrictions) {
   EXPECT_FALSE(allowed_optimizations_my_mul_2->non_differentiable_rewrites);
 }
 
-class SleepingOptimizer : public CustomGraphOptimizer {
- public:
-  SleepingOptimizer() {}
-  string name() const override { return "test_optimizer"; }
-
-  Status Init(
-      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
-    return Status::OK();
-  }
-
-  Status Optimize(Cluster* cluster, const GrapplerItem& item,
-                  GraphDef* optimized_graph) override {
-    *optimized_graph = item.graph;
-    optimized_graph->add_node();
-    sleep(1);
-    return Status::OK();
-  }
-
-  void Feedback(Cluster* cluster, const GrapplerItem& item,
-                const GraphDef& optimized_graph, double result) override {}
-};
-
-REGISTER_GRAPH_OPTIMIZER(SleepingOptimizer);
-
-TEST_F(MetaOptimizerTest, OptimizerTimesOut) {
-  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
-  GrapplerItem item;
-  CHECK(fake_input.NextItem(&item));
-
-  RewriterConfig rewriter_config;
-  rewriter_config.add_optimizers("SleepingOptimizer");
-  rewriter_config.set_min_graph_nodes(-1);
-  rewriter_config.set_meta_optimizer_timeout_ms(1500);
-  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO);
-
-  MetaOptimizer optimizer(nullptr, rewriter_config);
-  GraphDef output;
-  const Status status = optimizer.Optimize(nullptr, item, &output);
-  EXPECT_EQ(status.error_message(),
-            "Grappler MetaOptimizer timed out after 1.5 seconds");
-  // Make sure the graph was reverted to the original regardless of when the
-  // optimizer timed out.
-  CompareGraphs(item.graph, output);
-}
-
-TEST_F(MetaOptimizerTest, OptimizerDoesNotTimeOut) {
-  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
-  GrapplerItem item;
-  CHECK(fake_input.NextItem(&item));
-
-  RewriterConfig rewriter_config;
-  rewriter_config.add_optimizers("SleepingOptimizer");
-  rewriter_config.set_min_graph_nodes(-1);
-  rewriter_config.set_meta_optimizer_timeout_ms(1500);
-  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::ONE);
-  MetaOptimizer optimizer(nullptr, rewriter_config);
-  GraphDef output;
-  const Status status = optimizer.Optimize(nullptr, item, &output);
-  TF_EXPECT_OK(status);
-  EXPECT_EQ(item.graph.node_size() + 1, output.node_size());
-}
-
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 7ccd54b818..8c31468ff5 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -83,10 +83,6 @@ message RewriterConfig {
   // Controls how many times we run the optimizers in meta optimizer (default
   // is once).
   NumIterationsType meta_optimizer_iterations = 12;
-  // Maximum number of milliseconds to spend optimizing a single graph before
-  // timing out. If equal to 0 the system picks a default (currently 5 minutes).
-  // If less than 0 the optimizer will never time out.
-  int64 meta_optimizer_timeout_ms = 20;
 
   // The minimum number of nodes in a graph to optimizer. For smaller graphs,
   // optimization is skipped.
-- 
GitLab


From 7ac4860203f85a5d42cac32d1585f91fc6345e9c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 09:05:20 -0700
Subject: [PATCH 0776/1085] Raise the VLOG level for CalculateTensorSize
 information.

PiperOrigin-RevId: 216705389
---
 tensorflow/core/grappler/costs/utils.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index 87b74e2952..7d868a3679 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -209,7 +209,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) {
   // Can't infer the size if the rank is unknown. It has to be at least a
   // scalar though.
   if (shape.unknown_rank()) {
-    VLOG(1) << "CalculateTensorSize() -- unknown rank";
+    VLOG(2) << "CalculateTensorSize() -- unknown rank";
     return size;
   }
 
@@ -217,7 +217,7 @@ int64 CalculateTensorSize(const OpInfo::TensorProperties& prop) {
   for (int i = 0; i < shape.dim_size(); ++i) {
     if (shape.dim(i).size() < 0) {
       shape.mutable_dim(i)->set_size(1);
-      VLOG(1) << "CalculateTensorSize() -- unknown dim: " << i;
+      VLOG(2) << "CalculateTensorSize() -- unknown dim: " << i;
     }
   }
 
-- 
GitLab


From ac54c4591d1972472195ea06151946991dc550dc Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 11 Oct 2018 09:13:02 -0700
Subject: [PATCH 0777/1085] Account for the possibility of nested lambda
 functions.

PiperOrigin-RevId: 216706627
---
 .../pyct/static_analysis/activity.py          | 38 ++++++++++---------
 .../pyct/static_analysis/activity_test.py     | 11 ++++++
 .../python/autograph/pyct/transformer.py      |  6 ++-
 3 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py
index 0ce410d522..0b95b714fb 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py
@@ -133,6 +133,14 @@ class Scope(object):
       self.parent.mark_returned(name)
 
 
+class _Lambda(object):
+
+  no_root = True
+
+  def __init__(self):
+    self.args = set()
+
+
 class ActivityAnalyzer(transformer.Base):
   """Annotates nodes with local scope information.
 
@@ -151,11 +159,8 @@ class ActivityAnalyzer(transformer.Base):
     # leaves in the AST, that is, they cannot contain other statements.
     self._in_return_statement = False
     self._in_aug_assign = False
-    self._in_lambda = False
     self._in_function_def_args = False
 
-    self._untracked_symbols = None
-
   @property
   def _in_constructor(self):
     if len(self.enclosing_entities) > 1:
@@ -179,11 +184,12 @@ class ActivityAnalyzer(transformer.Base):
       return
     qn = anno.getanno(node, anno.Basic.QN)
 
-    # Ignore any untracked symbols.
-    if self._untracked_symbols:
-      if qn in self._untracked_symbols:
+    # When inside a lambda, ignore any of the lambda's arguments.
+    # This includes attributes or slices of those arguments.
+    for l in self.state[_Lambda]:
+      if qn in l.args:
         return
-      if qn.owner_set & set(self._untracked_symbols):
+      if qn.owner_set & set(l.args):
         return
 
     if isinstance(node.ctx, gast.Store):
@@ -199,11 +205,11 @@ class ActivityAnalyzer(transformer.Base):
         # In function defs have the meaning of defining a variable.
         self.scope.mark_modified(qn)
         self.scope.mark_param(qn, self.enclosing_entities[-1])
-      elif self._in_lambda:
-        assert isinstance(self._untracked_symbols, set)
-        self._untracked_symbols.add(qn)
+      elif self.state[_Lambda].level:
+        # In lambdas, they are tracked separately.
+        self.state[_Lambda].args.add(qn)
       else:
-        # TODO(mdan): Is this case even possible?
+        # TODO(mdan): Is this case possible at all?
         raise NotImplementedError(
             'Param "{}" outside a function arguments or lambda.'.format(qn))
     else:
@@ -317,12 +323,10 @@ class ActivityAnalyzer(transformer.Base):
     return parent
 
   def visit_Lambda(self, node):
-    assert not self._in_lambda or self._in_function_def_args
-    self._in_lambda = True
-    self._untracked_symbols = set()
+    assert not self._in_function_def_args
+    self.state[_Lambda].enter()
     node = self.generic_visit(node)
-    self._untracked_symbols = None
-    self._in_lambda = False
+    self.state[_Lambda].exit()
     return node
 
   def visit_arguments(self, node):
@@ -339,7 +343,7 @@ class ActivityAnalyzer(transformer.Base):
 
     # A separate Scope tracks the actual function definition.
     self._enter_scope(True)
-    assert not self._in_function_def_args
+    assert not (self._in_function_def_args or self.state[_Lambda].level)
     self._in_function_def_args = True
     node.args = self.visit(node.args)
     self._in_function_def_args = False
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
index 678199970c..65267751c1 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
@@ -461,6 +461,17 @@ class ActivityAnalyzerTest(test.TestCase):
     self.assertScopeIs(body_scope, ('b', 'd'), ('a',))
     self.assertSymbolSetsAre((), body_scope.params.keys(), 'params')
 
+  def test_lambda_nested(self):
+
+    def test_fn(a, b, c, d, e):  # pylint: disable=unused-argument
+      a = lambda a, b: d(lambda b: a + b + c)  # pylint: disable=undefined-variable
+
+    node, _ = self._parse_and_analyze(test_fn)
+    fn_node = node.body[0]
+    body_scope = anno.getanno(fn_node, NodeAnno.BODY_SCOPE)
+    self.assertScopeIs(body_scope, ('c', 'd'), ('a',))
+    self.assertSymbolSetsAre((), body_scope.params.keys(), 'params')
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/autograph/pyct/transformer.py b/tensorflow/python/autograph/pyct/transformer.py
index 520f5038da..7827e6e9cd 100644
--- a/tensorflow/python/autograph/pyct/transformer.py
+++ b/tensorflow/python/autograph/pyct/transformer.py
@@ -92,7 +92,8 @@ class _StateStack(object):
     # the superclass' setattr.
     object.__setattr__(self, 'type', type_)
     object.__setattr__(self, '_stack', [])
-    self.enter()
+    if not hasattr(type_, 'no_root'):
+      self.enter()
 
   def enter(self):
     self._stack.append(self.type())
@@ -108,6 +109,9 @@ class _StateStack(object):
   def value(self):
     return self._stack[-1]
 
+  def __iter__(self):
+    return iter(self._stack)
+
   def __getattr__(self, key):
     return getattr(self._stack[-1], key)
 
-- 
GitLab


From 0d054f20851f6156b1af26c35e68f6083bca8e13 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Thu, 11 Oct 2018 09:14:14 -0700
Subject: [PATCH 0778/1085] Have TensorFlow opensource code use
 tensorflow_estimator module if available.

PiperOrigin-RevId: 216706771
---
 tensorflow/BUILD                              |    1 -
 tensorflow/api_template.__init__.py           |   17 +-
 tensorflow/contrib/estimator/BUILD            |  337 +-
 tensorflow/contrib/estimator/__init__.py      |   46 +-
 .../estimator/python/estimator/baseline.py    |   92 +-
 .../python/estimator/baseline_test.py         |  436 --
 .../python/estimator/boosted_trees.py         |  419 +-
 .../python/estimator/boosted_trees_test.py    |  438 --
 .../contrib/estimator/python/estimator/dnn.py |  156 +-
 .../python/estimator/dnn_linear_combined.py   |  176 +-
 .../estimator/dnn_linear_combined_test.py     |  227 -
 .../estimator/python/estimator/dnn_test.py    |  171 -
 .../estimator/dnn_with_layer_annotations.py   |  430 +-
 .../dnn_with_layer_annotations_test.py        |  611 ---
 .../python/estimator/early_stopping.py        |  498 +-
 .../python/estimator/early_stopping_test.py   |  246 -
 .../estimator/python/estimator/export.py      |  213 +-
 .../estimator/python/estimator/export_test.py |  373 --
 .../estimator/python/estimator/exporter.py    |  270 +-
 .../python/estimator/exporter_test.py         |  206 -
 .../estimator/python/estimator/extenders.py   |  349 +-
 .../python/estimator/extenders_test.py        |  426 --
 .../estimator/python/estimator/head.py        |  969 +---
 .../estimator/python/estimator/head_test.py   | 1482 ------
 .../estimator/python/estimator/hooks.py       |  277 +-
 .../estimator/python/estimator/hooks_test.py  |  403 --
 .../estimator/python/estimator/linear.py      |  130 +-
 .../estimator/python/estimator/linear_test.py |  156 -
 .../estimator/python/estimator/logit_fns.py   |   86 +-
 .../python/estimator/logit_fns_test.py        |   95 -
 .../estimator/python/estimator/multi_head.py  |  416 +-
 .../python/estimator/multi_head_test.py       |  705 ---
 .../python/estimator/replicate_model_fn.py    |  820 +---
 .../estimator/replicate_model_fn_test.py      | 1649 -------
 .../contrib/estimator/python/estimator/rnn.py |  572 +--
 .../estimator/python/estimator/rnn_test.py    | 1185 -----
 .../python/estimator/saved_model_estimator.py |  441 +-
 .../estimator/saved_model_estimator_test.py   |  369 --
 tensorflow/python/estimator/BUILD             |  424 +-
 tensorflow/python/estimator/__init__.py       |   17 +-
 .../python/estimator/canned/__init__.py       |   32 +
 .../python/estimator/canned/baseline.py       |  366 +-
 .../python/estimator/canned/baseline_test.py  | 1558 -------
 .../python/estimator/canned/boosted_trees.py  | 1558 +------
 .../estimator/canned/boosted_trees_test.py    | 2549 -----------
 .../estimator/canned/boosted_trees_utils.py   |   72 +-
 .../canned/boosted_trees_utils_test.py        |  187 -
 tensorflow/python/estimator/canned/dnn.py     |  652 +--
 .../estimator/canned/dnn_linear_combined.py   |  626 +--
 .../canned/dnn_linear_combined_test.py        | 1123 -----
 .../python/estimator/canned/dnn_test.py       |  580 ---
 .../estimator/canned/dnn_testing_utils.py     | 2068 +--------
 tensorflow/python/estimator/canned/head.py    | 1593 +------
 .../python/estimator/canned/head_test.py      | 4056 -----------------
 tensorflow/python/estimator/canned/linear.py  |  535 +--
 .../python/estimator/canned/linear_test.py    |  255 --
 .../estimator/canned/linear_testing_utils.py  | 2349 +---------
 .../python/estimator/canned/metric_keys.py    |   46 +-
 .../python/estimator/canned/optimizers.py     |   72 +-
 .../estimator/canned/optimizers_test.py       |  103 -
 .../python/estimator/canned/parsing_utils.py  |  296 +-
 .../estimator/canned/parsing_utils_test.py    |  211 -
 .../estimator/canned/prediction_keys.py       |   29 +-
 tensorflow/python/estimator/estimator.py      | 2167 +--------
 tensorflow/python/estimator/estimator_lib.py  |   45 +-
 tensorflow/python/estimator/estimator_test.py | 3280 -------------
 .../python/estimator/export/__init__.py       |   32 +
 tensorflow/python/estimator/export/export.py  |  628 +--
 .../python/estimator/export/export_lib.py     |   24 +-
 .../python/estimator/export/export_output.py  |  407 +-
 .../estimator/export/export_output_test.py    |  397 --
 .../python/estimator/export/export_test.py    |  802 ----
 tensorflow/python/estimator/exporter.py       |  498 +-
 tensorflow/python/estimator/exporter_test.py  |  400 --
 tensorflow/python/estimator/gc.py             |  199 +-
 tensorflow/python/estimator/gc_test.py        |  156 -
 .../python/estimator/inputs/__init__.py       |   32 +
 tensorflow/python/estimator/inputs/inputs.py  |   19 +-
 .../python/estimator/inputs/numpy_io.py       |  217 +-
 .../python/estimator/inputs/numpy_io_test.py  |  620 ---
 .../python/estimator/inputs/pandas_io.py      |  147 +-
 .../python/estimator/inputs/pandas_io_test.py |  320 --
 .../estimator/inputs/queues/__init__.py       |   32 +
 .../inputs/queues/feeding_functions.py        |  507 +--
 .../inputs/queues/feeding_functions_test.py   |  391 --
 .../inputs/queues/feeding_queue_runner.py     |  172 +-
 .../queues/feeding_queue_runner_test.py       |  140 -
 tensorflow/python/estimator/keras.py          |  492 +-
 tensorflow/python/estimator/keras_test.py     |  805 ----
 tensorflow/python/estimator/model_fn.py       |  510 +--
 tensorflow/python/estimator/model_fn_test.py  |  661 ---
 tensorflow/python/estimator/run_config.py     |  907 +---
 .../python/estimator/run_config_test.py       | 1235 -----
 tensorflow/python/estimator/training.py       | 1065 +----
 tensorflow/python/estimator/training_test.py  | 2198 ---------
 tensorflow/python/estimator/util.py           |  143 +-
 tensorflow/python/estimator/util_test.py      |  102 -
 tensorflow/python/feature_column/BUILD        |    2 +-
 tensorflow/python/tools/api/generator/BUILD   |   18 -
 ...rflow.estimator.-baseline-classifier.pbtxt |    4 +-
 ...orflow.estimator.-baseline-regressor.pbtxt |    4 +-
 .../tensorflow.estimator.-best-exporter.pbtxt |    4 +-
 ....estimator.-boosted-trees-classifier.pbtxt |    6 +-
 ...w.estimator.-boosted-trees-regressor.pbtxt |    6 +-
 ...nsorflow.estimator.-d-n-n-classifier.pbtxt |    4 +-
 ...or.-d-n-n-linear-combined-classifier.pbtxt |    4 +-
 ...tor.-d-n-n-linear-combined-regressor.pbtxt |    4 +-
 ...ensorflow.estimator.-d-n-n-regressor.pbtxt |    4 +-
 ...tensorflow.estimator.-estimator-spec.pbtxt |    4 +-
 .../v1/tensorflow.estimator.-estimator.pbtxt  |    2 +-
 .../v1/tensorflow.estimator.-eval-spec.pbtxt  |    4 +-
 .../v1/tensorflow.estimator.-exporter.pbtxt   |    2 +-
 ...tensorflow.estimator.-final-exporter.pbtxt |    4 +-
 ...ensorflow.estimator.-latest-exporter.pbtxt |    4 +-
 ...sorflow.estimator.-linear-classifier.pbtxt |    4 +-
 ...nsorflow.estimator.-linear-regressor.pbtxt |    4 +-
 .../v1/tensorflow.estimator.-mode-keys.pbtxt  |    2 +-
 .../v1/tensorflow.estimator.-run-config.pbtxt |    2 +-
 .../v1/tensorflow.estimator.-train-spec.pbtxt |    4 +-
 ...rflow.estimator.-warm-start-settings.pbtxt |    4 +-
 ...imator.export.-classification-output.pbtxt |    4 +-
 ...flow.estimator.export.-export-output.pbtxt |    2 +-
 ...low.estimator.export.-predict-output.pbtxt |    4 +-
 ....estimator.export.-regression-output.pbtxt |    4 +-
 ...mator.export.-serving-input-receiver.pbtxt |    4 +-
 ...xport.-tensor-serving-input-receiver.pbtxt |    4 +-
 ...rflow.estimator.-baseline-classifier.pbtxt |    4 +-
 ...orflow.estimator.-baseline-regressor.pbtxt |    4 +-
 .../tensorflow.estimator.-best-exporter.pbtxt |    4 +-
 ....estimator.-boosted-trees-classifier.pbtxt |    6 +-
 ...w.estimator.-boosted-trees-regressor.pbtxt |    6 +-
 ...nsorflow.estimator.-d-n-n-classifier.pbtxt |    4 +-
 ...or.-d-n-n-linear-combined-classifier.pbtxt |    4 +-
 ...tor.-d-n-n-linear-combined-regressor.pbtxt |    4 +-
 ...ensorflow.estimator.-d-n-n-regressor.pbtxt |    4 +-
 ...tensorflow.estimator.-estimator-spec.pbtxt |    4 +-
 .../v2/tensorflow.estimator.-estimator.pbtxt  |    2 +-
 .../v2/tensorflow.estimator.-eval-spec.pbtxt  |    4 +-
 .../v2/tensorflow.estimator.-exporter.pbtxt   |    2 +-
 ...tensorflow.estimator.-final-exporter.pbtxt |    4 +-
 ...ensorflow.estimator.-latest-exporter.pbtxt |    4 +-
 ...sorflow.estimator.-linear-classifier.pbtxt |    4 +-
 ...nsorflow.estimator.-linear-regressor.pbtxt |    4 +-
 .../v2/tensorflow.estimator.-mode-keys.pbtxt  |    2 +-
 .../v2/tensorflow.estimator.-run-config.pbtxt |    2 +-
 .../v2/tensorflow.estimator.-train-spec.pbtxt |    4 +-
 ...rflow.estimator.-warm-start-settings.pbtxt |    4 +-
 ...imator.export.-classification-output.pbtxt |    4 +-
 ...flow.estimator.export.-export-output.pbtxt |    2 +-
 ...low.estimator.export.-predict-output.pbtxt |    4 +-
 ....estimator.export.-regression-output.pbtxt |    4 +-
 ...mator.export.-serving-input-receiver.pbtxt |    4 +-
 ...xport.-tensor-serving-input-receiver.pbtxt |    4 +-
 tensorflow/tools/pip_package/setup.py         |    1 +
 154 files changed, 918 insertions(+), 56307 deletions(-)
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/baseline_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/early_stopping_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/export_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/exporter_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/extenders_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/head_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/hooks_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/linear_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/logit_fns_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/multi_head_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/rnn_test.py
 delete mode 100644 tensorflow/contrib/estimator/python/estimator/saved_model_estimator_test.py
 delete mode 100644 tensorflow/python/estimator/canned/baseline_test.py
 delete mode 100644 tensorflow/python/estimator/canned/boosted_trees_test.py
 delete mode 100644 tensorflow/python/estimator/canned/boosted_trees_utils_test.py
 delete mode 100644 tensorflow/python/estimator/canned/dnn_linear_combined_test.py
 delete mode 100644 tensorflow/python/estimator/canned/dnn_test.py
 delete mode 100644 tensorflow/python/estimator/canned/head_test.py
 delete mode 100644 tensorflow/python/estimator/canned/linear_test.py
 delete mode 100644 tensorflow/python/estimator/canned/optimizers_test.py
 delete mode 100644 tensorflow/python/estimator/canned/parsing_utils_test.py
 delete mode 100644 tensorflow/python/estimator/estimator_test.py
 delete mode 100644 tensorflow/python/estimator/export/export_output_test.py
 delete mode 100644 tensorflow/python/estimator/export/export_test.py
 delete mode 100644 tensorflow/python/estimator/exporter_test.py
 delete mode 100644 tensorflow/python/estimator/gc_test.py
 delete mode 100644 tensorflow/python/estimator/inputs/numpy_io_test.py
 delete mode 100644 tensorflow/python/estimator/inputs/pandas_io_test.py
 delete mode 100644 tensorflow/python/estimator/inputs/queues/feeding_functions_test.py
 delete mode 100644 tensorflow/python/estimator/inputs/queues/feeding_queue_runner_test.py
 delete mode 100644 tensorflow/python/estimator/keras_test.py
 delete mode 100644 tensorflow/python/estimator/model_fn_test.py
 delete mode 100644 tensorflow/python/estimator/run_config_test.py
 delete mode 100644 tensorflow/python/estimator/training_test.py
 delete mode 100644 tensorflow/python/estimator/util_test.py

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 9b62a50452..8f4927324b 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -553,7 +553,6 @@ gen_api_init_files(
 
 py_library(
     name = "tensorflow_py",
-    srcs = ["//tensorflow/python/estimator/api:estimator_python_api_gen"],
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index 2de740e145..65172fd74a 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -23,18 +23,11 @@ import os as _os
 # pylint: disable=g-bad-import-order
 from tensorflow.python import pywrap_tensorflow  # pylint: disable=unused-import
 
-try:
-  # Add `estimator` attribute to allow access to estimator APIs via
-  # "tf.estimator..."
-  from tensorflow.python.estimator.api import estimator  # pylint: disable=g-import-not-at-top
-
-  # Add `estimator` to the __path__ to allow "from tensorflow.estimator..."
-  # style imports.
-  from tensorflow.python.estimator import api as estimator_api  # pylint: disable=g-import-not-at-top
-  __path__ += [_os.path.dirname(estimator_api.__file__)]
-  del estimator_api
-except (ImportError, AttributeError):
-  print('tf.estimator package not installed.')
+from tensorflow.python.tools import component_api_helper
+component_api_helper.package_hook(
+    parent_package_str=__name__,
+    child_package_str=('tensorflow_estimator.python.estimator.api.estimator'))
+del component_api_helper
 
 # API IMPORTS PLACEHOLDER
 
diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index 1ea00fb7f3..8b99158b30 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -8,6 +8,7 @@ licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "py_test")
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+# PLACEHOLDER PIP REQUIREMENTS
 
 py_library(
     name = "estimator_py",
@@ -20,6 +21,7 @@ py_library(
         ":dnn_linear_combined",
         ":dnn_with_layer_annotations",
         ":early_stopping",
+        ":expect_tensorflow_estimator_installed",
         ":export",
         ":exporter",
         ":extenders",
@@ -32,6 +34,7 @@ py_library(
         ":rnn",
         ":saved_model_estimator",
         "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:estimator_py",
     ],
 )
 
@@ -40,98 +43,41 @@ py_library(
     srcs = ["python/estimator/baseline.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:baseline",
     ],
 )
 
-py_test(
-    name = "baseline_test",
-    size = "small",
-    srcs = ["python/estimator/baseline_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "notsan",
-    ],
-    deps = [
-        ":baseline",
-        ":head",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:export_export",
-        "//tensorflow/python/estimator:metric_keys",
-        "//tensorflow/python/estimator:numpy_io",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "boosted_trees",
     srcs = ["python/estimator/boosted_trees.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:boosted_trees",
     ],
 )
 
-py_test(
-    name = "boosted_trees_test",
-    size = "medium",
-    srcs = ["python/estimator/boosted_trees_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "notsan",
-    ],
-    deps = [
-        ":boosted_trees",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:numpy_io",
-        "//third_party/py/numpy",
-    ],
-)
-
 py_library(
     name = "dnn",
     srcs = ["python/estimator/dnn.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:dnn",
     ],
 )
 
-py_test(
-    name = "dnn_test",
-    size = "medium",
-    srcs = ["python/estimator/dnn_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "notsan",
-        "optonly",  # times out http://b/79220679
-    ],
-    deps = [
-        ":dnn",
-        ":head",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:dnn_testing_utils",
-        "//tensorflow/python/estimator:export_export",
-        "//tensorflow/python/estimator:numpy_io",
-        "//tensorflow/python/estimator:prediction_keys",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "dnn_with_layer_annotations",
     srcs = ["python/estimator/dnn_with_layer_annotations.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:head",
@@ -140,64 +86,18 @@ py_library(
     ],
 )
 
-py_test(
-    name = "dnn_with_layer_annotations_test",
-    size = "medium",
-    srcs = ["python/estimator/dnn_with_layer_annotations_test.py"],
-    shard_count = 4,
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "notsan",  # b/67510291
-    ],
-    deps = [
-        ":dnn_with_layer_annotations",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:dnn",
-        "//tensorflow/python/estimator:dnn_testing_utils",
-        "//tensorflow/python/estimator:export_export",
-        "//tensorflow/python/estimator:numpy_io",
-        "//tensorflow/python/estimator:pandas_io",
-        "//tensorflow/python/estimator:prediction_keys",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "dnn_linear_combined",
     srcs = ["python/estimator/dnn_linear_combined.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:dnn_linear_combined",
     ],
 )
 
-py_test(
-    name = "dnn_linear_combined_test",
-    size = "medium",
-    srcs = ["python/estimator/dnn_linear_combined_test.py"],
-    shard_count = 3,
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "notsan",
-    ],
-    deps = [
-        ":dnn_linear_combined",
-        ":head",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:dnn_testing_utils",
-        "//tensorflow/python/estimator:export_export",
-        "//tensorflow/python/estimator:linear_testing_utils",
-        "//tensorflow/python/estimator:numpy_io",
-        "//tensorflow/python/estimator:prediction_keys",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "extenders",
     srcs = [
@@ -205,6 +105,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:model_fn",
@@ -213,23 +114,6 @@ py_library(
     ],
 )
 
-py_test(
-    name = "extenders_test",
-    size = "medium",
-    srcs = ["python/estimator/extenders_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],  # b/62863147
-    deps = [
-        ":extenders",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/contrib/data/python/ops:dataset_ops",
-        "//tensorflow/contrib/predictor",
-        "//tensorflow/python/estimator:estimator_py",
-        "//tensorflow/python/estimator:linear",
-        "//third_party/py/numpy",
-    ],
-)
-
 py_library(
     name = "export",
     srcs = [
@@ -237,22 +121,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python/estimator:model_fn",
-    ],
-)
-
-py_test(
-    name = "export_test",
-    size = "medium",
-    srcs = ["python/estimator/export_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],  # b/62863147
-    deps = [
-        ":export",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator",
-        "//tensorflow/python/estimator:export_export",
-        "//tensorflow/python/estimator:export_output",
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow/python/estimator:model_fn",
     ],
 )
@@ -264,24 +133,12 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:exporter",
     ],
 )
 
-py_test(
-    name = "exporter_test",
-    size = "medium",
-    srcs = ["python/estimator/exporter_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":exporter",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator",
-        "//tensorflow/python/estimator:exporter",
-    ],
-)
-
 py_library(
     name = "head",
     srcs = [
@@ -289,6 +146,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:head",
@@ -298,22 +156,6 @@ py_library(
     ],
 )
 
-py_test(
-    name = "head_test",
-    size = "medium",
-    srcs = ["python/estimator/head_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":head",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:metric_keys",
-        "//tensorflow/python/estimator:model_fn",
-        "//tensorflow/python/estimator:prediction_keys",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "hooks",
     srcs = [
@@ -321,58 +163,23 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:estimator_py",
     ],
 )
 
-py_test(
-    name = "hooks_test",
-    size = "medium",
-    srcs = ["python/estimator/hooks_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],
-    deps = [
-        ":hooks",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:estimator_py",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "linear",
     srcs = ["python/estimator/linear.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:linear",
     ],
 )
 
-py_test(
-    name = "linear_test",
-    size = "medium",
-    srcs = ["python/estimator/linear_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "notsan",
-    ],
-    deps = [
-        ":head",
-        ":linear",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:export_export",
-        "//tensorflow/python/estimator:linear_testing_utils",
-        "//tensorflow/python/estimator:numpy_io",
-        "//tensorflow/python/estimator:prediction_keys",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "logit_fns",
     srcs = [
@@ -380,24 +187,13 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:dnn",
         "//tensorflow/python/estimator:linear",
     ],
 )
 
-py_test(
-    name = "logit_fns_test",
-    size = "small",
-    srcs = ["python/estimator/logit_fns_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":logit_fns",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:model_fn",
-    ],
-)
-
 py_library(
     name = "multi_head",
     srcs = [
@@ -405,6 +201,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:head",
@@ -414,23 +211,6 @@ py_library(
     ],
 )
 
-py_test(
-    name = "multi_head_test",
-    size = "small",
-    srcs = ["python/estimator/multi_head_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":head",
-        ":multi_head",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:metric_keys",
-        "//tensorflow/python/estimator:model_fn",
-        "//tensorflow/python/estimator:prediction_keys",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "replicate_model_fn",
     srcs = [
@@ -438,6 +218,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:model_fn",
@@ -446,35 +227,12 @@ py_library(
     ],
 )
 
-cuda_py_test(
-    name = "replicate_model_fn_test",
-    size = "medium",
-    srcs = ["python/estimator/replicate_model_fn_test.py"],
-    additional_deps = [
-        "@absl_py//absl/testing:parameterized",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator",
-        "//tensorflow/python/estimator:dnn",
-        "//tensorflow/python/estimator:export_export",
-        "//tensorflow/python/estimator:export_output",
-        "//tensorflow/python/estimator:model_fn",
-        "//tensorflow/python/estimator:numpy_io",
-        "//tensorflow/python/estimator:optimizers",
-        "//tensorflow/python/estimator:prediction_keys",
-        ":replicate_model_fn",
-    ],
-    tags = [
-        "manual",
-        "multi_gpu",
-        "notap",
-    ],
-)
-
 py_library(
     name = "rnn",
     srcs = ["python/estimator/rnn.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":extenders",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/contrib/feature_column:feature_column_py",
@@ -485,55 +243,22 @@ py_library(
     ],
 )
 
-py_test(
-    name = "rnn_test",
-    size = "medium",
-    srcs = ["python/estimator/rnn_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "noasan",  # times out
-        "notsan",
-        "optonly",  # times out http://b/79220679
-    ],
-    deps = [
-        ":head",
-        ":rnn",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/contrib/data",
-        "//tensorflow/python/estimator:numpy_io",
-        "//tensorflow/python/estimator:parsing_utils",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "early_stopping",
     srcs = ["python/estimator/early_stopping.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
     ],
 )
 
-py_test(
-    name = "early_stopping_test",
-    srcs = ["python/estimator/early_stopping_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":early_stopping",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
 py_library(
     name = "saved_model_estimator",
     srcs = ["python/estimator/saved_model_estimator.py"],
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":export",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
@@ -542,21 +267,9 @@ py_library(
     ],
 )
 
-py_test(
-    name = "saved_model_estimator_test",
-    size = "medium",
-    srcs = ["python/estimator/saved_model_estimator_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "notsan",
-    ],
-    deps = [
-        ":export",
-        ":saved_model_estimator",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator",
-        "//tensorflow/python/estimator:export_export",
-        "//tensorflow/python/estimator:export_output",
-        "//tensorflow/python/estimator:model_fn",
-    ],
+py_library(
+    name = "expect_tensorflow_estimator_installed",
+    # This is a dummy rule used as a dependency in open-source.
+    # We expect tensorflow_estimator to already be installed.
+    visibility = ["//visibility:public"],
 )
diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py
index 419609b1af..fc7d94e4fc 100644
--- a/tensorflow/contrib/estimator/__init__.py
+++ b/tensorflow/contrib/estimator/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,33 +12,37 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Experimental utilities re:tf.estimator.*."""
+"""estimator python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=unused-import,line-too-long,wildcard-import
-from tensorflow.contrib.estimator.python.estimator.baseline import *
-from tensorflow.contrib.estimator.python.estimator.boosted_trees import *
-from tensorflow.contrib.estimator.python.estimator.dnn import *
-from tensorflow.contrib.estimator.python.estimator.dnn_with_layer_annotations import *
-from tensorflow.contrib.estimator.python.estimator.dnn_linear_combined import *
-from tensorflow.contrib.estimator.python.estimator.early_stopping import *
-from tensorflow.contrib.estimator.python.estimator.export import *
-from tensorflow.contrib.estimator.python.estimator.extenders import *
-from tensorflow.contrib.estimator.python.estimator.head import *
-from tensorflow.contrib.estimator.python.estimator.hooks import *
-from tensorflow.contrib.estimator.python.estimator.linear import *
-from tensorflow.contrib.estimator.python.estimator.logit_fns import *
-from tensorflow.contrib.estimator.python.estimator.multi_head import *
-from tensorflow.contrib.estimator.python.estimator.replicate_model_fn import *
-from tensorflow.contrib.estimator.python.estimator.rnn import *
-from tensorflow.contrib.estimator.python.estimator.saved_model_estimator import *
-from tensorflow.python.estimator.export.export import *
+# Importing from tensorflow.python.estimator
+# is unsupported and will soon break!
+
+from tensorflow_estimator.contrib import estimator
+
+# Fixes remove_undocumented not working as intended.
+#
+# Problem is that when the below import happens (for first time,
+# Python only imports things once), Python sets attribute named
+# 'python' to this package. If this first import happens
+# after the call to remove_undocumented, then the 'python'
+# attribute won't be removed.
+import tensorflow.contrib.estimator.python
+
+# Include attrs that start with single underscore.
+estimator.__all__ = [s for s in dir(estimator) if not s.startswith('__')]
 
+from tensorflow_estimator.contrib.estimator import *
 from tensorflow.python.util.all_util import remove_undocumented
-# pylint: enable=unused-import,line-too-long,wildcard-import
 
 _allowed_symbols = [
     'add_metrics',
diff --git a/tensorflow/contrib/estimator/python/estimator/baseline.py b/tensorflow/contrib/estimator/python/estimator/baseline.py
index beffbee730..5a684befb6 100644
--- a/tensorflow/contrib/estimator/python/estimator/baseline.py
+++ b/tensorflow/contrib/estimator/python/estimator/baseline.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,87 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Baseline estimators."""
+"""baseline python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import baseline
-
-
-class BaselineEstimator(estimator.Estimator):
-  """An estimator that can establish a simple baseline.
-
-  The estimator uses a user-specified head.
-
-  This estimator ignores feature values and will learn to predict the average
-  value of each label. E.g. for single-label classification problems, this will
-  predict the probability distribution of the classes as seen in the labels.
-  For multi-label classification problems, it will predict the ratio of examples
-  that contain each class.
-
-  Example:
-
-  ```python
-
-  # Build baseline multi-label classifier.
-  estimator = BaselineEstimator(
-      head=tf.contrib.estimator.multi_label_head(n_classes=3))
-
-  # Input builders
-  def input_fn_train: # returns x, y (where y represents label's class index).
-    pass
-
-  def input_fn_eval: # returns x, y (where y represents label's class index).
-    pass
-
-  # Fit model.
-  estimator.train(input_fn=input_fn_train)
-
-  # Evaluates cross entropy between the test and train labels.
-  loss = classifier.evaluate(input_fn=input_fn_eval)["loss"]
-
-  # For each class, predicts the ratio of training examples that contain the
-  # class.
-  predictions = classifier.predict(new_samples)
-
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-    otherwise there will be a `KeyError`:
-
-  * if `weight_column` passed to the `head` constructor is not `None`, a feature
-    with `key=weight_column` whose value is a `Tensor`.
-  """
+from tensorflow_estimator.contrib.estimator.python.estimator import baseline
 
-  def __init__(self,
-               head,
-               model_dir=None,
-               optimizer='Ftrl',
-               config=None):
-    """Initializes a BaselineEstimator instance.
+# Include attrs that start with single underscore.
+baseline.__all__ = [s for s in dir(baseline) if not s.startswith('__')]
 
-    Args:
-      head: A `_Head` instance constructed with a method such as
-        `tf.contrib.estimator.multi_label_head`.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator to
-        continue training a previously saved model.
-      optimizer: String, `tf.Optimizer` object, or callable that creates the
-        optimizer to use for training. If not specified, will use
-        `FtrlOptimizer` with a default learning rate of 0.3.
-      config: `RunConfig` object to configure the runtime settings.
-    """
-    def _model_fn(features, labels, mode, config):
-      return baseline._baseline_model_fn(  # pylint: disable=protected-access
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          optimizer=optimizer,
-          config=config)
-    super(BaselineEstimator, self).__init__(
-        model_fn=_model_fn,
-        model_dir=model_dir,
-        config=config)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.baseline import *
diff --git a/tensorflow/contrib/estimator/python/estimator/baseline_test.py b/tensorflow/contrib/estimator/python/estimator/baseline_test.py
deleted file mode 100644
index 513feb03b6..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/baseline_test.py
+++ /dev/null
@@ -1,436 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for baseline.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import shutil
-import tempfile
-
-import numpy as np
-import six
-
-from tensorflow.contrib.estimator.python.estimator import baseline
-from tensorflow.contrib.estimator.python.estimator import head as head_lib
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column as feature_column_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import optimizer
-from tensorflow.python.training import saver
-
-# Names of variables created by model.
-BIAS_NAME = 'baseline/bias'
-
-
-def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
-  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
-    expected = ops.convert_to_tensor(expected, name='expected')
-    actual = ops.convert_to_tensor(actual, name='actual')
-    rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
-    rtol = ops.convert_to_tensor(rtol, name='rtol')
-    return check_ops.assert_less(
-        rdiff,
-        rtol,
-        data=('Condition expected =~ actual did not hold element-wise:'
-              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
-              'rtol = ', rtol,),
-        name=scope)
-
-
-def save_variables_to_ckpt(model_dir):
-  init_all_op = [variables.global_variables_initializer()]
-  with tf_session.Session() as sess:
-    sess.run(init_all_op)
-    saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
-
-
-def _baseline_estimator_fn(
-    weight_column=None, label_dimension=1, *args, **kwargs):
-  """Returns a BaselineEstimator that uses regression_head."""
-  return baseline.BaselineEstimator(
-      head=head_lib.regression_head(
-          weight_column=weight_column, label_dimension=label_dimension,
-          # Tests in core (from which this test inherits) test the sum loss.
-          loss_reduction=losses.Reduction.SUM),
-      *args, **kwargs)
-
-
-class BaselineEstimatorEvaluationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_evaluation_batch(self):
-    """Tests evaluation for batch_size==2."""
-    with ops.Graph().as_default():
-      variables.Variable([13.0], name=BIAS_NAME)
-      variables.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    baseline_estimator = _baseline_estimator_fn(model_dir=self._model_dir)
-    eval_metrics = baseline_estimator.evaluate(
-        input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1)
-
-    # Logit is bias = 13, while label is 10.
-    # Loss per example is 3**2 = 9.
-    # Training loss is the sum over batch = 9 + 9 = 18
-    # Average loss is the average over batch = 9
-    self.assertDictEqual({
-        metric_keys.MetricKeys.LOSS: 18.,
-        metric_keys.MetricKeys.LOSS_MEAN: 9.,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
-        metric_keys.MetricKeys.LABEL_MEAN: 10.,
-        ops.GraphKeys.GLOBAL_STEP: 100
-    }, eval_metrics)
-
-  def test_evaluation_weights(self):
-    """Tests evaluation with weights."""
-    with ops.Graph().as_default():
-      variables.Variable([13.0], name=BIAS_NAME)
-      variables.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    def _input_fn():
-      features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
-      labels = ((10.,), (10.,))
-      return features, labels
-
-    baseline_estimator = _baseline_estimator_fn(
-        weight_column='weights',
-        model_dir=self._model_dir)
-    eval_metrics = baseline_estimator.evaluate(input_fn=_input_fn, steps=1)
-
-    # Logit is bias = 13, while label is 10.
-    # Loss per example is 3**2 = 9.
-    # Training loss is the weighted sum over batch = 9 + 2*9 = 27
-    # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9
-    self.assertDictEqual({
-        metric_keys.MetricKeys.LOSS: 27.,
-        metric_keys.MetricKeys.LOSS_MEAN: 9.,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
-        metric_keys.MetricKeys.LABEL_MEAN: 10.,
-        ops.GraphKeys.GLOBAL_STEP: 100
-    }, eval_metrics)
-
-  def test_evaluation_for_multi_dimensions(self):
-    label_dim = 2
-    with ops.Graph().as_default():
-      variables.Variable([46.0, 58.0], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    baseline_estimator = _baseline_estimator_fn(
-        label_dimension=label_dim,
-        model_dir=self._model_dir)
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'age': np.array([[2., 4., 5.]]),
-        },
-        y=np.array([[46., 58.]]),
-        batch_size=1,
-        num_epochs=None,
-        shuffle=False)
-    eval_metrics = baseline_estimator.evaluate(input_fn=input_fn, steps=1)
-
-    self.assertItemsEqual(
-        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
-         metric_keys.MetricKeys.PREDICTION_MEAN,
-         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
-        eval_metrics.keys())
-
-    # Logit is bias which is [46, 58]
-    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
-
-
-class BaselineEstimatorPredictTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_1d(self):
-    """Tests predict when all variables are one-dimensional."""
-    with ops.Graph().as_default():
-      variables.Variable([.2], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    baseline_estimator = _baseline_estimator_fn(model_dir=self._model_dir)
-
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': np.array([[2.]])},
-        y=None,
-        batch_size=1,
-        num_epochs=1,
-        shuffle=False)
-    predictions = baseline_estimator.predict(input_fn=predict_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    # x * weight + bias = 2. * 10. + .2 = 20.2
-    self.assertAllClose([[.2]], predicted_scores)
-
-  def testMultiDim(self):
-    """Tests predict when all variables are multi-dimenstional."""
-    batch_size = 2
-    label_dimension = 3
-    with ops.Graph().as_default():
-      variables.Variable(  # shape=[label_dimension]
-          [.2, .4, .6], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    baseline_estimator = _baseline_estimator_fn(
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-
-    predict_input_fn = numpy_io.numpy_input_fn(
-        # x shape=[batch_size, x_dim]
-        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
-        y=None,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-    predictions = baseline_estimator.predict(input_fn=predict_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    # score = bias, shape=[batch_size, label_dimension]
-    self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]],
-                        predicted_scores)
-
-
-class BaselineEstimatorIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, prediction_length):
-    feature_columns = [
-        feature_column_lib.numeric_column('x', shape=(input_dimension,))
-    ]
-    est = _baseline_estimator_fn(
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    # learn y = x
-    est.train(train_input_fn, steps=200)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array(
-        [x['predictions'] for x in est.predict(predict_input_fn)])
-    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)
-
-    # EXPORT
-    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self):
-    """Tests complete flow with numpy_input_fn."""
-    label_dimension = 2
-    input_dimension = label_dimension
-    batch_size = 10
-    prediction_length = batch_size
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=None,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        label_dimension=label_dimension,
-        prediction_length=prediction_length)
-
-
-class BaselineEstimatorTrainingTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _mock_optimizer(self, expected_loss=None):
-    expected_var_names = [
-        '%s:0' % BIAS_NAME
-    ]
-
-    def _minimize(loss, global_step=None, var_list=None):
-      trainable_vars = var_list or ops.get_collection(
-          ops.GraphKeys.TRAINABLE_VARIABLES)
-      self.assertItemsEqual(expected_var_names,
-                            [var.name for var in trainable_vars])
-
-      # Verify loss. We can't check the value directly, so we add an assert op.
-      self.assertEquals(0, loss.shape.ndims)
-      if expected_loss is None:
-        if global_step is not None:
-          return state_ops.assign_add(global_step, 1).op
-        return control_flow_ops.no_op()
-      assert_loss = assert_close(
-          math_ops.to_float(expected_loss, name='expected'),
-          loss,
-          name='assert_loss')
-      with ops.control_dependencies((assert_loss,)):
-        if global_step is not None:
-          return state_ops.assign_add(global_step, 1).op
-        return control_flow_ops.no_op()
-
-    mock_optimizer = test.mock.NonCallableMock(
-        spec=optimizer.Optimizer,
-        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
-    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
-
-    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
-    # So, return mock_optimizer itself for deepcopy.
-    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
-    return mock_optimizer
-
-  def _assert_checkpoint(self,
-                         label_dimension,
-                         expected_global_step,
-                         expected_bias=None):
-    shapes = {
-        name: shape
-        for (name, shape) in checkpoint_utils.list_variables(self._model_dir)
-    }
-
-    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
-    self.assertEqual(expected_global_step,
-                     checkpoint_utils.load_variable(self._model_dir,
-                                                    ops.GraphKeys.GLOBAL_STEP))
-
-    self.assertEqual([label_dimension], shapes[BIAS_NAME])
-    if expected_bias is not None:
-      self.assertEqual(expected_bias,
-                       checkpoint_utils.load_variable(self._model_dir,
-                                                      BIAS_NAME))
-
-  def testFromScratch(self):
-    # Create BaselineRegressor.
-    label = 5.
-    age = 17
-    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
-    mock_optimizer = self._mock_optimizer(expected_loss=25.)
-    baseline_estimator = _baseline_estimator_fn(
-        model_dir=self._model_dir,
-        optimizer=mock_optimizer)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    baseline_estimator.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        label_dimension=1,
-        expected_global_step=num_steps,
-        expected_bias=[0.])
-
-  def testFromCheckpoint(self):
-    # Create initial checkpoint.
-    bias = 7.0
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables.Variable([bias], name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # logits = bias = 6.
-    # loss = (logits - label)^2 = (7 - 5)^2 = 4
-    mock_optimizer = self._mock_optimizer(expected_loss=4.)
-    baseline_estimator = _baseline_estimator_fn(
-        model_dir=self._model_dir,
-        optimizer=mock_optimizer)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    baseline_estimator.train(
-        input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        label_dimension=1,
-        expected_global_step=initial_global_step + num_steps,
-        expected_bias=[bias])
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
index b131ed4f12..e2a7d01530 100644
--- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
+++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
@@ -12,414 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Boosted Trees estimators."""
+"""boosted_trees python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees
-from tensorflow.python.estimator.canned import head as head_lib
-
-
-def _validate_input_fn_and_repeat_dataset(train_input_fn):
-  """Validates whether the input_fn is valid, and repeat() if tf.Dataset."""
-  def _input_fn():
-    result_input_fn = train_input_fn()
-    if isinstance(result_input_fn, dataset_ops.Dataset):
-      return result_input_fn.repeat()
-    return result_input_fn
-
-  return _input_fn
-
-
-def _is_classification_head(head):
-  """Infers if the head is a classification head."""
-  # Check using all classification heads defined in canned/head.py. However, it
-  # is not a complete list - it does not check for other classification heads
-  # not defined in the head library.
-  # pylint: disable=protected-access
-  return isinstance(head,
-                    (head_lib._BinaryLogisticHeadWithSigmoidCrossEntropyLoss,
-                     head_lib._MultiClassHeadWithSoftmaxCrossEntropyLoss))
-  # pylint: enable=protected-access
-
-
-class _BoostedTreesEstimator(canned_boosted_trees._BoostedTreesBase):  # pylint: disable=protected-access
-  """An Estimator for Tensorflow Boosted Trees models."""
-
-  def __init__(self,
-               feature_columns,
-               n_batches_per_layer,
-               head,
-               model_dir=None,
-               weight_column=None,
-               n_trees=100,
-               max_depth=6,
-               learning_rate=0.1,
-               l1_regularization=0.,
-               l2_regularization=0.,
-               tree_complexity=0.,
-               min_node_weight=0.,
-               config=None,
-               center_bias=False,
-               pruning_mode='none'):
-    """Initializes a `BoostedTreesEstimator` instance.
-
-    Args:
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `FeatureColumn`.
-      n_batches_per_layer: the number of batches to collect statistics per
-        layer.
-      head: the `Head` instance defined for Estimator.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into an estimator
-        to continue training a previously saved model.
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to downweight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      n_trees: number trees to be created.
-      max_depth: maximum depth of the tree to grow.
-      learning_rate: shrinkage parameter to be used when a tree added to the
-        model.
-      l1_regularization: regularization multiplier applied to the absolute
-        weights of the tree leafs.
-      l2_regularization: regularization multiplier applied to the square weights
-        of the tree leafs.
-      tree_complexity: regularization factor to penalize trees with more leaves.
-      min_node_weight: minimum hessian a node must have for a split to be
-        considered. The value will be compared with sum(leaf_hessian)/
-        (batch_size * n_batches_per_layer).
-      config: `RunConfig` object to configure the runtime settings.
-      center_bias: Whether bias centering needs to occur. Bias centering refers
-        to the first node in the very first tree returning the prediction that
-        is aligned with the original labels distribution. For example, for
-        regression problems, the first node will return the mean of the labels.
-        For binary classification problems, it will return a logit for a prior
-        probability of label 1.
-      pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
-        pruning (do not split a node if not enough gain is observed) and post
-        pruning (build the tree up to a max depth and then prune branches with
-        negative gain). For pre and post pruning, you MUST provide
-        tree_complexity >0.
-
-    Raises:
-      ValueError: when wrong arguments are given or unsupported functionalities
-         are requested.
-    """
-    # HParams for the model.
-    # pylint: disable=protected-access
-    tree_hparams = canned_boosted_trees._TreeHParams(
-        n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
-        tree_complexity, min_node_weight, center_bias, pruning_mode)
-
-    def _model_fn(features, labels, mode, config):
-      return canned_boosted_trees._bt_model_fn(
-          features,
-          labels,
-          mode,
-          head,
-          feature_columns,
-          tree_hparams,
-          n_batches_per_layer,
-          config=config)
-
-    super(_BoostedTreesEstimator, self).__init__(
-        model_fn=_model_fn,
-        model_dir=model_dir,
-        config=config,
-        feature_columns=feature_columns,
-        head=head,
-        center_bias=center_bias,
-        is_classification=_is_classification_head(head))
-    # pylint: enable=protected-access
-
-
-def boosted_trees_classifier_train_in_memory(
-    train_input_fn,
-    feature_columns,
-    model_dir=None,
-    n_classes=canned_boosted_trees._HOLD_FOR_MULTI_CLASS_SUPPORT,
-    weight_column=None,
-    label_vocabulary=None,
-    n_trees=100,
-    max_depth=6,
-    learning_rate=0.1,
-    l1_regularization=0.,
-    l2_regularization=0.,
-    tree_complexity=0.,
-    min_node_weight=0.,
-    config=None,
-    train_hooks=None,
-    center_bias=False,
-    pruning_mode='none'):
-  """Trains a boosted tree classifier with in memory dataset.
-
-  Example:
-
-  ```python
-  bucketized_feature_1 = bucketized_column(
-    numeric_column('feature_1'), BUCKET_BOUNDARIES_1)
-  bucketized_feature_2 = bucketized_column(
-    numeric_column('feature_2'), BUCKET_BOUNDARIES_2)
-
-  def train_input_fn():
-    dataset = create-dataset-from-training-data
-    # This is tf.data.Dataset of a tuple of feature dict and label.
-    #   e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}),
-    #                     Dataset.from_tensors(label_array)))
-    # The returned Dataset shouldn't be batched.
-    # If Dataset repeats, only the first repetition would be used for training.
-    return dataset
-
-  classifier = boosted_trees_classifier_train_in_memory(
-      train_input_fn,
-      feature_columns=[bucketized_feature_1, bucketized_feature_2],
-      n_trees=100,
-      ... <some other params>
-  )
-
-  def input_fn_eval():
-    ...
-    return dataset
-
-  metrics = classifier.evaluate(input_fn=input_fn_eval, steps=10)
-  ```
-
-  Args:
-    train_input_fn: the input function returns a dataset containing a single
-      epoch of *unbatched* features and labels.
-    feature_columns: An iterable containing all the feature columns used by
-      the model. All items in the set should be instances of classes derived
-      from `FeatureColumn`.
-    model_dir: Directory to save model parameters, graph and etc. This can
-      also be used to load checkpoints from the directory into an estimator
-      to continue training a previously saved model.
-    n_classes: number of label classes. Default is binary classification.
-      Multiclass support is not yet implemented.
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to downweight or boost examples during training. It
-      will be multiplied by the loss of the example. If it is a string, it is
-      used as a key to fetch weight tensor from the `features`. If it is a
-      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-      then weight_column.normalizer_fn is applied on it to get weight tensor.
-    label_vocabulary: A list of strings represents possible label values. If
-      given, labels must be string type and have any value in
-      `label_vocabulary`. If it is not given, that means labels are
-      already encoded as integer or float within [0, 1] for `n_classes=2` and
-      encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
-      Also there will be errors if vocabulary is not provided and labels are
-      string.
-    n_trees: number trees to be created.
-    max_depth: maximum depth of the tree to grow.
-    learning_rate: shrinkage parameter to be used when a tree added to the
-      model.
-    l1_regularization: regularization multiplier applied to the absolute
-      weights of the tree leafs.
-    l2_regularization: regularization multiplier applied to the square weights
-      of the tree leafs.
-    tree_complexity: regularization factor to penalize trees with more leaves.
-    min_node_weight: minimum hessian a node must have for a split to be
-        considered. The value will be compared with sum(leaf_hessian)/
-        (batch_size * n_batches_per_layer).
-    config: `RunConfig` object to configure the runtime settings.
-    train_hooks: a list of Hook instances to be passed to estimator.train()
-    center_bias: Whether bias centering needs to occur. Bias centering refers
-        to the first node in the very first tree returning the prediction that
-        is aligned with the original labels distribution. For example, for
-        regression problems, the first node will return the mean of the labels.
-        For binary classification problems, it will return a logit for a prior
-        probability of label 1.
-    pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
-        pruning (do not split a node if not enough gain is observed) and post
-        pruning (build the tree up to a max depth and then prune branches with
-        negative gain). For pre and post pruning, you MUST provide
-        tree_complexity >0.
-
-  Returns:
-    a `BoostedTreesClassifier` instance created with the given arguments and
-      trained with the data loaded up on memory from the input_fn.
-
-  Raises:
-    ValueError: when wrong arguments are given or unsupported functionalities
-       are requested.
-  """
-  # pylint: disable=protected-access
-  # TODO(nponomareva): Support multi-class cases.
-  if n_classes == canned_boosted_trees._HOLD_FOR_MULTI_CLASS_SUPPORT:
-    n_classes = 2
-  head, closed_form = (
-      canned_boosted_trees._create_classification_head_and_closed_form(
-          n_classes, weight_column, label_vocabulary=label_vocabulary))
-
-  # HParams for the model.
-  tree_hparams = canned_boosted_trees._TreeHParams(
-      n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
-      tree_complexity, min_node_weight, center_bias, pruning_mode)
-
-  def _model_fn(features, labels, mode, config):
-    return canned_boosted_trees._bt_model_fn(
-        features,
-        labels,
-        mode,
-        head,
-        feature_columns,
-        tree_hparams,
-        n_batches_per_layer=1,
-        config=config,
-        closed_form_grad_and_hess_fn=closed_form,
-        train_in_memory=True)
-
-  in_memory_classifier = estimator.Estimator(
-      model_fn=_model_fn, model_dir=model_dir, config=config)
-
-  in_memory_classifier.train(
-      input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn),
-      hooks=train_hooks)
-
-  return in_memory_classifier
-  # pylint: enable=protected-access
-
-
-def boosted_trees_regressor_train_in_memory(
-    train_input_fn,
-    feature_columns,
-    model_dir=None,
-    label_dimension=canned_boosted_trees._HOLD_FOR_MULTI_DIM_SUPPORT,
-    weight_column=None,
-    n_trees=100,
-    max_depth=6,
-    learning_rate=0.1,
-    l1_regularization=0.,
-    l2_regularization=0.,
-    tree_complexity=0.,
-    min_node_weight=0.,
-    config=None,
-    train_hooks=None,
-    center_bias=False,
-    pruning_mode='none'):
-  """Trains a boosted tree regressor with in memory dataset.
-
-  Example:
-
-  ```python
-  bucketized_feature_1 = bucketized_column(
-    numeric_column('feature_1'), BUCKET_BOUNDARIES_1)
-  bucketized_feature_2 = bucketized_column(
-    numeric_column('feature_2'), BUCKET_BOUNDARIES_2)
-
-  def train_input_fn():
-    dataset = create-dataset-from-training-data
-    # This is tf.data.Dataset of a tuple of feature dict and label.
-    #   e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}),
-    #                     Dataset.from_tensors(label_array)))
-    # The returned Dataset shouldn't be batched.
-    # If Dataset repeats, only the first repetition would be used for training.
-    return dataset
-
-  regressor = boosted_trees_regressor_train_in_memory(
-      train_input_fn,
-      feature_columns=[bucketized_feature_1, bucketized_feature_2],
-      n_trees=100,
-      ... <some other params>
-  )
-
-  def input_fn_eval():
-    ...
-    return dataset
-
-  metrics = regressor.evaluate(input_fn=input_fn_eval, steps=10)
-  ```
-
-  Args:
-    train_input_fn: the input function returns a dataset containing a single
-      epoch of *unbatched* features and labels.
-    feature_columns: An iterable containing all the feature columns used by
-      the model. All items in the set should be instances of classes derived
-      from `FeatureColumn`.
-    model_dir: Directory to save model parameters, graph and etc. This can
-      also be used to load checkpoints from the directory into an estimator
-      to continue training a previously saved model.
-    label_dimension: Number of regression targets per example.
-      Multi-dimensional support is not yet implemented.
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to downweight or boost examples during training. It
-      will be multiplied by the loss of the example. If it is a string, it is
-      used as a key to fetch weight tensor from the `features`. If it is a
-      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-      then weight_column.normalizer_fn is applied on it to get weight tensor.
-    n_trees: number trees to be created.
-    max_depth: maximum depth of the tree to grow.
-    learning_rate: shrinkage parameter to be used when a tree added to the
-      model.
-    l1_regularization: regularization multiplier applied to the absolute
-      weights of the tree leafs.
-    l2_regularization: regularization multiplier applied to the square weights
-      of the tree leafs.
-    tree_complexity: regularization factor to penalize trees with more leaves.
-    min_node_weight: minimum hessian a node must have for a split to be
-        considered. The value will be compared with sum(leaf_hessian)/
-        (batch_size * n_batches_per_layer).
-    config: `RunConfig` object to configure the runtime settings.
-    train_hooks: a list of Hook instances to be passed to estimator.train().
-    center_bias: Whether bias centering needs to occur. Bias centering refers
-        to the first node in the very first tree returning the prediction that
-        is aligned with the original labels distribution. For example, for
-        regression problems, the first node will return the mean of the labels.
-        For binary classification problems, it will return a logit for a prior
-        probability of label 1.
-    pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
-        pruning (do not split a node if not enough gain is observed) and post
-        pruning (build the tree up to a max depth and then prune branches with
-        negative gain). For pre and post pruning, you MUST provide
-        tree_complexity >0.
-
-  Returns:
-    a `BoostedTreesClassifier` instance created with the given arguments and
-      trained with the data loaded up on memory from the input_fn.
-
-  Raises:
-    ValueError: when wrong arguments are given or unsupported functionalities
-       are requested.
-  """
-  # pylint: disable=protected-access
-  # TODO(nponomareva): Extend it to multi-dimension cases.
-  if label_dimension == canned_boosted_trees._HOLD_FOR_MULTI_DIM_SUPPORT:
-    label_dimension = 1
-  head = canned_boosted_trees._create_regression_head(label_dimension,
-                                                      weight_column)
-
-  # HParams for the model.
-  tree_hparams = canned_boosted_trees._TreeHParams(
-      n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
-      tree_complexity, min_node_weight, center_bias, pruning_mode)
-
-  def _model_fn(features, labels, mode, config):
-    return canned_boosted_trees._bt_model_fn(
-        features,
-        labels,
-        mode,
-        head,
-        feature_columns,
-        tree_hparams,
-        n_batches_per_layer=1,
-        config=config,
-        train_in_memory=True)
-
-  in_memory_regressor = estimator.Estimator(
-      model_fn=_model_fn, model_dir=model_dir, config=config)
+from tensorflow_estimator.contrib.estimator.python.estimator import boosted_trees
 
-  in_memory_regressor.train(
-      input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn),
-      hooks=train_hooks)
+# Include attrs that start with single underscore.
+boosted_trees.__all__ = [
+    s for s in dir(boosted_trees) if not s.startswith('__')
+]
 
-  return in_memory_regressor
-  # pylint: enable=protected-access
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.boosted_trees import *
diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py
deleted file mode 100644
index e23d9c0fc4..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py
+++ /dev/null
@@ -1,438 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests boosted_trees estimators."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.estimator.python.estimator import boosted_trees
-from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
-from tensorflow.python.platform import googletest
-from tensorflow.python.training import checkpoint_utils
-
-NUM_FEATURES = 3
-
-BUCKET_BOUNDARIES = [-2., .5, 12.]  # Boundaries for all the features.
-INPUT_FEATURES = np.array(
-    [
-        [12.5, 1.0, -2.001, -2.0001, -1.999],  # feature_0 quantized:[3,2,0,0,1]
-        [2.0, -3.0, 0.5, 0.0, 0.4995],         # feature_1 quantized:[2,0,2,1,1]
-        [3.0, 20.0, 50.0, -100.0, 102.75],     # feature_2 quantized:[2,3,3,0,3]
-    ],
-    dtype=np.float32)
-CLASSIFICATION_LABELS = [[0.], [1.], [1.], [0.], [0.]]
-REGRESSION_LABELS = [[1.5], [0.3], [0.2], [2.], [5.]]
-FEATURES_DICT = {'f_%d' % i: INPUT_FEATURES[i] for i in range(NUM_FEATURES)}
-
-
-def _make_train_input_fn(is_classification):
-  """Makes train input_fn for classification/regression."""
-
-  def _input_fn():
-    features_dict = dict(FEATURES_DICT)
-    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
-    return features_dict, labels
-
-  return _input_fn
-
-
-def _make_train_input_fn_dataset(is_classification):
-  """Makes input_fn using Dataset."""
-
-  def _input_fn():
-    features_dict = dict(FEATURES_DICT)
-    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
-    ds = dataset_ops.Dataset.zip(
-        (dataset_ops.Dataset.from_tensors(features_dict),
-         dataset_ops.Dataset.from_tensors(labels)
-        ))
-    return ds
-
-  return _input_fn
-
-
-class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
-
-  def setUp(self):
-    self._head = canned_boosted_trees._create_regression_head(label_dimension=1)
-    self._feature_columns = {
-        feature_column.bucketized_column(
-            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
-            BUCKET_BOUNDARIES)
-        for i in range(NUM_FEATURES)
-    }
-
-  def _assert_checkpoint(self, model_dir, global_step, finalized_trees,
-                         attempted_layers):
-    reader = checkpoint_utils.load_checkpoint(model_dir)
-    self.assertEqual(global_step, reader.get_tensor(ops.GraphKeys.GLOBAL_STEP))
-    serialized = reader.get_tensor('boosted_trees:0_serialized')
-    ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-    ensemble_proto.ParseFromString(serialized)
-    self.assertEqual(
-        finalized_trees,
-        sum([1 for t in ensemble_proto.tree_metadata if t.is_finalized]))
-    self.assertEqual(attempted_layers,
-                     ensemble_proto.growing_metadata.num_layers_attempted)
-
-  def testTrainAndEvaluateEstimator(self):
-    input_fn = _make_train_input_fn(is_classification=False)
-
-    est = boosted_trees._BoostedTreesEstimator(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        head=self._head,
-        max_depth=5)
-
-    # It will stop after 10 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(input_fn, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10)
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 1.008551)
-
-  def testTrainAndEvaluateEstimatorWithCenterBias(self):
-    input_fn = _make_train_input_fn(is_classification=False)
-
-    est = boosted_trees._BoostedTreesEstimator(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        head=self._head,
-        max_depth=5,
-        center_bias=True)
-
-    # It will stop after 11 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(input_fn, steps=num_steps)
-    # 10 steps for training and 2 step for bias centering.
-    self._assert_checkpoint(
-        est.model_dir, global_step=12, finalized_trees=2, attempted_layers=10)
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 0.614642)
-
-  def testTrainAndEvaluateEstimatorWithPrePruning(self):
-    input_fn = _make_train_input_fn(is_classification=False)
-
-    est = boosted_trees._BoostedTreesEstimator(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        head=self._head,
-        max_depth=5,
-        tree_complexity=0.001,
-        pruning_mode='pre')
-
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(input_fn, steps=num_steps)
-    # We stop actually after 2*depth*n_trees steps (via a hook) because we still
-    # could not grow 2 trees of depth 5 (due to pre-pruning).
-    self._assert_checkpoint(
-        est.model_dir, global_step=21, finalized_trees=0, attempted_layers=21)
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 3.83943)
-
-  def testTrainAndEvaluateEstimatorWithPostPruning(self):
-    input_fn = _make_train_input_fn(is_classification=False)
-
-    est = boosted_trees._BoostedTreesEstimator(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        head=self._head,
-        max_depth=5,
-        tree_complexity=0.001,
-        pruning_mode='post')
-
-    # It will stop after 10 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(input_fn, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10)
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 2.37652)
-
-  def testInferEstimator(self):
-    train_input_fn = _make_train_input_fn(is_classification=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees._BoostedTreesEstimator(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5,
-        head=self._head)
-
-    # It will stop after 5 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(train_input_fn, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    # Validate predictions.
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose(
-        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
-        [pred['predictions'] for pred in predictions])
-
-  def testInferEstimatorWithCenterBias(self):
-    train_input_fn = _make_train_input_fn(is_classification=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees._BoostedTreesEstimator(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5,
-        center_bias=True,
-        head=self._head)
-
-    # It will stop after 6 steps because of the max depth and num trees (5 for
-    # training and 2 for bias centering).
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(train_input_fn, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=7, finalized_trees=1, attempted_layers=5)
-    # Validate predictions.
-    predictions = list(est.predict(input_fn=predict_input_fn))
-
-    self.assertAllClose(
-        [[1.634501], [1.325703], [1.187431], [2.019683], [2.832683]],
-        [pred['predictions'] for pred in predictions])
-
-  def testBinaryClassifierTrainInMemoryAndEvalAndInfer(self):
-    train_input_fn = _make_train_input_fn(is_classification=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.boosted_trees_classifier_train_in_memory(
-        train_input_fn=train_input_fn, feature_columns=self._feature_columns,
-        n_trees=1, max_depth=5)
-    # It will stop after 5 steps because of the max depth and num trees.
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-
-    # Check evaluate and predict.
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-    # Validate predictions.
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose([[0], [1], [1], [0], [0]],
-                        [pred['class_ids'] for pred in predictions])
-
-  def testBinaryClassifierTrainInMemoryAndEvalAndInferWithCenterBias(self):
-    train_input_fn = _make_train_input_fn(is_classification=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.boosted_trees_classifier_train_in_memory(
-        train_input_fn=train_input_fn,
-        feature_columns=self._feature_columns,
-        n_trees=1,
-        max_depth=5,
-        center_bias=True)
-    # It will stop after 5 steps + 3 for bias, because of the max depth and num
-    # trees.
-    self._assert_checkpoint(
-        est.model_dir, global_step=8, finalized_trees=1, attempted_layers=5)
-
-    # Check evaluate and predict.
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-    # Validate predictions.
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose([[0], [1], [1], [0], [0]],
-                        [pred['class_ids'] for pred in predictions])
-
-  def testBinaryClassifierTrainInMemoryAndEvalAndInferWithPrePruning(self):
-    train_input_fn = _make_train_input_fn(is_classification=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.boosted_trees_classifier_train_in_memory(
-        train_input_fn=train_input_fn,
-        feature_columns=self._feature_columns,
-        n_trees=1,
-        max_depth=5,
-        pruning_mode='pre',
-        tree_complexity=0.01)
-    # We stop actually after 2*depth*n_trees steps (via a hook) because we still
-    # could not grow 1 trees of depth 5 (due to pre-pruning).
-    self._assert_checkpoint(
-        est.model_dir, global_step=11, finalized_trees=0, attempted_layers=11)
-
-    # Check evaluate and predict.
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-    # Validate predictions.
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose([[0], [1], [1], [0], [0]],
-                        [pred['class_ids'] for pred in predictions])
-
-  def testBinaryClassifierTrainInMemoryWithDataset(self):
-    train_input_fn = _make_train_input_fn_dataset(is_classification=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.boosted_trees_classifier_train_in_memory(
-        train_input_fn=train_input_fn,
-        feature_columns=self._feature_columns,
-        n_trees=1,
-        max_depth=5)
-    # It will stop after 5 steps because of the max depth and num trees.
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-
-    # Check evaluate and predict.
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose([[0], [1], [1], [0], [0]],
-                        [pred['class_ids'] for pred in predictions])
-
-  def testRegressorTrainInMemoryAndEvalAndInfer(self):
-    train_input_fn = _make_train_input_fn(is_classification=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.boosted_trees_regressor_train_in_memory(
-        train_input_fn=train_input_fn, feature_columns=self._feature_columns,
-        n_trees=1, max_depth=5)
-    # It will stop after 5 steps because of the max depth and num trees.
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-
-    # Check evaluate and predict.
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 2.478283)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose(
-        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
-        [pred['predictions'] for pred in predictions])
-
-  def testRegressorTrainInMemoryWithDataset(self):
-    train_input_fn = _make_train_input_fn_dataset(is_classification=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.boosted_trees_regressor_train_in_memory(
-        train_input_fn=train_input_fn, feature_columns=self._feature_columns,
-        n_trees=1, max_depth=5)
-    # It will stop after 5 steps because of the max depth and num trees.
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    # Check evaluate and predict.
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 2.478283)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose(
-        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
-        [pred['predictions'] for pred in predictions])
-
-
-class BoostedTreesDebugOutputTest(test_util.TensorFlowTestCase):
-
-  def setUp(self):
-    self._head = canned_boosted_trees._create_regression_head(label_dimension=1)
-    self._feature_columns = {
-        feature_column.bucketized_column(
-            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
-            BUCKET_BOUNDARIES) for i in range(NUM_FEATURES)
-    }
-
-  def testContribEstimatorThatDFCIsInPredictions(self):
-    # pylint:disable=protected-access
-    head = canned_boosted_trees._create_regression_head(label_dimension=1)
-    train_input_fn = _make_train_input_fn(is_classification=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees._BoostedTreesEstimator(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        head=head,
-        n_trees=1,
-        max_depth=5,
-        center_bias=True)
-    # pylint:enable=protected-access
-
-    num_steps = 100
-    # Train for a few steps. Validate debug outputs in prediction dicts.
-    est.train(train_input_fn, steps=num_steps)
-    debug_predictions = est.experimental_predict_with_explanations(
-        predict_input_fn)
-    biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
-                         for pred in debug_predictions])
-    self.assertAllClose([1.8] * 5, biases)
-    self.assertAllClose(({
-        0: -0.070499420166015625,
-        1: -0.095000028610229492,
-        2: 0.0
-    }, {
-        0: -0.53763031959533691,
-        1: 0.063333392143249512,
-        2: 0.0
-    }, {
-        0: -0.51756942272186279,
-        1: -0.095000028610229492,
-        2: 0.0
-    }, {
-        0: 0.1563495397567749,
-        1: 0.063333392143249512,
-        2: 0.0
-    }, {
-        0: 0.96934974193572998,
-        1: 0.063333392143249512,
-        2: 0.0
-    }), dfcs)
-
-    # Assert sum(dfcs) + bias == predictions.
-    expected_predictions = [[1.6345005], [1.32570302], [1.1874305],
-                            [2.01968288], [2.83268309]]
-    predictions = [
-        [sum(dfc.values()) + bias] for (dfc, bias) in zip(dfcs, biases)
-    ]
-    self.assertAllClose(expected_predictions, predictions)
-
-    # Test when user doesn't include bias or dfc in predict_keys.
-    debug_predictions = est.experimental_predict_with_explanations(
-        predict_input_fn, predict_keys=['predictions'])
-    for prediction_dict in debug_predictions:
-      self.assertTrue('bias' in prediction_dict)
-      self.assertTrue('dfc' in prediction_dict)
-      self.assertTrue('predictions' in prediction_dict)
-      self.assertEqual(len(prediction_dict), 3)
-
-
-if __name__ == '__main__':
-  googletest.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn.py b/tensorflow/contrib/estimator/python/estimator/dnn.py
index 9efa8f474d..6b260de9e3 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,153 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Deep Neural Network estimators."""
+"""dnn python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import dnn as dnn_lib
-from tensorflow.python.ops import nn
-
-
-class DNNEstimator(estimator.Estimator):
-  """An estimator for TensorFlow DNN models with user-specified head.
-
-  Example:
-
-  ```python
-  sparse_feature_a = sparse_column_with_hash_bucket(...)
-  sparse_feature_b = sparse_column_with_hash_bucket(...)
-
-  sparse_feature_a_emb = embedding_column(sparse_id_column=sparse_feature_a,
-                                          ...)
-  sparse_feature_b_emb = embedding_column(sparse_id_column=sparse_feature_b,
-                                          ...)
-
-  estimator = DNNEstimator(
-      head=tf.contrib.estimator.multi_label_head(n_classes=3),
-      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
-      hidden_units=[1024, 512, 256])
-
-  # Or estimator using the ProximalAdagradOptimizer optimizer with
-  # regularization.
-  estimator = DNNEstimator(
-      head=tf.contrib.estimator.multi_label_head(n_classes=3),
-      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
-      hidden_units=[1024, 512, 256],
-      optimizer=tf.train.ProximalAdagradOptimizer(
-        learning_rate=0.1,
-        l1_regularization_strength=0.001
-      ))
-
-  # Or estimator using an optimizer with a learning rate decay.
-  estimator = DNNEstimator(
-      head=tf.contrib.estimator.multi_label_head(n_classes=3),
-      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
-      hidden_units=[1024, 512, 256],
-      optimizer=lambda: tf.AdamOptimizer(
-          learning_rate=tf.exponential_decay(
-              learning_rate=0.1,
-              global_step=tf.get_global_step(),
-              decay_steps=10000,
-              decay_rate=0.96))
-
-  # Or estimator with warm-starting from a previous checkpoint.
-  estimator = DNNEstimator(
-      head=tf.contrib.estimator.multi_label_head(n_classes=3),
-      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
-      hidden_units=[1024, 512, 256],
-      warm_start_from="/path/to/checkpoint/dir")
-
-  # Input builders
-  def input_fn_train: # returns x, y
-    pass
-  estimator.train(input_fn=input_fn_train, steps=100)
-
-  def input_fn_eval: # returns x, y
-    pass
-  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
-  def input_fn_predict: # returns x, None
-    pass
-  predictions = estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-  otherwise there will be a `KeyError`:
-
-  * if `weight_column` is not `None`, a feature with
-    `key=weight_column` whose value is a `Tensor`.
-  * for each `column` in `feature_columns`:
-    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
-      with `key` the id column name, the second with `key` the weight column
-      name. Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss and predicted output are determined by the specified head.
-  """
+from tensorflow_estimator.contrib.estimator.python.estimator import dnn
 
-  def __init__(self,
-               head,
-               hidden_units,
-               feature_columns,
-               model_dir=None,
-               optimizer='Adagrad',
-               activation_fn=nn.relu,
-               dropout=None,
-               input_layer_partitioner=None,
-               config=None,
-               warm_start_from=None,
-               batch_norm=False):
-    """Initializes a `DNNEstimator` instance.
+# Include attrs that start with single underscore.
+dnn.__all__ = [s for s in dir(dnn) if not s.startswith('__')]
 
-    Args:
-      head: A `_Head` instance constructed with a method such as
-        `tf.contrib.estimator.multi_label_head`.
-      hidden_units: Iterable of number hidden units per layer. All layers are
-        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
-        second one has 32.
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `_FeatureColumn`.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator to
-        continue training a previously saved model.
-      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
-        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
-        callable. Defaults to Adagrad optimizer.
-      activation_fn: Activation function applied to each layer. If `None`, will
-        use `tf.nn.relu`.
-      dropout: When not `None`, the probability we will drop out a given
-        coordinate.
-      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
-        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-      config: `RunConfig` object to configure the runtime settings.
-      warm_start_from: A string filepath to a checkpoint to warm-start from, or
-        a `WarmStartSettings` object to fully configure warm-starting.  If the
-        string filepath is provided instead of a `WarmStartSettings`, then all
-        weights are warm-started, and it is assumed that vocabularies and Tensor
-        names are unchanged.
-      batch_norm: Whether to use batch normalization after each hidden layer.
-    """
-    def _model_fn(features, labels, mode, config):
-      return dnn_lib._dnn_model_fn(  # pylint: disable=protected-access
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          hidden_units=hidden_units,
-          feature_columns=tuple(feature_columns or []),
-          optimizer=optimizer,
-          activation_fn=activation_fn,
-          dropout=dropout,
-          input_layer_partitioner=input_layer_partitioner,
-          config=config,
-          batch_norm=batch_norm)
-    super(DNNEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config,
-        warm_start_from=warm_start_from)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.dnn import *
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
index 4e7965ef26..24655c9964 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,171 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TensorFlow estimator for Linear and DNN joined training models."""
+"""dnn_linear_combined python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import dnn_linear_combined as dnn_linear_combined_lib
-from tensorflow.python.ops import nn
-
-
-class DNNLinearCombinedEstimator(estimator.Estimator):
-  """An estimator for TensorFlow Linear and DNN joined models with custom head.
-
-  Note: This estimator is also known as wide-n-deep.
-
-  Example:
-
-  ```python
-  numeric_feature = numeric_column(...)
-  categorical_column_a = categorical_column_with_hash_bucket(...)
-  categorical_column_b = categorical_column_with_hash_bucket(...)
-
-  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
-  categorical_feature_a_emb = embedding_column(
-      categorical_column=categorical_feature_a, ...)
-  categorical_feature_b_emb = embedding_column(
-      categorical_column=categorical_feature_b, ...)
-
-  estimator = DNNLinearCombinedEstimator(
-      head=tf.contrib.estimator.multi_label_head(n_classes=3),
-      # wide settings
-      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
-      linear_optimizer=tf.train.FtrlOptimizer(...),
-      # deep settings
-      dnn_feature_columns=[
-          categorical_feature_a_emb, categorical_feature_b_emb,
-          numeric_feature],
-      dnn_hidden_units=[1000, 500, 100],
-      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...))
-
-  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
-  tf.train.ProximalAdagradOptimizer(
-      learning_rate=0.1,
-      l1_regularization_strength=0.001,
-      l2_regularization_strength=0.001)
-  # To apply learning rate decay, you can set dnn_optimizer to a callable:
-  lambda: tf.AdamOptimizer(
-      learning_rate=tf.exponential_decay(
-          learning_rate=0.1,
-          global_step=tf.get_global_step(),
-          decay_steps=10000,
-          decay_rate=0.96)
-  # It is the same for linear_optimizer.
-
-  # Input builders
-  def input_fn_train: # returns x, y
-    pass
-  estimator.train(input_fn=input_fn_train, steps=100)
-
-  def input_fn_eval: # returns x, y
-    pass
-  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
-  def input_fn_predict: # returns x, None
-    pass
-  predictions = estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-  otherwise there will be a `KeyError`:
-
-  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
-    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
-      with `key` the id column name, the second with `key` the weight column
-      name. Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss is calculated by using mean squared error.
-
-  @compatibility(eager)
-  Estimators are not compatible with eager execution.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               head,
-               model_dir=None,
-               linear_feature_columns=None,
-               linear_optimizer='Ftrl',
-               dnn_feature_columns=None,
-               dnn_optimizer='Adagrad',
-               dnn_hidden_units=None,
-               dnn_activation_fn=nn.relu,
-               dnn_dropout=None,
-               input_layer_partitioner=None,
-               config=None,
-               linear_sparse_combiner='sum'):
-    """Initializes a DNNLinearCombinedEstimator instance.
-
-    Args:
-      head: A `_Head` instance constructed with a method such as
-        `tf.contrib.estimator.multi_label_head`.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into an estimator
-        to continue training a previously saved model.
-      linear_feature_columns: An iterable containing all the feature columns
-        used by linear part of the model. All items in the set must be
-        instances of classes derived from `FeatureColumn`.
-      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
-        the linear part of the model. Can also be a string (one of 'Adagrad',
-        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL
-        optimizer.
-      dnn_feature_columns: An iterable containing all the feature columns used
-        by deep part of the model. All items in the set must be instances of
-        classes derived from `FeatureColumn`.
-      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
-        the deep part of the model. Can also be a string (one of 'Adagrad',
-        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad
-        optimizer.
-      dnn_hidden_units: List of hidden units per layer. All layers are fully
-        connected.
-      dnn_activation_fn: Activation function applied to each layer. If None,
-        will use `tf.nn.relu`.
-      dnn_dropout: When not None, the probability we will drop out
-        a given coordinate.
-      input_layer_partitioner: Partitioner for input layer. Defaults to
-        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-      config: RunConfig object to configure the runtime settings.
-      linear_sparse_combiner: A string specifying how to reduce the linear model
-        if a categorical column is multivalent.  One of "mean", "sqrtn", and
-        "sum" -- these are effectively different ways to do example-level
-        normalization, which can be useful for bag-of-words features.  For more
-        details, see `tf.feature_column.linear_model`.
-
-    Raises:
-      ValueError: If both linear_feature_columns and dnn_features_columns are
-        empty at the same time.
-    """
-    linear_feature_columns = linear_feature_columns or []
-    dnn_feature_columns = dnn_feature_columns or []
-    self._feature_columns = (
-        list(linear_feature_columns) + list(dnn_feature_columns))
-    if not self._feature_columns:
-      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
-                       'must be defined.')
+from tensorflow_estimator.contrib.estimator.python.estimator import dnn_linear_combined
 
-    def _model_fn(features, labels, mode, config):
-      return dnn_linear_combined_lib._dnn_linear_combined_model_fn(  # pylint: disable=protected-access
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          linear_feature_columns=linear_feature_columns,
-          linear_optimizer=linear_optimizer,
-          dnn_feature_columns=dnn_feature_columns,
-          dnn_optimizer=dnn_optimizer,
-          dnn_hidden_units=dnn_hidden_units,
-          dnn_activation_fn=dnn_activation_fn,
-          dnn_dropout=dnn_dropout,
-          input_layer_partitioner=input_layer_partitioner,
-          config=config,
-          linear_sparse_combiner=linear_sparse_combiner)
+# Include attrs that start with single underscore.
+dnn_linear_combined.__all__ = [
+    s for s in dir(dnn_linear_combined) if not s.startswith('__')
+]
 
-    super(DNNLinearCombinedEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.dnn_linear_combined import *
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py
deleted file mode 100644
index 51b9ce7005..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py
+++ /dev/null
@@ -1,227 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for dnn_linear_combined.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import shutil
-import tempfile
-
-import numpy as np
-import six
-
-from tensorflow.contrib.estimator.python.estimator import dnn_linear_combined
-from tensorflow.contrib.estimator.python.estimator import head as head_lib
-from tensorflow.python.estimator.canned import dnn_testing_utils
-from tensorflow.python.estimator.canned import linear_testing_utils
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-
-
-def _dnn_only_estimator_fn(
-    hidden_units,
-    feature_columns,
-    model_dir=None,
-    label_dimension=1,
-    weight_column=None,
-    optimizer='Adagrad',
-    activation_fn=nn.relu,
-    dropout=None,
-    input_layer_partitioner=None,
-    config=None):
-  return dnn_linear_combined.DNNLinearCombinedEstimator(
-      head=head_lib.regression_head(
-          weight_column=weight_column, label_dimension=label_dimension,
-          # Tests in core (from which this test inherits) test the sum loss.
-          loss_reduction=losses.Reduction.SUM),
-      model_dir=model_dir,
-      dnn_feature_columns=feature_columns,
-      dnn_optimizer=optimizer,
-      dnn_hidden_units=hidden_units,
-      dnn_activation_fn=activation_fn,
-      dnn_dropout=dropout,
-      input_layer_partitioner=input_layer_partitioner,
-      config=config)
-
-
-class DNNOnlyEstimatorEvaluateTest(
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_only_estimator_fn)
-
-
-class DNNOnlyEstimatorPredictTest(
-    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_only_estimator_fn)
-
-
-class DNNOnlyEstimatorTrainTest(
-    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_only_estimator_fn)
-
-
-def _linear_only_estimator_fn(
-    feature_columns,
-    model_dir=None,
-    label_dimension=1,
-    weight_column=None,
-    optimizer='Ftrl',
-    config=None,
-    partitioner=None,
-    sparse_combiner='sum'):
-  return dnn_linear_combined.DNNLinearCombinedEstimator(
-      head=head_lib.regression_head(
-          weight_column=weight_column, label_dimension=label_dimension,
-          # Tests in core (from which this test inherits) test the sum loss.
-          loss_reduction=losses.Reduction.SUM),
-      model_dir=model_dir,
-      linear_feature_columns=feature_columns,
-      linear_optimizer=optimizer,
-      input_layer_partitioner=partitioner,
-      config=config,
-      linear_sparse_combiner=sparse_combiner)
-
-
-class LinearOnlyEstimatorEvaluateTest(
-    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
-        self, _linear_only_estimator_fn)
-
-
-class LinearOnlyEstimatorPredictTest(
-    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
-        self, _linear_only_estimator_fn)
-
-
-class LinearOnlyEstimatorTrainTest(
-    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
-        self, _linear_only_estimator_fn)
-
-
-class DNNLinearCombinedEstimatorIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(
-      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      label_dimension, batch_size):
-    linear_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
-    dnn_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
-    feature_columns = linear_feature_columns + dnn_feature_columns
-    est = dnn_linear_combined.DNNLinearCombinedEstimator(
-        head=head_lib.regression_head(label_dimension=label_dimension),
-        linear_feature_columns=linear_feature_columns,
-        dnn_feature_columns=dnn_feature_columns,
-        dnn_hidden_units=(2, 2),
-        model_dir=self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array([
-        x[prediction_keys.PredictionKeys.PREDICTIONS]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
-
-    # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self):
-    """Tests complete flow with numpy_input_fn."""
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        batch_size=batch_size,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_test.py
deleted file mode 100644
index 050b0428bf..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/dnn_test.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for dnn.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import shutil
-import tempfile
-
-import numpy as np
-import six
-
-from tensorflow.contrib.estimator.python.estimator import dnn
-from tensorflow.contrib.estimator.python.estimator import head as head_lib
-from tensorflow.python.estimator.canned import dnn_testing_utils
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.framework import ops
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-
-
-def _dnn_estimator_fn(weight_column=None, label_dimension=1, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
-  """Returns a DNNEstimator that uses regression_head."""
-  return dnn.DNNEstimator(
-      head=head_lib.regression_head(
-          weight_column=weight_column, label_dimension=label_dimension,
-          # Tests in core (from which this test inherits) test the sum loss.
-          loss_reduction=losses.Reduction.SUM),
-      *args, **kwargs)
-
-
-def _dnn_estimator_classifier_fn(n_classes=3, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
-  """Returns a DNNEstimator that uses multi_class_head."""
-  return dnn.DNNEstimator(head=head_lib.multi_class_head(n_classes=n_classes),
-                          *args, **kwargs)
-
-
-class DNNEstimatorEvaluateTest(
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_estimator_fn)
-
-
-class DNNEstimatorPredictTest(
-    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_estimator_fn)
-
-
-class DNNEstimatorTrainTest(
-    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_estimator_fn)
-
-
-class DNNEstimatorWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
-                                   test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
-        self, _dnn_estimator_classifier_fn, _dnn_estimator_fn)
-
-
-class DNNEstimatorIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(
-      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      label_dimension, batch_size):
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
-    est = dnn.DNNEstimator(
-        head=head_lib.regression_head(label_dimension=label_dimension),
-        hidden_units=(2, 2),
-        feature_columns=feature_columns,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array([
-        x[prediction_keys.PredictionKeys.PREDICTIONS]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
-
-    # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self):
-    """Tests complete flow with numpy_input_fn."""
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        batch_size=batch_size,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index 40a91175b7..29cbdeeb76 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,425 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Deep Neural Network estimators with layer annotations."""
+"""dnn_with_layer_annotations python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import contextlib
-import pickle
-
-from google.protobuf.any_pb2 import Any
-
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import dnn
-from tensorflow.python.feature_column import feature_column as feature_column_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.saved_model import utils as saved_model_utils
-
-
-class LayerAnnotationsCollectionNames(object):
-  """Names for the collections containing the annotations."""
-
-  UNPROCESSED_FEATURES = 'layer_annotations/unprocessed_features'
-  PROCESSED_FEATURES = 'layer_annotatons/processed_features'
-  FEATURE_COLUMNS = 'layer_annotations/feature_columns'
-
-  @classmethod
-  def keys(cls, collection_name):
-    return '%s/keys' % collection_name
-
-  @classmethod
-  def values(cls, collection_name):
-    return '%s/values' % collection_name
-
-
-def serialize_feature_column(feature_column):
-  if isinstance(feature_column, feature_column_lib._EmbeddingColumn):  # pylint: disable=protected-access
-    # We can't pickle nested functions, and we don't need the value of
-    # layer_creator in most cases anyway, so just discard its value.
-    args = feature_column._asdict()
-    args['layer_creator'] = None
-    temp = type(feature_column)(**args)
-    return pickle.dumps(temp)
-  return pickle.dumps(feature_column)
-
-
-def _to_any_wrapped_tensor_info(tensor):
-  """Converts a `Tensor` to a `TensorInfo` wrapped in a proto `Any`."""
-  any_buf = Any()
-  tensor_info = saved_model_utils.build_tensor_info(tensor)
-  any_buf.Pack(tensor_info)
-  return any_buf
-
-
-def make_input_layer_with_layer_annotations(original_input_layer):
-  """Make an input_layer replacement function that adds layer annotations."""
-
-  def input_layer_with_layer_annotations(features,
-                                         feature_columns,
-                                         weight_collections=None,
-                                         trainable=True,
-                                         cols_to_vars=None,
-                                         scope=None,
-                                         cols_to_output_tensors=None,
-                                         from_template=False):
-    """Returns a dense `Tensor` as input layer based on given `feature_columns`.
-
-    Generally a single example in training data is described with
-    FeatureColumns.
-    At the first layer of the model, this column oriented data should be
-    converted
-    to a single `Tensor`.
-
-    This is like tf.feature_column.input_layer, except with added
-    Integrated-Gradient annotations.
-
-    Args:
-      features: A mapping from key to tensors. `_FeatureColumn`s look up via
-        these keys. For example `numeric_column('price')` will look at 'price'
-        key in this dict. Values can be a `SparseTensor` or a `Tensor` depends
-        on corresponding `_FeatureColumn`.
-      feature_columns: An iterable containing the FeatureColumns to use as
-        inputs to your model. All items should be instances of classes derived
-        from `_DenseColumn` such as `numeric_column`, `embedding_column`,
-        `bucketized_column`, `indicator_column`. If you have categorical
-        features, you can wrap them with an `embedding_column` or
-        `indicator_column`.
-      weight_collections: A list of collection names to which the Variable will
-        be added. Note that variables will also be added to collections
-        `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
-      trainable: If `True` also add the variable to the graph collection
-        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
-      cols_to_vars: If not `None`, must be a dictionary that will be filled with
-        a mapping from `_FeatureColumn` to list of `Variable`s.  For example,
-        after the call, we might have cols_to_vars = {_EmbeddingColumn(
-        categorical_column=_HashedCategoricalColumn( key='sparse_feature',
-        hash_bucket_size=5, dtype=tf.string), dimension=10): [<tf.Variable
-        'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1'
-          shape=(5, 10)]} If a column creates no variables, its value will be an
-          empty list.
-      scope: A name or variable scope to use
-      cols_to_output_tensors: If not `None`, must be a dictionary that will be
-        filled with a mapping from '_FeatureColumn' to the associated output
-        `Tensor`s.
-      from_template: True if the method is being instantiated from a
-        `make_template`.
-
-    Returns:
-      A `Tensor` which represents input layer of a model. Its shape
-      is (batch_size, first_layer_dimension) and its dtype is `float32`.
-      first_layer_dimension is determined based on given `feature_columns`.
-
-    Raises:
-      ValueError: features and feature_columns have different lengths.
-    """
-
-    local_cols_to_output_tensors = {}
-    input_layer = original_input_layer(
-        features=features,
-        feature_columns=feature_columns,
-        weight_collections=weight_collections,
-        trainable=trainable,
-        cols_to_vars=cols_to_vars,
-        scope=scope,
-        cols_to_output_tensors=local_cols_to_output_tensors,
-        from_template=from_template)
-
-    if cols_to_output_tensors is not None:
-      cols_to_output_tensors = local_cols_to_output_tensors
-
-    # Annotate features.
-    # These are the parsed Tensors, before embedding.
-
-    # Only annotate features used by FeatureColumns.
-    # We figure which ones are used by FeatureColumns by creating a parsing
-    # spec and looking at the keys.
-    spec = feature_column_lib.make_parse_example_spec(feature_columns)
-    for key in spec.keys():
-      tensor = ops.convert_to_tensor_or_indexed_slices(features[key])
-      ops.add_to_collection(
-          LayerAnnotationsCollectionNames.keys(
-              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
-      ops.add_to_collection(
-          LayerAnnotationsCollectionNames.values(
-              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES),
-          _to_any_wrapped_tensor_info(tensor))
-
-    # Annotate feature columns.
-    for column in feature_columns:
-      # TODO(cyfoo): Find a better way to serialize and deserialize
-      # _FeatureColumn.
-      ops.add_to_collection(LayerAnnotationsCollectionNames.FEATURE_COLUMNS,
-                            serialize_feature_column(column))
-
-    for column, tensor in local_cols_to_output_tensors.items():
-      ops.add_to_collection(
-          LayerAnnotationsCollectionNames.keys(
-              LayerAnnotationsCollectionNames.PROCESSED_FEATURES), column.name)
-      ops.add_to_collection(
-          LayerAnnotationsCollectionNames.values(
-              LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
-          _to_any_wrapped_tensor_info(tensor))
-
-    return input_layer
-
-  return input_layer_with_layer_annotations
-
-
-@contextlib.contextmanager
-def _monkey_patch(module, function, replacement):
-  old_function = getattr(module, function)
-  setattr(module, function, replacement)
-  yield
-  setattr(module, function, old_function)
-
-
-def DNNClassifierWithLayerAnnotations(  # pylint: disable=invalid-name
-    hidden_units,
-    feature_columns,
-    model_dir=None,
-    n_classes=2,
-    weight_column=None,
-    label_vocabulary=None,
-    optimizer='Adagrad',
-    activation_fn=nn.relu,
-    dropout=None,
-    input_layer_partitioner=None,
-    config=None,
-    warm_start_from=None,
-    loss_reduction=losses.Reduction.SUM):
-  """A classifier for TensorFlow DNN models with layer annotations.
-
-  This classifier is fuctionally identical to estimator.DNNClassifier as far as
-  training and evaluating models is concerned. The key difference is that this
-  classifier adds additional layer annotations, which can be used for computing
-  Integrated Gradients.
-
-  Integrated Gradients is a method for attributing a classifier's predictions
-  to its input features (https://arxiv.org/pdf/1703.01365.pdf). Given an input
-  instance, the method assigns attribution scores to individual features in
-  proportion to the feature's importance to the classifier's prediction.
-
-  See estimator.DNNClassifer for example code for training and evaluating models
-  using this classifier.
-
-  This classifier is checkpoint-compatible with estimator.DNNClassifier and
-  therefore the following should work seamlessly:
-
-  # Instantiate ordinary estimator as usual.
-  estimator = tf.estimator.DNNClassifier(
-    config, feature_columns, hidden_units, ...)
-
-  # Train estimator, export checkpoint.
-  tf.estimator.train_and_evaluate(estimator, ...)
-
-  # Instantiate estimator with annotations with the same configuration as the
-  # ordinary estimator.
-  estimator_with_annotations = (
-    tf.contrib.estimator.DNNClassifierWithLayerAnnotations(
-      config, feature_columns, hidden_units, ...))
-
-  # Call export_savedmodel with the same arguments as the ordinary estimator,
-  # using the checkpoint produced for the ordinary estimator.
-  estimator_with_annotations.export_saved_model(
-    export_dir_base, serving_input_receiver, ...
-    checkpoint_path='/path/to/ordinary/estimator/checkpoint/model.ckpt-1234')
-
-  Args:
-    hidden_units: Iterable of number hidden units per layer. All layers are
-      fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second
-      one has 32.
-    feature_columns: An iterable containing all the feature columns used by the
-      model. All items in the set should be instances of classes derived from
-      `_FeatureColumn`.
-    model_dir: Directory to save model parameters, graph and etc. This can also
-      be used to load checkpoints from the directory into an estimator to
-      continue training a previously saved model.
-    n_classes: Number of label classes. Defaults to 2, namely binary
-      classification. Must be > 1.
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example. If it is a string, it is
-      used as a key to fetch weight tensor from the `features`. If it is a
-      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
-      weight_column.normalizer_fn is applied on it to get weight tensor.
-    label_vocabulary: A list of strings represents possible label values. If
-      given, labels must be string type and have any value in
-      `label_vocabulary`. If it is not given, that means labels are already
-      encoded as integer or float within [0, 1] for `n_classes=2` and encoded as
-      integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there
-      will be errors if vocabulary is not provided and labels are string.
-    optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
-      to Adagrad optimizer.
-    activation_fn: Activation function applied to each layer. If `None`, will
-      use `tf.nn.relu`.
-    dropout: When not `None`, the probability we will drop out a given
-      coordinate.
-    input_layer_partitioner: Optional. Partitioner for input layer. Defaults to
-      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-    config: `RunConfig` object to configure the runtime settings.
-    warm_start_from: A string filepath to a checkpoint to warm-start from, or a
-      `WarmStartSettings` object to fully configure warm-starting.  If the
-      string filepath is provided instead of a `WarmStartSettings`, then all
-      weights are warm-started, and it is assumed that vocabularies and Tensor
-      names are unchanged.
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch. Defaults to `SUM`.
-
-  Returns:
-    DNNClassifier with layer annotations.
-  """
-
-  original = dnn.DNNClassifier(
-      hidden_units=hidden_units,
-      feature_columns=feature_columns,
-      model_dir=model_dir,
-      n_classes=n_classes,
-      weight_column=weight_column,
-      label_vocabulary=label_vocabulary,
-      optimizer=optimizer,
-      activation_fn=activation_fn,
-      dropout=dropout,
-      input_layer_partitioner=input_layer_partitioner,
-      config=config,
-      warm_start_from=warm_start_from,
-      loss_reduction=loss_reduction)
-
-  def _model_fn(features, labels, mode, config):
-    with _monkey_patch(
-        feature_column_lib, '_internal_input_layer',
-        make_input_layer_with_layer_annotations(
-            feature_column_lib._internal_input_layer)):  # pylint: disable=protected-access
-      return original.model_fn(features, labels, mode, config)
-
-  return estimator.Estimator(
-      model_fn=_model_fn,
-      model_dir=model_dir,
-      config=config,
-      warm_start_from=warm_start_from)
-
-
-def DNNRegressorWithLayerAnnotations(  # pylint: disable=invalid-name
-    hidden_units,
-    feature_columns,
-    model_dir=None,
-    label_dimension=1,
-    weight_column=None,
-    optimizer='Adagrad',
-    activation_fn=nn.relu,
-    dropout=None,
-    input_layer_partitioner=None,
-    config=None,
-    warm_start_from=None,
-    loss_reduction=losses.Reduction.SUM,
-):
-  """A regressor for TensorFlow DNN models with layer annotations.
-
-  This regressor is fuctionally identical to estimator.DNNRegressor as far as
-  training and evaluating models is concerned. The key difference is that this
-  classifier adds additional layer annotations, which can be used for computing
-  Integrated Gradients.
-
-  Integrated Gradients is a method for attributing a classifier's predictions
-  to its input features (https://arxiv.org/pdf/1703.01365.pdf). Given an input
-  instance, the method assigns attribution scores to individual features in
-  proportion to the feature's importance to the classifier's prediction.
-
-  See estimator.DNNRegressor for example code for training and evaluating models
-  using this regressor.
-
-  This regressor is checkpoint-compatible with estimator.DNNRegressor and
-  therefore the following should work seamlessly:
-
-  # Instantiate ordinary estimator as usual.
-  estimator = tf.estimator.DNNRegressor(
-    config, feature_columns, hidden_units, ...)
-
-  # Train estimator, export checkpoint.
-  tf.estimator.train_and_evaluate(estimator, ...)
-
-  # Instantiate estimator with annotations with the same configuration as the
-  # ordinary estimator.
-  estimator_with_annotations = (
-    tf.contrib.estimator.DNNRegressorWithLayerAnnotations(
-      config, feature_columns, hidden_units, ...))
-
-  # Call export_savedmodel with the same arguments as the ordinary estimator,
-  # using the checkpoint produced for the ordinary estimator.
-  estimator_with_annotations.export_saved_model(
-    export_dir_base, serving_input_receiver, ...
-    checkpoint_path='/path/to/ordinary/estimator/checkpoint/model.ckpt-1234')
-
-  Args:
-    hidden_units: Iterable of number hidden units per layer. All layers are
-      fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second
-      one has 32.
-    feature_columns: An iterable containing all the feature columns used by the
-      model. All items in the set should be instances of classes derived from
-      `_FeatureColumn`.
-    model_dir: Directory to save model parameters, graph and etc. This can also
-      be used to load checkpoints from the directory into a estimator to
-      continue training a previously saved model.
-    label_dimension: Number of regression targets per example. This is the size
-      of the last dimension of the labels and logits `Tensor` objects
-      (typically, these have shape `[batch_size, label_dimension]`).
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example. If it is a string, it is
-      used as a key to fetch weight tensor from the `features`. If it is a
-      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
-      weight_column.normalizer_fn is applied on it to get weight tensor.
-    optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
-      to Adagrad optimizer.
-    activation_fn: Activation function applied to each layer. If `None`, will
-      use `tf.nn.relu`.
-    dropout: When not `None`, the probability we will drop out a given
-      coordinate.
-    input_layer_partitioner: Optional. Partitioner for input layer. Defaults to
-      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-    config: `RunConfig` object to configure the runtime settings.
-    warm_start_from: A string filepath to a checkpoint to warm-start from, or a
-      `WarmStartSettings` object to fully configure warm-starting.  If the
-      string filepath is provided instead of a `WarmStartSettings`, then all
-      weights are warm-started, and it is assumed that vocabularies and Tensor
-      names are unchanged.
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch. Defaults to `SUM`.
-
-  Returns:
-    DNNRegressor with layer annotations.
-  """
-
-  original = dnn.DNNRegressor(
-      hidden_units=hidden_units,
-      feature_columns=feature_columns,
-      model_dir=model_dir,
-      label_dimension=label_dimension,
-      weight_column=weight_column,
-      optimizer=optimizer,
-      activation_fn=activation_fn,
-      dropout=dropout,
-      input_layer_partitioner=input_layer_partitioner,
-      config=config,
-      warm_start_from=warm_start_from,
-      loss_reduction=loss_reduction,
-  )
+from tensorflow_estimator.contrib.estimator.python.estimator import dnn_with_layer_annotations
 
-  def _model_fn(features, labels, mode, config):
-    with _monkey_patch(
-        feature_column_lib, '_internal_input_layer',
-        make_input_layer_with_layer_annotations(
-            feature_column_lib._internal_input_layer)):  # pylint: disable=protected-access
-      return original.model_fn(features, labels, mode, config)
+# Include attrs that start with single underscore.
+dnn_with_layer_annotations.__all__ = [
+    s for s in dir(dnn_with_layer_annotations) if not s.startswith('__')
+]
 
-  return estimator.Estimator(
-      model_fn=_model_fn,
-      model_dir=model_dir,
-      config=config,
-      warm_start_from=warm_start_from)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.dnn_with_layer_annotations import *
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations_test.py
deleted file mode 100644
index 2fe3d4c72e..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations_test.py
+++ /dev/null
@@ -1,611 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for dnn_with_layer_annotations.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import shutil
-import tempfile
-
-import numpy as np
-import six
-
-from tensorflow.contrib.estimator.python.estimator import dnn_with_layer_annotations
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.canned import dnn
-from tensorflow.python.estimator.canned import dnn_testing_utils
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.estimator.inputs import pandas_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import data_flow_ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import input as input_lib
-from tensorflow.python.training import queue_runner
-
-try:
-  # pylint: disable=g-import-not-at-top
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
-
-
-def _dnn_classifier_fn(*args, **kwargs):
-  return dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations(
-      *args, **kwargs)
-
-
-class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
-                          test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(self, _dnn_classifier_fn,
-                                                       _dnn_regressor_fn)
-
-
-class DNNWithLayerAnnotationsClassifierEvaluateTest(
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
-        self, _dnn_classifier_fn)
-
-
-class DNNClassifierWithLayerAnnotationsPredictTest(
-    dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
-        self, _dnn_classifier_fn)
-
-
-class DNNClassifierWithLayerAnnotationsTrainTest(
-    dnn_testing_utils.BaseDNNClassifierTrainTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
-        self, _dnn_classifier_fn)
-
-
-def _dnn_regressor_fn(*args, **kwargs):
-  return dnn_with_layer_annotations.DNNRegressorWithLayerAnnotations(
-      *args, **kwargs)
-
-
-class DNNWithLayerAnnotationsTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def _getLayerAnnotationCollection(self, graph, collection_name):
-    keys = graph.get_collection(
-        dnn_with_layer_annotations.LayerAnnotationsCollectionNames.keys(
-            collection_name))
-    values = graph.get_collection(
-        dnn_with_layer_annotations.LayerAnnotationsCollectionNames.values(
-            collection_name))
-    if len(keys) != len(values):
-      raise ValueError('keys and values should have same length. lengths were: '
-                       '%d and %d, and elements were %s and %s' %
-                       (len(keys), len(values), keys, values))
-    return dict(zip(keys, values))
-
-  def _testAnnotationsPresentForEstimator(self, estimator_class):
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(1,)),
-        feature_column.embedding_column(
-            feature_column.categorical_column_with_vocabulary_list(
-                'y', vocabulary_list=['a', 'b', 'c']),
-            dimension=3)
-    ]
-    estimator = estimator_class(
-        hidden_units=(2, 2),
-        feature_columns=feature_columns,
-        model_dir=self._model_dir)
-    model_fn = estimator.model_fn
-
-    graph = ops.Graph()
-    with graph.as_default():
-      model_fn({
-          'x': array_ops.constant([1.0]),
-          'y': array_ops.constant(['a'])
-      }, {},
-               model_fn_lib.ModeKeys.PREDICT,
-               config=None)
-
-      unprocessed_features = self._getLayerAnnotationCollection(
-          graph, dnn_with_layer_annotations.LayerAnnotationsCollectionNames
-          .UNPROCESSED_FEATURES)
-      processed_features = self._getLayerAnnotationCollection(
-          graph, dnn_with_layer_annotations.LayerAnnotationsCollectionNames
-          .PROCESSED_FEATURES)
-      feature_columns = graph.get_collection(
-          dnn_with_layer_annotations.LayerAnnotationsCollectionNames
-          .FEATURE_COLUMNS)
-
-      self.assertItemsEqual(unprocessed_features.keys(), ['x', 'y'])
-      self.assertEqual(2, len(processed_features.keys()))
-      self.assertEqual(2, len(feature_columns))
-
-  def testAnnotationsPresentForClassifier(self):
-    self._testAnnotationsPresentForEstimator(
-        dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations)
-
-  def testAnnotationsPresentForRegressor(self):
-    self._testAnnotationsPresentForEstimator(
-        dnn_with_layer_annotations.DNNRegressorWithLayerAnnotations)
-
-  def _testCheckpointCompatibleWithNonAnnotatedEstimator(
-      self, train_input_fn, predict_input_fn, non_annotated_class,
-      annotated_class, prediction_key, estimator_args):
-    input_dimension = 2
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))
-    ]
-    estimator = non_annotated_class(
-        model_dir=self._model_dir,
-        hidden_units=(2, 2),
-        feature_columns=feature_columns,
-        **estimator_args)
-
-    estimator.train(train_input_fn, steps=10)
-
-    predictions = np.array(
-        [x[prediction_key] for x in estimator.predict(predict_input_fn)])
-
-    annotated_estimator = annotated_class(
-        model_dir=self._model_dir,
-        hidden_units=(2, 2),
-        feature_columns=feature_columns,
-        warm_start_from=self._model_dir,
-        **estimator_args)
-
-    annotated_predictions = np.array([
-        x[prediction_key] for x in annotated_estimator.predict(predict_input_fn)
-    ])
-
-    self.assertAllEqual(predictions.shape, annotated_predictions.shape)
-    for i, (a, b) in enumerate(
-        zip(predictions.flatten(), annotated_predictions.flatten())):
-      self.assertAlmostEqual(a, b, msg='index=%d' % i)
-
-  def testCheckpointCompatibleForClassifier(self):
-    n_classes = 2
-    input_dimension = 2
-    batch_size = 10
-    data = np.linspace(
-        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
-    x_data = data.reshape(batch_size, input_dimension)
-    y_data = np.reshape(
-        np.rint(data[:batch_size]).astype(np.int64), (batch_size, 1))
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data},
-        y=y_data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data}, batch_size=batch_size, shuffle=False)
-
-    self._testCheckpointCompatibleWithNonAnnotatedEstimator(
-        train_input_fn,
-        predict_input_fn,
-        dnn.DNNClassifier,
-        dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations,
-        prediction_key=prediction_keys.PredictionKeys.PROBABILITIES,
-        estimator_args={'n_classes': n_classes})
-
-  def testCheckpointCompatibleForRegressor(self):
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, batch_size=batch_size, shuffle=False)
-
-    self._testCheckpointCompatibleWithNonAnnotatedEstimator(
-        train_input_fn,
-        predict_input_fn,
-        dnn.DNNRegressor,
-        dnn_with_layer_annotations.DNNRegressorWithLayerAnnotations,
-        prediction_key=prediction_keys.PredictionKeys.PREDICTIONS,
-        estimator_args={'label_dimension': label_dimension})
-
-
-class DNNRegressorWithLayerAnnotationsEvaluateTest(
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_regressor_fn)
-
-
-class DNNRegressorWithLayerAnnotationsPredictTest(
-    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_regressor_fn)
-
-
-class DNNRegressorWithLayerAnnotationsTrainTest(
-    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_regressor_fn)
-
-
-def _queue_parsed_features(feature_map):
-  tensors_to_enqueue = []
-  keys = []
-  for key, tensor in six.iteritems(feature_map):
-    keys.append(key)
-    tensors_to_enqueue.append(tensor)
-  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
-  input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
-  queue_runner.add_queue_runner(
-      queue_runner.QueueRunner(input_queue,
-                               [input_queue.enqueue(tensors_to_enqueue)]))
-  dequeued_tensors = input_queue.dequeue()
-  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
-
-
-class DNNRegressorWithLayerAnnotationsIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, batch_size):
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))
-    ]
-    est = dnn_with_layer_annotations.DNNRegressorWithLayerAnnotations(
-        hidden_units=(2, 2),
-        feature_columns=feature_columns,
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array([
-        x[prediction_keys.PredictionKeys.PREDICTIONS]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
-
-    # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self):
-    """Tests complete flow with numpy_input_fn."""
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, batch_size=batch_size, shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size)
-
-  def test_pandas_input_fn(self):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-    label_dimension = 1
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size, dtype=np.float32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(data)
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x, batch_size=batch_size, shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size)
-
-  def test_input_fn_from_parse_example(self):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-
-    serialized_examples = []
-    for datum in data:
-      example = example_pb2.Example(
-          features=feature_pb2.Features(
-              feature={
-                  'x':
-                      feature_pb2.Feature(
-                          float_list=feature_pb2.FloatList(value=datum)),
-                  'y':
-                      feature_pb2.Feature(
-                          float_list=feature_pb2.FloatList(value=datum)),
-              }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
-    }
-
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = _queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = _queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = _queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    self._test_complete_flow(
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size)
-
-
-class DNNClassifierWithLayerAnnotationsIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _as_label(self, data_in_float):
-    return np.rint(data_in_float).astype(np.int64)
-
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, n_classes, batch_size):
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))
-    ]
-    est = dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations(
-        hidden_units=(2, 2),
-        feature_columns=feature_columns,
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predicted_proba = np.array([
-        x[prediction_keys.PredictionKeys.PROBABILITIES]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
-
-    # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self):
-    """Tests complete flow with numpy_input_fn."""
-    n_classes = 3
-    input_dimension = 2
-    batch_size = 10
-    data = np.linspace(
-        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
-    x_data = data.reshape(batch_size, input_dimension)
-    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data},
-        y=y_data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data}, batch_size=batch_size, shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        n_classes=n_classes,
-        batch_size=batch_size)
-
-  def test_pandas_input_fn(self):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-    input_dimension = 1
-    n_classes = 3
-    batch_size = 10
-    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(self._as_label(data))
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x, batch_size=batch_size, shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        n_classes=n_classes,
-        batch_size=batch_size)
-
-  def test_input_fn_from_parse_example(self):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    input_dimension = 2
-    n_classes = 3
-    batch_size = 10
-    data = np.linspace(
-        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, input_dimension)
-
-    serialized_examples = []
-    for datum in data:
-      example = example_pb2.Example(
-          features=feature_pb2.Features(
-              feature={
-                  'x':
-                      feature_pb2.Feature(
-                          float_list=feature_pb2.FloatList(value=datum)),
-                  'y':
-                      feature_pb2.Feature(
-                          int64_list=feature_pb2.Int64List(
-                              value=self._as_label(datum[:1]))),
-              }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
-    }
-
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = _queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = _queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = _queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    self._test_complete_flow(
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=input_dimension,
-        n_classes=n_classes,
-        batch_size=batch_size)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/early_stopping.py b/tensorflow/contrib/estimator/python/estimator/early_stopping.py
index cafe8279c7..b5d256dfeb 100644
--- a/tensorflow/contrib/estimator/python/estimator/early_stopping.py
+++ b/tensorflow/contrib/estimator/python/estimator/early_stopping.py
@@ -12,495 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Utilities for early stopping."""
+"""early_stopping python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import operator
-import os
-
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging
-from tensorflow.python.summary import summary_iterator
-from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.training import training_util
-
-_EVENT_FILE_GLOB_PATTERN = 'events.out.tfevents.*'
-
-
-def make_early_stopping_hook(estimator,
-                             should_stop_fn,
-                             run_every_secs=60,
-                             run_every_steps=None):
-  """Creates early-stopping hook.
-
-  Returns a `SessionRunHook` that stops training when `should_stop_fn` returns
-  `True`.
-
-  Usage example:
-
-  ```python
-  estimator = ...
-  hook = early_stopping.make_early_stopping_hook(
-      estimator, should_stop_fn=make_stop_fn(...))
-  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
-  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
-  ```
-
-  Caveat: Current implementation supports early-stopping both training and
-  evaluation in local mode. In distributed mode, training can be stopped but
-  evaluation (where it's a separate job) will indefinitely wait for new model
-  checkpoints to evaluate, so you will need other means to detect and stop it.
-  Early-stopping evaluation in distributed mode requires changes in
-  `train_and_evaluate` API and will be addressed in a future revision.
-
-  Args:
-    estimator: A `tf.estimator.Estimator` instance.
-    should_stop_fn: `callable`, function that takes no arguments and returns a
-      `bool`. If the function returns `True`, stopping will be initiated by the
-      chief.
-    run_every_secs: If specified, calls `should_stop_fn` at an interval of
-      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
-      `run_every_steps` must be set.
-    run_every_steps: If specified, calls `should_stop_fn` every
-      `run_every_steps` steps. Either this or `run_every_secs` must be set.
-
-  Returns:
-    A `SessionRunHook` that periodically executes `should_stop_fn` and initiates
-    early stopping if the function returns `True`.
-
-  Raises:
-    TypeError: If `estimator` is not of type `tf.estimator.Estimator`.
-    ValueError: If both `run_every_secs` and `run_every_steps` are set.
-  """
-  if not isinstance(estimator, estimator_lib.Estimator):
-    raise TypeError('`estimator` must have type `tf.estimator.Estimator`. '
-                    'Got: {}'.format(type(estimator)))
-
-  if run_every_secs is not None and run_every_steps is not None:
-    raise ValueError('Only one of `run_every_secs` and `run_every_steps` must '
-                     'be set.')
-
-  if estimator.config.is_chief:
-    return _StopOnPredicateHook(should_stop_fn, run_every_secs, run_every_steps)
-  else:
-    return _CheckForStoppingHook()
-
-
-def stop_if_higher_hook(estimator,
-                        metric_name,
-                        threshold,
-                        eval_dir=None,
-                        min_steps=0,
-                        run_every_secs=60,
-                        run_every_steps=None):
-  """Creates hook to stop if the given metric is higher than the threshold.
-
-  Usage example:
-
-  ```python
-  estimator = ...
-  # Hook to stop training if accuracy becomes higher than 0.9.
-  hook = early_stopping.stop_if_higher_hook(estimator, "accuracy", 0.9)
-  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
-  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
-  ```
-
-  Caveat: Current implementation supports early-stopping both training and
-  evaluation in local mode. In distributed mode, training can be stopped but
-  evaluation (where it's a separate job) will indefinitely wait for new model
-  checkpoints to evaluate, so you will need other means to detect and stop it.
-  Early-stopping evaluation in distributed mode requires changes in
-  `train_and_evaluate` API and will be addressed in a future revision.
-
-  Args:
-    estimator: A `tf.estimator.Estimator` instance.
-    metric_name: `str`, metric to track. "loss", "accuracy", etc.
-    threshold: Numeric threshold for the given metric.
-    eval_dir: If set, directory containing summary files with eval metrics. By
-      default, `estimator.eval_dir()` will be used.
-    min_steps: `int`, stop is never requested if global step is less than this
-      value. Defaults to 0.
-    run_every_secs: If specified, calls `should_stop_fn` at an interval of
-      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
-      `run_every_steps` must be set.
-    run_every_steps: If specified, calls `should_stop_fn` every
-      `run_every_steps` steps. Either this or `run_every_secs` must be set.
-
-  Returns:
-    An early-stopping hook of type `SessionRunHook` that periodically checks
-    if the given metric is higher than specified threshold and initiates
-    early stopping if true.
-  """
-  return _stop_if_threshold_crossed_hook(
-      estimator=estimator,
-      metric_name=metric_name,
-      threshold=threshold,
-      higher_is_better=True,
-      eval_dir=eval_dir,
-      min_steps=min_steps,
-      run_every_secs=run_every_secs,
-      run_every_steps=run_every_steps)
-
-
-def stop_if_lower_hook(estimator,
-                       metric_name,
-                       threshold,
-                       eval_dir=None,
-                       min_steps=0,
-                       run_every_secs=60,
-                       run_every_steps=None):
-  """Creates hook to stop if the given metric is lower than the threshold.
-
-  Usage example:
-
-  ```python
-  estimator = ...
-  # Hook to stop training if loss becomes lower than 100.
-  hook = early_stopping.stop_if_lower_hook(estimator, "loss", 100)
-  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
-  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
-  ```
-
-  Caveat: Current implementation supports early-stopping both training and
-  evaluation in local mode. In distributed mode, training can be stopped but
-  evaluation (where it's a separate job) will indefinitely wait for new model
-  checkpoints to evaluate, so you will need other means to detect and stop it.
-  Early-stopping evaluation in distributed mode requires changes in
-  `train_and_evaluate` API and will be addressed in a future revision.
-
-  Args:
-    estimator: A `tf.estimator.Estimator` instance.
-    metric_name: `str`, metric to track. "loss", "accuracy", etc.
-    threshold: Numeric threshold for the given metric.
-    eval_dir: If set, directory containing summary files with eval metrics. By
-      default, `estimator.eval_dir()` will be used.
-    min_steps: `int`, stop is never requested if global step is less than this
-      value. Defaults to 0.
-    run_every_secs: If specified, calls `should_stop_fn` at an interval of
-      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
-      `run_every_steps` must be set.
-    run_every_steps: If specified, calls `should_stop_fn` every
-      `run_every_steps` steps. Either this or `run_every_secs` must be set.
-
-  Returns:
-    An early-stopping hook of type `SessionRunHook` that periodically checks
-    if the given metric is lower than specified threshold and initiates
-    early stopping if true.
-  """
-  return _stop_if_threshold_crossed_hook(
-      estimator=estimator,
-      metric_name=metric_name,
-      threshold=threshold,
-      higher_is_better=False,
-      eval_dir=eval_dir,
-      min_steps=min_steps,
-      run_every_secs=run_every_secs,
-      run_every_steps=run_every_steps)
-
-
-def stop_if_no_increase_hook(estimator,
-                             metric_name,
-                             max_steps_without_increase,
-                             eval_dir=None,
-                             min_steps=0,
-                             run_every_secs=60,
-                             run_every_steps=None):
-  """Creates hook to stop if metric does not increase within given max steps.
-
-  Usage example:
-
-  ```python
-  estimator = ...
-  # Hook to stop training if accuracy does not increase in over 100000 steps.
-  hook = early_stopping.stop_if_no_increase_hook(estimator, "accuracy", 100000)
-  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
-  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
-  ```
-
-  Caveat: Current implementation supports early-stopping both training and
-  evaluation in local mode. In distributed mode, training can be stopped but
-  evaluation (where it's a separate job) will indefinitely wait for new model
-  checkpoints to evaluate, so you will need other means to detect and stop it.
-  Early-stopping evaluation in distributed mode requires changes in
-  `train_and_evaluate` API and will be addressed in a future revision.
-
-  Args:
-    estimator: A `tf.estimator.Estimator` instance.
-    metric_name: `str`, metric to track. "loss", "accuracy", etc.
-    max_steps_without_increase: `int`, maximum number of training steps with no
-      increase in the given metric.
-    eval_dir: If set, directory containing summary files with eval metrics. By
-      default, `estimator.eval_dir()` will be used.
-    min_steps: `int`, stop is never requested if global step is less than this
-      value. Defaults to 0.
-    run_every_secs: If specified, calls `should_stop_fn` at an interval of
-      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
-      `run_every_steps` must be set.
-    run_every_steps: If specified, calls `should_stop_fn` every
-      `run_every_steps` steps. Either this or `run_every_secs` must be set.
-
-  Returns:
-    An early-stopping hook of type `SessionRunHook` that periodically checks
-    if the given metric shows no increase over given maximum number of
-    training steps, and initiates early stopping if true.
-  """
-  return _stop_if_no_metric_improvement_hook(
-      estimator=estimator,
-      metric_name=metric_name,
-      max_steps_without_improvement=max_steps_without_increase,
-      higher_is_better=True,
-      eval_dir=eval_dir,
-      min_steps=min_steps,
-      run_every_secs=run_every_secs,
-      run_every_steps=run_every_steps)
-
-
-def stop_if_no_decrease_hook(estimator,
-                             metric_name,
-                             max_steps_without_decrease,
-                             eval_dir=None,
-                             min_steps=0,
-                             run_every_secs=60,
-                             run_every_steps=None):
-  """Creates hook to stop if metric does not decrease within given max steps.
-
-  Usage example:
-
-  ```python
-  estimator = ...
-  # Hook to stop training if loss does not decrease in over 100000 steps.
-  hook = early_stopping.stop_if_no_decrease_hook(estimator, "loss", 100000)
-  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
-  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
-  ```
-
-  Caveat: Current implementation supports early-stopping both training and
-  evaluation in local mode. In distributed mode, training can be stopped but
-  evaluation (where it's a separate job) will indefinitely wait for new model
-  checkpoints to evaluate, so you will need other means to detect and stop it.
-  Early-stopping evaluation in distributed mode requires changes in
-  `train_and_evaluate` API and will be addressed in a future revision.
-
-  Args:
-    estimator: A `tf.estimator.Estimator` instance.
-    metric_name: `str`, metric to track. "loss", "accuracy", etc.
-    max_steps_without_decrease: `int`, maximum number of training steps with no
-      decrease in the given metric.
-    eval_dir: If set, directory containing summary files with eval metrics. By
-      default, `estimator.eval_dir()` will be used.
-    min_steps: `int`, stop is never requested if global step is less than this
-      value. Defaults to 0.
-    run_every_secs: If specified, calls `should_stop_fn` at an interval of
-      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
-      `run_every_steps` must be set.
-    run_every_steps: If specified, calls `should_stop_fn` every
-      `run_every_steps` steps. Either this or `run_every_secs` must be set.
-
-  Returns:
-    An early-stopping hook of type `SessionRunHook` that periodically checks
-    if the given metric shows no decrease over given maximum number of
-    training steps, and initiates early stopping if true.
-  """
-  return _stop_if_no_metric_improvement_hook(
-      estimator=estimator,
-      metric_name=metric_name,
-      max_steps_without_improvement=max_steps_without_decrease,
-      higher_is_better=False,
-      eval_dir=eval_dir,
-      min_steps=min_steps,
-      run_every_secs=run_every_secs,
-      run_every_steps=run_every_steps)
-
-
-def read_eval_metrics(eval_dir):
-  """Helper to read eval metrics from eval summary files.
-
-  Args:
-    eval_dir: Directory containing summary files with eval metrics.
-
-  Returns:
-    A `dict` with global steps mapping to `dict` of metric names and values.
-  """
-  eval_metrics_dict = {}
-  for event in _summaries(eval_dir):
-    if not event.HasField('summary'):
-      continue
-    metrics = {}
-    for value in event.summary.value:
-      if value.HasField('simple_value'):
-        metrics[value.tag] = value.simple_value
-    if metrics:
-      eval_metrics_dict[event.step] = metrics
-  return collections.OrderedDict(
-      sorted(eval_metrics_dict.items(), key=lambda t: t[0]))
-
-
-def _stop_if_threshold_crossed_hook(estimator, metric_name, threshold,
-                                    higher_is_better, eval_dir, min_steps,
-                                    run_every_secs, run_every_steps):
-  """Creates early-stopping hook to stop training if threshold is crossed."""
-
-  if eval_dir is None:
-    eval_dir = estimator.eval_dir()
-
-  is_lhs_better = operator.gt if higher_is_better else operator.lt
-  greater_or_lesser = 'greater than' if higher_is_better else 'less than'
-
-  def stop_if_threshold_crossed_fn():
-    """Returns `True` if the given metric crosses specified threshold."""
-
-    eval_results = read_eval_metrics(eval_dir)
-
-    for step, metrics in eval_results.items():
-      if step < min_steps:
-        continue
-      val = metrics[metric_name]
-      if is_lhs_better(val, threshold):
-        tf_logging.info(
-            'At step %s, metric "%s" has value %s which is %s the configured '
-            'threshold (%s) for early stopping.', step, metric_name, val,
-            greater_or_lesser, threshold)
-        return True
-    return False
-
-  return make_early_stopping_hook(
-      estimator=estimator,
-      should_stop_fn=stop_if_threshold_crossed_fn,
-      run_every_secs=run_every_secs,
-      run_every_steps=run_every_steps)
-
-
-def _stop_if_no_metric_improvement_hook(
-    estimator, metric_name, max_steps_without_improvement, higher_is_better,
-    eval_dir, min_steps, run_every_secs, run_every_steps):
-  """Returns hook to stop training if given metric shows no improvement."""
-
-  if eval_dir is None:
-    eval_dir = estimator.eval_dir()
-
-  is_lhs_better = operator.gt if higher_is_better else operator.lt
-  increase_or_decrease = 'increase' if higher_is_better else 'decrease'
-
-  def stop_if_no_metric_improvement_fn():
-    """Returns `True` if metric does not improve within max steps."""
-
-    eval_results = read_eval_metrics(eval_dir)
-
-    best_val = None
-    best_val_step = None
-    for step, metrics in eval_results.items():
-      if step < min_steps:
-        continue
-      val = metrics[metric_name]
-      if best_val is None or is_lhs_better(val, best_val):
-        best_val = val
-        best_val_step = step
-      if step - best_val_step >= max_steps_without_improvement:
-        tf_logging.info(
-            'No %s in metric "%s" for %s steps, which is greater than or equal '
-            'to max steps (%s) configured for early stopping.',
-            increase_or_decrease, metric_name, step - best_val_step,
-            max_steps_without_improvement)
-        return True
-    return False
-
-  return make_early_stopping_hook(
-      estimator=estimator,
-      should_stop_fn=stop_if_no_metric_improvement_fn,
-      run_every_secs=run_every_secs,
-      run_every_steps=run_every_steps)
-
-
-def _summaries(eval_dir):
-  """Yields `tensorflow.Event` protos from event files in the eval dir.
-
-  Args:
-    eval_dir: Directory containing summary files with eval metrics.
-
-  Yields:
-    `tensorflow.Event` object read from the event files.
-  """
-  if gfile.Exists(eval_dir):
-    for event_file in gfile.Glob(
-        os.path.join(eval_dir, _EVENT_FILE_GLOB_PATTERN)):
-      for event in summary_iterator.summary_iterator(event_file):
-        yield event
-
-
-def _get_or_create_stop_var():
-  with variable_scope.variable_scope(
-      name_or_scope='signal_early_stopping',
-      values=[],
-      reuse=variable_scope.AUTO_REUSE):
-    return variable_scope.get_variable(
-        name='STOP',
-        shape=[],
-        dtype=dtypes.bool,
-        initializer=init_ops.constant_initializer(False),
-        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
-        trainable=False)
-
-
-class _StopOnPredicateHook(session_run_hook.SessionRunHook):
-  """Hook that requests stop when `should_stop_fn` returns `True`."""
-
-  def __init__(self, should_stop_fn, run_every_secs=60, run_every_steps=None):
-    if not callable(should_stop_fn):
-      raise TypeError('`should_stop_fn` must be callable.')
-
-    self._should_stop_fn = should_stop_fn
-    self._timer = basic_session_run_hooks.SecondOrStepTimer(
-        every_secs=run_every_secs, every_steps=run_every_steps)
-    self._global_step_tensor = None
-    self._stop_var = None
-    self._stop_op = None
-
-  def begin(self):
-    self._global_step_tensor = training_util.get_global_step()
-    self._stop_var = _get_or_create_stop_var()
-    self._stop_op = state_ops.assign(self._stop_var, True)
-
-  def before_run(self, run_context):
-    del run_context
-    return session_run_hook.SessionRunArgs(self._global_step_tensor)
-
-  def after_run(self, run_context, run_values):
-    global_step = run_values.results
-    if self._timer.should_trigger_for_step(global_step):
-      self._timer.update_last_triggered_step(global_step)
-      if self._should_stop_fn():
-        tf_logging.info('Requesting early stopping at global step %d',
-                        global_step)
-        run_context.session.run(self._stop_op)
-        run_context.request_stop()
-
-
-class _CheckForStoppingHook(session_run_hook.SessionRunHook):
-  """Hook that requests stop if stop is requested by `_StopOnPredicateHook`."""
-
-  def __init__(self):
-    self._stop_var = None
-
-  def begin(self):
-    self._stop_var = _get_or_create_stop_var()
+from tensorflow_estimator.contrib.estimator.python.estimator import early_stopping
 
-  def before_run(self, run_context):
-    del run_context
-    return session_run_hook.SessionRunArgs(self._stop_var)
+# Include attrs that start with single underscore.
+early_stopping.__all__ = [
+    s for s in dir(early_stopping) if not s.startswith('__')
+]
 
-  def after_run(self, run_context, run_values):
-    should_early_stop = run_values.results
-    if should_early_stop:
-      tf_logging.info('Early stopping requested, suspending run.')
-      run_context.request_stop()
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.early_stopping import *
diff --git a/tensorflow/contrib/estimator/python/estimator/early_stopping_test.py b/tensorflow/contrib/estimator/python/estimator/early_stopping_test.py
deleted file mode 100644
index e4bfd4b446..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/early_stopping_test.py
+++ /dev/null
@@ -1,246 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for early_stopping."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import tempfile
-
-from absl.testing import parameterized
-from tensorflow.contrib.estimator.python.estimator import early_stopping
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import run_config
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.platform import test
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import training_util
-
-
-class _FakeRunConfig(run_config.RunConfig):
-
-  def __init__(self, is_chief):
-    super(_FakeRunConfig, self).__init__()
-    self._is_chief = is_chief
-
-  @property
-  def is_chief(self):
-    return self._is_chief
-
-
-def _dummy_model_fn(features, labels, params):
-  _, _, _ = features, labels, params
-
-
-class _FakeEstimator(estimator.Estimator):
-  """Fake estimator for testing."""
-
-  def __init__(self, config):
-    super(_FakeEstimator, self).__init__(
-        model_fn=_dummy_model_fn, config=config)
-
-
-def _write_events(eval_dir, params):
-  """Test helper to write events to summary files."""
-  for steps, loss, accuracy in params:
-    estimator._write_dict_to_summary(eval_dir, {
-        'loss': loss,
-        'accuracy': accuracy,
-    }, steps)
-
-
-class ReadEvalMetricsTest(test.TestCase):
-
-  def test_read_eval_metrics(self):
-    eval_dir = tempfile.mkdtemp()
-    _write_events(
-        eval_dir,
-        [
-            # steps, loss, accuracy
-            (1000, 1, 2),
-            (2000, 3, 4),
-            (3000, 5, 6),
-        ])
-    self.assertEqual({
-        1000: {
-            'loss': 1,
-            'accuracy': 2
-        },
-        2000: {
-            'loss': 3,
-            'accuracy': 4
-        },
-        3000: {
-            'loss': 5,
-            'accuracy': 6
-        },
-    }, early_stopping.read_eval_metrics(eval_dir))
-
-  def test_read_eval_metrics_when_no_events(self):
-    eval_dir = tempfile.mkdtemp()
-    self.assertTrue(os.path.exists(eval_dir))
-
-    # No error should be raised when eval directory exists with no event files.
-    self.assertEqual({}, early_stopping.read_eval_metrics(eval_dir))
-
-    os.rmdir(eval_dir)
-    self.assertFalse(os.path.exists(eval_dir))
-
-    # No error should be raised when eval directory does not exist.
-    self.assertEqual({}, early_stopping.read_eval_metrics(eval_dir))
-
-
-class EarlyStoppingHooksTest(test.TestCase, parameterized.TestCase):
-
-  def setUp(self):
-    config = _FakeRunConfig(is_chief=True)
-    self._estimator = _FakeEstimator(config=config)
-    eval_dir = self._estimator.eval_dir()
-    os.makedirs(eval_dir)
-    _write_events(
-        eval_dir,
-        [
-            # steps, loss, accuracy
-            (1000, 0.8, 0.5),
-            (2000, 0.7, 0.6),
-            (3000, 0.4, 0.7),
-            (3500, 0.41, 0.68),
-        ])
-
-  def run_session(self, hooks, should_stop):
-    hooks = hooks if isinstance(hooks, list) else [hooks]
-    with ops.Graph().as_default():
-      training_util.create_global_step()
-      no_op = control_flow_ops.no_op()
-      with monitored_session.SingularMonitoredSession(hooks=hooks) as mon_sess:
-        mon_sess.run(no_op)
-        self.assertEqual(mon_sess.should_stop(), should_stop)
-
-  @parameterized.parameters((0.8, 0, False), (0.6, 4000, False), (0.6, 0, True))
-  def test_stop_if_higher_hook(self, threshold, min_steps, should_stop):
-    self.run_session(
-        early_stopping.stop_if_higher_hook(
-            self._estimator,
-            metric_name='accuracy',
-            threshold=threshold,
-            min_steps=min_steps), should_stop)
-
-  @parameterized.parameters((0.3, 0, False), (0.5, 4000, False), (0.5, 0, True))
-  def test_stop_if_lower_hook(self, threshold, min_steps, should_stop):
-    self.run_session(
-        early_stopping.stop_if_lower_hook(
-            self._estimator,
-            metric_name='loss',
-            threshold=threshold,
-            min_steps=min_steps), should_stop)
-
-  @parameterized.parameters((1500, 0, False), (500, 4000, False),
-                            (500, 0, True))
-  def test_stop_if_no_increase_hook(self, max_steps, min_steps, should_stop):
-    self.run_session(
-        early_stopping.stop_if_no_increase_hook(
-            self._estimator,
-            metric_name='accuracy',
-            max_steps_without_increase=max_steps,
-            min_steps=min_steps), should_stop)
-
-  @parameterized.parameters((1500, 0, False), (500, 4000, False),
-                            (500, 0, True))
-  def test_stop_if_no_decrease_hook(self, max_steps, min_steps, should_stop):
-    self.run_session(
-        early_stopping.stop_if_no_decrease_hook(
-            self._estimator,
-            metric_name='loss',
-            max_steps_without_decrease=max_steps,
-            min_steps=min_steps), should_stop)
-
-  @parameterized.parameters((1500, 0.3, False), (1500, 0.5, True),
-                            (500, 0.3, True))
-  def test_multiple_hooks(self, max_steps, loss_threshold, should_stop):
-    self.run_session([
-        early_stopping.stop_if_no_decrease_hook(
-            self._estimator,
-            metric_name='loss',
-            max_steps_without_decrease=max_steps),
-        early_stopping.stop_if_lower_hook(
-            self._estimator, metric_name='loss', threshold=loss_threshold)
-    ], should_stop)
-
-  @parameterized.parameters(False, True)
-  def test_make_early_stopping_hook(self, should_stop):
-    self.run_session([
-        early_stopping.make_early_stopping_hook(
-            self._estimator, should_stop_fn=lambda: should_stop)
-    ], should_stop)
-
-  def test_make_early_stopping_hook_typeerror(self):
-    with self.assertRaises(TypeError):
-      early_stopping.make_early_stopping_hook(
-          estimator=object(), should_stop_fn=lambda: True)
-
-  def test_make_early_stopping_hook_valueerror(self):
-    with self.assertRaises(ValueError):
-      early_stopping.make_early_stopping_hook(
-          self._estimator,
-          should_stop_fn=lambda: True,
-          run_every_secs=60,
-          run_every_steps=100)
-
-
-class StopOnPredicateHookTest(test.TestCase):
-
-  def test_stop(self):
-    hook = early_stopping._StopOnPredicateHook(
-        should_stop_fn=lambda: False, run_every_secs=0)
-    with ops.Graph().as_default():
-      training_util.create_global_step()
-      no_op = control_flow_ops.no_op()
-      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
-        mon_sess.run(no_op)
-        self.assertFalse(mon_sess.should_stop())
-        self.assertFalse(mon_sess.raw_session().run(hook._stop_var))
-
-    hook = early_stopping._StopOnPredicateHook(
-        should_stop_fn=lambda: True, run_every_secs=0)
-    with ops.Graph().as_default():
-      training_util.create_global_step()
-      no_op = control_flow_ops.no_op()
-      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
-        mon_sess.run(no_op)
-        self.assertTrue(mon_sess.should_stop())
-        self.assertTrue(mon_sess.raw_session().run(hook._stop_var))
-
-
-class CheckForStoppingHookTest(test.TestCase):
-
-  def test_stop(self):
-    hook = early_stopping._CheckForStoppingHook()
-    with ops.Graph().as_default():
-      no_op = control_flow_ops.no_op()
-      assign_op = state_ops.assign(early_stopping._get_or_create_stop_var(),
-                                   True)
-      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
-        mon_sess.run(no_op)
-        self.assertFalse(mon_sess.should_stop())
-        mon_sess.run(assign_op)
-        self.assertTrue(mon_sess.should_stop())
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/export.py b/tensorflow/contrib/estimator/python/estimator/export.py
index b0deb9b494..4f3fe9c0dd 100644
--- a/tensorflow/contrib/estimator/python/estimator/export.py
+++ b/tensorflow/contrib/estimator/python/estimator/export.py
@@ -12,212 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Wrapper for methods to export train/eval graphs from Estimator."""
+"""export python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.estimator import model_fn as model_fn_lib
-
-
-def export_saved_model_for_mode(
-    estimator, export_dir_base, input_receiver_fn,
-    assets_extra=None,
-    as_text=False,
-    checkpoint_path=None,
-    strip_default_attrs=False,
-    mode=model_fn_lib.ModeKeys.PREDICT):
-  # pylint: disable=line-too-long
-  """Exports a single train/eval/predict graph as a SavedModel.
-
-  For a detailed guide, see [Using SavedModel with Estimators](
-  https://tensorflow.org/guide/saved_model#using_savedmodel_with_estimators).
-
-  Sample usage:
-  ```python
-  classifier = tf.estimator.LinearClassifier(
-      feature_columns=[age, language])
-  classifier.train(input_fn=input_fn, steps=1000)
-
-  feature_spec = {
-      'age': tf.placeholder(dtype=tf.int64),
-      'language': array_ops.placeholder(dtype=tf.string)
-  }
-  label_spec = tf.placeholder(dtype=dtypes.int64)
-
-  train_rcvr_fn = tf.contrib.estimator.build_raw_supervised_input_receiver_fn(
-      feature_spec, label_spec)
-
-  export_dir = tf.contrib.estimator.export_saved_model_for_mode(
-      classifier,
-      export_dir_base='my_model/',
-      input_receiver_fn=train_rcvr_fn,
-      mode=model_fn_lib.ModeKeys.TRAIN)
-
-  # export_dir is a timestamped directory with the SavedModel, which
-  # can be used for serving, analysis with TFMA, or directly loaded in.
-  with ops.Graph().as_default() as graph:
-    with session.Session(graph=graph) as sess:
-      loader.load(sess, [tag_constants.TRAINING], export_dir)
-      weights = graph.get_tensor_by_name(''linear/linear_model/age/weights')
-      ...
-  ```
-
-  This method is a wrapper for _export_all_saved_models, and wraps a raw
-  input_receiver_fn in a dictionary to pass in to that function.
-  See _export_all_saved_models for full docs.
-
-  See tf.contrib.estimator.export_saved_model_for_mode for the currently
-  exposed version of this function.
-
-  Args:
-    estimator: an instance of tf.estimator.Estimator
-    export_dir_base: A string containing a directory in which to create
-      timestamped subdirectories containing exported SavedModels.
-    input_receiver_fn: a function that takes no argument and
-      returns the appropriate subclass of `InputReceiver`.
-    assets_extra: A dict specifying how to populate the assets.extra directory
-      within the exported SavedModel, or `None` if no extra assets are needed.
-    as_text: whether to write the SavedModel proto in text format.
-    checkpoint_path: The checkpoint path to export.  If `None` (the default),
-      the most recent checkpoint found within the model directory is chosen.
-    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
-      removed from the NodeDefs. For a detailed guide, see
-      [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
-    mode: tf.estimator.ModeKeys value indicating with mode will be exported.
-
-  Returns:
-    The string path to the exported directory.
-
-  Raises:
-    ValueError: if input_receiver_fn is None, no export_outputs
-      are provided, or no checkpoint can be found.
-  """
-  # pylint: enable=line-too-long
-
-  # pylint: disable=protected-access
-  return estimator._export_saved_model_for_mode(
-      export_dir_base, input_receiver_fn,
-      assets_extra=assets_extra,
-      as_text=as_text,
-      checkpoint_path=checkpoint_path,
-      strip_default_attrs=strip_default_attrs,
-      mode=mode)
-  # pylint: enable=protected-access
-
-
-def export_all_saved_models(
-    estimator, export_dir_base, input_receiver_fn_map,
-    assets_extra=None,
-    as_text=False,
-    checkpoint_path=None,
-    strip_default_attrs=False):
-  # pylint: disable=line-too-long
-  """Exports requested train/eval/predict graphs as separate SavedModels.
-
-  See tf.contrib.estimator.export_all_saved_models for the currently
-  exposed version of this function.
-
-  For each mode passed in via the input_receiver_fn_map,
-  this method builds a new graph by calling the input_receiver_fn to obtain
-  feature and label `Tensor`s. Next, this method calls the `Estimator`'s
-  model_fn in the passed mode to generate the model graph based on
-  those features and labels, and restores the given checkpoint
-  (or, lacking that, the most recent checkpoint) into the graph.
-  Only one of the modes is used for saving variables to the SavedModel
-  (order of preference: TRAIN, EVAL, then PREDICT), such that up to three
-  MetaGraphDefs are saved with a single set of variables in a single
-  SavedModel directory.
-
-  For prediction, the exported `MetaGraphDef` will provide one `SignatureDef`
-  for each element of the export_outputs dict returned from the model_fn,
-  named using the same keys.  One of these keys is always
-  signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, indicating which
-  signature will be served when a serving request does not specify one.
-  For each signature, the outputs are provided by the corresponding
-  `ExportOutput`s, and the inputs are always the input receivers provided by
-  the serving_input_receiver_fn.
-
-  For training and evaluation, the train_op is stored in an extra collection,
-  and loss, metrics, and predictions are included in a SignatureDef for the
-  mode in question.
-
-  Extra assets may be written into the SavedModel via the assets_extra
-  argument.  This should be a dict, where each key gives a destination path
-  (including the filename) relative to the assets.extra directory.  The
-  corresponding value gives the full path of the source file to be copied.
-  For example, the simple case of copying a single file without renaming it
-  is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
-
-  Sample usage:
-  ```python
-  classifier = tf.estimator.LinearClassifier(
-      feature_columns=[age, language])
-  classifier.train(input_fn=input_fn)
-
-  feature_spec = {
-      'age': tf.placeholder(dtype=tf.int64),
-      'language': array_ops.placeholder(dtype=tf.string)
-  }
-  label_spec = tf.placeholder(dtype=dtypes.int64)
-
-  train_rcvr_fn = tf.contrib.estimator.build_raw_supervised_input_receiver_fn(
-      feature_spec, label_spec)
-
-  serve_rcvr_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
-      feature_spec)
-
-  rcvr_fn_map = {
-      model_fn_lib.ModeKeys.TRAIN: train_rcvr_fn,
-      model_fn_lib.ModeKeys.PREDICT: serve_rcvr_fn,
-  }
-
-  export_dir = tf.contrib.estimator.export_all_saved_models(
-      classifier,
-      export_dir_base='my_model/',
-      input_receiver_fn_map=rcvr_fn_map)
-
-  # export_dirs is a dict of directories with SavedModels, which
-  # can be used for serving, analysis with TFMA, or directly loaded in.
-  with ops.Graph().as_default() as graph:
-    with session.Session(graph=graph) as sess:
-      loader.load(sess, [tag_constants.TRAINING], export_dir)
-      weights = graph.get_tensor_by_name('linear/linear_model/age/weights')
-      ...
-  ```
-
-  Args:
-    estimator: an instance of tf.estimator.Estimator
-    export_dir_base: A string containing a directory in which to create
-      timestamped subdirectories containing exported SavedModels.
-    input_receiver_fn_map: dict of tf.estimator.ModeKeys to input_receiver_fn
-      mappings, where the input_receiver_fn is a function that takes no
-      argument and returns the appropriate subclass of `InputReceiver`.
-    assets_extra: A dict specifying how to populate the assets.extra directory
-      within the exported SavedModel, or `None` if no extra assets are needed.
-    as_text: whether to write the SavedModel proto in text format.
-    checkpoint_path: The checkpoint path to export.  If `None` (the default),
-      the most recent checkpoint found within the model directory is chosen.
-    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
-      removed from the NodeDefs. For a detailed guide, see
-      [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
-
-  Returns:
-    A dict of tf.estimator.ModeKeys value to string path for each exported
-    directory.
+from tensorflow_estimator.contrib.estimator.python.estimator import export
 
-  Raises:
-    ValueError: if any input_receiver_fn is None, no export_outputs
-      are provided, or no checkpoint can be found.
-  """
-  # pylint: enable=line-too-long
+# Include attrs that start with single underscore.
+export.__all__ = [s for s in dir(export) if not s.startswith('__')]
 
-  # pylint: disable=protected-access
-  return estimator._export_all_saved_models(
-      export_dir_base, input_receiver_fn_map,
-      assets_extra=assets_extra,
-      as_text=as_text,
-      checkpoint_path=checkpoint_path,
-      strip_default_attrs=strip_default_attrs)
-  # pylint: enable=protected-access
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.export import *
diff --git a/tensorflow/contrib/estimator/python/estimator/export_test.py b/tensorflow/contrib/estimator/python/estimator/export_test.py
deleted file mode 100644
index 050821ee67..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/export_test.py
+++ /dev/null
@@ -1,373 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for contrib wrapping of export_saved_model_for_mode functionality.
-
-These are direct copies of the tests included in core, with import locations
-changed. These should be removed when the functionality in core is part of the
-public API.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import tempfile
-
-from tensorflow.contrib.estimator.python.estimator import export as contrib_export
-from tensorflow.python.client import session
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.export import export_output
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import loader
-from tensorflow.python.saved_model import tag_constants
-from tensorflow.python.training import training
-from tensorflow.python.util import compat
-
-
-def _model_fn_for_export_tests(features, labels, mode):
-  _, _ = features, labels
-  variables.Variable(1., name='weight')
-  scores = constant_op.constant([3.])
-  classes = constant_op.constant(['wumpus'])
-  update_global_step = state_ops.assign_add(training.get_global_step(), 1)
-  with ops.control_dependencies([update_global_step]):
-    train_op = constant_op.constant(2.)
-  return model_fn_lib.EstimatorSpec(
-      mode,
-      predictions=constant_op.constant(10.),
-      loss=constant_op.constant(1.),
-      train_op=train_op,
-      export_outputs={
-          'test': export_output.ClassificationOutput(scores, classes)})
-
-
-def _x_y_input_fn():
-  return ({'x': constant_op.constant([[1], [1]]),
-           'y': constant_op.constant([[2], [2]])},
-          constant_op.constant([[1], [1]]))
-
-
-def _model_fn_with_x_y(features, labels, mode):
-  _ = labels
-  variables.Variable(1., name='weight')
-  scores = constant_op.constant([3.])
-  classes = constant_op.constant(['wumpus'])
-  if mode == model_fn_lib.ModeKeys.PREDICT:
-    variables.Variable(36., name='name_collision')
-    return model_fn_lib.EstimatorSpec(
-        mode,
-        predictions=constant_op.constant(10.),
-        export_outputs={
-            'test': export_output.ClassificationOutput(scores, classes)})
-  else:
-    prefix = 'eval_' if mode == model_fn_lib.ModeKeys.EVAL else ''
-
-    multiplied = math_ops.multiply(
-        features['x'], features['y'], name='{}multiplied'.format(prefix))
-    metrics = {'mean': metrics_lib.mean(features['x'] - features['y'],
-                                        name='{}mean'.format(prefix))}
-    variables.Variable(1., name='later_var')
-    variables.Variable(3., name='name_collision')
-    return model_fn_lib.EstimatorSpec(
-        mode,
-        predictions=multiplied,
-        loss=constant_op.constant(1.),
-        train_op=state_ops.assign_add(training.get_global_step(), 1),
-        eval_metric_ops=metrics)
-
-
-def _get_serving_input_receiver_fn():
-  feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                  'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-  return export.build_parsing_serving_input_receiver_fn(feature_spec)
-
-
-def _get_supervised_input_receiver_fn():
-  feature_spec = {
-      'x': array_ops.placeholder(
-          dtype=dtypes.int64, shape=(2, 1), name='feature_x'),
-      'y': array_ops.placeholder(
-          dtype=dtypes.int64, shape=(2, 1), name='feature_y')
-      }
-  label_spec = array_ops.placeholder(
-      dtype=dtypes.float32, shape=[1], name='truth')
-
-  return export.build_raw_supervised_input_receiver_fn(
-      feature_spec, label_spec)
-
-
-class EstimatorExportTest(test.TestCase):
-
-  def test_export_saved_model_train(self):
-    self._test_export_saved_model_for_mode(
-        _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.TRAIN)
-
-  def test_export_saved_model_eval(self):
-    self._test_export_saved_model_for_mode(
-        _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.EVAL)
-
-  def test_export_saved_model_predict(self):
-    self._test_export_saved_model_for_mode(
-        _get_serving_input_receiver_fn(), model_fn_lib.ModeKeys.PREDICT)
-
-  def _test_export_saved_model_for_mode(self, input_receiver_fn, mode):
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
-    est.train(input_fn=_x_y_input_fn, steps=1)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = contrib_export.export_saved_model_for_mode(
-        est, export_dir_base, input_receiver_fn, mode=mode)
-
-    # Check that all the files are in the right places.
-    self.assertTrue(gfile.Exists(export_dir_base))
-    self._validate_exported_files(export_dir)
-
-    # Restore, to validate that the export was well-formed.
-    tag_set = model_fn_lib.EXPORT_TAG_MAP[mode]
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, tag_set, export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertFalse('name_collision_1' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_receiver_map(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('input_example_tensor' in graph_ops)
-        self.assertTrue('ParseExample/ParseExample' in graph_ops)
-        self.assertFalse('feature_x' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_train_only(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.TRAINING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('multiplied' in graph_ops)
-        self.assertTrue('mean/update_op' in graph_ops)
-        self.assertFalse('eval_multiplied' in graph_ops)
-        self.assertTrue('feature_x' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_eval_only(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.EVAL], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('eval_multiplied' in graph_ops)
-        self.assertTrue('eval_mean/value' in graph_ops)
-        self.assertFalse('multiplied' in graph_ops)
-        self.assertTrue('feature_x' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_no_serving(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.TRAINING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('multiplied' in graph_ops)
-        self.assertFalse('eval_multiplied' in graph_ops)
-        self.assertTrue('feature_x' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.EVAL], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('eval_multiplied' in graph_ops)
-        self.assertFalse('multiplied' in graph_ops)
-        # TODO(karmel): is this the desired behavior when names are shared?
-        self.assertTrue('feature_x_1' in graph_ops)
-        self.assertTrue('feature_y_1' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_three_defs(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    # Restore, to validate that the export was well-formed.
-    for tag_set in model_fn_lib.EXPORT_TAG_MAP.values():
-      with ops.Graph().as_default() as graph:
-        with session.Session(graph=graph) as sess:
-          loader.load(sess, tag_set, export_dir)
-          graph_ops = [x.name for x in graph.get_operations()]
-          self.assertTrue('global_step/Assign' in graph_ops)
-          self.assertTrue('global_step/Initializer/zeros' in graph_ops)
-          self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_all_vars(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.TRAINING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('later_var' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertFalse('later_var' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_name_collision(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.TRAINING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('name_collision' in graph_ops)
-        self.assertFalse('name_collision_1' in graph_ops)
-        collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-        self.assertEqual(3, collection_vars[-1].eval())
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('name_collision' in graph_ops)
-        self.assertFalse('name_collision_1' in graph_ops)
-        collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-        # This is a non-obvious detail: when we load the estimator spec
-        # for predict, name_collision gets set to 36. However, we then restore
-        # from checkpoint, which should overwrite that var and make it the 3
-        # from training. In practice, this would not be a good way to write
-        # a model_fn, but leaving this check in for now to ensure consistency
-        # with what would happen given our current order of spec, then
-        # checkpoint.
-        self.assertEqual(3, collection_vars[-1].eval())
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def _test_export_all_saved_models(self, input_receiver_fn_map):
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn_with_x_y)
-    est.train(input_fn=_x_y_input_fn, steps=1)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = contrib_export.export_all_saved_models(
-        est, export_dir_base, input_receiver_fn_map)
-
-    # Check that all the files are in the right places.
-    self.assertTrue(gfile.Exists(export_dir_base))
-
-    self._validate_exported_files(export_dir)
-
-    return export_dir, tmpdir
-
-  def _validate_exported_files(self, export_dir):
-    self.assertTrue(gfile.Exists(export_dir))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('saved_model.pb'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables/variables.index'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables/variables.data-00000-of-00001'))))
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/exporter.py b/tensorflow/contrib/estimator/python/estimator/exporter.py
index 09d7440605..33d0314905 100644
--- a/tensorflow/contrib/estimator/python/estimator/exporter.py
+++ b/tensorflow/contrib/estimator/python/estimator/exporter.py
@@ -12,269 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Implements StepsExporter to export the model in user specified steps."""
+"""exporter python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
-
-from tensorflow.python.estimator import exporter
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging
-from tensorflow.python.summary import summary_iterator
-
-DEFAULT_GLOBAL_STEP_KEY = ops.GraphKeys.GLOBAL_STEP
-
-
-class StepsExporter(exporter.Exporter):
-  """This class exports the model in user specified steps.
-
-  This class exports the model at the steps given by the `steps_to_keep`
-  argument. Each number in the list is treated as a lower bound for model
-  exports, to handle the case when evaluation is performed at different steps.
-
-  Consider this example:
-
-  ```
-  steps_to_keep = [1, 2, 3, 6, 7, 10, 12, 25]
-  ```
-
-  The model is evaluated at step increments of 5: `[5, 10, 15, 20, 25, 30]`.
-  The `StepsExporter` will export the model when it has reached steps
-  `[5, 10, 15, 25]`.
-
-  This example illustrates the two cases when the model is exported:
-
-  1. Model is evaluated on a step defined in the list `steps_to_keep`.
-
-     In the example, the model is exported on step `10` and `25`.
-
-  2. Model is evaluated on a step not defined in the list `steps_to_keep`, but
-     is still exported because a step in `steps_to_keep` was missed.
-
-     In the example, when the model reaches step `5`, the model is exported even
-     though  `steps_to_keep` does not contain `5`. Step `5` is exported to make
-     up for step `3`, which was missed. Steps `1` and `2` in `steps_to_keep` are
-     skipped completely (e.g. say the model is evaluated at step `6`. It will
-     **not** be exported to make up for step `2`).
-
-  Using the `steps_to_keep` list as a lower bound allows users to define
-  approximate step boundaries for exporting their models, and avoid frustrating
-  off-by-one calculation errors.
-
-  Sample Use Cases:
-    There are specific points during the training when having a saved version of
-    the model would be useful. One example is at the end of each training phase
-    when the set of freezed weights is changed.
-    Another good use case is saving the model at the end of each epoch for
-    visualization or retraining.
-  """
-
-  def __init__(self,
-               steps_to_keep,
-               name='steps_exporter',
-               serving_input_receiver_fn=None,
-               event_file_pattern='eval/*.tfevents.*',
-               assets_extra=None,
-               as_text=False):
-    """Create an `StepsExporter` to use with `tf.estimator.EvalSpec`.
-
-    Example of creating a StepsExporter for training and evaluation:
-
-    ```python
-    categorical_feature_a = categorical_column_with_hash_bucket(...)
-    categorical_feature_b = categorical_column_with_hash_bucket(...)
-
-    categorical_feature_a_emb = embedding_column(
-        categorical_column=categorical_feature_a, ...)
-    categorical_feature_b_emb = embedding_column(
-        categorical_column=categorical_feature_b, ...)
-
-    estimator = tf.estimator.DNNClassifier(
-        feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-        hidden_units=[1024, 512, 256])
-
-    # Input pipeline for train and evaluate.
-    def train_input_fn: # returns x, y
-      # please shuffle the data.
-      pass
-    def eval_input_fn_eval: # returns x, y
-      pass
-
-    exporter = tf.contrib.estimator.exporter.StepsExporter(
-        name="steps_exporter",
-        serving_input_receiver_fn=serving_input_receiver_fn,
-        event_file_pattern='eval/*.tfevents.*'
-        steps_to_keep=[...])
-
-    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=1000)
-
-    eval_spec = [tf.estimator.EvalSpec(
-      input_fn=eval_input_fn,
-      steps=1,
-      exporters=exporter,
-      start_delay_secs=0,
-      throttle_secs=5)]
-
-    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
-
-    # Models will be exported to estimator.model_dir in timestamped directories,
-    # which can be used for serving, analysis with TFMA, or directly loaded in.
-    # For example:
-    export_dir = os.path.join(estimator.model_dir,
-                              <timestamped directory name>)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        tf.saved_model.loader.load(
-            sess, [tf.saved_model.tag_constants.SERVING], export_dir)
-
-    ```
-
-    Args:
-      steps_to_keep: Non-empty list of positive integers containing
-        the step numbers at which the model should be exported. All the exports
-        will be kept, so there is no garbage collection.
-      name: Unique name of this `Exporter` that is going to be used in the
-        export path.
-      serving_input_receiver_fn: A function that takes no arguments and returns
-        a `ServingInputReceiver`.
-      event_file_pattern: Event file name pattern relative to model_dir. If
-        None, however, the exporter would not be preemption-safe. To be
-        preemption-safe, event_file_pattern should be specified.
-      assets_extra: An optional dict specifying how to populate the assets.extra
-        directory within the exported SavedModel.  Each key should give the
-        destination path (including the filename) relative to the assets.extra
-        directory.  The corresponding value gives the full path of the source
-        file to be copied.  For example, the simple case of copying a single
-        file without renaming it is specified as `{'my_asset_file.txt':
-        '/path/to/my_asset_file.txt'}`.
-      as_text: Whether to write the SavedModel proto in text format. Defaults to
-        `False`.
-
-    Raises:
-      ValueError: If any arguments is invalid.
-    """
-    # pylint: disable=protected-access
-    self._saved_model_exporter = exporter._SavedModelExporter(
-        name, serving_input_receiver_fn, assets_extra, as_text)
-    # pylint: enable=protected-access
-
-    self._event_file_pattern = event_file_pattern
-    self._model_dir = None
-
-    self._input_steps_to_keep = steps_to_keep
-    steps_to_keep = [step for step in steps_to_keep if isinstance(step, int)]
-    steps_to_keep = [step for step in steps_to_keep if step > 0]
-    if not steps_to_keep:
-      raise ValueError(
-          '`steps_to_keep` list must have at least one positive integer')
-    elif self._input_steps_to_keep != steps_to_keep:
-      tf_logging.warn('Changed `steps_to_keep`, by omitting non-integer or'
-                      ' less than 1 elements, to [%s]',
-                      ', '.join(str(step) for step in steps_to_keep))
-    self._steps_to_keep = sorted(steps_to_keep)
-    self._steps_kept = []
-
-  @property
-  def name(self):
-    return self._saved_model_exporter.name
-
-  def export(self, estimator, export_path, checkpoint_path, eval_result,
-             is_the_final_export):
-    """Exports the given Estimator to a specific format.
-
-    Args:
-      estimator: A `tf.estimator.Estimator` instance to export.
-      export_path: A string containing a directory where to write the export.
-      checkpoint_path: The checkpoint path to export.
-      eval_result: The output of Estimator.evaluate on this checkpoint.
-      is_the_final_export: This boolean is True when this is an export in the
-        end of training. It is False for the intermediate exports during the
-        training. When passing Exporter to tf.estimator.train_and_evaluate
-        is_the_final_export is always False if TrainSpec.max_steps is None.
-
-    Returns:
-      The string path to the exported directory or None if export is skipped.
-
-    Raises:
-      ValueError: If `eval_result` is None or doesn't have
-        `ops.GraphKeys.GLOBAL_STEP` as a key.
-    """
-    export_result = None
-
-    if not eval_result or DEFAULT_GLOBAL_STEP_KEY not in eval_result:
-      raise ValueError(
-          '`eval_result` is empty, or does not have global step. This'
-          ' should never happen as Estimator always sets the global step in '
-          '`eval_result`. Please file a bug report. Got eval_result: %s'
-          % str(eval_result))
-
-    if self._model_dir != estimator.model_dir and self._event_file_pattern:
-      tf_logging.info('Loads the steps that the model was already evaluated at,'
-                      'from event files')
-      self._model_dir = estimator.model_dir
-      full_event_file_pattern = os.path.join(self._model_dir,
-                                             self._event_file_pattern)
-      self._steps_kept = self._get_kept_steps(full_event_file_pattern)
-
-      if self._steps_kept:
-        self._steps_kept = sorted(self._steps_kept)
-        self._steps_to_keep = [step for step in self._steps_to_keep if
-                               step > self._steps_kept[-1]]
-    # It is assumed that the model is exported at any evaluated step 'n' if
-    # there is any `steps_missed` lower than 'n'. As a result, all the steps in
-    # `_steps_to_keep` lower than the last evaluated step will be removed.
-    steps_missed = [step for step in self._steps_to_keep
-                    if step <= eval_result[DEFAULT_GLOBAL_STEP_KEY]]
-
-    if steps_missed:
-      # update the `_steps_to_keep` list by omitting all steps smaller than the
-      # current global step which are missed to be exported
-      export_result = self._saved_model_exporter.export(estimator, export_path,
-                                                        checkpoint_path,
-                                                        eval_result,
-                                                        is_the_final_export)
-      self._steps_to_keep = [step for step in self._steps_to_keep if step
-                             not in steps_missed]
-      # contains all the steps in which export has happened.
-      self._steps_kept.append(eval_result[DEFAULT_GLOBAL_STEP_KEY])
-      # Show warning for all the missed steps except the last one
-      if steps_missed[:-1]:
-        tf_logging.warn('Missed steps [%s] for exporting, as no evaluation'
-                        ' took place at them.', ', '.join(str(step) for step in
-                                                          steps_missed[:-1]))
-      # Log model export if the last missed step is the same as the current step
-      if steps_missed[-1] == eval_result[DEFAULT_GLOBAL_STEP_KEY]:
-        tf_logging.info('Performing model export at step %d.',
-                        eval_result[DEFAULT_GLOBAL_STEP_KEY])
-      # Show warning for exporting model at another step instead of the user
-      #   specified one
-      else:
-        tf_logging.warn('Performing model export at step %d instead of %d, as'
-                        ' no evaluation took place at step %d.',
-                        eval_result[DEFAULT_GLOBAL_STEP_KEY], steps_missed[-1],
-                        steps_missed[-1])
-    return export_result
-
-  def _get_kept_steps(self, event_files):
-    """Get the steps that the model was evaluated at, from event files.
-
-    Args:
-      event_files: Absolute pattern of event files.
+from tensorflow_estimator.contrib.estimator.python.estimator import exporter
 
-    Returns:
-      steps_kept: A list of steps in which the model was evaluated.
-    """
-    if not event_files:
-      return None
+# Include attrs that start with single underscore.
+exporter.__all__ = [s for s in dir(exporter) if not s.startswith('__')]
 
-    steps_kept = []
-    for event_file in gfile.Glob(os.path.join(event_files)):
-      for event in summary_iterator.summary_iterator(event_file):
-        if event.step not in steps_kept:
-          steps_kept.append(event.step)
-    return steps_kept
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.exporter import *
diff --git a/tensorflow/contrib/estimator/python/estimator/exporter_test.py b/tensorflow/contrib/estimator/python/estimator/exporter_test.py
deleted file mode 100644
index 0d009b945e..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/exporter_test.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for `StepsExporter`."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import shutil
-import tempfile
-
-from tensorflow.contrib.estimator.python.estimator import exporter as exporter_lib
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-
-
-class StepsExporterTest(test.TestCase):
-
-  def test_error_out_if_steps_to_keep_has_no_positive_integers(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    with self.assertRaisesRegexp(ValueError, "positive integer"):
-      exporter = exporter_lib.StepsExporter(
-          name="specified_steps_exporter",
-          serving_input_receiver_fn=_serving_input_receiver_fn,
-          steps_to_keep=[-1, 0, 1.1])
-      self.assertEqual("specified_steps_exporter", exporter.name)
-
-  def test_steps_exporter(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp()
-    gfile.MkDir(export_dir_base)
-    gfile.MkDir(export_dir_base + "/export")
-    gfile.MkDir(export_dir_base + "/eval")
-
-    exporter = exporter_lib.StepsExporter(
-        name="steps_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        steps_to_keep=[1])
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.export_savedmodel.return_value = "export_result_path"
-    estimator.model_dir = export_dir_base
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 1},
-                                    False)
-
-    self.assertEqual("export_result_path", export_result)
-    estimator.export_savedmodel.assert_called_with(
-        export_dir_base,
-        _serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        checkpoint_path="checkpoint_path",
-        strip_default_attrs=True)
-
-    shutil.rmtree(export_dir_base, ignore_errors=True)
-
-  def test_steps_exporter_with_preemption(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp()
-    gfile.MkDir(export_dir_base)
-    gfile.MkDir(export_dir_base + "/export")
-    gfile.MkDir(export_dir_base + "/eval")
-
-    eval_dir_base = os.path.join(export_dir_base, "eval_continuous")
-    estimator_lib._write_dict_to_summary(eval_dir_base, {}, 1)
-    estimator_lib._write_dict_to_summary(eval_dir_base, {}, 2)
-
-    exporter = exporter_lib.StepsExporter(
-        name="steps_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        event_file_pattern="eval_continuous/*.tfevents.*",
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        steps_to_keep=[1, 2, 6, 8])
-
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.model_dir = export_dir_base
-    estimator.export_savedmodel.return_value = "export_result_path"
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 3},
-                                    False)
-    self.assertEqual(None, export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 6},
-                                    False)
-    self.assertEqual("export_result_path", export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 7},
-                                    False)
-    self.assertEqual(None, export_result)
-
-    shutil.rmtree(export_dir_base, ignore_errors=True)
-
-  def test_specified_step_is_saved(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp()
-    gfile.MkDir(export_dir_base)
-    gfile.MkDir(export_dir_base + "/export")
-    gfile.MkDir(export_dir_base + "/eval")
-
-    exporter = exporter_lib.StepsExporter(
-        name="steps_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        steps_to_keep=[1, 5, 8, 10, 11])
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.export_savedmodel.return_value = "export_result_path"
-    estimator.model_dir = export_dir_base
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 1},
-                                    False)
-
-    self.assertTrue(estimator.export_savedmodel.called)
-    self.assertEqual("export_result_path", export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 2},
-                                    False)
-    self.assertEqual(None, export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 5},
-                                    False)
-    self.assertTrue(estimator.export_savedmodel.called)
-    self.assertEqual("export_result_path", export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 10},
-                                    False)
-    self.assertTrue(estimator.export_savedmodel.called)
-    self.assertEqual("export_result_path", export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 15},
-                                    False)
-    self.assertTrue(estimator.export_savedmodel.called)
-    self.assertEqual("export_result_path", export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"global_step": 20},
-                                    False)
-    self.assertEqual(None, export_result)
-
-    shutil.rmtree(export_dir_base, ignore_errors=True)
-
-  def test_steps_exporter_with_no_global_step_key(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp()
-    gfile.MkDir(export_dir_base)
-    gfile.MkDir(export_dir_base + "/export")
-    gfile.MkDir(export_dir_base + "/eval")
-
-    exporter = exporter_lib.StepsExporter(
-        name="steps_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        steps_to_keep=[1])
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.export_savedmodel.return_value = "export_result_path"
-    estimator.model_dir = export_dir_base
-
-    with self.assertRaisesRegexp(ValueError, "does not have global step"):
-      exporter.export(estimator, export_dir_base, "checkpoint_path", {}, False)
-
-    shutil.rmtree(export_dir_base, ignore_errors=True)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/extenders.py b/tensorflow/contrib/estimator/python/estimator/extenders.py
index e3c44bea66..ca5494db56 100644
--- a/tensorflow/contrib/estimator/python/estimator/extenders.py
+++ b/tensorflow/contrib/estimator/python/estimator/extenders.py
@@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,346 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Extenders of tf.estimator.Estimator."""
+"""extenders python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.export.export_output import PredictOutput
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
-from tensorflow.python.ops import clip_ops
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.training import optimizer as optimizer_lib
-from tensorflow.python.util import function_utils
-
-
-_VALID_METRIC_FN_ARGS = set(['features', 'labels', 'predictions', 'config'])
-
-
-def add_metrics(estimator, metric_fn):
-  """Creates a new `tf.estimator.Estimator` which has given metrics.
-
-  Example:
-
-  ```python
-    def my_auc(labels, predictions):
-      return {'auc': tf.metrics.auc(labels, predictions['logistic'])}
-
-    estimator = tf.estimator.DNNClassifier(...)
-    estimator = tf.contrib.estimator.add_metrics(estimator, my_auc)
-    estimator.train(...)
-    estimator.evaluate(...)
-  ```
-  Example usage of custom metric which uses features:
-
-  ```python
-    def my_auc(features, labels, predictions):
-      return {'auc': tf.metrics.auc(
-        labels, predictions['logistic'], weights=features['weight'])}
-
-    estimator = tf.estimator.DNNClassifier(...)
-    estimator = tf.contrib.estimator.add_metrics(estimator, my_auc)
-    estimator.train(...)
-    estimator.evaluate(...)
-  ```
-
-  Args:
-    estimator: A `tf.estimator.Estimator` object.
-    metric_fn: A function which should obey the following signature:
-      - Args: can only have following four arguments in any order:
-        * predictions: Predictions `Tensor` or dict of `Tensor` created by given
-          `estimator`.
-        * features: Input `dict` of `Tensor` objects created by `input_fn` which
-          is given to `estimator.evaluate` as an argument.
-        * labels:  Labels `Tensor` or dict of `Tensor` created by `input_fn`
-          which is given to `estimator.evaluate` as an argument.
-        * config: config attribute of the `estimator`.
-       - Returns:
-         Dict of metric results keyed by name. Final metrics are a union of this
-         and `estimator's` existing metrics. If there is a name conflict between
-         this and `estimator`s existing metrics, this will override the existing
-         one. The values of the dict are the results of calling a metric
-         function, namely a `(metric_tensor, update_op)` tuple.
-
-  Returns:
-      A new `tf.estimator.Estimator` which has a union of original metrics with
-        given ones.
-  """
-  _verify_metric_fn_args(metric_fn)
-
-  def new_model_fn(features, labels, mode, config):
-    spec = estimator.model_fn(features, labels, mode, config)
-    if mode != model_fn_lib.ModeKeys.EVAL:
-      return spec
-    new_metrics = _call_metric_fn(metric_fn, features, labels, spec.predictions,
-                                  config)
-    all_metrics = spec.eval_metric_ops or {}
-    all_metrics.update(new_metrics)
-    return spec._replace(eval_metric_ops=all_metrics)
-
-  return estimator_lib.Estimator(
-      model_fn=new_model_fn,
-      model_dir=estimator.model_dir,
-      config=estimator.config,
-      # pylint: disable=protected-access
-      warm_start_from=estimator._warm_start_settings)
-      # pylint: enable=protected-access
-
-
-def clip_gradients_by_norm(optimizer, clip_norm):
-  """Returns an optimizer which clips gradients before applying them.
-
-  Example:
-
-  ```python
-  optimizer = tf.train.ProximalAdagradOptimizer(
-      learning_rate=0.1,
-      l1_regularization_strength=0.001)
-  optimizer = tf.contrib.estimator.clip_gradients_by_norm(
-      optimizer, clip_norm)
-  estimator = tf.estimator.DNNClassifier(
-      feature_columns=[...],
-      hidden_units=[1024, 512, 256],
-      optimizer=optimizer)
-  ```
-
-  Args:
-    optimizer: An `tf.Optimizer` object to apply gradients.
-    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
-
-  Returns:
-    A `tf.Optimizer`.
-  """
-
-  def clip_grads(grads_and_vars):
-    gradients, variables = zip(*grads_and_vars)
-    gradients = clip_ops.clip_by_global_norm(gradients, clip_norm)[0]
-    grads_and_vars = list(zip(gradients, variables))
-    return grads_and_vars
-
-  return _TransformGradients(
-      optimizer=optimizer,
-      transform_grads_fn=clip_grads,
-      name='ClipByNorm' + optimizer.get_name())
-
-
-def forward_features(estimator, keys=None, sparse_default_values=None):
-  """Forward features to predictions dictionary.
-
-  In some cases, user wants to see some of the features in estimators prediction
-  output. As an example, consider a batch prediction service: The service simply
-  runs inference on the users graph and returns the results. Keys are essential
-  because there is no order guarantee on the outputs so they need to be rejoined
-  to the inputs via keys or transclusion of the inputs in the outputs.
-  Example:
-  ```python
-    def input_fn():
-      features, labels = ...
-      features['unique_example_id'] = ...
-      features, labels
-    estimator = tf.estimator.LinearClassifier(...)
-    estimator = tf.contrib.estimator.forward_features(
-        estimator, 'unique_example_id')
-    estimator.train(...)
-    assert 'unique_example_id' in estimator.predict(...)
-  ```
-  Args:
-    estimator: A `tf.estimator.Estimator` object.
-    keys: A `string` or a `list` of `string`. If it is `None`, all of the
-      `features` in `dict` is forwarded to the `predictions`. If it is a
-      `string`, only given key is forwarded. If it is a `list` of strings, all
-      the given `keys` are forwarded.
-    sparse_default_values: A dict of `str` keys mapping the name of the sparse
-      features to be converted to dense, to the default value to use. Only
-      sparse features indicated in the dictionary are converted to dense and the
-      provided default value is used.
-
-  Returns:
-      A new `tf.estimator.Estimator` which forwards features to predictions.
-  Raises:
-    ValueError:
-      * if `keys` is already part of `predictions`. We don't allow
-        override.
-      * if 'keys' does not exist in `features`.
-    TypeError: if `keys` type is not one of `string` or list/tuple of `string`.
-  """
-
-  def verify_key_types(keys):  # pylint: disable=missing-docstring
-    if keys is None:
-      return keys
-    if isinstance(keys, six.string_types):
-      return [keys]
-    if not isinstance(keys, (list, tuple)):
-      raise TypeError('keys should be either a string or a list of strings. '
-                      'Given: {}'.format(type(keys)))
-    for key in keys:
-      if not isinstance(key, six.string_types):
-        raise TypeError('All items in the given keys list should be a string. '
-                        'There exist an item with type: {}'.format(type(key)))
-    return keys
-
-  def get_keys(features):
-    if keys is None:
-      return features.keys()
-    return keys
-
-  def verify_keys_and_predictions(features, predictions):
-    if not isinstance(predictions, dict):
-      raise ValueError(
-          'Predictions should be a dict to be able to forward features. '
-          'Given: {}'.format(type(predictions)))
-    for key in get_keys(features):
-      if key not in features:
-        raise ValueError(
-            'keys should be exist in features. Key "{}" is not in features '
-            'dict. features dict has following keys: {}. Please check '
-            'arguments of forward_features.'.format(key, features.keys()))
-      if key in predictions:
-        raise ValueError(
-            'Cannot forward feature key ({}). Since it does exist in '
-            'predictions. Existing prediction keys: {}. Please check arguments '
-            'of forward_features.'.format(key, predictions.keys()))
-
-  keys = verify_key_types(keys)
-
-  def new_model_fn(features, labels, mode, config):  # pylint: disable=missing-docstring
-    spec = estimator.model_fn(features, labels, mode, config)
-    predictions = spec.predictions
-    if predictions is None:
-      return spec
-    verify_keys_and_predictions(features, predictions)
-    for key in get_keys(features):
-      feature = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(
-          features[key])
-      if sparse_default_values and (key in sparse_default_values):
-        if not isinstance(feature, sparse_tensor_lib.SparseTensor):
-          raise ValueError(
-              'Feature ({}) is expected to be a `SparseTensor`.'.format(key))
-        feature = sparse_ops.sparse_tensor_to_dense(
-            feature, default_value=sparse_default_values[key])
-      if not isinstance(feature, ops.Tensor):
-        raise ValueError(
-            'Feature ({}) should be a Tensor. Please use `keys` '
-            'argument of forward_features to filter unwanted features, or'
-            'add key to argument `sparse_default_values`.'
-            'Type of features[{}] is {}.'.format(key, key, type(feature)))
-      predictions[key] = feature
-    spec = spec._replace(predictions=predictions)
-    if spec.export_outputs:
-      for ekey in ['predict', 'serving_default']:
-        if (ekey in spec.export_outputs and
-            isinstance(spec.export_outputs[ekey],
-                       PredictOutput)):
-          export_outputs = spec.export_outputs[ekey].outputs
-          for key in get_keys(features):
-            export_outputs[key] = predictions[key]
-
-    return spec
-
-  return estimator_lib.Estimator(
-      model_fn=new_model_fn,
-      model_dir=estimator.model_dir,
-      config=estimator.config)
-
-
-class _TransformGradients(optimizer_lib.Optimizer):
-  """Add given gradient transformation to the optimizer."""
-
-  def __init__(self, optimizer, transform_grads_fn, name=None):
-    """Construct an `tf.Optimizer` wrapper to apply given transformations.
-
-    Example:
-
-    ```python
-    optimizer = tf.train.ProximalAdagradOptimizer(
-        learning_rate=0.1,
-        l1_regularization_strength=0.001)
-    def clip_grads(grads_and_vars):
-      gradients, variables = zip(*grads_and_vars)
-      gradients = tf.clip_by_global_norm(grads, my_norm)[0]
-      grads_and_vars = list(zip(gradients, variables))
-      return grads_and_vars
-    optimizer = _TransformGradients(
-        opt=optimizer, transform_grads_fn=clip_grads)
-    estimator = tf.estimator.DNNClassifier(
-        feature_columns=[...],
-        hidden_units=[1024, 512, 256],
-        optimizer=optimizer)
-    ```
-
-    Args:
-      optimizer: An `tf.Optimizer` object to apply gradients.
-      transform_grads_fn: A function which takes a single argument, a list of
-        gradient to variable pairs (tuples), performs any requested gradient
-        updates, such as gradient clipping or multipliers, and returns the
-        updated list.
-      name: A string which will be used for debugging purposes.
-    """
-    super(_TransformGradients, self).__init__(
-        use_locking=False, name=name or optimizer.get_name())
-    self._optimizer = optimizer
-    self._transform_grads_fn = transform_grads_fn
-
-  def compute_gradients(self, *args, **kwargs):
-    """See `tf.Optimizer`."""
-    return self._optimizer.compute_gradients(*args, **kwargs)
-
-  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
-    """Apply gradients to variables.
-
-    Calls `transform_grads_fn`, and then applies the real optimizer.
-
-    Args:
-      grads_and_vars: List of (gradient, variable) pairs as returned by
-        compute_gradients().
-      global_step: Optional Variable to increment by one after the
-        variables have been updated.
-      name: Optional name for the returned operation.  Default to the
-        name passed to the Optimizer constructor.
-
-    Returns:
-      An `Operation` that applies the gradients. If `global_step` was not None,
-      that operation also increments `global_step`.
-
-    Raises:
-      ValueError: If the grads_and_vars is malformed.
-    """
-    grads_and_vars = self._transform_grads_fn(grads_and_vars)
-    return self._optimizer.apply_gradients(grads_and_vars, global_step, name)
-
-  def get_slot(self, *args, **kwargs):
-    """See `tf.Optimizer`."""
-    return self._optimizer.get_slot(*args, **kwargs)
-
-  def get_slot_names(self, *args, **kwargs):
-    """See `tf.Optimizer`."""
-    return self._optimizer.get_slot_names(*args, **kwargs)
-
-
-def _verify_metric_fn_args(metric_fn):
-  args = set(function_utils.fn_args(metric_fn))
-  invalid_args = list(args - _VALID_METRIC_FN_ARGS)
-  if invalid_args:
-    raise ValueError('metric_fn (%s) has following not expected args: %s' %
-                     (metric_fn, invalid_args))
+from tensorflow_estimator.contrib.estimator.python.estimator import extenders
 
+# Include attrs that start with single underscore.
+extenders.__all__ = [s for s in dir(extenders) if not s.startswith('__')]
 
-def _call_metric_fn(metric_fn, features, labels, predictions, config):
-  """Calls metric fn with proper arguments."""
-  metric_fn_args = function_utils.fn_args(metric_fn)
-  kwargs = {}
-  if 'features' in metric_fn_args:
-    kwargs['features'] = features
-  if 'labels' in metric_fn_args:
-    kwargs['labels'] = labels
-  if 'predictions' in metric_fn_args:
-    kwargs['predictions'] = predictions
-  if 'config' in metric_fn_args:
-    kwargs['config'] = config
-  return metric_fn(**kwargs)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.extenders import *
diff --git a/tensorflow/contrib/estimator/python/estimator/extenders_test.py b/tensorflow/contrib/estimator/python/estimator/extenders_test.py
deleted file mode 100644
index c8fdaa8791..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/extenders_test.py
+++ /dev/null
@@ -1,426 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""extenders tests."""
-
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import tempfile
-import numpy as np
-
-from tensorflow.contrib.estimator.python.estimator import extenders
-from tensorflow.contrib.layers.python.layers import layers
-from tensorflow.contrib.predictor import from_saved_model
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import estimator_lib
-from tensorflow.python.estimator.canned import linear
-from tensorflow.python.feature_column import feature_column as fc
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.training import training
-from tensorflow.python.util import compat
-
-
-def get_input_fn(x, y):
-
-  def input_fn():
-    dataset = dataset_ops.Dataset.from_tensor_slices({'x': x, 'y': y})
-    iterator = dataset.make_one_shot_iterator()
-    features = iterator.get_next()
-    labels = features.pop('y')
-    return features, labels
-
-  return input_fn
-
-
-class AddMetricsTest(test.TestCase):
-
-  def test_should_add_metrics(self):
-    input_fn = get_input_fn(
-        x=np.arange(4)[:, None, None], y=np.ones(4)[:, None])
-    estimator = linear.LinearClassifier([fc.numeric_column('x')])
-
-    def metric_fn(features):
-      return {'mean_x': metrics_lib.mean(features['x'])}
-
-    estimator = extenders.add_metrics(estimator, metric_fn)
-
-    estimator.train(input_fn=input_fn)
-    metrics = estimator.evaluate(input_fn=input_fn)
-    self.assertIn('mean_x', metrics)
-    self.assertEqual(1.5, metrics['mean_x'])
-    # assert that it keeps original estimators metrics
-    self.assertIn('auc', metrics)
-
-  def test_should_error_out_for_not_recognized_args(self):
-    estimator = linear.LinearClassifier([fc.numeric_column('x')])
-
-    def metric_fn(features, not_recognized):
-      _, _ = features, not_recognized
-      return {}
-
-    with self.assertRaisesRegexp(ValueError, 'not_recognized'):
-      estimator = extenders.add_metrics(estimator, metric_fn)
-
-  def test_all_supported_args(self):
-    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
-    estimator = linear.LinearClassifier([fc.numeric_column('x')])
-
-    def metric_fn(features, predictions, labels, config):
-      self.assertIn('x', features)
-      self.assertIsNotNone(labels)
-      self.assertIn('logistic', predictions)
-      self.assertTrue(isinstance(config, estimator_lib.RunConfig))
-      return {}
-
-    estimator = extenders.add_metrics(estimator, metric_fn)
-
-    estimator.train(input_fn=input_fn)
-    estimator.evaluate(input_fn=input_fn)
-
-  def test_all_supported_args_in_different_order(self):
-    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
-    estimator = linear.LinearClassifier([fc.numeric_column('x')])
-
-    def metric_fn(labels, config, features, predictions):
-      self.assertIn('x', features)
-      self.assertIsNotNone(labels)
-      self.assertIn('logistic', predictions)
-      self.assertTrue(isinstance(config, estimator_lib.RunConfig))
-      return {}
-
-    estimator = extenders.add_metrics(estimator, metric_fn)
-
-    estimator.train(input_fn=input_fn)
-    estimator.evaluate(input_fn=input_fn)
-
-  def test_all_args_are_optional(self):
-    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
-    estimator = linear.LinearClassifier([fc.numeric_column('x')])
-
-    def metric_fn():
-      return {'two': metrics_lib.mean(constant_op.constant([2.]))}
-
-    estimator = extenders.add_metrics(estimator, metric_fn)
-
-    estimator.train(input_fn=input_fn)
-    metrics = estimator.evaluate(input_fn=input_fn)
-    self.assertEqual(2., metrics['two'])
-
-  def test_overrides_existing_metrics(self):
-    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
-    estimator = linear.LinearClassifier([fc.numeric_column('x')])
-    estimator.train(input_fn=input_fn)
-    metrics = estimator.evaluate(input_fn=input_fn)
-    self.assertNotEqual(2., metrics['auc'])
-
-    def metric_fn():
-      return {'auc': metrics_lib.mean(constant_op.constant([2.]))}
-
-    estimator = extenders.add_metrics(estimator, metric_fn)
-    metrics = estimator.evaluate(input_fn=input_fn)
-    self.assertEqual(2., metrics['auc'])
-
-
-class ClipGradientsByNormTest(test.TestCase):
-  """Tests clip_gradients_by_norm."""
-
-  def test_applies_norm(self):
-    optimizer = extenders.clip_gradients_by_norm(
-        training.GradientDescentOptimizer(1.0), clip_norm=3.)
-    with ops.Graph().as_default():
-      w = variables.Variable(1., name='weight')
-      x = constant_op.constant(5.)
-      y = -x * w
-      grads = optimizer.compute_gradients(y, var_list=[w])[0]
-      opt_op = optimizer.minimize(y, var_list=[w])
-      with training.MonitoredSession() as sess:
-        grads_value = sess.run(grads)
-        self.assertEqual(-5., grads_value[0])
-        sess.run(opt_op)
-        new_w = sess.run(w)
-        self.assertEqual(4., new_w)  # 1 + 1*3 (w - lr * clipped_grad)
-
-  def test_name(self):
-    optimizer = extenders.clip_gradients_by_norm(
-        training.GradientDescentOptimizer(1.0), clip_norm=3.)
-    self.assertEqual('ClipByNormGradientDescent', optimizer.get_name())
-
-
-class ForwardFeaturesTest(test.TestCase):
-  """Tests forward_features."""
-
-  def _export_estimator(self, estimator, serving_input_fn):
-    tmpdir = tempfile.mkdtemp()
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = estimator.export_savedmodel(export_dir_base, serving_input_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-    return export_dir, tmpdir
-
-  def make_dummy_input_fn(self):
-    def _input_fn():
-      dataset = dataset_ops.Dataset.from_tensors({
-          'x': [[3.], [5.]],
-          'id': [[101], [102]],
-          'sparse_id': sparse_tensor.SparseTensor(
-              values=[1, 2, 3],
-              indices=[[0, 0], [1, 0], [1, 1]],
-              dense_shape=[2, 2]),
-          'labels': [[1.], [2.]]
-      })
-      def _split(x):
-        labels = x.pop('labels')
-        return x, labels
-      dataset = dataset.map(_split)
-      return dataset
-    return _input_fn
-
-  def test_forward_keys(self):
-
-    input_fn = self.make_dummy_input_fn()
-    estimator = linear.LinearRegressor([fc.numeric_column('x')])
-    estimator.train(input_fn=input_fn, steps=1)
-
-    forwarded_keys = ['id', 'sparse_id']
-
-    for key in forwarded_keys:
-      self.assertNotIn(key, next(estimator.predict(input_fn=input_fn)))
-
-    estimator = extenders.forward_features(
-        estimator, forwarded_keys, sparse_default_values={'sparse_id': 1})
-
-    expected_results = [101, 2, 102, 5]
-    predictions = estimator.predict(input_fn=input_fn)
-    for _ in range(2):
-      prediction = next(predictions)
-      for key in forwarded_keys:
-        self.assertIn(key, prediction)
-        self.assertEqual(expected_results.pop(0), sum(prediction[key]))
-
-  def test_forward_in_exported(self):
-
-    def serving_input_fn():
-      features_ph = {
-          'x': array_ops.placeholder(dtypes.float32, [None]),
-          'id': array_ops.placeholder(dtypes.int32, [None])
-      }
-      features = {
-          key: array_ops.expand_dims(tensor, -1)
-          for key, tensor in features_ph.items()
-      }
-      return estimator_lib.export.ServingInputReceiver(features, features_ph)
-    def input_fn():
-      return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]
-    # create estimator
-    feature_columns = [fc.numeric_column('x')]
-    estimator = linear.LinearRegressor(feature_columns)
-    estimator.train(input_fn=input_fn, steps=1)
-    estimator = extenders.forward_features(estimator, 'id')
-
-    # export saved model
-    export_dir, tmpdir = self._export_estimator(estimator, serving_input_fn)
-
-    # restore model
-    predict_fn = from_saved_model(export_dir, signature_def_key='predict')
-    predictions = predict_fn({'x': [3], 'id': [101]})
-
-    # verify that 'id' exists in predictions
-    self.assertIn('id', predictions)
-    self.assertEqual(101, predictions['id'])
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_forward_in_exported_sparse(self):
-    features_columns = [fc.indicator_column(
-        fc.categorical_column_with_vocabulary_list('x', range(10)))]
-
-    classifier = linear.LinearClassifier(feature_columns=features_columns)
-
-    def train_input_fn():
-      dataset = dataset_ops.Dataset.from_tensors({
-          'x': sparse_tensor.SparseTensor(
-              values=[1, 2, 3],
-              indices=[[0, 0], [1, 0], [1, 1]],
-              dense_shape=[2, 2]),
-          'labels': [[0], [1]]
-      })
-      def _split(x):
-        labels = x.pop('labels')
-        return x, labels
-      dataset = dataset.map(_split)
-      return dataset
-
-    classifier.train(train_input_fn, max_steps=1)
-
-    classifier = extenders.forward_features(
-        classifier, keys=['x'], sparse_default_values={'x': 0})
-
-    def serving_input_fn():
-      features_ph = array_ops.placeholder(dtype=dtypes.int32, name='x',
-                                          shape=[None])
-      features = {'x': layers.dense_to_sparse(features_ph)}
-      return estimator_lib.export.ServingInputReceiver(features,
-                                                       {'x': features_ph})
-    export_dir, tmpdir = self._export_estimator(classifier, serving_input_fn)
-    prediction_fn = from_saved_model(export_dir, signature_def_key='predict')
-
-    features = (0, 2)
-    prediction = prediction_fn({'x': features})
-
-    self.assertIn('x', prediction)
-    self.assertEqual(features, tuple(prediction['x']))
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_forward_list(self):
-
-    def input_fn():
-      return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]
-
-    estimator = linear.LinearRegressor([fc.numeric_column('x')])
-    estimator.train(input_fn=input_fn, steps=1)
-
-    self.assertNotIn('id', next(estimator.predict(input_fn=input_fn)))
-    estimator = extenders.forward_features(estimator, ['x', 'id'])
-    predictions = next(estimator.predict(input_fn=input_fn))
-    self.assertIn('id', predictions)
-    self.assertIn('x', predictions)
-    self.assertEqual(101, predictions['id'])
-    self.assertEqual(3., predictions['x'])
-
-  def test_forward_all(self):
-
-    def input_fn():
-      return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]
-
-    estimator = linear.LinearRegressor([fc.numeric_column('x')])
-    estimator.train(input_fn=input_fn, steps=1)
-
-    self.assertNotIn('id', next(estimator.predict(input_fn=input_fn)))
-    self.assertNotIn('x', next(estimator.predict(input_fn=input_fn)))
-    estimator = extenders.forward_features(estimator)
-    predictions = next(estimator.predict(input_fn=input_fn))
-    self.assertIn('id', predictions)
-    self.assertIn('x', predictions)
-    self.assertEqual(101, predictions['id'])
-    self.assertEqual(3., predictions['x'])
-
-  def test_key_should_be_string(self):
-    estimator = linear.LinearRegressor([fc.numeric_column('x')])
-    with self.assertRaisesRegexp(TypeError, 'keys should be either a string'):
-      extenders.forward_features(estimator, estimator)
-
-  def test_key_should_be_list_of_string(self):
-    estimator = linear.LinearRegressor([fc.numeric_column('x')])
-    with self.assertRaisesRegexp(TypeError, 'should be a string'):
-      extenders.forward_features(estimator, ['x', estimator])
-
-  def test_key_should_be_in_features(self):
-    def input_fn():
-      return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]
-
-    estimator = linear.LinearRegressor([fc.numeric_column('x')])
-    estimator.train(input_fn=input_fn, steps=1)
-
-    estimator = extenders.forward_features(estimator, 'y')
-    with self.assertRaisesRegexp(ValueError,
-                                 'keys should be exist in features'):
-      next(estimator.predict(input_fn=input_fn))
-
-  def test_forwarded_feature_should_not_be_a_sparse_tensor(self):
-    def input_fn():
-      return {
-          'x': [[3.], [5.]],
-          'id': sparse_tensor.SparseTensor(
-              values=['1', '2'],
-              indices=[[0, 0], [1, 0]],
-              dense_shape=[2, 1])
-          }, [[1.], [2.]]
-
-    estimator = linear.LinearRegressor([fc.numeric_column('x')])
-    estimator.train(input_fn=input_fn, steps=1)
-
-    estimator = extenders.forward_features(estimator)
-    with self.assertRaisesRegexp(ValueError,
-                                 'Feature .* should be a Tensor.*'):
-      next(estimator.predict(input_fn=input_fn))
-
-  def test_forwarded_feature_should_be_a_sparse_tensor(self):
-    input_fn = self.make_dummy_input_fn()
-
-    estimator = linear.LinearRegressor([fc.numeric_column('x')])
-    estimator.train(input_fn=input_fn, steps=1)
-
-    estimator = extenders.forward_features(
-        estimator, sparse_default_values={'id': 0, 'sparse_id': 0})
-    with self.assertRaisesRegexp(
-        ValueError, 'Feature .* is expected to be a `SparseTensor`.'):
-      next(estimator.predict(input_fn=input_fn))
-
-  def test_predictions_should_be_dict(self):
-    def input_fn():
-      return {'x': [[3.], [5.]], 'id': [[101], [102]]}
-
-    def model_fn(features, mode):
-      del features
-      global_step = training.get_global_step()
-      return estimator_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant([5.]),
-          predictions=constant_op.constant([5.]),
-          train_op=global_step.assign_add(1))
-
-    estimator = estimator_lib.Estimator(model_fn=model_fn)
-    estimator.train(input_fn=input_fn, steps=1)
-
-    estimator = extenders.forward_features(estimator)
-    with self.assertRaisesRegexp(ValueError, 'Predictions should be a dict'):
-      next(estimator.predict(input_fn=input_fn))
-
-  def test_should_not_conflict_with_existing_predictions(self):
-
-    def input_fn():
-      return {'x': [[3.], [5.]], 'id': [[101], [102]]}
-
-    def model_fn(features, mode):
-      del features
-      global_step = training.get_global_step()
-      return estimator_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant([5.]),
-          predictions={'x': constant_op.constant([5.])},
-          train_op=global_step.assign_add(1))
-
-    estimator = estimator_lib.Estimator(model_fn=model_fn)
-    estimator.train(input_fn=input_fn, steps=1)
-
-    estimator = extenders.forward_features(estimator)
-    with self.assertRaisesRegexp(ValueError, 'Cannot forward feature key'):
-      next(estimator.predict(input_fn=input_fn))
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py
index 34f765d565..f4b4a079ad 100644
--- a/tensorflow/contrib/estimator/python/estimator/head.py
+++ b/tensorflow/contrib/estimator/python/estimator/head.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,966 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Abstractions for the head(s) of a model."""
+"""head python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export_output
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.summary import summary
-from tensorflow.python.training import training_util
-
-_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-
-
-def multi_class_head(n_classes,
-                     weight_column=None,
-                     label_vocabulary=None,
-                     loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
-                     loss_fn=None,
-                     name=None):
-  """Creates a `_Head` for multi class classification.
-
-  Uses `sparse_softmax_cross_entropy` loss.
-
-  The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`.
-  In many applications, the shape is `[batch_size, n_classes]`.
-
-  `labels` must be a dense `Tensor` with shape matching `logits`, namely
-  `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string
-  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
-  `labels` must be an integer `Tensor` with values specifying the class index.
-
-  If `weight_column` is specified, weights must be of shape
-  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
-
-  The loss is the weighted sum over the input dimensions. Namely, if the input
-  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
-  `batch_size`.
-
-  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
-  `(labels, logits, features)` as arguments and returns unreduced loss with
-  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support integer `labels` with
-  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
-  the input labels before passing them to `loss_fn`.
-
-  The head can be used with a canned estimator. Example:
-
-  ```python
-  my_head = tf.contrib.estimator.multi_class_head(n_classes=3)
-  my_estimator = tf.contrib.estimator.DNNEstimator(
-      head=my_head,
-      hidden_units=...,
-      feature_columns=...)
-  ```
-
-  It can also be used with a custom `model_fn`. Example:
-
-  ```python
-  def _my_model_fn(features, labels, mode):
-    my_head = tf.contrib.estimator.multi_class_head(n_classes=3)
-    logits = tf.keras.Model(...)(features)
-
-    return my_head.create_estimator_spec(
-        features=features,
-        mode=mode,
-        labels=labels,
-        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
-        logits=logits)
-
-  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
-  ```
-
-  Args:
-    n_classes: Number of classes, must be greater than 2 (for 2 classes, use
-      `binary_classification_head`).
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.
-    label_vocabulary: A list or tuple of strings representing possible label
-      values. If it is not given, that means labels are already encoded as an
-      integer within [0, n_classes). If given, labels must be of string type and
-      have any value in `label_vocabulary`. Note that errors will be raised if
-      `label_vocabulary` is not provided but labels are strings.
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely
-      weighted sum of losses divided by batch size. See `tf.losses.Reduction`.
-    loss_fn: Optional loss function.
-    name: name of the head. If provided, summary and metrics keys will be
-      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
-
-  Returns:
-    An instance of `_Head` for multi class classification.
-
-  Raises:
-    ValueError: if `n_classes`, `label_vocabulary` or `loss_reduction` is
-      invalid.
-  """
-  return head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint:disable=protected-access
-      n_classes=n_classes,
-      weight_column=weight_column,
-      label_vocabulary=label_vocabulary,
-      loss_reduction=loss_reduction,
-      loss_fn=loss_fn,
-      name=name)
-
-
-def binary_classification_head(
-    weight_column=None,
-    thresholds=None,
-    label_vocabulary=None,
-    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
-    loss_fn=None,
-    name=None):
-  """Creates a `_Head` for single label binary classification.
-
-  This head uses `sigmoid_cross_entropy_with_logits` loss.
-
-  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
-  In many applications, the shape is `[batch_size, 1]`.
-
-  `labels` must be a dense `Tensor` with shape matching `logits`, namely
-  `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string
-  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
-  `labels` must be float `Tensor` with values in the interval `[0, 1]`.
-
-  If `weight_column` is specified, weights must be of shape
-  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
-
-  The loss is the weighted sum over the input dimensions. Namely, if the input
-  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
-  `batch_size`.
-
-  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
-  `(labels, logits, features)` as arguments and returns unreduced loss with
-  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support float `labels` with
-  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
-  the input labels before passing them to `loss_fn`.
-
-  The head can be used with a canned estimator. Example:
-
-  ```python
-  my_head = tf.contrib.estimator.binary_classification_head()
-  my_estimator = tf.contrib.estimator.DNNEstimator(
-      head=my_head,
-      hidden_units=...,
-      feature_columns=...)
-  ```
-
-  It can also be used with a custom `model_fn`. Example:
-
-  ```python
-  def _my_model_fn(features, labels, mode):
-    my_head = tf.contrib.estimator.binary_classification_head()
-    logits = tf.keras.Model(...)(features)
-
-    return my_head.create_estimator_spec(
-        features=features,
-        mode=mode,
-        labels=labels,
-        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
-        logits=logits)
-
-  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
-  ```
-
-  Args:
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.
-    thresholds: Iterable of floats in the range `(0, 1)`. For binary
-      classification metrics such as precision and recall, an eval metric is
-      generated for each threshold value. This threshold is applied to the
-      logistic values to determine the binary classification (i.e., above the
-      threshold is `true`, below is `false`.
-    label_vocabulary: A list or tuple of strings representing possible label
-      values. If it is not given, labels must be float with values within
-      [0, 1]. If given, labels must be string type and have any value in
-      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
-      is not provided but labels are strings.
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely
-      weighted sum of losses divided by batch size. See `tf.losses.Reduction`.
-    loss_fn: Optional loss function.
-    name: name of the head. If provided, summary and metrics keys will be
-      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
-
-  Returns:
-    An instance of `_Head` for binary classification.
-
-  Raises:
-    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
-    ValueError: If `loss_reduction` is invalid.
-  """
-  return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint:disable=protected-access
-      weight_column=weight_column,
-      thresholds=thresholds,
-      label_vocabulary=label_vocabulary,
-      loss_reduction=loss_reduction,
-      loss_fn=loss_fn,
-      name=name)
-
-
-def regression_head(weight_column=None,
-                    label_dimension=1,
-                    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
-                    loss_fn=None,
-                    inverse_link_fn=None,
-                    name=None):
-  """Creates a `_Head` for regression using the `mean_squared_error` loss.
-
-  The loss is the weighted sum over all input dimensions. Namely, if the input
-  labels have shape `[batch_size, label_dimension]`, the loss is the weighted
-  sum over both `batch_size` and `label_dimension`.
-
-  The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`.
-  In many applications, the shape is `[batch_size, label_dimension]`.
-
-  The `labels` shape must match `logits`, namely
-  `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape
-  `[D0, D1, ... DN]` is also supported.
-
-  If `weight_column` is specified, weights must be of shape
-  `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or
-  `[D0, D1, ... DN, label_dimension]`.
-
-  Supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
-  `(labels, logits, features)` as arguments and returns unreduced loss with
-  shape `[D0, D1, ... DN, label_dimension]`.
-
-  Also supports custom `inverse_link_fn`, also known as 'mean function'.
-  `inverse_link_fn` is only used in `PREDICT` mode. It takes `logits` as
-  argument and returns predicted values. This function is the inverse of the
-  link function defined in
-  https://en.wikipedia.org/wiki/Generalized_linear_model#Link_function
-  Namely, for poisson regression, set `inverse_link_fn=tf.exp`.
-
-  The head can be used with a canned estimator. Example:
-
-  ```python
-  my_head = tf.contrib.estimator.regression_head()
-  my_estimator = tf.contrib.estimator.DNNEstimator(
-      head=my_head,
-      hidden_units=...,
-      feature_columns=...)
-  ```
-
-  It can also be used with a custom `model_fn`. Example:
-
-  ```python
-  def _my_model_fn(features, labels, mode):
-    my_head = tf.contrib.estimator.regression_head()
-    logits = tf.keras.Model(...)(features)
-
-    return my_head.create_estimator_spec(
-        features=features,
-        mode=mode,
-        labels=labels,
-        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
-        logits=logits)
-
-  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
-  ```
-
-  Args:
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.
-    label_dimension: Number of regression labels per example. This is the size
-      of the last dimension of the labels `Tensor` (typically, this has shape
-      `[batch_size, label_dimension]`).
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch and label dimension. Defaults to
-      `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by
-      `batch size * label_dimension`. See `tf.losses.Reduction`.
-    loss_fn: Optional loss function. Defaults to `mean_squared_error`.
-    inverse_link_fn: Optional inverse link function, also known as 'mean
-      function'. Defaults to identity.
-    name: name of the head. If provided, summary and metrics keys will be
-      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
-
-  Returns:
-    An instance of `_Head` for linear regression.
-
-  Raises:
-    ValueError: If `label_dimension` or `loss_reduction` is invalid.
-  """
-  return head_lib._regression_head(  # pylint:disable=protected-access
-      weight_column=weight_column,
-      label_dimension=label_dimension,
-      loss_reduction=loss_reduction,
-      loss_fn=loss_fn,
-      inverse_link_fn=inverse_link_fn,
-      name=name)
-
-
-def poisson_regression_head(
-    weight_column=None,
-    label_dimension=1,
-    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
-    compute_full_loss=True,
-    name=None):
-  """Creates a `_Head` for poisson regression using `tf.nn.log_poisson_loss`.
-
-  The loss is the weighted sum over all input dimensions. Namely, if the input
-  labels have shape `[batch_size, label_dimension]`, the loss is the weighted
-  sum over both `batch_size` and `label_dimension`.
-
-  The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`.
-  In many applications, the shape is `[batch_size, label_dimension]`.
-
-  The `labels` shape must match `logits`, namely
-  `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape
-  `[D0, D1, ... DN]` is also supported.
-
-  If `weight_column` is specified, weights must be of shape
-  `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or
-  `[D0, D1, ... DN, label_dimension]`.
-
-  This is implemented as a generalized linear model, see
-  https://en.wikipedia.org/wiki/Generalized_linear_model.
-
-  The head can be used with a canned estimator. Example:
-
-  ```python
-  my_head = tf.contrib.estimator.poisson_regression_head()
-  my_estimator = tf.contrib.estimator.DNNEstimator(
-      head=my_head,
-      hidden_units=...,
-      feature_columns=...)
-  ```
-
-  It can also be used with a custom `model_fn`. Example:
-
-  ```python
-  def _my_model_fn(features, labels, mode):
-    my_head = tf.contrib.estimator.poisson_regression_head()
-    logits = tf.keras.Model(...)(features)
-
-    return my_head.create_estimator_spec(
-        features=features,
-        mode=mode,
-        labels=labels,
-        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
-        logits=logits)
-
-  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
-  ```
-
-  Args:
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.
-    label_dimension: Number of regression labels per example. This is the size
-      of the last dimension of the labels `Tensor` (typically, this has shape
-      `[batch_size, label_dimension]`).
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch and label dimension. Defaults to
-      `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by
-      `batch size * label_dimension`. See `tf.losses.Reduction`.
-    compute_full_loss: Whether to include the constant `log(z!)` term in
-      computing the poisson loss. See `tf.nn.log_poisson_loss` for the full
-      documentation.
-    name: name of the head. If provided, summary and metrics keys will be
-      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
-
-  Returns:
-    An instance of `_Head` for poisson regression.
-
-  Raises:
-    ValueError: If `label_dimension` or `loss_reduction` is invalid.
-  """
-  def _poisson_loss(labels, logits):
-    return nn.log_poisson_loss(
-        targets=labels, log_input=logits, compute_full_loss=compute_full_loss)
-  return head_lib._regression_head(  # pylint:disable=protected-access
-      weight_column=weight_column,
-      label_dimension=label_dimension,
-      loss_reduction=loss_reduction,
-      loss_fn=_poisson_loss,
-      inverse_link_fn=math_ops.exp,
-      name=name)
-
-
-def logistic_regression_head(
-    weight_column=None,
-    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
-    name=None):
-  """Creates a `_Head` for logistic regression.
-
-  Uses `sigmoid_cross_entropy_with_logits` loss, which is the same as
-  `binary_classification_head`. The differences compared to
-  `binary_classification_head` are:
-
-  * Does not support `label_vocabulary`. Instead, labels must be float in the
-    range [0, 1].
-  * Does not calculate some metrics that do not make sense, such as AUC.
-  * In `PREDICT` mode, only returns logits and predictions
-    (`=tf.sigmoid(logits)`), whereas `binary_classification_head` also returns
-    probabilities, classes, and class_ids.
-  * Export output defaults to `RegressionOutput`, whereas
-    `binary_classification_head` defaults to `PredictOutput`.
-
-  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
-  In many applications, the shape is `[batch_size, 1]`.
-
-  The `labels` shape must match `logits`, namely
-  `[D0, D1, ... DN]` or `[D0, D1, ... DN, 1]`.
-
-  If `weight_column` is specified, weights must be of shape
-  `[D0, D1, ... DN]` or `[D0, D1, ... DN, 1]`.
-
-  This is implemented as a generalized linear model, see
-  https://en.wikipedia.org/wiki/Generalized_linear_model.
-
-  The head can be used with a canned estimator. Example:
-
-  ```python
-  my_head = tf.contrib.estimator.logistic_regression_head()
-  my_estimator = tf.contrib.estimator.DNNEstimator(
-      head=my_head,
-      hidden_units=...,
-      feature_columns=...)
-  ```
-
-  It can also be used with a custom `model_fn`. Example:
-
-  ```python
-  def _my_model_fn(features, labels, mode):
-    my_head = tf.contrib.estimator.logistic_regression_head()
-    logits = tf.keras.Model(...)(features)
-
-    return my_head.create_estimator_spec(
-        features=features,
-        mode=mode,
-        labels=labels,
-        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
-        logits=logits)
-
-  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
-  ```
-
-  Args:
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch and label dimension. Defaults to
-      `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by
-      `batch size * label_dimension`. See `tf.losses.Reduction`.
-    name: name of the head. If provided, summary and metrics keys will be
-      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
-
-  Returns:
-    An instance of `_Head` for logistic regression.
-
-  Raises:
-    ValueError: If `loss_reduction` is invalid.
-  """
-  def _logistic_loss(labels, logits):
-    labels = head_lib._assert_range(  # pylint:disable=protected-access
-        labels, n_classes=2, message='Labels must be in range [0, 1]')
-    return nn.sigmoid_cross_entropy_with_logits(
-        labels=labels, logits=logits)
-  return head_lib._regression_head(  # pylint:disable=protected-access
-      weight_column=weight_column,
-      label_dimension=1,
-      loss_reduction=loss_reduction,
-      loss_fn=_logistic_loss,
-      inverse_link_fn=math_ops.sigmoid,
-      name=name)
-
-
-def multi_label_head(n_classes,
-                     weight_column=None,
-                     thresholds=None,
-                     label_vocabulary=None,
-                     loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
-                     loss_fn=None,
-                     classes_for_class_based_metrics=None,
-                     name=None):
-  """Creates a `_Head` for multi-label classification.
-
-  Multi-label classification handles the case where each example may have zero
-  or more associated labels, from a discrete set. This is distinct from
-  `multi_class_head` which has exactly one label per example.
-
-  Uses `sigmoid_cross_entropy` loss average over classes and weighted sum over
-  the batch. Namely, if the input logits have shape `[batch_size, n_classes]`,
-  the loss is the average over `n_classes` and the weighted sum over
-  `batch_size`.
-
-  The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. In many
-  applications, the shape is `[batch_size, n_classes]`.
-
-  Labels can be:
-
-  * A multi-hot tensor of shape `[D0, D1, ... DN, n_classes]`
-  * An integer `SparseTensor` of class indices. The `dense_shape` must be
-    `[D0, D1, ... DN, ?]` and the values within `[0, n_classes)`.
-  * If `label_vocabulary` is given, a string `SparseTensor`. The `dense_shape`
-    must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary` or a
-    multi-hot tensor of shape `[D0, D1, ... DN, n_classes]`.
-
-  If `weight_column` is specified, weights must be of shape
-  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
-
-  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
-  `(labels, logits, features)` as arguments and returns unreduced loss with
-  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support indicator `labels` with
-  shape `[D0, D1, ... DN, n_classes]`. Namely, the head applies
-  `label_vocabulary` to the input labels before passing them to `loss_fn`.
-
-  The head can be used with a canned estimator. Example:
-
-  ```python
-  my_head = tf.contrib.estimator.multi_label_head(n_classes=3)
-  my_estimator = tf.contrib.estimator.DNNEstimator(
-      head=my_head,
-      hidden_units=...,
-      feature_columns=...)
-  ```
-
-  It can also be used with a custom `model_fn`. Example:
-
-  ```python
-  def _my_model_fn(features, labels, mode):
-    my_head = tf.contrib.estimator.multi_label_head(n_classes=3)
-    logits = tf.keras.Model(...)(features)
-
-    return my_head.create_estimator_spec(
-        features=features,
-        mode=mode,
-        labels=labels,
-        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
-        logits=logits)
-
-  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
-  ```
-
-  Args:
-    n_classes: Number of classes, must be greater than 1 (for 1 class, use
-      `binary_classification_head`).
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.  Per-class weighting is
-      not supported.
-    thresholds: Iterable of floats in the range `(0, 1)`. Accuracy, precision
-      and recall metrics are evaluated for each threshold value. The threshold
-      is applied to the predicted probabilities, i.e. above the threshold is
-      `true`, below is `false`.
-    label_vocabulary: A list of strings represents possible label values. If it
-      is not given, that means labels are already encoded as integer within
-      [0, n_classes) or multi-hot Tensor. If given, labels must be SparseTensor
-      string type and have any value in `label_vocabulary`. Also there will be
-      errors if vocabulary is not provided and labels are string.
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely
-      weighted sum of losses divided by batch size. See `tf.losses.Reduction`.
-    loss_fn: Optional loss function.
-    classes_for_class_based_metrics: List of integer class IDs or string class
-      names for which per-class metrics are evaluated. If integers, all must be
-      in the range `[0, n_classes - 1]`. If strings, all must be in
-      `label_vocabulary`.
-    name: name of the head. If provided, summary and metrics keys will be
-      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
-
-  Returns:
-    An instance of `_Head` for multi-label classification.
-
-  Raises:
-    ValueError: if `n_classes`, `thresholds`, `loss_reduction`, `loss_fn` or
-    `metric_class_ids` is invalid.
-  """
-  thresholds = tuple(thresholds) if thresholds else tuple()
-  if n_classes is None or n_classes < 2:
-    raise ValueError(
-        'n_classes must be > 1 for multi-class classification. '
-        'Given: {}'.format(n_classes))
-  for threshold in thresholds:
-    if (threshold <= 0.0) or (threshold >= 1.0):
-      raise ValueError(
-          'thresholds must be in (0, 1) range. Given: {}'.format(threshold))
-  if label_vocabulary is not None:
-    if not isinstance(label_vocabulary, (list, tuple)):
-      raise ValueError(
-          'label_vocabulary must be a list or tuple. '
-          'Given type: {}'.format(type(label_vocabulary)))
-    if len(label_vocabulary) != n_classes:
-      raise ValueError(
-          'Length of label_vocabulary must be n_classes ({}). '
-          'Given: {}'.format(n_classes, len(label_vocabulary)))
-  if loss_fn:
-    head_lib._validate_loss_fn_args(loss_fn)  # pylint:disable=protected-access
-  if (loss_reduction not in losses.Reduction.all() or
-      loss_reduction == losses.Reduction.NONE):
-    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
-  classes_for_class_based_metrics = tuple(
-      [] if classes_for_class_based_metrics is None
-      else classes_for_class_based_metrics)
-  if classes_for_class_based_metrics:
-    if isinstance(classes_for_class_based_metrics[0], six.string_types):
-      if not label_vocabulary:
-        raise ValueError(
-            'label_vocabulary must be provided when '
-            'classes_for_class_based_metrics are sting.')
-      class_ids = []
-      for class_string in classes_for_class_based_metrics:
-        class_ids.append(label_vocabulary.index(class_string))
-      classes_for_class_based_metrics = tuple(class_ids)
-    else:
-      for class_id in classes_for_class_based_metrics:
-        if (class_id < 0) or (class_id >= n_classes):
-          raise ValueError(
-              'All classes_for_class_based_metrics must be in range [0, {}]. '
-              'Given: {}'.format(n_classes - 1, class_id))
-  return _MultiLabelHead(
-      n_classes=n_classes, weight_column=weight_column, thresholds=thresholds,
-      label_vocabulary=label_vocabulary, loss_reduction=loss_reduction,
-      loss_fn=loss_fn,
-      classes_for_class_based_metrics=classes_for_class_based_metrics,
-      name=name)
-
-
-class _MultiLabelHead(head_lib._Head):  # pylint:disable=protected-access
-  """`_Head` for multi-label classification."""
-
-  def __init__(self,
-               n_classes,
-               weight_column=None,
-               thresholds=None,
-               label_vocabulary=None,
-               loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
-               loss_fn=None,
-               classes_for_class_based_metrics=None,
-               name=None):
-    self._n_classes = n_classes
-    self._weight_column = weight_column
-    self._thresholds = thresholds
-    self._label_vocabulary = label_vocabulary
-    self._loss_reduction = loss_reduction
-    self._loss_fn = loss_fn
-    self._classes_for_class_based_metrics = classes_for_class_based_metrics
-    self._name = name
-
-  @property
-  def name(self):
-    return self._name
-
-  @property
-  def logits_dimension(self):
-    return self._n_classes
-
-  def _process_labels(self, labels):
-    if labels is None:
-      raise ValueError(
-          'You must provide a labels Tensor. Given: None. '
-          'Suggested troubleshooting steps: Check that your data contain '
-          'your label feature. Check that your input_fn properly parses and '
-          'returns labels.')
-    if isinstance(labels, sparse_tensor.SparseTensor):
-      if labels.dtype == dtypes.string:
-        label_ids_values = lookup_ops.index_table_from_tensor(
-            vocabulary_list=tuple(self._label_vocabulary),
-            name='class_id_lookup').lookup(labels.values)
-        label_ids = sparse_tensor.SparseTensor(
-            indices=labels.indices,
-            values=label_ids_values,
-            dense_shape=labels.dense_shape)
-        return math_ops.to_int64(
-            sparse_ops.sparse_to_indicator(label_ids, self._n_classes))
-      else:
-        err_msg = (
-            r'labels must be an integer SparseTensor with values in '
-            r'[0, {})'.format(self._n_classes))
-        assert_int = check_ops.assert_integer(
-            labels.values, message=err_msg)
-        assert_less = check_ops.assert_less(
-            labels.values,
-            ops.convert_to_tensor(self._n_classes, dtype=labels.dtype),
-            message=err_msg)
-        assert_greater = check_ops.assert_non_negative(
-            labels.values, message=err_msg)
-        with ops.control_dependencies(
-            [assert_int, assert_less, assert_greater]):
-          return math_ops.to_int64(
-              sparse_ops.sparse_to_indicator(labels, self._n_classes))
-    err_msg = (
-        r'labels must be an integer indicator Tensor with values in [0, 1]')
-    return head_lib._assert_range(labels, 2, message=err_msg)  # pylint:disable=protected-access,
-
-  def create_loss(self, features, mode, logits, labels):
-    """See `Head`."""
-    del mode  # Unused for this head.
-    logits = ops.convert_to_tensor(logits)
-    processed_labels = self._process_labels(labels)
-    processed_labels = head_lib._check_dense_labels_match_logits_and_reshape(  # pylint:disable=protected-access
-        labels=processed_labels, logits=logits,
-        expected_labels_dimension=self.logits_dimension)
-    if self._loss_fn:
-      unweighted_loss = head_lib._call_loss_fn(  # pylint:disable=protected-access
-          loss_fn=self._loss_fn, labels=processed_labels, logits=logits,
-          features=features, expected_loss_dim=1)
-    else:
-      unweighted_loss = losses.sigmoid_cross_entropy(
-          multi_class_labels=processed_labels, logits=logits,
-          reduction=losses.Reduction.NONE)
-      # Averages loss over classes.
-      unweighted_loss = math_ops.reduce_mean(
-          unweighted_loss, axis=-1, keepdims=True)
-    weights = head_lib._get_weights_and_check_match_logits(  # pylint:disable=protected-access,
-        features=features, weight_column=self._weight_column, logits=logits)
-    training_loss = losses.compute_weighted_loss(
-        unweighted_loss, weights=weights, reduction=self._loss_reduction)
-    return head_lib.LossSpec(
-        training_loss=training_loss,
-        unreduced_loss=unweighted_loss,
-        weights=weights,
-        processed_labels=processed_labels)
-
-  def _create_tpu_estimator_spec(
-      self, features, mode, logits, labels=None, optimizer=None,
-      train_op_fn=None, regularization_losses=None):
-    """Returns an `model_fn._TPUEstimatorSpec`.
-
-    Args:
-      features: Input `dict` of `Tensor` or `SparseTensor` objects.
-      mode: Estimator's `ModeKeys`.
-      logits: logits `Tensor` with shape `[D0, D1, ... DN, n_classes]`.
-        For many applications, the shape is `[batch_size, n_classes]`.
-      labels: Labels with shape matching `logits`. Can be multi-hot `Tensor`
-        with shape `[D0, D1, ... DN, n_classes]` or `SparseTensor` with
-        `dense_shape` `[D0, D1, ... DN, ?]`. `labels` is required argument when
-        `mode` equals `TRAIN` or `EVAL`.
-      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
-        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
-        updates variables and increments `global_step`.
-      train_op_fn: Function that takes a scalar loss `Tensor` and returns
-        `train_op`. Used if `optimizer` is `None`.
-      regularization_losses: A list of additional scalar losses to be added to
-        the training loss, such as regularization losses. These losses are
-        usually expressed as a batch average, so for best results users need to
-        set `loss_reduction=SUM_OVER_BATCH_SIZE` or
-        `loss_reduction=SUM_OVER_NONZERO_WEIGHTS` when creating the head to
-        avoid scaling errors.
-    Returns:
-      `model_fn._TPUEstimatorSpec`.
-    Raises:
-      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
-        mode, or if both are set.
-    """
-    with ops.name_scope(self._name, 'head'):
-      logits = head_lib._check_logits_final_dim(logits, self.logits_dimension)  # pylint:disable=protected-access
-
-      # Predict.
-      pred_keys = prediction_keys.PredictionKeys
-      with ops.name_scope(None, 'predictions', (logits,)):
-        probabilities = math_ops.sigmoid(logits, name=pred_keys.PROBABILITIES)
-        predictions = {
-            pred_keys.LOGITS: logits,
-            pred_keys.PROBABILITIES: probabilities,
-        }
-      if mode == model_fn.ModeKeys.PREDICT:
-        classifier_output = head_lib._classification_output(  # pylint:disable=protected-access
-            scores=probabilities, n_classes=self._n_classes,
-            label_vocabulary=self._label_vocabulary)
-        return model_fn._TPUEstimatorSpec(  # pylint:disable=protected-access
-            mode=model_fn.ModeKeys.PREDICT,
-            predictions=predictions,
-            export_outputs={
-                _DEFAULT_SERVING_KEY: classifier_output,
-                head_lib._CLASSIFY_SERVING_KEY: classifier_output,  # pylint:disable=protected-access
-                head_lib._PREDICT_SERVING_KEY: (  # pylint:disable=protected-access
-                    export_output.PredictOutput(predictions))
-            })
-
-      (training_loss, unreduced_loss, weights,
-       processed_labels) = self.create_loss(
-           features=features, mode=mode, logits=logits, labels=labels)
-      if regularization_losses:
-        regularization_loss = math_ops.add_n(regularization_losses)
-        regularized_training_loss = math_ops.add_n(
-            [training_loss, regularization_loss])
-      else:
-        regularization_loss = None
-        regularized_training_loss = training_loss
-
-      # Eval.
-      if mode == model_fn.ModeKeys.EVAL:
-        return model_fn._TPUEstimatorSpec(  # pylint:disable=protected-access
-            mode=model_fn.ModeKeys.EVAL,
-            predictions=predictions,
-            loss=regularized_training_loss,
-            eval_metrics=head_lib._create_eval_metrics_tuple(  # pylint:disable=protected-access
-                self._eval_metric_ops, {
-                    'labels': processed_labels,
-                    'probabilities': probabilities,
-                    'weights': weights,
-                    'unreduced_loss': unreduced_loss,
-                    'regularization_loss': regularization_loss,
-                }))
+from tensorflow_estimator.contrib.estimator.python.estimator import head
 
-      # Train.
-      if optimizer is not None:
-        if train_op_fn is not None:
-          raise ValueError('train_op_fn and optimizer cannot both be set.')
-        train_op = optimizer.minimize(
-            regularized_training_loss,
-            global_step=training_util.get_global_step())
-      elif train_op_fn is not None:
-        train_op = train_op_fn(regularized_training_loss)
-      else:
-        raise ValueError('train_op_fn and optimizer cannot both be None.')
-      train_op = head_lib._append_update_ops(train_op)  # pylint:disable=protected-access
-      # Only summarize mean_loss for SUM reduction to preserve backwards
-      # compatibility. Otherwise skip it to avoid unnecessary computation.
-      if self._loss_reduction == losses.Reduction.SUM:
-        example_weight_sum = math_ops.reduce_sum(
-            weights * array_ops.ones_like(unreduced_loss))
-        mean_loss = training_loss / example_weight_sum
-      else:
-        mean_loss = None
-    with ops.name_scope(''):
-      keys = metric_keys.MetricKeys
-      summary.scalar(
-          head_lib._summary_key(self._name, keys.LOSS),  # pylint:disable=protected-access
-          regularized_training_loss)
-      if mean_loss is not None:
-        summary.scalar(
-            head_lib._summary_key(self._name, keys.LOSS_MEAN),  # pylint:disable=protected-access
-            mean_loss)
-      if regularization_loss is not None:
-        summary.scalar(
-            head_lib._summary_key(self._name, keys.LOSS_REGULARIZATION),  # pylint:disable=protected-access
-            regularization_loss)
-    return model_fn._TPUEstimatorSpec(  # pylint:disable=protected-access
-        mode=model_fn.ModeKeys.TRAIN,
-        predictions=predictions,
-        loss=regularized_training_loss,
-        train_op=train_op)
+# Include attrs that start with single underscore.
+head.__all__ = [s for s in dir(head) if not s.startswith('__')]
 
-  def _eval_metric_ops(
-      self, labels, probabilities, weights, unreduced_loss,
-      regularization_loss):
-    """Returns a dict of metrics for eval_metric_ops."""
-    with ops.name_scope(
-        None, 'metrics',
-        [labels, probabilities, weights, unreduced_loss, regularization_loss]):
-      keys = metric_keys.MetricKeys
-      metric_ops = {
-          # Estimator already adds a metric for loss.
-          head_lib._summary_key(self._name, keys.LOSS_MEAN):  # pylint:disable=protected-access
-              metrics_lib.mean(
-                  values=unreduced_loss,
-                  weights=weights,
-                  name=keys.LOSS_MEAN),
-          head_lib._summary_key(self._name, keys.AUC):  # pylint:disable=protected-access
-              metrics_lib.auc(labels=labels, predictions=probabilities,
-                              weights=weights, name=keys.AUC),
-          head_lib._summary_key(self._name, keys.AUC_PR):  # pylint:disable=protected-access
-              metrics_lib.auc(labels=labels, predictions=probabilities,
-                              weights=weights, curve='PR',
-                              name=keys.AUC_PR),
-      }
-      if regularization_loss is not None:
-        loss_regularization_key = head_lib._summary_key(  # pylint:disable=protected-access
-            self._name, keys.LOSS_REGULARIZATION)
-        metric_ops[loss_regularization_key] = (
-            metrics_lib.mean(
-                values=regularization_loss,
-                name=keys.LOSS_REGULARIZATION))
-      for threshold in self._thresholds:
-        accuracy_key = keys.ACCURACY_AT_THRESHOLD % threshold
-        metric_ops[head_lib._summary_key(self._name, accuracy_key)] = (  # pylint:disable=protected-access
-            head_lib._accuracy_at_threshold(  # pylint:disable=protected-access
-                labels=labels,
-                predictions=probabilities,
-                weights=weights,
-                threshold=threshold,
-                name=accuracy_key))
-        # Precision for positive examples.
-        precision_key = keys.PRECISION_AT_THRESHOLD % threshold
-        metric_ops[head_lib._summary_key(self._name, precision_key)] = (  # pylint:disable=protected-access
-            head_lib._precision_at_threshold(  # pylint:disable=protected-access
-                labels=labels,
-                predictions=probabilities,
-                weights=weights,
-                threshold=threshold,
-                name=precision_key))
-        # Recall for positive examples.
-        recall_key = keys.RECALL_AT_THRESHOLD % threshold
-        metric_ops[head_lib._summary_key(self._name, recall_key)] = (  # pylint:disable=protected-access
-            head_lib._recall_at_threshold(  # pylint:disable=protected-access
-                labels=labels,
-                predictions=probabilities,
-                weights=weights,
-                threshold=threshold,
-                name=recall_key))
-      for class_id in self._classes_for_class_based_metrics:
-        batch_rank = array_ops.rank(probabilities) - 1
-        begin = array_ops.concat(
-            [array_ops.zeros([batch_rank], dtype=dtypes.int32), [class_id]],
-            axis=0)
-        size = array_ops.concat(
-            [-1 * array_ops.ones([batch_rank], dtype=dtypes.int32), [1]],
-            axis=0)
-        class_probabilities = array_ops.slice(
-            probabilities, begin=begin, size=size)
-        class_labels = array_ops.slice(labels, begin=begin, size=size)
-        if self._label_vocabulary is None:
-          prob_key = keys.PROBABILITY_MEAN_AT_CLASS % class_id
-        else:
-          prob_key = (
-              keys.PROBABILITY_MEAN_AT_NAME % self._label_vocabulary[class_id])
-        metric_ops[head_lib._summary_key(self._name, prob_key)] = (  # pylint:disable=protected-access
-            head_lib._predictions_mean(  # pylint:disable=protected-access
-                predictions=class_probabilities,
-                weights=weights,
-                name=prob_key))
-        if self._label_vocabulary is None:
-          auc_key = keys.AUC_AT_CLASS % class_id
-        else:
-          auc_key = keys.AUC_AT_NAME % self._label_vocabulary[class_id]
-        metric_ops[head_lib._summary_key(self._name, auc_key)] = (  # pylint:disable=protected-access
-            head_lib._auc(  # pylint:disable=protected-access
-                labels=class_labels,
-                predictions=class_probabilities,
-                weights=weights,
-                name=auc_key))
-        if self._label_vocabulary is None:
-          auc_pr_key = keys.AUC_PR_AT_CLASS % class_id
-        else:
-          auc_pr_key = keys.AUC_PR_AT_NAME % self._label_vocabulary[class_id]
-        metric_ops[head_lib._summary_key(self._name, auc_pr_key)] = (  # pylint:disable=protected-access
-            head_lib._auc(  # pylint:disable=protected-access
-                labels=class_labels,
-                predictions=class_probabilities,
-                weights=weights,
-                curve='PR',
-                name=auc_pr_key))
-    return metric_ops
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.head import *
diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py
deleted file mode 100644
index c6e75f8d46..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/head_test.py
+++ /dev/null
@@ -1,1482 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for head."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import six
-
-from tensorflow.contrib.estimator.python.estimator import head as head_lib
-from tensorflow.core.framework import summary_pb2
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.training import monitored_session
-
-
-_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-
-
-def _initialize_variables(test_case, scaffold):
-  scaffold.finalize()
-  test_case.assertIsNone(scaffold.init_feed_dict)
-  test_case.assertIsNone(scaffold.init_fn)
-  scaffold.init_op.run()
-  scaffold.ready_for_local_init_op.eval()
-  scaffold.local_init_op.run()
-  scaffold.ready_op.eval()
-  test_case.assertIsNotNone(scaffold.saver)
-
-
-def _assert_simple_summaries(test_case, expected_summaries, summary_str,
-                             tol=1e-6):
-  """Assert summary the specified simple values.
-
-  Args:
-    test_case: test case.
-    expected_summaries: Dict of expected tags and simple values.
-    summary_str: Serialized `summary_pb2.Summary`.
-    tol: Tolerance for relative and absolute.
-  """
-  summary = summary_pb2.Summary()
-  summary.ParseFromString(summary_str)
-  test_case.assertAllClose(expected_summaries, {
-      v.tag: v.simple_value for v in summary.value
-  }, rtol=tol, atol=tol)
-
-
-def _assert_no_hooks(test_case, spec):
-  test_case.assertAllEqual([], spec.training_chief_hooks)
-  test_case.assertAllEqual([], spec.training_hooks)
-
-
-def _sigmoid(logits):
-  return 1 / (1 + np.exp(-logits))
-
-
-def _sigmoid_cross_entropy(labels, logits):
-  """Returns sigmoid cross entropy averaged over classes."""
-  sigmoid_logits = _sigmoid(logits)
-  unreduced_result = (
-      -labels * np.log(sigmoid_logits)
-      -(1 - labels) * np.log(1 - sigmoid_logits))
-  # Mean over classes
-  return np.mean(unreduced_result, axis=-1, keepdims=True)
-
-
-class MultiLabelHead(test.TestCase):
-
-  def setUp(self):
-    ops.reset_default_graph()
-
-  def test_n_classes_is_none(self):
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'n_classes must be > 1 for multi-class classification\. Given: None'):
-      head_lib.multi_label_head(n_classes=None)
-
-  def test_n_classes_is_1(self):
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'n_classes must be > 1 for multi-class classification\. Given: 1'):
-      head_lib.multi_label_head(n_classes=1)
-
-  def test_threshold_too_small(self):
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'thresholds must be in \(0, 1\) range\. Given: 0\.0'):
-      head_lib.multi_label_head(n_classes=2, thresholds=[0., 0.5])
-
-  def test_threshold_too_large(self):
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'thresholds must be in \(0, 1\) range\. Given: 1\.0'):
-      head_lib.multi_label_head(n_classes=2, thresholds=[0.5, 1.0])
-
-  def test_label_vocabulary_dict(self):
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'label_vocabulary must be a list or tuple\. '
-        r'Given type: <(type|class) \'dict\'>'):
-      head_lib.multi_label_head(n_classes=2, label_vocabulary={'foo': 'bar'})
-
-  def test_label_vocabulary_wrong_size(self):
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'Length of label_vocabulary must be n_classes \(3\). Given: 2'):
-      head_lib.multi_label_head(n_classes=3, label_vocabulary=['foo', 'bar'])
-
-  def test_invalid_loss_reduction(self):
-    with self.assertRaisesRegexp(
-        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
-      head_lib.multi_label_head(
-          n_classes=3, loss_reduction='invalid_loss_reduction')
-    with self.assertRaisesRegexp(
-        ValueError, r'Invalid loss_reduction: none'):
-      head_lib.multi_label_head(
-          n_classes=3, loss_reduction=losses.Reduction.NONE)
-
-  def test_loss_fn_arg_labels_missing(self):
-    def _loss_fn(logits):
-      del logits  # Unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn must contain argument: labels\. '
-        r'Given arguments: \(\'logits\',\)'):
-      head_lib.multi_label_head(n_classes=3, loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_logits_missing(self):
-    def _loss_fn(labels):
-      del labels  # unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn must contain argument: logits\. '
-        r'Given arguments: \(\'labels\',\)'):
-      head_lib.multi_label_head(n_classes=3, loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_features_ok(self):
-    def _loss_fn(labels, logits, features):
-      del labels, logits, features  # Unused
-    head_lib.multi_label_head(n_classes=3, loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_invalid(self):
-    def _loss_fn(labels, logits, name=None):
-      del labels, logits, name  # Unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn has unexpected args: \[\'name\'\]'):
-      head_lib.multi_label_head(n_classes=3, loss_fn=_loss_fn)
-
-  def test_classes_for_class_based_metrics_invalid(self):
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'All classes_for_class_based_metrics must be in range \[0, 2\]\. '
-        r'Given: -1'):
-      head_lib.multi_label_head(
-          n_classes=3, classes_for_class_based_metrics=[2, -1])
-
-  def test_classes_for_class_based_metrics_string_invalid(self):
-    with self.assertRaisesRegexp(
-        ValueError, r'\'z\' is not in list'):
-      head_lib.multi_label_head(
-          n_classes=3, label_vocabulary=['a', 'b', 'c'],
-          classes_for_class_based_metrics=['c', 'z'])
-
-  def test_name(self):
-    head = head_lib.multi_label_head(n_classes=4, name='foo')
-    self.assertEqual('foo', head.name)
-
-  def test_predict(self):
-    n_classes = 4
-    head = head_lib.multi_label_head(n_classes)
-    self.assertEqual(n_classes, head.logits_dimension)
-
-    logits = np.array(
-        [[0., 1., 2., -1.], [-1., -2., -3., 1.]], dtype=np.float32)
-    expected_probabilities = _sigmoid(logits)
-    expected_export_classes = [[b'0', b'1', b'2', b'3']] * 2
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    self.assertItemsEqual(
-        (_DEFAULT_SERVING_KEY, 'predict', 'classification'),
-        spec.export_outputs.keys())
-
-    # Assert predictions and export_outputs.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      predictions = sess.run(spec.predictions)
-      self.assertAllClose(logits,
-                          predictions[prediction_keys.PredictionKeys.LOGITS])
-      self.assertAllClose(
-          expected_probabilities,
-          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-
-      self.assertAllClose(
-          expected_probabilities,
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
-      self.assertAllEqual(
-          expected_export_classes,
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
-
-  def test_predict_with_label_vocabulary(self):
-    n_classes = 4
-    head = head_lib.multi_label_head(
-        n_classes, label_vocabulary=['foo', 'bar', 'foobar', 'barfoo'])
-
-    logits = np.array(
-        [[0., 1., 2., -1.], [-1., -2., -3., 1.]], dtype=np.float32)
-    expected_export_classes = [[b'foo', b'bar', b'foobar', b'barfoo']] * 2
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertAllEqual(
-          expected_export_classes,
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
-
-  def test_weight_should_not_impact_prediction(self):
-    n_classes = 4
-    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
-    self.assertEqual(n_classes, head.logits_dimension)
-
-    logits = np.array(
-        [[0., 1., 2., -1.], [-1., -2., -3., 1.]], dtype=np.float32)
-    expected_probabilities = _sigmoid(logits)
-
-    weights_2x1 = [[1.], [2.]]
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array(((42,),), dtype=np.int32),
-            'example_weights': weights_2x1,
-        },
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    # Assert predictions and export_outputs.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      predictions = sess.run(spec.predictions)
-      self.assertAllClose(logits,
-                          predictions[prediction_keys.PredictionKeys.LOGITS])
-      self.assertAllClose(
-          expected_probabilities,
-          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-
-  def test_eval_create_loss(self):
-    """Tests head.create_loss for eval mode."""
-    n_classes = 2
-    head = head_lib.multi_label_head(n_classes)
-
-    logits = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # loss = (labels * -log(sigmoid(logits)) +
-    #         (1 - labels) * -log(1 - sigmoid(logits))) / 2
-    expected_training_loss = 0.5 * np.sum(
-        _sigmoid_cross_entropy(labels=labels, logits=logits))
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_training_loss,
-                          actual_training_loss.eval())
-
-  def test_eval_create_loss_large_logits(self):
-    """Tests head.create_loss for eval mode and large logits."""
-    n_classes = 2
-    head = head_lib.multi_label_head(n_classes)
-
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # For large logits, this is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits
-    expected_training_loss = 0.5 * np.sum(
-        np.array([[(10. + 10.) / 2.], [(15. + 0.) / 2.]], dtype=np.float32))
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, actual_training_loss.eval(), atol=1e-4)
-
-  def test_eval_create_loss_labels_wrong_shape(self):
-    """Tests head.create_loss for eval mode when labels has the wrong shape."""
-    n_classes = 2
-    head = head_lib.multi_label_head(n_classes)
-
-    logits = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32)
-    labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels_placeholder)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[expected_labels_shape: \] \[2 2\] \[labels_shape: \] \[2 1\]'):
-        actual_training_loss.eval({
-            labels_placeholder: np.array([[1], [1]], dtype=np.int64)
-        })
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'labels shape must be \[D0, D1, ... DN, 2\]\..*'
-          r'\[Received shape: \] \[2\]'):
-        actual_training_loss.eval({
-            labels_placeholder: np.array([1, 1], dtype=np.int64)
-        })
-
-  def test_eval_create_loss_loss_fn(self):
-    """Tests head.create_loss for eval mode and custom loss_fn."""
-    loss = np.array([[1.], [2.]], dtype=np.float32)
-    logits_input = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels_input = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    def _loss_fn(labels, logits):
-      check_labels = control_flow_ops.Assert(
-          math_ops.reduce_all(math_ops.equal(labels, labels_input)),
-          data=[labels])
-      check_logits = control_flow_ops.Assert(
-          math_ops.reduce_all(math_ops.equal(logits, logits_input)),
-          data=[logits])
-      with ops.control_dependencies([check_labels, check_logits]):
-        return constant_op.constant(loss)
-    head = head_lib.multi_label_head(n_classes=2, loss_fn=_loss_fn)
-
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_input,
-        labels=labels_input)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(np.sum(loss) / 2., actual_training_loss.eval())
-
-  def test_eval_create_loss_loss_fn_wrong_shape(self):
-    """Tests custom loss_fn that returns Tensor of unexpected shape."""
-    loss = np.array([1., 2.], dtype=np.float32)
-    def _loss_fn(labels, logits):
-      del labels, logits  # Unused
-      return constant_op.constant(loss)
-    head = head_lib.multi_label_head(n_classes=2, loss_fn=_loss_fn)
-
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. \] '
-          r'\[logits_shape: \] \[2 2\] \[loss_shape: \] \[2\]'):
-        actual_training_loss.eval()
-
-  def test_eval_labels_none(self):
-    """Tests that error is raised when labels is None."""
-    head = head_lib.multi_label_head(n_classes=2)
-
-    with self.assertRaisesRegexp(
-        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
-          labels=None)
-
-  def _test_eval(
-      self, head, logits, labels, expected_loss, expected_metrics,
-      features=None, regularization_losses=None):
-    spec = head.create_estimator_spec(
-        features=features or {},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels,
-        regularization_losses=regularization_losses)
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, and metrics.
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
-          rtol=tol,
-          atol=tol)
-
-  def test_eval(self):
-    n_classes = 2
-    head = head_lib.multi_label_head(n_classes)
-    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # Sum over examples, divide by batch_size.
-    expected_loss = 0.5 * np.sum(
-        _sigmoid_cross_entropy(labels=labels, logits=logits))
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # Average loss over examples.
-        keys.LOSS_MEAN: expected_loss,
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.3333,
-        keys.AUC_PR: 0.7639,
-    }
-    self._test_eval(
-        head=head,
-        logits=logits,
-        labels=labels,
-        expected_loss=expected_loss,
-        expected_metrics=expected_metrics)
-
-  def test_eval_sparse_labels(self):
-    n_classes = 2
-    head = head_lib.multi_label_head(n_classes)
-    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
-    # Equivalent to multi_hot = [[1, 0], [1, 1]]
-    labels = sparse_tensor.SparseTensor(
-        values=[0, 0, 1],
-        indices=[[0, 0], [1, 0], [1, 1]],
-        dense_shape=[2, 2])
-    labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # Sum over examples, divide by batch_size.
-    expected_loss = 0.5 * np.sum(
-        _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits))
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # Average loss over examples.
-        keys.LOSS_MEAN: expected_loss,
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.3333,
-        keys.AUC_PR: 0.7639,
-    }
-    self._test_eval(
-        head=head,
-        logits=logits,
-        labels=labels,
-        expected_loss=expected_loss,
-        expected_metrics=expected_metrics)
-
-  def test_eval_with_regularization_losses(self):
-    n_classes = 2
-    head = head_lib.multi_label_head(
-        n_classes, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    regularization_losses = [1.5, 0.5]
-    expected_regularization_loss = 2.
-    # unregularized_loss = sum(
-    #     labels * -log(sigmoid(logits)) +
-    #     (1 - labels) * -log(1 - sigmoid(logits))) / batch_size
-    expected_unregularized_loss = np.sum(
-        _sigmoid_cross_entropy(labels=labels, logits=logits)) / 2.
-    expected_regularized_loss = (
-        expected_unregularized_loss + expected_regularization_loss)
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_unregularized_loss,
-        keys.LOSS_REGULARIZATION: expected_regularization_loss,
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.3333,
-        keys.AUC_PR: 0.7639,
-    }
-    self._test_eval(
-        head=head,
-        logits=logits,
-        labels=labels,
-        expected_loss=expected_regularized_loss,
-        expected_metrics=expected_metrics,
-        regularization_losses=regularization_losses)
-
-  def test_eval_with_label_vocabulary(self):
-    n_classes = 2
-    head = head_lib.multi_label_head(
-        n_classes, label_vocabulary=['class0', 'class1'])
-    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
-    # Equivalent to multi_hot = [[1, 0], [1, 1]]
-    labels = sparse_tensor.SparseTensor(
-        values=['class0', 'class0', 'class1'],
-        indices=[[0, 0], [1, 0], [1, 1]],
-        dense_shape=[2, 2])
-    labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # Sum over examples, divide by batch_size.
-    expected_loss = 0.5 * np.sum(
-        _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits))
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # Average loss over examples.
-        keys.LOSS_MEAN: expected_loss,
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.3333,
-        keys.AUC_PR: 0.7639,
-    }
-    self._test_eval(
-        head=head,
-        logits=logits,
-        labels=labels,
-        expected_loss=expected_loss,
-        expected_metrics=expected_metrics)
-
-  def test_eval_with_label_vocabulary_with_multi_hot_input(self):
-    n_classes = 2
-    head = head_lib.multi_label_head(
-        n_classes, label_vocabulary=['class0', 'class1'])
-    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
-    labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # Sum over examples, divide by batch_size.
-    expected_loss = 0.5 * np.sum(
-        _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits))
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # Average loss over examples.
-        keys.LOSS_MEAN: expected_loss,
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.3333,
-        keys.AUC_PR: 0.7639,
-    }
-    self._test_eval(
-        head=head,
-        logits=logits,
-        labels=labels_multi_hot,
-        expected_loss=expected_loss,
-        expected_metrics=expected_metrics)
-
-  def test_eval_with_thresholds(self):
-    n_classes = 2
-    thresholds = [0.25, 0.5, 0.75]
-    head = head_lib.multi_label_head(n_classes, thresholds=thresholds)
-
-    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # Sum over examples, divide by batch_size.
-    expected_loss = 0.5 * np.sum(
-        _sigmoid_cross_entropy(labels=labels, logits=logits))
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # Average loss over examples.
-        keys.LOSS_MEAN: expected_loss,
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.3333,
-        keys.AUC_PR: 0.7639,
-        keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 2. / 4.,
-        keys.PRECISION_AT_THRESHOLD % thresholds[0]: 2. / 3.,
-        keys.RECALL_AT_THRESHOLD % thresholds[0]: 2. / 3.,
-        keys.ACCURACY_AT_THRESHOLD % thresholds[1]: 1. / 4.,
-        keys.PRECISION_AT_THRESHOLD % thresholds[1]: 1. / 2.,
-        keys.RECALL_AT_THRESHOLD % thresholds[1]: 1. / 3.,
-        keys.ACCURACY_AT_THRESHOLD % thresholds[2]: 2. / 4.,
-        keys.PRECISION_AT_THRESHOLD % thresholds[2]: 1. / 1.,
-        keys.RECALL_AT_THRESHOLD % thresholds[2]: 1. / 3.,
-    }
-
-    self._test_eval(
-        head=head,
-        logits=logits,
-        labels=labels,
-        expected_loss=expected_loss,
-        expected_metrics=expected_metrics)
-
-  def test_eval_with_classes_for_class_based_metrics(self):
-    head = head_lib.multi_label_head(
-        n_classes=2, classes_for_class_based_metrics=[0, 1])
-
-    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # Sum over examples, divide by batch_size.
-    expected_loss = 0.5 * np.sum(
-        _sigmoid_cross_entropy(labels=labels, logits=logits))
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # Average loss over examples.
-        keys.LOSS_MEAN: expected_loss,
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.3333,
-        keys.AUC_PR: 0.7639,
-        keys.PROBABILITY_MEAN_AT_CLASS % 0: np.sum(_sigmoid(logits[:, 0])) / 2.,
-        keys.AUC_AT_CLASS % 0: 0.,
-        keys.AUC_PR_AT_CLASS % 0: 1.,
-        keys.PROBABILITY_MEAN_AT_CLASS % 1: np.sum(_sigmoid(logits[:, 1])) / 2.,
-        keys.AUC_AT_CLASS % 1: 1.,
-        keys.AUC_PR_AT_CLASS % 1: 1.,
-    }
-
-    self._test_eval(
-        head=head,
-        logits=logits,
-        labels=labels,
-        expected_loss=expected_loss,
-        expected_metrics=expected_metrics)
-
-  def test_eval_with_classes_for_class_based_metrics_string(self):
-    head = head_lib.multi_label_head(
-        n_classes=2, label_vocabulary=['a', 'b'],
-        classes_for_class_based_metrics=['a', 'b'])
-
-    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
-    labels = sparse_tensor.SparseTensor(
-        values=['a', 'a', 'b'],
-        indices=[[0, 0], [1, 0], [1, 1]],
-        dense_shape=[2, 2])
-    labels_onehot = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # Sum over examples, divide by batch_size.
-    expected_loss = 0.5 * np.sum(
-        _sigmoid_cross_entropy(labels=labels_onehot, logits=logits))
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # Average loss over examples.
-        keys.LOSS_MEAN: expected_loss,
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.3333,
-        keys.AUC_PR: 0.7639,
-        keys.PROBABILITY_MEAN_AT_NAME % 'a':
-            np.sum(_sigmoid(logits[:, 0])) / 2.,
-        keys.AUC_AT_NAME % 'a': 0.,
-        keys.AUC_PR_AT_NAME % 'a': 1.,
-        keys.PROBABILITY_MEAN_AT_NAME % 'b':
-            np.sum(_sigmoid(logits[:, 1])) / 2.,
-        keys.AUC_AT_NAME % 'b': 1.,
-        keys.AUC_PR_AT_NAME % 'b': 1.,
-    }
-
-    self._test_eval(
-        head=head,
-        logits=logits,
-        labels=labels,
-        expected_loss=expected_loss,
-        expected_metrics=expected_metrics)
-
-  def test_eval_with_weights(self):
-    n_classes = 2
-    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
-
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # Average over classes, weighted sum over examples, divide by batch_size.
-    # loss = ( 1 * (10 + 10) / 2 + 2 * (15 + 0) / 2) / 2
-    expected_loss = 12.5
-
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array([[41], [42]], dtype=np.int32),
-            'example_weights': np.array([[1.], [2.]], dtype=np.float32),
-        },
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # Average loss over weighted examples (denominator is sum(weights)).
-        keys.LOSS_MEAN: expected_loss * (2. / 3.),
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.2000,
-        keys.AUC_PR: 0.7833,
-    }
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, and metrics.
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
-          rtol=tol,
-          atol=tol)
-
-  def test_train_create_loss_large_logits(self):
-    """Tests head.create_loss for train mode and large logits."""
-    n_classes = 2
-    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
-
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    weights = np.array([[1.], [2.]], dtype=np.float32)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # For large logits, this is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits
-    expected_unreduced_loss = [[(10. + 10.) / 2.], [(15. + 0.) / 2.]]
-    expected_weights = [[1.], [2.]]
-    expected_training_loss = (1. * (10. + 10.) / 2. + 2. * (15. + 0.) / 2.) / 2.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features={
-            'x': np.array(((42,),), dtype=np.int32),
-            'example_weights': weights
-        },
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), atol=1e-4)
-      self.assertAllClose(
-          expected_unreduced_loss, unreduced_loss.eval(), atol=1e-4)
-      self.assertAllClose(expected_weights, actual_weights.eval())
-
-  def test_train_create_loss_loss_reduction(self):
-    """Tests head.create_loss with loss_reduction."""
-    n_classes = 2
-    head = head_lib.multi_label_head(
-        n_classes, weight_column='example_weights',
-        loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
-
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    weights = np.array([[1.], [2.]], dtype=np.float32)
-    # loss = labels * -log(sigmoid(logits)) +
-    #        (1 - labels) * -log(1 - sigmoid(logits))
-    # For large logits, this is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits
-    expected_unreduced_loss = [[(10. + 10.) / 2.], [(15. + 0.) / 2.]]
-    expected_weights = [[1.], [2.]]
-    expected_training_loss = (1. * (10. + 10.) / 2. + 2. * (15. + 0.) / 2.) / 2.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features={
-            'x': np.array(((42,),), dtype=np.int32),
-            'example_weights': weights
-        },
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), atol=1e-4)
-      self.assertAllClose(
-          expected_unreduced_loss, unreduced_loss.eval(), atol=1e-4)
-      self.assertAllClose(expected_weights, actual_weights.eval())
-
-  def test_train_labels_none(self):
-    """Tests that error is raised when labels is None."""
-    head = head_lib.multi_label_head(n_classes=2)
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    with self.assertRaisesRegexp(
-        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
-          labels=None,
-          train_op_fn=_no_op_train_fn)
-
-  def test_train_invalid_indicator_labels(self):
-    head = head_lib.multi_label_head(n_classes=2)
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    # The value 2 is outside the allowed range.
-    labels = np.array([[2, 0], [1, 1]], dtype=np.int64)
-    def _train_op_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features={},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'labels must be an integer indicator Tensor with values in '
-          r'\[0, 1\]'):
-        sess.run(spec.loss)
-
-  def test_train_invalid_sparse_labels(self):
-    head = head_lib.multi_label_head(n_classes=2)
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    # The value 2 is outside the allowed range.
-    labels = sparse_tensor.SparseTensor(
-        values=[2, 0, 1],
-        indices=[[0, 0], [1, 0], [1, 1]],
-        dense_shape=[2, 2])
-    def _train_op_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features={},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'labels must be an integer SparseTensor with values in \[0, 2\)'):
-        sess.run(spec.loss)
-
-  def _test_train(self, head, logits, labels, expected_loss):
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=3)])
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    self.assertIsNotNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
-          train_result)
-      _assert_simple_summaries(
-          self, {metric_keys.MetricKeys.LOSS: expected_loss}, summary_str, tol)
-
-  def test_train(self):
-    head = head_lib.multi_label_head(n_classes=2)
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # Average over classes, sum over examples, divide by batch_size.
-    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2
-    expected_loss = 8.75
-    self._test_train(
-        head=head, logits=logits, labels=labels, expected_loss=expected_loss)
-
-  def test_train_sparse_labels(self):
-    head = head_lib.multi_label_head(n_classes=2)
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    # Equivalent to multi_hot = [[1, 0], [1, 1]]
-    labels = sparse_tensor.SparseTensor(
-        values=[0, 0, 1],
-        indices=[[0, 0], [1, 0], [1, 1]],
-        dense_shape=[2, 2])
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # Average over classes, sum over examples, divide by batch_size.
-    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2
-    expected_loss = 8.75
-    self._test_train(
-        head=head, logits=logits, labels=labels, expected_loss=expected_loss)
-
-  def test_train_with_label_vocabulary(self):
-    head = head_lib.multi_label_head(
-        n_classes=2, label_vocabulary=['class0', 'class1'])
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    # Equivalent to multi_hot = [[1, 0], [1, 1]]
-    labels = sparse_tensor.SparseTensor(
-        values=['class0', 'class0', 'class1'],
-        indices=[[0, 0], [1, 0], [1, 1]],
-        dense_shape=[2, 2])
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # Average over classes, sum over examples, divide by batch_size.
-    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2
-    expected_loss = 8.75
-    self._test_train(
-        head=head, logits=logits, labels=labels, expected_loss=expected_loss)
-
-  def test_train_with_optimizer(self):
-    head = head_lib.multi_label_head(n_classes=2)
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # Average over classes, sum over examples, divide by batch_size.
-    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2
-    expected_loss = 8.75
-    expected_train_result = 'my_train_op'
-
-    class _Optimizer(object):
-
-      def minimize(self, loss, global_step):
-        del global_step
-        return string_ops.string_join(
-            [constant_op.constant(expected_train_result),
-             string_ops.as_string(loss, precision=3)])
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        optimizer=_Optimizer())
-
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run((spec.loss, spec.train_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
-          train_result)
-
-  def test_train_with_update_ops(self):
-    head = head_lib.multi_label_head(n_classes=2)
-
-    with ops.Graph().as_default():
-      w = variables.Variable(1)
-      update_op = w.assign_add(1)
-      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
-
-      t = variables.Variable('')
-      expected_train_result = b'my_train_op'
-      def _train_op_fn(loss):
-        del loss
-        return t.assign(expected_train_result)
-
-      spec = head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
-          labels=np.array([[1, 0], [1, 1]], dtype=np.int64),
-          train_op_fn=_train_op_fn)
-
-      with self.cached_session() as sess:
-        _initialize_variables(self, spec.scaffold)
-        sess.run(spec.train_op)
-        w_value, t_value = sess.run([w, t])
-        self.assertEqual(2, w_value)
-        self.assertEqual(expected_train_result, t_value)
-
-  def test_train_with_regularization_losses(self):
-    head = head_lib.multi_label_head(
-        n_classes=2, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    regularization_losses = [1.5, 0.5]
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # Average over classes and over batch and add regularization loss.
-    expected_loss = 35. / 4. + 2.
-    expected_summaries = {
-        metric_keys.MetricKeys.LOSS: expected_loss,
-        metric_keys.MetricKeys.LOSS_REGULARIZATION: 2.,
-    }
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=3)])
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn,
-        regularization_losses=regularization_losses)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
-          train_result)
-      _assert_simple_summaries(self, expected_summaries, summary_str, tol)
-
-  def test_train_with_weights(self):
-    n_classes = 2
-    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
-
-    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
-    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # Average over classes, weighted sum over examples, divide by batch_size.
-    # loss = ( 1 * (10 + 10) / 2 + 2 * (15 + 0) / 2 ) / 2
-    expected_loss = 12.5
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=3)])
-
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array([[41], [42]], dtype=np.int32),
-            'example_weights': np.array([[1.], [2.]], dtype=np.float32),
-        },
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    self.assertIsNotNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
-          train_result)
-      _assert_simple_summaries(
-          self, {metric_keys.MetricKeys.LOSS: expected_loss,}, summary_str, tol)
-
-  def test_multi_dim_weighted_train_create_loss(self):
-    """Logits and labels of shape [2, 2, 3], weights [2, 2]."""
-    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
-
-    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
-                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
-    labels = np.array([[[1, 0, 0], [1, 0, 0]],
-                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
-    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    # unreduced_loss =
-    #     [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3
-    #   = [[20/3, 10/3], [4, 8]]
-    expected_unreduced_loss = [[[20./3.], [10./3.]], [[4.], [8.]]]
-    # weights are reshaped to [2, 2, 1] to match logits.
-    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
-    # loss = (1*20/3 + 1.5*10/3 + 2*4 + 2.5*8) / 4 = 9.9167
-    expected_training_loss = 9.9167
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    atol = 1.e-3
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), atol=atol)
-      self.assertAllClose(
-          expected_unreduced_loss, unreduced_loss.eval(), atol=atol)
-      self.assertAllClose(expected_weights, actual_weights.eval())
-
-  def test_multi_dim_weighted_train(self):
-    """Logits and labels of shape [2, 2, 3], weights [2, 2]."""
-    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
-
-    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
-                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
-    labels = np.array([[[1, 0, 0], [1, 0, 0]],
-                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
-    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3
-    #      = [[20/3, 10/3], [4, 8]]
-    # loss = (1*20/3 + 1.5*10/3 + 2*4 + 2.5*8) / 4 = 9.9167
-    expected_loss = 9.9167
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=3)])
-
-    spec = head.create_estimator_spec(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    atol = 1.e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, monitored_session.Scaffold())
-      loss, train_result = sess.run((spec.loss, spec.train_op))
-      self.assertAllClose(expected_loss, loss, atol=atol)
-      self.assertEqual(
-          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
-          train_result)
-
-  def test_multi_dim_weights_wrong_inner_dim(self):
-    """Logits and labels of shape [2, 2, 3], weights [2, 1]."""
-    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
-
-    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
-                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
-    labels = np.array([[[1, 0, 0], [1, 0, 0]],
-                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
-    weights = np.array([[1.], [2.]], dtype=np.float32)
-    def _train_op_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'):
-        spec.loss.eval()
-
-  def test_multi_dim_weights_wrong_outer_dim(self):
-    """Logits and labels of shape [2, 2, 3], weights [2, 2, 3]."""
-    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
-
-    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
-                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
-    labels = np.array([[[1, 0, 0], [1, 0, 0]],
-                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
-    weights = np.array([[[1., 1., 1.], [1.5, 1.5, 1.5]],
-                        [[2., 2., 2.], [2.5, 2.5, 2.5]]], dtype=np.float32)
-    weights_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    def _train_op_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features={'weights': weights_placeholder},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 2 3\]'):
-        spec.loss.eval({weights_placeholder: weights})
-
-  def test_multi_dim_weighted_eval(self):
-    """Logits and labels of shape [2, 2, 3], weights [2, 2]."""
-    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
-
-    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
-                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
-    labels = np.array([[[1, 0, 0], [1, 0, 0]],
-                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
-    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3
-    #      = [[20/3, 10/3], [4, 8]]
-    # loss = (1*20/3 + 1.5*10/3 + 2*4 + 2.5*8) / 4 = 9.9167
-    expected_loss = 9.9167
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_loss * (4. / np.sum(weights)),
-        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC: 0.4977,
-        keys.AUC_PR: 0.6645,
-    }
-    self._test_eval(
-        head=head,
-        features={'weights': weights},
-        logits=logits,
-        labels=labels,
-        expected_loss=expected_loss,
-        expected_metrics=expected_metrics)
-
-
-class PoissonRegressionHead(test.TestCase):
-
-  def setUp(self):
-    ops.reset_default_graph()
-
-  def test_train(self):
-    head = head_lib.poisson_regression_head()
-
-    # Create estimator spec.
-    logits = np.array([[0], [-1], [1]], dtype=np.float32)
-    labels = np.array([[1], [2], [3]], dtype=np.int32)
-    # With x = exp(logits), z = labels.
-    # loss = -ln(exp(-x) * (x^z) / z!)
-    #      = x - z * ln(x) + ln(z!)
-    #      = exp(logits) - labels * logits - ln(labels!)
-    # But for ln(z!) and z > 1, the Stirling approximation is used
-    # ln(z!) = z*ln(z) - z + 0.5*ln(2*pi*z)
-    # loss = [exp(0) - 1 * 0 + ln(1!),
-    #         exp(-1) - 2 * (-1) + 2*ln(2) - 2 + 0.5*ln(2*pi*2),
-    #         exp(1) - 3 * 1 + 3*ln(3) - 3 + 0.5*ln(2*pi*3)]
-    #      = [1.0, 3.020, 1.482]
-    # training_loss = (1.0 + 3.020 + 1.482) / 3
-    expected_loss = 1.834
-    atol = 0.001
-    expected_train_result = b'my_train_op'
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_near(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          atol=atol, name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run([spec.loss, spec.train_op])
-      self.assertAlmostEqual(expected_loss, loss, delta=atol)
-      self.assertEqual(expected_train_result, train_result)
-
-  def test_predict(self):
-    head = head_lib.poisson_regression_head()
-
-    # Create estimator spec.
-    logits = np.array([[0], [-1], [1]], dtype=np.float32)
-    expected_predictions = np.exp(logits)
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    # Assert spec contains expected tensors.
-    keys = prediction_keys.PredictionKeys
-    self.assertItemsEqual(
-        (keys.PREDICTIONS, keys.LOGITS), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[keys.PREDICTIONS].dtype)
-    self.assertEqual(dtypes.float32, spec.predictions[keys.LOGITS].dtype)
-
-    # Assert predictions.
-    with self.cached_session():
-      _initialize_variables(self, spec.scaffold)
-      self.assertAllClose(
-          expected_predictions, spec.predictions[keys.PREDICTIONS].eval())
-      self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval())
-
-
-class LogisticRegressionHead(test.TestCase):
-
-  def setUp(self):
-    ops.reset_default_graph()
-
-  def test_train(self):
-    head = head_lib.logistic_regression_head()
-
-    # Create estimator spec.
-    logits = np.array([[0], [-1], [1]], dtype=np.float32)
-    labels = np.array([[.4], [.6], [.8]], dtype=np.float32)
-    # Following the documentation in
-    # tf.nn.sigmoid_cross_entropy_with_logits:
-    # With x = logits, z = labels.
-    # loss  = max(x, 0) - x * z + log(1 + exp(-abs(x)))
-    # loss = [0 - 0 * 0.4 + ln(1 + exp(-0)),
-    #         0 + 1 * 0.6 + ln(1 + exp(-1)),
-    #         1 - 1 * 0.8 + ln(1 + exp(-1))]
-    #      = [0.6931, 0.9133, 0.5133]
-    # training_loss = (0.6931 + 0.9133 + 0.5133) / 3
-    expected_loss = 0.7066
-    atol = 0.001
-    expected_train_result = b'my_train_op'
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_near(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          atol=atol, name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run([spec.loss, spec.train_op])
-      self.assertAlmostEqual(expected_loss, loss, delta=atol)
-      self.assertEqual(expected_train_result, train_result)
-
-  def test_train_labels_too_large(self):
-    head = head_lib.logistic_regression_head()
-
-    # Create estimator spec.
-    logits = np.array([[0], [-1], [1]], dtype=np.float32)
-    labels = np.array([[.4], [1.2], [.8]], dtype=np.float32)
-    expected_train_result = b'my_train_op'
-    def _train_op_fn(loss):
-      del loss
-      return constant_op.constant(expected_train_result)
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[Labels must be in range \[0, 1\]\] .* \[\[0.4\]\[1.2\]\[0.8\]\]'):
-        _ = sess.run(spec.loss)
-
-  def test_train_labels_negative(self):
-    head = head_lib.logistic_regression_head()
-
-    # Create estimator spec.
-    logits = np.array([[0], [-1], [1]], dtype=np.float32)
-    labels = np.array([[.4], [-0.2], [.8]], dtype=np.float32)
-    expected_train_result = b'my_train_op'
-    def _train_op_fn(loss):
-      del loss
-      return constant_op.constant(expected_train_result)
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[Labels must be in range \[0, 1\]\] .* \[\[0.4\]\[-0.2\]\[0.8\]\]'
-      ):
-        _ = sess.run(spec.loss)
-
-  def test_predict(self):
-    head = head_lib.logistic_regression_head()
-
-    # Create estimator spec.
-    logits = np.array([[0], [-1], [1]], dtype=np.float32)
-    expected_predictions = 1. / (1. + np.exp(-logits))
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    # Assert spec contains expected tensors.
-    keys = prediction_keys.PredictionKeys
-    self.assertItemsEqual(
-        (keys.PREDICTIONS, keys.LOGITS), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[keys.PREDICTIONS].dtype)
-    self.assertEqual(dtypes.float32, spec.predictions[keys.LOGITS].dtype)
-
-    # Assert predictions.
-    with self.cached_session():
-      _initialize_variables(self, spec.scaffold)
-      self.assertAllClose(
-          expected_predictions, spec.predictions[keys.PREDICTIONS].eval())
-      self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval())
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/hooks.py b/tensorflow/contrib/estimator/python/estimator/hooks.py
index 49f7bbd320..33b587ec0b 100644
--- a/tensorflow/contrib/estimator/python/estimator/hooks.py
+++ b/tensorflow/contrib/estimator/python/estimator/hooks.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,274 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Some useful session run hooks."""
+"""hooks python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
-import time
-
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.training import training
-from tensorflow.python.training import training_util
-
-
-# pylint: disable=protected-access
-class InMemoryEvaluatorHook(training.SessionRunHook):
-  """Hook to run evaluation in training without a checkpoint.
-
-  Example:
-
-  ```python
-  def train_input_fn():
-    ...
-    return train_dataset
-
-  def eval_input_fn():
-    ...
-    return eval_dataset
-
-  estimator = tf.estimator.DNNClassifier(...)
-
-  evaluator = tf.contrib.estimator.InMemoryEvaluatorHook(
-      estimator, eval_input_fn)
-  estimator.train(train_input_fn, hooks=[evaluator])
-  ```
-
-  Current limitations of this approach are:
-
-  * It doesn't support multi-node distributed mode.
-  * It doesn't support saveable objects other than variables (such as boosted
-    tree support)
-  * It doesn't support custom saver logic (such as ExponentialMovingAverage
-    support)
-
-  """
-
-  def __init__(self,
-               estimator,
-               input_fn,
-               steps=None,
-               hooks=None,
-               name=None,
-               every_n_iter=100):
-    """Initializes a `InMemoryEvaluatorHook`.
-
-    Args:
-      estimator: A `tf.estimator.Estimator` instance to call evaluate.
-      input_fn:  Equivalent to the `input_fn` arg to `estimator.evaluate`. A
-        function that constructs the input data for evaluation.
-        See [Createing input functions](
-        https://tensorflow.org/guide/premade_estimators#create_input_functions)
-        for more information. The function should construct and return one of
-        the following:
-
-          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
-            tuple (features, labels) with same constraints as below.
-          * A tuple (features, labels): Where `features` is a `Tensor` or a
-            dictionary of string feature name to `Tensor` and `labels` is a
-            `Tensor` or a dictionary of string label name to `Tensor`. Both
-            `features` and `labels` are consumed by `model_fn`. They should
-            satisfy the expectation of `model_fn` from inputs.
-
-      steps: Equivalent to the `steps` arg to `estimator.evaluate`.  Number of
-        steps for which to evaluate model. If `None`, evaluates until `input_fn`
-        raises an end-of-input exception.
-      hooks: Equivalent to the `hooks` arg to `estimator.evaluate`. List of
-        `SessionRunHook` subclass instances. Used for callbacks inside the
-        evaluation call.
-      name:  Equivalent to the `name` arg to `estimator.evaluate`. Name of the
-        evaluation if user needs to run multiple evaluations on different data
-        sets, such as on training data vs test data. Metrics for different
-        evaluations are saved in separate folders, and appear separately in
-        tensorboard.
-      every_n_iter: `int`, runs the evaluator once every N training iteration.
-
-    Raises:
-      ValueError: if `every_n_iter` is non-positive or it's not a single machine
-        training
-    """
-    if every_n_iter is None or every_n_iter <= 0:
-      raise ValueError('invalid every_n_iter=%s.' % every_n_iter)
-    if (estimator.config.num_ps_replicas > 0 or
-        estimator.config.num_worker_replicas > 1):
-      raise ValueError(
-          'InMemoryEvaluator supports only single machine (aka Local) setting.')
-    self._estimator = estimator
-    self._input_fn = input_fn
-    self._steps = steps
-    self._name = name
-    self._every_n_iter = every_n_iter
-    self._eval_dir = os.path.join(self._estimator.model_dir, 'eval'
-                                  if not name else 'eval_' + name)
-
-    self._graph = None
-    self._hooks = estimator_lib._check_hooks_type(hooks)
-    self._hooks.extend(self._estimator._convert_eval_steps_to_hooks(steps))
-    self._timer = training.SecondOrStepTimer(every_steps=every_n_iter)
-
-  def begin(self):
-    """Build eval graph and restoring op."""
-    self._timer.reset()
-    self._iter_count = 0
-    self._graph = ops.Graph()
-    with self._graph.as_default():
-      (self._scaffold, self._update_op, self._eval_dict,
-       self._all_hooks) = self._estimator._evaluate_build_graph(
-           self._input_fn, self._hooks, checkpoint_path=None)
-
-      if self._scaffold.saver is not None:
-        raise ValueError('InMemoryEvaluator does not support custom saver')
-      if self._scaffold.init_fn is not None:
-        raise ValueError('InMemoryEvaluator does not support custom init_fn')
-
-      self._var_name_to_eval_var = {
-          v.name: v for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-      }
-      self._var_name_to_placeholder = {
-          v.name: array_ops.placeholder(v.dtype)
-          for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-      }
-
-  def after_create_session(self, session, coord):  # pylint: disable=unused-argument
-    """Does first run which shows the eval metrics before training."""
-    if ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS):
-      raise ValueError(
-          'InMemoryEvaluator does not support saveables other than global '
-          'variables.')
-    self._var_name_to_train_var = {
-        v.name: v for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    }
-    var_names_to_transfer = set(self._var_name_to_placeholder.keys()) & set(
-        self._var_name_to_train_var.keys())
-    # Filter training var names that are not exist in evaluation
-    self._var_name_to_train_var = {
-        v_name: self._var_name_to_train_var[v_name]
-        for v_name in var_names_to_transfer
-    }
-    # Filter eval var names that are not exist in training
-    self._var_name_to_eval_var = {
-        v_name: self._var_name_to_eval_var[v_name]
-        for v_name in var_names_to_transfer
-    }
-
-    with self._graph.as_default():
-      self._var_feed_op = control_flow_ops.group([
-          state_ops.assign(self._var_name_to_eval_var[v_name],
-                           self._var_name_to_placeholder[v_name])
-          for v_name in var_names_to_transfer
-      ])
-
-    self._evaluate(session)
-
-  def _evaluate(self, train_session):
-    var_name_to_value = train_session.run(self._var_name_to_train_var)
-    placeholder_to_value = {
-        self._var_name_to_placeholder[v_name]: var_name_to_value[v_name]
-        for v_name in var_name_to_value
-    }
-
-    def feed_variables(scaffold, session):
-      del scaffold
-      session.run(self._var_feed_op, feed_dict=placeholder_to_value)
-
-    scaffold = training.Scaffold(
-        init_fn=feed_variables, copy_from_scaffold=self._scaffold)
-
-    with self._graph.as_default():
-      self._estimator._evaluate_run(
-          checkpoint_path=None,
-          scaffold=scaffold,
-          update_op=self._update_op,
-          eval_dict=self._eval_dict,
-          all_hooks=self._all_hooks,
-          output_dir=self._eval_dir)
-
-    self._timer.update_last_triggered_step(self._iter_count)
-
-  def after_run(self, run_context, run_values):  # pylint: disable=unused-argument
-    """Runs evaluator."""
-    self._iter_count += 1
-    if self._timer.should_trigger_for_step(self._iter_count):
-      self._evaluate(run_context.session)
-
-  def end(self, session):  # pylint: disable=unused-argument
-    """Runs evaluator for final model."""
-    self._evaluate(session)
-
-
-class _StopAtCheckpointStepHook(training.SessionRunHook):
-  """Hook that requests stop at a specified step based on checkpoint.
-
-  Note: We recommend using 'make_stop_at_checkpoint_step_hook` to get the proper
-  hook.
-  """
-
-  def __init__(self, model_dir, last_step,
-               wait_after_file_check_secs=30):
-    """Initializes a `StopAtCheckpointStepHook`.
-
-    This hook requests stop after a last step has been reached. It checks latest
-    checkpoint to verify last step is written on disk or not.
-
-    Args:
-      model_dir: Directory to read global step from latest checkpoint.
-      last_step: Step after which to stop.
-      wait_after_file_check_secs: Reading same file by many workers may create
-      I/O issues. To throttle that we will wait given secs after each read of
-      the file.
-
-    Raises:
-      ValueError: If one of the arguments is invalid.
-    """
-    if last_step is None:
-      raise ValueError('last_step must be specified.')
-    if model_dir is None:
-      raise ValueError('model_dir must be specified.')
-
-    self._model_dir = model_dir
-    self._last_step = last_step
-    self._wait_after_file_check_secs = wait_after_file_check_secs
-
-  def begin(self):
-    self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
-    if self._global_step_tensor is None:
-      raise RuntimeError(
-          'Global step should be created to use StopAtCheckpointStepHook.')
-
-  def before_run(self, run_context):  # pylint: disable=unused-argument
-    return training.SessionRunArgs(self._global_step_tensor)
-
-  def after_run(self, run_context, run_values):
-    global_step = run_values.results + 1
-    if global_step >= self._last_step:
-      # Check latest global step in the checkpoint to ensure that the targeted
-      # last step is written on disk.
-
-      step = estimator_lib._load_global_step_from_checkpoint_dir(
-          self._model_dir)
-      if step >= self._last_step:
-        run_context.request_stop()
-      else:
-        time.sleep(self._wait_after_file_check_secs)
-
-
-def make_stop_at_checkpoint_step_hook(estimator,
-                                      last_step,
-                                      wait_after_file_check_secs=30):
-  """Creates a proper StopAtCheckpointStepHook based on chief status."""
+from tensorflow_estimator.contrib.estimator.python.estimator import hooks
 
-  if estimator.config.is_chief:
-    return training.StopAtStepHook(last_step=last_step)
-  return _StopAtCheckpointStepHook(
-      model_dir=estimator.model_dir,
-      last_step=last_step,
-      wait_after_file_check_secs=wait_after_file_check_secs)
+# Include attrs that start with single underscore.
+hooks.__all__ = [s for s in dir(hooks) if not s.startswith('__')]
 
-# pylint: enable=protected-access
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.hooks import *
diff --git a/tensorflow/contrib/estimator/python/estimator/hooks_test.py b/tensorflow/contrib/estimator/python/estimator/hooks_test.py
deleted file mode 100644
index 62ffad56da..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/hooks_test.py
+++ /dev/null
@@ -1,403 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for hooks."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import glob
-import json
-import os
-import tempfile
-import time
-
-from tensorflow.contrib.estimator.python.estimator import hooks as hooks_lib
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import estimator_lib
-from tensorflow.python.estimator import run_config as run_config_lib
-from tensorflow.python.feature_column import feature_column as feature_column_lib
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import test
-from tensorflow.python.summary import summary_iterator
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import training
-
-
-def summary_step_keyword_to_value_mapping(dir_):
-  writer_cache.FileWriterCache.clear()
-
-  # Get last Event written.
-  event_paths = glob.glob(os.path.join(dir_, 'events*'))
-  step_keyword_to_value = {}
-  for last_event in summary_iterator.summary_iterator(event_paths[-1]):
-    if last_event.step not in step_keyword_to_value:
-      step_keyword_to_value[last_event.step] = {}
-    if last_event.summary is not None:
-      for value in last_event.summary.value:
-        step_keyword_to_value[last_event.step][value.tag] = value.simple_value
-
-  return step_keyword_to_value
-
-
-def get_summary_value(dir_, step, keyword):
-  """Get summary value for given step and keyword."""
-
-  writer_cache.FileWriterCache.clear()
-  # Get last Event written.
-  event_paths = glob.glob(os.path.join(dir_, 'events*'))
-  print('XXX', event_paths)
-  for last_event in summary_iterator.summary_iterator(event_paths[-1]):
-    if last_event.step == step and last_event.summary is not None:
-      for value in last_event.summary.value:
-        if keyword in value.tag:
-          return value.simple_value
-  return None
-
-
-class InMemoryEvaluatorHookTest(test.TestCase):
-
-  def test_runs_eval_metrics(self):
-
-    def model_fn(features, labels, mode):
-      _ = labels
-      if estimator_lib.ModeKeys.TRAIN == mode:
-        with ops.control_dependencies([features]):
-          train_op = state_ops.assign_add(training.get_global_step(), 1)
-        return estimator_lib.EstimatorSpec(
-            mode, loss=constant_op.constant(3.), train_op=train_op)
-      if estimator_lib.ModeKeys.EVAL == mode:
-        return estimator_lib.EstimatorSpec(
-            mode,
-            loss=constant_op.constant(5.),
-            eval_metric_ops={'mean_of_features': metrics_lib.mean(features)})
-
-    estimator = estimator_lib.Estimator(model_fn=model_fn)
-
-    def input_fn():
-      return dataset_ops.Dataset.range(10)
-
-    evaluator = hooks_lib.InMemoryEvaluatorHook(
-        estimator, input_fn, every_n_iter=4)
-    estimator.train(input_fn, hooks=[evaluator])
-
-    self.assertTrue(os.path.isdir(estimator.eval_dir()))
-    step_keyword_to_value = summary_step_keyword_to_value_mapping(
-        estimator.eval_dir())
-
-    # 4.5 = sum(range(10))/10
-    # before training
-    self.assertEqual(4.5, step_keyword_to_value[0]['mean_of_features'])
-    # intervals (every_n_iter=4)
-    self.assertEqual(4.5, step_keyword_to_value[4]['mean_of_features'])
-    self.assertEqual(4.5, step_keyword_to_value[8]['mean_of_features'])
-    # end
-    self.assertEqual(4.5, step_keyword_to_value[10]['mean_of_features'])
-    self.assertEqual(set([0, 4, 8, 10]), set(step_keyword_to_value.keys()))
-
-  def test_uses_latest_variable_value(self):
-
-    def model_fn(features, labels, mode):
-      _ = labels
-      step = training.get_global_step()
-      w = variable_scope.get_variable(
-          'w',
-          shape=[],
-          initializer=init_ops.zeros_initializer(),
-          dtype=dtypes.int64)
-      if estimator_lib.ModeKeys.TRAIN == mode:
-        # to consume features, we have control dependency
-        with ops.control_dependencies([features]):
-          step_inc = state_ops.assign_add(training.get_global_step(), 1)
-        with ops.control_dependencies([step_inc]):
-          assign_w_to_step_plus_2 = w.assign(step + 2)
-        return estimator_lib.EstimatorSpec(
-            mode,
-            loss=constant_op.constant(3.),
-            train_op=assign_w_to_step_plus_2)
-      if estimator_lib.ModeKeys.EVAL == mode:
-        # to consume features, we have control dependency
-        with ops.control_dependencies([features]):
-          loss = constant_op.constant(5.)
-        return estimator_lib.EstimatorSpec(
-            mode,
-            loss=loss,
-            # w is constant in each step, so the mean.
-            # w = 0 if step==0 else step+2
-            eval_metric_ops={'mean_of_const': metrics_lib.mean(w)})
-
-    estimator = estimator_lib.Estimator(model_fn=model_fn)
-
-    def input_fn():
-      return dataset_ops.Dataset.range(10)
-
-    evaluator = hooks_lib.InMemoryEvaluatorHook(
-        estimator, input_fn, every_n_iter=4)
-    estimator.train(input_fn, hooks=[evaluator])
-
-    self.assertTrue(os.path.isdir(estimator.eval_dir()))
-    step_keyword_to_value = summary_step_keyword_to_value_mapping(
-        estimator.eval_dir())
-    # w = 0 if step==0 else step+2
-    self.assertEqual(0, step_keyword_to_value[0]['mean_of_const'])
-    self.assertEqual(6, step_keyword_to_value[4]['mean_of_const'])
-    self.assertEqual(12, step_keyword_to_value[10]['mean_of_const'])
-
-  def test_dnn_classifier(self):
-    embedding = feature_column_lib.embedding_column(
-        feature_column_lib.categorical_column_with_vocabulary_list(
-            'wire_cast', ['kima', 'omar', 'stringer']), 8)
-    dnn = estimator_lib.DNNClassifier(
-        feature_columns=[embedding], hidden_units=[3, 1])
-
-    def train_input_fn():
-      return dataset_ops.Dataset.from_tensors(({
-          'wire_cast': [['omar'], ['kima']]
-      }, [[0], [1]])).repeat(3)
-
-    def eval_input_fn():
-      return dataset_ops.Dataset.from_tensors(({
-          'wire_cast': [['stringer'], ['kima']]
-      }, [[0], [1]])).repeat(2)
-
-    evaluator = hooks_lib.InMemoryEvaluatorHook(
-        dnn, eval_input_fn, name='in-memory')
-    dnn.train(train_input_fn, hooks=[evaluator])
-    self.assertTrue(os.path.isdir(dnn.eval_dir('in-memory')))
-    step_keyword_to_value = summary_step_keyword_to_value_mapping(
-        dnn.eval_dir('in-memory'))
-
-    final_metrics = dnn.evaluate(eval_input_fn)
-    step = final_metrics[ops.GraphKeys.GLOBAL_STEP]
-    for summary_tag in final_metrics:
-      if summary_tag == ops.GraphKeys.GLOBAL_STEP:
-        continue
-      self.assertEqual(final_metrics[summary_tag],
-                       step_keyword_to_value[step][summary_tag])
-
-  def test_raise_error_with_multi_worker(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        }
-    }
-    with test.mock.patch.dict('os.environ',
-                              {'TF_CONFIG': json.dumps(tf_config)}):
-      dnn = estimator_lib.DNNClassifier(
-          feature_columns=[feature_column_lib.numeric_column('x')],
-          hidden_units=[3, 1])
-
-    def eval_input_fn():
-      pass
-
-    with self.assertRaisesRegexp(ValueError, 'supports only single machine'):
-      hooks_lib.InMemoryEvaluatorHook(dnn, eval_input_fn)
-
-  def test_raise_error_with_ps(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        }
-    }
-    with test.mock.patch.dict('os.environ',
-                              {'TF_CONFIG': json.dumps(tf_config)}):
-      dnn = estimator_lib.DNNClassifier(
-          feature_columns=[feature_column_lib.numeric_column('x')],
-          hidden_units=[3, 1])
-
-    def eval_input_fn():
-      pass
-
-    with self.assertRaisesRegexp(ValueError, 'supports only single machine'):
-      hooks_lib.InMemoryEvaluatorHook(dnn, eval_input_fn)
-
-  def test_raise_error_with_custom_saver_in_eval(self):
-
-    def model_fn(features, labels, mode):
-      _, _ = features, labels
-      return estimator_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(3.),
-          scaffold=training.Scaffold(saver=training.Saver()),
-          train_op=constant_op.constant(5.),
-          eval_metric_ops={
-              'mean_of_features': metrics_lib.mean(constant_op.constant(2.))
-          })
-
-    estimator = estimator_lib.Estimator(model_fn=model_fn)
-
-    def input_fn():
-      return dataset_ops.Dataset.range(10)
-
-    evaluator = hooks_lib.InMemoryEvaluatorHook(estimator, input_fn)
-    with self.assertRaisesRegexp(ValueError, 'does not support custom saver'):
-      evaluator.begin()
-
-  def test_raise_error_with_custom_init_fn_in_eval(self):
-
-    def model_fn(features, labels, mode):
-      _, _ = features, labels
-
-      def init_fn(scaffold, session):
-        _, _ = scaffold, session
-
-      return estimator_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(3.),
-          scaffold=training.Scaffold(init_fn=init_fn),
-          train_op=constant_op.constant(5.),
-          eval_metric_ops={
-              'mean_of_features': metrics_lib.mean(constant_op.constant(2.))
-          })
-
-    estimator = estimator_lib.Estimator(model_fn=model_fn)
-
-    def input_fn():
-      return dataset_ops.Dataset.range(10)
-
-    evaluator = hooks_lib.InMemoryEvaluatorHook(estimator, input_fn)
-    with self.assertRaisesRegexp(ValueError, 'does not support custom init_fn'):
-      evaluator.begin()
-
-  def test_raise_error_with_saveables_other_than_global_variables(self):
-
-    def model_fn(features, labels, mode):
-      _, _ = features, labels
-      w = variables.VariableV1(
-          initial_value=[0.],
-          trainable=False,
-          collections=[ops.GraphKeys.SAVEABLE_OBJECTS])
-      init_op = control_flow_ops.group(
-          [w.initializer, training.get_global_step().initializer])
-      return estimator_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(3.),
-          scaffold=training.Scaffold(init_op=init_op),
-          train_op=constant_op.constant(5.),
-          eval_metric_ops={
-              'mean_of_features': metrics_lib.mean(constant_op.constant(2.))
-          })
-
-    estimator = estimator_lib.Estimator(model_fn=model_fn)
-
-    def input_fn():
-      return dataset_ops.Dataset.range(10)
-
-    evaluator = hooks_lib.InMemoryEvaluatorHook(estimator, input_fn)
-    with self.assertRaisesRegexp(ValueError, 'does not support saveables'):
-      estimator.train(input_fn, hooks=[evaluator])
-
-
-class StopAtCheckpointStepHookTest(test.TestCase):
-
-  def test_do_not_stop_if_checkpoint_is_not_there(self):
-    with ops.Graph().as_default():
-      step = training.create_global_step()
-      assign_ten = step.assign(10)
-      no_op = control_flow_ops.no_op()
-      hook = hooks_lib._StopAtCheckpointStepHook(
-          model_dir=tempfile.mkdtemp(), last_step=10)
-      with training.SingularMonitoredSession(hooks=[hook]) as mon_sess:
-        mon_sess.raw_session().run(assign_ten)
-        with test.mock.patch.object(time, 'sleep') as mock_sleep:
-          mon_sess.run(no_op)
-          self.assertTrue(mock_sleep.called)
-        self.assertFalse(mon_sess.should_stop())
-
-  def test_do_not_stop_if_checkpoint_step_is_smaller(self):
-    model_dir = tempfile.mkdtemp()
-    with ops.Graph().as_default():
-      step = training.create_global_step()
-      assign_nine = step.assign(9)
-      assign_ten = step.assign(10)
-      no_op = control_flow_ops.no_op()
-      hook = hooks_lib._StopAtCheckpointStepHook(
-          model_dir=model_dir, last_step=10)
-      with tf_session.Session() as sess:
-        sess.run(assign_nine)
-        training.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
-      with training.SingularMonitoredSession(hooks=[hook]) as mon_sess:
-        mon_sess.raw_session().run(assign_ten)
-        with test.mock.patch.object(time, 'sleep') as mock_sleep:
-          mon_sess.run(no_op)
-          self.assertTrue(mock_sleep.called)
-        self.assertFalse(mon_sess.should_stop())
-
-  def test_stop_if_checkpoint_step_is_laststep(self):
-    model_dir = tempfile.mkdtemp()
-    with ops.Graph().as_default():
-      step = training.create_global_step()
-      assign_ten = step.assign(10)
-      no_op = control_flow_ops.no_op()
-      hook = hooks_lib._StopAtCheckpointStepHook(
-          model_dir=model_dir, last_step=10)
-      with tf_session.Session() as sess:
-        sess.run(assign_ten)
-        training.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
-      with training.SingularMonitoredSession(hooks=[hook]) as mon_sess:
-        mon_sess.raw_session().run(assign_ten)
-        with test.mock.patch.object(time, 'sleep') as mock_sleep:
-          mon_sess.run(no_op)
-          self.assertFalse(mock_sleep.called)
-        self.assertTrue(mon_sess.should_stop())
-
-  def test_creates_regular_stop_at_step_hook_for_chief(self):
-    # by default an estimator is in chief mode
-    dnn = estimator_lib.DNNClassifier(
-        feature_columns=[feature_column_lib.numeric_column('x')],
-        hidden_units=[3, 1])
-    hook = hooks_lib.make_stop_at_checkpoint_step_hook(dnn, 300)
-    self.assertIsInstance(hook, training.StopAtStepHook)
-    self.assertEqual(300, hook._last_step)
-
-  def test_creates_checkpoint_hook_for_workers(self):
-
-    class FakeWorkerConfig(estimator_lib.RunConfig):
-
-      @property
-      def is_chief(self):
-        return False
-
-    dnn = estimator_lib.DNNClassifier(
-        feature_columns=[feature_column_lib.numeric_column('x')],
-        hidden_units=[3, 1],
-        config=FakeWorkerConfig())
-    hook = hooks_lib.make_stop_at_checkpoint_step_hook(dnn, 300)
-    self.assertIsInstance(hook, hooks_lib._StopAtCheckpointStepHook)
-    self.assertEqual(300, hook._last_step)
-    self.assertEqual(dnn.model_dir, hook._model_dir)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/linear.py b/tensorflow/contrib/estimator/python/estimator/linear.py
index 2b68f24eb2..1c32255ba0 100644
--- a/tensorflow/contrib/estimator/python/estimator/linear.py
+++ b/tensorflow/contrib/estimator/python/estimator/linear.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,127 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Linear estimator."""
+"""linear python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import linear as linear_lib
-
-
-class LinearEstimator(estimator.Estimator):
-  """An estimator for TensorFlow linear models with user-specified head.
-
-  Example:
-
-  ```python
-  categorical_column_a = categorical_column_with_hash_bucket(...)
-  categorical_column_b = categorical_column_with_hash_bucket(...)
-
-  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
-
-  # Estimator using the default optimizer.
-  estimator = LinearEstimator(
-      head=tf.contrib.estimator.multi_label_head(n_classes=3),
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b])
-
-  # Or estimator using an optimizer with a learning rate decay.
-  estimator = LinearEstimator(
-      head=tf.contrib.estimator.multi_label_head(n_classes=3),
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b],
-      optimizer=lambda: tf.train.FtrlOptimizer(
-          learning_rate=tf.exponential_decay(
-              learning_rate=0.1,
-              global_step=tf.get_global_step(),
-              decay_steps=10000,
-              decay_rate=0.96))
-
-  # Or estimator using the FTRL optimizer with regularization.
-  estimator = LinearEstimator(
-      head=tf.contrib.estimator.multi_label_head(n_classes=3),
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b])
-      optimizer=tf.train.FtrlOptimizer(
-          learning_rate=0.1,
-          l1_regularization_strength=0.001
-      ))
-
-  def input_fn_train: # returns x, y (where y represents label's class index).
-    ...
-  estimator.train(input_fn=input_fn_train, steps=100)
-  def input_fn_eval: # returns x, y (where y represents label's class index).
-    ...
-  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
-  def input_fn_predict: # returns x, None
-    ...
-  predictions = estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-  otherwise there will be a `KeyError`:
-
-  * if `weight_column` is not `None`, a feature with
-    `key=weight_column` whose value is a `Tensor`.
-  * for each `column` in `feature_columns`:
-    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
-      with `key` the id column name, the second with `key` the weight column
-      name. Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss and predicted output are determined by the specified head.
-
-  @compatibility(eager)
-  Estimators are not compatible with eager execution.
-  @end_compatibility
-  """
+from tensorflow_estimator.contrib.estimator.python.estimator import linear
 
-  def __init__(self,
-               head,
-               feature_columns,
-               model_dir=None,
-               optimizer='Ftrl',
-               config=None,
-               partitioner=None,
-               sparse_combiner='sum'):
-    """Initializes a `LinearEstimator` instance.
+# Include attrs that start with single underscore.
+linear.__all__ = [s for s in dir(linear) if not s.startswith('__')]
 
-    Args:
-      head: A `_Head` instance constructed with a method such as
-        `tf.contrib.estimator.multi_label_head`.
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `FeatureColumn`.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
-        to continue training a previously saved model.
-      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
-        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
-        callable. Defaults to FTRL optimizer.
-      config: `RunConfig` object to configure the runtime settings.
-      partitioner: Optional. Partitioner for input layer.
-      sparse_combiner: A string specifying how to reduce if a categorical column
-        is multivalent.  One of "mean", "sqrtn", and "sum" -- these are
-        effectively different ways to do example-level normalization, which can
-        be useful for bag-of-words features. for more details, see
-        `tf.feature_column.linear_model`.
-    """
-    def _model_fn(features, labels, mode, config):
-      return linear_lib._linear_model_fn(  # pylint: disable=protected-access
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          feature_columns=tuple(feature_columns or []),
-          optimizer=optimizer,
-          partitioner=partitioner,
-          config=config,
-          sparse_combiner=sparse_combiner)
-    super(LinearEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.linear import *
diff --git a/tensorflow/contrib/estimator/python/estimator/linear_test.py b/tensorflow/contrib/estimator/python/estimator/linear_test.py
deleted file mode 100644
index c41996b9c6..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/linear_test.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for linear.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import shutil
-import tempfile
-
-import numpy as np
-import six
-
-from tensorflow.contrib.estimator.python.estimator import head as head_lib
-from tensorflow.contrib.estimator.python.estimator import linear
-from tensorflow.python.estimator.canned import linear_testing_utils
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.framework import ops
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-
-
-def _linear_estimator_fn(
-    weight_column=None, label_dimension=1, *args, **kwargs):
-  """Returns a LinearEstimator that uses regression_head."""
-  return linear.LinearEstimator(
-      head=head_lib.regression_head(
-          weight_column=weight_column, label_dimension=label_dimension,
-          # Tests in core (from which this test inherits) test the sum loss.
-          loss_reduction=losses.Reduction.SUM),
-      *args, **kwargs)
-
-
-class LinearEstimatorEvaluateTest(
-    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
-        self, _linear_estimator_fn)
-
-
-class LinearEstimatorPredictTest(
-    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
-        self, _linear_estimator_fn)
-
-
-class LinearEstimatorTrainTest(
-    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
-        self, _linear_estimator_fn)
-
-
-class LinearEstimatorIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(
-      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      label_dimension, batch_size):
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
-    est = linear.LinearEstimator(
-        head=head_lib.regression_head(label_dimension=label_dimension),
-        feature_columns=feature_columns,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array([
-        x[prediction_keys.PredictionKeys.PREDICTIONS]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
-
-    # EXPORT
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self):
-    """Tests complete flow with numpy_input_fn."""
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        batch_size=batch_size,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/logit_fns.py b/tensorflow/contrib/estimator/python/estimator/logit_fns.py
index c8b0dd6297..7eba28dc57 100644
--- a/tensorflow/contrib/estimator/python/estimator/logit_fns.py
+++ b/tensorflow/contrib/estimator/python/estimator/logit_fns.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,85 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Aliases for logit_fn builders used by canned (core) tf.Estimator's.
+"""logit_fns python module.
 
-A logit_fn is an abstraction within model_fn that factors out the logit
-construction logic.  Its output can be fed into Heads or otherwise composed.  It
-should follow the following signature:
-
-Args:
-`features`: This is the first item returned from the `input_fn` passed to
-            `train`, `evaluate`, and `predict`. This should be a single
-            `Tensor` or `dict` of same, and is the only required argument.
-`mode`: Optional. Specifies if this training, evaluation or prediction. See
-        `ModeKeys`.
-`params`: Optional `dict` of hyperparameters.  Will receive what is passed to
-          Estimator in `params` parameter. This allows configuration of
-          Estimators from hyperparameter tuning.
-`config`: Optional configuration object. Will receive what is passed to
-          Estimator in `config` parameter, or the default `config`. Allows
-          updating things in your model_fn based on configuration such as
-          `num_ps_replicas`, or `model_dir`.
-
-Returns:
-    A Tensor representing the logits.
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
 """
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-from tensorflow.python.estimator.canned import dnn as dnn_core
-from tensorflow.python.estimator.canned import linear as linear_core
-from tensorflow.python.framework import ops
-from tensorflow.python.util import function_utils
-
-# pylint: disable=protected-access
-dnn_logit_fn_builder = dnn_core._dnn_logit_fn_builder
-linear_logit_fn_builder = linear_core._linear_logit_fn_builder
-# pylint: enable=protected-access
-
-
-def call_logit_fn(logit_fn, features, mode, params, config):
-  """Calls logit_fn.
-
-  A utility function that calls the provided logit_fn with the relevant subset
-  of provided arguments.  Similar to tf.estimator._call_model_fn().
-
-  Args:
-    logit_fn: A logit_fn as defined above.
-    features: The features dict.
-    mode: TRAIN / EVAL / PREDICT ModeKeys.
-    params: The hyperparameter dict.
-    config: The configuration object.
-
-  Returns:
-    A logit Tensor, the output of logit_fn.
-
-  Raises:
-    ValueError: if logit_fn does not return a Tensor or a dictionary mapping
-      strings to Tensors.
-  """
-  logit_fn_args = function_utils.fn_args(logit_fn)
-  kwargs = {}
-  if 'mode' in logit_fn_args:
-    kwargs['mode'] = mode
-  if 'params' in logit_fn_args:
-    kwargs['params'] = params
-  if 'config' in logit_fn_args:
-    kwargs['config'] = config
-  logit_fn_results = logit_fn(features=features, **kwargs)
-
-  result_is_valid_dictionary = (
-      isinstance(logit_fn_results, dict) and
-      all([(isinstance(k, six.string_types) and isinstance(v, ops.Tensor))
-           for k, v in six.iteritems(logit_fn_results)]))
-  result_is_tensor = isinstance(logit_fn_results, ops.Tensor)
+from tensorflow_estimator.contrib.estimator.python.estimator import logit_fns
 
-  if not (result_is_valid_dictionary or result_is_tensor):
-    raise ValueError('logit_fn should return a Tensor or a dictionary mapping '
-                     'strings to Tensors.  logit_fn returned: %s' %
-                     logit_fn_results)
+# Include attrs that start with single underscore.
+logit_fns.__all__ = [s for s in dir(logit_fns) if not s.startswith('__')]
 
-  return logit_fn_results
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.logit_fns import *
diff --git a/tensorflow/contrib/estimator/python/estimator/logit_fns_test.py b/tensorflow/contrib/estimator/python/estimator/logit_fns_test.py
deleted file mode 100644
index 074ece6cca..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/logit_fns_test.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""logit_fn tests."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.estimator.python.estimator import logit_fns
-from tensorflow.python.client import session
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.framework import constant_op
-from tensorflow.python.platform import test
-
-
-class LogitFnTest(test.TestCase):
-
-  def test_simple_call_logit_fn(self):
-    def dummy_logit_fn(features, mode):
-      if mode == model_fn.ModeKeys.TRAIN:
-        return features['f1']
-      else:
-        return features['f2']
-    features = {
-        'f1': constant_op.constant([[2., 3.]]),
-        'f2': constant_op.constant([[4., 5.]])
-    }
-    logit_fn_result = logit_fns.call_logit_fn(
-        dummy_logit_fn, features, model_fn.ModeKeys.EVAL, 'fake_params',
-        'fake_config')
-    with session.Session():
-      self.assertAllClose([[4., 5.]], logit_fn_result.eval())
-
-  def test_simple_call_multi_logit_fn(self):
-
-    def dummy_logit_fn(features):
-      return {u'head1': features['f1'], 'head2': features['f2']}
-
-    features = {
-        'f1': constant_op.constant([[2., 3.]]),
-        'f2': constant_op.constant([[4., 5.]])
-    }
-    logit_fn_result = logit_fns.call_logit_fn(dummy_logit_fn, features,
-                                              model_fn.ModeKeys.TRAIN,
-                                              'fake_params', 'fake_config')
-    with session.Session():
-      self.assertAllClose([[2., 3.]], logit_fn_result['head1'].eval())
-      self.assertAllClose([[4., 5.]], logit_fn_result['head2'].eval())
-
-  def test_invalid_logit_fn_results(self):
-
-    def invalid_logit_fn(features, params):
-      return [
-          features['f1'] * params['input_multiplier'],
-          features['f2'] * params['input_multiplier']
-      ]
-
-    features = {
-        'f1': constant_op.constant([[2., 3.]]),
-        'f2': constant_op.constant([[4., 5.]])
-    }
-    params = {'learning_rate': 0.001, 'input_multiplier': 2.0}
-    with self.assertRaisesRegexp(
-        ValueError, 'logit_fn should return a Tensor or a dictionary mapping '
-                    'strings to Tensors'):
-      logit_fns.call_logit_fn(invalid_logit_fn, features, 'fake_mode', params,
-                              'fake_config')
-
-  def test_invalid_logit_fn_results_dict(self):
-
-    def invalid_logit_fn(features):
-      return {'head1': features['f1'], 'head2': features['f2']}
-
-    features = {'f1': constant_op.constant([[2., 3.]]), 'f2': 'some string'}
-    with self.assertRaisesRegexp(
-        ValueError, 'logit_fn should return a Tensor or a dictionary mapping '
-                    'strings to Tensors'):
-      logit_fns.call_logit_fn(invalid_logit_fn, features, 'fake_mode',
-                              'fake_params', 'fake_config')
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py
index 6e793c8302..1d8a065299 100644
--- a/tensorflow/contrib/estimator/python/estimator/multi_head.py
+++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py
@@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,413 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Abstractions for the head(s) of a model.
+"""multi_head python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
 """
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.export import export_output as export_output_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.summary import summary
-from tensorflow.python.training import training_util
-
-
-_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-
-
-def multi_head(heads, head_weights=None):
-  """Creates a `_Head` for multi-objective learning.
-
-  This class merges the output of multiple `_Head` objects.
-  Specifically:
-  * For training, sums losses of each head, calls `train_op_fn` with this
-    final loss.
-  * For eval, merges metrics by adding `head.name` suffix to the keys in eval
-    metrics, such as `precision/head1`, `precision/head2`.
-  * For prediction, merges predictions and updates keys in prediction dict to a
-    2-tuple, `(head.name, prediction_key)`. Merges `export_outputs` such that
-    by default the first head is served.
-
-  Usage:
-
-  ```python
-  # In `input_fn` specify labels as a dict keyed by head name:
-  def input_fn():
-    features = ...
-    labels1 = ...
-    labels2 = ...
-    return features, {'head1': labels1, 'head2': labels2}
-
-  # In `model_fn`, specify logits as a dict keyed by head name:
-  def model_fn(features, labels, mode):
-    # Create simple heads and specify head name.
-    head1 = multi_class_head(n_classes=3, name='head1')
-    head2 = binary_classification_head(name='head2')
-    # Create multi-head from two simple heads.
-    head = multi_head([head1, head2])
-    # Create logits for each head, and combine them into a dict.
-    logits1, logits2 = logit_fn()
-    logits = {'head1': logits1, 'head2': logits2}
-    # Return the merged EstimatorSpec
-    return head.create_estimator_spec(..., logits=logits, ...)
-
-  # Create an estimator with this model_fn.
-  estimator = tf.estimator.Estimator(model_fn=model_fn)
-  estimator.train(input_fn=input_fn, steps=100)
-  ```
-
-  Also supports `logits` as a `Tensor` of shape
-  `[D0, D1, ... DN, logits_dimension]`. It will split the `Tensor` along the
-  last dimension and distribute it appropriately among the heads. E.g.:
-
-  ```python
-  def model_fn(features, labels, mode):
-    # Create simple heads and specify head name.
-    head1 = multi_class_head(n_classes=3, name='head1')
-    head2 = binary_classification_head(name='head2')
-    # Create multi-head from two simple heads.
-    head = multi_head([head1, head2])
-    # Create logits for the multihead.
-    logits = logit_fn(logits_dimension=head.logits_dimension)
-    # Return the merged EstimatorSpec
-    return head.create_estimator_spec(..., logits=logits, ...)
-  ```
-
-  Args:
-    heads: List or tuple of `_Head` instances. All heads must have `name`
-      specified. The first head in the list is the default used at serving time.
-    head_weights: Optional list of weights, same length as `heads`. Used when
-      merging losses to calculate the weighted sum of losses from each head. If
-      `None`, all losses are weighted equally.
-
-  Returns:
-    A instance of `_Head` that merges multiple heads.
-
-  Raises:
-    ValueError: If `heads` is empty.
-    ValueError: If any of the `heads` does not have `name` specified.
-    ValueError: If `heads` and `head_weights` have different size.
-  """
-  if head_weights:
-    if len(head_weights) != len(heads):
-      raise ValueError(
-          'heads and head_weights must have the same size. '
-          'Given len(heads): {}. Given len(head_weights): {}.'.format(
-              len(heads), len(head_weights)))
-  if not heads:
-    raise ValueError('Must specify heads. Given: {}'.format(heads))
-  for head in heads:
-    if not head.name:
-      raise ValueError(
-          'All given heads must have name specified. '
-          'Given: {}'.format(head))
-
-  return _MultiHead(
-      heads=tuple(heads),
-      head_weights=tuple(head_weights) if head_weights else tuple())
-
-
-def _no_op_train_fn(loss):
-  del loss
-  return control_flow_ops.no_op()
-
-
-def _merge_losses(losses, head_weights=None):
-  """Merges the given losses into one tensor."""
-  losses = tuple(losses)
-  with ops.name_scope(
-      'merge_losses', values=losses + (head_weights or tuple())):
-    if head_weights:
-      weighted_losses = []
-      for loss, weight in zip(losses, head_weights):
-        weighted_losses.append(math_ops.multiply(loss, weight))
-    else:
-      weighted_losses = losses
-    return math_ops.add_n(weighted_losses)
-
-
-def _default_export_output(export_outputs, head_name):
-  """Extracts the default export output from the given export_outputs dict."""
-  if len(export_outputs) == 1:
-    return next(six.itervalues(export_outputs))
-  for k, v in six.iteritems(export_outputs):
-    if k == _DEFAULT_SERVING_KEY:
-      return v
-  raise ValueError(
-      '{} did not specify default export_outputs. '
-      'Given: {} '
-      'Suggested fix: Use one of the heads in tf.contrib.estimator, or include '
-      'key {} in export_outputs.'.format(
-          head_name, export_outputs, _DEFAULT_SERVING_KEY))
-
-
-class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
-  """`_Head` for multi objective learning."""
-
-  def __init__(self, heads, head_weights):
-    self._logits_dimension = 0
-    for head in heads:
-      self._logits_dimension += head.logits_dimension
-
-    self._heads = heads
-    self._head_weights = head_weights
-
-  @property
-  def name(self):
-    return '_'.join([h.name for h in self._heads])
-
-  @property
-  def logits_dimension(self):
-    return self._logits_dimension
-
-  def create_loss(self, features, mode, logits, labels):
-    """See `Head`."""
-    if isinstance(logits, dict):
-      logits_dict = logits
-    else:
-      logits_dict = self._split_logits(logits)
-    training_losses = []
-    labels_by_head = {}
-    unreduced_losses_by_head = {}
-    example_weights_by_head = {}
-    for i, head in enumerate(self._heads):
-      (training_loss, unreduced_loss,
-       weights, processed_labels) = head.create_loss(
-           features, mode, logits_dict[head.name], labels[head.name])
-      training_losses.append(training_loss)
-      labels_by_head[head.name] = processed_labels
-      if self._head_weights:
-        head_weight = self._head_weights[i]
-        unreduced_losses_by_head[head.name] = math_ops.multiply(
-            unreduced_loss, head_weight)
-        example_weights_by_head[head.name] = math_ops.multiply(
-            weights, head_weight)
-      else:
-        unreduced_losses_by_head[head.name] = unreduced_loss
-        example_weights_by_head[head.name] = weights
-
-    training_losses = tuple(training_losses)
-    with ops.name_scope(
-        'merge_losses',
-        values=training_losses + (self._head_weights or tuple())):
-      if self._head_weights:
-        head_weighted_training_losses = []
-        for training_loss, head_weight in zip(
-            training_losses, self._head_weights):
-          head_weighted_training_losses.append(
-              math_ops.multiply(training_loss, head_weight))
-        merged_training_loss = math_ops.add_n(head_weighted_training_losses)
-      else:
-        merged_training_loss = math_ops.add_n(training_losses)
-
-    return head_lib.LossSpec(
-        training_loss=merged_training_loss,
-        unreduced_loss=unreduced_losses_by_head,
-        weights=example_weights_by_head,
-        processed_labels=labels_by_head)
-
-  # TODO(b/65403806): Support regularization_losses arg.
-  def create_estimator_spec(
-      self, features, mode, logits, labels=None, optimizer=None,
-      train_op_fn=None):
-    """See `_Head`."""
-    return self._create_estimator_spec(
-        features=features, mode=mode, logits=logits, labels=labels,
-        optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=False)
-
-  def _create_tpu_estimator_spec(
-      self, features, mode, logits, labels=None, optimizer=None,
-      train_op_fn=None):
-    """See `_Head`."""
-    return self._create_estimator_spec(
-        features=features, mode=mode, logits=logits, labels=labels,
-        optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=True)
-
-  def _create_estimator_spec(
-      self, features, mode, logits, labels=None, optimizer=None,
-      train_op_fn=None, use_tpu=False):
-    """Returns `EstimatorSpec` or `TPUEstimatorSpec`."""
-    if isinstance(logits, dict):
-      logits_dict = logits
-    else:
-      logits_dict = self._split_logits(logits)
-    if labels and not isinstance(labels, dict):
-      raise ValueError('labels must be a dict. Given: {}'.format(labels))
-
-    all_estimator_spec = []
-    for head in self._heads:
-      head_name = head.name
-      all_estimator_spec.append(
-          head.create_estimator_spec(
-              features=features,
-              mode=mode,
-              logits=logits_dict[head_name],
-              labels=labels[head_name] if labels else None,
-              train_op_fn=_no_op_train_fn))
-
-    if mode == model_fn.ModeKeys.TRAIN:
-      spec = self._merge_train(
-          all_estimator_spec=all_estimator_spec,
-          optimizer=optimizer,
-          train_op_fn=train_op_fn,
-          use_tpu=use_tpu)
-      with ops.name_scope(''):
-        summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss)
-      return spec
-    if mode == model_fn.ModeKeys.PREDICT:
-      return self._merge_predict(all_estimator_spec, use_tpu=use_tpu)
-    if mode == model_fn.ModeKeys.EVAL:
-      return self._merge_eval(all_estimator_spec, use_tpu=use_tpu)
-    raise ValueError('mode={} unrecognized'.format(mode))
-
-  def _split_logits(self, logits):
-    """Splits logits along the last dimension and returns a dict."""
-    logits_dict = {}
-    with ops.name_scope(None, 'split_logits', values=[logits]):
-      logits = ops.convert_to_tensor(logits)
-      batch_shape = array_ops.shape(logits)[:-1]
-      zeros_like_batch_shape = array_ops.zeros_like(batch_shape)
-      minus_ones_like_batch_shape = -1 * array_ops.ones_like(batch_shape)
-      begin_idx = 0
-      for head in self._heads:
-        begin_tensor = array_ops.concat(
-            [zeros_like_batch_shape, [begin_idx]], axis=0)
-        size_tensor = array_ops.concat(
-            [minus_ones_like_batch_shape, [head.logits_dimension]], axis=0)
-        logits_dict[head.name] = array_ops.slice(
-            logits, begin=begin_tensor, size=size_tensor)
-        begin_idx += head.logits_dimension
-    return logits_dict
-
-  def _merge_train(
-      self, all_estimator_spec, optimizer, train_op_fn, use_tpu=False):
-    """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for training.
-
-    Args:
-      all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the
-        individual heads.
-      optimizer: `Optimizer` instance to create train op. See
-        `create_estimator_spec` documentation for more details.
-      train_op_fn: Function to create train op. Used if `optimizer` is `None`.
-      use_tpu: If `True`, returns `TPUEstimatorSpec`.
-
-    Returns:
-      `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for TRAIN.
-
-    Raises:
-      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
-        mode.
-    """
-    losses = []
-    for spec in all_estimator_spec:
-      losses.append(spec.loss)
-    loss = _merge_losses(losses, self._head_weights)
-    if optimizer is not None:
-      if train_op_fn is not None:
-        raise ValueError('train_op_fn and optimizer cannot both be set.')
-      train_op = optimizer.minimize(
-          loss, global_step=training_util.get_global_step())
-    elif train_op_fn is not None:
-      train_op = train_op_fn(loss)
-    else:
-      raise ValueError('train_op_fn and optimizer cannot both be None.')
-
-    spec_type = (
-        model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec)  # pylint:disable=protected-access
-    return spec_type(
-        mode=model_fn.ModeKeys.TRAIN,
-        loss=loss,
-        train_op=train_op)
-
-  def _merge_predict(self, all_estimator_spec, use_tpu=False):
-    """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for prediction.
-
-    Args:
-      all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the
-        individual heads.
-      use_tpu: If `True`, returns `TPUEstimatorSpec`.
-
-    Returns:
-      `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for PREDICT.
-    """
-    predictions = {}
-    export_outputs = {
-        _DEFAULT_SERVING_KEY: _default_export_output(
-            all_estimator_spec[0].export_outputs,
-            self._heads[0].name),
-    }
-    merged_predict_outputs = {}
-    for head, spec in zip(self._heads, all_estimator_spec):
-      head_name = head.name
-      for k, v in six.iteritems(spec.export_outputs):
-        if k == _DEFAULT_SERVING_KEY:
-          key = head_name
-        else:
-          key = '%s/%s' % (head_name, k)
-        export_outputs[key] = v
-        if (k == head_lib._PREDICT_SERVING_KEY and  # pylint:disable=protected-access
-            isinstance(v, export_output_lib.PredictOutput)):
-          for kp, vp in six.iteritems(v.outputs):
-            key = '%s/%s' % (head_name, kp)
-            merged_predict_outputs[key] = vp
-      for k, v in six.iteritems(spec.predictions):
-        predictions[(head_name, k)] = v
-    export_outputs[head_lib._PREDICT_SERVING_KEY] = (  # pylint:disable=protected-access
-        export_output_lib.PredictOutput(merged_predict_outputs))
-
-    spec_type = (
-        model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec)  # pylint:disable=protected-access
-    return spec_type(
-        mode=model_fn.ModeKeys.PREDICT,
-        predictions=predictions,
-        export_outputs=export_outputs)
-
-  def _merge_eval(self, all_estimator_spec, use_tpu=False):
-    """Merges list of `EstimatorSpec` for eval.
-
-    Args:
-      all_estimator_spec: list of `EstimatorSpec` for the individual heads.
-      use_tpu: If `True`, will raise `NotImplementedError`, because TPU is not
-        yet supported for eval.
+from tensorflow_estimator.contrib.estimator.python.estimator import multi_head
 
-    Returns:
-      `EstimatorSpec` that merges all heads for EVAL.
-    Raises:
-      NotImplementedError: If `use_tpu` is `True`.
-    """
-    if use_tpu:
-      raise NotImplementedError(
-          'TPU evaluation is not implemented for multi_head.')
-    predictions = {}
-    metrics = {}
-    losses = []
-    with ops.name_scope('merge_eval'):
-      for head, spec in zip(self._heads, all_estimator_spec):
-        losses.append(spec.loss)
-        head_name = head.name
-        # Loss metric is not added by default.
-        loss_name = head_lib._summary_key(  # pylint:disable=protected-access
-            head_name, metric_keys.MetricKeys.LOSS)
-        metrics[loss_name] = metrics_lib.mean(spec.loss, name=loss_name)
-        # Metric keys already contain head.name.
-        metrics.update(spec.eval_metric_ops or {})
-        for k, v in six.iteritems(spec.predictions):
-          predictions[(head_name, k)] = v
-      loss = _merge_losses(losses, self._head_weights)
+# Include attrs that start with single underscore.
+multi_head.__all__ = [s for s in dir(multi_head) if not s.startswith('__')]
 
-    return model_fn.EstimatorSpec(
-        mode=model_fn.ModeKeys.EVAL,
-        predictions=predictions,
-        loss=loss,
-        eval_metric_ops=metrics)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.multi_head import *
diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
deleted file mode 100644
index a602f87b4a..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
+++ /dev/null
@@ -1,705 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for head."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import six
-
-from tensorflow.contrib.estimator.python.estimator import head as head_lib
-from tensorflow.contrib.estimator.python.estimator import multi_head as multi_head_lib
-from tensorflow.core.framework import summary_pb2
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import signature_constants
-
-
-_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-
-
-def _initialize_variables(test_case, scaffold):
-  scaffold.finalize()
-  test_case.assertIsNone(scaffold.init_feed_dict)
-  test_case.assertIsNone(scaffold.init_fn)
-  scaffold.init_op.run()
-  scaffold.ready_for_local_init_op.eval()
-  scaffold.local_init_op.run()
-  scaffold.ready_op.eval()
-  test_case.assertIsNotNone(scaffold.saver)
-
-
-def _assert_simple_summaries(test_case, expected_summaries, summary_str,
-                             tol=1e-6):
-  """Assert summary the specified simple values.
-
-  Args:
-    test_case: test case.
-    expected_summaries: Dict of expected tags and simple values.
-    summary_str: Serialized `summary_pb2.Summary`.
-    tol: Tolerance for relative and absolute.
-  """
-  summary = summary_pb2.Summary()
-  summary.ParseFromString(summary_str)
-  test_case.assertAllClose(expected_summaries, {
-      v.tag: v.simple_value for v in summary.value
-  }, rtol=tol, atol=tol)
-
-
-def _assert_no_hooks(test_case, spec):
-  test_case.assertAllEqual([], spec.training_chief_hooks)
-  test_case.assertAllEqual([], spec.training_hooks)
-
-
-def _sigmoid(logits):
-  return 1 / (1 + np.exp(-logits))
-
-
-class MultiHeadTest(test.TestCase):
-
-  def setUp(self):
-    ops.reset_default_graph()
-
-  def test_no_heads(self):
-    with self.assertRaisesRegexp(
-        ValueError, r'Must specify heads\. Given: \[\]'):
-      multi_head_lib.multi_head(heads=[])
-
-  def test_head_name_missing(self):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    head2 = head_lib.multi_label_head(n_classes=3)
-    with self.assertRaisesRegexp(
-        ValueError, r'All given heads must have name specified\.'):
-      multi_head_lib.multi_head([head1, head2])
-
-  def test_head_weights_wrong_size(self):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'heads and head_weights must have the same size\. '
-        r'Given len\(heads\): 2. Given len\(head_weights\): 1\.'):
-      multi_head_lib.multi_head([head1, head2], head_weights=[1.])
-
-  def test_name(self):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
-    multi_head = multi_head_lib.multi_head([head1, head2])
-    self.assertEqual('head1_head2', multi_head.name)
-
-  def _test_predict_two_heads_logits_dict(self, use_tpu):
-    """Tests predict with logits as dict."""
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
-    multi_head = multi_head_lib.multi_head([head1, head2])
-
-    logits = {
-        'head1': np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32),
-        'head2': np.array([[2., -2., 2.], [-3., 2., -2.]], dtype=np.float32)
-    }
-    expected_probabilities = {
-        'head1': _sigmoid(logits['head1']),
-        'head2': _sigmoid(logits['head2']),
-    }
-
-    if use_tpu:
-      spec = multi_head._create_tpu_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.PREDICT,
-          logits=logits).as_estimator_spec()
-    else:
-      spec = multi_head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.PREDICT,
-          logits=logits)
-
-    self.assertItemsEqual(
-        (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification',
-         'head1/predict', 'head2', 'head2/classification', 'head2/predict'),
-        spec.export_outputs.keys())
-
-    # Assert predictions and export_outputs.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      predictions = sess.run(spec.predictions)
-      self.assertAllClose(
-          logits['head1'],
-          predictions[('head1', prediction_keys.PredictionKeys.LOGITS)])
-      self.assertAllClose(
-          logits['head2'],
-          predictions[('head2', prediction_keys.PredictionKeys.LOGITS)])
-      self.assertAllClose(
-          expected_probabilities['head1'],
-          predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)])
-      self.assertAllClose(
-          expected_probabilities['head2'],
-          predictions[('head2', prediction_keys.PredictionKeys.PROBABILITIES)])
-
-      self.assertAllClose(
-          expected_probabilities['head1'],
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
-      self.assertAllClose(
-          expected_probabilities['head1'],
-          sess.run(spec.export_outputs['head1'].scores))
-      self.assertAllClose(
-          expected_probabilities['head2'],
-          sess.run(spec.export_outputs['head2'].scores))
-      self.assertAllClose(
-          expected_probabilities['head1'],
-          sess.run(
-              spec.export_outputs['predict'].outputs['head1/probabilities']))
-      self.assertAllClose(
-          expected_probabilities['head2'],
-          sess.run(
-              spec.export_outputs['predict'].outputs['head2/probabilities']))
-      self.assertAllClose(
-          expected_probabilities['head1'],
-          sess.run(
-              spec.export_outputs['head1/predict'].outputs['probabilities']))
-      self.assertAllClose(
-          expected_probabilities['head2'],
-          sess.run(
-              spec.export_outputs['head2/predict'].outputs['probabilities']))
-
-  def test_predict_two_heads_logits_dict(self):
-    self._test_predict_two_heads_logits_dict(use_tpu=False)
-
-  def test_predict_two_heads_logits_dict_tpu(self):
-    self._test_predict_two_heads_logits_dict(use_tpu=True)
-
-  def test_predict_two_heads_logits_tensor(self):
-    """Tests predict with logits as Tensor."""
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
-    multi_head = multi_head_lib.multi_head([head1, head2])
-
-    logits = np.array(
-        [[-1., 1., 2., -2., 2.], [-1.5, 1., -3., 2., -2.]], dtype=np.float32)
-    expected_logits1 = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32)
-    expected_logits2 = np.array([[2., -2., 2.], [-3., 2., -2.]],
-                                dtype=np.float32)
-    expected_probabilities = {
-        'head1': _sigmoid(expected_logits1),
-        'head2': _sigmoid(expected_logits2),
-    }
-
-    spec = multi_head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    self.assertItemsEqual(
-        (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification',
-         'head1/predict', 'head2', 'head2/classification', 'head2/predict'),
-        spec.export_outputs.keys())
-
-    # Assert predictions and export_outputs.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      predictions = sess.run(spec.predictions)
-      self.assertAllClose(
-          expected_logits1,
-          predictions[('head1', prediction_keys.PredictionKeys.LOGITS)])
-      self.assertAllClose(
-          expected_logits2,
-          predictions[('head2', prediction_keys.PredictionKeys.LOGITS)])
-      self.assertAllClose(
-          expected_probabilities['head1'],
-          predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)])
-      self.assertAllClose(
-          expected_probabilities['head2'],
-          predictions[('head2', prediction_keys.PredictionKeys.PROBABILITIES)])
-
-      self.assertAllClose(
-          expected_probabilities['head1'],
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
-      self.assertAllClose(
-          expected_probabilities['head1'],
-          sess.run(spec.export_outputs['head1'].scores))
-      self.assertAllClose(
-          expected_probabilities['head2'],
-          sess.run(spec.export_outputs['head2'].scores))
-
-  def test_predict_two_heads_logits_tensor_multi_dim(self):
-    """Tests predict with multi-dimensional logits of shape [2, 2, 5]."""
-    head1 = head_lib.regression_head(label_dimension=2, name='head1')
-    head2 = head_lib.regression_head(label_dimension=3, name='head2')
-    multi_head = multi_head_lib.multi_head([head1, head2])
-
-    logits = np.array(
-        [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]],
-         [[-1.5, 1., -3., 2., -2.], [-1.5, 1., -3., 2., -2.]]],
-        dtype=np.float32)
-    expected_logits1 = np.array(
-        [[[-1., 1.], [-1., 1.]],
-         [[-1.5, 1.], [-1.5, 1.]]],
-        dtype=np.float32)
-    expected_logits2 = np.array(
-        [[[2., -2., 2.], [2., -2., 2.]],
-         [[-3., 2., -2.], [-3., 2., -2.]]],
-        dtype=np.float32)
-
-    spec = multi_head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    self.assertItemsEqual(
-        (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/regression',
-         'head1/predict', 'head2', 'head2/regression', 'head2/predict'),
-        spec.export_outputs.keys())
-
-    # Assert predictions and export_outputs.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      predictions = sess.run(spec.predictions)
-      self.assertAllClose(
-          expected_logits1,
-          predictions[('head1', prediction_keys.PredictionKeys.PREDICTIONS)])
-      self.assertAllClose(
-          expected_logits2,
-          predictions[('head2', prediction_keys.PredictionKeys.PREDICTIONS)])
-
-      self.assertAllClose(
-          expected_logits1,
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].value))
-      self.assertAllClose(
-          expected_logits1,
-          sess.run(spec.export_outputs['head1'].value))
-      self.assertAllClose(
-          expected_logits2,
-          sess.run(spec.export_outputs['head2'].value))
-
-  def test_eval_two_heads_with_weights(self):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
-    multi_head = multi_head_lib.multi_head(
-        [head1, head2], head_weights=[1., 2.])
-
-    logits = {
-        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
-        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
-                          dtype=np.float32),
-    }
-    labels = {
-        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
-        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
-    }
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2 = 8.75
-    # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]]
-    # loss = ( (20 + 20 + 20) / 3 + (30 + 0 + 0) / 3 ) / 2 = 15
-    expected_loss_head1 = 8.75
-    expected_loss_head2 = 15.
-    expected_loss = 1. * expected_loss_head1 + 2. * expected_loss_head2
-
-    spec = multi_head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS + '/head1': expected_loss_head1,
-        keys.LOSS + '/head2': expected_loss_head2,
-        # Average loss over examples.
-        keys.LOSS_MEAN + '/head1': expected_loss_head1,
-        keys.LOSS_MEAN + '/head2': expected_loss_head2,
-        # auc and auc_pr cannot be reliably calculated for only 4-6 samples, but
-        # this assert tests that the algorithm remains consistent.
-        keys.AUC + '/head1': 0.1667,
-        keys.AUC + '/head2': 0.3333,
-        keys.AUC_PR + '/head1': 0.6667,
-        keys.AUC_PR + '/head2': 0.5000,
-    }
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, and metrics.
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
-          rtol=tol,
-          atol=tol)
-
-  def test_eval_tpu(self):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
-    multi_head = multi_head_lib.multi_head(
-        [head1, head2], head_weights=[1., 2.])
-
-    logits = {
-        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
-        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
-                          dtype=np.float32),
-    }
-    labels = {
-        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
-        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
-    }
-
-    with self.assertRaisesRegexp(
-        NotImplementedError,
-        r'TPU evaluation is not implemented for multi_head\.'):
-      multi_head._create_tpu_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=logits,
-          labels=labels)
-
-  def test_train_create_loss_one_head(self):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    multi_head = multi_head_lib.multi_head([head1])
-
-    logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)}
-    labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
-    loss = multi_head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)[0]
-    tol = 1e-3
-    with self.cached_session():
-      # Unreduced loss of the head is [[(10 + 10) / 2], (15 + 0) / 2]
-      # (averaged over classes, averaged over examples).
-      self.assertAllClose(8.75, loss.eval(), rtol=tol, atol=tol)
-
-  def test_train_create_loss_two_heads_with_weights(self):
-    # Use different example weighting for each head weighting.
-    weights1 = np.array([[1.], [2.]], dtype=np.float32)
-    weights2 = np.array([[2.], [3.]])
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1',
-                                      weight_column='weights1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2',
-                                      weight_column='weights2')
-    multi_head = multi_head_lib.multi_head(
-        [head1, head2], head_weights=[1., 2.])
-
-    logits = {
-        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
-        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
-                          dtype=np.float32),
-    }
-    labels = {
-        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
-        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
-    }
-    training_loss, unreduced_losses, weights, _ = multi_head.create_loss(
-        features={
-            'x': np.array(((42,),), dtype=np.int32),
-            'weights1': weights1,
-            'weights2': weights2
-        },
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    tol = 1e-3
-    with self.cached_session():
-      # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]]
-      # = [10, 7.5]
-      # training_loss = (1 * 10 + 2 * 7.5) / 2 = 12.5
-      # head-weighted unreduced_loss = 1 * [10, 7.5]
-      self.assertAllClose(
-          [[10.], [7.5]], unreduced_losses['head1'].eval(), rtol=tol, atol=tol)
-      # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]]
-      # = [20, 10]
-      # training_loss = (2 * 20 + 3 * 10) / 2 = 35
-      # head-weighted unreduced_loss = 2 * [20, 10]
-      self.assertAllClose(
-          [[40.], [20.]], unreduced_losses['head2'].eval(), rtol=tol, atol=tol)
-      # head-weighted training_loss = 1 * 12.5 + 2 * 35 = 82.5
-      self.assertAllClose(82.5, training_loss.eval(), rtol=tol, atol=tol)
-      # head-weighted example weights
-      self.assertAllClose(
-          [[1.], [2.]], weights['head1'].eval(), rtol=tol, atol=tol)
-      self.assertAllClose(
-          [[4.], [6.]], weights['head2'].eval(), rtol=tol, atol=tol)
-
-  def test_train_create_loss_logits_tensor(self):
-    """Tests create_loss with logits Tensor."""
-    weights1 = np.array([[1.], [2.]], dtype=np.float32)
-    weights2 = np.array([[2.], [3.]])
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1',
-                                      weight_column='weights1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2',
-                                      weight_column='weights2')
-    multi_head = multi_head_lib.multi_head(
-        [head1, head2], head_weights=[1., 2.])
-
-    logits = np.array([[-10., 10., 20., -20., 20.],
-                       [-15., 10., -30., 20., -20.]], dtype=np.float32)
-    labels = {
-        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
-        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
-    }
-    training_loss, unreduced_losses, weights, _ = multi_head.create_loss(
-        features={
-            'x': np.array(((42,),), dtype=np.int32),
-            'weights1': weights1,
-            'weights2': weights2
-        },
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    tol = 1e-3
-    with self.cached_session():
-      # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]]
-      # = [10, 7.5]
-      # training_loss = (1 * 10 + 2 * 7.5) / 2 = 12.5
-      # head-weighted unreduced_loss = 1 * [10, 7.5]
-      self.assertAllClose(
-          [[10.], [7.5]], unreduced_losses['head1'].eval(), rtol=tol, atol=tol)
-      # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]]
-      # = [20, 10]
-      # training_loss = (2 * 20 + 3 * 10) / 2 = 35
-      # head-weighted unreduced_loss = 2 * [20, 10]
-      self.assertAllClose(
-          [[40.], [20.]], unreduced_losses['head2'].eval(), rtol=tol, atol=tol)
-      # head-weighted training_loss = 1 * 12.5 + 2 * 35 = 82.5
-      self.assertAllClose(82.5, training_loss.eval(), rtol=tol, atol=tol)
-      # head-weighted example weights
-      self.assertAllClose(
-          [[1.], [2.]], weights['head1'].eval(), rtol=tol, atol=tol)
-      self.assertAllClose(
-          [[4.], [6.]], weights['head2'].eval(), rtol=tol, atol=tol)
-
-  def test_train_create_loss_logits_tensor_multi_dim(self):
-    """Tests create_loss with multi-dimensional logits of shape [2, 2, 5]."""
-    head1 = head_lib.regression_head(label_dimension=2, name='head1')
-    head2 = head_lib.regression_head(label_dimension=3, name='head2')
-    multi_head = multi_head_lib.multi_head([head1, head2])
-
-    logits = np.array(
-        [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]],
-         [[-1.5, 1.5, -2., 2., -2.], [-1.5, 1.5, -2., 2., -2.]]],
-        dtype=np.float32)
-    labels = {
-        'head1': np.array([[[1., 0.], [1., 0.]],
-                           [[1.5, 1.5], [1.5, 1.5]]], dtype=np.float32),
-        'head2': np.array([[[0., 1., 0.], [0., 1., 0.]],
-                           [[2., 2., 0.], [2., 2., 0.]]], dtype=np.float32),
-    }
-    # Loss for the first head:
-    # loss1 = ((1+1)^2 + (0-1)^2 + (1+1)^2 + (0-1)^2 +
-    #          (1.5+1.5)^2 + (1.5-1.5)^2 + (1.5+1.5)^2 + (1.5-1.5)^2) / 8
-    #       = 3.5
-    # Loss for the second head:
-    # loss2 = ((0-2)^2 + (1+2)^2 + (0-2)^2 + (0-2)^2 + (1+2)^2 + (0-2)^2 +
-    #          (2+2)^2 + (2-2)^2 + (0+2)^2 + (2+2)^2 + (2-2)^2 + (0+2)^2) / 12
-    #       = 6.167
-    expected_training_loss = 3.5 + 6.167
-
-    training_loss = multi_head.create_loss(
-        features={},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)[0]
-    tol = 1e-3
-    with self.cached_session():
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
-
-  def test_train_one_head(self):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    multi_head = multi_head_lib.multi_head([head1])
-
-    logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)}
-    labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2 = 8.75
-    expected_loss = 8.75
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=3)])
-
-    spec = multi_head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    self.assertIsNotNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
-          train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          metric_keys.MetricKeys.LOSS + '/head1': expected_loss,
-      }, summary_str, tol)
-
-  def test_train_one_head_with_optimizer(self):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    multi_head = multi_head_lib.multi_head([head1])
-
-    logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)}
-    labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2 = 8.75
-    expected_loss = 8.75
-    expected_train_result = 'my_train_op'
-
-    class _Optimizer(object):
-
-      def minimize(self, loss, global_step):
-        del global_step
-        return string_ops.string_join(
-            [constant_op.constant(expected_train_result),
-             string_ops.as_string(loss, precision=3)])
-
-    spec = multi_head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        optimizer=_Optimizer())
-
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run((spec.loss, spec.train_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
-          train_result)
-
-  def _test_train_two_heads_with_weights(self, use_tpu):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
-    multi_head = multi_head_lib.multi_head(
-        [head1, head2], head_weights=[1., 2.])
-
-    logits = {
-        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
-        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
-                          dtype=np.float32),
-    }
-    labels = {
-        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
-        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
-    }
-    # For large logits, sigmoid cross entropy loss is approximated as:
-    # loss = labels * (logits < 0) * (-logits) +
-    #        (1 - labels) * (logits > 0) * logits =>
-    # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]]
-    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2 = 8.75
-    # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]]
-    # loss = ( (20 + 20 + 20) / 3 + (30 + 0 + 0) / 3 ) / 2 = 15
-    # Average over classes, weighted sum over batch and heads.
-    expected_loss_head1 = 8.75
-    expected_loss_head2 = 15.0
-    expected_loss = 1. * expected_loss_head1 + 2. * expected_loss_head2
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=3)])
-
-    if use_tpu:
-      spec = multi_head._create_tpu_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=logits,
-          labels=labels,
-          train_op_fn=_train_op_fn).as_estimator_spec()
-    else:
-      spec = multi_head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=logits,
-          labels=labels,
-          train_op_fn=_train_op_fn)
-
-    self.assertIsNotNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-3
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
-          train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          metric_keys.MetricKeys.LOSS + '/head1': expected_loss_head1,
-          metric_keys.MetricKeys.LOSS + '/head2': expected_loss_head2,
-      }, summary_str, tol)
-
-  def test_train_two_heads_with_weights(self):
-    self._test_train_two_heads_with_weights(use_tpu=False)
-
-  def test_train_two_heads_with_weights_tpu(self):
-    self._test_train_two_heads_with_weights(use_tpu=True)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
index cda23aa437..f500d54acb 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,819 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Utilities to replicate model_fn's over local GPUs.
+"""replicate_model_fn python module.
 
-This file contains util that allow to replicate `Estimator.model_fn` over
-GPUs.  Replicated version of a `model_fn` is returned that can subsequently
-be used with `Estimator`.
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
 """
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from collections import defaultdict
-from contextlib import contextmanager
-import copy
+from tensorflow_estimator.contrib.estimator.python.estimator import replicate_model_fn
 
-import six
+# Include attrs that start with single underscore.
+replicate_model_fn.__all__ = [
+    s for s in dir(replicate_model_fn) if not s.startswith('__')
+]
 
-from tensorflow.core.framework import node_def_pb2
-from tensorflow.python.client import device_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.export import export_output as export_output_lib
-from tensorflow.python.framework import device as framework_device
-from tensorflow.python.framework import ops as ops_lib
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.platform import tf_logging
-from tensorflow.python.training import device_setter as device_setter_lib
-from tensorflow.python.training import optimizer as optimizer_lib
-from tensorflow.python.util import deprecation
-from tensorflow.python.util import function_utils
-
-
-@deprecation.deprecated(
-    '2018-05-31',
-    'Please use `tf.contrib.distribute.MirroredStrategy` instead.')
-def replicate_model_fn(model_fn,
-                       loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
-                       devices=None):
-  """Replicate `Estimator.model_fn` over GPUs.
-
-  The given `model_fn` specifies a single forward pass of a model.  To replicate
-  such a model over GPUs, each GPU gets its own instance of the forward pass
-  (a.k.a. a tower).  The input features and labels get sharded into the chunks
-  that correspond to the number of GPUs.  Each tower computes a loss based
-  on its input.  For each such loss, gradients are computed.  After that, the
-  available losses are aggregated to form aggregated loss.  Available
-  gradients are summed.  Then, they update weights using the specified
-  optimizer.
-
-  If `devices` are `None`, then all available GPUs are going to be used for
-  replication.  If no GPUs are available, then the model is going to be
-  placed on the CPU.
-
-  Two modes of local replication over available GPUs are supported:
-    1)  If exactly 1 GPU is detected, then variables and operations are placed
-        onto the GPU.
-    2)  If more than 1 GPU is detected, then variables are going to be placed on
-        the CPU.  Replicas of operations are placed on each individual GPU.
-
-  Here is an example of how one might use their `model_fn` to run over GPUs:
-    ```python
-       ...
-       def model_fn(...):  # See `model_fn` in `Estimator`.
-         loss = ...
-         optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
-         optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
-         if mode == tf.estimator.ModeKeys.TRAIN:
-           #  See the section below on `EstimatorSpec.train_op`.
-           return EstimatorSpec(mode=mode, loss=loss,
-                                train_op=optimizer.minimize(loss))
-
-         #  No change for `ModeKeys.EVAL` or `ModeKeys.PREDICT`.
-         return EstimatorSpec(...)
-       ...
-       classifier = tf.estimator.Estimator(
-         model_fn=tf.contrib.estimator.replicate_model_fn(model_fn))
-    ```
-
-  Please see `DNNClassifierIntegrationTest` for an example with a canned
-  Estimator.
-
-  On `EstimatorSpec.train_op`:
-  `model_fn` returns `EstimatorSpec.train_op` for
-  `tf.estimator.GraphKeys.TRAIN`. It is typically derived using an optimizer.
-  Towers are expected to populate it in the same way.  Gradients from all towers
-  are reduced and applied in the last tower.  To achieve that in the case of
-  multiple towers, `TowerOptimizer` needs to be used.  See `TowerOptimizer`.
-
-  On sharding input features and labels:
-  Input features and labels are split for consumption by each tower. They are
-  split across the dimension 0.  Features and labels need to be batch major.
-
-  On reduction algorithms:
-  Certain algorithms were chosen for aggregating results of computations on
-  multiple towers:
-    - Losses from all towers are reduced according to `loss_reduction`.
-    - Gradients from all towers are reduced according to `loss_reduction`
-      for each trainable variable.
-    - `eval_metrics_ops` are reduced per metric using `reduce_mean`.
-    - `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are
-      reduced using concatenation.
-    - For all other fields of `EstimatorSpec` the values of the first tower
-      are taken.
-
-  On distribution of variables:
-  Variables are not duplicated between towers.  Instead, they are placed on a
-  single device as defined above and shared across towers.
-
-  On overhead:
-  If only one device is specified, then aggregation of loss and gradients
-  doesn't happen. Replication consists of placing `model_fn` onto the
-  specified device.
-
-  On current limitations:
-    - `predictions` are not supported for `ModeKeys.EVAL`.  They are required
-       for `tf.contrib.estimator.add_metrics`.
-
-  Args:
-    model_fn: `model_fn` as defined in `Estimator`.  See the section above about
-      the train_op argument of `EstimatorSpec`.
-    loss_reduction: controls whether losses are summed or averaged.
-    devices: Optional list of devices to replicate the model across.  This
-      argument can be used to replicate only on the subset of available GPUs.
-      If `None`, then all available GPUs are going to be used for replication.
-      If no GPUs are available, then the model is going to be placed on the CPU.
-
-  Raises:
-    ValueError: if there is no `loss_reduction` or if TowerOptimizer is
-      mis-used.
-
-  Returns:
-    A replicated version of the supplied `model_fn`. Returned function that
-      conforms to the requirements of `Estimator`'s `model_fn` and can be used
-      instead of the supplied `model_fn`.
-  """
-  return _replicate_model_fn_with_mode(
-      model_fn,
-      loss_reduction,
-      devices,
-      # TODO(isaprykin): Query the system configuration to choose modes other
-      # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often
-      # appropriate.
-      mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER)
-
-
-class _VariableDistributionMode(object):
-  """Modes for variable distribution used for forcing a particular one.
-
-  Forcing a mode is meant for performance experimentation purposes rather than
-  for general use cases.
-  """
-
-  SHARED_LOCAL_PARAMETER_SERVER = 1
-  """Variables are placed on a single device and shared across all devices.
-
-  Two ways to achieve this distribution over available GPUs are supported:
-    1)  If exactly 1 GPU is detected, then variables and operations are placed
-        onto GPU.
-    2)  If more than 1 GPU is detected, then variables are going to be placed on
-        the CPU.  Replicas of operations are placed on each individual GPU.
-  """
-
-  SHARED_ROUND_ROBIN = 2
-  """Variables are placed on all devices in a round-robin fashion.
-
-  Every subsequent variable is placed on the next device.  There is only one
-  copy of each variable that is shared across all devices.
-  """
-
-
-def _replicate_model_fn_with_mode(
-    model_fn,
-    loss_reduction,
-    devices=None,
-    mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER):
-  """A version of `replicate_model_fn` that allows to specify a `mode`."""
-  if loss_reduction == losses.Reduction.NONE:
-    raise ValueError('Tower losses need to be reduced in some way, yet {} '
-                     'reduction is specified.'.format(loss_reduction))
-  if not devices:
-    devices = _get_local_devices('GPU') or _get_local_devices('CPU')
-
-  is_a_single_gpu_case = len(devices) == 1 and 'GPU' in devices[0].upper()
-  consolidation_device = devices[0] if is_a_single_gpu_case else '/CPU:0'
-
-  ps_devices = [consolidation_device]
-  if mode == _VariableDistributionMode.SHARED_ROUND_ROBIN:
-    ps_devices = devices
-
-  tf_logging.info('Replicating the `model_fn` across {}.  Variables are going '
-                  'to be placed on {}.  Consolidation device is going to be {}.'
-                  .format(devices, ps_devices, consolidation_device))
-
-  def single_device_model_fn(features, labels, mode, params=None, config=None):
-    """`model_fn` on a single device without reduction overhead."""
-    return _get_loss_towers(
-        model_fn=model_fn,
-        mode=mode,
-        features=[features],
-        labels=[labels],
-        params=params,
-        loss_reduction=loss_reduction,
-        config=config,
-        devices=devices,
-        local_ps_devices=ps_devices)[0]  # One device, so one spec is out.
-
-  def replicated_model_fn(features, labels, mode, params=None, config=None):
-    """Replicated version of `model_fn` to be used instead."""
-    feature_shards, label_shards = _split_batch(
-        features, labels, len(devices), device=consolidation_device)
-    tower_specs = _get_loss_towers(
-        model_fn=model_fn,
-        mode=mode,
-        features=feature_shards,
-        labels=label_shards,
-        params=params,
-        loss_reduction=loss_reduction,
-        config=config,
-        devices=devices,
-        local_ps_devices=ps_devices)
-
-    if mode == model_fn_lib.ModeKeys.TRAIN:
-      train_op = _minimize_towers(tower_specs)
-      return _train_spec(
-          tower_specs, train_op, aggregation_device=consolidation_device)
-    elif mode == model_fn_lib.ModeKeys.EVAL:
-      return _eval_spec(tower_specs, aggregation_device=consolidation_device)
-    elif mode == model_fn_lib.ModeKeys.PREDICT:
-      return _predict_spec(tower_specs, aggregation_device=consolidation_device)
-
-  if len(devices) == 1:
-    return single_device_model_fn
-  else:
-    return replicated_model_fn
-
-
-class TowerOptimizer(optimizer_lib.Optimizer):
-  """Gathers gradients from all towers and reduces them in the last one."""
-
-  COLLECTION_FOR_GRAPH_STATES = 'replicate_model_fn_graph_states'
-
-  @deprecation.deprecated(
-      '2018-05-31',
-      'Please use `tf.contrib.distribute.MirroredStrategy` instead.')
-  def __init__(self, optimizer_or_optimizer_fn):
-    """Wrap an existing optimizer for gathering gradients across towers.
-
-    Each invocation of model_fn has to call the same optimizers in the same
-    order.
-
-    Multiple optimizers that use the same or different losses are supported.
-
-    If TowerOptimizer is used but `replicate_model_fn` isn't, then no
-    aggregation will happen.  All calls will simply be forwarded to the
-    underlying optimizer. The behavior is similar if there is only one tower.
-
-    If TowerOptimizer is used together with SyncReplicasOptimizer that wraps
-    the user's optimizer, then it's the SyncReplicasOptimizer that needs to be
-    wrapped with TowerOptimizer.
-
-    Args:
-      optimizer_or_optimizer_fn: an instance of optimizer to wrap.  That
-        instance is going to be used for optimizer-specific logic.  This can
-        also be a no-argument function that returns such an optimizer instance.
-    """
-    self._optimizer_or_optimizer_fn = optimizer_or_optimizer_fn
-
-  @staticmethod
-  def has_been_used():
-    return TowerOptimizer._graph_state().has_tower_optimizer_been_used
-
-  def get_slot(self, *args, **kwargs):
-    return self._get_optimizer().get_slot(*args, **kwargs)
-
-  def get_slot_names(self, *args, **kwargs):
-    return self._get_optimizer().get_slot_names(*args, **kwargs)
-
-  def get_name(self, *args, **kwargs):
-    return self._get_optimizer().get_name(*args, **kwargs)
-
-  def variables(self, *args, **kwargs):
-    return self._get_optimizer().variables(*args, **kwargs)
-
-  def compute_gradients(self, loss, *args, **kwargs):
-    """Compute gradients, but first, if needed, scale the loss."""
-    loss = _scale_loss(loss,
-                       self._graph_state().loss_reduction,
-                       self._graph_state().number_of_towers)
-    return self._get_optimizer().compute_gradients(loss, *args, **kwargs)
-
-  def apply_gradients(self, grads_and_vars, global_step=None, **kwargs):
-    """Collect gradients updates to apply them with the last tower."""
-    if self._graph_state().number_of_towers == 1:
-      # Avoid the overhead of reduction if there's only one tower.
-      #
-      # There assumed to be only one tower if aggregation-related methods were
-      # not called by `_get_loss_towers`, for example if the model_fn uses
-      # TowerEstimator, but `replicate_model_fn` isn't used.
-      return self._get_optimizer().apply_gradients(grads_and_vars, global_step,
-                                                   **kwargs)
-
-    self._graph_state().collect_gradients(grads_and_vars)
-
-    if not self._graph_state().is_the_last_tower:
-      with ops_lib.control_dependencies(_extract_tensors(grads_and_vars)):
-        return self._construct_no_op_train_op()
-    else:
-      # Gradients need to be gathered and applied in the scope of the first
-      # tower, so that the tensors are accessible via names without prefixes.
-      var_scope, name_scope = self._graph_state().scopes_of_the_first_tower
-      with variable_scope.variable_scope(var_scope):
-        with ops_lib.name_scope(name_scope):
-          return self._apply_gathered_gradients(global_step, **kwargs)
-
-  def _apply_gathered_gradients(self, global_step, **kwargs):
-    graph_state = self._graph_state()
-    optimizer = self._get_optimizer()
-
-    grad_lists = {}
-    for grad, var in graph_state.get_latest_gradients_from_all_towers():
-      if grad is not None:
-        grad_lists.setdefault(var, []).append(grad)
-
-    aggregated_grads = []
-    with ops_lib.name_scope('gradient_aggregating'):
-      for var, grads in six.iteritems(grad_lists):
-        grad = _compute_sum_on_device(grads, var.device)
-        aggregated_grads.append((grad, var))
-    return optimizer.apply_gradients(
-        aggregated_grads, global_step=global_step, **kwargs)
-
-  def _get_optimizer(self):
-    if callable(self._optimizer_or_optimizer_fn):
-      # If optimizer is given as a function then we need to wait till we are
-      # under the right graph context before constructing it.  That's why the
-      # optimizer is constructed in _get_optimizer() rather than __init__().
-      self._optimizer_or_optimizer_fn = self._optimizer_or_optimizer_fn()
-    self._graph_state().has_tower_optimizer_been_used = True
-    return self._optimizer_or_optimizer_fn
-
-  def _construct_no_op_train_op(self):
-    return control_flow_ops.no_op(name='train_op_placeholder')
-
-  @staticmethod
-  def _graph_state():
-    graph_states = ops_lib.get_default_graph().get_collection_ref(
-        TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)
-    if not graph_states:
-      graph_states.append(TowerOptimizer._PerGraphState())
-    return graph_states[-1]
-
-  @staticmethod
-  def _did_towers_have_same_optimizer_calls():
-    graph_state = TowerOptimizer._graph_state()
-    return graph_state.did_towers_have_same_optimizer_calls()
-
-  @staticmethod
-  def _clear_graph_state():
-    # Clearing the Graph collection will prevent _PerGraphState from being
-    # serialized.
-    ops_lib.get_default_graph().clear_collection(
-        TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)
-
-  class _PerGraphState(object):
-    """Gradient reduction related state of a Tensorflow graph."""
-
-    def __init__(self):
-      self._collected_grads_and_vars = defaultdict(list)
-      self._current_tower_index = 0
-      self._number_of_towers = 1
-      self._loss_reduction = None
-      # Scopes of the first tower that don't have a prefix:
-      self._variable_scope = None
-      self._name_scope = None
-      # If needed, alert that TowerOptimizer needs to be used with model_fn.
-      self._has_tower_optimizer_been_used = False
-
-    def collect_gradients(self, grads_and_vars):
-      self._collected_grads_and_vars[self._current_tower_index].append(
-          grads_and_vars)
-
-    def get_latest_gradients_from_all_towers(self):
-      """Get gradients across towers for the last called optimizer."""
-      grads_and_vars = []
-      index_of_last_gradients = len(
-          self._collected_grads_and_vars[self._current_tower_index]) - 1
-      for tower_id in range(self._current_tower_index + 1):
-        grads_and_vars.extend(
-            self._collected_grads_and_vars[tower_id][index_of_last_gradients])
-      return grads_and_vars
-
-    def set_reduction_across_towers(self, loss_reduction, number_of_towers):
-      self._loss_reduction = loss_reduction
-      self._number_of_towers = number_of_towers
-
-    @contextmanager
-    def tower(self, tower_id, var_scope, name_scope):
-      if tower_id == 0:
-        self._variable_scope = var_scope
-        self._name_scope = name_scope
-      self._current_tower_index = tower_id
-      yield
-
-    @property
-    def scopes_of_the_first_tower(self):
-      return self._variable_scope, self._name_scope
-
-    @property
-    def is_the_last_tower(self):
-      return self._current_tower_index == (self._number_of_towers - 1)
-
-    @property
-    def number_of_towers(self):
-      return self._number_of_towers
-
-    @property
-    def loss_reduction(self):
-      return self._loss_reduction
-
-    @property
-    def has_tower_optimizer_been_used(self):
-      return self._has_tower_optimizer_been_used
-
-    @has_tower_optimizer_been_used.setter
-    def has_tower_optimizer_been_used(self, value):
-      self._has_tower_optimizer_been_used = value
-
-    def did_towers_have_same_optimizer_calls(self):
-      total_number_of_grads = sum([
-          len(grads)
-          for _, grads in six.iteritems(self._collected_grads_and_vars)
-      ])
-      return total_number_of_grads % self._number_of_towers == 0
-
-
-def _get_local_devices(device_type):
-  local_device_protos = device_lib.list_local_devices()
-  return [
-      device.name
-      for device in local_device_protos
-      if device.device_type == device_type
-  ]
-
-
-def _split_batch(features, labels, number_of_shards, device):
-  """Split input features and labels into batches."""
-
-  def ensure_divisible_by_shards(sequence):
-    batch_size = ops_lib.convert_to_tensor(sequence).get_shape()[0]
-    if batch_size % number_of_shards != 0:
-      raise ValueError(
-          'Batch size {} needs to be divisible by the number of GPUs, which '
-          'is {}.'.format(batch_size, number_of_shards))
-
-  def split_dictionary(dictionary):
-    """Split a dictionary into shards."""
-    shards = [{} for _ in range(number_of_shards)]
-    for name, tensor in six.iteritems(dictionary):
-      if isinstance(tensor, sparse_tensor.SparseTensor):
-        for i, shard in enumerate(
-            sparse_ops.sparse_split(
-                sp_input=tensor, num_split=number_of_shards, axis=0)):
-          shards[i][name] = shard
-      else:
-        ensure_divisible_by_shards(tensor)
-        for i, shard in enumerate(array_ops.split(tensor, number_of_shards)):
-          shards[i][name] = shard
-    return shards
-
-  with ops_lib.name_scope('split_inputs'):
-    with ops_lib.device(device):
-      if isinstance(features, dict):
-        feature_shards = split_dictionary(features)
-      else:
-        ensure_divisible_by_shards(features)
-        feature_shards = array_ops.split(features, number_of_shards)
-
-      if labels is None:
-        label_shards = None
-      elif isinstance(labels, dict):
-        label_shards = split_dictionary(labels)
-      else:
-        ensure_divisible_by_shards(labels)
-        label_shards = array_ops.split(labels, number_of_shards)
-  return feature_shards, label_shards
-
-
-_DEFAULT_NAME_SCOPE_PATTERN = 'tower_{}'
-
-
-def _get_loss_towers(model_fn,
-                     mode,
-                     features,
-                     labels,
-                     params,
-                     config,
-                     devices,
-                     local_ps_devices,
-                     loss_reduction,
-                     name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
-  """Replicate the loss computation across devices."""
-  tower_specs = []
-
-  model_fn_args = function_utils.fn_args(model_fn)
-  optional_params = {}
-  if 'params' in model_fn_args:
-    optional_params['params'] = copy.deepcopy(params)
-  if 'config' in model_fn_args:
-    optional_params['config'] = copy.deepcopy(config)
-
-  # pylint: disable=protected-access
-  round_robin_strategy = device_setter_lib._RoundRobinStrategy(
-      num_tasks=len(local_ps_devices))
-  TowerOptimizer._graph_state().set_reduction_across_towers(
-      loss_reduction, len(devices))
-
-  for i, device in enumerate(devices):
-    is_the_first_tower = (i == 0)
-
-    device_setter = _local_device_setter(
-        worker_device=device,
-        ps_devices=local_ps_devices,
-        ps_strategy=round_robin_strategy)
-
-    # We would like to preserve the names of the variables and ops that the user
-    # might be relying on. Names without a prefix are going to resolve to
-    # variables and ops of the first tower.
-    name_scope = name_scope_pattern
-    if is_the_first_tower:
-      name_scope = ''
-
-    with variable_scope.variable_scope(
-        '', reuse=not is_the_first_tower) as var_scope:
-      with ops_lib.name_scope(name_scope.format(i)) as name_scope:
-        with TowerOptimizer._graph_state().tower(
-            tower_id=i, var_scope=var_scope, name_scope=name_scope):
-          with ops_lib.device(device_setter):
-            labels_shard = None
-            if labels:
-              labels_shard = labels[i]
-
-            tower_spec = model_fn(
-                mode=mode,
-                features=features[i],
-                labels=labels_shard,
-                **optional_params)
-
-            if (tower_spec.train_op is not None and len(devices) > 1 and
-                not TowerOptimizer.has_been_used()):
-              raise ValueError('Please wrap optimizers with TowerOptimizer'
-                               ' in order to use replicate_model_fn with'
-                               ' multiple `devices`.')
-
-            # Scaling the loss here doesn't actually affect gradients.  Another
-            # instance of scaling happens inside the TowerOptimizer.
-            tower_spec = _scale_tower_loss(
-                tower_spec, loss_reduction, number_of_towers=len(devices))
-            tower_specs.append(tower_spec)
-
-  if not TowerOptimizer._did_towers_have_same_optimizer_calls():
-    raise ValueError('Each invocation of model_fn was supposed to make the same'
-                     ' optimizer calls.')
-  TowerOptimizer._clear_graph_state()
-  # pylint: enable=protected-access
-  return tower_specs
-
-
-def _local_device_setter(worker_device, ps_devices, ps_strategy):
-  """A device setter that puts distributes Var/Ops to PS/workers."""
-  ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']
-
-  def local_device_chooser(op):
-    current_device = framework_device.DeviceSpec.from_string(op.device or '')
-
-    node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
-    if node_def.op in ps_ops:
-      ps_device_spec = framework_device.DeviceSpec.from_string(
-          '{}'.format(ps_devices[ps_strategy(op)]))
-
-      ps_device_spec.merge_from(current_device)
-      return ps_device_spec.to_string()
-    else:
-      worker_device_spec = framework_device.DeviceSpec.from_string(
-          worker_device or '')
-      worker_device_spec.merge_from(current_device)
-      return worker_device_spec.to_string()
-
-  return local_device_chooser
-
-
-def _scale_tower_loss(tower_spec, loss_reduction, number_of_towers):
-  """Produce an EstimatorSpec with appropriately scaled loss."""
-  if tower_spec.loss is None:
-    return tower_spec
-
-  estimator_spec = _asdict(tower_spec)
-  estimator_spec['loss'] = _scale_loss(tower_spec.loss, loss_reduction,
-                                       number_of_towers)
-  return model_fn_lib.EstimatorSpec(**estimator_spec)
-
-
-def _scale_loss(loss, loss_reduction, number_of_towers):
-  """If needed, scale down the loss for averaging loss by summing."""
-  if loss is None:
-    return None
-  if number_of_towers == 1:
-    return loss
-
-  if loss_reduction != losses.Reduction.SUM:
-    return math_ops.div(loss, 1.0 * number_of_towers, name='averaged_loss')
-  else:
-    return loss
-
-
-def _minimize_towers(tower_specs):
-  """`train_op` of the last tower applies aggregated gradients."""
-  return tower_specs[-1].train_op
-
-
-def _compute_sum_on_device(values, device, name=None):
-  with ops_lib.device(device):
-    if isinstance(values[0], ops_lib.IndexedSlices):
-      if name:
-        raise ValueError('The name {} is not expected to be given to '
-                         'IndexedSlices {}'.format(name, values))
-
-      values_concat = array_ops.concat([v.values for v in values], axis=0)
-      indices_concat = array_ops.concat([v.indices for v in values], axis=0)
-      return ops_lib.IndexedSlices(values_concat, indices_concat,
-                                   values[0].dense_shape)
-    else:
-      return math_ops.add_n(values, name=name)
-
-
-def _train_spec(tower_specs,
-                train_op,
-                aggregation_device,
-                aggregated_loss_name='loss'):
-  """Populate replicated EstimatorSpec for `GraphKeys.TRAIN`."""
-  # Spec of the last tower is used as the template for the final spec, because
-  # some `EstimatorSpec.training_hooks` rely on calls made in model_fn.  For
-  # example, `SyncReplicasOptimizerHook` validates the
-  # `SyncReplicasOptimizer.apply_gradients` call. `TowerEstimator` makes that
-  # call only in the last tower.
-  estimator_spec = _asdict(tower_specs[-1])
-  estimator_spec['mode'] = model_fn_lib.ModeKeys.TRAIN
-  estimator_spec['train_op'] = train_op
-  estimator_spec['loss'] = _compute_sum_on_device(
-      [spec.loss for spec in tower_specs], aggregation_device,
-      aggregated_loss_name)
-  return model_fn_lib.EstimatorSpec(**estimator_spec)
-
-
-def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'):
-  """Populate replicated EstimatorSpec for `GraphKeys.EVAL`."""
-  estimator_spec = _asdict(tower_specs[0])
-  estimator_spec['mode'] = model_fn_lib.ModeKeys.EVAL
-  estimator_spec['loss'] = _compute_sum_on_device(
-      [spec.loss for spec in tower_specs], aggregation_device,
-      aggregated_loss_name)
-
-  update_ops = []
-  for tower_spec in tower_specs:
-    for name, (_, update_op) in six.iteritems(tower_spec.eval_metric_ops):
-      update_ops.append(update_op)
-
-  with ops_lib.control_dependencies(update_ops):
-    reduced_update_op = _reduce_metric_variables(len(tower_specs))
-
-  eval_metric_ops = {}
-  for name, (metric_tensor, _) in six.iteritems(tower_specs[0].eval_metric_ops):
-    eval_metric_ops[name] = (metric_tensor, reduced_update_op)
-  estimator_spec['eval_metric_ops'] = eval_metric_ops
-  return model_fn_lib.EstimatorSpec(**estimator_spec)
-
-
-def _reduce_metric_variables(number_of_towers):
-  """Aggregate local variables used in metrics into the first tower."""
-  if number_of_towers == 1:
-    return control_flow_ops.no_op(name='no_eval_metric_reduction')
-
-  metric_variables = ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)
-  variables_per_tower = len(metric_variables) // number_of_towers
-
-  if len(metric_variables) % number_of_towers != 0:
-    raise ValueError(
-        'Different `EstimatorSpec.eval_metric_ops` across `model_fn()` calls.'
-        ' Expected {} local variables, but got {} instead.'.format(
-            variables_per_tower * number_of_towers, len(metric_variables)))
-
-  # `metric_variables` has the size of `variables_per_tower` x
-  #  number_of_towers.  Each tower is produced by calling the same model_fn.
-  #  First `variables_per_tower` correspond to the first tower.  Each such
-  #  variable has an replica at the `(variables_per_tower * i)` position, where
-  #  `i` is `[1.. number_of_towers]`.  We are going to add values from replicas
-  #  to each variable of the first tower.  We then zero out replica values, so
-  #  that `_reduce_metric_variables` operation is idempotent.  If a metric
-  #  is then computed based on local variables from the first tower, then the
-  #  resulting metric is an estimate for all `number_of_towers` towers.
-  ops = []
-  for i in range(0, variables_per_tower):
-    next_replica_id = i + variables_per_tower
-    replicas = [
-        metric_variables[replica_id]
-        for replica_id in range(next_replica_id, len(metric_variables),
-                                variables_per_tower)
-    ]  #  `replicas` doesn't contain the first-tower variable.
-
-    reduce_op = state_ops.assign_add(metric_variables[i],
-                                     math_ops.add_n(replicas))
-
-    with ops_lib.control_dependencies([reduce_op]):
-      for replica in replicas:
-        zeros_for_replica = array_ops.zeros(
-            array_ops.shape(replica), dtype=replica.dtype)
-        zero_out_replica_op = state_ops.assign(replica, zeros_for_replica)
-        ops.append(zero_out_replica_op)
-
-  return control_flow_ops.group(*ops)
-
-
-def _predict_spec(tower_specs, aggregation_device):
-  """Populate replicated EstimatorSpec for `GraphKeys.PREDICT`."""
-  estimator_spec = _asdict(tower_specs[0])
-  estimator_spec['mode'] = model_fn_lib.ModeKeys.PREDICT
-
-  with ops_lib.device(aggregation_device):
-    estimator_spec['predictions'] = _concat_tensor_dicts(
-        *[tower_spec.predictions for tower_spec in tower_specs])
-
-    export_outputs_dict = _dict_concat(
-        *[tower_spec.export_outputs for tower_spec in tower_specs])
-
-    export_outputs = {}
-    for name, export_output_list in six.iteritems(export_outputs_dict):
-      if isinstance(export_output_list[0], export_output_lib.PredictOutput):
-        export_outputs[name] = export_output_lib.PredictOutput(
-            outputs=_concat_tensor_dicts(*[
-                export_output.outputs for export_output in export_output_list
-            ]))
-      elif isinstance(export_output_list[0],
-                      export_output_lib.RegressionOutput):
-        export_outputs[name] = export_output_lib.RegressionOutput(
-            value=array_ops.concat(
-                [export_output.value for export_output in export_output_list],
-                axis=0))
-      elif isinstance(export_output_list[0],
-                      export_output_lib.ClassificationOutput):
-        scores = None
-        if export_output_list[0].scores is not None:
-          scores = array_ops.concat(
-              [export_output.scores for export_output in export_output_list],
-              axis=0)
-
-        classes = None
-        if export_output_list[0].classes is not None:
-          classes = array_ops.stack(
-              [export_output.classes for export_output in export_output_list],
-              axis=0)
-
-        export_outputs[name] = export_output_lib.ClassificationOutput(
-            scores=scores, classes=classes)
-
-  estimator_spec['export_outputs'] = export_outputs
-  return model_fn_lib.EstimatorSpec(**estimator_spec)
-
-
-def _concat_tensor_dicts(*tensor_dicts):
-  return {
-      name: array_ops.concat(tensors, axis=0, name=name)
-      for name, tensors in six.iteritems(_dict_concat(*tensor_dicts))
-  }
-
-
-def _extract_tensors(tensors_and_vars):
-  tensors = []
-  for tensor_and_var in tensors_and_vars:
-    tensor, _ = tensor_and_var
-    if isinstance(tensor, ops_lib.IndexedSlices):
-      tensors.append(tensor.values)
-    elif tensor is not None:
-      tensors.append(tensor)
-  return tensors
-
-
-def _dict_concat(*dicts):
-  list_dict = {}
-  for d in dicts:
-    if d is None:
-      continue
-
-    for k, v in six.iteritems(d):
-      list_dict.setdefault(k, []).append(v)
-  return list_dict
-
-
-def _asdict(namedtuple):
-  """Returns a namedtuple as a dictionary.
-
-  This is required because `_asdict()` in Python 3.x.x is broken in classes
-  that inherit from `collections.namedtuple`. See
-  https://bugs.python.org/issue24931 for more details.
-
-  Args:
-    namedtuple: An object that inherits from `collections.namedtuple`.
-
-  Returns:
-    A dictionary version of the tuple.
-  """
-  return {k: getattr(namedtuple, k) for k in namedtuple._fields}
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.replicate_model_fn import *
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
deleted file mode 100644
index 65229d67bb..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
+++ /dev/null
@@ -1,1649 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for utilities that replicate `Estimator.model_fn` over GPUs."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import re
-import shutil
-import tempfile
-from absl.testing import parameterized
-import numpy as np
-import six
-
-from tensorflow.contrib.estimator.python.estimator import replicate_model_fn
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.canned import dnn
-from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.export import export_output
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops as ops_lib
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import losses
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import adam
-from tensorflow.python.training import device_setter
-from tensorflow.python.training import gradient_descent
-from tensorflow.python.training import training
-
-
-class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase,
-                                   parameterized.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  @parameterized.named_parameters(
-      ('PublicInterface', None),
-      ('ParameterServerMode', replicate_model_fn._VariableDistributionMode.
-       SHARED_LOCAL_PARAMETER_SERVER),
-      ('RoundRobinMode',
-       replicate_model_fn._VariableDistributionMode.SHARED_ROUND_ROBIN))
-  def test_complete_flow_with_mode(self, mode):
-    n_classes = 3
-    input_dimension = 2
-    batch_size = 12
-
-    data = np.linspace(
-        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
-    x_data = data.reshape(batch_size, input_dimension)
-    categorical_data = np.random.random_integers(
-        0, len(x_data), size=len(x_data))
-    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data,
-           'categories': categorical_data},
-        y=y_data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data,
-           'categories': categorical_data},
-        y=y_data,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data,
-           'categories': categorical_data},
-        batch_size=batch_size,
-        shuffle=False)
-
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,)),
-        feature_column.embedding_column(
-            feature_column.categorical_column_with_vocabulary_list(
-                'categories',
-                vocabulary_list=np.linspace(
-                    0., len(x_data), len(x_data), dtype=np.int64)), 1)
-    ]
-
-    def optimizer_fn():
-      return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)
-
-    estimator = dnn.DNNClassifier(
-        hidden_units=(2, 2),
-        # Adagrad is configured with `get_optimizer_instance`, so the function
-        # form of `TowerOptimizer.__init__` is used.
-        optimizer=replicate_model_fn.TowerOptimizer(optimizer_fn),
-        feature_columns=feature_columns,
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    if not mode:  # Use the public `replicate_model_fn`.
-      model_fn = replicate_model_fn.replicate_model_fn(
-          estimator.model_fn, devices=['/gpu:0', '/gpu:1', '/gpu:2'])
-    else:
-      model_fn = replicate_model_fn._replicate_model_fn_with_mode(
-          estimator.model_fn,
-          devices=['/gpu:0', '/gpu:1', '/gpu:2'],
-          loss_reduction=losses.Reduction.SUM,
-          mode=mode)
-
-    estimator = estimator_lib.Estimator(
-        model_fn=model_fn,
-        model_dir=estimator.model_dir,
-        config=estimator.config,
-        params=estimator.params)
-
-    num_steps = 10
-    estimator.train(train_input_fn, steps=num_steps)
-
-    scores = estimator.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    predicted_proba = np.array([
-        x[prediction_keys.PredictionKeys.PROBABILITIES]
-        for x in estimator.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
-
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
-                                             serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-    # Nothing should be left in the graph so that it doesn't get serialized.
-    self.assertFalse(ops_lib.get_default_graph().get_collection_ref(
-        replicate_model_fn.TowerOptimizer.COLLECTION_FOR_GRAPH_STATES))
-
-  def _as_label(self, data_in_float):
-    return np.rint(data_in_float).astype(np.int64)
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-
-class ReplicateModelTest(test_util.TensorFlowTestCase):
-
-  def model_fn(self, mode, features, labels, params):
-    c = variable_scope.get_variable(
-        'c',
-        initializer=constant_op.constant(10, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-
-    predictions = math_ops.multiply(features, c)
-
-    loss = losses.absolute_difference(
-        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
-    loss = math_ops.reduce_sum(loss)
-
-    metrics = {
-        'accuracy': metrics_lib.accuracy(labels, predictions),
-        'auc': metrics_lib.auc(labels, predictions)
-    }
-
-    optimizer = replicate_model_fn.TowerOptimizer(
-        gradient_descent.GradientDescentOptimizer(params['learning_rate']))
-
-    return model_fn_lib.EstimatorSpec(
-        mode=mode,
-        loss=loss,
-        eval_metric_ops=metrics,
-        predictions={'probabilities': predictions},
-        train_op=optimizer.minimize(loss))
-
-  @property
-  def params(self):
-    params = {}
-    params['learning_rate'] = 1.0
-    return params
-
-  def test_train(self):
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn,
-          loss_reduction=losses.Reduction.SUM,
-          devices=['/gpu:0', '/gpu:1'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-      session.run(variables.global_variables_initializer())
-
-      # loss = feature * c - label
-      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
-      self.assertEqual(total_loss, session.run(estimator_spec.loss))
-
-      # derivative of loss = (1*c - 1) + (2*c - 2) is 3.
-      # new value of c = 10 - learning rate * 3 = 7.0.
-      session.run(estimator_spec.train_op)
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual(7.0, session.run(c))
-
-  def test_train_with_mean_reduction(self):
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    with self.cached_session() as session:
-      # Add another trainable variable that doesn't produce a gradient to
-      # verify that None gradients are supported.
-      _ = variable_scope.get_variable(
-          'another_variable',
-          initializer=constant_op.constant(1, dtype=dtypes.float64),
-          dtype=dtypes.float64)
-
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-      session.run(variables.global_variables_initializer())
-
-      # loss = feature * c - label
-      total_loss = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)) / 2.0
-      self.assertEqual(total_loss, session.run(estimator_spec.loss))
-
-      # derivative of loss = (1*c - 1)/2 + (2*c - 2)/2 is 1.5.
-      # It's the same computation as without mean reduction, but the
-      # loss from every tower is scaled by 1/<number of towers>.
-      # new value of c = 10 - learning rate * 1.5 = 8.5
-      session.run(estimator_spec.train_op)
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual(8.5, session.run(c))
-
-  def test_train_two_steps_collected_gradients_are_reset_between_steps(self):
-    with ops_lib.Graph().as_default():
-      features = array_ops.placeholder(dtypes.float64)
-      labels = array_ops.placeholder(dtypes.float64)
-
-      feature_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]])
-      label_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]])
-
-      # loss = feature * c - label
-      expected_losses = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0),
-                         (1.5 * 7.0 - 1.5) + (2.5 * 7.0 - 2.5))
-      # Derivative of the loss is 1.0 + 2.0 for the first step and 1.5 + 2.5
-      # for the second.
-      expected_c = 10.0 - 3.0, 7.0 - 4.0
-
-      with self.cached_session() as session, variable_scope.variable_scope(
-          '', reuse=variable_scope.AUTO_REUSE):
-        replicated_model_fn = replicate_model_fn.replicate_model_fn(
-            self.model_fn,
-            loss_reduction=losses.Reduction.SUM,
-            devices=['/gpu:0', '/gpu:1'])
-        estimator_spec = replicated_model_fn(
-            features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-        session.run(variables.global_variables_initializer())
-
-        for feature_input, label_input, loss, weight in zip(
-            feature_inputs, label_inputs, expected_losses, expected_c):
-          feeds = {features: feature_input, labels: label_input}
-
-          self.assertEqual(loss, session.run(estimator_spec.loss, feeds))
-
-          session.run(estimator_spec.train_op, feeds)
-          c = variable_scope.get_variable('c', dtype=dtypes.float64)
-          self.assertEqual(weight, session.run(c, feeds))
-
-  def test_eval(self):
-    features = np.array([[0.01], [0.002]])
-    labels = np.array([[0.01], [0.02]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn,
-          loss_reduction=losses.Reduction.SUM,
-          devices=['/gpu:0', '/gpu:1'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
-      session.run(variables.local_variables_initializer())
-      session.run(variables.global_variables_initializer())
-
-      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
-      auc, b = estimator_spec.eval_metric_ops['auc']
-
-      session.run([a, b])
-      accuracy = session.run(accuracy)
-      auc = session.run(auc)
-
-      # loss[i] = features[i] * 10 - labels[i].
-      # Accuracy is 0.0 (no match) in the first tower.
-      # Accuracy is 1.0 (match) in the second tower, since the feature
-      # times weight "c" happened to be equal to the label.
-      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02))
-
-      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
-      self.assertEqual(0, auc)
-      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
-
-  def test_eval_with_mean_reduction(self):
-    features = np.array([[0.01], [0.002]])
-    labels = np.array([[0.01], [0.02]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
-      session.run(variables.local_variables_initializer())
-      session.run(variables.global_variables_initializer())
-
-      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
-      auc, b = estimator_spec.eval_metric_ops['auc']
-
-      session.run([a, b])
-      accuracy = session.run(accuracy)
-      auc = session.run(auc)
-
-      # loss[i] = features[i] * 10 - labels[i].
-      # Accuracy is 0.0 (no match) in the first tower.
-      # Accuracy is 1.0 (match) in the second tower, since the feature
-      # times weight "c" happened to be equal to the label.
-      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) / 2.0
-
-      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
-      self.assertEqual(0, auc)
-      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
-
-  def test_predict(self):
-    features = np.array([[0.01], [0.002]])
-    labels = np.array([[0.01], [0.02]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, devices=['/gpu:0', '/gpu:1'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.PREDICT, self.params)
-      session.run(variables.global_variables_initializer())
-
-      self.assertAllClose({
-          'probabilities': np.array([[0.1], [0.02]])
-      }, session.run(estimator_spec.predictions))
-
-  def test_train_single_tower(self):
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, devices=['/gpu:0'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-      session.run(variables.global_variables_initializer())
-
-      # loss = feature * c - label
-      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
-      self.assertEqual(total_loss, session.run(estimator_spec.loss))
-
-      # loss' of c is 3.
-      # new value of c = 10 - learning rate * 3 = 7.0.
-      session.run(estimator_spec.train_op)
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual(7.0, session.run(c))
-
-  def test_eval_single_tower(self):
-    features = np.array([[0.01], [0.002]])
-    labels = np.array([[0.01], [0.02]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, devices=['/gpu:0'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
-      session.run(variables.local_variables_initializer())
-      session.run(variables.global_variables_initializer())
-
-      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
-      auc, b = estimator_spec.eval_metric_ops['auc']
-
-      session.run([a, b])
-      accuracy = session.run(accuracy)
-      auc = session.run(auc)
-
-      # Accuracy is 0.0 (no match) in the first tower.
-      # Accuracy is 1.0 (match) in the second tower, since the feature
-      # times weight "c" happened to be equal to the label.
-      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02))
-
-      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
-      self.assertEqual(0, auc)
-      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
-
-  def test_predict_single_tower(self):
-    features = np.array([[0.01], [0.002]])
-    labels = np.array([[0.01], [0.02]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, devices=['/gpu:0'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.PREDICT, self.params)
-      session.run(variables.global_variables_initializer())
-
-      self.assertAllClose({
-          'probabilities': np.array([[0.1], [0.02]])
-      }, session.run(estimator_spec.predictions))
-
-  def test_batch_size_that_is_not_divisible_by_the_number_of_gpus(self):
-    features = np.array([[1.0], [2.0], [3.0]])
-    labels = np.array([[1.0], [2.0], [3.0]])
-
-    with self.assertRaisesRegexp(
-        ValueError, '.*Batch.+size.+needs.+to.+be.+divisible.+by.+GPUs.+'):
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, devices=['/gpu:0', '/gpu:1'])
-      _ = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-
-  def test_unsupported_loss_reduction(self):
-    with self.assertRaisesRegexp(ValueError,
-                                 '.+none.+reduction.+is.+specified.+'):
-      _ = replicate_model_fn.replicate_model_fn(self.model_fn,
-                                                losses.Reduction.NONE)
-
-  def test_places_on_gpu_with_upper_case_spelling(self):
-    features = np.array([[0.01], [0.002]])
-    labels = np.array([[0.01], [0.02]])
-
-    with self.cached_session():
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, devices=['/GPU:0'])
-      _ = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual('/device:GPU:0', c.device)
-
-  def test_places_on_gpu_with_lower_case_spelling(self):
-    features = np.array([[0.01], [0.002]])
-    labels = np.array([[0.01], [0.02]])
-
-    with self.cached_session():
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, devices=['/gpu:0'])
-      _ = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual('/device:GPU:0', c.device)
-
-
-class ReplicateAcrossASingleDeviceWithoutTowerOptimizer(
-    test_util.TensorFlowTestCase):
-
-  def model_fn(self, mode, features, labels, params):
-    c = variable_scope.get_variable(
-        'c',
-        initializer=constant_op.constant(10, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-
-    predictions = math_ops.multiply(features, c)
-
-    loss = losses.absolute_difference(
-        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
-    loss = math_ops.reduce_sum(loss)
-
-    metrics = {
-        'accuracy': metrics_lib.accuracy(labels, predictions),
-        'auc': metrics_lib.auc(labels, predictions)
-    }
-
-    optimizer = gradient_descent.GradientDescentOptimizer(
-        params['learning_rate'])
-
-    return model_fn_lib.EstimatorSpec(
-        mode=mode,
-        loss=loss,
-        eval_metric_ops=metrics,
-        predictions={'probabilities': predictions},
-        train_op=optimizer.minimize(loss))
-
-  @property
-  def params(self):
-    params = {}
-    params['learning_rate'] = 1.0
-    return params
-
-  def test_train_single_tower(self):
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn, devices=['/gpu:0'])
-      estimator_spec = replicated_model_fn(
-          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
-      session.run(variables.global_variables_initializer())
-
-      # loss = feature * c - label
-      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
-      self.assertEqual(total_loss, session.run(estimator_spec.loss))
-
-      # loss' of c is 3.
-      # new value of c = 10 - learning rate * 3 = 7.0.
-      session.run(estimator_spec.train_op)
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual(7.0, session.run(c))
-
-
-class MakeSureSyncReplicasOptimizerWorks(test_util.TensorFlowTestCase):
-
-  def model_fn(self, mode, features, labels, params):
-    c = variable_scope.get_variable(
-        'c',
-        initializer=constant_op.constant(10, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-
-    features = features['features']
-    predictions = math_ops.multiply(features, c)
-
-    loss = losses.absolute_difference(
-        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
-    loss = math_ops.reduce_sum(loss)
-
-    metrics = {
-        'accuracy': metrics_lib.accuracy(labels, predictions),
-        'auc': metrics_lib.auc(labels, predictions)
-    }
-
-    optimizer = gradient_descent.GradientDescentOptimizer(
-        params['learning_rate'])
-    optimizer = training.SyncReplicasOptimizer(
-        optimizer, replicas_to_aggregate=1)
-    sync_hook = optimizer.make_session_run_hook(True)
-    optimizer = replicate_model_fn.TowerOptimizer(optimizer)
-
-    return model_fn_lib.EstimatorSpec(
-        mode=mode,
-        loss=loss,
-        eval_metric_ops=metrics,
-        training_hooks=[sync_hook],
-        predictions={'probabilities': predictions},
-        train_op=optimizer.minimize(
-            loss, global_step=training.get_global_step()))
-
-  @property
-  def params(self):
-    params = {}
-    params['learning_rate'] = 1.0
-    return params
-
-  def test_train_multiple_towers(self):
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'features': features}, y=labels, batch_size=2, shuffle=False)
-
-    model_fn = replicate_model_fn.replicate_model_fn(
-        self.model_fn,
-        loss_reduction=losses.Reduction.SUM,
-        devices=['/gpu:0', '/gpu:1'])
-
-    estimator = estimator_lib.Estimator(
-        model_fn=model_fn, model_dir=tempfile.mkdtemp(), params=self.params)
-    estimator.train(train_input_fn, steps=1)
-
-    self.assertEqual(7.0, estimator.get_variable_value('c'))
-
-
-class ReplicateWithTwoOptimizersTest(test_util.TensorFlowTestCase):
-
-  def model_fn(self, mode, features, labels, params):
-    c = variable_scope.get_variable(
-        'c',
-        initializer=constant_op.constant(10, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-
-    side_effects = variable_scope.get_variable(
-        'side_effects',
-        initializer=constant_op.constant(0, dtype=dtypes.float64),
-        dtype=dtypes.float64,
-        trainable=False)
-
-    predictions = math_ops.multiply(features, c)
-
-    loss = losses.absolute_difference(
-        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
-    loss = math_ops.reduce_sum(loss)
-
-    metrics = {
-        'accuracy': metrics_lib.accuracy(labels, predictions),
-        'auc': metrics_lib.auc(labels, predictions)
-    }
-
-    first_optimizer = replicate_model_fn.TowerOptimizer(
-        gradient_descent.GradientDescentOptimizer(1.0))
-    second_optimizer = replicate_model_fn.TowerOptimizer(
-        adam.AdamOptimizer(1.0))
-
-    with ops_lib.control_dependencies([side_effects.assign_add(1.0)]):
-      first_grads_and_vars = first_optimizer.compute_gradients(loss)
-
-    train_op = control_flow_ops.group(
-        [first_optimizer.apply_gradients(first_grads_and_vars),
-         second_optimizer.minimize(loss)])
-
-    return model_fn_lib.EstimatorSpec(
-        mode=mode,
-        loss=loss,
-        eval_metric_ops=metrics,
-        predictions={'probabilities': predictions},
-        train_op=train_op)
-
-  def test_train(self):
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn,
-          loss_reduction=losses.Reduction.SUM,
-          devices=['/gpu:0', '/gpu:1'])
-      estimator_spec = replicated_model_fn(features, labels,
-                                           model_fn_lib.ModeKeys.TRAIN, {})
-      session.run(variables.global_variables_initializer())
-
-      # loss = feature * c - label
-      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
-      self.assertEqual(total_loss, session.run(estimator_spec.loss))
-
-      # loss' of c is 3.
-      # new value of c = 10 - learning rate * 3 = 7.0.
-      # Adam subtracts another ~1.
-      session.run(estimator_spec.train_op)
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertNear(6.0, session.run(c), 0.000001)
-
-        side_effects = variable_scope.get_variable(
-            'side_effects', dtype=dtypes.float64)
-        self.assertNear(2.0, session.run(side_effects), 0.000001)
-
-
-class ReplicateWithTwoLossesAndOneOptimizer(test_util.TensorFlowTestCase):
-
-  def setUp(self):
-    self._should_skip_optimizer = False
-    self._towers_left_before_skipping_optimizer = -1
-
-  def incorrectly_skip_optimizer_for_tower(self, tower_number):
-    self._should_skip_optimizer = True
-    self._towers_left_before_skipping_optimizer = tower_number
-
-  def should_skip_optimizer(self):
-    if not self._should_skip_optimizer:
-      return False
-    if self._towers_left_before_skipping_optimizer == 0:
-      return True
-    else:
-      self._towers_left_before_skipping_optimizer -= 1
-      return False
-
-  def model_fn(self, mode, features, labels, params):
-    c = variable_scope.get_variable(
-        'c',
-        initializer=constant_op.constant(10, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-    d = variable_scope.get_variable(
-        'd',
-        initializer=constant_op.constant(2, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-
-    predictions = math_ops.multiply(features, c)
-
-    loss = losses.absolute_difference(
-        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
-    loss = math_ops.reduce_sum(loss)
-
-    another_predictions = math_ops.multiply(features, d)
-    another_loss = losses.absolute_difference(
-        labels=labels,
-        predictions=another_predictions,
-        reduction=losses.Reduction.SUM)
-    another_loss = math_ops.reduce_sum(another_loss)
-
-    total_loss = math_ops.add(loss, another_loss)
-
-    metrics = {
-        'accuracy': metrics_lib.accuracy(labels, predictions),
-        'auc': metrics_lib.auc(labels, predictions)
-    }
-
-    train_ops = []
-
-    optimizer = replicate_model_fn.TowerOptimizer(
-        gradient_descent.GradientDescentOptimizer(1.0))
-    train_ops.append(optimizer.minimize(loss, var_list=[c]))
-    if not self.should_skip_optimizer():
-      another_optimizer = replicate_model_fn.TowerOptimizer(
-          gradient_descent.GradientDescentOptimizer(1.0))
-      train_ops.append(another_optimizer.minimize(another_loss, var_list=[d]))
-
-    train_op = control_flow_ops.group(train_ops)
-    return model_fn_lib.EstimatorSpec(
-        mode=mode,
-        loss=total_loss,
-        eval_metric_ops=metrics,
-        predictions={'probabilities': predictions},
-        train_op=train_op)
-
-  def test_train(self):
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    with self.cached_session() as session:
-      replicated_model_fn = replicate_model_fn.replicate_model_fn(
-          self.model_fn,
-          loss_reduction=losses.Reduction.SUM,
-          devices=['/gpu:0', '/gpu:1'])
-      estimator_spec = replicated_model_fn(features, labels,
-                                           model_fn_lib.ModeKeys.TRAIN, {})
-      session.run(variables.global_variables_initializer())
-
-      # For each tower, loss = (feature * c - label) + (feature * d - label).
-      total_loss = (1.0 * 10 - 1.0 + 1.0 * 2.0 - 1.0) + (
-          2.0 * 10 - 2.0 + 2.0 * 2.0 - 2.0)
-      self.assertEqual(total_loss, session.run(estimator_spec.loss))
-
-      session.run(estimator_spec.train_op)
-
-      # loss' of c or loss' of d is 3.
-      # new value of c = 10 - learning rate * 3 = 7.0.
-      # new value of d = 2  - learning rate * 3 = -1.0.
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertNear(7.0, session.run(c), 0.000001)
-        d = variable_scope.get_variable('d', dtype=dtypes.float64)
-        self.assertNear(-1.0, session.run(d), 0.000001)
-
-  def test_different_optimizer_calls_within_towers(self):
-    self.incorrectly_skip_optimizer_for_tower(1)
-
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    with self.cached_session(), ops_lib.Graph().as_default():
-      with self.assertRaisesRegexp(
-          ValueError, '.+was.+supposed.+to.+make.+same.+optimizer.+calls.+'):
-        replicated_model_fn = replicate_model_fn.replicate_model_fn(
-            self.model_fn, devices=['/gpu:0', '/gpu:1'])
-        _ = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN,
-                                {})
-
-
-class FailToWrapOptimizerInTheModelFn(test_util.TensorFlowTestCase):
-
-  def model_fn(self, mode, features, labels, params):
-    c = variable_scope.get_variable(
-        'c',
-        initializer=constant_op.constant(10, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-
-    predictions = math_ops.multiply(features, c)
-
-    loss = losses.absolute_difference(
-        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
-    loss = math_ops.reduce_sum(loss)
-
-    metrics = {
-        'accuracy': metrics_lib.accuracy(labels, predictions),
-        'auc': metrics_lib.auc(labels, predictions)
-    }
-
-    optimizer = gradient_descent.GradientDescentOptimizer(1.0)
-    train_op = optimizer.minimize(loss)
-
-    return model_fn_lib.EstimatorSpec(
-        mode=mode,
-        loss=loss,
-        eval_metric_ops=metrics,
-        predictions={'probabilities': predictions},
-        train_op=train_op)
-
-  def test_train(self):
-    features = np.array([[1.0], [2.0]])
-    labels = np.array([[1.0], [2.0]])
-
-    with self.cached_session():
-      with self.assertRaisesRegexp(ValueError,
-                                   'Please.+wrap.+with.+TowerOptimizer'):
-        replicated_model_fn = replicate_model_fn.replicate_model_fn(
-            self.model_fn, devices=['/gpu:0', '/gpu:1'])
-        _ = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN,
-                                {})
-
-
-class GetLossTowersTest(test_util.TensorFlowTestCase):
-
-  def model_fn(self, mode, features, labels, params):
-    c = variable_scope.get_variable(
-        'c',
-        initializer=constant_op.constant(0.25, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-
-    predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c)
-    labels = np.array([0.1, 0.2, 0.3, labels[0]])
-
-    loss = losses.absolute_difference(
-        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
-
-    return model_fn_lib.EstimatorSpec(mode=mode, loss=math_ops.reduce_sum(loss))
-
-  def test_gradients_are_computed(self):
-    with self.cached_session() as session:
-      tower_specs = replicate_model_fn._get_loss_towers(
-          self.model_fn,
-          mode=None,
-          features=[[0.6], [1.6]],
-          labels=[[0.6], [0.6]],
-          params=None,
-          config=None,
-          loss_reduction=losses.Reduction.SUM,
-          devices=['/gpu:0', '/gpu:1'],
-          local_ps_devices=['/gpu:0'],
-          name_scope_pattern='test_tower_{}')
-      session.run(variables.global_variables_initializer())
-
-      self.assertEqual(len(tower_specs), 2)
-
-      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
-      self.assertEqual('Sum:0', tower_specs[0].loss.name)
-      self.assertEqual(1.0, session.run(tower_specs[0].loss))
-
-      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
-      self.assertEqual('test_tower_1/Sum:0', tower_specs[1].loss.name)
-      # The input batch for the second tower had a loss that is 1.0
-      # bigger: 0.6 vs 1.6.
-      self.assertEqual(2.0, session.run(tower_specs[1].loss))
-
-      self.assertEqual(1, len(variables.global_variables()))
-      self.assertEqual(1, len(variables.trainable_variables()))
-
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual(0.25, session.run(c))
-
-  def test_gradients_are_computed_with_mean_reduction(self):
-    with self.cached_session() as session:
-      tower_specs = replicate_model_fn._get_loss_towers(
-          self.model_fn,
-          mode=model_fn_lib.ModeKeys.EVAL,
-          features=[[0.6], [1.6]],
-          labels=[[0.6], [0.6]],
-          params=None,
-          loss_reduction=losses.Reduction.MEAN,
-          config=None,
-          devices=['/gpu:0', '/gpu:1'],
-          local_ps_devices=['/gpu:0'],
-          name_scope_pattern='test_tower_{}')
-      session.run(variables.global_variables_initializer())
-
-      self.assertEqual(len(tower_specs), 2)
-
-      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
-      self.assertEqual('averaged_loss:0', tower_specs[0].loss.name)
-      self.assertEqual(0.5, session.run(tower_specs[0].loss))
-
-      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
-      self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name)
-      # The input batch for the second tower had a loss that is 1.0
-      # bigger: 0.6 vs 1.6.
-      self.assertEqual(1.0, session.run(tower_specs[1].loss))
-
-      self.assertEqual(1, len(variables.global_variables()))
-      self.assertEqual(1, len(variables.trainable_variables()))
-
-      with variable_scope.variable_scope('', reuse=True):
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual(0.25, session.run(c))
-
-  def test_variables_are_round_robined_correctly(self):
-    """Test that creates multiple variables and tests round-robin placement."""
-
-    def model_fn(mode, features, labels, params):
-      del params
-      for variable_name in ['a', 'b', 'c', 'd']:
-        c = variable_scope.get_variable(
-            variable_name,
-            initializer=constant_op.constant(0.25, dtype=dtypes.float64),
-            dtype=dtypes.float64)
-
-      predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c)
-      labels = np.array([0.1, 0.2, 0.3, labels[0]])
-      loss = losses.absolute_difference(
-          labels=labels,
-          predictions=predictions,
-          reduction=losses.Reduction.SUM)
-      return model_fn_lib.EstimatorSpec(
-          mode=mode, loss=math_ops.reduce_sum(loss))
-
-    with self.cached_session() as session:
-      tower_specs = replicate_model_fn._get_loss_towers(
-          model_fn,
-          mode=None,
-          features=[[0.6], [1.6], [2.6]],
-          labels=[[0.6], [0.6], [2.6]],
-          params=None,
-          loss_reduction=losses.Reduction.SUM,
-          config=None,
-          devices=['/gpu:0', '/gpu:1', '/gpu:3'],
-          local_ps_devices=['/gpu:0', '/gpu:1', '/gpu:3'],
-          name_scope_pattern='test_tower_{}')
-      session.run(variables.global_variables_initializer())
-
-      self.assertEqual(len(tower_specs), 3)
-      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
-      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
-      self.assertEqual('/device:GPU:3', tower_specs[2].loss.device)
-
-      with variable_scope.variable_scope('', reuse=True):
-        a = variable_scope.get_variable('a', dtype=dtypes.float64)
-        self.assertEqual('/device:GPU:0', a.device)
-        b = variable_scope.get_variable('b', dtype=dtypes.float64)
-        self.assertEqual('/device:GPU:1', b.device)
-        c = variable_scope.get_variable('c', dtype=dtypes.float64)
-        self.assertEqual('/device:GPU:3', c.device)
-        d = variable_scope.get_variable('d', dtype=dtypes.float64)
-        self.assertEqual('/device:GPU:0', d.device)
-
-
-class SplitBatchTest(test_util.TensorFlowTestCase):
-
-  def evaluate_shards(self, first_list, second_list):
-    evaluate_items = lambda x: x.eval()
-    return list(map(evaluate_items, first_list)), list(
-        map(evaluate_items, second_list))
-
-  def assertSparseValuesEqual(self, a, b):
-    self.assertAllEqual(a.indices, b.indices)
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.dense_shape, b.dense_shape)
-
-  def test_simple_half_split(self):
-    with self.cached_session():
-      features = [0.0, 1.0, 2.0, 3.0]
-      labels = [10.0, 11.0, 12.0, 13.0]
-      feature_shards, label_shards = replicate_model_fn._split_batch(
-          features, labels, 2, device='/gpu:0')
-
-      feature_shards, label_shards = self.evaluate_shards(
-          feature_shards, label_shards)
-
-      self.assertAllEqual([[0.0, 1.0], [2.0, 3.0]], feature_shards)
-      self.assertAllEqual([[10.0, 11.0], [12.0, 13.0]], label_shards)
-
-  def test_to_each_their_own(self):
-    with self.cached_session():
-      features = [0.0, 1.0, 2.0, 3.0]
-      labels = [10.0, 11.0, 12.0, 13.0]
-      feature_shards, label_shards = replicate_model_fn._split_batch(
-          features, labels, 4, device='/gpu:0')
-
-      feature_shards, label_shards = self.evaluate_shards(
-          feature_shards, label_shards)
-
-      self.assertAllEqual([[0.0], [1.0], [2.0], [3.0]], feature_shards)
-      self.assertAllEqual([[10.0], [11.0], [12.0], [13.0]], label_shards)
-
-  def test_one_batch(self):
-    with self.cached_session():
-      features = [0.0, 1.0, 2.0, 3.0]
-      labels = [10.0, 11.0, 12.0, 13.0]
-      feature_shards, label_shards = replicate_model_fn._split_batch(
-          features, labels, 1, device='/gpu:0')
-
-      feature_shards, label_shards = self.evaluate_shards(
-          feature_shards, label_shards)
-
-      self.assertAllEqual([[0.0, 1.0, 2.0, 3.0]], feature_shards)
-      self.assertAllEqual([[10.0, 11.0, 12.0, 13.0]], label_shards)
-
-  def test_half_split_in_dictionary(self):
-    with self.cached_session():
-      features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]}
-      labels = [10.0, 11.0, 12.0, 13.0]
-
-      feature_shards, label_shards = replicate_model_fn._split_batch(
-          features, labels, 2, device='/gpu:0')
-
-      self.assertAllEqual([0.0, 1.0], feature_shards[0]['first'].eval())
-      self.assertAllEqual([4.0, 5.0], feature_shards[0]['second'].eval())
-      self.assertAllEqual([2.0, 3.0], feature_shards[1]['first'].eval())
-      self.assertAllEqual([6.0, 7.0], feature_shards[1]['second'].eval())
-      self.assertAllEqual([10.0, 11.0], label_shards[0].eval())
-      self.assertAllEqual([12.0, 13.0], label_shards[1].eval())
-
-  def test_sparse_tensor_can_be_split_unevenly(self):
-    with self.cached_session():
-      features = {
-          'x':
-              sparse_tensor.SparseTensor(
-                  indices=[[0, 0], [1, 2], [2, 2]],
-                  values=[1.0, 2.0, 3.0],
-                  dense_shape=[3, 4])
-      }
-      labels = np.array([[1.0], [2.0]])
-
-      feature_shards, label_shards = replicate_model_fn._split_batch(
-          features, labels, 2, device='/gpu:0')
-
-      self.assertSparseValuesEqual(
-          sparse_tensor.SparseTensorValue(
-              indices=[[0, 0], [1, 2]], values=[1., 2.], dense_shape=[2, 4]),
-          feature_shards[0]['x'].eval())
-      self.assertSparseValuesEqual(
-          sparse_tensor.SparseTensorValue(
-              indices=[[0, 2]], values=[3.], dense_shape=[1, 4]),
-          feature_shards[1]['x'].eval())
-      self.assertAllEqual([[1.0]], label_shards[0].eval())
-      self.assertAllEqual([[2.0]], label_shards[1].eval())
-
-  def test_sparse_tensor_can_be_split_unevenly_repeated_row(self):
-    with self.cached_session():
-      features = {
-          'x':
-              sparse_tensor.SparseTensor(
-                  indices=[[0, 0], [1, 0], [1, 1]],
-                  values=[1.0, 2.0, 3.0],
-                  dense_shape=[3, 4])
-      }
-      labels = np.array([[1.0], [2.0]])
-
-      feature_shards, label_shards = replicate_model_fn._split_batch(
-          features, labels, 2, device='/gpu:0')
-
-      self.assertSparseValuesEqual(
-          sparse_tensor.SparseTensorValue(
-              indices=[[0, 0], [1, 0], [1, 1]],
-              values=[1., 2., 3.],
-              dense_shape=[2, 4]), feature_shards[0]['x'].eval())
-
-      second_batch = feature_shards[1]['x'].eval()
-      self.assertFalse(len(second_batch.indices))
-      self.assertFalse(len(second_batch.values))
-      self.assertAllEqual([1, 4], second_batch.dense_shape)
-      self.assertAllEqual([[1.0]], label_shards[0].eval())
-      self.assertAllEqual([[2.0]], label_shards[1].eval())
-
-  def test_one_batch_in_dictionary(self):
-    with self.cached_session() as session:  # pylint: disable=unused-variable
-      features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]}
-      labels = [10.0, 11.0, 12.0, 13.0]
-
-      feature_shards, label_shards = replicate_model_fn._split_batch(
-          features, labels, 1, device='/gpu:0')
-
-      self.assertAllEqual([0.0, 1.0, 2.0, 3.0],
-                          feature_shards[0]['first'].eval())
-      self.assertAllEqual([4.0, 5.0, 6.0, 7.0],
-                          feature_shards[0]['second'].eval())
-      self.assertAllEqual([10.0, 11.0, 12.0, 13.0], label_shards[0].eval())
-
-  def test_feature_and_label_dictionaries(self):
-    with self.cached_session() as session:  # pylint: disable=unused-variable
-      features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]}
-      labels = {'first': [10.0, 11.0], 'second': [12.0, 13.0]}
-
-      feature_shards, label_shards = replicate_model_fn._split_batch(
-          features, labels, 2, device='/gpu:0')
-
-      self.assertAllEqual([0.0, 1.0], feature_shards[0]['first'].eval())
-      self.assertAllEqual([4.0, 5.0], feature_shards[0]['second'].eval())
-      self.assertAllEqual([2.0, 3.0], feature_shards[1]['first'].eval())
-      self.assertAllEqual([6.0, 7.0], feature_shards[1]['second'].eval())
-      self.assertAllEqual([10.0], label_shards[0]['first'].eval())
-      self.assertAllEqual([12.0], label_shards[0]['second'].eval())
-      self.assertAllEqual([11], label_shards[1]['first'].eval())
-      self.assertAllEqual([13.0], label_shards[1]['second'].eval())
-
-
-class TrainSpecTest(test_util.TensorFlowTestCase):
-
-  expected_predictions = {}
-
-  def create_estimator_spec(self, loss):
-    return model_fn_lib.EstimatorSpec(
-        mode=model_fn_lib.ModeKeys.TRAIN,
-        loss=loss,
-        train_op=loss,  # Not used; currently required.
-        predictions=self.expected_predictions)
-
-  def create_constant_loss(self, loss_value):
-    return constant_op.constant(loss_value, dtype=dtypes.float64)
-
-  def test_example(self):
-    with self.cached_session() as session:
-      tower_losses = list(map(self.create_constant_loss, [2, 4, 6]))
-      tower_specs = list(map(self.create_estimator_spec, tower_losses))
-
-      expected_train_op = tower_losses[1]
-
-      estimator_spec = replicate_model_fn._train_spec(
-          tower_specs, expected_train_op, aggregation_device='/gpu:0')
-
-      self.assertEqual(expected_train_op, estimator_spec.train_op)
-      self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss))
-      self.assertEqual(self.expected_predictions, estimator_spec.predictions)
-
-
-class EvalSpecTest(test_util.TensorFlowTestCase):
-
-  def create_estimator_spec(self, loss, metrics):
-    return model_fn_lib.EstimatorSpec(
-        mode=model_fn_lib.ModeKeys.EVAL, loss=loss, eval_metric_ops=metrics)
-
-  def create_constant_loss(self, loss_value):
-    return constant_op.constant(loss_value, dtype=dtypes.float64)
-
-  def create_eval_metrics(self, noise):
-    predictions = np.array([0.1, 0.2, 0.3, 0.6 + noise])
-    labels = np.array([0.1, 0.2, 0.3, 0.6])
-
-    metrics = {
-        'accuracy': metrics_lib.accuracy(labels, predictions),
-        'auc': metrics_lib.auc(labels, predictions)
-    }
-    return metrics
-
-  def test_example(self):
-    with self.cached_session() as session:
-      tower_losses = map(self.create_constant_loss, [2, 4, 6])
-      tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3])
-      tower_specs = [
-          self.create_estimator_spec(l, m)
-          for l, m in zip(tower_losses, tower_metrics)
-      ]
-      session.run(variables.local_variables_initializer())
-
-      estimator_spec = replicate_model_fn._eval_spec(
-          tower_specs, aggregation_device='/device:GPU:0')
-
-      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
-      auc, b = estimator_spec.eval_metric_ops['auc']
-
-      self.assertEqual('/device:CPU:0', accuracy.device)
-      self.assertEqual('/device:CPU:0', auc.device)
-
-      session.run([a, b])
-      accuracy, auc = session.run([accuracy, auc])
-
-      self.assertNear((12 - 2) / 12, accuracy, 0.01)
-      self.assertEqual(0, auc)
-      self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss))
-
-  def test_handles_single_tower(self):
-    with self.cached_session() as session:
-      tower_losses = map(self.create_constant_loss, [5])
-      tower_metrics = map(self.create_eval_metrics, [0.2])
-      tower_specs = [
-          self.create_estimator_spec(l, m)
-          for l, m in zip(tower_losses, tower_metrics)
-      ]
-      session.run(variables.local_variables_initializer())
-
-      estimator_spec = replicate_model_fn._eval_spec(
-          tower_specs, aggregation_device='/device:GPU:0')
-
-      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
-      auc, b = estimator_spec.eval_metric_ops['auc']
-
-      self.assertEqual('/device:CPU:0', accuracy.device)
-      self.assertEqual('/device:CPU:0', auc.device)
-
-      session.run([a, b])
-      accuracy = session.run(accuracy)
-      auc = session.run(auc)
-
-      self.assertNear((4 - 1) / 4, accuracy, 0.01)
-      self.assertEqual(0, auc)
-      self.assertEqual(5, session.run(estimator_spec.loss))
-
-
-class PredictSpecTest(test_util.TensorFlowTestCase):
-
-  def model_fn(self, mode, features, labels, params):
-    c = variable_scope.get_variable(
-        'c',
-        initializer=constant_op.constant(0.25, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-
-    predictions = math_ops.add(np.array([features[0], features[0]]), c)
-
-    return model_fn_lib.EstimatorSpec(
-        mode=model_fn_lib.ModeKeys.PREDICT,
-        predictions={
-            'probabilities': predictions
-        })
-
-  def test_example(self):
-    with self.cached_session() as session:
-      tower_specs = replicate_model_fn._get_loss_towers(
-          self.model_fn,
-          mode=None,
-          features=[[0.1], [0.2]],
-          loss_reduction=losses.Reduction.SUM,
-          labels=[[], []],
-          params=None,
-          config=None,
-          devices=['/gpu:0', '/gpu:1'],
-          local_ps_devices=['/gpu:0'],
-      )
-      session.run(variables.global_variables_initializer())
-
-      estimator_spec = replicate_model_fn._predict_spec(
-          tower_specs, aggregation_device='/gpu:0')
-
-      self.assertEqual('/device:GPU:0',
-                       estimator_spec.predictions['probabilities'].device)
-      self.assertAllClose({
-          'probabilities': np.array([0.35, 0.35, 0.45, 0.45])
-      }, session.run(estimator_spec.predictions))
-
-
-class ReduceMetricVariablesTest(test_util.TensorFlowTestCase):
-
-  def create_metric_variable(self, initial_value, name):
-    return variable_scope.variable(
-        initial_value,
-        trainable=False,
-        collections=[ops_lib.GraphKeys.METRIC_VARIABLES],
-        validate_shape=True,
-        name=name)
-
-  def create_tower_metrics(self, tower_id):
-    with variable_scope.variable_scope('', reuse=(tower_id != 0)):
-      self.create_metric_variable(1.3 * (tower_id + 1), 'total')
-      self.create_metric_variable(2.3 * (tower_id + 1), 'count')
-      self.create_metric_variable(
-          np.array([3.3, 3.5, 3.7]) * (tower_id + 1), 'total')
-
-  def test_example(self):
-    with self.cached_session() as session:
-      for tower_id in range(3):
-        self.create_tower_metrics(tower_id)
-
-      session.run(
-          variables.variables_initializer(
-              ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)))
-
-      session.run(
-          replicate_model_fn._reduce_metric_variables(number_of_towers=3))
-
-      # 1st tower = 1.3, 2.3,  [3.3, 3.5, 3.7]
-      # 2nd tower = 2.6, 4.6,  [6.6, 7.0, 7.4]
-      # 3rd tower = 3.9, 6.9,  [9.9, 10.5, 11.1]
-      # Reduced =   7.8, 13.8, [19.8, 21.0, 22.2]
-      # Towers are accumulated in the first tower.
-      local_metrics = session.run(
-          ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))
-
-      self.assertNear(7.8, local_metrics[0], 0.01)
-      self.assertNear(13.8, local_metrics[1], 0.01)
-      self.assertAllClose([19.8, 21., 22.1], local_metrics[2], 0.01)
-      self.assertNear(0.0, local_metrics[3], 0.01)
-      self.assertNear(0.0, local_metrics[4], 0.01)
-      self.assertAllClose([0.0, 0.0, 0.0], local_metrics[5], 0.01)
-      self.assertNear(0.0, local_metrics[6], 0.01)
-      self.assertNear(0.0, local_metrics[7], 0.01)
-      self.assertAllClose([0.0, 0.0, 0.0], local_metrics[8], 0.01)
-
-  def test_reduce_is_idempotent(self):
-    with self.cached_session() as session:
-      for tower_id in range(3):
-        self.create_tower_metrics(tower_id)
-
-      session.run(
-          variables.variables_initializer(
-              ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)))
-
-      for _ in range(20):
-        session.run(
-            replicate_model_fn._reduce_metric_variables(number_of_towers=3))
-
-      local_metrics = session.run(
-          ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))
-
-      self.assertNear(7.8, local_metrics[0], 0.01)
-      self.assertNear(13.8, local_metrics[1], 0.01)
-      self.assertAllClose([19.8, 21., 22.1], local_metrics[2], 0.01)
-      self.assertNear(0.0, local_metrics[3], 0.01)
-      self.assertNear(0.0, local_metrics[4], 0.01)
-      self.assertAllClose([0.0, 0.0, 0.0], local_metrics[5], 0.01)
-      self.assertNear(0.0, local_metrics[6], 0.01)
-      self.assertNear(0.0, local_metrics[7], 0.01)
-      self.assertAllClose([0.0, 0.0, 0.0], local_metrics[8], 0.01)
-
-  def test_handles_single_tower(self):
-    with self.cached_session() as session:
-      self.create_tower_metrics(0)
-      session.run(
-          variables.variables_initializer(
-              ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)))
-
-      session.run(
-          replicate_model_fn._reduce_metric_variables(number_of_towers=1))
-
-      local_metrics = session.run(
-          ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))
-
-      self.assertNear(1.3, local_metrics[0], 0.01)
-      self.assertNear(2.3, local_metrics[1], 0.01)
-      self.assertAllClose([3.3, 3.5, 3.7], local_metrics[2], 0.01)
-
-  def test_doesnt_accept_uneven_number_of_variables(self):
-    with self.cached_session() as session:
-      for tower_id in range(3):
-        self.create_tower_metrics(tower_id)
-      self.create_metric_variable(-1.0, 'oddball')
-
-      session.run(
-          variables.variables_initializer(
-              ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)))
-
-      with self.assertRaisesRegexp(
-          ValueError, '.+Expected.+local.+variables.+but.+got.+instead.+'):
-        session.run(
-            replicate_model_fn._reduce_metric_variables(number_of_towers=3))
-
-
-class MergeExportOutputsTest(test_util.TensorFlowTestCase):
-
-  def model_fn(self, mode, features, labels, params):
-    c = variable_scope.get_variable(
-        'c',
-        initializer=constant_op.constant(10, dtype=dtypes.float64),
-        dtype=dtypes.float64)
-
-    predictions = {'probabilities': math_ops.multiply(features, c)}
-    loss = losses.absolute_difference(
-        labels=labels,
-        predictions=predictions['probabilities'],
-        reduction=losses.Reduction.SUM)
-
-    metrics = {
-        'accuracy': metrics_lib.accuracy(labels, predictions['probabilities']),
-        'auc': metrics_lib.auc(labels, predictions['probabilities'])
-    }
-    tensor_string_repr = str(features)
-    classes = constant_op.constant(
-        re.search('(split_inputs/split:[0-9])', tensor_string_repr).group(1),
-        dtype=dtypes.string)
-
-    export_outputs = {
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-            export_output.PredictOutput(predictions),
-        'classification_output':
-            export_output.ClassificationOutput(predictions['probabilities'],
-                                               classes),
-        'classification_scores':
-            export_output.ClassificationOutput(
-                scores=predictions['probabilities']),
-        'classification_classes':
-            export_output.ClassificationOutput(classes=classes),
-        'regression_output':
-            export_output.RegressionOutput(predictions['probabilities']),
-    }
-
-    return model_fn_lib.EstimatorSpec(
-        mode=mode,
-        loss=math_ops.reduce_sum(loss),
-        eval_metric_ops=metrics,
-        predictions=predictions,
-        export_outputs=export_outputs)
-
-  def replicate_estimator_spec(self, session):
-    features = np.array([0.01, 0.002])
-    labels = np.array([0.01, 0.02])
-
-    replicated_model_fn = replicate_model_fn.replicate_model_fn(
-        self.model_fn, devices=['/gpu:0', '/gpu:1'])
-    estimator_spec = replicated_model_fn(features, labels,
-                                         model_fn_lib.ModeKeys.PREDICT, {})
-    session.run(variables.global_variables_initializer())
-    return estimator_spec
-
-  def test_merge_predict_output(self):
-    with self.cached_session() as session:
-      estimator_spec = self.replicate_estimator_spec(session)
-      self.assertAllClose(
-          {
-              'probabilities': np.array([0.1, 0.02])
-          },
-          session.run(estimator_spec.export_outputs[
-              signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY].outputs))
-
-  def test_merge_classification_output_scores_classes(self):
-    with self.cached_session() as session:
-      estimator_spec = self.replicate_estimator_spec(session)
-      self.assertAllClose(
-          [0.1, 0.02],
-          session.run(
-              estimator_spec.export_outputs['classification_output'].scores))
-      self.assertAllEqual(
-          [b'split_inputs/split:0', b'split_inputs/split:1'],
-          session.run(
-              estimator_spec.export_outputs['classification_output'].classes))
-
-  def test_merge_classification_output_scores(self):
-    with self.cached_session() as session:
-      estimator_spec = self.replicate_estimator_spec(session)
-      self.assertAllClose(
-          [0.1, 0.02],
-          session.run(
-              estimator_spec.export_outputs['classification_scores'].scores))
-      self.assertEqual(
-          None, estimator_spec.export_outputs['classification_scores'].classes)
-
-  def test_merge_classification_output_classes(self):
-    with self.cached_session() as session:
-      estimator_spec = self.replicate_estimator_spec(session)
-      self.assertAllEqual(
-          [b'split_inputs/split:0', b'split_inputs/split:1'],
-          session.run(
-              estimator_spec.export_outputs['classification_classes'].classes))
-      self.assertEqual(
-          None, estimator_spec.export_outputs['classification_classes'].scores)
-
-  def test_merge_regression_output(self):
-    with self.cached_session() as session:
-      estimator_spec = self.replicate_estimator_spec(session)
-      self.assertAllClose(
-          [0.1, 0.02],
-          session.run(estimator_spec.export_outputs['regression_output'].value))
-
-
-class GetLocalDevicesTest(test_util.TensorFlowTestCase):
-
-  def test_there_is_at_least_a_cpu(self):
-    self.assertTrue(replicate_model_fn._get_local_devices('CPU'))
-
-  def test_there_is_no_xpu(self):
-    self.assertFalse(
-        replicate_model_fn._get_local_devices('XPU'))  # XPU doesn't exist.
-
-  def test_whether_there_is_a_gpu(self):
-    if test.is_gpu_available():
-      self.assertTrue(len(replicate_model_fn._get_local_devices('GPU')))
-
-
-class LocalDeviceSetterTest(test_util.TensorFlowTestCase):
-
-  def test_vars_are_on_ps_but_ops_are_on_workers(self):
-    ps_devices = ['/device:GPU:3']
-    round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices))
-
-    local_device_setter = replicate_model_fn._local_device_setter(
-        ps_devices=ps_devices,
-        ps_strategy=round_robin,
-        worker_device='/device:GPU:2')
-
-    with ops_lib.device(local_device_setter):
-      a = variables.Variable(0.01)
-      self.assertEqual('/device:GPU:3', a.device)
-
-      b = variables.Variable(0.02)
-      self.assertEqual('/device:GPU:3', b.device)
-
-      c = variables.Variable(0.03)
-      self.assertEqual('/device:GPU:3', c.device)
-
-      a_op = array_ops.concat(a, axis=0)
-      self.assertEqual('/device:GPU:2', a_op.device)
-
-      b_op = array_ops.concat(b, axis=0)
-      self.assertEqual('/device:GPU:2', b_op.device)
-
-  def test_round_robin_placement(self):
-    ps_devices = [
-        '/device:GPU:0', '/device:GPU:1', '/device:GPU:3', '/device:GPU:4'
-    ]
-    round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices))
-
-    local_device_setter = replicate_model_fn._local_device_setter(
-        ps_devices=ps_devices,
-        ps_strategy=round_robin,
-        worker_device='/device:GPU:2')
-
-    with ops_lib.device(local_device_setter):
-      a = variables.Variable(0.01)
-      self.assertEqual('/device:GPU:0', a.device)
-
-      b = variables.Variable(0.02)
-      self.assertEqual('/device:GPU:1', b.device)
-
-      c = variables.Variable(0.03)
-      self.assertEqual('/device:GPU:3', c.device)
-
-      a_op = array_ops.concat(a, axis=0)
-      self.assertEqual('/device:GPU:2', a_op.device)
-
-      b_op = array_ops.concat(b, axis=0)
-      self.assertEqual('/device:GPU:2', b_op.device)
-
-      c = variables.Variable(0.03)
-      self.assertEqual('/device:GPU:4', c.device)
-
-      d = variables.Variable(0.03)
-      self.assertEqual('/device:GPU:0', d.device)
-
-      c_op = array_ops.concat(c, axis=0)
-      self.assertEqual('/device:GPU:2', c_op.device)
-
-
-class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase):
-
-  def test_vectors(self):
-    with self.cached_session() as session:
-      total = replicate_model_fn._compute_sum_on_device(
-          [1.0, 2.0, 3.0, 4.0], device='/device:GPU:0', name='test_sum')
-
-      self.assertEqual('/device:GPU:0', total.device)
-      self.assertEqual('test_sum', total.op.name)
-      self.assertEqual(10.0, session.run(total))
-
-  def test_tensors(self):
-    with self.cached_session() as session:
-      total = replicate_model_fn._compute_sum_on_device(
-          [[1.0, 2.0], [3.0, 4.0]], device='/device:GPU:0', name='test_sum')
-
-      self.assertEqual('/device:GPU:0', total.device)
-      self.assertEqual('test_sum', total.op.name)
-      self.assertAllEqual([4.0, 6.0], session.run(total))
-
-  def test_indexedslices(self):
-    with self.cached_session() as session:
-      a = ops_lib.IndexedSlices(
-          constant_op.constant([1.0, 2.0]), [0, 1],
-          dense_shape=constant_op.constant([2]))
-      b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1])
-
-      total = replicate_model_fn._compute_sum_on_device(
-          [a, b], device='/device:GPU:0')
-
-      self.assertEqual('/device:GPU:0', total.device)
-      self.assertAllEqual([4.0, 6.0],
-                          session.run(ops_lib.convert_to_tensor(total)))
-
-  def test_indexedslices_higher_dimensions(self):
-    with self.cached_session() as session:
-      a = ops_lib.IndexedSlices(
-          constant_op.constant([[1.0, 5.0], [2.0, 6.0]]), [0, 1],
-          dense_shape=constant_op.constant([2, 4]))
-      b = ops_lib.IndexedSlices(
-          constant_op.constant([[3.0, 7.0], [4.0, 8.0]]), [0, 1])
-
-      total = replicate_model_fn._compute_sum_on_device(
-          [a, b], device='/device:GPU:0')
-
-      self.assertEqual('/device:GPU:0', total.device)
-      self.assertAllEqual([[4.0, 12.0], [6.0, 14.0]],
-                          session.run(ops_lib.convert_to_tensor(total)))
-
-  def test_indexedslices_some_dont_overlap(self):
-    with self.cached_session() as session:
-      a = ops_lib.IndexedSlices(
-          constant_op.constant([1.0, 2.0]), [0, 3],
-          dense_shape=constant_op.constant([4]))
-      b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1])
-
-      total = replicate_model_fn._compute_sum_on_device(
-          [a, b], device='/device:GPU:0')
-
-      self.assertEqual('/device:GPU:0', total.device)
-      self.assertAllEqual([4.0, 4.0, 0.0, 2.0],
-                          session.run(ops_lib.convert_to_tensor(total)))
-
-  def test_no_name_for_indexslices(self):
-    a = ops_lib.IndexedSlices(
-        constant_op.constant([1.0, 2.0]), [0, 1],
-        dense_shape=constant_op.constant([2]))
-    b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1])
-
-    with self.assertRaisesRegexp(ValueError, '.+name.+not.+expected.+'):
-      _ = replicate_model_fn._compute_sum_on_device(
-          [a, b], device='/device:GPU:0', name='cant_name_indexslices')
-
-
-class ConcatTensorDictsTest(test_util.TensorFlowTestCase):
-
-  def test_example(self):
-    tensor_dicts = [
-        {
-            'a': np.array([1.0, 2.0]),
-            'b': np.array([11.0]),
-            'c': np.array([21.0]),
-        },
-        {
-            'a': np.array([3.0]),
-            'b': np.array([12.0, 13.0]),
-        },
-        {
-            'b': np.array([14.0]),
-        },
-    ]
-
-    with self.cached_session() as session:
-      self.assertAllClose({
-          'a': np.array([1.0, 2.0, 3.0]),
-          'b': np.array([11.0, 12.0, 13.0, 14.0]),
-          'c': np.array([21.0]),
-      }, session.run(replicate_model_fn._concat_tensor_dicts(*tensor_dicts)))
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/rnn.py b/tensorflow/contrib/estimator/python/estimator/rnn.py
index c595f47395..60a2cd0912 100644
--- a/tensorflow/contrib/estimator/python/estimator/rnn.py
+++ b/tensorflow/contrib/estimator/python/estimator/rnn.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,569 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Recurrent Neural Network estimators."""
+"""rnn python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-from tensorflow.contrib.estimator.python.estimator import extenders
-from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column as feature_column_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.layers import core as core_layers
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import partitioned_variables
-from tensorflow.python.ops import rnn
-from tensorflow.python.ops import rnn_cell
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.summary import summary
-from tensorflow.python.training import optimizer as optimizer_lib
-from tensorflow.python.training import training_util
-
-
-# The defaults are historical artifacts of the initial implementation, but seem
-# reasonable choices.
-_DEFAULT_LEARNING_RATE = 0.05
-_DEFAULT_CLIP_NORM = 5.0
-
-_CELL_TYPES = {'basic_rnn': rnn_cell.BasicRNNCell,
-               'lstm': rnn_cell.BasicLSTMCell,
-               'gru': rnn_cell.GRUCell}
-
-# Indicates no value was provided by the user to a kwarg.
-USE_DEFAULT = object()
-
-
-def _single_rnn_cell(num_units, cell_type):
-  cell_type = _CELL_TYPES.get(cell_type, cell_type)
-  if not cell_type or not issubclass(cell_type, rnn_cell.RNNCell):
-    raise ValueError('Supported cell types are {}; got {}'.format(
-        list(_CELL_TYPES.keys()), cell_type))
-  return cell_type(num_units=num_units)
-
-
-def _make_rnn_cell_fn(num_units, cell_type='basic_rnn'):
-  """Convenience function to create `rnn_cell_fn` for canned RNN Estimators.
-
-  Args:
-    num_units: Iterable of integer number of hidden units per RNN layer.
-    cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
-      the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
-      `'gru'`.
-
-  Returns:
-    A function that takes a single argument, an instance of
-    `tf.estimator.ModeKeys`, and returns an instance derived from
-    `tf.nn.rnn_cell.RNNCell`.
-
-  Raises:
-    ValueError: If cell_type is not supported.
-  """
-  def rnn_cell_fn(mode):
-    # Unused. Part of the rnn_cell_fn interface since user specified functions
-    # may need different behavior across modes (e.g. dropout).
-    del mode
-    cells = [_single_rnn_cell(n, cell_type) for n in num_units]
-    if len(cells) == 1:
-      return cells[0]
-    return rnn_cell.MultiRNNCell(cells)
-  return rnn_cell_fn
-
-
-def _select_last_activations(activations, sequence_lengths):
-  """Selects the nth set of activations for each n in `sequence_length`.
-
-  Returns a `Tensor` of shape `[batch_size, k]`. If `sequence_length` is not
-  `None`, then `output[i, :] = activations[i, sequence_length[i] - 1, :]`. If
-  `sequence_length` is `None`, then `output[i, :] = activations[i, -1, :]`.
-
-  Args:
-    activations: A `Tensor` with shape `[batch_size, padded_length, k]`.
-    sequence_lengths: A `Tensor` with shape `[batch_size]` or `None`.
-  Returns:
-    A `Tensor` of shape `[batch_size, k]`.
-  """
-  with ops.name_scope(
-      'select_last_activations', values=[activations, sequence_lengths]):
-    activations_shape = array_ops.shape(activations)
-    batch_size = activations_shape[0]
-    padded_length = activations_shape[1]
-    output_units = activations_shape[2]
-    if sequence_lengths is None:
-      sequence_lengths = padded_length
-    start_indices = math_ops.to_int64(
-        math_ops.range(batch_size) * padded_length)
-    last_indices = start_indices + sequence_lengths - 1
-    reshaped_activations = array_ops.reshape(
-        activations, [batch_size * padded_length, output_units])
-
-    last_activations = array_ops.gather(reshaped_activations, last_indices)
-    last_activations.set_shape([activations.shape[0], activations.shape[2]])
-    return last_activations
-
-
-def _rnn_logit_fn_builder(output_units, rnn_cell_fn, sequence_feature_columns,
-                          context_feature_columns, input_layer_partitioner):
-  """Function builder for a rnn logit_fn.
-
-  Args:
-    output_units: An int indicating the dimension of the logit layer.
-    rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
-      returns an object of type `tf.nn.rnn_cell.RNNCell`.
-    sequence_feature_columns: An iterable containing the `FeatureColumn`s
-      that represent sequential input.
-    context_feature_columns: An iterable containing the `FeatureColumn`s
-      that represent contextual input.
-    input_layer_partitioner: Partitioner for input layer.
-
-  Returns:
-    A logit_fn (see below).
-
-  Raises:
-    ValueError: If output_units is not an int.
-  """
-  if not isinstance(output_units, int):
-    raise ValueError('output_units must be an int.  Given type: {}'.format(
-        type(output_units)))
-
-  def rnn_logit_fn(features, mode):
-    """Recurrent Neural Network logit_fn.
-
-    Args:
-      features: This is the first item returned from the `input_fn`
-                passed to `train`, `evaluate`, and `predict`. This should be a
-                single `Tensor` or `dict` of same.
-      mode: Optional. Specifies if this training, evaluation or prediction. See
-            `ModeKeys`.
-
-    Returns:
-      A `Tensor` representing the logits.
-    """
-    with variable_scope.variable_scope(
-        'sequence_input_layer',
-        values=tuple(six.itervalues(features)),
-        partitioner=input_layer_partitioner):
-      sequence_input, sequence_length = seq_fc.sequence_input_layer(
-          features=features, feature_columns=sequence_feature_columns)
-      summary.histogram('sequence_length', sequence_length)
-
-      if context_feature_columns:
-        context_input = feature_column_lib.input_layer(
-            features=features,
-            feature_columns=context_feature_columns)
-        sequence_input = seq_fc.concatenate_context_input(
-            context_input, sequence_input)
-
-    cell = rnn_cell_fn(mode)
-    # Ignore output state.
-    rnn_outputs, _ = rnn.dynamic_rnn(
-        cell=cell,
-        inputs=sequence_input,
-        sequence_length=sequence_length,
-        dtype=dtypes.float32,
-        time_major=False)
-    last_activations = _select_last_activations(rnn_outputs, sequence_length)
-
-    with variable_scope.variable_scope('logits', values=(rnn_outputs,)):
-      logits = core_layers.dense(
-          last_activations,
-          units=output_units,
-          activation=None,
-          kernel_initializer=init_ops.glorot_uniform_initializer())
-    return logits
-
-  return rnn_logit_fn
-
-
-def _rnn_model_fn(features,
-                  labels,
-                  mode,
-                  head,
-                  rnn_cell_fn,
-                  sequence_feature_columns,
-                  context_feature_columns,
-                  optimizer='Adagrad',
-                  input_layer_partitioner=None,
-                  config=None):
-  """Recurrent Neural Net model_fn.
-
-  Args:
-    features: dict of `Tensor` and `SparseTensor` objects returned from
-      `input_fn`.
-    labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels.
-    mode: Defines whether this is training, evaluation or prediction.
-      See `ModeKeys`.
-    head: A `head_lib._Head` instance.
-    rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
-      returns an object of type `tf.nn.rnn_cell.RNNCell`.
-    sequence_feature_columns: Iterable containing `FeatureColumn`s that
-      represent sequential model inputs.
-    context_feature_columns: Iterable containing `FeatureColumn`s that
-      represent model inputs not associated with a specific timestep.
-    optimizer: String, `tf.Optimizer` object, or callable that creates the
-      optimizer to use for training. If not specified, will use the Adagrad
-      optimizer with a default learning rate of 0.05 and gradient clip norm of
-      5.0.
-    input_layer_partitioner: Partitioner for input layer. Defaults
-      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-    config: `RunConfig` object to configure the runtime settings.
-
-  Returns:
-    An `EstimatorSpec` instance.
-
-  Raises:
-    ValueError: If mode or optimizer is invalid, or features has the wrong type.
-  """
-  if not isinstance(features, dict):
-    raise ValueError('features should be a dictionary of `Tensor`s. '
-                     'Given type: {}'.format(type(features)))
-
-  # If user does not provide an optimizer instance, use the optimizer specified
-  # by the string with default learning rate and gradient clipping.
-  if not isinstance(optimizer, optimizer_lib.Optimizer):
-    optimizer = optimizers.get_optimizer_instance(
-        optimizer, learning_rate=_DEFAULT_LEARNING_RATE)
-    optimizer = extenders.clip_gradients_by_norm(optimizer, _DEFAULT_CLIP_NORM)
-
-  num_ps_replicas = config.num_ps_replicas if config else 0
-  partitioner = partitioned_variables.min_max_variable_partitioner(
-      max_partitions=num_ps_replicas)
-  with variable_scope.variable_scope(
-      'rnn',
-      values=tuple(six.itervalues(features)),
-      partitioner=partitioner):
-    input_layer_partitioner = input_layer_partitioner or (
-        partitioned_variables.min_max_variable_partitioner(
-            max_partitions=num_ps_replicas,
-            min_slice_size=64 << 20))
-
-    logit_fn = _rnn_logit_fn_builder(
-        output_units=head.logits_dimension,
-        rnn_cell_fn=rnn_cell_fn,
-        sequence_feature_columns=sequence_feature_columns,
-        context_feature_columns=context_feature_columns,
-        input_layer_partitioner=input_layer_partitioner)
-    logits = logit_fn(features=features, mode=mode)
-
-    def _train_op_fn(loss):
-      """Returns the op to optimize the loss."""
-      return optimizer.minimize(
-          loss,
-          global_step=training_util.get_global_step())
-
-    return head.create_estimator_spec(
-        features=features,
-        mode=mode,
-        labels=labels,
-        train_op_fn=_train_op_fn,
-        logits=logits)
-
-
-def _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type):
-  """Assert arguments are valid and return rnn_cell_fn."""
-  if rnn_cell_fn and (num_units or cell_type != USE_DEFAULT):
-    raise ValueError(
-        'num_units and cell_type must not be specified when using rnn_cell_fn'
-    )
-  if not rnn_cell_fn:
-    if cell_type == USE_DEFAULT:
-      cell_type = 'basic_rnn'
-    rnn_cell_fn = _make_rnn_cell_fn(num_units, cell_type)
-  return rnn_cell_fn
-
-
-class RNNClassifier(estimator.Estimator):
-  """A classifier for TensorFlow RNN models.
-
-  Trains a recurrent neural network model to classify instances into one of
-  multiple classes.
-
-  Example:
-
-  ```python
-  token_sequence = sequence_categorical_column_with_hash_bucket(...)
-  token_emb = embedding_column(categorical_column=token_sequence, ...)
-
-  estimator = RNNClassifier(
-      sequence_feature_columns=[token_emb],
-      num_units=[32, 16], cell_type='lstm')
-
-  # Input builders
-  def input_fn_train: # returns x, y
-    pass
-  estimator.train(input_fn=input_fn_train, steps=100)
-
-  def input_fn_eval: # returns x, y
-    pass
-  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
-  def input_fn_predict: # returns x, None
-    pass
-  predictions = estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-  otherwise there will be a `KeyError`:
-
-  * if `weight_column` is not `None`, a feature with
-    `key=weight_column` whose value is a `Tensor`.
-  * for each `column` in `sequence_feature_columns`:
-    - a feature with `key=column.name` whose `value` is a `SparseTensor`.
-  * for each `column` in `context_feature_columns`:
-    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
-      with `key` the id column name, the second with `key` the weight column
-      name. Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss is calculated by using softmax cross entropy.
-
-  @compatibility(eager)
-  Estimators are not compatible with eager execution.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               sequence_feature_columns,
-               context_feature_columns=None,
-               num_units=None,
-               cell_type=USE_DEFAULT,
-               rnn_cell_fn=None,
-               model_dir=None,
-               n_classes=2,
-               weight_column=None,
-               label_vocabulary=None,
-               optimizer='Adagrad',
-               loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
-               input_layer_partitioner=None,
-               config=None):
-    """Initializes a `RNNClassifier` instance.
-
-    Args:
-      sequence_feature_columns: An iterable containing the `FeatureColumn`s
-        that represent sequential input. All items in the set should either be
-        sequence columns (e.g. `sequence_numeric_column`) or constructed from
-        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
-        input).
-      context_feature_columns: An iterable containing the `FeatureColumn`s
-        for contextual input. The data represented by these columns will be
-        replicated and given to the RNN at each timestep. These columns must be
-        instances of classes derived from `_DenseColumn` such as
-        `numeric_column`, not the sequential variants.
-      num_units: Iterable of integer number of hidden units per RNN layer. If
-        set, `cell_type` must also be specified and `rnn_cell_fn` must be
-        `None`.
-      cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
-        the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
-        `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn`
-        must be `None`.
-      rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
-        returns an object of type `tf.nn.rnn_cell.RNNCell` that will be used to
-        construct the RNN. If set, `num_units` and `cell_type` cannot be set.
-        This is for advanced users who need additional customization beyond
-        `num_units` and `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is
-        needed for stacked RNNs.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator to
-        continue training a previously saved model.
-      n_classes: Number of label classes. Defaults to 2, namely binary
-        classification. Must be > 1.
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to down weight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      label_vocabulary: A list of strings represents possible label values. If
-        given, labels must be string type and have any value in
-        `label_vocabulary`. If it is not given, that means labels are
-        already encoded as integer or float within [0, 1] for `n_classes=2` and
-        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
-        Also there will be errors if vocabulary is not provided and labels are
-        string.
-      optimizer: An instance of `tf.Optimizer` or string specifying optimizer
-        type. Defaults to Adagrad optimizer.
-      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
-      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
-        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-      config: `RunConfig` object to configure the runtime settings.
-
-    Raises:
-      ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
-        compatible.
-    """
-    rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)
-
-    if n_classes == 2:
-      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
-          weight_column=weight_column,
-          label_vocabulary=label_vocabulary,
-          loss_reduction=loss_reduction)
-    else:
-      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
-          n_classes,
-          weight_column=weight_column,
-          label_vocabulary=label_vocabulary,
-          loss_reduction=loss_reduction)
-
-    def _model_fn(features, labels, mode, config):
-      return _rnn_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          rnn_cell_fn=rnn_cell_fn,
-          sequence_feature_columns=tuple(sequence_feature_columns or []),
-          context_feature_columns=tuple(context_feature_columns or []),
-          optimizer=optimizer,
-          input_layer_partitioner=input_layer_partitioner,
-          config=config)
-    super(RNNClassifier, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
-
-
-class RNNEstimator(estimator.Estimator):
-  """An Estimator for TensorFlow RNN models with user-specified head.
-
-  Example:
-
-  ```python
-  token_sequence = sequence_categorical_column_with_hash_bucket(...)
-  token_emb = embedding_column(categorical_column=token_sequence, ...)
-
-  estimator = RNNEstimator(
-      head=tf.contrib.estimator.regression_head(),
-      sequence_feature_columns=[token_emb],
-      num_units=[32, 16], cell_type='lstm')
-
-  # Or with custom RNN cell:
-  def rnn_cell_fn(mode):
-    cells = [ tf.contrib.rnn.LSTMCell(size) for size in [32, 16] ]
-    if mode == tf.estimator.ModeKeys.TRAIN:
-      cells = [ tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=0.5)
-                    for cell in cells ]
-    return tf.contrib.rnn.MultiRNNCell(cells)
-
-  estimator = RNNEstimator(
-      head=tf.contrib.estimator.regression_head(),
-      sequence_feature_columns=[token_emb],
-      rnn_cell_fn=rnn_cell_fn)
-
-  # Input builders
-  def input_fn_train: # returns x, y
-    pass
-  estimator.train(input_fn=input_fn_train, steps=100)
-
-  def input_fn_eval: # returns x, y
-    pass
-  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
-  def input_fn_predict: # returns x, None
-    pass
-  predictions = estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-  otherwise there will be a `KeyError`:
-
-  * if the head's `weight_column` is not `None`, a feature with
-    `key=weight_column` whose value is a `Tensor`.
-  * for each `column` in `sequence_feature_columns`:
-    - a feature with `key=column.name` whose `value` is a `SparseTensor`.
-  * for each `column` in `context_feature_columns`:
-    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
-      with `key` the id column name, the second with `key` the weight column
-      name. Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss and predicted output are determined by the specified head.
-
-  @compatibility(eager)
-  Estimators are not compatible with eager execution.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               head,
-               sequence_feature_columns,
-               context_feature_columns=None,
-               num_units=None,
-               cell_type=USE_DEFAULT,
-               rnn_cell_fn=None,
-               model_dir=None,
-               optimizer='Adagrad',
-               input_layer_partitioner=None,
-               config=None):
-    """Initializes a `RNNClassifier` instance.
-
-    Args:
-      head: A `_Head` instance constructed with a method such as
-        `tf.contrib.estimator.multi_label_head`. This specifies the model's
-        output and loss function to be optimized.
-      sequence_feature_columns: An iterable containing the `FeatureColumn`s
-        that represent sequential input. All items in the set should either be
-        sequence columns (e.g. `sequence_numeric_column`) or constructed from
-        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
-        input).
-      context_feature_columns: An iterable containing the `FeatureColumn`s
-        for contextual input. The data represented by these columns will be
-        replicated and given to the RNN at each timestep. These columns must be
-        instances of classes derived from `_DenseColumn` such as
-        `numeric_column`, not the sequential variants.
-      num_units: Iterable of integer number of hidden units per RNN layer. If
-        set, `cell_type` must also be specified and `rnn_cell_fn` must be
-        `None`.
-      cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
-        the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
-        `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn`
-        must be `None`.
-      rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
-        returns an object of type `tf.nn.rnn_cell.RNNCell` that will be used to
-        construct the RNN. If set, `num_units` and `cell_type` cannot be set.
-        This is for advanced users who need additional customization beyond
-        `num_units` and `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is
-        needed for stacked RNNs.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator to
-        continue training a previously saved model.
-      optimizer: An instance of `tf.Optimizer` or string specifying optimizer
-        type. Defaults to Adagrad optimizer.
-      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
-        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-      config: `RunConfig` object to configure the runtime settings.
+from tensorflow_estimator.contrib.estimator.python.estimator import rnn
 
-    Raises:
-      ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
-        compatible.
-    """
-    rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)
+# Include attrs that start with single underscore.
+rnn.__all__ = [s for s in dir(rnn) if not s.startswith('__')]
 
-    def _model_fn(features, labels, mode, config):
-      return _rnn_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          rnn_cell_fn=rnn_cell_fn,
-          sequence_feature_columns=tuple(sequence_feature_columns or []),
-          context_feature_columns=tuple(context_feature_columns or []),
-          optimizer=optimizer,
-          input_layer_partitioner=input_layer_partitioner,
-          config=config)
-    super(RNNEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.rnn import *
diff --git a/tensorflow/contrib/estimator/python/estimator/rnn_test.py b/tensorflow/contrib/estimator/python/estimator/rnn_test.py
deleted file mode 100644
index 89506ee661..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/rnn_test.py
+++ /dev/null
@@ -1,1185 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for rnn.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import random
-import shutil
-import tempfile
-
-import numpy as np
-import six
-
-from tensorflow.contrib.estimator.python.estimator import head as head_lib
-from tensorflow.contrib.estimator.python.estimator import rnn
-from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.data.experimental.ops import readers
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.canned import parsing_utils
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column as fc
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.lib.io import python_io
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import partitioned_variables
-from tensorflow.python.ops import rnn_cell
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import optimizer
-from tensorflow.python.training import training_util
-
-
-# Names of variables created by BasicRNNCell model.
-TOKEN_EMBEDDING_NAME = 'rnn/sequence_input_layer/input_layer/tokens_sequential_embedding/embedding_weights'
-CELL_WEIGHTS_NAME = 'rnn/rnn/basic_rnn_cell/kernel'
-CELL_BIAS_NAME = 'rnn/rnn/basic_rnn_cell/bias'
-MULTI_CELL_WEIGHTS_NAME_PATTERN = 'rnn/rnn/multi_rnn_cell/cell_%d/basic_rnn_cell/kernel'
-MULTI_CELL_BIAS_NAME_PATTERN = 'rnn/rnn/multi_rnn_cell/cell_%d/basic_rnn_cell/bias'
-LOGITS_WEIGHTS_NAME = 'rnn/logits/dense/kernel'
-LOGITS_BIAS_NAME = 'rnn/logits/dense/bias'
-
-
-def _assert_close(expected, actual, rtol=1e-04, name='assert_close'):
-  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
-    expected = ops.convert_to_tensor(expected, name='expected')
-    actual = ops.convert_to_tensor(actual, name='actual')
-    rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
-    rtol = ops.convert_to_tensor(rtol, name='rtol')
-    return check_ops.assert_less(
-        rdiff,
-        rtol,
-        data=('Condition expected =~ actual did not hold element-wise:'
-              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
-              'rtol = ', rtol,),
-        name=scope)
-
-
-def create_checkpoint(rnn_weights, rnn_biases, logits_weights, logits_biases,
-                      global_step, model_dir):
-  """Create checkpoint file with provided model weights.
-
-  Args:
-    rnn_weights: Iterable of values of weights for the RNN cell.
-    rnn_biases: Iterable of values of biases for the RNN cell.
-    logits_weights: Iterable of values for matrix connecting RNN output to
-      logits.
-    logits_biases: Iterable of values for logits bias term.
-    global_step: Initial global step to save in checkpoint.
-    model_dir: Directory into which checkpoint is saved.
-  """
-  model_weights = {}
-  model_weights[CELL_WEIGHTS_NAME] = rnn_weights
-  model_weights[CELL_BIAS_NAME] = rnn_biases
-  model_weights[LOGITS_WEIGHTS_NAME] = logits_weights
-  model_weights[LOGITS_BIAS_NAME] = logits_biases
-
-  with ops.Graph().as_default():
-    # Create model variables.
-    for k, v in six.iteritems(model_weights):
-      variables_lib.Variable(v, name=k, dtype=dtypes.float32)
-
-    # Create non-model variables.
-    global_step_var = training_util.create_global_step()
-    assign_op = global_step_var.assign(global_step)
-
-    # Initialize vars and save checkpoint.
-    with monitored_session.MonitoredTrainingSession(
-        checkpoint_dir=model_dir) as sess:
-      sess.run(assign_op)
-
-
-class RNNLogitFnTest(test.TestCase):
-  """Tests correctness of logits calculated from _rnn_logit_fn_builder."""
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_logits(self, mode, rnn_units, logits_dimension, features_fn,
-                   sequence_feature_columns, context_feature_columns,
-                   expected_logits):
-    """Tests that the expected logits are calculated."""
-    with ops.Graph().as_default():
-      # Global step needed for MonitoredSession, which is in turn used to
-      # explicitly set variable weights through a checkpoint.
-      training_util.create_global_step()
-      # Use a variable scope here with 'rnn', emulating the rnn model_fn, so
-      # the checkpoint naming is shared.
-      with variable_scope.variable_scope('rnn'):
-        input_layer_partitioner = (
-            partitioned_variables.min_max_variable_partitioner(
-                max_partitions=0, min_slice_size=64 << 20))
-        logit_fn = rnn._rnn_logit_fn_builder(
-            output_units=logits_dimension,
-            rnn_cell_fn=rnn._make_rnn_cell_fn(rnn_units),
-            sequence_feature_columns=sequence_feature_columns,
-            context_feature_columns=context_feature_columns,
-            input_layer_partitioner=input_layer_partitioner)
-        # Features are constructed within this function, otherwise the Tensors
-        # containing the features would be defined outside this graph.
-        logits = logit_fn(features=features_fn(), mode=mode)
-        with monitored_session.MonitoredTrainingSession(
-            checkpoint_dir=self._model_dir) as sess:
-          self.assertAllClose(expected_logits, sess.run(logits), atol=1e-4)
-
-  def testOneDimLogits(self):
-    """Tests one-dimensional logits.
-
-    Intermediate values are rounded for ease in reading.
-    input_layer = [[[10]], [[5]]]
-    initial_state = [0, 0]
-    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
-                              tanh(-.2*10 - .3*0 - .4*0 +.5)]]
-                          = [[0.83, -0.91]]
-    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
-                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)]]
-                          = [[0.53, -0.37]]
-    logits = [[-1*0.53 - 1*0.37 + 0.3]] = [[-0.6033]]
-    """
-    base_global_step = 100
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1.], [1.]],
-        logits_biases=[0.3],
-        global_step=base_global_step,
-        model_dir=self._model_dir)
-
-    def features_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5.],
-                  indices=[[0, 0], [0, 1]],
-                  dense_shape=[1, 2]),
-      }
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-    context_feature_columns = []
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          rnn_units=[2],
-          logits_dimension=1,
-          features_fn=features_fn,
-          sequence_feature_columns=sequence_feature_columns,
-          context_feature_columns=context_feature_columns,
-          expected_logits=[[-0.6033]])
-
-  def testMultiDimLogits(self):
-    """Tests multi-dimensional logits.
-
-    Intermediate values are rounded for ease in reading.
-    input_layer = [[[10]], [[5]]]
-    initial_state = [0, 0]
-    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
-                              tanh(-.2*10 - .3*0 - .4*0 +.5)]]
-                          = [[0.83, -0.91]]
-    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
-                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)]]
-                          = [[0.53, -0.37]]
-    logits = [[-1*0.53 - 1*0.37 + 0.3],
-              [0.5*0.53 + 0.3*0.37 + 0.4],
-              [0.2*0.53 - 0.1*0.37 + 0.5]
-           = [[-0.6033, 0.7777, 0.5698]]
-    """
-    base_global_step = 100
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
-        logits_biases=[0.3, 0.4, 0.5],
-        global_step=base_global_step,
-        model_dir=self._model_dir)
-
-    def features_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5.],
-                  indices=[[0, 0], [0, 1]],
-                  dense_shape=[1, 2]),
-      }
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-    context_feature_columns = []
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          rnn_units=[2],
-          logits_dimension=3,
-          features_fn=features_fn,
-          sequence_feature_columns=sequence_feature_columns,
-          context_feature_columns=context_feature_columns,
-          expected_logits=[[-0.6033, 0.7777, 0.5698]])
-
-  def testMultiExampleMultiDim(self):
-    """Tests multiple examples and multi-dimensional logits.
-
-    Intermediate values are rounded for ease in reading.
-    input_layer = [[[10], [5]], [[2], [7]]]
-    initial_state = [[0, 0], [0, 0]]
-    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
-                              tanh(-.2*10 - .3*0 - .4*0 +.5)],
-                             [tanh(.1*2 + .2*0 + .3*0 +.2),
-                              tanh(-.2*2 - .3*0 - .4*0 +.5)]]
-                          = [[0.83, -0.91], [0.38, 0.10]]
-    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
-                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
-                             [tanh(.1*7 + .2*.38 + .3*.10 +.2),
-                              tanh(-.2*7 - .3*.38 - .4*.10 +.5)]]
-                          = [[0.53, -0.37], [0.76, -0.78]
-    logits = [[-1*0.53 - 1*0.37 + 0.3,
-               0.5*0.53 + 0.3*0.37 + 0.4,
-               0.2*0.53 - 0.1*0.37 + 0.5],
-              [-1*0.76 - 1*0.78 + 0.3,
-               0.5*0.76 +0.3*0.78 + 0.4,
-               0.2*0.76 -0.1*0.78 + 0.5]]
-           = [[-0.6033, 0.7777, 0.5698], [-1.2473, 1.0170, 0.5745]]
-    """
-    base_global_step = 100
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
-        logits_biases=[0.3, 0.4, 0.5],
-        global_step=base_global_step,
-        model_dir=self._model_dir)
-
-    def features_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5., 2., 7.],
-                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
-                  dense_shape=[2, 2]),
-      }
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))
-    ]
-    context_feature_columns = []
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          rnn_units=[2],
-          logits_dimension=3,
-          features_fn=features_fn,
-          sequence_feature_columns=sequence_feature_columns,
-          context_feature_columns=context_feature_columns,
-          expected_logits=[[-0.6033, 0.7777, 0.5698],
-                           [-1.2473, 1.0170, 0.5745]])
-
-  def testMultiExamplesDifferentLength(self):
-    """Tests multiple examples with different lengths.
-
-    Intermediate values are rounded for ease in reading.
-    input_layer = [[[10], [5]], [[2], [0]]]
-    initial_state = [[0, 0], [0, 0]]
-    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
-                              tanh(-.2*10 - .3*0 - .4*0 +.5)],
-                             [tanh(.1*2 + .2*0 + .3*0 +.2),
-                              tanh(-.2*2 - .3*0 - .4*0 +.5)]]
-                          = [[0.83, -0.91], [0.38, 0.10]]
-    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
-                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
-                             [<ignored-padding>]]
-                          = [[0.53, -0.37], [<ignored-padding>]]
-    logits = [[-1*0.53 - 1*0.37 + 0.3],
-              [-1*0.38 + 1*0.10 + 0.3]]
-           = [[-0.6033], [0.0197]]
-    """
-    base_global_step = 100
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1.], [1.]],
-        logits_biases=[0.3],
-        global_step=base_global_step,
-        model_dir=self._model_dir)
-
-    def features_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5., 2.],
-                  indices=[[0, 0], [0, 1], [1, 0]],
-                  dense_shape=[2, 2]),
-      }
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-    context_feature_columns = []
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          rnn_units=[2],
-          logits_dimension=1,
-          features_fn=features_fn,
-          sequence_feature_columns=sequence_feature_columns,
-          context_feature_columns=context_feature_columns,
-          expected_logits=[[-0.6033], [0.0197]])
-
-  def testMultiExamplesWithContext(self):
-    """Tests multiple examples with context features.
-
-    Intermediate values are rounded for ease in reading.
-    input_layer = [[[10, -0.5], [5, -0.5]], [[2, 0.8], [0, 0]]]
-    initial_state = [[0, 0], [0, 0]]
-    rnn_output_timestep_1 = [[tanh(.1*10 - 1*.5 + .2*0 + .3*0 +.2),
-                              tanh(-.2*10 - 0.9*.5 - .3*0 - .4*0 +.5)],
-                             [tanh(.1*2 + 1*.8 + .2*0 + .3*0 +.2),
-                              tanh(-.2*2 + .9*.8 - .3*0 - .4*0 +.5)]]
-                          = [[0.60, -0.96], [0.83, 0.68]]
-    rnn_output_timestep_2 = [[tanh(.1*5 - 1*.5 + .2*.60 - .3*.96 +.2),
-                              tanh(-.2*5 - .9*.5 - .3*.60 + .4*.96 +.5)],
-                             [<ignored-padding>]]
-                          = [[0.03, -0.63], [<ignored-padding>]]
-    logits = [[-1*0.03 - 1*0.63 + 0.3],
-              [-1*0.83 + 1*0.68 + 0.3]]
-           = [[-0.3662], [0.1414]]
-    """
-    base_global_step = 100
-    create_checkpoint(
-        # Context features weights are inserted between input and state weights.
-        rnn_weights=[[.1, -.2], [1., 0.9], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1.], [1.]],
-        logits_biases=[0.3],
-        global_step=base_global_step,
-        model_dir=self._model_dir)
-
-    def features_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5., 2.],
-                  indices=[[0, 0], [0, 1], [1, 0]],
-                  dense_shape=[2, 2]),
-          'context': [[-0.5], [0.8]],
-      }
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-    context_feature_columns = [fc.numeric_column('context', shape=(1,))]
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          rnn_units=[2],
-          logits_dimension=1,
-          features_fn=features_fn,
-          sequence_feature_columns=sequence_feature_columns,
-          context_feature_columns=context_feature_columns,
-          expected_logits=[[-0.3662], [0.1414]])
-
-  def testMultiExamplesMultiFeatures(self):
-    """Tests examples with multiple sequential feature columns.
-
-    Intermediate values are rounded for ease in reading.
-    input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
-    initial_state = [[0, 0], [0, 0]]
-    rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
-                              tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
-                             [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
-                              tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
-                          = [[0.94, -0.96], [0.72, -0.38]]
-    rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
-                              tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
-                             [<ignored-padding>]]
-                          = [[0.92, -0.88], [<ignored-padding>]]
-    logits = [[-1*0.92 - 1*0.88 + 0.3],
-              [-1*0.72 - 1*0.38 + 0.3]]
-           = [[-1.5056], [-0.7962]]
-    """
-    base_global_step = 100
-    create_checkpoint(
-        # FeatureColumns are sorted alphabetically, so on_sale weights are
-        # inserted before price.
-        rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1.], [1.]],
-        logits_biases=[0.3],
-        global_step=base_global_step,
-        model_dir=self._model_dir)
-
-    def features_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5., 2.],
-                  indices=[[0, 0], [0, 1], [1, 0]],
-                  dense_shape=[2, 2]),
-          'on_sale':
-              sparse_tensor.SparseTensor(
-                  values=[0, 1, 0],
-                  indices=[[0, 0], [0, 1], [1, 0]],
-                  dense_shape=[2, 2]),
-      }
-
-    price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
-    on_sale_column = fc.indicator_column(
-        seq_fc.sequence_categorical_column_with_identity(
-            'on_sale', num_buckets=2))
-    sequence_feature_columns = [price_column, on_sale_column]
-    context_feature_columns = []
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          rnn_units=[2],
-          logits_dimension=1,
-          features_fn=features_fn,
-          sequence_feature_columns=sequence_feature_columns,
-          context_feature_columns=context_feature_columns,
-          expected_logits=[[-1.5056], [-0.7962]])
-
-
-class RNNClassifierTrainingTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _assert_checkpoint(
-      self, n_classes, input_units, cell_units, expected_global_step):
-
-    shapes = {
-        name: shape for (name, shape) in
-        checkpoint_utils.list_variables(self._model_dir)
-    }
-
-    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
-    self.assertEqual(
-        expected_global_step,
-        checkpoint_utils.load_variable(
-            self._model_dir, ops.GraphKeys.GLOBAL_STEP))
-
-    # RNN Cell variables.
-    if len(cell_units) > 1:
-      for i, cell_unit in enumerate(cell_units):
-        self.assertEqual([input_units + cell_unit, cell_unit],
-                         shapes[MULTI_CELL_WEIGHTS_NAME_PATTERN % i])
-        self.assertEqual([cell_unit],
-                         shapes[MULTI_CELL_BIAS_NAME_PATTERN % i])
-        input_units = cell_unit
-    elif len(cell_units) == 1:
-      self.assertEqual([input_units + cell_unit, cell_unit],
-                       shapes[CELL_WEIGHTS_NAME])
-      self.assertEqual([cell_unit], shapes[CELL_BIAS_NAME])
-
-    # Logits variables.
-    logits_dimension = n_classes if n_classes > 2 else 1
-    self.assertEqual([cell_units[-1], logits_dimension],
-                     shapes[LOGITS_WEIGHTS_NAME])
-    self.assertEqual([logits_dimension], shapes[LOGITS_BIAS_NAME])
-
-  def _mock_optimizer(self, expected_loss=None):
-    expected_var_names = [
-        '%s/part_0:0' % CELL_BIAS_NAME,
-        '%s/part_0:0' % CELL_WEIGHTS_NAME,
-        '%s/part_0:0' % LOGITS_BIAS_NAME,
-        '%s/part_0:0' % LOGITS_WEIGHTS_NAME,
-    ]
-
-    def _minimize(loss, global_step):
-      trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      self.assertItemsEqual(
-          expected_var_names,
-          [var.name for var in trainable_vars])
-
-      # Verify loss. We can't check the value directly, so we add an assert op.
-      self.assertEquals(0, loss.shape.ndims)
-      if expected_loss is None:
-        return state_ops.assign_add(global_step, 1).op
-      assert_loss = _assert_close(
-          math_ops.to_float(expected_loss, name='expected'),
-          loss,
-          name='assert_loss')
-      with ops.control_dependencies((assert_loss,)):
-        return state_ops.assign_add(global_step, 1).op
-
-    mock_optimizer = test.mock.NonCallableMock(
-        spec=optimizer.Optimizer,
-        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
-    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
-
-    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
-    # So, return mock_optimizer itself for deepcopy.
-    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
-    return mock_optimizer
-
-  def testConflictingRNNCellFn(self):
-    col = seq_fc.sequence_categorical_column_with_hash_bucket(
-        'tokens', hash_bucket_size=10)
-    embed = fc.embedding_column(col, dimension=2)
-    cell_units = [4, 2]
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        'num_units and cell_type must not be specified when using rnn_cell_fn'):
-      rnn.RNNClassifier(
-          sequence_feature_columns=[embed],
-          rnn_cell_fn=lambda x: x,
-          num_units=cell_units)
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        'num_units and cell_type must not be specified when using rnn_cell_fn'):
-      rnn.RNNClassifier(
-          sequence_feature_columns=[embed],
-          rnn_cell_fn=lambda x: x,
-          cell_type='lstm')
-
-  def _testFromScratchWithDefaultOptimizer(self, n_classes):
-    def train_input_fn():
-      return {
-          'tokens':
-              sparse_tensor.SparseTensor(
-                  values=['the', 'cat', 'sat'],
-                  indices=[[0, 0], [0, 1], [0, 2]],
-                  dense_shape=[1, 3]),
-      }, [[1]]
-
-    col = seq_fc.sequence_categorical_column_with_hash_bucket(
-        'tokens', hash_bucket_size=10)
-    embed = fc.embedding_column(col, dimension=2)
-    input_units = 2
-
-    cell_units = [4, 2]
-    est = rnn.RNNClassifier(
-        sequence_feature_columns=[embed],
-        num_units=cell_units,
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # Train for a few steps, and validate final checkpoint.
-    num_steps = 10
-    est.train(input_fn=train_input_fn, steps=num_steps)
-    self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
-
-  def testBinaryClassFromScratchWithDefaultOptimizer(self):
-    self._testFromScratchWithDefaultOptimizer(n_classes=2)
-
-  def testMultiClassFromScratchWithDefaultOptimizer(self):
-    self._testFromScratchWithDefaultOptimizer(n_classes=4)
-
-  def testFromScratchWithCustomRNNCellFn(self):
-    def train_input_fn():
-      return {
-          'tokens':
-              sparse_tensor.SparseTensor(
-                  values=['the', 'cat', 'sat'],
-                  indices=[[0, 0], [0, 1], [0, 2]],
-                  dense_shape=[1, 3]),
-      }, [[1]]
-
-    col = seq_fc.sequence_categorical_column_with_hash_bucket(
-        'tokens', hash_bucket_size=10)
-    embed = fc.embedding_column(col, dimension=2)
-    input_units = 2
-    cell_units = [4, 2]
-    n_classes = 2
-
-    def rnn_cell_fn(mode):
-      del mode  # unused
-      cells = [rnn_cell.BasicRNNCell(num_units=n) for n in cell_units]
-      return rnn_cell.MultiRNNCell(cells)
-
-    est = rnn.RNNClassifier(
-        sequence_feature_columns=[embed],
-        rnn_cell_fn=rnn_cell_fn,
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # Train for a few steps, and validate final checkpoint.
-    num_steps = 10
-    est.train(input_fn=train_input_fn, steps=num_steps)
-    self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
-
-  def _testExampleWeight(self, n_classes):
-    def train_input_fn():
-      return {
-          'tokens':
-              sparse_tensor.SparseTensor(
-                  values=['the', 'cat', 'sat', 'dog', 'barked'],
-                  indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
-                  dense_shape=[2, 3]),
-          'w': [[1], [2]],
-      }, [[1], [0]]
-
-    col = seq_fc.sequence_categorical_column_with_hash_bucket(
-        'tokens', hash_bucket_size=10)
-    embed = fc.embedding_column(col, dimension=2)
-    input_units = 2
-
-    cell_units = [4, 2]
-    est = rnn.RNNClassifier(
-        num_units=cell_units,
-        sequence_feature_columns=[embed],
-        n_classes=n_classes,
-        weight_column='w',
-        model_dir=self._model_dir)
-
-    # Train for a few steps, and validate final checkpoint.
-    num_steps = 10
-    est.train(input_fn=train_input_fn, steps=num_steps)
-    self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
-
-  def testBinaryClassWithExampleWeight(self):
-    self._testExampleWeight(n_classes=2)
-
-  def testMultiClassWithExampleWeight(self):
-    self._testExampleWeight(n_classes=4)
-
-  def testBinaryClassFromCheckpoint(self):
-    initial_global_step = 100
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1.], [1.]],
-        logits_biases=[0.3],
-        global_step=initial_global_step,
-        model_dir=self._model_dir)
-
-    def train_input_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5., 2.],
-                  indices=[[0, 0], [0, 1], [1, 0]],
-                  dense_shape=[2, 2]),
-      }, [[0], [1]]
-
-    # Uses same checkpoint and examples as testBinaryClassEvaluationMetrics.
-    # See that test for loss calculation.
-    mock_optimizer = self._mock_optimizer(expected_loss=0.559831)
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-    est = rnn.RNNClassifier(
-        num_units=[2],
-        sequence_feature_columns=sequence_feature_columns,
-        n_classes=2,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-    est.train(input_fn=train_input_fn, steps=10)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-
-  def testMultiClassFromCheckpoint(self):
-    initial_global_step = 100
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
-        logits_biases=[0.3, 0.4, 0.5],
-        global_step=initial_global_step,
-        model_dir=self._model_dir)
-
-    def train_input_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5., 2., 7.],
-                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
-                  dense_shape=[2, 2]),
-      }, [[0], [1]]
-
-    # Uses same checkpoint and examples as testMultiClassEvaluationMetrics.
-    # See that test for loss calculation.
-    mock_optimizer = self._mock_optimizer(expected_loss=1.331465)
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-    est = rnn.RNNClassifier(
-        num_units=[2],
-        sequence_feature_columns=sequence_feature_columns,
-        n_classes=3,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-    est.train(input_fn=train_input_fn, steps=10)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-
-
-def sorted_key_dict(unsorted_dict):
-  return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}
-
-
-class RNNClassifierEvaluationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def testBinaryClassEvaluationMetrics(self):
-    global_step = 100
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1.], [1.]],
-        logits_biases=[0.3],
-        global_step=global_step,
-        model_dir=self._model_dir)
-
-    def eval_input_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5., 2.],
-                  indices=[[0, 0], [0, 1], [1, 0]],
-                  dense_shape=[2, 2]),
-      }, [[0], [1]]
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-
-    est = rnn.RNNClassifier(
-        num_units=[2],
-        sequence_feature_columns=sequence_feature_columns,
-        n_classes=2,
-        model_dir=self._model_dir)
-    eval_metrics = est.evaluate(eval_input_fn, steps=1)
-
-    # Uses identical numbers to testMultiExamplesWithDifferentLength.
-    # See that test for logits calculation.
-    # logits = [[-0.603282], [0.019719]]
-    # probability = exp(logits) / (1 + exp(logits)) = [[0.353593], [0.504930]]
-    # loss = -label * ln(p) - (1 - label) * ln(1 - p)
-    #      = [[0.436326], [0.683335]]
-    # sum_over_batch_size = (0.436326 + 0.683335)/2
-    expected_metrics = {
-        ops.GraphKeys.GLOBAL_STEP:
-            global_step,
-        metric_keys.MetricKeys.LOSS:
-            0.559831,
-        metric_keys.MetricKeys.LOSS_MEAN:
-            0.559831,
-        metric_keys.MetricKeys.ACCURACY:
-            1.0,
-        metric_keys.MetricKeys.PREDICTION_MEAN:
-            0.429262,
-        metric_keys.MetricKeys.LABEL_MEAN:
-            0.5,
-        metric_keys.MetricKeys.ACCURACY_BASELINE:
-            0.5,
-        # With default threshold of 0.5, the model is a perfect classifier.
-        metric_keys.MetricKeys.RECALL:
-            1.0,
-        metric_keys.MetricKeys.PRECISION:
-            1.0,
-        # Positive example is scored above negative, so AUC = 1.0.
-        metric_keys.MetricKeys.AUC:
-            1.0,
-        metric_keys.MetricKeys.AUC_PR:
-            1.0,
-    }
-    self.assertAllClose(
-        sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
-
-  def testMultiClassEvaluationMetrics(self):
-    global_step = 100
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
-        logits_biases=[0.3, 0.4, 0.5],
-        global_step=global_step,
-        model_dir=self._model_dir)
-
-    def eval_input_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5., 2., 7.],
-                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
-                  dense_shape=[2, 2]),
-      }, [[0], [1]]
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-
-    est = rnn.RNNClassifier(
-        num_units=[2],
-        sequence_feature_columns=sequence_feature_columns,
-        n_classes=3,
-        model_dir=self._model_dir)
-    eval_metrics = est.evaluate(eval_input_fn, steps=1)
-
-    # Uses identical numbers to testMultiExampleMultiDim.
-    # See that test for logits calculation.
-    # logits = [[-0.603282, 0.777708, 0.569756],
-    #           [-1.247356, 1.017018, 0.574481]]
-    # logits_exp = exp(logits) / (1 + exp(logits))
-    #            = [[0.547013, 2.176468, 1.767836],
-    #               [0.287263, 2.764937, 1.776208]]
-    # softmax_probabilities = logits_exp / logits_exp.sum()
-    #                       = [[0.121793, 0.484596, 0.393611],
-    #                          [0.059494, 0.572639, 0.367866]]
-    # loss = -1. * log(softmax[label])
-    #      = [[2.105432], [0.557500]]
-    # sum_over_batch_size = (2.105432 + 0.557500)/2
-    expected_metrics = {
-        ops.GraphKeys.GLOBAL_STEP: global_step,
-        metric_keys.MetricKeys.LOSS: 1.331465,
-        metric_keys.MetricKeys.LOSS_MEAN: 1.331466,
-        metric_keys.MetricKeys.ACCURACY: 0.5,
-    }
-
-    self.assertAllClose(
-        sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
-
-
-class RNNClassifierPredictionTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def testBinaryClassPredictions(self):
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1.], [1.]],
-        logits_biases=[0.3],
-        global_step=0,
-        model_dir=self._model_dir)
-
-    def predict_input_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5.],
-                  indices=[[0, 0], [0, 1]],
-                  dense_shape=[1, 2]),
-      }
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-    label_vocabulary = ['class_0', 'class_1']
-
-    est = rnn.RNNClassifier(
-        num_units=[2],
-        sequence_feature_columns=sequence_feature_columns,
-        n_classes=2,
-        label_vocabulary=label_vocabulary,
-        model_dir=self._model_dir)
-    # Uses identical numbers to testOneDimLogits.
-    # See that test for logits calculation.
-    # logits = [-0.603282]
-    # logistic = exp(-0.6033) / (1 + exp(-0.6033)) = [0.353593]
-    # probabilities = [0.646407, 0.353593]
-    # class_ids = argmax(probabilities) = [0]
-    predictions = next(est.predict(predict_input_fn))
-    self.assertAllClose([-0.603282],
-                        predictions[prediction_keys.PredictionKeys.LOGITS])
-    self.assertAllClose([0.353593],
-                        predictions[prediction_keys.PredictionKeys.LOGISTIC])
-    self.assertAllClose(
-        [0.646407, 0.353593],
-        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-    self.assertAllClose([0],
-                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
-    self.assertEqual([b'class_0'],
-                     predictions[prediction_keys.PredictionKeys.CLASSES])
-
-  def testMultiClassPredictions(self):
-    create_checkpoint(
-        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
-        rnn_biases=[.2, .5],
-        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
-        logits_biases=[0.3, 0.4, 0.5],
-        global_step=0,
-        model_dir=self._model_dir)
-
-    def predict_input_fn():
-      return {
-          'price':
-              sparse_tensor.SparseTensor(
-                  values=[10., 5.],
-                  indices=[[0, 0], [0, 1]],
-                  dense_shape=[1, 2]),
-      }
-
-    sequence_feature_columns = [
-        seq_fc.sequence_numeric_column('price', shape=(1,))]
-    label_vocabulary = ['class_0', 'class_1', 'class_2']
-
-    est = rnn.RNNClassifier(
-        num_units=[2],
-        sequence_feature_columns=sequence_feature_columns,
-        n_classes=3,
-        label_vocabulary=label_vocabulary,
-        model_dir=self._model_dir)
-    # Uses identical numbers to testMultiDimLogits.
-    # See that test for logits calculation.
-    # logits = [-0.603282, 0.777708, 0.569756]
-    # logits_exp = exp(logits) = [0.547013, 2.176468, 1.767836]
-    # softmax_probabilities = logits_exp / logits_exp.sum()
-    #                       = [0.121793, 0.484596, 0.393611]
-    # class_ids = argmax(probabilities) = [1]
-    predictions = next(est.predict(predict_input_fn))
-    self.assertAllClose([-0.603282, 0.777708, 0.569756],
-                        predictions[prediction_keys.PredictionKeys.LOGITS])
-    self.assertAllClose(
-        [0.121793, 0.484596, 0.393611],
-        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-    self.assertAllClose([1],
-                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
-    self.assertEqual([b'class_1'],
-                     predictions[prediction_keys.PredictionKeys.CLASSES])
-
-
-class BaseRNNClassificationIntegrationTest(object):
-
-  def __init__(self, _create_estimator_fn):
-    self._create_estimator_fn = _create_estimator_fn
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(self, feature_columns, train_input_fn, eval_input_fn,
-                          predict_input_fn, n_classes, batch_size):
-    cell_units = [4, 2]
-    est = self._create_estimator_fn(feature_columns, n_classes, cell_units,
-                                    self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUATE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predicted_proba = np.array([
-        x[prediction_keys.PredictionKeys.PROBABILITIES]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
-
-    # EXPORT
-    feature_spec = parsing_utils.classifier_parse_example_spec(
-        feature_columns,
-        label_key='label',
-        label_dtype=dtypes.int64)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def testNumpyInputFn(self):
-    """Tests complete flow with numpy_input_fn."""
-    n_classes = 3
-    batch_size = 10
-    words = ['dog', 'cat', 'bird', 'the', 'a', 'sat', 'flew', 'slept']
-    # Numpy only supports dense input, so all examples will have same length.
-    # TODO(b/73160931): Update test when support for prepadded data exists.
-    sequence_length = 3
-
-    features = []
-    for _ in range(batch_size):
-      sentence = random.sample(words, sequence_length)
-      features.append(sentence)
-
-    x_data = np.array(features)
-    y_data = np.random.randint(n_classes, size=batch_size)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'tokens': x_data},
-        y=y_data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'tokens': x_data},
-        y=y_data,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'tokens': x_data},
-        batch_size=batch_size,
-        shuffle=False)
-
-    col = seq_fc.sequence_categorical_column_with_hash_bucket(
-        'tokens', hash_bucket_size=10)
-    embed = fc.embedding_column(col, dimension=2)
-    feature_columns = [embed]
-
-    self._test_complete_flow(
-        feature_columns=feature_columns,
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        n_classes=n_classes,
-        batch_size=batch_size)
-
-  def testParseExampleInputFn(self):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    n_classes = 3
-    batch_size = 10
-    words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']
-
-    _, examples_file = tempfile.mkstemp()
-    writer = python_io.TFRecordWriter(examples_file)
-    for _ in range(batch_size):
-      sequence_length = random.randint(1, len(words))
-      sentence = random.sample(words, sequence_length)
-      label = random.randint(0, n_classes - 1)
-      example = example_pb2.Example(features=feature_pb2.Features(
-          feature={
-              'tokens':
-                  feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
-                      value=sentence)),
-              'label':
-                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
-                      value=[label])),
-          }))
-      writer.write(example.SerializeToString())
-    writer.close()
-
-    col = seq_fc.sequence_categorical_column_with_hash_bucket(
-        'tokens', hash_bucket_size=10)
-    embed = fc.embedding_column(col, dimension=2)
-    feature_columns = [embed]
-    feature_spec = parsing_utils.classifier_parse_example_spec(
-        feature_columns,
-        label_key='label',
-        label_dtype=dtypes.int64)
-
-    def _train_input_fn():
-      dataset = readers.make_batched_features_dataset(
-          examples_file, batch_size, feature_spec)
-      return dataset.map(lambda features: (features, features.pop('label')))
-    def _eval_input_fn():
-      dataset = readers.make_batched_features_dataset(
-          examples_file, batch_size, feature_spec, num_epochs=1)
-      return dataset.map(lambda features: (features, features.pop('label')))
-    def _predict_input_fn():
-      dataset = readers.make_batched_features_dataset(
-          examples_file, batch_size, feature_spec, num_epochs=1)
-      def features_fn(features):
-        features.pop('label')
-        return features
-      return dataset.map(features_fn)
-
-    self._test_complete_flow(
-        feature_columns=feature_columns,
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        n_classes=n_classes,
-        batch_size=batch_size)
-
-
-def _rnn_classifier_fn(feature_columns, n_classes, cell_units, model_dir):
-  return rnn.RNNClassifier(
-      num_units=cell_units,
-      sequence_feature_columns=feature_columns,
-      n_classes=n_classes,
-      model_dir=model_dir)
-
-
-class RNNClassifierIntegrationTest(BaseRNNClassificationIntegrationTest,
-                                   test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    BaseRNNClassificationIntegrationTest.__init__(self, _rnn_classifier_fn)
-
-
-def _rnn_estimator_fn(feature_columns, n_classes, cell_units, model_dir):
-  return rnn.RNNEstimator(
-      head=head_lib.multi_class_head(n_classes=n_classes),
-      num_units=cell_units,
-      sequence_feature_columns=feature_columns,
-      model_dir=model_dir)
-
-
-class RNNEstimatorIntegrationTest(BaseRNNClassificationIntegrationTest,
-                                  test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    BaseRNNClassificationIntegrationTest.__init__(self, _rnn_estimator_fn)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/saved_model_estimator.py b/tensorflow/contrib/estimator/python/estimator/saved_model_estimator.py
index ce98e9987e..abd579ac7f 100644
--- a/tensorflow/contrib/estimator/python/estimator/saved_model_estimator.py
+++ b/tensorflow/contrib/estimator/python/estimator/saved_model_estimator.py
@@ -12,438 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Class that creates an Estimator from a SavedModel."""
+"""saved_model_estimator python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.export import export as export_lib
-from tensorflow.python.estimator.export import export_output
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.saved_model import constants
-from tensorflow.python.saved_model import loader_impl
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import training_util
-
-
-class SavedModelEstimator(estimator_lib.Estimator):
-  """Create an Estimator from a SavedModel.
-
-  Only SavedModels exported with
-  `tf.contrib.estimator.export_all_saved_models()` or
-  `tf.estimator.Estimator.export_savedmodel()` are supported for this class.
-
-  Example with `tf.estimator.DNNClassifier`:
-
-  **Step 1: Create and train DNNClassifier.**
-
-  ```python
-  feature1 = tf.feature_column.embedding_column(
-      tf.feature_column.categorical_column_with_vocabulary_list(
-          key='feature1', vocabulary_list=('green', 'yellow')), dimension=1)
-  feature2 = tf.feature_column.numeric_column(key='feature2', default_value=0.0)
-
-  classifier = tf.estimator.DNNClassifier(
-      hidden_units=[4,2], feature_columns=[feature1, feature2])
-
-  def input_fn():
-    features = {'feature1': tf.constant(['green', 'green', 'yellow']),
-                'feature2': tf.constant([3.5, 4.2, 6.1])}
-    label = tf.constant([1., 0., 0.])
-    return tf.data.Dataset.from_tensors((features, label)).repeat()
-
-  classifier.train(input_fn=input_fn, steps=10)
-  ```
-
-  **Step 2: Export classifier.**
-  First, build functions that specify the expected inputs.
-
-  ```python
-  # During train and evaluation, both the features and labels should be defined.
-  supervised_input_receiver_fn = (
-      tf.contrib.estimator.build_raw_supervised_input_receiver_fn(
-          {'feature1': tf.placeholder(dtype=tf.string, shape=[None]),
-           'feature2': tf.placeholder(dtype=tf.float32, shape=[None])},
-          tf.placeholder(dtype=tf.float32, shape=[None])))
-
-  # During predict mode, expect to receive a `tf.Example` proto, so a parsing
-  # function is used.
-  serving_input_receiver_fn = (
-      tf.estimator.export.build_parsing_serving_input_receiver_fn(
-          tf.feature_column.make_parse_example_spec([feature1, feature2])))
-  ```
-
-  Next, export the model as a SavedModel. A timestamped directory will be
-  created (for example `/tmp/export_all/1234567890`).
-
-  ```python
-  # Option 1: Save all modes (train, eval, predict)
-  export_dir = tf.contrib.estimator.export_all_saved_models(
-      classifier, '/tmp/export_all',
-      {tf.estimator.ModeKeys.TRAIN: supervised_input_receiver_fn,
-       tf.estimator.ModeKeys.EVAL: supervised_input_receiver_fn,
-       tf.estimator.ModeKeys.PREDICT: serving_input_receiver_fn})
-
-  # Option 2: Only export predict mode
-  export_dir = classifier.export_savedmodel(
-      '/tmp/export_predict', serving_input_receiver_fn)
-  ```
-
-  **Step 3: Create a SavedModelEstimator from the exported SavedModel.**
-
-  ```python
-  est = tf.contrib.estimator.SavedModelEstimator(export_dir)
-
-  # If all modes were exported, you can immediately evaluate and predict, or
-  # continue training. Otherwise only predict is available.
-  eval_results = est.evaluate(input_fn=input_fn, steps=1)
-  print(eval_results)
-
-  est.train(input_fn=input_fn, steps=20)
-
-  def predict_input_fn():
-    example = tf.train.Example()
-    example.features.feature['feature1'].bytes_list.value.extend(['yellow'])
-    example.features.feature['feature2'].float_list.value.extend([1.])
-    return {'inputs':tf.constant([example.SerializeToString()])}
-
-  predictions = est.predict(predict_input_fn)
-  print(next(predictions))
-  ```
-  """
-
-  def __init__(self, saved_model_dir, model_dir=None):
-    """Initialize a SavedModelEstimator.
-
-    The SavedModelEstimator loads its model function and variable values from
-    the graphs defined in the SavedModel. There is no option to pass in
-    `RunConfig` or `params` arguments, because the model function graph is
-    defined statically in the SavedModel.
-
-    Args:
-      saved_model_dir: Directory containing SavedModel protobuf and subfolders.
-      model_dir: Directory to save new checkpoints during training.
-
-    Raises:
-      NotImplementedError: If a DistributionStrategy is defined in the config.
-        Unless the SavedModelEstimator is subclassed, this shouldn't happen.
-    """
-    checkpoint = estimator_lib._get_saved_model_ckpt(saved_model_dir)  # pylint: disable=protected-access
-    vars_to_warm_start = [name for name, _ in
-                          checkpoint_utils.list_variables(checkpoint)]
-    warm_start_settings = estimator_lib.WarmStartSettings(
-        ckpt_to_initialize_from=checkpoint,
-        vars_to_warm_start=vars_to_warm_start)
-
-    super(SavedModelEstimator, self).__init__(
-        model_fn=self._model_fn_from_saved_model, model_dir=model_dir,
-        warm_start_from=warm_start_settings)
-    if self._train_distribution or self._eval_distribution:
-      raise NotImplementedError(
-          'SavedModelEstimator currently does not support '
-          'DistributionStrategy.')
-    self.saved_model_dir = saved_model_dir
-    self.saved_model_loader = loader_impl.SavedModelLoader(saved_model_dir)
-    self._available_modes = self._extract_available_modes()
-
-  def _extract_available_modes(self):
-    """Return list of modes found in SavedModel."""
-    available_modes = []
-    logging.info('Checking available modes for SavedModelEstimator.')
-    for mode in [model_fn_lib.ModeKeys.TRAIN, model_fn_lib.ModeKeys.EVAL,
-                 model_fn_lib.ModeKeys.PREDICT]:
-      try:
-        self._get_meta_graph_def_for_mode(mode)
-      except RuntimeError:
-        logging.warning('%s mode not found in SavedModel.' % mode)
-        continue
-
-      if self._get_signature_def_for_mode(mode) is not None:
-        available_modes.append(mode)
-
-    logging.info('Available modes for Estimator: %s' % available_modes)
-    return available_modes
-
-  def _validate_mode(self, mode):
-    """Make sure that mode can be run using the SavedModel."""
-    if mode not in self._available_modes:
-      raise RuntimeError('%s mode is not available in the SavedModel. Use '
-                         'saved_model_cli to check that the Metagraph for this '
-                         'mode has been exported.' % mode)
-
-  def _get_meta_graph_def_for_mode(self, mode):
-    tags = model_fn_lib.EXPORT_TAG_MAP[mode]
-    return self.saved_model_loader.get_meta_graph_def_from_tags(tags)
-
-  def _get_signature_def_for_mode(self, mode):
-    meta_graph_def = self._get_meta_graph_def_for_mode(mode)
-    sig_def_key = (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-                   if mode == model_fn_lib.ModeKeys.PREDICT else mode)
-    if sig_def_key not in meta_graph_def.signature_def:
-      logging.warning('Metagraph for mode %s was found, but SignatureDef with'
-                      ' key \"%s\" is missing.' % (mode, sig_def_key))
-      return None
-    return meta_graph_def.signature_def[sig_def_key]
-
-  def _create_and_assert_global_step(self, graph):
-    # Do nothing here. The global step variable will be created/loaded from the
-    # SavedModel. If a global step variable were created here, the result
-    # will be two duplicate global step variables, causing issues during
-    # the warm-start phase.
-    # Due to the global variable being created in the model function, this may
-    # cause issues when running DistributionStrategy. Thus, DistributionStrategy
-    # is not yet supported with SavedModelEstimator.
-    return None
-
-  def _model_fn_from_saved_model(self, features, labels, mode):
-    """Load a SavedModel graph and return an EstimatorSpec."""
-    # TODO(kathywu): Model function loads placeholders from the graph. Calling
-    # export_all_saved_models creates another placeholder for the inputs, on top
-    # of the original placeholders. There should be a way to avoid this.
-    self._validate_mode(mode)
-
-    g = ops.get_default_graph()
-    if  training_util.get_global_step(g) is not None:
-      raise RuntimeError(
-          'Graph must not contain a global step tensor before the SavedModel is'
-          ' loaded. Please make sure that the input function does not create a '
-          'global step.')
-
-    # Extract SignatureDef for information about the input and output tensors.
-    signature_def = self._get_signature_def_for_mode(mode)
-
-    # Generate input map for replacing the inputs in the SavedModel graph with
-    # the provided features and labels.
-    input_map = _generate_input_map(signature_def, features, labels)
-
-    # Create a list of the names of output tensors. When the graph is loaded,
-    # names of the output tensors may be remapped. This ensures that the correct
-    # tensors are returned in the EstimatorSpec.
-    output_tensor_names = [
-        value.name for value in six.itervalues(signature_def.outputs)]
-
-    # Load the graph. `output_tensors` contains output `Tensors` in the same
-    # same order as the `output_tensor_names` list.
-    tags = model_fn_lib.EXPORT_TAG_MAP[mode]
-    _, output_tensors = self.saved_model_loader.load_graph(
-        g, tags, input_map=input_map, return_elements=output_tensor_names)
-
-    # Create a scaffold from the MetaGraphDef that contains ops to initialize
-    # the graph. This should mirror the steps from _add_meta_graph_for_mode(),
-    # which creates a MetaGraphDef from the EstimatorSpec's scaffold.
-    scaffold = monitored_session.Scaffold(
-        local_init_op=loader_impl._get_main_op_tensor(  # pylint: disable=protected-access
-            self._get_meta_graph_def_for_mode(mode)))
-
-    # Ensure that a global step tensor has been created.
-    global_step_tensor = training_util.get_global_step(g)
-    training_util.assert_global_step(global_step_tensor)
-
-    # Extract values to return in the EstimatorSpec.
-    output_map = dict(zip(output_tensor_names, output_tensors))
-    outputs = {key: output_map[value.name]
-               for key, value in six.iteritems(signature_def.outputs)}
-
-    loss, predictions, metrics = _validate_and_extract_outputs(
-        mode, outputs, signature_def.method_name)
-
-    train_op = ops.get_collection(constants.TRAIN_OP_KEY)
-    if len(train_op) > 1:
-      raise RuntimeError('Multiple ops found in the train_op collection.')
-    train_op = None if not train_op else train_op[0]
-
-    _clear_saved_model_collections()
-    return model_fn_lib.EstimatorSpec(
-        scaffold=scaffold,
-        mode=mode,
-        loss=loss,
-        train_op=train_op,
-        predictions=predictions,
-        eval_metric_ops=metrics)
-
-
-def _clear_saved_model_collections():
-  """Clear collections that are expected empty when exporting a SavedModel.
-
-  The SavedModel builder uses these collections to track ops necessary to
-  restore the graph state. These collections are expected to be empty before
-  MetaGraphs are added to the builder.
-  """
-  del ops.get_collection_ref(constants.ASSETS_KEY)[:]
-  del ops.get_collection_ref(constants.LEGACY_INIT_OP_KEY)[:]
-  del ops.get_collection_ref(constants.MAIN_OP_KEY)[:]
-  del ops.get_collection_ref(constants.TRAIN_OP_KEY)[:]
-
-
-def _generate_input_map(signature_def, features, labels):
-  """Return dict mapping an input tensor name to a feature or label tensor.
-
-  Args:
-    signature_def: SignatureDef loaded from SavedModel
-    features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
-      `SparseTensor`, specifying the features to be passed to the model.
-    labels: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
-      `SparseTensor`, specifying the labels to be passed to the model. May be
-      `None`.
-
-  Returns:
-    dict mapping string names of inputs to features or labels tensors
-
-  Raises:
-    ValueError: if SignatureDef inputs are not completely mapped by the input
-      features and labels.
-  """
-  # pylint: disable=protected-access
-  if not isinstance(features, dict):
-    features = {export_lib._SINGLE_FEATURE_DEFAULT_NAME: features}
-  if labels is not None and not isinstance(labels, dict):
-    labels = {export_lib._SINGLE_LABEL_DEFAULT_NAME: labels}
-  # pylint: enable=protected-access
-
-  inputs = signature_def.inputs
-  input_map = {}
-  for key, tensor_info in six.iteritems(inputs):
-    input_name = tensor_info.name
-    if ':' in input_name:
-      input_name = input_name[:input_name.find(':')]
-
-    # When tensors are used as control inputs for operations, their names are
-    # prepended with a '^' character in the GraphDef. To handle possible control
-    # flow edge cases, control input names must be included in the input map.
-    control_dependency_name = '^' + input_name
-
-    if key in features:
-      _check_same_dtype_and_shape(features[key], tensor_info, key)
-      input_map[input_name] = input_map[control_dependency_name] = features[key]
-    elif labels is not None and key in labels:
-      _check_same_dtype_and_shape(labels[key], tensor_info, key)
-      input_map[input_name] = input_map[control_dependency_name] = labels[key]
-    else:
-      raise ValueError(
-          'Key \"%s\" not found in features or labels passed in to the model '
-          'function. All required keys: %s' % (key, inputs.keys()))
-
-  return input_map
-
-
-def _check_same_dtype_and_shape(tensor, tensor_info, name):
-  """Validate that tensor has the same properties as the TensorInfo proto.
-
-  Args:
-    tensor: a `Tensor` object.
-    tensor_info: a `TensorInfo` proto.
-    name: Name of the input (to identify Tensor if an error is raised).
-
-  Raises:
-    ValueError: If the tensor shape or dtype don't match the TensorInfo
-  """
-  dtype_error = (tensor.dtype != dtypes.DType(tensor_info.dtype))
-  shape_error = not tensor.shape.is_compatible_with(tensor_info.tensor_shape)
-
-  if dtype_error or shape_error:
-    msg = 'Tensor shape and/or dtype validation failed for input %s:' % name
-    if dtype_error:
-      msg += ('\n\tExpected dtype: %s, Got: %s'
-              % (dtypes.DType(tensor_info.dtype), tensor.dtype))
-    if shape_error:
-      msg += ('\n\tExpected shape: %s, Got: %s'
-              % (tensor_shape.TensorShape(tensor_info.tensor_shape),
-                 tensor.shape))
-
-    raise ValueError(msg)
-
-
-def _extract_eval_metrics(output_dict):
-  """Return a eval metric dict extracted from the output_dict.
-
-  Eval metrics consist of a value tensor and an update op. Both must be in the
-  passed-in tensor dictionary for an eval metric to be added to the returned
-  dictionary.
-
-  Args:
-    output_dict: a dict that maps strings to tensors.
-
-  Returns:
-    dict mapping strings to (value, update_op) tuples.
-  """
-  # pylint: disable=protected-access
-  metric_ops = {}
-  separator_char = export_output._SupervisedOutput._SEPARATOR_CHAR
-
-  for key, tensor in six.iteritems(output_dict):
-    split_key = key.split(separator_char)
-
-    # The metric name may contain the separator character, so recreate its name.
-    metric_name = separator_char.join(split_key[:-1])
-
-    if split_key[0] == export_output._SupervisedOutput.METRICS_NAME:
-      # If the key ends with the value suffix, and there is a corresponding
-      # key ending with the update_op suffix, then add tensors to metrics dict.
-      if split_key[-1] == export_output._SupervisedOutput.METRIC_VALUE_SUFFIX:
-        update_op = ''.join(
-            [metric_name, separator_char,
-             export_output._SupervisedOutput.METRIC_UPDATE_SUFFIX])
-        if update_op in output_dict:
-          update_op_tensor = output_dict[update_op]
-          metric_ops[metric_name] = (tensor, update_op_tensor)
-
-  # pylint: enable=protected-access
-  return metric_ops
-
-
-def _validate_and_extract_outputs(mode, output_dict, method_name):
-  """Extract values from SignatureDef output dictionary.
-
-  Args:
-    mode: One of the modes enumerated in `tf.estimator.ModeKeys`.
-    output_dict: dict of string SignatureDef keys to `Tensor`.
-    method_name: Method name of the SignatureDef as a string.
-
-  Returns:
-    Tuple of (
-      loss: `Tensor` object,
-      predictions: dictionary mapping string keys to `Tensor` objects,
-      metrics: dictionary mapping string keys to a tuple of two `Tensor` objects
-    )
-
-  Raises:
-    RuntimeError: raised if SignatureDef has an invalid method name for the mode
-  """
-  # pylint: disable=protected-access
-  loss, predictions, metrics = None, None, None
-
-  if mode == model_fn_lib.ModeKeys.PREDICT:
-    predictions = output_dict
-  else:
-    # Validate that the SignatureDef's method name matches the expected name for
-    # the given mode.
-    expected_method_name = signature_constants.SUPERVISED_TRAIN_METHOD_NAME
-    if mode == model_fn_lib.ModeKeys.EVAL:
-      expected_method_name = signature_constants.SUPERVISED_EVAL_METHOD_NAME
-    if method_name != expected_method_name:
-      raise RuntimeError(
-          'Invalid SignatureDef method name for mode %s.\n\tExpected: %s\n\t'
-          'Got: %s\nPlease ensure that the SavedModel was exported with '
-          '`tf.contrib.estimator.export_all_saved_models()`.' %
-          (mode, expected_method_name, method_name))
+from tensorflow_estimator.contrib.estimator.python.estimator import saved_model_estimator
 
-    # Extract loss, metrics and predictions from the output dict.
-    loss = output_dict[export_output._SupervisedOutput.LOSS_NAME]
-    metrics = _extract_eval_metrics(output_dict)
-    predictions = {
-        key: value for key, value in six.iteritems(output_dict)
-        if key.split(export_output._SupervisedOutput._SEPARATOR_CHAR)[0] == (
-            export_output._SupervisedOutput.PREDICTIONS_NAME)}
+# Include attrs that start with single underscore.
+saved_model_estimator.__all__ = [
+    s for s in dir(saved_model_estimator) if not s.startswith('__')
+]
 
-  # pylint: enable=protected-access
-  return loss, predictions, metrics
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.contrib.estimator.python.estimator.saved_model_estimator import *
diff --git a/tensorflow/contrib/estimator/python/estimator/saved_model_estimator_test.py b/tensorflow/contrib/estimator/python/estimator/saved_model_estimator_test.py
deleted file mode 100644
index 718da1367c..0000000000
--- a/tensorflow/contrib/estimator/python/estimator/saved_model_estimator_test.py
+++ /dev/null
@@ -1,369 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for SavedModelEstimator."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import shutil
-import tempfile
-
-from tensorflow.contrib.estimator.python.estimator import export as contrib_export
-from tensorflow.contrib.estimator.python.estimator import saved_model_estimator
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.export import export_output
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import test
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import training
-
-
-def dummy_input_fn():
-  return dataset_ops.Dataset.from_tensors((
-      {'x': constant_op.constant([[1], [-2]], dtype=dtypes.int64)},
-      constant_op.constant([[4], [-3]], dtype=dtypes.float32))).repeat()
-
-
-def dummy_input_fn_features_only():
-  return dataset_ops.Dataset.from_tensors(
-      {'x': constant_op.constant([[5], [6]], dtype=dtypes.int64)}).repeat()
-
-
-def dummy_supervised_receiver_fn():
-  feature_spec = {
-      'x': array_ops.placeholder(
-          dtype=dtypes.int64, shape=(2, 1), name='feature_x'),
-      }
-  label_spec = array_ops.placeholder(
-      dtype=dtypes.float32, shape=[2, 1], name='truth')
-  return export.build_raw_supervised_input_receiver_fn(
-      feature_spec, label_spec)
-
-
-def dummy_serving_receiver_fn():
-  feature_spec = {'x': array_ops.placeholder(
-      dtype=dtypes.int64, shape=(2, 1), name='feature_x'),}
-  return export.build_raw_serving_input_receiver_fn(feature_spec)
-
-
-def model_fn_diff_modes(features, labels, mode):
-  _, _ = features, labels
-  v = variables.Variable(21, name='some_var')
-  train_op = None
-  loss = constant_op.constant(104)
-  if mode == model_fn_lib.ModeKeys.TRAIN:
-    loss = constant_op.constant(105)
-    predictions = constant_op.constant([501])
-    train_op = control_flow_ops.group(
-        state_ops.assign_add(training.get_global_step(), 1),
-        state_ops.assign_add(v, 3))
-  elif mode == model_fn_lib.ModeKeys.EVAL:
-    loss = constant_op.constant(106)
-    predictions = constant_op.constant([502])
-  else:
-    loss = constant_op.constant(107)
-    predictions = constant_op.constant([503])
-  return model_fn_lib.EstimatorSpec(
-      mode,
-      loss=loss,
-      train_op=train_op,
-      eval_metric_ops={
-          'abs_err': metrics_lib.mean_absolute_error(
-              constant_op.constant(0), predictions)},
-      predictions=predictions)
-
-
-class SavedModelEstimatorTest(test.TestCase):
-
-  def setUp(self):
-    self.tmpdirs = []
-
-  def tearDown(self):
-    for tmpdir in self.tmpdirs:
-      # gfile.DeleteRecursively fails in the windows cmake test, so use shutil.
-      shutil.rmtree(tmpdir, ignore_errors=True)
-    self.tmpdirs = []
-
-  def _get_tmp_dir(self):
-    tmpdir = tempfile.mkdtemp()
-    self.tmpdirs.append(tmpdir)
-    return tmpdir
-
-  def _export_estimator(self, train=True, evaluate=True, predict=True,
-                        model_fn=model_fn_diff_modes):
-    est = estimator.Estimator(model_fn, self._get_tmp_dir())
-    est.train(input_fn=dummy_input_fn, steps=10)
-
-    input_receiver_fn_map = {}
-    if train:
-      input_receiver_fn_map[model_fn_lib.ModeKeys.TRAIN] = (
-          dummy_supervised_receiver_fn())
-    if evaluate:
-      input_receiver_fn_map[model_fn_lib.ModeKeys.EVAL] = (
-          dummy_supervised_receiver_fn())
-    if predict:
-      input_receiver_fn_map[model_fn_lib.ModeKeys.PREDICT] = (
-          dummy_serving_receiver_fn())
-
-    export_base_path = self._get_tmp_dir()
-    export_dir = contrib_export.export_all_saved_models(
-        est, export_base_path, input_receiver_fn_map)
-    return export_dir
-
-  def test_load_all_modes(self):
-    sme = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(), self._get_tmp_dir())
-    sme.train(input_fn=dummy_input_fn, steps=1)
-    sme.train(input_fn=dummy_input_fn, steps=2)
-    self.assertEqual(13, sme.get_variable_value('global_step'))
-    self.assertEqual(60, sme.get_variable_value('some_var'))
-
-    eval_results = sme.evaluate(dummy_input_fn, steps=5)
-
-    self.assertEqual(13, eval_results['global_step'])
-    self.assertEqual(106, eval_results['loss'])
-    self.assertEqual(502, eval_results['metrics/abs_err'])
-
-    predictions = next(sme.predict(dummy_input_fn_features_only))
-    self.assertDictEqual({'output': 503}, predictions)
-
-  def test_load_all_modes_no_train(self):
-    """Ensure that all functions can be used without requiring a ckpt."""
-    sme = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(), self._get_tmp_dir())
-    eval_results = sme.evaluate(dummy_input_fn, steps=5)
-    self.assertEqual(10, eval_results['global_step'])
-    self.assertEqual(106, eval_results['loss'])
-    self.assertEqual(502, eval_results['metrics/abs_err'])
-
-    predictions = next(sme.predict(dummy_input_fn_features_only))
-    self.assertDictEqual({'output': 503}, predictions)
-
-  def test_partial_exported_estimator(self):
-    sme1 = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(train=False, predict=False), self._get_tmp_dir())
-    sme1.evaluate(dummy_input_fn, steps=5)
-    with self.assertRaisesRegexp(RuntimeError, 'train mode is not available'):
-      sme1.train(input_fn=dummy_input_fn, steps=1)
-    with self.assertRaisesRegexp(RuntimeError, 'infer mode is not available'):
-      next(sme1.predict(dummy_input_fn_features_only))
-
-    sme2 = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(evaluate=False), self._get_tmp_dir())
-    sme2.train(input_fn=dummy_input_fn, steps=1)
-    next(sme2.predict(dummy_input_fn_features_only))
-    with self.assertRaisesRegexp(RuntimeError, 'eval mode is not available'):
-      sme2.evaluate(dummy_input_fn, steps=5)
-
-  def test_with_incorrect_input(self):
-    sme = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(), self._get_tmp_dir())
-
-    def bad_shape_input_fn():
-      return dataset_ops.Dataset.from_tensors((
-          {'x': constant_op.constant([1, 2], dtype=dtypes.int64)},
-          constant_op.constant([1, 2], dtype=dtypes.float32)))
-
-    with self.assertRaisesRegexp(ValueError, 'Expected shape'):
-      sme.train(bad_shape_input_fn, steps=1)
-
-    def bad_dtype_input_fn():
-      return dataset_ops.Dataset.from_tensors((
-          {'x': constant_op.constant([[1], [1]], dtype=dtypes.int32)},
-          constant_op.constant([[1], [1]], dtype=dtypes.int64)))
-
-    with self.assertRaisesRegexp(ValueError, 'Expected dtype'):
-      sme.train(bad_dtype_input_fn, steps=1)
-
-  def test_input_fn_with_global_step(self):
-    sme = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(), self._get_tmp_dir())
-
-    def bad_input_fn():
-      training.get_or_create_global_step()
-      return dataset_ops.Dataset.from_tensors((
-          {'x': constant_op.constant([[1], [1]], dtype=dtypes.int64)},
-          constant_op.constant([[1], [1]], dtype=dtypes.float32)))
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 'Graph must not contain a global step tensor'):
-      sme.train(bad_input_fn, steps=1)
-
-  def test_re_export_saved_model_serving_only(self):
-    sme = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(), self._get_tmp_dir())
-    sme.train(dummy_input_fn, steps=3)
-    self.assertEqual(13, sme.get_variable_value('global_step'))
-    self.assertEqual(60, sme.get_variable_value('some_var'))
-
-    predictions = next(sme.predict(dummy_input_fn_features_only))
-    self.assertDictEqual({'output': 503}, predictions)
-
-    # Export SavedModel, and test that the variable and prediction values are
-    # the same.
-    sme_export_dir = sme.export_savedmodel(
-        self._get_tmp_dir(), dummy_serving_receiver_fn())
-
-    sme2 = saved_model_estimator.SavedModelEstimator(
-        sme_export_dir, self._get_tmp_dir())
-    self.assertEqual(60, sme.get_variable_value('some_var'))
-    self.assertEqual(13, sme.get_variable_value('global_step'))
-
-    predictions = next(sme2.predict(dummy_input_fn_features_only))
-    self.assertDictEqual({'output': 503}, predictions)
-
-  def test_re_export_saved_model(self):
-    sme = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(), self._get_tmp_dir())
-    self.assertDictEqual(
-        {'loss': 106, 'metrics/abs_err': 502, 'global_step': 10},
-        sme.evaluate(dummy_input_fn, steps=1))
-
-    sme.train(dummy_input_fn, steps=3)
-    self.assertDictEqual(
-        {'loss': 106, 'metrics/abs_err': 502, 'global_step': 13},
-        sme.evaluate(dummy_input_fn, steps=1))
-    self.assertEqual(60, sme.get_variable_value('some_var'))
-
-    predictions = next(sme.predict(dummy_input_fn_features_only))
-    self.assertDictEqual({'output': 503}, predictions)
-
-    # Export SavedModel for all modes
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: dummy_supervised_receiver_fn(),
-        model_fn_lib.ModeKeys.EVAL: dummy_supervised_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: dummy_serving_receiver_fn()}
-    sme_export_dir = contrib_export.export_all_saved_models(
-        sme, self._get_tmp_dir(), input_receiver_fn_map)
-
-    sme2 = saved_model_estimator.SavedModelEstimator(
-        sme_export_dir, self._get_tmp_dir())
-    self.assertDictEqual(
-        {'loss': 106, 'metrics/abs_err': 502, 'global_step': 13},
-        sme.evaluate(dummy_input_fn, steps=1))
-    self.assertEqual(60, sme.get_variable_value('some_var'))
-
-    sme.train(dummy_input_fn, steps=7)
-    self.assertEqual(20, sme.get_variable_value('global_step'))
-
-    predictions = next(sme2.predict(dummy_input_fn_features_only))
-    self.assertDictEqual({'output': 503}, predictions)
-
-  def test_load_saved_model_from_serving_only(self):
-    def model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant([103]),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([502]),
-          export_outputs={'test': export_output.ClassificationOutput(
-              constant_op.constant([[32.]]))})
-
-    est = estimator.Estimator(model_fn, self._get_tmp_dir())
-    est.train(input_fn=dummy_input_fn, steps=10)
-
-    def serving_input_receiver_fn():
-      return export.ServingInputReceiver(
-          {'test-features': constant_op.constant([[1], [1]])},
-          array_ops.placeholder(dtype=dtypes.string))
-
-    export_dir = est.export_savedmodel(
-        self._get_tmp_dir(), serving_input_receiver_fn)
-
-    sme = saved_model_estimator.SavedModelEstimator(
-        export_dir, self._get_tmp_dir())
-
-    def input_fn():
-      return {'inputs': constant_op.constant('someinputstr')}
-
-    prediction = next(sme.predict(input_fn))
-    self.assertDictEqual({'scores': 32}, prediction)
-
-  def test_with_local_init_op(self):
-    def model_fn(features, labels, mode):
-      _, _ = features, labels
-      v = variables.Variable(21, name='some_var')
-      scaffold = monitored_session.Scaffold(
-          local_init_op=state_ops.assign_add(v, -3).op
-      )
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          scaffold=scaffold,
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          loss=array_ops.identity(v))
-    export_dir = self._export_estimator(predict=False, model_fn=model_fn)
-    sme = saved_model_estimator.SavedModelEstimator(
-        export_dir, self._get_tmp_dir())
-
-    eval_results1 = sme.evaluate(dummy_input_fn, steps=2)
-    self.assertEqual(15, eval_results1['loss'])
-
-    sme.train(dummy_input_fn, steps=1)
-    self.assertEqual(15, sme.get_variable_value('some_var'))
-
-    eval_results2 = sme.evaluate(dummy_input_fn, steps=5)
-    self.assertEqual(12, eval_results2['loss'])
-
-  def test_with_working_input_fn(self):
-    def model_fn(features, labels, mode):
-      loss = None
-      if labels is not None:
-        loss = labels[0][0] + labels[1][0]
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=loss,
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions={'features_0': array_ops.identity([features['x'][0][0]]),
-                       'features_1': array_ops.identity([features['x'][1][0]])})
-
-    sme = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(model_fn=model_fn), self._get_tmp_dir())
-    eval_results = sme.evaluate(dummy_input_fn, steps=1)
-    self.assertEqual(1, eval_results['loss'])
-
-    predictions = next(sme.predict(dummy_input_fn_features_only))
-    self.assertDictEqual({'features_0': 5, 'features_1': 6}, predictions)
-
-  def test_control_dependency(self):
-    # Control dependencies are saved with "^" appended to the start of the input
-    # name. The input map must include control dependencies as well.
-    def model_fn(features, labels, mode):
-      _ = labels
-      with ops.control_dependencies([features['x']]):
-        loss = features['x'][1][0]
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=loss,
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-    sme = saved_model_estimator.SavedModelEstimator(
-        self._export_estimator(train=False, predict=False, model_fn=model_fn),
-        self._get_tmp_dir())
-    sme.evaluate(dummy_input_fn, steps=1)  # Should run without error
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index 1c4c5951df..7363a112af 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -2,8 +2,6 @@ package(default_visibility = ["//tensorflow:internal"])
 
 licenses(["notice"])  # Apache 2.0
 
-load("//tensorflow:tensorflow.bzl", "py_test")
-
 py_library(
     name = "estimator_py",
     srcs = [
@@ -21,6 +19,7 @@ py_library(
         ":dnn",
         ":dnn_linear_combined",
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":export",
         ":exporter",
         ":inputs",
@@ -39,6 +38,7 @@ py_library(
     srcs = ["exporter.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":gc",
         ":metric_keys",
         ":util",
@@ -46,34 +46,12 @@ py_library(
     ],
 )
 
-py_test(
-    name = "exporter_test",
-    size = "small",
-    srcs = ["exporter_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":estimator",
-        ":exporter",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "gc",
     srcs = ["gc.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
-py_test(
-    name = "gc_test",
-    size = "small",
-    srcs = ["gc_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":gc",
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
     ],
 )
@@ -83,30 +61,20 @@ py_library(
     srcs = ["model_fn.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":export_output",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
-py_test(
-    name = "model_fn_test",
-    size = "small",
-    srcs = ["model_fn_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":export_output",
-        ":model_fn",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "training",
     srcs = ["training.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":exporter",
         ":run_config",
         "//tensorflow:tensorflow_py_no_contrib",
@@ -114,51 +82,24 @@ py_library(
     ],
 )
 
-py_test(
-    name = "training_test",
-    size = "medium",
-    srcs = ["training_test.py"],
-    shard_count = 4,
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],
-    deps = [
-        ":dnn",
-        ":estimator",
-        ":exporter",
-        ":inputs",
-        ":run_config",
-        ":training",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "run_config",
     srcs = ["run_config.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
-py_test(
-    name = "run_config_test",
-    size = "small",
-    srcs = ["run_config_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":run_config",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "baseline",
     srcs = ["canned/baseline.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":head",
         ":model_fn",
         ":optimizers",
@@ -167,31 +108,6 @@ py_library(
     ],
 )
 
-py_test(
-    name = "baseline_test",
-    size = "medium",
-    srcs = ["canned/baseline_test.py"],
-    shard_count = 4,
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "noasan",  # test flakily times out in asan mode.
-        "notsan",  # b/67510291
-        "optonly",  # flakily times out in fastbuild
-    ],
-    deps = [
-        ":baseline",
-        ":estimator",
-        ":export_export",
-        ":metric_keys",
-        ":numpy_io",
-        ":pandas_io",
-        ":run_config",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "boosted_trees",
     srcs = ["canned/boosted_trees.py"],
@@ -199,66 +115,33 @@ py_library(
     deps = [
         ":boosted_trees_utils",
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":head",
         ":model_fn",
         "//tensorflow:tensorflow_py_no_contrib",
     ],
 )
 
-py_test(
-    name = "boosted_trees_test",
-    size = "medium",
-    srcs = ["canned/boosted_trees_test.py"],
-    shard_count = 2,
-    srcs_version = "PY2AND3",
-    tags = [
-        "manual",
-        "no_oss",
-        "notap",
-        "optonly",
-    ],
-    deps = [
-        ":boosted_trees",
-        ":inputs",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "boosted_trees_utils",
     srcs = ["canned/boosted_trees_utils.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":head",
         ":model_fn",
         "//tensorflow:tensorflow_py_no_contrib",
     ],
 )
 
-py_test(
-    name = "boosted_trees_utils_test",
-    size = "medium",
-    srcs = ["canned/boosted_trees_utils_test.py"],
-    shard_count = 2,
-    srcs_version = "PY2AND3",
-    tags = [
-        "optonly",
-    ],
-    deps = [
-        ":boosted_trees",
-        ":inputs",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//third_party/py/numpy",
-    ],
-)
-
 py_library(
     name = "dnn",
     srcs = ["canned/dnn.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":head",
         ":model_fn",
         ":optimizers",
@@ -274,6 +157,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":head",
         ":metric_keys",
         ":model_fn",
@@ -286,29 +170,6 @@ py_library(
     ],
 )
 
-py_test(
-    name = "dnn_test",
-    size = "medium",
-    srcs = ["canned/dnn_test.py"],
-    shard_count = 4,
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "notsan",  # b/67510291
-    ],
-    deps = [
-        ":dnn",
-        ":dnn_testing_utils",
-        ":export_export",
-        ":numpy_io",
-        ":pandas_io",
-        ":prediction_keys",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "@absl_py//absl/testing:parameterized",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "dnn_linear_combined",
     srcs = ["canned/dnn_linear_combined.py"],
@@ -316,6 +177,7 @@ py_library(
     deps = [
         ":dnn",
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":head",
         ":linear",
         ":model_fn",
@@ -325,30 +187,6 @@ py_library(
     ],
 )
 
-py_test(
-    name = "dnn_linear_combined_test",
-    size = "medium",
-    srcs = ["canned/dnn_linear_combined_test.py"],
-    shard_count = 8,
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "notsan",  # b/67510291
-    ],
-    deps = [
-        ":dnn_linear_combined",
-        ":dnn_testing_utils",
-        ":export_export",
-        ":linear_testing_utils",
-        ":numpy_io",
-        ":pandas_io",
-        ":prediction_keys",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "@absl_py//absl/testing:parameterized",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "util",
     srcs = [
@@ -356,23 +194,11 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
     ],
 )
 
-py_test(
-    name = "util_test",
-    srcs = ["util_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],  # b/67510291
-    deps = [
-        ":util",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "estimator",
     srcs = [
@@ -380,6 +206,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":export_export",
         ":model_fn",
         ":run_config",
@@ -390,25 +217,6 @@ py_library(
     ],
 )
 
-py_test(
-    name = "estimator_test",
-    srcs = ["estimator_test.py"],
-    shard_count = 4,
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],  # b/67510291
-    deps = [
-        ":estimator",
-        ":export_export",
-        ":export_output",
-        ":model_fn",
-        ":numpy_io",
-        ":run_config",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "parsing_utils",
     srcs = [
@@ -416,42 +224,23 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
-py_test(
-    name = "parsing_utils_test",
-    srcs = ["canned/parsing_utils_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":parsing_utils",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "export_output",
     srcs = ["export/export_output.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
-py_test(
-    name = "export_output_test",
-    size = "small",
-    srcs = ["export/export_output_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":export_output",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "export",
     srcs = [
@@ -459,6 +248,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":export_export",
         ":export_output",
         "//tensorflow:tensorflow_py_no_contrib",
@@ -472,30 +262,19 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":util",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
-py_test(
-    name = "export_test",
-    size = "small",
-    srcs = ["export/export_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":export_export",
-        ":export_output",
-        ":util",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "head",
     srcs = ["canned/head.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":export_output",
         ":metric_keys",
         ":model_fn",
@@ -505,31 +284,12 @@ py_library(
     ],
 )
 
-py_test(
-    name = "head_test",
-    size = "medium",
-    srcs = ["canned/head_test.py"],
-    shard_count = 4,
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    deps = [
-        ":dnn_testing_utils",
-        ":head",
-        ":metric_keys",
-        ":model_fn",
-        ":numpy_io",
-        ":prediction_keys",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//third_party/py/numpy",
-        "@six_archive//:six",
-    ],
-)
-
 py_library(
     name = "inputs",
     srcs = ["inputs/inputs.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":numpy_io",
         ":pandas_io",
         "//tensorflow:tensorflow_py_no_contrib",
@@ -542,6 +302,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":head",
         ":optimizers",
         "//tensorflow:tensorflow_py_no_contrib",
@@ -556,6 +317,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":export_export",
         ":linear",
         ":metric_keys",
@@ -567,28 +329,12 @@ py_library(
     ],
 )
 
-py_test(
-    name = "linear_test",
-    size = "medium",
-    srcs = ["canned/linear_test.py"],
-    shard_count = 4,
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_pip",
-        "notsan",  # b/67510291
-    ],
-    deps = [
-        ":linear",
-        ":linear_testing_utils",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "metric_keys",
     srcs = ["canned/metric_keys.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":model_fn",
     ],
 )
@@ -598,57 +344,29 @@ py_library(
     srcs = ["inputs/numpy_io.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         ":inputs_queues",
     ],
 )
 
-py_test(
-    name = "numpy_io_test",
-    size = "small",
-    srcs = ["inputs/numpy_io_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":numpy_io",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "optimizers",
     srcs = ["canned/optimizers.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
-py_test(
-    name = "optimizers_test",
-    size = "small",
-    srcs = ["canned/optimizers_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":optimizers",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "pandas_io",
     srcs = ["inputs/pandas_io.py"],
     srcs_version = "PY2AND3",
-    deps = [":inputs_queues"],
-)
-
-py_test(
-    name = "pandas_io_test",
-    size = "small",
-    srcs = ["inputs/pandas_io_test.py"],
-    srcs_version = "PY2AND3",
     deps = [
-        ":pandas_io",
-        "//tensorflow:tensorflow_py_no_contrib",
+        ":expect_tensorflow_estimator_installed",
+        ":inputs_queues",
     ],
 )
 
@@ -656,7 +374,9 @@ py_library(
     name = "prediction_keys",
     srcs = ["canned/prediction_keys.py"],
     srcs_version = "PY2AND3",
-    deps = [],
+    deps = [
+        ":expect_tensorflow_estimator_installed",
+    ],
 )
 
 py_library(
@@ -668,41 +388,19 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
-py_test(
-    name = "feeding_functions_test",
-    size = "small",
-    srcs = [
-        "inputs/queues/feeding_functions_test.py",
-    ],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":inputs_queues",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
-py_test(
-    name = "feeding_queue_runner_test",
-    size = "small",
-    srcs = ["inputs/queues/feeding_queue_runner_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":inputs_queues",
-        "//tensorflow:tensorflow_py_no_contrib",
-    ],
-)
-
 py_library(
     name = "keras",
     srcs = ["keras.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
+        ":expect_tensorflow_estimator_installed",
         ":export_export",
         ":model_fn",
         ":run_config",
@@ -710,61 +408,9 @@ py_library(
     ],
 )
 
-py_test(
-    name = "keras_test",
-    size = "large",
-    srcs = ["keras_test.py"],
-    shard_count = 4,
-    srcs_version = "PY2AND3",
-    tags = [
-        "no_windows",
-        "notsan",  # b/67510291
-    ],
-    deps = [
-        ":keras",
-        ":numpy_io",
-        ":run_config",
-        "//tensorflow:tensorflow_py_no_contrib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "expect_numpy_installed",
-    # This is a dummy rule used as a numpy dependency in open-source.
-    # We expect numpy to already be installed on the system, e.g. via
-    # `pip install numpy`
-    visibility = ["//visibility:public"],
-)
-
-py_library(
-    name = "expect_pandas_installed",
-    # This is a dummy rule used as a numpy dependency in open-source.
-    # We expect pandas to already be installed on the system, e.g. via
-    # `pip install pandas`
-    visibility = ["//visibility:public"],
-)
-
-py_library(
-    name = "expect_h5py_installed",
-    # This is a dummy rule used as a numpy dependency in open-source.
-    # We expect h5py to already be installed on the system, e.g. via
-    # `pip install h5py'
-    visibility = ["//visibility:public"],
-)
-
-py_library(
-    name = "expect_six_installed",
-    # This is a dummy rule used as a numpy dependency in open-source.
-    # We expect six to already be installed on the system, e.g. via
-    # `pip install six`
-    visibility = ["//visibility:public"],
-)
-
 py_library(
-    name = "expect_tensorflow_installed",
-    # This is a dummy rule used as a numpy dependency in open-source.
-    # We expect tensorflow to already be installed on the system, e.g. via
-    # `pip install tensorflow` or `pip install tensorflow_gpu`
+    name = "expect_tensorflow_estimator_installed",
+    # This is a dummy rule used as a dependency in open-source.
+    # We expect tensorflow_estimator to already be installed.
     visibility = ["//visibility:public"],
 )
diff --git a/tensorflow/python/estimator/__init__.py b/tensorflow/python/estimator/__init__.py
index 8cf8df567f..03d310a6cf 100644
--- a/tensorflow/python/estimator/__init__.py
+++ b/tensorflow/python/estimator/__init__.py
@@ -12,14 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Import Estimator APIs.
+"""estimator python module.
 
-Note: This file is imported by the create_estimator_api genrule. It must
-transitively import all Estimator modules/packages for their @estimator_export
-annotations to generate the public Estimator python API.
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
 """
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import tensorflow.python.estimator.estimator_lib
+from tensorflow_estimator.python import estimator
+
+# Include attrs that start with single underscore.
+estimator.__all__ = [s for s in dir(estimator) if not s.startswith('__')]
+
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator import *
diff --git a/tensorflow/python/estimator/canned/__init__.py b/tensorflow/python/estimator/canned/__init__.py
index e69de29bb2..fd46937941 100644
--- a/tensorflow/python/estimator/canned/__init__.py
+++ b/tensorflow/python/estimator/canned/__init__.py
@@ -0,0 +1,32 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""canned python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow_estimator.python.estimator import canned
+
+# Include attrs that start with single underscore.
+canned.__all__ = [s for s in dir(canned) if not s.startswith('__')]
+
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned import *
diff --git a/tensorflow/python/estimator/canned/baseline.py b/tensorflow/python/estimator/canned/baseline.py
index 20c7a69b7c..9dd64bad0a 100644
--- a/tensorflow/python/estimator/canned/baseline.py
+++ b/tensorflow/python/estimator/canned/baseline.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,365 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Baseline estimators.
+"""baseline python module.
 
-Baseline estimators are bias-only estimators that can be used for debugging
-and as simple baselines.
-
-Example:
-
-```
-# Build BaselineClassifier
-classifier = BaselineClassifier(n_classes=3)
-
-# Input builders
-def input_fn_train(): # returns x, y (where y represents label's class index).
-  pass
-
-def input_fn_eval(): # returns x, y (where y represents label's class index).
-  pass
-
-# Fit model.
-classifier.train(input_fn=input_fn_train)
-
-# Evaluate cross entropy between the test and train labels.
-loss = classifier.evaluate(input_fn=input_fn_eval)["loss"]
-
-# predict outputs the probability distribution of the classes as seen in
-# training.
-predictions = classifier.predict(new_samples)
-```
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
 """
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column as feature_column_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.training import training_util
-from tensorflow.python.util.tf_export import estimator_export
-
-# The default learning rate of 0.3 is a historical artifact of the initial
-# implementation, but seems a reasonable choice.
-_LEARNING_RATE = 0.3
-
-
-def _get_weight_column_key(weight_column):
-  if weight_column is None:
-    return None
-  if isinstance(weight_column, six.string_types):
-    return weight_column
-  if not isinstance(weight_column, feature_column_lib._NumericColumn):  # pylint: disable=protected-access
-    raise TypeError('Weight column must be either a string or _NumericColumn.'
-                    ' Given type: {}.'.format(type(weight_column)))
-  return weight_column.key()
-
-
-def _baseline_logit_fn_builder(num_outputs, weight_column=None):
-  """Function builder for a baseline logit_fn.
-
-  Args:
-    num_outputs: Number of outputs for the model.
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-       weights. It will be multiplied by the loss of the example.
-  Returns:
-    A logit_fn (see below).
-  """
-
-  def baseline_logit_fn(features):
-    """Baseline model logit_fn.
-
-    The baseline model simply learns a bias, so the output logits are a
-    `Variable` with one weight for each output that learns the bias for the
-    corresponding output.
-
-    Args:
-      features: The first item returned from the `input_fn` passed to `train`,
-        `evaluate`, and `predict`. This should be a single `Tensor` or dict with
-        `Tensor` values.
-    Returns:
-      A `Tensor` representing the logits.
-    """
-    size_checks = []
-    batch_size = None
-
-    weight_column_key = _get_weight_column_key(weight_column)
-
-    # The first dimension is assumed to be a batch size and must be consistent
-    # among all of the features.
-    for key, feature in features.items():
-      # Skip weight_column to ensure we don't add size checks to it.
-      # These would introduce a dependency on the weight at serving time.
-      if key == weight_column_key:
-        continue
-      first_dim = array_ops.shape(feature)[0]
-      if batch_size is None:
-        batch_size = first_dim
-      else:
-        size_checks.append(check_ops.assert_equal(batch_size, first_dim))
-
-    with ops.control_dependencies(size_checks):
-      with variable_scope.variable_scope('baseline'):
-        bias = variable_scope.get_variable('bias', shape=[num_outputs],
-                                           initializer=init_ops.Zeros)
-        return math_ops.multiply(bias, array_ops.ones([batch_size,
-                                                       num_outputs]))
-
-  return baseline_logit_fn
-
-
-def _baseline_model_fn(features, labels, mode, head, optimizer,
-                       weight_column=None, config=None):
-  """Model_fn for baseline models.
-
-  Args:
-    features: `Tensor` or dict of `Tensor` (depends on data passed to `train`).
-    labels: `Tensor` of labels that are compatible with the `Head` instance.
-    mode: Defines whether this is training, evaluation or prediction.
-      See `ModeKeys`.
-    head: A `Head` instance.
-    optimizer: String, `tf.Optimizer` object, or callable that creates the
-      optimizer to use for training. If not specified, will use `FtrlOptimizer`
-      with a default learning rate of 0.3.
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-       weights. It will be multiplied by the loss of the example.
-    config: `RunConfig` object to configure the runtime settings.
-
-  Raises:
-    KeyError: If weight column is specified but not present.
-    ValueError: If features is an empty dictionary.
-
-  Returns:
-    An `EstimatorSpec` instance.
-  """
-  del config  # Unused.
-
-  logit_fn = _baseline_logit_fn_builder(head.logits_dimension, weight_column)
-  logits = logit_fn(features)
-
-  def train_op_fn(loss):
-    opt = optimizers.get_optimizer_instance(
-        optimizer, learning_rate=_LEARNING_RATE)
-    return opt.minimize(loss, global_step=training_util.get_global_step())
-
-  return head.create_estimator_spec(
-      features=features,
-      mode=mode,
-      logits=logits,
-      labels=labels,
-      train_op_fn=train_op_fn)
-
-
-@estimator_export('estimator.BaselineClassifier')
-class BaselineClassifier(estimator.Estimator):
-  """A classifier that can establish a simple baseline.
-
-  This classifier ignores feature values and will learn to predict the average
-  value of each label. For single-label problems, this will predict the
-  probability distribution of the classes as seen in the labels. For multi-label
-  problems, this will predict the fraction of examples that are positive for
-  each class.
-
-  Example:
-
-  ```python
-
-  # Build BaselineClassifier
-  classifier = BaselineClassifier(n_classes=3)
-
-  # Input builders
-  def input_fn_train: # returns x, y (where y represents label's class index).
-    pass
-
-  def input_fn_eval: # returns x, y (where y represents label's class index).
-    pass
-
-  # Fit model.
-  classifier.train(input_fn=input_fn_train)
-
-  # Evaluate cross entropy between the test and train labels.
-  loss = classifier.evaluate(input_fn=input_fn_eval)["loss"]
-
-  # predict outputs the probability distribution of the classes as seen in
-  # training.
-  predictions = classifier.predict(new_samples)
-
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-    otherwise there will be a `KeyError`:
-
-  * if `weight_column` is not `None`, a feature with
-     `key=weight_column` whose value is a `Tensor`.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               model_dir=None,
-               n_classes=2,
-               weight_column=None,
-               label_vocabulary=None,
-               optimizer='Ftrl',
-               config=None,
-               loss_reduction=losses.Reduction.SUM):
-    """Initializes a BaselineClassifier instance.
-
-    Args:
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator to
-        continue training a previously saved model.
-      n_classes: number of label classes. Default is binary classification.
-        It must be greater than 1. Note: Class labels are integers representing
-        the class index (i.e. values from 0 to n_classes-1). For arbitrary
-        label values (e.g. string labels), convert to class indices first.
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-         weights. It will be multiplied by the loss of the example.
-      label_vocabulary: Optional list of strings with size `[n_classes]`
-        defining the label vocabulary. Only supported for `n_classes` > 2.
-      optimizer: String, `tf.Optimizer` object, or callable that creates the
-        optimizer to use for training. If not specified, will use
-        `FtrlOptimizer` with a default learning rate of 0.3.
-      config: `RunConfig` object to configure the runtime settings.
-      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-        to reduce training loss over batch. Defaults to `SUM`.
-    Returns:
-      A `BaselineClassifier` estimator.
-
-    Raises:
-      ValueError: If `n_classes` < 2.
-    """
-    if n_classes == 2:
-      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
-          weight_column=weight_column,
-          label_vocabulary=label_vocabulary,
-          loss_reduction=loss_reduction)
-    else:
-      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
-          n_classes, weight_column=weight_column,
-          label_vocabulary=label_vocabulary,
-          loss_reduction=loss_reduction)
-    def _model_fn(features, labels, mode, config):
-      return _baseline_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          optimizer=optimizer,
-          weight_column=weight_column,
-          config=config)
-    super(BaselineClassifier, self).__init__(
-        model_fn=_model_fn,
-        model_dir=model_dir,
-        config=config)
-
-
-@estimator_export('estimator.BaselineRegressor')
-class BaselineRegressor(estimator.Estimator):
-  """A regressor that can establish a simple baseline.
-
-  This regressor ignores feature values and will learn to predict the average
-  value of each label.
-
-  Example:
-
-  ```python
-
-  # Build BaselineRegressor
-  regressor = BaselineRegressor()
-
-  # Input builders
-  def input_fn_train: # returns x, y (where y is the label).
-    pass
-
-  def input_fn_eval: # returns x, y (where y is the label).
-    pass
-
-  # Fit model.
-  regressor.train(input_fn=input_fn_train)
-
-  # Evaluate squared-loss between the test and train targets.
-  loss = regressor.evaluate(input_fn=input_fn_eval)["loss"]
-
-  # predict outputs the mean value seen during training.
-  predictions = regressor.predict(new_samples)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-    otherwise there will be a `KeyError`:
-
-  * if `weight_column` is not `None`, a feature with
-     `key=weight_column` whose value is a `Tensor`.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               model_dir=None,
-               label_dimension=1,
-               weight_column=None,
-               optimizer='Ftrl',
-               config=None,
-               loss_reduction=losses.Reduction.SUM):
-    """Initializes a BaselineRegressor instance.
+from tensorflow_estimator.python.estimator.canned import baseline
 
-    Args:
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator to
-        continue training a previously saved model.
-      label_dimension: Number of regression targets per example. This is the
-        size of the last dimension of the labels and logits `Tensor` objects
-        (typically, these have shape `[batch_size, label_dimension]`).
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-         weights. It will be multiplied by the loss of the example.
-      optimizer: String, `tf.Optimizer` object, or callable that creates the
-        optimizer to use for training. If not specified, will use
-        `FtrlOptimizer` with a default learning rate of 0.3.
-      config: `RunConfig` object to configure the runtime settings.
-      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-        to reduce training loss over batch. Defaults to `SUM`.
-    Returns:
-      A `BaselineRegressor` estimator.
-    """
+# Include attrs that start with single underscore.
+baseline.__all__ = [s for s in dir(baseline) if not s.startswith('__')]
 
-    head = head_lib._regression_head(  # pylint: disable=protected-access
-        label_dimension=label_dimension,
-        weight_column=weight_column,
-        loss_reduction=loss_reduction)
-    def _model_fn(features, labels, mode, config):
-      return _baseline_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          optimizer=optimizer,
-          config=config)
-    super(BaselineRegressor, self).__init__(
-        model_fn=_model_fn,
-        model_dir=model_dir,
-        config=config)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.baseline import *
diff --git a/tensorflow/python/estimator/canned/baseline_test.py b/tensorflow/python/estimator/canned/baseline_test.py
deleted file mode 100644
index 1df7216ba6..0000000000
--- a/tensorflow/python/estimator/canned/baseline_test.py
+++ /dev/null
@@ -1,1558 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for baseline.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-import shutil
-import tempfile
-
-import numpy as np
-import six
-
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.estimator.canned import baseline
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.estimator.inputs import pandas_io
-from tensorflow.python.feature_column import feature_column as feature_column_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import data_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import input as input_lib
-from tensorflow.python.training import optimizer
-from tensorflow.python.training import queue_runner
-from tensorflow.python.training import saver
-
-
-try:
-  # pylint: disable=g-import-not-at-top
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
-
-# pylint rules which are disabled by default for test files.
-# pylint: disable=invalid-name,protected-access,missing-docstring
-
-# Names of variables created by model.
-BIAS_NAME = 'baseline/bias'
-
-
-def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
-  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
-    expected = ops.convert_to_tensor(expected, name='expected')
-    actual = ops.convert_to_tensor(actual, name='actual')
-    rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
-    rtol = ops.convert_to_tensor(rtol, name='rtol')
-    return check_ops.assert_less(
-        rdiff,
-        rtol,
-        data=('Condition expected =~ actual did not hold element-wise:'
-              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
-              'rtol = ', rtol,),
-        name=scope)
-
-
-def save_variables_to_ckpt(model_dir):
-  init_all_op = [variables.global_variables_initializer()]
-  with tf_session.Session() as sess:
-    sess.run(init_all_op)
-    saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
-
-
-def queue_parsed_features(feature_map):
-  tensors_to_enqueue = []
-  keys = []
-  for key, tensor in six.iteritems(feature_map):
-    keys.append(key)
-    tensors_to_enqueue.append(tensor)
-  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
-  input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
-  queue_runner.add_queue_runner(
-      queue_runner.QueueRunner(input_queue,
-                               [input_queue.enqueue(tensors_to_enqueue)]))
-  dequeued_tensors = input_queue.dequeue()
-  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
-
-
-def sorted_key_dict(unsorted_dict):
-  return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}
-
-
-def sigmoid(x):
-  return 1 / (1 + np.exp(-1.0 * x))
-
-
-def _baseline_regressor_fn(*args, **kwargs):
-  return baseline.BaselineRegressor(*args, **kwargs)
-
-
-def _baseline_classifier_fn(*args, **kwargs):
-  return baseline.BaselineClassifier(*args, **kwargs)
-
-
-# Tests for Baseline Regressor.
-
-
-# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders.
-class BaselineRegressorEvaluationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_evaluation_for_simple_data(self):
-    with ops.Graph().as_default():
-      variables.Variable([13.0], name=BIAS_NAME)
-      variables.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
-    eval_metrics = baseline_regressor.evaluate(
-        input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1)
-
-    # Logit is bias = 13, while label is 10. Loss is 3**2 = 9.
-    self.assertDictEqual({
-        metric_keys.MetricKeys.LOSS: 9.,
-        metric_keys.MetricKeys.LOSS_MEAN: 9.,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
-        metric_keys.MetricKeys.LABEL_MEAN: 10.,
-        ops.GraphKeys.GLOBAL_STEP: 100
-    }, eval_metrics)
-
-  def test_evaluation_batch(self):
-    """Tests evaluation for batch_size==2."""
-    with ops.Graph().as_default():
-      variables.Variable([13.0], name=BIAS_NAME)
-      variables.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
-    eval_metrics = baseline_regressor.evaluate(
-        input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1)
-
-    # Logit is bias = 13, while label is 10.
-    # Loss per example is 3**2 = 9.
-    # Training loss is the sum over batch = 9 + 9 = 18
-    # Average loss is the average over batch = 9
-    self.assertDictEqual({
-        metric_keys.MetricKeys.LOSS: 18.,
-        metric_keys.MetricKeys.LOSS_MEAN: 9.,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
-        metric_keys.MetricKeys.LABEL_MEAN: 10.,
-        ops.GraphKeys.GLOBAL_STEP: 100
-    }, eval_metrics)
-
-  def test_evaluation_weights(self):
-    """Tests evaluation with weights."""
-    with ops.Graph().as_default():
-      variables.Variable([13.0], name=BIAS_NAME)
-      variables.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    def _input_fn():
-      features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
-      labels = ((10.,), (10.,))
-      return features, labels
-
-    baseline_regressor = _baseline_regressor_fn(
-        weight_column='weights',
-        model_dir=self._model_dir)
-    eval_metrics = baseline_regressor.evaluate(input_fn=_input_fn, steps=1)
-
-    # Logit is bias = 13, while label is 10.
-    # Loss per example is 3**2 = 9.
-    # Training loss is the weighted sum over batch = 9 + 2*9 = 27
-    # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9
-    self.assertDictEqual({
-        metric_keys.MetricKeys.LOSS: 27.,
-        metric_keys.MetricKeys.LOSS_MEAN: 9.,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
-        metric_keys.MetricKeys.LABEL_MEAN: 10.,
-        ops.GraphKeys.GLOBAL_STEP: 100
-    }, eval_metrics)
-
-  def test_evaluation_for_multi_dimensions(self):
-    label_dim = 2
-    with ops.Graph().as_default():
-      variables.Variable([46.0, 58.0], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    baseline_regressor = _baseline_regressor_fn(
-        label_dimension=label_dim,
-        model_dir=self._model_dir)
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'age': np.array([[2., 4., 5.]]),
-        },
-        y=np.array([[46., 58.]]),
-        batch_size=1,
-        num_epochs=None,
-        shuffle=False)
-    eval_metrics = baseline_regressor.evaluate(input_fn=input_fn, steps=1)
-
-    self.assertItemsEqual(
-        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
-         metric_keys.MetricKeys.PREDICTION_MEAN,
-         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
-        eval_metrics.keys())
-
-    # Logit is bias which is [46, 58]
-    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
-
-
-class BaselineRegressorPredictTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_1d(self):
-    """Tests predict when all variables are one-dimensional."""
-    with ops.Graph().as_default():
-      variables.Variable([.2], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
-
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': np.array([[2.]])},
-        y=None,
-        batch_size=1,
-        num_epochs=1,
-        shuffle=False)
-    predictions = baseline_regressor.predict(input_fn=predict_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    # x * weight + bias = 2. * 10. + .2 = 20.2
-    self.assertAllClose([[.2]], predicted_scores)
-
-  def testMultiDim(self):
-    """Tests predict when all variables are multi-dimenstional."""
-    batch_size = 2
-    label_dimension = 3
-    with ops.Graph().as_default():
-      variables.Variable(  # shape=[label_dimension]
-          [.2, .4, .6], name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    baseline_regressor = _baseline_regressor_fn(
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-
-    predict_input_fn = numpy_io.numpy_input_fn(
-        # x shape=[batch_size, x_dim]
-        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
-        y=None,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-    predictions = baseline_regressor.predict(input_fn=predict_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    # score = bias, shape=[batch_size, label_dimension]
-    self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]],
-                        predicted_scores)
-
-
-class BaselineRegressorIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, prediction_length):
-    feature_columns = [
-        feature_column_lib.numeric_column('x', shape=(input_dimension,))
-    ]
-    est = _baseline_regressor_fn(
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    # learn y = x
-    est.train(train_input_fn, steps=200)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array(
-        [x['predictions'] for x in est.predict(predict_input_fn)])
-    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)
-
-    # EXPORT
-    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self):
-    """Tests complete flow with numpy_input_fn."""
-    label_dimension = 2
-    input_dimension = label_dimension
-    batch_size = 10
-    prediction_length = batch_size
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=None,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        label_dimension=label_dimension,
-        prediction_length=prediction_length)
-
-  def test_pandas_input_fn(self):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-
-    # Pandas DataFrame natually supports 1 dim data only.
-    label_dimension = 1
-    input_dimension = label_dimension
-    batch_size = 10
-    data = np.array([1., 2., 3., 4.], dtype=np.float32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(data)
-    prediction_length = 4
-
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x, batch_size=batch_size, shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        label_dimension=label_dimension,
-        prediction_length=prediction_length)
-
-  def test_input_fn_from_parse_example(self):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    label_dimension = 2
-    input_dimension = label_dimension
-    batch_size = 10
-    prediction_length = batch_size
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-
-    serialized_examples = []
-    for datum in data:
-      example = example_pb2.Example(features=feature_pb2.Features(
-          feature={
-              'x':
-                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
-                      value=datum)),
-              'y':
-                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
-                      value=datum[:label_dimension])),
-          }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
-    }
-
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    self._test_complete_flow(
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=input_dimension,
-        label_dimension=label_dimension,
-        prediction_length=prediction_length)
-
-
-class BaselineRegressorTrainingTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _mock_optimizer(self, expected_loss=None):
-    expected_var_names = [
-        '%s:0' % BIAS_NAME
-    ]
-
-    def _minimize(loss, global_step=None, var_list=None):
-      trainable_vars = var_list or ops.get_collection(
-          ops.GraphKeys.TRAINABLE_VARIABLES)
-      self.assertItemsEqual(expected_var_names,
-                            [var.name for var in trainable_vars])
-
-      # Verify loss. We can't check the value directly, so we add an assert op.
-      self.assertEquals(0, loss.shape.ndims)
-      if expected_loss is None:
-        if global_step is not None:
-          return state_ops.assign_add(global_step, 1).op
-        return control_flow_ops.no_op()
-      assert_loss = assert_close(
-          math_ops.to_float(expected_loss, name='expected'),
-          loss,
-          name='assert_loss')
-      with ops.control_dependencies((assert_loss,)):
-        if global_step is not None:
-          return state_ops.assign_add(global_step, 1).op
-        return control_flow_ops.no_op()
-
-    mock_optimizer = test.mock.NonCallableMock(
-        spec=optimizer.Optimizer,
-        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
-    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
-
-    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
-    # So, return mock_optimizer itself for deepcopy.
-    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
-    return mock_optimizer
-
-  def _assert_checkpoint(self,
-                         label_dimension,
-                         expected_global_step,
-                         expected_bias=None):
-    shapes = {
-        name: shape
-        for (name, shape) in checkpoint_utils.list_variables(self._model_dir)
-    }
-
-    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
-    self.assertEqual(expected_global_step,
-                     checkpoint_utils.load_variable(self._model_dir,
-                                                    ops.GraphKeys.GLOBAL_STEP))
-
-    self.assertEqual([label_dimension], shapes[BIAS_NAME])
-    if expected_bias is not None:
-      self.assertEqual(expected_bias,
-                       checkpoint_utils.load_variable(self._model_dir,
-                                                      BIAS_NAME))
-
-  def testFromScratchWithDefaultOptimizer(self):
-    # Create BaselineRegressor.
-    label = 5.
-    age = 17
-    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
-
-    # Train for a few steps, and validate final checkpoint.
-    num_steps = 10
-    baseline_regressor.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self._assert_checkpoint(label_dimension=1, expected_global_step=num_steps)
-
-  def testTrainWithOneDimLabel(self):
-    label_dimension = 1
-    batch_size = 20
-    est = _baseline_regressor_fn(
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
-    self.assertEqual((batch_size,), data_rank_1.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1},
-        y=data_rank_1,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(label_dimension=1, expected_global_step=200)
-
-  def testTrainWithOneDimWeight(self):
-    label_dimension = 1
-    batch_size = 20
-    est = _baseline_regressor_fn(
-        label_dimension=label_dimension,
-        weight_column='w',
-        model_dir=self._model_dir)
-
-    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
-    self.assertEqual((batch_size,), data_rank_1.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1,
-           'w': data_rank_1},
-        y=data_rank_1,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(label_dimension=1, expected_global_step=200)
-
-  def testFromScratch(self):
-    # Create BaselineRegressor.
-    label = 5.
-    age = 17
-    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
-    mock_optimizer = self._mock_optimizer(expected_loss=25.)
-    baseline_regressor = _baseline_regressor_fn(
-        model_dir=self._model_dir,
-        optimizer=mock_optimizer)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    baseline_regressor.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        label_dimension=1,
-        expected_global_step=num_steps,
-        expected_bias=[0.])
-
-  def testFromCheckpoint(self):
-    # Create initial checkpoint.
-    bias = 7.0
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables.Variable([bias], name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # logits = bias = 6.
-    # loss = (logits - label)^2 = (7 - 5)^2 = 4
-    mock_optimizer = self._mock_optimizer(expected_loss=4.)
-    baseline_regressor = _baseline_regressor_fn(
-        model_dir=self._model_dir,
-        optimizer=mock_optimizer)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    baseline_regressor.train(
-        input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        label_dimension=1,
-        expected_global_step=initial_global_step + num_steps,
-        expected_bias=[bias])
-
-  def testFromCheckpointMultiBatch(self):
-    # Create initial checkpoint.
-    bias = 5.0
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables.Variable([bias], name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # logits = bias
-    # logits[0] = 5.
-    # logits[1] = 5.
-    # loss = sum(logits - label)^2 = (5 - 5)^2 + (5 - 3)^2 = 4
-    mock_optimizer = self._mock_optimizer(expected_loss=4.)
-    baseline_regressor = _baseline_regressor_fn(
-        model_dir=self._model_dir,
-        optimizer=mock_optimizer)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    baseline_regressor.train(
-        input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))),
-        steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        label_dimension=1,
-        expected_global_step=initial_global_step + num_steps,
-        expected_bias=bias)
-
-
-# Tests for Baseline Classifier.
-
-
-class BaselineClassifierTrainingTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _mock_optimizer(self, expected_loss=None):
-    expected_var_names = [
-        '%s:0' % BIAS_NAME
-    ]
-
-    def _minimize(loss, global_step):
-      trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      self.assertItemsEqual(
-          expected_var_names,
-          [var.name for var in trainable_vars])
-
-      # Verify loss. We can't check the value directly, so we add an assert op.
-      self.assertEquals(0, loss.shape.ndims)
-      if expected_loss is None:
-        return state_ops.assign_add(global_step, 1).op
-      assert_loss = assert_close(
-          math_ops.to_float(expected_loss, name='expected'),
-          loss,
-          name='assert_loss')
-      with ops.control_dependencies((assert_loss,)):
-        return state_ops.assign_add(global_step, 1).op
-
-    mock_optimizer = test.mock.NonCallableMock(
-        spec=optimizer.Optimizer,
-        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
-    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
-
-    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
-    # So, return mock_optimizer itself for deepcopy.
-    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
-    return mock_optimizer
-
-  def _assert_checkpoint(
-      self, n_classes, expected_global_step, expected_bias=None):
-    logits_dimension = n_classes if n_classes > 2 else 1
-
-    shapes = {
-        name: shape for (name, shape) in
-        checkpoint_utils.list_variables(self._model_dir)
-    }
-
-    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
-    self.assertEqual(
-        expected_global_step,
-        checkpoint_utils.load_variable(
-            self._model_dir, ops.GraphKeys.GLOBAL_STEP))
-
-    self.assertEqual([logits_dimension], shapes[BIAS_NAME])
-    if expected_bias is not None:
-      self.assertAllEqual(expected_bias,
-                          checkpoint_utils.load_variable(
-                              self._model_dir, BIAS_NAME))
-
-  def _testFromScratchWithDefaultOptimizer(self, n_classes):
-    label = 0
-    age = 17
-    est = baseline.BaselineClassifier(
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # Train for a few steps, and validate final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self._assert_checkpoint(n_classes, num_steps)
-
-  def testBinaryClassesFromScratchWithDefaultOptimizer(self):
-    self._testFromScratchWithDefaultOptimizer(n_classes=2)
-
-  def testMultiClassesFromScratchWithDefaultOptimizer(self):
-    self._testFromScratchWithDefaultOptimizer(n_classes=4)
-
-  def _testTrainWithTwoDimsLabel(self, n_classes):
-    batch_size = 20
-
-    est = baseline.BaselineClassifier(
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    data_rank_1 = np.array([0, 1])
-    data_rank_2 = np.array([[0], [1]])
-    self.assertEqual((2,), data_rank_1.shape)
-    self.assertEqual((2, 1), data_rank_2.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1},
-        y=data_rank_2,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(n_classes, 200)
-
-  def testBinaryClassesTrainWithTwoDimsLabel(self):
-    self._testTrainWithTwoDimsLabel(n_classes=2)
-
-  def testMultiClassesTrainWithTwoDimsLabel(self):
-    self._testTrainWithTwoDimsLabel(n_classes=4)
-
-  def _testTrainWithOneDimLabel(self, n_classes):
-    batch_size = 20
-
-    est = baseline.BaselineClassifier(
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    data_rank_1 = np.array([0, 1])
-    self.assertEqual((2,), data_rank_1.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1},
-        y=data_rank_1,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(n_classes, 200)
-
-  def testBinaryClassesTrainWithOneDimLabel(self):
-    self._testTrainWithOneDimLabel(n_classes=2)
-
-  def testMultiClassesTrainWithOneDimLabel(self):
-    self._testTrainWithOneDimLabel(n_classes=4)
-
-  def _testTrainWithTwoDimsWeight(self, n_classes):
-    batch_size = 20
-
-    est = baseline.BaselineClassifier(
-        weight_column='w',
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    data_rank_1 = np.array([0, 1])
-    data_rank_2 = np.array([[0], [1]])
-    self.assertEqual((2,), data_rank_1.shape)
-    self.assertEqual((2, 1), data_rank_2.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1, 'w': data_rank_2}, y=data_rank_1,
-        batch_size=batch_size, num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(n_classes, 200)
-
-  def testBinaryClassesTrainWithTwoDimsWeight(self):
-    self._testTrainWithTwoDimsWeight(n_classes=2)
-
-  def testMultiClassesTrainWithTwoDimsWeight(self):
-    self._testTrainWithTwoDimsWeight(n_classes=4)
-
-  def _testTrainWithOneDimWeight(self, n_classes):
-    batch_size = 20
-
-    est = baseline.BaselineClassifier(
-        weight_column='w',
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    data_rank_1 = np.array([0, 1])
-    self.assertEqual((2,), data_rank_1.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1, 'w': data_rank_1}, y=data_rank_1,
-        batch_size=batch_size, num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(n_classes, 200)
-
-  def testBinaryClassesTrainWithOneDimWeight(self):
-    self._testTrainWithOneDimWeight(n_classes=2)
-
-  def testMultiClassesTrainWithOneDimWeight(self):
-    self._testTrainWithOneDimWeight(n_classes=4)
-
-  def _testFromScratch(self, n_classes):
-    label = 1
-    age = 17
-    # For binary classifier:
-    #   loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are
-    #   all zero initially) and label = 1 so,
-    #      loss = 1 * -log ( sigmoid(logits) ) = 0.69315
-    # For multi class classifier:
-    #   loss = cross_entropy(logits, label) where logits are all 0s (weights are
-    #   all zero initially) and label = 1 so,
-    #      loss = 1 * -log ( 1.0 / n_classes )
-    # For this particular test case, as logits are same, the formula
-    # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases.
-    mock_optimizer = self._mock_optimizer(
-        expected_loss=-1 * math.log(1.0/n_classes))
-
-    est = baseline.BaselineClassifier(
-        n_classes=n_classes,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        n_classes,
-        expected_global_step=num_steps,
-        expected_bias=[0.] if n_classes == 2 else [.0] * n_classes)
-
-  def testBinaryClassesFromScratch(self):
-    self._testFromScratch(n_classes=2)
-
-  def testMultiClassesFromScratch(self):
-    self._testFromScratch(n_classes=4)
-
-  def _testFromCheckpoint(self, n_classes):
-    # Create initial checkpoint.
-    label = 1
-    age = 17
-    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # For binary classifier:
-    #   logits = bias = -1.
-    #   loss = sigmoid_cross_entropy(logits, label)
-    #   so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133
-    # For multi class classifier:
-    #   loss = cross_entropy(logits, label)
-    #   where logits = bias and label = 1
-    #   so, loss = 1 * -log ( softmax(logits)[1] )
-    if n_classes == 2:
-      expected_loss = 1.3133
-    else:
-      logits = bias
-      logits_exp = np.exp(logits)
-      softmax = logits_exp / logits_exp.sum()
-      expected_loss = -1 * math.log(softmax[label])
-
-    mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
-
-    est = baseline.BaselineClassifier(
-        n_classes=n_classes,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        n_classes,
-        expected_global_step=initial_global_step + num_steps,
-        expected_bias=bias)
-
-  def testBinaryClassesFromCheckpoint(self):
-    self._testFromCheckpoint(n_classes=2)
-
-  def testMultiClassesFromCheckpoint(self):
-    self._testFromCheckpoint(n_classes=4)
-
-  def _testFromCheckpointFloatLabels(self, n_classes):
-    """Tests float labels for binary classification."""
-    # Create initial checkpoint.
-    if n_classes > 2:
-      return
-    label = 0.8
-    age = 17
-    bias = [-1.0]
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # logits = bias = -1.
-    # loss = sigmoid_cross_entropy(logits, label)
-    # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617
-    mock_optimizer = self._mock_optimizer(expected_loss=1.1132617)
-
-    est = baseline.BaselineClassifier(
-        n_classes=n_classes,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-
-  def testBinaryClassesFromCheckpointFloatLabels(self):
-    self._testFromCheckpointFloatLabels(n_classes=2)
-
-  def testMultiClassesFromCheckpointFloatLabels(self):
-    self._testFromCheckpointFloatLabels(n_classes=4)
-
-  def _testFromCheckpointMultiBatch(self, n_classes):
-    # Create initial checkpoint.
-    label = [1, 0]
-    age = [17, 18.5]
-    # For binary case, the expected weight has shape (1,1). For multi class
-    # case, the shape is (1, n_classes). In order to test the weights, set
-    # weights as 2.0 * range(n_classes).
-    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # For binary classifier:
-    #   logits = bias
-    #   logits[0] = -1.
-    #   logits[1] = -1.
-    #   loss = sigmoid_cross_entropy(logits, label)
-    #   so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133
-    #       loss[1] = (1 - 0) * -log ( 1- sigmoid(-1) ) = 0.3132
-    # For multi class classifier:
-    #   loss = cross_entropy(logits, label)
-    #   where logits = bias and label = [1, 0]
-    #   so, loss = 1 * -log ( softmax(logits)[label] )
-    if n_classes == 2:
-      expected_loss = (1.3133 + 0.3132)
-    else:
-      # Expand logits since batch_size=2
-      logits = bias * np.ones(shape=(2, 1))
-      logits_exp = np.exp(logits)
-      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
-      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
-      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
-      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
-      expected_loss = expected_loss_0 + expected_loss_1
-
-    mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
-
-    est = baseline.BaselineClassifier(
-        n_classes=n_classes,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': (age)}, (label)),
-        steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        n_classes,
-        expected_global_step=initial_global_step + num_steps,
-        expected_bias=bias)
-
-  def testBinaryClassesFromCheckpointMultiBatch(self):
-    self._testFromCheckpointMultiBatch(n_classes=2)
-
-  def testMultiClassesFromCheckpointMultiBatch(self):
-    self._testFromCheckpointMultiBatch(n_classes=4)
-
-
-class BaselineClassifierEvaluationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _test_evaluation_for_simple_data(self, n_classes):
-    label = 1
-    age = 1.
-
-    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
-
-    with ops.Graph().as_default():
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    est = _baseline_classifier_fn(
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    eval_metrics = est.evaluate(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1)
-
-    if n_classes == 2:
-      # Binary classes: loss = -log(sigmoid(-1)) = 1.3133
-      # Prediction = sigmoid(-1) = 0.2689
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: 1.3133,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: 1.3133,
-          metric_keys.MetricKeys.ACCURACY: 0.,
-          metric_keys.MetricKeys.PRECISION: 0.,
-          metric_keys.MetricKeys.RECALL: 0.,
-          metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689,
-          metric_keys.MetricKeys.LABEL_MEAN: 1.,
-          metric_keys.MetricKeys.ACCURACY_BASELINE: 1,
-          metric_keys.MetricKeys.AUC: 0.,
-          metric_keys.MetricKeys.AUC_PR: 1.,
-      }
-    else:
-      # Multi classes: loss = 1 * -log ( softmax(logits)[label] )
-      logits = bias
-      logits_exp = np.exp(logits)
-      softmax = logits_exp / logits_exp.sum()
-      expected_loss = -1 * math.log(softmax[label])
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
-          metric_keys.MetricKeys.ACCURACY: 0.,
-      }
-
-    self.assertAllClose(sorted_key_dict(expected_metrics),
-                        sorted_key_dict(eval_metrics), rtol=1e-3)
-
-  def test_binary_classes_evaluation_for_simple_data(self):
-    self._test_evaluation_for_simple_data(n_classes=2)
-
-  def test_multi_classes_evaluation_for_simple_data(self):
-    self._test_evaluation_for_simple_data(n_classes=4)
-
-  def _test_evaluation_batch(self, n_classes):
-    """Tests evaluation for batch_size==2."""
-    label = [1, 0]
-    age = [17., 18.]
-    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    est = _baseline_classifier_fn(
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    eval_metrics = est.evaluate(
-        input_fn=lambda: ({'age': (age)}, (label)), steps=1)
-
-    if n_classes == 2:
-      # Logits are (-1., -1.) labels are (1, 0).
-      # Loss is
-      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
-      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132
-      # Prediction = sigmoid(-1) = 0.2689
-      expected_loss = 1.3133 + 0.3132
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
-          metric_keys.MetricKeys.ACCURACY: 0.5,
-          metric_keys.MetricKeys.PRECISION: 0.,
-          metric_keys.MetricKeys.RECALL: 0.,
-          metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689,
-          metric_keys.MetricKeys.LABEL_MEAN: 0.5,
-          metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
-          metric_keys.MetricKeys.AUC: 0.5,
-          metric_keys.MetricKeys.AUC_PR: 0.75,
-      }
-    else:
-      # Expand logits since batch_size=2
-      logits = bias * np.ones(shape=(2, 1))
-      logits_exp = np.exp(logits)
-      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
-      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
-      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
-      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
-      expected_loss = expected_loss_0 + expected_loss_1
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
-          metric_keys.MetricKeys.ACCURACY: 0.5,
-      }
-
-    self.assertAllClose(sorted_key_dict(expected_metrics),
-                        sorted_key_dict(eval_metrics), rtol=1e-3)
-
-  def test_binary_classes_evaluation_batch(self):
-    self._test_evaluation_batch(n_classes=2)
-
-  def test_multi_classes_evaluation_batch(self):
-    self._test_evaluation_batch(n_classes=4)
-
-  def _test_evaluation_weights(self, n_classes):
-    """Tests evaluation with weights."""
-
-    label = [1, 0]
-    age = [17., 18.]
-    weights = [1., 2.]
-    # For binary case, the expected weight has shape (1,1). For multi class
-    # case, the shape is (1, n_classes). In order to test the weights, set
-    # weights as 2.0 * range(n_classes).
-    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(
-          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    est = _baseline_classifier_fn(
-        n_classes=n_classes,
-        weight_column='w',
-        model_dir=self._model_dir)
-    eval_metrics = est.evaluate(
-        input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1)
-
-    if n_classes == 2:
-      # Logits are (-1., -1.) labels are (1, 0).
-      # Loss is
-      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
-      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132
-      #   weights = [1., 2.]
-      expected_loss = 1.3133 * 1. + 0.3132 * 2.
-      loss_mean = expected_loss / (1.0 + 2.0)
-      label_mean = np.average(label, weights=weights)
-      logits = [-1, -1]
-      logistics = sigmoid(np.array(logits))
-      predictions_mean = np.average(logistics, weights=weights)
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
-          metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.),
-          metric_keys.MetricKeys.PRECISION: 0.,
-          metric_keys.MetricKeys.RECALL: 0.,
-          metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean,
-          metric_keys.MetricKeys.LABEL_MEAN: label_mean,
-          metric_keys.MetricKeys.ACCURACY_BASELINE: (
-              max(label_mean, 1-label_mean)),
-          metric_keys.MetricKeys.AUC: 0.5,
-          metric_keys.MetricKeys.AUC_PR: 2. / (1. + 2.),
-      }
-    else:
-      # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] )
-      # Expand logits since batch_size=2
-      logits = bias * np.ones(shape=(2, 1))
-      logits_exp = np.exp(logits)
-      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
-      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
-      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
-      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
-      loss_mean = np.average([expected_loss_0, expected_loss_1],
-                             weights=weights)
-      expected_loss = loss_mean * np.sum(weights)
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
-          metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.),
-      }
-
-    self.assertAllClose(sorted_key_dict(expected_metrics),
-                        sorted_key_dict(eval_metrics), rtol=1e-3)
-
-  def test_binary_classes_evaluation_weights(self):
-    self._test_evaluation_weights(n_classes=2)
-
-  def test_multi_classes_evaluation_weights(self):
-    self._test_evaluation_weights(n_classes=4)
-
-
-class BaselineClassifierPredictTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _testPredictions(self, n_classes, label_vocabulary, label_output_fn):
-    """Tests predict when all variables are one-dimensional."""
-    age = 1.
-
-    bias = [10.0] if n_classes == 2 else [10.0] * n_classes
-
-    with ops.Graph().as_default():
-      variables.Variable(bias, name=BIAS_NAME)
-      variables.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    est = _baseline_classifier_fn(
-        label_vocabulary=label_vocabulary,
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'age': np.array([[age]])},
-        y=None,
-        batch_size=1,
-        num_epochs=1,
-        shuffle=False)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-
-    if n_classes == 2:
-      scalar_logits = bias[0]
-      two_classes_logits = [0, scalar_logits]
-      two_classes_logits_exp = np.exp(two_classes_logits)
-      softmax = two_classes_logits_exp / two_classes_logits_exp.sum()
-
-      expected_predictions = {
-          'class_ids': [1],
-          'classes': [label_output_fn(1)],
-          'logistic': [sigmoid(np.array(scalar_logits))],
-          'logits': [scalar_logits],
-          'probabilities': softmax,
-      }
-    else:
-      onedim_logits = np.array(bias)
-      class_ids = onedim_logits.argmax()
-      logits_exp = np.exp(onedim_logits)
-      softmax = logits_exp / logits_exp.sum()
-      expected_predictions = {
-          'class_ids': [class_ids],
-          'classes': [label_output_fn(class_ids)],
-          'logits': onedim_logits,
-          'probabilities': softmax,
-      }
-
-    self.assertEqual(1, len(predictions))
-    # assertAllClose cannot handle byte type.
-    self.assertEqual(expected_predictions['classes'], predictions[0]['classes'])
-    expected_predictions.pop('classes')
-    predictions[0].pop('classes')
-    self.assertAllClose(sorted_key_dict(expected_predictions),
-                        sorted_key_dict(predictions[0]))
-
-  def testBinaryClassesWithoutLabelVocabulary(self):
-    n_classes = 2
-    self._testPredictions(n_classes,
-                          label_vocabulary=None,
-                          label_output_fn=lambda x: ('%s' % x).encode())
-
-  def testBinaryClassesWithLabelVocabulary(self):
-    n_classes = 2
-    self._testPredictions(
-        n_classes,
-        label_vocabulary=['class_vocab_{}'.format(i)
-                          for i in range(n_classes)],
-        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
-
-  def testMultiClassesWithoutLabelVocabulary(self):
-    n_classes = 4
-    self._testPredictions(
-        n_classes,
-        label_vocabulary=None,
-        label_output_fn=lambda x: ('%s' % x).encode())
-
-  def testMultiClassesWithLabelVocabulary(self):
-    n_classes = 4
-    self._testPredictions(
-        n_classes,
-        label_vocabulary=['class_vocab_{}'.format(i)
-                          for i in range(n_classes)],
-        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
-
-
-class BaselineClassifierIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn,
-                          predict_input_fn, input_dimension, prediction_length):
-    feature_columns = [
-        feature_column_lib.numeric_column('x', shape=(input_dimension,))
-    ]
-    est = _baseline_classifier_fn(
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    # learn y = x
-    est.train(train_input_fn, steps=200)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array(
-        [x['classes'] for x in est.predict(predict_input_fn)])
-    self.assertAllEqual((prediction_length, 1), predictions.shape)
-
-    # EXPORT
-    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def _test_numpy_input_fn(self, n_classes):
-    """Tests complete flow with numpy_input_fn."""
-    input_dimension = 4
-    batch_size = 10
-    prediction_length = batch_size
-    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, input_dimension)
-    target = np.array([1] * batch_size)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=target,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=target,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=None,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-
-    self._test_complete_flow(
-        n_classes=n_classes,
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        prediction_length=prediction_length)
-
-  def test_binary_classes_numpy_input_fn(self):
-    self._test_numpy_input_fn(n_classes=2)
-
-  def test_multi_classes_numpy_input_fn(self):
-    self._test_numpy_input_fn(n_classes=4)
-
-  def _test_pandas_input_fn(self, n_classes):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-
-    # Pandas DataFrame natually supports 1 dim data only.
-    input_dimension = 1
-    batch_size = 10
-    data = np.array([1., 2., 3., 4.], dtype=np.float32)
-    target = np.array([1, 0, 1, 0], dtype=np.int32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(target)
-    prediction_length = 4
-
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x, batch_size=batch_size, shuffle=False)
-
-    self._test_complete_flow(
-        n_classes=n_classes,
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        prediction_length=prediction_length)
-
-  def test_binary_classes_pandas_input_fn(self):
-    self._test_pandas_input_fn(n_classes=2)
-
-  def test_multi_classes_pandas_input_fn(self):
-    self._test_pandas_input_fn(n_classes=4)
-
-  def _test_input_fn_from_parse_example(self, n_classes):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    input_dimension = 2
-    batch_size = 10
-    prediction_length = batch_size
-    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, input_dimension)
-    target = np.array([1] * batch_size, dtype=np.int64)
-
-    serialized_examples = []
-    for x, y in zip(data, target):
-      example = example_pb2.Example(features=feature_pb2.Features(
-          feature={
-              'x':
-                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
-                      value=x)),
-              'y':
-                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
-                      value=[y])),
-          }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
-    }
-
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    self._test_complete_flow(
-        n_classes=n_classes,
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=input_dimension,
-        prediction_length=prediction_length)
-
-  def test_binary_classes_input_fn_from_parse_example(self):
-    self._test_input_fn_from_parse_example(n_classes=2)
-
-  def test_multi_classes_input_fn_from_parse_example(self):
-    self._test_input_fn_from_parse_example(n_classes=4)
-
-
-# Tests for Baseline logit_fn.
-
-
-class BaselineLogitFnTest(test.TestCase):
-
-  def test_basic_logit_correctness(self):
-    """baseline_logit_fn simply returns the bias variable."""
-    with ops.Graph().as_default():
-      logit_fn = baseline._baseline_logit_fn_builder(num_outputs=2)
-      logits = logit_fn(features={'age': [[23.], [31.]]})
-      with variable_scope.variable_scope('baseline', reuse=True):
-        bias_var = variable_scope.get_variable('bias')
-      with tf_session.Session() as sess:
-        sess.run([variables.global_variables_initializer()])
-        self.assertAllClose([[0., 0.], [0., 0.]], logits.eval())
-        sess.run(bias_var.assign([10., 5.]))
-        self.assertAllClose([[10., 5.], [10., 5.]], logits.eval())
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 0278990cfc..62a178830c 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -12,1553 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Estimator classes for BoostedTrees."""
+"""boosted_trees python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import abc
-import collections
-import functools
-
-import numpy as np
-
-from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.canned import boosted_trees_utils
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.feature_column import feature_column as feature_column_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import boosted_trees_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import data_flow_ops
-from tensorflow.python.ops import gradients_impl
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops.array_ops import identity as tf_identity
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.summary import summary
-from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.training import training_util
-from tensorflow.python.util.tf_export import estimator_export
-
-# TODO(nponomareva): Reveal pruning params here.
-_TreeHParams = collections.namedtuple('TreeHParams', [
-    'n_trees', 'max_depth', 'learning_rate', 'l1', 'l2', 'tree_complexity',
-    'min_node_weight', 'center_bias', 'pruning_mode'
-])
-
-_HOLD_FOR_MULTI_CLASS_SUPPORT = object()
-_HOLD_FOR_MULTI_DIM_SUPPORT = object()
-_DUMMY_NUM_BUCKETS = -1
-_DUMMY_NODE_ID = -1
-
-
-def _get_transformed_features(features, sorted_feature_columns):
-  """Gets the transformed features from features/feature_columns pair.
-
-  Args:
-    features: a dicionary of name to Tensor.
-    sorted_feature_columns: a list/set of tf.feature_column, sorted by name.
-
-  Returns:
-    result_features: a list of the transformed features, sorted by the name.
-
-  Raises:
-    ValueError: when unsupported features/columns are tried.
-  """
-  # pylint:disable=protected-access
-  transformed_features = feature_column_lib._transform_features(
-      features, sorted_feature_columns)
-  result_features = []
-  for column in sorted_feature_columns:
-    if isinstance(column, feature_column_lib._BucketizedColumn):
-      source_name = column.source_column.name
-      squeezed_tensor = array_ops.squeeze(transformed_features[column], axis=1)
-      if len(squeezed_tensor.shape) > 1:
-        raise ValueError('For now, only supports features equivalent to rank 1 '
-                         'but column `{}` got: {}'.format(
-                             source_name, features[source_name].shape))
-      result_features.append(squeezed_tensor)
-    elif isinstance(column, feature_column_lib._IndicatorColumn):
-      source_name = column.categorical_column.name
-      tensor = math_ops.to_int32(transformed_features[column])
-      if len(tensor.shape) > 2:
-        raise ValueError('Rank of indicator column must be no more than 2, '
-                         'but column `{}` got: {}'.format(
-                             source_name, features[source_name].shape))
-      unstacked = array_ops.unstack(tensor, axis=1)
-      result_features.extend(unstacked)
-    else:
-      raise ValueError(
-          'For now, only bucketized_column and indicator_column is supported '
-          'but got: {}'.format(column))
-    # pylint:enable=protected-access
-
-  return result_features
-
-
-def _local_variable(initial_value, name=None):
-  """Stores a tensor as a local Variable for faster read."""
-  result = variable_scope.variable(
-      initial_value=initial_value,
-      trainable=False,
-      collections=[ops.GraphKeys.LOCAL_VARIABLES],
-      validate_shape=False,
-      name=name)
-  if isinstance(initial_value, ops.Tensor):
-    # Match the resulting variable's shape if the initial_value is a Tensor.
-    result.set_shape(initial_value.shape)
-  return result
-
-
-def _group_features_by_num_buckets(sorted_feature_columns):
-  """Groups feature ids by the number of buckets.
-
-  Derives the feature ids based on iterating through ordered feature columns
-  and groups them by the number of buckets each feature require. Returns a
-  sorted list of buckets and a list of lists of feature ids for each of those
-  buckets.
-
-  Args:
-    sorted_feature_columns: a list/set of tf.feature_column sorted by name.
-
-  Returns:
-    bucket_size_list: a list of required bucket sizes.
-    feature_ids_list: a list of lists of feature ids for each bucket size.
-
-  Raises:
-    ValueError: when unsupported features columns are provided.
-  """
-  bucket_size_to_feature_ids_dict = collections.OrderedDict()
-
-  # TODO(nponomareva) for now we preserve the previous functionality and bucket
-  # all numeric into the same num of buckets. Can be easily changed to using
-  # each numeric's real buckets num, but we need to test that it does not cause
-  # a performance hit.
-
-  # We will replace this dummy key with the real max after we calculate it.
-  bucket_size_to_feature_ids_dict[_DUMMY_NUM_BUCKETS] = []
-
-  max_buckets_for_bucketized = 2
-  max_buckets_for_indicator = 2
-
-  feature_idx = 0
-  # pylint:disable=protected-access
-
-  for column in sorted_feature_columns:
-    if isinstance(column, feature_column_lib._IndicatorColumn):
-      num_categorical_features = column.categorical_column._num_buckets
-      if max_buckets_for_indicator not in bucket_size_to_feature_ids_dict:
-        bucket_size_to_feature_ids_dict[max_buckets_for_indicator] = []
-
-      for _ in range(num_categorical_features):
-        # We use bucket size of 2 for categorical.
-        bucket_size_to_feature_ids_dict[max_buckets_for_indicator].append(
-            feature_idx)
-        feature_idx += 1
-    elif isinstance(column, feature_column_lib._BucketizedColumn):
-      max_buckets_for_bucketized = max(max_buckets_for_bucketized,
-                                       len(column.boundaries) + 1)
-      bucket_size_to_feature_ids_dict[_DUMMY_NUM_BUCKETS].append(feature_idx)
-      feature_idx += 1
-    elif not isinstance(column, feature_column_lib._IndicatorColumn):  # pylint:disable=protected-access
-      raise ValueError(
-          'For now, only bucketized_column and indicator column are supported '
-          'but got: {}'.format(column))
-
-  # pylint:enable=protected-access
-  # Replace the dummy key with the real max num of buckets for all bucketized
-  # columns.
-  if max_buckets_for_bucketized not in bucket_size_to_feature_ids_dict:
-    bucket_size_to_feature_ids_dict[max_buckets_for_bucketized] = []
-  bucket_size_to_feature_ids_dict[max_buckets_for_bucketized].extend(
-      bucket_size_to_feature_ids_dict[_DUMMY_NUM_BUCKETS])
-  del bucket_size_to_feature_ids_dict[_DUMMY_NUM_BUCKETS]
-
-  feature_ids_list = list(bucket_size_to_feature_ids_dict.values())
-  bucket_size_list = list(bucket_size_to_feature_ids_dict.keys())
-  return bucket_size_list, feature_ids_list
-
-
-def _calculate_num_features(sorted_feature_columns):
-  num_features = 0
-  for column in sorted_feature_columns:
-    if isinstance(column, feature_column_lib._IndicatorColumn):  # pylint:disable=protected-access
-      num_features += column.categorical_column._num_buckets  # pylint:disable=protected-access
-    else:
-      num_features += 1
-  return num_features
-
-
-def _generate_feature_name_mapping(sorted_feature_columns):
-  """Return a list of feature name for feature ids.
-
-  Args:
-    sorted_feature_columns: a list/set of tf.feature_column sorted by name.
-
-  Returns:
-    feature_name_mapping: a list of feature names indexed by the feature ids.
-
-  Raises:
-    ValueError: when unsupported features/columns are tried.
-  """
-  names = []
-  for column in sorted_feature_columns:
-    if isinstance(column, feature_column_lib._IndicatorColumn):  # pylint:disable=protected-access
-      categorical_column = column.categorical_column
-      if isinstance(categorical_column,
-                    feature_column_lib._VocabularyListCategoricalColumn):  # pylint:disable=protected-access
-        for value in categorical_column.vocabulary_list:
-          names.append('{}:{}'.format(column.name, value))
-      elif isinstance(categorical_column,
-                      feature_column_lib._BucketizedColumn):  # pylint:disable=protected-access
-        boundaries = [-np.inf] + list(categorical_column.boundaries) + [np.inf]
-        for pair in zip(boundaries[:-1], boundaries[1:]):
-          names.append('{}:{}'.format(column.name, pair))
-      else:
-        for num in range(categorical_column._num_buckets):  # pylint:disable=protected-access
-          names.append('{}:{}'.format(column.name, num))
-    elif isinstance(column, feature_column_lib._BucketizedColumn):
-      names.append(column.name)
-    else:
-      raise ValueError(
-          'For now, only bucketized_column and indicator_column is supported '
-          'but got: {}'.format(column))
-  return names
-
-
-def _cache_transformed_features(features, sorted_feature_columns, batch_size):
-  """Transform features and cache, then returns (cached_features, cache_op)."""
-  num_features = _calculate_num_features(sorted_feature_columns)
-  cached_features = [
-      _local_variable(
-          array_ops.zeros([batch_size], dtype=dtypes.int32),
-          name='cached_feature_{}'.format(i)) for i in range(num_features)
-  ]
-  are_features_cached = _local_variable(False, name='are_features_cached')
-
-  def cache_features_and_return():
-    """Caches transformed features.
-
-    The intention is to hide get_transformed_features() from the graph by
-    caching the result except the first step, since bucketize operation
-    (inside get_transformed_features) is expensive.
-
-    Returns:
-      input_feature_list: a list of input features.
-      cache_flip_op: op to add to graph to make sure cache update is included to
-          the graph.
-    """
-
-    transformed_features = _get_transformed_features(features,
-                                                     sorted_feature_columns)
-    cached = [
-        state_ops.assign(cached_features[i], transformed_features[i])
-        for i in range(num_features)
-    ]
-    # TODO(youngheek): Try other combination of dependencies so that the
-    # function returns a single result, not a tuple.
-    with ops.control_dependencies(cached):
-      cache_flip_op = are_features_cached.assign(True)
-    return cached, cache_flip_op
-
-  input_feature_list, cache_flip_op = control_flow_ops.cond(
-      are_features_cached, lambda: (cached_features, control_flow_ops.no_op()),
-      cache_features_and_return)
-  return input_feature_list, cache_flip_op
-
-
-class _CacheTrainingStatesUsingHashTable(object):
-  """Caching logits, etc. using MutableHashTable."""
-
-  def __init__(self, example_ids, logits_dimension):
-    """Creates a cache with the given configuration.
-
-    It maintains a MutableDenseHashTable for all values.
-    The API lookup() and insert() would have those specs,
-      tree_ids: shape=[batch_size], dtype=int32
-      node_ids: shape=[batch_size], dtype=int32
-      logits: shape=[batch_size, logits_dimension], dtype=float32
-    However in the MutableDenseHashTable, ids are bitcasted into float32 and
-    all values are concatenated as a single tensor (of float32).
-
-    Hence conversion happens internally before inserting to the HashTable and
-    after lookup from it.
-
-    Args:
-      example_ids: a Rank 1 tensor to be used as a key of the cache.
-      logits_dimension: a constant (int) for the dimension of logits.
-
-    Raises:
-      ValueError: if example_ids is other than int64 or string.
-    """
-    if dtypes.as_dtype(dtypes.int64).is_compatible_with(example_ids.dtype):
-      empty_key = -1 << 62
-    elif dtypes.as_dtype(dtypes.string).is_compatible_with(example_ids.dtype):
-      empty_key = ''
-    else:
-      raise ValueError(
-          'Unsupported example_id_feature dtype %s.' % example_ids.dtype)
-    # Cache holds latest <tree_id, node_id, logits> for each example.
-    # tree_id and node_id are both int32 but logits is a float32.
-    # To reduce the overhead, we store all of them together as float32 and
-    # bitcast the ids to int32.
-    self._table_ref = lookup_ops.mutable_dense_hash_table_v2(
-        empty_key=empty_key, value_dtype=dtypes.float32, value_shape=[3])
-    self._example_ids = ops.convert_to_tensor(example_ids)
-    if self._example_ids.shape.ndims not in (None, 1):
-      raise ValueError(
-          'example_id should have rank 1, but got %s' % self._example_ids)
-    self._logits_dimension = logits_dimension
-
-  def lookup(self):
-    """Returns cached_tree_ids, cached_node_ids, cached_logits."""
-    cached_tree_ids, cached_node_ids, cached_logits = array_ops.split(
-        lookup_ops.lookup_table_find_v2(
-            self._table_ref,
-            self._example_ids,
-            default_value=[0.0, _DUMMY_NODE_ID, 0.0]),
-        [1, 1, self._logits_dimension],
-        axis=1)
-    cached_tree_ids = array_ops.squeeze(
-        array_ops.bitcast(cached_tree_ids, dtypes.int32))
-    cached_node_ids = array_ops.squeeze(
-        array_ops.bitcast(cached_node_ids, dtypes.int32))
-    if self._example_ids.shape.ndims is not None:
-      cached_logits.set_shape(
-          [self._example_ids.shape[0], self._logits_dimension])
-    return (cached_tree_ids, cached_node_ids, cached_logits)
-
-  def insert(self, tree_ids, node_ids, logits):
-    """Inserts values and returns the op."""
-    insert_op = lookup_ops.lookup_table_insert_v2(
-        self._table_ref, self._example_ids,
-        array_ops.concat(
-            [
-                array_ops.expand_dims(
-                    array_ops.bitcast(tree_ids, dtypes.float32), 1),
-                array_ops.expand_dims(
-                    array_ops.bitcast(node_ids, dtypes.float32), 1),
-                logits,
-            ],
-            axis=1,
-            name='value_concat_for_cache_insert'))
-    return insert_op
-
-
-class _CacheTrainingStatesUsingVariables(object):
-  """Caching logits, etc. using Variables."""
-
-  def __init__(self, batch_size, logits_dimension):
-    """Creates a cache with the given configuration.
-
-    It maintains three variables, tree_ids, node_ids, logits, for caching.
-      tree_ids: shape=[batch_size], dtype=int32
-      node_ids: shape=[batch_size], dtype=int32
-      logits: shape=[batch_size, logits_dimension], dtype=float32
-
-    Note, this can be used only with in-memory data setting.
-
-    Args:
-      batch_size: `int`, the size of the cache.
-      logits_dimension: a constant (int) for the dimension of logits.
-    """
-    self._logits_dimension = logits_dimension
-    self._tree_ids = _local_variable(
-        array_ops.zeros([batch_size], dtype=dtypes.int32),
-        name='tree_ids_cache')
-    self._node_ids = _local_variable(
-        _DUMMY_NODE_ID * array_ops.ones([batch_size], dtype=dtypes.int32),
-        name='node_ids_cache')
-    self._logits = _local_variable(
-        array_ops.zeros([batch_size, logits_dimension], dtype=dtypes.float32),
-        name='logits_cache')
-
-  def lookup(self):
-    """Returns cached_tree_ids, cached_node_ids, cached_logits."""
-    return (self._tree_ids, self._node_ids, self._logits)
-
-  def insert(self, tree_ids, node_ids, logits):
-    """Inserts values and returns the op."""
-    return control_flow_ops.group(
-        [
-            self._tree_ids.assign(tree_ids),
-            self._node_ids.assign(node_ids),
-            self._logits.assign(logits)
-        ],
-        name='cache_insert')
-
-
-class _StopAtAttemptsHook(session_run_hook.SessionRunHook):
-  """Hook that requests stop at the number of attempts."""
-
-  def __init__(self, num_finalized_trees_tensor, num_attempted_layers_tensor,
-               max_trees, max_depth):
-    self._num_finalized_trees_tensor = num_finalized_trees_tensor
-    self._num_attempted_layers_tensor = num_attempted_layers_tensor
-    self._max_trees = max_trees
-    self._max_depth = max_depth
-
-  def before_run(self, run_context):
-    return session_run_hook.SessionRunArgs(
-        [self._num_finalized_trees_tensor, self._num_attempted_layers_tensor])
-
-  def after_run(self, run_context, run_values):
-    # num_* tensors should be retrieved by a separate session than the training
-    # one, in order to read the values after growing.
-    # So, if it's approaching to the limit, get the actual value by additional
-    # session.
-    num_finalized_trees, num_attempted_layers = run_values.results
-    if (num_finalized_trees >= self._max_trees - 1 or
-        num_attempted_layers > 2 * self._max_trees * self._max_depth - 1):
-      num_finalized_trees, num_attempted_layers = run_context.session.run(
-          [self._num_finalized_trees_tensor, self._num_attempted_layers_tensor])
-    if (num_finalized_trees >= self._max_trees or
-        num_attempted_layers > 2 * self._max_trees * self._max_depth):
-      run_context.request_stop()
-
-
-def _get_max_splits(tree_hparams):
-  """Calculates the max possible number of splits based on tree params."""
-  # maximum number of splits possible in the whole tree =2^(D-1)-1
-  max_splits = (1 << tree_hparams.max_depth) - 1
-  return max_splits
-
-
-class _EnsembleGrower(object):
-  """Abstract base class for different types of ensemble growers.
-
-  Use it to receive training ops for growing and centering bias, depending
-  on the implementation (for example, in memory or accumulator-based
-  distributed):
-    grower = ...create subclass grower(tree_ensemble, tree_hparams)
-    grow_op = grower.grow_tree(stats_summaries_list, feature_ids_list,
-                               last_layer_nodes_range)
-    training_ops.append(grow_op)
-  """
-
-  def __init__(self, tree_ensemble, tree_hparams, feature_ids_list):
-    """Initializes a grower object.
-
-    Args:
-      tree_ensemble: A TreeEnsemble variable.
-      tree_hparams: TODO. collections.namedtuple for hyper parameters.
-      feature_ids_list: a list of lists of feature ids for each bucket size.
-
-    Raises:
-      ValueError: when pruning mode is invalid or pruning is used and no tree
-      complexity is set.
-    """
-    self._tree_ensemble = tree_ensemble
-    self._tree_hparams = tree_hparams
-    self._feature_ids_list = feature_ids_list
-    # pylint: disable=protected-access
-    self._pruning_mode_parsed = boosted_trees_ops.PruningMode.from_str(
-        tree_hparams.pruning_mode)
-
-    if tree_hparams.tree_complexity > 0:
-      if self._pruning_mode_parsed == boosted_trees_ops.PruningMode.NO_PRUNING:
-        raise ValueError(
-            'Tree complexity have no effect unless pruning mode is chosen.')
-    else:
-      if self._pruning_mode_parsed != boosted_trees_ops.PruningMode.NO_PRUNING:
-        raise ValueError('For pruning, tree_complexity must be positive.')
-    # pylint: enable=protected-access
-
-  @abc.abstractmethod
-  def center_bias(self, center_bias_var, gradients, hessians):
-    """Centers bias, if ready, based on statistics.
-
-    Args:
-      center_bias_var: A variable that will be updated when bias centering
-        finished.
-      gradients: A rank 2 tensor of gradients.
-      hessians: A rank 2 tensor of hessians.
-
-    Returns:
-      An operation for centering bias.
-    """
-
-  @abc.abstractmethod
-  def grow_tree(self, stats_summaries_list, last_layer_nodes_range):
-    """Grows a tree, if ready, based on provided statistics.
-
-    Args:
-      stats_summaries_list: List of stats summary tensors, representing sums of
-        gradients and hessians for each feature bucket.
-      last_layer_nodes_range: A tensor representing ids of the nodes in the
-        current layer, to be split.
-
-    Returns:
-      An op for growing a tree.
-    """
-
-  def chief_init_op(self):
-    """Ops that chief needs to run to initialize the state."""
-    return control_flow_ops.no_op()
-
-  #  ============= Helper methods ===========
-
-  def _center_bias_fn(self, center_bias_var, mean_gradients, mean_hessians):
-    """Updates the ensembles and cache (if needed) with logits prior."""
-    continue_centering = boosted_trees_ops.center_bias(
-        self._tree_ensemble.resource_handle,
-        mean_gradients=mean_gradients,
-        mean_hessians=mean_hessians,
-        l1=self._tree_hparams.l1,
-        l2=self._tree_hparams.l2)
-    return center_bias_var.assign(continue_centering)
-
-  def _grow_tree_from_stats_summaries(self, stats_summaries_list,
-                                      last_layer_nodes_range):
-    """Updates ensemble based on the best gains from stats summaries."""
-    node_ids_per_feature = []
-    gains_list = []
-    thresholds_list = []
-    left_node_contribs_list = []
-    right_node_contribs_list = []
-    all_feature_ids = []
-    assert len(stats_summaries_list) == len(self._feature_ids_list)
-
-    max_splits = _get_max_splits(self._tree_hparams)
-
-    for i, feature_ids in enumerate(self._feature_ids_list):
-      (numeric_node_ids_per_feature, numeric_gains_list,
-       numeric_thresholds_list, numeric_left_node_contribs_list,
-       numeric_right_node_contribs_list) = (
-           boosted_trees_ops.calculate_best_gains_per_feature(
-               node_id_range=last_layer_nodes_range,
-               stats_summary_list=stats_summaries_list[i],
-               l1=self._tree_hparams.l1,
-               l2=self._tree_hparams.l2,
-               tree_complexity=self._tree_hparams.tree_complexity,
-               min_node_weight=self._tree_hparams.min_node_weight,
-               max_splits=max_splits))
-
-      all_feature_ids += feature_ids
-      node_ids_per_feature += numeric_node_ids_per_feature
-      gains_list += numeric_gains_list
-      thresholds_list += numeric_thresholds_list
-      left_node_contribs_list += numeric_left_node_contribs_list
-      right_node_contribs_list += numeric_right_node_contribs_list
-
-    grow_op = boosted_trees_ops.update_ensemble(
-        # Confirm if local_tree_ensemble or tree_ensemble should be used.
-        self._tree_ensemble.resource_handle,
-        feature_ids=all_feature_ids,
-        node_ids=node_ids_per_feature,
-        gains=gains_list,
-        thresholds=thresholds_list,
-        left_node_contribs=left_node_contribs_list,
-        right_node_contribs=right_node_contribs_list,
-        learning_rate=self._tree_hparams.learning_rate,
-        max_depth=self._tree_hparams.max_depth,
-        pruning_mode=self._pruning_mode_parsed)
-    return grow_op
-
-
-class _InMemoryEnsembleGrower(_EnsembleGrower):
-  """An in-memory ensemble grower."""
-
-  def __init__(self, tree_ensemble, tree_hparams, feature_ids_list):
-
-    super(_InMemoryEnsembleGrower, self).__init__(
-        tree_ensemble=tree_ensemble, tree_hparams=tree_hparams,
-        feature_ids_list=feature_ids_list)
-
-  def center_bias(self, center_bias_var, gradients, hessians):
-    # For in memory, we already have a full batch of gradients and hessians,
-    # so just take a mean and proceed with centering.
-    mean_gradients = array_ops.expand_dims(
-        math_ops.reduce_mean(gradients, 0), 0)
-    mean_heassians = array_ops.expand_dims(math_ops.reduce_mean(hessians, 0), 0)
-    return self._center_bias_fn(center_bias_var, mean_gradients, mean_heassians)
-
-  def grow_tree(self, stats_summaries_list, last_layer_nodes_range):
-    # For in memory, we already have full data in one batch, so we can grow the
-    # tree immediately.
-    return self._grow_tree_from_stats_summaries(
-        stats_summaries_list, last_layer_nodes_range)
-
-
-class _AccumulatorEnsembleGrower(_EnsembleGrower):
-  """An accumulator based ensemble grower."""
-
-  def __init__(self, tree_ensemble, tree_hparams, stamp_token,
-               n_batches_per_layer, bucket_size_list, is_chief, center_bias,
-               feature_ids_list):
-    super(_AccumulatorEnsembleGrower, self).__init__(
-        tree_ensemble=tree_ensemble, tree_hparams=tree_hparams,
-        feature_ids_list=feature_ids_list)
-    self._stamp_token = stamp_token
-    self._n_batches_per_layer = n_batches_per_layer
-    self._bucket_size_list = bucket_size_list
-    self._is_chief = is_chief
-    self._growing_accumulators = []
-    self._chief_init_ops = []
-    max_splits = _get_max_splits(self._tree_hparams)
-    for i, feature_ids in enumerate(self._feature_ids_list):
-      accumulator = data_flow_ops.ConditionalAccumulator(
-          dtype=dtypes.float32,
-          # The stats consist of grads and hessians (the last dimension).
-          shape=[len(feature_ids), max_splits, self._bucket_size_list[i], 2],
-          shared_name='numeric_stats_summary_accumulator_' + str(i))
-      self._chief_init_ops.append(
-          accumulator.set_global_step(self._stamp_token))
-      self._growing_accumulators.append(accumulator)
-    self._center_bias = center_bias
-    if center_bias:
-      self._bias_accumulator = data_flow_ops.ConditionalAccumulator(
-          dtype=dtypes.float32,
-          # The stats consist of grads and hessians means only.
-          # TODO(nponomareva): this will change for a multiclass
-          shape=[2, 1],
-          shared_name='bias_accumulator')
-      self._chief_init_ops.append(
-          self._bias_accumulator.set_global_step(self._stamp_token))
-
-  def center_bias(self, center_bias_var, gradients, hessians):
-    # For not in memory situation, we need to accumulate enough of batches first
-    # before proceeding with centering bias.
-
-    # Create an accumulator.
-    if not self._center_bias:
-      raise RuntimeError('center_bias called but bias centering is disabled.')
-    bias_dependencies = []
-    grads_and_hess = array_ops.stack([gradients, hessians], axis=0)
-    grads_and_hess = math_ops.reduce_mean(grads_and_hess, axis=1)
-
-    apply_grad = self._bias_accumulator.apply_grad(
-        grads_and_hess, self._stamp_token)
-    bias_dependencies.append(apply_grad)
-
-    # Center bias if enough batches were processed.
-    with ops.control_dependencies(bias_dependencies):
-      if not self._is_chief:
-        return control_flow_ops.no_op()
-      def _set_accumulators_stamp():
-        return control_flow_ops.group(
-            [acc.set_global_step(self._stamp_token + 1) for acc in
-             self._growing_accumulators])
-
-      def center_bias_from_accumulator():
-        accumulated = array_ops.unstack(self._bias_accumulator.take_grad(1),
-                                        axis=0)
-        center_bias_op = self._center_bias_fn(
-            center_bias_var,
-            array_ops.expand_dims(accumulated[0], 0),
-            array_ops.expand_dims(accumulated[1], 0))
-        with ops.control_dependencies([center_bias_op]):
-          return control_flow_ops.cond(center_bias_var,
-                                       control_flow_ops.no_op,
-                                       _set_accumulators_stamp)
-
-      center_bias_op = control_flow_ops.cond(
-          math_ops.greater_equal(self._bias_accumulator.num_accumulated(),
-                                 self._n_batches_per_layer),
-          center_bias_from_accumulator,
-          control_flow_ops.no_op,
-          name='wait_until_n_batches_for_bias_accumulated')
-      return center_bias_op
-
-  def grow_tree(self, stats_summaries_list, last_layer_nodes_range):
-    dependencies = []
-    for i in range(len(self._feature_ids_list)):
-      stats_summaries = stats_summaries_list[i]
-      apply_grad = self._growing_accumulators[i].apply_grad(
-          array_ops.stack(stats_summaries, axis=0), self._stamp_token)
-      dependencies.append(apply_grad)
-
-    # Grow the tree if enough batches is accumulated.
-    with ops.control_dependencies(dependencies):
-      if not self._is_chief:
-        return control_flow_ops.no_op()
-
-      min_accumulated = math_ops.reduce_min(
-          array_ops.stack([acc.num_accumulated() for acc in
-                           self._growing_accumulators]))
-
-      def grow_tree_from_accumulated_summaries_fn():
-        """Updates tree with the best layer from accumulated summaries."""
-        # Take out the accumulated summaries from the accumulator and grow.
-        stats_summaries_list = []
-        stats_summaries_list = [
-            array_ops.unstack(accumulator.take_grad(1), axis=0)
-            for accumulator in self._growing_accumulators
-        ]
-        grow_op = self._grow_tree_from_stats_summaries(
-            stats_summaries_list, last_layer_nodes_range
-        )
-        return grow_op
-
-      grow_model = control_flow_ops.cond(
-          math_ops.greater_equal(min_accumulated, self._n_batches_per_layer),
-          grow_tree_from_accumulated_summaries_fn,
-          control_flow_ops.no_op,
-          name='wait_until_n_batches_accumulated')
-      return grow_model
-
-  def chief_init_op(self):
-    """Ops that chief needs to run to initialize the state."""
-    return control_flow_ops.group(self._chief_init_ops)
-
-
-def _bt_model_fn(
-    features,
-    labels,
-    mode,
-    head,
-    feature_columns,
-    tree_hparams,
-    n_batches_per_layer,
-    config,
-    closed_form_grad_and_hess_fn=None,
-    example_id_column_name=None,
-    # TODO(youngheek): replace this later using other options.
-    train_in_memory=False,
-    name='boosted_trees'):
-  """Gradient Boosted Trees model_fn.
-
-  Args:
-    features: dict of `Tensor`.
-    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
-      dtype `int32` or `int64` in the range `[0, n_classes)`.
-    mode: Defines whether this is training, evaluation or prediction.
-      See `ModeKeys`.
-    head: A `head_lib._Head` instance.
-    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
-    tree_hparams: TODO. collections.namedtuple for hyper parameters.
-    n_batches_per_layer: A `Tensor` of `int64`. Each layer is built after at
-      least n_batches_per_layer accumulations.
-    config: `RunConfig` object to configure the runtime settings.
-    closed_form_grad_and_hess_fn: a function that accepts logits and labels
-      and returns gradients and hessians. By default, they are created by
-      tf.gradients() from the loss.
-    example_id_column_name: Name of the feature for a unique ID per example.
-      Currently experimental -- not exposed to public API.
-    train_in_memory: `bool`, when true, it assumes the dataset is in memory,
-      i.e., input_fn should return the entire dataset as a single batch, and
-      also n_batches_per_layer should be set as 1.
-    name: Name to use for the model.
-
-  Returns:
-      An `EstimatorSpec` instance.
-
-  Raises:
-    ValueError: mode or params are invalid, or features has the wrong type.
-  """
-  sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
-  with ops.name_scope(name) as name:
-    # Prepare.
-    global_step = training_util.get_or_create_global_step()
-    bucket_size_list, feature_ids_list = _group_features_by_num_buckets(
-        sorted_feature_columns)
-    # Create Ensemble resources.
-    tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
-
-    # Create logits.
-    if mode != model_fn_lib.ModeKeys.TRAIN:
-      input_feature_list = _get_transformed_features(features,
-                                                     sorted_feature_columns)
-      logits = boosted_trees_ops.predict(
-          # For non-TRAIN mode, ensemble doesn't change after initialization,
-          # so no local copy is needed; using tree_ensemble directly.
-          tree_ensemble_handle=tree_ensemble.resource_handle,
-          bucketized_features=input_feature_list,
-          logits_dimension=head.logits_dimension)
-      return head.create_estimator_spec(
-          features=features,
-          mode=mode,
-          labels=labels,
-          train_op_fn=control_flow_ops.no_op,
-          logits=logits)
-
-    # ============== Training graph ==============
-    center_bias = tree_hparams.center_bias
-    is_single_machine = (config.num_worker_replicas <= 1)
-
-    if train_in_memory:
-      assert n_batches_per_layer == 1, (
-          'When train_in_memory is enabled, input_fn should return the entire '
-          'dataset as a single batch, and n_batches_per_layer should be set as '
-          '1.')
-      if (not config.is_chief or config.num_worker_replicas > 1 or
-          config.num_ps_replicas > 0):
-        raise ValueError('train_in_memory is supported only for '
-                         'non-distributed training.')
-    worker_device = control_flow_ops.no_op().device
-    train_op = []
-    # Extract input features and set up cache for training.
-    training_state_cache = None
-    if train_in_memory:
-      # cache transformed features as well for in-memory training.
-      batch_size = array_ops.shape(labels)[0]
-      input_feature_list, input_cache_op = (
-          _cache_transformed_features(features, sorted_feature_columns,
-                                      batch_size))
-      train_op.append(input_cache_op)
-      training_state_cache = _CacheTrainingStatesUsingVariables(
-          batch_size, head.logits_dimension)
-    else:
-      input_feature_list = _get_transformed_features(features,
-                                                     sorted_feature_columns)
-      if example_id_column_name:
-        example_ids = features[example_id_column_name]
-        training_state_cache = _CacheTrainingStatesUsingHashTable(
-            example_ids, head.logits_dimension)
-    if training_state_cache:
-      cached_tree_ids, cached_node_ids, cached_logits = (
-          training_state_cache.lookup())
-    else:
-      # Always start from the beginning when no cache is set up.
-      batch_size = array_ops.shape(labels)[0]
-      cached_tree_ids, cached_node_ids, cached_logits = (
-          array_ops.zeros([batch_size], dtype=dtypes.int32),
-          _DUMMY_NODE_ID * array_ops.ones([batch_size], dtype=dtypes.int32),
-          array_ops.zeros(
-              [batch_size, head.logits_dimension], dtype=dtypes.float32))
-
-    if is_single_machine:
-      local_tree_ensemble = tree_ensemble
-      ensemble_reload = control_flow_ops.no_op()
-    else:
-      # Have a local copy of ensemble for the distributed setting.
-      with ops.device(worker_device):
-        local_tree_ensemble = boosted_trees_ops.TreeEnsemble(
-            name=name + '_local', is_local=True)
-      # TODO(soroush): Do partial updates if this becomes a bottleneck.
-      ensemble_reload = local_tree_ensemble.deserialize(
-          *tree_ensemble.serialize())
-    with ops.control_dependencies([ensemble_reload]):
-      (stamp_token, num_trees, num_finalized_trees, num_attempted_layers,
-       last_layer_nodes_range) = local_tree_ensemble.get_states()
-      partial_logits, tree_ids, node_ids = boosted_trees_ops.training_predict(
-          tree_ensemble_handle=local_tree_ensemble.resource_handle,
-          cached_tree_ids=cached_tree_ids,
-          cached_node_ids=cached_node_ids,
-          bucketized_features=input_feature_list,
-          logits_dimension=head.logits_dimension)
-    logits = cached_logits + partial_logits
-
-    if train_in_memory:
-      grower = _InMemoryEnsembleGrower(tree_ensemble, tree_hparams,
-                                       feature_ids_list=feature_ids_list)
-    else:
-      grower = _AccumulatorEnsembleGrower(tree_ensemble, tree_hparams,
-                                          stamp_token, n_batches_per_layer,
-                                          bucket_size_list, config.is_chief,
-                                          center_bias=center_bias,
-                                          feature_ids_list=feature_ids_list)
-
-    summary.scalar('ensemble/num_trees', num_trees)
-    summary.scalar('ensemble/num_finalized_trees', num_finalized_trees)
-    summary.scalar('ensemble/num_attempted_layers', num_attempted_layers)
-
-    # Variable that determines whether bias centering is needed.
-    center_bias_var = variable_scope.variable(
-        initial_value=center_bias, name='center_bias_needed', trainable=False,
-        use_resource=True)
-    # Create training graph.
-    def _train_op_fn(loss):
-      """Run one training iteration."""
-      if training_state_cache:
-        # Cache logits only after center_bias is complete, if it's in progress.
-        train_op.append(
-            control_flow_ops.cond(
-                center_bias_var, control_flow_ops.no_op,
-                lambda: training_state_cache.insert(tree_ids, node_ids, logits))
-        )
-
-      if closed_form_grad_and_hess_fn:
-        gradients, hessians = closed_form_grad_and_hess_fn(logits, labels)
-      else:
-        gradients = gradients_impl.gradients(loss, logits, name='Gradients')[0]
-        hessians = gradients_impl.gradients(
-            gradients, logits, name='Hessians')[0]
-
-      # TODO(youngheek): perhaps storage could be optimized by storing stats
-      # with the dimension max_splits_per_layer, instead of max_splits (for the
-      # entire tree).
-      max_splits = _get_max_splits(tree_hparams)
-
-      stats_summaries_list = []
-      for i, feature_ids in enumerate(feature_ids_list):
-        num_buckets = bucket_size_list[i]
-        summaries = [
-            array_ops.squeeze(
-                boosted_trees_ops.make_stats_summary(
-                    node_ids=node_ids,
-                    gradients=gradients,
-                    hessians=hessians,
-                    bucketized_features_list=[input_feature_list[f]],
-                    max_splits=max_splits,
-                    num_buckets=num_buckets),
-                axis=0) for f in feature_ids
-        ]
-        stats_summaries_list.append(summaries)
-      if center_bias:
-        update_model = control_flow_ops.cond(
-            center_bias_var,
-            functools.partial(
-                grower.center_bias,
-                center_bias_var,
-                gradients,
-                hessians,
-            ),
-            functools.partial(grower.grow_tree, stats_summaries_list,
-                              last_layer_nodes_range))
-      else:
-        update_model = grower.grow_tree(stats_summaries_list,
-                                        last_layer_nodes_range)
-      train_op.append(update_model)
-
-      with ops.control_dependencies([update_model]):
-        increment_global = state_ops.assign_add(global_step, 1).op
-        train_op.append(increment_global)
-
-      return control_flow_ops.group(train_op, name='train_op')
-
-  estimator_spec = head.create_estimator_spec(
-      features=features,
-      mode=mode,
-      labels=labels,
-      train_op_fn=_train_op_fn,
-      logits=logits)
-
-  # Add an early stop hook.
-  estimator_spec = estimator_spec._replace(
-      training_hooks=estimator_spec.training_hooks +
-      (_StopAtAttemptsHook(num_finalized_trees, num_attempted_layers,
-                           tree_hparams.n_trees, tree_hparams.max_depth),),
-      training_chief_hooks=[GrowerInitializationHook(grower.chief_init_op())] +
-      list(estimator_spec.training_chief_hooks))
-  return estimator_spec
-
-
-class GrowerInitializationHook(session_run_hook.SessionRunHook):
-  """A SessionRunHook handles initialization of `_EnsembleGrower`."""
-
-  def __init__(self, init_op):
-    self._init_op = init_op
-
-  def after_create_session(self, session, coord):
-    session.run(self._init_op)
-
-
-def _create_classification_head(n_classes,
-                                weight_column=None,
-                                label_vocabulary=None):
-  """Creates a classification head. Refer to canned.head for details on args."""
-  # TODO(nponomareva): Support multi-class cases.
-  if n_classes == 2:
-    # pylint: disable=protected-access
-    return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column=weight_column,
-        label_vocabulary=label_vocabulary,
-        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-    # pylint: enable=protected-access
-  else:
-    raise ValueError('For now only binary classification is supported.'
-                     'n_classes given as {}'.format(n_classes))
-
-
-def _create_classification_head_and_closed_form(n_classes, weight_column,
-                                                label_vocabulary):
-  """Creates a head for classifier and the closed form gradients/hessians."""
-  head = _create_classification_head(n_classes, weight_column, label_vocabulary)
-  if (n_classes == 2 and head.logits_dimension == 1 and
-      weight_column is None and label_vocabulary is None):
-    # Use the closed-form gradients/hessians for 2 class.
-    def _grad_and_hess_for_logloss(logits, labels):
-      """A closed form gradient and hessian for logistic loss."""
-      # TODO(youngheek): add weights handling.
-      predictions = math_ops.reciprocal(math_ops.exp(-logits) + 1.0)
-      normalizer = math_ops.reciprocal(
-          math_ops.cast(array_ops.size(predictions), dtypes.float32))
-      labels = math_ops.cast(labels, dtypes.float32)
-      labels = head_lib._check_dense_labels_match_logits_and_reshape(  # pylint: disable=protected-access
-          labels, logits, head.logits_dimension)
-      gradients = (predictions - labels) * normalizer
-      hessians = predictions * (1.0 - predictions) * normalizer
-      return gradients, hessians
-
-    closed_form = _grad_and_hess_for_logloss
-  else:
-    closed_form = None
-  return (head, closed_form)
-
-
-def _create_regression_head(label_dimension, weight_column=None):
-  if label_dimension != 1:
-    raise ValueError('For now only 1 dimension regression is supported.'
-                     'label_dimension given as {}'.format(label_dimension))
-  # pylint: disable=protected-access
-  return head_lib._regression_head(
-      label_dimension=label_dimension,
-      weight_column=weight_column,
-      loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-  # pylint: enable=protected-access
-
-
-def _compute_feature_importances_per_tree(tree, num_features):
-  """Computes the importance of each feature in the tree."""
-  importances = np.zeros(num_features)
-
-  for node in tree.nodes:
-    node_type = node.WhichOneof('node')
-    if node_type == 'bucketized_split':
-      feature_id = node.bucketized_split.feature_id
-      importances[feature_id] += node.metadata.gain
-    elif node_type == 'leaf':
-      assert node.metadata.gain == 0
-    else:
-      raise ValueError('Unexpected split type %s', node_type)
-
-  return importances
-
-
-def _compute_feature_importances(tree_ensemble, num_features, normalize):
-  """Computes gain-based feature importances.
-
-  The higher the value, the more important the feature.
-
-  Args:
-    tree_ensemble: a trained tree ensemble, instance of proto
-      boosted_trees.TreeEnsemble.
-    num_features: The total number of feature ids.
-    normalize: If True, normalize the feature importances.
-
-  Returns:
-    sorted_feature_idx: A list of feature_id which is sorted
-      by its feature importance.
-    feature_importances: A list of corresponding feature importances.
-
-  Raises:
-    AssertionError: When normalize = True, if feature importances
-      contain negative value, or if normalization is not possible
-      (e.g. ensemble is empty or trees contain only a root node).
-  """
-  tree_importances = [_compute_feature_importances_per_tree(tree, num_features)
-                      for tree in tree_ensemble.trees]
-  tree_importances = np.array(tree_importances)
-  tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1)
-  feature_importances = np.sum(tree_importances * tree_weights, axis=0)
-  if normalize:
-    assert np.all(feature_importances >= 0), ('feature_importances '
-                                              'must be non-negative.')
-    normalizer = np.sum(feature_importances)
-    assert normalizer > 0, 'Trees are all empty or contain only a root node.'
-    feature_importances /= normalizer
-
-  sorted_feature_idx = np.argsort(feature_importances)[::-1]
-  return sorted_feature_idx, feature_importances[sorted_feature_idx]
-
-
-def _bt_explanations_fn(features,
-                        head,
-                        sorted_feature_columns,
-                        name='boosted_trees'):
-  """Gradient Boosted Trees predict with explanations model_fn.
-
-  Args:
-    features: dict of `Tensor`.
-    head: A `head_lib._Head` instance.
-    sorted_feature_columns: Sorted iterable of `feature_column._FeatureColumn`
-      model inputs.
-    name: Name used for the model.
-
-  Returns:
-      An `EstimatorSpec` instance.
-
-  Raises:
-    ValueError: mode or params are invalid, or features has the wrong type.
-  """
-  mode = model_fn_lib.ModeKeys.PREDICT
-  with ops.name_scope(name) as name:
-    # Create Ensemble resources.
-    tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
-
-    input_feature_list = _get_transformed_features(features,
-                                                   sorted_feature_columns)
-
-    logits = boosted_trees_ops.predict(
-        # For non-TRAIN mode, ensemble doesn't change after initialization,
-        # so no local copy is needed; using tree_ensemble directly.
-        tree_ensemble_handle=tree_ensemble.resource_handle,
-        bucketized_features=input_feature_list,
-        logits_dimension=head.logits_dimension)
-
-    estimator_spec = head.create_estimator_spec(
-        features=features,
-        mode=mode,
-        labels=None,
-        train_op_fn=control_flow_ops.no_op,
-        logits=logits)
-
-    debug_op = boosted_trees_ops.example_debug_outputs(
-        tree_ensemble.resource_handle,
-        bucketized_features=input_feature_list,
-        logits_dimension=head.logits_dimension)
-    estimator_spec.predictions[boosted_trees_utils._DEBUG_PROTO_KEY] = debug_op  # pylint: disable=protected-access
-    return estimator_spec
-
-
-class _BoostedTreesBase(estimator.Estimator):
-  """Base class for boosted trees estimators.
-
-  This class is intended to keep tree-specific functions (E.g., methods for
-  feature importances and directional feature contributions) in one central
-  place.
-
-  It is not a valid (working) Estimator on its own and should only be used as a
-  base class.
-  """
-
-  def __init__(self, model_fn, model_dir, config, feature_columns, head,
-               center_bias, is_classification):
-    """Initializes a `_BoostedTreesBase` instance.
-
-    Args:
-      model_fn: model_fn: Model function. See base class for more detail.
-      model_dir: Directory to save model parameters, graph and etc. See base
-        class for more detail.
-      config: `estimator.RunConfig` configuration object.
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `FeatureColumn`
-      head: A `head_lib._Head` instance.
-      center_bias: Whether bias centering needs to occur. Bias centering refers
-        to the first node in the very first tree returning the prediction that
-        is aligned with the original labels distribution. For example, for
-        regression problems, the first node will return the mean of the labels.
-        For binary classification problems, it will return a logit for a prior
-        probability of label 1.
-      is_classification: If the estimator is for classification.
-    """
-    super(_BoostedTreesBase, self).__init__(
-        model_fn=model_fn, model_dir=model_dir, config=config)
-    self._sorted_feature_columns = sorted(
-        feature_columns, key=lambda tc: tc.name)
-    self._head = head
-    self._n_features = _calculate_num_features(self._sorted_feature_columns)
-    self._names_for_feature_id = np.array(
-        _generate_feature_name_mapping(self._sorted_feature_columns))
-    self._center_bias = center_bias
-    self._is_classification = is_classification
-
-  def experimental_feature_importances(self, normalize=False):
-    """Computes gain-based feature importances.
-
-    The higher the value, the more important the corresponding feature.
-
-    Args:
-      normalize: If True, normalize the feature importances.
-
-    Returns:
-      sorted_feature_names: 1-D array of feature name which is sorted
-        by its feature importance.
-      feature_importances: 1-D array of the corresponding feature importance.
-
-    Raises:
-      ValueError: When attempting to normalize on an empty ensemble
-        or an ensemble of trees which have no splits. Or when attempting
-        to normalize and feature importances have negative values.
-    """
-    reader = checkpoint_utils.load_checkpoint(self._model_dir)
-    serialized = reader.get_tensor('boosted_trees:0_serialized')
-    if not serialized:
-      raise ValueError('Found empty serialized string for TreeEnsemble.'
-                       'You should only call this method after training.')
-    ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-    ensemble_proto.ParseFromString(serialized)
-
-    sorted_feature_id, importances = _compute_feature_importances(
-        ensemble_proto, self._n_features, normalize)
-    return self._names_for_feature_id[sorted_feature_id], importances
-
-  def experimental_predict_with_explanations(self,
-                                             input_fn,
-                                             predict_keys=None,
-                                             hooks=None,
-                                             checkpoint_path=None):
-    """Computes model explainability outputs per example along with predictions.
-
-    Currently supports directional feature contributions (DFCs). For each
-    instance, DFCs indicate the aggregate contribution of each feature. See
-    https://arxiv.org/abs/1312.1121 and
-    http://blog.datadive.net/interpreting-random-forests/ for more details.
-    Args:
-      input_fn: A function that provides input data for predicting as
-        minibatches. See [Premade Estimators](
-        https://tensorflow.org/guide/premade_estimators#create_input_functions)
-          for more information. The function should construct and return one of
-        the following:  * A `tf.data.Dataset` object: Outputs of `Dataset`
-          object must be a tuple `(features, labels)` with same constraints as
-        below. * A tuple `(features, labels)`: Where `features` is a `tf.Tensor`
-          or a dictionary of string feature name to `Tensor` and `labels` is a
-          `Tensor` or a dictionary of string label name to `Tensor`. Both
-          `features` and `labels` are consumed by `model_fn`. They should
-          satisfy the expectation of `model_fn` from inputs.
-      predict_keys: list of `str`, name of the keys to predict. It is used if
-        the `tf.estimator.EstimatorSpec.predictions` is a `dict`. If
-        `predict_keys` is used then rest of the predictions will be filtered
-        from the dictionary, with the exception of 'bias' and 'dfc', which will
-        always be in the dictionary. If `None`, returns all keys in prediction
-        dict, as well as two new keys 'dfc' and 'bias'.
-      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
-        callbacks inside the prediction call.
-      checkpoint_path: Path of a specific checkpoint to predict. If `None`, the
-        latest checkpoint in `model_dir` is used.  If there are no checkpoints
-        in `model_dir`, prediction is run with newly initialized `Variables`
-        instead of ones restored from checkpoint.
-
-    Yields:
-      Evaluated values of `predictions` tensors. The `predictions` tensors will
-      contain at least two keys 'dfc' and 'bias' for model explanations. The
-      `dfc` value corresponds to the contribution of each feature to the overall
-      prediction for this instance (positive indicating that the feature makes
-      it more likely to select class 1 and negative less likely). The 'bias'
-      value will be the same across all the instances, corresponding to the
-      probability (classification) or prediction (regression) of the training
-      data distribution.
-
-    Raises:
-      ValueError: when wrong arguments are given or unsupported functionalities
-       are requested.
-    """
-    if not self._center_bias:
-      raise ValueError('center_bias must be enabled during estimator '
-                       'instantiation when using '
-                       'experimental_predict_with_explanations.')
-    # pylint: disable=protected-access
-    if not self._is_classification:
-      identity_inverse_link_fn = self._head._inverse_link_fn in (None,
-                                                                 tf_identity)
-      # pylint:enable=protected-access
-      if not identity_inverse_link_fn:
-        raise ValueError(
-            'For now only identity inverse_link_fn in regression_head is '
-            'supported for experimental_predict_with_explanations.')
-
-    # pylint:disable=unused-argument
-    def new_model_fn(features, labels, mode):
-      return _bt_explanations_fn(features, self._head,
-                                 self._sorted_feature_columns)
-
-    # pylint:enable=unused-argument
-    est = estimator.Estimator(
-        model_fn=new_model_fn,
-        model_dir=self.model_dir,
-        config=self.config,
-        warm_start_from=self._warm_start_settings)
-    # Make sure bias and dfc will be in prediction dict.
-    user_supplied_predict_keys = predict_keys is not None
-    if user_supplied_predict_keys:
-      predict_keys = set(predict_keys)
-      predict_keys.add(boosted_trees_utils._DEBUG_PROTO_KEY)
-    predictions = est.predict(
-        input_fn,
-        predict_keys=predict_keys,
-        hooks=hooks,
-        checkpoint_path=checkpoint_path,
-        yield_single_examples=True)
-    for pred in predictions:
-      bias, dfcs = boosted_trees_utils._parse_explanations_from_prediction(
-          pred[boosted_trees_utils._DEBUG_PROTO_KEY], self._n_features,
-          self._is_classification)
-      pred['bias'] = bias
-      pred['dfc'] = dfcs
-      # Don't need to expose serialized proto to end user.
-      del pred[boosted_trees_utils._DEBUG_PROTO_KEY]
-      yield pred
-
-
-# pylint: disable=protected-access
-@estimator_export('estimator.BoostedTreesClassifier')
-class BoostedTreesClassifier(_BoostedTreesBase):
-  """A Classifier for Tensorflow Boosted Trees models.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               feature_columns,
-               n_batches_per_layer,
-               model_dir=None,
-               n_classes=_HOLD_FOR_MULTI_CLASS_SUPPORT,
-               weight_column=None,
-               label_vocabulary=None,
-               n_trees=100,
-               max_depth=6,
-               learning_rate=0.1,
-               l1_regularization=0.,
-               l2_regularization=0.,
-               tree_complexity=0.,
-               min_node_weight=0.,
-               config=None,
-               center_bias=False,
-               pruning_mode='none'):
-    """Initializes a `BoostedTreesClassifier` instance.
-
-    Example:
-
-    ```python
-    bucketized_feature_1 = bucketized_column(
-      numeric_column('feature_1'), BUCKET_BOUNDARIES_1)
-    bucketized_feature_2 = bucketized_column(
-      numeric_column('feature_2'), BUCKET_BOUNDARIES_2)
-
-    # Need to see a large portion of the data before we can build a layer, for
-    # example half of data n_batches_per_layer = 0.5 * NUM_EXAMPLES / BATCH_SIZE
-    classifier = estimator.BoostedTreesClassifier(
-        feature_columns=[bucketized_feature_1, bucketized_feature_2],
-        n_batches_per_layer=n_batches_per_layer,
-        n_trees=100,
-        ... <some other params>
-    )
-
-    def input_fn_train():
-      ...
-      return dataset
-
-    classifier.train(input_fn=input_fn_train)
-
-    def input_fn_eval():
-      ...
-      return dataset
-
-    metrics = classifier.evaluate(input_fn=input_fn_eval)
-    ```
-
-    Args:
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `FeatureColumn`.
-      n_batches_per_layer: the number of batches to collect statistics per
-        layer. The total number of batches is total number of data divided by
-        batch size.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
-        to continue training a previously saved model.
-      n_classes: number of label classes. Default is binary classification.
-        Multiclass support is not yet implemented.
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to downweight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      label_vocabulary: A list of strings represents possible label values. If
-        given, labels must be string type and have any value in
-        `label_vocabulary`. If it is not given, that means labels are
-        already encoded as integer or float within [0, 1] for `n_classes=2` and
-        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
-        Also there will be errors if vocabulary is not provided and labels are
-        string.
-      n_trees: number trees to be created.
-      max_depth: maximum depth of the tree to grow.
-      learning_rate: shrinkage parameter to be used when a tree added to the
-        model.
-      l1_regularization: regularization multiplier applied to the absolute
-        weights of the tree leafs.
-      l2_regularization: regularization multiplier applied to the square weights
-        of the tree leafs.
-      tree_complexity: regularization factor to penalize trees with more leaves.
-      min_node_weight: min_node_weight: minimum hessian a node must have for a
-        split to be considered. The value will be compared with
-        sum(leaf_hessian)/(batch_size * n_batches_per_layer).
-      config: `RunConfig` object to configure the runtime settings.
-      center_bias: Whether bias centering needs to occur. Bias centering refers
-        to the first node in the very first tree returning the prediction that
-        is aligned with the original labels distribution. For example, for
-        regression problems, the first node will return the mean of the labels.
-        For binary classification problems, it will return a logit for a prior
-        probability of label 1.
-      pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
-        pruning (do not split a node if not enough gain is observed) and post
-        pruning (build the tree up to a max depth and then prune branches with
-        negative gain). For pre and post pruning, you MUST provide
-        tree_complexity >0.
-
-    Raises:
-      ValueError: when wrong arguments are given or unsupported functionalities
-         are requested.
-    """
-    # TODO(nponomareva): Support multi-class cases.
-    if n_classes == _HOLD_FOR_MULTI_CLASS_SUPPORT:
-      n_classes = 2
-    head, closed_form = _create_classification_head_and_closed_form(
-        n_classes, weight_column, label_vocabulary=label_vocabulary)
-    # HParams for the model.
-    tree_hparams = _TreeHParams(
-        n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
-        tree_complexity, min_node_weight, center_bias, pruning_mode)
-
-    def _model_fn(features, labels, mode, config):
-      return _bt_model_fn(
-          features,
-          labels,
-          mode,
-          head,
-          feature_columns,
-          tree_hparams,
-          n_batches_per_layer,
-          config,
-          closed_form_grad_and_hess_fn=closed_form)
-
-    super(BoostedTreesClassifier, self).__init__(
-        model_fn=_model_fn,
-        model_dir=model_dir,
-        config=config,
-        feature_columns=feature_columns,
-        head=head,
-        center_bias=center_bias,
-        is_classification=True)
-
-
-@estimator_export('estimator.BoostedTreesRegressor')
-class BoostedTreesRegressor(_BoostedTreesBase):
-  """A Regressor for Tensorflow Boosted Trees models.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               feature_columns,
-               n_batches_per_layer,
-               model_dir=None,
-               label_dimension=_HOLD_FOR_MULTI_DIM_SUPPORT,
-               weight_column=None,
-               n_trees=100,
-               max_depth=6,
-               learning_rate=0.1,
-               l1_regularization=0.,
-               l2_regularization=0.,
-               tree_complexity=0.,
-               min_node_weight=0.,
-               config=None,
-               center_bias=False,
-               pruning_mode='none'):
-    """Initializes a `BoostedTreesRegressor` instance.
-
-    Example:
-
-    ```python
-    bucketized_feature_1 = bucketized_column(
-      numeric_column('feature_1'), BUCKET_BOUNDARIES_1)
-    bucketized_feature_2 = bucketized_column(
-      numeric_column('feature_2'), BUCKET_BOUNDARIES_2)
-
-    # Need to see a large portion of the data before we can build a layer, for
-    # example half of data n_batches_per_layer = 0.5 * NUM_EXAMPLES / BATCH_SIZE
-    regressor = estimator.BoostedTreesRegressor(
-        feature_columns=[bucketized_feature_1, bucketized_feature_2],
-        n_batches_per_layer=n_batches_per_layer,
-        n_trees=100,
-        ... <some other params>
-    )
-
-    def input_fn_train():
-      ...
-      return dataset
-
-    regressor.train(input_fn=input_fn_train)
-
-    def input_fn_eval():
-      ...
-      return dataset
-
-    metrics = regressor.evaluate(input_fn=input_fn_eval)
-    ```
-
-    Args:
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `FeatureColumn`.
-      n_batches_per_layer: the number of batches to collect statistics per
-        layer. The total number of batches is total number of data divided by
-        batch size.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
-        to continue training a previously saved model.
-      label_dimension: Number of regression targets per example.
-        Multi-dimensional support is not yet implemented.
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to downweight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      n_trees: number trees to be created.
-      max_depth: maximum depth of the tree to grow.
-      learning_rate: shrinkage parameter to be used when a tree added to the
-        model.
-      l1_regularization: regularization multiplier applied to the absolute
-        weights of the tree leafs.
-      l2_regularization: regularization multiplier applied to the square weights
-        of the tree leafs.
-      tree_complexity: regularization factor to penalize trees with more leaves.
-      min_node_weight: min_node_weight: minimum hessian a node must have for a
-        split to be considered. The value will be compared with
-        sum(leaf_hessian)/(batch_size * n_batches_per_layer).
-      config: `RunConfig` object to configure the runtime settings.
-      center_bias: Whether bias centering needs to occur. Bias centering refers
-        to the first node in the very first tree returning the prediction that
-        is aligned with the original labels distribution. For example, for
-        regression problems, the first node will return the mean of the labels.
-        For binary classification problems, it will return a logit for a prior
-        probability of label 1.
-      pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
-        pruning (do not split a node if not enough gain is observed) and post
-        pruning (build the tree up to a max depth and then prune branches with
-        negative gain). For pre and post pruning, you MUST provide
-        tree_complexity >0.
-
-    Raises:
-      ValueError: when wrong arguments are given or unsupported functionalities
-         are requested.
-    """
-    # TODO(nponomareva): Extend it to multi-dimension cases.
-    if label_dimension == _HOLD_FOR_MULTI_DIM_SUPPORT:
-      label_dimension = 1
-    head = _create_regression_head(label_dimension, weight_column)
-
-    # HParams for the model.
-    tree_hparams = _TreeHParams(
-        n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
-        tree_complexity, min_node_weight, center_bias, pruning_mode)
-
-    def _model_fn(features, labels, mode, config):
-      return _bt_model_fn(features, labels, mode, head, feature_columns,
-                          tree_hparams, n_batches_per_layer, config)
-
-    super(BoostedTreesRegressor, self).__init__(
-        model_fn=_model_fn,
-        model_dir=model_dir,
-        config=config,
-        feature_columns=feature_columns,
-        head=head,
-        center_bias=center_bias,
-        is_classification=False)
+from tensorflow_estimator.python.estimator.canned import boosted_trees
 
+# Include attrs that start with single underscore.
+boosted_trees.__all__ = [
+    s for s in dir(boosted_trees) if not s.startswith('__')
+]
 
-# pylint: enable=protected-access
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.boosted_trees import *
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
deleted file mode 100644
index 23687a738b..0000000000
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ /dev/null
@@ -1,2549 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests boosted_trees estimators and model_fn."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from google.protobuf import text_format
-import numpy as np
-
-from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
-from tensorflow.python.client import session
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator import run_config
-from tensorflow.python.estimator.canned import boosted_trees
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import gen_boosted_trees_ops
-from tensorflow.python.ops import boosted_trees_ops
-from tensorflow.python.ops import resources
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import googletest
-from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.training import session_run_hook
-
-NUM_FEATURES = 3
-
-BUCKET_BOUNDARIES = [-2., .5, 12.]  # Boundaries for all the features.
-INPUT_FEATURES = np.array(
-    [
-        [12.5, 1.0, -2.001, -2.0001, -1.999],  # feature_0 quantized:[3,2,0,0,1]
-        [2.0, -3.0, 0.5, 0.0, 0.4995],         # feature_1 quantized:[2,0,2,1,1]
-        [3.0, 20.0, 50.0, -100.0, 102.75],     # feature_2 quantized:[2,3,3,0,3]
-    ],
-    dtype=np.float32)
-
-CLASSIFICATION_LABELS = [[0.], [1.], [1.], [0.], [0.]]
-REGRESSION_LABELS = [[1.5], [0.3], [0.2], [2.], [5.]]
-FEATURES_DICT = {'f_%d' % i: INPUT_FEATURES[i] for i in range(NUM_FEATURES)}
-
-# EXAMPLE_ID is not exposed to Estimator yet, but supported at model_fn level.
-EXAMPLE_IDS = np.array([0, 1, 2, 3, 4], dtype=np.int64)
-EXAMPLE_ID_COLUMN = '__example_id__'
-
-
-def _make_train_input_fn(is_classification):
-  """Makes train input_fn for classification/regression."""
-
-  def _input_fn():
-    features_dict = dict(FEATURES_DICT)  # copies the dict to add an entry.
-    features_dict[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS)
-    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
-    return features_dict, labels
-
-  return _input_fn
-
-
-def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None):
-  """Makes input_fn using Dataset."""
-
-  def _input_fn():
-    features_dict = dict(FEATURES_DICT)  # copies the dict to add an entry.
-    features_dict[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS)
-    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
-    if batch:
-      ds = dataset_ops.Dataset.zip(
-          (dataset_ops.Dataset.from_tensor_slices(features_dict),
-           dataset_ops.Dataset.from_tensor_slices(labels))).batch(batch)
-    else:
-      ds = dataset_ops.Dataset.zip(
-          (dataset_ops.Dataset.from_tensors(features_dict),
-           dataset_ops.Dataset.from_tensors(labels)))
-    # repeat indefinitely by default, or stop at the given step.
-    ds = ds.repeat(repeat)
-    return ds
-
-  return _input_fn
-
-
-class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
-
-  def setUp(self):
-    self._feature_columns = {
-        feature_column.bucketized_column(
-            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
-            BUCKET_BOUNDARIES)
-        for i in range(NUM_FEATURES)
-    }
-
-  def _assert_checkpoint(self, model_dir, global_step, finalized_trees,
-                         attempted_layers):
-    self._assert_checkpoint_and_return_model(model_dir, global_step,
-                                             finalized_trees, attempted_layers)
-
-  def _assert_checkpoint_and_return_model(self, model_dir, global_step,
-                                          finalized_trees, attempted_layers):
-    reader = checkpoint_utils.load_checkpoint(model_dir)
-    self.assertEqual(global_step, reader.get_tensor(ops.GraphKeys.GLOBAL_STEP))
-    serialized = reader.get_tensor('boosted_trees:0_serialized')
-    ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-    ensemble_proto.ParseFromString(serialized)
-
-    self.assertEqual(
-        finalized_trees,
-        sum([1 for t in ensemble_proto.tree_metadata if t.is_finalized]))
-    self.assertEqual(attempted_layers,
-                     ensemble_proto.growing_metadata.num_layers_attempted)
-
-    return ensemble_proto
-
-  def testFirstCheckpointWorksFine(self):
-    """Tests that eval/pred doesn't crash with the very first checkpoint.
-
-    The step-0 checkpoint will have only an empty ensemble, and a separate eval
-    job might read from it and crash.
-    This test ensures that prediction/evaluation works fine with it.
-    """
-    input_fn = _make_train_input_fn(is_classification=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-
-    class BailOutWithoutTraining(session_run_hook.SessionRunHook):
-
-      def before_run(self, run_context):
-        raise StopIteration('to bail out.')
-
-    est.train(input_fn, steps=100,  # must stop at 0 anyway.
-              hooks=[BailOutWithoutTraining()])
-    self._assert_checkpoint(
-        est.model_dir, global_step=0, finalized_trees=0, attempted_layers=0)
-    # Empty ensemble returns 0 logits, so that all output labels are 0.
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 0.6)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose([[0], [0], [0], [0], [0]],
-                        [pred['class_ids'] for pred in predictions])
-
-  def testTrainAndEvaluateBinaryClassifier(self):
-    input_fn = _make_train_input_fn(is_classification=True)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-
-    # It will stop after 5 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(input_fn, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-
-  def testTrainTwiceAndEvaluateBinaryClassifier(self):
-    input_fn = _make_train_input_fn(is_classification=True)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=5,
-        max_depth=10)
-
-    num_steps = 2
-    # Train for a few steps, and validate final checkpoint.
-    est.train(input_fn, steps=num_steps)
-    est.train(input_fn, steps=num_steps)
-
-    self._assert_checkpoint(
-        est.model_dir, global_step=num_steps * 2,
-        finalized_trees=0, attempted_layers=4)
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-
-  def testInferBinaryClassifier(self):
-    train_input_fn = _make_train_input_fn(is_classification=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-
-    # It will stop after 5 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(train_input_fn, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose([[0], [1], [1], [0], [0]],
-                        [pred['class_ids'] for pred in predictions])
-
-  def testTrainClassifierWithRankOneLabel(self):
-    """Tests that label with rank-1 tensor is also accepted by classifier."""
-    def _input_fn_with_rank_one_label():
-      return FEATURES_DICT, [0., 1., 1., 0., 0.]
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-
-    # It will stop after 5 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(_input_fn_with_rank_one_label, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    eval_res = est.evaluate(input_fn=_input_fn_with_rank_one_label, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-
-  def testTrainClassifierWithLabelVocabulary(self):
-    apple, banana = 'apple', 'banana'
-    def _input_fn_with_label_vocab():
-      return FEATURES_DICT, [[apple], [banana], [banana], [apple], [apple]]
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5,
-        label_vocabulary=[apple, banana])
-    est.train(input_fn=_input_fn_with_label_vocab, steps=5)
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    eval_res = est.evaluate(input_fn=_input_fn_with_label_vocab, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose([[0], [1], [1], [0], [0]],
-                        [pred['class_ids'] for pred in predictions])
-
-  def testTrainClassifierWithIntegerLabel(self):
-    def _input_fn_with_integer_label():
-      return (FEATURES_DICT,
-              constant_op.constant([[0], [1], [1], [0], [0]], dtypes.int32))
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-    est.train(input_fn=_input_fn_with_integer_label, steps=5)
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    eval_res = est.evaluate(input_fn=_input_fn_with_integer_label, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose([[0], [1], [1], [0], [0]],
-                        [pred['class_ids'] for pred in predictions])
-
-  def testTrainClassifierWithDataset(self):
-    train_input_fn = _make_train_input_fn_dataset(is_classification=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-    est.train(train_input_fn, steps=100)  # will stop after 5 steps anyway.
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['accuracy'], 1.0)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose([[0], [1], [1], [0], [0]],
-                        [pred['class_ids'] for pred in predictions])
-
-  def testTrainAndEvaluateRegressor(self):
-    input_fn = _make_train_input_fn(is_classification=False)
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        max_depth=5)
-
-    # It will stop after 10 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(input_fn, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10)
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 1.008551)
-
-  def testInferRegressor(self):
-    train_input_fn = _make_train_input_fn(is_classification=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-
-    # It will stop after 5 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(train_input_fn, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose(
-        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
-        [pred['predictions'] for pred in predictions])
-
-  def testTrainRegressorWithRankOneLabel(self):
-    """Tests that label with rank-1 tensor is also accepted by regressor."""
-    def _input_fn_with_rank_one_label():
-      return FEATURES_DICT, [1.5, 0.3, 0.2, 2., 5.]
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-
-    # It will stop after 5 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(_input_fn_with_rank_one_label, steps=num_steps)
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    eval_res = est.evaluate(input_fn=_input_fn_with_rank_one_label, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 2.478283)
-
-  def testTrainRegressorWithDataset(self):
-    train_input_fn = _make_train_input_fn_dataset(is_classification=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-    est.train(train_input_fn, steps=100)  # will stop after 5 steps anyway.
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 2.478283)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose(
-        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
-        [pred['predictions'] for pred in predictions])
-
-  def testTrainRegressorWithDatasetBatch(self):
-    # The batch_size as the entire data size should yield the same result as
-    # dataset without batching.
-    train_input_fn = _make_train_input_fn_dataset(
-        is_classification=False, batch=5)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-    est.train(train_input_fn, steps=100)  # will stop after 5 steps anyway.
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 2.478283)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose(
-        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
-        [pred['predictions'] for pred in predictions])
-
-  def testTrainRegressorWithDatasetLargerBatch(self):
-    # The batch_size as the multiple of the entire data size should still yield
-    # the same result.
-    train_input_fn = _make_train_input_fn_dataset(
-        is_classification=False, batch=15)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-    est.train(train_input_fn, steps=100)  # will stop after 5 steps anyway.
-    self._assert_checkpoint(
-        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 2.478283)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose(
-        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
-        [pred['predictions'] for pred in predictions])
-
-  def testTrainRegressorWithDatasetSmallerBatch(self):
-    # Even when using small batches, if (n_batches_per_layer * batch_size) makes
-    # the same entire data size, the result should be the same.
-    train_input_fn = _make_train_input_fn_dataset(
-        is_classification=False, batch=1)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=5,
-        n_trees=1,
-        max_depth=5)
-    # Train stops after (n_batches_per_layer * n_trees * max_depth) steps.
-    est.train(train_input_fn, steps=100)
-    self._assert_checkpoint(
-        est.model_dir, global_step=25, finalized_trees=1, attempted_layers=5)
-    # 5 batches = one epoch.
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=5)
-    self.assertAllClose(eval_res['average_loss'], 2.478283)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose(
-        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
-        [pred['predictions'] for pred in predictions])
-
-  def testTrainRegressorWithDatasetWhenInputIsOverEarlier(self):
-    train_input_fn = _make_train_input_fn_dataset(
-        is_classification=False, repeat=3)  # to stop input after 3 steps.
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-    # Note that training will stop when input exhausts.
-    # This might not be a typical pattern, but dataset.repeat(3) causes
-    # the input stream to cease after 3 steps.
-    est.train(train_input_fn, steps=100)
-    self._assert_checkpoint(
-        est.model_dir, global_step=3, finalized_trees=0, attempted_layers=3)
-    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
-    self.assertAllClose(eval_res['average_loss'], 3.777295)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-    self.assertAllClose(
-        [[0.353850], [0.254100], [0.106850], [0.712100], [1.012100]],
-        [pred['predictions'] for pred in predictions])
-
-  def testTrainEvaluateAndPredictWithIndicatorColumn(self):
-    categorical = feature_column.categorical_column_with_vocabulary_list(
-        key='categorical', vocabulary_list=('bad', 'good', 'ok'))
-    feature_indicator = feature_column.indicator_column(categorical)
-    bucketized_col = feature_column.bucketized_column(
-        feature_column.numeric_column(
-            'an_uninformative_feature', dtype=dtypes.float32),
-        BUCKET_BOUNDARIES)
-
-    labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
-    # Our categorical feature defines the labels perfectly
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'an_uninformative_feature': np.array([1, 1, 1, 1, 1]),
-            'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
-        },
-        y=labels,
-        batch_size=5,
-        shuffle=False)
-
-    # Train depth 1 tree.
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=[bucketized_col, feature_indicator],
-        n_batches_per_layer=1,
-        n_trees=1,
-        learning_rate=1.0,
-        max_depth=1)
-
-    num_steps = 1
-    est.train(input_fn, steps=num_steps)
-    ensemble = self._assert_checkpoint_and_return_model(
-        est.model_dir, global_step=1, finalized_trees=1, attempted_layers=1)
-
-    # We learnt perfectly.
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['loss'], 0)
-
-    predictions = list(est.predict(input_fn))
-    self.assertAllClose(
-        labels,
-        [pred['predictions'] for pred in predictions])
-
-    self.assertEqual(3, len(ensemble.trees[0].nodes))
-
-    # Check that the split happened on 'good' value, which will be encoded as
-    # feature with index 2 (0-numeric, 1 - 'bad')
-    self.assertEqual(2, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
-    self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
-
-  def testTrainEvaluateAndPredictWithOnlyIndicatorColumn(self):
-    categorical = feature_column.categorical_column_with_vocabulary_list(
-        key='categorical', vocabulary_list=('bad', 'good', 'ok'))
-    feature_indicator = feature_column.indicator_column(categorical)
-
-    labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
-    # Our categorical feature defines the labels perfectly
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
-        },
-        y=labels,
-        batch_size=5,
-        shuffle=False)
-
-    # Train depth 1 tree.
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=[feature_indicator],
-        n_batches_per_layer=1,
-        n_trees=1,
-        learning_rate=1.0,
-        max_depth=1)
-
-    num_steps = 1
-    est.train(input_fn, steps=num_steps)
-    ensemble = self._assert_checkpoint_and_return_model(
-        est.model_dir, global_step=1, finalized_trees=1, attempted_layers=1)
-
-    # We learnt perfectly.
-    eval_res = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertAllClose(eval_res['loss'], 0)
-
-    predictions = list(est.predict(input_fn))
-    self.assertAllClose(
-        labels,
-        [pred['predictions'] for pred in predictions])
-
-    self.assertEqual(3, len(ensemble.trees[0].nodes))
-
-    # Check that the split happened on 'good' value, which will be encoded as
-    # feature with index 1 (0 - 'bad', 2 - 'ok')
-    self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
-    self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
-
-  def testFeatureImportancesWithTrainedEnsemble(self):
-    input_fn = _make_train_input_fn(is_classification=True)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        max_depth=5)
-
-    # It will stop after 5 steps because of the max depth and num trees.
-    num_steps = 100
-    # Train for a few steps, and validate final checkpoint.
-    est.train(input_fn, steps=num_steps)
-
-    feature_names_expected = ['f_0_bucketized',
-                              'f_2_bucketized',
-                              'f_1_bucketized']
-
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=False)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.833933, 0.606342, 0.0], importances)
-
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=True)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.579010, 0.420990, 0.0], importances)
-
-  def testFeatureImportancesOnEmptyEnsemble(self):
-    input_fn = _make_train_input_fn(is_classification=True)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-
-    class BailOutWithoutTraining(session_run_hook.SessionRunHook):
-
-      def before_run(self, run_context):
-        raise StopIteration('to bail out.')
-
-    # The step-0 checkpoint will have only an empty ensemble.
-    est.train(input_fn,
-              steps=100,  # must stop at 0 anyway.
-              hooks=[BailOutWithoutTraining()])
-
-    with self.assertRaisesRegexp(ValueError, 'empty serialized string'):
-      est.experimental_feature_importances(normalize=False)
-
-    with self.assertRaisesRegexp(ValueError, 'empty serialized string'):
-      est.experimental_feature_importances(normalize=True)
-
-  def _create_fake_checkpoint_with_tree_ensemble_proto(self,
-                                                       est,
-                                                       tree_ensemble_text):
-    with ops.Graph().as_default():
-      with ops.name_scope('boosted_trees') as name:
-        tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
-        tree_ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-        text_format.Merge(tree_ensemble_text, tree_ensemble_proto)
-        stamp_token, _ = tree_ensemble.serialize()
-        restore_op = tree_ensemble.deserialize(
-            stamp_token, tree_ensemble_proto.SerializeToString())
-
-        with session.Session() as sess:
-          resources.initialize_resources(resources.shared_resources()).run()
-          restore_op.run()
-          saver = saver_lib.Saver()
-          save_path = os.path.join(est.model_dir, 'model.ckpt')
-          saver.save(sess, save_path)
-
-  def testFeatureImportancesOnNonEmptyEnsemble(self):
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        max_depth=5)
-
-    tree_ensemble_text = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 2.0
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 3.0
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 2.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              left_id: 7
-              right_id: 8
-            }
-            metadata {
-              gain: 1.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 3.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.34
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 3.34
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 1.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 3.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.34
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(
-        est, tree_ensemble_text)
-
-    feature_names_expected = ['f_0_bucketized',
-                              'f_2_bucketized',
-                              'f_1_bucketized']
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=False)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    # Gain sum for each features:
-    # = 1.0 * [3 + 1, 2, 2] + 1.0 * [1, 1, 0]
-    self.assertAllClose([5.0, 3.0, 2.0], importances)
-
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=True)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.5, 0.3, 0.2], importances)
-
-  def testFeatureImportancesWithTreeWeights(self):
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=3,
-        max_depth=5)
-
-    tree_ensemble_text = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 12.5
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 5.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 5.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.34
-            }
-          }
-        }
-        trees {
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-        }
-        tree_weights: 0.4
-        tree_weights: 0.6
-        tree_weights: 1.0
-        """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(
-        est, tree_ensemble_text)
-
-    feature_names_expected = ['f_0_bucketized',
-                              'f_2_bucketized',
-                              'f_1_bucketized']
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=False)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    # Gain sum for each features:
-    # = 0.4 * [12.5, 0, 5] + 0.6 * [0, 5, 0] + 1.0 * [0, 0, 0]
-    self.assertAllClose([5.0, 3.0, 2.0], importances)
-
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=True)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.5, 0.3, 0.2], importances)
-
-  def testFeatureImportancesWithAllEmptyTree(self):
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=2,
-        max_depth=5)
-
-    tree_ensemble_text = """
-        trees {
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-        }
-        trees {
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(
-        est, tree_ensemble_text)
-
-    # Reverse order because feature importances are sorted by np.argsort(f)[::-1]
-    feature_names_expected = ['f_2_bucketized',
-                              'f_1_bucketized',
-                              'f_0_bucketized']
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=False)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.0, 0.0, 0.0], importances)
-
-    with self.assertRaisesRegexp(AssertionError,
-                                 'all empty or contain only a root node'):
-      est.experimental_feature_importances(normalize=True)
-
-  def testNegativeFeatureImportances(self):
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5)
-
-    # In order to generate a negative feature importances,
-    # We assign an invalid value -1 to tree_weights here.
-    tree_ensemble_text = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 5.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.34
-            }
-          }
-        }
-        tree_weights: -1.0
-        """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(
-        est, tree_ensemble_text)
-
-    # Github #21509 (nataliaponomareva):
-    # The gains stored in the splits can be negative
-    # if people are using complexity regularization.
-    feature_names_expected = ['f_2_bucketized',
-                              'f_0_bucketized',
-                              'f_1_bucketized']
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=False)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.0, 0.0, -5.0], importances)
-
-    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
-      est.experimental_feature_importances(normalize=True)
-
-  def testFeatureImportancesNamesForCategoricalColumn(self):
-    categorical = feature_column.categorical_column_with_vocabulary_list(
-        key='categorical', vocabulary_list=('bad', 'good', 'ok'))
-    feature_indicator = feature_column.indicator_column(categorical)
-    bucketized_col = feature_column.bucketized_column(
-        feature_column.numeric_column(
-            'continuous', dtype=dtypes.float32),
-        BUCKET_BOUNDARIES)
-    bucketized_indicator = feature_column.indicator_column(bucketized_col)
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=[feature_indicator,
-                         bucketized_col,
-                         bucketized_indicator],
-        n_batches_per_layer=1,
-        n_trees=2,
-        learning_rate=1.0,
-        max_depth=1)
-
-    tree_ensemble_text = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 5.0
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 4
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 2.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.0
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 5
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 2.0
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -2.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 3.34
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 4.34
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        """
-    self._create_fake_checkpoint_with_tree_ensemble_proto(
-        est, tree_ensemble_text)
-
-    feature_names_expected = ['categorical_indicator:ok',
-                              'continuous_bucketized_indicator:(-2.0, 0.5)',
-                              'continuous_bucketized_indicator:(-inf, -2.0)',
-                              'categorical_indicator:bad',
-                              # Reverse order because feature importances
-                              # are sorted by np.argsort(f)[::-1]
-                              'continuous_bucketized_indicator:(12.0, inf)',
-                              'continuous_bucketized_indicator:(0.5, 12.0)',
-                              'continuous_bucketized',
-                              'categorical_indicator:good']
-
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=False)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    # Gain sum for each features:
-    # = 1.0 * [5, 0, 2, 0, 0, 0, 0, 0] + 1.0 * [0, 2, 0, 1, 0, 0, 0, 0]
-    self.assertAllClose([5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0], importances)
-
-    feature_names, importances = est.experimental_feature_importances(
-        normalize=True)
-    self.assertAllEqual(feature_names_expected, feature_names)
-    self.assertAllClose([0.5, 0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], importances)
-
-  def testFeatureImportancesNamesForUnsupportedColumn(self):
-    numeric_col = feature_column.numeric_column(
-        'continuous', dtype=dtypes.float32)
-
-    with self.assertRaisesRegexp(ValueError,
-                                 'only bucketized_column and indicator_column'):
-      _ = boosted_trees.BoostedTreesRegressor(
-          feature_columns=[numeric_col],
-          n_batches_per_layer=1,
-          n_trees=2,
-          learning_rate=1.0,
-          max_depth=1)
-
-  def testTreeComplexityIsSetCorrectly(self):
-    input_fn = _make_train_input_fn(is_classification=True)
-
-    num_steps = 10
-    # Tree complexity is set but no pruning.
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5,
-        tree_complexity=1e-3)
-    with self.assertRaisesRegexp(ValueError, 'Tree complexity have no effect'):
-      est.train(input_fn, steps=num_steps)
-
-    # Pruning but no tree complexity.
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5,
-        pruning_mode='pre')
-    with self.assertRaisesRegexp(ValueError,
-                                 'tree_complexity must be positive'):
-      est.train(input_fn, steps=num_steps)
-
-    # All is good.
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5,
-        pruning_mode='pre',
-        tree_complexity=1e-3)
-    est.train(input_fn, steps=num_steps)
-
-
-class BoostedTreesDebugOutputsTest(test_util.TensorFlowTestCase):
-  """Test debug/model explainability outputs for individual predictions.
-
-  Includes directional feature contributions (DFC).
-  """
-
-  def setUp(self):
-    self._feature_columns = {
-        feature_column.bucketized_column(
-            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
-            BUCKET_BOUNDARIES) for i in range(NUM_FEATURES)
-    }
-
-  def testBinaryClassifierThatDFCIsInPredictions(self):
-    train_input_fn = _make_train_input_fn(is_classification=True)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=3, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesClassifier(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5,
-        center_bias=True)
-
-    num_steps = 100
-    # Train for a few steps. Validate debug outputs in prediction dicts.
-    est.train(train_input_fn, steps=num_steps)
-    debug_predictions = est.experimental_predict_with_explanations(
-        predict_input_fn)
-    biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
-                         for pred in debug_predictions])
-    self.assertAllClose([0.4] * 5, biases)
-    self.assertAllClose(({
-        0: -0.12108613453574479,
-        1: 0.0,
-        2: -0.039254929814481143
-    }, {
-        0: 0.19650601422250574,
-        1: 0.0,
-        2: 0.02693827052766018
-    }, {
-        0: 0.16057487356133376,
-        1: 0.0,
-        2: 0.02693827052766018
-    }, {
-        0: -0.12108613453574479,
-        1: 0.0,
-        2: -0.039254929814481143
-    }, {
-        0: -0.10832468554550384,
-        1: 0.0,
-        2: 0.02693827052766018
-    }), dfcs)
-
-    # Assert sum(dfcs) + bias == probabilities.
-    expected_probabilities = [
-        0.23965894, 0.62344426, 0.58751315, 0.23965894, 0.31861359
-    ]
-    probabilities = [
-        sum(dfc.values()) + bias for (dfc, bias) in zip(dfcs, biases)
-    ]
-    self.assertAllClose(expected_probabilities, probabilities)
-
-    # When user doesn't include bias or dfc in predict_keys, make sure to still
-    # include dfc and bias.
-    debug_predictions = est.experimental_predict_with_explanations(
-        predict_input_fn, predict_keys=['probabilities'])
-    for prediction_dict in debug_predictions:
-      self.assertTrue('bias' in prediction_dict)
-      self.assertTrue('dfc' in prediction_dict)
-      self.assertTrue('probabilities' in prediction_dict)
-      self.assertEqual(len(prediction_dict), 3)
-
-  def testRegressorThatDFCIsInPredictions(self):
-    train_input_fn = _make_train_input_fn(is_classification=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-    est = boosted_trees.BoostedTreesRegressor(
-        feature_columns=self._feature_columns,
-        n_batches_per_layer=1,
-        n_trees=1,
-        max_depth=5,
-        center_bias=True)
-
-    num_steps = 100
-    # Train for a few steps. Validate debug outputs in prediction dicts.
-    est.train(train_input_fn, steps=num_steps)
-    debug_predictions = est.experimental_predict_with_explanations(
-        predict_input_fn)
-    biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
-                         for pred in debug_predictions])
-    self.assertAllClose([1.8] * 5, biases)
-    self.assertAllClose(({
-        0: -0.070499420166015625,
-        1: -0.095000028610229492,
-        2: 0.0
-    }, {
-        0: -0.53763031959533691,
-        1: 0.063333392143249512,
-        2: 0.0
-    }, {
-        0: -0.51756942272186279,
-        1: -0.095000028610229492,
-        2: 0.0
-    }, {
-        0: 0.1563495397567749,
-        1: 0.063333392143249512,
-        2: 0.0
-    }, {
-        0: 0.96934974193572998,
-        1: 0.063333392143249512,
-        2: 0.0
-    }), dfcs)
-
-    # Assert sum(dfcs) + bias == predictions.
-    expected_predictions = [[1.6345005], [1.32570302], [1.1874305],
-                            [2.01968288], [2.83268309]]
-    predictions = [
-        [sum(dfc.values()) + bias] for (dfc, bias) in zip(dfcs, biases)
-    ]
-    self.assertAllClose(expected_predictions, predictions)
-
-    # Test when user doesn't include bias or dfc in predict_keys.
-    debug_predictions = est.experimental_predict_with_explanations(
-        predict_input_fn, predict_keys=['predictions'])
-    for prediction_dict in debug_predictions:
-      self.assertTrue('bias' in prediction_dict)
-      self.assertTrue('dfc' in prediction_dict)
-      self.assertTrue('predictions' in prediction_dict)
-      self.assertEqual(len(prediction_dict), 3)
-
-
-class ModelFnTests(test_util.TensorFlowTestCase):
-  """Tests bt_model_fn including unexposed internal functionalities."""
-
-  def setUp(self):
-    self._feature_columns = {
-        feature_column.bucketized_column(
-            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
-            BUCKET_BOUNDARIES) for i in range(NUM_FEATURES)
-    }
-
-  def _get_expected_ensembles_for_classification(self):
-    first_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              threshold: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.387675
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.181818
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.0625
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 1
-          is_finalized: false
-        }
-        growing_metadata {
-          num_trees_attempted: 1
-          num_layers_attempted: 1
-          last_layer_node_start: 1
-          last_layer_node_end: 3
-        }
-        """
-    second_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              threshold: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.387675
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 3
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 0.0
-              original_leaf {
-                scalar: -0.181818
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 0
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 0.105518
-              original_leaf {
-                scalar: 0.0625
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.348397
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.181818
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.224091
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.056815
-            }
-          }
-        }
-        trees {
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 2
-          is_finalized: true
-        }
-        tree_metadata {
-          num_layers_grown: 0
-          is_finalized: false
-        }
-        growing_metadata {
-          num_trees_attempted: 1
-          num_layers_attempted: 2
-          last_layer_node_start: 0
-          last_layer_node_end: 1
-        }
-        """
-    third_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              threshold: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.387675
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 3
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 0.0
-              original_leaf {
-                scalar: -0.181818
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 0
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 0.105518
-              original_leaf {
-                scalar: 0.0625
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.348397
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.181818
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.224091
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.056815
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              threshold: 0
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.287131
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.162042
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.086986
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 2
-          is_finalized: true
-        }
-        tree_metadata {
-          num_layers_grown: 1
-          is_finalized: false
-        }
-        growing_metadata {
-          num_trees_attempted: 2
-          num_layers_attempted: 3
-          last_layer_node_start: 1
-          last_layer_node_end: 3
-        }
-        """
-    return (first_round, second_round, third_round)
-
-  def _get_expected_ensembles_for_classification_with_bias(self):
-    first_round = """
-        trees {
-          nodes {
-            leaf {
-              scalar: -0.405086
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_metadata {
-        }
-        """
-    second_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              threshold: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.407711
-              original_leaf {
-                scalar: -0.405086
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.556054
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.301233
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 1
-          is_finalized: false
-        }
-        growing_metadata {
-          num_trees_attempted: 1
-          num_layers_attempted: 1
-          last_layer_node_start: 1
-          last_layer_node_end: 3
-        }
-        """
-    third_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              threshold: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.407711
-              original_leaf {
-                scalar: -0.405086
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 3
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              original_leaf {
-                scalar: -0.556054
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 0
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 0.09876
-              original_leaf {
-                scalar: -0.301233
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.698072
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.556054
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.106016
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.27349
-            }
-          }
-        }
-        trees {
-          nodes {
-            leaf {
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 2
-          is_finalized: true
-        }
-        tree_metadata {
-        }
-        growing_metadata {
-          num_trees_attempted: 1
-          num_layers_attempted: 2
-          last_layer_node_end: 1
-        }
-        """
-    forth_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              threshold: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.4077113
-              original_leaf {
-                scalar: -0.405086
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              threshold: 3
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              original_leaf {
-                scalar: -0.556054
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              threshold: 0
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 0.09876
-              original_leaf {
-                scalar: -0.301233
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.698072
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.556054
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.106016
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.27349
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 2
-              threshold: 2
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.289927
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.134588
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.083838            
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 2
-          is_finalized: true
-        }
-        tree_metadata {
-          num_layers_grown: 1
-        }
-        growing_metadata {
-          num_trees_attempted: 2
-          num_layers_attempted: 3
-          last_layer_node_start: 1
-          last_layer_node_end: 3
-        }
-        """
-    return (first_round, second_round, third_round, forth_round)
-
-  def _get_expected_ensembles_for_regression(self):
-    first_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              threshold: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.169714
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.241322
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.083951
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 1
-          is_finalized: false
-        }
-        growing_metadata {
-          num_trees_attempted: 1
-          num_layers_attempted: 1
-          last_layer_node_start: 1
-          last_layer_node_end: 3
-        }
-        """
-    second_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              threshold: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.169714
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 1
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 2.673407
-              original_leaf {
-                scalar: 0.241322
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 0
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 0.324102
-              original_leaf {
-                scalar: 0.083951
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.563167
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.247047
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.095273
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.222102
-            }
-          }
-        }
-        trees {
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 2
-          is_finalized: true
-        }
-        tree_metadata {
-          num_layers_grown: 0
-          is_finalized: false
-        }
-        growing_metadata {
-          num_trees_attempted: 1
-          num_layers_attempted: 2
-          last_layer_node_start: 0
-          last_layer_node_end: 1
-        }
-        """
-    third_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              threshold: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.169714
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 1
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 2.673407
-              original_leaf {
-                scalar: 0.241322
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 0
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 0.324102
-              original_leaf {
-                scalar: 0.083951
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.563167
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.247047
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.095273
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.222102
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              threshold: 0
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.981026
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.005166
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.180281
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 2
-          is_finalized: true
-        }
-        tree_metadata {
-          num_layers_grown: 1
-          is_finalized: false
-        }
-        growing_metadata {
-          num_trees_attempted: 2
-          num_layers_attempted: 3
-          last_layer_node_start: 1
-          last_layer_node_end: 3
-        }
-        """
-    return (first_round, second_round, third_round)
-
-  def _get_expected_ensembles_for_regression_with_bias(self):
-    first_round = """
-        trees {
-          nodes {
-            leaf {
-              scalar: 1.799974
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_metadata {
-        }
-        """
-    second_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              threshold: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.190442
-              original_leaf {
-                scalar: 1.799974
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.862786
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.706149
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 1
-          is_finalized: false
-        }
-        growing_metadata {
-          num_trees_attempted: 1
-          num_layers_attempted: 1
-          last_layer_node_start: 1
-          last_layer_node_end: 3
-        }
-        """
-    third_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              threshold: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.190442
-              original_leaf {
-                scalar: 1.799974
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 1
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 2.683594
-              original_leaf {
-                scalar: 1.862786
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              feature_id: 0
-              threshold: 0
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 0.322693
-              original_leaf {
-                scalar: 1.706149
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 2.024487
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.710319
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.559208
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.686037
-            }
-          }
-        }
-        trees {
-          nodes {
-            leaf {
-              scalar: 0.0
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 2
-          is_finalized: true
-        }
-        tree_metadata {
-          num_layers_grown: 0
-          is_finalized: false
-        }
-        growing_metadata {
-          num_trees_attempted: 1
-          num_layers_attempted: 2
-          last_layer_node_start: 0
-          last_layer_node_end: 1
-        }
-        """
-    forth_round = """
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              threshold: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 1.190442
-              original_leaf {
-                scalar:  1.799974
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              threshold: 1
-              left_id: 3
-              right_id: 4
-            }
-            metadata {
-              gain: 2.683594
-              original_leaf {
-                scalar: 1.8627863
-              }
-            }
-          }
-          nodes {
-            bucketized_split {
-              left_id: 5
-              right_id: 6
-            }
-            metadata {
-              gain: 0.322693
-              original_leaf {
-                scalar: 1.706149
-              }
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 2.024487
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.710319
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.5592078
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 1.686037
-            }
-          }
-        }
-        trees {
-          nodes {
-            bucketized_split {
-              feature_id: 1
-              left_id: 1
-              right_id: 2
-            }
-            metadata {
-              gain: 0.972589
-            }
-          }
-          nodes {
-            leaf {
-              scalar: -0.137592
-            }
-          }
-          nodes {
-            leaf {
-              scalar: 0.034926
-            }
-          }
-        }
-        tree_weights: 1.0
-        tree_weights: 1.0
-        tree_metadata {
-          num_layers_grown: 2
-          is_finalized: true
-        }
-        tree_metadata {
-          num_layers_grown: 1
-        }
-        growing_metadata {
-          num_trees_attempted: 2
-          num_layers_attempted: 3
-          last_layer_node_start: 1
-          last_layer_node_end: 3
-        }
-        """
-    return (first_round, second_round, third_round, forth_round)
-
-  def _get_train_op_and_ensemble(self,
-                                 head,
-                                 config,
-                                 is_classification,
-                                 train_in_memory,
-                                 center_bias=False):
-    """Calls bt_model_fn() and returns the train_op and ensemble_serialzed."""
-    features, labels = _make_train_input_fn(is_classification)()
-
-    tree_hparams = boosted_trees._TreeHParams(  # pylint:disable=protected-access
-        n_trees=2,
-        max_depth=2,
-        learning_rate=0.1,
-        l1=0.,
-        l2=0.01,
-        tree_complexity=0.,
-        min_node_weight=0.,
-        center_bias=center_bias,
-        pruning_mode='none')
-
-    estimator_spec = boosted_trees._bt_model_fn(  # pylint:disable=protected-access
-        features=features,
-        labels=labels,
-        mode=model_fn.ModeKeys.TRAIN,
-        head=head,
-        feature_columns=self._feature_columns,
-        tree_hparams=tree_hparams,
-        example_id_column_name=EXAMPLE_ID_COLUMN,
-        n_batches_per_layer=1,
-        config=config,
-        train_in_memory=train_in_memory)
-    resources.initialize_resources(resources.shared_resources()).run()
-    variables.global_variables_initializer().run()
-    variables.local_variables_initializer().run()
-
-    # Gets the train_op and serialized proto of the ensemble.
-    shared_resources = resources.shared_resources()
-    self.assertEqual(1, len(shared_resources))
-    train_op = estimator_spec.train_op
-    with ops.control_dependencies([train_op]):
-      _, ensemble_serialized = (
-          gen_boosted_trees_ops.boosted_trees_serialize_ensemble(
-              shared_resources[0].handle))
-    return train_op, ensemble_serialized
-
-  def testTrainClassifierInMemory(self):
-    ops.reset_default_graph()
-    expected_first, expected_second, expected_third = (
-        self._get_expected_ensembles_for_classification())
-    with self.cached_session() as sess:
-      # Train with train_in_memory mode.
-      with sess.graph.as_default():
-        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
-            boosted_trees._create_classification_head(n_classes=2),
-            run_config.RunConfig(),
-            is_classification=True,
-            train_in_memory=True)
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      # Validate the trained ensemble.
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_first, ensemble_proto)
-
-      # Run one more time and validate the trained ensemble.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_second, ensemble_proto)
-
-      # Third round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_third, ensemble_proto)
-
-  def testTrainClassifierWithCenterBiasInMemory(self):
-    ops.reset_default_graph()
-
-    # When bias centering is on, we expect the very first node to have the
-    expected_first, expected_second, expected_third, expected_forth = (
-        self._get_expected_ensembles_for_classification_with_bias())
-
-    with self.cached_session() as sess:
-      with sess.graph.as_default():
-        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
-            boosted_trees._create_classification_head(n_classes=2),
-            run_config.RunConfig(),
-            is_classification=True,
-            train_in_memory=True,
-            center_bias=True)
-
-      # 4 iterations to center bias.
-      for _ in range(4):
-        _, serialized = sess.run([train_op, ensemble_serialized])
-
-      # Validate the trained ensemble.
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_first, ensemble_proto)
-
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_second, ensemble_proto)
-
-      # Third round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_third, ensemble_proto)
-
-      # Forth round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-
-      self.assertProtoEquals(expected_forth, ensemble_proto)
-
-  def testTrainClassifierNonInMemory(self):
-    ops.reset_default_graph()
-    expected_first, expected_second, expected_third = (
-        self._get_expected_ensembles_for_classification())
-    with self.cached_session() as sess:
-      # Train without train_in_memory mode.
-      with sess.graph.as_default():
-        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
-            boosted_trees._create_classification_head(n_classes=2),
-            run_config.RunConfig(),
-            is_classification=True,
-            train_in_memory=False)
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      # Validate the trained ensemble.
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_first, ensemble_proto)
-
-      # Run one more time and validate the trained ensemble.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_second, ensemble_proto)
-
-      # Third round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_third, ensemble_proto)
-
-  def testTrainClassifierWithCenterBiasNonInMemory(self):
-    ops.reset_default_graph()
-
-    # When bias centering is on, we expect the very first node to have the
-    expected_first, expected_second, expected_third, expected_forth = (
-        self._get_expected_ensembles_for_classification_with_bias())
-
-    with self.cached_session() as sess:
-      with sess.graph.as_default():
-        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
-            boosted_trees._create_classification_head(n_classes=2),
-            run_config.RunConfig(),
-            is_classification=True,
-            train_in_memory=False,
-            center_bias=True)
-      # 4 iterations to center bias.
-      for _ in range(4):
-        _, serialized = sess.run([train_op, ensemble_serialized])
-      # Validate the trained ensemble.
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_first, ensemble_proto)
-
-      # Run one more time and validate the trained ensemble.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_second, ensemble_proto)
-
-      # Third round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_third, ensemble_proto)
-
-      # Forth round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_forth, ensemble_proto)
-
-  def testTrainRegressorInMemory(self):
-    ops.reset_default_graph()
-    expected_first, expected_second, expected_third = (
-        self._get_expected_ensembles_for_regression())
-    with self.cached_session() as sess:
-      # Train with train_in_memory mode.
-      with sess.graph.as_default():
-        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
-            boosted_trees._create_regression_head(label_dimension=1),
-            run_config.RunConfig(),
-            is_classification=False,
-            train_in_memory=True)
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      # Validate the trained ensemble.
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_first, ensemble_proto)
-
-      # Run one more time and validate the trained ensemble.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_second, ensemble_proto)
-
-      # Third round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_third, ensemble_proto)
-
-  def testTrainRegressorInMemoryWithCenterBias(self):
-    ops.reset_default_graph()
-    expected_first, expected_second, expected_third, expected_forth = (
-        self._get_expected_ensembles_for_regression_with_bias())
-    with self.cached_session() as sess:
-      # Train with train_in_memory mode.
-      with sess.graph.as_default():
-        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
-            boosted_trees._create_regression_head(label_dimension=1),
-            run_config.RunConfig(),
-            is_classification=False,
-            train_in_memory=True,
-            center_bias=True)
-      # 3 iterations to center bias.
-      for _ in range(3):
-        _, serialized = sess.run([train_op, ensemble_serialized])
-      # Validate the trained ensemble.
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-
-      self.assertProtoEquals(expected_first, ensemble_proto)
-
-      # Run one more time and validate the trained ensemble.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_second, ensemble_proto)
-
-      # Third round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_third, ensemble_proto)
-
-      # Forth round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_forth, ensemble_proto)
-
-  def testTrainRegressorNonInMemory(self):
-    ops.reset_default_graph()
-    expected_first, expected_second, expected_third = (
-        self._get_expected_ensembles_for_regression())
-    with self.cached_session() as sess:
-      # Train without train_in_memory mode.
-      with sess.graph.as_default():
-        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
-            boosted_trees._create_regression_head(label_dimension=1),
-            run_config.RunConfig(),
-            is_classification=False,
-            train_in_memory=False)
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      # Validate the trained ensemble.
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_first, ensemble_proto)
-
-      # Run one more time and validate the trained ensemble.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_second, ensemble_proto)
-
-      # Third round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_third, ensemble_proto)
-
-  def testTrainRegressorNotInMemoryWithCenterBias(self):
-    ops.reset_default_graph()
-    expected_first, expected_second, expected_third, expected_forth = (
-        self._get_expected_ensembles_for_regression_with_bias())
-    with self.cached_session() as sess:
-      # Train with train_in_memory mode.
-      with sess.graph.as_default():
-        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
-            boosted_trees._create_regression_head(label_dimension=1),
-            run_config.RunConfig(),
-            is_classification=False,
-            train_in_memory=False,
-            center_bias=True)
-      # 3 iterations to center the bias (because we are using regularization).
-      for _ in range(3):
-        _, serialized = sess.run([train_op, ensemble_serialized])
-
-      # Validate the trained ensemble.
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_first, ensemble_proto)
-
-      # Run one more time and validate the trained ensemble.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_second, ensemble_proto)
-
-      # Third round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_third, ensemble_proto)
-
-      # Forth round training and validation.
-      _, serialized = sess.run([train_op, ensemble_serialized])
-      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
-      ensemble_proto.ParseFromString(serialized)
-      self.assertProtoEquals(expected_forth, ensemble_proto)
-
-
-if __name__ == '__main__':
-  googletest.main()
diff --git a/tensorflow/python/estimator/canned/boosted_trees_utils.py b/tensorflow/python/estimator/canned/boosted_trees_utils.py
index 85efc2304a..0ff70ddff1 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_utils.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_utils.py
@@ -12,69 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Debug and model explainability logic for boosted trees."""
+"""boosted_trees_utils python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
-from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
-
-# For directional feature contributions.
-_DEBUG_PROTO_KEY = '_serialized_debug_outputs_proto'
-_BIAS_ID = 0
-
-
-def _parse_debug_proto_string(example_proto_serialized):
-  example_debug_outputs = boosted_trees_pb2.DebugOutput()
-  example_debug_outputs.ParseFromString(example_proto_serialized)
-  feature_ids = example_debug_outputs.feature_ids
-  logits_path = example_debug_outputs.logits_path
-  return feature_ids, logits_path
-
-
-def _compute_directional_feature_contributions(example_feature_ids,
-                                               example_logits_paths, activation,
-                                               num_bucketized_features):
-  """Directional feature contributions and bias, per example."""
-  # Initialize contributions to 0.
-  dfcs = {k: 0 for k in range(num_bucketized_features)}
-
-  # Traverse tree subtracting child prediction from parent prediction and
-  # associating change with feature id used to split.
-  predictions = np.array(activation(example_logits_paths))
-  delta_pred = predictions[_BIAS_ID + 1:] - predictions[:-1]
-  # Group by feature id, then sum delta_pred.
-  contribs = np.bincount(
-      example_feature_ids,
-      weights=delta_pred,
-      minlength=num_bucketized_features)
-  for f, dfc in zip(range(num_bucketized_features), contribs):
-    dfcs[f] = dfc
-  return predictions[_BIAS_ID], dfcs
-
-
-def _identity(logits):
-  return logits
-
-
-def _sigmoid(logits):
-  # TODO(crawles): Change to softmax once multiclass support is available.
-  return 1 / (1 + np.exp(-np.array(logits)))
+from tensorflow_estimator.python.estimator.canned import boosted_trees_utils
 
+# Include attrs that start with single underscore.
+boosted_trees_utils.__all__ = [
+    s for s in dir(boosted_trees_utils) if not s.startswith('__')
+]
 
-def _parse_explanations_from_prediction(serialized_debug_proto,
-                                        n_features,
-                                        classification=False):
-  """Parse serialized explanability proto, compute dfc, and return bias, dfc."""
-  feature_ids, logits_path = _parse_debug_proto_string(serialized_debug_proto)
-  if classification:
-    activation = _sigmoid
-  else:
-    activation = _identity
-  bias, dfcs = _compute_directional_feature_contributions(
-      feature_ids, logits_path, activation, n_features)
-  # TODO(crawles): Prediction path and leaf IDs.
-  return bias, dfcs
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.boosted_trees_utils import *
diff --git a/tensorflow/python/estimator/canned/boosted_trees_utils_test.py b/tensorflow/python/estimator/canned/boosted_trees_utils_test.py
deleted file mode 100644
index 506d4ea6fb..0000000000
--- a/tensorflow/python/estimator/canned/boosted_trees_utils_test.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests boosted_trees estimators and model_fn."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.estimator.canned import boosted_trees_utils
-from tensorflow.python.framework import test_util
-from tensorflow.python.platform import googletest
-
-
-class BoostedTreesDFCTest(test_util.TensorFlowTestCase):
-  """Test directional feature contributions (DFC) helper functions. """
-
-  def testDirectionalFeatureContributionsCompute(self):
-    """Tests logic to compute DFCs given feature ids and logits paths."""
-    num_bucketized_features = 3  # Includes one unused feature.
-    examples_feature_ids = ((2, 2, 0, 0), (2, 2, 0))
-    e1_feature_ids, e2_feature_ids = examples_feature_ids
-
-    # DFCs are computed by traversing the prediction path and subtracting each
-    # child prediction from its parent prediction and associating the change in
-    # prediction with the respective feature id used for the split.
-    # For each activation function, f, (currently identity or sigmoid), DFCs are
-    # calculated for the two examples as:
-    # example 1:
-    #   feature_0 = (f(1.114) - f(1.214)) + (f(6.114) - f(1.114))
-    #   feature_1 = 0  # Feature not in ensemble, thus zero contrib.
-    #   feature_2 = (f(0.114) - bias_pred) + (f(1.214) - f(0.114))
-    # example 2:
-    #   feature_0 = f(-5.486) - f(1.514)
-    #   feature_1 = 0  # Feature not in ensemble, thus zero contrib.
-    #   feature_2 = (f(0.114) - bias_pred) + (f(1.514) - f(0.114))
-    # where bias_pred is = f(0) or f(0.21), with center_bias = {True, False},
-    # respectively.
-    # Keys are center_bias.
-    expected_dfcs_identity = {
-        False: ({
-            0: 4.9,
-            1: 0,
-            2: 1.214
-        }, {
-            0: -7.0,
-            1: 0,
-            2: 1.514
-        }),
-        True: ({
-            0: 4.9,
-            1: 0,
-            2: 1.0039999999999998
-        }, {
-            0: -7.0,
-            1: 0,
-            2: 1.3039999999999998
-        })
-    }
-    expected_dfcs_sigmoid = {
-        False: ({
-            0: 0.22678725678805578,
-            1: 0,
-            2: 0.2710059376234506
-        }, {
-            0: -0.81552596670046507,
-            1: 0,
-            2: 0.319653250251275
-        }),
-        True: ({
-            0: 0.22678725678805578,
-            1: 0,
-            2: 0.2186980280491253
-        }, {
-            0: -0.81552596670046507,
-            1: 0,
-            2: 0.26734534067694971
-        })
-    }
-    # pylint: disable=protected-access
-    for f, expected_dfcs in zip(
-        (boosted_trees_utils._identity, boosted_trees_utils._sigmoid),
-        (expected_dfcs_identity, expected_dfcs_sigmoid)):
-      for center_bias in [False, True]:
-        # If not center_bias, the bias after activation is 0.
-        if center_bias:
-          bias_logit = 0.21  # Root node of tree_0.
-        else:
-          bias_logit = 0  # 0 is default value when there is no original_leaf.
-        f_bias = f(bias_logit)
-
-        # Logits before and after, as is outputed from
-        # boosted_trees_ops.example_debug_outputs
-        examples_logits_paths = ((bias_logit, 0.114, 1.214, 1.114, 6.114),
-                                 (bias_logit, 0.114, 1.514, -5.486))
-        e1_logits_path, e2_logits_path = examples_logits_paths
-        e1_expected_dfcs, e2_expected_dfcs = expected_dfcs[center_bias]
-        # Check feature contributions are correct for both examples.
-        # Example 1.
-        # pylint:disable=line-too-long
-        e1_bias, e1_dfc = boosted_trees_utils._compute_directional_feature_contributions(
-            e1_feature_ids, e1_logits_path, f, num_bucketized_features)
-        self.assertAllClose(e1_bias, f_bias)
-        self.assertAllClose(e1_dfc, e1_expected_dfcs)
-        # Example 2.
-        e2_bias, e2_dfc = boosted_trees_utils._compute_directional_feature_contributions(
-            e2_feature_ids, e2_logits_path, f, num_bucketized_features)
-        # pylint:enable=line-too-long
-        self.assertAllClose(e2_bias, f_bias)
-        self.assertAllClose(e2_dfc, e2_expected_dfcs)
-        # Check if contributions sum to final prediction.
-        # For each tree, get leaf of last tree.
-        expected_logits = (e1_logits_path[-1], e2_logits_path[-1])
-        # Predictions should be the sum of contributions + bias.
-        expected_preds = [f(logit) for logit in expected_logits]
-        e1_pred = e1_bias + sum(e1_dfc.values())
-        e2_pred = e2_bias + sum(e2_dfc.values())
-        preds = [e1_pred, e2_pred]
-        self.assertAllClose(preds, expected_preds)
-    # pylint: enable=protected-access
-
-  def testDFCComputeComparedToExternalExample(self):
-    """Tests `compute_dfc` compared to external example (regression).
-
-    Example from http://blog.datadive.net/interpreting-random-forests.
-    """
-    # DIS:3, RM: 2, LSTAT:1, NOX:0
-    num_bucketized_features = 4
-    e1_feature_ids = (2, 1, 0)
-    e2_feature_ids = (2, 2, 2)
-    e3_feature_ids = (2, 2, 0)
-
-    bias_logit = 22.60  # Root node of tree_0.
-    activation = boosted_trees_utils._identity
-    f_bias = activation(bias_logit)
-    # Logits before and after, as is outputed from
-    # boosted_trees_ops.example_debug_outputs
-    e1_logits_path = (bias_logit, 19.96, 14.91, 18.11)
-    e2_logits_path = (bias_logit, 37.42, 45.10, 45.90)
-    e3_logits_path = (bias_logit, 37.42, 32.30, 33.58)
-    e1_expected_dfcs = {0: 3.20, 1: -5.05, 2: -2.64, 3: 0}
-    e2_expected_dfcs = {0: 0, 1: 0, 2: 23.3, 3: 0}
-    e3_expected_dfcs = {0: 1.28, 1: 0, 2: 9.7, 3: 0}
-    # Check feature contributions are correct for both examples.
-    # Example 1.
-    # pylint: disable=protected-access
-    # pylint: disable=line-too-long
-    e1_bias, e1_dfc = boosted_trees_utils._compute_directional_feature_contributions(
-        e1_feature_ids, e1_logits_path, activation, num_bucketized_features)
-    self.assertAllClose(e1_bias, f_bias)
-    self.assertAllClose(e1_dfc, e1_expected_dfcs)
-    # Example 2.
-    e2_bias, e2_dfc = boosted_trees_utils._compute_directional_feature_contributions(
-        e2_feature_ids, e2_logits_path, activation, num_bucketized_features)
-    self.assertAllClose(e2_bias, f_bias)
-    self.assertAllClose(e2_dfc, e2_expected_dfcs)
-    # Example 3.
-    e3_bias, e3_dfc = boosted_trees_utils._compute_directional_feature_contributions(
-        e3_feature_ids, e3_logits_path, activation, num_bucketized_features)
-    # pylint: enable=line-too-long
-    self.assertAllClose(e3_bias, f_bias)
-    self.assertAllClose(e3_dfc, e3_expected_dfcs)
-    # pylint: enable=protected-access
-    # Check if contributions sum to final prediction.
-    # For each tree, get leaf of last tree.
-    expected_logits = (18.11, 45.90, 33.58)
-    # Predictions should be the sum of contributions + bias.
-    expected_preds = [activation(logit) for logit in expected_logits]
-    e1_pred = e1_bias + sum(e1_dfc.values())
-    e2_pred = e2_bias + sum(e2_dfc.values())
-    e3_pred = e3_bias + sum(e3_dfc.values())
-    preds = [e1_pred, e2_pred, e3_pred]
-    self.assertAllClose(preds, expected_preds)
-
-
-if __name__ == '__main__':
-  googletest.main()
diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py
index a6c2aaa7d9..6b80bd5224 100644
--- a/tensorflow/python/estimator/canned/dnn.py
+++ b/tensorflow/python/estimator/canned/dnn.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,649 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Deep Neural Network estimators."""
+"""dnn python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
-from tensorflow.python.framework import ops
-from tensorflow.python.keras.engine import training
-from tensorflow.python.layers import core as core_layers
-from tensorflow.python.layers import normalization
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import partitioned_variables
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.summary import summary
-from tensorflow.python.util.tf_export import estimator_export
-
-# The default learning rate of 0.05 is a historical artifact of the initial
-# implementation, but seems a reasonable choice.
-_LEARNING_RATE = 0.05
-
-
-def _add_hidden_layer_summary(value, tag):
-  summary.scalar('%s/fraction_of_zero_values' % tag, nn.zero_fraction(value))
-  summary.histogram('%s/activation' % tag, value)
-
-
-def _dnn_logit_fn_builder(units,
-                          hidden_units,
-                          feature_columns,
-                          activation_fn,
-                          dropout,
-                          input_layer_partitioner,
-                          batch_norm,
-                          shared_state_manager=None):
-  """Function builder for a dnn logit_fn.
-
-  Args:
-    units: An int indicating the dimension of the logit layer.  In the
-      MultiHead case, this should be the sum of all component Heads' logit
-      dimensions.
-    hidden_units: Iterable of integer number of hidden units per layer.
-    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
-    activation_fn: Activation function applied to each layer.
-    dropout: When not `None`, the probability we will drop out a given
-      coordinate.
-    input_layer_partitioner: Partitioner for input layer.
-    batch_norm: Whether to use batch normalization after each hidden layer.
-    shared_state_manager: A SharedEmbeddingStateManager object to hold the
-      shared state for SharedEmbeddingColumn's.
-
-  Returns:
-    A logit_fn (see below).
-
-  Raises:
-    ValueError: If units is not an int.
-  """
-  if not isinstance(units, int):
-    raise ValueError('units must be an int.  Given type: {}'.format(
-        type(units)))
-
-  def dnn_logit_fn(features, mode):
-    """Deep Neural Network logit_fn.
-
-    Args:
-      features: This is the first item returned from the `input_fn`
-                passed to `train`, `evaluate`, and `predict`. This should be a
-                single `Tensor` or `dict` of same.
-      mode: Optional. Specifies if this training, evaluation or prediction. See
-            `ModeKeys`.
-
-    Returns:
-      A `Tensor` representing the logits, or a list of `Tensor`'s representing
-      multiple logits in the MultiHead case.
-    """
-    dnn_model = _DNNModel(
-        units,
-        hidden_units,
-        feature_columns,
-        activation_fn,
-        dropout,
-        input_layer_partitioner,
-        batch_norm,
-        shared_state_manager,
-        name='dnn')
-    return dnn_model(features, mode)
-
-  return dnn_logit_fn
-
-
-def _get_previous_name_scope():
-  current_name_scope = ops.get_name_scope()
-  return current_name_scope.rsplit('/', 1)[0] + '/'
-
-
-class _DNNModel(training.Model):
-  """A DNN Model."""
-
-  def __init__(self,
-               units,
-               hidden_units,
-               feature_columns,
-               activation_fn,
-               dropout,
-               input_layer_partitioner,
-               batch_norm,
-               shared_state_manager,
-               name=None,
-               **kwargs):
-    super(_DNNModel, self).__init__(name=name, **kwargs)
-    if feature_column_v2.is_feature_column_v2(feature_columns):
-      self._input_layer = feature_column_v2.FeatureLayer(
-          feature_columns=feature_columns,
-          name='input_layer',
-          shared_state_manager=shared_state_manager)
-    else:
-      self._input_layer = feature_column.InputLayer(
-          feature_columns=feature_columns,
-          name='input_layer',
-          create_scope_now=False)
-
-    self._add_layer(self._input_layer, 'input_layer')
-
-    self._dropout = dropout
-    self._batch_norm = batch_norm
-
-    self._hidden_layers = []
-    self._dropout_layers = []
-    self._batch_norm_layers = []
-    self._hidden_layer_scope_names = []
-    for layer_id, num_hidden_units in enumerate(hidden_units):
-      with variable_scope.variable_scope(
-          'hiddenlayer_%d' % layer_id) as hidden_layer_scope:
-        hidden_layer = core_layers.Dense(
-            units=num_hidden_units,
-            activation=activation_fn,
-            kernel_initializer=init_ops.glorot_uniform_initializer(),
-            name=hidden_layer_scope,
-            _scope=hidden_layer_scope)
-        self._add_layer(hidden_layer, hidden_layer_scope.name)
-        self._hidden_layer_scope_names.append(hidden_layer_scope.name)
-        self._hidden_layers.append(hidden_layer)
-        if self._dropout is not None:
-          dropout_layer = core_layers.Dropout(rate=self._dropout)
-          self._add_layer(dropout_layer, dropout_layer.name)
-          self._dropout_layers.append(dropout_layer)
-        if self._batch_norm:
-          batch_norm_layer = normalization.BatchNormalization(
-              # The default momentum 0.99 actually crashes on certain
-              # problem, so here we use 0.999, which is the default of
-              # tf.contrib.layers.batch_norm.
-              momentum=0.999,
-              trainable=True,
-              name='batchnorm_%d' % layer_id,
-              _scope='batchnorm_%d' % layer_id)
-          self._add_layer(batch_norm_layer, batch_norm_layer.name)
-          self._batch_norm_layers.append(batch_norm_layer)
-
-    with variable_scope.variable_scope('logits') as logits_scope:
-      self._logits_layer = core_layers.Dense(
-          units=units,
-          activation=None,
-          kernel_initializer=init_ops.glorot_uniform_initializer(),
-          name=logits_scope,
-          _scope=logits_scope)
-      self._add_layer(self._logits_layer, logits_scope.name)
-      self._logits_scope_name = logits_scope.name
-    self._input_layer_partitioner = input_layer_partitioner
-
-  def call(self, features, mode):
-    is_training = mode == model_fn.ModeKeys.TRAIN
-    # The Keras training.Model adds a name_scope with the name of the model
-    # which modifies the constructed graph. Hence we add another name_scope
-    # here which is the one before the training.Model one was applied.
-    # TODO(rohanj): Remove this in TF 2.0 (b/116728605)
-    with ops.name_scope(name=_get_previous_name_scope()):
-      # TODO(rohanj): Remove dependence on variable scope for partitioning.
-      with variable_scope.variable_scope(
-          'input_from_feature_columns',
-          partitioner=self._input_layer_partitioner):
-        net = self._input_layer(features)
-      for i in range(len(self._hidden_layers)):
-        net = self._hidden_layers[i](net)
-        if self._dropout is not None and is_training:
-          net = self._dropout_layers[i](net, training=True)
-        if self._batch_norm:
-          net = self._batch_norm_layers[i](net, training=is_training)
-        _add_hidden_layer_summary(net, self._hidden_layer_scope_names[i])
-
-      logits = self._logits_layer(net)
-      _add_hidden_layer_summary(logits, self._logits_scope_name)
-      return logits
-
-  def _add_layer(self, layer, layer_name):
-    # "Magic" required for keras.Model classes to track all the variables in
-    # a list of layers.Layer objects.
-    # TODO(ashankar): Figure out API so user code doesn't have to do this.
-    setattr(self, layer_name, layer)
-
-
-def _dnn_model_fn(features,
-                  labels,
-                  mode,
-                  head,
-                  hidden_units,
-                  feature_columns,
-                  optimizer='Adagrad',
-                  activation_fn=nn.relu,
-                  dropout=None,
-                  input_layer_partitioner=None,
-                  config=None,
-                  use_tpu=False,
-                  batch_norm=False,
-                  shared_state_manager=None):
-  """Deep Neural Net model_fn.
-
-  Args:
-    features: dict of `Tensor`.
-    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
-      dtype `int32` or `int64` in the range `[0, n_classes)`.
-    mode: Defines whether this is training, evaluation or prediction.
-      See `ModeKeys`.
-    head: A `head_lib._Head` instance.
-    hidden_units: Iterable of integer number of hidden units per layer.
-    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
-    optimizer: String, `tf.Optimizer` object, or callable that creates the
-      optimizer to use for training. If not specified, will use the Adagrad
-      optimizer with a default learning rate of 0.05.
-    activation_fn: Activation function applied to each layer.
-    dropout: When not `None`, the probability we will drop out a given
-      coordinate.
-    input_layer_partitioner: Partitioner for input layer. Defaults
-      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-    config: `RunConfig` object to configure the runtime settings.
-    use_tpu: Whether to make a DNN model able to run on TPU. Will make function
-      return a `_TPUEstimatorSpec` instance and disable variable partitioning.
-    batch_norm: Whether to use batch normalization after each hidden layer.
-    shared_state_manager: A SharedEmbeddingStateManager object to hold the
-      shared state for SharedEmbeddingColumn's.
-
-  Returns:
-    An `EstimatorSpec` instance.
-
-  Raises:
-    ValueError: If features has the wrong type.
-  """
-  if not isinstance(features, dict):
-    raise ValueError('features should be a dictionary of `Tensor`s. '
-                     'Given type: {}'.format(type(features)))
-
-  optimizer = optimizers.get_optimizer_instance(
-      optimizer, learning_rate=_LEARNING_RATE)
-  num_ps_replicas = config.num_ps_replicas if config else 0
-
-  partitioner = (None if use_tpu else
-                 partitioned_variables.min_max_variable_partitioner(
-                     max_partitions=num_ps_replicas))
-  with variable_scope.variable_scope(
-      'dnn',
-      values=tuple(six.itervalues(features)),
-      partitioner=partitioner):
-    input_layer_partitioner = input_layer_partitioner or (
-        None if use_tpu else
-        partitioned_variables.min_max_variable_partitioner(
-            max_partitions=num_ps_replicas,
-            min_slice_size=64 << 20))
-
-    logit_fn = _dnn_logit_fn_builder(
-        units=head.logits_dimension,
-        hidden_units=hidden_units,
-        feature_columns=feature_columns,
-        activation_fn=activation_fn,
-        dropout=dropout,
-        input_layer_partitioner=input_layer_partitioner,
-        batch_norm=batch_norm,
-        shared_state_manager=shared_state_manager)
-    logits = logit_fn(features=features, mode=mode)
-
-    if use_tpu:
-      return head._create_tpu_estimator_spec(  # pylint: disable=protected-access
-          features=features,
-          mode=mode,
-          labels=labels,
-          optimizer=optimizer,
-          logits=logits)
-    else:
-      return head.create_estimator_spec(
-          features=features,
-          mode=mode,
-          labels=labels,
-          optimizer=optimizer,
-          logits=logits)
-
-
-@estimator_export('estimator.DNNClassifier')
-class DNNClassifier(estimator.Estimator):
-  """A classifier for TensorFlow DNN models.
-
-  Example:
-
-  ```python
-  categorical_feature_a = categorical_column_with_hash_bucket(...)
-  categorical_feature_b = categorical_column_with_hash_bucket(...)
-
-  categorical_feature_a_emb = embedding_column(
-      categorical_column=categorical_feature_a, ...)
-  categorical_feature_b_emb = embedding_column(
-      categorical_column=categorical_feature_b, ...)
-
-  estimator = DNNClassifier(
-      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-      hidden_units=[1024, 512, 256])
-
-  # Or estimator using the ProximalAdagradOptimizer optimizer with
-  # regularization.
-  estimator = DNNClassifier(
-      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-      hidden_units=[1024, 512, 256],
-      optimizer=tf.train.ProximalAdagradOptimizer(
-        learning_rate=0.1,
-        l1_regularization_strength=0.001
-      ))
-
-  # Or estimator using an optimizer with a learning rate decay.
-  estimator = DNNClassifier(
-      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-      hidden_units=[1024, 512, 256],
-      optimizer=lambda: tf.AdamOptimizer(
-          learning_rate=tf.exponential_decay(
-              learning_rate=0.1,
-              global_step=tf.get_global_step(),
-              decay_steps=10000,
-              decay_rate=0.96))
-
-  # Or estimator with warm-starting from a previous checkpoint.
-  estimator = DNNClassifier(
-      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-      hidden_units=[1024, 512, 256],
-      warm_start_from="/path/to/checkpoint/dir")
-
-  # Input builders
-  def input_fn_train: # returns x, y
-    pass
-  estimator.train(input_fn=input_fn_train, steps=100)
-
-  def input_fn_eval: # returns x, y
-    pass
-  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
-  def input_fn_predict: # returns x, None
-    pass
-  predictions = estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-  otherwise there will be a `KeyError`:
-
-  * if `weight_column` is not `None`, a feature with
-    `key=weight_column` whose value is a `Tensor`.
-  * for each `column` in `feature_columns`:
-    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
-      with `key` the id column name, the second with `key` the weight column
-      name. Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss is calculated by using softmax cross entropy.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(
-      self,
-      hidden_units,
-      feature_columns,
-      model_dir=None,
-      n_classes=2,
-      weight_column=None,
-      label_vocabulary=None,
-      optimizer='Adagrad',
-      activation_fn=nn.relu,
-      dropout=None,
-      input_layer_partitioner=None,
-      config=None,
-      warm_start_from=None,
-      loss_reduction=losses.Reduction.SUM,
-      batch_norm=False,
-  ):
-    """Initializes a `DNNClassifier` instance.
-
-    Args:
-      hidden_units: Iterable of number hidden units per layer. All layers are
-        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
-        second one has 32.
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `_FeatureColumn`.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator to
-        continue training a previously saved model.
-      n_classes: Number of label classes. Defaults to 2, namely binary
-        classification. Must be > 1.
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to down weight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      label_vocabulary: A list of strings represents possible label values. If
-        given, labels must be string type and have any value in
-        `label_vocabulary`. If it is not given, that means labels are
-        already encoded as integer or float within [0, 1] for `n_classes=2` and
-        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
-        Also there will be errors if vocabulary is not provided and labels are
-        string.
-      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
-        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
-        callable. Defaults to Adagrad optimizer.
-      activation_fn: Activation function applied to each layer. If `None`, will
-        use `tf.nn.relu`.
-      dropout: When not `None`, the probability we will drop out a given
-        coordinate.
-      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
-        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-      config: `RunConfig` object to configure the runtime settings.
-      warm_start_from: A string filepath to a checkpoint to warm-start from, or
-        a `WarmStartSettings` object to fully configure warm-starting.  If the
-        string filepath is provided instead of a `WarmStartSettings`, then all
-        weights are warm-started, and it is assumed that vocabularies and Tensor
-        names are unchanged.
-      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-        to reduce training loss over batch. Defaults to `SUM`.
-      batch_norm: Whether to use batch normalization after each hidden layer.
-    """
-    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
-        n_classes, weight_column, label_vocabulary, loss_reduction)
-
-    shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
-        feature_columns)
-
-    def _model_fn(features, labels, mode, config):
-      """Call the defined shared _dnn_model_fn."""
-      return _dnn_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          hidden_units=hidden_units,
-          feature_columns=tuple(feature_columns or []),
-          optimizer=optimizer,
-          activation_fn=activation_fn,
-          dropout=dropout,
-          input_layer_partitioner=input_layer_partitioner,
-          config=config,
-          batch_norm=batch_norm,
-          shared_state_manager=shared_state_manager)
-
-    super(DNNClassifier, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config,
-        warm_start_from=warm_start_from)
-
-
-@estimator_export('estimator.DNNRegressor')
-class DNNRegressor(estimator.Estimator):
-  """A regressor for TensorFlow DNN models.
-
-  Example:
-
-  ```python
-  categorical_feature_a = categorical_column_with_hash_bucket(...)
-  categorical_feature_b = categorical_column_with_hash_bucket(...)
-
-  categorical_feature_a_emb = embedding_column(
-      categorical_column=categorical_feature_a, ...)
-  categorical_feature_b_emb = embedding_column(
-      categorical_column=categorical_feature_b, ...)
-
-  estimator = DNNRegressor(
-      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-      hidden_units=[1024, 512, 256])
-
-  # Or estimator using the ProximalAdagradOptimizer optimizer with
-  # regularization.
-  estimator = DNNRegressor(
-      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-      hidden_units=[1024, 512, 256],
-      optimizer=tf.train.ProximalAdagradOptimizer(
-        learning_rate=0.1,
-        l1_regularization_strength=0.001
-      ))
-
-  # Or estimator using an optimizer with a learning rate decay.
-  estimator = DNNRegressor(
-      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-      hidden_units=[1024, 512, 256],
-      optimizer=lambda: tf.AdamOptimizer(
-          learning_rate=tf.exponential_decay(
-              learning_rate=0.1,
-              global_step=tf.get_global_step(),
-              decay_steps=10000,
-              decay_rate=0.96))
-
-  # Or estimator with warm-starting from a previous checkpoint.
-  estimator = DNNRegressor(
-      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-      hidden_units=[1024, 512, 256],
-      warm_start_from="/path/to/checkpoint/dir")
-
-  # Input builders
-  def input_fn_train: # returns x, y
-    pass
-  estimator.train(input_fn=input_fn_train, steps=100)
-
-  def input_fn_eval: # returns x, y
-    pass
-  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
-  def input_fn_predict: # returns x, None
-    pass
-  predictions = estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-  otherwise there will be a `KeyError`:
-
-  * if `weight_column` is not `None`, a feature with
-    `key=weight_column` whose value is a `Tensor`.
-  * for each `column` in `feature_columns`:
-    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
-      with `key` the id column name, the second with `key` the weight column
-      name. Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss is calculated by using mean squared error.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(
-      self,
-      hidden_units,
-      feature_columns,
-      model_dir=None,
-      label_dimension=1,
-      weight_column=None,
-      optimizer='Adagrad',
-      activation_fn=nn.relu,
-      dropout=None,
-      input_layer_partitioner=None,
-      config=None,
-      warm_start_from=None,
-      loss_reduction=losses.Reduction.SUM,
-      batch_norm=False,
-  ):
-    """Initializes a `DNNRegressor` instance.
-
-    Args:
-      hidden_units: Iterable of number hidden units per layer. All layers are
-        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
-        second one has 32.
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `_FeatureColumn`.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator to
-        continue training a previously saved model.
-      label_dimension: Number of regression targets per example. This is the
-        size of the last dimension of the labels and logits `Tensor` objects
-        (typically, these have shape `[batch_size, label_dimension]`).
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to down weight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
-        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
-        callable. Defaults to Adagrad optimizer.
-      activation_fn: Activation function applied to each layer. If `None`, will
-        use `tf.nn.relu`.
-      dropout: When not `None`, the probability we will drop out a given
-        coordinate.
-      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
-        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-      config: `RunConfig` object to configure the runtime settings.
-      warm_start_from: A string filepath to a checkpoint to warm-start from, or
-        a `WarmStartSettings` object to fully configure warm-starting.  If the
-        string filepath is provided instead of a `WarmStartSettings`, then all
-        weights are warm-started, and it is assumed that vocabularies and Tensor
-        names are unchanged.
-      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-        to reduce training loss over batch. Defaults to `SUM`.
-      batch_norm: Whether to use batch normalization after each hidden layer.
-    """
-
-    shared_state_manager = None
-    if feature_column_v2.is_feature_column_v2(feature_columns):
-      shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
+from tensorflow_estimator.python.estimator.canned import dnn
 
-    def _model_fn(features, labels, mode, config):
-      """Call the defined shared _dnn_model_fn."""
-      return _dnn_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head_lib._regression_head(  # pylint: disable=protected-access
-              label_dimension=label_dimension,
-              weight_column=weight_column,
-              loss_reduction=loss_reduction),
-          hidden_units=hidden_units,
-          feature_columns=tuple(feature_columns or []),
-          optimizer=optimizer,
-          activation_fn=activation_fn,
-          dropout=dropout,
-          input_layer_partitioner=input_layer_partitioner,
-          config=config,
-          batch_norm=batch_norm,
-          shared_state_manager=shared_state_manager)
+# Include attrs that start with single underscore.
+dnn.__all__ = [s for s in dir(dnn) if not s.startswith('__')]
 
-    super(DNNRegressor, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config,
-        warm_start_from=warm_start_from)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.dnn import *
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined.py b/tensorflow/python/estimator/canned/dnn_linear_combined.py
index f712244c8d..7d6b4a4bb1 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,621 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""TensorFlow estimators for Linear and DNN joined training models."""
+"""dnn_linear_combined python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
-
-import six
-
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import dnn
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.estimator.canned import linear
-from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column_v2
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import partitioned_variables
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.summary import summary
-from tensorflow.python.training import sync_replicas_optimizer
-from tensorflow.python.training import training_util
-from tensorflow.python.util.tf_export import estimator_export
-
-# The default learning rates are a historical artifact of the initial
-# implementation.
-_DNN_LEARNING_RATE = 0.001
-_LINEAR_LEARNING_RATE = 0.005
-
-
-def _check_no_sync_replicas_optimizer(optimizer):
-  if isinstance(optimizer, sync_replicas_optimizer.SyncReplicasOptimizer):
-    raise ValueError(
-        'SyncReplicasOptimizer does not support multi optimizers case. '
-        'Therefore, it is not supported in DNNLinearCombined model. '
-        'If you want to use this optimizer, please use either DNN or Linear '
-        'model.')
-
-
-def _linear_learning_rate(num_linear_feature_columns):
-  """Returns the default learning rate of the linear model.
-
-  The calculation is a historical artifact of this initial implementation, but
-  has proven a reasonable choice.
-
-  Args:
-    num_linear_feature_columns: The number of feature columns of the linear
-      model.
-
-  Returns:
-    A float.
-  """
-  default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
-  return min(_LINEAR_LEARNING_RATE, default_learning_rate)
-
-
-def _add_layer_summary(value, tag):
-  summary.scalar('%s/fraction_of_zero_values' % tag, nn.zero_fraction(value))
-  summary.histogram('%s/activation' % tag, value)
-
-
-def _dnn_linear_combined_model_fn(features,
-                                  labels,
-                                  mode,
-                                  head,
-                                  linear_feature_columns=None,
-                                  linear_optimizer='Ftrl',
-                                  dnn_feature_columns=None,
-                                  dnn_optimizer='Adagrad',
-                                  dnn_hidden_units=None,
-                                  dnn_activation_fn=nn.relu,
-                                  dnn_dropout=None,
-                                  input_layer_partitioner=None,
-                                  config=None,
-                                  batch_norm=False,
-                                  linear_sparse_combiner='sum'):
-  """Deep Neural Net and Linear combined model_fn.
-
-  Args:
-    features: dict of `Tensor`.
-    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
-      `int32` or `int64` in the range `[0, n_classes)`.
-    mode: Defines whether this is training, evaluation or prediction.
-      See `ModeKeys`.
-    head: A `Head` instance.
-    linear_feature_columns: An iterable containing all the feature columns used
-      by the Linear model.
-    linear_optimizer: string, `Optimizer` object, or callable that defines the
-      optimizer to use for training the Linear model. Defaults to the Ftrl
-      optimizer.
-    dnn_feature_columns: An iterable containing all the feature columns used by
-      the DNN model.
-    dnn_optimizer: string, `Optimizer` object, or callable that defines the
-      optimizer to use for training the DNN model. Defaults to the Adagrad
-      optimizer.
-    dnn_hidden_units: List of hidden units per DNN layer.
-    dnn_activation_fn: Activation function applied to each DNN layer. If `None`,
-      will use `tf.nn.relu`.
-    dnn_dropout: When not `None`, the probability we will drop out a given DNN
-      coordinate.
-    input_layer_partitioner: Partitioner for input layer.
-    config: `RunConfig` object to configure the runtime settings.
-    batch_norm: Whether to use batch normalization after each hidden layer.
-    linear_sparse_combiner: A string specifying how to reduce the linear model
-      if a categorical column is multivalent.  One of "mean", "sqrtn", and
-      "sum".
-  Returns:
-    An `EstimatorSpec` instance.
-
-  Raises:
-    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
-      are empty at the same time, or `input_layer_partitioner` is missing,
-      or features has the wrong type.
-  """
-  if not isinstance(features, dict):
-    raise ValueError('features should be a dictionary of `Tensor`s. '
-                     'Given type: {}'.format(type(features)))
-  if not linear_feature_columns and not dnn_feature_columns:
-    raise ValueError(
-        'Either linear_feature_columns or dnn_feature_columns must be defined.')
-
-  num_ps_replicas = config.num_ps_replicas if config else 0
-  input_layer_partitioner = input_layer_partitioner or (
-      partitioned_variables.min_max_variable_partitioner(
-          max_partitions=num_ps_replicas,
-          min_slice_size=64 << 20))
-
-  shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
-      list(linear_feature_columns) + list(dnn_feature_columns))
-
-  # Build DNN Logits.
-  dnn_parent_scope = 'dnn'
-
-  if not dnn_feature_columns:
-    dnn_logits = None
-  else:
-    dnn_optimizer = optimizers.get_optimizer_instance(
-        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
-    _check_no_sync_replicas_optimizer(dnn_optimizer)
-    if not dnn_hidden_units:
-      raise ValueError(
-          'dnn_hidden_units must be defined when dnn_feature_columns is '
-          'specified.')
-    dnn_partitioner = (
-        partitioned_variables.min_max_variable_partitioner(
-            max_partitions=num_ps_replicas))
-    with variable_scope.variable_scope(
-        dnn_parent_scope,
-        values=tuple(six.itervalues(features)),
-        partitioner=dnn_partitioner) as scope:
-      dnn_absolute_scope = scope.name
-      dnn_logit_fn = dnn._dnn_logit_fn_builder(  # pylint: disable=protected-access
-          units=head.logits_dimension,
-          hidden_units=dnn_hidden_units,
-          feature_columns=dnn_feature_columns,
-          activation_fn=dnn_activation_fn,
-          dropout=dnn_dropout,
-          batch_norm=batch_norm,
-          input_layer_partitioner=input_layer_partitioner,
-          shared_state_manager=shared_state_manager)
-      dnn_logits = dnn_logit_fn(features=features, mode=mode)
-
-  linear_parent_scope = 'linear'
-
-  if not linear_feature_columns:
-    linear_logits = None
-  else:
-    linear_optimizer = optimizers.get_optimizer_instance(
-        linear_optimizer,
-        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
-    _check_no_sync_replicas_optimizer(linear_optimizer)
-    with variable_scope.variable_scope(
-        linear_parent_scope,
-        values=tuple(six.itervalues(features)),
-        partitioner=input_layer_partitioner) as scope:
-      linear_absolute_scope = scope.name
-      logit_fn = linear._linear_logit_fn_builder(  # pylint: disable=protected-access
-          units=head.logits_dimension,
-          feature_columns=linear_feature_columns,
-          sparse_combiner=linear_sparse_combiner)
-      linear_logits = logit_fn(features=features)
-      _add_layer_summary(linear_logits, scope.name)
-
-  # Combine logits and build full model.
-  if dnn_logits is not None and linear_logits is not None:
-    logits = dnn_logits + linear_logits
-  elif dnn_logits is not None:
-    logits = dnn_logits
-  else:
-    logits = linear_logits
-
-  def _train_op_fn(loss):
-    """Returns the op to optimize the loss."""
-    train_ops = []
-    global_step = training_util.get_global_step()
-    if dnn_logits is not None:
-      train_ops.append(
-          dnn_optimizer.minimize(
-              loss,
-              var_list=ops.get_collection(
-                  ops.GraphKeys.TRAINABLE_VARIABLES,
-                  scope=dnn_absolute_scope)))
-    if linear_logits is not None:
-      train_ops.append(
-          linear_optimizer.minimize(
-              loss,
-              var_list=ops.get_collection(
-                  ops.GraphKeys.TRAINABLE_VARIABLES,
-                  scope=linear_absolute_scope)))
-
-    train_op = control_flow_ops.group(*train_ops)
-    with ops.control_dependencies([train_op]):
-      return state_ops.assign_add(global_step, 1).op
-
-  return head.create_estimator_spec(
-      features=features,
-      mode=mode,
-      labels=labels,
-      train_op_fn=_train_op_fn,
-      logits=logits)
-
-
-@estimator_export('estimator.DNNLinearCombinedClassifier')
-class DNNLinearCombinedClassifier(estimator.Estimator):
-  """An estimator for TensorFlow Linear and DNN joined classification models.
-
-  Note: This estimator is also known as wide-n-deep.
-
-  Example:
-
-  ```python
-  numeric_feature = numeric_column(...)
-  categorical_column_a = categorical_column_with_hash_bucket(...)
-  categorical_column_b = categorical_column_with_hash_bucket(...)
-
-  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
-  categorical_feature_a_emb = embedding_column(
-      categorical_column=categorical_feature_a, ...)
-  categorical_feature_b_emb = embedding_column(
-      categorical_id_column=categorical_feature_b, ...)
-
-  estimator = DNNLinearCombinedClassifier(
-      # wide settings
-      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
-      linear_optimizer=tf.train.FtrlOptimizer(...),
-      # deep settings
-      dnn_feature_columns=[
-          categorical_feature_a_emb, categorical_feature_b_emb,
-          numeric_feature],
-      dnn_hidden_units=[1000, 500, 100],
-      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...),
-      # warm-start settings
-      warm_start_from="/path/to/checkpoint/dir")
-
-  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
-  tf.train.ProximalAdagradOptimizer(
-      learning_rate=0.1,
-      l1_regularization_strength=0.001,
-      l2_regularization_strength=0.001)
-  # To apply learning rate decay, you can set dnn_optimizer to a callable:
-  lambda: tf.AdamOptimizer(
-      learning_rate=tf.exponential_decay(
-          learning_rate=0.1,
-          global_step=tf.get_global_step(),
-          decay_steps=10000,
-          decay_rate=0.96)
-  # It is the same for linear_optimizer.
-
-  # Input builders
-  def input_fn_train: # returns x, y
-    pass
-  estimator.train(input_fn=input_fn_train, steps=100)
-
-  def input_fn_eval: # returns x, y
-    pass
-  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
-  def input_fn_predict: # returns x, None
-    pass
-  predictions = estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-  otherwise there will be a `KeyError`:
-
-  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
-    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
-      with `key` the id column name, the second with `key` the weight column
-      name. Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss is calculated by using softmax cross entropy.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               model_dir=None,
-               linear_feature_columns=None,
-               linear_optimizer='Ftrl',
-               dnn_feature_columns=None,
-               dnn_optimizer='Adagrad',
-               dnn_hidden_units=None,
-               dnn_activation_fn=nn.relu,
-               dnn_dropout=None,
-               n_classes=2,
-               weight_column=None,
-               label_vocabulary=None,
-               input_layer_partitioner=None,
-               config=None,
-               warm_start_from=None,
-               loss_reduction=losses.Reduction.SUM,
-               batch_norm=False,
-               linear_sparse_combiner='sum'):
-    """Initializes a DNNLinearCombinedClassifier instance.
-
-    Args:
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
-        to continue training a previously saved model.
-      linear_feature_columns: An iterable containing all the feature columns
-        used by linear part of the model. All items in the set must be
-        instances of classes derived from `FeatureColumn`.
-      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
-        the linear part of the model. Can also be a string (one of 'Adagrad',
-        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL
-        optimizer.
-      dnn_feature_columns: An iterable containing all the feature columns used
-        by deep part of the model. All items in the set must be instances of
-        classes derived from `FeatureColumn`.
-      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
-        the deep part of the model. Can also be a string (one of 'Adagrad',
-        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad
-        optimizer.
-      dnn_hidden_units: List of hidden units per layer. All layers are fully
-        connected.
-      dnn_activation_fn: Activation function applied to each layer. If None,
-        will use `tf.nn.relu`.
-      dnn_dropout: When not None, the probability we will drop out
-        a given coordinate.
-      n_classes: Number of label classes. Defaults to 2, namely binary
-        classification. Must be > 1.
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to down weight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      label_vocabulary: A list of strings represents possible label values. If
-        given, labels must be string type and have any value in
-        `label_vocabulary`. If it is not given, that means labels are
-        already encoded as integer or float within [0, 1] for `n_classes=2` and
-        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
-        Also there will be errors if vocabulary is not provided and labels are
-        string.
-      input_layer_partitioner: Partitioner for input layer. Defaults to
-        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-      config: RunConfig object to configure the runtime settings.
-      warm_start_from: A string filepath to a checkpoint to warm-start from, or
-        a `WarmStartSettings` object to fully configure warm-starting.  If the
-        string filepath is provided instead of a `WarmStartSettings`, then all
-        weights are warm-started, and it is assumed that vocabularies and Tensor
-        names are unchanged.
-      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-        to reduce training loss over batch. Defaults to `SUM`.
-      batch_norm: Whether to use batch normalization after each hidden layer.
-      linear_sparse_combiner: A string specifying how to reduce the linear model
-        if a categorical column is multivalent.  One of "mean", "sqrtn", and
-        "sum" -- these are effectively different ways to do example-level
-        normalization, which can be useful for bag-of-words features.  For more
-        details, see `tf.feature_column.linear_model`.
-
-    Raises:
-      ValueError: If both linear_feature_columns and dnn_features_columns are
-        empty at the same time.
-    """
-    linear_feature_columns = linear_feature_columns or []
-    dnn_feature_columns = dnn_feature_columns or []
-    self._feature_columns = (
-        list(linear_feature_columns) + list(dnn_feature_columns))
-    if not self._feature_columns:
-      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
-                       'must be defined.')
-    if n_classes == 2:
-      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
-          weight_column=weight_column,
-          label_vocabulary=label_vocabulary,
-          loss_reduction=loss_reduction)
-    else:
-      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
-          n_classes,
-          weight_column=weight_column,
-          label_vocabulary=label_vocabulary,
-          loss_reduction=loss_reduction)
-
-    def _model_fn(features, labels, mode, config):
-      """Call the _dnn_linear_combined_model_fn."""
-      return _dnn_linear_combined_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          linear_feature_columns=linear_feature_columns,
-          linear_optimizer=linear_optimizer,
-          dnn_feature_columns=dnn_feature_columns,
-          dnn_optimizer=dnn_optimizer,
-          dnn_hidden_units=dnn_hidden_units,
-          dnn_activation_fn=dnn_activation_fn,
-          dnn_dropout=dnn_dropout,
-          input_layer_partitioner=input_layer_partitioner,
-          config=config,
-          batch_norm=batch_norm,
-          linear_sparse_combiner=linear_sparse_combiner)
-
-    super(DNNLinearCombinedClassifier, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config,
-        warm_start_from=warm_start_from)
-
-
-@estimator_export('estimator.DNNLinearCombinedRegressor')
-class DNNLinearCombinedRegressor(estimator.Estimator):
-  """An estimator for TensorFlow Linear and DNN joined models for regression.
-
-  Note: This estimator is also known as wide-n-deep.
-
-  Example:
-
-  ```python
-  numeric_feature = numeric_column(...)
-  categorical_column_a = categorical_column_with_hash_bucket(...)
-  categorical_column_b = categorical_column_with_hash_bucket(...)
-
-  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
-  categorical_feature_a_emb = embedding_column(
-      categorical_column=categorical_feature_a, ...)
-  categorical_feature_b_emb = embedding_column(
-      categorical_column=categorical_feature_b, ...)
-
-  estimator = DNNLinearCombinedRegressor(
-      # wide settings
-      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
-      linear_optimizer=tf.train.FtrlOptimizer(...),
-      # deep settings
-      dnn_feature_columns=[
-          categorical_feature_a_emb, categorical_feature_b_emb,
-          numeric_feature],
-      dnn_hidden_units=[1000, 500, 100],
-      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...),
-      # warm-start settings
-      warm_start_from="/path/to/checkpoint/dir")
-
-  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
-  tf.train.ProximalAdagradOptimizer(
-      learning_rate=0.1,
-      l1_regularization_strength=0.001,
-      l2_regularization_strength=0.001)
-  # To apply learning rate decay, you can set dnn_optimizer to a callable:
-  lambda: tf.AdamOptimizer(
-      learning_rate=tf.exponential_decay(
-          learning_rate=0.1,
-          global_step=tf.get_global_step(),
-          decay_steps=10000,
-          decay_rate=0.96)
-  # It is the same for linear_optimizer.
-
-  # Input builders
-  def input_fn_train: # returns x, y
-    pass
-  estimator.train(input_fn=input_fn_train, steps=100)
-
-  def input_fn_eval: # returns x, y
-    pass
-  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
-  def input_fn_predict: # returns x, None
-    pass
-  predictions = estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-  otherwise there will be a `KeyError`:
-
-  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
-    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
-      with `key` the id column name, the second with `key` the weight column
-      name. Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss is calculated by using mean squared error.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               model_dir=None,
-               linear_feature_columns=None,
-               linear_optimizer='Ftrl',
-               dnn_feature_columns=None,
-               dnn_optimizer='Adagrad',
-               dnn_hidden_units=None,
-               dnn_activation_fn=nn.relu,
-               dnn_dropout=None,
-               label_dimension=1,
-               weight_column=None,
-               input_layer_partitioner=None,
-               config=None,
-               warm_start_from=None,
-               loss_reduction=losses.Reduction.SUM,
-               batch_norm=False,
-               linear_sparse_combiner='sum'):
-    """Initializes a DNNLinearCombinedRegressor instance.
-
-    Args:
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
-        to continue training a previously saved model.
-      linear_feature_columns: An iterable containing all the feature columns
-        used by linear part of the model. All items in the set must be
-        instances of classes derived from `FeatureColumn`.
-      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
-        the linear part of the model. Can also be a string (one of 'Adagrad',
-        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL
-        optimizer.
-      dnn_feature_columns: An iterable containing all the feature columns used
-        by deep part of the model. All items in the set must be instances of
-        classes derived from `FeatureColumn`.
-      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
-        the deep part of the model. Can also be a string (one of 'Adagrad',
-        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad
-        optimizer.
-      dnn_hidden_units: List of hidden units per layer. All layers are fully
-        connected.
-      dnn_activation_fn: Activation function applied to each layer. If None,
-        will use `tf.nn.relu`.
-      dnn_dropout: When not None, the probability we will drop out
-        a given coordinate.
-      label_dimension: Number of regression targets per example. This is the
-        size of the last dimension of the labels and logits `Tensor` objects
-        (typically, these have shape `[batch_size, label_dimension]`).
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to down weight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      input_layer_partitioner: Partitioner for input layer. Defaults to
-        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
-      config: RunConfig object to configure the runtime settings.
-      warm_start_from: A string filepath to a checkpoint to warm-start from, or
-        a `WarmStartSettings` object to fully configure warm-starting.  If the
-        string filepath is provided instead of a `WarmStartSettings`, then all
-        weights are warm-started, and it is assumed that vocabularies and Tensor
-        names are unchanged.
-      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-        to reduce training loss over batch. Defaults to `SUM`.
-      batch_norm: Whether to use batch normalization after each hidden layer.
-      linear_sparse_combiner: A string specifying how to reduce the linear model
-        if a categorical column is multivalent.  One of "mean", "sqrtn", and
-        "sum" -- these are effectively different ways to do example-level
-        normalization, which can be useful for bag-of-words features.  For more
-        details, see `tf.feature_column.linear_model`.
-
-    Raises:
-      ValueError: If both linear_feature_columns and dnn_features_columns are
-        empty at the same time.
-    """
-    linear_feature_columns = linear_feature_columns or []
-    dnn_feature_columns = dnn_feature_columns or []
-    self._feature_columns = (
-        list(linear_feature_columns) + list(dnn_feature_columns))
-    if not self._feature_columns:
-      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
-                       'must be defined.')
+from tensorflow_estimator.python.estimator.canned import dnn_linear_combined
 
-    def _model_fn(features, labels, mode, config):
-      """Call the _dnn_linear_combined_model_fn."""
-      return _dnn_linear_combined_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head_lib._regression_head(  # pylint: disable=protected-access
-              label_dimension=label_dimension, weight_column=weight_column,
-              loss_reduction=loss_reduction),
-          linear_feature_columns=linear_feature_columns,
-          linear_optimizer=linear_optimizer,
-          dnn_feature_columns=dnn_feature_columns,
-          dnn_optimizer=dnn_optimizer,
-          dnn_hidden_units=dnn_hidden_units,
-          dnn_activation_fn=dnn_activation_fn,
-          dnn_dropout=dnn_dropout,
-          input_layer_partitioner=input_layer_partitioner,
-          config=config,
-          batch_norm=batch_norm,
-          linear_sparse_combiner=linear_sparse_combiner)
+# Include attrs that start with single underscore.
+dnn_linear_combined.__all__ = [
+    s for s in dir(dnn_linear_combined) if not s.startswith('__')
+]
 
-    super(DNNLinearCombinedRegressor, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config,
-        warm_start_from=warm_start_from)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.dnn_linear_combined import *
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
deleted file mode 100644
index ab945d7b1a..0000000000
--- a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
+++ /dev/null
@@ -1,1123 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for dnn_linear_combined.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import shutil
-import tempfile
-
-from absl.testing import parameterized
-import numpy as np
-import six
-
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import dnn_linear_combined
-from tensorflow.python.estimator.canned import dnn_testing_utils
-from tensorflow.python.estimator.canned import linear_testing_utils
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.estimator.inputs import pandas_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import gradient_descent
-from tensorflow.python.training import input as input_lib
-from tensorflow.python.training import optimizer as optimizer_lib
-
-
-try:
-  # pylint: disable=g-import-not-at-top
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
-
-
-class DNNOnlyModelFnTest(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNModelFnTest.__init__(self, self._dnn_only_model_fn)
-
-  def _dnn_only_model_fn(self,
-                         features,
-                         labels,
-                         mode,
-                         head,
-                         hidden_units,
-                         feature_columns,
-                         optimizer='Adagrad',
-                         activation_fn=nn.relu,
-                         dropout=None,
-                         input_layer_partitioner=None,
-                         config=None):
-    return dnn_linear_combined._dnn_linear_combined_model_fn(
-        features=features,
-        labels=labels,
-        mode=mode,
-        head=head,
-        linear_feature_columns=[],
-        dnn_hidden_units=hidden_units,
-        dnn_feature_columns=feature_columns,
-        dnn_optimizer=optimizer,
-        dnn_activation_fn=activation_fn,
-        dnn_dropout=dropout,
-        input_layer_partitioner=input_layer_partitioner,
-        config=config)
-
-
-# A function to mimic linear-regressor init reuse same tests.
-def _linear_regressor_fn(feature_columns,
-                         model_dir=None,
-                         label_dimension=1,
-                         weight_column=None,
-                         optimizer='Ftrl',
-                         config=None,
-                         partitioner=None,
-                         sparse_combiner='sum'):
-  return dnn_linear_combined.DNNLinearCombinedRegressor(
-      model_dir=model_dir,
-      linear_feature_columns=feature_columns,
-      linear_optimizer=optimizer,
-      label_dimension=label_dimension,
-      weight_column=weight_column,
-      input_layer_partitioner=partitioner,
-      config=config,
-      linear_sparse_combiner=sparse_combiner)
-
-
-class LinearOnlyRegressorPartitionerTest(
-    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearOnlyRegressorPartitionerV2Test(
-    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-class LinearOnlyRegressorEvaluationTest(
-    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearOnlyRegressorEvaluationV2Test(
-    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-class LinearOnlyRegressorPredictTest(
-    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearOnlyRegressorPredictV2Test(
-    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-class LinearOnlyRegressorIntegrationTest(
-    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearOnlyRegressorIntegrationV2Test(
-    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-class LinearOnlyRegressorTrainingTest(
-    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearOnlyRegressorTrainingV2Test(
-    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-def _linear_classifier_fn(feature_columns,
-                          model_dir=None,
-                          n_classes=2,
-                          weight_column=None,
-                          label_vocabulary=None,
-                          optimizer='Ftrl',
-                          config=None,
-                          partitioner=None,
-                          sparse_combiner='sum'):
-  return dnn_linear_combined.DNNLinearCombinedClassifier(
-      model_dir=model_dir,
-      linear_feature_columns=feature_columns,
-      linear_optimizer=optimizer,
-      n_classes=n_classes,
-      weight_column=weight_column,
-      label_vocabulary=label_vocabulary,
-      input_layer_partitioner=partitioner,
-      config=config,
-      linear_sparse_combiner=sparse_combiner)
-
-
-class LinearOnlyClassifierTrainingTest(
-    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
-
-
-class LinearOnlyClassifierTrainingV2Test(
-    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
-        self,
-        linear_classifier_fn=_linear_classifier_fn,
-        fc_lib=feature_column_v2)
-
-
-class LinearOnlyClassifierClassesEvaluationTest(
-    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
-
-
-class LinearOnlyClassifierClassesEvaluationV2Test(
-    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
-        self,
-        linear_classifier_fn=_linear_classifier_fn,
-        fc_lib=feature_column_v2)
-
-
-class LinearOnlyClassifierPredictTest(
-    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
-
-
-class LinearOnlyClassifierPredictV2Test(
-    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
-        self,
-        linear_classifier_fn=_linear_classifier_fn,
-        fc_lib=feature_column_v2)
-
-
-class LinearOnlyClassifierIntegrationTest(
-    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
-
-
-class LinearOnlyClassifierIntegrationV2Test(
-    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
-        self,
-        linear_classifier_fn=_linear_classifier_fn,
-        fc_lib=feature_column_v2)
-
-
-@parameterized.parameters((feature_column,), (feature_column_v2,))
-class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow_helper(
-      self, linear_feature_columns, dnn_feature_columns, feature_spec,
-      train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
-      label_dimension, batch_size):
-    est = dnn_linear_combined.DNNLinearCombinedRegressor(
-        linear_feature_columns=linear_feature_columns,
-        dnn_hidden_units=(2, 2),
-        dnn_feature_columns=dnn_feature_columns,
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array([
-        x[prediction_keys.PredictionKeys.PREDICTIONS]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
-
-    # EXPORT
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, batch_size,
-                          fc_impl):
-    linear_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    dnn_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    feature_columns = linear_feature_columns + dnn_feature_columns
-    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
-    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
-                                    feature_spec, train_input_fn, eval_input_fn,
-                                    predict_input_fn, input_dimension,
-                                    label_dimension, batch_size)
-
-  def _test_complete_flow_mix1(self, train_input_fn, eval_input_fn,
-                               predict_input_fn, input_dimension,
-                               label_dimension, batch_size, fc_impl):
-    del fc_impl
-    linear_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))
-    ]
-    dnn_feature_columns = [
-        feature_column_v2.numeric_column('x', shape=(input_dimension,))
-    ]
-    feature_columns = linear_feature_columns + dnn_feature_columns
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
-    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
-                                    feature_spec, train_input_fn, eval_input_fn,
-                                    predict_input_fn, input_dimension,
-                                    label_dimension, batch_size)
-
-  def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn,
-                               predict_input_fn, input_dimension,
-                               label_dimension, batch_size, fc_impl):
-    del fc_impl
-    linear_feature_columns = [
-        feature_column_v2.numeric_column('x', shape=(input_dimension,))
-    ]
-    dnn_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))
-    ]
-    feature_columns = linear_feature_columns + dnn_feature_columns
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
-    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
-                                    feature_spec, train_input_fn, eval_input_fn,
-                                    predict_input_fn, input_dimension,
-                                    label_dimension, batch_size)
-
-  def _test_numpy_input_fn_helper(self, fc_impl, fn_to_run):
-    """Tests complete flow with numpy_input_fn."""
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        batch_size=batch_size,
-        shuffle=False)
-
-    fn_to_run(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-  def test_numpy_input_fn_basic(self, fc_impl):
-    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow)
-
-  def test_numpy_input_fn_mix1(self, fc_impl):
-    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
-
-  def test_numpy_input_fn_mix2(self, fc_impl):
-    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
-
-  def _test_pandas_input_fn_helper(self, fc_impl, fn_to_run):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-    label_dimension = 1
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size, dtype=np.float32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(data)
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        y=y,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        y=y,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        batch_size=batch_size,
-        shuffle=False)
-
-    fn_to_run(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-  def test_pandas_input_fn_basic(self, fc_impl):
-    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow)
-
-  def test_pandas_input_fn_mix1(self, fc_impl):
-    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
-
-  def test_pandas_input_fn_mix2(self, fc_impl):
-    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
-
-  def _test_input_fn_from_parse_example_helper(self, fc_impl, fn_to_run):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-
-    serialized_examples = []
-    for datum in data:
-      example = example_pb2.Example(features=feature_pb2.Features(
-          feature={
-              'x': feature_pb2.Feature(
-                  float_list=feature_pb2.FloatList(value=datum)),
-              'y': feature_pb2.Feature(
-                  float_list=feature_pb2.FloatList(value=datum)),
-          }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
-    }
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = linear_testing_utils.queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = linear_testing_utils.queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = linear_testing_utils.queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    fn_to_run(
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-  def test_input_fn_from_parse_example_basic(self, fc_impl):
-    self._test_input_fn_from_parse_example_helper(fc_impl,
-                                                  self._test_complete_flow)
-
-  def test_input_fn_from_parse_example_mix1(self, fc_impl):
-    self._test_input_fn_from_parse_example_helper(fc_impl,
-                                                  self._test_complete_flow_mix1)
-
-  def test_input_fn_from_parse_example_mix2(self, fc_impl):
-    self._test_input_fn_from_parse_example_helper(fc_impl,
-                                                  self._test_complete_flow_mix2)
-
-
-# A function to mimic dnn-classifier init reuse same tests.
-def _dnn_classifier_fn(hidden_units,
-                       feature_columns,
-                       model_dir=None,
-                       n_classes=2,
-                       weight_column=None,
-                       label_vocabulary=None,
-                       optimizer='Adagrad',
-                       config=None,
-                       input_layer_partitioner=None):
-  return dnn_linear_combined.DNNLinearCombinedClassifier(
-      model_dir=model_dir,
-      dnn_hidden_units=hidden_units,
-      dnn_feature_columns=feature_columns,
-      dnn_optimizer=optimizer,
-      n_classes=n_classes,
-      weight_column=weight_column,
-      label_vocabulary=label_vocabulary,
-      input_layer_partitioner=input_layer_partitioner,
-      config=config)
-
-
-class DNNOnlyClassifierEvaluateTest(
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column)
-
-
-class DNNOnlyClassifierEvaluateV2Test(
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
-
-
-class DNNOnlyClassifierPredictTest(
-    dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column)
-
-
-class DNNOnlyClassifierPredictV2Test(
-    dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
-
-
-class DNNOnlyClassifierTrainTest(
-    dnn_testing_utils.BaseDNNClassifierTrainTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column)
-
-
-class DNNOnlyClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
-                                   test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
-
-
-# A function to mimic dnn-regressor init reuse same tests.
-def _dnn_regressor_fn(hidden_units,
-                      feature_columns,
-                      model_dir=None,
-                      label_dimension=1,
-                      weight_column=None,
-                      optimizer='Adagrad',
-                      config=None,
-                      input_layer_partitioner=None):
-  return dnn_linear_combined.DNNLinearCombinedRegressor(
-      model_dir=model_dir,
-      dnn_hidden_units=hidden_units,
-      dnn_feature_columns=feature_columns,
-      dnn_optimizer=optimizer,
-      label_dimension=label_dimension,
-      weight_column=weight_column,
-      input_layer_partitioner=input_layer_partitioner,
-      config=config)
-
-
-class DNNOnlyRegressorEvaluateTest(
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column)
-
-
-class DNNOnlyRegressorEvaluateV2Test(
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
-
-
-class DNNOnlyRegressorPredictTest(
-    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column)
-
-
-class DNNOnlyRegressorPredictV2Test(
-    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
-
-
-class DNNOnlyRegressorTrainTest(
-    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column)
-
-
-class DNNOnlyRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
-                                  test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
-
-
-@parameterized.parameters((feature_column,), (feature_column_v2,))
-class DNNLinearCombinedClassifierIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _as_label(self, data_in_float):
-    return np.rint(data_in_float).astype(np.int64)
-
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, n_classes, batch_size, fc_impl):
-    linear_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    dnn_feature_columns = [
-        fc_impl.numeric_column('x', shape=(input_dimension,))
-    ]
-    feature_columns = linear_feature_columns + dnn_feature_columns
-    est = dnn_linear_combined.DNNLinearCombinedClassifier(
-        linear_feature_columns=linear_feature_columns,
-        dnn_hidden_units=(2, 2),
-        dnn_feature_columns=dnn_feature_columns,
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predicted_proba = np.array([
-        x[prediction_keys.PredictionKeys.PROBABILITIES]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
-
-    # EXPORT
-    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self, fc_impl):
-    """Tests complete flow with numpy_input_fn."""
-    n_classes = 3
-    input_dimension = 2
-    batch_size = 10
-    data = np.linspace(
-        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
-    x_data = data.reshape(batch_size, input_dimension)
-    y_data = self._as_label(np.reshape(data[:batch_size], (batch_size, 1)))
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data},
-        y=y_data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data},
-        y=y_data,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data},
-        batch_size=batch_size,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        n_classes=n_classes,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-  def test_pandas_input_fn(self, fc_impl):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-    input_dimension = 1
-    n_classes = 2
-    batch_size = 10
-    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(self._as_label(data))
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        y=y,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        y=y,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        batch_size=batch_size,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        n_classes=n_classes,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-  def test_input_fn_from_parse_example(self, fc_impl):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    input_dimension = 2
-    n_classes = 3
-    batch_size = 10
-    data = np.linspace(0., n_classes-1., batch_size * input_dimension,
-                       dtype=np.float32)
-    data = data.reshape(batch_size, input_dimension)
-
-    serialized_examples = []
-    for datum in data:
-      example = example_pb2.Example(features=feature_pb2.Features(
-          feature={
-              'x':
-                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
-                      value=datum)),
-              'y':
-                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
-                      value=self._as_label(datum[:1]))),
-          }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
-    }
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = linear_testing_utils.queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = linear_testing_utils.queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = linear_testing_utils.queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    self._test_complete_flow(
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=input_dimension,
-        n_classes=n_classes,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-
-@parameterized.parameters((feature_column,), (feature_column_v2,))
-class DNNLinearCombinedTests(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _mock_optimizer(self, real_optimizer, var_name_prefix):
-    """Verifies global_step is None and var_names start with given prefix."""
-
-    def _minimize(loss, global_step=None, var_list=None):
-      self.assertIsNone(global_step)
-      trainable_vars = var_list or ops.get_collection(
-          ops.GraphKeys.TRAINABLE_VARIABLES)
-      var_names = [var.name for var in trainable_vars]
-      self.assertTrue(
-          all([name.startswith(var_name_prefix) for name in var_names]))
-      # var is used to check this op called by training.
-      with ops.name_scope(''):
-        var = variables_lib.Variable(0., name=(var_name_prefix + '_called'))
-      with ops.control_dependencies([var.assign(100.)]):
-        return real_optimizer.minimize(loss, global_step, var_list)
-
-    optimizer_mock = test.mock.NonCallableMagicMock(
-        spec=optimizer_lib.Optimizer, wraps=real_optimizer)
-    optimizer_mock.minimize = test.mock.MagicMock(wraps=_minimize)
-
-    return optimizer_mock
-
-  def test_train_op_calls_both_dnn_and_linear(self, fc_impl):
-    opt = gradient_descent.GradientDescentOptimizer(1.)
-    x_column = fc_impl.numeric_column('x')
-    input_fn = numpy_io.numpy_input_fn(
-        x={'x': np.array([[0.], [1.]])},
-        y=np.array([[0.], [1.]]),
-        batch_size=1,
-        shuffle=False)
-    est = dnn_linear_combined.DNNLinearCombinedClassifier(
-        linear_feature_columns=[x_column],
-        # verifies linear_optimizer is used only for linear part.
-        linear_optimizer=self._mock_optimizer(opt, 'linear'),
-        dnn_hidden_units=(2, 2),
-        dnn_feature_columns=[x_column],
-        # verifies dnn_optimizer is used only for linear part.
-        dnn_optimizer=self._mock_optimizer(opt, 'dnn'),
-        model_dir=self._model_dir)
-    est.train(input_fn, steps=1)
-    # verifies train_op fires linear minimize op
-    self.assertEqual(100.,
-                     checkpoint_utils.load_variable(
-                         self._model_dir, 'linear_called'))
-    # verifies train_op fires dnn minimize op
-    self.assertEqual(100.,
-                     checkpoint_utils.load_variable(
-                         self._model_dir, 'dnn_called'))
-
-  def test_dnn_and_linear_logits_are_added(self, fc_impl):
-    with ops.Graph().as_default():
-      variables_lib.Variable([[1.0]], name='linear/linear_model/x/weights')
-      variables_lib.Variable([2.0], name='linear/linear_model/bias_weights')
-      variables_lib.Variable([[3.0]], name='dnn/hiddenlayer_0/kernel')
-      variables_lib.Variable([4.0], name='dnn/hiddenlayer_0/bias')
-      variables_lib.Variable([[5.0]], name='dnn/logits/kernel')
-      variables_lib.Variable([6.0], name='dnn/logits/bias')
-      variables_lib.Variable(1, name='global_step', dtype=dtypes.int64)
-      linear_testing_utils.save_variables_to_ckpt(self._model_dir)
-
-    x_column = fc_impl.numeric_column('x')
-    est = dnn_linear_combined.DNNLinearCombinedRegressor(
-        linear_feature_columns=[x_column],
-        dnn_hidden_units=[1],
-        dnn_feature_columns=[x_column],
-        model_dir=self._model_dir)
-    input_fn = numpy_io.numpy_input_fn(
-        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
-    # linear logits = 10*1 + 2 = 12
-    # dnn logits = (10*3 + 4)*5 + 6 = 176
-    # logits = dnn + linear = 176 + 12 = 188
-    self.assertAllClose(
-        {
-            prediction_keys.PredictionKeys.PREDICTIONS: [188.],
-        },
-        next(est.predict(input_fn=input_fn)))
-
-
-@parameterized.parameters((feature_column,), (feature_column_v2,))
-class DNNLinearCombinedWarmStartingTest(test.TestCase):
-
-  def setUp(self):
-    # Create a directory to save our old checkpoint and vocabularies to.
-    self._ckpt_and_vocab_dir = tempfile.mkdtemp()
-
-    # Make a dummy input_fn.
-    def _input_fn():
-      features = {
-          'age': [[23.], [31.]],
-          'city': [['Palo Alto'], ['Mountain View']],
-      }
-      return features, [0, 1]
-
-    self._input_fn = _input_fn
-
-  def tearDown(self):
-    # Clean up checkpoint / vocab dir.
-    writer_cache.FileWriterCache.clear()
-    shutil.rmtree(self._ckpt_and_vocab_dir)
-
-  def test_classifier_basic_warm_starting(self, fc_impl):
-    """Tests correctness of DNNLinearCombinedClassifier default warm-start."""
-    age = fc_impl.numeric_column('age')
-    city = fc_impl.embedding_column(
-        fc_impl.categorical_column_with_vocabulary_list(
-            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-        dimension=5)
-
-    # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
-    dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
-        linear_feature_columns=[age],
-        dnn_feature_columns=[city],
-        dnn_hidden_units=[256, 128],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        linear_optimizer='SGD',
-        dnn_optimizer='SGD')
-    dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second DNNLinearCombinedClassifier, warm-started from the first.
-    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
-    # have accumulator values that change).
-    warm_started_dnn_lc_classifier = (
-        dnn_linear_combined.DNNLinearCombinedClassifier(
-            linear_feature_columns=[age],
-            dnn_feature_columns=[city],
-            dnn_hidden_units=[256, 128],
-            n_classes=4,
-            linear_optimizer=gradient_descent.GradientDescentOptimizer(
-                learning_rate=0.0),
-            dnn_optimizer=gradient_descent.GradientDescentOptimizer(
-                learning_rate=0.0),
-            warm_start_from=dnn_lc_classifier.model_dir))
-
-    warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
-    for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
-      self.assertAllClose(
-          dnn_lc_classifier.get_variable_value(variable_name),
-          warm_started_dnn_lc_classifier.get_variable_value(variable_name))
-
-  def test_regressor_basic_warm_starting(self, fc_impl):
-    """Tests correctness of DNNLinearCombinedRegressor default warm-start."""
-    age = fc_impl.numeric_column('age')
-    city = fc_impl.embedding_column(
-        fc_impl.categorical_column_with_vocabulary_list(
-            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-        dimension=5)
-
-    # Create a DNNLinearCombinedRegressor and train to save a checkpoint.
-    dnn_lc_regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
-        linear_feature_columns=[age],
-        dnn_feature_columns=[city],
-        dnn_hidden_units=[256, 128],
-        model_dir=self._ckpt_and_vocab_dir,
-        linear_optimizer='SGD',
-        dnn_optimizer='SGD')
-    dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second DNNLinearCombinedRegressor, warm-started from the first.
-    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
-    # have accumulator values that change).
-    warm_started_dnn_lc_regressor = (
-        dnn_linear_combined.DNNLinearCombinedRegressor(
-            linear_feature_columns=[age],
-            dnn_feature_columns=[city],
-            dnn_hidden_units=[256, 128],
-            linear_optimizer=gradient_descent.GradientDescentOptimizer(
-                learning_rate=0.0),
-            dnn_optimizer=gradient_descent.GradientDescentOptimizer(
-                learning_rate=0.0),
-            warm_start_from=dnn_lc_regressor.model_dir))
-
-    warm_started_dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1)
-    for variable_name in warm_started_dnn_lc_regressor.get_variable_names():
-      self.assertAllClose(
-          dnn_lc_regressor.get_variable_value(variable_name),
-          warm_started_dnn_lc_regressor.get_variable_value(variable_name))
-
-  def test_warm_starting_selective_variables(self, fc_impl):
-    """Tests selecting variables to warm-start."""
-    age = fc_impl.numeric_column('age')
-    city = fc_impl.embedding_column(
-        fc_impl.categorical_column_with_vocabulary_list(
-            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-        dimension=5)
-
-    # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
-    dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
-        linear_feature_columns=[age],
-        dnn_feature_columns=[city],
-        dnn_hidden_units=[256, 128],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        linear_optimizer='SGD',
-        dnn_optimizer='SGD')
-    dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second DNNLinearCombinedClassifier, warm-started from the first.
-    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
-    # have accumulator values that change).
-    warm_started_dnn_lc_classifier = (
-        dnn_linear_combined.DNNLinearCombinedClassifier(
-            linear_feature_columns=[age],
-            dnn_feature_columns=[city],
-            dnn_hidden_units=[256, 128],
-            n_classes=4,
-            linear_optimizer=gradient_descent.GradientDescentOptimizer(
-                learning_rate=0.0),
-            dnn_optimizer=gradient_descent.GradientDescentOptimizer(
-                learning_rate=0.0),
-            # The provided regular expression will only warm-start the deep
-            # portion of the model.
-            warm_start_from=estimator.WarmStartSettings(
-                ckpt_to_initialize_from=dnn_lc_classifier.model_dir,
-                vars_to_warm_start='.*(dnn).*')))
-
-    warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
-    for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
-      if 'dnn' in variable_name:
-        self.assertAllClose(
-            dnn_lc_classifier.get_variable_value(variable_name),
-            warm_started_dnn_lc_classifier.get_variable_value(variable_name))
-      elif 'linear' in variable_name:
-        linear_values = warm_started_dnn_lc_classifier.get_variable_value(
-            variable_name)
-        # Since they're not warm-started, the linear weights will be
-        # zero-initialized.
-        self.assertAllClose(np.zeros_like(linear_values), linear_values)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/canned/dnn_test.py b/tensorflow/python/estimator/canned/dnn_test.py
deleted file mode 100644
index 756696cea0..0000000000
--- a/tensorflow/python/estimator/canned/dnn_test.py
+++ /dev/null
@@ -1,580 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for dnn.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import shutil
-import tempfile
-
-from absl.testing import parameterized
-import numpy as np
-import six
-
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.estimator.canned import dnn
-from tensorflow.python.estimator.canned import dnn_testing_utils
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.estimator.inputs import pandas_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import data_flow_ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import input as input_lib
-from tensorflow.python.training import queue_runner
-
-try:
-  # pylint: disable=g-import-not-at-top
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
-
-
-def _dnn_classifier_fn(*args, **kwargs):
-  return dnn.DNNClassifier(*args, **kwargs)
-
-
-class DNNModelFnTest(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNModelFnTest.__init__(
-        self, dnn._dnn_model_fn, fc_impl=feature_column)
-
-
-class DNNModelFnV2Test(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNModelFnTest.__init__(
-        self, dnn._dnn_model_fn, fc_impl=feature_column_v2)
-
-
-class DNNLogitFnTest(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNLogitFnTest.__init__(
-        self, dnn._dnn_logit_fn_builder, fc_impl=feature_column)
-
-
-class DNNLogitFnV2Test(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNLogitFnTest.__init__(
-        self, dnn._dnn_logit_fn_builder, fc_impl=feature_column_v2)
-
-
-class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
-                          test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
-        self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column)
-
-
-class DNNWarmStartingV2Test(dnn_testing_utils.BaseDNNWarmStartingTest,
-                            test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
-        self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column_v2)
-
-
-class DNNClassifierEvaluateTest(
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column)
-
-
-class DNNClassifierEvaluateV2Test(
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
-
-
-class DNNClassifierPredictTest(
-    dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column)
-
-
-class DNNClassifierPredictV2Test(dnn_testing_utils.BaseDNNClassifierPredictTest,
-                                 test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
-
-
-class DNNClassifierTrainTest(
-    dnn_testing_utils.BaseDNNClassifierTrainTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column)
-
-
-class DNNClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
-                               test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
-        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
-
-
-def _dnn_regressor_fn(*args, **kwargs):
-  return dnn.DNNRegressor(*args, **kwargs)
-
-
-class DNNRegressorEvaluateTest(
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column)
-
-
-class DNNRegressorEvaluateV2Test(dnn_testing_utils.BaseDNNRegressorEvaluateTest,
-                                 test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
-
-
-class DNNRegressorPredictTest(
-    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column)
-
-
-class DNNRegressorPredictV2Test(dnn_testing_utils.BaseDNNRegressorPredictTest,
-                                test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
-
-
-class DNNRegressorTrainTest(
-    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column)
-
-
-class DNNRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
-                              test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
-        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
-
-
-def _queue_parsed_features(feature_map):
-  tensors_to_enqueue = []
-  keys = []
-  for key, tensor in six.iteritems(feature_map):
-    keys.append(key)
-    tensors_to_enqueue.append(tensor)
-  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
-  input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
-  queue_runner.add_queue_runner(
-      queue_runner.QueueRunner(
-          input_queue,
-          [input_queue.enqueue(tensors_to_enqueue)]))
-  dequeued_tensors = input_queue.dequeue()
-  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
-
-
-@parameterized.parameters((feature_column,), (feature_column_v2,))
-class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, batch_size,
-                          fc_impl):
-    feature_columns = [fc_impl.numeric_column('x', shape=(input_dimension,))]
-
-    est = dnn.DNNRegressor(
-        hidden_units=(2, 2),
-        feature_columns=feature_columns,
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array([
-        x[prediction_keys.PredictionKeys.PREDICTIONS]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
-
-    # EXPORT
-    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self, fc_impl):
-    """Tests complete flow with numpy_input_fn."""
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        batch_size=batch_size,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-  def test_pandas_input_fn(self, fc_impl):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-    label_dimension = 1
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size, dtype=np.float32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(data)
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        y=y,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        y=y,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        batch_size=batch_size,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-  def test_input_fn_from_parse_example(self, fc_impl):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    label_dimension = 2
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-
-    serialized_examples = []
-    for datum in data:
-      example = example_pb2.Example(features=feature_pb2.Features(
-          feature={
-              'x': feature_pb2.Feature(
-                  float_list=feature_pb2.FloatList(value=datum)),
-              'y': feature_pb2.Feature(
-                  float_list=feature_pb2.FloatList(value=datum)),
-          }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
-    }
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = _queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = _queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = _queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    self._test_complete_flow(
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=label_dimension,
-        label_dimension=label_dimension,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-
-@parameterized.parameters((feature_column,), (feature_column_v2,))
-class DNNClassifierIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _as_label(self, data_in_float):
-    return np.rint(data_in_float).astype(np.int64)
-
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, n_classes, batch_size, fc_impl):
-    feature_columns = [fc_impl.numeric_column('x', shape=(input_dimension,))]
-
-    est = dnn.DNNClassifier(
-        hidden_units=(2, 2),
-        feature_columns=feature_columns,
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    num_steps = 10
-    est.train(train_input_fn, steps=num_steps)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn('loss', six.iterkeys(scores))
-
-    # PREDICT
-    predicted_proba = np.array([
-        x[prediction_keys.PredictionKeys.PROBABILITIES]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
-
-    # EXPORT
-    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self, fc_impl):
-    """Tests complete flow with numpy_input_fn."""
-    n_classes = 3
-    input_dimension = 2
-    batch_size = 10
-    data = np.linspace(
-        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
-    x_data = data.reshape(batch_size, input_dimension)
-    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data},
-        y=y_data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data},
-        y=y_data,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': x_data},
-        batch_size=batch_size,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        n_classes=n_classes,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-  def test_pandas_input_fn(self, fc_impl):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-    input_dimension = 1
-    n_classes = 3
-    batch_size = 10
-    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(self._as_label(data))
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        y=y,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        y=y,
-        batch_size=batch_size,
-        shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x,
-        batch_size=batch_size,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        n_classes=n_classes,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-  def test_input_fn_from_parse_example(self, fc_impl):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    input_dimension = 2
-    n_classes = 3
-    batch_size = 10
-    data = np.linspace(
-        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, input_dimension)
-
-    serialized_examples = []
-    for datum in data:
-      example = example_pb2.Example(features=feature_pb2.Features(
-          feature={
-              'x':
-                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
-                      value=datum)),
-              'y':
-                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
-                      value=self._as_label(datum[:1]))),
-          }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
-    }
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = _queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = _queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = _queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    self._test_complete_flow(
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=input_dimension,
-        n_classes=n_classes,
-        batch_size=batch_size,
-        fc_impl=fc_impl)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index 71d7e54783..ab3f9b1020 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,2063 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Utils to be used in testing DNN estimators."""
+"""dnn_testing_utils python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
-import shutil
-import tempfile
-
-import numpy as np
-import six
-
-from tensorflow.core.framework import summary_pb2
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import partitioned_variables
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.platform import test
-from tensorflow.python.summary import summary as summary_lib
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import gradient_descent
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import optimizer as optimizer_lib
-from tensorflow.python.training import saver
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.training import training_util
-
-# pylint rules which are disabled by default for test files.
-# pylint: disable=invalid-name,protected-access,missing-docstring
-
-# Names of variables created by model.
-LEARNING_RATE_NAME = 'dnn/regression_head/dnn/learning_rate'
-HIDDEN_WEIGHTS_NAME_PATTERN = 'dnn/hiddenlayer_%d/kernel'
-HIDDEN_BIASES_NAME_PATTERN = 'dnn/hiddenlayer_%d/bias'
-BATCH_NORM_BETA_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/beta'
-BATCH_NORM_GAMMA_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/gamma'
-BATCH_NORM_MEAN_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/moving_mean'
-BATCH_NORM_VARIANCE_NAME_PATTERN = (
-    'dnn/hiddenlayer_%d/batchnorm_%d/moving_variance')
-LOGITS_WEIGHTS_NAME = 'dnn/logits/kernel'
-LOGITS_BIASES_NAME = 'dnn/logits/bias'
-OCCUPATION_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
-                             'occupation_embedding/embedding_weights')
-CITY_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
-                       'city_embedding/embedding_weights')
-
-
-def assert_close(expected, actual, rtol=1e-04, message='', name='assert_close'):
-  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
-    expected = ops.convert_to_tensor(expected, name='expected')
-    actual = ops.convert_to_tensor(actual, name='actual')
-    rdiff = math_ops.abs((expected - actual) / expected, 'diff')
-    rtol = ops.convert_to_tensor(rtol, name='rtol')
-    return check_ops.assert_less(
-        rdiff,
-        rtol,
-        data=(message, 'Condition expected =~ actual did not hold element-wise:'
-              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
-              'rtol = ', rtol,),
-        summarize=expected.get_shape().num_elements(),
-        name=scope)
-
-
-def create_checkpoint(weights_and_biases,
-                      global_step,
-                      model_dir,
-                      batch_norm_vars=None):
-  """Create checkpoint file with provided model weights.
-
-  Args:
-    weights_and_biases: Iterable of tuples of weight and bias values.
-    global_step: Initial global step to save in checkpoint.
-    model_dir: Directory into which checkpoint is saved.
-    batch_norm_vars: Variables used for batch normalization.
-  """
-  weights, biases = zip(*weights_and_biases)
-  if batch_norm_vars:
-    assert len(batch_norm_vars) == len(weights_and_biases) - 1
-    (bn_betas, bn_gammas, bn_means, bn_variances) = zip(*batch_norm_vars)
-  model_weights = {}
-
-  # Hidden layer weights.
-  for i in range(0, len(weights) - 1):
-    model_weights[HIDDEN_WEIGHTS_NAME_PATTERN % i] = weights[i]
-    model_weights[HIDDEN_BIASES_NAME_PATTERN % i] = biases[i]
-    if batch_norm_vars:
-      model_weights[BATCH_NORM_BETA_NAME_PATTERN % (i, i)] = bn_betas[i]
-      model_weights[BATCH_NORM_GAMMA_NAME_PATTERN % (i, i)] = bn_gammas[i]
-      model_weights[BATCH_NORM_MEAN_NAME_PATTERN % (i, i)] = bn_means[i]
-      model_weights[BATCH_NORM_VARIANCE_NAME_PATTERN % (i, i)] = bn_variances[i]
-
-  # Output layer weights.
-  model_weights[LOGITS_WEIGHTS_NAME] = weights[-1]
-  model_weights[LOGITS_BIASES_NAME] = biases[-1]
-
-  with ops.Graph().as_default():
-    # Create model variables.
-    for k, v in six.iteritems(model_weights):
-      variables_lib.Variable(v, name=k, dtype=dtypes.float32)
-
-    # Create non-model variables.
-    global_step_var = training_util.create_global_step()
-
-    # Initialize vars and save checkpoint.
-    with tf_session.Session() as sess:
-      variables_lib.global_variables_initializer().run()
-      global_step_var.assign(global_step).eval()
-      saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
-
-
-def mock_head(testcase, hidden_units, logits_dimension, expected_logits):
-  """Returns a mock head that validates logits values and variable names."""
-  hidden_weights_names = [(HIDDEN_WEIGHTS_NAME_PATTERN + '/part_0:0') % i
-                          for i in range(len(hidden_units))]
-  hidden_biases_names = [(HIDDEN_BIASES_NAME_PATTERN + '/part_0:0') % i
-                         for i in range(len(hidden_units))]
-  expected_var_names = (
-      hidden_weights_names + hidden_biases_names +
-      [LOGITS_WEIGHTS_NAME + '/part_0:0', LOGITS_BIASES_NAME + '/part_0:0'])
-
-  def _create_tpu_estimator_spec(
-      features, mode, logits, labels, train_op_fn=None, optimizer=None):
-    del features, labels  # Not used.
-    trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-    testcase.assertItemsEqual(expected_var_names,
-                              [var.name for var in trainable_vars])
-    loss = constant_op.constant(1.)
-    assert_logits = assert_close(
-        expected_logits, logits, message='Failed for mode={}. '.format(mode))
-    with ops.control_dependencies([assert_logits]):
-      if mode == model_fn.ModeKeys.TRAIN:
-        if train_op_fn is not None:
-          train_op = train_op_fn(loss)
-        elif optimizer is not None:
-          train_op = optimizer.minimize(loss, global_step=None)
-        return model_fn._TPUEstimatorSpec(
-            mode=mode, loss=loss, train_op=train_op)
-      elif mode == model_fn.ModeKeys.EVAL:
-        return model_fn._TPUEstimatorSpec(
-            mode=mode, loss=array_ops.identity(loss))
-      elif mode == model_fn.ModeKeys.PREDICT:
-        return model_fn._TPUEstimatorSpec(
-            mode=mode, predictions={'logits': array_ops.identity(logits)})
-      else:
-        testcase.fail('Invalid mode: {}'.format(mode))
-
-  def _create_estimator_spec(
-      features, mode, logits, labels, train_op_fn=None, optimizer=None):
-    tpu_spec = _create_tpu_estimator_spec(
-        features, mode, logits, labels, train_op_fn, optimizer)
-    return tpu_spec.as_estimator_spec()
-
-  head = test.mock.NonCallableMagicMock(spec=head_lib._Head)
-  head.logits_dimension = logits_dimension
-  head._create_tpu_estimator_spec = test.mock.MagicMock(
-      wraps=_create_tpu_estimator_spec)
-  head.create_estimator_spec = test.mock.MagicMock(
-      wraps=_create_estimator_spec)
-
-  return head
-
-
-def mock_optimizer(testcase, hidden_units, expected_loss=None):
-  """Creates a mock optimizer to test the train method.
-
-  Args:
-    testcase: A TestCase instance.
-    hidden_units: Iterable of integer sizes for the hidden layers.
-    expected_loss: If given, will assert the loss value.
-
-  Returns:
-    A mock Optimizer.
-  """
-  hidden_weights_names = [(HIDDEN_WEIGHTS_NAME_PATTERN + '/part_0:0') % i
-                          for i in range(len(hidden_units))]
-  hidden_biases_names = [(HIDDEN_BIASES_NAME_PATTERN + '/part_0:0') % i
-                         for i in range(len(hidden_units))]
-  expected_var_names = (
-      hidden_weights_names + hidden_biases_names +
-      [LOGITS_WEIGHTS_NAME + '/part_0:0', LOGITS_BIASES_NAME + '/part_0:0'])
-
-  def _minimize(loss, global_step=None, var_list=None):
-    """Mock of optimizer.minimize."""
-    trainable_vars = var_list or ops.get_collection(
-        ops.GraphKeys.TRAINABLE_VARIABLES)
-    testcase.assertItemsEqual(expected_var_names,
-                              [var.name for var in trainable_vars])
-
-    # Verify loss. We can't check the value directly, so we add an assert op.
-    testcase.assertEquals(0, loss.shape.ndims)
-    if expected_loss is None:
-      if global_step is not None:
-        return state_ops.assign_add(global_step, 1).op
-      return control_flow_ops.no_op()
-    assert_loss = assert_close(
-        math_ops.to_float(expected_loss, name='expected'),
-        loss,
-        name='assert_loss')
-    with ops.control_dependencies((assert_loss,)):
-      if global_step is not None:
-        return state_ops.assign_add(global_step, 1).op
-      return control_flow_ops.no_op()
-
-  optimizer_mock = test.mock.NonCallableMagicMock(
-      spec=optimizer_lib.Optimizer,
-      wraps=optimizer_lib.Optimizer(use_locking=False, name='my_optimizer'))
-  optimizer_mock.minimize = test.mock.MagicMock(wraps=_minimize)
-
-  return optimizer_mock
-
-
-class BaseDNNModelFnTest(object):
-  """Tests that _dnn_model_fn passes expected logits to mock head."""
-
-  def __init__(self, dnn_model_fn, fc_impl=feature_column):
-    self._dnn_model_fn = dnn_model_fn
-    self._fc_impl = fc_impl
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_logits(self, mode, hidden_units, logits_dimension, inputs,
-                   expected_logits):
-    """Tests that the expected logits are passed to mock head."""
-    with ops.Graph().as_default():
-      training_util.create_global_step()
-      head = mock_head(
-          self,
-          hidden_units=hidden_units,
-          logits_dimension=logits_dimension,
-          expected_logits=expected_logits)
-      estimator_spec = self._dnn_model_fn(
-          features={'age': constant_op.constant(inputs)},
-          labels=constant_op.constant([[1]]),
-          mode=mode,
-          head=head,
-          hidden_units=hidden_units,
-          feature_columns=[
-              self._fc_impl.numeric_column(
-                  'age', shape=np.array(inputs).shape[1:])
-          ],
-          optimizer=mock_optimizer(self, hidden_units))
-      with monitored_session.MonitoredTrainingSession(
-          checkpoint_dir=self._model_dir) as sess:
-        if mode == model_fn.ModeKeys.TRAIN:
-          sess.run(estimator_spec.train_op)
-        elif mode == model_fn.ModeKeys.EVAL:
-          sess.run(estimator_spec.loss)
-        elif mode == model_fn.ModeKeys.PREDICT:
-          sess.run(estimator_spec.predictions)
-        else:
-          self.fail('Invalid mode: {}'.format(mode))
-
-  def test_one_dim_logits(self):
-    """Tests one-dimensional logits.
-
-    input_layer = [[10]]
-    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
-    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
-                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
-    logits = [[-1*2.38 +1*0 +0.3]] = [[-2.08]]
-    """
-    base_global_step = 100
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=1,
-          inputs=[[10.]],
-          expected_logits=[[-2.08]])
-
-  def test_multi_dim_logits(self):
-    """Tests multi-dimensional logits.
-
-    input_layer = [[10]]
-    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
-    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
-                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
-    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38]]
-           = [[-2.08, 2.08, 1.19]]
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
-                                                 [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      base_global_step, self._model_dir)
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=3,
-          inputs=[[10.]],
-          expected_logits=[[-2.08, 2.08, 1.19]])
-
-  def test_multi_example_multi_dim_logits(self):
-    """Tests multiple examples and multi-dimensional logits.
-
-    input_layer = [[10], [5]]
-    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)],
-                      [relu(0.6*5 +0.1), relu(0.5*5 -0.1)]]
-                   = [[6.1, 4.9], [3.1, 2.4]]
-    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)],
-                      [relu(1*3.1 -0.8*2.4 +0.2), relu(0.8*3.1 -1*2.4 -0.1)]]
-                   = [[2.38, 0], [1.38, 0]]
-    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38],
-              [-1*1.38 +0.3, 1*1.38 -0.3, 0.5*1.38]]
-           = [[-2.08, 2.08, 1.19], [-1.08, 1.08, 0.69]]
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
-                                                 [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      base_global_step, self._model_dir)
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=3,
-          inputs=[[10.], [5.]],
-          expected_logits=[[-2.08, 2.08, 1.19], [-1.08, 1.08, .69]])
-
-  def test_multi_dim_input_one_dim_logits(self):
-    """Tests multi-dimensional inputs and one-dimensional logits.
-
-    input_layer = [[10, 8]]
-    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
-                   = [[1.3, 0.9]]
-    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
-                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
-    logits = [[-1*0.78 +1*0 +0.3]] = [[-0.48]]
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5], [-.6, -.5]],
-                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-                       ([[-1.], [1.]], [.3]),), base_global_step,
-                      self._model_dir)
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=1,
-          inputs=[[10., 8.]],
-          expected_logits=[[-0.48]])
-
-  def test_multi_dim_input_multi_dim_logits(self):
-    """Tests multi-dimensional inputs and multi-dimensional logits.
-
-    input_layer = [[10, 8]]
-    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
-                   = [[1.3, 0.9]]
-    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
-                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
-    logits = [[-1*0.78 + 0.3, 1*0.78 -0.3, 0.5*0.78]] = [[-0.48, 0.48, 0.39]]
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5], [-.6, -.5]],
-                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      base_global_step, self._model_dir)
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=3,
-          inputs=[[10., 8.]],
-          expected_logits=[[-0.48, 0.48, 0.39]])
-
-  def test_multi_feature_column_multi_dim_logits(self):
-    """Tests multiple feature columns and multi-dimensional logits.
-
-    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
-    difference is that the input consists of two 1D feature columns, instead of
-    one 2D feature column.
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5], [-.6, -.5]],
-                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      base_global_step, self._model_dir)
-    hidden_units = (2, 2)
-    logits_dimension = 3
-    inputs = ([[10.]], [[8.]])
-    expected_logits = [[-0.48, 0.48, 0.39]]
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      with ops.Graph().as_default():
-        training_util.create_global_step()
-        head = mock_head(
-            self,
-            hidden_units=hidden_units,
-            logits_dimension=logits_dimension,
-            expected_logits=expected_logits)
-        estimator_spec = self._dnn_model_fn(
-            features={
-                'age': constant_op.constant(inputs[0]),
-                'height': constant_op.constant(inputs[1])
-            },
-            labels=constant_op.constant([[1]]),
-            mode=mode,
-            head=head,
-            hidden_units=hidden_units,
-            feature_columns=[
-                self._fc_impl.numeric_column('age'),
-                self._fc_impl.numeric_column('height')
-            ],
-            optimizer=mock_optimizer(self, hidden_units))
-        with monitored_session.MonitoredTrainingSession(
-            checkpoint_dir=self._model_dir) as sess:
-          if mode == model_fn.ModeKeys.TRAIN:
-            sess.run(estimator_spec.train_op)
-          elif mode == model_fn.ModeKeys.EVAL:
-            sess.run(estimator_spec.loss)
-          elif mode == model_fn.ModeKeys.PREDICT:
-            sess.run(estimator_spec.predictions)
-          else:
-            self.fail('Invalid mode: {}'.format(mode))
-
-  def test_multi_feature_column_mix_multi_dim_logits(self):
-    """Tests multiple feature columns and multi-dimensional logits.
-
-    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
-    difference is that the input consists of two 1D feature columns, instead of
-    one 2D feature column.
-    """
-    base_global_step = 100
-    create_checkpoint((
-        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
-        ([[1., .8], [-.8, -1.]], [.2, -.2]),
-        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
-    ), base_global_step, self._model_dir)
-    hidden_units = (2, 2)
-    logits_dimension = 3
-    inputs = ([[10.]], [[8.]])
-    expected_logits = [[-0.48, 0.48, 0.39]]
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      with ops.Graph().as_default():
-        training_util.create_global_step()
-        head = mock_head(
-            self,
-            hidden_units=hidden_units,
-            logits_dimension=logits_dimension,
-            expected_logits=expected_logits)
-        estimator_spec = self._dnn_model_fn(
-            features={
-                'age': constant_op.constant(inputs[0]),
-                'height': constant_op.constant(inputs[1])
-            },
-            labels=constant_op.constant([[1]]),
-            mode=mode,
-            head=head,
-            hidden_units=hidden_units,
-            feature_columns=[
-                feature_column.numeric_column('age'),
-                feature_column_v2.numeric_column('height')
-            ],
-            optimizer=mock_optimizer(self, hidden_units))
-        with monitored_session.MonitoredTrainingSession(
-            checkpoint_dir=self._model_dir) as sess:
-          if mode == model_fn.ModeKeys.TRAIN:
-            sess.run(estimator_spec.train_op)
-          elif mode == model_fn.ModeKeys.EVAL:
-            sess.run(estimator_spec.loss)
-          elif mode == model_fn.ModeKeys.PREDICT:
-            sess.run(estimator_spec.predictions)
-          else:
-            self.fail('Invalid mode: {}'.format(mode))
-
-  def test_features_tensor_raises_value_error(self):
-    """Tests that passing a Tensor for features raises a ValueError."""
-    hidden_units = (2, 2)
-    logits_dimension = 3
-    inputs = ([[10.]], [[8.]])
-    expected_logits = [[0, 0, 0]]
-
-    with ops.Graph().as_default():
-      training_util.create_global_step()
-      head = mock_head(
-          self,
-          hidden_units=hidden_units,
-          logits_dimension=logits_dimension,
-          expected_logits=expected_logits)
-      with self.assertRaisesRegexp(ValueError, 'features should be a dict'):
-        self._dnn_model_fn(
-            features=constant_op.constant(inputs),
-            labels=constant_op.constant([[1]]),
-            mode=model_fn.ModeKeys.TRAIN,
-            head=head,
-            hidden_units=hidden_units,
-            feature_columns=[
-                self._fc_impl.numeric_column(
-                    'age', shape=np.array(inputs).shape[1:])
-            ],
-            optimizer=mock_optimizer(self, hidden_units))
-
-
-class BaseDNNLogitFnTest(object):
-  """Tests correctness of logits calculated from _dnn_logit_fn_builder."""
-
-  def __init__(self, dnn_logit_fn_builder, fc_impl=feature_column):
-    self._dnn_logit_fn_builder = dnn_logit_fn_builder
-    self._fc_impl = fc_impl
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_logits(self,
-                   mode,
-                   hidden_units,
-                   logits_dimension,
-                   inputs,
-                   expected_logits,
-                   batch_norm=False):
-    """Tests that the expected logits are calculated."""
-    with ops.Graph().as_default():
-      # Global step needed for MonitoredSession, which is in turn used to
-      # explicitly set variable weights through a checkpoint.
-      training_util.create_global_step()
-      # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
-      # the checkpoint naming is shared.
-      with variable_scope.variable_scope('dnn'):
-        input_layer_partitioner = (
-            partitioned_variables.min_max_variable_partitioner(
-                max_partitions=0, min_slice_size=64 << 20))
-        logit_fn = self._dnn_logit_fn_builder(
-            units=logits_dimension,
-            hidden_units=hidden_units,
-            feature_columns=[
-                self._fc_impl.numeric_column(
-                    'age', shape=np.array(inputs).shape[1:])
-            ],
-            activation_fn=nn.relu,
-            dropout=None,
-            input_layer_partitioner=input_layer_partitioner,
-            batch_norm=batch_norm)
-        logits = logit_fn(
-            features={'age': constant_op.constant(inputs)}, mode=mode)
-        with monitored_session.MonitoredTrainingSession(
-            checkpoint_dir=self._model_dir) as sess:
-          self.assertAllClose(expected_logits, sess.run(logits))
-
-  def test_one_dim_logits(self):
-    """Tests one-dimensional logits.
-
-    input_layer = [[10]]
-    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
-    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
-                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
-    logits = [[-1*2.38 +1*0 +0.3]] = [[-2.08]]
-    """
-    base_global_step = 100
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=1,
-          inputs=[[10.]],
-          expected_logits=[[-2.08]])
-
-  def test_one_dim_logits_with_batch_norm(self):
-    """Tests one-dimensional logits.
-
-    input_layer = [[10]]
-    hidden_layer_0 = [[relu(0.6*10 +1), relu(0.5*10 -1)]] = [[7, 4]]
-    hidden_layer_0 = [[relu(0.6*20 +1), relu(0.5*20 -1)]] = [[13, 9]]
-
-    batch_norm_0, training (epsilon = 0.001):
-      mean1 = 1/2*(7+13) = 10,
-      variance1 = 1/2*(3^2+3^2) = 9
-      x11 = (7-10)/sqrt(9+0.001) = -0.999944449,
-      x21 = (13-10)/sqrt(9+0.001) = 0.999944449,
-
-      mean2 = 1/2*(4+9) = 6.5,
-      variance2 = 1/2*(2.5^2+.2.5^2) = 6.25
-      x12 = (4-6.5)/sqrt(6.25+0.001) = -0.99992001,
-      x22 = (9-6.5)/sqrt(6.25+0.001) = 0.99992001,
-
-    logits = [[-1*(-0.999944449) + 2*(-0.99992001) + 0.3],
-              [-1*0.999944449 + 2*0.99992001 + 0.3]]
-           = [[-0.699895571],[1.299895571]]
-
-    batch_norm_0, not training (epsilon = 0.001):
-      moving_mean1 = 0, moving_variance1 = 1
-      x11 = (7-0)/sqrt(1+0.001) = 6.996502623,
-      x21 = (13-0)/sqrt(1+0.001) = 12.993504871,
-      moving_mean2 = 0, moving_variance2 = 1
-      x12 = (4-0)/sqrt(1+0.001) = 3.998001499,
-      x22 = (9-0)/sqrt(1+0.001) = 8.995503372,
-
-    logits = [[-1*6.996502623 + 2*3.998001499 + 0.3],
-              [-1*12.993504871 + 2*8.995503372 + 0.3]]
-           = [[1.299500375],[5.297501873]]
-    """
-    base_global_step = 100
-    create_checkpoint(
-        (
-            ([[.6, .5]], [1., -1.]),
-            ([[-1.], [2.]], [.3]),
-        ),
-        base_global_step,
-        self._model_dir,
-        batch_norm_vars=([[0, 0],  # beta.
-                          [1, 1],  # gamma.
-                          [0, 0],  # moving mean.
-                          [1, 1],  # moving variance.
-                         ],))
-    self._test_logits(
-        model_fn.ModeKeys.TRAIN,
-        hidden_units=[2],
-        logits_dimension=1,
-        inputs=[[10.], [20.]],
-        expected_logits=[[-0.699895571], [1.299895571]],
-        batch_norm=True)
-    for mode in [model_fn.ModeKeys.EVAL, model_fn.ModeKeys.PREDICT]:
-      self._test_logits(
-          mode,
-          hidden_units=[2],
-          logits_dimension=1,
-          inputs=[[10.], [20.]],
-          expected_logits=[[1.299500375], [5.297501873]],
-          batch_norm=True)
-
-  def test_multi_dim_logits(self):
-    """Tests multi-dimensional logits.
-
-    input_layer = [[10]]
-    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
-    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
-                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
-    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38]]
-           = [[-2.08, 2.08, 1.19]]
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
-                                                 [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      base_global_step, self._model_dir)
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=3,
-          inputs=[[10.]],
-          expected_logits=[[-2.08, 2.08, 1.19]])
-
-  def test_multi_example_multi_dim_logits(self):
-    """Tests multiple examples and multi-dimensional logits.
-
-    input_layer = [[10], [5]]
-    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)],
-                      [relu(0.6*5 +0.1), relu(0.5*5 -0.1)]]
-                   = [[6.1, 4.9], [3.1, 2.4]]
-    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)],
-                      [relu(1*3.1 -0.8*2.4 +0.2), relu(0.8*3.1 -1*2.4 -0.1)]]
-                   = [[2.38, 0], [1.38, 0]]
-    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38],
-              [-1*1.38 +0.3, 1*1.38 -0.3, 0.5*1.38]]
-           = [[-2.08, 2.08, 1.19], [-1.08, 1.08, 0.69]]
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
-                                                 [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      base_global_step, self._model_dir)
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=3,
-          inputs=[[10.], [5.]],
-          expected_logits=[[-2.08, 2.08, 1.19], [-1.08, 1.08, .69]])
-
-  def test_multi_dim_input_one_dim_logits(self):
-    """Tests multi-dimensional inputs and one-dimensional logits.
-
-    input_layer = [[10, 8]]
-    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
-                   = [[1.3, 0.9]]
-    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
-                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
-    logits = [[-1*0.78 +1*0 +0.3]] = [[-0.48]]
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5], [-.6, -.5]],
-                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-                       ([[-1.], [1.]], [.3]),), base_global_step,
-                      self._model_dir)
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=1,
-          inputs=[[10., 8.]],
-          expected_logits=[[-0.48]])
-
-  def test_multi_dim_input_multi_dim_logits(self):
-    """Tests multi-dimensional inputs and multi-dimensional logits.
-
-    input_layer = [[10, 8]]
-    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
-                   = [[1.3, 0.9]]
-    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
-                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
-    logits = [[-1*0.78 + 0.3, 1*0.78 -0.3, 0.5*0.78]] = [[-0.48, 0.48, 0.39]]
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5], [-.6, -.5]],
-                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      base_global_step, self._model_dir)
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      self._test_logits(
-          mode,
-          hidden_units=(2, 2),
-          logits_dimension=3,
-          inputs=[[10., 8.]],
-          expected_logits=[[-0.48, 0.48, 0.39]])
-
-  def test_multi_feature_column_multi_dim_logits(self):
-    """Tests multiple feature columns and multi-dimensional logits.
-
-    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
-    difference is that the input consists of two 1D feature columns, instead of
-    one 2D feature column.
-    """
-    base_global_step = 100
-    create_checkpoint((([[.6, .5], [-.6, -.5]],
-                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      base_global_step, self._model_dir)
-
-    hidden_units = (2, 2)
-    logits_dimension = 3
-    inputs = ([[10.]], [[8.]])
-    expected_logits = [[-0.48, 0.48, 0.39]]
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      with ops.Graph().as_default():
-        # Global step needed for MonitoredSession, which is in turn used to
-        # explicitly set variable weights through a checkpoint.
-        training_util.create_global_step()
-        # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
-        # the checkpoint naming is shared.
-        with variable_scope.variable_scope('dnn'):
-          input_layer_partitioner = (
-              partitioned_variables.min_max_variable_partitioner(
-                  max_partitions=0, min_slice_size=64 << 20))
-          logit_fn = self._dnn_logit_fn_builder(
-              units=logits_dimension,
-              hidden_units=hidden_units,
-              feature_columns=[
-                  self._fc_impl.numeric_column('age'),
-                  self._fc_impl.numeric_column('height')
-              ],
-              activation_fn=nn.relu,
-              dropout=None,
-              input_layer_partitioner=input_layer_partitioner,
-              batch_norm=False)
-          logits = logit_fn(
-              features={
-                  'age': constant_op.constant(inputs[0]),
-                  'height': constant_op.constant(inputs[1])
-              },
-              mode=mode)
-          with monitored_session.MonitoredTrainingSession(
-              checkpoint_dir=self._model_dir) as sess:
-            self.assertAllClose(expected_logits, sess.run(logits))
-
-  def test_multi_feature_column_mix_multi_dim_logits(self):
-    """Tests multiple feature columns and multi-dimensional logits.
-
-    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
-    difference is that the input consists of two 1D feature columns, instead of
-    one 2D feature column.
-    """
-    base_global_step = 100
-    create_checkpoint((
-        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
-        ([[1., .8], [-.8, -1.]], [.2, -.2]),
-        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
-    ), base_global_step, self._model_dir)
-
-    hidden_units = (2, 2)
-    logits_dimension = 3
-    inputs = ([[10.]], [[8.]])
-    expected_logits = [[-0.48, 0.48, 0.39]]
-
-    for mode in [
-        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
-        model_fn.ModeKeys.PREDICT
-    ]:
-      with ops.Graph().as_default():
-        # Global step needed for MonitoredSession, which is in turn used to
-        # explicitly set variable weights through a checkpoint.
-        training_util.create_global_step()
-        # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
-        # the checkpoint naming is shared.
-        with variable_scope.variable_scope('dnn'):
-          input_layer_partitioner = (
-              partitioned_variables.min_max_variable_partitioner(
-                  max_partitions=0, min_slice_size=64 << 20))
-          logit_fn = self._dnn_logit_fn_builder(
-              units=logits_dimension,
-              hidden_units=hidden_units,
-              feature_columns=[
-                  feature_column.numeric_column('age'),
-                  feature_column_v2.numeric_column('height')
-              ],
-              activation_fn=nn.relu,
-              dropout=None,
-              input_layer_partitioner=input_layer_partitioner,
-              batch_norm=False)
-          logits = logit_fn(
-              features={
-                  'age': constant_op.constant(inputs[0]),
-                  'height': constant_op.constant(inputs[1])
-              },
-              mode=mode)
-          with monitored_session.MonitoredTrainingSession(
-              checkpoint_dir=self._model_dir) as sess:
-            self.assertAllClose(expected_logits, sess.run(logits))
-
-
-class BaseDNNWarmStartingTest(object):
-
-  def __init__(self,
-               _dnn_classifier_fn,
-               _dnn_regressor_fn,
-               fc_impl=feature_column):
-    self._dnn_classifier_fn = _dnn_classifier_fn
-    self._dnn_regressor_fn = _dnn_regressor_fn
-    self._fc_impl = fc_impl
-
-  def setUp(self):
-    # Create a directory to save our old checkpoint and vocabularies to.
-    self._ckpt_and_vocab_dir = tempfile.mkdtemp()
-
-    # Make a dummy input_fn.
-    def _input_fn():
-      features = {
-          'city': [['Palo Alto'], ['Mountain View']],
-          'locality': [['Palo Alto'], ['Mountain View']],
-          'occupation': [['doctor'], ['consultant']]
-      }
-      return features, [0, 1]
-
-    self._input_fn = _input_fn
-
-  def tearDown(self):
-    # Clean up checkpoint / vocab dir.
-    writer_cache.FileWriterCache.clear()
-    shutil.rmtree(self._ckpt_and_vocab_dir)
-
-  def assertAllNotClose(self, t1, t2):
-    """Helper assert for arrays."""
-    sum_of_abs_diff = 0.0
-    for x, y in zip(t1, t2):
-      try:
-        for a, b in zip(x, y):
-          sum_of_abs_diff += abs(b - a)
-      except TypeError:
-        sum_of_abs_diff += abs(y - x)
-    self.assertGreater(sum_of_abs_diff, 0)
-
-  def test_classifier_basic_warm_starting(self):
-    """Tests correctness of DNNClassifier default warm-start."""
-    city = self._fc_impl.embedding_column(
-        self._fc_impl.categorical_column_with_vocabulary_list(
-            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-        dimension=5)
-
-    # Create a DNNClassifier and train to save a checkpoint.
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=[256, 128],
-        feature_columns=[city],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        optimizer='SGD')
-    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second DNNClassifier, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).
-    warm_started_dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=[256, 128],
-        feature_columns=[city],
-        n_classes=4,
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        warm_start_from=dnn_classifier.model_dir)
-
-    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
-    for variable_name in warm_started_dnn_classifier.get_variable_names():
-      self.assertAllClose(
-          dnn_classifier.get_variable_value(variable_name),
-          warm_started_dnn_classifier.get_variable_value(variable_name))
-
-  def test_regressor_basic_warm_starting(self):
-    """Tests correctness of DNNRegressor default warm-start."""
-    city = self._fc_impl.embedding_column(
-        self._fc_impl.categorical_column_with_vocabulary_list(
-            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-        dimension=5)
-
-    # Create a DNNRegressor and train to save a checkpoint.
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=[256, 128],
-        feature_columns=[city],
-        model_dir=self._ckpt_and_vocab_dir,
-        optimizer='SGD')
-    dnn_regressor.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second DNNRegressor, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).
-    warm_started_dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=[256, 128],
-        feature_columns=[city],
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        warm_start_from=dnn_regressor.model_dir)
-
-    warm_started_dnn_regressor.train(input_fn=self._input_fn, max_steps=1)
-    for variable_name in warm_started_dnn_regressor.get_variable_names():
-      self.assertAllClose(
-          dnn_regressor.get_variable_value(variable_name),
-          warm_started_dnn_regressor.get_variable_value(variable_name))
-
-  def test_warm_starting_selective_variables(self):
-    """Tests selecting variables to warm-start."""
-    city = self._fc_impl.embedding_column(
-        self._fc_impl.categorical_column_with_vocabulary_list(
-            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-        dimension=5)
-
-    # Create a DNNClassifier and train to save a checkpoint.
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=[256, 128],
-        feature_columns=[city],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        optimizer='SGD')
-    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second DNNClassifier, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).
-    warm_started_dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=[256, 128],
-        feature_columns=[city],
-        n_classes=4,
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        # The provided regular expression will only warm-start the city
-        # embedding, not the kernels and biases of the hidden weights.
-        warm_start_from=estimator.WarmStartSettings(
-            ckpt_to_initialize_from=dnn_classifier.model_dir,
-            vars_to_warm_start='.*(city).*'))
-
-    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
-    for variable_name in warm_started_dnn_classifier.get_variable_names():
-      if 'city' in variable_name:
-        self.assertAllClose(
-            dnn_classifier.get_variable_value(variable_name),
-            warm_started_dnn_classifier.get_variable_value(variable_name))
-      elif 'bias' in variable_name:
-        # Hidden layer biases are zero-initialized.
-        bias_values = warm_started_dnn_classifier.get_variable_value(
-            variable_name)
-        self.assertAllClose(np.zeros_like(bias_values), bias_values)
-      elif 'kernel' in variable_name:
-        # We can't override the glorot uniform initializer used for the kernels
-        # in the dense layers, so just make sure we're not getting the same
-        # values from the old checkpoint.
-        self.assertAllNotClose(
-            dnn_classifier.get_variable_value(variable_name),
-            warm_started_dnn_classifier.get_variable_value(variable_name))
-
-  def test_warm_starting_with_vocab_remapping_and_partitioning(self):
-    """Tests warm-starting with vocab remapping and partitioning."""
-    vocab_list = ['doctor', 'lawyer', 'consultant']
-    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
-    with open(vocab_file, 'w') as f:
-      f.write('\n'.join(vocab_list))
-    occupation = self._fc_impl.embedding_column(
-        self._fc_impl.categorical_column_with_vocabulary_file(
-            'occupation',
-            vocabulary_file=vocab_file,
-            vocabulary_size=len(vocab_list)),
-        dimension=2)
-
-    # Create a DNNClassifier and train to save a checkpoint.
-    partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2)
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=[256, 128],
-        feature_columns=[occupation],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        optimizer='SGD',
-        input_layer_partitioner=partitioner)
-    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second DNNClassifier, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).  Use a new FeatureColumn with a
-    # different vocabulary for occupation.
-    new_vocab_list = ['doctor', 'consultant', 'engineer']
-    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
-                                  'new_occupation_vocab')
-    with open(new_vocab_file, 'w') as f:
-      f.write('\n'.join(new_vocab_list))
-    new_occupation = self._fc_impl.embedding_column(
-        self._fc_impl.categorical_column_with_vocabulary_file(
-            'occupation',
-            vocabulary_file=new_vocab_file,
-            vocabulary_size=len(new_vocab_list)),
-        dimension=2)
-    # We can create our VocabInfo object from the new and old occupation
-    # FeatureColumn's.
-    occupation_vocab_info = estimator.VocabInfo(
-        new_vocab=new_occupation.categorical_column.vocabulary_file,
-        new_vocab_size=new_occupation.categorical_column.vocabulary_size,
-        num_oov_buckets=new_occupation.categorical_column.num_oov_buckets,
-        old_vocab=occupation.categorical_column.vocabulary_file,
-        old_vocab_size=occupation.categorical_column.vocabulary_size,
-        # Can't use constant_initializer with load_and_remap.  In practice,
-        # use a truncated normal initializer.
-        backup_initializer=init_ops.random_uniform_initializer(
-            minval=0.39, maxval=0.39))
-    warm_started_dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=[256, 128],
-        feature_columns=[occupation],
-        n_classes=4,
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        warm_start_from=estimator.WarmStartSettings(
-            ckpt_to_initialize_from=dnn_classifier.model_dir,
-            var_name_to_vocab_info={
-                OCCUPATION_EMBEDDING_NAME: occupation_vocab_info
-            },
-            # Explicitly providing None here will only warm-start variables
-            # referenced in var_name_to_vocab_info (no hidden weights will be
-            # warmstarted).
-            vars_to_warm_start=None),
-        input_layer_partitioner=partitioner)
-
-    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
-    # 'doctor' was ID-0 and still ID-0.
-    self.assertAllClose(
-        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[0, :],
-        warm_started_dnn_classifier.get_variable_value(
-            OCCUPATION_EMBEDDING_NAME)[0, :])
-    # 'consultant' was ID-2 and now ID-1.
-    self.assertAllClose(
-        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[2, :],
-        warm_started_dnn_classifier.get_variable_value(
-            OCCUPATION_EMBEDDING_NAME)[1, :])
-    # 'engineer' is a new entry and should be initialized with the
-    # backup_initializer in VocabInfo.
-    self.assertAllClose([0.39] * 2,
-                        warm_started_dnn_classifier.get_variable_value(
-                            OCCUPATION_EMBEDDING_NAME)[2, :])
-    for variable_name in warm_started_dnn_classifier.get_variable_names():
-      if 'bias' in variable_name:
-        # Hidden layer biases are zero-initialized.
-        bias_values = warm_started_dnn_classifier.get_variable_value(
-            variable_name)
-        self.assertAllClose(np.zeros_like(bias_values), bias_values)
-      elif 'kernel' in variable_name:
-        # We can't override the glorot uniform initializer used for the kernels
-        # in the dense layers, so just make sure we're not getting the same
-        # values from the old checkpoint.
-        self.assertAllNotClose(
-            dnn_classifier.get_variable_value(variable_name),
-            warm_started_dnn_classifier.get_variable_value(variable_name))
-
-  def test_warm_starting_with_naming_change(self):
-    """Tests warm-starting with a Tensor name remapping."""
-    locality = self._fc_impl.embedding_column(
-        self._fc_impl.categorical_column_with_vocabulary_list(
-            'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
-        dimension=5)
-
-    # Create a DNNClassifier and train to save a checkpoint.
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=[256, 128],
-        feature_columns=[locality],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        optimizer='SGD')
-    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second DNNClassifier, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).
-    city = self._fc_impl.embedding_column(
-        self._fc_impl.categorical_column_with_vocabulary_list(
-            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
-        dimension=5)
-    warm_started_dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=[256, 128],
-        feature_columns=[city],
-        n_classes=4,
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        # The 'city' variable correspond to the 'locality' variable in the
-        # previous model.
-        warm_start_from=estimator.WarmStartSettings(
-            ckpt_to_initialize_from=dnn_classifier.model_dir,
-            var_name_to_prev_var_name={
-                CITY_EMBEDDING_NAME:
-                    CITY_EMBEDDING_NAME.replace('city', 'locality')
-            }))
-
-    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
-    for variable_name in warm_started_dnn_classifier.get_variable_names():
-      if 'city' in variable_name:
-        self.assertAllClose(
-            dnn_classifier.get_variable_value(
-                CITY_EMBEDDING_NAME.replace('city', 'locality')),
-            warm_started_dnn_classifier.get_variable_value(CITY_EMBEDDING_NAME))
-      else:
-        self.assertAllClose(
-            dnn_classifier.get_variable_value(variable_name),
-            warm_started_dnn_classifier.get_variable_value(variable_name))
-
-
-class BaseDNNClassifierEvaluateTest(object):
-
-  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
-    self._dnn_classifier_fn = dnn_classifier_fn
-    self._fc_impl = fc_impl
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_one_dim(self):
-    """Asserts evaluation metrics for one-dimensional input and logits."""
-    global_step = 100
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
-
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=(2, 2),
-        feature_columns=[self._fc_impl.numeric_column('age')],
-        model_dir=self._model_dir)
-    def _input_fn():
-      # batch_size = 2, one false label, and one true.
-      return {'age': [[10.], [10.]]}, [[1], [0]]
-    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [[-2.08], [-2.08]] =>
-    # logistic = 1/(1 + exp(-logits)) = [[0.11105597], [0.11105597]]
-    # loss = -1. * log(0.111) -1. * log(0.889) = 2.31544200
-    expected_loss = 2.31544200
-    self.assertAllClose({
-        metric_keys.MetricKeys.LOSS: expected_loss,
-        metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2.,
-        metric_keys.MetricKeys.ACCURACY: 0.5,
-        metric_keys.MetricKeys.PRECISION: 0.0,
-        metric_keys.MetricKeys.RECALL: 0.0,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 0.11105597,
-        metric_keys.MetricKeys.LABEL_MEAN: 0.5,
-        metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
-        # There is no good way to calculate AUC for only two data points. But
-        # that is what the algorithm returns.
-        metric_keys.MetricKeys.AUC: 0.5,
-        metric_keys.MetricKeys.AUC_PR: 0.75,
-
-        ops.GraphKeys.GLOBAL_STEP: global_step
-    }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))
-
-  def test_multi_dim(self):
-    """Asserts evaluation metrics for multi-dimensional input and logits."""
-    global_step = 100
-    create_checkpoint(
-        (([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
-                                               [.2, -.2]),
-         ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3,
-                                           .0]),), global_step, self._model_dir)
-    n_classes = 3
-
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=(2, 2),
-        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    def _input_fn():
-      # batch_size = 2, one false label, and one true.
-      return {'age': [[10., 8.], [10., 8.]]}, [[1], [0]]
-    # Uses identical numbers as
-    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [[-0.48, 0.48, 0.39], [-0.48, 0.48, 0.39]]
-    # probabilities = exp(logits)/sum(exp(logits))
-    #               = [[0.16670536, 0.43538380, 0.39791084],
-    #                  [0.16670536, 0.43538380, 0.39791084]]
-    # loss = -log(0.43538380) - log(0.16670536)
-    expected_loss = 2.62305466
-    self.assertAllClose({
-        metric_keys.MetricKeys.LOSS: expected_loss,
-        metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
-        metric_keys.MetricKeys.ACCURACY: 0.5,
-        ops.GraphKeys.GLOBAL_STEP: global_step
-    }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))
-
-  def test_float_labels(self):
-    """Asserts evaluation metrics for float labels in binary classification."""
-    global_step = 100
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
-
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=(2, 2),
-        feature_columns=[self._fc_impl.numeric_column('age')],
-        model_dir=self._model_dir)
-    def _input_fn():
-      # batch_size = 2, one false label, and one true.
-      return {'age': [[10.], [10.]]}, [[0.8], [0.4]]
-    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [[-2.08], [-2.08]] =>
-    # logistic = 1/(1 + exp(-logits)) = [[0.11105597], [0.11105597]]
-    # loss = -0.8 * log(0.111) -0.2 * log(0.889)
-    #        -0.4 * log(0.111) -0.6 * log(0.889) = 2.7314420
-    metrics = dnn_classifier.evaluate(input_fn=_input_fn, steps=1)
-    self.assertAlmostEqual(2.7314420, metrics[metric_keys.MetricKeys.LOSS])
-
-  def test_multi_dim_weights(self):
-    """Tests evaluation with weights."""
-    # Uses same checkpoint with test_multi_dims
-    global_step = 100
-    create_checkpoint((([[.6, .5], [-.6, -.5]],
-                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      global_step, self._model_dir)
-    n_classes = 3
-
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=(2, 2),
-        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
-        n_classes=n_classes,
-        weight_column='w',
-        model_dir=self._model_dir)
-
-    def _input_fn():
-      # batch_size = 2, one false label, and one true.
-      return {'age': [[10., 8.], [10., 8.]], 'w': [[10.], [100.]]}, [[1], [0]]
-
-    # Uses identical numbers as test_multi_dims
-    # See that test for calculation of logits.
-    # loss = -log(0.43538380)*10 - log(0.16670536)*100
-    expected_loss = 187.468007
-    metrics = dnn_classifier.evaluate(input_fn=_input_fn, steps=1)
-    self.assertAlmostEqual(
-        expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)
-
-
-class BaseDNNRegressorEvaluateTest(object):
-
-  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
-    self._dnn_regressor_fn = dnn_regressor_fn
-    self._fc_impl = fc_impl
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_one_dim(self):
-    """Asserts evaluation metrics for one-dimensional input and logits."""
-    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
-    global_step = 100
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
-
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=(2, 2),
-        feature_columns=[self._fc_impl.numeric_column('age')],
-        model_dir=self._model_dir)
-    def _input_fn():
-      return {'age': [[10.]]}, [[1.]]
-    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [[-2.08]] => predictions = [-2.08].
-    # loss = (1+2.08)^2 = 9.4864
-    expected_loss = 9.4864
-    self.assertAllClose({
-        metric_keys.MetricKeys.LOSS: expected_loss,
-        metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
-        metric_keys.MetricKeys.PREDICTION_MEAN: -2.08,
-        metric_keys.MetricKeys.LABEL_MEAN: 1.0,
-        ops.GraphKeys.GLOBAL_STEP: global_step
-    }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
-
-  def test_multi_dim(self):
-    """Asserts evaluation metrics for multi-dimensional input and logits."""
-    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
-    global_step = 100
-    create_checkpoint(
-        (([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
-                                               [.2, -.2]),
-         ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3,
-                                           .0]),), global_step, self._model_dir)
-    label_dimension = 3
-
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=(2, 2),
-        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-    def _input_fn():
-      return {'age': [[10., 8.]]}, [[1., -1., 0.5]]
-    # Uses identical numbers as
-    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [[-0.48, 0.48, 0.39]]
-    # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
-    expected_loss = 4.3929
-    self.assertAllClose({
-        metric_keys.MetricKeys.LOSS: expected_loss,
-        metric_keys.MetricKeys.LOSS_MEAN: expected_loss / label_dimension,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 0.39 / 3.0,
-        metric_keys.MetricKeys.LABEL_MEAN: 0.5 / 3.0,
-        ops.GraphKeys.GLOBAL_STEP: global_step
-    }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
-
-  def test_multi_dim_weights(self):
-    """Asserts evaluation metrics for multi-dimensional input and logits."""
-    # same checkpoint with test_multi_dim.
-    global_step = 100
-    create_checkpoint((([[.6, .5], [-.6, -.5]],
-                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
-                      global_step, self._model_dir)
-    label_dimension = 3
-
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=(2, 2),
-        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
-        label_dimension=label_dimension,
-        weight_column='w',
-        model_dir=self._model_dir)
-
-    def _input_fn():
-      return {'age': [[10., 8.]], 'w': [10.]}, [[1., -1., 0.5]]
-
-    # Uses identical numbers as test_multi_dim.
-    # See that test for calculation of logits.
-    # loss = 4.3929*10
-    expected_loss = 43.929
-    metrics = dnn_regressor.evaluate(input_fn=_input_fn, steps=1)
-    self.assertAlmostEqual(
-        expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)
-
-
-class BaseDNNClassifierPredictTest(object):
-
-  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
-    self._dnn_classifier_fn = dnn_classifier_fn
-    self._fc_impl = fc_impl
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_one_dim(self, label_vocabulary, label_output_fn):
-    """Asserts predictions for one-dimensional input and logits."""
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),),
-        global_step=0,
-        model_dir=self._model_dir)
-
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=(2, 2),
-        label_vocabulary=label_vocabulary,
-        feature_columns=(self._fc_impl.numeric_column('x'),),
-        model_dir=self._model_dir)
-    input_fn = numpy_io.numpy_input_fn(
-        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
-    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [-2.08] =>
-    # logistic = exp(-2.08)/(1 + exp(-2.08)) = 0.11105597
-    # probabilities = [1-logistic, logistic] = [0.88894403, 0.11105597]
-    # class_ids = argmax(probabilities) = [0]
-    predictions = next(dnn_classifier.predict(input_fn=input_fn))
-    self.assertAllClose([-2.08],
-                        predictions[prediction_keys.PredictionKeys.LOGITS])
-    self.assertAllClose([0.11105597],
-                        predictions[prediction_keys.PredictionKeys.LOGISTIC])
-    self.assertAllClose(
-        [0.88894403,
-         0.11105597], predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-    self.assertAllClose([0],
-                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
-    self.assertAllEqual([label_output_fn(0)],
-                        predictions[prediction_keys.PredictionKeys.CLASSES])
-
-  def test_one_dim_without_label_vocabulary(self):
-    self._test_one_dim(label_vocabulary=None,
-                       label_output_fn=lambda x: ('%s' % x).encode())
-
-  def test_one_dim_with_label_vocabulary(self):
-    n_classes = 2
-    self._test_one_dim(
-        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
-        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
-
-  def _test_multi_dim_with_3_classes(self, label_vocabulary, label_output_fn):
-    """Asserts predictions for multi-dimensional input and logits."""
-    create_checkpoint(
-        (([[.6, .5], [-.6, -.5]], [.1, -.1]),
-         ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]],
-                                               [.3, -.3, .0]),),
-        global_step=0,
-        model_dir=self._model_dir)
-
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=(2, 2),
-        feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),),
-        label_vocabulary=label_vocabulary,
-        n_classes=3,
-        model_dir=self._model_dir)
-    input_fn = numpy_io.numpy_input_fn(
-        # Inputs shape is (batch_size, num_inputs).
-        x={'x': np.array([[10., 8.]])},
-        batch_size=1,
-        shuffle=False)
-    # Uses identical numbers as
-    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [-0.48, 0.48, 0.39] =>
-    # probabilities[i] = exp(logits[i]) / sum_j exp(logits[j]) =>
-    # probabilities = [0.16670536, 0.43538380, 0.39791084]
-    # class_ids = argmax(probabilities) = [1]
-    predictions = next(dnn_classifier.predict(input_fn=input_fn))
-    self.assertItemsEqual(
-        [prediction_keys.PredictionKeys.LOGITS,
-         prediction_keys.PredictionKeys.PROBABILITIES,
-         prediction_keys.PredictionKeys.CLASS_IDS,
-         prediction_keys.PredictionKeys.CLASSES],
-        six.iterkeys(predictions))
-    self.assertAllClose(
-        [-0.48, 0.48, 0.39], predictions[prediction_keys.PredictionKeys.LOGITS])
-    self.assertAllClose(
-        [0.16670536, 0.43538380, 0.39791084],
-        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-    self.assertAllEqual(
-        [1], predictions[prediction_keys.PredictionKeys.CLASS_IDS])
-    self.assertAllEqual(
-        [label_output_fn(1)],
-        predictions[prediction_keys.PredictionKeys.CLASSES])
-
-  def test_multi_dim_with_3_classes_but_no_label_vocab(self):
-    self._test_multi_dim_with_3_classes(
-        label_vocabulary=None,
-        label_output_fn=lambda x: ('%s' % x).encode())
-
-  def test_multi_dim_with_3_classes_and_label_vocab(self):
-    n_classes = 3
-    self._test_multi_dim_with_3_classes(
-        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
-        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
-
-
-class BaseDNNRegressorPredictTest(object):
-
-  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
-    self._dnn_regressor_fn = dnn_regressor_fn
-    self._fc_impl = fc_impl
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_one_dim(self):
-    """Asserts predictions for one-dimensional input and logits."""
-    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),),
-        global_step=0,
-        model_dir=self._model_dir)
-
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=(2, 2),
-        feature_columns=(self._fc_impl.numeric_column('x'),),
-        model_dir=self._model_dir)
-    input_fn = numpy_io.numpy_input_fn(
-        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
-    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [[-2.08]] => predictions = [-2.08].
-    self.assertAllClose({
-        prediction_keys.PredictionKeys.PREDICTIONS: [-2.08],
-    }, next(dnn_regressor.predict(input_fn=input_fn)))
-
-  def test_multi_dim(self):
-    """Asserts predictions for multi-dimensional input and logits."""
-    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
-    create_checkpoint(
-        (([[.6, .5], [-.6, -.5]], [.1, -.1]),
-         ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]],
-                                               [.3, -.3,
-                                                .0]),), 100, self._model_dir)
-
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=(2, 2),
-        feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),),
-        label_dimension=3,
-        model_dir=self._model_dir)
-    input_fn = numpy_io.numpy_input_fn(
-        # Inputs shape is (batch_size, num_inputs).
-        x={'x': np.array([[10., 8.]])},
-        batch_size=1,
-        shuffle=False)
-    # Uses identical numbers as
-    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [[-0.48, 0.48, 0.39]] => predictions = [-0.48, 0.48, 0.39]
-    self.assertAllClose({
-        prediction_keys.PredictionKeys.PREDICTIONS: [-0.48, 0.48, 0.39],
-    }, next(dnn_regressor.predict(input_fn=input_fn)))
-
-
-class _SummaryHook(session_run_hook.SessionRunHook):
-  """Saves summaries every N steps."""
-
-  def __init__(self):
-    self._summaries = []
-
-  def begin(self):
-    self._summary_op = summary_lib.merge_all()
-
-  def before_run(self, run_context):
-    return session_run_hook.SessionRunArgs({'summary': self._summary_op})
-
-  def after_run(self, run_context, run_values):
-    s = summary_pb2.Summary()
-    s.ParseFromString(run_values.results['summary'])
-    self._summaries.append(s)
-
-  def summaries(self):
-    return tuple(self._summaries)
-
-
-def _assert_checkpoint(
-    testcase, global_step, input_units, hidden_units, output_units, model_dir):
-  """Asserts checkpoint contains expected variables with proper shapes.
-
-  Args:
-    testcase: A TestCase instance.
-    global_step: Expected global step value.
-    input_units: The dimension of input layer.
-    hidden_units: Iterable of integer sizes for the hidden layers.
-    output_units: The dimension of output layer (logits).
-    model_dir: The model directory.
-  """
-  shapes = {
-      name: shape
-      for (name, shape) in checkpoint_utils.list_variables(model_dir)
-  }
-
-  # Global step.
-  testcase.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
-  testcase.assertEqual(
-      global_step,
-      checkpoint_utils.load_variable(
-          model_dir, ops.GraphKeys.GLOBAL_STEP))
-
-  # Hidden layer weights.
-  prev_layer_units = input_units
-  for i in range(len(hidden_units)):
-    layer_units = hidden_units[i]
-    testcase.assertAllEqual(
-        (prev_layer_units, layer_units),
-        shapes[HIDDEN_WEIGHTS_NAME_PATTERN % i])
-    testcase.assertAllEqual(
-        (layer_units,),
-        shapes[HIDDEN_BIASES_NAME_PATTERN % i])
-    prev_layer_units = layer_units
-
-  # Output layer weights.
-  testcase.assertAllEqual((prev_layer_units, output_units),
-                          shapes[LOGITS_WEIGHTS_NAME])
-  testcase.assertAllEqual((output_units,),
-                          shapes[LOGITS_BIASES_NAME])
-
-
-def _assert_simple_summary(testcase, expected_values, actual_summary):
-  """Assert summary the specified simple values.
-
-  Args:
-    testcase: A TestCase instance.
-    expected_values: Dict of expected tags and simple values.
-    actual_summary: `summary_pb2.Summary`.
-  """
-  testcase.assertAllClose(expected_values, {
-      v.tag: v.simple_value
-      for v in actual_summary.value if (v.tag in expected_values)
-  })
-
-
-class BaseDNNClassifierTrainTest(object):
-
-  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
-    self._dnn_classifier_fn = dnn_classifier_fn
-    self._fc_impl = fc_impl
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_from_scratch_with_default_optimizer_binary(self):
-    hidden_units = (2, 2)
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=hidden_units,
-        feature_columns=(self._fc_impl.numeric_column('age'),),
-        model_dir=self._model_dir)
-
-    # Train for a few steps, then validate final checkpoint.
-    num_steps = 5
-    dnn_classifier.train(
-        input_fn=lambda: ({'age': [[10.]]}, [[1]]), steps=num_steps)
-    _assert_checkpoint(
-        self, num_steps, input_units=1, hidden_units=hidden_units,
-        output_units=1, model_dir=self._model_dir)
-
-  def test_from_scratch_with_default_optimizer_multi_class(self):
-    hidden_units = (2, 2)
-    n_classes = 3
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=hidden_units,
-        feature_columns=(self._fc_impl.numeric_column('age'),),
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # Train for a few steps, then validate final checkpoint.
-    num_steps = 5
-    dnn_classifier.train(
-        input_fn=lambda: ({'age': [[10.]]}, [[2]]), steps=num_steps)
-    _assert_checkpoint(
-        self, num_steps, input_units=1, hidden_units=hidden_units,
-        output_units=n_classes, model_dir=self._model_dir)
-
-  def test_from_scratch_validate_summary(self):
-    hidden_units = (2, 2)
-    opt = mock_optimizer(
-        self, hidden_units=hidden_units)
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=hidden_units,
-        feature_columns=(self._fc_impl.numeric_column('age'),),
-        optimizer=opt,
-        model_dir=self._model_dir)
-    self.assertEqual(0, opt.minimize.call_count)
-
-    # Train for a few steps, then validate optimizer, summaries, and
-    # checkpoint.
-    num_steps = 5
-    summary_hook = _SummaryHook()
-    dnn_classifier.train(
-        input_fn=lambda: ({'age': [[10.]]}, [[1]]), steps=num_steps,
-        hooks=(summary_hook,))
-    self.assertEqual(1, opt.minimize.call_count)
-    _assert_checkpoint(
-        self, num_steps, input_units=1, hidden_units=hidden_units,
-        output_units=1, model_dir=self._model_dir)
-    summaries = summary_hook.summaries()
-    self.assertEqual(num_steps, len(summaries))
-    for summary in summaries:
-      summary_keys = [v.tag for v in summary.value]
-      self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)
-      self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)
-
-  def test_binary_classification(self):
-    base_global_step = 100
-    hidden_units = (2, 2)
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
-
-    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [-2.08] => probabilities = [0.889, 0.111]
-    # loss = -1. * log(0.111) = 2.19772100
-    expected_loss = 2.19772100
-    opt = mock_optimizer(
-        self, hidden_units=hidden_units, expected_loss=expected_loss)
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=hidden_units,
-        feature_columns=(self._fc_impl.numeric_column('age'),),
-        optimizer=opt,
-        model_dir=self._model_dir)
-    self.assertEqual(0, opt.minimize.call_count)
-
-    # Train for a few steps, then validate optimizer, summaries, and
-    # checkpoint.
-    num_steps = 5
-    summary_hook = _SummaryHook()
-    dnn_classifier.train(
-        input_fn=lambda: ({'age': [[10.]]}, [[1]]), steps=num_steps,
-        hooks=(summary_hook,))
-    self.assertEqual(1, opt.minimize.call_count)
-    summaries = summary_hook.summaries()
-    self.assertEqual(num_steps, len(summaries))
-    for summary in summaries:
-      _assert_simple_summary(
-          self,
-          {
-              metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
-              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
-              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': .5,
-              'dnn/dnn/logits/fraction_of_zero_values': 0.,
-              metric_keys.MetricKeys.LOSS: expected_loss,
-          },
-          summary)
-    _assert_checkpoint(
-        self, base_global_step + num_steps, input_units=1,
-        hidden_units=hidden_units, output_units=1, model_dir=self._model_dir)
-
-  def test_binary_classification_float_labels(self):
-    base_global_step = 100
-    hidden_units = (2, 2)
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
-
-    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [-2.08] => probabilities = [0.889, 0.111]
-    # loss = -0.8 * log(0.111) -0.2 * log(0.889) = 1.7817210
-    expected_loss = 1.7817210
-    opt = mock_optimizer(
-        self, hidden_units=hidden_units, expected_loss=expected_loss)
-    dnn_classifier = self._dnn_classifier_fn(
-        hidden_units=hidden_units,
-        feature_columns=(self._fc_impl.numeric_column('age'),),
-        optimizer=opt,
-        model_dir=self._model_dir)
-    self.assertEqual(0, opt.minimize.call_count)
-
-    # Train for a few steps, then validate optimizer, summaries, and
-    # checkpoint.
-    num_steps = 5
-    dnn_classifier.train(
-        input_fn=lambda: ({'age': [[10.]]}, [[0.8]]), steps=num_steps)
-    self.assertEqual(1, opt.minimize.call_count)
-
-  def test_multi_class(self):
-    n_classes = 3
-    base_global_step = 100
-    hidden_units = (2, 2)
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1., 1., .5], [-1., 1., .5]],
-          [.3, -.3, .0]),), base_global_step, self._model_dir)
-
-    # Uses identical numbers as DNNModelFnTest.test_multi_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [-2.08, 2.08, 1.19] => probabilities = [0.0109, 0.7011, 0.2879]
-    # loss = -1. * log(0.7011) = 0.35505795
-    expected_loss = 0.35505795
-    opt = mock_optimizer(
-        self, hidden_units=hidden_units, expected_loss=expected_loss)
-    dnn_classifier = self._dnn_classifier_fn(
-        n_classes=n_classes,
-        hidden_units=hidden_units,
-        feature_columns=(self._fc_impl.numeric_column('age'),),
-        optimizer=opt,
-        model_dir=self._model_dir)
-    self.assertEqual(0, opt.minimize.call_count)
-
-    # Train for a few steps, then validate optimizer, summaries, and
-    # checkpoint.
-    num_steps = 5
-    summary_hook = _SummaryHook()
-    dnn_classifier.train(
-        input_fn=lambda: ({'age': [[10.]]}, [[1]]), steps=num_steps,
-        hooks=(summary_hook,))
-    self.assertEqual(1, opt.minimize.call_count)
-    summaries = summary_hook.summaries()
-    self.assertEqual(num_steps, len(summaries))
-    for summary in summaries:
-      _assert_simple_summary(
-          self,
-          {
-              metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
-              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
-              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': .5,
-              'dnn/dnn/logits/fraction_of_zero_values': 0.,
-              metric_keys.MetricKeys.LOSS: expected_loss,
-          },
-          summary)
-    _assert_checkpoint(
-        self, base_global_step + num_steps, input_units=1,
-        hidden_units=hidden_units, output_units=n_classes,
-        model_dir=self._model_dir)
-
-
-class BaseDNNRegressorTrainTest(object):
-
-  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
-    self._dnn_regressor_fn = dnn_regressor_fn
-    self._fc_impl = fc_impl
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_from_scratch_with_default_optimizer(self):
-    hidden_units = (2, 2)
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=hidden_units,
-        feature_columns=(self._fc_impl.numeric_column('age'),),
-        model_dir=self._model_dir)
-
-    # Train for a few steps, then validate final checkpoint.
-    num_steps = 5
-    dnn_regressor.train(
-        input_fn=lambda: ({'age': ((1,),)}, ((10,),)), steps=num_steps)
-    _assert_checkpoint(
-        self, num_steps, input_units=1, hidden_units=hidden_units,
-        output_units=1, model_dir=self._model_dir)
-
-  def test_from_scratch(self):
-    hidden_units = (2, 2)
-    opt = mock_optimizer(self, hidden_units=hidden_units)
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=hidden_units,
-        feature_columns=(self._fc_impl.numeric_column('age'),),
-        optimizer=opt,
-        model_dir=self._model_dir)
-    self.assertEqual(0, opt.minimize.call_count)
-
-    # Train for a few steps, then validate optimizer, summaries, and
-    # checkpoint.
-    num_steps = 5
-    summary_hook = _SummaryHook()
-    dnn_regressor.train(
-        input_fn=lambda: ({'age': ((1,),)}, ((5.,),)), steps=num_steps,
-        hooks=(summary_hook,))
-    self.assertEqual(1, opt.minimize.call_count)
-    _assert_checkpoint(
-        self, num_steps, input_units=1, hidden_units=hidden_units,
-        output_units=1, model_dir=self._model_dir)
-    summaries = summary_hook.summaries()
-    self.assertEqual(num_steps, len(summaries))
-    for summary in summaries:
-      summary_keys = [v.tag for v in summary.value]
-      self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)
-      self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)
-
-  def test_one_dim(self):
-    """Asserts train loss for one-dimensional input and logits."""
-    base_global_step = 100
-    hidden_units = (2, 2)
-    create_checkpoint(
-        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
-         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
-
-    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [-2.08] => predictions = [-2.08]
-    # loss = (1 + 2.08)^2 = 9.4864
-    expected_loss = 9.4864
-    opt = mock_optimizer(
-        self, hidden_units=hidden_units, expected_loss=expected_loss)
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=hidden_units,
-        feature_columns=(self._fc_impl.numeric_column('age'),),
-        optimizer=opt,
-        model_dir=self._model_dir)
-    self.assertEqual(0, opt.minimize.call_count)
-
-    # Train for a few steps, then validate optimizer, summaries, and
-    # checkpoint.
-    num_steps = 5
-    summary_hook = _SummaryHook()
-    dnn_regressor.train(
-        input_fn=lambda: ({'age': [[10.]]}, [[1.]]), steps=num_steps,
-        hooks=(summary_hook,))
-    self.assertEqual(1, opt.minimize.call_count)
-    summaries = summary_hook.summaries()
-    self.assertEqual(num_steps, len(summaries))
-    for summary in summaries:
-      _assert_simple_summary(
-          self,
-          {
-              metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
-              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
-              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': 0.5,
-              'dnn/dnn/logits/fraction_of_zero_values': 0.,
-              metric_keys.MetricKeys.LOSS: expected_loss,
-          },
-          summary)
-    _assert_checkpoint(
-        self, base_global_step + num_steps, input_units=1,
-        hidden_units=hidden_units, output_units=1, model_dir=self._model_dir)
-
-  def test_multi_dim(self):
-    """Asserts train loss for multi-dimensional input and logits."""
-    base_global_step = 100
-    hidden_units = (2, 2)
-    create_checkpoint(
-        (([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
-                                               [.2, -.2]),
-         ([[-1., 1., .5], [-1., 1., .5]],
-          [.3, -.3, .0]),), base_global_step, self._model_dir)
-    input_dimension = 2
-    label_dimension = 3
+from tensorflow_estimator.python.estimator.canned import dnn_testing_utils
 
-    # Uses identical numbers as
-    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
-    # See that test for calculation of logits.
-    # logits = [[-0.48, 0.48, 0.39]]
-    # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
-    expected_loss = 4.3929
-    opt = mock_optimizer(
-        self, hidden_units=hidden_units, expected_loss=expected_loss)
-    dnn_regressor = self._dnn_regressor_fn(
-        hidden_units=hidden_units,
-        feature_columns=[
-            self._fc_impl.numeric_column('age', shape=[input_dimension])
-        ],
-        label_dimension=label_dimension,
-        optimizer=opt,
-        model_dir=self._model_dir)
-    self.assertEqual(0, opt.minimize.call_count)
+# Include attrs that start with single underscore.
+dnn_testing_utils.__all__ = [
+    s for s in dir(dnn_testing_utils) if not s.startswith('__')
+]
 
-    # Train for a few steps, then validate optimizer, summaries, and
-    # checkpoint.
-    num_steps = 5
-    summary_hook = _SummaryHook()
-    dnn_regressor.train(
-        input_fn=lambda: ({'age': [[10., 8.]]}, [[1., -1., 0.5]]),
-        steps=num_steps,
-        hooks=(summary_hook,))
-    self.assertEqual(1, opt.minimize.call_count)
-    summaries = summary_hook.summaries()
-    self.assertEqual(num_steps, len(summaries))
-    for summary in summaries:
-      _assert_simple_summary(
-          self,
-          {
-              metric_keys.MetricKeys.LOSS_MEAN: expected_loss / label_dimension,
-              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
-              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': 0.5,
-              'dnn/dnn/logits/fraction_of_zero_values': 0.,
-              metric_keys.MetricKeys.LOSS: expected_loss,
-          },
-          summary)
-    _assert_checkpoint(
-        self, base_global_step + num_steps, input_units=input_dimension,
-        hidden_units=hidden_units, output_units=label_dimension,
-        model_dir=self._model_dir)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.dnn_testing_utils import *
diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py
index 06593f9520..68befa2a9b 100644
--- a/tensorflow/python/estimator/canned/head.py
+++ b/tensorflow/python/estimator/canned/head.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,1590 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Abstractions for the head(s) of a model."""
+"""head python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import abc
-import collections
-
-import six
-
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export_output
-from tensorflow.python.feature_column import feature_column as feature_column_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import string_ops
-from tensorflow.python.ops import weights_broadcast_ops
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.summary import summary
-from tensorflow.python.training import training_util
-from tensorflow.python.util import function_utils
-
-_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-
-# The above default is defined by TF Serving, but these next three are just
-# a local convention without any special meaning.
-_CLASSIFY_SERVING_KEY = 'classification'
-_REGRESS_SERVING_KEY = 'regression'
-_PREDICT_SERVING_KEY = 'predict'
-
-
-# A LossSpec contains
-# * a scalar `Tensor` representing reduced weighted training loss
-# * a `Tensor` representing the unreduced unweighted loss
-# * a `Tensor` representing the example weights
-# * possibly processed labels (e.g. vocabulary lookup, shape manipulation, etc)
-LossSpec = collections.namedtuple(
-    'LossSpec', ['training_loss', 'unreduced_loss', 'weights',
-                 'processed_labels'])
-
-
-def _summary_key(head_name, val):
-  return '%s/%s' % (val, head_name) if head_name else val
-
-
-def _create_eval_metrics_tuple(fn, kwargs):
-  """Creates TPU eval metrics tuple.
-
-  Helper function to make eval_metric tuple (eval_metric_fn, fn_kwargs) used
-  by `TPUEstimator`. TPUEstimator requires that `eval_metric_fn` take
-  exclusively Tensor arguments. This helper can help create such a function from
-  a more generic function that can take both Tensor and non-Tensor arguments.
-
-  Args:
-    fn: A eval_metric_fn that takes both Tensor and non-Tensor arguments.
-        This function must return a dict of form
-        {'metric name': (metric_tensor, eval_op)}
-    kwargs: Dict of arguments for `fn`.
-
-  Returns:
-    `eval_metric` tuple that can be passed to a `model_fn._TPUEstimatorSpec`.
-  """
-  tensor_kwargs = {}
-  nontensor_kwargs = {}
-  for k, v in six.iteritems(kwargs):
-    if tensor_util.is_tensor(v):
-      tensor_kwargs[k] = v
-    else:
-      nontensor_kwargs[k] = v
-  def _fn(**tensors):
-    return fn(**dict(nontensor_kwargs, **tensors))
-  return (_fn, tensor_kwargs)
-
-
-class _Head(object):
-  """Interface for the head/top of a model.
-
-  Given logits (or output of a hidden layer), a Head knows how to compute
-  predictions, loss, train_op, metrics and export outputs. It is meant to:
-
-  1. Simplify writing model_fn and to make model_fn more configurable
-  2. Support wide range of machine learning models. Since most heads can work
-     with logits, they can support DNN, RNN, Wide, Wide&Deep,
-     Global objectives, Gradient boosted trees and many other types
-     of machine learning models.
-
-  Common usage:
-  Here is simplified model_fn to build a DNN regression model.
-    ```python
-    def _my_dnn_model_fn(features, labels, mode, params, config=None):
-      # Optionally your callers can pass head to model_fn as a param.
-      head = tf.contrib.estimator.regression_head(...)
-      inputs = tf.feature_column.input_layer(features, ...)
-      hidden_layer0 = tf.layers.dense(
-          inputs, units=1000, activation=tf.nn.relu)
-      hidden_layer1 = tf.layers.dense(
-          hidden_layer0, units=500, activation=tf.nn.relu)
-      logits = tf.layers.dense(
-          hidden_layer1, units=head.logits_dimension, activation=None)
-
-      return head.create_estimator_spec(
-          features=features,
-          labels=labels,
-          mode=mode,
-          logits=logits,
-          optimizer=optimizer)
-    ```
-
-  There are cases where computing and applying gradients can not be meaningfully
-  captured with optimizer or train_op_fn we support (for example, with sync
-  optimizer). In such case, you can take the responsibility on your own. Here is
-  a common use case,
-    ```python
-    estimator_spec = head.create_estimator_spec(
-        features=features,
-        labels=labels,
-        mode=mode,
-        logits=logits,
-        train_op_fn=lambda _: tf.no_op())
-    if mode == model_fn.ModeKeys.TRAIN:
-      optimizer = ...
-      sync = tf.train.SyncReplicasOptimizer(opt=optimizer, ...)
-      update_op = sync.minimize(
-          estimator_spec.loss, global_step=tf.get_global_step())
-      hooks = [sync.make_session_run_hook(is_chief)]
-      ... update train_op and hooks in EstimatorSpec and return
-    ```
-  """
-  __metaclass__ = abc.ABCMeta
-
-  @abc.abstractproperty
-  def name(self):
-    """The name of this head.
-
-    Returns:
-      A string.
-    """
-    raise NotImplementedError('Calling an abstract method.')
-
-  @abc.abstractproperty
-  def logits_dimension(self):
-    """Size of the last dimension of the logits `Tensor`.
-
-    Typically, logits is of shape `[batch_size, logits_dimension]`.
-
-    Returns:
-      The expected size of the `logits` tensor.
-    """
-    raise NotImplementedError('Calling an abstract method.')
-
-  @abc.abstractmethod
-  def create_loss(self, features, mode, logits, labels):
-    """Returns a loss Tensor from provided logits.
-
-    This function is designed to be used by framework developers.  Almost all
-    users should use create_estimator_spec(), which calls this internally.
-    `mode` and `features` are most likely not used, but some Head
-    implementations may require them.
-
-    Args:
-      features: Input `dict` of `Tensor` objects.
-      mode: Estimator's `ModeKeys`.
-      logits: logits `Tensor` to be used for loss construction.
-      labels: Labels `Tensor`, or `dict` of same.
-
-    Returns:
-      A LossSpec that contains
-      * the scalar `Tensor` representing reduced weighted training loss
-      * the `Tensor` representing the unreduced unweighted loss
-      * the `Tensor` representing the example weights
-      * possibly processed labels (e.g. vocabulary lookup, shape manipulation,
-        etc.)
-
-      To be extendable in the future.
-    """
-    raise NotImplementedError('Calling an abstract method.')
-
-  # TODO(b/65403806): By default, collect regularization_losses from
-  # GraphKeys.REGULARIZATION_LOSSES collection.
-  def create_estimator_spec(
-      self, features, mode, logits, labels=None, optimizer=None,
-      train_op_fn=None, regularization_losses=None):
-    """Returns `EstimatorSpec` that a model_fn can return.
-
-    Please note that,
-    + All args must be passed via name.
-
-    Args:
-      features: Input `dict` of `Tensor` or `SparseTensor` objects.
-      mode: Estimator's `ModeKeys`.
-      logits: logits `Tensor` to be used by the head.
-      labels: Labels `Tensor`, or `dict` of same.
-      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
-        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
-        updates variables and increments `global_step`.
-      train_op_fn: Function that takes a scalar loss `Tensor` and returns an op
-        to optimize the model with the loss in TRAIN mode. Used if `optimizer`
-        is `None`. Exactly one of `train_op_fn` and `optimizer` must be set in
-        TRAIN mode. None is allowed in other modes. If you want to optimize loss
-        yourself you can pass `lambda _: tf.no_op()` and then use
-        EstimatorSpec.loss to compute and apply gradients.
-      regularization_losses: A list of additional scalar losses to be added to
-        the training loss, such as regularization losses.
-
-    Returns:
-      `EstimatorSpec`.
-    """
-    try:
-      tpu_estimator_spec = (
-          self._create_tpu_estimator_spec(
-              features, mode, logits, labels, optimizer, train_op_fn,
-              regularization_losses))
-      return tpu_estimator_spec.as_estimator_spec()
-    except NotImplementedError:
-      # Not all subclasses of _Head will have implemented
-      # _create_tpu_estimator_spec. If it is implemented, we can use it to
-      # create our `EstimatorSpec` here.
-      raise NotImplementedError(
-          'Subclasses of _Head must implement `create_estimator_spec()` or '
-          '_create_tpu_estimator_spec().')
-
-  def _create_tpu_estimator_spec(
-      self, features, mode, logits, labels=None, optimizer=None,
-      train_op_fn=None, regularization_losses=None):
-    """Returns `model_fn._TPUEstimatorSpec` that a model_fn can return.
-
-    Args:
-      features: Input `dict` of `Tensor` or `SparseTensor` objects.
-      mode: Estimator's `ModeKeys`.
-      logits: logits `Tensor` to be used by the head.
-      labels: Labels `Tensor`, or `dict` of same.
-      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
-        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
-        updates variables and increments `global_step`.
-      train_op_fn: Function that takes a scalar loss `Tensor` and returns an op
-        to optimize the model with the loss in TRAIN mode. Used if `optimizer`
-        is `None`. Exactly one of `train_op_fn` and `optimizer` must be set in
-        TRAIN mode. None is allowed in other modes. If you want to optimize loss
-        yourself you can pass `lambda _: tf.no_op()` and then use
-        EstimatorSpec.loss to compute and apply gradients.
-      regularization_losses: A list of additional scalar losses to be added to
-        the training loss, such as regularization losses.
-
-    Returns:
-      A `model_fn._TPUEstimatorSpec' instance.
-    """
-    raise NotImplementedError(
-        'TPUEstimatorSpec not available for this model head.')
-
-
-def _check_dense_labels_match_logits_and_reshape(
-    labels, logits, expected_labels_dimension):
-  """Checks that labels shape matches logits and reshapes if needed.
-
-  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Then labels
-  shape must be [D0, D1, ... DN, expected_labels_dimension].
-  If expected_labels_dimension=1, labels could be [D0, D1, ... DN] and this
-  method reshapes them to [D0, D1, ... DN, 1].
-
-  Args:
-    labels: labels Tensor.
-    logits: logits Tensor.
-    expected_labels_dimension: Integer.
-  Returns:
-    Validated and reshaped labels Tensor.
-  Raises:
-    ValueError: If labels is a SparseTensor.
-    ValueError: If labels shape is statically defined and fails validation.
-    OpError: If labels shape is not statically defined and fails validation.
-  """
-  if labels is None:
-    raise ValueError(
-        'You must provide a labels Tensor. Given: None. '
-        'Suggested troubleshooting steps: Check that your data contain '
-        'your label feature. Check that your input_fn properly parses and '
-        'returns labels.')
-  with ops.name_scope(None, 'labels', (labels, logits)) as scope:
-    labels = sparse_tensor.convert_to_tensor_or_sparse_tensor(labels)
-    if isinstance(labels, sparse_tensor.SparseTensor):
-      raise ValueError(
-          'SparseTensor labels are not supported. '
-          'labels must be a Tensor of shape [D0, D1, ..., DN, %s], '
-          'e.g. [batch_size, %s]. '
-          'Suggested Fix (1): Check the label feature in your data. '
-          'Each example must contain %s value(s). If not, your choice of label '
-          'was probably incorrect. '
-          'Suggested Fix (2): In your input_fn, use '
-          'tf.sparse_tensor_to_dense() to turn labels into a Tensor.'
-          '' % (expected_labels_dimension, expected_labels_dimension,
-                expected_labels_dimension))
-    if (labels.shape.ndims is not None and logits.shape.ndims is not None and
-        labels.shape.ndims == logits.shape.ndims - 1):
-      labels = array_ops.expand_dims(labels, -1)
-    labels_shape = array_ops.shape(labels)
-    logits_shape = array_ops.shape(logits)
-    err_msg = (
-        'labels shape must be [D0, D1, ... DN, {}]. '
-        'Suggested Fix: check your n_classes argument to the estimator '
-        'and/or the shape of your label.'.format(expected_labels_dimension))
-    assert_rank = check_ops.assert_rank_at_least(labels, 2, message=err_msg)
-    with ops.control_dependencies([assert_rank]):
-      static_shape = labels.shape
-      if static_shape.ndims is not None:
-        dim1 = static_shape[-1]
-        if (dim1 is not None) and (dim1 != expected_labels_dimension):
-          raise ValueError(
-              'Mismatched label shape. '
-              'Expected labels dimension=%s.  Received %s. '
-              'Suggested Fix:'
-              'If your classifier expects one-hot encoding label,'
-              'check your n_classes argument to the estimator '
-              'and/or the shape of your label. '
-              'Otherwise, check the shape of your label.' %
-              (expected_labels_dimension, dim1))
-      expected_labels_shape = array_ops.concat(
-          [logits_shape[:-1], [expected_labels_dimension]], axis=0)
-      assert_dimension = check_ops.assert_equal(
-          expected_labels_shape, labels_shape, message=err_msg,
-          data=['expected_labels_shape: ', expected_labels_shape,
-                'labels_shape: ', labels_shape])
-      with ops.control_dependencies([assert_dimension]):
-        return array_ops.identity(labels, name=scope)
-
-
-def _get_weights_and_check_match_logits(
-    features, weight_column, logits, allow_per_logit_weights=False):
-  """Fetches weights from features and checks that the shape matches logits.
-
-  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape
-  can be either:
-  * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`.
-  * [D0, D1, ... DN, 1]
-  * [D0, D1, ... DN]: In this case, weights is reshaped into
-    [D0, D1, ... DN, 1] to work with weight broadcasting rules.
-
-  Args:
-    features: The features dict that contains weights.
-    weight_column: The weight column. If not given, this method returns 1.
-    logits: logits Tensor.
-    allow_per_logit_weights: Boolean. Whether we allow weights along the logits
-      dimension, namely shape `[D0, D1, ... DN, logits_dimension]`.
-  Returns:
-    Validated and reshaped weights Tensor.
-  Raises:
-    ValueError: If the weights `Tensor` cannot be cast into float.
-  """
-  if allow_per_logit_weights:
-    err_msg = (
-        'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
-        '[D0, D1, ... DN, logits_dimension]')
-  else:
-    err_msg = (
-        'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')
-  with ops.name_scope(
-      None, 'weights',
-      values=tuple(six.itervalues(features)) + (logits,)) as scope:
-    # Fetch the weights.
-    if weight_column is None:
-      return 1.
-    if isinstance(weight_column, six.string_types):
-      weight_column = feature_column_lib.numeric_column(
-          key=weight_column, shape=(1,))
-    if not isinstance(weight_column, feature_column_lib._NumericColumn):  # pylint: disable=protected-access
-      raise TypeError('Weight column must be either a string or _NumericColumn.'
-                      ' Given type: {}.'.format(type(weight_column)))
-    weights = weight_column._get_dense_tensor(  # pylint: disable=protected-access
-        feature_column_lib._LazyBuilder(features))  # pylint: disable=protected-access
-    if not (weights.dtype.is_floating or weights.dtype.is_integer):
-      raise ValueError('Weight column should be castable to float. '
-                       'Given dtype: {}'.format(weights.dtype))
-    weights = math_ops.to_float(weights, name='weights')
-
-    # Validate the weights shape.
-    weights_shape = array_ops.shape(weights, name='weights_shape')
-    logits_shape = array_ops.shape(logits, name='logits_shape')
-    if (weights.shape.ndims is not None and logits.shape.ndims is not None and
-        weights.shape.ndims == logits.shape.ndims - 1):
-      assert_dimension = check_ops.assert_equal(
-          logits_shape[:-1], weights_shape, message=err_msg,
-          data=['logits_shape: ', logits_shape,
-                'weights_shape: ', weights_shape])
-      with ops.control_dependencies([assert_dimension]):
-        return array_ops.expand_dims(weights, -1, name=scope)
-    supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]], axis=0)
-    if allow_per_logit_weights:
-      condition = math_ops.reduce_any(
-          [math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)),
-           math_ops.reduce_all(math_ops.equal(
-               supported_weights_shape, weights_shape))])
-      assert_dimension = control_flow_ops.Assert(
-          condition=condition,
-          data=[err_msg, 'logits_shape: ', logits_shape,
-                'weights_shape: ', weights_shape])
-    else:
-      assert_dimension = check_ops.assert_equal(
-          supported_weights_shape, weights_shape, message=err_msg,
-          data=['logits_shape: ', logits_shape,
-                'weights_shape: ', weights_shape])
-    with ops.control_dependencies([assert_dimension]):
-      return array_ops.identity(weights, name=scope)
-
-
-def _check_logits_final_dim(logits, expected_logits_dimension):
-  """Checks that logits shape is [D0, D1, ... DN, logits_dimension]."""
-  with ops.name_scope(None, 'logits', (logits,)) as scope:
-    logits = math_ops.to_float(logits)
-    logits_shape = array_ops.shape(logits)
-    assert_rank = check_ops.assert_rank_at_least(
-        logits, 2, data=[logits_shape],
-        message='logits shape must be [D0, D1, ... DN, logits_dimension]')
-    with ops.control_dependencies([assert_rank]):
-      static_shape = logits.shape
-      if static_shape.ndims is not None and static_shape[-1] is not None:
-        if static_shape[-1] != expected_logits_dimension:
-          raise ValueError(
-              'logits shape must be [D0, D1, ... DN, logits_dimension], '
-              'got %s.' % (static_shape,))
-        return logits
-      assert_dimension = check_ops.assert_equal(
-          expected_logits_dimension, logits_shape[-1], data=[logits_shape],
-          message='logits shape must be [D0, D1, ... DN, logits_dimension]')
-      with ops.control_dependencies([assert_dimension]):
-        return array_ops.identity(logits, name=scope)
-
-
-def _validate_loss_fn_args(loss_fn):
-  """Validates loss_fn arguments.
-
-  Required arguments: labels, logits.
-  Optional arguments: features.
-
-  Args:
-    loss_fn: The loss function.
-  Raises:
-    ValueError: If the signature is unexpected.
-  """
-  loss_fn_args = function_utils.fn_args(loss_fn)
-  for required_arg in ['labels', 'logits']:
-    if required_arg not in loss_fn_args:
-      raise ValueError(
-          'loss_fn must contain argument: {}. '
-          'Given arguments: {}'.format(required_arg, loss_fn_args))
-  invalid_args = list(set(loss_fn_args) - set(['labels', 'logits', 'features']))
-  if invalid_args:
-    raise ValueError('loss_fn has unexpected args: {}'.format(invalid_args))
-
-
-def _call_loss_fn(loss_fn, labels, logits, features, expected_loss_dim=1):
-  """Calls loss_fn and checks the returned shape.
-
-  Args:
-    loss_fn: The loss function.
-    labels: Processed labels Tensor.
-    logits: Logits Tensor of shape [D0, D1, ... DN, logits_dimension].
-    features: Features dict.
-    expected_loss_dim: The expected last dimension of loss Tensor.
-  Returns:
-    Loss Tensor with shape [D0, D1, ... DN, expected_loss_dim].
-  """
-  loss_fn_args = function_utils.fn_args(loss_fn)
-  kwargs = {}
-  if 'features' in loss_fn_args:
-    kwargs['features'] = features
-  with ops.name_scope(
-      None, 'call_loss_fn',
-      values=[labels, logits] + list(six.itervalues(features))):
-    unweighted_loss = loss_fn(labels=labels, logits=logits, **kwargs)
-    logits_shape = array_ops.shape(logits, name='logits_shape')
-    expected_loss_shape = array_ops.concat(
-        [logits_shape[:-1], [expected_loss_dim]], axis=0,
-        name='expected_loss_shape')
-    loss_shape = array_ops.shape(unweighted_loss, name='loss_shape')
-    check_loss_shape_op = control_flow_ops.Assert(
-        math_ops.reduce_all(math_ops.equal(loss_shape, expected_loss_shape)),
-        data=[
-            'loss_fn must return Tensor of shape '
-            '[D0, D1, ... DN, {}]. '.format(expected_loss_dim),
-            'logits_shape: ', logits_shape, 'loss_shape: ', loss_shape],
-        name='check_loss_shape')
-    with ops.control_dependencies([check_loss_shape_op]):
-      return array_ops.identity(unweighted_loss)
-
-
-def _indicator_labels_mean(labels, weights=None, name=None):
-  with ops.name_scope(name, 'labels_mean', (labels, weights)) as scope:
-    labels = math_ops.to_float(labels, name='labels')
-    if weights is not None:
-      weights = weights_broadcast_ops.broadcast_weights(weights, labels)
-    return metrics_lib.mean(labels, weights=weights, name=scope)
-
-
-def _classification_output(scores, n_classes, label_vocabulary=None):
-  batch_size = array_ops.shape(scores)[0]
-  if label_vocabulary:
-    export_class_list = label_vocabulary
-  else:
-    export_class_list = string_ops.as_string(math_ops.range(n_classes))
-  export_output_classes = array_ops.tile(
-      input=array_ops.expand_dims(input=export_class_list, axis=0),
-      multiples=[batch_size, 1])
-  return export_output.ClassificationOutput(
-      scores=scores,
-      # `ClassificationOutput` requires string classes.
-      classes=export_output_classes)
-
-
-def _accuracy_baseline(labels_mean):
-  """Return accuracy baseline based on labels mean.
-
-  This is the best the model could do by always predicting one class.
-
-  Args:
-    labels_mean: Tuple of value and update op.
-
-  Returns:
-    Tuple of value and update op.
-  """
-  with ops.name_scope(None, 'accuracy_baseline', labels_mean):
-    value, update_op = labels_mean
-    return (
-        math_ops.maximum(value, 1. - value, name='value'),
-        math_ops.maximum(update_op, 1 - update_op, name='update_op'))
-
-
-def _predictions_mean(predictions, weights=None, name=None):
-  with ops.name_scope(
-      name, 'predictions_mean', (predictions, weights)) as scope:
-    predictions = math_ops.to_float(predictions, name='predictions')
-    if weights is not None:
-      weights = weights_broadcast_ops.broadcast_weights(weights, predictions)
-    return metrics_lib.mean(predictions, weights=weights, name=scope)
-
-
-def _auc(labels, predictions, weights=None, curve='ROC', name=None):
-  with ops.name_scope(name, 'auc', (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions, name='predictions')
-    if weights is not None:
-      weights = weights_broadcast_ops.broadcast_weights(weights, predictions)
-    return metrics_lib.auc(
-        labels=labels, predictions=predictions, weights=weights, curve=curve,
-        name=scope)
-
-
-def _accuracy_at_threshold(labels, predictions, weights, threshold, name=None):
-  with ops.name_scope(
-      name, 'accuracy_at_%s' % threshold,
-      (predictions, labels, weights, threshold)) as scope:
-    threshold_predictions = math_ops.to_float(
-        math_ops.greater_equal(predictions, threshold))
-    return metrics_lib.accuracy(
-        labels=labels, predictions=threshold_predictions, weights=weights,
-        name=scope)
-
-
-def _precision_at_threshold(labels, predictions, weights, threshold, name=None):
-  with ops.name_scope(
-      name, 'precision_at_%s' % threshold,
-      (predictions, labels, weights, threshold)) as scope:
-    precision_tensor, update_op = metrics_lib.precision_at_thresholds(
-        labels=labels, predictions=predictions, thresholds=(threshold,),
-        weights=weights, name=scope)
-    return array_ops.squeeze(precision_tensor), array_ops.squeeze(update_op)
-
-
-def _recall_at_threshold(labels, predictions, weights, threshold, name=None):
-  with ops.name_scope(
-      name, 'recall_at_%s' % threshold,
-      (predictions, labels, weights, threshold)) as scope:
-    precision_tensor, update_op = metrics_lib.recall_at_thresholds(
-        labels=labels, predictions=predictions, thresholds=(threshold,),
-        weights=weights, name=scope)
-    return array_ops.squeeze(precision_tensor), array_ops.squeeze(update_op)
-
-
-def _multi_class_head_with_softmax_cross_entropy_loss(
-    n_classes,
-    weight_column=None,
-    label_vocabulary=None,
-    loss_reduction=losses.Reduction.SUM,
-    loss_fn=None,
-    name=None):
-  """Creates a '_Head' for multi class classification.
-
-  The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`.
-  In many applications, the shape is `[batch_size, n_classes]`.
-
-  `labels` must be a dense `Tensor` with shape matching `logits`, namely
-  `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string
-  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
-  `labels` must be an integer `Tensor` with values specifying the class index.
-
-  If `weight_column` is specified, weights must be of shape
-  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
-
-  The loss is the weighted sum over the input dimensions. Namely, if the input
-  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
-  `batch_size`.
-
-  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
-  `(labels, logits, features)` as arguments and returns unreduced loss with
-  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support integer `labels` with
-  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
-  the input labels before passing them to `loss_fn`.
-
-  Args:
-    n_classes: Number of classes, must be greater than 2 (for 2 classes, use
-      `_BinaryLogisticHeadWithSigmoidCrossEntropyLoss`).
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.
-    label_vocabulary: A list or tuple of strings representing possible label
-      values. If it is not given, that means labels are already encoded as an
-      integer within [0, n_classes). If given, labels must be of string type and
-      have any value in `label_vocabulary`. Note that errors will be raised if
-      `label_vocabulary` is not provided but labels are strings.
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch. Defaults to `SUM`.
-    loss_fn: Optional loss function.
-    name: name of the head. If provided, summary and metrics keys will be
-      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
-
-  Returns:
-    An instance of `_Head` for multi class classification.
-
-  Raises:
-    ValueError: If `n_classes`, `label_vocabulary` or `loss_reduction` is
-      invalid.
-  """
-  if label_vocabulary is not None and not isinstance(label_vocabulary,
-                                                     (list, tuple)):
-    raise ValueError(
-        'label_vocabulary should be a list or a tuple. Given type: {}'.format(
-            type(label_vocabulary)))
-  if (loss_reduction not in losses.Reduction.all() or
-      loss_reduction == losses.Reduction.NONE):
-    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
-  if loss_fn:
-    _validate_loss_fn_args(loss_fn)
-  return _MultiClassHeadWithSoftmaxCrossEntropyLoss(
-      n_classes=n_classes,
-      weight_column=weight_column,
-      label_vocabulary=label_vocabulary,
-      loss_reduction=loss_reduction,
-      loss_fn=loss_fn,
-      name=name)
-
-
-class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
-  """See `_multi_class_head_with_softmax_cross_entropy_loss`."""
-
-  def __init__(self,
-               n_classes,
-               weight_column=None,
-               label_vocabulary=None,
-               loss_reduction=losses.Reduction.SUM,
-               loss_fn=None,
-               name=None):
-    if (n_classes is None) or (n_classes <= 2):
-      raise ValueError('n_classes must be > 2: %s.' % n_classes)
-    self._n_classes = n_classes
-    self._weight_column = weight_column
-    self._label_vocabulary = label_vocabulary
-    self._loss_reduction = loss_reduction
-    self._loss_fn = loss_fn
-    self._name = name
-
-  @property
-  def name(self):
-    return self._name
-
-  @property
-  def logits_dimension(self):
-    return self._n_classes
-
-  def _eval_metric_ops(
-      self, labels, class_ids, weights, unreduced_loss, regularization_loss):
-    """Returns the Eval metric ops."""
-    with ops.name_scope(
-        None, 'metrics',
-        (labels, class_ids, weights, unreduced_loss, regularization_loss)):
-      keys = metric_keys.MetricKeys
-      metric_ops = {
-          # Estimator already adds a metric for loss.
-          # TODO(xiejw): Any other metrics?
-          _summary_key(self._name, keys.LOSS_MEAN):
-              metrics_lib.mean(
-                  values=unreduced_loss,
-                  weights=weights,
-                  name=keys.LOSS_MEAN),
-          _summary_key(self._name, keys.ACCURACY):
-              metrics_lib.accuracy(
-                  labels=labels,
-                  predictions=class_ids,
-                  weights=weights,
-                  name=keys.ACCURACY),
-      }
-      if regularization_loss is not None:
-        metric_ops[_summary_key(self._name, keys.LOSS_REGULARIZATION)] = (
-            metrics_lib.mean(
-                values=regularization_loss,
-                name=keys.LOSS_REGULARIZATION))
-    return metric_ops
-
-  def _label_ids(self, labels):
-    """Converts labels to integer id space."""
-    if self._label_vocabulary is None:
-      if not labels.dtype.is_integer:
-        raise ValueError('Labels dtype should be integer. Instead got {}.'.
-                         format(labels.dtype))
-      label_ids = labels
-    else:
-      if labels.dtype != dtypes.string:
-        raise ValueError('Labels dtype should be string if there is a '
-                         'vocabulary. Instead got {}'.format(labels.dtype))
-      label_ids = lookup_ops.index_table_from_tensor(
-          vocabulary_list=tuple(self._label_vocabulary),
-          name='class_id_lookup').lookup(labels)
-    return _assert_range(label_ids, self._n_classes)
-
-  def create_loss(self, features, mode, logits, labels):
-    """See `Head`."""
-    del mode  # Unused for this head.
-    logits = ops.convert_to_tensor(logits)
-    labels = _check_dense_labels_match_logits_and_reshape(
-        labels=labels, logits=logits, expected_labels_dimension=1)
-    label_ids = self._label_ids(labels)
-    if self._loss_fn:
-      unweighted_loss = _call_loss_fn(
-          loss_fn=self._loss_fn, labels=label_ids, logits=logits,
-          features=features, expected_loss_dim=1)
-    else:
-      unweighted_loss = losses.sparse_softmax_cross_entropy(
-          labels=label_ids, logits=logits, reduction=losses.Reduction.NONE)
-      # Restore the squeezed dim, so unweighted_loss matches the weights shape.
-      unweighted_loss = array_ops.expand_dims(unweighted_loss, axis=-1)
-    weights = _get_weights_and_check_match_logits(
-        features=features, weight_column=self._weight_column, logits=logits)
-    training_loss = losses.compute_weighted_loss(
-        unweighted_loss, weights=weights, reduction=self._loss_reduction)
-    return LossSpec(
-        training_loss=training_loss,
-        unreduced_loss=unweighted_loss,
-        weights=weights,
-        processed_labels=label_ids)
-
-  def _create_tpu_estimator_spec(
-      self, features, mode, logits, labels=None, optimizer=None,
-      train_op_fn=None, regularization_losses=None):
-    """Returns a `model_fn._TPUEstimatorSpec`.
-
-    Args:
-      features: Input `dict` of `Tensor` or `SparseTensor` objects.
-      mode: Estimator's `ModeKeys`.
-      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
-        For many applications, the shape is `[batch_size, logits_dimension]`.
-      labels: Labels integer or string `Tensor` with shape matching `logits`,
-        namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is
-        required argument when `mode` equals `TRAIN` or `EVAL`.
-      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
-        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
-        updates variables and increments `global_step`.
-      train_op_fn: Function that takes a scalar loss `Tensor` and returns
-        `train_op`. Used if `optimizer` is `None`.
-      regularization_losses: A list of additional scalar losses to be added to
-        the training loss, such as regularization losses. These losses are
-        usually expressed as a batch average, so for best results users need to
-        set `loss_reduction=SUM_OVER_BATCH_SIZE` or
-        `loss_reduction=SUM_OVER_NONZERO_WEIGHTS` when creating the head to
-        avoid scaling errors.
-    Returns:
-      A `model_fn._TPUEstimatorSpec` instance.
-    Raises:
-      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
-        mode, or if both are set.
-    """
-    with ops.name_scope(self._name, 'head'):
-      logits = _check_logits_final_dim(logits, self.logits_dimension)
-
-      # Predict.
-      pred_keys = prediction_keys.PredictionKeys
-      with ops.name_scope(None, 'predictions', (logits,)):
-        # class_ids's shape is [D0, D1, ... DN].
-        class_ids = math_ops.argmax(logits, axis=-1, name=pred_keys.CLASS_IDS)
-        class_ids = array_ops.expand_dims(class_ids, axis=-1)
-        if self._label_vocabulary:
-          table = lookup_ops.index_to_string_table_from_tensor(
-              vocabulary_list=self._label_vocabulary,
-              name='class_string_lookup')
-          classes = table.lookup(class_ids)
-        else:
-          classes = string_ops.as_string(class_ids, name='str_classes')
-
-        probabilities = nn.softmax(logits, name=pred_keys.PROBABILITIES)
-        predictions = {
-            pred_keys.LOGITS: logits,
-            pred_keys.PROBABILITIES: probabilities,
-            # Expand to [batch_size, 1]
-            pred_keys.CLASS_IDS: class_ids,
-            pred_keys.CLASSES: classes,
-        }
-      if mode == model_fn.ModeKeys.PREDICT:
-        classifier_output = _classification_output(
-            scores=probabilities, n_classes=self._n_classes,
-            label_vocabulary=self._label_vocabulary)
-        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
-            mode=model_fn.ModeKeys.PREDICT,
-            predictions=predictions,
-            export_outputs={
-                _DEFAULT_SERVING_KEY: classifier_output,
-                _CLASSIFY_SERVING_KEY: classifier_output,
-                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
-            })
-
-      training_loss, unreduced_loss, weights, label_ids = self.create_loss(
-          features=features, mode=mode, logits=logits, labels=labels)
-      if regularization_losses:
-        regularization_loss = math_ops.add_n(regularization_losses)
-        regularized_training_loss = math_ops.add_n(
-            [training_loss, regularization_loss])
-      else:
-        regularization_loss = None
-        regularized_training_loss = training_loss
-      # Eval.
-      if mode == model_fn.ModeKeys.EVAL:
-        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
-            mode=model_fn.ModeKeys.EVAL,
-            predictions=predictions,
-            loss=regularized_training_loss,
-            eval_metrics=_create_eval_metrics_tuple(self._eval_metric_ops, {
-                'labels': label_ids,
-                'class_ids': class_ids,
-                'weights': weights,
-                'unreduced_loss': unreduced_loss,
-                'regularization_loss': regularization_loss
-            }))
-
-      # Train.
-      if optimizer is not None:
-        if train_op_fn is not None:
-          raise ValueError('train_op_fn and optimizer cannot both be set.')
-        train_op = optimizer.minimize(
-            regularized_training_loss,
-            global_step=training_util.get_global_step())
-      elif train_op_fn is not None:
-        train_op = train_op_fn(regularized_training_loss)
-      else:
-        raise ValueError('train_op_fn and optimizer cannot both be None.')
-      train_op = _append_update_ops(train_op)
-      # Only summarize mean_loss for SUM reduction to preserve backwards
-      # compatibility. Otherwise skip it to avoid unnecessary computation.
-      if self._loss_reduction == losses.Reduction.SUM:
-        example_weight_sum = math_ops.reduce_sum(
-            weights * array_ops.ones_like(unreduced_loss))
-        mean_loss = training_loss / example_weight_sum
-      else:
-        mean_loss = None
-    with ops.name_scope(''):
-      keys = metric_keys.MetricKeys
-      summary.scalar(
-          _summary_key(self._name, keys.LOSS),
-          regularized_training_loss)
-      if mean_loss is not None:
-        summary.scalar(
-            _summary_key(self._name, keys.LOSS_MEAN),
-            mean_loss)
-      if regularization_loss is not None:
-        summary.scalar(
-            _summary_key(self._name, keys.LOSS_REGULARIZATION),
-            regularization_loss)
-    return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
-        mode=model_fn.ModeKeys.TRAIN,
-        predictions=predictions,
-        loss=regularized_training_loss,
-        train_op=train_op)
-
-
-def _binary_logistic_head_with_sigmoid_cross_entropy_loss(
-    weight_column=None,
-    thresholds=None,
-    label_vocabulary=None,
-    loss_reduction=losses.Reduction.SUM,
-    loss_fn=None,
-    name=None):
-  """Creates a `_Head` for single label binary classification.
-
-  This head uses `sigmoid_cross_entropy_with_logits` loss.
-
-  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
-  In many applications, the shape is `[batch_size, 1]`.
-
-  `labels` must be a dense `Tensor` with shape matching `logits`, namely
-  `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string
-  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
-  `labels` must be float `Tensor` with values in the interval `[0, 1]`.
-
-  If `weight_column` is specified, weights must be of shape
-  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
-
-  The loss is the weighted sum over the input dimensions. Namely, if the input
-  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
-  `batch_size`.
-
-  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
-  `(labels, logits, features)` as arguments and returns unreduced loss with
-  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support float `labels` with
-  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
-  the input labels before passing them to `loss_fn`.
-
-  Args:
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.
-    thresholds: Iterable of floats in the range `(0, 1)`. For binary
-      classification metrics such as precision and recall, an eval metric is
-      generated for each threshold value. This threshold is applied to the
-      logistic values to determine the binary classification (i.e., above the
-      threshold is `true`, below is `false`.
-    label_vocabulary: A list or tuple of strings representing possible label
-      values. If it is not given, that means labels are already encoded within
-      [0, 1]. If given, labels must be string type and have any value in
-      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
-      is not provided but labels are strings.
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch. Defaults to `SUM`.
-    loss_fn: Optional loss function.
-    name: name of the head. If provided, summary and metrics keys will be
-      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
-
-  Returns:
-    An instance of `_Head` for binary classification.
-
-  Raises:
-    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
-    ValueError: If `loss_reduction` is invalid.
-    TypeError: if `label_vocabulary` has invalid type.
-  """
-  thresholds = tuple(thresholds) if thresholds else tuple()
-  if label_vocabulary is not None and not isinstance(label_vocabulary,
-                                                     (list, tuple)):
-    raise TypeError(
-        'label_vocabulary should be a list or tuple. Given type: {}'.format(
-            type(label_vocabulary)))
-
-  for threshold in thresholds:
-    if (threshold <= 0.0) or (threshold >= 1.0):
-      raise ValueError('thresholds not in (0, 1): {}.'.format((thresholds,)))
-  if (loss_reduction not in losses.Reduction.all() or
-      loss_reduction == losses.Reduction.NONE):
-    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
-  if loss_fn:
-    _validate_loss_fn_args(loss_fn)
-  return _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(
-      weight_column=weight_column,
-      thresholds=thresholds,
-      label_vocabulary=label_vocabulary,
-      loss_reduction=loss_reduction,
-      loss_fn=loss_fn,
-      name=name)
-
-
-class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head):
-  """See `_binary_logistic_head_with_sigmoid_cross_entropy_loss`."""
-
-  def __init__(self,
-               weight_column=None,
-               thresholds=None,
-               label_vocabulary=None,
-               loss_reduction=losses.Reduction.SUM,
-               loss_fn=None,
-               name=None):
-    self._weight_column = weight_column
-    self._thresholds = thresholds
-    self._label_vocabulary = label_vocabulary
-    self._loss_reduction = loss_reduction
-    self._loss_fn = loss_fn
-    self._name = name
-
-  @property
-  def name(self):
-    return self._name
-
-  @property
-  def logits_dimension(self):
-    return 1
-
-  def _eval_metric_ops(self, labels, logits, logistic, class_ids, weights,
-                       unreduced_loss, regularization_loss):
-    with ops.name_scope(None, 'metrics',
-                        (labels, logits, logistic, class_ids, weights,
-                         unreduced_loss, regularization_loss)):
-      keys = metric_keys.MetricKeys
-      labels_mean = _indicator_labels_mean(
-          labels=labels, weights=weights, name=keys.LABEL_MEAN)
-      metric_ops = {
-          # Estimator already adds a metric for loss.
-          _summary_key(self._name, keys.LOSS_MEAN):
-              metrics_lib.mean(
-                  values=unreduced_loss,
-                  weights=weights,
-                  name=keys.LOSS_MEAN),
-          _summary_key(self._name, keys.ACCURACY):
-              metrics_lib.accuracy(
-                  labels=labels,
-                  predictions=class_ids,
-                  weights=weights,
-                  name=keys.ACCURACY),
-          _summary_key(self._name, keys.PRECISION):
-              metrics_lib.precision(
-                  labels=labels,
-                  predictions=class_ids,
-                  weights=weights,
-                  name=keys.PRECISION),
-          _summary_key(self._name, keys.RECALL):
-              metrics_lib.recall(
-                  labels=labels,
-                  predictions=class_ids,
-                  weights=weights,
-                  name=keys.RECALL),
-          _summary_key(self._name, keys.PREDICTION_MEAN):
-              _predictions_mean(
-                  predictions=logistic,
-                  weights=weights,
-                  name=keys.PREDICTION_MEAN),
-          _summary_key(self._name, keys.LABEL_MEAN):
-              labels_mean,
-          _summary_key(self._name, keys.ACCURACY_BASELINE):
-              _accuracy_baseline(labels_mean),
-          _summary_key(self._name, keys.AUC):
-              _auc(
-                  labels=labels,
-                  predictions=logistic,
-                  weights=weights,
-                  name=keys.AUC),
-          _summary_key(self._name, keys.AUC_PR):
-              _auc(
-                  labels=labels,
-                  predictions=logistic,
-                  weights=weights,
-                  curve='PR',
-                  name=keys.AUC_PR)
-      }
-      if regularization_loss is not None:
-        metric_ops[_summary_key(self._name, keys.LOSS_REGULARIZATION)] = (
-            metrics_lib.mean(
-                values=regularization_loss,
-                name=keys.LOSS_REGULARIZATION))
-      for threshold in self._thresholds:
-        accuracy_key = keys.ACCURACY_AT_THRESHOLD % threshold
-        metric_ops[_summary_key(self._name,
-                                accuracy_key)] = _accuracy_at_threshold(
-                                    labels=labels,
-                                    predictions=logistic,
-                                    weights=weights,
-                                    threshold=threshold,
-                                    name=accuracy_key)
-        # Precision for positive examples.
-        precision_key = keys.PRECISION_AT_THRESHOLD % threshold
-        metric_ops[_summary_key(self._name,
-                                precision_key)] = _precision_at_threshold(
-                                    labels=labels,
-                                    predictions=logistic,
-                                    weights=weights,
-                                    threshold=threshold,
-                                    name=precision_key)
-        # Recall for positive examples.
-        recall_key = keys.RECALL_AT_THRESHOLD % threshold
-        metric_ops[_summary_key(self._name,
-                                recall_key)] = _recall_at_threshold(
-                                    labels=labels,
-                                    predictions=logistic,
-                                    weights=weights,
-                                    threshold=threshold,
-                                    name=recall_key)
-      return metric_ops
-
-  def create_loss(self, features, mode, logits, labels):
-    """See `Head`."""
-    del mode  # Unused for this head.
-    logits = ops.convert_to_tensor(logits)
-    labels = _check_dense_labels_match_logits_and_reshape(
-        labels=labels, logits=logits, expected_labels_dimension=1)
-    if self._label_vocabulary is not None:
-      labels = lookup_ops.index_table_from_tensor(
-          vocabulary_list=tuple(self._label_vocabulary),
-          name='class_id_lookup').lookup(labels)
-    labels = math_ops.to_float(labels)
-    labels = _assert_range(labels, n_classes=2)
-    if self._loss_fn:
-      unweighted_loss = _call_loss_fn(
-          loss_fn=self._loss_fn, labels=labels, logits=logits,
-          features=features, expected_loss_dim=1)
-    else:
-      unweighted_loss = nn.sigmoid_cross_entropy_with_logits(
-          labels=labels, logits=logits)
-    weights = _get_weights_and_check_match_logits(
-        features=features, weight_column=self._weight_column, logits=logits)
-    training_loss = losses.compute_weighted_loss(
-        unweighted_loss, weights=weights, reduction=self._loss_reduction)
-    return LossSpec(
-        training_loss=training_loss,
-        unreduced_loss=unweighted_loss,
-        weights=weights,
-        processed_labels=labels)
-
-  def _create_tpu_estimator_spec(
-      self, features, mode, logits, labels=None, optimizer=None,
-      train_op_fn=None, regularization_losses=None):
-    """Returns an `EstimatorSpec`.
-
-    Args:
-      features: Input `dict` of `Tensor` or `SparseTensor` objects.
-      mode: Estimator's `ModeKeys`.
-      logits: logits `Tensor` with shape `[D0, D1, ... DN, 1]`. For many
-        applications, the shape is `[batch_size, 1]`.
-      labels: Labels integer or string `Tensor` with shape matching `logits`,
-        namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is required
-        argument when `mode` equals `TRAIN` or `EVAL`.
-      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
-        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
-        updates variables and increments `global_step`.
-      train_op_fn: Function that takes a scalar loss `Tensor` and returns
-        `train_op`. Used if `optimizer` is `None`.
-      regularization_losses: A list of additional scalar losses to be added to
-        the training loss, such as regularization losses. These losses are
-        usually expressed as a batch average, so for best results users need to
-        set `loss_reduction=SUM_OVER_BATCH_SIZE` or
-        `loss_reduction=SUM_OVER_NONZERO_WEIGHTS` when creating the head to
-        avoid scaling errors.
-    Returns:
-      `EstimatorSpec`.
-    Raises:
-      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
-        mode, or if both are set.
-    """
-    # Predict.
-    with ops.name_scope(self._name, 'head'):
-      with ops.name_scope(None, 'predictions', (logits,)):
-        pred_keys = prediction_keys.PredictionKeys
-        logits = _check_logits_final_dim(logits, self.logits_dimension)
-        logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC)
-        two_class_logits = array_ops.concat(
-            (array_ops.zeros_like(logits), logits),
-            axis=-1, name='two_class_logits')
-        probabilities = nn.softmax(
-            two_class_logits, name=pred_keys.PROBABILITIES)
-        class_ids = math_ops.argmax(
-            two_class_logits, axis=-1, name=pred_keys.CLASS_IDS)
-        class_ids = array_ops.expand_dims(class_ids, axis=-1)
-        if self._label_vocabulary:
-          table = lookup_ops.index_to_string_table_from_tensor(
-              vocabulary_list=self._label_vocabulary,
-              name='class_string_lookup')
-          classes = table.lookup(class_ids)
-        else:
-          classes = string_ops.as_string(class_ids, name='str_classes')
-        predictions = {
-            pred_keys.LOGITS: logits,
-            pred_keys.LOGISTIC: logistic,
-            pred_keys.PROBABILITIES: probabilities,
-            pred_keys.CLASS_IDS: class_ids,
-            pred_keys.CLASSES: classes,
-        }
-      if mode == model_fn.ModeKeys.PREDICT:
-        classifier_output = _classification_output(
-            scores=probabilities, n_classes=2,
-            label_vocabulary=self._label_vocabulary)
-        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
-            mode=model_fn.ModeKeys.PREDICT,
-            predictions=predictions,
-            export_outputs={
-                _DEFAULT_SERVING_KEY: classifier_output,
-                _CLASSIFY_SERVING_KEY: classifier_output,
-                _REGRESS_SERVING_KEY: export_output.RegressionOutput(
-                    value=logistic),
-                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
-            })
-
-      (training_loss, unreduced_loss, weights, processed_labels) = (
-          self.create_loss(
-              features=features, mode=mode, logits=logits, labels=labels))
-      if regularization_losses:
-        regularization_loss = math_ops.add_n(regularization_losses)
-        regularized_training_loss = math_ops.add_n(
-            [training_loss, regularization_loss])
-      else:
-        regularization_loss = None
-        regularized_training_loss = training_loss
-
-      # Eval.
-      if mode == model_fn.ModeKeys.EVAL:
-        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
-            mode=model_fn.ModeKeys.EVAL,
-            predictions=predictions,
-            loss=regularized_training_loss,
-            eval_metrics=_create_eval_metrics_tuple(
-                self._eval_metric_ops,
-                {
-                    'labels': processed_labels,
-                    'logits': logits,
-                    'logistic': logistic,
-                    'class_ids': class_ids,
-                    'weights': weights,
-                    'unreduced_loss': unreduced_loss,
-                    'regularization_loss': regularization_loss
-                }
-            ))
-
-      # Train.
-      if optimizer is not None:
-        if train_op_fn is not None:
-          raise ValueError('train_op_fn and optimizer cannot both be set.')
-        train_op = optimizer.minimize(
-            regularized_training_loss,
-            global_step=training_util.get_global_step())
-      elif train_op_fn is not None:
-        train_op = train_op_fn(regularized_training_loss)
-      else:
-        raise ValueError('train_op_fn and optimizer cannot both be None.')
-      train_op = _append_update_ops(train_op)
-      # Only summarize mean_loss for SUM reduction to preserve backwards
-      # compatibility. Otherwise skip it to avoid unnecessary computation.
-      if self._loss_reduction == losses.Reduction.SUM:
-        example_weight_sum = math_ops.reduce_sum(
-            weights * array_ops.ones_like(unreduced_loss))
-        mean_loss = training_loss / example_weight_sum
-      else:
-        mean_loss = None
-    with ops.name_scope(''):
-      keys = metric_keys.MetricKeys
-      summary.scalar(
-          _summary_key(self._name, keys.LOSS),
-          regularized_training_loss)
-      if mean_loss is not None:
-        summary.scalar(
-            _summary_key(self._name, keys.LOSS_MEAN), mean_loss)
-      if regularization_loss is not None:
-        summary.scalar(
-            _summary_key(self._name, keys.LOSS_REGULARIZATION),
-            regularization_loss)
-    return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
-        mode=model_fn.ModeKeys.TRAIN,
-        predictions=predictions,
-        loss=regularized_training_loss,
-        train_op=train_op)
-
-
-def _regression_head(
-    weight_column=None,
-    label_dimension=1,
-    loss_reduction=losses.Reduction.SUM,
-    loss_fn=None,
-    inverse_link_fn=None,
-    name=None):
-  """Creates a `_Head` for regression using the `mean_squared_error` loss.
-
-  The loss is the weighted sum over all input dimensions. Namely, if the input
-  labels have shape `[batch_size, label_dimension]`, the loss is the weighted
-  sum over both `batch_size` and `label_dimension`.
-
-  The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`.
-  In many applications, the shape is `[batch_size, label_dimension]`.
-
-  The `labels` shape must match `logits`, namely
-  `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape
-  `[D0, D1, ... DN]` is also supported.
-
-  If `weight_column` is specified, weights must be of shape
-  `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or
-  `[D0, D1, ... DN, label_dimension]`.
-
-  Supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
-  `(labels, logits, features)` as arguments and returns unreduced loss with
-  shape `[D0, D1, ... DN, label_dimension]`.
-
-  Also supports custom `inverse_link_fn`, also known as 'mean function'.
-  `inverse_link_fn` takes `logits` as argument and returns predicted values.
-  This function is the inverse of the link function defined in
-  https://en.wikipedia.org/wiki/Generalized_linear_model#Link_function
-  Namely, for poisson regression, set `inverse_link_fn=tf.exp`.
-
-  Args:
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.
-    label_dimension: Number of regression labels per example. This is the size
-      of the last dimension of the labels `Tensor` (typically, this has shape
-      `[batch_size, label_dimension]`).
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
-      reduce training loss over batch. Defaults to `SUM`.
-    loss_fn: Optional loss function. Defaults to `mean_squared_error`.
-    inverse_link_fn: Optional inverse link function, also known as 'mean
-      function'. Defaults to identity.
-    name: name of the head. If provided, summary and metrics keys will be
-      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
-
-  Returns:
-    An instance of `_Head` for linear regression.
-
-  Raises:
-    ValueError: If `label_dimension` or `loss_reduction` is invalid.
-  """
-  if (loss_reduction not in losses.Reduction.all() or
-      loss_reduction == losses.Reduction.NONE):
-    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
-  if loss_fn:
-    _validate_loss_fn_args(loss_fn)
-  return _RegressionHeadWithMeanSquaredErrorLoss(
-      weight_column=weight_column,
-      label_dimension=label_dimension,
-      loss_reduction=loss_reduction,
-      loss_fn=loss_fn,
-      inverse_link_fn=inverse_link_fn,
-      name=name)
-
-
-class _RegressionHeadWithMeanSquaredErrorLoss(_Head):
-  """`Head` for regression using the mean squared loss."""
-
-  def __init__(
-      self,
-      label_dimension,
-      weight_column=None,
-      loss_reduction=losses.Reduction.SUM,
-      loss_fn=None,
-      inverse_link_fn=None,
-      name=None):
-    """`Head` for regression."""
-    if label_dimension < 1:
-      raise ValueError('Invalid label_dimension %s.' % label_dimension)
-    self._logits_dimension = label_dimension
-    self._weight_column = weight_column
-    self._loss_reduction = loss_reduction
-    self._loss_fn = loss_fn
-    self._inverse_link_fn = inverse_link_fn
-    self._name = name
-
-  @property
-  def name(self):
-    return self._name
-
-  @property
-  def logits_dimension(self):
-    return self._logits_dimension
-
-  def create_loss(self, features, mode, logits, labels):
-    """See `Head`."""
-    del mode  # Unused for this head.
-    logits = ops.convert_to_tensor(logits)
-    labels = _check_dense_labels_match_logits_and_reshape(
-        labels=labels, logits=logits,
-        expected_labels_dimension=self._logits_dimension)
-    labels = math_ops.to_float(labels)
-    if self._loss_fn:
-      unweighted_loss = _call_loss_fn(
-          loss_fn=self._loss_fn, labels=labels, logits=logits,
-          features=features, expected_loss_dim=self._logits_dimension)
-    else:
-      unweighted_loss = losses.mean_squared_error(
-          labels=labels, predictions=logits, reduction=losses.Reduction.NONE)
-    weights = _get_weights_and_check_match_logits(
-        features=features, weight_column=self._weight_column, logits=logits,
-        allow_per_logit_weights=True)
-    training_loss = losses.compute_weighted_loss(
-        unweighted_loss, weights=weights, reduction=self._loss_reduction)
-    return LossSpec(
-        training_loss=training_loss,
-        unreduced_loss=unweighted_loss,
-        weights=weights,
-        processed_labels=labels)
-
-  def _eval_metric_ops(self, predicted_value, labels, weights, unreduced_loss,
-                       regularization_loss):
-    """Returns the Eval metric ops."""
-    keys = metric_keys.MetricKeys
-    # Estimator already adds a metric for loss.
-    eval_metric_ops = {
-        _summary_key(self._name, keys.LOSS_MEAN):
-            metrics_lib.mean(values=unreduced_loss, weights=weights),
-        _summary_key(self._name, keys.PREDICTION_MEAN):
-            _predictions_mean(
-                predictions=predicted_value,
-                weights=weights,
-                name=keys.PREDICTION_MEAN),
-        _summary_key(self._name, keys.LABEL_MEAN):
-            metrics_lib.mean(values=labels, weights=weights)
-    }
-    if regularization_loss is not None:
-      regularization_loss_key = _summary_key(
-          self._name, keys.LOSS_REGULARIZATION)
-      eval_metric_ops[regularization_loss_key] = metrics_lib.mean(
-          values=regularization_loss,
-          name=keys.LOSS_REGULARIZATION)
-    return eval_metric_ops
-
-  def _create_tpu_estimator_spec(
-      self, features, mode, logits, labels=None, optimizer=None,
-      train_op_fn=None, regularization_losses=None):
-    """Returns an `EstimatorSpec`.
-
-    Args:
-      features: Input `dict` of `Tensor` or `SparseTensor` objects.
-      mode: Estimator's `ModeKeys`.
-      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
-        For many applications, the shape is `[batch_size, logits_dimension]`.
-      labels: Labels `Tensor` with shape matching `logits`, namely
-        `[D0, D1, ... DN, logits_dimension]`. When `logits_dimension=1`, shape
-        `[D0, D1, ... DN]` is also supported. `labels` is required argument when
-        `mode` equals `TRAIN` or `EVAL`.
-      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
-        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
-        updates variables and increments `global_step`.
-      train_op_fn: Function that takes a scalar loss `Tensor` and returns
-        `train_op`. Used if `optimizer` is `None`.
-      regularization_losses: A list of additional scalar losses to be added to
-        the training loss, such as regularization losses. These losses are
-        usually expressed as a batch average, so for best results users need to
-        set `loss_reduction=SUM_OVER_BATCH_SIZE` or
-        `loss_reduction=SUM_OVER_NONZERO_WEIGHTS` when creating the head to
-        avoid scaling errors.
-    Returns:
-      A `model_fn._TPUEstimatorSpec` instance.
-    Raises:
-      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
-        mode, or if both are set.
-    """
-    # Predict.
-    with ops.name_scope(self._name, 'head'):
-      logits = _check_logits_final_dim(logits, self._logits_dimension)
-      if self._inverse_link_fn:
-        predicted_value = self._inverse_link_fn(logits)
-        predictions = {
-            prediction_keys.PredictionKeys.PREDICTIONS: predicted_value,
-            prediction_keys.PredictionKeys.LOGITS: logits,
-        }
-      else:
-        predicted_value = logits
-        predictions = {
-            prediction_keys.PredictionKeys.PREDICTIONS: predicted_value}
-      if mode == model_fn.ModeKeys.PREDICT:
-        regression_output = export_output.RegressionOutput(
-            value=predicted_value)
-        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
-            mode=model_fn.ModeKeys.PREDICT,
-            predictions=predictions,
-            export_outputs={
-                _DEFAULT_SERVING_KEY: regression_output,
-                _REGRESS_SERVING_KEY: regression_output,
-                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
-            })
-
-      training_loss, unreduced_loss, weights, _ = self.create_loss(
-          features=features, mode=mode, logits=logits, labels=labels)
-      if regularization_losses:
-        regularization_loss = math_ops.add_n(regularization_losses)
-        regularized_training_loss = math_ops.add_n(
-            [training_loss, regularization_loss])
-      else:
-        regularization_loss = None
-        regularized_training_loss = training_loss
-
-      # Eval.
-      if mode == model_fn.ModeKeys.EVAL:
-        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
-            mode=model_fn.ModeKeys.EVAL,
-            predictions=predictions,
-            loss=regularized_training_loss,
-            eval_metrics=_create_eval_metrics_tuple(
-                self._eval_metric_ops, {
-                    'predicted_value': predicted_value,
-                    'labels': labels,
-                    'weights': weights,
-                    'unreduced_loss': unreduced_loss,
-                    'regularization_loss': regularization_loss,
-                }))
-
-      # Train.
-      if optimizer is not None:
-        if train_op_fn is not None:
-          raise ValueError('train_op_fn and optimizer cannot both be set.')
-        train_op = optimizer.minimize(
-            regularized_training_loss,
-            global_step=training_util.get_global_step())
-      elif train_op_fn is not None:
-        train_op = train_op_fn(regularized_training_loss)
-      else:
-        raise ValueError('train_op_fn and optimizer cannot both be None.')
-      train_op = _append_update_ops(train_op)
-      # Only summarize mean_loss for SUM reduction to preserve backwards
-      # compatibility. Otherwise skip it to avoid unnecessary computation.
-      if self._loss_reduction == losses.Reduction.SUM:
-        example_weight_sum = math_ops.reduce_sum(
-            weights * array_ops.ones_like(unreduced_loss))
-        mean_loss = training_loss / example_weight_sum
-      else:
-        mean_loss = None
-    with ops.name_scope(''):
-      keys = metric_keys.MetricKeys
-      summary.scalar(
-          _summary_key(self._name, keys.LOSS),
-          regularized_training_loss)
-      if mean_loss is not None:
-        summary.scalar(
-            _summary_key(self._name, keys.LOSS_MEAN), mean_loss)
-      if regularization_loss is not None:
-        summary.scalar(
-            _summary_key(self._name, keys.LOSS_REGULARIZATION),
-            regularization_loss)
-    return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
-        mode=model_fn.ModeKeys.TRAIN,
-        predictions=predictions,
-        loss=regularized_training_loss,
-        train_op=train_op)
-
-
-def _append_update_ops(train_op):
-  """Returns `train_op` appending `UPDATE_OPS` collection if present."""
-  update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
-  if update_ops:
-    return control_flow_ops.group(train_op, *update_ops)
-  return train_op
-
-
-def _assert_range(labels, n_classes, message=None):
-  with ops.name_scope(None, 'assert_range', (labels,)):
-    assert_less = check_ops.assert_less_equal(
-        labels,
-        ops.convert_to_tensor(n_classes - 1, dtype=labels.dtype),
-        message=message or 'Labels must <= n_classes - 1')
-    assert_greater = check_ops.assert_non_negative(
-        labels, message=message or 'Labels must >= 0')
-    with ops.control_dependencies((assert_less, assert_greater)):
-      return array_ops.identity(labels)
-
-
-def _binary_logistic_or_multi_class_head(
-    n_classes, weight_column, label_vocabulary, loss_reduction):
-  """Creates either binary or multi-class head.
+from tensorflow_estimator.python.estimator.canned import head
 
-  Args:
-    n_classes: Number of label classes.
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example. If it is a string, it is
-      used as a key to fetch weight tensor from the `features`. If it is a
-      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-      then weight_column.normalizer_fn is applied on it to get weight tensor.
-    label_vocabulary: A list of strings represents possible label values. If
-      given, labels must be string type and have any value in
-      `label_vocabulary`. If it is not given, that means labels are
-      already encoded as integer or float within [0, 1] for `n_classes=2` and
-      encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
-      Also there will be errors if vocabulary is not provided and labels are
-      string.
-    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-      to reduce training loss over batch. Defaults to `SUM`.
+# Include attrs that start with single underscore.
+head.__all__ = [s for s in dir(head) if not s.startswith('__')]
 
-  Returns:
-    `head._Head` instance.
-  """
-  if n_classes == 2:
-    head = _binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column=weight_column,
-        label_vocabulary=label_vocabulary,
-        loss_reduction=loss_reduction)
-  else:
-    head = _multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, weight_column=weight_column,
-        label_vocabulary=label_vocabulary,
-        loss_reduction=loss_reduction)
-  return head
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.head import *
diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py
deleted file mode 100644
index de9c84d2ef..0000000000
--- a/tensorflow/python/estimator/canned/head_test.py
+++ /dev/null
@@ -1,4056 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for head.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import six
-
-from tensorflow.core.framework import summary_pb2
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.canned import dnn_testing_utils
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column as feature_column_lib
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import queue_runner_impl
-
-
-_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-
-
-def _initialize_variables(test_case, scaffold):
-  scaffold.finalize()
-  test_case.assertIsNone(scaffold.init_feed_dict)
-  test_case.assertIsNone(scaffold.init_fn)
-  scaffold.init_op.run()
-  scaffold.ready_for_local_init_op.eval()
-  scaffold.local_init_op.run()
-  scaffold.ready_op.eval()
-  test_case.assertIsNotNone(scaffold.saver)
-
-
-def _assert_simple_summaries(test_case, expected_summaries, summary_str,
-                             tol=1e-6):
-  """Assert summary the specified simple values.
-
-  Args:
-    test_case: test case.
-    expected_summaries: Dict of expected tags and simple values.
-    summary_str: Serialized `summary_pb2.Summary`.
-    tol: Tolerance for relative and absolute.
-  """
-  summary = summary_pb2.Summary()
-  summary.ParseFromString(summary_str)
-  test_case.assertAllClose(expected_summaries, {
-      v.tag: v.simple_value for v in summary.value
-  }, rtol=tol, atol=tol)
-
-
-def _assert_no_hooks(test_case, spec):
-  test_case.assertAllEqual([], spec.training_chief_hooks)
-  test_case.assertAllEqual([], spec.training_hooks)
-
-
-def _sigmoid(logits):
-  return 1 / (1 + np.exp(-logits))
-
-
-class CreateEstimatorSpecTest(test.TestCase):
-
-  class _HeadWithTPUSupport(head_lib._Head):
-    """Head that overrides _create_tpu_estimator_spec."""
-
-    def name(self):
-      return 'HeadWithTPUSupport'
-
-    def logits_dimension(self):
-      return None
-
-    def create_loss(self, features, mode, logits, labels):
-      return None
-
-    def _create_tpu_estimator_spec(self, features, mode, logits, labels=None,
-                                   optimizer=None, train_op_fn=None,
-                                   regularization_losses=None):
-      return model_fn._TPUEstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL,
-          loss=constant_op.constant(0.0, dtype=dtypes.float32))
-
-  class _HeadWithOutTPUSupport(head_lib._Head):
-    """Head that overrides create_estimator_spec."""
-
-    def name(self):
-      return 'HeadWithOutTPUSupport'
-
-    def logits_dimension(self):
-      return None
-
-    def create_loss(self, features, mode, logits, labels):
-      return None
-
-    def create_estimator_spec(self, features, mode, logits, labels=None,
-                              optimizer=None, train_op_fn=None,
-                              regularization_losses=None):
-      return model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL,
-          loss=constant_op.constant(0.0, dtype=dtypes.float32))
-
-  class _InvalidHead(head_lib._Head):
-    """Head that overrides neither estimator_spec functions."""
-
-    def name(self):
-      return 'InvalidHead'
-
-    def logits_dimension(self):
-      return None
-
-    def create_loss(self, features, mode, logits, labels):
-      return None
-
-  def test_head_override_tpu_estimator_spec(self):
-    """Test for `_Head` that overrides _create_tpu_estimator_spec."""
-    head = self._HeadWithTPUSupport()
-
-    tpu_spec = head._create_tpu_estimator_spec(
-        features=None, mode=None, logits=None)
-    self.assertTrue(isinstance(tpu_spec, model_fn._TPUEstimatorSpec))
-    est_spec = head.create_estimator_spec(
-        features=None, mode=None, logits=None)
-    self.assertTrue(isinstance(est_spec, model_fn.EstimatorSpec))
-
-  def test_head_override_estimator_spec(self):
-    """Test for `_Head` that overrides create_estimator_spec."""
-    head = self._HeadWithOutTPUSupport()
-
-    with self.assertRaisesRegexp(
-        NotImplementedError,
-        'TPUEstimatorSpec not available for this model head.'):
-      _ = head._create_tpu_estimator_spec(
-          features=None, mode=None, logits=None)
-    est_spec = head.create_estimator_spec(
-        features=None, mode=None, logits=None)
-    self.assertTrue(isinstance(est_spec, model_fn.EstimatorSpec))
-
-  def test_invalid_head_class(self):
-    head = self._InvalidHead()
-
-    with self.assertRaisesRegexp(
-        NotImplementedError,
-        'TPUEstimatorSpec not available for this model head.'):
-      _ = head._create_tpu_estimator_spec(
-          features=None, mode=None, logits=None)
-    with self.assertRaisesRegexp(
-        NotImplementedError,
-        r'Subclasses of _Head must implement `create_estimator_spec\(\)` or '
-        r'_create_tpu_estimator_spec\(\).'):
-      _ = head.create_estimator_spec(
-          features=None, mode=None, logits=None)
-
-
-class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
-
-  def setUp(self):
-    ops.reset_default_graph()
-
-  def test_n_classes_is_none(self):
-    with self.assertRaisesRegexp(ValueError, 'n_classes must be > 2'):
-      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-          n_classes=None)
-
-  def test_n_classes_is_2(self):
-    with self.assertRaisesRegexp(ValueError, 'n_classes must be > 2'):
-      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-          n_classes=2)
-
-  def test_invalid_loss_reduction(self):
-    with self.assertRaisesRegexp(
-        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
-      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-          n_classes=3, loss_reduction='invalid_loss_reduction')
-    with self.assertRaisesRegexp(
-        ValueError, r'Invalid loss_reduction: none'):
-      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-          n_classes=3, loss_reduction=losses.Reduction.NONE)
-
-  def test_loss_fn_arg_labels_missing(self):
-    def _loss_fn(logits):
-      del logits  # Unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn must contain argument: labels\. '
-        r'Given arguments: \(\'logits\',\)'):
-      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-          n_classes=3, loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_logits_missing(self):
-    def _loss_fn(labels):
-      del labels  # unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn must contain argument: logits\. '
-        r'Given arguments: \(\'labels\',\)'):
-      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-          n_classes=3, loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_features_ok(self):
-    def _loss_fn(labels, logits, features):
-      del labels, logits, features  # Unused
-    head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_invalid(self):
-    def _loss_fn(labels, logits, name=None):
-      del labels, logits, name  # Unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn has unexpected args: \[\'name\'\]'):
-      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-          n_classes=3, loss_fn=_loss_fn)
-
-  def test_invalid_logits_shape(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-    self.assertEqual(n_classes, head.logits_dimension)
-
-    # Logits should be shape (batch_size, 3).
-    logits_2x2 = np.array(((45., 44.), (41., 42.),))
-
-    # Static shape.
-    with self.assertRaisesRegexp(ValueError, 'logits shape'):
-      head.create_estimator_spec(
-          features={'x': np.array(((30.,), (42.,),))},
-          mode=model_fn.ModeKeys.PREDICT,
-          logits=logits_2x2)
-
-    # Dynamic shape.
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((30.,), (42.,),))},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits_placeholder)
-    with self.cached_session():
-      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
-        spec.predictions[prediction_keys.PredictionKeys.PROBABILITIES].eval({
-            logits_placeholder: logits_2x2
-        })
-
-  def test_invalid_labels_shape(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-    self.assertEqual(n_classes, head.logits_dimension)
-
-    # Logits should be shape (batch_size, 3).
-    # Labels should be shape (batch_size, 1).
-    labels_2x2 = np.array(((45, 44), (41, 42),), dtype=np.int)
-    logits_2x3 = np.array(((1., 2., 3.), (1., 2., 3.),))
-    features = {'x': np.array(((42.,),))}
-
-    # Static shape.
-    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
-      head.create_loss(
-          features=features,
-          mode=model_fn.ModeKeys.EVAL,
-          logits=logits_2x3,
-          labels=labels_2x2)
-
-    # Dynamic shape.
-    labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_placeholder,
-        labels=labels_placeholder)[0]
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'):
-        training_loss.eval({
-            logits_placeholder: logits_2x3,
-            labels_placeholder: labels_2x2
-        })
-
-  def test_invalid_labels_type(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-    self.assertEqual(n_classes, head.logits_dimension)
-
-    # Logits should be shape (batch_size, 3).
-    # Labels should be shape (batch_size, 1).
-    labels_2x1 = np.array(((1.,), (1.,),))
-    logits_2x3 = np.array(((1., 2., 3.), (1., 2., 3.),))
-    features = {'x': np.array(((42.,),))}
-
-    # Static shape.
-    with self.assertRaisesRegexp(ValueError, 'Labels dtype'):
-      head.create_loss(
-          features=features,
-          mode=model_fn.ModeKeys.EVAL,
-          logits=logits_2x3,
-          labels=labels_2x1)
-
-    # Dynamic shape.
-    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    with self.assertRaisesRegexp(ValueError, 'Labels dtype'):
-      head.create_loss(
-          features=features,
-          mode=model_fn.ModeKeys.EVAL,
-          logits=logits_placeholder,
-          labels=labels_placeholder)
-
-  def test_invalid_labels_values(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-    self.assertEqual(n_classes, head.logits_dimension)
-
-    labels_2x1_with_large_id = np.array(((45,), (1,),), dtype=np.int)
-    labels_2x1_with_negative_id = np.array(((-5,), (1,),), dtype=np.int)
-    logits_2x3 = np.array(((1., 2., 4.), (1., 2., 3.),))
-
-    labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    training_loss = head.create_loss(
-        features={'x': np.array(((42.,),))},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_placeholder,
-        labels=labels_placeholder)[0]
-    with self.cached_session():
-      with self.assertRaisesOpError('Labels must <= n_classes - 1'):
-        training_loss.eval({
-            labels_placeholder: labels_2x1_with_large_id,
-            logits_placeholder: logits_2x3
-        })
-
-    with self.cached_session():
-      with self.assertRaisesOpError('Labels must >= 0'):
-        training_loss.eval({
-            labels_placeholder: labels_2x1_with_negative_id,
-            logits_placeholder: logits_2x3
-        })
-
-  def test_invalid_labels_sparse_tensor(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-    self.assertEqual(n_classes, head.logits_dimension)
-
-    labels_2x1 = sparse_tensor.SparseTensor(
-        values=['english', 'italian'],
-        indices=[[0, 0], [1, 0]],
-        dense_shape=[2, 1])
-    logits_2x3 = np.array(((1., 2., 4.), (1., 2., 3.),))
-
-    with self.assertRaisesRegexp(
-        ValueError, 'SparseTensor labels are not supported.'):
-      head.create_loss(
-          features={'x': np.array(((42.,),))},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=logits_2x3,
-          labels=labels_2x1)
-
-  def test_incompatible_labels_shape(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-    self.assertEqual(n_classes, head.logits_dimension)
-
-    # Logits should be shape (batch_size, 3).
-    # Labels should be shape (batch_size, 1).
-    # Here batch sizes are different.
-    values_3x1 = np.array(((1,), (1,), (1,),))
-    values_2x3 = np.array(((1., 2., 3.), (1., 2., 3.),))
-    features = {'x': values_2x3}
-
-    # Static shape.
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'Shape mismatch: The shape of labels \(received \(3,\)\) should equal '
-        r'the shape of logits except for the last dimension '
-        r'\(received \(2, 3\)\)\.'
-    ):
-      head.create_loss(
-          features=features,
-          mode=model_fn.ModeKeys.EVAL,
-          logits=values_2x3,
-          labels=values_3x1)
-
-    # Dynamic shape.
-    labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_placeholder,
-        labels=labels_placeholder)[0]
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'):
-        training_loss.eval({
-            labels_placeholder: values_3x1,
-            logits_placeholder: values_2x3
-        })
-
-  def test_name(self):
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, name='foo')
-    self.assertEqual('foo', head.name)
-
-  def test_predict(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-    self.assertEqual(n_classes, head.logits_dimension)
-
-    logits = [[1., 0., 0.], [0., 0., 1.]]
-    expected_probabilities = [[0.576117, 0.2119416, 0.2119416],
-                              [0.2119416, 0.2119416, 0.576117]]
-    expected_class_ids = [[0], [2]]
-    expected_classes = [[b'0'], [b'2']]
-    expected_export_classes = [[b'0', b'1', b'2']] * 2
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    self.assertItemsEqual(
-        (_DEFAULT_SERVING_KEY, 'predict', 'classification'),
-        spec.export_outputs.keys())
-
-    # Assert predictions and export_outputs.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      predictions = sess.run(spec.predictions)
-      self.assertAllClose(logits,
-                          predictions[prediction_keys.PredictionKeys.LOGITS])
-      self.assertAllClose(
-          expected_probabilities,
-          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-      self.assertAllClose(expected_class_ids,
-                          predictions[prediction_keys.PredictionKeys.CLASS_IDS])
-      self.assertAllEqual(expected_classes,
-                          predictions[prediction_keys.PredictionKeys.CLASSES])
-
-      self.assertAllClose(
-          expected_probabilities,
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
-      self.assertAllEqual(
-          expected_export_classes,
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
-
-  def test_predict_with_vocabulary_list(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
-
-    logits = [[1., 0., 0.], [0., 0., 1.]]
-    expected_classes = [[b'aang'], [b'zuko']]
-    expected_export_classes = [[b'aang', b'iroh', b'zuko']] * 2
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertAllEqual(
-          expected_classes,
-          sess.run(spec.predictions[prediction_keys.PredictionKeys.CLASSES]))
-      self.assertAllEqual(
-          expected_export_classes,
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
-
-  def test_weight_should_not_impact_prediction(self):
-    n_classes = 3
-    logits = [[1., 0., 0.], [0., 0., 1.]]
-    expected_probabilities = [[0.576117, 0.2119416, 0.2119416],
-                              [0.2119416, 0.2119416, 0.576117]]
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, weight_column='label_weights')
-
-    weights_2x1 = [[1.], [2.]]
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array(((42,),), dtype=np.int32),
-            'label_weights': weights_2x1,
-        },
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      predictions = sess.run(spec.predictions)
-      self.assertAllClose(logits,
-                          predictions[prediction_keys.PredictionKeys.LOGITS])
-      self.assertAllClose(
-          expected_probabilities,
-          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-
-  def test_eval_create_loss(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # loss = cross_entropy(labels, logits) = [10, 0].
-    expected_training_loss = 10.
-    # Create loss.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
-
-  def test_eval_create_loss_loss_fn(self):
-    """Tests head.create_loss for eval mode and custom loss_fn."""
-    loss = np.array([[1.], [2.]], dtype=np.float32)
-    logits_input = np.array([[-10., 10., 0.], [-15., 10., 0]], dtype=np.float32)
-    labels_input = np.array([[1], [2]], dtype=np.int64)
-    def _loss_fn(labels, logits):
-      check_labels = control_flow_ops.Assert(
-          math_ops.reduce_all(math_ops.equal(labels, labels_input)),
-          data=[labels])
-      check_logits = control_flow_ops.Assert(
-          math_ops.reduce_all(math_ops.equal(logits, logits_input)),
-          data=[logits])
-      with ops.control_dependencies([check_labels, check_logits]):
-        return constant_op.constant(loss)
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, loss_fn=_loss_fn)
-
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_input,
-        labels=labels_input)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(np.sum(loss), actual_training_loss.eval())
-
-  def test_eval_create_loss_loss_fn_wrong_shape(self):
-    """Tests custom loss_fn that returns Tensor of unexpected shape."""
-    loss = np.array([1., 2.], dtype=np.float32)
-    def _loss_fn(labels, logits):
-      del labels, logits  # Unused
-      return constant_op.constant(loss)
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, loss_fn=_loss_fn)
-
-    logits = np.array([[-10., 10., 0.], [-15., 10., 0.]], dtype=np.float32)
-    labels = np.array([[1], [2]], dtype=np.int64)
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. \] '
-          r'\[logits_shape: \] \[2 3\] \[loss_shape: \] \[2\]'):
-        actual_training_loss.eval()
-
-  def test_eval_labels_none(self):
-    """Tests that error is raised when labels is None."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3)
-
-    with self.assertRaisesRegexp(
-        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32),
-          labels=None)
-
-  def test_eval(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
-    expected_loss = 10.
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_loss / 2,
-        keys.ACCURACY: 0.5,  # 1 of 2 labels is correct.
-    }
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, and metrics.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval()
-                             for k in value_ops},
-          rtol=tol,
-          atol=tol)
-
-  def test_eval_metric_ops_with_head_name(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, name='some_multiclass_head')
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    expected_metric_keys = [
-        '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS_MEAN),
-        '{}/some_multiclass_head'.format(metric_keys.MetricKeys.ACCURACY)
-    ]
-    self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys())
-
-  def test_eval_with_regularization_losses(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    regularization_losses = [1.5, 0.5]
-    expected_regularization_loss = 2.
-    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
-    #                    = sum(10, 0) / 2 = 5.
-    expected_unregularized_loss = 5.
-    expected_regularized_loss = (
-        expected_unregularized_loss + expected_regularization_loss)
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels,
-        regularization_losses=regularization_losses)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_unregularized_loss,
-        keys.LOSS_REGULARIZATION: expected_regularization_loss,
-        keys.ACCURACY: 0.5,  # 1 of 2 labels is correct.
-    }
-
-    # Assert predictions, loss, and metrics.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_regularized_loss, loss, rtol=tol, atol=tol)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval()
-                             for k in value_ops},
-          rtol=tol,
-          atol=tol)
-
-  def test_eval_with_label_vocabulary_create_loss(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
-    logits = [[10., 0, 0], [0, 10, 0]]
-    labels = [[b'iroh'], [b'iroh']]
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # loss = cross_entropy(labels, logits) = [10, 0].
-    expected_training_loss = 10.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
-
-  def test_eval_with_label_vocabulary(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
-
-    logits = [[10., 0, 0], [0, 10, 0]]
-    labels = [[b'iroh'], [b'iroh']]
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
-    expected_loss = 10.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_loss / 2,
-        keys.ACCURACY: 0.5,  # 1 of 2 labels is correct.
-    }
-
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
-          rtol=tol, atol=tol)
-
-  def test_weighted_multi_example_eval(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, weight_column='label_weights')
-
-    # Create estimator spec.
-    logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
-    labels = np.array(((1,), (2,), (2,)), dtype=np.int64)
-    weights_3x1 = np.array(((1.,), (2.,), (3.,)), dtype=np.float64)
-    # loss = sum(cross_entropy(labels, logits) * [1, 2, 3])
-    #      = sum([10, 10, 0] * [1, 2, 3]) = 30
-    expected_loss = 30.
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array(((42,),), dtype=np.int32),
-            'label_weights': weights_3x1,
-        },
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_loss / np.sum(weights_3x1),
-        # Weighted accuracy is 1 * 3.0 / sum weights = 0.5
-        keys.ACCURACY: 0.5,
-    }
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert loss, and metrics.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
-          rtol=tol, atol=tol)
-
-  def test_train_create_loss(self):
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3)
-
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-
-    # unreduced_loss = cross_entropy(labels, logits) = [10, 0].
-    expected_unreduced_loss = [[10.], [0.]]
-    # Weights default to 1.
-    expected_weights = 1.
-    # training_loss = 1 * 10 + 1 * 0
-    expected_training_loss = 10.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    tol = 1e-2
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
-      self.assertAllClose(expected_weights, actual_weights)
-
-  def test_train_create_loss_loss_reduction(self):
-    """Tests create_loss with loss_reduction."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
-
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-
-    # unreduced_loss = cross_entropy(labels, logits) = [10, 0].
-    expected_unreduced_loss = [[10.], [0.]]
-    # Weights default to 1.
-    expected_weights = 1.
-    # training_loss = 1 * 10 + 1 * 0 / num_nonzero_weights
-    expected_training_loss = 10. / 2.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    tol = 1e-2
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
-      self.assertAllClose(expected_weights, actual_weights)
-
-  def test_train_labels_none(self):
-    """Tests that error is raised when labels is None."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3)
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    with self.assertRaisesRegexp(
-        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32),
-          labels=None,
-          train_op_fn=_no_op_train_fn)
-
-  def test_train(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=2)])
-
-    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
-    expected_loss = 10.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    self.assertIsNotNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
-          train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
-      }, summary_str, tol)
-
-  def test_train_with_optimizer(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    expected_train_result = 'my_train_op'
-
-    class _Optimizer(object):
-
-      def minimize(self, loss, global_step):
-        del global_step
-        return string_ops.string_join(
-            [constant_op.constant(expected_train_result),
-             string_ops.as_string(loss, precision=2)])
-
-    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
-    expected_loss = 10.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        optimizer=_Optimizer())
-
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run((spec.loss, spec.train_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
-          train_result)
-
-  def test_train_with_update_ops(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
-
-    with ops.Graph().as_default():
-      w = variables.Variable(1)
-      update_op = w.assign_add(1)
-      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
-
-      t = variables.Variable('')
-      expected_train_result = b'my_train_op'
-      def _train_op_fn(loss):
-        del loss
-        return t.assign(expected_train_result)
-
-      spec = head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32),
-          labels=np.array(((1,), (1,)), dtype=np.int64),
-          train_op_fn=_train_op_fn)
-
-      with self.cached_session() as sess:
-        _initialize_variables(self, spec.scaffold)
-        sess.run(spec.train_op)
-        w_value, t_value = sess.run([w, t])
-        self.assertEqual(2, w_value)
-        self.assertEqual(expected_train_result, t_value)
-
-  def test_train_summaries_with_head_name(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, name='some_multiclass_head')
-
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
-    expected_loss = 10.
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-
-    def _train_op_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    # Assert summaries.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      summary_str = sess.run(spec.scaffold.summary_op)
-      _assert_simple_summaries(self, {
-          '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS):
-              expected_loss,
-          '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS_MEAN):
-              expected_loss / 2,
-      }, summary_str, tol)
-
-  def test_train_with_regularization_losses(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-
-    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=2)])
-
-    regularization_losses = [1.5, 0.5]
-    expected_regularization_loss = 2.
-    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
-    #                    = sum(10, 0) / 2 = 5.
-    # loss = unregularized_loss + regularization_loss = 7.
-    expected_loss = 7.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn,
-        regularization_losses=regularization_losses)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
-          train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          metric_keys.MetricKeys.LOSS_REGULARIZATION: (
-              expected_regularization_loss),
-      }, summary_str, tol)
-
-  def test_train_one_dim_create_loss(self):
-    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, weight_column='label_weights')
-
-    logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
-    labels_rank_1 = np.array((1, 2, 2,), dtype=np.int64)
-    weights_rank_1 = np.array((1., 2., 3.,), dtype=np.float64)
-    features = {
-        'x': np.array(((42,),), dtype=np.float32),
-        'label_weights': weights_rank_1
-    }
-
-    # unreduced_loss = cross_entropy(labels, logits) = [10, 10, 0].
-    expected_unreduced_loss = [[10.], [10.], [0.]]
-    # weights are reshaped to [3, 1] to match logits.
-    expected_weights = [[1.], [2.], [3.]]
-    # training_loss = 1 * 10 + 2 * 10 + 3 * 0 = 30.
-    expected_training_loss = 30.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels_rank_1)
-    tol = 1e-2
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
-      self.assertAllClose(expected_weights, actual_weights.eval())
-
-  def test_train_one_dim(self):
-    """Tests train with 1D labels and weights (shape [batch_size])."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, weight_column='label_weights')
-
-    logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
-    labels_rank_1 = np.array((1, 2, 2,), dtype=np.int64)
-    weights_rank_1 = np.array((1., 2., 3.,), dtype=np.float64)
-
-    self.assertEqual((3,), labels_rank_1.shape)
-    self.assertEqual((3,), weights_rank_1.shape)
-
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=2)])
-
-    # loss = sum(cross_entropy(labels, logits) * [1, 2, 3])
-    #      = sum([10, 10, 0] * [1, 2, 3]) = 30
-    expected_loss = 30.
-
-    features = {
-        'x': np.array(((42,),), dtype=np.float32),
-        'label_weights': weights_rank_1
-    }
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels_rank_1,
-        train_op_fn=_train_op_fn)
-
-    self.assertIsNotNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
-          train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          metric_keys.MetricKeys.LOSS_MEAN: (
-              expected_loss / np.sum(weights_rank_1)),
-      }, summary_str, tol)
-
-  def test_train_with_vocabulary_create_loss(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
-
-    logits = [[10., 0, 0], [0, 10, 0]]
-    labels = [[b'iroh'], [b'iroh']]
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # loss = cross_entropy(labels, logits) = [10, 0].
-    expected_training_loss = 10.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
-
-  def test_train_with_vocabulary(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
-
-    logits = [[10., 0, 0], [0, 10, 0]]
-    labels = [[b'iroh'], [b'iroh']]
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-
-    def _train_op_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
-    expected_loss = 10.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss = sess.run(spec.loss)
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-
-  def test_weighted_multi_example_train(self):
-    n_classes = 3
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes, weight_column='label_weights')
-
-    # Create estimator spec.
-    logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
-    labels = np.array(((1,), (2,), (2,)), dtype=np.int64)
-    weights_3x1 = np.array(((1.,), (2.,), (3.,)), dtype=np.float64)
-    expected_train_result = 'my_train_op'
-    # loss = sum(cross_entropy(labels, logits) * [1, 2, 3])
-    #      = sum([10, 10, 0] * [1, 2, 3]) = 30
-    expected_loss = 30.
-
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=2)])
-
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array(((42,),), dtype=np.float32),
-            'label_weights': weights_3x1,
-        },
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    self.assertIsNotNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
-          train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          # loss mean = sum(cross_entropy(labels, logits) * [1,2,3]) / (1+2+3)
-          #      = sum([10, 10, 0] * [1, 2, 3]) / 6 = 30 / 6
-          metric_keys.MetricKeys.LOSS_MEAN:
-              expected_loss / np.sum(weights_3x1),
-      }, summary_str, tol)
-
-  def test_multi_dim_weighted_train_create_loss(self):
-    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, weight_column='weights')
-
-    logits = np.array([[[10, 0, 0], [12, 0, 0]],
-                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
-    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
-    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-
-    # unreduced_loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
-    expected_unreduced_loss = [[[0.], [12.]], [[0.], [15.]]]
-    # weights are reshaped to [2, 2, 1] to match logits.
-    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
-    # training_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5
-    expected_training_loss = 55.5
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    tol = 1e-2
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
-      self.assertAllClose(expected_weights, actual_weights.eval())
-
-  def test_multi_dim_weighted_train(self):
-    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, weight_column='weights')
-
-    logits = np.array([[[10, 0, 0], [12, 0, 0]],
-                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
-    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
-    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=2)])
-
-    # loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
-    # weighted_sum_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5
-    expected_loss = 55.5
-    spec = head.create_estimator_spec(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run((spec.loss, spec.train_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
-          train_result)
-
-  def test_multi_dim_train_weights_wrong_inner_dim(self):
-    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 1]."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, weight_column='weights')
-    logits = np.array([[[10, 0, 0], [12, 0, 0]],
-                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
-    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
-    weights = np.array([[1.], [2.]], dtype=np.float32)
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_no_op_train_fn)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'):
-        spec.loss.eval()
-
-  def test_multi_dim_train_weights_wrong_outer_dim(self):
-    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 3]."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, weight_column='weights')
-    logits = np.array([[[10, 0, 0], [12, 0, 0]],
-                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
-    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
-    weights = np.array([[[1., 1.1, 1.2], [1.5, 1.6, 1.7]],
-                        [[2., 2.1, 2.2], [2.5, 2.6, 2.7]]])
-    weights_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features={'weights': weights_placeholder},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_no_op_train_fn)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 3\]'):
-        spec.loss.eval({weights_placeholder: weights})
-
-  def test_multi_dim_weighted_eval(self):
-    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
-    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
-        n_classes=3, weight_column='weights')
-    logits = np.array([[[10, 0, 0], [12, 0, 0]],
-                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
-    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
-    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    # loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
-    # weighted_sum_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5
-    expected_loss = 55.5
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_loss / np.sum(weights),
-        keys.ACCURACY: (1.*1. + 1.5*0. + 2.*1. + 2.5*0.) / np.sum(weights),
-    }
-
-    # Assert predictions, loss, and metrics.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
-          rtol=tol, atol=tol)
-
-
-class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
-
-  def setUp(self):
-    ops.reset_default_graph()
-
-  def test_threshold_too_small(self):
-    with self.assertRaisesRegexp(ValueError, r'thresholds not in \(0, 1\)'):
-      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-          thresholds=(0., 0.5))
-
-  def test_threshold_too_large(self):
-    with self.assertRaisesRegexp(ValueError, r'thresholds not in \(0, 1\)'):
-      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-          thresholds=(0.5, 1.))
-
-  def test_invalid_loss_reduction(self):
-    with self.assertRaisesRegexp(
-        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
-      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-          loss_reduction='invalid_loss_reduction')
-    with self.assertRaisesRegexp(
-        ValueError, r'Invalid loss_reduction: none'):
-      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-          loss_reduction=losses.Reduction.NONE)
-
-  def test_loss_fn_arg_labels_missing(self):
-    def _loss_fn(logits):
-      del logits  # Unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn must contain argument: labels\. '
-        r'Given arguments: \(\'logits\',\)'):
-      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-          loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_logits_missing(self):
-    def _loss_fn(labels):
-      del labels  # unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn must contain argument: logits\. '
-        r'Given arguments: \(\'labels\',\)'):
-      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-          loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_features_ok(self):
-    def _loss_fn(labels, logits, features):
-      del labels, logits, features  # Unused
-      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-          loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_invalid(self):
-    def _loss_fn(labels, logits, name=None):
-      del labels, logits, name  # Unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn has unexpected args: \[\'name\'\]'):
-      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-          loss_fn=_loss_fn)
-
-  def test_invalid_logits_shape(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-    self.assertEqual(1, head.logits_dimension)
-
-    # Logits should be shape (batch_size, 1).
-    logits_2x2 = np.array(((45., 44.), (41., 42.),))
-
-    # Static shape.
-    with self.assertRaisesRegexp(ValueError, 'logits shape'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42.,),))},
-          mode=model_fn.ModeKeys.PREDICT,
-          logits=logits_2x2)
-
-    # Dynamic shape.
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),))},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits_placeholder)
-    with self.cached_session():
-      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
-        spec.predictions[prediction_keys.PredictionKeys.PROBABILITIES].eval({
-            logits_placeholder: logits_2x2
-        })
-
-  def test_invalid_labels_shape(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-    self.assertEqual(1, head.logits_dimension)
-
-    # Labels and logits should be shape (batch_size, 1).
-    labels_2x2 = np.array(((45., 44.), (41., 42.),))
-    logits_2x1 = np.array(((45.,), (41.,),))
-
-    # Static shape.
-    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
-      head.create_loss(
-          features={'x': np.array(((42.,),))},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=logits_2x1,
-          labels=labels_2x2)
-
-    # Dynamic shape.
-    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    training_loss = head.create_loss(
-        features={'x': np.array(((42.,),))},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_placeholder,
-        labels=labels_placeholder)[0]
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'):
-        training_loss.eval({
-            logits_placeholder: logits_2x1,
-            labels_placeholder: labels_2x2
-        })
-
-  def test_incompatible_labels_shape(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-    self.assertEqual(1, head.logits_dimension)
-
-    # Both logits and labels should be shape (batch_size, 1).
-    values_2x1 = np.array(((0.,), (1.,),))
-    values_3x1 = np.array(((0.,), (1.,), (0.,),))
-
-    # Static shape.
-    with self.assertRaisesRegexp(
-        ValueError, 'logits and labels must have the same shape'):
-      head.create_loss(
-          features={'x': values_2x1},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=values_2x1,
-          labels=values_3x1)
-    with self.assertRaisesRegexp(
-        ValueError, 'logits and labels must have the same shape'):
-      head.create_loss(
-          features={'x': values_2x1},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=values_3x1,
-          labels=values_2x1)
-
-    # Dynamic shape.
-    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    training_loss = head.create_loss(
-        features={'x': values_2x1},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_placeholder,
-        labels=labels_placeholder)[0]
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[expected_labels_shape: \] \[3 1\] \[labels_shape: \] \[2 1\]'):
-        training_loss.eval({
-            labels_placeholder: values_2x1,
-            logits_placeholder: values_3x1
-        })
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'):
-        training_loss.eval({
-            labels_placeholder: values_3x1,
-            logits_placeholder: values_2x1
-        })
-
-  def test_name(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        name='foo')
-    self.assertEqual('foo', head.name)
-
-  def test_predict(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = [[0.3], [-0.4]]
-    expected_logistics = [[0.574443], [0.401312]]
-    expected_probabilities = [[0.425557, 0.574443], [0.598688, 0.401312]]
-    expected_class_ids = [[1], [0]]
-    expected_classes = [[b'1'], [b'0']]
-    expected_export_classes = [[b'0', b'1']] * 2
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    # Assert spec contains expected tensors.
-    self.assertIsNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNone(spec.train_op)
-    self.assertItemsEqual(('classification', 'regression', 'predict',
-                           _DEFAULT_SERVING_KEY), spec.export_outputs.keys())
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      predictions = sess.run(spec.predictions)
-      self.assertAllClose(logits,
-                          predictions[prediction_keys.PredictionKeys.LOGITS])
-      self.assertAllClose(expected_logistics,
-                          predictions[prediction_keys.PredictionKeys.LOGISTIC])
-      self.assertAllClose(
-          expected_probabilities,
-          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-      self.assertAllClose(expected_class_ids,
-                          predictions[prediction_keys.PredictionKeys.CLASS_IDS])
-      self.assertAllEqual(expected_classes,
-                          predictions[prediction_keys.PredictionKeys.CLASSES])
-      self.assertAllClose(
-          expected_probabilities,
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
-      self.assertAllEqual(
-          expected_export_classes,
-          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
-      self.assertAllClose(expected_logistics,
-                          sess.run(spec.export_outputs['regression'].value))
-
-  def test_predict_with_vocabulary_list(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        label_vocabulary=['aang', 'iroh'])
-
-    logits = [[1.], [0.]]
-    expected_classes = [[b'iroh'], [b'aang']]
-
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertAllEqual(
-          expected_classes,
-          sess.run(spec.predictions[prediction_keys.PredictionKeys.CLASSES]))
-
-  def test_eval_create_loss(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-
-    # loss = cross_entropy(labels, logits) = [0, 41].
-    expected_training_loss = 41.
-    # Create loss.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
-
-  def test_eval_labels_none(self):
-    """Tests that error is raised when labels is None."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    with self.assertRaisesRegexp(
-        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=np.array(((45,), (-41,),), dtype=np.float32),
-          labels=None)
-
-  def test_eval(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
-        # loss_mean = loss/2 = 41./2 = 20.5
-        keys.LOSS_MEAN: 20.5,
-        keys.ACCURACY: 1./2,
-        keys.PRECISION: 1.,
-        keys.RECALL: 1./2,
-        keys.PREDICTION_MEAN: 1./2,
-        keys.LABEL_MEAN: 2./2,
-        keys.ACCURACY_BASELINE: 2./2,
-        keys.AUC: 0.,
-        keys.AUC_PR: 1.,
-    }
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(41., loss)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops})
-
-  def test_eval_metric_ops_with_head_name(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        name='some_binary_head')
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    expected_metric_keys = [
-        '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS_MEAN),
-        '{}/some_binary_head'.format(metric_keys.MetricKeys.ACCURACY),
-        '{}/some_binary_head'.format(metric_keys.MetricKeys.PRECISION),
-        '{}/some_binary_head'.format(metric_keys.MetricKeys.RECALL),
-        '{}/some_binary_head'.format(metric_keys.MetricKeys.PREDICTION_MEAN),
-        '{}/some_binary_head'.format(metric_keys.MetricKeys.LABEL_MEAN),
-        '{}/some_binary_head'.format(metric_keys.MetricKeys.ACCURACY_BASELINE),
-        '{}/some_binary_head'.format(metric_keys.MetricKeys.AUC),
-        '{}/some_binary_head'.format(metric_keys.MetricKeys.AUC_PR),
-    ]
-    self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys())
-
-  def test_eval_with_regularization_losses(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    regularization_losses = [1.5, 0.5]
-    expected_regularization_loss = 2.
-    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
-    #                    = sum(0, 41) / 2 = 20.5
-    expected_unregularized_loss = 20.5
-    expected_regularized_loss = (
-        expected_unregularized_loss + expected_regularization_loss)
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels,
-        regularization_losses=regularization_losses)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_unregularized_loss,
-        keys.LOSS_REGULARIZATION: expected_regularization_loss,
-        keys.ACCURACY: 1./2,
-        keys.PRECISION: 1.,
-        keys.RECALL: 1./2,
-        keys.PREDICTION_MEAN: 1./2,
-        keys.LABEL_MEAN: 2./2,
-        keys.ACCURACY_BASELINE: 2./2,
-        keys.AUC: 0.,
-        keys.AUC_PR: 1.,
-    }
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_regularized_loss, loss)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops})
-
-  def test_eval_with_vocabulary_list_create_loss(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        label_vocabulary=['aang', 'iroh'])
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = [[b'iroh'], [b'iroh']]
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # Create loss.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(41., training_loss.eval())
-
-  def test_eval_with_vocabulary_list(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        label_vocabulary=['aang', 'iroh'])
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = [[b'iroh'], [b'iroh']]
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      sess.run(update_ops)
-      self.assertAllClose(1. / 2,
-                          value_ops[metric_keys.MetricKeys.ACCURACY].eval())
-
-  def test_eval_with_thresholds_create_loss(self):
-    thresholds = [0.25, 0.5, 0.75]
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        thresholds=thresholds)
-    logits = np.array(((-1,), (1,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # probabilities[i] = 1/(1 + exp(-logits[i])) =>
-    # probabilities = [1/(1 + exp(1)), 1/(1 + exp(-1))] = [0.269, 0.731]
-    # loss = -ln(probabilities[label[i]])) = [-ln(0.269), -ln(0.731)]
-    #      = [1.31304389, 0.31334182]
-    # weighted sum loss = 1.62638571
-    expected_training_loss = 1.62638571
-    # Create loss.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
-
-  def test_eval_with_thresholds(self):
-    thresholds = [0.25, 0.5, 0.75]
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        thresholds=thresholds)
-    logits = np.array(((-1,), (1,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    # probabilities[i] = 1/(1 + exp(-logits[i])) =>
-    # probabilities = [1/(1 + exp(1)), 1/(1 + exp(-1))] = [0.269, 0.731]
-    # loss = -sum(ln(probabilities[label[i]])) = -ln(0.269) -ln(0.731)
-    #      = 1.62652338
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: 1.62652338 / 2.,
-        keys.ACCURACY: 1./2,
-        keys.PRECISION: 1.,
-        keys.RECALL: .5,
-        keys.PREDICTION_MEAN: 1./2,
-        keys.LABEL_MEAN: 2./2,
-        keys.ACCURACY_BASELINE: 2./2,
-        keys.AUC: 0.,
-        keys.AUC_PR: 1.,
-        keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 1.,
-        keys.PRECISION_AT_THRESHOLD % thresholds[0]: 1.,
-        keys.RECALL_AT_THRESHOLD % thresholds[0]: 1.,
-        keys.ACCURACY_AT_THRESHOLD % thresholds[1]: .5,
-        keys.PRECISION_AT_THRESHOLD % thresholds[1]: 1.,
-        keys.RECALL_AT_THRESHOLD % thresholds[1]: .5,
-        keys.ACCURACY_AT_THRESHOLD % thresholds[2]: 0.,
-        keys.PRECISION_AT_THRESHOLD % thresholds[2]: 0.,
-        keys.RECALL_AT_THRESHOLD % thresholds[2]: 0.,
-    }
-    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(1.62652338, loss)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval()
-                             for k in value_ops},
-          atol=tol,
-          rtol=tol)
-
-  def test_train_create_loss(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.float64)
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    # unreduced_loss = cross_entropy(labels, logits) = [0, 41]
-    expected_unreduced_loss = [[0.], [41.]]
-    # weights default to 1.
-    expected_weights = 1.
-    # training loss = 1 * 0 + 1 * 41
-    expected_training_loss = 41.
-    # Create loss.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_training_loss, training_loss.eval())
-      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
-      self.assertAllClose(expected_weights, actual_weights)
-
-  def test_train_create_loss_loss_reduction(self):
-    """Tests create_loss with loss_reduction."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
-
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.float64)
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    # unreduced_loss = cross_entropy(labels, logits) = [0, 41]
-    expected_unreduced_loss = [[0.], [41.]]
-    # weights default to 1.
-    expected_weights = 1.
-    # training loss = (1 * 0 + 1 * 41) / num_nonzero_weights
-    expected_training_loss = 41. / 2.
-    # Create loss.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_training_loss, training_loss.eval())
-      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
-      self.assertAllClose(expected_weights, actual_weights)
-
-  def test_eval_create_loss_loss_fn(self):
-    """Tests head.create_loss for eval mode and custom loss_fn."""
-    loss = np.array([[1.], [2.]], dtype=np.float32)
-    logits_input = np.array([[-10.], [10.]], dtype=np.float32)
-    labels_input = np.array([[1], [0]], dtype=np.int64)
-    def _loss_fn(labels, logits):
-      check_labels = control_flow_ops.Assert(
-          math_ops.reduce_all(math_ops.equal(labels, labels_input)),
-          data=[labels])
-      check_logits = control_flow_ops.Assert(
-          math_ops.reduce_all(math_ops.equal(logits, logits_input)),
-          data=[logits])
-      with ops.control_dependencies([check_labels, check_logits]):
-        return constant_op.constant(loss)
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        loss_fn=_loss_fn)
-
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_input,
-        labels=labels_input)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(np.sum(loss), actual_training_loss.eval())
-
-  def test_eval_create_loss_loss_fn_wrong_shape(self):
-    """Tests custom loss_fn that returns Tensor of unexpected shape."""
-    loss = np.array([1., 2.], dtype=np.float32)
-    def _loss_fn(labels, logits):
-      del labels, logits  # Unused
-      return constant_op.constant(loss)
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        loss_fn=_loss_fn)
-
-    logits = np.array([[-10.], [10.]], dtype=np.float32)
-    labels = np.array([[1], [0]], dtype=np.int64)
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. \] '
-          r'\[logits_shape: \] \[2 1\] \[loss_shape: \] \[2\]'):
-        actual_training_loss.eval()
-
-  def test_train_labels_none(self):
-    """Tests that error is raised when labels is None."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    with self.assertRaisesRegexp(
-        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=np.array(((45,), (-41,),), dtype=np.float32),
-          labels=None,
-          train_op_fn=_no_op_train_fn)
-
-  def test_train(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.float64)
-    expected_train_result = b'my_train_op'
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
-    expected_loss = 41.
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          # loss_mean = loss/2 = 41/2 = 20.5
-          metric_keys.MetricKeys.LOSS_MEAN: 20.5,
-      }, summary_str)
-
-  def test_train_with_optimizer(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.float64)
-    expected_train_result = b'my_train_op'
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
-    expected_loss = 41.
-
-    class _Optimizer(object):
-
-      def minimize(self, loss, global_step):
-        del global_step
-        with ops.control_dependencies((check_ops.assert_equal(
-            math_ops.to_float(expected_loss), math_ops.to_float(loss),
-            name='assert_loss'),)):
-          return constant_op.constant(expected_train_result)
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        optimizer=_Optimizer())
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run((spec.loss, spec.train_op))
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-
-  def test_train_with_update_ops(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    with ops.Graph().as_default():
-      w = variables.Variable(1)
-      update_op = w.assign_add(1)
-      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
-
-      t = variables.Variable('')
-      expected_train_result = b'my_train_op'
-      def _train_op_fn(loss):
-        del loss
-        return t.assign(expected_train_result)
-
-      spec = head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=np.array(((45,), (-41,),), dtype=np.float32),
-          labels=np.array(((1,), (1,),), dtype=np.float64),
-          train_op_fn=_train_op_fn)
-
-      with self.cached_session() as sess:
-        _initialize_variables(self, spec.scaffold)
-        sess.run(spec.train_op)
-        w_value, t_value = sess.run([w, t])
-        self.assertEqual(2, w_value)
-        self.assertEqual(expected_train_result, t_value)
-
-  def test_train_summaries_with_head_name(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        name='some_binary_head')
-
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.float64)
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
-    expected_loss = 41.
-
-    def _train_op_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-    # Assert summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      summary_str = sess.run(spec.scaffold.summary_op)
-      _assert_simple_summaries(
-          self,
-          {
-              '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS):
-                  expected_loss,
-              # loss_mean = loss/2 = 41/2 = 20.5
-              '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS_MEAN):
-                  20.5,
-          },
-          summary_str)
-
-  def test_train_with_regularization_losses(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-
-    logits = np.array(((45,), (-41,),), dtype=np.float32)
-    labels = np.array(((1,), (1,),), dtype=np.float64)
-    expected_train_result = b'my_train_op'
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    regularization_losses = [1.5, 0.5]
-    expected_regularization_loss = 2.
-    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
-    #                    = sum(0, 41) / 2 = 20.5
-    # loss = unregularized_loss + regularization_loss = 7.
-    expected_loss = 22.5
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn,
-        regularization_losses=regularization_losses)
-
-    # Assert predictions, loss, train_op, and summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
-                                                  spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          metric_keys.MetricKeys.LOSS_REGULARIZATION: (
-              expected_regularization_loss),
-      }, summary_str)
-
-  def test_float_labels_invalid_values(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
-    labels = np.array([[1.2], [0.4]], dtype=np.float32)
-    features = {'x': np.array([[42]], dtype=np.float32)}
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)[0]
-    with self.assertRaisesRegexp(
-        errors.InvalidArgumentError,
-        r'Labels must <= n_classes - 1'):
-      with self.cached_session():
-        _initialize_variables(self, monitored_session.Scaffold())
-        training_loss.eval()
-
-  def test_float_labels_train_create_loss(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
-    labels = np.array([[0.8], [0.4]], dtype=np.float32)
-    features = {'x': np.array([[42]], dtype=np.float32)}
-    # loss = cross_entropy(labels, logits)
-    #      = -label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i])
-    #      = [-0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5)),
-    #         -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))]
-    #      = [0.57407698418, 0.67435524446]
-    # weighted sum loss = 0.57407698418 + 0.67435524446
-    expected_training_loss = 1.24843222864
-    # Create loss.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
-
-  def test_float_labels_train(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
-    labels = np.array([[0.8], [0.4]], dtype=np.float32)
-    expected_train_result = b'my_train_op'
-    features = {'x': np.array([[42]], dtype=np.float32)}
-    # loss = sum(cross_entropy(labels, logits))
-    #      = sum(-label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i]))
-    #      = -0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5))
-    #        -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))
-    #      = 1.2484322
-    expected_loss = 1.2484322
-    def _train_op_fn(loss):
-      with ops.control_dependencies((dnn_testing_utils.assert_close(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss)),)):
-        return constant_op.constant(expected_train_result)
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    # Assert predictions, loss, train_op, and summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run((spec.loss, spec.train_op))
-      self.assertAlmostEqual(expected_loss, loss, delta=1.e-5)
-      self.assertEqual(expected_train_result, train_result)
-
-  def test_float_labels_eval_create_loss(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
-    labels = np.array([[0.8], [0.4]], dtype=np.float32)
-    features = {'x': np.array([[42]], dtype=np.float32)}
-    # loss = cross_entropy(labels, logits)
-    #      = -label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i])
-    #      = [-0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5)),
-    #         -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))]
-    #      = [0.57407698418, 0.67435524446]
-    # weighted sum loss = 0.57407698418 + 0.67435524446
-    expected_training_loss = 1.24843222864
-    # Create loss.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
-
-  def test_float_labels_eval(self):
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
-
-    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
-    labels = np.array([[0.8], [0.4]], dtype=np.float32)
-    features = {'x': np.array([[42]], dtype=np.float32)}
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    # loss = sum(cross_entropy(labels, logits))
-    #      = sum(-label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i]))
-    #      = -0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5))
-    #        -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))
-    #      = 1.2484322
-    expected_loss = 1.2484322
-
-    # Assert loss.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAlmostEqual(expected_loss, loss, delta=1.e-5)
-      self.assertAlmostEqual(
-          expected_loss / 2., metrics[metric_keys.MetricKeys.LOSS_MEAN])
-
-  def test_weighted_multi_example_predict(self):
-    """3 examples, 1 batch."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='label_weights')
-
-    # Create estimator spec.
-    logits = np.array(((45,), (-41,), (44,)), dtype=np.int32)
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
-            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32),
-        },
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      predictions = sess.run(spec.predictions)
-      self.assertAllClose(
-          logits.astype(np.float32),
-          predictions[prediction_keys.PredictionKeys.LOGITS])
-      self.assertAllClose(
-          _sigmoid(logits).astype(np.float32),
-          predictions[prediction_keys.PredictionKeys.LOGISTIC])
-      self.assertAllClose(
-          [[0., 1.], [1., 0.],
-           [0., 1.]], predictions[prediction_keys.PredictionKeys.PROBABILITIES])
-      self.assertAllClose([[1], [0], [1]],
-                          predictions[prediction_keys.PredictionKeys.CLASS_IDS])
-      self.assertAllEqual([[b'1'], [b'0'], [b'1']],
-                          predictions[prediction_keys.PredictionKeys.CLASSES])
-
-  def test_weighted_multi_example_eval(self):
-    """3 examples, 1 batch."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='label_weights')
-
-    # Create estimator spec.
-    logits = np.array(((45,), (-41,), (44,)), dtype=np.int32)
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
-            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32),
-        },
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=np.array(((1,), (1,), (0,)), dtype=np.int32))
-
-    # label_mean = (1*1 + .1*1 + 1.5*0)/(1 + .1 + 1.5) = 1.1/2.6
-    #            = .42307692307
-    expected_label_mean = .42307692307
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        # losses = label_weights*cross_entropy(labels, logits)
-        #        = (1*0 + .1*41 + 1.5*44) = (1, 4.1, 66)
-        # loss = sum(losses) = 1 + 4.1 + 66 = 70.1
-        # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
-        #           = 70.1/2.6 = 26.9615384615
-        keys.LOSS_MEAN: 26.9615384615,
-        # accuracy = (1*1 + .1*0 + 1.5*0)/(1 + .1 + 1.5) = 1/2.6 = .38461538461
-        keys.ACCURACY: .38461538461,
-        keys.PRECISION: 1./2.5,
-        keys.RECALL: 1./1.1,
-        # prediction_mean = (1*1 + .1*0 + 1.5*1)/(1 + .1 + 1.5) = 2.5/2.6
-        #                 = .96153846153
-        keys.PREDICTION_MEAN: .96153846153,
-        keys.LABEL_MEAN: expected_label_mean,
-        keys.ACCURACY_BASELINE: 1 - expected_label_mean,
-        keys.AUC: .45454565,
-        keys.AUC_PR: .6737757325172424,
-    }
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(70.1, loss)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops})
-
-  def test_train_one_dim_create_loss(self):
-    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='label_weights')
-
-    # Create estimator spec.
-    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
-    labels_rank_1 = np.array((1., 1., 0.,))
-    weights_rank_1 = np.array(((1., .1, 1.5,)), dtype=np.float64)
-    features = {
-        'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
-        'label_weights': weights_rank_1,
-    }
-    # unreduced_loss = cross_entropy(labels, logits) = [0, 41, 44]
-    expected_unreduced_loss = [[0.], [41.], [44.]]
-    # weights are reshaped to [3, 1] to match logits.
-    expected_weights = [[1.], [.1], [1.5]]
-    # training loss = 1 * 0 + .1 * 41 + 1.5 * 44
-    expected_training_loss = 70.1
-    # Create loss.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels_rank_1)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(),
-          rtol=1e-2, atol=1e-2)
-      self.assertAllClose(
-          expected_unreduced_loss, unreduced_loss.eval(),
-          rtol=1e-2, atol=1e-2)
-      self.assertAllClose(expected_weights, actual_weights.eval())
-
-  def test_train_one_dim(self):
-    """Tests train with 1D labels and weights (shape [batch_size])."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='label_weights')
-
-    # Create estimator spec.
-    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
-    labels_rank_1 = np.array((1., 1., 0.,))
-    weights_rank_1 = np.array(((1., .1, 1.5,)), dtype=np.float64)
-    self.assertEqual((3,), labels_rank_1.shape)
-    self.assertEqual((3,), weights_rank_1.shape)
-    features = {
-        'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
-        'label_weights': weights_rank_1,
-    }
-    expected_train_result = b'my_train_op'
-    # losses = label_weights*cross_entropy(labels, logits)
-    #        = (1*0 + .1*41 + 1.5*44) = (1, 4.1, 66)
-    # loss = sum(losses) = 1 + 4.1 + 66 = 70.1
-    expected_loss = 70.1
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels_rank_1,
-        train_op_fn=_train_op_fn)
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertIsNotNone(spec.train_op)
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((
-          spec.loss, spec.train_op, spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
-          #           = 70.1/2.6 = 26.9615384615
-          metric_keys.MetricKeys.LOSS_MEAN: 26.9615384615,
-      }, summary_str)
-
-  def test_weighted_multi_example_train(self):
-    """3 examples, 1 batch."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='label_weights')
-
-    # Create estimator spec.
-    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
-    expected_train_result = b'my_train_op'
-    # losses = label_weights*cross_entropy(labels, logits)
-    #        = (1*0 + .1*41 + 1.5*44) = (1, 4.1, 66)
-    # loss = sum(losses) = 1 + 4.1 + 66 = 70.1
-    expected_loss = 70.1
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
-            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float64),
-        },
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=np.array(((1.,), (1.,), (0.,))),
-        train_op_fn=_train_op_fn)
-
-    # Assert spec contains expected tensors.
-    self.assertIsNotNone(spec.loss)
-    self.assertIsNotNone(spec.train_op)
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      loss, train_result, summary_str = sess.run((
-          spec.loss, spec.train_op, spec.scaffold.summary_op))
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
-          #           = 70.1/2.6 = 26.9615384615
-          metric_keys.MetricKeys.LOSS_MEAN: 26.9615384615,
-      }, summary_str)
-
-  def test_multi_dim_weighted_train_create_loss(self):
-    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='weights')
-
-    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
-    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
-    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    # unreduced_loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
-    expected_unreduced_loss = [[[10.], [0.]], [[0.], [12.]]]
-    # Weights are reshaped to [2, 2, 1] to match logits.
-    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
-    # training_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
-    expected_training_loss = 40.
-    # Create loss.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    tol = 1e-2
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(
-          expected_training_loss, training_loss.eval(),
-          rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_unreduced_loss, unreduced_loss.eval(),
-          rtol=tol, atol=tol)
-      self.assertAllClose(expected_weights, actual_weights.eval())
-
-  def test_multi_dim_weighted_train(self):
-    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='weights')
-
-    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
-    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
-    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    # loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
-    # weighted_sum_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
-    expected_loss = 40.
-    expected_train_result = 'my_train_op'
-    def _train_op_fn(loss):
-      return string_ops.string_join(
-          [constant_op.constant(expected_train_result),
-           string_ops.as_string(loss, precision=2)])
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    # Assert predictions, loss, train_op, and summaries.
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run((spec.loss, spec.train_op))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      self.assertEqual(
-          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
-          train_result)
-
-  def test_multi_dim_train_weights_wrong_inner_dim(self):
-    """Logits and labels of shape [2, 2, 1], weights [2, 1]."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='weights')
-
-    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
-    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
-    weights = np.array([[1.], [2.]], dtype=np.float32)
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_no_op_train_fn)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[logits_shape: \] \[2 2 1\] \[weights_shape: \] \[2 1\]'):
-        spec.loss.eval()
-
-  def test_multi_dim_train_weights_wrong_outer_dim(self):
-    """Logits and labels of shape [2, 2, 1], weights [2, 2, 2]."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='weights')
-
-    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
-    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
-    weights_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features={'weights': weights_placeholder},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_no_op_train_fn)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[logits_shape: \]\s\[2 2 1\]\s\[weights_shape: \]\s\[2 2 2\]'):
-        spec.loss.eval({
-            weights_placeholder: np.array([[[1., 1.1], [1.5, 1.6]],
-                                           [[2., 2.1], [2.5, 2.6]]])})
-
-  def test_multi_dim_weighted_eval(self):
-    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
-    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        weight_column='weights')
-
-    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
-    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
-    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
-    # loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
-    # weighted_sum_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
-    expected_loss = 40.
-
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features={'weights': weights},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_loss / np.sum(weights),
-        keys.ACCURACY: (1.*0. + 1.5*1. + 2.*1. + 2.5*0.) / np.sum(weights),
-        keys.PRECISION: 2.0/3.0,
-        keys.RECALL: 2.0/4.5,
-        keys.PREDICTION_MEAN: (1.*1 + 1.5*0 + 2.*1 + 2.5*0) / np.sum(weights),
-        keys.LABEL_MEAN: (1.*0 + 1.5*0 + 2.*1 + 2.5*1) / np.sum(weights),
-        keys.ACCURACY_BASELINE: (1.*0 + 1.5*0 + 2.*1 + 2.5*1) / np.sum(weights),
-        # We cannot reliably calculate AUC with only 4 data points, but the
-        # values should not change because of backwards-compatibility.
-        keys.AUC: 0.5222,
-        keys.AUC_PR: 0.7341,
-    }
-
-    tol = 1e-2
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      loss, metrics = sess.run((spec.loss, update_ops))
-      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
-          rtol=tol, atol=tol)
-
-
-class RegressionHead(test.TestCase):
-
-  def setUp(self):
-    ops.reset_default_graph()
-
-  def test_invalid_label_dimension(self):
-    with self.assertRaisesRegexp(ValueError, r'Invalid label_dimension'):
-      head_lib._regression_head(label_dimension=-1)
-    with self.assertRaisesRegexp(ValueError, r'Invalid label_dimension'):
-      head_lib._regression_head(label_dimension=0)
-
-  def test_invalid_loss_reduction(self):
-    with self.assertRaisesRegexp(
-        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
-      head_lib._regression_head(loss_reduction='invalid_loss_reduction')
-    with self.assertRaisesRegexp(
-        ValueError, r'Invalid loss_reduction: none'):
-      head_lib._regression_head(loss_reduction=losses.Reduction.NONE)
-
-  def test_loss_fn_arg_labels_missing(self):
-    def _loss_fn(logits):
-      del logits  # Unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn must contain argument: labels\. '
-        r'Given arguments: \(\'logits\',\)'):
-      head_lib._regression_head(loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_logits_missing(self):
-    def _loss_fn(labels):
-      del labels  # unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn must contain argument: logits\. '
-        r'Given arguments: \(\'labels\',\)'):
-      head_lib._regression_head(loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_features_ok(self):
-    def _loss_fn(labels, logits, features):
-      del labels, logits, features  # Unused
-      head_lib._regression_head(loss_fn=_loss_fn)
-
-  def test_loss_fn_arg_invalid(self):
-    def _loss_fn(labels, logits, name=None):
-      del labels, logits, name  # Unused
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'loss_fn has unexpected args: \[\'name\'\]'):
-      head_lib._regression_head(loss_fn=_loss_fn)
-
-  def test_invalid_logits(self):
-    head = head_lib._regression_head(label_dimension=3)
-    self.assertEqual(3, head.logits_dimension)
-    logits_1d = np.array(((45.,), (41.,),))
-
-    # Static shape.
-    with self.assertRaisesRegexp(ValueError, 'logits shape'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42.,),))},
-          mode=model_fn.ModeKeys.PREDICT,
-          logits=logits_1d)
-
-    # Dynamic shape.
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),))},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits_placeholder)
-    with self.cached_session():
-      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
-        spec.predictions[prediction_keys.PredictionKeys.PREDICTIONS].eval({
-            logits_placeholder: logits_1d
-        })
-
-  def test_incompatible_labels_eval(self):
-    head = head_lib._regression_head(label_dimension=3)
-    self.assertEqual(3, head.logits_dimension)
-    values_3d = np.array(((45., 46., 47.), (41., 42., 43.),))
-    values_1d = np.array(((43.,), (44.,),))
-
-    # Static shape.
-    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
-      head.create_loss(
-          features={'x': values_1d},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=values_3d,
-          labels=values_1d)
-    with self.assertRaisesRegexp(ValueError, 'logits shape'):
-      head.create_estimator_spec(
-          features={'x': values_3d}, labels=values_3d,
-          mode=model_fn.ModeKeys.EVAL, logits=values_1d, train_op_fn=None)
-
-    # Dynamic shape.
-    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    spec = head.create_estimator_spec(
-        features={'x': values_1d},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_placeholder,
-        labels=labels_placeholder)
-    with self.cached_session():
-      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
-        spec.loss.eval({
-            labels_placeholder: values_3d,
-            logits_placeholder: values_1d
-        })
-    training_loss = head.create_loss(
-        features={'x': values_1d},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_placeholder,
-        labels=labels_placeholder)[0]
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
-        training_loss.eval({
-            labels_placeholder: values_1d,
-            logits_placeholder: values_3d
-        })
-
-  def test_incompatible_labels_train(self):
-    head = head_lib._regression_head(label_dimension=3)
-    self.assertEqual(3, head.logits_dimension)
-    values_3d = np.array(((45., 46., 47.), (41., 42., 43.),))
-    values_1d = np.array(((43.,), (44.,),))
-
-    # Static shape.
-    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
-      head.create_loss(
-          features={'x': values_1d},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=values_3d,
-          labels=values_1d)
-
-    with self.assertRaisesRegexp(ValueError, 'logits shape'):
-      head.create_estimator_spec(
-          features={'x': values_3d},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=values_1d,
-          labels=values_3d,
-          train_op_fn=lambda x: x)
-
-    # Dynamic shape.
-    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    spec = head.create_estimator_spec(
-        features={'x': values_1d},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits_placeholder,
-        labels=labels_placeholder,
-        train_op_fn=lambda x: x)
-    with self.cached_session():
-      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
-        spec.loss.eval({
-            labels_placeholder: values_3d,
-            logits_placeholder: values_1d
-        })
-    training_loss = head.create_loss(
-        features={'x': values_1d},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits_placeholder,
-        labels=labels_placeholder)[0]
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
-        training_loss.eval({
-            labels_placeholder: values_1d,
-            logits_placeholder: values_3d
-        })
-
-  def test_name(self):
-    head = head_lib._regression_head(name='foo')
-    self.assertEqual('foo', head.name)
-
-  def test_predict(self):
-    head = head_lib._regression_head()
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,),), dtype=np.int32)
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    # Assert spec contains expected tensors.
-    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
-    self.assertIsNone(spec.loss)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNone(spec.train_op)
-    default_serving_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-    self.assertItemsEqual(
-        (default_serving_key, 'predict', 'regression'),
-        spec.export_outputs.keys())
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions.
-    with self.cached_session():
-      _initialize_variables(self, spec.scaffold)
-      self.assertAllClose(logits, spec.predictions[prediction_key].eval())
-      self.assertAllClose(
-          logits, spec.export_outputs[default_serving_key].value.eval())
-      self.assertAllClose(
-          logits, spec.export_outputs['regression'].value.eval())
-      self.assertAllClose(
-          logits, spec.export_outputs['predict'].outputs['predictions'].eval())
-
-  def test_predict_with_inverse_link_fn(self):
-    def _inverse_link_fn(logits):
-      return logits - 10.
-    head = head_lib._regression_head(inverse_link_fn=_inverse_link_fn)
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,),), dtype=np.int32)
-    expected_predictions = np.array(((35,), (31,),), dtype=np.int32)
-    spec = head.create_estimator_spec(
-        features={'x': np.array(((42.,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.PREDICT,
-        logits=logits)
-
-    # Assert spec contains expected tensors.
-    keys = prediction_keys.PredictionKeys
-    self.assertItemsEqual(
-        (keys.PREDICTIONS, keys.LOGITS), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[keys.PREDICTIONS].dtype)
-    self.assertEqual(dtypes.float32, spec.predictions[keys.LOGITS].dtype)
-    default_serving_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-    self.assertItemsEqual(
-        (default_serving_key, 'predict', 'regression'),
-        spec.export_outputs.keys())
-
-    # Assert predictions.
-    with self.cached_session():
-      _initialize_variables(self, spec.scaffold)
-      self.assertAllClose(
-          expected_predictions, spec.predictions[keys.PREDICTIONS].eval())
-      self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval())
-      self.assertAllClose(
-          expected_predictions,
-          spec.export_outputs[default_serving_key].value.eval())
-      self.assertAllClose(
-          expected_predictions, spec.export_outputs['regression'].value.eval())
-      self.assertAllClose(
-          expected_predictions,
-          spec.export_outputs['predict'].outputs['predictions'].eval())
-      self.assertAllClose(
-          logits, spec.export_outputs['predict'].outputs['logits'].eval())
-
-  def test_eval_create_loss(self):
-    head = head_lib._regression_head()
-    logits = np.array(((45,), (41,),), dtype=np.float32)
-    labels = np.array(((43,), (44,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    # Create loss.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      # loss = [(43-45)^2, (44-41)] = [4, 9]
-      self.assertAllClose(13., training_loss.eval())
-
-  def test_eval_create_loss_loss_fn(self):
-    """Tests head.create_loss for eval mode and custom loss_fn."""
-    loss = np.array([[0., 1.], [2., 3.]], dtype=np.float32)
-    logits_input = np.array([[-1., 1.], [-2., 2.]], dtype=np.float32)
-    labels_input = np.array([[1., 0.], [2., -1.]], dtype=np.float32)
-    def _loss_fn(labels, logits):
-      check_labels = control_flow_ops.Assert(
-          math_ops.reduce_all(math_ops.equal(labels, labels_input)),
-          data=[labels])
-      check_logits = control_flow_ops.Assert(
-          math_ops.reduce_all(math_ops.equal(logits, logits_input)),
-          data=[logits])
-      with ops.control_dependencies([check_labels, check_logits]):
-        return constant_op.constant(loss)
-    head = head_lib._regression_head(label_dimension=2, loss_fn=_loss_fn)
-
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits_input,
-        labels=labels_input)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(np.sum(loss), actual_training_loss.eval())
-
-  def test_eval_create_loss_loss_fn_wrong_shape(self):
-    """Tests custom loss_fn that returns Tensor of unexpected shape."""
-    loss = np.array([[1.], [2.]], dtype=np.float32)
-    def _loss_fn(labels, logits):
-      del labels, logits  # Unused
-      return constant_op.constant(loss)
-    head = head_lib._regression_head(label_dimension=2, loss_fn=_loss_fn)
-
-    logits = np.array([[-1., 1.], [-2., 2.]], dtype=np.float32)
-    labels = np.array([[1., 0.], [2., -1.]], dtype=np.float32)
-    actual_training_loss = head.create_loss(
-        features={'x': np.array(((42,),), dtype=np.int32)},
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 2\]\. \] '
-          r'\[logits_shape: \] \[2 2\] \[loss_shape: \] \[2 1\]'):
-        actual_training_loss.eval()
-
-  def test_eval_labels_none(self):
-    """Tests that error is raised when labels is None."""
-    head = head_lib._regression_head()
-
-    with self.assertRaisesRegexp(
-        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.EVAL,
-          logits=np.array(((45,), (41,),), dtype=np.float32),
-          labels=None)
-
-  def test_eval(self):
-    head = head_lib._regression_head()
-    self.assertEqual(1, head.logits_dimension)
-
-    logits = np.array(((45,), (41,),), dtype=np.float32)
-    labels = np.array(((43,), (44,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    # Assert spec contains expected tensors.
-    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
-    self.assertEqual(dtypes.float32, spec.loss.dtype)
-    self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
-                           metric_keys.MetricKeys.PREDICTION_MEAN,
-                           metric_keys.MetricKeys.LABEL_MEAN),
-                          spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
-          metric_keys.MetricKeys.LOSS_MEAN]
-      predictions, loss, loss_mean = sess.run((
-          spec.predictions[prediction_key], spec.loss, loss_mean_update_op))
-      self.assertAllClose(logits, predictions)
-      # loss = (43-45)^2 + (44-41)^2 = 4+9 = 13
-      self.assertAllClose(13., loss)
-      # loss_mean = loss/2 = 13/2 = 6.5
-      expected_loss_mean = 6.5
-      # Check results of both update (in `loss_mean`) and value ops.
-      self.assertAllClose(expected_loss_mean, loss_mean)
-      self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())
-
-  def test_eval_metric_ops_with_head_name_for_regression(self):
-    head = head_lib._regression_head(name='some_regression_head')
-    logits = np.array(((1,), (9,)), dtype=np.float32)
-    labels = np.array(((1,), (1,)), dtype=np.int64)
-    features = {'x': np.array(((42,),), dtype=np.int32)}
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    expected_metric_keys = [
-        '{}/some_regression_head'.format(metric_keys.MetricKeys.LOSS_MEAN),
-        '{}/some_regression_head'.format(
-            metric_keys.MetricKeys.PREDICTION_MEAN),
-        '{}/some_regression_head'.format(metric_keys.MetricKeys.LABEL_MEAN),
-    ]
-    self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys())
-
-  def test_eval_with_regularization_losses(self):
-    head = head_lib._regression_head(
-        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-    self.assertEqual(1, head.logits_dimension)
-
-    logits = np.array(((45,), (41,),), dtype=np.float32)
-    labels = np.array(((43,), (44,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    regularization_losses = [1.5, 0.5]
-    expected_regularization_loss = 2.
-    # unregularized_loss = ((43-45)^2 + (44-41)^2) / batch_size
-    #                    = (4 + 9) / 2 = 6.5
-    expected_unregularized_loss = 6.5
-    expected_regularized_loss = (
-        expected_unregularized_loss + expected_regularization_loss)
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels,
-        regularization_losses=regularization_losses)
-
-    keys = metric_keys.MetricKeys
-    expected_metrics = {
-        keys.LOSS_MEAN: expected_unregularized_loss,
-        keys.LOSS_REGULARIZATION: expected_regularization_loss,
-        keys.PREDICTION_MEAN: (45 + 41) / 2.0,
-        keys.LABEL_MEAN: (43 + 44) / 2.0,
-    }
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
-      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-      prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-      predictions, loss, metrics = sess.run((
-          spec.predictions[prediction_key], spec.loss, update_ops))
-      self.assertAllClose(logits, predictions)
-      self.assertAllClose(expected_regularized_loss, loss)
-      # Check results of both update (in `metrics`) and value ops.
-      self.assertAllClose(expected_metrics, metrics)
-      self.assertAllClose(
-          expected_metrics, {k: value_ops[k].eval() for k in value_ops})
-
-  def test_train_create_loss(self):
-    head = head_lib._regression_head()
-    logits = np.array(((45,), (41,),), dtype=np.float32)
-    labels = np.array(((43,), (44,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    # unreduced_loss = [(43-45)^2, (44-41)] = [4, 9]
-    expected_unreduced_loss = [[4.], [9.]]
-    # weights default to 1.
-    expected_weights = 1
-    # training_loss = 1 * 4 + 1 * 9 = 13
-    expected_training_loss = 13.
-    # Create loss.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_training_loss, training_loss.eval())
-      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
-      self.assertAllClose(expected_weights, actual_weights)
-
-  def test_train_create_loss_loss_reduction(self):
-    """Tests create_loss with loss_reduction."""
-    head = head_lib._regression_head(
-        loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
-    logits = np.array(((45,), (41,),), dtype=np.float32)
-    labels = np.array(((43,), (44,),), dtype=np.int32)
-    features = {'x': np.array(((42,),), dtype=np.float32)}
-    # unreduced_loss = [(43-45)^2, (44-41)] = [4, 9]
-    expected_unreduced_loss = [[4.], [9.]]
-    # weights default to 1.
-    expected_weights = 1
-    # training_loss = (1 * 4 + 1 * 9) / num_nonzero_weights
-    expected_training_loss = 13. / 2.
-    # Create loss.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_training_loss, training_loss.eval())
-      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
-      self.assertAllClose(expected_weights, actual_weights)
-
-  def test_train_labels_none(self):
-    """Tests that error is raised when labels is None."""
-    head = head_lib._regression_head()
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    with self.assertRaisesRegexp(
-        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
-      head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=np.array(((45,), (41,),), dtype=np.float32),
-          labels=None,
-          train_op_fn=_no_op_train_fn)
-
-  def test_train(self):
-    head = head_lib._regression_head()
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,),), dtype=np.float32)
-    labels = np.array(((43.,), (44.,),), dtype=np.float64)
-    expected_train_result = b'my_train_op'
-    features = {'x': np.array(((42.,),), dtype=np.float32)}
-    # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13
-    expected_loss = 13
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    # Assert spec contains expected tensors.
-    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
-    self.assertEqual(dtypes.float32, spec.loss.dtype)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      predictions, loss, train_result, summary_str = sess.run((
-          spec.predictions[prediction_key], spec.loss, spec.train_op,
-          spec.scaffold.summary_op))
-      self.assertAllClose(logits, predictions)
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          # loss_mean = loss/2 = 13/2 = 6.5
-          metric_keys.MetricKeys.LOSS_MEAN: 6.5,
-      }, summary_str)
-
-  def test_train_with_optimizer(self):
-    head = head_lib._regression_head()
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,),), dtype=np.float32)
-    labels = np.array(((43.,), (44.,),), dtype=np.float64)
-    expected_train_result = b'my_train_op'
-    features = {'x': np.array(((42.,),), dtype=np.float32)}
-    # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13
-    expected_loss = 13
-
-    class _Optimizer(object):
-
-      def minimize(self, loss, global_step):
-        del global_step
-        with ops.control_dependencies((check_ops.assert_equal(
-            math_ops.to_float(expected_loss), math_ops.to_float(loss),
-            name='assert_loss'),)):
-          return constant_op.constant(expected_train_result)
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        optimizer=_Optimizer())
-
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss, train_result = sess.run((spec.loss, spec.train_op))
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-
-  def test_train_with_update_ops(self):
-    head = head_lib._regression_head()
-
-    with ops.Graph().as_default():
-      w = variables.Variable(1)
-      update_op = w.assign_add(1)
-      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
-
-      t = variables.Variable('')
-      expected_train_result = b'my_train_op'
-      def _train_op_fn(loss):
-        del loss
-        return t.assign(expected_train_result)
-
-      spec = head.create_estimator_spec(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=np.array(((45,), (41,),), dtype=np.float32),
-          labels=np.array(((43.,), (44.,),), dtype=np.float64),
-          train_op_fn=_train_op_fn)
-
-      with self.cached_session() as sess:
-        _initialize_variables(self, spec.scaffold)
-        sess.run(spec.train_op)
-        w_value, t_value = sess.run([w, t])
-        self.assertEqual(2, w_value)
-        self.assertEqual(expected_train_result, t_value)
-
-  def test_train_summaries_with_head_name(self):
-    head = head_lib._regression_head(name='some_regression_head')
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,),), dtype=np.float32)
-    labels = np.array(((43.,), (44.,),), dtype=np.float64)
-    features = {'x': np.array(((42.,),), dtype=np.float32)}
-    # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13
-    expected_loss = 13
-
-    def _train_op_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    # Assert summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      summary_str = sess.run(spec.scaffold.summary_op)
-      _assert_simple_summaries(
-          self,
-          {
-              '{}/some_regression_head'.format(metric_keys.MetricKeys.LOSS):
-                  expected_loss,
-              # loss_mean = loss/2 = 13/2 = 6.5
-              '{}/some_regression_head'
-              .format(metric_keys.MetricKeys.LOSS_MEAN):
-                  6.5,
-          },
-          summary_str)
-
-  def test_train_with_regularization_losses(self):
-    head = head_lib._regression_head(
-        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,),), dtype=np.float32)
-    labels = np.array(((43.,), (44.,),), dtype=np.float64)
-    expected_train_result = b'my_train_op'
-    features = {'x': np.array(((42.,),), dtype=np.float32)}
-    regularization_losses = [1.5, 0.5]
-    expected_regularization_loss = 2.
-    # unregularized_loss = ((43-45)^2 + (44-41)^2) / batch_size
-    #                    = (4 + 9) / 2 = 6.5
-    # loss = unregularized_loss + regularization_loss = 8.5
-    expected_loss = 8.5
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn,
-        regularization_losses=regularization_losses)
-
-    # Assert predictions, loss, train_op, and summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-      predictions, loss, train_result, summary_str = sess.run((
-          spec.predictions[prediction_key], spec.loss, spec.train_op,
-          spec.scaffold.summary_op))
-      self.assertAllClose(logits, predictions)
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          metric_keys.MetricKeys.LOSS_REGULARIZATION: (
-              expected_regularization_loss),
-      }, summary_str)
-
-  def test_weighted_multi_example_eval(self):
-    """1d label, 3 examples, 1 batch."""
-    head = head_lib._regression_head(weight_column='label_weights')
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,), (44,)), dtype=np.int32)
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
-            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32),
-        },
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=np.array(((35,), (42,), (45,)), dtype=np.int32))
-
-    # Assert spec contains expected tensors.
-    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
-    self.assertEqual(dtypes.float32, spec.loss.dtype)
-    self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
-                           metric_keys.MetricKeys.PREDICTION_MEAN,
-                           metric_keys.MetricKeys.LABEL_MEAN),
-                          spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
-          metric_keys.MetricKeys.LOSS_MEAN]
-      predictions, loss, loss_mean = sess.run((
-          spec.predictions[prediction_key], spec.loss, loss_mean_update_op))
-      self.assertAllClose(logits, predictions)
-      # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
-      self.assertAllClose(101.6, loss)
-      # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
-      expected_loss_mean = 39.0769231
-      # Check results of both update (in `loss_mean`) and value ops.
-      self.assertAllClose(expected_loss_mean, loss_mean)
-      self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())
-
-  def test_weight_with_numeric_column(self):
-    """1d label, 3 examples, 1 batch."""
-    head = head_lib._regression_head(
-        weight_column=feature_column_lib.numeric_column(
-            'label_weights', normalizer_fn=lambda x: x + 1.))
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,), (44,)), dtype=np.int32)
-    spec = head.create_estimator_spec(
-        features={
-            'x':
-                np.array(((42,), (43,), (44,)), dtype=np.int32),
-            'label_weights':
-                np.array(((0.,), (-0.9,), (0.5,)), dtype=np.float32),
-        },
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=np.array(((35,), (42,), (45,)), dtype=np.int32))
-
-    # Assert loss.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      loss = sess.run(spec.loss)
-      # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
-      self.assertAllClose(101.6, loss)
-
-  def test_weighted_multi_example_train(self):
-    """1d label, 3 examples, 1 batch."""
-    head = head_lib._regression_head(weight_column='label_weights')
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
-    expected_train_result = b'my_train_op'
-    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
-    expected_loss = 101.6
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-    spec = head.create_estimator_spec(
-        features={
-            'x': np.array(((42,), (43,), (44,)), dtype=np.float32),
-            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float64),
-        },
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=np.array(((35.,), (42.,), (45.,)), dtype=np.float32),
-        train_op_fn=_train_op_fn)
-
-    # Assert spec contains expected tensors.
-    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
-    self.assertEqual(dtypes.float32, spec.loss.dtype)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      predictions, loss, train_result, summary_str = sess.run((
-          spec.predictions[prediction_key], spec.loss, spec.train_op,
-          spec.scaffold.summary_op))
-      self.assertAllClose(logits, predictions)
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
-          metric_keys.MetricKeys.LOSS_MEAN: 39.0769231,
-      }, summary_str)
-
-  def test_train_one_dim_create_loss(self):
-    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
-    head = head_lib._regression_head(weight_column='label_weights')
-    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
-    x_feature_rank_1 = np.array((42., 43., 44.,), dtype=np.float32)
-    weight_rank_1 = np.array((1., .1, 1.5,), dtype=np.float64)
-    labels_rank_1 = np.array((35., 42., 45.,))
-    # unreduced_loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
-    expected_unreduced_loss = [[100.], [1.], [1.]]
-    # weights are reshaped to [3, 1] to match logits.
-    expected_weights = [[1.], [.1], [1.5]]
-    # training_loss = 100 * 1 + 1 * .1 + 1.5 * 1 = 101.6
-    expected_training_loss = 101.6
-    features = {'x': x_feature_rank_1, 'label_weights': weight_rank_1}
-    # Create loss.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels_rank_1)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_training_loss, training_loss.eval())
-      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
-      self.assertAllClose(expected_weights, actual_weights.eval())
-
-  def test_train_one_dim(self):
-    """Tests train with 1D labels and weights (shape [batch_size])."""
-    head = head_lib._regression_head(weight_column='label_weights')
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
-    expected_train_result = b'my_train_op'
-    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
-    expected_loss = 101.6
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    x_feature_rank_1 = np.array((42., 43., 44.,), dtype=np.float32)
-    weight_rank_1 = np.array((1., .1, 1.5,), dtype=np.float64)
-    labels_rank_1 = np.array((35., 42., 45.,))
-    features = {'x': x_feature_rank_1, 'label_weights': weight_rank_1}
-    self.assertEqual((3,), x_feature_rank_1.shape)
-    self.assertEqual((3,), weight_rank_1.shape)
-    self.assertEqual((3,), labels_rank_1.shape)
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels_rank_1,
-        train_op_fn=_train_op_fn)
-
-    # Assert spec contains expected tensors.
-    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
-    self.assertEqual(dtypes.float32, spec.loss.dtype)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, train_op, and summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      predictions, loss, train_result, summary_str = sess.run((
-          spec.predictions[prediction_key], spec.loss, spec.train_op,
-          spec.scaffold.summary_op))
-      self.assertAllClose(logits, predictions)
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
-          metric_keys.MetricKeys.LOSS_MEAN: 39.0769231,
-      }, summary_str)
-
-  def test_weighted_multi_value_eval_create_loss(self):
-    """3d label, 1 example, 1 batch."""
-    head = head_lib._regression_head(
-        weight_column='label_weights', label_dimension=3)
-    logits = np.array(((45., 41., 44.),))
-    labels = np.array(((35., 42., 45.),))
-    features = {
-        'x': np.array(((42., 43., 44.),)),
-        'label_weights': np.array(((1., .1, 1.5),))
-    }
-    # Create loss.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
-      # weighted sum loss = 1 * 100 + .1 * 1 + 1.5 * 1 = 101.6
-      self.assertAllClose(101.6, training_loss.eval())
-
-  def test_weighted_multi_value_eval(self):
-    """3d label, 1 example, 1 batch."""
-    head = head_lib._regression_head(
-        weight_column='label_weights', label_dimension=3)
-    self.assertEqual(3, head.logits_dimension)
-
-    logits = np.array(((45., 41., 44.),))
-    labels = np.array(((35., 42., 45.),))
-    features = {
-        'x': np.array(((42., 43., 44.),)),
-        'label_weights': np.array(((1., .1, 1.5),))
-    }
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=logits,
-        labels=labels)
-
-    # Assert spec contains expected tensors.
-    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
-    self.assertEqual(dtypes.float32, spec.loss.dtype)
-    self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
-                           metric_keys.MetricKeys.PREDICTION_MEAN,
-                           metric_keys.MetricKeys.LABEL_MEAN),
-                          spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Assert predictions, loss, and metrics.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNone(spec.scaffold.summary_op)
-      loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
-          metric_keys.MetricKeys.LOSS_MEAN]
-      predictions, loss, loss_mean = sess.run((
-          spec.predictions[prediction_key], spec.loss, loss_mean_update_op))
-      self.assertAllClose(logits, predictions)
-      # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
-      self.assertAllClose(101.6, loss)
-      # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
-      expected_loss_mean = 39.076923
-      # Check results of both update (in `loss_mean`) and value ops.
-      self.assertAllClose(expected_loss_mean, loss_mean)
-      self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())
-
-  def test_weighted_multi_value_train_create_loss(self):
-    """3d label, 1 example, 1 batch."""
-    head = head_lib._regression_head(
-        weight_column='label_weights', label_dimension=3)
-    logits = np.array(((45., 41., 44.),))
-    labels = np.array(((35., 42., 45.),))
-    features = {
-        'x': np.array(((42., 43., 44.),)),
-        'label_weights': np.array(((1., .1, 1.5),))
-    }
-    # Create loss.
-    training_loss = head.create_loss(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)[0]
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
-      # weighted sum loss = 1 * 100 + .1 * 1 + 1.5 * 1 = 101.6
-      self.assertAllClose(101.6, training_loss.eval())
-
-  def test_weighted_multi_value_train(self):
-    """3d label, 1 example, 1 batch."""
-    head = head_lib._regression_head(
-        weight_column='label_weights', label_dimension=3)
-    self.assertEqual(3, head.logits_dimension)
-
-    logits = np.array(((45., 41., 44.),))
-    labels = np.array(((35., 42., 45.),))
-    expected_train_result = b'my_train_op'
-    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
-    expected_loss = 101.6
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    features = {
-        'x': np.array(((42., 43., 44.),)),
-        'label_weights': np.array(((1., .1, 1.5),)),
-    }
-    # Create estimator spec.
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-
-    # Assert spec contains expected tensors.
-    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
-    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
-    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
-    self.assertEqual(dtypes.float32, spec.loss.dtype)
-    self.assertEqual({}, spec.eval_metric_ops)
-    self.assertIsNotNone(spec.train_op)
-    self.assertIsNone(spec.export_outputs)
-    _assert_no_hooks(self, spec)
-
-    # Evaluate predictions, loss, train_op, and summaries.
-    with self.cached_session() as sess:
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      predictions, loss, train_result, summary_str = sess.run((
-          spec.predictions[prediction_key], spec.loss, spec.train_op,
-          spec.scaffold.summary_op))
-      self.assertAllClose(logits, predictions)
-      self.assertAllClose(expected_loss, loss)
-      self.assertEqual(expected_train_result, train_result)
-      _assert_simple_summaries(self, {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
-          metric_keys.MetricKeys.LOSS_MEAN: 39.076923,
-      }, summary_str)
-
-  def test_weighted_multi_batch_eval(self):
-    """1d label, 1 example, 3 batches."""
-    head = head_lib._regression_head(weight_column='label_weights')
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45.,), (41.,), (44.,)))
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'x': np.array(((42.,), (43.,), (44.,))),
-            'label_weights': np.array(((1.,), (.1,), (1.5,))),
-            # 'logits' is not a feature, but we use `numpy_input_fn` to make a
-            # batched version of it, and pop it off before passing to
-            # `create_estimator_spec`.
-            'logits': logits,
-        },
-        y=np.array(((35.,), (42.,), (45.,))),
-        batch_size=1,
-        num_epochs=1,
-        shuffle=False)
-    batched_features, batched_labels = input_fn()
-    batched_logits = batched_features.pop('logits')
-    spec = head.create_estimator_spec(
-        features=batched_features,
-        mode=model_fn.ModeKeys.EVAL,
-        logits=batched_logits,
-        labels=batched_labels,
-        train_op_fn=None)
-
-    # losses = [1*(35-45)^2, .1*(42-41)^2, 1.5*(45-44)^2] = [100, .1, 1.5]
-    # loss = sum(losses) = 100+.1+1.5 = 101.6
-    # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
-    expected_metrics = {
-        metric_keys.MetricKeys.LOSS_MEAN:
-            39.076923,
-        metric_keys.MetricKeys.PREDICTION_MEAN:
-            (45 + 41 * 0.1 + 44 * 1.5) / 2.6,
-        metric_keys.MetricKeys.LABEL_MEAN: (35 + 42 * 0.1 + 45 * 1.5) / 2.6,
-    }
-
-    # Assert spec contains expected tensors.
-    self.assertEqual(dtypes.float32, spec.loss.dtype)
-    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
-    self.assertIsNone(spec.train_op)
-    _assert_no_hooks(self, spec)
-
-    with self.cached_session() as sess:
-      # Finalize graph and initialize variables.
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      queue_runner_impl.start_queue_runners()
-
-      # Run tensors for `steps` steps.
-      steps = len(logits)
-      results = tuple([
-          sess.run((
-              spec.loss,
-              # The `[1]` gives us the metric update op.
-              {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
-          )) for _ in range(steps)
-      ])
-
-      # Assert losses and metrics.
-      self.assertAllClose((100, .1, 1.5), [r[0] for r in results])
-      # For metrics, check results of both update (in `results`) and value ops.
-      # Note: we only check the result of the last step for streaming metrics.
-      self.assertAllClose(expected_metrics, results[steps - 1][1])
-      self.assertAllClose(expected_metrics, {
-          k: spec.eval_metric_ops[k][0].eval() for k in spec.eval_metric_ops
-      })
-
-  def test_weighted_multi_batch_train(self):
-    """1d label, 1 example, 3 batches."""
-    head = head_lib._regression_head(weight_column='label_weights')
-    self.assertEqual(1, head.logits_dimension)
-
-    # Create estimator spec.
-    logits = np.array(((45.,), (41.,), (44.,)))
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'x': np.array(((42.,), (43.,), (44.,))),
-            'label_weights': np.array(((1.,), (.1,), (1.5,))),
-            # 'logits' is not a feature, but we use `numpy_input_fn` to make a
-            # batched version of it, and pop it off before passing to
-            # `create_estimator_spec`.
-            'logits': logits,
-        },
-        y=np.array(((35.,), (42.,), (45.,))),
-        batch_size=1,
-        num_epochs=1,
-        shuffle=False)
-    batched_features, batched_labels = input_fn()
-    batched_logits = batched_features.pop('logits')
-    spec = head.create_estimator_spec(
-        features=batched_features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=batched_logits,
-        labels=batched_labels,
-        train_op_fn=lambda loss: loss * -7.)
-
-    # Assert spec contains expected tensors.
-    self.assertEqual(dtypes.float32, spec.loss.dtype)
-    self.assertIsNotNone(spec.train_op)
-
-    with self.cached_session() as sess:
-      # Finalize graph and initialize variables.
-      _initialize_variables(self, spec.scaffold)
-      self.assertIsNotNone(spec.scaffold.summary_op)
-      queue_runner_impl.start_queue_runners()
-
-      results = tuple([
-          sess.run((spec.loss, spec.train_op)) for _ in range(len(logits))
-      ])
-
-      # losses = [1*(35-45)^2, .1*(42-41)^2, 1.5*(45-44)^2] = [100, .1, 1.5]
-      expected_losses = np.array((100, .1, 1.5))
-      self.assertAllClose(expected_losses, [r[0] for r in results])
-      self.assertAllClose(expected_losses * -7., [r[1] for r in results])
-
-  def test_multi_dim_weighted_train_create_loss(self):
-    """Logits, labels of shape [2, 2, 3], weight shape [2, 2]."""
-    label_dimension = 3
-    head = head_lib._regression_head(
-        weight_column='label_weights', label_dimension=label_dimension)
-    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
-                       [[20., 21., 22.], [30., 31., 32.]]])
-    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
-                       [[23., 24., 25.], [34., 35., 36.]]])
-    weights = np.array([[1., 1.5], [2., 2.5]])
-    expected_unreduced_loss = [[[1., 1., 1.], [4., 4., 4.]],
-                               [[9., 9., 9.], [16., 16., 16.]]]
-    expected_training_loss = np.sum(
-        np.array([[[1. * x for x in [1., 1., 1.]],
-                   [1.5 * x for x in [4., 4., 4.]]],
-                  [[2. * x for x in [9., 9., 9.]],
-                   [2.5 * x for x in [16., 16., 16.]]]]))
-    # Weights are expanded to [2, 2, 1] to match logits.
-    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
-    # Create loss.
-    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
-        features={'label_weights': weights},
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_training_loss, training_loss.eval())
-      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
-      self.assertAllClose(expected_weights, actual_weights.eval())
-
-  def test_multi_dim_weighted_train(self):
-    """Logits, labels of shape [2, 2, 3], weight shape [2, 2]."""
-    head = head_lib._regression_head(
-        weight_column='label_weights', label_dimension=3)
-    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
-                       [[20., 21., 22.], [30., 31., 32.]]])
-    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
-                       [[23., 24., 25.], [34., 35., 36.]]])
-    expected_train_result = b'my_train_op'
-    features = {
-        'label_weights': np.array([[1., 1.5], [2., 2.5]]),
-    }
-    # loss = 1*3*1^2 + 1.5*3*2^2 + 2*3*3^2 +2.5*3*4^2 = 195
-    expected_loss = 195.
-    # Create estimator spec.
-    def _train_op_fn(loss):
-      with ops.control_dependencies((check_ops.assert_equal(
-          math_ops.to_float(expected_loss), math_ops.to_float(loss),
-          name='assert_loss'),)):
-        return constant_op.constant(expected_train_result)
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_train_op_fn)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      self.assertAllClose(expected_loss, spec.loss.eval())
-
-  def test_multi_dim_train_weights_wrong_inner_dim(self):
-    """Logits, labels of shape [2, 2, 3], weight shape [2, 1]."""
-    head = head_lib._regression_head(
-        weight_column='label_weights', label_dimension=3)
-    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
-                       [[20., 21., 22.], [30., 31., 32.]]])
-    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
-                       [[23., 24., 25.], [34., 35., 36.]]])
-    features = {
-        'label_weights': np.array([[1.], [2]]),
-    }
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_no_op_train_fn)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'):
-        spec.loss.eval()
-
-  def test_multi_dim_train_weights_wrong_outer_dim(self):
-    """Logits, labels of shape [2, 2, 3], weight shape [2, 2, 2]."""
-    head = head_lib._regression_head(
-        weight_column='label_weights', label_dimension=3)
-    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
-                       [[20., 21., 22.], [30., 31., 32.]]])
-    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
-                       [[23., 24., 25.], [34., 35., 36.]]])
-    weights_placeholder = array_ops.placeholder(dtype=dtypes.float32)
-    features = {
-        'label_weights': weights_placeholder,
-    }
-    def _no_op_train_fn(loss):
-      del loss
-      return control_flow_ops.no_op()
-
-    spec = head.create_estimator_spec(
-        features=features,
-        mode=model_fn.ModeKeys.TRAIN,
-        logits=logits,
-        labels=labels,
-        train_op_fn=_no_op_train_fn)
-    with self.cached_session():
-      _initialize_variables(self, monitored_session.Scaffold())
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 2\]'):
-        spec.loss.eval({
-            weights_placeholder: np.array([[[1., 1.1], [1.5, 1.6]],
-                                           [[2., 2.1], [2.5, 2.6]]])})
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/canned/linear.py b/tensorflow/python/estimator/canned/linear.py
index 8b96284bd3..f674e50f92 100644
--- a/tensorflow/python/estimator/canned/linear.py
+++ b/tensorflow/python/estimator/canned/linear.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,532 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Linear Estimators."""
+"""linear python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
-
-import six
-
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator.canned import head as head_lib
-from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import partitioned_variables
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables as variable_ops
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.summary import summary
-from tensorflow.python.training import ftrl
-from tensorflow.python.util import nest
-from tensorflow.python.util.tf_export import estimator_export
-
-
-# The default learning rate of 0.2 is a historical artifact of the initial
-# implementation, but seems a reasonable choice.
-_LEARNING_RATE = 0.2
-
-
-def _get_default_optimizer(feature_columns):
-  learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns)))
-  return ftrl.FtrlOptimizer(learning_rate=learning_rate)
-
-
-def _get_expanded_variable_list(var_list):
-  """Given a list of variables, expands them if they are partitioned.
-
-  Args:
-    var_list: A list of variables.
-
-  Returns:
-    A list of variables where each partitioned variable is expanded to its
-    components.
-  """
-  returned_list = []
-  for variable in var_list:
-    if (isinstance(variable, variable_ops.Variable) or
-        resource_variable_ops.is_resource_variable(variable)):
-      returned_list.append(variable)  # Single variable case.
-    else:  # Must be a PartitionedVariable, so convert into a list.
-      returned_list.extend(list(variable))
-  return returned_list
-
-
-# TODO(rohanj): Consider making this a public utility method.
-def _compute_fraction_of_zero(variables):
-  """Given a linear variables list, compute the fraction of zero weights.
-
-  Args:
-    variables: A list or list of list of variables
-
-  Returns:
-    The fraction of zeros (sparsity) in the linear model.
-  """
-  all_weight_vars = []
-  for var_or_var_list in variables:
-    var_list = nest.flatten(var_or_var_list)
-    # Skip empty-lists associated with columns that created no Variables.
-    if var_list:
-      all_weight_vars += [array_ops.reshape(var, [-1]) for var in var_list]
-  return nn.zero_fraction(array_ops.concat(all_weight_vars, axis=0))
-
-
-def _linear_logit_fn_builder(units, feature_columns, sparse_combiner='sum'):
-  """Function builder for a linear logit_fn.
-
-  Args:
-    units: An int indicating the dimension of the logit layer.
-    feature_columns: An iterable containing all the feature columns used by
-      the model.
-    sparse_combiner: A string specifying how to reduce if a categorical column
-      is multivalent.  One of "mean", "sqrtn", and "sum".
-
-  Returns:
-    A logit_fn (see below).
-
-  """
-
-  def linear_logit_fn(features):
-    """Linear model logit_fn.
-
-    Args:
-      features: This is the first item returned from the `input_fn`
-                passed to `train`, `evaluate`, and `predict`. This should be a
-                single `Tensor` or `dict` of same.
-
-    Returns:
-      A `Tensor` representing the logits.
-    """
-    if feature_column_v2.is_feature_column_v2(feature_columns):
-      shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
-      linear_model = feature_column_v2.LinearModel(
-          feature_columns=feature_columns,
-          units=units,
-          sparse_combiner=sparse_combiner,
-          shared_state_manager=shared_state_manager)
-      logits = linear_model(features)
-      bias = linear_model.bias_variable
-
-      # We'd like to get all the non-bias variables associated with this
-      # LinearModel. This includes the shared embedding variables as well.
-      variables = linear_model.variables
-      variables.remove(bias)
-      variables.extend(shared_state_manager.variables)
-
-      # Expand (potential) Partitioned variables
-      bias = _get_expanded_variable_list([bias])
-      variables = _get_expanded_variable_list(variables)
-    else:
-      linear_model = feature_column._LinearModel(  # pylint: disable=protected-access
-          feature_columns=feature_columns,
-          units=units,
-          sparse_combiner=sparse_combiner,
-          name='linear_model')
-      logits = linear_model(features)
-      cols_to_vars = linear_model.cols_to_vars()
-      bias = cols_to_vars.pop('bias')
-      variables = cols_to_vars.values()
-
-    if units > 1:
-      summary.histogram('bias', bias)
-    else:
-      # If units == 1, the bias value is a length-1 list of a scalar Tensor,
-      # so we should provide a scalar summary.
-      summary.scalar('bias', bias[0][0])
-    summary.scalar('fraction_of_zero_weights',
-                   _compute_fraction_of_zero(variables))
-    return logits
-
-  return linear_logit_fn
-
-
-def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
-                     partitioner, config, sparse_combiner='sum'):
-  """A model_fn for linear models that use a gradient-based optimizer.
-
-  Args:
-    features: dict of `Tensor`.
-    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
-    mode: Defines whether this is training, evaluation or prediction.
-      See `ModeKeys`.
-    head: A `Head` instance.
-    feature_columns: An iterable containing all the feature columns used by
-      the model.
-    optimizer: string, `Optimizer` object, or callable that defines the
-      optimizer to use for training. If `None`, will use a FTRL optimizer.
-    partitioner: Partitioner for variables.
-    config: `RunConfig` object to configure the runtime settings.
-    sparse_combiner: A string specifying how to reduce if a categorical column
-      is multivalent.  One of "mean", "sqrtn", and "sum".
-
-  Returns:
-    An `EstimatorSpec` instance.
-
-  Raises:
-    ValueError: mode or params are invalid, or features has the wrong type.
-  """
-  if not isinstance(features, dict):
-    raise ValueError('features should be a dictionary of `Tensor`s. '
-                     'Given type: {}'.format(type(features)))
-
-  optimizer = optimizers.get_optimizer_instance(
-      optimizer or _get_default_optimizer(feature_columns),
-      learning_rate=_LEARNING_RATE)
-  num_ps_replicas = config.num_ps_replicas if config else 0
-
-  partitioner = partitioner or (
-      partitioned_variables.min_max_variable_partitioner(
-          max_partitions=num_ps_replicas,
-          min_slice_size=64 << 20))
-
-  with variable_scope.variable_scope(
-      'linear',
-      values=tuple(six.itervalues(features)),
-      partitioner=partitioner):
-
-    logit_fn = _linear_logit_fn_builder(
-        units=head.logits_dimension, feature_columns=feature_columns,
-        sparse_combiner=sparse_combiner)
-    logits = logit_fn(features=features)
-
-    return head.create_estimator_spec(
-        features=features,
-        mode=mode,
-        labels=labels,
-        optimizer=optimizer,
-        logits=logits)
-
-
-@estimator_export('estimator.LinearClassifier')
-class LinearClassifier(estimator.Estimator):
-  """Linear classifier model.
-
-  Train a linear model to classify instances into one of multiple possible
-  classes. When number of possible classes is 2, this is binary classification.
-
-  Example:
-
-  ```python
-  categorical_column_a = categorical_column_with_hash_bucket(...)
-  categorical_column_b = categorical_column_with_hash_bucket(...)
-
-  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
-
-  # Estimator using the default optimizer.
-  estimator = LinearClassifier(
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b])
-
-  # Or estimator using the FTRL optimizer with regularization.
-  estimator = LinearClassifier(
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b],
-      optimizer=tf.train.FtrlOptimizer(
-        learning_rate=0.1,
-        l1_regularization_strength=0.001
-      ))
-
-  # Or estimator using an optimizer with a learning rate decay.
-  estimator = LinearClassifier(
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b],
-      optimizer=lambda: tf.train.FtrlOptimizer(
-          learning_rate=tf.exponential_decay(
-              learning_rate=0.1,
-              global_step=tf.get_global_step(),
-              decay_steps=10000,
-              decay_rate=0.96))
-
-  # Or estimator with warm-starting from a previous checkpoint.
-  estimator = LinearClassifier(
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b],
-      warm_start_from="/path/to/checkpoint/dir")
-
-
-  # Input builders
-  def input_fn_train: # returns x, y (where y represents label's class index).
-    ...
-  def input_fn_eval: # returns x, y (where y represents label's class index).
-    ...
-  estimator.train(input_fn=input_fn_train)
-  estimator.evaluate(input_fn=input_fn_eval)
-  estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-    otherwise there will be a `KeyError`:
-
-  * if `weight_column` is not `None`, a feature with
-    `key=weight_column` whose value is a `Tensor`.
-  * for each `column` in `feature_columns`:
-    - if `column` is a `SparseColumn`, a feature with `key=column.name`
-      whose `value` is a `SparseTensor`.
-    - if `column` is a `WeightedSparseColumn`, two features: the first with
-      `key` the id column name, the second with `key` the weight column name.
-      Both features' `value` must be a `SparseTensor`.
-    - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
-      whose `value` is a `Tensor`.
-
-  Loss is calculated by using softmax cross entropy.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               feature_columns,
-               model_dir=None,
-               n_classes=2,
-               weight_column=None,
-               label_vocabulary=None,
-               optimizer='Ftrl',
-               config=None,
-               partitioner=None,
-               warm_start_from=None,
-               loss_reduction=losses.Reduction.SUM,
-               sparse_combiner='sum'):
-    """Construct a `LinearClassifier` estimator object.
-
-    Args:
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `FeatureColumn`.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
-        to continue training a previously saved model.
-      n_classes: number of label classes. Default is binary classification.
-        Note that class labels are integers representing the class index (i.e.
-        values from 0 to n_classes-1). For arbitrary label values (e.g. string
-        labels), convert to class indices first.
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to down weight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      label_vocabulary: A list of strings represents possible label values. If
-        given, labels must be string type and have any value in
-        `label_vocabulary`. If it is not given, that means labels are
-        already encoded as integer or float within [0, 1] for `n_classes=2` and
-        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
-        Also there will be errors if vocabulary is not provided and labels are
-        string.
-      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
-        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
-        callable. Defaults to FTRL optimizer.
-      config: `RunConfig` object to configure the runtime settings.
-      partitioner: Optional. Partitioner for input layer.
-      warm_start_from: A string filepath to a checkpoint to warm-start from, or
-        a `WarmStartSettings` object to fully configure warm-starting.  If the
-        string filepath is provided instead of a `WarmStartSettings`, then all
-        weights and biases are warm-started, and it is assumed that vocabularies
-        and Tensor names are unchanged.
-      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-        to reduce training loss over batch. Defaults to `SUM`.
-      sparse_combiner: A string specifying how to reduce if a categorical column
-        is multivalent.  One of "mean", "sqrtn", and "sum" -- these are
-        effectively different ways to do example-level normalization, which can
-        be useful for bag-of-words features. for more details, see
-        `tf.feature_column.linear_model`.
-
-    Returns:
-      A `LinearClassifier` estimator.
-
-    Raises:
-      ValueError: if n_classes < 2.
-    """
-    if n_classes == 2:
-      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
-          weight_column=weight_column,
-          label_vocabulary=label_vocabulary,
-          loss_reduction=loss_reduction)
-    else:
-      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
-          n_classes, weight_column=weight_column,
-          label_vocabulary=label_vocabulary,
-          loss_reduction=loss_reduction)
-
-    def _model_fn(features, labels, mode, config):
-      """Call the defined shared _linear_model_fn."""
-      return _linear_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          feature_columns=tuple(feature_columns or []),
-          optimizer=optimizer,
-          partitioner=partitioner,
-          config=config,
-          sparse_combiner=sparse_combiner)
-
-    super(LinearClassifier, self).__init__(
-        model_fn=_model_fn,
-        model_dir=model_dir,
-        config=config,
-        warm_start_from=warm_start_from)
-
-
-@estimator_export('estimator.LinearRegressor')
-class LinearRegressor(estimator.Estimator):
-  """An estimator for TensorFlow Linear regression problems.
-
-  Train a linear regression model to predict label value given observation of
-  feature values.
-
-  Example:
-
-  ```python
-  categorical_column_a = categorical_column_with_hash_bucket(...)
-  categorical_column_b = categorical_column_with_hash_bucket(...)
-
-  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
-
-  # Estimator using the default optimizer.
-  estimator = LinearRegressor(
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b])
-
-  # Or estimator using the FTRL optimizer with regularization.
-  estimator = LinearRegressor(
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b],
-      optimizer=tf.train.FtrlOptimizer(
-        learning_rate=0.1,
-        l1_regularization_strength=0.001
-      ))
-
-  # Or estimator using an optimizer with a learning rate decay.
-  estimator = LinearRegressor(
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b],
-      optimizer=lambda: tf.train.FtrlOptimizer(
-          learning_rate=tf.exponential_decay(
-              learning_rate=0.1,
-              global_step=tf.get_global_step(),
-              decay_steps=10000,
-              decay_rate=0.96))
-
-  # Or estimator with warm-starting from a previous checkpoint.
-  estimator = LinearRegressor(
-      feature_columns=[categorical_column_a,
-                       categorical_feature_a_x_categorical_feature_b],
-      warm_start_from="/path/to/checkpoint/dir")
-
-
-  # Input builders
-  def input_fn_train: # returns x, y
-    ...
-  def input_fn_eval: # returns x, y
-    ...
-  estimator.train(input_fn=input_fn_train)
-  estimator.evaluate(input_fn=input_fn_eval)
-  estimator.predict(input_fn=input_fn_predict)
-  ```
-
-  Input of `train` and `evaluate` should have following features,
-    otherwise there will be a KeyError:
-
-  * if `weight_column` is not `None`:
-    key=weight_column, value=a `Tensor`
-  * for column in `feature_columns`:
-    - if isinstance(column, `SparseColumn`):
-        key=column.name, value=a `SparseTensor`
-    - if isinstance(column, `WeightedSparseColumn`):
-        {key=id column name, value=a `SparseTensor`,
-         key=weight column name, value=a `SparseTensor`}
-    - if isinstance(column, `RealValuedColumn`):
-        key=column.name, value=a `Tensor`
-
-  Loss is calculated by using mean squared error.
-
-  @compatibility(eager)
-  Estimators can be used while eager execution is enabled. Note that `input_fn`
-  and all hooks are executed inside a graph context, so they have to be written
-  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
-  generally works in both graph and eager modes.
-  @end_compatibility
-  """
-
-  def __init__(self,
-               feature_columns,
-               model_dir=None,
-               label_dimension=1,
-               weight_column=None,
-               optimizer='Ftrl',
-               config=None,
-               partitioner=None,
-               warm_start_from=None,
-               loss_reduction=losses.Reduction.SUM,
-               sparse_combiner='sum'):
-    """Initializes a `LinearRegressor` instance.
-
-    Args:
-      feature_columns: An iterable containing all the feature columns used by
-        the model. All items in the set should be instances of classes derived
-        from `FeatureColumn`.
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into a estimator
-        to continue training a previously saved model.
-      label_dimension: Number of regression targets per example. This is the
-        size of the last dimension of the labels and logits `Tensor` objects
-        (typically, these have shape `[batch_size, label_dimension]`).
-      weight_column: A string or a `_NumericColumn` created by
-        `tf.feature_column.numeric_column` defining feature column representing
-        weights. It is used to down weight or boost examples during training. It
-        will be multiplied by the loss of the example. If it is a string, it is
-        used as a key to fetch weight tensor from the `features`. If it is a
-        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-        then weight_column.normalizer_fn is applied on it to get weight tensor.
-      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
-        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
-        callable. Defaults to FTRL optimizer.
-      config: `RunConfig` object to configure the runtime settings.
-      partitioner: Optional. Partitioner for input layer.
-      warm_start_from: A string filepath to a checkpoint to warm-start from, or
-        a `WarmStartSettings` object to fully configure warm-starting.  If the
-        string filepath is provided instead of a `WarmStartSettings`, then all
-        weights and biases are warm-started, and it is assumed that vocabularies
-        and Tensor names are unchanged.
-      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
-        to reduce training loss over batch. Defaults to `SUM`.
-      sparse_combiner: A string specifying how to reduce if a categorical column
-        is multivalent.  One of "mean", "sqrtn", and "sum" -- these are
-        effectively different ways to do example-level normalization, which can
-        be useful for bag-of-words features. for more details, see
-        `tf.feature_column.linear_model`.
-    """
-    head = head_lib._regression_head(  # pylint: disable=protected-access
-        label_dimension=label_dimension, weight_column=weight_column,
-        loss_reduction=loss_reduction)
+from tensorflow_estimator.python.estimator.canned import linear
 
-    def _model_fn(features, labels, mode, config):
-      """Call the defined shared _linear_model_fn."""
-      return _linear_model_fn(
-          features=features,
-          labels=labels,
-          mode=mode,
-          head=head,
-          feature_columns=tuple(feature_columns or []),
-          optimizer=optimizer,
-          partitioner=partitioner,
-          config=config,
-          sparse_combiner=sparse_combiner)
+# Include attrs that start with single underscore.
+linear.__all__ = [s for s in dir(linear) if not s.startswith('__')]
 
-    super(LinearRegressor, self).__init__(
-        model_fn=_model_fn,
-        model_dir=model_dir,
-        config=config,
-        warm_start_from=warm_start_from)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.linear import *
diff --git a/tensorflow/python/estimator/canned/linear_test.py b/tensorflow/python/estimator/canned/linear_test.py
deleted file mode 100644
index 3e6da5de22..0000000000
--- a/tensorflow/python/estimator/canned/linear_test.py
+++ /dev/null
@@ -1,255 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for linear.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.estimator.canned import linear
-from tensorflow.python.estimator.canned import linear_testing_utils
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
-from tensorflow.python.platform import test
-
-
-def _linear_regressor_fn(*args, **kwargs):
-  return linear.LinearRegressor(*args, **kwargs)
-
-
-def _linear_classifier_fn(*args, **kwargs):
-  return linear.LinearClassifier(*args, **kwargs)
-
-
-# Tests for Linear Regressor.
-
-
-class LinearRegressorPartitionerTest(
-    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearRegressorPartitionerV2Test(
-    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-class LinearRegressorEvaluationTest(
-    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearRegressorEvaluationV2Test(
-    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-class LinearRegressorPredictTest(
-    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearRegressorPredictV2Test(
-    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-class LinearRegressorIntegrationTest(
-    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearRegressorIntegrationV2Test(
-    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-class LinearRegressorTrainingTest(
-    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column)
-
-
-class LinearRegressorTrainingV2Test(
-    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
-        self, _linear_regressor_fn, fc_lib=feature_column_v2)
-
-
-# Tests for Linear Classifier.
-class LinearClassifierTrainingTest(
-    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
-
-
-class LinearClassifierTrainingV2Test(
-    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
-        self,
-        linear_classifier_fn=_linear_classifier_fn,
-        fc_lib=feature_column_v2)
-
-
-class LinearClassifierEvaluationTest(
-    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
-
-
-class LinearClassifierEvaluationV2Test(
-    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
-        self,
-        linear_classifier_fn=_linear_classifier_fn,
-        fc_lib=feature_column_v2)
-
-
-class LinearClassifierPredictTest(
-    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
-
-
-class LinearClassifierPredictV2Test(
-    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
-        self,
-        linear_classifier_fn=_linear_classifier_fn,
-        fc_lib=feature_column_v2)
-
-
-class LinearClassifierIntegrationTest(
-    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
-        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
-
-
-class LinearClassifierIntegrationV2Test(
-    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
-        self,
-        linear_classifier_fn=_linear_classifier_fn,
-        fc_lib=feature_column_v2)
-
-
-# Tests for Linear logit_fn.
-class LinearLogitFnTest(linear_testing_utils.BaseLinearLogitFnTest,
-                        test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearLogitFnTest.__init__(
-        self, fc_lib=feature_column)
-
-
-class LinearLogitFnV2Test(linear_testing_utils.BaseLinearLogitFnTest,
-                          test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearLogitFnTest.__init__(
-        self, fc_lib=feature_column_v2)
-
-
-# Tests for warm-starting with Linear logit_fn.
-class LinearWarmStartingTest(linear_testing_utils.BaseLinearWarmStartingTest,
-                             test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearWarmStartingTest.__init__(
-        self,
-        _linear_classifier_fn,
-        _linear_regressor_fn,
-        fc_lib=feature_column)
-
-
-class LinearWarmStartingV2Test(linear_testing_utils.BaseLinearWarmStartingTest,
-                               test.TestCase):
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    linear_testing_utils.BaseLinearWarmStartingTest.__init__(
-        self,
-        _linear_classifier_fn,
-        _linear_regressor_fn,
-        fc_lib=feature_column_v2)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py
index 2cfa2a8e15..f6d26348c9 100644
--- a/tensorflow/python/estimator/canned/linear_testing_utils.py
+++ b/tensorflow/python/estimator/canned/linear_testing_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,2344 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Utils for testing linear estimators."""
+"""linear_testing_utils python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
-import os
-import shutil
-import tempfile
-
-import numpy as np
-import six
-
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import run_config
-from tensorflow.python.estimator.canned import linear
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.estimator.inputs import pandas_io
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.feature_column import feature_column_v2
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import data_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import partitioned_variables
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import gradient_descent
-from tensorflow.python.training import input as input_lib
-from tensorflow.python.training import optimizer as optimizer_lib
-from tensorflow.python.training import queue_runner
-from tensorflow.python.training import saver
-from tensorflow.python.training import session_run_hook
-
-try:
-  # pylint: disable=g-import-not-at-top
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
-
-# pylint rules which are disabled by default for test files.
-# pylint: disable=invalid-name,protected-access,missing-docstring
-
-# Names of variables created by model.
-AGE_WEIGHT_NAME = 'linear/linear_model/age/weights'
-HEIGHT_WEIGHT_NAME = 'linear/linear_model/height/weights'
-OCCUPATION_WEIGHT_NAME = 'linear/linear_model/occupation/weights'
-BIAS_NAME = 'linear/linear_model/bias_weights'
-LANGUAGE_WEIGHT_NAME = 'linear/linear_model/language/weights'
-
-
-def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
-  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
-    expected = ops.convert_to_tensor(expected, name='expected')
-    actual = ops.convert_to_tensor(actual, name='actual')
-    rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
-    rtol = ops.convert_to_tensor(rtol, name='rtol')
-    return check_ops.assert_less(
-        rdiff,
-        rtol,
-        data=('Condition expected =~ actual did not hold element-wise:'
-              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
-              'rtol = ', rtol,),
-        name=scope)
-
-
-def save_variables_to_ckpt(model_dir):
-  init_all_op = [variables_lib.global_variables_initializer()]
-  with tf_session.Session() as sess:
-    sess.run(init_all_op)
-    saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
-
-
-def queue_parsed_features(feature_map):
-  tensors_to_enqueue = []
-  keys = []
-  for key, tensor in six.iteritems(feature_map):
-    keys.append(key)
-    tensors_to_enqueue.append(tensor)
-  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
-  input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
-  queue_runner.add_queue_runner(
-      queue_runner.QueueRunner(input_queue,
-                               [input_queue.enqueue(tensors_to_enqueue)]))
-  dequeued_tensors = input_queue.dequeue()
-  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
-
-
-def sorted_key_dict(unsorted_dict):
-  return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}
-
-
-def sigmoid(x):
-  return 1 / (1 + np.exp(-1.0 * x))
-
-
-class CheckPartitionerVarHook(session_run_hook.SessionRunHook):
-  """A `SessionRunHook` to check a partitioned variable."""
-
-  def __init__(self, test_case, var_name, var_dim, partitions):
-    self._test_case = test_case
-    self._var_name = var_name
-    self._var_dim = var_dim
-    self._partitions = partitions
-
-  def begin(self):
-    with variable_scope.variable_scope(
-        variable_scope.get_variable_scope()) as scope:
-      scope.reuse_variables()
-      partitioned_weight = variable_scope.get_variable(
-          self._var_name, shape=(self._var_dim, 1))
-      self._test_case.assertTrue(
-          isinstance(partitioned_weight, variables_lib.PartitionedVariable))
-      for part in partitioned_weight:
-        self._test_case.assertEqual(self._var_dim // self._partitions,
-                                    part.get_shape()[0])
-
-
-class BaseLinearRegressorPartitionerTest(object):
-
-  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
-    self._linear_regressor_fn = linear_regressor_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def testPartitioner(self):
-    x_dim = 64
-    partitions = 4
-
-    def _partitioner(shape, dtype):
-      del dtype  # unused; required by Fn signature.
-      # Only partition the embedding tensor.
-      return [partitions, 1] if shape[0] == x_dim else [1]
-
-    regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.categorical_column_with_hash_bucket(
-            'language', hash_bucket_size=x_dim),),
-        partitioner=_partitioner,
-        model_dir=self._model_dir)
-
-    def _input_fn():
-      return {
-          'language':
-              sparse_tensor.SparseTensor(
-                  values=['english', 'spanish'],
-                  indices=[[0, 0], [0, 1]],
-                  dense_shape=[1, 2])
-      }, [[10.]]
-
-    hook = CheckPartitionerVarHook(self, LANGUAGE_WEIGHT_NAME, x_dim,
-                                   partitions)
-    regressor.train(input_fn=_input_fn, steps=1, hooks=[hook])
-
-  def testDefaultPartitionerWithMultiplePsReplicas(self):
-    partitions = 2
-    # This results in weights larger than the default partition size of 64M,
-    # so partitioned weights are created (each weight uses 4 bytes).
-    x_dim = 32 << 20
-
-    class FakeRunConfig(run_config.RunConfig):
-
-      @property
-      def num_ps_replicas(self):
-        return partitions
-
-    # Mock the device setter as ps is not available on test machines.
-    with test.mock.patch.object(
-        estimator,
-        '_get_replica_device_setter',
-        return_value=lambda _: '/cpu:0'):
-      linear_regressor = self._linear_regressor_fn(
-          feature_columns=(self._fc_lib.categorical_column_with_hash_bucket(
-              'language', hash_bucket_size=x_dim),),
-          config=FakeRunConfig(),
-          model_dir=self._model_dir)
-
-      def _input_fn():
-        return {
-            'language':
-                sparse_tensor.SparseTensor(
-                    values=['english', 'spanish'],
-                    indices=[[0, 0], [0, 1]],
-                    dense_shape=[1, 2])
-        }, [[10.]]
-
-      hook = CheckPartitionerVarHook(self, LANGUAGE_WEIGHT_NAME, x_dim,
-                                     partitions)
-      linear_regressor.train(input_fn=_input_fn, steps=1, hooks=[hook])
-
-
-# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders.
-class BaseLinearRegressorEvaluationTest(object):
-
-  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
-    self._linear_regressor_fn = linear_regressor_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_evaluation_for_simple_data(self):
-    with ops.Graph().as_default():
-      variables_lib.Variable([[11.0]], name=AGE_WEIGHT_NAME)
-      variables_lib.Variable([2.0], name=BIAS_NAME)
-      variables_lib.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        model_dir=self._model_dir)
-    eval_metrics = linear_regressor.evaluate(
-        input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1)
-
-    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10. Loss is 3**2 = 9.
-    self.assertDictEqual({
-        metric_keys.MetricKeys.LOSS: 9.,
-        metric_keys.MetricKeys.LOSS_MEAN: 9.,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
-        metric_keys.MetricKeys.LABEL_MEAN: 10.,
-        ops.GraphKeys.GLOBAL_STEP: 100
-    }, eval_metrics)
-
-  def test_evaluation_batch(self):
-    """Tests evaluation for batch_size==2."""
-    with ops.Graph().as_default():
-      variables_lib.Variable([[11.0]], name=AGE_WEIGHT_NAME)
-      variables_lib.Variable([2.0], name=BIAS_NAME)
-      variables_lib.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        model_dir=self._model_dir)
-    eval_metrics = linear_regressor.evaluate(
-        input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1)
-
-    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
-    # Loss per example is 3**2 = 9.
-    # Training loss is the sum over batch = 9 + 9 = 18
-    # Average loss is the average over batch = 9
-    self.assertDictEqual({
-        metric_keys.MetricKeys.LOSS: 18.,
-        metric_keys.MetricKeys.LOSS_MEAN: 9.,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
-        metric_keys.MetricKeys.LABEL_MEAN: 10.,
-        ops.GraphKeys.GLOBAL_STEP: 100
-    }, eval_metrics)
-
-  def test_evaluation_weights(self):
-    """Tests evaluation with weights."""
-    with ops.Graph().as_default():
-      variables_lib.Variable([[11.0]], name=AGE_WEIGHT_NAME)
-      variables_lib.Variable([2.0], name=BIAS_NAME)
-      variables_lib.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    def _input_fn():
-      features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
-      labels = ((10.,), (10.,))
-      return features, labels
-
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        weight_column='weights',
-        model_dir=self._model_dir)
-    eval_metrics = linear_regressor.evaluate(input_fn=_input_fn, steps=1)
-
-    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
-    # Loss per example is 3**2 = 9.
-    # Training loss is the weighted sum over batch = 9 + 2*9 = 27
-    # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9
-    self.assertDictEqual({
-        metric_keys.MetricKeys.LOSS: 27.,
-        metric_keys.MetricKeys.LOSS_MEAN: 9.,
-        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
-        metric_keys.MetricKeys.LABEL_MEAN: 10.,
-        ops.GraphKeys.GLOBAL_STEP: 100
-    }, eval_metrics)
-
-  def test_evaluation_for_multi_dimensions(self):
-    x_dim = 3
-    label_dim = 2
-    with ops.Graph().as_default():
-      variables_lib.Variable(
-          [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name=AGE_WEIGHT_NAME)
-      variables_lib.Variable([7.0, 8.0], name=BIAS_NAME)
-      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('age', shape=(x_dim,)),),
-        label_dimension=label_dim,
-        model_dir=self._model_dir)
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'age': np.array([[2., 4., 5.]]),
-        },
-        y=np.array([[46., 58.]]),
-        batch_size=1,
-        num_epochs=None,
-        shuffle=False)
-    eval_metrics = linear_regressor.evaluate(input_fn=input_fn, steps=1)
-
-    self.assertItemsEqual(
-        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
-         metric_keys.MetricKeys.PREDICTION_MEAN,
-         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
-        eval_metrics.keys())
-
-    # Logit is
-    #   [2., 4., 5.] * [1.0, 2.0] + [7.0, 8.0] = [39, 50] + [7.0, 8.0]
-    #                  [3.0, 4.0]
-    #                  [5.0, 6.0]
-    # which is [46, 58]
-    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
-
-  def test_evaluation_for_multiple_feature_columns(self):
-    with ops.Graph().as_default():
-      variables_lib.Variable([[10.0]], name=AGE_WEIGHT_NAME)
-      variables_lib.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
-      variables_lib.Variable([5.0], name=BIAS_NAME)
-      variables_lib.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    batch_size = 2
-    feature_columns = [
-        self._fc_lib.numeric_column('age'),
-        self._fc_lib.numeric_column('height')
-    ]
-    input_fn = numpy_io.numpy_input_fn(
-        x={'age': np.array([20, 40]),
-           'height': np.array([4, 8])},
-        y=np.array([[213.], [421.]]),
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=False)
-
-    est = self._linear_regressor_fn(
-        feature_columns=feature_columns, model_dir=self._model_dir)
-
-    eval_metrics = est.evaluate(input_fn=input_fn, steps=1)
-    self.assertItemsEqual(
-        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
-         metric_keys.MetricKeys.PREDICTION_MEAN,
-         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
-        eval_metrics.keys())
-
-    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
-    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
-    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
-
-  def test_evaluation_for_multiple_feature_columns_mix(self):
-    with ops.Graph().as_default():
-      variables_lib.Variable([[10.0]], name=AGE_WEIGHT_NAME)
-      variables_lib.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
-      variables_lib.Variable([5.0], name=BIAS_NAME)
-      variables_lib.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    batch_size = 2
-    feature_columns = [
-        feature_column.numeric_column('age'),
-        feature_column_v2.numeric_column('height')
-    ]
-
-    def _input_fn():
-      features_ds = dataset_ops.Dataset.from_tensor_slices({
-          'age': np.array([20, 40]),
-          'height': np.array([4, 8])
-      })
-      labels_ds = dataset_ops.Dataset.from_tensor_slices(
-          np.array([[213.], [421.]]))
-      return (dataset_ops.Dataset.zip((features_ds, labels_ds))
-              .batch(batch_size).repeat(None))
-
-    est = self._linear_regressor_fn(
-        feature_columns=feature_columns, model_dir=self._model_dir)
-
-    eval_metrics = est.evaluate(input_fn=_input_fn, steps=1)
-    self.assertItemsEqual(
-        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
-         metric_keys.MetricKeys.PREDICTION_MEAN,
-         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
-        eval_metrics.keys())
-
-    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
-    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
-    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
-
-
-class BaseLinearRegressorPredictTest(object):
-
-  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
-    self._linear_regressor_fn = linear_regressor_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def test_1d(self):
-    """Tests predict when all variables are one-dimensional."""
-    with ops.Graph().as_default():
-      variables_lib.Variable([[10.]], name='linear/linear_model/x/weights')
-      variables_lib.Variable([.2], name=BIAS_NAME)
-      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('x'),),
-        model_dir=self._model_dir)
-
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': np.array([[2.]])},
-        y=None,
-        batch_size=1,
-        num_epochs=1,
-        shuffle=False)
-    predictions = linear_regressor.predict(input_fn=predict_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    # x * weight + bias = 2. * 10. + .2 = 20.2
-    self.assertAllClose([[20.2]], predicted_scores)
-
-  def testMultiDim(self):
-    """Tests predict when all variables are multi-dimenstional."""
-    batch_size = 2
-    label_dimension = 3
-    x_dim = 4
-    feature_columns = (self._fc_lib.numeric_column('x', shape=(x_dim,)),)
-    with ops.Graph().as_default():
-      variables_lib.Variable(  # shape=[x_dim, label_dimension]
-          [[1., 2., 3.], [2., 3., 4.], [3., 4., 5.], [4., 5., 6.]],
-          name='linear/linear_model/x/weights')
-      variables_lib.Variable(  # shape=[label_dimension]
-          [.2, .4, .6], name=BIAS_NAME)
-      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=feature_columns,
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-
-    predict_input_fn = numpy_io.numpy_input_fn(
-        # x shape=[batch_size, x_dim]
-        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
-        y=None,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-    predictions = linear_regressor.predict(input_fn=predict_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    # score = x * weight + bias, shape=[batch_size, label_dimension]
-    self.assertAllClose([[30.2, 40.4, 50.6], [70.2, 96.4, 122.6]],
-                        predicted_scores)
-
-  def testTwoFeatureColumns(self):
-    """Tests predict with two feature columns."""
-    with ops.Graph().as_default():
-      variables_lib.Variable([[10.]], name='linear/linear_model/x0/weights')
-      variables_lib.Variable([[20.]], name='linear/linear_model/x1/weights')
-      variables_lib.Variable([.2], name=BIAS_NAME)
-      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('x0'),
-                         self._fc_lib.numeric_column('x1')),
-        model_dir=self._model_dir)
-
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x0': np.array([[2.]]),
-           'x1': np.array([[3.]])},
-        y=None,
-        batch_size=1,
-        num_epochs=1,
-        shuffle=False)
-    predictions = linear_regressor.predict(input_fn=predict_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
-    self.assertAllClose([[80.2]], predicted_scores)
-
-  def testTwoFeatureColumnsMix(self):
-    """Tests predict with two feature columns."""
-    with ops.Graph().as_default():
-      variables_lib.Variable([[10.]], name='linear/linear_model/x0/weights')
-      variables_lib.Variable([[20.]], name='linear/linear_model/x1/weights')
-      variables_lib.Variable([.2], name=BIAS_NAME)
-      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(feature_column.numeric_column('x0'),
-                         feature_column_v2.numeric_column('x1')),
-        model_dir=self._model_dir)
-
-    def _predict_input_fn():
-      return dataset_ops.Dataset.from_tensor_slices({
-          'x0': np.array([[2.]]),
-          'x1': np.array([[3.]])
-      }).batch(1)
-
-    predictions = linear_regressor.predict(input_fn=_predict_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
-    self.assertAllClose([[80.2]], predicted_scores)
-
-  def testSparseCombiner(self):
-    w_a = 2.0
-    w_b = 3.0
-    w_c = 5.0
-    bias = 5.0
-    with ops.Graph().as_default():
-      variables_lib.Variable([[w_a], [w_b], [w_c]], name=LANGUAGE_WEIGHT_NAME)
-      variables_lib.Variable([bias], name=BIAS_NAME)
-      variables_lib.Variable(1, name=ops.GraphKeys.GLOBAL_STEP,
-                             dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    def _input_fn():
-      return dataset_ops.Dataset.from_tensors({
-          'language': sparse_tensor.SparseTensor(
-              values=['a', 'c', 'b', 'c'],
-              indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
-              dense_shape=[2, 2]),
-      })
-
-    feature_columns = (self._fc_lib.categorical_column_with_vocabulary_list(
-        'language', vocabulary_list=['a', 'b', 'c']),)
-
-    # Check prediction for each sparse_combiner.
-    # With sparse_combiner = 'sum', we have
-    # logits_1 = w_a + w_c + bias
-    #          = 2.0 + 5.0 + 5.0 = 12.0
-    # logits_2 = w_b + w_c + bias
-    #          = 3.0 + 5.0 + 5.0 = 13.0
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=feature_columns,
-        model_dir=self._model_dir)
-    predictions = linear_regressor.predict(input_fn=_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    self.assertAllClose([[12.0], [13.0]], predicted_scores)
-
-    # With sparse_combiner = 'mean', we have
-    # logits_1 = 1/2 * (w_a + w_c) + bias
-    #          = 1/2 * (2.0 + 5.0) + 5.0 = 8.5
-    # logits_2 = 1/2 * (w_b + w_c) + bias
-    #          = 1/2 * (3.0 + 5.0) + 5.0 = 9.0
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=feature_columns,
-        model_dir=self._model_dir,
-        sparse_combiner='mean')
-    predictions = linear_regressor.predict(input_fn=_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    self.assertAllClose([[8.5], [9.0]], predicted_scores)
-
-    # With sparse_combiner = 'sqrtn', we have
-    # logits_1 = sqrt(2)/2 * (w_a + w_c) + bias
-    #          = sqrt(2)/2 * (2.0 + 5.0) + 5.0 = 9.94974
-    # logits_2 = sqrt(2)/2 * (w_b + w_c) + bias
-    #          = sqrt(2)/2 * (3.0 + 5.0) + 5.0 = 10.65685
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=feature_columns,
-        model_dir=self._model_dir,
-        sparse_combiner='sqrtn')
-    predictions = linear_regressor.predict(input_fn=_input_fn)
-    predicted_scores = list([x['predictions'] for x in predictions])
-    self.assertAllClose([[9.94974], [10.65685]], predicted_scores)
-
-
-class BaseLinearRegressorIntegrationTest(object):
-
-  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
-    self._linear_regressor_fn = linear_regressor_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
-                          input_dimension, label_dimension, prediction_length):
-    feature_columns = [
-        self._fc_lib.numeric_column('x', shape=(input_dimension,))
-    ]
-    est = self._linear_regressor_fn(
-        feature_columns=feature_columns,
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    # learn y = x
-    est.train(train_input_fn, steps=200)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array(
-        [x['predictions'] for x in est.predict(predict_input_fn)])
-    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)
-
-    # EXPORT
-    feature_spec = self._fc_lib.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def test_numpy_input_fn(self):
-    """Tests complete flow with numpy_input_fn."""
-    label_dimension = 2
-    input_dimension = label_dimension
-    batch_size = 10
-    prediction_length = batch_size
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=None,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        label_dimension=label_dimension,
-        prediction_length=prediction_length)
-
-  def test_pandas_input_fn(self):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-
-    # Pandas DataFrame natually supports 1 dim data only.
-    label_dimension = 1
-    input_dimension = label_dimension
-    batch_size = 10
-    data = np.array([1., 2., 3., 4.], dtype=np.float32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(data)
-    prediction_length = 4
-
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x, batch_size=batch_size, shuffle=False)
-
-    self._test_complete_flow(
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        label_dimension=label_dimension,
-        prediction_length=prediction_length)
-
-  def test_input_fn_from_parse_example(self):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    label_dimension = 2
-    input_dimension = label_dimension
-    batch_size = 10
-    prediction_length = batch_size
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-
-    serialized_examples = []
-    for datum in data:
-      example = example_pb2.Example(features=feature_pb2.Features(
-          feature={
-              'x':
-                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
-                      value=datum)),
-              'y':
-                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
-                      value=datum[:label_dimension])),
-          }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
-    }
-
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    self._test_complete_flow(
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=input_dimension,
-        label_dimension=label_dimension,
-        prediction_length=prediction_length)
-
-
-class BaseLinearRegressorTrainingTest(object):
-
-  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
-    self._linear_regressor_fn = linear_regressor_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
-
-  def _mock_optimizer(self, expected_loss=None):
-    expected_var_names = [
-        '%s/part_0:0' % AGE_WEIGHT_NAME,
-        '%s/part_0:0' % BIAS_NAME
-    ]
-
-    def _minimize(loss, global_step=None, var_list=None):
-      trainable_vars = var_list or ops.get_collection(
-          ops.GraphKeys.TRAINABLE_VARIABLES)
-      self.assertItemsEqual(expected_var_names,
-                            [var.name for var in trainable_vars])
-
-      # Verify loss. We can't check the value directly, so we add an assert op.
-      self.assertEquals(0, loss.shape.ndims)
-      if expected_loss is None:
-        if global_step is not None:
-          return state_ops.assign_add(global_step, 1).op
-        return control_flow_ops.no_op()
-      assert_loss = assert_close(
-          math_ops.to_float(expected_loss, name='expected'),
-          loss,
-          name='assert_loss')
-      with ops.control_dependencies((assert_loss,)):
-        if global_step is not None:
-          return state_ops.assign_add(global_step, 1).op
-        return control_flow_ops.no_op()
-
-    mock_optimizer = test.mock.NonCallableMock(
-        spec=optimizer_lib.Optimizer,
-        wraps=optimizer_lib.Optimizer(use_locking=False, name='my_optimizer'))
-    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
-
-    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
-    # So, return mock_optimizer itself for deepcopy.
-    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
-    return mock_optimizer
-
-  def _assert_checkpoint(self,
-                         expected_global_step,
-                         expected_age_weight=None,
-                         expected_bias=None):
-    shapes = {
-        name: shape
-        for (name, shape) in checkpoint_utils.list_variables(self._model_dir)
-    }
-
-    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
-    self.assertEqual(expected_global_step,
-                     checkpoint_utils.load_variable(self._model_dir,
-                                                    ops.GraphKeys.GLOBAL_STEP))
-
-    self.assertEqual([1, 1], shapes[AGE_WEIGHT_NAME])
-    if expected_age_weight is not None:
-      self.assertEqual(expected_age_weight,
-                       checkpoint_utils.load_variable(self._model_dir,
-                                                      AGE_WEIGHT_NAME))
-
-    self.assertEqual([1], shapes[BIAS_NAME])
-    if expected_bias is not None:
-      self.assertEqual(expected_bias,
-                       checkpoint_utils.load_variable(self._model_dir,
-                                                      BIAS_NAME))
-
-  def testFromScratchWithDefaultOptimizer(self):
-    # Create LinearRegressor.
-    label = 5.
-    age = 17
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        model_dir=self._model_dir)
-
-    # Train for a few steps, and validate final checkpoint.
-    num_steps = 10
-    linear_regressor.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self._assert_checkpoint(num_steps)
-
-  def testTrainWithOneDimLabel(self):
-    label_dimension = 1
-    batch_size = 20
-    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
-    est = self._linear_regressor_fn(
-        feature_columns=feature_columns,
-        label_dimension=label_dimension,
-        model_dir=self._model_dir)
-    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
-    self.assertEqual((batch_size,), data_rank_1.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1},
-        y=data_rank_1,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(200)
-
-  def testTrainWithOneDimWeight(self):
-    label_dimension = 1
-    batch_size = 20
-    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
-    est = self._linear_regressor_fn(
-        feature_columns=feature_columns,
-        label_dimension=label_dimension,
-        weight_column='w',
-        model_dir=self._model_dir)
-
-    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
-    self.assertEqual((batch_size,), data_rank_1.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1,
-           'w': data_rank_1},
-        y=data_rank_1,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(200)
-
-  def testFromScratch(self):
-    # Create LinearRegressor.
-    label = 5.
-    age = 17
-    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
-    mock_optimizer = self._mock_optimizer(expected_loss=25.)
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        model_dir=self._model_dir,
-        optimizer=mock_optimizer)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    linear_regressor.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        expected_global_step=num_steps,
-        expected_age_weight=0.,
-        expected_bias=0.)
-
-  def testFromCheckpoint(self):
-    # Create initial checkpoint.
-    age_weight = 10.0
-    bias = 5.0
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables_lib.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
-      variables_lib.Variable([bias], name=BIAS_NAME)
-      variables_lib.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # logits = age * age_weight + bias = 17 * 10. + 5. = 175
-    # loss = (logits - label)^2 = (175 - 5)^2 = 28900
-    mock_optimizer = self._mock_optimizer(expected_loss=28900.)
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        model_dir=self._model_dir,
-        optimizer=mock_optimizer)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    linear_regressor.train(
-        input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        expected_global_step=initial_global_step + num_steps,
-        expected_age_weight=age_weight,
-        expected_bias=bias)
-
-  def testFromCheckpointMultiBatch(self):
-    # Create initial checkpoint.
-    age_weight = 10.0
-    bias = 5.0
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables_lib.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
-      variables_lib.Variable([bias], name=BIAS_NAME)
-      variables_lib.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # logits = age * age_weight + bias
-    # logits[0] = 17 * 10. + 5. = 175
-    # logits[1] = 15 * 10. + 5. = 155
-    # loss = sum(logits - label)^2 = (175 - 5)^2 + (155 - 3)^2 = 52004
-    mock_optimizer = self._mock_optimizer(expected_loss=52004.)
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        model_dir=self._model_dir,
-        optimizer=mock_optimizer)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    linear_regressor.train(
-        input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))),
-        steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        expected_global_step=initial_global_step + num_steps,
-        expected_age_weight=age_weight,
-        expected_bias=bias)
-
-
-class BaseLinearClassifierTrainingTest(object):
-
-  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
-    self._linear_classifier_fn = linear_classifier_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _mock_optimizer(self, expected_loss=None):
-    expected_var_names = [
-        '%s/part_0:0' % AGE_WEIGHT_NAME,
-        '%s/part_0:0' % BIAS_NAME
-    ]
-
-    def _minimize(loss, global_step):
-      trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      self.assertItemsEqual(
-          expected_var_names,
-          [var.name for var in trainable_vars])
-
-      # Verify loss. We can't check the value directly, so we add an assert op.
-      self.assertEquals(0, loss.shape.ndims)
-      if expected_loss is None:
-        return state_ops.assign_add(global_step, 1).op
-      assert_loss = assert_close(
-          math_ops.to_float(expected_loss, name='expected'),
-          loss,
-          name='assert_loss')
-      with ops.control_dependencies((assert_loss,)):
-        return state_ops.assign_add(global_step, 1).op
-
-    mock_optimizer = test.mock.NonCallableMock(
-        spec=optimizer_lib.Optimizer,
-        wraps=optimizer_lib.Optimizer(use_locking=False, name='my_optimizer'))
-    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
-
-    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
-    # So, return mock_optimizer itself for deepcopy.
-    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
-    return mock_optimizer
-
-  def _assert_checkpoint(
-      self, n_classes, expected_global_step, expected_age_weight=None,
-      expected_bias=None):
-    logits_dimension = n_classes if n_classes > 2 else 1
-
-    shapes = {
-        name: shape for (name, shape) in
-        checkpoint_utils.list_variables(self._model_dir)
-    }
-
-    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
-    self.assertEqual(
-        expected_global_step,
-        checkpoint_utils.load_variable(
-            self._model_dir, ops.GraphKeys.GLOBAL_STEP))
-
-    self.assertEqual([1, logits_dimension],
-                     shapes[AGE_WEIGHT_NAME])
-    if expected_age_weight is not None:
-      self.assertAllEqual(expected_age_weight,
-                          checkpoint_utils.load_variable(
-                              self._model_dir,
-                              AGE_WEIGHT_NAME))
-
-    self.assertEqual([logits_dimension], shapes[BIAS_NAME])
-    if expected_bias is not None:
-      self.assertAllEqual(expected_bias,
-                          checkpoint_utils.load_variable(
-                              self._model_dir, BIAS_NAME))
-
-  def _testFromScratchWithDefaultOptimizer(self, n_classes):
-    label = 0
-    age = 17
-    est = linear.LinearClassifier(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # Train for a few steps, and validate final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self._assert_checkpoint(n_classes, num_steps)
-
-  def testBinaryClassesFromScratchWithDefaultOptimizer(self):
-    self._testFromScratchWithDefaultOptimizer(n_classes=2)
-
-  def testMultiClassesFromScratchWithDefaultOptimizer(self):
-    self._testFromScratchWithDefaultOptimizer(n_classes=4)
-
-  def _testTrainWithTwoDimsLabel(self, n_classes):
-    batch_size = 20
-
-    est = linear.LinearClassifier(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    data_rank_1 = np.array([0, 1])
-    data_rank_2 = np.array([[0], [1]])
-    self.assertEqual((2,), data_rank_1.shape)
-    self.assertEqual((2, 1), data_rank_2.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1},
-        y=data_rank_2,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(n_classes, 200)
-
-  def testBinaryClassesTrainWithTwoDimsLabel(self):
-    self._testTrainWithTwoDimsLabel(n_classes=2)
-
-  def testMultiClassesTrainWithTwoDimsLabel(self):
-    self._testTrainWithTwoDimsLabel(n_classes=4)
-
-  def _testTrainWithOneDimLabel(self, n_classes):
-    batch_size = 20
-
-    est = linear.LinearClassifier(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    data_rank_1 = np.array([0, 1])
-    self.assertEqual((2,), data_rank_1.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1},
-        y=data_rank_1,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(n_classes, 200)
-
-  def testBinaryClassesTrainWithOneDimLabel(self):
-    self._testTrainWithOneDimLabel(n_classes=2)
-
-  def testMultiClassesTrainWithOneDimLabel(self):
-    self._testTrainWithOneDimLabel(n_classes=4)
-
-  def _testTrainWithTwoDimsWeight(self, n_classes):
-    batch_size = 20
-
-    est = linear.LinearClassifier(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        weight_column='w',
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    data_rank_1 = np.array([0, 1])
-    data_rank_2 = np.array([[0], [1]])
-    self.assertEqual((2,), data_rank_1.shape)
-    self.assertEqual((2, 1), data_rank_2.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1, 'w': data_rank_2}, y=data_rank_1,
-        batch_size=batch_size, num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(n_classes, 200)
-
-  def testBinaryClassesTrainWithTwoDimsWeight(self):
-    self._testTrainWithTwoDimsWeight(n_classes=2)
-
-  def testMultiClassesTrainWithTwoDimsWeight(self):
-    self._testTrainWithTwoDimsWeight(n_classes=4)
-
-  def _testTrainWithOneDimWeight(self, n_classes):
-    batch_size = 20
-
-    est = linear.LinearClassifier(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        weight_column='w',
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    data_rank_1 = np.array([0, 1])
-    self.assertEqual((2,), data_rank_1.shape)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'age': data_rank_1, 'w': data_rank_1}, y=data_rank_1,
-        batch_size=batch_size, num_epochs=None,
-        shuffle=True)
-    est.train(train_input_fn, steps=200)
-    self._assert_checkpoint(n_classes, 200)
-
-  def testBinaryClassesTrainWithOneDimWeight(self):
-    self._testTrainWithOneDimWeight(n_classes=2)
-
-  def testMultiClassesTrainWithOneDimWeight(self):
-    self._testTrainWithOneDimWeight(n_classes=4)
-
-  def _testFromScratch(self, n_classes):
-    label = 1
-    age = 17
-    # For binary classifier:
-    #   loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are
-    #   all zero initially) and label = 1 so,
-    #      loss = 1 * -log ( sigmoid(logits) ) = 0.69315
-    # For multi class classifier:
-    #   loss = cross_entropy(logits, label) where logits are all 0s (weights are
-    #   all zero initially) and label = 1 so,
-    #      loss = 1 * -log ( 1.0 / n_classes )
-    # For this particular test case, as logits are same, the formular
-    # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases.
-    mock_optimizer = self._mock_optimizer(
-        expected_loss=-1 * math.log(1.0/n_classes))
-
-    est = linear.LinearClassifier(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        n_classes,
-        expected_global_step=num_steps,
-        expected_age_weight=[[0.]] if n_classes == 2 else [[0.] * n_classes],
-        expected_bias=[0.] if n_classes == 2 else [.0] * n_classes)
-
-  def testBinaryClassesFromScratch(self):
-    self._testFromScratch(n_classes=2)
-
-  def testMultiClassesFromScratch(self):
-    self._testFromScratch(n_classes=4)
-
-  def _testFromCheckpoint(self, n_classes):
-    # Create initial checkpoint.
-    label = 1
-    age = 17
-    # For binary case, the expected weight has shape (1,1). For multi class
-    # case, the shape is (1, n_classes). In order to test the weights, set
-    # weights as 2.0 * range(n_classes).
-    age_weight = [[2.0]] if n_classes == 2 else (
-        np.reshape(2.0 * np.array(list(range(n_classes)), dtype=np.float32),
-                   (1, n_classes)))
-    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables_lib.Variable(bias, name=BIAS_NAME)
-      variables_lib.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # For binary classifier:
-    #   logits = age * age_weight + bias = 17 * 2. - 35. = -1.
-    #   loss = sigmoid_cross_entropy(logits, label)
-    #   so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133
-    # For multi class classifier:
-    #   loss = cross_entropy(logits, label)
-    #   where logits = 17 * age_weight + bias and label = 1
-    #   so, loss = 1 * -log ( soft_max(logits)[1] )
-    if n_classes == 2:
-      expected_loss = 1.3133
-    else:
-      logits = age_weight * age + bias
-      logits_exp = np.exp(logits)
-      softmax = logits_exp / logits_exp.sum()
-      expected_loss = -1 * math.log(softmax[0, label])
-
-    mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
-
-    est = linear.LinearClassifier(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        n_classes,
-        expected_global_step=initial_global_step + num_steps,
-        expected_age_weight=age_weight,
-        expected_bias=bias)
-
-  def testBinaryClassesFromCheckpoint(self):
-    self._testFromCheckpoint(n_classes=2)
-
-  def testMultiClassesFromCheckpoint(self):
-    self._testFromCheckpoint(n_classes=4)
-
-  def _testFromCheckpointFloatLabels(self, n_classes):
-    """Tests float labels for binary classification."""
-    # Create initial checkpoint.
-    if n_classes > 2:
-      return
-    label = 0.8
-    age = 17
-    age_weight = [[2.0]]
-    bias = [-35.0]
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables_lib.Variable(bias, name=BIAS_NAME)
-      variables_lib.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # logits = age * age_weight + bias = 17 * 2. - 35. = -1.
-    # loss = sigmoid_cross_entropy(logits, label)
-    # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617
-    mock_optimizer = self._mock_optimizer(expected_loss=1.1132617)
-
-    est = linear.LinearClassifier(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-
-  def testBinaryClassesFromCheckpointFloatLabels(self):
-    self._testFromCheckpointFloatLabels(n_classes=2)
-
-  def testMultiClassesFromCheckpointFloatLabels(self):
-    self._testFromCheckpointFloatLabels(n_classes=4)
-
-  def _testFromCheckpointMultiBatch(self, n_classes):
-    # Create initial checkpoint.
-    label = [1, 0]
-    age = [17, 18.5]
-    # For binary case, the expected weight has shape (1,1). For multi class
-    # case, the shape is (1, n_classes). In order to test the weights, set
-    # weights as 2.0 * range(n_classes).
-    age_weight = [[2.0]] if n_classes == 2 else (
-        np.reshape(2.0 * np.array(list(range(n_classes)), dtype=np.float32),
-                   (1, n_classes)))
-    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables_lib.Variable(bias, name=BIAS_NAME)
-      variables_lib.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    # For binary classifier:
-    #   logits = age * age_weight + bias
-    #   logits[0] = 17 * 2. - 35. = -1.
-    #   logits[1] = 18.5 * 2. - 35. = 2.
-    #   loss = sigmoid_cross_entropy(logits, label)
-    #   so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133
-    #       loss[1] = (1 - 0) * -log ( 1- sigmoid(2) ) = 2.1269
-    # For multi class classifier:
-    #   loss = cross_entropy(logits, label)
-    #   where logits = [17, 18.5] * age_weight + bias and label = [1, 0]
-    #   so, loss = 1 * -log ( soft_max(logits)[label] )
-    if n_classes == 2:
-      expected_loss = (1.3133 + 2.1269)
-    else:
-      logits = age_weight * np.reshape(age, (2, 1)) + bias
-      logits_exp = np.exp(logits)
-      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
-      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
-      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
-      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
-      expected_loss = expected_loss_0 + expected_loss_1
-
-    mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
-
-    est = linear.LinearClassifier(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        optimizer=mock_optimizer,
-        model_dir=self._model_dir)
-    self.assertEqual(0, mock_optimizer.minimize.call_count)
-
-    # Train for a few steps, and validate optimizer and final checkpoint.
-    num_steps = 10
-    est.train(
-        input_fn=lambda: ({'age': (age)}, (label)),
-        steps=num_steps)
-    self.assertEqual(1, mock_optimizer.minimize.call_count)
-    self._assert_checkpoint(
-        n_classes,
-        expected_global_step=initial_global_step + num_steps,
-        expected_age_weight=age_weight,
-        expected_bias=bias)
-
-  def testBinaryClassesFromCheckpointMultiBatch(self):
-    self._testFromCheckpointMultiBatch(n_classes=2)
-
-  def testMultiClassesFromCheckpointMultiBatch(self):
-    self._testFromCheckpointMultiBatch(n_classes=4)
-
-
-class BaseLinearClassifierEvaluationTest(object):
-
-  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
-    self._linear_classifier_fn = linear_classifier_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _test_evaluation_for_simple_data(self, n_classes):
-    label = 1
-    age = 1.
-
-    # For binary case, the expected weight has shape (1,1). For multi class
-    # case, the shape is (1, n_classes). In order to test the weights, set
-    # weights as 2.0 * range(n_classes).
-    age_weight = [[-11.0]] if n_classes == 2 else (
-        np.reshape(-11.0 * np.array(list(range(n_classes)), dtype=np.float32),
-                   (1, n_classes)))
-    bias = [-30.0] if n_classes == 2 else [-30.0] * n_classes
-
-    with ops.Graph().as_default():
-      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables_lib.Variable(bias, name=BIAS_NAME)
-      variables_lib.Variable(
-          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    est = self._linear_classifier_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    eval_metrics = est.evaluate(
-        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1)
-
-    if n_classes == 2:
-      # Binary classes: loss = sum(corss_entropy(41)) = 41.
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: 41.,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: 41.,
-          metric_keys.MetricKeys.ACCURACY: 0.,
-          metric_keys.MetricKeys.PRECISION: 0.,
-          metric_keys.MetricKeys.RECALL: 0.,
-          metric_keys.MetricKeys.PREDICTION_MEAN: 0.,
-          metric_keys.MetricKeys.LABEL_MEAN: 1.,
-          metric_keys.MetricKeys.ACCURACY_BASELINE: 1,
-          metric_keys.MetricKeys.AUC: 0.,
-          metric_keys.MetricKeys.AUC_PR: 1.,
-      }
-    else:
-      # Multi classes: loss = 1 * -log ( soft_max(logits)[label] )
-      logits = age_weight * age + bias
-      logits_exp = np.exp(logits)
-      softmax = logits_exp / logits_exp.sum()
-      expected_loss = -1 * math.log(softmax[0, label])
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
-          metric_keys.MetricKeys.ACCURACY: 0.,
-      }
-
-    self.assertAllClose(sorted_key_dict(expected_metrics),
-                        sorted_key_dict(eval_metrics), rtol=1e-3)
-
-  def test_binary_classes_evaluation_for_simple_data(self):
-    self._test_evaluation_for_simple_data(n_classes=2)
-
-  def test_multi_classes_evaluation_for_simple_data(self):
-    self._test_evaluation_for_simple_data(n_classes=4)
-
-  def _test_evaluation_batch(self, n_classes):
-    """Tests evaluation for batch_size==2."""
-    label = [1, 0]
-    age = [17., 18.]
-    # For binary case, the expected weight has shape (1,1). For multi class
-    # case, the shape is (1, n_classes). In order to test the weights, set
-    # weights as 2.0 * range(n_classes).
-    age_weight = [[2.0]] if n_classes == 2 else (
-        np.reshape(2.0 * np.array(list(range(n_classes)), dtype=np.float32),
-                   (1, n_classes)))
-    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables_lib.Variable(bias, name=BIAS_NAME)
-      variables_lib.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    est = self._linear_classifier_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-    eval_metrics = est.evaluate(
-        input_fn=lambda: ({'age': (age)}, (label)), steps=1)
-
-    if n_classes == 2:
-      # Logits are (-1., 1.) labels are (1, 0).
-      # Loss is
-      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
-      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(1)) = 1.3133
-      expected_loss = 1.3133 * 2
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
-          metric_keys.MetricKeys.ACCURACY: 0.,
-          metric_keys.MetricKeys.PRECISION: 0.,
-          metric_keys.MetricKeys.RECALL: 0.,
-          metric_keys.MetricKeys.PREDICTION_MEAN: 0.5,
-          metric_keys.MetricKeys.LABEL_MEAN: 0.5,
-          metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
-          metric_keys.MetricKeys.AUC: 0.,
-          metric_keys.MetricKeys.AUC_PR: 0.25,
-      }
-    else:
-      # Multi classes: loss = 1 * -log ( soft_max(logits)[label] )
-      logits = age_weight * np.reshape(age, (2, 1)) + bias
-      logits_exp = np.exp(logits)
-      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
-      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
-      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
-      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
-      expected_loss = expected_loss_0 + expected_loss_1
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
-          metric_keys.MetricKeys.ACCURACY: 0.,
-      }
-
-    self.assertAllClose(sorted_key_dict(expected_metrics),
-                        sorted_key_dict(eval_metrics), rtol=1e-3)
-
-  def test_binary_classes_evaluation_batch(self):
-    self._test_evaluation_batch(n_classes=2)
-
-  def test_multi_classes_evaluation_batch(self):
-    self._test_evaluation_batch(n_classes=4)
-
-  def _test_evaluation_weights(self, n_classes):
-    """Tests evaluation with weights."""
-
-    label = [1, 0]
-    age = [17., 18.]
-    weights = [1., 2.]
-    # For binary case, the expected weight has shape (1,1). For multi class
-    # case, the shape is (1, n_classes). In order to test the weights, set
-    # weights as 2.0 * range(n_classes).
-    age_weight = [[2.0]] if n_classes == 2 else (
-        np.reshape(2.0 * np.array(list(range(n_classes)), dtype=np.float32),
-                   (1, n_classes)))
-    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
-    initial_global_step = 100
-    with ops.Graph().as_default():
-      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables_lib.Variable(bias, name=BIAS_NAME)
-      variables_lib.Variable(
-          initial_global_step,
-          name=ops.GraphKeys.GLOBAL_STEP,
-          dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    est = self._linear_classifier_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        n_classes=n_classes,
-        weight_column='w',
-        model_dir=self._model_dir)
-    eval_metrics = est.evaluate(
-        input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1)
-
-    if n_classes == 2:
-      # Logits are (-1., 1.) labels are (1, 0).
-      # Loss is
-      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
-      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(1)) = 1.3133
-      #   weights = [1., 2.]
-      expected_loss = 1.3133 * (1. + 2.)
-      loss_mean = expected_loss / (1.0 + 2.0)
-      label_mean = np.average(label, weights=weights)
-      logits = [-1, 1]
-      logistics = sigmoid(np.array(logits))
-      predictions_mean = np.average(logistics, weights=weights)
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
-          metric_keys.MetricKeys.ACCURACY: 0.,
-          metric_keys.MetricKeys.PRECISION: 0.,
-          metric_keys.MetricKeys.RECALL: 0.,
-          metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean,
-          metric_keys.MetricKeys.LABEL_MEAN: label_mean,
-          metric_keys.MetricKeys.ACCURACY_BASELINE: (
-              max(label_mean, 1-label_mean)),
-          metric_keys.MetricKeys.AUC: 0.,
-          metric_keys.MetricKeys.AUC_PR: 0.1668,
-      }
-    else:
-      # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] )
-      logits = age_weight * np.reshape(age, (2, 1)) + bias
-      logits_exp = np.exp(logits)
-      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
-      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
-      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
-      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
-      loss_mean = np.average([expected_loss_0, expected_loss_1],
-                             weights=weights)
-      expected_loss = loss_mean * np.sum(weights)
-
-      expected_metrics = {
-          metric_keys.MetricKeys.LOSS: expected_loss,
-          ops.GraphKeys.GLOBAL_STEP: 100,
-          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
-          metric_keys.MetricKeys.ACCURACY: 0.,
-      }
-
-    self.assertAllClose(sorted_key_dict(expected_metrics),
-                        sorted_key_dict(eval_metrics), rtol=1e-3)
-
-  def test_binary_classes_evaluation_weights(self):
-    self._test_evaluation_weights(n_classes=2)
-
-  def test_multi_classes_evaluation_weights(self):
-    self._test_evaluation_weights(n_classes=4)
-
-
-class BaseLinearClassifierPredictTest(object):
-
-  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
-    self._linear_classifier_fn = linear_classifier_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _testPredictions(self, n_classes, label_vocabulary, label_output_fn):
-    """Tests predict when all variables are one-dimensional."""
-    age = 1.
-
-    # For binary case, the expected weight has shape (1,1). For multi class
-    # case, the shape is (1, n_classes). In order to test the weights, set
-    # weights as 2.0 * range(n_classes).
-    age_weight = [[-11.0]] if n_classes == 2 else (
-        np.reshape(-11.0 * np.array(list(range(n_classes)), dtype=np.float32),
-                   (1, n_classes)))
-    bias = [10.0] if n_classes == 2 else [10.0] * n_classes
-
-    with ops.Graph().as_default():
-      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
-      variables_lib.Variable(bias, name=BIAS_NAME)
-      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    est = self._linear_classifier_fn(
-        feature_columns=(self._fc_lib.numeric_column('age'),),
-        label_vocabulary=label_vocabulary,
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'age': np.array([[age]])},
-        y=None,
-        batch_size=1,
-        num_epochs=1,
-        shuffle=False)
-    predictions = list(est.predict(input_fn=predict_input_fn))
-
-    if n_classes == 2:
-      scalar_logits = np.asscalar(
-          np.reshape(np.array(age_weight) * age + bias, (1,)))
-      two_classes_logits = [0, scalar_logits]
-      two_classes_logits_exp = np.exp(two_classes_logits)
-      softmax = two_classes_logits_exp / two_classes_logits_exp.sum()
-
-      expected_predictions = {
-          'class_ids': [0],
-          'classes': [label_output_fn(0)],
-          'logistic': [sigmoid(np.array(scalar_logits))],
-          'logits': [scalar_logits],
-          'probabilities': softmax,
-      }
-    else:
-      onedim_logits = np.reshape(np.array(age_weight) * age + bias, (-1,))
-      class_ids = onedim_logits.argmax()
-      logits_exp = np.exp(onedim_logits)
-      softmax = logits_exp / logits_exp.sum()
-      expected_predictions = {
-          'class_ids': [class_ids],
-          'classes': [label_output_fn(class_ids)],
-          'logits': onedim_logits,
-          'probabilities': softmax,
-      }
-
-    self.assertEqual(1, len(predictions))
-    # assertAllClose cannot handle byte type.
-    self.assertEqual(expected_predictions['classes'], predictions[0]['classes'])
-    expected_predictions.pop('classes')
-    predictions[0].pop('classes')
-    self.assertAllClose(sorted_key_dict(expected_predictions),
-                        sorted_key_dict(predictions[0]))
-
-  def testBinaryClassesWithoutLabelVocabulary(self):
-    n_classes = 2
-    self._testPredictions(n_classes,
-                          label_vocabulary=None,
-                          label_output_fn=lambda x: ('%s' % x).encode())
-
-  def testBinaryClassesWithLabelVocabulary(self):
-    n_classes = 2
-    self._testPredictions(
-        n_classes,
-        label_vocabulary=['class_vocab_{}'.format(i)
-                          for i in range(n_classes)],
-        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
-
-  def testMultiClassesWithoutLabelVocabulary(self):
-    n_classes = 4
-    self._testPredictions(
-        n_classes,
-        label_vocabulary=None,
-        label_output_fn=lambda x: ('%s' % x).encode())
-
-  def testMultiClassesWithLabelVocabulary(self):
-    n_classes = 4
-    self._testPredictions(
-        n_classes,
-        label_vocabulary=['class_vocab_{}'.format(i)
-                          for i in range(n_classes)],
-        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
-
-  def testSparseCombiner(self):
-    w_a = 2.0
-    w_b = 3.0
-    w_c = 5.0
-    bias = 5.0
-    with ops.Graph().as_default():
-      variables_lib.Variable([[w_a], [w_b], [w_c]], name=LANGUAGE_WEIGHT_NAME)
-      variables_lib.Variable([bias], name=BIAS_NAME)
-      variables_lib.Variable(1, name=ops.GraphKeys.GLOBAL_STEP,
-                             dtype=dtypes.int64)
-      save_variables_to_ckpt(self._model_dir)
-
-    def _input_fn():
-      return dataset_ops.Dataset.from_tensors({
-          'language': sparse_tensor.SparseTensor(
-              values=['a', 'c', 'b', 'c'],
-              indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
-              dense_shape=[2, 2]),
-      })
-
-    feature_columns = (self._fc_lib.categorical_column_with_vocabulary_list(
-        'language', vocabulary_list=['a', 'b', 'c']),)
-
-    # Check prediction for each sparse_combiner.
-    # With sparse_combiner = 'sum', we have
-    # logits_1 = w_a + w_c + bias
-    #          = 2.0 + 5.0 + 5.0 = 12.0
-    # logits_2 = w_b + w_c + bias
-    #          = 3.0 + 5.0 + 5.0 = 13.0
-    linear_classifier = self._linear_classifier_fn(
-        feature_columns=feature_columns,
-        model_dir=self._model_dir)
-    predictions = linear_classifier.predict(input_fn=_input_fn)
-    predicted_scores = list([x['logits'] for x in predictions])
-    self.assertAllClose([[12.0], [13.0]], predicted_scores)
-
-    # With sparse_combiner = 'mean', we have
-    # logits_1 = 1/2 * (w_a + w_c) + bias
-    #          = 1/2 * (2.0 + 5.0) + 5.0 = 8.5
-    # logits_2 = 1/2 * (w_b + w_c) + bias
-    #          = 1/2 * (3.0 + 5.0) + 5.0 = 9.0
-    linear_classifier = self._linear_classifier_fn(
-        feature_columns=feature_columns,
-        model_dir=self._model_dir,
-        sparse_combiner='mean')
-    predictions = linear_classifier.predict(input_fn=_input_fn)
-    predicted_scores = list([x['logits'] for x in predictions])
-    self.assertAllClose([[8.5], [9.0]], predicted_scores)
-
-    # With sparse_combiner = 'sqrtn', we have
-    # logits_1 = sqrt(2)/2 * (w_a + w_c) + bias
-    #          = sqrt(2)/2 * (2.0 + 5.0) + 5.0 = 9.94974
-    # logits_2 = sqrt(2)/2 * (w_b + w_c) + bias
-    #          = sqrt(2)/2 * (3.0 + 5.0) + 5.0 = 10.65685
-    linear_classifier = self._linear_classifier_fn(
-        feature_columns=feature_columns,
-        model_dir=self._model_dir,
-        sparse_combiner='sqrtn')
-    predictions = linear_classifier.predict(input_fn=_input_fn)
-    predicted_scores = list([x['logits'] for x in predictions])
-    self.assertAllClose([[9.94974], [10.65685]], predicted_scores)
-
-
-class BaseLinearClassifierIntegrationTest(object):
-
-  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
-    self._linear_classifier_fn = linear_classifier_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn,
-                          predict_input_fn, input_dimension, prediction_length):
-    feature_columns = [
-        self._fc_lib.numeric_column('x', shape=(input_dimension,))
-    ]
-    est = self._linear_classifier_fn(
-        feature_columns=feature_columns,
-        n_classes=n_classes,
-        model_dir=self._model_dir)
-
-    # TRAIN
-    # learn y = x
-    est.train(train_input_fn, steps=200)
-
-    # EVALUTE
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
-    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
-
-    # PREDICT
-    predictions = np.array(
-        [x['classes'] for x in est.predict(predict_input_fn)])
-    self.assertAllEqual((prediction_length, 1), predictions.shape)
-
-    # EXPORT
-    feature_spec = self._fc_lib.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def _test_numpy_input_fn(self, n_classes):
-    """Tests complete flow with numpy_input_fn."""
-    input_dimension = 4
-    batch_size = 10
-    prediction_length = batch_size
-    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, input_dimension)
-    target = np.array([1] * batch_size)
-
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=target,
-        batch_size=batch_size,
-        num_epochs=None,
-        shuffle=True)
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=target,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data},
-        y=None,
-        batch_size=batch_size,
-        num_epochs=1,
-        shuffle=False)
-
-    self._test_complete_flow(
-        n_classes=n_classes,
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        prediction_length=prediction_length)
-
-  def test_binary_classes_numpy_input_fn(self):
-    self._test_numpy_input_fn(n_classes=2)
-
-  def test_multi_classes_numpy_input_fn(self):
-    self._test_numpy_input_fn(n_classes=4)
-
-  def _test_pandas_input_fn(self, n_classes):
-    """Tests complete flow with pandas_input_fn."""
-    if not HAS_PANDAS:
-      return
-
-    # Pandas DataFrame natually supports 1 dim data only.
-    input_dimension = 1
-    batch_size = 10
-    data = np.array([1., 2., 3., 4.], dtype=np.float32)
-    target = np.array([1, 0, 1, 0], dtype=np.int32)
-    x = pd.DataFrame({'x': data})
-    y = pd.Series(target)
-    prediction_length = 4
-
-    train_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
-    eval_input_fn = pandas_io.pandas_input_fn(
-        x=x, y=y, batch_size=batch_size, shuffle=False)
-    predict_input_fn = pandas_io.pandas_input_fn(
-        x=x, batch_size=batch_size, shuffle=False)
-
-    self._test_complete_flow(
-        n_classes=n_classes,
-        train_input_fn=train_input_fn,
-        eval_input_fn=eval_input_fn,
-        predict_input_fn=predict_input_fn,
-        input_dimension=input_dimension,
-        prediction_length=prediction_length)
-
-  def test_binary_classes_pandas_input_fn(self):
-    self._test_pandas_input_fn(n_classes=2)
-
-  def test_multi_classes_pandas_input_fn(self):
-    self._test_pandas_input_fn(n_classes=4)
-
-  def _test_input_fn_from_parse_example(self, n_classes):
-    """Tests complete flow with input_fn constructed from parse_example."""
-    input_dimension = 2
-    batch_size = 10
-    prediction_length = batch_size
-    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, input_dimension)
-    target = np.array([1] * batch_size, dtype=np.int64)
-
-    serialized_examples = []
-    for x, y in zip(data, target):
-      example = example_pb2.Example(features=feature_pb2.Features(
-          feature={
-              'x':
-                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
-                      value=x)),
-              'y':
-                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
-                      value=[y])),
-          }))
-      serialized_examples.append(example.SerializeToString())
-
-    feature_spec = {
-        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
-        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
-    }
-
-    def _train_input_fn():
-      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
-      features = queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _eval_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = queue_parsed_features(feature_map)
-      labels = features.pop('y')
-      return features, labels
-
-    def _predict_input_fn():
-      feature_map = parsing_ops.parse_example(
-          input_lib.limit_epochs(serialized_examples, num_epochs=1),
-          feature_spec)
-      features = queue_parsed_features(feature_map)
-      features.pop('y')
-      return features, None
-
-    self._test_complete_flow(
-        n_classes=n_classes,
-        train_input_fn=_train_input_fn,
-        eval_input_fn=_eval_input_fn,
-        predict_input_fn=_predict_input_fn,
-        input_dimension=input_dimension,
-        prediction_length=prediction_length)
-
-  def test_binary_classes_input_fn_from_parse_example(self):
-    self._test_input_fn_from_parse_example(n_classes=2)
-
-  def test_multi_classes_input_fn_from_parse_example(self):
-    self._test_input_fn_from_parse_example(n_classes=4)
-
-
-class BaseLinearLogitFnTest(object):
-
-  def __init__(self, fc_lib=feature_column):
-    self._fc_lib = fc_lib
-
-  def test_basic_logit_correctness(self):
-    """linear_logit_fn simply wraps feature_column_lib.linear_model."""
-    age = self._fc_lib.numeric_column('age')
-    with ops.Graph().as_default():
-      logit_fn = linear._linear_logit_fn_builder(units=2, feature_columns=[age])
-      logits = logit_fn(features={'age': [[23.], [31.]]})
-      bias_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
-                                    'linear_model/bias_weights')[0]
-      age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
-                                   'linear_model/age')[0]
-      with tf_session.Session() as sess:
-        sess.run([variables_lib.global_variables_initializer()])
-        self.assertAllClose([[0., 0.], [0., 0.]], logits.eval())
-        sess.run(bias_var.assign([10., 5.]))
-        self.assertAllClose([[10., 5.], [10., 5.]], logits.eval())
-        sess.run(age_var.assign([[2.0, 3.0]]))
-        # [2 * 23 + 10, 3 * 23 + 5] = [56, 74].
-        # [2 * 31 + 10, 3 * 31 + 5] = [72, 98]
-        self.assertAllClose([[56., 74.], [72., 98.]], logits.eval())
-
-  def test_compute_fraction_of_zero(self):
-    """Tests the calculation of sparsity."""
-    if self._fc_lib != feature_column:
-      return
-    age = feature_column.numeric_column('age')
-    occupation = feature_column.categorical_column_with_hash_bucket(
-        'occupation', hash_bucket_size=5)
-    with ops.Graph().as_default():
-      cols_to_vars = {}
-      feature_column.linear_model(
-          features={
-              'age': [[23.], [31.]],
-              'occupation': [['doctor'], ['engineer']]
-          },
-          feature_columns=[age, occupation],
-          units=3,
-          cols_to_vars=cols_to_vars)
-      cols_to_vars.pop('bias')
-      fraction_zero = linear._compute_fraction_of_zero(cols_to_vars.values())
-      age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
-                                   'linear_model/age')[0]
-      with tf_session.Session() as sess:
-        sess.run([variables_lib.global_variables_initializer()])
-        # Upon initialization, all variables will be zero.
-        self.assertAllClose(1, fraction_zero.eval())
-
-        sess.run(age_var.assign([[2.0, 0.0, -1.0]]))
-        # 1 of the 3 age weights are zero, and all of the 15 (5 hash buckets
-        # x 3-dim output) are zero.
-        self.assertAllClose(16. / 18., fraction_zero.eval())
-
-  def test_compute_fraction_of_zero_v2(self):
-    """Tests the calculation of sparsity."""
-    if self._fc_lib != feature_column_v2:
-      return
-
-    age = feature_column_v2.numeric_column('age')
-    occupation = feature_column_v2.categorical_column_with_hash_bucket(
-        'occupation', hash_bucket_size=5)
-    shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
-    with ops.Graph().as_default():
-      model = feature_column_v2.LinearModel(
-          feature_columns=[age, occupation],
-          units=3,
-          shared_state_manager=shared_state_manager)
-      features = {
-          'age': [[23.], [31.]],
-          'occupation': [['doctor'], ['engineer']]
-      }
-      model(features)
-      variables = model.variables
-      variables.remove(model.bias_variable)
-      variables.extend(shared_state_manager.variables)
-      fraction_zero = linear._compute_fraction_of_zero(variables)
-      age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
-                                   'linear_model/age')[0]
-      with tf_session.Session() as sess:
-        sess.run([variables_lib.global_variables_initializer()])
-        # Upon initialization, all variables will be zero.
-        self.assertAllClose(1, fraction_zero.eval())
-
-        sess.run(age_var.assign([[2.0, 0.0, -1.0]]))
-        # 1 of the 3 age weights are zero, and all of the 15 (5 hash buckets
-        # x 3-dim output) are zero.
-        self.assertAllClose(16. / 18., fraction_zero.eval())
-
-
-class BaseLinearWarmStartingTest(object):
-
-  def __init__(self,
-               _linear_classifier_fn,
-               _linear_regressor_fn,
-               fc_lib=feature_column):
-    self._linear_classifier_fn = _linear_classifier_fn
-    self._linear_regressor_fn = _linear_regressor_fn
-    self._fc_lib = fc_lib
-
-  def setUp(self):
-    # Create a directory to save our old checkpoint and vocabularies to.
-    self._ckpt_and_vocab_dir = tempfile.mkdtemp()
-
-    # Make a dummy input_fn.
-    def _input_fn():
-      features = {
-          'age': [[23.], [31.]],
-          'age_in_years': [[23.], [31.]],
-          'occupation': [['doctor'], ['consultant']]
-      }
-      return features, [0, 1]
-
-    self._input_fn = _input_fn
-
-  def tearDown(self):
-    # Clean up checkpoint / vocab dir.
-    writer_cache.FileWriterCache.clear()
-    shutil.rmtree(self._ckpt_and_vocab_dir)
-
-  def test_classifier_basic_warm_starting(self):
-    """Tests correctness of LinearClassifier default warm-start."""
-    age = self._fc_lib.numeric_column('age')
-
-    # Create a LinearClassifier and train to save a checkpoint.
-    linear_classifier = self._linear_classifier_fn(
-        feature_columns=[age],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        optimizer='SGD')
-    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second LinearClassifier, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).
-    warm_started_linear_classifier = self._linear_classifier_fn(
-        feature_columns=[age],
-        n_classes=4,
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        warm_start_from=linear_classifier.model_dir)
-
-    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
-    for variable_name in warm_started_linear_classifier.get_variable_names():
-      self.assertAllClose(
-          linear_classifier.get_variable_value(variable_name),
-          warm_started_linear_classifier.get_variable_value(variable_name))
-
-  def test_regressor_basic_warm_starting(self):
-    """Tests correctness of LinearRegressor default warm-start."""
-    age = self._fc_lib.numeric_column('age')
-
-    # Create a LinearRegressor and train to save a checkpoint.
-    linear_regressor = self._linear_regressor_fn(
-        feature_columns=[age],
-        model_dir=self._ckpt_and_vocab_dir,
-        optimizer='SGD')
-    linear_regressor.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second LinearRegressor, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).
-    warm_started_linear_regressor = self._linear_regressor_fn(
-        feature_columns=[age],
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        warm_start_from=linear_regressor.model_dir)
-
-    warm_started_linear_regressor.train(input_fn=self._input_fn, max_steps=1)
-    for variable_name in warm_started_linear_regressor.get_variable_names():
-      self.assertAllClose(
-          linear_regressor.get_variable_value(variable_name),
-          warm_started_linear_regressor.get_variable_value(variable_name))
-
-  def test_warm_starting_selective_variables(self):
-    """Tests selecting variables to warm-start."""
-    age = self._fc_lib.numeric_column('age')
-
-    # Create a LinearClassifier and train to save a checkpoint.
-    linear_classifier = self._linear_classifier_fn(
-        feature_columns=[age],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        optimizer='SGD')
-    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second LinearClassifier, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).
-    warm_started_linear_classifier = self._linear_classifier_fn(
-        feature_columns=[age],
-        n_classes=4,
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        # The provided regular expression will only warm-start the age variable
-        # and not the bias.
-        warm_start_from=estimator.WarmStartSettings(
-            ckpt_to_initialize_from=linear_classifier.model_dir,
-            vars_to_warm_start='.*(age).*'))
-
-    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
-    self.assertAllClose(
-        linear_classifier.get_variable_value(AGE_WEIGHT_NAME),
-        warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME))
-    # Bias should still be zero from initialization.
-    self.assertAllClose(
-        [0.0] * 4, warm_started_linear_classifier.get_variable_value(BIAS_NAME))
-
-  def test_warm_starting_with_vocab_remapping_and_partitioning(self):
-    """Tests warm-starting with vocab remapping and partitioning."""
-    vocab_list = ['doctor', 'lawyer', 'consultant']
-    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
-    with open(vocab_file, 'w') as f:
-      f.write('\n'.join(vocab_list))
-    occupation = self._fc_lib.categorical_column_with_vocabulary_file(
-        'occupation',
-        vocabulary_file=vocab_file,
-        vocabulary_size=len(vocab_list))
-
-    # Create a LinearClassifier and train to save a checkpoint.
-    partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2)
-    linear_classifier = self._linear_classifier_fn(
-        feature_columns=[occupation],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        optimizer='SGD',
-        partitioner=partitioner)
-    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
-
-    # Create a second LinearClassifier, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).  Use a new FeatureColumn with a
-    # different vocabulary for occupation.
-    new_vocab_list = ['doctor', 'consultant', 'engineer']
-    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
-                                  'new_occupation_vocab')
-    with open(new_vocab_file, 'w') as f:
-      f.write('\n'.join(new_vocab_list))
-    new_occupation = self._fc_lib.categorical_column_with_vocabulary_file(
-        'occupation',
-        vocabulary_file=new_vocab_file,
-        vocabulary_size=len(new_vocab_list))
-    # We can create our VocabInfo object from the new and old occupation
-    # FeatureColumn's.
-    occupation_vocab_info = estimator.VocabInfo(
-        new_vocab=new_occupation.vocabulary_file,
-        new_vocab_size=new_occupation.vocabulary_size,
-        num_oov_buckets=new_occupation.num_oov_buckets,
-        old_vocab=occupation.vocabulary_file,
-        old_vocab_size=occupation.vocabulary_size,
-        # Can't use constant_initializer with load_and_remap.  In practice,
-        # use a truncated normal initializer.
-        backup_initializer=init_ops.random_uniform_initializer(
-            minval=0.39, maxval=0.39))
-    warm_started_linear_classifier = self._linear_classifier_fn(
-        feature_columns=[occupation],
-        n_classes=4,
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        warm_start_from=estimator.WarmStartSettings(
-            ckpt_to_initialize_from=linear_classifier.model_dir,
-            var_name_to_vocab_info={
-                OCCUPATION_WEIGHT_NAME: occupation_vocab_info
-            },
-            # Explicitly providing None here will only warm-start variables
-            # referenced in var_name_to_vocab_info (the bias will not be
-            # warm-started).
-            vars_to_warm_start=None),
-        partitioner=partitioner)
-
-    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
-    # 'doctor' was ID-0 and still ID-0.
-    self.assertAllClose(
-        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[0, :],
-        warm_started_linear_classifier.get_variable_value(
-            OCCUPATION_WEIGHT_NAME)[0, :])
-    # 'consultant' was ID-2 and now ID-1.
-    self.assertAllClose(
-        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[2, :],
-        warm_started_linear_classifier.get_variable_value(
-            OCCUPATION_WEIGHT_NAME)[1, :])
-    # 'engineer' is a new entry and should be initialized with the
-    # backup_initializer in VocabInfo.
-    self.assertAllClose([0.39] * 4,
-                        warm_started_linear_classifier.get_variable_value(
-                            OCCUPATION_WEIGHT_NAME)[2, :])
-    # Bias should still be zero (from initialization logic).
-    self.assertAllClose(
-        [0.0] * 4, warm_started_linear_classifier.get_variable_value(BIAS_NAME))
-
-  def test_warm_starting_with_naming_change(self):
-    """Tests warm-starting with a Tensor name remapping."""
-    age_in_years = self._fc_lib.numeric_column('age_in_years')
-
-    # Create a LinearClassifier and train to save a checkpoint.
-    linear_classifier = self._linear_classifier_fn(
-        feature_columns=[age_in_years],
-        model_dir=self._ckpt_and_vocab_dir,
-        n_classes=4,
-        optimizer='SGD')
-    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+from tensorflow_estimator.python.estimator.canned import linear_testing_utils
 
-    # Create a second LinearClassifier, warm-started from the first.  Use a
-    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
-    # accumulator values that change).
-    warm_started_linear_classifier = self._linear_classifier_fn(
-        feature_columns=[self._fc_lib.numeric_column('age')],
-        n_classes=4,
-        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
-        # The 'age' variable correspond to the 'age_in_years' variable in the
-        # previous model.
-        warm_start_from=estimator.WarmStartSettings(
-            ckpt_to_initialize_from=linear_classifier.model_dir,
-            var_name_to_prev_var_name={
-                AGE_WEIGHT_NAME: AGE_WEIGHT_NAME.replace('age', 'age_in_years')
-            }))
+# Include attrs that start with single underscore.
+linear_testing_utils.__all__ = [
+    s for s in dir(linear_testing_utils) if not s.startswith('__')
+]
 
-    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
-    self.assertAllClose(
-        linear_classifier.get_variable_value(
-            AGE_WEIGHT_NAME.replace('age', 'age_in_years')),
-        warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME))
-    # The bias is also warm-started (with no name remapping).
-    self.assertAllClose(
-        linear_classifier.get_variable_value(BIAS_NAME),
-        warm_started_linear_classifier.get_variable_value(BIAS_NAME))
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.linear_testing_utils import *
diff --git a/tensorflow/python/estimator/canned/metric_keys.py b/tensorflow/python/estimator/canned/metric_keys.py
index 9d49240fea..959bb58e0c 100644
--- a/tensorflow/python/estimator/canned/metric_keys.py
+++ b/tensorflow/python/estimator/canned/metric_keys.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,43 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Enum for model prediction keys."""
+"""metric_keys python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.estimator import model_fn
-
-
-class MetricKeys(object):
-  """Metric key strings."""
-  LOSS = model_fn.LOSS_METRIC_KEY
-  LOSS_MEAN = model_fn.AVERAGE_LOSS_METRIC_KEY
-  LOSS_REGULARIZATION = 'regularization_loss'
-
-  ACCURACY = 'accuracy'
-  PRECISION = 'precision'
-  RECALL = 'recall'
-  # This is the best the model could do by always predicting one class.
-  # Should be < ACCURACY in a trained model.
-  ACCURACY_BASELINE = 'accuracy_baseline'
-  AUC = 'auc'
-  AUC_PR = 'auc_precision_recall'
-  LABEL_MEAN = 'label/mean'
-  PREDICTION_MEAN = 'prediction/mean'
-
-  # The following require a threshold applied, should be float in range (0, 1).
-  ACCURACY_AT_THRESHOLD = 'accuracy/positive_threshold_%g'
-  PRECISION_AT_THRESHOLD = 'precision/positive_threshold_%g'
-  RECALL_AT_THRESHOLD = 'recall/positive_threshold_%g'
+from tensorflow_estimator.python.estimator.canned import metric_keys
 
-  # The following require a class id applied.
-  PROBABILITY_MEAN_AT_CLASS = 'probability_mean/class%d'
-  AUC_AT_CLASS = 'auc/class%d'
-  AUC_PR_AT_CLASS = 'auc_precision_recall/class%d'
+# Include attrs that start with single underscore.
+metric_keys.__all__ = [s for s in dir(metric_keys) if not s.startswith('__')]
 
-  # The following require a class name applied.
-  PROBABILITY_MEAN_AT_NAME = 'probability_mean/%s'
-  AUC_AT_NAME = 'auc/%s'
-  AUC_PR_AT_NAME = 'auc_precision_recall/%s'
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.metric_keys import *
diff --git a/tensorflow/python/estimator/canned/optimizers.py b/tensorflow/python/estimator/canned/optimizers.py
index 8f51cc3a80..99b4c49ace 100644
--- a/tensorflow/python/estimator/canned/optimizers.py
+++ b/tensorflow/python/estimator/canned/optimizers.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,69 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Methods related to optimizers used in canned_estimators."""
+"""optimizers python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-
-from tensorflow.python.training import adagrad
-from tensorflow.python.training import adam
-from tensorflow.python.training import ftrl
-from tensorflow.python.training import gradient_descent
-from tensorflow.python.training import optimizer as optimizer_lib
-from tensorflow.python.training import rmsprop
-
-
-_OPTIMIZER_CLS_NAMES = {
-    'Adagrad': adagrad.AdagradOptimizer,
-    'Adam': adam.AdamOptimizer,
-    'Ftrl': ftrl.FtrlOptimizer,
-    'RMSProp': rmsprop.RMSPropOptimizer,
-    'SGD': gradient_descent.GradientDescentOptimizer,
-}
-
-
-def get_optimizer_instance(opt, learning_rate=None):
-  """Returns an optimizer instance.
-
-  Supports the following types for the given `opt`:
-  * An `Optimizer` instance: Returns the given `opt`.
-  * A string: Creates an `Optimizer` subclass with the given `learning_rate`.
-    Supported strings:
-    * 'Adagrad': Returns an `AdagradOptimizer`.
-    * 'Adam': Returns an `AdamOptimizer`.
-    * 'Ftrl': Returns an `FtrlOptimizer`.
-    * 'RMSProp': Returns an `RMSPropOptimizer`.
-    * 'SGD': Returns a `GradientDescentOptimizer`.
-
-  Args:
-    opt: An `Optimizer` instance, or string, as discussed above.
-    learning_rate: A float. Only used if `opt` is a string.
+from tensorflow_estimator.python.estimator.canned import optimizers
 
-  Returns:
-    An `Optimizer` instance.
+# Include attrs that start with single underscore.
+optimizers.__all__ = [s for s in dir(optimizers) if not s.startswith('__')]
 
-  Raises:
-    ValueError: If `opt` is an unsupported string.
-    ValueError: If `opt` is a supported string but `learning_rate` was not
-      specified.
-    ValueError: If `opt` is none of the above types.
-  """
-  if isinstance(opt, six.string_types):
-    if opt in six.iterkeys(_OPTIMIZER_CLS_NAMES):
-      if not learning_rate:
-        raise ValueError('learning_rate must be specified when opt is string.')
-      return _OPTIMIZER_CLS_NAMES[opt](learning_rate=learning_rate)
-    raise ValueError(
-        'Unsupported optimizer name: {}. Supported names are: {}'.format(
-            opt, tuple(sorted(six.iterkeys(_OPTIMIZER_CLS_NAMES)))))
-  if callable(opt):
-    opt = opt()
-  if not isinstance(opt, optimizer_lib.Optimizer):
-    raise ValueError(
-        'The given object is not an Optimizer instance. Given: {}'.format(opt))
-  return opt
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.optimizers import *
diff --git a/tensorflow/python/estimator/canned/optimizers_test.py b/tensorflow/python/estimator/canned/optimizers_test.py
deleted file mode 100644
index eadabdbc49..0000000000
--- a/tensorflow/python/estimator/canned/optimizers_test.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for optimizers.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.estimator.canned import optimizers
-from tensorflow.python.platform import test
-from tensorflow.python.training import adagrad
-from tensorflow.python.training import adam
-from tensorflow.python.training import ftrl
-from tensorflow.python.training import gradient_descent
-from tensorflow.python.training import optimizer as optimizer_lib
-from tensorflow.python.training import rmsprop
-
-
-class _TestOptimizer(optimizer_lib.Optimizer):
-
-  def __init__(self):
-    super(_TestOptimizer, self).__init__(
-        use_locking=False, name='TestOptimizer')
-
-
-class GetOptimizerInstance(test.TestCase):
-
-  def test_unsupported_name(self):
-    with self.assertRaisesRegexp(
-        ValueError, 'Unsupported optimizer name: unsupported_name'):
-      optimizers.get_optimizer_instance('unsupported_name', learning_rate=0.1)
-
-  def test_supported_name_but_learning_rate_none(self):
-    with self.assertRaisesRegexp(
-        ValueError, 'learning_rate must be specified when opt is string'):
-      optimizers.get_optimizer_instance('Adagrad', learning_rate=None)
-
-  def test_adagrad(self):
-    opt = optimizers.get_optimizer_instance('Adagrad', learning_rate=0.1)
-    self.assertIsInstance(opt, adagrad.AdagradOptimizer)
-    self.assertAlmostEqual(0.1, opt._learning_rate)
-
-  def test_adam(self):
-    opt = optimizers.get_optimizer_instance('Adam', learning_rate=0.1)
-    self.assertIsInstance(opt, adam.AdamOptimizer)
-    self.assertAlmostEqual(0.1, opt._lr)
-
-  def test_ftrl(self):
-    opt = optimizers.get_optimizer_instance('Ftrl', learning_rate=0.1)
-    self.assertIsInstance(opt, ftrl.FtrlOptimizer)
-    self.assertAlmostEqual(0.1, opt._learning_rate)
-
-  def test_rmsprop(self):
-    opt = optimizers.get_optimizer_instance('RMSProp', learning_rate=0.1)
-    self.assertIsInstance(opt, rmsprop.RMSPropOptimizer)
-    self.assertAlmostEqual(0.1, opt._learning_rate)
-
-  def test_sgd(self):
-    opt = optimizers.get_optimizer_instance('SGD', learning_rate=0.1)
-    self.assertIsInstance(opt, gradient_descent.GradientDescentOptimizer)
-    self.assertAlmostEqual(0.1, opt._learning_rate)
-
-  def test_object(self):
-    opt = optimizers.get_optimizer_instance(_TestOptimizer())
-    self.assertIsInstance(opt, _TestOptimizer)
-
-  def test_object_invalid(self):
-    with self.assertRaisesRegexp(
-        ValueError, 'The given object is not an Optimizer instance'):
-      optimizers.get_optimizer_instance((1, 2, 3))
-
-  def test_callable(self):
-    def _optimizer_fn():
-      return _TestOptimizer()
-    opt = optimizers.get_optimizer_instance(_optimizer_fn)
-    self.assertIsInstance(opt, _TestOptimizer)
-
-  def test_lambda(self):
-    opt = optimizers.get_optimizer_instance(lambda: _TestOptimizer())  # pylint: disable=unnecessary-lambda
-    self.assertIsInstance(opt, _TestOptimizer)
-
-  def test_callable_returns_invalid(self):
-    def _optimizer_fn():
-      return (1, 2, 3)
-    with self.assertRaisesRegexp(
-        ValueError, 'The given object is not an Optimizer instance'):
-      optimizers.get_optimizer_instance(_optimizer_fn)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/canned/parsing_utils.py b/tensorflow/python/estimator/canned/parsing_utils.py
index 1ae0f1e9f7..74bb3158be 100644
--- a/tensorflow/python/estimator/canned/parsing_utils.py
+++ b/tensorflow/python/estimator/canned/parsing_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,291 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Parsing related helper function to be used in `input_fn`."""
+"""parsing_utils python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import six
-
-from tensorflow.python.feature_column import feature_column as fc
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.util.tf_export import estimator_export
-
-
-@estimator_export('estimator.classifier_parse_example_spec')
-def classifier_parse_example_spec(feature_columns,
-                                  label_key,
-                                  label_dtype=dtypes.int64,
-                                  label_default=None,
-                                  weight_column=None):
-  """Generates parsing spec for tf.parse_example to be used with classifiers.
-
-  If users keep data in tf.Example format, they need to call tf.parse_example
-  with a proper feature spec. There are two main things that this utility helps:
-
-  * Users need to combine parsing spec of features with labels and weights
-    (if any) since they are all parsed from same tf.Example instance. This
-    utility combines these specs.
-  * It is difficult to map expected label by a classifier such as
-    `DNNClassifier` to corresponding tf.parse_example spec. This utility encodes
-    it by getting related information from users (key, dtype).
-
-  Example output of parsing spec:
-
-  ```python
-  # Define features and transformations
-  feature_b = tf.feature_column.numeric_column(...)
-  feature_c_bucketized = tf.feature_column.bucketized_column(
-    tf.feature_column.numeric_column("feature_c"), ...)
-  feature_a_x_feature_c = tf.feature_column.crossed_column(
-      columns=["feature_a", feature_c_bucketized], ...)
-
-  feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
-  parsing_spec = tf.estimator.classifier_parse_example_spec(
-      feature_columns, label_key='my-label', label_dtype=tf.string)
-
-  # For the above example, classifier_parse_example_spec would return the dict:
-  assert parsing_spec == {
-    "feature_a": parsing_ops.VarLenFeature(tf.string),
-    "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
-    "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
-    "my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.string)
-  }
-  ```
-
-  Example usage with a classifier:
-
-  ```python
-  feature_columns = # define features via tf.feature_column
-  estimator = DNNClassifier(
-      n_classes=1000,
-      feature_columns=feature_columns,
-      weight_column='example-weight',
-      label_vocabulary=['photos', 'keep', ...],
-      hidden_units=[256, 64, 16])
-  # This label configuration tells the classifier the following:
-  # * weights are retrieved with key 'example-weight'
-  # * label is string and can be one of the following ['photos', 'keep', ...]
-  # * integer id for label 'photos' is 0, 'keep' is 1, ...
-
-
-  # Input builders
-  def input_fn_train():  # Returns a tuple of features and labels.
-    features = tf.contrib.learn.read_keyed_batch_features(
-        file_pattern=train_files,
-        batch_size=batch_size,
-        # creates parsing configuration for tf.parse_example
-        features=tf.estimator.classifier_parse_example_spec(
-            feature_columns,
-            label_key='my-label',
-            label_dtype=tf.string,
-            weight_column='example-weight'),
-        reader=tf.RecordIOReader)
-     labels = features.pop('my-label')
-     return features, labels
-
-  estimator.train(input_fn=input_fn_train)
-  ```
-
-  Args:
-    feature_columns: An iterable containing all feature columns. All items
-      should be instances of classes derived from `_FeatureColumn`.
-    label_key: A string identifying the label. It means tf.Example stores labels
-      with this key.
-    label_dtype: A `tf.dtype` identifies the type of labels. By default it is
-      `tf.int64`. If user defines a `label_vocabulary`, this should be set as
-      `tf.string`. `tf.float32` labels are only supported for binary
-      classification.
-    label_default: used as label if label_key does not exist in given
-      tf.Example. An example usage: let's say `label_key` is 'clicked' and
-      tf.Example contains clicked data only for positive examples in following
-      format `key:clicked, value:1`. This means that if there is no data with
-      key 'clicked' it should count as negative example by setting
-      `label_deafault=0`. Type of this value should be compatible with
-      `label_dtype`.
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example. If it is a string, it is
-      used as a key to fetch weight tensor from the `features`. If it is a
-      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-      then weight_column.normalizer_fn is applied on it to get weight tensor.
-
-  Returns:
-    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
-    value.
-
-  Raises:
-    ValueError: If label is used in `feature_columns`.
-    ValueError: If weight_column is used in `feature_columns`.
-    ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
-      instance.
-    ValueError: If `weight_column` is not a `_NumericColumn` instance.
-    ValueError: if label_key is None.
-  """
-  parsing_spec = fc.make_parse_example_spec(feature_columns)
-  if label_key in parsing_spec:
-    raise ValueError('label should not be used as feature. '
-                     'label_key: {}, features: {}'.format(
-                         label_key, parsing_spec.keys()))
-  parsing_spec[label_key] = parsing_ops.FixedLenFeature((1,), label_dtype,
-                                                        label_default)
-
-  if weight_column is None:
-    return parsing_spec
-
-  if isinstance(weight_column, six.string_types):
-    weight_column = fc.numeric_column(weight_column)
-
-  if not isinstance(weight_column, fc._NumericColumn):  # pylint: disable=protected-access
-    raise ValueError('weight_column should be an instance of '
-                     'tf.feature_column.numeric_column. '
-                     'Given type: {} value: {}'.format(
-                         type(weight_column), weight_column))
-
-  if weight_column.key in parsing_spec:
-    raise ValueError('weight_column should not be used as feature. '
-                     'weight_column: {}, features: {}'.format(
-                         weight_column.key, parsing_spec.keys()))
-
-  parsing_spec.update(weight_column._parse_example_spec)  # pylint: disable=protected-access
-  return parsing_spec
-
-
-@estimator_export('estimator.regressor_parse_example_spec')
-def regressor_parse_example_spec(feature_columns,
-                                 label_key,
-                                 label_dtype=dtypes.float32,
-                                 label_default=None,
-                                 label_dimension=1,
-                                 weight_column=None):
-  """Generates parsing spec for tf.parse_example to be used with regressors.
-
-  If users keep data in tf.Example format, they need to call tf.parse_example
-  with a proper feature spec. There are two main things that this utility helps:
-
-  * Users need to combine parsing spec of features with labels and weights
-    (if any) since they are all parsed from same tf.Example instance. This
-    utility combines these specs.
-  * It is difficult to map expected label by a regressor such as `DNNRegressor`
-    to corresponding tf.parse_example spec. This utility encodes it by getting
-    related information from users (key, dtype).
-
-  Example output of parsing spec:
-
-  ```python
-  # Define features and transformations
-  feature_b = tf.feature_column.numeric_column(...)
-  feature_c_bucketized = tf.feature_column.bucketized_column(
-    tf.feature_column.numeric_column("feature_c"), ...)
-  feature_a_x_feature_c = tf.feature_column.crossed_column(
-      columns=["feature_a", feature_c_bucketized], ...)
-
-  feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
-  parsing_spec = tf.estimator.regressor_parse_example_spec(
-      feature_columns, label_key='my-label')
-
-  # For the above example, regressor_parse_example_spec would return the dict:
-  assert parsing_spec == {
-    "feature_a": parsing_ops.VarLenFeature(tf.string),
-    "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
-    "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
-    "my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.float32)
-  }
-  ```
-
-  Example usage with a regressor:
-
-  ```python
-  feature_columns = # define features via tf.feature_column
-  estimator = DNNRegressor(
-      hidden_units=[256, 64, 16],
-      feature_columns=feature_columns,
-      weight_column='example-weight',
-      label_dimension=3)
-  # This label configuration tells the regressor the following:
-  # * weights are retrieved with key 'example-weight'
-  # * label is a 3 dimension tensor with float32 dtype.
-
-
-  # Input builders
-  def input_fn_train():  # Returns a tuple of features and labels.
-    features = tf.contrib.learn.read_keyed_batch_features(
-        file_pattern=train_files,
-        batch_size=batch_size,
-        # creates parsing configuration for tf.parse_example
-        features=tf.estimator.classifier_parse_example_spec(
-            feature_columns,
-            label_key='my-label',
-            label_dimension=3,
-            weight_column='example-weight'),
-        reader=tf.RecordIOReader)
-     labels = features.pop('my-label')
-     return features, labels
-
-  estimator.train(input_fn=input_fn_train)
-  ```
-
-  Args:
-    feature_columns: An iterable containing all feature columns. All items
-      should be instances of classes derived from `_FeatureColumn`.
-    label_key: A string identifying the label. It means tf.Example stores labels
-      with this key.
-    label_dtype: A `tf.dtype` identifies the type of labels. By default it is
-      `tf.float32`.
-    label_default: used as label if label_key does not exist in given
-      tf.Example. By default default_value is none, which means
-      `tf.parse_example` will error out if there is any missing label.
-    label_dimension: Number of regression targets per example. This is the
-      size of the last dimension of the labels and logits `Tensor` objects
-      (typically, these have shape `[batch_size, label_dimension]`).
-    weight_column: A string or a `_NumericColumn` created by
-      `tf.feature_column.numeric_column` defining feature column representing
-      weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example. If it is a string, it is
-      used as a key to fetch weight tensor from the `features`. If it is a
-      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
-      then weight_column.normalizer_fn is applied on it to get weight tensor.
-
-  Returns:
-    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
-    value.
-
-  Raises:
-    ValueError: If label is used in `feature_columns`.
-    ValueError: If weight_column is used in `feature_columns`.
-    ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
-      instance.
-    ValueError: If `weight_column` is not a `_NumericColumn` instance.
-    ValueError: if label_key is None.
-  """
-  parsing_spec = fc.make_parse_example_spec(feature_columns)
-  if label_key in parsing_spec:
-    raise ValueError('label should not be used as feature. '
-                     'label_key: {}, features: {}'.format(
-                         label_key, parsing_spec.keys()))
-  parsing_spec[label_key] = parsing_ops.FixedLenFeature(
-      (label_dimension,), label_dtype, label_default)
-
-  if weight_column is None:
-    return parsing_spec
-
-  if isinstance(weight_column, six.string_types):
-    weight_column = fc.numeric_column(weight_column)
-
-  if not isinstance(weight_column, fc._NumericColumn):  # pylint: disable=protected-access
-    raise ValueError('weight_column should be an instance of '
-                     'tf.feature_column.numeric_column. '
-                     'Given type: {} value: {}'.format(
-                         type(weight_column), weight_column))
+from tensorflow_estimator.python.estimator.canned import parsing_utils
 
-  if weight_column.key in parsing_spec:
-    raise ValueError('weight_column should not be used as feature. '
-                     'weight_column: {}, features: {}'.format(
-                         weight_column.key, parsing_spec.keys()))
+# Include attrs that start with single underscore.
+parsing_utils.__all__ = [
+    s for s in dir(parsing_utils) if not s.startswith('__')
+]
 
-  parsing_spec.update(weight_column._parse_example_spec)  # pylint: disable=protected-access
-  return parsing_spec
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.parsing_utils import *
diff --git a/tensorflow/python/estimator/canned/parsing_utils_test.py b/tensorflow/python/estimator/canned/parsing_utils_test.py
deleted file mode 100644
index 366bb104ca..0000000000
--- a/tensorflow/python/estimator/canned/parsing_utils_test.py
+++ /dev/null
@@ -1,211 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for parsing_utils.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.estimator.canned import parsing_utils
-from tensorflow.python.feature_column import feature_column as fc
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.platform import test
-
-
-class ClassifierParseExampleSpec(test.TestCase):
-  """Tests tf.estimator.classifier_parse_example_spec."""
-
-  def test_defaults(self):
-    parsing_spec = parsing_utils.classifier_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')], label_key='b')
-    expected_spec = {
-        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_string(self):
-    parsing_spec = parsing_utils.classifier_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')],
-        label_key='b',
-        label_dtype=dtypes.string)
-    expected_spec = {
-        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.string),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  # TODO(ispir): test label_default_value compatibility with label_dtype
-  def test_label_default_value(self):
-    parsing_spec = parsing_utils.classifier_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')],
-        label_key='b',
-        label_default=0)
-    expected_spec = {
-        'a':
-            parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b':
-            parsing_ops.FixedLenFeature(
-                (1,), dtype=dtypes.int64, default_value=0),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_weight_column_as_string(self):
-    parsing_spec = parsing_utils.classifier_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')],
-        label_key='b',
-        weight_column='c')
-    expected_spec = {
-        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
-        'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_weight_column_as_numeric_column(self):
-    parsing_spec = parsing_utils.classifier_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')],
-        label_key='b',
-        weight_column=fc.numeric_column('c'))
-    expected_spec = {
-        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
-        'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_label_key_should_not_be_used_as_feature(self):
-    with self.assertRaisesRegexp(ValueError,
-                                 'label should not be used as feature'):
-      parsing_utils.classifier_parse_example_spec(
-          feature_columns=[fc.numeric_column('a')], label_key='a')
-
-  def test_weight_column_should_not_be_used_as_feature(self):
-    with self.assertRaisesRegexp(ValueError,
-                                 'weight_column should not be used as feature'):
-      parsing_utils.classifier_parse_example_spec(
-          feature_columns=[fc.numeric_column('a')],
-          label_key='b',
-          weight_column=fc.numeric_column('a'))
-
-  def test_weight_column_should_be_a_numeric_column(self):
-    with self.assertRaisesRegexp(ValueError,
-                                 'tf.feature_column.numeric_column'):
-      not_a_numeric_column = 3
-      parsing_utils.classifier_parse_example_spec(
-          feature_columns=[fc.numeric_column('a')],
-          label_key='b',
-          weight_column=not_a_numeric_column)
-
-
-class RegressorParseExampleSpec(test.TestCase):
-  """Tests tf.estimator.classifier_parse_example_spec."""
-
-  def test_defaults(self):
-    parsing_spec = parsing_utils.regressor_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')], label_key='b')
-    expected_spec = {
-        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_int64(self):
-    parsing_spec = parsing_utils.regressor_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')],
-        label_key='b',
-        label_dtype=dtypes.int64)
-    expected_spec = {
-        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_label_default_value(self):
-    parsing_spec = parsing_utils.regressor_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')],
-        label_key='b',
-        label_default=0.)
-    expected_spec = {
-        'a':
-            parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b':
-            parsing_ops.FixedLenFeature(
-                (1,), dtype=dtypes.float32, default_value=0.),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_label_dimension(self):
-    parsing_spec = parsing_utils.regressor_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')],
-        label_key='b',
-        label_dimension=3)
-    expected_spec = {
-        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b': parsing_ops.FixedLenFeature((3,), dtype=dtypes.float32),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_weight_column_as_string(self):
-    parsing_spec = parsing_utils.regressor_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')],
-        label_key='b',
-        weight_column='c')
-    expected_spec = {
-        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_weight_column_as_numeric_column(self):
-    parsing_spec = parsing_utils.regressor_parse_example_spec(
-        feature_columns=[fc.numeric_column('a')],
-        label_key='b',
-        weight_column=fc.numeric_column('c'))
-    expected_spec = {
-        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-        'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
-    }
-    self.assertDictEqual(expected_spec, parsing_spec)
-
-  def test_label_key_should_not_be_used_as_feature(self):
-    with self.assertRaisesRegexp(ValueError,
-                                 'label should not be used as feature'):
-      parsing_utils.regressor_parse_example_spec(
-          feature_columns=[fc.numeric_column('a')], label_key='a')
-
-  def test_weight_column_should_not_be_used_as_feature(self):
-    with self.assertRaisesRegexp(ValueError,
-                                 'weight_column should not be used as feature'):
-      parsing_utils.regressor_parse_example_spec(
-          feature_columns=[fc.numeric_column('a')],
-          label_key='b',
-          weight_column=fc.numeric_column('a'))
-
-  def test_weight_column_should_be_a_numeric_column(self):
-    with self.assertRaisesRegexp(ValueError,
-                                 'tf.feature_column.numeric_column'):
-      not_a_numeric_column = 3
-      parsing_utils.regressor_parse_example_spec(
-          feature_columns=[fc.numeric_column('a')],
-          label_key='b',
-          weight_column=not_a_numeric_column)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/canned/prediction_keys.py b/tensorflow/python/estimator/canned/prediction_keys.py
index daa275b46b..2cf332ce19 100644
--- a/tensorflow/python/estimator/canned/prediction_keys.py
+++ b/tensorflow/python/estimator/canned/prediction_keys.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,24 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Enum for model prediction keys."""
+"""prediction_keys python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow_estimator.python.estimator.canned import prediction_keys
 
-class PredictionKeys(object):
-  """Enum for canonical model prediction keys.
-
-  The following values are defined:
-  PREDICTIONS: Used by models that predict values, such as regressor models.
-  """
+# Include attrs that start with single underscore.
+prediction_keys.__all__ = [
+    s for s in dir(prediction_keys) if not s.startswith('__')
+]
 
-  CLASSES = 'classes'
-  CLASS_IDS = 'class_ids'
-  LOGISTIC = 'logistic'
-  LOGITS = 'logits'
-  PREDICTIONS = 'predictions'
-  PROBABILITIES = 'probabilities'
-  TOP_K = 'top_k'
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.canned.prediction_keys import *
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index e6d82f0db7..c43f0513bd 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,2166 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""estimator python module.
 
-"""Base Estimator class."""
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import copy
-import os
-import tempfile
+from tensorflow_estimator.python.estimator import estimator
 
-import numpy as np
-import six
+# Include attrs that start with single underscore.
+estimator.__all__ = [s for s in dir(estimator) if not s.startswith('__')]
 
-from google.protobuf import message
-from tensorflow.core.framework import summary_pb2
-from tensorflow.python.client import session as tf_session
-from tensorflow.python.eager import context
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator import run_config
-from tensorflow.python.estimator import util as estimator_util
-from tensorflow.python.estimator.export import export as export_helpers
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.saved_model import builder as saved_model_builder
-from tensorflow.python.saved_model import utils_impl as saved_model_utils
-from tensorflow.python.summary import summary
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import device_setter
-from tensorflow.python.training import distribute as distribute_lib
-from tensorflow.python.training import evaluation
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import saver
-from tensorflow.python.training import training
-from tensorflow.python.training import training_util
-from tensorflow.python.training import warm_starting_util
-from tensorflow.python.util import compat
-from tensorflow.python.util import compat_internal
-from tensorflow.python.util import function_utils
-from tensorflow.python.util import nest
-from tensorflow.python.util.tf_export import estimator_export
-
-
-_VALID_MODEL_FN_ARGS = set(
-    ['features', 'labels', 'mode', 'params', 'self', 'config'])
-
-
-@estimator_export('estimator.Estimator')
-class Estimator(object):
-  """Estimator class to train and evaluate TensorFlow models.
-
-  The `Estimator` object wraps a model which is specified by a `model_fn`,
-  which, given inputs and a number of other parameters, returns the ops
-  necessary to perform training, evaluation, or predictions.
-
-  All outputs (checkpoints, event files, etc.) are written to `model_dir`, or a
-  subdirectory thereof. If `model_dir` is not set, a temporary directory is
-  used.
-
-  The `config` argument can be passed `tf.estimator.RunConfig` object containing
-  information about the execution environment. It is passed on to the
-  `model_fn`, if the `model_fn` has a parameter named "config" (and input
-  functions in the same manner). If the `config` parameter is not passed, it is
-  instantiated by the `Estimator`. Not passing config means that defaults useful
-  for local execution are used. `Estimator` makes config available to the model
-  (for instance, to allow specialization based on the number of workers
-  available), and also uses some of its fields to control internals, especially
-  regarding checkpointing.
-
-  The `params` argument contains hyperparameters. It is passed to the
-  `model_fn`, if the `model_fn` has a parameter named "params", and to the input
-  functions in the same manner. `Estimator` only passes params along, it does
-  not inspect it. The structure of `params` is therefore entirely up to the
-  developer.
-
-  None of `Estimator`'s methods can be overridden in subclasses (its
-  constructor enforces this). Subclasses should use `model_fn` to configure
-  the base class, and may add methods implementing specialized functionality.
-
-  @compatibility(eager)
-  Calling methods of `Estimator` will work while eager execution is enabled.
-  However, the `model_fn` and `input_fn` is not executed eagerly, `Estimator`
-  will switch to graph model before calling all user-provided functions (incl.
-  hooks), so their code has to be compatible with graph mode execution. Note
-  that `input_fn` code using `tf.data` generally works in both graph and eager
-  modes.
-  @end_compatibility
-  """
-
-  def __init__(self, model_fn, model_dir=None, config=None, params=None,
-               warm_start_from=None):
-    """Constructs an `Estimator` instance.
-
-    See [estimators](https://tensorflow.org/guide/estimators) for more
-    information.
-
-    To warm-start an `Estimator`:
-
-    ```python
-    estimator = tf.estimator.DNNClassifier(
-        feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
-        hidden_units=[1024, 512, 256],
-        warm_start_from="/path/to/checkpoint/dir")
-    ```
-
-    For more details on warm-start configuration, see
-    `tf.estimator.WarmStartSettings`.
-
-    Args:
-      model_fn: Model function. Follows the signature:
-
-        * Args:
-
-          * `features`: This is the first item returned from the `input_fn`
-                 passed to `train`, `evaluate`, and `predict`. This should be a
-                 single `tf.Tensor` or `dict` of same.
-          * `labels`: This is the second item returned from the `input_fn`
-                 passed to `train`, `evaluate`, and `predict`. This should be a
-                 single `tf.Tensor` or `dict` of same (for multi-head models).
-                 If mode is `tf.estimator.ModeKeys.PREDICT`, `labels=None` will
-                 be passed. If the `model_fn`'s signature does not accept
-                 `mode`, the `model_fn` must still be able to handle
-                 `labels=None`.
-          * `mode`: Optional. Specifies if this training, evaluation or
-                 prediction. See `tf.estimator.ModeKeys`.
-          * `params`: Optional `dict` of hyperparameters.  Will receive what
-                 is passed to Estimator in `params` parameter. This allows
-                 to configure Estimators from hyper parameter tuning.
-          * `config`: Optional `estimator.RunConfig` object. Will receive what
-                 is passed to Estimator as its `config` parameter, or a default
-                 value. Allows setting up things in your `model_fn` based on
-                 configuration such as `num_ps_replicas`, or `model_dir`.
-
-        * Returns:
-          `tf.estimator.EstimatorSpec`
-
-      model_dir: Directory to save model parameters, graph and etc. This can
-        also be used to load checkpoints from the directory into an estimator to
-        continue training a previously saved model. If `PathLike` object, the
-        path will be resolved. If `None`, the model_dir in `config` will be used
-        if set. If both are set, they must be same. If both are `None`, a
-        temporary directory will be used.
-      config: `estimator.RunConfig` configuration object.
-      params: `dict` of hyper parameters that will be passed into `model_fn`.
-              Keys are names of parameters, values are basic python types.
-      warm_start_from: Optional string filepath to a checkpoint or SavedModel to
-                       warm-start from, or a `tf.estimator.WarmStartSettings`
-                       object to fully configure warm-starting.  If the string
-                       filepath is provided instead of a
-                       `tf.estimator.WarmStartSettings`, then all variables are
-                       warm-started, and it is assumed that vocabularies
-                       and `tf.Tensor` names are unchanged.
-
-    Raises:
-      ValueError: parameters of `model_fn` don't match `params`.
-      ValueError: if this is called via a subclass and if that class overrides
-        a member of `Estimator`.
-    """
-    Estimator._assert_members_are_not_overridden(self)
-
-    self._config = maybe_overwrite_model_dir_and_session_config(config,
-                                                                model_dir)
-
-    # The distribute field contains an instance of DistributionStrategy.
-    self._train_distribution = self._config.train_distribute
-    self._eval_distribution = self._config.eval_distribute
-    # Model directory.
-    self._model_dir = self._config.model_dir
-    self._session_config = self._config.session_config
-    logging.info('Using config: %s', str(vars(self._config)))
-
-    self._device_fn = (
-        self._config.device_fn or _get_replica_device_setter(self._config))
-
-    if model_fn is None:
-      raise ValueError('model_fn must be provided to Estimator.')
-    _verify_model_fn_args(model_fn, params)
-    self._model_fn = model_fn
-    self._params = copy.deepcopy(params or {})
-
-    # pylint: disable=protected-access
-    self._warm_start_settings = _get_default_warm_start_settings(
-        warm_start_from)
-    # pylint: enable=protected-access
-
-  @property
-  def model_dir(self):
-    return self._model_dir
-
-  @property
-  def config(self):
-    return copy.deepcopy(self._config)
-
-  @property
-  def params(self):
-    return copy.deepcopy(self._params)
-
-  @property
-  def model_fn(self):
-    """Returns the `model_fn` which is bound to `self.params`.
-
-    Returns:
-      The `model_fn` with following signature:
-        `def model_fn(features, labels, mode, config)`
-    """
-
-    def public_model_fn(features, labels, mode, config):
-      return self._call_model_fn(features, labels, mode, config)
-
-    return public_model_fn
-
-  # TODO(ispir): support a list of names
-  def get_variable_value(self, name):
-    """Returns value of the variable given by name.
-
-    Args:
-      name: string or a list of string, name of the tensor.
-
-    Returns:
-      Numpy array - value of the tensor.
-
-    Raises:
-      ValueError: If the `Estimator` has not produced a checkpoint yet.
-    """
-    _check_checkpoint_available(self.model_dir)
-    with context.graph_mode():
-      return training.load_variable(self.model_dir, name)
-
-  def get_variable_names(self):
-    """Returns list of all variable names in this model.
-
-    Returns:
-      List of names.
-
-    Raises:
-      ValueError: If the `Estimator` has not produced a checkpoint yet.
-    """
-    _check_checkpoint_available(self.model_dir)
-    with context.graph_mode():
-      return [name for name, _ in training.list_variables(self.model_dir)]
-
-  def latest_checkpoint(self):
-    """Finds the filename of the latest saved checkpoint file in `model_dir`.
-
-    Returns:
-      The full path to the latest checkpoint or `None` if no checkpoint was
-      found.
-    """
-    with context.graph_mode():
-      return checkpoint_management.latest_checkpoint(self.model_dir)
-
-  def train(self,
-            input_fn,
-            hooks=None,
-            steps=None,
-            max_steps=None,
-            saving_listeners=None):
-    """Trains a model given training data `input_fn`.
-
-    Args:
-      input_fn: A function that provides input data for training as minibatches.
-        See [Premade Estimators](
-        https://tensorflow.org/guide/premade_estimators#create_input_functions)
-        for more information. The function should construct and return one of
-        the following:  * A
-        `tf.data.Dataset` object: Outputs of `Dataset` object must be a tuple
-        `(features, labels)` with same constraints as below. * A tuple
-        `(features, labels)`: Where `features` is a `tf.Tensor` or a dictionary
-        of string feature name to `Tensor` and `labels` is a `Tensor` or a
-        dictionary of string label name to `Tensor`. Both `features` and
-        `labels` are consumed by `model_fn`. They should satisfy the expectation
-        of `model_fn` from inputs.
-      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
-        callbacks inside the training loop.
-      steps: Number of steps for which to train the model. If `None`, train
-        forever or train until `input_fn` generates the `tf.errors.OutOfRange`
-        error or `StopIteration` exception. `steps` works incrementally. If you
-        call two times `train(steps=10)` then training occurs in total 20 steps.
-        If `OutOfRange` or `StopIteration` occurs in the middle, training stops
-        before 20 steps. If you don't want to have incremental behavior please
-        set `max_steps` instead. If set, `max_steps` must be `None`.
-      max_steps: Number of total steps for which to train model. If `None`,
-        train forever or train until `input_fn` generates the
-        `tf.errors.OutOfRange` error or `StopIteration` exception. If set,
-        `steps` must be `None`. If `OutOfRange` or `StopIteration` occurs in the
-        middle, training stops before `max_steps` steps. Two calls to
-        `train(steps=100)` means 200 training iterations. On the other hand, two
-        calls to `train(max_steps=100)` means that the second call will not do
-        any iteration since first call did all 100 steps.
-      saving_listeners: list of `CheckpointSaverListener` objects. Used for
-        callbacks that run immediately before or after checkpoint savings.
-
-    Returns:
-      `self`, for chaining.
-
-    Raises:
-      ValueError: If both `steps` and `max_steps` are not `None`.
-      ValueError: If either `steps` or `max_steps <= 0`.
-    """
-    if self.config.task_type in (run_config.TaskType.EVALUATOR,
-                                 run_config.TaskType.PS):
-      raise ValueError(
-          'Train has been called wrong configuration. Please use '
-          'tf.estimator.train_and_evaluate which calls proper API according '
-          'to given configuration. Current configuration: {}.'.format(
-              self.config))
-
-    with context.graph_mode():
-      if (steps is not None) and (max_steps is not None):
-        raise ValueError('Can not provide both steps and max_steps.')
-      if steps is not None and steps <= 0:
-        raise ValueError('Must specify steps > 0, given: {}'.format(steps))
-      if max_steps is not None and max_steps <= 0:
-        raise ValueError(
-            'Must specify max_steps > 0, given: {}'.format(max_steps))
-
-      if max_steps is not None:
-        start_step = _load_global_step_from_checkpoint_dir(self._model_dir)
-        if max_steps <= start_step:
-          logging.info('Skipping training since max_steps has already saved.')
-          return self
-
-      hooks = _check_hooks_type(hooks)
-      hooks.extend(self._convert_train_steps_to_hooks(steps, max_steps))
-
-      saving_listeners = _check_listeners_type(saving_listeners)
-      loss = self._train_model(input_fn, hooks, saving_listeners)
-      logging.info('Loss for final step: %s.', loss)
-      return self
-
-  def _convert_train_steps_to_hooks(self, steps, max_steps):
-    """Create hooks to run correct number of steps in training.
-
-    Args:
-      steps: number of steps to run during training.
-      max_steps: maximum number of steps to be run during training. It'll be
-        the maximum number of steps the model will train to after restoring
-        from checkpoint even across multiple estimator.train calls.
-
-    Returns:
-      List of hooks to be passed to the estimator.
-    """
-    if steps is not None or max_steps is not None:
-      if self._train_distribution:
-        steps_per_run = getattr(self._train_distribution, 'steps_per_run', 1)
-        if steps_per_run > 1:
-          return [basic_session_run_hooks._MultiStepStopAtStepHook(  # pylint: disable=protected-access
-              steps, max_steps, steps_per_run)]
-      return [training.StopAtStepHook(steps, max_steps)]
-    else:
-      return []
-
-  def eval_dir(self, name=None):
-    """Shows the directory name where evaluation metrics are dumped.
-
-    Args:
-      name: Name of the evaluation if user needs to run multiple evaluations on
-        different data sets, such as on training data vs test data. Metrics for
-        different evaluations are saved in separate folders, and appear
-        separately in tensorboard.
-
-    Returns:
-      A string which is the path of directory contains evaluation metrics.
-    """
-    return os.path.join(self._model_dir, 'eval' if not name else
-                        'eval_' + name)
-
-  def evaluate(self, input_fn, steps=None, hooks=None, checkpoint_path=None,
-               name=None):
-    """Evaluates the model given evaluation data `input_fn`.
-
-    For each step, calls `input_fn`, which returns one batch of data.
-    Evaluates until:
-    - `steps` batches are processed, or
-    - `input_fn` raises an end-of-input exception (`tf.errors.OutOfRangeError`
-    or
-    `StopIteration`).
-
-    Args:
-      input_fn: A function that constructs the input data for evaluation. See
-        [Premade Estimators](
-        https://tensorflow.org/guide/premade#create_input_functions)
-        for more information. The
-        function should construct and return one of the following:  * A
-        `tf.data.Dataset` object: Outputs of `Dataset` object must be a tuple
-        `(features, labels)` with same constraints as below. * A tuple
-        `(features, labels)`: Where `features` is a `tf.Tensor` or a dictionary
-        of string feature name to `Tensor` and `labels` is a `Tensor` or a
-        dictionary of string label name to `Tensor`. Both `features` and
-        `labels` are consumed by `model_fn`. They should satisfy the expectation
-        of `model_fn` from inputs.
-      steps: Number of steps for which to evaluate model. If `None`, evaluates
-        until `input_fn` raises an end-of-input exception.
-      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
-        callbacks inside the evaluation call.
-      checkpoint_path: Path of a specific checkpoint to evaluate. If `None`, the
-        latest checkpoint in `model_dir` is used.  If there are no checkpoints
-        in `model_dir`, evaluation is run with newly initialized `Variables`
-        instead of ones restored from checkpoint.
-      name: Name of the evaluation if user needs to run multiple evaluations on
-        different data sets, such as on training data vs test data. Metrics for
-        different evaluations are saved in separate folders, and appear
-        separately in tensorboard.
-
-    Returns:
-      A dict containing the evaluation metrics specified in `model_fn` keyed by
-      name, as well as an entry `global_step` which contains the value of the
-      global step for which this evaluation was performed. For canned
-      estimators, the dict contains the `loss` (mean loss per mini-batch) and
-      the `average_loss` (mean loss per sample). Canned classifiers also return
-      the `accuracy`. Canned regressors also return the `label/mean` and the
-      `prediction/mean`.
-
-    Raises:
-      ValueError: If `steps <= 0`.
-      ValueError: If no model has been trained, namely `model_dir`, or the
-        given `checkpoint_path` is empty.
-    """
-    with context.graph_mode():
-      hooks = _check_hooks_type(hooks)
-      hooks.extend(self._convert_eval_steps_to_hooks(steps))
-
-      # Check that model has been trained (if nothing has been set explicitly).
-      if not checkpoint_path:
-        latest_path = checkpoint_management.latest_checkpoint(self._model_dir)
-        if not latest_path:
-          logging.info('Could not find trained model in model_dir: {}, running '
-                       'initialization to evaluate.'.format(self._model_dir))
-        checkpoint_path = latest_path
-
-      def _evaluate():
-        (scaffold, update_op, eval_dict, all_hooks) = (
-            self._evaluate_build_graph(input_fn, hooks, checkpoint_path))
-        return self._evaluate_run(
-            checkpoint_path=checkpoint_path,
-            scaffold=scaffold,
-            update_op=update_op,
-            eval_dict=eval_dict,
-            all_hooks=all_hooks,
-            output_dir=self.eval_dir(name))
-
-      with ops.Graph().as_default():
-        if self._eval_distribution:
-          # We want to create the iterations variable outside the distribution
-          # scope as that is just stored on the host and mainly used to drive
-          # the loop and doesn't need to be a Mirrored/Device variable.
-          training.get_or_create_steps_per_run_variable()
-          with self._eval_distribution.scope():
-            return _evaluate()
-        else:
-          return _evaluate()
-
-  def _convert_eval_steps_to_hooks(self, steps):
-    """Create hooks to run correct number of steps in evaluation.
-
-    Args:
-      steps: number of steps to run during evaluation.
-
-    Raises:
-      ValueError: if steps is less than or equal to zero.
-
-    Returns:
-      List of hooks to be passed to the estimator.
-    """
-    if steps is None:
-      return []
-
-    if steps <= 0:
-      raise ValueError('Must specify steps > 0, given: {}'.format(steps))
-
-    # The hooks are declared as private in evaluation.py discourage the use
-    # by other libraries or open source users. This should be the only usage
-    # of the estimator evaluation hooks.
-    if self._eval_distribution:
-      steps_per_run = getattr(self._eval_distribution, 'steps_per_run', 1)
-      if steps_per_run > 1:
-        return [evaluation._MultiStepStopAfterNEvalsHook(  # pylint: disable=protected-access
-            num_evals=steps, steps_per_run=steps_per_run)]
-    return [evaluation._StopAfterNEvalsHook(num_evals=steps)]  # pylint: disable=protected-access
-
-  def predict(self,
-              input_fn,
-              predict_keys=None,
-              hooks=None,
-              checkpoint_path=None,
-              yield_single_examples=True):
-    """Yields predictions for given features.
-
-    Please note that interleaving two predict outputs does not work. See:
-    [issue/20506](
-    https://github.com/tensorflow/tensorflow/issues/20506#issuecomment-422208517)
-
-    Args:
-      input_fn: A function that constructs the features. Prediction continues
-        until `input_fn` raises an end-of-input exception
-        (`tf.errors.OutOfRangeError` or `StopIteration`).
-        See [Premade Estimators](
-        https://tensorflow.org/guide/premade_estimators#create_input_functions)
-        for more information. The function should construct and return one of
-        the following:
-
-          * A `tf.data.Dataset` object: Outputs of `Dataset` object must have
-            same constraints as below.
-          * features: A `tf.Tensor` or a dictionary of string feature name to
-            `Tensor`. features are consumed by `model_fn`. They should satisfy
-            the expectation of `model_fn` from inputs.
-          * A tuple, in which case the first item is extracted as features.
-
-      predict_keys: list of `str`, name of the keys to predict. It is used if
-        the `tf.estimator.EstimatorSpec.predictions` is a `dict`. If
-        `predict_keys` is used then rest of the predictions will be filtered
-        from the dictionary. If `None`, returns all.
-      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
-        callbacks inside the prediction call.
-      checkpoint_path: Path of a specific checkpoint to predict. If `None`, the
-        latest checkpoint in `model_dir` is used.  If there are no checkpoints
-        in `model_dir`, prediction is run with newly initialized `Variables`
-        instead of ones restored from checkpoint.
-      yield_single_examples: If `False`, yields the whole batch as returned by
-        the `model_fn` instead of decomposing the batch into individual
-        elements. This is useful if `model_fn` returns some tensors whose first
-        dimension is not equal to the batch size.
-
-    Yields:
-      Evaluated values of `predictions` tensors.
-
-    Raises:
-      ValueError: Could not find a trained model in `model_dir`.
-      ValueError: If batch length of predictions is not the same and
-        `yield_single_examples` is `True`.
-      ValueError: If there is a conflict between `predict_keys` and
-        `predictions`. For example if `predict_keys` is not `None` but
-        `tf.estimator.EstimatorSpec.predictions` is not a `dict`.
-    """
-    with context.graph_mode():
-      hooks = _check_hooks_type(hooks)
-      # Check that model has been trained.
-      if not checkpoint_path:
-        checkpoint_path = checkpoint_management.latest_checkpoint(
-            self._model_dir)
-      if not checkpoint_path:
-        logging.info('Could not find trained model in model_dir: {}, running '
-                     'initialization to predict.'.format(self._model_dir))
-      with ops.Graph().as_default() as g:
-        random_seed.set_random_seed(self._config.tf_random_seed)
-        self._create_and_assert_global_step(g)
-        features, input_hooks = self._get_features_from_input_fn(
-            input_fn, model_fn_lib.ModeKeys.PREDICT)
-        estimator_spec = self._call_model_fn(
-            features, None, model_fn_lib.ModeKeys.PREDICT, self.config)
-
-        # Call to warm_start has to be after model_fn is called.
-        self._maybe_warm_start(checkpoint_path)
-
-        predictions = self._extract_keys(
-            estimator_spec.predictions, predict_keys)
-        all_hooks = list(input_hooks)
-        all_hooks.extend(hooks)
-        all_hooks.extend(list(estimator_spec.prediction_hooks or []))
-        with training.MonitoredSession(
-            session_creator=training.ChiefSessionCreator(
-                checkpoint_filename_with_path=checkpoint_path,
-                master=self._config.master,
-                scaffold=estimator_spec.scaffold,
-                config=self._session_config),
-            hooks=all_hooks) as mon_sess:
-          while not mon_sess.should_stop():
-            preds_evaluated = mon_sess.run(predictions)
-            if not yield_single_examples:
-              yield preds_evaluated
-            elif not isinstance(predictions, dict):
-              for pred in preds_evaluated:
-                yield pred
-            else:
-              for i in range(self._extract_batch_length(preds_evaluated)):
-                yield {
-                    key: value[i]
-                    for key, value in six.iteritems(preds_evaluated)
-                }
-
-  def _assert_members_are_not_overridden(self):
-    """Asserts members of `Estimator` are not overridden."""
-    # TPUEstimator is special cased (owned by TF).
-    if self.__class__.__name__ == 'TPUEstimator':
-      return
-
-    allowed_overrides = set([
-        '_create_and_assert_global_step',
-        '_tf_api_names', '_tf_api_names_v1', '_estimator_api_names',
-        '_estimator_api_names_v1', '_estimator_api_constants',
-        '_estimator_api_constants_v1',
-    ])
-    estimator_members = set([m for m in Estimator.__dict__.keys()
-                             if not m.startswith('__')])
-    subclass_members = set(self.__class__.__dict__.keys())
-    common_members = estimator_members & subclass_members - allowed_overrides
-    overridden_members = [
-        m for m in common_members
-        if Estimator.__dict__[m] != self.__class__.__dict__[m]]
-    if overridden_members:
-      raise ValueError(
-          'Subclasses of Estimator cannot override members of Estimator. '
-          '{} does override {}'.format(self.__class__, overridden_members))
-
-  def export_savedmodel(
-      self, export_dir_base, serving_input_receiver_fn,
-      assets_extra=None,
-      as_text=False,
-      checkpoint_path=None,
-      strip_default_attrs=False):
-    # pylint: disable=line-too-long,g-doc-args,g-doc-return-or-yield
-    """Exports inference graph as a `SavedModel` into the given dir.
-
-    Note that `export_to_savedmodel` will be renamed to `export_saved_model`
-    in TensorFlow 2.0. At that time, `export_to_savedmodel` without the
-    additional underscore will be available only through tf.compat.v1.
-
-    Please see `tf.estimator.Estimator.export_saved_model` for more information.
-
-    There is one additional arg versus the new method:
-      strip_default_attrs: This parameter is going away in TF 2.0, and
-        the new behavior will automatically strip all default attributes.
-        Boolean. If `True`, default-valued attributes will be
-        removed from the `NodeDef`s. For a detailed guide, see [Stripping
-        Default-Valued Attributes](
-        https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
-    """
-    # pylint: enable=line-too-long,g-doc-args,g-doc-return-or-yield
-    return self._export_saved_model_for_mode(
-        export_dir_base,
-        serving_input_receiver_fn,
-        assets_extra=assets_extra,
-        as_text=as_text,
-        checkpoint_path=checkpoint_path,
-        strip_default_attrs=strip_default_attrs,
-        mode=model_fn_lib.ModeKeys.PREDICT)
-
-  def export_saved_model(
-      self, export_dir_base, serving_input_receiver_fn,
-      assets_extra=None,
-      as_text=False,
-      checkpoint_path=None):
-    # pylint: disable=line-too-long
-    """Exports inference graph as a `SavedModel` into the given dir.
-
-    For a detailed guide, see
-    [Using SavedModel with Estimators](https://tensorflow.org/guide/saved_model#using_savedmodel_with_estimators).
-
-    This method builds a new graph by first calling the
-    `serving_input_receiver_fn` to obtain feature `Tensor`s, and then calling
-    this `Estimator`'s `model_fn` to generate the model graph based on those
-    features. It restores the given checkpoint (or, lacking that, the most
-    recent checkpoint) into this graph in a fresh session.  Finally it creates
-    a timestamped export directory below the given `export_dir_base`, and writes
-    a `SavedModel` into it containing a single `tf.MetaGraphDef` saved from this
-    session.
-
-    The exported `MetaGraphDef` will provide one `SignatureDef` for each
-    element of the `export_outputs` dict returned from the `model_fn`, named
-    using
-    the same keys.  One of these keys is always
-    `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`,
-    indicating which
-    signature will be served when a serving request does not specify one.
-    For each signature, the outputs are provided by the corresponding
-    `tf.estimator.export.ExportOutput`s, and the inputs are always the input
-    receivers provided by
-    the `serving_input_receiver_fn`.
-
-    Extra assets may be written into the `SavedModel` via the `assets_extra`
-    argument.  This should be a dict, where each key gives a destination path
-    (including the filename) relative to the assets.extra directory.  The
-    corresponding value gives the full path of the source file to be copied.
-    For example, the simple case of copying a single file without renaming it
-    is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
-
-    Args:
-      export_dir_base: A string containing a directory in which to create
-        timestamped subdirectories containing exported `SavedModel`s.
-      serving_input_receiver_fn: A function that takes no argument and returns a
-        `tf.estimator.export.ServingInputReceiver` or
-        `tf.estimator.export.TensorServingInputReceiver`.
-      assets_extra: A dict specifying how to populate the assets.extra directory
-        within the exported `SavedModel`, or `None` if no extra assets are
-        needed.
-      as_text: whether to write the `SavedModel` proto in text format.
-      checkpoint_path: The checkpoint path to export.  If `None` (the default),
-        the most recent checkpoint found within the model directory is chosen.
-
-    Returns:
-      The string path to the exported directory.
-
-    Raises:
-      ValueError: if no `serving_input_receiver_fn` is provided, no
-      `export_outputs` are provided, or no checkpoint can be found.
-    """
-    # pylint: enable=line-too-long
-    # TODO(b/111442174): `export_to_savedmodel` will be renamed to
-    # `export_saved_model` in TensorFlow 2.0. This function is a wrapper
-    # while staging the new version; do not add any logic here.
-    return self.export_savedmodel(
-        export_dir_base,
-        serving_input_receiver_fn,
-        assets_extra=assets_extra,
-        as_text=as_text,
-        checkpoint_path=checkpoint_path,
-        strip_default_attrs=True)
-
-  def _export_saved_model_for_mode(
-      self, export_dir_base, input_receiver_fn,
-      assets_extra=None,
-      as_text=False,
-      checkpoint_path=None,
-      strip_default_attrs=False,
-      mode=model_fn_lib.ModeKeys.PREDICT):
-    # pylint: disable=line-too-long
-    """Exports a single train/eval/predict graph as a `SavedModel`.
-
-    This method is a wrapper for `_export_all_saved_models`, and wraps a raw
-    `input_receiver_fn` in a dictionary to pass in to that function.
-    See `_export_all_saved_models` for full docs.
-
-    See `tf.contrib.estimator.export_saved_model_for_mode` for the currently
-    exposed version of this function.
-
-    Args:
-      export_dir_base: A string containing a directory in which to create
-        timestamped subdirectories containing exported `SavedModel`s.
-      input_receiver_fn: a function that takes no argument and returns the
-        appropriate subclass of `InputReceiver`.
-      assets_extra: A dict specifying how to populate the assets.extra directory
-        within the exported `SavedModel`, or `None` if no extra assets are
-        needed.
-      as_text: whether to write the `SavedModel` proto in text format.
-      checkpoint_path: The checkpoint path to export.  If `None` (the default),
-        the most recent checkpoint found within the model directory is chosen.
-      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
-        removed from the `NodeDef`s. For a detailed guide, see [Stripping
-        Default-Valued
-        Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
-      mode: `tf.estimator.ModeKeys` value indicating with mode will be exported.
-
-    Returns:
-      The string path to the exported directory.
-
-    Raises:
-      ValueError: if `input_receiver_fn` is `None`, no `export_outputs`
-        are provided, or no checkpoint can be found.
-    """
-    # pylint: enable=line-too-long
-    if not input_receiver_fn:
-      raise ValueError('An input_receiver_fn must be defined.')
-
-    input_receiver_fn_map = {mode: input_receiver_fn}
-
-    return self._export_all_saved_models(
-        export_dir_base,
-        input_receiver_fn_map,
-        assets_extra=assets_extra,
-        as_text=as_text,
-        checkpoint_path=checkpoint_path,
-        strip_default_attrs=strip_default_attrs)
-
-  def _export_all_saved_models(
-      self, export_dir_base, input_receiver_fn_map,
-      assets_extra=None,
-      as_text=False,
-      checkpoint_path=None,
-      strip_default_attrs=False):
-    # pylint: disable=line-too-long
-    """Exports a `SavedModel` containing `tf.MetaGraphDefs` for each requested mode.
-
-    See `tf.contrib.estimator.export_all_saved_models` for the currently
-    exposed version of this function.
-
-    For each mode passed in via the `input_receiver_fn_map`,
-    this method builds a new graph by calling the `input_receiver_fn` to obtain
-    feature and label `Tensor`s. Next, this method calls the `Estimator`'s
-    `model_fn` in the passed mode to generate the model graph based on
-    those features and labels, and restores the given checkpoint
-    (or, lacking that, the most recent checkpoint) into the graph.
-    Only one of the modes is used for saving variables to the `SavedModel`
-    (order of preference: `tf.estimator.ModeKeys.TRAIN`,
-    `tf.estimator.ModeKeys.EVAL`, then
-    `tf.estimator.ModeKeys.PREDICT`), such that up to three
-    `tf.MetaGraphDefs` are saved with a single set of variables in a single
-    `SavedModel` directory.
-
-    For the variables and `tf.MetaGraphDefs`, a timestamped export directory
-    below
-    `export_dir_base`, and writes a `SavedModel` into it containing
-    the `tf.MetaGraphDef` for the given mode and its associated signatures.
-
-    For prediction, the exported `MetaGraphDef` will provide one `SignatureDef`
-    for each element of the `export_outputs` dict returned from the `model_fn`,
-    named using the same keys.  One of these keys is always
-    `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`,
-    indicating which
-    signature will be served when a serving request does not specify one.
-    For each signature, the outputs are provided by the corresponding
-    `tf.estimator.export.ExportOutput`s, and the inputs are always the input
-    receivers provided by
-    the `serving_input_receiver_fn`.
-
-    For training and evaluation, the `train_op` is stored in an extra
-    collection,
-    and loss, metrics, and predictions are included in a `SignatureDef` for the
-    mode in question.
-
-    Extra assets may be written into the `SavedModel` via the `assets_extra`
-    argument.  This should be a dict, where each key gives a destination path
-    (including the filename) relative to the assets.extra directory.  The
-    corresponding value gives the full path of the source file to be copied.
-    For example, the simple case of copying a single file without renaming it
-    is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
-
-    Args:
-      export_dir_base: A string containing a directory in which to create
-        timestamped subdirectories containing exported `SavedModel`s.
-      input_receiver_fn_map: dict of `tf.estimator.ModeKeys` to
-        `input_receiver_fn` mappings, where the `input_receiver_fn` is a
-        function that takes no arguments and returns the appropriate subclass of
-        `InputReceiver`.
-      assets_extra: A dict specifying how to populate the assets.extra directory
-        within the exported `SavedModel`, or `None` if no extra assets are
-        needed.
-      as_text: whether to write the `SavedModel` proto in text format.
-      checkpoint_path: The checkpoint path to export.  If `None` (the default),
-        the most recent checkpoint found within the model directory is chosen.
-      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
-        removed from the `NodeDef`s. For a detailed guide, see [Stripping
-        Default-Valued
-        Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
-
-    Returns:
-      A dict of `tf.estimator.ModeKeys` value to string path for each exported
-      directory.
-
-    Raises:
-      ValueError: if any `input_receiver_fn` is `None`, no `export_outputs`
-        are provided, or no checkpoint can be found.
-    """
-    # pylint: enable=line-too-long
-    # TODO(b/65561022): Consider allowing multiple input_receiver_fns per mode.
-    with context.graph_mode():
-      if not checkpoint_path:
-        # Locate the latest checkpoint
-        checkpoint_path = checkpoint_management.latest_checkpoint(
-            self._model_dir)
-      if not checkpoint_path:
-        raise ValueError("Couldn't find trained model at %s." % self._model_dir)
-
-      export_dir = export_helpers.get_timestamped_export_dir(export_dir_base)
-      temp_export_dir = export_helpers.get_temp_export_dir(export_dir)
-
-      builder = saved_model_builder.SavedModelBuilder(temp_export_dir)
-
-      save_variables = True
-      # Note that the order in which we run here matters, as the first
-      # mode we pass through will be used to save the variables. We run TRAIN
-      # first, as that is also the mode used for checkpoints, and therefore
-      # we are not likely to have vars in PREDICT that are not in the checkpoint
-      # created by TRAIN.
-      if input_receiver_fn_map.get(model_fn_lib.ModeKeys.TRAIN):
-        self._add_meta_graph_for_mode(
-            builder, input_receiver_fn_map, checkpoint_path,
-            strip_default_attrs, save_variables,
-            mode=model_fn_lib.ModeKeys.TRAIN)
-        save_variables = False
-      if input_receiver_fn_map.get(model_fn_lib.ModeKeys.EVAL):
-        self._add_meta_graph_for_mode(
-            builder, input_receiver_fn_map, checkpoint_path,
-            strip_default_attrs, save_variables,
-            mode=model_fn_lib.ModeKeys.EVAL)
-        save_variables = False
-      if input_receiver_fn_map.get(model_fn_lib.ModeKeys.PREDICT):
-        self._add_meta_graph_for_mode(
-            builder, input_receiver_fn_map, checkpoint_path,
-            strip_default_attrs, save_variables,
-            mode=model_fn_lib.ModeKeys.PREDICT)
-        save_variables = False
-
-      if save_variables:
-        raise ValueError('No valid modes for exporting found. Got {}.'.format(
-            input_receiver_fn_map.keys()))
-
-      builder.save(as_text)
-
-      # Add the extra assets
-      if assets_extra:
-        assets_extra_path = os.path.join(compat.as_bytes(temp_export_dir),
-                                         compat.as_bytes('assets.extra'))
-        for dest_relative, source in assets_extra.items():
-          dest_absolute = os.path.join(compat.as_bytes(assets_extra_path),
-                                       compat.as_bytes(dest_relative))
-          dest_path = os.path.dirname(dest_absolute)
-          gfile.MakeDirs(dest_path)
-          gfile.Copy(source, dest_absolute)
-
-      gfile.Rename(temp_export_dir, export_dir)
-      return export_dir
-
-  def _add_meta_graph_for_mode(self,
-                               builder,
-                               input_receiver_fn_map,
-                               checkpoint_path,
-                               strip_default_attrs,
-                               save_variables=True,
-                               mode=model_fn_lib.ModeKeys.PREDICT,
-                               export_tags=None,
-                               check_variables=True):
-    # pylint: disable=line-too-long
-    """Loads variables and adds them along with a `tf.MetaGraphDef` for saving.
-
-    Args:
-      builder: instance of `tf.saved_modle.builder.SavedModelBuilder` that will
-        be used for saving.
-      input_receiver_fn_map: dict of `tf.estimator.ModeKeys` to
-        `input_receiver_fn` mappings, where the `input_receiver_fn` is a
-        function that takes no argument and returns the appropriate subclass of
-        `InputReceiver`.
-      checkpoint_path: The checkpoint path to export.  If `None` (the default),
-        the most recent checkpoint found within the model directory is chosen.
-      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
-        removed from the `NodeDef`s. For a detailed guide, see [Stripping
-        Default-Valued
-        Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
-      save_variables: bool, whether variables should be saved. If `False`, just
-        the `tf.MetaGraphDef` will be saved. Note that `save_variables` should
-        only be `True` for the first call to this function, and the
-        `SavedModelBuilder` will raise an error if that is not the case.
-      mode: `tf.estimator.ModeKeys` value indicating which mode will be
-        exported.
-      export_tags: The set of tags with which to save `tf.MetaGraphDef`. If
-        `None`, a default set will be selected to matched the passed mode.
-      check_variables: bool, whether to check the checkpoint has all variables.
-
-    Raises:
-      ValueError: if `save_variables` is `True` and `check_variable` is `False`.
-    """
-    # pylint: enable=line-too-long
-    if export_tags is None:
-      export_tags = model_fn_lib.EXPORT_TAG_MAP[mode]
-    input_receiver_fn = input_receiver_fn_map[mode]
-
-    with ops.Graph().as_default() as g:
-      self._create_and_assert_global_step(g)
-      random_seed.set_random_seed(self._config.tf_random_seed)
-
-      input_receiver = input_receiver_fn()
-
-      # Call the model_fn and collect the export_outputs.
-      estimator_spec = self._call_model_fn(
-          features=input_receiver.features,
-          labels=getattr(input_receiver, 'labels', None),
-          mode=mode,
-          config=self.config)
-
-      export_outputs = model_fn_lib.export_outputs_for_mode(
-          mode=estimator_spec.mode,
-          serving_export_outputs=estimator_spec.export_outputs,
-          predictions=estimator_spec.predictions,
-          loss=estimator_spec.loss,
-          metrics=estimator_spec.eval_metric_ops)
-
-      # Build the SignatureDefs from receivers and all outputs
-      signature_def_map = export_helpers.build_all_signature_defs(
-          input_receiver.receiver_tensors,
-          export_outputs,
-          getattr(input_receiver, 'receiver_tensors_alternatives', None),
-          serving_only=(mode == model_fn_lib.ModeKeys.PREDICT))
-
-      with tf_session.Session(config=self._session_config) as session:
-
-        if estimator_spec.scaffold.local_init_op is not None:
-          local_init_op = estimator_spec.scaffold.local_init_op
-        else:
-          local_init_op = monitored_session.Scaffold.default_local_init_op()
-
-        # This saver will be used both for restoring variables now,
-        # and in saving out the metagraph below. This ensures that any
-        # Custom Savers stored with the Scaffold are passed through to the
-        # SavedModel for restore later.
-        graph_saver = estimator_spec.scaffold.saver or saver.Saver(sharded=True)
-
-        if save_variables and not check_variables:
-          raise ValueError('If `save_variables` is `True, `check_variables`'
-                           'must not be `False`.')
-        if check_variables:
-          try:
-            graph_saver.restore(session, checkpoint_path)
-          except errors.NotFoundError as e:
-            msg = ('Could not load all requested variables from checkpoint. '
-                   'Please make sure your model_fn does not expect variables '
-                   'that were not saved in the checkpoint.\n\n'
-                   'Encountered error with mode `{}` while restoring '
-                   'checkpoint from: `{}`. Full Traceback:\n\n{}').format(
-                       mode, checkpoint_path, e)
-            raise ValueError(msg)
-
-        # We add the train op explicitly for now, so that we don't have to
-        # change the Builder public interface. Note that this is a no-op
-        # for prediction, where train_op is None.
-        builder._add_train_op(estimator_spec.train_op)  # pylint: disable=protected-access
-
-        meta_graph_kwargs = dict(
-            tags=export_tags,
-            signature_def_map=signature_def_map,
-            assets_collection=ops.get_collection(
-                ops.GraphKeys.ASSET_FILEPATHS),
-            strip_default_attrs=strip_default_attrs,
-            legacy_init_op=local_init_op,
-            saver=graph_saver)
-
-        if save_variables:
-          builder.add_meta_graph_and_variables(
-              session, **meta_graph_kwargs)
-        else:
-          builder.add_meta_graph(**meta_graph_kwargs)
-
-  def _get_features_from_input_fn(self, input_fn, mode):
-    """Extracts the `features` from return values of `input_fn`."""
-    result = self._call_input_fn(input_fn, mode)
-    result, _, hooks = estimator_util.parse_input_fn_result(result)
-    self._validate_features_in_predict_input(result)
-    return result, hooks
-
-  def _validate_features_in_predict_input(self, result):
-    if not _has_dataset_or_queue_runner(result):
-      logging.warning('Input graph does not use tf.data.Dataset or contain a '
-                      'QueueRunner. That means predict yields forever. '
-                      'This is probably a mistake.')
-
-  def _get_iterator_from_input_fn(self, input_fn, mode, distribution=None):
-    if distribution is not None:
-      result = distribution.distribute_dataset(
-          lambda: self._call_input_fn(input_fn, mode))
-    else:
-      result = self._call_input_fn(input_fn, mode)
-
-    iterator = result.make_initializable_iterator()
-    input_hooks = [estimator_util._DatasetInitializerHook(iterator)]  # pylint: disable=protected-access
-    return iterator, input_hooks
-
-  def _get_features_and_labels_from_input_fn(self, input_fn, mode):
-    """Extracts the `features` and labels from return values of `input_fn`."""
-    return estimator_util.parse_input_fn_result(
-        self._call_input_fn(input_fn, mode))
-
-  def _extract_batch_length(self, preds_evaluated):
-    """Extracts batch length of predictions."""
-    batch_length = None
-    for key, value in six.iteritems(preds_evaluated):
-      batch_length = batch_length or value.shape[0]
-      if value.shape[0] != batch_length:
-        raise ValueError('Batch length of predictions should be same. %s has '
-                         'different batch length than others.' % key)
-    return batch_length
-
-  def _extract_keys(self, predictions, predict_keys):
-    """Extracts `predict_keys` from `predictions`."""
-    if not predict_keys:
-      return predictions
-    if not isinstance(predictions, dict):
-      raise ValueError(
-          'predict_keys argument is not valid in case of non-dict predictions.')
-    existing_keys = predictions.keys()
-    predictions = {
-        key: value
-        for key, value in six.iteritems(predictions) if key in predict_keys
-    }
-    if not predictions:
-      raise ValueError('Expected to run at least one output from %s, '
-                       'provided %s.' % (existing_keys, predict_keys))
-    return predictions
-
-  def _create_global_step(self, graph):
-    """Creates the global step tensor in graph.
-
-    The global step tensor must be an integer type with name 'global_step' and
-    be added to the collection `tf.GraphKeys.GLOBAL_STEP`.
-
-    Args:
-      graph: The graph in which to create the global step tensor.
-
-    Returns:
-      The global step `tf.Tensor`.
-    """
-    return training.create_global_step(graph)
-
-  def _create_and_assert_global_step(self, graph):
-    """Creates and asserts properties of the global step.
-
-    Args:
-      graph: The graph in which to create the global step tensor.
-
-    Returns:
-      The global step `tf.Tensor`.
-    """
-    step = self._create_global_step(graph)
-    assert step == training.get_global_step()
-    assert step.dtype.is_integer
-    return step
-
-  def _call_input_fn(self, input_fn, mode):
-    """Calls the input function.
-
-    Args:
-      input_fn: The input function.
-      mode: `tf.estimator.ModeKeys`
-
-    Returns:
-      The return value of the passed `input_fn`, which should be one of:
-
-        * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
-            tuple `(features, labels)` with same constraints as below.
-        * A tuple `(features, labels)`: Where `features` is a `Tensor` or a
-          dictionary of string feature name to `Tensor` and `labels` is a
-          `Tensor` or a dictionary of string label name to `Tensor`. Both
-          `features` and `labels` are consumed by `model_fn`. They should
-          satisfy the expectation of `model_fn` from inputs.
-
-    Raises:
-      ValueError: if `input_fn` takes invalid arguments.
-    """
-    input_fn_args = function_utils.fn_args(input_fn)
-    kwargs = {}
-    if 'mode' in input_fn_args:
-      kwargs['mode'] = mode
-    if 'params' in input_fn_args:
-      kwargs['params'] = self.params
-    if 'config' in input_fn_args:
-      kwargs['config'] = self.config
-    with ops.device('/cpu:0'):
-      return input_fn(**kwargs)
-
-  def _call_model_fn(self, features, labels, mode, config):
-    """Calls model function.
-
-    Args:
-      features: features dict.
-      labels: labels dict.
-      mode: `tf.estimator.ModeKeys`
-      config: `tf.estimator.RunConfig`
-
-    Returns:
-      An `tf.estimator.EstimatorSpec` object.
-
-    Raises:
-      ValueError: if `model_fn` returns invalid objects.
-    """
-    model_fn_args = function_utils.fn_args(self._model_fn)
-    kwargs = {}
-    if 'labels' in model_fn_args:
-      kwargs['labels'] = labels
-    else:
-      if labels is not None:
-        raise ValueError(
-            'model_fn does not take labels, but input_fn returns labels.')
-    if 'mode' in model_fn_args:
-      kwargs['mode'] = mode
-    if 'params' in model_fn_args:
-      kwargs['params'] = self.params
-    if 'config' in model_fn_args:
-      kwargs['config'] = config
-
-    logging.info('Calling model_fn.')
-    model_fn_results = self._model_fn(features=features, **kwargs)
-    logging.info('Done calling model_fn.')
-
-    if not isinstance(model_fn_results, model_fn_lib.EstimatorSpec):
-      raise ValueError('model_fn should return an EstimatorSpec.')
-
-    return model_fn_results
-
-  def _train_model(self, input_fn, hooks, saving_listeners):
-    if self._train_distribution:
-      return self._train_model_distributed(input_fn, hooks, saving_listeners)
-    else:
-      return self._train_model_default(input_fn, hooks, saving_listeners)
-
-  def _train_model_default(self, input_fn, hooks, saving_listeners):
-    """Initiate training with `input_fn`, without `DistributionStrategies`.
-
-    Args:
-      input_fn: A function that provides input data for training as minibatches.
-      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
-        callbacks inside the training loop.
-      saving_listeners: list of `tf.train.CheckpointSaverListener` objects. Used
-        for callbacks that run immediately before or after checkpoint savings.
-
-    Returns:
-      Loss from training
-    """
-    worker_hooks = []
-    with ops.Graph().as_default() as g, g.device(self._device_fn):
-      random_seed.set_random_seed(self._config.tf_random_seed)
-      global_step_tensor = self._create_and_assert_global_step(g)
-
-      # Skip creating a read variable if _create_and_assert_global_step
-      # returns None (e.g. tf.contrib.estimator.SavedModelEstimator).
-      if global_step_tensor is not None:
-        training_util._get_or_create_global_step_read(g)  # pylint: disable=protected-access
-
-      features, labels, input_hooks = (
-          self._get_features_and_labels_from_input_fn(
-              input_fn, model_fn_lib.ModeKeys.TRAIN))
-      worker_hooks.extend(input_hooks)
-      estimator_spec = self._call_model_fn(
-          features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
-      global_step_tensor = training_util.get_global_step(g)
-      return self._train_with_estimator_spec(estimator_spec, worker_hooks,
-                                             hooks, global_step_tensor,
-                                             saving_listeners)
-
-  def _train_model_distributed(self, input_fn, hooks, saving_listeners):
-    """Initiate training with `input_fn`, using `DistributionStrategies`.
-
-    Args:
-      input_fn: A function that provides input data for training as minibatches.
-      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
-        callbacks inside the training loop.
-      saving_listeners: list of `tf.train.CheckpointSaverListener` objects. Used
-        for callbacks that run immediately before or after checkpoint savings.
-
-    Returns:
-      Loss from training
-    """
-    self._train_distribution.configure(self._session_config)
-
-    # TODO(sourabhbajaj): Remove this hack once we migrate the other strategies
-    # to use the new API
-    is_tpu_strategy = (
-        self._train_distribution.__class__.__name__ == 'TPUStrategy')
-
-    worker_hooks = []
-    with ops.Graph().as_default() as g:
-      # We want to create the iterations variable outside the distribution scope
-      # as that is just stored on the host and mainly used to drive the loop
-      # and doesn't need to be a Mirrored/Device variable.
-      if is_tpu_strategy:
-        steps_per_run_variable = training.get_or_create_steps_per_run_variable()
-      with self._train_distribution.scope():
-        random_seed.set_random_seed(self._config.tf_random_seed)
-        iterator, input_hooks = self._get_iterator_from_input_fn(
-            input_fn, model_fn_lib.ModeKeys.TRAIN, self._train_distribution)
-        worker_hooks.extend(input_hooks)
-        global_step_tensor = self._create_and_assert_global_step(g)
-        # we want to add to the global collection in the main thread not the
-        # tower threads.
-        ops.add_to_collection(
-            training_util.GLOBAL_STEP_READ_KEY,
-            self._train_distribution.read_var(global_step_tensor))
-
-        if is_tpu_strategy:
-          # Create a step_fn from the train_op of grouped_estimator_spec
-          def step_fn(ctx, features, labels=None):
-            """A single step that is passed to run_on_dataset."""
-            estimator_spec = self._train_distribution.call_for_each_tower(
-                self._call_model_fn,
-                features,
-                labels,
-                model_fn_lib.ModeKeys.TRAIN,
-                self.config)
-            ctx.set_last_step_output(
-                name='loss',
-                output=estimator_spec.loss,
-                aggregation=distribute_lib.get_loss_reduction())
-            ctx.set_non_tensor_output(
-                name='estimator_spec', output=estimator_spec)
-            return estimator_spec.train_op
-
-          # Create new train_op post graph rewrites
-          initial_training_loss = constant_op.constant(1e7)
-          ctx = self._train_distribution.run_steps_on_dataset(
-              step_fn, iterator, iterations=steps_per_run_variable,
-              initial_loop_values={'loss': initial_training_loss})
-          distributed_train_op = ctx.run_op
-          loss = ctx.last_step_outputs['loss']
-          grouped_estimator_spec = ctx.non_tensor_outputs['estimator_spec']
-        else:
-          features, labels = estimator_util.parse_iterator_result(
-              iterator.get_next())
-          grouped_estimator_spec = self._train_distribution.call_for_each_tower(
-              self._call_model_fn,
-              features,
-              labels,  # although this will be None it seems
-              model_fn_lib.ModeKeys.TRAIN,
-              self.config)
-          loss = self._train_distribution.unwrap(
-              self._train_distribution.reduce(
-                  distribute_lib.get_loss_reduction(),
-                  grouped_estimator_spec.loss,
-                  destinations='/device:CPU:0'))[0]
-          distributed_train_op = grouped_estimator_spec.train_op
-
-        scaffold = _combine_distributed_scaffold(
-            grouped_estimator_spec.scaffold, self._train_distribution)
-
-        # TODO(yuefengz): add a test for unwrapping per_device_hooks.
-        def get_hooks_from_the_first_device(per_device_hooks):
-          return [
-              self._distribution.unwrap(per_device_hook)[0]
-              for per_device_hook in per_device_hooks
-          ]
-
-        training_hooks = get_hooks_from_the_first_device(
-            grouped_estimator_spec.training_hooks)
-        training_chief_hooks = get_hooks_from_the_first_device(
-            grouped_estimator_spec.training_chief_hooks)
-        worker_hooks.append(
-            estimator_util.StrategyInitFinalizeHook(
-                self._train_distribution.initialize,
-                self._train_distribution.finalize))
-
-        estimator_spec = model_fn_lib.EstimatorSpec(
-            mode=grouped_estimator_spec.mode,
-            loss=loss,
-            train_op=self._train_distribution.group(distributed_train_op),
-            training_hooks=training_hooks,
-            training_chief_hooks=training_chief_hooks,
-            scaffold=scaffold)
-        return self._train_with_estimator_spec(estimator_spec, worker_hooks,
-                                               hooks, global_step_tensor,
-                                               saving_listeners)
-
-  def _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks,
-                                 global_step_tensor, saving_listeners):
-    """Train a model with the given Estimator Spec."""
-    if self._warm_start_settings:
-      logging.info('Warm-starting with WarmStartSettings: %s' %
-                   (self._warm_start_settings,))
-      warm_starting_util.warm_start(*self._warm_start_settings)
-    # Check if the user created a loss summary, and add one if they didn't.
-    # We assume here that the summary is called 'loss'. If it is not, we will
-    # make another one with the name 'loss' to ensure it shows up in the right
-    # graph in TensorBoard.
-    if not any([x.op.name == 'loss'
-                for x in ops.get_collection(ops.GraphKeys.SUMMARIES)]):
-      summary.scalar('loss', estimator_spec.loss)
-    ops.add_to_collection(ops.GraphKeys.LOSSES, estimator_spec.loss)
-    worker_hooks.extend(hooks)
-    worker_hooks.append(
-        training.NanTensorHook(estimator_spec.loss)
-    )
-    if self._config.log_step_count_steps is not None:
-      worker_hooks.append(
-          training.LoggingTensorHook(
-              {
-                  'loss': estimator_spec.loss,
-                  'step': global_step_tensor
-              },
-              every_n_iter=self._config.log_step_count_steps)
-      )
-    worker_hooks.extend(estimator_spec.training_hooks)
-
-    if not (estimator_spec.scaffold.saver or
-            ops.get_collection(ops.GraphKeys.SAVERS)):
-      ops.add_to_collection(
-          ops.GraphKeys.SAVERS,
-          training.Saver(
-              sharded=True,
-              max_to_keep=self._config.keep_checkpoint_max,
-              keep_checkpoint_every_n_hours=(
-                  self._config.keep_checkpoint_every_n_hours),
-              defer_build=True,
-              save_relative_paths=True))
-
-    chief_hooks = []
-    all_hooks = worker_hooks + list(estimator_spec.training_chief_hooks)
-    saver_hooks = [
-        h for h in all_hooks if isinstance(h, training.CheckpointSaverHook)]
-    if (self._config.save_checkpoints_secs or
-        self._config.save_checkpoints_steps):
-      if not saver_hooks:
-        chief_hooks = [
-            training.CheckpointSaverHook(
-                self._model_dir,
-                save_secs=self._config.save_checkpoints_secs,
-                save_steps=self._config.save_checkpoints_steps,
-                scaffold=estimator_spec.scaffold)
-        ]
-        saver_hooks = [chief_hooks[0]]
-    if saving_listeners:
-      if not saver_hooks:
-        raise ValueError(
-            'There should be a CheckpointSaverHook to use saving_listeners. '
-            'Please set one of the RunConfig.save_checkpoints_steps or '
-            'RunConfig.save_checkpoints_secs.')
-      else:
-        # It is expected to have one CheckpointSaverHook. If multiple, we pick
-        # up the first one to add listener.
-        saver_hooks[0]._listeners.extend(saving_listeners)  # pylint: disable=protected-access
-
-    # Add summary hooks to worker 0 if we are running with a master, to ensure
-    # that summaries are written at correct intervals even with long-running
-    # evaluations.
-    save_summary_steps = self._config.save_summary_steps
-    log_step_count_steps = self._config.log_step_count_steps
-    if (self._config.cluster_spec and self._config.cluster_spec.jobs and
-        (run_config.TaskType.MASTER in self._config.cluster_spec.jobs)):
-      # Update config values to prevent the default hooks from being created on
-      # the master or other workers.
-      save_summary_steps = 0
-      log_step_count_steps = None
-
-      if (self._config.task_type == run_config.TaskType.WORKER and
-          self._config.task_id == 0):
-        if (self._config.save_summary_steps and
-            self._config.save_summary_steps > 0):
-          worker_hooks.append(
-              training.SummarySaverHook(
-                  save_steps=self._config.save_summary_steps,
-                  output_dir=self._config.model_dir,
-                  scaffold=estimator_spec.scaffold))
-
-        if (self._config.log_step_count_steps and
-            self._config.log_step_count_steps > 0):
-          worker_hooks.append(
-              training.StepCounterHook(
-                  every_n_steps=self._config.log_step_count_steps,
-                  output_dir=self._config.model_dir))
-
-    with training.MonitoredTrainingSession(
-        master=self._config.master,
-        is_chief=self._config.is_chief,
-        checkpoint_dir=self._model_dir,
-        scaffold=estimator_spec.scaffold,
-        hooks=worker_hooks,
-        chief_only_hooks=(
-            tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)),
-        save_checkpoint_secs=0,  # Saving is handled by a hook.
-        save_summaries_steps=save_summary_steps,
-        config=self._session_config,
-        log_step_count_steps=log_step_count_steps) as mon_sess:
-      loss = None
-      while not mon_sess.should_stop():
-        _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
-    return loss
-
-  def _evaluate_build_graph(self, input_fn, hooks=None, checkpoint_path=None):
-    """Builds the graph and related hooks to run evaluation."""
-    random_seed.set_random_seed(self._config.tf_random_seed)
-    self._create_and_assert_global_step(ops.get_default_graph())
-
-    if self._eval_distribution:
-      (scaffold, evaluation_hooks, input_hooks, update_op, eval_dict) = (
-          self._call_model_fn_eval_distributed(input_fn, self.config))
-    else:
-      (scaffold, evaluation_hooks, input_hooks, update_op, eval_dict) = (
-          self._call_model_fn_eval(input_fn, self.config))
-
-    global_step_tensor = training_util.get_global_step(ops.get_default_graph())
-    # Call to warm_start has to be after model_fn is called.
-    self._maybe_warm_start(checkpoint_path)
-
-    if ops.GraphKeys.GLOBAL_STEP in eval_dict:
-      raise ValueError(
-          'Metric with name `global_step` is not allowed, because Estimator '
-          'already defines a default metric with the same name.')
-    eval_dict[ops.GraphKeys.GLOBAL_STEP] = global_step_tensor
-
-    all_hooks = list(input_hooks)
-    all_hooks.extend(hooks)
-    all_hooks.extend(list(evaluation_hooks or []))
-    # New local variables have been added, so update the estimator spec's
-    # local init op if it was defined.
-    if scaffold and scaffold.local_init_op:
-      # Ensure that eval step has been created before updating local init op.
-      evaluation._get_or_create_eval_step()  # pylint: disable=protected-access
-
-      scaffold = monitored_session.Scaffold(
-          local_init_op=control_flow_ops.group(
-              scaffold.local_init_op,
-              monitored_session.Scaffold.default_local_init_op()),
-          copy_from_scaffold=scaffold
-      )
-
-    return scaffold, update_op, eval_dict, all_hooks
-
-  def _call_model_fn_eval(self, input_fn, config):
-    """Call model_fn for evaluation and handle return values."""
-    features, labels, input_hooks = self._get_features_and_labels_from_input_fn(
-        input_fn, model_fn_lib.ModeKeys.EVAL)
-
-    estimator_spec = self._call_model_fn(
-        features, labels, model_fn_lib.ModeKeys.EVAL, config)
-    eval_metric_ops = _verify_and_create_loss_metric(
-        estimator_spec.eval_metric_ops, estimator_spec.loss)
-    update_op, eval_dict = _extract_metric_update_ops(eval_metric_ops)
-    return (estimator_spec.scaffold, estimator_spec.evaluation_hooks,
-            input_hooks, update_op, eval_dict)
-
-  def _call_model_fn_eval_distributed(self, input_fn, config):
-    """Call model_fn in distribution mode and handle return values."""
-
-    iterator, input_hooks = self._get_iterator_from_input_fn(
-        input_fn, model_fn_lib.ModeKeys.EVAL, self._eval_distribution)
-
-    is_tpu_strategy = (
-        self._eval_distribution.__class__.__name__ == 'TPUStrategy')
-
-    if is_tpu_strategy:
-      steps_per_run_variable = training.get_or_create_steps_per_run_variable()
-      def step_fn(ctx, features, labels=None):
-        """Runs one step of the eval computation and captures outputs."""
-        estimator_spec = self._eval_distribution.call_for_each_tower(
-            self._call_model_fn, features, labels, model_fn_lib.ModeKeys.EVAL,
-            config)
-        eval_metric_ops = _verify_and_create_loss_metric(
-            estimator_spec.eval_metric_ops, estimator_spec.loss,
-            self._eval_distribution)
-        update_op, eval_dict = _extract_metric_update_ops(
-            eval_metric_ops, self._eval_distribution)
-        ctx.set_non_tensor_output(name='estimator_spec', output=estimator_spec)
-        ctx.set_non_tensor_output(name='eval_dict', output=eval_dict)
-        return update_op
-
-      # TODO(priyag): Fix eval step hook to account for steps_per_run.
-      ctx = self._eval_distribution.run_steps_on_dataset(
-          step_fn, iterator, iterations=steps_per_run_variable)
-      update_op = ctx.run_op
-      eval_dict = ctx.non_tensor_outputs['eval_dict']
-      grouped_estimator_spec = ctx.non_tensor_outputs['estimator_spec']
-    else:
-      features, labels = estimator_util.parse_iterator_result(
-          iterator.get_next())
-      grouped_estimator_spec = self._eval_distribution.call_for_each_tower(
-          self._call_model_fn, features, labels,
-          model_fn_lib.ModeKeys.EVAL, config)
-      eval_metric_ops = _verify_and_create_loss_metric(
-          grouped_estimator_spec.eval_metric_ops, grouped_estimator_spec.loss,
-          self._eval_distribution)
-      update_op, eval_dict = _extract_metric_update_ops(
-          eval_metric_ops, self._eval_distribution)
-
-    scaffold = _combine_distributed_scaffold(
-        grouped_estimator_spec.scaffold, self._eval_distribution)
-    evaluation_hooks = self._eval_distribution.unwrap(
-        grouped_estimator_spec.evaluation_hooks)[0]
-    evaluation_hooks = evaluation_hooks + (
-        estimator_util.StrategyInitFinalizeHook(
-            self._eval_distribution.initialize,
-            self._eval_distribution.finalize),)
-
-    return (scaffold, evaluation_hooks, input_hooks, update_op, eval_dict)
-
-  def _evaluate_run(self, checkpoint_path, scaffold, update_op, eval_dict,
-                    all_hooks, output_dir):
-    """Run evaluation."""
-    eval_results = evaluation._evaluate_once(  # pylint: disable=protected-access
-        checkpoint_path=checkpoint_path,
-        master=self._config.evaluation_master,
-        scaffold=scaffold,
-        eval_ops=update_op,
-        final_ops=eval_dict,
-        hooks=all_hooks,
-        config=self._session_config)
-
-    current_global_step = eval_results[ops.GraphKeys.GLOBAL_STEP]
-
-    _write_dict_to_summary(
-        output_dir=output_dir,
-        dictionary=eval_results,
-        current_global_step=current_global_step)
-
-    if checkpoint_path:
-      _write_checkpoint_path_to_summary(
-          output_dir=output_dir,
-          checkpoint_path=checkpoint_path,
-          current_global_step=current_global_step)
-
-    return eval_results
-
-  def _maybe_warm_start(self, checkpoint_path):
-    if not checkpoint_path and self._warm_start_settings:
-      logging.info('Warm-starting with WarmStartSettings: %s' %
-                   (self._warm_start_settings,))
-      warm_starting_util.warm_start(*self._warm_start_settings)
-
-
-def _verify_and_create_loss_metric(eval_metric_ops, loss, distribution=None):
-  """Creates a metric for loss and throws an error if one already exists."""
-  if model_fn_lib.LOSS_METRIC_KEY in eval_metric_ops:
-    raise ValueError(
-        'Metric with name "%s" is not allowed, because Estimator ' %
-        (model_fn_lib.LOSS_METRIC_KEY) +
-        'already defines a default metric with the same name.')
-
-  if distribution is None:
-    loss_metric = metrics_lib.mean(loss)
-  else:
-    loss_metric = distribution.call_for_each_tower(
-        metrics_lib.mean, loss)
-  eval_metric_ops[model_fn_lib.LOSS_METRIC_KEY] = loss_metric
-  return eval_metric_ops
-
-
-def maybe_overwrite_model_dir_and_session_config(config, model_dir):
-  """Overwrite estimator config by `model_dir` and `session_config` if needed.
-
-  Args:
-    config: Original estimator config.
-    model_dir: Estimator model checkpoint directory.
-
-  Returns:
-    Overwritten estimator config.
-
-  Raises:
-    ValueError: Model directory inconsistent between `model_dir` and `config`.
-  """
-
-  if config is None:
-    config = run_config.RunConfig()
-    logging.info('Using default config.')
-  if not isinstance(config, run_config.RunConfig):
-    raise ValueError(
-        'config must be an instance of `RunConfig`, but provided %s.' % config)
-
-  if config.session_config is None:
-    session_config = run_config.get_default_session_config()
-    config = run_config.RunConfig.replace(config, session_config=session_config)
-
-  model_dir = compat_internal.path_to_str(model_dir)
-  if model_dir is not None:
-    if (getattr(config, 'model_dir', None) is not None and
-        config.model_dir != model_dir):
-      raise ValueError(
-          "`model_dir` are set both in constructor and `RunConfig`, but with "
-          "different values. In constructor: '{}', in `RunConfig`: "
-          "'{}' ".format(model_dir, config.model_dir))
-  if model_dir:
-    config = run_config.RunConfig.replace(config, model_dir=model_dir)
-  elif getattr(config, 'model_dir', None) is None:
-    model_dir = tempfile.mkdtemp()
-    logging.warning('Using temporary folder as model directory: %s', model_dir)
-    config = run_config.RunConfig.replace(config, model_dir=model_dir)
-
-  return config
-
-
-def create_per_tower_ready_for_local_init_op(scaffold):
-  """Create a `tf.train.Scaffold.ready_for_local_init_op` inside a tower."""
-  if scaffold.ready_for_local_init_op:
-    return scaffold.ready_for_local_init_op
-
-  def default_ready_for_local_init_op():
-    return variables.report_uninitialized_variables(
-        variables.global_variables())
-
-  return monitored_session.Scaffold.get_or_default(
-      'ready_for_local_init_op', ops.GraphKeys.READY_FOR_LOCAL_INIT_OP,
-      default_ready_for_local_init_op)
-
-
-def _combine_distributed_scaffold(grouped_scaffold, distribution):
-  """Combines scaffold(s) returned from `distribution.call_for_each_tower`."""
-
-  # TODO(anjalisridhar): Figure out how to resolve the following scaffold
-  # parameters: init_feed_dict, init_fn.
-  scaffold_list = distribution.unwrap(grouped_scaffold)
-  init_feed_dict = [
-      s.init_feed_dict
-      for s in scaffold_list
-      if s.init_feed_dict is not None
-  ]
-  if init_feed_dict:
-    init_feed_dict = distribution.group(init_feed_dict)
-  else:
-    init_feed_dict = None
-
-  init_fn = [s.init_fn for s in scaffold_list if s.init_fn is not None]
-  if init_fn:
-    init_fn = distribution.group(init_fn)
-  else:
-    init_fn = None
-
-  init_op = [s.init_op for s in scaffold_list if s.init_op is not None]
-  if init_op:
-    init_op = distribution.group(init_op)
-  else:
-    init_op = None
-
-  def _unwrap_and_concat(value):
-    value = nest.flatten(distribution.unwrap(value))
-    if len(value) != 1:
-      return array_ops.concat(value, 0)
-    return value[0]
-
-  ready_op = distribution.call_for_each_tower(
-      lambda scaffold: scaffold.ready_op, grouped_scaffold)
-  if ready_op is not None:
-    ready_op = _unwrap_and_concat(ready_op)
-
-  ready_for_local_init_op = distribution.call_for_each_tower(
-      create_per_tower_ready_for_local_init_op, grouped_scaffold)
-  if ready_for_local_init_op is not None:
-    ready_for_local_init_op = _unwrap_and_concat(ready_for_local_init_op)
-  else:
-    ready_for_local_init_op = None
-
-  local_init_op = [
-      s.local_init_op
-      for s in scaffold_list
-      if s.local_init_op is not None
-  ]
-  if local_init_op:
-    local_init_op = distribution.group(local_init_op)
-  else:
-    local_init_op = None
-
-  summary_op = [
-      s.summary_op for s in scaffold_list if s.summary_op is not None
-  ]
-  if summary_op:
-    summary_op = distribution.group(summary_op)
-  else:
-    summary_op = None
-
-  scaffold = monitored_session.Scaffold(
-      init_op=init_op,
-      ready_op=ready_op,
-      ready_for_local_init_op=ready_for_local_init_op,
-      local_init_op=local_init_op,
-      summary_op=summary_op,
-      init_feed_dict=init_feed_dict,
-      init_fn=init_fn)
-  return scaffold
-
-
-def _check_checkpoint_available(model_dir):
-  latest_path = checkpoint_management.latest_checkpoint(model_dir)
-  if not latest_path:
-    raise ValueError(
-        'Could not find trained model in model_dir: {}.'.format(model_dir))
-
-
-def _check_hooks_type(hooks):
-  """Returns hooks if all are `SessionRunHook`, raises TypeError otherwise."""
-  hooks = list(hooks or [])
-  for h in hooks:
-    if not isinstance(h, training.SessionRunHook):
-      raise TypeError('Hooks must be a SessionRunHook, given: {}'.format(h))
-  return hooks
-
-
-def _check_listeners_type(saving_listeners):
-  """Check listeners type."""
-  listeners = list(saving_listeners or [])
-  for l in listeners:
-    if not isinstance(l, training.CheckpointSaverListener):
-      raise TypeError(
-          'saving_listeners must be a list of CheckpointSaverListener, '
-          'given: {}'.format(l))
-  return listeners
-
-
-def _get_replica_device_setter(config):
-  """Creates a replica device setter if required as a default `device_fn`.
-
-  `Estimator` uses `tf.train.ReplicaDeviceSetter` as a default device placer. It
-  sets the
-  distributed related arguments such as number of `ps_replicas` based on given
-  `config`.
-
-  Args:
-    config: A `tf.estimator.RunConfig` instance.
-
-  Returns:
-    A replica device setter, or `None`.
-  """
-  if config.task_type:
-    worker_device = '/job:%s/task:%d' % (config.task_type, config.task_id)
-  else:
-    worker_device = '/job:worker'
-
-  if config.num_ps_replicas > 0:
-    return training.replica_device_setter(
-        ps_tasks=config.num_ps_replicas,
-        worker_device=worker_device,
-        merge_devices=True,
-        ps_ops=list(device_setter.STANDARD_PS_OPS),
-        cluster=config.cluster_spec)
-  else:
-    return None
-
-
-def _verify_model_fn_args(model_fn, params):
-  """Verifies `model_fn` arguments."""
-  args = set(function_utils.fn_args(model_fn))
-  if 'features' not in args:
-    raise ValueError('model_fn (%s) must include features argument.' % model_fn)
-  if params is not None and 'params' not in args:
-    raise ValueError('model_fn (%s) does not include params argument, '
-                     'but params (%s) is passed to Estimator.' % (model_fn,
-                                                                  params))
-  if params is None and 'params' in args:
-    logging.warning('Estimator\'s model_fn (%s) includes params '
-                    'argument, but params are not passed to Estimator.',
-                    model_fn)
-  non_valid_args = list(args - _VALID_MODEL_FN_ARGS)
-  if non_valid_args:
-    raise ValueError('model_fn (%s) has following not expected args: %s' %
-                     (model_fn, non_valid_args))
-
-
-def _load_global_step_from_checkpoint_dir(checkpoint_dir):
-  try:
-    checkpoint_reader = training.NewCheckpointReader(
-        training.latest_checkpoint(checkpoint_dir))
-    return checkpoint_reader.get_tensor(ops.GraphKeys.GLOBAL_STEP)
-  except:  # pylint: disable=bare-except
-    return 0
-
-
-def _extract_metric_update_ops(eval_dict, distribution=None):
-  """Separate update operations from metric value operations."""
-  update_ops = []
-  value_ops = {}
-  # Sort metrics lexicographically so graph is identical every time.
-  for name, value in sorted(six.iteritems(eval_dict)):
-    value_ops[name] = value[0]
-    update_ops.append(
-        distribution.group(value[1]) if distribution else value[1])
-
-  update_op = control_flow_ops.group(*update_ops) if update_ops else None
-  return update_op, value_ops
-
-
-def _dict_to_str(dictionary):
-  """Get a `str` representation of a `dict`.
-
-  Args:
-    dictionary: The `dict` to be represented as `str`.
-
-  Returns:
-    A `str` representing the `dictionary`.
-  """
-  return ', '.join('%s = %s' % (k, v)
-                   for k, v in sorted(six.iteritems(dictionary))
-                   if not isinstance(v, six.binary_type))
-
-
-def _write_dict_to_summary(output_dir,
-                           dictionary,
-                           current_global_step):
-  """Writes a `dict` into summary file in given output directory.
-
-  Args:
-    output_dir: `str`, directory to write the summary file in.
-    dictionary: the `dict` to be written to summary file.
-    current_global_step: `int`, the current global step.
-  """
-  logging.info('Saving dict for global step %d: %s', current_global_step,
-               _dict_to_str(dictionary))
-  summary_writer = writer_cache.FileWriterCache.get(output_dir)
-  summary_proto = summary_pb2.Summary()
-  for key in dictionary:
-    if dictionary[key] is None:
-      continue
-    if key == 'global_step':
-      continue
-    if (isinstance(dictionary[key], np.float32) or
-        isinstance(dictionary[key], float)):
-      summary_proto.value.add(tag=key, simple_value=float(dictionary[key]))
-    elif (isinstance(dictionary[key], np.int64) or
-          isinstance(dictionary[key], np.int32) or
-          isinstance(dictionary[key], int)):
-      summary_proto.value.add(tag=key, simple_value=int(dictionary[key]))
-    elif isinstance(dictionary[key], six.binary_type):
-      try:
-        summ = summary_pb2.Summary.FromString(dictionary[key])
-        for i, _ in enumerate(summ.value):
-          summ.value[i].tag = '%s/%d' % (key, i)
-        summary_proto.value.extend(summ.value)
-      except message.DecodeError:
-        logging.warn('Skipping summary for %s, cannot parse string to Summary.',
-                     key)
-        continue
-    elif isinstance(dictionary[key], np.ndarray):
-      value = summary_proto.value.add()
-      value.tag = key
-      value.node_name = key
-      tensor_proto = tensor_util.make_tensor_proto(dictionary[key])
-      value.tensor.CopyFrom(tensor_proto)
-      # pylint: disable=line-too-long
-      logging.info(
-          'Summary for np.ndarray is not visible in Tensorboard by default. '
-          'Consider using a Tensorboard plugin for visualization (see '
-          'https://github.com/tensorflow/tensorboard-plugin-example/blob/master/README.md'
-          ' for more information).')
-      # pylint: enable=line-too-long
-    else:
-      logging.warn(
-          'Skipping summary for %s, must be a float, np.float32, np.int64, '
-          'np.int32 or int or np.ndarray or a serialized string of Summary.',
-          key)
-  summary_writer.add_summary(summary_proto, current_global_step)
-  summary_writer.flush()
-
-
-def _write_checkpoint_path_to_summary(output_dir, checkpoint_path,
-                                      current_global_step):
-  """Writes `checkpoint_path` into summary file in the given output directory.
-
-  Args:
-    output_dir: `str`, directory to write the summary file in.
-    checkpoint_path: `str`, checkpoint file path to be written to summary file.
-    current_global_step: `int`, the current global step.
-  """
-
-  checkpoint_path_tag = 'checkpoint_path'
-
-  logging.info('Saving \'%s\' summary for global step %d: %s',
-               checkpoint_path_tag, current_global_step, checkpoint_path)
-  summary_proto = summary_pb2.Summary()
-  summary_proto.value.add(
-      tag=checkpoint_path_tag,
-      tensor=tensor_util.make_tensor_proto(
-          checkpoint_path, dtype=dtypes.string))
-  summary_writer = writer_cache.FileWriterCache.get(output_dir)
-  summary_writer.add_summary(summary_proto, current_global_step)
-  summary_writer.flush()
-
-
-def _has_dataset_or_queue_runner(maybe_tensor):
-  """Returns `True` if `Dataset` or `QueueRunner` has been used."""
-  # Check TF dataset first. Here, we use a simple algorithm to check the top
-  # level Tensors only, which should be sufficient for most users.
-  tensors = [x for x in nest.flatten(maybe_tensor) if isinstance(x, ops.Tensor)]
-  if any([t.op.type == 'IteratorGetNext' for t in tensors]):
-    return True
-
-  # Now, check queue.
-  return ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS)
-
-
-VocabInfo = warm_starting_util.VocabInfo  # pylint: disable=invalid-name
-estimator_export('estimator.VocabInfo')(VocabInfo)
-
-
-@estimator_export('estimator.WarmStartSettings')
-class WarmStartSettings(
-    collections.namedtuple('WarmStartSettings', [
-        'ckpt_to_initialize_from',
-        'vars_to_warm_start',
-        'var_name_to_vocab_info',
-        'var_name_to_prev_var_name',
-    ])):
-  """Settings for warm-starting in `tf.estimator.Estimators`.
-
-  Example Use with canned `tf.estimator.DNNEstimator`:
-
-  ```
-  emb_vocab_file = tf.feature_column.embedding_column(
-      tf.feature_column.categorical_column_with_vocabulary_file(
-          "sc_vocab_file", "new_vocab.txt", vocab_size=100),
-      dimension=8)
-  emb_vocab_list = tf.feature_column.embedding_column(
-      tf.feature_column.categorical_column_with_vocabulary_list(
-          "sc_vocab_list", vocabulary_list=["a", "b"]),
-      dimension=8)
-  estimator = tf.estimator.DNNClassifier(
-    hidden_units=[128, 64], feature_columns=[emb_vocab_file, emb_vocab_list],
-    warm_start_from=ws)
-  ```
-
-  where `ws` could be defined as:
-
-  Warm-start all weights in the model (input layer and hidden weights).
-  Either the directory or a specific checkpoint can be provided (in the case
-  of the former, the latest checkpoint will be used):
-
-  ```
-  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp")
-  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp/model-1000")
-  ```
-
-  Warm-start only the embeddings (input layer):
-
-  ```
-  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp",
-                         vars_to_warm_start=".*input_layer.*")
-  ```
-
-  Warm-start all weights but the embedding parameters corresponding to
-  `sc_vocab_file` have a different vocab from the one used in the current
-  model:
-
-  ```
-  vocab_info = tf.estimator.VocabInfo(
-      new_vocab=sc_vocab_file.vocabulary_file,
-      new_vocab_size=sc_vocab_file.vocabulary_size,
-      num_oov_buckets=sc_vocab_file.num_oov_buckets,
-      old_vocab="old_vocab.txt"
-  )
-  ws = WarmStartSettings(
-      ckpt_to_initialize_from="/tmp",
-      var_name_to_vocab_info={
-          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
-      })
-  ```
-
-  Warm-start only `sc_vocab_file` embeddings (and no other variables), which
-  have a different vocab from the one used in the current model:
-
-  ```
-  vocab_info = tf.estimator.VocabInfo(
-      new_vocab=sc_vocab_file.vocabulary_file,
-      new_vocab_size=sc_vocab_file.vocabulary_size,
-      num_oov_buckets=sc_vocab_file.num_oov_buckets,
-      old_vocab="old_vocab.txt"
-  )
-  ws = WarmStartSettings(
-      ckpt_to_initialize_from="/tmp",
-      vars_to_warm_start=None,
-      var_name_to_vocab_info={
-          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
-      })
-  ```
-
-  Warm-start all weights but the parameters corresponding to `sc_vocab_file`
-  have a different vocab from the one used in current checkpoint, and only
-  100 of those entries were used:
-
-  ```
-  vocab_info = tf.estimator.VocabInfo(
-      new_vocab=sc_vocab_file.vocabulary_file,
-      new_vocab_size=sc_vocab_file.vocabulary_size,
-      num_oov_buckets=sc_vocab_file.num_oov_buckets,
-      old_vocab="old_vocab.txt",
-      old_vocab_size=100
-  )
-  ws = WarmStartSettings(
-      ckpt_to_initialize_from="/tmp",
-      var_name_to_vocab_info={
-          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
-      })
-  ```
-
-  Warm-start all weights but the parameters corresponding to `sc_vocab_file`
-  have a different vocab from the one used in current checkpoint and the
-  parameters corresponding to `sc_vocab_list` have a different name from the
-  current checkpoint:
-
-  ```
-  vocab_info = tf.estimator.VocabInfo(
-      new_vocab=sc_vocab_file.vocabulary_file,
-      new_vocab_size=sc_vocab_file.vocabulary_size,
-      num_oov_buckets=sc_vocab_file.num_oov_buckets,
-      old_vocab="old_vocab.txt",
-      old_vocab_size=100
-  )
-  ws = WarmStartSettings(
-      ckpt_to_initialize_from="/tmp",
-      var_name_to_vocab_info={
-          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
-      },
-      var_name_to_prev_var_name={
-          "input_layer/sc_vocab_list_embedding/embedding_weights":
-              "old_tensor_name"
-      })
-  ```
-
-  Attributes:
-    ckpt_to_initialize_from: [Required] A string specifying the directory with
-      checkpoint file(s) or path to checkpoint from which to warm-start the
-      model parameters.
-    vars_to_warm_start: [Optional] One of the following:  - A regular expression
-      (string) that captures which variables to warm-start (see
-      `tf.get_collection`).  This expression will only consider variables in the
-      `TRAINABLE_VARIABLES` collection. - A list of Variables to warm-start. - A
-      list of strings, each representing a full variable name to warm-start. -
-      `None`, in which case only variables specified in `var_name_to_vocab_info`
-      will be warm-started.  Defaults to `'.*'`, which warm-starts all variables
-      in the `TRAINABLE_VARIABLES` collection.  Note that this excludes
-      variables such as accumulators and moving statistics from batch norm.
-    var_name_to_vocab_info: [Optional] Dict of variable names (strings) to
-      `tf.estimator.VocabInfo`. The variable names should be "full" variables,
-      not the names of the partitions.  If not explicitly provided, the variable
-      is assumed to have no (changes to) vocabulary.
-    var_name_to_prev_var_name: [Optional] Dict of variable names (strings) to
-      name of the previously-trained variable in `ckpt_to_initialize_from`. If
-      not explicitly provided, the name of the variable is assumed to be same
-      between previous checkpoint and current model.
-  """
-
-  def __new__(cls,
-              ckpt_to_initialize_from,
-              vars_to_warm_start='.*',
-              var_name_to_vocab_info=None,
-              var_name_to_prev_var_name=None):
-    if not ckpt_to_initialize_from:
-      raise ValueError(
-          '`ckpt_to_initialize_from` MUST be set in WarmStartSettings')
-    return super(WarmStartSettings, cls).__new__(
-        cls,
-        ckpt_to_initialize_from,
-        vars_to_warm_start,
-        var_name_to_vocab_info or {},
-        var_name_to_prev_var_name or {},
-    )
-
-
-def _get_saved_model_ckpt(saved_model_dir):
-  """Return path to variables checkpoint in a `SavedModel` directory."""
-  if not gfile.Exists(
-      os.path.join(saved_model_utils.get_variables_dir(saved_model_dir),
-                   compat.as_text('variables.index'))):
-    raise ValueError('Directory provided has an invalid SavedModel format: %s'
-                     % saved_model_dir)
-  return saved_model_utils.get_variables_path(saved_model_dir)
-
-
-def _get_default_warm_start_settings(warm_start_from):
-  """Returns default `tf.estimator.WarmStartSettings`.
-
-  Args:
-    warm_start_from: Either a string representing the filepath of a checkpoint
-      or `SavedModel` to initialize from, or an instance of
-      `tf.estimator.WarmStartSettings`.
-
-  Returns:
-    Either None or an instance of `WarmStartSettings`.
-
-  Raises:
-    ValueError: If `warm_start_from` is not `None` but is neither a string nor
-    an
-      instance of `WarmStartSettings`.
-  """
-  if warm_start_from is None:
-    return None
-  if isinstance(warm_start_from, (six.string_types, six.binary_type)):
-    # Infer that this is a SavedModel if export_path +
-    # 'variables/variables.index' exists, and if so, construct the
-    # WarmStartSettings pointing to the variables path
-    # (export_path + 'variables/variables').
-    if gfile.Exists(os.path.join(
-        saved_model_utils.get_variables_dir(warm_start_from),
-        compat.as_text('variables.index'))):
-      logging.info('Warm-starting from a SavedModel')
-      return WarmStartSettings(
-          ckpt_to_initialize_from=saved_model_utils.get_variables_path(
-              warm_start_from))
-    return WarmStartSettings(ckpt_to_initialize_from=warm_start_from)
-  elif isinstance(warm_start_from, WarmStartSettings):
-    return warm_start_from
-  else:
-    raise ValueError('warm_start_from must be a string or a WarmStartSettings, '
-                     'instead got {}'.format(type(warm_start_from)))
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.estimator import *
diff --git a/tensorflow/python/estimator/estimator_lib.py b/tensorflow/python/estimator/estimator_lib.py
index f188f2d4e6..bfda6591ac 100644
--- a/tensorflow/python/estimator/estimator_lib.py
+++ b/tensorflow/python/estimator/estimator_lib.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,40 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Estimator: High level tools for working with models."""
+"""estimator_lib python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=unused-import,line-too-long,wildcard-import
-from tensorflow.python.estimator.canned.baseline import BaselineClassifier
-from tensorflow.python.estimator.canned.baseline import BaselineRegressor
-from tensorflow.python.estimator.canned.boosted_trees import BoostedTreesClassifier
-from tensorflow.python.estimator.canned.boosted_trees import BoostedTreesRegressor
-from tensorflow.python.estimator.canned.dnn import DNNClassifier
-from tensorflow.python.estimator.canned.dnn import DNNRegressor
-from tensorflow.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedClassifier
-from tensorflow.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedRegressor
-from tensorflow.python.estimator.canned.linear import LinearClassifier
-from tensorflow.python.estimator.canned.linear import LinearRegressor
-from tensorflow.python.estimator.canned.parsing_utils import classifier_parse_example_spec
-from tensorflow.python.estimator.canned.parsing_utils import regressor_parse_example_spec
-from tensorflow.python.estimator.estimator import Estimator
-from tensorflow.python.estimator.estimator import VocabInfo
-from tensorflow.python.estimator.estimator import WarmStartSettings
-from tensorflow.python.estimator.export import export_lib as export
-from tensorflow.python.estimator.exporter import Exporter
-from tensorflow.python.estimator.exporter import FinalExporter
-from tensorflow.python.estimator.exporter import LatestExporter
-from tensorflow.python.estimator.inputs import inputs
-from tensorflow.python.estimator.keras import model_to_estimator
-from tensorflow.python.estimator.model_fn import EstimatorSpec
-from tensorflow.python.estimator.model_fn import ModeKeys
-from tensorflow.python.estimator.run_config import RunConfig
-from tensorflow.python.estimator.training import EvalSpec
-from tensorflow.python.estimator.training import train_and_evaluate
-from tensorflow.python.estimator.training import TrainSpec
+from tensorflow_estimator.python.estimator import estimator_lib
 
+# Include attrs that start with single underscore.
+estimator_lib.__all__ = [
+    s for s in dir(estimator_lib) if not s.startswith('__')
+]
 
-# pylint: enable=unused-import,line-too-long,wildcard-import
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.estimator_lib import *
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
deleted file mode 100644
index 246dfb1a4b..0000000000
--- a/tensorflow/python/estimator/estimator_test.py
+++ /dev/null
@@ -1,3280 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Estimator."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import functools
-import glob
-import json
-import os
-import tempfile
-
-import numpy as np
-import six
-
-from google.protobuf import text_format
-
-from tensorflow.core.protobuf import rewriter_config_pb2
-from tensorflow.python.client import session
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import estimator
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator import run_config
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.export import export_output
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.framework import test_util
-from tensorflow.python.keras import metrics as metrics_module
-from tensorflow.python.layers import layers
-from tensorflow.python.lib.io import file_io
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import state_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.ops.losses import losses
-from tensorflow.python.ops.random_ops import random_uniform
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.saved_model import loader
-from tensorflow.python.saved_model import loader_impl
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.saved_model import tag_constants
-from tensorflow.python.summary import summary
-from tensorflow.python.summary import summary_iterator
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import checkpoint_state_pb2
-from tensorflow.python.training import saver
-from tensorflow.python.training import saver_test_utils
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.training import training
-from tensorflow.python.util import compat
-from tensorflow.python.util import function_utils
-
-_TMP_DIR = '/tmp'
-_ANOTHER_TMP_DIR = '/another_tmp'
-
-
-def dummy_model_fn(features, labels, params):
-  _, _, _ = features, labels, params
-
-
-def summaries_with_matching_keyword(keyword, dir_):
-  """Yields summary protos matching given keyword from event file."""
-
-  writer_cache.FileWriterCache.clear()
-
-  event_paths = glob.glob(os.path.join(dir_, 'events*'))
-  for event in summary_iterator.summary_iterator(event_paths[-1]):
-    if event.summary is not None:
-      for value in event.summary.value:
-        if keyword in value.tag:
-          yield event.summary
-
-
-def check_eventfile_for_keyword(keyword, dir_):
-  """Checks event files for the keyword."""
-  return any(summaries_with_matching_keyword(keyword, dir_))
-
-
-def get_mock_saver():
-  real_saver = saver.Saver()
-  return test.mock.Mock(wraps=real_saver, saver_def=real_saver.saver_def)
-
-
-class EstimatorInheritanceConstraintTest(test.TestCase):
-  """Tests that sub classes cannot override methods of Estimator."""
-
-  def test_override_a_method(self):
-    class _Estimator(estimator.Estimator):
-
-      def __init__(self):
-        super(_Estimator, self).__init__(model_fn=dummy_model_fn)
-
-      def predict(self, input_fn, predict_keys=None, hooks=None):
-        pass
-
-    with self.assertRaisesRegexp(
-        ValueError, 'cannot override members of Estimator.*predict'):
-      _Estimator()
-
-  def test_override_a_method_with_tricks(self):
-    class _Estimator(estimator.Estimator):
-
-      def __init__(self):
-        super(_Estimator, self).__init__(model_fn=dummy_model_fn)
-
-      def _assert_members_are_not_overridden(self):
-        pass  # HAHA! I tricked you!
-
-      def predict(self, input_fn, predict_keys=None, hooks=None):
-        pass
-
-    with self.assertRaisesRegexp(
-        ValueError, 'cannot override members of Estimator.*predict'):
-      _Estimator()
-
-  def test_extension_of_api_is_ok(self):
-    class _Estimator(estimator.Estimator):
-
-      def __init__(self):
-        super(_Estimator, self).__init__(model_fn=dummy_model_fn)
-
-      def predict_proba(self, input_fn, predict_keys=None, hooks=None):
-        pass
-
-    _Estimator()
-
-  def test_override_allowed_method(self):
-    class _Estimator(estimator.Estimator):
-
-      def __init__(self):
-        super(_Estimator, self).__init__(model_fn=dummy_model_fn)
-
-      def _tf_api_names(self):
-        pass
-
-    _Estimator()
-
-
-class EstimatorConstructorTest(test.TestCase):
-
-  def test_config_must_be_a_run_config(self):
-    with self.assertRaisesRegexp(ValueError, 'an instance of `RunConfig`'):
-      estimator.Estimator(model_fn=None, config='NotARunConfig')
-
-  def test_model_fn_must_be_provided(self):
-    with self.assertRaisesRegexp(ValueError, 'model_fn.* must be'):
-      estimator.Estimator(model_fn=None)
-
-  def test_property_accessors(self):
-
-    def model_fn(features, labels, params):
-      _, _, _ = features, labels, params
-
-    class FakeConfig(run_config.RunConfig):
-      pass
-
-    params = {'hidden_layers': [3, 4]}
-    est = estimator.Estimator(
-        model_fn=model_fn, model_dir='bla', config=FakeConfig(), params=params)
-    self.assertTrue(isinstance(est.config, FakeConfig))
-    self.assertEqual(params, est.params)
-    self.assertEqual('bla', est.model_dir)
-
-  def test_default_config(self):
-
-    def model_fn(features, labels):
-      _, _ = features, labels
-
-    est = estimator.Estimator(model_fn=model_fn)
-    self.assertTrue(isinstance(est.config, run_config.RunConfig))
-    self.assertTrue(est._session_config.allow_soft_placement)
-    rewrite_options = est._session_config.graph_options.rewrite_options
-    self.assertEqual(rewrite_options.meta_optimizer_iterations,
-                     rewriter_config_pb2.RewriterConfig.ONE)
-
-  def test_default_model_dir(self):
-
-    def model_fn(features, labels):
-      _, _ = features, labels
-
-    with test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR):
-      est = estimator.Estimator(model_fn=model_fn)
-      self.assertEqual(_TMP_DIR, est.config.model_dir)
-      self.assertEqual(_TMP_DIR, est.model_dir)
-
-  def test_model_dir_in_constructor(self):
-
-    def model_fn(features, labels):
-      _, _ = features, labels
-
-    est = estimator.Estimator(model_fn=model_fn, model_dir=_TMP_DIR)
-    self.assertEqual(_TMP_DIR, est.config.model_dir)
-    self.assertEqual(_TMP_DIR, est.model_dir)
-
-  def test_empty_model_dir(self):
-    def model_fn(features, labels):
-      _, _ = features, labels
-
-    with test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR):
-      est = estimator.Estimator(model_fn=model_fn, model_dir='')
-      self.assertEqual(_TMP_DIR, est.config.model_dir)
-      self.assertEqual(_TMP_DIR, est.model_dir)
-
-  def test_model_dir_in_run_config(self):
-
-    class FakeConfig(run_config.RunConfig):
-
-      @property
-      def model_dir(self):
-        return _TMP_DIR
-
-    def model_fn(features, labels):
-      _, _ = features, labels
-
-    est = estimator.Estimator(model_fn=model_fn, config=FakeConfig())
-    self.assertEqual(_TMP_DIR, est.config.model_dir)
-    self.assertEqual(_TMP_DIR, est.model_dir)
-
-  def test_same_model_dir_in_constructor_and_run_config(self):
-
-    class FakeConfig(run_config.RunConfig):
-
-      @property
-      def model_dir(self):
-        return _TMP_DIR
-
-    def model_fn(features, labels):
-      _, _ = features, labels
-
-    est = estimator.Estimator(
-        model_fn=model_fn, config=FakeConfig(), model_dir=_TMP_DIR)
-    self.assertEqual(_TMP_DIR, est.config.model_dir)
-    self.assertEqual(_TMP_DIR, est.model_dir)
-
-  def test_different_model_dir_in_constructor_and_run_config(self):
-
-    class FakeConfig(run_config.RunConfig):
-
-      @property
-      def model_dir(self):
-        return _TMP_DIR
-
-    def model_fn(features, labels):
-      _, _ = features, labels
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        '`model_dir` are set both in constructor and `RunConfig`, but '
-        'with different values'):
-      estimator.Estimator(
-          model_fn=model_fn, config=FakeConfig(), model_dir=_ANOTHER_TMP_DIR)
-
-  def test_model_fn_args_must_include_features(self):
-
-    def model_fn(x, labels):
-      _, _ = x, labels
-
-    with self.assertRaisesRegexp(ValueError, 'features'):
-      estimator.Estimator(model_fn=model_fn)
-
-  def test_model_fn_args_labels_is_optional(self):
-
-    def model_fn(features):
-      _ = features
-
-    estimator.Estimator(model_fn=model_fn)
-
-  def test_if_params_provided_then_model_fn_should_accept_it(self):
-
-    def model_fn(features, labels):
-      _, _ = features, labels
-
-    estimator.Estimator(model_fn=model_fn)
-    with self.assertRaisesRegexp(ValueError, 'params'):
-      estimator.Estimator(model_fn=model_fn, params={'hidden_layers': 4})
-
-  def test_internal_params_is_a_deepcopy(self):
-
-    def model_fn(features, labels, params):
-      _, _, _ = features, labels, params
-
-    params = {'hidden_layers': 4}
-    est = estimator.Estimator(model_fn=model_fn, params=params)
-
-    params['hidden_layers'] = 5
-    self.assertEqual(4, est.params['hidden_layers'])
-
-  def test_not_known_model_fn_args(self):
-
-    def model_fn(features, labels, something):
-      _, _, _ = features, labels, something
-
-    with self.assertRaisesRegexp(ValueError, 'something'):
-      estimator.Estimator(model_fn=model_fn)
-
-  def test_not_known_model_fn_args_handled_by_lambda(self):
-    def model_fn(features, labels, something):
-      _, _, _ = features, labels, something
-
-    new_model_fn = lambda features, labels: model_fn(  # pylint: disable=g-long-lambda
-        features, labels, 'something')
-    estimator.Estimator(model_fn=new_model_fn)
-
-  def test_if_model_fn_is_a_member_function_of_a_class(self):
-
-    class ModelFnClass(object):
-
-      def __init__(self):
-        estimator.Estimator(model_fn=self.model_fn)
-
-      def model_fn(self, features, labels, mode):
-        _, _, _ = features, labels, mode
-
-    ModelFnClass()
-
-  def test_model_fn_property_binds_params(self):
-
-    def model_fn(features, labels, mode, config, params):
-      _, _, _, _, _ = features, labels, mode, config, params
-
-    est = estimator.Estimator(model_fn=model_fn)
-    model_fn_args = function_utils.fn_args(est.model_fn)
-    self.assertEqual(
-        set(['features', 'labels', 'mode', 'config']), set(model_fn_args))
-
-  def test_model_fn_property_returns_fixed_signature(self):
-
-    def model_fn(features, labels):
-      _, _ = features, labels
-
-    est = estimator.Estimator(model_fn=model_fn)
-    model_fn_args = function_utils.fn_args(est.model_fn)
-    self.assertEqual(
-        set(['features', 'labels', 'mode', 'config']), set(model_fn_args))
-
-
-def dummy_input_fn():
-  return ({'x': constant_op.constant([[1], [1]])},
-          constant_op.constant([[1], [1]]))
-
-
-def model_fn_global_step_incrementer(features, labels, mode):
-  _, _ = features, labels
-  global_step = training.get_global_step()
-  return model_fn_lib.EstimatorSpec(
-      mode,
-      loss=constant_op.constant(1.),
-      train_op=state_ops.assign_add(global_step, 1))
-
-
-def assert_features_op(expected_features, actual_features):
-  return [
-      check_ops.assert_equal(
-          expected_features[k], actual_features[k], name='assert_%s' % k)
-      for k in expected_features
-  ]
-
-
-def _estimator_spec(
-    expected_features, expected_labels, actual_features, actual_labels, mode):
-  assert_ops = tuple(
-      assert_features_op(expected_features, actual_features) + [
-          check_ops.assert_equal(
-              expected_labels, actual_labels, name='assert_labels')
-      ])
-  global_step = training.get_global_step()
-  with ops.control_dependencies(assert_ops):
-    return model_fn_lib.EstimatorSpec(
-        mode=mode,
-        predictions=constant_op.constant(0.),
-        loss=constant_op.constant(0.),
-        train_op=state_ops.assign_add(global_step, 1))
-
-
-def _make_input_fn(features, labels):
-  def _input_fn():
-    return {
-        k: constant_op.constant(v)
-        for k, v in six.iteritems(features)
-    }, constant_op.constant(labels)
-  return _input_fn
-
-
-class EstimatorTrainTest(test.TestCase):
-
-  def test_callable_model_fn(self):
-    expected_features = {'x': 42., 'y': 43.}
-    expected_labels = 44.
-
-    model_fn_call_count = [0]
-
-    test_self = self
-
-    class ModelFn(object):
-
-      def __call__(self, features, labels):
-        model_fn_call_count[0] += 1
-        test_self.assertItemsEqual(expected_features.keys(), features.keys())
-        return _estimator_spec(
-            expected_features, expected_labels, features, labels,
-            model_fn_lib.ModeKeys.TRAIN)
-
-    with self.assertRaisesRegexp(ValueError, 'does not include params'):
-      estimator.Estimator(model_fn=ModelFn(), params={'a': 'b'})
-    est = estimator.Estimator(model_fn=ModelFn(), config=run_config.RunConfig())
-    self.assertEqual(0, model_fn_call_count[0])
-    est.train(
-        input_fn=_make_input_fn(expected_features, expected_labels), steps=1)
-    self.assertEqual(1, model_fn_call_count[0])
-
-  def test_callable_input_fn(self):
-    expected_mode = model_fn_lib.ModeKeys.TRAIN
-    expected_params = {'batch_size': 10}
-    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
-    input_fn_call_count = [0]
-
-    def _model_fn(features, labels, mode, params, config):
-      del params, config
-      return model_fn_global_step_incrementer(features, labels, mode)
-
-    test_self = self
-
-    class InputFn(object):
-
-      def __call__(self, mode, params, config):
-        input_fn_call_count[0] += 1
-        test_self.assertEqual(expected_mode, mode)
-        test_self.assertEqual(expected_params, params)
-        test_self.assertEqual(4321, config.tf_random_seed)
-        return dummy_input_fn()
-
-    est = estimator.Estimator(model_fn=_model_fn,
-                              params=expected_params,
-                              config=expected_config)
-    self.assertEqual(0, input_fn_call_count[0])
-    est.train(InputFn(), steps=1)
-    self.assertEqual(1, input_fn_call_count[0])
-
-  def test_nested_input_fn(self):
-    expected_params = {'batch_size': 10}
-
-    def _input_fn():
-      dataset_features = dataset_ops.Dataset.from_tensor_slices(
-          (random_uniform([4]),
-           random_uniform([4, 100], maxval=100, dtype=dtypes.int32)))
-      dataset_labels = dataset_ops.Dataset.from_tensor_slices(
-          random_uniform([4, 10]))
-      dataset = dataset_ops.Dataset.zip((dataset_features, dataset_labels))
-      dataset = dataset.repeat(-1)
-      iterator = dataset.make_initializable_iterator()
-      return iterator.get_next()
-
-    def _model_fn(features, labels, mode, params, config):
-      del params, config
-      return model_fn_global_step_incrementer(features, labels, mode)
-
-    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
-    est = estimator.Estimator(
-        model_fn=_model_fn, params=expected_params, config=expected_config)
-    est.train(_input_fn, steps=4)
-
-  def test_input_fn_args(self):
-    expected_mode = model_fn_lib.ModeKeys.TRAIN
-    expected_params = {'batch_size': 10}
-    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
-    input_fn_call_count = [0]
-
-    def _model_fn(features, labels, mode, params, config):
-      del params, config
-      return model_fn_global_step_incrementer(features, labels, mode)
-
-    def _input_fn(mode, params, config):
-      input_fn_call_count[0] += 1
-      self.assertEqual(expected_mode, mode)
-      self.assertEqual(expected_params, params)
-      self.assertEqual(4321, config.tf_random_seed)
-      return dummy_input_fn()
-
-    est = estimator.Estimator(model_fn=_model_fn,
-                              params=expected_params,
-                              config=expected_config)
-    self.assertEqual(0, input_fn_call_count[0])
-    est.train(_input_fn, steps=1)
-    self.assertEqual(1, input_fn_call_count[0])
-
-  def test_minimal_model_fn_args(self):
-    expected_features = {'x': 4, 'y': 5}
-
-    def _input_fn():
-      return expected_features
-
-    model_fn_call_count = [0]
-    def _model_fn(features):
-      model_fn_call_count[0] += 1
-      self.assertItemsEqual(expected_features.keys(), features.keys())
-      with ops.control_dependencies(
-          assert_features_op(expected_features, features)):
-        return model_fn_lib.EstimatorSpec(
-            mode=None,
-            predictions=constant_op.constant(0.),
-            loss=constant_op.constant(0.),
-            train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    self.assertEqual(0, model_fn_call_count[0])
-    est.train(input_fn=_input_fn, steps=1)
-    self.assertEqual(1, model_fn_call_count[0])
-
-  def test_labels_should_be_none_if_model_fn_does_not_use_labels(self):
-
-    def _input_fn_with_labels():
-      return {'x': 4, 'y': 5}, [4]
-
-    def _model_fn(features):
-      _ = features
-      return model_fn_lib.EstimatorSpec(
-          mode=None,
-          predictions=constant_op.constant(0.),
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    with self.assertRaisesRegexp(ValueError, 'model_fn does not take labels'):
-      est.train(input_fn=_input_fn_with_labels, steps=1)
-
-  def test_input_fn_len_should_be_2_if_tuple_or_list(self):
-
-    def _input_fn():
-      return 4, 5, 6
-
-    def _model_fn(features):
-      _ = features
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    with self.assertRaisesRegexp(ValueError, 'len 2 tuple'):
-      est.train(input_fn=_input_fn, steps=1)
-
-  def test_all_model_fn_args(self):
-    expected_features = {'x': 42., 'y': 43.}
-    expected_labels = 44.
-    expected_params = {'some_param': 'some_value'}
-    expected_config = run_config.RunConfig()
-    expected_config.i_am_test = True
-
-    # TODO(ptucker): We have to roll our own mock since Estimator._get_arguments
-    # doesn't work with mock fns.
-    model_fn_call_count = [0]
-
-    # Note that args are all passed by keyword, so can be in any order.
-    def _model_fn(mode, params, features, labels, config):
-      model_fn_call_count[0] += 1
-      self.assertItemsEqual(expected_features.keys(), features.keys())
-      self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)
-      self.assertEqual(expected_params, params)
-      self.assertTrue(config.i_am_test)
-      return _estimator_spec(
-          expected_features, expected_labels, features, labels, mode)
-
-    est = estimator.Estimator(
-        model_fn=_model_fn, params=expected_params, config=expected_config)
-    self.assertEqual(0, model_fn_call_count[0])
-    est.train(
-        input_fn=_make_input_fn(expected_features, expected_labels), steps=1)
-    self.assertEqual(1, model_fn_call_count[0])
-
-  def test_partial_model_fn_args(self):
-    expected_features = {'x': 42., 'y': 43.}
-    expected_labels = 44.
-    expected_params = {'some_param': 'some_value'}
-    expected_config = run_config.RunConfig()
-    expected_config.i_am_test = True
-    expected_foo = 45.
-    expected_bar = 46.
-
-    # TODO(ptucker): We have to roll our own mock since Estimator._get_arguments
-    # doesn't work with mock fns.
-    model_fn_call_count = [0]
-
-    def _model_fn(features, labels, foo, mode, params, config, bar):
-      model_fn_call_count[0] += 1
-      self.assertEqual(expected_foo, foo)
-      self.assertEqual(expected_bar, bar)
-      self.assertItemsEqual(expected_features.keys(), features.keys())
-      self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)
-      self.assertEqual(expected_params, params)
-      self.assertTrue(config.i_am_test)
-      return _estimator_spec(
-          expected_features, expected_labels, features, labels, mode)
-    partial_model_fn = functools.partial(
-        _model_fn, foo=expected_foo, bar=expected_bar)
-
-    est = estimator.Estimator(
-        model_fn=partial_model_fn, params=expected_params,
-        config=expected_config)
-    self.assertEqual(0, model_fn_call_count[0])
-    est.train(
-        input_fn=_make_input_fn(expected_features, expected_labels), steps=1)
-    self.assertEqual(1, model_fn_call_count[0])
-
-  def test_model_fn_must_return_estimator_spec(self):
-
-    def model_fn(features, labels):
-      _, _ = features, labels
-      return 'NotGoodNotGood'
-
-    est = estimator.Estimator(model_fn=model_fn)
-    with self.assertRaisesRegexp(ValueError, 'EstimatorSpec'):
-      est.train(dummy_input_fn, steps=1)
-
-  def test_run_train_op_and_saves_at_the_end(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
-    est.train(dummy_input_fn, steps=5)
-    self.assertEqual(
-        5, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
-
-  def test_loss_summary(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer,
-                              config=run_config.RunConfig(save_summary_steps=1))
-    est.train(dummy_input_fn, steps=1)
-
-    # Make sure nothing is stuck in limbo.
-    writer_cache.FileWriterCache.clear()
-
-    if check_eventfile_for_keyword('loss', est.model_dir):
-      return
-    self.fail('{} should be part of reported summaries.'.format('loss'))
-
-  def test_latest_checkpoint(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
-    self.assertIsNone(est.latest_checkpoint())
-    est.train(dummy_input_fn, steps=5)
-    self.assertIsNotNone(est.latest_checkpoint())
-    self.assertTrue(est.latest_checkpoint().startswith(est.model_dir))
-
-  def test_steps_and_saves_reloads(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
-    est.train(dummy_input_fn, steps=5)
-    self.assertEqual(
-        5, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
-    est.train(dummy_input_fn, steps=5)
-    self.assertEqual(
-        10, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
-
-  def test_warm_starts(self):
-    def _make_model_fn(x):
-      def _variable_creating_model_fn(features, labels, mode):
-        _, _ = features, labels
-        variable_scope.get_variable('x', initializer=x)
-        global_step = training.get_global_step()
-        return model_fn_lib.EstimatorSpec(
-            mode,
-            loss=constant_op.constant(1.),
-            train_op=state_ops.assign_add(global_step, 1))
-      return _variable_creating_model_fn
-
-    est = estimator.Estimator(model_fn=_make_model_fn(42.))
-    est.train(dummy_input_fn, steps=10)
-
-    warm_started_est = estimator.Estimator(
-        model_fn=_make_model_fn(36.),
-        warm_start_from=est.model_dir)
-    warm_started_est.train(dummy_input_fn, steps=5)
-    # warm_start is called after the model_fn, so x should have the value
-    # from the checkpoint.
-    self.assertEqual(42., warm_started_est.get_variable_value('x'))
-    # global_step should not be warm-started.
-    self.assertEqual(
-        5, estimator._load_global_step_from_checkpoint_dir(
-            warm_started_est.model_dir))
-
-  def test_warm_starts_from_savedmodel(self):
-    def _make_model_fn(x):
-      def _variable_creating_and_export_model_fn(features, labels, mode):
-        _, _ = features, labels
-        variable_scope.get_variable('x', initializer=x)
-        global_step = training.get_global_step()
-        return model_fn_lib.EstimatorSpec(
-            mode,
-            predictions={'y': constant_op.constant(1.0)},
-            loss=constant_op.constant(1.),
-            train_op=state_ops.assign_add(global_step, 1),
-            export_outputs={'test': export_output.ClassificationOutput(
-                constant_op.constant([4.2]), constant_op.constant(['label']))})
-      return _variable_creating_and_export_model_fn
-
-    est = estimator.Estimator(model_fn=_make_model_fn(42.))
-    est.train(dummy_input_fn, steps=10)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    tmpdir = tempfile.mkdtemp()
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est.export_savedmodel(
-        export_dir_base, serving_input_receiver_fn)
-
-    warm_started_est = estimator.Estimator(
-        model_fn=_make_model_fn(36.),
-        warm_start_from=export_dir)
-    warm_started_est.train(dummy_input_fn, steps=5)
-    # warm_start is called after the model_fn, so x should have the value
-    # from the SavedModel.
-    self.assertEqual(42., warm_started_est.get_variable_value('x'))
-
-  def test_max_step(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
-    est.train(dummy_input_fn, max_steps=5)
-    self.assertEqual(
-        5, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
-    est.train(dummy_input_fn, max_steps=5)
-    self.assertEqual(
-        5, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
-
-  def test_checkpoint_contains_relative_paths(self):
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(
-        model_dir=tmpdir,
-        model_fn=model_fn_global_step_incrementer)
-    est.train(dummy_input_fn, steps=5)
-
-    checkpoint_file_content = file_io.read_file_to_string(
-        os.path.join(tmpdir, 'checkpoint'))
-    ckpt = checkpoint_state_pb2.CheckpointState()
-    text_format.Merge(checkpoint_file_content, ckpt)
-    self.assertEqual(ckpt.model_checkpoint_path, 'model.ckpt-5')
-    # TODO(b/78461127): Please modify tests to not directly rely on names of
-    # checkpoints.
-    self.assertAllEqual(
-        ['model.ckpt-0', 'model.ckpt-5'], ckpt.all_model_checkpoint_paths)
-
-  def test_train_save_copy_reload(self):
-    tmpdir = tempfile.mkdtemp()
-    model_dir1 = os.path.join(tmpdir, 'model_dir1')
-    est1 = estimator.Estimator(
-        model_dir=model_dir1,
-        model_fn=model_fn_global_step_incrementer)
-    est1.train(dummy_input_fn, steps=5)
-
-    # We have to clear the cache before we can rename the directory,
-    # otherwise open file handles will prevent the delete on Windows.
-    writer_cache.FileWriterCache.clear()
-    model_dir2 = os.path.join(tmpdir, 'model_dir2')
-    os.renames(model_dir1, model_dir2)
-
-    est2 = estimator.Estimator(
-        model_dir=model_dir2,
-        model_fn=model_fn_global_step_incrementer)
-    self.assertEqual(
-        5, estimator._load_global_step_from_checkpoint_dir(est2.model_dir))
-    est2.train(dummy_input_fn, steps=5)
-    self.assertEqual(
-        10, estimator._load_global_step_from_checkpoint_dir(est2.model_dir))
-
-  def test_steps0_raises_error(self):
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops)
-    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
-      est.train(dummy_input_fn, steps=0)
-
-  def test_steps_negative_raises_error(self):
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops)
-    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
-      est.train(dummy_input_fn, steps=-1)
-
-  def test_max_steps0_raises_error(self):
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops)
-    with self.assertRaisesRegexp(ValueError, 'Must specify max_steps > 0'):
-      est.train(dummy_input_fn, max_steps=0)
-
-  def test_max_steps_negative_raises_error(self):
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops)
-    with self.assertRaisesRegexp(ValueError, 'Must specify max_steps > 0'):
-      est.train(dummy_input_fn, max_steps=-1)
-
-  def test_scaffold_is_used(self):
-    self.is_init_fn_called = False
-
-    def _init_fn(scaffold, sess):
-      _, _ = scaffold, sess
-      self.is_init_fn_called = True
-
-    def _model_fn_scaffold(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          scaffold=training.Scaffold(init_fn=_init_fn))
-
-    est = estimator.Estimator(model_fn=_model_fn_scaffold)
-    est.train(dummy_input_fn, steps=1)
-    self.assertTrue(self.is_init_fn_called)
-
-  def test_hooks_should_be_session_run_hook(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
-    with self.assertRaisesRegexp(TypeError, 'must be a SessionRunHook'):
-      est.train(dummy_input_fn, steps=1, hooks=['NotAHook'])
-
-  def test_training_hooks_are_used(self):
-    chief_hook = test.mock.MagicMock(
-        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
-    hook = test.mock.MagicMock(
-        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
-
-    def _model_fn_hooks(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          training_chief_hooks=[chief_hook],
-          training_hooks=[hook])
-
-    est = estimator.Estimator(model_fn=_model_fn_hooks)
-    self.assertFalse(chief_hook.begin.called)
-    self.assertFalse(hook.begin.called)
-    est.train(dummy_input_fn, steps=1)
-    self.assertTrue(chief_hook.begin.called)
-    self.assertTrue(hook.begin.called)
-
-  def test_saving_listeners_are_used(self):
-    listener = test.mock.Mock(spec=training.CheckpointSaverListener)
-    listener.after_save.return_value = None
-    est = estimator.Estimator(
-        model_fn=model_fn_global_step_incrementer,
-        config=run_config.RunConfig(save_checkpoints_steps=10))
-    est.train(dummy_input_fn, steps=26, saving_listeners=[listener])
-    self.assertEqual(4, listener.before_save.call_count)
-    self.assertEqual(4, listener.after_save.call_count)
-
-  def test_saver_hook_should_exist_to_use_saving_listeners(self):
-    listener = test.mock.Mock(spec=training.CheckpointSaverListener)
-    est = estimator.Estimator(
-        model_fn=model_fn_global_step_incrementer,
-        config=run_config.RunConfig(save_checkpoints_steps=None,
-                                    save_checkpoints_secs=None))
-    with self.assertRaisesRegexp(
-        ValueError, 'CheckpointSaverHook to use saving_listeners'):
-      est.train(dummy_input_fn, steps=1, saving_listeners=[listener])
-
-  def test_listeners_should_be_listeners(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
-    with self.assertRaisesRegexp(
-        TypeError, 'must be a list of CheckpointSaverListener'):
-      est.train(dummy_input_fn, steps=1, saving_listeners=['not-a-listener'])
-
-  def test_chief_only_hook_should_not_be_called_on_non_chief(self):
-    chief_hook = test.mock.MagicMock(
-        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
-    hook = test.mock.MagicMock(
-        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
-
-    def _model_fn_hooks(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          training_chief_hooks=[chief_hook],
-          training_hooks=[hook])
-
-    class NonChiefRunConfig(run_config.RunConfig):
-      @property
-      def is_chief(self):  # pylint: disable=g-wrong-blank-lines
-        return False
-
-    # Mocking the SessionManager.wait_for_session, so that worker doesn't wait
-    # for chief.
-    def get_initialized_session(*args, **kwargs):
-      # Session doesn't take 'max_wait_secs' argument.
-      kwargs.pop('max_wait_secs', None)
-      scaffold = training.Scaffold().finalize()
-      sess = session.Session(*args, **kwargs)
-      sess.run(scaffold.init_op)
-      return sess
-
-    with test.mock.patch.object(
-        training.SessionManager,
-        'wait_for_session',
-        side_effect=get_initialized_session):
-      est = estimator.Estimator(
-          model_fn=_model_fn_hooks, config=NonChiefRunConfig())
-      self.assertFalse(chief_hook.begin.called)
-      self.assertFalse(hook.begin.called)
-      est.train(dummy_input_fn, steps=1)
-      self.assertFalse(chief_hook.begin.called)
-      self.assertTrue(hook.begin.called)
-
-  def test_features_labels_mode(self):
-    given_features = {'test-features': [[1], [1]]}
-    given_labels = {'test-labels': [[1], [1]]}
-
-    def _input_fn():
-      return given_features, given_labels
-
-    def _model_fn(features, labels, mode):
-      self.features, self.labels, self.mode = features, labels, mode
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[0.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(_input_fn, steps=1)
-    self.assertEqual(given_features, self.features)
-    self.assertEqual(given_labels, self.labels)
-    self.assertEqual(model_fn_lib.ModeKeys.TRAIN, self.mode)
-
-  def test_graph_initialization_global_step_and_random_seed(self):
-    expected_random_seed = run_config.RunConfig().tf_random_seed
-    def _model_fn(features, labels, mode):
-      _, _, _ = features, labels, mode
-      self.assertIsNotNone(training.get_global_step())
-      self.assertEqual(expected_random_seed, ops.get_default_graph().seed)
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[0.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-
-  def test_config_should_not_be_evaluator_or_ps(self):
-
-    class FakeEvaluatorConfig(run_config.RunConfig):
-
-      @property
-      def task_type(self):
-        return run_config.TaskType.EVALUATOR
-
-    est = estimator.Estimator(
-        model_fn=dummy_model_fn, config=FakeEvaluatorConfig())
-    with self.assertRaisesRegexp(ValueError, 'train_and_evaluate'):
-      est.train(dummy_input_fn, steps=1)
-
-  def test_master_distributed_hooks(self):
-    tf_config = json.dumps({
-        'cluster': {
-            run_config.TaskType.PS: ['localhost:1234'],
-            run_config.TaskType.WORKER: ['localhost:1235'],
-            run_config.TaskType.MASTER: ['localhost:1236']
-        },
-        'task': {
-            'type': run_config.TaskType.MASTER,
-            'index': 0
-        }
-    })
-    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
-      est = estimator.Estimator(
-          model_fn=model_fn_global_step_incrementer,
-          config=run_config.RunConfig())
-
-    with test.mock.patch.object(training,
-                                'MonitoredTrainingSession') as mock_sess:
-      est.train(dummy_input_fn, steps=1)
-      self.assertFalse(
-          any(
-              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertFalse(
-          any(
-              isinstance(hook, basic_session_run_hooks.StepCounterHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
-      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
-
-  def test_master_distributed_hooks_for_worker_0(self):
-    tf_config = json.dumps({
-        'cluster': {
-            run_config.TaskType.PS: ['localhost:1234'],
-            run_config.TaskType.WORKER: ['localhost:1235'],
-            run_config.TaskType.MASTER: ['localhost:1236']
-        },
-        'task': {
-            'type': run_config.TaskType.WORKER,
-            'index': 0
-        }
-    })
-    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
-      est = estimator.Estimator(
-          model_fn=model_fn_global_step_incrementer,
-          config=run_config.RunConfig())
-
-    with test.mock.patch.object(training,
-                                'MonitoredTrainingSession') as mock_sess:
-      est.train(dummy_input_fn, steps=1)
-      self.assertTrue(
-          any(
-              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertTrue(
-          any(
-              isinstance(hook, basic_session_run_hooks.StepCounterHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
-      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
-
-  def test_master_distributed_hooks_for_worker_nonzero(self):
-    tf_config = json.dumps({
-        'cluster': {
-            run_config.TaskType.PS: ['localhost:1234'],
-            run_config.TaskType.WORKER: ['localhost:1235', 'localhost:1237'],
-            run_config.TaskType.MASTER: ['localhost:1236']
-        },
-        'task': {
-            'type': run_config.TaskType.WORKER,
-            'index': 1
-        }
-    })
-    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
-      est = estimator.Estimator(
-          model_fn=model_fn_global_step_incrementer,
-          config=run_config.RunConfig())
-
-    with test.mock.patch.object(training,
-                                'MonitoredTrainingSession') as mock_sess:
-      est.train(dummy_input_fn, steps=1)
-      self.assertFalse(
-          any(
-              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertFalse(
-          any(
-              isinstance(hook, basic_session_run_hooks.StepCounterHook)
-              for hook in mock_sess.call_args[1]['hooks']))
-      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
-      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
-
-
-def _model_fn_with_eval_metric_ops(features, labels, mode, params):
-  _, _ = features, labels
-  global_step = training.get_global_step()
-  loss = constant_op.constant(1.)
-  metric_name_1 = params.get('metric_name') or 'metric'
-  metric_value_1 = params.get('metric_value') or 2.
-  metric_name_2 = params.get('metric_name_2') or 'metric2'
-  metric_value_2 = params.get('metric_value_2') or 2.
-
-  metric_update_op = loss.op
-  metric_tensor = control_flow_ops.with_dependencies(
-      [metric_update_op], constant_op.constant(metric_value_1))
-
-  mean = metrics_module.Mean()
-  mean.update_state(metric_value_2)
-  return model_fn_lib.EstimatorSpec(
-      mode,
-      loss=loss,
-      predictions={'predictions': constant_op.constant(1.)},
-      train_op=state_ops.assign_add(global_step, 1),
-      eval_metric_ops={
-          metric_name_1: (metric_tensor, metric_update_op),
-          metric_name_2: mean,
-      })
-
-
-class _StepCounterHook(session_run_hook.SessionRunHook):
-  """Hooks that counts the number of times it is called."""
-
-  def __init__(self):
-    self._steps = 0
-
-  def before_run(self, run_context):
-    del run_context
-    self._steps += 1
-
-  @property
-  def steps(self):
-    return self._steps
-
-
-class EstimatorGetVariablesTest(test.TestCase):
-
-  def test_model_should_be_trained(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      variables.VariableV1(1., name='one')
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    with self.assertRaisesRegexp(ValueError, 'not find trained model'):
-      est.get_variable_names()
-    with self.assertRaisesRegexp(ValueError, 'not find trained model'):
-      est.get_variable_value('one')
-
-  def test_get_variable_utils(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      variables.VariableV1(1., name='one')
-      variables.VariableV1(3., name='three')
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(input_fn=dummy_input_fn, steps=1)
-    self.assertEqual(
-        set(['one', 'three', 'global_step']), set(est.get_variable_names()))
-    self.assertEqual(1., est.get_variable_value('one'))
-    self.assertEqual(3., est.get_variable_value('three'))
-
-
-class EstimatorDatasetIntegrationTest(test.TestCase):
-  """Tests dataset integration."""
-
-  def test_returned_by_input_fn(self):
-
-    def _input_fn():
-      return dataset_ops.Dataset.from_tensors(([1.], [2.]))
-
-    def _model_fn(features, labels, mode):
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=features + labels,  # 1 + 2
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(_input_fn, steps=1)
-    scores = est.evaluate(_input_fn, steps=1)
-    self.assertEqual(3., scores[model_fn_lib.LOSS_METRIC_KEY])
-
-  def test_with_none_labels(self):
-
-    def _input_fn():
-      return dataset_ops.Dataset.from_tensors([7.])
-
-    def _model_fn(features, labels, mode):
-      self.assertIsNone(labels)
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=features,  # 7
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(_input_fn, steps=1)
-    scores = est.evaluate(_input_fn, steps=1)
-    self.assertEqual(7., scores[model_fn_lib.LOSS_METRIC_KEY])
-
-  def test_with_predict(self):
-
-    def _input_fn():
-      return dataset_ops.Dataset.from_tensors([10.])
-
-    def _model_fn(features, labels, mode):
-      _ = labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          predictions=features,  # 10
-          loss=features,  # 10
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(_input_fn, steps=1)
-    self.assertEqual([10.], next(est.predict(input_fn=_input_fn)))
-
-  def test_batching(self):
-
-    def _input_fn():
-      return dataset_ops.Dataset.from_tensor_slices(([[1.], [2.]],
-                                                     [[10.], [20.]])).batch(1)
-
-    def _model_fn(features, labels, mode):
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          predictions=features,
-          loss=features + (0 if labels is None else labels),  # 11, 22
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(_input_fn)
-    scores = est.evaluate(_input_fn)
-    # (11 + 22)/2 = 16.5
-    self.assertEqual(16.5, scores[model_fn_lib.LOSS_METRIC_KEY])
-    self.assertEqual([1., 2.], list(est.predict(_input_fn)))
-
-
-class EstimatorEvaluateTest(test.TestCase):
-
-  def test_eval_dir(self):
-    est = estimator.Estimator(
-        model_fn=model_fn_global_step_incrementer,
-        model_dir='some_path')
-    expected_eval_dir = os.path.join('some_path', 'eval')
-    self.assertEqual(expected_eval_dir, est.eval_dir())
-    expected_eval_dir_name = os.path.join('some_path', 'eval_a_name')
-    self.assertEqual(expected_eval_dir_name, est.eval_dir('a_name'))
-
-  def test_input_fn_args(self):
-    expected_mode = model_fn_lib.ModeKeys.EVAL
-    expected_params = {'batch_size': 10}
-    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
-    input_fn_call_count = [0]
-
-    def _model_fn(features, labels, mode, params, config):
-      del params, config
-      return model_fn_global_step_incrementer(features, labels, mode)
-
-    def _input_fn(mode, params, config):
-      input_fn_call_count[0] += 1
-      self.assertEqual(expected_mode, mode)
-      self.assertEqual(expected_params, params)
-      self.assertEqual(4321, config.tf_random_seed)
-      return dummy_input_fn()
-
-    est = estimator.Estimator(model_fn=_model_fn,
-                              params=expected_params,
-                              config=expected_config)
-    est.train(dummy_input_fn, steps=1)
-    self.assertEqual(0, input_fn_call_count[0])
-    est.evaluate(_input_fn, steps=1)
-    self.assertEqual(1, input_fn_call_count[0])
-
-  def test_model_fn_must_return_estimator_spec(self):
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      if mode == model_fn_lib.ModeKeys.EVAL:
-        return 'NotGoodNotGood'
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(1.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    with self.assertRaisesRegexp(
-        ValueError, 'model_fn should return an EstimatorSpec'):
-      est.evaluate(dummy_input_fn, steps=1)
-
-  def test_no_checkpoint_uses_init(self):
-    def _model_fn(features, labels, mode, params):
-      del features, labels, params
-      mean = metrics_module.Mean()
-      mean.update_state(variables.VariableV1(2.) + 1)
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(1.),
-          eval_metric_ops={
-              'mean1': mean,
-              'mean2': metrics_lib.mean(variables.VariableV1(2.) + 1)
-          })
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    scores = est.evaluate(dummy_input_fn, steps=1)
-    # Metric value here is set to 1 + the value of the Variable that is newly
-    # initialized (since there is no checkpoint).
-    self.assertEqual(3., scores['mean1'])
-    self.assertEqual(3., scores['mean2'])
-
-  def test_no_checkpoint_uses_init_with_warm_starting(self):
-    def _make_model_fn(x):
-      def _variable_creating_and_export_model_fn(features, labels, mode):
-        _, _ = features, labels
-        x_var = variable_scope.get_variable('x', initializer=x)
-        global_step = training.get_global_step()
-        mean = metrics_module.Mean()
-        mean.update_state(x_var + 1)
-        return model_fn_lib.EstimatorSpec(
-            mode,
-            predictions={'y': constant_op.constant(1.0)},
-            loss=constant_op.constant(1.),
-            eval_metric_ops={
-                'mean1': mean,
-                'mean2': metrics_lib.mean(x_var + 1)
-            },
-            train_op=state_ops.assign_add(global_step, 1),
-            export_outputs={
-                'test':
-                    export_output.ClassificationOutput(
-                        constant_op.constant([4.2]),
-                        constant_op.constant(['label']))
-            })
-
-      return _variable_creating_and_export_model_fn
-
-    first_est = estimator.Estimator(model_fn=_make_model_fn(42.))
-    first_est.train(dummy_input_fn, steps=10)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    tmpdir = tempfile.mkdtemp()
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    exported_path = first_est.export_savedmodel(export_dir_base,
-                                                serving_input_receiver_fn)
-
-    # Test that we can pass either warm_start_from as an external checkpoint
-    # or an exported SavedModel.
-    est = estimator.Estimator(model_fn=_make_model_fn(52.),
-                              warm_start_from=exported_path)
-    eval_metrics = est.evaluate(dummy_input_fn, steps=1)
-    # Metric value here is set to 1 + the value of the Variable that is
-    # warm-started from the SavedModel of the first model (42.), as opposed to
-    # the initialization in the new model_fn (52.).
-    self.assertEqual(43., eval_metrics['mean1'])
-    self.assertEqual(43., eval_metrics['mean2'])
-
-    est = estimator.Estimator(model_fn=_make_model_fn(62.),
-                              warm_start_from=first_est.model_dir)
-    eval_metrics = est.evaluate(dummy_input_fn, steps=1)
-    # Metric value here is set to 1 + the value of the Variable that is
-    # warm-started from a checkpoint of the first model (42.), as opposed to
-    # the initialization in the new model_fn (52.).
-    self.assertEqual(43., eval_metrics['mean1'])
-    self.assertEqual(43., eval_metrics['mean2'])
-
-  def test_scores(self):
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops,
-        params={
-            'metric_name': 'metric',
-            'metric_value': 2.,
-            'metric_name_2': 'metric2',
-            'metric_value_2': 3.,
-        })
-    est.train(dummy_input_fn, steps=5)
-    scores = est.evaluate(dummy_input_fn, steps=1)
-    self.assertIn('metric', scores)
-    self.assertAlmostEqual(2., scores['metric'])
-    self.assertIn('metric2', scores)
-    self.assertAlmostEqual(3., scores['metric2'])
-
-  def test_tuple_metrics(self):
-    def _model_fn(features, labels, mode):
-      del features  # unused
-      del labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          loss=constant_op.constant(1.),
-          eval_metric_ops={
-              'nested_metric': (
-                  ((constant_op.constant(2.), constant_op.constant(1)),
-                   constant_op.constant(3., dtype=dtypes.float64)),
-                  control_flow_ops.no_op())})
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    evaluation = est.evaluate(dummy_input_fn, steps=1)
-    ((two_float, one_integer), three_double) = evaluation['nested_metric']
-    self.assertAlmostEqual(2., two_float)
-    self.assertEqual(1, one_integer)
-    self.assertAlmostEqual(3., three_double)
-
-  def test_steps0_raises_error(self):
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops)
-    est.train(dummy_input_fn, steps=5)
-    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
-      est.evaluate(dummy_input_fn, steps=0)
-
-  def test_steps_negative_raises_error(self):
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops)
-    est.train(dummy_input_fn, steps=5)
-    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
-      est.evaluate(dummy_input_fn, steps=-1)
-
-  def test_global_step_metric_raises_error(self):
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops,
-        params={
-            'metric_name': 'global_step',
-            'metric_value': 2.})
-    est.train(dummy_input_fn, steps=5)
-    with self.assertRaisesRegexp(
-        ValueError, 'Metric with name `global_step` is not allowed'):
-      est.evaluate(dummy_input_fn, steps=1)
-
-  def test_global_step_is_reported(self):
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops,
-        params={
-            'metric_name': 'metric',
-            'metric_value': 2.,
-            'metric_name_2': 'metric2',
-            'metric_value_2': 3.,
-        })
-    est.train(dummy_input_fn, steps=5)
-    scores = est.evaluate(dummy_input_fn, steps=1)
-    self.assertIn('global_step', scores)
-    self.assertEqual(5, scores['global_step'])
-
-  def test_loss_metric_is_reported(self):
-
-    def _model_fn_with_incremental_loss(features, labels, mode):
-      _, _ = features, labels
-      local_weight = variables.VariableV1(
-          0., name='local_weight', collections=[ops.GraphKeys.LOCAL_VARIABLES])
-      # Loss will be 2, 4, 6, ...
-      loss = 2 * state_ops.assign_add(local_weight, 1.)
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=loss,
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    est = estimator.Estimator(model_fn=_model_fn_with_incremental_loss)
-    est.train(dummy_input_fn, steps=1)
-    scores = est.evaluate(dummy_input_fn, steps=5)
-    self.assertIn(model_fn_lib.LOSS_METRIC_KEY, scores)
-    # Average loss will be (2 + 4 + 6 + 8 + 10)/5=6
-    self.assertAlmostEqual(6., scores[model_fn_lib.LOSS_METRIC_KEY])
-
-  def test_hooks_should_be_session_run_hook(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
-    est.train(dummy_input_fn, steps=1)
-    with self.assertRaisesRegexp(TypeError, 'must be a SessionRunHook'):
-      est.evaluate(dummy_input_fn, steps=5, hooks=['NotAHook'])
-
-  def test_hooks_are_used(self):
-    step_counter_hook = _StepCounterHook()
-
-    est = estimator.Estimator(model_fn=_model_fn_with_eval_metric_ops)
-    est.train(dummy_input_fn, steps=1)
-    est.evaluate(dummy_input_fn, steps=5, hooks=[step_counter_hook])
-    self.assertEqual(5, step_counter_hook.steps)
-
-  def test_evaluate_from_checkpoint(self):
-    params = {
-        'metric_name': 'metric',
-        'metric_value': 2.,
-        'metric_name_2': 'metric2',
-        'metric_value_2': 3.,
-    }
-    est1 = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops,
-        params=params)
-    est1.train(dummy_input_fn, steps=5)
-    est2 = estimator.Estimator(
-        model_fn=_model_fn_with_eval_metric_ops,
-        params=params)
-    scores = est2.evaluate(
-        dummy_input_fn, steps=1, checkpoint_path=est1.latest_checkpoint())
-    self.assertEqual(5, scores['global_step'])
-
-  def test_wrong_shape_throws_reasonable_error(self):
-    """Make sure we are helpful when model_fns change. See b/110263146."""
-    def _get_model_fn(val=1):
-      def _model_fn(features, labels, mode):
-        del features, labels  # unused
-        variables.VariableV1(val, name='weight')
-        return model_fn_lib.EstimatorSpec(
-            mode=mode,
-            predictions=constant_op.constant([[1.]]),
-            loss=constant_op.constant(0.),
-            train_op=state_ops.assign_add(training.get_global_step(), 1))
-      return _model_fn
-
-    model_fn_1 = _get_model_fn()
-    model_fn_2 = _get_model_fn(val=[1])
-
-    est1 = estimator.Estimator(model_fn=model_fn_1)
-    est1.train(dummy_input_fn, steps=5)
-    est2 = estimator.Estimator(
-        model_fn=model_fn_2, model_dir=est1.model_dir)
-
-    expected_msg = 'Restoring from checkpoint failed.*a mismatch between'
-    with self.assertRaisesRegexp(errors.InvalidArgumentError, expected_msg):
-      est2.train(dummy_input_fn, steps=1,)
-
-  def test_scaffold_is_used(self):
-
-    def _model_fn_scaffold(features, labels, mode):
-      _, _ = features, labels
-      variables.VariableV1(1., name='weight')
-      self.mock_saver = get_mock_saver()
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          predictions=constant_op.constant([[1.]]),
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          scaffold=training.Scaffold(saver=self.mock_saver))
-
-    est = estimator.Estimator(model_fn=_model_fn_scaffold)
-    est.train(dummy_input_fn, steps=1)
-    est.evaluate(dummy_input_fn, steps=1)
-    self.assertTrue(self.mock_saver.restore.called)
-
-  def test_features_labels_mode(self):
-    given_features = {'test-features': [[1], [1]]}
-    given_labels = {'test-labels': [[1], [1]]}
-
-    def _input_fn():
-      return given_features, given_labels
-
-    def _model_fn(features, labels, mode):
-      self.features, self.labels, self.mode = features, labels, mode
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[0.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(_input_fn, steps=1)
-    est.evaluate(_input_fn, steps=1)
-    self.assertEqual(given_features, self.features)
-    self.assertEqual(given_labels, self.labels)
-    self.assertEqual(model_fn_lib.ModeKeys.EVAL, self.mode)
-
-  def test_graph_initialization_global_step_and_random_seed(self):
-    expected_random_seed = run_config.RunConfig().tf_random_seed
-    def _model_fn(features, labels, mode):
-      _, _, _ = features, labels, mode
-      self.assertIsNotNone(training.get_global_step())
-      self.assertEqual(expected_random_seed, ops.get_default_graph().seed)
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[0.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    est.evaluate(dummy_input_fn, steps=1)
-
-  def test_evaluation_hooks_are_used(self):
-    hook = test.mock.MagicMock(
-        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
-
-    def _model_fn_hooks(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          evaluation_hooks=[hook])
-
-    est = estimator.Estimator(model_fn=_model_fn_hooks)
-    est.train(dummy_input_fn, steps=1)
-    self.assertFalse(hook.begin.called)
-    est.evaluate(dummy_input_fn, steps=1)
-    self.assertTrue(hook.begin.called)
-
-  def test_summary_writing_with_summary_proto(self):
-
-    def model_fn_global_step_incrementer_image(features, labels, mode):
-      _, _ = features, labels
-      global_step = training.get_global_step()
-
-      image = array_ops.zeros([5, 3, 3, 1])
-      eval_metric_ops = {
-          'foo': (summary.image('image', image, max_outputs=3),
-                  constant_op.constant(1))
-      }
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(1.),
-          train_op=state_ops.assign_add(global_step, 1),
-          eval_metric_ops=eval_metric_ops)
-
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer_image,
-                              config=run_config.RunConfig(save_summary_steps=1))
-    est.train(dummy_input_fn, steps=200)
-    est.evaluate(
-        input_fn=dummy_input_fn,
-        steps=200,
-    )
-
-    # Make sure nothing is stuck in limbo.
-    writer_cache.FileWriterCache.clear()
-
-    # Get last evaluation Event written.
-    for key in ['foo/0', 'foo/1', 'foo/2']:
-      self.assertTrue(
-          check_eventfile_for_keyword(key, est.eval_dir()),
-          '{} should be part of reported summaries.'.format(key))
-
-    # Verify that evaluated checkpoint path is written to event file.
-    checkpoint_path_tag = 'checkpoint_path'
-    self.assertTrue(
-        check_eventfile_for_keyword(checkpoint_path_tag, est.eval_dir()),
-        '{} should be part of reported summaries.'.format(checkpoint_path_tag))
-
-    expected_tensor_proto = tensor_util.make_tensor_proto(
-        est.latest_checkpoint(), dtype=dtypes.string)
-    summaries = summaries_with_matching_keyword(checkpoint_path_tag,
-                                                est.eval_dir())
-    self.assertProtoEquals(expected_tensor_proto,
-                           next(summaries).value[0].tensor)
-
-  def test_summary_writing_with_tensor(self):
-
-    def model_fn_with_prediction_mean_tensor_eval_metric_ops(
-        features, labels, mode, params):
-      _, _ = features, labels
-      global_step = training.get_global_step()
-
-      metric_name = params.get('metric_name') or 'metric'
-      predictions = constant_op.constant([1., .5, 0.])
-      eval_metric_ops = {metric_name: metrics_lib.mean_tensor(predictions)}
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(1.),
-          predictions={'predictions': predictions},
-          train_op=state_ops.assign_add(global_step, 1),
-          eval_metric_ops=eval_metric_ops)
-
-    metric_key = 'PMT'
-    params = {
-        'metric_name': metric_key,
-    }
-    est = estimator.Estimator(
-        model_fn=model_fn_with_prediction_mean_tensor_eval_metric_ops,
-        params=params,
-        config=run_config.RunConfig(save_summary_steps=1))
-    est.train(input_fn=dummy_input_fn, steps=10)
-    est.evaluate(
-        input_fn=dummy_input_fn,
-        steps=10,
-    )
-
-    writer_cache.FileWriterCache.clear()
-
-    self.assertTrue(
-        check_eventfile_for_keyword(metric_key, est.eval_dir()),
-        '{} should be part of reported summaries.'.format(metric_key))
-
-    summaries = summaries_with_matching_keyword(metric_key, est.eval_dir())
-    for value in next(summaries).value:
-      if value.tag == metric_key:
-        self.assertTrue(value.HasField('tensor'))
-
-
-class EstimatorPredictTest(test.TestCase):
-
-  def test_input_fn_args(self):
-    expected_mode = model_fn_lib.ModeKeys.PREDICT
-    expected_params = {'batch_size': 10}
-    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
-    input_fn_call_count = [0]
-
-    def _model_fn(features, labels, mode, params, config):
-      del features, labels, params, config
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.]]))
-
-    def _input_fn(mode, params, config):
-      input_fn_call_count[0] += 1
-      self.assertEqual(expected_mode, mode)
-      self.assertEqual(expected_params, params)
-      self.assertEqual(4321, config.tf_random_seed)
-      return dummy_input_fn()
-
-    est = estimator.Estimator(model_fn=_model_fn,
-                              params=expected_params,
-                              config=expected_config)
-    est.train(dummy_input_fn, steps=1)
-    self.assertEqual(0, input_fn_call_count[0])
-    next(est.predict(_input_fn))
-    self.assertEqual(1, input_fn_call_count[0])
-
-  def test_no_checkpoint_uses_init(self):
-    def _model_fn(features, labels, mode, params, config):
-      del features, labels, params, config
-      x = variables.VariableV1([[3.]], name='x')
-      return model_fn_lib.EstimatorSpec(mode, predictions=math_ops.add(x, 1.))
-    est = estimator.Estimator(model_fn=_model_fn)
-    # Expected prediction value is 1 + the value of the Variable that is newly
-    # initialized (since there is no checkpoint).
-    self.assertEqual(4., next(est.predict(dummy_input_fn)))
-
-  def test_no_checkpoint_uses_init_with_warm_starting(self):
-    def _make_model_fn(x):
-      def _variable_creating_and_export_model_fn(features, labels, mode):
-        _, _ = features, labels
-        x_var = variables.VariableV1([[x]], name='x')
-        return model_fn_lib.EstimatorSpec(
-            mode,
-            predictions=math_ops.add(x_var, 1.),
-            loss=constant_op.constant(1.),
-            train_op=state_ops.assign_add(training.get_global_step(), 1),
-            export_outputs={'test': export_output.ClassificationOutput(
-                constant_op.constant([4.2]),
-                constant_op.constant(['label']))})
-      return _variable_creating_and_export_model_fn
-
-    first_est = estimator.Estimator(model_fn=_make_model_fn(3.))
-    first_est.train(dummy_input_fn, steps=10)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    tmpdir = tempfile.mkdtemp()
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    exported_path = first_est.export_savedmodel(export_dir_base,
-                                                serving_input_receiver_fn)
-
-    # Test that we can pass either warm_start_from as an external checkpoint
-    # or an exported SavedModel.
-    est = estimator.Estimator(model_fn=_make_model_fn(30.),
-                              warm_start_from=exported_path)
-    # Prediction here is set to 1 + the value of the Variable that is
-    # warm-started from the SavedModel of the first model (3.), as opposed to
-    # the initialization in the new model_fn (30.).
-    self.assertEqual(4., next(est.predict(dummy_input_fn)))
-
-    est = estimator.Estimator(model_fn=_make_model_fn(40.),
-                              warm_start_from=first_est.model_dir)
-    # Prediction here is set to 1 + the value of the Variable that is
-    # warm-started from a checkpoint of the first model (3.), as opposed to
-    # the initialization in the new model_fn (40.).
-    self.assertEqual(4., next(est.predict(dummy_input_fn)))
-
-  def test_no_trained_model_invalid_checkpoint_path(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
-    with self.assertRaises(ValueError):
-      next(
-          est.predict(
-              dummy_input_fn,
-              checkpoint_path=
-              checkpoint_management.latest_checkpoint('fakedir')))
-
-  def test_tensor_predictions(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    self.assertEqual(10., next(est.predict(dummy_input_fn)))
-
-  def test_predictionhooks_are_used(self):
-    hook = test.mock.MagicMock(
-        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
-
-    def _model_fn_hooks(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.]]),
-          prediction_hooks=[hook])
-
-    est = estimator.Estimator(model_fn=_model_fn_hooks)
-    est.train(dummy_input_fn, steps=1)
-    self.assertFalse(hook.begin.called)
-    next(est.predict(dummy_input_fn))
-    self.assertTrue(hook.begin.called)
-
-  def test_warn_if_no_queue_runner(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    with test.mock.patch.object(logging, 'warning') as mock_log:
-      next(est.predict(dummy_input_fn))
-      self.assertRegexpMatches(
-          str(mock_log.call_args),
-          'Input graph does not.*contain a QueueRunner.')
-
-  def test_skip_warn_if_dataset_returns_features(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.]]))
-
-    def _input_fn():
-      it = dataset_ops.Dataset.from_tensors([1]).make_one_shot_iterator()
-      return it.get_next()
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    with test.mock.patch.object(logging, 'warning') as mock_log:
-      next(est.predict(_input_fn))
-      # The warning should not have keyword QueueRunner.
-      self.assertRegexpMatches(str(mock_log.call_args), '^((?!QueueRunner).)*$')
-
-  def test_skip_warn_if_dataset_returns_features_dict(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.]]))
-
-    def _input_fn():
-      it = dataset_ops.Dataset.from_tensors([1]).make_one_shot_iterator()
-      features = {'age': it.get_next()}
-      return features
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    with test.mock.patch.object(logging, 'warning') as mock_log:
-      next(est.predict(_input_fn))
-      # The warning should not have keyword QueueRunner.
-      self.assertRegexpMatches(str(mock_log.call_args), '^((?!QueueRunner).)*$')
-
-  def test_input_fn_can_return_just_features(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-
-    def _only_features():
-      return {'x': constant_op.constant([[0.]])}
-
-    self.assertEqual([10.], next(est.predict(_only_features)))
-
-  def test_batch_size_mismatch(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions={
-              'y1': constant_op.constant([[10.]]),
-              'y2': constant_op.constant([[12.], [13]])
-          })
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    with self.assertRaisesRegexp(ValueError,
-                                 'Batch length of predictions should be same'):
-      next(est.predict(dummy_input_fn))
-
-  def test_iterate_batches(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions={
-              # First dim is different but the prediction should still work
-              'y1': array_ops.zeros(shape=[3]),
-              'y2': array_ops.zeros(shape=[5, 3])
-          })
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-
-    predictions = next(est.predict(dummy_input_fn, yield_single_examples=False))
-    self.assertAllEqual(predictions['y1'].shape, [3])
-    self.assertAllEqual(predictions['y2'].shape, [5, 3])
-
-  def test_predict_keys_defined_for_tensor(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    with self.assertRaisesRegexp(
-        ValueError,
-        'predict_keys argument is not valid in case of non-dict predictions'):
-      next(est.predict(dummy_input_fn, predict_keys=['y']))
-
-  def test_predict_keys_does_not_exists(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions={
-              'y1': constant_op.constant([[10.]]),
-              'y2': constant_op.constant([[12.]])
-          })
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    with self.assertRaisesRegexp(ValueError,
-                                 'Expected to run at least one output from'):
-      next(est.predict(dummy_input_fn, predict_keys=['y3']))
-
-  def test_return_given_predict_keys(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions={
-              'y1': constant_op.constant([[10.]]),
-              'y2': constant_op.constant([[12.]])
-          })
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    results = next(est.predict(dummy_input_fn, predict_keys=['y1']))
-    self.assertIn('y1', results)
-    self.assertNotIn('y2', results)
-
-  def test_yield_rows_of_tensor(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.], [12.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    results = est.predict(dummy_input_fn)
-    self.assertEqual([10.], next(results))
-    self.assertEqual([12.], next(results))
-
-  def test_yield_rows_of_dict(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions={
-              'y1': constant_op.constant([[10.], [12]]),
-              'y2': constant_op.constant([[0.], [2.]])
-          })
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    results = est.predict(dummy_input_fn)
-    self.assertDictEqual({'y1': [10.], 'y2': [0.]}, next(results))
-    self.assertDictEqual({'y1': [12.], 'y2': [2.]}, next(results))
-
-  def test_hooks_should_be_session_run_hook(self):
-    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
-    est.train(dummy_input_fn, steps=1)
-    with self.assertRaisesRegexp(TypeError, 'must be a SessionRunHook'):
-      next(est.predict(dummy_input_fn, hooks=['NotAHook']))
-
-  def test_hooks_are_used(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[10.], [12.]]))
-
-    step_counter_hook = _StepCounterHook()
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    results = est.predict(dummy_input_fn, hooks=[step_counter_hook])
-    self.assertEqual(0, step_counter_hook.steps)  # not called yet
-    next(results)
-    self.assertEqual(1, step_counter_hook.steps)  # first call
-    next(results)
-    self.assertEqual(1, step_counter_hook.steps)  # it's in same batch
-    next(results)
-    self.assertEqual(2, step_counter_hook.steps)  # next batch
-
-  def test_predict_from_old_model_dir(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      v = variables.VariableV1([[16.]], name='weight')
-      prediction = v * 2
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=prediction)
-
-    est1 = estimator.Estimator(model_fn=_model_fn)
-    est1.train(dummy_input_fn, steps=1)
-    est2 = estimator.Estimator(model_fn=_model_fn, model_dir=est1.model_dir)
-    self.assertEqual([32.], next(est2.predict(dummy_input_fn)))
-
-  def test_predict_from_checkpoint_path(self):
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      v = variables.VariableV1([[16.]], name='weight')
-      prediction = v * 2
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=prediction)
-
-    est1 = estimator.Estimator(model_fn=_model_fn)
-    est1.train(dummy_input_fn, steps=1)
-    est2 = estimator.Estimator(model_fn=_model_fn, model_dir=est1.model_dir)
-    self.assertEqual([32.],
-                     next(
-                         est2.predict(
-                             dummy_input_fn,
-                             checkpoint_path=est2.latest_checkpoint())))
-
-  def test_scaffold_is_used(self):
-
-    def _model_fn_scaffold(features, labels, mode):
-      _, _ = features, labels
-      variables.VariableV1(1., name='weight')
-      self.mock_saver = get_mock_saver()
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          predictions=constant_op.constant([[1.]]),
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          scaffold=training.Scaffold(saver=self.mock_saver))
-
-    est = estimator.Estimator(model_fn=_model_fn_scaffold)
-    est.train(dummy_input_fn, steps=1)
-    next(est.predict(dummy_input_fn))
-    self.assertTrue(self.mock_saver.restore.called)
-
-  def test_features_labels_mode(self):
-    given_features = {'test-features': [[1], [1]]}
-    given_labels = {'test-labels': [[1], [1]]}
-
-    def _input_fn():
-      return given_features, given_labels
-
-    def _model_fn(features, labels, mode):
-      self.features, self.labels, self.mode = features, labels, mode
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[0.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(_input_fn, steps=1)
-    next(est.predict(_input_fn))
-    self.assertEqual(given_features, self.features)
-    self.assertIsNone(self.labels)
-    self.assertEqual(model_fn_lib.ModeKeys.PREDICT, self.mode)
-
-  def test_graph_initialization_global_step_and_random_seed(self):
-    expected_random_seed = run_config.RunConfig().tf_random_seed
-    def _model_fn(features, labels, mode):
-      _, _, _ = features, labels, mode
-      self.assertIsNotNone(training.get_global_step())
-      self.assertEqual(expected_random_seed, ops.get_default_graph().seed)
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[0.]]))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    next(est.predict(dummy_input_fn))
-
-
-def _model_fn_for_export_tests(features, labels, mode):
-  _, _ = features, labels
-  variables.VariableV1(1., name='weight')
-  scores = constant_op.constant([3.])
-  classes = constant_op.constant(['wumpus'])
-  update_global_step = state_ops.assign_add(training.get_global_step(), 1)
-  with ops.control_dependencies([update_global_step]):
-    train_op = constant_op.constant(2.)
-  return model_fn_lib.EstimatorSpec(
-      mode,
-      predictions=constant_op.constant(10.),
-      loss=constant_op.constant(1.),
-      train_op=train_op,
-      export_outputs={
-          'test': export_output.ClassificationOutput(scores, classes)})
-
-
-def _x_y_input_fn():
-  return ({'x': constant_op.constant([[1], [1]]),
-           'y': constant_op.constant([[2], [2]])},
-          constant_op.constant([[1], [1]]))
-
-
-def _model_fn_with_x_y(features, labels, mode):
-  _ = labels
-  variables.VariableV1(1., name='weight')
-  scores = constant_op.constant([3.])
-  classes = constant_op.constant(['wumpus'])
-  if mode == model_fn_lib.ModeKeys.PREDICT:
-    variables.VariableV1(36., name='name_collision')
-    return model_fn_lib.EstimatorSpec(
-        mode,
-        predictions=constant_op.constant(10.),
-        export_outputs={
-            'test': export_output.ClassificationOutput(scores, classes)})
-  else:
-    prefix = 'eval_' if mode == model_fn_lib.ModeKeys.EVAL else ''
-
-    multiplied = math_ops.multiply(
-        features['x'], features['y'], name='{}multiplied'.format(prefix))
-    mean = metrics_module.Mean(name='{}mean'.format(prefix))
-    mean.update_state(features['x'] - features['y'])
-    eval_metrics = {
-        'mean1':
-            mean,
-        'mean2':
-            metrics_lib.mean(
-                features['x'] - features['y'], name='{}mean'.format(prefix))
-    }
-    variables.VariableV1(1., name='later_var')
-    variables.VariableV1(3., name='name_collision')
-    return model_fn_lib.EstimatorSpec(
-        mode,
-        predictions=multiplied,
-        loss=constant_op.constant(1.),
-        train_op=state_ops.assign_add(training.get_global_step(), 1),
-        eval_metric_ops=eval_metrics)
-
-
-def _model_fn_with_saveables_for_export_tests(features, labels, mode):
-  _, _ = features, labels
-  table = saver_test_utils.CheckpointedOp(name='v2')
-  update_global_step = state_ops.assign_add(training.get_global_step(), 1)
-  with ops.control_dependencies([update_global_step]):
-    train_op = table.insert('k1', 30.0)
-  prediction = table.lookup('k1', 0.0)
-  return model_fn_lib.EstimatorSpec(
-      mode,
-      predictions=prediction,
-      loss=constant_op.constant(1.),
-      train_op=train_op,
-      export_outputs={
-          'test': export_output.PredictOutput({'prediction': prediction})})
-
-
-def _get_serving_input_receiver_fn():
-  feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                  'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-  return export.build_parsing_serving_input_receiver_fn(feature_spec)
-
-
-def _get_supervised_input_receiver_fn():
-  feature_spec = {
-      'x': array_ops.placeholder(
-          dtype=dtypes.int64, shape=(2, 1), name='feature_x'),
-      'y': array_ops.placeholder(
-          dtype=dtypes.int64, shape=(2, 1), name='feature_y')
-      }
-  label_spec = array_ops.placeholder(
-      dtype=dtypes.float32, shape=[1], name='truth')
-
-  return export.build_raw_supervised_input_receiver_fn(feature_spec, label_spec)
-
-
-_VOCAB_FILE_CONTENT = 'emerson\nlake\npalmer\n'
-_EXTRA_FILE_CONTENT = 'kermit\npiggy\nralph\n'
-
-
-class EstimatorExportTest(test.TestCase):
-
-  def test_export_savedmodel_proto_roundtrip_raw_receiver(self):
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
-    est.train(input_fn=dummy_input_fn, steps=1)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est.export_savedmodel(
-        export_dir_base, serving_input_receiver_fn)
-
-    # Check that all the files are in the right places.
-    self.assertTrue(gfile.Exists(export_dir_base))
-    self._validate_exported_files(export_dir)
-
-    # Restore, to validate that the export was well-formed.
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('input_example_tensor' in graph_ops)
-        self.assertTrue('ParseExample/ParseExample' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-  def test_export_saved_model_train(self):
-    self._test_export_saved_model_for_mode(
-        _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.TRAIN)
-
-  def test_export_saved_model_eval(self):
-    self._test_export_saved_model_for_mode(
-        _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.EVAL)
-
-  def test_export_saved_model_predict(self):
-    self._test_export_saved_model_for_mode(
-        _get_serving_input_receiver_fn(), model_fn_lib.ModeKeys.PREDICT)
-
-  def _test_export_saved_model_for_mode(self, input_receiver_fn, mode):
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
-    est.train(input_fn=_x_y_input_fn, steps=1)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est._export_saved_model_for_mode(
-        export_dir_base, input_receiver_fn, mode=mode)
-
-    # Check that all the files are in the right places.
-    self.assertTrue(gfile.Exists(export_dir_base))
-    self._validate_exported_files(export_dir)
-
-    # Restore, to validate that the export was well-formed.
-    tag_set = model_fn_lib.EXPORT_TAG_MAP[mode]
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, tag_set, export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertFalse('name_collision_1' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_receiver_map(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('input_example_tensor' in graph_ops)
-        self.assertTrue('ParseExample/ParseExample' in graph_ops)
-        self.assertFalse('feature_x' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_train_only(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.TRAINING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('multiplied' in graph_ops)
-        self.assertTrue('mean/update_op' in graph_ops)
-        self.assertFalse('eval_multiplied' in graph_ops)
-        self.assertTrue('feature_x' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_eval_only(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.EVAL], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('eval_multiplied' in graph_ops)
-        self.assertTrue('eval_mean/value' in graph_ops)
-        self.assertFalse('multiplied' in graph_ops)
-        self.assertTrue('feature_x' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_no_serving(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.TRAINING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('multiplied' in graph_ops)
-        self.assertFalse('eval_multiplied' in graph_ops)
-        self.assertTrue('feature_x' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.EVAL], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('eval_multiplied' in graph_ops)
-        self.assertFalse('multiplied' in graph_ops)
-        # TODO(karmel): is this the desired behavior when names are shared?
-        self.assertTrue('feature_x_1' in graph_ops)
-        self.assertTrue('feature_y_1' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_three_defs(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    # Restore, to validate that the export was well-formed.
-    for tag_set in model_fn_lib.EXPORT_TAG_MAP.values():
-      with ops.Graph().as_default() as graph:
-        with session.Session(graph=graph) as sess:
-          loader.load(sess, tag_set, export_dir)
-          graph_ops = [x.name for x in graph.get_operations()]
-          self.assertTrue('global_step/Assign' in graph_ops)
-          self.assertTrue('global_step/Initializer/zeros' in graph_ops)
-          self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_proto_roundtrip_all_vars(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.TRAINING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('later_var' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertFalse('later_var' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_all_saved_models_name_collision(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-    export_dir, tmpdir = self._test_export_all_saved_models(
-        input_receiver_fn_map)
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.TRAINING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('name_collision' in graph_ops)
-        self.assertFalse('name_collision_1' in graph_ops)
-        collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-        self.assertEqual(3, collection_vars[-1].eval())
-
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('name_collision' in graph_ops)
-        self.assertFalse('name_collision_1' in graph_ops)
-        collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-        # This is a non-obvious detail: when we load the estimator spec
-        # for predict, name_collision gets set to 36. However, we then restore
-        # from checkpoint, which should overwrite that var and make it the 3
-        # from training. In practice, this would not be a good way to write
-        # a model_fn, but leaving this check in for now to ensure consistency
-        # with what would happen given our current order of spec, then
-        # checkpoint.
-        self.assertEqual(3, collection_vars[-1].eval())
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def _test_export_all_saved_models(self, input_receiver_fn_map):
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn_with_x_y)
-    est.train(input_fn=_x_y_input_fn, steps=1)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est._export_all_saved_models(
-        export_dir_base, input_receiver_fn_map)
-
-    # Check that all the files are in the right places.
-    self.assertTrue(gfile.Exists(export_dir_base))
-
-    self._validate_exported_files(export_dir)
-
-    return export_dir, tmpdir
-
-  def _validate_exported_files(self, export_dir):
-    self.assertTrue(gfile.Exists(export_dir))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('saved_model.pb'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables/variables.index'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables/variables.data-00000-of-00001'))))
-
-  def test_export_all_saved_models_var_not_found(self):
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-
-    def _model_fn_with_predict_only_vars(features, labels, mode):
-      _, _ = features, labels
-      if mode == model_fn_lib.ModeKeys.PREDICT:
-        variables.VariableV1(1., name='only_in_predict')
-      else:
-        variables.VariableV1(1., name='otherwise')
-
-      prediction = constant_op.constant(1.)
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          predictions=prediction,
-          loss=constant_op.constant(1.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          export_outputs={
-              'test': export_output.PredictOutput({'prediction': prediction})
-          })
-
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn_with_predict_only_vars)
-    est.train(input_fn=_x_y_input_fn, steps=1)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-
-    err_regex = r'Could not load all requested variables[\w\W]*infer'
-    with self.assertRaisesRegexp(ValueError, err_regex):
-      est._export_all_saved_models(export_dir_base, input_receiver_fn_map)
-
-  def test_export_all_saved_models_metric_operation(self):
-    """Ensures metrics ops.Operations can be expoerted (b/109740581)."""
-
-    def _model_fn(features, labels, mode):
-      del features, labels  # Unused
-      metric_obj = metrics_module.Mean()
-      metric_obj.update_state(constant_op.constant([0]))
-      eval_metrics = {
-          'metrics1': (constant_op.constant([0]), control_flow_ops.no_op()),
-          'metrics2': metric_obj,
-      }
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          predictions=constant_op.constant(10.),
-          loss=constant_op.constant(1.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          eval_metric_ops=eval_metrics)
-
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(input_fn=dummy_input_fn, steps=1)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('metric_operation_export'))
-
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()}
-
-    export_dir = est._export_all_saved_models(
-        export_dir_base, input_receiver_fn_map)
-
-    # Restore, to validate that the export was well-formed.
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        meta_graph = loader.load(sess, [tag_constants.EVAL], export_dir)
-        sig_outputs = meta_graph.signature_def[
-            model_fn_lib.ModeKeys.EVAL].outputs
-        self.assertTrue(sig_outputs['metrics1/update_op'].name.startswith(
-            'metric_op_wrapper'))
-        self.assertTrue(sig_outputs['metrics2/update_op'].name.startswith(
-            'metric_op_wrapper'))
-
-  def test_export_savedmodel_with_saveables_proto_roundtrip(self):
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(
-        model_fn=_model_fn_with_saveables_for_export_tests)
-    est.train(input_fn=dummy_input_fn, steps=1)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est.export_savedmodel(
-        export_dir_base, serving_input_receiver_fn)
-
-    # Check that all the files are in the right places.
-    self.assertTrue(gfile.Exists(export_dir_base))
-    self.assertTrue(gfile.Exists(export_dir))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('saved_model.pb'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables/variables.index'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables/variables.data-00000-of-00001'))))
-
-    # Restore, to validate that the export was well-formed.
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('input_example_tensor' in graph_ops)
-        self.assertTrue('ParseExample/ParseExample' in graph_ops)
-        # The original saver is used to restore variables
-        self.assertTrue('save/LookupTableImportV2' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_savedmodel_assets(self):
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
-    est.train(input_fn=dummy_input_fn, steps=1)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-
-    # Create a fake asset.
-    vocab_file_name = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('my_vocab_file'))
-    vocab_file = gfile.GFile(vocab_file_name, mode='w')
-    vocab_file.write(_VOCAB_FILE_CONTENT)
-    vocab_file.close()
-
-    # hack in an op that uses the asset, in order to test asset export.
-    # this is not actually valid, of course.
-    def serving_input_receiver_with_asset_fn():
-      features, receiver_tensor, _ = serving_input_receiver_fn()
-      filename = ops.convert_to_tensor(vocab_file_name,
-                                       dtypes.string,
-                                       name='asset_filepath')
-      ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, filename)
-      features['bogus_filename'] = filename
-
-      return export.ServingInputReceiver(features, receiver_tensor)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est.export_savedmodel(
-        export_dir_base, serving_input_receiver_with_asset_fn)
-
-    # Check that the asset files are in the right places.
-    expected_vocab_file_name = os.path.join(
-        compat.as_bytes(export_dir), compat.as_bytes('assets/my_vocab_file'))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir), compat.as_bytes('assets'))))
-    self.assertTrue(gfile.Exists(expected_vocab_file_name))
-    self.assertEqual(
-        compat.as_bytes(_VOCAB_FILE_CONTENT),
-        compat.as_bytes(gfile.GFile(expected_vocab_file_name).read()))
-
-    # Restore, to validate that the export was well-formed.
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        assets = [
-            x.eval()
-            for x in graph.get_collection(ops.GraphKeys.ASSET_FILEPATHS)
-        ]
-        self.assertItemsEqual([vocab_file_name], assets)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('input_example_tensor' in graph_ops)
-        self.assertTrue('ParseExample/ParseExample' in graph_ops)
-        self.assertTrue('asset_filepath' in graph_ops)
-        self.assertTrue('weight' in graph_ops)
-
-    # cleanup
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_savedmodel_extra_assets(self):
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
-    est.train(input_fn=dummy_input_fn, steps=1)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-
-    # Create a fake asset.
-    extra_file_name = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('my_extra_file'))
-    extra_file = gfile.GFile(extra_file_name, mode='w')
-    extra_file.write(_EXTRA_FILE_CONTENT)
-    extra_file.close()
-
-    # Perform the export.
-    assets_extra = {'some/sub/directory/my_extra_file': extra_file_name}
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est.export_savedmodel(export_dir_base,
-                                       serving_input_receiver_fn,
-                                       assets_extra=assets_extra)
-
-    # Check that the asset files are in the right places.
-    expected_extra_path = os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('assets.extra/some/sub/directory/my_extra_file'))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir), compat.as_bytes('assets.extra'))))
-    self.assertTrue(gfile.Exists(expected_extra_path))
-    self.assertEqual(
-        compat.as_bytes(_EXTRA_FILE_CONTENT),
-        compat.as_bytes(gfile.GFile(expected_extra_path).read()))
-
-    # cleanup
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_savedmodel_tensor_features(self):
-    """Test that models accepting a single raw Tensor can be exported.
-
-    See https://github.com/tensorflow/tensorflow/issues/11674
-
-    If the model_fn and receiver_fn accept raw tensors rather than dictionaries
-    as input, export_savedmodel should be okay with that, too.
-
-    """
-
-    tmpdir = tempfile.mkdtemp()
-
-    def _input_fn_tensor_features():
-      t = array_ops.constant([1, 2, 3], dtype=dtypes.float32, shape=[1, 3])
-      return (t, None)
-
-    def _model_fn_tensor_features(features, labels, mode):
-      _ = labels
-      prediction = math_ops.matmul(features, features, transpose_b=True)
-
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          predictions=prediction,
-          loss=constant_op.constant(1.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          export_outputs={
-              'test': export_output.PredictOutput({'prediction': prediction})
-          })
-
-    def _serving_input_receiver_fn():
-      feat = array_ops.placeholder(dtype=dtypes.float32)
-      return export.TensorServingInputReceiver(
-          features=feat, receiver_tensors=feat)
-
-    est = estimator.Estimator(model_fn=_model_fn_tensor_features)
-    est.train(input_fn=_input_fn_tensor_features, steps=1)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est.export_savedmodel(
-        export_dir_base, _serving_input_receiver_fn)
-
-    # Restore, to validate that the export was well-formed.
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name.lower() for x in graph.get_operations()]
-        self.assertTrue('const' in graph_ops)
-        self.assertTrue('matmul' in graph_ops)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_scaffold_is_used_for_saver(self):
-    tmpdir = tempfile.mkdtemp()
-
-    def _model_fn_scaffold(features, labels, mode):
-      _, _ = features, labels
-      variables.VariableV1(1., name='weight')
-      self.mock_saver = get_mock_saver()
-      scores = constant_op.constant([3.])
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          predictions=constant_op.constant([[1.]]),
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          scaffold=training.Scaffold(saver=self.mock_saver),
-          export_outputs={'test': export_output.ClassificationOutput(scores)})
-
-    est = estimator.Estimator(model_fn=_model_fn_scaffold)
-    est.train(dummy_input_fn, steps=1)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    est.export_savedmodel(export_dir_base, serving_input_receiver_fn)
-
-    self.assertTrue(self.mock_saver.restore.called)
-    self.assertTrue(self.mock_saver.export_meta_graph.called)
-    self.assertTrue(self.mock_saver.save.called)
-
-  def test_scaffold_is_used_for_saver_multiple_modes(self):
-    tmpdir = tempfile.mkdtemp()
-    savers = {'predict_saver': None, 'train_saver': None}
-
-    def _model_fn_scaffold(features, labels, mode):
-      _, _ = features, labels
-      variables.VariableV1(1., name='weight')
-
-      scores = constant_op.constant([3.])
-      if mode == model_fn_lib.ModeKeys.PREDICT:
-        savers['predict_saver'] = get_mock_saver()
-        scaffold = training.Scaffold(saver=savers['predict_saver'])
-      elif mode == model_fn_lib.ModeKeys.TRAIN:
-        savers['train_saver'] = get_mock_saver()
-        scaffold = training.Scaffold(saver=savers['train_saver'])
-      else:
-        scaffold = training.Scaffold()
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          predictions=constant_op.constant([[1.]]),
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          scaffold=scaffold,
-          export_outputs={'test': export_output.ClassificationOutput(scores)})
-
-    est = estimator.Estimator(model_fn=_model_fn_scaffold)
-    est.train(dummy_input_fn, steps=1)
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    est._export_all_saved_models(export_dir_base, input_receiver_fn_map)
-
-    self.assertTrue(savers['train_saver'].restore.called)
-    self.assertEqual(savers['train_saver'].export_meta_graph.call_count, 1)
-    self.assertEqual(savers['train_saver'].save.call_count, 1)
-
-    self.assertTrue(savers['predict_saver'].restore.called)
-    self.assertEqual(savers['predict_saver'].export_meta_graph.call_count, 1)
-    self.assertEqual(savers['predict_saver'].save.call_count, 0)
-
-  def test_scaffold_is_used_for_local_init(self):
-    tmpdir = tempfile.mkdtemp()
-
-    def _model_fn_scaffold(features, labels, mode):
-      _, _ = features, labels
-      my_int = variables.VariableV1(1, name='my_int',
-                                    collections=[ops.GraphKeys.LOCAL_VARIABLES])
-      _ = training.get_or_create_steps_per_run_variable()
-      scores = constant_op.constant([3.])
-      with ops.control_dependencies([
-          variables.local_variables_initializer(),
-          lookup_ops.tables_initializer()
-      ]):
-        assign_op = state_ops.assign(my_int, 12345)
-
-      # local_initSop must be an Operation, not a Tensor.
-      custom_local_init_op = control_flow_ops.group(assign_op)
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          predictions=constant_op.constant([[1.]]),
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          scaffold=training.Scaffold(local_init_op=custom_local_init_op),
-          export_outputs={'test': export_output.ClassificationOutput(scores)})
-
-    est = estimator.Estimator(model_fn=_model_fn_scaffold)
-    est.train(dummy_input_fn, steps=1)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est.export_savedmodel(export_dir_base,
-                                       serving_input_receiver_fn)
-
-    # Restore, to validate that the custom local_init_op runs.
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        my_int = graph.get_tensor_by_name('my_int:0')
-        my_int_value = sess.run(my_int)
-        self.assertEqual(12345, my_int_value)
-
-  def test_scaffold_is_used_for_local_init_multiple_modes(self):
-    tmpdir = tempfile.mkdtemp()
-
-    def _model_fn_scaffold(features, labels, mode):
-      _, _ = features, labels
-      my_int = variables.VariableV1(1, name='my_int',
-                                    collections=[ops.GraphKeys.LOCAL_VARIABLES])
-      scores = constant_op.constant([3.])
-      with ops.control_dependencies([
-          variables.local_variables_initializer(),
-          lookup_ops.tables_initializer()
-      ]):
-        assign_op = state_ops.assign(my_int, 12345)
-
-      custom_local_init_op = None
-      if mode == model_fn_lib.ModeKeys.PREDICT:
-        # local_initSop must be an Operation, not a Tensor.
-        custom_local_init_op = control_flow_ops.group(assign_op)
-
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          predictions=constant_op.constant([[1.]]),
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          scaffold=training.Scaffold(local_init_op=custom_local_init_op),
-          export_outputs={'test': export_output.ClassificationOutput(scores)})
-
-    est = estimator.Estimator(model_fn=_model_fn_scaffold)
-    est.train(dummy_input_fn, steps=1)
-    input_receiver_fn_map = {
-        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
-        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
-    }
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir = est._export_all_saved_models(
-        export_dir_base, input_receiver_fn_map)
-
-    # Restore, to validate that the custom local_init_op runs.
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.SERVING], export_dir)
-        my_int = graph.get_tensor_by_name('my_int:0')
-        my_int_value = sess.run(my_int)
-        self.assertEqual(12345, my_int_value)
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        loader.load(sess, [tag_constants.TRAINING], export_dir)
-        my_int = graph.get_tensor_by_name('my_int:0')
-        my_int_value = sess.run(my_int)
-        self.assertEqual(1, my_int_value)
-
-  def test_features_labels_mode(self):
-    given_features = {'test-features': constant_op.constant([[1], [1]])}
-
-    def serving_input_receiver_fn():
-      return export.ServingInputReceiver(
-          given_features, array_ops.placeholder(dtype=dtypes.string))
-
-    def _model_fn(features, labels, mode):
-      self.features, self.labels, self.mode = features, labels, mode
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[0.]]),
-          export_outputs={
-              'test': export_output.ClassificationOutput(
-                  constant_op.constant([[0.]]))
-          })
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn)
-    self.assertEqual(given_features, self.features)
-    self.assertIsNone(self.labels)
-    self.assertEqual(model_fn_lib.ModeKeys.PREDICT, self.mode)
-
-  def test_graph_initialization_global_step_and_random_seed(self):
-    expected_random_seed = run_config.RunConfig().tf_random_seed
-    def _model_fn(features, labels, mode):
-      _, _, _ = features, labels, mode
-      self.assertIsNotNone(training.get_global_step())
-      self.assertEqual(expected_random_seed, ops.get_default_graph().seed)
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(0.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1),
-          predictions=constant_op.constant([[0.]]),
-          export_outputs={
-              'test': export_output.ClassificationOutput(
-                  constant_op.constant([[0.]]))
-          })
-
-    def serving_input_receiver_fn():
-      return export.ServingInputReceiver(
-          {'test-features': constant_op.constant([[1], [1]])},
-          array_ops.placeholder(dtype=dtypes.string))
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(dummy_input_fn, steps=1)
-    est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn)
-
-  def test_export_savedmodel_respects_soft_placement(self):
-    def model_fn_with_a_gpu_op_but_no_kernel(features, labels, mode):
-      _, _ = features, labels
-      table = saver_test_utils.CheckpointedOp(name='v2')
-
-      update_global_step = state_ops.assign_add(training.get_global_step(), 1)
-      with ops.control_dependencies([update_global_step]):
-        train_op = table.insert('k1', 30.0)
-
-      #  In this test, there are no GPUs available.  The goal is to verify that
-      #  export_savedmodel executes nevertheless.
-      with ops.device('/gpu:0'):
-        string_op = string_ops.as_string(update_global_step)
-
-      with ops.control_dependencies([string_op]):
-        prediction = table.lookup('k1', 0.0)
-
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          predictions=prediction,
-          loss=constant_op.constant(1.),
-          train_op=train_op,
-          export_outputs={
-              'test': export_output.PredictOutput({
-                  'prediction': prediction
-              })
-          })
-
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(
-        model_fn=model_fn_with_a_gpu_op_but_no_kernel)
-    est.train(input_fn=dummy_input_fn, steps=1)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-
-    export_dir = est.export_savedmodel(
-        export_dir_base, serving_input_receiver_fn)
-
-    # At this point, if export_savedmodel executed with
-    # allow_soft_placement=True, then the GPU-assigned operation was silently
-    # placed on the CPU.  Otherwise, an exception would have been raised
-    # related to the fact that the requested GPU device isn't available.
-
-    # Expectations below assume that export_savedmodel has completed normally.
-    self.assertTrue(gfile.Exists(export_dir_base))
-    self.assertTrue(gfile.Exists(export_dir))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('saved_model.pb'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables/variables.index'))))
-    self.assertTrue(gfile.Exists(os.path.join(
-        compat.as_bytes(export_dir),
-        compat.as_bytes('variables/variables.data-00000-of-00001'))))
-
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_savedmodel_proto_strip_default_attrs(self):
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
-    est.train(input_fn=dummy_input_fn, steps=1)
-    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
-                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('export'))
-    export_dir_stripped = est.export_savedmodel(
-        export_dir_base, serving_input_receiver_fn, strip_default_attrs=True)
-    export_dir_not_stripped = est.export_savedmodel(
-        export_dir_base, serving_input_receiver_fn, strip_default_attrs=False)
-
-    # Load the SavedModel from disk as-is to verify default attrs
-    # are stripped. Reimporting the SavedModel via the loader causes the
-    # default attrs to be populated in the NodeDefs.
-
-    # pylint: disable=protected-access
-    saved_model_stripped_pb = loader_impl._parse_saved_model(
-        export_dir_stripped)
-    saved_model_not_stripped_pb = loader_impl._parse_saved_model(
-        export_dir_not_stripped)
-    self.assertIsNotNone(saved_model_stripped_pb)
-    self.assertIsNotNone(saved_model_not_stripped_pb)
-    # pylint: enable=protected-access
-
-    meta_graph_def_stripped = [
-        x for x in saved_model_stripped_pb.meta_graphs
-        if x.meta_info_def.tags == [tag_constants.SERVING]][0]
-    meta_graph_def_not_stripped = [
-        x for x in saved_model_not_stripped_pb.meta_graphs
-        if x.meta_info_def.tags == [tag_constants.SERVING]][0]
-
-    # "weight" node in graph is a "Variable" Op with 2 default valued attrs.
-    #   o "container"    : "".
-    #   o "shared_name"  : "".
-
-    # saved_model_stripped_pb was exported with strip_default_attrs set to True.
-    # "weight" node shouldn't have attributes "container" and "shared_name".
-    node_def = test_util.get_node_def_from_graph(
-        'weight', meta_graph_def_stripped.graph_def)
-    self.assertNotIn('container', node_def.attr)
-    self.assertNotIn('shared_name', node_def.attr)
-
-    # saved_model_not_stripped_pb was exported with strip_default_attrs
-    # disabled. "weight" node should have attributes "container" and
-    # "shared_name".
-    node_def = test_util.get_node_def_from_graph(
-        'weight', meta_graph_def_not_stripped.graph_def)
-    self.assertIn('container', node_def.attr)
-    self.assertIn('shared_name', node_def.attr)
-
-    # Clean up.
-    gfile.DeleteRecursively(tmpdir)
-
-  def test_export_savedmodel_no_export_outputs(self):
-    """Ensure that an EstimatorSpec without outputs defined can be exported."""
-
-    def _model_fn(features, labels, mode):
-      _, _ = features, labels
-      variables.VariableV1(1., name='weight')
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          predictions=constant_op.constant(10.),
-          loss=constant_op.constant(1.),
-          train_op=state_ops.assign_add(training.get_global_step(), 1))
-
-    tmpdir = tempfile.mkdtemp()
-    est = estimator.Estimator(model_fn=_model_fn)
-    est.train(input_fn=dummy_input_fn, steps=1)
-
-    # Perform the export.
-    export_dir_base = os.path.join(
-        compat.as_bytes(tmpdir), compat.as_bytes('no_export_outputs'))
-    export_dir = est.export_savedmodel(
-        export_dir_base, _get_serving_input_receiver_fn())
-
-    # Check that all the files are in the right places.
-    self.assertTrue(gfile.Exists(export_dir_base))
-    self._validate_exported_files(export_dir)
-
-    # Restore, to validate that the export was well-formed.
-    with ops.Graph().as_default() as graph:
-      with session.Session(graph=graph) as sess:
-        meta_graph = loader.load(sess, [tag_constants.SERVING], export_dir)
-        graph_ops = [x.name for x in graph.get_operations()]
-        self.assertTrue('weight' in graph_ops)
-
-        sig_def = meta_graph.signature_def
-        self.assertEqual(len(sig_def), 1)
-        sig_outputs = sig_def[
-            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY].outputs
-        self.assertEqual(sig_outputs['output'].name, 'Const:0')
-
-
-class EstimatorHookOrderingTest(test.TestCase):
-
-  def testCustomHooksAreCalledBeforeNanTensorHook(self):
-
-    def nan_making_model_fn(mode, features, labels):
-      """A graph that generates NaN's for testing."""
-      del features, labels
-
-      global_step = variables.VariableV1(
-          0, dtype=dtypes.int64, name='global_step')
-      inc_global_step = state_ops.assign_add(global_step, 1)
-      nan_const = constant_op.constant(np.nan, dtype=dtypes.float32)
-      loss = control_flow_ops.cond(
-          inc_global_step > 1, lambda: nan_const, lambda: 1.0)
-
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          predictions=global_step.read_value(),
-          loss=loss,
-          train_op=inc_global_step)
-
-    def empty_input_fn():
-      return dict(), None
-
-    class AfterRunCountingHook(session_run_hook.SessionRunHook):
-      """Hooks that counts the number of times after_run() is called."""
-
-      def __init__(self):
-        self.after_run_count = 0
-
-      def after_run(self, run_context, run_values):
-        del run_context, run_values
-        self.after_run_count += 1
-
-    test_hook = AfterRunCountingHook()
-    est = estimator.Estimator(model_fn=nan_making_model_fn)
-    with self.assertRaises(basic_session_run_hooks.NanLossDuringTrainingError):
-      est.train(input_fn=empty_input_fn, steps=2, hooks=[test_hook])
-    self.assertEqual(2, test_hook.after_run_count)
-
-
-class EstimatorIntegrationTest(test.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_complete_flow_with_a_simple_linear_model(self):
-
-    def _model_fn(features, labels, mode):
-      predictions = layers.dense(
-          features['x'], 1, kernel_initializer=init_ops.zeros_initializer())
-      export_outputs = {
-          'predictions': export_output.RegressionOutput(predictions)
-      }
-
-      if mode == model_fn_lib.ModeKeys.PREDICT:
-        return model_fn_lib.EstimatorSpec(
-            mode, predictions=predictions, export_outputs=export_outputs)
-
-      loss = losses.mean_squared_error(labels, predictions)
-      train_op = training.GradientDescentOptimizer(learning_rate=0.5).minimize(
-          loss, training.get_global_step())
-      mean = metrics_module.Mean()
-      mean.update_state(loss)
-      eval_metric_ops = {
-          'absolute_error':
-              metrics_lib.mean_absolute_error(labels, predictions),
-          'mean':
-              mean,
-      }
-
-      return model_fn_lib.EstimatorSpec(
-          mode,
-          predictions=predictions,
-          loss=loss,
-          train_op=train_op,
-          eval_metric_ops=eval_metric_ops,
-          export_outputs=export_outputs)
-
-    est = estimator.Estimator(model_fn=_model_fn)
-    data = np.linspace(0., 1., 100, dtype=np.float32).reshape(-1, 1)
-
-    # TRAIN
-    # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, y=data, batch_size=50, num_epochs=None, shuffle=True)
-    est.train(train_input_fn, steps=200)
-
-    # EVALUATE
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, y=data, batch_size=50, num_epochs=1, shuffle=True)
-    scores = est.evaluate(eval_input_fn)
-    self.assertEqual(200, scores['global_step'])
-    self.assertGreater(0.1, scores['absolute_error'])
-    self.assertAlmostEqual(4.4e-14, scores['mean'], places=2)
-
-    # PREDICT
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, y=None, batch_size=10, num_epochs=1, shuffle=False)
-    predictions = list(est.predict(predict_input_fn))
-    self.assertAllClose(data, predictions, atol=0.01)
-
-    # EXPORT
-    feature_spec = {'x': parsing_ops.FixedLenFeature([1], dtypes.float32)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
-                                       serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/export/__init__.py b/tensorflow/python/estimator/export/__init__.py
index e69de29bb2..0ba905a148 100644
--- a/tensorflow/python/estimator/export/__init__.py
+++ b/tensorflow/python/estimator/export/__init__.py
@@ -0,0 +1,32 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""export python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow_estimator.python.estimator import export
+
+# Include attrs that start with single underscore.
+export.__all__ = [s for s in dir(export) if not s.startswith('__')]
+
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.export import *
diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py
index 55aace5fa9..fd1616adea 100644
--- a/tensorflow/python/estimator/export/export.py
+++ b/tensorflow/python/estimator/export/export.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,625 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Configuration and utilities for receiving inputs at serving time."""
+"""export python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import os
-
-import six
-
-from tensorflow.python.estimator import util
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.saved_model import signature_def_utils
-from tensorflow.python.util import compat
-from tensorflow.python.util.tf_export import estimator_export
-
-_SINGLE_FEATURE_DEFAULT_NAME = 'feature'
-_SINGLE_RECEIVER_DEFAULT_NAME = 'input'
-_SINGLE_LABEL_DEFAULT_NAME = 'label'
-
-_SINGLE_TENSOR_DEFAULT_NAMES = {
-    'feature': _SINGLE_FEATURE_DEFAULT_NAME,
-    'label': _SINGLE_LABEL_DEFAULT_NAME,
-    'receiver_tensor': _SINGLE_RECEIVER_DEFAULT_NAME,
-    'receiver_tensors_alternative': _SINGLE_RECEIVER_DEFAULT_NAME
-}
-
-
-def _wrap_and_check_input_tensors(tensors, field_name):
-  """Ensure that tensors is a dict of str to Tensor mappings.
-
-  Args:
-    tensors: dict of str to Tensors, or a single Tensor.
-    field_name: name of the member field of `ServingInputReceiver`
-      whose value is being passed to `tensors`.
-
-  Returns:
-    dict of str to Tensors; this is the original dict if one was passed, or
-    the original tensor wrapped in a dictionary.
-
-  Raises:
-    ValueError: if tensors is None, or has non-string keys,
-      or non-Tensor values
-  """
-  if tensors is None:
-    raise ValueError('{}s must be defined.'.format(field_name))
-  if not isinstance(tensors, dict):
-    tensors = {_SINGLE_TENSOR_DEFAULT_NAMES[field_name]: tensors}
-  for name, tensor in tensors.items():
-    _check_tensor_key(name, error_label=field_name)
-    _check_tensor(tensor, name, error_label=field_name)
-  return tensors
-
-
-def _check_tensor(tensor, name, error_label='feature'):
-  """Check that passed `tensor` is a Tensor or SparseTensor."""
-  if not (isinstance(tensor, ops.Tensor) or
-          isinstance(tensor, sparse_tensor.SparseTensor)):
-    fmt_name = ' {}'.format(name) if name else ''
-    value_error = ValueError('{}{} must be a Tensor or SparseTensor.'.format(
-        error_label, fmt_name))
-    # NOTE(ericmc): This if-else block is a specific carve-out for
-    # LabeledTensor, which has a `.tensor` attribute and which is
-    # convertible to tf.Tensor via ops.convert_to_tensor.
-    # Allowing all types convertible to tf.Tensor is considered by soergel@
-    # to be too permissive.
-    # TODO(soergel): accept any type convertible to Tensor,
-    # as in cl/193238295 snapshot #6.
-    if hasattr(tensor, 'tensor'):
-      try:
-        ops.convert_to_tensor(tensor)
-      except TypeError:
-        raise value_error
-    else:
-      raise value_error
-
-
-def _check_tensor_key(name, error_label='feature'):
-  if not isinstance(name, six.string_types):
-    raise ValueError('{} keys must be strings: {}.'.format(error_label, name))
-
-
-@estimator_export('estimator.export.ServingInputReceiver')
-class ServingInputReceiver(
-    collections.namedtuple(
-        'ServingInputReceiver',
-        ['features', 'receiver_tensors', 'receiver_tensors_alternatives'])):
-  """A return type for a serving_input_receiver_fn.
-
-  The expected return values are:
-    features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
-      `SparseTensor`, specifying the features to be passed to the model. Note:
-      if `features` passed is not a dict, it will be wrapped in a dict with a
-      single entry, using 'feature' as the key.  Consequently, the model must
-      accept a feature dict of the form {'feature': tensor}.  You may use
-      `TensorServingInputReceiver` if you want the tensor to be passed as is.
-    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
-      or `SparseTensor`, specifying input nodes where this receiver expects to
-      be fed by default.  Typically, this is a single placeholder expecting
-      serialized `tf.Example` protos.
-    receiver_tensors_alternatives: a dict of string to additional
-      groups of receiver tensors, each of which may be a `Tensor`,
-      `SparseTensor`, or dict of string to `Tensor` or`SparseTensor`.
-      These named receiver tensor alternatives generate additional serving
-      signatures, which may be used to feed inputs at different points within
-      the input receiver subgraph.  A typical usage is to allow feeding raw
-      feature `Tensor`s *downstream* of the tf.parse_example() op.
-      Defaults to None.
-  """
-
-  def __new__(cls,
-              features,
-              receiver_tensors,
-              receiver_tensors_alternatives=None):
-    features = _wrap_and_check_input_tensors(features, 'feature')
-
-    receiver_tensors = _wrap_and_check_input_tensors(receiver_tensors,
-                                                     'receiver_tensor')
-
-    if receiver_tensors_alternatives is not None:
-      if not isinstance(receiver_tensors_alternatives, dict):
-        raise ValueError(
-            'receiver_tensors_alternatives must be a dict: {}.'.format(
-                receiver_tensors_alternatives))
-      for alternative_name, receiver_tensors_alt in (
-          six.iteritems(receiver_tensors_alternatives)):
-        # Updating dict during iteration is OK in this case.
-        receiver_tensors_alternatives[alternative_name] = (
-            _wrap_and_check_input_tensors(
-                receiver_tensors_alt, 'receiver_tensors_alternative'))
-
-    return super(ServingInputReceiver, cls).__new__(
-        cls,
-        features=features,
-        receiver_tensors=receiver_tensors,
-        receiver_tensors_alternatives=receiver_tensors_alternatives)
-
-
-@estimator_export('estimator.export.TensorServingInputReceiver')
-class TensorServingInputReceiver(
-    collections.namedtuple(
-        'TensorServingInputReceiver',
-        ['features', 'receiver_tensors', 'receiver_tensors_alternatives'])):
-  """A return type for a serving_input_receiver_fn.
-
-  This is for use with models that expect a single `Tensor` or `SparseTensor`
-  as an input feature, as opposed to a dict of features.
-
-  The normal `ServingInputReceiver` always returns a feature dict, even if it
-  contains only one entry, and so can be used only with models that accept such
-  a dict.  For models that accept only a single raw feature, the
-  `serving_input_receiver_fn` provided to `Estimator.export_savedmodel()` should
-  return this `TensorServingInputReceiver` instead.  See:
-  https://github.com/tensorflow/tensorflow/issues/11674
-
-  Note that the receiver_tensors and receiver_tensor_alternatives arguments
-  will be automatically converted to the dict representation in either case,
-  because the SavedModel format requires each input `Tensor` to have a name
-  (provided by the dict key).
-
-  The expected return values are:
-    features: A single `Tensor` or `SparseTensor`, representing the feature
-      to be passed to the model.
-    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
-      or `SparseTensor`, specifying input nodes where this receiver expects to
-      be fed by default.  Typically, this is a single placeholder expecting
-      serialized `tf.Example` protos.
-    receiver_tensors_alternatives: a dict of string to additional
-      groups of receiver tensors, each of which may be a `Tensor`,
-      `SparseTensor`, or dict of string to `Tensor` or`SparseTensor`.
-      These named receiver tensor alternatives generate additional serving
-      signatures, which may be used to feed inputs at different points within
-      the input receiver subgraph.  A typical usage is to allow feeding raw
-      feature `Tensor`s *downstream* of the tf.parse_example() op.
-      Defaults to None.
-  """
-
-  def __new__(cls,
-              features,
-              receiver_tensors,
-              receiver_tensors_alternatives=None):
-    if features is None:
-      raise ValueError('features must be defined.')
-    _check_tensor(features, None)
-
-    receiver = ServingInputReceiver(
-        features=features,
-        receiver_tensors=receiver_tensors,
-        receiver_tensors_alternatives=receiver_tensors_alternatives)
-
-    return super(TensorServingInputReceiver, cls).__new__(
-        cls,
-        features=receiver.features[_SINGLE_FEATURE_DEFAULT_NAME],
-        receiver_tensors=receiver.receiver_tensors,
-        receiver_tensors_alternatives=receiver.receiver_tensors_alternatives)
-
-
-class UnsupervisedInputReceiver(ServingInputReceiver):
-  """A return type for a training_input_receiver_fn or eval_input_receiver_fn.
-
-  This differs from SupervisedInputReceiver in that it does not require a set
-  of labels.
-
-  The expected return values are:
-    features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
-      `SparseTensor`, specifying the features to be passed to the model.
-    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
-      or `SparseTensor`, specifying input nodes where this receiver expects to
-      be fed by default.  Typically, this is a single placeholder expecting
-      serialized `tf.Example` protos.
-  """
-
-  def __new__(cls, features, receiver_tensors):
-    return super(UnsupervisedInputReceiver, cls).__new__(
-        cls,
-        features=features,
-        receiver_tensors=receiver_tensors,
-        receiver_tensors_alternatives=None)
-
-
-class SupervisedInputReceiver(
-    collections.namedtuple('SupervisedInputReceiver',
-                           ['features', 'labels', 'receiver_tensors'])):
-  """A return type for a training_input_receiver_fn or eval_input_receiver_fn.
-
-  This differs from a ServingInputReceiver in that (1) this receiver expects
-  a set of labels to be passed in with features, and (2) this receiver does
-  not support receiver_tensors_alternatives, which are primarily used for
-  serving.
-
-  The expected return values are:
-    features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
-      `SparseTensor`, specifying the features to be passed to the model.
-    labels: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
-      `SparseTensor`, specifying the labels to be passed to the model.
-    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
-      or `SparseTensor`, specifying input nodes where this receiver expects to
-      be fed by default.  Typically, this is a single placeholder expecting
-      serialized `tf.Example` protos.
-
-  """
-
-  def __new__(cls, features, labels, receiver_tensors):
-    # Both features and labels can be dicts or raw tensors.
-    for input_vals, error_label in ((features, 'feature'), (labels, 'label')):
-      # _wrap_and_check_input_tensors is called here only to validate the
-      # tensors. The wrapped dict that is returned is deliberately discarded.
-      _wrap_and_check_input_tensors(input_vals, error_label)
-
-    receiver_tensors = _wrap_and_check_input_tensors(receiver_tensors,
-                                                     'receiver_tensor')
-
-    return super(SupervisedInputReceiver, cls).__new__(
-        cls,
-        features=features,
-        labels=labels,
-        receiver_tensors=receiver_tensors)
-
-
-@estimator_export('estimator.export.build_parsing_serving_input_receiver_fn')
-def build_parsing_serving_input_receiver_fn(feature_spec,
-                                            default_batch_size=None):
-  """Build a serving_input_receiver_fn expecting fed tf.Examples.
-
-  Creates a serving_input_receiver_fn that expects a serialized tf.Example fed
-  into a string placeholder.  The function parses the tf.Example according to
-  the provided feature_spec, and returns all parsed Tensors as features.
-
-  Args:
-    feature_spec: a dict of string to `VarLenFeature`/`FixedLenFeature`.
-    default_batch_size: the number of query examples expected per batch.
-        Leave unset for variable batch size (recommended).
-
-  Returns:
-    A serving_input_receiver_fn suitable for use in serving.
-  """
-
-  def serving_input_receiver_fn():
-    """An input_fn that expects a serialized tf.Example."""
-    serialized_tf_example = array_ops.placeholder(
-        dtype=dtypes.string,
-        shape=[default_batch_size],
-        name='input_example_tensor')
-    receiver_tensors = {'examples': serialized_tf_example}
-    features = parsing_ops.parse_example(serialized_tf_example, feature_spec)
-    return ServingInputReceiver(features, receiver_tensors)
-
-  return serving_input_receiver_fn
-
-
-def _placeholder_from_tensor(t, default_batch_size=None):
-  """Creates a placeholder that matches the dtype and shape of passed tensor.
-
-  Args:
-    t: Tensor or EagerTensor
-    default_batch_size: the number of query examples expected per batch.
-        Leave unset for variable batch size (recommended).
-
-  Returns:
-    Placeholder that matches the passed tensor.
-  """
-  batch_shape = tensor_shape.TensorShape([default_batch_size])
-  shape = batch_shape.concatenate(t.get_shape()[1:])
-
-  # Reuse the feature tensor's op name (t.op.name) for the placeholder,
-  # excluding the index from the tensor's name (t.name):
-  # t.name = "%s:%d" % (t.op.name, t._value_index)
-  try:
-    name = t.op.name
-  except AttributeError:
-    # In Eager mode, tensors don't have ops or names, and while they do have
-    # IDs, those are not maintained across runs. The name here is used
-    # primarily for debugging, and is not critical to the placeholder.
-    # So, in order to make this Eager-compatible, continue with an empty
-    # name if none is available.
-    name = None
-
-  return array_ops.placeholder(dtype=t.dtype, shape=shape, name=name)
-
-
-def _placeholders_from_receiver_tensors_dict(input_vals,
-                                             default_batch_size=None):
-  return {
-      name: _placeholder_from_tensor(t, default_batch_size)
-      for name, t in input_vals.items()
-  }
-
-
-@estimator_export('estimator.export.build_raw_serving_input_receiver_fn')
-def build_raw_serving_input_receiver_fn(features, default_batch_size=None):
-  """Build a serving_input_receiver_fn expecting feature Tensors.
-
-  Creates an serving_input_receiver_fn that expects all features to be fed
-  directly.
-
-  Args:
-    features: a dict of string to `Tensor`.
-    default_batch_size: the number of query examples expected per batch.
-        Leave unset for variable batch size (recommended).
-
-  Returns:
-    A serving_input_receiver_fn.
-  """
-
-  def serving_input_receiver_fn():
-    """A serving_input_receiver_fn that expects features to be fed directly."""
-    receiver_tensors = _placeholders_from_receiver_tensors_dict(
-        features, default_batch_size)
-    return ServingInputReceiver(receiver_tensors, receiver_tensors)
-
-  return serving_input_receiver_fn
-
-
-def build_raw_supervised_input_receiver_fn(features,
-                                           labels,
-                                           default_batch_size=None):
-  """Build a supervised_input_receiver_fn for raw features and labels.
-
-  This function wraps tensor placeholders in a supervised_receiver_fn
-  with the expectation that the features and labels appear precisely as
-  the model_fn expects them. Features and labels can therefore be dicts of
-  tensors, or raw tensors.
-
-  Args:
-    features: a dict of string to `Tensor` or `Tensor`.
-    labels: a dict of string to `Tensor` or `Tensor`.
-    default_batch_size: the number of query examples expected per batch.
-        Leave unset for variable batch size (recommended).
-
-  Returns:
-    A supervised_input_receiver_fn.
-
-  Raises:
-    ValueError: if features and labels have overlapping keys.
-  """
-  # Check for overlapping keys before beginning.
-  try:
-    feat_keys = features.keys()
-  except AttributeError:
-    feat_keys = [_SINGLE_RECEIVER_DEFAULT_NAME]
-  try:
-    label_keys = labels.keys()
-  except AttributeError:
-    label_keys = [_SINGLE_LABEL_DEFAULT_NAME]
-
-  overlap_keys = set(feat_keys) & set(label_keys)
-  if overlap_keys:
-    raise ValueError('Features and labels must have distinct keys. '
-                     'Found overlapping keys: {}'.format(overlap_keys))
-
-  def supervised_input_receiver_fn():
-    """A receiver_fn that expects pass-through features and labels."""
-    if not isinstance(features, dict):
-      features_cp = _placeholder_from_tensor(features, default_batch_size)
-      receiver_features = {_SINGLE_RECEIVER_DEFAULT_NAME: features_cp}
-    else:
-      receiver_features = _placeholders_from_receiver_tensors_dict(
-          features, default_batch_size)
-      features_cp = receiver_features
-
-    if not isinstance(labels, dict):
-      labels_cp = _placeholder_from_tensor(labels, default_batch_size)
-      receiver_labels = {_SINGLE_LABEL_DEFAULT_NAME: labels_cp}
-    else:
-      receiver_labels = _placeholders_from_receiver_tensors_dict(
-          labels, default_batch_size)
-      labels_cp = receiver_labels
-
-    receiver_tensors = dict(receiver_features)
-    receiver_tensors.update(receiver_labels)
-    return SupervisedInputReceiver(features_cp, labels_cp, receiver_tensors)
-
-  return supervised_input_receiver_fn
-
-
-def build_supervised_input_receiver_fn_from_input_fn(input_fn, **input_fn_args):
-  """Get a function that returns a SupervisedInputReceiver matching an input_fn.
-
-  Note that this function calls the input_fn in a local graph in order to
-  extract features and labels. Placeholders are then created from those
-  features and labels in the default graph.
-
-  Args:
-    input_fn: An Estimator input_fn, which is a function that returns one of:
-
-      * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
-          tuple (features, labels) with same constraints as below.
-      * A tuple (features, labels): Where `features` is a `Tensor` or a
-        dictionary of string feature name to `Tensor` and `labels` is a
-        `Tensor` or a dictionary of string label name to `Tensor`. Both
-        `features` and `labels` are consumed by `model_fn`. They should
-        satisfy the expectation of `model_fn` from inputs.
-
-    **input_fn_args: set of kwargs to be passed to the input_fn. Note that
-      these will not be checked or validated here, and any errors raised by
-      the input_fn will be thrown to the top.
-
-  Returns:
-    A function taking no arguments that, when called, returns a
-    SupervisedInputReceiver. This function can be passed in as part of the
-    input_receiver_map when exporting SavedModels from Estimator with multiple
-    modes.
-  """
-  # Wrap the input_fn call in a graph to prevent sullying the default namespace
-  with ops.Graph().as_default():
-    result = input_fn(**input_fn_args)
-    features, labels, _ = util.parse_input_fn_result(result)
-  # Placeholders are created back in the default graph.
-  return build_raw_supervised_input_receiver_fn(features, labels)
-
-
-### Below utilities are specific to SavedModel exports.
-
-
-def build_all_signature_defs(receiver_tensors,
-                             export_outputs,
-                             receiver_tensors_alternatives=None,
-                             serving_only=True):
-  """Build `SignatureDef`s for all export outputs.
-
-  Args:
-    receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying
-      input nodes where this receiver expects to be fed by default.  Typically,
-      this is a single placeholder expecting serialized `tf.Example` protos.
-    export_outputs: a dict of ExportOutput instances, each of which has
-      an as_signature_def instance method that will be called to retrieve
-      the signature_def for all export output tensors.
-    receiver_tensors_alternatives: a dict of string to additional
-      groups of receiver tensors, each of which may be a `Tensor` or a dict of
-      string to `Tensor`.  These named receiver tensor alternatives generate
-      additional serving signatures, which may be used to feed inputs at
-      different points within the input receiver subgraph.  A typical usage is
-      to allow feeding raw feature `Tensor`s *downstream* of the
-      tf.parse_example() op.  Defaults to None.
-    serving_only: boolean; if true, resulting signature defs will only include
-      valid serving signatures. If false, all requested signatures will be
-      returned.
-
-  Returns:
-    signature_def representing all passed args.
-
-  Raises:
-    ValueError: if export_outputs is not a dict
-  """
-  if not isinstance(receiver_tensors, dict):
-    receiver_tensors = {_SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors}
-  if export_outputs is None or not isinstance(export_outputs, dict):
-    raise ValueError('export_outputs must be a dict and not'
-                     '{}'.format(type(export_outputs)))
-
-  signature_def_map = {}
-  excluded_signatures = {}
-  for output_key, export_output in export_outputs.items():
-    signature_name = '{}'.format(output_key or 'None')
-    try:
-      signature = export_output.as_signature_def(receiver_tensors)
-      signature_def_map[signature_name] = signature
-    except ValueError as e:
-      excluded_signatures[signature_name] = str(e)
-
-  if receiver_tensors_alternatives:
-    for receiver_name, receiver_tensors_alt in (
-        six.iteritems(receiver_tensors_alternatives)):
-      if not isinstance(receiver_tensors_alt, dict):
-        receiver_tensors_alt = {
-            _SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors_alt
-        }
-      for output_key, export_output in export_outputs.items():
-        signature_name = '{}:{}'.format(receiver_name or 'None', output_key or
-                                        'None')
-        try:
-          signature = export_output.as_signature_def(receiver_tensors_alt)
-          signature_def_map[signature_name] = signature
-        except ValueError as e:
-          excluded_signatures[signature_name] = str(e)
-
-  _log_signature_report(signature_def_map, excluded_signatures)
-
-  # The above calls to export_output.as_signature_def should return only
-  # valid signatures; if there is a validity problem, they raise a ValueError,
-  # in which case we exclude that signature from signature_def_map above.
-  # The is_valid_signature check ensures that the signatures produced are
-  # valid for serving, and acts as an additional sanity check for export
-  # signatures produced for serving. We skip this check for training and eval
-  # signatures, which are not intended for serving.
-  if serving_only:
-    signature_def_map = {
-        k: v
-        for k, v in signature_def_map.items()
-        if signature_def_utils.is_valid_signature(v)
-    }
-  return signature_def_map
-
-
-_FRIENDLY_METHOD_NAMES = {
-    signature_constants.CLASSIFY_METHOD_NAME: 'Classify',
-    signature_constants.REGRESS_METHOD_NAME: 'Regress',
-    signature_constants.PREDICT_METHOD_NAME: 'Predict',
-    signature_constants.SUPERVISED_TRAIN_METHOD_NAME: 'Train',
-    signature_constants.SUPERVISED_EVAL_METHOD_NAME: 'Eval',
-}
-
-
-def _log_signature_report(signature_def_map, excluded_signatures):
-  """Log a report of which signatures were produced."""
-  sig_names_by_method_name = collections.defaultdict(list)
-
-  # We'll collect whatever method_names are present, but also we want to make
-  # sure to output a line for each of the three standard methods even if they
-  # have no signatures.
-  for method_name in _FRIENDLY_METHOD_NAMES:
-    sig_names_by_method_name[method_name] = []
-
-  for signature_name, sig in signature_def_map.items():
-    sig_names_by_method_name[sig.method_name].append(signature_name)
-
-  # TODO(b/67733540): consider printing the full signatures, not just names
-  for method_name, sig_names in sig_names_by_method_name.items():
-    if method_name in _FRIENDLY_METHOD_NAMES:
-      method_name = _FRIENDLY_METHOD_NAMES[method_name]
-    logging.info('Signatures INCLUDED in export for {}: {}'.format(
-        method_name, sig_names if sig_names else 'None'))
-
-  if excluded_signatures:
-    logging.info('Signatures EXCLUDED from export because they cannot be '
-                 'be served via TensorFlow Serving APIs:')
-    for signature_name, message in excluded_signatures.items():
-      logging.info('\'{}\' : {}'.format(signature_name, message))
-
-  if not signature_def_map:
-    logging.warn('Export includes no signatures!')
-  elif (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY not in
-        signature_def_map):
-    logging.warn('Export includes no default signature!')
-
-
-def get_timestamped_export_dir(export_dir_base):
-  """Builds a path to a new subdirectory within the base directory.
-
-  Each export is written into a new subdirectory named using the
-  current time.  This guarantees monotonically increasing version
-  numbers even across multiple runs of the pipeline.
-  The timestamp used is the number of seconds since epoch UTC.
-
-  Args:
-    export_dir_base: A string containing a directory to write the exported
-        graph and checkpoints.
-  Returns:
-    The full path of the new subdirectory (which is not actually created yet).
-
-  Raises:
-    RuntimeError: if repeated attempts fail to obtain a unique timestamped
-      directory name.
-  """
-  return util.get_timestamped_dir(export_dir_base)
-
-
-def get_temp_export_dir(timestamped_export_dir):
-  """Builds a directory name based on the argument but starting with 'temp-'.
-
-  This relies on the fact that TensorFlow Serving ignores subdirectories of
-  the base directory that can't be parsed as integers.
+from tensorflow_estimator.python.estimator.export import export
 
-  Args:
-    timestamped_export_dir: the name of the eventual export directory, e.g.
-      /foo/bar/<timestamp>
+# Include attrs that start with single underscore.
+export.__all__ = [s for s in dir(export) if not s.startswith('__')]
 
-  Returns:
-    A sister directory prefixed with 'temp-', e.g. /foo/bar/temp-<timestamp>.
-  """
-  (dirname, basename) = os.path.split(timestamped_export_dir)
-  temp_export_dir = os.path.join(
-      compat.as_bytes(dirname), compat.as_bytes('temp-{}'.format(basename)))
-  return temp_export_dir
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.export.export import *
diff --git a/tensorflow/python/estimator/export/export_lib.py b/tensorflow/python/estimator/export/export_lib.py
index f4ac8581ea..ce49f89a7d 100644
--- a/tensorflow/python/estimator/export/export_lib.py
+++ b/tensorflow/python/estimator/export/export_lib.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Utility methods for exporting Estimator."""
+"""export_lib python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=unused-import,line-too-long
-from tensorflow.python.estimator.export.export import build_parsing_serving_input_receiver_fn
-from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn
-from tensorflow.python.estimator.export.export import ServingInputReceiver
-from tensorflow.python.estimator.export.export import TensorServingInputReceiver
-from tensorflow.python.estimator.export.export_output import ClassificationOutput
-from tensorflow.python.estimator.export.export_output import ExportOutput
-from tensorflow.python.estimator.export.export_output import PredictOutput
-from tensorflow.python.estimator.export.export_output import RegressionOutput
+from tensorflow_estimator.python.estimator.export import export_lib
 
-# pylint: enable=unused-import,line-too-long
+# Include attrs that start with single underscore.
+export_lib.__all__ = [s for s in dir(export_lib) if not s.startswith('__')]
 
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.export.export_lib import *
diff --git a/tensorflow/python/estimator/export/export_output.py b/tensorflow/python/estimator/export/export_output.py
index c17fc08f21..e61162e13f 100644
--- a/tensorflow/python/estimator/export/export_output.py
+++ b/tensorflow/python/estimator/export/export_output.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,402 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Classes for different types of export output."""
+"""export_output python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import abc
-
-import six
-
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.keras import metrics as metrics_module
-from tensorflow.python.saved_model import signature_def_utils
-from tensorflow.python.util.tf_export import estimator_export
-
-
-@estimator_export('estimator.export.ExportOutput')
-class ExportOutput(object):
-  """Represents an output of a model that can be served.
-
-  These typically correspond to model heads.
-  """
-
-  __metaclass__ = abc.ABCMeta
-
-  _SEPARATOR_CHAR = '/'
-
-  @abc.abstractmethod
-  def as_signature_def(self, receiver_tensors):
-    """Generate a SignatureDef proto for inclusion in a MetaGraphDef.
-
-    The SignatureDef will specify outputs as described in this ExportOutput,
-    and will use the provided receiver_tensors as inputs.
-
-    Args:
-      receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying
-        input nodes that will be fed.
-    """
-    pass
-
-  def _check_output_key(self, key, error_label):
-    # For multi-head models, the key can be a tuple.
-    if isinstance(key, tuple):
-      key = self._SEPARATOR_CHAR.join(key)
-
-    if not isinstance(key, six.string_types):
-      raise ValueError(
-          '{} output key must be a string; got {}.'.format(error_label, key))
-    return key
-
-  def _wrap_and_check_outputs(
-      self, outputs, single_output_default_name, error_label=None):
-    """Wraps raw tensors as dicts and checks type.
-
-    Note that we create a new dict here so that we can overwrite the keys
-    if necessary.
-
-    Args:
-      outputs: A `Tensor` or a dict of string to `Tensor`.
-      single_output_default_name: A string key for use in the output dict
-        if the provided `outputs` is a raw tensor.
-      error_label: descriptive string for use in error messages. If none,
-        single_output_default_name will be used.
-
-    Returns:
-      A dict of tensors
-
-    Raises:
-      ValueError: if the outputs dict keys are not strings or tuples of strings
-        or the values are not Tensors.
-    """
-    if not isinstance(outputs, dict):
-      outputs = {single_output_default_name: outputs}
-
-    output_dict = {}
-    for key, value in outputs.items():
-      error_name = error_label or single_output_default_name
-      key = self._check_output_key(key, error_name)
-      if not isinstance(value, ops.Tensor):
-        raise ValueError(
-            '{} output value must be a Tensor; got {}.'.format(
-                error_name, value))
-
-      output_dict[key] = value
-    return output_dict
-
-
-@estimator_export('estimator.export.ClassificationOutput')
-class ClassificationOutput(ExportOutput):
-  """Represents the output of a classification head.
-
-  Either classes or scores or both must be set.
-
-  The classes `Tensor` must provide string labels, not integer class IDs.
-
-  If only classes is set, it is interpreted as providing top-k results in
-  descending order.
-
-  If only scores is set, it is interpreted as providing a score for every class
-  in order of class ID.
-
-  If both classes and scores are set, they are interpreted as zipped, so each
-  score corresponds to the class at the same index.  Clients should not depend
-  on the order of the entries.
-  """
-
-  def __init__(self, scores=None, classes=None):
-    """Constructor for `ClassificationOutput`.
-
-    Args:
-      scores: A float `Tensor` giving scores (sometimes but not always
-          interpretable as probabilities) for each class.  May be `None`, but
-          only if `classes` is set.  Interpretation varies-- see class doc.
-      classes: A string `Tensor` giving predicted class labels.  May be `None`,
-          but only if `scores` is set.  Interpretation varies-- see class doc.
-
-    Raises:
-      ValueError: if neither classes nor scores is set, or one of them is not a
-          `Tensor` with the correct dtype.
-    """
-    if (scores is not None
-        and not (isinstance(scores, ops.Tensor)
-                 and scores.dtype.is_floating)):
-      raise ValueError('Classification scores must be a float32 Tensor; '
-                       'got {}'.format(scores))
-    if (classes is not None
-        and not (isinstance(classes, ops.Tensor)
-                 and dtypes.as_dtype(classes.dtype) == dtypes.string)):
-      raise ValueError('Classification classes must be a string Tensor; '
-                       'got {}'.format(classes))
-    if scores is None and classes is None:
-      raise ValueError('At least one of scores and classes must be set.')
-
-    self._scores = scores
-    self._classes = classes
-
-  @property
-  def scores(self):
-    return self._scores
-
-  @property
-  def classes(self):
-    return self._classes
-
-  def as_signature_def(self, receiver_tensors):
-    if len(receiver_tensors) != 1:
-      raise ValueError('Classification input must be a single string Tensor; '
-                       'got {}'.format(receiver_tensors))
-    (_, examples), = receiver_tensors.items()
-    if dtypes.as_dtype(examples.dtype) != dtypes.string:
-      raise ValueError('Classification input must be a single string Tensor; '
-                       'got {}'.format(receiver_tensors))
-    return signature_def_utils.classification_signature_def(
-        examples, self.classes, self.scores)
-
-
-@estimator_export('estimator.export.RegressionOutput')
-class RegressionOutput(ExportOutput):
-  """Represents the output of a regression head."""
-
-  def __init__(self, value):
-    """Constructor for `RegressionOutput`.
-
-    Args:
-      value: a float `Tensor` giving the predicted values.  Required.
-
-    Raises:
-      ValueError: if the value is not a `Tensor` with dtype tf.float32.
-    """
-    if not (isinstance(value, ops.Tensor) and value.dtype.is_floating):
-      raise ValueError('Regression output value must be a float32 Tensor; '
-                       'got {}'.format(value))
-    self._value = value
-
-  @property
-  def value(self):
-    return self._value
-
-  def as_signature_def(self, receiver_tensors):
-    if len(receiver_tensors) != 1:
-      raise ValueError('Regression input must be a single string Tensor; '
-                       'got {}'.format(receiver_tensors))
-    (_, examples), = receiver_tensors.items()
-    if dtypes.as_dtype(examples.dtype) != dtypes.string:
-      raise ValueError('Regression input must be a single string Tensor; '
-                       'got {}'.format(receiver_tensors))
-    return signature_def_utils.regression_signature_def(examples, self.value)
-
-
-@estimator_export('estimator.export.PredictOutput')
-class PredictOutput(ExportOutput):
-  """Represents the output of a generic prediction head.
-
-  A generic prediction need not be either a classification or a regression.
-
-  Named outputs must be provided as a dict from string to `Tensor`,
-  """
-  _SINGLE_OUTPUT_DEFAULT_NAME = 'output'
-
-  def __init__(self, outputs):
-    """Constructor for PredictOutput.
-
-    Args:
-      outputs: A `Tensor` or a dict of string to `Tensor` representing the
-        predictions.
-
-    Raises:
-      ValueError: if the outputs is not dict, or any of its keys are not
-          strings, or any of its values are not `Tensor`s.
-    """
-
-    self._outputs = self._wrap_and_check_outputs(
-        outputs, self._SINGLE_OUTPUT_DEFAULT_NAME, error_label='Prediction')
-
-  @property
-  def outputs(self):
-    return self._outputs
-
-  def as_signature_def(self, receiver_tensors):
-    return signature_def_utils.predict_signature_def(receiver_tensors,
-                                                     self.outputs)
-
-
-class _SupervisedOutput(ExportOutput):
-  """Represents the output of a supervised training or eval process."""
-  __metaclass__ = abc.ABCMeta
-
-  LOSS_NAME = 'loss'
-  PREDICTIONS_NAME = 'predictions'
-  METRICS_NAME = 'metrics'
-
-  METRIC_VALUE_SUFFIX = 'value'
-  METRIC_UPDATE_SUFFIX = 'update_op'
-
-  _loss = None
-  _predictions = None
-  _metrics = None
-
-  def __init__(self, loss=None, predictions=None, metrics=None):
-    """Constructor for SupervisedOutput (ie, Train or Eval output).
-
-    Args:
-      loss: dict of Tensors or single Tensor representing calculated loss.
-      predictions: dict of Tensors or single Tensor representing model
-        predictions.
-      metrics: Dict of metric results keyed by name.
-        The values of the dict can be one of the following:
-        (1) instance of `Metric` class.
-        (2) (metric_value, update_op) tuples, or a single tuple.
-        metric_value must be a Tensor, and update_op must be a Tensor or Op.
-
-    Raises:
-      ValueError: if any of the outputs' dict keys are not strings or tuples of
-        strings or the values are not Tensors (or Operations in the case of
-        update_op).
-    """
-
-    if loss is not None:
-      loss_dict = self._wrap_and_check_outputs(loss, self.LOSS_NAME)
-      self._loss = self._prefix_output_keys(loss_dict, self.LOSS_NAME)
-    if predictions is not None:
-      pred_dict = self._wrap_and_check_outputs(
-          predictions, self.PREDICTIONS_NAME)
-      self._predictions = self._prefix_output_keys(
-          pred_dict, self.PREDICTIONS_NAME)
-    if metrics is not None:
-      self._metrics = self._wrap_and_check_metrics(metrics)
-
-  def _prefix_output_keys(self, output_dict, output_name):
-    """Prepend output_name to the output_dict keys if it doesn't exist.
-
-    This produces predictable prefixes for the pre-determined outputs
-    of SupervisedOutput.
-
-    Args:
-      output_dict: dict of string to Tensor, assumed valid.
-      output_name: prefix string to prepend to existing keys.
-
-    Returns:
-      dict with updated keys and existing values.
-    """
-
-    new_outputs = {}
-    for key, val in output_dict.items():
-      key = self._prefix_key(key, output_name)
-      new_outputs[key] = val
-    return new_outputs
-
-  def _prefix_key(self, key, output_name):
-    if key.find(output_name) != 0:
-      key = output_name + self._SEPARATOR_CHAR + key
-    return key
-
-  def _wrap_and_check_metrics(self, metrics):
-    """Handle the saving of metrics.
-
-    Metrics is either a tuple of (value, update_op), or a dict of such tuples.
-    Here, we separate out the tuples and create a dict with names to tensors.
-
-    Args:
-      metrics: Dict of metric results keyed by name.
-        The values of the dict can be one of the following:
-        (1) instance of `Metric` class.
-        (2) (metric_value, update_op) tuples, or a single tuple.
-        metric_value must be a Tensor, and update_op must be a Tensor or Op.
-
-    Returns:
-      dict of output_names to tensors
-
-    Raises:
-      ValueError: if the dict key is not a string, or the metric values or ops
-        are not tensors.
-    """
-    if not isinstance(metrics, dict):
-      metrics = {self.METRICS_NAME: metrics}
-
-    outputs = {}
-    for key, value in metrics.items():
-      if isinstance(value, metrics_module.Metric):
-        metric_val = value.result()
-        assert len(value.updates) == 1  # We expect only one update op.
-        metric_op = value.updates[0]
-      else:
-        metric_val, metric_op = value
-      key = self._check_output_key(key, self.METRICS_NAME)
-      key = self._prefix_key(key, self.METRICS_NAME)
-
-      val_name = key + self._SEPARATOR_CHAR + self.METRIC_VALUE_SUFFIX
-      op_name = key + self._SEPARATOR_CHAR + self.METRIC_UPDATE_SUFFIX
-      if not isinstance(metric_val, ops.Tensor):
-        raise ValueError(
-            '{} output value must be a Tensor; got {}.'.format(
-                key, metric_val))
-      if (not isinstance(metric_op, ops.Tensor) and
-          not isinstance(metric_op, ops.Operation)):
-        raise ValueError(
-            '{} update_op must be a Tensor or Operation; got {}.'.format(
-                key, metric_op))
-
-      # We must wrap any ops in a Tensor before export, as the SignatureDef
-      # proto expects tensors only. See b/109740581
-      metric_op_tensor = metric_op
-      if isinstance(metric_op, ops.Operation):
-        with ops.control_dependencies([metric_op]):
-          metric_op_tensor = constant_op.constant([], name='metric_op_wrapper')
-
-      outputs[val_name] = metric_val
-      outputs[op_name] = metric_op_tensor
-
-    return outputs
-
-  @property
-  def loss(self):
-    return self._loss
-
-  @property
-  def predictions(self):
-    return self._predictions
-
-  @property
-  def metrics(self):
-    return self._metrics
-
-  @abc.abstractmethod
-  def _get_signature_def_fn(self):
-    """Returns a function that produces a SignatureDef given desired outputs."""
-    pass
-
-  def as_signature_def(self, receiver_tensors):
-    signature_def_fn = self._get_signature_def_fn()
-    return signature_def_fn(
-        receiver_tensors, self.loss, self.predictions, self.metrics)
-
-
-class TrainOutput(_SupervisedOutput):
-  """Represents the output of a supervised training process.
-
-  This class generates the appropriate signature def for exporting
-  training output by type-checking and wrapping loss, predictions, and metrics
-  values.
-  """
-
-  def _get_signature_def_fn(self):
-    return signature_def_utils.supervised_train_signature_def
-
-
-class EvalOutput(_SupervisedOutput):
-  """Represents the output of a supervised eval process.
+from tensorflow_estimator.python.estimator.export import export_output
 
-  This class generates the appropriate signature def for exporting
-  eval output by type-checking and wrapping loss, predictions, and metrics
-  values.
-  """
+# Include attrs that start with single underscore.
+export_output.__all__ = [
+    s for s in dir(export_output) if not s.startswith('__')
+]
 
-  def _get_signature_def_fn(self):
-    return signature_def_utils.supervised_eval_signature_def
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.export.export_output import *
diff --git a/tensorflow/python/estimator/export/export_output_test.py b/tensorflow/python/estimator/export/export_output_test.py
deleted file mode 100644
index 96ce0e580d..0000000000
--- a/tensorflow/python/estimator/export/export_output_test.py
+++ /dev/null
@@ -1,397 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for export."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.core.framework import tensor_shape_pb2
-from tensorflow.core.framework import types_pb2
-from tensorflow.core.protobuf import meta_graph_pb2
-from tensorflow.python.estimator.export import export_output as export_output_lib
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.keras import metrics as metrics_module
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import signature_constants
-
-
-class ExportOutputTest(test.TestCase):
-
-  def test_regress_value_must_be_float(self):
-    value = array_ops.placeholder(dtypes.string, 1, name="output-tensor-1")
-    with self.assertRaises(ValueError) as e:
-      export_output_lib.RegressionOutput(value)
-    self.assertEqual('Regression output value must be a float32 Tensor; got '
-                     'Tensor("output-tensor-1:0", shape=(1,), dtype=string)',
-                     str(e.exception))
-
-  def test_classify_classes_must_be_strings(self):
-    classes = array_ops.placeholder(dtypes.float32, 1, name="output-tensor-1")
-    with self.assertRaises(ValueError) as e:
-      export_output_lib.ClassificationOutput(classes=classes)
-    self.assertEqual('Classification classes must be a string Tensor; got '
-                     'Tensor("output-tensor-1:0", shape=(1,), dtype=float32)',
-                     str(e.exception))
-
-  def test_classify_scores_must_be_float(self):
-    scores = array_ops.placeholder(dtypes.string, 1, name="output-tensor-1")
-    with self.assertRaises(ValueError) as e:
-      export_output_lib.ClassificationOutput(scores=scores)
-    self.assertEqual('Classification scores must be a float32 Tensor; got '
-                     'Tensor("output-tensor-1:0", shape=(1,), dtype=string)',
-                     str(e.exception))
-
-  def test_classify_requires_classes_or_scores(self):
-    with self.assertRaises(ValueError) as e:
-      export_output_lib.ClassificationOutput()
-    self.assertEqual("At least one of scores and classes must be set.",
-                     str(e.exception))
-
-  def test_build_standardized_signature_def_regression(self):
-    input_tensors = {
-        "input-1":
-            array_ops.placeholder(
-                dtypes.string, 1, name="input-tensor-1")
-    }
-    value = array_ops.placeholder(dtypes.float32, 1, name="output-tensor-1")
-
-    export_output = export_output_lib.RegressionOutput(value)
-    actual_signature_def = export_output.as_signature_def(input_tensors)
-
-    expected_signature_def = meta_graph_pb2.SignatureDef()
-    shape = tensor_shape_pb2.TensorShapeProto(
-        dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
-    dtype_float = types_pb2.DataType.Value("DT_FLOAT")
-    dtype_string = types_pb2.DataType.Value("DT_STRING")
-    expected_signature_def.inputs[
-        signature_constants.REGRESS_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="input-tensor-1:0",
-                                      dtype=dtype_string,
-                                      tensor_shape=shape))
-    expected_signature_def.outputs[
-        signature_constants.REGRESS_OUTPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="output-tensor-1:0",
-                                      dtype=dtype_float,
-                                      tensor_shape=shape))
-
-    expected_signature_def.method_name = signature_constants.REGRESS_METHOD_NAME
-    self.assertEqual(actual_signature_def, expected_signature_def)
-
-  def test_build_standardized_signature_def_classify_classes_only(self):
-    """Tests classification with one output tensor."""
-    input_tensors = {
-        "input-1":
-            array_ops.placeholder(
-                dtypes.string, 1, name="input-tensor-1")
-    }
-    classes = array_ops.placeholder(dtypes.string, 1, name="output-tensor-1")
-
-    export_output = export_output_lib.ClassificationOutput(classes=classes)
-    actual_signature_def = export_output.as_signature_def(input_tensors)
-
-    expected_signature_def = meta_graph_pb2.SignatureDef()
-    shape = tensor_shape_pb2.TensorShapeProto(
-        dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
-    dtype_string = types_pb2.DataType.Value("DT_STRING")
-    expected_signature_def.inputs[
-        signature_constants.CLASSIFY_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="input-tensor-1:0",
-                                      dtype=dtype_string,
-                                      tensor_shape=shape))
-    expected_signature_def.outputs[
-        signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="output-tensor-1:0",
-                                      dtype=dtype_string,
-                                      tensor_shape=shape))
-
-    expected_signature_def.method_name = (
-        signature_constants.CLASSIFY_METHOD_NAME)
-    self.assertEqual(actual_signature_def, expected_signature_def)
-
-  def test_build_standardized_signature_def_classify_both(self):
-    """Tests multiple output tensors that include classes and scores."""
-    input_tensors = {
-        "input-1":
-            array_ops.placeholder(
-                dtypes.string, 1, name="input-tensor-1")
-    }
-    classes = array_ops.placeholder(dtypes.string, 1,
-                                    name="output-tensor-classes")
-    scores = array_ops.placeholder(dtypes.float32, 1,
-                                   name="output-tensor-scores")
-
-    export_output = export_output_lib.ClassificationOutput(
-        scores=scores, classes=classes)
-    actual_signature_def = export_output.as_signature_def(input_tensors)
-
-    expected_signature_def = meta_graph_pb2.SignatureDef()
-    shape = tensor_shape_pb2.TensorShapeProto(
-        dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
-    dtype_float = types_pb2.DataType.Value("DT_FLOAT")
-    dtype_string = types_pb2.DataType.Value("DT_STRING")
-    expected_signature_def.inputs[
-        signature_constants.CLASSIFY_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="input-tensor-1:0",
-                                      dtype=dtype_string,
-                                      tensor_shape=shape))
-    expected_signature_def.outputs[
-        signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="output-tensor-classes:0",
-                                      dtype=dtype_string,
-                                      tensor_shape=shape))
-    expected_signature_def.outputs[
-        signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="output-tensor-scores:0",
-                                      dtype=dtype_float,
-                                      tensor_shape=shape))
-
-    expected_signature_def.method_name = (
-        signature_constants.CLASSIFY_METHOD_NAME)
-    self.assertEqual(actual_signature_def, expected_signature_def)
-
-  def test_build_standardized_signature_def_classify_scores_only(self):
-    """Tests classification without classes tensor."""
-    input_tensors = {
-        "input-1":
-            array_ops.placeholder(
-                dtypes.string, 1, name="input-tensor-1")
-    }
-
-    scores = array_ops.placeholder(dtypes.float32, 1,
-                                   name="output-tensor-scores")
-
-    export_output = export_output_lib.ClassificationOutput(
-        scores=scores)
-    actual_signature_def = export_output.as_signature_def(input_tensors)
-
-    expected_signature_def = meta_graph_pb2.SignatureDef()
-    shape = tensor_shape_pb2.TensorShapeProto(
-        dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
-    dtype_float = types_pb2.DataType.Value("DT_FLOAT")
-    dtype_string = types_pb2.DataType.Value("DT_STRING")
-    expected_signature_def.inputs[
-        signature_constants.CLASSIFY_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="input-tensor-1:0",
-                                      dtype=dtype_string,
-                                      tensor_shape=shape))
-    expected_signature_def.outputs[
-        signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
-            meta_graph_pb2.TensorInfo(name="output-tensor-scores:0",
-                                      dtype=dtype_float,
-                                      tensor_shape=shape))
-
-    expected_signature_def.method_name = (
-        signature_constants.CLASSIFY_METHOD_NAME)
-    self.assertEqual(actual_signature_def, expected_signature_def)
-
-  def test_predict_outputs_valid(self):
-    """Tests that no errors are raised when provided outputs are valid."""
-    outputs = {
-        "output0": constant_op.constant([0]),
-        u"output1": constant_op.constant(["foo"]),
-    }
-    export_output_lib.PredictOutput(outputs)
-
-    # Single Tensor is OK too
-    export_output_lib.PredictOutput(constant_op.constant([0]))
-
-  def test_predict_outputs_invalid(self):
-    with self.assertRaisesRegexp(
-        ValueError,
-        "Prediction output key must be a string"):
-      export_output_lib.PredictOutput({1: constant_op.constant([0])})
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        "Prediction output value must be a Tensor"):
-      export_output_lib.PredictOutput({
-          "prediction1": sparse_tensor.SparseTensor(
-              indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
-      })
-
-
-class MockSupervisedOutput(export_output_lib._SupervisedOutput):
-  """So that we can test the abstract class methods directly."""
-
-  def _get_signature_def_fn(self):
-    pass
-
-
-class SupervisedOutputTest(test.TestCase):
-
-  def test_supervised_outputs_valid(self):
-    """Tests that no errors are raised when provided outputs are valid."""
-    loss = {"my_loss": constant_op.constant([0])}
-    predictions = {u"output1": constant_op.constant(["foo"])}
-    metric_obj = metrics_module.Mean()
-    metric_obj.update_state(constant_op.constant([0]))
-    metrics = {
-        "metrics": metric_obj,
-        "metrics2": (constant_op.constant([0]), constant_op.constant([10]))
-    }
-
-    outputter = MockSupervisedOutput(loss, predictions, metrics)
-    self.assertEqual(outputter.loss["loss/my_loss"], loss["my_loss"])
-    self.assertEqual(
-        outputter.predictions["predictions/output1"], predictions["output1"])
-    self.assertEqual(outputter.metrics["metrics/update_op"].name,
-                     "metric_op_wrapper:0")
-    self.assertEqual(
-        outputter.metrics["metrics2/update_op"], metrics["metrics2"][1])
-
-    # Single Tensor is OK too
-    outputter = MockSupervisedOutput(
-        loss["my_loss"], predictions["output1"], metrics["metrics"])
-    self.assertEqual(outputter.loss, {"loss": loss["my_loss"]})
-    self.assertEqual(
-        outputter.predictions, {"predictions": predictions["output1"]})
-    self.assertEqual(outputter.metrics["metrics/update_op"].name,
-                     "metric_op_wrapper_1:0")
-
-  def test_supervised_outputs_none(self):
-    outputter = MockSupervisedOutput(
-        constant_op.constant([0]), None, None)
-    self.assertEqual(len(outputter.loss), 1)
-    self.assertEqual(outputter.predictions, None)
-    self.assertEqual(outputter.metrics, None)
-
-  def test_supervised_outputs_invalid(self):
-    with self.assertRaisesRegexp(ValueError, "predictions output value must"):
-      MockSupervisedOutput(constant_op.constant([0]), [3], None)
-    with self.assertRaisesRegexp(ValueError, "loss output value must"):
-      MockSupervisedOutput("str", None, None)
-    with self.assertRaisesRegexp(ValueError, "metrics output value must"):
-      MockSupervisedOutput(None, None, (15.3, 4))
-    with self.assertRaisesRegexp(ValueError, "loss output key must"):
-      MockSupervisedOutput({25: "Tensor"}, None, None)
-
-  def test_supervised_outputs_tuples(self):
-    """Tests that no errors are raised when provided outputs are valid."""
-    loss = {("my", "loss"): constant_op.constant([0])}
-    predictions = {(u"output1", "2"): constant_op.constant(["foo"])}
-    metric_obj = metrics_module.Mean()
-    metric_obj.update_state(constant_op.constant([0]))
-    metrics = {
-        ("metrics", "1"):
-            metric_obj,
-        ("metrics", "2"): (constant_op.constant([0]),
-                           constant_op.constant([10]))
-    }
-
-    outputter = MockSupervisedOutput(loss, predictions, metrics)
-    self.assertEqual(set(outputter.loss.keys()), set(["loss/my/loss"]))
-    self.assertEqual(set(outputter.predictions.keys()),
-                     set(["predictions/output1/2"]))
-    self.assertEqual(
-        set(outputter.metrics.keys()),
-        set([
-            "metrics/1/value", "metrics/1/update_op", "metrics/2/value",
-            "metrics/2/update_op"
-        ]))
-
-  def test_supervised_outputs_no_prepend(self):
-    """Tests that no errors are raised when provided outputs are valid."""
-    loss = {"loss": constant_op.constant([0])}
-    predictions = {u"predictions": constant_op.constant(["foo"])}
-    metric_obj = metrics_module.Mean()
-    metric_obj.update_state(constant_op.constant([0]))
-    metrics = {
-        "metrics_1": metric_obj,
-        "metrics_2": (constant_op.constant([0]), constant_op.constant([10]))
-    }
-
-    outputter = MockSupervisedOutput(loss, predictions, metrics)
-    self.assertEqual(set(outputter.loss.keys()), set(["loss"]))
-    self.assertEqual(set(outputter.predictions.keys()), set(["predictions"]))
-    self.assertEqual(
-        set(outputter.metrics.keys()),
-        set([
-            "metrics_1/value", "metrics_1/update_op", "metrics_2/update_op",
-            "metrics_2/value"
-        ]))
-
-  def test_train_signature_def(self):
-    loss = {"my_loss": constant_op.constant([0])}
-    predictions = {u"output1": constant_op.constant(["foo"])}
-    metric_obj = metrics_module.Mean()
-    metric_obj.update_state(constant_op.constant([0]))
-    metrics = {
-        "metrics_1": metric_obj,
-        "metrics_2": (constant_op.constant([0]), constant_op.constant([10]))
-    }
-
-    outputter = export_output_lib.TrainOutput(loss, predictions, metrics)
-
-    receiver = {u"features": constant_op.constant(100, shape=(100, 2)),
-                "labels": constant_op.constant(100, shape=(100, 1))}
-    sig_def = outputter.as_signature_def(receiver)
-
-    self.assertTrue("loss/my_loss" in sig_def.outputs)
-    self.assertTrue("metrics_1/value" in sig_def.outputs)
-    self.assertTrue("metrics_2/value" in sig_def.outputs)
-    self.assertTrue("predictions/output1" in sig_def.outputs)
-    self.assertTrue("features" in sig_def.inputs)
-
-  def test_eval_signature_def(self):
-    loss = {"my_loss": constant_op.constant([0])}
-    predictions = {u"output1": constant_op.constant(["foo"])}
-
-    outputter = export_output_lib.EvalOutput(loss, predictions, None)
-
-    receiver = {u"features": constant_op.constant(100, shape=(100, 2)),
-                "labels": constant_op.constant(100, shape=(100, 1))}
-    sig_def = outputter.as_signature_def(receiver)
-
-    self.assertTrue("loss/my_loss" in sig_def.outputs)
-    self.assertFalse("metrics/value" in sig_def.outputs)
-    self.assertTrue("predictions/output1" in sig_def.outputs)
-    self.assertTrue("features" in sig_def.inputs)
-
-  def test_metric_op_is_tensor(self):
-    """Tests that ops.Operation is wrapped by a tensor for metric_ops."""
-    loss = {"my_loss": constant_op.constant([0])}
-    predictions = {u"output1": constant_op.constant(["foo"])}
-    metric_obj = metrics_module.Mean()
-    metric_obj.update_state(constant_op.constant([0]))
-    metrics = {
-        "metrics_1": metric_obj,
-        "metrics_2": (constant_op.constant([0]), control_flow_ops.no_op())
-    }
-
-    outputter = MockSupervisedOutput(loss, predictions, metrics)
-
-    self.assertTrue(outputter.metrics["metrics_1/update_op"].name.startswith(
-        "metric_op_wrapper"))
-    self.assertTrue(
-        isinstance(outputter.metrics["metrics_1/update_op"], ops.Tensor))
-    self.assertTrue(
-        isinstance(outputter.metrics["metrics_1/value"], ops.Tensor))
-
-    self.assertEqual(outputter.metrics["metrics_2/value"],
-                     metrics["metrics_2"][0])
-    self.assertTrue(outputter.metrics["metrics_2/update_op"].name.startswith(
-        "metric_op_wrapper"))
-    self.assertTrue(
-        isinstance(outputter.metrics["metrics_2/update_op"], ops.Tensor))
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py
deleted file mode 100644
index ed3219c49b..0000000000
--- a/tensorflow/python/estimator/export/export_test.py
+++ /dev/null
@@ -1,802 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for export."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import tempfile
-import time
-
-from google.protobuf import text_format
-
-from tensorflow.core.example import example_pb2
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.export import export_output
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.saved_model import signature_def_utils
-
-
-class LabeledTensorMock(object):
-  """Mock class emulating LabeledTensor."""
-
-  def __init__(self):
-    self.tensor = constant_op.constant([1])
-
-
-def _convert_labeled_tensor_mock_to_tensor(value, *args, **kwargs):
-  return ops.internal_convert_to_tensor(value.tensor, *args, **kwargs)
-
-
-ops.register_tensor_conversion_function(LabeledTensorMock,
-                                        _convert_labeled_tensor_mock_to_tensor)
-
-
-class ServingInputReceiverTest(test_util.TensorFlowTestCase):
-
-  def test_serving_input_receiver_constructor(self):
-    """Tests that no errors are raised when input is expected."""
-    features = {
-        "feature0": constant_op.constant([0]),
-        u"feature1": constant_op.constant([1]),
-        "feature2": sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
-    }
-    receiver_tensors = {
-        "example0": array_ops.placeholder(dtypes.string, name="example0"),
-        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
-    }
-    export.ServingInputReceiver(features, receiver_tensors)
-
-  def test_serving_input_receiver_features_invalid(self):
-    receiver_tensors = {
-        "example0": array_ops.placeholder(dtypes.string, name="example0"),
-        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
-    }
-
-    with self.assertRaisesRegexp(ValueError, "features must be defined"):
-      export.ServingInputReceiver(
-          features=None,
-          receiver_tensors=receiver_tensors)
-
-    with self.assertRaisesRegexp(ValueError, "feature keys must be strings"):
-      export.ServingInputReceiver(
-          features={1: constant_op.constant([1])},
-          receiver_tensors=receiver_tensors)
-
-    with self.assertRaisesRegexp(
-        ValueError, "feature feature1 must be a Tensor or SparseTensor"):
-      export.ServingInputReceiver(
-          features={"feature1": [1]},
-          receiver_tensors=receiver_tensors)
-
-  def test_serving_input_receiver_receiver_tensors_invalid(self):
-    features = {
-        "feature0": constant_op.constant([0]),
-        u"feature1": constant_op.constant([1]),
-        "feature2": sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
-    }
-
-    with self.assertRaisesRegexp(
-        ValueError, "receiver_tensors must be defined"):
-      export.ServingInputReceiver(
-          features=features,
-          receiver_tensors=None)
-
-    with self.assertRaisesRegexp(
-        ValueError, "receiver_tensor keys must be strings"):
-      export.ServingInputReceiver(
-          features=features,
-          receiver_tensors={
-              1: array_ops.placeholder(dtypes.string, name="example0")})
-
-    with self.assertRaisesRegexp(
-        ValueError, "receiver_tensor example1 must be a Tensor"):
-      export.ServingInputReceiver(
-          features=features,
-          receiver_tensors={"example1": [1]})
-
-  def test_single_feature_single_receiver(self):
-    feature = constant_op.constant(5)
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    input_receiver = export.ServingInputReceiver(
-        feature, receiver_tensor)
-    # single feature is automatically named
-    feature_key, = input_receiver.features.keys()
-    self.assertEqual("feature", feature_key)
-    # single receiver is automatically named
-    receiver_key, = input_receiver.receiver_tensors.keys()
-    self.assertEqual("input", receiver_key)
-
-  def test_multi_feature_single_receiver(self):
-    features = {"foo": constant_op.constant(5),
-                "bar": constant_op.constant(6)}
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    _ = export.ServingInputReceiver(features, receiver_tensor)
-
-  def test_multi_feature_multi_receiver(self):
-    features = {"foo": constant_op.constant(5),
-                "bar": constant_op.constant(6)}
-    receiver_tensors = {"baz": array_ops.placeholder(dtypes.int64),
-                        "qux": array_ops.placeholder(dtypes.float32)}
-    _ = export.ServingInputReceiver(features, receiver_tensors)
-
-  def test_feature_wrong_type(self):
-    feature = "not a tensor"
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    with self.assertRaises(ValueError):
-      _ = export.ServingInputReceiver(feature, receiver_tensor)
-
-  def test_feature_labeled_tensor(self):
-    feature = LabeledTensorMock()
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    _ = export.ServingInputReceiver(feature, receiver_tensor)
-
-  def test_receiver_wrong_type(self):
-    feature = constant_op.constant(5)
-    receiver_tensor = "not a tensor"
-    with self.assertRaises(ValueError):
-      _ = export.ServingInputReceiver(feature, receiver_tensor)
-
-
-class UnsupervisedInputReceiverTest(test_util.TensorFlowTestCase):
-
-  # Since this is basically a wrapper around ServingInputReceiver, we only
-  # have a simple sanity check to ensure that it works.
-
-  def test_unsupervised_input_receiver_constructor(self):
-    """Tests that no errors are raised when input is expected."""
-    features = {
-        "feature0":
-            constant_op.constant([0]),
-        u"feature1":
-            constant_op.constant([1]),
-        "feature2":
-            sparse_tensor.SparseTensor(
-                indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
-    }
-    receiver_tensors = {
-        "example0": array_ops.placeholder(dtypes.string, name="example0"),
-        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
-    }
-    export.UnsupervisedInputReceiver(features, receiver_tensors)
-
-
-class SupervisedInputReceiverTest(test_util.TensorFlowTestCase):
-
-  def test_input_receiver_constructor(self):
-    """Tests that no errors are raised when input is expected."""
-    features = {
-        "feature0": constant_op.constant([0]),
-        u"feature1": constant_op.constant([1]),
-        "feature2": sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
-    }
-    labels = {
-        "classes": constant_op.constant([0] * 100),
-    }
-
-    receiver_tensors = {
-        "example0": array_ops.placeholder(dtypes.string, name="example0"),
-        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
-    }
-    export.SupervisedInputReceiver(features, labels, receiver_tensors)
-
-  def test_input_receiver_raw_values(self):
-    """Tests that no errors are raised when input is expected."""
-    features = {
-        "feature0": constant_op.constant([0]),
-        u"feature1": constant_op.constant([1]),
-        "feature2": sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
-    }
-
-    labels = {
-        "classes": constant_op.constant([0] * 100),
-    }
-
-    receiver_tensors = {
-        "example0": array_ops.placeholder(dtypes.string, name="example0"),
-        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
-    }
-    rec = export.SupervisedInputReceiver(
-        features["feature2"], labels, receiver_tensors)
-    self.assertIsInstance(rec.features, sparse_tensor.SparseTensor)
-
-    rec = export.SupervisedInputReceiver(
-        features, labels["classes"], receiver_tensors)
-    self.assertIsInstance(rec.labels, ops.Tensor)
-
-  def test_input_receiver_features_invalid(self):
-    features = constant_op.constant([0] * 100)
-    labels = constant_op.constant([0])
-    receiver_tensors = {
-        "example0": array_ops.placeholder(dtypes.string, name="example0"),
-        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
-    }
-
-    with self.assertRaisesRegexp(ValueError, "features must be defined"):
-      export.SupervisedInputReceiver(
-          features=None,
-          labels=labels,
-          receiver_tensors=receiver_tensors)
-
-    with self.assertRaisesRegexp(ValueError, "feature keys must be strings"):
-      export.SupervisedInputReceiver(
-          features={1: constant_op.constant([1])},
-          labels=labels,
-          receiver_tensors=receiver_tensors)
-
-    with self.assertRaisesRegexp(ValueError, "label keys must be strings"):
-      export.SupervisedInputReceiver(
-          features=features,
-          labels={1: constant_op.constant([1])},
-          receiver_tensors=receiver_tensors)
-
-    with self.assertRaisesRegexp(
-        ValueError, "feature feature1 must be a Tensor or SparseTensor"):
-      export.SupervisedInputReceiver(
-          features={"feature1": [1]},
-          labels=labels,
-          receiver_tensors=receiver_tensors)
-
-    with self.assertRaisesRegexp(
-        ValueError, "feature must be a Tensor or SparseTensor"):
-      export.SupervisedInputReceiver(
-          features=[1],
-          labels=labels,
-          receiver_tensors=receiver_tensors)
-
-    with self.assertRaisesRegexp(
-        ValueError, "label must be a Tensor or SparseTensor"):
-      export.SupervisedInputReceiver(
-          features=features,
-          labels=100,
-          receiver_tensors=receiver_tensors)
-
-  def test_input_receiver_receiver_tensors_invalid(self):
-    features = {
-        "feature0": constant_op.constant([0]),
-        u"feature1": constant_op.constant([1]),
-        "feature2": sparse_tensor.SparseTensor(
-            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
-    }
-    labels = constant_op.constant([0])
-
-    with self.assertRaisesRegexp(
-        ValueError, "receiver_tensors must be defined"):
-      export.SupervisedInputReceiver(
-          features=features,
-          labels=labels,
-          receiver_tensors=None)
-
-    with self.assertRaisesRegexp(
-        ValueError, "receiver_tensor keys must be strings"):
-      export.SupervisedInputReceiver(
-          features=features,
-          labels=labels,
-          receiver_tensors={
-              1: array_ops.placeholder(dtypes.string, name="example0")})
-
-    with self.assertRaisesRegexp(
-        ValueError, "receiver_tensor example1 must be a Tensor"):
-      export.SupervisedInputReceiver(
-          features=features,
-          labels=labels,
-          receiver_tensors={"example1": [1]})
-
-  def test_single_feature_single_receiver(self):
-    feature = constant_op.constant(5)
-    label = constant_op.constant(5)
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    input_receiver = export.SupervisedInputReceiver(
-        feature, label, receiver_tensor)
-
-    # single receiver is automatically named
-    receiver_key, = input_receiver.receiver_tensors.keys()
-    self.assertEqual("input", receiver_key)
-
-  def test_multi_feature_single_receiver(self):
-    features = {"foo": constant_op.constant(5),
-                "bar": constant_op.constant(6)}
-    labels = {"value": constant_op.constant(5)}
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    _ = export.SupervisedInputReceiver(features, labels, receiver_tensor)
-
-  def test_multi_feature_multi_receiver(self):
-    features = {"foo": constant_op.constant(5),
-                "bar": constant_op.constant(6)}
-    labels = {"value": constant_op.constant(5)}
-    receiver_tensors = {"baz": array_ops.placeholder(dtypes.int64),
-                        "qux": array_ops.placeholder(dtypes.float32)}
-    _ = export.SupervisedInputReceiver(features, labels, receiver_tensors)
-
-  def test_feature_labeled_tensor(self):
-    feature = LabeledTensorMock()
-    label = constant_op.constant(5)
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    _ = export.SupervisedInputReceiver(feature, label, receiver_tensor)
-
-
-class ExportTest(test_util.TensorFlowTestCase):
-
-  def test_build_parsing_serving_input_receiver_fn(self):
-    feature_spec = {"int_feature": parsing_ops.VarLenFeature(dtypes.int64),
-                    "float_feature": parsing_ops.VarLenFeature(dtypes.float32)}
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    with ops.Graph().as_default():
-      serving_input_receiver = serving_input_receiver_fn()
-      self.assertEqual(set(["int_feature", "float_feature"]),
-                       set(serving_input_receiver.features.keys()))
-      self.assertEqual(set(["examples"]),
-                       set(serving_input_receiver.receiver_tensors.keys()))
-
-      example = example_pb2.Example()
-      text_format.Parse("features: { "
-                        "  feature: { "
-                        "    key: 'int_feature' "
-                        "    value: { "
-                        "      int64_list: { "
-                        "        value: [ 21, 2, 5 ] "
-                        "      } "
-                        "    } "
-                        "  } "
-                        "  feature: { "
-                        "    key: 'float_feature' "
-                        "    value: { "
-                        "      float_list: { "
-                        "        value: [ 525.25 ] "
-                        "      } "
-                        "    } "
-                        "  } "
-                        "} ", example)
-
-      with self.cached_session() as sess:
-        sparse_result = sess.run(
-            serving_input_receiver.features,
-            feed_dict={
-                serving_input_receiver.receiver_tensors["examples"].name:
-                [example.SerializeToString()]})
-        self.assertAllEqual([[0, 0], [0, 1], [0, 2]],
-                            sparse_result["int_feature"].indices)
-        self.assertAllEqual([21, 2, 5],
-                            sparse_result["int_feature"].values)
-        self.assertAllEqual([[0, 0]],
-                            sparse_result["float_feature"].indices)
-        self.assertAllEqual([525.25],
-                            sparse_result["float_feature"].values)
-
-  def test_build_raw_serving_input_receiver_fn_name(self):
-    """Test case for issue #12755."""
-    f = {
-        "feature":
-            array_ops.placeholder(
-                name="feature", shape=[32], dtype=dtypes.float32)
-    }
-    serving_input_receiver_fn = export.build_raw_serving_input_receiver_fn(f)
-    v = serving_input_receiver_fn()
-    self.assertTrue(isinstance(v, export.ServingInputReceiver))
-
-  def test_build_raw_serving_input_receiver_fn_without_shape(self):
-    """Test case for issue #21178."""
-    f = {"feature_1": array_ops.placeholder(dtypes.float32),
-         "feature_2": array_ops.placeholder(dtypes.int32)}
-    serving_input_receiver_fn = export.build_raw_serving_input_receiver_fn(f)
-    v = serving_input_receiver_fn()
-    self.assertTrue(isinstance(v, export.ServingInputReceiver))
-    self.assertEqual(
-        tensor_shape.unknown_shape(),
-        v.receiver_tensors["feature_1"].shape)
-    self.assertEqual(
-        tensor_shape.unknown_shape(),
-        v.receiver_tensors["feature_2"].shape)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_build_raw_serving_input_receiver_fn(self):
-    features = {"feature_1": constant_op.constant(["hello"]),
-                "feature_2": constant_op.constant([42])}
-    serving_input_receiver_fn = export.build_raw_serving_input_receiver_fn(
-        features)
-    with ops.Graph().as_default():
-      serving_input_receiver = serving_input_receiver_fn()
-      self.assertEqual(set(["feature_1", "feature_2"]),
-                       set(serving_input_receiver.features.keys()))
-      self.assertEqual(set(["feature_1", "feature_2"]),
-                       set(serving_input_receiver.receiver_tensors.keys()))
-      self.assertEqual(
-          dtypes.string,
-          serving_input_receiver.receiver_tensors["feature_1"].dtype)
-      self.assertEqual(
-          dtypes.int32,
-          serving_input_receiver.receiver_tensors["feature_2"].dtype)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_build_raw_supervised_input_receiver_fn(self):
-    features = {"feature_1": constant_op.constant(["hello"]),
-                "feature_2": constant_op.constant([42])}
-    labels = {"foo": constant_op.constant([5]),
-              "bar": constant_op.constant([6])}
-    input_receiver_fn = export.build_raw_supervised_input_receiver_fn(
-        features, labels)
-    with ops.Graph().as_default():
-      input_receiver = input_receiver_fn()
-      self.assertEqual(set(["feature_1", "feature_2"]),
-                       set(input_receiver.features.keys()))
-      self.assertEqual(set(["foo", "bar"]),
-                       set(input_receiver.labels.keys()))
-      self.assertEqual(set(["feature_1", "feature_2", "foo", "bar"]),
-                       set(input_receiver.receiver_tensors.keys()))
-      self.assertEqual(
-          dtypes.string, input_receiver.receiver_tensors["feature_1"].dtype)
-      self.assertEqual(
-          dtypes.int32, input_receiver.receiver_tensors["feature_2"].dtype)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_build_raw_supervised_input_receiver_fn_raw_tensors(self):
-    features = {"feature_1": constant_op.constant(["hello"]),
-                "feature_2": constant_op.constant([42])}
-    labels = {"foo": constant_op.constant([5]),
-              "bar": constant_op.constant([6])}
-    input_receiver_fn1 = export.build_raw_supervised_input_receiver_fn(
-        features["feature_1"], labels)
-    input_receiver_fn2 = export.build_raw_supervised_input_receiver_fn(
-        features["feature_1"], labels["foo"])
-    with ops.Graph().as_default():
-      input_receiver = input_receiver_fn1()
-      self.assertIsInstance(input_receiver.features, ops.Tensor)
-      self.assertEqual(set(["foo", "bar"]),
-                       set(input_receiver.labels.keys()))
-      self.assertEqual(set(["input", "foo", "bar"]),
-                       set(input_receiver.receiver_tensors.keys()))
-
-      input_receiver = input_receiver_fn2()
-      self.assertIsInstance(input_receiver.features, ops.Tensor)
-      self.assertIsInstance(input_receiver.labels, ops.Tensor)
-      self.assertEqual(set(["input", "label"]),
-                       set(input_receiver.receiver_tensors.keys()))
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_build_raw_supervised_input_receiver_fn_batch_size(self):
-    features = {"feature_1": constant_op.constant(["hello"]),
-                "feature_2": constant_op.constant([42])}
-    labels = {"foo": constant_op.constant([5]),
-              "bar": constant_op.constant([6])}
-    input_receiver_fn = export.build_raw_supervised_input_receiver_fn(
-        features, labels, default_batch_size=10)
-    with ops.Graph().as_default():
-      input_receiver = input_receiver_fn()
-      self.assertEqual([10], input_receiver.receiver_tensors["feature_1"].shape)
-      self.assertEqual([10], input_receiver.features["feature_1"].shape)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_build_raw_supervised_input_receiver_fn_overlapping_keys(self):
-    features = {"feature_1": constant_op.constant(["hello"]),
-                "feature_2": constant_op.constant([42])}
-    labels = {"feature_1": constant_op.constant([5]),
-              "bar": constant_op.constant([6])}
-    with self.assertRaises(ValueError):
-      export.build_raw_supervised_input_receiver_fn(features, labels)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_build_supervised_input_receiver_fn_from_input_fn(self):
-    def dummy_input_fn():
-      return ({"x": constant_op.constant([[1], [1]]),
-               "y": constant_op.constant(["hello", "goodbye"])},
-              constant_op.constant([[1], [1]]))
-
-    input_receiver_fn = export.build_supervised_input_receiver_fn_from_input_fn(
-        dummy_input_fn)
-
-    with ops.Graph().as_default():
-      input_receiver = input_receiver_fn()
-      self.assertEqual(set(["x", "y"]),
-                       set(input_receiver.features.keys()))
-      self.assertIsInstance(input_receiver.labels, ops.Tensor)
-      self.assertEqual(set(["x", "y", "label"]),
-                       set(input_receiver.receiver_tensors.keys()))
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_build_supervised_input_receiver_fn_from_input_fn_args(self):
-    def dummy_input_fn(feature_key="x"):
-      return ({feature_key: constant_op.constant([[1], [1]]),
-               "y": constant_op.constant(["hello", "goodbye"])},
-              {"my_label": constant_op.constant([[1], [1]])})
-
-    input_receiver_fn = export.build_supervised_input_receiver_fn_from_input_fn(
-        dummy_input_fn, feature_key="z")
-
-    with ops.Graph().as_default():
-      input_receiver = input_receiver_fn()
-      self.assertEqual(set(["z", "y"]),
-                       set(input_receiver.features.keys()))
-      self.assertEqual(set(["my_label"]),
-                       set(input_receiver.labels.keys()))
-      self.assertEqual(set(["z", "y", "my_label"]),
-                       set(input_receiver.receiver_tensors.keys()))
-
-  def test_build_all_signature_defs_without_receiver_alternatives(self):
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    output_1 = constant_op.constant([1.])
-    output_2 = constant_op.constant(["2"])
-    output_3 = constant_op.constant(["3"])
-    export_outputs = {
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-            export_output.RegressionOutput(value=output_1),
-        "head-2": export_output.ClassificationOutput(classes=output_2),
-        "head-3": export_output.PredictOutput(outputs={
-            "some_output_3": output_3
-        }),
-    }
-
-    signature_defs = export.build_all_signature_defs(
-        receiver_tensor, export_outputs)
-
-    expected_signature_defs = {
-        "serving_default":
-            signature_def_utils.regression_signature_def(receiver_tensor,
-                                                         output_1),
-        "head-2":
-            signature_def_utils.classification_signature_def(receiver_tensor,
-                                                             output_2, None),
-        "head-3":
-            signature_def_utils.predict_signature_def({
-                "input": receiver_tensor
-            }, {"some_output_3": output_3})
-    }
-
-    self.assertDictEqual(expected_signature_defs, signature_defs)
-
-  def test_build_all_signature_defs_with_dict_alternatives(self):
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    receiver_tensors_alternative_1 = {
-        "foo": array_ops.placeholder(dtypes.int64),
-        "bar": array_ops.sparse_placeholder(dtypes.float32)}
-    receiver_tensors_alternatives = {"other": receiver_tensors_alternative_1}
-    output_1 = constant_op.constant([1.])
-    output_2 = constant_op.constant(["2"])
-    output_3 = constant_op.constant(["3"])
-    export_outputs = {
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-            export_output.RegressionOutput(value=output_1),
-        "head-2": export_output.ClassificationOutput(classes=output_2),
-        "head-3": export_output.PredictOutput(outputs={
-            "some_output_3": output_3
-        }),
-    }
-
-    signature_defs = export.build_all_signature_defs(
-        receiver_tensor, export_outputs, receiver_tensors_alternatives)
-
-    expected_signature_defs = {
-        "serving_default":
-            signature_def_utils.regression_signature_def(
-                receiver_tensor,
-                output_1),
-        "head-2":
-            signature_def_utils.classification_signature_def(
-                receiver_tensor,
-                output_2, None),
-        "head-3":
-            signature_def_utils.predict_signature_def(
-                {"input": receiver_tensor},
-                {"some_output_3": output_3}),
-        "other:head-3":
-            signature_def_utils.predict_signature_def(
-                receiver_tensors_alternative_1,
-                {"some_output_3": output_3})
-
-        # Note that the alternatives 'other:serving_default' and 'other:head-2'
-        # are invalid, because regession and classification signatures must take
-        # a single string input.  Here we verify that these invalid signatures
-        # are not included in the export.
-    }
-
-    self.assertDictEqual(expected_signature_defs, signature_defs)
-
-  def test_build_all_signature_defs_with_single_alternatives(self):
-    receiver_tensor = array_ops.placeholder(dtypes.string)
-    receiver_tensors_alternative_1 = array_ops.placeholder(dtypes.int64)
-    receiver_tensors_alternative_2 = array_ops.sparse_placeholder(
-        dtypes.float32)
-    # Note we are passing single Tensors as values of
-    # receiver_tensors_alternatives, where normally that is a dict.
-    # In this case a dict will be created using the default receiver tensor
-    # name "input".
-    receiver_tensors_alternatives = {"other1": receiver_tensors_alternative_1,
-                                     "other2": receiver_tensors_alternative_2}
-    output_1 = constant_op.constant([1.])
-    output_2 = constant_op.constant(["2"])
-    output_3 = constant_op.constant(["3"])
-    export_outputs = {
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-            export_output.RegressionOutput(value=output_1),
-        "head-2": export_output.ClassificationOutput(classes=output_2),
-        "head-3": export_output.PredictOutput(outputs={
-            "some_output_3": output_3
-        }),
-    }
-
-    signature_defs = export.build_all_signature_defs(
-        receiver_tensor, export_outputs, receiver_tensors_alternatives)
-
-    expected_signature_defs = {
-        "serving_default":
-            signature_def_utils.regression_signature_def(
-                receiver_tensor,
-                output_1),
-        "head-2":
-            signature_def_utils.classification_signature_def(
-                receiver_tensor,
-                output_2, None),
-        "head-3":
-            signature_def_utils.predict_signature_def(
-                {"input": receiver_tensor},
-                {"some_output_3": output_3}),
-        "other1:head-3":
-            signature_def_utils.predict_signature_def(
-                {"input": receiver_tensors_alternative_1},
-                {"some_output_3": output_3}),
-        "other2:head-3":
-            signature_def_utils.predict_signature_def(
-                {"input": receiver_tensors_alternative_2},
-                {"some_output_3": output_3})
-
-        # Note that the alternatives 'other:serving_default' and 'other:head-2'
-        # are invalid, because regession and classification signatures must take
-        # a single string input.  Here we verify that these invalid signatures
-        # are not included in the export.
-    }
-
-    self.assertDictEqual(expected_signature_defs, signature_defs)
-
-  def test_build_all_signature_defs_export_outputs_required(self):
-    receiver_tensor = constant_op.constant(["11"])
-
-    with self.assertRaises(ValueError) as e:
-      export.build_all_signature_defs(receiver_tensor, None)
-
-    self.assertTrue(str(e.exception).startswith(
-        "export_outputs must be a dict"))
-
-  def test_get_timestamped_export_dir(self):
-    export_dir_base = tempfile.mkdtemp() + "export/"
-    export_dir_1 = export.get_timestamped_export_dir(
-        export_dir_base)
-    time.sleep(2)
-    export_dir_2 = export.get_timestamped_export_dir(
-        export_dir_base)
-    time.sleep(2)
-    export_dir_3 = export.get_timestamped_export_dir(
-        export_dir_base)
-
-    # Export directories should be named using a timestamp that is seconds
-    # since epoch.  Such a timestamp is 10 digits long.
-    time_1 = os.path.basename(export_dir_1)
-    self.assertEqual(10, len(time_1))
-    time_2 = os.path.basename(export_dir_2)
-    self.assertEqual(10, len(time_2))
-    time_3 = os.path.basename(export_dir_3)
-    self.assertEqual(10, len(time_3))
-
-    self.assertTrue(int(time_1) < int(time_2))
-    self.assertTrue(int(time_2) < int(time_3))
-
-  def test_build_all_signature_defs_serving_only(self):
-    receiver_tensor = {"input": array_ops.placeholder(dtypes.string)}
-    output_1 = constant_op.constant([1.])
-    export_outputs = {
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-            export_output.PredictOutput(outputs=output_1),
-        "train": export_output.TrainOutput(loss=output_1),
-    }
-
-    signature_defs = export.build_all_signature_defs(
-        receiver_tensor, export_outputs)
-
-    expected_signature_defs = {
-        "serving_default": signature_def_utils.predict_signature_def(
-            receiver_tensor, {"output": output_1})
-    }
-
-    self.assertDictEqual(expected_signature_defs, signature_defs)
-
-    signature_defs = export.build_all_signature_defs(
-        receiver_tensor, export_outputs, serving_only=False)
-
-    expected_signature_defs.update({
-        "train": signature_def_utils.supervised_train_signature_def(
-            receiver_tensor, loss={"loss": output_1})
-    })
-
-    self.assertDictEqual(expected_signature_defs, signature_defs)
-
-
-class TensorServingReceiverTest(test_util.TensorFlowTestCase):
-
-  def test_tensor_serving_input_receiver_constructor(self):
-    features = constant_op.constant([0])
-    receiver_tensors = {
-        "example0": array_ops.placeholder(dtypes.string, name="example0"),
-        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
-    }
-    r = export.TensorServingInputReceiver(features, receiver_tensors)
-    self.assertTrue(isinstance(r.features, ops.Tensor))
-    self.assertTrue(isinstance(r.receiver_tensors, dict))
-
-  def test_tensor_serving_input_receiver_sparse(self):
-    features = sparse_tensor.SparseTensor(
-        indices=[[0, 0]], values=[1], dense_shape=[1, 1])
-    receiver_tensors = {
-        "example0": array_ops.placeholder(dtypes.string, name="example0"),
-        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
-    }
-    r = export.TensorServingInputReceiver(features, receiver_tensors)
-    self.assertTrue(isinstance(r.features, sparse_tensor.SparseTensor))
-    self.assertTrue(isinstance(r.receiver_tensors, dict))
-
-  def test_serving_input_receiver_features_invalid(self):
-    receiver_tensors = {
-        "example0": array_ops.placeholder(dtypes.string, name="example0"),
-        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
-    }
-
-    with self.assertRaisesRegexp(ValueError, "features must be defined"):
-      export.TensorServingInputReceiver(
-          features=None,
-          receiver_tensors=receiver_tensors)
-
-    with self.assertRaisesRegexp(ValueError, "feature must be a Tensor"):
-      export.TensorServingInputReceiver(
-          features={"1": constant_op.constant([1])},
-          receiver_tensors=receiver_tensors)
-
-  def test_serving_input_receiver_receiver_tensors_invalid(self):
-    features = constant_op.constant([0])
-
-    with self.assertRaisesRegexp(
-        ValueError, "receiver_tensors must be defined"):
-      export.TensorServingInputReceiver(
-          features=features,
-          receiver_tensors=None)
-
-    with self.assertRaisesRegexp(
-        ValueError, "receiver_tensor keys must be strings"):
-      export.TensorServingInputReceiver(
-          features=features,
-          receiver_tensors={
-              1: array_ops.placeholder(dtypes.string, name="example0")})
-
-    with self.assertRaisesRegexp(
-        ValueError, "receiver_tensor example1 must be a Tensor"):
-      export.TensorServingInputReceiver(
-          features=features,
-          receiver_tensors={"example1": [1]})
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py
index b18212cfcd..7e14da56bc 100644
--- a/tensorflow/python/estimator/exporter.py
+++ b/tensorflow/python/estimator/exporter.py
@@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,495 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""`Exporter` class represents different flavors of model export."""
+"""exporter python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import abc
-import os
-
-from tensorflow.python.estimator import gc
-from tensorflow.python.estimator import util
-from tensorflow.python.estimator.canned import metric_keys
-from tensorflow.python.framework import errors_impl
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging
-from tensorflow.python.summary import summary_iterator
-from tensorflow.python.util.tf_export import estimator_export
-
-
-@estimator_export('estimator.Exporter')
-class Exporter(object):
-  """A class representing a type of model export."""
-
-  @abc.abstractproperty
-  def name(self):
-    """Directory name.
-
-    A directory name under the export base directory where exports of
-    this type are written.  Should not be `None` nor empty.
-    """
-    pass
-
-  @abc.abstractmethod
-  def export(self, estimator, export_path, checkpoint_path, eval_result,
-             is_the_final_export):
-    """Exports the given `Estimator` to a specific format.
-
-    Args:
-      estimator: the `Estimator` to export.
-      export_path: A string containing a directory where to write the export.
-      checkpoint_path: The checkpoint path to export.
-      eval_result: The output of `Estimator.evaluate` on this checkpoint.
-      is_the_final_export: This boolean is True when this is an export in the
-        end of training.  It is False for the intermediate exports during
-        the training.
-        When passing `Exporter` to `tf.estimator.train_and_evaluate`
-        `is_the_final_export` is always False if `TrainSpec.max_steps` is
-        `None`.
-
-    Returns:
-      The string path to the exported directory or `None` if export is skipped.
-    """
-    pass
-
-
-class _SavedModelExporter(Exporter):
-  """This class exports the serving graph and checkpoints.
-
-     This class provides a basic exporting functionality and serves as a
-     foundation for specialized `Exporter`s.
-  """
-
-  def __init__(self,
-               name,
-               serving_input_receiver_fn,
-               assets_extra=None,
-               as_text=False,
-               strip_default_attrs=True):
-    """Create an `Exporter` to use with `tf.estimator.EvalSpec`.
-
-    Args:
-      name: unique name of this `Exporter` that is going to be used in the
-        export path.
-      serving_input_receiver_fn: a function that takes no arguments and returns
-        a `ServingInputReceiver`.
-      assets_extra: An optional dict specifying how to populate the assets.extra
-        directory within the exported SavedModel.  Each key should give the
-        destination path (including the filename) relative to the assets.extra
-        directory.  The corresponding value gives the full path of the source
-        file to be copied.  For example, the simple case of copying a single
-        file without renaming it is specified as
-        `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
-      as_text: whether to write the SavedModel proto in text format. Defaults to
-        `False`.
-      strip_default_attrs: Boolean. If set, default attrs in the `GraphDef` will
-        be stripped on write. This is the default behavior and recommended for
-        better forward compatibility of the resulting `SavedModel`.
-
-    Raises:
-      ValueError: if any arguments is invalid.
-    """
-    self._name = name
-    self._serving_input_receiver_fn = serving_input_receiver_fn
-    self._assets_extra = assets_extra
-    self._as_text = as_text
-    self._strip_default_attrs = strip_default_attrs
-
-  @property
-  def name(self):
-    return self._name
-
-  def export(self, estimator, export_path, checkpoint_path, eval_result,
-             is_the_final_export):
-    del is_the_final_export
-
-    export_result = estimator.export_savedmodel(
-        export_path,
-        self._serving_input_receiver_fn,
-        assets_extra=self._assets_extra,
-        as_text=self._as_text,
-        checkpoint_path=checkpoint_path,
-        strip_default_attrs=self._strip_default_attrs)
-
-    return export_result
-
-
-def _loss_smaller(best_eval_result, current_eval_result):
-  """Compares two evaluation results and returns true if the 2nd one is smaller.
-
-  Both evaluation results should have the values for MetricKeys.LOSS, which are
-  used for comparison.
-
-  Args:
-    best_eval_result: best eval metrics.
-    current_eval_result: current eval metrics.
-
-  Returns:
-    True if the loss of current_eval_result is smaller; otherwise, False.
-
-  Raises:
-    ValueError: If input eval result is None or no loss is available.
-  """
-  default_key = metric_keys.MetricKeys.LOSS
-  if not best_eval_result or default_key not in best_eval_result:
-    raise ValueError(
-        'best_eval_result cannot be empty or no loss is found in it.')
-
-  if not current_eval_result or default_key not in current_eval_result:
-    raise ValueError(
-        'current_eval_result cannot be empty or no loss is found in it.')
-
-  return best_eval_result[default_key] > current_eval_result[default_key]
-
-
-def _verify_compare_fn_args(compare_fn):
-  """Verifies compare_fn arguments."""
-  args = set(util.fn_args(compare_fn))
-  if 'best_eval_result' not in args:
-    raise ValueError(
-        'compare_fn (%s) must include best_eval_result argument.' % compare_fn)
-  if 'current_eval_result' not in args:
-    raise ValueError(
-        'compare_fn (%s) must include current_eval_result argument.' %
-        compare_fn)
-  non_valid_args = list(args - set(['best_eval_result', 'current_eval_result']))
-  if non_valid_args:
-    raise ValueError('compare_fn (%s) has following not expected args: %s' %
-                     (compare_fn, non_valid_args))
-
-
-@estimator_export('estimator.BestExporter')
-class BestExporter(Exporter):
-  """This class exports the serving graph and checkpoints of the best models.
-
-  This class performs a model export everytime when the new model is better
-  than any exsiting model.
-  """
-
-  def __init__(self,
-               name='best_exporter',
-               serving_input_receiver_fn=None,
-               event_file_pattern='eval/*.tfevents.*',
-               compare_fn=_loss_smaller,
-               assets_extra=None,
-               as_text=False,
-               exports_to_keep=5):
-    """Create an `Exporter` to use with `tf.estimator.EvalSpec`.
-
-    Example of creating a BestExporter for training and evluation:
-    ```python
-    def make_train_and_eval_fn():
-      # Set up feature columns.
-      categorial_feature_a = (
-          tf.feature_column.categorical_column_with_hash_bucket(...))
-      categorial_feature_a_emb = embedding_column(
-          categorical_column=categorial_feature_a, ...)
-      ...  # other feature columns
-
-      estimator = tf.estimator.DNNClassifier(
-          config=tf.estimator.RunConfig(
-              model_dir='/my_model', save_summary_steps=100),
-          feature_columns=[categorial_feature_a_emb, ...],
-          hidden_units=[1024, 512, 256])
-
-      serving_feature_spec = tf.feature_column.make_parse_example_spec(
-          categorial_feature_a_emb)
-      serving_input_receiver_fn = (
-          tf.estimator.export.build_parsing_serving_input_receiver_fn(
-          serving_feature_spec))
-
-      exporter = tf.estimator.BestExporter(
-          name="best_exporter",
-          serving_input_receiver_fn=serving_input_receiver_fn,
-          exports_to_keep=5)
-
-      train_spec = tf.estimator.TrainSpec(...)
-
-      eval_spec = [tf.estimator.EvalSpec(
-        input_fn=eval_input_fn,
-        steps=100,
-        exporters=exporter,
-        start_delay_secs=0,
-        throttle_secs=5)]
-
-      return tf.estimator.DistributedTrainingSpec(estimator, train_spec,
-                                                  eval_spec)
-    ```
-
-    Args:
-      name: unique name of this `Exporter` that is going to be used in the
-        export path.
-      serving_input_receiver_fn: a function that takes no arguments and returns
-        a `ServingInputReceiver`.
-      event_file_pattern: event file name pattern relative to model_dir. If
-        None, however, the exporter would not be preemption-safe. To be
-        preemption-safe, event_file_pattern should be specified.
-      compare_fn: a function that compares two evaluation results and returns
-        true if current evaluation result is better. Follows the signature:
-        * Args:
-          * `best_eval_result`: This is the evaluation result of the best model.
-          * `current_eval_result`: This is the evaluation result of current
-                 candidate model.
-        * Returns:
-          True if current evaluation result is better; otherwise, False.
-      assets_extra: An optional dict specifying how to populate the assets.extra
-        directory within the exported SavedModel.  Each key should give the
-        destination path (including the filename) relative to the assets.extra
-        directory.  The corresponding value gives the full path of the source
-        file to be copied.  For example, the simple case of copying a single
-        file without renaming it is specified as `{'my_asset_file.txt':
-        '/path/to/my_asset_file.txt'}`.
-      as_text: whether to write the SavedModel proto in text format. Defaults to
-        `False`.
-      exports_to_keep: Number of exports to keep.  Older exports will be
-        garbage-collected.  Defaults to 5.  Set to `None` to disable garbage
-        collection.
-
-    Raises:
-      ValueError: if any arguments is invalid.
-    """
-    self._compare_fn = compare_fn
-    if self._compare_fn is None:
-      raise ValueError('`compare_fn` must not be None.')
-    _verify_compare_fn_args(self._compare_fn)
-
-    self._saved_model_exporter = _SavedModelExporter(
-        name, serving_input_receiver_fn, assets_extra, as_text)
-
-    self._event_file_pattern = event_file_pattern
-    self._model_dir = None
-    self._best_eval_result = None
-
-    self._exports_to_keep = exports_to_keep
-    if exports_to_keep is not None and exports_to_keep <= 0:
-      raise ValueError(
-          '`exports_to_keep`, if provided, must be positive number')
-
-  @property
-  def name(self):
-    return self._saved_model_exporter.name
-
-  def export(self, estimator, export_path, checkpoint_path, eval_result,
-             is_the_final_export):
-    export_result = None
-
-    if self._model_dir != estimator.model_dir and self._event_file_pattern:
-      # Loads best metric from event files.
-      tf_logging.info('Loading best metric from event files.')
-
-      self._model_dir = estimator.model_dir
-      full_event_file_pattern = os.path.join(self._model_dir,
-                                             self._event_file_pattern)
-      self._best_eval_result = self._get_best_eval_result(
-          full_event_file_pattern)
-
-    if self._best_eval_result is None or self._compare_fn(
-        best_eval_result=self._best_eval_result,
-        current_eval_result=eval_result):
-      tf_logging.info('Performing best model export.')
-      self._best_eval_result = eval_result
-      export_result = self._saved_model_exporter.export(
-          estimator, export_path, checkpoint_path, eval_result,
-          is_the_final_export)
-      self._garbage_collect_exports(export_path)
-
-    return export_result
-
-  def _garbage_collect_exports(self, export_dir_base):
-    """Deletes older exports, retaining only a given number of the most recent.
-
-    Export subdirectories are assumed to be named with monotonically increasing
-    integers; the most recent are taken to be those with the largest values.
-
-    Args:
-      export_dir_base: the base directory under which each export is in a
-        versioned subdirectory.
-    """
-    if self._exports_to_keep is None:
-      return
-
-    def _export_version_parser(path):
-      # create a simple parser that pulls the export_version from the directory.
-      filename = os.path.basename(path.path)
-      if not (len(filename) == 10 and filename.isdigit()):
-        return None
-      return path._replace(export_version=int(filename))
-
-    # pylint: disable=protected-access
-    keep_filter = gc._largest_export_versions(self._exports_to_keep)
-    delete_filter = gc._negation(keep_filter)
-    for p in delete_filter(
-        gc._get_paths(export_dir_base, parser=_export_version_parser)):
-      try:
-        gfile.DeleteRecursively(p.path)
-      except errors_impl.NotFoundError as e:
-        tf_logging.warn('Can not delete %s recursively: %s', p.path, e)
-    # pylint: enable=protected-access
-
-  def _get_best_eval_result(self, event_files):
-    """Get the best eval result from event files.
-
-    Args:
-      event_files: Absolute pattern of event files.
-
-    Returns:
-      The best eval result.
-    """
-    if not event_files:
-      return None
-
-    best_eval_result = None
-    for event_file in gfile.Glob(os.path.join(event_files)):
-      for event in summary_iterator.summary_iterator(event_file):
-        if event.HasField('summary'):
-          event_eval_result = {}
-          for value in event.summary.value:
-            if value.HasField('simple_value'):
-              event_eval_result[value.tag] = value.simple_value
-          if event_eval_result:
-            if best_eval_result is None or self._compare_fn(
-                best_eval_result, event_eval_result):
-              best_eval_result = event_eval_result
-    return best_eval_result
-
-
-@estimator_export('estimator.FinalExporter')
-class FinalExporter(Exporter):
-  """This class exports the serving graph and checkpoints in the end.
-
-  This class performs a single export in the end of training.
-  """
-
-  def __init__(self,
-               name,
-               serving_input_receiver_fn,
-               assets_extra=None,
-               as_text=False):
-    """Create an `Exporter` to use with `tf.estimator.EvalSpec`.
-
-    Args:
-      name: unique name of this `Exporter` that is going to be used in the
-        export path.
-      serving_input_receiver_fn: a function that takes no arguments and returns
-        a `ServingInputReceiver`.
-      assets_extra: An optional dict specifying how to populate the assets.extra
-        directory within the exported SavedModel.  Each key should give the
-        destination path (including the filename) relative to the assets.extra
-        directory.  The corresponding value gives the full path of the source
-        file to be copied.  For example, the simple case of copying a single
-        file without renaming it is specified as
-        `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
-      as_text: whether to write the SavedModel proto in text format. Defaults to
-        `False`.
-
-    Raises:
-      ValueError: if any arguments is invalid.
-    """
-    self._saved_model_exporter = _SavedModelExporter(
-        name, serving_input_receiver_fn, assets_extra, as_text)
-
-  @property
-  def name(self):
-    return self._saved_model_exporter.name
-
-  def export(self, estimator, export_path, checkpoint_path, eval_result,
-             is_the_final_export):
-    if not is_the_final_export:
-      return None
-
-    tf_logging.info('Performing the final export in the end of training.')
-
-    return self._saved_model_exporter.export(estimator, export_path,
-                                             checkpoint_path, eval_result,
-                                             is_the_final_export)
-
-
-@estimator_export('estimator.LatestExporter')
-class LatestExporter(Exporter):
-  """This class regularly exports the serving graph and checkpoints.
-
-  In addition to exporting, this class also garbage collects stale exports.
-  """
-
-  def __init__(self,
-               name,
-               serving_input_receiver_fn,
-               assets_extra=None,
-               as_text=False,
-               exports_to_keep=5):
-    """Create an `Exporter` to use with `tf.estimator.EvalSpec`.
-
-    Args:
-      name: unique name of this `Exporter` that is going to be used in the
-        export path.
-      serving_input_receiver_fn: a function that takes no arguments and returns
-        a `ServingInputReceiver`.
-      assets_extra: An optional dict specifying how to populate the assets.extra
-        directory within the exported SavedModel.  Each key should give the
-        destination path (including the filename) relative to the assets.extra
-        directory.  The corresponding value gives the full path of the source
-        file to be copied.  For example, the simple case of copying a single
-        file without renaming it is specified as
-        `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
-      as_text: whether to write the SavedModel proto in text format. Defaults to
-        `False`.
-      exports_to_keep: Number of exports to keep.  Older exports will be
-        garbage-collected.  Defaults to 5.  Set to `None` to disable garbage
-        collection.
-
-    Raises:
-      ValueError: if any arguments is invalid.
-    """
-    self._saved_model_exporter = _SavedModelExporter(
-        name, serving_input_receiver_fn, assets_extra, as_text)
-    self._exports_to_keep = exports_to_keep
-    if exports_to_keep is not None and exports_to_keep <= 0:
-      raise ValueError(
-          '`exports_to_keep`, if provided, must be positive number')
-
-  @property
-  def name(self):
-    return self._saved_model_exporter.name
-
-  def export(self, estimator, export_path, checkpoint_path, eval_result,
-             is_the_final_export):
-    export_result = self._saved_model_exporter.export(
-        estimator, export_path, checkpoint_path, eval_result,
-        is_the_final_export)
-
-    self._garbage_collect_exports(export_path)
-    return export_result
-
-  def _garbage_collect_exports(self, export_dir_base):
-    """Deletes older exports, retaining only a given number of the most recent.
-
-    Export subdirectories are assumed to be named with monotonically increasing
-    integers; the most recent are taken to be those with the largest values.
-
-    Args:
-      export_dir_base: the base directory under which each export is in a
-        versioned subdirectory.
-    """
-    if self._exports_to_keep is None:
-      return
+from tensorflow_estimator.python.estimator import exporter
 
-    def _export_version_parser(path):
-      # create a simple parser that pulls the export_version from the directory.
-      filename = os.path.basename(path.path)
-      if not (len(filename) == 10 and filename.isdigit()):
-        return None
-      return path._replace(export_version=int(filename))
+# Include attrs that start with single underscore.
+exporter.__all__ = [s for s in dir(exporter) if not s.startswith('__')]
 
-    # pylint: disable=protected-access
-    keep_filter = gc._largest_export_versions(self._exports_to_keep)
-    delete_filter = gc._negation(keep_filter)
-    for p in delete_filter(
-        gc._get_paths(export_dir_base, parser=_export_version_parser)):
-      try:
-        gfile.DeleteRecursively(p.path)
-      except errors_impl.NotFoundError as e:
-        tf_logging.warn('Can not delete %s recursively: %s', p.path, e)
-    # pylint: enable=protected-access
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.exporter import *
diff --git a/tensorflow/python/estimator/exporter_test.py b/tensorflow/python/estimator/exporter_test.py
deleted file mode 100644
index fcccfbde7a..0000000000
--- a/tensorflow/python/estimator/exporter_test.py
+++ /dev/null
@@ -1,400 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for `Exporter`s."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import tempfile
-import time
-
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.estimator import exporter as exporter_lib
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util import compat
-
-
-class BestExporterTest(test.TestCase):
-
-  def test_error_out_if_exports_to_keep_is_zero(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    with self.assertRaisesRegexp(ValueError, "positive number"):
-      exporter = exporter_lib.BestExporter(
-          name="best_exporter",
-          serving_input_receiver_fn=_serving_input_receiver_fn,
-          exports_to_keep=0)
-      self.assertEqual("best_exporter", exporter.name)
-
-  def test_best_exporter(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp()
-    gfile.MkDir(export_dir_base)
-    gfile.MkDir(export_dir_base + "/export")
-    gfile.MkDir(export_dir_base + "/eval")
-
-    exporter = exporter_lib.BestExporter(
-        name="best_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        exports_to_keep=5)
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.export_savedmodel.return_value = "export_result_path"
-    estimator.model_dir = export_dir_base
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {}, False)
-
-    self.assertEqual("export_result_path", export_result)
-    estimator.export_savedmodel.assert_called_with(
-        export_dir_base,
-        _serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        checkpoint_path="checkpoint_path",
-        strip_default_attrs=True)
-
-  def test_best_export_is_saved(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp()
-    gfile.MkDir(export_dir_base)
-    gfile.MkDir(export_dir_base + "/export")
-    gfile.MkDir(export_dir_base + "/eval")
-
-    exporter = exporter_lib.BestExporter(
-        name="best_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        exports_to_keep=1)
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.export_savedmodel.return_value = "export_result_path"
-    estimator.model_dir = export_dir_base
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"loss": 0.5}, False)
-
-    self.assertTrue(estimator.export_savedmodel.called)
-    self.assertEqual("export_result_path", export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"loss": 0.6}, False)
-    self.assertEqual(None, export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"loss": 0.4}, False)
-    self.assertEqual("export_result_path", export_result)
-
-  def test_best_exporter_with_preemption(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp()
-    gfile.MkDir(export_dir_base)
-    gfile.MkDir(export_dir_base + "/export")
-    gfile.MkDir(export_dir_base + "/eval")
-
-    eval_dir_base = os.path.join(export_dir_base, "eval_continuous")
-    estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 50}, 1)
-    estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 60}, 2)
-
-    exporter = exporter_lib.BestExporter(
-        name="best_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        event_file_pattern="eval_continuous/*.tfevents.*",
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        exports_to_keep=1)
-
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.model_dir = export_dir_base
-    estimator.export_savedmodel.return_value = "export_result_path"
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"loss": 100}, False)
-    self.assertEqual(None, export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"loss": 10}, False)
-    self.assertEqual("export_result_path", export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"loss": 20}, False)
-    self.assertEqual(None, export_result)
-
-  def test_best_exporter_with_empty_event(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp()
-    gfile.MkDir(export_dir_base)
-    gfile.MkDir(export_dir_base + "/export")
-    gfile.MkDir(export_dir_base + "/eval")
-
-    eval_dir_base = os.path.join(export_dir_base, "eval_continuous")
-    estimator_lib._write_dict_to_summary(eval_dir_base, {}, 1)
-    estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 60}, 2)
-
-    exporter = exporter_lib.BestExporter(
-        name="best_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        event_file_pattern="eval_continuous/*.tfevents.*",
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        exports_to_keep=1)
-
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.model_dir = export_dir_base
-    estimator.export_savedmodel.return_value = "export_result_path"
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"loss": 100}, False)
-    self.assertEqual(None, export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {"loss": 10}, False)
-    self.assertEqual("export_result_path", export_result)
-
-  def test_garbage_collect_exports(self):
-    export_dir_base = tempfile.mkdtemp()
-    gfile.MkDir(export_dir_base)
-    gfile.MkDir(export_dir_base + "/export")
-    gfile.MkDir(export_dir_base + "/eval")
-
-    export_dir_1 = _create_test_export_dir(export_dir_base)
-    export_dir_2 = _create_test_export_dir(export_dir_base)
-    export_dir_3 = _create_test_export_dir(export_dir_base)
-    export_dir_4 = _create_test_export_dir(export_dir_base)
-
-    self.assertTrue(gfile.Exists(export_dir_1))
-    self.assertTrue(gfile.Exists(export_dir_2))
-    self.assertTrue(gfile.Exists(export_dir_3))
-    self.assertTrue(gfile.Exists(export_dir_4))
-
-    def _serving_input_receiver_fn():
-      return array_ops.constant([1]), None
-
-    exporter = exporter_lib.BestExporter(
-        name="best_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        exports_to_keep=2)
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.model_dir = export_dir_base
-    # Garbage collect all but the most recent 2 exports,
-    # where recency is determined based on the timestamp directory names.
-    exporter.export(estimator, export_dir_base, None, None, False)
-
-    self.assertFalse(gfile.Exists(export_dir_1))
-    self.assertFalse(gfile.Exists(export_dir_2))
-    self.assertTrue(gfile.Exists(export_dir_3))
-    self.assertTrue(gfile.Exists(export_dir_4))
-
-
-class LatestExporterTest(test.TestCase):
-
-  def test_error_out_if_exports_to_keep_is_zero(self):
-    def _serving_input_receiver_fn():
-      pass
-
-    with self.assertRaisesRegexp(ValueError, "positive number"):
-      exporter = exporter_lib.LatestExporter(
-          name="latest_exporter",
-          serving_input_receiver_fn=_serving_input_receiver_fn,
-          exports_to_keep=0)
-      self.assertEqual("latest_exporter", exporter.name)
-
-  def test_latest_exporter(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp() + "export/"
-    gfile.MkDir(export_dir_base)
-
-    exporter = exporter_lib.LatestExporter(
-        name="latest_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        exports_to_keep=5)
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.export_savedmodel.return_value = "export_result_path"
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {}, False)
-
-    self.assertEqual("export_result_path", export_result)
-    estimator.export_savedmodel.assert_called_with(
-        export_dir_base,
-        _serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        checkpoint_path="checkpoint_path",
-        strip_default_attrs=True)
-
-  def test_only_the_last_export_is_saved(self):
-
-    def _serving_input_receiver_fn():
-      pass
-
-    export_dir_base = tempfile.mkdtemp() + "export/"
-    gfile.MkDir(export_dir_base)
-
-    exporter = exporter_lib.FinalExporter(
-        name="latest_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False)
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    estimator.export_savedmodel.return_value = "export_result_path"
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {}, False)
-
-    self.assertFalse(estimator.export_savedmodel.called)
-    self.assertEqual(None, export_result)
-
-    export_result = exporter.export(estimator, export_dir_base,
-                                    "checkpoint_path", {}, True)
-
-    self.assertEqual("export_result_path", export_result)
-    estimator.export_savedmodel.assert_called_with(
-        export_dir_base,
-        _serving_input_receiver_fn,
-        assets_extra={"from/path": "to/path"},
-        as_text=False,
-        checkpoint_path="checkpoint_path",
-        strip_default_attrs=True)
-
-  def test_garbage_collect_exports(self):
-    export_dir_base = tempfile.mkdtemp() + "export/"
-    gfile.MkDir(export_dir_base)
-    export_dir_1 = _create_test_export_dir(export_dir_base)
-    export_dir_2 = _create_test_export_dir(export_dir_base)
-    export_dir_3 = _create_test_export_dir(export_dir_base)
-    export_dir_4 = _create_test_export_dir(export_dir_base)
-
-    self.assertTrue(gfile.Exists(export_dir_1))
-    self.assertTrue(gfile.Exists(export_dir_2))
-    self.assertTrue(gfile.Exists(export_dir_3))
-    self.assertTrue(gfile.Exists(export_dir_4))
-
-    def _serving_input_receiver_fn():
-      return array_ops.constant([1]), None
-
-    exporter = exporter_lib.LatestExporter(
-        name="latest_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        exports_to_keep=2)
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    # Garbage collect all but the most recent 2 exports,
-    # where recency is determined based on the timestamp directory names.
-    exporter.export(estimator, export_dir_base, None, None, False)
-
-    self.assertFalse(gfile.Exists(export_dir_1))
-    self.assertFalse(gfile.Exists(export_dir_2))
-    self.assertTrue(gfile.Exists(export_dir_3))
-    self.assertTrue(gfile.Exists(export_dir_4))
-
-  def test_garbage_collect_exports_with_trailing_delimiter(self):
-    export_dir_base = tempfile.mkdtemp() + "export/"
-    gfile.MkDir(export_dir_base)
-    export_dir_1 = _create_test_export_dir(export_dir_base)
-    export_dir_2 = _create_test_export_dir(export_dir_base)
-    export_dir_3 = _create_test_export_dir(export_dir_base)
-    export_dir_4 = _create_test_export_dir(export_dir_base)
-
-    self.assertTrue(gfile.Exists(export_dir_1))
-    self.assertTrue(gfile.Exists(export_dir_2))
-    self.assertTrue(gfile.Exists(export_dir_3))
-    self.assertTrue(gfile.Exists(export_dir_4))
-
-    def _serving_input_receiver_fn():
-      return array_ops.constant([1]), None
-
-    exporter = exporter_lib.LatestExporter(
-        name="latest_exporter",
-        serving_input_receiver_fn=_serving_input_receiver_fn,
-        exports_to_keep=1)
-    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
-    # Garbage collect all but the most recent 2 exports,
-    # where recency is determined based on the timestamp directory names.
-    with test.mock.patch.object(gfile, "ListDirectory") as mock_list_directory:
-      mock_list_directory.return_value = [
-          os.path.basename(export_dir_1) + b"/",
-          os.path.basename(export_dir_2) + b"/",
-          os.path.basename(export_dir_3) + b"/",
-          os.path.basename(export_dir_4) + b"/",
-          ]
-      exporter.export(estimator, export_dir_base, None, None, False)
-
-    self.assertFalse(gfile.Exists(export_dir_1))
-    self.assertFalse(gfile.Exists(export_dir_2))
-    self.assertFalse(gfile.Exists(export_dir_3))
-    self.assertTrue(gfile.Exists(export_dir_4))
-
-
-def _create_test_export_dir(export_dir_base):
-  export_dir = _get_timestamped_export_dir(export_dir_base)
-  gfile.MkDir(export_dir)
-  time.sleep(2)
-  return export_dir
-
-
-def _get_timestamped_export_dir(export_dir_base):
-  # When we create a timestamped directory, there is a small chance that the
-  # directory already exists because another worker is also writing exports.
-  # In this case we just wait one second to get a new timestamp and try again.
-  # If this fails several times in a row, then something is seriously wrong.
-  max_directory_creation_attempts = 10
-
-  attempts = 0
-  while attempts < max_directory_creation_attempts:
-    export_timestamp = int(time.time())
-
-    export_dir = os.path.join(
-        compat.as_bytes(export_dir_base), compat.as_bytes(
-            str(export_timestamp)))
-    if not gfile.Exists(export_dir):
-      # Collisions are still possible (though extremely unlikely): this
-      # directory is not actually created yet, but it will be almost
-      # instantly on return from this function.
-      return export_dir
-    time.sleep(1)
-    attempts += 1
-    logging.warn(
-        "Export directory {} already exists; retrying (attempt {}/{})".format(
-            export_dir, attempts, max_directory_creation_attempts))
-  raise RuntimeError("Failed to obtain a unique export directory name after "
-                     "{} attempts.".format(max_directory_creation_attempts))
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/estimator/gc.py b/tensorflow/python/estimator/gc.py
index 03ad33dd6b..10db0a1e2a 100644
--- a/tensorflow/python/estimator/gc.py
+++ b/tensorflow/python/estimator/gc.py
@@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,200 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""gc python module.
 
-r"""System for specifying garbage collection (GC) of path based data.
-
-This framework allows for GC of data specified by path names, for example files
-on disk.  gc.Path objects each represent a single item stored at a path and may
-be a base directory,
-  /tmp/exports/0/...
-  /tmp/exports/1/...
-  ...
-or a fully qualified file,
-  /tmp/train-1.ckpt
-  /tmp/train-2.ckpt
-  ...
-
-A gc filter function takes and returns a list of gc.Path items.  Filter
-functions are responsible for selecting Path items for preservation or deletion.
-Note that functions should always return a sorted list.
-
-For example,
-  base_dir = "/tmp"
-  # Create the directories.
-  for e in xrange(10):
-    os.mkdir("%s/%d" % (base_dir, e), 0o755)
-
-  # Create a simple parser that pulls the export_version from the directory.
-  path_regex = "^" + re.escape(base_dir) + "/(\\d+)$"
-  def parser(path):
-    match = re.match(path_regex, path.path)
-    if not match:
-      return None
-    return path._replace(export_version=int(match.group(1)))
-
-  path_list = gc._get_paths("/tmp", parser)  # contains all ten Paths
-
-  every_fifth = gc._mod_export_version(5)
-  print(every_fifth(path_list))  # shows ["/tmp/0", "/tmp/5"]
-
-  largest_three = gc.largest_export_versions(3)
-  print(largest_three(all_paths))  # shows ["/tmp/7", "/tmp/8", "/tmp/9"]
-
-  both = gc._union(every_fifth, largest_three)
-  print(both(all_paths))  # shows ["/tmp/0", "/tmp/5",
-                          #        "/tmp/7", "/tmp/8", "/tmp/9"]
-  # Delete everything not in 'both'.
-  to_delete = gc._negation(both)
-  for p in to_delete(all_paths):
-    gfile.DeleteRecursively(p.path)  # deletes:  "/tmp/1", "/tmp/2",
-                                     # "/tmp/3", "/tmp/4", "/tmp/6",
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
 """
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import heapq
-import math
-import os
-
-from tensorflow.python.platform import gfile
-from tensorflow.python.util import compat
-
-Path = collections.namedtuple('Path', 'path export_version')
-
-
-def _largest_export_versions(n):
-  """Creates a filter that keeps the largest n export versions.
-
-  Args:
-    n: number of versions to keep.
-
-  Returns:
-    A filter function that keeps the n largest paths.
-  """
-  def keep(paths):
-    heap = []
-    for idx, path in enumerate(paths):
-      if path.export_version is not None:
-        heapq.heappush(heap, (path.export_version, idx))
-    keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
-    return sorted(keepers)
-
-  return keep
-
-
-def _one_of_every_n_export_versions(n):
-  """Creates a filter that keeps one of every n export versions.
-
-  Args:
-    n: interval size.
-
-  Returns:
-    A filter function that keeps exactly one path from each interval
-    [0, n], (n, 2n], (2n, 3n], etc...  If more than one path exists in an
-    interval the largest is kept.
-  """
-  def keep(paths):
-    """A filter function that keeps exactly one out of every n paths."""
-
-    keeper_map = {}  # map from interval to largest path seen in that interval
-    for p in paths:
-      if p.export_version is None:
-        # Skip missing export_versions.
-        continue
-      # Find the interval (with a special case to map export_version = 0 to
-      # interval 0.
-      interval = math.floor(
-          (p.export_version - 1) / n) if p.export_version else 0
-      existing = keeper_map.get(interval, None)
-      if (not existing) or (existing.export_version < p.export_version):
-        keeper_map[interval] = p
-    return sorted(keeper_map.values())
-
-  return keep
-
-
-def _mod_export_version(n):
-  """Creates a filter that keeps every export that is a multiple of n.
-
-  Args:
-    n: step size.
-
-  Returns:
-    A filter function that keeps paths where export_version % n == 0.
-  """
-  def keep(paths):
-    keepers = []
-    for p in paths:
-      if p.export_version % n == 0:
-        keepers.append(p)
-    return sorted(keepers)
-  return keep
-
-
-def _union(lf, rf):
-  """Creates a filter that keeps the union of two filters.
-
-  Args:
-    lf: first filter
-    rf: second filter
-
-  Returns:
-    A filter function that keeps the n largest paths.
-  """
-  def keep(paths):
-    l = set(lf(paths))
-    r = set(rf(paths))
-    return sorted(list(l|r))
-  return keep
-
-
-def _negation(f):
-  """Negate a filter.
-
-  Args:
-    f: filter function to invert
-
-  Returns:
-    A filter function that returns the negation of f.
-  """
-  def keep(paths):
-    l = set(paths)
-    r = set(f(paths))
-    return sorted(list(l-r))
-  return keep
-
-
-def _get_paths(base_dir, parser):
-  """Gets a list of Paths in a given directory.
+from tensorflow_estimator.python.estimator import gc
 
-  Args:
-    base_dir: directory.
-    parser: a function which gets the raw Path and can augment it with
-      information such as the export_version, or ignore the path by returning
-      None.  An example parser may extract the export version from a path
-      such as "/tmp/exports/100" an another may extract from a full file
-      name such as "/tmp/checkpoint-99.out".
+# Include attrs that start with single underscore.
+gc.__all__ = [s for s in dir(gc) if not s.startswith('__')]
 
-  Returns:
-    A list of Paths contained in the base directory with the parsing function
-    applied.
-    By default the following fields are populated,
-      - Path.path
-    The parsing function is responsible for populating,
-      - Path.export_version
-  """
-  raw_paths = gfile.ListDirectory(base_dir)
-  paths = []
-  for r in raw_paths:
-    # ListDirectory() return paths with "/" at the last if base_dir was GCS URL
-    r = compat.as_str_any(r)
-    if r[-1] == '/':
-      r = r[0:len(r)-1]
-    p = parser(Path(os.path.join(compat.as_str_any(base_dir), r), None))
-    if p:
-      paths.append(p)
-  return sorted(paths)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.gc import *
diff --git a/tensorflow/python/estimator/gc_test.py b/tensorflow/python/estimator/gc_test.py
deleted file mode 100644
index 53c3d4ca2a..0000000000
--- a/tensorflow/python/estimator/gc_test.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for garbage collection utilities."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import re
-
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
-from tensorflow.python.estimator import gc
-from tensorflow.python.framework import test_util
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.util import compat
-
-
-def _create_parser(base_dir):
-  # create a simple parser that pulls the export_version from the directory.
-  def parser(path):
-    # Modify the path object for RegEx match for Windows Paths
-    if os.name == "nt":
-      match = re.match(
-          "^" + compat.as_str_any(base_dir).replace("\\", "/") + "/(\\d+)$",
-          compat.as_str_any(path.path).replace("\\", "/"))
-    else:
-      match = re.match("^" + compat.as_str_any(base_dir) + "/(\\d+)$",
-                       compat.as_str_any(path.path))
-    if not match:
-      return None
-    return path._replace(export_version=int(match.group(1)))
-
-  return parser
-
-
-class GcTest(test_util.TensorFlowTestCase):
-
-  def testLargestExportVersions(self):
-    paths = [gc.Path("/foo", 8), gc.Path("/foo", 9), gc.Path("/foo", 10)]
-    newest = gc._largest_export_versions(2)
-    n = newest(paths)
-    self.assertEqual(n, [gc.Path("/foo", 9), gc.Path("/foo", 10)])
-
-  def testLargestExportVersionsDoesNotDeleteZeroFolder(self):
-    paths = [gc.Path("/foo", 0), gc.Path("/foo", 3)]
-    newest = gc._largest_export_versions(2)
-    n = newest(paths)
-    self.assertEqual(n, [gc.Path("/foo", 0), gc.Path("/foo", 3)])
-
-  def testModExportVersion(self):
-    paths = [
-        gc.Path("/foo", 4), gc.Path("/foo", 5), gc.Path("/foo", 6),
-        gc.Path("/foo", 9)
-    ]
-    mod = gc._mod_export_version(2)
-    self.assertEqual(mod(paths), [gc.Path("/foo", 4), gc.Path("/foo", 6)])
-    mod = gc._mod_export_version(3)
-    self.assertEqual(mod(paths), [gc.Path("/foo", 6), gc.Path("/foo", 9)])
-
-  def testOneOfEveryNExportVersions(self):
-    paths = [
-        gc.Path("/foo", 0), gc.Path("/foo", 1), gc.Path("/foo", 3),
-        gc.Path("/foo", 5), gc.Path("/foo", 6), gc.Path("/foo", 7),
-        gc.Path("/foo", 8), gc.Path("/foo", 33)
-    ]
-    one_of = gc._one_of_every_n_export_versions(3)
-    self.assertEqual(
-        one_of(paths), [
-            gc.Path("/foo", 3), gc.Path("/foo", 6), gc.Path("/foo", 8),
-            gc.Path("/foo", 33)
-        ])
-
-  def testOneOfEveryNExportVersionsZero(self):
-    # Zero is a special case since it gets rolled into the first interval.
-    # Test that here.
-    paths = [gc.Path("/foo", 0), gc.Path("/foo", 4), gc.Path("/foo", 5)]
-    one_of = gc._one_of_every_n_export_versions(3)
-    self.assertEqual(one_of(paths), [gc.Path("/foo", 0), gc.Path("/foo", 5)])
-
-  def testUnion(self):
-    paths = []
-    for i in xrange(10):
-      paths.append(gc.Path("/foo", i))
-    f = gc._union(gc._largest_export_versions(3), gc._mod_export_version(3))
-    self.assertEqual(
-        f(paths), [
-            gc.Path("/foo", 0), gc.Path("/foo", 3), gc.Path("/foo", 6),
-            gc.Path("/foo", 7), gc.Path("/foo", 8), gc.Path("/foo", 9)
-        ])
-
-  def testNegation(self):
-    paths = [
-        gc.Path("/foo", 4), gc.Path("/foo", 5), gc.Path("/foo", 6),
-        gc.Path("/foo", 9)
-    ]
-    mod = gc._negation(gc._mod_export_version(2))
-    self.assertEqual(mod(paths), [gc.Path("/foo", 5), gc.Path("/foo", 9)])
-    mod = gc._negation(gc._mod_export_version(3))
-    self.assertEqual(mod(paths), [gc.Path("/foo", 4), gc.Path("/foo", 5)])
-
-  def testPathsWithParse(self):
-    base_dir = os.path.join(test.get_temp_dir(), "paths_parse")
-    self.assertFalse(gfile.Exists(base_dir))
-    for p in xrange(3):
-      gfile.MakeDirs(os.path.join(base_dir, "%d" % p))
-    # add a base_directory to ignore
-    gfile.MakeDirs(os.path.join(base_dir, "ignore"))
-
-    self.assertEqual(
-        gc._get_paths(base_dir, _create_parser(base_dir)),
-        [
-            gc.Path(os.path.join(base_dir, "0"), 0),
-            gc.Path(os.path.join(base_dir, "1"), 1),
-            gc.Path(os.path.join(base_dir, "2"), 2)
-        ])
-
-  def testMixedStrTypes(self):
-    temp_dir = compat.as_bytes(test.get_temp_dir())
-
-    for sub_dir in ["str", b"bytes", u"unicode"]:
-      base_dir = os.path.join(
-          (temp_dir if isinstance(sub_dir, bytes) else temp_dir.decode()),
-          sub_dir)
-      self.assertFalse(gfile.Exists(base_dir))
-      gfile.MakeDirs(os.path.join(compat.as_str_any(base_dir), "42"))
-      gc._get_paths(base_dir, _create_parser(base_dir))
-
-  def testGcsDirWithSeparator(self):
-    base_dir = "gs://bucket/foo"
-    with test.mock.patch.object(gfile, "ListDirectory") as mock_list_directory:
-      # gfile.ListDirectory returns directory names with separator '/'
-      mock_list_directory.return_value = ["0/", "1/"]
-      self.assertEqual(
-          gc._get_paths(base_dir, _create_parser(base_dir)),
-          [
-              gc.Path(os.path.join(base_dir, "0"), 0),
-              gc.Path(os.path.join(base_dir, "1"), 1)
-          ])
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/estimator/inputs/__init__.py b/tensorflow/python/estimator/inputs/__init__.py
index e69de29bb2..b35091b11f 100644
--- a/tensorflow/python/estimator/inputs/__init__.py
+++ b/tensorflow/python/estimator/inputs/__init__.py
@@ -0,0 +1,32 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""inputs python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow_estimator.python.estimator import inputs
+
+# Include attrs that start with single underscore.
+inputs.__all__ = [s for s in dir(inputs) if not s.startswith('__')]
+
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.inputs import *
diff --git a/tensorflow/python/estimator/inputs/inputs.py b/tensorflow/python/estimator/inputs/inputs.py
index 6be168ee08..6084cee72b 100644
--- a/tensorflow/python/estimator/inputs/inputs.py
+++ b/tensorflow/python/estimator/inputs/inputs.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,14 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Utility methods to create simple input_fns."""
+"""inputs python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=unused-import,line-too-long
-from tensorflow.python.estimator.inputs.numpy_io import numpy_input_fn
-from tensorflow.python.estimator.inputs.pandas_io import pandas_input_fn
+from tensorflow_estimator.python.estimator.inputs import inputs
+
+# Include attrs that start with single underscore.
+inputs.__all__ = [s for s in dir(inputs) if not s.startswith('__')]
 
-# pylint: enable=unused-import,line-too-long
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.inputs.inputs import *
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index a6cefdece2..d408d6384d 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,214 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Methods to allow dict of numpy arrays."""
+"""numpy_io python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-
-import numpy as np
-from six import string_types
-
-from tensorflow.python.estimator.inputs.queues import feeding_functions
-from tensorflow.python.util.tf_export import estimator_export
-
-# Key name to pack the target into dict of `features`. See
-# `_get_unique_target_key` for details.
-_TARGET_KEY = '__target_key__'
-
-
-def _get_unique_target_key(features):
-  """Returns a key not existed in the input dict `features`.
-
-  Caller of `input_fn` usually provides `features` (dict of numpy arrays) and
-  `target`, but the underlying feeding module expects a single dict of numpy
-  arrays as input. So, the `target` needs to be packed into the `features`
-  temporarily and unpacked after calling the feeding function. Toward this goal,
-  this function returns a key not existed in the `features` to pack the
-  `target`.
-
-  Args:
-    features: OrderedDict of numpy arrays
-
-  Returns:
-    A unique key that can be used to insert the subsequent target into
-      features dict.
-  """
-  target_key = _TARGET_KEY
-  while target_key in features:
-    target_key += '_n'
-  return target_key
-
-
-def _validate_and_convert_features(x):
-  """Type check input data and make a shadow copy as an ordered dict.
-
-  Args:
-    x: numpy array object or dict of numpy array objects. If an array,
-      the array will be treated as a single feature.
-
-  Returns:
-    OrderedDict copy of x.
-
-  Raises:
-    ValueError: if x is empty
-    TypeError: if x is an unknown type.
-  """
-  if isinstance(x, dict):
-    if not x:
-      raise ValueError('x cannot be an empty dict')
-    # Make a shadow copy and also ensure the order of iteration is consistent.
-    ordered_dict_data = collections.OrderedDict(
-        sorted(x.items(), key=lambda t: t[0]))
-  elif isinstance(x, np.ndarray):
-    if x.size == 0:
-      raise ValueError('x cannot be an empty array')
-
-    # Make a shadow copy and convert to dict to align with dict processing.
-    ordered_dict_data = collections.OrderedDict({'__direct_np_input__': x})
-  else:
-    x_type = type(x).__name__
-    raise TypeError('x must be a dict or array; got {}'.format(x_type))
-
-  return ordered_dict_data
-
-
-@estimator_export('estimator.inputs.numpy_input_fn')
-def numpy_input_fn(x,
-                   y=None,
-                   batch_size=128,
-                   num_epochs=1,
-                   shuffle=None,
-                   queue_capacity=1000,
-                   num_threads=1):
-  """Returns input function that would feed dict of numpy arrays into the model.
-
-  This returns a function outputting `features` and `targets` based on the dict
-  of numpy arrays. The dict `features` has the same keys as the `x`. The dict
-  `targets` has the same keys as the `y` if `y` is a dict.
-
-  Example:
-
-  ```python
-  age = np.arange(4) * 1.0
-  height = np.arange(32, 36)
-  x = {'age': age, 'height': height}
-  y = np.arange(-32, -28)
-
-  with tf.Session() as session:
-    input_fn = numpy_io.numpy_input_fn(
-        x, y, batch_size=2, shuffle=False, num_epochs=1)
-  ```
-
-  Args:
-    x: numpy array object or dict of numpy array objects. If an array,
-      the array will be treated as a single feature.
-    y: numpy array object or dict of numpy array object. `None` if absent.
-    batch_size: Integer, size of batches to return.
-    num_epochs: Integer, number of epochs to iterate over data. If `None` will
-      run forever.
-    shuffle: Boolean, if True shuffles the queue. Avoid shuffle at prediction
-      time.
-    queue_capacity: Integer, size of queue to accumulate.
-    num_threads: Integer, number of threads used for reading and enqueueing. In
-      order to have predicted and repeatable order of reading and enqueueing,
-      such as in prediction and evaluation mode, `num_threads` should be 1.
-
-  Returns:
-    Function, that has signature of ()->(dict of `features`, `targets`)
-
-  Raises:
-    ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e.,
-      values in `x` have same shape).
-    ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict.
-    ValueError: if x or y is an empty dict.
-    TypeError: `x` is not a dict or array.
-    ValueError: if 'shuffle' is not provided or a bool.
-  """
-  if not isinstance(shuffle, bool):
-    raise ValueError('shuffle must be provided and explicitly set as boolean '
-                     '(it is recommended to set it as True for training); '
-                     'got {}'.format(shuffle))
-
-  def input_fn():
-    """Numpy input function."""
-
-    # Note that `x` should not be used after conversion to ordered_dict_data,
-    # as type could be either dict or array.
-    ordered_dict_data = _validate_and_convert_features(x)
-
-    # Deep copy keys which is a view in python 3
-    feature_keys = list(ordered_dict_data.keys())
-
-    if y is None:
-      target_keys = None
-    elif isinstance(y, dict):
-      if not y:
-        raise ValueError('y cannot be empty dict, use None instead.')
-
-      ordered_dict_y = collections.OrderedDict(
-          sorted(y.items(), key=lambda t: t[0]))
-      target_keys = list(ordered_dict_y.keys())
-
-      duplicate_keys = set(feature_keys).intersection(set(target_keys))
-      if duplicate_keys:
-        raise ValueError('{} duplicate keys are found in both x and y: '
-                         '{}'.format(len(duplicate_keys), duplicate_keys))
-
-      ordered_dict_data.update(ordered_dict_y)
-    else:
-      target_keys = _get_unique_target_key(ordered_dict_data)
-      ordered_dict_data[target_keys] = y
-
-    if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
-      shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys}
-
-      if target_keys is None:
-        shape_of_y = None
-      elif isinstance(target_keys, string_types):
-        shape_of_y = y.shape
-      else:
-        shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys}
-
-      raise ValueError('Length of tensors in x and y is mismatched. All '
-                       'elements in x and y must have the same length.\n'
-                       'Shapes in x: {}\n'
-                       'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y))
-
-    queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
-        ordered_dict_data,
-        queue_capacity,
-        shuffle=shuffle,
-        num_threads=num_threads,
-        enqueue_size=batch_size,
-        num_epochs=num_epochs)
-
-    batch = (
-        queue.dequeue_many(batch_size)
-        if num_epochs is None else queue.dequeue_up_to(batch_size))
-
-    # Remove the first `Tensor` in `batch`, which is the row number.
-    if batch:
-      batch.pop(0)
-
-    if isinstance(x, np.ndarray):
-      # Return as the same type as original array.
-      features = batch[0]
-    else:
-      # Return as the original dict type
-      features = dict(zip(feature_keys, batch[:len(feature_keys)]))
+from tensorflow_estimator.python.estimator.inputs import numpy_io
 
-    if target_keys is None:
-      # TODO(martinwicke), return consistent result
-      return features
-    elif isinstance(target_keys, string_types):
-      target = batch[-1]
-      return features, target
-    else:
-      target = dict(zip(target_keys, batch[-len(target_keys):]))
-      return features, target
+# Include attrs that start with single underscore.
+numpy_io.__all__ = [s for s in dir(numpy_io) if not s.startswith('__')]
 
-  return input_fn
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.inputs.numpy_io import *
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
deleted file mode 100644
index 632908415f..0000000000
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ /dev/null
@@ -1,620 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for numpy_io."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-from tensorflow.python.client import session as session_lib
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column_lib as fc
-from tensorflow.python.feature_column.feature_column import _LinearModel
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables as variables_lib
-from tensorflow.python.platform import test
-from tensorflow.python.training import coordinator
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import queue_runner_impl
-
-
-class NumpyIoTest(test.TestCase):
-
-  def testNumpyInputFn(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -28)
-
-    with self.cached_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      session.run([features, target])
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithVeryLargeBatchSizeAndMultipleEpochs(self):
-    a = np.arange(2) * 1.0
-    b = np.arange(32, 34)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -30)
-
-    with self.cached_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=128, shuffle=False, num_epochs=2)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1, 0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33, 32, 33])
-      self.assertAllEqual(res[1], [-32, -31, -32, -31])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithZeroEpochs(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -28)
-
-    with self.cached_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=0)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithBatchSizeNotDividedByDataSize(self):
-    batch_size = 2
-    a = np.arange(5) * 1.0
-    b = np.arange(32, 37)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -27)
-
-    with self.cached_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [2, 3])
-      self.assertAllEqual(res[0]['b'], [34, 35])
-      self.assertAllEqual(res[1], [-30, -29])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [4])
-      self.assertAllEqual(res[0]['b'], [36])
-      self.assertAllEqual(res[1], [-28])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithBatchSizeNotDividedByDataSizeAndMultipleEpochs(self):
-    batch_size = 2
-    a = np.arange(3) * 1.0
-    b = np.arange(32, 35)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -29)
-
-    with self.cached_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=batch_size, shuffle=False, num_epochs=3)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [2, 0])
-      self.assertAllEqual(res[0]['b'], [34, 32])
-      self.assertAllEqual(res[1], [-30, -32])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [1, 2])
-      self.assertAllEqual(res[0]['b'], [33, 34])
-      self.assertAllEqual(res[1], [-31, -30])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [2])
-      self.assertAllEqual(res[0]['b'], [34])
-      self.assertAllEqual(res[1], [-30])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithBatchSizeLargerThanDataSize(self):
-    batch_size = 10
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -28)
-
-    with self.cached_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1, 2, 3])
-      self.assertAllEqual(res[0]['b'], [32, 33, 34, 35])
-      self.assertAllEqual(res[1], [-32, -31, -30, -29])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithDifferentDimensionsOfFeatures(self):
-    a = np.array([[1, 2], [3, 4]])
-    b = np.array([5, 6])
-    x = {'a': a, 'b': b}
-    y = np.arange(-32, -30)
-
-    with self.cached_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-      features, target = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [[1, 2], [3, 4]])
-      self.assertAllEqual(res[0]['b'], [5, 6])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithXAsNonDict(self):
-    x = list(range(32, 36))
-    y = np.arange(4)
-    with self.cached_session():
-      with self.assertRaisesRegexp(TypeError, 'x must be a dict or array'):
-        failing_input_fn = numpy_io.numpy_input_fn(
-            x, y, batch_size=2, shuffle=False, num_epochs=1)
-        failing_input_fn()
-
-  def testNumpyInputFnWithXIsEmptyDict(self):
-    x = {}
-    y = np.arange(4)
-    with self.cached_session():
-      with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'):
-        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
-        failing_input_fn()
-
-  def testNumpyInputFnWithXIsEmptyArray(self):
-    x = np.array([[], []])
-    y = np.arange(4)
-    with self.cached_session():
-      with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'):
-        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
-        failing_input_fn()
-
-  def testNumpyInputFnWithYIsNone(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = None
-
-    with self.cached_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-      features_tensor = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      feature = session.run(features_tensor)
-      self.assertEqual(len(feature), 2)
-      self.assertAllEqual(feature['a'], [0, 1])
-      self.assertAllEqual(feature['b'], [32, 33])
-
-      session.run([features_tensor])
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features_tensor])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithNonBoolShuffle(self):
-    x = np.arange(32, 36)
-    y = np.arange(4)
-    with self.cached_session():
-      with self.assertRaisesRegexp(ValueError,
-                                   'shuffle must be provided and explicitly '
-                                   'set as boolean'):
-        # Default shuffle is None.
-        numpy_io.numpy_input_fn(x, y)
-
-  def testNumpyInputFnWithTargetKeyAlreadyInX(self):
-    array = np.arange(32, 36)
-    x = {'__target_key__': array}
-    y = np.arange(4)
-
-    with self.cached_session():
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-      input_fn()
-      self.assertAllEqual(x['__target_key__'], array)
-      # The input x should not be mutated.
-      self.assertItemsEqual(x.keys(), ['__target_key__'])
-
-  def testNumpyInputFnWithMismatchLengthOfInputs(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    x_mismatch_length = {'a': np.arange(1), 'b': b}
-    y_longer_length = np.arange(10)
-
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          ValueError, 'Length of tensors in x and y is mismatched.'):
-        failing_input_fn = numpy_io.numpy_input_fn(
-            x, y_longer_length, batch_size=2, shuffle=False, num_epochs=1)
-        failing_input_fn()
-
-      with self.assertRaisesRegexp(
-          ValueError, 'Length of tensors in x and y is mismatched.'):
-        failing_input_fn = numpy_io.numpy_input_fn(
-            x=x_mismatch_length,
-            y=None,
-            batch_size=2,
-            shuffle=False,
-            num_epochs=1)
-        failing_input_fn()
-
-  def testNumpyInputFnWithYAsDict(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)}
-
-    with self.cached_session() as session:
-      input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-      features_tensor, targets_tensor = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      features, targets = session.run([features_tensor, targets_tensor])
-      self.assertEqual(len(features), 2)
-      self.assertAllEqual(features['a'], [0, 1])
-      self.assertAllEqual(features['b'], [32, 33])
-      self.assertEqual(len(targets), 2)
-      self.assertAllEqual(targets['y1'], [-32, -31])
-      self.assertAllEqual(targets['y2'], [32, 31])
-
-      session.run([features_tensor, targets_tensor])
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features_tensor, targets_tensor])
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testNumpyInputFnWithYIsEmptyDict(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = {}
-    with self.cached_session():
-      with self.assertRaisesRegexp(ValueError, 'y cannot be empty'):
-        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
-        failing_input_fn()
-
-  def testNumpyInputFnWithDuplicateKeysInXAndY(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x = {'a': a, 'b': b}
-    y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b}
-    with self.cached_session():
-      with self.assertRaisesRegexp(
-          ValueError, '2 duplicate keys are found in both x and y'):
-        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
-        failing_input_fn()
-
-  def testNumpyInputFnWithXIsArray(self):
-    x = np.arange(4) * 1.0
-    y = np.arange(-32, -28)
-
-    input_fn = numpy_io.numpy_input_fn(
-        x, y, batch_size=2, shuffle=False, num_epochs=1)
-    features, target = input_fn()
-
-    with monitored_session.MonitoredSession() as session:
-      res = session.run([features, target])
-      self.assertAllEqual(res[0], [0, 1])
-      self.assertAllEqual(res[1], [-32, -31])
-
-      session.run([features, target])
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-  def testNumpyInputFnWithXIsNDArray(self):
-    x = np.arange(16).reshape(4, 2, 2) * 1.0
-    y = np.arange(-48, -32).reshape(4, 2, 2)
-
-    input_fn = numpy_io.numpy_input_fn(
-        x, y, batch_size=2, shuffle=False, num_epochs=1)
-    features, target = input_fn()
-
-    with monitored_session.MonitoredSession() as session:
-      res = session.run([features, target])
-      self.assertAllEqual(res[0], [[[0, 1], [2, 3]], [[4, 5], [6, 7]]])
-      self.assertAllEqual(
-          res[1], [[[-48, -47], [-46, -45]], [[-44, -43], [-42, -41]]])
-
-      session.run([features, target])
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
-
-  def testNumpyInputFnWithXIsArrayYIsDict(self):
-    x = np.arange(4) * 1.0
-    y = {'y1': np.arange(-32, -28)}
-
-    input_fn = numpy_io.numpy_input_fn(
-        x, y, batch_size=2, shuffle=False, num_epochs=1)
-    features_tensor, targets_tensor = input_fn()
-
-    with monitored_session.MonitoredSession() as session:
-      features, targets = session.run([features_tensor, targets_tensor])
-      self.assertEqual(len(features), 2)
-      self.assertAllEqual(features, [0, 1])
-      self.assertEqual(len(targets), 1)
-      self.assertAllEqual(targets['y1'], [-32, -31])
-
-      session.run([features_tensor, targets_tensor])
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run([features_tensor, targets_tensor])
-
-  def testArrayAndDictGiveSameOutput(self):
-    a = np.arange(4) * 1.0
-    b = np.arange(32, 36)
-    x_arr = np.vstack((a, b))
-    x_dict = {'feature1': x_arr}
-    y = np.arange(-48, -40).reshape(2, 4)
-
-    input_fn_arr = numpy_io.numpy_input_fn(
-        x_arr, y, batch_size=2, shuffle=False, num_epochs=1)
-    features_arr, targets_arr = input_fn_arr()
-
-    input_fn_dict = numpy_io.numpy_input_fn(
-        x_dict, y, batch_size=2, shuffle=False, num_epochs=1)
-    features_dict, targets_dict = input_fn_dict()
-
-    with monitored_session.MonitoredSession() as session:
-      res_arr, res_dict = session.run([
-          (features_arr, targets_arr), (features_dict, targets_dict)])
-
-      self.assertAllEqual(res_arr[0], res_dict[0]['feature1'])
-      self.assertAllEqual(res_arr[1], res_dict[1])
-
-
-class FeatureColumnIntegrationTest(test.TestCase):
-
-  def _initialized_session(self, config=None):
-    sess = session_lib.Session(config=config)
-    sess.run(variables_lib.global_variables_initializer())
-    sess.run(lookup_ops.tables_initializer())
-    return sess
-
-  def _get_linear_model_bias(self, name='linear_model'):
-    with variable_scope.variable_scope(name, reuse=True):
-      return variable_scope.get_variable('bias_weights')
-
-  def _get_linear_model_column_var(self, column, name='linear_model'):
-    return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
-                              name + '/' + column.name)[0]
-
-  def _get_keras_linear_model_predictions(
-      self,
-      features,
-      feature_columns,
-      units=1,
-      sparse_combiner='sum',
-      weight_collections=None,
-      trainable=True,
-      cols_to_vars=None):
-    keras_linear_model = _LinearModel(
-        feature_columns,
-        units,
-        sparse_combiner,
-        weight_collections,
-        trainable,
-        name='linear_model')
-    retval = keras_linear_model(features)  # pylint: disable=not-callable
-    if cols_to_vars is not None:
-      cols_to_vars.update(keras_linear_model.cols_to_vars())
-    return retval
-
-  def test_linear_model_numpy_input_fn(self):
-    price = fc.numeric_column('price')
-    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,])
-    body_style = fc.categorical_column_with_vocabulary_list(
-        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
-
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'price': np.array([-1., 2., 13., 104.]),
-            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
-        },
-        batch_size=2,
-        shuffle=False)
-    features = input_fn()
-    net = fc.linear_model(features, [price_buckets, body_style])
-    # self.assertEqual(1 + 3 + 5, net.shape[1])
-    with self._initialized_session() as sess:
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-
-      bias = self._get_linear_model_bias()
-      price_buckets_var = self._get_linear_model_column_var(price_buckets)
-      body_style_var = self._get_linear_model_column_var(body_style)
-
-      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
-      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
-      sess.run(bias.assign([5.]))
-
-      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def test_linear_model_impl_numpy_input_fn(self):
-    price = fc.numeric_column('price')
-    price_buckets = fc.bucketized_column(
-        price, boundaries=[
-            0.,
-            10.,
-            100.,
-        ])
-    body_style = fc.categorical_column_with_vocabulary_list(
-        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
-
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'price': np.array([-1., 2., 13., 104.]),
-            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
-        },
-        batch_size=2,
-        shuffle=False)
-    features = input_fn()
-    net = self._get_keras_linear_model_predictions(
-        features, [price_buckets, body_style])
-    # self.assertEqual(1 + 3 + 5, net.shape[1])
-    with self._initialized_session() as sess:
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-
-      bias = self._get_linear_model_bias()
-      price_buckets_var = self._get_linear_model_column_var(price_buckets)
-      body_style_var = self._get_linear_model_column_var(body_style)
-
-      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
-      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
-      sess.run(bias.assign([5.]))
-
-      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def test_functional_input_layer_with_numpy_input_fn(self):
-    embedding_values = (
-        (1., 2., 3., 4., 5.),  # id 0
-        (6., 7., 8., 9., 10.),  # id 1
-        (11., 12., 13., 14., 15.)  # id 2
-    )
-    def _initializer(shape, dtype, partition_info):
-      del shape, dtype, partition_info
-      return embedding_values
-
-    # price has 1 dimension in input_layer
-    price = fc.numeric_column('price')
-    body_style = fc.categorical_column_with_vocabulary_list(
-        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
-    # one_hot_body_style has 3 dims in input_layer.
-    one_hot_body_style = fc.indicator_column(body_style)
-    # embedded_body_style has 5 dims in input_layer.
-    embedded_body_style = fc.embedding_column(body_style, dimension=5,
-                                              initializer=_initializer)
-
-    input_fn = numpy_io.numpy_input_fn(
-        x={
-            'price': np.array([11., 12., 13., 14.]),
-            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
-        },
-        batch_size=2,
-        shuffle=False)
-    features = input_fn()
-    net = fc.input_layer(features,
-                         [price, one_hot_body_style, embedded_body_style])
-    self.assertEqual(1 + 3 + 5, net.shape[1])
-    with self._initialized_session() as sess:
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-
-      # Each row is formed by concatenating `embedded_body_style`,
-      # `one_hot_body_style`, and `price` in order.
-      self.assertAllEqual(
-          [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
-           [1., 2., 3., 4., 5., 1., 0., 0., 12]],
-          sess.run(net))
-
-      coord.request_stop()
-      coord.join(threads)
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py
index 616bcb410f..50bff2f717 100644
--- a/tensorflow/python/estimator/inputs/pandas_io.py
+++ b/tensorflow/python/estimator/inputs/pandas_io.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,146 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""pandas_io python module.
 
-"""Methods to allow pandas.DataFrame."""
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import six
-import uuid
 
-import numpy as np
-from tensorflow.python.estimator.inputs.queues import feeding_functions
-from tensorflow.python.util.tf_export import estimator_export
+from tensorflow_estimator.python.estimator.inputs import pandas_io
 
-try:
-  # pylint: disable=g-import-not-at-top
-  # pylint: disable=unused-import
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
+# Include attrs that start with single underscore.
+pandas_io.__all__ = [s for s in dir(pandas_io) if not s.startswith('__')]
 
-
-def _get_unique_target_key(features, target_column_name):
-  """Returns a key that does not exist in the input DataFrame `features`.
-
-  Args:
-    features: DataFrame
-    target_column_name: Name of the target column as a `str`
-
-  Returns:
-    A unique key that can be used to insert the target into
-      features.
-  """
-  if target_column_name in features:
-    target_column_name += '_' + str(uuid.uuid4())
-  return target_column_name
-
-
-@estimator_export('estimator.inputs.pandas_input_fn')
-def pandas_input_fn(x,
-                    y=None,
-                    batch_size=128,
-                    num_epochs=1,
-                    shuffle=None,
-                    queue_capacity=1000,
-                    num_threads=1,
-                    target_column='target'):
-  """Returns input function that would feed Pandas DataFrame into the model.
-
-  Note: `y`'s index must match `x`'s index.
-
-  Args:
-    x: pandas `DataFrame` object.
-    y: pandas `Series` object or `DataFrame`. `None` if absent.
-    batch_size: int, size of batches to return.
-    num_epochs: int, number of epochs to iterate over data. If not `None`,
-      read attempts that would exceed this value will raise `OutOfRangeError`.
-    shuffle: bool, whether to read the records in random order.
-    queue_capacity: int, size of the read queue. If `None`, it will be set
-      roughly to the size of `x`.
-    num_threads: Integer, number of threads used for reading and enqueueing. In
-      order to have predicted and repeatable order of reading and enqueueing,
-      such as in prediction and evaluation mode, `num_threads` should be 1.
-    target_column: str, name to give the target column `y`. This parameter
-      is not used when `y` is a `DataFrame`.
-
-  Returns:
-    Function, that has signature of ()->(dict of `features`, `target`)
-
-  Raises:
-    ValueError: if `x` already contains a column with the same name as `y`, or
-      if the indexes of `x` and `y` don't match.
-    ValueError: if 'shuffle' is not provided or a bool.
-  """
-  if not HAS_PANDAS:
-    raise TypeError(
-        'pandas_input_fn should not be called without pandas installed')
-
-  if not isinstance(shuffle, bool):
-    raise ValueError('shuffle must be provided and explicitly set as boolean '
-                     '(it is recommended to set it as True for training); '
-                     'got {}'.format(shuffle))
-
-  if not isinstance(target_column, six.string_types):
-    raise TypeError('target_column must be a string type')
-
-  x = x.copy()
-  if y is not None:
-    if target_column in x:
-      raise ValueError(
-          'Cannot use name %s for target column: DataFrame already has a '
-          'column with that name: %s' % (target_column, x.columns))
-    if not np.array_equal(x.index, y.index):
-      raise ValueError('Index for x and y are mismatched.\nIndex for x: %s\n'
-                       'Index for y: %s\n' % (x.index, y.index))
-    if isinstance(y, pd.DataFrame):
-      y_columns = [(column, _get_unique_target_key(x, column))
-                   for column in list(y)]
-      target_column = [v for _, v in y_columns]
-      x[target_column] = y
-    else:
-      x[target_column] = y
-
-  # TODO(mdan): These are memory copies. We probably don't need 4x slack space.
-  # The sizes below are consistent with what I've seen elsewhere.
-  if queue_capacity is None:
-    if shuffle:
-      queue_capacity = 4 * len(x)
-    else:
-      queue_capacity = len(x)
-  min_after_dequeue = max(queue_capacity / 4, 1)
-
-  def input_fn():
-    """Pandas input function."""
-    queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
-        x,
-        queue_capacity,
-        shuffle=shuffle,
-        min_after_dequeue=min_after_dequeue,
-        num_threads=num_threads,
-        enqueue_size=batch_size,
-        num_epochs=num_epochs)
-    if num_epochs is None:
-      features = queue.dequeue_many(batch_size)
-    else:
-      features = queue.dequeue_up_to(batch_size)
-    assert len(features) == len(x.columns) + 1, ('Features should have one '
-                                                 'extra element for the index.')
-    features = features[1:]
-    features = dict(zip(list(x.columns), features))
-    if y is not None:
-      if isinstance(target_column, list):
-        keys = [k for k, _ in y_columns]
-        values = [features.pop(column) for column in target_column]
-        target = {k: v for k, v in zip(keys, values)}
-      else:
-        target = features.pop(target_column)
-      return features, target
-    return features
-  return input_fn
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.inputs.pandas_io import *
diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py
deleted file mode 100644
index 9e69fc72dc..0000000000
--- a/tensorflow/python/estimator/inputs/pandas_io_test.py
+++ /dev/null
@@ -1,320 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for pandas_io."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.estimator.inputs import pandas_io
-from tensorflow.python.framework import errors
-from tensorflow.python.platform import test
-from tensorflow.python.training import coordinator
-from tensorflow.python.training import queue_runner_impl
-
-try:
-  # pylint: disable=g-import-not-at-top
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
-
-
-class PandasIoTest(test.TestCase):
-
-  def makeTestDataFrame(self):
-    index = np.arange(100, 104)
-    a = np.arange(4)
-    b = np.arange(32, 36)
-    x = pd.DataFrame({'a': a, 'b': b}, index=index)
-    y = pd.Series(np.arange(-32, -28), index=index)
-    return x, y
-
-  def makeTestDataFrameWithYAsDataFrame(self):
-    index = np.arange(100, 104)
-    a = np.arange(4)
-    b = np.arange(32, 36)
-    a_label = np.arange(10, 14)
-    b_label = np.arange(50, 54)
-    x = pd.DataFrame({'a': a, 'b': b}, index=index)
-    y = pd.DataFrame({'a_target': a_label, 'b_target': b_label}, index=index)
-    return x, y
-
-  def callInputFnOnce(self, input_fn, session):
-    results = input_fn()
-    coord = coordinator.Coordinator()
-    threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-    result_values = session.run(results)
-    coord.request_stop()
-    coord.join(threads)
-    return result_values
-
-  def testPandasInputFn_IndexMismatch(self):
-    if not HAS_PANDAS:
-      return
-    x, _ = self.makeTestDataFrame()
-    y_noindex = pd.Series(np.arange(-32, -28))
-    with self.assertRaises(ValueError):
-      pandas_io.pandas_input_fn(
-          x, y_noindex, batch_size=2, shuffle=False, num_epochs=1)
-
-  def testPandasInputFn_RaisesWhenTargetColumnIsAList(self):
-    if not HAS_PANDAS:
-      return
-
-    x, y = self.makeTestDataFrame()
-
-    with self.assertRaisesRegexp(TypeError,
-                                 'target_column must be a string type'):
-      pandas_io.pandas_input_fn(x, y, batch_size=2,
-                                shuffle=False,
-                                num_epochs=1,
-                                target_column=['one', 'two'])
-
-  def testPandasInputFn_NonBoolShuffle(self):
-    if not HAS_PANDAS:
-      return
-    x, _ = self.makeTestDataFrame()
-    y_noindex = pd.Series(np.arange(-32, -28))
-    with self.assertRaisesRegexp(ValueError,
-                                 'shuffle must be provided and explicitly '
-                                 'set as boolean'):
-      # Default shuffle is None
-      pandas_io.pandas_input_fn(x, y_noindex)
-
-  def testPandasInputFn_ProducesExpectedOutputs(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      x, y = self.makeTestDataFrame()
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-
-      features, target = self.callInputFnOnce(input_fn, session)
-
-      self.assertAllEqual(features['a'], [0, 1])
-      self.assertAllEqual(features['b'], [32, 33])
-      self.assertAllEqual(target, [-32, -31])
-
-  def testPandasInputFnWhenYIsDataFrame_ProducesExpectedOutput(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      x, y = self.makeTestDataFrameWithYAsDataFrame()
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-
-      features, targets = self.callInputFnOnce(input_fn, session)
-
-      self.assertAllEqual(features['a'], [0, 1])
-      self.assertAllEqual(features['b'], [32, 33])
-      self.assertAllEqual(targets['a_target'], [10, 11])
-      self.assertAllEqual(targets['b_target'], [50, 51])
-
-  def testPandasInputFnYIsDataFrame_HandlesOverlappingColumns(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      x, y = self.makeTestDataFrameWithYAsDataFrame()
-      y = y.rename(columns={'a_target': 'a', 'b_target': 'b'})
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-
-      features, targets = self.callInputFnOnce(input_fn, session)
-
-      self.assertAllEqual(features['a'], [0, 1])
-      self.assertAllEqual(features['b'], [32, 33])
-      self.assertAllEqual(targets['a'], [10, 11])
-      self.assertAllEqual(targets['b'], [50, 51])
-
-  def testPandasInputFnYIsDataFrame_HandlesOverlappingColumnsInTargets(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      x, y = self.makeTestDataFrameWithYAsDataFrame()
-      y = y.rename(columns={'a_target': 'a', 'b_target': 'a_n'})
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-
-      features, targets = self.callInputFnOnce(input_fn, session)
-
-      self.assertAllEqual(features['a'], [0, 1])
-      self.assertAllEqual(features['b'], [32, 33])
-      self.assertAllEqual(targets['a'], [10, 11])
-      self.assertAllEqual(targets['a_n'], [50, 51])
-
-  def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      index = np.arange(100, 102)
-      a = np.arange(2)
-      b = np.arange(32, 34)
-      x = pd.DataFrame({'a': a, 'b': b}, index=index)
-      y = pd.Series(np.arange(-32, -30), index=index)
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=128, shuffle=False, num_epochs=2)
-
-      results = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      features, target = session.run(results)
-      self.assertAllEqual(features['a'], [0, 1, 0, 1])
-      self.assertAllEqual(features['b'], [32, 33, 32, 33])
-      self.assertAllEqual(target, [-32, -31, -32, -31])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run(results)
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testPandasInputFn_ProducesOutputsWhenDataSizeNotDividedByBatchSize(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      index = np.arange(100, 105)
-      a = np.arange(5)
-      b = np.arange(32, 37)
-      x = pd.DataFrame({'a': a, 'b': b}, index=index)
-      y = pd.Series(np.arange(-32, -27), index=index)
-
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-
-      results = input_fn()
-
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-
-      features, target = session.run(results)
-      self.assertAllEqual(features['a'], [0, 1])
-      self.assertAllEqual(features['b'], [32, 33])
-      self.assertAllEqual(target, [-32, -31])
-
-      features, target = session.run(results)
-      self.assertAllEqual(features['a'], [2, 3])
-      self.assertAllEqual(features['b'], [34, 35])
-      self.assertAllEqual(target, [-30, -29])
-
-      features, target = session.run(results)
-      self.assertAllEqual(features['a'], [4])
-      self.assertAllEqual(features['b'], [36])
-      self.assertAllEqual(target, [-28])
-
-      with self.assertRaises(errors.OutOfRangeError):
-        session.run(results)
-
-      coord.request_stop()
-      coord.join(threads)
-
-  def testPandasInputFn_OnlyX(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      x, _ = self.makeTestDataFrame()
-      input_fn = pandas_io.pandas_input_fn(
-          x, y=None, batch_size=2, shuffle=False, num_epochs=1)
-
-      features = self.callInputFnOnce(input_fn, session)
-
-      self.assertAllEqual(features['a'], [0, 1])
-      self.assertAllEqual(features['b'], [32, 33])
-
-  def testPandasInputFn_ExcludesIndex(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      x, y = self.makeTestDataFrame()
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
-
-      features, _ = self.callInputFnOnce(input_fn, session)
-
-      self.assertFalse('index' in features)
-
-  def assertInputsCallableNTimes(self, input_fn, session, n):
-    inputs = input_fn()
-    coord = coordinator.Coordinator()
-    threads = queue_runner_impl.start_queue_runners(session, coord=coord)
-    for _ in range(n):
-      session.run(inputs)
-    with self.assertRaises(errors.OutOfRangeError):
-      session.run(inputs)
-    coord.request_stop()
-    coord.join(threads)
-
-  def testPandasInputFn_RespectsEpoch_NoShuffle(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      x, y = self.makeTestDataFrame()
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=4, shuffle=False, num_epochs=1)
-
-      self.assertInputsCallableNTimes(input_fn, session, 1)
-
-  def testPandasInputFn_RespectsEpoch_WithShuffle(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      x, y = self.makeTestDataFrame()
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=4, shuffle=True, num_epochs=1)
-
-      self.assertInputsCallableNTimes(input_fn, session, 1)
-
-  def testPandasInputFn_RespectsEpoch_WithShuffleAutosize(self):
-    if not HAS_PANDAS:
-      return
-    with self.cached_session() as session:
-      x, y = self.makeTestDataFrame()
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=2, shuffle=True, queue_capacity=None, num_epochs=2)
-
-      self.assertInputsCallableNTimes(input_fn, session, 4)
-
-  def testPandasInputFn_RespectsEpochUnevenBatches(self):
-    if not HAS_PANDAS:
-      return
-    x, y = self.makeTestDataFrame()
-    with self.cached_session() as session:
-      input_fn = pandas_io.pandas_input_fn(
-          x, y, batch_size=3, shuffle=False, num_epochs=1)
-
-      # Before the last batch, only one element of the epoch should remain.
-      self.assertInputsCallableNTimes(input_fn, session, 2)
-
-  def testPandasInputFn_Idempotent(self):
-    if not HAS_PANDAS:
-      return
-    x, y = self.makeTestDataFrame()
-    for _ in range(2):
-      pandas_io.pandas_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)()
-    for _ in range(2):
-      pandas_io.pandas_input_fn(
-          x, y, batch_size=2, shuffle=True, num_epochs=1)()
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/inputs/queues/__init__.py b/tensorflow/python/estimator/inputs/queues/__init__.py
index e69de29bb2..70b95b81c3 100644
--- a/tensorflow/python/estimator/inputs/queues/__init__.py
+++ b/tensorflow/python/estimator/inputs/queues/__init__.py
@@ -0,0 +1,32 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""queues python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow_estimator.python.estimator.inputs import queues
+
+# Include attrs that start with single underscore.
+queues.__all__ = [s for s in dir(queues) if not s.startswith('__')]
+
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.inputs.queues import *
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
index 51a61adb21..d6b0231501 100644
--- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py
+++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
@@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,502 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Helper functions for enqueuing data from arrays and pandas `DataFrame`s."""
+"""feeding_functions python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import random
-import types as tp
-import numpy as np
-import six
-
-from tensorflow.python.estimator.inputs.queues import feeding_queue_runner as fqr
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import data_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.summary import summary
-from tensorflow.python.training import queue_runner
-
-try:
-  # pylint: disable=g-import-not-at-top
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
-
-
-def _fill_array(arr, seq, fillvalue=0):
-  """Recursively fills padded arr with elements from seq.
-
-  If length of seq is less than arr padded length, fillvalue used.
-  Args:
-    arr: Padded tensor of shape [batch_size, ..., max_padded_dim_len].
-    seq: Non-padded list of data samples of shape
-      [batch_size, ..., padded_dim(None)]
-    fillvalue: Default fillvalue to use.
-  """
-  if arr.ndim == 1:
-    try:
-      len_ = len(seq)
-    except TypeError:
-      len_ = 0
-    arr[:len_] = seq
-    arr[len_:] = fillvalue
-  else:
-    for subarr, subseq in six.moves.zip_longest(arr, seq, fillvalue=()):
-      _fill_array(subarr, subseq, fillvalue)
-
-
-def _pad_if_needed(batch_key_item, fillvalue=0):
-  """ Returns padded batch.
-
-  Args:
-    batch_key_item: List of data samples of any type with shape
-      [batch_size, ..., padded_dim(None)].
-    fillvalue: Default fillvalue to use.
-
-  Returns:
-    Padded with zeros tensor of same type and shape
-      [batch_size, ..., max_padded_dim_len].
-
-  Raises:
-    ValueError if data samples have different shapes (except last padded dim).
-  """
-  shapes = [
-      seq.shape[:-1] if len(seq.shape) > 0 else -1 for seq in batch_key_item
-  ]
-  if not all(shapes[0] == x for x in shapes):
-    raise ValueError("Array shapes must match.")
-
-  last_length = [
-      seq.shape[-1] if len(seq.shape) > 0 else 0 for seq in batch_key_item
-  ]
-  if all([x == last_length[0] for x in last_length]):
-    return batch_key_item
-
-  batch_size = len(batch_key_item)
-  max_sequence_length = max(last_length)
-  result_batch = np.zeros(
-      shape=[batch_size] + list(shapes[0]) + [max_sequence_length],
-      dtype=batch_key_item[0].dtype)
-  _fill_array(result_batch, batch_key_item, fillvalue)
-  return result_batch
-
-
-def _get_integer_indices_for_next_batch(batch_indices_start, batch_size,
-                                        epoch_end, array_length, current_epoch,
-                                        total_epochs):
-  """Returns the integer indices for next batch.
-
-  If total epochs is not None and current epoch is the final epoch, the end
-  index of the next batch should not exceed the `epoch_end` (i.e., the final
-  batch might not have size `batch_size` to avoid overshooting the last epoch).
-
-  Args:
-    batch_indices_start: Integer, the index to start next batch.
-    batch_size: Integer, size of batches to return.
-    epoch_end: Integer, the end index of the epoch. The epoch could start from a
-      random position, so `epoch_end` provides the end index for that.
-    array_length: Integer, the length of the array.
-    current_epoch: Integer, the epoch number has been emitted.
-    total_epochs: Integer or `None`, the total number of epochs to emit. If
-      `None` will run forever.
-
-  Returns:
-    A tuple of a list with integer indices for next batch and `current_epoch`
-    value after the next batch.
-
-  Raises:
-    OutOfRangeError if `current_epoch` is not less than `total_epochs`.
-
-  """
-  if total_epochs is not None and current_epoch >= total_epochs:
-    raise errors.OutOfRangeError(None, None,
-                                 "Already emitted %s epochs." % current_epoch)
-
-  batch_indices_end = batch_indices_start + batch_size
-  batch_indices = [
-      j % array_length for j in range(batch_indices_start, batch_indices_end)
-  ]
-  epoch_end_indices = [i for i, x in enumerate(batch_indices) if x == epoch_end]
-  current_epoch += len(epoch_end_indices)
-
-  if total_epochs is None or current_epoch < total_epochs:
-    return (batch_indices, current_epoch)
-
-  # Now we might have emitted more data for expected epochs. Need to trim.
-  final_epoch_end_inclusive = epoch_end_indices[
-      -(current_epoch - total_epochs + 1)]
-  batch_indices = batch_indices[:final_epoch_end_inclusive + 1]
-
-  return (batch_indices, total_epochs)
-
-
-class _ArrayFeedFn(object):
-  """Creates feed dictionaries from numpy arrays."""
-
-  def __init__(self,
-               placeholders,
-               array,
-               batch_size,
-               random_start=False,
-               seed=None,
-               num_epochs=None):
-    if len(placeholders) != 2:
-      raise ValueError("_array_feed_fn expects 2 placeholders; got {}.".format(
-          len(placeholders)))
-    self._placeholders = placeholders
-    self._array = array
-    self._max = len(array)
-    self._batch_size = batch_size
-    self._num_epochs = num_epochs
-    self._epoch = 0
-    random.seed(seed)
-    self._trav = random.randrange(self._max) if random_start else 0
-    self._epoch_end = (self._trav - 1) % self._max
-
-  def __call__(self):
-    integer_indexes, self._epoch = _get_integer_indices_for_next_batch(
-        batch_indices_start=self._trav,
-        batch_size=self._batch_size,
-        epoch_end=self._epoch_end,
-        array_length=self._max,
-        current_epoch=self._epoch,
-        total_epochs=self._num_epochs)
-
-    self._trav = (integer_indexes[-1] + 1) % self._max
-    return {
-        self._placeholders[0]: integer_indexes,
-        self._placeholders[1]: self._array[integer_indexes]
-    }
-
-
-class _OrderedDictNumpyFeedFn(object):
-  """Creates feed dictionaries from `OrderedDict`s of numpy arrays."""
-
-  def __init__(self,
-               placeholders,
-               ordered_dict_of_arrays,
-               batch_size,
-               random_start=False,
-               seed=None,
-               num_epochs=None):
-    if len(placeholders) != len(ordered_dict_of_arrays) + 1:
-      raise ValueError("Expected {} placeholders; got {}.".format(
-          len(ordered_dict_of_arrays), len(placeholders)))
-    self._index_placeholder = placeholders[0]
-    self._col_placeholders = placeholders[1:]
-    self._ordered_dict_of_arrays = ordered_dict_of_arrays
-    self._max = len(next(iter(ordered_dict_of_arrays.values())))
-    for _, v in ordered_dict_of_arrays.items():
-      if len(v) != self._max:
-        raise ValueError("Array lengths must match.")
-    self._batch_size = batch_size
-    self._num_epochs = num_epochs
-    self._epoch = 0
-    random.seed(seed)
-    self._trav = random.randrange(self._max) if random_start else 0
-    self._epoch_end = (self._trav - 1) % self._max
-
-  def __call__(self):
-    integer_indexes, self._epoch = _get_integer_indices_for_next_batch(
-        batch_indices_start=self._trav,
-        batch_size=self._batch_size,
-        epoch_end=self._epoch_end,
-        array_length=self._max,
-        current_epoch=self._epoch,
-        total_epochs=self._num_epochs)
-
-    self._trav = (integer_indexes[-1] + 1) % self._max
-    feed_dict = {self._index_placeholder: integer_indexes}
-    cols = [
-        column[integer_indexes]
-        for column in self._ordered_dict_of_arrays.values()
-    ]
-    feed_dict.update(dict(zip(self._col_placeholders, cols)))
-    return feed_dict
-
-
-class _PandasFeedFn(object):
-  """Creates feed dictionaries from pandas `DataFrames`."""
-
-  def __init__(self,
-               placeholders,
-               dataframe,
-               batch_size,
-               random_start=False,
-               seed=None,
-               num_epochs=None):
-    if len(placeholders) != len(dataframe.columns) + 1:
-      raise ValueError("Expected {} placeholders; got {}.".format(
-          len(dataframe.columns) + 1, len(placeholders)))
-    self._index_placeholder = placeholders[0]
-    self._col_placeholders = placeholders[1:]
-    self._dataframe = dataframe
-    self._max = len(dataframe)
-    self._batch_size = batch_size
-    self._num_epochs = num_epochs
-    self._epoch = 0
-    random.seed(seed)
-    self._trav = random.randrange(self._max) if random_start else 0
-    self._epoch_end = (self._trav - 1) % self._max
-
-  def __call__(self):
-    integer_indexes, self._epoch = _get_integer_indices_for_next_batch(
-        batch_indices_start=self._trav,
-        batch_size=self._batch_size,
-        epoch_end=self._epoch_end,
-        array_length=self._max,
-        current_epoch=self._epoch,
-        total_epochs=self._num_epochs)
-
-    self._trav = (integer_indexes[-1] + 1) % self._max
-    result = self._dataframe.iloc[integer_indexes]
-    cols = [result[col].values for col in result.columns]
-    feed_dict = dict(zip(self._col_placeholders, cols))
-    feed_dict[self._index_placeholder] = result.index.values
-    return feed_dict
-
-
-class _GeneratorFeedFn(object):
-  """Creates feed dictionaries from `Generator` of `dicts` of numpy arrays."""
-
-  def __init__(self,
-               placeholders,
-               generator,
-               batch_size,
-               random_start=False,
-               seed=None,
-               num_epochs=None,
-               pad_value=None):
-    first_sample = next(generator())
-    if len(placeholders) != len(first_sample):
-      raise ValueError("Expected {} placeholders; got {}.".format(
-          len(first_sample), len(placeholders)))
-    self._keys = sorted(list(first_sample.keys()))
-    self._col_placeholders = placeholders
-    self._generator_function = generator
-    self._iterator = generator()
-    self._batch_size = batch_size
-    self._num_epochs = num_epochs
-    self._epoch = 0
-    self._pad_value = pad_value
-    random.seed(seed)
-
-  def __call__(self):
-    if self._num_epochs and self._epoch >= self._num_epochs:
-      raise errors.OutOfRangeError(None, None,
-                                   "Already emitted %s epochs." % self._epoch)
-    list_dict = {}
-    list_dict_size = 0
-    while list_dict_size < self._batch_size:
-      try:
-        data_row = next(self._iterator)
-      except StopIteration:
-        self._epoch += 1
-        self._iterator = self._generator_function()
-        data_row = next(self._iterator)
-      for index, key in enumerate(self._keys):
-        if key not in data_row.keys():
-          raise KeyError("key mismatch between dicts emitted by GenFun "
-                         "Expected {} keys; got {}".format(
-                             self._keys, data_row.keys()))
-        list_dict.setdefault(self._col_placeholders[index], list()).append(
-            data_row[key])
-        list_dict_size += 1
-
-    if self._pad_value is not None:
-      feed_dict = {
-          key: np.asarray(_pad_if_needed(item, self._pad_value))
-          for key, item in list(list_dict.items())
-      }
-    else:
-      feed_dict = {
-          key: np.asarray(item)
-          for key, item in list(list_dict.items())
-      }
-    return feed_dict
-
-
-def _enqueue_data(data,
-                  capacity,
-                  shuffle=False,
-                  min_after_dequeue=None,
-                  num_threads=1,
-                  seed=None,
-                  name="enqueue_input",
-                  enqueue_size=1,
-                  num_epochs=None,
-                  pad_value=None):
-  """Creates a queue filled from a numpy array or pandas `DataFrame`.
-
-    Returns a queue filled with the rows of the given (`OrderedDict` of) array
-    or `DataFrame`. In the case of a pandas `DataFrame`, the first enqueued
-    `Tensor` corresponds to the index of the `DataFrame`. For (`OrderedDict` of)
-    numpy arrays, the first enqueued `Tensor` contains the row number.
-
-  Args:
-    data: a numpy `ndarray`, `OrderedDict` of numpy arrays, or a generator
-       yielding `dict`s of numpy arrays or pandas `DataFrame` that will be read
-       into the queue.
-    capacity: the capacity of the queue.
-    shuffle: whether or not to shuffle the rows of the array.
-    min_after_dequeue: minimum number of elements that can remain in the queue
-    after a dequeue operation. Only used when `shuffle` is true. If not set,
-    defaults to `capacity` / 4.
-    num_threads: number of threads used for reading and enqueueing.
-    seed: used to seed shuffling and reader starting points.
-    name: a scope name identifying the data.
-    enqueue_size: the number of rows to enqueue per step.
-    num_epochs: limit enqueuing to a specified number of epochs, if provided.
-    pad_value: default value for dynamic padding of data samples, if provided.
-
-  Returns:
-    A queue filled with the rows of the given (`OrderedDict` of) array or
-      `DataFrame`.
-
-  Raises:
-    TypeError: `data` is not a Pandas `DataFrame`, an `OrderedDict` of numpy
-      arrays, a numpy `ndarray`, or a generator producing these.
-    NotImplementedError: padding and shuffling data at the same time.
-    NotImplementedError: padding usage with non generator data type.
-  """
-  with ops.name_scope(name):
-    if isinstance(data, np.ndarray):
-      types = [dtypes.int64, dtypes.as_dtype(data.dtype)]
-      queue_shapes = [(), data.shape[1:]]
-      get_feed_fn = _ArrayFeedFn
-    elif isinstance(data, collections.OrderedDict):
-      types = [dtypes.int64
-              ] + [dtypes.as_dtype(col.dtype) for col in data.values()]
-      queue_shapes = [()] + [col.shape[1:] for col in data.values()]
-      get_feed_fn = _OrderedDictNumpyFeedFn
-    elif isinstance(data, tp.FunctionType):
-      x_first_el = six.next(data())
-      x_first_keys = sorted(x_first_el.keys())
-      x_first_values = [x_first_el[key] for key in x_first_keys]
-      types = [dtypes.as_dtype(col.dtype) for col in x_first_values]
-      queue_shapes = [col.shape for col in x_first_values]
-      get_feed_fn = _GeneratorFeedFn
-    elif HAS_PANDAS and isinstance(data, pd.DataFrame):
-      types = [
-          dtypes.as_dtype(dt) for dt in [data.index.dtype] + list(data.dtypes)
-      ]
-      queue_shapes = [() for _ in types]
-      get_feed_fn = _PandasFeedFn
-    else:
-      raise TypeError(
-          "data must be either a numpy array or pandas DataFrame if pandas is "
-          "installed; got {}".format(type(data).__name__))
-
-    pad_data = pad_value is not None
-    if pad_data and get_feed_fn is not _GeneratorFeedFn:
-      raise NotImplementedError(
-          "padding is only available with generator usage")
-    if shuffle and pad_data:
-      raise NotImplementedError(
-          "padding and shuffling data at the same time is not implemented")
-
-    # TODO(jamieas): TensorBoard warnings for all warnings below once available.
-
-    if num_threads > 1 and num_epochs is not None:
-      logging.warning(
-          "enqueue_data was called with num_epochs and num_threads > 1. "
-          "num_epochs is applied per thread, so this will produce more "
-          "epochs than you probably intend. "
-          "If you want to limit epochs, use one thread.")
-
-    if shuffle and num_threads > 1 and num_epochs is not None:
-      logging.warning(
-          "enqueue_data was called with shuffle=True, num_threads > 1, and "
-          "num_epochs. This will create multiple threads, all reading the "
-          "array/dataframe in order adding to the same shuffling queue; the "
-          "results will likely not be sufficiently shuffled.")
-
-    if not shuffle and num_threads > 1:
-      logging.warning(
-          "enqueue_data was called with shuffle=False and num_threads > 1. "
-          "This will create multiple threads, all reading the "
-          "array/dataframe in order. If you want examples read in order, use"
-          " one thread; if you want multiple threads, enable shuffling.")
-
-    if shuffle:
-      min_after_dequeue = int(capacity / 4 if min_after_dequeue is None else
-                              min_after_dequeue)
-      queue = data_flow_ops.RandomShuffleQueue(
-          capacity,
-          min_after_dequeue,
-          dtypes=types,
-          shapes=queue_shapes,
-          seed=seed)
-    elif pad_data:
-      min_after_dequeue = 0  # just for the summary text
-      queue_shapes = list(
-          map(lambda x: tuple(list(x[:-1]) + [None]) if len(x) > 0 else x,
-              queue_shapes))
-      queue = data_flow_ops.PaddingFIFOQueue(
-          capacity, dtypes=types, shapes=queue_shapes)
-    else:
-      min_after_dequeue = 0  # just for the summary text
-      queue = data_flow_ops.FIFOQueue(
-          capacity, dtypes=types, shapes=queue_shapes)
-
-    enqueue_ops = []
-    feed_fns = []
-
-    for i in range(num_threads):
-      # Note the placeholders have no shapes, so they will accept any
-      # enqueue_size.  enqueue_many below will break them up.
-      placeholders = [array_ops.placeholder(t) for t in types]
-
-      enqueue_ops.append(queue.enqueue_many(placeholders))
-      seed_i = None if seed is None else (i + 1) * seed
-
-      if not pad_data:
-        feed_fns.append(
-            get_feed_fn(
-                placeholders,
-                data,
-                enqueue_size,
-                random_start=shuffle,
-                seed=seed_i,
-                num_epochs=num_epochs))
-      else:
-        feed_fns.append(
-            get_feed_fn(
-                placeholders,
-                data,
-                enqueue_size,
-                random_start=shuffle,
-                seed=seed_i,
-                num_epochs=num_epochs,
-                pad_value=pad_value))
+from tensorflow_estimator.python.estimator.inputs.queues import feeding_functions
 
-    runner = fqr._FeedingQueueRunner(  # pylint: disable=protected-access
-        queue=queue,
-        enqueue_ops=enqueue_ops,
-        feed_fns=feed_fns)
-    queue_runner.add_queue_runner(runner)
+# Include attrs that start with single underscore.
+feeding_functions.__all__ = [
+    s for s in dir(feeding_functions) if not s.startswith('__')
+]
 
-    full = (
-        math_ops.cast(
-            math_ops.maximum(0,
-                             queue.size() - min_after_dequeue), dtypes.float32)
-        * (1. / (capacity - min_after_dequeue)))
-    # Note that name contains a '/' at the end so we intentionally do not place
-    # a '/' after %s below.
-    summary_name = ("queue/%sfraction_over_%d_of_%d_full" %
-                    (queue.name, min_after_dequeue,
-                     capacity - min_after_dequeue))
-    summary.scalar(summary_name, full)
-    return queue
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.inputs.queues.feeding_functions import *
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions_test.py b/tensorflow/python/estimator/inputs/queues/feeding_functions_test.py
deleted file mode 100644
index 30abd82130..0000000000
--- a/tensorflow/python/estimator/inputs/queues/feeding_functions_test.py
+++ /dev/null
@@ -1,391 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests feeding functions using arrays and `DataFrames`."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-import numpy as np
-
-from tensorflow.python.estimator.inputs.queues import feeding_functions as ff
-from tensorflow.python.platform import test
-
-try:
-  # pylint: disable=g-import-not-at-top
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
-
-
-def vals_to_list(a):
-  return {
-      key: val.tolist() if isinstance(val, np.ndarray) else val
-      for key, val in a.items()
-  }
-
-
-class _FeedingFunctionsTestCase(test.TestCase):
-  """Tests for feeding functions."""
-
-  def testArrayFeedFnBatchOne(self):
-    array = np.arange(32).reshape([16, 2])
-    placeholders = ["index_placeholder", "value_placeholder"]
-    aff = ff._ArrayFeedFn(placeholders, array, 1)
-
-    # cycle around a couple times
-    for x in range(0, 100):
-      i = x % 16
-      expected = {
-          "index_placeholder": [i],
-          "value_placeholder": [[2 * i, 2 * i + 1]]
-      }
-      actual = aff()
-      self.assertEqual(expected, vals_to_list(actual))
-
-  def testArrayFeedFnBatchFive(self):
-    array = np.arange(32).reshape([16, 2])
-    placeholders = ["index_placeholder", "value_placeholder"]
-    aff = ff._ArrayFeedFn(placeholders, array, 5)
-
-    # cycle around a couple times
-    for _ in range(0, 101, 2):
-      aff()
-
-    expected = {
-        "index_placeholder": [15, 0, 1, 2, 3],
-        "value_placeholder": [[30, 31], [0, 1], [2, 3], [4, 5], [6, 7]]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testArrayFeedFnBatchTwoWithOneEpoch(self):
-    array = np.arange(5) + 10
-    placeholders = ["index_placeholder", "value_placeholder"]
-    aff = ff._ArrayFeedFn(placeholders, array, batch_size=2, num_epochs=1)
-
-    expected = {
-        "index_placeholder": [0, 1],
-        "value_placeholder": [10, 11]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-    expected = {
-        "index_placeholder": [2, 3],
-        "value_placeholder": [12, 13]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-    expected = {
-        "index_placeholder": [4],
-        "value_placeholder": [14]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testArrayFeedFnBatchOneHundred(self):
-    array = np.arange(32).reshape([16, 2])
-    placeholders = ["index_placeholder", "value_placeholder"]
-    aff = ff._ArrayFeedFn(placeholders, array, 100)
-
-    expected = {
-        "index_placeholder":
-            list(range(0, 16)) * 6 + list(range(0, 4)),
-        "value_placeholder":
-            np.arange(32).reshape([16, 2]).tolist() * 6 +
-            [[0, 1], [2, 3], [4, 5], [6, 7]]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testArrayFeedFnBatchOneHundredWithSmallerArrayAndMultipleEpochs(self):
-    array = np.arange(2) + 10
-    placeholders = ["index_placeholder", "value_placeholder"]
-    aff = ff._ArrayFeedFn(placeholders, array, batch_size=100, num_epochs=2)
-
-    expected = {
-        "index_placeholder": [0, 1, 0, 1],
-        "value_placeholder": [10, 11, 10, 11],
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testPandasFeedFnBatchOne(self):
-    if not HAS_PANDAS:
-      return
-    array1 = np.arange(32, 64)
-    array2 = np.arange(64, 96)
-    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 128))
-    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
-    aff = ff._PandasFeedFn(placeholders, df, 1)
-
-    # cycle around a couple times
-    for x in range(0, 100):
-      i = x % 32
-      expected = {
-          "index_placeholder": [i + 96],
-          "a_placeholder": [32 + i],
-          "b_placeholder": [64 + i]
-      }
-      actual = aff()
-      self.assertEqual(expected, vals_to_list(actual))
-
-  def testPandasFeedFnBatchFive(self):
-    if not HAS_PANDAS:
-      return
-    array1 = np.arange(32, 64)
-    array2 = np.arange(64, 96)
-    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 128))
-    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
-    aff = ff._PandasFeedFn(placeholders, df, 5)
-
-    # cycle around a couple times
-    for _ in range(0, 101, 2):
-      aff()
-
-    expected = {
-        "index_placeholder": [127, 96, 97, 98, 99],
-        "a_placeholder": [63, 32, 33, 34, 35],
-        "b_placeholder": [95, 64, 65, 66, 67]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testPandasFeedFnBatchTwoWithOneEpoch(self):
-    if not HAS_PANDAS:
-      return
-    array1 = np.arange(32, 37)
-    array2 = np.arange(64, 69)
-    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 101))
-    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
-    aff = ff._PandasFeedFn(placeholders, df, batch_size=2, num_epochs=1)
-
-    expected = {
-        "index_placeholder": [96, 97],
-        "a_placeholder": [32, 33],
-        "b_placeholder": [64, 65]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-    expected = {
-        "index_placeholder": [98, 99],
-        "a_placeholder": [34, 35],
-        "b_placeholder": [66, 67]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-    expected = {
-        "index_placeholder": [100],
-        "a_placeholder": [36],
-        "b_placeholder": [68]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testPandasFeedFnBatchOneHundred(self):
-    if not HAS_PANDAS:
-      return
-    array1 = np.arange(32, 64)
-    array2 = np.arange(64, 96)
-    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 128))
-    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
-    aff = ff._PandasFeedFn(placeholders, df, 100)
-
-    expected = {
-        "index_placeholder": list(range(96, 128)) * 3 + list(range(96, 100)),
-        "a_placeholder": list(range(32, 64)) * 3 + list(range(32, 36)),
-        "b_placeholder": list(range(64, 96)) * 3 + list(range(64, 68))
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testPandasFeedFnBatchOneHundredWithSmallDataArrayAndMultipleEpochs(self):
-    if not HAS_PANDAS:
-      return
-    array1 = np.arange(32, 34)
-    array2 = np.arange(64, 66)
-    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 98))
-    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
-    aff = ff._PandasFeedFn(placeholders, df, batch_size=100, num_epochs=2)
-
-    expected = {
-        "index_placeholder": [96, 97, 96, 97],
-        "a_placeholder": [32, 33, 32, 33],
-        "b_placeholder": [64, 65, 64, 65]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testOrderedDictNumpyFeedFnBatchTwoWithOneEpoch(self):
-    a = np.arange(32, 37)
-    b = np.arange(64, 69)
-    x = {"a": a, "b": b}
-    ordered_dict_x = collections.OrderedDict(
-        sorted(x.items(), key=lambda t: t[0]))
-    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
-    aff = ff._OrderedDictNumpyFeedFn(
-        placeholders, ordered_dict_x, batch_size=2, num_epochs=1)
-
-    expected = {
-        "index_placeholder": [0, 1],
-        "a_placeholder": [32, 33],
-        "b_placeholder": [64, 65]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-    expected = {
-        "index_placeholder": [2, 3],
-        "a_placeholder": [34, 35],
-        "b_placeholder": [66, 67]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-    expected = {
-        "index_placeholder": [4],
-        "a_placeholder": [36],
-        "b_placeholder": [68]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testOrderedDictNumpyFeedFnLargeBatchWithSmallArrayAndMultipleEpochs(self):
-    a = np.arange(32, 34)
-    b = np.arange(64, 66)
-    x = {"a": a, "b": b}
-    ordered_dict_x = collections.OrderedDict(
-        sorted(x.items(), key=lambda t: t[0]))
-    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
-    aff = ff._OrderedDictNumpyFeedFn(
-        placeholders, ordered_dict_x, batch_size=100, num_epochs=2)
-
-    expected = {
-        "index_placeholder": [0, 1, 0, 1],
-        "a_placeholder": [32, 33, 32, 33],
-        "b_placeholder": [64, 65, 64, 65]
-    }
-    actual = aff()
-    self.assertEqual(expected, vals_to_list(actual))
-
-  def testFillArraySmall(self):
-    a = (np.ones(shape=[32, 32], dtype=np.int32).tolist() +
-         np.ones(shape=[32, 36], dtype=np.int32).tolist())
-    actual = np.ones(shape=[64, 36], dtype=np.int32)
-    ff._fill_array(actual, a)
-    expected = np.ones(shape=[64, 36], dtype=np.int32)
-    expected[:32, 32:] = 0
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-  def testFillArrayLarge(self):
-    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
-         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
-    actual = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
-    ff._fill_array(actual, a)
-    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
-    expected[:8, ..., 32:] = 0
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-  def testFillArraySmallWithSpecifiedValue(self):
-    fill_value = 8
-    a = (np.ones(shape=[32, 32], dtype=np.int32).tolist() +
-         np.ones(shape=[32, 36], dtype=np.int32).tolist())
-    actual = np.ones(shape=[64, 36], dtype=np.int32)
-    ff._fill_array(actual, a, fill_value)
-    expected = np.ones(shape=[64, 36], dtype=np.int32)
-    expected[:32, 32:] = fill_value
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-  def testFillArrayLargeWithSpecifiedValue(self):
-    fill_value = 8
-    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
-         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
-    actual = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
-    ff._fill_array(actual, a, fill_value)
-    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
-    expected[:8, ..., 32:] = fill_value
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-  def testPadIfNeededSmall(self):
-    a = (np.ones(shape=[32, 32], dtype=np.int32).tolist() +
-         np.ones(shape=[32, 36], dtype=np.int32).tolist())
-    a = list(map(np.array, a))
-    actual = ff._pad_if_needed(a)
-    expected = np.ones(shape=[64, 36], dtype=np.int32)
-    expected[:32, 32:] = 0
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-  def testPadIfNeededLarge(self):
-    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
-         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
-    a = list(map(np.array, a))
-    actual = ff._pad_if_needed(a)
-    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
-    expected[:8, ..., 32:] = 0
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-  def testPadIfNeededSmallWithSpecifiedValue(self):
-    fill_value = 8
-    a = (np.ones(shape=[32, 32], dtype=np.int32).tolist() +
-         np.ones(shape=[32, 36], dtype=np.int32).tolist())
-    a = list(map(np.array, a))
-    actual = ff._pad_if_needed(a, fill_value)
-    expected = np.ones(shape=[64, 36], dtype=np.int32)
-    expected[:32, 32:] = fill_value
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-  def testPadIfNeededLargeWithSpecifiedValue(self):
-    fill_value = 8
-    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
-         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
-    a = list(map(np.array, a))
-    actual = ff._pad_if_needed(a, fill_value)
-    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
-    expected[:8, ..., 32:] = fill_value
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-  def testPadIfNeededSmallWithSpecifiedNonNumericValue(self):
-    fill_value = False
-    a = (np.ones(shape=[32, 32], dtype=np.bool).tolist() +
-         np.ones(shape=[32, 36], dtype=np.bool).tolist())
-    a = list(map(np.array, a))
-    actual = ff._pad_if_needed(a, fill_value)
-    expected = np.ones(shape=[64, 36], dtype=np.bool)
-    expected[:32, 32:] = fill_value
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-  def testPadIfNeededLargeWithSpecifiedNonNumericValue(self):
-    fill_value = False
-    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.bool).tolist() +
-         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.bool).tolist())
-    a = list(map(np.array, a))
-    actual = ff._pad_if_needed(a, fill_value)
-    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.bool)
-    expected[:8, ..., 32:] = fill_value
-    self.assertEqual(expected.tolist(), actual.tolist())
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py b/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py
index afbcab596a..c940909def 100644
--- a/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py
+++ b/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,169 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""feeding_queue_runner python module.
 
-"""A `QueueRunner` that takes a feed function as an argument."""
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import threading
+from tensorflow_estimator.python.estimator.inputs.queues import feeding_queue_runner
 
-from tensorflow.python.framework import errors
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import queue_runner as qr
+# Include attrs that start with single underscore.
+feeding_queue_runner.__all__ = [
+    s for s in dir(feeding_queue_runner) if not s.startswith('__')
+]
 
-
-class _FeedingQueueRunner(qr.QueueRunner):
-  """A queue runner that allows the feeding of values such as numpy arrays."""
-
-  def __init__(self, queue=None, enqueue_ops=None, close_op=None,
-               cancel_op=None, feed_fns=None,
-               queue_closed_exception_types=None):
-    """Initialize the queue runner.
-
-    For further documentation, see `queue_runner.py`. Note that
-    `FeedingQueueRunner` does not support construction from protobuffer nor
-    serialization to protobuffer.
-
-    Args:
-      queue: A `Queue`.
-      enqueue_ops: List of enqueue ops to run in threads later.
-      close_op: Op to close the queue. Pending enqueue ops are preserved.
-      cancel_op: Op to close the queue and cancel pending enqueue ops.
-      feed_fns: a list of functions that return a dictionary mapping fed
-        `Tensor`s to values. Must be the same length as `enqueue_ops`.
-      queue_closed_exception_types: Optional tuple of Exception types that
-        indicate that the queue has been closed when raised during an enqueue
-        operation.  Defaults to
-        `(tf.errors.OutOfRangeError, tf.errors.CancelledError)`.
-
-    Raises:
-      ValueError: `feed_fns` is not `None` and has different length than
-        `enqueue_ops`.
-    """
-    if queue_closed_exception_types is None:
-      queue_closed_exception_types = (
-          errors.OutOfRangeError, errors.CancelledError)
-    super(_FeedingQueueRunner, self).__init__(
-        queue, enqueue_ops, close_op,
-        cancel_op, queue_closed_exception_types=queue_closed_exception_types)
-    if feed_fns is None:
-      self._feed_fns = [None for _ in enqueue_ops]
-    else:
-      if len(feed_fns) != len(enqueue_ops):
-        raise ValueError(
-            "If feed_fns is not None, it must have the same length as "
-            "enqueue_ops.")
-      self._feed_fns = feed_fns
-
-  # pylint: disable=broad-except
-  def _run(self, sess, enqueue_op, feed_fn, coord=None):
-    """Execute the enqueue op in a loop, close the queue in case of error.
-
-    Args:
-      sess: A `Session`.
-      enqueue_op: The `Operation` to run.
-      feed_fn: the feed function to pass to `sess.run`.
-      coord: Optional `Coordinator` object for reporting errors and checking
-        for stop conditions.
-
-    """
-    # TODO(jamieas): Reduce code duplication with `QueueRunner`.
-    if coord:
-      coord.register_thread(threading.current_thread())
-    decremented = False
-    try:
-      while True:
-        if coord and coord.should_stop():
-          break
-        try:
-          feed_dict = None if feed_fn is None else feed_fn()
-          sess.run(enqueue_op, feed_dict=feed_dict)
-        except (errors.OutOfRangeError, errors.CancelledError):
-          # This exception indicates that a queue was closed.
-          with self._lock:
-            self._runs_per_session[sess] -= 1
-            decremented = True
-            if self._runs_per_session[sess] == 0:
-              try:
-                sess.run(self._close_op)
-              except Exception as e:
-                # Intentionally ignore errors from close_op.
-                logging.vlog(1, "Ignored exception: %s", str(e))
-            return
-    except Exception as e:
-      # This catches all other exceptions.
-      if coord:
-        coord.request_stop(e)
-      else:
-        logging.error("Exception in QueueRunner: %s", str(e))
-        with self._lock:
-          self._exceptions_raised.append(e)
-        raise
-    finally:
-      # Make sure we account for all terminations: normal or errors.
-      if not decremented:
-        with self._lock:
-          self._runs_per_session[sess] -= 1
-
-  def create_threads(self, sess, coord=None, daemon=False, start=False):
-    """Create threads to run the enqueue ops for the given session.
-
-    This method requires a session in which the graph was launched.  It creates
-    a list of threads, optionally starting them.  There is one thread for each
-    op passed in `enqueue_ops`.
-
-    The `coord` argument is an optional coordinator, that the threads will use
-    to terminate together and report exceptions.  If a coordinator is given,
-    this method starts an additional thread to close the queue when the
-    coordinator requests a stop.
-
-    If previously created threads for the given session are still running, no
-    new threads will be created.
-
-    Args:
-      sess: A `Session`.
-      coord: Optional `Coordinator` object for reporting errors and checking
-        stop conditions.
-      daemon: Boolean.  If `True` make the threads daemon threads.
-      start: Boolean.  If `True` starts the threads.  If `False` the
-        caller must call the `start()` method of the returned threads.
-
-    Returns:
-      A list of threads.
-    """
-    with self._lock:
-      try:
-        if self._runs_per_session[sess] > 0:
-          # Already started: no new threads to return.
-          return []
-      except KeyError:
-        # We haven't seen this session yet.
-        pass
-      self._runs_per_session[sess] = len(self._enqueue_ops)
-      self._exceptions_raised = []
-
-    ret_threads = [threading.Thread(target=self._run,
-                                    args=(sess, op, feed_fn, coord))
-                   for op, feed_fn in zip(self._enqueue_ops, self._feed_fns)]
-    if coord:
-      ret_threads.append(threading.Thread(target=self._close_on_stop,
-                                          args=(sess, self._cancel_op, coord)))
-    for t in ret_threads:
-      if daemon:
-        t.daemon = True
-      if start:
-        t.start()
-    return ret_threads
-
-  def _init_from_proto(self, queue_runner_def):
-    raise NotImplementedError(
-        "{} does not support initialization from proto.".format(type(
-            self).__name__))
-
-  def to_proto(self):
-    raise NotImplementedError(
-        "{} does not support serialization to proto.".format(type(
-            self).__name__))
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.inputs.queues.feeding_queue_runner import *
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_queue_runner_test.py b/tensorflow/python/estimator/inputs/queues/feeding_queue_runner_test.py
deleted file mode 100644
index 6292eb7da1..0000000000
--- a/tensorflow/python/estimator/inputs/queues/feeding_queue_runner_test.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests `FeedingQueueRunner` using arrays and `DataFrames`."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.client import session
-from tensorflow.python.estimator.inputs.queues import feeding_functions as ff
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import test
-from tensorflow.python.training import coordinator
-from tensorflow.python.training import queue_runner_impl
-
-try:
-  # pylint: disable=g-import-not-at-top
-  import pandas as pd
-  HAS_PANDAS = True
-except IOError:
-  # Pandas writes a temporary file during import. If it fails, don't use pandas.
-  HAS_PANDAS = False
-except ImportError:
-  HAS_PANDAS = False
-
-
-def get_rows(array, row_indices):
-  rows = [array[i] for i in row_indices]
-  return np.vstack(rows)
-
-
-class FeedingQueueRunnerTestCase(test.TestCase):
-  """Tests for `FeedingQueueRunner`."""
-
-  def testArrayFeeding(self):
-    with ops.Graph().as_default():
-      array = np.arange(32).reshape([16, 2])
-      q = ff._enqueue_data(array, capacity=100)
-      batch_size = 3
-      dq_op = q.dequeue_many(batch_size)
-      with session.Session() as sess:
-        coord = coordinator.Coordinator()
-        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-        for i in range(100):
-          indices = [
-              j % array.shape[0]
-              for j in range(batch_size * i, batch_size * (i + 1))
-          ]
-          expected_dq = get_rows(array, indices)
-          dq = sess.run(dq_op)
-          np.testing.assert_array_equal(indices, dq[0])
-          np.testing.assert_array_equal(expected_dq, dq[1])
-        coord.request_stop()
-        coord.join(threads)
-
-  def testArrayFeedingMultiThread(self):
-    with ops.Graph().as_default():
-      array = np.arange(256).reshape([128, 2])
-      q = ff._enqueue_data(array, capacity=128, num_threads=8, shuffle=True)
-      batch_size = 3
-      dq_op = q.dequeue_many(batch_size)
-      with session.Session() as sess:
-        coord = coordinator.Coordinator()
-        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-        for _ in range(100):
-          dq = sess.run(dq_op)
-          indices = dq[0]
-          expected_dq = get_rows(array, indices)
-          np.testing.assert_array_equal(expected_dq, dq[1])
-        coord.request_stop()
-        coord.join(threads)
-
-  def testPandasFeeding(self):
-    if not HAS_PANDAS:
-      return
-    with ops.Graph().as_default():
-      array1 = np.arange(32)
-      array2 = np.arange(32, 64)
-      df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(64, 96))
-      q = ff._enqueue_data(df, capacity=100)
-      batch_size = 5
-      dq_op = q.dequeue_many(5)
-      with session.Session() as sess:
-        coord = coordinator.Coordinator()
-        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-        for i in range(100):
-          indices = [
-              j % array1.shape[0]
-              for j in range(batch_size * i, batch_size * (i + 1))
-          ]
-          expected_df_indices = df.index[indices]
-          expected_rows = df.iloc[indices]
-          dq = sess.run(dq_op)
-          np.testing.assert_array_equal(expected_df_indices, dq[0])
-          for col_num, col in enumerate(df.columns):
-            np.testing.assert_array_equal(expected_rows[col].values,
-                                          dq[col_num + 1])
-        coord.request_stop()
-        coord.join(threads)
-
-  def testPandasFeedingMultiThread(self):
-    if not HAS_PANDAS:
-      return
-    with ops.Graph().as_default():
-      array1 = np.arange(128, 256)
-      array2 = 2 * array1
-      df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(128))
-      q = ff._enqueue_data(df, capacity=128, num_threads=8, shuffle=True)
-      batch_size = 5
-      dq_op = q.dequeue_many(batch_size)
-      with session.Session() as sess:
-        coord = coordinator.Coordinator()
-        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-        for _ in range(100):
-          dq = sess.run(dq_op)
-          indices = dq[0]
-          expected_rows = df.iloc[indices]
-          for col_num, col in enumerate(df.columns):
-            np.testing.assert_array_equal(expected_rows[col].values,
-                                          dq[col_num + 1])
-        coord.request_stop()
-        coord.join(threads)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py
index 5d5ed81fbb..07e57873d5 100644
--- a/tensorflow/python/estimator/keras.py
+++ b/tensorflow/python/estimator/keras.py
@@ -1,4 +1,4 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,489 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# pylint: disable=protected-access
-"""Home of estimator related functions.
+"""keras python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
 """
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
-import re
-import six
-
-from tensorflow.python.client import session
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.estimator import export as export_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
-from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.keras import backend as K
-from tensorflow.python.keras import metrics
-from tensorflow.python.keras import models
-from tensorflow.python.keras import optimizers
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import metrics as metrics_module
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import distribution_strategy_context
-from tensorflow.python.training import optimizer as tf_optimizer_module
-from tensorflow.python.training import saver as saver_lib
-from tensorflow.python.training import training_util
-
-
-_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-
-
-def _cast_tensor_to_floatx(x):
-  """Cast tensor to keras's floatx dtype if it is not already the same dtype."""
-  if x.dtype == K.floatx():
-    return x
-  else:
-    return math_ops.cast(x, K.floatx())
-
-
-def _convert_tensor(x):
-  """Create or cast tensor if needed."""
-  if not tensor_util.is_tensor(x):
-    # x is a numpy array
-    x = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(x)
-  if check_ops.is_numeric_tensor(x):
-    # is_numeric_tensor returns False if provided with a numpy array
-    x = _cast_tensor_to_floatx(x)
-  return x
-
-
-def _any_weight_initialized(keras_model):
-  """Check if any weights has been initialized in the Keras model.
-
-  Args:
-    keras_model: An instance of compiled keras model.
-
-  Returns:
-    boolean, True if at least one weight has been initialized, else False.
-    Currently keras initialize all weights at get_session().
-  """
-  if keras_model is None:
-    return False
-  for layer in keras_model.layers:
-    for weight in layer.weights:
-      if hasattr(weight, '_keras_initialized'):
-        return True
-  return False
-
-
-def _convert_estimator_io_to_keras(keras_model, features, labels):
-  """Converts estimator features and labels to keras input and target tensors.
-
-  Args:
-    keras_model: a compiled `tf.keras.Model` instance, used to determine the
-      order of the returned lists.
-    features: Dict of tensors or `None`.
-    labels: Dict of tensors, a single tensor, or `None`.
-
-  Returns:
-    Tuple of (
-      list of input tensors or `None`,
-      list of target tensors or `None`)
-    The order of tensors is determined by the order set in the keras model.
-  """
-
-  def _to_ordered_tensor_list(obj, key_order, obj_name, order_name):
-    """Convert obj to an ordered list of tensors.
-
-    Args:
-      obj: List, dict, or single tensor. May be `None`.
-      key_order: List of strings with the order to return (used if obj is a
-        dict).
-      obj_name: String name of object (e.g. "features" or "labels")
-      order_name: String name of the key order (e.g. "inputs" or "outputs")
-
-    Returns:
-      List of tensors, or `None`
-
-    Raises:
-      KeyError: If obj has invalid keys.
-    """
-    if obj is None:
-      return None
-    elif isinstance(obj, (list, tuple)):
-      return [_convert_tensor(x) for x in obj]
-    elif isinstance(obj, dict):
-      # Ensure that the obj keys and keys in key_order are exactly the same.
-      different_keys = set(obj.keys()) ^ set(key_order)
-
-      if different_keys:
-        raise KeyError(
-            'The dictionary passed into {obj_name} does not have the expected '
-            '{order_name} keys defined in the keras model.'
-            '\n\tExpected keys: {order_keys}'
-            '\n\t{obj_name} keys: {obj_keys}'
-            '\n\tDifference: {different_keys}'.format(
-                order_name=order_name, order_keys=set(key_order),
-                obj_name=obj_name, obj_keys=set(obj.keys()),
-                different_keys=different_keys))
-
-      return [_convert_tensor(obj[key]) for key in key_order]
-    else:  # Assume obj is a tensor.
-      return [_convert_tensor(obj)]
-
-  input_names = None
-  output_names = None
-  if isinstance(features, dict):
-    input_names = (
-        keras_model.input_names if keras_model._is_graph_network else
-        ['input_%d' % i for i in range(1, len(features) + 1)])
-  if isinstance(labels, dict):
-    output_names = (
-        keras_model.output_names if keras_model._is_graph_network else
-        ['output_%d' % i for i in range(1, len(labels) + 1)])
-
-  input_tensors = _to_ordered_tensor_list(
-      features, input_names, 'features', 'inputs')
-  target_tensors = _to_ordered_tensor_list(
-      labels, output_names, 'labels', 'outputs')
-
-  return input_tensors, target_tensors
-
-
-def _clone_and_build_model(mode,
-                           keras_model,
-                           custom_objects,
-                           features=None,
-                           labels=None):
-  """Clone and build the given keras_model.
-
-  Args:
-    mode: training mode.
-    keras_model: an instance of compiled keras model.
-    custom_objects: Dictionary for custom objects.
-    features: Dict of tensors.
-    labels: Dict of tensors, or single tensor instance.
-
-  Returns:
-    The newly built model.
-  """
-  # Set to True during training, False for inference or testing.
-  K.set_learning_phase(mode == model_fn_lib.ModeKeys.TRAIN)
-  input_tensors, target_tensors = _convert_estimator_io_to_keras(
-      keras_model, features, labels)
-
-  compile_clone = (mode != model_fn_lib.ModeKeys.PREDICT)
-
-  global_step = None
-  if compile_clone:
-    # Set iterations to the global step created by tf.train.create_global_step()
-    # which is automatically run in the estimator framework.
-    global_step = training_util.get_or_create_global_step()
-    K.track_variable(global_step)
-
-  clone = models.clone_and_build_model(
-      keras_model, input_tensors, target_tensors, custom_objects,
-      compile_clone=compile_clone,
-      in_place_reset=(not keras_model._is_graph_network),
-      optimizer_iterations=global_step)
-
-  return clone
-
-
-def _convert_keras_metrics_to_estimator(model):
-  """Convert metrics from a Keras model to ops used by the Estimator framework.
-
-  Args:
-    model: A `tf.keras.Model` object.
-
-  Returns:
-    Dictionary mapping metric names to tuples of (value, update) ops. May return
-    `None` if the model does not contain any metrics.
-  """
-  if not getattr(model, 'metrics', None):
-    return None
-
-  eval_metric_ops = {}
-
-  def get_metric_name(metric):
-    if isinstance(metric, metrics.Metric):
-      return metric.name
-    if callable(metric):
-      return metric.__name__
-    assert isinstance(metric, six.string_types)
-    return metric
-
-  # When each metric maps to an output
-  if isinstance(model.metrics, dict):
-    for i, output_name in enumerate(model.metrics.keys()):
-      # `metric` is the user given metric value in `compile`. This can be
-      # metric name (`acc`), metric function (binary_accuracy) or a metric
-      # object (BinaryAccuracy()).
-      metric = model.metrics[output_name]
-      metric_name = get_metric_name(metric)
-      # When some outputs use the same metric
-      if list(model.metrics.values()).count(metric_name) > 1:
-        metric_name += '_' + output_name
-      if isinstance(metric, metrics.Metric):
-        eval_metric_ops[metric_name] = metric
-      else:
-        eval_metric_ops[metric_name] = metrics_module.mean(
-            model.metrics_tensors[i - len(model.metrics)])
-  else:
-    for i, metric in enumerate(model.metrics):
-      metric_name = get_metric_name(metric)
-      if isinstance(metric, metrics.Metric):
-        eval_metric_ops[metric_name] = metric
-      else:
-        eval_metric_ops[metric_name] = metrics_module.mean(
-            model.metrics_tensors[i])
-  return eval_metric_ops
-
-
-def _create_keras_model_fn(keras_model, custom_objects=None):
-  """Creates model_fn for keras Estimator.
-
-  Args:
-    keras_model: an instance of compiled keras model.
-    custom_objects: Dictionary for custom objects.
-
-  Returns:
-    The model_fn for a keras Estimator.
-  """
-
-  def model_fn(features, labels, mode):
-    """model_fn for keras Estimator."""
-    # Raise an error when users use DistributionStrategy with native Keras
-    # optimizers. Currently we only support native TensorFlow optimizers.
-    if distribution_strategy_context.has_distribution_strategy() and \
-        not isinstance(keras_model.optimizer,
-                       (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
-      raise ValueError('Only TensorFlow native optimizers are supported with '
-                       'DistributionStrategy.')
-
-    model = _clone_and_build_model(mode, keras_model, custom_objects, features,
-                                   labels)
-    model_output_names = []
-    # We need to make sure that the output names of the last layer in the model
-    # is the same for each of the cloned models. This is required for mirrored
-    # strategy when we call regroup.
-    if distribution_strategy_context.has_distribution_strategy():
-      for name in model.output_names:
-        name = re.compile(r'_\d$').sub('', name)
-        model_output_names.append(name)
-    else:
-      model_output_names = model.output_names
-
-    # Get inputs to EstimatorSpec
-    predictions = dict(zip(model_output_names, model.outputs))
-
-    loss = None
-    train_op = None
-    eval_metric_ops = None
-
-    # Set loss and metric only during train and evaluate.
-    if mode is not model_fn_lib.ModeKeys.PREDICT:
-      if mode is model_fn_lib.ModeKeys.TRAIN:
-        model._make_train_function()  # pylint: disable=protected-access
-      else:
-        model._make_test_function()  # pylint: disable=protected-access
-      loss = model.total_loss
-
-      eval_metric_ops = _convert_keras_metrics_to_estimator(model)
-
-    # Set train_op only during train.
-    if mode is model_fn_lib.ModeKeys.TRAIN:
-      train_op = model.train_function.updates_op
-
-    if not model._is_graph_network:
-      # Reset model state to original state,
-      # to avoid `model_fn` being destructive for the initial model argument.
-      models.in_place_subclassed_model_state_restoration(keras_model)
-    return model_fn_lib.EstimatorSpec(
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metric_ops=eval_metric_ops,
-        export_outputs={
-            _DEFAULT_SERVING_KEY:
-            export_lib.export_output.PredictOutput(predictions)
-        })
-
-  return model_fn
-
-
-def _save_first_checkpoint(keras_model, custom_objects, config):
-  """Save first checkpoint for the keras Estimator.
-
-  Args:
-    keras_model: an instance of compiled keras model.
-    custom_objects: Dictionary for custom objects.
-    config: Estimator config.
-
-  Returns:
-    The path where keras model checkpoint is saved.
-  """
-  # save checkpoint into subdirectory to allow warm start
-  keras_model_dir = os.path.join(config.model_dir, 'keras')
-  # Load weights and save to checkpoint if there is no checkpoint
-  latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
-  if not latest_path:
-    keras_weights = None
-    if _any_weight_initialized(keras_model):
-      keras_weights = keras_model.get_weights()
-    if not gfile.IsDirectory(keras_model_dir):
-      gfile.MakeDirs(keras_model_dir)
-    with ops.Graph().as_default():
-      random_seed.set_random_seed(config.tf_random_seed)
-      training_util.create_global_step()
-      model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN, keras_model,
-                                     custom_objects)
-      # save to checkpoint
-      with session.Session(config=config.session_config) as sess:
-        if keras_weights:
-          model.set_weights(keras_weights)
-        # Make update ops and initialize all variables.
-        if not model.train_function:
-          # pylint: disable=protected-access
-          model._make_train_function()
-          K._initialize_variables(sess)
-          # pylint: enable=protected-access
-        saver = saver_lib.Saver()
-        latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
-        saver.save(sess, latest_path)
-  return latest_path
-
-
-def _get_file_from_google_storage(keras_model_path, model_dir):
-  """Get file from google storage and download to local file.
-
-  Args:
-    keras_model_path: a google storage path for compiled keras model.
-    model_dir: the directory from estimator config.
-
-  Returns:
-    The path where keras model is saved.
-
-  Raises:
-    ValueError: if storage object name does not end with .h5.
-  """
-  try:
-    from google.cloud import storage  # pylint:disable=g-import-not-at-top
-  except ImportError:
-    raise TypeError('Could not save model to Google cloud storage; please '
-                    'install `google-cloud-storage` via '
-                    '`pip install google-cloud-storage`.')
-  storage_client = storage.Client()
-  path, blob_name = os.path.split(keras_model_path)
-  _, bucket_name = os.path.split(path)
-  keras_model_dir = os.path.join(model_dir, 'keras')
-  if not gfile.Exists(keras_model_dir):
-    gfile.MakeDirs(keras_model_dir)
-  file_name = os.path.join(keras_model_dir, 'keras_model.h5')
-  try:
-    blob = storage_client.get_bucket(bucket_name).blob(blob_name)
-    blob.download_to_filename(file_name)
-  except:
-    raise ValueError('Failed to download keras model, please check '
-                     'environment variable GOOGLE_APPLICATION_CREDENTIALS '
-                     'and model path storage.googleapis.com/{bucket}/{object}.')
-  logging.info('Saving model to {}'.format(file_name))
-  del storage_client
-  return file_name
-
-
-def model_to_estimator(keras_model=None,
-                       keras_model_path=None,
-                       custom_objects=None,
-                       model_dir=None,
-                       config=None):
-  """Constructs an `Estimator` instance from given keras model.
-
-  For usage example, please see:
-  [Creating estimators from Keras
-  Models](https://tensorflow.org/guide/estimators#model_to_estimator).
-
-  Args:
-    keras_model: A compiled Keras model object. This argument is mutually
-      exclusive with `keras_model_path`.
-    keras_model_path: Path to a compiled Keras model saved on disk, in HDF5
-      format, which can be generated with the `save()` method of a Keras model.
-      This argument is mutually exclusive with `keras_model`.
-    custom_objects: Dictionary for custom objects.
-    model_dir: Directory to save `Estimator` model parameters, graph, summary
-      files for TensorBoard, etc.
-    config: `RunConfig` to config `Estimator`.
-
-  Returns:
-    An Estimator from given keras model.
-
-  Raises:
-    ValueError: if neither keras_model nor keras_model_path was given.
-    ValueError: if both keras_model and keras_model_path was given.
-    ValueError: if the keras_model_path is a GCS URI.
-    ValueError: if keras_model has not been compiled.
-  """
-  if not (keras_model or keras_model_path):
-    raise ValueError(
-        'Either `keras_model` or `keras_model_path` needs to be provided.')
-  if keras_model and keras_model_path:
-    raise ValueError(
-        'Please specity either `keras_model` or `keras_model_path`, '
-        'but not both.')
-
-  config = estimator_lib.maybe_overwrite_model_dir_and_session_config(
-      config, model_dir)
-  if not keras_model:
-    if keras_model_path.startswith(
-        'gs://') or 'storage.googleapis.com' in keras_model_path:
-      keras_model_path = _get_file_from_google_storage(keras_model_path,
-                                                       config.model_dir)
-    logging.info('Loading models from %s', keras_model_path)
-    keras_model = models.load_model(keras_model_path)
-  else:
-    logging.info('Using the Keras model provided.')
-    keras_model = keras_model
-
-  if not hasattr(keras_model, 'optimizer') or not keras_model.optimizer:
-    raise ValueError(
-        'The given keras model has not been compiled yet. '
-        'Please compile the model with `model.compile()` '
-        'before calling `model_to_estimator()`.')
-
-  keras_model_fn = _create_keras_model_fn(keras_model, custom_objects)
-  if _any_weight_initialized(keras_model):
-    # Warn if config passed to estimator tries to update GPUOptions. If a
-    # session has already been created, the GPUOptions passed to the first
-    # session sticks.
-    if config.session_config.HasField('gpu_options'):
-      logging.warning(
-          'The Keras backend session has already been set. '
-          'The _session_config passed to model_to_estimator will not be used.')
-  else:
-    # Pass the config into keras backend's default session.
-    sess = session.Session(config=config.session_config)
-    K.set_session(sess)
-
-  warm_start_path = None
-  if keras_model._is_graph_network:
-    warm_start_path = _save_first_checkpoint(keras_model, custom_objects,
-                                             config)
-  elif keras_model.built:
-    logging.warning('You are creating an Estimator from a Keras model manually '
-                    'subclassed from `Model`, that was already called on some '
-                    'inputs (and thus already had weights). We are currently '
-                    'unable to preserve the model\'s state (its weights) as '
-                    'part of the estimator in this case. Be warned that the '
-                    'estimator has been created using a freshly initialized '
-                    'version of your model.\n'
-                    'Note that this doesn\'t affect the state of the model '
-                    'instance you passed as `keras_model` argument.')
+from tensorflow_estimator.python.estimator import keras
 
-  estimator = estimator_lib.Estimator(keras_model_fn,
-                                      config=config,
-                                      warm_start_from=warm_start_path)
+# Include attrs that start with single underscore.
+keras.__all__ = [s for s in dir(keras) if not s.startswith('__')]
 
-  return estimator
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.keras import *
diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py
deleted file mode 100644
index 4e285fa25a..0000000000
--- a/tensorflow/python/estimator/keras_test.py
+++ /dev/null
@@ -1,805 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for training routines."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import json
-from math import log10
-import os
-import tempfile
-
-import numpy as np
-
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python import keras
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import keras as keras_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator import run_config as run_config_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.optimizers import SGD
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.ops.parsing_ops import gen_parsing_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import rmsprop
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.training import training_util
-
-
-try:
-  import h5py  # pylint:disable=g-import-not-at-top
-except ImportError:
-  h5py = None
-
-_RANDOM_SEED = 1337
-_TRAIN_SIZE = 200
-_INPUT_SIZE = (10,)
-_NUM_CLASS = 2
-
-_TMP_DIR = '/tmp'
-
-
-def simple_sequential_model():
-  model = keras.models.Sequential()
-  model.add(keras.layers.Dense(16, activation='relu', input_shape=_INPUT_SIZE))
-  model.add(keras.layers.Dropout(0.1))
-  model.add(keras.layers.Dense(_NUM_CLASS, activation='softmax'))
-  return model
-
-
-def simple_functional_model(activation='relu'):
-  a = keras.layers.Input(shape=_INPUT_SIZE)
-  b = keras.layers.Dense(16, activation=activation)(a)
-  b = keras.layers.Dropout(0.1)(b)
-  b = keras.layers.Dense(_NUM_CLASS, activation='softmax')(b)
-  model = keras.models.Model(inputs=[a], outputs=[b])
-  return model
-
-
-def simple_subclassed_model():
-
-  class SimpleModel(keras.Model):
-
-    def __init__(self):
-      super(SimpleModel, self).__init__()
-      self.dense1 = keras.layers.Dense(16, activation='relu')
-      self.dp = keras.layers.Dropout(0.1)
-      self.dense2 = keras.layers.Dense(_NUM_CLASS, activation='softmax')
-
-    def call(self, inputs):
-      x = self.dense1(inputs)
-      x = self.dp(x)
-      return self.dense2(x)
-
-  return SimpleModel()
-
-
-def gen_input_fn(x, y=None, batch_size=128, num_epochs=1, shuffle=False):
-  def input_fn():
-    ds = dataset_ops.Dataset.from_tensor_slices((x, y) if y is not None else x)
-    if shuffle:
-      ds = ds.shuffle(1000)
-    return ds.repeat(num_epochs).batch(batch_size)
-  return input_fn
-
-
-def get_multi_inputs_multi_outputs_data():
-  (a_train, c_train), (a_test, c_test) = testing_utils.get_test_data(
-      train_samples=_TRAIN_SIZE,
-      test_samples=50,
-      input_shape=(16,),
-      num_classes=3,
-      random_seed=_RANDOM_SEED)
-  (b_train, d_train), (b_test, d_test) = testing_utils.get_test_data(
-      train_samples=_TRAIN_SIZE,
-      test_samples=50,
-      input_shape=(16,),
-      num_classes=2,
-      random_seed=_RANDOM_SEED)
-  (m_train, _), (m_test, _) = testing_utils.get_test_data(
-      train_samples=_TRAIN_SIZE,
-      test_samples=50,
-      input_shape=(8,),
-      num_classes=2,
-      random_seed=_RANDOM_SEED)
-
-  c_train = keras.utils.to_categorical(c_train)
-  c_test = keras.utils.to_categorical(c_test)
-  d_train = keras.utils.to_categorical(d_train)
-  d_test = keras.utils.to_categorical(d_test)
-
-  train_data = {
-      'input_a': a_train,
-      'input_b': b_train,
-      'input_m': m_train,
-      'output_c': c_train,
-      'output_d': d_train
-  }
-  test_data = {
-      'input_a': a_test,
-      'input_b': b_test,
-      'input_m': m_test,
-      'output_c': c_test,
-      'output_d': d_test
-  }
-
-  return (train_data, test_data)
-
-
-def get_resource_for_simple_model(model_type='sequential',
-                                  is_evaluate=False,):
-  if model_type == 'sequential':
-    model = simple_sequential_model()
-    model.build()
-  elif model_type == 'subclass':
-    model = simple_subclassed_model()
-  else:
-    assert model_type == 'functional'
-    model = simple_functional_model()
-
-  if model_type == 'subclass':
-    input_name = 'input_1'
-    output_name = 'output_1'
-  else:
-    input_name = model.input_names[0]
-    output_name = model.output_names[0]
-
-  np.random.seed(_RANDOM_SEED)
-  (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
-      train_samples=_TRAIN_SIZE,
-      test_samples=50,
-      input_shape=_INPUT_SIZE,
-      num_classes=_NUM_CLASS)
-  y_train = keras.utils.to_categorical(y_train)
-  y_test = keras.utils.to_categorical(y_test)
-
-  train_input_fn = gen_input_fn(
-      x=randomize_io_type(x_train, input_name),
-      y=randomize_io_type(y_train, output_name),
-      shuffle=False,
-      num_epochs=None,
-      batch_size=16)
-
-  evaluate_input_fn = gen_input_fn(
-      x=randomize_io_type(x_test, input_name),
-      y=randomize_io_type(y_test, output_name),
-      num_epochs=1, shuffle=False)
-
-  predict_input_fn = gen_input_fn(
-      x=randomize_io_type(x_test, input_name), num_epochs=1, shuffle=False)
-
-  inference_input_fn = evaluate_input_fn if is_evaluate else predict_input_fn
-
-  return model, (x_train, y_train), (x_test,
-                                     y_test), train_input_fn, inference_input_fn
-
-
-def randomize_io_type(array, name):
-  switch = np.random.random()
-  if switch > 0.5:
-    return array
-  else:
-    return {name: array}
-
-
-def multi_inputs_multi_outputs_model():
-  input_a = keras.layers.Input(shape=(16,), name='input_a')
-  input_b = keras.layers.Input(shape=(16,), name='input_b')
-  input_m = keras.layers.Input(shape=(8,), dtype='string', name='input_m')
-  dense = keras.layers.Dense(8, name='dense_1')
-
-  interm_a = dense(input_a)
-  # Read m
-  interm_m = keras.layers.Lambda(gen_parsing_ops.string_to_number)(input_m)
-  interm_s = keras.layers.Lambda(lambda k: k[0] * k[1])([interm_m, interm_a])
-  interm_b = dense(input_b)
-  merged = keras.layers.concatenate([interm_s, interm_b], name='merge')
-  output_c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged)
-  output_d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged)
-  model = keras.models.Model(
-      inputs=[input_a, input_b, input_m], outputs=[output_c, output_d])
-  model.compile(
-      loss='categorical_crossentropy',
-      optimizer='rmsprop',
-      metrics={
-          'dense_2': 'categorical_accuracy',
-          'dense_3': 'categorical_accuracy'
-      })
-  return model
-
-
-class MyHook(session_run_hook.SessionRunHook):
-
-  def begin(self):
-    _ = variable_scope.get_variable('temp', [1])
-
-
-class TestKerasEstimator(test_util.TensorFlowTestCase):
-
-  def setUp(self):
-    self._base_dir = os.path.join(self.get_temp_dir(), 'keras_estimator_test')
-    gfile.MakeDirs(self._base_dir)
-    self._config = run_config_lib.RunConfig(
-        tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir)
-    super(TestKerasEstimator, self).setUp()
-
-  def tearDown(self):
-    # Make sure nothing is stuck in limbo.
-    writer_cache.FileWriterCache.clear()
-    if os.path.isdir(self._base_dir):
-      gfile.DeleteRecursively(self._base_dir)
-    super(TestKerasEstimator, self).tearDown()
-
-  def test_train(self):
-    for model_type in ['sequential', 'functional']:
-      keras_model, (_, _), (
-          _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
-              model_type=model_type, is_evaluate=True)
-      keras_model.compile(
-          loss='categorical_crossentropy',
-          optimizer='rmsprop',
-          metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-      with self.cached_session():
-        est_keras = keras_lib.model_to_estimator(
-            keras_model=keras_model, config=self._config)
-        before_eval_results = est_keras.evaluate(
-            input_fn=eval_input_fn, steps=1)
-        est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
-        after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
-        self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
-
-      writer_cache.FileWriterCache.clear()
-      gfile.DeleteRecursively(self._config.model_dir)
-
-  # see b/109935364
-  @test_util.run_in_graph_and_eager_modes
-  def test_train_with_hooks(self):
-    for model_type in ['sequential', 'functional']:
-      keras_model, (_, _), (
-          _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
-              model_type=model_type, is_evaluate=True)
-      keras_model.compile(
-          loss='categorical_crossentropy',
-          optimizer=rmsprop.RMSPropOptimizer(1e-3),
-          metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-      my_hook = MyHook()
-      with self.cached_session():
-        est_keras = keras_lib.model_to_estimator(
-            keras_model=keras_model, config=self._config)
-        before_eval_results = est_keras.evaluate(
-            input_fn=eval_input_fn, steps=1)
-        est_keras.train(input_fn=train_input_fn, hooks=[my_hook],
-                        steps=_TRAIN_SIZE / 16)
-        after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
-        self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
-
-      writer_cache.FileWriterCache.clear()
-      gfile.DeleteRecursively(self._config.model_dir)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_train_with_model_fit_and_hooks(self):
-    keras_model, (x_train, y_train), _, \
-      train_input_fn, eval_input_fn = get_resource_for_simple_model(
-          model_type='sequential', is_evaluate=True)
-
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer=rmsprop.RMSPropOptimizer(1e-3),
-        metrics=['mse', keras.metrics.CategoricalAccuracy()])
-    my_hook = MyHook()
-    with self.cached_session():
-      keras_model.fit(x_train, y_train, epochs=1)
-
-      keras_est = keras_lib.model_to_estimator(
-          keras_model=keras_model, config=self._config)
-      before_eval_results = keras_est.evaluate(input_fn=eval_input_fn)
-      keras_est.train(input_fn=train_input_fn, hooks=[my_hook],
-                      steps=_TRAIN_SIZE / 16)
-      after_eval_results = keras_est.evaluate(input_fn=eval_input_fn, steps=1)
-      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_train_with_tf_optimizer(self):
-    for model_type in ['sequential', 'functional']:
-      keras_model, (_, _), (
-          _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
-              model_type=model_type, is_evaluate=True)
-      keras_model.compile(
-          loss='categorical_crossentropy',
-          optimizer=rmsprop.RMSPropOptimizer(1e-3),
-          metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-      with self.cached_session():
-        est_keras = keras_lib.model_to_estimator(
-            keras_model=keras_model,
-            config=self._config)
-        before_eval_results = est_keras.evaluate(
-            input_fn=eval_input_fn, steps=1)
-        est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
-        after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
-        self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
-
-      writer_cache.FileWriterCache.clear()
-      gfile.DeleteRecursively(self._config.model_dir)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_train_with_subclassed_model(self):
-    keras_model, (_, _), (
-        _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
-            model_type='subclass', is_evaluate=True)
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer=rmsprop.RMSPropOptimizer(1e-3),
-        metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-    with self.cached_session():
-      est_keras = keras_lib.model_to_estimator(
-          keras_model=keras_model, config=self._config)
-      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
-      before_eval_results = est_keras.evaluate(
-          input_fn=eval_input_fn, steps=1)
-      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
-      after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
-      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
-
-  def test_train_with_subclassed_model_with_existing_state(self):
-    keras_model, (_, _), (
-        _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
-            model_type='subclass', is_evaluate=True)
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer=rmsprop.RMSPropOptimizer(1e-3),
-        metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-    with self.cached_session():
-      # Create state
-      keras_model.train_on_batch(np.random.random((10,) + _INPUT_SIZE),
-                                 np.random.random((10, _NUM_CLASS)))
-      original_preds = keras_model.predict(np.ones((10,) + _INPUT_SIZE))
-
-      est_keras = keras_lib.model_to_estimator(
-          keras_model=keras_model, config=self._config)
-      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
-      before_eval_results = est_keras.evaluate(
-          input_fn=eval_input_fn, steps=1)
-      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
-      after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
-      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
-
-      # Check that original model state was not altered
-      preds = keras_model.predict(np.ones((10,) + _INPUT_SIZE))
-      self.assertAllClose(original_preds, preds, atol=1e-5)
-      # Check that the original model compilation did not break
-      keras_model.train_on_batch(np.random.random((10,) + _INPUT_SIZE),
-                                 np.random.random((10, _NUM_CLASS)))
-
-  def test_evaluate(self):
-    keras_model, (x_train, y_train), (
-        x_test, y_test), _, eval_input_fn = get_resource_for_simple_model(
-            model_type='functional', is_evaluate=True)
-
-    with self.cached_session():
-      metrics = [
-          'binary_accuracy', 'binary_crossentropy', 'categorical_accuracy',
-          'categorical_crossentropy', 'cosine_proximity', 'hinge',
-          'kullback_leibler_divergence', 'mean_absolute_error',
-          'mean_absolute_percentage_error', 'mean_squared_error',
-          'mean_squared_logarithmic_error', 'poisson', 'squared_hinge',
-          'top_k_categorical_accuracy'
-      ]
-      keras_model.compile(
-          loss='categorical_crossentropy', optimizer='adam', metrics=metrics)
-      keras_model.fit(x_train, y_train, epochs=1)
-      keras_eval = keras_model.evaluate(x_test, y_test, batch_size=32)
-
-    with self.cached_session():
-      keras_est = keras_lib.model_to_estimator(
-          keras_model=keras_model, config=self._config)
-      est_eval = keras_est.evaluate(input_fn=eval_input_fn)
-
-    metrics = ['loss'] + metrics
-
-    # Check loss and all metrics match between keras and estimator.
-    def shift(val):
-      if val == 0:
-        return 0
-      else:
-        return val / 10**int(log10(abs(val)))
-
-    for i, metric_name in enumerate(metrics):
-      self.assertAlmostEqual(
-          shift(est_eval[metric_name]),
-          shift(keras_eval[i]),
-          places=4,
-          msg='%s mismatch, keras model: %s, estimator: %s' %
-          (metric_name, est_eval[metric_name], keras_eval[i]))
-
-  def test_predict(self):
-    # Check that predict on a pretrained model yield the same result.
-    keras_model, (x_train, y_train), (
-        x_test, _), _, pred_input_fn = get_resource_for_simple_model(
-            model_type='sequential', is_evaluate=False)
-
-    with self.cached_session():
-      keras_model.compile(
-          loss='categorical_crossentropy',
-          optimizer='adam',
-          metrics=['accuracy'])
-      keras_model.fit(x_train, y_train, epochs=1)
-      keras_pred = [np.argmax(y) for y in keras_model.predict(x_test)]
-
-    with self.cached_session():
-      keras_est = keras_lib.model_to_estimator(
-          keras_model=keras_model, config=self._config)
-      est_pred = [
-          np.argmax(y[keras_model.output_names[0]])
-          for y in keras_est.predict(input_fn=pred_input_fn)
-      ]
-    self.assertAllEqual(est_pred, keras_pred)
-
-  def test_multi_inputs_multi_outputs_with_input_fn_as_dict(self):
-    train_data, test_data = get_multi_inputs_multi_outputs_data()
-
-    def train_input_fn():
-      input_dict = {
-          'input_a': train_data['input_a'],
-          'input_b': train_data['input_b'],
-          'input_m': train_data['input_m'].astype(np.str)
-      }
-      output_dict = {
-          'dense_2': train_data['output_c'],
-          'dense_3': train_data['output_d']
-      }
-      return input_dict, output_dict
-
-    def eval_input_fn():
-      input_dict = {
-          'input_a': test_data['input_a'],
-          'input_b': test_data['input_b'],
-          'input_m': test_data['input_m'].astype(np.str)
-      }
-      output_dict = {
-          'dense_2': test_data['output_c'],
-          'dense_3': test_data['output_d']
-      }
-      return input_dict, output_dict
-
-    def pred_input_fn():
-      input_dict = {
-          'input_a': test_data['input_a'],
-          'input_b': test_data['input_b'],
-          'input_m': test_data['input_m'].astype(np.str)
-      }
-      return input_dict
-
-    self.do_test_multi_inputs_multi_outputs_with_input_fn(
-        train_input_fn, eval_input_fn, pred_input_fn)
-
-  def test_multi_inputs_multi_outputs_with_input_fn_as_list(self):
-    train_data, test_data = get_multi_inputs_multi_outputs_data()
-
-    def train_input_fn():
-      input_list = [
-          train_data['input_a'], train_data['input_b'],
-          train_data['input_m'].astype(np.str)
-      ]
-      output_list = [train_data['output_c'], train_data['output_d']]
-      return input_list, output_list
-
-    def eval_input_fn():
-      input_list = [
-          test_data['input_a'], test_data['input_b'],
-          test_data['input_m'].astype(np.str)
-      ]
-      output_list = [test_data['output_c'], test_data['output_d']]
-      return input_list, output_list
-
-    def pred_input_fn():
-      input_list = [
-          test_data['input_a'], test_data['input_b'],
-          test_data['input_m'].astype(np.str)
-      ]
-      return input_list
-
-    self.do_test_multi_inputs_multi_outputs_with_input_fn(
-        train_input_fn, eval_input_fn, pred_input_fn)
-
-  def do_test_multi_inputs_multi_outputs_with_input_fn(
-      self, train_input_fn, eval_input_fn, pred_input_fn):
-    with self.cached_session():
-      model = multi_inputs_multi_outputs_model()
-      est_keras = keras_lib.model_to_estimator(
-          keras_model=model, config=self._config)
-      baseline_eval_results = est_keras.evaluate(
-          input_fn=eval_input_fn, steps=1)
-      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
-      eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
-      self.assertLess(eval_results['loss'], baseline_eval_results['loss'])
-      est_keras.predict(input_fn=pred_input_fn)
-
-  def test_init_from_file(self):
-    if h5py is None:
-      return  # Skip test if models cannot be saved.
-
-    keras_model, (x_train, y_train), (
-        x_test, _), _, pred_input_fn = get_resource_for_simple_model(
-            model_type='functional', is_evaluate=False)
-
-    with self.cached_session():
-      keras_model.compile(
-          loss='categorical_crossentropy',
-          optimizer='rmsprop',
-          metrics=['categorical_accuracy'])
-      keras_model.fit(x_train, y_train, epochs=1)
-      keras_pred = [np.argmax(y) for y in keras_model.predict(x_test)]
-      fname = os.path.join(self._base_dir, 'keras_model.h5')
-      keras.models.save_model(keras_model, fname)
-
-    with self.cached_session():
-      keras_est = keras_lib.model_to_estimator(
-          keras_model_path=fname, config=self._config)
-      est_pred = [
-          np.argmax(y[keras_model.output_names[0]])
-          for y in keras_est.predict(input_fn=pred_input_fn)
-      ]
-    self.assertAllEqual(est_pred, keras_pred)
-
-  def test_keras_model_init_error(self):
-    with self.assertRaisesRegexp(ValueError, 'Either'):
-      keras_lib.model_to_estimator()
-
-    with self.cached_session():
-      keras_model = simple_sequential_model()
-      with self.assertRaisesRegexp(ValueError, 'not both'):
-        keras_lib.model_to_estimator(
-            keras_model=keras_model,
-            keras_model_path=tempfile.mkdtemp(dir=self._base_dir))
-
-    with self.cached_session():
-      keras_model = simple_sequential_model()
-      with self.assertRaisesRegexp(ValueError, 'compiled'):
-        keras_lib.model_to_estimator(keras_model=keras_model)
-
-  def test_invalid_ionames_error(self):
-    (x_train, y_train), (_, _) = testing_utils.get_test_data(
-        train_samples=_TRAIN_SIZE,
-        test_samples=100,
-        input_shape=(10,),
-        num_classes=2)
-    y_train = keras.utils.to_categorical(y_train)
-
-    def invald_input_name_input_fn():
-      input_dict = {'invalid_input_name': x_train}
-      return input_dict, y_train
-
-    def invald_output_name_input_fn():
-      input_dict = {'input_1': x_train}
-      output_dict = {'invalid_output_name': y_train}
-      return input_dict, output_dict
-    model = simple_functional_model()
-    model.compile(
-        loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
-    with self.cached_session():
-      est_keras = keras_lib.model_to_estimator(
-          keras_model=model, config=self._config)
-    with self.cached_session():
-      with self.assertRaisesRegexp(KeyError,
-                                   'Difference: .*invalid_input_name'):
-        est_keras.train(input_fn=invald_input_name_input_fn, steps=100)
-
-      with self.assertRaisesRegexp(KeyError,
-                                   'Difference: .*invalid_output_name'):
-        est_keras.train(input_fn=invald_output_name_input_fn, steps=100)
-
-  def test_custom_objects(self):
-
-    def relu6(x):
-      return keras.backend.relu(x, max_value=6)
-
-    keras_model = simple_functional_model(activation=relu6)
-    keras_model.compile(loss='categorical_crossentropy', optimizer='adam')
-    custom_objects = {
-        'relu6': relu6
-    }
-
-    (x_train, y_train), _ = testing_utils.get_test_data(
-        train_samples=_TRAIN_SIZE,
-        test_samples=50,
-        input_shape=(10,),
-        num_classes=2)
-    y_train = keras.utils.to_categorical(y_train, 2)
-    input_name = keras_model.input_names[0]
-    output_name = keras_model.output_names[0]
-    train_input_fn = gen_input_fn(
-        x=randomize_io_type(x_train, input_name),
-        y=randomize_io_type(y_train, output_name),
-        shuffle=False,
-        num_epochs=None,
-        batch_size=16)
-    with self.assertRaisesRegexp(ValueError, 'relu6'):
-      with self.cached_session():
-        est = keras_lib.model_to_estimator(
-            keras_model=keras_model,
-            model_dir=tempfile.mkdtemp(dir=self._base_dir))
-        est.train(input_fn=train_input_fn, steps=1)
-
-    with self.cached_session():
-      est = keras_lib.model_to_estimator(
-          keras_model=keras_model,
-          model_dir=tempfile.mkdtemp(dir=self._base_dir),
-          custom_objects=custom_objects)
-      est.train(input_fn=train_input_fn, steps=1)
-
-  def test_tf_config(self):
-    keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer='rmsprop',
-        metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-    tf_config = json.dumps({
-        'cluster': {
-            run_config_lib.TaskType.PS: ['localhost:1234'],
-            run_config_lib.TaskType.WORKER: ['localhost:1236'],
-            run_config_lib.TaskType.MASTER: ['localhost:1238']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 0
-        }
-    })
-    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
-      with self.cached_session():
-        keras_lib.model_to_estimator(
-            keras_model=keras_model,
-            model_dir=tempfile.mkdtemp(dir=self._base_dir))
-
-  def test_gpu_config(self):
-    with ops.Graph().as_default():
-      keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
-      keras_model.compile(
-          loss='categorical_crossentropy',
-          optimizer='rmsprop',
-          metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-      gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.3)
-      sess_config = config_pb2.ConfigProto(gpu_options=gpu_options)
-      self._config._session_config = sess_config
-      with self.cached_session():
-        keras_lib.model_to_estimator(
-            keras_model=keras_model, config=self._config)
-        self.assertEqual(
-            keras.backend.get_session()
-            ._config.gpu_options.per_process_gpu_memory_fraction,
-            gpu_options.per_process_gpu_memory_fraction)
-
-  def test_with_empty_config(self):
-    keras_model, _, _, _, _ = get_resource_for_simple_model(
-        model_type='sequential', is_evaluate=True)
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer='rmsprop',
-        metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-    with self.cached_session():
-      est_keras = keras_lib.model_to_estimator(
-          keras_model=keras_model, model_dir=self._base_dir,
-          config=run_config_lib.RunConfig())
-      self.assertEqual(run_config_lib.get_default_session_config(),
-                       est_keras._session_config)
-      self.assertEqual(est_keras._session_config,
-                       est_keras._config.session_config)
-      self.assertEqual(self._base_dir, est_keras._config.model_dir)
-      self.assertEqual(self._base_dir, est_keras._model_dir)
-
-    with self.cached_session():
-      est_keras = keras_lib.model_to_estimator(
-          keras_model=keras_model, model_dir=self._base_dir,
-          config=None)
-      self.assertEqual(run_config_lib.get_default_session_config(),
-                       est_keras._session_config)
-      self.assertEqual(est_keras._session_config,
-                       est_keras._config.session_config)
-      self.assertEqual(self._base_dir, est_keras._config.model_dir)
-      self.assertEqual(self._base_dir, est_keras._model_dir)
-
-  def test_with_empty_config_and_empty_model_dir(self):
-    keras_model, _, _, _, _ = get_resource_for_simple_model(
-        model_type='sequential', is_evaluate=True)
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer='rmsprop',
-        metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-    with self.cached_session():
-      with test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR):
-        est_keras = keras_lib.model_to_estimator(
-            keras_model=keras_model,
-            config=run_config_lib.RunConfig())
-        self.assertEqual(est_keras._model_dir, _TMP_DIR)
-
-  def test_with_conflicting_model_dir_and_config(self):
-    keras_model, _, _, _, _ = get_resource_for_simple_model(
-        model_type='sequential', is_evaluate=True)
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer='rmsprop',
-        metrics=['mse', keras.metrics.CategoricalAccuracy()])
-
-    with self.cached_session():
-      with self.assertRaisesRegexp(ValueError, '`model_dir` are set both in '
-                                   'constructor and `RunConfig`'):
-        keras_lib.model_to_estimator(
-            keras_model=keras_model, model_dir=self._base_dir,
-            config=run_config_lib.RunConfig(model_dir=_TMP_DIR))
-
-  def test_pretrained_weights(self):
-    keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer=rmsprop.RMSPropOptimizer(1e-3),
-        metrics=['mse', keras.metrics.CategoricalAccuracy()])
-    with self.cached_session():
-      keras_model.train_on_batch(
-          np.random.random((10,) + _INPUT_SIZE),
-          np.random.random((10, _NUM_CLASS)))
-      weights = keras_model.get_weights()
-      keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
-      keras_model.set_weights(weights)
-      keras_model.compile(
-          loss='categorical_crossentropy',
-          optimizer=SGD(lr=0.0001, momentum=0.9),
-          metrics=['mse', keras.metrics.CategoricalAccuracy()])
-      keras_lib.model_to_estimator(
-          keras_model=keras_model, config=self._config)
-
-  def assert_increasing_global_step(self, optimizer):
-    keras_model, _, _, train_input_fn, _ = get_resource_for_simple_model(
-        model_type='sequential', is_evaluate=True)
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer=optimizer,
-        metrics=['mse', keras.metrics.CategoricalAccuracy()])
-    with self.cached_session() as sess:
-      keras_model_fn = keras_lib._create_keras_model_fn(keras_model)
-      global_step = training_util.create_global_step()
-      features, labels = train_input_fn().make_one_shot_iterator().get_next()
-      spec = keras_model_fn(features, labels, mode=model_fn_lib.ModeKeys.TRAIN)
-
-      sess.run(variables.global_variables_initializer())
-      sess.run(variables.local_variables_initializer())
-
-      self.assertEqual(global_step.eval(), 0)  # Sanity check
-      sess.run(spec.train_op)
-      self.assertEqual(global_step.eval(), 1)
-
-  def test_model_fn_increments_global_step_tf_optimizer(self):
-    self.assert_increasing_global_step(rmsprop.RMSPropOptimizer(1e-3))
-
-  def test_model_fn_increments_global_step_keras_optimizer(self):
-    self.assert_increasing_global_step('rmsprop')
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 824789467d..3bb3d5785d 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,509 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""model_fn python module.
 
-"""Classes and methods related to model_fn."""
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
+from tensorflow_estimator.python.estimator import model_fn
 
-import six
+# Include attrs that start with single underscore.
+model_fn.__all__ = [s for s in dir(model_fn) if not s.startswith('__')]
 
-from tensorflow.python.estimator.export import export_output as export_output_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.keras.metrics import Metric
-from tensorflow.python.ops import array_ops
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.saved_model import tag_constants
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.util import nest
-from tensorflow.python.util.tf_export import estimator_export
-
-
-@estimator_export('estimator.ModeKeys')
-class ModeKeys(object):
-  """Standard names for model modes.
-
-  The following standard keys are defined:
-
-  * `TRAIN`: training mode.
-  * `EVAL`: evaluation mode.
-  * `PREDICT`: inference mode.
-  """
-
-  TRAIN = 'train'
-  EVAL = 'eval'
-  PREDICT = 'infer'
-
-
-LOSS_METRIC_KEY = 'loss'
-AVERAGE_LOSS_METRIC_KEY = 'average_loss'
-
-# Mapping of the modes to appropriate tag_constants that are used for saving.
-EXPORT_TAG_MAP = {
-    ModeKeys.PREDICT: [tag_constants.SERVING],
-    ModeKeys.TRAIN: [tag_constants.TRAINING],
-    ModeKeys.EVAL: [tag_constants.EVAL],
-}
-
-
-@estimator_export('estimator.EstimatorSpec')
-class EstimatorSpec(
-    collections.namedtuple('EstimatorSpec', [
-        'mode', 'predictions', 'loss', 'train_op', 'eval_metric_ops',
-        'export_outputs', 'training_chief_hooks', 'training_hooks', 'scaffold',
-        'evaluation_hooks', 'prediction_hooks'
-    ])):
-  """Ops and objects returned from a `model_fn` and passed to an `Estimator`.
-
-  `EstimatorSpec` fully defines the model to be run by an `Estimator`.
-  """
-
-  def __new__(cls,
-              mode,
-              predictions=None,
-              loss=None,
-              train_op=None,
-              eval_metric_ops=None,
-              export_outputs=None,
-              training_chief_hooks=None,
-              training_hooks=None,
-              scaffold=None,
-              evaluation_hooks=None,
-              prediction_hooks=None):
-    """Creates a validated `EstimatorSpec` instance.
-
-    Depending on the value of `mode`, different arguments are required. Namely
-
-    * For `mode == ModeKeys.TRAIN`: required fields are `loss` and `train_op`.
-    * For `mode == ModeKeys.EVAL`: required field is `loss`.
-    * For `mode == ModeKeys.PREDICT`: required fields are `predictions`.
-
-    model_fn can populate all arguments independent of mode. In this case, some
-    arguments will be ignored by an `Estimator`. E.g. `train_op` will be
-    ignored in eval and infer modes. Example:
-
-    ```python
-    def my_model_fn(features, labels, mode):
-      predictions = ...
-      loss = ...
-      train_op = ...
-      return tf.estimator.EstimatorSpec(
-          mode=mode,
-          predictions=predictions,
-          loss=loss,
-          train_op=train_op)
-    ```
-
-    Alternatively, model_fn can just populate the arguments appropriate to the
-    given mode. Example:
-
-    ```python
-    def my_model_fn(features, labels, mode):
-      if (mode == tf.estimator.ModeKeys.TRAIN or
-          mode == tf.estimator.ModeKeys.EVAL):
-        loss = ...
-      else:
-        loss = None
-      if mode == tf.estimator.ModeKeys.TRAIN:
-        train_op = ...
-      else:
-        train_op = None
-      if mode == tf.estimator.ModeKeys.PREDICT:
-        predictions = ...
-      else:
-        predictions = None
-
-      return tf.estimator.EstimatorSpec(
-          mode=mode,
-          predictions=predictions,
-          loss=loss,
-          train_op=train_op)
-    ```
-
-    Args:
-      mode: A `ModeKeys`. Specifies if this is training, evaluation or
-        prediction.
-      predictions: Predictions `Tensor` or dict of `Tensor`.
-      loss: Training loss `Tensor`. Must be either scalar, or with shape `[1]`.
-      train_op: Op for the training step.
-      eval_metric_ops: Dict of metric results keyed by name.
-        The values of the dict can be one of the following:
-        (1) instance of `Metric` class.
-        (2) Results of calling a metric function, namely a
-        `(metric_tensor, update_op)` tuple. `metric_tensor` should be
-        evaluated without any impact on state (typically is a pure computation
-        results based on variables.). For example, it should not trigger the
-        `update_op` or requires any input fetching.
-      export_outputs: Describes the output signatures to be exported to
-        `SavedModel` and used during serving.
-        A dict `{name: output}` where:
-        * name: An arbitrary name for this output.
-        * output: an `ExportOutput` object such as `ClassificationOutput`,
-            `RegressionOutput`, or `PredictOutput`.
-        Single-headed models only need to specify one entry in this dictionary.
-        Multi-headed models should specify one entry for each head, one of
-        which must be named using
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.
-        If no entry is provided, a default `PredictOutput` mapping to
-        `predictions` will be created.
-      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
-        run on the chief worker during training.
-      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        on all workers during training.
-      scaffold: A `tf.train.Scaffold` object that can be used to set
-        initialization, saver, and more to be used in training.
-      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to
-        run during evaluation.
-      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to
-        run during predictions.
-
-    Returns:
-      A validated `EstimatorSpec` object.
-
-    Raises:
-      ValueError: If validation fails.
-      TypeError: If any of the arguments is not the expected type.
-    """
-    # Validate train_op.
-    if train_op is None:
-      if mode == ModeKeys.TRAIN:
-        raise ValueError('Missing train_op.')
-    else:
-      _check_is_tensor_or_operation(train_op, 'train_op')
-
-    # Validate loss.
-    if loss is None:
-      if mode in (ModeKeys.TRAIN, ModeKeys.EVAL):
-        raise ValueError('Missing loss.')
-    else:
-      loss = _check_is_tensor(loss, 'loss')
-      loss_shape = loss.get_shape()
-      if loss_shape.num_elements() not in (None, 1):
-        raise ValueError('Loss must be scalar, given: {}'.format(loss))
-      if not loss_shape.is_compatible_with(tensor_shape.scalar()):
-        loss = array_ops.reshape(loss, [])
-
-    # Validate predictions.
-    if predictions is None:
-      if mode == ModeKeys.PREDICT:
-        raise ValueError('Missing predictions.')
-      predictions = {}
-    else:
-      if isinstance(predictions, dict):
-        predictions = {
-            k: _check_is_tensor(v, 'predictions[{}]'.format(k))
-            for k, v in six.iteritems(predictions)
-        }
-      else:
-        predictions = _check_is_tensor(predictions, 'predictions')
-
-    # Validate eval_metric_ops.
-    if eval_metric_ops is None:
-      eval_metric_ops = {}
-    else:
-      if not isinstance(eval_metric_ops, dict):
-        raise TypeError(
-            'eval_metric_ops must be a dict, given: {}'.format(eval_metric_ops))
-      for key, value in six.iteritems(eval_metric_ops):
-        # TODO(psv): When we deprecate the old metrics, throw an error here if
-        # the value is not an instance of `Metric` class.
-        if isinstance(value, Metric):
-          if not value.updates:  # Check if metrics updates are available.
-            raise ValueError(
-                'Please call update_state(...) on the "{metric_name}" metric'
-                .format(metric_name=value.name))
-        else:
-          if not isinstance(value, tuple) or len(value) != 2:
-            raise TypeError(
-                'Values of eval_metric_ops must be (metric_value, update_op) '
-                'tuples, given: {} for key: {}'.format(value, key))
-          metric_value, metric_update = value
-          for metric_value_member in nest.flatten(metric_value):
-            # Allow (possibly nested) tuples for metric values, but require that
-            # each of them be Tensors or Operations.
-            _check_is_tensor_or_operation(metric_value_member,
-                                          'eval_metric_ops[{}]'.format(key))
-          _check_is_tensor_or_operation(metric_update,
-                                        'eval_metric_ops[{}]'.format(key))
-
-    # Validate the passed export outputs, or generate defaults.
-    if mode == ModeKeys.PREDICT:
-      export_outputs = _get_export_outputs(export_outputs, predictions)
-
-    # Validate that all tensors and ops are from the default graph.
-    default_graph = ops.get_default_graph()
-
-    # We enumerate possible error causes here to aid in debugging.
-    error_message_template = (
-        '{0} with "{1}" must be from the default graph. '
-        'Possible causes of this error include: \n\n'
-        '1) {0} was created outside the context of the default graph.'
-        '\n\n'
-        '2) The object passed through to EstimatorSpec was not created '
-        'in the most recent call to "model_fn".')
-
-    if isinstance(predictions, dict):
-      for key, value in six.iteritems(predictions):
-        if value.graph is not default_graph:
-          raise ValueError(error_message_template.format(
-              'prediction values',
-              '{0}: {1}'.format(key, value.name)))
-    elif predictions is not None:
-      # 'predictions' must be a single Tensor.
-      if predictions.graph is not default_graph:
-        raise ValueError(error_message_template.format(
-            'prediction values', predictions.name))
-
-    if loss is not None and loss.graph is not default_graph:
-      raise ValueError(error_message_template.format('loss', loss.name))
-    if train_op is not None and train_op.graph is not default_graph:
-      raise ValueError(error_message_template.format('train_op', train_op.name))
-    for key, value in list(six.iteritems(eval_metric_ops)):
-      if isinstance(value, Metric):
-        values_to_check = value.updates[:]
-        values_to_check.append(value.result())
-      else:
-        values_to_check = nest.flatten(value)
-      for val in values_to_check:
-        if val.graph is not default_graph:
-          raise ValueError(error_message_template.format(
-              'eval_metric_ops',
-              '{0}: {1}'.format(key, val.name)))
-
-    # Validate hooks.
-    training_chief_hooks = tuple(training_chief_hooks or [])
-    training_hooks = tuple(training_hooks or [])
-    evaluation_hooks = tuple(evaluation_hooks or [])
-    prediction_hooks = tuple(prediction_hooks or [])
-
-    for hook in (training_hooks + training_chief_hooks + evaluation_hooks +
-                 prediction_hooks):
-      if not isinstance(hook, session_run_hook.SessionRunHook):
-        raise TypeError(
-            'All hooks must be SessionRunHook instances, given: {}'.format(
-                hook))
-
-    # Add metric variables to the `LOCAL_VARIABLES` collection. Metric variables
-    # are by default not added to any collections. We are doing this here, so
-    # that metric variables get initialized.
-    local_vars = set(ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES))
-    vars_to_add = set()
-    for key, value in six.iteritems(eval_metric_ops):
-      if isinstance(value, Metric):
-        vars_to_add.update(value.variables)
-        # Convert Metric instances to (value_tensor, update_op) tuple.
-        eval_metric_ops[key] = (value.result(), value.updates[0])
-    # Remove variables that are in the local variables collection already.
-    vars_to_add = vars_to_add.difference(local_vars)
-    for v in vars_to_add:
-      ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, v)
-
-    scaffold = scaffold or monitored_session.Scaffold()
-    # Validate scaffold.
-    if not isinstance(scaffold, monitored_session.Scaffold):
-      raise TypeError(
-          'scaffold must be tf.train.Scaffold. Given: {}'.format(scaffold))
-
-    return super(EstimatorSpec, cls).__new__(
-        cls,
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metric_ops=eval_metric_ops,
-        export_outputs=export_outputs,
-        training_chief_hooks=training_chief_hooks,
-        training_hooks=training_hooks,
-        scaffold=scaffold,
-        evaluation_hooks=evaluation_hooks,
-        prediction_hooks=prediction_hooks)
-
-  def _replace(self, **kwds):
-    """Return a new EstimatorSpec replacing specified fields with new values."""
-    if 'mode' in kwds:
-      if self.mode != kwds['mode']:
-        raise ValueError('mode of EstimatorSpec cannot be changed.')
-    new_fields = map(kwds.pop, self._fields, list(self))
-    return EstimatorSpec(*new_fields)
-
-
-def _get_export_outputs(export_outputs, predictions):
-  """Validate export_outputs or create default export_outputs.
-
-  Args:
-    export_outputs: Describes the output signatures to be exported to
-      `SavedModel` and used during serving. Should be a dict or None.
-    predictions:  Predictions `Tensor` or dict of `Tensor`.
-
-  Returns:
-    Valid export_outputs dict
-
-  Raises:
-    TypeError: if export_outputs is not a dict or its values are not
-      ExportOutput instances.
-  """
-  if export_outputs is None:
-    default_output = export_output_lib.PredictOutput(predictions)
-    export_outputs = {
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: default_output}
-
-  if not isinstance(export_outputs, dict):
-    raise TypeError('export_outputs must be dict, given: {}'.format(
-        export_outputs))
-  for v in six.itervalues(export_outputs):
-    if not isinstance(v, export_output_lib.ExportOutput):
-      raise TypeError(
-          'Values in export_outputs must be ExportOutput objects. '
-          'Given: {}'.format(export_outputs))
-
-  _maybe_add_default_serving_output(export_outputs)
-
-  return export_outputs
-
-
-def _maybe_add_default_serving_output(export_outputs):
-  """Add a default serving output to the export_outputs if not present.
-
-  Args:
-    export_outputs: Describes the output signatures to be exported to
-      `SavedModel` and used during serving. Should be a dict.
-
-  Returns:
-    export_outputs dict with default serving signature added if necessary
-
-  Raises:
-    ValueError: if multiple export_outputs were provided without a default
-      serving key.
-  """
-  if len(export_outputs) == 1:
-    (key, value), = export_outputs.items()
-    if key != signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-      export_outputs[
-          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = value
-  if len(export_outputs) > 1:
-    if (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-        not in export_outputs):
-      raise ValueError(
-          'Multiple export_outputs were provided, but none of them is '
-          'specified as the default.  Do this by naming one of them with '
-          'signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.')
-
-  return export_outputs
-
-
-class _TPUEstimatorSpec(
-    collections.namedtuple('TPUEstimatorSpec', [
-        'mode', 'predictions', 'loss', 'train_op', 'eval_metrics',
-        'export_outputs', 'scaffold_fn', 'host_call', 'training_hooks',
-        'evaluation_hooks', 'prediction_hooks'
-    ])):
-  """Ops and objects returned from a `model_fn` and passed to `TPUEstimator`.
-
-  This is a simplified implementation of `tf.contrib.tpu.EstimatorSpec`. See
-  tensorflow/contrib/tpu/python/tpu/tpu_estimator.py for more detailed
-  documentation.
-  """
-
-  def __new__(cls,
-              mode,
-              predictions=None,
-              loss=None,
-              train_op=None,
-              eval_metrics=None,
-              export_outputs=None,
-              scaffold_fn=None,
-              host_call=None,
-              training_hooks=None,
-              evaluation_hooks=None,
-              prediction_hooks=None):
-    """Creates a `_TPUEstimatorSpec` instance."""
-    return super(_TPUEstimatorSpec, cls).__new__(
-        cls,
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metrics=eval_metrics,
-        export_outputs=export_outputs,
-        scaffold_fn=scaffold_fn,
-        host_call=host_call,
-        training_hooks=training_hooks,
-        evaluation_hooks=evaluation_hooks,
-        prediction_hooks=prediction_hooks)
-
-  def as_estimator_spec(self):
-    """Creates an equivalent `EstimatorSpec` used by CPU train/eval."""
-    if not self.eval_metrics:
-      eval_metric_ops = None
-    else:
-      metric_fn, tensors = self.eval_metrics
-      eval_metric_ops = metric_fn(**tensors)
-    return EstimatorSpec(
-        mode=self.mode,
-        predictions=self.predictions,
-        loss=self.loss,
-        train_op=self.train_op,
-        eval_metric_ops=eval_metric_ops,
-        export_outputs=self.export_outputs,
-        training_hooks=self.training_hooks,
-        evaluation_hooks=self.evaluation_hooks,
-        prediction_hooks=self.prediction_hooks)
-
-
-def _check_is_tensor_or_operation(x, name):
-  if not (isinstance(x, ops.Operation) or ops.is_dense_tensor_like(x)):
-    raise TypeError('{} must be Operation or Tensor, given: {}'.format(name, x))
-
-
-def _check_is_tensor(x, tensor_name):
-  """Returns `x` if it is a `Tensor`, raises TypeError otherwise."""
-  if not ops.is_dense_tensor_like(x):
-    raise TypeError('{} must be Tensor, given: {}'.format(tensor_name, x))
-  return x
-
-
-def export_outputs_for_mode(
-    mode, serving_export_outputs=None, predictions=None, loss=None,
-    metrics=None):
-  """Util function for constructing a `ExportOutput` dict given a mode.
-
-  The returned dict can be directly passed to `build_all_signature_defs` helper
-  function as the `export_outputs` argument, used for generating a SignatureDef
-  map.
-
-  Args:
-    mode: A `ModeKeys` specifying the mode.
-    serving_export_outputs: Describes the output signatures to be exported to
-      `SavedModel` and used during serving. Should be a dict or None.
-    predictions: A dict of Tensors or single Tensor representing model
-        predictions. This argument is only used if serving_export_outputs is not
-        set.
-    loss: A dict of Tensors or single Tensor representing calculated loss.
-    metrics: A dict of (metric_value, update_op) tuples, or a single tuple.
-      metric_value must be a Tensor, and update_op must be a Tensor or Op
-
-  Returns:
-    Dictionary mapping the a key to an `tf.estimator.export.ExportOutput` object
-    The key is the expected SignatureDef key for the mode.
-
-  Raises:
-    ValueError: if an appropriate ExportOutput cannot be found for the mode.
-  """
-  # TODO(b/113185250): move all model export helper functions into an util file.
-  if mode == ModeKeys.PREDICT:
-    return _get_export_outputs(serving_export_outputs, predictions)
-  elif mode == ModeKeys.TRAIN:
-    return {mode: export_output_lib.TrainOutput(
-        loss=loss, predictions=predictions, metrics=metrics)}
-  elif mode == ModeKeys.EVAL:
-    return {mode: export_output_lib.EvalOutput(
-        loss=loss, predictions=predictions, metrics=metrics)}
-  else:
-    raise ValueError(
-        'Export output type not found for mode: {}'.format(mode))
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.model_fn import *
diff --git a/tensorflow/python/estimator/model_fn_test.py b/tensorflow/python/estimator/model_fn_test.py
deleted file mode 100644
index 8a3a9f3f51..0000000000
--- a/tensorflow/python/estimator/model_fn_test.py
+++ /dev/null
@@ -1,661 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for model_fn.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.estimator import model_fn
-from tensorflow.python.estimator.export import export_output
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.keras import metrics
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import session_run_hook
-
-
-class _FakeHook(session_run_hook.SessionRunHook):
-  """Fake implementation of `SessionRunHook`."""
-
-
-class _InvalidHook(object):
-  """Invalid hook (not a subclass of `SessionRunHook`)."""
-
-
-class _InvalidScaffold(object):
-  """Invalid scaffold (not a subclass of `Scaffold`)."""
-
-
-class EstimatorSpecTrainTest(test.TestCase):
-  """Tests EstimatorSpec in train mode."""
-
-  def testRequiredArgumentsSet(self):
-    """Tests that no errors are raised when all required arguments are set."""
-    with ops.Graph().as_default(), self.cached_session():
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.TRAIN,
-          loss=constant_op.constant(1.),
-          train_op=control_flow_ops.no_op())
-
-  def testAllArgumentsSet(self):
-    """Tests that no errors are raised when all arguments are set."""
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      predictions = {'loss': loss}
-      classes = constant_op.constant('hello')
-      metric_obj = metrics.Mean()
-      metric_obj.update_state(loss)
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.TRAIN,
-          predictions=predictions,
-          loss=loss,
-          train_op=control_flow_ops.no_op(),
-          eval_metric_ops={
-              'loss': (control_flow_ops.no_op(), loss),
-              'mean': metric_obj,
-          },
-          export_outputs={
-              'head_name': export_output.ClassificationOutput(classes=classes)
-          },
-          training_chief_hooks=[_FakeHook()],
-          training_hooks=[_FakeHook()],
-          scaffold=monitored_session.Scaffold(),
-          evaluation_hooks=[_FakeHook()],
-          prediction_hooks=[_FakeHook()])
-
-  def testLossNumber(self):
-    """Tests that error is raised when loss is a number (not Tensor)."""
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN,
-            loss=1.,
-            train_op=control_flow_ops.no_op())
-
-  def testLoss1DTensor(self):
-    """Tests that no errors are raised when loss is 1D tensor."""
-    with ops.Graph().as_default(), self.cached_session():
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.TRAIN,
-          loss=constant_op.constant([1.]),
-          train_op=control_flow_ops.no_op())
-
-  def testLossMissing(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(ValueError, 'Missing loss'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN, train_op=control_flow_ops.no_op())
-
-  def testLossNotScalar(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(ValueError, 'Loss must be scalar'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN,
-            loss=constant_op.constant([1., 2.]),
-            train_op=control_flow_ops.no_op())
-
-  def testLossSparseTensor(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = sparse_tensor.SparseTensor(
-          indices=[[0]],
-          values=[0.],
-          dense_shape=[1])
-      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN,
-            loss=loss,
-            train_op=control_flow_ops.no_op())
-
-  def testLossFromDifferentGraph(self):
-    with ops.Graph().as_default():
-      loss = constant_op.constant(1.)
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          ValueError, 'must be from the default graph'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN,
-            loss=loss,
-            train_op=control_flow_ops.no_op())
-
-  def testTrainOpMissing(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(ValueError, 'Missing train_op'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN, loss=constant_op.constant(1.))
-
-  def testTrainOpNotOperationAndTensor(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(TypeError,
-                                   'train_op must be Operation or Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN,
-            loss=constant_op.constant(1.),
-            train_op='Not an Operation or Tensor')
-
-  def testTrainOpFromDifferentGraph(self):
-    with ops.Graph().as_default():
-      train_op = control_flow_ops.no_op()
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          ValueError, 'must be from the default graph'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN,
-            loss=constant_op.constant(1.),
-            train_op=train_op)
-
-  def testTrainingChiefHookInvalid(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          TypeError, 'All hooks must be SessionRunHook instances'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN,
-            loss=constant_op.constant(1.),
-            train_op=control_flow_ops.no_op(),
-            training_chief_hooks=[_InvalidHook()])
-
-  def testTrainingHookInvalid(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          TypeError, 'All hooks must be SessionRunHook instances'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN,
-            loss=constant_op.constant(1.),
-            train_op=control_flow_ops.no_op(),
-            training_hooks=[_InvalidHook()])
-
-  def testScaffoldInvalid(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          TypeError, r'scaffold must be tf\.train\.Scaffold'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.TRAIN,
-            loss=constant_op.constant(1.),
-            train_op=control_flow_ops.no_op(),
-            scaffold=_InvalidScaffold())
-
-  def testReturnDefaultScaffold(self):
-    with ops.Graph().as_default(), self.cached_session():
-      estimator_spec = model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.TRAIN,
-          loss=constant_op.constant(1.),
-          train_op=control_flow_ops.no_op())
-      self.assertIsNotNone(estimator_spec.scaffold)
-
-
-class EstimatorSpecEvalTest(test.TestCase):
-  """Tests EstimatorSpec in eval mode."""
-
-  def testRequiredArgumentsSet(self):
-    """Tests that no errors are raised when all required arguments are set."""
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL,
-          predictions={'loss': loss},
-          loss=loss)
-
-  def testAllArgumentsSet(self):
-    """Tests that no errors are raised when all arguments are set."""
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      predictions = {'loss': loss}
-      classes = constant_op.constant('hello')
-      metric_obj = metrics.Mean()
-      metric_obj.update_state(loss)
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL,
-          predictions=predictions,
-          loss=loss,
-          train_op=control_flow_ops.no_op(),
-          eval_metric_ops={
-              'loss': (control_flow_ops.no_op(), loss),
-              'mean': metric_obj,
-          },
-          export_outputs={
-              'head_name': export_output.ClassificationOutput(classes=classes)
-          },
-          training_chief_hooks=[_FakeHook()],
-          training_hooks=[_FakeHook()],
-          scaffold=monitored_session.Scaffold(),
-          evaluation_hooks=[_FakeHook()])
-
-  def testEvaluationHookInvalid(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          TypeError, 'All hooks must be SessionRunHook instances'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            loss=constant_op.constant(1.),
-            evaluation_hooks=[_InvalidHook()])
-
-  def testTupleMetric(self):
-    """Tests that no errors are raised when a metric is tuple-valued."""
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL,
-          loss=loss,
-          eval_metric_ops={
-              'some_metric': ((loss, loss, (constant_op.constant(2), loss)),
-                              control_flow_ops.no_op())})
-
-  def testLoss1DTensor(self):
-    """Tests that no errors are raised when loss is 1D tensor."""
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant([1.])
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL,
-          predictions={'loss': loss},
-          loss=loss)
-
-  def testLossNumber(self):
-    """Tests that error is raised when loss is a number (not Tensor)."""
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': constant_op.constant(1.)},
-            loss=1.)
-
-  def testLossMissing(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(ValueError, 'Missing loss'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': constant_op.constant(1.)})
-
-  def testLossNotScalar(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant([1., 2.])
-      with self.assertRaisesRegexp(ValueError, 'Loss must be scalar'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': loss},
-            loss=loss)
-
-  def testLossSparseTensor(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = sparse_tensor.SparseTensor(
-          indices=[[0]],
-          values=[0.],
-          dense_shape=[1])
-      with self.assertRaisesRegexp(
-          TypeError, 'loss must be Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'prediction': constant_op.constant(1.)},
-            loss=loss)
-
-  def testLossFromDifferentGraph(self):
-    with ops.Graph().as_default():
-      loss = constant_op.constant(1.)
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          ValueError, 'must be from the default graph'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'prediction': constant_op.constant(1.)},
-            loss=loss)
-
-  def testReplaceRaisesConstructorChecks(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      spec = model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)
-      with self.assertRaisesRegexp(ValueError, 'Loss must be scalar'):
-        spec._replace(loss=constant_op.constant([1., 2.]))
-
-  def testReplaceDoesReplace(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      spec = model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)
-      new_spec = spec._replace(predictions={'m': loss})
-      self.assertEqual(['m'], list(new_spec.predictions.keys()))
-
-  def testReplaceNotAllowModeChange(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      spec = model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)
-      spec._replace(mode=model_fn.ModeKeys.EVAL)
-      with self.assertRaisesRegexp(ValueError,
-                                   'mode of EstimatorSpec cannot be changed'):
-        spec._replace(mode=model_fn.ModeKeys.TRAIN)
-
-  def testPredictionsMissingIsOkay(self):
-    with ops.Graph().as_default(), self.cached_session():
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL, loss=constant_op.constant(1.))
-
-  def testPredictionsTensor(self):
-    """Tests that no error is raised when predictions is Tensor (not dict)."""
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.EVAL,
-          predictions=loss,
-          loss=loss)
-
-  def testPredictionsNumber(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          TypeError, r'predictions\[number\] must be Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'number': 1.},
-            loss=constant_op.constant(1.))
-
-  def testPredictionsSparseTensor(self):
-    with ops.Graph().as_default(), self.cached_session():
-      predictions = {
-          'sparse': sparse_tensor.SparseTensor(
-              indices=[[0]],
-              values=[0.],
-              dense_shape=[1])}
-      with self.assertRaisesRegexp(
-          TypeError, r'predictions\[sparse\] must be Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions=predictions,
-            loss=constant_op.constant(1.))
-
-  def testPredictionsFromDifferentGraph(self):
-    with ops.Graph().as_default():
-      predictions = {'loss': constant_op.constant(1.)}
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          ValueError, 'must be from the default graph'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions=predictions,
-            loss=constant_op.constant(1.))
-
-  def testEvalMetricOpsNoDict(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      with self.assertRaisesRegexp(
-          TypeError, 'eval_metric_ops must be a dict'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': loss},
-            loss=loss,
-            eval_metric_ops=loss)
-
-  def testEvalMetricOpsNoTuple(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      with self.assertRaisesRegexp(
-          TypeError,
-          (r'Values of eval_metric_ops must be \(metric_value, update_op\) '
-           'tuples')):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': loss},
-            loss=loss,
-            eval_metric_ops={'loss': loss})
-
-  def testEvalMetricOpsNoTensorOrOperation(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      with self.assertRaisesRegexp(TypeError, 'must be Operation or Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': loss},
-            loss=loss,
-            eval_metric_ops={'loss': ('NonTensor', loss)})
-
-  def testEvalMetricNestedNoTensorOrOperation(self):
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      with self.assertRaisesRegexp(TypeError, 'must be Operation or Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': loss},
-            loss=loss,
-            eval_metric_ops={'loss': ((('NonTensor',),),
-                                      control_flow_ops.no_op())})
-
-  def testEvalMetricOpsFromDifferentGraphWithMetricTuple(self):
-    with ops.Graph().as_default():
-      eval_metric_ops = {
-          'loss': (control_flow_ops.no_op(), constant_op.constant(1.))}
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      with self.assertRaisesRegexp(
-          ValueError, 'must be from the default graph'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': loss},
-            loss=loss,
-            eval_metric_ops=eval_metric_ops)
-
-  def testEvalMetricOpsFromDifferentGraphWithMetricObject(self):
-    with ops.Graph().as_default():
-      metric_obj = metrics.Mean()
-      metric_obj.update_state(constant_op.constant(1.))
-      eval_metric_ops = {'metric': metric_obj}
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      with self.assertRaisesRegexp(
-          ValueError, 'must be from the default graph'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': loss},
-            loss=loss,
-            eval_metric_ops=eval_metric_ops)
-
-  def testEvalMetricOpsWithoutUpdates(self):
-    with ops.Graph().as_default():
-      eval_metric_ops = {'mean': metrics.Mean()}
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      with self.assertRaisesRegexp(ValueError, 'Please call update_state(...)'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.EVAL,
-            predictions={'loss': loss},
-            loss=loss,
-            eval_metric_ops=eval_metric_ops)
-
-
-class EstimatorSpecInferTest(test.TestCase):
-  """Tests EstimatorSpec in infer mode."""
-
-  def testRequiredArgumentsSet(self):
-    """Tests that no errors are raised when all required arguments are set."""
-    with ops.Graph().as_default(), self.cached_session():
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.PREDICT,
-          predictions={'loss': constant_op.constant(1.)})
-
-  def testAllArgumentsSet(self):
-    """Tests that no errors are raised when all arguments are set."""
-    with ops.Graph().as_default(), self.cached_session():
-      loss = constant_op.constant(1.)
-      predictions = {'loss': loss}
-      classes = constant_op.constant('hello')
-      metric_obj = metrics.Mean()
-      metric_obj.update_state(loss)
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.PREDICT,
-          predictions=predictions,
-          loss=loss,
-          train_op=control_flow_ops.no_op(),
-          eval_metric_ops={
-              'loss': (control_flow_ops.no_op(), loss),
-              'mean': metric_obj,
-          },
-          export_outputs={
-              'head_name': export_output.ClassificationOutput(classes=classes)
-          },
-          training_chief_hooks=[_FakeHook()],
-          training_hooks=[_FakeHook()],
-          scaffold=monitored_session.Scaffold(),
-          evaluation_hooks=[_FakeHook()],
-          prediction_hooks=[_FakeHook()])
-
-  def testPredictionHookInvalid(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          TypeError, 'All hooks must be SessionRunHook instances'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.PREDICT,
-            predictions=constant_op.constant(1.),
-            prediction_hooks=[_InvalidHook()])
-
-  def testPredictionsMissing(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(ValueError, 'Missing predictions'):
-        model_fn.EstimatorSpec(mode=model_fn.ModeKeys.PREDICT)
-
-  def testPredictionsTensor(self):
-    """Tests that no error is raised when predictions is Tensor (not dict)."""
-    with ops.Graph().as_default(), self.cached_session():
-      model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.PREDICT, predictions=constant_op.constant(1.))
-
-  def testPredictionsNumber(self):
-    with ops.Graph().as_default(), self.cached_session():
-      with self.assertRaisesRegexp(
-          TypeError, r'predictions\[number\] must be Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.PREDICT, predictions={'number': 1.})
-
-  def testPredictionsSparseTensor(self):
-    with ops.Graph().as_default(), self.cached_session():
-      predictions = {
-          'sparse': sparse_tensor.SparseTensor(
-              indices=[[0]],
-              values=[0.],
-              dense_shape=[1])}
-      with self.assertRaisesRegexp(
-          TypeError, r'predictions\[sparse\] must be Tensor'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.PREDICT, predictions=predictions)
-
-  def testExportOutputsNoDict(self):
-    with ops.Graph().as_default(), self.cached_session():
-      predictions = {'loss': constant_op.constant(1.)}
-      classes = constant_op.constant('hello')
-      with self.assertRaisesRegexp(
-          TypeError, 'export_outputs must be dict'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.PREDICT,
-            predictions=predictions,
-            export_outputs=export_output.ClassificationOutput(classes=classes))
-
-  def testExportOutputsValueNotExportOutput(self):
-    with ops.Graph().as_default(), self.cached_session():
-      predictions = {'loss': constant_op.constant(1.)}
-      with self.assertRaisesRegexp(
-          TypeError,
-          r"Values in export_outputs must be ExportOutput objects. "
-          r"Given: {'head_name': {'loss': <tf.Tensor 'Const:0' shape=\(\) "
-          r"dtype=float32>}}"):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.PREDICT,
-            predictions=predictions,
-            export_outputs={'head_name': predictions})
-
-  def testExportOutputsSingleheadMissingDefault(self):
-    with ops.Graph().as_default(), self.cached_session():
-      predictions = {'loss': constant_op.constant(1.)}
-      output_1 = constant_op.constant([1.])
-      regression_output = export_output.RegressionOutput(value=output_1)
-      export_outputs = {
-          'head-1': regression_output,
-          }
-      estimator_spec = model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.PREDICT,
-          predictions=predictions,
-          export_outputs=export_outputs)
-      expected_export_outputs = {
-          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-          regression_output,
-          'head-1': regression_output,
-      }
-      self.assertEqual(expected_export_outputs, estimator_spec.export_outputs)
-
-  def testExportOutputsMultiheadWithDefault(self):
-    with ops.Graph().as_default(), self.cached_session():
-      predictions = {'loss': constant_op.constant(1.)}
-      output_1 = constant_op.constant([1.])
-      output_2 = constant_op.constant(['2'])
-      output_3 = constant_op.constant(['3'])
-      export_outputs = {
-          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-          export_output.RegressionOutput(value=output_1),
-          'head-2': export_output.ClassificationOutput(classes=output_2),
-          'head-3': export_output.PredictOutput(outputs={
-              'some_output_3': output_3
-          })}
-      estimator_spec = model_fn.EstimatorSpec(
-          mode=model_fn.ModeKeys.PREDICT,
-          predictions=predictions,
-          export_outputs=export_outputs)
-      self.assertEqual(export_outputs, estimator_spec.export_outputs)
-
-  def testExportOutputsMultiheadMissingDefault(self):
-    with ops.Graph().as_default(), self.cached_session():
-      predictions = {'loss': constant_op.constant(1.)}
-      output_1 = constant_op.constant([1.])
-      output_2 = constant_op.constant(['2'])
-      output_3 = constant_op.constant(['3'])
-      export_outputs = {
-          'head-1': export_output.RegressionOutput(value=output_1),
-          'head-2': export_output.ClassificationOutput(classes=output_2),
-          'head-3': export_output.PredictOutput(outputs={
-              'some_output_3': output_3
-          })}
-      with self.assertRaisesRegexp(
-          ValueError,
-          'Multiple export_outputs were provided, but none of them is '
-          'specified as the default.  Do this by naming one of them with '
-          'signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.'):
-        model_fn.EstimatorSpec(
-            mode=model_fn.ModeKeys.PREDICT,
-            predictions=predictions,
-            export_outputs=export_outputs)
-
-  def testDefaultExportOutputCreated(self):
-    """Ensure that a default PredictOutput is created for export."""
-    with ops.Graph().as_default(), self.cached_session():
-      predictions = constant_op.constant(1.)
-      self._assertDefaultExportOutputForPredictions(predictions)
-
-  def testDefaultExportOutputCreatedDict(self):
-    """Ensure that a default PredictOutput is created for export for dicts."""
-    with ops.Graph().as_default(), self.cached_session():
-      predictions = {'loss': constant_op.constant(1.),
-                     'score': constant_op.constant(10.)}
-      self._assertDefaultExportOutputForPredictions(predictions)
-
-  def _assertDefaultExportOutputForPredictions(self, predictions):
-    spec = model_fn.EstimatorSpec(
-        mode=model_fn.ModeKeys.PREDICT, predictions=predictions)
-
-    expected = export_output.PredictOutput(predictions).outputs
-    serving_output = spec.export_outputs[
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
-    self.assertEqual(serving_output.outputs, expected)
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index 3773810a04..bccad8fe00 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,904 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Environment configuration object for Estimators."""
+"""run_config python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import copy
-import json
-import os
-
-import six
-
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.core.protobuf import rewriter_config_pb2
-from tensorflow.python.distribute import estimator_training as distribute_coordinator_training
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import server_lib
-from tensorflow.python.util import compat_internal
-from tensorflow.python.util import function_utils
-from tensorflow.python.util.tf_export import estimator_export
-
-
-_USE_DEFAULT = object()
-_VALID_DEVICE_FN_ARGS = set(['op'])
-
-# A list of the property names in RunConfig that the user is allowed to change.
-_DEFAULT_REPLACEABLE_LIST = [
-    'model_dir',
-    'tf_random_seed',
-    'save_summary_steps',
-    'save_checkpoints_steps',
-    'save_checkpoints_secs',
-    'session_config',
-    'keep_checkpoint_max',
-    'keep_checkpoint_every_n_hours',
-    'log_step_count_steps',
-    'train_distribute',
-    'device_fn',
-    'protocol',
-    'eval_distribute',
-    'experimental_distribute',
-]
-
-_SAVE_CKPT_ERR = (
-    '`save_checkpoints_steps` and `save_checkpoints_secs` cannot be both set.'
-)
-
-_TF_CONFIG_ENV = 'TF_CONFIG'
-_TASK_ENV_KEY = 'task'
-_TASK_TYPE_KEY = 'type'
-_TASK_ID_KEY = 'index'
-_CLUSTER_KEY = 'cluster'
-_SERVICE_KEY = 'service'
-_SESSION_MASTER_KEY = 'session_master'
-_EVAL_SESSION_MASTER_KEY = 'eval_session_master'
-_MODEL_DIR_KEY = 'model_dir'
-_LOCAL_MASTER = ''
-_GRPC_SCHEME = 'grpc://'
-
-
-def _get_session_master(cluster_spec, task_type, task_id, tf_config):
-  """Returns the appropriate address for TensorFlow master.
-
-  The order of precedence to deteremine the TF session master is as follows:
-  1. If `tf_session_master` is set in TF_CONFIG environment variable, takes it.
-  2. If the cluster has only one node, returns empty string ''.
-  3. Returns the grpc address according to the task type and id in the cluster.
-     This is between-graph replication.
-
-  Note: task_type and task_id must be validated. Typically, validated using
-  `_validate_task_type_and_task_id`.
-
-  Args:
-    cluster_spec: A `ClusterSpec` instance.
-    task_type: String. Task type for current node.
-    task_id: Int. Task id for current node.
-    tf_config: Dict. Python dict for the TF_CONFIG environment variable.
-
-  Raises:
-    RuntimeError: If `cluster_spec` is not set.
-
-  """
-  if _SESSION_MASTER_KEY in tf_config:
-    return tf_config[_SESSION_MASTER_KEY]
-
-  if not cluster_spec:
-    raise RuntimeError('Internal error: `_get_session_master` '
-                       'does not expect empty cluster_spec.')
-
-  jobs = cluster_spec.jobs
-
-  # If there is only one node in the cluster, do things locally by setting
-  # master to ''.  If a service or user sets TF_CONFIG with a single node, it's
-  # more performant to use a direct master rather than an RPC service.
-  if len(jobs) == 1 and len(cluster_spec.job_tasks(jobs[0])) == 1:
-    return _LOCAL_MASTER
-
-  # Lookup the master in cluster_spec using task_type and task_id,
-  # if possible.
-  addresses = cluster_spec.job_tasks(task_type)
-  return _GRPC_SCHEME + addresses[task_id]
-
-
-def _get_eval_session_master(task_type, tf_config):
-  """Returns the appropriate address for TensorFlow evaluation master."""
-  if task_type == TaskType.EVALUATOR:
-    return tf_config.get(_EVAL_SESSION_MASTER_KEY, _LOCAL_MASTER)
-
-  if _EVAL_SESSION_MASTER_KEY in tf_config:
-    raise ValueError('Key ({}) should not be set for task type other than {}. '
-                     'Task type: {}'.format(_EVAL_SESSION_MASTER_KEY,
-                                            TaskType.EVALUATOR, task_type))
-  return _LOCAL_MASTER
-
-
-def _count_ps(cluster_spec):
-  """Counts the number of parameter servers in cluster_spec."""
-  if not cluster_spec:
-    raise RuntimeError(
-        'Internal error: `_count_ps` does not expect empty cluster_spec.')
-
-  return len(cluster_spec.as_dict().get(TaskType.PS, []))
-
-
-def _count_worker(cluster_spec, chief_task_type):
-  """Counts the number of workers (including chief) in cluster_spec."""
-  if not cluster_spec:
-    raise RuntimeError(
-        'Internal error: `_count_worker` does not expect empty cluster_spec.')
-
-  return (len(cluster_spec.as_dict().get(TaskType.WORKER, [])) +
-          len(cluster_spec.as_dict().get(chief_task_type, [])))
-
-
-def _validate_service(service):
-  """Validates the service key."""
-  if service is not None and not isinstance(service, dict):
-    raise TypeError(
-        'If "service" is set in TF_CONFIG, it must be a dict. Given %s' %
-        type(service))
-  return service
-
-
-def _validate_task_type_and_task_id(cluster_spec, task_env, chief_task_type):
-  """Validates the task type and index in `task_env` according to cluster."""
-  if chief_task_type not in cluster_spec.jobs:
-    raise ValueError(
-        'If "cluster" is set in TF_CONFIG, it must have one "%s" node.' %
-        chief_task_type)
-  if len(cluster_spec.job_tasks(chief_task_type)) > 1:
-    raise ValueError(
-        'The "cluster" in TF_CONFIG must have only one "%s" node.' %
-        chief_task_type)
-
-  task_type = task_env.get(_TASK_TYPE_KEY, None)
-  task_id = task_env.get(_TASK_ID_KEY, None)
-
-  if not task_type:
-    raise ValueError(
-        'If "cluster" is set in TF_CONFIG, task type must be set.')
-  if task_id is None:
-    raise ValueError(
-        'If "cluster" is set in TF_CONFIG, task index must be set.')
-
-  task_id = int(task_id)
-
-  # Check the task id bounds. Upper bound is not necessary as
-  # - for evaluator, there is no upper bound.
-  # - for non-evaluator, task id is upper bounded by the number of jobs in
-  # cluster spec, which will be checked later (when retrieving the `master`)
-  if task_id < 0:
-    raise ValueError('Task index must be non-negative number.')
-
-  # Evaluator is not part of the training cluster.
-  if task_type == TaskType.EVALUATOR:
-    return task_type, task_id
-
-  if task_type not in cluster_spec.jobs:
-    raise ValueError(
-        '%s is not a valid task_type in the cluster_spec:\n'
-        '%s\n\n'
-        'Note that these values may be coming from the TF_CONFIG environment '
-        'variable.' % (task_type, cluster_spec))
-  addresses = cluster_spec.job_tasks(task_type)
-  if not 0 <= task_id < len(addresses):
-    raise ValueError(
-        '%d is not a valid task_id for task_type %s in the cluster_spec:\n'
-        '%s\n\n'
-        'Note that these values may be coming from the TF_CONFIG environment '
-        'variable.' % (task_id, task_type, cluster_spec))
-
-  return task_type, task_id
-
-
-def _get_global_id_in_cluster(
-    cluster_spec, task_type, task_id, chief_task_type):
-  """Returns the global id in cluster."""
-  # Note: This is implementation details, which user should not rely on.
-  # The first id is 0, which is always for the `chief` node. All other nodes,
-  # except `ps`, are ordered alphabetical based on task type (alphabetically)
-  # and task id (ascendingly). `ps` are ordered last.
-
-  # Sort task names in cluster
-  task_type_ordered_list = [chief_task_type]
-  task_type_ordered_list.extend([
-      t for t in sorted(cluster_spec.jobs)
-      if t != chief_task_type and t != TaskType.PS
-  ])
-  if TaskType.PS in cluster_spec.jobs:
-    task_type_ordered_list.append(TaskType.PS)
-
-  next_global_id = 0
-  for t in task_type_ordered_list:
-    if t == task_type:
-      return next_global_id + task_id
-    next_global_id += len(cluster_spec.job_tasks(t))
-
-  # This should never happen.
-  raise RuntimeError('Internal Error: `task_type` ({}) is not in '
-                     'cluster_spec ({}).'.format(task_type, cluster_spec))
-
-
-def _validate_save_ckpt_with_replaced_keys(new_copy, replaced_keys):
-  """Validates the save ckpt properties."""
-  # Ensure one (and only one) of save_steps and save_secs is not None.
-  # Also, if user sets one save ckpt property, say steps, the other one (secs)
-  # should be set as None to improve usability.
-
-  save_steps = new_copy.save_checkpoints_steps
-  save_secs = new_copy.save_checkpoints_secs
-
-  if ('save_checkpoints_steps' in replaced_keys and
-      'save_checkpoints_secs' in replaced_keys):
-    # If user sets both properties explicitly, we need to error out if both
-    # are set or neither of them are set.
-    if save_steps is not None and save_secs is not None:
-      raise ValueError(_SAVE_CKPT_ERR)
-  elif 'save_checkpoints_steps' in replaced_keys and save_steps is not None:
-    new_copy._save_checkpoints_secs = None  # pylint: disable=protected-access
-  elif 'save_checkpoints_secs' in replaced_keys and save_secs is not None:
-    new_copy._save_checkpoints_steps = None  # pylint: disable=protected-access
-
-
-def _validate_properties(run_config):
-  """Validates the properties."""
-  def _validate(property_name, cond, message):
-    property_value = getattr(run_config, property_name)
-    if property_value is not None and not cond(property_value):
-      raise ValueError(message)
-
-  _validate('model_dir', lambda dir: dir,
-            message='model_dir should be non-empty')
-
-  _validate('save_summary_steps', lambda steps: steps >= 0,
-            message='save_summary_steps should be >= 0')
-
-  _validate('save_checkpoints_steps', lambda steps: steps >= 0,
-            message='save_checkpoints_steps should be >= 0')
-  _validate('save_checkpoints_secs', lambda secs: secs >= 0,
-            message='save_checkpoints_secs should be >= 0')
-
-  _validate('session_config',
-            lambda sc: isinstance(sc, config_pb2.ConfigProto),
-            message='session_config must be instance of ConfigProto')
-
-  _validate('keep_checkpoint_max', lambda keep_max: keep_max >= 0,
-            message='keep_checkpoint_max should be >= 0')
-  _validate('keep_checkpoint_every_n_hours', lambda keep_hours: keep_hours > 0,
-            message='keep_checkpoint_every_n_hours should be > 0')
-  _validate('log_step_count_steps', lambda num_steps: num_steps > 0,
-            message='log_step_count_steps should be > 0')
-
-  _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types),
-            message='tf_random_seed must be integer.')
-
-  _validate('device_fn', lambda device_fn: six.callable(device_fn) and
-            set(function_utils.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS,
-            message='device_fn must be callable with exactly'
-                    ' one argument "op".')
-
-  _validate('protocol',
-            lambda protocol: protocol in (None, "grpc", "grpc+verbs"),
-            message='protocol should be grpc or grpc+verbs')
-
-
-def get_default_session_config():
-  """Returns tf.ConfigProto instance."""
-
-  rewrite_opts = rewriter_config_pb2.RewriterConfig(
-      meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE)
-  graph_opts = config_pb2.GraphOptions(rewrite_options=rewrite_opts)
-
-  return config_pb2.ConfigProto(allow_soft_placement=True,
-                                graph_options=graph_opts)
-
-
-class TaskType(object):
-  MASTER = 'master'
-  PS = 'ps'
-  WORKER = 'worker'
-  CHIEF = 'chief'
-  EVALUATOR = 'evaluator'
-
-
-@estimator_export('estimator.RunConfig')
-class RunConfig(object):
-  """This class specifies the configurations for an `Estimator` run."""
-
-  def __init__(self,
-               model_dir=None,
-               tf_random_seed=None,
-               save_summary_steps=100,
-               save_checkpoints_steps=_USE_DEFAULT,
-               save_checkpoints_secs=_USE_DEFAULT,
-               session_config=None,
-               keep_checkpoint_max=5,
-               keep_checkpoint_every_n_hours=10000,
-               log_step_count_steps=100,
-               train_distribute=None,
-               device_fn=None,
-               protocol=None,
-               eval_distribute=None,
-               experimental_distribute=None):
-    """Constructs a RunConfig.
-
-    All distributed training related properties `cluster_spec`, `is_chief`,
-    `master` , `num_worker_replicas`, `num_ps_replicas`, `task_id`, and
-    `task_type` are set based on the `TF_CONFIG` environment variable, if the
-    pertinent information is present. The `TF_CONFIG` environment variable is a
-    JSON object with attributes: `cluster` and `task`.
-
-    `cluster` is a JSON serialized version of `ClusterSpec`'s Python dict from
-    `server_lib.py`, mapping task types (usually one of the `TaskType` enums) to
-    a list of task addresses.
-
-    `task` has two attributes: `type` and `index`, where `type` can be any of
-    the task types in `cluster`. When `TF_CONFIG` contains said information,
-    the following properties are set on this class:
-
-    * `cluster_spec` is parsed from `TF_CONFIG['cluster']`. Defaults to {}. If
-      present, must have one and only one node in the `chief` attribute of
-      `cluster_spec`.
-    * `task_type` is set to `TF_CONFIG['task']['type']`. Must set if
-      `cluster_spec` is present; must be `worker` (the default value) if
-      `cluster_spec` is not set.
-    * `task_id` is set to `TF_CONFIG['task']['index']`. Must set if
-      `cluster_spec` is present; must be 0 (the default value) if
-      `cluster_spec` is not set.
-    * `master` is determined by looking up `task_type` and `task_id` in the
-      `cluster_spec`. Defaults to ''.
-    * `num_ps_replicas` is set by counting the number of nodes listed
-      in the `ps` attribute of `cluster_spec`. Defaults to 0.
-    * `num_worker_replicas` is set by counting the number of nodes listed
-      in the `worker` and `chief` attributes of `cluster_spec`. Defaults to 1.
-    * `is_chief` is determined based on `task_type` and `cluster`.
-
-    There is a special node with `task_type` as `evaluator`, which is not part
-    of the (training) `cluster_spec`. It handles the distributed evaluation job.
-
-    Example of non-chief node:
-    ```
-      cluster = {'chief': ['host0:2222'],
-                 'ps': ['host1:2222', 'host2:2222'],
-                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
-      os.environ['TF_CONFIG'] = json.dumps(
-          {'cluster': cluster,
-           'task': {'type': 'worker', 'index': 1}})
-      config = RunConfig()
-      assert config.master == 'host4:2222'
-      assert config.task_id == 1
-      assert config.num_ps_replicas == 2
-      assert config.num_worker_replicas == 4
-      assert config.cluster_spec == server_lib.ClusterSpec(cluster)
-      assert config.task_type == 'worker'
-      assert not config.is_chief
-    ```
-
-    Example of chief node:
-    ```
-      cluster = {'chief': ['host0:2222'],
-                 'ps': ['host1:2222', 'host2:2222'],
-                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
-      os.environ['TF_CONFIG'] = json.dumps(
-          {'cluster': cluster,
-           'task': {'type': 'chief', 'index': 0}})
-      config = RunConfig()
-      assert config.master == 'host0:2222'
-      assert config.task_id == 0
-      assert config.num_ps_replicas == 2
-      assert config.num_worker_replicas == 4
-      assert config.cluster_spec == server_lib.ClusterSpec(cluster)
-      assert config.task_type == 'chief'
-      assert config.is_chief
-    ```
-
-    Example of evaluator node (evaluator is not part of training cluster):
-    ```
-      cluster = {'chief': ['host0:2222'],
-                 'ps': ['host1:2222', 'host2:2222'],
-                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
-      os.environ['TF_CONFIG'] = json.dumps(
-          {'cluster': cluster,
-           'task': {'type': 'evaluator', 'index': 0}})
-      config = RunConfig()
-      assert config.master == ''
-      assert config.evaluator_master == ''
-      assert config.task_id == 0
-      assert config.num_ps_replicas == 0
-      assert config.num_worker_replicas == 0
-      assert config.cluster_spec == {}
-      assert config.task_type == 'evaluator'
-      assert not config.is_chief
-    ```
-
-    N.B.: If `save_checkpoints_steps` or `save_checkpoints_secs` is set,
-    `keep_checkpoint_max` might need to be adjusted accordingly, especially in
-    distributed training. For example, setting `save_checkpoints_secs` as 60
-    without adjusting `keep_checkpoint_max` (defaults to 5) leads to situation
-    that checkpoint would be garbage collected after 5 minutes. In distributed
-    training, the evaluation job starts asynchronously and might fail to load or
-    find the checkpoint due to race condition.
-
-    Args:
-      model_dir: directory where model parameters, graph, etc are saved. If
-        `PathLike` object, the path will be resolved. If `None`, will use a
-        default value set by the Estimator.
-      tf_random_seed: Random seed for TensorFlow initializers.
-        Setting this value allows consistency between reruns.
-      save_summary_steps: Save summaries every this many steps.
-      save_checkpoints_steps: Save checkpoints every this many steps. Can not be
-          specified with `save_checkpoints_secs`.
-      save_checkpoints_secs: Save checkpoints every this many seconds. Can not
-          be specified with `save_checkpoints_steps`. Defaults to 600 seconds if
-          both `save_checkpoints_steps` and `save_checkpoints_secs` are not set
-          in constructor.  If both `save_checkpoints_steps` and
-          `save_checkpoints_secs` are None, then checkpoints are disabled.
-      session_config: a ConfigProto used to set session parameters, or None.
-      keep_checkpoint_max: The maximum number of recent checkpoint files to
-        keep. As new files are created, older files are deleted. If None or 0,
-        all checkpoint files are kept. Defaults to 5 (that is, the 5 most recent
-        checkpoint files are kept.)
-      keep_checkpoint_every_n_hours: Number of hours between each checkpoint
-        to be saved. The default value of 10,000 hours effectively disables
-        the feature.
-      log_step_count_steps: The frequency, in number of global steps, that the
-        global step/sec and the loss will be logged during training.
-      train_distribute: An optional instance of
-        `tf.contrib.distribute.DistributionStrategy`. If specified,
-        then Estimator will distribute the user's model during training,
-        according to the policy specified by that strategy. Setting
-        `experimental_distribute.train_distribute` is preferred.
-      device_fn: A callable invoked for every `Operation` that takes the
-        `Operation` and returns the device string. If `None`, defaults to
-        the device function returned by `tf.train.replica_device_setter`
-        with round-robin strategy.
-      protocol: An optional argument which specifies the protocol used when
-        starting server. None means default to grpc.
-      eval_distribute: An optional instance of
-        `tf.contrib.distribute.DistributionStrategy`. If specified,
-        then Estimator will distribute the user's model during evaluation,
-        according to the policy specified by that strategy. Setting
-        `experimental_distribute.eval_distribute` is preferred.
-      experimental_distribute: an optional
-        `tf.contrib.distribute.DistributeConfig` object specifying
-        DistributionStrategy-related configuration. The `train_distribute` and
-        `eval_distribute` can be passed as parameters to `RunConfig` or set in
-        `experimental_distribute` but not both.
-
-    Raises:
-      ValueError: If both `save_checkpoints_steps` and `save_checkpoints_secs`
-      are set.
-    """
-    if (save_checkpoints_steps == _USE_DEFAULT and
-        save_checkpoints_secs == _USE_DEFAULT):
-      save_checkpoints_steps = None
-      save_checkpoints_secs = 600
-    elif save_checkpoints_secs == _USE_DEFAULT:
-      save_checkpoints_secs = None
-    elif save_checkpoints_steps == _USE_DEFAULT:
-      save_checkpoints_steps = None
-    elif (save_checkpoints_steps is not None and
-          save_checkpoints_secs is not None):
-      raise ValueError(_SAVE_CKPT_ERR)
-
-    tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV, '{}'))
-    if tf_config:
-      logging.info('TF_CONFIG environment variable: %s', tf_config)
-
-    model_dir = _get_model_dir(tf_config,
-                               compat_internal.path_to_str(model_dir))
-
-    RunConfig._replace(
-        self,
-        allowed_properties_list=_DEFAULT_REPLACEABLE_LIST,
-        model_dir=model_dir,
-        tf_random_seed=tf_random_seed,
-        save_summary_steps=save_summary_steps,
-        save_checkpoints_steps=save_checkpoints_steps,
-        save_checkpoints_secs=save_checkpoints_secs,
-        session_config=session_config,
-        keep_checkpoint_max=keep_checkpoint_max,
-        keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
-        log_step_count_steps=log_step_count_steps,
-        train_distribute=train_distribute,
-        device_fn=device_fn,
-        protocol=protocol,
-        eval_distribute=eval_distribute,
-        experimental_distribute=experimental_distribute)
-
-    # TODO(frankchn,priyag): Eventually use distributed coordinator for TPUs.
-    if ((train_distribute and
-         train_distribute.__class__.__name__ != 'TPUStrategy') or
-        (eval_distribute and
-         eval_distribute.__class__.__name__ != 'TPUStrategy') or
-        experimental_distribute):
-      logging.info('Initializing RunConfig with distribution strategies.')
-      distribute_coordinator_training.init_run_config(self, tf_config)
-    else:
-      self._init_distributed_setting_from_environment_var(tf_config)
-      self._maybe_overwrite_session_config_for_distributed_training()
-
-  def _maybe_overwrite_session_config_for_distributed_training(self):
-    """Overwrites the session_config for distributed training.
-
-    The default overwrite is optimized for between-graph training. Subclass
-    should override this method if necessary.
-    """
-    # Get session_config only for between-graph distributed mode (cluster_spec
-    # is present).
-    if not self._session_config and self._cluster_spec:
-      RunConfig._replace(
-          self,
-          allowed_properties_list=_DEFAULT_REPLACEABLE_LIST,
-          session_config=self._get_default_session_config_distributed())
-
-  def _get_default_session_config_distributed(self):
-    """Returns None or tf.ConfigProto instance with default device_filters set.
-
-    Device filters are set such that chief/master and worker communicates with
-    only ps. session_config=None for evaluators or any other TaskType.
-    """
-
-    rewrite_opts = rewriter_config_pb2.RewriterConfig(
-        meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE)
-    graph_opts = config_pb2.GraphOptions(rewrite_options=rewrite_opts)
-
-    device_filters = None
-    if self._task_type == TaskType.MASTER:
-      device_filters = ['/job:ps', '/job:master']
-    elif self._task_type == TaskType.CHIEF:
-      device_filters = ['/job:ps', '/job:chief']
-    elif self._task_type == TaskType.WORKER:
-      device_filters = ['/job:ps', '/job:worker/task:%d' % self._task_id]
-    elif self._task_type == TaskType.PS:
-      device_filters = ['/job:ps', '/job:worker', '/job:master']
-    else:
-      # If the task_type is `EVALUATOR` or something other than the ones in
-      # TaskType then don't set any device filters.
-      return None
-
-    return config_pb2.ConfigProto(
-        allow_soft_placement=True,
-        graph_options=graph_opts,
-        device_filters=device_filters)
-
-  def _init_distributed_setting_from_environment_var(self, tf_config):
-    """Initialize distributed properties based on `tf_config`."""
-
-    self._service = _validate_service(tf_config.get(_SERVICE_KEY))
-    self._cluster_spec = server_lib.ClusterSpec(tf_config.get(_CLUSTER_KEY, {}))
-    task_env = tf_config.get(_TASK_ENV_KEY, {})
-
-    if self._cluster_spec and TaskType.MASTER in self._cluster_spec.jobs:
-      return self._init_distributed_setting_from_environment_var_with_master(
-          tf_config)
-
-    if self._cluster_spec:
-      # Distributed mode.
-      self._task_type, self._task_id = _validate_task_type_and_task_id(
-          self._cluster_spec, task_env, TaskType.CHIEF)
-
-      self._evaluation_master = _get_eval_session_master(
-          self._task_type, tf_config)
-
-      if self._task_type != TaskType.EVALUATOR:
-        self._master = _get_session_master(self._cluster_spec, self._task_type,
-                                           self._task_id, tf_config)
-        self._num_ps_replicas = _count_ps(self._cluster_spec)
-        self._num_worker_replicas = _count_worker(
-            self._cluster_spec, chief_task_type=TaskType.CHIEF)
-        self._global_id_in_cluster = _get_global_id_in_cluster(
-            self._cluster_spec,
-            self._task_type,
-            self._task_id,
-            chief_task_type=TaskType.CHIEF)
-      else:
-        # Evaluator is not part of the training cluster.
-        self._cluster_spec = server_lib.ClusterSpec({})
-        self._master = _LOCAL_MASTER
-        self._num_ps_replicas = 0
-        self._num_worker_replicas = 0
-        self._global_id_in_cluster = None  # undefined
-
-      self._is_chief = self._task_type == TaskType.CHIEF
-    else:
-      # Local mode.
-      self._task_type = task_env.get(_TASK_TYPE_KEY, TaskType.WORKER)
-      self._task_id = int(task_env.get(_TASK_ID_KEY, 0))
-      self._global_id_in_cluster = 0
-
-      if self._task_type != TaskType.WORKER:
-        raise ValueError(
-            'If "cluster" is not set in TF_CONFIG, task type must be WORKER.')
-      if self._task_id != 0:
-        raise ValueError(
-            'If "cluster" is not set in TF_CONFIG, task index must be 0.')
-
-      self._master = tf_config.get(_SESSION_MASTER_KEY, _LOCAL_MASTER)
-      self._evaluation_master = tf_config.get(_EVAL_SESSION_MASTER_KEY,
-                                              _LOCAL_MASTER)
-      self._is_chief = True
-      self._num_ps_replicas = 0
-      self._num_worker_replicas = 1
-
-  def _init_distributed_setting_from_environment_var_with_master(self,
-                                                                 tf_config):
-    """Initialize distributed properties for legacy cluster with `master`."""
-    # There is no tech reason, why user cannot have chief and master in the same
-    # cluster, but it is super confusing (which is really the chief?). So, block
-    # this case.
-    if TaskType.CHIEF in self._cluster_spec.jobs:
-      raise ValueError('If `master` node exists in `cluster`, job '
-                       '`chief` is not supported.')
-
-    task_env = tf_config.get(_TASK_ENV_KEY, {})
-
-    self._task_type, self._task_id = _validate_task_type_and_task_id(
-        self._cluster_spec, task_env, TaskType.MASTER)
-
-    if self._task_type == TaskType.EVALUATOR:
-      raise ValueError('If `master` node exists in `cluster`, task_type '
-                       '`evaluator` is not supported.')
-
-    self._global_id_in_cluster = _get_global_id_in_cluster(
-        self._cluster_spec,
-        self._task_type,
-        self._task_id,
-        chief_task_type=TaskType.MASTER)
-
-    self._master = _get_session_master(self._cluster_spec, self._task_type,
-                                       self._task_id, tf_config)
-    self._evaluation_master = _get_eval_session_master(self._task_type,
-                                                       tf_config)
-    self._num_ps_replicas = _count_ps(self._cluster_spec)
-    self._num_worker_replicas = _count_worker(
-        self._cluster_spec, chief_task_type=TaskType.MASTER)
-
-    self._is_chief = self._task_type == TaskType.MASTER
-
-  @property
-  def cluster_spec(self):
-    return self._cluster_spec
-
-  @property
-  def device_fn(self):
-    """Returns the device_fn.
-
-    If device_fn is not `None`, it overrides the default
-    device function used in `Estimator`.
-    Otherwise the default one is used.
-    """
-    return self._device_fn
-
-  @property
-  def evaluation_master(self):
-    return self._evaluation_master
-
-  @property
-  def is_chief(self):
-    return self._is_chief
-
-  @property
-  def master(self):
-    return self._master
-
-  @property
-  def num_ps_replicas(self):
-    return self._num_ps_replicas
-
-  @property
-  def num_worker_replicas(self):
-    return self._num_worker_replicas
-
-  @property
-  def task_id(self):
-    return self._task_id
-
-  @property
-  def global_id_in_cluster(self):
-    """The global id in the training cluster.
-
-    All global ids in the training cluster are assigned from an increasing
-    sequence of consecutive integers. The first id is 0.
-
-    Note: Task id (the property field `task_id`) is tracking the index of the
-    node among all nodes with the SAME task type. For example, given the cluster
-    definition as follows:
-
-    ```
-      cluster = {'chief': ['host0:2222'],
-                 'ps': ['host1:2222', 'host2:2222'],
-                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
-    ```
-
-    Nodes with task type `worker` can have id 0, 1, 2.  Nodes with task type
-    `ps` can have id, 0, 1. So, `task_id` is not unique, but the pair
-    (`task_type`, `task_id`) can uniquely determine a node in the cluster.
-
-    Global id, i.e., this field, is tracking the index of the node among ALL
-    nodes in the cluster. It is uniquely assigned.  For example, for the cluster
-    spec given above, the global ids are assigned as:
-    ```
-      task_type  | task_id  |  global_id
-      --------------------------------
-      chief      | 0        |  0
-      worker     | 0        |  1
-      worker     | 1        |  2
-      worker     | 2        |  3
-      ps         | 0        |  4
-      ps         | 1        |  5
-    ```
-
-    Returns:
-      An integer id.
-    """
-    return self._global_id_in_cluster
-
-  @property
-  def task_type(self):
-    return self._task_type
-
-  @property
-  def tf_random_seed(self):
-    return self._tf_random_seed
-
-  @property
-  def save_summary_steps(self):
-    return self._save_summary_steps
-
-  @property
-  def save_checkpoints_secs(self):
-    return self._save_checkpoints_secs
-
-  @property
-  def session_config(self):
-    return self._session_config
-
-  @property
-  def save_checkpoints_steps(self):
-    return self._save_checkpoints_steps
-
-  @property
-  def keep_checkpoint_max(self):
-    return self._keep_checkpoint_max
-
-  @property
-  def keep_checkpoint_every_n_hours(self):
-    return self._keep_checkpoint_every_n_hours
-
-  @property
-  def log_step_count_steps(self):
-    return self._log_step_count_steps
-
-  @property
-  def model_dir(self):
-    return self._model_dir
-
-  @property
-  def service(self):
-    """Returns the platform defined (in TF_CONFIG) service dict."""
-    return self._service
-
-  @property
-  def train_distribute(self):
-    """Optional `tf.contrib.distribute.DistributionStrategy` for training.
-    """
-    return self._train_distribute
-
-  @property
-  def eval_distribute(self):
-    """Optional `tf.contrib.distribute.DistributionStrategy` for evaluation.
-    """
-    return self._eval_distribute
-
-  @property
-  def protocol(self):
-    """Returns the optional protocol value."""
-    return self._protocol
-
-  def replace(self, **kwargs):
-    """Returns a new instance of `RunConfig` replacing specified properties.
-
-    Only the properties in the following list are allowed to be replaced:
-
-      - `model_dir`,
-      - `tf_random_seed`,
-      - `save_summary_steps`,
-      - `save_checkpoints_steps`,
-      - `save_checkpoints_secs`,
-      - `session_config`,
-      - `keep_checkpoint_max`,
-      - `keep_checkpoint_every_n_hours`,
-      - `log_step_count_steps`,
-      - `train_distribute`,
-      - `device_fn`,
-      - `protocol`.
-      - `eval_distribute`,
-      - `experimental_distribute`,
-
-    In addition, either `save_checkpoints_steps` or `save_checkpoints_secs`
-    can be set (should not be both).
-
-    Args:
-      **kwargs: keyword named properties with new values.
-
-    Raises:
-      ValueError: If any property name in `kwargs` does not exist or is not
-        allowed to be replaced, or both `save_checkpoints_steps` and
-        `save_checkpoints_secs` are set.
-
-    Returns:
-      a new instance of `RunConfig`.
-    """
-    return RunConfig._replace(
-        copy.deepcopy(self),
-        allowed_properties_list=_DEFAULT_REPLACEABLE_LIST,
-        **kwargs)
-
-  @staticmethod
-  def _replace(config, allowed_properties_list=None, **kwargs):
-    """See `replace`.
-
-    N.B.: This implementation assumes that for key named "foo", the underlying
-    property the RunConfig holds is "_foo" (with one leading underscore).
-
-    Args:
-      config: The RunConfig to replace the values of.
-      allowed_properties_list: The property name list allowed to be replaced.
-      **kwargs: keyword named properties with new values.
-
-    Raises:
-      ValueError: If any property name in `kwargs` does not exist or is not
-        allowed to be replaced, or both `save_checkpoints_steps` and
-        `save_checkpoints_secs` are set.
-
-    Returns:
-      a new instance of `RunConfig`.
-    """
-
-    allowed_properties_list = allowed_properties_list or []
-
-    for key, new_value in six.iteritems(kwargs):
-      if key in allowed_properties_list:
-        setattr(config, '_' + key, new_value)
-        continue
-
-      raise ValueError(
-          'Replacing {} is not supported. Allowed properties are {}.'.format(
-              key, allowed_properties_list))
-
-    _validate_save_ckpt_with_replaced_keys(config, kwargs.keys())
-    _validate_properties(config)
-    return config
-
-
-def _get_model_dir(tf_config, model_dir):
-  """Returns `model_dir` based user provided `tf_config` or `model_dir`."""
-  # pylint: disable=g-explicit-bool-comparison
-
-  # Empty string is treated as False in Python condition check, which triggers
-  # some confusing error messages. For example, 'a or b' returns None if a is ''
-  # and b is None. `None` is allowed for model_dir but '' is not allowed. Here,
-  # explicitly check empty string to provide clear error message.
-  if model_dir == '':
-    raise ValueError('model_dir should be non-empty.')
-
-  model_dir_in_tf_config = tf_config.get('model_dir')
-  if model_dir_in_tf_config == '':
-    raise ValueError('model_dir in TF_CONFIG should be non-empty.')
-
-  if model_dir_in_tf_config:
-    if model_dir and model_dir_in_tf_config != model_dir:
-      raise ValueError(
-          '`model_dir` provided in RunConfig construct, if set, '
-          'must have the same value as the model_dir in TF_CONFIG. '
-          'model_dir: {}\nTF_CONFIG["model_dir"]: {}.\n'.format(
-              model_dir, model_dir_in_tf_config))
+from tensorflow_estimator.python.estimator import run_config
 
-    logging.info('Using model_dir in TF_CONFIG: %s', model_dir_in_tf_config)
+# Include attrs that start with single underscore.
+run_config.__all__ = [s for s in dir(run_config) if not s.startswith('__')]
 
-  return model_dir or model_dir_in_tf_config
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.run_config import *
diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py
deleted file mode 100644
index 06df7cb9dd..0000000000
--- a/tensorflow/python/estimator/run_config_test.py
+++ /dev/null
@@ -1,1235 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""RunConfig tests."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import json
-
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.core.protobuf import rewriter_config_pb2
-from tensorflow.python.estimator import run_config as run_config_lib
-from tensorflow.python.platform import test
-
-_TEST_DIR = 'test_dir'
-_MASTER = 'master_'
-_NOT_SUPPORTED_REPLACE_PROPERTY_MSG = 'Replacing .*is not supported'
-_SAVE_CKPT_ERR = (
-    '`save_checkpoints_steps` and `save_checkpoints_secs` cannot be both set.'
-)
-_MODEL_DIR_ERR = 'model_dir should be non-empty'
-_MODEL_DIR_TF_CONFIG_ERR = 'model_dir in TF_CONFIG should be non-empty'
-_MODEL_DIR_MISMATCH_ERR = (
-    '`model_dir` provided in RunConfig construct, if set, '
-    'must have the same value as the model_dir in TF_CONFIG. ')
-_SAVE_SUMMARY_STEPS_ERR = 'save_summary_steps should be >= 0'
-_SAVE_CKPT_STEPS_ERR = 'save_checkpoints_steps should be >= 0'
-_SAVE_CKPT_SECS_ERR = 'save_checkpoints_secs should be >= 0'
-_SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto'
-_KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0'
-_KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0'
-_TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer'
-_DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument "op".'
-_ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.'
-_ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.'
-_INVALID_TASK_TYPE_FOR_EVAL_MASTER = (
-    'Key.*eval.*master.*should not be set for task type other than')
-_MISSING_CHIEF_ERR = 'If "cluster" is set .* it must have one "chief" node'
-_MISSING_TASK_TYPE_ERR = 'If "cluster" is set .* task type must be set'
-_MISSING_TASK_ID_ERR = 'If "cluster" is set .* task index must be set'
-_INVALID_TASK_INDEX_ERR = 'is not a valid task_id'
-_NEGATIVE_TASK_INDEX_ERR = 'Task index must be non-negative number.'
-_INVALID_TASK_TYPE_ERR = 'is not a valid task_type'
-_INVALID_TASK_TYPE_FOR_LOCAL_ERR = (
-    'If "cluster" is not set in TF_CONFIG, task type must be WORKER.')
-_INVALID_TASK_INDEX_FOR_LOCAL_ERR = (
-    'If "cluster" is not set in TF_CONFIG, task index must be 0.')
-_INVALID_EVALUATOR_IN_CLUSTER_WITH_MASTER_ERR = (
-    'If `master` node exists in `cluster`, task_type `evaluator` is not '
-    'supported.')
-_INVALID_CHIEF_IN_CLUSTER_WITH_MASTER_ERR = (
-    'If `master` node exists in `cluster`, job `chief` is not supported.')
-_INVALID_SERVICE_TYPE_ERR = (
-    'If "service" is set in TF_CONFIG, it must be a dict. Given')
-
-
-def _create_run_config_with_cluster_spec(tf_config, **kwargs):
-  with test.mock.patch.dict('os.environ', {'TF_CONFIG': json.dumps(tf_config)}):
-    return run_config_lib.RunConfig(**kwargs)
-
-
-class RunConfigTest(test.TestCase):
-
-  def test_default_property_values(self):
-    config = run_config_lib.RunConfig()
-    self.assertIsNone(config.model_dir)
-    self.assertIsNone(config.session_config)
-    self.assertIsNone(config.tf_random_seed)
-    self.assertEqual(100, config.save_summary_steps)
-    self.assertEqual(600, config.save_checkpoints_secs)
-    self.assertIsNone(config.save_checkpoints_steps)
-    self.assertEqual(5, config.keep_checkpoint_max)
-    self.assertEqual(10000, config.keep_checkpoint_every_n_hours)
-    self.assertIsNone(config.service)
-    self.assertIsNone(config.device_fn)
-
-  def test_model_dir(self):
-    empty_config = run_config_lib.RunConfig()
-    self.assertIsNone(empty_config.model_dir)
-
-    new_config = empty_config.replace(model_dir=_TEST_DIR)
-    self.assertEqual(_TEST_DIR, new_config.model_dir)
-
-  def test_replace_with_allowed_properties(self):
-    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
-    device_fn = lambda op: "/cpu:0"
-
-    config = run_config_lib.RunConfig().replace(
-        tf_random_seed=11,
-        save_summary_steps=12,
-        save_checkpoints_secs=14,
-        session_config=session_config,
-        keep_checkpoint_max=16,
-        keep_checkpoint_every_n_hours=17,
-        device_fn=device_fn)
-    self.assertEqual(11, config.tf_random_seed)
-    self.assertEqual(12, config.save_summary_steps)
-    self.assertEqual(14, config.save_checkpoints_secs)
-    self.assertEqual(session_config, config.session_config)
-    self.assertEqual(16, config.keep_checkpoint_max)
-    self.assertEqual(17, config.keep_checkpoint_every_n_hours)
-    self.assertEqual(device_fn, config.device_fn)
-
-  def test_replace_none_value(self):
-    config = run_config_lib.RunConfig().replace(
-        tf_random_seed=None,
-        model_dir=None,
-        save_summary_steps=None,
-        save_checkpoints_secs=None,
-        save_checkpoints_steps=None,
-        session_config=None,
-        keep_checkpoint_max=None,
-        keep_checkpoint_every_n_hours=None,
-        device_fn=None)
-    self.assertIsNone(config.tf_random_seed)
-    self.assertIsNone(config.model_dir)
-    self.assertIsNone(config.save_summary_steps)
-    self.assertIsNone(config.save_checkpoints_secs)
-    self.assertIsNone(config.save_checkpoints_steps)
-    self.assertIsNone(config.session_config)
-    self.assertIsNone(config.keep_checkpoint_max)
-    self.assertIsNone(config.keep_checkpoint_every_n_hours)
-    self.assertIsNone(config.device_fn)
-
-  def test_replace_with_disallowallowed_properties(self):
-    config = run_config_lib.RunConfig()
-    with self.assertRaises(ValueError):
-      # tf_random_seed is not allowed to be replaced.
-      config.replace(master='_master')
-    with self.assertRaises(ValueError):
-      config.replace(some_undefined_property=123)
-
-  def test_replace(self):
-    config = run_config_lib.RunConfig()
-
-    with self.assertRaisesRegexp(
-        ValueError, _NOT_SUPPORTED_REPLACE_PROPERTY_MSG):
-      # master is not allowed to be replaced.
-      config.replace(master=_MASTER)
-
-    with self.assertRaisesRegexp(
-        ValueError, _NOT_SUPPORTED_REPLACE_PROPERTY_MSG):
-      config.replace(some_undefined_property=_MASTER)
-
-  def test_replace_invalid_values(self):
-    config = run_config_lib.RunConfig()
-
-    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
-      config.replace(model_dir='')
-    with self.assertRaisesRegexp(ValueError, _SAVE_SUMMARY_STEPS_ERR):
-      config.replace(save_summary_steps=-1)
-    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_STEPS_ERR):
-      config.replace(save_checkpoints_steps=-1)
-    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_SECS_ERR):
-      config.replace(save_checkpoints_secs=-1)
-    with self.assertRaisesRegexp(ValueError, _SESSION_CONFIG_ERR):
-      config.replace(session_config={})
-    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_MAX_ERR):
-      config.replace(keep_checkpoint_max=-1)
-    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_HOURS_ERR):
-      config.replace(keep_checkpoint_every_n_hours=0)
-    with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
-      config.replace(tf_random_seed=1.0)
-    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
-      config.replace(device_fn=lambda x, y: 0)
-
-  def test_init_with_allowed_properties(self):
-    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
-    device_fn = lambda op: "/cpu:0"
-
-    config = run_config_lib.RunConfig(
-        tf_random_seed=11,
-        save_summary_steps=12,
-        save_checkpoints_secs=14,
-        session_config=session_config,
-        keep_checkpoint_max=16,
-        keep_checkpoint_every_n_hours=17,
-        device_fn=device_fn)
-    self.assertEqual(11, config.tf_random_seed)
-    self.assertEqual(12, config.save_summary_steps)
-    self.assertEqual(14, config.save_checkpoints_secs)
-    self.assertEqual(session_config, config.session_config)
-    self.assertEqual(16, config.keep_checkpoint_max)
-    self.assertEqual(17, config.keep_checkpoint_every_n_hours)
-    self.assertEqual(device_fn, config.device_fn)
-
-  def test_init_none_value(self):
-    config = run_config_lib.RunConfig(
-        tf_random_seed=None,
-        model_dir=None,
-        save_summary_steps=None,
-        save_checkpoints_secs=None,
-        save_checkpoints_steps=None,
-        session_config=None,
-        keep_checkpoint_max=None,
-        keep_checkpoint_every_n_hours=None,
-        device_fn=None)
-    self.assertIsNone(config.tf_random_seed)
-    self.assertIsNone(config.model_dir)
-    self.assertIsNone(config.save_summary_steps)
-    self.assertIsNone(config.save_checkpoints_secs)
-    self.assertIsNone(config.save_checkpoints_steps)
-    self.assertIsNone(config.session_config)
-    self.assertIsNone(config.keep_checkpoint_max)
-    self.assertIsNone(config.keep_checkpoint_every_n_hours)
-    self.assertIsNone(config.device_fn)
-
-  def test_init_invalid_values(self):
-    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
-      run_config_lib.RunConfig(model_dir='')
-    with self.assertRaisesRegexp(ValueError, _SAVE_SUMMARY_STEPS_ERR):
-      run_config_lib.RunConfig(save_summary_steps=-1)
-    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_STEPS_ERR):
-      run_config_lib.RunConfig(save_checkpoints_steps=-1)
-    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_SECS_ERR):
-      run_config_lib.RunConfig(save_checkpoints_secs=-1)
-    with self.assertRaisesRegexp(ValueError, _SESSION_CONFIG_ERR):
-      run_config_lib.RunConfig(session_config={})
-    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_MAX_ERR):
-      run_config_lib.RunConfig(keep_checkpoint_max=-1)
-    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_HOURS_ERR):
-      run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0)
-    with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
-      run_config_lib.RunConfig(tf_random_seed=1.0)
-    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
-      run_config_lib.RunConfig(device_fn=lambda x: "/cpu:0")
-
-
-class RunConfigDistributedSettingTest(test.TestCase):
-
-  def _assert_distributed_properties(self, run_config,
-                                     expected_cluster_spec,
-                                     expected_task_type,
-                                     expected_task_id,
-                                     expected_master,
-                                     expected_evaluation_master,
-                                     expected_is_chief,
-                                     expected_num_worker_replicas,
-                                     expected_num_ps_replicas):
-    self.assertEqual(expected_cluster_spec, run_config.cluster_spec.as_dict())
-    self.assertEqual(expected_task_type, run_config.task_type)
-    self.assertEqual(expected_task_id, run_config.task_id)
-    self.assertEqual(expected_master, run_config.master)
-    self.assertEqual(expected_evaluation_master, run_config.evaluation_master)
-    self.assertEqual(expected_is_chief, run_config.is_chief)
-    self.assertEqual(expected_num_worker_replicas,
-                     run_config.num_worker_replicas)
-    self.assertEqual(expected_num_ps_replicas, run_config.num_ps_replicas)
-
-  def test_default_values(self):
-    self._assert_distributed_properties(
-        run_config=run_config_lib.RunConfig(),
-        expected_cluster_spec={},
-        expected_task_type=run_config_lib.TaskType.WORKER,
-        expected_task_id=0,
-        expected_master='',
-        expected_evaluation_master='',
-        expected_is_chief=True,
-        expected_num_worker_replicas=1,
-        expected_num_ps_replicas=0)
-
-  def test_tf_config_for_local(self):
-    tf_config = {
-        'task': {
-            'type': run_config_lib.TaskType.WORKER,
-            'index': 0
-        }
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self._assert_distributed_properties(
-        run_config=run_config,
-        expected_cluster_spec={},
-        expected_task_type=run_config_lib.TaskType.WORKER,
-        expected_task_id=0,
-        expected_master='',
-        expected_evaluation_master='',
-        expected_is_chief=True,
-        expected_num_worker_replicas=1,
-        expected_num_ps_replicas=0)
-    self.assertEqual(0, run_config.global_id_in_cluster)
-    self.assertIsNone(run_config.session_config, None)
-
-  def test_session_master_for_local(self):
-    tf_config = {'session_master': '_my_master'}
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec={},
-        expected_task_type=run_config_lib.TaskType.WORKER,
-        expected_task_id=0,
-        expected_master='_my_master',
-        expected_evaluation_master='',
-        expected_is_chief=True,
-        expected_num_worker_replicas=1,
-        expected_num_ps_replicas=0)
-
-  def test_eval_session_master_for_local(self):
-    tf_config = {'eval_session_master': '_my_eval_master'}
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec={},
-        expected_task_type=run_config_lib.TaskType.WORKER,
-        expected_task_id=0,
-        expected_master='',
-        expected_evaluation_master='_my_eval_master',
-        expected_is_chief=True,
-        expected_num_worker_replicas=1,
-        expected_num_ps_replicas=0)
-
-  def test_invalid_task_type_for_local(self):
-    tf_config = {
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_FOR_LOCAL_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_invalid_task_index_for_local(self):
-    tf_config = {
-        'task': {
-            'type': run_config_lib.TaskType.WORKER,
-            'index': 1
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_INDEX_FOR_LOCAL_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_chief_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        }
-    }
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec=tf_config['cluster'],
-        expected_task_type=run_config_lib.TaskType.CHIEF,
-        expected_task_id=0,
-        expected_master='grpc://host0:0',
-        expected_evaluation_master='',
-        expected_is_chief=True,
-        expected_num_worker_replicas=4,
-        expected_num_ps_replicas=2)
-
-  def test_session_master_from_single_node_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        },
-        'session_master': '_my_master'
-    }
-    self.assertEqual('_my_master',
-                     _create_run_config_with_cluster_spec(tf_config).master)
-
-  def test_session_master_from_multiple_nodes_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        },
-        'session_master': '_my_master'
-    }
-    self.assertEqual('_my_master',
-                     _create_run_config_with_cluster_spec(tf_config).master)
-
-  def test_fail_with_eval_session_master_for_non_evaluator(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        },
-        'eval_session_master': 'grpc://123',
-    }
-    with self.assertRaisesRegexp(
-        ValueError, _INVALID_TASK_TYPE_FOR_EVAL_MASTER):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_multiple_chief_nodes(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0', 'host:6:6'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-    }
-    with self.assertRaisesRegexp(ValueError, _ONE_CHIEF_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_missing_chief_node(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-    }
-    with self.assertRaisesRegexp(ValueError, _MISSING_CHIEF_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_single_chief_node(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        }
-    }
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec=tf_config['cluster'],
-        expected_task_type=run_config_lib.TaskType.CHIEF,
-        expected_task_id=0,
-        expected_master='',
-        expected_evaluation_master='',
-        expected_is_chief=True,
-        expected_num_worker_replicas=1,
-        expected_num_ps_replicas=0)
-
-  def test_fail_with_missing_task_type_for_distributed(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host3:3']
-        },
-    }
-    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_TYPE_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_missing_task_index_for_distributed(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host3:3']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_ID_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_index_is_too_large(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host3:3']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 1
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_INDEX_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_invalid_task_index(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host3:3']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': -1
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_invalid_task_type(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host3:3']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.WORKER,
-            'index': 0
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_worker_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.WORKER,
-            'index': 1
-        }
-    }
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec=tf_config['cluster'],
-        expected_task_type=run_config_lib.TaskType.WORKER,
-        expected_task_id=1,
-        expected_master='grpc://host4:4',
-        expected_evaluation_master='',
-        expected_is_chief=False,
-        expected_num_worker_replicas=4,
-        expected_num_ps_replicas=2)
-
-  def test_ps_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.PS,
-            'index': 0
-        }
-    }
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec=tf_config['cluster'],
-        expected_task_type=run_config_lib.TaskType.PS,
-        expected_task_id=0,
-        expected_master='grpc://host1:1',
-        expected_evaluation_master='',
-        expected_is_chief=False,
-        expected_num_worker_replicas=4,
-        expected_num_ps_replicas=2)
-
-  def test_evaluator_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.EVALUATOR,
-            'index': 12
-        }
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self._assert_distributed_properties(
-        run_config=run_config,
-        expected_cluster_spec={},
-        expected_task_type=run_config_lib.TaskType.EVALUATOR,
-        expected_task_id=12,
-        expected_master='',
-        expected_evaluation_master='',
-        expected_is_chief=False,  # evaluator is never chief.
-        expected_num_worker_replicas=0,  # evaluator is not in training cluster.
-        expected_num_ps_replicas=0)
-    self.assertIsNone(run_config.global_id_in_cluster)
-
-  def test_eval_master_for_evaluator(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.EVALUATOR,
-            'index': 12
-        },
-        'eval_session_master': 'grpc://123',
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual('grpc://123', run_config.evaluation_master)
-
-  def test_fail_with_invalid_task_index_for_evaluator(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host3:3']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.EVALUATOR,
-            'index': -1
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_global_id_in_cluster_for_chief(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
-            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0,
-        },
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual(0, run_config.global_id_in_cluster)
-
-  def test_global_id_in_cluster_for_worker(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
-            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.WORKER,
-            'index': 2,
-        },
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual(3, run_config.global_id_in_cluster)
-
-  def test_global_id_in_cluster_for_ps(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
-            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.PS,
-            'index': 1,
-        },
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual(5, run_config.global_id_in_cluster)
-
-  def test_global_id_in_cluster_for_multipe_worker_types(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            'worker': ['host3:3', 'host4:4', 'host5:5'],
-            'other_type': ['host3:1', 'host4:2'],
-            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
-        },
-        'task': {
-            'type': 'other_type',
-            'index': 1,
-        },
-    }
-    # Though 'other_type' is defined after 'worker', based on alphabetical
-    # order, the task type order should be 'chief', 'other_type', 'worker',
-    # 'ps', where 'chief' and 'ps' are predefined to be the top and last in the
-    # order list.
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual(2, run_config.global_id_in_cluster)
-
-
-class RunConfigDistributedSettingWithMasterTest(test.TestCase):
-
-  def _assert_distributed_properties(self, run_config,
-                                     expected_cluster_spec,
-                                     expected_task_type,
-                                     expected_task_id,
-                                     expected_master,
-                                     expected_evaluation_master,
-                                     expected_is_chief,
-                                     expected_num_worker_replicas,
-                                     expected_num_ps_replicas):
-    self.assertEqual(expected_cluster_spec, run_config.cluster_spec.as_dict())
-    self.assertEqual(expected_task_type, run_config.task_type)
-    self.assertEqual(expected_task_id, run_config.task_id)
-    self.assertEqual(expected_master, run_config.master)
-    self.assertEqual(expected_evaluation_master, run_config.evaluation_master)
-    self.assertEqual(expected_is_chief, run_config.is_chief)
-    self.assertEqual(expected_num_worker_replicas,
-                     run_config.num_worker_replicas)
-    self.assertEqual(expected_num_ps_replicas, run_config.num_ps_replicas)
-
-  def test_invalid_task_type_for_local(self):
-    tf_config = {
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 0
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_FOR_LOCAL_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_master_node(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 0
-        }
-    }
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec=tf_config['cluster'],
-        expected_task_type=run_config_lib.TaskType.MASTER,
-        expected_task_id=0,
-        expected_master='grpc://host0:0',
-        expected_evaluation_master='',
-        expected_is_chief=True,
-        expected_num_worker_replicas=4,
-        expected_num_ps_replicas=2)
-
-  def test_session_master_in_single_node_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 0
-        },
-        'session_master': '_my_master'
-    }
-    self.assertEqual('_my_master',
-                     _create_run_config_with_cluster_spec(tf_config).master)
-
-  def test_session_master_in_multiple_nodes_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 0
-        },
-        'session_master': '_my_master'
-    }
-    self.assertEqual('_my_master',
-                     _create_run_config_with_cluster_spec(tf_config).master)
-
-  def test_fail_with_eval_session_master(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 0
-        },
-        'eval_session_master': 'grpc://123',
-    }
-    with self.assertRaisesRegexp(
-        ValueError, _INVALID_TASK_TYPE_FOR_EVAL_MASTER):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_multiple_master_nodes(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0', 'host:6:6'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-    }
-    with self.assertRaisesRegexp(ValueError, _ONE_MASTER_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_single_master_node(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 0
-        }
-    }
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec=tf_config['cluster'],
-        expected_task_type=run_config_lib.TaskType.MASTER,
-        expected_task_id=0,
-        expected_master='',
-        expected_evaluation_master='',
-        expected_is_chief=True,
-        expected_num_worker_replicas=1,
-        expected_num_ps_replicas=0)
-
-  def test_fail_with_missing_task_type_for_distributed(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host3:3']
-        },
-    }
-    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_TYPE_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_missing_task_index_for_distributed(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host3:3']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_ID_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_index_is_too_large(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host3:3']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 1
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_INDEX_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_invalid_task_index(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host3:3']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': -1
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_invalid_task_type(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host3:3']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.WORKER,
-            'index': 0
-        }
-    }
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_worker_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.WORKER,
-            'index': 1
-        }
-    }
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec=tf_config['cluster'],
-        expected_task_type=run_config_lib.TaskType.WORKER,
-        expected_task_id=1,
-        expected_master='grpc://host4:4',
-        expected_evaluation_master='',
-        expected_is_chief=False,
-        expected_num_worker_replicas=4,
-        expected_num_ps_replicas=2)
-
-  def test_ps_tf_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.PS,
-            'index': 0
-        }
-    }
-    self._assert_distributed_properties(
-        run_config=_create_run_config_with_cluster_spec(tf_config),
-        expected_cluster_spec=tf_config['cluster'],
-        expected_task_type=run_config_lib.TaskType.PS,
-        expected_task_id=0,
-        expected_master='grpc://host1:1',
-        expected_evaluation_master='',
-        expected_is_chief=False,
-        expected_num_worker_replicas=4,
-        expected_num_ps_replicas=2)
-
-  def test_fail_with_evaluator(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.EVALUATOR,
-            'index': 1
-        }
-    }
-    with self.assertRaisesRegexp(ValueError,
-                                 _INVALID_EVALUATOR_IN_CLUSTER_WITH_MASTER_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_fail_with_chief(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.CHIEF: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.PS,
-            'index': 1
-        }
-    }
-    with self.assertRaisesRegexp(ValueError,
-                                 _INVALID_CHIEF_IN_CLUSTER_WITH_MASTER_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-  def test_global_id_in_cluster_for_master(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
-            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 0,
-        },
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual(0, run_config.global_id_in_cluster)
-
-  def test_global_id_in_cluster_for_worker(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
-            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.WORKER,
-            'index': 2,
-        },
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual(3, run_config.global_id_in_cluster)
-
-  def test_global_id_in_cluster_for_ps(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
-            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.PS,
-            'index': 1,
-        },
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual(5, run_config.global_id_in_cluster)
-
-  def test_global_id_in_cluster_for_multipe_worker_types(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            'worker': ['host3:3', 'host4:4', 'host5:5'],
-            'other_type': ['host3:1', 'host4:2'],
-            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
-        },
-        'task': {
-            'type': 'other_type',
-            'index': 1,
-        },
-    }
-    # Though 'other_type' is defined after 'worker', based on alphabetical
-    # order, the task type order should be 'chief', 'other_type', 'worker',
-    # 'ps', where 'chief' and 'ps' are predefined to be the top and last in the
-    # order list.
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual(2, run_config.global_id_in_cluster)
-
-
-class RunConfigSaveCheckpointsTest(test.TestCase):
-
-  def test_save_checkpoint(self):
-    empty_config = run_config_lib.RunConfig()
-    self.assertEqual(600, empty_config.save_checkpoints_secs)
-    self.assertIsNone(empty_config.save_checkpoints_steps)
-
-    config_with_steps = empty_config.replace(save_checkpoints_steps=100)
-    del empty_config
-    self.assertEqual(100, config_with_steps.save_checkpoints_steps)
-    self.assertIsNone(config_with_steps.save_checkpoints_secs)
-
-    config_with_secs = config_with_steps.replace(save_checkpoints_secs=200)
-    del config_with_steps
-    self.assertEqual(200, config_with_secs.save_checkpoints_secs)
-    self.assertIsNone(config_with_secs.save_checkpoints_steps)
-
-  def test_save_checkpoint_both_steps_and_secs_are_not_none(self):
-    empty_config = run_config_lib.RunConfig()
-    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_ERR):
-      empty_config.replace(save_checkpoints_steps=100,
-                           save_checkpoints_secs=200)
-
-    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_ERR):
-      run_config_lib.RunConfig(save_checkpoints_steps=100,
-                               save_checkpoints_secs=200)
-
-  def test_save_checkpoint_both_steps_and_secs_are_none(self):
-    config_with_secs = run_config_lib.RunConfig()
-    config_without_ckpt = config_with_secs.replace(
-        save_checkpoints_steps=None, save_checkpoints_secs=None)
-    self.assertIsNone(config_without_ckpt.save_checkpoints_steps)
-    self.assertIsNone(config_without_ckpt.save_checkpoints_secs)
-
-  def test_save_checkpoint_flip_secs_to_none(self):
-    config_with_secs = run_config_lib.RunConfig()
-    config_without_ckpt = config_with_secs.replace(save_checkpoints_secs=None)
-    self.assertIsNone(config_without_ckpt.save_checkpoints_steps)
-    self.assertIsNone(config_without_ckpt.save_checkpoints_secs)
-
-  def test_save_checkpoint_flip_steps_to_none(self):
-    config_with_steps = run_config_lib.RunConfig().replace(
-        save_checkpoints_steps=100)
-    config_without_ckpt = config_with_steps.replace(save_checkpoints_steps=None)
-    self.assertIsNone(config_without_ckpt.save_checkpoints_steps)
-    self.assertIsNone(config_without_ckpt.save_checkpoints_secs)
-
-
-class RunConfigServiceKeyTest(test.TestCase):
-
-  def test_arbitrary_key_value_pairs(self):
-    tf_config = {
-        'service': {
-            'key1': [1, 2],
-            'key2': {'a': 3, 'b': 4},
-            'key3': 789,
-        },
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual(tf_config['service'], run_config.service)
-
-  def test_missing_service_key(self):
-    tf_config = {
-        'model_dir': '/tmp/123',
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertIsNone(run_config.service)
-
-  def test_fail_with_non_dict(self):
-    tf_config = {
-        'service': 789,
-    }
-    with self.assertRaisesRegexp(TypeError, _INVALID_SERVICE_TYPE_ERR):
-      _create_run_config_with_cluster_spec(tf_config)
-
-
-class RunConfigModelDirTest(test.TestCase):
-
-  def test_default(self):
-    run_config = run_config_lib.RunConfig()
-    self.assertIsNone(run_config.model_dir)
-
-  def test_model_dir_in_constructor(self):
-    run_config = run_config_lib.RunConfig(model_dir='/tmp/123')
-    self.assertEqual('/tmp/123', run_config.model_dir)
-
-  def test_model_dir_in_tf_config(self):
-    tf_config = {
-        'model_dir': '/tmp/123',
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertEqual('/tmp/123', run_config.model_dir)
-
-  def test_model_dir_both_set_in_both_constructor_and_tf_config(self):
-    model_dir = '/tmp/123'
-    tf_config = {'model_dir': model_dir}
-    kwargs = {'model_dir': model_dir}
-    run_config = _create_run_config_with_cluster_spec(tf_config, **kwargs)
-    self.assertEqual('/tmp/123', run_config.model_dir)
-
-  def test_model_dir_different_in_both_constructor_and_tf_config(self):
-    tf_config = {'model_dir': '/tmp/123'}
-    kwargs = {'model_dir': '/tmp/456'}
-    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_MISMATCH_ERR):
-      _create_run_config_with_cluster_spec(tf_config, **kwargs)
-
-  def test_fail_with_empty_string_in_constructor(self):
-    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
-      run_config_lib.RunConfig(model_dir='')
-
-  def test_fail_with_empty_string_in_tf_config(self):
-    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_TF_CONFIG_ERR):
-      tf_config = {'model_dir': ''}
-      _create_run_config_with_cluster_spec(tf_config)
-
-
-class RunConfigSessionConfigTest(test.TestCase):
-
-  def _assert_equal_session_config(self, session_config,
-                                   expected_device_filters):
-
-    rewrite_opts = rewriter_config_pb2.RewriterConfig(
-        meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE)
-    graph_opts = config_pb2.GraphOptions(rewrite_options=rewrite_opts)
-    expected_session_config = config_pb2.ConfigProto(
-        allow_soft_placement=True,
-        graph_options=graph_opts,
-        device_filters=expected_device_filters)
-    self.assertEqual(session_config, expected_session_config)
-
-  def test_master_session_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.MASTER,
-            'index': 0
-        }
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self._assert_equal_session_config(run_config.session_config,
-                                      ['/job:ps', '/job:master'])
-
-  def test_chief_session_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.CHIEF,
-            'index': 0
-        }
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self._assert_equal_session_config(run_config.session_config,
-                                      ['/job:ps', '/job:chief'])
-
-  def test_worker_session_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.WORKER,
-            'index': 1
-        }
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self._assert_equal_session_config(run_config.session_config,
-                                      ['/job:ps', '/job:worker/task:1'])
-
-  def test_ps_session_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.PS,
-            'index': 1
-        }
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self._assert_equal_session_config(run_config.session_config,
-                                      ['/job:ps', '/job:worker', '/job:master'])
-
-  def test_evaluator_session_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': run_config_lib.TaskType.EVALUATOR,
-            'index': 0
-        }
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertIsNone(run_config.session_config)
-
-  def test_other_type_session_config(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.MASTER: ['host0:0'],
-            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-            'other_type': ['host3:1', 'host4:2'],
-            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
-        },
-        'task': {
-            'type': 'other_type',
-            'index': 0
-        }
-    }
-    run_config = _create_run_config_with_cluster_spec(tf_config)
-    self.assertIsNone(run_config.session_config)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py
index 240be5dabe..e621a9531a 100644
--- a/tensorflow/python/estimator/training.py
+++ b/tensorflow/python/estimator/training.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,1062 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Classes and functions related to train_and_evaluate."""
+"""training python module.
+
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import collections
-import json
-import os
-import time
-
-import six
-
-from tensorflow.core.protobuf import config_pb2
-from tensorflow.python.distribute import estimator_training as distribute_coordinator_training
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.estimator import exporter as exporter_lib
-from tensorflow.python.estimator import run_config as run_config_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import server_lib
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.util import compat
-from tensorflow.python.util.tf_export import estimator_export
-
-_MAX_DELAY_SECS = 60
-_DELAY_SECS_PER_WORKER = 5
-_TF_CONFIG_ENV = 'TF_CONFIG'
-_ENVIRONMENT_KEY = 'environment'
-_ENVIRONMENT_GOOGLE_VALUE = 'google'
-_TRAINER_JOBS = (run_config_lib.TaskType.CHIEF, run_config_lib.TaskType.MASTER,
-                 run_config_lib.TaskType.WORKER)
-
-
-def _validate_input_fn(input_fn):
-  """Validates the `input_fn`."""
-  if not callable(input_fn):
-    raise TypeError('`input_fn` must be callable, given: {}'.format(input_fn))
-
-
-def _validate_hooks(hooks):
-  """Validates the `hooks`."""
-  hooks = tuple(hooks or [])
-  for hook in hooks:
-    if not isinstance(hook, session_run_hook.SessionRunHook):
-      raise TypeError(
-          'All hooks must be `SessionRunHook` instances, given: {}'.format(
-              hook))
-  return hooks
-
-
-def _validate_exporters(exporters):
-  """Validates `exporters` and returns them as a tuple."""
-  if not exporters:
-    return ()
-
-  if isinstance(exporters, exporter_lib.Exporter):
-    exporters = [exporters]
-
-  unique_names = []  # `Exporter`s should have unique names.
-  try:
-    for exporter in exporters:
-      if not isinstance(exporter, exporter_lib.Exporter):
-        # Error message will be printed out by the outer try/except.
-        raise TypeError
-
-      if not exporter.name:
-        full_list_of_names = [e.name for e in exporters]
-        raise ValueError('An Exporter cannot have a name that is `None` or'
-                         ' empty. All exporter names:'
-                         ' {}'.format(full_list_of_names))
-
-      if not isinstance(exporter.name, six.string_types):
-        raise ValueError('An Exporter must have a string name. Given: '
-                         '{}'.format(type(exporter.name)))
-
-      if exporter.name in unique_names:
-        full_list_of_names = [e.name for e in exporters]
-        raise ValueError(
-            '`exporters` must have unique names. Such a name cannot be `None`.'
-            ' All exporter names: {}'.format(full_list_of_names))
-      unique_names.append(exporter.name)
-  except TypeError:
-    # Two possibilities:
-    # - `exporters` is neither `Exporter` nor iterable.  Python has
-    #   raised a `TypeError` when iterating over `exporters`.
-    # - an `exporter` was None or not of type `Exporter`, so we raised a
-    #   `TypeError`.
-    raise TypeError('`exporters` must be an Exporter,'
-                    ' an iterable of Exporter, or `None`,'
-                    ' found %s.' % exporters)
-
-  return tuple(exporters)
-
-
-def _is_google_env():
-  """Detects whether current environment is google."""
-  tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV) or '{}')
-  if not tf_config:
-    logging.warn('TF_CONFIG should not be empty in distributed environment.')
-  return tf_config.get(_ENVIRONMENT_KEY) == _ENVIRONMENT_GOOGLE_VALUE
-
-
-@estimator_export('estimator.TrainSpec')
-class TrainSpec(
-    collections.namedtuple('TrainSpec', ['input_fn', 'max_steps', 'hooks'])):
-  """Configuration for the "train" part for the `train_and_evaluate` call.
-
-  `TrainSpec` determines the input data for the training, as well as the
-  duration. Optional hooks run at various stages of training.
-  """
-
-  def __new__(cls, input_fn, max_steps=None, hooks=None):
-    """Creates a validated `TrainSpec` instance.
-
-    Args:
-      input_fn: A function that provides input data for training as minibatches.
-        See [Premade Estimators](https://tensorflow.org/guide/premade_estimators#create_input_functions)
-        for more information. The function should construct and return one of
-        the following:
-          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
-            tuple (features, labels) with same constraints as below.
-          * A tuple (features, labels): Where features is a `Tensor` or a
-            dictionary of string feature name to `Tensor` and labels is a
-            `Tensor` or a dictionary of string label name to `Tensor`.
-
-      max_steps: Int. Positive number of total steps for which to train model.
-        If `None`, train forever. The training `input_fn` is not expected to
-        generate `OutOfRangeError` or `StopIteration` exceptions. See the
-        `train_and_evaluate` stop condition section for details.
-      hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        on all workers (including chief) during training.
-
-    Returns:
-      A validated `TrainSpec` object.
-
-    Raises:
-      ValueError: If any of the input arguments is invalid.
-      TypeError: If any of the arguments is not of the expected type.
-    """
-    # Validate input_fn.
-    _validate_input_fn(input_fn)
-
-    # Validate max_steps.
-    if max_steps is not None and max_steps <= 0:
-      raise ValueError(
-          'Must specify max_steps > 0, given: {}'.format(max_steps))
-
-    # Validate hooks.
-    hooks = _validate_hooks(hooks)
-
-    return super(TrainSpec, cls).__new__(
-        cls, input_fn=input_fn, max_steps=max_steps, hooks=hooks)
-
-
-@estimator_export('estimator.EvalSpec')
-class EvalSpec(
-    collections.namedtuple('EvalSpec', [
-        'input_fn', 'steps', 'name', 'hooks', 'exporters', 'start_delay_secs',
-        'throttle_secs'
-    ])):
-  """Configuration for the "eval" part for the `train_and_evaluate` call.
-
-  `EvalSpec` combines details of evaluation of the trained model as well as its
-  export. Evaluation consists of computing metrics to judge the performance of
-  the trained model.  Export writes out the trained model on to external
-  storage.
-  """
-
-  def __new__(cls,
-              input_fn,
-              steps=100,
-              name=None,
-              hooks=None,
-              exporters=None,
-              start_delay_secs=120,
-              throttle_secs=600):
-    """Creates a validated `EvalSpec` instance.
-
-    Args:
-      input_fn: A function that constructs the input data for evaluation.
-        See [Premade Estimators](https://tensorflow.org/api_guides/premade_estimators#create_input_functions)
-        for more information. The function should construct and return one of
-        the following:
-          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
-            tuple (features, labels) with same constraints as below.
-          * A tuple (features, labels): Where features is a `Tensor` or a
-            dictionary of string feature name to `Tensor` and labels is a
-            `Tensor` or a dictionary of string label name to `Tensor`.
-
-      steps: Int. Positive number of steps for which to evaluate model. If
-        `None`, evaluates until `input_fn` raises an end-of-input exception.
-        See `Estimator.evaluate` for details.
-      name: String. Name of the evaluation if user needs to run multiple
-        evaluations on different data sets. Metrics for different evaluations
-        are saved in separate folders, and appear separately in tensorboard.
-      hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        during evaluation.
-      exporters: Iterable of `Exporter`s, or a single one, or `None`.
-        `exporters` will be invoked after each evaluation.
-      start_delay_secs: Int. Start evaluating after waiting for this many
-        seconds.
-      throttle_secs: Int. Do not re-evaluate unless the last evaluation was
-        started at least this many seconds ago. Of course, evaluation does not
-        occur if no new checkpoints are available, hence, this is the minimum.
-
-    Returns:
-      A validated `EvalSpec` object.
-
-    Raises:
-      ValueError: If any of the input arguments is invalid.
-      TypeError: If any of the arguments is not of the expected type.
-    """
-    # Validate input_fn.
-    _validate_input_fn(input_fn)
-
-    # Validate steps.
-    if steps is not None and steps <= 0:
-      raise ValueError('Must specify steps > 0, given: {}'.format(steps))
-
-    # Validate name.
-    if name is not None and not isinstance(name, six.string_types):
-      raise TypeError('`name` must be string, given: {}'.format(name))
-
-    # Validate hooks.
-    hooks = _validate_hooks(hooks)
-
-    # Validate exporters.
-    exporters = _validate_exporters(exporters)
-
-    # Validate start_delay_secs.
-    if start_delay_secs < 0:
-      raise ValueError('Must specify start_delay_secs >= 0, given: {}'.format(
-          start_delay_secs))
-
-    # Validate throttle_secs.
-    if throttle_secs < 0:
-      raise ValueError(
-          'Must specify throttle_secs >= 0, given: {}'.format(throttle_secs))
-
-    return super(EvalSpec, cls).__new__(
-        cls,
-        input_fn=input_fn,
-        steps=steps,
-        name=name,
-        hooks=hooks,
-        exporters=exporters,
-        start_delay_secs=start_delay_secs,
-        throttle_secs=throttle_secs)
-
-
-@estimator_export('estimator.train_and_evaluate')
-def train_and_evaluate(estimator, train_spec, eval_spec):
-  """Train and evaluate the `estimator`.
-
-  This utility function trains, evaluates, and (optionally) exports the model by
-  using the given `estimator`. All training related specification is held in
-  `train_spec`, including training `input_fn` and training max steps, etc. All
-  evaluation and export related specification is held in `eval_spec`, including
-  evaluation `input_fn`, steps, etc.
-
-  This utility function provides consistent behavior for both local
-  (non-distributed) and distributed configurations. The default distribution
-  configuration is parameter server-based between-graph replication. For other
-  types of distribution configurations such as all-reduce training, please use
-  [DistributionStrategies](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/distribute).  # pylint: disable=line-too-long
-
-  Overfitting: In order to avoid overfitting, it is recommended to set up the
-  training `input_fn` to shuffle the training data properly.
-
-  Stop condition: In order to support both distributed and non-distributed
-  configuration reliably, the only supported stop condition for model
-  training is `train_spec.max_steps`. If `train_spec.max_steps` is `None`, the
-  model is trained forever. *Use with care* if model stop condition is
-  different. For example, assume that the model is expected to be trained with
-  one epoch of training data, and the training `input_fn` is configured to throw
-  `OutOfRangeError` after going through one epoch, which stops the
-  `Estimator.train`. For a three-training-worker distributed configuration, each
-  training worker is likely to go through the whole epoch independently. So, the
-  model will be trained with three epochs of training data instead of one epoch.
-
-  Example of local (non-distributed) training:
-
-  ```python
-  # Set up feature columns.
-  categorial_feature_a = categorial_column_with_hash_bucket(...)
-  categorial_feature_a_emb = embedding_column(
-      categorical_column=categorial_feature_a, ...)
-  ...  # other feature columns
-
-  estimator = DNNClassifier(
-      feature_columns=[categorial_feature_a_emb, ...],
-      hidden_units=[1024, 512, 256])
-
-  # Or set up the model directory
-  #   estimator = DNNClassifier(
-  #       config=tf.estimator.RunConfig(
-  #           model_dir='/my_model', save_summary_steps=100),
-  #       feature_columns=[categorial_feature_a_emb, ...],
-  #       hidden_units=[1024, 512, 256])
-
-  # Input pipeline for train and evaluate.
-  def train_input_fn(): # returns x, y
-    # please shuffle the data.
-    pass
-  def eval_input_fn(): # returns x, y
-    pass
-
-  train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=1000)
-  eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
-
-  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
-  ```
-  Note that in current implementation `estimator.evaluate` will be called
-  multiple times. This means that evaluation graph (including eval_input_fn)
-  will be re-created for each `evaluate` call. `estimator.train` will be called
-  only once.
-
-  Example of distributed training:
-
-  Regarding the example of distributed training, the code above can be used
-  without a change (Please do make sure that the `RunConfig.model_dir` for all
-  workers is set to the same directory, i.e., a shared file system all workers
-  can read and write). The only extra work to do is setting the environment
-  variable `TF_CONFIG` properly for each worker correspondingly.
-
-  Also see
-  [Distributed TensorFlow](https://www.tensorflow.org/deploy/distributed).
-
-  Setting environment variable depends on the platform. For example, on Linux,
-  it can be done as follows (`$` is the shell prompt):
-
-  ```
-  $ TF_CONFIG='<replace_with_real_content>' python train_model.py
-  ```
-
-  For the content in `TF_CONFIG`, assume that the training cluster spec looks
-  like:
-
-  ```
-  cluster = {"chief": ["host0:2222"],
-             "worker": ["host1:2222", "host2:2222", "host3:2222"],
-             "ps": ["host4:2222", "host5:2222"]}
-  ```
-
-  Example of `TF_CONFIG` for chief training worker (must have one and only one):
-
-  ```
-  # This should be a JSON string, which is set as environment variable. Usually
-  # the cluster manager handles that.
-  TF_CONFIG='{
-      "cluster": {
-          "chief": ["host0:2222"],
-          "worker": ["host1:2222", "host2:2222", "host3:2222"],
-          "ps": ["host4:2222", "host5:2222"]
-      },
-      "task": {"type": "chief", "index": 0}
-  }'
-  ```
-  Note that the chief worker also does the model training job, similar to other
-  non-chief training workers (see next paragraph). In addition to the model
-  training, it manages some extra work, e.g., checkpoint saving and restoring,
-  writing summaries, etc.
-
-  Example of `TF_CONFIG` for non-chief training worker (optional, could be
-  multiple):
-
-  ```
-  # This should be a JSON string, which is set as environment variable. Usually
-  # the cluster manager handles that.
-  TF_CONFIG='{
-      "cluster": {
-          "chief": ["host0:2222"],
-          "worker": ["host1:2222", "host2:2222", "host3:2222"],
-          "ps": ["host4:2222", "host5:2222"]
-      },
-      "task": {"type": "worker", "index": 0}
-  }'
-  ```
-  where the `task.index` should be set as 0, 1, 2, in this example, respectively
-  for non-chief training workers.
-
-  Example of `TF_CONFIG` for parameter server, aka ps (could be multiple):
-
-  ```
-  # This should be a JSON string, which is set as environment variable. Usually
-  # the cluster manager handles that.
-  TF_CONFIG='{
-      "cluster": {
-          "chief": ["host0:2222"],
-          "worker": ["host1:2222", "host2:2222", "host3:2222"],
-          "ps": ["host4:2222", "host5:2222"]
-      },
-      "task": {"type": "ps", "index": 0}
-  }'
-  ```
-  where the `task.index` should be set as 0 and 1, in this example, respectively
-  for parameter servers.
-
-  Example of `TF_CONFIG` for evaluator task. Evaluator is a special task that is
-  not part of the training cluster. There could be only one. It is used for
-  model evaluation.
-
-  ```
-  # This should be a JSON string, which is set as environment variable. Usually
-  # the cluster manager handles that.
-  TF_CONFIG='{
-      "cluster": {
-          "chief": ["host0:2222"],
-          "worker": ["host1:2222", "host2:2222", "host3:2222"],
-          "ps": ["host4:2222", "host5:2222"]
-      },
-      "task": {"type": "evaluator", "index": 0}
-  }'
-  ```
-
-  When `distribute` or `experimental_distribute.train_distribute` and
-  `experimental_distribute.remote_cluster` is set, this method will start a
-  client running on the current host which connects to the `remote_cluster` for
-  training and evaluation.
-
-  Args:
-    estimator: An `Estimator` instance to train and evaluate.
-    train_spec: A `TrainSpec` instance to specify the training specification.
-    eval_spec: A `EvalSpec` instance to specify the evaluation and export
-      specification.
-
-  Returns:
-    A tuple of the result of the `evaluate` call to the `Estimator` and the
-    export results using the specified `ExportStrategy`.
-    Currently, the return value is undefined for distributed training mode.
-
-  Raises:
-    ValueError: if environment variable `TF_CONFIG` is incorrectly set.
-  """
-  _assert_eval_spec(eval_spec)  # fail fast if eval_spec is invalid.
-
-  executor = _TrainingExecutor(
-      estimator=estimator, train_spec=train_spec, eval_spec=eval_spec)
-  config = estimator.config
-
-  # If `distribute_coordinator_mode` is set and running in distributed
-  # environment, we run `train_and_evaluate` via distribute coordinator.
-  if distribute_coordinator_training.should_run_distribute_coordinator(config):
-    logging.info('Running `train_and_evaluate` with Distribute Coordinator.')
-    distribute_coordinator_training.train_and_evaluate(
-        estimator, train_spec, eval_spec, _TrainingExecutor)
-    return
-
-  if (config.task_type == run_config_lib.TaskType.EVALUATOR and
-      config.task_id > 0):
-    raise ValueError(
-        'For distributed training, there can only be one `evaluator` task '
-        '(with task id 0).  Given task id {}'.format(config.task_id))
-
-  return executor.run()
-
-
-class _StopAtSecsHook(session_run_hook.SessionRunHook):
-  """Stops given secs after begin is called."""
-
-  def __init__(self, stop_after_secs):
-    self._stop_after_secs = stop_after_secs
-    self._start_time = None
-
-  def begin(self):
-    self._start_time = time.time()
-
-  def after_run(self, run_context, run_values):
-    del run_values
-    if time.time() - self._start_time >= self._stop_after_secs:
-      run_context.request_stop()
-
-
-class _NewCheckpointListenerForEvaluate(
-    basic_session_run_hooks.CheckpointSaverListener):
-  """A saver listener to run evaluate with every checkpoint."""
-
-  def __init__(self, evaluator, eval_throttle_secs, continuous_eval_listener):
-    self._evaluator = evaluator
-    self._eval_throttle_secs = eval_throttle_secs
-    self._continuous_eval_listener = continuous_eval_listener
-    self.eval_result, self.export_results = None, None
-
-  def begin(self):
-    self._timer = basic_session_run_hooks.SecondOrStepTimer(
-        every_secs=self._eval_throttle_secs)
-    self._is_first_run = True
-
-  def after_save(self, session, global_step_value):
-    del session  # unused; required by signature.
-    # skip first run model is not trained yet.
-    if self._is_first_run:
-      self._is_first_run = False
-      return
-
-    if not self._continuous_eval_listener.before_eval():
-      logging.info('Exiting training and evaluation loop, as requested by '
-                   '_ContinuousEvalListener.before_eval.')
-      return True
-    if self._timer.should_trigger_for_step(global_step_value):
-      self._evaluate(global_step_value)  # updates self.eval_result
-      if not self._continuous_eval_listener.after_eval(self.eval_result):
-        logging.info('Exiting evaluation, as requested by '
-                     '_ContinuousEvalListener.after_eval.')
-        return True
-    else:
-      # TODO(ispir): add remaining time in the log.
-      logging.info('Skip the current checkpoint eval due to throttle secs '
-                   '({} secs).'.format(self._eval_throttle_secs))
-
-  def end(self, session, global_step_value):
-    # Evaluate if the last step has not been evaluated, yet.
-    if global_step_value != self._timer.last_triggered_step():
-      if self._continuous_eval_listener.before_eval():
-        self._evaluate(global_step_value)
-        self._continuous_eval_listener.after_eval(self.eval_result)
-
-  def _evaluate(self, global_step_value):
-    self._timer.update_last_triggered_step(global_step_value)
-    self.eval_result, self.export_results = (
-        self._evaluator.evaluate_and_export())
-    if self.eval_result.status != _EvalStatus.EVALUATED:
-      #  This is unexpected; should never happen.
-      #  Training should always end with a new checkpoint.
-      raise RuntimeError('There was no new checkpoint after the training. '
-                         'Eval status: {}'.format(self.eval_result.status))
-
-
-class _TrainingExecutor(object):
-  """The executor to run `Estimator` training and evaluation.
-
-  This implementation supports both distributed and non-distributed (aka local)
-  training and evaluation based on the setting in `tf.estimator.RunConfig`.
-  """
-
-  def __init__(self,
-               estimator,
-               train_spec,
-               eval_spec,
-               train_hooks=None,
-               continuous_eval_listener=None):
-    if not isinstance(estimator, estimator_lib.Estimator):
-      raise TypeError(
-          '`estimator` must have type `tf.estimator.Estimator`. '
-          'Got: {}'.format(type(estimator)))
-    self._estimator = estimator
-
-    if not isinstance(train_spec, TrainSpec):
-      raise TypeError(
-          '`train_spec` must have type `tf.estimator.TrainSpec`. '
-          'Got: {}'.format(type(train_spec)))
-    self._train_spec = train_spec
-
-    if eval_spec and not isinstance(eval_spec, EvalSpec):
-      raise TypeError('`eval_spec` must be either `None` or have type '
-                      '`tf.estimator.EvalSpec`. Got: {}'.format(
-                          type(eval_spec)))
-    self._eval_spec = eval_spec
-
-    self._train_hooks = _validate_hooks(train_hooks)
-
-    if (continuous_eval_listener and
-        not isinstance(continuous_eval_listener, _ContinuousEvalListener)):
-      raise TypeError('`continuous_eval_listener` must have type '
-                      '`_ContinuousEvalListener`.')
-    self._continuous_eval_listener = (
-        continuous_eval_listener or _ContinuousEvalListener())
-
-  @property
-  def estimator(self):
-    return self._estimator
-
-  def run(self):
-    """Executes the run_foo for task type `foo`.
-
-    `_TrainingExecutor` predefines the procedure for task type 'chief',
-    'worker', 'ps', and 'evaluator'. For task type `foo`, the corresponding
-    procedure is `run_foo'. This `run` method invoke the procedure base on the
-    `RunConfig.task_type`.
-
-    Returns:
-      A tuple of the result of the `evaluate` call to the `Estimator` and the
-      export results using the specified `ExportStrategy`.
-      Currently undefined for distributed training mode.
-
-    Raises:
-      ValueError: if the estimator.config is mis-configured.
-    """
-    config = self._estimator.config
-
-    if (not config.cluster_spec and
-        config.task_type != run_config_lib.TaskType.EVALUATOR):
-      logging.info('Running training and evaluation locally (non-distributed).')
-      return self.run_local()
-
-    # Distributed case.
-    if not config.task_type:
-      # TODO(xiejw): Improve the error message about how to set the TF_CONFIG
-      # correctly.
-      raise ValueError(
-          '`estimator.config` must have task_type set. This usually means '
-          'TF_CONFIG environment is not set correctly.')
-
-    if config.task_type == 'local':
-      raise ValueError(
-          '`task.type` in TF_CONFIG cannot be `local`. Leaving `cluster` and '
-          '`task` properties in TF_CONFIG absent triggers train and evaluate '
-          '`Estimator` locally (non-distributed).')
-
-    # For task type foo, call executor.run_foo.
-    available_tasks = [
-        x for x in dir(self)
-        if x.startswith('run_') and x != 'run_local' and
-        callable(getattr(self, x))
-    ]
-    task_to_run = 'run_' + config.task_type
-    if task_to_run not in available_tasks:
-      raise ValueError(
-          'Task type {} is not supported. Supported task types are {}'.format(
-              config.task_type, [x[len('run_'):] for x in available_tasks]))
-    getattr(self, task_to_run)()
-
-  def run_chief(self):
-    """Runs task chief."""
-    # TODO(xiejw): To allow execution framework to add train hooks.
-    return self._start_distributed_training()
-
-  def run_worker(self):
-    """Runs task (training) worker."""
-    # TODO(xiejw): To allow execution framework to add train hooks.
-    return self._start_distributed_training()
-
-  def run_master(self):
-    """Runs task master."""
-    _assert_eval_spec(self._eval_spec)
-
-    # Final export signal: For any eval result with global_step >= train
-    # max_steps, the evaluator will send the final export signal. There is a
-    # small chance that the Estimator.train stopping logic sees a different
-    # global_step value (due to global step race condition and the fact the
-    # saver sees a larger value for checkpoint saving), which does not end
-    # the training. When the training ends, a new checkpoint is generated, which
-    # triggers the listener again. So, it could be the case the final export is
-    # triggered twice.
-    #
-    # But here, throttle_secs will skip the next intermediate checkpoint and,
-    # so, the double final export chance is very small.
-    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
-                                             self._train_spec.max_steps)
-
-    # When the underlying `Estimator` object saves a new checkpoint, we would
-    # like this callback to be called so that evaluation and export can trigger.
-    saving_listeners = [
-        _NewCheckpointListenerForEvaluate(evaluator,
-                                          self._eval_spec.throttle_secs,
-                                          _ContinuousEvalListener())
-    ]
-    self._start_distributed_training(saving_listeners=saving_listeners)
-
-  def run_evaluator(self):
-    """Runs task evaluator."""
-    # TODO(xiejw): To allow execution framework to add continuous eval listener.
-    return self._start_continuous_evaluation()
-
-  def run_ps(self):
-    """Runs task parameter server (in training cluster spec)."""
-    config = self._estimator.config
-    server = self._start_std_server(config)
-    server.join()
-
-  def run_local(self):
-    """Runs training and evaluation locally (non-distributed)."""
-    _assert_eval_spec(self._eval_spec)
-
-    train_hooks = list(self._train_spec.hooks) + list(self._train_hooks)
-    logging.info('Start train and evaluate loop. The evaluate will happen '
-                 'after every checkpoint. Checkpoint frequency is determined '
-                 'based on RunConfig arguments: save_checkpoints_steps {} or '
-                 'save_checkpoints_secs {}.'.format(
-                     self._estimator.config.save_checkpoints_steps,
-                     self._estimator.config.save_checkpoints_secs))
-
-    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
-                                             self._train_spec.max_steps)
-
-    listener_for_eval = _NewCheckpointListenerForEvaluate(
-        evaluator, self._eval_spec.throttle_secs,
-        self._continuous_eval_listener)
-    saving_listeners = [listener_for_eval]
-
-    self._estimator.train(
-        input_fn=self._train_spec.input_fn,
-        max_steps=self._train_spec.max_steps,
-        hooks=train_hooks,
-        saving_listeners=saving_listeners)
-
-    eval_result = listener_for_eval.eval_result or _EvalResult(
-        status=_EvalStatus.MISSING_CHECKPOINT)
-    return eval_result.metrics, listener_for_eval.export_results
-
-  def _start_std_server(self, config):
-    """Creates, starts, and returns a server_lib.Server."""
-    if (not config.cluster_spec or not config.task_type or
-        config.task_id is None):
-      raise RuntimeError('Could not start server; be sure to specify '
-                         'cluster_spec, task_type, and task in '
-                         'RunConfig or set the TF_CONFIG environment variable.')
-
-    if not config.master:
-      jobs = config.cluster_spec.jobs
-      if (len(jobs) == 1 and
-          len(config.cluster_spec.job_tasks(jobs[0])) == 1 and
-          config.task_type in _TRAINER_JOBS):
-        # For distributed training, config.master is empty if and only if it has
-        # a single node in the cluster spec. In this case, we should not start
-        # the server.
-        logging.info('Skip starting Tensorflow server as there is only one '
-                     'node in the cluster.')
-        return
-      else:
-        raise RuntimeError(
-            'Could not start server; be sure to specify master in '
-            'RunConfig or set the TF_CONFIG environment variable.')
-
-    logging.info('Start Tensorflow server.')
-
-    if config.session_config is None:
-      session_config = config_pb2.ConfigProto(log_device_placement=False)
-    else:
-      session_config = config_pb2.ConfigProto(
-          log_device_placement=False,
-          gpu_options=config.session_config.gpu_options)
-
-    server = server_lib.Server(
-        config.cluster_spec,
-        job_name=config.task_type,
-        task_index=config.task_id,
-        config=session_config,
-        start=False,
-        protocol=config.protocol)
-    server.start()
-    return server
-
-  def _start_distributed_training(self, saving_listeners=None):
-    """Calls `Estimator` train in a distributed setting."""
-    config = self._estimator.config
-
-    # Start in-process TensorFlow server if needed. It's important to start the
-    # server before we (optionally) sleep. Otherwise, the servers will wait to
-    # connect to each other before starting to train.
-    if not _is_google_env():
-      self._start_std_server(config)
-
-    # Delay worker to start. For asynchronous training, this usually helps model
-    # to converge faster.  Chief starts the training immediately, so, worker
-    # with task id x (0-based) should wait (x+1) * _DELAY_SECS_PER_WORKER.
-    start_delay_secs = 0
-    if config.task_type == run_config_lib.TaskType.WORKER:
-      # TODO(xiejw): Replace the hard code logic (task_id + 1) with unique id in
-      # training cluster.
-      start_delay_secs = min(_MAX_DELAY_SECS,
-                             (config.task_id + 1) * _DELAY_SECS_PER_WORKER)
-    if start_delay_secs > 0:
-      logging.info('Waiting %d secs before starting training.',
-                   start_delay_secs)
-      time.sleep(start_delay_secs)
-
-    self._estimator.train(
-        input_fn=self._train_spec.input_fn,
-        max_steps=self._train_spec.max_steps,
-        hooks=list(self._train_spec.hooks) + list(self._train_hooks),
-        saving_listeners=saving_listeners)
-
-  def _start_continuous_evaluation(self):
-    """Repeatedly calls `Estimator` evaluate and export until training ends."""
-
-    _assert_eval_spec(self._eval_spec)
-
-    start_delay_secs = self._eval_spec.start_delay_secs
-    if start_delay_secs:
-      logging.info('Waiting %f secs before starting eval.', start_delay_secs)
-      time.sleep(start_delay_secs)
-
-    latest_eval_result = None
-    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
-                                             self._train_spec.max_steps)
-
-    should_early_stop = False
-    while not should_early_stop:
-      if (latest_eval_result and
-          latest_eval_result.status == _EvalStatus.EVALUATED):
-        global_step = latest_eval_result.metrics.get(ops.GraphKeys.GLOBAL_STEP)
-        if (global_step and self._train_spec.max_steps and
-            global_step >= self._train_spec.max_steps):
-          logging.info(
-              'Exiting evaluation, global_step=%s >= train max_steps=%s',
-              global_step, self._train_spec.max_steps)
-          return
-
-      latest_eval_result, should_early_stop = self._execute_evaluator_once(
-          evaluator, self._continuous_eval_listener,
-          self._eval_spec.throttle_secs)
-
-  def _execute_evaluator_once(self, evaluator, continuous_eval_listener,
-                              throttle_secs):
-    """Executes the `evaluator`."""
-
-    _assert_eval_spec(self._eval_spec)
-
-    start = time.time()
-
-    eval_result = None
-    should_early_stop = False
-
-    if not continuous_eval_listener.before_eval():
-      logging.info('Exiting evaluation, as requested by '
-                   '_ContinuousEvalListener.before_eval.')
-      should_early_stop = True
-      return (eval_result, should_early_stop)
-
-    # Final export signal: For any eval result with global_step >= train
-    # max_steps, the evaluator will send the final export signal. The next
-    # iteration of while loop will end the continuous eval as the stopping
-    # condition is satisfied (both checks use the same global_step value,
-    # i.e., no race condition)
-    eval_result, _ = evaluator.evaluate_and_export()
-
-    if not self._continuous_eval_listener.after_eval(eval_result):
-      logging.info('Exiting evaluation, as requested by '
-                   '_ContinuousEvalListener.after_eval.')
-      should_early_stop = True
-      return (eval_result, should_early_stop)
-
-    # Throttle if necessary.
-    elapsed_time = time.time() - start
-    difference = throttle_secs - elapsed_time
-    if difference > 0:
-      logging.info('Waiting %f secs before starting next eval run.', difference)
-      time.sleep(difference)
-    elif (throttle_secs == 0 and
-          eval_result.status != _EvalStatus.EVALUATED):
-      # Prints a user-actionable warning to avoid unnecessary load on evaluator.
-      logging.warning(
-          'EvalSpec.throttle_secs is set as 0. This might overload the job '
-          'before finding (next) new checkpoint. Please consider to increase '
-          'it.')
-
-    return (eval_result, should_early_stop)
-
-  class _Evaluator(object):
-    """A helper class to call `Estimator.evaluate` and export model."""
-
-    def __init__(self, estimator, eval_spec, max_training_steps):
-      self._estimator = estimator
-
-      _assert_eval_spec(eval_spec)
-      self._eval_spec = eval_spec
-
-      self._is_final_export_triggered = False
-      self._previous_ckpt_path = None
-      self._last_warning_time = 0
-      self._max_training_steps = max_training_steps
-
-    @property
-    def is_final_export_triggered(self):
-      return self._is_final_export_triggered
-
-    def evaluate_and_export(self):
-      """Evaluate and (maybe) export the current model.
-
-      Returns:
-        A tuple of `EvalResult` instance and the export results.
-
-      Raises:
-        RuntimeError: for any unexpected internal error.
-        TypeError: if evaluation result has wrong type.
-      """
-      latest_ckpt_path = self._estimator.latest_checkpoint()
-      if not latest_ckpt_path:
-        self._log_err_msg('Estimator is not trained yet. Will start an '
-                          'evaluation when a checkpoint is ready.')
-        return _EvalResult(status=_EvalStatus.MISSING_CHECKPOINT), []
-
-      if latest_ckpt_path == self._previous_ckpt_path:
-        self._log_err_msg(
-            'No new checkpoint ready for evaluation. Skip the current '
-            'evaluation pass as evaluation results are expected to be same '
-            'for the same checkpoint.')
-        return _EvalResult(status=_EvalStatus.NO_NEW_CHECKPOINT), []
-
-      metrics = self._estimator.evaluate(
-          input_fn=self._eval_spec.input_fn,
-          steps=self._eval_spec.steps,
-          name=self._eval_spec.name,
-          checkpoint_path=latest_ckpt_path,
-          hooks=self._eval_spec.hooks)
-
-      # _EvalResult validates the metrics.
-      eval_result = _EvalResult(
-          status=_EvalStatus.EVALUATED,
-          metrics=metrics,
-          checkpoint_path=latest_ckpt_path)
-
-      is_the_final_export = (
-          eval_result.metrics[ops.GraphKeys.GLOBAL_STEP] >=
-          self._max_training_steps if self._max_training_steps else False)
-      export_results = self._export_eval_result(eval_result,
-                                                is_the_final_export)
-
-      if is_the_final_export:
-        logging.debug('Calling exporter with the `is_the_final_export=True`.')
-        self._is_final_export_triggered = True
-
-      self._last_warning_time = 0
-      self._previous_ckpt_path = latest_ckpt_path
-      return eval_result, export_results
-
-    def _log_err_msg(self, message):
-      """Prints warning `message` every 10 mins."""
-      current_time = time.time()
-      if current_time - self._last_warning_time > 600:
-        logging.warning(message)
-        self._last_warning_time = current_time
-
-    def _export_eval_result(self, eval_result, is_the_final_export):
-      """Export `eval_result` according to exporters in `EvalSpec`."""
-      export_dir_base = os.path.join(
-          compat.as_str_any(self._estimator.model_dir),
-          compat.as_str_any('export'))
-
-      export_results = []
-      for exporter in self._eval_spec.exporters:
-        export_results.append(
-            exporter.export(
-                estimator=self._estimator,
-                export_path=os.path.join(
-                    compat.as_str_any(export_dir_base),
-                    compat.as_str_any(exporter.name)),
-                checkpoint_path=eval_result.checkpoint_path,
-                eval_result=eval_result.metrics,
-                is_the_final_export=is_the_final_export))
-      return export_results
-
-
-class _EvalStatus(object):
-  """The status of an evaluation event.
-
-  For local training and evaluation, the status can only be `EVALUATED` as
-  `Estimator.train` always generates a new checkpoint.
-
-  For distributed training and evaluation, a separated evaluator keeps looking
-  for new checkpoint. So, multiple situations might occur:
-
-  - EVALUATED: A new checkpoint is found since last evaluation.
-      `Estimator.evaluate` will be invoked.
-  - MISSING_CHECKPOINT: No checkpoint can be found. Typically, this means
-      the trainer has not yet produced any checkpoint.
-  - NO_NEW_CHECKPOINT: No new checkpoint can be found since last evaluation.
-      Typically, this means the trainer has not yet produced any new checkpoint.
-  """
-
-  EVALUATED = 'evaluated'
-  MISSING_CHECKPOINT = 'missing checkpoint'
-  NO_NEW_CHECKPOINT = 'no new checkpoint'
-
-
-class _EvalResult(
-    collections.namedtuple('EvalResult',
-                           ['status', 'metrics', 'checkpoint_path'])):
-  """_EvalResult holds the result of an evaluation event."""
-
-  def __new__(cls, status, metrics=None, checkpoint_path=None):
-    """Creates a validated `_EvalResult`.
-
-    Args:
-      status: See `_EvalStatus`.
-      metrics: The evaluation results returned by `Estimator.evaluate`. Only set
-          if status is `EVALUATED`.
-      checkpoint_path: The corresponding checkpoint path for the `metrics`. Only
-          set if status is `EVALUATED`.
-    Returns:
-      A validated `_EvalResult` object.
-
-    Raises:
-      ValueError: If validation fails.
-      TypeError: If any of the arguments is not the expected type.
-    """
-
-    if status != _EvalStatus.EVALUATED:
-      if metrics:
-        raise ValueError(
-            'metrics must be `None` if status is not {}; got status {},'
-            ' metrics {}'.format(_EvalStatus.EVALUATED, status, metrics))
-      if checkpoint_path:
-        raise ValueError(
-            'checkpoint must be `None` if status is not {}; got status {}, '
-            'checkpoint_path {}'.format(_EvalStatus.EVALUATED, status,
-                                        checkpoint_path))
-      return super(_EvalResult, cls).__new__(cls, status, metrics,
-                                             checkpoint_path)
-
-    # Now, evaluated case.
-    assert status == _EvalStatus.EVALUATED
-
-    # Validates metrics.
-    if not metrics:
-      raise ValueError(
-          'Internal error: `Estimator.evaluate` should never return empty '
-          'metrics.')
-    if not isinstance(metrics, dict):
-      raise TypeError(
-          '`Estimator.evaluate` should return dict. Given {}.'.format(
-              type(metrics)))
-    if ops.GraphKeys.GLOBAL_STEP not in metrics:
-      raise ValueError(
-          'Internal error: `Estimator.evaluate` result should have '
-          '`global_step` in result. Given {}'.format(metrics))
-
-    # Validates checkpoint_path.
-    if not checkpoint_path:
-      raise ValueError(
-          'Internal error: `checkpoint_path` should never be empty.')
-
-    return super(_EvalResult, cls).__new__(cls, status, metrics,
-                                           checkpoint_path)
-
-
-class _ContinuousEvalListener(object):
-  """Interface for listeners that take action before or after evaluation."""
-
-  def before_eval(self):
-    """Called before evaluation.
-
-    Returns:
-      `False` if you want to skip the current evaluation and early stop the
-      continuous evaluation; `True` otherwise.
-    """
-    return True
-
-  def after_eval(self, eval_result):
-    """Called after the evaluation is executed.
-
-    Args:
-      eval_result: An `_EvalResult` instance.
-
-    Returns:
-      False if you want to early stop continuous evaluation; `True` otherwise.
-    """
-    del eval_result
-    return True
+from tensorflow_estimator.python.estimator import training
 
+# Include attrs that start with single underscore.
+training.__all__ = [s for s in dir(training) if not s.startswith('__')]
 
-def _assert_eval_spec(eval_spec):
-  """Raise error if `eval_spec` is not of the right type."""
-  if not isinstance(eval_spec, EvalSpec):
-    raise TypeError('`eval_spec` must have type `tf.estimator.EvalSpec`. '
-                    'Got: {}'.format(type(eval_spec)))
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.training import *
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
deleted file mode 100644
index 7d46917a6f..0000000000
--- a/tensorflow/python/estimator/training_test.py
+++ /dev/null
@@ -1,2198 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for training.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import glob
-import json
-import os
-import random
-import shutil
-import tempfile
-import time
-
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import estimator as estimator_lib
-from tensorflow.python.estimator import exporter as exporter_lib
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator import run_config as run_config_lib
-from tensorflow.python.estimator import training
-from tensorflow.python.estimator.canned import dnn
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export as export_lib
-from tensorflow.python.feature_column import feature_column
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import metrics as metrics_lib
-from tensorflow.python.ops import state_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.summary import summary_iterator
-from tensorflow.python.summary.writer import writer_cache
-from tensorflow.python.training import basic_session_run_hooks
-from tensorflow.python.training import monitored_session
-from tensorflow.python.training import server_lib
-from tensorflow.python.training import session_run_hook
-from tensorflow.python.training import training_util
-from tensorflow.python.util import compat
-
-_DEFAULT_EVAL_STEPS = 100
-_DEFAULT_EVAL_DELAY_SECS = 120
-_DEFAULT_EVAL_THROTTLE_SECS = 600
-_DELAY_SECS_PER_WORKER = 5
-_GLOBAL_STEP_KEY = ops.GraphKeys.GLOBAL_STEP
-_INVALID_INPUT_FN_MSG = '`input_fn` must be callable'
-_INVALID_HOOK_MSG = 'All hooks must be `SessionRunHook` instances'
-_INVALID_MAX_STEPS_MSG = 'Must specify max_steps > 0'
-_INVALID_STEPS_MSG = 'Must specify steps > 0'
-_INVALID_NAME_MSG = '`name` must be string'
-_INVALID_EVAL_DELAY_SECS_MSG = 'Must specify start_delay_secs >= 0'
-_INVALID_EVAL_THROTTLE_SECS_MSG = 'Must specify throttle_secs >= 0'
-_INVALID_ESTIMATOR_MSG = '`estimator` must have type `tf.estimator.Estimator`'
-_STALE_CHECKPOINT_MSG = 'There was no new checkpoint after the training.'
-_INVALID_EXPORTER_MSG = '`exporters` must be an Exporter'
-_INVALID_EXPORTER_NAME_TYPE_MSG = 'An Exporter must have a string name'
-_DUPLICATE_EXPORTER_NAMES_MSG = '`exporters` must have unique names.'
-_NONE_EXPORTER_NAME_MSG = (
-    'An Exporter cannot have a name that is `None` or empty.')
-_INVALID_TRAIN_SPEC_MSG = '`train_spec` must have type `tf.estimator.TrainSpec`'
-_INVALID_EVAL_SPEC_MSG = '`eval_spec` must have type `tf.estimator.EvalSpec`'
-_EVAL_SPEC_OR_NONE_MSG = (
-    '`eval_spec` must be either `None` or have type `tf.estimator.EvalSpec`')
-_INVALID_EVAL_LISTENER_MSG = 'must have type `_ContinuousEvalListener`'
-_INVALID_CONFIG_FOR_STD_SERVER_MSG = 'Could not start server; .*TF_CONFIG'
-_INVALID_LOCAL_TASK_WITH_CLUSTER = '`task.type` in TF_CONFIG cannot be `local`'
-_INVALID_TASK_TYPE = '`estimator.config` must have task_type set.'
-_INPROPER_THROTTL_SECS = (
-    'EvalSpec.throttle_secs is set as 0.*Please consider to increase')
-
-# The message should NOT have 'local' word as part of it. As (?!word) is looking
-# ahead, so, the $ (ending) check is required; otherwise, it will match
-# partially and return successuful.
-_INVALID_TASK_TO_RUN = (
-    'Task type .* is not supported. Supported task types are ((?!local).)*$')
-_INVALID_EMPTY_EVAL_RESULT_ERR = (
-    'Internal error: `Estimator.evaluate` should never return empty metrics')
-_INVALID_EVAL_RESULT_TYPE_ERR = '`Estimator.evaluate` should return dict.'
-_MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR = (
-    'Internal error: `Estimator.evaluate` result should have `global_step`')
-_INVALID_EVAL_TASK_ID_ERR = (
-    'there can only be one `evaluator` task .*with task id 0')
-
-_TF_CONFIG_FOR_CHIEF = {
-    'cluster': {
-        run_config_lib.TaskType.CHIEF: ['host0:0'],
-        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
-    },
-    'task': {
-        'type': run_config_lib.TaskType.CHIEF,
-        'index': 0
-    }
-}
-
-_TF_CONFIG_FOR_MASTER = {
-    'cluster': {
-        run_config_lib.TaskType.MASTER: ['host0:0'],
-        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
-    },
-    'task': {
-        'type': run_config_lib.TaskType.MASTER,
-        'index': 0
-    }
-}
-
-_TF_CONFIG_FOR_WORKER = {
-    'cluster': {
-        run_config_lib.TaskType.CHIEF: ['host0:0'],
-        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
-    },
-    'task': {
-        'type': run_config_lib.TaskType.WORKER,
-        'index': 1
-    }
-}
-
-_TF_CONFIG_FOR_PS = {
-    'cluster': {
-        run_config_lib.TaskType.CHIEF: ['host0:0'],
-        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
-    },
-    'task': {
-        'type': run_config_lib.TaskType.PS,
-        'index': 1
-    }
-}
-
-_TF_CONFIG_FOR_EVALUATOR = {
-    'cluster': {
-        run_config_lib.TaskType.CHIEF: ['host0:0'],
-        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
-        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
-    },
-    'task': {
-        'type': run_config_lib.TaskType.EVALUATOR,
-        'index': 0
-    }
-}
-
-_TF_CONFIG_FOR_GOOGLE = {'environment': 'google'}
-
-
-class _FakeHook(session_run_hook.SessionRunHook):
-  """Fake implementation of `SessionRunHook`."""
-
-
-class _InvalidHook(object):
-  """Invalid hook (not a subclass of `SessionRunHook`)."""
-
-
-def _create_exporter(name):
-  class FakeExporter(exporter_lib.Exporter):
-
-    def __init__(self, name):
-      self._name = name
-
-    @property
-    def name(self):
-      return self._name
-
-    def export(self, *args, **kwargs):
-      del args, kwargs
-
-  return FakeExporter(name=name)
-
-
-def _create_run_config_with_cluster_spec(tf_config):
-  with test.mock.patch.dict('os.environ', {'TF_CONFIG': json.dumps(tf_config)}):
-    return run_config_lib.RunConfig()
-
-
-class TrainSpecTest(test.TestCase):
-  """Tests TrainSpec."""
-
-  def testRequiredArgumentsSet(self):
-    """Tests that no errors are raised when all required arguments are set."""
-    spec = training.TrainSpec(input_fn=lambda: 1)
-    self.assertEqual(1, spec.input_fn())
-    self.assertIsNone(spec.max_steps)
-    self.assertEqual(0, len(spec.hooks))
-
-  def testAllArgumentsSet(self):
-    """Tests that no errors are raised when all arguments are set."""
-    hooks = [_FakeHook()]
-    spec = training.TrainSpec(input_fn=lambda: 1, max_steps=2, hooks=hooks)
-    self.assertEqual(1, spec.input_fn())
-    self.assertEqual(2, spec.max_steps)
-    self.assertEqual(tuple(hooks), spec.hooks)
-
-  def testInvalidInputFn(self):
-    with self.assertRaisesRegexp(TypeError, _INVALID_INPUT_FN_MSG):
-      training.TrainSpec(input_fn='invalid')
-
-  def testInvalidMaxStep(self):
-    with self.assertRaisesRegexp(ValueError, _INVALID_MAX_STEPS_MSG):
-      training.TrainSpec(input_fn=lambda: 1, max_steps=0)
-
-  def testInvalidHook(self):
-    with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG):
-      training.TrainSpec(input_fn=lambda: 1, hooks=[_InvalidHook()])
-
-
-class EvalSpecTest(test.TestCase):
-  """Tests EvalSpec."""
-
-  def testRequiredArgumentsSet(self):
-    """Tests that no errors are raised when all required arguments are set."""
-    spec = training.EvalSpec(input_fn=lambda: 1)
-    self.assertEqual(1, spec.input_fn())
-    self.assertEqual(_DEFAULT_EVAL_STEPS, spec.steps)
-    self.assertIsNone(spec.name)
-    self.assertEqual(0, len(spec.hooks))
-    self.assertEqual(0, len(spec.exporters))
-    self.assertEqual(_DEFAULT_EVAL_DELAY_SECS, spec.start_delay_secs)
-    self.assertEqual(_DEFAULT_EVAL_THROTTLE_SECS, spec.throttle_secs)
-
-  def testAllArgumentsSet(self):
-    """Tests that no errors are raised when all arguments are set."""
-    hooks = [_FakeHook()]
-    exporter = _create_exporter('a')
-
-    spec = training.EvalSpec(
-        input_fn=lambda: 1,
-        steps=2,
-        name='name',
-        hooks=hooks,
-        exporters=exporter,
-        start_delay_secs=3,
-        throttle_secs=4)
-    self.assertEqual(1, spec.input_fn())
-    self.assertEqual(2, spec.steps)
-    self.assertEqual('name', spec.name)
-    self.assertEqual(tuple(hooks), spec.hooks)
-    self.assertEqual((exporter,), spec.exporters)
-    self.assertEqual(3, spec.start_delay_secs)
-    self.assertEqual(4, spec.throttle_secs)
-
-  def testListOfExporters(self):
-    """Tests that no errors are raised with multiple exporters."""
-    exporters = [_create_exporter('a'), _create_exporter('b')]
-
-    spec = training.EvalSpec(input_fn=lambda: 1, exporters=exporters)
-    self.assertEqual(1, spec.input_fn())
-    self.assertEqual(tuple(exporters), spec.exporters)
-
-  def testInvalidInputFn(self):
-    with self.assertRaisesRegexp(TypeError, _INVALID_INPUT_FN_MSG):
-      training.EvalSpec(input_fn='invalid')
-
-  def testInvalidMaxStep(self):
-    with self.assertRaisesRegexp(ValueError, _INVALID_STEPS_MSG):
-      training.EvalSpec(input_fn=lambda: 1, steps=0)
-
-  def testInvalidName(self):
-    with self.assertRaisesRegexp(TypeError, _INVALID_NAME_MSG):
-      training.EvalSpec(input_fn=lambda: 1, name=123)
-
-  def testInvalidHook(self):
-    with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG):
-      training.EvalSpec(input_fn=lambda: 1, hooks=[_InvalidHook()])
-
-  def testInvalidDelaySecs(self):
-    with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_DELAY_SECS_MSG):
-      training.EvalSpec(input_fn=lambda: 1, start_delay_secs=-1)
-
-  def testInvalidThrottleSecs(self):
-    with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_THROTTLE_SECS_MSG):
-      training.EvalSpec(input_fn=lambda: 1, throttle_secs=-1)
-
-  def testInvalidTypeOfListOfExporters(self):
-    with self.assertRaisesRegexp(TypeError, _INVALID_EXPORTER_MSG):
-      training.EvalSpec(
-          input_fn=lambda: 1, exporters=[_create_exporter('a'),
-                                         _FakeHook()])
-
-  def testInvalidTypeOfIndividualExporter(self):
-    with self.assertRaisesRegexp(TypeError, _INVALID_EXPORTER_MSG):
-      training.EvalSpec(input_fn=lambda: 1, exporters=_FakeHook())
-
-  def testInvalidTypeOfExporterName(self):
-    with self.assertRaisesRegexp(ValueError, _INVALID_EXPORTER_NAME_TYPE_MSG):
-      training.EvalSpec(input_fn=lambda: 1,
-                        exporters=_create_exporter(name=123))
-
-  def testMultipleExportersWithTheSameName(self):
-    with self.assertRaisesRegexp(ValueError, _DUPLICATE_EXPORTER_NAMES_MSG):
-      training.EvalSpec(
-          input_fn=lambda: 1,
-          exporters=[_create_exporter('a'), _create_exporter('a')])
-
-  def testMultipleExportersAndOneWithoutAName(self):
-    with self.assertRaisesRegexp(ValueError, _NONE_EXPORTER_NAME_MSG):
-      training.EvalSpec(
-          input_fn=lambda: 1,
-          exporters=[_create_exporter('a'),
-                     _create_exporter(None)])
-
-  def testSingleExporterWithoutAName(self):
-    with self.assertRaisesRegexp(ValueError, _NONE_EXPORTER_NAME_MSG):
-      training.EvalSpec(input_fn=lambda: 1, exporters=_create_exporter(None))
-
-
-class TrainAndEvaluateTest(test.TestCase):
-
-  def test_run_task(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    with test.mock.patch.object(training, '_TrainingExecutor') as mock_executor:
-      mock_executor_instance = test.mock.Mock()
-      mock_executor.return_value = mock_executor_instance
-      training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
-      mock_executor.assert_called_with(estimator=mock_est,
-                                       train_spec=mock_train_spec,
-                                       eval_spec=mock_eval_spec)
-      self.assertTrue(mock_executor_instance.run.called)
-
-  def test_error_out_if_evaluator_task_id_is_non_zero(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-        },
-        'task': {
-            'type': run_config_lib.TaskType.EVALUATOR,
-            'index': 1
-        }
-    }
-
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_TASK_ID_ERR):
-      training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
-
-  def test_invalid_estimator(self):
-    invalid_estimator = object()
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    with self.assertRaisesRegexp(TypeError, _INVALID_ESTIMATOR_MSG):
-      training.train_and_evaluate(invalid_estimator, mock_train_spec,
-                                  mock_eval_spec)
-
-  def test_fail_fast_if_invalid_eval_spec(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    invalid_eval_spec = object()
-
-    with test.mock.patch.object(training, '_TrainingExecutor') as mock_executor:
-      with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
-        training.train_and_evaluate(mock_est, mock_train_spec,
-                                    invalid_eval_spec)
-
-      mock_executor.assert_not_called()
-
-
-class TrainingExecutorConstructorTest(test.TestCase):
-  """Tests constructor of _TrainingExecutor."""
-
-  def test_required_arguments_set(self):
-    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
-    train_spec = training.TrainSpec(input_fn=lambda: 1)
-    eval_spec = training.EvalSpec(input_fn=lambda: 1)
-
-    executor = training._TrainingExecutor(estimator, train_spec, eval_spec)
-    self.assertEqual(estimator, executor.estimator)
-
-  def test_invalid_estimator(self):
-    invalid_estimator = object()
-    train_spec = training.TrainSpec(input_fn=lambda: 1)
-    eval_spec = training.EvalSpec(input_fn=lambda: 1)
-
-    with self.assertRaisesRegexp(TypeError, _INVALID_ESTIMATOR_MSG):
-      training._TrainingExecutor(invalid_estimator, train_spec, eval_spec)
-
-  def test_invalid_train_spec(self):
-    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
-    invalid_train_spec = object()
-    eval_spec = training.EvalSpec(input_fn=lambda: 1)
-
-    with self.assertRaisesRegexp(TypeError, _INVALID_TRAIN_SPEC_MSG):
-      training._TrainingExecutor(estimator, invalid_train_spec, eval_spec)
-
-  def test_invalid_eval_spec(self):
-    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
-    train_spec = training.TrainSpec(input_fn=lambda: 1)
-    invalid_eval_spec = object()
-
-    with self.assertRaisesRegexp(TypeError, _EVAL_SPEC_OR_NONE_MSG):
-      training._TrainingExecutor(estimator, train_spec, invalid_eval_spec)
-
-  def test_eval_spec_none(self):
-    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
-    train_spec = training.TrainSpec(input_fn=lambda: 1)
-    eval_spec = None
-
-    # Tests that no error is raised.
-    training._TrainingExecutor(estimator, train_spec, eval_spec)
-
-  def test_invalid_train_hooks(self):
-    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
-    train_spec = training.TrainSpec(input_fn=lambda: 1)
-    eval_spec = training.EvalSpec(input_fn=lambda: 1)
-    invalid_train_hooks = [object()]
-
-    with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG):
-      training._TrainingExecutor(
-          estimator, train_spec, eval_spec, train_hooks=invalid_train_hooks)
-
-  def test_invalid_continuous_eval_listener(self):
-    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
-    train_spec = training.TrainSpec(input_fn=lambda: 1)
-    eval_spec = training.EvalSpec(input_fn=lambda: 1)
-    invalid_continuous_eval_listener = object()
-
-    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_LISTENER_MSG):
-      training._TrainingExecutor(
-          estimator,
-          train_spec,
-          eval_spec,
-          continuous_eval_listener=invalid_continuous_eval_listener)
-
-
-class _TrainingExecutorTrainingTest(object):
-  """Tests training of _TrainingExecutor."""
-
-  def __init__(self, run_config):
-    self._run_config = run_config
-
-  def _run_task(self, executor):
-    # We should not call executor.run as the test here is intended to test
-    # run_foo explicitly (foo is the task type).
-    return getattr(executor, 'run_' + self._run_config.task_type)()
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_train_with_train_spec(self, mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = self._run_config
-    train_spec = training.TrainSpec(
-        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-    mock_server_instance = mock_server.return_value
-
-    executor = training._TrainingExecutor(mock_est, train_spec, mock_eval_spec)
-    self._run_task(executor)
-
-    mock_server.assert_called_with(
-        mock_est.config.cluster_spec,
-        job_name=mock_est.config.task_type,
-        task_index=mock_est.config.task_id,
-        config=test.mock.ANY,
-        protocol=None,
-        start=False)
-
-    self.assertTrue(mock_server_instance.start.called)
-
-    mock_est.train.assert_called_with(
-        input_fn=train_spec.input_fn,
-        max_steps=train_spec.max_steps,
-        hooks=list(train_spec.hooks),
-        saving_listeners=test.mock.ANY)
-    mock_est.evaluate.assert_not_called()
-    mock_est.export_savedmodel.assert_not_called()
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_train_with_no_eval_spec(self, mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = self._run_config
-    train_spec = training.TrainSpec(
-        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
-    eval_spec = None
-    mock_server_instance = mock_server.return_value
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    self._run_task(executor)
-
-    mock_server.assert_called_with(
-        mock_est.config.cluster_spec,
-        job_name=mock_est.config.task_type,
-        task_index=mock_est.config.task_id,
-        config=test.mock.ANY,
-        protocol=None,
-        start=False)
-
-    self.assertTrue(mock_server_instance.start.called)
-
-    mock_est.train.assert_called_with(
-        input_fn=train_spec.input_fn,
-        max_steps=train_spec.max_steps,
-        hooks=list(train_spec.hooks),
-        saving_listeners=test.mock.ANY)
-    mock_est.evaluate.assert_not_called()
-    mock_est.export_savedmodel.assert_not_called()
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_train_with_train_hooks(self, unused_mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = self._run_config
-    train_spec = training.TrainSpec(
-        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-    extra_hooks = [_FakeHook()]
-
-    executor = training._TrainingExecutor(
-        mock_est, train_spec, mock_eval_spec, train_hooks=extra_hooks)
-    self._run_task(executor)
-
-    mock_est.train.assert_called_with(
-        input_fn=train_spec.input_fn,
-        max_steps=train_spec.max_steps,
-        hooks=list(train_spec.hooks) + extra_hooks,
-        saving_listeners=test.mock.ANY)
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_no_server_startup_in_google(self, mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-    tf_config = {'TF_CONFIG': json.dumps(_TF_CONFIG_FOR_GOOGLE)}
-    with test.mock.patch.dict('os.environ', tf_config):
-      self._run_task(executor)
-      mock_server.assert_not_called()
-
-  def test_fail_with_empty_cluster_spec(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = None
-    mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'worker'
-    mock_est.config.task_id = 2
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
-                                                mock_eval_spec))
-
-  def test_fail_with_empty_master(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec(
-        {'worker': ['dummy', 'dummy1']})
-    mock_est.config.master = ''
-    mock_est.config.task_type = 'worker'
-    mock_est.config.task_id = 2
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
-                                                mock_eval_spec))
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_single_worker_node_with_empty_tf_master(
-      self, mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    # Single node cluster.
-    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
-    mock_est.config.master = ''
-    mock_est.config.task_type = 'worker'
-    mock_est.config.task_id = 2
-
-    self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
-                                              mock_eval_spec))
-    self.assertTrue(mock_est.train.called)
-    mock_server.assert_not_called()
-
-  def test_fail_with_empty_task_type(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
-    mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = ''
-    mock_est.config.task_id = 2
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
-                                                mock_eval_spec))
-
-  def test_fail_with_none_task_id(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
-    mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'worker'
-    mock_est.config.task_id = None
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
-                                                mock_eval_spec))
-
-
-class TrainingExecutorRunWorkerTest(_TrainingExecutorTrainingTest,
-                                    test.TestCase):
-  """Tests run_worker of _TrainingExecutor."""
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    _TrainingExecutorTrainingTest.__init__(
-        self,
-        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_WORKER))
-
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_delay_for_worker(self, _):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-
-    expected_secs = (self._run_config.task_id + 1) * _DELAY_SECS_PER_WORKER
-    with test.mock.patch.object(time, 'sleep') as mock_sleep:
-      mock_sleep.side_effect = lambda s: self.assertEqual(expected_secs, s)
-      self._run_task(executor)
-      self.assertTrue(mock_sleep.called)
-
-
-class TrainingExecutorRunChiefTest(_TrainingExecutorTrainingTest,
-                                   test.TestCase):
-  """Tests run_chief of _TrainingExecutor."""
-
-  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
-    test.TestCase.__init__(self, methodName)
-    _TrainingExecutorTrainingTest.__init__(
-        self,
-        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_CHIEF))
-
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_no_delay_for_chief(self, _):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-
-    with test.mock.patch.object(time, 'sleep') as mock_sleep:
-      self._run_task(executor)
-      mock_sleep.assert_not_called()
-
-
-class TrainingExecutorRunMasterTest(test.TestCase):
-  """Tests run_chief of _TrainingExecutor."""
-
-  def setUp(self):
-    self._run_config = _create_run_config_with_cluster_spec(
-        _TF_CONFIG_FOR_MASTER)
-
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_no_delay_for_master(self, _):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
-    mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(
-        spec=training.TrainSpec, max_steps=123, hooks=[])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-
-    with test.mock.patch.object(time, 'sleep') as mock_sleep:
-      executor.run_master()
-      mock_sleep.assert_not_called()
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_train_with_train_spec(self, mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
-    mock_est.config = self._run_config
-    train_spec = training.TrainSpec(
-        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
-    mock_server_instance = mock_server.return_value
-
-    executor = training._TrainingExecutor(mock_est, train_spec, mock_eval_spec)
-    executor.run_master()
-
-    mock_server.assert_called_with(
-        mock_est.config.cluster_spec,
-        job_name=mock_est.config.task_type,
-        task_index=mock_est.config.task_id,
-        config=test.mock.ANY,
-        protocol=None,
-        start=False)
-
-    self.assertTrue(mock_server_instance.start.called)
-
-    mock_est.train.assert_called_with(
-        input_fn=train_spec.input_fn,
-        max_steps=train_spec.max_steps,
-        hooks=list(train_spec.hooks),
-        saving_listeners=test.mock.ANY)
-    mock_est.export_savedmodel.assert_not_called()
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_train_with_no_eval_spec_fails(self, mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
-    mock_est.config = self._run_config
-    train_spec = training.TrainSpec(
-        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
-    eval_spec = None
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
-      executor.run_master()
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_train_with_train_hooks(self, mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
-    mock_est.config = self._run_config
-    train_spec = training.TrainSpec(
-        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
-    extra_hooks = [_FakeHook()]
-
-    executor = training._TrainingExecutor(
-        mock_est, train_spec, mock_eval_spec, train_hooks=extra_hooks)
-    executor.run_master()
-
-    mock_est.train.assert_called_with(
-        input_fn=train_spec.input_fn,
-        max_steps=train_spec.max_steps,
-        hooks=list(train_spec.hooks) + extra_hooks,
-        saving_listeners=test.mock.ANY)
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_no_server_startup_in_google(self, mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
-    mock_est.config = self._run_config
-    mock_train_spec = test.mock.Mock(
-        spec=training.TrainSpec, max_steps=123, hooks=[])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-    tf_config = {'TF_CONFIG': json.dumps(_TF_CONFIG_FOR_GOOGLE)}
-    with test.mock.patch.dict('os.environ', tf_config):
-      executor.run_master()
-      mock_server.assert_not_called()
-
-  def test_fail_with_empty_cluster_spec(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = None
-    mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'master'
-    mock_est.config.task_id = 2
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      training._TrainingExecutor(
-          mock_est, mock_train_spec, mock_eval_spec).run_master()
-
-  def test_fail_with_empty_master(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec(
-        {'master': ['dummy'], 'worker': ['dummy1']})
-    mock_est.config.master = ''
-    mock_est.config.task_type = 'master'
-    mock_est.config.task_id = 0
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      training._TrainingExecutor(
-          mock_est, mock_train_spec, mock_eval_spec).run_master()
-
-  @test.mock.patch.object(time, 'sleep')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_single_master_node_with_empty_tf_master(
-      self, mock_server, unused_mock_sleep):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
-
-    mock_train_spec = test.mock.Mock(
-        spec=training.TrainSpec, max_steps=123, hooks=[])
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec(
-        {'master': ['dummy']})
-    mock_est.config.master = ''
-    mock_est.config.task_type = 'master'
-    mock_est.config.task_id = 0
-
-    executor = training._TrainingExecutor(
-        mock_est, mock_train_spec, mock_eval_spec)
-    executor.run_master()
-
-    mock_server.assert_not_called()
-    self.assertTrue(mock_est.train.called)
-
-  def test_fail_with_empty_task_type(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec({'master': ['dummy']})
-    mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = ''
-    mock_est.config.task_id = 2
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      training._TrainingExecutor(
-          mock_est, mock_train_spec, mock_eval_spec).run_master()
-
-  def test_fail_with_none_task_id(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec({'master': ['dummy']})
-    mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'master'
-    mock_est.config.task_id = None
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      training._TrainingExecutor(
-          mock_est, mock_train_spec, mock_eval_spec).run_master()
-
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_run_master_triggers_evaluate_and_export(self, _):
-
-    def estimator_train(saving_listeners, *args, **kwargs):
-      #  There shalt be a saving_listener.  Estimator is going to call
-      # `after_save`.
-      del args, kwargs
-      saving_listeners[0].begin()
-      saving_listeners[0].after_save(session=None, global_step_value=0)
-      saving_listeners[0].after_save(session=None, global_step_value=10)
-
-    mock_est = test.mock.Mock(
-        spec=estimator_lib.Estimator, model_dir='path/', train=estimator_train)
-    mock_est.latest_checkpoint.return_value = 'checkpoint_path/'
-    mock_est.config = self._run_config
-
-    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
-    exporter.name = 'see_whether_export_is_called'
-
-    train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300)
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, steps=2, exporters=exporter)
-    eval_result = {_GLOBAL_STEP_KEY: train_spec.max_steps}
-    mock_est.evaluate.return_value = eval_result
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    executor.run_master()
-
-    mock_est.evaluate.assert_called_with(
-        name=eval_spec.name,
-        input_fn=eval_spec.input_fn,
-        steps=eval_spec.steps,
-        checkpoint_path='checkpoint_path/',
-        hooks=eval_spec.hooks)
-    self.assertEqual(1, exporter.export.call_count)
-    exporter.export.assert_called_with(
-        estimator=mock_est,
-        export_path=os.path.join('path/', 'export', exporter.name),
-        checkpoint_path='checkpoint_path/',
-        eval_result=eval_result,
-        is_the_final_export=True)
-
-  @test.mock.patch.object(basic_session_run_hooks, 'SecondOrStepTimer')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_run_master_throttle_eval(self, _, mock_timer_class):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
-
-    mock_timer = test.mock.Mock()
-    mock_timer_class.return_value = mock_timer
-
-    def estimator_train(saving_listeners, *args, **kwargs):
-      del args, kwargs
-      saving_listeners[0].begin()
-
-      # Call four times.
-      mock_timer.should_trigger_for_step.return_value = True
-      saving_listeners[0].after_save(session=None, global_step_value=None)
-
-      mock_timer.should_trigger_for_step.return_value = True
-      saving_listeners[0].after_save(session=None, global_step_value=None)
-
-      mock_timer.should_trigger_for_step.return_value = False
-      saving_listeners[0].after_save(session=None, global_step_value=None)
-
-      mock_timer.should_trigger_for_step.return_value = True
-      saving_listeners[0].after_save(session=None, global_step_value=None)
-
-    mock_est.train = estimator_train
-    mock_est.latest_checkpoint.side_effect = ['ckpt1', 'ckpt2']
-    mock_est.config = self._run_config
-
-    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
-    exporter.name = 'see_whether_export_is_called'
-
-    train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300)
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, steps=2, exporters=exporter, throttle_secs=10)
-
-    mock_est.evaluate.side_effect = [
-        {_GLOBAL_STEP_KEY: train_spec.max_steps //2},
-        {_GLOBAL_STEP_KEY: train_spec.max_steps}
-    ]
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    executor.run_master()
-
-    self.assertEqual(2, mock_est.evaluate.call_count)
-    self.assertEqual(2, exporter.export.call_count)
-
-    is_final_export_list = [call[1]['is_the_final_export']
-                            for call in exporter.export.call_args_list]
-    self.assertEqual([False, True], is_final_export_list)
-
-  @test.mock.patch.object(basic_session_run_hooks, 'SecondOrStepTimer')
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_run_master_throttle_eval_which_skips_final_ckpt(
-      self, _, mock_timer_class):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
-
-    mock_timer = test.mock.Mock()
-    mock_timer_class.return_value = mock_timer
-
-    def estimator_train(saving_listeners, *args, **kwargs):
-      del args, kwargs
-      saving_listeners[0].begin()
-
-      # Call tree times (one for first saving).
-      mock_timer.should_trigger_for_step.return_value = True
-      saving_listeners[0].after_save(session=None, global_step_value=0)
-
-      mock_timer.should_trigger_for_step.return_value = True
-      saving_listeners[0].after_save(session=None, global_step_value=125)
-
-      mock_timer.should_trigger_for_step.return_value = False
-      saving_listeners[0].after_save(session=None, global_step_value=250)
-
-      # At the end evaluate should be called even if throttle secs prevents it.
-      mock_timer.should_trigger_for_step.return_value = False
-      saving_listeners[0].end(session=None, global_step_value=300)
-
-    mock_est.train = estimator_train
-    mock_est.latest_checkpoint.side_effect = ['ckpt1', 'ckpt2']
-    mock_est.config = self._run_config
-
-    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
-    exporter.name = 'see_whether_export_is_called'
-
-    train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300)
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, steps=2, exporters=exporter, throttle_secs=10)
-
-    mock_est.evaluate.side_effect = [
-        {_GLOBAL_STEP_KEY: train_spec.max_steps //2},
-        {_GLOBAL_STEP_KEY: train_spec.max_steps}
-    ]
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    executor.run_master()
-
-    self.assertEqual(2, mock_est.evaluate.call_count)
-    self.assertEqual(2, exporter.export.call_count)
-
-    is_final_export_list = [call[1]['is_the_final_export']
-                            for call in exporter.export.call_args_list]
-    self.assertEqual([False, True], is_final_export_list)
-
-
-class TrainingExecutorRunEvaluatorTest(test.TestCase):
-  """Tests run_evaluator of _TrainingExecutor."""
-
-  def _set_up_mock_est_to_train_and_evaluate_once(self, mock_est,
-                                                  mock_train_spec):
-    """Sets global step in eval result to end the while True eval loop."""
-    training_max_step = 200
-    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: training_max_step}
-    mock_train_spec.max_steps = training_max_step
-
-  def test_evaluate_with_evaluate_spec(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.latest_checkpoint.return_value = 'latest_it_is'
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, steps=2, hooks=[_FakeHook()], name='cont_eval',
-        start_delay_secs=0, throttle_secs=0)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
-    executor.run_evaluator()
-
-    mock_est.evaluate.assert_called_with(
-        name='cont_eval',
-        input_fn=eval_spec.input_fn,
-        steps=eval_spec.steps,
-        checkpoint_path='latest_it_is',
-        hooks=eval_spec.hooks)
-    self.assertFalse(mock_est.train.called)
-
-  def test_evaluate_with_no_eval_spec_fails(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.latest_checkpoint.return_value = 'latest_it_is'
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
-
-    eval_spec = None
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
-
-    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
-      executor.run_evaluator()
-
-  def test_evaluate_with_train_hooks(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.latest_checkpoint.return_value = 'latest_it_is'
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1,
-        steps=2,
-        hooks=[_FakeHook()],
-        name='cont_eval',
-        start_delay_secs=0,
-        throttle_secs=0)
-
-    # The train_hooks will not be called during eval.
-    mock_hook = test.mock.Mock(spec=session_run_hook.SessionRunHook)
-    executor = training._TrainingExecutor(
-        mock_est, mock_train_spec, eval_spec, train_hooks=[mock_hook])
-    executor.run_evaluator()
-
-    mock_hook.begin.assert_not_called()
-
-  def test_evaluate_multiple_times(self):
-    training_max_step = 200
-
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
-    mock_est.evaluate.side_effect = [
-        {_GLOBAL_STEP_KEY: training_max_step // 2},
-        {_GLOBAL_STEP_KEY: training_max_step}
-    ]
-    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
-
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_train_spec.max_steps = training_max_step
-
-    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
-    exporter.name = 'see_how_many_times_export_is_called'
-
-    mock_est.times_export_was_called = 0
-    mock_est.times_final_export_was_called = 0
-    def export(estimator, export_path, checkpoint_path, eval_result,
-               is_the_final_export):
-      del export_path, checkpoint_path, eval_result
-      estimator.times_export_was_called += 1
-      # final_export is happened at the end.
-      self.assertEqual(0, estimator.times_final_export_was_called)
-      if is_the_final_export:
-        estimator.times_final_export_was_called += 1
-
-    exporter.export = export
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1,
-        start_delay_secs=0,
-        throttle_secs=0,
-        exporters=exporter)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
-    executor.run_evaluator()
-
-    self.assertEqual(2, mock_est.evaluate.call_count)
-    self.assertEqual(2, mock_est.times_export_was_called)
-    self.assertEqual(1, mock_est.times_final_export_was_called)
-
-  def test_evaluate_listener_before_eval(self):
-    training_max_step = 200
-
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
-    # Without early stopping, this eval will be run twice.
-    mock_est.evaluate.side_effect = [{
-        _GLOBAL_STEP_KEY: training_max_step // 2
-    }, {
-        _GLOBAL_STEP_KEY: training_max_step
-    }]
-    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
-
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
-    mock_train_spec.max_steps = training_max_step
-
-    class _Listener(training._ContinuousEvalListener):
-
-      def __init__(self):
-        self.call_count = 0
-
-      def before_eval(self):
-        self.call_count += 1
-        return  self.call_count == 1
-
-    listener = _Listener()
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
-
-    training._TrainingExecutor(
-        mock_est, mock_train_spec, eval_spec,
-        continuous_eval_listener=listener).run_evaluator()
-
-    # Before_eval returns False during the second time, so, evaluate will be
-    # called once.
-    self.assertEqual(1, mock_est.evaluate.call_count)
-    self.assertEqual(2, listener.call_count)
-
-  def test_evaluate_listener_after_eval(self):
-    training_max_step = 200
-
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
-    # Without early stopping, this eval will be run twice.
-    expected_eval_metrics = [{
-        _GLOBAL_STEP_KEY: training_max_step // 2
-    }, {
-        _GLOBAL_STEP_KEY: training_max_step
-    }]
-    mock_est.evaluate.side_effect = expected_eval_metrics
-    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
-
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_train_spec.max_steps = training_max_step
-
-    class _Listener(training._ContinuousEvalListener):
-
-      def __init__(self):
-        self.call_count = 0
-
-      def after_eval(self, eval_result):
-        self.call_count += 1
-        self.eval_result = eval_result
-        return False
-
-    listener = _Listener()
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
-
-    training._TrainingExecutor(
-        mock_est, mock_train_spec, eval_spec,
-        continuous_eval_listener=listener).run_evaluator()
-
-    # after_eval returns False during the first time, so, evaluate will be
-    # called once.
-    self.assertEqual(1, mock_est.evaluate.call_count)
-    self.assertEqual(1, listener.call_count)
-    self.assertAllEqual(expected_eval_metrics[0], listener.eval_result.metrics)
-    self.assertEqual('path_1', listener.eval_result.checkpoint_path)
-
-  def test_final_export_is_true_in_the_end(self):
-    training_max_step = 200
-
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
-    mock_est.evaluate.side_effect = [
-        {_GLOBAL_STEP_KEY: training_max_step // 2},
-        {_GLOBAL_STEP_KEY: training_max_step}
-    ]
-    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
-
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_train_spec.max_steps = training_max_step
-
-    mock_est.times_export_fn_was_called = 0
-    mock_est.times_the_final_export_was_true = 0
-    def export(estimator, export_path, checkpoint_path, eval_result,
-               is_the_final_export):
-      del export_path, checkpoint_path, eval_result
-      estimator.times_export_fn_was_called += 1
-      if is_the_final_export:
-        estimator.times_the_final_export_was_true += 1
-
-    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
-    exporter.name = 'see_how_many_times_export_is_called'
-    exporter.export = export
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1,
-        start_delay_secs=0,
-        throttle_secs=0,
-        exporters=exporter)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
-    executor.run_evaluator()
-
-    self.assertEqual(2, mock_est.evaluate.call_count)
-    self.assertEqual(2, mock_est.times_export_fn_was_called)
-    self.assertEqual(1, mock_est.times_the_final_export_was_true)
-
-  def test_skip_evaluation_due_to_ckpt(self):
-    training_max_step = 200
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.evaluate.side_effect = [
-        {_GLOBAL_STEP_KEY: training_max_step // 2},
-        {_GLOBAL_STEP_KEY: training_max_step}
-    ]
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_train_spec.max_steps = training_max_step
-
-    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
-
-    # First two items are invalid, next two items are same.
-    mock_est.latest_checkpoint.side_effect = [
-        None, '', 'same', 'same', 'path_2'
-    ]
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=2)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
-    with test.mock.patch.object(logging, 'warning') as mock_log:
-      executor.run_evaluator()
-
-    # Three checkpoint paths are invalid.
-    self.assertEqual(5, mock_est.latest_checkpoint.call_count)
-    self.assertEqual(2, mock_est.evaluate.call_count)
-
-    # Two warning logs are expected (last warning time is reset after a
-    # successuful evaluation)
-    self.assertEqual(2, mock_log.call_count)
-
-  def test_warning_if_throttle_secs_is_zero(self):
-    training_max_step = 200
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.evaluate.side_effect = [
-        {_GLOBAL_STEP_KEY: training_max_step}
-    ]
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_train_spec.max_steps = training_max_step
-
-    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
-
-    # We need to make the first one invalid, so it will check the
-    # throttle_secs=0.
-    mock_est.latest_checkpoint.side_effect = [None, 'path']
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
-    with test.mock.patch.object(logging, 'warning') as mock_log:
-      executor.run_evaluator()
-
-    # First ckpt is invalid.
-    self.assertEqual(2, mock_est.latest_checkpoint.call_count)
-    self.assertEqual(1, mock_est.evaluate.call_count)
-
-    self.assertRegexpMatches(str(mock_log.call_args), _INPROPER_THROTTL_SECS)
-
-  def test_continuous_eval_listener_eval_result(self):
-    training_max_step = 200
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    expected_eval_metrics = [{
-        _GLOBAL_STEP_KEY: training_max_step // 2
-    }, {
-        _GLOBAL_STEP_KEY: training_max_step
-    }]
-    mock_est.evaluate.side_effect = expected_eval_metrics
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_train_spec.max_steps = training_max_step
-
-    class _Listener(training._ContinuousEvalListener):
-
-      def __init__(self):
-        self.eval_results = []
-
-      def after_eval(self, eval_result):
-        self.eval_results.append(eval_result)
-        return True
-
-    continuous_eval_listener = _Listener()
-
-    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
-
-    # First two items are invalid, next two items are same.
-    mock_est.latest_checkpoint.side_effect = [
-        None, '', 'same', 'same', 'path_2'
-    ]
-    expected_eval_results = [
-        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
-        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
-        training._EvalResult(
-            training._EvalStatus.EVALUATED,
-            metrics=expected_eval_metrics[0],
-            checkpoint_path='same'),
-        training._EvalResult(training._EvalStatus.NO_NEW_CHECKPOINT),
-        training._EvalResult(
-            training._EvalStatus.EVALUATED,
-            metrics=expected_eval_metrics[1],
-            checkpoint_path='path_2'),
-    ]
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
-
-    executor = training._TrainingExecutor(
-        mock_est,
-        mock_train_spec,
-        eval_spec,
-        continuous_eval_listener=continuous_eval_listener)
-    executor.run_evaluator()
-
-    # Three checkpoint paths are invalid.
-    self.assertEqual(5, mock_est.latest_checkpoint.call_count)
-    self.assertEqual(2, mock_est.evaluate.call_count)
-
-    self.assertEqual(5, len(continuous_eval_listener.eval_results))
-    for i, result in enumerate(continuous_eval_listener.eval_results):
-      self.assertEqual(expected_eval_results[i].status, result.status)
-      self.assertAllEqual(expected_eval_results[i].metrics, result.metrics)
-      self.assertEqual(expected_eval_results[i].checkpoint_path,
-                       result.checkpoint_path)
-
-  def test_sleep_start_delay_secs(self):
-    training_max_step = 200
-    start_delay_secs = 123
-
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: training_max_step}
-    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_train_spec.max_steps = training_max_step
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, steps=2, hooks=[_FakeHook()], name='cont_eval',
-        start_delay_secs=start_delay_secs, throttle_secs=0)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
-    with test.mock.patch.object(time, 'sleep') as mock_sleep:
-      executor.run_evaluator()
-      mock_sleep.assert_called_with(start_delay_secs)
-      self.assertTrue(mock_est.evaluate.called)
-
-  @test.mock.patch.object(time, 'time')
-  @test.mock.patch.object(time, 'sleep')
-  def test_throttle_secs(self, mock_sleep, mock_time):
-    throttle_secs = 123
-    operation_secs = 12
-
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=throttle_secs)
-
-    mock_time.side_effect = [921, 921 + operation_secs]
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
-    # Disable logging as it calls time.time also.
-    with test.mock.patch.object(logging, 'info'):
-      executor.run_evaluator()
-    mock_sleep.assert_called_with(throttle_secs - operation_secs)
-    self.assertTrue(mock_est.evaluate.called)
-
-  def test_that_export_is_called(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
-
-    def export(estimator, *args, **kwargs):
-      del args, kwargs
-      estimator.export_was_called = True
-
-    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
-    exporter.name = 'see_whether_export_is_called'
-    exporter.export = export
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: 1,
-        steps=2,
-        start_delay_secs=0,
-        throttle_secs=0,
-        exporters=exporter)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
-    executor.run_evaluator()
-
-    # Verify that export was called on the right estimator.
-    self.assertTrue(mock_est.export_was_called)
-
-  def test_errors_out_if_evaluate_returns_empty_dict(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    train_spec = training.TrainSpec(input_fn=lambda: 1)
-    eval_spec = training.EvalSpec(input_fn=(lambda: 1),
-                                  start_delay_secs=0, throttle_secs=0)
-    mock_est.evaluate.return_value = {}
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR):
-      executor.run_evaluator()
-
-  def test_errors_out_if_evaluate_returns_non_dict(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    train_spec = training.TrainSpec(input_fn=lambda: 1)
-    eval_spec = training.EvalSpec(input_fn=(lambda: 1),
-                                  start_delay_secs=0, throttle_secs=0)
-    mock_est.evaluate.return_value = 123
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_RESULT_TYPE_ERR):
-      executor.run_evaluator()
-
-  def test_errors_out_if_evaluate_returns_dict_without_global_step(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    train_spec = training.TrainSpec(input_fn=lambda: 1)
-    eval_spec = training.EvalSpec(input_fn=(lambda: 1),
-                                  start_delay_secs=0, throttle_secs=0)
-    mock_est.evaluate.return_value = {'loss': 123}
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(ValueError,
-                                 _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR):
-      executor.run_evaluator()
-
-
-class TrainingExecutorRunPsTest(test.TestCase):
-  """Tests run_ps of _TrainingExecutor."""
-
-  @test.mock.patch.object(server_lib, 'Server')
-  def test_std_server(self, mock_server):
-    mock_server_instance = test.mock.Mock()
-    mock_server.return_value = mock_server_instance
-
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = _create_run_config_with_cluster_spec(_TF_CONFIG_FOR_PS)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-    executor.run_ps()
-
-    mock_server.assert_called_with(
-        mock_est.config.cluster_spec,
-        job_name=mock_est.config.task_type,
-        task_index=mock_est.config.task_id,
-        config=test.mock.ANY,
-        protocol=None,
-        start=False)
-
-    self.assertTrue(mock_server_instance.start.called)
-    self.assertTrue(mock_server_instance.join.called)
-
-  def test_fail_with_empty_cluster_spec(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = None
-    mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'ps'
-    mock_est.config.task_id = 2
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      training._TrainingExecutor(mock_est, mock_train_spec,
-                                 mock_eval_spec).run_ps()
-
-  def test_fail_with_empty_master(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
-    mock_est.config.master = ''
-    mock_est.config.task_type = 'ps'
-    mock_est.config.task_id = 2
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      training._TrainingExecutor(mock_est, mock_train_spec,
-                                 mock_eval_spec).run_ps()
-
-  def test_fail_with_empty_task_type(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
-    mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = ''
-    mock_est.config.task_id = 2
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      training._TrainingExecutor(mock_est, mock_train_spec,
-                                 mock_eval_spec).run_ps()
-
-  def test_fail_with_none_task_id(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
-    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
-    mock_est.config.master = 'grpc://...'
-    mock_est.config.task_type = 'ps'
-    mock_est.config.task_id = None
-
-    with self.assertRaisesRegexp(RuntimeError,
-                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
-      training._TrainingExecutor(mock_est, mock_train_spec,
-                                 mock_eval_spec).run_ps()
-
-
-class StopAtSecsHookTest(test.TestCase):
-  """Tests StopAtSecsHook."""
-
-  @test.mock.patch.object(time, 'time')
-  def test_stops_after_time(self, mock_time):
-    mock_time.return_value = 1484695987.209386
-    hook = training._StopAtSecsHook(1000)
-    with ops.Graph().as_default():
-      no_op = control_flow_ops.no_op()
-      # some time passed before training starts
-      mock_time.return_value += 250
-      with monitored_session.MonitoredSession(hooks=[hook]) as sess:
-        self.assertFalse(sess.should_stop())
-        sess.run(no_op)
-        self.assertFalse(sess.should_stop())
-        mock_time.return_value += 500
-        sess.run(no_op)
-        self.assertFalse(sess.should_stop())
-        mock_time.return_value += 400
-        sess.run(no_op)
-        self.assertFalse(sess.should_stop())
-        mock_time.return_value += 200
-        sess.run(no_op)
-        self.assertTrue(sess.should_stop())
-
-
-class TrainingExecutorRunLocalTest(test.TestCase):
-  """Tests run_local of _TrainingExecutor."""
-
-  def _model_fn(self, features, labels, mode):
-    del labels
-    with ops.control_dependencies([features]):
-      train_op = state_ops.assign_add(training_util.get_global_step(), 1)
-    return model_fn_lib.EstimatorSpec(
-        mode,
-        loss=constant_op.constant(0.),
-        train_op=train_op,
-        predictions=constant_op.constant([[10.]]),
-        eval_metric_ops={'mean_of_features': metrics_lib.mean(features)})
-
-  def _input_fn(self, repeat=True):
-    ds = dataset_ops.Dataset.from_tensors([1])
-    if repeat:
-      return ds.repeat()
-    return ds
-
-  def unique_checkpoint_every_time_fn(self):
-    return 'checkpoint_path_%s/' % random.random()
-
-  def test_runs_evaluate_with_every_new_checkpoint(self):
-    est = estimator_lib.Estimator(
-        model_fn=self._model_fn,
-        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-
-    mock_est.times_export_was_called = 0
-    mock_est.times_final_export_was_called = 0
-    def export(estimator, export_path, checkpoint_path, eval_result,
-               is_the_final_export):
-      del export_path, checkpoint_path, eval_result
-      estimator.times_export_was_called += 1
-      # final_export is happened at the end.
-      self.assertEqual(0, estimator.times_final_export_was_called)
-      if is_the_final_export:
-        estimator.times_final_export_was_called += 1
-
-    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
-    exporter.name = 'see_how_many_times_export_is_called'
-    exporter.export = export
-
-    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=22)
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False),
-        throttle_secs=0,
-        exporters=exporter)
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    executor.run_local()
-
-    self.assertEqual(1, mock_est.train.call_count)
-    self.assertEqual(3, mock_est.evaluate.call_count)
-    self.assertEqual(3, mock_est.times_export_was_called)
-    self.assertEqual(1, mock_est.times_final_export_was_called)
-
-  def test_runs_with_eval_listener_before_eval(self):
-    est = estimator_lib.Estimator(
-        model_fn=self._model_fn,
-        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-    mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn
-
-    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=12)
-    eval_spec = training.EvalSpec(input_fn=lambda: self._input_fn(repeat=False))
-    mock_est.evaluate.side_effect = [{_GLOBAL_STEP_KEY: train_spec.max_steps}]
-
-    class _Listener(training._ContinuousEvalListener):
-
-      def __init__(self):
-        self.call_count = 0
-
-      def before_eval(self):
-        self.call_count += 1
-        return False  # Will stop the run_local before first eval.
-
-    listener = _Listener()
-
-    executor = training._TrainingExecutor(
-        mock_est, train_spec, eval_spec, continuous_eval_listener=listener)
-    executor.run_local()
-
-    self.assertEqual(1, mock_est.train.call_count)
-    self.assertEqual(0, mock_est.evaluate.call_count)
-
-  def test_runs_with_eval_listener_after_eval(self):
-    est = estimator_lib.Estimator(
-        model_fn=self._model_fn,
-        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-
-    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=3000)
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0)
-
-    class _Listener(training._ContinuousEvalListener):
-
-      def __init__(self):
-        self.call_count = 0
-
-      def after_eval(self, eval_result):
-        self.call_count += 1
-        return False  # Will stop the run_local after first eval.
-
-    listener = _Listener()
-
-    executor = training._TrainingExecutor(
-        mock_est, train_spec, eval_spec, continuous_eval_listener=listener)
-    metrics, _ = executor.run_local()  # pylint: disable=assignment-from-no-return
-
-    self.assertEqual(1, mock_est.train.call_count)
-    self.assertEqual(1, mock_est.evaluate.call_count)
-    self.assertEqual(1, listener.call_count)
-    # Should be less than max_steps since listener did early stopping.
-    self.assertLess(metrics[_GLOBAL_STEP_KEY], train_spec.max_steps)
-
-  def test_handles_no_new_checkpoint_found(self):
-    est = estimator_lib.Estimator(
-        model_fn=self._model_fn,
-        # disable saving checkpoint
-        config=run_config_lib.RunConfig(
-            save_checkpoints_steps=None, save_checkpoints_secs=None))
-    train_spec = training.TrainSpec(
-        input_fn=self._input_fn, max_steps=300, hooks=[_FakeHook()])
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False),
-        hooks=[_FakeHook()],
-        throttle_secs=100)
-
-    executor = training._TrainingExecutor(est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(ValueError,
-                                 'There should be a CheckpointSaverHook'):
-      executor.run_local()
-
-  def test_final_export_is_true_in_the_end(self):
-    est = estimator_lib.Estimator(
-        model_fn=self._model_fn,
-        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-
-    mock_est.times_export_fn_was_called = 0
-    mock_est.times_the_final_export_was_true = 0
-    def export(estimator, export_path, checkpoint_path, eval_result,
-               is_the_final_export):
-      del export_path, checkpoint_path, eval_result
-      estimator.times_export_fn_was_called += 1
-      if is_the_final_export:
-        estimator.times_the_final_export_was_true += 1
-
-    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
-    exporter.name = 'see_how_many_times_export_is_called'
-    exporter.export = export
-
-    train_spec = training.TrainSpec(
-        input_fn=self._input_fn, max_steps=12, hooks=[_FakeHook()])
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False),
-        throttle_secs=0,
-        exporters=exporter)
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    executor.run_local()
-
-    self.assertEqual(1, mock_est.train.call_count)
-    self.assertEqual(2, mock_est.evaluate.call_count)
-    self.assertEqual(2, mock_est.times_export_fn_was_called)
-    self.assertEqual(1, mock_est.times_the_final_export_was_true)
-
-  def test_train_and_evaluate_args(self):
-    est = estimator_lib.Estimator(model_fn=self._model_fn)
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-    train_spec = training.TrainSpec(
-        input_fn=self._input_fn, max_steps=300, hooks=[_FakeHook()])
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False),
-        steps=2,
-        hooks=[_FakeHook()],
-        name='local_eval')
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    executor.run_local()
-
-    mock_est.evaluate.assert_called_with(
-        name=eval_spec.name,
-        input_fn=eval_spec.input_fn,
-        steps=eval_spec.steps,
-        checkpoint_path=est.latest_checkpoint(),
-        hooks=eval_spec.hooks)
-
-    train_args = mock_est.train.call_args[1]
-    self.assertEqual(list(train_spec.hooks), list(train_args['hooks']))
-    self.assertEqual(train_spec.input_fn, train_args['input_fn'])
-    self.assertEqual(train_spec.max_steps, train_args['max_steps'])
-
-  def test_train_with_no_eval_spec_fails(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    train_spec = training.TrainSpec(
-        input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
-    eval_spec = None
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-
-    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
-      executor.run_local()
-
-  def test_train_hooks(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
-    mock_est.latest_checkpoint.return_value = 'checkpoint_path/'
-    train_spec = training.TrainSpec(
-        input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
-    eval_spec = training.EvalSpec(input_fn=lambda: 1, steps=2)
-    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: train_spec.max_steps}
-    extra_hooks = [_FakeHook()]
-
-    executor = training._TrainingExecutor(
-        mock_est, train_spec, eval_spec, train_hooks=extra_hooks)
-    executor.run_local()
-
-    train_args = mock_est.train.call_args[1]
-    self.assertEqual(
-        list(train_spec.hooks) + extra_hooks, [
-            h for h in train_args['hooks']
-            if not isinstance(h, training._StopAtSecsHook)
-        ])
-
-  def test_that_export_is_called_with_run_local(self):
-    est = estimator_lib.Estimator(model_fn=self._model_fn)
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=12)
-    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: train_spec.max_steps}
-
-    def export(estimator, *args, **kwargs):
-      del args, kwargs
-      estimator.export_was_called = True
-      return 'path_to_export'
-
-    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
-    exporter.name = 'see_whether_export_is_called'
-    exporter.export = export
-
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False),
-        steps=2,
-        start_delay_secs=0,
-        throttle_secs=213,
-        exporters=exporter)
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    # pylint: disable=assignment-from-no-return
-    _, export_results = executor.run_local()
-    # pylint: enable=assignment-from-no-return
-
-    self.assertTrue(mock_est.export_was_called)
-    self.assertEqual(export_results, ['path_to_export'])
-
-  def test_errors_out_if_evaluate_returns_empty_dict(self):
-    est = estimator_lib.Estimator(
-        model_fn=self._model_fn,
-        config=run_config_lib.RunConfig(save_checkpoints_steps=2))
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-    train_spec = training.TrainSpec(input_fn=self._input_fn)
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0)
-    mock_est.evaluate.return_value = {}
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR):
-      executor.run_local()
-
-  def test_errors_out_if_evaluate_returns_non_dict(self):
-    est = estimator_lib.Estimator(
-        model_fn=self._model_fn,
-        config=run_config_lib.RunConfig(save_checkpoints_steps=2))
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-    train_spec = training.TrainSpec(input_fn=self._input_fn)
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0)
-    mock_est.evaluate.return_value = 123
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_RESULT_TYPE_ERR):
-      executor.run_local()
-
-  def test_errors_out_if_evaluate_returns_dict_without_global_step(self):
-    est = estimator_lib.Estimator(
-        model_fn=self._model_fn,
-        config=run_config_lib.RunConfig(save_checkpoints_steps=2))
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-    train_spec = training.TrainSpec(input_fn=self._input_fn)
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0)
-    mock_est.evaluate.return_value = {'loss': 123}
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    with self.assertRaisesRegexp(ValueError,
-                                 _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR):
-      executor.run_local()
-
-  def test_train_and_evaluate_return_metrics(self):
-    est = estimator_lib.Estimator(model_fn=self._model_fn)
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
-    train_spec = training.TrainSpec(
-        input_fn=self._input_fn, max_steps=12, hooks=[_FakeHook()])
-    eval_spec = training.EvalSpec(
-        input_fn=lambda: self._input_fn(repeat=False),
-        steps=2,
-        hooks=[_FakeHook()],
-        name='local_eval')
-
-    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
-    # pylint: disable=assignment-from-no-return
-    metrics, _ = executor.run_local()
-    # pylint: enable=assignment-from-no-return
-    self.assertEqual(metrics['global_step'], 12)
-
-
-class TrainAndEvaluateRunTest(test.TestCase):
-
-  def _test_run_task_and_executor(self, run_config):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = run_config
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-
-    executor.call_task = {}
-
-    def task_fn(name):
-
-      def _fn():
-        executor.call_task[name] = 1
-
-      return _fn
-
-    executor.run_chief = task_fn('chief')
-    executor.run_master = task_fn('master')
-    executor.run_ps = task_fn('ps')
-    executor.run_evaluator = task_fn('evaluator')
-    executor.run_worker = task_fn('worker')
-    executor.run_local = task_fn('local')
-    return executor
-
-  def test_run_chief(self):
-    executor = self._test_run_task_and_executor(
-        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_CHIEF))
-    executor.run()
-    self.assertEqual(1, executor.call_task['chief'])
-
-  def test_run_worker(self):
-    executor = self._test_run_task_and_executor(
-        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_WORKER))
-    executor.run()
-    self.assertEqual(1, executor.call_task['worker'])
-
-  def test_run_ps(self):
-    executor = self._test_run_task_and_executor(
-        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_PS))
-    executor.run()
-    self.assertEqual(1, executor.call_task['ps'])
-
-  def test_run_evaluator(self):
-    executor = self._test_run_task_and_executor(
-        run_config=_create_run_config_with_cluster_spec(
-            _TF_CONFIG_FOR_EVALUATOR))
-    executor.run()
-    self.assertEqual(1, executor.call_task['evaluator'])
-
-  def test_run_local(self):
-    executor = self._test_run_task_and_executor(
-        run_config=run_config_lib.RunConfig())
-    executor.run()
-    self.assertEqual(1, executor.call_task['local'])
-
-  def test_invalid_local_task(self):
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            'local': ['hos1:1'],
-        },
-        'task': {
-            'type': 'local',  # invalid task type.
-            'index': 0
-        }
-    }
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-    with self.assertRaisesRegexp(ValueError, _INVALID_LOCAL_TASK_WITH_CLUSTER):
-      executor.run()
-
-  def test_unsupported_task_due_to_missing_run_task(self):
-    unsupported_task = 'alloc'
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            unsupported_task: ['hos1:1'],
-        },
-        'task': {
-            'type': unsupported_task,
-            'index': 0
-        }
-    }
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN):
-      executor.run()
-
-  def test_unsupported_task_due_to_not_callable(self):
-    unsupported_task = 'alloc'
-    tf_config = {
-        'cluster': {
-            run_config_lib.TaskType.CHIEF: ['host0:0'],
-            unsupported_task: ['hos1:1'],
-        },
-        'task': {
-            'type': unsupported_task,
-            'index': 0
-        }
-    }
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-    executor.run_alloc = 123  # not callable
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN):
-      executor.run()
-
-  def test_invalid_task_type(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
-    mock_est.config = test.mock.Mock()
-    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
-    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
-
-    mock_est.config = test.mock.Mock()
-    mock_est.config.cluster_spec = server_lib.ClusterSpec({'1': ['dummy']})
-    mock_est.config.task_type = ''
-
-    executor = training._TrainingExecutor(mock_est, mock_train_spec,
-                                          mock_eval_spec)
-    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE):
-      executor.run()
-
-
-class TrainAndEvaluateIntegrationTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def tearDown(self):
-    if self._model_dir:
-      shutil.rmtree(self._model_dir)
-
-  def _as_label(self, data_in_float):
-    return np.rint(data_in_float).astype(np.int64)
-
-  def _get_exporter(self, name, fc):
-    feature_spec = feature_column.make_parse_example_spec(fc)
-    serving_input_receiver_fn = (
-        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
-    return exporter_lib.LatestExporter(
-        name, serving_input_receiver_fn=serving_input_receiver_fn)
-
-  def _extract_loss_and_global_step(self, event_folder):
-    """Returns the loss and global step in last event."""
-    event_paths = glob.glob(os.path.join(event_folder, 'events*'))
-
-    loss = None
-    global_step_count = None
-
-    for e in summary_iterator.summary_iterator(event_paths[-1]):
-      current_loss = None
-      for v in e.summary.value:
-        if v.tag == 'loss':
-          current_loss = v.simple_value
-
-      # If loss is not found, global step is meaningless.
-      if current_loss is None:
-        continue
-
-      current_global_step = e.step
-      if global_step_count is None or current_global_step > global_step_count:
-        global_step_count = current_global_step
-        loss = current_loss
-
-    return (loss, global_step_count)
-
-  def test_complete_flow_with_non_distributed_configuration(self):
-    n_classes = 3
-    input_dimension = 2
-    batch_size = 10
-
-    eval_name = 'foo'
-    exporter_name = 'saved_model_exporter'
-
-    # max_steps should be larger than save_summary_steps
-    max_steps = 10
-    save_summary_steps = 9
-
-    data = np.linspace(
-        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
-    x_data = data.reshape(batch_size, input_dimension)
-    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
-
-    # learn y = x
-    def train_input_fn():
-      return dataset_ops.Dataset.from_tensor_slices(({
-          'x': x_data
-      }, y_data)).batch(batch_size).repeat().shuffle(1000)
-
-    def eval_input_fn():
-      return dataset_ops.Dataset.from_tensor_slices(({
-          'x': x_data
-      }, y_data)).batch(batch_size)
-
-    def predict_input_fn():
-      return dataset_ops.Dataset.from_tensor_slices({
-          'x': x_data
-      }).batch(batch_size)
-
-    feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))]
-
-    est = dnn.DNNClassifier(
-        hidden_units=(2, 2),
-        feature_columns=feature_columns,
-        n_classes=n_classes,
-        config=run_config_lib.RunConfig(save_summary_steps=save_summary_steps),
-        model_dir=self._model_dir)
-
-    train_spec = training.TrainSpec(input_fn=train_input_fn,
-                                    max_steps=max_steps)
-
-    eval_spec = training.EvalSpec(
-        name=eval_name,
-        input_fn=eval_input_fn,
-        steps=None,
-        exporters=self._get_exporter(exporter_name, feature_columns),
-        throttle_secs=0)
-
-    training.train_and_evaluate(est, train_spec, eval_spec)
-
-    # Make sure nothing is stuck in limbo.
-    writer_cache.FileWriterCache.clear()
-
-    # Examine the training events. Use a range to check global step to avoid
-    # flakyness due to global step race condition.
-    training_loss, _ = self._extract_loss_and_global_step(est.model_dir)
-    self.assertIsNotNone(training_loss)
-
-    # Examine the eval events. The global step should be accurate.
-    eval_loss, eval_global_step = self._extract_loss_and_global_step(
-        event_folder=est.eval_dir(eval_name))
-    self.assertIsNotNone(eval_loss)
-    self.assertEqual(max_steps, eval_global_step)
-
-    # Examine the export folder.
-    export_dir = os.path.join(os.path.join(est.model_dir, 'export'),
-                              exporter_name)
-    self.assertTrue(gfile.Exists(export_dir))
-
-    # Examine the ckpt for predict.
-    predicted_proba = np.array([
-        x[prediction_keys.PredictionKeys.PROBABILITIES]
-        for x in est.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index fb110c4b7b..9afc4b2c04 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,142 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+"""util python module.
 
-"""Utilities for Estimators."""
+Importing from tensorflow.python.estimator
+is unsupported and will soon break!
+"""
+# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
-import time
+from tensorflow_estimator.python.estimator import util
 
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import training
-from tensorflow.python.util import compat
-from tensorflow.python.util import function_utils
+# Include attrs that start with single underscore.
+util.__all__ = [s for s in dir(util) if not s.startswith('__')]
 
-fn_args = function_utils.fn_args
-
-# When we create a timestamped directory, there is a small chance that the
-# directory already exists because another process is also creating these
-# directories. In this case we just wait one second to get a new timestamp and
-# try again. If this fails several times in a row, then something is seriously
-# wrong.
-MAX_DIRECTORY_CREATION_ATTEMPTS = 10
-
-
-def get_timestamped_dir(dir_base):
-  """Builds a path to a new subdirectory within the base directory.
-
-  The subdirectory will be named using the current time.
-  This guarantees monotonically increasing directory numbers even across
-  multiple runs of the pipeline.
-  The timestamp used is the number of seconds since epoch UTC.
-
-  Args:
-    dir_base: A string containing a directory to create the subdirectory under.
-
-  Returns:
-    The full path of the new subdirectory (which is not actually created yet).
-
-  Raises:
-    RuntimeError: if repeated attempts fail to obtain a unique timestamped
-      directory name.
-  """
-  attempts = 0
-  while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS:
-    timestamp = int(time.time())
-
-    result_dir = os.path.join(
-        compat.as_bytes(dir_base), compat.as_bytes(str(timestamp)))
-    if not gfile.Exists(result_dir):
-      # Collisions are still possible (though extremely unlikely): this
-      # directory is not actually created yet, but it will be almost
-      # instantly on return from this function.
-      return result_dir
-    time.sleep(1)
-    attempts += 1
-    logging.warn('Directory {} already exists; retrying (attempt {}/{})'.format(
-        result_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS))
-  raise RuntimeError('Failed to obtain a unique export directory name after '
-                     '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS))
-
-
-def parse_input_fn_result(result):
-  """Gets features, labels, and hooks from the result of an Estimator input_fn.
-
-  Args:
-    result: output of an input_fn to an estimator, which should be one of:
-
-      * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
-          tuple (features, labels) with same constraints as below.
-      * A tuple (features, labels): Where `features` is a `Tensor` or a
-        dictionary of string feature name to `Tensor` and `labels` is a
-        `Tensor` or a dictionary of string label name to `Tensor`. Both
-        `features` and `labels` are consumed by `model_fn`. They should
-        satisfy the expectation of `model_fn` from inputs.
-
-  Returns:
-    Tuple of features, labels, and input_hooks, where features are as described
-    above, labels are as described above or None, and input_hooks are a list
-    of SessionRunHooks to be included when running.
-
-  Raises:
-    ValueError: if the result is a list or tuple of length != 2.
-  """
-  input_hooks = []
-  try:
-    # We can't just check whether this is a tf.data.Dataset instance here,
-    # as this is plausibly a PerDeviceDataset. Try treating as a dataset first.
-    iterator = result.make_initializable_iterator()
-  except AttributeError:
-    # Not a dataset or dataset-like-object. Move along.
-    pass
-  else:
-    input_hooks.append(_DatasetInitializerHook(iterator))
-    result = iterator.get_next()
-  return parse_iterator_result(result) + (input_hooks,)
-
-
-def parse_iterator_result(result):
-  """Gets features, labels from result."""
-  if isinstance(result, (list, tuple)):
-    if len(result) != 2:
-      raise ValueError(
-          'input_fn should return (features, labels) as a len 2 tuple.')
-    return result[0], result[1]
-  return result, None
-
-
-class _DatasetInitializerHook(training.SessionRunHook):
-  """Creates a SessionRunHook that initializes the passed iterator."""
-
-  def __init__(self, iterator):
-    self._iterator = iterator
-
-  def begin(self):
-    self._initializer = self._iterator.initializer
-
-  def after_create_session(self, session, coord):
-    del coord
-    session.run(self._initializer)
-
-
-class StrategyInitFinalizeHook(training.SessionRunHook):
-  """Creates a SessionRunHook that initializes and shutsdown devices."""
-
-  def __init__(self, initialization_fn, finalize_fn):
-    self._initialization_fn = initialization_fn
-    self._finalize_fn = finalize_fn
-
-  def begin(self):
-    # We only create the init ops, but don't run it. We rely on SessionManager
-    # to run it for us.
-    self._init_ops = self._initialization_fn()
-    self._finalize_ops = self._finalize_fn()
-
-  def end(self, session):
-    logging.info('Finalize system.')
-    session.run(self._finalize_ops)
+# pylint: disable=g-import-not-at-top
+from tensorflow_estimator.python.estimator.util import *
diff --git a/tensorflow/python/estimator/util_test.py b/tensorflow/python/estimator/util_test.py
deleted file mode 100644
index d440c454dc..0000000000
--- a/tensorflow/python/estimator/util_test.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for util.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import util
-from tensorflow.python.framework import constant_op
-from tensorflow.python.platform import test
-from tensorflow.python.training import training
-
-
-class UtilTest(test.TestCase):
-  """Tests for miscellaneous Estimator utils."""
-
-  def test_parse_input_fn_result_tuple(self):
-    def _input_fn():
-      features = constant_op.constant(np.arange(100))
-      labels = constant_op.constant(np.arange(100, 200))
-      return features, labels
-
-    features, labels, hooks = util.parse_input_fn_result(_input_fn())
-
-    with self.cached_session() as sess:
-      vals = sess.run([features, labels])
-
-    self.assertAllEqual(vals[0], np.arange(100))
-    self.assertAllEqual(vals[1], np.arange(100, 200))
-    self.assertEqual(hooks, [])
-
-  def test_parse_input_fn_result_dataset(self):
-    def _input_fn():
-      features = np.expand_dims(np.arange(100), 0)
-      labels = np.expand_dims(np.arange(100, 200), 0)
-      return dataset_ops.Dataset.from_tensor_slices((features, labels))
-
-    features, labels, hooks = util.parse_input_fn_result(_input_fn())
-
-    with training.MonitoredSession(hooks=hooks) as sess:
-      vals = sess.run([features, labels])
-
-    self.assertAllEqual(vals[0], np.arange(100))
-    self.assertAllEqual(vals[1], np.arange(100, 200))
-    self.assertIsInstance(hooks[0], util._DatasetInitializerHook)
-
-  def test_parse_input_fn_result_features_only(self):
-    def _input_fn():
-      return constant_op.constant(np.arange(100))
-
-    features, labels, hooks = util.parse_input_fn_result(_input_fn())
-
-    with self.cached_session() as sess:
-      vals = sess.run([features])
-
-    self.assertAllEqual(vals[0], np.arange(100))
-    self.assertEqual(labels, None)
-    self.assertEqual(hooks, [])
-
-  def test_parse_input_fn_result_features_only_dataset(self):
-    def _input_fn():
-      features = np.expand_dims(np.arange(100), 0)
-      return dataset_ops.Dataset.from_tensor_slices(features)
-
-    features, labels, hooks = util.parse_input_fn_result(_input_fn())
-
-    with training.MonitoredSession(hooks=hooks) as sess:
-      vals = sess.run([features])
-
-    self.assertAllEqual(vals[0], np.arange(100))
-    self.assertEqual(labels, None)
-    self.assertIsInstance(hooks[0], util._DatasetInitializerHook)
-
-  def test_parse_input_fn_result_invalid(self):
-    def _input_fn():
-      features = np.expand_dims(np.arange(100), 0)
-      labels = np.expand_dims(np.arange(100, 200), 0)
-      return dataset_ops.Dataset.from_tensor_slices((features, labels, labels))
-
-    with self.assertRaisesRegexp(ValueError, 'input_fn should return'):
-      util.parse_input_fn_result(_input_fn())
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index 82acde584e..809a73c278 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -157,7 +157,7 @@ py_test(
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:backprop",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:estimator_py",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/python/tools/api/generator/BUILD b/tensorflow/python/tools/api/generator/BUILD
index 90be2cc4f7..9fd069c5be 100644
--- a/tensorflow/python/tools/api/generator/BUILD
+++ b/tensorflow/python/tools/api/generator/BUILD
@@ -4,7 +4,6 @@
 licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "py_test")
-load("//tensorflow/python/tools/api/generator:api_gen.bzl", "ESTIMATOR_API_INIT_FILES")
 load("//tensorflow/python/tools/api/generator:api_init_files.bzl", "TENSORFLOW_API_INIT_FILES")
 load("//tensorflow/python/tools/api/generator:api_init_files_v1.bzl", "TENSORFLOW_API_INIT_FILES_V1")
 
@@ -66,23 +65,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "estimator_doc_srcs_test",
-    srcs = ["doc_srcs_test.py"],
-    args = [
-        "--package=tensorflow.python.estimator",
-        "--api_name=estimator",
-    ] + ESTIMATOR_API_INIT_FILES,
-    main = "doc_srcs_test.py",
-    srcs_version = "PY2AND3",
-    deps = [
-        ":doc_srcs",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:no_contrib",
-        "//tensorflow/python/estimator:estimator_py",
-    ],
-)
-
 py_test(
     name = "output_init_files_test",
     srcs = ["output_init_files_test.py"],
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-classifier.pbtxt
index 082e26b99b..32b84e90ce 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BaselineClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.baseline.BaselineClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.baseline.BaselineClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-regressor.pbtxt
index 7cc4191eb3..db7776b5bf 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BaselineRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.baseline.BaselineRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.baseline.BaselineRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-best-exporter.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-best-exporter.pbtxt
index 9694268199..68145735bd 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-best-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-best-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BestExporter"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.exporter.BestExporter\'>"
-  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.BestExporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
index ef3409b1b5..fa352907c0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.estimator.BoostedTreesClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
index 775130468f..154b35f306 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.estimator.BoostedTreesRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-classifier.pbtxt
index 718f415a77..ce6040d0f2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.dnn.DNNClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
index b23c019d6c..e85007e16e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNLinearCombinedClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
index caa9e3f1de..8a55bb835f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNLinearCombinedRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-regressor.pbtxt
index 1f5e650940..2c4128ec48 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.dnn.DNNRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
index aa6ac46613..376becc3f9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
-  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.EstimatorSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "eval_metric_ops"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator.pbtxt
index ebd3869c9b..9d270a87ab 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.Estimator"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-eval-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-eval-spec.pbtxt
index db83ba1bd8..23c2544fe4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-eval-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-eval-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EvalSpec"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.training.EvalSpec\'>"
-  is_instance: "<class \'tensorflow.python.estimator.training.EvalSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.training.EvalSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.training.EvalSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "exporters"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-exporter.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-exporter.pbtxt
index 035af70e52..6c3f0fd910 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-exporter.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.Exporter"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-exporter.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-exporter.pbtxt
index ee37b1fa21..e030d401ea 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.FinalExporter"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.exporter.FinalExporter\'>"
-  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.FinalExporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-latest-exporter.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-latest-exporter.pbtxt
index 2a9d029029..d67f2bd625 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-latest-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-latest-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LatestExporter"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.exporter.LatestExporter\'>"
-  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.LatestExporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-classifier.pbtxt
index 53ec5a0c78..4b5de2e245 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LinearClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.linear.LinearClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.linear.LinearClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-regressor.pbtxt
index 3791162619..0d1510e9ab 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LinearRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.linear.LinearRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.linear.LinearRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt
index 6a1c24fa63..bf7c1abcd8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.ModeKeys"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.model_fn.ModeKeys\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.ModeKeys\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "EVAL"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-run-config.pbtxt
index 269e18a0a7..827b1ac5a5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-run-config.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-run-config.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.RunConfig"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.run_config.RunConfig\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.run_config.RunConfig\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "cluster_spec"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-train-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-train-spec.pbtxt
index 7d2f77438a..1d9f51a20e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-train-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-train-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.TrainSpec"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.training.TrainSpec\'>"
-  is_instance: "<class \'tensorflow.python.estimator.training.TrainSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.training.TrainSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.training.TrainSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "hooks"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-warm-start-settings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-warm-start-settings.pbtxt
index 43f5343359..dca2c1fe11 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-warm-start-settings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-warm-start-settings.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.WarmStartSettings"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.estimator.WarmStartSettings\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.WarmStartSettings\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.WarmStartSettings\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.WarmStartSettings\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "ckpt_to_initialize_from"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-classification-output.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-classification-output.pbtxt
index 2df1840c4a..52874dd9b9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-classification-output.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-classification-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.ClassificationOutput"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ClassificationOutput\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ClassificationOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "classes"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-export-output.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-export-output.pbtxt
index fa62e8ced8..964c315e97 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-export-output.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-export-output.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.export.ExportOutput"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-predict-output.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-predict-output.pbtxt
index e0160b10ce..bb82bc9e58 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-predict-output.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-predict-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.PredictOutput"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.PredictOutput\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.PredictOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "outputs"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-regression-output.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-regression-output.pbtxt
index 905f0e0553..8522834433 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-regression-output.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-regression-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.RegressionOutput"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.RegressionOutput\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.RegressionOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "value"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-serving-input-receiver.pbtxt
index d71b2a4300..a0371a1663 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-serving-input-receiver.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-serving-input-receiver.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.ServingInputReceiver"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export.ServingInputReceiver\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export.ServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.ServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.ServingInputReceiver\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "features"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
index 4fe92643bf..da9d05df23 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.TensorServingInputReceiver"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export.TensorServingInputReceiver\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export.TensorServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.TensorServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.TensorServingInputReceiver\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "features"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-classifier.pbtxt
index 082e26b99b..32b84e90ce 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BaselineClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.baseline.BaselineClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.baseline.BaselineClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-regressor.pbtxt
index 7cc4191eb3..db7776b5bf 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BaselineRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.baseline.BaselineRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.baseline.BaselineRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-best-exporter.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-best-exporter.pbtxt
index 9694268199..68145735bd 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-best-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-best-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BestExporter"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.exporter.BestExporter\'>"
-  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.BestExporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
index ef3409b1b5..fa352907c0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.estimator.BoostedTreesClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
index 775130468f..154b35f306 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.estimator.BoostedTreesRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-classifier.pbtxt
index 718f415a77..ce6040d0f2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.dnn.DNNClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
index b23c019d6c..e85007e16e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNLinearCombinedClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
index caa9e3f1de..8a55bb835f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNLinearCombinedRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-regressor.pbtxt
index 1f5e650940..2c4128ec48 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.dnn.DNNRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
index aa6ac46613..376becc3f9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
-  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.EstimatorSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "eval_metric_ops"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator.pbtxt
index ebd3869c9b..9d270a87ab 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.Estimator"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-eval-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-eval-spec.pbtxt
index db83ba1bd8..23c2544fe4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-eval-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-eval-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EvalSpec"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.training.EvalSpec\'>"
-  is_instance: "<class \'tensorflow.python.estimator.training.EvalSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.training.EvalSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.training.EvalSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "exporters"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-exporter.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-exporter.pbtxt
index 035af70e52..6c3f0fd910 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-exporter.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.Exporter"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-exporter.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-exporter.pbtxt
index ee37b1fa21..e030d401ea 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.FinalExporter"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.exporter.FinalExporter\'>"
-  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.FinalExporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-latest-exporter.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-latest-exporter.pbtxt
index 2a9d029029..d67f2bd625 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-latest-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-latest-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LatestExporter"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.exporter.LatestExporter\'>"
-  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.LatestExporter\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-classifier.pbtxt
index 53ec5a0c78..4b5de2e245 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LinearClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.linear.LinearClassifier\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.linear.LinearClassifier\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-regressor.pbtxt
index 3791162619..0d1510e9ab 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LinearRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.canned.linear.LinearRegressor\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.linear.LinearRegressor\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
index 6a1c24fa63..bf7c1abcd8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.ModeKeys"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.model_fn.ModeKeys\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.ModeKeys\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "EVAL"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-run-config.pbtxt
index 269e18a0a7..827b1ac5a5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-run-config.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-run-config.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.RunConfig"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.run_config.RunConfig\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.run_config.RunConfig\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "cluster_spec"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-train-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-train-spec.pbtxt
index 7d2f77438a..1d9f51a20e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-train-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-train-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.TrainSpec"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.training.TrainSpec\'>"
-  is_instance: "<class \'tensorflow.python.estimator.training.TrainSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.training.TrainSpec\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.training.TrainSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "hooks"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-warm-start-settings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-warm-start-settings.pbtxt
index 43f5343359..dca2c1fe11 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-warm-start-settings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-warm-start-settings.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.WarmStartSettings"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.estimator.WarmStartSettings\'>"
-  is_instance: "<class \'tensorflow.python.estimator.estimator.WarmStartSettings\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.WarmStartSettings\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.WarmStartSettings\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "ckpt_to_initialize_from"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-classification-output.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-classification-output.pbtxt
index 2df1840c4a..52874dd9b9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-classification-output.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-classification-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.ClassificationOutput"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ClassificationOutput\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ClassificationOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "classes"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-export-output.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-export-output.pbtxt
index fa62e8ced8..964c315e97 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-export-output.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-export-output.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.export.ExportOutput"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-predict-output.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-predict-output.pbtxt
index e0160b10ce..bb82bc9e58 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-predict-output.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-predict-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.PredictOutput"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.PredictOutput\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.PredictOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "outputs"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-regression-output.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-regression-output.pbtxt
index 905f0e0553..8522834433 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-regression-output.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-regression-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.RegressionOutput"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.RegressionOutput\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.RegressionOutput\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "value"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-serving-input-receiver.pbtxt
index d71b2a4300..a0371a1663 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-serving-input-receiver.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-serving-input-receiver.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.ServingInputReceiver"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export.ServingInputReceiver\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export.ServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.ServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.ServingInputReceiver\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "features"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
index 4fe92643bf..da9d05df23 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.TensorServingInputReceiver"
 tf_class {
-  is_instance: "<class \'tensorflow.python.estimator.export.export.TensorServingInputReceiver\'>"
-  is_instance: "<class \'tensorflow.python.estimator.export.export.TensorServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.TensorServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.TensorServingInputReceiver\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "features"
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index dd1dca9ee8..0a20a85c58 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -57,6 +57,7 @@ REQUIRED_PACKAGES = [
     'six >= 1.10.0',
     'protobuf >= 3.6.1',
     'tensorboard >= 1.11.0, < 1.12.0',
+    'tensorflow_estimator >= 1.10.0',
     'termcolor >= 1.1.0',
 ]
 
-- 
GitLab


From 78d2d95c4066a0327174c279209c4dcc33f8b0de Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Thu, 11 Oct 2018 09:52:14 -0700
Subject: [PATCH 0779/1085] Update all the RNN tests to also run in eager
 context.

Also fix the initial_state and constants param in the eager context.

PiperOrigin-RevId: 216712447
---
 tensorflow/python/keras/layers/recurrent.py   |   9 +-
 .../python/keras/layers/recurrent_test.py     | 941 +++++++++---------
 2 files changed, 486 insertions(+), 464 deletions(-)

diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index b07ec71178..31933070c6 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -653,14 +653,12 @@ class RNN(Layer):
     additional_inputs = []
     additional_specs = []
     if initial_state is not None:
-      kwargs['initial_state'] = initial_state
       additional_inputs += initial_state
       self.state_spec = [
           InputSpec(shape=K.int_shape(state)) for state in initial_state
       ]
       additional_specs += self.state_spec
     if constants is not None:
-      kwargs['constants'] = constants
       additional_inputs += constants
       self.constants_spec = [
           InputSpec(shape=K.int_shape(constant)) for constant in constants
@@ -688,6 +686,10 @@ class RNN(Layer):
       self.input_spec = original_input_spec
       return output
     else:
+      if initial_state is not None:
+        kwargs['initial_state'] = initial_state
+      if constants is not None:
+        kwargs['constants'] = constants
       return super(RNN, self).__call__(inputs, **kwargs)
 
   def call(self,
@@ -706,6 +708,7 @@ class RNN(Layer):
         initial_state = inputs[1:]
       else:
         initial_state = inputs[1:-self._num_constants]
+        constants = inputs[-self._num_constants:]
       if len(initial_state) == 0:
         initial_state = None
       inputs = inputs[0]
@@ -2458,7 +2461,7 @@ def _generate_zero_filled_state_for_cell(cell, inputs, batch_size, dtype):
 
 def _generate_zero_filled_state(batch_size_tensor, state_size, dtype):
   """Generate a zero filled tensor with shape [batch_size, state_size]."""
-  if None in [batch_size_tensor, dtype]:
+  if batch_size_tensor is None or dtype is None:
     raise ValueError(
         'batch_size and dtype cannot be None while constructing initial state: '
         'batch_size={}, dtype={}'.format(batch_size_tensor, dtype))
diff --git a/tensorflow/python/keras/layers/recurrent_test.py b/tensorflow/python/keras/layers/recurrent_test.py
index d246be6b45..c343c03a8c 100644
--- a/tensorflow/python/keras/layers/recurrent_test.py
+++ b/tensorflow/python/keras/layers/recurrent_test.py
@@ -24,15 +24,19 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python import keras
+from tensorflow.python.eager import context
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import special_math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import test
+from tensorflow.python.training import rmsprop
 from tensorflow.python.training.checkpointable import util as checkpointable_util
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class RNNTest(test.TestCase):
 
   def test_minimal_rnn_cell_non_layer(self):
@@ -50,25 +54,26 @@ class RNNTest(test.TestCase):
         output = keras.backend.dot(inputs, self.kernel) + prev_output
         return output, [output]
 
-    with self.cached_session():
-      # Basic test case.
-      cell = MinimalRNNCell(32, 5)
-      x = keras.Input((None, 5))
-      layer = keras.layers.RNN(cell)
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
-
-      # Test stacking.
-      cells = [MinimalRNNCell(8, 5),
-               MinimalRNNCell(32, 8),
-               MinimalRNNCell(32, 32)]
-      layer = keras.layers.RNN(cells)
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
+    # Basic test case.
+    cell = MinimalRNNCell(32, 5)
+    x = keras.Input((None, 5))
+    layer = keras.layers.RNN(cell)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
+
+    # Test stacking.
+    cells = [MinimalRNNCell(8, 5),
+             MinimalRNNCell(32, 8),
+             MinimalRNNCell(32, 32)]
+    layer = keras.layers.RNN(cells)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
 
   def test_minimal_rnn_cell_non_layer_multiple_states(self):
 
@@ -88,27 +93,28 @@ class RNNTest(test.TestCase):
         output -= prev_output_2
         return output, [output * 2, output * 3]
 
-    with self.cached_session():
-      # Basic test case.
-      cell = MinimalRNNCell(32, 5)
-      x = keras.Input((None, 5))
-      layer = keras.layers.RNN(cell)
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
-
-      # Test stacking.
-      cells = [MinimalRNNCell(8, 5),
-               MinimalRNNCell(16, 8),
-               MinimalRNNCell(32, 16)]
-      layer = keras.layers.RNN(cells)
-      self.assertEqual(layer.cell.state_size, (8, 8, 16, 16, 32, 32))
-      self.assertEqual(layer.cell.output_size, 32)
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
+    # Basic test case.
+    cell = MinimalRNNCell(32, 5)
+    x = keras.Input((None, 5))
+    layer = keras.layers.RNN(cell)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
+
+    # Test stacking.
+    cells = [MinimalRNNCell(8, 5),
+             MinimalRNNCell(16, 8),
+             MinimalRNNCell(32, 16)]
+    layer = keras.layers.RNN(cells)
+    self.assertEqual(layer.cell.state_size, (8, 8, 16, 16, 32, 32))
+    self.assertEqual(layer.cell.output_size, 32)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
 
   def test_minimal_rnn_cell_layer(self):
 
@@ -140,51 +146,52 @@ class RNNTest(test.TestCase):
         base_config = super(MinimalRNNCell, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
 
-    with self.cached_session():
-      # Test basic case.
-      x = keras.Input((None, 5))
-      cell = MinimalRNNCell(32)
-      layer = keras.layers.RNN(cell)
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
-
-      # Test basic case serialization.
-      x_np = np.random.random((6, 5, 5))
-      y_np = model.predict(x_np)
-      weights = model.get_weights()
-      config = layer.get_config()
-      with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}):
-        layer = keras.layers.RNN.from_config(config)
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.set_weights(weights)
-      y_np_2 = model.predict(x_np)
-      self.assertAllClose(y_np, y_np_2, atol=1e-4)
-
-      # Test stacking.
-      cells = [MinimalRNNCell(8),
-               MinimalRNNCell(12),
-               MinimalRNNCell(32)]
-      layer = keras.layers.RNN(cells)
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
-
-      # Test stacked RNN serialization.
-      x_np = np.random.random((6, 5, 5))
-      y_np = model.predict(x_np)
-      weights = model.get_weights()
-      config = layer.get_config()
-      with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}):
-        layer = keras.layers.RNN.from_config(config)
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.set_weights(weights)
-      y_np_2 = model.predict(x_np)
-      self.assertAllClose(y_np, y_np_2, atol=1e-4)
+    # Test basic case.
+    x = keras.Input((None, 5))
+    cell = MinimalRNNCell(32)
+    layer = keras.layers.RNN(cell)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
+
+    # Test basic case serialization.
+    x_np = np.random.random((6, 5, 5))
+    y_np = model.predict(x_np)
+    weights = model.get_weights()
+    config = layer.get_config()
+    with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}):
+      layer = keras.layers.RNN.from_config(config)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.set_weights(weights)
+    y_np_2 = model.predict(x_np)
+    self.assertAllClose(y_np, y_np_2, atol=1e-4)
+
+    # Test stacking.
+    cells = [MinimalRNNCell(8),
+             MinimalRNNCell(12),
+             MinimalRNNCell(32)]
+    layer = keras.layers.RNN(cells)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
+
+    # Test stacked RNN serialization.
+    x_np = np.random.random((6, 5, 5))
+    y_np = model.predict(x_np)
+    weights = model.get_weights()
+    config = layer.get_config()
+    with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}):
+      layer = keras.layers.RNN.from_config(config)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.set_weights(weights)
+    y_np_2 = model.predict(x_np)
+    self.assertAllClose(y_np, y_np_2, atol=1e-4)
 
   def test_rnn_with_time_major(self):
     batch = 10
@@ -192,89 +199,89 @@ class RNNTest(test.TestCase):
     embedding_dim = 4
     units = 3
 
-    with self.cached_session():
-      # Test basic case.
-      x = keras.Input((time_step, embedding_dim))
-      time_major_x = keras.layers.Lambda(
-          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
-      layer = keras.layers.SimpleRNN(
-          units, time_major=True, return_sequences=True)
-      self.assertEqual(
-          layer.compute_output_shape((time_step, None,
-                                      embedding_dim)).as_list(),
-          [time_step, None, units])
-      y = layer(time_major_x)
-      self.assertEqual(layer.output_shape, (time_step, None, units))
-
-      y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(y)
-
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          np.zeros((batch, time_step, embedding_dim)),
-          np.zeros((batch, time_step, units)))
-
-    with self.cached_session():
-      # Test stacking.
-      x = keras.Input((time_step, embedding_dim))
-      time_major_x = keras.layers.Lambda(
-          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
-      cell_units = [10, 8, 6]
-      cells = [keras.layers.SimpleRNNCell(cell_units[i]) for i in range(3)]
-      layer = keras.layers.RNN(cells, time_major=True, return_sequences=True)
-      y = layer(time_major_x)
-      self.assertEqual(layer.output_shape, (time_step, None, cell_units[-1]))
-
-      y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(y)
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          np.zeros((batch, time_step, embedding_dim)),
-          np.zeros((batch, time_step, cell_units[-1])))
-
-    with self.cached_session():
-      # Test masking.
-      x = keras.Input((time_step, embedding_dim))
-      time_major = keras.layers.Lambda(
-          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
-      mask = keras.layers.Masking()(time_major)
-      rnn = keras.layers.SimpleRNN(
-          units, time_major=True, return_sequences=True)(mask)
-      y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(rnn)
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          np.zeros((batch, time_step, embedding_dim)),
-          np.zeros((batch, time_step, units)))
-
-    with self.cached_session():
-      # Test layer output
-      x = keras.Input((time_step, embedding_dim))
-      rnn_1 = keras.layers.SimpleRNN(units, return_sequences=True)
-      y = rnn_1(x)
-
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          np.zeros((batch, time_step, embedding_dim)),
-          np.zeros((batch, time_step, units)))
-
-      x_np = np.random.random((batch, time_step, embedding_dim))
-      y_np_1 = model.predict(x_np)
-
-      time_major = keras.layers.Lambda(
-          lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
-      rnn_2 = keras.layers.SimpleRNN(
-          units, time_major=True, return_sequences=True)
-      y_2 = rnn_2(time_major)
-      y_2 = keras.layers.Lambda(
-          lambda t: array_ops.transpose(t, [1, 0, 2]))(y_2)
-
-      model_2 = keras.models.Model(x, y_2)
-      rnn_2.set_weights(rnn_1.get_weights())
-
-      y_np_2 = model_2.predict(x_np)
-      self.assertAllClose(y_np_1, y_np_2, atol=1e-4)
+    # Test basic case.
+    x = keras.Input((time_step, embedding_dim))
+    time_major_x = keras.layers.Lambda(
+        lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+    layer = keras.layers.SimpleRNN(
+        units, time_major=True, return_sequences=True)
+    self.assertEqual(
+        layer.compute_output_shape((time_step, None,
+                                    embedding_dim)).as_list(),
+        [time_step, None, units])
+    y = layer(time_major_x)
+    self.assertEqual(layer.output_shape, (time_step, None, units))
+
+    y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(y)
+
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        np.zeros((batch, time_step, embedding_dim)),
+        np.zeros((batch, time_step, units)))
+
+    # Test stacking.
+    x = keras.Input((time_step, embedding_dim))
+    time_major_x = keras.layers.Lambda(
+        lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+    cell_units = [10, 8, 6]
+    cells = [keras.layers.SimpleRNNCell(cell_units[i]) for i in range(3)]
+    layer = keras.layers.RNN(cells, time_major=True, return_sequences=True)
+    y = layer(time_major_x)
+    self.assertEqual(layer.output_shape, (time_step, None, cell_units[-1]))
+
+    y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(y)
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        np.zeros((batch, time_step, embedding_dim)),
+        np.zeros((batch, time_step, cell_units[-1])))
+
+    # Test masking.
+    x = keras.Input((time_step, embedding_dim))
+    time_major = keras.layers.Lambda(
+        lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+    mask = keras.layers.Masking()(time_major)
+    rnn = keras.layers.SimpleRNN(
+        units, time_major=True, return_sequences=True)(mask)
+    y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(rnn)
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        np.zeros((batch, time_step, embedding_dim)),
+        np.zeros((batch, time_step, units)))
+
+    # Test layer output
+    x = keras.Input((time_step, embedding_dim))
+    rnn_1 = keras.layers.SimpleRNN(units, return_sequences=True)
+    y = rnn_1(x)
+
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        np.zeros((batch, time_step, embedding_dim)),
+        np.zeros((batch, time_step, units)))
+
+    x_np = np.random.random((batch, time_step, embedding_dim))
+    y_np_1 = model.predict(x_np)
+
+    time_major = keras.layers.Lambda(
+        lambda t: array_ops.transpose(t, [1, 0, 2]))(x)
+    rnn_2 = keras.layers.SimpleRNN(
+        units, time_major=True, return_sequences=True)
+    y_2 = rnn_2(time_major)
+    y_2 = keras.layers.Lambda(
+        lambda t: array_ops.transpose(t, [1, 0, 2]))(y_2)
+
+    model_2 = keras.models.Model(x, y_2)
+    rnn_2.set_weights(rnn_1.get_weights())
+
+    y_np_2 = model_2.predict(x_np)
+    self.assertAllClose(y_np_1, y_np_2, atol=1e-4)
 
   def test_rnn_cell_with_constants_layer(self):
 
@@ -319,89 +326,86 @@ class RNNTest(test.TestCase):
         base_config = super(RNNCellWithConstants, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
 
-    with self.cached_session():
-      # Test basic case.
-      x = keras.Input((None, 5))
-      c = keras.Input((3,))
-      cell = RNNCellWithConstants(32)
-      layer = keras.layers.RNN(cell)
-      y = layer(x, constants=c)
-
-      model = keras.models.Model([x, c], y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          [np.zeros((6, 5, 5)), np.zeros((6, 3))],
-          np.zeros((6, 32))
-      )
-
-    with self.cached_session():
-      # Test basic case serialization.
-      x_np = np.random.random((6, 5, 5))
-      c_np = np.random.random((6, 3))
-      y_np = model.predict([x_np, c_np])
-      weights = model.get_weights()
-      config = layer.get_config()
-      custom_objects = {'RNNCellWithConstants': RNNCellWithConstants}
-      with keras.utils.CustomObjectScope(custom_objects):
-        layer = keras.layers.RNN.from_config(config.copy())
-      y = layer(x, constants=c)
-      model = keras.models.Model([x, c], y)
-      model.set_weights(weights)
-      y_np_2 = model.predict([x_np, c_np])
-      self.assertAllClose(y_np, y_np_2, atol=1e-4)
-
-    with self.cached_session():
-      # test flat list inputs.
-      with keras.utils.CustomObjectScope(custom_objects):
-        layer = keras.layers.RNN.from_config(config.copy())
-      y = layer([x, c])
-      model = keras.models.Model([x, c], y)
-      model.set_weights(weights)
-      y_np_3 = model.predict([x_np, c_np])
-      self.assertAllClose(y_np, y_np_3, atol=1e-4)
-
-    with self.cached_session():
-      # Test stacking.
-      cells = [keras.layers.recurrent.GRUCell(8),
-               RNNCellWithConstants(12),
-               RNNCellWithConstants(32)]
-      layer = keras.layers.recurrent.RNN(cells)
-      y = layer(x, constants=c)
-      model = keras.models.Model([x, c], y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          [np.zeros((6, 5, 5)), np.zeros((6, 3))],
-          np.zeros((6, 32))
-      )
-
-    with self.cached_session():
-      # Test GRUCell reset_after property.
-      x = keras.Input((None, 5))
-      c = keras.Input((3,))
-      cells = [keras.layers.recurrent.GRUCell(32, reset_after=True)]
-      layer = keras.layers.recurrent.RNN(cells)
-      y = layer(x, constants=c)
-      model = keras.models.Model([x, c], y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          [np.zeros((6, 5, 5)), np.zeros((6, 3))],
-          np.zeros((6, 32))
-      )
-
-    with self.cached_session():
-      # Test stacked RNN serialization
-      x_np = np.random.random((6, 5, 5))
-      c_np = np.random.random((6, 3))
-      y_np = model.predict([x_np, c_np])
-      weights = model.get_weights()
-      config = layer.get_config()
-      with keras.utils.CustomObjectScope(custom_objects):
-        layer = keras.layers.recurrent.RNN.from_config(config.copy())
-      y = layer(x, constants=c)
-      model = keras.models.Model([x, c], y)
-      model.set_weights(weights)
-      y_np_2 = model.predict([x_np, c_np])
-      self.assertAllClose(y_np, y_np_2, atol=1e-4)
+    # Test basic case.
+    x = keras.Input((None, 5))
+    c = keras.Input((3,))
+    cell = RNNCellWithConstants(32)
+    layer = keras.layers.RNN(cell)
+    y = layer(x, constants=c)
+
+    model = keras.models.Model([x, c], y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((6, 5, 5)), np.zeros((6, 3))],
+        np.zeros((6, 32))
+    )
+
+    # Test basic case serialization.
+    x_np = np.random.random((6, 5, 5))
+    c_np = np.random.random((6, 3))
+    y_np = model.predict([x_np, c_np])
+    weights = model.get_weights()
+    config = layer.get_config()
+    custom_objects = {'RNNCellWithConstants': RNNCellWithConstants}
+    with keras.utils.CustomObjectScope(custom_objects):
+      layer = keras.layers.RNN.from_config(config.copy())
+    y = layer(x, constants=c)
+    model = keras.models.Model([x, c], y)
+    model.set_weights(weights)
+    y_np_2 = model.predict([x_np, c_np])
+    self.assertAllClose(y_np, y_np_2, atol=1e-4)
+
+    # test flat list inputs.
+    with keras.utils.CustomObjectScope(custom_objects):
+      layer = keras.layers.RNN.from_config(config.copy())
+    y = layer([x, c])
+    model = keras.models.Model([x, c], y)
+    model.set_weights(weights)
+    y_np_3 = model.predict([x_np, c_np])
+    self.assertAllClose(y_np, y_np_3, atol=1e-4)
+
+    # Test stacking.
+    cells = [keras.layers.recurrent.GRUCell(8),
+             RNNCellWithConstants(12),
+             RNNCellWithConstants(32)]
+    layer = keras.layers.recurrent.RNN(cells)
+    y = layer(x, constants=c)
+    model = keras.models.Model([x, c], y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((6, 5, 5)), np.zeros((6, 3))],
+        np.zeros((6, 32))
+    )
+
+    # Test GRUCell reset_after property.
+    x = keras.Input((None, 5))
+    c = keras.Input((3,))
+    cells = [keras.layers.recurrent.GRUCell(32, reset_after=True)]
+    layer = keras.layers.recurrent.RNN(cells)
+    y = layer(x, constants=c)
+    model = keras.models.Model([x, c], y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((6, 5, 5)), np.zeros((6, 3))],
+        np.zeros((6, 32))
+    )
+
+    # Test stacked RNN serialization
+    x_np = np.random.random((6, 5, 5))
+    c_np = np.random.random((6, 3))
+    y_np = model.predict([x_np, c_np])
+    weights = model.get_weights()
+    config = layer.get_config()
+    with keras.utils.CustomObjectScope(custom_objects):
+      layer = keras.layers.recurrent.RNN.from_config(config.copy())
+    y = layer(x, constants=c)
+    model = keras.models.Model([x, c], y)
+    model.set_weights(weights)
+    y_np_2 = model.predict([x_np, c_np])
+    self.assertAllClose(y_np, y_np_2, atol=1e-4)
 
   def test_rnn_cell_with_constants_layer_passing_initial_state(self):
 
@@ -446,54 +450,55 @@ class RNNTest(test.TestCase):
         base_config = super(RNNCellWithConstants, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
 
-    with self.cached_session():
-      # Test basic case.
-      x = keras.Input((None, 5))
-      c = keras.Input((3,))
-      s = keras.Input((32,))
-      cell = RNNCellWithConstants(32)
-      layer = keras.layers.RNN(cell)
-      y = layer(x, initial_state=s, constants=c)
-      model = keras.models.Model([x, s, c], y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))],
-          np.zeros((6, 32))
-      )
-
-    with self.cached_session():
-      # Test basic case serialization.
-      x_np = np.random.random((6, 5, 5))
-      s_np = np.random.random((6, 32))
-      c_np = np.random.random((6, 3))
-      y_np = model.predict([x_np, s_np, c_np])
-      weights = model.get_weights()
-      config = layer.get_config()
-      custom_objects = {'RNNCellWithConstants': RNNCellWithConstants}
-      with keras.utils.CustomObjectScope(custom_objects):
-        layer = keras.layers.RNN.from_config(config.copy())
-      y = layer(x, initial_state=s, constants=c)
-      model = keras.models.Model([x, s, c], y)
-      model.set_weights(weights)
-      y_np_2 = model.predict([x_np, s_np, c_np])
-      self.assertAllClose(y_np, y_np_2, atol=1e-4)
-
-      # verify that state is used
-      y_np_2_different_s = model.predict([x_np, s_np + 10., c_np])
-      with self.assertRaises(AssertionError):
-        self.assertAllClose(y_np, y_np_2_different_s, atol=1e-4)
-
-    with self.cached_session():
-      # test flat list inputs
-      with keras.utils.CustomObjectScope(custom_objects):
-        layer = keras.layers.RNN.from_config(config.copy())
-      y = layer([x, s, c])
-      model = keras.models.Model([x, s, c], y)
-      model.set_weights(weights)
-      y_np_3 = model.predict([x_np, s_np, c_np])
-      self.assertAllClose(y_np, y_np_3, atol=1e-4)
+    # Test basic case.
+    x = keras.Input((None, 5))
+    c = keras.Input((3,))
+    s = keras.Input((32,))
+    cell = RNNCellWithConstants(32)
+    layer = keras.layers.RNN(cell)
+    y = layer(x, initial_state=s, constants=c)
+    model = keras.models.Model([x, s, c], y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))],
+        np.zeros((6, 32))
+    )
+
+    # Test basic case serialization.
+    x_np = np.random.random((6, 5, 5))
+    s_np = np.random.random((6, 32))
+    c_np = np.random.random((6, 3))
+    y_np = model.predict([x_np, s_np, c_np])
+    weights = model.get_weights()
+    config = layer.get_config()
+    custom_objects = {'RNNCellWithConstants': RNNCellWithConstants}
+    with keras.utils.CustomObjectScope(custom_objects):
+      layer = keras.layers.RNN.from_config(config.copy())
+    y = layer(x, initial_state=s, constants=c)
+    model = keras.models.Model([x, s, c], y)
+    model.set_weights(weights)
+    y_np_2 = model.predict([x_np, s_np, c_np])
+    self.assertAllClose(y_np, y_np_2, atol=1e-4)
+
+    # verify that state is used
+    y_np_2_different_s = model.predict([x_np, s_np + 10., c_np])
+    with self.assertRaises(AssertionError):
+      self.assertAllClose(y_np, y_np_2_different_s, atol=1e-4)
+
+    # test flat list inputs
+    with keras.utils.CustomObjectScope(custom_objects):
+      layer = keras.layers.RNN.from_config(config.copy())
+    y = layer([x, s, c])
+    model = keras.models.Model([x, s, c], y)
+    model.set_weights(weights)
+    y_np_3 = model.predict([x_np, s_np, c_np])
+    self.assertAllClose(y_np, y_np_3, atol=1e-4)
 
   def test_stacked_rnn_attributes(self):
+    if context.executing_eagerly():
+      self.skipTest('reduce_sum is not available in eager mode.')
+
     cells = [keras.layers.LSTMCell(1),
              keras.layers.LSTMCell(1)]
     layer = keras.layers.RNN(cells)
@@ -558,67 +563,67 @@ class RNNTest(test.TestCase):
     timesteps = 2
     num_samples = 2
 
-    with self.cached_session():
-      input1 = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
-      layer = layer_class(units,
-                          return_state=True,
-                          return_sequences=True,
-                          dropout=0.2)
-      state = layer(input1)[1:]
+    input1 = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
+    layer = layer_class(units,
+                        return_state=True,
+                        return_sequences=True,
+                        dropout=0.2)
+    state = layer(input1)[1:]
 
-      input2 = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
-      output = layer_class(units)(input2, initial_state=state)
-      model = keras.Model([input1, input2], output)
+    input2 = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
+    output = layer_class(units)(input2, initial_state=state)
+    model = keras.Model([input1, input2], output)
 
-      inputs = [np.random.random((num_samples, timesteps, embedding_dim)),
-                np.random.random((num_samples, timesteps, embedding_dim))]
-      model.predict(inputs)
+    inputs = [np.random.random((num_samples, timesteps, embedding_dim)),
+              np.random.random((num_samples, timesteps, embedding_dim))]
+    model.predict(inputs)
 
   def test_builtin_rnn_cell_serialization(self):
     for cell_class in [keras.layers.SimpleRNNCell,
                        keras.layers.GRUCell,
                        keras.layers.LSTMCell]:
-      with self.cached_session():
-        # Test basic case.
-        x = keras.Input((None, 5))
-        cell = cell_class(32)
-        layer = keras.layers.RNN(cell)
-        y = layer(x)
-        model = keras.models.Model(x, y)
-        model.compile(optimizer='rmsprop', loss='mse')
-
-        # Test basic case serialization.
-        x_np = np.random.random((6, 5, 5))
-        y_np = model.predict(x_np)
-        weights = model.get_weights()
-        config = layer.get_config()
-        layer = keras.layers.RNN.from_config(config)
-        y = layer(x)
-        model = keras.models.Model(x, y)
-        model.set_weights(weights)
-        y_np_2 = model.predict(x_np)
-        self.assertAllClose(y_np, y_np_2, atol=1e-4)
-
-        # Test stacking.
-        cells = [cell_class(8),
-                 cell_class(12),
-                 cell_class(32)]
-        layer = keras.layers.RNN(cells)
-        y = layer(x)
-        model = keras.models.Model(x, y)
-        model.compile(optimizer='rmsprop', loss='mse')
-
-        # Test stacked RNN serialization.
-        x_np = np.random.random((6, 5, 5))
-        y_np = model.predict(x_np)
-        weights = model.get_weights()
-        config = layer.get_config()
-        layer = keras.layers.RNN.from_config(config)
-        y = layer(x)
-        model = keras.models.Model(x, y)
-        model.set_weights(weights)
-        y_np_2 = model.predict(x_np)
-        self.assertAllClose(y_np, y_np_2, atol=1e-4)
+      # Test basic case.
+      x = keras.Input((None, 5))
+      cell = cell_class(32)
+      layer = keras.layers.RNN(cell)
+      y = layer(x)
+      model = keras.models.Model(x, y)
+      model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                    loss='mse')
+
+      # Test basic case serialization.
+      x_np = np.random.random((6, 5, 5))
+      y_np = model.predict(x_np)
+      weights = model.get_weights()
+      config = layer.get_config()
+      layer = keras.layers.RNN.from_config(config)
+      y = layer(x)
+      model = keras.models.Model(x, y)
+      model.set_weights(weights)
+      y_np_2 = model.predict(x_np)
+      self.assertAllClose(y_np, y_np_2, atol=1e-4)
+
+      # Test stacking.
+      cells = [cell_class(8),
+               cell_class(12),
+               cell_class(32)]
+      layer = keras.layers.RNN(cells)
+      y = layer(x)
+      model = keras.models.Model(x, y)
+      model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                    loss='mse')
+
+      # Test stacked RNN serialization.
+      x_np = np.random.random((6, 5, 5))
+      y_np = model.predict(x_np)
+      weights = model.get_weights()
+      config = layer.get_config()
+      layer = keras.layers.RNN.from_config(config)
+      y = layer(x)
+      model = keras.models.Model(x, y)
+      model.set_weights(weights)
+      y_np_2 = model.predict(x_np)
+      self.assertAllClose(y_np, y_np_2, atol=1e-4)
 
   def DISABLED_test_stacked_rnn_dropout(self):
     # Temporarily disabled test due an occasional Grappler segfault.
@@ -627,14 +632,13 @@ class RNNTest(test.TestCase):
              keras.layers.LSTMCell(3, dropout=0.1, recurrent_dropout=0.1)]
     layer = keras.layers.RNN(cells)
 
-    with self.cached_session():
-      x = keras.Input((None, 5))
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.compile('sgd', 'mse')
-      x_np = np.random.random((6, 5, 5))
-      y_np = np.random.random((6, 3))
-      model.train_on_batch(x_np, y_np)
+    x = keras.Input((None, 5))
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.compile('sgd', 'mse')
+    x_np = np.random.random((6, 5, 5))
+    y_np = np.random.random((6, 3))
+    model.train_on_batch(x_np, y_np)
 
   def test_stacked_rnn_compute_output_shape(self):
     cells = [keras.layers.LSTMCell(3),
@@ -669,62 +673,65 @@ class RNNTest(test.TestCase):
 
   def test_checkpointable_dependencies(self):
     rnn = keras.layers.SimpleRNN
-    with self.cached_session():
-      x = np.random.random((2, 2, 2))
-      y = np.random.random((2, 2))
-      model = keras.models.Sequential()
-      model.add(rnn(2))
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.fit(x, y, epochs=1, batch_size=1)
-
-      # check whether the model variables are present in the
-      # checkpointable list of objects
-      checkpointed_objects = set(checkpointable_util.list_objects(model))
-      for v in model.variables:
-        self.assertIn(v, checkpointed_objects)
+    x = np.random.random((2, 2, 2))
+    y = np.random.random((2, 2))
+    model = keras.models.Sequential()
+    model.add(rnn(2))
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.fit(x, y, epochs=1, batch_size=1)
+
+    # check whether the model variables are present in the
+    # checkpointable list of objects
+    checkpointed_objects = set(checkpointable_util.list_objects(model))
+    for v in model.variables:
+      self.assertIn(v, checkpointed_objects)
 
   def test_high_dimension_RNN(self):
-    with self.cached_session():
-      # Basic test case.
-      unit_a = 10
-      unit_b = 20
-      input_a = 5
-      input_b = 10
-      batch = 32
-      time_step = 4
-
-      cell = Minimal2DRNNCell(unit_a, unit_b)
-      x = keras.Input((None, input_a, input_b))
-      layer = keras.layers.RNN(cell)
-      y = layer(x)
+    # Basic test case.
+    unit_a = 10
+    unit_b = 20
+    input_a = 5
+    input_b = 10
+    batch = 32
+    time_step = 4
+
+    cell = Minimal2DRNNCell(unit_a, unit_b)
+    x = keras.Input((None, input_a, input_b))
+    layer = keras.layers.RNN(cell)
+    y = layer(x)
 
-      self.assertEqual(cell.state_size.as_list(), [unit_a, unit_b])
+    self.assertEqual(cell.state_size.as_list(), [unit_a, unit_b])
+
+    if not context.executing_eagerly():
       init_state = layer.get_initial_state(x)
       self.assertEqual(len(init_state), 1)
       self.assertEqual(init_state[0].get_shape().as_list(),
                        [None, unit_a, unit_b])
 
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          np.zeros((batch, time_step, input_a, input_b)),
-          np.zeros((batch, unit_a, unit_b)))
-      self.assertEqual(model.output_shape, (None, unit_a, unit_b))
-
-      # Test stacking.
-      cells = [
-          Minimal2DRNNCell(unit_a, unit_b),
-          Minimal2DRNNCell(unit_a * 2, unit_b * 2),
-          Minimal2DRNNCell(unit_a * 4, unit_b * 4)
-      ]
-      layer = keras.layers.RNN(cells)
-      y = layer(x)
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          np.zeros((batch, time_step, input_a, input_b)),
-          np.zeros((batch, unit_a * 4, unit_b * 4)))
-      self.assertEqual(model.output_shape, (None, unit_a * 4, unit_b * 4))
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        np.zeros((batch, time_step, input_a, input_b)),
+        np.zeros((batch, unit_a, unit_b)))
+    self.assertEqual(model.output_shape, (None, unit_a, unit_b))
+
+    # Test stacking.
+    cells = [
+        Minimal2DRNNCell(unit_a, unit_b),
+        Minimal2DRNNCell(unit_a * 2, unit_b * 2),
+        Minimal2DRNNCell(unit_a * 4, unit_b * 4)
+    ]
+    layer = keras.layers.RNN(cells)
+    y = layer(x)
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        np.zeros((batch, time_step, input_a, input_b)),
+        np.zeros((batch, unit_a * 4, unit_b * 4)))
+    self.assertEqual(model.output_shape, (None, unit_a * 4, unit_b * 4))
 
   def test_high_dimension_RNN_with_init_state(self):
     unit_a = 10
@@ -734,57 +741,57 @@ class RNNTest(test.TestCase):
     batch = 32
     time_step = 4
 
-    with self.cached_session():
-      # Basic test case.
-      cell = Minimal2DRNNCell(unit_a, unit_b)
-      x = keras.Input((None, input_a, input_b))
-      s = keras.Input((unit_a, unit_b))
-      layer = keras.layers.RNN(cell)
-      y = layer(x, initial_state=s)
-
-      model = keras.models.Model([x, s], y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch([
-          np.zeros((batch, time_step, input_a, input_b)),
-          np.zeros((batch, unit_a, unit_b))
-      ], np.zeros((batch, unit_a, unit_b)))
-      self.assertEqual(model.output_shape, (None, unit_a, unit_b))
-
-    with self.cached_session():
-      # Bad init state shape.
-      bad_shape_a = unit_a * 2
-      bad_shape_b = unit_b * 2
-      cell = Minimal2DRNNCell(unit_a, unit_b)
-      x = keras.Input((None, input_a, input_b))
-      s = keras.Input((bad_shape_a, bad_shape_b))
-      layer = keras.layers.RNN(cell)
-      with self.assertRaisesWithPredicateMatch(ValueError,
-                                               'however `cell.state_size` is'):
-        layer(x, initial_state=s)
+    # Basic test case.
+    cell = Minimal2DRNNCell(unit_a, unit_b)
+    x = keras.Input((None, input_a, input_b))
+    s = keras.Input((unit_a, unit_b))
+    layer = keras.layers.RNN(cell)
+    y = layer(x, initial_state=s)
+
+    model = keras.models.Model([x, s], y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch([
+        np.zeros((batch, time_step, input_a, input_b)),
+        np.zeros((batch, unit_a, unit_b))
+    ], np.zeros((batch, unit_a, unit_b)))
+    self.assertEqual(model.output_shape, (None, unit_a, unit_b))
+
+    # Bad init state shape.
+    bad_shape_a = unit_a * 2
+    bad_shape_b = unit_b * 2
+    cell = Minimal2DRNNCell(unit_a, unit_b)
+    x = keras.Input((None, input_a, input_b))
+    s = keras.Input((bad_shape_a, bad_shape_b))
+    layer = keras.layers.RNN(cell)
+    with self.assertRaisesWithPredicateMatch(ValueError,
+                                             'however `cell.state_size` is'):
+      layer(x, initial_state=s)
 
   def test_inconsistent_output_state_size(self):
-    with self.cached_session():
-      batch = 32
-      time_step = 4
-      state_size = 5
-      input_size = 6
-      cell = PlusOneRNNCell(state_size)
-      x = keras.Input((None, input_size))
-      layer = keras.layers.RNN(cell)
-      y = layer(x)
-
-      self.assertEqual(cell.state_size, state_size)
+    batch = 32
+    time_step = 4
+    state_size = 5
+    input_size = 6
+    cell = PlusOneRNNCell(state_size)
+    x = keras.Input((None, input_size))
+    layer = keras.layers.RNN(cell)
+    y = layer(x)
+
+    self.assertEqual(cell.state_size, state_size)
+    if not context.executing_eagerly():
       init_state = layer.get_initial_state(x)
       self.assertEqual(len(init_state), 1)
       self.assertEqual(init_state[0].get_shape().as_list(),
                        [None, state_size])
 
-      model = keras.models.Model(x, y)
-      model.compile(optimizer='rmsprop', loss='mse')
-      model.train_on_batch(
-          np.zeros((batch, time_step, input_size)),
-          np.zeros((batch, input_size)))
-      self.assertEqual(model.output_shape, (None, input_size))
+    model = keras.models.Model(x, y)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        np.zeros((batch, time_step, input_size)),
+        np.zeros((batch, input_size)))
+    self.assertEqual(model.output_shape, (None, input_size))
 
   def test_get_initial_state(self):
     cell = keras.layers.SimpleRNNCell(5)
@@ -792,16 +799,28 @@ class RNNTest(test.TestCase):
                                  'batch_size and dtype cannot be None'):
       cell.get_initial_state(None, None, None)
 
-    inputs = keras.Input((None, 2, 10))
-    initial_state = cell.get_initial_state(inputs, None, None)
-    self.assertEqual(initial_state.shape.as_list(), [None, 5])
-    self.assertEqual(initial_state.dtype, inputs.dtype)
-
-    batch = array_ops.shape(inputs)[0]
-    dtype = inputs.dtype
-    initial_state = cell.get_initial_state(None, batch, dtype)
-    self.assertEqual(initial_state.shape.as_list(), [None, 5])
-    self.assertEqual(initial_state.dtype, inputs.dtype)
+    if not context.executing_eagerly():
+      inputs = keras.Input((None, 10))
+      initial_state = cell.get_initial_state(inputs, None, None)
+      self.assertEqual(initial_state.shape.as_list(), [None, 5])
+      self.assertEqual(initial_state.dtype, inputs.dtype)
+
+      batch = array_ops.shape(inputs)[0]
+      dtype = inputs.dtype
+      initial_state = cell.get_initial_state(None, batch, dtype)
+      self.assertEqual(initial_state.shape.as_list(), [None, 5])
+      self.assertEqual(initial_state.dtype, inputs.dtype)
+    else:
+      batch = 8
+      inputs = np.random.random((batch, 10))
+      initial_state = cell.get_initial_state(inputs, None, None)
+      self.assertEqual(initial_state.shape.as_list(), [8, 5])
+      self.assertEqual(initial_state.dtype, inputs.dtype)
+
+      dtype = inputs.dtype
+      initial_state = cell.get_initial_state(None, batch, dtype)
+      self.assertEqual(initial_state.shape.as_list(), [batch, 5])
+      self.assertEqual(initial_state.dtype, inputs.dtype)
 
 
 class Minimal2DRNNCell(keras.layers.Layer):
-- 
GitLab


From c84976bc9183ca02085d27018c5c370a96fae23d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 10:04:13 -0700
Subject: [PATCH 0780/1085] Java support for initializing tensorflow libraries.

PiperOrigin-RevId: 216714487
---
 tensorflow/contrib/lite/java/BUILD            |  1 +
 .../org/tensorflow/lite/TensorFlowLite.java   |  6 ++++
 .../contrib/lite/java/src/main/native/BUILD   | 31 ++++++++++++++++
 .../src/main/native/init_tensorflow_jni.cc    | 22 ++++++++++++
 .../src/main/native/init_tensorflow_jni.h     | 36 +++++++++++++++++++
 .../tensorflow/lite/InterpreterFlexTest.java  |  4 +++
 tensorflow/contrib/lite/testing/BUILD         | 11 ++++--
 7 files changed, 108 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.cc
 create mode 100644 tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.h

diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD
index e68cd26f81..cab8d5277f 100644
--- a/tensorflow/contrib/lite/java/BUILD
+++ b/tensorflow/contrib/lite/java/BUILD
@@ -224,5 +224,6 @@ tflite_jni_binary(
     deps = [
         "//tensorflow/contrib/lite/delegates/flex:delegate",
         "//tensorflow/contrib/lite/java/src/main/native",
+        "//tensorflow/contrib/lite/java/src/main/native:init_tensorflow",
     ],
 )
diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java
index d5447b3bf8..deded10182 100644
--- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java
+++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java
@@ -26,6 +26,12 @@ public final class TensorFlowLite {
   /** Returns the version of the underlying TensorFlowLite runtime. */
   public static native String version();
 
+  /**
+   * Initialize tensorflow's libraries. This will throw an exception if used when TensorFlow isn't
+   * linked in.
+   */
+  static native void initTensorFlow();
+
   /**
    * Load the TensorFlowLite runtime C library.
    */
diff --git a/tensorflow/contrib/lite/java/src/main/native/BUILD b/tensorflow/contrib/lite/java/src/main/native/BUILD
index 4b4e1c21d8..f91345f369 100644
--- a/tensorflow/contrib/lite/java/src/main/native/BUILD
+++ b/tensorflow/contrib/lite/java/src/main/native/BUILD
@@ -73,6 +73,37 @@ genrule(
     cmd = "cp -f $< $@",
 )
 
+cc_library(
+    name = "init_tensorflow",
+    srcs = [
+        "init_tensorflow_jni.cc",
+    ] + select({
+        # The Android toolchain makes "jni.h" available in the include path.
+        # For non-Android toolchains, generate jni.h and jni_md.h.
+        "//tensorflow:android": [],
+        "//conditions:default": [
+            ":jni.h",
+            ":jni_md.h",
+        ],
+    }),
+    hdrs = [
+        "init_tensorflow_jni.h",
+    ],
+    copts = tflite_copts(),
+    includes = select({
+        "//tensorflow:android": [],
+        "//conditions:default": ["."],
+    }),
+    linkopts = [
+        "-lm",
+        "-ldl",
+    ],
+    deps = [
+        "//tensorflow/contrib/lite/testing:init_tensorflow",
+    ],
+    alwayslink = 1,
+)
+
 # This includes all ops. If you want a smaller binary, you should copy and
 # modify builtin_ops_jni.cc.  You should then link your binary against both
 # ":native_framework_only" and your own version of ":native_builtin_ops".
diff --git a/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.cc b/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.cc
new file mode 100644
index 0000000000..74aa384df3
--- /dev/null
+++ b/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.cc
@@ -0,0 +1,22 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.h"
+#include "tensorflow/contrib/lite/testing/init_tensorflow.h"
+
+JNIEXPORT void JNICALL Java_org_tensorflow_lite_TensorFlowLite_initTensorFlow(
+    JNIEnv* env, jclass clazz) {
+  ::tflite::InitTensorFlow();
+}
diff --git a/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.h b/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.h
new file mode 100644
index 0000000000..4689eb05fe
--- /dev/null
+++ b/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.h
@@ -0,0 +1,36 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_JAVA_SRC_MAIN_NATIVE_INIT_TENSORFLOW_JNI_H_
+#define TENSORFLOW_CONTRIB_LITE_JAVA_SRC_MAIN_NATIVE_INIT_TENSORFLOW_JNI_H_
+
+#include <jni.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+/*
+ *  Class:     org_tensorflow_lite_TensorFlowLite
+ *  Method:    initTensorFlow
+ *  Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_tensorflow_lite_TensorFlowLite_initTensorFlow(
+    JNIEnv* env, jclass clazz);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // TENSORFLOW_CONTRIB_LITE_JAVA_SRC_MAIN_NATIVE_INIT_TENSORFLOW_JNI_H_
diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java
index 2791c3864b..3b3d9f0e7f 100644
--- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java
+++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java
@@ -43,4 +43,8 @@ public final class InterpreterFlexTest {
       interpreter.run(new float[1], new float[1]);
     }
   }
+
+  static {
+    TensorFlowLite.initTensorFlow();
+  }
 }
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index f7f812343b..af203c5507 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -308,9 +308,14 @@ cc_library(
         "//tensorflow/contrib/lite/java/src/main/native:__subpackages__",
         "//tensorflow/contrib/lite/testing:__subpackages__",
     ],
-    deps = [
-        "//tensorflow/core:lib",
-    ],
+    deps = select({
+        "//conditions:default": [
+            "//tensorflow/core:lib",
+        ],
+        "//tensorflow:android": [
+            "//tensorflow/core:android_tensorflow_lib",
+        ],
+    }),
 )
 
 cc_library(
-- 
GitLab


From 7b313d3873280feb8fae0c40c8f84fe8bda2389d Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 11 Oct 2018 10:24:59 -0700
Subject: [PATCH 0781/1085] Make sure no objects are used before TF module gets
 initialized.

PiperOrigin-RevId: 216718180
---
 .../data/kernel_tests/dataset_ops_test.py     | 142 ++++++++++++------
 1 file changed, 95 insertions(+), 47 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/dataset_ops_test.py b/tensorflow/python/data/kernel_tests/dataset_ops_test.py
index b9f8875b9f..63d2be4371 100644
--- a/tensorflow/python/data/kernel_tests/dataset_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_ops_test.py
@@ -72,67 +72,115 @@ class DatasetOpsTest(test_base.DatasetTestBase, parameterized.TestCase):
     return interleave_fn
 
   @parameterized.named_parameters(
-      ("FixedLengthRecord", readers.FixedLengthRecordDataset("", 42)),
+      ("FixedLengthRecord",
+       lambda: readers.FixedLengthRecordDataset("", 42)),
       ("FromGenerator",
-       dataset_ops.Dataset.from_generator(make_gen.__func__(), dtypes.int32),
+       lambda: dataset_ops.Dataset.from_generator(
+           DatasetOpsTest.make_gen(), dtypes.int32),
        1),
-      ("FromSparseTensorSlices",
-       dataset_ops.Dataset.from_sparse_tensor_slices(
-           sparse_tensor.SparseTensor(
-               indices=np.array([[0, 0], [1, 0], [2, 0]]),
-               values=np.array([0, 0, 0]),
-               dense_shape=np.array([3, 1])))),
-      ("FromTensors", dataset_ops.Dataset.from_tensors([42])),
-      ("FromTensorSlices", dataset_ops.Dataset.from_tensors([42])),
-      ("Range", dataset_ops.Dataset.range(10)),
-      ("TextLine", readers.TextLineDataset("")),
-      ("TFRecord", readers.TFRecordDataset(""), 1),
+      ("FromTensors", lambda: dataset_ops.Dataset.from_tensors([42])),
+      ("FromTensorSlices", lambda: dataset_ops.Dataset.from_tensors([42])),
+      ("Range", lambda: dataset_ops.Dataset.range(10)),
+      ("TextLine", lambda: readers.TextLineDataset("")),
+      ("TFRecord", lambda: readers.TFRecordDataset(""), 1),
   )
-  def testDatasetSourceInputs(self, dataset, num_inputs=0):
-    self.assertEqual(num_inputs, len(dataset._inputs()))
+  def testDatasetSimpleSourceInputs(self, dataset_fn, num_inputs=0):
+    self.assertEqual(num_inputs, len(dataset_fn()._inputs()))
+
+  def testDatasetComplexSourceInputs(self):
+    dataset_fn = dataset_ops.Dataset.from_sparse_tensor_slices(
+        sparse_tensor.SparseTensor(
+            indices=np.array([[0, 0], [1, 0], [2, 0]]),
+            values=np.array([0, 0, 0]),
+            dense_shape=np.array([3, 1])))
+    self.assertEqual(0, len(dataset_fn._inputs()))
+
+  @parameterized.named_parameters(
+      ("Batch",
+       lambda x: x.batch(10),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("Cache",
+       lambda x: x.cache(),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("Filter",
+       lambda x: x.filter(lambda x: True),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("FlatMap",
+       lambda x: x.flat_map(lambda x: dataset_ops.Dataset.range(0)),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("Map",
+       lambda x: x.map(lambda x: x),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("PaddedBatch",
+       lambda x: x.padded_batch(10, []),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("ParallelMap",
+       lambda x: x.map(lambda x: x, num_parallel_calls=2),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("Repeat",
+       lambda x: x.repeat(),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("Shuffle",
+       lambda x: x.shuffle(10),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("Skip",
+       lambda x: x.skip(1),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("Take",
+       lambda x: x.take(1),
+       lambda: dataset_ops.Dataset.range(0)),
+      ("Window",
+       lambda x: x.window(10),
+       lambda: dataset_ops.Dataset.range(0)),
+  )
+  def testUnaryTransformationInputs(self, dataset_fn, input_dataset_fn):
+    input_dataset = input_dataset_fn()
+    self.assertEqual([input_dataset], dataset_fn(input_dataset)._inputs())
+
+  def testUnaryTransformationInputsApply(self):
+    input_dataset = dataset_ops.Dataset.range(0)
+    dataset_fn = self.make_apply_fn(dataset_ops.Dataset.range(0))
+    self.assertEqual([input_dataset], dataset_fn(input_dataset)._inputs())
 
   @parameterized.named_parameters(
-      ("Apply", make_apply_fn.__func__(dataset_ops.Dataset.range(0)),
-       dataset_ops.Dataset.range(0)),
-      ("Batch", lambda x: x.batch(10), dataset_ops.Dataset.range(0)),
-      ("Cache", lambda x: x.cache(), dataset_ops.Dataset.range(0)),
-      ("Filter", lambda x: x.filter(lambda x: True),
-       dataset_ops.Dataset.range(0)),
-      ("FlatMap", lambda x: x.flat_map(lambda x: dataset_ops.Dataset.range(0)),
-       dataset_ops.Dataset.range(0)),
-      ("Interleave", make_interleave_fn.__func__(dataset_ops.Dataset.range(0)),
-       dataset_ops.Dataset.range(0)),
-      ("Map", lambda x: x.map(lambda x: x), dataset_ops.Dataset.range(0)),
-      ("PaddedBatch", lambda x: x.padded_batch(10, []),
-       dataset_ops.Dataset.range(0)),
       ("ParallelInterleave",
-       make_interleave_fn.__func__(dataset_ops.Dataset.range(0), 2),
-       dataset_ops.Dataset.range(0)),
-      ("ParallelMap", lambda x: x.map(lambda x: x, num_parallel_calls=2),
-       dataset_ops.Dataset.range(0)),
-      ("Repeat", lambda x: x.repeat(), dataset_ops.Dataset.range(0)),
-      ("Shuffle", lambda x: x.shuffle(10), dataset_ops.Dataset.range(0)),
-      ("Skip", lambda x: x.skip(1), dataset_ops.Dataset.range(0)),
-      ("Take", lambda x: x.take(1), dataset_ops.Dataset.range(0)),
-      ("Window", lambda x: x.window(10), dataset_ops.Dataset.range(0)),
+       [lambda: dataset_ops.Dataset.range(0), 2],
+       lambda: dataset_ops.Dataset.range(0)),
+      ("Interleave",
+       [lambda: dataset_ops.Dataset.range(0), None],
+       lambda: dataset_ops.Dataset.range(0)),
   )
-  def testUnaryTransformationInputs(self, dataset_fn, input_dataset):
+  def testUnaryTransformationInputsWithInterleaveFn(
+      self, interleave_fn_args, input_dataset_fn):
+    input_dataset = input_dataset_fn()
+    dataset_fn = self.make_interleave_fn(*interleave_fn_args)
     self.assertEqual([input_dataset], dataset_fn(input_dataset)._inputs())
 
   @parameterized.named_parameters(
       ("Concatenate", lambda x, y: x.concatenate(y),
-       dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1)))
-  def testBinaryTransformationInputs(self, dataset_fn, input1, input2):
+       lambda: dataset_ops.Dataset.range(0),
+       lambda: dataset_ops.Dataset.range(1)))
+  def testBinaryTransformationInputs(self, dataset_fn, input1_fn, input2_fn):
+    input1 = input1_fn()
+    input2 = input2_fn()
     self.assertEqual([input1, input2], dataset_fn(input1, input2)._inputs())
 
   @parameterized.named_parameters(
-      ("ZipOne", dataset_ops.Dataset.zip, (dataset_ops.Dataset.range(0))),
-      ("ZipNest", dataset_ops.Dataset.zip,
-       (dataset_ops.Dataset.range(0),
-        (dataset_ops.Dataset.range(1), dataset_ops.Dataset.range(2)))),
-      ("ZipTuple", dataset_ops.Dataset.zip,
-       (dataset_ops.Dataset.range(0), dataset_ops.Dataset.range(1))))
-  def testVariadicTransformationInputs(self, dataset_fn, input_datasets):
+      ("ZipOne",
+       dataset_ops.Dataset.zip,
+       lambda: (dataset_ops.Dataset.range(0))),
+      ("ZipNest",
+       dataset_ops.Dataset.zip,
+       lambda: (dataset_ops.Dataset.range(0),
+                (dataset_ops.Dataset.range(1),
+                 dataset_ops.Dataset.range(2)))),
+      ("ZipTuple",
+       dataset_ops.Dataset.zip,
+       lambda: (dataset_ops.Dataset.range(0),
+                dataset_ops.Dataset.range(1))),
+  )
+  def testVariadicTransformationInputs(self, dataset_fn, input_datasets_fn):
+    input_datasets = input_datasets_fn()
     self.assertEqual(
         nest.flatten(input_datasets),
         dataset_fn(input_datasets)._inputs())
-- 
GitLab


From 41e37ae016ecd2af69d9ed7f4e84d12456f1bcee Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 11 Oct 2018 10:32:52 -0700
Subject: [PATCH 0782/1085] Use the functional operator pattern for logical
 expressions. The current implementation relied on the TF import, which is now
 gone.

PiperOrigin-RevId: 216719675
---
 .../converters/logical_expressions.py         |  88 +++++++----
 .../converters/logical_expressions_test.py    |  41 ++++--
 tensorflow/python/autograph/operators/BUILD   |  11 ++
 .../python/autograph/operators/__init__.py    |  14 ++
 .../python/autograph/operators/logical.py     | 139 ++++++++++++++++++
 .../autograph/operators/logical_test.py       |  85 +++++++++++
 6 files changed, 335 insertions(+), 43 deletions(-)
 create mode 100644 tensorflow/python/autograph/operators/logical.py
 create mode 100644 tensorflow/python/autograph/operators/logical_test.py

diff --git a/tensorflow/python/autograph/converters/logical_expressions.py b/tensorflow/python/autograph/converters/logical_expressions.py
index 8c4d53f9a8..dfcaafdc9e 100644
--- a/tensorflow/python/autograph/converters/logical_expressions.py
+++ b/tensorflow/python/autograph/converters/logical_expressions.py
@@ -28,7 +28,6 @@ from tensorflow.python.autograph.pyct import anno
 from tensorflow.python.autograph.pyct import parser
 from tensorflow.python.autograph.pyct import templates
 
-
 # TODO(mdan): Properly extrack boolean ops according to lazy eval rules.
 # Note that this isn't completely safe either, because tensors may have control
 # dependencies.
@@ -44,19 +43,22 @@ class LogicalExpressionTransformer(converter.Base):
 
   def __init__(self, ctx):
     super(LogicalExpressionTransformer, self).__init__(ctx)
-    # TODO(mdan): Look into replacing with bitwise operators instead.
-    # TODO(mdan): Skip replacing if the function is trivial.
+    # TODO(mdan): For completeness and consistency, overload everything.
     self.op_mapping = {
-        gast.And: 'tf.logical_and',
-        gast.Eq: 'tf.equal',
-        gast.Gt: 'tf.greater',
-        gast.GtE: 'tf.greater_equal',
-        gast.Lt: 'tf.less',
-        gast.LtE: 'tf.less_equal',
-        gast.Not: 'tf.logical_not',
-        gast.NotEq: 'tf.not_equal',
-        gast.Or: 'tf.logical_or',
-        gast.USub: 'tf.negative',
+        gast.And: 'ag__.and_',
+        gast.Eq: 'ag__.eq',
+        gast.NotEq: 'ag__.not_eq',
+        gast.Lt: 'ag__.lt',
+        gast.LtE: 'ag__.lt_e',
+        gast.Gt: 'ag__.gt',
+        gast.GtE: 'ag__.gt_e',
+        gast.Is: 'ag__.is_',
+        gast.IsNot: 'ag__.is_not',
+        gast.In: 'ag__.in_',
+        gast.Not: 'ag__.not_',
+        gast.NotIn: 'ag__.not_in',
+        gast.Or: 'ag__.or_',
+        gast.USub: 'ag__.u_sub',
     }
 
   def _expect_simple_symbol(self, operand):
@@ -78,27 +80,48 @@ class LogicalExpressionTransformer(converter.Base):
     op_type = type(operator)
     return self.op_mapping[op_type]
 
-  def _as_function(self, func_name, args):
-    template = """
-      func_name(args)
-    """
-    replacement = templates.replace_as_expression(
-        template, func_name=parser.parse_expression(func_name), args=args)
+  def _as_function(self, func_name, args, args_as_lambda=False):
+    if args_as_lambda:
+      args_as_lambda = []
+      for arg in args:
+        template = """
+          lambda: arg
+        """
+        args_as_lambda.append(
+            templates.replace_as_expression(template, arg=arg))
+      args = args_as_lambda
+
+    if not args:
+      template = """
+        func_name()
+      """
+      replacement = templates.replace_as_expression(
+          template, func_name=parser.parse_expression(func_name))
+    elif len(args) == 1:
+      template = """
+        func_name(arg)
+      """
+      replacement = templates.replace_as_expression(
+          template, func_name=parser.parse_expression(func_name), arg=args[0])
+    elif len(args) == 2:
+      template = """
+        func_name(arg1, arg2)
+      """
+      replacement = templates.replace_as_expression(
+          template,
+          func_name=parser.parse_expression(func_name),
+          arg1=args[0],
+          arg2=args[1])
+    else:
+      raise NotImplementedError('{} arguments for {}'.format(
+          len(args), func_name))
+
     anno.setanno(replacement, SAFE_BOOLEAN_OPERAND, True)
     return replacement
 
   def visit_Compare(self, node):
     node = self.generic_visit(node)
 
-    if not all(self._has_matching_func(op) for op in node.ops):
-      if len(node.ops) == 1:
-        # Basic expressions are safe to leave as they are.
-        return node
-      else:
-        raise NotImplementedError(
-            'compound expression with at least one unsupported '
-            'operator: {}'.format(node.ops))
-
     ops_and_comps = list(zip(node.ops, node.comparators))
     left = node.left
     op_tree = None
@@ -113,8 +136,8 @@ class LogicalExpressionTransformer(converter.Base):
         anno.setanno(binary_comparison, SAFE_BOOLEAN_OPERAND, True)
       if op_tree:
         self._expect_simple_symbol(right)
-        op_tree = self._as_function('tf.logical_and',
-                                    (binary_comparison, op_tree))
+        op_tree = self._as_function(
+            'ag__.and_', (op_tree, binary_comparison), args_as_lambda=True)
       else:
         op_tree = binary_comparison
       left = right
@@ -123,7 +146,7 @@ class LogicalExpressionTransformer(converter.Base):
 
   def visit_UnaryOp(self, node):
     node = self.generic_visit(node)
-    return self._as_function(self._matching_func(node.op), node.operand)
+    return self._as_function(self._matching_func(node.op), (node.operand,))
 
   def visit_BoolOp(self, node):
     node = self.generic_visit(node)
@@ -133,7 +156,8 @@ class LogicalExpressionTransformer(converter.Base):
     while node_values:
       left = node_values.pop()
       self._expect_simple_symbol(left)
-      right = self._as_function(self._matching_func(node.op), (left, right))
+      right = self._as_function(
+          self._matching_func(node.op), (left, right), args_as_lambda=True)
     return right
 
 
diff --git a/tensorflow/python/autograph/converters/logical_expressions_test.py b/tensorflow/python/autograph/converters/logical_expressions_test.py
index b78b4d3a6a..99db04a775 100644
--- a/tensorflow/python/autograph/converters/logical_expressions_test.py
+++ b/tensorflow/python/autograph/converters/logical_expressions_test.py
@@ -20,34 +20,53 @@ from __future__ import print_function
 
 from tensorflow.python.autograph.converters import logical_expressions
 from tensorflow.python.autograph.core import converter_testing
-from tensorflow.python.ops import math_ops
+from tensorflow.python.framework import constant_op
 from tensorflow.python.platform import test
 
 
-class GradientsFunctionTest(converter_testing.TestCase):
+class LogicalExpressionTest(converter_testing.TestCase):
 
   def test_equals(self):
 
     def test_fn(a, b):
       return a == b
 
-    with self.converted(test_fn, logical_expressions, {},
-                        math_ops.equal) as result:
+    with self.converted(test_fn, logical_expressions, {}) as result:
       with self.cached_session() as sess:
-        self.assertTrue(sess.run(result.test_fn(1, 1)))
-        self.assertFalse(sess.run(result.test_fn(1, 2)))
+        self.assertTrue(sess.run(result.test_fn(constant_op.constant(1), 1)))
+        self.assertFalse(sess.run(result.test_fn(constant_op.constant(1), 2)))
 
   def test_bool_ops(self):
 
     def test_fn(a, b, c):
-      return (a or b) and (a or b or c)
+      return (a or b) and (a or b or c) and not c
+
+    with self.converted(test_fn, logical_expressions, {}) as result:
+      with self.cached_session() as sess:
+        self.assertTrue(
+            sess.run(result.test_fn(constant_op.constant(True), False, False)))
+        self.assertFalse(
+            sess.run(result.test_fn(constant_op.constant(True), False, True)))
 
-    with self.converted(test_fn, logical_expressions, {}, math_ops.logical_or,
-                        math_ops.logical_and) as result:
+  def test_comparison(self):
+
+    def test_fn(a, b, c, d):
+      return a < b == c > d
+
+    with self.converted(test_fn, logical_expressions, {}) as result:
       with self.cached_session() as sess:
-        self.assertTrue(sess.run(result.test_fn(True, False, True)))
+        # Note: having just the first constant a tensor tests that the
+        # operations execute in the correct order. If anything other than
+        # a < b executed first, the result would be a Python scalar and not a
+        # Tensor. This is valid as long as the dispat is automatic based on
+        # type.
+        self.assertTrue(
+            sess.run(result.test_fn(constant_op.constant(1), 2, 2, 1)))
+        self.assertFalse(
+            sess.run(result.test_fn(constant_op.constant(1), 2, 2, 3)))
+
+  def test_default_ops(self):
 
-  def test_unsupported_ops(self):
     def test_fn(a, b):
       return a in b
 
diff --git a/tensorflow/python/autograph/operators/BUILD b/tensorflow/python/autograph/operators/BUILD
index f422911377..aedb901845 100644
--- a/tensorflow/python/autograph/operators/BUILD
+++ b/tensorflow/python/autograph/operators/BUILD
@@ -23,6 +23,7 @@ py_library(
         "control_flow.py",
         "data_structures.py",
         "exceptions.py",
+        "logical.py",
         "py_builtins.py",
         "slices.py",
     ],
@@ -73,6 +74,16 @@ py_test(
     ],
 )
 
+py_test(
+    name = "logical_test",
+    srcs = ["logical_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":operators",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 py_test(
     name = "py_builtins_test",
     srcs = ["py_builtins_test.py"],
diff --git a/tensorflow/python/autograph/operators/__init__.py b/tensorflow/python/autograph/operators/__init__.py
index 8ba2558ac2..7a580fe324 100644
--- a/tensorflow/python/autograph/operators/__init__.py
+++ b/tensorflow/python/autograph/operators/__init__.py
@@ -47,6 +47,20 @@ from tensorflow.python.autograph.operators.data_structures import ListPopOpts
 from tensorflow.python.autograph.operators.data_structures import ListStackOpts
 from tensorflow.python.autograph.operators.data_structures import new_list
 from tensorflow.python.autograph.operators.exceptions import assert_stmt
+from tensorflow.python.autograph.operators.logical import and_
+from tensorflow.python.autograph.operators.logical import eq
+from tensorflow.python.autograph.operators.logical import gt
+from tensorflow.python.autograph.operators.logical import gt_e
+from tensorflow.python.autograph.operators.logical import in_
+from tensorflow.python.autograph.operators.logical import is_
+from tensorflow.python.autograph.operators.logical import is_not
+from tensorflow.python.autograph.operators.logical import lt
+from tensorflow.python.autograph.operators.logical import lt_e
+from tensorflow.python.autograph.operators.logical import not_
+from tensorflow.python.autograph.operators.logical import not_eq
+from tensorflow.python.autograph.operators.logical import not_in
+from tensorflow.python.autograph.operators.logical import or_
+from tensorflow.python.autograph.operators.logical import u_sub
 from tensorflow.python.autograph.operators.py_builtins import float_
 from tensorflow.python.autograph.operators.py_builtins import int_
 from tensorflow.python.autograph.operators.py_builtins import len_
diff --git a/tensorflow/python/autograph/operators/logical.py b/tensorflow/python/autograph/operators/logical.py
new file mode 100644
index 0000000000..569db5b91b
--- /dev/null
+++ b/tensorflow/python/autograph/operators/logical.py
@@ -0,0 +1,139 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Logical operators, including comparison and bool operators."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_math_ops
+
+
+def not_(a):
+  """Functional form of "not"."""
+  if tensor_util.is_tensor(a):
+    return gen_math_ops.logical_not(a)
+  return not a
+
+
+def and_(a, b):
+  """Functional form of "and". Uses lazy evaluation semantics."""
+  a_val = a()
+  if tensor_util.is_tensor(a_val):
+    return _tf_lazy_and(a_val, b)
+  return _py_lazy_and(a_val, b)
+
+
+def _tf_lazy_and(cond, b):
+  """Lazy-eval equivalent of "and" for Tensors."""
+  # TODO(mdan): Enforce cond is scalar here?
+  return control_flow_ops.cond(cond, b, lambda: cond)
+
+
+def _py_lazy_and(cond, b):
+  """Lazy-eval equivalent of "and" in Python."""
+  return cond and b()
+
+
+def or_(a, b):
+  """Functional form of "or". Uses lazy evaluation semantics."""
+  a_val = a()
+  if tensor_util.is_tensor(a_val):
+    return _tf_lazy_or(a_val, b)
+  return _py_lazy_or(a_val, b)
+
+
+def _tf_lazy_or(cond, b):
+  """Lazy-eval equivalent of "or" for Tensors."""
+  # TODO(mdan): Enforce cond is scalar here?
+  return control_flow_ops.cond(cond, lambda: cond, b)
+
+
+def _py_lazy_or(cond, b):
+  """Lazy-eval equivalent of "or" in Python."""
+  return cond or b()
+
+
+def eq(a, b):
+  """Functional form of "equal"."""
+  if tensor_util.is_tensor(a) or tensor_util.is_tensor(b):
+    return _tf_equal(a, b)
+  return _py_equal(a, b)
+
+
+def _tf_equal(a, b):
+  """Overload of "equal" for Tensors."""
+  return gen_math_ops.equal(a, b)
+
+
+def _py_equal(a, b):
+  """Overload of "equal" that falls back to Python's default implementation."""
+  return a == b
+
+
+def not_eq(a, b):
+  """Functional form of "not-equal"."""
+  return not_(eq(a, b))
+
+
+# Default implementation for the remainings.
+
+
+def gt(a, b):
+  """Functional form of "less-than"."""
+  return a > b
+
+
+def gt_e(a, b):
+  """Functional form of "less-than"."""
+  return a >= b
+
+
+def is_(a, b):
+  """Functional form of "less-than"."""
+  return a is b
+
+
+def is_not(a, b):
+  """Functional form of "less-than"."""
+  return a is not b
+
+
+def in_(a, b):
+  """Functional form of "less-than"."""
+  # TODO(mdan): in and not_in should probably be convertible for some types.
+  return a in b
+
+
+def lt(a, b):
+  """Functional form of "less-than"."""
+  return a < b
+
+
+def lt_e(a, b):
+  """Functional form of "less-than"."""
+  return a <= b
+
+
+def not_in(a, b):
+  """Functional form of "less-than"."""
+  return a not in b
+
+
+def u_sub(a):
+  """Functional form of "unary-sub"."""
+  return -a
diff --git a/tensorflow/python/autograph/operators/logical_test.py b/tensorflow/python/autograph/operators/logical_test.py
new file mode 100644
index 0000000000..d6649f7b2b
--- /dev/null
+++ b/tensorflow/python/autograph/operators/logical_test.py
@@ -0,0 +1,85 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for logical module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.autograph.operators import logical
+from tensorflow.python.framework import constant_op
+from tensorflow.python.platform import test
+
+
+class LogicalOperatorsTest(test.TestCase):
+
+  def assertNotCalled(self):
+    self.fail('this should not be called')
+
+  def _tf_true(self):
+    return constant_op.constant(True)
+
+  def _tf_false(self):
+    return constant_op.constant(False)
+
+  def test_and_python(self):
+    self.assertTrue(logical.and_(lambda: True, lambda: True))
+    self.assertTrue(logical.and_(lambda: [1], lambda: True))
+    self.assertListEqual(logical.and_(lambda: True, lambda: [1]), [1])
+
+    self.assertFalse(logical.and_(lambda: False, lambda: True))
+    self.assertFalse(logical.and_(lambda: False, self.assertNotCalled))
+
+  def test_and_tf(self):
+    with self.cached_session() as sess:
+      t = logical.and_(self._tf_true, self._tf_true)
+      self.assertEqual(sess.run(t), True)
+      t = logical.and_(self._tf_true, lambda: True)
+      self.assertEqual(sess.run(t), True)
+      t = logical.and_(self._tf_false, lambda: True)
+      self.assertEqual(sess.run(t), False)
+      # TODO(mdan): Add a test for ops with side effects.
+
+  def test_or_python(self):
+    self.assertFalse(logical.or_(lambda: False, lambda: False))
+    self.assertFalse(logical.or_(lambda: [], lambda: False))
+    self.assertListEqual(logical.or_(lambda: False, lambda: [1]), [1])
+
+    self.assertTrue(logical.or_(lambda: False, lambda: True))
+    self.assertTrue(logical.or_(lambda: True, self.assertNotCalled))
+
+  def test_or_tf(self):
+    with self.cached_session() as sess:
+      t = logical.or_(self._tf_false, self._tf_true)
+      self.assertEqual(sess.run(t), True)
+      t = logical.or_(self._tf_false, lambda: True)
+      self.assertEqual(sess.run(t), True)
+      t = logical.or_(self._tf_true, lambda: True)
+      self.assertEqual(sess.run(t), True)
+      # TODO(mdan): Add a test for ops with side effects.
+
+  def test_not_python(self):
+    self.assertFalse(logical.not_(True))
+    self.assertFalse(logical.not_([1]))
+    self.assertTrue(logical.not_([]))
+
+  def test_not_tf(self):
+    with self.cached_session() as sess:
+      t = logical.not_(self._tf_false())
+      self.assertEqual(sess.run(t), True)
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 2502702e23e448dcef0fdd13d7e3049f6b5c9726 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 10:41:52 -0700
Subject: [PATCH 0783/1085] repo.bzl: Increase timeout of repo_ctx.execute

patch command can exceed the 10s timeout limit on some slow machines.

Fixes #22761

PiperOrigin-RevId: 216721485
---
 third_party/repo.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/repo.bzl b/third_party/repo.bzl
index 6e30618d39..6fe80c1694 100644
--- a/third_party/repo.bzl
+++ b/third_party/repo.bzl
@@ -47,7 +47,7 @@ def _use_system_lib(ctx, name):
 # Executes specified command with arguments and calls 'fail' if it exited with
 # non-zero code
 def _execute_and_check_ret_code(repo_ctx, cmd_and_args):
-    result = repo_ctx.execute(cmd_and_args, timeout = 10)
+    result = repo_ctx.execute(cmd_and_args, timeout = 60)
     if result.return_code != 0:
         fail(("Non-zero return code({1}) when executing '{0}':\n" + "Stdout: {2}\n" +
               "Stderr: {3}").format(
-- 
GitLab


From 2e1f603c3d55edac6a646a78a1117808bddabd2f Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Thu, 11 Oct 2018 10:59:27 -0700
Subject: [PATCH 0784/1085] Internal change.

PiperOrigin-RevId: 216725062
---
 tensorflow/compiler/tf2xla/BUILD              |  1 +
 .../tf2xla/functionalize_control_flow.cc      |  8 +-
 tensorflow/compiler/tf2xla/tf2xla_util.cc     | 32 +++++++-
 tensorflow/compiler/tf2xla/tf2xla_util.h      | 28 ++++++-
 .../compiler/tf2xla/tf2xla_util_test.cc       | 74 +++++++++++++++++++
 5 files changed, 135 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index 3f631f91ec..07210dcf37 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -308,6 +308,7 @@ tf_cc_test(
         "//tensorflow/cc:function_ops",
         "//tensorflow/cc:ops",
         "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:math_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 0362682bd6..f818d80022 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -116,7 +116,7 @@ Status FunctionalizeControlFlowForFunction(
   std::vector<std::pair<Node*, std::vector<AssociatedFunctionInfo>>>
       nodes_to_associated_functions;
   for (auto* n : g->nodes()) {
-    auto associated_functions = GetAssociatedFunctions(*n, flr);
+    auto associated_functions = GetAssociatedFunctions(*n, fld);
     if (!associated_functions.empty()) {
       nodes_to_associated_functions.push_back({n, associated_functions});
     }
@@ -226,9 +226,13 @@ Status FunctionalizeControlFlowPass::Run(
       pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice);
 
   // Find XLA compile ops and its corresponding FunctionDef.
+  // TPUCompile op is not in the map because graph rewriting might happen
+  // multiple times, and we want to avoid functionalize it again.
   static std::map<string, string>* kNodeTypeToFunctionAttrMapping =
       new std::map<string, string>{
-          {"TPUCompile", "function"},
+          // TPUReplicate ops are generated by EncapsulateTPUComputationsPass.
+          {"TPUReplicate", "computation"},
+          // XlaLaunch ops are generated by EncapsulateXlaComputationsPass.
           {"XlaLaunch", "function"},
       };
   std::map<string, absl::optional<string>> canonicalized_name_to_new_name;
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc
index 01dd3ba10f..cc83db0562 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc
@@ -330,8 +330,8 @@ uint32 GetXLARandomSeed() {
 
 // TODO(b/77601805): add tests for associated function related stuff.
 bool HasAssociatedFunction(const NodeDef& node_def,
-                           FunctionLibraryRuntime* flr) {
-  if (flr->GetFunctionLibraryDefinition()->Contains(node_def.op())) {
+                           const FunctionLibraryDefinition* fld) {
+  if (fld->Contains(node_def.op())) {
     return true;
   }
 
@@ -351,10 +351,10 @@ bool HasAssociatedFunction(const NodeDef& node_def,
 }
 
 std::vector<AssociatedFunctionInfo> GetAssociatedFunctions(
-    const Node& node, FunctionLibraryRuntime* flr) {
+    const Node& node, const FunctionLibraryDefinition* fld) {
   std::vector<AssociatedFunctionInfo> results;
   const string& op = node.type_string();
-  if (flr->GetFunctionLibraryDefinition()->Contains(op)) {
+  if (fld->Contains(op)) {
     // This is a function call node.
     AttrValueMap attrs(node.attrs().begin(), node.attrs().end());
     results.emplace_back(AssociatedFunctionInfo::FunctionCall(op, attrs));
@@ -441,4 +441,28 @@ Status RewriteAssociatedFunction(
   return Status::OK();
 }
 
+Status CachedFunctionHandles::GetOrInstantiate(
+    const string& func_name, AttrSlice attrs,
+    FunctionLibraryRuntime::Handle* handle) {
+  string canonicalized_name = Canonicalize(func_name, attrs);
+  auto iter = handles_.find(canonicalized_name);
+  if (iter != handles_.end()) {
+    *handle = iter->second;
+    return Status::OK();
+  }
+
+  TF_RETURN_IF_ERROR(flr_->Instantiate(func_name, attrs, handle));
+  handles_[canonicalized_name] = *handle;
+  return Status::OK();
+}
+
+Status CachedFunctionHandles::ReleaseAllHandles() {
+  Status result;
+  for (auto iter : handles_) {
+    result.Update(flr_->ReleaseHandle(iter.second));
+  }
+  handles_.clear();
+  return result;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.h b/tensorflow/compiler/tf2xla/tf2xla_util.h
index 53eab8b63e..b974b99822 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.h
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.h
@@ -120,7 +120,7 @@ class AssociatedFunctionInfo {
 
 // Returns if the NodeDef has associated function.
 bool HasAssociatedFunction(const NodeDef& node_def,
-                           FunctionLibraryRuntime* flr);
+                           const FunctionLibraryDefinition* fld);
 
 // Gets functions associated with the node. Current cases:
 // 1. For function call node, its function name;
@@ -128,7 +128,7 @@ bool HasAssociatedFunction(const NodeDef& node_def,
 //    and returned attrs will be this node's attributes;
 // 3. For nodes like XlaWhile/XlaIf, all their function attributes.
 std::vector<AssociatedFunctionInfo> GetAssociatedFunctions(
-    const Node& node, FunctionLibraryRuntime* flr);
+    const Node& node, const FunctionLibraryDefinition* fld);
 
 // Changes associated functions for the node. Current cases:
 // 1. For function call node, creates a new node with the new function name and
@@ -144,6 +144,30 @@ Status RewriteAssociatedFunction(
 // Attribute to mark nodes to be executed on host.
 extern const char kXlaOutsideCompilationAttrName[];
 
+// Class to act as cache for FunctionLibraryRuntime::Handle objects.
+class CachedFunctionHandles {
+ public:
+  CachedFunctionHandles(FunctionLibraryRuntime* flr) : flr_(flr) {}
+
+  // Populates `handle` for requested function and attributes. If we have
+  // instantiated the function with the same attributes before, `handle` will be
+  // cached handle; otherwise instantiate the function and populate `handle`.
+  Status GetOrInstantiate(const string& func_name, AttrSlice attrs,
+                          FunctionLibraryRuntime::Handle* handle);
+
+  // Releases all handles in the cache. Returns first non-OK status if any;
+  // returns OK otherwise.
+  Status ReleaseAllHandles();
+
+  ~CachedFunctionHandles() { ReleaseAllHandles().IgnoreError(); }
+
+ private:
+  FunctionLibraryRuntime* flr_;
+  std::map<string, FunctionLibraryRuntime::Handle> handles_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(CachedFunctionHandles);
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_COMPILER_TF2XLA_TF2XLA_UTIL_H_
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
index 68441b3d47..202e929315 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc
@@ -23,11 +23,15 @@ limitations under the License.
 #include "tensorflow/cc/ops/function_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/compiler/tf2xla/sharding_util.h"
+#include "tensorflow/core/common_runtime/graph_optimizer.h"
+#include "tensorflow/core/common_runtime/process_function_library_runtime.h"
+#include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/public/version.h"
 
 namespace tensorflow {
 namespace {
@@ -255,5 +259,75 @@ TEST(SetNodeShardingFromNeighbors, Basic) {
   EXPECT_EQ(1, parse_status.ValueOrDie().value().tile_assignment_devices(0));
 }
 
+REGISTER_OP("One")
+    .Output("y: T")
+    .Attr("T: {float, double, int32, int64}")
+    .Doc(R"doc(
+Returns a tensor with a single element (1) of type T.
+
+y: A scalar in type T.
+
+)doc");
+
+// Tests that CachedFunctionHandles class works.
+TEST(CachedFunctionHandles, Basic) {
+  FunctionDef func = FunctionDefHelper::Define(
+      // Name
+      "TestFunc",
+      // Args
+      {},
+      // Return values
+      {"y:T"},
+      // Attr def
+      {"T:{float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"y"}, "One", {}, {{"T", "$T"}}},
+      });
+  FunctionDefLibrary proto;
+  *proto.add_function() = func;
+  FunctionLibraryDefinition fld(OpRegistry::Global(), proto);
+  std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
+      new ProcessFunctionLibraryRuntime(
+          /*device_mgr=*/nullptr, Env::Default(), TF_GRAPH_DEF_VERSION, &fld,
+          OptimizerOptions()));
+  FunctionLibraryRuntime* flr =
+      pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice);
+
+  CachedFunctionHandles cached_function_handles(flr);
+
+  // Tests that GetOrInstantiate() works.
+  FunctionLibraryRuntime::Handle first_handle;
+  AttrValue attr;
+  attr.set_type(DT_FLOAT);
+  AttrValueMap attrs;
+  attrs["T"] = attr;
+  TF_ASSERT_OK(cached_function_handles.GetOrInstantiate(
+      "TestFunc", AttrSlice(&attrs), &first_handle));
+
+  // Tests that we can get FunctionBody.
+  const FunctionBody* body = flr->GetFunctionBody(first_handle);
+  EXPECT_NE(body, nullptr);
+
+  // Tests that GetOrInstantiate() returns cached handle when called with same
+  // function name and attributes.
+  FunctionLibraryRuntime::Handle second_handle;
+  TF_ASSERT_OK(cached_function_handles.GetOrInstantiate(
+      "TestFunc", AttrSlice(&attrs), &second_handle));
+  EXPECT_EQ(first_handle, second_handle);
+
+  // Tests that GetOrInstantiate() returns new handle when called with same
+  // function name but different attributes.
+  attr.set_type(DT_INT32);
+  attrs["T"] = attr;
+  FunctionLibraryRuntime::Handle third_handle;
+  TF_ASSERT_OK(cached_function_handles.GetOrInstantiate(
+      "TestFunc", AttrSlice(&attrs), &third_handle));
+  EXPECT_NE(first_handle, third_handle);
+
+  // Tests that ReleaseAllHandles() works.
+  TF_EXPECT_OK(cached_function_handles.ReleaseAllHandles());
+}
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From 53181bd7d2e3e4ab6e9789f6e3f4f70b3df9571d Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Thu, 11 Oct 2018 11:06:30 -0700
Subject: [PATCH 0785/1085] Make sure that all operands and outputs of Sort
 have the same layout.

Also fix the DotLayout test, it would pass even when commenting out the dot specific logic in GpuLayoutAssignment.

PiperOrigin-RevId: 216726796
---
 .../xla/service/gpu/gpu_layout_assignment.cc  | 26 ++++++++++++++++
 .../service/gpu/gpu_layout_assignment_test.cc | 31 ++++++++++++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index 8c9a8adc61..1c0a23fa3e 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -208,6 +208,32 @@ Status GpuLayoutAssignment::AddBackendConstraints(
           constraints->SetOperandLayout(op1_shape, instruction, 1));
       TF_RETURN_IF_ERROR(
           constraints->SetInstructionLayout(output_shape, instruction));
+    } else if (instruction->opcode() == HloOpcode::kSort &&
+               ShapeUtil::Rank(instruction->operand(0)->shape()) > 1) {
+      // Make sure that all the operands and the output(s) have the same layout.
+      Shape keys_shape = instruction->operand(0)->shape();
+      Layout keys_layout =
+          LayoutUtil::GetDefaultLayoutForRank(ShapeUtil::Rank(keys_shape));
+      for (int64 i = 0; i < instruction->operand_count(); ++i) {
+        Shape shape = instruction->operand(i)->shape();
+        *shape.mutable_layout() = keys_layout;
+        TF_RETURN_IF_ERROR(
+            constraints->SetOperandLayout(shape, instruction, i));
+        const LogicalBuffer* output_buffer;
+        if (ShapeUtil::IsArray(instruction->shape())) {
+          TF_ASSIGN_OR_RETURN(
+              output_buffer,
+              constraints->points_to_analysis().GetBufferDefinedAt(instruction,
+                                                                   {}));
+        } else {
+          TF_ASSIGN_OR_RETURN(
+              output_buffer,
+              constraints->points_to_analysis().GetBufferDefinedAt(instruction,
+                                                                   {i}));
+        }
+        TF_RETURN_IF_ERROR(
+            constraints->SetBufferLayout(keys_layout, *output_buffer));
+      }
     }
   }
   return Status::OK();
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
index 04681cfcec..4822b820f4 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
@@ -351,7 +351,8 @@ TEST_F(LayoutAssignmentTest, DotLayout) {
                           ParseHloString(hlo_text));
 
   ComputationLayout computation_layout(
-      module->entry_computation()->ComputeProgramShape());
+      module->entry_computation()->ComputeProgramShape(),
+      /*ignore_layouts=*/false);
   GpuLayoutAssignment layout_assignment(
       &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
       backend().default_stream_executor());
@@ -364,6 +365,34 @@ TEST_F(LayoutAssignmentTest, DotLayout) {
                       op::ShapeWithLayout(expected_shape)));
 }
 
+TEST_F(LayoutAssignmentTest, SortLayout) {
+  const char* hlo_text = R"(
+  HloModule SortLayout
+  ENTRY sort {
+    keys = f32[3,2]{0,1} constant(f32[3,2]{0,1}{{0,1},{0,1},{0,1}})
+    values = f32[2,3]{1,0} parameter(0)
+    transpose = f32[3,2]{1,0} transpose(values), dimensions={1,0}
+    ROOT sort = (f32[3,2]{1,0}, f32[3,2]{1,0}) sort(keys, transpose),
+      dimensions={1}
+  })";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(hlo_text));
+
+  ComputationLayout computation_layout(
+      module->entry_computation()->ComputeProgramShape(),
+      /*ignore_layouts=*/false);
+  GpuLayoutAssignment layout_assignment(
+      &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+      backend().default_stream_executor());
+  EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
+
+  Shape expected_shape = ShapeUtil::MakeShapeWithLayout(F32, {3, 2}, {1, 0});
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Sort(op::ShapeWithLayout(expected_shape),
+                       op::ShapeWithLayout(expected_shape)));
+}
+
 }  // namespace
 }  // namespace gpu
 }  // namespace xla
-- 
GitLab


From 7f8a7f2f64ee31b233df325ef7444560a45559cc Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 11 Oct 2018 11:10:38 -0700
Subject: [PATCH 0786/1085] Modify converted_call to accept a separate argument
 when the callable is an attribute, representing the attribute's owner.

This gives converted_call a chance to override the attribute lookup, and in turn allows working around the limitation `super` that prevents it from resolving dynamic attributes.

PiperOrigin-RevId: 216727687
---
 .../python/autograph/converters/call_trees.py | 23 +++++++-
 .../autograph/converters/call_trees_test.py   | 25 +++++++++
 tensorflow/python/autograph/impl/api.py       | 36 +++++++++---
 tensorflow/python/autograph/impl/api_test.py  | 27 ++++++---
 .../python/autograph/impl/conversion.py       |  9 +++
 .../python/autograph/pyct/inspect_utils.py    | 56 +++++++++++++++++++
 .../autograph/pyct/inspect_utils_test.py      | 32 +++++++++++
 7 files changed, 190 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index a7926266d5..09072833d2 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -135,7 +135,19 @@ class CallTreeTransformer(converter.Base):
     # The decorators themselves are not to be converted.
     # If present, the decorators should appear as static functions.
     target_entity = self._try_resolve_target(node.func)
+
     if target_entity is not None:
+
+      # This may be reached when "calling" a callable attribute of an object.
+      # For example:
+      #
+      #   self.fc = tf.keras.layers.Dense()
+      #   self.fc()
+      #
+      for mod in self.ctx.program.uncompiled_modules:
+        if target_entity.__module__.startswith(mod[0] + '.'):
+          return False
+
       # This attribute is set by the decorator itself.
       # TODO(mdan): This may not play nicely with other wrapping decorators.
       if hasattr(target_entity, '__pyct_is_compile_decorator'):
@@ -238,11 +250,18 @@ class CallTreeTransformer(converter.Base):
     # Before we could convert all the time though, we'd need a reasonable
     # caching mechanism.
     template = """
-      ag__.converted_call(func, options, args)
+      ag__.converted_call(func, owner, options, args)
     """
+    if isinstance(node.func, gast.Attribute):
+      func = gast.Str(node.func.attr)
+      owner = node.func.value
+    else:
+      func = node.func
+      owner = parser.parse_expression('None')
     call_expr = templates.replace(
         template,
-        func=node.func,
+        func=func,
+        owner=owner,
         options=self.ctx.program.options.to_ast(self.ctx.info.namespace),
         args=node.args)
     new_call = call_expr[0].value
diff --git a/tensorflow/python/autograph/converters/call_trees_test.py b/tensorflow/python/autograph/converters/call_trees_test.py
index 0e50f42c6a..dbc82a674a 100644
--- a/tensorflow/python/autograph/converters/call_trees_test.py
+++ b/tensorflow/python/autograph/converters/call_trees_test.py
@@ -133,6 +133,31 @@ class CallTreesTest(converter_testing.TestCase):
         result_tensor = result.test_fn(constant_op.constant(1))
         self.assertEquals(sess.run(result_tensor), 3)
 
+  def test_decorated_callee(self):
+
+    # Using this trick to prevent the Python loader from automatically expanding
+    # the decorator. This simulates the situation found when converting a
+    # function from within an actual decorator.
+
+    def wrapper_fn():
+
+      def dec(f):
+        return f
+
+      @dec
+      def called_fn(a):
+        return a
+
+      @dec
+      def test_fn(a):
+        return called_fn(a)
+
+      return test_fn
+
+    node, ctx = self.prepare(wrapper_fn, {})
+    node = node.body[2]
+    node = call_trees.transform(node, ctx)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index 3c31762cab..b3f056965c 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -28,7 +28,6 @@ from tensorflow.python.autograph.operators import py_builtins
 from tensorflow.python.autograph.pyct import compiler
 from tensorflow.python.autograph.pyct import inspect_utils
 from tensorflow.python.autograph.utils import py_func
-from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
 
@@ -62,7 +61,7 @@ def convert(recursive=False, verbose=False):
     @functools.wraps(f)
     def wrapper(*args, **kwargs):
       return converted_call(
-          f,
+          f, None,
           converter.ConversionOptions(
               recursive=recursive,
               verbose=verbose,
@@ -141,8 +140,22 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None):
 
 
 # TODO(mdan): Move to a private, undocumented module.
-def converted_call(f, options, *args, **kwargs):
+def converted_call(f, owner, options, *args, **kwargs):
   """Compiles a function call inline. For internal use only."""
+  if owner is not None:
+    if not isinstance(f, str):
+      raise ValueError(
+          'When owner is specified, the function name must be specified as'
+          ' a string: {}'.format(f))
+
+    # Special case when the owner is a 'super' object. In that case lookups of
+    # dynamic attributes won't work. See
+    # inspect_utils.SuperWrapperForDynamicAttrs.
+    if isinstance(owner, super):
+      owner = inspect_utils.SuperWrapperForDynamicAttrs(owner)
+
+    f = getattr(owner, f)
+
   # TODO(mdan): This needs cleanup.
   # In particular, we may want to avoid renaming functions altogether.
   if not options.force_conversion and conversion.is_whitelisted_for_graph(f):
@@ -157,12 +170,24 @@ def converted_call(f, options, *args, **kwargs):
     # Regular functions
     target_entity = f
     arg_map_target = f
-    effective_args = args
     f_class = inspect_utils.getmethodclass(f)
 
     if f_class is not None:
+      # If this is a method call, it may or may not include self.
+      #
+      # Example when self is included:
+      #   converted_call(to_graph(foo.bar), foo)
+      #
+      # Example when self is not included:
+      #   super(...).foo(args)
+      #
+      if owner is not None and (not args or args[0] is not owner):
+        effective_args = (owner,) + args
+      else:
+        effective_args = args
       partial_types = (f_class,)
     else:
+      effective_args = args
       partial_types = ()
 
   elif tf_inspect.isclass(f):
@@ -298,9 +323,6 @@ def to_graph(e,
   setattr(compiled, source_map_attribute_name,
           compiled_module.__dict__['ag_source_map__'])
 
-  if verbose:
-    logging.info('Compiled output of %s:\n\n%s\n', e, compiled_src)
-
   return compiled
 
 
diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py
index 8567c66bf1..86b5ce64cf 100644
--- a/tensorflow/python/autograph/impl/api_test.py
+++ b/tensorflow/python/autograph/impl/api_test.py
@@ -174,7 +174,7 @@ class ApiTest(test.TestCase):
       @api.convert(recursive=True)
       def test_method(self, x, s, a):
         while tf.reduce_sum(x) > s:
-          x //= api.converted_call(self.called_member,
+          x //= api.converted_call(self.called_member, None,
                                    converter.ConversionOptions(), self, a)
         return x
 
@@ -186,7 +186,7 @@ class ApiTest(test.TestCase):
       self.assertListEqual([0, 1], sess.run(x).tolist())
 
   def test_converted_call_builtin(self):
-    x = api.converted_call(range, converter.ConversionOptions(), 3)
+    x = api.converted_call(range, None, converter.ConversionOptions(), 3)
     self.assertEqual((0, 1, 2), tuple(x))
 
   def test_converted_call_function(self):
@@ -197,10 +197,18 @@ class ApiTest(test.TestCase):
       return x
 
     with self.cached_session() as sess:
-      x = api.converted_call(test_fn, converter.ConversionOptions(),
+      x = api.converted_call(test_fn, None, converter.ConversionOptions(),
                              constant_op.constant(-1))
       self.assertEqual(1, sess.run(x))
 
+  def test_converted_call_method_explicit_owner(self):
+    # TODO(mdan): Implement.
+    pass
+
+  def test_converted_call_method_explicit_super_owner(self):
+    # TODO(mdan): Implement.
+    pass
+
   def test_converted_call_method(self):
 
     class TestClass(object):
@@ -215,7 +223,8 @@ class ApiTest(test.TestCase):
 
     with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(tc.test_method, converter.ConversionOptions(), tc)
+      x = api.converted_call(tc.test_method, None,
+                             converter.ConversionOptions(), tc)
       self.assertEqual(1, sess.run(x))
 
   def test_converted_call_method_by_class(self):
@@ -232,7 +241,7 @@ class ApiTest(test.TestCase):
 
     with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(TestClass.test_method,
+      x = api.converted_call(TestClass.test_method, None,
                              converter.ConversionOptions(), tc)
       self.assertEqual(1, sess.run(x))
 
@@ -250,7 +259,7 @@ class ApiTest(test.TestCase):
 
     with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
-      x = api.converted_call(tc, converter.ConversionOptions())
+      x = api.converted_call(tc, None, converter.ConversionOptions())
       self.assertEqual(1, sess.run(x))
 
   def test_converted_call_constructor(self):
@@ -266,7 +275,7 @@ class ApiTest(test.TestCase):
         return self.x
 
     with self.cached_session() as sess:
-      tc = api.converted_call(TestClass, converter.ConversionOptions(),
+      tc = api.converted_call(TestClass, None, converter.ConversionOptions(),
                               constant_op.constant(-1))
       # tc is now a converted object.
       x = tc.test_method()
@@ -278,12 +287,12 @@ class ApiTest(test.TestCase):
       return x == 0
 
     with self.cached_session() as sess:
-      x = api.converted_call(f, converter.ConversionOptions(),
+      x = api.converted_call(f, None, converter.ConversionOptions(),
                              constant_op.constant(0))
       self.assertTrue(sess.run(x))
 
       converted_f = api.to_graph(f)
-      x = api.converted_call(converted_f, converter.ConversionOptions(),
+      x = api.converted_call(converted_f, None, converter.ConversionOptions(),
                              constant_op.constant(0))
       self.assertTrue(sess.run(x))
 
diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index 0374406ff2..3490f6b006 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -45,12 +45,14 @@ from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.core import errors
 from tensorflow.python.autograph.core import function_wrapping
 from tensorflow.python.autograph.pyct import ast_util
+from tensorflow.python.autograph.pyct import compiler
 from tensorflow.python.autograph.pyct import inspect_utils
 from tensorflow.python.autograph.pyct import origin_info
 from tensorflow.python.autograph.pyct import parser
 from tensorflow.python.autograph.pyct import qual_names
 from tensorflow.python.autograph.pyct import templates
 from tensorflow.python.autograph.pyct import transformer
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import tf_inspect
 
 
@@ -105,6 +107,9 @@ def entity_to_graph(o, program_ctx, arg_values, arg_types):
   Raises:
     ValueError: if the entity type is not supported.
   """
+  if program_ctx.options.verbose:
+    logging.info('Converting {}'.format(o))
+
   if tf_inspect.isclass(o):
     node, name, ns = class_to_graph(o, program_ctx)
   elif tf_inspect.isfunction(o):
@@ -145,6 +150,10 @@ def entity_to_graph(o, program_ctx, arg_values, arg_types):
 
   program_ctx.add_to_cache(o, node)
 
+  if program_ctx.options.verbose:
+    logging.info('Compiled output of {}:\n\n{}\n'.format(
+        o, compiler.ast_to_source(node)))
+
   if program_ctx.options.recursive:
     while True:
       candidate = None
diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index 1fc3c6006d..a09d481003 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -200,3 +200,59 @@ def getmethodclass(m):
     raise ValueError('Found too many owners of %s: %s' % (m, owners))
 
   return None
+
+
+class SuperWrapperForDynamicAttrs(object):
+  """A wrapper that supports dynamic attribute lookup on the super object.
+
+  For example, in the following code, `super` incorrectly reports that
+  `super(Bar, b)` lacks the `a` attribute:
+
+    class Foo(object):
+      def __init__(self):
+        self.a = lambda: 1
+
+      def bar(self):
+        return hasattr(self, 'a')
+
+    class Bar(Foo):
+      def bar(self):
+        return super(Bar, self).bar()
+
+
+    b = Bar()
+    print(hasattr(super(Bar, b), 'a'))  # False
+    print(super(Bar, b).bar())          # True
+
+  A practical situation when this tends to happen is Keras model hierarchies
+  that hold references to certain layers, like this:
+
+    class MiniModel(keras.Model):
+
+      def __init__(self):
+        super(MiniModel, self).__init__()
+        self.fc = keras.layers.Dense(1)
+
+      def call(self, inputs, training=True):
+        return self.fc(inputs)
+
+    class DefunnedMiniModel(MiniModel):
+
+      def call(self, inputs, training=True):
+        return super(DefunnedMiniModel, self).call(inputs, training=training)
+
+  A side effect of this wrapper is that all attributes become visible, even
+  those created in the subclass.
+  """
+
+  # TODO(mdan): Investigate why that happens - it may be for a reason.
+  # TODO(mdan): Probably need more overrides to make it look like super.
+
+  def __init__(self, target):
+    self._target = target
+
+  def __getattribute__(self, name):
+    target = object.__getattribute__(self, '_target')
+    if hasattr(target, name):
+      return getattr(target, name)
+    return getattr(target.__self__, name)
diff --git a/tensorflow/python/autograph/pyct/inspect_utils_test.py b/tensorflow/python/autograph/pyct/inspect_utils_test.py
index 11074debfc..d4f1fe8410 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils_test.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils_test.py
@@ -291,6 +291,38 @@ class InspectUtilsTest(test.TestCase):
     self.assertTrue(inspect_utils.isbuiltin(len))
     self.assertFalse(inspect_utils.isbuiltin(function_decorator))
 
+  def test_super_wrapper_for_dynamic_attrs(self):
+
+    a = object()
+    b = object()
+
+    class Base(object):
+
+      def __init__(self):
+        self.a = a
+
+    class Subclass(Base):
+
+      def __init__(self):
+        super(Subclass, self).__init__()
+        self.b = b
+
+    base = Base()
+    sub = Subclass()
+
+    sub_super = super(Subclass, sub)
+    sub_super_wrapped = inspect_utils.SuperWrapperForDynamicAttrs(sub_super)
+
+    self.assertIs(base.a, a)
+    self.assertIs(sub.a, a)
+
+    self.assertFalse(hasattr(sub_super, 'a'))
+    self.assertIs(sub_super_wrapped.a, a)
+
+    # TODO(mdan): Is this side effect harmful? Can it be avoided?
+    # Note that `b` was set in `Subclass.__init__`.
+    self.assertIs(sub_super_wrapped.b, b)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 4c5a81090726fccc80a8a6947b156b94cb368897 Mon Sep 17 00:00:00 2001
From: Asim Shankar <ashankar@google.com>
Date: Thu, 11 Oct 2018 11:16:18 -0700
Subject: [PATCH 0787/1085] [Java]: Release 1.12.0-rc0

PiperOrigin-RevId: 216728781
---
 tensorflow/java/maven/libtensorflow/pom.xml    |  2 +-
 .../java/maven/libtensorflow_jni/pom.xml       |  2 +-
 .../java/maven/libtensorflow_jni_gpu/pom.xml   |  2 +-
 tensorflow/java/maven/pom.xml                  |  2 +-
 tensorflow/java/maven/proto/pom.xml            |  2 +-
 .../maven/spark-tensorflow-connector/pom.xml   | 18 ++++++++++++++++--
 .../java/maven/tensorflow-hadoop/pom.xml       |  2 +-
 tensorflow/java/maven/tensorflow/pom.xml       |  2 +-
 8 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml
index 6b3e305e5d..041e4778cd 100644
--- a/tensorflow/java/maven/libtensorflow/pom.xml
+++ b/tensorflow/java/maven/libtensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0</version>
+    <version>1.12.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml
index f130515934..b4ccf2d77c 100644
--- a/tensorflow/java/maven/libtensorflow_jni/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0</version>
+    <version>1.12.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni</artifactId>
diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
index 67ecc2d597..09adfb2b57 100644
--- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
+++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0</version>
+    <version>1.12.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>libtensorflow_jni_gpu</artifactId>
diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml
index 8ba859da01..d7fe50ce26 100644
--- a/tensorflow/java/maven/pom.xml
+++ b/tensorflow/java/maven/pom.xml
@@ -6,7 +6,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>org.tensorflow</groupId>
   <artifactId>parentpom</artifactId>
-  <version>1.11.0</version>
+  <version>1.12.0-rc0</version>
   <packaging>pom</packaging>
 
   <url>https://www.tensorflow.org</url>
diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml
index dcd654d713..8bbe834eba 100644
--- a/tensorflow/java/maven/proto/pom.xml
+++ b/tensorflow/java/maven/proto/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0</version>
+    <version>1.12.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>proto</artifactId>
diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
index 45214f834c..b31510f637 100644
--- a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
+++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml
@@ -6,7 +6,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>spark-tensorflow-connector_2.11</artifactId>
     <packaging>jar</packaging>
-    <version>1.11.0</version>
+    <version>1.12.0-rc0</version>
     <name>spark-tensorflow-connector</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord connector for Apache Spark DataFrames</description>
@@ -33,7 +33,7 @@
         <scala.test.version>2.2.6</scala.test.version>
         <maven.compiler.version>3.0</maven.compiler.version>
         <java.version>1.8</java.version>
-        <spark.version>2.3.0</spark.version>
+        <spark.version>2.3.1</spark.version>
         <yarn.api.version>2.7.3</yarn.api.version>
         <junit.version>4.11</junit.version>
     </properties>
@@ -213,6 +213,20 @@
         </plugins>
     </build>
 
+    <repositories>
+        <repository>
+            <id>apache.snapshots</id>
+            <name>Apache Development Snapshot Repository</name>
+            <url>https://repository.apache.org/content/repositories/snapshots/</url>
+            <releases>
+                <enabled>false</enabled>
+            </releases>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
+    </repositories>
+
     <profiles>
         <profile>
             <id>test</id>
diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
index a8669ee72b..8b551e24f1 100644
--- a/tensorflow/java/maven/tensorflow-hadoop/pom.xml
+++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml
@@ -5,7 +5,7 @@
     <groupId>org.tensorflow</groupId>
     <artifactId>tensorflow-hadoop</artifactId>
     <packaging>jar</packaging>
-    <version>1.11.0</version>
+    <version>1.12.0-rc0</version>
     <name>tensorflow-hadoop</name>
     <url>https://www.tensorflow.org</url>
     <description>TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop</description>
diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml
index 67d628ba11..60fcc98bf5 100644
--- a/tensorflow/java/maven/tensorflow/pom.xml
+++ b/tensorflow/java/maven/tensorflow/pom.xml
@@ -6,7 +6,7 @@
   <parent>
     <groupId>org.tensorflow</groupId>
     <artifactId>parentpom</artifactId>
-    <version>1.11.0</version>
+    <version>1.12.0-rc0</version>
     <relativePath>../</relativePath>
   </parent>
   <artifactId>tensorflow</artifactId>
-- 
GitLab


From 20f03388ac28fdf5ad33adb87d95346209ef0052 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 11 Oct 2018 11:32:59 -0700
Subject: [PATCH 0788/1085] Automated rollback of commit
 0d054f20851f6156b1af26c35e68f6083bca8e13

PiperOrigin-RevId: 216732038
---
 tensorflow/BUILD                              |    1 +
 tensorflow/api_template.__init__.py           |   17 +-
 tensorflow/contrib/estimator/BUILD            |  337 +-
 tensorflow/contrib/estimator/__init__.py      |   46 +-
 .../estimator/python/estimator/baseline.py    |   92 +-
 .../python/estimator/baseline_test.py         |  436 ++
 .../python/estimator/boosted_trees.py         |  419 +-
 .../python/estimator/boosted_trees_test.py    |  438 ++
 .../contrib/estimator/python/estimator/dnn.py |  156 +-
 .../python/estimator/dnn_linear_combined.py   |  176 +-
 .../estimator/dnn_linear_combined_test.py     |  227 +
 .../estimator/python/estimator/dnn_test.py    |  171 +
 .../estimator/dnn_with_layer_annotations.py   |  430 +-
 .../dnn_with_layer_annotations_test.py        |  611 +++
 .../python/estimator/early_stopping.py        |  498 +-
 .../python/estimator/early_stopping_test.py   |  246 +
 .../estimator/python/estimator/export.py      |  213 +-
 .../estimator/python/estimator/export_test.py |  373 ++
 .../estimator/python/estimator/exporter.py    |  270 +-
 .../python/estimator/exporter_test.py         |  206 +
 .../estimator/python/estimator/extenders.py   |  349 +-
 .../python/estimator/extenders_test.py        |  426 ++
 .../estimator/python/estimator/head.py        |  969 +++-
 .../estimator/python/estimator/head_test.py   | 1482 ++++++
 .../estimator/python/estimator/hooks.py       |  277 +-
 .../estimator/python/estimator/hooks_test.py  |  403 ++
 .../estimator/python/estimator/linear.py      |  130 +-
 .../estimator/python/estimator/linear_test.py |  156 +
 .../estimator/python/estimator/logit_fns.py   |   86 +-
 .../python/estimator/logit_fns_test.py        |   95 +
 .../estimator/python/estimator/multi_head.py  |  416 +-
 .../python/estimator/multi_head_test.py       |  705 +++
 .../python/estimator/replicate_model_fn.py    |  820 +++-
 .../estimator/replicate_model_fn_test.py      | 1649 +++++++
 .../contrib/estimator/python/estimator/rnn.py |  572 ++-
 .../estimator/python/estimator/rnn_test.py    | 1185 +++++
 .../python/estimator/saved_model_estimator.py |  441 +-
 .../estimator/saved_model_estimator_test.py   |  369 ++
 tensorflow/python/estimator/BUILD             |  424 +-
 tensorflow/python/estimator/__init__.py       |   17 +-
 .../python/estimator/canned/__init__.py       |   32 -
 .../python/estimator/canned/baseline.py       |  366 +-
 .../python/estimator/canned/baseline_test.py  | 1558 +++++++
 .../python/estimator/canned/boosted_trees.py  | 1558 ++++++-
 .../estimator/canned/boosted_trees_test.py    | 2549 +++++++++++
 .../estimator/canned/boosted_trees_utils.py   |   72 +-
 .../canned/boosted_trees_utils_test.py        |  187 +
 tensorflow/python/estimator/canned/dnn.py     |  652 ++-
 .../estimator/canned/dnn_linear_combined.py   |  626 ++-
 .../canned/dnn_linear_combined_test.py        | 1123 +++++
 .../python/estimator/canned/dnn_test.py       |  580 +++
 .../estimator/canned/dnn_testing_utils.py     | 2068 ++++++++-
 tensorflow/python/estimator/canned/head.py    | 1593 ++++++-
 .../python/estimator/canned/head_test.py      | 4056 +++++++++++++++++
 tensorflow/python/estimator/canned/linear.py  |  535 ++-
 .../python/estimator/canned/linear_test.py    |  255 ++
 .../estimator/canned/linear_testing_utils.py  | 2349 +++++++++-
 .../python/estimator/canned/metric_keys.py    |   46 +-
 .../python/estimator/canned/optimizers.py     |   72 +-
 .../estimator/canned/optimizers_test.py       |  103 +
 .../python/estimator/canned/parsing_utils.py  |  296 +-
 .../estimator/canned/parsing_utils_test.py    |  211 +
 .../estimator/canned/prediction_keys.py       |   29 +-
 tensorflow/python/estimator/estimator.py      | 2167 ++++++++-
 tensorflow/python/estimator/estimator_lib.py  |   45 +-
 tensorflow/python/estimator/estimator_test.py | 3280 +++++++++++++
 .../python/estimator/export/__init__.py       |   32 -
 tensorflow/python/estimator/export/export.py  |  628 ++-
 .../python/estimator/export/export_lib.py     |   24 +-
 .../python/estimator/export/export_output.py  |  407 +-
 .../estimator/export/export_output_test.py    |  397 ++
 .../python/estimator/export/export_test.py    |  802 ++++
 tensorflow/python/estimator/exporter.py       |  498 +-
 tensorflow/python/estimator/exporter_test.py  |  400 ++
 tensorflow/python/estimator/gc.py             |  199 +-
 tensorflow/python/estimator/gc_test.py        |  156 +
 .../python/estimator/inputs/__init__.py       |   32 -
 tensorflow/python/estimator/inputs/inputs.py  |   19 +-
 .../python/estimator/inputs/numpy_io.py       |  217 +-
 .../python/estimator/inputs/numpy_io_test.py  |  620 +++
 .../python/estimator/inputs/pandas_io.py      |  147 +-
 .../python/estimator/inputs/pandas_io_test.py |  320 ++
 .../estimator/inputs/queues/__init__.py       |   32 -
 .../inputs/queues/feeding_functions.py        |  507 ++-
 .../inputs/queues/feeding_functions_test.py   |  391 ++
 .../inputs/queues/feeding_queue_runner.py     |  172 +-
 .../queues/feeding_queue_runner_test.py       |  140 +
 tensorflow/python/estimator/keras.py          |  492 +-
 tensorflow/python/estimator/keras_test.py     |  805 ++++
 tensorflow/python/estimator/model_fn.py       |  510 ++-
 tensorflow/python/estimator/model_fn_test.py  |  661 +++
 tensorflow/python/estimator/run_config.py     |  907 +++-
 .../python/estimator/run_config_test.py       | 1235 +++++
 tensorflow/python/estimator/training.py       | 1065 ++++-
 tensorflow/python/estimator/training_test.py  | 2198 +++++++++
 tensorflow/python/estimator/util.py           |  143 +-
 tensorflow/python/estimator/util_test.py      |  102 +
 tensorflow/python/feature_column/BUILD        |    2 +-
 tensorflow/python/tools/api/generator/BUILD   |   18 +
 ...rflow.estimator.-baseline-classifier.pbtxt |    4 +-
 ...orflow.estimator.-baseline-regressor.pbtxt |    4 +-
 .../tensorflow.estimator.-best-exporter.pbtxt |    4 +-
 ....estimator.-boosted-trees-classifier.pbtxt |    6 +-
 ...w.estimator.-boosted-trees-regressor.pbtxt |    6 +-
 ...nsorflow.estimator.-d-n-n-classifier.pbtxt |    4 +-
 ...or.-d-n-n-linear-combined-classifier.pbtxt |    4 +-
 ...tor.-d-n-n-linear-combined-regressor.pbtxt |    4 +-
 ...ensorflow.estimator.-d-n-n-regressor.pbtxt |    4 +-
 ...tensorflow.estimator.-estimator-spec.pbtxt |    4 +-
 .../v1/tensorflow.estimator.-estimator.pbtxt  |    2 +-
 .../v1/tensorflow.estimator.-eval-spec.pbtxt  |    4 +-
 .../v1/tensorflow.estimator.-exporter.pbtxt   |    2 +-
 ...tensorflow.estimator.-final-exporter.pbtxt |    4 +-
 ...ensorflow.estimator.-latest-exporter.pbtxt |    4 +-
 ...sorflow.estimator.-linear-classifier.pbtxt |    4 +-
 ...nsorflow.estimator.-linear-regressor.pbtxt |    4 +-
 .../v1/tensorflow.estimator.-mode-keys.pbtxt  |    2 +-
 .../v1/tensorflow.estimator.-run-config.pbtxt |    2 +-
 .../v1/tensorflow.estimator.-train-spec.pbtxt |    4 +-
 ...rflow.estimator.-warm-start-settings.pbtxt |    4 +-
 ...imator.export.-classification-output.pbtxt |    4 +-
 ...flow.estimator.export.-export-output.pbtxt |    2 +-
 ...low.estimator.export.-predict-output.pbtxt |    4 +-
 ....estimator.export.-regression-output.pbtxt |    4 +-
 ...mator.export.-serving-input-receiver.pbtxt |    4 +-
 ...xport.-tensor-serving-input-receiver.pbtxt |    4 +-
 ...rflow.estimator.-baseline-classifier.pbtxt |    4 +-
 ...orflow.estimator.-baseline-regressor.pbtxt |    4 +-
 .../tensorflow.estimator.-best-exporter.pbtxt |    4 +-
 ....estimator.-boosted-trees-classifier.pbtxt |    6 +-
 ...w.estimator.-boosted-trees-regressor.pbtxt |    6 +-
 ...nsorflow.estimator.-d-n-n-classifier.pbtxt |    4 +-
 ...or.-d-n-n-linear-combined-classifier.pbtxt |    4 +-
 ...tor.-d-n-n-linear-combined-regressor.pbtxt |    4 +-
 ...ensorflow.estimator.-d-n-n-regressor.pbtxt |    4 +-
 ...tensorflow.estimator.-estimator-spec.pbtxt |    4 +-
 .../v2/tensorflow.estimator.-estimator.pbtxt  |    2 +-
 .../v2/tensorflow.estimator.-eval-spec.pbtxt  |    4 +-
 .../v2/tensorflow.estimator.-exporter.pbtxt   |    2 +-
 ...tensorflow.estimator.-final-exporter.pbtxt |    4 +-
 ...ensorflow.estimator.-latest-exporter.pbtxt |    4 +-
 ...sorflow.estimator.-linear-classifier.pbtxt |    4 +-
 ...nsorflow.estimator.-linear-regressor.pbtxt |    4 +-
 .../v2/tensorflow.estimator.-mode-keys.pbtxt  |    2 +-
 .../v2/tensorflow.estimator.-run-config.pbtxt |    2 +-
 .../v2/tensorflow.estimator.-train-spec.pbtxt |    4 +-
 ...rflow.estimator.-warm-start-settings.pbtxt |    4 +-
 ...imator.export.-classification-output.pbtxt |    4 +-
 ...flow.estimator.export.-export-output.pbtxt |    2 +-
 ...low.estimator.export.-predict-output.pbtxt |    4 +-
 ....estimator.export.-regression-output.pbtxt |    4 +-
 ...mator.export.-serving-input-receiver.pbtxt |    4 +-
 ...xport.-tensor-serving-input-receiver.pbtxt |    4 +-
 tensorflow/tools/pip_package/setup.py         |    1 -
 154 files changed, 56307 insertions(+), 918 deletions(-)
 create mode 100644 tensorflow/contrib/estimator/python/estimator/baseline_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/early_stopping_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/export_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/exporter_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/extenders_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/head_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/hooks_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/linear_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/logit_fns_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/multi_head_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/rnn_test.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/saved_model_estimator_test.py
 create mode 100644 tensorflow/python/estimator/canned/baseline_test.py
 create mode 100644 tensorflow/python/estimator/canned/boosted_trees_test.py
 create mode 100644 tensorflow/python/estimator/canned/boosted_trees_utils_test.py
 create mode 100644 tensorflow/python/estimator/canned/dnn_linear_combined_test.py
 create mode 100644 tensorflow/python/estimator/canned/dnn_test.py
 create mode 100644 tensorflow/python/estimator/canned/head_test.py
 create mode 100644 tensorflow/python/estimator/canned/linear_test.py
 create mode 100644 tensorflow/python/estimator/canned/optimizers_test.py
 create mode 100644 tensorflow/python/estimator/canned/parsing_utils_test.py
 create mode 100644 tensorflow/python/estimator/estimator_test.py
 create mode 100644 tensorflow/python/estimator/export/export_output_test.py
 create mode 100644 tensorflow/python/estimator/export/export_test.py
 create mode 100644 tensorflow/python/estimator/exporter_test.py
 create mode 100644 tensorflow/python/estimator/gc_test.py
 create mode 100644 tensorflow/python/estimator/inputs/numpy_io_test.py
 create mode 100644 tensorflow/python/estimator/inputs/pandas_io_test.py
 create mode 100644 tensorflow/python/estimator/inputs/queues/feeding_functions_test.py
 create mode 100644 tensorflow/python/estimator/inputs/queues/feeding_queue_runner_test.py
 create mode 100644 tensorflow/python/estimator/keras_test.py
 create mode 100644 tensorflow/python/estimator/model_fn_test.py
 create mode 100644 tensorflow/python/estimator/run_config_test.py
 create mode 100644 tensorflow/python/estimator/training_test.py
 create mode 100644 tensorflow/python/estimator/util_test.py

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 8f4927324b..9b62a50452 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -553,6 +553,7 @@ gen_api_init_files(
 
 py_library(
     name = "tensorflow_py",
+    srcs = ["//tensorflow/python/estimator/api:estimator_python_api_gen"],
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index 65172fd74a..2de740e145 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -23,11 +23,18 @@ import os as _os
 # pylint: disable=g-bad-import-order
 from tensorflow.python import pywrap_tensorflow  # pylint: disable=unused-import
 
-from tensorflow.python.tools import component_api_helper
-component_api_helper.package_hook(
-    parent_package_str=__name__,
-    child_package_str=('tensorflow_estimator.python.estimator.api.estimator'))
-del component_api_helper
+try:
+  # Add `estimator` attribute to allow access to estimator APIs via
+  # "tf.estimator..."
+  from tensorflow.python.estimator.api import estimator  # pylint: disable=g-import-not-at-top
+
+  # Add `estimator` to the __path__ to allow "from tensorflow.estimator..."
+  # style imports.
+  from tensorflow.python.estimator import api as estimator_api  # pylint: disable=g-import-not-at-top
+  __path__ += [_os.path.dirname(estimator_api.__file__)]
+  del estimator_api
+except (ImportError, AttributeError):
+  print('tf.estimator package not installed.')
 
 # API IMPORTS PLACEHOLDER
 
diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
index 8b99158b30..1ea00fb7f3 100644
--- a/tensorflow/contrib/estimator/BUILD
+++ b/tensorflow/contrib/estimator/BUILD
@@ -8,7 +8,6 @@ licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "py_test")
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
-# PLACEHOLDER PIP REQUIREMENTS
 
 py_library(
     name = "estimator_py",
@@ -21,7 +20,6 @@ py_library(
         ":dnn_linear_combined",
         ":dnn_with_layer_annotations",
         ":early_stopping",
-        ":expect_tensorflow_estimator_installed",
         ":export",
         ":exporter",
         ":extenders",
@@ -34,7 +32,6 @@ py_library(
         ":rnn",
         ":saved_model_estimator",
         "//tensorflow:tensorflow_py_no_contrib",
-        "//tensorflow/python/estimator:estimator_py",
     ],
 )
 
@@ -43,41 +40,98 @@ py_library(
     srcs = ["python/estimator/baseline.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:baseline",
     ],
 )
 
+py_test(
+    name = "baseline_test",
+    size = "small",
+    srcs = ["python/estimator/baseline_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",
+    ],
+    deps = [
+        ":baseline",
+        ":head",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:metric_keys",
+        "//tensorflow/python/estimator:numpy_io",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "boosted_trees",
     srcs = ["python/estimator/boosted_trees.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:boosted_trees",
     ],
 )
 
+py_test(
+    name = "boosted_trees_test",
+    size = "medium",
+    srcs = ["python/estimator/boosted_trees_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",
+    ],
+    deps = [
+        ":boosted_trees",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:numpy_io",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_library(
     name = "dnn",
     srcs = ["python/estimator/dnn.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:dnn",
     ],
 )
 
+py_test(
+    name = "dnn_test",
+    size = "medium",
+    srcs = ["python/estimator/dnn_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",
+        "optonly",  # times out http://b/79220679
+    ],
+    deps = [
+        ":dnn",
+        ":head",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:dnn_testing_utils",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:prediction_keys",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "dnn_with_layer_annotations",
     srcs = ["python/estimator/dnn_with_layer_annotations.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:head",
@@ -86,18 +140,64 @@ py_library(
     ],
 )
 
+py_test(
+    name = "dnn_with_layer_annotations_test",
+    size = "medium",
+    srcs = ["python/estimator/dnn_with_layer_annotations_test.py"],
+    shard_count = 4,
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",  # b/67510291
+    ],
+    deps = [
+        ":dnn_with_layer_annotations",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:dnn",
+        "//tensorflow/python/estimator:dnn_testing_utils",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:pandas_io",
+        "//tensorflow/python/estimator:prediction_keys",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "dnn_linear_combined",
     srcs = ["python/estimator/dnn_linear_combined.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:dnn_linear_combined",
     ],
 )
 
+py_test(
+    name = "dnn_linear_combined_test",
+    size = "medium",
+    srcs = ["python/estimator/dnn_linear_combined_test.py"],
+    shard_count = 3,
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",
+    ],
+    deps = [
+        ":dnn_linear_combined",
+        ":head",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:dnn_testing_utils",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:linear_testing_utils",
+        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:prediction_keys",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "extenders",
     srcs = [
@@ -105,7 +205,6 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:model_fn",
@@ -114,6 +213,23 @@ py_library(
     ],
 )
 
+py_test(
+    name = "extenders_test",
+    size = "medium",
+    srcs = ["python/estimator/extenders_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["notsan"],  # b/62863147
+    deps = [
+        ":extenders",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/contrib/data/python/ops:dataset_ops",
+        "//tensorflow/contrib/predictor",
+        "//tensorflow/python/estimator:estimator_py",
+        "//tensorflow/python/estimator:linear",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_library(
     name = "export",
     srcs = [
@@ -121,7 +237,22 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
+        "//tensorflow/python/estimator:model_fn",
+    ],
+)
+
+py_test(
+    name = "export_test",
+    size = "medium",
+    srcs = ["python/estimator/export_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["notsan"],  # b/62863147
+    deps = [
+        ":export",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:model_fn",
     ],
 )
@@ -133,12 +264,24 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:exporter",
     ],
 )
 
+py_test(
+    name = "exporter_test",
+    size = "medium",
+    srcs = ["python/estimator/exporter_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":exporter",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator",
+        "//tensorflow/python/estimator:exporter",
+    ],
+)
+
 py_library(
     name = "head",
     srcs = [
@@ -146,7 +289,6 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:head",
@@ -156,6 +298,22 @@ py_library(
     ],
 )
 
+py_test(
+    name = "head_test",
+    size = "medium",
+    srcs = ["python/estimator/head_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":head",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:metric_keys",
+        "//tensorflow/python/estimator:model_fn",
+        "//tensorflow/python/estimator:prediction_keys",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "hooks",
     srcs = [
@@ -163,23 +321,58 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:estimator_py",
     ],
 )
 
+py_test(
+    name = "hooks_test",
+    size = "medium",
+    srcs = ["python/estimator/hooks_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["notsan"],
+    deps = [
+        ":hooks",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:estimator_py",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "linear",
     srcs = ["python/estimator/linear.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow/python/estimator",
         "//tensorflow/python/estimator:linear",
     ],
 )
 
+py_test(
+    name = "linear_test",
+    size = "medium",
+    srcs = ["python/estimator/linear_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",
+    ],
+    deps = [
+        ":head",
+        ":linear",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:linear_testing_utils",
+        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:prediction_keys",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "logit_fns",
     srcs = [
@@ -187,13 +380,24 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:dnn",
         "//tensorflow/python/estimator:linear",
     ],
 )
 
+py_test(
+    name = "logit_fns_test",
+    size = "small",
+    srcs = ["python/estimator/logit_fns_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":logit_fns",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:model_fn",
+    ],
+)
+
 py_library(
     name = "multi_head",
     srcs = [
@@ -201,7 +405,6 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:head",
@@ -211,6 +414,23 @@ py_library(
     ],
 )
 
+py_test(
+    name = "multi_head_test",
+    size = "small",
+    srcs = ["python/estimator/multi_head_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":head",
+        ":multi_head",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator:metric_keys",
+        "//tensorflow/python/estimator:model_fn",
+        "//tensorflow/python/estimator:prediction_keys",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "replicate_model_fn",
     srcs = [
@@ -218,7 +438,6 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator:export_output",
         "//tensorflow/python/estimator:model_fn",
@@ -227,12 +446,35 @@ py_library(
     ],
 )
 
+cuda_py_test(
+    name = "replicate_model_fn_test",
+    size = "medium",
+    srcs = ["python/estimator/replicate_model_fn_test.py"],
+    additional_deps = [
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator",
+        "//tensorflow/python/estimator:dnn",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:export_output",
+        "//tensorflow/python/estimator:model_fn",
+        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:optimizers",
+        "//tensorflow/python/estimator:prediction_keys",
+        ":replicate_model_fn",
+    ],
+    tags = [
+        "manual",
+        "multi_gpu",
+        "notap",
+    ],
+)
+
 py_library(
     name = "rnn",
     srcs = ["python/estimator/rnn.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":extenders",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/contrib/feature_column:feature_column_py",
@@ -243,22 +485,55 @@ py_library(
     ],
 )
 
+py_test(
+    name = "rnn_test",
+    size = "medium",
+    srcs = ["python/estimator/rnn_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "noasan",  # times out
+        "notsan",
+        "optonly",  # times out http://b/79220679
+    ],
+    deps = [
+        ":head",
+        ":rnn",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/contrib/data",
+        "//tensorflow/python/estimator:numpy_io",
+        "//tensorflow/python/estimator:parsing_utils",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "early_stopping",
     srcs = ["python/estimator/early_stopping.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
     ],
 )
 
+py_test(
+    name = "early_stopping_test",
+    srcs = ["python/estimator/early_stopping_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":early_stopping",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
 py_library(
     name = "saved_model_estimator",
     srcs = ["python/estimator/saved_model_estimator.py"],
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":export",
         "//tensorflow:tensorflow_py_no_contrib",
         "//tensorflow/python/estimator",
@@ -267,9 +542,21 @@ py_library(
     ],
 )
 
-py_library(
-    name = "expect_tensorflow_estimator_installed",
-    # This is a dummy rule used as a dependency in open-source.
-    # We expect tensorflow_estimator to already be installed.
-    visibility = ["//visibility:public"],
+py_test(
+    name = "saved_model_estimator_test",
+    size = "medium",
+    srcs = ["python/estimator/saved_model_estimator_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "notsan",
+    ],
+    deps = [
+        ":export",
+        ":saved_model_estimator",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//tensorflow/python/estimator",
+        "//tensorflow/python/estimator:export_export",
+        "//tensorflow/python/estimator:export_output",
+        "//tensorflow/python/estimator:model_fn",
+    ],
 )
diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py
index fc7d94e4fc..419609b1af 100644
--- a/tensorflow/contrib/estimator/__init__.py
+++ b/tensorflow/contrib/estimator/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,37 +12,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""estimator python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Experimental utilities re:tf.estimator.*."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# Importing from tensorflow.python.estimator
-# is unsupported and will soon break!
-
-from tensorflow_estimator.contrib import estimator
-
-# Fixes remove_undocumented not working as intended.
-#
-# Problem is that when the below import happens (for first time,
-# Python only imports things once), Python sets attribute named
-# 'python' to this package. If this first import happens
-# after the call to remove_undocumented, then the 'python'
-# attribute won't be removed.
-import tensorflow.contrib.estimator.python
-
-# Include attrs that start with single underscore.
-estimator.__all__ = [s for s in dir(estimator) if not s.startswith('__')]
+# pylint: disable=unused-import,line-too-long,wildcard-import
+from tensorflow.contrib.estimator.python.estimator.baseline import *
+from tensorflow.contrib.estimator.python.estimator.boosted_trees import *
+from tensorflow.contrib.estimator.python.estimator.dnn import *
+from tensorflow.contrib.estimator.python.estimator.dnn_with_layer_annotations import *
+from tensorflow.contrib.estimator.python.estimator.dnn_linear_combined import *
+from tensorflow.contrib.estimator.python.estimator.early_stopping import *
+from tensorflow.contrib.estimator.python.estimator.export import *
+from tensorflow.contrib.estimator.python.estimator.extenders import *
+from tensorflow.contrib.estimator.python.estimator.head import *
+from tensorflow.contrib.estimator.python.estimator.hooks import *
+from tensorflow.contrib.estimator.python.estimator.linear import *
+from tensorflow.contrib.estimator.python.estimator.logit_fns import *
+from tensorflow.contrib.estimator.python.estimator.multi_head import *
+from tensorflow.contrib.estimator.python.estimator.replicate_model_fn import *
+from tensorflow.contrib.estimator.python.estimator.rnn import *
+from tensorflow.contrib.estimator.python.estimator.saved_model_estimator import *
+from tensorflow.python.estimator.export.export import *
 
-from tensorflow_estimator.contrib.estimator import *
 from tensorflow.python.util.all_util import remove_undocumented
+# pylint: enable=unused-import,line-too-long,wildcard-import
 
 _allowed_symbols = [
     'add_metrics',
diff --git a/tensorflow/contrib/estimator/python/estimator/baseline.py b/tensorflow/contrib/estimator/python/estimator/baseline.py
index 5a684befb6..beffbee730 100644
--- a/tensorflow/contrib/estimator/python/estimator/baseline.py
+++ b/tensorflow/contrib/estimator/python/estimator/baseline.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,87 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""baseline python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
+"""Baseline estimators."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import baseline
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import baseline
+
+
+class BaselineEstimator(estimator.Estimator):
+  """An estimator that can establish a simple baseline.
+
+  The estimator uses a user-specified head.
+
+  This estimator ignores feature values and will learn to predict the average
+  value of each label. E.g. for single-label classification problems, this will
+  predict the probability distribution of the classes as seen in the labels.
+  For multi-label classification problems, it will predict the ratio of examples
+  that contain each class.
+
+  Example:
+
+  ```python
+
+  # Build baseline multi-label classifier.
+  estimator = BaselineEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3))
+
+  # Input builders
+  def input_fn_train: # returns x, y (where y represents label's class index).
+    pass
+
+  def input_fn_eval: # returns x, y (where y represents label's class index).
+    pass
+
+  # Fit model.
+  estimator.train(input_fn=input_fn_train)
+
+  # Evaluates cross entropy between the test and train labels.
+  loss = classifier.evaluate(input_fn=input_fn_eval)["loss"]
+
+  # For each class, predicts the ratio of training examples that contain the
+  # class.
+  predictions = classifier.predict(new_samples)
+
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+    otherwise there will be a `KeyError`:
+
+  * if `weight_column` passed to the `head` constructor is not `None`, a feature
+    with `key=weight_column` whose value is a `Tensor`.
+  """
 
-# Include attrs that start with single underscore.
-baseline.__all__ = [s for s in dir(baseline) if not s.startswith('__')]
+  def __init__(self,
+               head,
+               model_dir=None,
+               optimizer='Ftrl',
+               config=None):
+    """Initializes a BaselineEstimator instance.
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.baseline import *
+    Args:
+      head: A `_Head` instance constructed with a method such as
+        `tf.contrib.estimator.multi_label_head`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator to
+        continue training a previously saved model.
+      optimizer: String, `tf.Optimizer` object, or callable that creates the
+        optimizer to use for training. If not specified, will use
+        `FtrlOptimizer` with a default learning rate of 0.3.
+      config: `RunConfig` object to configure the runtime settings.
+    """
+    def _model_fn(features, labels, mode, config):
+      return baseline._baseline_model_fn(  # pylint: disable=protected-access
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          optimizer=optimizer,
+          config=config)
+    super(BaselineEstimator, self).__init__(
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config)
diff --git a/tensorflow/contrib/estimator/python/estimator/baseline_test.py b/tensorflow/contrib/estimator/python/estimator/baseline_test.py
new file mode 100644
index 0000000000..513feb03b6
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/baseline_test.py
@@ -0,0 +1,436 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for baseline.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import baseline
+from tensorflow.contrib.estimator.python.estimator import head as head_lib
+from tensorflow.python.client import session as tf_session
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import saver
+
+# Names of variables created by model.
+BIAS_NAME = 'baseline/bias'
+
+
+def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
+  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
+    expected = ops.convert_to_tensor(expected, name='expected')
+    actual = ops.convert_to_tensor(actual, name='actual')
+    rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
+    rtol = ops.convert_to_tensor(rtol, name='rtol')
+    return check_ops.assert_less(
+        rdiff,
+        rtol,
+        data=('Condition expected =~ actual did not hold element-wise:'
+              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
+              'rtol = ', rtol,),
+        name=scope)
+
+
+def save_variables_to_ckpt(model_dir):
+  init_all_op = [variables.global_variables_initializer()]
+  with tf_session.Session() as sess:
+    sess.run(init_all_op)
+    saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
+
+
+def _baseline_estimator_fn(
+    weight_column=None, label_dimension=1, *args, **kwargs):
+  """Returns a BaselineEstimator that uses regression_head."""
+  return baseline.BaselineEstimator(
+      head=head_lib.regression_head(
+          weight_column=weight_column, label_dimension=label_dimension,
+          # Tests in core (from which this test inherits) test the sum loss.
+          loss_reduction=losses.Reduction.SUM),
+      *args, **kwargs)
+
+
+class BaselineEstimatorEvaluationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_evaluation_batch(self):
+    """Tests evaluation for batch_size==2."""
+    with ops.Graph().as_default():
+      variables.Variable([13.0], name=BIAS_NAME)
+      variables.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    baseline_estimator = _baseline_estimator_fn(model_dir=self._model_dir)
+    eval_metrics = baseline_estimator.evaluate(
+        input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1)
+
+    # Logit is bias = 13, while label is 10.
+    # Loss per example is 3**2 = 9.
+    # Training loss is the sum over batch = 9 + 9 = 18
+    # Average loss is the average over batch = 9
+    self.assertDictEqual({
+        metric_keys.MetricKeys.LOSS: 18.,
+        metric_keys.MetricKeys.LOSS_MEAN: 9.,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
+        metric_keys.MetricKeys.LABEL_MEAN: 10.,
+        ops.GraphKeys.GLOBAL_STEP: 100
+    }, eval_metrics)
+
+  def test_evaluation_weights(self):
+    """Tests evaluation with weights."""
+    with ops.Graph().as_default():
+      variables.Variable([13.0], name=BIAS_NAME)
+      variables.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    def _input_fn():
+      features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
+      labels = ((10.,), (10.,))
+      return features, labels
+
+    baseline_estimator = _baseline_estimator_fn(
+        weight_column='weights',
+        model_dir=self._model_dir)
+    eval_metrics = baseline_estimator.evaluate(input_fn=_input_fn, steps=1)
+
+    # Logit is bias = 13, while label is 10.
+    # Loss per example is 3**2 = 9.
+    # Training loss is the weighted sum over batch = 9 + 2*9 = 27
+    # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9
+    self.assertDictEqual({
+        metric_keys.MetricKeys.LOSS: 27.,
+        metric_keys.MetricKeys.LOSS_MEAN: 9.,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
+        metric_keys.MetricKeys.LABEL_MEAN: 10.,
+        ops.GraphKeys.GLOBAL_STEP: 100
+    }, eval_metrics)
+
+  def test_evaluation_for_multi_dimensions(self):
+    label_dim = 2
+    with ops.Graph().as_default():
+      variables.Variable([46.0, 58.0], name=BIAS_NAME)
+      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    baseline_estimator = _baseline_estimator_fn(
+        label_dimension=label_dim,
+        model_dir=self._model_dir)
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'age': np.array([[2., 4., 5.]]),
+        },
+        y=np.array([[46., 58.]]),
+        batch_size=1,
+        num_epochs=None,
+        shuffle=False)
+    eval_metrics = baseline_estimator.evaluate(input_fn=input_fn, steps=1)
+
+    self.assertItemsEqual(
+        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
+         metric_keys.MetricKeys.PREDICTION_MEAN,
+         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
+        eval_metrics.keys())
+
+    # Logit is bias which is [46, 58]
+    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
+
+
+class BaselineEstimatorPredictTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_1d(self):
+    """Tests predict when all variables are one-dimensional."""
+    with ops.Graph().as_default():
+      variables.Variable([.2], name=BIAS_NAME)
+      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    baseline_estimator = _baseline_estimator_fn(model_dir=self._model_dir)
+
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': np.array([[2.]])},
+        y=None,
+        batch_size=1,
+        num_epochs=1,
+        shuffle=False)
+    predictions = baseline_estimator.predict(input_fn=predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # x * weight + bias = 2. * 10. + .2 = 20.2
+    self.assertAllClose([[.2]], predicted_scores)
+
+  def testMultiDim(self):
+    """Tests predict when all variables are multi-dimenstional."""
+    batch_size = 2
+    label_dimension = 3
+    with ops.Graph().as_default():
+      variables.Variable(  # shape=[label_dimension]
+          [.2, .4, .6], name=BIAS_NAME)
+      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    baseline_estimator = _baseline_estimator_fn(
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+
+    predict_input_fn = numpy_io.numpy_input_fn(
+        # x shape=[batch_size, x_dim]
+        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
+        y=None,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+    predictions = baseline_estimator.predict(input_fn=predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # score = bias, shape=[batch_size, label_dimension]
+    self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]],
+                        predicted_scores)
+
+
+class BaselineEstimatorIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, prediction_length):
+    feature_columns = [
+        feature_column_lib.numeric_column('x', shape=(input_dimension,))
+    ]
+    est = _baseline_estimator_fn(
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    # learn y = x
+    est.train(train_input_fn, steps=200)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array(
+        [x['predictions'] for x in est.predict(predict_input_fn)])
+    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    input_dimension = label_dimension
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=None,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length)
+
+
+class BaselineEstimatorTrainingTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _mock_optimizer(self, expected_loss=None):
+    expected_var_names = [
+        '%s:0' % BIAS_NAME
+    ]
+
+    def _minimize(loss, global_step=None, var_list=None):
+      trainable_vars = var_list or ops.get_collection(
+          ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertItemsEqual(expected_var_names,
+                            [var.name for var in trainable_vars])
+
+      # Verify loss. We can't check the value directly, so we add an assert op.
+      self.assertEquals(0, loss.shape.ndims)
+      if expected_loss is None:
+        if global_step is not None:
+          return state_ops.assign_add(global_step, 1).op
+        return control_flow_ops.no_op()
+      assert_loss = assert_close(
+          math_ops.to_float(expected_loss, name='expected'),
+          loss,
+          name='assert_loss')
+      with ops.control_dependencies((assert_loss,)):
+        if global_step is not None:
+          return state_ops.assign_add(global_step, 1).op
+        return control_flow_ops.no_op()
+
+    mock_optimizer = test.mock.NonCallableMock(
+        spec=optimizer.Optimizer,
+        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
+    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
+
+    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
+    # So, return mock_optimizer itself for deepcopy.
+    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
+    return mock_optimizer
+
+  def _assert_checkpoint(self,
+                         label_dimension,
+                         expected_global_step,
+                         expected_bias=None):
+    shapes = {
+        name: shape
+        for (name, shape) in checkpoint_utils.list_variables(self._model_dir)
+    }
+
+    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
+    self.assertEqual(expected_global_step,
+                     checkpoint_utils.load_variable(self._model_dir,
+                                                    ops.GraphKeys.GLOBAL_STEP))
+
+    self.assertEqual([label_dimension], shapes[BIAS_NAME])
+    if expected_bias is not None:
+      self.assertEqual(expected_bias,
+                       checkpoint_utils.load_variable(self._model_dir,
+                                                      BIAS_NAME))
+
+  def testFromScratch(self):
+    # Create BaselineRegressor.
+    label = 5.
+    age = 17
+    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
+    mock_optimizer = self._mock_optimizer(expected_loss=25.)
+    baseline_estimator = _baseline_estimator_fn(
+        model_dir=self._model_dir,
+        optimizer=mock_optimizer)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    baseline_estimator.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        label_dimension=1,
+        expected_global_step=num_steps,
+        expected_bias=[0.])
+
+  def testFromCheckpoint(self):
+    # Create initial checkpoint.
+    bias = 7.0
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables.Variable([bias], name=BIAS_NAME)
+      variables.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # logits = bias = 6.
+    # loss = (logits - label)^2 = (7 - 5)^2 = 4
+    mock_optimizer = self._mock_optimizer(expected_loss=4.)
+    baseline_estimator = _baseline_estimator_fn(
+        model_dir=self._model_dir,
+        optimizer=mock_optimizer)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    baseline_estimator.train(
+        input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        label_dimension=1,
+        expected_global_step=initial_global_step + num_steps,
+        expected_bias=[bias])
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
index e2a7d01530..b131ed4f12 100644
--- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
+++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py
@@ -12,23 +12,414 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""boosted_trees python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
+"""Boosted Trees estimators."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import boosted_trees
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees
+from tensorflow.python.estimator.canned import head as head_lib
+
+
+def _validate_input_fn_and_repeat_dataset(train_input_fn):
+  """Validates whether the input_fn is valid, and repeat() if tf.Dataset."""
+  def _input_fn():
+    result_input_fn = train_input_fn()
+    if isinstance(result_input_fn, dataset_ops.Dataset):
+      return result_input_fn.repeat()
+    return result_input_fn
+
+  return _input_fn
+
+
+def _is_classification_head(head):
+  """Infers if the head is a classification head."""
+  # Check using all classification heads defined in canned/head.py. However, it
+  # is not a complete list - it does not check for other classification heads
+  # not defined in the head library.
+  # pylint: disable=protected-access
+  return isinstance(head,
+                    (head_lib._BinaryLogisticHeadWithSigmoidCrossEntropyLoss,
+                     head_lib._MultiClassHeadWithSoftmaxCrossEntropyLoss))
+  # pylint: enable=protected-access
+
+
+class _BoostedTreesEstimator(canned_boosted_trees._BoostedTreesBase):  # pylint: disable=protected-access
+  """An Estimator for Tensorflow Boosted Trees models."""
+
+  def __init__(self,
+               feature_columns,
+               n_batches_per_layer,
+               head,
+               model_dir=None,
+               weight_column=None,
+               n_trees=100,
+               max_depth=6,
+               learning_rate=0.1,
+               l1_regularization=0.,
+               l2_regularization=0.,
+               tree_complexity=0.,
+               min_node_weight=0.,
+               config=None,
+               center_bias=False,
+               pruning_mode='none'):
+    """Initializes a `BoostedTreesEstimator` instance.
+
+    Args:
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `FeatureColumn`.
+      n_batches_per_layer: the number of batches to collect statistics per
+        layer.
+      head: the `Head` instance defined for Estimator.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into an estimator
+        to continue training a previously saved model.
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to downweight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      n_trees: number trees to be created.
+      max_depth: maximum depth of the tree to grow.
+      learning_rate: shrinkage parameter to be used when a tree added to the
+        model.
+      l1_regularization: regularization multiplier applied to the absolute
+        weights of the tree leafs.
+      l2_regularization: regularization multiplier applied to the square weights
+        of the tree leafs.
+      tree_complexity: regularization factor to penalize trees with more leaves.
+      min_node_weight: minimum hessian a node must have for a split to be
+        considered. The value will be compared with sum(leaf_hessian)/
+        (batch_size * n_batches_per_layer).
+      config: `RunConfig` object to configure the runtime settings.
+      center_bias: Whether bias centering needs to occur. Bias centering refers
+        to the first node in the very first tree returning the prediction that
+        is aligned with the original labels distribution. For example, for
+        regression problems, the first node will return the mean of the labels.
+        For binary classification problems, it will return a logit for a prior
+        probability of label 1.
+      pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
+        pruning (do not split a node if not enough gain is observed) and post
+        pruning (build the tree up to a max depth and then prune branches with
+        negative gain). For pre and post pruning, you MUST provide
+        tree_complexity >0.
+
+    Raises:
+      ValueError: when wrong arguments are given or unsupported functionalities
+         are requested.
+    """
+    # HParams for the model.
+    # pylint: disable=protected-access
+    tree_hparams = canned_boosted_trees._TreeHParams(
+        n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
+        tree_complexity, min_node_weight, center_bias, pruning_mode)
+
+    def _model_fn(features, labels, mode, config):
+      return canned_boosted_trees._bt_model_fn(
+          features,
+          labels,
+          mode,
+          head,
+          feature_columns,
+          tree_hparams,
+          n_batches_per_layer,
+          config=config)
+
+    super(_BoostedTreesEstimator, self).__init__(
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        feature_columns=feature_columns,
+        head=head,
+        center_bias=center_bias,
+        is_classification=_is_classification_head(head))
+    # pylint: enable=protected-access
+
+
+def boosted_trees_classifier_train_in_memory(
+    train_input_fn,
+    feature_columns,
+    model_dir=None,
+    n_classes=canned_boosted_trees._HOLD_FOR_MULTI_CLASS_SUPPORT,
+    weight_column=None,
+    label_vocabulary=None,
+    n_trees=100,
+    max_depth=6,
+    learning_rate=0.1,
+    l1_regularization=0.,
+    l2_regularization=0.,
+    tree_complexity=0.,
+    min_node_weight=0.,
+    config=None,
+    train_hooks=None,
+    center_bias=False,
+    pruning_mode='none'):
+  """Trains a boosted tree classifier with in memory dataset.
+
+  Example:
+
+  ```python
+  bucketized_feature_1 = bucketized_column(
+    numeric_column('feature_1'), BUCKET_BOUNDARIES_1)
+  bucketized_feature_2 = bucketized_column(
+    numeric_column('feature_2'), BUCKET_BOUNDARIES_2)
+
+  def train_input_fn():
+    dataset = create-dataset-from-training-data
+    # This is tf.data.Dataset of a tuple of feature dict and label.
+    #   e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}),
+    #                     Dataset.from_tensors(label_array)))
+    # The returned Dataset shouldn't be batched.
+    # If Dataset repeats, only the first repetition would be used for training.
+    return dataset
+
+  classifier = boosted_trees_classifier_train_in_memory(
+      train_input_fn,
+      feature_columns=[bucketized_feature_1, bucketized_feature_2],
+      n_trees=100,
+      ... <some other params>
+  )
+
+  def input_fn_eval():
+    ...
+    return dataset
+
+  metrics = classifier.evaluate(input_fn=input_fn_eval, steps=10)
+  ```
+
+  Args:
+    train_input_fn: the input function returns a dataset containing a single
+      epoch of *unbatched* features and labels.
+    feature_columns: An iterable containing all the feature columns used by
+      the model. All items in the set should be instances of classes derived
+      from `FeatureColumn`.
+    model_dir: Directory to save model parameters, graph and etc. This can
+      also be used to load checkpoints from the directory into an estimator
+      to continue training a previously saved model.
+    n_classes: number of label classes. Default is binary classification.
+      Multiclass support is not yet implemented.
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to downweight or boost examples during training. It
+      will be multiplied by the loss of the example. If it is a string, it is
+      used as a key to fetch weight tensor from the `features`. If it is a
+      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+      then weight_column.normalizer_fn is applied on it to get weight tensor.
+    label_vocabulary: A list of strings represents possible label values. If
+      given, labels must be string type and have any value in
+      `label_vocabulary`. If it is not given, that means labels are
+      already encoded as integer or float within [0, 1] for `n_classes=2` and
+      encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
+      Also there will be errors if vocabulary is not provided and labels are
+      string.
+    n_trees: number trees to be created.
+    max_depth: maximum depth of the tree to grow.
+    learning_rate: shrinkage parameter to be used when a tree added to the
+      model.
+    l1_regularization: regularization multiplier applied to the absolute
+      weights of the tree leafs.
+    l2_regularization: regularization multiplier applied to the square weights
+      of the tree leafs.
+    tree_complexity: regularization factor to penalize trees with more leaves.
+    min_node_weight: minimum hessian a node must have for a split to be
+        considered. The value will be compared with sum(leaf_hessian)/
+        (batch_size * n_batches_per_layer).
+    config: `RunConfig` object to configure the runtime settings.
+    train_hooks: a list of Hook instances to be passed to estimator.train()
+    center_bias: Whether bias centering needs to occur. Bias centering refers
+        to the first node in the very first tree returning the prediction that
+        is aligned with the original labels distribution. For example, for
+        regression problems, the first node will return the mean of the labels.
+        For binary classification problems, it will return a logit for a prior
+        probability of label 1.
+    pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
+        pruning (do not split a node if not enough gain is observed) and post
+        pruning (build the tree up to a max depth and then prune branches with
+        negative gain). For pre and post pruning, you MUST provide
+        tree_complexity >0.
+
+  Returns:
+    a `BoostedTreesClassifier` instance created with the given arguments and
+      trained with the data loaded up on memory from the input_fn.
+
+  Raises:
+    ValueError: when wrong arguments are given or unsupported functionalities
+       are requested.
+  """
+  # pylint: disable=protected-access
+  # TODO(nponomareva): Support multi-class cases.
+  if n_classes == canned_boosted_trees._HOLD_FOR_MULTI_CLASS_SUPPORT:
+    n_classes = 2
+  head, closed_form = (
+      canned_boosted_trees._create_classification_head_and_closed_form(
+          n_classes, weight_column, label_vocabulary=label_vocabulary))
+
+  # HParams for the model.
+  tree_hparams = canned_boosted_trees._TreeHParams(
+      n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
+      tree_complexity, min_node_weight, center_bias, pruning_mode)
+
+  def _model_fn(features, labels, mode, config):
+    return canned_boosted_trees._bt_model_fn(
+        features,
+        labels,
+        mode,
+        head,
+        feature_columns,
+        tree_hparams,
+        n_batches_per_layer=1,
+        config=config,
+        closed_form_grad_and_hess_fn=closed_form,
+        train_in_memory=True)
+
+  in_memory_classifier = estimator.Estimator(
+      model_fn=_model_fn, model_dir=model_dir, config=config)
+
+  in_memory_classifier.train(
+      input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn),
+      hooks=train_hooks)
+
+  return in_memory_classifier
+  # pylint: enable=protected-access
+
+
+def boosted_trees_regressor_train_in_memory(
+    train_input_fn,
+    feature_columns,
+    model_dir=None,
+    label_dimension=canned_boosted_trees._HOLD_FOR_MULTI_DIM_SUPPORT,
+    weight_column=None,
+    n_trees=100,
+    max_depth=6,
+    learning_rate=0.1,
+    l1_regularization=0.,
+    l2_regularization=0.,
+    tree_complexity=0.,
+    min_node_weight=0.,
+    config=None,
+    train_hooks=None,
+    center_bias=False,
+    pruning_mode='none'):
+  """Trains a boosted tree regressor with in memory dataset.
+
+  Example:
+
+  ```python
+  bucketized_feature_1 = bucketized_column(
+    numeric_column('feature_1'), BUCKET_BOUNDARIES_1)
+  bucketized_feature_2 = bucketized_column(
+    numeric_column('feature_2'), BUCKET_BOUNDARIES_2)
+
+  def train_input_fn():
+    dataset = create-dataset-from-training-data
+    # This is tf.data.Dataset of a tuple of feature dict and label.
+    #   e.g. Dataset.zip((Dataset.from_tensors({'f1': f1_array, ...}),
+    #                     Dataset.from_tensors(label_array)))
+    # The returned Dataset shouldn't be batched.
+    # If Dataset repeats, only the first repetition would be used for training.
+    return dataset
+
+  regressor = boosted_trees_regressor_train_in_memory(
+      train_input_fn,
+      feature_columns=[bucketized_feature_1, bucketized_feature_2],
+      n_trees=100,
+      ... <some other params>
+  )
+
+  def input_fn_eval():
+    ...
+    return dataset
+
+  metrics = regressor.evaluate(input_fn=input_fn_eval, steps=10)
+  ```
+
+  Args:
+    train_input_fn: the input function returns a dataset containing a single
+      epoch of *unbatched* features and labels.
+    feature_columns: An iterable containing all the feature columns used by
+      the model. All items in the set should be instances of classes derived
+      from `FeatureColumn`.
+    model_dir: Directory to save model parameters, graph and etc. This can
+      also be used to load checkpoints from the directory into an estimator
+      to continue training a previously saved model.
+    label_dimension: Number of regression targets per example.
+      Multi-dimensional support is not yet implemented.
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to downweight or boost examples during training. It
+      will be multiplied by the loss of the example. If it is a string, it is
+      used as a key to fetch weight tensor from the `features`. If it is a
+      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+      then weight_column.normalizer_fn is applied on it to get weight tensor.
+    n_trees: number trees to be created.
+    max_depth: maximum depth of the tree to grow.
+    learning_rate: shrinkage parameter to be used when a tree added to the
+      model.
+    l1_regularization: regularization multiplier applied to the absolute
+      weights of the tree leafs.
+    l2_regularization: regularization multiplier applied to the square weights
+      of the tree leafs.
+    tree_complexity: regularization factor to penalize trees with more leaves.
+    min_node_weight: minimum hessian a node must have for a split to be
+        considered. The value will be compared with sum(leaf_hessian)/
+        (batch_size * n_batches_per_layer).
+    config: `RunConfig` object to configure the runtime settings.
+    train_hooks: a list of Hook instances to be passed to estimator.train().
+    center_bias: Whether bias centering needs to occur. Bias centering refers
+        to the first node in the very first tree returning the prediction that
+        is aligned with the original labels distribution. For example, for
+        regression problems, the first node will return the mean of the labels.
+        For binary classification problems, it will return a logit for a prior
+        probability of label 1.
+    pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
+        pruning (do not split a node if not enough gain is observed) and post
+        pruning (build the tree up to a max depth and then prune branches with
+        negative gain). For pre and post pruning, you MUST provide
+        tree_complexity >0.
+
+  Returns:
+    a `BoostedTreesClassifier` instance created with the given arguments and
+      trained with the data loaded up on memory from the input_fn.
+
+  Raises:
+    ValueError: when wrong arguments are given or unsupported functionalities
+       are requested.
+  """
+  # pylint: disable=protected-access
+  # TODO(nponomareva): Extend it to multi-dimension cases.
+  if label_dimension == canned_boosted_trees._HOLD_FOR_MULTI_DIM_SUPPORT:
+    label_dimension = 1
+  head = canned_boosted_trees._create_regression_head(label_dimension,
+                                                      weight_column)
+
+  # HParams for the model.
+  tree_hparams = canned_boosted_trees._TreeHParams(
+      n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
+      tree_complexity, min_node_weight, center_bias, pruning_mode)
+
+  def _model_fn(features, labels, mode, config):
+    return canned_boosted_trees._bt_model_fn(
+        features,
+        labels,
+        mode,
+        head,
+        feature_columns,
+        tree_hparams,
+        n_batches_per_layer=1,
+        config=config,
+        train_in_memory=True)
+
+  in_memory_regressor = estimator.Estimator(
+      model_fn=_model_fn, model_dir=model_dir, config=config)
 
-# Include attrs that start with single underscore.
-boosted_trees.__all__ = [
-    s for s in dir(boosted_trees) if not s.startswith('__')
-]
+  in_memory_regressor.train(
+      input_fn=_validate_input_fn_and_repeat_dataset(train_input_fn),
+      hooks=train_hooks)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.boosted_trees import *
+  return in_memory_regressor
+  # pylint: enable=protected-access
diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py
new file mode 100644
index 0000000000..e23d9c0fc4
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py
@@ -0,0 +1,438 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests boosted_trees estimators."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.estimator.python.estimator import boosted_trees
+from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import googletest
+from tensorflow.python.training import checkpoint_utils
+
+NUM_FEATURES = 3
+
+BUCKET_BOUNDARIES = [-2., .5, 12.]  # Boundaries for all the features.
+INPUT_FEATURES = np.array(
+    [
+        [12.5, 1.0, -2.001, -2.0001, -1.999],  # feature_0 quantized:[3,2,0,0,1]
+        [2.0, -3.0, 0.5, 0.0, 0.4995],         # feature_1 quantized:[2,0,2,1,1]
+        [3.0, 20.0, 50.0, -100.0, 102.75],     # feature_2 quantized:[2,3,3,0,3]
+    ],
+    dtype=np.float32)
+CLASSIFICATION_LABELS = [[0.], [1.], [1.], [0.], [0.]]
+REGRESSION_LABELS = [[1.5], [0.3], [0.2], [2.], [5.]]
+FEATURES_DICT = {'f_%d' % i: INPUT_FEATURES[i] for i in range(NUM_FEATURES)}
+
+
+def _make_train_input_fn(is_classification):
+  """Makes train input_fn for classification/regression."""
+
+  def _input_fn():
+    features_dict = dict(FEATURES_DICT)
+    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
+    return features_dict, labels
+
+  return _input_fn
+
+
+def _make_train_input_fn_dataset(is_classification):
+  """Makes input_fn using Dataset."""
+
+  def _input_fn():
+    features_dict = dict(FEATURES_DICT)
+    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
+    ds = dataset_ops.Dataset.zip(
+        (dataset_ops.Dataset.from_tensors(features_dict),
+         dataset_ops.Dataset.from_tensors(labels)
+        ))
+    return ds
+
+  return _input_fn
+
+
+class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    self._head = canned_boosted_trees._create_regression_head(label_dimension=1)
+    self._feature_columns = {
+        feature_column.bucketized_column(
+            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
+            BUCKET_BOUNDARIES)
+        for i in range(NUM_FEATURES)
+    }
+
+  def _assert_checkpoint(self, model_dir, global_step, finalized_trees,
+                         attempted_layers):
+    reader = checkpoint_utils.load_checkpoint(model_dir)
+    self.assertEqual(global_step, reader.get_tensor(ops.GraphKeys.GLOBAL_STEP))
+    serialized = reader.get_tensor('boosted_trees:0_serialized')
+    ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+    ensemble_proto.ParseFromString(serialized)
+    self.assertEqual(
+        finalized_trees,
+        sum([1 for t in ensemble_proto.tree_metadata if t.is_finalized]))
+    self.assertEqual(attempted_layers,
+                     ensemble_proto.growing_metadata.num_layers_attempted)
+
+  def testTrainAndEvaluateEstimator(self):
+    input_fn = _make_train_input_fn(is_classification=False)
+
+    est = boosted_trees._BoostedTreesEstimator(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        head=self._head,
+        max_depth=5)
+
+    # It will stop after 10 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(input_fn, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10)
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 1.008551)
+
+  def testTrainAndEvaluateEstimatorWithCenterBias(self):
+    input_fn = _make_train_input_fn(is_classification=False)
+
+    est = boosted_trees._BoostedTreesEstimator(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        head=self._head,
+        max_depth=5,
+        center_bias=True)
+
+    # It will stop after 11 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(input_fn, steps=num_steps)
+    # 10 steps for training and 2 step for bias centering.
+    self._assert_checkpoint(
+        est.model_dir, global_step=12, finalized_trees=2, attempted_layers=10)
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 0.614642)
+
+  def testTrainAndEvaluateEstimatorWithPrePruning(self):
+    input_fn = _make_train_input_fn(is_classification=False)
+
+    est = boosted_trees._BoostedTreesEstimator(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        head=self._head,
+        max_depth=5,
+        tree_complexity=0.001,
+        pruning_mode='pre')
+
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(input_fn, steps=num_steps)
+    # We stop actually after 2*depth*n_trees steps (via a hook) because we still
+    # could not grow 2 trees of depth 5 (due to pre-pruning).
+    self._assert_checkpoint(
+        est.model_dir, global_step=21, finalized_trees=0, attempted_layers=21)
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 3.83943)
+
+  def testTrainAndEvaluateEstimatorWithPostPruning(self):
+    input_fn = _make_train_input_fn(is_classification=False)
+
+    est = boosted_trees._BoostedTreesEstimator(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        head=self._head,
+        max_depth=5,
+        tree_complexity=0.001,
+        pruning_mode='post')
+
+    # It will stop after 10 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(input_fn, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10)
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 2.37652)
+
+  def testInferEstimator(self):
+    train_input_fn = _make_train_input_fn(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees._BoostedTreesEstimator(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        head=self._head)
+
+    # It will stop after 5 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(train_input_fn, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    # Validate predictions.
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose(
+        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
+        [pred['predictions'] for pred in predictions])
+
+  def testInferEstimatorWithCenterBias(self):
+    train_input_fn = _make_train_input_fn(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees._BoostedTreesEstimator(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        center_bias=True,
+        head=self._head)
+
+    # It will stop after 6 steps because of the max depth and num trees (5 for
+    # training and 2 for bias centering).
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(train_input_fn, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=7, finalized_trees=1, attempted_layers=5)
+    # Validate predictions.
+    predictions = list(est.predict(input_fn=predict_input_fn))
+
+    self.assertAllClose(
+        [[1.634501], [1.325703], [1.187431], [2.019683], [2.832683]],
+        [pred['predictions'] for pred in predictions])
+
+  def testBinaryClassifierTrainInMemoryAndEvalAndInfer(self):
+    train_input_fn = _make_train_input_fn(is_classification=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.boosted_trees_classifier_train_in_memory(
+        train_input_fn=train_input_fn, feature_columns=self._feature_columns,
+        n_trees=1, max_depth=5)
+    # It will stop after 5 steps because of the max depth and num trees.
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+
+    # Check evaluate and predict.
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+    # Validate predictions.
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose([[0], [1], [1], [0], [0]],
+                        [pred['class_ids'] for pred in predictions])
+
+  def testBinaryClassifierTrainInMemoryAndEvalAndInferWithCenterBias(self):
+    train_input_fn = _make_train_input_fn(is_classification=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.boosted_trees_classifier_train_in_memory(
+        train_input_fn=train_input_fn,
+        feature_columns=self._feature_columns,
+        n_trees=1,
+        max_depth=5,
+        center_bias=True)
+    # It will stop after 5 steps + 3 for bias, because of the max depth and num
+    # trees.
+    self._assert_checkpoint(
+        est.model_dir, global_step=8, finalized_trees=1, attempted_layers=5)
+
+    # Check evaluate and predict.
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+    # Validate predictions.
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose([[0], [1], [1], [0], [0]],
+                        [pred['class_ids'] for pred in predictions])
+
+  def testBinaryClassifierTrainInMemoryAndEvalAndInferWithPrePruning(self):
+    train_input_fn = _make_train_input_fn(is_classification=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.boosted_trees_classifier_train_in_memory(
+        train_input_fn=train_input_fn,
+        feature_columns=self._feature_columns,
+        n_trees=1,
+        max_depth=5,
+        pruning_mode='pre',
+        tree_complexity=0.01)
+    # We stop actually after 2*depth*n_trees steps (via a hook) because we still
+    # could not grow 1 trees of depth 5 (due to pre-pruning).
+    self._assert_checkpoint(
+        est.model_dir, global_step=11, finalized_trees=0, attempted_layers=11)
+
+    # Check evaluate and predict.
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+    # Validate predictions.
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose([[0], [1], [1], [0], [0]],
+                        [pred['class_ids'] for pred in predictions])
+
+  def testBinaryClassifierTrainInMemoryWithDataset(self):
+    train_input_fn = _make_train_input_fn_dataset(is_classification=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.boosted_trees_classifier_train_in_memory(
+        train_input_fn=train_input_fn,
+        feature_columns=self._feature_columns,
+        n_trees=1,
+        max_depth=5)
+    # It will stop after 5 steps because of the max depth and num trees.
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+
+    # Check evaluate and predict.
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose([[0], [1], [1], [0], [0]],
+                        [pred['class_ids'] for pred in predictions])
+
+  def testRegressorTrainInMemoryAndEvalAndInfer(self):
+    train_input_fn = _make_train_input_fn(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.boosted_trees_regressor_train_in_memory(
+        train_input_fn=train_input_fn, feature_columns=self._feature_columns,
+        n_trees=1, max_depth=5)
+    # It will stop after 5 steps because of the max depth and num trees.
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+
+    # Check evaluate and predict.
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 2.478283)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose(
+        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
+        [pred['predictions'] for pred in predictions])
+
+  def testRegressorTrainInMemoryWithDataset(self):
+    train_input_fn = _make_train_input_fn_dataset(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.boosted_trees_regressor_train_in_memory(
+        train_input_fn=train_input_fn, feature_columns=self._feature_columns,
+        n_trees=1, max_depth=5)
+    # It will stop after 5 steps because of the max depth and num trees.
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    # Check evaluate and predict.
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 2.478283)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose(
+        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
+        [pred['predictions'] for pred in predictions])
+
+
+class BoostedTreesDebugOutputTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    self._head = canned_boosted_trees._create_regression_head(label_dimension=1)
+    self._feature_columns = {
+        feature_column.bucketized_column(
+            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
+            BUCKET_BOUNDARIES) for i in range(NUM_FEATURES)
+    }
+
+  def testContribEstimatorThatDFCIsInPredictions(self):
+    # pylint:disable=protected-access
+    head = canned_boosted_trees._create_regression_head(label_dimension=1)
+    train_input_fn = _make_train_input_fn(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees._BoostedTreesEstimator(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        head=head,
+        n_trees=1,
+        max_depth=5,
+        center_bias=True)
+    # pylint:enable=protected-access
+
+    num_steps = 100
+    # Train for a few steps. Validate debug outputs in prediction dicts.
+    est.train(train_input_fn, steps=num_steps)
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn)
+    biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
+                         for pred in debug_predictions])
+    self.assertAllClose([1.8] * 5, biases)
+    self.assertAllClose(({
+        0: -0.070499420166015625,
+        1: -0.095000028610229492,
+        2: 0.0
+    }, {
+        0: -0.53763031959533691,
+        1: 0.063333392143249512,
+        2: 0.0
+    }, {
+        0: -0.51756942272186279,
+        1: -0.095000028610229492,
+        2: 0.0
+    }, {
+        0: 0.1563495397567749,
+        1: 0.063333392143249512,
+        2: 0.0
+    }, {
+        0: 0.96934974193572998,
+        1: 0.063333392143249512,
+        2: 0.0
+    }), dfcs)
+
+    # Assert sum(dfcs) + bias == predictions.
+    expected_predictions = [[1.6345005], [1.32570302], [1.1874305],
+                            [2.01968288], [2.83268309]]
+    predictions = [
+        [sum(dfc.values()) + bias] for (dfc, bias) in zip(dfcs, biases)
+    ]
+    self.assertAllClose(expected_predictions, predictions)
+
+    # Test when user doesn't include bias or dfc in predict_keys.
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn, predict_keys=['predictions'])
+    for prediction_dict in debug_predictions:
+      self.assertTrue('bias' in prediction_dict)
+      self.assertTrue('dfc' in prediction_dict)
+      self.assertTrue('predictions' in prediction_dict)
+      self.assertEqual(len(prediction_dict), 3)
+
+
+if __name__ == '__main__':
+  googletest.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn.py b/tensorflow/contrib/estimator/python/estimator/dnn.py
index 6b260de9e3..9efa8f474d 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,153 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""dnn python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Deep Neural Network estimators."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import dnn
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import dnn as dnn_lib
+from tensorflow.python.ops import nn
+
+
+class DNNEstimator(estimator.Estimator):
+  """An estimator for TensorFlow DNN models with user-specified head.
+
+  Example:
+
+  ```python
+  sparse_feature_a = sparse_column_with_hash_bucket(...)
+  sparse_feature_b = sparse_column_with_hash_bucket(...)
+
+  sparse_feature_a_emb = embedding_column(sparse_id_column=sparse_feature_a,
+                                          ...)
+  sparse_feature_b_emb = embedding_column(sparse_id_column=sparse_feature_b,
+                                          ...)
+
+  estimator = DNNEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
+      hidden_units=[1024, 512, 256])
+
+  # Or estimator using the ProximalAdagradOptimizer optimizer with
+  # regularization.
+  estimator = DNNEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      optimizer=tf.train.ProximalAdagradOptimizer(
+        learning_rate=0.1,
+        l1_regularization_strength=0.001
+      ))
+
+  # Or estimator using an optimizer with a learning rate decay.
+  estimator = DNNEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      optimizer=lambda: tf.AdamOptimizer(
+          learning_rate=tf.exponential_decay(
+              learning_rate=0.1,
+              global_step=tf.get_global_step(),
+              decay_steps=10000,
+              decay_rate=0.96))
+
+  # Or estimator with warm-starting from a previous checkpoint.
+  estimator = DNNEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      warm_start_from="/path/to/checkpoint/dir")
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    pass
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval: # returns x, y
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * if `weight_column` is not `None`, a feature with
+    `key=weight_column` whose value is a `Tensor`.
+  * for each `column` in `feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss and predicted output are determined by the specified head.
+  """
 
-# Include attrs that start with single underscore.
-dnn.__all__ = [s for s in dir(dnn) if not s.startswith('__')]
+  def __init__(self,
+               head,
+               hidden_units,
+               feature_columns,
+               model_dir=None,
+               optimizer='Adagrad',
+               activation_fn=nn.relu,
+               dropout=None,
+               input_layer_partitioner=None,
+               config=None,
+               warm_start_from=None,
+               batch_norm=False):
+    """Initializes a `DNNEstimator` instance.
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.dnn import *
+    Args:
+      head: A `_Head` instance constructed with a method such as
+        `tf.contrib.estimator.multi_label_head`.
+      hidden_units: Iterable of number hidden units per layer. All layers are
+        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
+        second one has 32.
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `_FeatureColumn`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator to
+        continue training a previously saved model.
+      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
+        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
+        callable. Defaults to Adagrad optimizer.
+      activation_fn: Activation function applied to each layer. If `None`, will
+        use `tf.nn.relu`.
+      dropout: When not `None`, the probability we will drop out a given
+        coordinate.
+      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
+        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+      config: `RunConfig` object to configure the runtime settings.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights are warm-started, and it is assumed that vocabularies and Tensor
+        names are unchanged.
+      batch_norm: Whether to use batch normalization after each hidden layer.
+    """
+    def _model_fn(features, labels, mode, config):
+      return dnn_lib._dnn_model_fn(  # pylint: disable=protected-access
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          hidden_units=hidden_units,
+          feature_columns=tuple(feature_columns or []),
+          optimizer=optimizer,
+          activation_fn=activation_fn,
+          dropout=dropout,
+          input_layer_partitioner=input_layer_partitioner,
+          config=config,
+          batch_norm=batch_norm)
+    super(DNNEstimator, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        warm_start_from=warm_start_from)
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
index 24655c9964..4e7965ef26 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,171 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""dnn_linear_combined python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""TensorFlow estimator for Linear and DNN joined training models."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import dnn_linear_combined
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import dnn_linear_combined as dnn_linear_combined_lib
+from tensorflow.python.ops import nn
+
+
+class DNNLinearCombinedEstimator(estimator.Estimator):
+  """An estimator for TensorFlow Linear and DNN joined models with custom head.
+
+  Note: This estimator is also known as wide-n-deep.
+
+  Example:
+
+  ```python
+  numeric_feature = numeric_column(...)
+  categorical_column_a = categorical_column_with_hash_bucket(...)
+  categorical_column_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
+  categorical_feature_a_emb = embedding_column(
+      categorical_column=categorical_feature_a, ...)
+  categorical_feature_b_emb = embedding_column(
+      categorical_column=categorical_feature_b, ...)
+
+  estimator = DNNLinearCombinedEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      # wide settings
+      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
+      linear_optimizer=tf.train.FtrlOptimizer(...),
+      # deep settings
+      dnn_feature_columns=[
+          categorical_feature_a_emb, categorical_feature_b_emb,
+          numeric_feature],
+      dnn_hidden_units=[1000, 500, 100],
+      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...))
+
+  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
+  tf.train.ProximalAdagradOptimizer(
+      learning_rate=0.1,
+      l1_regularization_strength=0.001,
+      l2_regularization_strength=0.001)
+  # To apply learning rate decay, you can set dnn_optimizer to a callable:
+  lambda: tf.AdamOptimizer(
+      learning_rate=tf.exponential_decay(
+          learning_rate=0.1,
+          global_step=tf.get_global_step(),
+          decay_steps=10000,
+          decay_rate=0.96)
+  # It is the same for linear_optimizer.
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    pass
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval: # returns x, y
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss is calculated by using mean squared error.
+
+  @compatibility(eager)
+  Estimators are not compatible with eager execution.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               head,
+               model_dir=None,
+               linear_feature_columns=None,
+               linear_optimizer='Ftrl',
+               dnn_feature_columns=None,
+               dnn_optimizer='Adagrad',
+               dnn_hidden_units=None,
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               input_layer_partitioner=None,
+               config=None,
+               linear_sparse_combiner='sum'):
+    """Initializes a DNNLinearCombinedEstimator instance.
+
+    Args:
+      head: A `_Head` instance constructed with a method such as
+        `tf.contrib.estimator.multi_label_head`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into an estimator
+        to continue training a previously saved model.
+      linear_feature_columns: An iterable containing all the feature columns
+        used by linear part of the model. All items in the set must be
+        instances of classes derived from `FeatureColumn`.
+      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the linear part of the model. Can also be a string (one of 'Adagrad',
+        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL
+        optimizer.
+      dnn_feature_columns: An iterable containing all the feature columns used
+        by deep part of the model. All items in the set must be instances of
+        classes derived from `FeatureColumn`.
+      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the deep part of the model. Can also be a string (one of 'Adagrad',
+        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad
+        optimizer.
+      dnn_hidden_units: List of hidden units per layer. All layers are fully
+        connected.
+      dnn_activation_fn: Activation function applied to each layer. If None,
+        will use `tf.nn.relu`.
+      dnn_dropout: When not None, the probability we will drop out
+        a given coordinate.
+      input_layer_partitioner: Partitioner for input layer. Defaults to
+        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+      config: RunConfig object to configure the runtime settings.
+      linear_sparse_combiner: A string specifying how to reduce the linear model
+        if a categorical column is multivalent.  One of "mean", "sqrtn", and
+        "sum" -- these are effectively different ways to do example-level
+        normalization, which can be useful for bag-of-words features.  For more
+        details, see `tf.feature_column.linear_model`.
+
+    Raises:
+      ValueError: If both linear_feature_columns and dnn_features_columns are
+        empty at the same time.
+    """
+    linear_feature_columns = linear_feature_columns or []
+    dnn_feature_columns = dnn_feature_columns or []
+    self._feature_columns = (
+        list(linear_feature_columns) + list(dnn_feature_columns))
+    if not self._feature_columns:
+      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
+                       'must be defined.')
 
-# Include attrs that start with single underscore.
-dnn_linear_combined.__all__ = [
-    s for s in dir(dnn_linear_combined) if not s.startswith('__')
-]
+    def _model_fn(features, labels, mode, config):
+      return dnn_linear_combined_lib._dnn_linear_combined_model_fn(  # pylint: disable=protected-access
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          linear_feature_columns=linear_feature_columns,
+          linear_optimizer=linear_optimizer,
+          dnn_feature_columns=dnn_feature_columns,
+          dnn_optimizer=dnn_optimizer,
+          dnn_hidden_units=dnn_hidden_units,
+          dnn_activation_fn=dnn_activation_fn,
+          dnn_dropout=dnn_dropout,
+          input_layer_partitioner=input_layer_partitioner,
+          config=config,
+          linear_sparse_combiner=linear_sparse_combiner)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.dnn_linear_combined import *
+    super(DNNLinearCombinedEstimator, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config)
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py
new file mode 100644
index 0000000000..51b9ce7005
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined_test.py
@@ -0,0 +1,227 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for dnn_linear_combined.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import dnn_linear_combined
+from tensorflow.contrib.estimator.python.estimator import head as head_lib
+from tensorflow.python.estimator.canned import dnn_testing_utils
+from tensorflow.python.estimator.canned import linear_testing_utils
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+
+
+def _dnn_only_estimator_fn(
+    hidden_units,
+    feature_columns,
+    model_dir=None,
+    label_dimension=1,
+    weight_column=None,
+    optimizer='Adagrad',
+    activation_fn=nn.relu,
+    dropout=None,
+    input_layer_partitioner=None,
+    config=None):
+  return dnn_linear_combined.DNNLinearCombinedEstimator(
+      head=head_lib.regression_head(
+          weight_column=weight_column, label_dimension=label_dimension,
+          # Tests in core (from which this test inherits) test the sum loss.
+          loss_reduction=losses.Reduction.SUM),
+      model_dir=model_dir,
+      dnn_feature_columns=feature_columns,
+      dnn_optimizer=optimizer,
+      dnn_hidden_units=hidden_units,
+      dnn_activation_fn=activation_fn,
+      dnn_dropout=dropout,
+      input_layer_partitioner=input_layer_partitioner,
+      config=config)
+
+
+class DNNOnlyEstimatorEvaluateTest(
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_only_estimator_fn)
+
+
+class DNNOnlyEstimatorPredictTest(
+    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_only_estimator_fn)
+
+
+class DNNOnlyEstimatorTrainTest(
+    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_only_estimator_fn)
+
+
+def _linear_only_estimator_fn(
+    feature_columns,
+    model_dir=None,
+    label_dimension=1,
+    weight_column=None,
+    optimizer='Ftrl',
+    config=None,
+    partitioner=None,
+    sparse_combiner='sum'):
+  return dnn_linear_combined.DNNLinearCombinedEstimator(
+      head=head_lib.regression_head(
+          weight_column=weight_column, label_dimension=label_dimension,
+          # Tests in core (from which this test inherits) test the sum loss.
+          loss_reduction=losses.Reduction.SUM),
+      model_dir=model_dir,
+      linear_feature_columns=feature_columns,
+      linear_optimizer=optimizer,
+      input_layer_partitioner=partitioner,
+      config=config,
+      linear_sparse_combiner=sparse_combiner)
+
+
+class LinearOnlyEstimatorEvaluateTest(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_only_estimator_fn)
+
+
+class LinearOnlyEstimatorPredictTest(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_only_estimator_fn)
+
+
+class LinearOnlyEstimatorTrainTest(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_only_estimator_fn)
+
+
+class DNNLinearCombinedEstimatorIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(
+      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
+    linear_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))]
+    dnn_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    est = dnn_linear_combined.DNNLinearCombinedEstimator(
+        head=head_lib.regression_head(label_dimension=label_dimension),
+        linear_feature_columns=linear_feature_columns,
+        dnn_feature_columns=dnn_feature_columns,
+        dnn_hidden_units=(2, 2),
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array([
+        x[prediction_keys.PredictionKeys.PREDICTIONS]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_test.py
new file mode 100644
index 0000000000..050b0428bf
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_test.py
@@ -0,0 +1,171 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for dnn.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import dnn
+from tensorflow.contrib.estimator.python.estimator import head as head_lib
+from tensorflow.python.estimator.canned import dnn_testing_utils
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import ops
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+
+
+def _dnn_estimator_fn(weight_column=None, label_dimension=1, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
+  """Returns a DNNEstimator that uses regression_head."""
+  return dnn.DNNEstimator(
+      head=head_lib.regression_head(
+          weight_column=weight_column, label_dimension=label_dimension,
+          # Tests in core (from which this test inherits) test the sum loss.
+          loss_reduction=losses.Reduction.SUM),
+      *args, **kwargs)
+
+
+def _dnn_estimator_classifier_fn(n_classes=3, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
+  """Returns a DNNEstimator that uses multi_class_head."""
+  return dnn.DNNEstimator(head=head_lib.multi_class_head(n_classes=n_classes),
+                          *args, **kwargs)
+
+
+class DNNEstimatorEvaluateTest(
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_estimator_fn)
+
+
+class DNNEstimatorPredictTest(
+    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_estimator_fn)
+
+
+class DNNEstimatorTrainTest(
+    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_estimator_fn)
+
+
+class DNNEstimatorWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
+                                   test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
+        self, _dnn_estimator_classifier_fn, _dnn_estimator_fn)
+
+
+class DNNEstimatorIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(
+      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))]
+    est = dnn.DNNEstimator(
+        head=head_lib.regression_head(label_dimension=label_dimension),
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array([
+        x[prediction_keys.PredictionKeys.PREDICTIONS]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
index 29cbdeeb76..40a91175b7 100644
--- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,425 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""dnn_with_layer_annotations python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Deep Neural Network estimators with layer annotations."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import dnn_with_layer_annotations
+import contextlib
+import pickle
+
+from google.protobuf.any_pb2 import Any
+
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import dnn
+from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.saved_model import utils as saved_model_utils
+
+
+class LayerAnnotationsCollectionNames(object):
+  """Names for the collections containing the annotations."""
+
+  UNPROCESSED_FEATURES = 'layer_annotations/unprocessed_features'
+  PROCESSED_FEATURES = 'layer_annotatons/processed_features'
+  FEATURE_COLUMNS = 'layer_annotations/feature_columns'
+
+  @classmethod
+  def keys(cls, collection_name):
+    return '%s/keys' % collection_name
+
+  @classmethod
+  def values(cls, collection_name):
+    return '%s/values' % collection_name
+
+
+def serialize_feature_column(feature_column):
+  if isinstance(feature_column, feature_column_lib._EmbeddingColumn):  # pylint: disable=protected-access
+    # We can't pickle nested functions, and we don't need the value of
+    # layer_creator in most cases anyway, so just discard its value.
+    args = feature_column._asdict()
+    args['layer_creator'] = None
+    temp = type(feature_column)(**args)
+    return pickle.dumps(temp)
+  return pickle.dumps(feature_column)
+
+
+def _to_any_wrapped_tensor_info(tensor):
+  """Converts a `Tensor` to a `TensorInfo` wrapped in a proto `Any`."""
+  any_buf = Any()
+  tensor_info = saved_model_utils.build_tensor_info(tensor)
+  any_buf.Pack(tensor_info)
+  return any_buf
+
+
+def make_input_layer_with_layer_annotations(original_input_layer):
+  """Make an input_layer replacement function that adds layer annotations."""
+
+  def input_layer_with_layer_annotations(features,
+                                         feature_columns,
+                                         weight_collections=None,
+                                         trainable=True,
+                                         cols_to_vars=None,
+                                         scope=None,
+                                         cols_to_output_tensors=None,
+                                         from_template=False):
+    """Returns a dense `Tensor` as input layer based on given `feature_columns`.
+
+    Generally a single example in training data is described with
+    FeatureColumns.
+    At the first layer of the model, this column oriented data should be
+    converted
+    to a single `Tensor`.
+
+    This is like tf.feature_column.input_layer, except with added
+    Integrated-Gradient annotations.
+
+    Args:
+      features: A mapping from key to tensors. `_FeatureColumn`s look up via
+        these keys. For example `numeric_column('price')` will look at 'price'
+        key in this dict. Values can be a `SparseTensor` or a `Tensor` depends
+        on corresponding `_FeatureColumn`.
+      feature_columns: An iterable containing the FeatureColumns to use as
+        inputs to your model. All items should be instances of classes derived
+        from `_DenseColumn` such as `numeric_column`, `embedding_column`,
+        `bucketized_column`, `indicator_column`. If you have categorical
+        features, you can wrap them with an `embedding_column` or
+        `indicator_column`.
+      weight_collections: A list of collection names to which the Variable will
+        be added. Note that variables will also be added to collections
+        `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
+      trainable: If `True` also add the variable to the graph collection
+        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+      cols_to_vars: If not `None`, must be a dictionary that will be filled with
+        a mapping from `_FeatureColumn` to list of `Variable`s.  For example,
+        after the call, we might have cols_to_vars = {_EmbeddingColumn(
+        categorical_column=_HashedCategoricalColumn( key='sparse_feature',
+        hash_bucket_size=5, dtype=tf.string), dimension=10): [<tf.Variable
+        'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1'
+          shape=(5, 10)]} If a column creates no variables, its value will be an
+          empty list.
+      scope: A name or variable scope to use
+      cols_to_output_tensors: If not `None`, must be a dictionary that will be
+        filled with a mapping from '_FeatureColumn' to the associated output
+        `Tensor`s.
+      from_template: True if the method is being instantiated from a
+        `make_template`.
+
+    Returns:
+      A `Tensor` which represents input layer of a model. Its shape
+      is (batch_size, first_layer_dimension) and its dtype is `float32`.
+      first_layer_dimension is determined based on given `feature_columns`.
+
+    Raises:
+      ValueError: features and feature_columns have different lengths.
+    """
+
+    local_cols_to_output_tensors = {}
+    input_layer = original_input_layer(
+        features=features,
+        feature_columns=feature_columns,
+        weight_collections=weight_collections,
+        trainable=trainable,
+        cols_to_vars=cols_to_vars,
+        scope=scope,
+        cols_to_output_tensors=local_cols_to_output_tensors,
+        from_template=from_template)
+
+    if cols_to_output_tensors is not None:
+      cols_to_output_tensors = local_cols_to_output_tensors
+
+    # Annotate features.
+    # These are the parsed Tensors, before embedding.
+
+    # Only annotate features used by FeatureColumns.
+    # We figure which ones are used by FeatureColumns by creating a parsing
+    # spec and looking at the keys.
+    spec = feature_column_lib.make_parse_example_spec(feature_columns)
+    for key in spec.keys():
+      tensor = ops.convert_to_tensor_or_indexed_slices(features[key])
+      ops.add_to_collection(
+          LayerAnnotationsCollectionNames.keys(
+              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES), key)
+      ops.add_to_collection(
+          LayerAnnotationsCollectionNames.values(
+              LayerAnnotationsCollectionNames.UNPROCESSED_FEATURES),
+          _to_any_wrapped_tensor_info(tensor))
+
+    # Annotate feature columns.
+    for column in feature_columns:
+      # TODO(cyfoo): Find a better way to serialize and deserialize
+      # _FeatureColumn.
+      ops.add_to_collection(LayerAnnotationsCollectionNames.FEATURE_COLUMNS,
+                            serialize_feature_column(column))
+
+    for column, tensor in local_cols_to_output_tensors.items():
+      ops.add_to_collection(
+          LayerAnnotationsCollectionNames.keys(
+              LayerAnnotationsCollectionNames.PROCESSED_FEATURES), column.name)
+      ops.add_to_collection(
+          LayerAnnotationsCollectionNames.values(
+              LayerAnnotationsCollectionNames.PROCESSED_FEATURES),
+          _to_any_wrapped_tensor_info(tensor))
+
+    return input_layer
+
+  return input_layer_with_layer_annotations
+
+
+@contextlib.contextmanager
+def _monkey_patch(module, function, replacement):
+  old_function = getattr(module, function)
+  setattr(module, function, replacement)
+  yield
+  setattr(module, function, old_function)
+
+
+def DNNClassifierWithLayerAnnotations(  # pylint: disable=invalid-name
+    hidden_units,
+    feature_columns,
+    model_dir=None,
+    n_classes=2,
+    weight_column=None,
+    label_vocabulary=None,
+    optimizer='Adagrad',
+    activation_fn=nn.relu,
+    dropout=None,
+    input_layer_partitioner=None,
+    config=None,
+    warm_start_from=None,
+    loss_reduction=losses.Reduction.SUM):
+  """A classifier for TensorFlow DNN models with layer annotations.
+
+  This classifier is fuctionally identical to estimator.DNNClassifier as far as
+  training and evaluating models is concerned. The key difference is that this
+  classifier adds additional layer annotations, which can be used for computing
+  Integrated Gradients.
+
+  Integrated Gradients is a method for attributing a classifier's predictions
+  to its input features (https://arxiv.org/pdf/1703.01365.pdf). Given an input
+  instance, the method assigns attribution scores to individual features in
+  proportion to the feature's importance to the classifier's prediction.
+
+  See estimator.DNNClassifer for example code for training and evaluating models
+  using this classifier.
+
+  This classifier is checkpoint-compatible with estimator.DNNClassifier and
+  therefore the following should work seamlessly:
+
+  # Instantiate ordinary estimator as usual.
+  estimator = tf.estimator.DNNClassifier(
+    config, feature_columns, hidden_units, ...)
+
+  # Train estimator, export checkpoint.
+  tf.estimator.train_and_evaluate(estimator, ...)
+
+  # Instantiate estimator with annotations with the same configuration as the
+  # ordinary estimator.
+  estimator_with_annotations = (
+    tf.contrib.estimator.DNNClassifierWithLayerAnnotations(
+      config, feature_columns, hidden_units, ...))
+
+  # Call export_savedmodel with the same arguments as the ordinary estimator,
+  # using the checkpoint produced for the ordinary estimator.
+  estimator_with_annotations.export_saved_model(
+    export_dir_base, serving_input_receiver, ...
+    checkpoint_path='/path/to/ordinary/estimator/checkpoint/model.ckpt-1234')
+
+  Args:
+    hidden_units: Iterable of number hidden units per layer. All layers are
+      fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second
+      one has 32.
+    feature_columns: An iterable containing all the feature columns used by the
+      model. All items in the set should be instances of classes derived from
+      `_FeatureColumn`.
+    model_dir: Directory to save model parameters, graph and etc. This can also
+      be used to load checkpoints from the directory into an estimator to
+      continue training a previously saved model.
+    n_classes: Number of label classes. Defaults to 2, namely binary
+      classification. Must be > 1.
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example. If it is a string, it is
+      used as a key to fetch weight tensor from the `features`. If it is a
+      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
+      weight_column.normalizer_fn is applied on it to get weight tensor.
+    label_vocabulary: A list of strings represents possible label values. If
+      given, labels must be string type and have any value in
+      `label_vocabulary`. If it is not given, that means labels are already
+      encoded as integer or float within [0, 1] for `n_classes=2` and encoded as
+      integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there
+      will be errors if vocabulary is not provided and labels are string.
+    optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
+      to Adagrad optimizer.
+    activation_fn: Activation function applied to each layer. If `None`, will
+      use `tf.nn.relu`.
+    dropout: When not `None`, the probability we will drop out a given
+      coordinate.
+    input_layer_partitioner: Optional. Partitioner for input layer. Defaults to
+      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+    config: `RunConfig` object to configure the runtime settings.
+    warm_start_from: A string filepath to a checkpoint to warm-start from, or a
+      `WarmStartSettings` object to fully configure warm-starting.  If the
+      string filepath is provided instead of a `WarmStartSettings`, then all
+      weights are warm-started, and it is assumed that vocabularies and Tensor
+      names are unchanged.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
+
+  Returns:
+    DNNClassifier with layer annotations.
+  """
+
+  original = dnn.DNNClassifier(
+      hidden_units=hidden_units,
+      feature_columns=feature_columns,
+      model_dir=model_dir,
+      n_classes=n_classes,
+      weight_column=weight_column,
+      label_vocabulary=label_vocabulary,
+      optimizer=optimizer,
+      activation_fn=activation_fn,
+      dropout=dropout,
+      input_layer_partitioner=input_layer_partitioner,
+      config=config,
+      warm_start_from=warm_start_from,
+      loss_reduction=loss_reduction)
+
+  def _model_fn(features, labels, mode, config):
+    with _monkey_patch(
+        feature_column_lib, '_internal_input_layer',
+        make_input_layer_with_layer_annotations(
+            feature_column_lib._internal_input_layer)):  # pylint: disable=protected-access
+      return original.model_fn(features, labels, mode, config)
+
+  return estimator.Estimator(
+      model_fn=_model_fn,
+      model_dir=model_dir,
+      config=config,
+      warm_start_from=warm_start_from)
+
+
+def DNNRegressorWithLayerAnnotations(  # pylint: disable=invalid-name
+    hidden_units,
+    feature_columns,
+    model_dir=None,
+    label_dimension=1,
+    weight_column=None,
+    optimizer='Adagrad',
+    activation_fn=nn.relu,
+    dropout=None,
+    input_layer_partitioner=None,
+    config=None,
+    warm_start_from=None,
+    loss_reduction=losses.Reduction.SUM,
+):
+  """A regressor for TensorFlow DNN models with layer annotations.
+
+  This regressor is fuctionally identical to estimator.DNNRegressor as far as
+  training and evaluating models is concerned. The key difference is that this
+  classifier adds additional layer annotations, which can be used for computing
+  Integrated Gradients.
+
+  Integrated Gradients is a method for attributing a classifier's predictions
+  to its input features (https://arxiv.org/pdf/1703.01365.pdf). Given an input
+  instance, the method assigns attribution scores to individual features in
+  proportion to the feature's importance to the classifier's prediction.
+
+  See estimator.DNNRegressor for example code for training and evaluating models
+  using this regressor.
+
+  This regressor is checkpoint-compatible with estimator.DNNRegressor and
+  therefore the following should work seamlessly:
+
+  # Instantiate ordinary estimator as usual.
+  estimator = tf.estimator.DNNRegressor(
+    config, feature_columns, hidden_units, ...)
+
+  # Train estimator, export checkpoint.
+  tf.estimator.train_and_evaluate(estimator, ...)
+
+  # Instantiate estimator with annotations with the same configuration as the
+  # ordinary estimator.
+  estimator_with_annotations = (
+    tf.contrib.estimator.DNNRegressorWithLayerAnnotations(
+      config, feature_columns, hidden_units, ...))
+
+  # Call export_savedmodel with the same arguments as the ordinary estimator,
+  # using the checkpoint produced for the ordinary estimator.
+  estimator_with_annotations.export_saved_model(
+    export_dir_base, serving_input_receiver, ...
+    checkpoint_path='/path/to/ordinary/estimator/checkpoint/model.ckpt-1234')
+
+  Args:
+    hidden_units: Iterable of number hidden units per layer. All layers are
+      fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second
+      one has 32.
+    feature_columns: An iterable containing all the feature columns used by the
+      model. All items in the set should be instances of classes derived from
+      `_FeatureColumn`.
+    model_dir: Directory to save model parameters, graph and etc. This can also
+      be used to load checkpoints from the directory into a estimator to
+      continue training a previously saved model.
+    label_dimension: Number of regression targets per example. This is the size
+      of the last dimension of the labels and logits `Tensor` objects
+      (typically, these have shape `[batch_size, label_dimension]`).
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example. If it is a string, it is
+      used as a key to fetch weight tensor from the `features`. If it is a
+      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, then
+      weight_column.normalizer_fn is applied on it to get weight tensor.
+    optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
+      to Adagrad optimizer.
+    activation_fn: Activation function applied to each layer. If `None`, will
+      use `tf.nn.relu`.
+    dropout: When not `None`, the probability we will drop out a given
+      coordinate.
+    input_layer_partitioner: Optional. Partitioner for input layer. Defaults to
+      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+    config: `RunConfig` object to configure the runtime settings.
+    warm_start_from: A string filepath to a checkpoint to warm-start from, or a
+      `WarmStartSettings` object to fully configure warm-starting.  If the
+      string filepath is provided instead of a `WarmStartSettings`, then all
+      weights are warm-started, and it is assumed that vocabularies and Tensor
+      names are unchanged.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
+
+  Returns:
+    DNNRegressor with layer annotations.
+  """
+
+  original = dnn.DNNRegressor(
+      hidden_units=hidden_units,
+      feature_columns=feature_columns,
+      model_dir=model_dir,
+      label_dimension=label_dimension,
+      weight_column=weight_column,
+      optimizer=optimizer,
+      activation_fn=activation_fn,
+      dropout=dropout,
+      input_layer_partitioner=input_layer_partitioner,
+      config=config,
+      warm_start_from=warm_start_from,
+      loss_reduction=loss_reduction,
+  )
 
-# Include attrs that start with single underscore.
-dnn_with_layer_annotations.__all__ = [
-    s for s in dir(dnn_with_layer_annotations) if not s.startswith('__')
-]
+  def _model_fn(features, labels, mode, config):
+    with _monkey_patch(
+        feature_column_lib, '_internal_input_layer',
+        make_input_layer_with_layer_annotations(
+            feature_column_lib._internal_input_layer)):  # pylint: disable=protected-access
+      return original.model_fn(features, labels, mode, config)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.dnn_with_layer_annotations import *
+  return estimator.Estimator(
+      model_fn=_model_fn,
+      model_dir=model_dir,
+      config=config,
+      warm_start_from=warm_start_from)
diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations_test.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations_test.py
new file mode 100644
index 0000000000..2fe3d4c72e
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations_test.py
@@ -0,0 +1,611 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for dnn_with_layer_annotations.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import dnn_with_layer_annotations
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.canned import dnn
+from tensorflow.python.estimator.canned import dnn_testing_utils
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.estimator.inputs import pandas_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import input as input_lib
+from tensorflow.python.training import queue_runner
+
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+
+def _dnn_classifier_fn(*args, **kwargs):
+  return dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations(
+      *args, **kwargs)
+
+
+class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
+                          test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(self, _dnn_classifier_fn,
+                                                       _dnn_regressor_fn)
+
+
+class DNNWithLayerAnnotationsClassifierEvaluateTest(
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
+        self, _dnn_classifier_fn)
+
+
+class DNNClassifierWithLayerAnnotationsPredictTest(
+    dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
+        self, _dnn_classifier_fn)
+
+
+class DNNClassifierWithLayerAnnotationsTrainTest(
+    dnn_testing_utils.BaseDNNClassifierTrainTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
+        self, _dnn_classifier_fn)
+
+
+def _dnn_regressor_fn(*args, **kwargs):
+  return dnn_with_layer_annotations.DNNRegressorWithLayerAnnotations(
+      *args, **kwargs)
+
+
+class DNNWithLayerAnnotationsTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def _getLayerAnnotationCollection(self, graph, collection_name):
+    keys = graph.get_collection(
+        dnn_with_layer_annotations.LayerAnnotationsCollectionNames.keys(
+            collection_name))
+    values = graph.get_collection(
+        dnn_with_layer_annotations.LayerAnnotationsCollectionNames.values(
+            collection_name))
+    if len(keys) != len(values):
+      raise ValueError('keys and values should have same length. lengths were: '
+                       '%d and %d, and elements were %s and %s' %
+                       (len(keys), len(values), keys, values))
+    return dict(zip(keys, values))
+
+  def _testAnnotationsPresentForEstimator(self, estimator_class):
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(1,)),
+        feature_column.embedding_column(
+            feature_column.categorical_column_with_vocabulary_list(
+                'y', vocabulary_list=['a', 'b', 'c']),
+            dimension=3)
+    ]
+    estimator = estimator_class(
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        model_dir=self._model_dir)
+    model_fn = estimator.model_fn
+
+    graph = ops.Graph()
+    with graph.as_default():
+      model_fn({
+          'x': array_ops.constant([1.0]),
+          'y': array_ops.constant(['a'])
+      }, {},
+               model_fn_lib.ModeKeys.PREDICT,
+               config=None)
+
+      unprocessed_features = self._getLayerAnnotationCollection(
+          graph, dnn_with_layer_annotations.LayerAnnotationsCollectionNames
+          .UNPROCESSED_FEATURES)
+      processed_features = self._getLayerAnnotationCollection(
+          graph, dnn_with_layer_annotations.LayerAnnotationsCollectionNames
+          .PROCESSED_FEATURES)
+      feature_columns = graph.get_collection(
+          dnn_with_layer_annotations.LayerAnnotationsCollectionNames
+          .FEATURE_COLUMNS)
+
+      self.assertItemsEqual(unprocessed_features.keys(), ['x', 'y'])
+      self.assertEqual(2, len(processed_features.keys()))
+      self.assertEqual(2, len(feature_columns))
+
+  def testAnnotationsPresentForClassifier(self):
+    self._testAnnotationsPresentForEstimator(
+        dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations)
+
+  def testAnnotationsPresentForRegressor(self):
+    self._testAnnotationsPresentForEstimator(
+        dnn_with_layer_annotations.DNNRegressorWithLayerAnnotations)
+
+  def _testCheckpointCompatibleWithNonAnnotatedEstimator(
+      self, train_input_fn, predict_input_fn, non_annotated_class,
+      annotated_class, prediction_key, estimator_args):
+    input_dimension = 2
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    estimator = non_annotated_class(
+        model_dir=self._model_dir,
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        **estimator_args)
+
+    estimator.train(train_input_fn, steps=10)
+
+    predictions = np.array(
+        [x[prediction_key] for x in estimator.predict(predict_input_fn)])
+
+    annotated_estimator = annotated_class(
+        model_dir=self._model_dir,
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        warm_start_from=self._model_dir,
+        **estimator_args)
+
+    annotated_predictions = np.array([
+        x[prediction_key] for x in annotated_estimator.predict(predict_input_fn)
+    ])
+
+    self.assertAllEqual(predictions.shape, annotated_predictions.shape)
+    for i, (a, b) in enumerate(
+        zip(predictions.flatten(), annotated_predictions.flatten())):
+      self.assertAlmostEqual(a, b, msg='index=%d' % i)
+
+  def testCheckpointCompatibleForClassifier(self):
+    n_classes = 2
+    input_dimension = 2
+    batch_size = 10
+    data = np.linspace(
+        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
+    x_data = data.reshape(batch_size, input_dimension)
+    y_data = np.reshape(
+        np.rint(data[:batch_size]).astype(np.int64), (batch_size, 1))
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data}, batch_size=batch_size, shuffle=False)
+
+    self._testCheckpointCompatibleWithNonAnnotatedEstimator(
+        train_input_fn,
+        predict_input_fn,
+        dnn.DNNClassifier,
+        dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations,
+        prediction_key=prediction_keys.PredictionKeys.PROBABILITIES,
+        estimator_args={'n_classes': n_classes})
+
+  def testCheckpointCompatibleForRegressor(self):
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data}, batch_size=batch_size, shuffle=False)
+
+    self._testCheckpointCompatibleWithNonAnnotatedEstimator(
+        train_input_fn,
+        predict_input_fn,
+        dnn.DNNRegressor,
+        dnn_with_layer_annotations.DNNRegressorWithLayerAnnotations,
+        prediction_key=prediction_keys.PredictionKeys.PREDICTIONS,
+        estimator_args={'label_dimension': label_dimension})
+
+
+class DNNRegressorWithLayerAnnotationsEvaluateTest(
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_regressor_fn)
+
+
+class DNNRegressorWithLayerAnnotationsPredictTest(
+    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_regressor_fn)
+
+
+class DNNRegressorWithLayerAnnotationsTrainTest(
+    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_regressor_fn)
+
+
+def _queue_parsed_features(feature_map):
+  tensors_to_enqueue = []
+  keys = []
+  for key, tensor in six.iteritems(feature_map):
+    keys.append(key)
+    tensors_to_enqueue.append(tensor)
+  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
+  input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
+  queue_runner.add_queue_runner(
+      queue_runner.QueueRunner(input_queue,
+                               [input_queue.enqueue(tensors_to_enqueue)]))
+  dequeued_tensors = input_queue.dequeue()
+  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
+
+
+class DNNRegressorWithLayerAnnotationsIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, batch_size):
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    est = dnn_with_layer_annotations.DNNRegressorWithLayerAnnotations(
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array([
+        x[prediction_keys.PredictionKeys.PREDICTIONS]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data}, batch_size=batch_size, shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size)
+
+  def test_pandas_input_fn(self):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+    label_dimension = 1
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size, dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(data)
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x, batch_size=batch_size, shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size)
+
+  def test_input_fn_from_parse_example(self):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(
+          features=feature_pb2.Features(
+              feature={
+                  'x':
+                      feature_pb2.Feature(
+                          float_list=feature_pb2.FloatList(value=datum)),
+                  'y':
+                      feature_pb2.Feature(
+                          float_list=feature_pb2.FloatList(value=datum)),
+              }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
+    }
+
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size)
+
+
+class DNNClassifierWithLayerAnnotationsIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _as_label(self, data_in_float):
+    return np.rint(data_in_float).astype(np.int64)
+
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, n_classes, batch_size):
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    est = dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations(
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predicted_proba = np.array([
+        x[prediction_keys.PredictionKeys.PROBABILITIES]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
+
+    # EXPORT
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    n_classes = 3
+    input_dimension = 2
+    batch_size = 10
+    data = np.linspace(
+        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
+    x_data = data.reshape(batch_size, input_dimension)
+    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data}, y=y_data, batch_size=batch_size, shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data}, batch_size=batch_size, shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size)
+
+  def test_pandas_input_fn(self):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+    input_dimension = 1
+    n_classes = 3
+    batch_size = 10
+    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(self._as_label(data))
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x, batch_size=batch_size, shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size)
+
+  def test_input_fn_from_parse_example(self):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    input_dimension = 2
+    n_classes = 3
+    batch_size = 10
+    data = np.linspace(
+        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, input_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(
+          features=feature_pb2.Features(
+              feature={
+                  'x':
+                      feature_pb2.Feature(
+                          float_list=feature_pb2.FloatList(value=datum)),
+                  'y':
+                      feature_pb2.Feature(
+                          int64_list=feature_pb2.Int64List(
+                              value=self._as_label(datum[:1]))),
+              }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
+    }
+
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/early_stopping.py b/tensorflow/contrib/estimator/python/estimator/early_stopping.py
index b5d256dfeb..cafe8279c7 100644
--- a/tensorflow/contrib/estimator/python/estimator/early_stopping.py
+++ b/tensorflow/contrib/estimator/python/estimator/early_stopping.py
@@ -12,23 +12,495 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""early_stopping python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Utilities for early stopping."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import early_stopping
+import collections
+import operator
+import os
+
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import tf_logging
+from tensorflow.python.summary import summary_iterator
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import training_util
+
+_EVENT_FILE_GLOB_PATTERN = 'events.out.tfevents.*'
+
+
+def make_early_stopping_hook(estimator,
+                             should_stop_fn,
+                             run_every_secs=60,
+                             run_every_steps=None):
+  """Creates early-stopping hook.
+
+  Returns a `SessionRunHook` that stops training when `should_stop_fn` returns
+  `True`.
+
+  Usage example:
+
+  ```python
+  estimator = ...
+  hook = early_stopping.make_early_stopping_hook(
+      estimator, should_stop_fn=make_stop_fn(...))
+  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
+  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
+  ```
+
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
+  Args:
+    estimator: A `tf.estimator.Estimator` instance.
+    should_stop_fn: `callable`, function that takes no arguments and returns a
+      `bool`. If the function returns `True`, stopping will be initiated by the
+      chief.
+    run_every_secs: If specified, calls `should_stop_fn` at an interval of
+      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
+      `run_every_steps` must be set.
+    run_every_steps: If specified, calls `should_stop_fn` every
+      `run_every_steps` steps. Either this or `run_every_secs` must be set.
+
+  Returns:
+    A `SessionRunHook` that periodically executes `should_stop_fn` and initiates
+    early stopping if the function returns `True`.
+
+  Raises:
+    TypeError: If `estimator` is not of type `tf.estimator.Estimator`.
+    ValueError: If both `run_every_secs` and `run_every_steps` are set.
+  """
+  if not isinstance(estimator, estimator_lib.Estimator):
+    raise TypeError('`estimator` must have type `tf.estimator.Estimator`. '
+                    'Got: {}'.format(type(estimator)))
+
+  if run_every_secs is not None and run_every_steps is not None:
+    raise ValueError('Only one of `run_every_secs` and `run_every_steps` must '
+                     'be set.')
+
+  if estimator.config.is_chief:
+    return _StopOnPredicateHook(should_stop_fn, run_every_secs, run_every_steps)
+  else:
+    return _CheckForStoppingHook()
+
+
+def stop_if_higher_hook(estimator,
+                        metric_name,
+                        threshold,
+                        eval_dir=None,
+                        min_steps=0,
+                        run_every_secs=60,
+                        run_every_steps=None):
+  """Creates hook to stop if the given metric is higher than the threshold.
+
+  Usage example:
+
+  ```python
+  estimator = ...
+  # Hook to stop training if accuracy becomes higher than 0.9.
+  hook = early_stopping.stop_if_higher_hook(estimator, "accuracy", 0.9)
+  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
+  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
+  ```
+
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
+  Args:
+    estimator: A `tf.estimator.Estimator` instance.
+    metric_name: `str`, metric to track. "loss", "accuracy", etc.
+    threshold: Numeric threshold for the given metric.
+    eval_dir: If set, directory containing summary files with eval metrics. By
+      default, `estimator.eval_dir()` will be used.
+    min_steps: `int`, stop is never requested if global step is less than this
+      value. Defaults to 0.
+    run_every_secs: If specified, calls `should_stop_fn` at an interval of
+      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
+      `run_every_steps` must be set.
+    run_every_steps: If specified, calls `should_stop_fn` every
+      `run_every_steps` steps. Either this or `run_every_secs` must be set.
+
+  Returns:
+    An early-stopping hook of type `SessionRunHook` that periodically checks
+    if the given metric is higher than specified threshold and initiates
+    early stopping if true.
+  """
+  return _stop_if_threshold_crossed_hook(
+      estimator=estimator,
+      metric_name=metric_name,
+      threshold=threshold,
+      higher_is_better=True,
+      eval_dir=eval_dir,
+      min_steps=min_steps,
+      run_every_secs=run_every_secs,
+      run_every_steps=run_every_steps)
+
+
+def stop_if_lower_hook(estimator,
+                       metric_name,
+                       threshold,
+                       eval_dir=None,
+                       min_steps=0,
+                       run_every_secs=60,
+                       run_every_steps=None):
+  """Creates hook to stop if the given metric is lower than the threshold.
+
+  Usage example:
+
+  ```python
+  estimator = ...
+  # Hook to stop training if loss becomes lower than 100.
+  hook = early_stopping.stop_if_lower_hook(estimator, "loss", 100)
+  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
+  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
+  ```
+
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
+  Args:
+    estimator: A `tf.estimator.Estimator` instance.
+    metric_name: `str`, metric to track. "loss", "accuracy", etc.
+    threshold: Numeric threshold for the given metric.
+    eval_dir: If set, directory containing summary files with eval metrics. By
+      default, `estimator.eval_dir()` will be used.
+    min_steps: `int`, stop is never requested if global step is less than this
+      value. Defaults to 0.
+    run_every_secs: If specified, calls `should_stop_fn` at an interval of
+      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
+      `run_every_steps` must be set.
+    run_every_steps: If specified, calls `should_stop_fn` every
+      `run_every_steps` steps. Either this or `run_every_secs` must be set.
+
+  Returns:
+    An early-stopping hook of type `SessionRunHook` that periodically checks
+    if the given metric is lower than specified threshold and initiates
+    early stopping if true.
+  """
+  return _stop_if_threshold_crossed_hook(
+      estimator=estimator,
+      metric_name=metric_name,
+      threshold=threshold,
+      higher_is_better=False,
+      eval_dir=eval_dir,
+      min_steps=min_steps,
+      run_every_secs=run_every_secs,
+      run_every_steps=run_every_steps)
+
+
+def stop_if_no_increase_hook(estimator,
+                             metric_name,
+                             max_steps_without_increase,
+                             eval_dir=None,
+                             min_steps=0,
+                             run_every_secs=60,
+                             run_every_steps=None):
+  """Creates hook to stop if metric does not increase within given max steps.
+
+  Usage example:
+
+  ```python
+  estimator = ...
+  # Hook to stop training if accuracy does not increase in over 100000 steps.
+  hook = early_stopping.stop_if_no_increase_hook(estimator, "accuracy", 100000)
+  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
+  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
+  ```
+
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
+  Args:
+    estimator: A `tf.estimator.Estimator` instance.
+    metric_name: `str`, metric to track. "loss", "accuracy", etc.
+    max_steps_without_increase: `int`, maximum number of training steps with no
+      increase in the given metric.
+    eval_dir: If set, directory containing summary files with eval metrics. By
+      default, `estimator.eval_dir()` will be used.
+    min_steps: `int`, stop is never requested if global step is less than this
+      value. Defaults to 0.
+    run_every_secs: If specified, calls `should_stop_fn` at an interval of
+      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
+      `run_every_steps` must be set.
+    run_every_steps: If specified, calls `should_stop_fn` every
+      `run_every_steps` steps. Either this or `run_every_secs` must be set.
+
+  Returns:
+    An early-stopping hook of type `SessionRunHook` that periodically checks
+    if the given metric shows no increase over given maximum number of
+    training steps, and initiates early stopping if true.
+  """
+  return _stop_if_no_metric_improvement_hook(
+      estimator=estimator,
+      metric_name=metric_name,
+      max_steps_without_improvement=max_steps_without_increase,
+      higher_is_better=True,
+      eval_dir=eval_dir,
+      min_steps=min_steps,
+      run_every_secs=run_every_secs,
+      run_every_steps=run_every_steps)
+
+
+def stop_if_no_decrease_hook(estimator,
+                             metric_name,
+                             max_steps_without_decrease,
+                             eval_dir=None,
+                             min_steps=0,
+                             run_every_secs=60,
+                             run_every_steps=None):
+  """Creates hook to stop if metric does not decrease within given max steps.
+
+  Usage example:
+
+  ```python
+  estimator = ...
+  # Hook to stop training if loss does not decrease in over 100000 steps.
+  hook = early_stopping.stop_if_no_decrease_hook(estimator, "loss", 100000)
+  train_spec = tf.estimator.TrainSpec(..., hooks=[hook])
+  tf.estimator.train_and_evaluate(estimator, train_spec, ...)
+  ```
+
+  Caveat: Current implementation supports early-stopping both training and
+  evaluation in local mode. In distributed mode, training can be stopped but
+  evaluation (where it's a separate job) will indefinitely wait for new model
+  checkpoints to evaluate, so you will need other means to detect and stop it.
+  Early-stopping evaluation in distributed mode requires changes in
+  `train_and_evaluate` API and will be addressed in a future revision.
+
+  Args:
+    estimator: A `tf.estimator.Estimator` instance.
+    metric_name: `str`, metric to track. "loss", "accuracy", etc.
+    max_steps_without_decrease: `int`, maximum number of training steps with no
+      decrease in the given metric.
+    eval_dir: If set, directory containing summary files with eval metrics. By
+      default, `estimator.eval_dir()` will be used.
+    min_steps: `int`, stop is never requested if global step is less than this
+      value. Defaults to 0.
+    run_every_secs: If specified, calls `should_stop_fn` at an interval of
+      `run_every_secs` seconds. Defaults to 60 seconds. Either this or
+      `run_every_steps` must be set.
+    run_every_steps: If specified, calls `should_stop_fn` every
+      `run_every_steps` steps. Either this or `run_every_secs` must be set.
+
+  Returns:
+    An early-stopping hook of type `SessionRunHook` that periodically checks
+    if the given metric shows no decrease over given maximum number of
+    training steps, and initiates early stopping if true.
+  """
+  return _stop_if_no_metric_improvement_hook(
+      estimator=estimator,
+      metric_name=metric_name,
+      max_steps_without_improvement=max_steps_without_decrease,
+      higher_is_better=False,
+      eval_dir=eval_dir,
+      min_steps=min_steps,
+      run_every_secs=run_every_secs,
+      run_every_steps=run_every_steps)
+
+
+def read_eval_metrics(eval_dir):
+  """Helper to read eval metrics from eval summary files.
+
+  Args:
+    eval_dir: Directory containing summary files with eval metrics.
+
+  Returns:
+    A `dict` with global steps mapping to `dict` of metric names and values.
+  """
+  eval_metrics_dict = {}
+  for event in _summaries(eval_dir):
+    if not event.HasField('summary'):
+      continue
+    metrics = {}
+    for value in event.summary.value:
+      if value.HasField('simple_value'):
+        metrics[value.tag] = value.simple_value
+    if metrics:
+      eval_metrics_dict[event.step] = metrics
+  return collections.OrderedDict(
+      sorted(eval_metrics_dict.items(), key=lambda t: t[0]))
+
+
+def _stop_if_threshold_crossed_hook(estimator, metric_name, threshold,
+                                    higher_is_better, eval_dir, min_steps,
+                                    run_every_secs, run_every_steps):
+  """Creates early-stopping hook to stop training if threshold is crossed."""
+
+  if eval_dir is None:
+    eval_dir = estimator.eval_dir()
+
+  is_lhs_better = operator.gt if higher_is_better else operator.lt
+  greater_or_lesser = 'greater than' if higher_is_better else 'less than'
+
+  def stop_if_threshold_crossed_fn():
+    """Returns `True` if the given metric crosses specified threshold."""
+
+    eval_results = read_eval_metrics(eval_dir)
+
+    for step, metrics in eval_results.items():
+      if step < min_steps:
+        continue
+      val = metrics[metric_name]
+      if is_lhs_better(val, threshold):
+        tf_logging.info(
+            'At step %s, metric "%s" has value %s which is %s the configured '
+            'threshold (%s) for early stopping.', step, metric_name, val,
+            greater_or_lesser, threshold)
+        return True
+    return False
+
+  return make_early_stopping_hook(
+      estimator=estimator,
+      should_stop_fn=stop_if_threshold_crossed_fn,
+      run_every_secs=run_every_secs,
+      run_every_steps=run_every_steps)
+
+
+def _stop_if_no_metric_improvement_hook(
+    estimator, metric_name, max_steps_without_improvement, higher_is_better,
+    eval_dir, min_steps, run_every_secs, run_every_steps):
+  """Returns hook to stop training if given metric shows no improvement."""
+
+  if eval_dir is None:
+    eval_dir = estimator.eval_dir()
+
+  is_lhs_better = operator.gt if higher_is_better else operator.lt
+  increase_or_decrease = 'increase' if higher_is_better else 'decrease'
+
+  def stop_if_no_metric_improvement_fn():
+    """Returns `True` if metric does not improve within max steps."""
+
+    eval_results = read_eval_metrics(eval_dir)
+
+    best_val = None
+    best_val_step = None
+    for step, metrics in eval_results.items():
+      if step < min_steps:
+        continue
+      val = metrics[metric_name]
+      if best_val is None or is_lhs_better(val, best_val):
+        best_val = val
+        best_val_step = step
+      if step - best_val_step >= max_steps_without_improvement:
+        tf_logging.info(
+            'No %s in metric "%s" for %s steps, which is greater than or equal '
+            'to max steps (%s) configured for early stopping.',
+            increase_or_decrease, metric_name, step - best_val_step,
+            max_steps_without_improvement)
+        return True
+    return False
+
+  return make_early_stopping_hook(
+      estimator=estimator,
+      should_stop_fn=stop_if_no_metric_improvement_fn,
+      run_every_secs=run_every_secs,
+      run_every_steps=run_every_steps)
+
+
+def _summaries(eval_dir):
+  """Yields `tensorflow.Event` protos from event files in the eval dir.
+
+  Args:
+    eval_dir: Directory containing summary files with eval metrics.
+
+  Yields:
+    `tensorflow.Event` object read from the event files.
+  """
+  if gfile.Exists(eval_dir):
+    for event_file in gfile.Glob(
+        os.path.join(eval_dir, _EVENT_FILE_GLOB_PATTERN)):
+      for event in summary_iterator.summary_iterator(event_file):
+        yield event
+
+
+def _get_or_create_stop_var():
+  with variable_scope.variable_scope(
+      name_or_scope='signal_early_stopping',
+      values=[],
+      reuse=variable_scope.AUTO_REUSE):
+    return variable_scope.get_variable(
+        name='STOP',
+        shape=[],
+        dtype=dtypes.bool,
+        initializer=init_ops.constant_initializer(False),
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
+        trainable=False)
+
+
+class _StopOnPredicateHook(session_run_hook.SessionRunHook):
+  """Hook that requests stop when `should_stop_fn` returns `True`."""
+
+  def __init__(self, should_stop_fn, run_every_secs=60, run_every_steps=None):
+    if not callable(should_stop_fn):
+      raise TypeError('`should_stop_fn` must be callable.')
+
+    self._should_stop_fn = should_stop_fn
+    self._timer = basic_session_run_hooks.SecondOrStepTimer(
+        every_secs=run_every_secs, every_steps=run_every_steps)
+    self._global_step_tensor = None
+    self._stop_var = None
+    self._stop_op = None
+
+  def begin(self):
+    self._global_step_tensor = training_util.get_global_step()
+    self._stop_var = _get_or_create_stop_var()
+    self._stop_op = state_ops.assign(self._stop_var, True)
+
+  def before_run(self, run_context):
+    del run_context
+    return session_run_hook.SessionRunArgs(self._global_step_tensor)
+
+  def after_run(self, run_context, run_values):
+    global_step = run_values.results
+    if self._timer.should_trigger_for_step(global_step):
+      self._timer.update_last_triggered_step(global_step)
+      if self._should_stop_fn():
+        tf_logging.info('Requesting early stopping at global step %d',
+                        global_step)
+        run_context.session.run(self._stop_op)
+        run_context.request_stop()
+
+
+class _CheckForStoppingHook(session_run_hook.SessionRunHook):
+  """Hook that requests stop if stop is requested by `_StopOnPredicateHook`."""
+
+  def __init__(self):
+    self._stop_var = None
+
+  def begin(self):
+    self._stop_var = _get_or_create_stop_var()
 
-# Include attrs that start with single underscore.
-early_stopping.__all__ = [
-    s for s in dir(early_stopping) if not s.startswith('__')
-]
+  def before_run(self, run_context):
+    del run_context
+    return session_run_hook.SessionRunArgs(self._stop_var)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.early_stopping import *
+  def after_run(self, run_context, run_values):
+    should_early_stop = run_values.results
+    if should_early_stop:
+      tf_logging.info('Early stopping requested, suspending run.')
+      run_context.request_stop()
diff --git a/tensorflow/contrib/estimator/python/estimator/early_stopping_test.py b/tensorflow/contrib/estimator/python/estimator/early_stopping_test.py
new file mode 100644
index 0000000000..e4bfd4b446
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/early_stopping_test.py
@@ -0,0 +1,246 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for early_stopping."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+
+from absl.testing import parameterized
+from tensorflow.contrib.estimator.python.estimator import early_stopping
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import run_config
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.platform import test
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import training_util
+
+
+class _FakeRunConfig(run_config.RunConfig):
+
+  def __init__(self, is_chief):
+    super(_FakeRunConfig, self).__init__()
+    self._is_chief = is_chief
+
+  @property
+  def is_chief(self):
+    return self._is_chief
+
+
+def _dummy_model_fn(features, labels, params):
+  _, _, _ = features, labels, params
+
+
+class _FakeEstimator(estimator.Estimator):
+  """Fake estimator for testing."""
+
+  def __init__(self, config):
+    super(_FakeEstimator, self).__init__(
+        model_fn=_dummy_model_fn, config=config)
+
+
+def _write_events(eval_dir, params):
+  """Test helper to write events to summary files."""
+  for steps, loss, accuracy in params:
+    estimator._write_dict_to_summary(eval_dir, {
+        'loss': loss,
+        'accuracy': accuracy,
+    }, steps)
+
+
+class ReadEvalMetricsTest(test.TestCase):
+
+  def test_read_eval_metrics(self):
+    eval_dir = tempfile.mkdtemp()
+    _write_events(
+        eval_dir,
+        [
+            # steps, loss, accuracy
+            (1000, 1, 2),
+            (2000, 3, 4),
+            (3000, 5, 6),
+        ])
+    self.assertEqual({
+        1000: {
+            'loss': 1,
+            'accuracy': 2
+        },
+        2000: {
+            'loss': 3,
+            'accuracy': 4
+        },
+        3000: {
+            'loss': 5,
+            'accuracy': 6
+        },
+    }, early_stopping.read_eval_metrics(eval_dir))
+
+  def test_read_eval_metrics_when_no_events(self):
+    eval_dir = tempfile.mkdtemp()
+    self.assertTrue(os.path.exists(eval_dir))
+
+    # No error should be raised when eval directory exists with no event files.
+    self.assertEqual({}, early_stopping.read_eval_metrics(eval_dir))
+
+    os.rmdir(eval_dir)
+    self.assertFalse(os.path.exists(eval_dir))
+
+    # No error should be raised when eval directory does not exist.
+    self.assertEqual({}, early_stopping.read_eval_metrics(eval_dir))
+
+
+class EarlyStoppingHooksTest(test.TestCase, parameterized.TestCase):
+
+  def setUp(self):
+    config = _FakeRunConfig(is_chief=True)
+    self._estimator = _FakeEstimator(config=config)
+    eval_dir = self._estimator.eval_dir()
+    os.makedirs(eval_dir)
+    _write_events(
+        eval_dir,
+        [
+            # steps, loss, accuracy
+            (1000, 0.8, 0.5),
+            (2000, 0.7, 0.6),
+            (3000, 0.4, 0.7),
+            (3500, 0.41, 0.68),
+        ])
+
+  def run_session(self, hooks, should_stop):
+    hooks = hooks if isinstance(hooks, list) else [hooks]
+    with ops.Graph().as_default():
+      training_util.create_global_step()
+      no_op = control_flow_ops.no_op()
+      with monitored_session.SingularMonitoredSession(hooks=hooks) as mon_sess:
+        mon_sess.run(no_op)
+        self.assertEqual(mon_sess.should_stop(), should_stop)
+
+  @parameterized.parameters((0.8, 0, False), (0.6, 4000, False), (0.6, 0, True))
+  def test_stop_if_higher_hook(self, threshold, min_steps, should_stop):
+    self.run_session(
+        early_stopping.stop_if_higher_hook(
+            self._estimator,
+            metric_name='accuracy',
+            threshold=threshold,
+            min_steps=min_steps), should_stop)
+
+  @parameterized.parameters((0.3, 0, False), (0.5, 4000, False), (0.5, 0, True))
+  def test_stop_if_lower_hook(self, threshold, min_steps, should_stop):
+    self.run_session(
+        early_stopping.stop_if_lower_hook(
+            self._estimator,
+            metric_name='loss',
+            threshold=threshold,
+            min_steps=min_steps), should_stop)
+
+  @parameterized.parameters((1500, 0, False), (500, 4000, False),
+                            (500, 0, True))
+  def test_stop_if_no_increase_hook(self, max_steps, min_steps, should_stop):
+    self.run_session(
+        early_stopping.stop_if_no_increase_hook(
+            self._estimator,
+            metric_name='accuracy',
+            max_steps_without_increase=max_steps,
+            min_steps=min_steps), should_stop)
+
+  @parameterized.parameters((1500, 0, False), (500, 4000, False),
+                            (500, 0, True))
+  def test_stop_if_no_decrease_hook(self, max_steps, min_steps, should_stop):
+    self.run_session(
+        early_stopping.stop_if_no_decrease_hook(
+            self._estimator,
+            metric_name='loss',
+            max_steps_without_decrease=max_steps,
+            min_steps=min_steps), should_stop)
+
+  @parameterized.parameters((1500, 0.3, False), (1500, 0.5, True),
+                            (500, 0.3, True))
+  def test_multiple_hooks(self, max_steps, loss_threshold, should_stop):
+    self.run_session([
+        early_stopping.stop_if_no_decrease_hook(
+            self._estimator,
+            metric_name='loss',
+            max_steps_without_decrease=max_steps),
+        early_stopping.stop_if_lower_hook(
+            self._estimator, metric_name='loss', threshold=loss_threshold)
+    ], should_stop)
+
+  @parameterized.parameters(False, True)
+  def test_make_early_stopping_hook(self, should_stop):
+    self.run_session([
+        early_stopping.make_early_stopping_hook(
+            self._estimator, should_stop_fn=lambda: should_stop)
+    ], should_stop)
+
+  def test_make_early_stopping_hook_typeerror(self):
+    with self.assertRaises(TypeError):
+      early_stopping.make_early_stopping_hook(
+          estimator=object(), should_stop_fn=lambda: True)
+
+  def test_make_early_stopping_hook_valueerror(self):
+    with self.assertRaises(ValueError):
+      early_stopping.make_early_stopping_hook(
+          self._estimator,
+          should_stop_fn=lambda: True,
+          run_every_secs=60,
+          run_every_steps=100)
+
+
+class StopOnPredicateHookTest(test.TestCase):
+
+  def test_stop(self):
+    hook = early_stopping._StopOnPredicateHook(
+        should_stop_fn=lambda: False, run_every_secs=0)
+    with ops.Graph().as_default():
+      training_util.create_global_step()
+      no_op = control_flow_ops.no_op()
+      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
+        mon_sess.run(no_op)
+        self.assertFalse(mon_sess.should_stop())
+        self.assertFalse(mon_sess.raw_session().run(hook._stop_var))
+
+    hook = early_stopping._StopOnPredicateHook(
+        should_stop_fn=lambda: True, run_every_secs=0)
+    with ops.Graph().as_default():
+      training_util.create_global_step()
+      no_op = control_flow_ops.no_op()
+      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
+        mon_sess.run(no_op)
+        self.assertTrue(mon_sess.should_stop())
+        self.assertTrue(mon_sess.raw_session().run(hook._stop_var))
+
+
+class CheckForStoppingHookTest(test.TestCase):
+
+  def test_stop(self):
+    hook = early_stopping._CheckForStoppingHook()
+    with ops.Graph().as_default():
+      no_op = control_flow_ops.no_op()
+      assign_op = state_ops.assign(early_stopping._get_or_create_stop_var(),
+                                   True)
+      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
+        mon_sess.run(no_op)
+        self.assertFalse(mon_sess.should_stop())
+        mon_sess.run(assign_op)
+        self.assertTrue(mon_sess.should_stop())
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/export.py b/tensorflow/contrib/estimator/python/estimator/export.py
index 4f3fe9c0dd..b0deb9b494 100644
--- a/tensorflow/contrib/estimator/python/estimator/export.py
+++ b/tensorflow/contrib/estimator/python/estimator/export.py
@@ -12,21 +12,212 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""export python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Wrapper for methods to export train/eval graphs from Estimator."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import export
+from tensorflow.python.estimator import model_fn as model_fn_lib
+
+
+def export_saved_model_for_mode(
+    estimator, export_dir_base, input_receiver_fn,
+    assets_extra=None,
+    as_text=False,
+    checkpoint_path=None,
+    strip_default_attrs=False,
+    mode=model_fn_lib.ModeKeys.PREDICT):
+  # pylint: disable=line-too-long
+  """Exports a single train/eval/predict graph as a SavedModel.
+
+  For a detailed guide, see [Using SavedModel with Estimators](
+  https://tensorflow.org/guide/saved_model#using_savedmodel_with_estimators).
+
+  Sample usage:
+  ```python
+  classifier = tf.estimator.LinearClassifier(
+      feature_columns=[age, language])
+  classifier.train(input_fn=input_fn, steps=1000)
+
+  feature_spec = {
+      'age': tf.placeholder(dtype=tf.int64),
+      'language': array_ops.placeholder(dtype=tf.string)
+  }
+  label_spec = tf.placeholder(dtype=dtypes.int64)
+
+  train_rcvr_fn = tf.contrib.estimator.build_raw_supervised_input_receiver_fn(
+      feature_spec, label_spec)
+
+  export_dir = tf.contrib.estimator.export_saved_model_for_mode(
+      classifier,
+      export_dir_base='my_model/',
+      input_receiver_fn=train_rcvr_fn,
+      mode=model_fn_lib.ModeKeys.TRAIN)
+
+  # export_dir is a timestamped directory with the SavedModel, which
+  # can be used for serving, analysis with TFMA, or directly loaded in.
+  with ops.Graph().as_default() as graph:
+    with session.Session(graph=graph) as sess:
+      loader.load(sess, [tag_constants.TRAINING], export_dir)
+      weights = graph.get_tensor_by_name(''linear/linear_model/age/weights')
+      ...
+  ```
+
+  This method is a wrapper for _export_all_saved_models, and wraps a raw
+  input_receiver_fn in a dictionary to pass in to that function.
+  See _export_all_saved_models for full docs.
+
+  See tf.contrib.estimator.export_saved_model_for_mode for the currently
+  exposed version of this function.
+
+  Args:
+    estimator: an instance of tf.estimator.Estimator
+    export_dir_base: A string containing a directory in which to create
+      timestamped subdirectories containing exported SavedModels.
+    input_receiver_fn: a function that takes no argument and
+      returns the appropriate subclass of `InputReceiver`.
+    assets_extra: A dict specifying how to populate the assets.extra directory
+      within the exported SavedModel, or `None` if no extra assets are needed.
+    as_text: whether to write the SavedModel proto in text format.
+    checkpoint_path: The checkpoint path to export.  If `None` (the default),
+      the most recent checkpoint found within the model directory is chosen.
+    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+      removed from the NodeDefs. For a detailed guide, see
+      [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
+    mode: tf.estimator.ModeKeys value indicating with mode will be exported.
+
+  Returns:
+    The string path to the exported directory.
+
+  Raises:
+    ValueError: if input_receiver_fn is None, no export_outputs
+      are provided, or no checkpoint can be found.
+  """
+  # pylint: enable=line-too-long
+
+  # pylint: disable=protected-access
+  return estimator._export_saved_model_for_mode(
+      export_dir_base, input_receiver_fn,
+      assets_extra=assets_extra,
+      as_text=as_text,
+      checkpoint_path=checkpoint_path,
+      strip_default_attrs=strip_default_attrs,
+      mode=mode)
+  # pylint: enable=protected-access
+
+
+def export_all_saved_models(
+    estimator, export_dir_base, input_receiver_fn_map,
+    assets_extra=None,
+    as_text=False,
+    checkpoint_path=None,
+    strip_default_attrs=False):
+  # pylint: disable=line-too-long
+  """Exports requested train/eval/predict graphs as separate SavedModels.
+
+  See tf.contrib.estimator.export_all_saved_models for the currently
+  exposed version of this function.
+
+  For each mode passed in via the input_receiver_fn_map,
+  this method builds a new graph by calling the input_receiver_fn to obtain
+  feature and label `Tensor`s. Next, this method calls the `Estimator`'s
+  model_fn in the passed mode to generate the model graph based on
+  those features and labels, and restores the given checkpoint
+  (or, lacking that, the most recent checkpoint) into the graph.
+  Only one of the modes is used for saving variables to the SavedModel
+  (order of preference: TRAIN, EVAL, then PREDICT), such that up to three
+  MetaGraphDefs are saved with a single set of variables in a single
+  SavedModel directory.
+
+  For prediction, the exported `MetaGraphDef` will provide one `SignatureDef`
+  for each element of the export_outputs dict returned from the model_fn,
+  named using the same keys.  One of these keys is always
+  signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, indicating which
+  signature will be served when a serving request does not specify one.
+  For each signature, the outputs are provided by the corresponding
+  `ExportOutput`s, and the inputs are always the input receivers provided by
+  the serving_input_receiver_fn.
+
+  For training and evaluation, the train_op is stored in an extra collection,
+  and loss, metrics, and predictions are included in a SignatureDef for the
+  mode in question.
+
+  Extra assets may be written into the SavedModel via the assets_extra
+  argument.  This should be a dict, where each key gives a destination path
+  (including the filename) relative to the assets.extra directory.  The
+  corresponding value gives the full path of the source file to be copied.
+  For example, the simple case of copying a single file without renaming it
+  is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
+
+  Sample usage:
+  ```python
+  classifier = tf.estimator.LinearClassifier(
+      feature_columns=[age, language])
+  classifier.train(input_fn=input_fn)
+
+  feature_spec = {
+      'age': tf.placeholder(dtype=tf.int64),
+      'language': array_ops.placeholder(dtype=tf.string)
+  }
+  label_spec = tf.placeholder(dtype=dtypes.int64)
+
+  train_rcvr_fn = tf.contrib.estimator.build_raw_supervised_input_receiver_fn(
+      feature_spec, label_spec)
+
+  serve_rcvr_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
+      feature_spec)
+
+  rcvr_fn_map = {
+      model_fn_lib.ModeKeys.TRAIN: train_rcvr_fn,
+      model_fn_lib.ModeKeys.PREDICT: serve_rcvr_fn,
+  }
+
+  export_dir = tf.contrib.estimator.export_all_saved_models(
+      classifier,
+      export_dir_base='my_model/',
+      input_receiver_fn_map=rcvr_fn_map)
+
+  # export_dirs is a dict of directories with SavedModels, which
+  # can be used for serving, analysis with TFMA, or directly loaded in.
+  with ops.Graph().as_default() as graph:
+    with session.Session(graph=graph) as sess:
+      loader.load(sess, [tag_constants.TRAINING], export_dir)
+      weights = graph.get_tensor_by_name('linear/linear_model/age/weights')
+      ...
+  ```
+
+  Args:
+    estimator: an instance of tf.estimator.Estimator
+    export_dir_base: A string containing a directory in which to create
+      timestamped subdirectories containing exported SavedModels.
+    input_receiver_fn_map: dict of tf.estimator.ModeKeys to input_receiver_fn
+      mappings, where the input_receiver_fn is a function that takes no
+      argument and returns the appropriate subclass of `InputReceiver`.
+    assets_extra: A dict specifying how to populate the assets.extra directory
+      within the exported SavedModel, or `None` if no extra assets are needed.
+    as_text: whether to write the SavedModel proto in text format.
+    checkpoint_path: The checkpoint path to export.  If `None` (the default),
+      the most recent checkpoint found within the model directory is chosen.
+    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+      removed from the NodeDefs. For a detailed guide, see
+      [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
+
+  Returns:
+    A dict of tf.estimator.ModeKeys value to string path for each exported
+    directory.
 
-# Include attrs that start with single underscore.
-export.__all__ = [s for s in dir(export) if not s.startswith('__')]
+  Raises:
+    ValueError: if any input_receiver_fn is None, no export_outputs
+      are provided, or no checkpoint can be found.
+  """
+  # pylint: enable=line-too-long
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.export import *
+  # pylint: disable=protected-access
+  return estimator._export_all_saved_models(
+      export_dir_base, input_receiver_fn_map,
+      assets_extra=assets_extra,
+      as_text=as_text,
+      checkpoint_path=checkpoint_path,
+      strip_default_attrs=strip_default_attrs)
+  # pylint: enable=protected-access
diff --git a/tensorflow/contrib/estimator/python/estimator/export_test.py b/tensorflow/contrib/estimator/python/estimator/export_test.py
new file mode 100644
index 0000000000..050821ee67
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/export_test.py
@@ -0,0 +1,373 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for contrib wrapping of export_saved_model_for_mode functionality.
+
+These are direct copies of the tests included in core, with import locations
+changed. These should be removed when the functionality in core is part of the
+public API.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+
+from tensorflow.contrib.estimator.python.estimator import export as contrib_export
+from tensorflow.python.client import session
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.export import export_output
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import loader
+from tensorflow.python.saved_model import tag_constants
+from tensorflow.python.training import training
+from tensorflow.python.util import compat
+
+
+def _model_fn_for_export_tests(features, labels, mode):
+  _, _ = features, labels
+  variables.Variable(1., name='weight')
+  scores = constant_op.constant([3.])
+  classes = constant_op.constant(['wumpus'])
+  update_global_step = state_ops.assign_add(training.get_global_step(), 1)
+  with ops.control_dependencies([update_global_step]):
+    train_op = constant_op.constant(2.)
+  return model_fn_lib.EstimatorSpec(
+      mode,
+      predictions=constant_op.constant(10.),
+      loss=constant_op.constant(1.),
+      train_op=train_op,
+      export_outputs={
+          'test': export_output.ClassificationOutput(scores, classes)})
+
+
+def _x_y_input_fn():
+  return ({'x': constant_op.constant([[1], [1]]),
+           'y': constant_op.constant([[2], [2]])},
+          constant_op.constant([[1], [1]]))
+
+
+def _model_fn_with_x_y(features, labels, mode):
+  _ = labels
+  variables.Variable(1., name='weight')
+  scores = constant_op.constant([3.])
+  classes = constant_op.constant(['wumpus'])
+  if mode == model_fn_lib.ModeKeys.PREDICT:
+    variables.Variable(36., name='name_collision')
+    return model_fn_lib.EstimatorSpec(
+        mode,
+        predictions=constant_op.constant(10.),
+        export_outputs={
+            'test': export_output.ClassificationOutput(scores, classes)})
+  else:
+    prefix = 'eval_' if mode == model_fn_lib.ModeKeys.EVAL else ''
+
+    multiplied = math_ops.multiply(
+        features['x'], features['y'], name='{}multiplied'.format(prefix))
+    metrics = {'mean': metrics_lib.mean(features['x'] - features['y'],
+                                        name='{}mean'.format(prefix))}
+    variables.Variable(1., name='later_var')
+    variables.Variable(3., name='name_collision')
+    return model_fn_lib.EstimatorSpec(
+        mode,
+        predictions=multiplied,
+        loss=constant_op.constant(1.),
+        train_op=state_ops.assign_add(training.get_global_step(), 1),
+        eval_metric_ops=metrics)
+
+
+def _get_serving_input_receiver_fn():
+  feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                  'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+  return export.build_parsing_serving_input_receiver_fn(feature_spec)
+
+
+def _get_supervised_input_receiver_fn():
+  feature_spec = {
+      'x': array_ops.placeholder(
+          dtype=dtypes.int64, shape=(2, 1), name='feature_x'),
+      'y': array_ops.placeholder(
+          dtype=dtypes.int64, shape=(2, 1), name='feature_y')
+      }
+  label_spec = array_ops.placeholder(
+      dtype=dtypes.float32, shape=[1], name='truth')
+
+  return export.build_raw_supervised_input_receiver_fn(
+      feature_spec, label_spec)
+
+
+class EstimatorExportTest(test.TestCase):
+
+  def test_export_saved_model_train(self):
+    self._test_export_saved_model_for_mode(
+        _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.TRAIN)
+
+  def test_export_saved_model_eval(self):
+    self._test_export_saved_model_for_mode(
+        _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.EVAL)
+
+  def test_export_saved_model_predict(self):
+    self._test_export_saved_model_for_mode(
+        _get_serving_input_receiver_fn(), model_fn_lib.ModeKeys.PREDICT)
+
+  def _test_export_saved_model_for_mode(self, input_receiver_fn, mode):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
+    est.train(input_fn=_x_y_input_fn, steps=1)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = contrib_export.export_saved_model_for_mode(
+        est, export_dir_base, input_receiver_fn, mode=mode)
+
+    # Check that all the files are in the right places.
+    self.assertTrue(gfile.Exists(export_dir_base))
+    self._validate_exported_files(export_dir)
+
+    # Restore, to validate that the export was well-formed.
+    tag_set = model_fn_lib.EXPORT_TAG_MAP[mode]
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, tag_set, export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertFalse('name_collision_1' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_receiver_map(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('input_example_tensor' in graph_ops)
+        self.assertTrue('ParseExample/ParseExample' in graph_ops)
+        self.assertFalse('feature_x' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_train_only(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.TRAINING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('multiplied' in graph_ops)
+        self.assertTrue('mean/update_op' in graph_ops)
+        self.assertFalse('eval_multiplied' in graph_ops)
+        self.assertTrue('feature_x' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_eval_only(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.EVAL], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('eval_multiplied' in graph_ops)
+        self.assertTrue('eval_mean/value' in graph_ops)
+        self.assertFalse('multiplied' in graph_ops)
+        self.assertTrue('feature_x' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_no_serving(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.TRAINING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('multiplied' in graph_ops)
+        self.assertFalse('eval_multiplied' in graph_ops)
+        self.assertTrue('feature_x' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.EVAL], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('eval_multiplied' in graph_ops)
+        self.assertFalse('multiplied' in graph_ops)
+        # TODO(karmel): is this the desired behavior when names are shared?
+        self.assertTrue('feature_x_1' in graph_ops)
+        self.assertTrue('feature_y_1' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_three_defs(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    # Restore, to validate that the export was well-formed.
+    for tag_set in model_fn_lib.EXPORT_TAG_MAP.values():
+      with ops.Graph().as_default() as graph:
+        with session.Session(graph=graph) as sess:
+          loader.load(sess, tag_set, export_dir)
+          graph_ops = [x.name for x in graph.get_operations()]
+          self.assertTrue('global_step/Assign' in graph_ops)
+          self.assertTrue('global_step/Initializer/zeros' in graph_ops)
+          self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_all_vars(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.TRAINING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('later_var' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertFalse('later_var' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_name_collision(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.TRAINING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('name_collision' in graph_ops)
+        self.assertFalse('name_collision_1' in graph_ops)
+        collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+        self.assertEqual(3, collection_vars[-1].eval())
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('name_collision' in graph_ops)
+        self.assertFalse('name_collision_1' in graph_ops)
+        collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+        # This is a non-obvious detail: when we load the estimator spec
+        # for predict, name_collision gets set to 36. However, we then restore
+        # from checkpoint, which should overwrite that var and make it the 3
+        # from training. In practice, this would not be a good way to write
+        # a model_fn, but leaving this check in for now to ensure consistency
+        # with what would happen given our current order of spec, then
+        # checkpoint.
+        self.assertEqual(3, collection_vars[-1].eval())
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def _test_export_all_saved_models(self, input_receiver_fn_map):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_with_x_y)
+    est.train(input_fn=_x_y_input_fn, steps=1)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = contrib_export.export_all_saved_models(
+        est, export_dir_base, input_receiver_fn_map)
+
+    # Check that all the files are in the right places.
+    self.assertTrue(gfile.Exists(export_dir_base))
+
+    self._validate_exported_files(export_dir)
+
+    return export_dir, tmpdir
+
+  def _validate_exported_files(self, export_dir):
+    self.assertTrue(gfile.Exists(export_dir))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('saved_model.pb'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables/variables.index'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables/variables.data-00000-of-00001'))))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/exporter.py b/tensorflow/contrib/estimator/python/estimator/exporter.py
index 33d0314905..09d7440605 100644
--- a/tensorflow/contrib/estimator/python/estimator/exporter.py
+++ b/tensorflow/contrib/estimator/python/estimator/exporter.py
@@ -12,21 +12,269 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""exporter python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Implements StepsExporter to export the model in user specified steps."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import exporter
+import os
+
+from tensorflow.python.estimator import exporter
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import tf_logging
+from tensorflow.python.summary import summary_iterator
+
+DEFAULT_GLOBAL_STEP_KEY = ops.GraphKeys.GLOBAL_STEP
+
+
+class StepsExporter(exporter.Exporter):
+  """This class exports the model in user specified steps.
+
+  This class exports the model at the steps given by the `steps_to_keep`
+  argument. Each number in the list is treated as a lower bound for model
+  exports, to handle the case when evaluation is performed at different steps.
+
+  Consider this example:
+
+  ```
+  steps_to_keep = [1, 2, 3, 6, 7, 10, 12, 25]
+  ```
+
+  The model is evaluated at step increments of 5: `[5, 10, 15, 20, 25, 30]`.
+  The `StepsExporter` will export the model when it has reached steps
+  `[5, 10, 15, 25]`.
+
+  This example illustrates the two cases when the model is exported:
+
+  1. Model is evaluated on a step defined in the list `steps_to_keep`.
+
+     In the example, the model is exported on step `10` and `25`.
+
+  2. Model is evaluated on a step not defined in the list `steps_to_keep`, but
+     is still exported because a step in `steps_to_keep` was missed.
+
+     In the example, when the model reaches step `5`, the model is exported even
+     though  `steps_to_keep` does not contain `5`. Step `5` is exported to make
+     up for step `3`, which was missed. Steps `1` and `2` in `steps_to_keep` are
+     skipped completely (e.g. say the model is evaluated at step `6`. It will
+     **not** be exported to make up for step `2`).
+
+  Using the `steps_to_keep` list as a lower bound allows users to define
+  approximate step boundaries for exporting their models, and avoid frustrating
+  off-by-one calculation errors.
+
+  Sample Use Cases:
+    There are specific points during the training when having a saved version of
+    the model would be useful. One example is at the end of each training phase
+    when the set of freezed weights is changed.
+    Another good use case is saving the model at the end of each epoch for
+    visualization or retraining.
+  """
+
+  def __init__(self,
+               steps_to_keep,
+               name='steps_exporter',
+               serving_input_receiver_fn=None,
+               event_file_pattern='eval/*.tfevents.*',
+               assets_extra=None,
+               as_text=False):
+    """Create an `StepsExporter` to use with `tf.estimator.EvalSpec`.
+
+    Example of creating a StepsExporter for training and evaluation:
+
+    ```python
+    categorical_feature_a = categorical_column_with_hash_bucket(...)
+    categorical_feature_b = categorical_column_with_hash_bucket(...)
+
+    categorical_feature_a_emb = embedding_column(
+        categorical_column=categorical_feature_a, ...)
+    categorical_feature_b_emb = embedding_column(
+        categorical_column=categorical_feature_b, ...)
+
+    estimator = tf.estimator.DNNClassifier(
+        feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+        hidden_units=[1024, 512, 256])
+
+    # Input pipeline for train and evaluate.
+    def train_input_fn: # returns x, y
+      # please shuffle the data.
+      pass
+    def eval_input_fn_eval: # returns x, y
+      pass
+
+    exporter = tf.contrib.estimator.exporter.StepsExporter(
+        name="steps_exporter",
+        serving_input_receiver_fn=serving_input_receiver_fn,
+        event_file_pattern='eval/*.tfevents.*'
+        steps_to_keep=[...])
+
+    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=1000)
+
+    eval_spec = [tf.estimator.EvalSpec(
+      input_fn=eval_input_fn,
+      steps=1,
+      exporters=exporter,
+      start_delay_secs=0,
+      throttle_secs=5)]
+
+    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
+
+    # Models will be exported to estimator.model_dir in timestamped directories,
+    # which can be used for serving, analysis with TFMA, or directly loaded in.
+    # For example:
+    export_dir = os.path.join(estimator.model_dir,
+                              <timestamped directory name>)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        tf.saved_model.loader.load(
+            sess, [tf.saved_model.tag_constants.SERVING], export_dir)
+
+    ```
+
+    Args:
+      steps_to_keep: Non-empty list of positive integers containing
+        the step numbers at which the model should be exported. All the exports
+        will be kept, so there is no garbage collection.
+      name: Unique name of this `Exporter` that is going to be used in the
+        export path.
+      serving_input_receiver_fn: A function that takes no arguments and returns
+        a `ServingInputReceiver`.
+      event_file_pattern: Event file name pattern relative to model_dir. If
+        None, however, the exporter would not be preemption-safe. To be
+        preemption-safe, event_file_pattern should be specified.
+      assets_extra: An optional dict specifying how to populate the assets.extra
+        directory within the exported SavedModel.  Each key should give the
+        destination path (including the filename) relative to the assets.extra
+        directory.  The corresponding value gives the full path of the source
+        file to be copied.  For example, the simple case of copying a single
+        file without renaming it is specified as `{'my_asset_file.txt':
+        '/path/to/my_asset_file.txt'}`.
+      as_text: Whether to write the SavedModel proto in text format. Defaults to
+        `False`.
+
+    Raises:
+      ValueError: If any arguments is invalid.
+    """
+    # pylint: disable=protected-access
+    self._saved_model_exporter = exporter._SavedModelExporter(
+        name, serving_input_receiver_fn, assets_extra, as_text)
+    # pylint: enable=protected-access
+
+    self._event_file_pattern = event_file_pattern
+    self._model_dir = None
+
+    self._input_steps_to_keep = steps_to_keep
+    steps_to_keep = [step for step in steps_to_keep if isinstance(step, int)]
+    steps_to_keep = [step for step in steps_to_keep if step > 0]
+    if not steps_to_keep:
+      raise ValueError(
+          '`steps_to_keep` list must have at least one positive integer')
+    elif self._input_steps_to_keep != steps_to_keep:
+      tf_logging.warn('Changed `steps_to_keep`, by omitting non-integer or'
+                      ' less than 1 elements, to [%s]',
+                      ', '.join(str(step) for step in steps_to_keep))
+    self._steps_to_keep = sorted(steps_to_keep)
+    self._steps_kept = []
+
+  @property
+  def name(self):
+    return self._saved_model_exporter.name
+
+  def export(self, estimator, export_path, checkpoint_path, eval_result,
+             is_the_final_export):
+    """Exports the given Estimator to a specific format.
+
+    Args:
+      estimator: A `tf.estimator.Estimator` instance to export.
+      export_path: A string containing a directory where to write the export.
+      checkpoint_path: The checkpoint path to export.
+      eval_result: The output of Estimator.evaluate on this checkpoint.
+      is_the_final_export: This boolean is True when this is an export in the
+        end of training. It is False for the intermediate exports during the
+        training. When passing Exporter to tf.estimator.train_and_evaluate
+        is_the_final_export is always False if TrainSpec.max_steps is None.
+
+    Returns:
+      The string path to the exported directory or None if export is skipped.
+
+    Raises:
+      ValueError: If `eval_result` is None or doesn't have
+        `ops.GraphKeys.GLOBAL_STEP` as a key.
+    """
+    export_result = None
+
+    if not eval_result or DEFAULT_GLOBAL_STEP_KEY not in eval_result:
+      raise ValueError(
+          '`eval_result` is empty, or does not have global step. This'
+          ' should never happen as Estimator always sets the global step in '
+          '`eval_result`. Please file a bug report. Got eval_result: %s'
+          % str(eval_result))
+
+    if self._model_dir != estimator.model_dir and self._event_file_pattern:
+      tf_logging.info('Loads the steps that the model was already evaluated at,'
+                      'from event files')
+      self._model_dir = estimator.model_dir
+      full_event_file_pattern = os.path.join(self._model_dir,
+                                             self._event_file_pattern)
+      self._steps_kept = self._get_kept_steps(full_event_file_pattern)
+
+      if self._steps_kept:
+        self._steps_kept = sorted(self._steps_kept)
+        self._steps_to_keep = [step for step in self._steps_to_keep if
+                               step > self._steps_kept[-1]]
+    # It is assumed that the model is exported at any evaluated step 'n' if
+    # there is any `steps_missed` lower than 'n'. As a result, all the steps in
+    # `_steps_to_keep` lower than the last evaluated step will be removed.
+    steps_missed = [step for step in self._steps_to_keep
+                    if step <= eval_result[DEFAULT_GLOBAL_STEP_KEY]]
+
+    if steps_missed:
+      # update the `_steps_to_keep` list by omitting all steps smaller than the
+      # current global step which are missed to be exported
+      export_result = self._saved_model_exporter.export(estimator, export_path,
+                                                        checkpoint_path,
+                                                        eval_result,
+                                                        is_the_final_export)
+      self._steps_to_keep = [step for step in self._steps_to_keep if step
+                             not in steps_missed]
+      # contains all the steps in which export has happened.
+      self._steps_kept.append(eval_result[DEFAULT_GLOBAL_STEP_KEY])
+      # Show warning for all the missed steps except the last one
+      if steps_missed[:-1]:
+        tf_logging.warn('Missed steps [%s] for exporting, as no evaluation'
+                        ' took place at them.', ', '.join(str(step) for step in
+                                                          steps_missed[:-1]))
+      # Log model export if the last missed step is the same as the current step
+      if steps_missed[-1] == eval_result[DEFAULT_GLOBAL_STEP_KEY]:
+        tf_logging.info('Performing model export at step %d.',
+                        eval_result[DEFAULT_GLOBAL_STEP_KEY])
+      # Show warning for exporting model at another step instead of the user
+      #   specified one
+      else:
+        tf_logging.warn('Performing model export at step %d instead of %d, as'
+                        ' no evaluation took place at step %d.',
+                        eval_result[DEFAULT_GLOBAL_STEP_KEY], steps_missed[-1],
+                        steps_missed[-1])
+    return export_result
+
+  def _get_kept_steps(self, event_files):
+    """Get the steps that the model was evaluated at, from event files.
+
+    Args:
+      event_files: Absolute pattern of event files.
 
-# Include attrs that start with single underscore.
-exporter.__all__ = [s for s in dir(exporter) if not s.startswith('__')]
+    Returns:
+      steps_kept: A list of steps in which the model was evaluated.
+    """
+    if not event_files:
+      return None
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.exporter import *
+    steps_kept = []
+    for event_file in gfile.Glob(os.path.join(event_files)):
+      for event in summary_iterator.summary_iterator(event_file):
+        if event.step not in steps_kept:
+          steps_kept.append(event.step)
+    return steps_kept
diff --git a/tensorflow/contrib/estimator/python/estimator/exporter_test.py b/tensorflow/contrib/estimator/python/estimator/exporter_test.py
new file mode 100644
index 0000000000..0d009b945e
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/exporter_test.py
@@ -0,0 +1,206 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `StepsExporter`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import shutil
+import tempfile
+
+from tensorflow.contrib.estimator.python.estimator import exporter as exporter_lib
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+
+
+class StepsExporterTest(test.TestCase):
+
+  def test_error_out_if_steps_to_keep_has_no_positive_integers(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    with self.assertRaisesRegexp(ValueError, "positive integer"):
+      exporter = exporter_lib.StepsExporter(
+          name="specified_steps_exporter",
+          serving_input_receiver_fn=_serving_input_receiver_fn,
+          steps_to_keep=[-1, 0, 1.1])
+      self.assertEqual("specified_steps_exporter", exporter.name)
+
+  def test_steps_exporter(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp()
+    gfile.MkDir(export_dir_base)
+    gfile.MkDir(export_dir_base + "/export")
+    gfile.MkDir(export_dir_base + "/eval")
+
+    exporter = exporter_lib.StepsExporter(
+        name="steps_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        steps_to_keep=[1])
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.export_savedmodel.return_value = "export_result_path"
+    estimator.model_dir = export_dir_base
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 1},
+                                    False)
+
+    self.assertEqual("export_result_path", export_result)
+    estimator.export_savedmodel.assert_called_with(
+        export_dir_base,
+        _serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        checkpoint_path="checkpoint_path",
+        strip_default_attrs=True)
+
+    shutil.rmtree(export_dir_base, ignore_errors=True)
+
+  def test_steps_exporter_with_preemption(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp()
+    gfile.MkDir(export_dir_base)
+    gfile.MkDir(export_dir_base + "/export")
+    gfile.MkDir(export_dir_base + "/eval")
+
+    eval_dir_base = os.path.join(export_dir_base, "eval_continuous")
+    estimator_lib._write_dict_to_summary(eval_dir_base, {}, 1)
+    estimator_lib._write_dict_to_summary(eval_dir_base, {}, 2)
+
+    exporter = exporter_lib.StepsExporter(
+        name="steps_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        event_file_pattern="eval_continuous/*.tfevents.*",
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        steps_to_keep=[1, 2, 6, 8])
+
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.model_dir = export_dir_base
+    estimator.export_savedmodel.return_value = "export_result_path"
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 3},
+                                    False)
+    self.assertEqual(None, export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 6},
+                                    False)
+    self.assertEqual("export_result_path", export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 7},
+                                    False)
+    self.assertEqual(None, export_result)
+
+    shutil.rmtree(export_dir_base, ignore_errors=True)
+
+  def test_specified_step_is_saved(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp()
+    gfile.MkDir(export_dir_base)
+    gfile.MkDir(export_dir_base + "/export")
+    gfile.MkDir(export_dir_base + "/eval")
+
+    exporter = exporter_lib.StepsExporter(
+        name="steps_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        steps_to_keep=[1, 5, 8, 10, 11])
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.export_savedmodel.return_value = "export_result_path"
+    estimator.model_dir = export_dir_base
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 1},
+                                    False)
+
+    self.assertTrue(estimator.export_savedmodel.called)
+    self.assertEqual("export_result_path", export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 2},
+                                    False)
+    self.assertEqual(None, export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 5},
+                                    False)
+    self.assertTrue(estimator.export_savedmodel.called)
+    self.assertEqual("export_result_path", export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 10},
+                                    False)
+    self.assertTrue(estimator.export_savedmodel.called)
+    self.assertEqual("export_result_path", export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 15},
+                                    False)
+    self.assertTrue(estimator.export_savedmodel.called)
+    self.assertEqual("export_result_path", export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"global_step": 20},
+                                    False)
+    self.assertEqual(None, export_result)
+
+    shutil.rmtree(export_dir_base, ignore_errors=True)
+
+  def test_steps_exporter_with_no_global_step_key(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp()
+    gfile.MkDir(export_dir_base)
+    gfile.MkDir(export_dir_base + "/export")
+    gfile.MkDir(export_dir_base + "/eval")
+
+    exporter = exporter_lib.StepsExporter(
+        name="steps_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        steps_to_keep=[1])
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.export_savedmodel.return_value = "export_result_path"
+    estimator.model_dir = export_dir_base
+
+    with self.assertRaisesRegexp(ValueError, "does not have global step"):
+      exporter.export(estimator, export_dir_base, "checkpoint_path", {}, False)
+
+    shutil.rmtree(export_dir_base, ignore_errors=True)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/extenders.py b/tensorflow/contrib/estimator/python/estimator/extenders.py
index ca5494db56..e3c44bea66 100644
--- a/tensorflow/contrib/estimator/python/estimator/extenders.py
+++ b/tensorflow/contrib/estimator/python/estimator/extenders.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,346 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""extenders python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Extenders of tf.estimator.Estimator."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import extenders
+import six
+
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.export.export_output import PredictOutput
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
+from tensorflow.python.ops import clip_ops
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.util import function_utils
+
+
+_VALID_METRIC_FN_ARGS = set(['features', 'labels', 'predictions', 'config'])
+
+
+def add_metrics(estimator, metric_fn):
+  """Creates a new `tf.estimator.Estimator` which has given metrics.
+
+  Example:
+
+  ```python
+    def my_auc(labels, predictions):
+      return {'auc': tf.metrics.auc(labels, predictions['logistic'])}
+
+    estimator = tf.estimator.DNNClassifier(...)
+    estimator = tf.contrib.estimator.add_metrics(estimator, my_auc)
+    estimator.train(...)
+    estimator.evaluate(...)
+  ```
+  Example usage of custom metric which uses features:
+
+  ```python
+    def my_auc(features, labels, predictions):
+      return {'auc': tf.metrics.auc(
+        labels, predictions['logistic'], weights=features['weight'])}
+
+    estimator = tf.estimator.DNNClassifier(...)
+    estimator = tf.contrib.estimator.add_metrics(estimator, my_auc)
+    estimator.train(...)
+    estimator.evaluate(...)
+  ```
+
+  Args:
+    estimator: A `tf.estimator.Estimator` object.
+    metric_fn: A function which should obey the following signature:
+      - Args: can only have following four arguments in any order:
+        * predictions: Predictions `Tensor` or dict of `Tensor` created by given
+          `estimator`.
+        * features: Input `dict` of `Tensor` objects created by `input_fn` which
+          is given to `estimator.evaluate` as an argument.
+        * labels:  Labels `Tensor` or dict of `Tensor` created by `input_fn`
+          which is given to `estimator.evaluate` as an argument.
+        * config: config attribute of the `estimator`.
+       - Returns:
+         Dict of metric results keyed by name. Final metrics are a union of this
+         and `estimator's` existing metrics. If there is a name conflict between
+         this and `estimator`s existing metrics, this will override the existing
+         one. The values of the dict are the results of calling a metric
+         function, namely a `(metric_tensor, update_op)` tuple.
+
+  Returns:
+      A new `tf.estimator.Estimator` which has a union of original metrics with
+        given ones.
+  """
+  _verify_metric_fn_args(metric_fn)
+
+  def new_model_fn(features, labels, mode, config):
+    spec = estimator.model_fn(features, labels, mode, config)
+    if mode != model_fn_lib.ModeKeys.EVAL:
+      return spec
+    new_metrics = _call_metric_fn(metric_fn, features, labels, spec.predictions,
+                                  config)
+    all_metrics = spec.eval_metric_ops or {}
+    all_metrics.update(new_metrics)
+    return spec._replace(eval_metric_ops=all_metrics)
+
+  return estimator_lib.Estimator(
+      model_fn=new_model_fn,
+      model_dir=estimator.model_dir,
+      config=estimator.config,
+      # pylint: disable=protected-access
+      warm_start_from=estimator._warm_start_settings)
+      # pylint: enable=protected-access
+
+
+def clip_gradients_by_norm(optimizer, clip_norm):
+  """Returns an optimizer which clips gradients before applying them.
+
+  Example:
+
+  ```python
+  optimizer = tf.train.ProximalAdagradOptimizer(
+      learning_rate=0.1,
+      l1_regularization_strength=0.001)
+  optimizer = tf.contrib.estimator.clip_gradients_by_norm(
+      optimizer, clip_norm)
+  estimator = tf.estimator.DNNClassifier(
+      feature_columns=[...],
+      hidden_units=[1024, 512, 256],
+      optimizer=optimizer)
+  ```
+
+  Args:
+    optimizer: An `tf.Optimizer` object to apply gradients.
+    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
+
+  Returns:
+    A `tf.Optimizer`.
+  """
+
+  def clip_grads(grads_and_vars):
+    gradients, variables = zip(*grads_and_vars)
+    gradients = clip_ops.clip_by_global_norm(gradients, clip_norm)[0]
+    grads_and_vars = list(zip(gradients, variables))
+    return grads_and_vars
+
+  return _TransformGradients(
+      optimizer=optimizer,
+      transform_grads_fn=clip_grads,
+      name='ClipByNorm' + optimizer.get_name())
+
+
+def forward_features(estimator, keys=None, sparse_default_values=None):
+  """Forward features to predictions dictionary.
+
+  In some cases, user wants to see some of the features in estimators prediction
+  output. As an example, consider a batch prediction service: The service simply
+  runs inference on the users graph and returns the results. Keys are essential
+  because there is no order guarantee on the outputs so they need to be rejoined
+  to the inputs via keys or transclusion of the inputs in the outputs.
+  Example:
+  ```python
+    def input_fn():
+      features, labels = ...
+      features['unique_example_id'] = ...
+      features, labels
+    estimator = tf.estimator.LinearClassifier(...)
+    estimator = tf.contrib.estimator.forward_features(
+        estimator, 'unique_example_id')
+    estimator.train(...)
+    assert 'unique_example_id' in estimator.predict(...)
+  ```
+  Args:
+    estimator: A `tf.estimator.Estimator` object.
+    keys: A `string` or a `list` of `string`. If it is `None`, all of the
+      `features` in `dict` is forwarded to the `predictions`. If it is a
+      `string`, only given key is forwarded. If it is a `list` of strings, all
+      the given `keys` are forwarded.
+    sparse_default_values: A dict of `str` keys mapping the name of the sparse
+      features to be converted to dense, to the default value to use. Only
+      sparse features indicated in the dictionary are converted to dense and the
+      provided default value is used.
+
+  Returns:
+      A new `tf.estimator.Estimator` which forwards features to predictions.
+  Raises:
+    ValueError:
+      * if `keys` is already part of `predictions`. We don't allow
+        override.
+      * if 'keys' does not exist in `features`.
+    TypeError: if `keys` type is not one of `string` or list/tuple of `string`.
+  """
+
+  def verify_key_types(keys):  # pylint: disable=missing-docstring
+    if keys is None:
+      return keys
+    if isinstance(keys, six.string_types):
+      return [keys]
+    if not isinstance(keys, (list, tuple)):
+      raise TypeError('keys should be either a string or a list of strings. '
+                      'Given: {}'.format(type(keys)))
+    for key in keys:
+      if not isinstance(key, six.string_types):
+        raise TypeError('All items in the given keys list should be a string. '
+                        'There exist an item with type: {}'.format(type(key)))
+    return keys
+
+  def get_keys(features):
+    if keys is None:
+      return features.keys()
+    return keys
+
+  def verify_keys_and_predictions(features, predictions):
+    if not isinstance(predictions, dict):
+      raise ValueError(
+          'Predictions should be a dict to be able to forward features. '
+          'Given: {}'.format(type(predictions)))
+    for key in get_keys(features):
+      if key not in features:
+        raise ValueError(
+            'keys should be exist in features. Key "{}" is not in features '
+            'dict. features dict has following keys: {}. Please check '
+            'arguments of forward_features.'.format(key, features.keys()))
+      if key in predictions:
+        raise ValueError(
+            'Cannot forward feature key ({}). Since it does exist in '
+            'predictions. Existing prediction keys: {}. Please check arguments '
+            'of forward_features.'.format(key, predictions.keys()))
+
+  keys = verify_key_types(keys)
+
+  def new_model_fn(features, labels, mode, config):  # pylint: disable=missing-docstring
+    spec = estimator.model_fn(features, labels, mode, config)
+    predictions = spec.predictions
+    if predictions is None:
+      return spec
+    verify_keys_and_predictions(features, predictions)
+    for key in get_keys(features):
+      feature = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(
+          features[key])
+      if sparse_default_values and (key in sparse_default_values):
+        if not isinstance(feature, sparse_tensor_lib.SparseTensor):
+          raise ValueError(
+              'Feature ({}) is expected to be a `SparseTensor`.'.format(key))
+        feature = sparse_ops.sparse_tensor_to_dense(
+            feature, default_value=sparse_default_values[key])
+      if not isinstance(feature, ops.Tensor):
+        raise ValueError(
+            'Feature ({}) should be a Tensor. Please use `keys` '
+            'argument of forward_features to filter unwanted features, or'
+            'add key to argument `sparse_default_values`.'
+            'Type of features[{}] is {}.'.format(key, key, type(feature)))
+      predictions[key] = feature
+    spec = spec._replace(predictions=predictions)
+    if spec.export_outputs:
+      for ekey in ['predict', 'serving_default']:
+        if (ekey in spec.export_outputs and
+            isinstance(spec.export_outputs[ekey],
+                       PredictOutput)):
+          export_outputs = spec.export_outputs[ekey].outputs
+          for key in get_keys(features):
+            export_outputs[key] = predictions[key]
+
+    return spec
+
+  return estimator_lib.Estimator(
+      model_fn=new_model_fn,
+      model_dir=estimator.model_dir,
+      config=estimator.config)
+
+
+class _TransformGradients(optimizer_lib.Optimizer):
+  """Add given gradient transformation to the optimizer."""
+
+  def __init__(self, optimizer, transform_grads_fn, name=None):
+    """Construct an `tf.Optimizer` wrapper to apply given transformations.
+
+    Example:
+
+    ```python
+    optimizer = tf.train.ProximalAdagradOptimizer(
+        learning_rate=0.1,
+        l1_regularization_strength=0.001)
+    def clip_grads(grads_and_vars):
+      gradients, variables = zip(*grads_and_vars)
+      gradients = tf.clip_by_global_norm(grads, my_norm)[0]
+      grads_and_vars = list(zip(gradients, variables))
+      return grads_and_vars
+    optimizer = _TransformGradients(
+        opt=optimizer, transform_grads_fn=clip_grads)
+    estimator = tf.estimator.DNNClassifier(
+        feature_columns=[...],
+        hidden_units=[1024, 512, 256],
+        optimizer=optimizer)
+    ```
+
+    Args:
+      optimizer: An `tf.Optimizer` object to apply gradients.
+      transform_grads_fn: A function which takes a single argument, a list of
+        gradient to variable pairs (tuples), performs any requested gradient
+        updates, such as gradient clipping or multipliers, and returns the
+        updated list.
+      name: A string which will be used for debugging purposes.
+    """
+    super(_TransformGradients, self).__init__(
+        use_locking=False, name=name or optimizer.get_name())
+    self._optimizer = optimizer
+    self._transform_grads_fn = transform_grads_fn
+
+  def compute_gradients(self, *args, **kwargs):
+    """See `tf.Optimizer`."""
+    return self._optimizer.compute_gradients(*args, **kwargs)
+
+  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
+    """Apply gradients to variables.
+
+    Calls `transform_grads_fn`, and then applies the real optimizer.
+
+    Args:
+      grads_and_vars: List of (gradient, variable) pairs as returned by
+        compute_gradients().
+      global_step: Optional Variable to increment by one after the
+        variables have been updated.
+      name: Optional name for the returned operation.  Default to the
+        name passed to the Optimizer constructor.
+
+    Returns:
+      An `Operation` that applies the gradients. If `global_step` was not None,
+      that operation also increments `global_step`.
+
+    Raises:
+      ValueError: If the grads_and_vars is malformed.
+    """
+    grads_and_vars = self._transform_grads_fn(grads_and_vars)
+    return self._optimizer.apply_gradients(grads_and_vars, global_step, name)
+
+  def get_slot(self, *args, **kwargs):
+    """See `tf.Optimizer`."""
+    return self._optimizer.get_slot(*args, **kwargs)
+
+  def get_slot_names(self, *args, **kwargs):
+    """See `tf.Optimizer`."""
+    return self._optimizer.get_slot_names(*args, **kwargs)
+
+
+def _verify_metric_fn_args(metric_fn):
+  args = set(function_utils.fn_args(metric_fn))
+  invalid_args = list(args - _VALID_METRIC_FN_ARGS)
+  if invalid_args:
+    raise ValueError('metric_fn (%s) has following not expected args: %s' %
+                     (metric_fn, invalid_args))
 
-# Include attrs that start with single underscore.
-extenders.__all__ = [s for s in dir(extenders) if not s.startswith('__')]
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.extenders import *
+def _call_metric_fn(metric_fn, features, labels, predictions, config):
+  """Calls metric fn with proper arguments."""
+  metric_fn_args = function_utils.fn_args(metric_fn)
+  kwargs = {}
+  if 'features' in metric_fn_args:
+    kwargs['features'] = features
+  if 'labels' in metric_fn_args:
+    kwargs['labels'] = labels
+  if 'predictions' in metric_fn_args:
+    kwargs['predictions'] = predictions
+  if 'config' in metric_fn_args:
+    kwargs['config'] = config
+  return metric_fn(**kwargs)
diff --git a/tensorflow/contrib/estimator/python/estimator/extenders_test.py b/tensorflow/contrib/estimator/python/estimator/extenders_test.py
new file mode 100644
index 0000000000..c8fdaa8791
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/extenders_test.py
@@ -0,0 +1,426 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""extenders tests."""
+
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+import numpy as np
+
+from tensorflow.contrib.estimator.python.estimator import extenders
+from tensorflow.contrib.layers.python.layers import layers
+from tensorflow.contrib.predictor import from_saved_model
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import estimator_lib
+from tensorflow.python.estimator.canned import linear
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.training import training
+from tensorflow.python.util import compat
+
+
+def get_input_fn(x, y):
+
+  def input_fn():
+    dataset = dataset_ops.Dataset.from_tensor_slices({'x': x, 'y': y})
+    iterator = dataset.make_one_shot_iterator()
+    features = iterator.get_next()
+    labels = features.pop('y')
+    return features, labels
+
+  return input_fn
+
+
+class AddMetricsTest(test.TestCase):
+
+  def test_should_add_metrics(self):
+    input_fn = get_input_fn(
+        x=np.arange(4)[:, None, None], y=np.ones(4)[:, None])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn(features):
+      return {'mean_x': metrics_lib.mean(features['x'])}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+
+    estimator.train(input_fn=input_fn)
+    metrics = estimator.evaluate(input_fn=input_fn)
+    self.assertIn('mean_x', metrics)
+    self.assertEqual(1.5, metrics['mean_x'])
+    # assert that it keeps original estimators metrics
+    self.assertIn('auc', metrics)
+
+  def test_should_error_out_for_not_recognized_args(self):
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn(features, not_recognized):
+      _, _ = features, not_recognized
+      return {}
+
+    with self.assertRaisesRegexp(ValueError, 'not_recognized'):
+      estimator = extenders.add_metrics(estimator, metric_fn)
+
+  def test_all_supported_args(self):
+    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn(features, predictions, labels, config):
+      self.assertIn('x', features)
+      self.assertIsNotNone(labels)
+      self.assertIn('logistic', predictions)
+      self.assertTrue(isinstance(config, estimator_lib.RunConfig))
+      return {}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+
+    estimator.train(input_fn=input_fn)
+    estimator.evaluate(input_fn=input_fn)
+
+  def test_all_supported_args_in_different_order(self):
+    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn(labels, config, features, predictions):
+      self.assertIn('x', features)
+      self.assertIsNotNone(labels)
+      self.assertIn('logistic', predictions)
+      self.assertTrue(isinstance(config, estimator_lib.RunConfig))
+      return {}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+
+    estimator.train(input_fn=input_fn)
+    estimator.evaluate(input_fn=input_fn)
+
+  def test_all_args_are_optional(self):
+    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn():
+      return {'two': metrics_lib.mean(constant_op.constant([2.]))}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+
+    estimator.train(input_fn=input_fn)
+    metrics = estimator.evaluate(input_fn=input_fn)
+    self.assertEqual(2., metrics['two'])
+
+  def test_overrides_existing_metrics(self):
+    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+    estimator.train(input_fn=input_fn)
+    metrics = estimator.evaluate(input_fn=input_fn)
+    self.assertNotEqual(2., metrics['auc'])
+
+    def metric_fn():
+      return {'auc': metrics_lib.mean(constant_op.constant([2.]))}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+    metrics = estimator.evaluate(input_fn=input_fn)
+    self.assertEqual(2., metrics['auc'])
+
+
+class ClipGradientsByNormTest(test.TestCase):
+  """Tests clip_gradients_by_norm."""
+
+  def test_applies_norm(self):
+    optimizer = extenders.clip_gradients_by_norm(
+        training.GradientDescentOptimizer(1.0), clip_norm=3.)
+    with ops.Graph().as_default():
+      w = variables.Variable(1., name='weight')
+      x = constant_op.constant(5.)
+      y = -x * w
+      grads = optimizer.compute_gradients(y, var_list=[w])[0]
+      opt_op = optimizer.minimize(y, var_list=[w])
+      with training.MonitoredSession() as sess:
+        grads_value = sess.run(grads)
+        self.assertEqual(-5., grads_value[0])
+        sess.run(opt_op)
+        new_w = sess.run(w)
+        self.assertEqual(4., new_w)  # 1 + 1*3 (w - lr * clipped_grad)
+
+  def test_name(self):
+    optimizer = extenders.clip_gradients_by_norm(
+        training.GradientDescentOptimizer(1.0), clip_norm=3.)
+    self.assertEqual('ClipByNormGradientDescent', optimizer.get_name())
+
+
+class ForwardFeaturesTest(test.TestCase):
+  """Tests forward_features."""
+
+  def _export_estimator(self, estimator, serving_input_fn):
+    tmpdir = tempfile.mkdtemp()
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = estimator.export_savedmodel(export_dir_base, serving_input_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+    return export_dir, tmpdir
+
+  def make_dummy_input_fn(self):
+    def _input_fn():
+      dataset = dataset_ops.Dataset.from_tensors({
+          'x': [[3.], [5.]],
+          'id': [[101], [102]],
+          'sparse_id': sparse_tensor.SparseTensor(
+              values=[1, 2, 3],
+              indices=[[0, 0], [1, 0], [1, 1]],
+              dense_shape=[2, 2]),
+          'labels': [[1.], [2.]]
+      })
+      def _split(x):
+        labels = x.pop('labels')
+        return x, labels
+      dataset = dataset.map(_split)
+      return dataset
+    return _input_fn
+
+  def test_forward_keys(self):
+
+    input_fn = self.make_dummy_input_fn()
+    estimator = linear.LinearRegressor([fc.numeric_column('x')])
+    estimator.train(input_fn=input_fn, steps=1)
+
+    forwarded_keys = ['id', 'sparse_id']
+
+    for key in forwarded_keys:
+      self.assertNotIn(key, next(estimator.predict(input_fn=input_fn)))
+
+    estimator = extenders.forward_features(
+        estimator, forwarded_keys, sparse_default_values={'sparse_id': 1})
+
+    expected_results = [101, 2, 102, 5]
+    predictions = estimator.predict(input_fn=input_fn)
+    for _ in range(2):
+      prediction = next(predictions)
+      for key in forwarded_keys:
+        self.assertIn(key, prediction)
+        self.assertEqual(expected_results.pop(0), sum(prediction[key]))
+
+  def test_forward_in_exported(self):
+
+    def serving_input_fn():
+      features_ph = {
+          'x': array_ops.placeholder(dtypes.float32, [None]),
+          'id': array_ops.placeholder(dtypes.int32, [None])
+      }
+      features = {
+          key: array_ops.expand_dims(tensor, -1)
+          for key, tensor in features_ph.items()
+      }
+      return estimator_lib.export.ServingInputReceiver(features, features_ph)
+    def input_fn():
+      return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]
+    # create estimator
+    feature_columns = [fc.numeric_column('x')]
+    estimator = linear.LinearRegressor(feature_columns)
+    estimator.train(input_fn=input_fn, steps=1)
+    estimator = extenders.forward_features(estimator, 'id')
+
+    # export saved model
+    export_dir, tmpdir = self._export_estimator(estimator, serving_input_fn)
+
+    # restore model
+    predict_fn = from_saved_model(export_dir, signature_def_key='predict')
+    predictions = predict_fn({'x': [3], 'id': [101]})
+
+    # verify that 'id' exists in predictions
+    self.assertIn('id', predictions)
+    self.assertEqual(101, predictions['id'])
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_forward_in_exported_sparse(self):
+    features_columns = [fc.indicator_column(
+        fc.categorical_column_with_vocabulary_list('x', range(10)))]
+
+    classifier = linear.LinearClassifier(feature_columns=features_columns)
+
+    def train_input_fn():
+      dataset = dataset_ops.Dataset.from_tensors({
+          'x': sparse_tensor.SparseTensor(
+              values=[1, 2, 3],
+              indices=[[0, 0], [1, 0], [1, 1]],
+              dense_shape=[2, 2]),
+          'labels': [[0], [1]]
+      })
+      def _split(x):
+        labels = x.pop('labels')
+        return x, labels
+      dataset = dataset.map(_split)
+      return dataset
+
+    classifier.train(train_input_fn, max_steps=1)
+
+    classifier = extenders.forward_features(
+        classifier, keys=['x'], sparse_default_values={'x': 0})
+
+    def serving_input_fn():
+      features_ph = array_ops.placeholder(dtype=dtypes.int32, name='x',
+                                          shape=[None])
+      features = {'x': layers.dense_to_sparse(features_ph)}
+      return estimator_lib.export.ServingInputReceiver(features,
+                                                       {'x': features_ph})
+    export_dir, tmpdir = self._export_estimator(classifier, serving_input_fn)
+    prediction_fn = from_saved_model(export_dir, signature_def_key='predict')
+
+    features = (0, 2)
+    prediction = prediction_fn({'x': features})
+
+    self.assertIn('x', prediction)
+    self.assertEqual(features, tuple(prediction['x']))
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_forward_list(self):
+
+    def input_fn():
+      return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]
+
+    estimator = linear.LinearRegressor([fc.numeric_column('x')])
+    estimator.train(input_fn=input_fn, steps=1)
+
+    self.assertNotIn('id', next(estimator.predict(input_fn=input_fn)))
+    estimator = extenders.forward_features(estimator, ['x', 'id'])
+    predictions = next(estimator.predict(input_fn=input_fn))
+    self.assertIn('id', predictions)
+    self.assertIn('x', predictions)
+    self.assertEqual(101, predictions['id'])
+    self.assertEqual(3., predictions['x'])
+
+  def test_forward_all(self):
+
+    def input_fn():
+      return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]
+
+    estimator = linear.LinearRegressor([fc.numeric_column('x')])
+    estimator.train(input_fn=input_fn, steps=1)
+
+    self.assertNotIn('id', next(estimator.predict(input_fn=input_fn)))
+    self.assertNotIn('x', next(estimator.predict(input_fn=input_fn)))
+    estimator = extenders.forward_features(estimator)
+    predictions = next(estimator.predict(input_fn=input_fn))
+    self.assertIn('id', predictions)
+    self.assertIn('x', predictions)
+    self.assertEqual(101, predictions['id'])
+    self.assertEqual(3., predictions['x'])
+
+  def test_key_should_be_string(self):
+    estimator = linear.LinearRegressor([fc.numeric_column('x')])
+    with self.assertRaisesRegexp(TypeError, 'keys should be either a string'):
+      extenders.forward_features(estimator, estimator)
+
+  def test_key_should_be_list_of_string(self):
+    estimator = linear.LinearRegressor([fc.numeric_column('x')])
+    with self.assertRaisesRegexp(TypeError, 'should be a string'):
+      extenders.forward_features(estimator, ['x', estimator])
+
+  def test_key_should_be_in_features(self):
+    def input_fn():
+      return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]
+
+    estimator = linear.LinearRegressor([fc.numeric_column('x')])
+    estimator.train(input_fn=input_fn, steps=1)
+
+    estimator = extenders.forward_features(estimator, 'y')
+    with self.assertRaisesRegexp(ValueError,
+                                 'keys should be exist in features'):
+      next(estimator.predict(input_fn=input_fn))
+
+  def test_forwarded_feature_should_not_be_a_sparse_tensor(self):
+    def input_fn():
+      return {
+          'x': [[3.], [5.]],
+          'id': sparse_tensor.SparseTensor(
+              values=['1', '2'],
+              indices=[[0, 0], [1, 0]],
+              dense_shape=[2, 1])
+          }, [[1.], [2.]]
+
+    estimator = linear.LinearRegressor([fc.numeric_column('x')])
+    estimator.train(input_fn=input_fn, steps=1)
+
+    estimator = extenders.forward_features(estimator)
+    with self.assertRaisesRegexp(ValueError,
+                                 'Feature .* should be a Tensor.*'):
+      next(estimator.predict(input_fn=input_fn))
+
+  def test_forwarded_feature_should_be_a_sparse_tensor(self):
+    input_fn = self.make_dummy_input_fn()
+
+    estimator = linear.LinearRegressor([fc.numeric_column('x')])
+    estimator.train(input_fn=input_fn, steps=1)
+
+    estimator = extenders.forward_features(
+        estimator, sparse_default_values={'id': 0, 'sparse_id': 0})
+    with self.assertRaisesRegexp(
+        ValueError, 'Feature .* is expected to be a `SparseTensor`.'):
+      next(estimator.predict(input_fn=input_fn))
+
+  def test_predictions_should_be_dict(self):
+    def input_fn():
+      return {'x': [[3.], [5.]], 'id': [[101], [102]]}
+
+    def model_fn(features, mode):
+      del features
+      global_step = training.get_global_step()
+      return estimator_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant([5.]),
+          predictions=constant_op.constant([5.]),
+          train_op=global_step.assign_add(1))
+
+    estimator = estimator_lib.Estimator(model_fn=model_fn)
+    estimator.train(input_fn=input_fn, steps=1)
+
+    estimator = extenders.forward_features(estimator)
+    with self.assertRaisesRegexp(ValueError, 'Predictions should be a dict'):
+      next(estimator.predict(input_fn=input_fn))
+
+  def test_should_not_conflict_with_existing_predictions(self):
+
+    def input_fn():
+      return {'x': [[3.], [5.]], 'id': [[101], [102]]}
+
+    def model_fn(features, mode):
+      del features
+      global_step = training.get_global_step()
+      return estimator_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant([5.]),
+          predictions={'x': constant_op.constant([5.])},
+          train_op=global_step.assign_add(1))
+
+    estimator = estimator_lib.Estimator(model_fn=model_fn)
+    estimator.train(input_fn=input_fn, steps=1)
+
+    estimator = extenders.forward_features(estimator)
+    with self.assertRaisesRegexp(ValueError, 'Cannot forward feature key'):
+      next(estimator.predict(input_fn=input_fn))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py
index f4b4a079ad..34f765d565 100644
--- a/tensorflow/contrib/estimator/python/estimator/head.py
+++ b/tensorflow/contrib/estimator/python/estimator/head.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,966 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""head python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Abstractions for the head(s) of a model."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import head
+import six
+
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export_output
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.summary import summary
+from tensorflow.python.training import training_util
+
+_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
+
+def multi_class_head(n_classes,
+                     weight_column=None,
+                     label_vocabulary=None,
+                     loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
+                     loss_fn=None,
+                     name=None):
+  """Creates a `_Head` for multi class classification.
+
+  Uses `sparse_softmax_cross_entropy` loss.
+
+  The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`.
+  In many applications, the shape is `[batch_size, n_classes]`.
+
+  `labels` must be a dense `Tensor` with shape matching `logits`, namely
+  `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string
+  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
+  `labels` must be an integer `Tensor` with values specifying the class index.
+
+  If `weight_column` is specified, weights must be of shape
+  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
+
+  The loss is the weighted sum over the input dimensions. Namely, if the input
+  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
+  `batch_size`.
+
+  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
+  `(labels, logits, features)` as arguments and returns unreduced loss with
+  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support integer `labels` with
+  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
+  the input labels before passing them to `loss_fn`.
+
+  The head can be used with a canned estimator. Example:
+
+  ```python
+  my_head = tf.contrib.estimator.multi_class_head(n_classes=3)
+  my_estimator = tf.contrib.estimator.DNNEstimator(
+      head=my_head,
+      hidden_units=...,
+      feature_columns=...)
+  ```
+
+  It can also be used with a custom `model_fn`. Example:
+
+  ```python
+  def _my_model_fn(features, labels, mode):
+    my_head = tf.contrib.estimator.multi_class_head(n_classes=3)
+    logits = tf.keras.Model(...)(features)
+
+    return my_head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=labels,
+        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
+        logits=logits)
+
+  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
+  ```
+
+  Args:
+    n_classes: Number of classes, must be greater than 2 (for 2 classes, use
+      `binary_classification_head`).
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example.
+    label_vocabulary: A list or tuple of strings representing possible label
+      values. If it is not given, that means labels are already encoded as an
+      integer within [0, n_classes). If given, labels must be of string type and
+      have any value in `label_vocabulary`. Note that errors will be raised if
+      `label_vocabulary` is not provided but labels are strings.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely
+      weighted sum of losses divided by batch size. See `tf.losses.Reduction`.
+    loss_fn: Optional loss function.
+    name: name of the head. If provided, summary and metrics keys will be
+      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
+
+  Returns:
+    An instance of `_Head` for multi class classification.
+
+  Raises:
+    ValueError: if `n_classes`, `label_vocabulary` or `loss_reduction` is
+      invalid.
+  """
+  return head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint:disable=protected-access
+      n_classes=n_classes,
+      weight_column=weight_column,
+      label_vocabulary=label_vocabulary,
+      loss_reduction=loss_reduction,
+      loss_fn=loss_fn,
+      name=name)
+
+
+def binary_classification_head(
+    weight_column=None,
+    thresholds=None,
+    label_vocabulary=None,
+    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
+    loss_fn=None,
+    name=None):
+  """Creates a `_Head` for single label binary classification.
+
+  This head uses `sigmoid_cross_entropy_with_logits` loss.
+
+  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
+  In many applications, the shape is `[batch_size, 1]`.
+
+  `labels` must be a dense `Tensor` with shape matching `logits`, namely
+  `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string
+  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
+  `labels` must be float `Tensor` with values in the interval `[0, 1]`.
+
+  If `weight_column` is specified, weights must be of shape
+  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
+
+  The loss is the weighted sum over the input dimensions. Namely, if the input
+  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
+  `batch_size`.
+
+  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
+  `(labels, logits, features)` as arguments and returns unreduced loss with
+  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support float `labels` with
+  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
+  the input labels before passing them to `loss_fn`.
+
+  The head can be used with a canned estimator. Example:
+
+  ```python
+  my_head = tf.contrib.estimator.binary_classification_head()
+  my_estimator = tf.contrib.estimator.DNNEstimator(
+      head=my_head,
+      hidden_units=...,
+      feature_columns=...)
+  ```
+
+  It can also be used with a custom `model_fn`. Example:
+
+  ```python
+  def _my_model_fn(features, labels, mode):
+    my_head = tf.contrib.estimator.binary_classification_head()
+    logits = tf.keras.Model(...)(features)
+
+    return my_head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=labels,
+        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
+        logits=logits)
+
+  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
+  ```
+
+  Args:
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example.
+    thresholds: Iterable of floats in the range `(0, 1)`. For binary
+      classification metrics such as precision and recall, an eval metric is
+      generated for each threshold value. This threshold is applied to the
+      logistic values to determine the binary classification (i.e., above the
+      threshold is `true`, below is `false`.
+    label_vocabulary: A list or tuple of strings representing possible label
+      values. If it is not given, labels must be float with values within
+      [0, 1]. If given, labels must be string type and have any value in
+      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
+      is not provided but labels are strings.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely
+      weighted sum of losses divided by batch size. See `tf.losses.Reduction`.
+    loss_fn: Optional loss function.
+    name: name of the head. If provided, summary and metrics keys will be
+      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
+
+  Returns:
+    An instance of `_Head` for binary classification.
+
+  Raises:
+    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
+    ValueError: If `loss_reduction` is invalid.
+  """
+  return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint:disable=protected-access
+      weight_column=weight_column,
+      thresholds=thresholds,
+      label_vocabulary=label_vocabulary,
+      loss_reduction=loss_reduction,
+      loss_fn=loss_fn,
+      name=name)
+
+
+def regression_head(weight_column=None,
+                    label_dimension=1,
+                    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
+                    loss_fn=None,
+                    inverse_link_fn=None,
+                    name=None):
+  """Creates a `_Head` for regression using the `mean_squared_error` loss.
+
+  The loss is the weighted sum over all input dimensions. Namely, if the input
+  labels have shape `[batch_size, label_dimension]`, the loss is the weighted
+  sum over both `batch_size` and `label_dimension`.
+
+  The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`.
+  In many applications, the shape is `[batch_size, label_dimension]`.
+
+  The `labels` shape must match `logits`, namely
+  `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape
+  `[D0, D1, ... DN]` is also supported.
+
+  If `weight_column` is specified, weights must be of shape
+  `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or
+  `[D0, D1, ... DN, label_dimension]`.
+
+  Supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
+  `(labels, logits, features)` as arguments and returns unreduced loss with
+  shape `[D0, D1, ... DN, label_dimension]`.
+
+  Also supports custom `inverse_link_fn`, also known as 'mean function'.
+  `inverse_link_fn` is only used in `PREDICT` mode. It takes `logits` as
+  argument and returns predicted values. This function is the inverse of the
+  link function defined in
+  https://en.wikipedia.org/wiki/Generalized_linear_model#Link_function
+  Namely, for poisson regression, set `inverse_link_fn=tf.exp`.
+
+  The head can be used with a canned estimator. Example:
+
+  ```python
+  my_head = tf.contrib.estimator.regression_head()
+  my_estimator = tf.contrib.estimator.DNNEstimator(
+      head=my_head,
+      hidden_units=...,
+      feature_columns=...)
+  ```
+
+  It can also be used with a custom `model_fn`. Example:
+
+  ```python
+  def _my_model_fn(features, labels, mode):
+    my_head = tf.contrib.estimator.regression_head()
+    logits = tf.keras.Model(...)(features)
+
+    return my_head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=labels,
+        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
+        logits=logits)
+
+  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
+  ```
+
+  Args:
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example.
+    label_dimension: Number of regression labels per example. This is the size
+      of the last dimension of the labels `Tensor` (typically, this has shape
+      `[batch_size, label_dimension]`).
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch and label dimension. Defaults to
+      `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by
+      `batch size * label_dimension`. See `tf.losses.Reduction`.
+    loss_fn: Optional loss function. Defaults to `mean_squared_error`.
+    inverse_link_fn: Optional inverse link function, also known as 'mean
+      function'. Defaults to identity.
+    name: name of the head. If provided, summary and metrics keys will be
+      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
+
+  Returns:
+    An instance of `_Head` for linear regression.
+
+  Raises:
+    ValueError: If `label_dimension` or `loss_reduction` is invalid.
+  """
+  return head_lib._regression_head(  # pylint:disable=protected-access
+      weight_column=weight_column,
+      label_dimension=label_dimension,
+      loss_reduction=loss_reduction,
+      loss_fn=loss_fn,
+      inverse_link_fn=inverse_link_fn,
+      name=name)
+
+
+def poisson_regression_head(
+    weight_column=None,
+    label_dimension=1,
+    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
+    compute_full_loss=True,
+    name=None):
+  """Creates a `_Head` for poisson regression using `tf.nn.log_poisson_loss`.
+
+  The loss is the weighted sum over all input dimensions. Namely, if the input
+  labels have shape `[batch_size, label_dimension]`, the loss is the weighted
+  sum over both `batch_size` and `label_dimension`.
+
+  The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`.
+  In many applications, the shape is `[batch_size, label_dimension]`.
+
+  The `labels` shape must match `logits`, namely
+  `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape
+  `[D0, D1, ... DN]` is also supported.
+
+  If `weight_column` is specified, weights must be of shape
+  `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or
+  `[D0, D1, ... DN, label_dimension]`.
+
+  This is implemented as a generalized linear model, see
+  https://en.wikipedia.org/wiki/Generalized_linear_model.
+
+  The head can be used with a canned estimator. Example:
+
+  ```python
+  my_head = tf.contrib.estimator.poisson_regression_head()
+  my_estimator = tf.contrib.estimator.DNNEstimator(
+      head=my_head,
+      hidden_units=...,
+      feature_columns=...)
+  ```
+
+  It can also be used with a custom `model_fn`. Example:
+
+  ```python
+  def _my_model_fn(features, labels, mode):
+    my_head = tf.contrib.estimator.poisson_regression_head()
+    logits = tf.keras.Model(...)(features)
+
+    return my_head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=labels,
+        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
+        logits=logits)
+
+  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
+  ```
+
+  Args:
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example.
+    label_dimension: Number of regression labels per example. This is the size
+      of the last dimension of the labels `Tensor` (typically, this has shape
+      `[batch_size, label_dimension]`).
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch and label dimension. Defaults to
+      `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by
+      `batch size * label_dimension`. See `tf.losses.Reduction`.
+    compute_full_loss: Whether to include the constant `log(z!)` term in
+      computing the poisson loss. See `tf.nn.log_poisson_loss` for the full
+      documentation.
+    name: name of the head. If provided, summary and metrics keys will be
+      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
+
+  Returns:
+    An instance of `_Head` for poisson regression.
+
+  Raises:
+    ValueError: If `label_dimension` or `loss_reduction` is invalid.
+  """
+  def _poisson_loss(labels, logits):
+    return nn.log_poisson_loss(
+        targets=labels, log_input=logits, compute_full_loss=compute_full_loss)
+  return head_lib._regression_head(  # pylint:disable=protected-access
+      weight_column=weight_column,
+      label_dimension=label_dimension,
+      loss_reduction=loss_reduction,
+      loss_fn=_poisson_loss,
+      inverse_link_fn=math_ops.exp,
+      name=name)
+
+
+def logistic_regression_head(
+    weight_column=None,
+    loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
+    name=None):
+  """Creates a `_Head` for logistic regression.
+
+  Uses `sigmoid_cross_entropy_with_logits` loss, which is the same as
+  `binary_classification_head`. The differences compared to
+  `binary_classification_head` are:
+
+  * Does not support `label_vocabulary`. Instead, labels must be float in the
+    range [0, 1].
+  * Does not calculate some metrics that do not make sense, such as AUC.
+  * In `PREDICT` mode, only returns logits and predictions
+    (`=tf.sigmoid(logits)`), whereas `binary_classification_head` also returns
+    probabilities, classes, and class_ids.
+  * Export output defaults to `RegressionOutput`, whereas
+    `binary_classification_head` defaults to `PredictOutput`.
+
+  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
+  In many applications, the shape is `[batch_size, 1]`.
+
+  The `labels` shape must match `logits`, namely
+  `[D0, D1, ... DN]` or `[D0, D1, ... DN, 1]`.
+
+  If `weight_column` is specified, weights must be of shape
+  `[D0, D1, ... DN]` or `[D0, D1, ... DN, 1]`.
+
+  This is implemented as a generalized linear model, see
+  https://en.wikipedia.org/wiki/Generalized_linear_model.
+
+  The head can be used with a canned estimator. Example:
+
+  ```python
+  my_head = tf.contrib.estimator.logistic_regression_head()
+  my_estimator = tf.contrib.estimator.DNNEstimator(
+      head=my_head,
+      hidden_units=...,
+      feature_columns=...)
+  ```
+
+  It can also be used with a custom `model_fn`. Example:
+
+  ```python
+  def _my_model_fn(features, labels, mode):
+    my_head = tf.contrib.estimator.logistic_regression_head()
+    logits = tf.keras.Model(...)(features)
+
+    return my_head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=labels,
+        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
+        logits=logits)
+
+  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
+  ```
+
+  Args:
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch and label dimension. Defaults to
+      `SUM_OVER_BATCH_SIZE`, namely weighted sum of losses divided by
+      `batch size * label_dimension`. See `tf.losses.Reduction`.
+    name: name of the head. If provided, summary and metrics keys will be
+      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
+
+  Returns:
+    An instance of `_Head` for logistic regression.
+
+  Raises:
+    ValueError: If `loss_reduction` is invalid.
+  """
+  def _logistic_loss(labels, logits):
+    labels = head_lib._assert_range(  # pylint:disable=protected-access
+        labels, n_classes=2, message='Labels must be in range [0, 1]')
+    return nn.sigmoid_cross_entropy_with_logits(
+        labels=labels, logits=logits)
+  return head_lib._regression_head(  # pylint:disable=protected-access
+      weight_column=weight_column,
+      label_dimension=1,
+      loss_reduction=loss_reduction,
+      loss_fn=_logistic_loss,
+      inverse_link_fn=math_ops.sigmoid,
+      name=name)
+
+
+def multi_label_head(n_classes,
+                     weight_column=None,
+                     thresholds=None,
+                     label_vocabulary=None,
+                     loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
+                     loss_fn=None,
+                     classes_for_class_based_metrics=None,
+                     name=None):
+  """Creates a `_Head` for multi-label classification.
+
+  Multi-label classification handles the case where each example may have zero
+  or more associated labels, from a discrete set. This is distinct from
+  `multi_class_head` which has exactly one label per example.
+
+  Uses `sigmoid_cross_entropy` loss average over classes and weighted sum over
+  the batch. Namely, if the input logits have shape `[batch_size, n_classes]`,
+  the loss is the average over `n_classes` and the weighted sum over
+  `batch_size`.
+
+  The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`. In many
+  applications, the shape is `[batch_size, n_classes]`.
+
+  Labels can be:
+
+  * A multi-hot tensor of shape `[D0, D1, ... DN, n_classes]`
+  * An integer `SparseTensor` of class indices. The `dense_shape` must be
+    `[D0, D1, ... DN, ?]` and the values within `[0, n_classes)`.
+  * If `label_vocabulary` is given, a string `SparseTensor`. The `dense_shape`
+    must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary` or a
+    multi-hot tensor of shape `[D0, D1, ... DN, n_classes]`.
+
+  If `weight_column` is specified, weights must be of shape
+  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
+
+  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
+  `(labels, logits, features)` as arguments and returns unreduced loss with
+  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support indicator `labels` with
+  shape `[D0, D1, ... DN, n_classes]`. Namely, the head applies
+  `label_vocabulary` to the input labels before passing them to `loss_fn`.
+
+  The head can be used with a canned estimator. Example:
+
+  ```python
+  my_head = tf.contrib.estimator.multi_label_head(n_classes=3)
+  my_estimator = tf.contrib.estimator.DNNEstimator(
+      head=my_head,
+      hidden_units=...,
+      feature_columns=...)
+  ```
+
+  It can also be used with a custom `model_fn`. Example:
+
+  ```python
+  def _my_model_fn(features, labels, mode):
+    my_head = tf.contrib.estimator.multi_label_head(n_classes=3)
+    logits = tf.keras.Model(...)(features)
+
+    return my_head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=labels,
+        optimizer=tf.AdagradOptimizer(learning_rate=0.1),
+        logits=logits)
+
+  my_estimator = tf.estimator.Estimator(model_fn=_my_model_fn)
+  ```
+
+  Args:
+    n_classes: Number of classes, must be greater than 1 (for 1 class, use
+      `binary_classification_head`).
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example.  Per-class weighting is
+      not supported.
+    thresholds: Iterable of floats in the range `(0, 1)`. Accuracy, precision
+      and recall metrics are evaluated for each threshold value. The threshold
+      is applied to the predicted probabilities, i.e. above the threshold is
+      `true`, below is `false`.
+    label_vocabulary: A list of strings represents possible label values. If it
+      is not given, that means labels are already encoded as integer within
+      [0, n_classes) or multi-hot Tensor. If given, labels must be SparseTensor
+      string type and have any value in `label_vocabulary`. Also there will be
+      errors if vocabulary is not provided and labels are string.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`, namely
+      weighted sum of losses divided by batch size. See `tf.losses.Reduction`.
+    loss_fn: Optional loss function.
+    classes_for_class_based_metrics: List of integer class IDs or string class
+      names for which per-class metrics are evaluated. If integers, all must be
+      in the range `[0, n_classes - 1]`. If strings, all must be in
+      `label_vocabulary`.
+    name: name of the head. If provided, summary and metrics keys will be
+      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
+
+  Returns:
+    An instance of `_Head` for multi-label classification.
+
+  Raises:
+    ValueError: if `n_classes`, `thresholds`, `loss_reduction`, `loss_fn` or
+    `metric_class_ids` is invalid.
+  """
+  thresholds = tuple(thresholds) if thresholds else tuple()
+  if n_classes is None or n_classes < 2:
+    raise ValueError(
+        'n_classes must be > 1 for multi-class classification. '
+        'Given: {}'.format(n_classes))
+  for threshold in thresholds:
+    if (threshold <= 0.0) or (threshold >= 1.0):
+      raise ValueError(
+          'thresholds must be in (0, 1) range. Given: {}'.format(threshold))
+  if label_vocabulary is not None:
+    if not isinstance(label_vocabulary, (list, tuple)):
+      raise ValueError(
+          'label_vocabulary must be a list or tuple. '
+          'Given type: {}'.format(type(label_vocabulary)))
+    if len(label_vocabulary) != n_classes:
+      raise ValueError(
+          'Length of label_vocabulary must be n_classes ({}). '
+          'Given: {}'.format(n_classes, len(label_vocabulary)))
+  if loss_fn:
+    head_lib._validate_loss_fn_args(loss_fn)  # pylint:disable=protected-access
+  if (loss_reduction not in losses.Reduction.all() or
+      loss_reduction == losses.Reduction.NONE):
+    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
+  classes_for_class_based_metrics = tuple(
+      [] if classes_for_class_based_metrics is None
+      else classes_for_class_based_metrics)
+  if classes_for_class_based_metrics:
+    if isinstance(classes_for_class_based_metrics[0], six.string_types):
+      if not label_vocabulary:
+        raise ValueError(
+            'label_vocabulary must be provided when '
+            'classes_for_class_based_metrics are sting.')
+      class_ids = []
+      for class_string in classes_for_class_based_metrics:
+        class_ids.append(label_vocabulary.index(class_string))
+      classes_for_class_based_metrics = tuple(class_ids)
+    else:
+      for class_id in classes_for_class_based_metrics:
+        if (class_id < 0) or (class_id >= n_classes):
+          raise ValueError(
+              'All classes_for_class_based_metrics must be in range [0, {}]. '
+              'Given: {}'.format(n_classes - 1, class_id))
+  return _MultiLabelHead(
+      n_classes=n_classes, weight_column=weight_column, thresholds=thresholds,
+      label_vocabulary=label_vocabulary, loss_reduction=loss_reduction,
+      loss_fn=loss_fn,
+      classes_for_class_based_metrics=classes_for_class_based_metrics,
+      name=name)
+
+
+class _MultiLabelHead(head_lib._Head):  # pylint:disable=protected-access
+  """`_Head` for multi-label classification."""
+
+  def __init__(self,
+               n_classes,
+               weight_column=None,
+               thresholds=None,
+               label_vocabulary=None,
+               loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
+               loss_fn=None,
+               classes_for_class_based_metrics=None,
+               name=None):
+    self._n_classes = n_classes
+    self._weight_column = weight_column
+    self._thresholds = thresholds
+    self._label_vocabulary = label_vocabulary
+    self._loss_reduction = loss_reduction
+    self._loss_fn = loss_fn
+    self._classes_for_class_based_metrics = classes_for_class_based_metrics
+    self._name = name
+
+  @property
+  def name(self):
+    return self._name
+
+  @property
+  def logits_dimension(self):
+    return self._n_classes
+
+  def _process_labels(self, labels):
+    if labels is None:
+      raise ValueError(
+          'You must provide a labels Tensor. Given: None. '
+          'Suggested troubleshooting steps: Check that your data contain '
+          'your label feature. Check that your input_fn properly parses and '
+          'returns labels.')
+    if isinstance(labels, sparse_tensor.SparseTensor):
+      if labels.dtype == dtypes.string:
+        label_ids_values = lookup_ops.index_table_from_tensor(
+            vocabulary_list=tuple(self._label_vocabulary),
+            name='class_id_lookup').lookup(labels.values)
+        label_ids = sparse_tensor.SparseTensor(
+            indices=labels.indices,
+            values=label_ids_values,
+            dense_shape=labels.dense_shape)
+        return math_ops.to_int64(
+            sparse_ops.sparse_to_indicator(label_ids, self._n_classes))
+      else:
+        err_msg = (
+            r'labels must be an integer SparseTensor with values in '
+            r'[0, {})'.format(self._n_classes))
+        assert_int = check_ops.assert_integer(
+            labels.values, message=err_msg)
+        assert_less = check_ops.assert_less(
+            labels.values,
+            ops.convert_to_tensor(self._n_classes, dtype=labels.dtype),
+            message=err_msg)
+        assert_greater = check_ops.assert_non_negative(
+            labels.values, message=err_msg)
+        with ops.control_dependencies(
+            [assert_int, assert_less, assert_greater]):
+          return math_ops.to_int64(
+              sparse_ops.sparse_to_indicator(labels, self._n_classes))
+    err_msg = (
+        r'labels must be an integer indicator Tensor with values in [0, 1]')
+    return head_lib._assert_range(labels, 2, message=err_msg)  # pylint:disable=protected-access,
+
+  def create_loss(self, features, mode, logits, labels):
+    """See `Head`."""
+    del mode  # Unused for this head.
+    logits = ops.convert_to_tensor(logits)
+    processed_labels = self._process_labels(labels)
+    processed_labels = head_lib._check_dense_labels_match_logits_and_reshape(  # pylint:disable=protected-access
+        labels=processed_labels, logits=logits,
+        expected_labels_dimension=self.logits_dimension)
+    if self._loss_fn:
+      unweighted_loss = head_lib._call_loss_fn(  # pylint:disable=protected-access
+          loss_fn=self._loss_fn, labels=processed_labels, logits=logits,
+          features=features, expected_loss_dim=1)
+    else:
+      unweighted_loss = losses.sigmoid_cross_entropy(
+          multi_class_labels=processed_labels, logits=logits,
+          reduction=losses.Reduction.NONE)
+      # Averages loss over classes.
+      unweighted_loss = math_ops.reduce_mean(
+          unweighted_loss, axis=-1, keepdims=True)
+    weights = head_lib._get_weights_and_check_match_logits(  # pylint:disable=protected-access,
+        features=features, weight_column=self._weight_column, logits=logits)
+    training_loss = losses.compute_weighted_loss(
+        unweighted_loss, weights=weights, reduction=self._loss_reduction)
+    return head_lib.LossSpec(
+        training_loss=training_loss,
+        unreduced_loss=unweighted_loss,
+        weights=weights,
+        processed_labels=processed_labels)
+
+  def _create_tpu_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None, regularization_losses=None):
+    """Returns an `model_fn._TPUEstimatorSpec`.
+
+    Args:
+      features: Input `dict` of `Tensor` or `SparseTensor` objects.
+      mode: Estimator's `ModeKeys`.
+      logits: logits `Tensor` with shape `[D0, D1, ... DN, n_classes]`.
+        For many applications, the shape is `[batch_size, n_classes]`.
+      labels: Labels with shape matching `logits`. Can be multi-hot `Tensor`
+        with shape `[D0, D1, ... DN, n_classes]` or `SparseTensor` with
+        `dense_shape` `[D0, D1, ... DN, ?]`. `labels` is required argument when
+        `mode` equals `TRAIN` or `EVAL`.
+      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
+        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
+        updates variables and increments `global_step`.
+      train_op_fn: Function that takes a scalar loss `Tensor` and returns
+        `train_op`. Used if `optimizer` is `None`.
+      regularization_losses: A list of additional scalar losses to be added to
+        the training loss, such as regularization losses. These losses are
+        usually expressed as a batch average, so for best results users need to
+        set `loss_reduction=SUM_OVER_BATCH_SIZE` or
+        `loss_reduction=SUM_OVER_NONZERO_WEIGHTS` when creating the head to
+        avoid scaling errors.
+    Returns:
+      `model_fn._TPUEstimatorSpec`.
+    Raises:
+      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
+        mode, or if both are set.
+    """
+    with ops.name_scope(self._name, 'head'):
+      logits = head_lib._check_logits_final_dim(logits, self.logits_dimension)  # pylint:disable=protected-access
+
+      # Predict.
+      pred_keys = prediction_keys.PredictionKeys
+      with ops.name_scope(None, 'predictions', (logits,)):
+        probabilities = math_ops.sigmoid(logits, name=pred_keys.PROBABILITIES)
+        predictions = {
+            pred_keys.LOGITS: logits,
+            pred_keys.PROBABILITIES: probabilities,
+        }
+      if mode == model_fn.ModeKeys.PREDICT:
+        classifier_output = head_lib._classification_output(  # pylint:disable=protected-access
+            scores=probabilities, n_classes=self._n_classes,
+            label_vocabulary=self._label_vocabulary)
+        return model_fn._TPUEstimatorSpec(  # pylint:disable=protected-access
+            mode=model_fn.ModeKeys.PREDICT,
+            predictions=predictions,
+            export_outputs={
+                _DEFAULT_SERVING_KEY: classifier_output,
+                head_lib._CLASSIFY_SERVING_KEY: classifier_output,  # pylint:disable=protected-access
+                head_lib._PREDICT_SERVING_KEY: (  # pylint:disable=protected-access
+                    export_output.PredictOutput(predictions))
+            })
+
+      (training_loss, unreduced_loss, weights,
+       processed_labels) = self.create_loss(
+           features=features, mode=mode, logits=logits, labels=labels)
+      if regularization_losses:
+        regularization_loss = math_ops.add_n(regularization_losses)
+        regularized_training_loss = math_ops.add_n(
+            [training_loss, regularization_loss])
+      else:
+        regularization_loss = None
+        regularized_training_loss = training_loss
+
+      # Eval.
+      if mode == model_fn.ModeKeys.EVAL:
+        return model_fn._TPUEstimatorSpec(  # pylint:disable=protected-access
+            mode=model_fn.ModeKeys.EVAL,
+            predictions=predictions,
+            loss=regularized_training_loss,
+            eval_metrics=head_lib._create_eval_metrics_tuple(  # pylint:disable=protected-access
+                self._eval_metric_ops, {
+                    'labels': processed_labels,
+                    'probabilities': probabilities,
+                    'weights': weights,
+                    'unreduced_loss': unreduced_loss,
+                    'regularization_loss': regularization_loss,
+                }))
 
-# Include attrs that start with single underscore.
-head.__all__ = [s for s in dir(head) if not s.startswith('__')]
+      # Train.
+      if optimizer is not None:
+        if train_op_fn is not None:
+          raise ValueError('train_op_fn and optimizer cannot both be set.')
+        train_op = optimizer.minimize(
+            regularized_training_loss,
+            global_step=training_util.get_global_step())
+      elif train_op_fn is not None:
+        train_op = train_op_fn(regularized_training_loss)
+      else:
+        raise ValueError('train_op_fn and optimizer cannot both be None.')
+      train_op = head_lib._append_update_ops(train_op)  # pylint:disable=protected-access
+      # Only summarize mean_loss for SUM reduction to preserve backwards
+      # compatibility. Otherwise skip it to avoid unnecessary computation.
+      if self._loss_reduction == losses.Reduction.SUM:
+        example_weight_sum = math_ops.reduce_sum(
+            weights * array_ops.ones_like(unreduced_loss))
+        mean_loss = training_loss / example_weight_sum
+      else:
+        mean_loss = None
+    with ops.name_scope(''):
+      keys = metric_keys.MetricKeys
+      summary.scalar(
+          head_lib._summary_key(self._name, keys.LOSS),  # pylint:disable=protected-access
+          regularized_training_loss)
+      if mean_loss is not None:
+        summary.scalar(
+            head_lib._summary_key(self._name, keys.LOSS_MEAN),  # pylint:disable=protected-access
+            mean_loss)
+      if regularization_loss is not None:
+        summary.scalar(
+            head_lib._summary_key(self._name, keys.LOSS_REGULARIZATION),  # pylint:disable=protected-access
+            regularization_loss)
+    return model_fn._TPUEstimatorSpec(  # pylint:disable=protected-access
+        mode=model_fn.ModeKeys.TRAIN,
+        predictions=predictions,
+        loss=regularized_training_loss,
+        train_op=train_op)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.head import *
+  def _eval_metric_ops(
+      self, labels, probabilities, weights, unreduced_loss,
+      regularization_loss):
+    """Returns a dict of metrics for eval_metric_ops."""
+    with ops.name_scope(
+        None, 'metrics',
+        [labels, probabilities, weights, unreduced_loss, regularization_loss]):
+      keys = metric_keys.MetricKeys
+      metric_ops = {
+          # Estimator already adds a metric for loss.
+          head_lib._summary_key(self._name, keys.LOSS_MEAN):  # pylint:disable=protected-access
+              metrics_lib.mean(
+                  values=unreduced_loss,
+                  weights=weights,
+                  name=keys.LOSS_MEAN),
+          head_lib._summary_key(self._name, keys.AUC):  # pylint:disable=protected-access
+              metrics_lib.auc(labels=labels, predictions=probabilities,
+                              weights=weights, name=keys.AUC),
+          head_lib._summary_key(self._name, keys.AUC_PR):  # pylint:disable=protected-access
+              metrics_lib.auc(labels=labels, predictions=probabilities,
+                              weights=weights, curve='PR',
+                              name=keys.AUC_PR),
+      }
+      if regularization_loss is not None:
+        loss_regularization_key = head_lib._summary_key(  # pylint:disable=protected-access
+            self._name, keys.LOSS_REGULARIZATION)
+        metric_ops[loss_regularization_key] = (
+            metrics_lib.mean(
+                values=regularization_loss,
+                name=keys.LOSS_REGULARIZATION))
+      for threshold in self._thresholds:
+        accuracy_key = keys.ACCURACY_AT_THRESHOLD % threshold
+        metric_ops[head_lib._summary_key(self._name, accuracy_key)] = (  # pylint:disable=protected-access
+            head_lib._accuracy_at_threshold(  # pylint:disable=protected-access
+                labels=labels,
+                predictions=probabilities,
+                weights=weights,
+                threshold=threshold,
+                name=accuracy_key))
+        # Precision for positive examples.
+        precision_key = keys.PRECISION_AT_THRESHOLD % threshold
+        metric_ops[head_lib._summary_key(self._name, precision_key)] = (  # pylint:disable=protected-access
+            head_lib._precision_at_threshold(  # pylint:disable=protected-access
+                labels=labels,
+                predictions=probabilities,
+                weights=weights,
+                threshold=threshold,
+                name=precision_key))
+        # Recall for positive examples.
+        recall_key = keys.RECALL_AT_THRESHOLD % threshold
+        metric_ops[head_lib._summary_key(self._name, recall_key)] = (  # pylint:disable=protected-access
+            head_lib._recall_at_threshold(  # pylint:disable=protected-access
+                labels=labels,
+                predictions=probabilities,
+                weights=weights,
+                threshold=threshold,
+                name=recall_key))
+      for class_id in self._classes_for_class_based_metrics:
+        batch_rank = array_ops.rank(probabilities) - 1
+        begin = array_ops.concat(
+            [array_ops.zeros([batch_rank], dtype=dtypes.int32), [class_id]],
+            axis=0)
+        size = array_ops.concat(
+            [-1 * array_ops.ones([batch_rank], dtype=dtypes.int32), [1]],
+            axis=0)
+        class_probabilities = array_ops.slice(
+            probabilities, begin=begin, size=size)
+        class_labels = array_ops.slice(labels, begin=begin, size=size)
+        if self._label_vocabulary is None:
+          prob_key = keys.PROBABILITY_MEAN_AT_CLASS % class_id
+        else:
+          prob_key = (
+              keys.PROBABILITY_MEAN_AT_NAME % self._label_vocabulary[class_id])
+        metric_ops[head_lib._summary_key(self._name, prob_key)] = (  # pylint:disable=protected-access
+            head_lib._predictions_mean(  # pylint:disable=protected-access
+                predictions=class_probabilities,
+                weights=weights,
+                name=prob_key))
+        if self._label_vocabulary is None:
+          auc_key = keys.AUC_AT_CLASS % class_id
+        else:
+          auc_key = keys.AUC_AT_NAME % self._label_vocabulary[class_id]
+        metric_ops[head_lib._summary_key(self._name, auc_key)] = (  # pylint:disable=protected-access
+            head_lib._auc(  # pylint:disable=protected-access
+                labels=class_labels,
+                predictions=class_probabilities,
+                weights=weights,
+                name=auc_key))
+        if self._label_vocabulary is None:
+          auc_pr_key = keys.AUC_PR_AT_CLASS % class_id
+        else:
+          auc_pr_key = keys.AUC_PR_AT_NAME % self._label_vocabulary[class_id]
+        metric_ops[head_lib._summary_key(self._name, auc_pr_key)] = (  # pylint:disable=protected-access
+            head_lib._auc(  # pylint:disable=protected-access
+                labels=class_labels,
+                predictions=class_probabilities,
+                weights=weights,
+                curve='PR',
+                name=auc_pr_key))
+    return metric_ops
diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py
new file mode 100644
index 0000000000..c6e75f8d46
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/head_test.py
@@ -0,0 +1,1482 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for head."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import head as head_lib
+from tensorflow.core.framework import summary_pb2
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.training import monitored_session
+
+
+_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
+
+def _initialize_variables(test_case, scaffold):
+  scaffold.finalize()
+  test_case.assertIsNone(scaffold.init_feed_dict)
+  test_case.assertIsNone(scaffold.init_fn)
+  scaffold.init_op.run()
+  scaffold.ready_for_local_init_op.eval()
+  scaffold.local_init_op.run()
+  scaffold.ready_op.eval()
+  test_case.assertIsNotNone(scaffold.saver)
+
+
+def _assert_simple_summaries(test_case, expected_summaries, summary_str,
+                             tol=1e-6):
+  """Assert summary the specified simple values.
+
+  Args:
+    test_case: test case.
+    expected_summaries: Dict of expected tags and simple values.
+    summary_str: Serialized `summary_pb2.Summary`.
+    tol: Tolerance for relative and absolute.
+  """
+  summary = summary_pb2.Summary()
+  summary.ParseFromString(summary_str)
+  test_case.assertAllClose(expected_summaries, {
+      v.tag: v.simple_value for v in summary.value
+  }, rtol=tol, atol=tol)
+
+
+def _assert_no_hooks(test_case, spec):
+  test_case.assertAllEqual([], spec.training_chief_hooks)
+  test_case.assertAllEqual([], spec.training_hooks)
+
+
+def _sigmoid(logits):
+  return 1 / (1 + np.exp(-logits))
+
+
+def _sigmoid_cross_entropy(labels, logits):
+  """Returns sigmoid cross entropy averaged over classes."""
+  sigmoid_logits = _sigmoid(logits)
+  unreduced_result = (
+      -labels * np.log(sigmoid_logits)
+      -(1 - labels) * np.log(1 - sigmoid_logits))
+  # Mean over classes
+  return np.mean(unreduced_result, axis=-1, keepdims=True)
+
+
+class MultiLabelHead(test.TestCase):
+
+  def setUp(self):
+    ops.reset_default_graph()
+
+  def test_n_classes_is_none(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'n_classes must be > 1 for multi-class classification\. Given: None'):
+      head_lib.multi_label_head(n_classes=None)
+
+  def test_n_classes_is_1(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'n_classes must be > 1 for multi-class classification\. Given: 1'):
+      head_lib.multi_label_head(n_classes=1)
+
+  def test_threshold_too_small(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'thresholds must be in \(0, 1\) range\. Given: 0\.0'):
+      head_lib.multi_label_head(n_classes=2, thresholds=[0., 0.5])
+
+  def test_threshold_too_large(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'thresholds must be in \(0, 1\) range\. Given: 1\.0'):
+      head_lib.multi_label_head(n_classes=2, thresholds=[0.5, 1.0])
+
+  def test_label_vocabulary_dict(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'label_vocabulary must be a list or tuple\. '
+        r'Given type: <(type|class) \'dict\'>'):
+      head_lib.multi_label_head(n_classes=2, label_vocabulary={'foo': 'bar'})
+
+  def test_label_vocabulary_wrong_size(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'Length of label_vocabulary must be n_classes \(3\). Given: 2'):
+      head_lib.multi_label_head(n_classes=3, label_vocabulary=['foo', 'bar'])
+
+  def test_invalid_loss_reduction(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
+      head_lib.multi_label_head(
+          n_classes=3, loss_reduction='invalid_loss_reduction')
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: none'):
+      head_lib.multi_label_head(
+          n_classes=3, loss_reduction=losses.Reduction.NONE)
+
+  def test_loss_fn_arg_labels_missing(self):
+    def _loss_fn(logits):
+      del logits  # Unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn must contain argument: labels\. '
+        r'Given arguments: \(\'logits\',\)'):
+      head_lib.multi_label_head(n_classes=3, loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_logits_missing(self):
+    def _loss_fn(labels):
+      del labels  # unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn must contain argument: logits\. '
+        r'Given arguments: \(\'labels\',\)'):
+      head_lib.multi_label_head(n_classes=3, loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_features_ok(self):
+    def _loss_fn(labels, logits, features):
+      del labels, logits, features  # Unused
+    head_lib.multi_label_head(n_classes=3, loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_invalid(self):
+    def _loss_fn(labels, logits, name=None):
+      del labels, logits, name  # Unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn has unexpected args: \[\'name\'\]'):
+      head_lib.multi_label_head(n_classes=3, loss_fn=_loss_fn)
+
+  def test_classes_for_class_based_metrics_invalid(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'All classes_for_class_based_metrics must be in range \[0, 2\]\. '
+        r'Given: -1'):
+      head_lib.multi_label_head(
+          n_classes=3, classes_for_class_based_metrics=[2, -1])
+
+  def test_classes_for_class_based_metrics_string_invalid(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'\'z\' is not in list'):
+      head_lib.multi_label_head(
+          n_classes=3, label_vocabulary=['a', 'b', 'c'],
+          classes_for_class_based_metrics=['c', 'z'])
+
+  def test_name(self):
+    head = head_lib.multi_label_head(n_classes=4, name='foo')
+    self.assertEqual('foo', head.name)
+
+  def test_predict(self):
+    n_classes = 4
+    head = head_lib.multi_label_head(n_classes)
+    self.assertEqual(n_classes, head.logits_dimension)
+
+    logits = np.array(
+        [[0., 1., 2., -1.], [-1., -2., -3., 1.]], dtype=np.float32)
+    expected_probabilities = _sigmoid(logits)
+    expected_export_classes = [[b'0', b'1', b'2', b'3']] * 2
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    self.assertItemsEqual(
+        (_DEFAULT_SERVING_KEY, 'predict', 'classification'),
+        spec.export_outputs.keys())
+
+    # Assert predictions and export_outputs.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      predictions = sess.run(spec.predictions)
+      self.assertAllClose(logits,
+                          predictions[prediction_keys.PredictionKeys.LOGITS])
+      self.assertAllClose(
+          expected_probabilities,
+          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+
+      self.assertAllClose(
+          expected_probabilities,
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
+      self.assertAllEqual(
+          expected_export_classes,
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
+
+  def test_predict_with_label_vocabulary(self):
+    n_classes = 4
+    head = head_lib.multi_label_head(
+        n_classes, label_vocabulary=['foo', 'bar', 'foobar', 'barfoo'])
+
+    logits = np.array(
+        [[0., 1., 2., -1.], [-1., -2., -3., 1.]], dtype=np.float32)
+    expected_export_classes = [[b'foo', b'bar', b'foobar', b'barfoo']] * 2
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertAllEqual(
+          expected_export_classes,
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
+
+  def test_weight_should_not_impact_prediction(self):
+    n_classes = 4
+    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
+    self.assertEqual(n_classes, head.logits_dimension)
+
+    logits = np.array(
+        [[0., 1., 2., -1.], [-1., -2., -3., 1.]], dtype=np.float32)
+    expected_probabilities = _sigmoid(logits)
+
+    weights_2x1 = [[1.], [2.]]
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array(((42,),), dtype=np.int32),
+            'example_weights': weights_2x1,
+        },
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    # Assert predictions and export_outputs.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      predictions = sess.run(spec.predictions)
+      self.assertAllClose(logits,
+                          predictions[prediction_keys.PredictionKeys.LOGITS])
+      self.assertAllClose(
+          expected_probabilities,
+          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+
+  def test_eval_create_loss(self):
+    """Tests head.create_loss for eval mode."""
+    n_classes = 2
+    head = head_lib.multi_label_head(n_classes)
+
+    logits = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # loss = (labels * -log(sigmoid(logits)) +
+    #         (1 - labels) * -log(1 - sigmoid(logits))) / 2
+    expected_training_loss = 0.5 * np.sum(
+        _sigmoid_cross_entropy(labels=labels, logits=logits))
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_training_loss,
+                          actual_training_loss.eval())
+
+  def test_eval_create_loss_large_logits(self):
+    """Tests head.create_loss for eval mode and large logits."""
+    n_classes = 2
+    head = head_lib.multi_label_head(n_classes)
+
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # For large logits, this is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits
+    expected_training_loss = 0.5 * np.sum(
+        np.array([[(10. + 10.) / 2.], [(15. + 0.) / 2.]], dtype=np.float32))
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, actual_training_loss.eval(), atol=1e-4)
+
+  def test_eval_create_loss_labels_wrong_shape(self):
+    """Tests head.create_loss for eval mode when labels has the wrong shape."""
+    n_classes = 2
+    head = head_lib.multi_label_head(n_classes)
+
+    logits = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32)
+    labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels_placeholder)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[expected_labels_shape: \] \[2 2\] \[labels_shape: \] \[2 1\]'):
+        actual_training_loss.eval({
+            labels_placeholder: np.array([[1], [1]], dtype=np.int64)
+        })
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'labels shape must be \[D0, D1, ... DN, 2\]\..*'
+          r'\[Received shape: \] \[2\]'):
+        actual_training_loss.eval({
+            labels_placeholder: np.array([1, 1], dtype=np.int64)
+        })
+
+  def test_eval_create_loss_loss_fn(self):
+    """Tests head.create_loss for eval mode and custom loss_fn."""
+    loss = np.array([[1.], [2.]], dtype=np.float32)
+    logits_input = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels_input = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    def _loss_fn(labels, logits):
+      check_labels = control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(labels, labels_input)),
+          data=[labels])
+      check_logits = control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(logits, logits_input)),
+          data=[logits])
+      with ops.control_dependencies([check_labels, check_logits]):
+        return constant_op.constant(loss)
+    head = head_lib.multi_label_head(n_classes=2, loss_fn=_loss_fn)
+
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_input,
+        labels=labels_input)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(np.sum(loss) / 2., actual_training_loss.eval())
+
+  def test_eval_create_loss_loss_fn_wrong_shape(self):
+    """Tests custom loss_fn that returns Tensor of unexpected shape."""
+    loss = np.array([1., 2.], dtype=np.float32)
+    def _loss_fn(labels, logits):
+      del labels, logits  # Unused
+      return constant_op.constant(loss)
+    head = head_lib.multi_label_head(n_classes=2, loss_fn=_loss_fn)
+
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. \] '
+          r'\[logits_shape: \] \[2 2\] \[loss_shape: \] \[2\]'):
+        actual_training_loss.eval()
+
+  def test_eval_labels_none(self):
+    """Tests that error is raised when labels is None."""
+    head = head_lib.multi_label_head(n_classes=2)
+
+    with self.assertRaisesRegexp(
+        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
+          labels=None)
+
+  def _test_eval(
+      self, head, logits, labels, expected_loss, expected_metrics,
+      features=None, regularization_losses=None):
+    spec = head.create_estimator_spec(
+        features=features or {},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels,
+        regularization_losses=regularization_losses)
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, and metrics.
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
+          rtol=tol,
+          atol=tol)
+
+  def test_eval(self):
+    n_classes = 2
+    head = head_lib.multi_label_head(n_classes)
+    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # Sum over examples, divide by batch_size.
+    expected_loss = 0.5 * np.sum(
+        _sigmoid_cross_entropy(labels=labels, logits=logits))
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # Average loss over examples.
+        keys.LOSS_MEAN: expected_loss,
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.3333,
+        keys.AUC_PR: 0.7639,
+    }
+    self._test_eval(
+        head=head,
+        logits=logits,
+        labels=labels,
+        expected_loss=expected_loss,
+        expected_metrics=expected_metrics)
+
+  def test_eval_sparse_labels(self):
+    n_classes = 2
+    head = head_lib.multi_label_head(n_classes)
+    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
+    # Equivalent to multi_hot = [[1, 0], [1, 1]]
+    labels = sparse_tensor.SparseTensor(
+        values=[0, 0, 1],
+        indices=[[0, 0], [1, 0], [1, 1]],
+        dense_shape=[2, 2])
+    labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # Sum over examples, divide by batch_size.
+    expected_loss = 0.5 * np.sum(
+        _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits))
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # Average loss over examples.
+        keys.LOSS_MEAN: expected_loss,
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.3333,
+        keys.AUC_PR: 0.7639,
+    }
+    self._test_eval(
+        head=head,
+        logits=logits,
+        labels=labels,
+        expected_loss=expected_loss,
+        expected_metrics=expected_metrics)
+
+  def test_eval_with_regularization_losses(self):
+    n_classes = 2
+    head = head_lib.multi_label_head(
+        n_classes, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    regularization_losses = [1.5, 0.5]
+    expected_regularization_loss = 2.
+    # unregularized_loss = sum(
+    #     labels * -log(sigmoid(logits)) +
+    #     (1 - labels) * -log(1 - sigmoid(logits))) / batch_size
+    expected_unregularized_loss = np.sum(
+        _sigmoid_cross_entropy(labels=labels, logits=logits)) / 2.
+    expected_regularized_loss = (
+        expected_unregularized_loss + expected_regularization_loss)
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_unregularized_loss,
+        keys.LOSS_REGULARIZATION: expected_regularization_loss,
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.3333,
+        keys.AUC_PR: 0.7639,
+    }
+    self._test_eval(
+        head=head,
+        logits=logits,
+        labels=labels,
+        expected_loss=expected_regularized_loss,
+        expected_metrics=expected_metrics,
+        regularization_losses=regularization_losses)
+
+  def test_eval_with_label_vocabulary(self):
+    n_classes = 2
+    head = head_lib.multi_label_head(
+        n_classes, label_vocabulary=['class0', 'class1'])
+    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
+    # Equivalent to multi_hot = [[1, 0], [1, 1]]
+    labels = sparse_tensor.SparseTensor(
+        values=['class0', 'class0', 'class1'],
+        indices=[[0, 0], [1, 0], [1, 1]],
+        dense_shape=[2, 2])
+    labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # Sum over examples, divide by batch_size.
+    expected_loss = 0.5 * np.sum(
+        _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits))
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # Average loss over examples.
+        keys.LOSS_MEAN: expected_loss,
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.3333,
+        keys.AUC_PR: 0.7639,
+    }
+    self._test_eval(
+        head=head,
+        logits=logits,
+        labels=labels,
+        expected_loss=expected_loss,
+        expected_metrics=expected_metrics)
+
+  def test_eval_with_label_vocabulary_with_multi_hot_input(self):
+    n_classes = 2
+    head = head_lib.multi_label_head(
+        n_classes, label_vocabulary=['class0', 'class1'])
+    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
+    labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # Sum over examples, divide by batch_size.
+    expected_loss = 0.5 * np.sum(
+        _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits))
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # Average loss over examples.
+        keys.LOSS_MEAN: expected_loss,
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.3333,
+        keys.AUC_PR: 0.7639,
+    }
+    self._test_eval(
+        head=head,
+        logits=logits,
+        labels=labels_multi_hot,
+        expected_loss=expected_loss,
+        expected_metrics=expected_metrics)
+
+  def test_eval_with_thresholds(self):
+    n_classes = 2
+    thresholds = [0.25, 0.5, 0.75]
+    head = head_lib.multi_label_head(n_classes, thresholds=thresholds)
+
+    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # Sum over examples, divide by batch_size.
+    expected_loss = 0.5 * np.sum(
+        _sigmoid_cross_entropy(labels=labels, logits=logits))
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # Average loss over examples.
+        keys.LOSS_MEAN: expected_loss,
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.3333,
+        keys.AUC_PR: 0.7639,
+        keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 2. / 4.,
+        keys.PRECISION_AT_THRESHOLD % thresholds[0]: 2. / 3.,
+        keys.RECALL_AT_THRESHOLD % thresholds[0]: 2. / 3.,
+        keys.ACCURACY_AT_THRESHOLD % thresholds[1]: 1. / 4.,
+        keys.PRECISION_AT_THRESHOLD % thresholds[1]: 1. / 2.,
+        keys.RECALL_AT_THRESHOLD % thresholds[1]: 1. / 3.,
+        keys.ACCURACY_AT_THRESHOLD % thresholds[2]: 2. / 4.,
+        keys.PRECISION_AT_THRESHOLD % thresholds[2]: 1. / 1.,
+        keys.RECALL_AT_THRESHOLD % thresholds[2]: 1. / 3.,
+    }
+
+    self._test_eval(
+        head=head,
+        logits=logits,
+        labels=labels,
+        expected_loss=expected_loss,
+        expected_metrics=expected_metrics)
+
+  def test_eval_with_classes_for_class_based_metrics(self):
+    head = head_lib.multi_label_head(
+        n_classes=2, classes_for_class_based_metrics=[0, 1])
+
+    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # Sum over examples, divide by batch_size.
+    expected_loss = 0.5 * np.sum(
+        _sigmoid_cross_entropy(labels=labels, logits=logits))
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # Average loss over examples.
+        keys.LOSS_MEAN: expected_loss,
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.3333,
+        keys.AUC_PR: 0.7639,
+        keys.PROBABILITY_MEAN_AT_CLASS % 0: np.sum(_sigmoid(logits[:, 0])) / 2.,
+        keys.AUC_AT_CLASS % 0: 0.,
+        keys.AUC_PR_AT_CLASS % 0: 1.,
+        keys.PROBABILITY_MEAN_AT_CLASS % 1: np.sum(_sigmoid(logits[:, 1])) / 2.,
+        keys.AUC_AT_CLASS % 1: 1.,
+        keys.AUC_PR_AT_CLASS % 1: 1.,
+    }
+
+    self._test_eval(
+        head=head,
+        logits=logits,
+        labels=labels,
+        expected_loss=expected_loss,
+        expected_metrics=expected_metrics)
+
+  def test_eval_with_classes_for_class_based_metrics_string(self):
+    head = head_lib.multi_label_head(
+        n_classes=2, label_vocabulary=['a', 'b'],
+        classes_for_class_based_metrics=['a', 'b'])
+
+    logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32)
+    labels = sparse_tensor.SparseTensor(
+        values=['a', 'a', 'b'],
+        indices=[[0, 0], [1, 0], [1, 1]],
+        dense_shape=[2, 2])
+    labels_onehot = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # Sum over examples, divide by batch_size.
+    expected_loss = 0.5 * np.sum(
+        _sigmoid_cross_entropy(labels=labels_onehot, logits=logits))
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # Average loss over examples.
+        keys.LOSS_MEAN: expected_loss,
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.3333,
+        keys.AUC_PR: 0.7639,
+        keys.PROBABILITY_MEAN_AT_NAME % 'a':
+            np.sum(_sigmoid(logits[:, 0])) / 2.,
+        keys.AUC_AT_NAME % 'a': 0.,
+        keys.AUC_PR_AT_NAME % 'a': 1.,
+        keys.PROBABILITY_MEAN_AT_NAME % 'b':
+            np.sum(_sigmoid(logits[:, 1])) / 2.,
+        keys.AUC_AT_NAME % 'b': 1.,
+        keys.AUC_PR_AT_NAME % 'b': 1.,
+    }
+
+    self._test_eval(
+        head=head,
+        logits=logits,
+        labels=labels,
+        expected_loss=expected_loss,
+        expected_metrics=expected_metrics)
+
+  def test_eval_with_weights(self):
+    n_classes = 2
+    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
+
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # Average over classes, weighted sum over examples, divide by batch_size.
+    # loss = ( 1 * (10 + 10) / 2 + 2 * (15 + 0) / 2) / 2
+    expected_loss = 12.5
+
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array([[41], [42]], dtype=np.int32),
+            'example_weights': np.array([[1.], [2.]], dtype=np.float32),
+        },
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # Average loss over weighted examples (denominator is sum(weights)).
+        keys.LOSS_MEAN: expected_loss * (2. / 3.),
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.2000,
+        keys.AUC_PR: 0.7833,
+    }
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, and metrics.
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
+          rtol=tol,
+          atol=tol)
+
+  def test_train_create_loss_large_logits(self):
+    """Tests head.create_loss for train mode and large logits."""
+    n_classes = 2
+    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
+
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    weights = np.array([[1.], [2.]], dtype=np.float32)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # For large logits, this is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits
+    expected_unreduced_loss = [[(10. + 10.) / 2.], [(15. + 0.) / 2.]]
+    expected_weights = [[1.], [2.]]
+    expected_training_loss = (1. * (10. + 10.) / 2. + 2. * (15. + 0.) / 2.) / 2.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features={
+            'x': np.array(((42,),), dtype=np.int32),
+            'example_weights': weights
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), atol=1e-4)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), atol=1e-4)
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_train_create_loss_loss_reduction(self):
+    """Tests head.create_loss with loss_reduction."""
+    n_classes = 2
+    head = head_lib.multi_label_head(
+        n_classes, weight_column='example_weights',
+        loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    weights = np.array([[1.], [2.]], dtype=np.float32)
+    # loss = labels * -log(sigmoid(logits)) +
+    #        (1 - labels) * -log(1 - sigmoid(logits))
+    # For large logits, this is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits
+    expected_unreduced_loss = [[(10. + 10.) / 2.], [(15. + 0.) / 2.]]
+    expected_weights = [[1.], [2.]]
+    expected_training_loss = (1. * (10. + 10.) / 2. + 2. * (15. + 0.) / 2.) / 2.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features={
+            'x': np.array(((42,),), dtype=np.int32),
+            'example_weights': weights
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), atol=1e-4)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), atol=1e-4)
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_train_labels_none(self):
+    """Tests that error is raised when labels is None."""
+    head = head_lib.multi_label_head(n_classes=2)
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    with self.assertRaisesRegexp(
+        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
+          labels=None,
+          train_op_fn=_no_op_train_fn)
+
+  def test_train_invalid_indicator_labels(self):
+    head = head_lib.multi_label_head(n_classes=2)
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    # The value 2 is outside the allowed range.
+    labels = np.array([[2, 0], [1, 1]], dtype=np.int64)
+    def _train_op_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features={},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'labels must be an integer indicator Tensor with values in '
+          r'\[0, 1\]'):
+        sess.run(spec.loss)
+
+  def test_train_invalid_sparse_labels(self):
+    head = head_lib.multi_label_head(n_classes=2)
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    # The value 2 is outside the allowed range.
+    labels = sparse_tensor.SparseTensor(
+        values=[2, 0, 1],
+        indices=[[0, 0], [1, 0], [1, 1]],
+        dense_shape=[2, 2])
+    def _train_op_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features={},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'labels must be an integer SparseTensor with values in \[0, 2\)'):
+        sess.run(spec.loss)
+
+  def _test_train(self, head, logits, labels, expected_loss):
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=3)])
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    self.assertIsNotNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(
+          self, {metric_keys.MetricKeys.LOSS: expected_loss}, summary_str, tol)
+
+  def test_train(self):
+    head = head_lib.multi_label_head(n_classes=2)
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # Average over classes, sum over examples, divide by batch_size.
+    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2
+    expected_loss = 8.75
+    self._test_train(
+        head=head, logits=logits, labels=labels, expected_loss=expected_loss)
+
+  def test_train_sparse_labels(self):
+    head = head_lib.multi_label_head(n_classes=2)
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    # Equivalent to multi_hot = [[1, 0], [1, 1]]
+    labels = sparse_tensor.SparseTensor(
+        values=[0, 0, 1],
+        indices=[[0, 0], [1, 0], [1, 1]],
+        dense_shape=[2, 2])
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # Average over classes, sum over examples, divide by batch_size.
+    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2
+    expected_loss = 8.75
+    self._test_train(
+        head=head, logits=logits, labels=labels, expected_loss=expected_loss)
+
+  def test_train_with_label_vocabulary(self):
+    head = head_lib.multi_label_head(
+        n_classes=2, label_vocabulary=['class0', 'class1'])
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    # Equivalent to multi_hot = [[1, 0], [1, 1]]
+    labels = sparse_tensor.SparseTensor(
+        values=['class0', 'class0', 'class1'],
+        indices=[[0, 0], [1, 0], [1, 1]],
+        dense_shape=[2, 2])
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # Average over classes, sum over examples, divide by batch_size.
+    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2
+    expected_loss = 8.75
+    self._test_train(
+        head=head, logits=logits, labels=labels, expected_loss=expected_loss)
+
+  def test_train_with_optimizer(self):
+    head = head_lib.multi_label_head(n_classes=2)
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # Average over classes, sum over examples, divide by batch_size.
+    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2
+    expected_loss = 8.75
+    expected_train_result = 'my_train_op'
+
+    class _Optimizer(object):
+
+      def minimize(self, loss, global_step):
+        del global_step
+        return string_ops.string_join(
+            [constant_op.constant(expected_train_result),
+             string_ops.as_string(loss, precision=3)])
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        optimizer=_Optimizer())
+
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run((spec.loss, spec.train_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
+          train_result)
+
+  def test_train_with_update_ops(self):
+    head = head_lib.multi_label_head(n_classes=2)
+
+    with ops.Graph().as_default():
+      w = variables.Variable(1)
+      update_op = w.assign_add(1)
+      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
+
+      t = variables.Variable('')
+      expected_train_result = b'my_train_op'
+      def _train_op_fn(loss):
+        del loss
+        return t.assign(expected_train_result)
+
+      spec = head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
+          labels=np.array([[1, 0], [1, 1]], dtype=np.int64),
+          train_op_fn=_train_op_fn)
+
+      with self.cached_session() as sess:
+        _initialize_variables(self, spec.scaffold)
+        sess.run(spec.train_op)
+        w_value, t_value = sess.run([w, t])
+        self.assertEqual(2, w_value)
+        self.assertEqual(expected_train_result, t_value)
+
+  def test_train_with_regularization_losses(self):
+    head = head_lib.multi_label_head(
+        n_classes=2, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    regularization_losses = [1.5, 0.5]
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # Average over classes and over batch and add regularization loss.
+    expected_loss = 35. / 4. + 2.
+    expected_summaries = {
+        metric_keys.MetricKeys.LOSS: expected_loss,
+        metric_keys.MetricKeys.LOSS_REGULARIZATION: 2.,
+    }
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=3)])
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn,
+        regularization_losses=regularization_losses)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(self, expected_summaries, summary_str, tol)
+
+  def test_train_with_weights(self):
+    n_classes = 2
+    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
+
+    logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
+    labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # Average over classes, weighted sum over examples, divide by batch_size.
+    # loss = ( 1 * (10 + 10) / 2 + 2 * (15 + 0) / 2 ) / 2
+    expected_loss = 12.5
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=3)])
+
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array([[41], [42]], dtype=np.int32),
+            'example_weights': np.array([[1.], [2.]], dtype=np.float32),
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    self.assertIsNotNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(
+          self, {metric_keys.MetricKeys.LOSS: expected_loss,}, summary_str, tol)
+
+  def test_multi_dim_weighted_train_create_loss(self):
+    """Logits and labels of shape [2, 2, 3], weights [2, 2]."""
+    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
+
+    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
+                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
+    labels = np.array([[[1, 0, 0], [1, 0, 0]],
+                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
+    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
+    # unreduced_loss =
+    #     [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3
+    #   = [[20/3, 10/3], [4, 8]]
+    expected_unreduced_loss = [[[20./3.], [10./3.]], [[4.], [8.]]]
+    # weights are reshaped to [2, 2, 1] to match logits.
+    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
+    # loss = (1*20/3 + 1.5*10/3 + 2*4 + 2.5*8) / 4 = 9.9167
+    expected_training_loss = 9.9167
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    atol = 1.e-3
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), atol=atol)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), atol=atol)
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_multi_dim_weighted_train(self):
+    """Logits and labels of shape [2, 2, 3], weights [2, 2]."""
+    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
+
+    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
+                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
+    labels = np.array([[[1, 0, 0], [1, 0, 0]],
+                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
+    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
+    # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3
+    #      = [[20/3, 10/3], [4, 8]]
+    # loss = (1*20/3 + 1.5*10/3 + 2*4 + 2.5*8) / 4 = 9.9167
+    expected_loss = 9.9167
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=3)])
+
+    spec = head.create_estimator_spec(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    atol = 1.e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, monitored_session.Scaffold())
+      loss, train_result = sess.run((spec.loss, spec.train_op))
+      self.assertAllClose(expected_loss, loss, atol=atol)
+      self.assertEqual(
+          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
+          train_result)
+
+  def test_multi_dim_weights_wrong_inner_dim(self):
+    """Logits and labels of shape [2, 2, 3], weights [2, 1]."""
+    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
+
+    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
+                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
+    labels = np.array([[[1, 0, 0], [1, 0, 0]],
+                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
+    weights = np.array([[1.], [2.]], dtype=np.float32)
+    def _train_op_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'):
+        spec.loss.eval()
+
+  def test_multi_dim_weights_wrong_outer_dim(self):
+    """Logits and labels of shape [2, 2, 3], weights [2, 2, 3]."""
+    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
+
+    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
+                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
+    labels = np.array([[[1, 0, 0], [1, 0, 0]],
+                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
+    weights = np.array([[[1., 1., 1.], [1.5, 1.5, 1.5]],
+                        [[2., 2., 2.], [2.5, 2.5, 2.5]]], dtype=np.float32)
+    weights_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    def _train_op_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features={'weights': weights_placeholder},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 2 3\]'):
+        spec.loss.eval({weights_placeholder: weights})
+
+  def test_multi_dim_weighted_eval(self):
+    """Logits and labels of shape [2, 2, 3], weights [2, 2]."""
+    head = head_lib.multi_label_head(n_classes=3, weight_column='weights')
+
+    logits = np.array([[[-10., 10., -10.], [10., -10., 10.]],
+                       [[-12., 12., -12.], [12., -12., 12.]]], dtype=np.float32)
+    labels = np.array([[[1, 0, 0], [1, 0, 0]],
+                       [[0, 1, 1], [0, 1, 1]]], dtype=np.int64)
+    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
+    # loss = [[10 + 10 + 0, 0 + 0 + 10], [0 + 0 + 12, 12 + 12 + 0]] / 3
+    #      = [[20/3, 10/3], [4, 8]]
+    # loss = (1*20/3 + 1.5*10/3 + 2*4 + 2.5*8) / 4 = 9.9167
+    expected_loss = 9.9167
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_loss * (4. / np.sum(weights)),
+        # auc and auc_pr cannot be reliably calculated for only 4 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC: 0.4977,
+        keys.AUC_PR: 0.6645,
+    }
+    self._test_eval(
+        head=head,
+        features={'weights': weights},
+        logits=logits,
+        labels=labels,
+        expected_loss=expected_loss,
+        expected_metrics=expected_metrics)
+
+
+class PoissonRegressionHead(test.TestCase):
+
+  def setUp(self):
+    ops.reset_default_graph()
+
+  def test_train(self):
+    head = head_lib.poisson_regression_head()
+
+    # Create estimator spec.
+    logits = np.array([[0], [-1], [1]], dtype=np.float32)
+    labels = np.array([[1], [2], [3]], dtype=np.int32)
+    # With x = exp(logits), z = labels.
+    # loss = -ln(exp(-x) * (x^z) / z!)
+    #      = x - z * ln(x) + ln(z!)
+    #      = exp(logits) - labels * logits - ln(labels!)
+    # But for ln(z!) and z > 1, the Stirling approximation is used
+    # ln(z!) = z*ln(z) - z + 0.5*ln(2*pi*z)
+    # loss = [exp(0) - 1 * 0 + ln(1!),
+    #         exp(-1) - 2 * (-1) + 2*ln(2) - 2 + 0.5*ln(2*pi*2),
+    #         exp(1) - 3 * 1 + 3*ln(3) - 3 + 0.5*ln(2*pi*3)]
+    #      = [1.0, 3.020, 1.482]
+    # training_loss = (1.0 + 3.020 + 1.482) / 3
+    expected_loss = 1.834
+    atol = 0.001
+    expected_train_result = b'my_train_op'
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_near(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          atol=atol, name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run([spec.loss, spec.train_op])
+      self.assertAlmostEqual(expected_loss, loss, delta=atol)
+      self.assertEqual(expected_train_result, train_result)
+
+  def test_predict(self):
+    head = head_lib.poisson_regression_head()
+
+    # Create estimator spec.
+    logits = np.array([[0], [-1], [1]], dtype=np.float32)
+    expected_predictions = np.exp(logits)
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    # Assert spec contains expected tensors.
+    keys = prediction_keys.PredictionKeys
+    self.assertItemsEqual(
+        (keys.PREDICTIONS, keys.LOGITS), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[keys.PREDICTIONS].dtype)
+    self.assertEqual(dtypes.float32, spec.predictions[keys.LOGITS].dtype)
+
+    # Assert predictions.
+    with self.cached_session():
+      _initialize_variables(self, spec.scaffold)
+      self.assertAllClose(
+          expected_predictions, spec.predictions[keys.PREDICTIONS].eval())
+      self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval())
+
+
+class LogisticRegressionHead(test.TestCase):
+
+  def setUp(self):
+    ops.reset_default_graph()
+
+  def test_train(self):
+    head = head_lib.logistic_regression_head()
+
+    # Create estimator spec.
+    logits = np.array([[0], [-1], [1]], dtype=np.float32)
+    labels = np.array([[.4], [.6], [.8]], dtype=np.float32)
+    # Following the documentation in
+    # tf.nn.sigmoid_cross_entropy_with_logits:
+    # With x = logits, z = labels.
+    # loss  = max(x, 0) - x * z + log(1 + exp(-abs(x)))
+    # loss = [0 - 0 * 0.4 + ln(1 + exp(-0)),
+    #         0 + 1 * 0.6 + ln(1 + exp(-1)),
+    #         1 - 1 * 0.8 + ln(1 + exp(-1))]
+    #      = [0.6931, 0.9133, 0.5133]
+    # training_loss = (0.6931 + 0.9133 + 0.5133) / 3
+    expected_loss = 0.7066
+    atol = 0.001
+    expected_train_result = b'my_train_op'
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_near(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          atol=atol, name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run([spec.loss, spec.train_op])
+      self.assertAlmostEqual(expected_loss, loss, delta=atol)
+      self.assertEqual(expected_train_result, train_result)
+
+  def test_train_labels_too_large(self):
+    head = head_lib.logistic_regression_head()
+
+    # Create estimator spec.
+    logits = np.array([[0], [-1], [1]], dtype=np.float32)
+    labels = np.array([[.4], [1.2], [.8]], dtype=np.float32)
+    expected_train_result = b'my_train_op'
+    def _train_op_fn(loss):
+      del loss
+      return constant_op.constant(expected_train_result)
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[Labels must be in range \[0, 1\]\] .* \[\[0.4\]\[1.2\]\[0.8\]\]'):
+        _ = sess.run(spec.loss)
+
+  def test_train_labels_negative(self):
+    head = head_lib.logistic_regression_head()
+
+    # Create estimator spec.
+    logits = np.array([[0], [-1], [1]], dtype=np.float32)
+    labels = np.array([[.4], [-0.2], [.8]], dtype=np.float32)
+    expected_train_result = b'my_train_op'
+    def _train_op_fn(loss):
+      del loss
+      return constant_op.constant(expected_train_result)
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[Labels must be in range \[0, 1\]\] .* \[\[0.4\]\[-0.2\]\[0.8\]\]'
+      ):
+        _ = sess.run(spec.loss)
+
+  def test_predict(self):
+    head = head_lib.logistic_regression_head()
+
+    # Create estimator spec.
+    logits = np.array([[0], [-1], [1]], dtype=np.float32)
+    expected_predictions = 1. / (1. + np.exp(-logits))
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    # Assert spec contains expected tensors.
+    keys = prediction_keys.PredictionKeys
+    self.assertItemsEqual(
+        (keys.PREDICTIONS, keys.LOGITS), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[keys.PREDICTIONS].dtype)
+    self.assertEqual(dtypes.float32, spec.predictions[keys.LOGITS].dtype)
+
+    # Assert predictions.
+    with self.cached_session():
+      _initialize_variables(self, spec.scaffold)
+      self.assertAllClose(
+          expected_predictions, spec.predictions[keys.PREDICTIONS].eval())
+      self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval())
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/hooks.py b/tensorflow/contrib/estimator/python/estimator/hooks.py
index 33b587ec0b..49f7bbd320 100644
--- a/tensorflow/contrib/estimator/python/estimator/hooks.py
+++ b/tensorflow/contrib/estimator/python/estimator/hooks.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,274 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""hooks python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Some useful session run hooks."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import hooks
+import os
+import time
+
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.training import training
+from tensorflow.python.training import training_util
+
+
+# pylint: disable=protected-access
+class InMemoryEvaluatorHook(training.SessionRunHook):
+  """Hook to run evaluation in training without a checkpoint.
+
+  Example:
+
+  ```python
+  def train_input_fn():
+    ...
+    return train_dataset
+
+  def eval_input_fn():
+    ...
+    return eval_dataset
+
+  estimator = tf.estimator.DNNClassifier(...)
+
+  evaluator = tf.contrib.estimator.InMemoryEvaluatorHook(
+      estimator, eval_input_fn)
+  estimator.train(train_input_fn, hooks=[evaluator])
+  ```
+
+  Current limitations of this approach are:
+
+  * It doesn't support multi-node distributed mode.
+  * It doesn't support saveable objects other than variables (such as boosted
+    tree support)
+  * It doesn't support custom saver logic (such as ExponentialMovingAverage
+    support)
+
+  """
+
+  def __init__(self,
+               estimator,
+               input_fn,
+               steps=None,
+               hooks=None,
+               name=None,
+               every_n_iter=100):
+    """Initializes a `InMemoryEvaluatorHook`.
+
+    Args:
+      estimator: A `tf.estimator.Estimator` instance to call evaluate.
+      input_fn:  Equivalent to the `input_fn` arg to `estimator.evaluate`. A
+        function that constructs the input data for evaluation.
+        See [Createing input functions](
+        https://tensorflow.org/guide/premade_estimators#create_input_functions)
+        for more information. The function should construct and return one of
+        the following:
+
+          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
+            tuple (features, labels) with same constraints as below.
+          * A tuple (features, labels): Where `features` is a `Tensor` or a
+            dictionary of string feature name to `Tensor` and `labels` is a
+            `Tensor` or a dictionary of string label name to `Tensor`. Both
+            `features` and `labels` are consumed by `model_fn`. They should
+            satisfy the expectation of `model_fn` from inputs.
+
+      steps: Equivalent to the `steps` arg to `estimator.evaluate`.  Number of
+        steps for which to evaluate model. If `None`, evaluates until `input_fn`
+        raises an end-of-input exception.
+      hooks: Equivalent to the `hooks` arg to `estimator.evaluate`. List of
+        `SessionRunHook` subclass instances. Used for callbacks inside the
+        evaluation call.
+      name:  Equivalent to the `name` arg to `estimator.evaluate`. Name of the
+        evaluation if user needs to run multiple evaluations on different data
+        sets, such as on training data vs test data. Metrics for different
+        evaluations are saved in separate folders, and appear separately in
+        tensorboard.
+      every_n_iter: `int`, runs the evaluator once every N training iteration.
+
+    Raises:
+      ValueError: if `every_n_iter` is non-positive or it's not a single machine
+        training
+    """
+    if every_n_iter is None or every_n_iter <= 0:
+      raise ValueError('invalid every_n_iter=%s.' % every_n_iter)
+    if (estimator.config.num_ps_replicas > 0 or
+        estimator.config.num_worker_replicas > 1):
+      raise ValueError(
+          'InMemoryEvaluator supports only single machine (aka Local) setting.')
+    self._estimator = estimator
+    self._input_fn = input_fn
+    self._steps = steps
+    self._name = name
+    self._every_n_iter = every_n_iter
+    self._eval_dir = os.path.join(self._estimator.model_dir, 'eval'
+                                  if not name else 'eval_' + name)
+
+    self._graph = None
+    self._hooks = estimator_lib._check_hooks_type(hooks)
+    self._hooks.extend(self._estimator._convert_eval_steps_to_hooks(steps))
+    self._timer = training.SecondOrStepTimer(every_steps=every_n_iter)
+
+  def begin(self):
+    """Build eval graph and restoring op."""
+    self._timer.reset()
+    self._iter_count = 0
+    self._graph = ops.Graph()
+    with self._graph.as_default():
+      (self._scaffold, self._update_op, self._eval_dict,
+       self._all_hooks) = self._estimator._evaluate_build_graph(
+           self._input_fn, self._hooks, checkpoint_path=None)
+
+      if self._scaffold.saver is not None:
+        raise ValueError('InMemoryEvaluator does not support custom saver')
+      if self._scaffold.init_fn is not None:
+        raise ValueError('InMemoryEvaluator does not support custom init_fn')
+
+      self._var_name_to_eval_var = {
+          v.name: v for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+      }
+      self._var_name_to_placeholder = {
+          v.name: array_ops.placeholder(v.dtype)
+          for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+      }
+
+  def after_create_session(self, session, coord):  # pylint: disable=unused-argument
+    """Does first run which shows the eval metrics before training."""
+    if ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS):
+      raise ValueError(
+          'InMemoryEvaluator does not support saveables other than global '
+          'variables.')
+    self._var_name_to_train_var = {
+        v.name: v for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    }
+    var_names_to_transfer = set(self._var_name_to_placeholder.keys()) & set(
+        self._var_name_to_train_var.keys())
+    # Filter training var names that are not exist in evaluation
+    self._var_name_to_train_var = {
+        v_name: self._var_name_to_train_var[v_name]
+        for v_name in var_names_to_transfer
+    }
+    # Filter eval var names that are not exist in training
+    self._var_name_to_eval_var = {
+        v_name: self._var_name_to_eval_var[v_name]
+        for v_name in var_names_to_transfer
+    }
+
+    with self._graph.as_default():
+      self._var_feed_op = control_flow_ops.group([
+          state_ops.assign(self._var_name_to_eval_var[v_name],
+                           self._var_name_to_placeholder[v_name])
+          for v_name in var_names_to_transfer
+      ])
+
+    self._evaluate(session)
+
+  def _evaluate(self, train_session):
+    var_name_to_value = train_session.run(self._var_name_to_train_var)
+    placeholder_to_value = {
+        self._var_name_to_placeholder[v_name]: var_name_to_value[v_name]
+        for v_name in var_name_to_value
+    }
+
+    def feed_variables(scaffold, session):
+      del scaffold
+      session.run(self._var_feed_op, feed_dict=placeholder_to_value)
+
+    scaffold = training.Scaffold(
+        init_fn=feed_variables, copy_from_scaffold=self._scaffold)
+
+    with self._graph.as_default():
+      self._estimator._evaluate_run(
+          checkpoint_path=None,
+          scaffold=scaffold,
+          update_op=self._update_op,
+          eval_dict=self._eval_dict,
+          all_hooks=self._all_hooks,
+          output_dir=self._eval_dir)
+
+    self._timer.update_last_triggered_step(self._iter_count)
+
+  def after_run(self, run_context, run_values):  # pylint: disable=unused-argument
+    """Runs evaluator."""
+    self._iter_count += 1
+    if self._timer.should_trigger_for_step(self._iter_count):
+      self._evaluate(run_context.session)
+
+  def end(self, session):  # pylint: disable=unused-argument
+    """Runs evaluator for final model."""
+    self._evaluate(session)
+
+
+class _StopAtCheckpointStepHook(training.SessionRunHook):
+  """Hook that requests stop at a specified step based on checkpoint.
+
+  Note: We recommend using 'make_stop_at_checkpoint_step_hook` to get the proper
+  hook.
+  """
+
+  def __init__(self, model_dir, last_step,
+               wait_after_file_check_secs=30):
+    """Initializes a `StopAtCheckpointStepHook`.
+
+    This hook requests stop after a last step has been reached. It checks latest
+    checkpoint to verify last step is written on disk or not.
+
+    Args:
+      model_dir: Directory to read global step from latest checkpoint.
+      last_step: Step after which to stop.
+      wait_after_file_check_secs: Reading same file by many workers may create
+      I/O issues. To throttle that we will wait given secs after each read of
+      the file.
+
+    Raises:
+      ValueError: If one of the arguments is invalid.
+    """
+    if last_step is None:
+      raise ValueError('last_step must be specified.')
+    if model_dir is None:
+      raise ValueError('model_dir must be specified.')
+
+    self._model_dir = model_dir
+    self._last_step = last_step
+    self._wait_after_file_check_secs = wait_after_file_check_secs
+
+  def begin(self):
+    self._global_step_tensor = training_util._get_or_create_global_step_read()  # pylint: disable=protected-access
+    if self._global_step_tensor is None:
+      raise RuntimeError(
+          'Global step should be created to use StopAtCheckpointStepHook.')
+
+  def before_run(self, run_context):  # pylint: disable=unused-argument
+    return training.SessionRunArgs(self._global_step_tensor)
+
+  def after_run(self, run_context, run_values):
+    global_step = run_values.results + 1
+    if global_step >= self._last_step:
+      # Check latest global step in the checkpoint to ensure that the targeted
+      # last step is written on disk.
+
+      step = estimator_lib._load_global_step_from_checkpoint_dir(
+          self._model_dir)
+      if step >= self._last_step:
+        run_context.request_stop()
+      else:
+        time.sleep(self._wait_after_file_check_secs)
+
+
+def make_stop_at_checkpoint_step_hook(estimator,
+                                      last_step,
+                                      wait_after_file_check_secs=30):
+  """Creates a proper StopAtCheckpointStepHook based on chief status."""
 
-# Include attrs that start with single underscore.
-hooks.__all__ = [s for s in dir(hooks) if not s.startswith('__')]
+  if estimator.config.is_chief:
+    return training.StopAtStepHook(last_step=last_step)
+  return _StopAtCheckpointStepHook(
+      model_dir=estimator.model_dir,
+      last_step=last_step,
+      wait_after_file_check_secs=wait_after_file_check_secs)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.hooks import *
+# pylint: enable=protected-access
diff --git a/tensorflow/contrib/estimator/python/estimator/hooks_test.py b/tensorflow/contrib/estimator/python/estimator/hooks_test.py
new file mode 100644
index 0000000000..62ffad56da
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/hooks_test.py
@@ -0,0 +1,403 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for hooks."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import glob
+import json
+import os
+import tempfile
+import time
+
+from tensorflow.contrib.estimator.python.estimator import hooks as hooks_lib
+from tensorflow.python.client import session as tf_session
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import estimator_lib
+from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.summary import summary_iterator
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import training
+
+
+def summary_step_keyword_to_value_mapping(dir_):
+  writer_cache.FileWriterCache.clear()
+
+  # Get last Event written.
+  event_paths = glob.glob(os.path.join(dir_, 'events*'))
+  step_keyword_to_value = {}
+  for last_event in summary_iterator.summary_iterator(event_paths[-1]):
+    if last_event.step not in step_keyword_to_value:
+      step_keyword_to_value[last_event.step] = {}
+    if last_event.summary is not None:
+      for value in last_event.summary.value:
+        step_keyword_to_value[last_event.step][value.tag] = value.simple_value
+
+  return step_keyword_to_value
+
+
+def get_summary_value(dir_, step, keyword):
+  """Get summary value for given step and keyword."""
+
+  writer_cache.FileWriterCache.clear()
+  # Get last Event written.
+  event_paths = glob.glob(os.path.join(dir_, 'events*'))
+  print('XXX', event_paths)
+  for last_event in summary_iterator.summary_iterator(event_paths[-1]):
+    if last_event.step == step and last_event.summary is not None:
+      for value in last_event.summary.value:
+        if keyword in value.tag:
+          return value.simple_value
+  return None
+
+
+class InMemoryEvaluatorHookTest(test.TestCase):
+
+  def test_runs_eval_metrics(self):
+
+    def model_fn(features, labels, mode):
+      _ = labels
+      if estimator_lib.ModeKeys.TRAIN == mode:
+        with ops.control_dependencies([features]):
+          train_op = state_ops.assign_add(training.get_global_step(), 1)
+        return estimator_lib.EstimatorSpec(
+            mode, loss=constant_op.constant(3.), train_op=train_op)
+      if estimator_lib.ModeKeys.EVAL == mode:
+        return estimator_lib.EstimatorSpec(
+            mode,
+            loss=constant_op.constant(5.),
+            eval_metric_ops={'mean_of_features': metrics_lib.mean(features)})
+
+    estimator = estimator_lib.Estimator(model_fn=model_fn)
+
+    def input_fn():
+      return dataset_ops.Dataset.range(10)
+
+    evaluator = hooks_lib.InMemoryEvaluatorHook(
+        estimator, input_fn, every_n_iter=4)
+    estimator.train(input_fn, hooks=[evaluator])
+
+    self.assertTrue(os.path.isdir(estimator.eval_dir()))
+    step_keyword_to_value = summary_step_keyword_to_value_mapping(
+        estimator.eval_dir())
+
+    # 4.5 = sum(range(10))/10
+    # before training
+    self.assertEqual(4.5, step_keyword_to_value[0]['mean_of_features'])
+    # intervals (every_n_iter=4)
+    self.assertEqual(4.5, step_keyword_to_value[4]['mean_of_features'])
+    self.assertEqual(4.5, step_keyword_to_value[8]['mean_of_features'])
+    # end
+    self.assertEqual(4.5, step_keyword_to_value[10]['mean_of_features'])
+    self.assertEqual(set([0, 4, 8, 10]), set(step_keyword_to_value.keys()))
+
+  def test_uses_latest_variable_value(self):
+
+    def model_fn(features, labels, mode):
+      _ = labels
+      step = training.get_global_step()
+      w = variable_scope.get_variable(
+          'w',
+          shape=[],
+          initializer=init_ops.zeros_initializer(),
+          dtype=dtypes.int64)
+      if estimator_lib.ModeKeys.TRAIN == mode:
+        # to consume features, we have control dependency
+        with ops.control_dependencies([features]):
+          step_inc = state_ops.assign_add(training.get_global_step(), 1)
+        with ops.control_dependencies([step_inc]):
+          assign_w_to_step_plus_2 = w.assign(step + 2)
+        return estimator_lib.EstimatorSpec(
+            mode,
+            loss=constant_op.constant(3.),
+            train_op=assign_w_to_step_plus_2)
+      if estimator_lib.ModeKeys.EVAL == mode:
+        # to consume features, we have control dependency
+        with ops.control_dependencies([features]):
+          loss = constant_op.constant(5.)
+        return estimator_lib.EstimatorSpec(
+            mode,
+            loss=loss,
+            # w is constant in each step, so the mean.
+            # w = 0 if step==0 else step+2
+            eval_metric_ops={'mean_of_const': metrics_lib.mean(w)})
+
+    estimator = estimator_lib.Estimator(model_fn=model_fn)
+
+    def input_fn():
+      return dataset_ops.Dataset.range(10)
+
+    evaluator = hooks_lib.InMemoryEvaluatorHook(
+        estimator, input_fn, every_n_iter=4)
+    estimator.train(input_fn, hooks=[evaluator])
+
+    self.assertTrue(os.path.isdir(estimator.eval_dir()))
+    step_keyword_to_value = summary_step_keyword_to_value_mapping(
+        estimator.eval_dir())
+    # w = 0 if step==0 else step+2
+    self.assertEqual(0, step_keyword_to_value[0]['mean_of_const'])
+    self.assertEqual(6, step_keyword_to_value[4]['mean_of_const'])
+    self.assertEqual(12, step_keyword_to_value[10]['mean_of_const'])
+
+  def test_dnn_classifier(self):
+    embedding = feature_column_lib.embedding_column(
+        feature_column_lib.categorical_column_with_vocabulary_list(
+            'wire_cast', ['kima', 'omar', 'stringer']), 8)
+    dnn = estimator_lib.DNNClassifier(
+        feature_columns=[embedding], hidden_units=[3, 1])
+
+    def train_input_fn():
+      return dataset_ops.Dataset.from_tensors(({
+          'wire_cast': [['omar'], ['kima']]
+      }, [[0], [1]])).repeat(3)
+
+    def eval_input_fn():
+      return dataset_ops.Dataset.from_tensors(({
+          'wire_cast': [['stringer'], ['kima']]
+      }, [[0], [1]])).repeat(2)
+
+    evaluator = hooks_lib.InMemoryEvaluatorHook(
+        dnn, eval_input_fn, name='in-memory')
+    dnn.train(train_input_fn, hooks=[evaluator])
+    self.assertTrue(os.path.isdir(dnn.eval_dir('in-memory')))
+    step_keyword_to_value = summary_step_keyword_to_value_mapping(
+        dnn.eval_dir('in-memory'))
+
+    final_metrics = dnn.evaluate(eval_input_fn)
+    step = final_metrics[ops.GraphKeys.GLOBAL_STEP]
+    for summary_tag in final_metrics:
+      if summary_tag == ops.GraphKeys.GLOBAL_STEP:
+        continue
+      self.assertEqual(final_metrics[summary_tag],
+                       step_keyword_to_value[step][summary_tag])
+
+  def test_raise_error_with_multi_worker(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        }
+    }
+    with test.mock.patch.dict('os.environ',
+                              {'TF_CONFIG': json.dumps(tf_config)}):
+      dnn = estimator_lib.DNNClassifier(
+          feature_columns=[feature_column_lib.numeric_column('x')],
+          hidden_units=[3, 1])
+
+    def eval_input_fn():
+      pass
+
+    with self.assertRaisesRegexp(ValueError, 'supports only single machine'):
+      hooks_lib.InMemoryEvaluatorHook(dnn, eval_input_fn)
+
+  def test_raise_error_with_ps(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        }
+    }
+    with test.mock.patch.dict('os.environ',
+                              {'TF_CONFIG': json.dumps(tf_config)}):
+      dnn = estimator_lib.DNNClassifier(
+          feature_columns=[feature_column_lib.numeric_column('x')],
+          hidden_units=[3, 1])
+
+    def eval_input_fn():
+      pass
+
+    with self.assertRaisesRegexp(ValueError, 'supports only single machine'):
+      hooks_lib.InMemoryEvaluatorHook(dnn, eval_input_fn)
+
+  def test_raise_error_with_custom_saver_in_eval(self):
+
+    def model_fn(features, labels, mode):
+      _, _ = features, labels
+      return estimator_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(3.),
+          scaffold=training.Scaffold(saver=training.Saver()),
+          train_op=constant_op.constant(5.),
+          eval_metric_ops={
+              'mean_of_features': metrics_lib.mean(constant_op.constant(2.))
+          })
+
+    estimator = estimator_lib.Estimator(model_fn=model_fn)
+
+    def input_fn():
+      return dataset_ops.Dataset.range(10)
+
+    evaluator = hooks_lib.InMemoryEvaluatorHook(estimator, input_fn)
+    with self.assertRaisesRegexp(ValueError, 'does not support custom saver'):
+      evaluator.begin()
+
+  def test_raise_error_with_custom_init_fn_in_eval(self):
+
+    def model_fn(features, labels, mode):
+      _, _ = features, labels
+
+      def init_fn(scaffold, session):
+        _, _ = scaffold, session
+
+      return estimator_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(3.),
+          scaffold=training.Scaffold(init_fn=init_fn),
+          train_op=constant_op.constant(5.),
+          eval_metric_ops={
+              'mean_of_features': metrics_lib.mean(constant_op.constant(2.))
+          })
+
+    estimator = estimator_lib.Estimator(model_fn=model_fn)
+
+    def input_fn():
+      return dataset_ops.Dataset.range(10)
+
+    evaluator = hooks_lib.InMemoryEvaluatorHook(estimator, input_fn)
+    with self.assertRaisesRegexp(ValueError, 'does not support custom init_fn'):
+      evaluator.begin()
+
+  def test_raise_error_with_saveables_other_than_global_variables(self):
+
+    def model_fn(features, labels, mode):
+      _, _ = features, labels
+      w = variables.VariableV1(
+          initial_value=[0.],
+          trainable=False,
+          collections=[ops.GraphKeys.SAVEABLE_OBJECTS])
+      init_op = control_flow_ops.group(
+          [w.initializer, training.get_global_step().initializer])
+      return estimator_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(3.),
+          scaffold=training.Scaffold(init_op=init_op),
+          train_op=constant_op.constant(5.),
+          eval_metric_ops={
+              'mean_of_features': metrics_lib.mean(constant_op.constant(2.))
+          })
+
+    estimator = estimator_lib.Estimator(model_fn=model_fn)
+
+    def input_fn():
+      return dataset_ops.Dataset.range(10)
+
+    evaluator = hooks_lib.InMemoryEvaluatorHook(estimator, input_fn)
+    with self.assertRaisesRegexp(ValueError, 'does not support saveables'):
+      estimator.train(input_fn, hooks=[evaluator])
+
+
+class StopAtCheckpointStepHookTest(test.TestCase):
+
+  def test_do_not_stop_if_checkpoint_is_not_there(self):
+    with ops.Graph().as_default():
+      step = training.create_global_step()
+      assign_ten = step.assign(10)
+      no_op = control_flow_ops.no_op()
+      hook = hooks_lib._StopAtCheckpointStepHook(
+          model_dir=tempfile.mkdtemp(), last_step=10)
+      with training.SingularMonitoredSession(hooks=[hook]) as mon_sess:
+        mon_sess.raw_session().run(assign_ten)
+        with test.mock.patch.object(time, 'sleep') as mock_sleep:
+          mon_sess.run(no_op)
+          self.assertTrue(mock_sleep.called)
+        self.assertFalse(mon_sess.should_stop())
+
+  def test_do_not_stop_if_checkpoint_step_is_smaller(self):
+    model_dir = tempfile.mkdtemp()
+    with ops.Graph().as_default():
+      step = training.create_global_step()
+      assign_nine = step.assign(9)
+      assign_ten = step.assign(10)
+      no_op = control_flow_ops.no_op()
+      hook = hooks_lib._StopAtCheckpointStepHook(
+          model_dir=model_dir, last_step=10)
+      with tf_session.Session() as sess:
+        sess.run(assign_nine)
+        training.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
+      with training.SingularMonitoredSession(hooks=[hook]) as mon_sess:
+        mon_sess.raw_session().run(assign_ten)
+        with test.mock.patch.object(time, 'sleep') as mock_sleep:
+          mon_sess.run(no_op)
+          self.assertTrue(mock_sleep.called)
+        self.assertFalse(mon_sess.should_stop())
+
+  def test_stop_if_checkpoint_step_is_laststep(self):
+    model_dir = tempfile.mkdtemp()
+    with ops.Graph().as_default():
+      step = training.create_global_step()
+      assign_ten = step.assign(10)
+      no_op = control_flow_ops.no_op()
+      hook = hooks_lib._StopAtCheckpointStepHook(
+          model_dir=model_dir, last_step=10)
+      with tf_session.Session() as sess:
+        sess.run(assign_ten)
+        training.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
+      with training.SingularMonitoredSession(hooks=[hook]) as mon_sess:
+        mon_sess.raw_session().run(assign_ten)
+        with test.mock.patch.object(time, 'sleep') as mock_sleep:
+          mon_sess.run(no_op)
+          self.assertFalse(mock_sleep.called)
+        self.assertTrue(mon_sess.should_stop())
+
+  def test_creates_regular_stop_at_step_hook_for_chief(self):
+    # by default an estimator is in chief mode
+    dnn = estimator_lib.DNNClassifier(
+        feature_columns=[feature_column_lib.numeric_column('x')],
+        hidden_units=[3, 1])
+    hook = hooks_lib.make_stop_at_checkpoint_step_hook(dnn, 300)
+    self.assertIsInstance(hook, training.StopAtStepHook)
+    self.assertEqual(300, hook._last_step)
+
+  def test_creates_checkpoint_hook_for_workers(self):
+
+    class FakeWorkerConfig(estimator_lib.RunConfig):
+
+      @property
+      def is_chief(self):
+        return False
+
+    dnn = estimator_lib.DNNClassifier(
+        feature_columns=[feature_column_lib.numeric_column('x')],
+        hidden_units=[3, 1],
+        config=FakeWorkerConfig())
+    hook = hooks_lib.make_stop_at_checkpoint_step_hook(dnn, 300)
+    self.assertIsInstance(hook, hooks_lib._StopAtCheckpointStepHook)
+    self.assertEqual(300, hook._last_step)
+    self.assertEqual(dnn.model_dir, hook._model_dir)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/linear.py b/tensorflow/contrib/estimator/python/estimator/linear.py
index 1c32255ba0..2b68f24eb2 100644
--- a/tensorflow/contrib/estimator/python/estimator/linear.py
+++ b/tensorflow/contrib/estimator/python/estimator/linear.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,127 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""linear python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Linear estimator."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import linear
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import linear as linear_lib
+
+
+class LinearEstimator(estimator.Estimator):
+  """An estimator for TensorFlow linear models with user-specified head.
+
+  Example:
+
+  ```python
+  categorical_column_a = categorical_column_with_hash_bucket(...)
+  categorical_column_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
+
+  # Estimator using the default optimizer.
+  estimator = LinearEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b])
+
+  # Or estimator using an optimizer with a learning rate decay.
+  estimator = LinearEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b],
+      optimizer=lambda: tf.train.FtrlOptimizer(
+          learning_rate=tf.exponential_decay(
+              learning_rate=0.1,
+              global_step=tf.get_global_step(),
+              decay_steps=10000,
+              decay_rate=0.96))
+
+  # Or estimator using the FTRL optimizer with regularization.
+  estimator = LinearEstimator(
+      head=tf.contrib.estimator.multi_label_head(n_classes=3),
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b])
+      optimizer=tf.train.FtrlOptimizer(
+          learning_rate=0.1,
+          l1_regularization_strength=0.001
+      ))
+
+  def input_fn_train: # returns x, y (where y represents label's class index).
+    ...
+  estimator.train(input_fn=input_fn_train, steps=100)
+  def input_fn_eval: # returns x, y (where y represents label's class index).
+    ...
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    ...
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * if `weight_column` is not `None`, a feature with
+    `key=weight_column` whose value is a `Tensor`.
+  * for each `column` in `feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss and predicted output are determined by the specified head.
+
+  @compatibility(eager)
+  Estimators are not compatible with eager execution.
+  @end_compatibility
+  """
 
-# Include attrs that start with single underscore.
-linear.__all__ = [s for s in dir(linear) if not s.startswith('__')]
+  def __init__(self,
+               head,
+               feature_columns,
+               model_dir=None,
+               optimizer='Ftrl',
+               config=None,
+               partitioner=None,
+               sparse_combiner='sum'):
+    """Initializes a `LinearEstimator` instance.
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.linear import *
+    Args:
+      head: A `_Head` instance constructed with a method such as
+        `tf.contrib.estimator.multi_label_head`.
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `FeatureColumn`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
+        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
+        callable. Defaults to FTRL optimizer.
+      config: `RunConfig` object to configure the runtime settings.
+      partitioner: Optional. Partitioner for input layer.
+      sparse_combiner: A string specifying how to reduce if a categorical column
+        is multivalent.  One of "mean", "sqrtn", and "sum" -- these are
+        effectively different ways to do example-level normalization, which can
+        be useful for bag-of-words features. for more details, see
+        `tf.feature_column.linear_model`.
+    """
+    def _model_fn(features, labels, mode, config):
+      return linear_lib._linear_model_fn(  # pylint: disable=protected-access
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          feature_columns=tuple(feature_columns or []),
+          optimizer=optimizer,
+          partitioner=partitioner,
+          config=config,
+          sparse_combiner=sparse_combiner)
+    super(LinearEstimator, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config)
diff --git a/tensorflow/contrib/estimator/python/estimator/linear_test.py b/tensorflow/contrib/estimator/python/estimator/linear_test.py
new file mode 100644
index 0000000000..c41996b9c6
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/linear_test.py
@@ -0,0 +1,156 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for linear.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import head as head_lib
+from tensorflow.contrib.estimator.python.estimator import linear
+from tensorflow.python.estimator.canned import linear_testing_utils
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import ops
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+
+
+def _linear_estimator_fn(
+    weight_column=None, label_dimension=1, *args, **kwargs):
+  """Returns a LinearEstimator that uses regression_head."""
+  return linear.LinearEstimator(
+      head=head_lib.regression_head(
+          weight_column=weight_column, label_dimension=label_dimension,
+          # Tests in core (from which this test inherits) test the sum loss.
+          loss_reduction=losses.Reduction.SUM),
+      *args, **kwargs)
+
+
+class LinearEstimatorEvaluateTest(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_estimator_fn)
+
+
+class LinearEstimatorPredictTest(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_estimator_fn)
+
+
+class LinearEstimatorTrainTest(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_estimator_fn)
+
+
+class LinearEstimatorIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(
+      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))]
+    est = linear.LinearEstimator(
+        head=head_lib.regression_head(label_dimension=label_dimension),
+        feature_columns=feature_columns,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array([
+        x[prediction_keys.PredictionKeys.PREDICTIONS]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/logit_fns.py b/tensorflow/contrib/estimator/python/estimator/logit_fns.py
index 7eba28dc57..c8b0dd6297 100644
--- a/tensorflow/contrib/estimator/python/estimator/logit_fns.py
+++ b/tensorflow/contrib/estimator/python/estimator/logit_fns.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,85 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""logit_fns python module.
+"""Aliases for logit_fn builders used by canned (core) tf.Estimator's.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+A logit_fn is an abstraction within model_fn that factors out the logit
+construction logic.  Its output can be fed into Heads or otherwise composed.  It
+should follow the following signature:
+
+Args:
+`features`: This is the first item returned from the `input_fn` passed to
+            `train`, `evaluate`, and `predict`. This should be a single
+            `Tensor` or `dict` of same, and is the only required argument.
+`mode`: Optional. Specifies if this training, evaluation or prediction. See
+        `ModeKeys`.
+`params`: Optional `dict` of hyperparameters.  Will receive what is passed to
+          Estimator in `params` parameter. This allows configuration of
+          Estimators from hyperparameter tuning.
+`config`: Optional configuration object. Will receive what is passed to
+          Estimator in `config` parameter, or the default `config`. Allows
+          updating things in your model_fn based on configuration such as
+          `num_ps_replicas`, or `model_dir`.
 
+Returns:
+    A Tensor representing the logits.
+"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import logit_fns
+import six
+
+from tensorflow.python.estimator.canned import dnn as dnn_core
+from tensorflow.python.estimator.canned import linear as linear_core
+from tensorflow.python.framework import ops
+from tensorflow.python.util import function_utils
+
+# pylint: disable=protected-access
+dnn_logit_fn_builder = dnn_core._dnn_logit_fn_builder
+linear_logit_fn_builder = linear_core._linear_logit_fn_builder
+# pylint: enable=protected-access
+
+
+def call_logit_fn(logit_fn, features, mode, params, config):
+  """Calls logit_fn.
+
+  A utility function that calls the provided logit_fn with the relevant subset
+  of provided arguments.  Similar to tf.estimator._call_model_fn().
+
+  Args:
+    logit_fn: A logit_fn as defined above.
+    features: The features dict.
+    mode: TRAIN / EVAL / PREDICT ModeKeys.
+    params: The hyperparameter dict.
+    config: The configuration object.
+
+  Returns:
+    A logit Tensor, the output of logit_fn.
+
+  Raises:
+    ValueError: if logit_fn does not return a Tensor or a dictionary mapping
+      strings to Tensors.
+  """
+  logit_fn_args = function_utils.fn_args(logit_fn)
+  kwargs = {}
+  if 'mode' in logit_fn_args:
+    kwargs['mode'] = mode
+  if 'params' in logit_fn_args:
+    kwargs['params'] = params
+  if 'config' in logit_fn_args:
+    kwargs['config'] = config
+  logit_fn_results = logit_fn(features=features, **kwargs)
+
+  result_is_valid_dictionary = (
+      isinstance(logit_fn_results, dict) and
+      all([(isinstance(k, six.string_types) and isinstance(v, ops.Tensor))
+           for k, v in six.iteritems(logit_fn_results)]))
+  result_is_tensor = isinstance(logit_fn_results, ops.Tensor)
 
-# Include attrs that start with single underscore.
-logit_fns.__all__ = [s for s in dir(logit_fns) if not s.startswith('__')]
+  if not (result_is_valid_dictionary or result_is_tensor):
+    raise ValueError('logit_fn should return a Tensor or a dictionary mapping '
+                     'strings to Tensors.  logit_fn returned: %s' %
+                     logit_fn_results)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.logit_fns import *
+  return logit_fn_results
diff --git a/tensorflow/contrib/estimator/python/estimator/logit_fns_test.py b/tensorflow/contrib/estimator/python/estimator/logit_fns_test.py
new file mode 100644
index 0000000000..074ece6cca
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/logit_fns_test.py
@@ -0,0 +1,95 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""logit_fn tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.estimator.python.estimator import logit_fns
+from tensorflow.python.client import session
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.framework import constant_op
+from tensorflow.python.platform import test
+
+
+class LogitFnTest(test.TestCase):
+
+  def test_simple_call_logit_fn(self):
+    def dummy_logit_fn(features, mode):
+      if mode == model_fn.ModeKeys.TRAIN:
+        return features['f1']
+      else:
+        return features['f2']
+    features = {
+        'f1': constant_op.constant([[2., 3.]]),
+        'f2': constant_op.constant([[4., 5.]])
+    }
+    logit_fn_result = logit_fns.call_logit_fn(
+        dummy_logit_fn, features, model_fn.ModeKeys.EVAL, 'fake_params',
+        'fake_config')
+    with session.Session():
+      self.assertAllClose([[4., 5.]], logit_fn_result.eval())
+
+  def test_simple_call_multi_logit_fn(self):
+
+    def dummy_logit_fn(features):
+      return {u'head1': features['f1'], 'head2': features['f2']}
+
+    features = {
+        'f1': constant_op.constant([[2., 3.]]),
+        'f2': constant_op.constant([[4., 5.]])
+    }
+    logit_fn_result = logit_fns.call_logit_fn(dummy_logit_fn, features,
+                                              model_fn.ModeKeys.TRAIN,
+                                              'fake_params', 'fake_config')
+    with session.Session():
+      self.assertAllClose([[2., 3.]], logit_fn_result['head1'].eval())
+      self.assertAllClose([[4., 5.]], logit_fn_result['head2'].eval())
+
+  def test_invalid_logit_fn_results(self):
+
+    def invalid_logit_fn(features, params):
+      return [
+          features['f1'] * params['input_multiplier'],
+          features['f2'] * params['input_multiplier']
+      ]
+
+    features = {
+        'f1': constant_op.constant([[2., 3.]]),
+        'f2': constant_op.constant([[4., 5.]])
+    }
+    params = {'learning_rate': 0.001, 'input_multiplier': 2.0}
+    with self.assertRaisesRegexp(
+        ValueError, 'logit_fn should return a Tensor or a dictionary mapping '
+                    'strings to Tensors'):
+      logit_fns.call_logit_fn(invalid_logit_fn, features, 'fake_mode', params,
+                              'fake_config')
+
+  def test_invalid_logit_fn_results_dict(self):
+
+    def invalid_logit_fn(features):
+      return {'head1': features['f1'], 'head2': features['f2']}
+
+    features = {'f1': constant_op.constant([[2., 3.]]), 'f2': 'some string'}
+    with self.assertRaisesRegexp(
+        ValueError, 'logit_fn should return a Tensor or a dictionary mapping '
+                    'strings to Tensors'):
+      logit_fns.call_logit_fn(invalid_logit_fn, features, 'fake_mode',
+                              'fake_params', 'fake_config')
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py
index 1d8a065299..6e793c8302 100644
--- a/tensorflow/contrib/estimator/python/estimator/multi_head.py
+++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,413 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""multi_head python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
+"""Abstractions for the head(s) of a model.
 """
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import multi_head
+import six
+
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.export import export_output as export_output_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.summary import summary
+from tensorflow.python.training import training_util
+
+
+_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
+
+def multi_head(heads, head_weights=None):
+  """Creates a `_Head` for multi-objective learning.
+
+  This class merges the output of multiple `_Head` objects.
+  Specifically:
+  * For training, sums losses of each head, calls `train_op_fn` with this
+    final loss.
+  * For eval, merges metrics by adding `head.name` suffix to the keys in eval
+    metrics, such as `precision/head1`, `precision/head2`.
+  * For prediction, merges predictions and updates keys in prediction dict to a
+    2-tuple, `(head.name, prediction_key)`. Merges `export_outputs` such that
+    by default the first head is served.
+
+  Usage:
+
+  ```python
+  # In `input_fn` specify labels as a dict keyed by head name:
+  def input_fn():
+    features = ...
+    labels1 = ...
+    labels2 = ...
+    return features, {'head1': labels1, 'head2': labels2}
+
+  # In `model_fn`, specify logits as a dict keyed by head name:
+  def model_fn(features, labels, mode):
+    # Create simple heads and specify head name.
+    head1 = multi_class_head(n_classes=3, name='head1')
+    head2 = binary_classification_head(name='head2')
+    # Create multi-head from two simple heads.
+    head = multi_head([head1, head2])
+    # Create logits for each head, and combine them into a dict.
+    logits1, logits2 = logit_fn()
+    logits = {'head1': logits1, 'head2': logits2}
+    # Return the merged EstimatorSpec
+    return head.create_estimator_spec(..., logits=logits, ...)
+
+  # Create an estimator with this model_fn.
+  estimator = tf.estimator.Estimator(model_fn=model_fn)
+  estimator.train(input_fn=input_fn, steps=100)
+  ```
+
+  Also supports `logits` as a `Tensor` of shape
+  `[D0, D1, ... DN, logits_dimension]`. It will split the `Tensor` along the
+  last dimension and distribute it appropriately among the heads. E.g.:
+
+  ```python
+  def model_fn(features, labels, mode):
+    # Create simple heads and specify head name.
+    head1 = multi_class_head(n_classes=3, name='head1')
+    head2 = binary_classification_head(name='head2')
+    # Create multi-head from two simple heads.
+    head = multi_head([head1, head2])
+    # Create logits for the multihead.
+    logits = logit_fn(logits_dimension=head.logits_dimension)
+    # Return the merged EstimatorSpec
+    return head.create_estimator_spec(..., logits=logits, ...)
+  ```
+
+  Args:
+    heads: List or tuple of `_Head` instances. All heads must have `name`
+      specified. The first head in the list is the default used at serving time.
+    head_weights: Optional list of weights, same length as `heads`. Used when
+      merging losses to calculate the weighted sum of losses from each head. If
+      `None`, all losses are weighted equally.
+
+  Returns:
+    A instance of `_Head` that merges multiple heads.
+
+  Raises:
+    ValueError: If `heads` is empty.
+    ValueError: If any of the `heads` does not have `name` specified.
+    ValueError: If `heads` and `head_weights` have different size.
+  """
+  if head_weights:
+    if len(head_weights) != len(heads):
+      raise ValueError(
+          'heads and head_weights must have the same size. '
+          'Given len(heads): {}. Given len(head_weights): {}.'.format(
+              len(heads), len(head_weights)))
+  if not heads:
+    raise ValueError('Must specify heads. Given: {}'.format(heads))
+  for head in heads:
+    if not head.name:
+      raise ValueError(
+          'All given heads must have name specified. '
+          'Given: {}'.format(head))
+
+  return _MultiHead(
+      heads=tuple(heads),
+      head_weights=tuple(head_weights) if head_weights else tuple())
+
+
+def _no_op_train_fn(loss):
+  del loss
+  return control_flow_ops.no_op()
+
+
+def _merge_losses(losses, head_weights=None):
+  """Merges the given losses into one tensor."""
+  losses = tuple(losses)
+  with ops.name_scope(
+      'merge_losses', values=losses + (head_weights or tuple())):
+    if head_weights:
+      weighted_losses = []
+      for loss, weight in zip(losses, head_weights):
+        weighted_losses.append(math_ops.multiply(loss, weight))
+    else:
+      weighted_losses = losses
+    return math_ops.add_n(weighted_losses)
+
+
+def _default_export_output(export_outputs, head_name):
+  """Extracts the default export output from the given export_outputs dict."""
+  if len(export_outputs) == 1:
+    return next(six.itervalues(export_outputs))
+  for k, v in six.iteritems(export_outputs):
+    if k == _DEFAULT_SERVING_KEY:
+      return v
+  raise ValueError(
+      '{} did not specify default export_outputs. '
+      'Given: {} '
+      'Suggested fix: Use one of the heads in tf.contrib.estimator, or include '
+      'key {} in export_outputs.'.format(
+          head_name, export_outputs, _DEFAULT_SERVING_KEY))
+
+
+class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
+  """`_Head` for multi objective learning."""
+
+  def __init__(self, heads, head_weights):
+    self._logits_dimension = 0
+    for head in heads:
+      self._logits_dimension += head.logits_dimension
+
+    self._heads = heads
+    self._head_weights = head_weights
+
+  @property
+  def name(self):
+    return '_'.join([h.name for h in self._heads])
+
+  @property
+  def logits_dimension(self):
+    return self._logits_dimension
+
+  def create_loss(self, features, mode, logits, labels):
+    """See `Head`."""
+    if isinstance(logits, dict):
+      logits_dict = logits
+    else:
+      logits_dict = self._split_logits(logits)
+    training_losses = []
+    labels_by_head = {}
+    unreduced_losses_by_head = {}
+    example_weights_by_head = {}
+    for i, head in enumerate(self._heads):
+      (training_loss, unreduced_loss,
+       weights, processed_labels) = head.create_loss(
+           features, mode, logits_dict[head.name], labels[head.name])
+      training_losses.append(training_loss)
+      labels_by_head[head.name] = processed_labels
+      if self._head_weights:
+        head_weight = self._head_weights[i]
+        unreduced_losses_by_head[head.name] = math_ops.multiply(
+            unreduced_loss, head_weight)
+        example_weights_by_head[head.name] = math_ops.multiply(
+            weights, head_weight)
+      else:
+        unreduced_losses_by_head[head.name] = unreduced_loss
+        example_weights_by_head[head.name] = weights
+
+    training_losses = tuple(training_losses)
+    with ops.name_scope(
+        'merge_losses',
+        values=training_losses + (self._head_weights or tuple())):
+      if self._head_weights:
+        head_weighted_training_losses = []
+        for training_loss, head_weight in zip(
+            training_losses, self._head_weights):
+          head_weighted_training_losses.append(
+              math_ops.multiply(training_loss, head_weight))
+        merged_training_loss = math_ops.add_n(head_weighted_training_losses)
+      else:
+        merged_training_loss = math_ops.add_n(training_losses)
+
+    return head_lib.LossSpec(
+        training_loss=merged_training_loss,
+        unreduced_loss=unreduced_losses_by_head,
+        weights=example_weights_by_head,
+        processed_labels=labels_by_head)
+
+  # TODO(b/65403806): Support regularization_losses arg.
+  def create_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None):
+    """See `_Head`."""
+    return self._create_estimator_spec(
+        features=features, mode=mode, logits=logits, labels=labels,
+        optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=False)
+
+  def _create_tpu_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None):
+    """See `_Head`."""
+    return self._create_estimator_spec(
+        features=features, mode=mode, logits=logits, labels=labels,
+        optimizer=optimizer, train_op_fn=train_op_fn, use_tpu=True)
+
+  def _create_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None, use_tpu=False):
+    """Returns `EstimatorSpec` or `TPUEstimatorSpec`."""
+    if isinstance(logits, dict):
+      logits_dict = logits
+    else:
+      logits_dict = self._split_logits(logits)
+    if labels and not isinstance(labels, dict):
+      raise ValueError('labels must be a dict. Given: {}'.format(labels))
+
+    all_estimator_spec = []
+    for head in self._heads:
+      head_name = head.name
+      all_estimator_spec.append(
+          head.create_estimator_spec(
+              features=features,
+              mode=mode,
+              logits=logits_dict[head_name],
+              labels=labels[head_name] if labels else None,
+              train_op_fn=_no_op_train_fn))
+
+    if mode == model_fn.ModeKeys.TRAIN:
+      spec = self._merge_train(
+          all_estimator_spec=all_estimator_spec,
+          optimizer=optimizer,
+          train_op_fn=train_op_fn,
+          use_tpu=use_tpu)
+      with ops.name_scope(''):
+        summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss)
+      return spec
+    if mode == model_fn.ModeKeys.PREDICT:
+      return self._merge_predict(all_estimator_spec, use_tpu=use_tpu)
+    if mode == model_fn.ModeKeys.EVAL:
+      return self._merge_eval(all_estimator_spec, use_tpu=use_tpu)
+    raise ValueError('mode={} unrecognized'.format(mode))
+
+  def _split_logits(self, logits):
+    """Splits logits along the last dimension and returns a dict."""
+    logits_dict = {}
+    with ops.name_scope(None, 'split_logits', values=[logits]):
+      logits = ops.convert_to_tensor(logits)
+      batch_shape = array_ops.shape(logits)[:-1]
+      zeros_like_batch_shape = array_ops.zeros_like(batch_shape)
+      minus_ones_like_batch_shape = -1 * array_ops.ones_like(batch_shape)
+      begin_idx = 0
+      for head in self._heads:
+        begin_tensor = array_ops.concat(
+            [zeros_like_batch_shape, [begin_idx]], axis=0)
+        size_tensor = array_ops.concat(
+            [minus_ones_like_batch_shape, [head.logits_dimension]], axis=0)
+        logits_dict[head.name] = array_ops.slice(
+            logits, begin=begin_tensor, size=size_tensor)
+        begin_idx += head.logits_dimension
+    return logits_dict
+
+  def _merge_train(
+      self, all_estimator_spec, optimizer, train_op_fn, use_tpu=False):
+    """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for training.
+
+    Args:
+      all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the
+        individual heads.
+      optimizer: `Optimizer` instance to create train op. See
+        `create_estimator_spec` documentation for more details.
+      train_op_fn: Function to create train op. Used if `optimizer` is `None`.
+      use_tpu: If `True`, returns `TPUEstimatorSpec`.
+
+    Returns:
+      `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for TRAIN.
+
+    Raises:
+      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
+        mode.
+    """
+    losses = []
+    for spec in all_estimator_spec:
+      losses.append(spec.loss)
+    loss = _merge_losses(losses, self._head_weights)
+    if optimizer is not None:
+      if train_op_fn is not None:
+        raise ValueError('train_op_fn and optimizer cannot both be set.')
+      train_op = optimizer.minimize(
+          loss, global_step=training_util.get_global_step())
+    elif train_op_fn is not None:
+      train_op = train_op_fn(loss)
+    else:
+      raise ValueError('train_op_fn and optimizer cannot both be None.')
+
+    spec_type = (
+        model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec)  # pylint:disable=protected-access
+    return spec_type(
+        mode=model_fn.ModeKeys.TRAIN,
+        loss=loss,
+        train_op=train_op)
+
+  def _merge_predict(self, all_estimator_spec, use_tpu=False):
+    """Merges list of `EstimatorSpec` or `TPUEstimatorSpec` for prediction.
+
+    Args:
+      all_estimator_spec: list of `EstimatorSpec` or `TPUEstimatorSpec` for the
+        individual heads.
+      use_tpu: If `True`, returns `TPUEstimatorSpec`.
+
+    Returns:
+      `EstimatorSpec` or `TPUEstimatorSpec` that merges all heads for PREDICT.
+    """
+    predictions = {}
+    export_outputs = {
+        _DEFAULT_SERVING_KEY: _default_export_output(
+            all_estimator_spec[0].export_outputs,
+            self._heads[0].name),
+    }
+    merged_predict_outputs = {}
+    for head, spec in zip(self._heads, all_estimator_spec):
+      head_name = head.name
+      for k, v in six.iteritems(spec.export_outputs):
+        if k == _DEFAULT_SERVING_KEY:
+          key = head_name
+        else:
+          key = '%s/%s' % (head_name, k)
+        export_outputs[key] = v
+        if (k == head_lib._PREDICT_SERVING_KEY and  # pylint:disable=protected-access
+            isinstance(v, export_output_lib.PredictOutput)):
+          for kp, vp in six.iteritems(v.outputs):
+            key = '%s/%s' % (head_name, kp)
+            merged_predict_outputs[key] = vp
+      for k, v in six.iteritems(spec.predictions):
+        predictions[(head_name, k)] = v
+    export_outputs[head_lib._PREDICT_SERVING_KEY] = (  # pylint:disable=protected-access
+        export_output_lib.PredictOutput(merged_predict_outputs))
+
+    spec_type = (
+        model_fn._TPUEstimatorSpec if use_tpu else model_fn.EstimatorSpec)  # pylint:disable=protected-access
+    return spec_type(
+        mode=model_fn.ModeKeys.PREDICT,
+        predictions=predictions,
+        export_outputs=export_outputs)
+
+  def _merge_eval(self, all_estimator_spec, use_tpu=False):
+    """Merges list of `EstimatorSpec` for eval.
+
+    Args:
+      all_estimator_spec: list of `EstimatorSpec` for the individual heads.
+      use_tpu: If `True`, will raise `NotImplementedError`, because TPU is not
+        yet supported for eval.
 
-# Include attrs that start with single underscore.
-multi_head.__all__ = [s for s in dir(multi_head) if not s.startswith('__')]
+    Returns:
+      `EstimatorSpec` that merges all heads for EVAL.
+    Raises:
+      NotImplementedError: If `use_tpu` is `True`.
+    """
+    if use_tpu:
+      raise NotImplementedError(
+          'TPU evaluation is not implemented for multi_head.')
+    predictions = {}
+    metrics = {}
+    losses = []
+    with ops.name_scope('merge_eval'):
+      for head, spec in zip(self._heads, all_estimator_spec):
+        losses.append(spec.loss)
+        head_name = head.name
+        # Loss metric is not added by default.
+        loss_name = head_lib._summary_key(  # pylint:disable=protected-access
+            head_name, metric_keys.MetricKeys.LOSS)
+        metrics[loss_name] = metrics_lib.mean(spec.loss, name=loss_name)
+        # Metric keys already contain head.name.
+        metrics.update(spec.eval_metric_ops or {})
+        for k, v in six.iteritems(spec.predictions):
+          predictions[(head_name, k)] = v
+      loss = _merge_losses(losses, self._head_weights)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.multi_head import *
+    return model_fn.EstimatorSpec(
+        mode=model_fn.ModeKeys.EVAL,
+        predictions=predictions,
+        loss=loss,
+        eval_metric_ops=metrics)
diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
new file mode 100644
index 0000000000..a602f87b4a
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py
@@ -0,0 +1,705 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for head."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import head as head_lib
+from tensorflow.contrib.estimator.python.estimator import multi_head as multi_head_lib
+from tensorflow.core.framework import summary_pb2
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import signature_constants
+
+
+_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
+
+def _initialize_variables(test_case, scaffold):
+  scaffold.finalize()
+  test_case.assertIsNone(scaffold.init_feed_dict)
+  test_case.assertIsNone(scaffold.init_fn)
+  scaffold.init_op.run()
+  scaffold.ready_for_local_init_op.eval()
+  scaffold.local_init_op.run()
+  scaffold.ready_op.eval()
+  test_case.assertIsNotNone(scaffold.saver)
+
+
+def _assert_simple_summaries(test_case, expected_summaries, summary_str,
+                             tol=1e-6):
+  """Assert summary the specified simple values.
+
+  Args:
+    test_case: test case.
+    expected_summaries: Dict of expected tags and simple values.
+    summary_str: Serialized `summary_pb2.Summary`.
+    tol: Tolerance for relative and absolute.
+  """
+  summary = summary_pb2.Summary()
+  summary.ParseFromString(summary_str)
+  test_case.assertAllClose(expected_summaries, {
+      v.tag: v.simple_value for v in summary.value
+  }, rtol=tol, atol=tol)
+
+
+def _assert_no_hooks(test_case, spec):
+  test_case.assertAllEqual([], spec.training_chief_hooks)
+  test_case.assertAllEqual([], spec.training_hooks)
+
+
+def _sigmoid(logits):
+  return 1 / (1 + np.exp(-logits))
+
+
+class MultiHeadTest(test.TestCase):
+
+  def setUp(self):
+    ops.reset_default_graph()
+
+  def test_no_heads(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'Must specify heads\. Given: \[\]'):
+      multi_head_lib.multi_head(heads=[])
+
+  def test_head_name_missing(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3)
+    with self.assertRaisesRegexp(
+        ValueError, r'All given heads must have name specified\.'):
+      multi_head_lib.multi_head([head1, head2])
+
+  def test_head_weights_wrong_size(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'heads and head_weights must have the same size\. '
+        r'Given len\(heads\): 2. Given len\(head_weights\): 1\.'):
+      multi_head_lib.multi_head([head1, head2], head_weights=[1.])
+
+  def test_name(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    multi_head = multi_head_lib.multi_head([head1, head2])
+    self.assertEqual('head1_head2', multi_head.name)
+
+  def _test_predict_two_heads_logits_dict(self, use_tpu):
+    """Tests predict with logits as dict."""
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    multi_head = multi_head_lib.multi_head([head1, head2])
+
+    logits = {
+        'head1': np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32),
+        'head2': np.array([[2., -2., 2.], [-3., 2., -2.]], dtype=np.float32)
+    }
+    expected_probabilities = {
+        'head1': _sigmoid(logits['head1']),
+        'head2': _sigmoid(logits['head2']),
+    }
+
+    if use_tpu:
+      spec = multi_head._create_tpu_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.PREDICT,
+          logits=logits).as_estimator_spec()
+    else:
+      spec = multi_head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.PREDICT,
+          logits=logits)
+
+    self.assertItemsEqual(
+        (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification',
+         'head1/predict', 'head2', 'head2/classification', 'head2/predict'),
+        spec.export_outputs.keys())
+
+    # Assert predictions and export_outputs.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      predictions = sess.run(spec.predictions)
+      self.assertAllClose(
+          logits['head1'],
+          predictions[('head1', prediction_keys.PredictionKeys.LOGITS)])
+      self.assertAllClose(
+          logits['head2'],
+          predictions[('head2', prediction_keys.PredictionKeys.LOGITS)])
+      self.assertAllClose(
+          expected_probabilities['head1'],
+          predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)])
+      self.assertAllClose(
+          expected_probabilities['head2'],
+          predictions[('head2', prediction_keys.PredictionKeys.PROBABILITIES)])
+
+      self.assertAllClose(
+          expected_probabilities['head1'],
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
+      self.assertAllClose(
+          expected_probabilities['head1'],
+          sess.run(spec.export_outputs['head1'].scores))
+      self.assertAllClose(
+          expected_probabilities['head2'],
+          sess.run(spec.export_outputs['head2'].scores))
+      self.assertAllClose(
+          expected_probabilities['head1'],
+          sess.run(
+              spec.export_outputs['predict'].outputs['head1/probabilities']))
+      self.assertAllClose(
+          expected_probabilities['head2'],
+          sess.run(
+              spec.export_outputs['predict'].outputs['head2/probabilities']))
+      self.assertAllClose(
+          expected_probabilities['head1'],
+          sess.run(
+              spec.export_outputs['head1/predict'].outputs['probabilities']))
+      self.assertAllClose(
+          expected_probabilities['head2'],
+          sess.run(
+              spec.export_outputs['head2/predict'].outputs['probabilities']))
+
+  def test_predict_two_heads_logits_dict(self):
+    self._test_predict_two_heads_logits_dict(use_tpu=False)
+
+  def test_predict_two_heads_logits_dict_tpu(self):
+    self._test_predict_two_heads_logits_dict(use_tpu=True)
+
+  def test_predict_two_heads_logits_tensor(self):
+    """Tests predict with logits as Tensor."""
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    multi_head = multi_head_lib.multi_head([head1, head2])
+
+    logits = np.array(
+        [[-1., 1., 2., -2., 2.], [-1.5, 1., -3., 2., -2.]], dtype=np.float32)
+    expected_logits1 = np.array([[-1., 1.], [-1.5, 1.]], dtype=np.float32)
+    expected_logits2 = np.array([[2., -2., 2.], [-3., 2., -2.]],
+                                dtype=np.float32)
+    expected_probabilities = {
+        'head1': _sigmoid(expected_logits1),
+        'head2': _sigmoid(expected_logits2),
+    }
+
+    spec = multi_head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    self.assertItemsEqual(
+        (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/classification',
+         'head1/predict', 'head2', 'head2/classification', 'head2/predict'),
+        spec.export_outputs.keys())
+
+    # Assert predictions and export_outputs.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      predictions = sess.run(spec.predictions)
+      self.assertAllClose(
+          expected_logits1,
+          predictions[('head1', prediction_keys.PredictionKeys.LOGITS)])
+      self.assertAllClose(
+          expected_logits2,
+          predictions[('head2', prediction_keys.PredictionKeys.LOGITS)])
+      self.assertAllClose(
+          expected_probabilities['head1'],
+          predictions[('head1', prediction_keys.PredictionKeys.PROBABILITIES)])
+      self.assertAllClose(
+          expected_probabilities['head2'],
+          predictions[('head2', prediction_keys.PredictionKeys.PROBABILITIES)])
+
+      self.assertAllClose(
+          expected_probabilities['head1'],
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
+      self.assertAllClose(
+          expected_probabilities['head1'],
+          sess.run(spec.export_outputs['head1'].scores))
+      self.assertAllClose(
+          expected_probabilities['head2'],
+          sess.run(spec.export_outputs['head2'].scores))
+
+  def test_predict_two_heads_logits_tensor_multi_dim(self):
+    """Tests predict with multi-dimensional logits of shape [2, 2, 5]."""
+    head1 = head_lib.regression_head(label_dimension=2, name='head1')
+    head2 = head_lib.regression_head(label_dimension=3, name='head2')
+    multi_head = multi_head_lib.multi_head([head1, head2])
+
+    logits = np.array(
+        [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]],
+         [[-1.5, 1., -3., 2., -2.], [-1.5, 1., -3., 2., -2.]]],
+        dtype=np.float32)
+    expected_logits1 = np.array(
+        [[[-1., 1.], [-1., 1.]],
+         [[-1.5, 1.], [-1.5, 1.]]],
+        dtype=np.float32)
+    expected_logits2 = np.array(
+        [[[2., -2., 2.], [2., -2., 2.]],
+         [[-3., 2., -2.], [-3., 2., -2.]]],
+        dtype=np.float32)
+
+    spec = multi_head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    self.assertItemsEqual(
+        (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'head1/regression',
+         'head1/predict', 'head2', 'head2/regression', 'head2/predict'),
+        spec.export_outputs.keys())
+
+    # Assert predictions and export_outputs.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      predictions = sess.run(spec.predictions)
+      self.assertAllClose(
+          expected_logits1,
+          predictions[('head1', prediction_keys.PredictionKeys.PREDICTIONS)])
+      self.assertAllClose(
+          expected_logits2,
+          predictions[('head2', prediction_keys.PredictionKeys.PREDICTIONS)])
+
+      self.assertAllClose(
+          expected_logits1,
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].value))
+      self.assertAllClose(
+          expected_logits1,
+          sess.run(spec.export_outputs['head1'].value))
+      self.assertAllClose(
+          expected_logits2,
+          sess.run(spec.export_outputs['head2'].value))
+
+  def test_eval_two_heads_with_weights(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    multi_head = multi_head_lib.multi_head(
+        [head1, head2], head_weights=[1., 2.])
+
+    logits = {
+        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
+        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
+                          dtype=np.float32),
+    }
+    labels = {
+        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
+        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
+    }
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2 = 8.75
+    # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]]
+    # loss = ( (20 + 20 + 20) / 3 + (30 + 0 + 0) / 3 ) / 2 = 15
+    expected_loss_head1 = 8.75
+    expected_loss_head2 = 15.
+    expected_loss = 1. * expected_loss_head1 + 2. * expected_loss_head2
+
+    spec = multi_head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS + '/head1': expected_loss_head1,
+        keys.LOSS + '/head2': expected_loss_head2,
+        # Average loss over examples.
+        keys.LOSS_MEAN + '/head1': expected_loss_head1,
+        keys.LOSS_MEAN + '/head2': expected_loss_head2,
+        # auc and auc_pr cannot be reliably calculated for only 4-6 samples, but
+        # this assert tests that the algorithm remains consistent.
+        keys.AUC + '/head1': 0.1667,
+        keys.AUC + '/head2': 0.3333,
+        keys.AUC_PR + '/head1': 0.6667,
+        keys.AUC_PR + '/head2': 0.5000,
+    }
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, and metrics.
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
+          rtol=tol,
+          atol=tol)
+
+  def test_eval_tpu(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    multi_head = multi_head_lib.multi_head(
+        [head1, head2], head_weights=[1., 2.])
+
+    logits = {
+        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
+        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
+                          dtype=np.float32),
+    }
+    labels = {
+        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
+        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
+    }
+
+    with self.assertRaisesRegexp(
+        NotImplementedError,
+        r'TPU evaluation is not implemented for multi_head\.'):
+      multi_head._create_tpu_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=logits,
+          labels=labels)
+
+  def test_train_create_loss_one_head(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    multi_head = multi_head_lib.multi_head([head1])
+
+    logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)}
+    labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
+    loss = multi_head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)[0]
+    tol = 1e-3
+    with self.cached_session():
+      # Unreduced loss of the head is [[(10 + 10) / 2], (15 + 0) / 2]
+      # (averaged over classes, averaged over examples).
+      self.assertAllClose(8.75, loss.eval(), rtol=tol, atol=tol)
+
+  def test_train_create_loss_two_heads_with_weights(self):
+    # Use different example weighting for each head weighting.
+    weights1 = np.array([[1.], [2.]], dtype=np.float32)
+    weights2 = np.array([[2.], [3.]])
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1',
+                                      weight_column='weights1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2',
+                                      weight_column='weights2')
+    multi_head = multi_head_lib.multi_head(
+        [head1, head2], head_weights=[1., 2.])
+
+    logits = {
+        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
+        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
+                          dtype=np.float32),
+    }
+    labels = {
+        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
+        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
+    }
+    training_loss, unreduced_losses, weights, _ = multi_head.create_loss(
+        features={
+            'x': np.array(((42,),), dtype=np.int32),
+            'weights1': weights1,
+            'weights2': weights2
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    tol = 1e-3
+    with self.cached_session():
+      # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]]
+      # = [10, 7.5]
+      # training_loss = (1 * 10 + 2 * 7.5) / 2 = 12.5
+      # head-weighted unreduced_loss = 1 * [10, 7.5]
+      self.assertAllClose(
+          [[10.], [7.5]], unreduced_losses['head1'].eval(), rtol=tol, atol=tol)
+      # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]]
+      # = [20, 10]
+      # training_loss = (2 * 20 + 3 * 10) / 2 = 35
+      # head-weighted unreduced_loss = 2 * [20, 10]
+      self.assertAllClose(
+          [[40.], [20.]], unreduced_losses['head2'].eval(), rtol=tol, atol=tol)
+      # head-weighted training_loss = 1 * 12.5 + 2 * 35 = 82.5
+      self.assertAllClose(82.5, training_loss.eval(), rtol=tol, atol=tol)
+      # head-weighted example weights
+      self.assertAllClose(
+          [[1.], [2.]], weights['head1'].eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          [[4.], [6.]], weights['head2'].eval(), rtol=tol, atol=tol)
+
+  def test_train_create_loss_logits_tensor(self):
+    """Tests create_loss with logits Tensor."""
+    weights1 = np.array([[1.], [2.]], dtype=np.float32)
+    weights2 = np.array([[2.], [3.]])
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1',
+                                      weight_column='weights1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2',
+                                      weight_column='weights2')
+    multi_head = multi_head_lib.multi_head(
+        [head1, head2], head_weights=[1., 2.])
+
+    logits = np.array([[-10., 10., 20., -20., 20.],
+                       [-15., 10., -30., 20., -20.]], dtype=np.float32)
+    labels = {
+        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
+        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
+    }
+    training_loss, unreduced_losses, weights, _ = multi_head.create_loss(
+        features={
+            'x': np.array(((42,),), dtype=np.int32),
+            'weights1': weights1,
+            'weights2': weights2
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    tol = 1e-3
+    with self.cached_session():
+      # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]]
+      # = [10, 7.5]
+      # training_loss = (1 * 10 + 2 * 7.5) / 2 = 12.5
+      # head-weighted unreduced_loss = 1 * [10, 7.5]
+      self.assertAllClose(
+          [[10.], [7.5]], unreduced_losses['head1'].eval(), rtol=tol, atol=tol)
+      # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]]
+      # = [20, 10]
+      # training_loss = (2 * 20 + 3 * 10) / 2 = 35
+      # head-weighted unreduced_loss = 2 * [20, 10]
+      self.assertAllClose(
+          [[40.], [20.]], unreduced_losses['head2'].eval(), rtol=tol, atol=tol)
+      # head-weighted training_loss = 1 * 12.5 + 2 * 35 = 82.5
+      self.assertAllClose(82.5, training_loss.eval(), rtol=tol, atol=tol)
+      # head-weighted example weights
+      self.assertAllClose(
+          [[1.], [2.]], weights['head1'].eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          [[4.], [6.]], weights['head2'].eval(), rtol=tol, atol=tol)
+
+  def test_train_create_loss_logits_tensor_multi_dim(self):
+    """Tests create_loss with multi-dimensional logits of shape [2, 2, 5]."""
+    head1 = head_lib.regression_head(label_dimension=2, name='head1')
+    head2 = head_lib.regression_head(label_dimension=3, name='head2')
+    multi_head = multi_head_lib.multi_head([head1, head2])
+
+    logits = np.array(
+        [[[-1., 1., 2., -2., 2.], [-1., 1., 2., -2., 2.]],
+         [[-1.5, 1.5, -2., 2., -2.], [-1.5, 1.5, -2., 2., -2.]]],
+        dtype=np.float32)
+    labels = {
+        'head1': np.array([[[1., 0.], [1., 0.]],
+                           [[1.5, 1.5], [1.5, 1.5]]], dtype=np.float32),
+        'head2': np.array([[[0., 1., 0.], [0., 1., 0.]],
+                           [[2., 2., 0.], [2., 2., 0.]]], dtype=np.float32),
+    }
+    # Loss for the first head:
+    # loss1 = ((1+1)^2 + (0-1)^2 + (1+1)^2 + (0-1)^2 +
+    #          (1.5+1.5)^2 + (1.5-1.5)^2 + (1.5+1.5)^2 + (1.5-1.5)^2) / 8
+    #       = 3.5
+    # Loss for the second head:
+    # loss2 = ((0-2)^2 + (1+2)^2 + (0-2)^2 + (0-2)^2 + (1+2)^2 + (0-2)^2 +
+    #          (2+2)^2 + (2-2)^2 + (0+2)^2 + (2+2)^2 + (2-2)^2 + (0+2)^2) / 12
+    #       = 6.167
+    expected_training_loss = 3.5 + 6.167
+
+    training_loss = multi_head.create_loss(
+        features={},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)[0]
+    tol = 1e-3
+    with self.cached_session():
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
+
+  def test_train_one_head(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    multi_head = multi_head_lib.multi_head([head1])
+
+    logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)}
+    labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2 = 8.75
+    expected_loss = 8.75
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=3)])
+
+    spec = multi_head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    self.assertIsNotNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          metric_keys.MetricKeys.LOSS + '/head1': expected_loss,
+      }, summary_str, tol)
+
+  def test_train_one_head_with_optimizer(self):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    multi_head = multi_head_lib.multi_head([head1])
+
+    logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)}
+    labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2 = 8.75
+    expected_loss = 8.75
+    expected_train_result = 'my_train_op'
+
+    class _Optimizer(object):
+
+      def minimize(self, loss, global_step):
+        del global_step
+        return string_ops.string_join(
+            [constant_op.constant(expected_train_result),
+             string_ops.as_string(loss, precision=3)])
+
+    spec = multi_head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        optimizer=_Optimizer())
+
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run((spec.loss, spec.train_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
+          train_result)
+
+  def _test_train_two_heads_with_weights(self, use_tpu):
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    multi_head = multi_head_lib.multi_head(
+        [head1, head2], head_weights=[1., 2.])
+
+    logits = {
+        'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32),
+        'head2': np.array([[20., -20., 20.], [-30., 20., -20.]],
+                          dtype=np.float32),
+    }
+    labels = {
+        'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
+        'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
+    }
+    # For large logits, sigmoid cross entropy loss is approximated as:
+    # loss = labels * (logits < 0) * (-logits) +
+    #        (1 - labels) * (logits > 0) * logits =>
+    # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]]
+    # loss = ( (10 + 10) / 2 + (15 + 0) / 2 ) / 2 = 8.75
+    # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]]
+    # loss = ( (20 + 20 + 20) / 3 + (30 + 0 + 0) / 3 ) / 2 = 15
+    # Average over classes, weighted sum over batch and heads.
+    expected_loss_head1 = 8.75
+    expected_loss_head2 = 15.0
+    expected_loss = 1. * expected_loss_head1 + 2. * expected_loss_head2
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=3)])
+
+    if use_tpu:
+      spec = multi_head._create_tpu_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=logits,
+          labels=labels,
+          train_op_fn=_train_op_fn).as_estimator_spec()
+    else:
+      spec = multi_head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=logits,
+          labels=labels,
+          train_op_fn=_train_op_fn)
+
+    self.assertIsNotNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-3
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          metric_keys.MetricKeys.LOSS + '/head1': expected_loss_head1,
+          metric_keys.MetricKeys.LOSS + '/head2': expected_loss_head2,
+      }, summary_str, tol)
+
+  def test_train_two_heads_with_weights(self):
+    self._test_train_two_heads_with_weights(use_tpu=False)
+
+  def test_train_two_heads_with_weights_tpu(self):
+    self._test_train_two_heads_with_weights(use_tpu=True)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
index f500d54acb..cda23aa437 100644
--- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,819 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""replicate_model_fn python module.
+"""Utilities to replicate model_fn's over local GPUs.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
+This file contains util that allow to replicate `Estimator.model_fn` over
+GPUs.  Replicated version of a `model_fn` is returned that can subsequently
+be used with `Estimator`.
 """
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import replicate_model_fn
+from collections import defaultdict
+from contextlib import contextmanager
+import copy
 
-# Include attrs that start with single underscore.
-replicate_model_fn.__all__ = [
-    s for s in dir(replicate_model_fn) if not s.startswith('__')
-]
+import six
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.replicate_model_fn import *
+from tensorflow.core.framework import node_def_pb2
+from tensorflow.python.client import device_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.export import export_output as export_output_lib
+from tensorflow.python.framework import device as framework_device
+from tensorflow.python.framework import ops as ops_lib
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import sparse_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import tf_logging
+from tensorflow.python.training import device_setter as device_setter_lib
+from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.util import deprecation
+from tensorflow.python.util import function_utils
+
+
+@deprecation.deprecated(
+    '2018-05-31',
+    'Please use `tf.contrib.distribute.MirroredStrategy` instead.')
+def replicate_model_fn(model_fn,
+                       loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
+                       devices=None):
+  """Replicate `Estimator.model_fn` over GPUs.
+
+  The given `model_fn` specifies a single forward pass of a model.  To replicate
+  such a model over GPUs, each GPU gets its own instance of the forward pass
+  (a.k.a. a tower).  The input features and labels get sharded into the chunks
+  that correspond to the number of GPUs.  Each tower computes a loss based
+  on its input.  For each such loss, gradients are computed.  After that, the
+  available losses are aggregated to form aggregated loss.  Available
+  gradients are summed.  Then, they update weights using the specified
+  optimizer.
+
+  If `devices` are `None`, then all available GPUs are going to be used for
+  replication.  If no GPUs are available, then the model is going to be
+  placed on the CPU.
+
+  Two modes of local replication over available GPUs are supported:
+    1)  If exactly 1 GPU is detected, then variables and operations are placed
+        onto the GPU.
+    2)  If more than 1 GPU is detected, then variables are going to be placed on
+        the CPU.  Replicas of operations are placed on each individual GPU.
+
+  Here is an example of how one might use their `model_fn` to run over GPUs:
+    ```python
+       ...
+       def model_fn(...):  # See `model_fn` in `Estimator`.
+         loss = ...
+         optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
+         optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
+         if mode == tf.estimator.ModeKeys.TRAIN:
+           #  See the section below on `EstimatorSpec.train_op`.
+           return EstimatorSpec(mode=mode, loss=loss,
+                                train_op=optimizer.minimize(loss))
+
+         #  No change for `ModeKeys.EVAL` or `ModeKeys.PREDICT`.
+         return EstimatorSpec(...)
+       ...
+       classifier = tf.estimator.Estimator(
+         model_fn=tf.contrib.estimator.replicate_model_fn(model_fn))
+    ```
+
+  Please see `DNNClassifierIntegrationTest` for an example with a canned
+  Estimator.
+
+  On `EstimatorSpec.train_op`:
+  `model_fn` returns `EstimatorSpec.train_op` for
+  `tf.estimator.GraphKeys.TRAIN`. It is typically derived using an optimizer.
+  Towers are expected to populate it in the same way.  Gradients from all towers
+  are reduced and applied in the last tower.  To achieve that in the case of
+  multiple towers, `TowerOptimizer` needs to be used.  See `TowerOptimizer`.
+
+  On sharding input features and labels:
+  Input features and labels are split for consumption by each tower. They are
+  split across the dimension 0.  Features and labels need to be batch major.
+
+  On reduction algorithms:
+  Certain algorithms were chosen for aggregating results of computations on
+  multiple towers:
+    - Losses from all towers are reduced according to `loss_reduction`.
+    - Gradients from all towers are reduced according to `loss_reduction`
+      for each trainable variable.
+    - `eval_metrics_ops` are reduced per metric using `reduce_mean`.
+    - `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are
+      reduced using concatenation.
+    - For all other fields of `EstimatorSpec` the values of the first tower
+      are taken.
+
+  On distribution of variables:
+  Variables are not duplicated between towers.  Instead, they are placed on a
+  single device as defined above and shared across towers.
+
+  On overhead:
+  If only one device is specified, then aggregation of loss and gradients
+  doesn't happen. Replication consists of placing `model_fn` onto the
+  specified device.
+
+  On current limitations:
+    - `predictions` are not supported for `ModeKeys.EVAL`.  They are required
+       for `tf.contrib.estimator.add_metrics`.
+
+  Args:
+    model_fn: `model_fn` as defined in `Estimator`.  See the section above about
+      the train_op argument of `EstimatorSpec`.
+    loss_reduction: controls whether losses are summed or averaged.
+    devices: Optional list of devices to replicate the model across.  This
+      argument can be used to replicate only on the subset of available GPUs.
+      If `None`, then all available GPUs are going to be used for replication.
+      If no GPUs are available, then the model is going to be placed on the CPU.
+
+  Raises:
+    ValueError: if there is no `loss_reduction` or if TowerOptimizer is
+      mis-used.
+
+  Returns:
+    A replicated version of the supplied `model_fn`. Returned function that
+      conforms to the requirements of `Estimator`'s `model_fn` and can be used
+      instead of the supplied `model_fn`.
+  """
+  return _replicate_model_fn_with_mode(
+      model_fn,
+      loss_reduction,
+      devices,
+      # TODO(isaprykin): Query the system configuration to choose modes other
+      # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often
+      # appropriate.
+      mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER)
+
+
+class _VariableDistributionMode(object):
+  """Modes for variable distribution used for forcing a particular one.
+
+  Forcing a mode is meant for performance experimentation purposes rather than
+  for general use cases.
+  """
+
+  SHARED_LOCAL_PARAMETER_SERVER = 1
+  """Variables are placed on a single device and shared across all devices.
+
+  Two ways to achieve this distribution over available GPUs are supported:
+    1)  If exactly 1 GPU is detected, then variables and operations are placed
+        onto GPU.
+    2)  If more than 1 GPU is detected, then variables are going to be placed on
+        the CPU.  Replicas of operations are placed on each individual GPU.
+  """
+
+  SHARED_ROUND_ROBIN = 2
+  """Variables are placed on all devices in a round-robin fashion.
+
+  Every subsequent variable is placed on the next device.  There is only one
+  copy of each variable that is shared across all devices.
+  """
+
+
+def _replicate_model_fn_with_mode(
+    model_fn,
+    loss_reduction,
+    devices=None,
+    mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER):
+  """A version of `replicate_model_fn` that allows to specify a `mode`."""
+  if loss_reduction == losses.Reduction.NONE:
+    raise ValueError('Tower losses need to be reduced in some way, yet {} '
+                     'reduction is specified.'.format(loss_reduction))
+  if not devices:
+    devices = _get_local_devices('GPU') or _get_local_devices('CPU')
+
+  is_a_single_gpu_case = len(devices) == 1 and 'GPU' in devices[0].upper()
+  consolidation_device = devices[0] if is_a_single_gpu_case else '/CPU:0'
+
+  ps_devices = [consolidation_device]
+  if mode == _VariableDistributionMode.SHARED_ROUND_ROBIN:
+    ps_devices = devices
+
+  tf_logging.info('Replicating the `model_fn` across {}.  Variables are going '
+                  'to be placed on {}.  Consolidation device is going to be {}.'
+                  .format(devices, ps_devices, consolidation_device))
+
+  def single_device_model_fn(features, labels, mode, params=None, config=None):
+    """`model_fn` on a single device without reduction overhead."""
+    return _get_loss_towers(
+        model_fn=model_fn,
+        mode=mode,
+        features=[features],
+        labels=[labels],
+        params=params,
+        loss_reduction=loss_reduction,
+        config=config,
+        devices=devices,
+        local_ps_devices=ps_devices)[0]  # One device, so one spec is out.
+
+  def replicated_model_fn(features, labels, mode, params=None, config=None):
+    """Replicated version of `model_fn` to be used instead."""
+    feature_shards, label_shards = _split_batch(
+        features, labels, len(devices), device=consolidation_device)
+    tower_specs = _get_loss_towers(
+        model_fn=model_fn,
+        mode=mode,
+        features=feature_shards,
+        labels=label_shards,
+        params=params,
+        loss_reduction=loss_reduction,
+        config=config,
+        devices=devices,
+        local_ps_devices=ps_devices)
+
+    if mode == model_fn_lib.ModeKeys.TRAIN:
+      train_op = _minimize_towers(tower_specs)
+      return _train_spec(
+          tower_specs, train_op, aggregation_device=consolidation_device)
+    elif mode == model_fn_lib.ModeKeys.EVAL:
+      return _eval_spec(tower_specs, aggregation_device=consolidation_device)
+    elif mode == model_fn_lib.ModeKeys.PREDICT:
+      return _predict_spec(tower_specs, aggregation_device=consolidation_device)
+
+  if len(devices) == 1:
+    return single_device_model_fn
+  else:
+    return replicated_model_fn
+
+
+class TowerOptimizer(optimizer_lib.Optimizer):
+  """Gathers gradients from all towers and reduces them in the last one."""
+
+  COLLECTION_FOR_GRAPH_STATES = 'replicate_model_fn_graph_states'
+
+  @deprecation.deprecated(
+      '2018-05-31',
+      'Please use `tf.contrib.distribute.MirroredStrategy` instead.')
+  def __init__(self, optimizer_or_optimizer_fn):
+    """Wrap an existing optimizer for gathering gradients across towers.
+
+    Each invocation of model_fn has to call the same optimizers in the same
+    order.
+
+    Multiple optimizers that use the same or different losses are supported.
+
+    If TowerOptimizer is used but `replicate_model_fn` isn't, then no
+    aggregation will happen.  All calls will simply be forwarded to the
+    underlying optimizer. The behavior is similar if there is only one tower.
+
+    If TowerOptimizer is used together with SyncReplicasOptimizer that wraps
+    the user's optimizer, then it's the SyncReplicasOptimizer that needs to be
+    wrapped with TowerOptimizer.
+
+    Args:
+      optimizer_or_optimizer_fn: an instance of optimizer to wrap.  That
+        instance is going to be used for optimizer-specific logic.  This can
+        also be a no-argument function that returns such an optimizer instance.
+    """
+    self._optimizer_or_optimizer_fn = optimizer_or_optimizer_fn
+
+  @staticmethod
+  def has_been_used():
+    return TowerOptimizer._graph_state().has_tower_optimizer_been_used
+
+  def get_slot(self, *args, **kwargs):
+    return self._get_optimizer().get_slot(*args, **kwargs)
+
+  def get_slot_names(self, *args, **kwargs):
+    return self._get_optimizer().get_slot_names(*args, **kwargs)
+
+  def get_name(self, *args, **kwargs):
+    return self._get_optimizer().get_name(*args, **kwargs)
+
+  def variables(self, *args, **kwargs):
+    return self._get_optimizer().variables(*args, **kwargs)
+
+  def compute_gradients(self, loss, *args, **kwargs):
+    """Compute gradients, but first, if needed, scale the loss."""
+    loss = _scale_loss(loss,
+                       self._graph_state().loss_reduction,
+                       self._graph_state().number_of_towers)
+    return self._get_optimizer().compute_gradients(loss, *args, **kwargs)
+
+  def apply_gradients(self, grads_and_vars, global_step=None, **kwargs):
+    """Collect gradients updates to apply them with the last tower."""
+    if self._graph_state().number_of_towers == 1:
+      # Avoid the overhead of reduction if there's only one tower.
+      #
+      # There assumed to be only one tower if aggregation-related methods were
+      # not called by `_get_loss_towers`, for example if the model_fn uses
+      # TowerEstimator, but `replicate_model_fn` isn't used.
+      return self._get_optimizer().apply_gradients(grads_and_vars, global_step,
+                                                   **kwargs)
+
+    self._graph_state().collect_gradients(grads_and_vars)
+
+    if not self._graph_state().is_the_last_tower:
+      with ops_lib.control_dependencies(_extract_tensors(grads_and_vars)):
+        return self._construct_no_op_train_op()
+    else:
+      # Gradients need to be gathered and applied in the scope of the first
+      # tower, so that the tensors are accessible via names without prefixes.
+      var_scope, name_scope = self._graph_state().scopes_of_the_first_tower
+      with variable_scope.variable_scope(var_scope):
+        with ops_lib.name_scope(name_scope):
+          return self._apply_gathered_gradients(global_step, **kwargs)
+
+  def _apply_gathered_gradients(self, global_step, **kwargs):
+    graph_state = self._graph_state()
+    optimizer = self._get_optimizer()
+
+    grad_lists = {}
+    for grad, var in graph_state.get_latest_gradients_from_all_towers():
+      if grad is not None:
+        grad_lists.setdefault(var, []).append(grad)
+
+    aggregated_grads = []
+    with ops_lib.name_scope('gradient_aggregating'):
+      for var, grads in six.iteritems(grad_lists):
+        grad = _compute_sum_on_device(grads, var.device)
+        aggregated_grads.append((grad, var))
+    return optimizer.apply_gradients(
+        aggregated_grads, global_step=global_step, **kwargs)
+
+  def _get_optimizer(self):
+    if callable(self._optimizer_or_optimizer_fn):
+      # If optimizer is given as a function then we need to wait till we are
+      # under the right graph context before constructing it.  That's why the
+      # optimizer is constructed in _get_optimizer() rather than __init__().
+      self._optimizer_or_optimizer_fn = self._optimizer_or_optimizer_fn()
+    self._graph_state().has_tower_optimizer_been_used = True
+    return self._optimizer_or_optimizer_fn
+
+  def _construct_no_op_train_op(self):
+    return control_flow_ops.no_op(name='train_op_placeholder')
+
+  @staticmethod
+  def _graph_state():
+    graph_states = ops_lib.get_default_graph().get_collection_ref(
+        TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)
+    if not graph_states:
+      graph_states.append(TowerOptimizer._PerGraphState())
+    return graph_states[-1]
+
+  @staticmethod
+  def _did_towers_have_same_optimizer_calls():
+    graph_state = TowerOptimizer._graph_state()
+    return graph_state.did_towers_have_same_optimizer_calls()
+
+  @staticmethod
+  def _clear_graph_state():
+    # Clearing the Graph collection will prevent _PerGraphState from being
+    # serialized.
+    ops_lib.get_default_graph().clear_collection(
+        TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)
+
+  class _PerGraphState(object):
+    """Gradient reduction related state of a Tensorflow graph."""
+
+    def __init__(self):
+      self._collected_grads_and_vars = defaultdict(list)
+      self._current_tower_index = 0
+      self._number_of_towers = 1
+      self._loss_reduction = None
+      # Scopes of the first tower that don't have a prefix:
+      self._variable_scope = None
+      self._name_scope = None
+      # If needed, alert that TowerOptimizer needs to be used with model_fn.
+      self._has_tower_optimizer_been_used = False
+
+    def collect_gradients(self, grads_and_vars):
+      self._collected_grads_and_vars[self._current_tower_index].append(
+          grads_and_vars)
+
+    def get_latest_gradients_from_all_towers(self):
+      """Get gradients across towers for the last called optimizer."""
+      grads_and_vars = []
+      index_of_last_gradients = len(
+          self._collected_grads_and_vars[self._current_tower_index]) - 1
+      for tower_id in range(self._current_tower_index + 1):
+        grads_and_vars.extend(
+            self._collected_grads_and_vars[tower_id][index_of_last_gradients])
+      return grads_and_vars
+
+    def set_reduction_across_towers(self, loss_reduction, number_of_towers):
+      self._loss_reduction = loss_reduction
+      self._number_of_towers = number_of_towers
+
+    @contextmanager
+    def tower(self, tower_id, var_scope, name_scope):
+      if tower_id == 0:
+        self._variable_scope = var_scope
+        self._name_scope = name_scope
+      self._current_tower_index = tower_id
+      yield
+
+    @property
+    def scopes_of_the_first_tower(self):
+      return self._variable_scope, self._name_scope
+
+    @property
+    def is_the_last_tower(self):
+      return self._current_tower_index == (self._number_of_towers - 1)
+
+    @property
+    def number_of_towers(self):
+      return self._number_of_towers
+
+    @property
+    def loss_reduction(self):
+      return self._loss_reduction
+
+    @property
+    def has_tower_optimizer_been_used(self):
+      return self._has_tower_optimizer_been_used
+
+    @has_tower_optimizer_been_used.setter
+    def has_tower_optimizer_been_used(self, value):
+      self._has_tower_optimizer_been_used = value
+
+    def did_towers_have_same_optimizer_calls(self):
+      total_number_of_grads = sum([
+          len(grads)
+          for _, grads in six.iteritems(self._collected_grads_and_vars)
+      ])
+      return total_number_of_grads % self._number_of_towers == 0
+
+
+def _get_local_devices(device_type):
+  local_device_protos = device_lib.list_local_devices()
+  return [
+      device.name
+      for device in local_device_protos
+      if device.device_type == device_type
+  ]
+
+
+def _split_batch(features, labels, number_of_shards, device):
+  """Split input features and labels into batches."""
+
+  def ensure_divisible_by_shards(sequence):
+    batch_size = ops_lib.convert_to_tensor(sequence).get_shape()[0]
+    if batch_size % number_of_shards != 0:
+      raise ValueError(
+          'Batch size {} needs to be divisible by the number of GPUs, which '
+          'is {}.'.format(batch_size, number_of_shards))
+
+  def split_dictionary(dictionary):
+    """Split a dictionary into shards."""
+    shards = [{} for _ in range(number_of_shards)]
+    for name, tensor in six.iteritems(dictionary):
+      if isinstance(tensor, sparse_tensor.SparseTensor):
+        for i, shard in enumerate(
+            sparse_ops.sparse_split(
+                sp_input=tensor, num_split=number_of_shards, axis=0)):
+          shards[i][name] = shard
+      else:
+        ensure_divisible_by_shards(tensor)
+        for i, shard in enumerate(array_ops.split(tensor, number_of_shards)):
+          shards[i][name] = shard
+    return shards
+
+  with ops_lib.name_scope('split_inputs'):
+    with ops_lib.device(device):
+      if isinstance(features, dict):
+        feature_shards = split_dictionary(features)
+      else:
+        ensure_divisible_by_shards(features)
+        feature_shards = array_ops.split(features, number_of_shards)
+
+      if labels is None:
+        label_shards = None
+      elif isinstance(labels, dict):
+        label_shards = split_dictionary(labels)
+      else:
+        ensure_divisible_by_shards(labels)
+        label_shards = array_ops.split(labels, number_of_shards)
+  return feature_shards, label_shards
+
+
+_DEFAULT_NAME_SCOPE_PATTERN = 'tower_{}'
+
+
+def _get_loss_towers(model_fn,
+                     mode,
+                     features,
+                     labels,
+                     params,
+                     config,
+                     devices,
+                     local_ps_devices,
+                     loss_reduction,
+                     name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
+  """Replicate the loss computation across devices."""
+  tower_specs = []
+
+  model_fn_args = function_utils.fn_args(model_fn)
+  optional_params = {}
+  if 'params' in model_fn_args:
+    optional_params['params'] = copy.deepcopy(params)
+  if 'config' in model_fn_args:
+    optional_params['config'] = copy.deepcopy(config)
+
+  # pylint: disable=protected-access
+  round_robin_strategy = device_setter_lib._RoundRobinStrategy(
+      num_tasks=len(local_ps_devices))
+  TowerOptimizer._graph_state().set_reduction_across_towers(
+      loss_reduction, len(devices))
+
+  for i, device in enumerate(devices):
+    is_the_first_tower = (i == 0)
+
+    device_setter = _local_device_setter(
+        worker_device=device,
+        ps_devices=local_ps_devices,
+        ps_strategy=round_robin_strategy)
+
+    # We would like to preserve the names of the variables and ops that the user
+    # might be relying on. Names without a prefix are going to resolve to
+    # variables and ops of the first tower.
+    name_scope = name_scope_pattern
+    if is_the_first_tower:
+      name_scope = ''
+
+    with variable_scope.variable_scope(
+        '', reuse=not is_the_first_tower) as var_scope:
+      with ops_lib.name_scope(name_scope.format(i)) as name_scope:
+        with TowerOptimizer._graph_state().tower(
+            tower_id=i, var_scope=var_scope, name_scope=name_scope):
+          with ops_lib.device(device_setter):
+            labels_shard = None
+            if labels:
+              labels_shard = labels[i]
+
+            tower_spec = model_fn(
+                mode=mode,
+                features=features[i],
+                labels=labels_shard,
+                **optional_params)
+
+            if (tower_spec.train_op is not None and len(devices) > 1 and
+                not TowerOptimizer.has_been_used()):
+              raise ValueError('Please wrap optimizers with TowerOptimizer'
+                               ' in order to use replicate_model_fn with'
+                               ' multiple `devices`.')
+
+            # Scaling the loss here doesn't actually affect gradients.  Another
+            # instance of scaling happens inside the TowerOptimizer.
+            tower_spec = _scale_tower_loss(
+                tower_spec, loss_reduction, number_of_towers=len(devices))
+            tower_specs.append(tower_spec)
+
+  if not TowerOptimizer._did_towers_have_same_optimizer_calls():
+    raise ValueError('Each invocation of model_fn was supposed to make the same'
+                     ' optimizer calls.')
+  TowerOptimizer._clear_graph_state()
+  # pylint: enable=protected-access
+  return tower_specs
+
+
+def _local_device_setter(worker_device, ps_devices, ps_strategy):
+  """A device setter that puts distributes Var/Ops to PS/workers."""
+  ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']
+
+  def local_device_chooser(op):
+    current_device = framework_device.DeviceSpec.from_string(op.device or '')
+
+    node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def
+    if node_def.op in ps_ops:
+      ps_device_spec = framework_device.DeviceSpec.from_string(
+          '{}'.format(ps_devices[ps_strategy(op)]))
+
+      ps_device_spec.merge_from(current_device)
+      return ps_device_spec.to_string()
+    else:
+      worker_device_spec = framework_device.DeviceSpec.from_string(
+          worker_device or '')
+      worker_device_spec.merge_from(current_device)
+      return worker_device_spec.to_string()
+
+  return local_device_chooser
+
+
+def _scale_tower_loss(tower_spec, loss_reduction, number_of_towers):
+  """Produce an EstimatorSpec with appropriately scaled loss."""
+  if tower_spec.loss is None:
+    return tower_spec
+
+  estimator_spec = _asdict(tower_spec)
+  estimator_spec['loss'] = _scale_loss(tower_spec.loss, loss_reduction,
+                                       number_of_towers)
+  return model_fn_lib.EstimatorSpec(**estimator_spec)
+
+
+def _scale_loss(loss, loss_reduction, number_of_towers):
+  """If needed, scale down the loss for averaging loss by summing."""
+  if loss is None:
+    return None
+  if number_of_towers == 1:
+    return loss
+
+  if loss_reduction != losses.Reduction.SUM:
+    return math_ops.div(loss, 1.0 * number_of_towers, name='averaged_loss')
+  else:
+    return loss
+
+
+def _minimize_towers(tower_specs):
+  """`train_op` of the last tower applies aggregated gradients."""
+  return tower_specs[-1].train_op
+
+
+def _compute_sum_on_device(values, device, name=None):
+  with ops_lib.device(device):
+    if isinstance(values[0], ops_lib.IndexedSlices):
+      if name:
+        raise ValueError('The name {} is not expected to be given to '
+                         'IndexedSlices {}'.format(name, values))
+
+      values_concat = array_ops.concat([v.values for v in values], axis=0)
+      indices_concat = array_ops.concat([v.indices for v in values], axis=0)
+      return ops_lib.IndexedSlices(values_concat, indices_concat,
+                                   values[0].dense_shape)
+    else:
+      return math_ops.add_n(values, name=name)
+
+
+def _train_spec(tower_specs,
+                train_op,
+                aggregation_device,
+                aggregated_loss_name='loss'):
+  """Populate replicated EstimatorSpec for `GraphKeys.TRAIN`."""
+  # Spec of the last tower is used as the template for the final spec, because
+  # some `EstimatorSpec.training_hooks` rely on calls made in model_fn.  For
+  # example, `SyncReplicasOptimizerHook` validates the
+  # `SyncReplicasOptimizer.apply_gradients` call. `TowerEstimator` makes that
+  # call only in the last tower.
+  estimator_spec = _asdict(tower_specs[-1])
+  estimator_spec['mode'] = model_fn_lib.ModeKeys.TRAIN
+  estimator_spec['train_op'] = train_op
+  estimator_spec['loss'] = _compute_sum_on_device(
+      [spec.loss for spec in tower_specs], aggregation_device,
+      aggregated_loss_name)
+  return model_fn_lib.EstimatorSpec(**estimator_spec)
+
+
+def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'):
+  """Populate replicated EstimatorSpec for `GraphKeys.EVAL`."""
+  estimator_spec = _asdict(tower_specs[0])
+  estimator_spec['mode'] = model_fn_lib.ModeKeys.EVAL
+  estimator_spec['loss'] = _compute_sum_on_device(
+      [spec.loss for spec in tower_specs], aggregation_device,
+      aggregated_loss_name)
+
+  update_ops = []
+  for tower_spec in tower_specs:
+    for name, (_, update_op) in six.iteritems(tower_spec.eval_metric_ops):
+      update_ops.append(update_op)
+
+  with ops_lib.control_dependencies(update_ops):
+    reduced_update_op = _reduce_metric_variables(len(tower_specs))
+
+  eval_metric_ops = {}
+  for name, (metric_tensor, _) in six.iteritems(tower_specs[0].eval_metric_ops):
+    eval_metric_ops[name] = (metric_tensor, reduced_update_op)
+  estimator_spec['eval_metric_ops'] = eval_metric_ops
+  return model_fn_lib.EstimatorSpec(**estimator_spec)
+
+
+def _reduce_metric_variables(number_of_towers):
+  """Aggregate local variables used in metrics into the first tower."""
+  if number_of_towers == 1:
+    return control_flow_ops.no_op(name='no_eval_metric_reduction')
+
+  metric_variables = ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)
+  variables_per_tower = len(metric_variables) // number_of_towers
+
+  if len(metric_variables) % number_of_towers != 0:
+    raise ValueError(
+        'Different `EstimatorSpec.eval_metric_ops` across `model_fn()` calls.'
+        ' Expected {} local variables, but got {} instead.'.format(
+            variables_per_tower * number_of_towers, len(metric_variables)))
+
+  # `metric_variables` has the size of `variables_per_tower` x
+  #  number_of_towers.  Each tower is produced by calling the same model_fn.
+  #  First `variables_per_tower` correspond to the first tower.  Each such
+  #  variable has an replica at the `(variables_per_tower * i)` position, where
+  #  `i` is `[1.. number_of_towers]`.  We are going to add values from replicas
+  #  to each variable of the first tower.  We then zero out replica values, so
+  #  that `_reduce_metric_variables` operation is idempotent.  If a metric
+  #  is then computed based on local variables from the first tower, then the
+  #  resulting metric is an estimate for all `number_of_towers` towers.
+  ops = []
+  for i in range(0, variables_per_tower):
+    next_replica_id = i + variables_per_tower
+    replicas = [
+        metric_variables[replica_id]
+        for replica_id in range(next_replica_id, len(metric_variables),
+                                variables_per_tower)
+    ]  #  `replicas` doesn't contain the first-tower variable.
+
+    reduce_op = state_ops.assign_add(metric_variables[i],
+                                     math_ops.add_n(replicas))
+
+    with ops_lib.control_dependencies([reduce_op]):
+      for replica in replicas:
+        zeros_for_replica = array_ops.zeros(
+            array_ops.shape(replica), dtype=replica.dtype)
+        zero_out_replica_op = state_ops.assign(replica, zeros_for_replica)
+        ops.append(zero_out_replica_op)
+
+  return control_flow_ops.group(*ops)
+
+
+def _predict_spec(tower_specs, aggregation_device):
+  """Populate replicated EstimatorSpec for `GraphKeys.PREDICT`."""
+  estimator_spec = _asdict(tower_specs[0])
+  estimator_spec['mode'] = model_fn_lib.ModeKeys.PREDICT
+
+  with ops_lib.device(aggregation_device):
+    estimator_spec['predictions'] = _concat_tensor_dicts(
+        *[tower_spec.predictions for tower_spec in tower_specs])
+
+    export_outputs_dict = _dict_concat(
+        *[tower_spec.export_outputs for tower_spec in tower_specs])
+
+    export_outputs = {}
+    for name, export_output_list in six.iteritems(export_outputs_dict):
+      if isinstance(export_output_list[0], export_output_lib.PredictOutput):
+        export_outputs[name] = export_output_lib.PredictOutput(
+            outputs=_concat_tensor_dicts(*[
+                export_output.outputs for export_output in export_output_list
+            ]))
+      elif isinstance(export_output_list[0],
+                      export_output_lib.RegressionOutput):
+        export_outputs[name] = export_output_lib.RegressionOutput(
+            value=array_ops.concat(
+                [export_output.value for export_output in export_output_list],
+                axis=0))
+      elif isinstance(export_output_list[0],
+                      export_output_lib.ClassificationOutput):
+        scores = None
+        if export_output_list[0].scores is not None:
+          scores = array_ops.concat(
+              [export_output.scores for export_output in export_output_list],
+              axis=0)
+
+        classes = None
+        if export_output_list[0].classes is not None:
+          classes = array_ops.stack(
+              [export_output.classes for export_output in export_output_list],
+              axis=0)
+
+        export_outputs[name] = export_output_lib.ClassificationOutput(
+            scores=scores, classes=classes)
+
+  estimator_spec['export_outputs'] = export_outputs
+  return model_fn_lib.EstimatorSpec(**estimator_spec)
+
+
+def _concat_tensor_dicts(*tensor_dicts):
+  return {
+      name: array_ops.concat(tensors, axis=0, name=name)
+      for name, tensors in six.iteritems(_dict_concat(*tensor_dicts))
+  }
+
+
+def _extract_tensors(tensors_and_vars):
+  tensors = []
+  for tensor_and_var in tensors_and_vars:
+    tensor, _ = tensor_and_var
+    if isinstance(tensor, ops_lib.IndexedSlices):
+      tensors.append(tensor.values)
+    elif tensor is not None:
+      tensors.append(tensor)
+  return tensors
+
+
+def _dict_concat(*dicts):
+  list_dict = {}
+  for d in dicts:
+    if d is None:
+      continue
+
+    for k, v in six.iteritems(d):
+      list_dict.setdefault(k, []).append(v)
+  return list_dict
+
+
+def _asdict(namedtuple):
+  """Returns a namedtuple as a dictionary.
+
+  This is required because `_asdict()` in Python 3.x.x is broken in classes
+  that inherit from `collections.namedtuple`. See
+  https://bugs.python.org/issue24931 for more details.
+
+  Args:
+    namedtuple: An object that inherits from `collections.namedtuple`.
+
+  Returns:
+    A dictionary version of the tuple.
+  """
+  return {k: getattr(namedtuple, k) for k in namedtuple._fields}
diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
new file mode 100644
index 0000000000..65229d67bb
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py
@@ -0,0 +1,1649 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for utilities that replicate `Estimator.model_fn` over GPUs."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import re
+import shutil
+import tempfile
+from absl.testing import parameterized
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import replicate_model_fn
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.canned import dnn
+from tensorflow.python.estimator.canned import optimizers
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.export import export_output
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops as ops_lib
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import losses
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import adam
+from tensorflow.python.training import device_setter
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import training
+
+
+class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase,
+                                   parameterized.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  @parameterized.named_parameters(
+      ('PublicInterface', None),
+      ('ParameterServerMode', replicate_model_fn._VariableDistributionMode.
+       SHARED_LOCAL_PARAMETER_SERVER),
+      ('RoundRobinMode',
+       replicate_model_fn._VariableDistributionMode.SHARED_ROUND_ROBIN))
+  def test_complete_flow_with_mode(self, mode):
+    n_classes = 3
+    input_dimension = 2
+    batch_size = 12
+
+    data = np.linspace(
+        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
+    x_data = data.reshape(batch_size, input_dimension)
+    categorical_data = np.random.random_integers(
+        0, len(x_data), size=len(x_data))
+    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data,
+           'categories': categorical_data},
+        y=y_data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data,
+           'categories': categorical_data},
+        y=y_data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data,
+           'categories': categorical_data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,)),
+        feature_column.embedding_column(
+            feature_column.categorical_column_with_vocabulary_list(
+                'categories',
+                vocabulary_list=np.linspace(
+                    0., len(x_data), len(x_data), dtype=np.int64)), 1)
+    ]
+
+    def optimizer_fn():
+      return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)
+
+    estimator = dnn.DNNClassifier(
+        hidden_units=(2, 2),
+        # Adagrad is configured with `get_optimizer_instance`, so the function
+        # form of `TowerOptimizer.__init__` is used.
+        optimizer=replicate_model_fn.TowerOptimizer(optimizer_fn),
+        feature_columns=feature_columns,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    if not mode:  # Use the public `replicate_model_fn`.
+      model_fn = replicate_model_fn.replicate_model_fn(
+          estimator.model_fn, devices=['/gpu:0', '/gpu:1', '/gpu:2'])
+    else:
+      model_fn = replicate_model_fn._replicate_model_fn_with_mode(
+          estimator.model_fn,
+          devices=['/gpu:0', '/gpu:1', '/gpu:2'],
+          loss_reduction=losses.Reduction.SUM,
+          mode=mode)
+
+    estimator = estimator_lib.Estimator(
+        model_fn=model_fn,
+        model_dir=estimator.model_dir,
+        config=estimator.config,
+        params=estimator.params)
+
+    num_steps = 10
+    estimator.train(train_input_fn, steps=num_steps)
+
+    scores = estimator.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    predicted_proba = np.array([
+        x[prediction_keys.PredictionKeys.PROBABILITIES]
+        for x in estimator.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
+
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
+                                             serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+    # Nothing should be left in the graph so that it doesn't get serialized.
+    self.assertFalse(ops_lib.get_default_graph().get_collection_ref(
+        replicate_model_fn.TowerOptimizer.COLLECTION_FOR_GRAPH_STATES))
+
+  def _as_label(self, data_in_float):
+    return np.rint(data_in_float).astype(np.int64)
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+
+class ReplicateModelTest(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    optimizer = replicate_model_fn.TowerOptimizer(
+        gradient_descent.GradientDescentOptimizer(params['learning_rate']))
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=optimizer.minimize(loss))
+
+  @property
+  def params(self):
+    params = {}
+    params['learning_rate'] = 1.0
+    return params
+
+  def test_train(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          loss_reduction=losses.Reduction.SUM,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # derivative of loss = (1*c - 1) + (2*c - 2) is 3.
+      # new value of c = 10 - learning rate * 3 = 7.0.
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(7.0, session.run(c))
+
+  def test_train_with_mean_reduction(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.cached_session() as session:
+      # Add another trainable variable that doesn't produce a gradient to
+      # verify that None gradients are supported.
+      _ = variable_scope.get_variable(
+          'another_variable',
+          initializer=constant_op.constant(1, dtype=dtypes.float64),
+          dtype=dtypes.float64)
+
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)) / 2.0
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # derivative of loss = (1*c - 1)/2 + (2*c - 2)/2 is 1.5.
+      # It's the same computation as without mean reduction, but the
+      # loss from every tower is scaled by 1/<number of towers>.
+      # new value of c = 10 - learning rate * 1.5 = 8.5
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(8.5, session.run(c))
+
+  def test_train_two_steps_collected_gradients_are_reset_between_steps(self):
+    with ops_lib.Graph().as_default():
+      features = array_ops.placeholder(dtypes.float64)
+      labels = array_ops.placeholder(dtypes.float64)
+
+      feature_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]])
+      label_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]])
+
+      # loss = feature * c - label
+      expected_losses = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0),
+                         (1.5 * 7.0 - 1.5) + (2.5 * 7.0 - 2.5))
+      # Derivative of the loss is 1.0 + 2.0 for the first step and 1.5 + 2.5
+      # for the second.
+      expected_c = 10.0 - 3.0, 7.0 - 4.0
+
+      with self.cached_session() as session, variable_scope.variable_scope(
+          '', reuse=variable_scope.AUTO_REUSE):
+        replicated_model_fn = replicate_model_fn.replicate_model_fn(
+            self.model_fn,
+            loss_reduction=losses.Reduction.SUM,
+            devices=['/gpu:0', '/gpu:1'])
+        estimator_spec = replicated_model_fn(
+            features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+        session.run(variables.global_variables_initializer())
+
+        for feature_input, label_input, loss, weight in zip(
+            feature_inputs, label_inputs, expected_losses, expected_c):
+          feeds = {features: feature_input, labels: label_input}
+
+          self.assertEqual(loss, session.run(estimator_spec.loss, feeds))
+
+          session.run(estimator_spec.train_op, feeds)
+          c = variable_scope.get_variable('c', dtype=dtypes.float64)
+          self.assertEqual(weight, session.run(c, feeds))
+
+  def test_eval(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          loss_reduction=losses.Reduction.SUM,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
+      session.run(variables.local_variables_initializer())
+      session.run(variables.global_variables_initializer())
+
+      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
+      auc, b = estimator_spec.eval_metric_ops['auc']
+
+      session.run([a, b])
+      accuracy = session.run(accuracy)
+      auc = session.run(auc)
+
+      # loss[i] = features[i] * 10 - labels[i].
+      # Accuracy is 0.0 (no match) in the first tower.
+      # Accuracy is 1.0 (match) in the second tower, since the feature
+      # times weight "c" happened to be equal to the label.
+      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02))
+
+      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
+      self.assertEqual(0, auc)
+      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
+
+  def test_eval_with_mean_reduction(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
+      session.run(variables.local_variables_initializer())
+      session.run(variables.global_variables_initializer())
+
+      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
+      auc, b = estimator_spec.eval_metric_ops['auc']
+
+      session.run([a, b])
+      accuracy = session.run(accuracy)
+      auc = session.run(auc)
+
+      # loss[i] = features[i] * 10 - labels[i].
+      # Accuracy is 0.0 (no match) in the first tower.
+      # Accuracy is 1.0 (match) in the second tower, since the feature
+      # times weight "c" happened to be equal to the label.
+      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) / 2.0
+
+      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
+      self.assertEqual(0, auc)
+      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
+
+  def test_predict(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.PREDICT, self.params)
+      session.run(variables.global_variables_initializer())
+
+      self.assertAllClose({
+          'probabilities': np.array([[0.1], [0.02]])
+      }, session.run(estimator_spec.predictions))
+
+  def test_train_single_tower(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, devices=['/gpu:0'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # loss' of c is 3.
+      # new value of c = 10 - learning rate * 3 = 7.0.
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(7.0, session.run(c))
+
+  def test_eval_single_tower(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, devices=['/gpu:0'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.EVAL, self.params)
+      session.run(variables.local_variables_initializer())
+      session.run(variables.global_variables_initializer())
+
+      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
+      auc, b = estimator_spec.eval_metric_ops['auc']
+
+      session.run([a, b])
+      accuracy = session.run(accuracy)
+      auc = session.run(auc)
+
+      # Accuracy is 0.0 (no match) in the first tower.
+      # Accuracy is 1.0 (match) in the second tower, since the feature
+      # times weight "c" happened to be equal to the label.
+      total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02))
+
+      self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01)
+      self.assertEqual(0, auc)
+      self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01)
+
+  def test_predict_single_tower(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, devices=['/gpu:0'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.PREDICT, self.params)
+      session.run(variables.global_variables_initializer())
+
+      self.assertAllClose({
+          'probabilities': np.array([[0.1], [0.02]])
+      }, session.run(estimator_spec.predictions))
+
+  def test_batch_size_that_is_not_divisible_by_the_number_of_gpus(self):
+    features = np.array([[1.0], [2.0], [3.0]])
+    labels = np.array([[1.0], [2.0], [3.0]])
+
+    with self.assertRaisesRegexp(
+        ValueError, '.*Batch.+size.+needs.+to.+be.+divisible.+by.+GPUs.+'):
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, devices=['/gpu:0', '/gpu:1'])
+      _ = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+
+  def test_unsupported_loss_reduction(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 '.+none.+reduction.+is.+specified.+'):
+      _ = replicate_model_fn.replicate_model_fn(self.model_fn,
+                                                losses.Reduction.NONE)
+
+  def test_places_on_gpu_with_upper_case_spelling(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.cached_session():
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, devices=['/GPU:0'])
+      _ = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:0', c.device)
+
+  def test_places_on_gpu_with_lower_case_spelling(self):
+    features = np.array([[0.01], [0.002]])
+    labels = np.array([[0.01], [0.02]])
+
+    with self.cached_session():
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, devices=['/gpu:0'])
+      _ = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:0', c.device)
+
+
+class ReplicateAcrossASingleDeviceWithoutTowerOptimizer(
+    test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    optimizer = gradient_descent.GradientDescentOptimizer(
+        params['learning_rate'])
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=optimizer.minimize(loss))
+
+  @property
+  def params(self):
+    params = {}
+    params['learning_rate'] = 1.0
+    return params
+
+  def test_train_single_tower(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn, devices=['/gpu:0'])
+      estimator_spec = replicated_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.params)
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # loss' of c is 3.
+      # new value of c = 10 - learning rate * 3 = 7.0.
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(7.0, session.run(c))
+
+
+class MakeSureSyncReplicasOptimizerWorks(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    features = features['features']
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    optimizer = gradient_descent.GradientDescentOptimizer(
+        params['learning_rate'])
+    optimizer = training.SyncReplicasOptimizer(
+        optimizer, replicas_to_aggregate=1)
+    sync_hook = optimizer.make_session_run_hook(True)
+    optimizer = replicate_model_fn.TowerOptimizer(optimizer)
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        training_hooks=[sync_hook],
+        predictions={'probabilities': predictions},
+        train_op=optimizer.minimize(
+            loss, global_step=training.get_global_step()))
+
+  @property
+  def params(self):
+    params = {}
+    params['learning_rate'] = 1.0
+    return params
+
+  def test_train_multiple_towers(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'features': features}, y=labels, batch_size=2, shuffle=False)
+
+    model_fn = replicate_model_fn.replicate_model_fn(
+        self.model_fn,
+        loss_reduction=losses.Reduction.SUM,
+        devices=['/gpu:0', '/gpu:1'])
+
+    estimator = estimator_lib.Estimator(
+        model_fn=model_fn, model_dir=tempfile.mkdtemp(), params=self.params)
+    estimator.train(train_input_fn, steps=1)
+
+    self.assertEqual(7.0, estimator.get_variable_value('c'))
+
+
+class ReplicateWithTwoOptimizersTest(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    side_effects = variable_scope.get_variable(
+        'side_effects',
+        initializer=constant_op.constant(0, dtype=dtypes.float64),
+        dtype=dtypes.float64,
+        trainable=False)
+
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    first_optimizer = replicate_model_fn.TowerOptimizer(
+        gradient_descent.GradientDescentOptimizer(1.0))
+    second_optimizer = replicate_model_fn.TowerOptimizer(
+        adam.AdamOptimizer(1.0))
+
+    with ops_lib.control_dependencies([side_effects.assign_add(1.0)]):
+      first_grads_and_vars = first_optimizer.compute_gradients(loss)
+
+    train_op = control_flow_ops.group(
+        [first_optimizer.apply_gradients(first_grads_and_vars),
+         second_optimizer.minimize(loss)])
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=train_op)
+
+  def test_train(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          loss_reduction=losses.Reduction.SUM,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(features, labels,
+                                           model_fn_lib.ModeKeys.TRAIN, {})
+      session.run(variables.global_variables_initializer())
+
+      # loss = feature * c - label
+      total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      # loss' of c is 3.
+      # new value of c = 10 - learning rate * 3 = 7.0.
+      # Adam subtracts another ~1.
+      session.run(estimator_spec.train_op)
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertNear(6.0, session.run(c), 0.000001)
+
+        side_effects = variable_scope.get_variable(
+            'side_effects', dtype=dtypes.float64)
+        self.assertNear(2.0, session.run(side_effects), 0.000001)
+
+
+class ReplicateWithTwoLossesAndOneOptimizer(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    self._should_skip_optimizer = False
+    self._towers_left_before_skipping_optimizer = -1
+
+  def incorrectly_skip_optimizer_for_tower(self, tower_number):
+    self._should_skip_optimizer = True
+    self._towers_left_before_skipping_optimizer = tower_number
+
+  def should_skip_optimizer(self):
+    if not self._should_skip_optimizer:
+      return False
+    if self._towers_left_before_skipping_optimizer == 0:
+      return True
+    else:
+      self._towers_left_before_skipping_optimizer -= 1
+      return False
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+    d = variable_scope.get_variable(
+        'd',
+        initializer=constant_op.constant(2, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    another_predictions = math_ops.multiply(features, d)
+    another_loss = losses.absolute_difference(
+        labels=labels,
+        predictions=another_predictions,
+        reduction=losses.Reduction.SUM)
+    another_loss = math_ops.reduce_sum(another_loss)
+
+    total_loss = math_ops.add(loss, another_loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    train_ops = []
+
+    optimizer = replicate_model_fn.TowerOptimizer(
+        gradient_descent.GradientDescentOptimizer(1.0))
+    train_ops.append(optimizer.minimize(loss, var_list=[c]))
+    if not self.should_skip_optimizer():
+      another_optimizer = replicate_model_fn.TowerOptimizer(
+          gradient_descent.GradientDescentOptimizer(1.0))
+      train_ops.append(another_optimizer.minimize(another_loss, var_list=[d]))
+
+    train_op = control_flow_ops.group(train_ops)
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=total_loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=train_op)
+
+  def test_train(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.cached_session() as session:
+      replicated_model_fn = replicate_model_fn.replicate_model_fn(
+          self.model_fn,
+          loss_reduction=losses.Reduction.SUM,
+          devices=['/gpu:0', '/gpu:1'])
+      estimator_spec = replicated_model_fn(features, labels,
+                                           model_fn_lib.ModeKeys.TRAIN, {})
+      session.run(variables.global_variables_initializer())
+
+      # For each tower, loss = (feature * c - label) + (feature * d - label).
+      total_loss = (1.0 * 10 - 1.0 + 1.0 * 2.0 - 1.0) + (
+          2.0 * 10 - 2.0 + 2.0 * 2.0 - 2.0)
+      self.assertEqual(total_loss, session.run(estimator_spec.loss))
+
+      session.run(estimator_spec.train_op)
+
+      # loss' of c or loss' of d is 3.
+      # new value of c = 10 - learning rate * 3 = 7.0.
+      # new value of d = 2  - learning rate * 3 = -1.0.
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertNear(7.0, session.run(c), 0.000001)
+        d = variable_scope.get_variable('d', dtype=dtypes.float64)
+        self.assertNear(-1.0, session.run(d), 0.000001)
+
+  def test_different_optimizer_calls_within_towers(self):
+    self.incorrectly_skip_optimizer_for_tower(1)
+
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.cached_session(), ops_lib.Graph().as_default():
+      with self.assertRaisesRegexp(
+          ValueError, '.+was.+supposed.+to.+make.+same.+optimizer.+calls.+'):
+        replicated_model_fn = replicate_model_fn.replicate_model_fn(
+            self.model_fn, devices=['/gpu:0', '/gpu:1'])
+        _ = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN,
+                                {})
+
+
+class FailToWrapOptimizerInTheModelFn(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = math_ops.multiply(features, c)
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+    loss = math_ops.reduce_sum(loss)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+
+    optimizer = gradient_descent.GradientDescentOptimizer(1.0)
+    train_op = optimizer.minimize(loss)
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=loss,
+        eval_metric_ops=metrics,
+        predictions={'probabilities': predictions},
+        train_op=train_op)
+
+  def test_train(self):
+    features = np.array([[1.0], [2.0]])
+    labels = np.array([[1.0], [2.0]])
+
+    with self.cached_session():
+      with self.assertRaisesRegexp(ValueError,
+                                   'Please.+wrap.+with.+TowerOptimizer'):
+        replicated_model_fn = replicate_model_fn.replicate_model_fn(
+            self.model_fn, devices=['/gpu:0', '/gpu:1'])
+        _ = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN,
+                                {})
+
+
+class GetLossTowersTest(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(0.25, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c)
+    labels = np.array([0.1, 0.2, 0.3, labels[0]])
+
+    loss = losses.absolute_difference(
+        labels=labels, predictions=predictions, reduction=losses.Reduction.SUM)
+
+    return model_fn_lib.EstimatorSpec(mode=mode, loss=math_ops.reduce_sum(loss))
+
+  def test_gradients_are_computed(self):
+    with self.cached_session() as session:
+      tower_specs = replicate_model_fn._get_loss_towers(
+          self.model_fn,
+          mode=None,
+          features=[[0.6], [1.6]],
+          labels=[[0.6], [0.6]],
+          params=None,
+          config=None,
+          loss_reduction=losses.Reduction.SUM,
+          devices=['/gpu:0', '/gpu:1'],
+          local_ps_devices=['/gpu:0'],
+          name_scope_pattern='test_tower_{}')
+      session.run(variables.global_variables_initializer())
+
+      self.assertEqual(len(tower_specs), 2)
+
+      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
+      self.assertEqual('Sum:0', tower_specs[0].loss.name)
+      self.assertEqual(1.0, session.run(tower_specs[0].loss))
+
+      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
+      self.assertEqual('test_tower_1/Sum:0', tower_specs[1].loss.name)
+      # The input batch for the second tower had a loss that is 1.0
+      # bigger: 0.6 vs 1.6.
+      self.assertEqual(2.0, session.run(tower_specs[1].loss))
+
+      self.assertEqual(1, len(variables.global_variables()))
+      self.assertEqual(1, len(variables.trainable_variables()))
+
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(0.25, session.run(c))
+
+  def test_gradients_are_computed_with_mean_reduction(self):
+    with self.cached_session() as session:
+      tower_specs = replicate_model_fn._get_loss_towers(
+          self.model_fn,
+          mode=model_fn_lib.ModeKeys.EVAL,
+          features=[[0.6], [1.6]],
+          labels=[[0.6], [0.6]],
+          params=None,
+          loss_reduction=losses.Reduction.MEAN,
+          config=None,
+          devices=['/gpu:0', '/gpu:1'],
+          local_ps_devices=['/gpu:0'],
+          name_scope_pattern='test_tower_{}')
+      session.run(variables.global_variables_initializer())
+
+      self.assertEqual(len(tower_specs), 2)
+
+      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
+      self.assertEqual('averaged_loss:0', tower_specs[0].loss.name)
+      self.assertEqual(0.5, session.run(tower_specs[0].loss))
+
+      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
+      self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name)
+      # The input batch for the second tower had a loss that is 1.0
+      # bigger: 0.6 vs 1.6.
+      self.assertEqual(1.0, session.run(tower_specs[1].loss))
+
+      self.assertEqual(1, len(variables.global_variables()))
+      self.assertEqual(1, len(variables.trainable_variables()))
+
+      with variable_scope.variable_scope('', reuse=True):
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual(0.25, session.run(c))
+
+  def test_variables_are_round_robined_correctly(self):
+    """Test that creates multiple variables and tests round-robin placement."""
+
+    def model_fn(mode, features, labels, params):
+      del params
+      for variable_name in ['a', 'b', 'c', 'd']:
+        c = variable_scope.get_variable(
+            variable_name,
+            initializer=constant_op.constant(0.25, dtype=dtypes.float64),
+            dtype=dtypes.float64)
+
+      predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c)
+      labels = np.array([0.1, 0.2, 0.3, labels[0]])
+      loss = losses.absolute_difference(
+          labels=labels,
+          predictions=predictions,
+          reduction=losses.Reduction.SUM)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode, loss=math_ops.reduce_sum(loss))
+
+    with self.cached_session() as session:
+      tower_specs = replicate_model_fn._get_loss_towers(
+          model_fn,
+          mode=None,
+          features=[[0.6], [1.6], [2.6]],
+          labels=[[0.6], [0.6], [2.6]],
+          params=None,
+          loss_reduction=losses.Reduction.SUM,
+          config=None,
+          devices=['/gpu:0', '/gpu:1', '/gpu:3'],
+          local_ps_devices=['/gpu:0', '/gpu:1', '/gpu:3'],
+          name_scope_pattern='test_tower_{}')
+      session.run(variables.global_variables_initializer())
+
+      self.assertEqual(len(tower_specs), 3)
+      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
+      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
+      self.assertEqual('/device:GPU:3', tower_specs[2].loss.device)
+
+      with variable_scope.variable_scope('', reuse=True):
+        a = variable_scope.get_variable('a', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:0', a.device)
+        b = variable_scope.get_variable('b', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:1', b.device)
+        c = variable_scope.get_variable('c', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:3', c.device)
+        d = variable_scope.get_variable('d', dtype=dtypes.float64)
+        self.assertEqual('/device:GPU:0', d.device)
+
+
+class SplitBatchTest(test_util.TensorFlowTestCase):
+
+  def evaluate_shards(self, first_list, second_list):
+    evaluate_items = lambda x: x.eval()
+    return list(map(evaluate_items, first_list)), list(
+        map(evaluate_items, second_list))
+
+  def assertSparseValuesEqual(self, a, b):
+    self.assertAllEqual(a.indices, b.indices)
+    self.assertAllEqual(a.values, b.values)
+    self.assertAllEqual(a.dense_shape, b.dense_shape)
+
+  def test_simple_half_split(self):
+    with self.cached_session():
+      features = [0.0, 1.0, 2.0, 3.0]
+      labels = [10.0, 11.0, 12.0, 13.0]
+      feature_shards, label_shards = replicate_model_fn._split_batch(
+          features, labels, 2, device='/gpu:0')
+
+      feature_shards, label_shards = self.evaluate_shards(
+          feature_shards, label_shards)
+
+      self.assertAllEqual([[0.0, 1.0], [2.0, 3.0]], feature_shards)
+      self.assertAllEqual([[10.0, 11.0], [12.0, 13.0]], label_shards)
+
+  def test_to_each_their_own(self):
+    with self.cached_session():
+      features = [0.0, 1.0, 2.0, 3.0]
+      labels = [10.0, 11.0, 12.0, 13.0]
+      feature_shards, label_shards = replicate_model_fn._split_batch(
+          features, labels, 4, device='/gpu:0')
+
+      feature_shards, label_shards = self.evaluate_shards(
+          feature_shards, label_shards)
+
+      self.assertAllEqual([[0.0], [1.0], [2.0], [3.0]], feature_shards)
+      self.assertAllEqual([[10.0], [11.0], [12.0], [13.0]], label_shards)
+
+  def test_one_batch(self):
+    with self.cached_session():
+      features = [0.0, 1.0, 2.0, 3.0]
+      labels = [10.0, 11.0, 12.0, 13.0]
+      feature_shards, label_shards = replicate_model_fn._split_batch(
+          features, labels, 1, device='/gpu:0')
+
+      feature_shards, label_shards = self.evaluate_shards(
+          feature_shards, label_shards)
+
+      self.assertAllEqual([[0.0, 1.0, 2.0, 3.0]], feature_shards)
+      self.assertAllEqual([[10.0, 11.0, 12.0, 13.0]], label_shards)
+
+  def test_half_split_in_dictionary(self):
+    with self.cached_session():
+      features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]}
+      labels = [10.0, 11.0, 12.0, 13.0]
+
+      feature_shards, label_shards = replicate_model_fn._split_batch(
+          features, labels, 2, device='/gpu:0')
+
+      self.assertAllEqual([0.0, 1.0], feature_shards[0]['first'].eval())
+      self.assertAllEqual([4.0, 5.0], feature_shards[0]['second'].eval())
+      self.assertAllEqual([2.0, 3.0], feature_shards[1]['first'].eval())
+      self.assertAllEqual([6.0, 7.0], feature_shards[1]['second'].eval())
+      self.assertAllEqual([10.0, 11.0], label_shards[0].eval())
+      self.assertAllEqual([12.0, 13.0], label_shards[1].eval())
+
+  def test_sparse_tensor_can_be_split_unevenly(self):
+    with self.cached_session():
+      features = {
+          'x':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [1, 2], [2, 2]],
+                  values=[1.0, 2.0, 3.0],
+                  dense_shape=[3, 4])
+      }
+      labels = np.array([[1.0], [2.0]])
+
+      feature_shards, label_shards = replicate_model_fn._split_batch(
+          features, labels, 2, device='/gpu:0')
+
+      self.assertSparseValuesEqual(
+          sparse_tensor.SparseTensorValue(
+              indices=[[0, 0], [1, 2]], values=[1., 2.], dense_shape=[2, 4]),
+          feature_shards[0]['x'].eval())
+      self.assertSparseValuesEqual(
+          sparse_tensor.SparseTensorValue(
+              indices=[[0, 2]], values=[3.], dense_shape=[1, 4]),
+          feature_shards[1]['x'].eval())
+      self.assertAllEqual([[1.0]], label_shards[0].eval())
+      self.assertAllEqual([[2.0]], label_shards[1].eval())
+
+  def test_sparse_tensor_can_be_split_unevenly_repeated_row(self):
+    with self.cached_session():
+      features = {
+          'x':
+              sparse_tensor.SparseTensor(
+                  indices=[[0, 0], [1, 0], [1, 1]],
+                  values=[1.0, 2.0, 3.0],
+                  dense_shape=[3, 4])
+      }
+      labels = np.array([[1.0], [2.0]])
+
+      feature_shards, label_shards = replicate_model_fn._split_batch(
+          features, labels, 2, device='/gpu:0')
+
+      self.assertSparseValuesEqual(
+          sparse_tensor.SparseTensorValue(
+              indices=[[0, 0], [1, 0], [1, 1]],
+              values=[1., 2., 3.],
+              dense_shape=[2, 4]), feature_shards[0]['x'].eval())
+
+      second_batch = feature_shards[1]['x'].eval()
+      self.assertFalse(len(second_batch.indices))
+      self.assertFalse(len(second_batch.values))
+      self.assertAllEqual([1, 4], second_batch.dense_shape)
+      self.assertAllEqual([[1.0]], label_shards[0].eval())
+      self.assertAllEqual([[2.0]], label_shards[1].eval())
+
+  def test_one_batch_in_dictionary(self):
+    with self.cached_session() as session:  # pylint: disable=unused-variable
+      features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]}
+      labels = [10.0, 11.0, 12.0, 13.0]
+
+      feature_shards, label_shards = replicate_model_fn._split_batch(
+          features, labels, 1, device='/gpu:0')
+
+      self.assertAllEqual([0.0, 1.0, 2.0, 3.0],
+                          feature_shards[0]['first'].eval())
+      self.assertAllEqual([4.0, 5.0, 6.0, 7.0],
+                          feature_shards[0]['second'].eval())
+      self.assertAllEqual([10.0, 11.0, 12.0, 13.0], label_shards[0].eval())
+
+  def test_feature_and_label_dictionaries(self):
+    with self.cached_session() as session:  # pylint: disable=unused-variable
+      features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]}
+      labels = {'first': [10.0, 11.0], 'second': [12.0, 13.0]}
+
+      feature_shards, label_shards = replicate_model_fn._split_batch(
+          features, labels, 2, device='/gpu:0')
+
+      self.assertAllEqual([0.0, 1.0], feature_shards[0]['first'].eval())
+      self.assertAllEqual([4.0, 5.0], feature_shards[0]['second'].eval())
+      self.assertAllEqual([2.0, 3.0], feature_shards[1]['first'].eval())
+      self.assertAllEqual([6.0, 7.0], feature_shards[1]['second'].eval())
+      self.assertAllEqual([10.0], label_shards[0]['first'].eval())
+      self.assertAllEqual([12.0], label_shards[0]['second'].eval())
+      self.assertAllEqual([11], label_shards[1]['first'].eval())
+      self.assertAllEqual([13.0], label_shards[1]['second'].eval())
+
+
+class TrainSpecTest(test_util.TensorFlowTestCase):
+
+  expected_predictions = {}
+
+  def create_estimator_spec(self, loss):
+    return model_fn_lib.EstimatorSpec(
+        mode=model_fn_lib.ModeKeys.TRAIN,
+        loss=loss,
+        train_op=loss,  # Not used; currently required.
+        predictions=self.expected_predictions)
+
+  def create_constant_loss(self, loss_value):
+    return constant_op.constant(loss_value, dtype=dtypes.float64)
+
+  def test_example(self):
+    with self.cached_session() as session:
+      tower_losses = list(map(self.create_constant_loss, [2, 4, 6]))
+      tower_specs = list(map(self.create_estimator_spec, tower_losses))
+
+      expected_train_op = tower_losses[1]
+
+      estimator_spec = replicate_model_fn._train_spec(
+          tower_specs, expected_train_op, aggregation_device='/gpu:0')
+
+      self.assertEqual(expected_train_op, estimator_spec.train_op)
+      self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss))
+      self.assertEqual(self.expected_predictions, estimator_spec.predictions)
+
+
+class EvalSpecTest(test_util.TensorFlowTestCase):
+
+  def create_estimator_spec(self, loss, metrics):
+    return model_fn_lib.EstimatorSpec(
+        mode=model_fn_lib.ModeKeys.EVAL, loss=loss, eval_metric_ops=metrics)
+
+  def create_constant_loss(self, loss_value):
+    return constant_op.constant(loss_value, dtype=dtypes.float64)
+
+  def create_eval_metrics(self, noise):
+    predictions = np.array([0.1, 0.2, 0.3, 0.6 + noise])
+    labels = np.array([0.1, 0.2, 0.3, 0.6])
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions),
+        'auc': metrics_lib.auc(labels, predictions)
+    }
+    return metrics
+
+  def test_example(self):
+    with self.cached_session() as session:
+      tower_losses = map(self.create_constant_loss, [2, 4, 6])
+      tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3])
+      tower_specs = [
+          self.create_estimator_spec(l, m)
+          for l, m in zip(tower_losses, tower_metrics)
+      ]
+      session.run(variables.local_variables_initializer())
+
+      estimator_spec = replicate_model_fn._eval_spec(
+          tower_specs, aggregation_device='/device:GPU:0')
+
+      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
+      auc, b = estimator_spec.eval_metric_ops['auc']
+
+      self.assertEqual('/device:CPU:0', accuracy.device)
+      self.assertEqual('/device:CPU:0', auc.device)
+
+      session.run([a, b])
+      accuracy, auc = session.run([accuracy, auc])
+
+      self.assertNear((12 - 2) / 12, accuracy, 0.01)
+      self.assertEqual(0, auc)
+      self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss))
+
+  def test_handles_single_tower(self):
+    with self.cached_session() as session:
+      tower_losses = map(self.create_constant_loss, [5])
+      tower_metrics = map(self.create_eval_metrics, [0.2])
+      tower_specs = [
+          self.create_estimator_spec(l, m)
+          for l, m in zip(tower_losses, tower_metrics)
+      ]
+      session.run(variables.local_variables_initializer())
+
+      estimator_spec = replicate_model_fn._eval_spec(
+          tower_specs, aggregation_device='/device:GPU:0')
+
+      accuracy, a = estimator_spec.eval_metric_ops['accuracy']
+      auc, b = estimator_spec.eval_metric_ops['auc']
+
+      self.assertEqual('/device:CPU:0', accuracy.device)
+      self.assertEqual('/device:CPU:0', auc.device)
+
+      session.run([a, b])
+      accuracy = session.run(accuracy)
+      auc = session.run(auc)
+
+      self.assertNear((4 - 1) / 4, accuracy, 0.01)
+      self.assertEqual(0, auc)
+      self.assertEqual(5, session.run(estimator_spec.loss))
+
+
+class PredictSpecTest(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(0.25, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = math_ops.add(np.array([features[0], features[0]]), c)
+
+    return model_fn_lib.EstimatorSpec(
+        mode=model_fn_lib.ModeKeys.PREDICT,
+        predictions={
+            'probabilities': predictions
+        })
+
+  def test_example(self):
+    with self.cached_session() as session:
+      tower_specs = replicate_model_fn._get_loss_towers(
+          self.model_fn,
+          mode=None,
+          features=[[0.1], [0.2]],
+          loss_reduction=losses.Reduction.SUM,
+          labels=[[], []],
+          params=None,
+          config=None,
+          devices=['/gpu:0', '/gpu:1'],
+          local_ps_devices=['/gpu:0'],
+      )
+      session.run(variables.global_variables_initializer())
+
+      estimator_spec = replicate_model_fn._predict_spec(
+          tower_specs, aggregation_device='/gpu:0')
+
+      self.assertEqual('/device:GPU:0',
+                       estimator_spec.predictions['probabilities'].device)
+      self.assertAllClose({
+          'probabilities': np.array([0.35, 0.35, 0.45, 0.45])
+      }, session.run(estimator_spec.predictions))
+
+
+class ReduceMetricVariablesTest(test_util.TensorFlowTestCase):
+
+  def create_metric_variable(self, initial_value, name):
+    return variable_scope.variable(
+        initial_value,
+        trainable=False,
+        collections=[ops_lib.GraphKeys.METRIC_VARIABLES],
+        validate_shape=True,
+        name=name)
+
+  def create_tower_metrics(self, tower_id):
+    with variable_scope.variable_scope('', reuse=(tower_id != 0)):
+      self.create_metric_variable(1.3 * (tower_id + 1), 'total')
+      self.create_metric_variable(2.3 * (tower_id + 1), 'count')
+      self.create_metric_variable(
+          np.array([3.3, 3.5, 3.7]) * (tower_id + 1), 'total')
+
+  def test_example(self):
+    with self.cached_session() as session:
+      for tower_id in range(3):
+        self.create_tower_metrics(tower_id)
+
+      session.run(
+          variables.variables_initializer(
+              ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)))
+
+      session.run(
+          replicate_model_fn._reduce_metric_variables(number_of_towers=3))
+
+      # 1st tower = 1.3, 2.3,  [3.3, 3.5, 3.7]
+      # 2nd tower = 2.6, 4.6,  [6.6, 7.0, 7.4]
+      # 3rd tower = 3.9, 6.9,  [9.9, 10.5, 11.1]
+      # Reduced =   7.8, 13.8, [19.8, 21.0, 22.2]
+      # Towers are accumulated in the first tower.
+      local_metrics = session.run(
+          ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))
+
+      self.assertNear(7.8, local_metrics[0], 0.01)
+      self.assertNear(13.8, local_metrics[1], 0.01)
+      self.assertAllClose([19.8, 21., 22.1], local_metrics[2], 0.01)
+      self.assertNear(0.0, local_metrics[3], 0.01)
+      self.assertNear(0.0, local_metrics[4], 0.01)
+      self.assertAllClose([0.0, 0.0, 0.0], local_metrics[5], 0.01)
+      self.assertNear(0.0, local_metrics[6], 0.01)
+      self.assertNear(0.0, local_metrics[7], 0.01)
+      self.assertAllClose([0.0, 0.0, 0.0], local_metrics[8], 0.01)
+
+  def test_reduce_is_idempotent(self):
+    with self.cached_session() as session:
+      for tower_id in range(3):
+        self.create_tower_metrics(tower_id)
+
+      session.run(
+          variables.variables_initializer(
+              ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)))
+
+      for _ in range(20):
+        session.run(
+            replicate_model_fn._reduce_metric_variables(number_of_towers=3))
+
+      local_metrics = session.run(
+          ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))
+
+      self.assertNear(7.8, local_metrics[0], 0.01)
+      self.assertNear(13.8, local_metrics[1], 0.01)
+      self.assertAllClose([19.8, 21., 22.1], local_metrics[2], 0.01)
+      self.assertNear(0.0, local_metrics[3], 0.01)
+      self.assertNear(0.0, local_metrics[4], 0.01)
+      self.assertAllClose([0.0, 0.0, 0.0], local_metrics[5], 0.01)
+      self.assertNear(0.0, local_metrics[6], 0.01)
+      self.assertNear(0.0, local_metrics[7], 0.01)
+      self.assertAllClose([0.0, 0.0, 0.0], local_metrics[8], 0.01)
+
+  def test_handles_single_tower(self):
+    with self.cached_session() as session:
+      self.create_tower_metrics(0)
+      session.run(
+          variables.variables_initializer(
+              ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)))
+
+      session.run(
+          replicate_model_fn._reduce_metric_variables(number_of_towers=1))
+
+      local_metrics = session.run(
+          ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))
+
+      self.assertNear(1.3, local_metrics[0], 0.01)
+      self.assertNear(2.3, local_metrics[1], 0.01)
+      self.assertAllClose([3.3, 3.5, 3.7], local_metrics[2], 0.01)
+
+  def test_doesnt_accept_uneven_number_of_variables(self):
+    with self.cached_session() as session:
+      for tower_id in range(3):
+        self.create_tower_metrics(tower_id)
+      self.create_metric_variable(-1.0, 'oddball')
+
+      session.run(
+          variables.variables_initializer(
+              ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)))
+
+      with self.assertRaisesRegexp(
+          ValueError, '.+Expected.+local.+variables.+but.+got.+instead.+'):
+        session.run(
+            replicate_model_fn._reduce_metric_variables(number_of_towers=3))
+
+
+class MergeExportOutputsTest(test_util.TensorFlowTestCase):
+
+  def model_fn(self, mode, features, labels, params):
+    c = variable_scope.get_variable(
+        'c',
+        initializer=constant_op.constant(10, dtype=dtypes.float64),
+        dtype=dtypes.float64)
+
+    predictions = {'probabilities': math_ops.multiply(features, c)}
+    loss = losses.absolute_difference(
+        labels=labels,
+        predictions=predictions['probabilities'],
+        reduction=losses.Reduction.SUM)
+
+    metrics = {
+        'accuracy': metrics_lib.accuracy(labels, predictions['probabilities']),
+        'auc': metrics_lib.auc(labels, predictions['probabilities'])
+    }
+    tensor_string_repr = str(features)
+    classes = constant_op.constant(
+        re.search('(split_inputs/split:[0-9])', tensor_string_repr).group(1),
+        dtype=dtypes.string)
+
+    export_outputs = {
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+            export_output.PredictOutput(predictions),
+        'classification_output':
+            export_output.ClassificationOutput(predictions['probabilities'],
+                                               classes),
+        'classification_scores':
+            export_output.ClassificationOutput(
+                scores=predictions['probabilities']),
+        'classification_classes':
+            export_output.ClassificationOutput(classes=classes),
+        'regression_output':
+            export_output.RegressionOutput(predictions['probabilities']),
+    }
+
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        loss=math_ops.reduce_sum(loss),
+        eval_metric_ops=metrics,
+        predictions=predictions,
+        export_outputs=export_outputs)
+
+  def replicate_estimator_spec(self, session):
+    features = np.array([0.01, 0.002])
+    labels = np.array([0.01, 0.02])
+
+    replicated_model_fn = replicate_model_fn.replicate_model_fn(
+        self.model_fn, devices=['/gpu:0', '/gpu:1'])
+    estimator_spec = replicated_model_fn(features, labels,
+                                         model_fn_lib.ModeKeys.PREDICT, {})
+    session.run(variables.global_variables_initializer())
+    return estimator_spec
+
+  def test_merge_predict_output(self):
+    with self.cached_session() as session:
+      estimator_spec = self.replicate_estimator_spec(session)
+      self.assertAllClose(
+          {
+              'probabilities': np.array([0.1, 0.02])
+          },
+          session.run(estimator_spec.export_outputs[
+              signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY].outputs))
+
+  def test_merge_classification_output_scores_classes(self):
+    with self.cached_session() as session:
+      estimator_spec = self.replicate_estimator_spec(session)
+      self.assertAllClose(
+          [0.1, 0.02],
+          session.run(
+              estimator_spec.export_outputs['classification_output'].scores))
+      self.assertAllEqual(
+          [b'split_inputs/split:0', b'split_inputs/split:1'],
+          session.run(
+              estimator_spec.export_outputs['classification_output'].classes))
+
+  def test_merge_classification_output_scores(self):
+    with self.cached_session() as session:
+      estimator_spec = self.replicate_estimator_spec(session)
+      self.assertAllClose(
+          [0.1, 0.02],
+          session.run(
+              estimator_spec.export_outputs['classification_scores'].scores))
+      self.assertEqual(
+          None, estimator_spec.export_outputs['classification_scores'].classes)
+
+  def test_merge_classification_output_classes(self):
+    with self.cached_session() as session:
+      estimator_spec = self.replicate_estimator_spec(session)
+      self.assertAllEqual(
+          [b'split_inputs/split:0', b'split_inputs/split:1'],
+          session.run(
+              estimator_spec.export_outputs['classification_classes'].classes))
+      self.assertEqual(
+          None, estimator_spec.export_outputs['classification_classes'].scores)
+
+  def test_merge_regression_output(self):
+    with self.cached_session() as session:
+      estimator_spec = self.replicate_estimator_spec(session)
+      self.assertAllClose(
+          [0.1, 0.02],
+          session.run(estimator_spec.export_outputs['regression_output'].value))
+
+
+class GetLocalDevicesTest(test_util.TensorFlowTestCase):
+
+  def test_there_is_at_least_a_cpu(self):
+    self.assertTrue(replicate_model_fn._get_local_devices('CPU'))
+
+  def test_there_is_no_xpu(self):
+    self.assertFalse(
+        replicate_model_fn._get_local_devices('XPU'))  # XPU doesn't exist.
+
+  def test_whether_there_is_a_gpu(self):
+    if test.is_gpu_available():
+      self.assertTrue(len(replicate_model_fn._get_local_devices('GPU')))
+
+
+class LocalDeviceSetterTest(test_util.TensorFlowTestCase):
+
+  def test_vars_are_on_ps_but_ops_are_on_workers(self):
+    ps_devices = ['/device:GPU:3']
+    round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices))
+
+    local_device_setter = replicate_model_fn._local_device_setter(
+        ps_devices=ps_devices,
+        ps_strategy=round_robin,
+        worker_device='/device:GPU:2')
+
+    with ops_lib.device(local_device_setter):
+      a = variables.Variable(0.01)
+      self.assertEqual('/device:GPU:3', a.device)
+
+      b = variables.Variable(0.02)
+      self.assertEqual('/device:GPU:3', b.device)
+
+      c = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:3', c.device)
+
+      a_op = array_ops.concat(a, axis=0)
+      self.assertEqual('/device:GPU:2', a_op.device)
+
+      b_op = array_ops.concat(b, axis=0)
+      self.assertEqual('/device:GPU:2', b_op.device)
+
+  def test_round_robin_placement(self):
+    ps_devices = [
+        '/device:GPU:0', '/device:GPU:1', '/device:GPU:3', '/device:GPU:4'
+    ]
+    round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices))
+
+    local_device_setter = replicate_model_fn._local_device_setter(
+        ps_devices=ps_devices,
+        ps_strategy=round_robin,
+        worker_device='/device:GPU:2')
+
+    with ops_lib.device(local_device_setter):
+      a = variables.Variable(0.01)
+      self.assertEqual('/device:GPU:0', a.device)
+
+      b = variables.Variable(0.02)
+      self.assertEqual('/device:GPU:1', b.device)
+
+      c = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:3', c.device)
+
+      a_op = array_ops.concat(a, axis=0)
+      self.assertEqual('/device:GPU:2', a_op.device)
+
+      b_op = array_ops.concat(b, axis=0)
+      self.assertEqual('/device:GPU:2', b_op.device)
+
+      c = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:4', c.device)
+
+      d = variables.Variable(0.03)
+      self.assertEqual('/device:GPU:0', d.device)
+
+      c_op = array_ops.concat(c, axis=0)
+      self.assertEqual('/device:GPU:2', c_op.device)
+
+
+class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase):
+
+  def test_vectors(self):
+    with self.cached_session() as session:
+      total = replicate_model_fn._compute_sum_on_device(
+          [1.0, 2.0, 3.0, 4.0], device='/device:GPU:0', name='test_sum')
+
+      self.assertEqual('/device:GPU:0', total.device)
+      self.assertEqual('test_sum', total.op.name)
+      self.assertEqual(10.0, session.run(total))
+
+  def test_tensors(self):
+    with self.cached_session() as session:
+      total = replicate_model_fn._compute_sum_on_device(
+          [[1.0, 2.0], [3.0, 4.0]], device='/device:GPU:0', name='test_sum')
+
+      self.assertEqual('/device:GPU:0', total.device)
+      self.assertEqual('test_sum', total.op.name)
+      self.assertAllEqual([4.0, 6.0], session.run(total))
+
+  def test_indexedslices(self):
+    with self.cached_session() as session:
+      a = ops_lib.IndexedSlices(
+          constant_op.constant([1.0, 2.0]), [0, 1],
+          dense_shape=constant_op.constant([2]))
+      b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1])
+
+      total = replicate_model_fn._compute_sum_on_device(
+          [a, b], device='/device:GPU:0')
+
+      self.assertEqual('/device:GPU:0', total.device)
+      self.assertAllEqual([4.0, 6.0],
+                          session.run(ops_lib.convert_to_tensor(total)))
+
+  def test_indexedslices_higher_dimensions(self):
+    with self.cached_session() as session:
+      a = ops_lib.IndexedSlices(
+          constant_op.constant([[1.0, 5.0], [2.0, 6.0]]), [0, 1],
+          dense_shape=constant_op.constant([2, 4]))
+      b = ops_lib.IndexedSlices(
+          constant_op.constant([[3.0, 7.0], [4.0, 8.0]]), [0, 1])
+
+      total = replicate_model_fn._compute_sum_on_device(
+          [a, b], device='/device:GPU:0')
+
+      self.assertEqual('/device:GPU:0', total.device)
+      self.assertAllEqual([[4.0, 12.0], [6.0, 14.0]],
+                          session.run(ops_lib.convert_to_tensor(total)))
+
+  def test_indexedslices_some_dont_overlap(self):
+    with self.cached_session() as session:
+      a = ops_lib.IndexedSlices(
+          constant_op.constant([1.0, 2.0]), [0, 3],
+          dense_shape=constant_op.constant([4]))
+      b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1])
+
+      total = replicate_model_fn._compute_sum_on_device(
+          [a, b], device='/device:GPU:0')
+
+      self.assertEqual('/device:GPU:0', total.device)
+      self.assertAllEqual([4.0, 4.0, 0.0, 2.0],
+                          session.run(ops_lib.convert_to_tensor(total)))
+
+  def test_no_name_for_indexslices(self):
+    a = ops_lib.IndexedSlices(
+        constant_op.constant([1.0, 2.0]), [0, 1],
+        dense_shape=constant_op.constant([2]))
+    b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1])
+
+    with self.assertRaisesRegexp(ValueError, '.+name.+not.+expected.+'):
+      _ = replicate_model_fn._compute_sum_on_device(
+          [a, b], device='/device:GPU:0', name='cant_name_indexslices')
+
+
+class ConcatTensorDictsTest(test_util.TensorFlowTestCase):
+
+  def test_example(self):
+    tensor_dicts = [
+        {
+            'a': np.array([1.0, 2.0]),
+            'b': np.array([11.0]),
+            'c': np.array([21.0]),
+        },
+        {
+            'a': np.array([3.0]),
+            'b': np.array([12.0, 13.0]),
+        },
+        {
+            'b': np.array([14.0]),
+        },
+    ]
+
+    with self.cached_session() as session:
+      self.assertAllClose({
+          'a': np.array([1.0, 2.0, 3.0]),
+          'b': np.array([11.0, 12.0, 13.0, 14.0]),
+          'c': np.array([21.0]),
+      }, session.run(replicate_model_fn._concat_tensor_dicts(*tensor_dicts)))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/rnn.py b/tensorflow/contrib/estimator/python/estimator/rnn.py
index 60a2cd0912..c595f47395 100644
--- a/tensorflow/contrib/estimator/python/estimator/rnn.py
+++ b/tensorflow/contrib/estimator/python/estimator/rnn.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,569 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""rnn python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Recurrent Neural Network estimators."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import rnn
+import six
+
+from tensorflow.contrib.estimator.python.estimator import extenders
+from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.canned import optimizers
+from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.layers import core as core_layers
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import rnn
+from tensorflow.python.ops import rnn_cell
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.summary import summary
+from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.training import training_util
+
+
+# The defaults are historical artifacts of the initial implementation, but seem
+# reasonable choices.
+_DEFAULT_LEARNING_RATE = 0.05
+_DEFAULT_CLIP_NORM = 5.0
+
+_CELL_TYPES = {'basic_rnn': rnn_cell.BasicRNNCell,
+               'lstm': rnn_cell.BasicLSTMCell,
+               'gru': rnn_cell.GRUCell}
+
+# Indicates no value was provided by the user to a kwarg.
+USE_DEFAULT = object()
+
+
+def _single_rnn_cell(num_units, cell_type):
+  cell_type = _CELL_TYPES.get(cell_type, cell_type)
+  if not cell_type or not issubclass(cell_type, rnn_cell.RNNCell):
+    raise ValueError('Supported cell types are {}; got {}'.format(
+        list(_CELL_TYPES.keys()), cell_type))
+  return cell_type(num_units=num_units)
+
+
+def _make_rnn_cell_fn(num_units, cell_type='basic_rnn'):
+  """Convenience function to create `rnn_cell_fn` for canned RNN Estimators.
+
+  Args:
+    num_units: Iterable of integer number of hidden units per RNN layer.
+    cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
+      the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
+      `'gru'`.
+
+  Returns:
+    A function that takes a single argument, an instance of
+    `tf.estimator.ModeKeys`, and returns an instance derived from
+    `tf.nn.rnn_cell.RNNCell`.
+
+  Raises:
+    ValueError: If cell_type is not supported.
+  """
+  def rnn_cell_fn(mode):
+    # Unused. Part of the rnn_cell_fn interface since user specified functions
+    # may need different behavior across modes (e.g. dropout).
+    del mode
+    cells = [_single_rnn_cell(n, cell_type) for n in num_units]
+    if len(cells) == 1:
+      return cells[0]
+    return rnn_cell.MultiRNNCell(cells)
+  return rnn_cell_fn
+
+
+def _select_last_activations(activations, sequence_lengths):
+  """Selects the nth set of activations for each n in `sequence_length`.
+
+  Returns a `Tensor` of shape `[batch_size, k]`. If `sequence_length` is not
+  `None`, then `output[i, :] = activations[i, sequence_length[i] - 1, :]`. If
+  `sequence_length` is `None`, then `output[i, :] = activations[i, -1, :]`.
+
+  Args:
+    activations: A `Tensor` with shape `[batch_size, padded_length, k]`.
+    sequence_lengths: A `Tensor` with shape `[batch_size]` or `None`.
+  Returns:
+    A `Tensor` of shape `[batch_size, k]`.
+  """
+  with ops.name_scope(
+      'select_last_activations', values=[activations, sequence_lengths]):
+    activations_shape = array_ops.shape(activations)
+    batch_size = activations_shape[0]
+    padded_length = activations_shape[1]
+    output_units = activations_shape[2]
+    if sequence_lengths is None:
+      sequence_lengths = padded_length
+    start_indices = math_ops.to_int64(
+        math_ops.range(batch_size) * padded_length)
+    last_indices = start_indices + sequence_lengths - 1
+    reshaped_activations = array_ops.reshape(
+        activations, [batch_size * padded_length, output_units])
+
+    last_activations = array_ops.gather(reshaped_activations, last_indices)
+    last_activations.set_shape([activations.shape[0], activations.shape[2]])
+    return last_activations
+
+
+def _rnn_logit_fn_builder(output_units, rnn_cell_fn, sequence_feature_columns,
+                          context_feature_columns, input_layer_partitioner):
+  """Function builder for a rnn logit_fn.
+
+  Args:
+    output_units: An int indicating the dimension of the logit layer.
+    rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
+      returns an object of type `tf.nn.rnn_cell.RNNCell`.
+    sequence_feature_columns: An iterable containing the `FeatureColumn`s
+      that represent sequential input.
+    context_feature_columns: An iterable containing the `FeatureColumn`s
+      that represent contextual input.
+    input_layer_partitioner: Partitioner for input layer.
+
+  Returns:
+    A logit_fn (see below).
+
+  Raises:
+    ValueError: If output_units is not an int.
+  """
+  if not isinstance(output_units, int):
+    raise ValueError('output_units must be an int.  Given type: {}'.format(
+        type(output_units)))
+
+  def rnn_logit_fn(features, mode):
+    """Recurrent Neural Network logit_fn.
+
+    Args:
+      features: This is the first item returned from the `input_fn`
+                passed to `train`, `evaluate`, and `predict`. This should be a
+                single `Tensor` or `dict` of same.
+      mode: Optional. Specifies if this training, evaluation or prediction. See
+            `ModeKeys`.
+
+    Returns:
+      A `Tensor` representing the logits.
+    """
+    with variable_scope.variable_scope(
+        'sequence_input_layer',
+        values=tuple(six.itervalues(features)),
+        partitioner=input_layer_partitioner):
+      sequence_input, sequence_length = seq_fc.sequence_input_layer(
+          features=features, feature_columns=sequence_feature_columns)
+      summary.histogram('sequence_length', sequence_length)
+
+      if context_feature_columns:
+        context_input = feature_column_lib.input_layer(
+            features=features,
+            feature_columns=context_feature_columns)
+        sequence_input = seq_fc.concatenate_context_input(
+            context_input, sequence_input)
+
+    cell = rnn_cell_fn(mode)
+    # Ignore output state.
+    rnn_outputs, _ = rnn.dynamic_rnn(
+        cell=cell,
+        inputs=sequence_input,
+        sequence_length=sequence_length,
+        dtype=dtypes.float32,
+        time_major=False)
+    last_activations = _select_last_activations(rnn_outputs, sequence_length)
+
+    with variable_scope.variable_scope('logits', values=(rnn_outputs,)):
+      logits = core_layers.dense(
+          last_activations,
+          units=output_units,
+          activation=None,
+          kernel_initializer=init_ops.glorot_uniform_initializer())
+    return logits
+
+  return rnn_logit_fn
+
+
+def _rnn_model_fn(features,
+                  labels,
+                  mode,
+                  head,
+                  rnn_cell_fn,
+                  sequence_feature_columns,
+                  context_feature_columns,
+                  optimizer='Adagrad',
+                  input_layer_partitioner=None,
+                  config=None):
+  """Recurrent Neural Net model_fn.
+
+  Args:
+    features: dict of `Tensor` and `SparseTensor` objects returned from
+      `input_fn`.
+    labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels.
+    mode: Defines whether this is training, evaluation or prediction.
+      See `ModeKeys`.
+    head: A `head_lib._Head` instance.
+    rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
+      returns an object of type `tf.nn.rnn_cell.RNNCell`.
+    sequence_feature_columns: Iterable containing `FeatureColumn`s that
+      represent sequential model inputs.
+    context_feature_columns: Iterable containing `FeatureColumn`s that
+      represent model inputs not associated with a specific timestep.
+    optimizer: String, `tf.Optimizer` object, or callable that creates the
+      optimizer to use for training. If not specified, will use the Adagrad
+      optimizer with a default learning rate of 0.05 and gradient clip norm of
+      5.0.
+    input_layer_partitioner: Partitioner for input layer. Defaults
+      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+    config: `RunConfig` object to configure the runtime settings.
+
+  Returns:
+    An `EstimatorSpec` instance.
+
+  Raises:
+    ValueError: If mode or optimizer is invalid, or features has the wrong type.
+  """
+  if not isinstance(features, dict):
+    raise ValueError('features should be a dictionary of `Tensor`s. '
+                     'Given type: {}'.format(type(features)))
+
+  # If user does not provide an optimizer instance, use the optimizer specified
+  # by the string with default learning rate and gradient clipping.
+  if not isinstance(optimizer, optimizer_lib.Optimizer):
+    optimizer = optimizers.get_optimizer_instance(
+        optimizer, learning_rate=_DEFAULT_LEARNING_RATE)
+    optimizer = extenders.clip_gradients_by_norm(optimizer, _DEFAULT_CLIP_NORM)
+
+  num_ps_replicas = config.num_ps_replicas if config else 0
+  partitioner = partitioned_variables.min_max_variable_partitioner(
+      max_partitions=num_ps_replicas)
+  with variable_scope.variable_scope(
+      'rnn',
+      values=tuple(six.itervalues(features)),
+      partitioner=partitioner):
+    input_layer_partitioner = input_layer_partitioner or (
+        partitioned_variables.min_max_variable_partitioner(
+            max_partitions=num_ps_replicas,
+            min_slice_size=64 << 20))
+
+    logit_fn = _rnn_logit_fn_builder(
+        output_units=head.logits_dimension,
+        rnn_cell_fn=rnn_cell_fn,
+        sequence_feature_columns=sequence_feature_columns,
+        context_feature_columns=context_feature_columns,
+        input_layer_partitioner=input_layer_partitioner)
+    logits = logit_fn(features=features, mode=mode)
+
+    def _train_op_fn(loss):
+      """Returns the op to optimize the loss."""
+      return optimizer.minimize(
+          loss,
+          global_step=training_util.get_global_step())
+
+    return head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=labels,
+        train_op_fn=_train_op_fn,
+        logits=logits)
+
+
+def _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type):
+  """Assert arguments are valid and return rnn_cell_fn."""
+  if rnn_cell_fn and (num_units or cell_type != USE_DEFAULT):
+    raise ValueError(
+        'num_units and cell_type must not be specified when using rnn_cell_fn'
+    )
+  if not rnn_cell_fn:
+    if cell_type == USE_DEFAULT:
+      cell_type = 'basic_rnn'
+    rnn_cell_fn = _make_rnn_cell_fn(num_units, cell_type)
+  return rnn_cell_fn
+
+
+class RNNClassifier(estimator.Estimator):
+  """A classifier for TensorFlow RNN models.
+
+  Trains a recurrent neural network model to classify instances into one of
+  multiple classes.
+
+  Example:
+
+  ```python
+  token_sequence = sequence_categorical_column_with_hash_bucket(...)
+  token_emb = embedding_column(categorical_column=token_sequence, ...)
+
+  estimator = RNNClassifier(
+      sequence_feature_columns=[token_emb],
+      num_units=[32, 16], cell_type='lstm')
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    pass
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval: # returns x, y
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * if `weight_column` is not `None`, a feature with
+    `key=weight_column` whose value is a `Tensor`.
+  * for each `column` in `sequence_feature_columns`:
+    - a feature with `key=column.name` whose `value` is a `SparseTensor`.
+  * for each `column` in `context_feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss is calculated by using softmax cross entropy.
+
+  @compatibility(eager)
+  Estimators are not compatible with eager execution.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               sequence_feature_columns,
+               context_feature_columns=None,
+               num_units=None,
+               cell_type=USE_DEFAULT,
+               rnn_cell_fn=None,
+               model_dir=None,
+               n_classes=2,
+               weight_column=None,
+               label_vocabulary=None,
+               optimizer='Adagrad',
+               loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE,
+               input_layer_partitioner=None,
+               config=None):
+    """Initializes a `RNNClassifier` instance.
+
+    Args:
+      sequence_feature_columns: An iterable containing the `FeatureColumn`s
+        that represent sequential input. All items in the set should either be
+        sequence columns (e.g. `sequence_numeric_column`) or constructed from
+        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
+        input).
+      context_feature_columns: An iterable containing the `FeatureColumn`s
+        for contextual input. The data represented by these columns will be
+        replicated and given to the RNN at each timestep. These columns must be
+        instances of classes derived from `_DenseColumn` such as
+        `numeric_column`, not the sequential variants.
+      num_units: Iterable of integer number of hidden units per RNN layer. If
+        set, `cell_type` must also be specified and `rnn_cell_fn` must be
+        `None`.
+      cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
+        the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
+        `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn`
+        must be `None`.
+      rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
+        returns an object of type `tf.nn.rnn_cell.RNNCell` that will be used to
+        construct the RNN. If set, `num_units` and `cell_type` cannot be set.
+        This is for advanced users who need additional customization beyond
+        `num_units` and `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is
+        needed for stacked RNNs.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator to
+        continue training a previously saved model.
+      n_classes: Number of label classes. Defaults to 2, namely binary
+        classification. Must be > 1.
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to down weight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      label_vocabulary: A list of strings represents possible label values. If
+        given, labels must be string type and have any value in
+        `label_vocabulary`. If it is not given, that means labels are
+        already encoded as integer or float within [0, 1] for `n_classes=2` and
+        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
+        Also there will be errors if vocabulary is not provided and labels are
+        string.
+      optimizer: An instance of `tf.Optimizer` or string specifying optimizer
+        type. Defaults to Adagrad optimizer.
+      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+        to reduce training loss over batch. Defaults to `SUM_OVER_BATCH_SIZE`.
+      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
+        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+      config: `RunConfig` object to configure the runtime settings.
+
+    Raises:
+      ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
+        compatible.
+    """
+    rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)
+
+    if n_classes == 2:
+      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
+          weight_column=weight_column,
+          label_vocabulary=label_vocabulary,
+          loss_reduction=loss_reduction)
+    else:
+      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
+          n_classes,
+          weight_column=weight_column,
+          label_vocabulary=label_vocabulary,
+          loss_reduction=loss_reduction)
+
+    def _model_fn(features, labels, mode, config):
+      return _rnn_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          rnn_cell_fn=rnn_cell_fn,
+          sequence_feature_columns=tuple(sequence_feature_columns or []),
+          context_feature_columns=tuple(context_feature_columns or []),
+          optimizer=optimizer,
+          input_layer_partitioner=input_layer_partitioner,
+          config=config)
+    super(RNNClassifier, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config)
+
+
+class RNNEstimator(estimator.Estimator):
+  """An Estimator for TensorFlow RNN models with user-specified head.
+
+  Example:
+
+  ```python
+  token_sequence = sequence_categorical_column_with_hash_bucket(...)
+  token_emb = embedding_column(categorical_column=token_sequence, ...)
+
+  estimator = RNNEstimator(
+      head=tf.contrib.estimator.regression_head(),
+      sequence_feature_columns=[token_emb],
+      num_units=[32, 16], cell_type='lstm')
+
+  # Or with custom RNN cell:
+  def rnn_cell_fn(mode):
+    cells = [ tf.contrib.rnn.LSTMCell(size) for size in [32, 16] ]
+    if mode == tf.estimator.ModeKeys.TRAIN:
+      cells = [ tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob=0.5)
+                    for cell in cells ]
+    return tf.contrib.rnn.MultiRNNCell(cells)
+
+  estimator = RNNEstimator(
+      head=tf.contrib.estimator.regression_head(),
+      sequence_feature_columns=[token_emb],
+      rnn_cell_fn=rnn_cell_fn)
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    pass
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval: # returns x, y
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * if the head's `weight_column` is not `None`, a feature with
+    `key=weight_column` whose value is a `Tensor`.
+  * for each `column` in `sequence_feature_columns`:
+    - a feature with `key=column.name` whose `value` is a `SparseTensor`.
+  * for each `column` in `context_feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss and predicted output are determined by the specified head.
+
+  @compatibility(eager)
+  Estimators are not compatible with eager execution.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               head,
+               sequence_feature_columns,
+               context_feature_columns=None,
+               num_units=None,
+               cell_type=USE_DEFAULT,
+               rnn_cell_fn=None,
+               model_dir=None,
+               optimizer='Adagrad',
+               input_layer_partitioner=None,
+               config=None):
+    """Initializes a `RNNClassifier` instance.
+
+    Args:
+      head: A `_Head` instance constructed with a method such as
+        `tf.contrib.estimator.multi_label_head`. This specifies the model's
+        output and loss function to be optimized.
+      sequence_feature_columns: An iterable containing the `FeatureColumn`s
+        that represent sequential input. All items in the set should either be
+        sequence columns (e.g. `sequence_numeric_column`) or constructed from
+        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
+        input).
+      context_feature_columns: An iterable containing the `FeatureColumn`s
+        for contextual input. The data represented by these columns will be
+        replicated and given to the RNN at each timestep. These columns must be
+        instances of classes derived from `_DenseColumn` such as
+        `numeric_column`, not the sequential variants.
+      num_units: Iterable of integer number of hidden units per RNN layer. If
+        set, `cell_type` must also be specified and `rnn_cell_fn` must be
+        `None`.
+      cell_type: A subclass of `tf.nn.rnn_cell.RNNCell` or a string specifying
+        the cell type. Supported strings are: `'basic_rnn'`, `'lstm'`, and
+        `'gru'`. If set, `num_units` must also be specified and `rnn_cell_fn`
+        must be `None`.
+      rnn_cell_fn: A function with one argument, a `tf.estimator.ModeKeys`, and
+        returns an object of type `tf.nn.rnn_cell.RNNCell` that will be used to
+        construct the RNN. If set, `num_units` and `cell_type` cannot be set.
+        This is for advanced users who need additional customization beyond
+        `num_units` and `cell_type`. Note that `tf.nn.rnn_cell.MultiRNNCell` is
+        needed for stacked RNNs.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator to
+        continue training a previously saved model.
+      optimizer: An instance of `tf.Optimizer` or string specifying optimizer
+        type. Defaults to Adagrad optimizer.
+      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
+        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+      config: `RunConfig` object to configure the runtime settings.
 
-# Include attrs that start with single underscore.
-rnn.__all__ = [s for s in dir(rnn) if not s.startswith('__')]
+    Raises:
+      ValueError: If `num_units`, `cell_type`, and `rnn_cell_fn` are not
+        compatible.
+    """
+    rnn_cell_fn = _assert_rnn_cell_fn(rnn_cell_fn, num_units, cell_type)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.rnn import *
+    def _model_fn(features, labels, mode, config):
+      return _rnn_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          rnn_cell_fn=rnn_cell_fn,
+          sequence_feature_columns=tuple(sequence_feature_columns or []),
+          context_feature_columns=tuple(context_feature_columns or []),
+          optimizer=optimizer,
+          input_layer_partitioner=input_layer_partitioner,
+          config=config)
+    super(RNNEstimator, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config)
diff --git a/tensorflow/contrib/estimator/python/estimator/rnn_test.py b/tensorflow/contrib/estimator/python/estimator/rnn_test.py
new file mode 100644
index 0000000000..89506ee661
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/rnn_test.py
@@ -0,0 +1,1185 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for rnn.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import random
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.contrib.estimator.python.estimator import head as head_lib
+from tensorflow.contrib.estimator.python.estimator import rnn
+from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as seq_fc
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.data.experimental.ops import readers
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.canned import parsing_utils
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.lib.io import python_io
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import rnn_cell
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import training_util
+
+
+# Names of variables created by BasicRNNCell model.
+TOKEN_EMBEDDING_NAME = 'rnn/sequence_input_layer/input_layer/tokens_sequential_embedding/embedding_weights'
+CELL_WEIGHTS_NAME = 'rnn/rnn/basic_rnn_cell/kernel'
+CELL_BIAS_NAME = 'rnn/rnn/basic_rnn_cell/bias'
+MULTI_CELL_WEIGHTS_NAME_PATTERN = 'rnn/rnn/multi_rnn_cell/cell_%d/basic_rnn_cell/kernel'
+MULTI_CELL_BIAS_NAME_PATTERN = 'rnn/rnn/multi_rnn_cell/cell_%d/basic_rnn_cell/bias'
+LOGITS_WEIGHTS_NAME = 'rnn/logits/dense/kernel'
+LOGITS_BIAS_NAME = 'rnn/logits/dense/bias'
+
+
+def _assert_close(expected, actual, rtol=1e-04, name='assert_close'):
+  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
+    expected = ops.convert_to_tensor(expected, name='expected')
+    actual = ops.convert_to_tensor(actual, name='actual')
+    rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
+    rtol = ops.convert_to_tensor(rtol, name='rtol')
+    return check_ops.assert_less(
+        rdiff,
+        rtol,
+        data=('Condition expected =~ actual did not hold element-wise:'
+              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
+              'rtol = ', rtol,),
+        name=scope)
+
+
+def create_checkpoint(rnn_weights, rnn_biases, logits_weights, logits_biases,
+                      global_step, model_dir):
+  """Create checkpoint file with provided model weights.
+
+  Args:
+    rnn_weights: Iterable of values of weights for the RNN cell.
+    rnn_biases: Iterable of values of biases for the RNN cell.
+    logits_weights: Iterable of values for matrix connecting RNN output to
+      logits.
+    logits_biases: Iterable of values for logits bias term.
+    global_step: Initial global step to save in checkpoint.
+    model_dir: Directory into which checkpoint is saved.
+  """
+  model_weights = {}
+  model_weights[CELL_WEIGHTS_NAME] = rnn_weights
+  model_weights[CELL_BIAS_NAME] = rnn_biases
+  model_weights[LOGITS_WEIGHTS_NAME] = logits_weights
+  model_weights[LOGITS_BIAS_NAME] = logits_biases
+
+  with ops.Graph().as_default():
+    # Create model variables.
+    for k, v in six.iteritems(model_weights):
+      variables_lib.Variable(v, name=k, dtype=dtypes.float32)
+
+    # Create non-model variables.
+    global_step_var = training_util.create_global_step()
+    assign_op = global_step_var.assign(global_step)
+
+    # Initialize vars and save checkpoint.
+    with monitored_session.MonitoredTrainingSession(
+        checkpoint_dir=model_dir) as sess:
+      sess.run(assign_op)
+
+
+class RNNLogitFnTest(test.TestCase):
+  """Tests correctness of logits calculated from _rnn_logit_fn_builder."""
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_logits(self, mode, rnn_units, logits_dimension, features_fn,
+                   sequence_feature_columns, context_feature_columns,
+                   expected_logits):
+    """Tests that the expected logits are calculated."""
+    with ops.Graph().as_default():
+      # Global step needed for MonitoredSession, which is in turn used to
+      # explicitly set variable weights through a checkpoint.
+      training_util.create_global_step()
+      # Use a variable scope here with 'rnn', emulating the rnn model_fn, so
+      # the checkpoint naming is shared.
+      with variable_scope.variable_scope('rnn'):
+        input_layer_partitioner = (
+            partitioned_variables.min_max_variable_partitioner(
+                max_partitions=0, min_slice_size=64 << 20))
+        logit_fn = rnn._rnn_logit_fn_builder(
+            output_units=logits_dimension,
+            rnn_cell_fn=rnn._make_rnn_cell_fn(rnn_units),
+            sequence_feature_columns=sequence_feature_columns,
+            context_feature_columns=context_feature_columns,
+            input_layer_partitioner=input_layer_partitioner)
+        # Features are constructed within this function, otherwise the Tensors
+        # containing the features would be defined outside this graph.
+        logits = logit_fn(features=features_fn(), mode=mode)
+        with monitored_session.MonitoredTrainingSession(
+            checkpoint_dir=self._model_dir) as sess:
+          self.assertAllClose(expected_logits, sess.run(logits), atol=1e-4)
+
+  def testOneDimLogits(self):
+    """Tests one-dimensional logits.
+
+    Intermediate values are rounded for ease in reading.
+    input_layer = [[[10]], [[5]]]
+    initial_state = [0, 0]
+    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
+                              tanh(-.2*10 - .3*0 - .4*0 +.5)]]
+                          = [[0.83, -0.91]]
+    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
+                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)]]
+                          = [[0.53, -0.37]]
+    logits = [[-1*0.53 - 1*0.37 + 0.3]] = [[-0.6033]]
+    """
+    base_global_step = 100
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1.], [1.]],
+        logits_biases=[0.3],
+        global_step=base_global_step,
+        model_dir=self._model_dir)
+
+    def features_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5.],
+                  indices=[[0, 0], [0, 1]],
+                  dense_shape=[1, 2]),
+      }
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+    context_feature_columns = []
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          rnn_units=[2],
+          logits_dimension=1,
+          features_fn=features_fn,
+          sequence_feature_columns=sequence_feature_columns,
+          context_feature_columns=context_feature_columns,
+          expected_logits=[[-0.6033]])
+
+  def testMultiDimLogits(self):
+    """Tests multi-dimensional logits.
+
+    Intermediate values are rounded for ease in reading.
+    input_layer = [[[10]], [[5]]]
+    initial_state = [0, 0]
+    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
+                              tanh(-.2*10 - .3*0 - .4*0 +.5)]]
+                          = [[0.83, -0.91]]
+    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
+                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)]]
+                          = [[0.53, -0.37]]
+    logits = [[-1*0.53 - 1*0.37 + 0.3],
+              [0.5*0.53 + 0.3*0.37 + 0.4],
+              [0.2*0.53 - 0.1*0.37 + 0.5]
+           = [[-0.6033, 0.7777, 0.5698]]
+    """
+    base_global_step = 100
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
+        logits_biases=[0.3, 0.4, 0.5],
+        global_step=base_global_step,
+        model_dir=self._model_dir)
+
+    def features_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5.],
+                  indices=[[0, 0], [0, 1]],
+                  dense_shape=[1, 2]),
+      }
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+    context_feature_columns = []
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          rnn_units=[2],
+          logits_dimension=3,
+          features_fn=features_fn,
+          sequence_feature_columns=sequence_feature_columns,
+          context_feature_columns=context_feature_columns,
+          expected_logits=[[-0.6033, 0.7777, 0.5698]])
+
+  def testMultiExampleMultiDim(self):
+    """Tests multiple examples and multi-dimensional logits.
+
+    Intermediate values are rounded for ease in reading.
+    input_layer = [[[10], [5]], [[2], [7]]]
+    initial_state = [[0, 0], [0, 0]]
+    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
+                              tanh(-.2*10 - .3*0 - .4*0 +.5)],
+                             [tanh(.1*2 + .2*0 + .3*0 +.2),
+                              tanh(-.2*2 - .3*0 - .4*0 +.5)]]
+                          = [[0.83, -0.91], [0.38, 0.10]]
+    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
+                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
+                             [tanh(.1*7 + .2*.38 + .3*.10 +.2),
+                              tanh(-.2*7 - .3*.38 - .4*.10 +.5)]]
+                          = [[0.53, -0.37], [0.76, -0.78]
+    logits = [[-1*0.53 - 1*0.37 + 0.3,
+               0.5*0.53 + 0.3*0.37 + 0.4,
+               0.2*0.53 - 0.1*0.37 + 0.5],
+              [-1*0.76 - 1*0.78 + 0.3,
+               0.5*0.76 +0.3*0.78 + 0.4,
+               0.2*0.76 -0.1*0.78 + 0.5]]
+           = [[-0.6033, 0.7777, 0.5698], [-1.2473, 1.0170, 0.5745]]
+    """
+    base_global_step = 100
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
+        logits_biases=[0.3, 0.4, 0.5],
+        global_step=base_global_step,
+        model_dir=self._model_dir)
+
+    def features_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5., 2., 7.],
+                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
+                  dense_shape=[2, 2]),
+      }
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))
+    ]
+    context_feature_columns = []
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          rnn_units=[2],
+          logits_dimension=3,
+          features_fn=features_fn,
+          sequence_feature_columns=sequence_feature_columns,
+          context_feature_columns=context_feature_columns,
+          expected_logits=[[-0.6033, 0.7777, 0.5698],
+                           [-1.2473, 1.0170, 0.5745]])
+
+  def testMultiExamplesDifferentLength(self):
+    """Tests multiple examples with different lengths.
+
+    Intermediate values are rounded for ease in reading.
+    input_layer = [[[10], [5]], [[2], [0]]]
+    initial_state = [[0, 0], [0, 0]]
+    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
+                              tanh(-.2*10 - .3*0 - .4*0 +.5)],
+                             [tanh(.1*2 + .2*0 + .3*0 +.2),
+                              tanh(-.2*2 - .3*0 - .4*0 +.5)]]
+                          = [[0.83, -0.91], [0.38, 0.10]]
+    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
+                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
+                             [<ignored-padding>]]
+                          = [[0.53, -0.37], [<ignored-padding>]]
+    logits = [[-1*0.53 - 1*0.37 + 0.3],
+              [-1*0.38 + 1*0.10 + 0.3]]
+           = [[-0.6033], [0.0197]]
+    """
+    base_global_step = 100
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1.], [1.]],
+        logits_biases=[0.3],
+        global_step=base_global_step,
+        model_dir=self._model_dir)
+
+    def features_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5., 2.],
+                  indices=[[0, 0], [0, 1], [1, 0]],
+                  dense_shape=[2, 2]),
+      }
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+    context_feature_columns = []
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          rnn_units=[2],
+          logits_dimension=1,
+          features_fn=features_fn,
+          sequence_feature_columns=sequence_feature_columns,
+          context_feature_columns=context_feature_columns,
+          expected_logits=[[-0.6033], [0.0197]])
+
+  def testMultiExamplesWithContext(self):
+    """Tests multiple examples with context features.
+
+    Intermediate values are rounded for ease in reading.
+    input_layer = [[[10, -0.5], [5, -0.5]], [[2, 0.8], [0, 0]]]
+    initial_state = [[0, 0], [0, 0]]
+    rnn_output_timestep_1 = [[tanh(.1*10 - 1*.5 + .2*0 + .3*0 +.2),
+                              tanh(-.2*10 - 0.9*.5 - .3*0 - .4*0 +.5)],
+                             [tanh(.1*2 + 1*.8 + .2*0 + .3*0 +.2),
+                              tanh(-.2*2 + .9*.8 - .3*0 - .4*0 +.5)]]
+                          = [[0.60, -0.96], [0.83, 0.68]]
+    rnn_output_timestep_2 = [[tanh(.1*5 - 1*.5 + .2*.60 - .3*.96 +.2),
+                              tanh(-.2*5 - .9*.5 - .3*.60 + .4*.96 +.5)],
+                             [<ignored-padding>]]
+                          = [[0.03, -0.63], [<ignored-padding>]]
+    logits = [[-1*0.03 - 1*0.63 + 0.3],
+              [-1*0.83 + 1*0.68 + 0.3]]
+           = [[-0.3662], [0.1414]]
+    """
+    base_global_step = 100
+    create_checkpoint(
+        # Context features weights are inserted between input and state weights.
+        rnn_weights=[[.1, -.2], [1., 0.9], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1.], [1.]],
+        logits_biases=[0.3],
+        global_step=base_global_step,
+        model_dir=self._model_dir)
+
+    def features_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5., 2.],
+                  indices=[[0, 0], [0, 1], [1, 0]],
+                  dense_shape=[2, 2]),
+          'context': [[-0.5], [0.8]],
+      }
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+    context_feature_columns = [fc.numeric_column('context', shape=(1,))]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          rnn_units=[2],
+          logits_dimension=1,
+          features_fn=features_fn,
+          sequence_feature_columns=sequence_feature_columns,
+          context_feature_columns=context_feature_columns,
+          expected_logits=[[-0.3662], [0.1414]])
+
+  def testMultiExamplesMultiFeatures(self):
+    """Tests examples with multiple sequential feature columns.
+
+    Intermediate values are rounded for ease in reading.
+    input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
+    initial_state = [[0, 0], [0, 0]]
+    rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
+                              tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
+                             [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
+                              tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
+                          = [[0.94, -0.96], [0.72, -0.38]]
+    rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
+                              tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
+                             [<ignored-padding>]]
+                          = [[0.92, -0.88], [<ignored-padding>]]
+    logits = [[-1*0.92 - 1*0.88 + 0.3],
+              [-1*0.72 - 1*0.38 + 0.3]]
+           = [[-1.5056], [-0.7962]]
+    """
+    base_global_step = 100
+    create_checkpoint(
+        # FeatureColumns are sorted alphabetically, so on_sale weights are
+        # inserted before price.
+        rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1.], [1.]],
+        logits_biases=[0.3],
+        global_step=base_global_step,
+        model_dir=self._model_dir)
+
+    def features_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5., 2.],
+                  indices=[[0, 0], [0, 1], [1, 0]],
+                  dense_shape=[2, 2]),
+          'on_sale':
+              sparse_tensor.SparseTensor(
+                  values=[0, 1, 0],
+                  indices=[[0, 0], [0, 1], [1, 0]],
+                  dense_shape=[2, 2]),
+      }
+
+    price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
+    on_sale_column = fc.indicator_column(
+        seq_fc.sequence_categorical_column_with_identity(
+            'on_sale', num_buckets=2))
+    sequence_feature_columns = [price_column, on_sale_column]
+    context_feature_columns = []
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          rnn_units=[2],
+          logits_dimension=1,
+          features_fn=features_fn,
+          sequence_feature_columns=sequence_feature_columns,
+          context_feature_columns=context_feature_columns,
+          expected_logits=[[-1.5056], [-0.7962]])
+
+
+class RNNClassifierTrainingTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _assert_checkpoint(
+      self, n_classes, input_units, cell_units, expected_global_step):
+
+    shapes = {
+        name: shape for (name, shape) in
+        checkpoint_utils.list_variables(self._model_dir)
+    }
+
+    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
+    self.assertEqual(
+        expected_global_step,
+        checkpoint_utils.load_variable(
+            self._model_dir, ops.GraphKeys.GLOBAL_STEP))
+
+    # RNN Cell variables.
+    if len(cell_units) > 1:
+      for i, cell_unit in enumerate(cell_units):
+        self.assertEqual([input_units + cell_unit, cell_unit],
+                         shapes[MULTI_CELL_WEIGHTS_NAME_PATTERN % i])
+        self.assertEqual([cell_unit],
+                         shapes[MULTI_CELL_BIAS_NAME_PATTERN % i])
+        input_units = cell_unit
+    elif len(cell_units) == 1:
+      self.assertEqual([input_units + cell_unit, cell_unit],
+                       shapes[CELL_WEIGHTS_NAME])
+      self.assertEqual([cell_unit], shapes[CELL_BIAS_NAME])
+
+    # Logits variables.
+    logits_dimension = n_classes if n_classes > 2 else 1
+    self.assertEqual([cell_units[-1], logits_dimension],
+                     shapes[LOGITS_WEIGHTS_NAME])
+    self.assertEqual([logits_dimension], shapes[LOGITS_BIAS_NAME])
+
+  def _mock_optimizer(self, expected_loss=None):
+    expected_var_names = [
+        '%s/part_0:0' % CELL_BIAS_NAME,
+        '%s/part_0:0' % CELL_WEIGHTS_NAME,
+        '%s/part_0:0' % LOGITS_BIAS_NAME,
+        '%s/part_0:0' % LOGITS_WEIGHTS_NAME,
+    ]
+
+    def _minimize(loss, global_step):
+      trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertItemsEqual(
+          expected_var_names,
+          [var.name for var in trainable_vars])
+
+      # Verify loss. We can't check the value directly, so we add an assert op.
+      self.assertEquals(0, loss.shape.ndims)
+      if expected_loss is None:
+        return state_ops.assign_add(global_step, 1).op
+      assert_loss = _assert_close(
+          math_ops.to_float(expected_loss, name='expected'),
+          loss,
+          name='assert_loss')
+      with ops.control_dependencies((assert_loss,)):
+        return state_ops.assign_add(global_step, 1).op
+
+    mock_optimizer = test.mock.NonCallableMock(
+        spec=optimizer.Optimizer,
+        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
+    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
+
+    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
+    # So, return mock_optimizer itself for deepcopy.
+    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
+    return mock_optimizer
+
+  def testConflictingRNNCellFn(self):
+    col = seq_fc.sequence_categorical_column_with_hash_bucket(
+        'tokens', hash_bucket_size=10)
+    embed = fc.embedding_column(col, dimension=2)
+    cell_units = [4, 2]
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        'num_units and cell_type must not be specified when using rnn_cell_fn'):
+      rnn.RNNClassifier(
+          sequence_feature_columns=[embed],
+          rnn_cell_fn=lambda x: x,
+          num_units=cell_units)
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        'num_units and cell_type must not be specified when using rnn_cell_fn'):
+      rnn.RNNClassifier(
+          sequence_feature_columns=[embed],
+          rnn_cell_fn=lambda x: x,
+          cell_type='lstm')
+
+  def _testFromScratchWithDefaultOptimizer(self, n_classes):
+    def train_input_fn():
+      return {
+          'tokens':
+              sparse_tensor.SparseTensor(
+                  values=['the', 'cat', 'sat'],
+                  indices=[[0, 0], [0, 1], [0, 2]],
+                  dense_shape=[1, 3]),
+      }, [[1]]
+
+    col = seq_fc.sequence_categorical_column_with_hash_bucket(
+        'tokens', hash_bucket_size=10)
+    embed = fc.embedding_column(col, dimension=2)
+    input_units = 2
+
+    cell_units = [4, 2]
+    est = rnn.RNNClassifier(
+        sequence_feature_columns=[embed],
+        num_units=cell_units,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # Train for a few steps, and validate final checkpoint.
+    num_steps = 10
+    est.train(input_fn=train_input_fn, steps=num_steps)
+    self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
+
+  def testBinaryClassFromScratchWithDefaultOptimizer(self):
+    self._testFromScratchWithDefaultOptimizer(n_classes=2)
+
+  def testMultiClassFromScratchWithDefaultOptimizer(self):
+    self._testFromScratchWithDefaultOptimizer(n_classes=4)
+
+  def testFromScratchWithCustomRNNCellFn(self):
+    def train_input_fn():
+      return {
+          'tokens':
+              sparse_tensor.SparseTensor(
+                  values=['the', 'cat', 'sat'],
+                  indices=[[0, 0], [0, 1], [0, 2]],
+                  dense_shape=[1, 3]),
+      }, [[1]]
+
+    col = seq_fc.sequence_categorical_column_with_hash_bucket(
+        'tokens', hash_bucket_size=10)
+    embed = fc.embedding_column(col, dimension=2)
+    input_units = 2
+    cell_units = [4, 2]
+    n_classes = 2
+
+    def rnn_cell_fn(mode):
+      del mode  # unused
+      cells = [rnn_cell.BasicRNNCell(num_units=n) for n in cell_units]
+      return rnn_cell.MultiRNNCell(cells)
+
+    est = rnn.RNNClassifier(
+        sequence_feature_columns=[embed],
+        rnn_cell_fn=rnn_cell_fn,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # Train for a few steps, and validate final checkpoint.
+    num_steps = 10
+    est.train(input_fn=train_input_fn, steps=num_steps)
+    self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
+
+  def _testExampleWeight(self, n_classes):
+    def train_input_fn():
+      return {
+          'tokens':
+              sparse_tensor.SparseTensor(
+                  values=['the', 'cat', 'sat', 'dog', 'barked'],
+                  indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
+                  dense_shape=[2, 3]),
+          'w': [[1], [2]],
+      }, [[1], [0]]
+
+    col = seq_fc.sequence_categorical_column_with_hash_bucket(
+        'tokens', hash_bucket_size=10)
+    embed = fc.embedding_column(col, dimension=2)
+    input_units = 2
+
+    cell_units = [4, 2]
+    est = rnn.RNNClassifier(
+        num_units=cell_units,
+        sequence_feature_columns=[embed],
+        n_classes=n_classes,
+        weight_column='w',
+        model_dir=self._model_dir)
+
+    # Train for a few steps, and validate final checkpoint.
+    num_steps = 10
+    est.train(input_fn=train_input_fn, steps=num_steps)
+    self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
+
+  def testBinaryClassWithExampleWeight(self):
+    self._testExampleWeight(n_classes=2)
+
+  def testMultiClassWithExampleWeight(self):
+    self._testExampleWeight(n_classes=4)
+
+  def testBinaryClassFromCheckpoint(self):
+    initial_global_step = 100
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1.], [1.]],
+        logits_biases=[0.3],
+        global_step=initial_global_step,
+        model_dir=self._model_dir)
+
+    def train_input_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5., 2.],
+                  indices=[[0, 0], [0, 1], [1, 0]],
+                  dense_shape=[2, 2]),
+      }, [[0], [1]]
+
+    # Uses same checkpoint and examples as testBinaryClassEvaluationMetrics.
+    # See that test for loss calculation.
+    mock_optimizer = self._mock_optimizer(expected_loss=0.559831)
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+    est = rnn.RNNClassifier(
+        num_units=[2],
+        sequence_feature_columns=sequence_feature_columns,
+        n_classes=2,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+    est.train(input_fn=train_input_fn, steps=10)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+
+  def testMultiClassFromCheckpoint(self):
+    initial_global_step = 100
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
+        logits_biases=[0.3, 0.4, 0.5],
+        global_step=initial_global_step,
+        model_dir=self._model_dir)
+
+    def train_input_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5., 2., 7.],
+                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
+                  dense_shape=[2, 2]),
+      }, [[0], [1]]
+
+    # Uses same checkpoint and examples as testMultiClassEvaluationMetrics.
+    # See that test for loss calculation.
+    mock_optimizer = self._mock_optimizer(expected_loss=1.331465)
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+    est = rnn.RNNClassifier(
+        num_units=[2],
+        sequence_feature_columns=sequence_feature_columns,
+        n_classes=3,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+    est.train(input_fn=train_input_fn, steps=10)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+
+
+def sorted_key_dict(unsorted_dict):
+  return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}
+
+
+class RNNClassifierEvaluationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def testBinaryClassEvaluationMetrics(self):
+    global_step = 100
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1.], [1.]],
+        logits_biases=[0.3],
+        global_step=global_step,
+        model_dir=self._model_dir)
+
+    def eval_input_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5., 2.],
+                  indices=[[0, 0], [0, 1], [1, 0]],
+                  dense_shape=[2, 2]),
+      }, [[0], [1]]
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+
+    est = rnn.RNNClassifier(
+        num_units=[2],
+        sequence_feature_columns=sequence_feature_columns,
+        n_classes=2,
+        model_dir=self._model_dir)
+    eval_metrics = est.evaluate(eval_input_fn, steps=1)
+
+    # Uses identical numbers to testMultiExamplesWithDifferentLength.
+    # See that test for logits calculation.
+    # logits = [[-0.603282], [0.019719]]
+    # probability = exp(logits) / (1 + exp(logits)) = [[0.353593], [0.504930]]
+    # loss = -label * ln(p) - (1 - label) * ln(1 - p)
+    #      = [[0.436326], [0.683335]]
+    # sum_over_batch_size = (0.436326 + 0.683335)/2
+    expected_metrics = {
+        ops.GraphKeys.GLOBAL_STEP:
+            global_step,
+        metric_keys.MetricKeys.LOSS:
+            0.559831,
+        metric_keys.MetricKeys.LOSS_MEAN:
+            0.559831,
+        metric_keys.MetricKeys.ACCURACY:
+            1.0,
+        metric_keys.MetricKeys.PREDICTION_MEAN:
+            0.429262,
+        metric_keys.MetricKeys.LABEL_MEAN:
+            0.5,
+        metric_keys.MetricKeys.ACCURACY_BASELINE:
+            0.5,
+        # With default threshold of 0.5, the model is a perfect classifier.
+        metric_keys.MetricKeys.RECALL:
+            1.0,
+        metric_keys.MetricKeys.PRECISION:
+            1.0,
+        # Positive example is scored above negative, so AUC = 1.0.
+        metric_keys.MetricKeys.AUC:
+            1.0,
+        metric_keys.MetricKeys.AUC_PR:
+            1.0,
+    }
+    self.assertAllClose(
+        sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
+
+  def testMultiClassEvaluationMetrics(self):
+    global_step = 100
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
+        logits_biases=[0.3, 0.4, 0.5],
+        global_step=global_step,
+        model_dir=self._model_dir)
+
+    def eval_input_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5., 2., 7.],
+                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
+                  dense_shape=[2, 2]),
+      }, [[0], [1]]
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+
+    est = rnn.RNNClassifier(
+        num_units=[2],
+        sequence_feature_columns=sequence_feature_columns,
+        n_classes=3,
+        model_dir=self._model_dir)
+    eval_metrics = est.evaluate(eval_input_fn, steps=1)
+
+    # Uses identical numbers to testMultiExampleMultiDim.
+    # See that test for logits calculation.
+    # logits = [[-0.603282, 0.777708, 0.569756],
+    #           [-1.247356, 1.017018, 0.574481]]
+    # logits_exp = exp(logits) / (1 + exp(logits))
+    #            = [[0.547013, 2.176468, 1.767836],
+    #               [0.287263, 2.764937, 1.776208]]
+    # softmax_probabilities = logits_exp / logits_exp.sum()
+    #                       = [[0.121793, 0.484596, 0.393611],
+    #                          [0.059494, 0.572639, 0.367866]]
+    # loss = -1. * log(softmax[label])
+    #      = [[2.105432], [0.557500]]
+    # sum_over_batch_size = (2.105432 + 0.557500)/2
+    expected_metrics = {
+        ops.GraphKeys.GLOBAL_STEP: global_step,
+        metric_keys.MetricKeys.LOSS: 1.331465,
+        metric_keys.MetricKeys.LOSS_MEAN: 1.331466,
+        metric_keys.MetricKeys.ACCURACY: 0.5,
+    }
+
+    self.assertAllClose(
+        sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
+
+
+class RNNClassifierPredictionTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def testBinaryClassPredictions(self):
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1.], [1.]],
+        logits_biases=[0.3],
+        global_step=0,
+        model_dir=self._model_dir)
+
+    def predict_input_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5.],
+                  indices=[[0, 0], [0, 1]],
+                  dense_shape=[1, 2]),
+      }
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+    label_vocabulary = ['class_0', 'class_1']
+
+    est = rnn.RNNClassifier(
+        num_units=[2],
+        sequence_feature_columns=sequence_feature_columns,
+        n_classes=2,
+        label_vocabulary=label_vocabulary,
+        model_dir=self._model_dir)
+    # Uses identical numbers to testOneDimLogits.
+    # See that test for logits calculation.
+    # logits = [-0.603282]
+    # logistic = exp(-0.6033) / (1 + exp(-0.6033)) = [0.353593]
+    # probabilities = [0.646407, 0.353593]
+    # class_ids = argmax(probabilities) = [0]
+    predictions = next(est.predict(predict_input_fn))
+    self.assertAllClose([-0.603282],
+                        predictions[prediction_keys.PredictionKeys.LOGITS])
+    self.assertAllClose([0.353593],
+                        predictions[prediction_keys.PredictionKeys.LOGISTIC])
+    self.assertAllClose(
+        [0.646407, 0.353593],
+        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+    self.assertAllClose([0],
+                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
+    self.assertEqual([b'class_0'],
+                     predictions[prediction_keys.PredictionKeys.CLASSES])
+
+  def testMultiClassPredictions(self):
+    create_checkpoint(
+        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
+        rnn_biases=[.2, .5],
+        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
+        logits_biases=[0.3, 0.4, 0.5],
+        global_step=0,
+        model_dir=self._model_dir)
+
+    def predict_input_fn():
+      return {
+          'price':
+              sparse_tensor.SparseTensor(
+                  values=[10., 5.],
+                  indices=[[0, 0], [0, 1]],
+                  dense_shape=[1, 2]),
+      }
+
+    sequence_feature_columns = [
+        seq_fc.sequence_numeric_column('price', shape=(1,))]
+    label_vocabulary = ['class_0', 'class_1', 'class_2']
+
+    est = rnn.RNNClassifier(
+        num_units=[2],
+        sequence_feature_columns=sequence_feature_columns,
+        n_classes=3,
+        label_vocabulary=label_vocabulary,
+        model_dir=self._model_dir)
+    # Uses identical numbers to testMultiDimLogits.
+    # See that test for logits calculation.
+    # logits = [-0.603282, 0.777708, 0.569756]
+    # logits_exp = exp(logits) = [0.547013, 2.176468, 1.767836]
+    # softmax_probabilities = logits_exp / logits_exp.sum()
+    #                       = [0.121793, 0.484596, 0.393611]
+    # class_ids = argmax(probabilities) = [1]
+    predictions = next(est.predict(predict_input_fn))
+    self.assertAllClose([-0.603282, 0.777708, 0.569756],
+                        predictions[prediction_keys.PredictionKeys.LOGITS])
+    self.assertAllClose(
+        [0.121793, 0.484596, 0.393611],
+        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+    self.assertAllClose([1],
+                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
+    self.assertEqual([b'class_1'],
+                     predictions[prediction_keys.PredictionKeys.CLASSES])
+
+
+class BaseRNNClassificationIntegrationTest(object):
+
+  def __init__(self, _create_estimator_fn):
+    self._create_estimator_fn = _create_estimator_fn
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(self, feature_columns, train_input_fn, eval_input_fn,
+                          predict_input_fn, n_classes, batch_size):
+    cell_units = [4, 2]
+    est = self._create_estimator_fn(feature_columns, n_classes, cell_units,
+                                    self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUATE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predicted_proba = np.array([
+        x[prediction_keys.PredictionKeys.PROBABILITIES]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
+
+    # EXPORT
+    feature_spec = parsing_utils.classifier_parse_example_spec(
+        feature_columns,
+        label_key='label',
+        label_dtype=dtypes.int64)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def testNumpyInputFn(self):
+    """Tests complete flow with numpy_input_fn."""
+    n_classes = 3
+    batch_size = 10
+    words = ['dog', 'cat', 'bird', 'the', 'a', 'sat', 'flew', 'slept']
+    # Numpy only supports dense input, so all examples will have same length.
+    # TODO(b/73160931): Update test when support for prepadded data exists.
+    sequence_length = 3
+
+    features = []
+    for _ in range(batch_size):
+      sentence = random.sample(words, sequence_length)
+      features.append(sentence)
+
+    x_data = np.array(features)
+    y_data = np.random.randint(n_classes, size=batch_size)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'tokens': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'tokens': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'tokens': x_data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    col = seq_fc.sequence_categorical_column_with_hash_bucket(
+        'tokens', hash_bucket_size=10)
+    embed = fc.embedding_column(col, dimension=2)
+    feature_columns = [embed]
+
+    self._test_complete_flow(
+        feature_columns=feature_columns,
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        n_classes=n_classes,
+        batch_size=batch_size)
+
+  def testParseExampleInputFn(self):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    n_classes = 3
+    batch_size = 10
+    words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']
+
+    _, examples_file = tempfile.mkstemp()
+    writer = python_io.TFRecordWriter(examples_file)
+    for _ in range(batch_size):
+      sequence_length = random.randint(1, len(words))
+      sentence = random.sample(words, sequence_length)
+      label = random.randint(0, n_classes - 1)
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'tokens':
+                  feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
+                      value=sentence)),
+              'label':
+                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
+                      value=[label])),
+          }))
+      writer.write(example.SerializeToString())
+    writer.close()
+
+    col = seq_fc.sequence_categorical_column_with_hash_bucket(
+        'tokens', hash_bucket_size=10)
+    embed = fc.embedding_column(col, dimension=2)
+    feature_columns = [embed]
+    feature_spec = parsing_utils.classifier_parse_example_spec(
+        feature_columns,
+        label_key='label',
+        label_dtype=dtypes.int64)
+
+    def _train_input_fn():
+      dataset = readers.make_batched_features_dataset(
+          examples_file, batch_size, feature_spec)
+      return dataset.map(lambda features: (features, features.pop('label')))
+    def _eval_input_fn():
+      dataset = readers.make_batched_features_dataset(
+          examples_file, batch_size, feature_spec, num_epochs=1)
+      return dataset.map(lambda features: (features, features.pop('label')))
+    def _predict_input_fn():
+      dataset = readers.make_batched_features_dataset(
+          examples_file, batch_size, feature_spec, num_epochs=1)
+      def features_fn(features):
+        features.pop('label')
+        return features
+      return dataset.map(features_fn)
+
+    self._test_complete_flow(
+        feature_columns=feature_columns,
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        n_classes=n_classes,
+        batch_size=batch_size)
+
+
+def _rnn_classifier_fn(feature_columns, n_classes, cell_units, model_dir):
+  return rnn.RNNClassifier(
+      num_units=cell_units,
+      sequence_feature_columns=feature_columns,
+      n_classes=n_classes,
+      model_dir=model_dir)
+
+
+class RNNClassifierIntegrationTest(BaseRNNClassificationIntegrationTest,
+                                   test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    BaseRNNClassificationIntegrationTest.__init__(self, _rnn_classifier_fn)
+
+
+def _rnn_estimator_fn(feature_columns, n_classes, cell_units, model_dir):
+  return rnn.RNNEstimator(
+      head=head_lib.multi_class_head(n_classes=n_classes),
+      num_units=cell_units,
+      sequence_feature_columns=feature_columns,
+      model_dir=model_dir)
+
+
+class RNNEstimatorIntegrationTest(BaseRNNClassificationIntegrationTest,
+                                  test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    BaseRNNClassificationIntegrationTest.__init__(self, _rnn_estimator_fn)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/estimator/python/estimator/saved_model_estimator.py b/tensorflow/contrib/estimator/python/estimator/saved_model_estimator.py
index abd579ac7f..ce98e9987e 100644
--- a/tensorflow/contrib/estimator/python/estimator/saved_model_estimator.py
+++ b/tensorflow/contrib/estimator/python/estimator/saved_model_estimator.py
@@ -12,23 +12,438 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""saved_model_estimator python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Class that creates an Estimator from a SavedModel."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.contrib.estimator.python.estimator import saved_model_estimator
+import six
+
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.export import export as export_lib
+from tensorflow.python.estimator.export import export_output
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import constants
+from tensorflow.python.saved_model import loader_impl
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import training_util
+
+
+class SavedModelEstimator(estimator_lib.Estimator):
+  """Create an Estimator from a SavedModel.
+
+  Only SavedModels exported with
+  `tf.contrib.estimator.export_all_saved_models()` or
+  `tf.estimator.Estimator.export_savedmodel()` are supported for this class.
+
+  Example with `tf.estimator.DNNClassifier`:
+
+  **Step 1: Create and train DNNClassifier.**
+
+  ```python
+  feature1 = tf.feature_column.embedding_column(
+      tf.feature_column.categorical_column_with_vocabulary_list(
+          key='feature1', vocabulary_list=('green', 'yellow')), dimension=1)
+  feature2 = tf.feature_column.numeric_column(key='feature2', default_value=0.0)
+
+  classifier = tf.estimator.DNNClassifier(
+      hidden_units=[4,2], feature_columns=[feature1, feature2])
+
+  def input_fn():
+    features = {'feature1': tf.constant(['green', 'green', 'yellow']),
+                'feature2': tf.constant([3.5, 4.2, 6.1])}
+    label = tf.constant([1., 0., 0.])
+    return tf.data.Dataset.from_tensors((features, label)).repeat()
+
+  classifier.train(input_fn=input_fn, steps=10)
+  ```
+
+  **Step 2: Export classifier.**
+  First, build functions that specify the expected inputs.
+
+  ```python
+  # During train and evaluation, both the features and labels should be defined.
+  supervised_input_receiver_fn = (
+      tf.contrib.estimator.build_raw_supervised_input_receiver_fn(
+          {'feature1': tf.placeholder(dtype=tf.string, shape=[None]),
+           'feature2': tf.placeholder(dtype=tf.float32, shape=[None])},
+          tf.placeholder(dtype=tf.float32, shape=[None])))
+
+  # During predict mode, expect to receive a `tf.Example` proto, so a parsing
+  # function is used.
+  serving_input_receiver_fn = (
+      tf.estimator.export.build_parsing_serving_input_receiver_fn(
+          tf.feature_column.make_parse_example_spec([feature1, feature2])))
+  ```
+
+  Next, export the model as a SavedModel. A timestamped directory will be
+  created (for example `/tmp/export_all/1234567890`).
+
+  ```python
+  # Option 1: Save all modes (train, eval, predict)
+  export_dir = tf.contrib.estimator.export_all_saved_models(
+      classifier, '/tmp/export_all',
+      {tf.estimator.ModeKeys.TRAIN: supervised_input_receiver_fn,
+       tf.estimator.ModeKeys.EVAL: supervised_input_receiver_fn,
+       tf.estimator.ModeKeys.PREDICT: serving_input_receiver_fn})
+
+  # Option 2: Only export predict mode
+  export_dir = classifier.export_savedmodel(
+      '/tmp/export_predict', serving_input_receiver_fn)
+  ```
+
+  **Step 3: Create a SavedModelEstimator from the exported SavedModel.**
+
+  ```python
+  est = tf.contrib.estimator.SavedModelEstimator(export_dir)
+
+  # If all modes were exported, you can immediately evaluate and predict, or
+  # continue training. Otherwise only predict is available.
+  eval_results = est.evaluate(input_fn=input_fn, steps=1)
+  print(eval_results)
+
+  est.train(input_fn=input_fn, steps=20)
+
+  def predict_input_fn():
+    example = tf.train.Example()
+    example.features.feature['feature1'].bytes_list.value.extend(['yellow'])
+    example.features.feature['feature2'].float_list.value.extend([1.])
+    return {'inputs':tf.constant([example.SerializeToString()])}
+
+  predictions = est.predict(predict_input_fn)
+  print(next(predictions))
+  ```
+  """
+
+  def __init__(self, saved_model_dir, model_dir=None):
+    """Initialize a SavedModelEstimator.
+
+    The SavedModelEstimator loads its model function and variable values from
+    the graphs defined in the SavedModel. There is no option to pass in
+    `RunConfig` or `params` arguments, because the model function graph is
+    defined statically in the SavedModel.
+
+    Args:
+      saved_model_dir: Directory containing SavedModel protobuf and subfolders.
+      model_dir: Directory to save new checkpoints during training.
+
+    Raises:
+      NotImplementedError: If a DistributionStrategy is defined in the config.
+        Unless the SavedModelEstimator is subclassed, this shouldn't happen.
+    """
+    checkpoint = estimator_lib._get_saved_model_ckpt(saved_model_dir)  # pylint: disable=protected-access
+    vars_to_warm_start = [name for name, _ in
+                          checkpoint_utils.list_variables(checkpoint)]
+    warm_start_settings = estimator_lib.WarmStartSettings(
+        ckpt_to_initialize_from=checkpoint,
+        vars_to_warm_start=vars_to_warm_start)
+
+    super(SavedModelEstimator, self).__init__(
+        model_fn=self._model_fn_from_saved_model, model_dir=model_dir,
+        warm_start_from=warm_start_settings)
+    if self._train_distribution or self._eval_distribution:
+      raise NotImplementedError(
+          'SavedModelEstimator currently does not support '
+          'DistributionStrategy.')
+    self.saved_model_dir = saved_model_dir
+    self.saved_model_loader = loader_impl.SavedModelLoader(saved_model_dir)
+    self._available_modes = self._extract_available_modes()
+
+  def _extract_available_modes(self):
+    """Return list of modes found in SavedModel."""
+    available_modes = []
+    logging.info('Checking available modes for SavedModelEstimator.')
+    for mode in [model_fn_lib.ModeKeys.TRAIN, model_fn_lib.ModeKeys.EVAL,
+                 model_fn_lib.ModeKeys.PREDICT]:
+      try:
+        self._get_meta_graph_def_for_mode(mode)
+      except RuntimeError:
+        logging.warning('%s mode not found in SavedModel.' % mode)
+        continue
+
+      if self._get_signature_def_for_mode(mode) is not None:
+        available_modes.append(mode)
+
+    logging.info('Available modes for Estimator: %s' % available_modes)
+    return available_modes
+
+  def _validate_mode(self, mode):
+    """Make sure that mode can be run using the SavedModel."""
+    if mode not in self._available_modes:
+      raise RuntimeError('%s mode is not available in the SavedModel. Use '
+                         'saved_model_cli to check that the Metagraph for this '
+                         'mode has been exported.' % mode)
+
+  def _get_meta_graph_def_for_mode(self, mode):
+    tags = model_fn_lib.EXPORT_TAG_MAP[mode]
+    return self.saved_model_loader.get_meta_graph_def_from_tags(tags)
+
+  def _get_signature_def_for_mode(self, mode):
+    meta_graph_def = self._get_meta_graph_def_for_mode(mode)
+    sig_def_key = (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+                   if mode == model_fn_lib.ModeKeys.PREDICT else mode)
+    if sig_def_key not in meta_graph_def.signature_def:
+      logging.warning('Metagraph for mode %s was found, but SignatureDef with'
+                      ' key \"%s\" is missing.' % (mode, sig_def_key))
+      return None
+    return meta_graph_def.signature_def[sig_def_key]
+
+  def _create_and_assert_global_step(self, graph):
+    # Do nothing here. The global step variable will be created/loaded from the
+    # SavedModel. If a global step variable were created here, the result
+    # will be two duplicate global step variables, causing issues during
+    # the warm-start phase.
+    # Due to the global variable being created in the model function, this may
+    # cause issues when running DistributionStrategy. Thus, DistributionStrategy
+    # is not yet supported with SavedModelEstimator.
+    return None
+
+  def _model_fn_from_saved_model(self, features, labels, mode):
+    """Load a SavedModel graph and return an EstimatorSpec."""
+    # TODO(kathywu): Model function loads placeholders from the graph. Calling
+    # export_all_saved_models creates another placeholder for the inputs, on top
+    # of the original placeholders. There should be a way to avoid this.
+    self._validate_mode(mode)
+
+    g = ops.get_default_graph()
+    if  training_util.get_global_step(g) is not None:
+      raise RuntimeError(
+          'Graph must not contain a global step tensor before the SavedModel is'
+          ' loaded. Please make sure that the input function does not create a '
+          'global step.')
+
+    # Extract SignatureDef for information about the input and output tensors.
+    signature_def = self._get_signature_def_for_mode(mode)
+
+    # Generate input map for replacing the inputs in the SavedModel graph with
+    # the provided features and labels.
+    input_map = _generate_input_map(signature_def, features, labels)
+
+    # Create a list of the names of output tensors. When the graph is loaded,
+    # names of the output tensors may be remapped. This ensures that the correct
+    # tensors are returned in the EstimatorSpec.
+    output_tensor_names = [
+        value.name for value in six.itervalues(signature_def.outputs)]
+
+    # Load the graph. `output_tensors` contains output `Tensors` in the same
+    # same order as the `output_tensor_names` list.
+    tags = model_fn_lib.EXPORT_TAG_MAP[mode]
+    _, output_tensors = self.saved_model_loader.load_graph(
+        g, tags, input_map=input_map, return_elements=output_tensor_names)
+
+    # Create a scaffold from the MetaGraphDef that contains ops to initialize
+    # the graph. This should mirror the steps from _add_meta_graph_for_mode(),
+    # which creates a MetaGraphDef from the EstimatorSpec's scaffold.
+    scaffold = monitored_session.Scaffold(
+        local_init_op=loader_impl._get_main_op_tensor(  # pylint: disable=protected-access
+            self._get_meta_graph_def_for_mode(mode)))
+
+    # Ensure that a global step tensor has been created.
+    global_step_tensor = training_util.get_global_step(g)
+    training_util.assert_global_step(global_step_tensor)
+
+    # Extract values to return in the EstimatorSpec.
+    output_map = dict(zip(output_tensor_names, output_tensors))
+    outputs = {key: output_map[value.name]
+               for key, value in six.iteritems(signature_def.outputs)}
+
+    loss, predictions, metrics = _validate_and_extract_outputs(
+        mode, outputs, signature_def.method_name)
+
+    train_op = ops.get_collection(constants.TRAIN_OP_KEY)
+    if len(train_op) > 1:
+      raise RuntimeError('Multiple ops found in the train_op collection.')
+    train_op = None if not train_op else train_op[0]
+
+    _clear_saved_model_collections()
+    return model_fn_lib.EstimatorSpec(
+        scaffold=scaffold,
+        mode=mode,
+        loss=loss,
+        train_op=train_op,
+        predictions=predictions,
+        eval_metric_ops=metrics)
+
+
+def _clear_saved_model_collections():
+  """Clear collections that are expected empty when exporting a SavedModel.
+
+  The SavedModel builder uses these collections to track ops necessary to
+  restore the graph state. These collections are expected to be empty before
+  MetaGraphs are added to the builder.
+  """
+  del ops.get_collection_ref(constants.ASSETS_KEY)[:]
+  del ops.get_collection_ref(constants.LEGACY_INIT_OP_KEY)[:]
+  del ops.get_collection_ref(constants.MAIN_OP_KEY)[:]
+  del ops.get_collection_ref(constants.TRAIN_OP_KEY)[:]
+
+
+def _generate_input_map(signature_def, features, labels):
+  """Return dict mapping an input tensor name to a feature or label tensor.
+
+  Args:
+    signature_def: SignatureDef loaded from SavedModel
+    features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
+      `SparseTensor`, specifying the features to be passed to the model.
+    labels: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
+      `SparseTensor`, specifying the labels to be passed to the model. May be
+      `None`.
+
+  Returns:
+    dict mapping string names of inputs to features or labels tensors
+
+  Raises:
+    ValueError: if SignatureDef inputs are not completely mapped by the input
+      features and labels.
+  """
+  # pylint: disable=protected-access
+  if not isinstance(features, dict):
+    features = {export_lib._SINGLE_FEATURE_DEFAULT_NAME: features}
+  if labels is not None and not isinstance(labels, dict):
+    labels = {export_lib._SINGLE_LABEL_DEFAULT_NAME: labels}
+  # pylint: enable=protected-access
+
+  inputs = signature_def.inputs
+  input_map = {}
+  for key, tensor_info in six.iteritems(inputs):
+    input_name = tensor_info.name
+    if ':' in input_name:
+      input_name = input_name[:input_name.find(':')]
+
+    # When tensors are used as control inputs for operations, their names are
+    # prepended with a '^' character in the GraphDef. To handle possible control
+    # flow edge cases, control input names must be included in the input map.
+    control_dependency_name = '^' + input_name
+
+    if key in features:
+      _check_same_dtype_and_shape(features[key], tensor_info, key)
+      input_map[input_name] = input_map[control_dependency_name] = features[key]
+    elif labels is not None and key in labels:
+      _check_same_dtype_and_shape(labels[key], tensor_info, key)
+      input_map[input_name] = input_map[control_dependency_name] = labels[key]
+    else:
+      raise ValueError(
+          'Key \"%s\" not found in features or labels passed in to the model '
+          'function. All required keys: %s' % (key, inputs.keys()))
+
+  return input_map
+
+
+def _check_same_dtype_and_shape(tensor, tensor_info, name):
+  """Validate that tensor has the same properties as the TensorInfo proto.
+
+  Args:
+    tensor: a `Tensor` object.
+    tensor_info: a `TensorInfo` proto.
+    name: Name of the input (to identify Tensor if an error is raised).
+
+  Raises:
+    ValueError: If the tensor shape or dtype don't match the TensorInfo
+  """
+  dtype_error = (tensor.dtype != dtypes.DType(tensor_info.dtype))
+  shape_error = not tensor.shape.is_compatible_with(tensor_info.tensor_shape)
+
+  if dtype_error or shape_error:
+    msg = 'Tensor shape and/or dtype validation failed for input %s:' % name
+    if dtype_error:
+      msg += ('\n\tExpected dtype: %s, Got: %s'
+              % (dtypes.DType(tensor_info.dtype), tensor.dtype))
+    if shape_error:
+      msg += ('\n\tExpected shape: %s, Got: %s'
+              % (tensor_shape.TensorShape(tensor_info.tensor_shape),
+                 tensor.shape))
+
+    raise ValueError(msg)
+
+
+def _extract_eval_metrics(output_dict):
+  """Return a eval metric dict extracted from the output_dict.
+
+  Eval metrics consist of a value tensor and an update op. Both must be in the
+  passed-in tensor dictionary for an eval metric to be added to the returned
+  dictionary.
+
+  Args:
+    output_dict: a dict that maps strings to tensors.
+
+  Returns:
+    dict mapping strings to (value, update_op) tuples.
+  """
+  # pylint: disable=protected-access
+  metric_ops = {}
+  separator_char = export_output._SupervisedOutput._SEPARATOR_CHAR
+
+  for key, tensor in six.iteritems(output_dict):
+    split_key = key.split(separator_char)
+
+    # The metric name may contain the separator character, so recreate its name.
+    metric_name = separator_char.join(split_key[:-1])
+
+    if split_key[0] == export_output._SupervisedOutput.METRICS_NAME:
+      # If the key ends with the value suffix, and there is a corresponding
+      # key ending with the update_op suffix, then add tensors to metrics dict.
+      if split_key[-1] == export_output._SupervisedOutput.METRIC_VALUE_SUFFIX:
+        update_op = ''.join(
+            [metric_name, separator_char,
+             export_output._SupervisedOutput.METRIC_UPDATE_SUFFIX])
+        if update_op in output_dict:
+          update_op_tensor = output_dict[update_op]
+          metric_ops[metric_name] = (tensor, update_op_tensor)
+
+  # pylint: enable=protected-access
+  return metric_ops
+
+
+def _validate_and_extract_outputs(mode, output_dict, method_name):
+  """Extract values from SignatureDef output dictionary.
+
+  Args:
+    mode: One of the modes enumerated in `tf.estimator.ModeKeys`.
+    output_dict: dict of string SignatureDef keys to `Tensor`.
+    method_name: Method name of the SignatureDef as a string.
+
+  Returns:
+    Tuple of (
+      loss: `Tensor` object,
+      predictions: dictionary mapping string keys to `Tensor` objects,
+      metrics: dictionary mapping string keys to a tuple of two `Tensor` objects
+    )
+
+  Raises:
+    RuntimeError: raised if SignatureDef has an invalid method name for the mode
+  """
+  # pylint: disable=protected-access
+  loss, predictions, metrics = None, None, None
+
+  if mode == model_fn_lib.ModeKeys.PREDICT:
+    predictions = output_dict
+  else:
+    # Validate that the SignatureDef's method name matches the expected name for
+    # the given mode.
+    expected_method_name = signature_constants.SUPERVISED_TRAIN_METHOD_NAME
+    if mode == model_fn_lib.ModeKeys.EVAL:
+      expected_method_name = signature_constants.SUPERVISED_EVAL_METHOD_NAME
+    if method_name != expected_method_name:
+      raise RuntimeError(
+          'Invalid SignatureDef method name for mode %s.\n\tExpected: %s\n\t'
+          'Got: %s\nPlease ensure that the SavedModel was exported with '
+          '`tf.contrib.estimator.export_all_saved_models()`.' %
+          (mode, expected_method_name, method_name))
 
-# Include attrs that start with single underscore.
-saved_model_estimator.__all__ = [
-    s for s in dir(saved_model_estimator) if not s.startswith('__')
-]
+    # Extract loss, metrics and predictions from the output dict.
+    loss = output_dict[export_output._SupervisedOutput.LOSS_NAME]
+    metrics = _extract_eval_metrics(output_dict)
+    predictions = {
+        key: value for key, value in six.iteritems(output_dict)
+        if key.split(export_output._SupervisedOutput._SEPARATOR_CHAR)[0] == (
+            export_output._SupervisedOutput.PREDICTIONS_NAME)}
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.contrib.estimator.python.estimator.saved_model_estimator import *
+  # pylint: enable=protected-access
+  return loss, predictions, metrics
diff --git a/tensorflow/contrib/estimator/python/estimator/saved_model_estimator_test.py b/tensorflow/contrib/estimator/python/estimator/saved_model_estimator_test.py
new file mode 100644
index 0000000000..718da1367c
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/saved_model_estimator_test.py
@@ -0,0 +1,369 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for SavedModelEstimator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import shutil
+import tempfile
+
+from tensorflow.contrib.estimator.python.estimator import export as contrib_export
+from tensorflow.contrib.estimator.python.estimator import saved_model_estimator
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.export import export_output
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import training
+
+
+def dummy_input_fn():
+  return dataset_ops.Dataset.from_tensors((
+      {'x': constant_op.constant([[1], [-2]], dtype=dtypes.int64)},
+      constant_op.constant([[4], [-3]], dtype=dtypes.float32))).repeat()
+
+
+def dummy_input_fn_features_only():
+  return dataset_ops.Dataset.from_tensors(
+      {'x': constant_op.constant([[5], [6]], dtype=dtypes.int64)}).repeat()
+
+
+def dummy_supervised_receiver_fn():
+  feature_spec = {
+      'x': array_ops.placeholder(
+          dtype=dtypes.int64, shape=(2, 1), name='feature_x'),
+      }
+  label_spec = array_ops.placeholder(
+      dtype=dtypes.float32, shape=[2, 1], name='truth')
+  return export.build_raw_supervised_input_receiver_fn(
+      feature_spec, label_spec)
+
+
+def dummy_serving_receiver_fn():
+  feature_spec = {'x': array_ops.placeholder(
+      dtype=dtypes.int64, shape=(2, 1), name='feature_x'),}
+  return export.build_raw_serving_input_receiver_fn(feature_spec)
+
+
+def model_fn_diff_modes(features, labels, mode):
+  _, _ = features, labels
+  v = variables.Variable(21, name='some_var')
+  train_op = None
+  loss = constant_op.constant(104)
+  if mode == model_fn_lib.ModeKeys.TRAIN:
+    loss = constant_op.constant(105)
+    predictions = constant_op.constant([501])
+    train_op = control_flow_ops.group(
+        state_ops.assign_add(training.get_global_step(), 1),
+        state_ops.assign_add(v, 3))
+  elif mode == model_fn_lib.ModeKeys.EVAL:
+    loss = constant_op.constant(106)
+    predictions = constant_op.constant([502])
+  else:
+    loss = constant_op.constant(107)
+    predictions = constant_op.constant([503])
+  return model_fn_lib.EstimatorSpec(
+      mode,
+      loss=loss,
+      train_op=train_op,
+      eval_metric_ops={
+          'abs_err': metrics_lib.mean_absolute_error(
+              constant_op.constant(0), predictions)},
+      predictions=predictions)
+
+
+class SavedModelEstimatorTest(test.TestCase):
+
+  def setUp(self):
+    self.tmpdirs = []
+
+  def tearDown(self):
+    for tmpdir in self.tmpdirs:
+      # gfile.DeleteRecursively fails in the windows cmake test, so use shutil.
+      shutil.rmtree(tmpdir, ignore_errors=True)
+    self.tmpdirs = []
+
+  def _get_tmp_dir(self):
+    tmpdir = tempfile.mkdtemp()
+    self.tmpdirs.append(tmpdir)
+    return tmpdir
+
+  def _export_estimator(self, train=True, evaluate=True, predict=True,
+                        model_fn=model_fn_diff_modes):
+    est = estimator.Estimator(model_fn, self._get_tmp_dir())
+    est.train(input_fn=dummy_input_fn, steps=10)
+
+    input_receiver_fn_map = {}
+    if train:
+      input_receiver_fn_map[model_fn_lib.ModeKeys.TRAIN] = (
+          dummy_supervised_receiver_fn())
+    if evaluate:
+      input_receiver_fn_map[model_fn_lib.ModeKeys.EVAL] = (
+          dummy_supervised_receiver_fn())
+    if predict:
+      input_receiver_fn_map[model_fn_lib.ModeKeys.PREDICT] = (
+          dummy_serving_receiver_fn())
+
+    export_base_path = self._get_tmp_dir()
+    export_dir = contrib_export.export_all_saved_models(
+        est, export_base_path, input_receiver_fn_map)
+    return export_dir
+
+  def test_load_all_modes(self):
+    sme = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(), self._get_tmp_dir())
+    sme.train(input_fn=dummy_input_fn, steps=1)
+    sme.train(input_fn=dummy_input_fn, steps=2)
+    self.assertEqual(13, sme.get_variable_value('global_step'))
+    self.assertEqual(60, sme.get_variable_value('some_var'))
+
+    eval_results = sme.evaluate(dummy_input_fn, steps=5)
+
+    self.assertEqual(13, eval_results['global_step'])
+    self.assertEqual(106, eval_results['loss'])
+    self.assertEqual(502, eval_results['metrics/abs_err'])
+
+    predictions = next(sme.predict(dummy_input_fn_features_only))
+    self.assertDictEqual({'output': 503}, predictions)
+
+  def test_load_all_modes_no_train(self):
+    """Ensure that all functions can be used without requiring a ckpt."""
+    sme = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(), self._get_tmp_dir())
+    eval_results = sme.evaluate(dummy_input_fn, steps=5)
+    self.assertEqual(10, eval_results['global_step'])
+    self.assertEqual(106, eval_results['loss'])
+    self.assertEqual(502, eval_results['metrics/abs_err'])
+
+    predictions = next(sme.predict(dummy_input_fn_features_only))
+    self.assertDictEqual({'output': 503}, predictions)
+
+  def test_partial_exported_estimator(self):
+    sme1 = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(train=False, predict=False), self._get_tmp_dir())
+    sme1.evaluate(dummy_input_fn, steps=5)
+    with self.assertRaisesRegexp(RuntimeError, 'train mode is not available'):
+      sme1.train(input_fn=dummy_input_fn, steps=1)
+    with self.assertRaisesRegexp(RuntimeError, 'infer mode is not available'):
+      next(sme1.predict(dummy_input_fn_features_only))
+
+    sme2 = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(evaluate=False), self._get_tmp_dir())
+    sme2.train(input_fn=dummy_input_fn, steps=1)
+    next(sme2.predict(dummy_input_fn_features_only))
+    with self.assertRaisesRegexp(RuntimeError, 'eval mode is not available'):
+      sme2.evaluate(dummy_input_fn, steps=5)
+
+  def test_with_incorrect_input(self):
+    sme = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(), self._get_tmp_dir())
+
+    def bad_shape_input_fn():
+      return dataset_ops.Dataset.from_tensors((
+          {'x': constant_op.constant([1, 2], dtype=dtypes.int64)},
+          constant_op.constant([1, 2], dtype=dtypes.float32)))
+
+    with self.assertRaisesRegexp(ValueError, 'Expected shape'):
+      sme.train(bad_shape_input_fn, steps=1)
+
+    def bad_dtype_input_fn():
+      return dataset_ops.Dataset.from_tensors((
+          {'x': constant_op.constant([[1], [1]], dtype=dtypes.int32)},
+          constant_op.constant([[1], [1]], dtype=dtypes.int64)))
+
+    with self.assertRaisesRegexp(ValueError, 'Expected dtype'):
+      sme.train(bad_dtype_input_fn, steps=1)
+
+  def test_input_fn_with_global_step(self):
+    sme = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(), self._get_tmp_dir())
+
+    def bad_input_fn():
+      training.get_or_create_global_step()
+      return dataset_ops.Dataset.from_tensors((
+          {'x': constant_op.constant([[1], [1]], dtype=dtypes.int64)},
+          constant_op.constant([[1], [1]], dtype=dtypes.float32)))
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 'Graph must not contain a global step tensor'):
+      sme.train(bad_input_fn, steps=1)
+
+  def test_re_export_saved_model_serving_only(self):
+    sme = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(), self._get_tmp_dir())
+    sme.train(dummy_input_fn, steps=3)
+    self.assertEqual(13, sme.get_variable_value('global_step'))
+    self.assertEqual(60, sme.get_variable_value('some_var'))
+
+    predictions = next(sme.predict(dummy_input_fn_features_only))
+    self.assertDictEqual({'output': 503}, predictions)
+
+    # Export SavedModel, and test that the variable and prediction values are
+    # the same.
+    sme_export_dir = sme.export_savedmodel(
+        self._get_tmp_dir(), dummy_serving_receiver_fn())
+
+    sme2 = saved_model_estimator.SavedModelEstimator(
+        sme_export_dir, self._get_tmp_dir())
+    self.assertEqual(60, sme.get_variable_value('some_var'))
+    self.assertEqual(13, sme.get_variable_value('global_step'))
+
+    predictions = next(sme2.predict(dummy_input_fn_features_only))
+    self.assertDictEqual({'output': 503}, predictions)
+
+  def test_re_export_saved_model(self):
+    sme = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(), self._get_tmp_dir())
+    self.assertDictEqual(
+        {'loss': 106, 'metrics/abs_err': 502, 'global_step': 10},
+        sme.evaluate(dummy_input_fn, steps=1))
+
+    sme.train(dummy_input_fn, steps=3)
+    self.assertDictEqual(
+        {'loss': 106, 'metrics/abs_err': 502, 'global_step': 13},
+        sme.evaluate(dummy_input_fn, steps=1))
+    self.assertEqual(60, sme.get_variable_value('some_var'))
+
+    predictions = next(sme.predict(dummy_input_fn_features_only))
+    self.assertDictEqual({'output': 503}, predictions)
+
+    # Export SavedModel for all modes
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: dummy_supervised_receiver_fn(),
+        model_fn_lib.ModeKeys.EVAL: dummy_supervised_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: dummy_serving_receiver_fn()}
+    sme_export_dir = contrib_export.export_all_saved_models(
+        sme, self._get_tmp_dir(), input_receiver_fn_map)
+
+    sme2 = saved_model_estimator.SavedModelEstimator(
+        sme_export_dir, self._get_tmp_dir())
+    self.assertDictEqual(
+        {'loss': 106, 'metrics/abs_err': 502, 'global_step': 13},
+        sme.evaluate(dummy_input_fn, steps=1))
+    self.assertEqual(60, sme.get_variable_value('some_var'))
+
+    sme.train(dummy_input_fn, steps=7)
+    self.assertEqual(20, sme.get_variable_value('global_step'))
+
+    predictions = next(sme2.predict(dummy_input_fn_features_only))
+    self.assertDictEqual({'output': 503}, predictions)
+
+  def test_load_saved_model_from_serving_only(self):
+    def model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant([103]),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([502]),
+          export_outputs={'test': export_output.ClassificationOutput(
+              constant_op.constant([[32.]]))})
+
+    est = estimator.Estimator(model_fn, self._get_tmp_dir())
+    est.train(input_fn=dummy_input_fn, steps=10)
+
+    def serving_input_receiver_fn():
+      return export.ServingInputReceiver(
+          {'test-features': constant_op.constant([[1], [1]])},
+          array_ops.placeholder(dtype=dtypes.string))
+
+    export_dir = est.export_savedmodel(
+        self._get_tmp_dir(), serving_input_receiver_fn)
+
+    sme = saved_model_estimator.SavedModelEstimator(
+        export_dir, self._get_tmp_dir())
+
+    def input_fn():
+      return {'inputs': constant_op.constant('someinputstr')}
+
+    prediction = next(sme.predict(input_fn))
+    self.assertDictEqual({'scores': 32}, prediction)
+
+  def test_with_local_init_op(self):
+    def model_fn(features, labels, mode):
+      _, _ = features, labels
+      v = variables.Variable(21, name='some_var')
+      scaffold = monitored_session.Scaffold(
+          local_init_op=state_ops.assign_add(v, -3).op
+      )
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          scaffold=scaffold,
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          loss=array_ops.identity(v))
+    export_dir = self._export_estimator(predict=False, model_fn=model_fn)
+    sme = saved_model_estimator.SavedModelEstimator(
+        export_dir, self._get_tmp_dir())
+
+    eval_results1 = sme.evaluate(dummy_input_fn, steps=2)
+    self.assertEqual(15, eval_results1['loss'])
+
+    sme.train(dummy_input_fn, steps=1)
+    self.assertEqual(15, sme.get_variable_value('some_var'))
+
+    eval_results2 = sme.evaluate(dummy_input_fn, steps=5)
+    self.assertEqual(12, eval_results2['loss'])
+
+  def test_with_working_input_fn(self):
+    def model_fn(features, labels, mode):
+      loss = None
+      if labels is not None:
+        loss = labels[0][0] + labels[1][0]
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=loss,
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions={'features_0': array_ops.identity([features['x'][0][0]]),
+                       'features_1': array_ops.identity([features['x'][1][0]])})
+
+    sme = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(model_fn=model_fn), self._get_tmp_dir())
+    eval_results = sme.evaluate(dummy_input_fn, steps=1)
+    self.assertEqual(1, eval_results['loss'])
+
+    predictions = next(sme.predict(dummy_input_fn_features_only))
+    self.assertDictEqual({'features_0': 5, 'features_1': 6}, predictions)
+
+  def test_control_dependency(self):
+    # Control dependencies are saved with "^" appended to the start of the input
+    # name. The input map must include control dependencies as well.
+    def model_fn(features, labels, mode):
+      _ = labels
+      with ops.control_dependencies([features['x']]):
+        loss = features['x'][1][0]
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=loss,
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+    sme = saved_model_estimator.SavedModelEstimator(
+        self._export_estimator(train=False, predict=False, model_fn=model_fn),
+        self._get_tmp_dir())
+    sme.evaluate(dummy_input_fn, steps=1)  # Should run without error
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index 7363a112af..1c4c5951df 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -2,6 +2,8 @@ package(default_visibility = ["//tensorflow:internal"])
 
 licenses(["notice"])  # Apache 2.0
 
+load("//tensorflow:tensorflow.bzl", "py_test")
+
 py_library(
     name = "estimator_py",
     srcs = [
@@ -19,7 +21,6 @@ py_library(
         ":dnn",
         ":dnn_linear_combined",
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":export",
         ":exporter",
         ":inputs",
@@ -38,7 +39,6 @@ py_library(
     srcs = ["exporter.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":gc",
         ":metric_keys",
         ":util",
@@ -46,12 +46,34 @@ py_library(
     ],
 )
 
+py_test(
+    name = "exporter_test",
+    size = "small",
+    srcs = ["exporter_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":estimator",
+        ":exporter",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "gc",
     srcs = ["gc.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
+py_test(
+    name = "gc_test",
+    size = "small",
+    srcs = ["gc_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":gc",
         "//tensorflow:tensorflow_py_no_contrib",
     ],
 )
@@ -61,20 +83,30 @@ py_library(
     srcs = ["model_fn.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":export_output",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
+py_test(
+    name = "model_fn_test",
+    size = "small",
+    srcs = ["model_fn_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":export_output",
+        ":model_fn",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "training",
     srcs = ["training.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":exporter",
         ":run_config",
         "//tensorflow:tensorflow_py_no_contrib",
@@ -82,24 +114,51 @@ py_library(
     ],
 )
 
+py_test(
+    name = "training_test",
+    size = "medium",
+    srcs = ["training_test.py"],
+    shard_count = 4,
+    srcs_version = "PY2AND3",
+    tags = ["notsan"],
+    deps = [
+        ":dnn",
+        ":estimator",
+        ":exporter",
+        ":inputs",
+        ":run_config",
+        ":training",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "run_config",
     srcs = ["run_config.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
+py_test(
+    name = "run_config_test",
+    size = "small",
+    srcs = ["run_config_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":run_config",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "baseline",
     srcs = ["canned/baseline.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":head",
         ":model_fn",
         ":optimizers",
@@ -108,6 +167,31 @@ py_library(
     ],
 )
 
+py_test(
+    name = "baseline_test",
+    size = "medium",
+    srcs = ["canned/baseline_test.py"],
+    shard_count = 4,
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "noasan",  # test flakily times out in asan mode.
+        "notsan",  # b/67510291
+        "optonly",  # flakily times out in fastbuild
+    ],
+    deps = [
+        ":baseline",
+        ":estimator",
+        ":export_export",
+        ":metric_keys",
+        ":numpy_io",
+        ":pandas_io",
+        ":run_config",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "boosted_trees",
     srcs = ["canned/boosted_trees.py"],
@@ -115,33 +199,66 @@ py_library(
     deps = [
         ":boosted_trees_utils",
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":head",
         ":model_fn",
         "//tensorflow:tensorflow_py_no_contrib",
     ],
 )
 
+py_test(
+    name = "boosted_trees_test",
+    size = "medium",
+    srcs = ["canned/boosted_trees_test.py"],
+    shard_count = 2,
+    srcs_version = "PY2AND3",
+    tags = [
+        "manual",
+        "no_oss",
+        "notap",
+        "optonly",
+    ],
+    deps = [
+        ":boosted_trees",
+        ":inputs",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "boosted_trees_utils",
     srcs = ["canned/boosted_trees_utils.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":head",
         ":model_fn",
         "//tensorflow:tensorflow_py_no_contrib",
     ],
 )
 
+py_test(
+    name = "boosted_trees_utils_test",
+    size = "medium",
+    srcs = ["canned/boosted_trees_utils_test.py"],
+    shard_count = 2,
+    srcs_version = "PY2AND3",
+    tags = [
+        "optonly",
+    ],
+    deps = [
+        ":boosted_trees",
+        ":inputs",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_library(
     name = "dnn",
     srcs = ["canned/dnn.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":head",
         ":model_fn",
         ":optimizers",
@@ -157,7 +274,6 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":head",
         ":metric_keys",
         ":model_fn",
@@ -170,6 +286,29 @@ py_library(
     ],
 )
 
+py_test(
+    name = "dnn_test",
+    size = "medium",
+    srcs = ["canned/dnn_test.py"],
+    shard_count = 4,
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",  # b/67510291
+    ],
+    deps = [
+        ":dnn",
+        ":dnn_testing_utils",
+        ":export_export",
+        ":numpy_io",
+        ":pandas_io",
+        ":prediction_keys",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "@absl_py//absl/testing:parameterized",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "dnn_linear_combined",
     srcs = ["canned/dnn_linear_combined.py"],
@@ -177,7 +316,6 @@ py_library(
     deps = [
         ":dnn",
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":head",
         ":linear",
         ":model_fn",
@@ -187,6 +325,30 @@ py_library(
     ],
 )
 
+py_test(
+    name = "dnn_linear_combined_test",
+    size = "medium",
+    srcs = ["canned/dnn_linear_combined_test.py"],
+    shard_count = 8,
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",  # b/67510291
+    ],
+    deps = [
+        ":dnn_linear_combined",
+        ":dnn_testing_utils",
+        ":export_export",
+        ":linear_testing_utils",
+        ":numpy_io",
+        ":pandas_io",
+        ":prediction_keys",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "@absl_py//absl/testing:parameterized",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "util",
     srcs = [
@@ -194,11 +356,23 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
     ],
 )
 
+py_test(
+    name = "util_test",
+    srcs = ["util_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["notsan"],  # b/67510291
+    deps = [
+        ":util",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "estimator",
     srcs = [
@@ -206,7 +380,6 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":export_export",
         ":model_fn",
         ":run_config",
@@ -217,6 +390,25 @@ py_library(
     ],
 )
 
+py_test(
+    name = "estimator_test",
+    srcs = ["estimator_test.py"],
+    shard_count = 4,
+    srcs_version = "PY2AND3",
+    tags = ["notsan"],  # b/67510291
+    deps = [
+        ":estimator",
+        ":export_export",
+        ":export_output",
+        ":model_fn",
+        ":numpy_io",
+        ":run_config",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "parsing_utils",
     srcs = [
@@ -224,23 +416,42 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
+py_test(
+    name = "parsing_utils_test",
+    srcs = ["canned/parsing_utils_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":parsing_utils",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "export_output",
     srcs = ["export/export_output.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
+py_test(
+    name = "export_output_test",
+    size = "small",
+    srcs = ["export/export_output_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":export_output",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "export",
     srcs = [
@@ -248,7 +459,6 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":export_export",
         ":export_output",
         "//tensorflow:tensorflow_py_no_contrib",
@@ -262,19 +472,30 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":util",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
+py_test(
+    name = "export_test",
+    size = "small",
+    srcs = ["export/export_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":export_export",
+        ":export_output",
+        ":util",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "head",
     srcs = ["canned/head.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":export_output",
         ":metric_keys",
         ":model_fn",
@@ -284,12 +505,31 @@ py_library(
     ],
 )
 
+py_test(
+    name = "head_test",
+    size = "medium",
+    srcs = ["canned/head_test.py"],
+    shard_count = 4,
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":dnn_testing_utils",
+        ":head",
+        ":metric_keys",
+        ":model_fn",
+        ":numpy_io",
+        ":prediction_keys",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_library(
     name = "inputs",
     srcs = ["inputs/inputs.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":numpy_io",
         ":pandas_io",
         "//tensorflow:tensorflow_py_no_contrib",
@@ -302,7 +542,6 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":head",
         ":optimizers",
         "//tensorflow:tensorflow_py_no_contrib",
@@ -317,7 +556,6 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":export_export",
         ":linear",
         ":metric_keys",
@@ -329,12 +567,28 @@ py_library(
     ],
 )
 
+py_test(
+    name = "linear_test",
+    size = "medium",
+    srcs = ["canned/linear_test.py"],
+    shard_count = 4,
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_pip",
+        "notsan",  # b/67510291
+    ],
+    deps = [
+        ":linear",
+        ":linear_testing_utils",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "metric_keys",
     srcs = ["canned/metric_keys.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":model_fn",
     ],
 )
@@ -344,29 +598,57 @@ py_library(
     srcs = ["inputs/numpy_io.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         ":inputs_queues",
     ],
 )
 
+py_test(
+    name = "numpy_io_test",
+    size = "small",
+    srcs = ["inputs/numpy_io_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":numpy_io",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "optimizers",
     srcs = ["canned/optimizers.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
+py_test(
+    name = "optimizers_test",
+    size = "small",
+    srcs = ["canned/optimizers_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":optimizers",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "pandas_io",
     srcs = ["inputs/pandas_io.py"],
     srcs_version = "PY2AND3",
+    deps = [":inputs_queues"],
+)
+
+py_test(
+    name = "pandas_io_test",
+    size = "small",
+    srcs = ["inputs/pandas_io_test.py"],
+    srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
-        ":inputs_queues",
+        ":pandas_io",
+        "//tensorflow:tensorflow_py_no_contrib",
     ],
 )
 
@@ -374,9 +656,7 @@ py_library(
     name = "prediction_keys",
     srcs = ["canned/prediction_keys.py"],
     srcs_version = "PY2AND3",
-    deps = [
-        ":expect_tensorflow_estimator_installed",
-    ],
+    deps = [],
 )
 
 py_library(
@@ -388,19 +668,41 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":expect_tensorflow_estimator_installed",
         "//tensorflow:tensorflow_py_no_contrib",
         "@six_archive//:six",
     ],
 )
 
+py_test(
+    name = "feeding_functions_test",
+    size = "small",
+    srcs = [
+        "inputs/queues/feeding_functions_test.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":inputs_queues",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
+py_test(
+    name = "feeding_queue_runner_test",
+    size = "small",
+    srcs = ["inputs/queues/feeding_queue_runner_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":inputs_queues",
+        "//tensorflow:tensorflow_py_no_contrib",
+    ],
+)
+
 py_library(
     name = "keras",
     srcs = ["keras.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":estimator",
-        ":expect_tensorflow_estimator_installed",
         ":export_export",
         ":model_fn",
         ":run_config",
@@ -408,9 +710,61 @@ py_library(
     ],
 )
 
+py_test(
+    name = "keras_test",
+    size = "large",
+    srcs = ["keras_test.py"],
+    shard_count = 4,
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_windows",
+        "notsan",  # b/67510291
+    ],
+    deps = [
+        ":keras",
+        ":numpy_io",
+        ":run_config",
+        "//tensorflow:tensorflow_py_no_contrib",
+        "//third_party/py/numpy",
+    ],
+)
+
+py_library(
+    name = "expect_numpy_installed",
+    # This is a dummy rule used as a numpy dependency in open-source.
+    # We expect numpy to already be installed on the system, e.g. via
+    # `pip install numpy`
+    visibility = ["//visibility:public"],
+)
+
+py_library(
+    name = "expect_pandas_installed",
+    # This is a dummy rule used as a numpy dependency in open-source.
+    # We expect pandas to already be installed on the system, e.g. via
+    # `pip install pandas`
+    visibility = ["//visibility:public"],
+)
+
+py_library(
+    name = "expect_h5py_installed",
+    # This is a dummy rule used as a numpy dependency in open-source.
+    # We expect h5py to already be installed on the system, e.g. via
+    # `pip install h5py'
+    visibility = ["//visibility:public"],
+)
+
+py_library(
+    name = "expect_six_installed",
+    # This is a dummy rule used as a numpy dependency in open-source.
+    # We expect six to already be installed on the system, e.g. via
+    # `pip install six`
+    visibility = ["//visibility:public"],
+)
+
 py_library(
-    name = "expect_tensorflow_estimator_installed",
-    # This is a dummy rule used as a dependency in open-source.
-    # We expect tensorflow_estimator to already be installed.
+    name = "expect_tensorflow_installed",
+    # This is a dummy rule used as a numpy dependency in open-source.
+    # We expect tensorflow to already be installed on the system, e.g. via
+    # `pip install tensorflow` or `pip install tensorflow_gpu`
     visibility = ["//visibility:public"],
 )
diff --git a/tensorflow/python/estimator/__init__.py b/tensorflow/python/estimator/__init__.py
index 03d310a6cf..8cf8df567f 100644
--- a/tensorflow/python/estimator/__init__.py
+++ b/tensorflow/python/estimator/__init__.py
@@ -12,21 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""estimator python module.
+"""Import Estimator APIs.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
+Note: This file is imported by the create_estimator_api genrule. It must
+transitively import all Estimator modules/packages for their @estimator_export
+annotations to generate the public Estimator python API.
 """
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python import estimator
-
-# Include attrs that start with single underscore.
-estimator.__all__ = [s for s in dir(estimator) if not s.startswith('__')]
-
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator import *
+import tensorflow.python.estimator.estimator_lib
diff --git a/tensorflow/python/estimator/canned/__init__.py b/tensorflow/python/estimator/canned/__init__.py
index fd46937941..e69de29bb2 100644
--- a/tensorflow/python/estimator/canned/__init__.py
+++ b/tensorflow/python/estimator/canned/__init__.py
@@ -1,32 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""canned python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow_estimator.python.estimator import canned
-
-# Include attrs that start with single underscore.
-canned.__all__ = [s for s in dir(canned) if not s.startswith('__')]
-
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned import *
diff --git a/tensorflow/python/estimator/canned/baseline.py b/tensorflow/python/estimator/canned/baseline.py
index 9dd64bad0a..20c7a69b7c 100644
--- a/tensorflow/python/estimator/canned/baseline.py
+++ b/tensorflow/python/estimator/canned/baseline.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,365 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""baseline python module.
+"""Baseline estimators.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+Baseline estimators are bias-only estimators that can be used for debugging
+and as simple baselines.
+
+Example:
+
+```
+# Build BaselineClassifier
+classifier = BaselineClassifier(n_classes=3)
+
+# Input builders
+def input_fn_train(): # returns x, y (where y represents label's class index).
+  pass
 
+def input_fn_eval(): # returns x, y (where y represents label's class index).
+  pass
+
+# Fit model.
+classifier.train(input_fn=input_fn_train)
+
+# Evaluate cross entropy between the test and train labels.
+loss = classifier.evaluate(input_fn=input_fn_eval)["loss"]
+
+# predict outputs the probability distribution of the classes as seen in
+# training.
+predictions = classifier.predict(new_samples)
+```
+"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import baseline
+import six
+
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.canned import optimizers
+from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.training import training_util
+from tensorflow.python.util.tf_export import estimator_export
+
+# The default learning rate of 0.3 is a historical artifact of the initial
+# implementation, but seems a reasonable choice.
+_LEARNING_RATE = 0.3
+
+
+def _get_weight_column_key(weight_column):
+  if weight_column is None:
+    return None
+  if isinstance(weight_column, six.string_types):
+    return weight_column
+  if not isinstance(weight_column, feature_column_lib._NumericColumn):  # pylint: disable=protected-access
+    raise TypeError('Weight column must be either a string or _NumericColumn.'
+                    ' Given type: {}.'.format(type(weight_column)))
+  return weight_column.key()
+
+
+def _baseline_logit_fn_builder(num_outputs, weight_column=None):
+  """Function builder for a baseline logit_fn.
+
+  Args:
+    num_outputs: Number of outputs for the model.
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+       weights. It will be multiplied by the loss of the example.
+  Returns:
+    A logit_fn (see below).
+  """
+
+  def baseline_logit_fn(features):
+    """Baseline model logit_fn.
+
+    The baseline model simply learns a bias, so the output logits are a
+    `Variable` with one weight for each output that learns the bias for the
+    corresponding output.
+
+    Args:
+      features: The first item returned from the `input_fn` passed to `train`,
+        `evaluate`, and `predict`. This should be a single `Tensor` or dict with
+        `Tensor` values.
+    Returns:
+      A `Tensor` representing the logits.
+    """
+    size_checks = []
+    batch_size = None
+
+    weight_column_key = _get_weight_column_key(weight_column)
+
+    # The first dimension is assumed to be a batch size and must be consistent
+    # among all of the features.
+    for key, feature in features.items():
+      # Skip weight_column to ensure we don't add size checks to it.
+      # These would introduce a dependency on the weight at serving time.
+      if key == weight_column_key:
+        continue
+      first_dim = array_ops.shape(feature)[0]
+      if batch_size is None:
+        batch_size = first_dim
+      else:
+        size_checks.append(check_ops.assert_equal(batch_size, first_dim))
+
+    with ops.control_dependencies(size_checks):
+      with variable_scope.variable_scope('baseline'):
+        bias = variable_scope.get_variable('bias', shape=[num_outputs],
+                                           initializer=init_ops.Zeros)
+        return math_ops.multiply(bias, array_ops.ones([batch_size,
+                                                       num_outputs]))
+
+  return baseline_logit_fn
+
+
+def _baseline_model_fn(features, labels, mode, head, optimizer,
+                       weight_column=None, config=None):
+  """Model_fn for baseline models.
+
+  Args:
+    features: `Tensor` or dict of `Tensor` (depends on data passed to `train`).
+    labels: `Tensor` of labels that are compatible with the `Head` instance.
+    mode: Defines whether this is training, evaluation or prediction.
+      See `ModeKeys`.
+    head: A `Head` instance.
+    optimizer: String, `tf.Optimizer` object, or callable that creates the
+      optimizer to use for training. If not specified, will use `FtrlOptimizer`
+      with a default learning rate of 0.3.
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+       weights. It will be multiplied by the loss of the example.
+    config: `RunConfig` object to configure the runtime settings.
+
+  Raises:
+    KeyError: If weight column is specified but not present.
+    ValueError: If features is an empty dictionary.
+
+  Returns:
+    An `EstimatorSpec` instance.
+  """
+  del config  # Unused.
+
+  logit_fn = _baseline_logit_fn_builder(head.logits_dimension, weight_column)
+  logits = logit_fn(features)
+
+  def train_op_fn(loss):
+    opt = optimizers.get_optimizer_instance(
+        optimizer, learning_rate=_LEARNING_RATE)
+    return opt.minimize(loss, global_step=training_util.get_global_step())
+
+  return head.create_estimator_spec(
+      features=features,
+      mode=mode,
+      logits=logits,
+      labels=labels,
+      train_op_fn=train_op_fn)
+
+
+@estimator_export('estimator.BaselineClassifier')
+class BaselineClassifier(estimator.Estimator):
+  """A classifier that can establish a simple baseline.
+
+  This classifier ignores feature values and will learn to predict the average
+  value of each label. For single-label problems, this will predict the
+  probability distribution of the classes as seen in the labels. For multi-label
+  problems, this will predict the fraction of examples that are positive for
+  each class.
+
+  Example:
+
+  ```python
+
+  # Build BaselineClassifier
+  classifier = BaselineClassifier(n_classes=3)
+
+  # Input builders
+  def input_fn_train: # returns x, y (where y represents label's class index).
+    pass
+
+  def input_fn_eval: # returns x, y (where y represents label's class index).
+    pass
+
+  # Fit model.
+  classifier.train(input_fn=input_fn_train)
+
+  # Evaluate cross entropy between the test and train labels.
+  loss = classifier.evaluate(input_fn=input_fn_eval)["loss"]
+
+  # predict outputs the probability distribution of the classes as seen in
+  # training.
+  predictions = classifier.predict(new_samples)
+
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+    otherwise there will be a `KeyError`:
+
+  * if `weight_column` is not `None`, a feature with
+     `key=weight_column` whose value is a `Tensor`.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               model_dir=None,
+               n_classes=2,
+               weight_column=None,
+               label_vocabulary=None,
+               optimizer='Ftrl',
+               config=None,
+               loss_reduction=losses.Reduction.SUM):
+    """Initializes a BaselineClassifier instance.
+
+    Args:
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator to
+        continue training a previously saved model.
+      n_classes: number of label classes. Default is binary classification.
+        It must be greater than 1. Note: Class labels are integers representing
+        the class index (i.e. values from 0 to n_classes-1). For arbitrary
+        label values (e.g. string labels), convert to class indices first.
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+         weights. It will be multiplied by the loss of the example.
+      label_vocabulary: Optional list of strings with size `[n_classes]`
+        defining the label vocabulary. Only supported for `n_classes` > 2.
+      optimizer: String, `tf.Optimizer` object, or callable that creates the
+        optimizer to use for training. If not specified, will use
+        `FtrlOptimizer` with a default learning rate of 0.3.
+      config: `RunConfig` object to configure the runtime settings.
+      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+        to reduce training loss over batch. Defaults to `SUM`.
+    Returns:
+      A `BaselineClassifier` estimator.
+
+    Raises:
+      ValueError: If `n_classes` < 2.
+    """
+    if n_classes == 2:
+      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
+          weight_column=weight_column,
+          label_vocabulary=label_vocabulary,
+          loss_reduction=loss_reduction)
+    else:
+      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
+          n_classes, weight_column=weight_column,
+          label_vocabulary=label_vocabulary,
+          loss_reduction=loss_reduction)
+    def _model_fn(features, labels, mode, config):
+      return _baseline_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          optimizer=optimizer,
+          weight_column=weight_column,
+          config=config)
+    super(BaselineClassifier, self).__init__(
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config)
+
+
+@estimator_export('estimator.BaselineRegressor')
+class BaselineRegressor(estimator.Estimator):
+  """A regressor that can establish a simple baseline.
+
+  This regressor ignores feature values and will learn to predict the average
+  value of each label.
+
+  Example:
+
+  ```python
+
+  # Build BaselineRegressor
+  regressor = BaselineRegressor()
+
+  # Input builders
+  def input_fn_train: # returns x, y (where y is the label).
+    pass
+
+  def input_fn_eval: # returns x, y (where y is the label).
+    pass
+
+  # Fit model.
+  regressor.train(input_fn=input_fn_train)
+
+  # Evaluate squared-loss between the test and train targets.
+  loss = regressor.evaluate(input_fn=input_fn_eval)["loss"]
+
+  # predict outputs the mean value seen during training.
+  predictions = regressor.predict(new_samples)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+    otherwise there will be a `KeyError`:
+
+  * if `weight_column` is not `None`, a feature with
+     `key=weight_column` whose value is a `Tensor`.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               model_dir=None,
+               label_dimension=1,
+               weight_column=None,
+               optimizer='Ftrl',
+               config=None,
+               loss_reduction=losses.Reduction.SUM):
+    """Initializes a BaselineRegressor instance.
 
-# Include attrs that start with single underscore.
-baseline.__all__ = [s for s in dir(baseline) if not s.startswith('__')]
+    Args:
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator to
+        continue training a previously saved model.
+      label_dimension: Number of regression targets per example. This is the
+        size of the last dimension of the labels and logits `Tensor` objects
+        (typically, these have shape `[batch_size, label_dimension]`).
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+         weights. It will be multiplied by the loss of the example.
+      optimizer: String, `tf.Optimizer` object, or callable that creates the
+        optimizer to use for training. If not specified, will use
+        `FtrlOptimizer` with a default learning rate of 0.3.
+      config: `RunConfig` object to configure the runtime settings.
+      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+        to reduce training loss over batch. Defaults to `SUM`.
+    Returns:
+      A `BaselineRegressor` estimator.
+    """
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.baseline import *
+    head = head_lib._regression_head(  # pylint: disable=protected-access
+        label_dimension=label_dimension,
+        weight_column=weight_column,
+        loss_reduction=loss_reduction)
+    def _model_fn(features, labels, mode, config):
+      return _baseline_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          optimizer=optimizer,
+          config=config)
+    super(BaselineRegressor, self).__init__(
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config)
diff --git a/tensorflow/python/estimator/canned/baseline_test.py b/tensorflow/python/estimator/canned/baseline_test.py
new file mode 100644
index 0000000000..1df7216ba6
--- /dev/null
+++ b/tensorflow/python/estimator/canned/baseline_test.py
@@ -0,0 +1,1558 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for baseline.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import os
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.client import session as tf_session
+from tensorflow.python.estimator.canned import baseline
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.estimator.inputs import pandas_io
+from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import input as input_lib
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import queue_runner
+from tensorflow.python.training import saver
+
+
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+# pylint rules which are disabled by default for test files.
+# pylint: disable=invalid-name,protected-access,missing-docstring
+
+# Names of variables created by model.
+BIAS_NAME = 'baseline/bias'
+
+
+def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
+  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
+    expected = ops.convert_to_tensor(expected, name='expected')
+    actual = ops.convert_to_tensor(actual, name='actual')
+    rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
+    rtol = ops.convert_to_tensor(rtol, name='rtol')
+    return check_ops.assert_less(
+        rdiff,
+        rtol,
+        data=('Condition expected =~ actual did not hold element-wise:'
+              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
+              'rtol = ', rtol,),
+        name=scope)
+
+
+def save_variables_to_ckpt(model_dir):
+  init_all_op = [variables.global_variables_initializer()]
+  with tf_session.Session() as sess:
+    sess.run(init_all_op)
+    saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
+
+
+def queue_parsed_features(feature_map):
+  tensors_to_enqueue = []
+  keys = []
+  for key, tensor in six.iteritems(feature_map):
+    keys.append(key)
+    tensors_to_enqueue.append(tensor)
+  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
+  input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
+  queue_runner.add_queue_runner(
+      queue_runner.QueueRunner(input_queue,
+                               [input_queue.enqueue(tensors_to_enqueue)]))
+  dequeued_tensors = input_queue.dequeue()
+  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
+
+
+def sorted_key_dict(unsorted_dict):
+  return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}
+
+
+def sigmoid(x):
+  return 1 / (1 + np.exp(-1.0 * x))
+
+
+def _baseline_regressor_fn(*args, **kwargs):
+  return baseline.BaselineRegressor(*args, **kwargs)
+
+
+def _baseline_classifier_fn(*args, **kwargs):
+  return baseline.BaselineClassifier(*args, **kwargs)
+
+
+# Tests for Baseline Regressor.
+
+
+# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders.
+class BaselineRegressorEvaluationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_evaluation_for_simple_data(self):
+    with ops.Graph().as_default():
+      variables.Variable([13.0], name=BIAS_NAME)
+      variables.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
+    eval_metrics = baseline_regressor.evaluate(
+        input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1)
+
+    # Logit is bias = 13, while label is 10. Loss is 3**2 = 9.
+    self.assertDictEqual({
+        metric_keys.MetricKeys.LOSS: 9.,
+        metric_keys.MetricKeys.LOSS_MEAN: 9.,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
+        metric_keys.MetricKeys.LABEL_MEAN: 10.,
+        ops.GraphKeys.GLOBAL_STEP: 100
+    }, eval_metrics)
+
+  def test_evaluation_batch(self):
+    """Tests evaluation for batch_size==2."""
+    with ops.Graph().as_default():
+      variables.Variable([13.0], name=BIAS_NAME)
+      variables.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
+    eval_metrics = baseline_regressor.evaluate(
+        input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1)
+
+    # Logit is bias = 13, while label is 10.
+    # Loss per example is 3**2 = 9.
+    # Training loss is the sum over batch = 9 + 9 = 18
+    # Average loss is the average over batch = 9
+    self.assertDictEqual({
+        metric_keys.MetricKeys.LOSS: 18.,
+        metric_keys.MetricKeys.LOSS_MEAN: 9.,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
+        metric_keys.MetricKeys.LABEL_MEAN: 10.,
+        ops.GraphKeys.GLOBAL_STEP: 100
+    }, eval_metrics)
+
+  def test_evaluation_weights(self):
+    """Tests evaluation with weights."""
+    with ops.Graph().as_default():
+      variables.Variable([13.0], name=BIAS_NAME)
+      variables.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    def _input_fn():
+      features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
+      labels = ((10.,), (10.,))
+      return features, labels
+
+    baseline_regressor = _baseline_regressor_fn(
+        weight_column='weights',
+        model_dir=self._model_dir)
+    eval_metrics = baseline_regressor.evaluate(input_fn=_input_fn, steps=1)
+
+    # Logit is bias = 13, while label is 10.
+    # Loss per example is 3**2 = 9.
+    # Training loss is the weighted sum over batch = 9 + 2*9 = 27
+    # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9
+    self.assertDictEqual({
+        metric_keys.MetricKeys.LOSS: 27.,
+        metric_keys.MetricKeys.LOSS_MEAN: 9.,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
+        metric_keys.MetricKeys.LABEL_MEAN: 10.,
+        ops.GraphKeys.GLOBAL_STEP: 100
+    }, eval_metrics)
+
+  def test_evaluation_for_multi_dimensions(self):
+    label_dim = 2
+    with ops.Graph().as_default():
+      variables.Variable([46.0, 58.0], name=BIAS_NAME)
+      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    baseline_regressor = _baseline_regressor_fn(
+        label_dimension=label_dim,
+        model_dir=self._model_dir)
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'age': np.array([[2., 4., 5.]]),
+        },
+        y=np.array([[46., 58.]]),
+        batch_size=1,
+        num_epochs=None,
+        shuffle=False)
+    eval_metrics = baseline_regressor.evaluate(input_fn=input_fn, steps=1)
+
+    self.assertItemsEqual(
+        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
+         metric_keys.MetricKeys.PREDICTION_MEAN,
+         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
+        eval_metrics.keys())
+
+    # Logit is bias which is [46, 58]
+    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
+
+
+class BaselineRegressorPredictTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_1d(self):
+    """Tests predict when all variables are one-dimensional."""
+    with ops.Graph().as_default():
+      variables.Variable([.2], name=BIAS_NAME)
+      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
+
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': np.array([[2.]])},
+        y=None,
+        batch_size=1,
+        num_epochs=1,
+        shuffle=False)
+    predictions = baseline_regressor.predict(input_fn=predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # x * weight + bias = 2. * 10. + .2 = 20.2
+    self.assertAllClose([[.2]], predicted_scores)
+
+  def testMultiDim(self):
+    """Tests predict when all variables are multi-dimenstional."""
+    batch_size = 2
+    label_dimension = 3
+    with ops.Graph().as_default():
+      variables.Variable(  # shape=[label_dimension]
+          [.2, .4, .6], name=BIAS_NAME)
+      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    baseline_regressor = _baseline_regressor_fn(
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+
+    predict_input_fn = numpy_io.numpy_input_fn(
+        # x shape=[batch_size, x_dim]
+        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
+        y=None,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+    predictions = baseline_regressor.predict(input_fn=predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # score = bias, shape=[batch_size, label_dimension]
+    self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]],
+                        predicted_scores)
+
+
+class BaselineRegressorIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, prediction_length):
+    feature_columns = [
+        feature_column_lib.numeric_column('x', shape=(input_dimension,))
+    ]
+    est = _baseline_regressor_fn(
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    # learn y = x
+    est.train(train_input_fn, steps=200)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array(
+        [x['predictions'] for x in est.predict(predict_input_fn)])
+    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    input_dimension = label_dimension
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=None,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length)
+
+  def test_pandas_input_fn(self):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+
+    # Pandas DataFrame natually supports 1 dim data only.
+    label_dimension = 1
+    input_dimension = label_dimension
+    batch_size = 10
+    data = np.array([1., 2., 3., 4.], dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(data)
+    prediction_length = 4
+
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x, batch_size=batch_size, shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length)
+
+  def test_input_fn_from_parse_example(self):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    label_dimension = 2
+    input_dimension = label_dimension
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x':
+                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
+                      value=datum)),
+              'y':
+                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
+                      value=datum[:label_dimension])),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
+    }
+
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length)
+
+
+class BaselineRegressorTrainingTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _mock_optimizer(self, expected_loss=None):
+    expected_var_names = [
+        '%s:0' % BIAS_NAME
+    ]
+
+    def _minimize(loss, global_step=None, var_list=None):
+      trainable_vars = var_list or ops.get_collection(
+          ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertItemsEqual(expected_var_names,
+                            [var.name for var in trainable_vars])
+
+      # Verify loss. We can't check the value directly, so we add an assert op.
+      self.assertEquals(0, loss.shape.ndims)
+      if expected_loss is None:
+        if global_step is not None:
+          return state_ops.assign_add(global_step, 1).op
+        return control_flow_ops.no_op()
+      assert_loss = assert_close(
+          math_ops.to_float(expected_loss, name='expected'),
+          loss,
+          name='assert_loss')
+      with ops.control_dependencies((assert_loss,)):
+        if global_step is not None:
+          return state_ops.assign_add(global_step, 1).op
+        return control_flow_ops.no_op()
+
+    mock_optimizer = test.mock.NonCallableMock(
+        spec=optimizer.Optimizer,
+        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
+    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
+
+    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
+    # So, return mock_optimizer itself for deepcopy.
+    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
+    return mock_optimizer
+
+  def _assert_checkpoint(self,
+                         label_dimension,
+                         expected_global_step,
+                         expected_bias=None):
+    shapes = {
+        name: shape
+        for (name, shape) in checkpoint_utils.list_variables(self._model_dir)
+    }
+
+    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
+    self.assertEqual(expected_global_step,
+                     checkpoint_utils.load_variable(self._model_dir,
+                                                    ops.GraphKeys.GLOBAL_STEP))
+
+    self.assertEqual([label_dimension], shapes[BIAS_NAME])
+    if expected_bias is not None:
+      self.assertEqual(expected_bias,
+                       checkpoint_utils.load_variable(self._model_dir,
+                                                      BIAS_NAME))
+
+  def testFromScratchWithDefaultOptimizer(self):
+    # Create BaselineRegressor.
+    label = 5.
+    age = 17
+    baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
+
+    # Train for a few steps, and validate final checkpoint.
+    num_steps = 10
+    baseline_regressor.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self._assert_checkpoint(label_dimension=1, expected_global_step=num_steps)
+
+  def testTrainWithOneDimLabel(self):
+    label_dimension = 1
+    batch_size = 20
+    est = _baseline_regressor_fn(
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
+    self.assertEqual((batch_size,), data_rank_1.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1},
+        y=data_rank_1,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(label_dimension=1, expected_global_step=200)
+
+  def testTrainWithOneDimWeight(self):
+    label_dimension = 1
+    batch_size = 20
+    est = _baseline_regressor_fn(
+        label_dimension=label_dimension,
+        weight_column='w',
+        model_dir=self._model_dir)
+
+    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
+    self.assertEqual((batch_size,), data_rank_1.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1,
+           'w': data_rank_1},
+        y=data_rank_1,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(label_dimension=1, expected_global_step=200)
+
+  def testFromScratch(self):
+    # Create BaselineRegressor.
+    label = 5.
+    age = 17
+    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
+    mock_optimizer = self._mock_optimizer(expected_loss=25.)
+    baseline_regressor = _baseline_regressor_fn(
+        model_dir=self._model_dir,
+        optimizer=mock_optimizer)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    baseline_regressor.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        label_dimension=1,
+        expected_global_step=num_steps,
+        expected_bias=[0.])
+
+  def testFromCheckpoint(self):
+    # Create initial checkpoint.
+    bias = 7.0
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables.Variable([bias], name=BIAS_NAME)
+      variables.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # logits = bias = 6.
+    # loss = (logits - label)^2 = (7 - 5)^2 = 4
+    mock_optimizer = self._mock_optimizer(expected_loss=4.)
+    baseline_regressor = _baseline_regressor_fn(
+        model_dir=self._model_dir,
+        optimizer=mock_optimizer)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    baseline_regressor.train(
+        input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        label_dimension=1,
+        expected_global_step=initial_global_step + num_steps,
+        expected_bias=[bias])
+
+  def testFromCheckpointMultiBatch(self):
+    # Create initial checkpoint.
+    bias = 5.0
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables.Variable([bias], name=BIAS_NAME)
+      variables.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # logits = bias
+    # logits[0] = 5.
+    # logits[1] = 5.
+    # loss = sum(logits - label)^2 = (5 - 5)^2 + (5 - 3)^2 = 4
+    mock_optimizer = self._mock_optimizer(expected_loss=4.)
+    baseline_regressor = _baseline_regressor_fn(
+        model_dir=self._model_dir,
+        optimizer=mock_optimizer)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    baseline_regressor.train(
+        input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))),
+        steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        label_dimension=1,
+        expected_global_step=initial_global_step + num_steps,
+        expected_bias=bias)
+
+
+# Tests for Baseline Classifier.
+
+
+class BaselineClassifierTrainingTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _mock_optimizer(self, expected_loss=None):
+    expected_var_names = [
+        '%s:0' % BIAS_NAME
+    ]
+
+    def _minimize(loss, global_step):
+      trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertItemsEqual(
+          expected_var_names,
+          [var.name for var in trainable_vars])
+
+      # Verify loss. We can't check the value directly, so we add an assert op.
+      self.assertEquals(0, loss.shape.ndims)
+      if expected_loss is None:
+        return state_ops.assign_add(global_step, 1).op
+      assert_loss = assert_close(
+          math_ops.to_float(expected_loss, name='expected'),
+          loss,
+          name='assert_loss')
+      with ops.control_dependencies((assert_loss,)):
+        return state_ops.assign_add(global_step, 1).op
+
+    mock_optimizer = test.mock.NonCallableMock(
+        spec=optimizer.Optimizer,
+        wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
+    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
+
+    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
+    # So, return mock_optimizer itself for deepcopy.
+    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
+    return mock_optimizer
+
+  def _assert_checkpoint(
+      self, n_classes, expected_global_step, expected_bias=None):
+    logits_dimension = n_classes if n_classes > 2 else 1
+
+    shapes = {
+        name: shape for (name, shape) in
+        checkpoint_utils.list_variables(self._model_dir)
+    }
+
+    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
+    self.assertEqual(
+        expected_global_step,
+        checkpoint_utils.load_variable(
+            self._model_dir, ops.GraphKeys.GLOBAL_STEP))
+
+    self.assertEqual([logits_dimension], shapes[BIAS_NAME])
+    if expected_bias is not None:
+      self.assertAllEqual(expected_bias,
+                          checkpoint_utils.load_variable(
+                              self._model_dir, BIAS_NAME))
+
+  def _testFromScratchWithDefaultOptimizer(self, n_classes):
+    label = 0
+    age = 17
+    est = baseline.BaselineClassifier(
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # Train for a few steps, and validate final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self._assert_checkpoint(n_classes, num_steps)
+
+  def testBinaryClassesFromScratchWithDefaultOptimizer(self):
+    self._testFromScratchWithDefaultOptimizer(n_classes=2)
+
+  def testMultiClassesFromScratchWithDefaultOptimizer(self):
+    self._testFromScratchWithDefaultOptimizer(n_classes=4)
+
+  def _testTrainWithTwoDimsLabel(self, n_classes):
+    batch_size = 20
+
+    est = baseline.BaselineClassifier(
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    data_rank_1 = np.array([0, 1])
+    data_rank_2 = np.array([[0], [1]])
+    self.assertEqual((2,), data_rank_1.shape)
+    self.assertEqual((2, 1), data_rank_2.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1},
+        y=data_rank_2,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(n_classes, 200)
+
+  def testBinaryClassesTrainWithTwoDimsLabel(self):
+    self._testTrainWithTwoDimsLabel(n_classes=2)
+
+  def testMultiClassesTrainWithTwoDimsLabel(self):
+    self._testTrainWithTwoDimsLabel(n_classes=4)
+
+  def _testTrainWithOneDimLabel(self, n_classes):
+    batch_size = 20
+
+    est = baseline.BaselineClassifier(
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    data_rank_1 = np.array([0, 1])
+    self.assertEqual((2,), data_rank_1.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1},
+        y=data_rank_1,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(n_classes, 200)
+
+  def testBinaryClassesTrainWithOneDimLabel(self):
+    self._testTrainWithOneDimLabel(n_classes=2)
+
+  def testMultiClassesTrainWithOneDimLabel(self):
+    self._testTrainWithOneDimLabel(n_classes=4)
+
+  def _testTrainWithTwoDimsWeight(self, n_classes):
+    batch_size = 20
+
+    est = baseline.BaselineClassifier(
+        weight_column='w',
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    data_rank_1 = np.array([0, 1])
+    data_rank_2 = np.array([[0], [1]])
+    self.assertEqual((2,), data_rank_1.shape)
+    self.assertEqual((2, 1), data_rank_2.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1, 'w': data_rank_2}, y=data_rank_1,
+        batch_size=batch_size, num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(n_classes, 200)
+
+  def testBinaryClassesTrainWithTwoDimsWeight(self):
+    self._testTrainWithTwoDimsWeight(n_classes=2)
+
+  def testMultiClassesTrainWithTwoDimsWeight(self):
+    self._testTrainWithTwoDimsWeight(n_classes=4)
+
+  def _testTrainWithOneDimWeight(self, n_classes):
+    batch_size = 20
+
+    est = baseline.BaselineClassifier(
+        weight_column='w',
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    data_rank_1 = np.array([0, 1])
+    self.assertEqual((2,), data_rank_1.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1, 'w': data_rank_1}, y=data_rank_1,
+        batch_size=batch_size, num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(n_classes, 200)
+
+  def testBinaryClassesTrainWithOneDimWeight(self):
+    self._testTrainWithOneDimWeight(n_classes=2)
+
+  def testMultiClassesTrainWithOneDimWeight(self):
+    self._testTrainWithOneDimWeight(n_classes=4)
+
+  def _testFromScratch(self, n_classes):
+    label = 1
+    age = 17
+    # For binary classifier:
+    #   loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are
+    #   all zero initially) and label = 1 so,
+    #      loss = 1 * -log ( sigmoid(logits) ) = 0.69315
+    # For multi class classifier:
+    #   loss = cross_entropy(logits, label) where logits are all 0s (weights are
+    #   all zero initially) and label = 1 so,
+    #      loss = 1 * -log ( 1.0 / n_classes )
+    # For this particular test case, as logits are same, the formula
+    # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases.
+    mock_optimizer = self._mock_optimizer(
+        expected_loss=-1 * math.log(1.0/n_classes))
+
+    est = baseline.BaselineClassifier(
+        n_classes=n_classes,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        n_classes,
+        expected_global_step=num_steps,
+        expected_bias=[0.] if n_classes == 2 else [.0] * n_classes)
+
+  def testBinaryClassesFromScratch(self):
+    self._testFromScratch(n_classes=2)
+
+  def testMultiClassesFromScratch(self):
+    self._testFromScratch(n_classes=4)
+
+  def _testFromCheckpoint(self, n_classes):
+    # Create initial checkpoint.
+    label = 1
+    age = 17
+    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables.Variable(bias, name=BIAS_NAME)
+      variables.Variable(
+          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # For binary classifier:
+    #   logits = bias = -1.
+    #   loss = sigmoid_cross_entropy(logits, label)
+    #   so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133
+    # For multi class classifier:
+    #   loss = cross_entropy(logits, label)
+    #   where logits = bias and label = 1
+    #   so, loss = 1 * -log ( softmax(logits)[1] )
+    if n_classes == 2:
+      expected_loss = 1.3133
+    else:
+      logits = bias
+      logits_exp = np.exp(logits)
+      softmax = logits_exp / logits_exp.sum()
+      expected_loss = -1 * math.log(softmax[label])
+
+    mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
+
+    est = baseline.BaselineClassifier(
+        n_classes=n_classes,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        n_classes,
+        expected_global_step=initial_global_step + num_steps,
+        expected_bias=bias)
+
+  def testBinaryClassesFromCheckpoint(self):
+    self._testFromCheckpoint(n_classes=2)
+
+  def testMultiClassesFromCheckpoint(self):
+    self._testFromCheckpoint(n_classes=4)
+
+  def _testFromCheckpointFloatLabels(self, n_classes):
+    """Tests float labels for binary classification."""
+    # Create initial checkpoint.
+    if n_classes > 2:
+      return
+    label = 0.8
+    age = 17
+    bias = [-1.0]
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables.Variable(bias, name=BIAS_NAME)
+      variables.Variable(
+          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # logits = bias = -1.
+    # loss = sigmoid_cross_entropy(logits, label)
+    # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617
+    mock_optimizer = self._mock_optimizer(expected_loss=1.1132617)
+
+    est = baseline.BaselineClassifier(
+        n_classes=n_classes,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+
+  def testBinaryClassesFromCheckpointFloatLabels(self):
+    self._testFromCheckpointFloatLabels(n_classes=2)
+
+  def testMultiClassesFromCheckpointFloatLabels(self):
+    self._testFromCheckpointFloatLabels(n_classes=4)
+
+  def _testFromCheckpointMultiBatch(self, n_classes):
+    # Create initial checkpoint.
+    label = [1, 0]
+    age = [17, 18.5]
+    # For binary case, the expected weight has shape (1,1). For multi class
+    # case, the shape is (1, n_classes). In order to test the weights, set
+    # weights as 2.0 * range(n_classes).
+    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables.Variable(bias, name=BIAS_NAME)
+      variables.Variable(
+          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # For binary classifier:
+    #   logits = bias
+    #   logits[0] = -1.
+    #   logits[1] = -1.
+    #   loss = sigmoid_cross_entropy(logits, label)
+    #   so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133
+    #       loss[1] = (1 - 0) * -log ( 1- sigmoid(-1) ) = 0.3132
+    # For multi class classifier:
+    #   loss = cross_entropy(logits, label)
+    #   where logits = bias and label = [1, 0]
+    #   so, loss = 1 * -log ( softmax(logits)[label] )
+    if n_classes == 2:
+      expected_loss = (1.3133 + 0.3132)
+    else:
+      # Expand logits since batch_size=2
+      logits = bias * np.ones(shape=(2, 1))
+      logits_exp = np.exp(logits)
+      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
+      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
+      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
+      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
+      expected_loss = expected_loss_0 + expected_loss_1
+
+    mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
+
+    est = baseline.BaselineClassifier(
+        n_classes=n_classes,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': (age)}, (label)),
+        steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        n_classes,
+        expected_global_step=initial_global_step + num_steps,
+        expected_bias=bias)
+
+  def testBinaryClassesFromCheckpointMultiBatch(self):
+    self._testFromCheckpointMultiBatch(n_classes=2)
+
+  def testMultiClassesFromCheckpointMultiBatch(self):
+    self._testFromCheckpointMultiBatch(n_classes=4)
+
+
+class BaselineClassifierEvaluationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _test_evaluation_for_simple_data(self, n_classes):
+    label = 1
+    age = 1.
+
+    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
+
+    with ops.Graph().as_default():
+      variables.Variable(bias, name=BIAS_NAME)
+      variables.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    est = _baseline_classifier_fn(
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    eval_metrics = est.evaluate(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1)
+
+    if n_classes == 2:
+      # Binary classes: loss = -log(sigmoid(-1)) = 1.3133
+      # Prediction = sigmoid(-1) = 0.2689
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: 1.3133,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: 1.3133,
+          metric_keys.MetricKeys.ACCURACY: 0.,
+          metric_keys.MetricKeys.PRECISION: 0.,
+          metric_keys.MetricKeys.RECALL: 0.,
+          metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689,
+          metric_keys.MetricKeys.LABEL_MEAN: 1.,
+          metric_keys.MetricKeys.ACCURACY_BASELINE: 1,
+          metric_keys.MetricKeys.AUC: 0.,
+          metric_keys.MetricKeys.AUC_PR: 1.,
+      }
+    else:
+      # Multi classes: loss = 1 * -log ( softmax(logits)[label] )
+      logits = bias
+      logits_exp = np.exp(logits)
+      softmax = logits_exp / logits_exp.sum()
+      expected_loss = -1 * math.log(softmax[label])
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
+          metric_keys.MetricKeys.ACCURACY: 0.,
+      }
+
+    self.assertAllClose(sorted_key_dict(expected_metrics),
+                        sorted_key_dict(eval_metrics), rtol=1e-3)
+
+  def test_binary_classes_evaluation_for_simple_data(self):
+    self._test_evaluation_for_simple_data(n_classes=2)
+
+  def test_multi_classes_evaluation_for_simple_data(self):
+    self._test_evaluation_for_simple_data(n_classes=4)
+
+  def _test_evaluation_batch(self, n_classes):
+    """Tests evaluation for batch_size==2."""
+    label = [1, 0]
+    age = [17., 18.]
+    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables.Variable(bias, name=BIAS_NAME)
+      variables.Variable(
+          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    est = _baseline_classifier_fn(
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    eval_metrics = est.evaluate(
+        input_fn=lambda: ({'age': (age)}, (label)), steps=1)
+
+    if n_classes == 2:
+      # Logits are (-1., -1.) labels are (1, 0).
+      # Loss is
+      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
+      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132
+      # Prediction = sigmoid(-1) = 0.2689
+      expected_loss = 1.3133 + 0.3132
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
+          metric_keys.MetricKeys.ACCURACY: 0.5,
+          metric_keys.MetricKeys.PRECISION: 0.,
+          metric_keys.MetricKeys.RECALL: 0.,
+          metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689,
+          metric_keys.MetricKeys.LABEL_MEAN: 0.5,
+          metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
+          metric_keys.MetricKeys.AUC: 0.5,
+          metric_keys.MetricKeys.AUC_PR: 0.75,
+      }
+    else:
+      # Expand logits since batch_size=2
+      logits = bias * np.ones(shape=(2, 1))
+      logits_exp = np.exp(logits)
+      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
+      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
+      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
+      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
+      expected_loss = expected_loss_0 + expected_loss_1
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
+          metric_keys.MetricKeys.ACCURACY: 0.5,
+      }
+
+    self.assertAllClose(sorted_key_dict(expected_metrics),
+                        sorted_key_dict(eval_metrics), rtol=1e-3)
+
+  def test_binary_classes_evaluation_batch(self):
+    self._test_evaluation_batch(n_classes=2)
+
+  def test_multi_classes_evaluation_batch(self):
+    self._test_evaluation_batch(n_classes=4)
+
+  def _test_evaluation_weights(self, n_classes):
+    """Tests evaluation with weights."""
+
+    label = [1, 0]
+    age = [17., 18.]
+    weights = [1., 2.]
+    # For binary case, the expected weight has shape (1,1). For multi class
+    # case, the shape is (1, n_classes). In order to test the weights, set
+    # weights as 2.0 * range(n_classes).
+    bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables.Variable(bias, name=BIAS_NAME)
+      variables.Variable(
+          initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    est = _baseline_classifier_fn(
+        n_classes=n_classes,
+        weight_column='w',
+        model_dir=self._model_dir)
+    eval_metrics = est.evaluate(
+        input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1)
+
+    if n_classes == 2:
+      # Logits are (-1., -1.) labels are (1, 0).
+      # Loss is
+      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
+      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132
+      #   weights = [1., 2.]
+      expected_loss = 1.3133 * 1. + 0.3132 * 2.
+      loss_mean = expected_loss / (1.0 + 2.0)
+      label_mean = np.average(label, weights=weights)
+      logits = [-1, -1]
+      logistics = sigmoid(np.array(logits))
+      predictions_mean = np.average(logistics, weights=weights)
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
+          metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.),
+          metric_keys.MetricKeys.PRECISION: 0.,
+          metric_keys.MetricKeys.RECALL: 0.,
+          metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean,
+          metric_keys.MetricKeys.LABEL_MEAN: label_mean,
+          metric_keys.MetricKeys.ACCURACY_BASELINE: (
+              max(label_mean, 1-label_mean)),
+          metric_keys.MetricKeys.AUC: 0.5,
+          metric_keys.MetricKeys.AUC_PR: 2. / (1. + 2.),
+      }
+    else:
+      # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] )
+      # Expand logits since batch_size=2
+      logits = bias * np.ones(shape=(2, 1))
+      logits_exp = np.exp(logits)
+      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
+      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
+      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
+      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
+      loss_mean = np.average([expected_loss_0, expected_loss_1],
+                             weights=weights)
+      expected_loss = loss_mean * np.sum(weights)
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
+          metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.),
+      }
+
+    self.assertAllClose(sorted_key_dict(expected_metrics),
+                        sorted_key_dict(eval_metrics), rtol=1e-3)
+
+  def test_binary_classes_evaluation_weights(self):
+    self._test_evaluation_weights(n_classes=2)
+
+  def test_multi_classes_evaluation_weights(self):
+    self._test_evaluation_weights(n_classes=4)
+
+
+class BaselineClassifierPredictTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _testPredictions(self, n_classes, label_vocabulary, label_output_fn):
+    """Tests predict when all variables are one-dimensional."""
+    age = 1.
+
+    bias = [10.0] if n_classes == 2 else [10.0] * n_classes
+
+    with ops.Graph().as_default():
+      variables.Variable(bias, name=BIAS_NAME)
+      variables.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    est = _baseline_classifier_fn(
+        label_vocabulary=label_vocabulary,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'age': np.array([[age]])},
+        y=None,
+        batch_size=1,
+        num_epochs=1,
+        shuffle=False)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+
+    if n_classes == 2:
+      scalar_logits = bias[0]
+      two_classes_logits = [0, scalar_logits]
+      two_classes_logits_exp = np.exp(two_classes_logits)
+      softmax = two_classes_logits_exp / two_classes_logits_exp.sum()
+
+      expected_predictions = {
+          'class_ids': [1],
+          'classes': [label_output_fn(1)],
+          'logistic': [sigmoid(np.array(scalar_logits))],
+          'logits': [scalar_logits],
+          'probabilities': softmax,
+      }
+    else:
+      onedim_logits = np.array(bias)
+      class_ids = onedim_logits.argmax()
+      logits_exp = np.exp(onedim_logits)
+      softmax = logits_exp / logits_exp.sum()
+      expected_predictions = {
+          'class_ids': [class_ids],
+          'classes': [label_output_fn(class_ids)],
+          'logits': onedim_logits,
+          'probabilities': softmax,
+      }
+
+    self.assertEqual(1, len(predictions))
+    # assertAllClose cannot handle byte type.
+    self.assertEqual(expected_predictions['classes'], predictions[0]['classes'])
+    expected_predictions.pop('classes')
+    predictions[0].pop('classes')
+    self.assertAllClose(sorted_key_dict(expected_predictions),
+                        sorted_key_dict(predictions[0]))
+
+  def testBinaryClassesWithoutLabelVocabulary(self):
+    n_classes = 2
+    self._testPredictions(n_classes,
+                          label_vocabulary=None,
+                          label_output_fn=lambda x: ('%s' % x).encode())
+
+  def testBinaryClassesWithLabelVocabulary(self):
+    n_classes = 2
+    self._testPredictions(
+        n_classes,
+        label_vocabulary=['class_vocab_{}'.format(i)
+                          for i in range(n_classes)],
+        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
+
+  def testMultiClassesWithoutLabelVocabulary(self):
+    n_classes = 4
+    self._testPredictions(
+        n_classes,
+        label_vocabulary=None,
+        label_output_fn=lambda x: ('%s' % x).encode())
+
+  def testMultiClassesWithLabelVocabulary(self):
+    n_classes = 4
+    self._testPredictions(
+        n_classes,
+        label_vocabulary=['class_vocab_{}'.format(i)
+                          for i in range(n_classes)],
+        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
+
+
+class BaselineClassifierIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn,
+                          predict_input_fn, input_dimension, prediction_length):
+    feature_columns = [
+        feature_column_lib.numeric_column('x', shape=(input_dimension,))
+    ]
+    est = _baseline_classifier_fn(
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    # learn y = x
+    est.train(train_input_fn, steps=200)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array(
+        [x['classes'] for x in est.predict(predict_input_fn)])
+    self.assertAllEqual((prediction_length, 1), predictions.shape)
+
+    # EXPORT
+    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def _test_numpy_input_fn(self, n_classes):
+    """Tests complete flow with numpy_input_fn."""
+    input_dimension = 4
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, input_dimension)
+    target = np.array([1] * batch_size)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=target,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=target,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=None,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+
+    self._test_complete_flow(
+        n_classes=n_classes,
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        prediction_length=prediction_length)
+
+  def test_binary_classes_numpy_input_fn(self):
+    self._test_numpy_input_fn(n_classes=2)
+
+  def test_multi_classes_numpy_input_fn(self):
+    self._test_numpy_input_fn(n_classes=4)
+
+  def _test_pandas_input_fn(self, n_classes):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+
+    # Pandas DataFrame natually supports 1 dim data only.
+    input_dimension = 1
+    batch_size = 10
+    data = np.array([1., 2., 3., 4.], dtype=np.float32)
+    target = np.array([1, 0, 1, 0], dtype=np.int32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(target)
+    prediction_length = 4
+
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x, batch_size=batch_size, shuffle=False)
+
+    self._test_complete_flow(
+        n_classes=n_classes,
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        prediction_length=prediction_length)
+
+  def test_binary_classes_pandas_input_fn(self):
+    self._test_pandas_input_fn(n_classes=2)
+
+  def test_multi_classes_pandas_input_fn(self):
+    self._test_pandas_input_fn(n_classes=4)
+
+  def _test_input_fn_from_parse_example(self, n_classes):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    input_dimension = 2
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, input_dimension)
+    target = np.array([1] * batch_size, dtype=np.int64)
+
+    serialized_examples = []
+    for x, y in zip(data, target):
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x':
+                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
+                      value=x)),
+              'y':
+                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
+                      value=[y])),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
+    }
+
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        n_classes=n_classes,
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=input_dimension,
+        prediction_length=prediction_length)
+
+  def test_binary_classes_input_fn_from_parse_example(self):
+    self._test_input_fn_from_parse_example(n_classes=2)
+
+  def test_multi_classes_input_fn_from_parse_example(self):
+    self._test_input_fn_from_parse_example(n_classes=4)
+
+
+# Tests for Baseline logit_fn.
+
+
+class BaselineLogitFnTest(test.TestCase):
+
+  def test_basic_logit_correctness(self):
+    """baseline_logit_fn simply returns the bias variable."""
+    with ops.Graph().as_default():
+      logit_fn = baseline._baseline_logit_fn_builder(num_outputs=2)
+      logits = logit_fn(features={'age': [[23.], [31.]]})
+      with variable_scope.variable_scope('baseline', reuse=True):
+        bias_var = variable_scope.get_variable('bias')
+      with tf_session.Session() as sess:
+        sess.run([variables.global_variables_initializer()])
+        self.assertAllClose([[0., 0.], [0., 0.]], logits.eval())
+        sess.run(bias_var.assign([10., 5.]))
+        self.assertAllClose([[10., 5.], [10., 5.]], logits.eval())
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 62a178830c..0278990cfc 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -12,23 +12,1553 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""boosted_trees python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
+"""Estimator classes for BoostedTrees."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import boosted_trees
+import abc
+import collections
+import functools
+
+import numpy as np
+
+from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.canned import boosted_trees_utils
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import boosted_trees_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.array_ops import identity as tf_identity
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.summary import summary
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import training_util
+from tensorflow.python.util.tf_export import estimator_export
+
+# TODO(nponomareva): Reveal pruning params here.
+_TreeHParams = collections.namedtuple('TreeHParams', [
+    'n_trees', 'max_depth', 'learning_rate', 'l1', 'l2', 'tree_complexity',
+    'min_node_weight', 'center_bias', 'pruning_mode'
+])
+
+_HOLD_FOR_MULTI_CLASS_SUPPORT = object()
+_HOLD_FOR_MULTI_DIM_SUPPORT = object()
+_DUMMY_NUM_BUCKETS = -1
+_DUMMY_NODE_ID = -1
+
+
+def _get_transformed_features(features, sorted_feature_columns):
+  """Gets the transformed features from features/feature_columns pair.
+
+  Args:
+    features: a dicionary of name to Tensor.
+    sorted_feature_columns: a list/set of tf.feature_column, sorted by name.
+
+  Returns:
+    result_features: a list of the transformed features, sorted by the name.
+
+  Raises:
+    ValueError: when unsupported features/columns are tried.
+  """
+  # pylint:disable=protected-access
+  transformed_features = feature_column_lib._transform_features(
+      features, sorted_feature_columns)
+  result_features = []
+  for column in sorted_feature_columns:
+    if isinstance(column, feature_column_lib._BucketizedColumn):
+      source_name = column.source_column.name
+      squeezed_tensor = array_ops.squeeze(transformed_features[column], axis=1)
+      if len(squeezed_tensor.shape) > 1:
+        raise ValueError('For now, only supports features equivalent to rank 1 '
+                         'but column `{}` got: {}'.format(
+                             source_name, features[source_name].shape))
+      result_features.append(squeezed_tensor)
+    elif isinstance(column, feature_column_lib._IndicatorColumn):
+      source_name = column.categorical_column.name
+      tensor = math_ops.to_int32(transformed_features[column])
+      if len(tensor.shape) > 2:
+        raise ValueError('Rank of indicator column must be no more than 2, '
+                         'but column `{}` got: {}'.format(
+                             source_name, features[source_name].shape))
+      unstacked = array_ops.unstack(tensor, axis=1)
+      result_features.extend(unstacked)
+    else:
+      raise ValueError(
+          'For now, only bucketized_column and indicator_column is supported '
+          'but got: {}'.format(column))
+    # pylint:enable=protected-access
+
+  return result_features
+
+
+def _local_variable(initial_value, name=None):
+  """Stores a tensor as a local Variable for faster read."""
+  result = variable_scope.variable(
+      initial_value=initial_value,
+      trainable=False,
+      collections=[ops.GraphKeys.LOCAL_VARIABLES],
+      validate_shape=False,
+      name=name)
+  if isinstance(initial_value, ops.Tensor):
+    # Match the resulting variable's shape if the initial_value is a Tensor.
+    result.set_shape(initial_value.shape)
+  return result
+
+
+def _group_features_by_num_buckets(sorted_feature_columns):
+  """Groups feature ids by the number of buckets.
+
+  Derives the feature ids based on iterating through ordered feature columns
+  and groups them by the number of buckets each feature require. Returns a
+  sorted list of buckets and a list of lists of feature ids for each of those
+  buckets.
+
+  Args:
+    sorted_feature_columns: a list/set of tf.feature_column sorted by name.
+
+  Returns:
+    bucket_size_list: a list of required bucket sizes.
+    feature_ids_list: a list of lists of feature ids for each bucket size.
+
+  Raises:
+    ValueError: when unsupported features columns are provided.
+  """
+  bucket_size_to_feature_ids_dict = collections.OrderedDict()
+
+  # TODO(nponomareva) for now we preserve the previous functionality and bucket
+  # all numeric into the same num of buckets. Can be easily changed to using
+  # each numeric's real buckets num, but we need to test that it does not cause
+  # a performance hit.
+
+  # We will replace this dummy key with the real max after we calculate it.
+  bucket_size_to_feature_ids_dict[_DUMMY_NUM_BUCKETS] = []
+
+  max_buckets_for_bucketized = 2
+  max_buckets_for_indicator = 2
+
+  feature_idx = 0
+  # pylint:disable=protected-access
+
+  for column in sorted_feature_columns:
+    if isinstance(column, feature_column_lib._IndicatorColumn):
+      num_categorical_features = column.categorical_column._num_buckets
+      if max_buckets_for_indicator not in bucket_size_to_feature_ids_dict:
+        bucket_size_to_feature_ids_dict[max_buckets_for_indicator] = []
+
+      for _ in range(num_categorical_features):
+        # We use bucket size of 2 for categorical.
+        bucket_size_to_feature_ids_dict[max_buckets_for_indicator].append(
+            feature_idx)
+        feature_idx += 1
+    elif isinstance(column, feature_column_lib._BucketizedColumn):
+      max_buckets_for_bucketized = max(max_buckets_for_bucketized,
+                                       len(column.boundaries) + 1)
+      bucket_size_to_feature_ids_dict[_DUMMY_NUM_BUCKETS].append(feature_idx)
+      feature_idx += 1
+    elif not isinstance(column, feature_column_lib._IndicatorColumn):  # pylint:disable=protected-access
+      raise ValueError(
+          'For now, only bucketized_column and indicator column are supported '
+          'but got: {}'.format(column))
+
+  # pylint:enable=protected-access
+  # Replace the dummy key with the real max num of buckets for all bucketized
+  # columns.
+  if max_buckets_for_bucketized not in bucket_size_to_feature_ids_dict:
+    bucket_size_to_feature_ids_dict[max_buckets_for_bucketized] = []
+  bucket_size_to_feature_ids_dict[max_buckets_for_bucketized].extend(
+      bucket_size_to_feature_ids_dict[_DUMMY_NUM_BUCKETS])
+  del bucket_size_to_feature_ids_dict[_DUMMY_NUM_BUCKETS]
+
+  feature_ids_list = list(bucket_size_to_feature_ids_dict.values())
+  bucket_size_list = list(bucket_size_to_feature_ids_dict.keys())
+  return bucket_size_list, feature_ids_list
+
+
+def _calculate_num_features(sorted_feature_columns):
+  num_features = 0
+  for column in sorted_feature_columns:
+    if isinstance(column, feature_column_lib._IndicatorColumn):  # pylint:disable=protected-access
+      num_features += column.categorical_column._num_buckets  # pylint:disable=protected-access
+    else:
+      num_features += 1
+  return num_features
+
+
+def _generate_feature_name_mapping(sorted_feature_columns):
+  """Return a list of feature name for feature ids.
+
+  Args:
+    sorted_feature_columns: a list/set of tf.feature_column sorted by name.
+
+  Returns:
+    feature_name_mapping: a list of feature names indexed by the feature ids.
+
+  Raises:
+    ValueError: when unsupported features/columns are tried.
+  """
+  names = []
+  for column in sorted_feature_columns:
+    if isinstance(column, feature_column_lib._IndicatorColumn):  # pylint:disable=protected-access
+      categorical_column = column.categorical_column
+      if isinstance(categorical_column,
+                    feature_column_lib._VocabularyListCategoricalColumn):  # pylint:disable=protected-access
+        for value in categorical_column.vocabulary_list:
+          names.append('{}:{}'.format(column.name, value))
+      elif isinstance(categorical_column,
+                      feature_column_lib._BucketizedColumn):  # pylint:disable=protected-access
+        boundaries = [-np.inf] + list(categorical_column.boundaries) + [np.inf]
+        for pair in zip(boundaries[:-1], boundaries[1:]):
+          names.append('{}:{}'.format(column.name, pair))
+      else:
+        for num in range(categorical_column._num_buckets):  # pylint:disable=protected-access
+          names.append('{}:{}'.format(column.name, num))
+    elif isinstance(column, feature_column_lib._BucketizedColumn):
+      names.append(column.name)
+    else:
+      raise ValueError(
+          'For now, only bucketized_column and indicator_column is supported '
+          'but got: {}'.format(column))
+  return names
+
+
+def _cache_transformed_features(features, sorted_feature_columns, batch_size):
+  """Transform features and cache, then returns (cached_features, cache_op)."""
+  num_features = _calculate_num_features(sorted_feature_columns)
+  cached_features = [
+      _local_variable(
+          array_ops.zeros([batch_size], dtype=dtypes.int32),
+          name='cached_feature_{}'.format(i)) for i in range(num_features)
+  ]
+  are_features_cached = _local_variable(False, name='are_features_cached')
+
+  def cache_features_and_return():
+    """Caches transformed features.
+
+    The intention is to hide get_transformed_features() from the graph by
+    caching the result except the first step, since bucketize operation
+    (inside get_transformed_features) is expensive.
+
+    Returns:
+      input_feature_list: a list of input features.
+      cache_flip_op: op to add to graph to make sure cache update is included to
+          the graph.
+    """
+
+    transformed_features = _get_transformed_features(features,
+                                                     sorted_feature_columns)
+    cached = [
+        state_ops.assign(cached_features[i], transformed_features[i])
+        for i in range(num_features)
+    ]
+    # TODO(youngheek): Try other combination of dependencies so that the
+    # function returns a single result, not a tuple.
+    with ops.control_dependencies(cached):
+      cache_flip_op = are_features_cached.assign(True)
+    return cached, cache_flip_op
+
+  input_feature_list, cache_flip_op = control_flow_ops.cond(
+      are_features_cached, lambda: (cached_features, control_flow_ops.no_op()),
+      cache_features_and_return)
+  return input_feature_list, cache_flip_op
+
+
+class _CacheTrainingStatesUsingHashTable(object):
+  """Caching logits, etc. using MutableHashTable."""
+
+  def __init__(self, example_ids, logits_dimension):
+    """Creates a cache with the given configuration.
+
+    It maintains a MutableDenseHashTable for all values.
+    The API lookup() and insert() would have those specs,
+      tree_ids: shape=[batch_size], dtype=int32
+      node_ids: shape=[batch_size], dtype=int32
+      logits: shape=[batch_size, logits_dimension], dtype=float32
+    However in the MutableDenseHashTable, ids are bitcasted into float32 and
+    all values are concatenated as a single tensor (of float32).
+
+    Hence conversion happens internally before inserting to the HashTable and
+    after lookup from it.
+
+    Args:
+      example_ids: a Rank 1 tensor to be used as a key of the cache.
+      logits_dimension: a constant (int) for the dimension of logits.
+
+    Raises:
+      ValueError: if example_ids is other than int64 or string.
+    """
+    if dtypes.as_dtype(dtypes.int64).is_compatible_with(example_ids.dtype):
+      empty_key = -1 << 62
+    elif dtypes.as_dtype(dtypes.string).is_compatible_with(example_ids.dtype):
+      empty_key = ''
+    else:
+      raise ValueError(
+          'Unsupported example_id_feature dtype %s.' % example_ids.dtype)
+    # Cache holds latest <tree_id, node_id, logits> for each example.
+    # tree_id and node_id are both int32 but logits is a float32.
+    # To reduce the overhead, we store all of them together as float32 and
+    # bitcast the ids to int32.
+    self._table_ref = lookup_ops.mutable_dense_hash_table_v2(
+        empty_key=empty_key, value_dtype=dtypes.float32, value_shape=[3])
+    self._example_ids = ops.convert_to_tensor(example_ids)
+    if self._example_ids.shape.ndims not in (None, 1):
+      raise ValueError(
+          'example_id should have rank 1, but got %s' % self._example_ids)
+    self._logits_dimension = logits_dimension
+
+  def lookup(self):
+    """Returns cached_tree_ids, cached_node_ids, cached_logits."""
+    cached_tree_ids, cached_node_ids, cached_logits = array_ops.split(
+        lookup_ops.lookup_table_find_v2(
+            self._table_ref,
+            self._example_ids,
+            default_value=[0.0, _DUMMY_NODE_ID, 0.0]),
+        [1, 1, self._logits_dimension],
+        axis=1)
+    cached_tree_ids = array_ops.squeeze(
+        array_ops.bitcast(cached_tree_ids, dtypes.int32))
+    cached_node_ids = array_ops.squeeze(
+        array_ops.bitcast(cached_node_ids, dtypes.int32))
+    if self._example_ids.shape.ndims is not None:
+      cached_logits.set_shape(
+          [self._example_ids.shape[0], self._logits_dimension])
+    return (cached_tree_ids, cached_node_ids, cached_logits)
+
+  def insert(self, tree_ids, node_ids, logits):
+    """Inserts values and returns the op."""
+    insert_op = lookup_ops.lookup_table_insert_v2(
+        self._table_ref, self._example_ids,
+        array_ops.concat(
+            [
+                array_ops.expand_dims(
+                    array_ops.bitcast(tree_ids, dtypes.float32), 1),
+                array_ops.expand_dims(
+                    array_ops.bitcast(node_ids, dtypes.float32), 1),
+                logits,
+            ],
+            axis=1,
+            name='value_concat_for_cache_insert'))
+    return insert_op
+
+
+class _CacheTrainingStatesUsingVariables(object):
+  """Caching logits, etc. using Variables."""
+
+  def __init__(self, batch_size, logits_dimension):
+    """Creates a cache with the given configuration.
+
+    It maintains three variables, tree_ids, node_ids, logits, for caching.
+      tree_ids: shape=[batch_size], dtype=int32
+      node_ids: shape=[batch_size], dtype=int32
+      logits: shape=[batch_size, logits_dimension], dtype=float32
+
+    Note, this can be used only with in-memory data setting.
+
+    Args:
+      batch_size: `int`, the size of the cache.
+      logits_dimension: a constant (int) for the dimension of logits.
+    """
+    self._logits_dimension = logits_dimension
+    self._tree_ids = _local_variable(
+        array_ops.zeros([batch_size], dtype=dtypes.int32),
+        name='tree_ids_cache')
+    self._node_ids = _local_variable(
+        _DUMMY_NODE_ID * array_ops.ones([batch_size], dtype=dtypes.int32),
+        name='node_ids_cache')
+    self._logits = _local_variable(
+        array_ops.zeros([batch_size, logits_dimension], dtype=dtypes.float32),
+        name='logits_cache')
+
+  def lookup(self):
+    """Returns cached_tree_ids, cached_node_ids, cached_logits."""
+    return (self._tree_ids, self._node_ids, self._logits)
+
+  def insert(self, tree_ids, node_ids, logits):
+    """Inserts values and returns the op."""
+    return control_flow_ops.group(
+        [
+            self._tree_ids.assign(tree_ids),
+            self._node_ids.assign(node_ids),
+            self._logits.assign(logits)
+        ],
+        name='cache_insert')
+
+
+class _StopAtAttemptsHook(session_run_hook.SessionRunHook):
+  """Hook that requests stop at the number of attempts."""
+
+  def __init__(self, num_finalized_trees_tensor, num_attempted_layers_tensor,
+               max_trees, max_depth):
+    self._num_finalized_trees_tensor = num_finalized_trees_tensor
+    self._num_attempted_layers_tensor = num_attempted_layers_tensor
+    self._max_trees = max_trees
+    self._max_depth = max_depth
+
+  def before_run(self, run_context):
+    return session_run_hook.SessionRunArgs(
+        [self._num_finalized_trees_tensor, self._num_attempted_layers_tensor])
+
+  def after_run(self, run_context, run_values):
+    # num_* tensors should be retrieved by a separate session than the training
+    # one, in order to read the values after growing.
+    # So, if it's approaching to the limit, get the actual value by additional
+    # session.
+    num_finalized_trees, num_attempted_layers = run_values.results
+    if (num_finalized_trees >= self._max_trees - 1 or
+        num_attempted_layers > 2 * self._max_trees * self._max_depth - 1):
+      num_finalized_trees, num_attempted_layers = run_context.session.run(
+          [self._num_finalized_trees_tensor, self._num_attempted_layers_tensor])
+    if (num_finalized_trees >= self._max_trees or
+        num_attempted_layers > 2 * self._max_trees * self._max_depth):
+      run_context.request_stop()
+
+
+def _get_max_splits(tree_hparams):
+  """Calculates the max possible number of splits based on tree params."""
+  # maximum number of splits possible in the whole tree =2^(D-1)-1
+  max_splits = (1 << tree_hparams.max_depth) - 1
+  return max_splits
+
+
+class _EnsembleGrower(object):
+  """Abstract base class for different types of ensemble growers.
+
+  Use it to receive training ops for growing and centering bias, depending
+  on the implementation (for example, in memory or accumulator-based
+  distributed):
+    grower = ...create subclass grower(tree_ensemble, tree_hparams)
+    grow_op = grower.grow_tree(stats_summaries_list, feature_ids_list,
+                               last_layer_nodes_range)
+    training_ops.append(grow_op)
+  """
+
+  def __init__(self, tree_ensemble, tree_hparams, feature_ids_list):
+    """Initializes a grower object.
+
+    Args:
+      tree_ensemble: A TreeEnsemble variable.
+      tree_hparams: TODO. collections.namedtuple for hyper parameters.
+      feature_ids_list: a list of lists of feature ids for each bucket size.
+
+    Raises:
+      ValueError: when pruning mode is invalid or pruning is used and no tree
+      complexity is set.
+    """
+    self._tree_ensemble = tree_ensemble
+    self._tree_hparams = tree_hparams
+    self._feature_ids_list = feature_ids_list
+    # pylint: disable=protected-access
+    self._pruning_mode_parsed = boosted_trees_ops.PruningMode.from_str(
+        tree_hparams.pruning_mode)
+
+    if tree_hparams.tree_complexity > 0:
+      if self._pruning_mode_parsed == boosted_trees_ops.PruningMode.NO_PRUNING:
+        raise ValueError(
+            'Tree complexity have no effect unless pruning mode is chosen.')
+    else:
+      if self._pruning_mode_parsed != boosted_trees_ops.PruningMode.NO_PRUNING:
+        raise ValueError('For pruning, tree_complexity must be positive.')
+    # pylint: enable=protected-access
+
+  @abc.abstractmethod
+  def center_bias(self, center_bias_var, gradients, hessians):
+    """Centers bias, if ready, based on statistics.
+
+    Args:
+      center_bias_var: A variable that will be updated when bias centering
+        finished.
+      gradients: A rank 2 tensor of gradients.
+      hessians: A rank 2 tensor of hessians.
+
+    Returns:
+      An operation for centering bias.
+    """
+
+  @abc.abstractmethod
+  def grow_tree(self, stats_summaries_list, last_layer_nodes_range):
+    """Grows a tree, if ready, based on provided statistics.
+
+    Args:
+      stats_summaries_list: List of stats summary tensors, representing sums of
+        gradients and hessians for each feature bucket.
+      last_layer_nodes_range: A tensor representing ids of the nodes in the
+        current layer, to be split.
+
+    Returns:
+      An op for growing a tree.
+    """
+
+  def chief_init_op(self):
+    """Ops that chief needs to run to initialize the state."""
+    return control_flow_ops.no_op()
+
+  #  ============= Helper methods ===========
+
+  def _center_bias_fn(self, center_bias_var, mean_gradients, mean_hessians):
+    """Updates the ensembles and cache (if needed) with logits prior."""
+    continue_centering = boosted_trees_ops.center_bias(
+        self._tree_ensemble.resource_handle,
+        mean_gradients=mean_gradients,
+        mean_hessians=mean_hessians,
+        l1=self._tree_hparams.l1,
+        l2=self._tree_hparams.l2)
+    return center_bias_var.assign(continue_centering)
+
+  def _grow_tree_from_stats_summaries(self, stats_summaries_list,
+                                      last_layer_nodes_range):
+    """Updates ensemble based on the best gains from stats summaries."""
+    node_ids_per_feature = []
+    gains_list = []
+    thresholds_list = []
+    left_node_contribs_list = []
+    right_node_contribs_list = []
+    all_feature_ids = []
+    assert len(stats_summaries_list) == len(self._feature_ids_list)
+
+    max_splits = _get_max_splits(self._tree_hparams)
+
+    for i, feature_ids in enumerate(self._feature_ids_list):
+      (numeric_node_ids_per_feature, numeric_gains_list,
+       numeric_thresholds_list, numeric_left_node_contribs_list,
+       numeric_right_node_contribs_list) = (
+           boosted_trees_ops.calculate_best_gains_per_feature(
+               node_id_range=last_layer_nodes_range,
+               stats_summary_list=stats_summaries_list[i],
+               l1=self._tree_hparams.l1,
+               l2=self._tree_hparams.l2,
+               tree_complexity=self._tree_hparams.tree_complexity,
+               min_node_weight=self._tree_hparams.min_node_weight,
+               max_splits=max_splits))
+
+      all_feature_ids += feature_ids
+      node_ids_per_feature += numeric_node_ids_per_feature
+      gains_list += numeric_gains_list
+      thresholds_list += numeric_thresholds_list
+      left_node_contribs_list += numeric_left_node_contribs_list
+      right_node_contribs_list += numeric_right_node_contribs_list
+
+    grow_op = boosted_trees_ops.update_ensemble(
+        # Confirm if local_tree_ensemble or tree_ensemble should be used.
+        self._tree_ensemble.resource_handle,
+        feature_ids=all_feature_ids,
+        node_ids=node_ids_per_feature,
+        gains=gains_list,
+        thresholds=thresholds_list,
+        left_node_contribs=left_node_contribs_list,
+        right_node_contribs=right_node_contribs_list,
+        learning_rate=self._tree_hparams.learning_rate,
+        max_depth=self._tree_hparams.max_depth,
+        pruning_mode=self._pruning_mode_parsed)
+    return grow_op
+
+
+class _InMemoryEnsembleGrower(_EnsembleGrower):
+  """An in-memory ensemble grower."""
+
+  def __init__(self, tree_ensemble, tree_hparams, feature_ids_list):
+
+    super(_InMemoryEnsembleGrower, self).__init__(
+        tree_ensemble=tree_ensemble, tree_hparams=tree_hparams,
+        feature_ids_list=feature_ids_list)
+
+  def center_bias(self, center_bias_var, gradients, hessians):
+    # For in memory, we already have a full batch of gradients and hessians,
+    # so just take a mean and proceed with centering.
+    mean_gradients = array_ops.expand_dims(
+        math_ops.reduce_mean(gradients, 0), 0)
+    mean_heassians = array_ops.expand_dims(math_ops.reduce_mean(hessians, 0), 0)
+    return self._center_bias_fn(center_bias_var, mean_gradients, mean_heassians)
+
+  def grow_tree(self, stats_summaries_list, last_layer_nodes_range):
+    # For in memory, we already have full data in one batch, so we can grow the
+    # tree immediately.
+    return self._grow_tree_from_stats_summaries(
+        stats_summaries_list, last_layer_nodes_range)
+
+
+class _AccumulatorEnsembleGrower(_EnsembleGrower):
+  """An accumulator based ensemble grower."""
+
+  def __init__(self, tree_ensemble, tree_hparams, stamp_token,
+               n_batches_per_layer, bucket_size_list, is_chief, center_bias,
+               feature_ids_list):
+    super(_AccumulatorEnsembleGrower, self).__init__(
+        tree_ensemble=tree_ensemble, tree_hparams=tree_hparams,
+        feature_ids_list=feature_ids_list)
+    self._stamp_token = stamp_token
+    self._n_batches_per_layer = n_batches_per_layer
+    self._bucket_size_list = bucket_size_list
+    self._is_chief = is_chief
+    self._growing_accumulators = []
+    self._chief_init_ops = []
+    max_splits = _get_max_splits(self._tree_hparams)
+    for i, feature_ids in enumerate(self._feature_ids_list):
+      accumulator = data_flow_ops.ConditionalAccumulator(
+          dtype=dtypes.float32,
+          # The stats consist of grads and hessians (the last dimension).
+          shape=[len(feature_ids), max_splits, self._bucket_size_list[i], 2],
+          shared_name='numeric_stats_summary_accumulator_' + str(i))
+      self._chief_init_ops.append(
+          accumulator.set_global_step(self._stamp_token))
+      self._growing_accumulators.append(accumulator)
+    self._center_bias = center_bias
+    if center_bias:
+      self._bias_accumulator = data_flow_ops.ConditionalAccumulator(
+          dtype=dtypes.float32,
+          # The stats consist of grads and hessians means only.
+          # TODO(nponomareva): this will change for a multiclass
+          shape=[2, 1],
+          shared_name='bias_accumulator')
+      self._chief_init_ops.append(
+          self._bias_accumulator.set_global_step(self._stamp_token))
+
+  def center_bias(self, center_bias_var, gradients, hessians):
+    # For not in memory situation, we need to accumulate enough of batches first
+    # before proceeding with centering bias.
+
+    # Create an accumulator.
+    if not self._center_bias:
+      raise RuntimeError('center_bias called but bias centering is disabled.')
+    bias_dependencies = []
+    grads_and_hess = array_ops.stack([gradients, hessians], axis=0)
+    grads_and_hess = math_ops.reduce_mean(grads_and_hess, axis=1)
+
+    apply_grad = self._bias_accumulator.apply_grad(
+        grads_and_hess, self._stamp_token)
+    bias_dependencies.append(apply_grad)
+
+    # Center bias if enough batches were processed.
+    with ops.control_dependencies(bias_dependencies):
+      if not self._is_chief:
+        return control_flow_ops.no_op()
+      def _set_accumulators_stamp():
+        return control_flow_ops.group(
+            [acc.set_global_step(self._stamp_token + 1) for acc in
+             self._growing_accumulators])
+
+      def center_bias_from_accumulator():
+        accumulated = array_ops.unstack(self._bias_accumulator.take_grad(1),
+                                        axis=0)
+        center_bias_op = self._center_bias_fn(
+            center_bias_var,
+            array_ops.expand_dims(accumulated[0], 0),
+            array_ops.expand_dims(accumulated[1], 0))
+        with ops.control_dependencies([center_bias_op]):
+          return control_flow_ops.cond(center_bias_var,
+                                       control_flow_ops.no_op,
+                                       _set_accumulators_stamp)
+
+      center_bias_op = control_flow_ops.cond(
+          math_ops.greater_equal(self._bias_accumulator.num_accumulated(),
+                                 self._n_batches_per_layer),
+          center_bias_from_accumulator,
+          control_flow_ops.no_op,
+          name='wait_until_n_batches_for_bias_accumulated')
+      return center_bias_op
+
+  def grow_tree(self, stats_summaries_list, last_layer_nodes_range):
+    dependencies = []
+    for i in range(len(self._feature_ids_list)):
+      stats_summaries = stats_summaries_list[i]
+      apply_grad = self._growing_accumulators[i].apply_grad(
+          array_ops.stack(stats_summaries, axis=0), self._stamp_token)
+      dependencies.append(apply_grad)
+
+    # Grow the tree if enough batches is accumulated.
+    with ops.control_dependencies(dependencies):
+      if not self._is_chief:
+        return control_flow_ops.no_op()
+
+      min_accumulated = math_ops.reduce_min(
+          array_ops.stack([acc.num_accumulated() for acc in
+                           self._growing_accumulators]))
+
+      def grow_tree_from_accumulated_summaries_fn():
+        """Updates tree with the best layer from accumulated summaries."""
+        # Take out the accumulated summaries from the accumulator and grow.
+        stats_summaries_list = []
+        stats_summaries_list = [
+            array_ops.unstack(accumulator.take_grad(1), axis=0)
+            for accumulator in self._growing_accumulators
+        ]
+        grow_op = self._grow_tree_from_stats_summaries(
+            stats_summaries_list, last_layer_nodes_range
+        )
+        return grow_op
+
+      grow_model = control_flow_ops.cond(
+          math_ops.greater_equal(min_accumulated, self._n_batches_per_layer),
+          grow_tree_from_accumulated_summaries_fn,
+          control_flow_ops.no_op,
+          name='wait_until_n_batches_accumulated')
+      return grow_model
+
+  def chief_init_op(self):
+    """Ops that chief needs to run to initialize the state."""
+    return control_flow_ops.group(self._chief_init_ops)
+
+
+def _bt_model_fn(
+    features,
+    labels,
+    mode,
+    head,
+    feature_columns,
+    tree_hparams,
+    n_batches_per_layer,
+    config,
+    closed_form_grad_and_hess_fn=None,
+    example_id_column_name=None,
+    # TODO(youngheek): replace this later using other options.
+    train_in_memory=False,
+    name='boosted_trees'):
+  """Gradient Boosted Trees model_fn.
+
+  Args:
+    features: dict of `Tensor`.
+    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
+      dtype `int32` or `int64` in the range `[0, n_classes)`.
+    mode: Defines whether this is training, evaluation or prediction.
+      See `ModeKeys`.
+    head: A `head_lib._Head` instance.
+    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
+    tree_hparams: TODO. collections.namedtuple for hyper parameters.
+    n_batches_per_layer: A `Tensor` of `int64`. Each layer is built after at
+      least n_batches_per_layer accumulations.
+    config: `RunConfig` object to configure the runtime settings.
+    closed_form_grad_and_hess_fn: a function that accepts logits and labels
+      and returns gradients and hessians. By default, they are created by
+      tf.gradients() from the loss.
+    example_id_column_name: Name of the feature for a unique ID per example.
+      Currently experimental -- not exposed to public API.
+    train_in_memory: `bool`, when true, it assumes the dataset is in memory,
+      i.e., input_fn should return the entire dataset as a single batch, and
+      also n_batches_per_layer should be set as 1.
+    name: Name to use for the model.
+
+  Returns:
+      An `EstimatorSpec` instance.
+
+  Raises:
+    ValueError: mode or params are invalid, or features has the wrong type.
+  """
+  sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name)
+  with ops.name_scope(name) as name:
+    # Prepare.
+    global_step = training_util.get_or_create_global_step()
+    bucket_size_list, feature_ids_list = _group_features_by_num_buckets(
+        sorted_feature_columns)
+    # Create Ensemble resources.
+    tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
+
+    # Create logits.
+    if mode != model_fn_lib.ModeKeys.TRAIN:
+      input_feature_list = _get_transformed_features(features,
+                                                     sorted_feature_columns)
+      logits = boosted_trees_ops.predict(
+          # For non-TRAIN mode, ensemble doesn't change after initialization,
+          # so no local copy is needed; using tree_ensemble directly.
+          tree_ensemble_handle=tree_ensemble.resource_handle,
+          bucketized_features=input_feature_list,
+          logits_dimension=head.logits_dimension)
+      return head.create_estimator_spec(
+          features=features,
+          mode=mode,
+          labels=labels,
+          train_op_fn=control_flow_ops.no_op,
+          logits=logits)
+
+    # ============== Training graph ==============
+    center_bias = tree_hparams.center_bias
+    is_single_machine = (config.num_worker_replicas <= 1)
+
+    if train_in_memory:
+      assert n_batches_per_layer == 1, (
+          'When train_in_memory is enabled, input_fn should return the entire '
+          'dataset as a single batch, and n_batches_per_layer should be set as '
+          '1.')
+      if (not config.is_chief or config.num_worker_replicas > 1 or
+          config.num_ps_replicas > 0):
+        raise ValueError('train_in_memory is supported only for '
+                         'non-distributed training.')
+    worker_device = control_flow_ops.no_op().device
+    train_op = []
+    # Extract input features and set up cache for training.
+    training_state_cache = None
+    if train_in_memory:
+      # cache transformed features as well for in-memory training.
+      batch_size = array_ops.shape(labels)[0]
+      input_feature_list, input_cache_op = (
+          _cache_transformed_features(features, sorted_feature_columns,
+                                      batch_size))
+      train_op.append(input_cache_op)
+      training_state_cache = _CacheTrainingStatesUsingVariables(
+          batch_size, head.logits_dimension)
+    else:
+      input_feature_list = _get_transformed_features(features,
+                                                     sorted_feature_columns)
+      if example_id_column_name:
+        example_ids = features[example_id_column_name]
+        training_state_cache = _CacheTrainingStatesUsingHashTable(
+            example_ids, head.logits_dimension)
+    if training_state_cache:
+      cached_tree_ids, cached_node_ids, cached_logits = (
+          training_state_cache.lookup())
+    else:
+      # Always start from the beginning when no cache is set up.
+      batch_size = array_ops.shape(labels)[0]
+      cached_tree_ids, cached_node_ids, cached_logits = (
+          array_ops.zeros([batch_size], dtype=dtypes.int32),
+          _DUMMY_NODE_ID * array_ops.ones([batch_size], dtype=dtypes.int32),
+          array_ops.zeros(
+              [batch_size, head.logits_dimension], dtype=dtypes.float32))
+
+    if is_single_machine:
+      local_tree_ensemble = tree_ensemble
+      ensemble_reload = control_flow_ops.no_op()
+    else:
+      # Have a local copy of ensemble for the distributed setting.
+      with ops.device(worker_device):
+        local_tree_ensemble = boosted_trees_ops.TreeEnsemble(
+            name=name + '_local', is_local=True)
+      # TODO(soroush): Do partial updates if this becomes a bottleneck.
+      ensemble_reload = local_tree_ensemble.deserialize(
+          *tree_ensemble.serialize())
+    with ops.control_dependencies([ensemble_reload]):
+      (stamp_token, num_trees, num_finalized_trees, num_attempted_layers,
+       last_layer_nodes_range) = local_tree_ensemble.get_states()
+      partial_logits, tree_ids, node_ids = boosted_trees_ops.training_predict(
+          tree_ensemble_handle=local_tree_ensemble.resource_handle,
+          cached_tree_ids=cached_tree_ids,
+          cached_node_ids=cached_node_ids,
+          bucketized_features=input_feature_list,
+          logits_dimension=head.logits_dimension)
+    logits = cached_logits + partial_logits
+
+    if train_in_memory:
+      grower = _InMemoryEnsembleGrower(tree_ensemble, tree_hparams,
+                                       feature_ids_list=feature_ids_list)
+    else:
+      grower = _AccumulatorEnsembleGrower(tree_ensemble, tree_hparams,
+                                          stamp_token, n_batches_per_layer,
+                                          bucket_size_list, config.is_chief,
+                                          center_bias=center_bias,
+                                          feature_ids_list=feature_ids_list)
+
+    summary.scalar('ensemble/num_trees', num_trees)
+    summary.scalar('ensemble/num_finalized_trees', num_finalized_trees)
+    summary.scalar('ensemble/num_attempted_layers', num_attempted_layers)
+
+    # Variable that determines whether bias centering is needed.
+    center_bias_var = variable_scope.variable(
+        initial_value=center_bias, name='center_bias_needed', trainable=False,
+        use_resource=True)
+    # Create training graph.
+    def _train_op_fn(loss):
+      """Run one training iteration."""
+      if training_state_cache:
+        # Cache logits only after center_bias is complete, if it's in progress.
+        train_op.append(
+            control_flow_ops.cond(
+                center_bias_var, control_flow_ops.no_op,
+                lambda: training_state_cache.insert(tree_ids, node_ids, logits))
+        )
+
+      if closed_form_grad_and_hess_fn:
+        gradients, hessians = closed_form_grad_and_hess_fn(logits, labels)
+      else:
+        gradients = gradients_impl.gradients(loss, logits, name='Gradients')[0]
+        hessians = gradients_impl.gradients(
+            gradients, logits, name='Hessians')[0]
+
+      # TODO(youngheek): perhaps storage could be optimized by storing stats
+      # with the dimension max_splits_per_layer, instead of max_splits (for the
+      # entire tree).
+      max_splits = _get_max_splits(tree_hparams)
+
+      stats_summaries_list = []
+      for i, feature_ids in enumerate(feature_ids_list):
+        num_buckets = bucket_size_list[i]
+        summaries = [
+            array_ops.squeeze(
+                boosted_trees_ops.make_stats_summary(
+                    node_ids=node_ids,
+                    gradients=gradients,
+                    hessians=hessians,
+                    bucketized_features_list=[input_feature_list[f]],
+                    max_splits=max_splits,
+                    num_buckets=num_buckets),
+                axis=0) for f in feature_ids
+        ]
+        stats_summaries_list.append(summaries)
+      if center_bias:
+        update_model = control_flow_ops.cond(
+            center_bias_var,
+            functools.partial(
+                grower.center_bias,
+                center_bias_var,
+                gradients,
+                hessians,
+            ),
+            functools.partial(grower.grow_tree, stats_summaries_list,
+                              last_layer_nodes_range))
+      else:
+        update_model = grower.grow_tree(stats_summaries_list,
+                                        last_layer_nodes_range)
+      train_op.append(update_model)
+
+      with ops.control_dependencies([update_model]):
+        increment_global = state_ops.assign_add(global_step, 1).op
+        train_op.append(increment_global)
+
+      return control_flow_ops.group(train_op, name='train_op')
+
+  estimator_spec = head.create_estimator_spec(
+      features=features,
+      mode=mode,
+      labels=labels,
+      train_op_fn=_train_op_fn,
+      logits=logits)
+
+  # Add an early stop hook.
+  estimator_spec = estimator_spec._replace(
+      training_hooks=estimator_spec.training_hooks +
+      (_StopAtAttemptsHook(num_finalized_trees, num_attempted_layers,
+                           tree_hparams.n_trees, tree_hparams.max_depth),),
+      training_chief_hooks=[GrowerInitializationHook(grower.chief_init_op())] +
+      list(estimator_spec.training_chief_hooks))
+  return estimator_spec
+
+
+class GrowerInitializationHook(session_run_hook.SessionRunHook):
+  """A SessionRunHook handles initialization of `_EnsembleGrower`."""
+
+  def __init__(self, init_op):
+    self._init_op = init_op
+
+  def after_create_session(self, session, coord):
+    session.run(self._init_op)
+
+
+def _create_classification_head(n_classes,
+                                weight_column=None,
+                                label_vocabulary=None):
+  """Creates a classification head. Refer to canned.head for details on args."""
+  # TODO(nponomareva): Support multi-class cases.
+  if n_classes == 2:
+    # pylint: disable=protected-access
+    return head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column=weight_column,
+        label_vocabulary=label_vocabulary,
+        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+    # pylint: enable=protected-access
+  else:
+    raise ValueError('For now only binary classification is supported.'
+                     'n_classes given as {}'.format(n_classes))
+
+
+def _create_classification_head_and_closed_form(n_classes, weight_column,
+                                                label_vocabulary):
+  """Creates a head for classifier and the closed form gradients/hessians."""
+  head = _create_classification_head(n_classes, weight_column, label_vocabulary)
+  if (n_classes == 2 and head.logits_dimension == 1 and
+      weight_column is None and label_vocabulary is None):
+    # Use the closed-form gradients/hessians for 2 class.
+    def _grad_and_hess_for_logloss(logits, labels):
+      """A closed form gradient and hessian for logistic loss."""
+      # TODO(youngheek): add weights handling.
+      predictions = math_ops.reciprocal(math_ops.exp(-logits) + 1.0)
+      normalizer = math_ops.reciprocal(
+          math_ops.cast(array_ops.size(predictions), dtypes.float32))
+      labels = math_ops.cast(labels, dtypes.float32)
+      labels = head_lib._check_dense_labels_match_logits_and_reshape(  # pylint: disable=protected-access
+          labels, logits, head.logits_dimension)
+      gradients = (predictions - labels) * normalizer
+      hessians = predictions * (1.0 - predictions) * normalizer
+      return gradients, hessians
+
+    closed_form = _grad_and_hess_for_logloss
+  else:
+    closed_form = None
+  return (head, closed_form)
+
+
+def _create_regression_head(label_dimension, weight_column=None):
+  if label_dimension != 1:
+    raise ValueError('For now only 1 dimension regression is supported.'
+                     'label_dimension given as {}'.format(label_dimension))
+  # pylint: disable=protected-access
+  return head_lib._regression_head(
+      label_dimension=label_dimension,
+      weight_column=weight_column,
+      loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+  # pylint: enable=protected-access
+
+
+def _compute_feature_importances_per_tree(tree, num_features):
+  """Computes the importance of each feature in the tree."""
+  importances = np.zeros(num_features)
+
+  for node in tree.nodes:
+    node_type = node.WhichOneof('node')
+    if node_type == 'bucketized_split':
+      feature_id = node.bucketized_split.feature_id
+      importances[feature_id] += node.metadata.gain
+    elif node_type == 'leaf':
+      assert node.metadata.gain == 0
+    else:
+      raise ValueError('Unexpected split type %s', node_type)
+
+  return importances
+
+
+def _compute_feature_importances(tree_ensemble, num_features, normalize):
+  """Computes gain-based feature importances.
+
+  The higher the value, the more important the feature.
+
+  Args:
+    tree_ensemble: a trained tree ensemble, instance of proto
+      boosted_trees.TreeEnsemble.
+    num_features: The total number of feature ids.
+    normalize: If True, normalize the feature importances.
+
+  Returns:
+    sorted_feature_idx: A list of feature_id which is sorted
+      by its feature importance.
+    feature_importances: A list of corresponding feature importances.
+
+  Raises:
+    AssertionError: When normalize = True, if feature importances
+      contain negative value, or if normalization is not possible
+      (e.g. ensemble is empty or trees contain only a root node).
+  """
+  tree_importances = [_compute_feature_importances_per_tree(tree, num_features)
+                      for tree in tree_ensemble.trees]
+  tree_importances = np.array(tree_importances)
+  tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1)
+  feature_importances = np.sum(tree_importances * tree_weights, axis=0)
+  if normalize:
+    assert np.all(feature_importances >= 0), ('feature_importances '
+                                              'must be non-negative.')
+    normalizer = np.sum(feature_importances)
+    assert normalizer > 0, 'Trees are all empty or contain only a root node.'
+    feature_importances /= normalizer
+
+  sorted_feature_idx = np.argsort(feature_importances)[::-1]
+  return sorted_feature_idx, feature_importances[sorted_feature_idx]
+
+
+def _bt_explanations_fn(features,
+                        head,
+                        sorted_feature_columns,
+                        name='boosted_trees'):
+  """Gradient Boosted Trees predict with explanations model_fn.
+
+  Args:
+    features: dict of `Tensor`.
+    head: A `head_lib._Head` instance.
+    sorted_feature_columns: Sorted iterable of `feature_column._FeatureColumn`
+      model inputs.
+    name: Name used for the model.
+
+  Returns:
+      An `EstimatorSpec` instance.
+
+  Raises:
+    ValueError: mode or params are invalid, or features has the wrong type.
+  """
+  mode = model_fn_lib.ModeKeys.PREDICT
+  with ops.name_scope(name) as name:
+    # Create Ensemble resources.
+    tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
+
+    input_feature_list = _get_transformed_features(features,
+                                                   sorted_feature_columns)
+
+    logits = boosted_trees_ops.predict(
+        # For non-TRAIN mode, ensemble doesn't change after initialization,
+        # so no local copy is needed; using tree_ensemble directly.
+        tree_ensemble_handle=tree_ensemble.resource_handle,
+        bucketized_features=input_feature_list,
+        logits_dimension=head.logits_dimension)
+
+    estimator_spec = head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=None,
+        train_op_fn=control_flow_ops.no_op,
+        logits=logits)
+
+    debug_op = boosted_trees_ops.example_debug_outputs(
+        tree_ensemble.resource_handle,
+        bucketized_features=input_feature_list,
+        logits_dimension=head.logits_dimension)
+    estimator_spec.predictions[boosted_trees_utils._DEBUG_PROTO_KEY] = debug_op  # pylint: disable=protected-access
+    return estimator_spec
+
+
+class _BoostedTreesBase(estimator.Estimator):
+  """Base class for boosted trees estimators.
+
+  This class is intended to keep tree-specific functions (E.g., methods for
+  feature importances and directional feature contributions) in one central
+  place.
+
+  It is not a valid (working) Estimator on its own and should only be used as a
+  base class.
+  """
+
+  def __init__(self, model_fn, model_dir, config, feature_columns, head,
+               center_bias, is_classification):
+    """Initializes a `_BoostedTreesBase` instance.
+
+    Args:
+      model_fn: model_fn: Model function. See base class for more detail.
+      model_dir: Directory to save model parameters, graph and etc. See base
+        class for more detail.
+      config: `estimator.RunConfig` configuration object.
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `FeatureColumn`
+      head: A `head_lib._Head` instance.
+      center_bias: Whether bias centering needs to occur. Bias centering refers
+        to the first node in the very first tree returning the prediction that
+        is aligned with the original labels distribution. For example, for
+        regression problems, the first node will return the mean of the labels.
+        For binary classification problems, it will return a logit for a prior
+        probability of label 1.
+      is_classification: If the estimator is for classification.
+    """
+    super(_BoostedTreesBase, self).__init__(
+        model_fn=model_fn, model_dir=model_dir, config=config)
+    self._sorted_feature_columns = sorted(
+        feature_columns, key=lambda tc: tc.name)
+    self._head = head
+    self._n_features = _calculate_num_features(self._sorted_feature_columns)
+    self._names_for_feature_id = np.array(
+        _generate_feature_name_mapping(self._sorted_feature_columns))
+    self._center_bias = center_bias
+    self._is_classification = is_classification
+
+  def experimental_feature_importances(self, normalize=False):
+    """Computes gain-based feature importances.
+
+    The higher the value, the more important the corresponding feature.
+
+    Args:
+      normalize: If True, normalize the feature importances.
+
+    Returns:
+      sorted_feature_names: 1-D array of feature name which is sorted
+        by its feature importance.
+      feature_importances: 1-D array of the corresponding feature importance.
+
+    Raises:
+      ValueError: When attempting to normalize on an empty ensemble
+        or an ensemble of trees which have no splits. Or when attempting
+        to normalize and feature importances have negative values.
+    """
+    reader = checkpoint_utils.load_checkpoint(self._model_dir)
+    serialized = reader.get_tensor('boosted_trees:0_serialized')
+    if not serialized:
+      raise ValueError('Found empty serialized string for TreeEnsemble.'
+                       'You should only call this method after training.')
+    ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+    ensemble_proto.ParseFromString(serialized)
+
+    sorted_feature_id, importances = _compute_feature_importances(
+        ensemble_proto, self._n_features, normalize)
+    return self._names_for_feature_id[sorted_feature_id], importances
+
+  def experimental_predict_with_explanations(self,
+                                             input_fn,
+                                             predict_keys=None,
+                                             hooks=None,
+                                             checkpoint_path=None):
+    """Computes model explainability outputs per example along with predictions.
+
+    Currently supports directional feature contributions (DFCs). For each
+    instance, DFCs indicate the aggregate contribution of each feature. See
+    https://arxiv.org/abs/1312.1121 and
+    http://blog.datadive.net/interpreting-random-forests/ for more details.
+    Args:
+      input_fn: A function that provides input data for predicting as
+        minibatches. See [Premade Estimators](
+        https://tensorflow.org/guide/premade_estimators#create_input_functions)
+          for more information. The function should construct and return one of
+        the following:  * A `tf.data.Dataset` object: Outputs of `Dataset`
+          object must be a tuple `(features, labels)` with same constraints as
+        below. * A tuple `(features, labels)`: Where `features` is a `tf.Tensor`
+          or a dictionary of string feature name to `Tensor` and `labels` is a
+          `Tensor` or a dictionary of string label name to `Tensor`. Both
+          `features` and `labels` are consumed by `model_fn`. They should
+          satisfy the expectation of `model_fn` from inputs.
+      predict_keys: list of `str`, name of the keys to predict. It is used if
+        the `tf.estimator.EstimatorSpec.predictions` is a `dict`. If
+        `predict_keys` is used then rest of the predictions will be filtered
+        from the dictionary, with the exception of 'bias' and 'dfc', which will
+        always be in the dictionary. If `None`, returns all keys in prediction
+        dict, as well as two new keys 'dfc' and 'bias'.
+      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
+        callbacks inside the prediction call.
+      checkpoint_path: Path of a specific checkpoint to predict. If `None`, the
+        latest checkpoint in `model_dir` is used.  If there are no checkpoints
+        in `model_dir`, prediction is run with newly initialized `Variables`
+        instead of ones restored from checkpoint.
+
+    Yields:
+      Evaluated values of `predictions` tensors. The `predictions` tensors will
+      contain at least two keys 'dfc' and 'bias' for model explanations. The
+      `dfc` value corresponds to the contribution of each feature to the overall
+      prediction for this instance (positive indicating that the feature makes
+      it more likely to select class 1 and negative less likely). The 'bias'
+      value will be the same across all the instances, corresponding to the
+      probability (classification) or prediction (regression) of the training
+      data distribution.
+
+    Raises:
+      ValueError: when wrong arguments are given or unsupported functionalities
+       are requested.
+    """
+    if not self._center_bias:
+      raise ValueError('center_bias must be enabled during estimator '
+                       'instantiation when using '
+                       'experimental_predict_with_explanations.')
+    # pylint: disable=protected-access
+    if not self._is_classification:
+      identity_inverse_link_fn = self._head._inverse_link_fn in (None,
+                                                                 tf_identity)
+      # pylint:enable=protected-access
+      if not identity_inverse_link_fn:
+        raise ValueError(
+            'For now only identity inverse_link_fn in regression_head is '
+            'supported for experimental_predict_with_explanations.')
+
+    # pylint:disable=unused-argument
+    def new_model_fn(features, labels, mode):
+      return _bt_explanations_fn(features, self._head,
+                                 self._sorted_feature_columns)
+
+    # pylint:enable=unused-argument
+    est = estimator.Estimator(
+        model_fn=new_model_fn,
+        model_dir=self.model_dir,
+        config=self.config,
+        warm_start_from=self._warm_start_settings)
+    # Make sure bias and dfc will be in prediction dict.
+    user_supplied_predict_keys = predict_keys is not None
+    if user_supplied_predict_keys:
+      predict_keys = set(predict_keys)
+      predict_keys.add(boosted_trees_utils._DEBUG_PROTO_KEY)
+    predictions = est.predict(
+        input_fn,
+        predict_keys=predict_keys,
+        hooks=hooks,
+        checkpoint_path=checkpoint_path,
+        yield_single_examples=True)
+    for pred in predictions:
+      bias, dfcs = boosted_trees_utils._parse_explanations_from_prediction(
+          pred[boosted_trees_utils._DEBUG_PROTO_KEY], self._n_features,
+          self._is_classification)
+      pred['bias'] = bias
+      pred['dfc'] = dfcs
+      # Don't need to expose serialized proto to end user.
+      del pred[boosted_trees_utils._DEBUG_PROTO_KEY]
+      yield pred
+
+
+# pylint: disable=protected-access
+@estimator_export('estimator.BoostedTreesClassifier')
+class BoostedTreesClassifier(_BoostedTreesBase):
+  """A Classifier for Tensorflow Boosted Trees models.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               feature_columns,
+               n_batches_per_layer,
+               model_dir=None,
+               n_classes=_HOLD_FOR_MULTI_CLASS_SUPPORT,
+               weight_column=None,
+               label_vocabulary=None,
+               n_trees=100,
+               max_depth=6,
+               learning_rate=0.1,
+               l1_regularization=0.,
+               l2_regularization=0.,
+               tree_complexity=0.,
+               min_node_weight=0.,
+               config=None,
+               center_bias=False,
+               pruning_mode='none'):
+    """Initializes a `BoostedTreesClassifier` instance.
+
+    Example:
+
+    ```python
+    bucketized_feature_1 = bucketized_column(
+      numeric_column('feature_1'), BUCKET_BOUNDARIES_1)
+    bucketized_feature_2 = bucketized_column(
+      numeric_column('feature_2'), BUCKET_BOUNDARIES_2)
+
+    # Need to see a large portion of the data before we can build a layer, for
+    # example half of data n_batches_per_layer = 0.5 * NUM_EXAMPLES / BATCH_SIZE
+    classifier = estimator.BoostedTreesClassifier(
+        feature_columns=[bucketized_feature_1, bucketized_feature_2],
+        n_batches_per_layer=n_batches_per_layer,
+        n_trees=100,
+        ... <some other params>
+    )
+
+    def input_fn_train():
+      ...
+      return dataset
+
+    classifier.train(input_fn=input_fn_train)
+
+    def input_fn_eval():
+      ...
+      return dataset
+
+    metrics = classifier.evaluate(input_fn=input_fn_eval)
+    ```
+
+    Args:
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `FeatureColumn`.
+      n_batches_per_layer: the number of batches to collect statistics per
+        layer. The total number of batches is total number of data divided by
+        batch size.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      n_classes: number of label classes. Default is binary classification.
+        Multiclass support is not yet implemented.
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to downweight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      label_vocabulary: A list of strings represents possible label values. If
+        given, labels must be string type and have any value in
+        `label_vocabulary`. If it is not given, that means labels are
+        already encoded as integer or float within [0, 1] for `n_classes=2` and
+        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
+        Also there will be errors if vocabulary is not provided and labels are
+        string.
+      n_trees: number trees to be created.
+      max_depth: maximum depth of the tree to grow.
+      learning_rate: shrinkage parameter to be used when a tree added to the
+        model.
+      l1_regularization: regularization multiplier applied to the absolute
+        weights of the tree leafs.
+      l2_regularization: regularization multiplier applied to the square weights
+        of the tree leafs.
+      tree_complexity: regularization factor to penalize trees with more leaves.
+      min_node_weight: min_node_weight: minimum hessian a node must have for a
+        split to be considered. The value will be compared with
+        sum(leaf_hessian)/(batch_size * n_batches_per_layer).
+      config: `RunConfig` object to configure the runtime settings.
+      center_bias: Whether bias centering needs to occur. Bias centering refers
+        to the first node in the very first tree returning the prediction that
+        is aligned with the original labels distribution. For example, for
+        regression problems, the first node will return the mean of the labels.
+        For binary classification problems, it will return a logit for a prior
+        probability of label 1.
+      pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
+        pruning (do not split a node if not enough gain is observed) and post
+        pruning (build the tree up to a max depth and then prune branches with
+        negative gain). For pre and post pruning, you MUST provide
+        tree_complexity >0.
+
+    Raises:
+      ValueError: when wrong arguments are given or unsupported functionalities
+         are requested.
+    """
+    # TODO(nponomareva): Support multi-class cases.
+    if n_classes == _HOLD_FOR_MULTI_CLASS_SUPPORT:
+      n_classes = 2
+    head, closed_form = _create_classification_head_and_closed_form(
+        n_classes, weight_column, label_vocabulary=label_vocabulary)
+    # HParams for the model.
+    tree_hparams = _TreeHParams(
+        n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
+        tree_complexity, min_node_weight, center_bias, pruning_mode)
+
+    def _model_fn(features, labels, mode, config):
+      return _bt_model_fn(
+          features,
+          labels,
+          mode,
+          head,
+          feature_columns,
+          tree_hparams,
+          n_batches_per_layer,
+          config,
+          closed_form_grad_and_hess_fn=closed_form)
+
+    super(BoostedTreesClassifier, self).__init__(
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        feature_columns=feature_columns,
+        head=head,
+        center_bias=center_bias,
+        is_classification=True)
+
+
+@estimator_export('estimator.BoostedTreesRegressor')
+class BoostedTreesRegressor(_BoostedTreesBase):
+  """A Regressor for Tensorflow Boosted Trees models.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               feature_columns,
+               n_batches_per_layer,
+               model_dir=None,
+               label_dimension=_HOLD_FOR_MULTI_DIM_SUPPORT,
+               weight_column=None,
+               n_trees=100,
+               max_depth=6,
+               learning_rate=0.1,
+               l1_regularization=0.,
+               l2_regularization=0.,
+               tree_complexity=0.,
+               min_node_weight=0.,
+               config=None,
+               center_bias=False,
+               pruning_mode='none'):
+    """Initializes a `BoostedTreesRegressor` instance.
+
+    Example:
+
+    ```python
+    bucketized_feature_1 = bucketized_column(
+      numeric_column('feature_1'), BUCKET_BOUNDARIES_1)
+    bucketized_feature_2 = bucketized_column(
+      numeric_column('feature_2'), BUCKET_BOUNDARIES_2)
+
+    # Need to see a large portion of the data before we can build a layer, for
+    # example half of data n_batches_per_layer = 0.5 * NUM_EXAMPLES / BATCH_SIZE
+    regressor = estimator.BoostedTreesRegressor(
+        feature_columns=[bucketized_feature_1, bucketized_feature_2],
+        n_batches_per_layer=n_batches_per_layer,
+        n_trees=100,
+        ... <some other params>
+    )
+
+    def input_fn_train():
+      ...
+      return dataset
+
+    regressor.train(input_fn=input_fn_train)
+
+    def input_fn_eval():
+      ...
+      return dataset
+
+    metrics = regressor.evaluate(input_fn=input_fn_eval)
+    ```
+
+    Args:
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `FeatureColumn`.
+      n_batches_per_layer: the number of batches to collect statistics per
+        layer. The total number of batches is total number of data divided by
+        batch size.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      label_dimension: Number of regression targets per example.
+        Multi-dimensional support is not yet implemented.
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to downweight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      n_trees: number trees to be created.
+      max_depth: maximum depth of the tree to grow.
+      learning_rate: shrinkage parameter to be used when a tree added to the
+        model.
+      l1_regularization: regularization multiplier applied to the absolute
+        weights of the tree leafs.
+      l2_regularization: regularization multiplier applied to the square weights
+        of the tree leafs.
+      tree_complexity: regularization factor to penalize trees with more leaves.
+      min_node_weight: min_node_weight: minimum hessian a node must have for a
+        split to be considered. The value will be compared with
+        sum(leaf_hessian)/(batch_size * n_batches_per_layer).
+      config: `RunConfig` object to configure the runtime settings.
+      center_bias: Whether bias centering needs to occur. Bias centering refers
+        to the first node in the very first tree returning the prediction that
+        is aligned with the original labels distribution. For example, for
+        regression problems, the first node will return the mean of the labels.
+        For binary classification problems, it will return a logit for a prior
+        probability of label 1.
+      pruning_mode: one of 'none', 'pre', 'post' to indicate no pruning, pre-
+        pruning (do not split a node if not enough gain is observed) and post
+        pruning (build the tree up to a max depth and then prune branches with
+        negative gain). For pre and post pruning, you MUST provide
+        tree_complexity >0.
+
+    Raises:
+      ValueError: when wrong arguments are given or unsupported functionalities
+         are requested.
+    """
+    # TODO(nponomareva): Extend it to multi-dimension cases.
+    if label_dimension == _HOLD_FOR_MULTI_DIM_SUPPORT:
+      label_dimension = 1
+    head = _create_regression_head(label_dimension, weight_column)
+
+    # HParams for the model.
+    tree_hparams = _TreeHParams(
+        n_trees, max_depth, learning_rate, l1_regularization, l2_regularization,
+        tree_complexity, min_node_weight, center_bias, pruning_mode)
+
+    def _model_fn(features, labels, mode, config):
+      return _bt_model_fn(features, labels, mode, head, feature_columns,
+                          tree_hparams, n_batches_per_layer, config)
+
+    super(BoostedTreesRegressor, self).__init__(
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        feature_columns=feature_columns,
+        head=head,
+        center_bias=center_bias,
+        is_classification=False)
 
-# Include attrs that start with single underscore.
-boosted_trees.__all__ = [
-    s for s in dir(boosted_trees) if not s.startswith('__')
-]
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.boosted_trees import *
+# pylint: enable=protected-access
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
new file mode 100644
index 0000000000..23687a738b
--- /dev/null
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -0,0 +1,2549 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests boosted_trees estimators and model_fn."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from google.protobuf import text_format
+import numpy as np
+
+from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
+from tensorflow.python.client import session
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator import run_config
+from tensorflow.python.estimator.canned import boosted_trees
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import gen_boosted_trees_ops
+from tensorflow.python.ops import boosted_trees_ops
+from tensorflow.python.ops import resources
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import googletest
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.training import session_run_hook
+
+NUM_FEATURES = 3
+
+BUCKET_BOUNDARIES = [-2., .5, 12.]  # Boundaries for all the features.
+INPUT_FEATURES = np.array(
+    [
+        [12.5, 1.0, -2.001, -2.0001, -1.999],  # feature_0 quantized:[3,2,0,0,1]
+        [2.0, -3.0, 0.5, 0.0, 0.4995],         # feature_1 quantized:[2,0,2,1,1]
+        [3.0, 20.0, 50.0, -100.0, 102.75],     # feature_2 quantized:[2,3,3,0,3]
+    ],
+    dtype=np.float32)
+
+CLASSIFICATION_LABELS = [[0.], [1.], [1.], [0.], [0.]]
+REGRESSION_LABELS = [[1.5], [0.3], [0.2], [2.], [5.]]
+FEATURES_DICT = {'f_%d' % i: INPUT_FEATURES[i] for i in range(NUM_FEATURES)}
+
+# EXAMPLE_ID is not exposed to Estimator yet, but supported at model_fn level.
+EXAMPLE_IDS = np.array([0, 1, 2, 3, 4], dtype=np.int64)
+EXAMPLE_ID_COLUMN = '__example_id__'
+
+
+def _make_train_input_fn(is_classification):
+  """Makes train input_fn for classification/regression."""
+
+  def _input_fn():
+    features_dict = dict(FEATURES_DICT)  # copies the dict to add an entry.
+    features_dict[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS)
+    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
+    return features_dict, labels
+
+  return _input_fn
+
+
+def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None):
+  """Makes input_fn using Dataset."""
+
+  def _input_fn():
+    features_dict = dict(FEATURES_DICT)  # copies the dict to add an entry.
+    features_dict[EXAMPLE_ID_COLUMN] = constant_op.constant(EXAMPLE_IDS)
+    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
+    if batch:
+      ds = dataset_ops.Dataset.zip(
+          (dataset_ops.Dataset.from_tensor_slices(features_dict),
+           dataset_ops.Dataset.from_tensor_slices(labels))).batch(batch)
+    else:
+      ds = dataset_ops.Dataset.zip(
+          (dataset_ops.Dataset.from_tensors(features_dict),
+           dataset_ops.Dataset.from_tensors(labels)))
+    # repeat indefinitely by default, or stop at the given step.
+    ds = ds.repeat(repeat)
+    return ds
+
+  return _input_fn
+
+
+class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    self._feature_columns = {
+        feature_column.bucketized_column(
+            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
+            BUCKET_BOUNDARIES)
+        for i in range(NUM_FEATURES)
+    }
+
+  def _assert_checkpoint(self, model_dir, global_step, finalized_trees,
+                         attempted_layers):
+    self._assert_checkpoint_and_return_model(model_dir, global_step,
+                                             finalized_trees, attempted_layers)
+
+  def _assert_checkpoint_and_return_model(self, model_dir, global_step,
+                                          finalized_trees, attempted_layers):
+    reader = checkpoint_utils.load_checkpoint(model_dir)
+    self.assertEqual(global_step, reader.get_tensor(ops.GraphKeys.GLOBAL_STEP))
+    serialized = reader.get_tensor('boosted_trees:0_serialized')
+    ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+    ensemble_proto.ParseFromString(serialized)
+
+    self.assertEqual(
+        finalized_trees,
+        sum([1 for t in ensemble_proto.tree_metadata if t.is_finalized]))
+    self.assertEqual(attempted_layers,
+                     ensemble_proto.growing_metadata.num_layers_attempted)
+
+    return ensemble_proto
+
+  def testFirstCheckpointWorksFine(self):
+    """Tests that eval/pred doesn't crash with the very first checkpoint.
+
+    The step-0 checkpoint will have only an empty ensemble, and a separate eval
+    job might read from it and crash.
+    This test ensures that prediction/evaluation works fine with it.
+    """
+    input_fn = _make_train_input_fn(is_classification=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    class BailOutWithoutTraining(session_run_hook.SessionRunHook):
+
+      def before_run(self, run_context):
+        raise StopIteration('to bail out.')
+
+    est.train(input_fn, steps=100,  # must stop at 0 anyway.
+              hooks=[BailOutWithoutTraining()])
+    self._assert_checkpoint(
+        est.model_dir, global_step=0, finalized_trees=0, attempted_layers=0)
+    # Empty ensemble returns 0 logits, so that all output labels are 0.
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 0.6)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose([[0], [0], [0], [0], [0]],
+                        [pred['class_ids'] for pred in predictions])
+
+  def testTrainAndEvaluateBinaryClassifier(self):
+    input_fn = _make_train_input_fn(is_classification=True)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    # It will stop after 5 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(input_fn, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+
+  def testTrainTwiceAndEvaluateBinaryClassifier(self):
+    input_fn = _make_train_input_fn(is_classification=True)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=5,
+        max_depth=10)
+
+    num_steps = 2
+    # Train for a few steps, and validate final checkpoint.
+    est.train(input_fn, steps=num_steps)
+    est.train(input_fn, steps=num_steps)
+
+    self._assert_checkpoint(
+        est.model_dir, global_step=num_steps * 2,
+        finalized_trees=0, attempted_layers=4)
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+
+  def testInferBinaryClassifier(self):
+    train_input_fn = _make_train_input_fn(is_classification=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    # It will stop after 5 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(train_input_fn, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose([[0], [1], [1], [0], [0]],
+                        [pred['class_ids'] for pred in predictions])
+
+  def testTrainClassifierWithRankOneLabel(self):
+    """Tests that label with rank-1 tensor is also accepted by classifier."""
+    def _input_fn_with_rank_one_label():
+      return FEATURES_DICT, [0., 1., 1., 0., 0.]
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    # It will stop after 5 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(_input_fn_with_rank_one_label, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    eval_res = est.evaluate(input_fn=_input_fn_with_rank_one_label, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+
+  def testTrainClassifierWithLabelVocabulary(self):
+    apple, banana = 'apple', 'banana'
+    def _input_fn_with_label_vocab():
+      return FEATURES_DICT, [[apple], [banana], [banana], [apple], [apple]]
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        label_vocabulary=[apple, banana])
+    est.train(input_fn=_input_fn_with_label_vocab, steps=5)
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    eval_res = est.evaluate(input_fn=_input_fn_with_label_vocab, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose([[0], [1], [1], [0], [0]],
+                        [pred['class_ids'] for pred in predictions])
+
+  def testTrainClassifierWithIntegerLabel(self):
+    def _input_fn_with_integer_label():
+      return (FEATURES_DICT,
+              constant_op.constant([[0], [1], [1], [0], [0]], dtypes.int32))
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+    est.train(input_fn=_input_fn_with_integer_label, steps=5)
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    eval_res = est.evaluate(input_fn=_input_fn_with_integer_label, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose([[0], [1], [1], [0], [0]],
+                        [pred['class_ids'] for pred in predictions])
+
+  def testTrainClassifierWithDataset(self):
+    train_input_fn = _make_train_input_fn_dataset(is_classification=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+    est.train(train_input_fn, steps=100)  # will stop after 5 steps anyway.
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['accuracy'], 1.0)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose([[0], [1], [1], [0], [0]],
+                        [pred['class_ids'] for pred in predictions])
+
+  def testTrainAndEvaluateRegressor(self):
+    input_fn = _make_train_input_fn(is_classification=False)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        max_depth=5)
+
+    # It will stop after 10 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(input_fn, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10)
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 1.008551)
+
+  def testInferRegressor(self):
+    train_input_fn = _make_train_input_fn(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    # It will stop after 5 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(train_input_fn, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose(
+        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
+        [pred['predictions'] for pred in predictions])
+
+  def testTrainRegressorWithRankOneLabel(self):
+    """Tests that label with rank-1 tensor is also accepted by regressor."""
+    def _input_fn_with_rank_one_label():
+      return FEATURES_DICT, [1.5, 0.3, 0.2, 2., 5.]
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    # It will stop after 5 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(_input_fn_with_rank_one_label, steps=num_steps)
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    eval_res = est.evaluate(input_fn=_input_fn_with_rank_one_label, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 2.478283)
+
+  def testTrainRegressorWithDataset(self):
+    train_input_fn = _make_train_input_fn_dataset(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+    est.train(train_input_fn, steps=100)  # will stop after 5 steps anyway.
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 2.478283)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose(
+        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
+        [pred['predictions'] for pred in predictions])
+
+  def testTrainRegressorWithDatasetBatch(self):
+    # The batch_size as the entire data size should yield the same result as
+    # dataset without batching.
+    train_input_fn = _make_train_input_fn_dataset(
+        is_classification=False, batch=5)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+    est.train(train_input_fn, steps=100)  # will stop after 5 steps anyway.
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 2.478283)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose(
+        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
+        [pred['predictions'] for pred in predictions])
+
+  def testTrainRegressorWithDatasetLargerBatch(self):
+    # The batch_size as the multiple of the entire data size should still yield
+    # the same result.
+    train_input_fn = _make_train_input_fn_dataset(
+        is_classification=False, batch=15)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+    est.train(train_input_fn, steps=100)  # will stop after 5 steps anyway.
+    self._assert_checkpoint(
+        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 2.478283)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose(
+        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
+        [pred['predictions'] for pred in predictions])
+
+  def testTrainRegressorWithDatasetSmallerBatch(self):
+    # Even when using small batches, if (n_batches_per_layer * batch_size) makes
+    # the same entire data size, the result should be the same.
+    train_input_fn = _make_train_input_fn_dataset(
+        is_classification=False, batch=1)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=5,
+        n_trees=1,
+        max_depth=5)
+    # Train stops after (n_batches_per_layer * n_trees * max_depth) steps.
+    est.train(train_input_fn, steps=100)
+    self._assert_checkpoint(
+        est.model_dir, global_step=25, finalized_trees=1, attempted_layers=5)
+    # 5 batches = one epoch.
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=5)
+    self.assertAllClose(eval_res['average_loss'], 2.478283)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose(
+        [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
+        [pred['predictions'] for pred in predictions])
+
+  def testTrainRegressorWithDatasetWhenInputIsOverEarlier(self):
+    train_input_fn = _make_train_input_fn_dataset(
+        is_classification=False, repeat=3)  # to stop input after 3 steps.
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+    # Note that training will stop when input exhausts.
+    # This might not be a typical pattern, but dataset.repeat(3) causes
+    # the input stream to cease after 3 steps.
+    est.train(train_input_fn, steps=100)
+    self._assert_checkpoint(
+        est.model_dir, global_step=3, finalized_trees=0, attempted_layers=3)
+    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
+    self.assertAllClose(eval_res['average_loss'], 3.777295)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+    self.assertAllClose(
+        [[0.353850], [0.254100], [0.106850], [0.712100], [1.012100]],
+        [pred['predictions'] for pred in predictions])
+
+  def testTrainEvaluateAndPredictWithIndicatorColumn(self):
+    categorical = feature_column.categorical_column_with_vocabulary_list(
+        key='categorical', vocabulary_list=('bad', 'good', 'ok'))
+    feature_indicator = feature_column.indicator_column(categorical)
+    bucketized_col = feature_column.bucketized_column(
+        feature_column.numeric_column(
+            'an_uninformative_feature', dtype=dtypes.float32),
+        BUCKET_BOUNDARIES)
+
+    labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
+    # Our categorical feature defines the labels perfectly
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'an_uninformative_feature': np.array([1, 1, 1, 1, 1]),
+            'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
+        },
+        y=labels,
+        batch_size=5,
+        shuffle=False)
+
+    # Train depth 1 tree.
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=[bucketized_col, feature_indicator],
+        n_batches_per_layer=1,
+        n_trees=1,
+        learning_rate=1.0,
+        max_depth=1)
+
+    num_steps = 1
+    est.train(input_fn, steps=num_steps)
+    ensemble = self._assert_checkpoint_and_return_model(
+        est.model_dir, global_step=1, finalized_trees=1, attempted_layers=1)
+
+    # We learnt perfectly.
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['loss'], 0)
+
+    predictions = list(est.predict(input_fn))
+    self.assertAllClose(
+        labels,
+        [pred['predictions'] for pred in predictions])
+
+    self.assertEqual(3, len(ensemble.trees[0].nodes))
+
+    # Check that the split happened on 'good' value, which will be encoded as
+    # feature with index 2 (0-numeric, 1 - 'bad')
+    self.assertEqual(2, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
+    self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
+
+  def testTrainEvaluateAndPredictWithOnlyIndicatorColumn(self):
+    categorical = feature_column.categorical_column_with_vocabulary_list(
+        key='categorical', vocabulary_list=('bad', 'good', 'ok'))
+    feature_indicator = feature_column.indicator_column(categorical)
+
+    labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
+    # Our categorical feature defines the labels perfectly
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
+        },
+        y=labels,
+        batch_size=5,
+        shuffle=False)
+
+    # Train depth 1 tree.
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=[feature_indicator],
+        n_batches_per_layer=1,
+        n_trees=1,
+        learning_rate=1.0,
+        max_depth=1)
+
+    num_steps = 1
+    est.train(input_fn, steps=num_steps)
+    ensemble = self._assert_checkpoint_and_return_model(
+        est.model_dir, global_step=1, finalized_trees=1, attempted_layers=1)
+
+    # We learnt perfectly.
+    eval_res = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertAllClose(eval_res['loss'], 0)
+
+    predictions = list(est.predict(input_fn))
+    self.assertAllClose(
+        labels,
+        [pred['predictions'] for pred in predictions])
+
+    self.assertEqual(3, len(ensemble.trees[0].nodes))
+
+    # Check that the split happened on 'good' value, which will be encoded as
+    # feature with index 1 (0 - 'bad', 2 - 'ok')
+    self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
+    self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
+
+  def testFeatureImportancesWithTrainedEnsemble(self):
+    input_fn = _make_train_input_fn(is_classification=True)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        max_depth=5)
+
+    # It will stop after 5 steps because of the max depth and num trees.
+    num_steps = 100
+    # Train for a few steps, and validate final checkpoint.
+    est.train(input_fn, steps=num_steps)
+
+    feature_names_expected = ['f_0_bucketized',
+                              'f_2_bucketized',
+                              'f_1_bucketized']
+
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.833933, 0.606342, 0.0], importances)
+
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.579010, 0.420990, 0.0], importances)
+
+  def testFeatureImportancesOnEmptyEnsemble(self):
+    input_fn = _make_train_input_fn(is_classification=True)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    class BailOutWithoutTraining(session_run_hook.SessionRunHook):
+
+      def before_run(self, run_context):
+        raise StopIteration('to bail out.')
+
+    # The step-0 checkpoint will have only an empty ensemble.
+    est.train(input_fn,
+              steps=100,  # must stop at 0 anyway.
+              hooks=[BailOutWithoutTraining()])
+
+    with self.assertRaisesRegexp(ValueError, 'empty serialized string'):
+      est.experimental_feature_importances(normalize=False)
+
+    with self.assertRaisesRegexp(ValueError, 'empty serialized string'):
+      est.experimental_feature_importances(normalize=True)
+
+  def _create_fake_checkpoint_with_tree_ensemble_proto(self,
+                                                       est,
+                                                       tree_ensemble_text):
+    with ops.Graph().as_default():
+      with ops.name_scope('boosted_trees') as name:
+        tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name)
+        tree_ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+        text_format.Merge(tree_ensemble_text, tree_ensemble_proto)
+        stamp_token, _ = tree_ensemble.serialize()
+        restore_op = tree_ensemble.deserialize(
+            stamp_token, tree_ensemble_proto.SerializeToString())
+
+        with session.Session() as sess:
+          resources.initialize_resources(resources.shared_resources()).run()
+          restore_op.run()
+          saver = saver_lib.Saver()
+          save_path = os.path.join(est.model_dir, 'model.ckpt')
+          saver.save(sess, save_path)
+
+  def testFeatureImportancesOnNonEmptyEnsemble(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 3.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 7
+              right_id: 8
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 3.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 3.34
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 3.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
+
+    feature_names_expected = ['f_0_bucketized',
+                              'f_2_bucketized',
+                              'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    # Gain sum for each features:
+    # = 1.0 * [3 + 1, 2, 2] + 1.0 * [1, 1, 0]
+    self.assertAllClose([5.0, 3.0, 2.0], importances)
+
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.5, 0.3, 0.2], importances)
+
+  def testFeatureImportancesWithTreeWeights(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=3,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 12.5
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 5.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 5.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+        }
+        trees {
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        tree_weights: 0.4
+        tree_weights: 0.6
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
+
+    feature_names_expected = ['f_0_bucketized',
+                              'f_2_bucketized',
+                              'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    # Gain sum for each features:
+    # = 0.4 * [12.5, 0, 5] + 0.6 * [0, 5, 0] + 1.0 * [0, 0, 0]
+    self.assertAllClose([5.0, 3.0, 2.0], importances)
+
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.5, 0.3, 0.2], importances)
+
+  def testFeatureImportancesWithAllEmptyTree(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=2,
+        max_depth=5)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
+
+    # Reverse order because feature importances are sorted by np.argsort(f)[::-1]
+    feature_names_expected = ['f_2_bucketized',
+                              'f_1_bucketized',
+                              'f_0_bucketized']
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.0, 0.0, 0.0], importances)
+
+    with self.assertRaisesRegexp(AssertionError,
+                                 'all empty or contain only a root node'):
+      est.experimental_feature_importances(normalize=True)
+
+  def testNegativeFeatureImportances(self):
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5)
+
+    # In order to generate a negative feature importances,
+    # We assign an invalid value -1 to tree_weights here.
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 5.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+        }
+        tree_weights: -1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
+
+    # Github #21509 (nataliaponomareva):
+    # The gains stored in the splits can be negative
+    # if people are using complexity regularization.
+    feature_names_expected = ['f_2_bucketized',
+                              'f_0_bucketized',
+                              'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.0, 0.0, -5.0], importances)
+
+    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
+      est.experimental_feature_importances(normalize=True)
+
+  def testFeatureImportancesNamesForCategoricalColumn(self):
+    categorical = feature_column.categorical_column_with_vocabulary_list(
+        key='categorical', vocabulary_list=('bad', 'good', 'ok'))
+    feature_indicator = feature_column.indicator_column(categorical)
+    bucketized_col = feature_column.bucketized_column(
+        feature_column.numeric_column(
+            'continuous', dtype=dtypes.float32),
+        BUCKET_BOUNDARIES)
+    bucketized_indicator = feature_column.indicator_column(bucketized_col)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=[feature_indicator,
+                         bucketized_col,
+                         bucketized_indicator],
+        n_batches_per_layer=1,
+        n_trees=2,
+        learning_rate=1.0,
+        max_depth=1)
+
+    tree_ensemble_text = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 5.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 4
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.0
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 5
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 2.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -2.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 3.34
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 4.34
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        """
+    self._create_fake_checkpoint_with_tree_ensemble_proto(
+        est, tree_ensemble_text)
+
+    feature_names_expected = ['categorical_indicator:ok',
+                              'continuous_bucketized_indicator:(-2.0, 0.5)',
+                              'continuous_bucketized_indicator:(-inf, -2.0)',
+                              'categorical_indicator:bad',
+                              # Reverse order because feature importances
+                              # are sorted by np.argsort(f)[::-1]
+                              'continuous_bucketized_indicator:(12.0, inf)',
+                              'continuous_bucketized_indicator:(0.5, 12.0)',
+                              'continuous_bucketized',
+                              'categorical_indicator:good']
+
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    # Gain sum for each features:
+    # = 1.0 * [5, 0, 2, 0, 0, 0, 0, 0] + 1.0 * [0, 2, 0, 1, 0, 0, 0, 0]
+    self.assertAllClose([5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0], importances)
+
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=True)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.5, 0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], importances)
+
+  def testFeatureImportancesNamesForUnsupportedColumn(self):
+    numeric_col = feature_column.numeric_column(
+        'continuous', dtype=dtypes.float32)
+
+    with self.assertRaisesRegexp(ValueError,
+                                 'only bucketized_column and indicator_column'):
+      _ = boosted_trees.BoostedTreesRegressor(
+          feature_columns=[numeric_col],
+          n_batches_per_layer=1,
+          n_trees=2,
+          learning_rate=1.0,
+          max_depth=1)
+
+  def testTreeComplexityIsSetCorrectly(self):
+    input_fn = _make_train_input_fn(is_classification=True)
+
+    num_steps = 10
+    # Tree complexity is set but no pruning.
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        tree_complexity=1e-3)
+    with self.assertRaisesRegexp(ValueError, 'Tree complexity have no effect'):
+      est.train(input_fn, steps=num_steps)
+
+    # Pruning but no tree complexity.
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        pruning_mode='pre')
+    with self.assertRaisesRegexp(ValueError,
+                                 'tree_complexity must be positive'):
+      est.train(input_fn, steps=num_steps)
+
+    # All is good.
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        pruning_mode='pre',
+        tree_complexity=1e-3)
+    est.train(input_fn, steps=num_steps)
+
+
+class BoostedTreesDebugOutputsTest(test_util.TensorFlowTestCase):
+  """Test debug/model explainability outputs for individual predictions.
+
+  Includes directional feature contributions (DFC).
+  """
+
+  def setUp(self):
+    self._feature_columns = {
+        feature_column.bucketized_column(
+            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
+            BUCKET_BOUNDARIES) for i in range(NUM_FEATURES)
+    }
+
+  def testBinaryClassifierThatDFCIsInPredictions(self):
+    train_input_fn = _make_train_input_fn(is_classification=True)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=3, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesClassifier(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        center_bias=True)
+
+    num_steps = 100
+    # Train for a few steps. Validate debug outputs in prediction dicts.
+    est.train(train_input_fn, steps=num_steps)
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn)
+    biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
+                         for pred in debug_predictions])
+    self.assertAllClose([0.4] * 5, biases)
+    self.assertAllClose(({
+        0: -0.12108613453574479,
+        1: 0.0,
+        2: -0.039254929814481143
+    }, {
+        0: 0.19650601422250574,
+        1: 0.0,
+        2: 0.02693827052766018
+    }, {
+        0: 0.16057487356133376,
+        1: 0.0,
+        2: 0.02693827052766018
+    }, {
+        0: -0.12108613453574479,
+        1: 0.0,
+        2: -0.039254929814481143
+    }, {
+        0: -0.10832468554550384,
+        1: 0.0,
+        2: 0.02693827052766018
+    }), dfcs)
+
+    # Assert sum(dfcs) + bias == probabilities.
+    expected_probabilities = [
+        0.23965894, 0.62344426, 0.58751315, 0.23965894, 0.31861359
+    ]
+    probabilities = [
+        sum(dfc.values()) + bias for (dfc, bias) in zip(dfcs, biases)
+    ]
+    self.assertAllClose(expected_probabilities, probabilities)
+
+    # When user doesn't include bias or dfc in predict_keys, make sure to still
+    # include dfc and bias.
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn, predict_keys=['probabilities'])
+    for prediction_dict in debug_predictions:
+      self.assertTrue('bias' in prediction_dict)
+      self.assertTrue('dfc' in prediction_dict)
+      self.assertTrue('probabilities' in prediction_dict)
+      self.assertEqual(len(prediction_dict), 3)
+
+  def testRegressorThatDFCIsInPredictions(self):
+    train_input_fn = _make_train_input_fn(is_classification=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
+
+    est = boosted_trees.BoostedTreesRegressor(
+        feature_columns=self._feature_columns,
+        n_batches_per_layer=1,
+        n_trees=1,
+        max_depth=5,
+        center_bias=True)
+
+    num_steps = 100
+    # Train for a few steps. Validate debug outputs in prediction dicts.
+    est.train(train_input_fn, steps=num_steps)
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn)
+    biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
+                         for pred in debug_predictions])
+    self.assertAllClose([1.8] * 5, biases)
+    self.assertAllClose(({
+        0: -0.070499420166015625,
+        1: -0.095000028610229492,
+        2: 0.0
+    }, {
+        0: -0.53763031959533691,
+        1: 0.063333392143249512,
+        2: 0.0
+    }, {
+        0: -0.51756942272186279,
+        1: -0.095000028610229492,
+        2: 0.0
+    }, {
+        0: 0.1563495397567749,
+        1: 0.063333392143249512,
+        2: 0.0
+    }, {
+        0: 0.96934974193572998,
+        1: 0.063333392143249512,
+        2: 0.0
+    }), dfcs)
+
+    # Assert sum(dfcs) + bias == predictions.
+    expected_predictions = [[1.6345005], [1.32570302], [1.1874305],
+                            [2.01968288], [2.83268309]]
+    predictions = [
+        [sum(dfc.values()) + bias] for (dfc, bias) in zip(dfcs, biases)
+    ]
+    self.assertAllClose(expected_predictions, predictions)
+
+    # Test when user doesn't include bias or dfc in predict_keys.
+    debug_predictions = est.experimental_predict_with_explanations(
+        predict_input_fn, predict_keys=['predictions'])
+    for prediction_dict in debug_predictions:
+      self.assertTrue('bias' in prediction_dict)
+      self.assertTrue('dfc' in prediction_dict)
+      self.assertTrue('predictions' in prediction_dict)
+      self.assertEqual(len(prediction_dict), 3)
+
+
+class ModelFnTests(test_util.TensorFlowTestCase):
+  """Tests bt_model_fn including unexposed internal functionalities."""
+
+  def setUp(self):
+    self._feature_columns = {
+        feature_column.bucketized_column(
+            feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32),
+            BUCKET_BOUNDARIES) for i in range(NUM_FEATURES)
+    }
+
+  def _get_expected_ensembles_for_classification(self):
+    first_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              threshold: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.387675
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.181818
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.0625
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 1
+          is_finalized: false
+        }
+        growing_metadata {
+          num_trees_attempted: 1
+          num_layers_attempted: 1
+          last_layer_node_start: 1
+          last_layer_node_end: 3
+        }
+        """
+    second_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              threshold: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.387675
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 3
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 0.0
+              original_leaf {
+                scalar: -0.181818
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 0
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 0.105518
+              original_leaf {
+                scalar: 0.0625
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.348397
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.181818
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.224091
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.056815
+            }
+          }
+        }
+        trees {
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 2
+          is_finalized: true
+        }
+        tree_metadata {
+          num_layers_grown: 0
+          is_finalized: false
+        }
+        growing_metadata {
+          num_trees_attempted: 1
+          num_layers_attempted: 2
+          last_layer_node_start: 0
+          last_layer_node_end: 1
+        }
+        """
+    third_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              threshold: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.387675
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 3
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 0.0
+              original_leaf {
+                scalar: -0.181818
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 0
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 0.105518
+              original_leaf {
+                scalar: 0.0625
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.348397
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.181818
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.224091
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.056815
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.287131
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.162042
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.086986
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 2
+          is_finalized: true
+        }
+        tree_metadata {
+          num_layers_grown: 1
+          is_finalized: false
+        }
+        growing_metadata {
+          num_trees_attempted: 2
+          num_layers_attempted: 3
+          last_layer_node_start: 1
+          last_layer_node_end: 3
+        }
+        """
+    return (first_round, second_round, third_round)
+
+  def _get_expected_ensembles_for_classification_with_bias(self):
+    first_round = """
+        trees {
+          nodes {
+            leaf {
+              scalar: -0.405086
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_metadata {
+        }
+        """
+    second_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              threshold: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.407711
+              original_leaf {
+                scalar: -0.405086
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.556054
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.301233
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 1
+          is_finalized: false
+        }
+        growing_metadata {
+          num_trees_attempted: 1
+          num_layers_attempted: 1
+          last_layer_node_start: 1
+          last_layer_node_end: 3
+        }
+        """
+    third_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              threshold: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.407711
+              original_leaf {
+                scalar: -0.405086
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 3
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              original_leaf {
+                scalar: -0.556054
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 0
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 0.09876
+              original_leaf {
+                scalar: -0.301233
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.698072
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.556054
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.106016
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.27349
+            }
+          }
+        }
+        trees {
+          nodes {
+            leaf {
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 2
+          is_finalized: true
+        }
+        tree_metadata {
+        }
+        growing_metadata {
+          num_trees_attempted: 1
+          num_layers_attempted: 2
+          last_layer_node_end: 1
+        }
+        """
+    forth_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              threshold: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.4077113
+              original_leaf {
+                scalar: -0.405086
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              threshold: 3
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              original_leaf {
+                scalar: -0.556054
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              threshold: 0
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 0.09876
+              original_leaf {
+                scalar: -0.301233
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.698072
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.556054
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.106016
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.27349
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 2
+              threshold: 2
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.289927
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.134588
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.083838            
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 2
+          is_finalized: true
+        }
+        tree_metadata {
+          num_layers_grown: 1
+        }
+        growing_metadata {
+          num_trees_attempted: 2
+          num_layers_attempted: 3
+          last_layer_node_start: 1
+          last_layer_node_end: 3
+        }
+        """
+    return (first_round, second_round, third_round, forth_round)
+
+  def _get_expected_ensembles_for_regression(self):
+    first_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.169714
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.241322
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.083951
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 1
+          is_finalized: false
+        }
+        growing_metadata {
+          num_trees_attempted: 1
+          num_layers_attempted: 1
+          last_layer_node_start: 1
+          last_layer_node_end: 3
+        }
+        """
+    second_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.169714
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 1
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 2.673407
+              original_leaf {
+                scalar: 0.241322
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 0
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 0.324102
+              original_leaf {
+                scalar: 0.083951
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.563167
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.247047
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.095273
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.222102
+            }
+          }
+        }
+        trees {
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 2
+          is_finalized: true
+        }
+        tree_metadata {
+          num_layers_grown: 0
+          is_finalized: false
+        }
+        growing_metadata {
+          num_trees_attempted: 1
+          num_layers_attempted: 2
+          last_layer_node_start: 0
+          last_layer_node_end: 1
+        }
+        """
+    third_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.169714
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 1
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 2.673407
+              original_leaf {
+                scalar: 0.241322
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 0
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 0.324102
+              original_leaf {
+                scalar: 0.083951
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.563167
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.247047
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.095273
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.222102
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 0
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.981026
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.005166
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.180281
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 2
+          is_finalized: true
+        }
+        tree_metadata {
+          num_layers_grown: 1
+          is_finalized: false
+        }
+        growing_metadata {
+          num_trees_attempted: 2
+          num_layers_attempted: 3
+          last_layer_node_start: 1
+          last_layer_node_end: 3
+        }
+        """
+    return (first_round, second_round, third_round)
+
+  def _get_expected_ensembles_for_regression_with_bias(self):
+    first_round = """
+        trees {
+          nodes {
+            leaf {
+              scalar: 1.799974
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_metadata {
+        }
+        """
+    second_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.190442
+              original_leaf {
+                scalar: 1.799974
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.862786
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.706149
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 1
+          is_finalized: false
+        }
+        growing_metadata {
+          num_trees_attempted: 1
+          num_layers_attempted: 1
+          last_layer_node_start: 1
+          last_layer_node_end: 3
+        }
+        """
+    third_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.190442
+              original_leaf {
+                scalar: 1.799974
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 1
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 2.683594
+              original_leaf {
+                scalar: 1.862786
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              feature_id: 0
+              threshold: 0
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 0.322693
+              original_leaf {
+                scalar: 1.706149
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 2.024487
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.710319
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.559208
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.686037
+            }
+          }
+        }
+        trees {
+          nodes {
+            leaf {
+              scalar: 0.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 2
+          is_finalized: true
+        }
+        tree_metadata {
+          num_layers_grown: 0
+          is_finalized: false
+        }
+        growing_metadata {
+          num_trees_attempted: 1
+          num_layers_attempted: 2
+          last_layer_node_start: 0
+          last_layer_node_end: 1
+        }
+        """
+    forth_round = """
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              threshold: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 1.190442
+              original_leaf {
+                scalar:  1.799974
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              threshold: 1
+              left_id: 3
+              right_id: 4
+            }
+            metadata {
+              gain: 2.683594
+              original_leaf {
+                scalar: 1.8627863
+              }
+            }
+          }
+          nodes {
+            bucketized_split {
+              left_id: 5
+              right_id: 6
+            }
+            metadata {
+              gain: 0.322693
+              original_leaf {
+                scalar: 1.706149
+              }
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 2.024487
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.710319
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.5592078
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 1.686037
+            }
+          }
+        }
+        trees {
+          nodes {
+            bucketized_split {
+              feature_id: 1
+              left_id: 1
+              right_id: 2
+            }
+            metadata {
+              gain: 0.972589
+            }
+          }
+          nodes {
+            leaf {
+              scalar: -0.137592
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 0.034926
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_weights: 1.0
+        tree_metadata {
+          num_layers_grown: 2
+          is_finalized: true
+        }
+        tree_metadata {
+          num_layers_grown: 1
+        }
+        growing_metadata {
+          num_trees_attempted: 2
+          num_layers_attempted: 3
+          last_layer_node_start: 1
+          last_layer_node_end: 3
+        }
+        """
+    return (first_round, second_round, third_round, forth_round)
+
+  def _get_train_op_and_ensemble(self,
+                                 head,
+                                 config,
+                                 is_classification,
+                                 train_in_memory,
+                                 center_bias=False):
+    """Calls bt_model_fn() and returns the train_op and ensemble_serialzed."""
+    features, labels = _make_train_input_fn(is_classification)()
+
+    tree_hparams = boosted_trees._TreeHParams(  # pylint:disable=protected-access
+        n_trees=2,
+        max_depth=2,
+        learning_rate=0.1,
+        l1=0.,
+        l2=0.01,
+        tree_complexity=0.,
+        min_node_weight=0.,
+        center_bias=center_bias,
+        pruning_mode='none')
+
+    estimator_spec = boosted_trees._bt_model_fn(  # pylint:disable=protected-access
+        features=features,
+        labels=labels,
+        mode=model_fn.ModeKeys.TRAIN,
+        head=head,
+        feature_columns=self._feature_columns,
+        tree_hparams=tree_hparams,
+        example_id_column_name=EXAMPLE_ID_COLUMN,
+        n_batches_per_layer=1,
+        config=config,
+        train_in_memory=train_in_memory)
+    resources.initialize_resources(resources.shared_resources()).run()
+    variables.global_variables_initializer().run()
+    variables.local_variables_initializer().run()
+
+    # Gets the train_op and serialized proto of the ensemble.
+    shared_resources = resources.shared_resources()
+    self.assertEqual(1, len(shared_resources))
+    train_op = estimator_spec.train_op
+    with ops.control_dependencies([train_op]):
+      _, ensemble_serialized = (
+          gen_boosted_trees_ops.boosted_trees_serialize_ensemble(
+              shared_resources[0].handle))
+    return train_op, ensemble_serialized
+
+  def testTrainClassifierInMemory(self):
+    ops.reset_default_graph()
+    expected_first, expected_second, expected_third = (
+        self._get_expected_ensembles_for_classification())
+    with self.cached_session() as sess:
+      # Train with train_in_memory mode.
+      with sess.graph.as_default():
+        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
+            boosted_trees._create_classification_head(n_classes=2),
+            run_config.RunConfig(),
+            is_classification=True,
+            train_in_memory=True)
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      # Validate the trained ensemble.
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_first, ensemble_proto)
+
+      # Run one more time and validate the trained ensemble.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_second, ensemble_proto)
+
+      # Third round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_third, ensemble_proto)
+
+  def testTrainClassifierWithCenterBiasInMemory(self):
+    ops.reset_default_graph()
+
+    # When bias centering is on, we expect the very first node to have the
+    expected_first, expected_second, expected_third, expected_forth = (
+        self._get_expected_ensembles_for_classification_with_bias())
+
+    with self.cached_session() as sess:
+      with sess.graph.as_default():
+        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
+            boosted_trees._create_classification_head(n_classes=2),
+            run_config.RunConfig(),
+            is_classification=True,
+            train_in_memory=True,
+            center_bias=True)
+
+      # 4 iterations to center bias.
+      for _ in range(4):
+        _, serialized = sess.run([train_op, ensemble_serialized])
+
+      # Validate the trained ensemble.
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_first, ensemble_proto)
+
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_second, ensemble_proto)
+
+      # Third round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_third, ensemble_proto)
+
+      # Forth round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+
+      self.assertProtoEquals(expected_forth, ensemble_proto)
+
+  def testTrainClassifierNonInMemory(self):
+    ops.reset_default_graph()
+    expected_first, expected_second, expected_third = (
+        self._get_expected_ensembles_for_classification())
+    with self.cached_session() as sess:
+      # Train without train_in_memory mode.
+      with sess.graph.as_default():
+        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
+            boosted_trees._create_classification_head(n_classes=2),
+            run_config.RunConfig(),
+            is_classification=True,
+            train_in_memory=False)
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      # Validate the trained ensemble.
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_first, ensemble_proto)
+
+      # Run one more time and validate the trained ensemble.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_second, ensemble_proto)
+
+      # Third round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_third, ensemble_proto)
+
+  def testTrainClassifierWithCenterBiasNonInMemory(self):
+    ops.reset_default_graph()
+
+    # When bias centering is on, we expect the very first node to have the
+    expected_first, expected_second, expected_third, expected_forth = (
+        self._get_expected_ensembles_for_classification_with_bias())
+
+    with self.cached_session() as sess:
+      with sess.graph.as_default():
+        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
+            boosted_trees._create_classification_head(n_classes=2),
+            run_config.RunConfig(),
+            is_classification=True,
+            train_in_memory=False,
+            center_bias=True)
+      # 4 iterations to center bias.
+      for _ in range(4):
+        _, serialized = sess.run([train_op, ensemble_serialized])
+      # Validate the trained ensemble.
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_first, ensemble_proto)
+
+      # Run one more time and validate the trained ensemble.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_second, ensemble_proto)
+
+      # Third round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_third, ensemble_proto)
+
+      # Forth round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_forth, ensemble_proto)
+
+  def testTrainRegressorInMemory(self):
+    ops.reset_default_graph()
+    expected_first, expected_second, expected_third = (
+        self._get_expected_ensembles_for_regression())
+    with self.cached_session() as sess:
+      # Train with train_in_memory mode.
+      with sess.graph.as_default():
+        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
+            boosted_trees._create_regression_head(label_dimension=1),
+            run_config.RunConfig(),
+            is_classification=False,
+            train_in_memory=True)
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      # Validate the trained ensemble.
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_first, ensemble_proto)
+
+      # Run one more time and validate the trained ensemble.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_second, ensemble_proto)
+
+      # Third round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_third, ensemble_proto)
+
+  def testTrainRegressorInMemoryWithCenterBias(self):
+    ops.reset_default_graph()
+    expected_first, expected_second, expected_third, expected_forth = (
+        self._get_expected_ensembles_for_regression_with_bias())
+    with self.cached_session() as sess:
+      # Train with train_in_memory mode.
+      with sess.graph.as_default():
+        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
+            boosted_trees._create_regression_head(label_dimension=1),
+            run_config.RunConfig(),
+            is_classification=False,
+            train_in_memory=True,
+            center_bias=True)
+      # 3 iterations to center bias.
+      for _ in range(3):
+        _, serialized = sess.run([train_op, ensemble_serialized])
+      # Validate the trained ensemble.
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+
+      self.assertProtoEquals(expected_first, ensemble_proto)
+
+      # Run one more time and validate the trained ensemble.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_second, ensemble_proto)
+
+      # Third round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_third, ensemble_proto)
+
+      # Forth round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_forth, ensemble_proto)
+
+  def testTrainRegressorNonInMemory(self):
+    ops.reset_default_graph()
+    expected_first, expected_second, expected_third = (
+        self._get_expected_ensembles_for_regression())
+    with self.cached_session() as sess:
+      # Train without train_in_memory mode.
+      with sess.graph.as_default():
+        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
+            boosted_trees._create_regression_head(label_dimension=1),
+            run_config.RunConfig(),
+            is_classification=False,
+            train_in_memory=False)
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      # Validate the trained ensemble.
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_first, ensemble_proto)
+
+      # Run one more time and validate the trained ensemble.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_second, ensemble_proto)
+
+      # Third round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_third, ensemble_proto)
+
+  def testTrainRegressorNotInMemoryWithCenterBias(self):
+    ops.reset_default_graph()
+    expected_first, expected_second, expected_third, expected_forth = (
+        self._get_expected_ensembles_for_regression_with_bias())
+    with self.cached_session() as sess:
+      # Train with train_in_memory mode.
+      with sess.graph.as_default():
+        train_op, ensemble_serialized = self._get_train_op_and_ensemble(
+            boosted_trees._create_regression_head(label_dimension=1),
+            run_config.RunConfig(),
+            is_classification=False,
+            train_in_memory=False,
+            center_bias=True)
+      # 3 iterations to center the bias (because we are using regularization).
+      for _ in range(3):
+        _, serialized = sess.run([train_op, ensemble_serialized])
+
+      # Validate the trained ensemble.
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_first, ensemble_proto)
+
+      # Run one more time and validate the trained ensemble.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_second, ensemble_proto)
+
+      # Third round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_third, ensemble_proto)
+
+      # Forth round training and validation.
+      _, serialized = sess.run([train_op, ensemble_serialized])
+      ensemble_proto = boosted_trees_pb2.TreeEnsemble()
+      ensemble_proto.ParseFromString(serialized)
+      self.assertProtoEquals(expected_forth, ensemble_proto)
+
+
+if __name__ == '__main__':
+  googletest.main()
diff --git a/tensorflow/python/estimator/canned/boosted_trees_utils.py b/tensorflow/python/estimator/canned/boosted_trees_utils.py
index 0ff70ddff1..85efc2304a 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_utils.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_utils.py
@@ -12,23 +12,69 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""boosted_trees_utils python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Debug and model explainability logic for boosted trees."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import boosted_trees_utils
+import numpy as np
+
+from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2
+
+# For directional feature contributions.
+_DEBUG_PROTO_KEY = '_serialized_debug_outputs_proto'
+_BIAS_ID = 0
+
+
+def _parse_debug_proto_string(example_proto_serialized):
+  example_debug_outputs = boosted_trees_pb2.DebugOutput()
+  example_debug_outputs.ParseFromString(example_proto_serialized)
+  feature_ids = example_debug_outputs.feature_ids
+  logits_path = example_debug_outputs.logits_path
+  return feature_ids, logits_path
+
+
+def _compute_directional_feature_contributions(example_feature_ids,
+                                               example_logits_paths, activation,
+                                               num_bucketized_features):
+  """Directional feature contributions and bias, per example."""
+  # Initialize contributions to 0.
+  dfcs = {k: 0 for k in range(num_bucketized_features)}
+
+  # Traverse tree subtracting child prediction from parent prediction and
+  # associating change with feature id used to split.
+  predictions = np.array(activation(example_logits_paths))
+  delta_pred = predictions[_BIAS_ID + 1:] - predictions[:-1]
+  # Group by feature id, then sum delta_pred.
+  contribs = np.bincount(
+      example_feature_ids,
+      weights=delta_pred,
+      minlength=num_bucketized_features)
+  for f, dfc in zip(range(num_bucketized_features), contribs):
+    dfcs[f] = dfc
+  return predictions[_BIAS_ID], dfcs
+
+
+def _identity(logits):
+  return logits
+
+
+def _sigmoid(logits):
+  # TODO(crawles): Change to softmax once multiclass support is available.
+  return 1 / (1 + np.exp(-np.array(logits)))
 
-# Include attrs that start with single underscore.
-boosted_trees_utils.__all__ = [
-    s for s in dir(boosted_trees_utils) if not s.startswith('__')
-]
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.boosted_trees_utils import *
+def _parse_explanations_from_prediction(serialized_debug_proto,
+                                        n_features,
+                                        classification=False):
+  """Parse serialized explanability proto, compute dfc, and return bias, dfc."""
+  feature_ids, logits_path = _parse_debug_proto_string(serialized_debug_proto)
+  if classification:
+    activation = _sigmoid
+  else:
+    activation = _identity
+  bias, dfcs = _compute_directional_feature_contributions(
+      feature_ids, logits_path, activation, n_features)
+  # TODO(crawles): Prediction path and leaf IDs.
+  return bias, dfcs
diff --git a/tensorflow/python/estimator/canned/boosted_trees_utils_test.py b/tensorflow/python/estimator/canned/boosted_trees_utils_test.py
new file mode 100644
index 0000000000..506d4ea6fb
--- /dev/null
+++ b/tensorflow/python/estimator/canned/boosted_trees_utils_test.py
@@ -0,0 +1,187 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests boosted_trees estimators and model_fn."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.estimator.canned import boosted_trees_utils
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import googletest
+
+
+class BoostedTreesDFCTest(test_util.TensorFlowTestCase):
+  """Test directional feature contributions (DFC) helper functions. """
+
+  def testDirectionalFeatureContributionsCompute(self):
+    """Tests logic to compute DFCs given feature ids and logits paths."""
+    num_bucketized_features = 3  # Includes one unused feature.
+    examples_feature_ids = ((2, 2, 0, 0), (2, 2, 0))
+    e1_feature_ids, e2_feature_ids = examples_feature_ids
+
+    # DFCs are computed by traversing the prediction path and subtracting each
+    # child prediction from its parent prediction and associating the change in
+    # prediction with the respective feature id used for the split.
+    # For each activation function, f, (currently identity or sigmoid), DFCs are
+    # calculated for the two examples as:
+    # example 1:
+    #   feature_0 = (f(1.114) - f(1.214)) + (f(6.114) - f(1.114))
+    #   feature_1 = 0  # Feature not in ensemble, thus zero contrib.
+    #   feature_2 = (f(0.114) - bias_pred) + (f(1.214) - f(0.114))
+    # example 2:
+    #   feature_0 = f(-5.486) - f(1.514)
+    #   feature_1 = 0  # Feature not in ensemble, thus zero contrib.
+    #   feature_2 = (f(0.114) - bias_pred) + (f(1.514) - f(0.114))
+    # where bias_pred is = f(0) or f(0.21), with center_bias = {True, False},
+    # respectively.
+    # Keys are center_bias.
+    expected_dfcs_identity = {
+        False: ({
+            0: 4.9,
+            1: 0,
+            2: 1.214
+        }, {
+            0: -7.0,
+            1: 0,
+            2: 1.514
+        }),
+        True: ({
+            0: 4.9,
+            1: 0,
+            2: 1.0039999999999998
+        }, {
+            0: -7.0,
+            1: 0,
+            2: 1.3039999999999998
+        })
+    }
+    expected_dfcs_sigmoid = {
+        False: ({
+            0: 0.22678725678805578,
+            1: 0,
+            2: 0.2710059376234506
+        }, {
+            0: -0.81552596670046507,
+            1: 0,
+            2: 0.319653250251275
+        }),
+        True: ({
+            0: 0.22678725678805578,
+            1: 0,
+            2: 0.2186980280491253
+        }, {
+            0: -0.81552596670046507,
+            1: 0,
+            2: 0.26734534067694971
+        })
+    }
+    # pylint: disable=protected-access
+    for f, expected_dfcs in zip(
+        (boosted_trees_utils._identity, boosted_trees_utils._sigmoid),
+        (expected_dfcs_identity, expected_dfcs_sigmoid)):
+      for center_bias in [False, True]:
+        # If not center_bias, the bias after activation is 0.
+        if center_bias:
+          bias_logit = 0.21  # Root node of tree_0.
+        else:
+          bias_logit = 0  # 0 is default value when there is no original_leaf.
+        f_bias = f(bias_logit)
+
+        # Logits before and after, as is outputed from
+        # boosted_trees_ops.example_debug_outputs
+        examples_logits_paths = ((bias_logit, 0.114, 1.214, 1.114, 6.114),
+                                 (bias_logit, 0.114, 1.514, -5.486))
+        e1_logits_path, e2_logits_path = examples_logits_paths
+        e1_expected_dfcs, e2_expected_dfcs = expected_dfcs[center_bias]
+        # Check feature contributions are correct for both examples.
+        # Example 1.
+        # pylint:disable=line-too-long
+        e1_bias, e1_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+            e1_feature_ids, e1_logits_path, f, num_bucketized_features)
+        self.assertAllClose(e1_bias, f_bias)
+        self.assertAllClose(e1_dfc, e1_expected_dfcs)
+        # Example 2.
+        e2_bias, e2_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+            e2_feature_ids, e2_logits_path, f, num_bucketized_features)
+        # pylint:enable=line-too-long
+        self.assertAllClose(e2_bias, f_bias)
+        self.assertAllClose(e2_dfc, e2_expected_dfcs)
+        # Check if contributions sum to final prediction.
+        # For each tree, get leaf of last tree.
+        expected_logits = (e1_logits_path[-1], e2_logits_path[-1])
+        # Predictions should be the sum of contributions + bias.
+        expected_preds = [f(logit) for logit in expected_logits]
+        e1_pred = e1_bias + sum(e1_dfc.values())
+        e2_pred = e2_bias + sum(e2_dfc.values())
+        preds = [e1_pred, e2_pred]
+        self.assertAllClose(preds, expected_preds)
+    # pylint: enable=protected-access
+
+  def testDFCComputeComparedToExternalExample(self):
+    """Tests `compute_dfc` compared to external example (regression).
+
+    Example from http://blog.datadive.net/interpreting-random-forests.
+    """
+    # DIS:3, RM: 2, LSTAT:1, NOX:0
+    num_bucketized_features = 4
+    e1_feature_ids = (2, 1, 0)
+    e2_feature_ids = (2, 2, 2)
+    e3_feature_ids = (2, 2, 0)
+
+    bias_logit = 22.60  # Root node of tree_0.
+    activation = boosted_trees_utils._identity
+    f_bias = activation(bias_logit)
+    # Logits before and after, as is outputed from
+    # boosted_trees_ops.example_debug_outputs
+    e1_logits_path = (bias_logit, 19.96, 14.91, 18.11)
+    e2_logits_path = (bias_logit, 37.42, 45.10, 45.90)
+    e3_logits_path = (bias_logit, 37.42, 32.30, 33.58)
+    e1_expected_dfcs = {0: 3.20, 1: -5.05, 2: -2.64, 3: 0}
+    e2_expected_dfcs = {0: 0, 1: 0, 2: 23.3, 3: 0}
+    e3_expected_dfcs = {0: 1.28, 1: 0, 2: 9.7, 3: 0}
+    # Check feature contributions are correct for both examples.
+    # Example 1.
+    # pylint: disable=protected-access
+    # pylint: disable=line-too-long
+    e1_bias, e1_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+        e1_feature_ids, e1_logits_path, activation, num_bucketized_features)
+    self.assertAllClose(e1_bias, f_bias)
+    self.assertAllClose(e1_dfc, e1_expected_dfcs)
+    # Example 2.
+    e2_bias, e2_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+        e2_feature_ids, e2_logits_path, activation, num_bucketized_features)
+    self.assertAllClose(e2_bias, f_bias)
+    self.assertAllClose(e2_dfc, e2_expected_dfcs)
+    # Example 3.
+    e3_bias, e3_dfc = boosted_trees_utils._compute_directional_feature_contributions(
+        e3_feature_ids, e3_logits_path, activation, num_bucketized_features)
+    # pylint: enable=line-too-long
+    self.assertAllClose(e3_bias, f_bias)
+    self.assertAllClose(e3_dfc, e3_expected_dfcs)
+    # pylint: enable=protected-access
+    # Check if contributions sum to final prediction.
+    # For each tree, get leaf of last tree.
+    expected_logits = (18.11, 45.90, 33.58)
+    # Predictions should be the sum of contributions + bias.
+    expected_preds = [activation(logit) for logit in expected_logits]
+    e1_pred = e1_bias + sum(e1_dfc.values())
+    e2_pred = e2_bias + sum(e2_dfc.values())
+    e3_pred = e3_bias + sum(e3_dfc.values())
+    preds = [e1_pred, e2_pred, e3_pred]
+    self.assertAllClose(preds, expected_preds)
+
+
+if __name__ == '__main__':
+  googletest.main()
diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py
index 6b80bd5224..a6c2aaa7d9 100644
--- a/tensorflow/python/estimator/canned/dnn.py
+++ b/tensorflow/python/estimator/canned/dnn.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,649 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""dnn python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Deep Neural Network estimators."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import dnn
+import six
+
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.canned import optimizers
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.framework import ops
+from tensorflow.python.keras.engine import training
+from tensorflow.python.layers import core as core_layers
+from tensorflow.python.layers import normalization
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.summary import summary
+from tensorflow.python.util.tf_export import estimator_export
+
+# The default learning rate of 0.05 is a historical artifact of the initial
+# implementation, but seems a reasonable choice.
+_LEARNING_RATE = 0.05
+
+
+def _add_hidden_layer_summary(value, tag):
+  summary.scalar('%s/fraction_of_zero_values' % tag, nn.zero_fraction(value))
+  summary.histogram('%s/activation' % tag, value)
+
+
+def _dnn_logit_fn_builder(units,
+                          hidden_units,
+                          feature_columns,
+                          activation_fn,
+                          dropout,
+                          input_layer_partitioner,
+                          batch_norm,
+                          shared_state_manager=None):
+  """Function builder for a dnn logit_fn.
+
+  Args:
+    units: An int indicating the dimension of the logit layer.  In the
+      MultiHead case, this should be the sum of all component Heads' logit
+      dimensions.
+    hidden_units: Iterable of integer number of hidden units per layer.
+    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
+    activation_fn: Activation function applied to each layer.
+    dropout: When not `None`, the probability we will drop out a given
+      coordinate.
+    input_layer_partitioner: Partitioner for input layer.
+    batch_norm: Whether to use batch normalization after each hidden layer.
+    shared_state_manager: A SharedEmbeddingStateManager object to hold the
+      shared state for SharedEmbeddingColumn's.
+
+  Returns:
+    A logit_fn (see below).
+
+  Raises:
+    ValueError: If units is not an int.
+  """
+  if not isinstance(units, int):
+    raise ValueError('units must be an int.  Given type: {}'.format(
+        type(units)))
+
+  def dnn_logit_fn(features, mode):
+    """Deep Neural Network logit_fn.
+
+    Args:
+      features: This is the first item returned from the `input_fn`
+                passed to `train`, `evaluate`, and `predict`. This should be a
+                single `Tensor` or `dict` of same.
+      mode: Optional. Specifies if this training, evaluation or prediction. See
+            `ModeKeys`.
+
+    Returns:
+      A `Tensor` representing the logits, or a list of `Tensor`'s representing
+      multiple logits in the MultiHead case.
+    """
+    dnn_model = _DNNModel(
+        units,
+        hidden_units,
+        feature_columns,
+        activation_fn,
+        dropout,
+        input_layer_partitioner,
+        batch_norm,
+        shared_state_manager,
+        name='dnn')
+    return dnn_model(features, mode)
+
+  return dnn_logit_fn
+
+
+def _get_previous_name_scope():
+  current_name_scope = ops.get_name_scope()
+  return current_name_scope.rsplit('/', 1)[0] + '/'
+
+
+class _DNNModel(training.Model):
+  """A DNN Model."""
+
+  def __init__(self,
+               units,
+               hidden_units,
+               feature_columns,
+               activation_fn,
+               dropout,
+               input_layer_partitioner,
+               batch_norm,
+               shared_state_manager,
+               name=None,
+               **kwargs):
+    super(_DNNModel, self).__init__(name=name, **kwargs)
+    if feature_column_v2.is_feature_column_v2(feature_columns):
+      self._input_layer = feature_column_v2.FeatureLayer(
+          feature_columns=feature_columns,
+          name='input_layer',
+          shared_state_manager=shared_state_manager)
+    else:
+      self._input_layer = feature_column.InputLayer(
+          feature_columns=feature_columns,
+          name='input_layer',
+          create_scope_now=False)
+
+    self._add_layer(self._input_layer, 'input_layer')
+
+    self._dropout = dropout
+    self._batch_norm = batch_norm
+
+    self._hidden_layers = []
+    self._dropout_layers = []
+    self._batch_norm_layers = []
+    self._hidden_layer_scope_names = []
+    for layer_id, num_hidden_units in enumerate(hidden_units):
+      with variable_scope.variable_scope(
+          'hiddenlayer_%d' % layer_id) as hidden_layer_scope:
+        hidden_layer = core_layers.Dense(
+            units=num_hidden_units,
+            activation=activation_fn,
+            kernel_initializer=init_ops.glorot_uniform_initializer(),
+            name=hidden_layer_scope,
+            _scope=hidden_layer_scope)
+        self._add_layer(hidden_layer, hidden_layer_scope.name)
+        self._hidden_layer_scope_names.append(hidden_layer_scope.name)
+        self._hidden_layers.append(hidden_layer)
+        if self._dropout is not None:
+          dropout_layer = core_layers.Dropout(rate=self._dropout)
+          self._add_layer(dropout_layer, dropout_layer.name)
+          self._dropout_layers.append(dropout_layer)
+        if self._batch_norm:
+          batch_norm_layer = normalization.BatchNormalization(
+              # The default momentum 0.99 actually crashes on certain
+              # problem, so here we use 0.999, which is the default of
+              # tf.contrib.layers.batch_norm.
+              momentum=0.999,
+              trainable=True,
+              name='batchnorm_%d' % layer_id,
+              _scope='batchnorm_%d' % layer_id)
+          self._add_layer(batch_norm_layer, batch_norm_layer.name)
+          self._batch_norm_layers.append(batch_norm_layer)
+
+    with variable_scope.variable_scope('logits') as logits_scope:
+      self._logits_layer = core_layers.Dense(
+          units=units,
+          activation=None,
+          kernel_initializer=init_ops.glorot_uniform_initializer(),
+          name=logits_scope,
+          _scope=logits_scope)
+      self._add_layer(self._logits_layer, logits_scope.name)
+      self._logits_scope_name = logits_scope.name
+    self._input_layer_partitioner = input_layer_partitioner
+
+  def call(self, features, mode):
+    is_training = mode == model_fn.ModeKeys.TRAIN
+    # The Keras training.Model adds a name_scope with the name of the model
+    # which modifies the constructed graph. Hence we add another name_scope
+    # here which is the one before the training.Model one was applied.
+    # TODO(rohanj): Remove this in TF 2.0 (b/116728605)
+    with ops.name_scope(name=_get_previous_name_scope()):
+      # TODO(rohanj): Remove dependence on variable scope for partitioning.
+      with variable_scope.variable_scope(
+          'input_from_feature_columns',
+          partitioner=self._input_layer_partitioner):
+        net = self._input_layer(features)
+      for i in range(len(self._hidden_layers)):
+        net = self._hidden_layers[i](net)
+        if self._dropout is not None and is_training:
+          net = self._dropout_layers[i](net, training=True)
+        if self._batch_norm:
+          net = self._batch_norm_layers[i](net, training=is_training)
+        _add_hidden_layer_summary(net, self._hidden_layer_scope_names[i])
+
+      logits = self._logits_layer(net)
+      _add_hidden_layer_summary(logits, self._logits_scope_name)
+      return logits
+
+  def _add_layer(self, layer, layer_name):
+    # "Magic" required for keras.Model classes to track all the variables in
+    # a list of layers.Layer objects.
+    # TODO(ashankar): Figure out API so user code doesn't have to do this.
+    setattr(self, layer_name, layer)
+
+
+def _dnn_model_fn(features,
+                  labels,
+                  mode,
+                  head,
+                  hidden_units,
+                  feature_columns,
+                  optimizer='Adagrad',
+                  activation_fn=nn.relu,
+                  dropout=None,
+                  input_layer_partitioner=None,
+                  config=None,
+                  use_tpu=False,
+                  batch_norm=False,
+                  shared_state_manager=None):
+  """Deep Neural Net model_fn.
+
+  Args:
+    features: dict of `Tensor`.
+    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
+      dtype `int32` or `int64` in the range `[0, n_classes)`.
+    mode: Defines whether this is training, evaluation or prediction.
+      See `ModeKeys`.
+    head: A `head_lib._Head` instance.
+    hidden_units: Iterable of integer number of hidden units per layer.
+    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
+    optimizer: String, `tf.Optimizer` object, or callable that creates the
+      optimizer to use for training. If not specified, will use the Adagrad
+      optimizer with a default learning rate of 0.05.
+    activation_fn: Activation function applied to each layer.
+    dropout: When not `None`, the probability we will drop out a given
+      coordinate.
+    input_layer_partitioner: Partitioner for input layer. Defaults
+      to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+    config: `RunConfig` object to configure the runtime settings.
+    use_tpu: Whether to make a DNN model able to run on TPU. Will make function
+      return a `_TPUEstimatorSpec` instance and disable variable partitioning.
+    batch_norm: Whether to use batch normalization after each hidden layer.
+    shared_state_manager: A SharedEmbeddingStateManager object to hold the
+      shared state for SharedEmbeddingColumn's.
+
+  Returns:
+    An `EstimatorSpec` instance.
+
+  Raises:
+    ValueError: If features has the wrong type.
+  """
+  if not isinstance(features, dict):
+    raise ValueError('features should be a dictionary of `Tensor`s. '
+                     'Given type: {}'.format(type(features)))
+
+  optimizer = optimizers.get_optimizer_instance(
+      optimizer, learning_rate=_LEARNING_RATE)
+  num_ps_replicas = config.num_ps_replicas if config else 0
+
+  partitioner = (None if use_tpu else
+                 partitioned_variables.min_max_variable_partitioner(
+                     max_partitions=num_ps_replicas))
+  with variable_scope.variable_scope(
+      'dnn',
+      values=tuple(six.itervalues(features)),
+      partitioner=partitioner):
+    input_layer_partitioner = input_layer_partitioner or (
+        None if use_tpu else
+        partitioned_variables.min_max_variable_partitioner(
+            max_partitions=num_ps_replicas,
+            min_slice_size=64 << 20))
+
+    logit_fn = _dnn_logit_fn_builder(
+        units=head.logits_dimension,
+        hidden_units=hidden_units,
+        feature_columns=feature_columns,
+        activation_fn=activation_fn,
+        dropout=dropout,
+        input_layer_partitioner=input_layer_partitioner,
+        batch_norm=batch_norm,
+        shared_state_manager=shared_state_manager)
+    logits = logit_fn(features=features, mode=mode)
+
+    if use_tpu:
+      return head._create_tpu_estimator_spec(  # pylint: disable=protected-access
+          features=features,
+          mode=mode,
+          labels=labels,
+          optimizer=optimizer,
+          logits=logits)
+    else:
+      return head.create_estimator_spec(
+          features=features,
+          mode=mode,
+          labels=labels,
+          optimizer=optimizer,
+          logits=logits)
+
+
+@estimator_export('estimator.DNNClassifier')
+class DNNClassifier(estimator.Estimator):
+  """A classifier for TensorFlow DNN models.
+
+  Example:
+
+  ```python
+  categorical_feature_a = categorical_column_with_hash_bucket(...)
+  categorical_feature_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_emb = embedding_column(
+      categorical_column=categorical_feature_a, ...)
+  categorical_feature_b_emb = embedding_column(
+      categorical_column=categorical_feature_b, ...)
+
+  estimator = DNNClassifier(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256])
+
+  # Or estimator using the ProximalAdagradOptimizer optimizer with
+  # regularization.
+  estimator = DNNClassifier(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      optimizer=tf.train.ProximalAdagradOptimizer(
+        learning_rate=0.1,
+        l1_regularization_strength=0.001
+      ))
+
+  # Or estimator using an optimizer with a learning rate decay.
+  estimator = DNNClassifier(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      optimizer=lambda: tf.AdamOptimizer(
+          learning_rate=tf.exponential_decay(
+              learning_rate=0.1,
+              global_step=tf.get_global_step(),
+              decay_steps=10000,
+              decay_rate=0.96))
+
+  # Or estimator with warm-starting from a previous checkpoint.
+  estimator = DNNClassifier(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      warm_start_from="/path/to/checkpoint/dir")
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    pass
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval: # returns x, y
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * if `weight_column` is not `None`, a feature with
+    `key=weight_column` whose value is a `Tensor`.
+  * for each `column` in `feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss is calculated by using softmax cross entropy.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(
+      self,
+      hidden_units,
+      feature_columns,
+      model_dir=None,
+      n_classes=2,
+      weight_column=None,
+      label_vocabulary=None,
+      optimizer='Adagrad',
+      activation_fn=nn.relu,
+      dropout=None,
+      input_layer_partitioner=None,
+      config=None,
+      warm_start_from=None,
+      loss_reduction=losses.Reduction.SUM,
+      batch_norm=False,
+  ):
+    """Initializes a `DNNClassifier` instance.
+
+    Args:
+      hidden_units: Iterable of number hidden units per layer. All layers are
+        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
+        second one has 32.
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `_FeatureColumn`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator to
+        continue training a previously saved model.
+      n_classes: Number of label classes. Defaults to 2, namely binary
+        classification. Must be > 1.
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to down weight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      label_vocabulary: A list of strings represents possible label values. If
+        given, labels must be string type and have any value in
+        `label_vocabulary`. If it is not given, that means labels are
+        already encoded as integer or float within [0, 1] for `n_classes=2` and
+        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
+        Also there will be errors if vocabulary is not provided and labels are
+        string.
+      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
+        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
+        callable. Defaults to Adagrad optimizer.
+      activation_fn: Activation function applied to each layer. If `None`, will
+        use `tf.nn.relu`.
+      dropout: When not `None`, the probability we will drop out a given
+        coordinate.
+      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
+        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+      config: `RunConfig` object to configure the runtime settings.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights are warm-started, and it is assumed that vocabularies and Tensor
+        names are unchanged.
+      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+        to reduce training loss over batch. Defaults to `SUM`.
+      batch_norm: Whether to use batch normalization after each hidden layer.
+    """
+    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
+        n_classes, weight_column, label_vocabulary, loss_reduction)
+
+    shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
+        feature_columns)
+
+    def _model_fn(features, labels, mode, config):
+      """Call the defined shared _dnn_model_fn."""
+      return _dnn_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          hidden_units=hidden_units,
+          feature_columns=tuple(feature_columns or []),
+          optimizer=optimizer,
+          activation_fn=activation_fn,
+          dropout=dropout,
+          input_layer_partitioner=input_layer_partitioner,
+          config=config,
+          batch_norm=batch_norm,
+          shared_state_manager=shared_state_manager)
+
+    super(DNNClassifier, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        warm_start_from=warm_start_from)
+
+
+@estimator_export('estimator.DNNRegressor')
+class DNNRegressor(estimator.Estimator):
+  """A regressor for TensorFlow DNN models.
+
+  Example:
+
+  ```python
+  categorical_feature_a = categorical_column_with_hash_bucket(...)
+  categorical_feature_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_emb = embedding_column(
+      categorical_column=categorical_feature_a, ...)
+  categorical_feature_b_emb = embedding_column(
+      categorical_column=categorical_feature_b, ...)
+
+  estimator = DNNRegressor(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256])
+
+  # Or estimator using the ProximalAdagradOptimizer optimizer with
+  # regularization.
+  estimator = DNNRegressor(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      optimizer=tf.train.ProximalAdagradOptimizer(
+        learning_rate=0.1,
+        l1_regularization_strength=0.001
+      ))
+
+  # Or estimator using an optimizer with a learning rate decay.
+  estimator = DNNRegressor(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      optimizer=lambda: tf.AdamOptimizer(
+          learning_rate=tf.exponential_decay(
+              learning_rate=0.1,
+              global_step=tf.get_global_step(),
+              decay_steps=10000,
+              decay_rate=0.96))
+
+  # Or estimator with warm-starting from a previous checkpoint.
+  estimator = DNNRegressor(
+      feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+      hidden_units=[1024, 512, 256],
+      warm_start_from="/path/to/checkpoint/dir")
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    pass
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval: # returns x, y
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * if `weight_column` is not `None`, a feature with
+    `key=weight_column` whose value is a `Tensor`.
+  * for each `column` in `feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss is calculated by using mean squared error.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(
+      self,
+      hidden_units,
+      feature_columns,
+      model_dir=None,
+      label_dimension=1,
+      weight_column=None,
+      optimizer='Adagrad',
+      activation_fn=nn.relu,
+      dropout=None,
+      input_layer_partitioner=None,
+      config=None,
+      warm_start_from=None,
+      loss_reduction=losses.Reduction.SUM,
+      batch_norm=False,
+  ):
+    """Initializes a `DNNRegressor` instance.
+
+    Args:
+      hidden_units: Iterable of number hidden units per layer. All layers are
+        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
+        second one has 32.
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `_FeatureColumn`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator to
+        continue training a previously saved model.
+      label_dimension: Number of regression targets per example. This is the
+        size of the last dimension of the labels and logits `Tensor` objects
+        (typically, these have shape `[batch_size, label_dimension]`).
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to down weight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
+        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
+        callable. Defaults to Adagrad optimizer.
+      activation_fn: Activation function applied to each layer. If `None`, will
+        use `tf.nn.relu`.
+      dropout: When not `None`, the probability we will drop out a given
+        coordinate.
+      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
+        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+      config: `RunConfig` object to configure the runtime settings.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights are warm-started, and it is assumed that vocabularies and Tensor
+        names are unchanged.
+      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+        to reduce training loss over batch. Defaults to `SUM`.
+      batch_norm: Whether to use batch normalization after each hidden layer.
+    """
+
+    shared_state_manager = None
+    if feature_column_v2.is_feature_column_v2(feature_columns):
+      shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
 
-# Include attrs that start with single underscore.
-dnn.__all__ = [s for s in dir(dnn) if not s.startswith('__')]
+    def _model_fn(features, labels, mode, config):
+      """Call the defined shared _dnn_model_fn."""
+      return _dnn_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head_lib._regression_head(  # pylint: disable=protected-access
+              label_dimension=label_dimension,
+              weight_column=weight_column,
+              loss_reduction=loss_reduction),
+          hidden_units=hidden_units,
+          feature_columns=tuple(feature_columns or []),
+          optimizer=optimizer,
+          activation_fn=activation_fn,
+          dropout=dropout,
+          input_layer_partitioner=input_layer_partitioner,
+          config=config,
+          batch_norm=batch_norm,
+          shared_state_manager=shared_state_manager)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.dnn import *
+    super(DNNRegressor, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        warm_start_from=warm_start_from)
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined.py b/tensorflow/python/estimator/canned/dnn_linear_combined.py
index 7d6b4a4bb1..f712244c8d 100644
--- a/tensorflow/python/estimator/canned/dnn_linear_combined.py
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,621 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""dnn_linear_combined python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""TensorFlow estimators for Linear and DNN joined training models."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import dnn_linear_combined
+import math
+
+import six
+
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import dnn
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.canned import linear
+from tensorflow.python.estimator.canned import optimizers
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.summary import summary
+from tensorflow.python.training import sync_replicas_optimizer
+from tensorflow.python.training import training_util
+from tensorflow.python.util.tf_export import estimator_export
+
+# The default learning rates are a historical artifact of the initial
+# implementation.
+_DNN_LEARNING_RATE = 0.001
+_LINEAR_LEARNING_RATE = 0.005
+
+
+def _check_no_sync_replicas_optimizer(optimizer):
+  if isinstance(optimizer, sync_replicas_optimizer.SyncReplicasOptimizer):
+    raise ValueError(
+        'SyncReplicasOptimizer does not support multi optimizers case. '
+        'Therefore, it is not supported in DNNLinearCombined model. '
+        'If you want to use this optimizer, please use either DNN or Linear '
+        'model.')
+
+
+def _linear_learning_rate(num_linear_feature_columns):
+  """Returns the default learning rate of the linear model.
+
+  The calculation is a historical artifact of this initial implementation, but
+  has proven a reasonable choice.
+
+  Args:
+    num_linear_feature_columns: The number of feature columns of the linear
+      model.
+
+  Returns:
+    A float.
+  """
+  default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
+  return min(_LINEAR_LEARNING_RATE, default_learning_rate)
+
+
+def _add_layer_summary(value, tag):
+  summary.scalar('%s/fraction_of_zero_values' % tag, nn.zero_fraction(value))
+  summary.histogram('%s/activation' % tag, value)
+
+
+def _dnn_linear_combined_model_fn(features,
+                                  labels,
+                                  mode,
+                                  head,
+                                  linear_feature_columns=None,
+                                  linear_optimizer='Ftrl',
+                                  dnn_feature_columns=None,
+                                  dnn_optimizer='Adagrad',
+                                  dnn_hidden_units=None,
+                                  dnn_activation_fn=nn.relu,
+                                  dnn_dropout=None,
+                                  input_layer_partitioner=None,
+                                  config=None,
+                                  batch_norm=False,
+                                  linear_sparse_combiner='sum'):
+  """Deep Neural Net and Linear combined model_fn.
+
+  Args:
+    features: dict of `Tensor`.
+    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
+      `int32` or `int64` in the range `[0, n_classes)`.
+    mode: Defines whether this is training, evaluation or prediction.
+      See `ModeKeys`.
+    head: A `Head` instance.
+    linear_feature_columns: An iterable containing all the feature columns used
+      by the Linear model.
+    linear_optimizer: string, `Optimizer` object, or callable that defines the
+      optimizer to use for training the Linear model. Defaults to the Ftrl
+      optimizer.
+    dnn_feature_columns: An iterable containing all the feature columns used by
+      the DNN model.
+    dnn_optimizer: string, `Optimizer` object, or callable that defines the
+      optimizer to use for training the DNN model. Defaults to the Adagrad
+      optimizer.
+    dnn_hidden_units: List of hidden units per DNN layer.
+    dnn_activation_fn: Activation function applied to each DNN layer. If `None`,
+      will use `tf.nn.relu`.
+    dnn_dropout: When not `None`, the probability we will drop out a given DNN
+      coordinate.
+    input_layer_partitioner: Partitioner for input layer.
+    config: `RunConfig` object to configure the runtime settings.
+    batch_norm: Whether to use batch normalization after each hidden layer.
+    linear_sparse_combiner: A string specifying how to reduce the linear model
+      if a categorical column is multivalent.  One of "mean", "sqrtn", and
+      "sum".
+  Returns:
+    An `EstimatorSpec` instance.
+
+  Raises:
+    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
+      are empty at the same time, or `input_layer_partitioner` is missing,
+      or features has the wrong type.
+  """
+  if not isinstance(features, dict):
+    raise ValueError('features should be a dictionary of `Tensor`s. '
+                     'Given type: {}'.format(type(features)))
+  if not linear_feature_columns and not dnn_feature_columns:
+    raise ValueError(
+        'Either linear_feature_columns or dnn_feature_columns must be defined.')
+
+  num_ps_replicas = config.num_ps_replicas if config else 0
+  input_layer_partitioner = input_layer_partitioner or (
+      partitioned_variables.min_max_variable_partitioner(
+          max_partitions=num_ps_replicas,
+          min_slice_size=64 << 20))
+
+  shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
+      list(linear_feature_columns) + list(dnn_feature_columns))
+
+  # Build DNN Logits.
+  dnn_parent_scope = 'dnn'
+
+  if not dnn_feature_columns:
+    dnn_logits = None
+  else:
+    dnn_optimizer = optimizers.get_optimizer_instance(
+        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
+    _check_no_sync_replicas_optimizer(dnn_optimizer)
+    if not dnn_hidden_units:
+      raise ValueError(
+          'dnn_hidden_units must be defined when dnn_feature_columns is '
+          'specified.')
+    dnn_partitioner = (
+        partitioned_variables.min_max_variable_partitioner(
+            max_partitions=num_ps_replicas))
+    with variable_scope.variable_scope(
+        dnn_parent_scope,
+        values=tuple(six.itervalues(features)),
+        partitioner=dnn_partitioner) as scope:
+      dnn_absolute_scope = scope.name
+      dnn_logit_fn = dnn._dnn_logit_fn_builder(  # pylint: disable=protected-access
+          units=head.logits_dimension,
+          hidden_units=dnn_hidden_units,
+          feature_columns=dnn_feature_columns,
+          activation_fn=dnn_activation_fn,
+          dropout=dnn_dropout,
+          batch_norm=batch_norm,
+          input_layer_partitioner=input_layer_partitioner,
+          shared_state_manager=shared_state_manager)
+      dnn_logits = dnn_logit_fn(features=features, mode=mode)
+
+  linear_parent_scope = 'linear'
+
+  if not linear_feature_columns:
+    linear_logits = None
+  else:
+    linear_optimizer = optimizers.get_optimizer_instance(
+        linear_optimizer,
+        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
+    _check_no_sync_replicas_optimizer(linear_optimizer)
+    with variable_scope.variable_scope(
+        linear_parent_scope,
+        values=tuple(six.itervalues(features)),
+        partitioner=input_layer_partitioner) as scope:
+      linear_absolute_scope = scope.name
+      logit_fn = linear._linear_logit_fn_builder(  # pylint: disable=protected-access
+          units=head.logits_dimension,
+          feature_columns=linear_feature_columns,
+          sparse_combiner=linear_sparse_combiner)
+      linear_logits = logit_fn(features=features)
+      _add_layer_summary(linear_logits, scope.name)
+
+  # Combine logits and build full model.
+  if dnn_logits is not None and linear_logits is not None:
+    logits = dnn_logits + linear_logits
+  elif dnn_logits is not None:
+    logits = dnn_logits
+  else:
+    logits = linear_logits
+
+  def _train_op_fn(loss):
+    """Returns the op to optimize the loss."""
+    train_ops = []
+    global_step = training_util.get_global_step()
+    if dnn_logits is not None:
+      train_ops.append(
+          dnn_optimizer.minimize(
+              loss,
+              var_list=ops.get_collection(
+                  ops.GraphKeys.TRAINABLE_VARIABLES,
+                  scope=dnn_absolute_scope)))
+    if linear_logits is not None:
+      train_ops.append(
+          linear_optimizer.minimize(
+              loss,
+              var_list=ops.get_collection(
+                  ops.GraphKeys.TRAINABLE_VARIABLES,
+                  scope=linear_absolute_scope)))
+
+    train_op = control_flow_ops.group(*train_ops)
+    with ops.control_dependencies([train_op]):
+      return state_ops.assign_add(global_step, 1).op
+
+  return head.create_estimator_spec(
+      features=features,
+      mode=mode,
+      labels=labels,
+      train_op_fn=_train_op_fn,
+      logits=logits)
+
+
+@estimator_export('estimator.DNNLinearCombinedClassifier')
+class DNNLinearCombinedClassifier(estimator.Estimator):
+  """An estimator for TensorFlow Linear and DNN joined classification models.
+
+  Note: This estimator is also known as wide-n-deep.
+
+  Example:
+
+  ```python
+  numeric_feature = numeric_column(...)
+  categorical_column_a = categorical_column_with_hash_bucket(...)
+  categorical_column_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
+  categorical_feature_a_emb = embedding_column(
+      categorical_column=categorical_feature_a, ...)
+  categorical_feature_b_emb = embedding_column(
+      categorical_id_column=categorical_feature_b, ...)
+
+  estimator = DNNLinearCombinedClassifier(
+      # wide settings
+      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
+      linear_optimizer=tf.train.FtrlOptimizer(...),
+      # deep settings
+      dnn_feature_columns=[
+          categorical_feature_a_emb, categorical_feature_b_emb,
+          numeric_feature],
+      dnn_hidden_units=[1000, 500, 100],
+      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...),
+      # warm-start settings
+      warm_start_from="/path/to/checkpoint/dir")
+
+  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
+  tf.train.ProximalAdagradOptimizer(
+      learning_rate=0.1,
+      l1_regularization_strength=0.001,
+      l2_regularization_strength=0.001)
+  # To apply learning rate decay, you can set dnn_optimizer to a callable:
+  lambda: tf.AdamOptimizer(
+      learning_rate=tf.exponential_decay(
+          learning_rate=0.1,
+          global_step=tf.get_global_step(),
+          decay_steps=10000,
+          decay_rate=0.96)
+  # It is the same for linear_optimizer.
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    pass
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval: # returns x, y
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss is calculated by using softmax cross entropy.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               model_dir=None,
+               linear_feature_columns=None,
+               linear_optimizer='Ftrl',
+               dnn_feature_columns=None,
+               dnn_optimizer='Adagrad',
+               dnn_hidden_units=None,
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               n_classes=2,
+               weight_column=None,
+               label_vocabulary=None,
+               input_layer_partitioner=None,
+               config=None,
+               warm_start_from=None,
+               loss_reduction=losses.Reduction.SUM,
+               batch_norm=False,
+               linear_sparse_combiner='sum'):
+    """Initializes a DNNLinearCombinedClassifier instance.
+
+    Args:
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      linear_feature_columns: An iterable containing all the feature columns
+        used by linear part of the model. All items in the set must be
+        instances of classes derived from `FeatureColumn`.
+      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the linear part of the model. Can also be a string (one of 'Adagrad',
+        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL
+        optimizer.
+      dnn_feature_columns: An iterable containing all the feature columns used
+        by deep part of the model. All items in the set must be instances of
+        classes derived from `FeatureColumn`.
+      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the deep part of the model. Can also be a string (one of 'Adagrad',
+        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad
+        optimizer.
+      dnn_hidden_units: List of hidden units per layer. All layers are fully
+        connected.
+      dnn_activation_fn: Activation function applied to each layer. If None,
+        will use `tf.nn.relu`.
+      dnn_dropout: When not None, the probability we will drop out
+        a given coordinate.
+      n_classes: Number of label classes. Defaults to 2, namely binary
+        classification. Must be > 1.
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to down weight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      label_vocabulary: A list of strings represents possible label values. If
+        given, labels must be string type and have any value in
+        `label_vocabulary`. If it is not given, that means labels are
+        already encoded as integer or float within [0, 1] for `n_classes=2` and
+        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
+        Also there will be errors if vocabulary is not provided and labels are
+        string.
+      input_layer_partitioner: Partitioner for input layer. Defaults to
+        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+      config: RunConfig object to configure the runtime settings.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights are warm-started, and it is assumed that vocabularies and Tensor
+        names are unchanged.
+      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+        to reduce training loss over batch. Defaults to `SUM`.
+      batch_norm: Whether to use batch normalization after each hidden layer.
+      linear_sparse_combiner: A string specifying how to reduce the linear model
+        if a categorical column is multivalent.  One of "mean", "sqrtn", and
+        "sum" -- these are effectively different ways to do example-level
+        normalization, which can be useful for bag-of-words features.  For more
+        details, see `tf.feature_column.linear_model`.
+
+    Raises:
+      ValueError: If both linear_feature_columns and dnn_features_columns are
+        empty at the same time.
+    """
+    linear_feature_columns = linear_feature_columns or []
+    dnn_feature_columns = dnn_feature_columns or []
+    self._feature_columns = (
+        list(linear_feature_columns) + list(dnn_feature_columns))
+    if not self._feature_columns:
+      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
+                       'must be defined.')
+    if n_classes == 2:
+      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
+          weight_column=weight_column,
+          label_vocabulary=label_vocabulary,
+          loss_reduction=loss_reduction)
+    else:
+      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
+          n_classes,
+          weight_column=weight_column,
+          label_vocabulary=label_vocabulary,
+          loss_reduction=loss_reduction)
+
+    def _model_fn(features, labels, mode, config):
+      """Call the _dnn_linear_combined_model_fn."""
+      return _dnn_linear_combined_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          linear_feature_columns=linear_feature_columns,
+          linear_optimizer=linear_optimizer,
+          dnn_feature_columns=dnn_feature_columns,
+          dnn_optimizer=dnn_optimizer,
+          dnn_hidden_units=dnn_hidden_units,
+          dnn_activation_fn=dnn_activation_fn,
+          dnn_dropout=dnn_dropout,
+          input_layer_partitioner=input_layer_partitioner,
+          config=config,
+          batch_norm=batch_norm,
+          linear_sparse_combiner=linear_sparse_combiner)
+
+    super(DNNLinearCombinedClassifier, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        warm_start_from=warm_start_from)
+
+
+@estimator_export('estimator.DNNLinearCombinedRegressor')
+class DNNLinearCombinedRegressor(estimator.Estimator):
+  """An estimator for TensorFlow Linear and DNN joined models for regression.
+
+  Note: This estimator is also known as wide-n-deep.
+
+  Example:
+
+  ```python
+  numeric_feature = numeric_column(...)
+  categorical_column_a = categorical_column_with_hash_bucket(...)
+  categorical_column_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
+  categorical_feature_a_emb = embedding_column(
+      categorical_column=categorical_feature_a, ...)
+  categorical_feature_b_emb = embedding_column(
+      categorical_column=categorical_feature_b, ...)
+
+  estimator = DNNLinearCombinedRegressor(
+      # wide settings
+      linear_feature_columns=[categorical_feature_a_x_categorical_feature_b],
+      linear_optimizer=tf.train.FtrlOptimizer(...),
+      # deep settings
+      dnn_feature_columns=[
+          categorical_feature_a_emb, categorical_feature_b_emb,
+          numeric_feature],
+      dnn_hidden_units=[1000, 500, 100],
+      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...),
+      # warm-start settings
+      warm_start_from="/path/to/checkpoint/dir")
+
+  # To apply L1 and L2 regularization, you can set dnn_optimizer to:
+  tf.train.ProximalAdagradOptimizer(
+      learning_rate=0.1,
+      l1_regularization_strength=0.001,
+      l2_regularization_strength=0.001)
+  # To apply learning rate decay, you can set dnn_optimizer to a callable:
+  lambda: tf.AdamOptimizer(
+      learning_rate=tf.exponential_decay(
+          learning_rate=0.1,
+          global_step=tf.get_global_step(),
+          decay_steps=10000,
+          decay_rate=0.96)
+  # It is the same for linear_optimizer.
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    pass
+  estimator.train(input_fn=input_fn_train, steps=100)
+
+  def input_fn_eval: # returns x, y
+    pass
+  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)
+  def input_fn_predict: # returns x, None
+    pass
+  predictions = estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+  otherwise there will be a `KeyError`:
+
+  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
+    - if `column` is a `_CategoricalColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `_WeightedCategoricalColumn`, two features: the first
+      with `key` the id column name, the second with `key` the weight column
+      name. Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `_DenseColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss is calculated by using mean squared error.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               model_dir=None,
+               linear_feature_columns=None,
+               linear_optimizer='Ftrl',
+               dnn_feature_columns=None,
+               dnn_optimizer='Adagrad',
+               dnn_hidden_units=None,
+               dnn_activation_fn=nn.relu,
+               dnn_dropout=None,
+               label_dimension=1,
+               weight_column=None,
+               input_layer_partitioner=None,
+               config=None,
+               warm_start_from=None,
+               loss_reduction=losses.Reduction.SUM,
+               batch_norm=False,
+               linear_sparse_combiner='sum'):
+    """Initializes a DNNLinearCombinedRegressor instance.
+
+    Args:
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      linear_feature_columns: An iterable containing all the feature columns
+        used by linear part of the model. All items in the set must be
+        instances of classes derived from `FeatureColumn`.
+      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the linear part of the model. Can also be a string (one of 'Adagrad',
+        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to FTRL
+        optimizer.
+      dnn_feature_columns: An iterable containing all the feature columns used
+        by deep part of the model. All items in the set must be instances of
+        classes derived from `FeatureColumn`.
+      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
+        the deep part of the model. Can also be a string (one of 'Adagrad',
+        'Adam', 'Ftrl', 'RMSProp', 'SGD'), or callable. Defaults to Adagrad
+        optimizer.
+      dnn_hidden_units: List of hidden units per layer. All layers are fully
+        connected.
+      dnn_activation_fn: Activation function applied to each layer. If None,
+        will use `tf.nn.relu`.
+      dnn_dropout: When not None, the probability we will drop out
+        a given coordinate.
+      label_dimension: Number of regression targets per example. This is the
+        size of the last dimension of the labels and logits `Tensor` objects
+        (typically, these have shape `[batch_size, label_dimension]`).
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to down weight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      input_layer_partitioner: Partitioner for input layer. Defaults to
+        `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
+      config: RunConfig object to configure the runtime settings.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights are warm-started, and it is assumed that vocabularies and Tensor
+        names are unchanged.
+      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+        to reduce training loss over batch. Defaults to `SUM`.
+      batch_norm: Whether to use batch normalization after each hidden layer.
+      linear_sparse_combiner: A string specifying how to reduce the linear model
+        if a categorical column is multivalent.  One of "mean", "sqrtn", and
+        "sum" -- these are effectively different ways to do example-level
+        normalization, which can be useful for bag-of-words features.  For more
+        details, see `tf.feature_column.linear_model`.
+
+    Raises:
+      ValueError: If both linear_feature_columns and dnn_features_columns are
+        empty at the same time.
+    """
+    linear_feature_columns = linear_feature_columns or []
+    dnn_feature_columns = dnn_feature_columns or []
+    self._feature_columns = (
+        list(linear_feature_columns) + list(dnn_feature_columns))
+    if not self._feature_columns:
+      raise ValueError('Either linear_feature_columns or dnn_feature_columns '
+                       'must be defined.')
 
-# Include attrs that start with single underscore.
-dnn_linear_combined.__all__ = [
-    s for s in dir(dnn_linear_combined) if not s.startswith('__')
-]
+    def _model_fn(features, labels, mode, config):
+      """Call the _dnn_linear_combined_model_fn."""
+      return _dnn_linear_combined_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head_lib._regression_head(  # pylint: disable=protected-access
+              label_dimension=label_dimension, weight_column=weight_column,
+              loss_reduction=loss_reduction),
+          linear_feature_columns=linear_feature_columns,
+          linear_optimizer=linear_optimizer,
+          dnn_feature_columns=dnn_feature_columns,
+          dnn_optimizer=dnn_optimizer,
+          dnn_hidden_units=dnn_hidden_units,
+          dnn_activation_fn=dnn_activation_fn,
+          dnn_dropout=dnn_dropout,
+          input_layer_partitioner=input_layer_partitioner,
+          config=config,
+          batch_norm=batch_norm,
+          linear_sparse_combiner=linear_sparse_combiner)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.dnn_linear_combined import *
+    super(DNNLinearCombinedRegressor, self).__init__(
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        warm_start_from=warm_start_from)
diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
new file mode 100644
index 0000000000..ab945d7b1a
--- /dev/null
+++ b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py
@@ -0,0 +1,1123 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for dnn_linear_combined.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import shutil
+import tempfile
+
+from absl.testing import parameterized
+import numpy as np
+import six
+
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import dnn_linear_combined
+from tensorflow.python.estimator.canned import dnn_testing_utils
+from tensorflow.python.estimator.canned import linear_testing_utils
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.estimator.inputs import pandas_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import input as input_lib
+from tensorflow.python.training import optimizer as optimizer_lib
+
+
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+
+class DNNOnlyModelFnTest(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNModelFnTest.__init__(self, self._dnn_only_model_fn)
+
+  def _dnn_only_model_fn(self,
+                         features,
+                         labels,
+                         mode,
+                         head,
+                         hidden_units,
+                         feature_columns,
+                         optimizer='Adagrad',
+                         activation_fn=nn.relu,
+                         dropout=None,
+                         input_layer_partitioner=None,
+                         config=None):
+    return dnn_linear_combined._dnn_linear_combined_model_fn(
+        features=features,
+        labels=labels,
+        mode=mode,
+        head=head,
+        linear_feature_columns=[],
+        dnn_hidden_units=hidden_units,
+        dnn_feature_columns=feature_columns,
+        dnn_optimizer=optimizer,
+        dnn_activation_fn=activation_fn,
+        dnn_dropout=dropout,
+        input_layer_partitioner=input_layer_partitioner,
+        config=config)
+
+
+# A function to mimic linear-regressor init reuse same tests.
+def _linear_regressor_fn(feature_columns,
+                         model_dir=None,
+                         label_dimension=1,
+                         weight_column=None,
+                         optimizer='Ftrl',
+                         config=None,
+                         partitioner=None,
+                         sparse_combiner='sum'):
+  return dnn_linear_combined.DNNLinearCombinedRegressor(
+      model_dir=model_dir,
+      linear_feature_columns=feature_columns,
+      linear_optimizer=optimizer,
+      label_dimension=label_dimension,
+      weight_column=weight_column,
+      input_layer_partitioner=partitioner,
+      config=config,
+      linear_sparse_combiner=sparse_combiner)
+
+
+class LinearOnlyRegressorPartitionerTest(
+    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorPartitionerV2Test(
+    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+class LinearOnlyRegressorEvaluationTest(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorEvaluationV2Test(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+class LinearOnlyRegressorPredictTest(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorPredictV2Test(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+class LinearOnlyRegressorIntegrationTest(
+    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorIntegrationV2Test(
+    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+class LinearOnlyRegressorTrainingTest(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearOnlyRegressorTrainingV2Test(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+def _linear_classifier_fn(feature_columns,
+                          model_dir=None,
+                          n_classes=2,
+                          weight_column=None,
+                          label_vocabulary=None,
+                          optimizer='Ftrl',
+                          config=None,
+                          partitioner=None,
+                          sparse_combiner='sum'):
+  return dnn_linear_combined.DNNLinearCombinedClassifier(
+      model_dir=model_dir,
+      linear_feature_columns=feature_columns,
+      linear_optimizer=optimizer,
+      n_classes=n_classes,
+      weight_column=weight_column,
+      label_vocabulary=label_vocabulary,
+      input_layer_partitioner=partitioner,
+      config=config,
+      linear_sparse_combiner=sparse_combiner)
+
+
+class LinearOnlyClassifierTrainingTest(
+    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearOnlyClassifierTrainingV2Test(
+    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
+
+
+class LinearOnlyClassifierClassesEvaluationTest(
+    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearOnlyClassifierClassesEvaluationV2Test(
+    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
+
+
+class LinearOnlyClassifierPredictTest(
+    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearOnlyClassifierPredictV2Test(
+    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
+
+
+class LinearOnlyClassifierIntegrationTest(
+    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearOnlyClassifierIntegrationV2Test(
+    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
+
+
+@parameterized.parameters((feature_column,), (feature_column_v2,))
+class DNNLinearCombinedRegressorIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow_helper(
+      self, linear_feature_columns, dnn_feature_columns, feature_spec,
+      train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, batch_size):
+    est = dnn_linear_combined.DNNLinearCombinedRegressor(
+        linear_feature_columns=linear_feature_columns,
+        dnn_hidden_units=(2, 2),
+        dnn_feature_columns=dnn_feature_columns,
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array([
+        x[prediction_keys.PredictionKeys.PREDICTIONS]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
+
+    # EXPORT
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, batch_size,
+                          fc_impl):
+    linear_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_complete_flow_mix1(self, train_input_fn, eval_input_fn,
+                               predict_input_fn, input_dimension,
+                               label_dimension, batch_size, fc_impl):
+    del fc_impl
+    linear_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        feature_column_v2.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_complete_flow_mix2(self, train_input_fn, eval_input_fn,
+                               predict_input_fn, input_dimension,
+                               label_dimension, batch_size, fc_impl):
+    del fc_impl
+    linear_feature_columns = [
+        feature_column_v2.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    self._test_complete_flow_helper(linear_feature_columns, dnn_feature_columns,
+                                    feature_spec, train_input_fn, eval_input_fn,
+                                    predict_input_fn, input_dimension,
+                                    label_dimension, batch_size)
+
+  def _test_numpy_input_fn_helper(self, fc_impl, fn_to_run):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    fn_to_run(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+  def test_numpy_input_fn_basic(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow)
+
+  def test_numpy_input_fn_mix1(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
+
+  def test_numpy_input_fn_mix2(self, fc_impl):
+    self._test_numpy_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
+
+  def _test_pandas_input_fn_helper(self, fc_impl, fn_to_run):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+    label_dimension = 1
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size, dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(data)
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        batch_size=batch_size,
+        shuffle=False)
+
+    fn_to_run(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+  def test_pandas_input_fn_basic(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow)
+
+  def test_pandas_input_fn_mix1(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix1)
+
+  def test_pandas_input_fn_mix2(self, fc_impl):
+    self._test_pandas_input_fn_helper(fc_impl, self._test_complete_flow_mix2)
+
+  def _test_input_fn_from_parse_example_helper(self, fc_impl, fn_to_run):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x': feature_pb2.Feature(
+                  float_list=feature_pb2.FloatList(value=datum)),
+              'y': feature_pb2.Feature(
+                  float_list=feature_pb2.FloatList(value=datum)),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
+    }
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = linear_testing_utils.queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = linear_testing_utils.queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = linear_testing_utils.queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    fn_to_run(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+  def test_input_fn_from_parse_example_basic(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow)
+
+  def test_input_fn_from_parse_example_mix1(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow_mix1)
+
+  def test_input_fn_from_parse_example_mix2(self, fc_impl):
+    self._test_input_fn_from_parse_example_helper(fc_impl,
+                                                  self._test_complete_flow_mix2)
+
+
+# A function to mimic dnn-classifier init reuse same tests.
+def _dnn_classifier_fn(hidden_units,
+                       feature_columns,
+                       model_dir=None,
+                       n_classes=2,
+                       weight_column=None,
+                       label_vocabulary=None,
+                       optimizer='Adagrad',
+                       config=None,
+                       input_layer_partitioner=None):
+  return dnn_linear_combined.DNNLinearCombinedClassifier(
+      model_dir=model_dir,
+      dnn_hidden_units=hidden_units,
+      dnn_feature_columns=feature_columns,
+      dnn_optimizer=optimizer,
+      n_classes=n_classes,
+      weight_column=weight_column,
+      label_vocabulary=label_vocabulary,
+      input_layer_partitioner=input_layer_partitioner,
+      config=config)
+
+
+class DNNOnlyClassifierEvaluateTest(
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNOnlyClassifierEvaluateV2Test(
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
+
+
+class DNNOnlyClassifierPredictTest(
+    dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNOnlyClassifierPredictV2Test(
+    dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
+
+
+class DNNOnlyClassifierTrainTest(
+    dnn_testing_utils.BaseDNNClassifierTrainTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNOnlyClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
+                                   test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
+
+
+# A function to mimic dnn-regressor init reuse same tests.
+def _dnn_regressor_fn(hidden_units,
+                      feature_columns,
+                      model_dir=None,
+                      label_dimension=1,
+                      weight_column=None,
+                      optimizer='Adagrad',
+                      config=None,
+                      input_layer_partitioner=None):
+  return dnn_linear_combined.DNNLinearCombinedRegressor(
+      model_dir=model_dir,
+      dnn_hidden_units=hidden_units,
+      dnn_feature_columns=feature_columns,
+      dnn_optimizer=optimizer,
+      label_dimension=label_dimension,
+      weight_column=weight_column,
+      input_layer_partitioner=input_layer_partitioner,
+      config=config)
+
+
+class DNNOnlyRegressorEvaluateTest(
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNOnlyRegressorEvaluateV2Test(
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
+
+
+class DNNOnlyRegressorPredictTest(
+    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNOnlyRegressorPredictV2Test(
+    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
+
+
+class DNNOnlyRegressorTrainTest(
+    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNOnlyRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
+                                  test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
+
+
+@parameterized.parameters((feature_column,), (feature_column_v2,))
+class DNNLinearCombinedClassifierIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _as_label(self, data_in_float):
+    return np.rint(data_in_float).astype(np.int64)
+
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, n_classes, batch_size, fc_impl):
+    linear_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    dnn_feature_columns = [
+        fc_impl.numeric_column('x', shape=(input_dimension,))
+    ]
+    feature_columns = linear_feature_columns + dnn_feature_columns
+    est = dnn_linear_combined.DNNLinearCombinedClassifier(
+        linear_feature_columns=linear_feature_columns,
+        dnn_hidden_units=(2, 2),
+        dnn_feature_columns=dnn_feature_columns,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predicted_proba = np.array([
+        x[prediction_keys.PredictionKeys.PROBABILITIES]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
+
+    # EXPORT
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self, fc_impl):
+    """Tests complete flow with numpy_input_fn."""
+    n_classes = 3
+    input_dimension = 2
+    batch_size = 10
+    data = np.linspace(
+        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
+    x_data = data.reshape(batch_size, input_dimension)
+    y_data = self._as_label(np.reshape(data[:batch_size], (batch_size, 1)))
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+  def test_pandas_input_fn(self, fc_impl):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+    input_dimension = 1
+    n_classes = 2
+    batch_size = 10
+    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(self._as_label(data))
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+  def test_input_fn_from_parse_example(self, fc_impl):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    input_dimension = 2
+    n_classes = 3
+    batch_size = 10
+    data = np.linspace(0., n_classes-1., batch_size * input_dimension,
+                       dtype=np.float32)
+    data = data.reshape(batch_size, input_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x':
+                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
+                      value=datum)),
+              'y':
+                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
+                      value=self._as_label(datum[:1]))),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
+    }
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = linear_testing_utils.queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = linear_testing_utils.queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = linear_testing_utils.queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+
+@parameterized.parameters((feature_column,), (feature_column_v2,))
+class DNNLinearCombinedTests(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _mock_optimizer(self, real_optimizer, var_name_prefix):
+    """Verifies global_step is None and var_names start with given prefix."""
+
+    def _minimize(loss, global_step=None, var_list=None):
+      self.assertIsNone(global_step)
+      trainable_vars = var_list or ops.get_collection(
+          ops.GraphKeys.TRAINABLE_VARIABLES)
+      var_names = [var.name for var in trainable_vars]
+      self.assertTrue(
+          all([name.startswith(var_name_prefix) for name in var_names]))
+      # var is used to check this op called by training.
+      with ops.name_scope(''):
+        var = variables_lib.Variable(0., name=(var_name_prefix + '_called'))
+      with ops.control_dependencies([var.assign(100.)]):
+        return real_optimizer.minimize(loss, global_step, var_list)
+
+    optimizer_mock = test.mock.NonCallableMagicMock(
+        spec=optimizer_lib.Optimizer, wraps=real_optimizer)
+    optimizer_mock.minimize = test.mock.MagicMock(wraps=_minimize)
+
+    return optimizer_mock
+
+  def test_train_op_calls_both_dnn_and_linear(self, fc_impl):
+    opt = gradient_descent.GradientDescentOptimizer(1.)
+    x_column = fc_impl.numeric_column('x')
+    input_fn = numpy_io.numpy_input_fn(
+        x={'x': np.array([[0.], [1.]])},
+        y=np.array([[0.], [1.]]),
+        batch_size=1,
+        shuffle=False)
+    est = dnn_linear_combined.DNNLinearCombinedClassifier(
+        linear_feature_columns=[x_column],
+        # verifies linear_optimizer is used only for linear part.
+        linear_optimizer=self._mock_optimizer(opt, 'linear'),
+        dnn_hidden_units=(2, 2),
+        dnn_feature_columns=[x_column],
+        # verifies dnn_optimizer is used only for linear part.
+        dnn_optimizer=self._mock_optimizer(opt, 'dnn'),
+        model_dir=self._model_dir)
+    est.train(input_fn, steps=1)
+    # verifies train_op fires linear minimize op
+    self.assertEqual(100.,
+                     checkpoint_utils.load_variable(
+                         self._model_dir, 'linear_called'))
+    # verifies train_op fires dnn minimize op
+    self.assertEqual(100.,
+                     checkpoint_utils.load_variable(
+                         self._model_dir, 'dnn_called'))
+
+  def test_dnn_and_linear_logits_are_added(self, fc_impl):
+    with ops.Graph().as_default():
+      variables_lib.Variable([[1.0]], name='linear/linear_model/x/weights')
+      variables_lib.Variable([2.0], name='linear/linear_model/bias_weights')
+      variables_lib.Variable([[3.0]], name='dnn/hiddenlayer_0/kernel')
+      variables_lib.Variable([4.0], name='dnn/hiddenlayer_0/bias')
+      variables_lib.Variable([[5.0]], name='dnn/logits/kernel')
+      variables_lib.Variable([6.0], name='dnn/logits/bias')
+      variables_lib.Variable(1, name='global_step', dtype=dtypes.int64)
+      linear_testing_utils.save_variables_to_ckpt(self._model_dir)
+
+    x_column = fc_impl.numeric_column('x')
+    est = dnn_linear_combined.DNNLinearCombinedRegressor(
+        linear_feature_columns=[x_column],
+        dnn_hidden_units=[1],
+        dnn_feature_columns=[x_column],
+        model_dir=self._model_dir)
+    input_fn = numpy_io.numpy_input_fn(
+        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
+    # linear logits = 10*1 + 2 = 12
+    # dnn logits = (10*3 + 4)*5 + 6 = 176
+    # logits = dnn + linear = 176 + 12 = 188
+    self.assertAllClose(
+        {
+            prediction_keys.PredictionKeys.PREDICTIONS: [188.],
+        },
+        next(est.predict(input_fn=input_fn)))
+
+
+@parameterized.parameters((feature_column,), (feature_column_v2,))
+class DNNLinearCombinedWarmStartingTest(test.TestCase):
+
+  def setUp(self):
+    # Create a directory to save our old checkpoint and vocabularies to.
+    self._ckpt_and_vocab_dir = tempfile.mkdtemp()
+
+    # Make a dummy input_fn.
+    def _input_fn():
+      features = {
+          'age': [[23.], [31.]],
+          'city': [['Palo Alto'], ['Mountain View']],
+      }
+      return features, [0, 1]
+
+    self._input_fn = _input_fn
+
+  def tearDown(self):
+    # Clean up checkpoint / vocab dir.
+    writer_cache.FileWriterCache.clear()
+    shutil.rmtree(self._ckpt_and_vocab_dir)
+
+  def test_classifier_basic_warm_starting(self, fc_impl):
+    """Tests correctness of DNNLinearCombinedClassifier default warm-start."""
+    age = fc_impl.numeric_column('age')
+    city = fc_impl.embedding_column(
+        fc_impl.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
+    dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
+        linear_feature_columns=[age],
+        dnn_feature_columns=[city],
+        dnn_hidden_units=[256, 128],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        linear_optimizer='SGD',
+        dnn_optimizer='SGD')
+    dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNLinearCombinedClassifier, warm-started from the first.
+    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
+    # have accumulator values that change).
+    warm_started_dnn_lc_classifier = (
+        dnn_linear_combined.DNNLinearCombinedClassifier(
+            linear_feature_columns=[age],
+            dnn_feature_columns=[city],
+            dnn_hidden_units=[256, 128],
+            n_classes=4,
+            linear_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            dnn_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            warm_start_from=dnn_lc_classifier.model_dir))
+
+    warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
+      self.assertAllClose(
+          dnn_lc_classifier.get_variable_value(variable_name),
+          warm_started_dnn_lc_classifier.get_variable_value(variable_name))
+
+  def test_regressor_basic_warm_starting(self, fc_impl):
+    """Tests correctness of DNNLinearCombinedRegressor default warm-start."""
+    age = fc_impl.numeric_column('age')
+    city = fc_impl.embedding_column(
+        fc_impl.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNLinearCombinedRegressor and train to save a checkpoint.
+    dnn_lc_regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
+        linear_feature_columns=[age],
+        dnn_feature_columns=[city],
+        dnn_hidden_units=[256, 128],
+        model_dir=self._ckpt_and_vocab_dir,
+        linear_optimizer='SGD',
+        dnn_optimizer='SGD')
+    dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNLinearCombinedRegressor, warm-started from the first.
+    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
+    # have accumulator values that change).
+    warm_started_dnn_lc_regressor = (
+        dnn_linear_combined.DNNLinearCombinedRegressor(
+            linear_feature_columns=[age],
+            dnn_feature_columns=[city],
+            dnn_hidden_units=[256, 128],
+            linear_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            dnn_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            warm_start_from=dnn_lc_regressor.model_dir))
+
+    warm_started_dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_lc_regressor.get_variable_names():
+      self.assertAllClose(
+          dnn_lc_regressor.get_variable_value(variable_name),
+          warm_started_dnn_lc_regressor.get_variable_value(variable_name))
+
+  def test_warm_starting_selective_variables(self, fc_impl):
+    """Tests selecting variables to warm-start."""
+    age = fc_impl.numeric_column('age')
+    city = fc_impl.embedding_column(
+        fc_impl.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
+    dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
+        linear_feature_columns=[age],
+        dnn_feature_columns=[city],
+        dnn_hidden_units=[256, 128],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        linear_optimizer='SGD',
+        dnn_optimizer='SGD')
+    dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNLinearCombinedClassifier, warm-started from the first.
+    # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
+    # have accumulator values that change).
+    warm_started_dnn_lc_classifier = (
+        dnn_linear_combined.DNNLinearCombinedClassifier(
+            linear_feature_columns=[age],
+            dnn_feature_columns=[city],
+            dnn_hidden_units=[256, 128],
+            n_classes=4,
+            linear_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            dnn_optimizer=gradient_descent.GradientDescentOptimizer(
+                learning_rate=0.0),
+            # The provided regular expression will only warm-start the deep
+            # portion of the model.
+            warm_start_from=estimator.WarmStartSettings(
+                ckpt_to_initialize_from=dnn_lc_classifier.model_dir,
+                vars_to_warm_start='.*(dnn).*')))
+
+    warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
+      if 'dnn' in variable_name:
+        self.assertAllClose(
+            dnn_lc_classifier.get_variable_value(variable_name),
+            warm_started_dnn_lc_classifier.get_variable_value(variable_name))
+      elif 'linear' in variable_name:
+        linear_values = warm_started_dnn_lc_classifier.get_variable_value(
+            variable_name)
+        # Since they're not warm-started, the linear weights will be
+        # zero-initialized.
+        self.assertAllClose(np.zeros_like(linear_values), linear_values)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/canned/dnn_test.py b/tensorflow/python/estimator/canned/dnn_test.py
new file mode 100644
index 0000000000..756696cea0
--- /dev/null
+++ b/tensorflow/python/estimator/canned/dnn_test.py
@@ -0,0 +1,580 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for dnn.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import shutil
+import tempfile
+
+from absl.testing import parameterized
+import numpy as np
+import six
+
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.estimator.canned import dnn
+from tensorflow.python.estimator.canned import dnn_testing_utils
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.estimator.inputs import pandas_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import input as input_lib
+from tensorflow.python.training import queue_runner
+
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+
+def _dnn_classifier_fn(*args, **kwargs):
+  return dnn.DNNClassifier(*args, **kwargs)
+
+
+class DNNModelFnTest(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNModelFnTest.__init__(
+        self, dnn._dnn_model_fn, fc_impl=feature_column)
+
+
+class DNNModelFnV2Test(dnn_testing_utils.BaseDNNModelFnTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNModelFnTest.__init__(
+        self, dnn._dnn_model_fn, fc_impl=feature_column_v2)
+
+
+class DNNLogitFnTest(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNLogitFnTest.__init__(
+        self, dnn._dnn_logit_fn_builder, fc_impl=feature_column)
+
+
+class DNNLogitFnV2Test(dnn_testing_utils.BaseDNNLogitFnTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNLogitFnTest.__init__(
+        self, dnn._dnn_logit_fn_builder, fc_impl=feature_column_v2)
+
+
+class DNNWarmStartingTest(dnn_testing_utils.BaseDNNWarmStartingTest,
+                          test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
+        self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNWarmStartingV2Test(dnn_testing_utils.BaseDNNWarmStartingTest,
+                            test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNWarmStartingTest.__init__(
+        self, _dnn_classifier_fn, _dnn_regressor_fn, fc_impl=feature_column_v2)
+
+
+class DNNClassifierEvaluateTest(
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNClassifierEvaluateV2Test(
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierEvaluateTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
+
+
+class DNNClassifierPredictTest(
+    dnn_testing_utils.BaseDNNClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNClassifierPredictV2Test(dnn_testing_utils.BaseDNNClassifierPredictTest,
+                                 test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierPredictTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
+
+
+class DNNClassifierTrainTest(
+    dnn_testing_utils.BaseDNNClassifierTrainTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column)
+
+
+class DNNClassifierTrainV2Test(dnn_testing_utils.BaseDNNClassifierTrainTest,
+                               test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNClassifierTrainTest.__init__(
+        self, _dnn_classifier_fn, fc_impl=feature_column_v2)
+
+
+def _dnn_regressor_fn(*args, **kwargs):
+  return dnn.DNNRegressor(*args, **kwargs)
+
+
+class DNNRegressorEvaluateTest(
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNRegressorEvaluateV2Test(dnn_testing_utils.BaseDNNRegressorEvaluateTest,
+                                 test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorEvaluateTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
+
+
+class DNNRegressorPredictTest(
+    dnn_testing_utils.BaseDNNRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNRegressorPredictV2Test(dnn_testing_utils.BaseDNNRegressorPredictTest,
+                                test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorPredictTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
+
+
+class DNNRegressorTrainTest(
+    dnn_testing_utils.BaseDNNRegressorTrainTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column)
+
+
+class DNNRegressorTrainV2Test(dnn_testing_utils.BaseDNNRegressorTrainTest,
+                              test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    dnn_testing_utils.BaseDNNRegressorTrainTest.__init__(
+        self, _dnn_regressor_fn, fc_impl=feature_column_v2)
+
+
+def _queue_parsed_features(feature_map):
+  tensors_to_enqueue = []
+  keys = []
+  for key, tensor in six.iteritems(feature_map):
+    keys.append(key)
+    tensors_to_enqueue.append(tensor)
+  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
+  input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
+  queue_runner.add_queue_runner(
+      queue_runner.QueueRunner(
+          input_queue,
+          [input_queue.enqueue(tensors_to_enqueue)]))
+  dequeued_tensors = input_queue.dequeue()
+  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
+
+
+@parameterized.parameters((feature_column,), (feature_column_v2,))
+class DNNRegressorIntegrationTest(test.TestCase, parameterized.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, batch_size,
+                          fc_impl):
+    feature_columns = [fc_impl.numeric_column('x', shape=(input_dimension,))]
+
+    est = dnn.DNNRegressor(
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array([
+        x[prediction_keys.PredictionKeys.PREDICTIONS]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self, fc_impl):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+  def test_pandas_input_fn(self, fc_impl):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+    label_dimension = 1
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size, dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(data)
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+  def test_input_fn_from_parse_example(self, fc_impl):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    label_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x': feature_pb2.Feature(
+                  float_list=feature_pb2.FloatList(value=datum)),
+              'y': feature_pb2.Feature(
+                  float_list=feature_pb2.FloatList(value=datum)),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
+    }
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=label_dimension,
+        label_dimension=label_dimension,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+
+@parameterized.parameters((feature_column,), (feature_column_v2,))
+class DNNClassifierIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _as_label(self, data_in_float):
+    return np.rint(data_in_float).astype(np.int64)
+
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, n_classes, batch_size, fc_impl):
+    feature_columns = [fc_impl.numeric_column('x', shape=(input_dimension,))]
+
+    est = dnn.DNNClassifier(
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predicted_proba = np.array([
+        x[prediction_keys.PredictionKeys.PROBABILITIES]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
+
+    # EXPORT
+    feature_spec = fc_impl.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self, fc_impl):
+    """Tests complete flow with numpy_input_fn."""
+    n_classes = 3
+    input_dimension = 2
+    batch_size = 10
+    data = np.linspace(
+        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
+    x_data = data.reshape(batch_size, input_dimension)
+    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+  def test_pandas_input_fn(self, fc_impl):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+    input_dimension = 1
+    n_classes = 3
+    batch_size = 10
+    data = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(self._as_label(data))
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+  def test_input_fn_from_parse_example(self, fc_impl):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    input_dimension = 2
+    n_classes = 3
+    batch_size = 10
+    data = np.linspace(
+        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, input_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x':
+                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
+                      value=datum)),
+              'y':
+                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
+                      value=self._as_label(datum[:1]))),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
+    }
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size,
+        fc_impl=fc_impl)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py
index ab3f9b1020..71d7e54783 100644
--- a/tensorflow/python/estimator/canned/dnn_testing_utils.py
+++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,2063 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""dnn_testing_utils python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Utils to be used in testing DNN estimators."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import dnn_testing_utils
+import os
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.core.framework import summary_pb2
+from tensorflow.python.client import session as tf_session
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.platform import test
+from tensorflow.python.summary import summary as summary_lib
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.training import saver
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import training_util
+
+# pylint rules which are disabled by default for test files.
+# pylint: disable=invalid-name,protected-access,missing-docstring
+
+# Names of variables created by model.
+LEARNING_RATE_NAME = 'dnn/regression_head/dnn/learning_rate'
+HIDDEN_WEIGHTS_NAME_PATTERN = 'dnn/hiddenlayer_%d/kernel'
+HIDDEN_BIASES_NAME_PATTERN = 'dnn/hiddenlayer_%d/bias'
+BATCH_NORM_BETA_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/beta'
+BATCH_NORM_GAMMA_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/gamma'
+BATCH_NORM_MEAN_NAME_PATTERN = 'dnn/hiddenlayer_%d/batchnorm_%d/moving_mean'
+BATCH_NORM_VARIANCE_NAME_PATTERN = (
+    'dnn/hiddenlayer_%d/batchnorm_%d/moving_variance')
+LOGITS_WEIGHTS_NAME = 'dnn/logits/kernel'
+LOGITS_BIASES_NAME = 'dnn/logits/bias'
+OCCUPATION_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
+                             'occupation_embedding/embedding_weights')
+CITY_EMBEDDING_NAME = ('dnn/input_from_feature_columns/input_layer/'
+                       'city_embedding/embedding_weights')
+
+
+def assert_close(expected, actual, rtol=1e-04, message='', name='assert_close'):
+  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
+    expected = ops.convert_to_tensor(expected, name='expected')
+    actual = ops.convert_to_tensor(actual, name='actual')
+    rdiff = math_ops.abs((expected - actual) / expected, 'diff')
+    rtol = ops.convert_to_tensor(rtol, name='rtol')
+    return check_ops.assert_less(
+        rdiff,
+        rtol,
+        data=(message, 'Condition expected =~ actual did not hold element-wise:'
+              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
+              'rtol = ', rtol,),
+        summarize=expected.get_shape().num_elements(),
+        name=scope)
+
+
+def create_checkpoint(weights_and_biases,
+                      global_step,
+                      model_dir,
+                      batch_norm_vars=None):
+  """Create checkpoint file with provided model weights.
+
+  Args:
+    weights_and_biases: Iterable of tuples of weight and bias values.
+    global_step: Initial global step to save in checkpoint.
+    model_dir: Directory into which checkpoint is saved.
+    batch_norm_vars: Variables used for batch normalization.
+  """
+  weights, biases = zip(*weights_and_biases)
+  if batch_norm_vars:
+    assert len(batch_norm_vars) == len(weights_and_biases) - 1
+    (bn_betas, bn_gammas, bn_means, bn_variances) = zip(*batch_norm_vars)
+  model_weights = {}
+
+  # Hidden layer weights.
+  for i in range(0, len(weights) - 1):
+    model_weights[HIDDEN_WEIGHTS_NAME_PATTERN % i] = weights[i]
+    model_weights[HIDDEN_BIASES_NAME_PATTERN % i] = biases[i]
+    if batch_norm_vars:
+      model_weights[BATCH_NORM_BETA_NAME_PATTERN % (i, i)] = bn_betas[i]
+      model_weights[BATCH_NORM_GAMMA_NAME_PATTERN % (i, i)] = bn_gammas[i]
+      model_weights[BATCH_NORM_MEAN_NAME_PATTERN % (i, i)] = bn_means[i]
+      model_weights[BATCH_NORM_VARIANCE_NAME_PATTERN % (i, i)] = bn_variances[i]
+
+  # Output layer weights.
+  model_weights[LOGITS_WEIGHTS_NAME] = weights[-1]
+  model_weights[LOGITS_BIASES_NAME] = biases[-1]
+
+  with ops.Graph().as_default():
+    # Create model variables.
+    for k, v in six.iteritems(model_weights):
+      variables_lib.Variable(v, name=k, dtype=dtypes.float32)
+
+    # Create non-model variables.
+    global_step_var = training_util.create_global_step()
+
+    # Initialize vars and save checkpoint.
+    with tf_session.Session() as sess:
+      variables_lib.global_variables_initializer().run()
+      global_step_var.assign(global_step).eval()
+      saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
+
+
+def mock_head(testcase, hidden_units, logits_dimension, expected_logits):
+  """Returns a mock head that validates logits values and variable names."""
+  hidden_weights_names = [(HIDDEN_WEIGHTS_NAME_PATTERN + '/part_0:0') % i
+                          for i in range(len(hidden_units))]
+  hidden_biases_names = [(HIDDEN_BIASES_NAME_PATTERN + '/part_0:0') % i
+                         for i in range(len(hidden_units))]
+  expected_var_names = (
+      hidden_weights_names + hidden_biases_names +
+      [LOGITS_WEIGHTS_NAME + '/part_0:0', LOGITS_BIASES_NAME + '/part_0:0'])
+
+  def _create_tpu_estimator_spec(
+      features, mode, logits, labels, train_op_fn=None, optimizer=None):
+    del features, labels  # Not used.
+    trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+    testcase.assertItemsEqual(expected_var_names,
+                              [var.name for var in trainable_vars])
+    loss = constant_op.constant(1.)
+    assert_logits = assert_close(
+        expected_logits, logits, message='Failed for mode={}. '.format(mode))
+    with ops.control_dependencies([assert_logits]):
+      if mode == model_fn.ModeKeys.TRAIN:
+        if train_op_fn is not None:
+          train_op = train_op_fn(loss)
+        elif optimizer is not None:
+          train_op = optimizer.minimize(loss, global_step=None)
+        return model_fn._TPUEstimatorSpec(
+            mode=mode, loss=loss, train_op=train_op)
+      elif mode == model_fn.ModeKeys.EVAL:
+        return model_fn._TPUEstimatorSpec(
+            mode=mode, loss=array_ops.identity(loss))
+      elif mode == model_fn.ModeKeys.PREDICT:
+        return model_fn._TPUEstimatorSpec(
+            mode=mode, predictions={'logits': array_ops.identity(logits)})
+      else:
+        testcase.fail('Invalid mode: {}'.format(mode))
+
+  def _create_estimator_spec(
+      features, mode, logits, labels, train_op_fn=None, optimizer=None):
+    tpu_spec = _create_tpu_estimator_spec(
+        features, mode, logits, labels, train_op_fn, optimizer)
+    return tpu_spec.as_estimator_spec()
+
+  head = test.mock.NonCallableMagicMock(spec=head_lib._Head)
+  head.logits_dimension = logits_dimension
+  head._create_tpu_estimator_spec = test.mock.MagicMock(
+      wraps=_create_tpu_estimator_spec)
+  head.create_estimator_spec = test.mock.MagicMock(
+      wraps=_create_estimator_spec)
+
+  return head
+
+
+def mock_optimizer(testcase, hidden_units, expected_loss=None):
+  """Creates a mock optimizer to test the train method.
+
+  Args:
+    testcase: A TestCase instance.
+    hidden_units: Iterable of integer sizes for the hidden layers.
+    expected_loss: If given, will assert the loss value.
+
+  Returns:
+    A mock Optimizer.
+  """
+  hidden_weights_names = [(HIDDEN_WEIGHTS_NAME_PATTERN + '/part_0:0') % i
+                          for i in range(len(hidden_units))]
+  hidden_biases_names = [(HIDDEN_BIASES_NAME_PATTERN + '/part_0:0') % i
+                         for i in range(len(hidden_units))]
+  expected_var_names = (
+      hidden_weights_names + hidden_biases_names +
+      [LOGITS_WEIGHTS_NAME + '/part_0:0', LOGITS_BIASES_NAME + '/part_0:0'])
+
+  def _minimize(loss, global_step=None, var_list=None):
+    """Mock of optimizer.minimize."""
+    trainable_vars = var_list or ops.get_collection(
+        ops.GraphKeys.TRAINABLE_VARIABLES)
+    testcase.assertItemsEqual(expected_var_names,
+                              [var.name for var in trainable_vars])
+
+    # Verify loss. We can't check the value directly, so we add an assert op.
+    testcase.assertEquals(0, loss.shape.ndims)
+    if expected_loss is None:
+      if global_step is not None:
+        return state_ops.assign_add(global_step, 1).op
+      return control_flow_ops.no_op()
+    assert_loss = assert_close(
+        math_ops.to_float(expected_loss, name='expected'),
+        loss,
+        name='assert_loss')
+    with ops.control_dependencies((assert_loss,)):
+      if global_step is not None:
+        return state_ops.assign_add(global_step, 1).op
+      return control_flow_ops.no_op()
+
+  optimizer_mock = test.mock.NonCallableMagicMock(
+      spec=optimizer_lib.Optimizer,
+      wraps=optimizer_lib.Optimizer(use_locking=False, name='my_optimizer'))
+  optimizer_mock.minimize = test.mock.MagicMock(wraps=_minimize)
+
+  return optimizer_mock
+
+
+class BaseDNNModelFnTest(object):
+  """Tests that _dnn_model_fn passes expected logits to mock head."""
+
+  def __init__(self, dnn_model_fn, fc_impl=feature_column):
+    self._dnn_model_fn = dnn_model_fn
+    self._fc_impl = fc_impl
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_logits(self, mode, hidden_units, logits_dimension, inputs,
+                   expected_logits):
+    """Tests that the expected logits are passed to mock head."""
+    with ops.Graph().as_default():
+      training_util.create_global_step()
+      head = mock_head(
+          self,
+          hidden_units=hidden_units,
+          logits_dimension=logits_dimension,
+          expected_logits=expected_logits)
+      estimator_spec = self._dnn_model_fn(
+          features={'age': constant_op.constant(inputs)},
+          labels=constant_op.constant([[1]]),
+          mode=mode,
+          head=head,
+          hidden_units=hidden_units,
+          feature_columns=[
+              self._fc_impl.numeric_column(
+                  'age', shape=np.array(inputs).shape[1:])
+          ],
+          optimizer=mock_optimizer(self, hidden_units))
+      with monitored_session.MonitoredTrainingSession(
+          checkpoint_dir=self._model_dir) as sess:
+        if mode == model_fn.ModeKeys.TRAIN:
+          sess.run(estimator_spec.train_op)
+        elif mode == model_fn.ModeKeys.EVAL:
+          sess.run(estimator_spec.loss)
+        elif mode == model_fn.ModeKeys.PREDICT:
+          sess.run(estimator_spec.predictions)
+        else:
+          self.fail('Invalid mode: {}'.format(mode))
+
+  def test_one_dim_logits(self):
+    """Tests one-dimensional logits.
+
+    input_layer = [[10]]
+    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
+    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
+                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
+    logits = [[-1*2.38 +1*0 +0.3]] = [[-2.08]]
+    """
+    base_global_step = 100
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=1,
+          inputs=[[10.]],
+          expected_logits=[[-2.08]])
+
+  def test_multi_dim_logits(self):
+    """Tests multi-dimensional logits.
+
+    input_layer = [[10]]
+    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
+    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
+                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
+    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38]]
+           = [[-2.08, 2.08, 1.19]]
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
+                                                 [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      base_global_step, self._model_dir)
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=3,
+          inputs=[[10.]],
+          expected_logits=[[-2.08, 2.08, 1.19]])
+
+  def test_multi_example_multi_dim_logits(self):
+    """Tests multiple examples and multi-dimensional logits.
+
+    input_layer = [[10], [5]]
+    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)],
+                      [relu(0.6*5 +0.1), relu(0.5*5 -0.1)]]
+                   = [[6.1, 4.9], [3.1, 2.4]]
+    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)],
+                      [relu(1*3.1 -0.8*2.4 +0.2), relu(0.8*3.1 -1*2.4 -0.1)]]
+                   = [[2.38, 0], [1.38, 0]]
+    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38],
+              [-1*1.38 +0.3, 1*1.38 -0.3, 0.5*1.38]]
+           = [[-2.08, 2.08, 1.19], [-1.08, 1.08, 0.69]]
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
+                                                 [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      base_global_step, self._model_dir)
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=3,
+          inputs=[[10.], [5.]],
+          expected_logits=[[-2.08, 2.08, 1.19], [-1.08, 1.08, .69]])
+
+  def test_multi_dim_input_one_dim_logits(self):
+    """Tests multi-dimensional inputs and one-dimensional logits.
+
+    input_layer = [[10, 8]]
+    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
+                   = [[1.3, 0.9]]
+    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
+                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
+    logits = [[-1*0.78 +1*0 +0.3]] = [[-0.48]]
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5], [-.6, -.5]],
+                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+                       ([[-1.], [1.]], [.3]),), base_global_step,
+                      self._model_dir)
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=1,
+          inputs=[[10., 8.]],
+          expected_logits=[[-0.48]])
+
+  def test_multi_dim_input_multi_dim_logits(self):
+    """Tests multi-dimensional inputs and multi-dimensional logits.
+
+    input_layer = [[10, 8]]
+    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
+                   = [[1.3, 0.9]]
+    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
+                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
+    logits = [[-1*0.78 + 0.3, 1*0.78 -0.3, 0.5*0.78]] = [[-0.48, 0.48, 0.39]]
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5], [-.6, -.5]],
+                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      base_global_step, self._model_dir)
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=3,
+          inputs=[[10., 8.]],
+          expected_logits=[[-0.48, 0.48, 0.39]])
+
+  def test_multi_feature_column_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5], [-.6, -.5]],
+                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      base_global_step, self._model_dir)
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        training_util.create_global_step()
+        head = mock_head(
+            self,
+            hidden_units=hidden_units,
+            logits_dimension=logits_dimension,
+            expected_logits=expected_logits)
+        estimator_spec = self._dnn_model_fn(
+            features={
+                'age': constant_op.constant(inputs[0]),
+                'height': constant_op.constant(inputs[1])
+            },
+            labels=constant_op.constant([[1]]),
+            mode=mode,
+            head=head,
+            hidden_units=hidden_units,
+            feature_columns=[
+                self._fc_impl.numeric_column('age'),
+                self._fc_impl.numeric_column('height')
+            ],
+            optimizer=mock_optimizer(self, hidden_units))
+        with monitored_session.MonitoredTrainingSession(
+            checkpoint_dir=self._model_dir) as sess:
+          if mode == model_fn.ModeKeys.TRAIN:
+            sess.run(estimator_spec.train_op)
+          elif mode == model_fn.ModeKeys.EVAL:
+            sess.run(estimator_spec.loss)
+          elif mode == model_fn.ModeKeys.PREDICT:
+            sess.run(estimator_spec.predictions)
+          else:
+            self.fail('Invalid mode: {}'.format(mode))
+
+  def test_multi_feature_column_mix_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((
+        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
+        ([[1., .8], [-.8, -1.]], [.2, -.2]),
+        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
+    ), base_global_step, self._model_dir)
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        training_util.create_global_step()
+        head = mock_head(
+            self,
+            hidden_units=hidden_units,
+            logits_dimension=logits_dimension,
+            expected_logits=expected_logits)
+        estimator_spec = self._dnn_model_fn(
+            features={
+                'age': constant_op.constant(inputs[0]),
+                'height': constant_op.constant(inputs[1])
+            },
+            labels=constant_op.constant([[1]]),
+            mode=mode,
+            head=head,
+            hidden_units=hidden_units,
+            feature_columns=[
+                feature_column.numeric_column('age'),
+                feature_column_v2.numeric_column('height')
+            ],
+            optimizer=mock_optimizer(self, hidden_units))
+        with monitored_session.MonitoredTrainingSession(
+            checkpoint_dir=self._model_dir) as sess:
+          if mode == model_fn.ModeKeys.TRAIN:
+            sess.run(estimator_spec.train_op)
+          elif mode == model_fn.ModeKeys.EVAL:
+            sess.run(estimator_spec.loss)
+          elif mode == model_fn.ModeKeys.PREDICT:
+            sess.run(estimator_spec.predictions)
+          else:
+            self.fail('Invalid mode: {}'.format(mode))
+
+  def test_features_tensor_raises_value_error(self):
+    """Tests that passing a Tensor for features raises a ValueError."""
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[0, 0, 0]]
+
+    with ops.Graph().as_default():
+      training_util.create_global_step()
+      head = mock_head(
+          self,
+          hidden_units=hidden_units,
+          logits_dimension=logits_dimension,
+          expected_logits=expected_logits)
+      with self.assertRaisesRegexp(ValueError, 'features should be a dict'):
+        self._dnn_model_fn(
+            features=constant_op.constant(inputs),
+            labels=constant_op.constant([[1]]),
+            mode=model_fn.ModeKeys.TRAIN,
+            head=head,
+            hidden_units=hidden_units,
+            feature_columns=[
+                self._fc_impl.numeric_column(
+                    'age', shape=np.array(inputs).shape[1:])
+            ],
+            optimizer=mock_optimizer(self, hidden_units))
+
+
+class BaseDNNLogitFnTest(object):
+  """Tests correctness of logits calculated from _dnn_logit_fn_builder."""
+
+  def __init__(self, dnn_logit_fn_builder, fc_impl=feature_column):
+    self._dnn_logit_fn_builder = dnn_logit_fn_builder
+    self._fc_impl = fc_impl
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_logits(self,
+                   mode,
+                   hidden_units,
+                   logits_dimension,
+                   inputs,
+                   expected_logits,
+                   batch_norm=False):
+    """Tests that the expected logits are calculated."""
+    with ops.Graph().as_default():
+      # Global step needed for MonitoredSession, which is in turn used to
+      # explicitly set variable weights through a checkpoint.
+      training_util.create_global_step()
+      # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
+      # the checkpoint naming is shared.
+      with variable_scope.variable_scope('dnn'):
+        input_layer_partitioner = (
+            partitioned_variables.min_max_variable_partitioner(
+                max_partitions=0, min_slice_size=64 << 20))
+        logit_fn = self._dnn_logit_fn_builder(
+            units=logits_dimension,
+            hidden_units=hidden_units,
+            feature_columns=[
+                self._fc_impl.numeric_column(
+                    'age', shape=np.array(inputs).shape[1:])
+            ],
+            activation_fn=nn.relu,
+            dropout=None,
+            input_layer_partitioner=input_layer_partitioner,
+            batch_norm=batch_norm)
+        logits = logit_fn(
+            features={'age': constant_op.constant(inputs)}, mode=mode)
+        with monitored_session.MonitoredTrainingSession(
+            checkpoint_dir=self._model_dir) as sess:
+          self.assertAllClose(expected_logits, sess.run(logits))
+
+  def test_one_dim_logits(self):
+    """Tests one-dimensional logits.
+
+    input_layer = [[10]]
+    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
+    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
+                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
+    logits = [[-1*2.38 +1*0 +0.3]] = [[-2.08]]
+    """
+    base_global_step = 100
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=1,
+          inputs=[[10.]],
+          expected_logits=[[-2.08]])
+
+  def test_one_dim_logits_with_batch_norm(self):
+    """Tests one-dimensional logits.
+
+    input_layer = [[10]]
+    hidden_layer_0 = [[relu(0.6*10 +1), relu(0.5*10 -1)]] = [[7, 4]]
+    hidden_layer_0 = [[relu(0.6*20 +1), relu(0.5*20 -1)]] = [[13, 9]]
+
+    batch_norm_0, training (epsilon = 0.001):
+      mean1 = 1/2*(7+13) = 10,
+      variance1 = 1/2*(3^2+3^2) = 9
+      x11 = (7-10)/sqrt(9+0.001) = -0.999944449,
+      x21 = (13-10)/sqrt(9+0.001) = 0.999944449,
+
+      mean2 = 1/2*(4+9) = 6.5,
+      variance2 = 1/2*(2.5^2+.2.5^2) = 6.25
+      x12 = (4-6.5)/sqrt(6.25+0.001) = -0.99992001,
+      x22 = (9-6.5)/sqrt(6.25+0.001) = 0.99992001,
+
+    logits = [[-1*(-0.999944449) + 2*(-0.99992001) + 0.3],
+              [-1*0.999944449 + 2*0.99992001 + 0.3]]
+           = [[-0.699895571],[1.299895571]]
+
+    batch_norm_0, not training (epsilon = 0.001):
+      moving_mean1 = 0, moving_variance1 = 1
+      x11 = (7-0)/sqrt(1+0.001) = 6.996502623,
+      x21 = (13-0)/sqrt(1+0.001) = 12.993504871,
+      moving_mean2 = 0, moving_variance2 = 1
+      x12 = (4-0)/sqrt(1+0.001) = 3.998001499,
+      x22 = (9-0)/sqrt(1+0.001) = 8.995503372,
+
+    logits = [[-1*6.996502623 + 2*3.998001499 + 0.3],
+              [-1*12.993504871 + 2*8.995503372 + 0.3]]
+           = [[1.299500375],[5.297501873]]
+    """
+    base_global_step = 100
+    create_checkpoint(
+        (
+            ([[.6, .5]], [1., -1.]),
+            ([[-1.], [2.]], [.3]),
+        ),
+        base_global_step,
+        self._model_dir,
+        batch_norm_vars=([[0, 0],  # beta.
+                          [1, 1],  # gamma.
+                          [0, 0],  # moving mean.
+                          [1, 1],  # moving variance.
+                         ],))
+    self._test_logits(
+        model_fn.ModeKeys.TRAIN,
+        hidden_units=[2],
+        logits_dimension=1,
+        inputs=[[10.], [20.]],
+        expected_logits=[[-0.699895571], [1.299895571]],
+        batch_norm=True)
+    for mode in [model_fn.ModeKeys.EVAL, model_fn.ModeKeys.PREDICT]:
+      self._test_logits(
+          mode,
+          hidden_units=[2],
+          logits_dimension=1,
+          inputs=[[10.], [20.]],
+          expected_logits=[[1.299500375], [5.297501873]],
+          batch_norm=True)
+
+  def test_multi_dim_logits(self):
+    """Tests multi-dimensional logits.
+
+    input_layer = [[10]]
+    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)]] = [[6.1, 4.9]]
+    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)]]
+                   = [[relu(2.38), relu(-0.12)]] = [[2.38, 0]]
+    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38]]
+           = [[-2.08, 2.08, 1.19]]
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
+                                                 [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      base_global_step, self._model_dir)
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=3,
+          inputs=[[10.]],
+          expected_logits=[[-2.08, 2.08, 1.19]])
+
+  def test_multi_example_multi_dim_logits(self):
+    """Tests multiple examples and multi-dimensional logits.
+
+    input_layer = [[10], [5]]
+    hidden_layer_0 = [[relu(0.6*10 +0.1), relu(0.5*10 -0.1)],
+                      [relu(0.6*5 +0.1), relu(0.5*5 -0.1)]]
+                   = [[6.1, 4.9], [3.1, 2.4]]
+    hidden_layer_1 = [[relu(1*6.1 -0.8*4.9 +0.2), relu(0.8*6.1 -1*4.9 -0.1)],
+                      [relu(1*3.1 -0.8*2.4 +0.2), relu(0.8*3.1 -1*2.4 -0.1)]]
+                   = [[2.38, 0], [1.38, 0]]
+    logits = [[-1*2.38 +0.3, 1*2.38 -0.3, 0.5*2.38],
+              [-1*1.38 +0.3, 1*1.38 -0.3, 0.5*1.38]]
+           = [[-2.08, 2.08, 1.19], [-1.08, 1.08, 0.69]]
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
+                                                 [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      base_global_step, self._model_dir)
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=3,
+          inputs=[[10.], [5.]],
+          expected_logits=[[-2.08, 2.08, 1.19], [-1.08, 1.08, .69]])
+
+  def test_multi_dim_input_one_dim_logits(self):
+    """Tests multi-dimensional inputs and one-dimensional logits.
+
+    input_layer = [[10, 8]]
+    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
+                   = [[1.3, 0.9]]
+    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
+                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
+    logits = [[-1*0.78 +1*0 +0.3]] = [[-0.48]]
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5], [-.6, -.5]],
+                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+                       ([[-1.], [1.]], [.3]),), base_global_step,
+                      self._model_dir)
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=1,
+          inputs=[[10., 8.]],
+          expected_logits=[[-0.48]])
+
+  def test_multi_dim_input_multi_dim_logits(self):
+    """Tests multi-dimensional inputs and multi-dimensional logits.
+
+    input_layer = [[10, 8]]
+    hidden_layer_0 = [[relu(0.6*10 -0.6*8 +0.1), relu(0.5*10 -0.5*8 -0.1)]]
+                   = [[1.3, 0.9]]
+    hidden_layer_1 = [[relu(1*1.3 -0.8*0.9 + 0.2), relu(0.8*1.3 -1*0.9 -0.2)]]
+                   = [[0.78, relu(-0.06)]] = [[0.78, 0]]
+    logits = [[-1*0.78 + 0.3, 1*0.78 -0.3, 0.5*0.78]] = [[-0.48, 0.48, 0.39]]
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5], [-.6, -.5]],
+                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      base_global_step, self._model_dir)
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      self._test_logits(
+          mode,
+          hidden_units=(2, 2),
+          logits_dimension=3,
+          inputs=[[10., 8.]],
+          expected_logits=[[-0.48, 0.48, 0.39]])
+
+  def test_multi_feature_column_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((([[.6, .5], [-.6, -.5]],
+                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      base_global_step, self._model_dir)
+
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        # Global step needed for MonitoredSession, which is in turn used to
+        # explicitly set variable weights through a checkpoint.
+        training_util.create_global_step()
+        # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
+        # the checkpoint naming is shared.
+        with variable_scope.variable_scope('dnn'):
+          input_layer_partitioner = (
+              partitioned_variables.min_max_variable_partitioner(
+                  max_partitions=0, min_slice_size=64 << 20))
+          logit_fn = self._dnn_logit_fn_builder(
+              units=logits_dimension,
+              hidden_units=hidden_units,
+              feature_columns=[
+                  self._fc_impl.numeric_column('age'),
+                  self._fc_impl.numeric_column('height')
+              ],
+              activation_fn=nn.relu,
+              dropout=None,
+              input_layer_partitioner=input_layer_partitioner,
+              batch_norm=False)
+          logits = logit_fn(
+              features={
+                  'age': constant_op.constant(inputs[0]),
+                  'height': constant_op.constant(inputs[1])
+              },
+              mode=mode)
+          with monitored_session.MonitoredTrainingSession(
+              checkpoint_dir=self._model_dir) as sess:
+            self.assertAllClose(expected_logits, sess.run(logits))
+
+  def test_multi_feature_column_mix_multi_dim_logits(self):
+    """Tests multiple feature columns and multi-dimensional logits.
+
+    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
+    difference is that the input consists of two 1D feature columns, instead of
+    one 2D feature column.
+    """
+    base_global_step = 100
+    create_checkpoint((
+        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
+        ([[1., .8], [-.8, -1.]], [.2, -.2]),
+        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
+    ), base_global_step, self._model_dir)
+
+    hidden_units = (2, 2)
+    logits_dimension = 3
+    inputs = ([[10.]], [[8.]])
+    expected_logits = [[-0.48, 0.48, 0.39]]
+
+    for mode in [
+        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
+        model_fn.ModeKeys.PREDICT
+    ]:
+      with ops.Graph().as_default():
+        # Global step needed for MonitoredSession, which is in turn used to
+        # explicitly set variable weights through a checkpoint.
+        training_util.create_global_step()
+        # Use a variable scope here with 'dnn', emulating the dnn model_fn, so
+        # the checkpoint naming is shared.
+        with variable_scope.variable_scope('dnn'):
+          input_layer_partitioner = (
+              partitioned_variables.min_max_variable_partitioner(
+                  max_partitions=0, min_slice_size=64 << 20))
+          logit_fn = self._dnn_logit_fn_builder(
+              units=logits_dimension,
+              hidden_units=hidden_units,
+              feature_columns=[
+                  feature_column.numeric_column('age'),
+                  feature_column_v2.numeric_column('height')
+              ],
+              activation_fn=nn.relu,
+              dropout=None,
+              input_layer_partitioner=input_layer_partitioner,
+              batch_norm=False)
+          logits = logit_fn(
+              features={
+                  'age': constant_op.constant(inputs[0]),
+                  'height': constant_op.constant(inputs[1])
+              },
+              mode=mode)
+          with monitored_session.MonitoredTrainingSession(
+              checkpoint_dir=self._model_dir) as sess:
+            self.assertAllClose(expected_logits, sess.run(logits))
+
+
+class BaseDNNWarmStartingTest(object):
+
+  def __init__(self,
+               _dnn_classifier_fn,
+               _dnn_regressor_fn,
+               fc_impl=feature_column):
+    self._dnn_classifier_fn = _dnn_classifier_fn
+    self._dnn_regressor_fn = _dnn_regressor_fn
+    self._fc_impl = fc_impl
+
+  def setUp(self):
+    # Create a directory to save our old checkpoint and vocabularies to.
+    self._ckpt_and_vocab_dir = tempfile.mkdtemp()
+
+    # Make a dummy input_fn.
+    def _input_fn():
+      features = {
+          'city': [['Palo Alto'], ['Mountain View']],
+          'locality': [['Palo Alto'], ['Mountain View']],
+          'occupation': [['doctor'], ['consultant']]
+      }
+      return features, [0, 1]
+
+    self._input_fn = _input_fn
+
+  def tearDown(self):
+    # Clean up checkpoint / vocab dir.
+    writer_cache.FileWriterCache.clear()
+    shutil.rmtree(self._ckpt_and_vocab_dir)
+
+  def assertAllNotClose(self, t1, t2):
+    """Helper assert for arrays."""
+    sum_of_abs_diff = 0.0
+    for x, y in zip(t1, t2):
+      try:
+        for a, b in zip(x, y):
+          sum_of_abs_diff += abs(b - a)
+      except TypeError:
+        sum_of_abs_diff += abs(y - x)
+    self.assertGreater(sum_of_abs_diff, 0)
+
+  def test_classifier_basic_warm_starting(self):
+    """Tests correctness of DNNClassifier default warm-start."""
+    city = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNClassifier and train to save a checkpoint.
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=dnn_classifier.model_dir)
+
+    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_classifier.get_variable_names():
+      self.assertAllClose(
+          dnn_classifier.get_variable_value(variable_name),
+          warm_started_dnn_classifier.get_variable_value(variable_name))
+
+  def test_regressor_basic_warm_starting(self):
+    """Tests correctness of DNNRegressor default warm-start."""
+    city = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNRegressor and train to save a checkpoint.
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        model_dir=self._ckpt_and_vocab_dir,
+        optimizer='SGD')
+    dnn_regressor.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNRegressor, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=dnn_regressor.model_dir)
+
+    warm_started_dnn_regressor.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_regressor.get_variable_names():
+      self.assertAllClose(
+          dnn_regressor.get_variable_value(variable_name),
+          warm_started_dnn_regressor.get_variable_value(variable_name))
+
+  def test_warm_starting_selective_variables(self):
+    """Tests selecting variables to warm-start."""
+    city = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNClassifier and train to save a checkpoint.
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        # The provided regular expression will only warm-start the city
+        # embedding, not the kernels and biases of the hidden weights.
+        warm_start_from=estimator.WarmStartSettings(
+            ckpt_to_initialize_from=dnn_classifier.model_dir,
+            vars_to_warm_start='.*(city).*'))
+
+    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_classifier.get_variable_names():
+      if 'city' in variable_name:
+        self.assertAllClose(
+            dnn_classifier.get_variable_value(variable_name),
+            warm_started_dnn_classifier.get_variable_value(variable_name))
+      elif 'bias' in variable_name:
+        # Hidden layer biases are zero-initialized.
+        bias_values = warm_started_dnn_classifier.get_variable_value(
+            variable_name)
+        self.assertAllClose(np.zeros_like(bias_values), bias_values)
+      elif 'kernel' in variable_name:
+        # We can't override the glorot uniform initializer used for the kernels
+        # in the dense layers, so just make sure we're not getting the same
+        # values from the old checkpoint.
+        self.assertAllNotClose(
+            dnn_classifier.get_variable_value(variable_name),
+            warm_started_dnn_classifier.get_variable_value(variable_name))
+
+  def test_warm_starting_with_vocab_remapping_and_partitioning(self):
+    """Tests warm-starting with vocab remapping and partitioning."""
+    vocab_list = ['doctor', 'lawyer', 'consultant']
+    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
+    with open(vocab_file, 'w') as f:
+      f.write('\n'.join(vocab_list))
+    occupation = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_file(
+            'occupation',
+            vocabulary_file=vocab_file,
+            vocabulary_size=len(vocab_list)),
+        dimension=2)
+
+    # Create a DNNClassifier and train to save a checkpoint.
+    partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2)
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[occupation],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD',
+        input_layer_partitioner=partitioner)
+    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).  Use a new FeatureColumn with a
+    # different vocabulary for occupation.
+    new_vocab_list = ['doctor', 'consultant', 'engineer']
+    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
+                                  'new_occupation_vocab')
+    with open(new_vocab_file, 'w') as f:
+      f.write('\n'.join(new_vocab_list))
+    new_occupation = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_file(
+            'occupation',
+            vocabulary_file=new_vocab_file,
+            vocabulary_size=len(new_vocab_list)),
+        dimension=2)
+    # We can create our VocabInfo object from the new and old occupation
+    # FeatureColumn's.
+    occupation_vocab_info = estimator.VocabInfo(
+        new_vocab=new_occupation.categorical_column.vocabulary_file,
+        new_vocab_size=new_occupation.categorical_column.vocabulary_size,
+        num_oov_buckets=new_occupation.categorical_column.num_oov_buckets,
+        old_vocab=occupation.categorical_column.vocabulary_file,
+        old_vocab_size=occupation.categorical_column.vocabulary_size,
+        # Can't use constant_initializer with load_and_remap.  In practice,
+        # use a truncated normal initializer.
+        backup_initializer=init_ops.random_uniform_initializer(
+            minval=0.39, maxval=0.39))
+    warm_started_dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[occupation],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=estimator.WarmStartSettings(
+            ckpt_to_initialize_from=dnn_classifier.model_dir,
+            var_name_to_vocab_info={
+                OCCUPATION_EMBEDDING_NAME: occupation_vocab_info
+            },
+            # Explicitly providing None here will only warm-start variables
+            # referenced in var_name_to_vocab_info (no hidden weights will be
+            # warmstarted).
+            vars_to_warm_start=None),
+        input_layer_partitioner=partitioner)
+
+    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+    # 'doctor' was ID-0 and still ID-0.
+    self.assertAllClose(
+        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[0, :],
+        warm_started_dnn_classifier.get_variable_value(
+            OCCUPATION_EMBEDDING_NAME)[0, :])
+    # 'consultant' was ID-2 and now ID-1.
+    self.assertAllClose(
+        dnn_classifier.get_variable_value(OCCUPATION_EMBEDDING_NAME)[2, :],
+        warm_started_dnn_classifier.get_variable_value(
+            OCCUPATION_EMBEDDING_NAME)[1, :])
+    # 'engineer' is a new entry and should be initialized with the
+    # backup_initializer in VocabInfo.
+    self.assertAllClose([0.39] * 2,
+                        warm_started_dnn_classifier.get_variable_value(
+                            OCCUPATION_EMBEDDING_NAME)[2, :])
+    for variable_name in warm_started_dnn_classifier.get_variable_names():
+      if 'bias' in variable_name:
+        # Hidden layer biases are zero-initialized.
+        bias_values = warm_started_dnn_classifier.get_variable_value(
+            variable_name)
+        self.assertAllClose(np.zeros_like(bias_values), bias_values)
+      elif 'kernel' in variable_name:
+        # We can't override the glorot uniform initializer used for the kernels
+        # in the dense layers, so just make sure we're not getting the same
+        # values from the old checkpoint.
+        self.assertAllNotClose(
+            dnn_classifier.get_variable_value(variable_name),
+            warm_started_dnn_classifier.get_variable_value(variable_name))
+
+  def test_warm_starting_with_naming_change(self):
+    """Tests warm-starting with a Tensor name remapping."""
+    locality = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
+            'locality', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+
+    # Create a DNNClassifier and train to save a checkpoint.
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[locality],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second DNNClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    city = self._fc_impl.embedding_column(
+        self._fc_impl.categorical_column_with_vocabulary_list(
+            'city', vocabulary_list=['Mountain View', 'Palo Alto']),
+        dimension=5)
+    warm_started_dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=[256, 128],
+        feature_columns=[city],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        # The 'city' variable correspond to the 'locality' variable in the
+        # previous model.
+        warm_start_from=estimator.WarmStartSettings(
+            ckpt_to_initialize_from=dnn_classifier.model_dir,
+            var_name_to_prev_var_name={
+                CITY_EMBEDDING_NAME:
+                    CITY_EMBEDDING_NAME.replace('city', 'locality')
+            }))
+
+    warm_started_dnn_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_dnn_classifier.get_variable_names():
+      if 'city' in variable_name:
+        self.assertAllClose(
+            dnn_classifier.get_variable_value(
+                CITY_EMBEDDING_NAME.replace('city', 'locality')),
+            warm_started_dnn_classifier.get_variable_value(CITY_EMBEDDING_NAME))
+      else:
+        self.assertAllClose(
+            dnn_classifier.get_variable_value(variable_name),
+            warm_started_dnn_classifier.get_variable_value(variable_name))
+
+
+class BaseDNNClassifierEvaluateTest(object):
+
+  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
+    self._dnn_classifier_fn = dnn_classifier_fn
+    self._fc_impl = fc_impl
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_one_dim(self):
+    """Asserts evaluation metrics for one-dimensional input and logits."""
+    global_step = 100
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
+
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=(2, 2),
+        feature_columns=[self._fc_impl.numeric_column('age')],
+        model_dir=self._model_dir)
+    def _input_fn():
+      # batch_size = 2, one false label, and one true.
+      return {'age': [[10.], [10.]]}, [[1], [0]]
+    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [[-2.08], [-2.08]] =>
+    # logistic = 1/(1 + exp(-logits)) = [[0.11105597], [0.11105597]]
+    # loss = -1. * log(0.111) -1. * log(0.889) = 2.31544200
+    expected_loss = 2.31544200
+    self.assertAllClose({
+        metric_keys.MetricKeys.LOSS: expected_loss,
+        metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2.,
+        metric_keys.MetricKeys.ACCURACY: 0.5,
+        metric_keys.MetricKeys.PRECISION: 0.0,
+        metric_keys.MetricKeys.RECALL: 0.0,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 0.11105597,
+        metric_keys.MetricKeys.LABEL_MEAN: 0.5,
+        metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
+        # There is no good way to calculate AUC for only two data points. But
+        # that is what the algorithm returns.
+        metric_keys.MetricKeys.AUC: 0.5,
+        metric_keys.MetricKeys.AUC_PR: 0.75,
+
+        ops.GraphKeys.GLOBAL_STEP: global_step
+    }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))
+
+  def test_multi_dim(self):
+    """Asserts evaluation metrics for multi-dimensional input and logits."""
+    global_step = 100
+    create_checkpoint(
+        (([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
+                                               [.2, -.2]),
+         ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3,
+                                           .0]),), global_step, self._model_dir)
+    n_classes = 3
+
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=(2, 2),
+        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    def _input_fn():
+      # batch_size = 2, one false label, and one true.
+      return {'age': [[10., 8.], [10., 8.]]}, [[1], [0]]
+    # Uses identical numbers as
+    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [[-0.48, 0.48, 0.39], [-0.48, 0.48, 0.39]]
+    # probabilities = exp(logits)/sum(exp(logits))
+    #               = [[0.16670536, 0.43538380, 0.39791084],
+    #                  [0.16670536, 0.43538380, 0.39791084]]
+    # loss = -log(0.43538380) - log(0.16670536)
+    expected_loss = 2.62305466
+    self.assertAllClose({
+        metric_keys.MetricKeys.LOSS: expected_loss,
+        metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
+        metric_keys.MetricKeys.ACCURACY: 0.5,
+        ops.GraphKeys.GLOBAL_STEP: global_step
+    }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))
+
+  def test_float_labels(self):
+    """Asserts evaluation metrics for float labels in binary classification."""
+    global_step = 100
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
+
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=(2, 2),
+        feature_columns=[self._fc_impl.numeric_column('age')],
+        model_dir=self._model_dir)
+    def _input_fn():
+      # batch_size = 2, one false label, and one true.
+      return {'age': [[10.], [10.]]}, [[0.8], [0.4]]
+    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [[-2.08], [-2.08]] =>
+    # logistic = 1/(1 + exp(-logits)) = [[0.11105597], [0.11105597]]
+    # loss = -0.8 * log(0.111) -0.2 * log(0.889)
+    #        -0.4 * log(0.111) -0.6 * log(0.889) = 2.7314420
+    metrics = dnn_classifier.evaluate(input_fn=_input_fn, steps=1)
+    self.assertAlmostEqual(2.7314420, metrics[metric_keys.MetricKeys.LOSS])
+
+  def test_multi_dim_weights(self):
+    """Tests evaluation with weights."""
+    # Uses same checkpoint with test_multi_dims
+    global_step = 100
+    create_checkpoint((([[.6, .5], [-.6, -.5]],
+                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      global_step, self._model_dir)
+    n_classes = 3
+
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=(2, 2),
+        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
+        n_classes=n_classes,
+        weight_column='w',
+        model_dir=self._model_dir)
+
+    def _input_fn():
+      # batch_size = 2, one false label, and one true.
+      return {'age': [[10., 8.], [10., 8.]], 'w': [[10.], [100.]]}, [[1], [0]]
+
+    # Uses identical numbers as test_multi_dims
+    # See that test for calculation of logits.
+    # loss = -log(0.43538380)*10 - log(0.16670536)*100
+    expected_loss = 187.468007
+    metrics = dnn_classifier.evaluate(input_fn=_input_fn, steps=1)
+    self.assertAlmostEqual(
+        expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)
+
+
+class BaseDNNRegressorEvaluateTest(object):
+
+  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
+    self._dnn_regressor_fn = dnn_regressor_fn
+    self._fc_impl = fc_impl
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_one_dim(self):
+    """Asserts evaluation metrics for one-dimensional input and logits."""
+    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
+    global_step = 100
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),), global_step, self._model_dir)
+
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=(2, 2),
+        feature_columns=[self._fc_impl.numeric_column('age')],
+        model_dir=self._model_dir)
+    def _input_fn():
+      return {'age': [[10.]]}, [[1.]]
+    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [[-2.08]] => predictions = [-2.08].
+    # loss = (1+2.08)^2 = 9.4864
+    expected_loss = 9.4864
+    self.assertAllClose({
+        metric_keys.MetricKeys.LOSS: expected_loss,
+        metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
+        metric_keys.MetricKeys.PREDICTION_MEAN: -2.08,
+        metric_keys.MetricKeys.LABEL_MEAN: 1.0,
+        ops.GraphKeys.GLOBAL_STEP: global_step
+    }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
+
+  def test_multi_dim(self):
+    """Asserts evaluation metrics for multi-dimensional input and logits."""
+    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
+    global_step = 100
+    create_checkpoint(
+        (([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
+                                               [.2, -.2]),
+         ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3,
+                                           .0]),), global_step, self._model_dir)
+    label_dimension = 3
+
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=(2, 2),
+        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+    def _input_fn():
+      return {'age': [[10., 8.]]}, [[1., -1., 0.5]]
+    # Uses identical numbers as
+    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [[-0.48, 0.48, 0.39]]
+    # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
+    expected_loss = 4.3929
+    self.assertAllClose({
+        metric_keys.MetricKeys.LOSS: expected_loss,
+        metric_keys.MetricKeys.LOSS_MEAN: expected_loss / label_dimension,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 0.39 / 3.0,
+        metric_keys.MetricKeys.LABEL_MEAN: 0.5 / 3.0,
+        ops.GraphKeys.GLOBAL_STEP: global_step
+    }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
+
+  def test_multi_dim_weights(self):
+    """Asserts evaluation metrics for multi-dimensional input and logits."""
+    # same checkpoint with test_multi_dim.
+    global_step = 100
+    create_checkpoint((([[.6, .5], [-.6, -.5]],
+                        [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+                       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
+                      global_step, self._model_dir)
+    label_dimension = 3
+
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=(2, 2),
+        feature_columns=[self._fc_impl.numeric_column('age', shape=[2])],
+        label_dimension=label_dimension,
+        weight_column='w',
+        model_dir=self._model_dir)
+
+    def _input_fn():
+      return {'age': [[10., 8.]], 'w': [10.]}, [[1., -1., 0.5]]
+
+    # Uses identical numbers as test_multi_dim.
+    # See that test for calculation of logits.
+    # loss = 4.3929*10
+    expected_loss = 43.929
+    metrics = dnn_regressor.evaluate(input_fn=_input_fn, steps=1)
+    self.assertAlmostEqual(
+        expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)
+
+
+class BaseDNNClassifierPredictTest(object):
+
+  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
+    self._dnn_classifier_fn = dnn_classifier_fn
+    self._fc_impl = fc_impl
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_one_dim(self, label_vocabulary, label_output_fn):
+    """Asserts predictions for one-dimensional input and logits."""
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),),
+        global_step=0,
+        model_dir=self._model_dir)
+
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=(2, 2),
+        label_vocabulary=label_vocabulary,
+        feature_columns=(self._fc_impl.numeric_column('x'),),
+        model_dir=self._model_dir)
+    input_fn = numpy_io.numpy_input_fn(
+        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
+    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [-2.08] =>
+    # logistic = exp(-2.08)/(1 + exp(-2.08)) = 0.11105597
+    # probabilities = [1-logistic, logistic] = [0.88894403, 0.11105597]
+    # class_ids = argmax(probabilities) = [0]
+    predictions = next(dnn_classifier.predict(input_fn=input_fn))
+    self.assertAllClose([-2.08],
+                        predictions[prediction_keys.PredictionKeys.LOGITS])
+    self.assertAllClose([0.11105597],
+                        predictions[prediction_keys.PredictionKeys.LOGISTIC])
+    self.assertAllClose(
+        [0.88894403,
+         0.11105597], predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+    self.assertAllClose([0],
+                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
+    self.assertAllEqual([label_output_fn(0)],
+                        predictions[prediction_keys.PredictionKeys.CLASSES])
+
+  def test_one_dim_without_label_vocabulary(self):
+    self._test_one_dim(label_vocabulary=None,
+                       label_output_fn=lambda x: ('%s' % x).encode())
+
+  def test_one_dim_with_label_vocabulary(self):
+    n_classes = 2
+    self._test_one_dim(
+        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
+        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
+
+  def _test_multi_dim_with_3_classes(self, label_vocabulary, label_output_fn):
+    """Asserts predictions for multi-dimensional input and logits."""
+    create_checkpoint(
+        (([[.6, .5], [-.6, -.5]], [.1, -.1]),
+         ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]],
+                                               [.3, -.3, .0]),),
+        global_step=0,
+        model_dir=self._model_dir)
+
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=(2, 2),
+        feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),),
+        label_vocabulary=label_vocabulary,
+        n_classes=3,
+        model_dir=self._model_dir)
+    input_fn = numpy_io.numpy_input_fn(
+        # Inputs shape is (batch_size, num_inputs).
+        x={'x': np.array([[10., 8.]])},
+        batch_size=1,
+        shuffle=False)
+    # Uses identical numbers as
+    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [-0.48, 0.48, 0.39] =>
+    # probabilities[i] = exp(logits[i]) / sum_j exp(logits[j]) =>
+    # probabilities = [0.16670536, 0.43538380, 0.39791084]
+    # class_ids = argmax(probabilities) = [1]
+    predictions = next(dnn_classifier.predict(input_fn=input_fn))
+    self.assertItemsEqual(
+        [prediction_keys.PredictionKeys.LOGITS,
+         prediction_keys.PredictionKeys.PROBABILITIES,
+         prediction_keys.PredictionKeys.CLASS_IDS,
+         prediction_keys.PredictionKeys.CLASSES],
+        six.iterkeys(predictions))
+    self.assertAllClose(
+        [-0.48, 0.48, 0.39], predictions[prediction_keys.PredictionKeys.LOGITS])
+    self.assertAllClose(
+        [0.16670536, 0.43538380, 0.39791084],
+        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+    self.assertAllEqual(
+        [1], predictions[prediction_keys.PredictionKeys.CLASS_IDS])
+    self.assertAllEqual(
+        [label_output_fn(1)],
+        predictions[prediction_keys.PredictionKeys.CLASSES])
+
+  def test_multi_dim_with_3_classes_but_no_label_vocab(self):
+    self._test_multi_dim_with_3_classes(
+        label_vocabulary=None,
+        label_output_fn=lambda x: ('%s' % x).encode())
+
+  def test_multi_dim_with_3_classes_and_label_vocab(self):
+    n_classes = 3
+    self._test_multi_dim_with_3_classes(
+        label_vocabulary=['class_vocab_{}'.format(i) for i in range(n_classes)],
+        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
+
+
+class BaseDNNRegressorPredictTest(object):
+
+  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
+    self._dnn_regressor_fn = dnn_regressor_fn
+    self._fc_impl = fc_impl
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_one_dim(self):
+    """Asserts predictions for one-dimensional input and logits."""
+    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),),
+        global_step=0,
+        model_dir=self._model_dir)
+
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=(2, 2),
+        feature_columns=(self._fc_impl.numeric_column('x'),),
+        model_dir=self._model_dir)
+    input_fn = numpy_io.numpy_input_fn(
+        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
+    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [[-2.08]] => predictions = [-2.08].
+    self.assertAllClose({
+        prediction_keys.PredictionKeys.PREDICTIONS: [-2.08],
+    }, next(dnn_regressor.predict(input_fn=input_fn)))
+
+  def test_multi_dim(self):
+    """Asserts predictions for multi-dimensional input and logits."""
+    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
+    create_checkpoint(
+        (([[.6, .5], [-.6, -.5]], [.1, -.1]),
+         ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]],
+                                               [.3, -.3,
+                                                .0]),), 100, self._model_dir)
+
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=(2, 2),
+        feature_columns=(self._fc_impl.numeric_column('x', shape=(2,)),),
+        label_dimension=3,
+        model_dir=self._model_dir)
+    input_fn = numpy_io.numpy_input_fn(
+        # Inputs shape is (batch_size, num_inputs).
+        x={'x': np.array([[10., 8.]])},
+        batch_size=1,
+        shuffle=False)
+    # Uses identical numbers as
+    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [[-0.48, 0.48, 0.39]] => predictions = [-0.48, 0.48, 0.39]
+    self.assertAllClose({
+        prediction_keys.PredictionKeys.PREDICTIONS: [-0.48, 0.48, 0.39],
+    }, next(dnn_regressor.predict(input_fn=input_fn)))
+
+
+class _SummaryHook(session_run_hook.SessionRunHook):
+  """Saves summaries every N steps."""
+
+  def __init__(self):
+    self._summaries = []
+
+  def begin(self):
+    self._summary_op = summary_lib.merge_all()
+
+  def before_run(self, run_context):
+    return session_run_hook.SessionRunArgs({'summary': self._summary_op})
+
+  def after_run(self, run_context, run_values):
+    s = summary_pb2.Summary()
+    s.ParseFromString(run_values.results['summary'])
+    self._summaries.append(s)
+
+  def summaries(self):
+    return tuple(self._summaries)
+
+
+def _assert_checkpoint(
+    testcase, global_step, input_units, hidden_units, output_units, model_dir):
+  """Asserts checkpoint contains expected variables with proper shapes.
+
+  Args:
+    testcase: A TestCase instance.
+    global_step: Expected global step value.
+    input_units: The dimension of input layer.
+    hidden_units: Iterable of integer sizes for the hidden layers.
+    output_units: The dimension of output layer (logits).
+    model_dir: The model directory.
+  """
+  shapes = {
+      name: shape
+      for (name, shape) in checkpoint_utils.list_variables(model_dir)
+  }
+
+  # Global step.
+  testcase.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
+  testcase.assertEqual(
+      global_step,
+      checkpoint_utils.load_variable(
+          model_dir, ops.GraphKeys.GLOBAL_STEP))
+
+  # Hidden layer weights.
+  prev_layer_units = input_units
+  for i in range(len(hidden_units)):
+    layer_units = hidden_units[i]
+    testcase.assertAllEqual(
+        (prev_layer_units, layer_units),
+        shapes[HIDDEN_WEIGHTS_NAME_PATTERN % i])
+    testcase.assertAllEqual(
+        (layer_units,),
+        shapes[HIDDEN_BIASES_NAME_PATTERN % i])
+    prev_layer_units = layer_units
+
+  # Output layer weights.
+  testcase.assertAllEqual((prev_layer_units, output_units),
+                          shapes[LOGITS_WEIGHTS_NAME])
+  testcase.assertAllEqual((output_units,),
+                          shapes[LOGITS_BIASES_NAME])
+
+
+def _assert_simple_summary(testcase, expected_values, actual_summary):
+  """Assert summary the specified simple values.
+
+  Args:
+    testcase: A TestCase instance.
+    expected_values: Dict of expected tags and simple values.
+    actual_summary: `summary_pb2.Summary`.
+  """
+  testcase.assertAllClose(expected_values, {
+      v.tag: v.simple_value
+      for v in actual_summary.value if (v.tag in expected_values)
+  })
+
+
+class BaseDNNClassifierTrainTest(object):
+
+  def __init__(self, dnn_classifier_fn, fc_impl=feature_column):
+    self._dnn_classifier_fn = dnn_classifier_fn
+    self._fc_impl = fc_impl
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_from_scratch_with_default_optimizer_binary(self):
+    hidden_units = (2, 2)
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=hidden_units,
+        feature_columns=(self._fc_impl.numeric_column('age'),),
+        model_dir=self._model_dir)
+
+    # Train for a few steps, then validate final checkpoint.
+    num_steps = 5
+    dnn_classifier.train(
+        input_fn=lambda: ({'age': [[10.]]}, [[1]]), steps=num_steps)
+    _assert_checkpoint(
+        self, num_steps, input_units=1, hidden_units=hidden_units,
+        output_units=1, model_dir=self._model_dir)
+
+  def test_from_scratch_with_default_optimizer_multi_class(self):
+    hidden_units = (2, 2)
+    n_classes = 3
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=hidden_units,
+        feature_columns=(self._fc_impl.numeric_column('age'),),
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # Train for a few steps, then validate final checkpoint.
+    num_steps = 5
+    dnn_classifier.train(
+        input_fn=lambda: ({'age': [[10.]]}, [[2]]), steps=num_steps)
+    _assert_checkpoint(
+        self, num_steps, input_units=1, hidden_units=hidden_units,
+        output_units=n_classes, model_dir=self._model_dir)
+
+  def test_from_scratch_validate_summary(self):
+    hidden_units = (2, 2)
+    opt = mock_optimizer(
+        self, hidden_units=hidden_units)
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=hidden_units,
+        feature_columns=(self._fc_impl.numeric_column('age'),),
+        optimizer=opt,
+        model_dir=self._model_dir)
+    self.assertEqual(0, opt.minimize.call_count)
+
+    # Train for a few steps, then validate optimizer, summaries, and
+    # checkpoint.
+    num_steps = 5
+    summary_hook = _SummaryHook()
+    dnn_classifier.train(
+        input_fn=lambda: ({'age': [[10.]]}, [[1]]), steps=num_steps,
+        hooks=(summary_hook,))
+    self.assertEqual(1, opt.minimize.call_count)
+    _assert_checkpoint(
+        self, num_steps, input_units=1, hidden_units=hidden_units,
+        output_units=1, model_dir=self._model_dir)
+    summaries = summary_hook.summaries()
+    self.assertEqual(num_steps, len(summaries))
+    for summary in summaries:
+      summary_keys = [v.tag for v in summary.value]
+      self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)
+      self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)
+
+  def test_binary_classification(self):
+    base_global_step = 100
+    hidden_units = (2, 2)
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
+
+    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [-2.08] => probabilities = [0.889, 0.111]
+    # loss = -1. * log(0.111) = 2.19772100
+    expected_loss = 2.19772100
+    opt = mock_optimizer(
+        self, hidden_units=hidden_units, expected_loss=expected_loss)
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=hidden_units,
+        feature_columns=(self._fc_impl.numeric_column('age'),),
+        optimizer=opt,
+        model_dir=self._model_dir)
+    self.assertEqual(0, opt.minimize.call_count)
+
+    # Train for a few steps, then validate optimizer, summaries, and
+    # checkpoint.
+    num_steps = 5
+    summary_hook = _SummaryHook()
+    dnn_classifier.train(
+        input_fn=lambda: ({'age': [[10.]]}, [[1]]), steps=num_steps,
+        hooks=(summary_hook,))
+    self.assertEqual(1, opt.minimize.call_count)
+    summaries = summary_hook.summaries()
+    self.assertEqual(num_steps, len(summaries))
+    for summary in summaries:
+      _assert_simple_summary(
+          self,
+          {
+              metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
+              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
+              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': .5,
+              'dnn/dnn/logits/fraction_of_zero_values': 0.,
+              metric_keys.MetricKeys.LOSS: expected_loss,
+          },
+          summary)
+    _assert_checkpoint(
+        self, base_global_step + num_steps, input_units=1,
+        hidden_units=hidden_units, output_units=1, model_dir=self._model_dir)
+
+  def test_binary_classification_float_labels(self):
+    base_global_step = 100
+    hidden_units = (2, 2)
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
+
+    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [-2.08] => probabilities = [0.889, 0.111]
+    # loss = -0.8 * log(0.111) -0.2 * log(0.889) = 1.7817210
+    expected_loss = 1.7817210
+    opt = mock_optimizer(
+        self, hidden_units=hidden_units, expected_loss=expected_loss)
+    dnn_classifier = self._dnn_classifier_fn(
+        hidden_units=hidden_units,
+        feature_columns=(self._fc_impl.numeric_column('age'),),
+        optimizer=opt,
+        model_dir=self._model_dir)
+    self.assertEqual(0, opt.minimize.call_count)
+
+    # Train for a few steps, then validate optimizer, summaries, and
+    # checkpoint.
+    num_steps = 5
+    dnn_classifier.train(
+        input_fn=lambda: ({'age': [[10.]]}, [[0.8]]), steps=num_steps)
+    self.assertEqual(1, opt.minimize.call_count)
+
+  def test_multi_class(self):
+    n_classes = 3
+    base_global_step = 100
+    hidden_units = (2, 2)
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1., 1., .5], [-1., 1., .5]],
+          [.3, -.3, .0]),), base_global_step, self._model_dir)
+
+    # Uses identical numbers as DNNModelFnTest.test_multi_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [-2.08, 2.08, 1.19] => probabilities = [0.0109, 0.7011, 0.2879]
+    # loss = -1. * log(0.7011) = 0.35505795
+    expected_loss = 0.35505795
+    opt = mock_optimizer(
+        self, hidden_units=hidden_units, expected_loss=expected_loss)
+    dnn_classifier = self._dnn_classifier_fn(
+        n_classes=n_classes,
+        hidden_units=hidden_units,
+        feature_columns=(self._fc_impl.numeric_column('age'),),
+        optimizer=opt,
+        model_dir=self._model_dir)
+    self.assertEqual(0, opt.minimize.call_count)
+
+    # Train for a few steps, then validate optimizer, summaries, and
+    # checkpoint.
+    num_steps = 5
+    summary_hook = _SummaryHook()
+    dnn_classifier.train(
+        input_fn=lambda: ({'age': [[10.]]}, [[1]]), steps=num_steps,
+        hooks=(summary_hook,))
+    self.assertEqual(1, opt.minimize.call_count)
+    summaries = summary_hook.summaries()
+    self.assertEqual(num_steps, len(summaries))
+    for summary in summaries:
+      _assert_simple_summary(
+          self,
+          {
+              metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
+              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
+              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': .5,
+              'dnn/dnn/logits/fraction_of_zero_values': 0.,
+              metric_keys.MetricKeys.LOSS: expected_loss,
+          },
+          summary)
+    _assert_checkpoint(
+        self, base_global_step + num_steps, input_units=1,
+        hidden_units=hidden_units, output_units=n_classes,
+        model_dir=self._model_dir)
+
+
+class BaseDNNRegressorTrainTest(object):
+
+  def __init__(self, dnn_regressor_fn, fc_impl=feature_column):
+    self._dnn_regressor_fn = dnn_regressor_fn
+    self._fc_impl = fc_impl
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_from_scratch_with_default_optimizer(self):
+    hidden_units = (2, 2)
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=hidden_units,
+        feature_columns=(self._fc_impl.numeric_column('age'),),
+        model_dir=self._model_dir)
+
+    # Train for a few steps, then validate final checkpoint.
+    num_steps = 5
+    dnn_regressor.train(
+        input_fn=lambda: ({'age': ((1,),)}, ((10,),)), steps=num_steps)
+    _assert_checkpoint(
+        self, num_steps, input_units=1, hidden_units=hidden_units,
+        output_units=1, model_dir=self._model_dir)
+
+  def test_from_scratch(self):
+    hidden_units = (2, 2)
+    opt = mock_optimizer(self, hidden_units=hidden_units)
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=hidden_units,
+        feature_columns=(self._fc_impl.numeric_column('age'),),
+        optimizer=opt,
+        model_dir=self._model_dir)
+    self.assertEqual(0, opt.minimize.call_count)
+
+    # Train for a few steps, then validate optimizer, summaries, and
+    # checkpoint.
+    num_steps = 5
+    summary_hook = _SummaryHook()
+    dnn_regressor.train(
+        input_fn=lambda: ({'age': ((1,),)}, ((5.,),)), steps=num_steps,
+        hooks=(summary_hook,))
+    self.assertEqual(1, opt.minimize.call_count)
+    _assert_checkpoint(
+        self, num_steps, input_units=1, hidden_units=hidden_units,
+        output_units=1, model_dir=self._model_dir)
+    summaries = summary_hook.summaries()
+    self.assertEqual(num_steps, len(summaries))
+    for summary in summaries:
+      summary_keys = [v.tag for v in summary.value]
+      self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)
+      self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)
+
+  def test_one_dim(self):
+    """Asserts train loss for one-dimensional input and logits."""
+    base_global_step = 100
+    hidden_units = (2, 2)
+    create_checkpoint(
+        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
+         ([[-1.], [1.]], [.3]),), base_global_step, self._model_dir)
+
+    # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [-2.08] => predictions = [-2.08]
+    # loss = (1 + 2.08)^2 = 9.4864
+    expected_loss = 9.4864
+    opt = mock_optimizer(
+        self, hidden_units=hidden_units, expected_loss=expected_loss)
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=hidden_units,
+        feature_columns=(self._fc_impl.numeric_column('age'),),
+        optimizer=opt,
+        model_dir=self._model_dir)
+    self.assertEqual(0, opt.minimize.call_count)
+
+    # Train for a few steps, then validate optimizer, summaries, and
+    # checkpoint.
+    num_steps = 5
+    summary_hook = _SummaryHook()
+    dnn_regressor.train(
+        input_fn=lambda: ({'age': [[10.]]}, [[1.]]), steps=num_steps,
+        hooks=(summary_hook,))
+    self.assertEqual(1, opt.minimize.call_count)
+    summaries = summary_hook.summaries()
+    self.assertEqual(num_steps, len(summaries))
+    for summary in summaries:
+      _assert_simple_summary(
+          self,
+          {
+              metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
+              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
+              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': 0.5,
+              'dnn/dnn/logits/fraction_of_zero_values': 0.,
+              metric_keys.MetricKeys.LOSS: expected_loss,
+          },
+          summary)
+    _assert_checkpoint(
+        self, base_global_step + num_steps, input_units=1,
+        hidden_units=hidden_units, output_units=1, model_dir=self._model_dir)
+
+  def test_multi_dim(self):
+    """Asserts train loss for multi-dimensional input and logits."""
+    base_global_step = 100
+    hidden_units = (2, 2)
+    create_checkpoint(
+        (([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]],
+                                               [.2, -.2]),
+         ([[-1., 1., .5], [-1., 1., .5]],
+          [.3, -.3, .0]),), base_global_step, self._model_dir)
+    input_dimension = 2
+    label_dimension = 3
 
-# Include attrs that start with single underscore.
-dnn_testing_utils.__all__ = [
-    s for s in dir(dnn_testing_utils) if not s.startswith('__')
-]
+    # Uses identical numbers as
+    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
+    # See that test for calculation of logits.
+    # logits = [[-0.48, 0.48, 0.39]]
+    # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
+    expected_loss = 4.3929
+    opt = mock_optimizer(
+        self, hidden_units=hidden_units, expected_loss=expected_loss)
+    dnn_regressor = self._dnn_regressor_fn(
+        hidden_units=hidden_units,
+        feature_columns=[
+            self._fc_impl.numeric_column('age', shape=[input_dimension])
+        ],
+        label_dimension=label_dimension,
+        optimizer=opt,
+        model_dir=self._model_dir)
+    self.assertEqual(0, opt.minimize.call_count)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.dnn_testing_utils import *
+    # Train for a few steps, then validate optimizer, summaries, and
+    # checkpoint.
+    num_steps = 5
+    summary_hook = _SummaryHook()
+    dnn_regressor.train(
+        input_fn=lambda: ({'age': [[10., 8.]]}, [[1., -1., 0.5]]),
+        steps=num_steps,
+        hooks=(summary_hook,))
+    self.assertEqual(1, opt.minimize.call_count)
+    summaries = summary_hook.summaries()
+    self.assertEqual(num_steps, len(summaries))
+    for summary in summaries:
+      _assert_simple_summary(
+          self,
+          {
+              metric_keys.MetricKeys.LOSS_MEAN: expected_loss / label_dimension,
+              'dnn/dnn/hiddenlayer_0/fraction_of_zero_values': 0.,
+              'dnn/dnn/hiddenlayer_1/fraction_of_zero_values': 0.5,
+              'dnn/dnn/logits/fraction_of_zero_values': 0.,
+              metric_keys.MetricKeys.LOSS: expected_loss,
+          },
+          summary)
+    _assert_checkpoint(
+        self, base_global_step + num_steps, input_units=input_dimension,
+        hidden_units=hidden_units, output_units=label_dimension,
+        model_dir=self._model_dir)
diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py
index 68befa2a9b..06593f9520 100644
--- a/tensorflow/python/estimator/canned/head.py
+++ b/tensorflow/python/estimator/canned/head.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,1590 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""head python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Abstractions for the head(s) of a model."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import head
+import abc
+import collections
+
+import six
+
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export_output
+from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import string_ops
+from tensorflow.python.ops import weights_broadcast_ops
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.summary import summary
+from tensorflow.python.training import training_util
+from tensorflow.python.util import function_utils
+
+_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
+# The above default is defined by TF Serving, but these next three are just
+# a local convention without any special meaning.
+_CLASSIFY_SERVING_KEY = 'classification'
+_REGRESS_SERVING_KEY = 'regression'
+_PREDICT_SERVING_KEY = 'predict'
+
+
+# A LossSpec contains
+# * a scalar `Tensor` representing reduced weighted training loss
+# * a `Tensor` representing the unreduced unweighted loss
+# * a `Tensor` representing the example weights
+# * possibly processed labels (e.g. vocabulary lookup, shape manipulation, etc)
+LossSpec = collections.namedtuple(
+    'LossSpec', ['training_loss', 'unreduced_loss', 'weights',
+                 'processed_labels'])
+
+
+def _summary_key(head_name, val):
+  return '%s/%s' % (val, head_name) if head_name else val
+
+
+def _create_eval_metrics_tuple(fn, kwargs):
+  """Creates TPU eval metrics tuple.
+
+  Helper function to make eval_metric tuple (eval_metric_fn, fn_kwargs) used
+  by `TPUEstimator`. TPUEstimator requires that `eval_metric_fn` take
+  exclusively Tensor arguments. This helper can help create such a function from
+  a more generic function that can take both Tensor and non-Tensor arguments.
+
+  Args:
+    fn: A eval_metric_fn that takes both Tensor and non-Tensor arguments.
+        This function must return a dict of form
+        {'metric name': (metric_tensor, eval_op)}
+    kwargs: Dict of arguments for `fn`.
+
+  Returns:
+    `eval_metric` tuple that can be passed to a `model_fn._TPUEstimatorSpec`.
+  """
+  tensor_kwargs = {}
+  nontensor_kwargs = {}
+  for k, v in six.iteritems(kwargs):
+    if tensor_util.is_tensor(v):
+      tensor_kwargs[k] = v
+    else:
+      nontensor_kwargs[k] = v
+  def _fn(**tensors):
+    return fn(**dict(nontensor_kwargs, **tensors))
+  return (_fn, tensor_kwargs)
+
+
+class _Head(object):
+  """Interface for the head/top of a model.
+
+  Given logits (or output of a hidden layer), a Head knows how to compute
+  predictions, loss, train_op, metrics and export outputs. It is meant to:
+
+  1. Simplify writing model_fn and to make model_fn more configurable
+  2. Support wide range of machine learning models. Since most heads can work
+     with logits, they can support DNN, RNN, Wide, Wide&Deep,
+     Global objectives, Gradient boosted trees and many other types
+     of machine learning models.
+
+  Common usage:
+  Here is simplified model_fn to build a DNN regression model.
+    ```python
+    def _my_dnn_model_fn(features, labels, mode, params, config=None):
+      # Optionally your callers can pass head to model_fn as a param.
+      head = tf.contrib.estimator.regression_head(...)
+      inputs = tf.feature_column.input_layer(features, ...)
+      hidden_layer0 = tf.layers.dense(
+          inputs, units=1000, activation=tf.nn.relu)
+      hidden_layer1 = tf.layers.dense(
+          hidden_layer0, units=500, activation=tf.nn.relu)
+      logits = tf.layers.dense(
+          hidden_layer1, units=head.logits_dimension, activation=None)
+
+      return head.create_estimator_spec(
+          features=features,
+          labels=labels,
+          mode=mode,
+          logits=logits,
+          optimizer=optimizer)
+    ```
+
+  There are cases where computing and applying gradients can not be meaningfully
+  captured with optimizer or train_op_fn we support (for example, with sync
+  optimizer). In such case, you can take the responsibility on your own. Here is
+  a common use case,
+    ```python
+    estimator_spec = head.create_estimator_spec(
+        features=features,
+        labels=labels,
+        mode=mode,
+        logits=logits,
+        train_op_fn=lambda _: tf.no_op())
+    if mode == model_fn.ModeKeys.TRAIN:
+      optimizer = ...
+      sync = tf.train.SyncReplicasOptimizer(opt=optimizer, ...)
+      update_op = sync.minimize(
+          estimator_spec.loss, global_step=tf.get_global_step())
+      hooks = [sync.make_session_run_hook(is_chief)]
+      ... update train_op and hooks in EstimatorSpec and return
+    ```
+  """
+  __metaclass__ = abc.ABCMeta
+
+  @abc.abstractproperty
+  def name(self):
+    """The name of this head.
+
+    Returns:
+      A string.
+    """
+    raise NotImplementedError('Calling an abstract method.')
+
+  @abc.abstractproperty
+  def logits_dimension(self):
+    """Size of the last dimension of the logits `Tensor`.
+
+    Typically, logits is of shape `[batch_size, logits_dimension]`.
+
+    Returns:
+      The expected size of the `logits` tensor.
+    """
+    raise NotImplementedError('Calling an abstract method.')
+
+  @abc.abstractmethod
+  def create_loss(self, features, mode, logits, labels):
+    """Returns a loss Tensor from provided logits.
+
+    This function is designed to be used by framework developers.  Almost all
+    users should use create_estimator_spec(), which calls this internally.
+    `mode` and `features` are most likely not used, but some Head
+    implementations may require them.
+
+    Args:
+      features: Input `dict` of `Tensor` objects.
+      mode: Estimator's `ModeKeys`.
+      logits: logits `Tensor` to be used for loss construction.
+      labels: Labels `Tensor`, or `dict` of same.
+
+    Returns:
+      A LossSpec that contains
+      * the scalar `Tensor` representing reduced weighted training loss
+      * the `Tensor` representing the unreduced unweighted loss
+      * the `Tensor` representing the example weights
+      * possibly processed labels (e.g. vocabulary lookup, shape manipulation,
+        etc.)
+
+      To be extendable in the future.
+    """
+    raise NotImplementedError('Calling an abstract method.')
+
+  # TODO(b/65403806): By default, collect regularization_losses from
+  # GraphKeys.REGULARIZATION_LOSSES collection.
+  def create_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None, regularization_losses=None):
+    """Returns `EstimatorSpec` that a model_fn can return.
+
+    Please note that,
+    + All args must be passed via name.
+
+    Args:
+      features: Input `dict` of `Tensor` or `SparseTensor` objects.
+      mode: Estimator's `ModeKeys`.
+      logits: logits `Tensor` to be used by the head.
+      labels: Labels `Tensor`, or `dict` of same.
+      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
+        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
+        updates variables and increments `global_step`.
+      train_op_fn: Function that takes a scalar loss `Tensor` and returns an op
+        to optimize the model with the loss in TRAIN mode. Used if `optimizer`
+        is `None`. Exactly one of `train_op_fn` and `optimizer` must be set in
+        TRAIN mode. None is allowed in other modes. If you want to optimize loss
+        yourself you can pass `lambda _: tf.no_op()` and then use
+        EstimatorSpec.loss to compute and apply gradients.
+      regularization_losses: A list of additional scalar losses to be added to
+        the training loss, such as regularization losses.
+
+    Returns:
+      `EstimatorSpec`.
+    """
+    try:
+      tpu_estimator_spec = (
+          self._create_tpu_estimator_spec(
+              features, mode, logits, labels, optimizer, train_op_fn,
+              regularization_losses))
+      return tpu_estimator_spec.as_estimator_spec()
+    except NotImplementedError:
+      # Not all subclasses of _Head will have implemented
+      # _create_tpu_estimator_spec. If it is implemented, we can use it to
+      # create our `EstimatorSpec` here.
+      raise NotImplementedError(
+          'Subclasses of _Head must implement `create_estimator_spec()` or '
+          '_create_tpu_estimator_spec().')
+
+  def _create_tpu_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None, regularization_losses=None):
+    """Returns `model_fn._TPUEstimatorSpec` that a model_fn can return.
+
+    Args:
+      features: Input `dict` of `Tensor` or `SparseTensor` objects.
+      mode: Estimator's `ModeKeys`.
+      logits: logits `Tensor` to be used by the head.
+      labels: Labels `Tensor`, or `dict` of same.
+      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
+        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
+        updates variables and increments `global_step`.
+      train_op_fn: Function that takes a scalar loss `Tensor` and returns an op
+        to optimize the model with the loss in TRAIN mode. Used if `optimizer`
+        is `None`. Exactly one of `train_op_fn` and `optimizer` must be set in
+        TRAIN mode. None is allowed in other modes. If you want to optimize loss
+        yourself you can pass `lambda _: tf.no_op()` and then use
+        EstimatorSpec.loss to compute and apply gradients.
+      regularization_losses: A list of additional scalar losses to be added to
+        the training loss, such as regularization losses.
+
+    Returns:
+      A `model_fn._TPUEstimatorSpec' instance.
+    """
+    raise NotImplementedError(
+        'TPUEstimatorSpec not available for this model head.')
+
+
+def _check_dense_labels_match_logits_and_reshape(
+    labels, logits, expected_labels_dimension):
+  """Checks that labels shape matches logits and reshapes if needed.
+
+  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Then labels
+  shape must be [D0, D1, ... DN, expected_labels_dimension].
+  If expected_labels_dimension=1, labels could be [D0, D1, ... DN] and this
+  method reshapes them to [D0, D1, ... DN, 1].
+
+  Args:
+    labels: labels Tensor.
+    logits: logits Tensor.
+    expected_labels_dimension: Integer.
+  Returns:
+    Validated and reshaped labels Tensor.
+  Raises:
+    ValueError: If labels is a SparseTensor.
+    ValueError: If labels shape is statically defined and fails validation.
+    OpError: If labels shape is not statically defined and fails validation.
+  """
+  if labels is None:
+    raise ValueError(
+        'You must provide a labels Tensor. Given: None. '
+        'Suggested troubleshooting steps: Check that your data contain '
+        'your label feature. Check that your input_fn properly parses and '
+        'returns labels.')
+  with ops.name_scope(None, 'labels', (labels, logits)) as scope:
+    labels = sparse_tensor.convert_to_tensor_or_sparse_tensor(labels)
+    if isinstance(labels, sparse_tensor.SparseTensor):
+      raise ValueError(
+          'SparseTensor labels are not supported. '
+          'labels must be a Tensor of shape [D0, D1, ..., DN, %s], '
+          'e.g. [batch_size, %s]. '
+          'Suggested Fix (1): Check the label feature in your data. '
+          'Each example must contain %s value(s). If not, your choice of label '
+          'was probably incorrect. '
+          'Suggested Fix (2): In your input_fn, use '
+          'tf.sparse_tensor_to_dense() to turn labels into a Tensor.'
+          '' % (expected_labels_dimension, expected_labels_dimension,
+                expected_labels_dimension))
+    if (labels.shape.ndims is not None and logits.shape.ndims is not None and
+        labels.shape.ndims == logits.shape.ndims - 1):
+      labels = array_ops.expand_dims(labels, -1)
+    labels_shape = array_ops.shape(labels)
+    logits_shape = array_ops.shape(logits)
+    err_msg = (
+        'labels shape must be [D0, D1, ... DN, {}]. '
+        'Suggested Fix: check your n_classes argument to the estimator '
+        'and/or the shape of your label.'.format(expected_labels_dimension))
+    assert_rank = check_ops.assert_rank_at_least(labels, 2, message=err_msg)
+    with ops.control_dependencies([assert_rank]):
+      static_shape = labels.shape
+      if static_shape.ndims is not None:
+        dim1 = static_shape[-1]
+        if (dim1 is not None) and (dim1 != expected_labels_dimension):
+          raise ValueError(
+              'Mismatched label shape. '
+              'Expected labels dimension=%s.  Received %s. '
+              'Suggested Fix:'
+              'If your classifier expects one-hot encoding label,'
+              'check your n_classes argument to the estimator '
+              'and/or the shape of your label. '
+              'Otherwise, check the shape of your label.' %
+              (expected_labels_dimension, dim1))
+      expected_labels_shape = array_ops.concat(
+          [logits_shape[:-1], [expected_labels_dimension]], axis=0)
+      assert_dimension = check_ops.assert_equal(
+          expected_labels_shape, labels_shape, message=err_msg,
+          data=['expected_labels_shape: ', expected_labels_shape,
+                'labels_shape: ', labels_shape])
+      with ops.control_dependencies([assert_dimension]):
+        return array_ops.identity(labels, name=scope)
+
+
+def _get_weights_and_check_match_logits(
+    features, weight_column, logits, allow_per_logit_weights=False):
+  """Fetches weights from features and checks that the shape matches logits.
+
+  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape
+  can be either:
+  * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`.
+  * [D0, D1, ... DN, 1]
+  * [D0, D1, ... DN]: In this case, weights is reshaped into
+    [D0, D1, ... DN, 1] to work with weight broadcasting rules.
+
+  Args:
+    features: The features dict that contains weights.
+    weight_column: The weight column. If not given, this method returns 1.
+    logits: logits Tensor.
+    allow_per_logit_weights: Boolean. Whether we allow weights along the logits
+      dimension, namely shape `[D0, D1, ... DN, logits_dimension]`.
+  Returns:
+    Validated and reshaped weights Tensor.
+  Raises:
+    ValueError: If the weights `Tensor` cannot be cast into float.
+  """
+  if allow_per_logit_weights:
+    err_msg = (
+        'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
+        '[D0, D1, ... DN, logits_dimension]')
+  else:
+    err_msg = (
+        'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')
+  with ops.name_scope(
+      None, 'weights',
+      values=tuple(six.itervalues(features)) + (logits,)) as scope:
+    # Fetch the weights.
+    if weight_column is None:
+      return 1.
+    if isinstance(weight_column, six.string_types):
+      weight_column = feature_column_lib.numeric_column(
+          key=weight_column, shape=(1,))
+    if not isinstance(weight_column, feature_column_lib._NumericColumn):  # pylint: disable=protected-access
+      raise TypeError('Weight column must be either a string or _NumericColumn.'
+                      ' Given type: {}.'.format(type(weight_column)))
+    weights = weight_column._get_dense_tensor(  # pylint: disable=protected-access
+        feature_column_lib._LazyBuilder(features))  # pylint: disable=protected-access
+    if not (weights.dtype.is_floating or weights.dtype.is_integer):
+      raise ValueError('Weight column should be castable to float. '
+                       'Given dtype: {}'.format(weights.dtype))
+    weights = math_ops.to_float(weights, name='weights')
+
+    # Validate the weights shape.
+    weights_shape = array_ops.shape(weights, name='weights_shape')
+    logits_shape = array_ops.shape(logits, name='logits_shape')
+    if (weights.shape.ndims is not None and logits.shape.ndims is not None and
+        weights.shape.ndims == logits.shape.ndims - 1):
+      assert_dimension = check_ops.assert_equal(
+          logits_shape[:-1], weights_shape, message=err_msg,
+          data=['logits_shape: ', logits_shape,
+                'weights_shape: ', weights_shape])
+      with ops.control_dependencies([assert_dimension]):
+        return array_ops.expand_dims(weights, -1, name=scope)
+    supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]], axis=0)
+    if allow_per_logit_weights:
+      condition = math_ops.reduce_any(
+          [math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)),
+           math_ops.reduce_all(math_ops.equal(
+               supported_weights_shape, weights_shape))])
+      assert_dimension = control_flow_ops.Assert(
+          condition=condition,
+          data=[err_msg, 'logits_shape: ', logits_shape,
+                'weights_shape: ', weights_shape])
+    else:
+      assert_dimension = check_ops.assert_equal(
+          supported_weights_shape, weights_shape, message=err_msg,
+          data=['logits_shape: ', logits_shape,
+                'weights_shape: ', weights_shape])
+    with ops.control_dependencies([assert_dimension]):
+      return array_ops.identity(weights, name=scope)
+
+
+def _check_logits_final_dim(logits, expected_logits_dimension):
+  """Checks that logits shape is [D0, D1, ... DN, logits_dimension]."""
+  with ops.name_scope(None, 'logits', (logits,)) as scope:
+    logits = math_ops.to_float(logits)
+    logits_shape = array_ops.shape(logits)
+    assert_rank = check_ops.assert_rank_at_least(
+        logits, 2, data=[logits_shape],
+        message='logits shape must be [D0, D1, ... DN, logits_dimension]')
+    with ops.control_dependencies([assert_rank]):
+      static_shape = logits.shape
+      if static_shape.ndims is not None and static_shape[-1] is not None:
+        if static_shape[-1] != expected_logits_dimension:
+          raise ValueError(
+              'logits shape must be [D0, D1, ... DN, logits_dimension], '
+              'got %s.' % (static_shape,))
+        return logits
+      assert_dimension = check_ops.assert_equal(
+          expected_logits_dimension, logits_shape[-1], data=[logits_shape],
+          message='logits shape must be [D0, D1, ... DN, logits_dimension]')
+      with ops.control_dependencies([assert_dimension]):
+        return array_ops.identity(logits, name=scope)
+
+
+def _validate_loss_fn_args(loss_fn):
+  """Validates loss_fn arguments.
+
+  Required arguments: labels, logits.
+  Optional arguments: features.
+
+  Args:
+    loss_fn: The loss function.
+  Raises:
+    ValueError: If the signature is unexpected.
+  """
+  loss_fn_args = function_utils.fn_args(loss_fn)
+  for required_arg in ['labels', 'logits']:
+    if required_arg not in loss_fn_args:
+      raise ValueError(
+          'loss_fn must contain argument: {}. '
+          'Given arguments: {}'.format(required_arg, loss_fn_args))
+  invalid_args = list(set(loss_fn_args) - set(['labels', 'logits', 'features']))
+  if invalid_args:
+    raise ValueError('loss_fn has unexpected args: {}'.format(invalid_args))
+
+
+def _call_loss_fn(loss_fn, labels, logits, features, expected_loss_dim=1):
+  """Calls loss_fn and checks the returned shape.
+
+  Args:
+    loss_fn: The loss function.
+    labels: Processed labels Tensor.
+    logits: Logits Tensor of shape [D0, D1, ... DN, logits_dimension].
+    features: Features dict.
+    expected_loss_dim: The expected last dimension of loss Tensor.
+  Returns:
+    Loss Tensor with shape [D0, D1, ... DN, expected_loss_dim].
+  """
+  loss_fn_args = function_utils.fn_args(loss_fn)
+  kwargs = {}
+  if 'features' in loss_fn_args:
+    kwargs['features'] = features
+  with ops.name_scope(
+      None, 'call_loss_fn',
+      values=[labels, logits] + list(six.itervalues(features))):
+    unweighted_loss = loss_fn(labels=labels, logits=logits, **kwargs)
+    logits_shape = array_ops.shape(logits, name='logits_shape')
+    expected_loss_shape = array_ops.concat(
+        [logits_shape[:-1], [expected_loss_dim]], axis=0,
+        name='expected_loss_shape')
+    loss_shape = array_ops.shape(unweighted_loss, name='loss_shape')
+    check_loss_shape_op = control_flow_ops.Assert(
+        math_ops.reduce_all(math_ops.equal(loss_shape, expected_loss_shape)),
+        data=[
+            'loss_fn must return Tensor of shape '
+            '[D0, D1, ... DN, {}]. '.format(expected_loss_dim),
+            'logits_shape: ', logits_shape, 'loss_shape: ', loss_shape],
+        name='check_loss_shape')
+    with ops.control_dependencies([check_loss_shape_op]):
+      return array_ops.identity(unweighted_loss)
+
+
+def _indicator_labels_mean(labels, weights=None, name=None):
+  with ops.name_scope(name, 'labels_mean', (labels, weights)) as scope:
+    labels = math_ops.to_float(labels, name='labels')
+    if weights is not None:
+      weights = weights_broadcast_ops.broadcast_weights(weights, labels)
+    return metrics_lib.mean(labels, weights=weights, name=scope)
+
+
+def _classification_output(scores, n_classes, label_vocabulary=None):
+  batch_size = array_ops.shape(scores)[0]
+  if label_vocabulary:
+    export_class_list = label_vocabulary
+  else:
+    export_class_list = string_ops.as_string(math_ops.range(n_classes))
+  export_output_classes = array_ops.tile(
+      input=array_ops.expand_dims(input=export_class_list, axis=0),
+      multiples=[batch_size, 1])
+  return export_output.ClassificationOutput(
+      scores=scores,
+      # `ClassificationOutput` requires string classes.
+      classes=export_output_classes)
+
+
+def _accuracy_baseline(labels_mean):
+  """Return accuracy baseline based on labels mean.
+
+  This is the best the model could do by always predicting one class.
+
+  Args:
+    labels_mean: Tuple of value and update op.
+
+  Returns:
+    Tuple of value and update op.
+  """
+  with ops.name_scope(None, 'accuracy_baseline', labels_mean):
+    value, update_op = labels_mean
+    return (
+        math_ops.maximum(value, 1. - value, name='value'),
+        math_ops.maximum(update_op, 1 - update_op, name='update_op'))
+
+
+def _predictions_mean(predictions, weights=None, name=None):
+  with ops.name_scope(
+      name, 'predictions_mean', (predictions, weights)) as scope:
+    predictions = math_ops.to_float(predictions, name='predictions')
+    if weights is not None:
+      weights = weights_broadcast_ops.broadcast_weights(weights, predictions)
+    return metrics_lib.mean(predictions, weights=weights, name=scope)
+
+
+def _auc(labels, predictions, weights=None, curve='ROC', name=None):
+  with ops.name_scope(name, 'auc', (predictions, labels, weights)) as scope:
+    predictions = math_ops.to_float(predictions, name='predictions')
+    if weights is not None:
+      weights = weights_broadcast_ops.broadcast_weights(weights, predictions)
+    return metrics_lib.auc(
+        labels=labels, predictions=predictions, weights=weights, curve=curve,
+        name=scope)
+
+
+def _accuracy_at_threshold(labels, predictions, weights, threshold, name=None):
+  with ops.name_scope(
+      name, 'accuracy_at_%s' % threshold,
+      (predictions, labels, weights, threshold)) as scope:
+    threshold_predictions = math_ops.to_float(
+        math_ops.greater_equal(predictions, threshold))
+    return metrics_lib.accuracy(
+        labels=labels, predictions=threshold_predictions, weights=weights,
+        name=scope)
+
+
+def _precision_at_threshold(labels, predictions, weights, threshold, name=None):
+  with ops.name_scope(
+      name, 'precision_at_%s' % threshold,
+      (predictions, labels, weights, threshold)) as scope:
+    precision_tensor, update_op = metrics_lib.precision_at_thresholds(
+        labels=labels, predictions=predictions, thresholds=(threshold,),
+        weights=weights, name=scope)
+    return array_ops.squeeze(precision_tensor), array_ops.squeeze(update_op)
+
+
+def _recall_at_threshold(labels, predictions, weights, threshold, name=None):
+  with ops.name_scope(
+      name, 'recall_at_%s' % threshold,
+      (predictions, labels, weights, threshold)) as scope:
+    precision_tensor, update_op = metrics_lib.recall_at_thresholds(
+        labels=labels, predictions=predictions, thresholds=(threshold,),
+        weights=weights, name=scope)
+    return array_ops.squeeze(precision_tensor), array_ops.squeeze(update_op)
+
+
+def _multi_class_head_with_softmax_cross_entropy_loss(
+    n_classes,
+    weight_column=None,
+    label_vocabulary=None,
+    loss_reduction=losses.Reduction.SUM,
+    loss_fn=None,
+    name=None):
+  """Creates a '_Head' for multi class classification.
+
+  The head expects `logits` with shape `[D0, D1, ... DN, n_classes]`.
+  In many applications, the shape is `[batch_size, n_classes]`.
+
+  `labels` must be a dense `Tensor` with shape matching `logits`, namely
+  `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string
+  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
+  `labels` must be an integer `Tensor` with values specifying the class index.
+
+  If `weight_column` is specified, weights must be of shape
+  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
+
+  The loss is the weighted sum over the input dimensions. Namely, if the input
+  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
+  `batch_size`.
+
+  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
+  `(labels, logits, features)` as arguments and returns unreduced loss with
+  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support integer `labels` with
+  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
+  the input labels before passing them to `loss_fn`.
+
+  Args:
+    n_classes: Number of classes, must be greater than 2 (for 2 classes, use
+      `_BinaryLogisticHeadWithSigmoidCrossEntropyLoss`).
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example.
+    label_vocabulary: A list or tuple of strings representing possible label
+      values. If it is not given, that means labels are already encoded as an
+      integer within [0, n_classes). If given, labels must be of string type and
+      have any value in `label_vocabulary`. Note that errors will be raised if
+      `label_vocabulary` is not provided but labels are strings.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
+    loss_fn: Optional loss function.
+    name: name of the head. If provided, summary and metrics keys will be
+      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
+
+  Returns:
+    An instance of `_Head` for multi class classification.
+
+  Raises:
+    ValueError: If `n_classes`, `label_vocabulary` or `loss_reduction` is
+      invalid.
+  """
+  if label_vocabulary is not None and not isinstance(label_vocabulary,
+                                                     (list, tuple)):
+    raise ValueError(
+        'label_vocabulary should be a list or a tuple. Given type: {}'.format(
+            type(label_vocabulary)))
+  if (loss_reduction not in losses.Reduction.all() or
+      loss_reduction == losses.Reduction.NONE):
+    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
+  if loss_fn:
+    _validate_loss_fn_args(loss_fn)
+  return _MultiClassHeadWithSoftmaxCrossEntropyLoss(
+      n_classes=n_classes,
+      weight_column=weight_column,
+      label_vocabulary=label_vocabulary,
+      loss_reduction=loss_reduction,
+      loss_fn=loss_fn,
+      name=name)
+
+
+class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head):
+  """See `_multi_class_head_with_softmax_cross_entropy_loss`."""
+
+  def __init__(self,
+               n_classes,
+               weight_column=None,
+               label_vocabulary=None,
+               loss_reduction=losses.Reduction.SUM,
+               loss_fn=None,
+               name=None):
+    if (n_classes is None) or (n_classes <= 2):
+      raise ValueError('n_classes must be > 2: %s.' % n_classes)
+    self._n_classes = n_classes
+    self._weight_column = weight_column
+    self._label_vocabulary = label_vocabulary
+    self._loss_reduction = loss_reduction
+    self._loss_fn = loss_fn
+    self._name = name
+
+  @property
+  def name(self):
+    return self._name
+
+  @property
+  def logits_dimension(self):
+    return self._n_classes
+
+  def _eval_metric_ops(
+      self, labels, class_ids, weights, unreduced_loss, regularization_loss):
+    """Returns the Eval metric ops."""
+    with ops.name_scope(
+        None, 'metrics',
+        (labels, class_ids, weights, unreduced_loss, regularization_loss)):
+      keys = metric_keys.MetricKeys
+      metric_ops = {
+          # Estimator already adds a metric for loss.
+          # TODO(xiejw): Any other metrics?
+          _summary_key(self._name, keys.LOSS_MEAN):
+              metrics_lib.mean(
+                  values=unreduced_loss,
+                  weights=weights,
+                  name=keys.LOSS_MEAN),
+          _summary_key(self._name, keys.ACCURACY):
+              metrics_lib.accuracy(
+                  labels=labels,
+                  predictions=class_ids,
+                  weights=weights,
+                  name=keys.ACCURACY),
+      }
+      if regularization_loss is not None:
+        metric_ops[_summary_key(self._name, keys.LOSS_REGULARIZATION)] = (
+            metrics_lib.mean(
+                values=regularization_loss,
+                name=keys.LOSS_REGULARIZATION))
+    return metric_ops
+
+  def _label_ids(self, labels):
+    """Converts labels to integer id space."""
+    if self._label_vocabulary is None:
+      if not labels.dtype.is_integer:
+        raise ValueError('Labels dtype should be integer. Instead got {}.'.
+                         format(labels.dtype))
+      label_ids = labels
+    else:
+      if labels.dtype != dtypes.string:
+        raise ValueError('Labels dtype should be string if there is a '
+                         'vocabulary. Instead got {}'.format(labels.dtype))
+      label_ids = lookup_ops.index_table_from_tensor(
+          vocabulary_list=tuple(self._label_vocabulary),
+          name='class_id_lookup').lookup(labels)
+    return _assert_range(label_ids, self._n_classes)
+
+  def create_loss(self, features, mode, logits, labels):
+    """See `Head`."""
+    del mode  # Unused for this head.
+    logits = ops.convert_to_tensor(logits)
+    labels = _check_dense_labels_match_logits_and_reshape(
+        labels=labels, logits=logits, expected_labels_dimension=1)
+    label_ids = self._label_ids(labels)
+    if self._loss_fn:
+      unweighted_loss = _call_loss_fn(
+          loss_fn=self._loss_fn, labels=label_ids, logits=logits,
+          features=features, expected_loss_dim=1)
+    else:
+      unweighted_loss = losses.sparse_softmax_cross_entropy(
+          labels=label_ids, logits=logits, reduction=losses.Reduction.NONE)
+      # Restore the squeezed dim, so unweighted_loss matches the weights shape.
+      unweighted_loss = array_ops.expand_dims(unweighted_loss, axis=-1)
+    weights = _get_weights_and_check_match_logits(
+        features=features, weight_column=self._weight_column, logits=logits)
+    training_loss = losses.compute_weighted_loss(
+        unweighted_loss, weights=weights, reduction=self._loss_reduction)
+    return LossSpec(
+        training_loss=training_loss,
+        unreduced_loss=unweighted_loss,
+        weights=weights,
+        processed_labels=label_ids)
+
+  def _create_tpu_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None, regularization_losses=None):
+    """Returns a `model_fn._TPUEstimatorSpec`.
+
+    Args:
+      features: Input `dict` of `Tensor` or `SparseTensor` objects.
+      mode: Estimator's `ModeKeys`.
+      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
+        For many applications, the shape is `[batch_size, logits_dimension]`.
+      labels: Labels integer or string `Tensor` with shape matching `logits`,
+        namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is
+        required argument when `mode` equals `TRAIN` or `EVAL`.
+      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
+        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
+        updates variables and increments `global_step`.
+      train_op_fn: Function that takes a scalar loss `Tensor` and returns
+        `train_op`. Used if `optimizer` is `None`.
+      regularization_losses: A list of additional scalar losses to be added to
+        the training loss, such as regularization losses. These losses are
+        usually expressed as a batch average, so for best results users need to
+        set `loss_reduction=SUM_OVER_BATCH_SIZE` or
+        `loss_reduction=SUM_OVER_NONZERO_WEIGHTS` when creating the head to
+        avoid scaling errors.
+    Returns:
+      A `model_fn._TPUEstimatorSpec` instance.
+    Raises:
+      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
+        mode, or if both are set.
+    """
+    with ops.name_scope(self._name, 'head'):
+      logits = _check_logits_final_dim(logits, self.logits_dimension)
+
+      # Predict.
+      pred_keys = prediction_keys.PredictionKeys
+      with ops.name_scope(None, 'predictions', (logits,)):
+        # class_ids's shape is [D0, D1, ... DN].
+        class_ids = math_ops.argmax(logits, axis=-1, name=pred_keys.CLASS_IDS)
+        class_ids = array_ops.expand_dims(class_ids, axis=-1)
+        if self._label_vocabulary:
+          table = lookup_ops.index_to_string_table_from_tensor(
+              vocabulary_list=self._label_vocabulary,
+              name='class_string_lookup')
+          classes = table.lookup(class_ids)
+        else:
+          classes = string_ops.as_string(class_ids, name='str_classes')
+
+        probabilities = nn.softmax(logits, name=pred_keys.PROBABILITIES)
+        predictions = {
+            pred_keys.LOGITS: logits,
+            pred_keys.PROBABILITIES: probabilities,
+            # Expand to [batch_size, 1]
+            pred_keys.CLASS_IDS: class_ids,
+            pred_keys.CLASSES: classes,
+        }
+      if mode == model_fn.ModeKeys.PREDICT:
+        classifier_output = _classification_output(
+            scores=probabilities, n_classes=self._n_classes,
+            label_vocabulary=self._label_vocabulary)
+        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
+            mode=model_fn.ModeKeys.PREDICT,
+            predictions=predictions,
+            export_outputs={
+                _DEFAULT_SERVING_KEY: classifier_output,
+                _CLASSIFY_SERVING_KEY: classifier_output,
+                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
+            })
+
+      training_loss, unreduced_loss, weights, label_ids = self.create_loss(
+          features=features, mode=mode, logits=logits, labels=labels)
+      if regularization_losses:
+        regularization_loss = math_ops.add_n(regularization_losses)
+        regularized_training_loss = math_ops.add_n(
+            [training_loss, regularization_loss])
+      else:
+        regularization_loss = None
+        regularized_training_loss = training_loss
+      # Eval.
+      if mode == model_fn.ModeKeys.EVAL:
+        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
+            mode=model_fn.ModeKeys.EVAL,
+            predictions=predictions,
+            loss=regularized_training_loss,
+            eval_metrics=_create_eval_metrics_tuple(self._eval_metric_ops, {
+                'labels': label_ids,
+                'class_ids': class_ids,
+                'weights': weights,
+                'unreduced_loss': unreduced_loss,
+                'regularization_loss': regularization_loss
+            }))
+
+      # Train.
+      if optimizer is not None:
+        if train_op_fn is not None:
+          raise ValueError('train_op_fn and optimizer cannot both be set.')
+        train_op = optimizer.minimize(
+            regularized_training_loss,
+            global_step=training_util.get_global_step())
+      elif train_op_fn is not None:
+        train_op = train_op_fn(regularized_training_loss)
+      else:
+        raise ValueError('train_op_fn and optimizer cannot both be None.')
+      train_op = _append_update_ops(train_op)
+      # Only summarize mean_loss for SUM reduction to preserve backwards
+      # compatibility. Otherwise skip it to avoid unnecessary computation.
+      if self._loss_reduction == losses.Reduction.SUM:
+        example_weight_sum = math_ops.reduce_sum(
+            weights * array_ops.ones_like(unreduced_loss))
+        mean_loss = training_loss / example_weight_sum
+      else:
+        mean_loss = None
+    with ops.name_scope(''):
+      keys = metric_keys.MetricKeys
+      summary.scalar(
+          _summary_key(self._name, keys.LOSS),
+          regularized_training_loss)
+      if mean_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, keys.LOSS_MEAN),
+            mean_loss)
+      if regularization_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, keys.LOSS_REGULARIZATION),
+            regularization_loss)
+    return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
+        mode=model_fn.ModeKeys.TRAIN,
+        predictions=predictions,
+        loss=regularized_training_loss,
+        train_op=train_op)
+
+
+def _binary_logistic_head_with_sigmoid_cross_entropy_loss(
+    weight_column=None,
+    thresholds=None,
+    label_vocabulary=None,
+    loss_reduction=losses.Reduction.SUM,
+    loss_fn=None,
+    name=None):
+  """Creates a `_Head` for single label binary classification.
+
+  This head uses `sigmoid_cross_entropy_with_logits` loss.
+
+  The head expects `logits` with shape `[D0, D1, ... DN, 1]`.
+  In many applications, the shape is `[batch_size, 1]`.
+
+  `labels` must be a dense `Tensor` with shape matching `logits`, namely
+  `[D0, D1, ... DN, 1]`. If `label_vocabulary` given, `labels` must be a string
+  `Tensor` with values from the vocabulary. If `label_vocabulary` is not given,
+  `labels` must be float `Tensor` with values in the interval `[0, 1]`.
+
+  If `weight_column` is specified, weights must be of shape
+  `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`.
+
+  The loss is the weighted sum over the input dimensions. Namely, if the input
+  labels have shape `[batch_size, 1]`, the loss is the weighted sum over
+  `batch_size`.
+
+  Also supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
+  `(labels, logits, features)` as arguments and returns unreduced loss with
+  shape `[D0, D1, ... DN, 1]`. `loss_fn` must support float `labels` with
+  shape `[D0, D1, ... DN, 1]`. Namely, the head applies `label_vocabulary` to
+  the input labels before passing them to `loss_fn`.
+
+  Args:
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example.
+    thresholds: Iterable of floats in the range `(0, 1)`. For binary
+      classification metrics such as precision and recall, an eval metric is
+      generated for each threshold value. This threshold is applied to the
+      logistic values to determine the binary classification (i.e., above the
+      threshold is `true`, below is `false`.
+    label_vocabulary: A list or tuple of strings representing possible label
+      values. If it is not given, that means labels are already encoded within
+      [0, 1]. If given, labels must be string type and have any value in
+      `label_vocabulary`. Note that errors will be raised if `label_vocabulary`
+      is not provided but labels are strings.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
+    loss_fn: Optional loss function.
+    name: name of the head. If provided, summary and metrics keys will be
+      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
+
+  Returns:
+    An instance of `_Head` for binary classification.
+
+  Raises:
+    ValueError: If `thresholds` contains a value outside of `(0, 1)`.
+    ValueError: If `loss_reduction` is invalid.
+    TypeError: if `label_vocabulary` has invalid type.
+  """
+  thresholds = tuple(thresholds) if thresholds else tuple()
+  if label_vocabulary is not None and not isinstance(label_vocabulary,
+                                                     (list, tuple)):
+    raise TypeError(
+        'label_vocabulary should be a list or tuple. Given type: {}'.format(
+            type(label_vocabulary)))
+
+  for threshold in thresholds:
+    if (threshold <= 0.0) or (threshold >= 1.0):
+      raise ValueError('thresholds not in (0, 1): {}.'.format((thresholds,)))
+  if (loss_reduction not in losses.Reduction.all() or
+      loss_reduction == losses.Reduction.NONE):
+    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
+  if loss_fn:
+    _validate_loss_fn_args(loss_fn)
+  return _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(
+      weight_column=weight_column,
+      thresholds=thresholds,
+      label_vocabulary=label_vocabulary,
+      loss_reduction=loss_reduction,
+      loss_fn=loss_fn,
+      name=name)
+
+
+class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head):
+  """See `_binary_logistic_head_with_sigmoid_cross_entropy_loss`."""
+
+  def __init__(self,
+               weight_column=None,
+               thresholds=None,
+               label_vocabulary=None,
+               loss_reduction=losses.Reduction.SUM,
+               loss_fn=None,
+               name=None):
+    self._weight_column = weight_column
+    self._thresholds = thresholds
+    self._label_vocabulary = label_vocabulary
+    self._loss_reduction = loss_reduction
+    self._loss_fn = loss_fn
+    self._name = name
+
+  @property
+  def name(self):
+    return self._name
+
+  @property
+  def logits_dimension(self):
+    return 1
+
+  def _eval_metric_ops(self, labels, logits, logistic, class_ids, weights,
+                       unreduced_loss, regularization_loss):
+    with ops.name_scope(None, 'metrics',
+                        (labels, logits, logistic, class_ids, weights,
+                         unreduced_loss, regularization_loss)):
+      keys = metric_keys.MetricKeys
+      labels_mean = _indicator_labels_mean(
+          labels=labels, weights=weights, name=keys.LABEL_MEAN)
+      metric_ops = {
+          # Estimator already adds a metric for loss.
+          _summary_key(self._name, keys.LOSS_MEAN):
+              metrics_lib.mean(
+                  values=unreduced_loss,
+                  weights=weights,
+                  name=keys.LOSS_MEAN),
+          _summary_key(self._name, keys.ACCURACY):
+              metrics_lib.accuracy(
+                  labels=labels,
+                  predictions=class_ids,
+                  weights=weights,
+                  name=keys.ACCURACY),
+          _summary_key(self._name, keys.PRECISION):
+              metrics_lib.precision(
+                  labels=labels,
+                  predictions=class_ids,
+                  weights=weights,
+                  name=keys.PRECISION),
+          _summary_key(self._name, keys.RECALL):
+              metrics_lib.recall(
+                  labels=labels,
+                  predictions=class_ids,
+                  weights=weights,
+                  name=keys.RECALL),
+          _summary_key(self._name, keys.PREDICTION_MEAN):
+              _predictions_mean(
+                  predictions=logistic,
+                  weights=weights,
+                  name=keys.PREDICTION_MEAN),
+          _summary_key(self._name, keys.LABEL_MEAN):
+              labels_mean,
+          _summary_key(self._name, keys.ACCURACY_BASELINE):
+              _accuracy_baseline(labels_mean),
+          _summary_key(self._name, keys.AUC):
+              _auc(
+                  labels=labels,
+                  predictions=logistic,
+                  weights=weights,
+                  name=keys.AUC),
+          _summary_key(self._name, keys.AUC_PR):
+              _auc(
+                  labels=labels,
+                  predictions=logistic,
+                  weights=weights,
+                  curve='PR',
+                  name=keys.AUC_PR)
+      }
+      if regularization_loss is not None:
+        metric_ops[_summary_key(self._name, keys.LOSS_REGULARIZATION)] = (
+            metrics_lib.mean(
+                values=regularization_loss,
+                name=keys.LOSS_REGULARIZATION))
+      for threshold in self._thresholds:
+        accuracy_key = keys.ACCURACY_AT_THRESHOLD % threshold
+        metric_ops[_summary_key(self._name,
+                                accuracy_key)] = _accuracy_at_threshold(
+                                    labels=labels,
+                                    predictions=logistic,
+                                    weights=weights,
+                                    threshold=threshold,
+                                    name=accuracy_key)
+        # Precision for positive examples.
+        precision_key = keys.PRECISION_AT_THRESHOLD % threshold
+        metric_ops[_summary_key(self._name,
+                                precision_key)] = _precision_at_threshold(
+                                    labels=labels,
+                                    predictions=logistic,
+                                    weights=weights,
+                                    threshold=threshold,
+                                    name=precision_key)
+        # Recall for positive examples.
+        recall_key = keys.RECALL_AT_THRESHOLD % threshold
+        metric_ops[_summary_key(self._name,
+                                recall_key)] = _recall_at_threshold(
+                                    labels=labels,
+                                    predictions=logistic,
+                                    weights=weights,
+                                    threshold=threshold,
+                                    name=recall_key)
+      return metric_ops
+
+  def create_loss(self, features, mode, logits, labels):
+    """See `Head`."""
+    del mode  # Unused for this head.
+    logits = ops.convert_to_tensor(logits)
+    labels = _check_dense_labels_match_logits_and_reshape(
+        labels=labels, logits=logits, expected_labels_dimension=1)
+    if self._label_vocabulary is not None:
+      labels = lookup_ops.index_table_from_tensor(
+          vocabulary_list=tuple(self._label_vocabulary),
+          name='class_id_lookup').lookup(labels)
+    labels = math_ops.to_float(labels)
+    labels = _assert_range(labels, n_classes=2)
+    if self._loss_fn:
+      unweighted_loss = _call_loss_fn(
+          loss_fn=self._loss_fn, labels=labels, logits=logits,
+          features=features, expected_loss_dim=1)
+    else:
+      unweighted_loss = nn.sigmoid_cross_entropy_with_logits(
+          labels=labels, logits=logits)
+    weights = _get_weights_and_check_match_logits(
+        features=features, weight_column=self._weight_column, logits=logits)
+    training_loss = losses.compute_weighted_loss(
+        unweighted_loss, weights=weights, reduction=self._loss_reduction)
+    return LossSpec(
+        training_loss=training_loss,
+        unreduced_loss=unweighted_loss,
+        weights=weights,
+        processed_labels=labels)
+
+  def _create_tpu_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None, regularization_losses=None):
+    """Returns an `EstimatorSpec`.
+
+    Args:
+      features: Input `dict` of `Tensor` or `SparseTensor` objects.
+      mode: Estimator's `ModeKeys`.
+      logits: logits `Tensor` with shape `[D0, D1, ... DN, 1]`. For many
+        applications, the shape is `[batch_size, 1]`.
+      labels: Labels integer or string `Tensor` with shape matching `logits`,
+        namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is required
+        argument when `mode` equals `TRAIN` or `EVAL`.
+      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
+        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
+        updates variables and increments `global_step`.
+      train_op_fn: Function that takes a scalar loss `Tensor` and returns
+        `train_op`. Used if `optimizer` is `None`.
+      regularization_losses: A list of additional scalar losses to be added to
+        the training loss, such as regularization losses. These losses are
+        usually expressed as a batch average, so for best results users need to
+        set `loss_reduction=SUM_OVER_BATCH_SIZE` or
+        `loss_reduction=SUM_OVER_NONZERO_WEIGHTS` when creating the head to
+        avoid scaling errors.
+    Returns:
+      `EstimatorSpec`.
+    Raises:
+      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
+        mode, or if both are set.
+    """
+    # Predict.
+    with ops.name_scope(self._name, 'head'):
+      with ops.name_scope(None, 'predictions', (logits,)):
+        pred_keys = prediction_keys.PredictionKeys
+        logits = _check_logits_final_dim(logits, self.logits_dimension)
+        logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC)
+        two_class_logits = array_ops.concat(
+            (array_ops.zeros_like(logits), logits),
+            axis=-1, name='two_class_logits')
+        probabilities = nn.softmax(
+            two_class_logits, name=pred_keys.PROBABILITIES)
+        class_ids = math_ops.argmax(
+            two_class_logits, axis=-1, name=pred_keys.CLASS_IDS)
+        class_ids = array_ops.expand_dims(class_ids, axis=-1)
+        if self._label_vocabulary:
+          table = lookup_ops.index_to_string_table_from_tensor(
+              vocabulary_list=self._label_vocabulary,
+              name='class_string_lookup')
+          classes = table.lookup(class_ids)
+        else:
+          classes = string_ops.as_string(class_ids, name='str_classes')
+        predictions = {
+            pred_keys.LOGITS: logits,
+            pred_keys.LOGISTIC: logistic,
+            pred_keys.PROBABILITIES: probabilities,
+            pred_keys.CLASS_IDS: class_ids,
+            pred_keys.CLASSES: classes,
+        }
+      if mode == model_fn.ModeKeys.PREDICT:
+        classifier_output = _classification_output(
+            scores=probabilities, n_classes=2,
+            label_vocabulary=self._label_vocabulary)
+        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
+            mode=model_fn.ModeKeys.PREDICT,
+            predictions=predictions,
+            export_outputs={
+                _DEFAULT_SERVING_KEY: classifier_output,
+                _CLASSIFY_SERVING_KEY: classifier_output,
+                _REGRESS_SERVING_KEY: export_output.RegressionOutput(
+                    value=logistic),
+                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
+            })
+
+      (training_loss, unreduced_loss, weights, processed_labels) = (
+          self.create_loss(
+              features=features, mode=mode, logits=logits, labels=labels))
+      if regularization_losses:
+        regularization_loss = math_ops.add_n(regularization_losses)
+        regularized_training_loss = math_ops.add_n(
+            [training_loss, regularization_loss])
+      else:
+        regularization_loss = None
+        regularized_training_loss = training_loss
+
+      # Eval.
+      if mode == model_fn.ModeKeys.EVAL:
+        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
+            mode=model_fn.ModeKeys.EVAL,
+            predictions=predictions,
+            loss=regularized_training_loss,
+            eval_metrics=_create_eval_metrics_tuple(
+                self._eval_metric_ops,
+                {
+                    'labels': processed_labels,
+                    'logits': logits,
+                    'logistic': logistic,
+                    'class_ids': class_ids,
+                    'weights': weights,
+                    'unreduced_loss': unreduced_loss,
+                    'regularization_loss': regularization_loss
+                }
+            ))
+
+      # Train.
+      if optimizer is not None:
+        if train_op_fn is not None:
+          raise ValueError('train_op_fn and optimizer cannot both be set.')
+        train_op = optimizer.minimize(
+            regularized_training_loss,
+            global_step=training_util.get_global_step())
+      elif train_op_fn is not None:
+        train_op = train_op_fn(regularized_training_loss)
+      else:
+        raise ValueError('train_op_fn and optimizer cannot both be None.')
+      train_op = _append_update_ops(train_op)
+      # Only summarize mean_loss for SUM reduction to preserve backwards
+      # compatibility. Otherwise skip it to avoid unnecessary computation.
+      if self._loss_reduction == losses.Reduction.SUM:
+        example_weight_sum = math_ops.reduce_sum(
+            weights * array_ops.ones_like(unreduced_loss))
+        mean_loss = training_loss / example_weight_sum
+      else:
+        mean_loss = None
+    with ops.name_scope(''):
+      keys = metric_keys.MetricKeys
+      summary.scalar(
+          _summary_key(self._name, keys.LOSS),
+          regularized_training_loss)
+      if mean_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, keys.LOSS_MEAN), mean_loss)
+      if regularization_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, keys.LOSS_REGULARIZATION),
+            regularization_loss)
+    return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
+        mode=model_fn.ModeKeys.TRAIN,
+        predictions=predictions,
+        loss=regularized_training_loss,
+        train_op=train_op)
+
+
+def _regression_head(
+    weight_column=None,
+    label_dimension=1,
+    loss_reduction=losses.Reduction.SUM,
+    loss_fn=None,
+    inverse_link_fn=None,
+    name=None):
+  """Creates a `_Head` for regression using the `mean_squared_error` loss.
+
+  The loss is the weighted sum over all input dimensions. Namely, if the input
+  labels have shape `[batch_size, label_dimension]`, the loss is the weighted
+  sum over both `batch_size` and `label_dimension`.
+
+  The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`.
+  In many applications, the shape is `[batch_size, label_dimension]`.
+
+  The `labels` shape must match `logits`, namely
+  `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape
+  `[D0, D1, ... DN]` is also supported.
+
+  If `weight_column` is specified, weights must be of shape
+  `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or
+  `[D0, D1, ... DN, label_dimension]`.
+
+  Supports custom `loss_fn`. `loss_fn` takes `(labels, logits)` or
+  `(labels, logits, features)` as arguments and returns unreduced loss with
+  shape `[D0, D1, ... DN, label_dimension]`.
+
+  Also supports custom `inverse_link_fn`, also known as 'mean function'.
+  `inverse_link_fn` takes `logits` as argument and returns predicted values.
+  This function is the inverse of the link function defined in
+  https://en.wikipedia.org/wiki/Generalized_linear_model#Link_function
+  Namely, for poisson regression, set `inverse_link_fn=tf.exp`.
+
+  Args:
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example.
+    label_dimension: Number of regression labels per example. This is the size
+      of the last dimension of the labels `Tensor` (typically, this has shape
+      `[batch_size, label_dimension]`).
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to
+      reduce training loss over batch. Defaults to `SUM`.
+    loss_fn: Optional loss function. Defaults to `mean_squared_error`.
+    inverse_link_fn: Optional inverse link function, also known as 'mean
+      function'. Defaults to identity.
+    name: name of the head. If provided, summary and metrics keys will be
+      suffixed by `"/" + name`. Also used as `name_scope` when creating ops.
+
+  Returns:
+    An instance of `_Head` for linear regression.
+
+  Raises:
+    ValueError: If `label_dimension` or `loss_reduction` is invalid.
+  """
+  if (loss_reduction not in losses.Reduction.all() or
+      loss_reduction == losses.Reduction.NONE):
+    raise ValueError('Invalid loss_reduction: {}'.format(loss_reduction))
+  if loss_fn:
+    _validate_loss_fn_args(loss_fn)
+  return _RegressionHeadWithMeanSquaredErrorLoss(
+      weight_column=weight_column,
+      label_dimension=label_dimension,
+      loss_reduction=loss_reduction,
+      loss_fn=loss_fn,
+      inverse_link_fn=inverse_link_fn,
+      name=name)
+
+
+class _RegressionHeadWithMeanSquaredErrorLoss(_Head):
+  """`Head` for regression using the mean squared loss."""
+
+  def __init__(
+      self,
+      label_dimension,
+      weight_column=None,
+      loss_reduction=losses.Reduction.SUM,
+      loss_fn=None,
+      inverse_link_fn=None,
+      name=None):
+    """`Head` for regression."""
+    if label_dimension < 1:
+      raise ValueError('Invalid label_dimension %s.' % label_dimension)
+    self._logits_dimension = label_dimension
+    self._weight_column = weight_column
+    self._loss_reduction = loss_reduction
+    self._loss_fn = loss_fn
+    self._inverse_link_fn = inverse_link_fn
+    self._name = name
+
+  @property
+  def name(self):
+    return self._name
+
+  @property
+  def logits_dimension(self):
+    return self._logits_dimension
+
+  def create_loss(self, features, mode, logits, labels):
+    """See `Head`."""
+    del mode  # Unused for this head.
+    logits = ops.convert_to_tensor(logits)
+    labels = _check_dense_labels_match_logits_and_reshape(
+        labels=labels, logits=logits,
+        expected_labels_dimension=self._logits_dimension)
+    labels = math_ops.to_float(labels)
+    if self._loss_fn:
+      unweighted_loss = _call_loss_fn(
+          loss_fn=self._loss_fn, labels=labels, logits=logits,
+          features=features, expected_loss_dim=self._logits_dimension)
+    else:
+      unweighted_loss = losses.mean_squared_error(
+          labels=labels, predictions=logits, reduction=losses.Reduction.NONE)
+    weights = _get_weights_and_check_match_logits(
+        features=features, weight_column=self._weight_column, logits=logits,
+        allow_per_logit_weights=True)
+    training_loss = losses.compute_weighted_loss(
+        unweighted_loss, weights=weights, reduction=self._loss_reduction)
+    return LossSpec(
+        training_loss=training_loss,
+        unreduced_loss=unweighted_loss,
+        weights=weights,
+        processed_labels=labels)
+
+  def _eval_metric_ops(self, predicted_value, labels, weights, unreduced_loss,
+                       regularization_loss):
+    """Returns the Eval metric ops."""
+    keys = metric_keys.MetricKeys
+    # Estimator already adds a metric for loss.
+    eval_metric_ops = {
+        _summary_key(self._name, keys.LOSS_MEAN):
+            metrics_lib.mean(values=unreduced_loss, weights=weights),
+        _summary_key(self._name, keys.PREDICTION_MEAN):
+            _predictions_mean(
+                predictions=predicted_value,
+                weights=weights,
+                name=keys.PREDICTION_MEAN),
+        _summary_key(self._name, keys.LABEL_MEAN):
+            metrics_lib.mean(values=labels, weights=weights)
+    }
+    if regularization_loss is not None:
+      regularization_loss_key = _summary_key(
+          self._name, keys.LOSS_REGULARIZATION)
+      eval_metric_ops[regularization_loss_key] = metrics_lib.mean(
+          values=regularization_loss,
+          name=keys.LOSS_REGULARIZATION)
+    return eval_metric_ops
+
+  def _create_tpu_estimator_spec(
+      self, features, mode, logits, labels=None, optimizer=None,
+      train_op_fn=None, regularization_losses=None):
+    """Returns an `EstimatorSpec`.
+
+    Args:
+      features: Input `dict` of `Tensor` or `SparseTensor` objects.
+      mode: Estimator's `ModeKeys`.
+      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
+        For many applications, the shape is `[batch_size, logits_dimension]`.
+      labels: Labels `Tensor` with shape matching `logits`, namely
+        `[D0, D1, ... DN, logits_dimension]`. When `logits_dimension=1`, shape
+        `[D0, D1, ... DN]` is also supported. `labels` is required argument when
+        `mode` equals `TRAIN` or `EVAL`.
+      optimizer: `Optimizer` instance to optimize the loss in TRAIN mode.
+        Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which
+        updates variables and increments `global_step`.
+      train_op_fn: Function that takes a scalar loss `Tensor` and returns
+        `train_op`. Used if `optimizer` is `None`.
+      regularization_losses: A list of additional scalar losses to be added to
+        the training loss, such as regularization losses. These losses are
+        usually expressed as a batch average, so for best results users need to
+        set `loss_reduction=SUM_OVER_BATCH_SIZE` or
+        `loss_reduction=SUM_OVER_NONZERO_WEIGHTS` when creating the head to
+        avoid scaling errors.
+    Returns:
+      A `model_fn._TPUEstimatorSpec` instance.
+    Raises:
+      ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN
+        mode, or if both are set.
+    """
+    # Predict.
+    with ops.name_scope(self._name, 'head'):
+      logits = _check_logits_final_dim(logits, self._logits_dimension)
+      if self._inverse_link_fn:
+        predicted_value = self._inverse_link_fn(logits)
+        predictions = {
+            prediction_keys.PredictionKeys.PREDICTIONS: predicted_value,
+            prediction_keys.PredictionKeys.LOGITS: logits,
+        }
+      else:
+        predicted_value = logits
+        predictions = {
+            prediction_keys.PredictionKeys.PREDICTIONS: predicted_value}
+      if mode == model_fn.ModeKeys.PREDICT:
+        regression_output = export_output.RegressionOutput(
+            value=predicted_value)
+        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
+            mode=model_fn.ModeKeys.PREDICT,
+            predictions=predictions,
+            export_outputs={
+                _DEFAULT_SERVING_KEY: regression_output,
+                _REGRESS_SERVING_KEY: regression_output,
+                _PREDICT_SERVING_KEY: export_output.PredictOutput(predictions)
+            })
+
+      training_loss, unreduced_loss, weights, _ = self.create_loss(
+          features=features, mode=mode, logits=logits, labels=labels)
+      if regularization_losses:
+        regularization_loss = math_ops.add_n(regularization_losses)
+        regularized_training_loss = math_ops.add_n(
+            [training_loss, regularization_loss])
+      else:
+        regularization_loss = None
+        regularized_training_loss = training_loss
+
+      # Eval.
+      if mode == model_fn.ModeKeys.EVAL:
+        return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
+            mode=model_fn.ModeKeys.EVAL,
+            predictions=predictions,
+            loss=regularized_training_loss,
+            eval_metrics=_create_eval_metrics_tuple(
+                self._eval_metric_ops, {
+                    'predicted_value': predicted_value,
+                    'labels': labels,
+                    'weights': weights,
+                    'unreduced_loss': unreduced_loss,
+                    'regularization_loss': regularization_loss,
+                }))
+
+      # Train.
+      if optimizer is not None:
+        if train_op_fn is not None:
+          raise ValueError('train_op_fn and optimizer cannot both be set.')
+        train_op = optimizer.minimize(
+            regularized_training_loss,
+            global_step=training_util.get_global_step())
+      elif train_op_fn is not None:
+        train_op = train_op_fn(regularized_training_loss)
+      else:
+        raise ValueError('train_op_fn and optimizer cannot both be None.')
+      train_op = _append_update_ops(train_op)
+      # Only summarize mean_loss for SUM reduction to preserve backwards
+      # compatibility. Otherwise skip it to avoid unnecessary computation.
+      if self._loss_reduction == losses.Reduction.SUM:
+        example_weight_sum = math_ops.reduce_sum(
+            weights * array_ops.ones_like(unreduced_loss))
+        mean_loss = training_loss / example_weight_sum
+      else:
+        mean_loss = None
+    with ops.name_scope(''):
+      keys = metric_keys.MetricKeys
+      summary.scalar(
+          _summary_key(self._name, keys.LOSS),
+          regularized_training_loss)
+      if mean_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, keys.LOSS_MEAN), mean_loss)
+      if regularization_loss is not None:
+        summary.scalar(
+            _summary_key(self._name, keys.LOSS_REGULARIZATION),
+            regularization_loss)
+    return model_fn._TPUEstimatorSpec(  # pylint: disable=protected-access
+        mode=model_fn.ModeKeys.TRAIN,
+        predictions=predictions,
+        loss=regularized_training_loss,
+        train_op=train_op)
+
+
+def _append_update_ops(train_op):
+  """Returns `train_op` appending `UPDATE_OPS` collection if present."""
+  update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
+  if update_ops:
+    return control_flow_ops.group(train_op, *update_ops)
+  return train_op
+
+
+def _assert_range(labels, n_classes, message=None):
+  with ops.name_scope(None, 'assert_range', (labels,)):
+    assert_less = check_ops.assert_less_equal(
+        labels,
+        ops.convert_to_tensor(n_classes - 1, dtype=labels.dtype),
+        message=message or 'Labels must <= n_classes - 1')
+    assert_greater = check_ops.assert_non_negative(
+        labels, message=message or 'Labels must >= 0')
+    with ops.control_dependencies((assert_less, assert_greater)):
+      return array_ops.identity(labels)
+
+
+def _binary_logistic_or_multi_class_head(
+    n_classes, weight_column, label_vocabulary, loss_reduction):
+  """Creates either binary or multi-class head.
 
-# Include attrs that start with single underscore.
-head.__all__ = [s for s in dir(head) if not s.startswith('__')]
+  Args:
+    n_classes: Number of label classes.
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example. If it is a string, it is
+      used as a key to fetch weight tensor from the `features`. If it is a
+      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+      then weight_column.normalizer_fn is applied on it to get weight tensor.
+    label_vocabulary: A list of strings represents possible label values. If
+      given, labels must be string type and have any value in
+      `label_vocabulary`. If it is not given, that means labels are
+      already encoded as integer or float within [0, 1] for `n_classes=2` and
+      encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
+      Also there will be errors if vocabulary is not provided and labels are
+      string.
+    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+      to reduce training loss over batch. Defaults to `SUM`.
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.head import *
+  Returns:
+    `head._Head` instance.
+  """
+  if n_classes == 2:
+    head = _binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column=weight_column,
+        label_vocabulary=label_vocabulary,
+        loss_reduction=loss_reduction)
+  else:
+    head = _multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, weight_column=weight_column,
+        label_vocabulary=label_vocabulary,
+        loss_reduction=loss_reduction)
+  return head
diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py
new file mode 100644
index 0000000000..de9c84d2ef
--- /dev/null
+++ b/tensorflow/python/estimator/canned/head_test.py
@@ -0,0 +1,4056 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for head.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import six
+
+from tensorflow.core.framework import summary_pb2
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.canned import dnn_testing_utils
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column as feature_column_lib
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import queue_runner_impl
+
+
+_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
+
+def _initialize_variables(test_case, scaffold):
+  scaffold.finalize()
+  test_case.assertIsNone(scaffold.init_feed_dict)
+  test_case.assertIsNone(scaffold.init_fn)
+  scaffold.init_op.run()
+  scaffold.ready_for_local_init_op.eval()
+  scaffold.local_init_op.run()
+  scaffold.ready_op.eval()
+  test_case.assertIsNotNone(scaffold.saver)
+
+
+def _assert_simple_summaries(test_case, expected_summaries, summary_str,
+                             tol=1e-6):
+  """Assert summary the specified simple values.
+
+  Args:
+    test_case: test case.
+    expected_summaries: Dict of expected tags and simple values.
+    summary_str: Serialized `summary_pb2.Summary`.
+    tol: Tolerance for relative and absolute.
+  """
+  summary = summary_pb2.Summary()
+  summary.ParseFromString(summary_str)
+  test_case.assertAllClose(expected_summaries, {
+      v.tag: v.simple_value for v in summary.value
+  }, rtol=tol, atol=tol)
+
+
+def _assert_no_hooks(test_case, spec):
+  test_case.assertAllEqual([], spec.training_chief_hooks)
+  test_case.assertAllEqual([], spec.training_hooks)
+
+
+def _sigmoid(logits):
+  return 1 / (1 + np.exp(-logits))
+
+
+class CreateEstimatorSpecTest(test.TestCase):
+
+  class _HeadWithTPUSupport(head_lib._Head):
+    """Head that overrides _create_tpu_estimator_spec."""
+
+    def name(self):
+      return 'HeadWithTPUSupport'
+
+    def logits_dimension(self):
+      return None
+
+    def create_loss(self, features, mode, logits, labels):
+      return None
+
+    def _create_tpu_estimator_spec(self, features, mode, logits, labels=None,
+                                   optimizer=None, train_op_fn=None,
+                                   regularization_losses=None):
+      return model_fn._TPUEstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL,
+          loss=constant_op.constant(0.0, dtype=dtypes.float32))
+
+  class _HeadWithOutTPUSupport(head_lib._Head):
+    """Head that overrides create_estimator_spec."""
+
+    def name(self):
+      return 'HeadWithOutTPUSupport'
+
+    def logits_dimension(self):
+      return None
+
+    def create_loss(self, features, mode, logits, labels):
+      return None
+
+    def create_estimator_spec(self, features, mode, logits, labels=None,
+                              optimizer=None, train_op_fn=None,
+                              regularization_losses=None):
+      return model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL,
+          loss=constant_op.constant(0.0, dtype=dtypes.float32))
+
+  class _InvalidHead(head_lib._Head):
+    """Head that overrides neither estimator_spec functions."""
+
+    def name(self):
+      return 'InvalidHead'
+
+    def logits_dimension(self):
+      return None
+
+    def create_loss(self, features, mode, logits, labels):
+      return None
+
+  def test_head_override_tpu_estimator_spec(self):
+    """Test for `_Head` that overrides _create_tpu_estimator_spec."""
+    head = self._HeadWithTPUSupport()
+
+    tpu_spec = head._create_tpu_estimator_spec(
+        features=None, mode=None, logits=None)
+    self.assertTrue(isinstance(tpu_spec, model_fn._TPUEstimatorSpec))
+    est_spec = head.create_estimator_spec(
+        features=None, mode=None, logits=None)
+    self.assertTrue(isinstance(est_spec, model_fn.EstimatorSpec))
+
+  def test_head_override_estimator_spec(self):
+    """Test for `_Head` that overrides create_estimator_spec."""
+    head = self._HeadWithOutTPUSupport()
+
+    with self.assertRaisesRegexp(
+        NotImplementedError,
+        'TPUEstimatorSpec not available for this model head.'):
+      _ = head._create_tpu_estimator_spec(
+          features=None, mode=None, logits=None)
+    est_spec = head.create_estimator_spec(
+        features=None, mode=None, logits=None)
+    self.assertTrue(isinstance(est_spec, model_fn.EstimatorSpec))
+
+  def test_invalid_head_class(self):
+    head = self._InvalidHead()
+
+    with self.assertRaisesRegexp(
+        NotImplementedError,
+        'TPUEstimatorSpec not available for this model head.'):
+      _ = head._create_tpu_estimator_spec(
+          features=None, mode=None, logits=None)
+    with self.assertRaisesRegexp(
+        NotImplementedError,
+        r'Subclasses of _Head must implement `create_estimator_spec\(\)` or '
+        r'_create_tpu_estimator_spec\(\).'):
+      _ = head.create_estimator_spec(
+          features=None, mode=None, logits=None)
+
+
+class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase):
+
+  def setUp(self):
+    ops.reset_default_graph()
+
+  def test_n_classes_is_none(self):
+    with self.assertRaisesRegexp(ValueError, 'n_classes must be > 2'):
+      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+          n_classes=None)
+
+  def test_n_classes_is_2(self):
+    with self.assertRaisesRegexp(ValueError, 'n_classes must be > 2'):
+      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+          n_classes=2)
+
+  def test_invalid_loss_reduction(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
+      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+          n_classes=3, loss_reduction='invalid_loss_reduction')
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: none'):
+      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+          n_classes=3, loss_reduction=losses.Reduction.NONE)
+
+  def test_loss_fn_arg_labels_missing(self):
+    def _loss_fn(logits):
+      del logits  # Unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn must contain argument: labels\. '
+        r'Given arguments: \(\'logits\',\)'):
+      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+          n_classes=3, loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_logits_missing(self):
+    def _loss_fn(labels):
+      del labels  # unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn must contain argument: logits\. '
+        r'Given arguments: \(\'labels\',\)'):
+      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+          n_classes=3, loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_features_ok(self):
+    def _loss_fn(labels, logits, features):
+      del labels, logits, features  # Unused
+    head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_invalid(self):
+    def _loss_fn(labels, logits, name=None):
+      del labels, logits, name  # Unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn has unexpected args: \[\'name\'\]'):
+      head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+          n_classes=3, loss_fn=_loss_fn)
+
+  def test_invalid_logits_shape(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+    self.assertEqual(n_classes, head.logits_dimension)
+
+    # Logits should be shape (batch_size, 3).
+    logits_2x2 = np.array(((45., 44.), (41., 42.),))
+
+    # Static shape.
+    with self.assertRaisesRegexp(ValueError, 'logits shape'):
+      head.create_estimator_spec(
+          features={'x': np.array(((30.,), (42.,),))},
+          mode=model_fn.ModeKeys.PREDICT,
+          logits=logits_2x2)
+
+    # Dynamic shape.
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((30.,), (42.,),))},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits_placeholder)
+    with self.cached_session():
+      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
+        spec.predictions[prediction_keys.PredictionKeys.PROBABILITIES].eval({
+            logits_placeholder: logits_2x2
+        })
+
+  def test_invalid_labels_shape(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+    self.assertEqual(n_classes, head.logits_dimension)
+
+    # Logits should be shape (batch_size, 3).
+    # Labels should be shape (batch_size, 1).
+    labels_2x2 = np.array(((45, 44), (41, 42),), dtype=np.int)
+    logits_2x3 = np.array(((1., 2., 3.), (1., 2., 3.),))
+    features = {'x': np.array(((42.,),))}
+
+    # Static shape.
+    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
+      head.create_loss(
+          features=features,
+          mode=model_fn.ModeKeys.EVAL,
+          logits=logits_2x3,
+          labels=labels_2x2)
+
+    # Dynamic shape.
+    labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_placeholder,
+        labels=labels_placeholder)[0]
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'):
+        training_loss.eval({
+            logits_placeholder: logits_2x3,
+            labels_placeholder: labels_2x2
+        })
+
+  def test_invalid_labels_type(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+    self.assertEqual(n_classes, head.logits_dimension)
+
+    # Logits should be shape (batch_size, 3).
+    # Labels should be shape (batch_size, 1).
+    labels_2x1 = np.array(((1.,), (1.,),))
+    logits_2x3 = np.array(((1., 2., 3.), (1., 2., 3.),))
+    features = {'x': np.array(((42.,),))}
+
+    # Static shape.
+    with self.assertRaisesRegexp(ValueError, 'Labels dtype'):
+      head.create_loss(
+          features=features,
+          mode=model_fn.ModeKeys.EVAL,
+          logits=logits_2x3,
+          labels=labels_2x1)
+
+    # Dynamic shape.
+    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    with self.assertRaisesRegexp(ValueError, 'Labels dtype'):
+      head.create_loss(
+          features=features,
+          mode=model_fn.ModeKeys.EVAL,
+          logits=logits_placeholder,
+          labels=labels_placeholder)
+
+  def test_invalid_labels_values(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+    self.assertEqual(n_classes, head.logits_dimension)
+
+    labels_2x1_with_large_id = np.array(((45,), (1,),), dtype=np.int)
+    labels_2x1_with_negative_id = np.array(((-5,), (1,),), dtype=np.int)
+    logits_2x3 = np.array(((1., 2., 4.), (1., 2., 3.),))
+
+    labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    training_loss = head.create_loss(
+        features={'x': np.array(((42.,),))},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_placeholder,
+        labels=labels_placeholder)[0]
+    with self.cached_session():
+      with self.assertRaisesOpError('Labels must <= n_classes - 1'):
+        training_loss.eval({
+            labels_placeholder: labels_2x1_with_large_id,
+            logits_placeholder: logits_2x3
+        })
+
+    with self.cached_session():
+      with self.assertRaisesOpError('Labels must >= 0'):
+        training_loss.eval({
+            labels_placeholder: labels_2x1_with_negative_id,
+            logits_placeholder: logits_2x3
+        })
+
+  def test_invalid_labels_sparse_tensor(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+    self.assertEqual(n_classes, head.logits_dimension)
+
+    labels_2x1 = sparse_tensor.SparseTensor(
+        values=['english', 'italian'],
+        indices=[[0, 0], [1, 0]],
+        dense_shape=[2, 1])
+    logits_2x3 = np.array(((1., 2., 4.), (1., 2., 3.),))
+
+    with self.assertRaisesRegexp(
+        ValueError, 'SparseTensor labels are not supported.'):
+      head.create_loss(
+          features={'x': np.array(((42.,),))},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=logits_2x3,
+          labels=labels_2x1)
+
+  def test_incompatible_labels_shape(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+    self.assertEqual(n_classes, head.logits_dimension)
+
+    # Logits should be shape (batch_size, 3).
+    # Labels should be shape (batch_size, 1).
+    # Here batch sizes are different.
+    values_3x1 = np.array(((1,), (1,), (1,),))
+    values_2x3 = np.array(((1., 2., 3.), (1., 2., 3.),))
+    features = {'x': values_2x3}
+
+    # Static shape.
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'Shape mismatch: The shape of labels \(received \(3,\)\) should equal '
+        r'the shape of logits except for the last dimension '
+        r'\(received \(2, 3\)\)\.'
+    ):
+      head.create_loss(
+          features=features,
+          mode=model_fn.ModeKeys.EVAL,
+          logits=values_2x3,
+          labels=values_3x1)
+
+    # Dynamic shape.
+    labels_placeholder = array_ops.placeholder(dtype=dtypes.int64)
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_placeholder,
+        labels=labels_placeholder)[0]
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'):
+        training_loss.eval({
+            labels_placeholder: values_3x1,
+            logits_placeholder: values_2x3
+        })
+
+  def test_name(self):
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, name='foo')
+    self.assertEqual('foo', head.name)
+
+  def test_predict(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+    self.assertEqual(n_classes, head.logits_dimension)
+
+    logits = [[1., 0., 0.], [0., 0., 1.]]
+    expected_probabilities = [[0.576117, 0.2119416, 0.2119416],
+                              [0.2119416, 0.2119416, 0.576117]]
+    expected_class_ids = [[0], [2]]
+    expected_classes = [[b'0'], [b'2']]
+    expected_export_classes = [[b'0', b'1', b'2']] * 2
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    self.assertItemsEqual(
+        (_DEFAULT_SERVING_KEY, 'predict', 'classification'),
+        spec.export_outputs.keys())
+
+    # Assert predictions and export_outputs.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      predictions = sess.run(spec.predictions)
+      self.assertAllClose(logits,
+                          predictions[prediction_keys.PredictionKeys.LOGITS])
+      self.assertAllClose(
+          expected_probabilities,
+          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+      self.assertAllClose(expected_class_ids,
+                          predictions[prediction_keys.PredictionKeys.CLASS_IDS])
+      self.assertAllEqual(expected_classes,
+                          predictions[prediction_keys.PredictionKeys.CLASSES])
+
+      self.assertAllClose(
+          expected_probabilities,
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
+      self.assertAllEqual(
+          expected_export_classes,
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
+
+  def test_predict_with_vocabulary_list(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
+
+    logits = [[1., 0., 0.], [0., 0., 1.]]
+    expected_classes = [[b'aang'], [b'zuko']]
+    expected_export_classes = [[b'aang', b'iroh', b'zuko']] * 2
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertAllEqual(
+          expected_classes,
+          sess.run(spec.predictions[prediction_keys.PredictionKeys.CLASSES]))
+      self.assertAllEqual(
+          expected_export_classes,
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
+
+  def test_weight_should_not_impact_prediction(self):
+    n_classes = 3
+    logits = [[1., 0., 0.], [0., 0., 1.]]
+    expected_probabilities = [[0.576117, 0.2119416, 0.2119416],
+                              [0.2119416, 0.2119416, 0.576117]]
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, weight_column='label_weights')
+
+    weights_2x1 = [[1.], [2.]]
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array(((42,),), dtype=np.int32),
+            'label_weights': weights_2x1,
+        },
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      predictions = sess.run(spec.predictions)
+      self.assertAllClose(logits,
+                          predictions[prediction_keys.PredictionKeys.LOGITS])
+      self.assertAllClose(
+          expected_probabilities,
+          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+
+  def test_eval_create_loss(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # loss = cross_entropy(labels, logits) = [10, 0].
+    expected_training_loss = 10.
+    # Create loss.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
+
+  def test_eval_create_loss_loss_fn(self):
+    """Tests head.create_loss for eval mode and custom loss_fn."""
+    loss = np.array([[1.], [2.]], dtype=np.float32)
+    logits_input = np.array([[-10., 10., 0.], [-15., 10., 0]], dtype=np.float32)
+    labels_input = np.array([[1], [2]], dtype=np.int64)
+    def _loss_fn(labels, logits):
+      check_labels = control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(labels, labels_input)),
+          data=[labels])
+      check_logits = control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(logits, logits_input)),
+          data=[logits])
+      with ops.control_dependencies([check_labels, check_logits]):
+        return constant_op.constant(loss)
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, loss_fn=_loss_fn)
+
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_input,
+        labels=labels_input)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(np.sum(loss), actual_training_loss.eval())
+
+  def test_eval_create_loss_loss_fn_wrong_shape(self):
+    """Tests custom loss_fn that returns Tensor of unexpected shape."""
+    loss = np.array([1., 2.], dtype=np.float32)
+    def _loss_fn(labels, logits):
+      del labels, logits  # Unused
+      return constant_op.constant(loss)
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, loss_fn=_loss_fn)
+
+    logits = np.array([[-10., 10., 0.], [-15., 10., 0.]], dtype=np.float32)
+    labels = np.array([[1], [2]], dtype=np.int64)
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. \] '
+          r'\[logits_shape: \] \[2 3\] \[loss_shape: \] \[2\]'):
+        actual_training_loss.eval()
+
+  def test_eval_labels_none(self):
+    """Tests that error is raised when labels is None."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3)
+
+    with self.assertRaisesRegexp(
+        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32),
+          labels=None)
+
+  def test_eval(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
+    expected_loss = 10.
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_loss / 2,
+        keys.ACCURACY: 0.5,  # 1 of 2 labels is correct.
+    }
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, and metrics.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval()
+                             for k in value_ops},
+          rtol=tol,
+          atol=tol)
+
+  def test_eval_metric_ops_with_head_name(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, name='some_multiclass_head')
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    expected_metric_keys = [
+        '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS_MEAN),
+        '{}/some_multiclass_head'.format(metric_keys.MetricKeys.ACCURACY)
+    ]
+    self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys())
+
+  def test_eval_with_regularization_losses(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    regularization_losses = [1.5, 0.5]
+    expected_regularization_loss = 2.
+    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
+    #                    = sum(10, 0) / 2 = 5.
+    expected_unregularized_loss = 5.
+    expected_regularized_loss = (
+        expected_unregularized_loss + expected_regularization_loss)
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels,
+        regularization_losses=regularization_losses)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_unregularized_loss,
+        keys.LOSS_REGULARIZATION: expected_regularization_loss,
+        keys.ACCURACY: 0.5,  # 1 of 2 labels is correct.
+    }
+
+    # Assert predictions, loss, and metrics.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_regularized_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval()
+                             for k in value_ops},
+          rtol=tol,
+          atol=tol)
+
+  def test_eval_with_label_vocabulary_create_loss(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
+    logits = [[10., 0, 0], [0, 10, 0]]
+    labels = [[b'iroh'], [b'iroh']]
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # loss = cross_entropy(labels, logits) = [10, 0].
+    expected_training_loss = 10.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
+
+  def test_eval_with_label_vocabulary(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
+
+    logits = [[10., 0, 0], [0, 10, 0]]
+    labels = [[b'iroh'], [b'iroh']]
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
+    expected_loss = 10.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_loss / 2,
+        keys.ACCURACY: 0.5,  # 1 of 2 labels is correct.
+    }
+
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
+          rtol=tol, atol=tol)
+
+  def test_weighted_multi_example_eval(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, weight_column='label_weights')
+
+    # Create estimator spec.
+    logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
+    labels = np.array(((1,), (2,), (2,)), dtype=np.int64)
+    weights_3x1 = np.array(((1.,), (2.,), (3.,)), dtype=np.float64)
+    # loss = sum(cross_entropy(labels, logits) * [1, 2, 3])
+    #      = sum([10, 10, 0] * [1, 2, 3]) = 30
+    expected_loss = 30.
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array(((42,),), dtype=np.int32),
+            'label_weights': weights_3x1,
+        },
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_loss / np.sum(weights_3x1),
+        # Weighted accuracy is 1 * 3.0 / sum weights = 0.5
+        keys.ACCURACY: 0.5,
+    }
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert loss, and metrics.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
+          rtol=tol, atol=tol)
+
+  def test_train_create_loss(self):
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3)
+
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+
+    # unreduced_loss = cross_entropy(labels, logits) = [10, 0].
+    expected_unreduced_loss = [[10.], [0.]]
+    # Weights default to 1.
+    expected_weights = 1.
+    # training_loss = 1 * 10 + 1 * 0
+    expected_training_loss = 10.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    tol = 1e-2
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights)
+
+  def test_train_create_loss_loss_reduction(self):
+    """Tests create_loss with loss_reduction."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+
+    # unreduced_loss = cross_entropy(labels, logits) = [10, 0].
+    expected_unreduced_loss = [[10.], [0.]]
+    # Weights default to 1.
+    expected_weights = 1.
+    # training_loss = 1 * 10 + 1 * 0 / num_nonzero_weights
+    expected_training_loss = 10. / 2.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    tol = 1e-2
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights)
+
+  def test_train_labels_none(self):
+    """Tests that error is raised when labels is None."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3)
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    with self.assertRaisesRegexp(
+        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32),
+          labels=None,
+          train_op_fn=_no_op_train_fn)
+
+  def test_train(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=2)])
+
+    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
+    expected_loss = 10.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    self.assertIsNotNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
+      }, summary_str, tol)
+
+  def test_train_with_optimizer(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    expected_train_result = 'my_train_op'
+
+    class _Optimizer(object):
+
+      def minimize(self, loss, global_step):
+        del global_step
+        return string_ops.string_join(
+            [constant_op.constant(expected_train_result),
+             string_ops.as_string(loss, precision=2)])
+
+    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
+    expected_loss = 10.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        optimizer=_Optimizer())
+
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run((spec.loss, spec.train_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
+          train_result)
+
+  def test_train_with_update_ops(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes)
+
+    with ops.Graph().as_default():
+      w = variables.Variable(1)
+      update_op = w.assign_add(1)
+      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
+
+      t = variables.Variable('')
+      expected_train_result = b'my_train_op'
+      def _train_op_fn(loss):
+        del loss
+        return t.assign(expected_train_result)
+
+      spec = head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32),
+          labels=np.array(((1,), (1,)), dtype=np.int64),
+          train_op_fn=_train_op_fn)
+
+      with self.cached_session() as sess:
+        _initialize_variables(self, spec.scaffold)
+        sess.run(spec.train_op)
+        w_value, t_value = sess.run([w, t])
+        self.assertEqual(2, w_value)
+        self.assertEqual(expected_train_result, t_value)
+
+  def test_train_summaries_with_head_name(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, name='some_multiclass_head')
+
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
+    expected_loss = 10.
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+
+    def _train_op_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    # Assert summaries.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      summary_str = sess.run(spec.scaffold.summary_op)
+      _assert_simple_summaries(self, {
+          '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS):
+              expected_loss,
+          '{}/some_multiclass_head'.format(metric_keys.MetricKeys.LOSS_MEAN):
+              expected_loss / 2,
+      }, summary_str, tol)
+
+  def test_train_with_regularization_losses(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+
+    logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=2)])
+
+    regularization_losses = [1.5, 0.5]
+    expected_regularization_loss = 2.
+    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
+    #                    = sum(10, 0) / 2 = 5.
+    # loss = unregularized_loss + regularization_loss = 7.
+    expected_loss = 7.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn,
+        regularization_losses=regularization_losses)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          metric_keys.MetricKeys.LOSS_REGULARIZATION: (
+              expected_regularization_loss),
+      }, summary_str, tol)
+
+  def test_train_one_dim_create_loss(self):
+    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, weight_column='label_weights')
+
+    logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
+    labels_rank_1 = np.array((1, 2, 2,), dtype=np.int64)
+    weights_rank_1 = np.array((1., 2., 3.,), dtype=np.float64)
+    features = {
+        'x': np.array(((42,),), dtype=np.float32),
+        'label_weights': weights_rank_1
+    }
+
+    # unreduced_loss = cross_entropy(labels, logits) = [10, 10, 0].
+    expected_unreduced_loss = [[10.], [10.], [0.]]
+    # weights are reshaped to [3, 1] to match logits.
+    expected_weights = [[1.], [2.], [3.]]
+    # training_loss = 1 * 10 + 2 * 10 + 3 * 0 = 30.
+    expected_training_loss = 30.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels_rank_1)
+    tol = 1e-2
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_train_one_dim(self):
+    """Tests train with 1D labels and weights (shape [batch_size])."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, weight_column='label_weights')
+
+    logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
+    labels_rank_1 = np.array((1, 2, 2,), dtype=np.int64)
+    weights_rank_1 = np.array((1., 2., 3.,), dtype=np.float64)
+
+    self.assertEqual((3,), labels_rank_1.shape)
+    self.assertEqual((3,), weights_rank_1.shape)
+
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=2)])
+
+    # loss = sum(cross_entropy(labels, logits) * [1, 2, 3])
+    #      = sum([10, 10, 0] * [1, 2, 3]) = 30
+    expected_loss = 30.
+
+    features = {
+        'x': np.array(((42,),), dtype=np.float32),
+        'label_weights': weights_rank_1
+    }
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels_rank_1,
+        train_op_fn=_train_op_fn)
+
+    self.assertIsNotNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          metric_keys.MetricKeys.LOSS_MEAN: (
+              expected_loss / np.sum(weights_rank_1)),
+      }, summary_str, tol)
+
+  def test_train_with_vocabulary_create_loss(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
+
+    logits = [[10., 0, 0], [0, 10, 0]]
+    labels = [[b'iroh'], [b'iroh']]
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # loss = cross_entropy(labels, logits) = [10, 0].
+    expected_training_loss = 10.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
+
+  def test_train_with_vocabulary(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, label_vocabulary=['aang', 'iroh', 'zuko'])
+
+    logits = [[10., 0, 0], [0, 10, 0]]
+    labels = [[b'iroh'], [b'iroh']]
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+
+    def _train_op_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10.
+    expected_loss = 10.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss = sess.run(spec.loss)
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+
+  def test_weighted_multi_example_train(self):
+    n_classes = 3
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes, weight_column='label_weights')
+
+    # Create estimator spec.
+    logits = np.array(((10, 0, 0), (0, 10, 0), (0, 0, 10),), dtype=np.float32)
+    labels = np.array(((1,), (2,), (2,)), dtype=np.int64)
+    weights_3x1 = np.array(((1.,), (2.,), (3.,)), dtype=np.float64)
+    expected_train_result = 'my_train_op'
+    # loss = sum(cross_entropy(labels, logits) * [1, 2, 3])
+    #      = sum([10, 10, 0] * [1, 2, 3]) = 30
+    expected_loss = 30.
+
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=2)])
+
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array(((42,),), dtype=np.float32),
+            'label_weights': weights_3x1,
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    self.assertIsNotNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
+          train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          # loss mean = sum(cross_entropy(labels, logits) * [1,2,3]) / (1+2+3)
+          #      = sum([10, 10, 0] * [1, 2, 3]) / 6 = 30 / 6
+          metric_keys.MetricKeys.LOSS_MEAN:
+              expected_loss / np.sum(weights_3x1),
+      }, summary_str, tol)
+
+  def test_multi_dim_weighted_train_create_loss(self):
+    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, weight_column='weights')
+
+    logits = np.array([[[10, 0, 0], [12, 0, 0]],
+                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
+    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
+    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
+
+    # unreduced_loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
+    expected_unreduced_loss = [[[0.], [12.]], [[0.], [15.]]]
+    # weights are reshaped to [2, 2, 1] to match logits.
+    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
+    # training_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5
+    expected_training_loss = 55.5
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    tol = 1e-2
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(), rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_multi_dim_weighted_train(self):
+    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, weight_column='weights')
+
+    logits = np.array([[[10, 0, 0], [12, 0, 0]],
+                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
+    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
+    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=2)])
+
+    # loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
+    # weighted_sum_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5
+    expected_loss = 55.5
+    spec = head.create_estimator_spec(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run((spec.loss, spec.train_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
+          train_result)
+
+  def test_multi_dim_train_weights_wrong_inner_dim(self):
+    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 1]."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, weight_column='weights')
+    logits = np.array([[[10, 0, 0], [12, 0, 0]],
+                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
+    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
+    weights = np.array([[1.], [2.]], dtype=np.float32)
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_no_op_train_fn)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'):
+        spec.loss.eval()
+
+  def test_multi_dim_train_weights_wrong_outer_dim(self):
+    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2, 3]."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, weight_column='weights')
+    logits = np.array([[[10, 0, 0], [12, 0, 0]],
+                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
+    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
+    weights = np.array([[[1., 1.1, 1.2], [1.5, 1.6, 1.7]],
+                        [[2., 2.1, 2.2], [2.5, 2.6, 2.7]]])
+    weights_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features={'weights': weights_placeholder},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_no_op_train_fn)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 3\]'):
+        spec.loss.eval({weights_placeholder: weights})
+
+  def test_multi_dim_weighted_eval(self):
+    """Logits of shape [2, 2, 2], labels [2, 2, 1], weights [2, 2]."""
+    head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
+        n_classes=3, weight_column='weights')
+    logits = np.array([[[10, 0, 0], [12, 0, 0]],
+                       [[0, 10, 0], [0, 15, 0]]], dtype=np.float32)
+    labels = np.array([[[0], [1]], [[1], [2]]], dtype=np.int64)
+    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
+    # loss = cross_entropy(labels, logits) = [[0, 12], [0, 15]].
+    # weighted_sum_loss = 1*0 + 1.5*12 + 2*0 + 2.5*15 = 55.5
+    expected_loss = 55.5
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_loss / np.sum(weights),
+        keys.ACCURACY: (1.*1. + 1.5*0. + 2.*1. + 2.5*0.) / np.sum(weights),
+    }
+
+    # Assert predictions, loss, and metrics.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
+          rtol=tol, atol=tol)
+
+
+class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase):
+
+  def setUp(self):
+    ops.reset_default_graph()
+
+  def test_threshold_too_small(self):
+    with self.assertRaisesRegexp(ValueError, r'thresholds not in \(0, 1\)'):
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          thresholds=(0., 0.5))
+
+  def test_threshold_too_large(self):
+    with self.assertRaisesRegexp(ValueError, r'thresholds not in \(0, 1\)'):
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          thresholds=(0.5, 1.))
+
+  def test_invalid_loss_reduction(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          loss_reduction='invalid_loss_reduction')
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: none'):
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          loss_reduction=losses.Reduction.NONE)
+
+  def test_loss_fn_arg_labels_missing(self):
+    def _loss_fn(logits):
+      del logits  # Unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn must contain argument: labels\. '
+        r'Given arguments: \(\'logits\',\)'):
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_logits_missing(self):
+    def _loss_fn(labels):
+      del labels  # unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn must contain argument: logits\. '
+        r'Given arguments: \(\'labels\',\)'):
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_features_ok(self):
+    def _loss_fn(labels, logits, features):
+      del labels, logits, features  # Unused
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_invalid(self):
+    def _loss_fn(labels, logits, name=None):
+      del labels, logits, name  # Unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn has unexpected args: \[\'name\'\]'):
+      head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+          loss_fn=_loss_fn)
+
+  def test_invalid_logits_shape(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+    self.assertEqual(1, head.logits_dimension)
+
+    # Logits should be shape (batch_size, 1).
+    logits_2x2 = np.array(((45., 44.), (41., 42.),))
+
+    # Static shape.
+    with self.assertRaisesRegexp(ValueError, 'logits shape'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42.,),))},
+          mode=model_fn.ModeKeys.PREDICT,
+          logits=logits_2x2)
+
+    # Dynamic shape.
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),))},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits_placeholder)
+    with self.cached_session():
+      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
+        spec.predictions[prediction_keys.PredictionKeys.PROBABILITIES].eval({
+            logits_placeholder: logits_2x2
+        })
+
+  def test_invalid_labels_shape(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+    self.assertEqual(1, head.logits_dimension)
+
+    # Labels and logits should be shape (batch_size, 1).
+    labels_2x2 = np.array(((45., 44.), (41., 42.),))
+    logits_2x1 = np.array(((45.,), (41.,),))
+
+    # Static shape.
+    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
+      head.create_loss(
+          features={'x': np.array(((42.,),))},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=logits_2x1,
+          labels=labels_2x2)
+
+    # Dynamic shape.
+    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    training_loss = head.create_loss(
+        features={'x': np.array(((42.,),))},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_placeholder,
+        labels=labels_placeholder)[0]
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[2 2\]'):
+        training_loss.eval({
+            logits_placeholder: logits_2x1,
+            labels_placeholder: labels_2x2
+        })
+
+  def test_incompatible_labels_shape(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+    self.assertEqual(1, head.logits_dimension)
+
+    # Both logits and labels should be shape (batch_size, 1).
+    values_2x1 = np.array(((0.,), (1.,),))
+    values_3x1 = np.array(((0.,), (1.,), (0.,),))
+
+    # Static shape.
+    with self.assertRaisesRegexp(
+        ValueError, 'logits and labels must have the same shape'):
+      head.create_loss(
+          features={'x': values_2x1},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=values_2x1,
+          labels=values_3x1)
+    with self.assertRaisesRegexp(
+        ValueError, 'logits and labels must have the same shape'):
+      head.create_loss(
+          features={'x': values_2x1},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=values_3x1,
+          labels=values_2x1)
+
+    # Dynamic shape.
+    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    training_loss = head.create_loss(
+        features={'x': values_2x1},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_placeholder,
+        labels=labels_placeholder)[0]
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[expected_labels_shape: \] \[3 1\] \[labels_shape: \] \[2 1\]'):
+        training_loss.eval({
+            labels_placeholder: values_2x1,
+            logits_placeholder: values_3x1
+        })
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[expected_labels_shape: \] \[2 1\] \[labels_shape: \] \[3 1\]'):
+        training_loss.eval({
+            labels_placeholder: values_3x1,
+            logits_placeholder: values_2x1
+        })
+
+  def test_name(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        name='foo')
+    self.assertEqual('foo', head.name)
+
+  def test_predict(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = [[0.3], [-0.4]]
+    expected_logistics = [[0.574443], [0.401312]]
+    expected_probabilities = [[0.425557, 0.574443], [0.598688, 0.401312]]
+    expected_class_ids = [[1], [0]]
+    expected_classes = [[b'1'], [b'0']]
+    expected_export_classes = [[b'0', b'1']] * 2
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    # Assert spec contains expected tensors.
+    self.assertIsNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNone(spec.train_op)
+    self.assertItemsEqual(('classification', 'regression', 'predict',
+                           _DEFAULT_SERVING_KEY), spec.export_outputs.keys())
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      predictions = sess.run(spec.predictions)
+      self.assertAllClose(logits,
+                          predictions[prediction_keys.PredictionKeys.LOGITS])
+      self.assertAllClose(expected_logistics,
+                          predictions[prediction_keys.PredictionKeys.LOGISTIC])
+      self.assertAllClose(
+          expected_probabilities,
+          predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+      self.assertAllClose(expected_class_ids,
+                          predictions[prediction_keys.PredictionKeys.CLASS_IDS])
+      self.assertAllEqual(expected_classes,
+                          predictions[prediction_keys.PredictionKeys.CLASSES])
+      self.assertAllClose(
+          expected_probabilities,
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].scores))
+      self.assertAllEqual(
+          expected_export_classes,
+          sess.run(spec.export_outputs[_DEFAULT_SERVING_KEY].classes))
+      self.assertAllClose(expected_logistics,
+                          sess.run(spec.export_outputs['regression'].value))
+
+  def test_predict_with_vocabulary_list(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        label_vocabulary=['aang', 'iroh'])
+
+    logits = [[1.], [0.]]
+    expected_classes = [[b'iroh'], [b'aang']]
+
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertAllEqual(
+          expected_classes,
+          sess.run(spec.predictions[prediction_keys.PredictionKeys.CLASSES]))
+
+  def test_eval_create_loss(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+
+    # loss = cross_entropy(labels, logits) = [0, 41].
+    expected_training_loss = 41.
+    # Create loss.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
+
+  def test_eval_labels_none(self):
+    """Tests that error is raised when labels is None."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    with self.assertRaisesRegexp(
+        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=np.array(((45,), (-41,),), dtype=np.float32),
+          labels=None)
+
+  def test_eval(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
+        # loss_mean = loss/2 = 41./2 = 20.5
+        keys.LOSS_MEAN: 20.5,
+        keys.ACCURACY: 1./2,
+        keys.PRECISION: 1.,
+        keys.RECALL: 1./2,
+        keys.PREDICTION_MEAN: 1./2,
+        keys.LABEL_MEAN: 2./2,
+        keys.ACCURACY_BASELINE: 2./2,
+        keys.AUC: 0.,
+        keys.AUC_PR: 1.,
+    }
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(41., loss)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops})
+
+  def test_eval_metric_ops_with_head_name(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        name='some_binary_head')
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    expected_metric_keys = [
+        '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS_MEAN),
+        '{}/some_binary_head'.format(metric_keys.MetricKeys.ACCURACY),
+        '{}/some_binary_head'.format(metric_keys.MetricKeys.PRECISION),
+        '{}/some_binary_head'.format(metric_keys.MetricKeys.RECALL),
+        '{}/some_binary_head'.format(metric_keys.MetricKeys.PREDICTION_MEAN),
+        '{}/some_binary_head'.format(metric_keys.MetricKeys.LABEL_MEAN),
+        '{}/some_binary_head'.format(metric_keys.MetricKeys.ACCURACY_BASELINE),
+        '{}/some_binary_head'.format(metric_keys.MetricKeys.AUC),
+        '{}/some_binary_head'.format(metric_keys.MetricKeys.AUC_PR),
+    ]
+    self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys())
+
+  def test_eval_with_regularization_losses(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    regularization_losses = [1.5, 0.5]
+    expected_regularization_loss = 2.
+    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
+    #                    = sum(0, 41) / 2 = 20.5
+    expected_unregularized_loss = 20.5
+    expected_regularized_loss = (
+        expected_unregularized_loss + expected_regularization_loss)
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels,
+        regularization_losses=regularization_losses)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_unregularized_loss,
+        keys.LOSS_REGULARIZATION: expected_regularization_loss,
+        keys.ACCURACY: 1./2,
+        keys.PRECISION: 1.,
+        keys.RECALL: 1./2,
+        keys.PREDICTION_MEAN: 1./2,
+        keys.LABEL_MEAN: 2./2,
+        keys.ACCURACY_BASELINE: 2./2,
+        keys.AUC: 0.,
+        keys.AUC_PR: 1.,
+    }
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_regularized_loss, loss)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops})
+
+  def test_eval_with_vocabulary_list_create_loss(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        label_vocabulary=['aang', 'iroh'])
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = [[b'iroh'], [b'iroh']]
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # Create loss.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(41., training_loss.eval())
+
+  def test_eval_with_vocabulary_list(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        label_vocabulary=['aang', 'iroh'])
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = [[b'iroh'], [b'iroh']]
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      sess.run(update_ops)
+      self.assertAllClose(1. / 2,
+                          value_ops[metric_keys.MetricKeys.ACCURACY].eval())
+
+  def test_eval_with_thresholds_create_loss(self):
+    thresholds = [0.25, 0.5, 0.75]
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        thresholds=thresholds)
+    logits = np.array(((-1,), (1,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # probabilities[i] = 1/(1 + exp(-logits[i])) =>
+    # probabilities = [1/(1 + exp(1)), 1/(1 + exp(-1))] = [0.269, 0.731]
+    # loss = -ln(probabilities[label[i]])) = [-ln(0.269), -ln(0.731)]
+    #      = [1.31304389, 0.31334182]
+    # weighted sum loss = 1.62638571
+    expected_training_loss = 1.62638571
+    # Create loss.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
+
+  def test_eval_with_thresholds(self):
+    thresholds = [0.25, 0.5, 0.75]
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        thresholds=thresholds)
+    logits = np.array(((-1,), (1,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    # probabilities[i] = 1/(1 + exp(-logits[i])) =>
+    # probabilities = [1/(1 + exp(1)), 1/(1 + exp(-1))] = [0.269, 0.731]
+    # loss = -sum(ln(probabilities[label[i]])) = -ln(0.269) -ln(0.731)
+    #      = 1.62652338
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: 1.62652338 / 2.,
+        keys.ACCURACY: 1./2,
+        keys.PRECISION: 1.,
+        keys.RECALL: .5,
+        keys.PREDICTION_MEAN: 1./2,
+        keys.LABEL_MEAN: 2./2,
+        keys.ACCURACY_BASELINE: 2./2,
+        keys.AUC: 0.,
+        keys.AUC_PR: 1.,
+        keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 1.,
+        keys.PRECISION_AT_THRESHOLD % thresholds[0]: 1.,
+        keys.RECALL_AT_THRESHOLD % thresholds[0]: 1.,
+        keys.ACCURACY_AT_THRESHOLD % thresholds[1]: .5,
+        keys.PRECISION_AT_THRESHOLD % thresholds[1]: 1.,
+        keys.RECALL_AT_THRESHOLD % thresholds[1]: .5,
+        keys.ACCURACY_AT_THRESHOLD % thresholds[2]: 0.,
+        keys.PRECISION_AT_THRESHOLD % thresholds[2]: 0.,
+        keys.RECALL_AT_THRESHOLD % thresholds[2]: 0.,
+    }
+    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(1.62652338, loss)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval()
+                             for k in value_ops},
+          atol=tol,
+          rtol=tol)
+
+  def test_train_create_loss(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.float64)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # unreduced_loss = cross_entropy(labels, logits) = [0, 41]
+    expected_unreduced_loss = [[0.], [41.]]
+    # weights default to 1.
+    expected_weights = 1.
+    # training loss = 1 * 0 + 1 * 41
+    expected_training_loss = 41.
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights)
+
+  def test_train_create_loss_loss_reduction(self):
+    """Tests create_loss with loss_reduction."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.float64)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # unreduced_loss = cross_entropy(labels, logits) = [0, 41]
+    expected_unreduced_loss = [[0.], [41.]]
+    # weights default to 1.
+    expected_weights = 1.
+    # training loss = (1 * 0 + 1 * 41) / num_nonzero_weights
+    expected_training_loss = 41. / 2.
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights)
+
+  def test_eval_create_loss_loss_fn(self):
+    """Tests head.create_loss for eval mode and custom loss_fn."""
+    loss = np.array([[1.], [2.]], dtype=np.float32)
+    logits_input = np.array([[-10.], [10.]], dtype=np.float32)
+    labels_input = np.array([[1], [0]], dtype=np.int64)
+    def _loss_fn(labels, logits):
+      check_labels = control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(labels, labels_input)),
+          data=[labels])
+      check_logits = control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(logits, logits_input)),
+          data=[logits])
+      with ops.control_dependencies([check_labels, check_logits]):
+        return constant_op.constant(loss)
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        loss_fn=_loss_fn)
+
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_input,
+        labels=labels_input)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(np.sum(loss), actual_training_loss.eval())
+
+  def test_eval_create_loss_loss_fn_wrong_shape(self):
+    """Tests custom loss_fn that returns Tensor of unexpected shape."""
+    loss = np.array([1., 2.], dtype=np.float32)
+    def _loss_fn(labels, logits):
+      del labels, logits  # Unused
+      return constant_op.constant(loss)
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        loss_fn=_loss_fn)
+
+    logits = np.array([[-10.], [10.]], dtype=np.float32)
+    labels = np.array([[1], [0]], dtype=np.int64)
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 1\]\. \] '
+          r'\[logits_shape: \] \[2 1\] \[loss_shape: \] \[2\]'):
+        actual_training_loss.eval()
+
+  def test_train_labels_none(self):
+    """Tests that error is raised when labels is None."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    with self.assertRaisesRegexp(
+        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=np.array(((45,), (-41,),), dtype=np.float32),
+          labels=None,
+          train_op_fn=_no_op_train_fn)
+
+  def test_train(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.float64)
+    expected_train_result = b'my_train_op'
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
+    expected_loss = 41.
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          # loss_mean = loss/2 = 41/2 = 20.5
+          metric_keys.MetricKeys.LOSS_MEAN: 20.5,
+      }, summary_str)
+
+  def test_train_with_optimizer(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.float64)
+    expected_train_result = b'my_train_op'
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
+    expected_loss = 41.
+
+    class _Optimizer(object):
+
+      def minimize(self, loss, global_step):
+        del global_step
+        with ops.control_dependencies((check_ops.assert_equal(
+            math_ops.to_float(expected_loss), math_ops.to_float(loss),
+            name='assert_loss'),)):
+          return constant_op.constant(expected_train_result)
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        optimizer=_Optimizer())
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run((spec.loss, spec.train_op))
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+
+  def test_train_with_update_ops(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    with ops.Graph().as_default():
+      w = variables.Variable(1)
+      update_op = w.assign_add(1)
+      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
+
+      t = variables.Variable('')
+      expected_train_result = b'my_train_op'
+      def _train_op_fn(loss):
+        del loss
+        return t.assign(expected_train_result)
+
+      spec = head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=np.array(((45,), (-41,),), dtype=np.float32),
+          labels=np.array(((1,), (1,),), dtype=np.float64),
+          train_op_fn=_train_op_fn)
+
+      with self.cached_session() as sess:
+        _initialize_variables(self, spec.scaffold)
+        sess.run(spec.train_op)
+        w_value, t_value = sess.run([w, t])
+        self.assertEqual(2, w_value)
+        self.assertEqual(expected_train_result, t_value)
+
+  def test_train_summaries_with_head_name(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        name='some_binary_head')
+
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.float64)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41
+    expected_loss = 41.
+
+    def _train_op_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+    # Assert summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      summary_str = sess.run(spec.scaffold.summary_op)
+      _assert_simple_summaries(
+          self,
+          {
+              '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS):
+                  expected_loss,
+              # loss_mean = loss/2 = 41/2 = 20.5
+              '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS_MEAN):
+                  20.5,
+          },
+          summary_str)
+
+  def test_train_with_regularization_losses(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+
+    logits = np.array(((45,), (-41,),), dtype=np.float32)
+    labels = np.array(((1,), (1,),), dtype=np.float64)
+    expected_train_result = b'my_train_op'
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    regularization_losses = [1.5, 0.5]
+    expected_regularization_loss = 2.
+    # unregularized_loss = sum(cross_entropy(labels, logits)) / batch_size
+    #                    = sum(0, 41) / 2 = 20.5
+    # loss = unregularized_loss + regularization_loss = 7.
+    expected_loss = 22.5
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn,
+        regularization_losses=regularization_losses)
+
+    # Assert predictions, loss, train_op, and summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((spec.loss, spec.train_op,
+                                                  spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          metric_keys.MetricKeys.LOSS_REGULARIZATION: (
+              expected_regularization_loss),
+      }, summary_str)
+
+  def test_float_labels_invalid_values(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
+    labels = np.array([[1.2], [0.4]], dtype=np.float32)
+    features = {'x': np.array([[42]], dtype=np.float32)}
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)[0]
+    with self.assertRaisesRegexp(
+        errors.InvalidArgumentError,
+        r'Labels must <= n_classes - 1'):
+      with self.cached_session():
+        _initialize_variables(self, monitored_session.Scaffold())
+        training_loss.eval()
+
+  def test_float_labels_train_create_loss(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
+    labels = np.array([[0.8], [0.4]], dtype=np.float32)
+    features = {'x': np.array([[42]], dtype=np.float32)}
+    # loss = cross_entropy(labels, logits)
+    #      = -label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i])
+    #      = [-0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5)),
+    #         -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))]
+    #      = [0.57407698418, 0.67435524446]
+    # weighted sum loss = 0.57407698418 + 0.67435524446
+    expected_training_loss = 1.24843222864
+    # Create loss.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
+
+  def test_float_labels_train(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
+    labels = np.array([[0.8], [0.4]], dtype=np.float32)
+    expected_train_result = b'my_train_op'
+    features = {'x': np.array([[42]], dtype=np.float32)}
+    # loss = sum(cross_entropy(labels, logits))
+    #      = sum(-label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i]))
+    #      = -0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5))
+    #        -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))
+    #      = 1.2484322
+    expected_loss = 1.2484322
+    def _train_op_fn(loss):
+      with ops.control_dependencies((dnn_testing_utils.assert_close(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss)),)):
+        return constant_op.constant(expected_train_result)
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    # Assert predictions, loss, train_op, and summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run((spec.loss, spec.train_op))
+      self.assertAlmostEqual(expected_loss, loss, delta=1.e-5)
+      self.assertEqual(expected_train_result, train_result)
+
+  def test_float_labels_eval_create_loss(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
+    labels = np.array([[0.8], [0.4]], dtype=np.float32)
+    features = {'x': np.array([[42]], dtype=np.float32)}
+    # loss = cross_entropy(labels, logits)
+    #      = -label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i])
+    #      = [-0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5)),
+    #         -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))]
+    #      = [0.57407698418, 0.67435524446]
+    # weighted sum loss = 0.57407698418 + 0.67435524446
+    expected_training_loss = 1.24843222864
+    # Create loss.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(), rtol=1e-2, atol=1e-2)
+
+  def test_float_labels_eval(self):
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss()
+
+    logits = np.array([[0.5], [-0.3]], dtype=np.float32)
+    labels = np.array([[0.8], [0.4]], dtype=np.float32)
+    features = {'x': np.array([[42]], dtype=np.float32)}
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    # loss = sum(cross_entropy(labels, logits))
+    #      = sum(-label[i]*sigmoid(logit[i]) -(1-label[i])*sigmoid(-logit[i]))
+    #      = -0.8 * log(sigmoid(0.5)) -0.2 * log(sigmoid(-0.5))
+    #        -0.4 * log(sigmoid(-0.3)) -0.6 * log(sigmoid(0.3))
+    #      = 1.2484322
+    expected_loss = 1.2484322
+
+    # Assert loss.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAlmostEqual(expected_loss, loss, delta=1.e-5)
+      self.assertAlmostEqual(
+          expected_loss / 2., metrics[metric_keys.MetricKeys.LOSS_MEAN])
+
+  def test_weighted_multi_example_predict(self):
+    """3 examples, 1 batch."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='label_weights')
+
+    # Create estimator spec.
+    logits = np.array(((45,), (-41,), (44,)), dtype=np.int32)
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
+            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32),
+        },
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      predictions = sess.run(spec.predictions)
+      self.assertAllClose(
+          logits.astype(np.float32),
+          predictions[prediction_keys.PredictionKeys.LOGITS])
+      self.assertAllClose(
+          _sigmoid(logits).astype(np.float32),
+          predictions[prediction_keys.PredictionKeys.LOGISTIC])
+      self.assertAllClose(
+          [[0., 1.], [1., 0.],
+           [0., 1.]], predictions[prediction_keys.PredictionKeys.PROBABILITIES])
+      self.assertAllClose([[1], [0], [1]],
+                          predictions[prediction_keys.PredictionKeys.CLASS_IDS])
+      self.assertAllEqual([[b'1'], [b'0'], [b'1']],
+                          predictions[prediction_keys.PredictionKeys.CLASSES])
+
+  def test_weighted_multi_example_eval(self):
+    """3 examples, 1 batch."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='label_weights')
+
+    # Create estimator spec.
+    logits = np.array(((45,), (-41,), (44,)), dtype=np.int32)
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
+            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32),
+        },
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=np.array(((1,), (1,), (0,)), dtype=np.int32))
+
+    # label_mean = (1*1 + .1*1 + 1.5*0)/(1 + .1 + 1.5) = 1.1/2.6
+    #            = .42307692307
+    expected_label_mean = .42307692307
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        # losses = label_weights*cross_entropy(labels, logits)
+        #        = (1*0 + .1*41 + 1.5*44) = (1, 4.1, 66)
+        # loss = sum(losses) = 1 + 4.1 + 66 = 70.1
+        # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
+        #           = 70.1/2.6 = 26.9615384615
+        keys.LOSS_MEAN: 26.9615384615,
+        # accuracy = (1*1 + .1*0 + 1.5*0)/(1 + .1 + 1.5) = 1/2.6 = .38461538461
+        keys.ACCURACY: .38461538461,
+        keys.PRECISION: 1./2.5,
+        keys.RECALL: 1./1.1,
+        # prediction_mean = (1*1 + .1*0 + 1.5*1)/(1 + .1 + 1.5) = 2.5/2.6
+        #                 = .96153846153
+        keys.PREDICTION_MEAN: .96153846153,
+        keys.LABEL_MEAN: expected_label_mean,
+        keys.ACCURACY_BASELINE: 1 - expected_label_mean,
+        keys.AUC: .45454565,
+        keys.AUC_PR: .6737757325172424,
+    }
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(70.1, loss)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops})
+
+  def test_train_one_dim_create_loss(self):
+    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='label_weights')
+
+    # Create estimator spec.
+    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
+    labels_rank_1 = np.array((1., 1., 0.,))
+    weights_rank_1 = np.array(((1., .1, 1.5,)), dtype=np.float64)
+    features = {
+        'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
+        'label_weights': weights_rank_1,
+    }
+    # unreduced_loss = cross_entropy(labels, logits) = [0, 41, 44]
+    expected_unreduced_loss = [[0.], [41.], [44.]]
+    # weights are reshaped to [3, 1] to match logits.
+    expected_weights = [[1.], [.1], [1.5]]
+    # training loss = 1 * 0 + .1 * 41 + 1.5 * 44
+    expected_training_loss = 70.1
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels_rank_1)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(),
+          rtol=1e-2, atol=1e-2)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(),
+          rtol=1e-2, atol=1e-2)
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_train_one_dim(self):
+    """Tests train with 1D labels and weights (shape [batch_size])."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='label_weights')
+
+    # Create estimator spec.
+    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
+    labels_rank_1 = np.array((1., 1., 0.,))
+    weights_rank_1 = np.array(((1., .1, 1.5,)), dtype=np.float64)
+    self.assertEqual((3,), labels_rank_1.shape)
+    self.assertEqual((3,), weights_rank_1.shape)
+    features = {
+        'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
+        'label_weights': weights_rank_1,
+    }
+    expected_train_result = b'my_train_op'
+    # losses = label_weights*cross_entropy(labels, logits)
+    #        = (1*0 + .1*41 + 1.5*44) = (1, 4.1, 66)
+    # loss = sum(losses) = 1 + 4.1 + 66 = 70.1
+    expected_loss = 70.1
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels_rank_1,
+        train_op_fn=_train_op_fn)
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertIsNotNone(spec.train_op)
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((
+          spec.loss, spec.train_op, spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
+          #           = 70.1/2.6 = 26.9615384615
+          metric_keys.MetricKeys.LOSS_MEAN: 26.9615384615,
+      }, summary_str)
+
+  def test_weighted_multi_example_train(self):
+    """3 examples, 1 batch."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='label_weights')
+
+    # Create estimator spec.
+    logits = np.array(((45,), (-41,), (44,)), dtype=np.float32)
+    expected_train_result = b'my_train_op'
+    # losses = label_weights*cross_entropy(labels, logits)
+    #        = (1*0 + .1*41 + 1.5*44) = (1, 4.1, 66)
+    # loss = sum(losses) = 1 + 4.1 + 66 = 70.1
+    expected_loss = 70.1
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array(((42.,), (43.,), (44.,)), dtype=np.float32),
+            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float64),
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=np.array(((1.,), (1.,), (0.,))),
+        train_op_fn=_train_op_fn)
+
+    # Assert spec contains expected tensors.
+    self.assertIsNotNone(spec.loss)
+    self.assertIsNotNone(spec.train_op)
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      loss, train_result, summary_str = sess.run((
+          spec.loss, spec.train_op, spec.scaffold.summary_op))
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          # loss_mean = loss/sum(label_weights) = 70.1/(1 + .1 + 1.5)
+          #           = 70.1/2.6 = 26.9615384615
+          metric_keys.MetricKeys.LOSS_MEAN: 26.9615384615,
+      }, summary_str)
+
+  def test_multi_dim_weighted_train_create_loss(self):
+    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='weights')
+
+    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
+    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
+    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
+    # unreduced_loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
+    expected_unreduced_loss = [[[10.], [0.]], [[0.], [12.]]]
+    # Weights are reshaped to [2, 2, 1] to match logits.
+    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
+    # training_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
+    expected_training_loss = 40.
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    tol = 1e-2
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(
+          expected_training_loss, training_loss.eval(),
+          rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_unreduced_loss, unreduced_loss.eval(),
+          rtol=tol, atol=tol)
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_multi_dim_weighted_train(self):
+    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='weights')
+
+    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
+    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
+    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
+    # loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
+    # weighted_sum_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
+    expected_loss = 40.
+    expected_train_result = 'my_train_op'
+    def _train_op_fn(loss):
+      return string_ops.string_join(
+          [constant_op.constant(expected_train_result),
+           string_ops.as_string(loss, precision=2)])
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    # Assert predictions, loss, train_op, and summaries.
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run((spec.loss, spec.train_op))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      self.assertEqual(
+          six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)),
+          train_result)
+
+  def test_multi_dim_train_weights_wrong_inner_dim(self):
+    """Logits and labels of shape [2, 2, 1], weights [2, 1]."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='weights')
+
+    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
+    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
+    weights = np.array([[1.], [2.]], dtype=np.float32)
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_no_op_train_fn)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[logits_shape: \] \[2 2 1\] \[weights_shape: \] \[2 1\]'):
+        spec.loss.eval()
+
+  def test_multi_dim_train_weights_wrong_outer_dim(self):
+    """Logits and labels of shape [2, 2, 1], weights [2, 2, 2]."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='weights')
+
+    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
+    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
+    weights_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features={'weights': weights_placeholder},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_no_op_train_fn)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[logits_shape: \]\s\[2 2 1\]\s\[weights_shape: \]\s\[2 2 2\]'):
+        spec.loss.eval({
+            weights_placeholder: np.array([[[1., 1.1], [1.5, 1.6]],
+                                           [[2., 2.1], [2.5, 2.6]]])})
+
+  def test_multi_dim_weighted_eval(self):
+    """Logits and labels of shape [2, 2, 1], weights [2, 2]."""
+    head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        weight_column='weights')
+
+    logits = np.array([[[10], [-10]], [[12], [-12]]], dtype=np.float32)
+    labels = np.array([[[0], [0]], [[1], [1]]], dtype=np.float64)
+    weights = np.array([[1., 1.5], [2., 2.5]], dtype=np.float32)
+    # loss = cross_entropy(labels, logits) = [[10, 0], [0, 12]].
+    # weighted_sum_loss = 1*10 + 1.5*0 + 2*0 + 2.5*12 = 40
+    expected_loss = 40.
+
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features={'weights': weights},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_loss / np.sum(weights),
+        keys.ACCURACY: (1.*0. + 1.5*1. + 2.*1. + 2.5*0.) / np.sum(weights),
+        keys.PRECISION: 2.0/3.0,
+        keys.RECALL: 2.0/4.5,
+        keys.PREDICTION_MEAN: (1.*1 + 1.5*0 + 2.*1 + 2.5*0) / np.sum(weights),
+        keys.LABEL_MEAN: (1.*0 + 1.5*0 + 2.*1 + 2.5*1) / np.sum(weights),
+        keys.ACCURACY_BASELINE: (1.*0 + 1.5*0 + 2.*1 + 2.5*1) / np.sum(weights),
+        # We cannot reliably calculate AUC with only 4 data points, but the
+        # values should not change because of backwards-compatibility.
+        keys.AUC: 0.5222,
+        keys.AUC_PR: 0.7341,
+    }
+
+    tol = 1e-2
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      loss, metrics = sess.run((spec.loss, update_ops))
+      self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics, rtol=tol, atol=tol)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops},
+          rtol=tol, atol=tol)
+
+
+class RegressionHead(test.TestCase):
+
+  def setUp(self):
+    ops.reset_default_graph()
+
+  def test_invalid_label_dimension(self):
+    with self.assertRaisesRegexp(ValueError, r'Invalid label_dimension'):
+      head_lib._regression_head(label_dimension=-1)
+    with self.assertRaisesRegexp(ValueError, r'Invalid label_dimension'):
+      head_lib._regression_head(label_dimension=0)
+
+  def test_invalid_loss_reduction(self):
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: invalid_loss_reduction'):
+      head_lib._regression_head(loss_reduction='invalid_loss_reduction')
+    with self.assertRaisesRegexp(
+        ValueError, r'Invalid loss_reduction: none'):
+      head_lib._regression_head(loss_reduction=losses.Reduction.NONE)
+
+  def test_loss_fn_arg_labels_missing(self):
+    def _loss_fn(logits):
+      del logits  # Unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn must contain argument: labels\. '
+        r'Given arguments: \(\'logits\',\)'):
+      head_lib._regression_head(loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_logits_missing(self):
+    def _loss_fn(labels):
+      del labels  # unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn must contain argument: logits\. '
+        r'Given arguments: \(\'labels\',\)'):
+      head_lib._regression_head(loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_features_ok(self):
+    def _loss_fn(labels, logits, features):
+      del labels, logits, features  # Unused
+      head_lib._regression_head(loss_fn=_loss_fn)
+
+  def test_loss_fn_arg_invalid(self):
+    def _loss_fn(labels, logits, name=None):
+      del labels, logits, name  # Unused
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'loss_fn has unexpected args: \[\'name\'\]'):
+      head_lib._regression_head(loss_fn=_loss_fn)
+
+  def test_invalid_logits(self):
+    head = head_lib._regression_head(label_dimension=3)
+    self.assertEqual(3, head.logits_dimension)
+    logits_1d = np.array(((45.,), (41.,),))
+
+    # Static shape.
+    with self.assertRaisesRegexp(ValueError, 'logits shape'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42.,),))},
+          mode=model_fn.ModeKeys.PREDICT,
+          logits=logits_1d)
+
+    # Dynamic shape.
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),))},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits_placeholder)
+    with self.cached_session():
+      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
+        spec.predictions[prediction_keys.PredictionKeys.PREDICTIONS].eval({
+            logits_placeholder: logits_1d
+        })
+
+  def test_incompatible_labels_eval(self):
+    head = head_lib._regression_head(label_dimension=3)
+    self.assertEqual(3, head.logits_dimension)
+    values_3d = np.array(((45., 46., 47.), (41., 42., 43.),))
+    values_1d = np.array(((43.,), (44.,),))
+
+    # Static shape.
+    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
+      head.create_loss(
+          features={'x': values_1d},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=values_3d,
+          labels=values_1d)
+    with self.assertRaisesRegexp(ValueError, 'logits shape'):
+      head.create_estimator_spec(
+          features={'x': values_3d}, labels=values_3d,
+          mode=model_fn.ModeKeys.EVAL, logits=values_1d, train_op_fn=None)
+
+    # Dynamic shape.
+    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    spec = head.create_estimator_spec(
+        features={'x': values_1d},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_placeholder,
+        labels=labels_placeholder)
+    with self.cached_session():
+      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
+        spec.loss.eval({
+            labels_placeholder: values_3d,
+            logits_placeholder: values_1d
+        })
+    training_loss = head.create_loss(
+        features={'x': values_1d},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_placeholder,
+        labels=labels_placeholder)[0]
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
+        training_loss.eval({
+            labels_placeholder: values_1d,
+            logits_placeholder: values_3d
+        })
+
+  def test_incompatible_labels_train(self):
+    head = head_lib._regression_head(label_dimension=3)
+    self.assertEqual(3, head.logits_dimension)
+    values_3d = np.array(((45., 46., 47.), (41., 42., 43.),))
+    values_1d = np.array(((43.,), (44.,),))
+
+    # Static shape.
+    with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'):
+      head.create_loss(
+          features={'x': values_1d},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=values_3d,
+          labels=values_1d)
+
+    with self.assertRaisesRegexp(ValueError, 'logits shape'):
+      head.create_estimator_spec(
+          features={'x': values_3d},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=values_1d,
+          labels=values_3d,
+          train_op_fn=lambda x: x)
+
+    # Dynamic shape.
+    labels_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    logits_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    spec = head.create_estimator_spec(
+        features={'x': values_1d},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits_placeholder,
+        labels=labels_placeholder,
+        train_op_fn=lambda x: x)
+    with self.cached_session():
+      with self.assertRaisesRegexp(errors.OpError, 'logits shape'):
+        spec.loss.eval({
+            labels_placeholder: values_3d,
+            logits_placeholder: values_1d
+        })
+    training_loss = head.create_loss(
+        features={'x': values_1d},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits_placeholder,
+        labels=labels_placeholder)[0]
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[expected_labels_shape: \] \[2 3\] \[labels_shape: \] \[2 1\]'):
+        training_loss.eval({
+            labels_placeholder: values_1d,
+            logits_placeholder: values_3d
+        })
+
+  def test_name(self):
+    head = head_lib._regression_head(name='foo')
+    self.assertEqual('foo', head.name)
+
+  def test_predict(self):
+    head = head_lib._regression_head()
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,),), dtype=np.int32)
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    # Assert spec contains expected tensors.
+    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
+    self.assertIsNone(spec.loss)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNone(spec.train_op)
+    default_serving_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+    self.assertItemsEqual(
+        (default_serving_key, 'predict', 'regression'),
+        spec.export_outputs.keys())
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions.
+    with self.cached_session():
+      _initialize_variables(self, spec.scaffold)
+      self.assertAllClose(logits, spec.predictions[prediction_key].eval())
+      self.assertAllClose(
+          logits, spec.export_outputs[default_serving_key].value.eval())
+      self.assertAllClose(
+          logits, spec.export_outputs['regression'].value.eval())
+      self.assertAllClose(
+          logits, spec.export_outputs['predict'].outputs['predictions'].eval())
+
+  def test_predict_with_inverse_link_fn(self):
+    def _inverse_link_fn(logits):
+      return logits - 10.
+    head = head_lib._regression_head(inverse_link_fn=_inverse_link_fn)
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,),), dtype=np.int32)
+    expected_predictions = np.array(((35,), (31,),), dtype=np.int32)
+    spec = head.create_estimator_spec(
+        features={'x': np.array(((42.,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.PREDICT,
+        logits=logits)
+
+    # Assert spec contains expected tensors.
+    keys = prediction_keys.PredictionKeys
+    self.assertItemsEqual(
+        (keys.PREDICTIONS, keys.LOGITS), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[keys.PREDICTIONS].dtype)
+    self.assertEqual(dtypes.float32, spec.predictions[keys.LOGITS].dtype)
+    default_serving_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+    self.assertItemsEqual(
+        (default_serving_key, 'predict', 'regression'),
+        spec.export_outputs.keys())
+
+    # Assert predictions.
+    with self.cached_session():
+      _initialize_variables(self, spec.scaffold)
+      self.assertAllClose(
+          expected_predictions, spec.predictions[keys.PREDICTIONS].eval())
+      self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval())
+      self.assertAllClose(
+          expected_predictions,
+          spec.export_outputs[default_serving_key].value.eval())
+      self.assertAllClose(
+          expected_predictions, spec.export_outputs['regression'].value.eval())
+      self.assertAllClose(
+          expected_predictions,
+          spec.export_outputs['predict'].outputs['predictions'].eval())
+      self.assertAllClose(
+          logits, spec.export_outputs['predict'].outputs['logits'].eval())
+
+  def test_eval_create_loss(self):
+    head = head_lib._regression_head()
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43,), (44,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # Create loss.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      # loss = [(43-45)^2, (44-41)] = [4, 9]
+      self.assertAllClose(13., training_loss.eval())
+
+  def test_eval_create_loss_loss_fn(self):
+    """Tests head.create_loss for eval mode and custom loss_fn."""
+    loss = np.array([[0., 1.], [2., 3.]], dtype=np.float32)
+    logits_input = np.array([[-1., 1.], [-2., 2.]], dtype=np.float32)
+    labels_input = np.array([[1., 0.], [2., -1.]], dtype=np.float32)
+    def _loss_fn(labels, logits):
+      check_labels = control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(labels, labels_input)),
+          data=[labels])
+      check_logits = control_flow_ops.Assert(
+          math_ops.reduce_all(math_ops.equal(logits, logits_input)),
+          data=[logits])
+      with ops.control_dependencies([check_labels, check_logits]):
+        return constant_op.constant(loss)
+    head = head_lib._regression_head(label_dimension=2, loss_fn=_loss_fn)
+
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits_input,
+        labels=labels_input)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(np.sum(loss), actual_training_loss.eval())
+
+  def test_eval_create_loss_loss_fn_wrong_shape(self):
+    """Tests custom loss_fn that returns Tensor of unexpected shape."""
+    loss = np.array([[1.], [2.]], dtype=np.float32)
+    def _loss_fn(labels, logits):
+      del labels, logits  # Unused
+      return constant_op.constant(loss)
+    head = head_lib._regression_head(label_dimension=2, loss_fn=_loss_fn)
+
+    logits = np.array([[-1., 1.], [-2., 2.]], dtype=np.float32)
+    labels = np.array([[1., 0.], [2., -1.]], dtype=np.float32)
+    actual_training_loss = head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[loss_fn must return Tensor of shape \[D0, D1, ... DN, 2\]\. \] '
+          r'\[logits_shape: \] \[2 2\] \[loss_shape: \] \[2 1\]'):
+        actual_training_loss.eval()
+
+  def test_eval_labels_none(self):
+    """Tests that error is raised when labels is None."""
+    head = head_lib._regression_head()
+
+    with self.assertRaisesRegexp(
+        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.EVAL,
+          logits=np.array(((45,), (41,),), dtype=np.float32),
+          labels=None)
+
+  def test_eval(self):
+    head = head_lib._regression_head()
+    self.assertEqual(1, head.logits_dimension)
+
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43,), (44,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    # Assert spec contains expected tensors.
+    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
+    self.assertEqual(dtypes.float32, spec.loss.dtype)
+    self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
+                           metric_keys.MetricKeys.PREDICTION_MEAN,
+                           metric_keys.MetricKeys.LABEL_MEAN),
+                          spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
+          metric_keys.MetricKeys.LOSS_MEAN]
+      predictions, loss, loss_mean = sess.run((
+          spec.predictions[prediction_key], spec.loss, loss_mean_update_op))
+      self.assertAllClose(logits, predictions)
+      # loss = (43-45)^2 + (44-41)^2 = 4+9 = 13
+      self.assertAllClose(13., loss)
+      # loss_mean = loss/2 = 13/2 = 6.5
+      expected_loss_mean = 6.5
+      # Check results of both update (in `loss_mean`) and value ops.
+      self.assertAllClose(expected_loss_mean, loss_mean)
+      self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())
+
+  def test_eval_metric_ops_with_head_name_for_regression(self):
+    head = head_lib._regression_head(name='some_regression_head')
+    logits = np.array(((1,), (9,)), dtype=np.float32)
+    labels = np.array(((1,), (1,)), dtype=np.int64)
+    features = {'x': np.array(((42,),), dtype=np.int32)}
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    expected_metric_keys = [
+        '{}/some_regression_head'.format(metric_keys.MetricKeys.LOSS_MEAN),
+        '{}/some_regression_head'.format(
+            metric_keys.MetricKeys.PREDICTION_MEAN),
+        '{}/some_regression_head'.format(metric_keys.MetricKeys.LABEL_MEAN),
+    ]
+    self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys())
+
+  def test_eval_with_regularization_losses(self):
+    head = head_lib._regression_head(
+        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+    self.assertEqual(1, head.logits_dimension)
+
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43,), (44,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    regularization_losses = [1.5, 0.5]
+    expected_regularization_loss = 2.
+    # unregularized_loss = ((43-45)^2 + (44-41)^2) / batch_size
+    #                    = (4 + 9) / 2 = 6.5
+    expected_unregularized_loss = 6.5
+    expected_regularized_loss = (
+        expected_unregularized_loss + expected_regularization_loss)
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels,
+        regularization_losses=regularization_losses)
+
+    keys = metric_keys.MetricKeys
+    expected_metrics = {
+        keys.LOSS_MEAN: expected_unregularized_loss,
+        keys.LOSS_REGULARIZATION: expected_regularization_loss,
+        keys.PREDICTION_MEAN: (45 + 41) / 2.0,
+        keys.LABEL_MEAN: (43 + 44) / 2.0,
+    }
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      value_ops = {k: spec.eval_metric_ops[k][0] for k in spec.eval_metric_ops}
+      update_ops = {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+      prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+      predictions, loss, metrics = sess.run((
+          spec.predictions[prediction_key], spec.loss, update_ops))
+      self.assertAllClose(logits, predictions)
+      self.assertAllClose(expected_regularized_loss, loss)
+      # Check results of both update (in `metrics`) and value ops.
+      self.assertAllClose(expected_metrics, metrics)
+      self.assertAllClose(
+          expected_metrics, {k: value_ops[k].eval() for k in value_ops})
+
+  def test_train_create_loss(self):
+    head = head_lib._regression_head()
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43,), (44,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # unreduced_loss = [(43-45)^2, (44-41)] = [4, 9]
+    expected_unreduced_loss = [[4.], [9.]]
+    # weights default to 1.
+    expected_weights = 1
+    # training_loss = 1 * 4 + 1 * 9 = 13
+    expected_training_loss = 13.
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights)
+
+  def test_train_create_loss_loss_reduction(self):
+    """Tests create_loss with loss_reduction."""
+    head = head_lib._regression_head(
+        loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43,), (44,),), dtype=np.int32)
+    features = {'x': np.array(((42,),), dtype=np.float32)}
+    # unreduced_loss = [(43-45)^2, (44-41)] = [4, 9]
+    expected_unreduced_loss = [[4.], [9.]]
+    # weights default to 1.
+    expected_weights = 1
+    # training_loss = (1 * 4 + 1 * 9) / num_nonzero_weights
+    expected_training_loss = 13. / 2.
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights)
+
+  def test_train_labels_none(self):
+    """Tests that error is raised when labels is None."""
+    head = head_lib._regression_head()
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    with self.assertRaisesRegexp(
+        ValueError, r'You must provide a labels Tensor\. Given: None\.'):
+      head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=np.array(((45,), (41,),), dtype=np.float32),
+          labels=None,
+          train_op_fn=_no_op_train_fn)
+
+  def test_train(self):
+    head = head_lib._regression_head()
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43.,), (44.,),), dtype=np.float64)
+    expected_train_result = b'my_train_op'
+    features = {'x': np.array(((42.,),), dtype=np.float32)}
+    # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13
+    expected_loss = 13
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    # Assert spec contains expected tensors.
+    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
+    self.assertEqual(dtypes.float32, spec.loss.dtype)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      predictions, loss, train_result, summary_str = sess.run((
+          spec.predictions[prediction_key], spec.loss, spec.train_op,
+          spec.scaffold.summary_op))
+      self.assertAllClose(logits, predictions)
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          # loss_mean = loss/2 = 13/2 = 6.5
+          metric_keys.MetricKeys.LOSS_MEAN: 6.5,
+      }, summary_str)
+
+  def test_train_with_optimizer(self):
+    head = head_lib._regression_head()
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43.,), (44.,),), dtype=np.float64)
+    expected_train_result = b'my_train_op'
+    features = {'x': np.array(((42.,),), dtype=np.float32)}
+    # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13
+    expected_loss = 13
+
+    class _Optimizer(object):
+
+      def minimize(self, loss, global_step):
+        del global_step
+        with ops.control_dependencies((check_ops.assert_equal(
+            math_ops.to_float(expected_loss), math_ops.to_float(loss),
+            name='assert_loss'),)):
+          return constant_op.constant(expected_train_result)
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        optimizer=_Optimizer())
+
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss, train_result = sess.run((spec.loss, spec.train_op))
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+
+  def test_train_with_update_ops(self):
+    head = head_lib._regression_head()
+
+    with ops.Graph().as_default():
+      w = variables.Variable(1)
+      update_op = w.assign_add(1)
+      ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
+
+      t = variables.Variable('')
+      expected_train_result = b'my_train_op'
+      def _train_op_fn(loss):
+        del loss
+        return t.assign(expected_train_result)
+
+      spec = head.create_estimator_spec(
+          features={'x': np.array(((42,),), dtype=np.int32)},
+          mode=model_fn.ModeKeys.TRAIN,
+          logits=np.array(((45,), (41,),), dtype=np.float32),
+          labels=np.array(((43.,), (44.,),), dtype=np.float64),
+          train_op_fn=_train_op_fn)
+
+      with self.cached_session() as sess:
+        _initialize_variables(self, spec.scaffold)
+        sess.run(spec.train_op)
+        w_value, t_value = sess.run([w, t])
+        self.assertEqual(2, w_value)
+        self.assertEqual(expected_train_result, t_value)
+
+  def test_train_summaries_with_head_name(self):
+    head = head_lib._regression_head(name='some_regression_head')
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43.,), (44.,),), dtype=np.float64)
+    features = {'x': np.array(((42.,),), dtype=np.float32)}
+    # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13
+    expected_loss = 13
+
+    def _train_op_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    # Assert summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      summary_str = sess.run(spec.scaffold.summary_op)
+      _assert_simple_summaries(
+          self,
+          {
+              '{}/some_regression_head'.format(metric_keys.MetricKeys.LOSS):
+                  expected_loss,
+              # loss_mean = loss/2 = 13/2 = 6.5
+              '{}/some_regression_head'
+              .format(metric_keys.MetricKeys.LOSS_MEAN):
+                  6.5,
+          },
+          summary_str)
+
+  def test_train_with_regularization_losses(self):
+    head = head_lib._regression_head(
+        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,),), dtype=np.float32)
+    labels = np.array(((43.,), (44.,),), dtype=np.float64)
+    expected_train_result = b'my_train_op'
+    features = {'x': np.array(((42.,),), dtype=np.float32)}
+    regularization_losses = [1.5, 0.5]
+    expected_regularization_loss = 2.
+    # unregularized_loss = ((43-45)^2 + (44-41)^2) / batch_size
+    #                    = (4 + 9) / 2 = 6.5
+    # loss = unregularized_loss + regularization_loss = 8.5
+    expected_loss = 8.5
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn,
+        regularization_losses=regularization_losses)
+
+    # Assert predictions, loss, train_op, and summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+      predictions, loss, train_result, summary_str = sess.run((
+          spec.predictions[prediction_key], spec.loss, spec.train_op,
+          spec.scaffold.summary_op))
+      self.assertAllClose(logits, predictions)
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          metric_keys.MetricKeys.LOSS_REGULARIZATION: (
+              expected_regularization_loss),
+      }, summary_str)
+
+  def test_weighted_multi_example_eval(self):
+    """1d label, 3 examples, 1 batch."""
+    head = head_lib._regression_head(weight_column='label_weights')
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,), (44,)), dtype=np.int32)
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array(((42,), (43,), (44,)), dtype=np.int32),
+            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float32),
+        },
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=np.array(((35,), (42,), (45,)), dtype=np.int32))
+
+    # Assert spec contains expected tensors.
+    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
+    self.assertEqual(dtypes.float32, spec.loss.dtype)
+    self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
+                           metric_keys.MetricKeys.PREDICTION_MEAN,
+                           metric_keys.MetricKeys.LABEL_MEAN),
+                          spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
+          metric_keys.MetricKeys.LOSS_MEAN]
+      predictions, loss, loss_mean = sess.run((
+          spec.predictions[prediction_key], spec.loss, loss_mean_update_op))
+      self.assertAllClose(logits, predictions)
+      # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
+      self.assertAllClose(101.6, loss)
+      # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
+      expected_loss_mean = 39.0769231
+      # Check results of both update (in `loss_mean`) and value ops.
+      self.assertAllClose(expected_loss_mean, loss_mean)
+      self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())
+
+  def test_weight_with_numeric_column(self):
+    """1d label, 3 examples, 1 batch."""
+    head = head_lib._regression_head(
+        weight_column=feature_column_lib.numeric_column(
+            'label_weights', normalizer_fn=lambda x: x + 1.))
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,), (44,)), dtype=np.int32)
+    spec = head.create_estimator_spec(
+        features={
+            'x':
+                np.array(((42,), (43,), (44,)), dtype=np.int32),
+            'label_weights':
+                np.array(((0.,), (-0.9,), (0.5,)), dtype=np.float32),
+        },
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=np.array(((35,), (42,), (45,)), dtype=np.int32))
+
+    # Assert loss.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      loss = sess.run(spec.loss)
+      # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
+      self.assertAllClose(101.6, loss)
+
+  def test_weighted_multi_example_train(self):
+    """1d label, 3 examples, 1 batch."""
+    head = head_lib._regression_head(weight_column='label_weights')
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
+    expected_train_result = b'my_train_op'
+    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
+    expected_loss = 101.6
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+    spec = head.create_estimator_spec(
+        features={
+            'x': np.array(((42,), (43,), (44,)), dtype=np.float32),
+            'label_weights': np.array(((1.,), (.1,), (1.5,)), dtype=np.float64),
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=np.array(((35.,), (42.,), (45.,)), dtype=np.float32),
+        train_op_fn=_train_op_fn)
+
+    # Assert spec contains expected tensors.
+    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
+    self.assertEqual(dtypes.float32, spec.loss.dtype)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      predictions, loss, train_result, summary_str = sess.run((
+          spec.predictions[prediction_key], spec.loss, spec.train_op,
+          spec.scaffold.summary_op))
+      self.assertAllClose(logits, predictions)
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
+          metric_keys.MetricKeys.LOSS_MEAN: 39.0769231,
+      }, summary_str)
+
+  def test_train_one_dim_create_loss(self):
+    """Tests create_loss with 1D labels and weights (shape [batch_size])."""
+    head = head_lib._regression_head(weight_column='label_weights')
+    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
+    x_feature_rank_1 = np.array((42., 43., 44.,), dtype=np.float32)
+    weight_rank_1 = np.array((1., .1, 1.5,), dtype=np.float64)
+    labels_rank_1 = np.array((35., 42., 45.,))
+    # unreduced_loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
+    expected_unreduced_loss = [[100.], [1.], [1.]]
+    # weights are reshaped to [3, 1] to match logits.
+    expected_weights = [[1.], [.1], [1.5]]
+    # training_loss = 100 * 1 + 1 * .1 + 1.5 * 1 = 101.6
+    expected_training_loss = 101.6
+    features = {'x': x_feature_rank_1, 'label_weights': weight_rank_1}
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels_rank_1)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_train_one_dim(self):
+    """Tests train with 1D labels and weights (shape [batch_size])."""
+    head = head_lib._regression_head(weight_column='label_weights')
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45,), (41,), (44,)), dtype=np.float32)
+    expected_train_result = b'my_train_op'
+    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
+    expected_loss = 101.6
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    x_feature_rank_1 = np.array((42., 43., 44.,), dtype=np.float32)
+    weight_rank_1 = np.array((1., .1, 1.5,), dtype=np.float64)
+    labels_rank_1 = np.array((35., 42., 45.,))
+    features = {'x': x_feature_rank_1, 'label_weights': weight_rank_1}
+    self.assertEqual((3,), x_feature_rank_1.shape)
+    self.assertEqual((3,), weight_rank_1.shape)
+    self.assertEqual((3,), labels_rank_1.shape)
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels_rank_1,
+        train_op_fn=_train_op_fn)
+
+    # Assert spec contains expected tensors.
+    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
+    self.assertEqual(dtypes.float32, spec.loss.dtype)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, train_op, and summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      predictions, loss, train_result, summary_str = sess.run((
+          spec.predictions[prediction_key], spec.loss, spec.train_op,
+          spec.scaffold.summary_op))
+      self.assertAllClose(logits, predictions)
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.0769231
+          metric_keys.MetricKeys.LOSS_MEAN: 39.0769231,
+      }, summary_str)
+
+  def test_weighted_multi_value_eval_create_loss(self):
+    """3d label, 1 example, 1 batch."""
+    head = head_lib._regression_head(
+        weight_column='label_weights', label_dimension=3)
+    logits = np.array(((45., 41., 44.),))
+    labels = np.array(((35., 42., 45.),))
+    features = {
+        'x': np.array(((42., 43., 44.),)),
+        'label_weights': np.array(((1., .1, 1.5),))
+    }
+    # Create loss.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
+      # weighted sum loss = 1 * 100 + .1 * 1 + 1.5 * 1 = 101.6
+      self.assertAllClose(101.6, training_loss.eval())
+
+  def test_weighted_multi_value_eval(self):
+    """3d label, 1 example, 1 batch."""
+    head = head_lib._regression_head(
+        weight_column='label_weights', label_dimension=3)
+    self.assertEqual(3, head.logits_dimension)
+
+    logits = np.array(((45., 41., 44.),))
+    labels = np.array(((35., 42., 45.),))
+    features = {
+        'x': np.array(((42., 43., 44.),)),
+        'label_weights': np.array(((1., .1, 1.5),))
+    }
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=logits,
+        labels=labels)
+
+    # Assert spec contains expected tensors.
+    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
+    self.assertEqual(dtypes.float32, spec.loss.dtype)
+    self.assertItemsEqual((metric_keys.MetricKeys.LOSS_MEAN,
+                           metric_keys.MetricKeys.PREDICTION_MEAN,
+                           metric_keys.MetricKeys.LABEL_MEAN),
+                          spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Assert predictions, loss, and metrics.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNone(spec.scaffold.summary_op)
+      loss_mean_value_op, loss_mean_update_op = spec.eval_metric_ops[
+          metric_keys.MetricKeys.LOSS_MEAN]
+      predictions, loss, loss_mean = sess.run((
+          spec.predictions[prediction_key], spec.loss, loss_mean_update_op))
+      self.assertAllClose(logits, predictions)
+      # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
+      self.assertAllClose(101.6, loss)
+      # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
+      expected_loss_mean = 39.076923
+      # Check results of both update (in `loss_mean`) and value ops.
+      self.assertAllClose(expected_loss_mean, loss_mean)
+      self.assertAllClose(expected_loss_mean, loss_mean_value_op.eval())
+
+  def test_weighted_multi_value_train_create_loss(self):
+    """3d label, 1 example, 1 batch."""
+    head = head_lib._regression_head(
+        weight_column='label_weights', label_dimension=3)
+    logits = np.array(((45., 41., 44.),))
+    labels = np.array(((35., 42., 45.),))
+    features = {
+        'x': np.array(((42., 43., 44.),)),
+        'label_weights': np.array(((1., .1, 1.5),))
+    }
+    # Create loss.
+    training_loss = head.create_loss(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)[0]
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      # loss = [(35-45)^2, (42-41)^2, (45-44)^2] = [100, 1, 1].
+      # weighted sum loss = 1 * 100 + .1 * 1 + 1.5 * 1 = 101.6
+      self.assertAllClose(101.6, training_loss.eval())
+
+  def test_weighted_multi_value_train(self):
+    """3d label, 1 example, 1 batch."""
+    head = head_lib._regression_head(
+        weight_column='label_weights', label_dimension=3)
+    self.assertEqual(3, head.logits_dimension)
+
+    logits = np.array(((45., 41., 44.),))
+    labels = np.array(((35., 42., 45.),))
+    expected_train_result = b'my_train_op'
+    # loss = 1*(35-45)^2 + .1*(42-41)^2 + 1.5*(45-44)^2 = 100+.1+1.5 = 101.6
+    expected_loss = 101.6
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    features = {
+        'x': np.array(((42., 43., 44.),)),
+        'label_weights': np.array(((1., .1, 1.5),)),
+    }
+    # Create estimator spec.
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+
+    # Assert spec contains expected tensors.
+    prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
+    self.assertItemsEqual((prediction_key,), spec.predictions.keys())
+    self.assertEqual(dtypes.float32, spec.predictions[prediction_key].dtype)
+    self.assertEqual(dtypes.float32, spec.loss.dtype)
+    self.assertEqual({}, spec.eval_metric_ops)
+    self.assertIsNotNone(spec.train_op)
+    self.assertIsNone(spec.export_outputs)
+    _assert_no_hooks(self, spec)
+
+    # Evaluate predictions, loss, train_op, and summaries.
+    with self.cached_session() as sess:
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      predictions, loss, train_result, summary_str = sess.run((
+          spec.predictions[prediction_key], spec.loss, spec.train_op,
+          spec.scaffold.summary_op))
+      self.assertAllClose(logits, predictions)
+      self.assertAllClose(expected_loss, loss)
+      self.assertEqual(expected_train_result, train_result)
+      _assert_simple_summaries(self, {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
+          metric_keys.MetricKeys.LOSS_MEAN: 39.076923,
+      }, summary_str)
+
+  def test_weighted_multi_batch_eval(self):
+    """1d label, 1 example, 3 batches."""
+    head = head_lib._regression_head(weight_column='label_weights')
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45.,), (41.,), (44.,)))
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'x': np.array(((42.,), (43.,), (44.,))),
+            'label_weights': np.array(((1.,), (.1,), (1.5,))),
+            # 'logits' is not a feature, but we use `numpy_input_fn` to make a
+            # batched version of it, and pop it off before passing to
+            # `create_estimator_spec`.
+            'logits': logits,
+        },
+        y=np.array(((35.,), (42.,), (45.,))),
+        batch_size=1,
+        num_epochs=1,
+        shuffle=False)
+    batched_features, batched_labels = input_fn()
+    batched_logits = batched_features.pop('logits')
+    spec = head.create_estimator_spec(
+        features=batched_features,
+        mode=model_fn.ModeKeys.EVAL,
+        logits=batched_logits,
+        labels=batched_labels,
+        train_op_fn=None)
+
+    # losses = [1*(35-45)^2, .1*(42-41)^2, 1.5*(45-44)^2] = [100, .1, 1.5]
+    # loss = sum(losses) = 100+.1+1.5 = 101.6
+    # loss_mean = loss/(1+.1+1.5) = 101.6/2.6 = 39.076923
+    expected_metrics = {
+        metric_keys.MetricKeys.LOSS_MEAN:
+            39.076923,
+        metric_keys.MetricKeys.PREDICTION_MEAN:
+            (45 + 41 * 0.1 + 44 * 1.5) / 2.6,
+        metric_keys.MetricKeys.LABEL_MEAN: (35 + 42 * 0.1 + 45 * 1.5) / 2.6,
+    }
+
+    # Assert spec contains expected tensors.
+    self.assertEqual(dtypes.float32, spec.loss.dtype)
+    self.assertItemsEqual(expected_metrics.keys(), spec.eval_metric_ops.keys())
+    self.assertIsNone(spec.train_op)
+    _assert_no_hooks(self, spec)
+
+    with self.cached_session() as sess:
+      # Finalize graph and initialize variables.
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      queue_runner_impl.start_queue_runners()
+
+      # Run tensors for `steps` steps.
+      steps = len(logits)
+      results = tuple([
+          sess.run((
+              spec.loss,
+              # The `[1]` gives us the metric update op.
+              {k: spec.eval_metric_ops[k][1] for k in spec.eval_metric_ops}
+          )) for _ in range(steps)
+      ])
+
+      # Assert losses and metrics.
+      self.assertAllClose((100, .1, 1.5), [r[0] for r in results])
+      # For metrics, check results of both update (in `results`) and value ops.
+      # Note: we only check the result of the last step for streaming metrics.
+      self.assertAllClose(expected_metrics, results[steps - 1][1])
+      self.assertAllClose(expected_metrics, {
+          k: spec.eval_metric_ops[k][0].eval() for k in spec.eval_metric_ops
+      })
+
+  def test_weighted_multi_batch_train(self):
+    """1d label, 1 example, 3 batches."""
+    head = head_lib._regression_head(weight_column='label_weights')
+    self.assertEqual(1, head.logits_dimension)
+
+    # Create estimator spec.
+    logits = np.array(((45.,), (41.,), (44.,)))
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'x': np.array(((42.,), (43.,), (44.,))),
+            'label_weights': np.array(((1.,), (.1,), (1.5,))),
+            # 'logits' is not a feature, but we use `numpy_input_fn` to make a
+            # batched version of it, and pop it off before passing to
+            # `create_estimator_spec`.
+            'logits': logits,
+        },
+        y=np.array(((35.,), (42.,), (45.,))),
+        batch_size=1,
+        num_epochs=1,
+        shuffle=False)
+    batched_features, batched_labels = input_fn()
+    batched_logits = batched_features.pop('logits')
+    spec = head.create_estimator_spec(
+        features=batched_features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=batched_logits,
+        labels=batched_labels,
+        train_op_fn=lambda loss: loss * -7.)
+
+    # Assert spec contains expected tensors.
+    self.assertEqual(dtypes.float32, spec.loss.dtype)
+    self.assertIsNotNone(spec.train_op)
+
+    with self.cached_session() as sess:
+      # Finalize graph and initialize variables.
+      _initialize_variables(self, spec.scaffold)
+      self.assertIsNotNone(spec.scaffold.summary_op)
+      queue_runner_impl.start_queue_runners()
+
+      results = tuple([
+          sess.run((spec.loss, spec.train_op)) for _ in range(len(logits))
+      ])
+
+      # losses = [1*(35-45)^2, .1*(42-41)^2, 1.5*(45-44)^2] = [100, .1, 1.5]
+      expected_losses = np.array((100, .1, 1.5))
+      self.assertAllClose(expected_losses, [r[0] for r in results])
+      self.assertAllClose(expected_losses * -7., [r[1] for r in results])
+
+  def test_multi_dim_weighted_train_create_loss(self):
+    """Logits, labels of shape [2, 2, 3], weight shape [2, 2]."""
+    label_dimension = 3
+    head = head_lib._regression_head(
+        weight_column='label_weights', label_dimension=label_dimension)
+    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
+                       [[20., 21., 22.], [30., 31., 32.]]])
+    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
+                       [[23., 24., 25.], [34., 35., 36.]]])
+    weights = np.array([[1., 1.5], [2., 2.5]])
+    expected_unreduced_loss = [[[1., 1., 1.], [4., 4., 4.]],
+                               [[9., 9., 9.], [16., 16., 16.]]]
+    expected_training_loss = np.sum(
+        np.array([[[1. * x for x in [1., 1., 1.]],
+                   [1.5 * x for x in [4., 4., 4.]]],
+                  [[2. * x for x in [9., 9., 9.]],
+                   [2.5 * x for x in [16., 16., 16.]]]]))
+    # Weights are expanded to [2, 2, 1] to match logits.
+    expected_weights = [[[1.], [1.5]], [[2.], [2.5]]]
+    # Create loss.
+    training_loss, unreduced_loss, actual_weights, _ = head.create_loss(
+        features={'label_weights': weights},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_training_loss, training_loss.eval())
+      self.assertAllClose(expected_unreduced_loss, unreduced_loss.eval())
+      self.assertAllClose(expected_weights, actual_weights.eval())
+
+  def test_multi_dim_weighted_train(self):
+    """Logits, labels of shape [2, 2, 3], weight shape [2, 2]."""
+    head = head_lib._regression_head(
+        weight_column='label_weights', label_dimension=3)
+    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
+                       [[20., 21., 22.], [30., 31., 32.]]])
+    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
+                       [[23., 24., 25.], [34., 35., 36.]]])
+    expected_train_result = b'my_train_op'
+    features = {
+        'label_weights': np.array([[1., 1.5], [2., 2.5]]),
+    }
+    # loss = 1*3*1^2 + 1.5*3*2^2 + 2*3*3^2 +2.5*3*4^2 = 195
+    expected_loss = 195.
+    # Create estimator spec.
+    def _train_op_fn(loss):
+      with ops.control_dependencies((check_ops.assert_equal(
+          math_ops.to_float(expected_loss), math_ops.to_float(loss),
+          name='assert_loss'),)):
+        return constant_op.constant(expected_train_result)
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_train_op_fn)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      self.assertAllClose(expected_loss, spec.loss.eval())
+
+  def test_multi_dim_train_weights_wrong_inner_dim(self):
+    """Logits, labels of shape [2, 2, 3], weight shape [2, 1]."""
+    head = head_lib._regression_head(
+        weight_column='label_weights', label_dimension=3)
+    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
+                       [[20., 21., 22.], [30., 31., 32.]]])
+    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
+                       [[23., 24., 25.], [34., 35., 36.]]])
+    features = {
+        'label_weights': np.array([[1.], [2]]),
+    }
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_no_op_train_fn)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[logits_shape: \] \[2 2 3\] \[weights_shape: \] \[2 1\]'):
+        spec.loss.eval()
+
+  def test_multi_dim_train_weights_wrong_outer_dim(self):
+    """Logits, labels of shape [2, 2, 3], weight shape [2, 2, 2]."""
+    head = head_lib._regression_head(
+        weight_column='label_weights', label_dimension=3)
+    logits = np.array([[[00., 01., 02.], [10., 11., 12.]],
+                       [[20., 21., 22.], [30., 31., 32.]]])
+    labels = np.array([[[01., 02., 03.], [12., 13., 14.]],
+                       [[23., 24., 25.], [34., 35., 36.]]])
+    weights_placeholder = array_ops.placeholder(dtype=dtypes.float32)
+    features = {
+        'label_weights': weights_placeholder,
+    }
+    def _no_op_train_fn(loss):
+      del loss
+      return control_flow_ops.no_op()
+
+    spec = head.create_estimator_spec(
+        features=features,
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels,
+        train_op_fn=_no_op_train_fn)
+    with self.cached_session():
+      _initialize_variables(self, monitored_session.Scaffold())
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          r'\[logits_shape: \]\s\[2 2 3\]\s\[weights_shape: \]\s\[2 2 2\]'):
+        spec.loss.eval({
+            weights_placeholder: np.array([[[1., 1.1], [1.5, 1.6]],
+                                           [[2., 2.1], [2.5, 2.6]]])})
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/canned/linear.py b/tensorflow/python/estimator/canned/linear.py
index f674e50f92..8b96284bd3 100644
--- a/tensorflow/python/estimator/canned/linear.py
+++ b/tensorflow/python/estimator/canned/linear.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,532 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""linear python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Linear Estimators."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import linear
+import math
+
+import six
+
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.canned import optimizers
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables as variable_ops
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.summary import summary
+from tensorflow.python.training import ftrl
+from tensorflow.python.util import nest
+from tensorflow.python.util.tf_export import estimator_export
+
+
+# The default learning rate of 0.2 is a historical artifact of the initial
+# implementation, but seems a reasonable choice.
+_LEARNING_RATE = 0.2
+
+
+def _get_default_optimizer(feature_columns):
+  learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns)))
+  return ftrl.FtrlOptimizer(learning_rate=learning_rate)
+
+
+def _get_expanded_variable_list(var_list):
+  """Given a list of variables, expands them if they are partitioned.
+
+  Args:
+    var_list: A list of variables.
+
+  Returns:
+    A list of variables where each partitioned variable is expanded to its
+    components.
+  """
+  returned_list = []
+  for variable in var_list:
+    if (isinstance(variable, variable_ops.Variable) or
+        resource_variable_ops.is_resource_variable(variable)):
+      returned_list.append(variable)  # Single variable case.
+    else:  # Must be a PartitionedVariable, so convert into a list.
+      returned_list.extend(list(variable))
+  return returned_list
+
+
+# TODO(rohanj): Consider making this a public utility method.
+def _compute_fraction_of_zero(variables):
+  """Given a linear variables list, compute the fraction of zero weights.
+
+  Args:
+    variables: A list or list of list of variables
+
+  Returns:
+    The fraction of zeros (sparsity) in the linear model.
+  """
+  all_weight_vars = []
+  for var_or_var_list in variables:
+    var_list = nest.flatten(var_or_var_list)
+    # Skip empty-lists associated with columns that created no Variables.
+    if var_list:
+      all_weight_vars += [array_ops.reshape(var, [-1]) for var in var_list]
+  return nn.zero_fraction(array_ops.concat(all_weight_vars, axis=0))
+
+
+def _linear_logit_fn_builder(units, feature_columns, sparse_combiner='sum'):
+  """Function builder for a linear logit_fn.
+
+  Args:
+    units: An int indicating the dimension of the logit layer.
+    feature_columns: An iterable containing all the feature columns used by
+      the model.
+    sparse_combiner: A string specifying how to reduce if a categorical column
+      is multivalent.  One of "mean", "sqrtn", and "sum".
+
+  Returns:
+    A logit_fn (see below).
+
+  """
+
+  def linear_logit_fn(features):
+    """Linear model logit_fn.
+
+    Args:
+      features: This is the first item returned from the `input_fn`
+                passed to `train`, `evaluate`, and `predict`. This should be a
+                single `Tensor` or `dict` of same.
+
+    Returns:
+      A `Tensor` representing the logits.
+    """
+    if feature_column_v2.is_feature_column_v2(feature_columns):
+      shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
+      linear_model = feature_column_v2.LinearModel(
+          feature_columns=feature_columns,
+          units=units,
+          sparse_combiner=sparse_combiner,
+          shared_state_manager=shared_state_manager)
+      logits = linear_model(features)
+      bias = linear_model.bias_variable
+
+      # We'd like to get all the non-bias variables associated with this
+      # LinearModel. This includes the shared embedding variables as well.
+      variables = linear_model.variables
+      variables.remove(bias)
+      variables.extend(shared_state_manager.variables)
+
+      # Expand (potential) Partitioned variables
+      bias = _get_expanded_variable_list([bias])
+      variables = _get_expanded_variable_list(variables)
+    else:
+      linear_model = feature_column._LinearModel(  # pylint: disable=protected-access
+          feature_columns=feature_columns,
+          units=units,
+          sparse_combiner=sparse_combiner,
+          name='linear_model')
+      logits = linear_model(features)
+      cols_to_vars = linear_model.cols_to_vars()
+      bias = cols_to_vars.pop('bias')
+      variables = cols_to_vars.values()
+
+    if units > 1:
+      summary.histogram('bias', bias)
+    else:
+      # If units == 1, the bias value is a length-1 list of a scalar Tensor,
+      # so we should provide a scalar summary.
+      summary.scalar('bias', bias[0][0])
+    summary.scalar('fraction_of_zero_weights',
+                   _compute_fraction_of_zero(variables))
+    return logits
+
+  return linear_logit_fn
+
+
+def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
+                     partitioner, config, sparse_combiner='sum'):
+  """A model_fn for linear models that use a gradient-based optimizer.
+
+  Args:
+    features: dict of `Tensor`.
+    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
+    mode: Defines whether this is training, evaluation or prediction.
+      See `ModeKeys`.
+    head: A `Head` instance.
+    feature_columns: An iterable containing all the feature columns used by
+      the model.
+    optimizer: string, `Optimizer` object, or callable that defines the
+      optimizer to use for training. If `None`, will use a FTRL optimizer.
+    partitioner: Partitioner for variables.
+    config: `RunConfig` object to configure the runtime settings.
+    sparse_combiner: A string specifying how to reduce if a categorical column
+      is multivalent.  One of "mean", "sqrtn", and "sum".
+
+  Returns:
+    An `EstimatorSpec` instance.
+
+  Raises:
+    ValueError: mode or params are invalid, or features has the wrong type.
+  """
+  if not isinstance(features, dict):
+    raise ValueError('features should be a dictionary of `Tensor`s. '
+                     'Given type: {}'.format(type(features)))
+
+  optimizer = optimizers.get_optimizer_instance(
+      optimizer or _get_default_optimizer(feature_columns),
+      learning_rate=_LEARNING_RATE)
+  num_ps_replicas = config.num_ps_replicas if config else 0
+
+  partitioner = partitioner or (
+      partitioned_variables.min_max_variable_partitioner(
+          max_partitions=num_ps_replicas,
+          min_slice_size=64 << 20))
+
+  with variable_scope.variable_scope(
+      'linear',
+      values=tuple(six.itervalues(features)),
+      partitioner=partitioner):
+
+    logit_fn = _linear_logit_fn_builder(
+        units=head.logits_dimension, feature_columns=feature_columns,
+        sparse_combiner=sparse_combiner)
+    logits = logit_fn(features=features)
+
+    return head.create_estimator_spec(
+        features=features,
+        mode=mode,
+        labels=labels,
+        optimizer=optimizer,
+        logits=logits)
+
+
+@estimator_export('estimator.LinearClassifier')
+class LinearClassifier(estimator.Estimator):
+  """Linear classifier model.
+
+  Train a linear model to classify instances into one of multiple possible
+  classes. When number of possible classes is 2, this is binary classification.
+
+  Example:
+
+  ```python
+  categorical_column_a = categorical_column_with_hash_bucket(...)
+  categorical_column_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
+
+  # Estimator using the default optimizer.
+  estimator = LinearClassifier(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b])
+
+  # Or estimator using the FTRL optimizer with regularization.
+  estimator = LinearClassifier(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b],
+      optimizer=tf.train.FtrlOptimizer(
+        learning_rate=0.1,
+        l1_regularization_strength=0.001
+      ))
+
+  # Or estimator using an optimizer with a learning rate decay.
+  estimator = LinearClassifier(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b],
+      optimizer=lambda: tf.train.FtrlOptimizer(
+          learning_rate=tf.exponential_decay(
+              learning_rate=0.1,
+              global_step=tf.get_global_step(),
+              decay_steps=10000,
+              decay_rate=0.96))
+
+  # Or estimator with warm-starting from a previous checkpoint.
+  estimator = LinearClassifier(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b],
+      warm_start_from="/path/to/checkpoint/dir")
+
+
+  # Input builders
+  def input_fn_train: # returns x, y (where y represents label's class index).
+    ...
+  def input_fn_eval: # returns x, y (where y represents label's class index).
+    ...
+  estimator.train(input_fn=input_fn_train)
+  estimator.evaluate(input_fn=input_fn_eval)
+  estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+    otherwise there will be a `KeyError`:
+
+  * if `weight_column` is not `None`, a feature with
+    `key=weight_column` whose value is a `Tensor`.
+  * for each `column` in `feature_columns`:
+    - if `column` is a `SparseColumn`, a feature with `key=column.name`
+      whose `value` is a `SparseTensor`.
+    - if `column` is a `WeightedSparseColumn`, two features: the first with
+      `key` the id column name, the second with `key` the weight column name.
+      Both features' `value` must be a `SparseTensor`.
+    - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
+      whose `value` is a `Tensor`.
+
+  Loss is calculated by using softmax cross entropy.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               feature_columns,
+               model_dir=None,
+               n_classes=2,
+               weight_column=None,
+               label_vocabulary=None,
+               optimizer='Ftrl',
+               config=None,
+               partitioner=None,
+               warm_start_from=None,
+               loss_reduction=losses.Reduction.SUM,
+               sparse_combiner='sum'):
+    """Construct a `LinearClassifier` estimator object.
+
+    Args:
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `FeatureColumn`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      n_classes: number of label classes. Default is binary classification.
+        Note that class labels are integers representing the class index (i.e.
+        values from 0 to n_classes-1). For arbitrary label values (e.g. string
+        labels), convert to class indices first.
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to down weight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      label_vocabulary: A list of strings represents possible label values. If
+        given, labels must be string type and have any value in
+        `label_vocabulary`. If it is not given, that means labels are
+        already encoded as integer or float within [0, 1] for `n_classes=2` and
+        encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 .
+        Also there will be errors if vocabulary is not provided and labels are
+        string.
+      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
+        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
+        callable. Defaults to FTRL optimizer.
+      config: `RunConfig` object to configure the runtime settings.
+      partitioner: Optional. Partitioner for input layer.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights and biases are warm-started, and it is assumed that vocabularies
+        and Tensor names are unchanged.
+      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+        to reduce training loss over batch. Defaults to `SUM`.
+      sparse_combiner: A string specifying how to reduce if a categorical column
+        is multivalent.  One of "mean", "sqrtn", and "sum" -- these are
+        effectively different ways to do example-level normalization, which can
+        be useful for bag-of-words features. for more details, see
+        `tf.feature_column.linear_model`.
+
+    Returns:
+      A `LinearClassifier` estimator.
+
+    Raises:
+      ValueError: if n_classes < 2.
+    """
+    if n_classes == 2:
+      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
+          weight_column=weight_column,
+          label_vocabulary=label_vocabulary,
+          loss_reduction=loss_reduction)
+    else:
+      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
+          n_classes, weight_column=weight_column,
+          label_vocabulary=label_vocabulary,
+          loss_reduction=loss_reduction)
+
+    def _model_fn(features, labels, mode, config):
+      """Call the defined shared _linear_model_fn."""
+      return _linear_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          feature_columns=tuple(feature_columns or []),
+          optimizer=optimizer,
+          partitioner=partitioner,
+          config=config,
+          sparse_combiner=sparse_combiner)
+
+    super(LinearClassifier, self).__init__(
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        warm_start_from=warm_start_from)
+
+
+@estimator_export('estimator.LinearRegressor')
+class LinearRegressor(estimator.Estimator):
+  """An estimator for TensorFlow Linear regression problems.
+
+  Train a linear regression model to predict label value given observation of
+  feature values.
+
+  Example:
+
+  ```python
+  categorical_column_a = categorical_column_with_hash_bucket(...)
+  categorical_column_b = categorical_column_with_hash_bucket(...)
+
+  categorical_feature_a_x_categorical_feature_b = crossed_column(...)
+
+  # Estimator using the default optimizer.
+  estimator = LinearRegressor(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b])
+
+  # Or estimator using the FTRL optimizer with regularization.
+  estimator = LinearRegressor(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b],
+      optimizer=tf.train.FtrlOptimizer(
+        learning_rate=0.1,
+        l1_regularization_strength=0.001
+      ))
+
+  # Or estimator using an optimizer with a learning rate decay.
+  estimator = LinearRegressor(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b],
+      optimizer=lambda: tf.train.FtrlOptimizer(
+          learning_rate=tf.exponential_decay(
+              learning_rate=0.1,
+              global_step=tf.get_global_step(),
+              decay_steps=10000,
+              decay_rate=0.96))
+
+  # Or estimator with warm-starting from a previous checkpoint.
+  estimator = LinearRegressor(
+      feature_columns=[categorical_column_a,
+                       categorical_feature_a_x_categorical_feature_b],
+      warm_start_from="/path/to/checkpoint/dir")
+
+
+  # Input builders
+  def input_fn_train: # returns x, y
+    ...
+  def input_fn_eval: # returns x, y
+    ...
+  estimator.train(input_fn=input_fn_train)
+  estimator.evaluate(input_fn=input_fn_eval)
+  estimator.predict(input_fn=input_fn_predict)
+  ```
+
+  Input of `train` and `evaluate` should have following features,
+    otherwise there will be a KeyError:
+
+  * if `weight_column` is not `None`:
+    key=weight_column, value=a `Tensor`
+  * for column in `feature_columns`:
+    - if isinstance(column, `SparseColumn`):
+        key=column.name, value=a `SparseTensor`
+    - if isinstance(column, `WeightedSparseColumn`):
+        {key=id column name, value=a `SparseTensor`,
+         key=weight column name, value=a `SparseTensor`}
+    - if isinstance(column, `RealValuedColumn`):
+        key=column.name, value=a `Tensor`
+
+  Loss is calculated by using mean squared error.
+
+  @compatibility(eager)
+  Estimators can be used while eager execution is enabled. Note that `input_fn`
+  and all hooks are executed inside a graph context, so they have to be written
+  to be compatible with graph mode. Note that `input_fn` code using `tf.data`
+  generally works in both graph and eager modes.
+  @end_compatibility
+  """
+
+  def __init__(self,
+               feature_columns,
+               model_dir=None,
+               label_dimension=1,
+               weight_column=None,
+               optimizer='Ftrl',
+               config=None,
+               partitioner=None,
+               warm_start_from=None,
+               loss_reduction=losses.Reduction.SUM,
+               sparse_combiner='sum'):
+    """Initializes a `LinearRegressor` instance.
+
+    Args:
+      feature_columns: An iterable containing all the feature columns used by
+        the model. All items in the set should be instances of classes derived
+        from `FeatureColumn`.
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into a estimator
+        to continue training a previously saved model.
+      label_dimension: Number of regression targets per example. This is the
+        size of the last dimension of the labels and logits `Tensor` objects
+        (typically, these have shape `[batch_size, label_dimension]`).
+      weight_column: A string or a `_NumericColumn` created by
+        `tf.feature_column.numeric_column` defining feature column representing
+        weights. It is used to down weight or boost examples during training. It
+        will be multiplied by the loss of the example. If it is a string, it is
+        used as a key to fetch weight tensor from the `features`. If it is a
+        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+        then weight_column.normalizer_fn is applied on it to get weight tensor.
+      optimizer: An instance of `tf.Optimizer` used to train the model. Can also
+        be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'), or
+        callable. Defaults to FTRL optimizer.
+      config: `RunConfig` object to configure the runtime settings.
+      partitioner: Optional. Partitioner for input layer.
+      warm_start_from: A string filepath to a checkpoint to warm-start from, or
+        a `WarmStartSettings` object to fully configure warm-starting.  If the
+        string filepath is provided instead of a `WarmStartSettings`, then all
+        weights and biases are warm-started, and it is assumed that vocabularies
+        and Tensor names are unchanged.
+      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
+        to reduce training loss over batch. Defaults to `SUM`.
+      sparse_combiner: A string specifying how to reduce if a categorical column
+        is multivalent.  One of "mean", "sqrtn", and "sum" -- these are
+        effectively different ways to do example-level normalization, which can
+        be useful for bag-of-words features. for more details, see
+        `tf.feature_column.linear_model`.
+    """
+    head = head_lib._regression_head(  # pylint: disable=protected-access
+        label_dimension=label_dimension, weight_column=weight_column,
+        loss_reduction=loss_reduction)
 
-# Include attrs that start with single underscore.
-linear.__all__ = [s for s in dir(linear) if not s.startswith('__')]
+    def _model_fn(features, labels, mode, config):
+      """Call the defined shared _linear_model_fn."""
+      return _linear_model_fn(
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          feature_columns=tuple(feature_columns or []),
+          optimizer=optimizer,
+          partitioner=partitioner,
+          config=config,
+          sparse_combiner=sparse_combiner)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.linear import *
+    super(LinearRegressor, self).__init__(
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        warm_start_from=warm_start_from)
diff --git a/tensorflow/python/estimator/canned/linear_test.py b/tensorflow/python/estimator/canned/linear_test.py
new file mode 100644
index 0000000000..3e6da5de22
--- /dev/null
+++ b/tensorflow/python/estimator/canned/linear_test.py
@@ -0,0 +1,255 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for linear.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.estimator.canned import linear
+from tensorflow.python.estimator.canned import linear_testing_utils
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.platform import test
+
+
+def _linear_regressor_fn(*args, **kwargs):
+  return linear.LinearRegressor(*args, **kwargs)
+
+
+def _linear_classifier_fn(*args, **kwargs):
+  return linear.LinearClassifier(*args, **kwargs)
+
+
+# Tests for Linear Regressor.
+
+
+class LinearRegressorPartitionerTest(
+    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearRegressorPartitionerV2Test(
+    linear_testing_utils.BaseLinearRegressorPartitionerTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPartitionerTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+class LinearRegressorEvaluationTest(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearRegressorEvaluationV2Test(
+    linear_testing_utils.BaseLinearRegressorEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorEvaluationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+class LinearRegressorPredictTest(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearRegressorPredictV2Test(
+    linear_testing_utils.BaseLinearRegressorPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorPredictTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+class LinearRegressorIntegrationTest(
+    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearRegressorIntegrationV2Test(
+    linear_testing_utils.BaseLinearRegressorIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorIntegrationTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+class LinearRegressorTrainingTest(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column)
+
+
+class LinearRegressorTrainingV2Test(
+    linear_testing_utils.BaseLinearRegressorTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearRegressorTrainingTest.__init__(
+        self, _linear_regressor_fn, fc_lib=feature_column_v2)
+
+
+# Tests for Linear Classifier.
+class LinearClassifierTrainingTest(
+    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearClassifierTrainingV2Test(
+    linear_testing_utils.BaseLinearClassifierTrainingTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierTrainingTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
+
+
+class LinearClassifierEvaluationTest(
+    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearClassifierEvaluationV2Test(
+    linear_testing_utils.BaseLinearClassifierEvaluationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierEvaluationTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
+
+
+class LinearClassifierPredictTest(
+    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearClassifierPredictV2Test(
+    linear_testing_utils.BaseLinearClassifierPredictTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierPredictTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
+
+
+class LinearClassifierIntegrationTest(
+    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
+        self, linear_classifier_fn=_linear_classifier_fn, fc_lib=feature_column)
+
+
+class LinearClassifierIntegrationV2Test(
+    linear_testing_utils.BaseLinearClassifierIntegrationTest, test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearClassifierIntegrationTest.__init__(
+        self,
+        linear_classifier_fn=_linear_classifier_fn,
+        fc_lib=feature_column_v2)
+
+
+# Tests for Linear logit_fn.
+class LinearLogitFnTest(linear_testing_utils.BaseLinearLogitFnTest,
+                        test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearLogitFnTest.__init__(
+        self, fc_lib=feature_column)
+
+
+class LinearLogitFnV2Test(linear_testing_utils.BaseLinearLogitFnTest,
+                          test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearLogitFnTest.__init__(
+        self, fc_lib=feature_column_v2)
+
+
+# Tests for warm-starting with Linear logit_fn.
+class LinearWarmStartingTest(linear_testing_utils.BaseLinearWarmStartingTest,
+                             test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearWarmStartingTest.__init__(
+        self,
+        _linear_classifier_fn,
+        _linear_regressor_fn,
+        fc_lib=feature_column)
+
+
+class LinearWarmStartingV2Test(linear_testing_utils.BaseLinearWarmStartingTest,
+                               test.TestCase):
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    linear_testing_utils.BaseLinearWarmStartingTest.__init__(
+        self,
+        _linear_classifier_fn,
+        _linear_regressor_fn,
+        fc_lib=feature_column_v2)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py
index f6d26348c9..2cfa2a8e15 100644
--- a/tensorflow/python/estimator/canned/linear_testing_utils.py
+++ b/tensorflow/python/estimator/canned/linear_testing_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,2344 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""linear_testing_utils python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Utils for testing linear estimators."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import linear_testing_utils
+import math
+import os
+import shutil
+import tempfile
+
+import numpy as np
+import six
+
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
+from tensorflow.python.client import session as tf_session
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import run_config
+from tensorflow.python.estimator.canned import linear
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.estimator.inputs import pandas_io
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.feature_column import feature_column_v2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import input as input_lib
+from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.training import queue_runner
+from tensorflow.python.training import saver
+from tensorflow.python.training import session_run_hook
+
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+# pylint rules which are disabled by default for test files.
+# pylint: disable=invalid-name,protected-access,missing-docstring
+
+# Names of variables created by model.
+AGE_WEIGHT_NAME = 'linear/linear_model/age/weights'
+HEIGHT_WEIGHT_NAME = 'linear/linear_model/height/weights'
+OCCUPATION_WEIGHT_NAME = 'linear/linear_model/occupation/weights'
+BIAS_NAME = 'linear/linear_model/bias_weights'
+LANGUAGE_WEIGHT_NAME = 'linear/linear_model/language/weights'
+
+
+def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
+  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
+    expected = ops.convert_to_tensor(expected, name='expected')
+    actual = ops.convert_to_tensor(actual, name='actual')
+    rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
+    rtol = ops.convert_to_tensor(rtol, name='rtol')
+    return check_ops.assert_less(
+        rdiff,
+        rtol,
+        data=('Condition expected =~ actual did not hold element-wise:'
+              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
+              'rtol = ', rtol,),
+        name=scope)
+
+
+def save_variables_to_ckpt(model_dir):
+  init_all_op = [variables_lib.global_variables_initializer()]
+  with tf_session.Session() as sess:
+    sess.run(init_all_op)
+    saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
+
+
+def queue_parsed_features(feature_map):
+  tensors_to_enqueue = []
+  keys = []
+  for key, tensor in six.iteritems(feature_map):
+    keys.append(key)
+    tensors_to_enqueue.append(tensor)
+  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
+  input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
+  queue_runner.add_queue_runner(
+      queue_runner.QueueRunner(input_queue,
+                               [input_queue.enqueue(tensors_to_enqueue)]))
+  dequeued_tensors = input_queue.dequeue()
+  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
+
+
+def sorted_key_dict(unsorted_dict):
+  return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}
+
+
+def sigmoid(x):
+  return 1 / (1 + np.exp(-1.0 * x))
+
+
+class CheckPartitionerVarHook(session_run_hook.SessionRunHook):
+  """A `SessionRunHook` to check a partitioned variable."""
+
+  def __init__(self, test_case, var_name, var_dim, partitions):
+    self._test_case = test_case
+    self._var_name = var_name
+    self._var_dim = var_dim
+    self._partitions = partitions
+
+  def begin(self):
+    with variable_scope.variable_scope(
+        variable_scope.get_variable_scope()) as scope:
+      scope.reuse_variables()
+      partitioned_weight = variable_scope.get_variable(
+          self._var_name, shape=(self._var_dim, 1))
+      self._test_case.assertTrue(
+          isinstance(partitioned_weight, variables_lib.PartitionedVariable))
+      for part in partitioned_weight:
+        self._test_case.assertEqual(self._var_dim // self._partitions,
+                                    part.get_shape()[0])
+
+
+class BaseLinearRegressorPartitionerTest(object):
+
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
+    self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def testPartitioner(self):
+    x_dim = 64
+    partitions = 4
+
+    def _partitioner(shape, dtype):
+      del dtype  # unused; required by Fn signature.
+      # Only partition the embedding tensor.
+      return [partitions, 1] if shape[0] == x_dim else [1]
+
+    regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.categorical_column_with_hash_bucket(
+            'language', hash_bucket_size=x_dim),),
+        partitioner=_partitioner,
+        model_dir=self._model_dir)
+
+    def _input_fn():
+      return {
+          'language':
+              sparse_tensor.SparseTensor(
+                  values=['english', 'spanish'],
+                  indices=[[0, 0], [0, 1]],
+                  dense_shape=[1, 2])
+      }, [[10.]]
+
+    hook = CheckPartitionerVarHook(self, LANGUAGE_WEIGHT_NAME, x_dim,
+                                   partitions)
+    regressor.train(input_fn=_input_fn, steps=1, hooks=[hook])
+
+  def testDefaultPartitionerWithMultiplePsReplicas(self):
+    partitions = 2
+    # This results in weights larger than the default partition size of 64M,
+    # so partitioned weights are created (each weight uses 4 bytes).
+    x_dim = 32 << 20
+
+    class FakeRunConfig(run_config.RunConfig):
+
+      @property
+      def num_ps_replicas(self):
+        return partitions
+
+    # Mock the device setter as ps is not available on test machines.
+    with test.mock.patch.object(
+        estimator,
+        '_get_replica_device_setter',
+        return_value=lambda _: '/cpu:0'):
+      linear_regressor = self._linear_regressor_fn(
+          feature_columns=(self._fc_lib.categorical_column_with_hash_bucket(
+              'language', hash_bucket_size=x_dim),),
+          config=FakeRunConfig(),
+          model_dir=self._model_dir)
+
+      def _input_fn():
+        return {
+            'language':
+                sparse_tensor.SparseTensor(
+                    values=['english', 'spanish'],
+                    indices=[[0, 0], [0, 1]],
+                    dense_shape=[1, 2])
+        }, [[10.]]
+
+      hook = CheckPartitionerVarHook(self, LANGUAGE_WEIGHT_NAME, x_dim,
+                                     partitions)
+      linear_regressor.train(input_fn=_input_fn, steps=1, hooks=[hook])
+
+
+# TODO(b/36813849): Add tests with dynamic shape inputs using placeholders.
+class BaseLinearRegressorEvaluationTest(object):
+
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
+    self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_evaluation_for_simple_data(self):
+    with ops.Graph().as_default():
+      variables_lib.Variable([[11.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([2.0], name=BIAS_NAME)
+      variables_lib.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        model_dir=self._model_dir)
+    eval_metrics = linear_regressor.evaluate(
+        input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1)
+
+    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10. Loss is 3**2 = 9.
+    self.assertDictEqual({
+        metric_keys.MetricKeys.LOSS: 9.,
+        metric_keys.MetricKeys.LOSS_MEAN: 9.,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
+        metric_keys.MetricKeys.LABEL_MEAN: 10.,
+        ops.GraphKeys.GLOBAL_STEP: 100
+    }, eval_metrics)
+
+  def test_evaluation_batch(self):
+    """Tests evaluation for batch_size==2."""
+    with ops.Graph().as_default():
+      variables_lib.Variable([[11.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([2.0], name=BIAS_NAME)
+      variables_lib.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        model_dir=self._model_dir)
+    eval_metrics = linear_regressor.evaluate(
+        input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1)
+
+    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
+    # Loss per example is 3**2 = 9.
+    # Training loss is the sum over batch = 9 + 9 = 18
+    # Average loss is the average over batch = 9
+    self.assertDictEqual({
+        metric_keys.MetricKeys.LOSS: 18.,
+        metric_keys.MetricKeys.LOSS_MEAN: 9.,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
+        metric_keys.MetricKeys.LABEL_MEAN: 10.,
+        ops.GraphKeys.GLOBAL_STEP: 100
+    }, eval_metrics)
+
+  def test_evaluation_weights(self):
+    """Tests evaluation with weights."""
+    with ops.Graph().as_default():
+      variables_lib.Variable([[11.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([2.0], name=BIAS_NAME)
+      variables_lib.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    def _input_fn():
+      features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
+      labels = ((10.,), (10.,))
+      return features, labels
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        weight_column='weights',
+        model_dir=self._model_dir)
+    eval_metrics = linear_regressor.evaluate(input_fn=_input_fn, steps=1)
+
+    # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
+    # Loss per example is 3**2 = 9.
+    # Training loss is the weighted sum over batch = 9 + 2*9 = 27
+    # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9
+    self.assertDictEqual({
+        metric_keys.MetricKeys.LOSS: 27.,
+        metric_keys.MetricKeys.LOSS_MEAN: 9.,
+        metric_keys.MetricKeys.PREDICTION_MEAN: 13.,
+        metric_keys.MetricKeys.LABEL_MEAN: 10.,
+        ops.GraphKeys.GLOBAL_STEP: 100
+    }, eval_metrics)
+
+  def test_evaluation_for_multi_dimensions(self):
+    x_dim = 3
+    label_dim = 2
+    with ops.Graph().as_default():
+      variables_lib.Variable(
+          [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([7.0, 8.0], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('age', shape=(x_dim,)),),
+        label_dimension=label_dim,
+        model_dir=self._model_dir)
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'age': np.array([[2., 4., 5.]]),
+        },
+        y=np.array([[46., 58.]]),
+        batch_size=1,
+        num_epochs=None,
+        shuffle=False)
+    eval_metrics = linear_regressor.evaluate(input_fn=input_fn, steps=1)
+
+    self.assertItemsEqual(
+        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
+         metric_keys.MetricKeys.PREDICTION_MEAN,
+         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
+        eval_metrics.keys())
+
+    # Logit is
+    #   [2., 4., 5.] * [1.0, 2.0] + [7.0, 8.0] = [39, 50] + [7.0, 8.0]
+    #                  [3.0, 4.0]
+    #                  [5.0, 6.0]
+    # which is [46, 58]
+    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
+
+  def test_evaluation_for_multiple_feature_columns(self):
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
+      variables_lib.Variable([5.0], name=BIAS_NAME)
+      variables_lib.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    batch_size = 2
+    feature_columns = [
+        self._fc_lib.numeric_column('age'),
+        self._fc_lib.numeric_column('height')
+    ]
+    input_fn = numpy_io.numpy_input_fn(
+        x={'age': np.array([20, 40]),
+           'height': np.array([4, 8])},
+        y=np.array([[213.], [421.]]),
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=False)
+
+    est = self._linear_regressor_fn(
+        feature_columns=feature_columns, model_dir=self._model_dir)
+
+    eval_metrics = est.evaluate(input_fn=input_fn, steps=1)
+    self.assertItemsEqual(
+        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
+         metric_keys.MetricKeys.PREDICTION_MEAN,
+         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
+        eval_metrics.keys())
+
+    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
+    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
+    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
+
+  def test_evaluation_for_multiple_feature_columns_mix(self):
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.0]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([[2.0]], name=HEIGHT_WEIGHT_NAME)
+      variables_lib.Variable([5.0], name=BIAS_NAME)
+      variables_lib.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    batch_size = 2
+    feature_columns = [
+        feature_column.numeric_column('age'),
+        feature_column_v2.numeric_column('height')
+    ]
+
+    def _input_fn():
+      features_ds = dataset_ops.Dataset.from_tensor_slices({
+          'age': np.array([20, 40]),
+          'height': np.array([4, 8])
+      })
+      labels_ds = dataset_ops.Dataset.from_tensor_slices(
+          np.array([[213.], [421.]]))
+      return (dataset_ops.Dataset.zip((features_ds, labels_ds))
+              .batch(batch_size).repeat(None))
+
+    est = self._linear_regressor_fn(
+        feature_columns=feature_columns, model_dir=self._model_dir)
+
+    eval_metrics = est.evaluate(input_fn=_input_fn, steps=1)
+    self.assertItemsEqual(
+        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
+         metric_keys.MetricKeys.PREDICTION_MEAN,
+         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
+        eval_metrics.keys())
+
+    # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
+    # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
+    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
+
+
+class BaseLinearRegressorPredictTest(object):
+
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
+    self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def test_1d(self):
+    """Tests predict when all variables are one-dimensional."""
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.]], name='linear/linear_model/x/weights')
+      variables_lib.Variable([.2], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('x'),),
+        model_dir=self._model_dir)
+
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': np.array([[2.]])},
+        y=None,
+        batch_size=1,
+        num_epochs=1,
+        shuffle=False)
+    predictions = linear_regressor.predict(input_fn=predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # x * weight + bias = 2. * 10. + .2 = 20.2
+    self.assertAllClose([[20.2]], predicted_scores)
+
+  def testMultiDim(self):
+    """Tests predict when all variables are multi-dimenstional."""
+    batch_size = 2
+    label_dimension = 3
+    x_dim = 4
+    feature_columns = (self._fc_lib.numeric_column('x', shape=(x_dim,)),)
+    with ops.Graph().as_default():
+      variables_lib.Variable(  # shape=[x_dim, label_dimension]
+          [[1., 2., 3.], [2., 3., 4.], [3., 4., 5.], [4., 5., 6.]],
+          name='linear/linear_model/x/weights')
+      variables_lib.Variable(  # shape=[label_dimension]
+          [.2, .4, .6], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=feature_columns,
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+
+    predict_input_fn = numpy_io.numpy_input_fn(
+        # x shape=[batch_size, x_dim]
+        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
+        y=None,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+    predictions = linear_regressor.predict(input_fn=predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # score = x * weight + bias, shape=[batch_size, label_dimension]
+    self.assertAllClose([[30.2, 40.4, 50.6], [70.2, 96.4, 122.6]],
+                        predicted_scores)
+
+  def testTwoFeatureColumns(self):
+    """Tests predict with two feature columns."""
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.]], name='linear/linear_model/x0/weights')
+      variables_lib.Variable([[20.]], name='linear/linear_model/x1/weights')
+      variables_lib.Variable([.2], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('x0'),
+                         self._fc_lib.numeric_column('x1')),
+        model_dir=self._model_dir)
+
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x0': np.array([[2.]]),
+           'x1': np.array([[3.]])},
+        y=None,
+        batch_size=1,
+        num_epochs=1,
+        shuffle=False)
+    predictions = linear_regressor.predict(input_fn=predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
+    self.assertAllClose([[80.2]], predicted_scores)
+
+  def testTwoFeatureColumnsMix(self):
+    """Tests predict with two feature columns."""
+    with ops.Graph().as_default():
+      variables_lib.Variable([[10.]], name='linear/linear_model/x0/weights')
+      variables_lib.Variable([[20.]], name='linear/linear_model/x1/weights')
+      variables_lib.Variable([.2], name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(feature_column.numeric_column('x0'),
+                         feature_column_v2.numeric_column('x1')),
+        model_dir=self._model_dir)
+
+    def _predict_input_fn():
+      return dataset_ops.Dataset.from_tensor_slices({
+          'x0': np.array([[2.]]),
+          'x1': np.array([[3.]])
+      }).batch(1)
+
+    predictions = linear_regressor.predict(input_fn=_predict_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
+    self.assertAllClose([[80.2]], predicted_scores)
+
+  def testSparseCombiner(self):
+    w_a = 2.0
+    w_b = 3.0
+    w_c = 5.0
+    bias = 5.0
+    with ops.Graph().as_default():
+      variables_lib.Variable([[w_a], [w_b], [w_c]], name=LANGUAGE_WEIGHT_NAME)
+      variables_lib.Variable([bias], name=BIAS_NAME)
+      variables_lib.Variable(1, name=ops.GraphKeys.GLOBAL_STEP,
+                             dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors({
+          'language': sparse_tensor.SparseTensor(
+              values=['a', 'c', 'b', 'c'],
+              indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
+              dense_shape=[2, 2]),
+      })
+
+    feature_columns = (self._fc_lib.categorical_column_with_vocabulary_list(
+        'language', vocabulary_list=['a', 'b', 'c']),)
+
+    # Check prediction for each sparse_combiner.
+    # With sparse_combiner = 'sum', we have
+    # logits_1 = w_a + w_c + bias
+    #          = 2.0 + 5.0 + 5.0 = 12.0
+    # logits_2 = w_b + w_c + bias
+    #          = 3.0 + 5.0 + 5.0 = 13.0
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=feature_columns,
+        model_dir=self._model_dir)
+    predictions = linear_regressor.predict(input_fn=_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    self.assertAllClose([[12.0], [13.0]], predicted_scores)
+
+    # With sparse_combiner = 'mean', we have
+    # logits_1 = 1/2 * (w_a + w_c) + bias
+    #          = 1/2 * (2.0 + 5.0) + 5.0 = 8.5
+    # logits_2 = 1/2 * (w_b + w_c) + bias
+    #          = 1/2 * (3.0 + 5.0) + 5.0 = 9.0
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=feature_columns,
+        model_dir=self._model_dir,
+        sparse_combiner='mean')
+    predictions = linear_regressor.predict(input_fn=_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    self.assertAllClose([[8.5], [9.0]], predicted_scores)
+
+    # With sparse_combiner = 'sqrtn', we have
+    # logits_1 = sqrt(2)/2 * (w_a + w_c) + bias
+    #          = sqrt(2)/2 * (2.0 + 5.0) + 5.0 = 9.94974
+    # logits_2 = sqrt(2)/2 * (w_b + w_c) + bias
+    #          = sqrt(2)/2 * (3.0 + 5.0) + 5.0 = 10.65685
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=feature_columns,
+        model_dir=self._model_dir,
+        sparse_combiner='sqrtn')
+    predictions = linear_regressor.predict(input_fn=_input_fn)
+    predicted_scores = list([x['predictions'] for x in predictions])
+    self.assertAllClose([[9.94974], [10.65685]], predicted_scores)
+
+
+class BaseLinearRegressorIntegrationTest(object):
+
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
+    self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
+                          input_dimension, label_dimension, prediction_length):
+    feature_columns = [
+        self._fc_lib.numeric_column('x', shape=(input_dimension,))
+    ]
+    est = self._linear_regressor_fn(
+        feature_columns=feature_columns,
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    # learn y = x
+    est.train(train_input_fn, steps=200)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array(
+        [x['predictions'] for x in est.predict(predict_input_fn)])
+    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)
+
+    # EXPORT
+    feature_spec = self._fc_lib.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    input_dimension = label_dimension
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=data,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=None,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length)
+
+  def test_pandas_input_fn(self):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+
+    # Pandas DataFrame natually supports 1 dim data only.
+    label_dimension = 1
+    input_dimension = label_dimension
+    batch_size = 10
+    data = np.array([1., 2., 3., 4.], dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(data)
+    prediction_length = 4
+
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x, batch_size=batch_size, shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length)
+
+  def test_input_fn_from_parse_example(self):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    label_dimension = 2
+    input_dimension = label_dimension
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x':
+                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
+                      value=datum)),
+              'y':
+                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
+                      value=datum[:label_dimension])),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
+    }
+
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length)
+
+
+class BaseLinearRegressorTrainingTest(object):
+
+  def __init__(self, linear_regressor_fn, fc_lib=feature_column):
+    self._linear_regressor_fn = linear_regressor_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      writer_cache.FileWriterCache.clear()
+      shutil.rmtree(self._model_dir)
+
+  def _mock_optimizer(self, expected_loss=None):
+    expected_var_names = [
+        '%s/part_0:0' % AGE_WEIGHT_NAME,
+        '%s/part_0:0' % BIAS_NAME
+    ]
+
+    def _minimize(loss, global_step=None, var_list=None):
+      trainable_vars = var_list or ops.get_collection(
+          ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertItemsEqual(expected_var_names,
+                            [var.name for var in trainable_vars])
+
+      # Verify loss. We can't check the value directly, so we add an assert op.
+      self.assertEquals(0, loss.shape.ndims)
+      if expected_loss is None:
+        if global_step is not None:
+          return state_ops.assign_add(global_step, 1).op
+        return control_flow_ops.no_op()
+      assert_loss = assert_close(
+          math_ops.to_float(expected_loss, name='expected'),
+          loss,
+          name='assert_loss')
+      with ops.control_dependencies((assert_loss,)):
+        if global_step is not None:
+          return state_ops.assign_add(global_step, 1).op
+        return control_flow_ops.no_op()
+
+    mock_optimizer = test.mock.NonCallableMock(
+        spec=optimizer_lib.Optimizer,
+        wraps=optimizer_lib.Optimizer(use_locking=False, name='my_optimizer'))
+    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
+
+    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
+    # So, return mock_optimizer itself for deepcopy.
+    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
+    return mock_optimizer
+
+  def _assert_checkpoint(self,
+                         expected_global_step,
+                         expected_age_weight=None,
+                         expected_bias=None):
+    shapes = {
+        name: shape
+        for (name, shape) in checkpoint_utils.list_variables(self._model_dir)
+    }
+
+    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
+    self.assertEqual(expected_global_step,
+                     checkpoint_utils.load_variable(self._model_dir,
+                                                    ops.GraphKeys.GLOBAL_STEP))
+
+    self.assertEqual([1, 1], shapes[AGE_WEIGHT_NAME])
+    if expected_age_weight is not None:
+      self.assertEqual(expected_age_weight,
+                       checkpoint_utils.load_variable(self._model_dir,
+                                                      AGE_WEIGHT_NAME))
+
+    self.assertEqual([1], shapes[BIAS_NAME])
+    if expected_bias is not None:
+      self.assertEqual(expected_bias,
+                       checkpoint_utils.load_variable(self._model_dir,
+                                                      BIAS_NAME))
+
+  def testFromScratchWithDefaultOptimizer(self):
+    # Create LinearRegressor.
+    label = 5.
+    age = 17
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        model_dir=self._model_dir)
+
+    # Train for a few steps, and validate final checkpoint.
+    num_steps = 10
+    linear_regressor.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self._assert_checkpoint(num_steps)
+
+  def testTrainWithOneDimLabel(self):
+    label_dimension = 1
+    batch_size = 20
+    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
+    est = self._linear_regressor_fn(
+        feature_columns=feature_columns,
+        label_dimension=label_dimension,
+        model_dir=self._model_dir)
+    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
+    self.assertEqual((batch_size,), data_rank_1.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1},
+        y=data_rank_1,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(200)
+
+  def testTrainWithOneDimWeight(self):
+    label_dimension = 1
+    batch_size = 20
+    feature_columns = [self._fc_lib.numeric_column('age', shape=(1,))]
+    est = self._linear_regressor_fn(
+        feature_columns=feature_columns,
+        label_dimension=label_dimension,
+        weight_column='w',
+        model_dir=self._model_dir)
+
+    data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
+    self.assertEqual((batch_size,), data_rank_1.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1,
+           'w': data_rank_1},
+        y=data_rank_1,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(200)
+
+  def testFromScratch(self):
+    # Create LinearRegressor.
+    label = 5.
+    age = 17
+    # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
+    mock_optimizer = self._mock_optimizer(expected_loss=25.)
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        model_dir=self._model_dir,
+        optimizer=mock_optimizer)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    linear_regressor.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        expected_global_step=num_steps,
+        expected_age_weight=0.,
+        expected_bias=0.)
+
+  def testFromCheckpoint(self):
+    # Create initial checkpoint.
+    age_weight = 10.0
+    bias = 5.0
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables_lib.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([bias], name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # logits = age * age_weight + bias = 17 * 10. + 5. = 175
+    # loss = (logits - label)^2 = (175 - 5)^2 = 28900
+    mock_optimizer = self._mock_optimizer(expected_loss=28900.)
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        model_dir=self._model_dir,
+        optimizer=mock_optimizer)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    linear_regressor.train(
+        input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        expected_global_step=initial_global_step + num_steps,
+        expected_age_weight=age_weight,
+        expected_bias=bias)
+
+  def testFromCheckpointMultiBatch(self):
+    # Create initial checkpoint.
+    age_weight = 10.0
+    bias = 5.0
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables_lib.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
+      variables_lib.Variable([bias], name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # logits = age * age_weight + bias
+    # logits[0] = 17 * 10. + 5. = 175
+    # logits[1] = 15 * 10. + 5. = 155
+    # loss = sum(logits - label)^2 = (175 - 5)^2 + (155 - 3)^2 = 52004
+    mock_optimizer = self._mock_optimizer(expected_loss=52004.)
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        model_dir=self._model_dir,
+        optimizer=mock_optimizer)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    linear_regressor.train(
+        input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))),
+        steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        expected_global_step=initial_global_step + num_steps,
+        expected_age_weight=age_weight,
+        expected_bias=bias)
+
+
+class BaseLinearClassifierTrainingTest(object):
+
+  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
+    self._linear_classifier_fn = linear_classifier_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _mock_optimizer(self, expected_loss=None):
+    expected_var_names = [
+        '%s/part_0:0' % AGE_WEIGHT_NAME,
+        '%s/part_0:0' % BIAS_NAME
+    ]
+
+    def _minimize(loss, global_step):
+      trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      self.assertItemsEqual(
+          expected_var_names,
+          [var.name for var in trainable_vars])
+
+      # Verify loss. We can't check the value directly, so we add an assert op.
+      self.assertEquals(0, loss.shape.ndims)
+      if expected_loss is None:
+        return state_ops.assign_add(global_step, 1).op
+      assert_loss = assert_close(
+          math_ops.to_float(expected_loss, name='expected'),
+          loss,
+          name='assert_loss')
+      with ops.control_dependencies((assert_loss,)):
+        return state_ops.assign_add(global_step, 1).op
+
+    mock_optimizer = test.mock.NonCallableMock(
+        spec=optimizer_lib.Optimizer,
+        wraps=optimizer_lib.Optimizer(use_locking=False, name='my_optimizer'))
+    mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
+
+    # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
+    # So, return mock_optimizer itself for deepcopy.
+    mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
+    return mock_optimizer
+
+  def _assert_checkpoint(
+      self, n_classes, expected_global_step, expected_age_weight=None,
+      expected_bias=None):
+    logits_dimension = n_classes if n_classes > 2 else 1
+
+    shapes = {
+        name: shape for (name, shape) in
+        checkpoint_utils.list_variables(self._model_dir)
+    }
+
+    self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
+    self.assertEqual(
+        expected_global_step,
+        checkpoint_utils.load_variable(
+            self._model_dir, ops.GraphKeys.GLOBAL_STEP))
+
+    self.assertEqual([1, logits_dimension],
+                     shapes[AGE_WEIGHT_NAME])
+    if expected_age_weight is not None:
+      self.assertAllEqual(expected_age_weight,
+                          checkpoint_utils.load_variable(
+                              self._model_dir,
+                              AGE_WEIGHT_NAME))
+
+    self.assertEqual([logits_dimension], shapes[BIAS_NAME])
+    if expected_bias is not None:
+      self.assertAllEqual(expected_bias,
+                          checkpoint_utils.load_variable(
+                              self._model_dir, BIAS_NAME))
+
+  def _testFromScratchWithDefaultOptimizer(self, n_classes):
+    label = 0
+    age = 17
+    est = linear.LinearClassifier(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # Train for a few steps, and validate final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self._assert_checkpoint(n_classes, num_steps)
+
+  def testBinaryClassesFromScratchWithDefaultOptimizer(self):
+    self._testFromScratchWithDefaultOptimizer(n_classes=2)
+
+  def testMultiClassesFromScratchWithDefaultOptimizer(self):
+    self._testFromScratchWithDefaultOptimizer(n_classes=4)
+
+  def _testTrainWithTwoDimsLabel(self, n_classes):
+    batch_size = 20
+
+    est = linear.LinearClassifier(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    data_rank_1 = np.array([0, 1])
+    data_rank_2 = np.array([[0], [1]])
+    self.assertEqual((2,), data_rank_1.shape)
+    self.assertEqual((2, 1), data_rank_2.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1},
+        y=data_rank_2,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(n_classes, 200)
+
+  def testBinaryClassesTrainWithTwoDimsLabel(self):
+    self._testTrainWithTwoDimsLabel(n_classes=2)
+
+  def testMultiClassesTrainWithTwoDimsLabel(self):
+    self._testTrainWithTwoDimsLabel(n_classes=4)
+
+  def _testTrainWithOneDimLabel(self, n_classes):
+    batch_size = 20
+
+    est = linear.LinearClassifier(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    data_rank_1 = np.array([0, 1])
+    self.assertEqual((2,), data_rank_1.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1},
+        y=data_rank_1,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(n_classes, 200)
+
+  def testBinaryClassesTrainWithOneDimLabel(self):
+    self._testTrainWithOneDimLabel(n_classes=2)
+
+  def testMultiClassesTrainWithOneDimLabel(self):
+    self._testTrainWithOneDimLabel(n_classes=4)
+
+  def _testTrainWithTwoDimsWeight(self, n_classes):
+    batch_size = 20
+
+    est = linear.LinearClassifier(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        weight_column='w',
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    data_rank_1 = np.array([0, 1])
+    data_rank_2 = np.array([[0], [1]])
+    self.assertEqual((2,), data_rank_1.shape)
+    self.assertEqual((2, 1), data_rank_2.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1, 'w': data_rank_2}, y=data_rank_1,
+        batch_size=batch_size, num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(n_classes, 200)
+
+  def testBinaryClassesTrainWithTwoDimsWeight(self):
+    self._testTrainWithTwoDimsWeight(n_classes=2)
+
+  def testMultiClassesTrainWithTwoDimsWeight(self):
+    self._testTrainWithTwoDimsWeight(n_classes=4)
+
+  def _testTrainWithOneDimWeight(self, n_classes):
+    batch_size = 20
+
+    est = linear.LinearClassifier(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        weight_column='w',
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    data_rank_1 = np.array([0, 1])
+    self.assertEqual((2,), data_rank_1.shape)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'age': data_rank_1, 'w': data_rank_1}, y=data_rank_1,
+        batch_size=batch_size, num_epochs=None,
+        shuffle=True)
+    est.train(train_input_fn, steps=200)
+    self._assert_checkpoint(n_classes, 200)
+
+  def testBinaryClassesTrainWithOneDimWeight(self):
+    self._testTrainWithOneDimWeight(n_classes=2)
+
+  def testMultiClassesTrainWithOneDimWeight(self):
+    self._testTrainWithOneDimWeight(n_classes=4)
+
+  def _testFromScratch(self, n_classes):
+    label = 1
+    age = 17
+    # For binary classifier:
+    #   loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are
+    #   all zero initially) and label = 1 so,
+    #      loss = 1 * -log ( sigmoid(logits) ) = 0.69315
+    # For multi class classifier:
+    #   loss = cross_entropy(logits, label) where logits are all 0s (weights are
+    #   all zero initially) and label = 1 so,
+    #      loss = 1 * -log ( 1.0 / n_classes )
+    # For this particular test case, as logits are same, the formular
+    # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases.
+    mock_optimizer = self._mock_optimizer(
+        expected_loss=-1 * math.log(1.0/n_classes))
+
+    est = linear.LinearClassifier(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        n_classes,
+        expected_global_step=num_steps,
+        expected_age_weight=[[0.]] if n_classes == 2 else [[0.] * n_classes],
+        expected_bias=[0.] if n_classes == 2 else [.0] * n_classes)
+
+  def testBinaryClassesFromScratch(self):
+    self._testFromScratch(n_classes=2)
+
+  def testMultiClassesFromScratch(self):
+    self._testFromScratch(n_classes=4)
+
+  def _testFromCheckpoint(self, n_classes):
+    # Create initial checkpoint.
+    label = 1
+    age = 17
+    # For binary case, the expected weight has shape (1,1). For multi class
+    # case, the shape is (1, n_classes). In order to test the weights, set
+    # weights as 2.0 * range(n_classes).
+    age_weight = [[2.0]] if n_classes == 2 else (
+        np.reshape(2.0 * np.array(list(range(n_classes)), dtype=np.float32),
+                   (1, n_classes)))
+    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # For binary classifier:
+    #   logits = age * age_weight + bias = 17 * 2. - 35. = -1.
+    #   loss = sigmoid_cross_entropy(logits, label)
+    #   so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133
+    # For multi class classifier:
+    #   loss = cross_entropy(logits, label)
+    #   where logits = 17 * age_weight + bias and label = 1
+    #   so, loss = 1 * -log ( soft_max(logits)[1] )
+    if n_classes == 2:
+      expected_loss = 1.3133
+    else:
+      logits = age_weight * age + bias
+      logits_exp = np.exp(logits)
+      softmax = logits_exp / logits_exp.sum()
+      expected_loss = -1 * math.log(softmax[0, label])
+
+    mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
+
+    est = linear.LinearClassifier(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        n_classes,
+        expected_global_step=initial_global_step + num_steps,
+        expected_age_weight=age_weight,
+        expected_bias=bias)
+
+  def testBinaryClassesFromCheckpoint(self):
+    self._testFromCheckpoint(n_classes=2)
+
+  def testMultiClassesFromCheckpoint(self):
+    self._testFromCheckpoint(n_classes=4)
+
+  def _testFromCheckpointFloatLabels(self, n_classes):
+    """Tests float labels for binary classification."""
+    # Create initial checkpoint.
+    if n_classes > 2:
+      return
+    label = 0.8
+    age = 17
+    age_weight = [[2.0]]
+    bias = [-35.0]
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # logits = age * age_weight + bias = 17 * 2. - 35. = -1.
+    # loss = sigmoid_cross_entropy(logits, label)
+    # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617
+    mock_optimizer = self._mock_optimizer(expected_loss=1.1132617)
+
+    est = linear.LinearClassifier(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+
+  def testBinaryClassesFromCheckpointFloatLabels(self):
+    self._testFromCheckpointFloatLabels(n_classes=2)
+
+  def testMultiClassesFromCheckpointFloatLabels(self):
+    self._testFromCheckpointFloatLabels(n_classes=4)
+
+  def _testFromCheckpointMultiBatch(self, n_classes):
+    # Create initial checkpoint.
+    label = [1, 0]
+    age = [17, 18.5]
+    # For binary case, the expected weight has shape (1,1). For multi class
+    # case, the shape is (1, n_classes). In order to test the weights, set
+    # weights as 2.0 * range(n_classes).
+    age_weight = [[2.0]] if n_classes == 2 else (
+        np.reshape(2.0 * np.array(list(range(n_classes)), dtype=np.float32),
+                   (1, n_classes)))
+    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    # For binary classifier:
+    #   logits = age * age_weight + bias
+    #   logits[0] = 17 * 2. - 35. = -1.
+    #   logits[1] = 18.5 * 2. - 35. = 2.
+    #   loss = sigmoid_cross_entropy(logits, label)
+    #   so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133
+    #       loss[1] = (1 - 0) * -log ( 1- sigmoid(2) ) = 2.1269
+    # For multi class classifier:
+    #   loss = cross_entropy(logits, label)
+    #   where logits = [17, 18.5] * age_weight + bias and label = [1, 0]
+    #   so, loss = 1 * -log ( soft_max(logits)[label] )
+    if n_classes == 2:
+      expected_loss = (1.3133 + 2.1269)
+    else:
+      logits = age_weight * np.reshape(age, (2, 1)) + bias
+      logits_exp = np.exp(logits)
+      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
+      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
+      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
+      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
+      expected_loss = expected_loss_0 + expected_loss_1
+
+    mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
+
+    est = linear.LinearClassifier(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        optimizer=mock_optimizer,
+        model_dir=self._model_dir)
+    self.assertEqual(0, mock_optimizer.minimize.call_count)
+
+    # Train for a few steps, and validate optimizer and final checkpoint.
+    num_steps = 10
+    est.train(
+        input_fn=lambda: ({'age': (age)}, (label)),
+        steps=num_steps)
+    self.assertEqual(1, mock_optimizer.minimize.call_count)
+    self._assert_checkpoint(
+        n_classes,
+        expected_global_step=initial_global_step + num_steps,
+        expected_age_weight=age_weight,
+        expected_bias=bias)
+
+  def testBinaryClassesFromCheckpointMultiBatch(self):
+    self._testFromCheckpointMultiBatch(n_classes=2)
+
+  def testMultiClassesFromCheckpointMultiBatch(self):
+    self._testFromCheckpointMultiBatch(n_classes=4)
+
+
+class BaseLinearClassifierEvaluationTest(object):
+
+  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
+    self._linear_classifier_fn = linear_classifier_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _test_evaluation_for_simple_data(self, n_classes):
+    label = 1
+    age = 1.
+
+    # For binary case, the expected weight has shape (1,1). For multi class
+    # case, the shape is (1, n_classes). In order to test the weights, set
+    # weights as 2.0 * range(n_classes).
+    age_weight = [[-11.0]] if n_classes == 2 else (
+        np.reshape(-11.0 * np.array(list(range(n_classes)), dtype=np.float32),
+                   (1, n_classes)))
+    bias = [-30.0] if n_classes == 2 else [-30.0] * n_classes
+
+    with ops.Graph().as_default():
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    est = self._linear_classifier_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    eval_metrics = est.evaluate(
+        input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1)
+
+    if n_classes == 2:
+      # Binary classes: loss = sum(corss_entropy(41)) = 41.
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: 41.,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: 41.,
+          metric_keys.MetricKeys.ACCURACY: 0.,
+          metric_keys.MetricKeys.PRECISION: 0.,
+          metric_keys.MetricKeys.RECALL: 0.,
+          metric_keys.MetricKeys.PREDICTION_MEAN: 0.,
+          metric_keys.MetricKeys.LABEL_MEAN: 1.,
+          metric_keys.MetricKeys.ACCURACY_BASELINE: 1,
+          metric_keys.MetricKeys.AUC: 0.,
+          metric_keys.MetricKeys.AUC_PR: 1.,
+      }
+    else:
+      # Multi classes: loss = 1 * -log ( soft_max(logits)[label] )
+      logits = age_weight * age + bias
+      logits_exp = np.exp(logits)
+      softmax = logits_exp / logits_exp.sum()
+      expected_loss = -1 * math.log(softmax[0, label])
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
+          metric_keys.MetricKeys.ACCURACY: 0.,
+      }
+
+    self.assertAllClose(sorted_key_dict(expected_metrics),
+                        sorted_key_dict(eval_metrics), rtol=1e-3)
+
+  def test_binary_classes_evaluation_for_simple_data(self):
+    self._test_evaluation_for_simple_data(n_classes=2)
+
+  def test_multi_classes_evaluation_for_simple_data(self):
+    self._test_evaluation_for_simple_data(n_classes=4)
+
+  def _test_evaluation_batch(self, n_classes):
+    """Tests evaluation for batch_size==2."""
+    label = [1, 0]
+    age = [17., 18.]
+    # For binary case, the expected weight has shape (1,1). For multi class
+    # case, the shape is (1, n_classes). In order to test the weights, set
+    # weights as 2.0 * range(n_classes).
+    age_weight = [[2.0]] if n_classes == 2 else (
+        np.reshape(2.0 * np.array(list(range(n_classes)), dtype=np.float32),
+                   (1, n_classes)))
+    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    est = self._linear_classifier_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+    eval_metrics = est.evaluate(
+        input_fn=lambda: ({'age': (age)}, (label)), steps=1)
+
+    if n_classes == 2:
+      # Logits are (-1., 1.) labels are (1, 0).
+      # Loss is
+      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
+      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(1)) = 1.3133
+      expected_loss = 1.3133 * 2
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
+          metric_keys.MetricKeys.ACCURACY: 0.,
+          metric_keys.MetricKeys.PRECISION: 0.,
+          metric_keys.MetricKeys.RECALL: 0.,
+          metric_keys.MetricKeys.PREDICTION_MEAN: 0.5,
+          metric_keys.MetricKeys.LABEL_MEAN: 0.5,
+          metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
+          metric_keys.MetricKeys.AUC: 0.,
+          metric_keys.MetricKeys.AUC_PR: 0.25,
+      }
+    else:
+      # Multi classes: loss = 1 * -log ( soft_max(logits)[label] )
+      logits = age_weight * np.reshape(age, (2, 1)) + bias
+      logits_exp = np.exp(logits)
+      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
+      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
+      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
+      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
+      expected_loss = expected_loss_0 + expected_loss_1
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
+          metric_keys.MetricKeys.ACCURACY: 0.,
+      }
+
+    self.assertAllClose(sorted_key_dict(expected_metrics),
+                        sorted_key_dict(eval_metrics), rtol=1e-3)
+
+  def test_binary_classes_evaluation_batch(self):
+    self._test_evaluation_batch(n_classes=2)
+
+  def test_multi_classes_evaluation_batch(self):
+    self._test_evaluation_batch(n_classes=4)
+
+  def _test_evaluation_weights(self, n_classes):
+    """Tests evaluation with weights."""
+
+    label = [1, 0]
+    age = [17., 18.]
+    weights = [1., 2.]
+    # For binary case, the expected weight has shape (1,1). For multi class
+    # case, the shape is (1, n_classes). In order to test the weights, set
+    # weights as 2.0 * range(n_classes).
+    age_weight = [[2.0]] if n_classes == 2 else (
+        np.reshape(2.0 * np.array(list(range(n_classes)), dtype=np.float32),
+                   (1, n_classes)))
+    bias = [-35.0] if n_classes == 2 else [-35.0] * n_classes
+    initial_global_step = 100
+    with ops.Graph().as_default():
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(
+          initial_global_step,
+          name=ops.GraphKeys.GLOBAL_STEP,
+          dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    est = self._linear_classifier_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        n_classes=n_classes,
+        weight_column='w',
+        model_dir=self._model_dir)
+    eval_metrics = est.evaluate(
+        input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1)
+
+    if n_classes == 2:
+      # Logits are (-1., 1.) labels are (1, 0).
+      # Loss is
+      #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
+      #   loss for row 2: (1 - 0) * -log(1 - sigmoid(1)) = 1.3133
+      #   weights = [1., 2.]
+      expected_loss = 1.3133 * (1. + 2.)
+      loss_mean = expected_loss / (1.0 + 2.0)
+      label_mean = np.average(label, weights=weights)
+      logits = [-1, 1]
+      logistics = sigmoid(np.array(logits))
+      predictions_mean = np.average(logistics, weights=weights)
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
+          metric_keys.MetricKeys.ACCURACY: 0.,
+          metric_keys.MetricKeys.PRECISION: 0.,
+          metric_keys.MetricKeys.RECALL: 0.,
+          metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean,
+          metric_keys.MetricKeys.LABEL_MEAN: label_mean,
+          metric_keys.MetricKeys.ACCURACY_BASELINE: (
+              max(label_mean, 1-label_mean)),
+          metric_keys.MetricKeys.AUC: 0.,
+          metric_keys.MetricKeys.AUC_PR: 0.1668,
+      }
+    else:
+      # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] )
+      logits = age_weight * np.reshape(age, (2, 1)) + bias
+      logits_exp = np.exp(logits)
+      softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
+      softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
+      expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
+      expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
+      loss_mean = np.average([expected_loss_0, expected_loss_1],
+                             weights=weights)
+      expected_loss = loss_mean * np.sum(weights)
+
+      expected_metrics = {
+          metric_keys.MetricKeys.LOSS: expected_loss,
+          ops.GraphKeys.GLOBAL_STEP: 100,
+          metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
+          metric_keys.MetricKeys.ACCURACY: 0.,
+      }
+
+    self.assertAllClose(sorted_key_dict(expected_metrics),
+                        sorted_key_dict(eval_metrics), rtol=1e-3)
+
+  def test_binary_classes_evaluation_weights(self):
+    self._test_evaluation_weights(n_classes=2)
+
+  def test_multi_classes_evaluation_weights(self):
+    self._test_evaluation_weights(n_classes=4)
+
+
+class BaseLinearClassifierPredictTest(object):
+
+  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
+    self._linear_classifier_fn = linear_classifier_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _testPredictions(self, n_classes, label_vocabulary, label_output_fn):
+    """Tests predict when all variables are one-dimensional."""
+    age = 1.
+
+    # For binary case, the expected weight has shape (1,1). For multi class
+    # case, the shape is (1, n_classes). In order to test the weights, set
+    # weights as 2.0 * range(n_classes).
+    age_weight = [[-11.0]] if n_classes == 2 else (
+        np.reshape(-11.0 * np.array(list(range(n_classes)), dtype=np.float32),
+                   (1, n_classes)))
+    bias = [10.0] if n_classes == 2 else [10.0] * n_classes
+
+    with ops.Graph().as_default():
+      variables_lib.Variable(age_weight, name=AGE_WEIGHT_NAME)
+      variables_lib.Variable(bias, name=BIAS_NAME)
+      variables_lib.Variable(100, name='global_step', dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    est = self._linear_classifier_fn(
+        feature_columns=(self._fc_lib.numeric_column('age'),),
+        label_vocabulary=label_vocabulary,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'age': np.array([[age]])},
+        y=None,
+        batch_size=1,
+        num_epochs=1,
+        shuffle=False)
+    predictions = list(est.predict(input_fn=predict_input_fn))
+
+    if n_classes == 2:
+      scalar_logits = np.asscalar(
+          np.reshape(np.array(age_weight) * age + bias, (1,)))
+      two_classes_logits = [0, scalar_logits]
+      two_classes_logits_exp = np.exp(two_classes_logits)
+      softmax = two_classes_logits_exp / two_classes_logits_exp.sum()
+
+      expected_predictions = {
+          'class_ids': [0],
+          'classes': [label_output_fn(0)],
+          'logistic': [sigmoid(np.array(scalar_logits))],
+          'logits': [scalar_logits],
+          'probabilities': softmax,
+      }
+    else:
+      onedim_logits = np.reshape(np.array(age_weight) * age + bias, (-1,))
+      class_ids = onedim_logits.argmax()
+      logits_exp = np.exp(onedim_logits)
+      softmax = logits_exp / logits_exp.sum()
+      expected_predictions = {
+          'class_ids': [class_ids],
+          'classes': [label_output_fn(class_ids)],
+          'logits': onedim_logits,
+          'probabilities': softmax,
+      }
+
+    self.assertEqual(1, len(predictions))
+    # assertAllClose cannot handle byte type.
+    self.assertEqual(expected_predictions['classes'], predictions[0]['classes'])
+    expected_predictions.pop('classes')
+    predictions[0].pop('classes')
+    self.assertAllClose(sorted_key_dict(expected_predictions),
+                        sorted_key_dict(predictions[0]))
+
+  def testBinaryClassesWithoutLabelVocabulary(self):
+    n_classes = 2
+    self._testPredictions(n_classes,
+                          label_vocabulary=None,
+                          label_output_fn=lambda x: ('%s' % x).encode())
+
+  def testBinaryClassesWithLabelVocabulary(self):
+    n_classes = 2
+    self._testPredictions(
+        n_classes,
+        label_vocabulary=['class_vocab_{}'.format(i)
+                          for i in range(n_classes)],
+        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
+
+  def testMultiClassesWithoutLabelVocabulary(self):
+    n_classes = 4
+    self._testPredictions(
+        n_classes,
+        label_vocabulary=None,
+        label_output_fn=lambda x: ('%s' % x).encode())
+
+  def testMultiClassesWithLabelVocabulary(self):
+    n_classes = 4
+    self._testPredictions(
+        n_classes,
+        label_vocabulary=['class_vocab_{}'.format(i)
+                          for i in range(n_classes)],
+        label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
+
+  def testSparseCombiner(self):
+    w_a = 2.0
+    w_b = 3.0
+    w_c = 5.0
+    bias = 5.0
+    with ops.Graph().as_default():
+      variables_lib.Variable([[w_a], [w_b], [w_c]], name=LANGUAGE_WEIGHT_NAME)
+      variables_lib.Variable([bias], name=BIAS_NAME)
+      variables_lib.Variable(1, name=ops.GraphKeys.GLOBAL_STEP,
+                             dtype=dtypes.int64)
+      save_variables_to_ckpt(self._model_dir)
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors({
+          'language': sparse_tensor.SparseTensor(
+              values=['a', 'c', 'b', 'c'],
+              indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
+              dense_shape=[2, 2]),
+      })
+
+    feature_columns = (self._fc_lib.categorical_column_with_vocabulary_list(
+        'language', vocabulary_list=['a', 'b', 'c']),)
+
+    # Check prediction for each sparse_combiner.
+    # With sparse_combiner = 'sum', we have
+    # logits_1 = w_a + w_c + bias
+    #          = 2.0 + 5.0 + 5.0 = 12.0
+    # logits_2 = w_b + w_c + bias
+    #          = 3.0 + 5.0 + 5.0 = 13.0
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=feature_columns,
+        model_dir=self._model_dir)
+    predictions = linear_classifier.predict(input_fn=_input_fn)
+    predicted_scores = list([x['logits'] for x in predictions])
+    self.assertAllClose([[12.0], [13.0]], predicted_scores)
+
+    # With sparse_combiner = 'mean', we have
+    # logits_1 = 1/2 * (w_a + w_c) + bias
+    #          = 1/2 * (2.0 + 5.0) + 5.0 = 8.5
+    # logits_2 = 1/2 * (w_b + w_c) + bias
+    #          = 1/2 * (3.0 + 5.0) + 5.0 = 9.0
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=feature_columns,
+        model_dir=self._model_dir,
+        sparse_combiner='mean')
+    predictions = linear_classifier.predict(input_fn=_input_fn)
+    predicted_scores = list([x['logits'] for x in predictions])
+    self.assertAllClose([[8.5], [9.0]], predicted_scores)
+
+    # With sparse_combiner = 'sqrtn', we have
+    # logits_1 = sqrt(2)/2 * (w_a + w_c) + bias
+    #          = sqrt(2)/2 * (2.0 + 5.0) + 5.0 = 9.94974
+    # logits_2 = sqrt(2)/2 * (w_b + w_c) + bias
+    #          = sqrt(2)/2 * (3.0 + 5.0) + 5.0 = 10.65685
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=feature_columns,
+        model_dir=self._model_dir,
+        sparse_combiner='sqrtn')
+    predictions = linear_classifier.predict(input_fn=_input_fn)
+    predicted_scores = list([x['logits'] for x in predictions])
+    self.assertAllClose([[9.94974], [10.65685]], predicted_scores)
+
+
+class BaseLinearClassifierIntegrationTest(object):
+
+  def __init__(self, linear_classifier_fn, fc_lib=feature_column):
+    self._linear_classifier_fn = linear_classifier_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn,
+                          predict_input_fn, input_dimension, prediction_length):
+    feature_columns = [
+        self._fc_lib.numeric_column('x', shape=(input_dimension,))
+    ]
+    est = self._linear_classifier_fn(
+        feature_columns=feature_columns,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    # learn y = x
+    est.train(train_input_fn, steps=200)
+
+    # EVALUTE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
+
+    # PREDICT
+    predictions = np.array(
+        [x['classes'] for x in est.predict(predict_input_fn)])
+    self.assertAllEqual((prediction_length, 1), predictions.shape)
+
+    # EXPORT
+    feature_spec = self._fc_lib.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def _test_numpy_input_fn(self, n_classes):
+    """Tests complete flow with numpy_input_fn."""
+    input_dimension = 4
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, input_dimension)
+    target = np.array([1] * batch_size)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=target,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=target,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data},
+        y=None,
+        batch_size=batch_size,
+        num_epochs=1,
+        shuffle=False)
+
+    self._test_complete_flow(
+        n_classes=n_classes,
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        prediction_length=prediction_length)
+
+  def test_binary_classes_numpy_input_fn(self):
+    self._test_numpy_input_fn(n_classes=2)
+
+  def test_multi_classes_numpy_input_fn(self):
+    self._test_numpy_input_fn(n_classes=4)
+
+  def _test_pandas_input_fn(self, n_classes):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+
+    # Pandas DataFrame natually supports 1 dim data only.
+    input_dimension = 1
+    batch_size = 10
+    data = np.array([1., 2., 3., 4.], dtype=np.float32)
+    target = np.array([1, 0, 1, 0], dtype=np.int32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(target)
+    prediction_length = 4
+
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x, y=y, batch_size=batch_size, shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x, batch_size=batch_size, shuffle=False)
+
+    self._test_complete_flow(
+        n_classes=n_classes,
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        prediction_length=prediction_length)
+
+  def test_binary_classes_pandas_input_fn(self):
+    self._test_pandas_input_fn(n_classes=2)
+
+  def test_multi_classes_pandas_input_fn(self):
+    self._test_pandas_input_fn(n_classes=4)
+
+  def _test_input_fn_from_parse_example(self, n_classes):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    input_dimension = 2
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, input_dimension)
+    target = np.array([1] * batch_size, dtype=np.int64)
+
+    serialized_examples = []
+    for x, y in zip(data, target):
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x':
+                  feature_pb2.Feature(float_list=feature_pb2.FloatList(
+                      value=x)),
+              'y':
+                  feature_pb2.Feature(int64_list=feature_pb2.Int64List(
+                      value=[y])),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
+    }
+
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        n_classes=n_classes,
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=input_dimension,
+        prediction_length=prediction_length)
+
+  def test_binary_classes_input_fn_from_parse_example(self):
+    self._test_input_fn_from_parse_example(n_classes=2)
+
+  def test_multi_classes_input_fn_from_parse_example(self):
+    self._test_input_fn_from_parse_example(n_classes=4)
+
+
+class BaseLinearLogitFnTest(object):
+
+  def __init__(self, fc_lib=feature_column):
+    self._fc_lib = fc_lib
+
+  def test_basic_logit_correctness(self):
+    """linear_logit_fn simply wraps feature_column_lib.linear_model."""
+    age = self._fc_lib.numeric_column('age')
+    with ops.Graph().as_default():
+      logit_fn = linear._linear_logit_fn_builder(units=2, feature_columns=[age])
+      logits = logit_fn(features={'age': [[23.], [31.]]})
+      bias_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                                    'linear_model/bias_weights')[0]
+      age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                                   'linear_model/age')[0]
+      with tf_session.Session() as sess:
+        sess.run([variables_lib.global_variables_initializer()])
+        self.assertAllClose([[0., 0.], [0., 0.]], logits.eval())
+        sess.run(bias_var.assign([10., 5.]))
+        self.assertAllClose([[10., 5.], [10., 5.]], logits.eval())
+        sess.run(age_var.assign([[2.0, 3.0]]))
+        # [2 * 23 + 10, 3 * 23 + 5] = [56, 74].
+        # [2 * 31 + 10, 3 * 31 + 5] = [72, 98]
+        self.assertAllClose([[56., 74.], [72., 98.]], logits.eval())
+
+  def test_compute_fraction_of_zero(self):
+    """Tests the calculation of sparsity."""
+    if self._fc_lib != feature_column:
+      return
+    age = feature_column.numeric_column('age')
+    occupation = feature_column.categorical_column_with_hash_bucket(
+        'occupation', hash_bucket_size=5)
+    with ops.Graph().as_default():
+      cols_to_vars = {}
+      feature_column.linear_model(
+          features={
+              'age': [[23.], [31.]],
+              'occupation': [['doctor'], ['engineer']]
+          },
+          feature_columns=[age, occupation],
+          units=3,
+          cols_to_vars=cols_to_vars)
+      cols_to_vars.pop('bias')
+      fraction_zero = linear._compute_fraction_of_zero(cols_to_vars.values())
+      age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                                   'linear_model/age')[0]
+      with tf_session.Session() as sess:
+        sess.run([variables_lib.global_variables_initializer()])
+        # Upon initialization, all variables will be zero.
+        self.assertAllClose(1, fraction_zero.eval())
+
+        sess.run(age_var.assign([[2.0, 0.0, -1.0]]))
+        # 1 of the 3 age weights are zero, and all of the 15 (5 hash buckets
+        # x 3-dim output) are zero.
+        self.assertAllClose(16. / 18., fraction_zero.eval())
+
+  def test_compute_fraction_of_zero_v2(self):
+    """Tests the calculation of sparsity."""
+    if self._fc_lib != feature_column_v2:
+      return
+
+    age = feature_column_v2.numeric_column('age')
+    occupation = feature_column_v2.categorical_column_with_hash_bucket(
+        'occupation', hash_bucket_size=5)
+    shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()
+    with ops.Graph().as_default():
+      model = feature_column_v2.LinearModel(
+          feature_columns=[age, occupation],
+          units=3,
+          shared_state_manager=shared_state_manager)
+      features = {
+          'age': [[23.], [31.]],
+          'occupation': [['doctor'], ['engineer']]
+      }
+      model(features)
+      variables = model.variables
+      variables.remove(model.bias_variable)
+      variables.extend(shared_state_manager.variables)
+      fraction_zero = linear._compute_fraction_of_zero(variables)
+      age_var = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                                   'linear_model/age')[0]
+      with tf_session.Session() as sess:
+        sess.run([variables_lib.global_variables_initializer()])
+        # Upon initialization, all variables will be zero.
+        self.assertAllClose(1, fraction_zero.eval())
+
+        sess.run(age_var.assign([[2.0, 0.0, -1.0]]))
+        # 1 of the 3 age weights are zero, and all of the 15 (5 hash buckets
+        # x 3-dim output) are zero.
+        self.assertAllClose(16. / 18., fraction_zero.eval())
+
+
+class BaseLinearWarmStartingTest(object):
+
+  def __init__(self,
+               _linear_classifier_fn,
+               _linear_regressor_fn,
+               fc_lib=feature_column):
+    self._linear_classifier_fn = _linear_classifier_fn
+    self._linear_regressor_fn = _linear_regressor_fn
+    self._fc_lib = fc_lib
+
+  def setUp(self):
+    # Create a directory to save our old checkpoint and vocabularies to.
+    self._ckpt_and_vocab_dir = tempfile.mkdtemp()
+
+    # Make a dummy input_fn.
+    def _input_fn():
+      features = {
+          'age': [[23.], [31.]],
+          'age_in_years': [[23.], [31.]],
+          'occupation': [['doctor'], ['consultant']]
+      }
+      return features, [0, 1]
+
+    self._input_fn = _input_fn
+
+  def tearDown(self):
+    # Clean up checkpoint / vocab dir.
+    writer_cache.FileWriterCache.clear()
+    shutil.rmtree(self._ckpt_and_vocab_dir)
+
+  def test_classifier_basic_warm_starting(self):
+    """Tests correctness of LinearClassifier default warm-start."""
+    age = self._fc_lib.numeric_column('age')
+
+    # Create a LinearClassifier and train to save a checkpoint.
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second LinearClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=linear_classifier.model_dir)
+
+    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_linear_classifier.get_variable_names():
+      self.assertAllClose(
+          linear_classifier.get_variable_value(variable_name),
+          warm_started_linear_classifier.get_variable_value(variable_name))
+
+  def test_regressor_basic_warm_starting(self):
+    """Tests correctness of LinearRegressor default warm-start."""
+    age = self._fc_lib.numeric_column('age')
+
+    # Create a LinearRegressor and train to save a checkpoint.
+    linear_regressor = self._linear_regressor_fn(
+        feature_columns=[age],
+        model_dir=self._ckpt_and_vocab_dir,
+        optimizer='SGD')
+    linear_regressor.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second LinearRegressor, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_linear_regressor = self._linear_regressor_fn(
+        feature_columns=[age],
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=linear_regressor.model_dir)
+
+    warm_started_linear_regressor.train(input_fn=self._input_fn, max_steps=1)
+    for variable_name in warm_started_linear_regressor.get_variable_names():
+      self.assertAllClose(
+          linear_regressor.get_variable_value(variable_name),
+          warm_started_linear_regressor.get_variable_value(variable_name))
+
+  def test_warm_starting_selective_variables(self):
+    """Tests selecting variables to warm-start."""
+    age = self._fc_lib.numeric_column('age')
+
+    # Create a LinearClassifier and train to save a checkpoint.
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second LinearClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        # The provided regular expression will only warm-start the age variable
+        # and not the bias.
+        warm_start_from=estimator.WarmStartSettings(
+            ckpt_to_initialize_from=linear_classifier.model_dir,
+            vars_to_warm_start='.*(age).*'))
+
+    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+    self.assertAllClose(
+        linear_classifier.get_variable_value(AGE_WEIGHT_NAME),
+        warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME))
+    # Bias should still be zero from initialization.
+    self.assertAllClose(
+        [0.0] * 4, warm_started_linear_classifier.get_variable_value(BIAS_NAME))
+
+  def test_warm_starting_with_vocab_remapping_and_partitioning(self):
+    """Tests warm-starting with vocab remapping and partitioning."""
+    vocab_list = ['doctor', 'lawyer', 'consultant']
+    vocab_file = os.path.join(self._ckpt_and_vocab_dir, 'occupation_vocab')
+    with open(vocab_file, 'w') as f:
+      f.write('\n'.join(vocab_list))
+    occupation = self._fc_lib.categorical_column_with_vocabulary_file(
+        'occupation',
+        vocabulary_file=vocab_file,
+        vocabulary_size=len(vocab_list))
+
+    # Create a LinearClassifier and train to save a checkpoint.
+    partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2)
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=[occupation],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD',
+        partitioner=partitioner)
+    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+
+    # Create a second LinearClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).  Use a new FeatureColumn with a
+    # different vocabulary for occupation.
+    new_vocab_list = ['doctor', 'consultant', 'engineer']
+    new_vocab_file = os.path.join(self._ckpt_and_vocab_dir,
+                                  'new_occupation_vocab')
+    with open(new_vocab_file, 'w') as f:
+      f.write('\n'.join(new_vocab_list))
+    new_occupation = self._fc_lib.categorical_column_with_vocabulary_file(
+        'occupation',
+        vocabulary_file=new_vocab_file,
+        vocabulary_size=len(new_vocab_list))
+    # We can create our VocabInfo object from the new and old occupation
+    # FeatureColumn's.
+    occupation_vocab_info = estimator.VocabInfo(
+        new_vocab=new_occupation.vocabulary_file,
+        new_vocab_size=new_occupation.vocabulary_size,
+        num_oov_buckets=new_occupation.num_oov_buckets,
+        old_vocab=occupation.vocabulary_file,
+        old_vocab_size=occupation.vocabulary_size,
+        # Can't use constant_initializer with load_and_remap.  In practice,
+        # use a truncated normal initializer.
+        backup_initializer=init_ops.random_uniform_initializer(
+            minval=0.39, maxval=0.39))
+    warm_started_linear_classifier = self._linear_classifier_fn(
+        feature_columns=[occupation],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        warm_start_from=estimator.WarmStartSettings(
+            ckpt_to_initialize_from=linear_classifier.model_dir,
+            var_name_to_vocab_info={
+                OCCUPATION_WEIGHT_NAME: occupation_vocab_info
+            },
+            # Explicitly providing None here will only warm-start variables
+            # referenced in var_name_to_vocab_info (the bias will not be
+            # warm-started).
+            vars_to_warm_start=None),
+        partitioner=partitioner)
+
+    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+    # 'doctor' was ID-0 and still ID-0.
+    self.assertAllClose(
+        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[0, :],
+        warm_started_linear_classifier.get_variable_value(
+            OCCUPATION_WEIGHT_NAME)[0, :])
+    # 'consultant' was ID-2 and now ID-1.
+    self.assertAllClose(
+        linear_classifier.get_variable_value(OCCUPATION_WEIGHT_NAME)[2, :],
+        warm_started_linear_classifier.get_variable_value(
+            OCCUPATION_WEIGHT_NAME)[1, :])
+    # 'engineer' is a new entry and should be initialized with the
+    # backup_initializer in VocabInfo.
+    self.assertAllClose([0.39] * 4,
+                        warm_started_linear_classifier.get_variable_value(
+                            OCCUPATION_WEIGHT_NAME)[2, :])
+    # Bias should still be zero (from initialization logic).
+    self.assertAllClose(
+        [0.0] * 4, warm_started_linear_classifier.get_variable_value(BIAS_NAME))
+
+  def test_warm_starting_with_naming_change(self):
+    """Tests warm-starting with a Tensor name remapping."""
+    age_in_years = self._fc_lib.numeric_column('age_in_years')
+
+    # Create a LinearClassifier and train to save a checkpoint.
+    linear_classifier = self._linear_classifier_fn(
+        feature_columns=[age_in_years],
+        model_dir=self._ckpt_and_vocab_dir,
+        n_classes=4,
+        optimizer='SGD')
+    linear_classifier.train(input_fn=self._input_fn, max_steps=1)
 
-# Include attrs that start with single underscore.
-linear_testing_utils.__all__ = [
-    s for s in dir(linear_testing_utils) if not s.startswith('__')
-]
+    # Create a second LinearClassifier, warm-started from the first.  Use a
+    # learning_rate = 0.0 optimizer to check values (use SGD so we don't have
+    # accumulator values that change).
+    warm_started_linear_classifier = self._linear_classifier_fn(
+        feature_columns=[self._fc_lib.numeric_column('age')],
+        n_classes=4,
+        optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0),
+        # The 'age' variable correspond to the 'age_in_years' variable in the
+        # previous model.
+        warm_start_from=estimator.WarmStartSettings(
+            ckpt_to_initialize_from=linear_classifier.model_dir,
+            var_name_to_prev_var_name={
+                AGE_WEIGHT_NAME: AGE_WEIGHT_NAME.replace('age', 'age_in_years')
+            }))
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.linear_testing_utils import *
+    warm_started_linear_classifier.train(input_fn=self._input_fn, max_steps=1)
+    self.assertAllClose(
+        linear_classifier.get_variable_value(
+            AGE_WEIGHT_NAME.replace('age', 'age_in_years')),
+        warm_started_linear_classifier.get_variable_value(AGE_WEIGHT_NAME))
+    # The bias is also warm-started (with no name remapping).
+    self.assertAllClose(
+        linear_classifier.get_variable_value(BIAS_NAME),
+        warm_started_linear_classifier.get_variable_value(BIAS_NAME))
diff --git a/tensorflow/python/estimator/canned/metric_keys.py b/tensorflow/python/estimator/canned/metric_keys.py
index 959bb58e0c..9d49240fea 100644
--- a/tensorflow/python/estimator/canned/metric_keys.py
+++ b/tensorflow/python/estimator/canned/metric_keys.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,43 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""metric_keys python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Enum for model prediction keys."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import metric_keys
+from tensorflow.python.estimator import model_fn
+
+
+class MetricKeys(object):
+  """Metric key strings."""
+  LOSS = model_fn.LOSS_METRIC_KEY
+  LOSS_MEAN = model_fn.AVERAGE_LOSS_METRIC_KEY
+  LOSS_REGULARIZATION = 'regularization_loss'
+
+  ACCURACY = 'accuracy'
+  PRECISION = 'precision'
+  RECALL = 'recall'
+  # This is the best the model could do by always predicting one class.
+  # Should be < ACCURACY in a trained model.
+  ACCURACY_BASELINE = 'accuracy_baseline'
+  AUC = 'auc'
+  AUC_PR = 'auc_precision_recall'
+  LABEL_MEAN = 'label/mean'
+  PREDICTION_MEAN = 'prediction/mean'
+
+  # The following require a threshold applied, should be float in range (0, 1).
+  ACCURACY_AT_THRESHOLD = 'accuracy/positive_threshold_%g'
+  PRECISION_AT_THRESHOLD = 'precision/positive_threshold_%g'
+  RECALL_AT_THRESHOLD = 'recall/positive_threshold_%g'
 
-# Include attrs that start with single underscore.
-metric_keys.__all__ = [s for s in dir(metric_keys) if not s.startswith('__')]
+  # The following require a class id applied.
+  PROBABILITY_MEAN_AT_CLASS = 'probability_mean/class%d'
+  AUC_AT_CLASS = 'auc/class%d'
+  AUC_PR_AT_CLASS = 'auc_precision_recall/class%d'
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.metric_keys import *
+  # The following require a class name applied.
+  PROBABILITY_MEAN_AT_NAME = 'probability_mean/%s'
+  AUC_AT_NAME = 'auc/%s'
+  AUC_PR_AT_NAME = 'auc_precision_recall/%s'
diff --git a/tensorflow/python/estimator/canned/optimizers.py b/tensorflow/python/estimator/canned/optimizers.py
index 99b4c49ace..8f51cc3a80 100644
--- a/tensorflow/python/estimator/canned/optimizers.py
+++ b/tensorflow/python/estimator/canned/optimizers.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,69 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""optimizers python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Methods related to optimizers used in canned_estimators."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import optimizers
+import six
+
+
+from tensorflow.python.training import adagrad
+from tensorflow.python.training import adam
+from tensorflow.python.training import ftrl
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.training import rmsprop
+
+
+_OPTIMIZER_CLS_NAMES = {
+    'Adagrad': adagrad.AdagradOptimizer,
+    'Adam': adam.AdamOptimizer,
+    'Ftrl': ftrl.FtrlOptimizer,
+    'RMSProp': rmsprop.RMSPropOptimizer,
+    'SGD': gradient_descent.GradientDescentOptimizer,
+}
+
+
+def get_optimizer_instance(opt, learning_rate=None):
+  """Returns an optimizer instance.
+
+  Supports the following types for the given `opt`:
+  * An `Optimizer` instance: Returns the given `opt`.
+  * A string: Creates an `Optimizer` subclass with the given `learning_rate`.
+    Supported strings:
+    * 'Adagrad': Returns an `AdagradOptimizer`.
+    * 'Adam': Returns an `AdamOptimizer`.
+    * 'Ftrl': Returns an `FtrlOptimizer`.
+    * 'RMSProp': Returns an `RMSPropOptimizer`.
+    * 'SGD': Returns a `GradientDescentOptimizer`.
+
+  Args:
+    opt: An `Optimizer` instance, or string, as discussed above.
+    learning_rate: A float. Only used if `opt` is a string.
 
-# Include attrs that start with single underscore.
-optimizers.__all__ = [s for s in dir(optimizers) if not s.startswith('__')]
+  Returns:
+    An `Optimizer` instance.
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.optimizers import *
+  Raises:
+    ValueError: If `opt` is an unsupported string.
+    ValueError: If `opt` is a supported string but `learning_rate` was not
+      specified.
+    ValueError: If `opt` is none of the above types.
+  """
+  if isinstance(opt, six.string_types):
+    if opt in six.iterkeys(_OPTIMIZER_CLS_NAMES):
+      if not learning_rate:
+        raise ValueError('learning_rate must be specified when opt is string.')
+      return _OPTIMIZER_CLS_NAMES[opt](learning_rate=learning_rate)
+    raise ValueError(
+        'Unsupported optimizer name: {}. Supported names are: {}'.format(
+            opt, tuple(sorted(six.iterkeys(_OPTIMIZER_CLS_NAMES)))))
+  if callable(opt):
+    opt = opt()
+  if not isinstance(opt, optimizer_lib.Optimizer):
+    raise ValueError(
+        'The given object is not an Optimizer instance. Given: {}'.format(opt))
+  return opt
diff --git a/tensorflow/python/estimator/canned/optimizers_test.py b/tensorflow/python/estimator/canned/optimizers_test.py
new file mode 100644
index 0000000000..eadabdbc49
--- /dev/null
+++ b/tensorflow/python/estimator/canned/optimizers_test.py
@@ -0,0 +1,103 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for optimizers.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.estimator.canned import optimizers
+from tensorflow.python.platform import test
+from tensorflow.python.training import adagrad
+from tensorflow.python.training import adam
+from tensorflow.python.training import ftrl
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.training import rmsprop
+
+
+class _TestOptimizer(optimizer_lib.Optimizer):
+
+  def __init__(self):
+    super(_TestOptimizer, self).__init__(
+        use_locking=False, name='TestOptimizer')
+
+
+class GetOptimizerInstance(test.TestCase):
+
+  def test_unsupported_name(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'Unsupported optimizer name: unsupported_name'):
+      optimizers.get_optimizer_instance('unsupported_name', learning_rate=0.1)
+
+  def test_supported_name_but_learning_rate_none(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'learning_rate must be specified when opt is string'):
+      optimizers.get_optimizer_instance('Adagrad', learning_rate=None)
+
+  def test_adagrad(self):
+    opt = optimizers.get_optimizer_instance('Adagrad', learning_rate=0.1)
+    self.assertIsInstance(opt, adagrad.AdagradOptimizer)
+    self.assertAlmostEqual(0.1, opt._learning_rate)
+
+  def test_adam(self):
+    opt = optimizers.get_optimizer_instance('Adam', learning_rate=0.1)
+    self.assertIsInstance(opt, adam.AdamOptimizer)
+    self.assertAlmostEqual(0.1, opt._lr)
+
+  def test_ftrl(self):
+    opt = optimizers.get_optimizer_instance('Ftrl', learning_rate=0.1)
+    self.assertIsInstance(opt, ftrl.FtrlOptimizer)
+    self.assertAlmostEqual(0.1, opt._learning_rate)
+
+  def test_rmsprop(self):
+    opt = optimizers.get_optimizer_instance('RMSProp', learning_rate=0.1)
+    self.assertIsInstance(opt, rmsprop.RMSPropOptimizer)
+    self.assertAlmostEqual(0.1, opt._learning_rate)
+
+  def test_sgd(self):
+    opt = optimizers.get_optimizer_instance('SGD', learning_rate=0.1)
+    self.assertIsInstance(opt, gradient_descent.GradientDescentOptimizer)
+    self.assertAlmostEqual(0.1, opt._learning_rate)
+
+  def test_object(self):
+    opt = optimizers.get_optimizer_instance(_TestOptimizer())
+    self.assertIsInstance(opt, _TestOptimizer)
+
+  def test_object_invalid(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'The given object is not an Optimizer instance'):
+      optimizers.get_optimizer_instance((1, 2, 3))
+
+  def test_callable(self):
+    def _optimizer_fn():
+      return _TestOptimizer()
+    opt = optimizers.get_optimizer_instance(_optimizer_fn)
+    self.assertIsInstance(opt, _TestOptimizer)
+
+  def test_lambda(self):
+    opt = optimizers.get_optimizer_instance(lambda: _TestOptimizer())  # pylint: disable=unnecessary-lambda
+    self.assertIsInstance(opt, _TestOptimizer)
+
+  def test_callable_returns_invalid(self):
+    def _optimizer_fn():
+      return (1, 2, 3)
+    with self.assertRaisesRegexp(
+        ValueError, 'The given object is not an Optimizer instance'):
+      optimizers.get_optimizer_instance(_optimizer_fn)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/canned/parsing_utils.py b/tensorflow/python/estimator/canned/parsing_utils.py
index 74bb3158be..1ae0f1e9f7 100644
--- a/tensorflow/python/estimator/canned/parsing_utils.py
+++ b/tensorflow/python/estimator/canned/parsing_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,291 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""parsing_utils python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Parsing related helper function to be used in `input_fn`."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import parsing_utils
+import six
+
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.util.tf_export import estimator_export
+
+
+@estimator_export('estimator.classifier_parse_example_spec')
+def classifier_parse_example_spec(feature_columns,
+                                  label_key,
+                                  label_dtype=dtypes.int64,
+                                  label_default=None,
+                                  weight_column=None):
+  """Generates parsing spec for tf.parse_example to be used with classifiers.
+
+  If users keep data in tf.Example format, they need to call tf.parse_example
+  with a proper feature spec. There are two main things that this utility helps:
+
+  * Users need to combine parsing spec of features with labels and weights
+    (if any) since they are all parsed from same tf.Example instance. This
+    utility combines these specs.
+  * It is difficult to map expected label by a classifier such as
+    `DNNClassifier` to corresponding tf.parse_example spec. This utility encodes
+    it by getting related information from users (key, dtype).
+
+  Example output of parsing spec:
+
+  ```python
+  # Define features and transformations
+  feature_b = tf.feature_column.numeric_column(...)
+  feature_c_bucketized = tf.feature_column.bucketized_column(
+    tf.feature_column.numeric_column("feature_c"), ...)
+  feature_a_x_feature_c = tf.feature_column.crossed_column(
+      columns=["feature_a", feature_c_bucketized], ...)
+
+  feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
+  parsing_spec = tf.estimator.classifier_parse_example_spec(
+      feature_columns, label_key='my-label', label_dtype=tf.string)
+
+  # For the above example, classifier_parse_example_spec would return the dict:
+  assert parsing_spec == {
+    "feature_a": parsing_ops.VarLenFeature(tf.string),
+    "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
+    "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
+    "my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.string)
+  }
+  ```
+
+  Example usage with a classifier:
+
+  ```python
+  feature_columns = # define features via tf.feature_column
+  estimator = DNNClassifier(
+      n_classes=1000,
+      feature_columns=feature_columns,
+      weight_column='example-weight',
+      label_vocabulary=['photos', 'keep', ...],
+      hidden_units=[256, 64, 16])
+  # This label configuration tells the classifier the following:
+  # * weights are retrieved with key 'example-weight'
+  # * label is string and can be one of the following ['photos', 'keep', ...]
+  # * integer id for label 'photos' is 0, 'keep' is 1, ...
+
+
+  # Input builders
+  def input_fn_train():  # Returns a tuple of features and labels.
+    features = tf.contrib.learn.read_keyed_batch_features(
+        file_pattern=train_files,
+        batch_size=batch_size,
+        # creates parsing configuration for tf.parse_example
+        features=tf.estimator.classifier_parse_example_spec(
+            feature_columns,
+            label_key='my-label',
+            label_dtype=tf.string,
+            weight_column='example-weight'),
+        reader=tf.RecordIOReader)
+     labels = features.pop('my-label')
+     return features, labels
+
+  estimator.train(input_fn=input_fn_train)
+  ```
+
+  Args:
+    feature_columns: An iterable containing all feature columns. All items
+      should be instances of classes derived from `_FeatureColumn`.
+    label_key: A string identifying the label. It means tf.Example stores labels
+      with this key.
+    label_dtype: A `tf.dtype` identifies the type of labels. By default it is
+      `tf.int64`. If user defines a `label_vocabulary`, this should be set as
+      `tf.string`. `tf.float32` labels are only supported for binary
+      classification.
+    label_default: used as label if label_key does not exist in given
+      tf.Example. An example usage: let's say `label_key` is 'clicked' and
+      tf.Example contains clicked data only for positive examples in following
+      format `key:clicked, value:1`. This means that if there is no data with
+      key 'clicked' it should count as negative example by setting
+      `label_deafault=0`. Type of this value should be compatible with
+      `label_dtype`.
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example. If it is a string, it is
+      used as a key to fetch weight tensor from the `features`. If it is a
+      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+      then weight_column.normalizer_fn is applied on it to get weight tensor.
+
+  Returns:
+    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
+    value.
+
+  Raises:
+    ValueError: If label is used in `feature_columns`.
+    ValueError: If weight_column is used in `feature_columns`.
+    ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
+      instance.
+    ValueError: If `weight_column` is not a `_NumericColumn` instance.
+    ValueError: if label_key is None.
+  """
+  parsing_spec = fc.make_parse_example_spec(feature_columns)
+  if label_key in parsing_spec:
+    raise ValueError('label should not be used as feature. '
+                     'label_key: {}, features: {}'.format(
+                         label_key, parsing_spec.keys()))
+  parsing_spec[label_key] = parsing_ops.FixedLenFeature((1,), label_dtype,
+                                                        label_default)
+
+  if weight_column is None:
+    return parsing_spec
+
+  if isinstance(weight_column, six.string_types):
+    weight_column = fc.numeric_column(weight_column)
+
+  if not isinstance(weight_column, fc._NumericColumn):  # pylint: disable=protected-access
+    raise ValueError('weight_column should be an instance of '
+                     'tf.feature_column.numeric_column. '
+                     'Given type: {} value: {}'.format(
+                         type(weight_column), weight_column))
+
+  if weight_column.key in parsing_spec:
+    raise ValueError('weight_column should not be used as feature. '
+                     'weight_column: {}, features: {}'.format(
+                         weight_column.key, parsing_spec.keys()))
+
+  parsing_spec.update(weight_column._parse_example_spec)  # pylint: disable=protected-access
+  return parsing_spec
+
+
+@estimator_export('estimator.regressor_parse_example_spec')
+def regressor_parse_example_spec(feature_columns,
+                                 label_key,
+                                 label_dtype=dtypes.float32,
+                                 label_default=None,
+                                 label_dimension=1,
+                                 weight_column=None):
+  """Generates parsing spec for tf.parse_example to be used with regressors.
+
+  If users keep data in tf.Example format, they need to call tf.parse_example
+  with a proper feature spec. There are two main things that this utility helps:
+
+  * Users need to combine parsing spec of features with labels and weights
+    (if any) since they are all parsed from same tf.Example instance. This
+    utility combines these specs.
+  * It is difficult to map expected label by a regressor such as `DNNRegressor`
+    to corresponding tf.parse_example spec. This utility encodes it by getting
+    related information from users (key, dtype).
+
+  Example output of parsing spec:
+
+  ```python
+  # Define features and transformations
+  feature_b = tf.feature_column.numeric_column(...)
+  feature_c_bucketized = tf.feature_column.bucketized_column(
+    tf.feature_column.numeric_column("feature_c"), ...)
+  feature_a_x_feature_c = tf.feature_column.crossed_column(
+      columns=["feature_a", feature_c_bucketized], ...)
+
+  feature_columns = [feature_b, feature_c_bucketized, feature_a_x_feature_c]
+  parsing_spec = tf.estimator.regressor_parse_example_spec(
+      feature_columns, label_key='my-label')
+
+  # For the above example, regressor_parse_example_spec would return the dict:
+  assert parsing_spec == {
+    "feature_a": parsing_ops.VarLenFeature(tf.string),
+    "feature_b": parsing_ops.FixedLenFeature([1], dtype=tf.float32),
+    "feature_c": parsing_ops.FixedLenFeature([1], dtype=tf.float32)
+    "my-label" : parsing_ops.FixedLenFeature([1], dtype=tf.float32)
+  }
+  ```
+
+  Example usage with a regressor:
+
+  ```python
+  feature_columns = # define features via tf.feature_column
+  estimator = DNNRegressor(
+      hidden_units=[256, 64, 16],
+      feature_columns=feature_columns,
+      weight_column='example-weight',
+      label_dimension=3)
+  # This label configuration tells the regressor the following:
+  # * weights are retrieved with key 'example-weight'
+  # * label is a 3 dimension tensor with float32 dtype.
+
+
+  # Input builders
+  def input_fn_train():  # Returns a tuple of features and labels.
+    features = tf.contrib.learn.read_keyed_batch_features(
+        file_pattern=train_files,
+        batch_size=batch_size,
+        # creates parsing configuration for tf.parse_example
+        features=tf.estimator.classifier_parse_example_spec(
+            feature_columns,
+            label_key='my-label',
+            label_dimension=3,
+            weight_column='example-weight'),
+        reader=tf.RecordIOReader)
+     labels = features.pop('my-label')
+     return features, labels
+
+  estimator.train(input_fn=input_fn_train)
+  ```
+
+  Args:
+    feature_columns: An iterable containing all feature columns. All items
+      should be instances of classes derived from `_FeatureColumn`.
+    label_key: A string identifying the label. It means tf.Example stores labels
+      with this key.
+    label_dtype: A `tf.dtype` identifies the type of labels. By default it is
+      `tf.float32`.
+    label_default: used as label if label_key does not exist in given
+      tf.Example. By default default_value is none, which means
+      `tf.parse_example` will error out if there is any missing label.
+    label_dimension: Number of regression targets per example. This is the
+      size of the last dimension of the labels and logits `Tensor` objects
+      (typically, these have shape `[batch_size, label_dimension]`).
+    weight_column: A string or a `_NumericColumn` created by
+      `tf.feature_column.numeric_column` defining feature column representing
+      weights. It is used to down weight or boost examples during training. It
+      will be multiplied by the loss of the example. If it is a string, it is
+      used as a key to fetch weight tensor from the `features`. If it is a
+      `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
+      then weight_column.normalizer_fn is applied on it to get weight tensor.
+
+  Returns:
+    A dict mapping each feature key to a `FixedLenFeature` or `VarLenFeature`
+    value.
+
+  Raises:
+    ValueError: If label is used in `feature_columns`.
+    ValueError: If weight_column is used in `feature_columns`.
+    ValueError: If any of the given `feature_columns` is not a `_FeatureColumn`
+      instance.
+    ValueError: If `weight_column` is not a `_NumericColumn` instance.
+    ValueError: if label_key is None.
+  """
+  parsing_spec = fc.make_parse_example_spec(feature_columns)
+  if label_key in parsing_spec:
+    raise ValueError('label should not be used as feature. '
+                     'label_key: {}, features: {}'.format(
+                         label_key, parsing_spec.keys()))
+  parsing_spec[label_key] = parsing_ops.FixedLenFeature(
+      (label_dimension,), label_dtype, label_default)
+
+  if weight_column is None:
+    return parsing_spec
+
+  if isinstance(weight_column, six.string_types):
+    weight_column = fc.numeric_column(weight_column)
+
+  if not isinstance(weight_column, fc._NumericColumn):  # pylint: disable=protected-access
+    raise ValueError('weight_column should be an instance of '
+                     'tf.feature_column.numeric_column. '
+                     'Given type: {} value: {}'.format(
+                         type(weight_column), weight_column))
 
-# Include attrs that start with single underscore.
-parsing_utils.__all__ = [
-    s for s in dir(parsing_utils) if not s.startswith('__')
-]
+  if weight_column.key in parsing_spec:
+    raise ValueError('weight_column should not be used as feature. '
+                     'weight_column: {}, features: {}'.format(
+                         weight_column.key, parsing_spec.keys()))
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.parsing_utils import *
+  parsing_spec.update(weight_column._parse_example_spec)  # pylint: disable=protected-access
+  return parsing_spec
diff --git a/tensorflow/python/estimator/canned/parsing_utils_test.py b/tensorflow/python/estimator/canned/parsing_utils_test.py
new file mode 100644
index 0000000000..366bb104ca
--- /dev/null
+++ b/tensorflow/python/estimator/canned/parsing_utils_test.py
@@ -0,0 +1,211 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for parsing_utils.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.estimator.canned import parsing_utils
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import test
+
+
+class ClassifierParseExampleSpec(test.TestCase):
+  """Tests tf.estimator.classifier_parse_example_spec."""
+
+  def test_defaults(self):
+    parsing_spec = parsing_utils.classifier_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')], label_key='b')
+    expected_spec = {
+        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_string(self):
+    parsing_spec = parsing_utils.classifier_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')],
+        label_key='b',
+        label_dtype=dtypes.string)
+    expected_spec = {
+        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.string),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  # TODO(ispir): test label_default_value compatibility with label_dtype
+  def test_label_default_value(self):
+    parsing_spec = parsing_utils.classifier_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')],
+        label_key='b',
+        label_default=0)
+    expected_spec = {
+        'a':
+            parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b':
+            parsing_ops.FixedLenFeature(
+                (1,), dtype=dtypes.int64, default_value=0),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_weight_column_as_string(self):
+    parsing_spec = parsing_utils.classifier_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')],
+        label_key='b',
+        weight_column='c')
+    expected_spec = {
+        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
+        'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_weight_column_as_numeric_column(self):
+    parsing_spec = parsing_utils.classifier_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')],
+        label_key='b',
+        weight_column=fc.numeric_column('c'))
+    expected_spec = {
+        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
+        'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_label_key_should_not_be_used_as_feature(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'label should not be used as feature'):
+      parsing_utils.classifier_parse_example_spec(
+          feature_columns=[fc.numeric_column('a')], label_key='a')
+
+  def test_weight_column_should_not_be_used_as_feature(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'weight_column should not be used as feature'):
+      parsing_utils.classifier_parse_example_spec(
+          feature_columns=[fc.numeric_column('a')],
+          label_key='b',
+          weight_column=fc.numeric_column('a'))
+
+  def test_weight_column_should_be_a_numeric_column(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'tf.feature_column.numeric_column'):
+      not_a_numeric_column = 3
+      parsing_utils.classifier_parse_example_spec(
+          feature_columns=[fc.numeric_column('a')],
+          label_key='b',
+          weight_column=not_a_numeric_column)
+
+
+class RegressorParseExampleSpec(test.TestCase):
+  """Tests tf.estimator.classifier_parse_example_spec."""
+
+  def test_defaults(self):
+    parsing_spec = parsing_utils.regressor_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')], label_key='b')
+    expected_spec = {
+        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_int64(self):
+    parsing_spec = parsing_utils.regressor_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')],
+        label_key='b',
+        label_dtype=dtypes.int64)
+    expected_spec = {
+        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_label_default_value(self):
+    parsing_spec = parsing_utils.regressor_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')],
+        label_key='b',
+        label_default=0.)
+    expected_spec = {
+        'a':
+            parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b':
+            parsing_ops.FixedLenFeature(
+                (1,), dtype=dtypes.float32, default_value=0.),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_label_dimension(self):
+    parsing_spec = parsing_utils.regressor_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')],
+        label_key='b',
+        label_dimension=3)
+    expected_spec = {
+        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b': parsing_ops.FixedLenFeature((3,), dtype=dtypes.float32),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_weight_column_as_string(self):
+    parsing_spec = parsing_utils.regressor_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')],
+        label_key='b',
+        weight_column='c')
+    expected_spec = {
+        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_weight_column_as_numeric_column(self):
+    parsing_spec = parsing_utils.regressor_parse_example_spec(
+        feature_columns=[fc.numeric_column('a')],
+        label_key='b',
+        weight_column=fc.numeric_column('c'))
+    expected_spec = {
+        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+        'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
+    }
+    self.assertDictEqual(expected_spec, parsing_spec)
+
+  def test_label_key_should_not_be_used_as_feature(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'label should not be used as feature'):
+      parsing_utils.regressor_parse_example_spec(
+          feature_columns=[fc.numeric_column('a')], label_key='a')
+
+  def test_weight_column_should_not_be_used_as_feature(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'weight_column should not be used as feature'):
+      parsing_utils.regressor_parse_example_spec(
+          feature_columns=[fc.numeric_column('a')],
+          label_key='b',
+          weight_column=fc.numeric_column('a'))
+
+  def test_weight_column_should_be_a_numeric_column(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'tf.feature_column.numeric_column'):
+      not_a_numeric_column = 3
+      parsing_utils.regressor_parse_example_spec(
+          feature_columns=[fc.numeric_column('a')],
+          label_key='b',
+          weight_column=not_a_numeric_column)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/canned/prediction_keys.py b/tensorflow/python/estimator/canned/prediction_keys.py
index 2cf332ce19..daa275b46b 100644
--- a/tensorflow/python/estimator/canned/prediction_keys.py
+++ b/tensorflow/python/estimator/canned/prediction_keys.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""prediction_keys python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Enum for model prediction keys."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.canned import prediction_keys
 
-# Include attrs that start with single underscore.
-prediction_keys.__all__ = [
-    s for s in dir(prediction_keys) if not s.startswith('__')
-]
+class PredictionKeys(object):
+  """Enum for canonical model prediction keys.
+
+  The following values are defined:
+  PREDICTIONS: Used by models that predict values, such as regressor models.
+  """
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.canned.prediction_keys import *
+  CLASSES = 'classes'
+  CLASS_IDS = 'class_ids'
+  LOGISTIC = 'logistic'
+  LOGITS = 'logits'
+  PREDICTIONS = 'predictions'
+  PROBABILITIES = 'probabilities'
+  TOP_K = 'top_k'
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index c43f0513bd..e6d82f0db7 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,2166 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""estimator python module.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Base Estimator class."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator import estimator
+import collections
+import copy
+import os
+import tempfile
 
-# Include attrs that start with single underscore.
-estimator.__all__ = [s for s in dir(estimator) if not s.startswith('__')]
+import numpy as np
+import six
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.estimator import *
+from google.protobuf import message
+from tensorflow.core.framework import summary_pb2
+from tensorflow.python.client import session as tf_session
+from tensorflow.python.eager import context
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator import run_config
+from tensorflow.python.estimator import util as estimator_util
+from tensorflow.python.estimator.export import export as export_helpers
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import builder as saved_model_builder
+from tensorflow.python.saved_model import utils_impl as saved_model_utils
+from tensorflow.python.summary import summary
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import device_setter
+from tensorflow.python.training import distribute as distribute_lib
+from tensorflow.python.training import evaluation
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import saver
+from tensorflow.python.training import training
+from tensorflow.python.training import training_util
+from tensorflow.python.training import warm_starting_util
+from tensorflow.python.util import compat
+from tensorflow.python.util import compat_internal
+from tensorflow.python.util import function_utils
+from tensorflow.python.util import nest
+from tensorflow.python.util.tf_export import estimator_export
+
+
+_VALID_MODEL_FN_ARGS = set(
+    ['features', 'labels', 'mode', 'params', 'self', 'config'])
+
+
+@estimator_export('estimator.Estimator')
+class Estimator(object):
+  """Estimator class to train and evaluate TensorFlow models.
+
+  The `Estimator` object wraps a model which is specified by a `model_fn`,
+  which, given inputs and a number of other parameters, returns the ops
+  necessary to perform training, evaluation, or predictions.
+
+  All outputs (checkpoints, event files, etc.) are written to `model_dir`, or a
+  subdirectory thereof. If `model_dir` is not set, a temporary directory is
+  used.
+
+  The `config` argument can be passed `tf.estimator.RunConfig` object containing
+  information about the execution environment. It is passed on to the
+  `model_fn`, if the `model_fn` has a parameter named "config" (and input
+  functions in the same manner). If the `config` parameter is not passed, it is
+  instantiated by the `Estimator`. Not passing config means that defaults useful
+  for local execution are used. `Estimator` makes config available to the model
+  (for instance, to allow specialization based on the number of workers
+  available), and also uses some of its fields to control internals, especially
+  regarding checkpointing.
+
+  The `params` argument contains hyperparameters. It is passed to the
+  `model_fn`, if the `model_fn` has a parameter named "params", and to the input
+  functions in the same manner. `Estimator` only passes params along, it does
+  not inspect it. The structure of `params` is therefore entirely up to the
+  developer.
+
+  None of `Estimator`'s methods can be overridden in subclasses (its
+  constructor enforces this). Subclasses should use `model_fn` to configure
+  the base class, and may add methods implementing specialized functionality.
+
+  @compatibility(eager)
+  Calling methods of `Estimator` will work while eager execution is enabled.
+  However, the `model_fn` and `input_fn` is not executed eagerly, `Estimator`
+  will switch to graph model before calling all user-provided functions (incl.
+  hooks), so their code has to be compatible with graph mode execution. Note
+  that `input_fn` code using `tf.data` generally works in both graph and eager
+  modes.
+  @end_compatibility
+  """
+
+  def __init__(self, model_fn, model_dir=None, config=None, params=None,
+               warm_start_from=None):
+    """Constructs an `Estimator` instance.
+
+    See [estimators](https://tensorflow.org/guide/estimators) for more
+    information.
+
+    To warm-start an `Estimator`:
+
+    ```python
+    estimator = tf.estimator.DNNClassifier(
+        feature_columns=[categorical_feature_a_emb, categorical_feature_b_emb],
+        hidden_units=[1024, 512, 256],
+        warm_start_from="/path/to/checkpoint/dir")
+    ```
+
+    For more details on warm-start configuration, see
+    `tf.estimator.WarmStartSettings`.
+
+    Args:
+      model_fn: Model function. Follows the signature:
+
+        * Args:
+
+          * `features`: This is the first item returned from the `input_fn`
+                 passed to `train`, `evaluate`, and `predict`. This should be a
+                 single `tf.Tensor` or `dict` of same.
+          * `labels`: This is the second item returned from the `input_fn`
+                 passed to `train`, `evaluate`, and `predict`. This should be a
+                 single `tf.Tensor` or `dict` of same (for multi-head models).
+                 If mode is `tf.estimator.ModeKeys.PREDICT`, `labels=None` will
+                 be passed. If the `model_fn`'s signature does not accept
+                 `mode`, the `model_fn` must still be able to handle
+                 `labels=None`.
+          * `mode`: Optional. Specifies if this training, evaluation or
+                 prediction. See `tf.estimator.ModeKeys`.
+          * `params`: Optional `dict` of hyperparameters.  Will receive what
+                 is passed to Estimator in `params` parameter. This allows
+                 to configure Estimators from hyper parameter tuning.
+          * `config`: Optional `estimator.RunConfig` object. Will receive what
+                 is passed to Estimator as its `config` parameter, or a default
+                 value. Allows setting up things in your `model_fn` based on
+                 configuration such as `num_ps_replicas`, or `model_dir`.
+
+        * Returns:
+          `tf.estimator.EstimatorSpec`
+
+      model_dir: Directory to save model parameters, graph and etc. This can
+        also be used to load checkpoints from the directory into an estimator to
+        continue training a previously saved model. If `PathLike` object, the
+        path will be resolved. If `None`, the model_dir in `config` will be used
+        if set. If both are set, they must be same. If both are `None`, a
+        temporary directory will be used.
+      config: `estimator.RunConfig` configuration object.
+      params: `dict` of hyper parameters that will be passed into `model_fn`.
+              Keys are names of parameters, values are basic python types.
+      warm_start_from: Optional string filepath to a checkpoint or SavedModel to
+                       warm-start from, or a `tf.estimator.WarmStartSettings`
+                       object to fully configure warm-starting.  If the string
+                       filepath is provided instead of a
+                       `tf.estimator.WarmStartSettings`, then all variables are
+                       warm-started, and it is assumed that vocabularies
+                       and `tf.Tensor` names are unchanged.
+
+    Raises:
+      ValueError: parameters of `model_fn` don't match `params`.
+      ValueError: if this is called via a subclass and if that class overrides
+        a member of `Estimator`.
+    """
+    Estimator._assert_members_are_not_overridden(self)
+
+    self._config = maybe_overwrite_model_dir_and_session_config(config,
+                                                                model_dir)
+
+    # The distribute field contains an instance of DistributionStrategy.
+    self._train_distribution = self._config.train_distribute
+    self._eval_distribution = self._config.eval_distribute
+    # Model directory.
+    self._model_dir = self._config.model_dir
+    self._session_config = self._config.session_config
+    logging.info('Using config: %s', str(vars(self._config)))
+
+    self._device_fn = (
+        self._config.device_fn or _get_replica_device_setter(self._config))
+
+    if model_fn is None:
+      raise ValueError('model_fn must be provided to Estimator.')
+    _verify_model_fn_args(model_fn, params)
+    self._model_fn = model_fn
+    self._params = copy.deepcopy(params or {})
+
+    # pylint: disable=protected-access
+    self._warm_start_settings = _get_default_warm_start_settings(
+        warm_start_from)
+    # pylint: enable=protected-access
+
+  @property
+  def model_dir(self):
+    return self._model_dir
+
+  @property
+  def config(self):
+    return copy.deepcopy(self._config)
+
+  @property
+  def params(self):
+    return copy.deepcopy(self._params)
+
+  @property
+  def model_fn(self):
+    """Returns the `model_fn` which is bound to `self.params`.
+
+    Returns:
+      The `model_fn` with following signature:
+        `def model_fn(features, labels, mode, config)`
+    """
+
+    def public_model_fn(features, labels, mode, config):
+      return self._call_model_fn(features, labels, mode, config)
+
+    return public_model_fn
+
+  # TODO(ispir): support a list of names
+  def get_variable_value(self, name):
+    """Returns value of the variable given by name.
+
+    Args:
+      name: string or a list of string, name of the tensor.
+
+    Returns:
+      Numpy array - value of the tensor.
+
+    Raises:
+      ValueError: If the `Estimator` has not produced a checkpoint yet.
+    """
+    _check_checkpoint_available(self.model_dir)
+    with context.graph_mode():
+      return training.load_variable(self.model_dir, name)
+
+  def get_variable_names(self):
+    """Returns list of all variable names in this model.
+
+    Returns:
+      List of names.
+
+    Raises:
+      ValueError: If the `Estimator` has not produced a checkpoint yet.
+    """
+    _check_checkpoint_available(self.model_dir)
+    with context.graph_mode():
+      return [name for name, _ in training.list_variables(self.model_dir)]
+
+  def latest_checkpoint(self):
+    """Finds the filename of the latest saved checkpoint file in `model_dir`.
+
+    Returns:
+      The full path to the latest checkpoint or `None` if no checkpoint was
+      found.
+    """
+    with context.graph_mode():
+      return checkpoint_management.latest_checkpoint(self.model_dir)
+
+  def train(self,
+            input_fn,
+            hooks=None,
+            steps=None,
+            max_steps=None,
+            saving_listeners=None):
+    """Trains a model given training data `input_fn`.
+
+    Args:
+      input_fn: A function that provides input data for training as minibatches.
+        See [Premade Estimators](
+        https://tensorflow.org/guide/premade_estimators#create_input_functions)
+        for more information. The function should construct and return one of
+        the following:  * A
+        `tf.data.Dataset` object: Outputs of `Dataset` object must be a tuple
+        `(features, labels)` with same constraints as below. * A tuple
+        `(features, labels)`: Where `features` is a `tf.Tensor` or a dictionary
+        of string feature name to `Tensor` and `labels` is a `Tensor` or a
+        dictionary of string label name to `Tensor`. Both `features` and
+        `labels` are consumed by `model_fn`. They should satisfy the expectation
+        of `model_fn` from inputs.
+      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
+        callbacks inside the training loop.
+      steps: Number of steps for which to train the model. If `None`, train
+        forever or train until `input_fn` generates the `tf.errors.OutOfRange`
+        error or `StopIteration` exception. `steps` works incrementally. If you
+        call two times `train(steps=10)` then training occurs in total 20 steps.
+        If `OutOfRange` or `StopIteration` occurs in the middle, training stops
+        before 20 steps. If you don't want to have incremental behavior please
+        set `max_steps` instead. If set, `max_steps` must be `None`.
+      max_steps: Number of total steps for which to train model. If `None`,
+        train forever or train until `input_fn` generates the
+        `tf.errors.OutOfRange` error or `StopIteration` exception. If set,
+        `steps` must be `None`. If `OutOfRange` or `StopIteration` occurs in the
+        middle, training stops before `max_steps` steps. Two calls to
+        `train(steps=100)` means 200 training iterations. On the other hand, two
+        calls to `train(max_steps=100)` means that the second call will not do
+        any iteration since first call did all 100 steps.
+      saving_listeners: list of `CheckpointSaverListener` objects. Used for
+        callbacks that run immediately before or after checkpoint savings.
+
+    Returns:
+      `self`, for chaining.
+
+    Raises:
+      ValueError: If both `steps` and `max_steps` are not `None`.
+      ValueError: If either `steps` or `max_steps <= 0`.
+    """
+    if self.config.task_type in (run_config.TaskType.EVALUATOR,
+                                 run_config.TaskType.PS):
+      raise ValueError(
+          'Train has been called wrong configuration. Please use '
+          'tf.estimator.train_and_evaluate which calls proper API according '
+          'to given configuration. Current configuration: {}.'.format(
+              self.config))
+
+    with context.graph_mode():
+      if (steps is not None) and (max_steps is not None):
+        raise ValueError('Can not provide both steps and max_steps.')
+      if steps is not None and steps <= 0:
+        raise ValueError('Must specify steps > 0, given: {}'.format(steps))
+      if max_steps is not None and max_steps <= 0:
+        raise ValueError(
+            'Must specify max_steps > 0, given: {}'.format(max_steps))
+
+      if max_steps is not None:
+        start_step = _load_global_step_from_checkpoint_dir(self._model_dir)
+        if max_steps <= start_step:
+          logging.info('Skipping training since max_steps has already saved.')
+          return self
+
+      hooks = _check_hooks_type(hooks)
+      hooks.extend(self._convert_train_steps_to_hooks(steps, max_steps))
+
+      saving_listeners = _check_listeners_type(saving_listeners)
+      loss = self._train_model(input_fn, hooks, saving_listeners)
+      logging.info('Loss for final step: %s.', loss)
+      return self
+
+  def _convert_train_steps_to_hooks(self, steps, max_steps):
+    """Create hooks to run correct number of steps in training.
+
+    Args:
+      steps: number of steps to run during training.
+      max_steps: maximum number of steps to be run during training. It'll be
+        the maximum number of steps the model will train to after restoring
+        from checkpoint even across multiple estimator.train calls.
+
+    Returns:
+      List of hooks to be passed to the estimator.
+    """
+    if steps is not None or max_steps is not None:
+      if self._train_distribution:
+        steps_per_run = getattr(self._train_distribution, 'steps_per_run', 1)
+        if steps_per_run > 1:
+          return [basic_session_run_hooks._MultiStepStopAtStepHook(  # pylint: disable=protected-access
+              steps, max_steps, steps_per_run)]
+      return [training.StopAtStepHook(steps, max_steps)]
+    else:
+      return []
+
+  def eval_dir(self, name=None):
+    """Shows the directory name where evaluation metrics are dumped.
+
+    Args:
+      name: Name of the evaluation if user needs to run multiple evaluations on
+        different data sets, such as on training data vs test data. Metrics for
+        different evaluations are saved in separate folders, and appear
+        separately in tensorboard.
+
+    Returns:
+      A string which is the path of directory contains evaluation metrics.
+    """
+    return os.path.join(self._model_dir, 'eval' if not name else
+                        'eval_' + name)
+
+  def evaluate(self, input_fn, steps=None, hooks=None, checkpoint_path=None,
+               name=None):
+    """Evaluates the model given evaluation data `input_fn`.
+
+    For each step, calls `input_fn`, which returns one batch of data.
+    Evaluates until:
+    - `steps` batches are processed, or
+    - `input_fn` raises an end-of-input exception (`tf.errors.OutOfRangeError`
+    or
+    `StopIteration`).
+
+    Args:
+      input_fn: A function that constructs the input data for evaluation. See
+        [Premade Estimators](
+        https://tensorflow.org/guide/premade#create_input_functions)
+        for more information. The
+        function should construct and return one of the following:  * A
+        `tf.data.Dataset` object: Outputs of `Dataset` object must be a tuple
+        `(features, labels)` with same constraints as below. * A tuple
+        `(features, labels)`: Where `features` is a `tf.Tensor` or a dictionary
+        of string feature name to `Tensor` and `labels` is a `Tensor` or a
+        dictionary of string label name to `Tensor`. Both `features` and
+        `labels` are consumed by `model_fn`. They should satisfy the expectation
+        of `model_fn` from inputs.
+      steps: Number of steps for which to evaluate model. If `None`, evaluates
+        until `input_fn` raises an end-of-input exception.
+      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
+        callbacks inside the evaluation call.
+      checkpoint_path: Path of a specific checkpoint to evaluate. If `None`, the
+        latest checkpoint in `model_dir` is used.  If there are no checkpoints
+        in `model_dir`, evaluation is run with newly initialized `Variables`
+        instead of ones restored from checkpoint.
+      name: Name of the evaluation if user needs to run multiple evaluations on
+        different data sets, such as on training data vs test data. Metrics for
+        different evaluations are saved in separate folders, and appear
+        separately in tensorboard.
+
+    Returns:
+      A dict containing the evaluation metrics specified in `model_fn` keyed by
+      name, as well as an entry `global_step` which contains the value of the
+      global step for which this evaluation was performed. For canned
+      estimators, the dict contains the `loss` (mean loss per mini-batch) and
+      the `average_loss` (mean loss per sample). Canned classifiers also return
+      the `accuracy`. Canned regressors also return the `label/mean` and the
+      `prediction/mean`.
+
+    Raises:
+      ValueError: If `steps <= 0`.
+      ValueError: If no model has been trained, namely `model_dir`, or the
+        given `checkpoint_path` is empty.
+    """
+    with context.graph_mode():
+      hooks = _check_hooks_type(hooks)
+      hooks.extend(self._convert_eval_steps_to_hooks(steps))
+
+      # Check that model has been trained (if nothing has been set explicitly).
+      if not checkpoint_path:
+        latest_path = checkpoint_management.latest_checkpoint(self._model_dir)
+        if not latest_path:
+          logging.info('Could not find trained model in model_dir: {}, running '
+                       'initialization to evaluate.'.format(self._model_dir))
+        checkpoint_path = latest_path
+
+      def _evaluate():
+        (scaffold, update_op, eval_dict, all_hooks) = (
+            self._evaluate_build_graph(input_fn, hooks, checkpoint_path))
+        return self._evaluate_run(
+            checkpoint_path=checkpoint_path,
+            scaffold=scaffold,
+            update_op=update_op,
+            eval_dict=eval_dict,
+            all_hooks=all_hooks,
+            output_dir=self.eval_dir(name))
+
+      with ops.Graph().as_default():
+        if self._eval_distribution:
+          # We want to create the iterations variable outside the distribution
+          # scope as that is just stored on the host and mainly used to drive
+          # the loop and doesn't need to be a Mirrored/Device variable.
+          training.get_or_create_steps_per_run_variable()
+          with self._eval_distribution.scope():
+            return _evaluate()
+        else:
+          return _evaluate()
+
+  def _convert_eval_steps_to_hooks(self, steps):
+    """Create hooks to run correct number of steps in evaluation.
+
+    Args:
+      steps: number of steps to run during evaluation.
+
+    Raises:
+      ValueError: if steps is less than or equal to zero.
+
+    Returns:
+      List of hooks to be passed to the estimator.
+    """
+    if steps is None:
+      return []
+
+    if steps <= 0:
+      raise ValueError('Must specify steps > 0, given: {}'.format(steps))
+
+    # The hooks are declared as private in evaluation.py discourage the use
+    # by other libraries or open source users. This should be the only usage
+    # of the estimator evaluation hooks.
+    if self._eval_distribution:
+      steps_per_run = getattr(self._eval_distribution, 'steps_per_run', 1)
+      if steps_per_run > 1:
+        return [evaluation._MultiStepStopAfterNEvalsHook(  # pylint: disable=protected-access
+            num_evals=steps, steps_per_run=steps_per_run)]
+    return [evaluation._StopAfterNEvalsHook(num_evals=steps)]  # pylint: disable=protected-access
+
+  def predict(self,
+              input_fn,
+              predict_keys=None,
+              hooks=None,
+              checkpoint_path=None,
+              yield_single_examples=True):
+    """Yields predictions for given features.
+
+    Please note that interleaving two predict outputs does not work. See:
+    [issue/20506](
+    https://github.com/tensorflow/tensorflow/issues/20506#issuecomment-422208517)
+
+    Args:
+      input_fn: A function that constructs the features. Prediction continues
+        until `input_fn` raises an end-of-input exception
+        (`tf.errors.OutOfRangeError` or `StopIteration`).
+        See [Premade Estimators](
+        https://tensorflow.org/guide/premade_estimators#create_input_functions)
+        for more information. The function should construct and return one of
+        the following:
+
+          * A `tf.data.Dataset` object: Outputs of `Dataset` object must have
+            same constraints as below.
+          * features: A `tf.Tensor` or a dictionary of string feature name to
+            `Tensor`. features are consumed by `model_fn`. They should satisfy
+            the expectation of `model_fn` from inputs.
+          * A tuple, in which case the first item is extracted as features.
+
+      predict_keys: list of `str`, name of the keys to predict. It is used if
+        the `tf.estimator.EstimatorSpec.predictions` is a `dict`. If
+        `predict_keys` is used then rest of the predictions will be filtered
+        from the dictionary. If `None`, returns all.
+      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
+        callbacks inside the prediction call.
+      checkpoint_path: Path of a specific checkpoint to predict. If `None`, the
+        latest checkpoint in `model_dir` is used.  If there are no checkpoints
+        in `model_dir`, prediction is run with newly initialized `Variables`
+        instead of ones restored from checkpoint.
+      yield_single_examples: If `False`, yields the whole batch as returned by
+        the `model_fn` instead of decomposing the batch into individual
+        elements. This is useful if `model_fn` returns some tensors whose first
+        dimension is not equal to the batch size.
+
+    Yields:
+      Evaluated values of `predictions` tensors.
+
+    Raises:
+      ValueError: Could not find a trained model in `model_dir`.
+      ValueError: If batch length of predictions is not the same and
+        `yield_single_examples` is `True`.
+      ValueError: If there is a conflict between `predict_keys` and
+        `predictions`. For example if `predict_keys` is not `None` but
+        `tf.estimator.EstimatorSpec.predictions` is not a `dict`.
+    """
+    with context.graph_mode():
+      hooks = _check_hooks_type(hooks)
+      # Check that model has been trained.
+      if not checkpoint_path:
+        checkpoint_path = checkpoint_management.latest_checkpoint(
+            self._model_dir)
+      if not checkpoint_path:
+        logging.info('Could not find trained model in model_dir: {}, running '
+                     'initialization to predict.'.format(self._model_dir))
+      with ops.Graph().as_default() as g:
+        random_seed.set_random_seed(self._config.tf_random_seed)
+        self._create_and_assert_global_step(g)
+        features, input_hooks = self._get_features_from_input_fn(
+            input_fn, model_fn_lib.ModeKeys.PREDICT)
+        estimator_spec = self._call_model_fn(
+            features, None, model_fn_lib.ModeKeys.PREDICT, self.config)
+
+        # Call to warm_start has to be after model_fn is called.
+        self._maybe_warm_start(checkpoint_path)
+
+        predictions = self._extract_keys(
+            estimator_spec.predictions, predict_keys)
+        all_hooks = list(input_hooks)
+        all_hooks.extend(hooks)
+        all_hooks.extend(list(estimator_spec.prediction_hooks or []))
+        with training.MonitoredSession(
+            session_creator=training.ChiefSessionCreator(
+                checkpoint_filename_with_path=checkpoint_path,
+                master=self._config.master,
+                scaffold=estimator_spec.scaffold,
+                config=self._session_config),
+            hooks=all_hooks) as mon_sess:
+          while not mon_sess.should_stop():
+            preds_evaluated = mon_sess.run(predictions)
+            if not yield_single_examples:
+              yield preds_evaluated
+            elif not isinstance(predictions, dict):
+              for pred in preds_evaluated:
+                yield pred
+            else:
+              for i in range(self._extract_batch_length(preds_evaluated)):
+                yield {
+                    key: value[i]
+                    for key, value in six.iteritems(preds_evaluated)
+                }
+
+  def _assert_members_are_not_overridden(self):
+    """Asserts members of `Estimator` are not overridden."""
+    # TPUEstimator is special cased (owned by TF).
+    if self.__class__.__name__ == 'TPUEstimator':
+      return
+
+    allowed_overrides = set([
+        '_create_and_assert_global_step',
+        '_tf_api_names', '_tf_api_names_v1', '_estimator_api_names',
+        '_estimator_api_names_v1', '_estimator_api_constants',
+        '_estimator_api_constants_v1',
+    ])
+    estimator_members = set([m for m in Estimator.__dict__.keys()
+                             if not m.startswith('__')])
+    subclass_members = set(self.__class__.__dict__.keys())
+    common_members = estimator_members & subclass_members - allowed_overrides
+    overridden_members = [
+        m for m in common_members
+        if Estimator.__dict__[m] != self.__class__.__dict__[m]]
+    if overridden_members:
+      raise ValueError(
+          'Subclasses of Estimator cannot override members of Estimator. '
+          '{} does override {}'.format(self.__class__, overridden_members))
+
+  def export_savedmodel(
+      self, export_dir_base, serving_input_receiver_fn,
+      assets_extra=None,
+      as_text=False,
+      checkpoint_path=None,
+      strip_default_attrs=False):
+    # pylint: disable=line-too-long,g-doc-args,g-doc-return-or-yield
+    """Exports inference graph as a `SavedModel` into the given dir.
+
+    Note that `export_to_savedmodel` will be renamed to `export_saved_model`
+    in TensorFlow 2.0. At that time, `export_to_savedmodel` without the
+    additional underscore will be available only through tf.compat.v1.
+
+    Please see `tf.estimator.Estimator.export_saved_model` for more information.
+
+    There is one additional arg versus the new method:
+      strip_default_attrs: This parameter is going away in TF 2.0, and
+        the new behavior will automatically strip all default attributes.
+        Boolean. If `True`, default-valued attributes will be
+        removed from the `NodeDef`s. For a detailed guide, see [Stripping
+        Default-Valued Attributes](
+        https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
+    """
+    # pylint: enable=line-too-long,g-doc-args,g-doc-return-or-yield
+    return self._export_saved_model_for_mode(
+        export_dir_base,
+        serving_input_receiver_fn,
+        assets_extra=assets_extra,
+        as_text=as_text,
+        checkpoint_path=checkpoint_path,
+        strip_default_attrs=strip_default_attrs,
+        mode=model_fn_lib.ModeKeys.PREDICT)
+
+  def export_saved_model(
+      self, export_dir_base, serving_input_receiver_fn,
+      assets_extra=None,
+      as_text=False,
+      checkpoint_path=None):
+    # pylint: disable=line-too-long
+    """Exports inference graph as a `SavedModel` into the given dir.
+
+    For a detailed guide, see
+    [Using SavedModel with Estimators](https://tensorflow.org/guide/saved_model#using_savedmodel_with_estimators).
+
+    This method builds a new graph by first calling the
+    `serving_input_receiver_fn` to obtain feature `Tensor`s, and then calling
+    this `Estimator`'s `model_fn` to generate the model graph based on those
+    features. It restores the given checkpoint (or, lacking that, the most
+    recent checkpoint) into this graph in a fresh session.  Finally it creates
+    a timestamped export directory below the given `export_dir_base`, and writes
+    a `SavedModel` into it containing a single `tf.MetaGraphDef` saved from this
+    session.
+
+    The exported `MetaGraphDef` will provide one `SignatureDef` for each
+    element of the `export_outputs` dict returned from the `model_fn`, named
+    using
+    the same keys.  One of these keys is always
+    `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`,
+    indicating which
+    signature will be served when a serving request does not specify one.
+    For each signature, the outputs are provided by the corresponding
+    `tf.estimator.export.ExportOutput`s, and the inputs are always the input
+    receivers provided by
+    the `serving_input_receiver_fn`.
+
+    Extra assets may be written into the `SavedModel` via the `assets_extra`
+    argument.  This should be a dict, where each key gives a destination path
+    (including the filename) relative to the assets.extra directory.  The
+    corresponding value gives the full path of the source file to be copied.
+    For example, the simple case of copying a single file without renaming it
+    is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
+
+    Args:
+      export_dir_base: A string containing a directory in which to create
+        timestamped subdirectories containing exported `SavedModel`s.
+      serving_input_receiver_fn: A function that takes no argument and returns a
+        `tf.estimator.export.ServingInputReceiver` or
+        `tf.estimator.export.TensorServingInputReceiver`.
+      assets_extra: A dict specifying how to populate the assets.extra directory
+        within the exported `SavedModel`, or `None` if no extra assets are
+        needed.
+      as_text: whether to write the `SavedModel` proto in text format.
+      checkpoint_path: The checkpoint path to export.  If `None` (the default),
+        the most recent checkpoint found within the model directory is chosen.
+
+    Returns:
+      The string path to the exported directory.
+
+    Raises:
+      ValueError: if no `serving_input_receiver_fn` is provided, no
+      `export_outputs` are provided, or no checkpoint can be found.
+    """
+    # pylint: enable=line-too-long
+    # TODO(b/111442174): `export_to_savedmodel` will be renamed to
+    # `export_saved_model` in TensorFlow 2.0. This function is a wrapper
+    # while staging the new version; do not add any logic here.
+    return self.export_savedmodel(
+        export_dir_base,
+        serving_input_receiver_fn,
+        assets_extra=assets_extra,
+        as_text=as_text,
+        checkpoint_path=checkpoint_path,
+        strip_default_attrs=True)
+
+  def _export_saved_model_for_mode(
+      self, export_dir_base, input_receiver_fn,
+      assets_extra=None,
+      as_text=False,
+      checkpoint_path=None,
+      strip_default_attrs=False,
+      mode=model_fn_lib.ModeKeys.PREDICT):
+    # pylint: disable=line-too-long
+    """Exports a single train/eval/predict graph as a `SavedModel`.
+
+    This method is a wrapper for `_export_all_saved_models`, and wraps a raw
+    `input_receiver_fn` in a dictionary to pass in to that function.
+    See `_export_all_saved_models` for full docs.
+
+    See `tf.contrib.estimator.export_saved_model_for_mode` for the currently
+    exposed version of this function.
+
+    Args:
+      export_dir_base: A string containing a directory in which to create
+        timestamped subdirectories containing exported `SavedModel`s.
+      input_receiver_fn: a function that takes no argument and returns the
+        appropriate subclass of `InputReceiver`.
+      assets_extra: A dict specifying how to populate the assets.extra directory
+        within the exported `SavedModel`, or `None` if no extra assets are
+        needed.
+      as_text: whether to write the `SavedModel` proto in text format.
+      checkpoint_path: The checkpoint path to export.  If `None` (the default),
+        the most recent checkpoint found within the model directory is chosen.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the `NodeDef`s. For a detailed guide, see [Stripping
+        Default-Valued
+        Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
+      mode: `tf.estimator.ModeKeys` value indicating with mode will be exported.
+
+    Returns:
+      The string path to the exported directory.
+
+    Raises:
+      ValueError: if `input_receiver_fn` is `None`, no `export_outputs`
+        are provided, or no checkpoint can be found.
+    """
+    # pylint: enable=line-too-long
+    if not input_receiver_fn:
+      raise ValueError('An input_receiver_fn must be defined.')
+
+    input_receiver_fn_map = {mode: input_receiver_fn}
+
+    return self._export_all_saved_models(
+        export_dir_base,
+        input_receiver_fn_map,
+        assets_extra=assets_extra,
+        as_text=as_text,
+        checkpoint_path=checkpoint_path,
+        strip_default_attrs=strip_default_attrs)
+
+  def _export_all_saved_models(
+      self, export_dir_base, input_receiver_fn_map,
+      assets_extra=None,
+      as_text=False,
+      checkpoint_path=None,
+      strip_default_attrs=False):
+    # pylint: disable=line-too-long
+    """Exports a `SavedModel` containing `tf.MetaGraphDefs` for each requested mode.
+
+    See `tf.contrib.estimator.export_all_saved_models` for the currently
+    exposed version of this function.
+
+    For each mode passed in via the `input_receiver_fn_map`,
+    this method builds a new graph by calling the `input_receiver_fn` to obtain
+    feature and label `Tensor`s. Next, this method calls the `Estimator`'s
+    `model_fn` in the passed mode to generate the model graph based on
+    those features and labels, and restores the given checkpoint
+    (or, lacking that, the most recent checkpoint) into the graph.
+    Only one of the modes is used for saving variables to the `SavedModel`
+    (order of preference: `tf.estimator.ModeKeys.TRAIN`,
+    `tf.estimator.ModeKeys.EVAL`, then
+    `tf.estimator.ModeKeys.PREDICT`), such that up to three
+    `tf.MetaGraphDefs` are saved with a single set of variables in a single
+    `SavedModel` directory.
+
+    For the variables and `tf.MetaGraphDefs`, a timestamped export directory
+    below
+    `export_dir_base`, and writes a `SavedModel` into it containing
+    the `tf.MetaGraphDef` for the given mode and its associated signatures.
+
+    For prediction, the exported `MetaGraphDef` will provide one `SignatureDef`
+    for each element of the `export_outputs` dict returned from the `model_fn`,
+    named using the same keys.  One of these keys is always
+    `tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`,
+    indicating which
+    signature will be served when a serving request does not specify one.
+    For each signature, the outputs are provided by the corresponding
+    `tf.estimator.export.ExportOutput`s, and the inputs are always the input
+    receivers provided by
+    the `serving_input_receiver_fn`.
+
+    For training and evaluation, the `train_op` is stored in an extra
+    collection,
+    and loss, metrics, and predictions are included in a `SignatureDef` for the
+    mode in question.
+
+    Extra assets may be written into the `SavedModel` via the `assets_extra`
+    argument.  This should be a dict, where each key gives a destination path
+    (including the filename) relative to the assets.extra directory.  The
+    corresponding value gives the full path of the source file to be copied.
+    For example, the simple case of copying a single file without renaming it
+    is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
+
+    Args:
+      export_dir_base: A string containing a directory in which to create
+        timestamped subdirectories containing exported `SavedModel`s.
+      input_receiver_fn_map: dict of `tf.estimator.ModeKeys` to
+        `input_receiver_fn` mappings, where the `input_receiver_fn` is a
+        function that takes no arguments and returns the appropriate subclass of
+        `InputReceiver`.
+      assets_extra: A dict specifying how to populate the assets.extra directory
+        within the exported `SavedModel`, or `None` if no extra assets are
+        needed.
+      as_text: whether to write the `SavedModel` proto in text format.
+      checkpoint_path: The checkpoint path to export.  If `None` (the default),
+        the most recent checkpoint found within the model directory is chosen.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the `NodeDef`s. For a detailed guide, see [Stripping
+        Default-Valued
+        Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
+
+    Returns:
+      A dict of `tf.estimator.ModeKeys` value to string path for each exported
+      directory.
+
+    Raises:
+      ValueError: if any `input_receiver_fn` is `None`, no `export_outputs`
+        are provided, or no checkpoint can be found.
+    """
+    # pylint: enable=line-too-long
+    # TODO(b/65561022): Consider allowing multiple input_receiver_fns per mode.
+    with context.graph_mode():
+      if not checkpoint_path:
+        # Locate the latest checkpoint
+        checkpoint_path = checkpoint_management.latest_checkpoint(
+            self._model_dir)
+      if not checkpoint_path:
+        raise ValueError("Couldn't find trained model at %s." % self._model_dir)
+
+      export_dir = export_helpers.get_timestamped_export_dir(export_dir_base)
+      temp_export_dir = export_helpers.get_temp_export_dir(export_dir)
+
+      builder = saved_model_builder.SavedModelBuilder(temp_export_dir)
+
+      save_variables = True
+      # Note that the order in which we run here matters, as the first
+      # mode we pass through will be used to save the variables. We run TRAIN
+      # first, as that is also the mode used for checkpoints, and therefore
+      # we are not likely to have vars in PREDICT that are not in the checkpoint
+      # created by TRAIN.
+      if input_receiver_fn_map.get(model_fn_lib.ModeKeys.TRAIN):
+        self._add_meta_graph_for_mode(
+            builder, input_receiver_fn_map, checkpoint_path,
+            strip_default_attrs, save_variables,
+            mode=model_fn_lib.ModeKeys.TRAIN)
+        save_variables = False
+      if input_receiver_fn_map.get(model_fn_lib.ModeKeys.EVAL):
+        self._add_meta_graph_for_mode(
+            builder, input_receiver_fn_map, checkpoint_path,
+            strip_default_attrs, save_variables,
+            mode=model_fn_lib.ModeKeys.EVAL)
+        save_variables = False
+      if input_receiver_fn_map.get(model_fn_lib.ModeKeys.PREDICT):
+        self._add_meta_graph_for_mode(
+            builder, input_receiver_fn_map, checkpoint_path,
+            strip_default_attrs, save_variables,
+            mode=model_fn_lib.ModeKeys.PREDICT)
+        save_variables = False
+
+      if save_variables:
+        raise ValueError('No valid modes for exporting found. Got {}.'.format(
+            input_receiver_fn_map.keys()))
+
+      builder.save(as_text)
+
+      # Add the extra assets
+      if assets_extra:
+        assets_extra_path = os.path.join(compat.as_bytes(temp_export_dir),
+                                         compat.as_bytes('assets.extra'))
+        for dest_relative, source in assets_extra.items():
+          dest_absolute = os.path.join(compat.as_bytes(assets_extra_path),
+                                       compat.as_bytes(dest_relative))
+          dest_path = os.path.dirname(dest_absolute)
+          gfile.MakeDirs(dest_path)
+          gfile.Copy(source, dest_absolute)
+
+      gfile.Rename(temp_export_dir, export_dir)
+      return export_dir
+
+  def _add_meta_graph_for_mode(self,
+                               builder,
+                               input_receiver_fn_map,
+                               checkpoint_path,
+                               strip_default_attrs,
+                               save_variables=True,
+                               mode=model_fn_lib.ModeKeys.PREDICT,
+                               export_tags=None,
+                               check_variables=True):
+    # pylint: disable=line-too-long
+    """Loads variables and adds them along with a `tf.MetaGraphDef` for saving.
+
+    Args:
+      builder: instance of `tf.saved_modle.builder.SavedModelBuilder` that will
+        be used for saving.
+      input_receiver_fn_map: dict of `tf.estimator.ModeKeys` to
+        `input_receiver_fn` mappings, where the `input_receiver_fn` is a
+        function that takes no argument and returns the appropriate subclass of
+        `InputReceiver`.
+      checkpoint_path: The checkpoint path to export.  If `None` (the default),
+        the most recent checkpoint found within the model directory is chosen.
+      strip_default_attrs: Boolean. If `True`, default-valued attributes will be
+        removed from the `NodeDef`s. For a detailed guide, see [Stripping
+        Default-Valued
+        Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).
+      save_variables: bool, whether variables should be saved. If `False`, just
+        the `tf.MetaGraphDef` will be saved. Note that `save_variables` should
+        only be `True` for the first call to this function, and the
+        `SavedModelBuilder` will raise an error if that is not the case.
+      mode: `tf.estimator.ModeKeys` value indicating which mode will be
+        exported.
+      export_tags: The set of tags with which to save `tf.MetaGraphDef`. If
+        `None`, a default set will be selected to matched the passed mode.
+      check_variables: bool, whether to check the checkpoint has all variables.
+
+    Raises:
+      ValueError: if `save_variables` is `True` and `check_variable` is `False`.
+    """
+    # pylint: enable=line-too-long
+    if export_tags is None:
+      export_tags = model_fn_lib.EXPORT_TAG_MAP[mode]
+    input_receiver_fn = input_receiver_fn_map[mode]
+
+    with ops.Graph().as_default() as g:
+      self._create_and_assert_global_step(g)
+      random_seed.set_random_seed(self._config.tf_random_seed)
+
+      input_receiver = input_receiver_fn()
+
+      # Call the model_fn and collect the export_outputs.
+      estimator_spec = self._call_model_fn(
+          features=input_receiver.features,
+          labels=getattr(input_receiver, 'labels', None),
+          mode=mode,
+          config=self.config)
+
+      export_outputs = model_fn_lib.export_outputs_for_mode(
+          mode=estimator_spec.mode,
+          serving_export_outputs=estimator_spec.export_outputs,
+          predictions=estimator_spec.predictions,
+          loss=estimator_spec.loss,
+          metrics=estimator_spec.eval_metric_ops)
+
+      # Build the SignatureDefs from receivers and all outputs
+      signature_def_map = export_helpers.build_all_signature_defs(
+          input_receiver.receiver_tensors,
+          export_outputs,
+          getattr(input_receiver, 'receiver_tensors_alternatives', None),
+          serving_only=(mode == model_fn_lib.ModeKeys.PREDICT))
+
+      with tf_session.Session(config=self._session_config) as session:
+
+        if estimator_spec.scaffold.local_init_op is not None:
+          local_init_op = estimator_spec.scaffold.local_init_op
+        else:
+          local_init_op = monitored_session.Scaffold.default_local_init_op()
+
+        # This saver will be used both for restoring variables now,
+        # and in saving out the metagraph below. This ensures that any
+        # Custom Savers stored with the Scaffold are passed through to the
+        # SavedModel for restore later.
+        graph_saver = estimator_spec.scaffold.saver or saver.Saver(sharded=True)
+
+        if save_variables and not check_variables:
+          raise ValueError('If `save_variables` is `True, `check_variables`'
+                           'must not be `False`.')
+        if check_variables:
+          try:
+            graph_saver.restore(session, checkpoint_path)
+          except errors.NotFoundError as e:
+            msg = ('Could not load all requested variables from checkpoint. '
+                   'Please make sure your model_fn does not expect variables '
+                   'that were not saved in the checkpoint.\n\n'
+                   'Encountered error with mode `{}` while restoring '
+                   'checkpoint from: `{}`. Full Traceback:\n\n{}').format(
+                       mode, checkpoint_path, e)
+            raise ValueError(msg)
+
+        # We add the train op explicitly for now, so that we don't have to
+        # change the Builder public interface. Note that this is a no-op
+        # for prediction, where train_op is None.
+        builder._add_train_op(estimator_spec.train_op)  # pylint: disable=protected-access
+
+        meta_graph_kwargs = dict(
+            tags=export_tags,
+            signature_def_map=signature_def_map,
+            assets_collection=ops.get_collection(
+                ops.GraphKeys.ASSET_FILEPATHS),
+            strip_default_attrs=strip_default_attrs,
+            legacy_init_op=local_init_op,
+            saver=graph_saver)
+
+        if save_variables:
+          builder.add_meta_graph_and_variables(
+              session, **meta_graph_kwargs)
+        else:
+          builder.add_meta_graph(**meta_graph_kwargs)
+
+  def _get_features_from_input_fn(self, input_fn, mode):
+    """Extracts the `features` from return values of `input_fn`."""
+    result = self._call_input_fn(input_fn, mode)
+    result, _, hooks = estimator_util.parse_input_fn_result(result)
+    self._validate_features_in_predict_input(result)
+    return result, hooks
+
+  def _validate_features_in_predict_input(self, result):
+    if not _has_dataset_or_queue_runner(result):
+      logging.warning('Input graph does not use tf.data.Dataset or contain a '
+                      'QueueRunner. That means predict yields forever. '
+                      'This is probably a mistake.')
+
+  def _get_iterator_from_input_fn(self, input_fn, mode, distribution=None):
+    if distribution is not None:
+      result = distribution.distribute_dataset(
+          lambda: self._call_input_fn(input_fn, mode))
+    else:
+      result = self._call_input_fn(input_fn, mode)
+
+    iterator = result.make_initializable_iterator()
+    input_hooks = [estimator_util._DatasetInitializerHook(iterator)]  # pylint: disable=protected-access
+    return iterator, input_hooks
+
+  def _get_features_and_labels_from_input_fn(self, input_fn, mode):
+    """Extracts the `features` and labels from return values of `input_fn`."""
+    return estimator_util.parse_input_fn_result(
+        self._call_input_fn(input_fn, mode))
+
+  def _extract_batch_length(self, preds_evaluated):
+    """Extracts batch length of predictions."""
+    batch_length = None
+    for key, value in six.iteritems(preds_evaluated):
+      batch_length = batch_length or value.shape[0]
+      if value.shape[0] != batch_length:
+        raise ValueError('Batch length of predictions should be same. %s has '
+                         'different batch length than others.' % key)
+    return batch_length
+
+  def _extract_keys(self, predictions, predict_keys):
+    """Extracts `predict_keys` from `predictions`."""
+    if not predict_keys:
+      return predictions
+    if not isinstance(predictions, dict):
+      raise ValueError(
+          'predict_keys argument is not valid in case of non-dict predictions.')
+    existing_keys = predictions.keys()
+    predictions = {
+        key: value
+        for key, value in six.iteritems(predictions) if key in predict_keys
+    }
+    if not predictions:
+      raise ValueError('Expected to run at least one output from %s, '
+                       'provided %s.' % (existing_keys, predict_keys))
+    return predictions
+
+  def _create_global_step(self, graph):
+    """Creates the global step tensor in graph.
+
+    The global step tensor must be an integer type with name 'global_step' and
+    be added to the collection `tf.GraphKeys.GLOBAL_STEP`.
+
+    Args:
+      graph: The graph in which to create the global step tensor.
+
+    Returns:
+      The global step `tf.Tensor`.
+    """
+    return training.create_global_step(graph)
+
+  def _create_and_assert_global_step(self, graph):
+    """Creates and asserts properties of the global step.
+
+    Args:
+      graph: The graph in which to create the global step tensor.
+
+    Returns:
+      The global step `tf.Tensor`.
+    """
+    step = self._create_global_step(graph)
+    assert step == training.get_global_step()
+    assert step.dtype.is_integer
+    return step
+
+  def _call_input_fn(self, input_fn, mode):
+    """Calls the input function.
+
+    Args:
+      input_fn: The input function.
+      mode: `tf.estimator.ModeKeys`
+
+    Returns:
+      The return value of the passed `input_fn`, which should be one of:
+
+        * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
+            tuple `(features, labels)` with same constraints as below.
+        * A tuple `(features, labels)`: Where `features` is a `Tensor` or a
+          dictionary of string feature name to `Tensor` and `labels` is a
+          `Tensor` or a dictionary of string label name to `Tensor`. Both
+          `features` and `labels` are consumed by `model_fn`. They should
+          satisfy the expectation of `model_fn` from inputs.
+
+    Raises:
+      ValueError: if `input_fn` takes invalid arguments.
+    """
+    input_fn_args = function_utils.fn_args(input_fn)
+    kwargs = {}
+    if 'mode' in input_fn_args:
+      kwargs['mode'] = mode
+    if 'params' in input_fn_args:
+      kwargs['params'] = self.params
+    if 'config' in input_fn_args:
+      kwargs['config'] = self.config
+    with ops.device('/cpu:0'):
+      return input_fn(**kwargs)
+
+  def _call_model_fn(self, features, labels, mode, config):
+    """Calls model function.
+
+    Args:
+      features: features dict.
+      labels: labels dict.
+      mode: `tf.estimator.ModeKeys`
+      config: `tf.estimator.RunConfig`
+
+    Returns:
+      An `tf.estimator.EstimatorSpec` object.
+
+    Raises:
+      ValueError: if `model_fn` returns invalid objects.
+    """
+    model_fn_args = function_utils.fn_args(self._model_fn)
+    kwargs = {}
+    if 'labels' in model_fn_args:
+      kwargs['labels'] = labels
+    else:
+      if labels is not None:
+        raise ValueError(
+            'model_fn does not take labels, but input_fn returns labels.')
+    if 'mode' in model_fn_args:
+      kwargs['mode'] = mode
+    if 'params' in model_fn_args:
+      kwargs['params'] = self.params
+    if 'config' in model_fn_args:
+      kwargs['config'] = config
+
+    logging.info('Calling model_fn.')
+    model_fn_results = self._model_fn(features=features, **kwargs)
+    logging.info('Done calling model_fn.')
+
+    if not isinstance(model_fn_results, model_fn_lib.EstimatorSpec):
+      raise ValueError('model_fn should return an EstimatorSpec.')
+
+    return model_fn_results
+
+  def _train_model(self, input_fn, hooks, saving_listeners):
+    if self._train_distribution:
+      return self._train_model_distributed(input_fn, hooks, saving_listeners)
+    else:
+      return self._train_model_default(input_fn, hooks, saving_listeners)
+
+  def _train_model_default(self, input_fn, hooks, saving_listeners):
+    """Initiate training with `input_fn`, without `DistributionStrategies`.
+
+    Args:
+      input_fn: A function that provides input data for training as minibatches.
+      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
+        callbacks inside the training loop.
+      saving_listeners: list of `tf.train.CheckpointSaverListener` objects. Used
+        for callbacks that run immediately before or after checkpoint savings.
+
+    Returns:
+      Loss from training
+    """
+    worker_hooks = []
+    with ops.Graph().as_default() as g, g.device(self._device_fn):
+      random_seed.set_random_seed(self._config.tf_random_seed)
+      global_step_tensor = self._create_and_assert_global_step(g)
+
+      # Skip creating a read variable if _create_and_assert_global_step
+      # returns None (e.g. tf.contrib.estimator.SavedModelEstimator).
+      if global_step_tensor is not None:
+        training_util._get_or_create_global_step_read(g)  # pylint: disable=protected-access
+
+      features, labels, input_hooks = (
+          self._get_features_and_labels_from_input_fn(
+              input_fn, model_fn_lib.ModeKeys.TRAIN))
+      worker_hooks.extend(input_hooks)
+      estimator_spec = self._call_model_fn(
+          features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
+      global_step_tensor = training_util.get_global_step(g)
+      return self._train_with_estimator_spec(estimator_spec, worker_hooks,
+                                             hooks, global_step_tensor,
+                                             saving_listeners)
+
+  def _train_model_distributed(self, input_fn, hooks, saving_listeners):
+    """Initiate training with `input_fn`, using `DistributionStrategies`.
+
+    Args:
+      input_fn: A function that provides input data for training as minibatches.
+      hooks: List of `tf.train.SessionRunHook` subclass instances. Used for
+        callbacks inside the training loop.
+      saving_listeners: list of `tf.train.CheckpointSaverListener` objects. Used
+        for callbacks that run immediately before or after checkpoint savings.
+
+    Returns:
+      Loss from training
+    """
+    self._train_distribution.configure(self._session_config)
+
+    # TODO(sourabhbajaj): Remove this hack once we migrate the other strategies
+    # to use the new API
+    is_tpu_strategy = (
+        self._train_distribution.__class__.__name__ == 'TPUStrategy')
+
+    worker_hooks = []
+    with ops.Graph().as_default() as g:
+      # We want to create the iterations variable outside the distribution scope
+      # as that is just stored on the host and mainly used to drive the loop
+      # and doesn't need to be a Mirrored/Device variable.
+      if is_tpu_strategy:
+        steps_per_run_variable = training.get_or_create_steps_per_run_variable()
+      with self._train_distribution.scope():
+        random_seed.set_random_seed(self._config.tf_random_seed)
+        iterator, input_hooks = self._get_iterator_from_input_fn(
+            input_fn, model_fn_lib.ModeKeys.TRAIN, self._train_distribution)
+        worker_hooks.extend(input_hooks)
+        global_step_tensor = self._create_and_assert_global_step(g)
+        # we want to add to the global collection in the main thread not the
+        # tower threads.
+        ops.add_to_collection(
+            training_util.GLOBAL_STEP_READ_KEY,
+            self._train_distribution.read_var(global_step_tensor))
+
+        if is_tpu_strategy:
+          # Create a step_fn from the train_op of grouped_estimator_spec
+          def step_fn(ctx, features, labels=None):
+            """A single step that is passed to run_on_dataset."""
+            estimator_spec = self._train_distribution.call_for_each_tower(
+                self._call_model_fn,
+                features,
+                labels,
+                model_fn_lib.ModeKeys.TRAIN,
+                self.config)
+            ctx.set_last_step_output(
+                name='loss',
+                output=estimator_spec.loss,
+                aggregation=distribute_lib.get_loss_reduction())
+            ctx.set_non_tensor_output(
+                name='estimator_spec', output=estimator_spec)
+            return estimator_spec.train_op
+
+          # Create new train_op post graph rewrites
+          initial_training_loss = constant_op.constant(1e7)
+          ctx = self._train_distribution.run_steps_on_dataset(
+              step_fn, iterator, iterations=steps_per_run_variable,
+              initial_loop_values={'loss': initial_training_loss})
+          distributed_train_op = ctx.run_op
+          loss = ctx.last_step_outputs['loss']
+          grouped_estimator_spec = ctx.non_tensor_outputs['estimator_spec']
+        else:
+          features, labels = estimator_util.parse_iterator_result(
+              iterator.get_next())
+          grouped_estimator_spec = self._train_distribution.call_for_each_tower(
+              self._call_model_fn,
+              features,
+              labels,  # although this will be None it seems
+              model_fn_lib.ModeKeys.TRAIN,
+              self.config)
+          loss = self._train_distribution.unwrap(
+              self._train_distribution.reduce(
+                  distribute_lib.get_loss_reduction(),
+                  grouped_estimator_spec.loss,
+                  destinations='/device:CPU:0'))[0]
+          distributed_train_op = grouped_estimator_spec.train_op
+
+        scaffold = _combine_distributed_scaffold(
+            grouped_estimator_spec.scaffold, self._train_distribution)
+
+        # TODO(yuefengz): add a test for unwrapping per_device_hooks.
+        def get_hooks_from_the_first_device(per_device_hooks):
+          return [
+              self._distribution.unwrap(per_device_hook)[0]
+              for per_device_hook in per_device_hooks
+          ]
+
+        training_hooks = get_hooks_from_the_first_device(
+            grouped_estimator_spec.training_hooks)
+        training_chief_hooks = get_hooks_from_the_first_device(
+            grouped_estimator_spec.training_chief_hooks)
+        worker_hooks.append(
+            estimator_util.StrategyInitFinalizeHook(
+                self._train_distribution.initialize,
+                self._train_distribution.finalize))
+
+        estimator_spec = model_fn_lib.EstimatorSpec(
+            mode=grouped_estimator_spec.mode,
+            loss=loss,
+            train_op=self._train_distribution.group(distributed_train_op),
+            training_hooks=training_hooks,
+            training_chief_hooks=training_chief_hooks,
+            scaffold=scaffold)
+        return self._train_with_estimator_spec(estimator_spec, worker_hooks,
+                                               hooks, global_step_tensor,
+                                               saving_listeners)
+
+  def _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks,
+                                 global_step_tensor, saving_listeners):
+    """Train a model with the given Estimator Spec."""
+    if self._warm_start_settings:
+      logging.info('Warm-starting with WarmStartSettings: %s' %
+                   (self._warm_start_settings,))
+      warm_starting_util.warm_start(*self._warm_start_settings)
+    # Check if the user created a loss summary, and add one if they didn't.
+    # We assume here that the summary is called 'loss'. If it is not, we will
+    # make another one with the name 'loss' to ensure it shows up in the right
+    # graph in TensorBoard.
+    if not any([x.op.name == 'loss'
+                for x in ops.get_collection(ops.GraphKeys.SUMMARIES)]):
+      summary.scalar('loss', estimator_spec.loss)
+    ops.add_to_collection(ops.GraphKeys.LOSSES, estimator_spec.loss)
+    worker_hooks.extend(hooks)
+    worker_hooks.append(
+        training.NanTensorHook(estimator_spec.loss)
+    )
+    if self._config.log_step_count_steps is not None:
+      worker_hooks.append(
+          training.LoggingTensorHook(
+              {
+                  'loss': estimator_spec.loss,
+                  'step': global_step_tensor
+              },
+              every_n_iter=self._config.log_step_count_steps)
+      )
+    worker_hooks.extend(estimator_spec.training_hooks)
+
+    if not (estimator_spec.scaffold.saver or
+            ops.get_collection(ops.GraphKeys.SAVERS)):
+      ops.add_to_collection(
+          ops.GraphKeys.SAVERS,
+          training.Saver(
+              sharded=True,
+              max_to_keep=self._config.keep_checkpoint_max,
+              keep_checkpoint_every_n_hours=(
+                  self._config.keep_checkpoint_every_n_hours),
+              defer_build=True,
+              save_relative_paths=True))
+
+    chief_hooks = []
+    all_hooks = worker_hooks + list(estimator_spec.training_chief_hooks)
+    saver_hooks = [
+        h for h in all_hooks if isinstance(h, training.CheckpointSaverHook)]
+    if (self._config.save_checkpoints_secs or
+        self._config.save_checkpoints_steps):
+      if not saver_hooks:
+        chief_hooks = [
+            training.CheckpointSaverHook(
+                self._model_dir,
+                save_secs=self._config.save_checkpoints_secs,
+                save_steps=self._config.save_checkpoints_steps,
+                scaffold=estimator_spec.scaffold)
+        ]
+        saver_hooks = [chief_hooks[0]]
+    if saving_listeners:
+      if not saver_hooks:
+        raise ValueError(
+            'There should be a CheckpointSaverHook to use saving_listeners. '
+            'Please set one of the RunConfig.save_checkpoints_steps or '
+            'RunConfig.save_checkpoints_secs.')
+      else:
+        # It is expected to have one CheckpointSaverHook. If multiple, we pick
+        # up the first one to add listener.
+        saver_hooks[0]._listeners.extend(saving_listeners)  # pylint: disable=protected-access
+
+    # Add summary hooks to worker 0 if we are running with a master, to ensure
+    # that summaries are written at correct intervals even with long-running
+    # evaluations.
+    save_summary_steps = self._config.save_summary_steps
+    log_step_count_steps = self._config.log_step_count_steps
+    if (self._config.cluster_spec and self._config.cluster_spec.jobs and
+        (run_config.TaskType.MASTER in self._config.cluster_spec.jobs)):
+      # Update config values to prevent the default hooks from being created on
+      # the master or other workers.
+      save_summary_steps = 0
+      log_step_count_steps = None
+
+      if (self._config.task_type == run_config.TaskType.WORKER and
+          self._config.task_id == 0):
+        if (self._config.save_summary_steps and
+            self._config.save_summary_steps > 0):
+          worker_hooks.append(
+              training.SummarySaverHook(
+                  save_steps=self._config.save_summary_steps,
+                  output_dir=self._config.model_dir,
+                  scaffold=estimator_spec.scaffold))
+
+        if (self._config.log_step_count_steps and
+            self._config.log_step_count_steps > 0):
+          worker_hooks.append(
+              training.StepCounterHook(
+                  every_n_steps=self._config.log_step_count_steps,
+                  output_dir=self._config.model_dir))
+
+    with training.MonitoredTrainingSession(
+        master=self._config.master,
+        is_chief=self._config.is_chief,
+        checkpoint_dir=self._model_dir,
+        scaffold=estimator_spec.scaffold,
+        hooks=worker_hooks,
+        chief_only_hooks=(
+            tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)),
+        save_checkpoint_secs=0,  # Saving is handled by a hook.
+        save_summaries_steps=save_summary_steps,
+        config=self._session_config,
+        log_step_count_steps=log_step_count_steps) as mon_sess:
+      loss = None
+      while not mon_sess.should_stop():
+        _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
+    return loss
+
+  def _evaluate_build_graph(self, input_fn, hooks=None, checkpoint_path=None):
+    """Builds the graph and related hooks to run evaluation."""
+    random_seed.set_random_seed(self._config.tf_random_seed)
+    self._create_and_assert_global_step(ops.get_default_graph())
+
+    if self._eval_distribution:
+      (scaffold, evaluation_hooks, input_hooks, update_op, eval_dict) = (
+          self._call_model_fn_eval_distributed(input_fn, self.config))
+    else:
+      (scaffold, evaluation_hooks, input_hooks, update_op, eval_dict) = (
+          self._call_model_fn_eval(input_fn, self.config))
+
+    global_step_tensor = training_util.get_global_step(ops.get_default_graph())
+    # Call to warm_start has to be after model_fn is called.
+    self._maybe_warm_start(checkpoint_path)
+
+    if ops.GraphKeys.GLOBAL_STEP in eval_dict:
+      raise ValueError(
+          'Metric with name `global_step` is not allowed, because Estimator '
+          'already defines a default metric with the same name.')
+    eval_dict[ops.GraphKeys.GLOBAL_STEP] = global_step_tensor
+
+    all_hooks = list(input_hooks)
+    all_hooks.extend(hooks)
+    all_hooks.extend(list(evaluation_hooks or []))
+    # New local variables have been added, so update the estimator spec's
+    # local init op if it was defined.
+    if scaffold and scaffold.local_init_op:
+      # Ensure that eval step has been created before updating local init op.
+      evaluation._get_or_create_eval_step()  # pylint: disable=protected-access
+
+      scaffold = monitored_session.Scaffold(
+          local_init_op=control_flow_ops.group(
+              scaffold.local_init_op,
+              monitored_session.Scaffold.default_local_init_op()),
+          copy_from_scaffold=scaffold
+      )
+
+    return scaffold, update_op, eval_dict, all_hooks
+
+  def _call_model_fn_eval(self, input_fn, config):
+    """Call model_fn for evaluation and handle return values."""
+    features, labels, input_hooks = self._get_features_and_labels_from_input_fn(
+        input_fn, model_fn_lib.ModeKeys.EVAL)
+
+    estimator_spec = self._call_model_fn(
+        features, labels, model_fn_lib.ModeKeys.EVAL, config)
+    eval_metric_ops = _verify_and_create_loss_metric(
+        estimator_spec.eval_metric_ops, estimator_spec.loss)
+    update_op, eval_dict = _extract_metric_update_ops(eval_metric_ops)
+    return (estimator_spec.scaffold, estimator_spec.evaluation_hooks,
+            input_hooks, update_op, eval_dict)
+
+  def _call_model_fn_eval_distributed(self, input_fn, config):
+    """Call model_fn in distribution mode and handle return values."""
+
+    iterator, input_hooks = self._get_iterator_from_input_fn(
+        input_fn, model_fn_lib.ModeKeys.EVAL, self._eval_distribution)
+
+    is_tpu_strategy = (
+        self._eval_distribution.__class__.__name__ == 'TPUStrategy')
+
+    if is_tpu_strategy:
+      steps_per_run_variable = training.get_or_create_steps_per_run_variable()
+      def step_fn(ctx, features, labels=None):
+        """Runs one step of the eval computation and captures outputs."""
+        estimator_spec = self._eval_distribution.call_for_each_tower(
+            self._call_model_fn, features, labels, model_fn_lib.ModeKeys.EVAL,
+            config)
+        eval_metric_ops = _verify_and_create_loss_metric(
+            estimator_spec.eval_metric_ops, estimator_spec.loss,
+            self._eval_distribution)
+        update_op, eval_dict = _extract_metric_update_ops(
+            eval_metric_ops, self._eval_distribution)
+        ctx.set_non_tensor_output(name='estimator_spec', output=estimator_spec)
+        ctx.set_non_tensor_output(name='eval_dict', output=eval_dict)
+        return update_op
+
+      # TODO(priyag): Fix eval step hook to account for steps_per_run.
+      ctx = self._eval_distribution.run_steps_on_dataset(
+          step_fn, iterator, iterations=steps_per_run_variable)
+      update_op = ctx.run_op
+      eval_dict = ctx.non_tensor_outputs['eval_dict']
+      grouped_estimator_spec = ctx.non_tensor_outputs['estimator_spec']
+    else:
+      features, labels = estimator_util.parse_iterator_result(
+          iterator.get_next())
+      grouped_estimator_spec = self._eval_distribution.call_for_each_tower(
+          self._call_model_fn, features, labels,
+          model_fn_lib.ModeKeys.EVAL, config)
+      eval_metric_ops = _verify_and_create_loss_metric(
+          grouped_estimator_spec.eval_metric_ops, grouped_estimator_spec.loss,
+          self._eval_distribution)
+      update_op, eval_dict = _extract_metric_update_ops(
+          eval_metric_ops, self._eval_distribution)
+
+    scaffold = _combine_distributed_scaffold(
+        grouped_estimator_spec.scaffold, self._eval_distribution)
+    evaluation_hooks = self._eval_distribution.unwrap(
+        grouped_estimator_spec.evaluation_hooks)[0]
+    evaluation_hooks = evaluation_hooks + (
+        estimator_util.StrategyInitFinalizeHook(
+            self._eval_distribution.initialize,
+            self._eval_distribution.finalize),)
+
+    return (scaffold, evaluation_hooks, input_hooks, update_op, eval_dict)
+
+  def _evaluate_run(self, checkpoint_path, scaffold, update_op, eval_dict,
+                    all_hooks, output_dir):
+    """Run evaluation."""
+    eval_results = evaluation._evaluate_once(  # pylint: disable=protected-access
+        checkpoint_path=checkpoint_path,
+        master=self._config.evaluation_master,
+        scaffold=scaffold,
+        eval_ops=update_op,
+        final_ops=eval_dict,
+        hooks=all_hooks,
+        config=self._session_config)
+
+    current_global_step = eval_results[ops.GraphKeys.GLOBAL_STEP]
+
+    _write_dict_to_summary(
+        output_dir=output_dir,
+        dictionary=eval_results,
+        current_global_step=current_global_step)
+
+    if checkpoint_path:
+      _write_checkpoint_path_to_summary(
+          output_dir=output_dir,
+          checkpoint_path=checkpoint_path,
+          current_global_step=current_global_step)
+
+    return eval_results
+
+  def _maybe_warm_start(self, checkpoint_path):
+    if not checkpoint_path and self._warm_start_settings:
+      logging.info('Warm-starting with WarmStartSettings: %s' %
+                   (self._warm_start_settings,))
+      warm_starting_util.warm_start(*self._warm_start_settings)
+
+
+def _verify_and_create_loss_metric(eval_metric_ops, loss, distribution=None):
+  """Creates a metric for loss and throws an error if one already exists."""
+  if model_fn_lib.LOSS_METRIC_KEY in eval_metric_ops:
+    raise ValueError(
+        'Metric with name "%s" is not allowed, because Estimator ' %
+        (model_fn_lib.LOSS_METRIC_KEY) +
+        'already defines a default metric with the same name.')
+
+  if distribution is None:
+    loss_metric = metrics_lib.mean(loss)
+  else:
+    loss_metric = distribution.call_for_each_tower(
+        metrics_lib.mean, loss)
+  eval_metric_ops[model_fn_lib.LOSS_METRIC_KEY] = loss_metric
+  return eval_metric_ops
+
+
+def maybe_overwrite_model_dir_and_session_config(config, model_dir):
+  """Overwrite estimator config by `model_dir` and `session_config` if needed.
+
+  Args:
+    config: Original estimator config.
+    model_dir: Estimator model checkpoint directory.
+
+  Returns:
+    Overwritten estimator config.
+
+  Raises:
+    ValueError: Model directory inconsistent between `model_dir` and `config`.
+  """
+
+  if config is None:
+    config = run_config.RunConfig()
+    logging.info('Using default config.')
+  if not isinstance(config, run_config.RunConfig):
+    raise ValueError(
+        'config must be an instance of `RunConfig`, but provided %s.' % config)
+
+  if config.session_config is None:
+    session_config = run_config.get_default_session_config()
+    config = run_config.RunConfig.replace(config, session_config=session_config)
+
+  model_dir = compat_internal.path_to_str(model_dir)
+  if model_dir is not None:
+    if (getattr(config, 'model_dir', None) is not None and
+        config.model_dir != model_dir):
+      raise ValueError(
+          "`model_dir` are set both in constructor and `RunConfig`, but with "
+          "different values. In constructor: '{}', in `RunConfig`: "
+          "'{}' ".format(model_dir, config.model_dir))
+  if model_dir:
+    config = run_config.RunConfig.replace(config, model_dir=model_dir)
+  elif getattr(config, 'model_dir', None) is None:
+    model_dir = tempfile.mkdtemp()
+    logging.warning('Using temporary folder as model directory: %s', model_dir)
+    config = run_config.RunConfig.replace(config, model_dir=model_dir)
+
+  return config
+
+
+def create_per_tower_ready_for_local_init_op(scaffold):
+  """Create a `tf.train.Scaffold.ready_for_local_init_op` inside a tower."""
+  if scaffold.ready_for_local_init_op:
+    return scaffold.ready_for_local_init_op
+
+  def default_ready_for_local_init_op():
+    return variables.report_uninitialized_variables(
+        variables.global_variables())
+
+  return monitored_session.Scaffold.get_or_default(
+      'ready_for_local_init_op', ops.GraphKeys.READY_FOR_LOCAL_INIT_OP,
+      default_ready_for_local_init_op)
+
+
+def _combine_distributed_scaffold(grouped_scaffold, distribution):
+  """Combines scaffold(s) returned from `distribution.call_for_each_tower`."""
+
+  # TODO(anjalisridhar): Figure out how to resolve the following scaffold
+  # parameters: init_feed_dict, init_fn.
+  scaffold_list = distribution.unwrap(grouped_scaffold)
+  init_feed_dict = [
+      s.init_feed_dict
+      for s in scaffold_list
+      if s.init_feed_dict is not None
+  ]
+  if init_feed_dict:
+    init_feed_dict = distribution.group(init_feed_dict)
+  else:
+    init_feed_dict = None
+
+  init_fn = [s.init_fn for s in scaffold_list if s.init_fn is not None]
+  if init_fn:
+    init_fn = distribution.group(init_fn)
+  else:
+    init_fn = None
+
+  init_op = [s.init_op for s in scaffold_list if s.init_op is not None]
+  if init_op:
+    init_op = distribution.group(init_op)
+  else:
+    init_op = None
+
+  def _unwrap_and_concat(value):
+    value = nest.flatten(distribution.unwrap(value))
+    if len(value) != 1:
+      return array_ops.concat(value, 0)
+    return value[0]
+
+  ready_op = distribution.call_for_each_tower(
+      lambda scaffold: scaffold.ready_op, grouped_scaffold)
+  if ready_op is not None:
+    ready_op = _unwrap_and_concat(ready_op)
+
+  ready_for_local_init_op = distribution.call_for_each_tower(
+      create_per_tower_ready_for_local_init_op, grouped_scaffold)
+  if ready_for_local_init_op is not None:
+    ready_for_local_init_op = _unwrap_and_concat(ready_for_local_init_op)
+  else:
+    ready_for_local_init_op = None
+
+  local_init_op = [
+      s.local_init_op
+      for s in scaffold_list
+      if s.local_init_op is not None
+  ]
+  if local_init_op:
+    local_init_op = distribution.group(local_init_op)
+  else:
+    local_init_op = None
+
+  summary_op = [
+      s.summary_op for s in scaffold_list if s.summary_op is not None
+  ]
+  if summary_op:
+    summary_op = distribution.group(summary_op)
+  else:
+    summary_op = None
+
+  scaffold = monitored_session.Scaffold(
+      init_op=init_op,
+      ready_op=ready_op,
+      ready_for_local_init_op=ready_for_local_init_op,
+      local_init_op=local_init_op,
+      summary_op=summary_op,
+      init_feed_dict=init_feed_dict,
+      init_fn=init_fn)
+  return scaffold
+
+
+def _check_checkpoint_available(model_dir):
+  latest_path = checkpoint_management.latest_checkpoint(model_dir)
+  if not latest_path:
+    raise ValueError(
+        'Could not find trained model in model_dir: {}.'.format(model_dir))
+
+
+def _check_hooks_type(hooks):
+  """Returns hooks if all are `SessionRunHook`, raises TypeError otherwise."""
+  hooks = list(hooks or [])
+  for h in hooks:
+    if not isinstance(h, training.SessionRunHook):
+      raise TypeError('Hooks must be a SessionRunHook, given: {}'.format(h))
+  return hooks
+
+
+def _check_listeners_type(saving_listeners):
+  """Check listeners type."""
+  listeners = list(saving_listeners or [])
+  for l in listeners:
+    if not isinstance(l, training.CheckpointSaverListener):
+      raise TypeError(
+          'saving_listeners must be a list of CheckpointSaverListener, '
+          'given: {}'.format(l))
+  return listeners
+
+
+def _get_replica_device_setter(config):
+  """Creates a replica device setter if required as a default `device_fn`.
+
+  `Estimator` uses `tf.train.ReplicaDeviceSetter` as a default device placer. It
+  sets the
+  distributed related arguments such as number of `ps_replicas` based on given
+  `config`.
+
+  Args:
+    config: A `tf.estimator.RunConfig` instance.
+
+  Returns:
+    A replica device setter, or `None`.
+  """
+  if config.task_type:
+    worker_device = '/job:%s/task:%d' % (config.task_type, config.task_id)
+  else:
+    worker_device = '/job:worker'
+
+  if config.num_ps_replicas > 0:
+    return training.replica_device_setter(
+        ps_tasks=config.num_ps_replicas,
+        worker_device=worker_device,
+        merge_devices=True,
+        ps_ops=list(device_setter.STANDARD_PS_OPS),
+        cluster=config.cluster_spec)
+  else:
+    return None
+
+
+def _verify_model_fn_args(model_fn, params):
+  """Verifies `model_fn` arguments."""
+  args = set(function_utils.fn_args(model_fn))
+  if 'features' not in args:
+    raise ValueError('model_fn (%s) must include features argument.' % model_fn)
+  if params is not None and 'params' not in args:
+    raise ValueError('model_fn (%s) does not include params argument, '
+                     'but params (%s) is passed to Estimator.' % (model_fn,
+                                                                  params))
+  if params is None and 'params' in args:
+    logging.warning('Estimator\'s model_fn (%s) includes params '
+                    'argument, but params are not passed to Estimator.',
+                    model_fn)
+  non_valid_args = list(args - _VALID_MODEL_FN_ARGS)
+  if non_valid_args:
+    raise ValueError('model_fn (%s) has following not expected args: %s' %
+                     (model_fn, non_valid_args))
+
+
+def _load_global_step_from_checkpoint_dir(checkpoint_dir):
+  try:
+    checkpoint_reader = training.NewCheckpointReader(
+        training.latest_checkpoint(checkpoint_dir))
+    return checkpoint_reader.get_tensor(ops.GraphKeys.GLOBAL_STEP)
+  except:  # pylint: disable=bare-except
+    return 0
+
+
+def _extract_metric_update_ops(eval_dict, distribution=None):
+  """Separate update operations from metric value operations."""
+  update_ops = []
+  value_ops = {}
+  # Sort metrics lexicographically so graph is identical every time.
+  for name, value in sorted(six.iteritems(eval_dict)):
+    value_ops[name] = value[0]
+    update_ops.append(
+        distribution.group(value[1]) if distribution else value[1])
+
+  update_op = control_flow_ops.group(*update_ops) if update_ops else None
+  return update_op, value_ops
+
+
+def _dict_to_str(dictionary):
+  """Get a `str` representation of a `dict`.
+
+  Args:
+    dictionary: The `dict` to be represented as `str`.
+
+  Returns:
+    A `str` representing the `dictionary`.
+  """
+  return ', '.join('%s = %s' % (k, v)
+                   for k, v in sorted(six.iteritems(dictionary))
+                   if not isinstance(v, six.binary_type))
+
+
+def _write_dict_to_summary(output_dir,
+                           dictionary,
+                           current_global_step):
+  """Writes a `dict` into summary file in given output directory.
+
+  Args:
+    output_dir: `str`, directory to write the summary file in.
+    dictionary: the `dict` to be written to summary file.
+    current_global_step: `int`, the current global step.
+  """
+  logging.info('Saving dict for global step %d: %s', current_global_step,
+               _dict_to_str(dictionary))
+  summary_writer = writer_cache.FileWriterCache.get(output_dir)
+  summary_proto = summary_pb2.Summary()
+  for key in dictionary:
+    if dictionary[key] is None:
+      continue
+    if key == 'global_step':
+      continue
+    if (isinstance(dictionary[key], np.float32) or
+        isinstance(dictionary[key], float)):
+      summary_proto.value.add(tag=key, simple_value=float(dictionary[key]))
+    elif (isinstance(dictionary[key], np.int64) or
+          isinstance(dictionary[key], np.int32) or
+          isinstance(dictionary[key], int)):
+      summary_proto.value.add(tag=key, simple_value=int(dictionary[key]))
+    elif isinstance(dictionary[key], six.binary_type):
+      try:
+        summ = summary_pb2.Summary.FromString(dictionary[key])
+        for i, _ in enumerate(summ.value):
+          summ.value[i].tag = '%s/%d' % (key, i)
+        summary_proto.value.extend(summ.value)
+      except message.DecodeError:
+        logging.warn('Skipping summary for %s, cannot parse string to Summary.',
+                     key)
+        continue
+    elif isinstance(dictionary[key], np.ndarray):
+      value = summary_proto.value.add()
+      value.tag = key
+      value.node_name = key
+      tensor_proto = tensor_util.make_tensor_proto(dictionary[key])
+      value.tensor.CopyFrom(tensor_proto)
+      # pylint: disable=line-too-long
+      logging.info(
+          'Summary for np.ndarray is not visible in Tensorboard by default. '
+          'Consider using a Tensorboard plugin for visualization (see '
+          'https://github.com/tensorflow/tensorboard-plugin-example/blob/master/README.md'
+          ' for more information).')
+      # pylint: enable=line-too-long
+    else:
+      logging.warn(
+          'Skipping summary for %s, must be a float, np.float32, np.int64, '
+          'np.int32 or int or np.ndarray or a serialized string of Summary.',
+          key)
+  summary_writer.add_summary(summary_proto, current_global_step)
+  summary_writer.flush()
+
+
+def _write_checkpoint_path_to_summary(output_dir, checkpoint_path,
+                                      current_global_step):
+  """Writes `checkpoint_path` into summary file in the given output directory.
+
+  Args:
+    output_dir: `str`, directory to write the summary file in.
+    checkpoint_path: `str`, checkpoint file path to be written to summary file.
+    current_global_step: `int`, the current global step.
+  """
+
+  checkpoint_path_tag = 'checkpoint_path'
+
+  logging.info('Saving \'%s\' summary for global step %d: %s',
+               checkpoint_path_tag, current_global_step, checkpoint_path)
+  summary_proto = summary_pb2.Summary()
+  summary_proto.value.add(
+      tag=checkpoint_path_tag,
+      tensor=tensor_util.make_tensor_proto(
+          checkpoint_path, dtype=dtypes.string))
+  summary_writer = writer_cache.FileWriterCache.get(output_dir)
+  summary_writer.add_summary(summary_proto, current_global_step)
+  summary_writer.flush()
+
+
+def _has_dataset_or_queue_runner(maybe_tensor):
+  """Returns `True` if `Dataset` or `QueueRunner` has been used."""
+  # Check TF dataset first. Here, we use a simple algorithm to check the top
+  # level Tensors only, which should be sufficient for most users.
+  tensors = [x for x in nest.flatten(maybe_tensor) if isinstance(x, ops.Tensor)]
+  if any([t.op.type == 'IteratorGetNext' for t in tensors]):
+    return True
+
+  # Now, check queue.
+  return ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS)
+
+
+VocabInfo = warm_starting_util.VocabInfo  # pylint: disable=invalid-name
+estimator_export('estimator.VocabInfo')(VocabInfo)
+
+
+@estimator_export('estimator.WarmStartSettings')
+class WarmStartSettings(
+    collections.namedtuple('WarmStartSettings', [
+        'ckpt_to_initialize_from',
+        'vars_to_warm_start',
+        'var_name_to_vocab_info',
+        'var_name_to_prev_var_name',
+    ])):
+  """Settings for warm-starting in `tf.estimator.Estimators`.
+
+  Example Use with canned `tf.estimator.DNNEstimator`:
+
+  ```
+  emb_vocab_file = tf.feature_column.embedding_column(
+      tf.feature_column.categorical_column_with_vocabulary_file(
+          "sc_vocab_file", "new_vocab.txt", vocab_size=100),
+      dimension=8)
+  emb_vocab_list = tf.feature_column.embedding_column(
+      tf.feature_column.categorical_column_with_vocabulary_list(
+          "sc_vocab_list", vocabulary_list=["a", "b"]),
+      dimension=8)
+  estimator = tf.estimator.DNNClassifier(
+    hidden_units=[128, 64], feature_columns=[emb_vocab_file, emb_vocab_list],
+    warm_start_from=ws)
+  ```
+
+  where `ws` could be defined as:
+
+  Warm-start all weights in the model (input layer and hidden weights).
+  Either the directory or a specific checkpoint can be provided (in the case
+  of the former, the latest checkpoint will be used):
+
+  ```
+  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp")
+  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp/model-1000")
+  ```
+
+  Warm-start only the embeddings (input layer):
+
+  ```
+  ws = WarmStartSettings(ckpt_to_initialize_from="/tmp",
+                         vars_to_warm_start=".*input_layer.*")
+  ```
+
+  Warm-start all weights but the embedding parameters corresponding to
+  `sc_vocab_file` have a different vocab from the one used in the current
+  model:
+
+  ```
+  vocab_info = tf.estimator.VocabInfo(
+      new_vocab=sc_vocab_file.vocabulary_file,
+      new_vocab_size=sc_vocab_file.vocabulary_size,
+      num_oov_buckets=sc_vocab_file.num_oov_buckets,
+      old_vocab="old_vocab.txt"
+  )
+  ws = WarmStartSettings(
+      ckpt_to_initialize_from="/tmp",
+      var_name_to_vocab_info={
+          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
+      })
+  ```
+
+  Warm-start only `sc_vocab_file` embeddings (and no other variables), which
+  have a different vocab from the one used in the current model:
+
+  ```
+  vocab_info = tf.estimator.VocabInfo(
+      new_vocab=sc_vocab_file.vocabulary_file,
+      new_vocab_size=sc_vocab_file.vocabulary_size,
+      num_oov_buckets=sc_vocab_file.num_oov_buckets,
+      old_vocab="old_vocab.txt"
+  )
+  ws = WarmStartSettings(
+      ckpt_to_initialize_from="/tmp",
+      vars_to_warm_start=None,
+      var_name_to_vocab_info={
+          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
+      })
+  ```
+
+  Warm-start all weights but the parameters corresponding to `sc_vocab_file`
+  have a different vocab from the one used in current checkpoint, and only
+  100 of those entries were used:
+
+  ```
+  vocab_info = tf.estimator.VocabInfo(
+      new_vocab=sc_vocab_file.vocabulary_file,
+      new_vocab_size=sc_vocab_file.vocabulary_size,
+      num_oov_buckets=sc_vocab_file.num_oov_buckets,
+      old_vocab="old_vocab.txt",
+      old_vocab_size=100
+  )
+  ws = WarmStartSettings(
+      ckpt_to_initialize_from="/tmp",
+      var_name_to_vocab_info={
+          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
+      })
+  ```
+
+  Warm-start all weights but the parameters corresponding to `sc_vocab_file`
+  have a different vocab from the one used in current checkpoint and the
+  parameters corresponding to `sc_vocab_list` have a different name from the
+  current checkpoint:
+
+  ```
+  vocab_info = tf.estimator.VocabInfo(
+      new_vocab=sc_vocab_file.vocabulary_file,
+      new_vocab_size=sc_vocab_file.vocabulary_size,
+      num_oov_buckets=sc_vocab_file.num_oov_buckets,
+      old_vocab="old_vocab.txt",
+      old_vocab_size=100
+  )
+  ws = WarmStartSettings(
+      ckpt_to_initialize_from="/tmp",
+      var_name_to_vocab_info={
+          "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info
+      },
+      var_name_to_prev_var_name={
+          "input_layer/sc_vocab_list_embedding/embedding_weights":
+              "old_tensor_name"
+      })
+  ```
+
+  Attributes:
+    ckpt_to_initialize_from: [Required] A string specifying the directory with
+      checkpoint file(s) or path to checkpoint from which to warm-start the
+      model parameters.
+    vars_to_warm_start: [Optional] One of the following:  - A regular expression
+      (string) that captures which variables to warm-start (see
+      `tf.get_collection`).  This expression will only consider variables in the
+      `TRAINABLE_VARIABLES` collection. - A list of Variables to warm-start. - A
+      list of strings, each representing a full variable name to warm-start. -
+      `None`, in which case only variables specified in `var_name_to_vocab_info`
+      will be warm-started.  Defaults to `'.*'`, which warm-starts all variables
+      in the `TRAINABLE_VARIABLES` collection.  Note that this excludes
+      variables such as accumulators and moving statistics from batch norm.
+    var_name_to_vocab_info: [Optional] Dict of variable names (strings) to
+      `tf.estimator.VocabInfo`. The variable names should be "full" variables,
+      not the names of the partitions.  If not explicitly provided, the variable
+      is assumed to have no (changes to) vocabulary.
+    var_name_to_prev_var_name: [Optional] Dict of variable names (strings) to
+      name of the previously-trained variable in `ckpt_to_initialize_from`. If
+      not explicitly provided, the name of the variable is assumed to be same
+      between previous checkpoint and current model.
+  """
+
+  def __new__(cls,
+              ckpt_to_initialize_from,
+              vars_to_warm_start='.*',
+              var_name_to_vocab_info=None,
+              var_name_to_prev_var_name=None):
+    if not ckpt_to_initialize_from:
+      raise ValueError(
+          '`ckpt_to_initialize_from` MUST be set in WarmStartSettings')
+    return super(WarmStartSettings, cls).__new__(
+        cls,
+        ckpt_to_initialize_from,
+        vars_to_warm_start,
+        var_name_to_vocab_info or {},
+        var_name_to_prev_var_name or {},
+    )
+
+
+def _get_saved_model_ckpt(saved_model_dir):
+  """Return path to variables checkpoint in a `SavedModel` directory."""
+  if not gfile.Exists(
+      os.path.join(saved_model_utils.get_variables_dir(saved_model_dir),
+                   compat.as_text('variables.index'))):
+    raise ValueError('Directory provided has an invalid SavedModel format: %s'
+                     % saved_model_dir)
+  return saved_model_utils.get_variables_path(saved_model_dir)
+
+
+def _get_default_warm_start_settings(warm_start_from):
+  """Returns default `tf.estimator.WarmStartSettings`.
+
+  Args:
+    warm_start_from: Either a string representing the filepath of a checkpoint
+      or `SavedModel` to initialize from, or an instance of
+      `tf.estimator.WarmStartSettings`.
+
+  Returns:
+    Either None or an instance of `WarmStartSettings`.
+
+  Raises:
+    ValueError: If `warm_start_from` is not `None` but is neither a string nor
+    an
+      instance of `WarmStartSettings`.
+  """
+  if warm_start_from is None:
+    return None
+  if isinstance(warm_start_from, (six.string_types, six.binary_type)):
+    # Infer that this is a SavedModel if export_path +
+    # 'variables/variables.index' exists, and if so, construct the
+    # WarmStartSettings pointing to the variables path
+    # (export_path + 'variables/variables').
+    if gfile.Exists(os.path.join(
+        saved_model_utils.get_variables_dir(warm_start_from),
+        compat.as_text('variables.index'))):
+      logging.info('Warm-starting from a SavedModel')
+      return WarmStartSettings(
+          ckpt_to_initialize_from=saved_model_utils.get_variables_path(
+              warm_start_from))
+    return WarmStartSettings(ckpt_to_initialize_from=warm_start_from)
+  elif isinstance(warm_start_from, WarmStartSettings):
+    return warm_start_from
+  else:
+    raise ValueError('warm_start_from must be a string or a WarmStartSettings, '
+                     'instead got {}'.format(type(warm_start_from)))
diff --git a/tensorflow/python/estimator/estimator_lib.py b/tensorflow/python/estimator/estimator_lib.py
index bfda6591ac..f188f2d4e6 100644
--- a/tensorflow/python/estimator/estimator_lib.py
+++ b/tensorflow/python/estimator/estimator_lib.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,40 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""estimator_lib python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Estimator: High level tools for working with models."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator import estimator_lib
+# pylint: disable=unused-import,line-too-long,wildcard-import
+from tensorflow.python.estimator.canned.baseline import BaselineClassifier
+from tensorflow.python.estimator.canned.baseline import BaselineRegressor
+from tensorflow.python.estimator.canned.boosted_trees import BoostedTreesClassifier
+from tensorflow.python.estimator.canned.boosted_trees import BoostedTreesRegressor
+from tensorflow.python.estimator.canned.dnn import DNNClassifier
+from tensorflow.python.estimator.canned.dnn import DNNRegressor
+from tensorflow.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedClassifier
+from tensorflow.python.estimator.canned.dnn_linear_combined import DNNLinearCombinedRegressor
+from tensorflow.python.estimator.canned.linear import LinearClassifier
+from tensorflow.python.estimator.canned.linear import LinearRegressor
+from tensorflow.python.estimator.canned.parsing_utils import classifier_parse_example_spec
+from tensorflow.python.estimator.canned.parsing_utils import regressor_parse_example_spec
+from tensorflow.python.estimator.estimator import Estimator
+from tensorflow.python.estimator.estimator import VocabInfo
+from tensorflow.python.estimator.estimator import WarmStartSettings
+from tensorflow.python.estimator.export import export_lib as export
+from tensorflow.python.estimator.exporter import Exporter
+from tensorflow.python.estimator.exporter import FinalExporter
+from tensorflow.python.estimator.exporter import LatestExporter
+from tensorflow.python.estimator.inputs import inputs
+from tensorflow.python.estimator.keras import model_to_estimator
+from tensorflow.python.estimator.model_fn import EstimatorSpec
+from tensorflow.python.estimator.model_fn import ModeKeys
+from tensorflow.python.estimator.run_config import RunConfig
+from tensorflow.python.estimator.training import EvalSpec
+from tensorflow.python.estimator.training import train_and_evaluate
+from tensorflow.python.estimator.training import TrainSpec
 
-# Include attrs that start with single underscore.
-estimator_lib.__all__ = [
-    s for s in dir(estimator_lib) if not s.startswith('__')
-]
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.estimator_lib import *
+# pylint: enable=unused-import,line-too-long,wildcard-import
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
new file mode 100644
index 0000000000..246dfb1a4b
--- /dev/null
+++ b/tensorflow/python/estimator/estimator_test.py
@@ -0,0 +1,3280 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Estimator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import glob
+import json
+import os
+import tempfile
+
+import numpy as np
+import six
+
+from google.protobuf import text_format
+
+from tensorflow.core.protobuf import rewriter_config_pb2
+from tensorflow.python.client import session
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import estimator
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator import run_config
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.export import export_output
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras import metrics as metrics_module
+from tensorflow.python.layers import layers
+from tensorflow.python.lib.io import file_io
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.ops.random_ops import random_uniform
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import loader
+from tensorflow.python.saved_model import loader_impl
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model import tag_constants
+from tensorflow.python.summary import summary
+from tensorflow.python.summary import summary_iterator
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import checkpoint_state_pb2
+from tensorflow.python.training import saver
+from tensorflow.python.training import saver_test_utils
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import training
+from tensorflow.python.util import compat
+from tensorflow.python.util import function_utils
+
+_TMP_DIR = '/tmp'
+_ANOTHER_TMP_DIR = '/another_tmp'
+
+
+def dummy_model_fn(features, labels, params):
+  _, _, _ = features, labels, params
+
+
+def summaries_with_matching_keyword(keyword, dir_):
+  """Yields summary protos matching given keyword from event file."""
+
+  writer_cache.FileWriterCache.clear()
+
+  event_paths = glob.glob(os.path.join(dir_, 'events*'))
+  for event in summary_iterator.summary_iterator(event_paths[-1]):
+    if event.summary is not None:
+      for value in event.summary.value:
+        if keyword in value.tag:
+          yield event.summary
+
+
+def check_eventfile_for_keyword(keyword, dir_):
+  """Checks event files for the keyword."""
+  return any(summaries_with_matching_keyword(keyword, dir_))
+
+
+def get_mock_saver():
+  real_saver = saver.Saver()
+  return test.mock.Mock(wraps=real_saver, saver_def=real_saver.saver_def)
+
+
+class EstimatorInheritanceConstraintTest(test.TestCase):
+  """Tests that sub classes cannot override methods of Estimator."""
+
+  def test_override_a_method(self):
+    class _Estimator(estimator.Estimator):
+
+      def __init__(self):
+        super(_Estimator, self).__init__(model_fn=dummy_model_fn)
+
+      def predict(self, input_fn, predict_keys=None, hooks=None):
+        pass
+
+    with self.assertRaisesRegexp(
+        ValueError, 'cannot override members of Estimator.*predict'):
+      _Estimator()
+
+  def test_override_a_method_with_tricks(self):
+    class _Estimator(estimator.Estimator):
+
+      def __init__(self):
+        super(_Estimator, self).__init__(model_fn=dummy_model_fn)
+
+      def _assert_members_are_not_overridden(self):
+        pass  # HAHA! I tricked you!
+
+      def predict(self, input_fn, predict_keys=None, hooks=None):
+        pass
+
+    with self.assertRaisesRegexp(
+        ValueError, 'cannot override members of Estimator.*predict'):
+      _Estimator()
+
+  def test_extension_of_api_is_ok(self):
+    class _Estimator(estimator.Estimator):
+
+      def __init__(self):
+        super(_Estimator, self).__init__(model_fn=dummy_model_fn)
+
+      def predict_proba(self, input_fn, predict_keys=None, hooks=None):
+        pass
+
+    _Estimator()
+
+  def test_override_allowed_method(self):
+    class _Estimator(estimator.Estimator):
+
+      def __init__(self):
+        super(_Estimator, self).__init__(model_fn=dummy_model_fn)
+
+      def _tf_api_names(self):
+        pass
+
+    _Estimator()
+
+
+class EstimatorConstructorTest(test.TestCase):
+
+  def test_config_must_be_a_run_config(self):
+    with self.assertRaisesRegexp(ValueError, 'an instance of `RunConfig`'):
+      estimator.Estimator(model_fn=None, config='NotARunConfig')
+
+  def test_model_fn_must_be_provided(self):
+    with self.assertRaisesRegexp(ValueError, 'model_fn.* must be'):
+      estimator.Estimator(model_fn=None)
+
+  def test_property_accessors(self):
+
+    def model_fn(features, labels, params):
+      _, _, _ = features, labels, params
+
+    class FakeConfig(run_config.RunConfig):
+      pass
+
+    params = {'hidden_layers': [3, 4]}
+    est = estimator.Estimator(
+        model_fn=model_fn, model_dir='bla', config=FakeConfig(), params=params)
+    self.assertTrue(isinstance(est.config, FakeConfig))
+    self.assertEqual(params, est.params)
+    self.assertEqual('bla', est.model_dir)
+
+  def test_default_config(self):
+
+    def model_fn(features, labels):
+      _, _ = features, labels
+
+    est = estimator.Estimator(model_fn=model_fn)
+    self.assertTrue(isinstance(est.config, run_config.RunConfig))
+    self.assertTrue(est._session_config.allow_soft_placement)
+    rewrite_options = est._session_config.graph_options.rewrite_options
+    self.assertEqual(rewrite_options.meta_optimizer_iterations,
+                     rewriter_config_pb2.RewriterConfig.ONE)
+
+  def test_default_model_dir(self):
+
+    def model_fn(features, labels):
+      _, _ = features, labels
+
+    with test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR):
+      est = estimator.Estimator(model_fn=model_fn)
+      self.assertEqual(_TMP_DIR, est.config.model_dir)
+      self.assertEqual(_TMP_DIR, est.model_dir)
+
+  def test_model_dir_in_constructor(self):
+
+    def model_fn(features, labels):
+      _, _ = features, labels
+
+    est = estimator.Estimator(model_fn=model_fn, model_dir=_TMP_DIR)
+    self.assertEqual(_TMP_DIR, est.config.model_dir)
+    self.assertEqual(_TMP_DIR, est.model_dir)
+
+  def test_empty_model_dir(self):
+    def model_fn(features, labels):
+      _, _ = features, labels
+
+    with test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR):
+      est = estimator.Estimator(model_fn=model_fn, model_dir='')
+      self.assertEqual(_TMP_DIR, est.config.model_dir)
+      self.assertEqual(_TMP_DIR, est.model_dir)
+
+  def test_model_dir_in_run_config(self):
+
+    class FakeConfig(run_config.RunConfig):
+
+      @property
+      def model_dir(self):
+        return _TMP_DIR
+
+    def model_fn(features, labels):
+      _, _ = features, labels
+
+    est = estimator.Estimator(model_fn=model_fn, config=FakeConfig())
+    self.assertEqual(_TMP_DIR, est.config.model_dir)
+    self.assertEqual(_TMP_DIR, est.model_dir)
+
+  def test_same_model_dir_in_constructor_and_run_config(self):
+
+    class FakeConfig(run_config.RunConfig):
+
+      @property
+      def model_dir(self):
+        return _TMP_DIR
+
+    def model_fn(features, labels):
+      _, _ = features, labels
+
+    est = estimator.Estimator(
+        model_fn=model_fn, config=FakeConfig(), model_dir=_TMP_DIR)
+    self.assertEqual(_TMP_DIR, est.config.model_dir)
+    self.assertEqual(_TMP_DIR, est.model_dir)
+
+  def test_different_model_dir_in_constructor_and_run_config(self):
+
+    class FakeConfig(run_config.RunConfig):
+
+      @property
+      def model_dir(self):
+        return _TMP_DIR
+
+    def model_fn(features, labels):
+      _, _ = features, labels
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        '`model_dir` are set both in constructor and `RunConfig`, but '
+        'with different values'):
+      estimator.Estimator(
+          model_fn=model_fn, config=FakeConfig(), model_dir=_ANOTHER_TMP_DIR)
+
+  def test_model_fn_args_must_include_features(self):
+
+    def model_fn(x, labels):
+      _, _ = x, labels
+
+    with self.assertRaisesRegexp(ValueError, 'features'):
+      estimator.Estimator(model_fn=model_fn)
+
+  def test_model_fn_args_labels_is_optional(self):
+
+    def model_fn(features):
+      _ = features
+
+    estimator.Estimator(model_fn=model_fn)
+
+  def test_if_params_provided_then_model_fn_should_accept_it(self):
+
+    def model_fn(features, labels):
+      _, _ = features, labels
+
+    estimator.Estimator(model_fn=model_fn)
+    with self.assertRaisesRegexp(ValueError, 'params'):
+      estimator.Estimator(model_fn=model_fn, params={'hidden_layers': 4})
+
+  def test_internal_params_is_a_deepcopy(self):
+
+    def model_fn(features, labels, params):
+      _, _, _ = features, labels, params
+
+    params = {'hidden_layers': 4}
+    est = estimator.Estimator(model_fn=model_fn, params=params)
+
+    params['hidden_layers'] = 5
+    self.assertEqual(4, est.params['hidden_layers'])
+
+  def test_not_known_model_fn_args(self):
+
+    def model_fn(features, labels, something):
+      _, _, _ = features, labels, something
+
+    with self.assertRaisesRegexp(ValueError, 'something'):
+      estimator.Estimator(model_fn=model_fn)
+
+  def test_not_known_model_fn_args_handled_by_lambda(self):
+    def model_fn(features, labels, something):
+      _, _, _ = features, labels, something
+
+    new_model_fn = lambda features, labels: model_fn(  # pylint: disable=g-long-lambda
+        features, labels, 'something')
+    estimator.Estimator(model_fn=new_model_fn)
+
+  def test_if_model_fn_is_a_member_function_of_a_class(self):
+
+    class ModelFnClass(object):
+
+      def __init__(self):
+        estimator.Estimator(model_fn=self.model_fn)
+
+      def model_fn(self, features, labels, mode):
+        _, _, _ = features, labels, mode
+
+    ModelFnClass()
+
+  def test_model_fn_property_binds_params(self):
+
+    def model_fn(features, labels, mode, config, params):
+      _, _, _, _, _ = features, labels, mode, config, params
+
+    est = estimator.Estimator(model_fn=model_fn)
+    model_fn_args = function_utils.fn_args(est.model_fn)
+    self.assertEqual(
+        set(['features', 'labels', 'mode', 'config']), set(model_fn_args))
+
+  def test_model_fn_property_returns_fixed_signature(self):
+
+    def model_fn(features, labels):
+      _, _ = features, labels
+
+    est = estimator.Estimator(model_fn=model_fn)
+    model_fn_args = function_utils.fn_args(est.model_fn)
+    self.assertEqual(
+        set(['features', 'labels', 'mode', 'config']), set(model_fn_args))
+
+
+def dummy_input_fn():
+  return ({'x': constant_op.constant([[1], [1]])},
+          constant_op.constant([[1], [1]]))
+
+
+def model_fn_global_step_incrementer(features, labels, mode):
+  _, _ = features, labels
+  global_step = training.get_global_step()
+  return model_fn_lib.EstimatorSpec(
+      mode,
+      loss=constant_op.constant(1.),
+      train_op=state_ops.assign_add(global_step, 1))
+
+
+def assert_features_op(expected_features, actual_features):
+  return [
+      check_ops.assert_equal(
+          expected_features[k], actual_features[k], name='assert_%s' % k)
+      for k in expected_features
+  ]
+
+
+def _estimator_spec(
+    expected_features, expected_labels, actual_features, actual_labels, mode):
+  assert_ops = tuple(
+      assert_features_op(expected_features, actual_features) + [
+          check_ops.assert_equal(
+              expected_labels, actual_labels, name='assert_labels')
+      ])
+  global_step = training.get_global_step()
+  with ops.control_dependencies(assert_ops):
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        predictions=constant_op.constant(0.),
+        loss=constant_op.constant(0.),
+        train_op=state_ops.assign_add(global_step, 1))
+
+
+def _make_input_fn(features, labels):
+  def _input_fn():
+    return {
+        k: constant_op.constant(v)
+        for k, v in six.iteritems(features)
+    }, constant_op.constant(labels)
+  return _input_fn
+
+
+class EstimatorTrainTest(test.TestCase):
+
+  def test_callable_model_fn(self):
+    expected_features = {'x': 42., 'y': 43.}
+    expected_labels = 44.
+
+    model_fn_call_count = [0]
+
+    test_self = self
+
+    class ModelFn(object):
+
+      def __call__(self, features, labels):
+        model_fn_call_count[0] += 1
+        test_self.assertItemsEqual(expected_features.keys(), features.keys())
+        return _estimator_spec(
+            expected_features, expected_labels, features, labels,
+            model_fn_lib.ModeKeys.TRAIN)
+
+    with self.assertRaisesRegexp(ValueError, 'does not include params'):
+      estimator.Estimator(model_fn=ModelFn(), params={'a': 'b'})
+    est = estimator.Estimator(model_fn=ModelFn(), config=run_config.RunConfig())
+    self.assertEqual(0, model_fn_call_count[0])
+    est.train(
+        input_fn=_make_input_fn(expected_features, expected_labels), steps=1)
+    self.assertEqual(1, model_fn_call_count[0])
+
+  def test_callable_input_fn(self):
+    expected_mode = model_fn_lib.ModeKeys.TRAIN
+    expected_params = {'batch_size': 10}
+    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
+    input_fn_call_count = [0]
+
+    def _model_fn(features, labels, mode, params, config):
+      del params, config
+      return model_fn_global_step_incrementer(features, labels, mode)
+
+    test_self = self
+
+    class InputFn(object):
+
+      def __call__(self, mode, params, config):
+        input_fn_call_count[0] += 1
+        test_self.assertEqual(expected_mode, mode)
+        test_self.assertEqual(expected_params, params)
+        test_self.assertEqual(4321, config.tf_random_seed)
+        return dummy_input_fn()
+
+    est = estimator.Estimator(model_fn=_model_fn,
+                              params=expected_params,
+                              config=expected_config)
+    self.assertEqual(0, input_fn_call_count[0])
+    est.train(InputFn(), steps=1)
+    self.assertEqual(1, input_fn_call_count[0])
+
+  def test_nested_input_fn(self):
+    expected_params = {'batch_size': 10}
+
+    def _input_fn():
+      dataset_features = dataset_ops.Dataset.from_tensor_slices(
+          (random_uniform([4]),
+           random_uniform([4, 100], maxval=100, dtype=dtypes.int32)))
+      dataset_labels = dataset_ops.Dataset.from_tensor_slices(
+          random_uniform([4, 10]))
+      dataset = dataset_ops.Dataset.zip((dataset_features, dataset_labels))
+      dataset = dataset.repeat(-1)
+      iterator = dataset.make_initializable_iterator()
+      return iterator.get_next()
+
+    def _model_fn(features, labels, mode, params, config):
+      del params, config
+      return model_fn_global_step_incrementer(features, labels, mode)
+
+    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
+    est = estimator.Estimator(
+        model_fn=_model_fn, params=expected_params, config=expected_config)
+    est.train(_input_fn, steps=4)
+
+  def test_input_fn_args(self):
+    expected_mode = model_fn_lib.ModeKeys.TRAIN
+    expected_params = {'batch_size': 10}
+    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
+    input_fn_call_count = [0]
+
+    def _model_fn(features, labels, mode, params, config):
+      del params, config
+      return model_fn_global_step_incrementer(features, labels, mode)
+
+    def _input_fn(mode, params, config):
+      input_fn_call_count[0] += 1
+      self.assertEqual(expected_mode, mode)
+      self.assertEqual(expected_params, params)
+      self.assertEqual(4321, config.tf_random_seed)
+      return dummy_input_fn()
+
+    est = estimator.Estimator(model_fn=_model_fn,
+                              params=expected_params,
+                              config=expected_config)
+    self.assertEqual(0, input_fn_call_count[0])
+    est.train(_input_fn, steps=1)
+    self.assertEqual(1, input_fn_call_count[0])
+
+  def test_minimal_model_fn_args(self):
+    expected_features = {'x': 4, 'y': 5}
+
+    def _input_fn():
+      return expected_features
+
+    model_fn_call_count = [0]
+    def _model_fn(features):
+      model_fn_call_count[0] += 1
+      self.assertItemsEqual(expected_features.keys(), features.keys())
+      with ops.control_dependencies(
+          assert_features_op(expected_features, features)):
+        return model_fn_lib.EstimatorSpec(
+            mode=None,
+            predictions=constant_op.constant(0.),
+            loss=constant_op.constant(0.),
+            train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    self.assertEqual(0, model_fn_call_count[0])
+    est.train(input_fn=_input_fn, steps=1)
+    self.assertEqual(1, model_fn_call_count[0])
+
+  def test_labels_should_be_none_if_model_fn_does_not_use_labels(self):
+
+    def _input_fn_with_labels():
+      return {'x': 4, 'y': 5}, [4]
+
+    def _model_fn(features):
+      _ = features
+      return model_fn_lib.EstimatorSpec(
+          mode=None,
+          predictions=constant_op.constant(0.),
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    with self.assertRaisesRegexp(ValueError, 'model_fn does not take labels'):
+      est.train(input_fn=_input_fn_with_labels, steps=1)
+
+  def test_input_fn_len_should_be_2_if_tuple_or_list(self):
+
+    def _input_fn():
+      return 4, 5, 6
+
+    def _model_fn(features):
+      _ = features
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    with self.assertRaisesRegexp(ValueError, 'len 2 tuple'):
+      est.train(input_fn=_input_fn, steps=1)
+
+  def test_all_model_fn_args(self):
+    expected_features = {'x': 42., 'y': 43.}
+    expected_labels = 44.
+    expected_params = {'some_param': 'some_value'}
+    expected_config = run_config.RunConfig()
+    expected_config.i_am_test = True
+
+    # TODO(ptucker): We have to roll our own mock since Estimator._get_arguments
+    # doesn't work with mock fns.
+    model_fn_call_count = [0]
+
+    # Note that args are all passed by keyword, so can be in any order.
+    def _model_fn(mode, params, features, labels, config):
+      model_fn_call_count[0] += 1
+      self.assertItemsEqual(expected_features.keys(), features.keys())
+      self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)
+      self.assertEqual(expected_params, params)
+      self.assertTrue(config.i_am_test)
+      return _estimator_spec(
+          expected_features, expected_labels, features, labels, mode)
+
+    est = estimator.Estimator(
+        model_fn=_model_fn, params=expected_params, config=expected_config)
+    self.assertEqual(0, model_fn_call_count[0])
+    est.train(
+        input_fn=_make_input_fn(expected_features, expected_labels), steps=1)
+    self.assertEqual(1, model_fn_call_count[0])
+
+  def test_partial_model_fn_args(self):
+    expected_features = {'x': 42., 'y': 43.}
+    expected_labels = 44.
+    expected_params = {'some_param': 'some_value'}
+    expected_config = run_config.RunConfig()
+    expected_config.i_am_test = True
+    expected_foo = 45.
+    expected_bar = 46.
+
+    # TODO(ptucker): We have to roll our own mock since Estimator._get_arguments
+    # doesn't work with mock fns.
+    model_fn_call_count = [0]
+
+    def _model_fn(features, labels, foo, mode, params, config, bar):
+      model_fn_call_count[0] += 1
+      self.assertEqual(expected_foo, foo)
+      self.assertEqual(expected_bar, bar)
+      self.assertItemsEqual(expected_features.keys(), features.keys())
+      self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)
+      self.assertEqual(expected_params, params)
+      self.assertTrue(config.i_am_test)
+      return _estimator_spec(
+          expected_features, expected_labels, features, labels, mode)
+    partial_model_fn = functools.partial(
+        _model_fn, foo=expected_foo, bar=expected_bar)
+
+    est = estimator.Estimator(
+        model_fn=partial_model_fn, params=expected_params,
+        config=expected_config)
+    self.assertEqual(0, model_fn_call_count[0])
+    est.train(
+        input_fn=_make_input_fn(expected_features, expected_labels), steps=1)
+    self.assertEqual(1, model_fn_call_count[0])
+
+  def test_model_fn_must_return_estimator_spec(self):
+
+    def model_fn(features, labels):
+      _, _ = features, labels
+      return 'NotGoodNotGood'
+
+    est = estimator.Estimator(model_fn=model_fn)
+    with self.assertRaisesRegexp(ValueError, 'EstimatorSpec'):
+      est.train(dummy_input_fn, steps=1)
+
+  def test_run_train_op_and_saves_at_the_end(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
+    est.train(dummy_input_fn, steps=5)
+    self.assertEqual(
+        5, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
+
+  def test_loss_summary(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer,
+                              config=run_config.RunConfig(save_summary_steps=1))
+    est.train(dummy_input_fn, steps=1)
+
+    # Make sure nothing is stuck in limbo.
+    writer_cache.FileWriterCache.clear()
+
+    if check_eventfile_for_keyword('loss', est.model_dir):
+      return
+    self.fail('{} should be part of reported summaries.'.format('loss'))
+
+  def test_latest_checkpoint(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
+    self.assertIsNone(est.latest_checkpoint())
+    est.train(dummy_input_fn, steps=5)
+    self.assertIsNotNone(est.latest_checkpoint())
+    self.assertTrue(est.latest_checkpoint().startswith(est.model_dir))
+
+  def test_steps_and_saves_reloads(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
+    est.train(dummy_input_fn, steps=5)
+    self.assertEqual(
+        5, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
+    est.train(dummy_input_fn, steps=5)
+    self.assertEqual(
+        10, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
+
+  def test_warm_starts(self):
+    def _make_model_fn(x):
+      def _variable_creating_model_fn(features, labels, mode):
+        _, _ = features, labels
+        variable_scope.get_variable('x', initializer=x)
+        global_step = training.get_global_step()
+        return model_fn_lib.EstimatorSpec(
+            mode,
+            loss=constant_op.constant(1.),
+            train_op=state_ops.assign_add(global_step, 1))
+      return _variable_creating_model_fn
+
+    est = estimator.Estimator(model_fn=_make_model_fn(42.))
+    est.train(dummy_input_fn, steps=10)
+
+    warm_started_est = estimator.Estimator(
+        model_fn=_make_model_fn(36.),
+        warm_start_from=est.model_dir)
+    warm_started_est.train(dummy_input_fn, steps=5)
+    # warm_start is called after the model_fn, so x should have the value
+    # from the checkpoint.
+    self.assertEqual(42., warm_started_est.get_variable_value('x'))
+    # global_step should not be warm-started.
+    self.assertEqual(
+        5, estimator._load_global_step_from_checkpoint_dir(
+            warm_started_est.model_dir))
+
+  def test_warm_starts_from_savedmodel(self):
+    def _make_model_fn(x):
+      def _variable_creating_and_export_model_fn(features, labels, mode):
+        _, _ = features, labels
+        variable_scope.get_variable('x', initializer=x)
+        global_step = training.get_global_step()
+        return model_fn_lib.EstimatorSpec(
+            mode,
+            predictions={'y': constant_op.constant(1.0)},
+            loss=constant_op.constant(1.),
+            train_op=state_ops.assign_add(global_step, 1),
+            export_outputs={'test': export_output.ClassificationOutput(
+                constant_op.constant([4.2]), constant_op.constant(['label']))})
+      return _variable_creating_and_export_model_fn
+
+    est = estimator.Estimator(model_fn=_make_model_fn(42.))
+    est.train(dummy_input_fn, steps=10)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    tmpdir = tempfile.mkdtemp()
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est.export_savedmodel(
+        export_dir_base, serving_input_receiver_fn)
+
+    warm_started_est = estimator.Estimator(
+        model_fn=_make_model_fn(36.),
+        warm_start_from=export_dir)
+    warm_started_est.train(dummy_input_fn, steps=5)
+    # warm_start is called after the model_fn, so x should have the value
+    # from the SavedModel.
+    self.assertEqual(42., warm_started_est.get_variable_value('x'))
+
+  def test_max_step(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
+    est.train(dummy_input_fn, max_steps=5)
+    self.assertEqual(
+        5, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
+    est.train(dummy_input_fn, max_steps=5)
+    self.assertEqual(
+        5, estimator._load_global_step_from_checkpoint_dir(est.model_dir))
+
+  def test_checkpoint_contains_relative_paths(self):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(
+        model_dir=tmpdir,
+        model_fn=model_fn_global_step_incrementer)
+    est.train(dummy_input_fn, steps=5)
+
+    checkpoint_file_content = file_io.read_file_to_string(
+        os.path.join(tmpdir, 'checkpoint'))
+    ckpt = checkpoint_state_pb2.CheckpointState()
+    text_format.Merge(checkpoint_file_content, ckpt)
+    self.assertEqual(ckpt.model_checkpoint_path, 'model.ckpt-5')
+    # TODO(b/78461127): Please modify tests to not directly rely on names of
+    # checkpoints.
+    self.assertAllEqual(
+        ['model.ckpt-0', 'model.ckpt-5'], ckpt.all_model_checkpoint_paths)
+
+  def test_train_save_copy_reload(self):
+    tmpdir = tempfile.mkdtemp()
+    model_dir1 = os.path.join(tmpdir, 'model_dir1')
+    est1 = estimator.Estimator(
+        model_dir=model_dir1,
+        model_fn=model_fn_global_step_incrementer)
+    est1.train(dummy_input_fn, steps=5)
+
+    # We have to clear the cache before we can rename the directory,
+    # otherwise open file handles will prevent the delete on Windows.
+    writer_cache.FileWriterCache.clear()
+    model_dir2 = os.path.join(tmpdir, 'model_dir2')
+    os.renames(model_dir1, model_dir2)
+
+    est2 = estimator.Estimator(
+        model_dir=model_dir2,
+        model_fn=model_fn_global_step_incrementer)
+    self.assertEqual(
+        5, estimator._load_global_step_from_checkpoint_dir(est2.model_dir))
+    est2.train(dummy_input_fn, steps=5)
+    self.assertEqual(
+        10, estimator._load_global_step_from_checkpoint_dir(est2.model_dir))
+
+  def test_steps0_raises_error(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops)
+    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
+      est.train(dummy_input_fn, steps=0)
+
+  def test_steps_negative_raises_error(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops)
+    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
+      est.train(dummy_input_fn, steps=-1)
+
+  def test_max_steps0_raises_error(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops)
+    with self.assertRaisesRegexp(ValueError, 'Must specify max_steps > 0'):
+      est.train(dummy_input_fn, max_steps=0)
+
+  def test_max_steps_negative_raises_error(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops)
+    with self.assertRaisesRegexp(ValueError, 'Must specify max_steps > 0'):
+      est.train(dummy_input_fn, max_steps=-1)
+
+  def test_scaffold_is_used(self):
+    self.is_init_fn_called = False
+
+    def _init_fn(scaffold, sess):
+      _, _ = scaffold, sess
+      self.is_init_fn_called = True
+
+    def _model_fn_scaffold(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          scaffold=training.Scaffold(init_fn=_init_fn))
+
+    est = estimator.Estimator(model_fn=_model_fn_scaffold)
+    est.train(dummy_input_fn, steps=1)
+    self.assertTrue(self.is_init_fn_called)
+
+  def test_hooks_should_be_session_run_hook(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
+    with self.assertRaisesRegexp(TypeError, 'must be a SessionRunHook'):
+      est.train(dummy_input_fn, steps=1, hooks=['NotAHook'])
+
+  def test_training_hooks_are_used(self):
+    chief_hook = test.mock.MagicMock(
+        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
+    hook = test.mock.MagicMock(
+        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
+
+    def _model_fn_hooks(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          training_chief_hooks=[chief_hook],
+          training_hooks=[hook])
+
+    est = estimator.Estimator(model_fn=_model_fn_hooks)
+    self.assertFalse(chief_hook.begin.called)
+    self.assertFalse(hook.begin.called)
+    est.train(dummy_input_fn, steps=1)
+    self.assertTrue(chief_hook.begin.called)
+    self.assertTrue(hook.begin.called)
+
+  def test_saving_listeners_are_used(self):
+    listener = test.mock.Mock(spec=training.CheckpointSaverListener)
+    listener.after_save.return_value = None
+    est = estimator.Estimator(
+        model_fn=model_fn_global_step_incrementer,
+        config=run_config.RunConfig(save_checkpoints_steps=10))
+    est.train(dummy_input_fn, steps=26, saving_listeners=[listener])
+    self.assertEqual(4, listener.before_save.call_count)
+    self.assertEqual(4, listener.after_save.call_count)
+
+  def test_saver_hook_should_exist_to_use_saving_listeners(self):
+    listener = test.mock.Mock(spec=training.CheckpointSaverListener)
+    est = estimator.Estimator(
+        model_fn=model_fn_global_step_incrementer,
+        config=run_config.RunConfig(save_checkpoints_steps=None,
+                                    save_checkpoints_secs=None))
+    with self.assertRaisesRegexp(
+        ValueError, 'CheckpointSaverHook to use saving_listeners'):
+      est.train(dummy_input_fn, steps=1, saving_listeners=[listener])
+
+  def test_listeners_should_be_listeners(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
+    with self.assertRaisesRegexp(
+        TypeError, 'must be a list of CheckpointSaverListener'):
+      est.train(dummy_input_fn, steps=1, saving_listeners=['not-a-listener'])
+
+  def test_chief_only_hook_should_not_be_called_on_non_chief(self):
+    chief_hook = test.mock.MagicMock(
+        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
+    hook = test.mock.MagicMock(
+        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
+
+    def _model_fn_hooks(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          training_chief_hooks=[chief_hook],
+          training_hooks=[hook])
+
+    class NonChiefRunConfig(run_config.RunConfig):
+      @property
+      def is_chief(self):  # pylint: disable=g-wrong-blank-lines
+        return False
+
+    # Mocking the SessionManager.wait_for_session, so that worker doesn't wait
+    # for chief.
+    def get_initialized_session(*args, **kwargs):
+      # Session doesn't take 'max_wait_secs' argument.
+      kwargs.pop('max_wait_secs', None)
+      scaffold = training.Scaffold().finalize()
+      sess = session.Session(*args, **kwargs)
+      sess.run(scaffold.init_op)
+      return sess
+
+    with test.mock.patch.object(
+        training.SessionManager,
+        'wait_for_session',
+        side_effect=get_initialized_session):
+      est = estimator.Estimator(
+          model_fn=_model_fn_hooks, config=NonChiefRunConfig())
+      self.assertFalse(chief_hook.begin.called)
+      self.assertFalse(hook.begin.called)
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(chief_hook.begin.called)
+      self.assertTrue(hook.begin.called)
+
+  def test_features_labels_mode(self):
+    given_features = {'test-features': [[1], [1]]}
+    given_labels = {'test-labels': [[1], [1]]}
+
+    def _input_fn():
+      return given_features, given_labels
+
+    def _model_fn(features, labels, mode):
+      self.features, self.labels, self.mode = features, labels, mode
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[0.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    self.assertEqual(given_features, self.features)
+    self.assertEqual(given_labels, self.labels)
+    self.assertEqual(model_fn_lib.ModeKeys.TRAIN, self.mode)
+
+  def test_graph_initialization_global_step_and_random_seed(self):
+    expected_random_seed = run_config.RunConfig().tf_random_seed
+    def _model_fn(features, labels, mode):
+      _, _, _ = features, labels, mode
+      self.assertIsNotNone(training.get_global_step())
+      self.assertEqual(expected_random_seed, ops.get_default_graph().seed)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[0.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+
+  def test_config_should_not_be_evaluator_or_ps(self):
+
+    class FakeEvaluatorConfig(run_config.RunConfig):
+
+      @property
+      def task_type(self):
+        return run_config.TaskType.EVALUATOR
+
+    est = estimator.Estimator(
+        model_fn=dummy_model_fn, config=FakeEvaluatorConfig())
+    with self.assertRaisesRegexp(ValueError, 'train_and_evaluate'):
+      est.train(dummy_input_fn, steps=1)
+
+  def test_master_distributed_hooks(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.PS: ['localhost:1234'],
+            run_config.TaskType.WORKER: ['localhost:1235'],
+            run_config.TaskType.MASTER: ['localhost:1236']
+        },
+        'task': {
+            'type': run_config.TaskType.MASTER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig())
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
+
+  def test_master_distributed_hooks_for_worker_0(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.PS: ['localhost:1234'],
+            run_config.TaskType.WORKER: ['localhost:1235'],
+            run_config.TaskType.MASTER: ['localhost:1236']
+        },
+        'task': {
+            'type': run_config.TaskType.WORKER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig())
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertTrue(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertTrue(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
+
+  def test_master_distributed_hooks_for_worker_nonzero(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.PS: ['localhost:1234'],
+            run_config.TaskType.WORKER: ['localhost:1235', 'localhost:1237'],
+            run_config.TaskType.MASTER: ['localhost:1236']
+        },
+        'task': {
+            'type': run_config.TaskType.WORKER,
+            'index': 1
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig())
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
+
+
+def _model_fn_with_eval_metric_ops(features, labels, mode, params):
+  _, _ = features, labels
+  global_step = training.get_global_step()
+  loss = constant_op.constant(1.)
+  metric_name_1 = params.get('metric_name') or 'metric'
+  metric_value_1 = params.get('metric_value') or 2.
+  metric_name_2 = params.get('metric_name_2') or 'metric2'
+  metric_value_2 = params.get('metric_value_2') or 2.
+
+  metric_update_op = loss.op
+  metric_tensor = control_flow_ops.with_dependencies(
+      [metric_update_op], constant_op.constant(metric_value_1))
+
+  mean = metrics_module.Mean()
+  mean.update_state(metric_value_2)
+  return model_fn_lib.EstimatorSpec(
+      mode,
+      loss=loss,
+      predictions={'predictions': constant_op.constant(1.)},
+      train_op=state_ops.assign_add(global_step, 1),
+      eval_metric_ops={
+          metric_name_1: (metric_tensor, metric_update_op),
+          metric_name_2: mean,
+      })
+
+
+class _StepCounterHook(session_run_hook.SessionRunHook):
+  """Hooks that counts the number of times it is called."""
+
+  def __init__(self):
+    self._steps = 0
+
+  def before_run(self, run_context):
+    del run_context
+    self._steps += 1
+
+  @property
+  def steps(self):
+    return self._steps
+
+
+class EstimatorGetVariablesTest(test.TestCase):
+
+  def test_model_should_be_trained(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      variables.VariableV1(1., name='one')
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    with self.assertRaisesRegexp(ValueError, 'not find trained model'):
+      est.get_variable_names()
+    with self.assertRaisesRegexp(ValueError, 'not find trained model'):
+      est.get_variable_value('one')
+
+  def test_get_variable_utils(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      variables.VariableV1(1., name='one')
+      variables.VariableV1(3., name='three')
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(input_fn=dummy_input_fn, steps=1)
+    self.assertEqual(
+        set(['one', 'three', 'global_step']), set(est.get_variable_names()))
+    self.assertEqual(1., est.get_variable_value('one'))
+    self.assertEqual(3., est.get_variable_value('three'))
+
+
+class EstimatorDatasetIntegrationTest(test.TestCase):
+  """Tests dataset integration."""
+
+  def test_returned_by_input_fn(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors(([1.], [2.]))
+
+    def _model_fn(features, labels, mode):
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=features + labels,  # 1 + 2
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    scores = est.evaluate(_input_fn, steps=1)
+    self.assertEqual(3., scores[model_fn_lib.LOSS_METRIC_KEY])
+
+  def test_with_none_labels(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors([7.])
+
+    def _model_fn(features, labels, mode):
+      self.assertIsNone(labels)
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=features,  # 7
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    scores = est.evaluate(_input_fn, steps=1)
+    self.assertEqual(7., scores[model_fn_lib.LOSS_METRIC_KEY])
+
+  def test_with_predict(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensors([10.])
+
+    def _model_fn(features, labels, mode):
+      _ = labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=features,  # 10
+          loss=features,  # 10
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    self.assertEqual([10.], next(est.predict(input_fn=_input_fn)))
+
+  def test_batching(self):
+
+    def _input_fn():
+      return dataset_ops.Dataset.from_tensor_slices(([[1.], [2.]],
+                                                     [[10.], [20.]])).batch(1)
+
+    def _model_fn(features, labels, mode):
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=features,
+          loss=features + (0 if labels is None else labels),  # 11, 22
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn)
+    scores = est.evaluate(_input_fn)
+    # (11 + 22)/2 = 16.5
+    self.assertEqual(16.5, scores[model_fn_lib.LOSS_METRIC_KEY])
+    self.assertEqual([1., 2.], list(est.predict(_input_fn)))
+
+
+class EstimatorEvaluateTest(test.TestCase):
+
+  def test_eval_dir(self):
+    est = estimator.Estimator(
+        model_fn=model_fn_global_step_incrementer,
+        model_dir='some_path')
+    expected_eval_dir = os.path.join('some_path', 'eval')
+    self.assertEqual(expected_eval_dir, est.eval_dir())
+    expected_eval_dir_name = os.path.join('some_path', 'eval_a_name')
+    self.assertEqual(expected_eval_dir_name, est.eval_dir('a_name'))
+
+  def test_input_fn_args(self):
+    expected_mode = model_fn_lib.ModeKeys.EVAL
+    expected_params = {'batch_size': 10}
+    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
+    input_fn_call_count = [0]
+
+    def _model_fn(features, labels, mode, params, config):
+      del params, config
+      return model_fn_global_step_incrementer(features, labels, mode)
+
+    def _input_fn(mode, params, config):
+      input_fn_call_count[0] += 1
+      self.assertEqual(expected_mode, mode)
+      self.assertEqual(expected_params, params)
+      self.assertEqual(4321, config.tf_random_seed)
+      return dummy_input_fn()
+
+    est = estimator.Estimator(model_fn=_model_fn,
+                              params=expected_params,
+                              config=expected_config)
+    est.train(dummy_input_fn, steps=1)
+    self.assertEqual(0, input_fn_call_count[0])
+    est.evaluate(_input_fn, steps=1)
+    self.assertEqual(1, input_fn_call_count[0])
+
+  def test_model_fn_must_return_estimator_spec(self):
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      if mode == model_fn_lib.ModeKeys.EVAL:
+        return 'NotGoodNotGood'
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(1.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    with self.assertRaisesRegexp(
+        ValueError, 'model_fn should return an EstimatorSpec'):
+      est.evaluate(dummy_input_fn, steps=1)
+
+  def test_no_checkpoint_uses_init(self):
+    def _model_fn(features, labels, mode, params):
+      del features, labels, params
+      mean = metrics_module.Mean()
+      mean.update_state(variables.VariableV1(2.) + 1)
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(1.),
+          eval_metric_ops={
+              'mean1': mean,
+              'mean2': metrics_lib.mean(variables.VariableV1(2.) + 1)
+          })
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    scores = est.evaluate(dummy_input_fn, steps=1)
+    # Metric value here is set to 1 + the value of the Variable that is newly
+    # initialized (since there is no checkpoint).
+    self.assertEqual(3., scores['mean1'])
+    self.assertEqual(3., scores['mean2'])
+
+  def test_no_checkpoint_uses_init_with_warm_starting(self):
+    def _make_model_fn(x):
+      def _variable_creating_and_export_model_fn(features, labels, mode):
+        _, _ = features, labels
+        x_var = variable_scope.get_variable('x', initializer=x)
+        global_step = training.get_global_step()
+        mean = metrics_module.Mean()
+        mean.update_state(x_var + 1)
+        return model_fn_lib.EstimatorSpec(
+            mode,
+            predictions={'y': constant_op.constant(1.0)},
+            loss=constant_op.constant(1.),
+            eval_metric_ops={
+                'mean1': mean,
+                'mean2': metrics_lib.mean(x_var + 1)
+            },
+            train_op=state_ops.assign_add(global_step, 1),
+            export_outputs={
+                'test':
+                    export_output.ClassificationOutput(
+                        constant_op.constant([4.2]),
+                        constant_op.constant(['label']))
+            })
+
+      return _variable_creating_and_export_model_fn
+
+    first_est = estimator.Estimator(model_fn=_make_model_fn(42.))
+    first_est.train(dummy_input_fn, steps=10)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    tmpdir = tempfile.mkdtemp()
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    exported_path = first_est.export_savedmodel(export_dir_base,
+                                                serving_input_receiver_fn)
+
+    # Test that we can pass either warm_start_from as an external checkpoint
+    # or an exported SavedModel.
+    est = estimator.Estimator(model_fn=_make_model_fn(52.),
+                              warm_start_from=exported_path)
+    eval_metrics = est.evaluate(dummy_input_fn, steps=1)
+    # Metric value here is set to 1 + the value of the Variable that is
+    # warm-started from the SavedModel of the first model (42.), as opposed to
+    # the initialization in the new model_fn (52.).
+    self.assertEqual(43., eval_metrics['mean1'])
+    self.assertEqual(43., eval_metrics['mean2'])
+
+    est = estimator.Estimator(model_fn=_make_model_fn(62.),
+                              warm_start_from=first_est.model_dir)
+    eval_metrics = est.evaluate(dummy_input_fn, steps=1)
+    # Metric value here is set to 1 + the value of the Variable that is
+    # warm-started from a checkpoint of the first model (42.), as opposed to
+    # the initialization in the new model_fn (52.).
+    self.assertEqual(43., eval_metrics['mean1'])
+    self.assertEqual(43., eval_metrics['mean2'])
+
+  def test_scores(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops,
+        params={
+            'metric_name': 'metric',
+            'metric_value': 2.,
+            'metric_name_2': 'metric2',
+            'metric_value_2': 3.,
+        })
+    est.train(dummy_input_fn, steps=5)
+    scores = est.evaluate(dummy_input_fn, steps=1)
+    self.assertIn('metric', scores)
+    self.assertAlmostEqual(2., scores['metric'])
+    self.assertIn('metric2', scores)
+    self.assertAlmostEqual(3., scores['metric2'])
+
+  def test_tuple_metrics(self):
+    def _model_fn(features, labels, mode):
+      del features  # unused
+      del labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          loss=constant_op.constant(1.),
+          eval_metric_ops={
+              'nested_metric': (
+                  ((constant_op.constant(2.), constant_op.constant(1)),
+                   constant_op.constant(3., dtype=dtypes.float64)),
+                  control_flow_ops.no_op())})
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    evaluation = est.evaluate(dummy_input_fn, steps=1)
+    ((two_float, one_integer), three_double) = evaluation['nested_metric']
+    self.assertAlmostEqual(2., two_float)
+    self.assertEqual(1, one_integer)
+    self.assertAlmostEqual(3., three_double)
+
+  def test_steps0_raises_error(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops)
+    est.train(dummy_input_fn, steps=5)
+    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
+      est.evaluate(dummy_input_fn, steps=0)
+
+  def test_steps_negative_raises_error(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops)
+    est.train(dummy_input_fn, steps=5)
+    with self.assertRaisesRegexp(ValueError, 'Must specify steps > 0'):
+      est.evaluate(dummy_input_fn, steps=-1)
+
+  def test_global_step_metric_raises_error(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops,
+        params={
+            'metric_name': 'global_step',
+            'metric_value': 2.})
+    est.train(dummy_input_fn, steps=5)
+    with self.assertRaisesRegexp(
+        ValueError, 'Metric with name `global_step` is not allowed'):
+      est.evaluate(dummy_input_fn, steps=1)
+
+  def test_global_step_is_reported(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops,
+        params={
+            'metric_name': 'metric',
+            'metric_value': 2.,
+            'metric_name_2': 'metric2',
+            'metric_value_2': 3.,
+        })
+    est.train(dummy_input_fn, steps=5)
+    scores = est.evaluate(dummy_input_fn, steps=1)
+    self.assertIn('global_step', scores)
+    self.assertEqual(5, scores['global_step'])
+
+  def test_loss_metric_is_reported(self):
+
+    def _model_fn_with_incremental_loss(features, labels, mode):
+      _, _ = features, labels
+      local_weight = variables.VariableV1(
+          0., name='local_weight', collections=[ops.GraphKeys.LOCAL_VARIABLES])
+      # Loss will be 2, 4, 6, ...
+      loss = 2 * state_ops.assign_add(local_weight, 1.)
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=loss,
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn_with_incremental_loss)
+    est.train(dummy_input_fn, steps=1)
+    scores = est.evaluate(dummy_input_fn, steps=5)
+    self.assertIn(model_fn_lib.LOSS_METRIC_KEY, scores)
+    # Average loss will be (2 + 4 + 6 + 8 + 10)/5=6
+    self.assertAlmostEqual(6., scores[model_fn_lib.LOSS_METRIC_KEY])
+
+  def test_hooks_should_be_session_run_hook(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
+    est.train(dummy_input_fn, steps=1)
+    with self.assertRaisesRegexp(TypeError, 'must be a SessionRunHook'):
+      est.evaluate(dummy_input_fn, steps=5, hooks=['NotAHook'])
+
+  def test_hooks_are_used(self):
+    step_counter_hook = _StepCounterHook()
+
+    est = estimator.Estimator(model_fn=_model_fn_with_eval_metric_ops)
+    est.train(dummy_input_fn, steps=1)
+    est.evaluate(dummy_input_fn, steps=5, hooks=[step_counter_hook])
+    self.assertEqual(5, step_counter_hook.steps)
+
+  def test_evaluate_from_checkpoint(self):
+    params = {
+        'metric_name': 'metric',
+        'metric_value': 2.,
+        'metric_name_2': 'metric2',
+        'metric_value_2': 3.,
+    }
+    est1 = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops,
+        params=params)
+    est1.train(dummy_input_fn, steps=5)
+    est2 = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops,
+        params=params)
+    scores = est2.evaluate(
+        dummy_input_fn, steps=1, checkpoint_path=est1.latest_checkpoint())
+    self.assertEqual(5, scores['global_step'])
+
+  def test_wrong_shape_throws_reasonable_error(self):
+    """Make sure we are helpful when model_fns change. See b/110263146."""
+    def _get_model_fn(val=1):
+      def _model_fn(features, labels, mode):
+        del features, labels  # unused
+        variables.VariableV1(val, name='weight')
+        return model_fn_lib.EstimatorSpec(
+            mode=mode,
+            predictions=constant_op.constant([[1.]]),
+            loss=constant_op.constant(0.),
+            train_op=state_ops.assign_add(training.get_global_step(), 1))
+      return _model_fn
+
+    model_fn_1 = _get_model_fn()
+    model_fn_2 = _get_model_fn(val=[1])
+
+    est1 = estimator.Estimator(model_fn=model_fn_1)
+    est1.train(dummy_input_fn, steps=5)
+    est2 = estimator.Estimator(
+        model_fn=model_fn_2, model_dir=est1.model_dir)
+
+    expected_msg = 'Restoring from checkpoint failed.*a mismatch between'
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, expected_msg):
+      est2.train(dummy_input_fn, steps=1,)
+
+  def test_scaffold_is_used(self):
+
+    def _model_fn_scaffold(features, labels, mode):
+      _, _ = features, labels
+      variables.VariableV1(1., name='weight')
+      self.mock_saver = get_mock_saver()
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          predictions=constant_op.constant([[1.]]),
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          scaffold=training.Scaffold(saver=self.mock_saver))
+
+    est = estimator.Estimator(model_fn=_model_fn_scaffold)
+    est.train(dummy_input_fn, steps=1)
+    est.evaluate(dummy_input_fn, steps=1)
+    self.assertTrue(self.mock_saver.restore.called)
+
+  def test_features_labels_mode(self):
+    given_features = {'test-features': [[1], [1]]}
+    given_labels = {'test-labels': [[1], [1]]}
+
+    def _input_fn():
+      return given_features, given_labels
+
+    def _model_fn(features, labels, mode):
+      self.features, self.labels, self.mode = features, labels, mode
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[0.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    est.evaluate(_input_fn, steps=1)
+    self.assertEqual(given_features, self.features)
+    self.assertEqual(given_labels, self.labels)
+    self.assertEqual(model_fn_lib.ModeKeys.EVAL, self.mode)
+
+  def test_graph_initialization_global_step_and_random_seed(self):
+    expected_random_seed = run_config.RunConfig().tf_random_seed
+    def _model_fn(features, labels, mode):
+      _, _, _ = features, labels, mode
+      self.assertIsNotNone(training.get_global_step())
+      self.assertEqual(expected_random_seed, ops.get_default_graph().seed)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[0.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    est.evaluate(dummy_input_fn, steps=1)
+
+  def test_evaluation_hooks_are_used(self):
+    hook = test.mock.MagicMock(
+        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
+
+    def _model_fn_hooks(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          evaluation_hooks=[hook])
+
+    est = estimator.Estimator(model_fn=_model_fn_hooks)
+    est.train(dummy_input_fn, steps=1)
+    self.assertFalse(hook.begin.called)
+    est.evaluate(dummy_input_fn, steps=1)
+    self.assertTrue(hook.begin.called)
+
+  def test_summary_writing_with_summary_proto(self):
+
+    def model_fn_global_step_incrementer_image(features, labels, mode):
+      _, _ = features, labels
+      global_step = training.get_global_step()
+
+      image = array_ops.zeros([5, 3, 3, 1])
+      eval_metric_ops = {
+          'foo': (summary.image('image', image, max_outputs=3),
+                  constant_op.constant(1))
+      }
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(1.),
+          train_op=state_ops.assign_add(global_step, 1),
+          eval_metric_ops=eval_metric_ops)
+
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer_image,
+                              config=run_config.RunConfig(save_summary_steps=1))
+    est.train(dummy_input_fn, steps=200)
+    est.evaluate(
+        input_fn=dummy_input_fn,
+        steps=200,
+    )
+
+    # Make sure nothing is stuck in limbo.
+    writer_cache.FileWriterCache.clear()
+
+    # Get last evaluation Event written.
+    for key in ['foo/0', 'foo/1', 'foo/2']:
+      self.assertTrue(
+          check_eventfile_for_keyword(key, est.eval_dir()),
+          '{} should be part of reported summaries.'.format(key))
+
+    # Verify that evaluated checkpoint path is written to event file.
+    checkpoint_path_tag = 'checkpoint_path'
+    self.assertTrue(
+        check_eventfile_for_keyword(checkpoint_path_tag, est.eval_dir()),
+        '{} should be part of reported summaries.'.format(checkpoint_path_tag))
+
+    expected_tensor_proto = tensor_util.make_tensor_proto(
+        est.latest_checkpoint(), dtype=dtypes.string)
+    summaries = summaries_with_matching_keyword(checkpoint_path_tag,
+                                                est.eval_dir())
+    self.assertProtoEquals(expected_tensor_proto,
+                           next(summaries).value[0].tensor)
+
+  def test_summary_writing_with_tensor(self):
+
+    def model_fn_with_prediction_mean_tensor_eval_metric_ops(
+        features, labels, mode, params):
+      _, _ = features, labels
+      global_step = training.get_global_step()
+
+      metric_name = params.get('metric_name') or 'metric'
+      predictions = constant_op.constant([1., .5, 0.])
+      eval_metric_ops = {metric_name: metrics_lib.mean_tensor(predictions)}
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(1.),
+          predictions={'predictions': predictions},
+          train_op=state_ops.assign_add(global_step, 1),
+          eval_metric_ops=eval_metric_ops)
+
+    metric_key = 'PMT'
+    params = {
+        'metric_name': metric_key,
+    }
+    est = estimator.Estimator(
+        model_fn=model_fn_with_prediction_mean_tensor_eval_metric_ops,
+        params=params,
+        config=run_config.RunConfig(save_summary_steps=1))
+    est.train(input_fn=dummy_input_fn, steps=10)
+    est.evaluate(
+        input_fn=dummy_input_fn,
+        steps=10,
+    )
+
+    writer_cache.FileWriterCache.clear()
+
+    self.assertTrue(
+        check_eventfile_for_keyword(metric_key, est.eval_dir()),
+        '{} should be part of reported summaries.'.format(metric_key))
+
+    summaries = summaries_with_matching_keyword(metric_key, est.eval_dir())
+    for value in next(summaries).value:
+      if value.tag == metric_key:
+        self.assertTrue(value.HasField('tensor'))
+
+
+class EstimatorPredictTest(test.TestCase):
+
+  def test_input_fn_args(self):
+    expected_mode = model_fn_lib.ModeKeys.PREDICT
+    expected_params = {'batch_size': 10}
+    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
+    input_fn_call_count = [0]
+
+    def _model_fn(features, labels, mode, params, config):
+      del features, labels, params, config
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.]]))
+
+    def _input_fn(mode, params, config):
+      input_fn_call_count[0] += 1
+      self.assertEqual(expected_mode, mode)
+      self.assertEqual(expected_params, params)
+      self.assertEqual(4321, config.tf_random_seed)
+      return dummy_input_fn()
+
+    est = estimator.Estimator(model_fn=_model_fn,
+                              params=expected_params,
+                              config=expected_config)
+    est.train(dummy_input_fn, steps=1)
+    self.assertEqual(0, input_fn_call_count[0])
+    next(est.predict(_input_fn))
+    self.assertEqual(1, input_fn_call_count[0])
+
+  def test_no_checkpoint_uses_init(self):
+    def _model_fn(features, labels, mode, params, config):
+      del features, labels, params, config
+      x = variables.VariableV1([[3.]], name='x')
+      return model_fn_lib.EstimatorSpec(mode, predictions=math_ops.add(x, 1.))
+    est = estimator.Estimator(model_fn=_model_fn)
+    # Expected prediction value is 1 + the value of the Variable that is newly
+    # initialized (since there is no checkpoint).
+    self.assertEqual(4., next(est.predict(dummy_input_fn)))
+
+  def test_no_checkpoint_uses_init_with_warm_starting(self):
+    def _make_model_fn(x):
+      def _variable_creating_and_export_model_fn(features, labels, mode):
+        _, _ = features, labels
+        x_var = variables.VariableV1([[x]], name='x')
+        return model_fn_lib.EstimatorSpec(
+            mode,
+            predictions=math_ops.add(x_var, 1.),
+            loss=constant_op.constant(1.),
+            train_op=state_ops.assign_add(training.get_global_step(), 1),
+            export_outputs={'test': export_output.ClassificationOutput(
+                constant_op.constant([4.2]),
+                constant_op.constant(['label']))})
+      return _variable_creating_and_export_model_fn
+
+    first_est = estimator.Estimator(model_fn=_make_model_fn(3.))
+    first_est.train(dummy_input_fn, steps=10)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    tmpdir = tempfile.mkdtemp()
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    exported_path = first_est.export_savedmodel(export_dir_base,
+                                                serving_input_receiver_fn)
+
+    # Test that we can pass either warm_start_from as an external checkpoint
+    # or an exported SavedModel.
+    est = estimator.Estimator(model_fn=_make_model_fn(30.),
+                              warm_start_from=exported_path)
+    # Prediction here is set to 1 + the value of the Variable that is
+    # warm-started from the SavedModel of the first model (3.), as opposed to
+    # the initialization in the new model_fn (30.).
+    self.assertEqual(4., next(est.predict(dummy_input_fn)))
+
+    est = estimator.Estimator(model_fn=_make_model_fn(40.),
+                              warm_start_from=first_est.model_dir)
+    # Prediction here is set to 1 + the value of the Variable that is
+    # warm-started from a checkpoint of the first model (3.), as opposed to
+    # the initialization in the new model_fn (40.).
+    self.assertEqual(4., next(est.predict(dummy_input_fn)))
+
+  def test_no_trained_model_invalid_checkpoint_path(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
+    with self.assertRaises(ValueError):
+      next(
+          est.predict(
+              dummy_input_fn,
+              checkpoint_path=
+              checkpoint_management.latest_checkpoint('fakedir')))
+
+  def test_tensor_predictions(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    self.assertEqual(10., next(est.predict(dummy_input_fn)))
+
+  def test_predictionhooks_are_used(self):
+    hook = test.mock.MagicMock(
+        wraps=training.SessionRunHook(), spec=training.SessionRunHook)
+
+    def _model_fn_hooks(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.]]),
+          prediction_hooks=[hook])
+
+    est = estimator.Estimator(model_fn=_model_fn_hooks)
+    est.train(dummy_input_fn, steps=1)
+    self.assertFalse(hook.begin.called)
+    next(est.predict(dummy_input_fn))
+    self.assertTrue(hook.begin.called)
+
+  def test_warn_if_no_queue_runner(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    with test.mock.patch.object(logging, 'warning') as mock_log:
+      next(est.predict(dummy_input_fn))
+      self.assertRegexpMatches(
+          str(mock_log.call_args),
+          'Input graph does not.*contain a QueueRunner.')
+
+  def test_skip_warn_if_dataset_returns_features(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.]]))
+
+    def _input_fn():
+      it = dataset_ops.Dataset.from_tensors([1]).make_one_shot_iterator()
+      return it.get_next()
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    with test.mock.patch.object(logging, 'warning') as mock_log:
+      next(est.predict(_input_fn))
+      # The warning should not have keyword QueueRunner.
+      self.assertRegexpMatches(str(mock_log.call_args), '^((?!QueueRunner).)*$')
+
+  def test_skip_warn_if_dataset_returns_features_dict(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.]]))
+
+    def _input_fn():
+      it = dataset_ops.Dataset.from_tensors([1]).make_one_shot_iterator()
+      features = {'age': it.get_next()}
+      return features
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    with test.mock.patch.object(logging, 'warning') as mock_log:
+      next(est.predict(_input_fn))
+      # The warning should not have keyword QueueRunner.
+      self.assertRegexpMatches(str(mock_log.call_args), '^((?!QueueRunner).)*$')
+
+  def test_input_fn_can_return_just_features(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+
+    def _only_features():
+      return {'x': constant_op.constant([[0.]])}
+
+    self.assertEqual([10.], next(est.predict(_only_features)))
+
+  def test_batch_size_mismatch(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions={
+              'y1': constant_op.constant([[10.]]),
+              'y2': constant_op.constant([[12.], [13]])
+          })
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    with self.assertRaisesRegexp(ValueError,
+                                 'Batch length of predictions should be same'):
+      next(est.predict(dummy_input_fn))
+
+  def test_iterate_batches(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions={
+              # First dim is different but the prediction should still work
+              'y1': array_ops.zeros(shape=[3]),
+              'y2': array_ops.zeros(shape=[5, 3])
+          })
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+
+    predictions = next(est.predict(dummy_input_fn, yield_single_examples=False))
+    self.assertAllEqual(predictions['y1'].shape, [3])
+    self.assertAllEqual(predictions['y2'].shape, [5, 3])
+
+  def test_predict_keys_defined_for_tensor(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    with self.assertRaisesRegexp(
+        ValueError,
+        'predict_keys argument is not valid in case of non-dict predictions'):
+      next(est.predict(dummy_input_fn, predict_keys=['y']))
+
+  def test_predict_keys_does_not_exists(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions={
+              'y1': constant_op.constant([[10.]]),
+              'y2': constant_op.constant([[12.]])
+          })
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    with self.assertRaisesRegexp(ValueError,
+                                 'Expected to run at least one output from'):
+      next(est.predict(dummy_input_fn, predict_keys=['y3']))
+
+  def test_return_given_predict_keys(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions={
+              'y1': constant_op.constant([[10.]]),
+              'y2': constant_op.constant([[12.]])
+          })
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    results = next(est.predict(dummy_input_fn, predict_keys=['y1']))
+    self.assertIn('y1', results)
+    self.assertNotIn('y2', results)
+
+  def test_yield_rows_of_tensor(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.], [12.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    results = est.predict(dummy_input_fn)
+    self.assertEqual([10.], next(results))
+    self.assertEqual([12.], next(results))
+
+  def test_yield_rows_of_dict(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions={
+              'y1': constant_op.constant([[10.], [12]]),
+              'y2': constant_op.constant([[0.], [2.]])
+          })
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    results = est.predict(dummy_input_fn)
+    self.assertDictEqual({'y1': [10.], 'y2': [0.]}, next(results))
+    self.assertDictEqual({'y1': [12.], 'y2': [2.]}, next(results))
+
+  def test_hooks_should_be_session_run_hook(self):
+    est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
+    est.train(dummy_input_fn, steps=1)
+    with self.assertRaisesRegexp(TypeError, 'must be a SessionRunHook'):
+      next(est.predict(dummy_input_fn, hooks=['NotAHook']))
+
+  def test_hooks_are_used(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.], [12.]]))
+
+    step_counter_hook = _StepCounterHook()
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    results = est.predict(dummy_input_fn, hooks=[step_counter_hook])
+    self.assertEqual(0, step_counter_hook.steps)  # not called yet
+    next(results)
+    self.assertEqual(1, step_counter_hook.steps)  # first call
+    next(results)
+    self.assertEqual(1, step_counter_hook.steps)  # it's in same batch
+    next(results)
+    self.assertEqual(2, step_counter_hook.steps)  # next batch
+
+  def test_predict_from_old_model_dir(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      v = variables.VariableV1([[16.]], name='weight')
+      prediction = v * 2
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=prediction)
+
+    est1 = estimator.Estimator(model_fn=_model_fn)
+    est1.train(dummy_input_fn, steps=1)
+    est2 = estimator.Estimator(model_fn=_model_fn, model_dir=est1.model_dir)
+    self.assertEqual([32.], next(est2.predict(dummy_input_fn)))
+
+  def test_predict_from_checkpoint_path(self):
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      v = variables.VariableV1([[16.]], name='weight')
+      prediction = v * 2
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=prediction)
+
+    est1 = estimator.Estimator(model_fn=_model_fn)
+    est1.train(dummy_input_fn, steps=1)
+    est2 = estimator.Estimator(model_fn=_model_fn, model_dir=est1.model_dir)
+    self.assertEqual([32.],
+                     next(
+                         est2.predict(
+                             dummy_input_fn,
+                             checkpoint_path=est2.latest_checkpoint())))
+
+  def test_scaffold_is_used(self):
+
+    def _model_fn_scaffold(features, labels, mode):
+      _, _ = features, labels
+      variables.VariableV1(1., name='weight')
+      self.mock_saver = get_mock_saver()
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          predictions=constant_op.constant([[1.]]),
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          scaffold=training.Scaffold(saver=self.mock_saver))
+
+    est = estimator.Estimator(model_fn=_model_fn_scaffold)
+    est.train(dummy_input_fn, steps=1)
+    next(est.predict(dummy_input_fn))
+    self.assertTrue(self.mock_saver.restore.called)
+
+  def test_features_labels_mode(self):
+    given_features = {'test-features': [[1], [1]]}
+    given_labels = {'test-labels': [[1], [1]]}
+
+    def _input_fn():
+      return given_features, given_labels
+
+    def _model_fn(features, labels, mode):
+      self.features, self.labels, self.mode = features, labels, mode
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[0.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(_input_fn, steps=1)
+    next(est.predict(_input_fn))
+    self.assertEqual(given_features, self.features)
+    self.assertIsNone(self.labels)
+    self.assertEqual(model_fn_lib.ModeKeys.PREDICT, self.mode)
+
+  def test_graph_initialization_global_step_and_random_seed(self):
+    expected_random_seed = run_config.RunConfig().tf_random_seed
+    def _model_fn(features, labels, mode):
+      _, _, _ = features, labels, mode
+      self.assertIsNotNone(training.get_global_step())
+      self.assertEqual(expected_random_seed, ops.get_default_graph().seed)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[0.]]))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    next(est.predict(dummy_input_fn))
+
+
+def _model_fn_for_export_tests(features, labels, mode):
+  _, _ = features, labels
+  variables.VariableV1(1., name='weight')
+  scores = constant_op.constant([3.])
+  classes = constant_op.constant(['wumpus'])
+  update_global_step = state_ops.assign_add(training.get_global_step(), 1)
+  with ops.control_dependencies([update_global_step]):
+    train_op = constant_op.constant(2.)
+  return model_fn_lib.EstimatorSpec(
+      mode,
+      predictions=constant_op.constant(10.),
+      loss=constant_op.constant(1.),
+      train_op=train_op,
+      export_outputs={
+          'test': export_output.ClassificationOutput(scores, classes)})
+
+
+def _x_y_input_fn():
+  return ({'x': constant_op.constant([[1], [1]]),
+           'y': constant_op.constant([[2], [2]])},
+          constant_op.constant([[1], [1]]))
+
+
+def _model_fn_with_x_y(features, labels, mode):
+  _ = labels
+  variables.VariableV1(1., name='weight')
+  scores = constant_op.constant([3.])
+  classes = constant_op.constant(['wumpus'])
+  if mode == model_fn_lib.ModeKeys.PREDICT:
+    variables.VariableV1(36., name='name_collision')
+    return model_fn_lib.EstimatorSpec(
+        mode,
+        predictions=constant_op.constant(10.),
+        export_outputs={
+            'test': export_output.ClassificationOutput(scores, classes)})
+  else:
+    prefix = 'eval_' if mode == model_fn_lib.ModeKeys.EVAL else ''
+
+    multiplied = math_ops.multiply(
+        features['x'], features['y'], name='{}multiplied'.format(prefix))
+    mean = metrics_module.Mean(name='{}mean'.format(prefix))
+    mean.update_state(features['x'] - features['y'])
+    eval_metrics = {
+        'mean1':
+            mean,
+        'mean2':
+            metrics_lib.mean(
+                features['x'] - features['y'], name='{}mean'.format(prefix))
+    }
+    variables.VariableV1(1., name='later_var')
+    variables.VariableV1(3., name='name_collision')
+    return model_fn_lib.EstimatorSpec(
+        mode,
+        predictions=multiplied,
+        loss=constant_op.constant(1.),
+        train_op=state_ops.assign_add(training.get_global_step(), 1),
+        eval_metric_ops=eval_metrics)
+
+
+def _model_fn_with_saveables_for_export_tests(features, labels, mode):
+  _, _ = features, labels
+  table = saver_test_utils.CheckpointedOp(name='v2')
+  update_global_step = state_ops.assign_add(training.get_global_step(), 1)
+  with ops.control_dependencies([update_global_step]):
+    train_op = table.insert('k1', 30.0)
+  prediction = table.lookup('k1', 0.0)
+  return model_fn_lib.EstimatorSpec(
+      mode,
+      predictions=prediction,
+      loss=constant_op.constant(1.),
+      train_op=train_op,
+      export_outputs={
+          'test': export_output.PredictOutput({'prediction': prediction})})
+
+
+def _get_serving_input_receiver_fn():
+  feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                  'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+  return export.build_parsing_serving_input_receiver_fn(feature_spec)
+
+
+def _get_supervised_input_receiver_fn():
+  feature_spec = {
+      'x': array_ops.placeholder(
+          dtype=dtypes.int64, shape=(2, 1), name='feature_x'),
+      'y': array_ops.placeholder(
+          dtype=dtypes.int64, shape=(2, 1), name='feature_y')
+      }
+  label_spec = array_ops.placeholder(
+      dtype=dtypes.float32, shape=[1], name='truth')
+
+  return export.build_raw_supervised_input_receiver_fn(feature_spec, label_spec)
+
+
+_VOCAB_FILE_CONTENT = 'emerson\nlake\npalmer\n'
+_EXTRA_FILE_CONTENT = 'kermit\npiggy\nralph\n'
+
+
+class EstimatorExportTest(test.TestCase):
+
+  def test_export_savedmodel_proto_roundtrip_raw_receiver(self):
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
+    est.train(input_fn=dummy_input_fn, steps=1)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est.export_savedmodel(
+        export_dir_base, serving_input_receiver_fn)
+
+    # Check that all the files are in the right places.
+    self.assertTrue(gfile.Exists(export_dir_base))
+    self._validate_exported_files(export_dir)
+
+    # Restore, to validate that the export was well-formed.
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('input_example_tensor' in graph_ops)
+        self.assertTrue('ParseExample/ParseExample' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+  def test_export_saved_model_train(self):
+    self._test_export_saved_model_for_mode(
+        _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.TRAIN)
+
+  def test_export_saved_model_eval(self):
+    self._test_export_saved_model_for_mode(
+        _get_supervised_input_receiver_fn(), model_fn_lib.ModeKeys.EVAL)
+
+  def test_export_saved_model_predict(self):
+    self._test_export_saved_model_for_mode(
+        _get_serving_input_receiver_fn(), model_fn_lib.ModeKeys.PREDICT)
+
+  def _test_export_saved_model_for_mode(self, input_receiver_fn, mode):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
+    est.train(input_fn=_x_y_input_fn, steps=1)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est._export_saved_model_for_mode(
+        export_dir_base, input_receiver_fn, mode=mode)
+
+    # Check that all the files are in the right places.
+    self.assertTrue(gfile.Exists(export_dir_base))
+    self._validate_exported_files(export_dir)
+
+    # Restore, to validate that the export was well-formed.
+    tag_set = model_fn_lib.EXPORT_TAG_MAP[mode]
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, tag_set, export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertFalse('name_collision_1' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_receiver_map(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('input_example_tensor' in graph_ops)
+        self.assertTrue('ParseExample/ParseExample' in graph_ops)
+        self.assertFalse('feature_x' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_train_only(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.TRAINING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('multiplied' in graph_ops)
+        self.assertTrue('mean/update_op' in graph_ops)
+        self.assertFalse('eval_multiplied' in graph_ops)
+        self.assertTrue('feature_x' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_eval_only(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.EVAL], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('eval_multiplied' in graph_ops)
+        self.assertTrue('eval_mean/value' in graph_ops)
+        self.assertFalse('multiplied' in graph_ops)
+        self.assertTrue('feature_x' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_no_serving(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.TRAINING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('multiplied' in graph_ops)
+        self.assertFalse('eval_multiplied' in graph_ops)
+        self.assertTrue('feature_x' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.EVAL], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('eval_multiplied' in graph_ops)
+        self.assertFalse('multiplied' in graph_ops)
+        # TODO(karmel): is this the desired behavior when names are shared?
+        self.assertTrue('feature_x_1' in graph_ops)
+        self.assertTrue('feature_y_1' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_three_defs(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    # Restore, to validate that the export was well-formed.
+    for tag_set in model_fn_lib.EXPORT_TAG_MAP.values():
+      with ops.Graph().as_default() as graph:
+        with session.Session(graph=graph) as sess:
+          loader.load(sess, tag_set, export_dir)
+          graph_ops = [x.name for x in graph.get_operations()]
+          self.assertTrue('global_step/Assign' in graph_ops)
+          self.assertTrue('global_step/Initializer/zeros' in graph_ops)
+          self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_proto_roundtrip_all_vars(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.TRAINING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('later_var' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertFalse('later_var' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_all_saved_models_name_collision(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+    export_dir, tmpdir = self._test_export_all_saved_models(
+        input_receiver_fn_map)
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.TRAINING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('name_collision' in graph_ops)
+        self.assertFalse('name_collision_1' in graph_ops)
+        collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+        self.assertEqual(3, collection_vars[-1].eval())
+
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('name_collision' in graph_ops)
+        self.assertFalse('name_collision_1' in graph_ops)
+        collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+        # This is a non-obvious detail: when we load the estimator spec
+        # for predict, name_collision gets set to 36. However, we then restore
+        # from checkpoint, which should overwrite that var and make it the 3
+        # from training. In practice, this would not be a good way to write
+        # a model_fn, but leaving this check in for now to ensure consistency
+        # with what would happen given our current order of spec, then
+        # checkpoint.
+        self.assertEqual(3, collection_vars[-1].eval())
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def _test_export_all_saved_models(self, input_receiver_fn_map):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_with_x_y)
+    est.train(input_fn=_x_y_input_fn, steps=1)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est._export_all_saved_models(
+        export_dir_base, input_receiver_fn_map)
+
+    # Check that all the files are in the right places.
+    self.assertTrue(gfile.Exists(export_dir_base))
+
+    self._validate_exported_files(export_dir)
+
+    return export_dir, tmpdir
+
+  def _validate_exported_files(self, export_dir):
+    self.assertTrue(gfile.Exists(export_dir))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('saved_model.pb'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables/variables.index'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables/variables.data-00000-of-00001'))))
+
+  def test_export_all_saved_models_var_not_found(self):
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+
+    def _model_fn_with_predict_only_vars(features, labels, mode):
+      _, _ = features, labels
+      if mode == model_fn_lib.ModeKeys.PREDICT:
+        variables.VariableV1(1., name='only_in_predict')
+      else:
+        variables.VariableV1(1., name='otherwise')
+
+      prediction = constant_op.constant(1.)
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=prediction,
+          loss=constant_op.constant(1.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          export_outputs={
+              'test': export_output.PredictOutput({'prediction': prediction})
+          })
+
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_with_predict_only_vars)
+    est.train(input_fn=_x_y_input_fn, steps=1)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+
+    err_regex = r'Could not load all requested variables[\w\W]*infer'
+    with self.assertRaisesRegexp(ValueError, err_regex):
+      est._export_all_saved_models(export_dir_base, input_receiver_fn_map)
+
+  def test_export_all_saved_models_metric_operation(self):
+    """Ensures metrics ops.Operations can be expoerted (b/109740581)."""
+
+    def _model_fn(features, labels, mode):
+      del features, labels  # Unused
+      metric_obj = metrics_module.Mean()
+      metric_obj.update_state(constant_op.constant([0]))
+      eval_metrics = {
+          'metrics1': (constant_op.constant([0]), control_flow_ops.no_op()),
+          'metrics2': metric_obj,
+      }
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=constant_op.constant(10.),
+          loss=constant_op.constant(1.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          eval_metric_ops=eval_metrics)
+
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(input_fn=dummy_input_fn, steps=1)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('metric_operation_export'))
+
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn()}
+
+    export_dir = est._export_all_saved_models(
+        export_dir_base, input_receiver_fn_map)
+
+    # Restore, to validate that the export was well-formed.
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        meta_graph = loader.load(sess, [tag_constants.EVAL], export_dir)
+        sig_outputs = meta_graph.signature_def[
+            model_fn_lib.ModeKeys.EVAL].outputs
+        self.assertTrue(sig_outputs['metrics1/update_op'].name.startswith(
+            'metric_op_wrapper'))
+        self.assertTrue(sig_outputs['metrics2/update_op'].name.startswith(
+            'metric_op_wrapper'))
+
+  def test_export_savedmodel_with_saveables_proto_roundtrip(self):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_saveables_for_export_tests)
+    est.train(input_fn=dummy_input_fn, steps=1)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est.export_savedmodel(
+        export_dir_base, serving_input_receiver_fn)
+
+    # Check that all the files are in the right places.
+    self.assertTrue(gfile.Exists(export_dir_base))
+    self.assertTrue(gfile.Exists(export_dir))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('saved_model.pb'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables/variables.index'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables/variables.data-00000-of-00001'))))
+
+    # Restore, to validate that the export was well-formed.
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('input_example_tensor' in graph_ops)
+        self.assertTrue('ParseExample/ParseExample' in graph_ops)
+        # The original saver is used to restore variables
+        self.assertTrue('save/LookupTableImportV2' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_savedmodel_assets(self):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
+    est.train(input_fn=dummy_input_fn, steps=1)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+
+    # Create a fake asset.
+    vocab_file_name = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('my_vocab_file'))
+    vocab_file = gfile.GFile(vocab_file_name, mode='w')
+    vocab_file.write(_VOCAB_FILE_CONTENT)
+    vocab_file.close()
+
+    # hack in an op that uses the asset, in order to test asset export.
+    # this is not actually valid, of course.
+    def serving_input_receiver_with_asset_fn():
+      features, receiver_tensor, _ = serving_input_receiver_fn()
+      filename = ops.convert_to_tensor(vocab_file_name,
+                                       dtypes.string,
+                                       name='asset_filepath')
+      ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, filename)
+      features['bogus_filename'] = filename
+
+      return export.ServingInputReceiver(features, receiver_tensor)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est.export_savedmodel(
+        export_dir_base, serving_input_receiver_with_asset_fn)
+
+    # Check that the asset files are in the right places.
+    expected_vocab_file_name = os.path.join(
+        compat.as_bytes(export_dir), compat.as_bytes('assets/my_vocab_file'))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir), compat.as_bytes('assets'))))
+    self.assertTrue(gfile.Exists(expected_vocab_file_name))
+    self.assertEqual(
+        compat.as_bytes(_VOCAB_FILE_CONTENT),
+        compat.as_bytes(gfile.GFile(expected_vocab_file_name).read()))
+
+    # Restore, to validate that the export was well-formed.
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        assets = [
+            x.eval()
+            for x in graph.get_collection(ops.GraphKeys.ASSET_FILEPATHS)
+        ]
+        self.assertItemsEqual([vocab_file_name], assets)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('input_example_tensor' in graph_ops)
+        self.assertTrue('ParseExample/ParseExample' in graph_ops)
+        self.assertTrue('asset_filepath' in graph_ops)
+        self.assertTrue('weight' in graph_ops)
+
+    # cleanup
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_savedmodel_extra_assets(self):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
+    est.train(input_fn=dummy_input_fn, steps=1)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+
+    # Create a fake asset.
+    extra_file_name = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('my_extra_file'))
+    extra_file = gfile.GFile(extra_file_name, mode='w')
+    extra_file.write(_EXTRA_FILE_CONTENT)
+    extra_file.close()
+
+    # Perform the export.
+    assets_extra = {'some/sub/directory/my_extra_file': extra_file_name}
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est.export_savedmodel(export_dir_base,
+                                       serving_input_receiver_fn,
+                                       assets_extra=assets_extra)
+
+    # Check that the asset files are in the right places.
+    expected_extra_path = os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('assets.extra/some/sub/directory/my_extra_file'))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir), compat.as_bytes('assets.extra'))))
+    self.assertTrue(gfile.Exists(expected_extra_path))
+    self.assertEqual(
+        compat.as_bytes(_EXTRA_FILE_CONTENT),
+        compat.as_bytes(gfile.GFile(expected_extra_path).read()))
+
+    # cleanup
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_savedmodel_tensor_features(self):
+    """Test that models accepting a single raw Tensor can be exported.
+
+    See https://github.com/tensorflow/tensorflow/issues/11674
+
+    If the model_fn and receiver_fn accept raw tensors rather than dictionaries
+    as input, export_savedmodel should be okay with that, too.
+
+    """
+
+    tmpdir = tempfile.mkdtemp()
+
+    def _input_fn_tensor_features():
+      t = array_ops.constant([1, 2, 3], dtype=dtypes.float32, shape=[1, 3])
+      return (t, None)
+
+    def _model_fn_tensor_features(features, labels, mode):
+      _ = labels
+      prediction = math_ops.matmul(features, features, transpose_b=True)
+
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=prediction,
+          loss=constant_op.constant(1.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          export_outputs={
+              'test': export_output.PredictOutput({'prediction': prediction})
+          })
+
+    def _serving_input_receiver_fn():
+      feat = array_ops.placeholder(dtype=dtypes.float32)
+      return export.TensorServingInputReceiver(
+          features=feat, receiver_tensors=feat)
+
+    est = estimator.Estimator(model_fn=_model_fn_tensor_features)
+    est.train(input_fn=_input_fn_tensor_features, steps=1)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est.export_savedmodel(
+        export_dir_base, _serving_input_receiver_fn)
+
+    # Restore, to validate that the export was well-formed.
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name.lower() for x in graph.get_operations()]
+        self.assertTrue('const' in graph_ops)
+        self.assertTrue('matmul' in graph_ops)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_scaffold_is_used_for_saver(self):
+    tmpdir = tempfile.mkdtemp()
+
+    def _model_fn_scaffold(features, labels, mode):
+      _, _ = features, labels
+      variables.VariableV1(1., name='weight')
+      self.mock_saver = get_mock_saver()
+      scores = constant_op.constant([3.])
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          predictions=constant_op.constant([[1.]]),
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          scaffold=training.Scaffold(saver=self.mock_saver),
+          export_outputs={'test': export_output.ClassificationOutput(scores)})
+
+    est = estimator.Estimator(model_fn=_model_fn_scaffold)
+    est.train(dummy_input_fn, steps=1)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    est.export_savedmodel(export_dir_base, serving_input_receiver_fn)
+
+    self.assertTrue(self.mock_saver.restore.called)
+    self.assertTrue(self.mock_saver.export_meta_graph.called)
+    self.assertTrue(self.mock_saver.save.called)
+
+  def test_scaffold_is_used_for_saver_multiple_modes(self):
+    tmpdir = tempfile.mkdtemp()
+    savers = {'predict_saver': None, 'train_saver': None}
+
+    def _model_fn_scaffold(features, labels, mode):
+      _, _ = features, labels
+      variables.VariableV1(1., name='weight')
+
+      scores = constant_op.constant([3.])
+      if mode == model_fn_lib.ModeKeys.PREDICT:
+        savers['predict_saver'] = get_mock_saver()
+        scaffold = training.Scaffold(saver=savers['predict_saver'])
+      elif mode == model_fn_lib.ModeKeys.TRAIN:
+        savers['train_saver'] = get_mock_saver()
+        scaffold = training.Scaffold(saver=savers['train_saver'])
+      else:
+        scaffold = training.Scaffold()
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          predictions=constant_op.constant([[1.]]),
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          scaffold=scaffold,
+          export_outputs={'test': export_output.ClassificationOutput(scores)})
+
+    est = estimator.Estimator(model_fn=_model_fn_scaffold)
+    est.train(dummy_input_fn, steps=1)
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    est._export_all_saved_models(export_dir_base, input_receiver_fn_map)
+
+    self.assertTrue(savers['train_saver'].restore.called)
+    self.assertEqual(savers['train_saver'].export_meta_graph.call_count, 1)
+    self.assertEqual(savers['train_saver'].save.call_count, 1)
+
+    self.assertTrue(savers['predict_saver'].restore.called)
+    self.assertEqual(savers['predict_saver'].export_meta_graph.call_count, 1)
+    self.assertEqual(savers['predict_saver'].save.call_count, 0)
+
+  def test_scaffold_is_used_for_local_init(self):
+    tmpdir = tempfile.mkdtemp()
+
+    def _model_fn_scaffold(features, labels, mode):
+      _, _ = features, labels
+      my_int = variables.VariableV1(1, name='my_int',
+                                    collections=[ops.GraphKeys.LOCAL_VARIABLES])
+      _ = training.get_or_create_steps_per_run_variable()
+      scores = constant_op.constant([3.])
+      with ops.control_dependencies([
+          variables.local_variables_initializer(),
+          lookup_ops.tables_initializer()
+      ]):
+        assign_op = state_ops.assign(my_int, 12345)
+
+      # local_initSop must be an Operation, not a Tensor.
+      custom_local_init_op = control_flow_ops.group(assign_op)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          predictions=constant_op.constant([[1.]]),
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          scaffold=training.Scaffold(local_init_op=custom_local_init_op),
+          export_outputs={'test': export_output.ClassificationOutput(scores)})
+
+    est = estimator.Estimator(model_fn=_model_fn_scaffold)
+    est.train(dummy_input_fn, steps=1)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est.export_savedmodel(export_dir_base,
+                                       serving_input_receiver_fn)
+
+    # Restore, to validate that the custom local_init_op runs.
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        my_int = graph.get_tensor_by_name('my_int:0')
+        my_int_value = sess.run(my_int)
+        self.assertEqual(12345, my_int_value)
+
+  def test_scaffold_is_used_for_local_init_multiple_modes(self):
+    tmpdir = tempfile.mkdtemp()
+
+    def _model_fn_scaffold(features, labels, mode):
+      _, _ = features, labels
+      my_int = variables.VariableV1(1, name='my_int',
+                                    collections=[ops.GraphKeys.LOCAL_VARIABLES])
+      scores = constant_op.constant([3.])
+      with ops.control_dependencies([
+          variables.local_variables_initializer(),
+          lookup_ops.tables_initializer()
+      ]):
+        assign_op = state_ops.assign(my_int, 12345)
+
+      custom_local_init_op = None
+      if mode == model_fn_lib.ModeKeys.PREDICT:
+        # local_initSop must be an Operation, not a Tensor.
+        custom_local_init_op = control_flow_ops.group(assign_op)
+
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          predictions=constant_op.constant([[1.]]),
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          scaffold=training.Scaffold(local_init_op=custom_local_init_op),
+          export_outputs={'test': export_output.ClassificationOutput(scores)})
+
+    est = estimator.Estimator(model_fn=_model_fn_scaffold)
+    est.train(dummy_input_fn, steps=1)
+    input_receiver_fn_map = {
+        model_fn_lib.ModeKeys.TRAIN: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.EVAL: _get_supervised_input_receiver_fn(),
+        model_fn_lib.ModeKeys.PREDICT: _get_serving_input_receiver_fn()
+    }
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir = est._export_all_saved_models(
+        export_dir_base, input_receiver_fn_map)
+
+    # Restore, to validate that the custom local_init_op runs.
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.SERVING], export_dir)
+        my_int = graph.get_tensor_by_name('my_int:0')
+        my_int_value = sess.run(my_int)
+        self.assertEqual(12345, my_int_value)
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        loader.load(sess, [tag_constants.TRAINING], export_dir)
+        my_int = graph.get_tensor_by_name('my_int:0')
+        my_int_value = sess.run(my_int)
+        self.assertEqual(1, my_int_value)
+
+  def test_features_labels_mode(self):
+    given_features = {'test-features': constant_op.constant([[1], [1]])}
+
+    def serving_input_receiver_fn():
+      return export.ServingInputReceiver(
+          given_features, array_ops.placeholder(dtype=dtypes.string))
+
+    def _model_fn(features, labels, mode):
+      self.features, self.labels, self.mode = features, labels, mode
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[0.]]),
+          export_outputs={
+              'test': export_output.ClassificationOutput(
+                  constant_op.constant([[0.]]))
+          })
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn)
+    self.assertEqual(given_features, self.features)
+    self.assertIsNone(self.labels)
+    self.assertEqual(model_fn_lib.ModeKeys.PREDICT, self.mode)
+
+  def test_graph_initialization_global_step_and_random_seed(self):
+    expected_random_seed = run_config.RunConfig().tf_random_seed
+    def _model_fn(features, labels, mode):
+      _, _, _ = features, labels, mode
+      self.assertIsNotNone(training.get_global_step())
+      self.assertEqual(expected_random_seed, ops.get_default_graph().seed)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[0.]]),
+          export_outputs={
+              'test': export_output.ClassificationOutput(
+                  constant_op.constant([[0.]]))
+          })
+
+    def serving_input_receiver_fn():
+      return export.ServingInputReceiver(
+          {'test-features': constant_op.constant([[1], [1]])},
+          array_ops.placeholder(dtype=dtypes.string))
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(dummy_input_fn, steps=1)
+    est.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn)
+
+  def test_export_savedmodel_respects_soft_placement(self):
+    def model_fn_with_a_gpu_op_but_no_kernel(features, labels, mode):
+      _, _ = features, labels
+      table = saver_test_utils.CheckpointedOp(name='v2')
+
+      update_global_step = state_ops.assign_add(training.get_global_step(), 1)
+      with ops.control_dependencies([update_global_step]):
+        train_op = table.insert('k1', 30.0)
+
+      #  In this test, there are no GPUs available.  The goal is to verify that
+      #  export_savedmodel executes nevertheless.
+      with ops.device('/gpu:0'):
+        string_op = string_ops.as_string(update_global_step)
+
+      with ops.control_dependencies([string_op]):
+        prediction = table.lookup('k1', 0.0)
+
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=prediction,
+          loss=constant_op.constant(1.),
+          train_op=train_op,
+          export_outputs={
+              'test': export_output.PredictOutput({
+                  'prediction': prediction
+              })
+          })
+
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(
+        model_fn=model_fn_with_a_gpu_op_but_no_kernel)
+    est.train(input_fn=dummy_input_fn, steps=1)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+
+    export_dir = est.export_savedmodel(
+        export_dir_base, serving_input_receiver_fn)
+
+    # At this point, if export_savedmodel executed with
+    # allow_soft_placement=True, then the GPU-assigned operation was silently
+    # placed on the CPU.  Otherwise, an exception would have been raised
+    # related to the fact that the requested GPU device isn't available.
+
+    # Expectations below assume that export_savedmodel has completed normally.
+    self.assertTrue(gfile.Exists(export_dir_base))
+    self.assertTrue(gfile.Exists(export_dir))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('saved_model.pb'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables/variables.index'))))
+    self.assertTrue(gfile.Exists(os.path.join(
+        compat.as_bytes(export_dir),
+        compat.as_bytes('variables/variables.data-00000-of-00001'))))
+
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_savedmodel_proto_strip_default_attrs(self):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
+    est.train(input_fn=dummy_input_fn, steps=1)
+    feature_spec = {'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
+                    'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('export'))
+    export_dir_stripped = est.export_savedmodel(
+        export_dir_base, serving_input_receiver_fn, strip_default_attrs=True)
+    export_dir_not_stripped = est.export_savedmodel(
+        export_dir_base, serving_input_receiver_fn, strip_default_attrs=False)
+
+    # Load the SavedModel from disk as-is to verify default attrs
+    # are stripped. Reimporting the SavedModel via the loader causes the
+    # default attrs to be populated in the NodeDefs.
+
+    # pylint: disable=protected-access
+    saved_model_stripped_pb = loader_impl._parse_saved_model(
+        export_dir_stripped)
+    saved_model_not_stripped_pb = loader_impl._parse_saved_model(
+        export_dir_not_stripped)
+    self.assertIsNotNone(saved_model_stripped_pb)
+    self.assertIsNotNone(saved_model_not_stripped_pb)
+    # pylint: enable=protected-access
+
+    meta_graph_def_stripped = [
+        x for x in saved_model_stripped_pb.meta_graphs
+        if x.meta_info_def.tags == [tag_constants.SERVING]][0]
+    meta_graph_def_not_stripped = [
+        x for x in saved_model_not_stripped_pb.meta_graphs
+        if x.meta_info_def.tags == [tag_constants.SERVING]][0]
+
+    # "weight" node in graph is a "Variable" Op with 2 default valued attrs.
+    #   o "container"    : "".
+    #   o "shared_name"  : "".
+
+    # saved_model_stripped_pb was exported with strip_default_attrs set to True.
+    # "weight" node shouldn't have attributes "container" and "shared_name".
+    node_def = test_util.get_node_def_from_graph(
+        'weight', meta_graph_def_stripped.graph_def)
+    self.assertNotIn('container', node_def.attr)
+    self.assertNotIn('shared_name', node_def.attr)
+
+    # saved_model_not_stripped_pb was exported with strip_default_attrs
+    # disabled. "weight" node should have attributes "container" and
+    # "shared_name".
+    node_def = test_util.get_node_def_from_graph(
+        'weight', meta_graph_def_not_stripped.graph_def)
+    self.assertIn('container', node_def.attr)
+    self.assertIn('shared_name', node_def.attr)
+
+    # Clean up.
+    gfile.DeleteRecursively(tmpdir)
+
+  def test_export_savedmodel_no_export_outputs(self):
+    """Ensure that an EstimatorSpec without outputs defined can be exported."""
+
+    def _model_fn(features, labels, mode):
+      _, _ = features, labels
+      variables.VariableV1(1., name='weight')
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=constant_op.constant(10.),
+          loss=constant_op.constant(1.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(model_fn=_model_fn)
+    est.train(input_fn=dummy_input_fn, steps=1)
+
+    # Perform the export.
+    export_dir_base = os.path.join(
+        compat.as_bytes(tmpdir), compat.as_bytes('no_export_outputs'))
+    export_dir = est.export_savedmodel(
+        export_dir_base, _get_serving_input_receiver_fn())
+
+    # Check that all the files are in the right places.
+    self.assertTrue(gfile.Exists(export_dir_base))
+    self._validate_exported_files(export_dir)
+
+    # Restore, to validate that the export was well-formed.
+    with ops.Graph().as_default() as graph:
+      with session.Session(graph=graph) as sess:
+        meta_graph = loader.load(sess, [tag_constants.SERVING], export_dir)
+        graph_ops = [x.name for x in graph.get_operations()]
+        self.assertTrue('weight' in graph_ops)
+
+        sig_def = meta_graph.signature_def
+        self.assertEqual(len(sig_def), 1)
+        sig_outputs = sig_def[
+            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY].outputs
+        self.assertEqual(sig_outputs['output'].name, 'Const:0')
+
+
+class EstimatorHookOrderingTest(test.TestCase):
+
+  def testCustomHooksAreCalledBeforeNanTensorHook(self):
+
+    def nan_making_model_fn(mode, features, labels):
+      """A graph that generates NaN's for testing."""
+      del features, labels
+
+      global_step = variables.VariableV1(
+          0, dtype=dtypes.int64, name='global_step')
+      inc_global_step = state_ops.assign_add(global_step, 1)
+      nan_const = constant_op.constant(np.nan, dtype=dtypes.float32)
+      loss = control_flow_ops.cond(
+          inc_global_step > 1, lambda: nan_const, lambda: 1.0)
+
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          predictions=global_step.read_value(),
+          loss=loss,
+          train_op=inc_global_step)
+
+    def empty_input_fn():
+      return dict(), None
+
+    class AfterRunCountingHook(session_run_hook.SessionRunHook):
+      """Hooks that counts the number of times after_run() is called."""
+
+      def __init__(self):
+        self.after_run_count = 0
+
+      def after_run(self, run_context, run_values):
+        del run_context, run_values
+        self.after_run_count += 1
+
+    test_hook = AfterRunCountingHook()
+    est = estimator.Estimator(model_fn=nan_making_model_fn)
+    with self.assertRaises(basic_session_run_hooks.NanLossDuringTrainingError):
+      est.train(input_fn=empty_input_fn, steps=2, hooks=[test_hook])
+    self.assertEqual(2, test_hook.after_run_count)
+
+
+class EstimatorIntegrationTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_complete_flow_with_a_simple_linear_model(self):
+
+    def _model_fn(features, labels, mode):
+      predictions = layers.dense(
+          features['x'], 1, kernel_initializer=init_ops.zeros_initializer())
+      export_outputs = {
+          'predictions': export_output.RegressionOutput(predictions)
+      }
+
+      if mode == model_fn_lib.ModeKeys.PREDICT:
+        return model_fn_lib.EstimatorSpec(
+            mode, predictions=predictions, export_outputs=export_outputs)
+
+      loss = losses.mean_squared_error(labels, predictions)
+      train_op = training.GradientDescentOptimizer(learning_rate=0.5).minimize(
+          loss, training.get_global_step())
+      mean = metrics_module.Mean()
+      mean.update_state(loss)
+      eval_metric_ops = {
+          'absolute_error':
+              metrics_lib.mean_absolute_error(labels, predictions),
+          'mean':
+              mean,
+      }
+
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          predictions=predictions,
+          loss=loss,
+          train_op=train_op,
+          eval_metric_ops=eval_metric_ops,
+          export_outputs=export_outputs)
+
+    est = estimator.Estimator(model_fn=_model_fn)
+    data = np.linspace(0., 1., 100, dtype=np.float32).reshape(-1, 1)
+
+    # TRAIN
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data}, y=data, batch_size=50, num_epochs=None, shuffle=True)
+    est.train(train_input_fn, steps=200)
+
+    # EVALUATE
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data}, y=data, batch_size=50, num_epochs=1, shuffle=True)
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(200, scores['global_step'])
+    self.assertGreater(0.1, scores['absolute_error'])
+    self.assertAlmostEqual(4.4e-14, scores['mean'], places=2)
+
+    # PREDICT
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data}, y=None, batch_size=10, num_epochs=1, shuffle=False)
+    predictions = list(est.predict(predict_input_fn))
+    self.assertAllClose(data, predictions, atol=0.01)
+
+    # EXPORT
+    feature_spec = {'x': parsing_ops.FixedLenFeature([1], dtypes.float32)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/export/__init__.py b/tensorflow/python/estimator/export/__init__.py
index 0ba905a148..e69de29bb2 100644
--- a/tensorflow/python/estimator/export/__init__.py
+++ b/tensorflow/python/estimator/export/__init__.py
@@ -1,32 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""export python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow_estimator.python.estimator import export
-
-# Include attrs that start with single underscore.
-export.__all__ = [s for s in dir(export) if not s.startswith('__')]
-
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.export import *
diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py
index fd1616adea..55aace5fa9 100644
--- a/tensorflow/python/estimator/export/export.py
+++ b/tensorflow/python/estimator/export/export.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,625 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""export python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Configuration and utilities for receiving inputs at serving time."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.export import export
+import collections
+import os
+
+import six
+
+from tensorflow.python.estimator import util
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model import signature_def_utils
+from tensorflow.python.util import compat
+from tensorflow.python.util.tf_export import estimator_export
+
+_SINGLE_FEATURE_DEFAULT_NAME = 'feature'
+_SINGLE_RECEIVER_DEFAULT_NAME = 'input'
+_SINGLE_LABEL_DEFAULT_NAME = 'label'
+
+_SINGLE_TENSOR_DEFAULT_NAMES = {
+    'feature': _SINGLE_FEATURE_DEFAULT_NAME,
+    'label': _SINGLE_LABEL_DEFAULT_NAME,
+    'receiver_tensor': _SINGLE_RECEIVER_DEFAULT_NAME,
+    'receiver_tensors_alternative': _SINGLE_RECEIVER_DEFAULT_NAME
+}
+
+
+def _wrap_and_check_input_tensors(tensors, field_name):
+  """Ensure that tensors is a dict of str to Tensor mappings.
+
+  Args:
+    tensors: dict of str to Tensors, or a single Tensor.
+    field_name: name of the member field of `ServingInputReceiver`
+      whose value is being passed to `tensors`.
+
+  Returns:
+    dict of str to Tensors; this is the original dict if one was passed, or
+    the original tensor wrapped in a dictionary.
+
+  Raises:
+    ValueError: if tensors is None, or has non-string keys,
+      or non-Tensor values
+  """
+  if tensors is None:
+    raise ValueError('{}s must be defined.'.format(field_name))
+  if not isinstance(tensors, dict):
+    tensors = {_SINGLE_TENSOR_DEFAULT_NAMES[field_name]: tensors}
+  for name, tensor in tensors.items():
+    _check_tensor_key(name, error_label=field_name)
+    _check_tensor(tensor, name, error_label=field_name)
+  return tensors
+
+
+def _check_tensor(tensor, name, error_label='feature'):
+  """Check that passed `tensor` is a Tensor or SparseTensor."""
+  if not (isinstance(tensor, ops.Tensor) or
+          isinstance(tensor, sparse_tensor.SparseTensor)):
+    fmt_name = ' {}'.format(name) if name else ''
+    value_error = ValueError('{}{} must be a Tensor or SparseTensor.'.format(
+        error_label, fmt_name))
+    # NOTE(ericmc): This if-else block is a specific carve-out for
+    # LabeledTensor, which has a `.tensor` attribute and which is
+    # convertible to tf.Tensor via ops.convert_to_tensor.
+    # Allowing all types convertible to tf.Tensor is considered by soergel@
+    # to be too permissive.
+    # TODO(soergel): accept any type convertible to Tensor,
+    # as in cl/193238295 snapshot #6.
+    if hasattr(tensor, 'tensor'):
+      try:
+        ops.convert_to_tensor(tensor)
+      except TypeError:
+        raise value_error
+    else:
+      raise value_error
+
+
+def _check_tensor_key(name, error_label='feature'):
+  if not isinstance(name, six.string_types):
+    raise ValueError('{} keys must be strings: {}.'.format(error_label, name))
+
+
+@estimator_export('estimator.export.ServingInputReceiver')
+class ServingInputReceiver(
+    collections.namedtuple(
+        'ServingInputReceiver',
+        ['features', 'receiver_tensors', 'receiver_tensors_alternatives'])):
+  """A return type for a serving_input_receiver_fn.
+
+  The expected return values are:
+    features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
+      `SparseTensor`, specifying the features to be passed to the model. Note:
+      if `features` passed is not a dict, it will be wrapped in a dict with a
+      single entry, using 'feature' as the key.  Consequently, the model must
+      accept a feature dict of the form {'feature': tensor}.  You may use
+      `TensorServingInputReceiver` if you want the tensor to be passed as is.
+    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
+      or `SparseTensor`, specifying input nodes where this receiver expects to
+      be fed by default.  Typically, this is a single placeholder expecting
+      serialized `tf.Example` protos.
+    receiver_tensors_alternatives: a dict of string to additional
+      groups of receiver tensors, each of which may be a `Tensor`,
+      `SparseTensor`, or dict of string to `Tensor` or`SparseTensor`.
+      These named receiver tensor alternatives generate additional serving
+      signatures, which may be used to feed inputs at different points within
+      the input receiver subgraph.  A typical usage is to allow feeding raw
+      feature `Tensor`s *downstream* of the tf.parse_example() op.
+      Defaults to None.
+  """
+
+  def __new__(cls,
+              features,
+              receiver_tensors,
+              receiver_tensors_alternatives=None):
+    features = _wrap_and_check_input_tensors(features, 'feature')
+
+    receiver_tensors = _wrap_and_check_input_tensors(receiver_tensors,
+                                                     'receiver_tensor')
+
+    if receiver_tensors_alternatives is not None:
+      if not isinstance(receiver_tensors_alternatives, dict):
+        raise ValueError(
+            'receiver_tensors_alternatives must be a dict: {}.'.format(
+                receiver_tensors_alternatives))
+      for alternative_name, receiver_tensors_alt in (
+          six.iteritems(receiver_tensors_alternatives)):
+        # Updating dict during iteration is OK in this case.
+        receiver_tensors_alternatives[alternative_name] = (
+            _wrap_and_check_input_tensors(
+                receiver_tensors_alt, 'receiver_tensors_alternative'))
+
+    return super(ServingInputReceiver, cls).__new__(
+        cls,
+        features=features,
+        receiver_tensors=receiver_tensors,
+        receiver_tensors_alternatives=receiver_tensors_alternatives)
+
+
+@estimator_export('estimator.export.TensorServingInputReceiver')
+class TensorServingInputReceiver(
+    collections.namedtuple(
+        'TensorServingInputReceiver',
+        ['features', 'receiver_tensors', 'receiver_tensors_alternatives'])):
+  """A return type for a serving_input_receiver_fn.
+
+  This is for use with models that expect a single `Tensor` or `SparseTensor`
+  as an input feature, as opposed to a dict of features.
+
+  The normal `ServingInputReceiver` always returns a feature dict, even if it
+  contains only one entry, and so can be used only with models that accept such
+  a dict.  For models that accept only a single raw feature, the
+  `serving_input_receiver_fn` provided to `Estimator.export_savedmodel()` should
+  return this `TensorServingInputReceiver` instead.  See:
+  https://github.com/tensorflow/tensorflow/issues/11674
+
+  Note that the receiver_tensors and receiver_tensor_alternatives arguments
+  will be automatically converted to the dict representation in either case,
+  because the SavedModel format requires each input `Tensor` to have a name
+  (provided by the dict key).
+
+  The expected return values are:
+    features: A single `Tensor` or `SparseTensor`, representing the feature
+      to be passed to the model.
+    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
+      or `SparseTensor`, specifying input nodes where this receiver expects to
+      be fed by default.  Typically, this is a single placeholder expecting
+      serialized `tf.Example` protos.
+    receiver_tensors_alternatives: a dict of string to additional
+      groups of receiver tensors, each of which may be a `Tensor`,
+      `SparseTensor`, or dict of string to `Tensor` or`SparseTensor`.
+      These named receiver tensor alternatives generate additional serving
+      signatures, which may be used to feed inputs at different points within
+      the input receiver subgraph.  A typical usage is to allow feeding raw
+      feature `Tensor`s *downstream* of the tf.parse_example() op.
+      Defaults to None.
+  """
+
+  def __new__(cls,
+              features,
+              receiver_tensors,
+              receiver_tensors_alternatives=None):
+    if features is None:
+      raise ValueError('features must be defined.')
+    _check_tensor(features, None)
+
+    receiver = ServingInputReceiver(
+        features=features,
+        receiver_tensors=receiver_tensors,
+        receiver_tensors_alternatives=receiver_tensors_alternatives)
+
+    return super(TensorServingInputReceiver, cls).__new__(
+        cls,
+        features=receiver.features[_SINGLE_FEATURE_DEFAULT_NAME],
+        receiver_tensors=receiver.receiver_tensors,
+        receiver_tensors_alternatives=receiver.receiver_tensors_alternatives)
+
+
+class UnsupervisedInputReceiver(ServingInputReceiver):
+  """A return type for a training_input_receiver_fn or eval_input_receiver_fn.
+
+  This differs from SupervisedInputReceiver in that it does not require a set
+  of labels.
+
+  The expected return values are:
+    features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
+      `SparseTensor`, specifying the features to be passed to the model.
+    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
+      or `SparseTensor`, specifying input nodes where this receiver expects to
+      be fed by default.  Typically, this is a single placeholder expecting
+      serialized `tf.Example` protos.
+  """
+
+  def __new__(cls, features, receiver_tensors):
+    return super(UnsupervisedInputReceiver, cls).__new__(
+        cls,
+        features=features,
+        receiver_tensors=receiver_tensors,
+        receiver_tensors_alternatives=None)
+
+
+class SupervisedInputReceiver(
+    collections.namedtuple('SupervisedInputReceiver',
+                           ['features', 'labels', 'receiver_tensors'])):
+  """A return type for a training_input_receiver_fn or eval_input_receiver_fn.
+
+  This differs from a ServingInputReceiver in that (1) this receiver expects
+  a set of labels to be passed in with features, and (2) this receiver does
+  not support receiver_tensors_alternatives, which are primarily used for
+  serving.
+
+  The expected return values are:
+    features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
+      `SparseTensor`, specifying the features to be passed to the model.
+    labels: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or
+      `SparseTensor`, specifying the labels to be passed to the model.
+    receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor`
+      or `SparseTensor`, specifying input nodes where this receiver expects to
+      be fed by default.  Typically, this is a single placeholder expecting
+      serialized `tf.Example` protos.
+
+  """
+
+  def __new__(cls, features, labels, receiver_tensors):
+    # Both features and labels can be dicts or raw tensors.
+    for input_vals, error_label in ((features, 'feature'), (labels, 'label')):
+      # _wrap_and_check_input_tensors is called here only to validate the
+      # tensors. The wrapped dict that is returned is deliberately discarded.
+      _wrap_and_check_input_tensors(input_vals, error_label)
+
+    receiver_tensors = _wrap_and_check_input_tensors(receiver_tensors,
+                                                     'receiver_tensor')
+
+    return super(SupervisedInputReceiver, cls).__new__(
+        cls,
+        features=features,
+        labels=labels,
+        receiver_tensors=receiver_tensors)
+
+
+@estimator_export('estimator.export.build_parsing_serving_input_receiver_fn')
+def build_parsing_serving_input_receiver_fn(feature_spec,
+                                            default_batch_size=None):
+  """Build a serving_input_receiver_fn expecting fed tf.Examples.
+
+  Creates a serving_input_receiver_fn that expects a serialized tf.Example fed
+  into a string placeholder.  The function parses the tf.Example according to
+  the provided feature_spec, and returns all parsed Tensors as features.
+
+  Args:
+    feature_spec: a dict of string to `VarLenFeature`/`FixedLenFeature`.
+    default_batch_size: the number of query examples expected per batch.
+        Leave unset for variable batch size (recommended).
+
+  Returns:
+    A serving_input_receiver_fn suitable for use in serving.
+  """
+
+  def serving_input_receiver_fn():
+    """An input_fn that expects a serialized tf.Example."""
+    serialized_tf_example = array_ops.placeholder(
+        dtype=dtypes.string,
+        shape=[default_batch_size],
+        name='input_example_tensor')
+    receiver_tensors = {'examples': serialized_tf_example}
+    features = parsing_ops.parse_example(serialized_tf_example, feature_spec)
+    return ServingInputReceiver(features, receiver_tensors)
+
+  return serving_input_receiver_fn
+
+
+def _placeholder_from_tensor(t, default_batch_size=None):
+  """Creates a placeholder that matches the dtype and shape of passed tensor.
+
+  Args:
+    t: Tensor or EagerTensor
+    default_batch_size: the number of query examples expected per batch.
+        Leave unset for variable batch size (recommended).
+
+  Returns:
+    Placeholder that matches the passed tensor.
+  """
+  batch_shape = tensor_shape.TensorShape([default_batch_size])
+  shape = batch_shape.concatenate(t.get_shape()[1:])
+
+  # Reuse the feature tensor's op name (t.op.name) for the placeholder,
+  # excluding the index from the tensor's name (t.name):
+  # t.name = "%s:%d" % (t.op.name, t._value_index)
+  try:
+    name = t.op.name
+  except AttributeError:
+    # In Eager mode, tensors don't have ops or names, and while they do have
+    # IDs, those are not maintained across runs. The name here is used
+    # primarily for debugging, and is not critical to the placeholder.
+    # So, in order to make this Eager-compatible, continue with an empty
+    # name if none is available.
+    name = None
+
+  return array_ops.placeholder(dtype=t.dtype, shape=shape, name=name)
+
+
+def _placeholders_from_receiver_tensors_dict(input_vals,
+                                             default_batch_size=None):
+  return {
+      name: _placeholder_from_tensor(t, default_batch_size)
+      for name, t in input_vals.items()
+  }
+
+
+@estimator_export('estimator.export.build_raw_serving_input_receiver_fn')
+def build_raw_serving_input_receiver_fn(features, default_batch_size=None):
+  """Build a serving_input_receiver_fn expecting feature Tensors.
+
+  Creates an serving_input_receiver_fn that expects all features to be fed
+  directly.
+
+  Args:
+    features: a dict of string to `Tensor`.
+    default_batch_size: the number of query examples expected per batch.
+        Leave unset for variable batch size (recommended).
+
+  Returns:
+    A serving_input_receiver_fn.
+  """
+
+  def serving_input_receiver_fn():
+    """A serving_input_receiver_fn that expects features to be fed directly."""
+    receiver_tensors = _placeholders_from_receiver_tensors_dict(
+        features, default_batch_size)
+    return ServingInputReceiver(receiver_tensors, receiver_tensors)
+
+  return serving_input_receiver_fn
+
+
+def build_raw_supervised_input_receiver_fn(features,
+                                           labels,
+                                           default_batch_size=None):
+  """Build a supervised_input_receiver_fn for raw features and labels.
+
+  This function wraps tensor placeholders in a supervised_receiver_fn
+  with the expectation that the features and labels appear precisely as
+  the model_fn expects them. Features and labels can therefore be dicts of
+  tensors, or raw tensors.
+
+  Args:
+    features: a dict of string to `Tensor` or `Tensor`.
+    labels: a dict of string to `Tensor` or `Tensor`.
+    default_batch_size: the number of query examples expected per batch.
+        Leave unset for variable batch size (recommended).
+
+  Returns:
+    A supervised_input_receiver_fn.
+
+  Raises:
+    ValueError: if features and labels have overlapping keys.
+  """
+  # Check for overlapping keys before beginning.
+  try:
+    feat_keys = features.keys()
+  except AttributeError:
+    feat_keys = [_SINGLE_RECEIVER_DEFAULT_NAME]
+  try:
+    label_keys = labels.keys()
+  except AttributeError:
+    label_keys = [_SINGLE_LABEL_DEFAULT_NAME]
+
+  overlap_keys = set(feat_keys) & set(label_keys)
+  if overlap_keys:
+    raise ValueError('Features and labels must have distinct keys. '
+                     'Found overlapping keys: {}'.format(overlap_keys))
+
+  def supervised_input_receiver_fn():
+    """A receiver_fn that expects pass-through features and labels."""
+    if not isinstance(features, dict):
+      features_cp = _placeholder_from_tensor(features, default_batch_size)
+      receiver_features = {_SINGLE_RECEIVER_DEFAULT_NAME: features_cp}
+    else:
+      receiver_features = _placeholders_from_receiver_tensors_dict(
+          features, default_batch_size)
+      features_cp = receiver_features
+
+    if not isinstance(labels, dict):
+      labels_cp = _placeholder_from_tensor(labels, default_batch_size)
+      receiver_labels = {_SINGLE_LABEL_DEFAULT_NAME: labels_cp}
+    else:
+      receiver_labels = _placeholders_from_receiver_tensors_dict(
+          labels, default_batch_size)
+      labels_cp = receiver_labels
+
+    receiver_tensors = dict(receiver_features)
+    receiver_tensors.update(receiver_labels)
+    return SupervisedInputReceiver(features_cp, labels_cp, receiver_tensors)
+
+  return supervised_input_receiver_fn
+
+
+def build_supervised_input_receiver_fn_from_input_fn(input_fn, **input_fn_args):
+  """Get a function that returns a SupervisedInputReceiver matching an input_fn.
+
+  Note that this function calls the input_fn in a local graph in order to
+  extract features and labels. Placeholders are then created from those
+  features and labels in the default graph.
+
+  Args:
+    input_fn: An Estimator input_fn, which is a function that returns one of:
+
+      * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
+          tuple (features, labels) with same constraints as below.
+      * A tuple (features, labels): Where `features` is a `Tensor` or a
+        dictionary of string feature name to `Tensor` and `labels` is a
+        `Tensor` or a dictionary of string label name to `Tensor`. Both
+        `features` and `labels` are consumed by `model_fn`. They should
+        satisfy the expectation of `model_fn` from inputs.
+
+    **input_fn_args: set of kwargs to be passed to the input_fn. Note that
+      these will not be checked or validated here, and any errors raised by
+      the input_fn will be thrown to the top.
+
+  Returns:
+    A function taking no arguments that, when called, returns a
+    SupervisedInputReceiver. This function can be passed in as part of the
+    input_receiver_map when exporting SavedModels from Estimator with multiple
+    modes.
+  """
+  # Wrap the input_fn call in a graph to prevent sullying the default namespace
+  with ops.Graph().as_default():
+    result = input_fn(**input_fn_args)
+    features, labels, _ = util.parse_input_fn_result(result)
+  # Placeholders are created back in the default graph.
+  return build_raw_supervised_input_receiver_fn(features, labels)
+
+
+### Below utilities are specific to SavedModel exports.
+
+
+def build_all_signature_defs(receiver_tensors,
+                             export_outputs,
+                             receiver_tensors_alternatives=None,
+                             serving_only=True):
+  """Build `SignatureDef`s for all export outputs.
+
+  Args:
+    receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying
+      input nodes where this receiver expects to be fed by default.  Typically,
+      this is a single placeholder expecting serialized `tf.Example` protos.
+    export_outputs: a dict of ExportOutput instances, each of which has
+      an as_signature_def instance method that will be called to retrieve
+      the signature_def for all export output tensors.
+    receiver_tensors_alternatives: a dict of string to additional
+      groups of receiver tensors, each of which may be a `Tensor` or a dict of
+      string to `Tensor`.  These named receiver tensor alternatives generate
+      additional serving signatures, which may be used to feed inputs at
+      different points within the input receiver subgraph.  A typical usage is
+      to allow feeding raw feature `Tensor`s *downstream* of the
+      tf.parse_example() op.  Defaults to None.
+    serving_only: boolean; if true, resulting signature defs will only include
+      valid serving signatures. If false, all requested signatures will be
+      returned.
+
+  Returns:
+    signature_def representing all passed args.
+
+  Raises:
+    ValueError: if export_outputs is not a dict
+  """
+  if not isinstance(receiver_tensors, dict):
+    receiver_tensors = {_SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors}
+  if export_outputs is None or not isinstance(export_outputs, dict):
+    raise ValueError('export_outputs must be a dict and not'
+                     '{}'.format(type(export_outputs)))
+
+  signature_def_map = {}
+  excluded_signatures = {}
+  for output_key, export_output in export_outputs.items():
+    signature_name = '{}'.format(output_key or 'None')
+    try:
+      signature = export_output.as_signature_def(receiver_tensors)
+      signature_def_map[signature_name] = signature
+    except ValueError as e:
+      excluded_signatures[signature_name] = str(e)
+
+  if receiver_tensors_alternatives:
+    for receiver_name, receiver_tensors_alt in (
+        six.iteritems(receiver_tensors_alternatives)):
+      if not isinstance(receiver_tensors_alt, dict):
+        receiver_tensors_alt = {
+            _SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors_alt
+        }
+      for output_key, export_output in export_outputs.items():
+        signature_name = '{}:{}'.format(receiver_name or 'None', output_key or
+                                        'None')
+        try:
+          signature = export_output.as_signature_def(receiver_tensors_alt)
+          signature_def_map[signature_name] = signature
+        except ValueError as e:
+          excluded_signatures[signature_name] = str(e)
+
+  _log_signature_report(signature_def_map, excluded_signatures)
+
+  # The above calls to export_output.as_signature_def should return only
+  # valid signatures; if there is a validity problem, they raise a ValueError,
+  # in which case we exclude that signature from signature_def_map above.
+  # The is_valid_signature check ensures that the signatures produced are
+  # valid for serving, and acts as an additional sanity check for export
+  # signatures produced for serving. We skip this check for training and eval
+  # signatures, which are not intended for serving.
+  if serving_only:
+    signature_def_map = {
+        k: v
+        for k, v in signature_def_map.items()
+        if signature_def_utils.is_valid_signature(v)
+    }
+  return signature_def_map
+
+
+_FRIENDLY_METHOD_NAMES = {
+    signature_constants.CLASSIFY_METHOD_NAME: 'Classify',
+    signature_constants.REGRESS_METHOD_NAME: 'Regress',
+    signature_constants.PREDICT_METHOD_NAME: 'Predict',
+    signature_constants.SUPERVISED_TRAIN_METHOD_NAME: 'Train',
+    signature_constants.SUPERVISED_EVAL_METHOD_NAME: 'Eval',
+}
+
+
+def _log_signature_report(signature_def_map, excluded_signatures):
+  """Log a report of which signatures were produced."""
+  sig_names_by_method_name = collections.defaultdict(list)
+
+  # We'll collect whatever method_names are present, but also we want to make
+  # sure to output a line for each of the three standard methods even if they
+  # have no signatures.
+  for method_name in _FRIENDLY_METHOD_NAMES:
+    sig_names_by_method_name[method_name] = []
+
+  for signature_name, sig in signature_def_map.items():
+    sig_names_by_method_name[sig.method_name].append(signature_name)
+
+  # TODO(b/67733540): consider printing the full signatures, not just names
+  for method_name, sig_names in sig_names_by_method_name.items():
+    if method_name in _FRIENDLY_METHOD_NAMES:
+      method_name = _FRIENDLY_METHOD_NAMES[method_name]
+    logging.info('Signatures INCLUDED in export for {}: {}'.format(
+        method_name, sig_names if sig_names else 'None'))
+
+  if excluded_signatures:
+    logging.info('Signatures EXCLUDED from export because they cannot be '
+                 'be served via TensorFlow Serving APIs:')
+    for signature_name, message in excluded_signatures.items():
+      logging.info('\'{}\' : {}'.format(signature_name, message))
+
+  if not signature_def_map:
+    logging.warn('Export includes no signatures!')
+  elif (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY not in
+        signature_def_map):
+    logging.warn('Export includes no default signature!')
+
+
+def get_timestamped_export_dir(export_dir_base):
+  """Builds a path to a new subdirectory within the base directory.
+
+  Each export is written into a new subdirectory named using the
+  current time.  This guarantees monotonically increasing version
+  numbers even across multiple runs of the pipeline.
+  The timestamp used is the number of seconds since epoch UTC.
+
+  Args:
+    export_dir_base: A string containing a directory to write the exported
+        graph and checkpoints.
+  Returns:
+    The full path of the new subdirectory (which is not actually created yet).
+
+  Raises:
+    RuntimeError: if repeated attempts fail to obtain a unique timestamped
+      directory name.
+  """
+  return util.get_timestamped_dir(export_dir_base)
+
+
+def get_temp_export_dir(timestamped_export_dir):
+  """Builds a directory name based on the argument but starting with 'temp-'.
+
+  This relies on the fact that TensorFlow Serving ignores subdirectories of
+  the base directory that can't be parsed as integers.
 
-# Include attrs that start with single underscore.
-export.__all__ = [s for s in dir(export) if not s.startswith('__')]
+  Args:
+    timestamped_export_dir: the name of the eventual export directory, e.g.
+      /foo/bar/<timestamp>
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.export.export import *
+  Returns:
+    A sister directory prefixed with 'temp-', e.g. /foo/bar/temp-<timestamp>.
+  """
+  (dirname, basename) = os.path.split(timestamped_export_dir)
+  temp_export_dir = os.path.join(
+      compat.as_bytes(dirname), compat.as_bytes('temp-{}'.format(basename)))
+  return temp_export_dir
diff --git a/tensorflow/python/estimator/export/export_lib.py b/tensorflow/python/estimator/export/export_lib.py
index ce49f89a7d..f4ac8581ea 100644
--- a/tensorflow/python/estimator/export/export_lib.py
+++ b/tensorflow/python/estimator/export/export_lib.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""export_lib python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Utility methods for exporting Estimator."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.export import export_lib
+# pylint: disable=unused-import,line-too-long
+from tensorflow.python.estimator.export.export import build_parsing_serving_input_receiver_fn
+from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn
+from tensorflow.python.estimator.export.export import ServingInputReceiver
+from tensorflow.python.estimator.export.export import TensorServingInputReceiver
+from tensorflow.python.estimator.export.export_output import ClassificationOutput
+from tensorflow.python.estimator.export.export_output import ExportOutput
+from tensorflow.python.estimator.export.export_output import PredictOutput
+from tensorflow.python.estimator.export.export_output import RegressionOutput
 
-# Include attrs that start with single underscore.
-export_lib.__all__ = [s for s in dir(export_lib) if not s.startswith('__')]
+# pylint: enable=unused-import,line-too-long
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.export.export_lib import *
diff --git a/tensorflow/python/estimator/export/export_output.py b/tensorflow/python/estimator/export/export_output.py
index e61162e13f..c17fc08f21 100644
--- a/tensorflow/python/estimator/export/export_output.py
+++ b/tensorflow/python/estimator/export/export_output.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,402 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""export_output python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Classes for different types of export output."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.export import export_output
+import abc
+
+import six
+
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.keras import metrics as metrics_module
+from tensorflow.python.saved_model import signature_def_utils
+from tensorflow.python.util.tf_export import estimator_export
+
+
+@estimator_export('estimator.export.ExportOutput')
+class ExportOutput(object):
+  """Represents an output of a model that can be served.
+
+  These typically correspond to model heads.
+  """
+
+  __metaclass__ = abc.ABCMeta
+
+  _SEPARATOR_CHAR = '/'
+
+  @abc.abstractmethod
+  def as_signature_def(self, receiver_tensors):
+    """Generate a SignatureDef proto for inclusion in a MetaGraphDef.
+
+    The SignatureDef will specify outputs as described in this ExportOutput,
+    and will use the provided receiver_tensors as inputs.
+
+    Args:
+      receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying
+        input nodes that will be fed.
+    """
+    pass
+
+  def _check_output_key(self, key, error_label):
+    # For multi-head models, the key can be a tuple.
+    if isinstance(key, tuple):
+      key = self._SEPARATOR_CHAR.join(key)
+
+    if not isinstance(key, six.string_types):
+      raise ValueError(
+          '{} output key must be a string; got {}.'.format(error_label, key))
+    return key
+
+  def _wrap_and_check_outputs(
+      self, outputs, single_output_default_name, error_label=None):
+    """Wraps raw tensors as dicts and checks type.
+
+    Note that we create a new dict here so that we can overwrite the keys
+    if necessary.
+
+    Args:
+      outputs: A `Tensor` or a dict of string to `Tensor`.
+      single_output_default_name: A string key for use in the output dict
+        if the provided `outputs` is a raw tensor.
+      error_label: descriptive string for use in error messages. If none,
+        single_output_default_name will be used.
+
+    Returns:
+      A dict of tensors
+
+    Raises:
+      ValueError: if the outputs dict keys are not strings or tuples of strings
+        or the values are not Tensors.
+    """
+    if not isinstance(outputs, dict):
+      outputs = {single_output_default_name: outputs}
+
+    output_dict = {}
+    for key, value in outputs.items():
+      error_name = error_label or single_output_default_name
+      key = self._check_output_key(key, error_name)
+      if not isinstance(value, ops.Tensor):
+        raise ValueError(
+            '{} output value must be a Tensor; got {}.'.format(
+                error_name, value))
+
+      output_dict[key] = value
+    return output_dict
+
+
+@estimator_export('estimator.export.ClassificationOutput')
+class ClassificationOutput(ExportOutput):
+  """Represents the output of a classification head.
+
+  Either classes or scores or both must be set.
+
+  The classes `Tensor` must provide string labels, not integer class IDs.
+
+  If only classes is set, it is interpreted as providing top-k results in
+  descending order.
+
+  If only scores is set, it is interpreted as providing a score for every class
+  in order of class ID.
+
+  If both classes and scores are set, they are interpreted as zipped, so each
+  score corresponds to the class at the same index.  Clients should not depend
+  on the order of the entries.
+  """
+
+  def __init__(self, scores=None, classes=None):
+    """Constructor for `ClassificationOutput`.
+
+    Args:
+      scores: A float `Tensor` giving scores (sometimes but not always
+          interpretable as probabilities) for each class.  May be `None`, but
+          only if `classes` is set.  Interpretation varies-- see class doc.
+      classes: A string `Tensor` giving predicted class labels.  May be `None`,
+          but only if `scores` is set.  Interpretation varies-- see class doc.
+
+    Raises:
+      ValueError: if neither classes nor scores is set, or one of them is not a
+          `Tensor` with the correct dtype.
+    """
+    if (scores is not None
+        and not (isinstance(scores, ops.Tensor)
+                 and scores.dtype.is_floating)):
+      raise ValueError('Classification scores must be a float32 Tensor; '
+                       'got {}'.format(scores))
+    if (classes is not None
+        and not (isinstance(classes, ops.Tensor)
+                 and dtypes.as_dtype(classes.dtype) == dtypes.string)):
+      raise ValueError('Classification classes must be a string Tensor; '
+                       'got {}'.format(classes))
+    if scores is None and classes is None:
+      raise ValueError('At least one of scores and classes must be set.')
+
+    self._scores = scores
+    self._classes = classes
+
+  @property
+  def scores(self):
+    return self._scores
+
+  @property
+  def classes(self):
+    return self._classes
+
+  def as_signature_def(self, receiver_tensors):
+    if len(receiver_tensors) != 1:
+      raise ValueError('Classification input must be a single string Tensor; '
+                       'got {}'.format(receiver_tensors))
+    (_, examples), = receiver_tensors.items()
+    if dtypes.as_dtype(examples.dtype) != dtypes.string:
+      raise ValueError('Classification input must be a single string Tensor; '
+                       'got {}'.format(receiver_tensors))
+    return signature_def_utils.classification_signature_def(
+        examples, self.classes, self.scores)
+
+
+@estimator_export('estimator.export.RegressionOutput')
+class RegressionOutput(ExportOutput):
+  """Represents the output of a regression head."""
+
+  def __init__(self, value):
+    """Constructor for `RegressionOutput`.
+
+    Args:
+      value: a float `Tensor` giving the predicted values.  Required.
+
+    Raises:
+      ValueError: if the value is not a `Tensor` with dtype tf.float32.
+    """
+    if not (isinstance(value, ops.Tensor) and value.dtype.is_floating):
+      raise ValueError('Regression output value must be a float32 Tensor; '
+                       'got {}'.format(value))
+    self._value = value
+
+  @property
+  def value(self):
+    return self._value
+
+  def as_signature_def(self, receiver_tensors):
+    if len(receiver_tensors) != 1:
+      raise ValueError('Regression input must be a single string Tensor; '
+                       'got {}'.format(receiver_tensors))
+    (_, examples), = receiver_tensors.items()
+    if dtypes.as_dtype(examples.dtype) != dtypes.string:
+      raise ValueError('Regression input must be a single string Tensor; '
+                       'got {}'.format(receiver_tensors))
+    return signature_def_utils.regression_signature_def(examples, self.value)
+
+
+@estimator_export('estimator.export.PredictOutput')
+class PredictOutput(ExportOutput):
+  """Represents the output of a generic prediction head.
+
+  A generic prediction need not be either a classification or a regression.
+
+  Named outputs must be provided as a dict from string to `Tensor`,
+  """
+  _SINGLE_OUTPUT_DEFAULT_NAME = 'output'
+
+  def __init__(self, outputs):
+    """Constructor for PredictOutput.
+
+    Args:
+      outputs: A `Tensor` or a dict of string to `Tensor` representing the
+        predictions.
+
+    Raises:
+      ValueError: if the outputs is not dict, or any of its keys are not
+          strings, or any of its values are not `Tensor`s.
+    """
+
+    self._outputs = self._wrap_and_check_outputs(
+        outputs, self._SINGLE_OUTPUT_DEFAULT_NAME, error_label='Prediction')
+
+  @property
+  def outputs(self):
+    return self._outputs
+
+  def as_signature_def(self, receiver_tensors):
+    return signature_def_utils.predict_signature_def(receiver_tensors,
+                                                     self.outputs)
+
+
+class _SupervisedOutput(ExportOutput):
+  """Represents the output of a supervised training or eval process."""
+  __metaclass__ = abc.ABCMeta
+
+  LOSS_NAME = 'loss'
+  PREDICTIONS_NAME = 'predictions'
+  METRICS_NAME = 'metrics'
+
+  METRIC_VALUE_SUFFIX = 'value'
+  METRIC_UPDATE_SUFFIX = 'update_op'
+
+  _loss = None
+  _predictions = None
+  _metrics = None
+
+  def __init__(self, loss=None, predictions=None, metrics=None):
+    """Constructor for SupervisedOutput (ie, Train or Eval output).
+
+    Args:
+      loss: dict of Tensors or single Tensor representing calculated loss.
+      predictions: dict of Tensors or single Tensor representing model
+        predictions.
+      metrics: Dict of metric results keyed by name.
+        The values of the dict can be one of the following:
+        (1) instance of `Metric` class.
+        (2) (metric_value, update_op) tuples, or a single tuple.
+        metric_value must be a Tensor, and update_op must be a Tensor or Op.
+
+    Raises:
+      ValueError: if any of the outputs' dict keys are not strings or tuples of
+        strings or the values are not Tensors (or Operations in the case of
+        update_op).
+    """
+
+    if loss is not None:
+      loss_dict = self._wrap_and_check_outputs(loss, self.LOSS_NAME)
+      self._loss = self._prefix_output_keys(loss_dict, self.LOSS_NAME)
+    if predictions is not None:
+      pred_dict = self._wrap_and_check_outputs(
+          predictions, self.PREDICTIONS_NAME)
+      self._predictions = self._prefix_output_keys(
+          pred_dict, self.PREDICTIONS_NAME)
+    if metrics is not None:
+      self._metrics = self._wrap_and_check_metrics(metrics)
+
+  def _prefix_output_keys(self, output_dict, output_name):
+    """Prepend output_name to the output_dict keys if it doesn't exist.
+
+    This produces predictable prefixes for the pre-determined outputs
+    of SupervisedOutput.
+
+    Args:
+      output_dict: dict of string to Tensor, assumed valid.
+      output_name: prefix string to prepend to existing keys.
+
+    Returns:
+      dict with updated keys and existing values.
+    """
+
+    new_outputs = {}
+    for key, val in output_dict.items():
+      key = self._prefix_key(key, output_name)
+      new_outputs[key] = val
+    return new_outputs
+
+  def _prefix_key(self, key, output_name):
+    if key.find(output_name) != 0:
+      key = output_name + self._SEPARATOR_CHAR + key
+    return key
+
+  def _wrap_and_check_metrics(self, metrics):
+    """Handle the saving of metrics.
+
+    Metrics is either a tuple of (value, update_op), or a dict of such tuples.
+    Here, we separate out the tuples and create a dict with names to tensors.
+
+    Args:
+      metrics: Dict of metric results keyed by name.
+        The values of the dict can be one of the following:
+        (1) instance of `Metric` class.
+        (2) (metric_value, update_op) tuples, or a single tuple.
+        metric_value must be a Tensor, and update_op must be a Tensor or Op.
+
+    Returns:
+      dict of output_names to tensors
+
+    Raises:
+      ValueError: if the dict key is not a string, or the metric values or ops
+        are not tensors.
+    """
+    if not isinstance(metrics, dict):
+      metrics = {self.METRICS_NAME: metrics}
+
+    outputs = {}
+    for key, value in metrics.items():
+      if isinstance(value, metrics_module.Metric):
+        metric_val = value.result()
+        assert len(value.updates) == 1  # We expect only one update op.
+        metric_op = value.updates[0]
+      else:
+        metric_val, metric_op = value
+      key = self._check_output_key(key, self.METRICS_NAME)
+      key = self._prefix_key(key, self.METRICS_NAME)
+
+      val_name = key + self._SEPARATOR_CHAR + self.METRIC_VALUE_SUFFIX
+      op_name = key + self._SEPARATOR_CHAR + self.METRIC_UPDATE_SUFFIX
+      if not isinstance(metric_val, ops.Tensor):
+        raise ValueError(
+            '{} output value must be a Tensor; got {}.'.format(
+                key, metric_val))
+      if (not isinstance(metric_op, ops.Tensor) and
+          not isinstance(metric_op, ops.Operation)):
+        raise ValueError(
+            '{} update_op must be a Tensor or Operation; got {}.'.format(
+                key, metric_op))
+
+      # We must wrap any ops in a Tensor before export, as the SignatureDef
+      # proto expects tensors only. See b/109740581
+      metric_op_tensor = metric_op
+      if isinstance(metric_op, ops.Operation):
+        with ops.control_dependencies([metric_op]):
+          metric_op_tensor = constant_op.constant([], name='metric_op_wrapper')
+
+      outputs[val_name] = metric_val
+      outputs[op_name] = metric_op_tensor
+
+    return outputs
+
+  @property
+  def loss(self):
+    return self._loss
+
+  @property
+  def predictions(self):
+    return self._predictions
+
+  @property
+  def metrics(self):
+    return self._metrics
+
+  @abc.abstractmethod
+  def _get_signature_def_fn(self):
+    """Returns a function that produces a SignatureDef given desired outputs."""
+    pass
+
+  def as_signature_def(self, receiver_tensors):
+    signature_def_fn = self._get_signature_def_fn()
+    return signature_def_fn(
+        receiver_tensors, self.loss, self.predictions, self.metrics)
+
+
+class TrainOutput(_SupervisedOutput):
+  """Represents the output of a supervised training process.
+
+  This class generates the appropriate signature def for exporting
+  training output by type-checking and wrapping loss, predictions, and metrics
+  values.
+  """
+
+  def _get_signature_def_fn(self):
+    return signature_def_utils.supervised_train_signature_def
+
+
+class EvalOutput(_SupervisedOutput):
+  """Represents the output of a supervised eval process.
 
-# Include attrs that start with single underscore.
-export_output.__all__ = [
-    s for s in dir(export_output) if not s.startswith('__')
-]
+  This class generates the appropriate signature def for exporting
+  eval output by type-checking and wrapping loss, predictions, and metrics
+  values.
+  """
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.export.export_output import *
+  def _get_signature_def_fn(self):
+    return signature_def_utils.supervised_eval_signature_def
diff --git a/tensorflow/python/estimator/export/export_output_test.py b/tensorflow/python/estimator/export/export_output_test.py
new file mode 100644
index 0000000000..96ce0e580d
--- /dev/null
+++ b/tensorflow/python/estimator/export/export_output_test.py
@@ -0,0 +1,397 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for export."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.core.framework import tensor_shape_pb2
+from tensorflow.core.framework import types_pb2
+from tensorflow.core.protobuf import meta_graph_pb2
+from tensorflow.python.estimator.export import export_output as export_output_lib
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.keras import metrics as metrics_module
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import signature_constants
+
+
+class ExportOutputTest(test.TestCase):
+
+  def test_regress_value_must_be_float(self):
+    value = array_ops.placeholder(dtypes.string, 1, name="output-tensor-1")
+    with self.assertRaises(ValueError) as e:
+      export_output_lib.RegressionOutput(value)
+    self.assertEqual('Regression output value must be a float32 Tensor; got '
+                     'Tensor("output-tensor-1:0", shape=(1,), dtype=string)',
+                     str(e.exception))
+
+  def test_classify_classes_must_be_strings(self):
+    classes = array_ops.placeholder(dtypes.float32, 1, name="output-tensor-1")
+    with self.assertRaises(ValueError) as e:
+      export_output_lib.ClassificationOutput(classes=classes)
+    self.assertEqual('Classification classes must be a string Tensor; got '
+                     'Tensor("output-tensor-1:0", shape=(1,), dtype=float32)',
+                     str(e.exception))
+
+  def test_classify_scores_must_be_float(self):
+    scores = array_ops.placeholder(dtypes.string, 1, name="output-tensor-1")
+    with self.assertRaises(ValueError) as e:
+      export_output_lib.ClassificationOutput(scores=scores)
+    self.assertEqual('Classification scores must be a float32 Tensor; got '
+                     'Tensor("output-tensor-1:0", shape=(1,), dtype=string)',
+                     str(e.exception))
+
+  def test_classify_requires_classes_or_scores(self):
+    with self.assertRaises(ValueError) as e:
+      export_output_lib.ClassificationOutput()
+    self.assertEqual("At least one of scores and classes must be set.",
+                     str(e.exception))
+
+  def test_build_standardized_signature_def_regression(self):
+    input_tensors = {
+        "input-1":
+            array_ops.placeholder(
+                dtypes.string, 1, name="input-tensor-1")
+    }
+    value = array_ops.placeholder(dtypes.float32, 1, name="output-tensor-1")
+
+    export_output = export_output_lib.RegressionOutput(value)
+    actual_signature_def = export_output.as_signature_def(input_tensors)
+
+    expected_signature_def = meta_graph_pb2.SignatureDef()
+    shape = tensor_shape_pb2.TensorShapeProto(
+        dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
+    dtype_float = types_pb2.DataType.Value("DT_FLOAT")
+    dtype_string = types_pb2.DataType.Value("DT_STRING")
+    expected_signature_def.inputs[
+        signature_constants.REGRESS_INPUTS].CopyFrom(
+            meta_graph_pb2.TensorInfo(name="input-tensor-1:0",
+                                      dtype=dtype_string,
+                                      tensor_shape=shape))
+    expected_signature_def.outputs[
+        signature_constants.REGRESS_OUTPUTS].CopyFrom(
+            meta_graph_pb2.TensorInfo(name="output-tensor-1:0",
+                                      dtype=dtype_float,
+                                      tensor_shape=shape))
+
+    expected_signature_def.method_name = signature_constants.REGRESS_METHOD_NAME
+    self.assertEqual(actual_signature_def, expected_signature_def)
+
+  def test_build_standardized_signature_def_classify_classes_only(self):
+    """Tests classification with one output tensor."""
+    input_tensors = {
+        "input-1":
+            array_ops.placeholder(
+                dtypes.string, 1, name="input-tensor-1")
+    }
+    classes = array_ops.placeholder(dtypes.string, 1, name="output-tensor-1")
+
+    export_output = export_output_lib.ClassificationOutput(classes=classes)
+    actual_signature_def = export_output.as_signature_def(input_tensors)
+
+    expected_signature_def = meta_graph_pb2.SignatureDef()
+    shape = tensor_shape_pb2.TensorShapeProto(
+        dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
+    dtype_string = types_pb2.DataType.Value("DT_STRING")
+    expected_signature_def.inputs[
+        signature_constants.CLASSIFY_INPUTS].CopyFrom(
+            meta_graph_pb2.TensorInfo(name="input-tensor-1:0",
+                                      dtype=dtype_string,
+                                      tensor_shape=shape))
+    expected_signature_def.outputs[
+        signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom(
+            meta_graph_pb2.TensorInfo(name="output-tensor-1:0",
+                                      dtype=dtype_string,
+                                      tensor_shape=shape))
+
+    expected_signature_def.method_name = (
+        signature_constants.CLASSIFY_METHOD_NAME)
+    self.assertEqual(actual_signature_def, expected_signature_def)
+
+  def test_build_standardized_signature_def_classify_both(self):
+    """Tests multiple output tensors that include classes and scores."""
+    input_tensors = {
+        "input-1":
+            array_ops.placeholder(
+                dtypes.string, 1, name="input-tensor-1")
+    }
+    classes = array_ops.placeholder(dtypes.string, 1,
+                                    name="output-tensor-classes")
+    scores = array_ops.placeholder(dtypes.float32, 1,
+                                   name="output-tensor-scores")
+
+    export_output = export_output_lib.ClassificationOutput(
+        scores=scores, classes=classes)
+    actual_signature_def = export_output.as_signature_def(input_tensors)
+
+    expected_signature_def = meta_graph_pb2.SignatureDef()
+    shape = tensor_shape_pb2.TensorShapeProto(
+        dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
+    dtype_float = types_pb2.DataType.Value("DT_FLOAT")
+    dtype_string = types_pb2.DataType.Value("DT_STRING")
+    expected_signature_def.inputs[
+        signature_constants.CLASSIFY_INPUTS].CopyFrom(
+            meta_graph_pb2.TensorInfo(name="input-tensor-1:0",
+                                      dtype=dtype_string,
+                                      tensor_shape=shape))
+    expected_signature_def.outputs[
+        signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom(
+            meta_graph_pb2.TensorInfo(name="output-tensor-classes:0",
+                                      dtype=dtype_string,
+                                      tensor_shape=shape))
+    expected_signature_def.outputs[
+        signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
+            meta_graph_pb2.TensorInfo(name="output-tensor-scores:0",
+                                      dtype=dtype_float,
+                                      tensor_shape=shape))
+
+    expected_signature_def.method_name = (
+        signature_constants.CLASSIFY_METHOD_NAME)
+    self.assertEqual(actual_signature_def, expected_signature_def)
+
+  def test_build_standardized_signature_def_classify_scores_only(self):
+    """Tests classification without classes tensor."""
+    input_tensors = {
+        "input-1":
+            array_ops.placeholder(
+                dtypes.string, 1, name="input-tensor-1")
+    }
+
+    scores = array_ops.placeholder(dtypes.float32, 1,
+                                   name="output-tensor-scores")
+
+    export_output = export_output_lib.ClassificationOutput(
+        scores=scores)
+    actual_signature_def = export_output.as_signature_def(input_tensors)
+
+    expected_signature_def = meta_graph_pb2.SignatureDef()
+    shape = tensor_shape_pb2.TensorShapeProto(
+        dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
+    dtype_float = types_pb2.DataType.Value("DT_FLOAT")
+    dtype_string = types_pb2.DataType.Value("DT_STRING")
+    expected_signature_def.inputs[
+        signature_constants.CLASSIFY_INPUTS].CopyFrom(
+            meta_graph_pb2.TensorInfo(name="input-tensor-1:0",
+                                      dtype=dtype_string,
+                                      tensor_shape=shape))
+    expected_signature_def.outputs[
+        signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
+            meta_graph_pb2.TensorInfo(name="output-tensor-scores:0",
+                                      dtype=dtype_float,
+                                      tensor_shape=shape))
+
+    expected_signature_def.method_name = (
+        signature_constants.CLASSIFY_METHOD_NAME)
+    self.assertEqual(actual_signature_def, expected_signature_def)
+
+  def test_predict_outputs_valid(self):
+    """Tests that no errors are raised when provided outputs are valid."""
+    outputs = {
+        "output0": constant_op.constant([0]),
+        u"output1": constant_op.constant(["foo"]),
+    }
+    export_output_lib.PredictOutput(outputs)
+
+    # Single Tensor is OK too
+    export_output_lib.PredictOutput(constant_op.constant([0]))
+
+  def test_predict_outputs_invalid(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Prediction output key must be a string"):
+      export_output_lib.PredictOutput({1: constant_op.constant([0])})
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        "Prediction output value must be a Tensor"):
+      export_output_lib.PredictOutput({
+          "prediction1": sparse_tensor.SparseTensor(
+              indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+      })
+
+
+class MockSupervisedOutput(export_output_lib._SupervisedOutput):
+  """So that we can test the abstract class methods directly."""
+
+  def _get_signature_def_fn(self):
+    pass
+
+
+class SupervisedOutputTest(test.TestCase):
+
+  def test_supervised_outputs_valid(self):
+    """Tests that no errors are raised when provided outputs are valid."""
+    loss = {"my_loss": constant_op.constant([0])}
+    predictions = {u"output1": constant_op.constant(["foo"])}
+    metric_obj = metrics_module.Mean()
+    metric_obj.update_state(constant_op.constant([0]))
+    metrics = {
+        "metrics": metric_obj,
+        "metrics2": (constant_op.constant([0]), constant_op.constant([10]))
+    }
+
+    outputter = MockSupervisedOutput(loss, predictions, metrics)
+    self.assertEqual(outputter.loss["loss/my_loss"], loss["my_loss"])
+    self.assertEqual(
+        outputter.predictions["predictions/output1"], predictions["output1"])
+    self.assertEqual(outputter.metrics["metrics/update_op"].name,
+                     "metric_op_wrapper:0")
+    self.assertEqual(
+        outputter.metrics["metrics2/update_op"], metrics["metrics2"][1])
+
+    # Single Tensor is OK too
+    outputter = MockSupervisedOutput(
+        loss["my_loss"], predictions["output1"], metrics["metrics"])
+    self.assertEqual(outputter.loss, {"loss": loss["my_loss"]})
+    self.assertEqual(
+        outputter.predictions, {"predictions": predictions["output1"]})
+    self.assertEqual(outputter.metrics["metrics/update_op"].name,
+                     "metric_op_wrapper_1:0")
+
+  def test_supervised_outputs_none(self):
+    outputter = MockSupervisedOutput(
+        constant_op.constant([0]), None, None)
+    self.assertEqual(len(outputter.loss), 1)
+    self.assertEqual(outputter.predictions, None)
+    self.assertEqual(outputter.metrics, None)
+
+  def test_supervised_outputs_invalid(self):
+    with self.assertRaisesRegexp(ValueError, "predictions output value must"):
+      MockSupervisedOutput(constant_op.constant([0]), [3], None)
+    with self.assertRaisesRegexp(ValueError, "loss output value must"):
+      MockSupervisedOutput("str", None, None)
+    with self.assertRaisesRegexp(ValueError, "metrics output value must"):
+      MockSupervisedOutput(None, None, (15.3, 4))
+    with self.assertRaisesRegexp(ValueError, "loss output key must"):
+      MockSupervisedOutput({25: "Tensor"}, None, None)
+
+  def test_supervised_outputs_tuples(self):
+    """Tests that no errors are raised when provided outputs are valid."""
+    loss = {("my", "loss"): constant_op.constant([0])}
+    predictions = {(u"output1", "2"): constant_op.constant(["foo"])}
+    metric_obj = metrics_module.Mean()
+    metric_obj.update_state(constant_op.constant([0]))
+    metrics = {
+        ("metrics", "1"):
+            metric_obj,
+        ("metrics", "2"): (constant_op.constant([0]),
+                           constant_op.constant([10]))
+    }
+
+    outputter = MockSupervisedOutput(loss, predictions, metrics)
+    self.assertEqual(set(outputter.loss.keys()), set(["loss/my/loss"]))
+    self.assertEqual(set(outputter.predictions.keys()),
+                     set(["predictions/output1/2"]))
+    self.assertEqual(
+        set(outputter.metrics.keys()),
+        set([
+            "metrics/1/value", "metrics/1/update_op", "metrics/2/value",
+            "metrics/2/update_op"
+        ]))
+
+  def test_supervised_outputs_no_prepend(self):
+    """Tests that no errors are raised when provided outputs are valid."""
+    loss = {"loss": constant_op.constant([0])}
+    predictions = {u"predictions": constant_op.constant(["foo"])}
+    metric_obj = metrics_module.Mean()
+    metric_obj.update_state(constant_op.constant([0]))
+    metrics = {
+        "metrics_1": metric_obj,
+        "metrics_2": (constant_op.constant([0]), constant_op.constant([10]))
+    }
+
+    outputter = MockSupervisedOutput(loss, predictions, metrics)
+    self.assertEqual(set(outputter.loss.keys()), set(["loss"]))
+    self.assertEqual(set(outputter.predictions.keys()), set(["predictions"]))
+    self.assertEqual(
+        set(outputter.metrics.keys()),
+        set([
+            "metrics_1/value", "metrics_1/update_op", "metrics_2/update_op",
+            "metrics_2/value"
+        ]))
+
+  def test_train_signature_def(self):
+    loss = {"my_loss": constant_op.constant([0])}
+    predictions = {u"output1": constant_op.constant(["foo"])}
+    metric_obj = metrics_module.Mean()
+    metric_obj.update_state(constant_op.constant([0]))
+    metrics = {
+        "metrics_1": metric_obj,
+        "metrics_2": (constant_op.constant([0]), constant_op.constant([10]))
+    }
+
+    outputter = export_output_lib.TrainOutput(loss, predictions, metrics)
+
+    receiver = {u"features": constant_op.constant(100, shape=(100, 2)),
+                "labels": constant_op.constant(100, shape=(100, 1))}
+    sig_def = outputter.as_signature_def(receiver)
+
+    self.assertTrue("loss/my_loss" in sig_def.outputs)
+    self.assertTrue("metrics_1/value" in sig_def.outputs)
+    self.assertTrue("metrics_2/value" in sig_def.outputs)
+    self.assertTrue("predictions/output1" in sig_def.outputs)
+    self.assertTrue("features" in sig_def.inputs)
+
+  def test_eval_signature_def(self):
+    loss = {"my_loss": constant_op.constant([0])}
+    predictions = {u"output1": constant_op.constant(["foo"])}
+
+    outputter = export_output_lib.EvalOutput(loss, predictions, None)
+
+    receiver = {u"features": constant_op.constant(100, shape=(100, 2)),
+                "labels": constant_op.constant(100, shape=(100, 1))}
+    sig_def = outputter.as_signature_def(receiver)
+
+    self.assertTrue("loss/my_loss" in sig_def.outputs)
+    self.assertFalse("metrics/value" in sig_def.outputs)
+    self.assertTrue("predictions/output1" in sig_def.outputs)
+    self.assertTrue("features" in sig_def.inputs)
+
+  def test_metric_op_is_tensor(self):
+    """Tests that ops.Operation is wrapped by a tensor for metric_ops."""
+    loss = {"my_loss": constant_op.constant([0])}
+    predictions = {u"output1": constant_op.constant(["foo"])}
+    metric_obj = metrics_module.Mean()
+    metric_obj.update_state(constant_op.constant([0]))
+    metrics = {
+        "metrics_1": metric_obj,
+        "metrics_2": (constant_op.constant([0]), control_flow_ops.no_op())
+    }
+
+    outputter = MockSupervisedOutput(loss, predictions, metrics)
+
+    self.assertTrue(outputter.metrics["metrics_1/update_op"].name.startswith(
+        "metric_op_wrapper"))
+    self.assertTrue(
+        isinstance(outputter.metrics["metrics_1/update_op"], ops.Tensor))
+    self.assertTrue(
+        isinstance(outputter.metrics["metrics_1/value"], ops.Tensor))
+
+    self.assertEqual(outputter.metrics["metrics_2/value"],
+                     metrics["metrics_2"][0])
+    self.assertTrue(outputter.metrics["metrics_2/update_op"].name.startswith(
+        "metric_op_wrapper"))
+    self.assertTrue(
+        isinstance(outputter.metrics["metrics_2/update_op"], ops.Tensor))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py
new file mode 100644
index 0000000000..ed3219c49b
--- /dev/null
+++ b/tensorflow/python/estimator/export/export_test.py
@@ -0,0 +1,802 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for export."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+import time
+
+from google.protobuf import text_format
+
+from tensorflow.core.example import example_pb2
+from tensorflow.python.estimator.export import export
+from tensorflow.python.estimator.export import export_output
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model import signature_def_utils
+
+
+class LabeledTensorMock(object):
+  """Mock class emulating LabeledTensor."""
+
+  def __init__(self):
+    self.tensor = constant_op.constant([1])
+
+
+def _convert_labeled_tensor_mock_to_tensor(value, *args, **kwargs):
+  return ops.internal_convert_to_tensor(value.tensor, *args, **kwargs)
+
+
+ops.register_tensor_conversion_function(LabeledTensorMock,
+                                        _convert_labeled_tensor_mock_to_tensor)
+
+
+class ServingInputReceiverTest(test_util.TensorFlowTestCase):
+
+  def test_serving_input_receiver_constructor(self):
+    """Tests that no errors are raised when input is expected."""
+    features = {
+        "feature0": constant_op.constant([0]),
+        u"feature1": constant_op.constant([1]),
+        "feature2": sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+    }
+    receiver_tensors = {
+        "example0": array_ops.placeholder(dtypes.string, name="example0"),
+        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
+    }
+    export.ServingInputReceiver(features, receiver_tensors)
+
+  def test_serving_input_receiver_features_invalid(self):
+    receiver_tensors = {
+        "example0": array_ops.placeholder(dtypes.string, name="example0"),
+        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
+    }
+
+    with self.assertRaisesRegexp(ValueError, "features must be defined"):
+      export.ServingInputReceiver(
+          features=None,
+          receiver_tensors=receiver_tensors)
+
+    with self.assertRaisesRegexp(ValueError, "feature keys must be strings"):
+      export.ServingInputReceiver(
+          features={1: constant_op.constant([1])},
+          receiver_tensors=receiver_tensors)
+
+    with self.assertRaisesRegexp(
+        ValueError, "feature feature1 must be a Tensor or SparseTensor"):
+      export.ServingInputReceiver(
+          features={"feature1": [1]},
+          receiver_tensors=receiver_tensors)
+
+  def test_serving_input_receiver_receiver_tensors_invalid(self):
+    features = {
+        "feature0": constant_op.constant([0]),
+        u"feature1": constant_op.constant([1]),
+        "feature2": sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+    }
+
+    with self.assertRaisesRegexp(
+        ValueError, "receiver_tensors must be defined"):
+      export.ServingInputReceiver(
+          features=features,
+          receiver_tensors=None)
+
+    with self.assertRaisesRegexp(
+        ValueError, "receiver_tensor keys must be strings"):
+      export.ServingInputReceiver(
+          features=features,
+          receiver_tensors={
+              1: array_ops.placeholder(dtypes.string, name="example0")})
+
+    with self.assertRaisesRegexp(
+        ValueError, "receiver_tensor example1 must be a Tensor"):
+      export.ServingInputReceiver(
+          features=features,
+          receiver_tensors={"example1": [1]})
+
+  def test_single_feature_single_receiver(self):
+    feature = constant_op.constant(5)
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    input_receiver = export.ServingInputReceiver(
+        feature, receiver_tensor)
+    # single feature is automatically named
+    feature_key, = input_receiver.features.keys()
+    self.assertEqual("feature", feature_key)
+    # single receiver is automatically named
+    receiver_key, = input_receiver.receiver_tensors.keys()
+    self.assertEqual("input", receiver_key)
+
+  def test_multi_feature_single_receiver(self):
+    features = {"foo": constant_op.constant(5),
+                "bar": constant_op.constant(6)}
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    _ = export.ServingInputReceiver(features, receiver_tensor)
+
+  def test_multi_feature_multi_receiver(self):
+    features = {"foo": constant_op.constant(5),
+                "bar": constant_op.constant(6)}
+    receiver_tensors = {"baz": array_ops.placeholder(dtypes.int64),
+                        "qux": array_ops.placeholder(dtypes.float32)}
+    _ = export.ServingInputReceiver(features, receiver_tensors)
+
+  def test_feature_wrong_type(self):
+    feature = "not a tensor"
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    with self.assertRaises(ValueError):
+      _ = export.ServingInputReceiver(feature, receiver_tensor)
+
+  def test_feature_labeled_tensor(self):
+    feature = LabeledTensorMock()
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    _ = export.ServingInputReceiver(feature, receiver_tensor)
+
+  def test_receiver_wrong_type(self):
+    feature = constant_op.constant(5)
+    receiver_tensor = "not a tensor"
+    with self.assertRaises(ValueError):
+      _ = export.ServingInputReceiver(feature, receiver_tensor)
+
+
+class UnsupervisedInputReceiverTest(test_util.TensorFlowTestCase):
+
+  # Since this is basically a wrapper around ServingInputReceiver, we only
+  # have a simple sanity check to ensure that it works.
+
+  def test_unsupervised_input_receiver_constructor(self):
+    """Tests that no errors are raised when input is expected."""
+    features = {
+        "feature0":
+            constant_op.constant([0]),
+        u"feature1":
+            constant_op.constant([1]),
+        "feature2":
+            sparse_tensor.SparseTensor(
+                indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+    }
+    receiver_tensors = {
+        "example0": array_ops.placeholder(dtypes.string, name="example0"),
+        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
+    }
+    export.UnsupervisedInputReceiver(features, receiver_tensors)
+
+
+class SupervisedInputReceiverTest(test_util.TensorFlowTestCase):
+
+  def test_input_receiver_constructor(self):
+    """Tests that no errors are raised when input is expected."""
+    features = {
+        "feature0": constant_op.constant([0]),
+        u"feature1": constant_op.constant([1]),
+        "feature2": sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+    }
+    labels = {
+        "classes": constant_op.constant([0] * 100),
+    }
+
+    receiver_tensors = {
+        "example0": array_ops.placeholder(dtypes.string, name="example0"),
+        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
+    }
+    export.SupervisedInputReceiver(features, labels, receiver_tensors)
+
+  def test_input_receiver_raw_values(self):
+    """Tests that no errors are raised when input is expected."""
+    features = {
+        "feature0": constant_op.constant([0]),
+        u"feature1": constant_op.constant([1]),
+        "feature2": sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+    }
+
+    labels = {
+        "classes": constant_op.constant([0] * 100),
+    }
+
+    receiver_tensors = {
+        "example0": array_ops.placeholder(dtypes.string, name="example0"),
+        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
+    }
+    rec = export.SupervisedInputReceiver(
+        features["feature2"], labels, receiver_tensors)
+    self.assertIsInstance(rec.features, sparse_tensor.SparseTensor)
+
+    rec = export.SupervisedInputReceiver(
+        features, labels["classes"], receiver_tensors)
+    self.assertIsInstance(rec.labels, ops.Tensor)
+
+  def test_input_receiver_features_invalid(self):
+    features = constant_op.constant([0] * 100)
+    labels = constant_op.constant([0])
+    receiver_tensors = {
+        "example0": array_ops.placeholder(dtypes.string, name="example0"),
+        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
+    }
+
+    with self.assertRaisesRegexp(ValueError, "features must be defined"):
+      export.SupervisedInputReceiver(
+          features=None,
+          labels=labels,
+          receiver_tensors=receiver_tensors)
+
+    with self.assertRaisesRegexp(ValueError, "feature keys must be strings"):
+      export.SupervisedInputReceiver(
+          features={1: constant_op.constant([1])},
+          labels=labels,
+          receiver_tensors=receiver_tensors)
+
+    with self.assertRaisesRegexp(ValueError, "label keys must be strings"):
+      export.SupervisedInputReceiver(
+          features=features,
+          labels={1: constant_op.constant([1])},
+          receiver_tensors=receiver_tensors)
+
+    with self.assertRaisesRegexp(
+        ValueError, "feature feature1 must be a Tensor or SparseTensor"):
+      export.SupervisedInputReceiver(
+          features={"feature1": [1]},
+          labels=labels,
+          receiver_tensors=receiver_tensors)
+
+    with self.assertRaisesRegexp(
+        ValueError, "feature must be a Tensor or SparseTensor"):
+      export.SupervisedInputReceiver(
+          features=[1],
+          labels=labels,
+          receiver_tensors=receiver_tensors)
+
+    with self.assertRaisesRegexp(
+        ValueError, "label must be a Tensor or SparseTensor"):
+      export.SupervisedInputReceiver(
+          features=features,
+          labels=100,
+          receiver_tensors=receiver_tensors)
+
+  def test_input_receiver_receiver_tensors_invalid(self):
+    features = {
+        "feature0": constant_op.constant([0]),
+        u"feature1": constant_op.constant([1]),
+        "feature2": sparse_tensor.SparseTensor(
+            indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+    }
+    labels = constant_op.constant([0])
+
+    with self.assertRaisesRegexp(
+        ValueError, "receiver_tensors must be defined"):
+      export.SupervisedInputReceiver(
+          features=features,
+          labels=labels,
+          receiver_tensors=None)
+
+    with self.assertRaisesRegexp(
+        ValueError, "receiver_tensor keys must be strings"):
+      export.SupervisedInputReceiver(
+          features=features,
+          labels=labels,
+          receiver_tensors={
+              1: array_ops.placeholder(dtypes.string, name="example0")})
+
+    with self.assertRaisesRegexp(
+        ValueError, "receiver_tensor example1 must be a Tensor"):
+      export.SupervisedInputReceiver(
+          features=features,
+          labels=labels,
+          receiver_tensors={"example1": [1]})
+
+  def test_single_feature_single_receiver(self):
+    feature = constant_op.constant(5)
+    label = constant_op.constant(5)
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    input_receiver = export.SupervisedInputReceiver(
+        feature, label, receiver_tensor)
+
+    # single receiver is automatically named
+    receiver_key, = input_receiver.receiver_tensors.keys()
+    self.assertEqual("input", receiver_key)
+
+  def test_multi_feature_single_receiver(self):
+    features = {"foo": constant_op.constant(5),
+                "bar": constant_op.constant(6)}
+    labels = {"value": constant_op.constant(5)}
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    _ = export.SupervisedInputReceiver(features, labels, receiver_tensor)
+
+  def test_multi_feature_multi_receiver(self):
+    features = {"foo": constant_op.constant(5),
+                "bar": constant_op.constant(6)}
+    labels = {"value": constant_op.constant(5)}
+    receiver_tensors = {"baz": array_ops.placeholder(dtypes.int64),
+                        "qux": array_ops.placeholder(dtypes.float32)}
+    _ = export.SupervisedInputReceiver(features, labels, receiver_tensors)
+
+  def test_feature_labeled_tensor(self):
+    feature = LabeledTensorMock()
+    label = constant_op.constant(5)
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    _ = export.SupervisedInputReceiver(feature, label, receiver_tensor)
+
+
+class ExportTest(test_util.TensorFlowTestCase):
+
+  def test_build_parsing_serving_input_receiver_fn(self):
+    feature_spec = {"int_feature": parsing_ops.VarLenFeature(dtypes.int64),
+                    "float_feature": parsing_ops.VarLenFeature(dtypes.float32)}
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    with ops.Graph().as_default():
+      serving_input_receiver = serving_input_receiver_fn()
+      self.assertEqual(set(["int_feature", "float_feature"]),
+                       set(serving_input_receiver.features.keys()))
+      self.assertEqual(set(["examples"]),
+                       set(serving_input_receiver.receiver_tensors.keys()))
+
+      example = example_pb2.Example()
+      text_format.Parse("features: { "
+                        "  feature: { "
+                        "    key: 'int_feature' "
+                        "    value: { "
+                        "      int64_list: { "
+                        "        value: [ 21, 2, 5 ] "
+                        "      } "
+                        "    } "
+                        "  } "
+                        "  feature: { "
+                        "    key: 'float_feature' "
+                        "    value: { "
+                        "      float_list: { "
+                        "        value: [ 525.25 ] "
+                        "      } "
+                        "    } "
+                        "  } "
+                        "} ", example)
+
+      with self.cached_session() as sess:
+        sparse_result = sess.run(
+            serving_input_receiver.features,
+            feed_dict={
+                serving_input_receiver.receiver_tensors["examples"].name:
+                [example.SerializeToString()]})
+        self.assertAllEqual([[0, 0], [0, 1], [0, 2]],
+                            sparse_result["int_feature"].indices)
+        self.assertAllEqual([21, 2, 5],
+                            sparse_result["int_feature"].values)
+        self.assertAllEqual([[0, 0]],
+                            sparse_result["float_feature"].indices)
+        self.assertAllEqual([525.25],
+                            sparse_result["float_feature"].values)
+
+  def test_build_raw_serving_input_receiver_fn_name(self):
+    """Test case for issue #12755."""
+    f = {
+        "feature":
+            array_ops.placeholder(
+                name="feature", shape=[32], dtype=dtypes.float32)
+    }
+    serving_input_receiver_fn = export.build_raw_serving_input_receiver_fn(f)
+    v = serving_input_receiver_fn()
+    self.assertTrue(isinstance(v, export.ServingInputReceiver))
+
+  def test_build_raw_serving_input_receiver_fn_without_shape(self):
+    """Test case for issue #21178."""
+    f = {"feature_1": array_ops.placeholder(dtypes.float32),
+         "feature_2": array_ops.placeholder(dtypes.int32)}
+    serving_input_receiver_fn = export.build_raw_serving_input_receiver_fn(f)
+    v = serving_input_receiver_fn()
+    self.assertTrue(isinstance(v, export.ServingInputReceiver))
+    self.assertEqual(
+        tensor_shape.unknown_shape(),
+        v.receiver_tensors["feature_1"].shape)
+    self.assertEqual(
+        tensor_shape.unknown_shape(),
+        v.receiver_tensors["feature_2"].shape)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_build_raw_serving_input_receiver_fn(self):
+    features = {"feature_1": constant_op.constant(["hello"]),
+                "feature_2": constant_op.constant([42])}
+    serving_input_receiver_fn = export.build_raw_serving_input_receiver_fn(
+        features)
+    with ops.Graph().as_default():
+      serving_input_receiver = serving_input_receiver_fn()
+      self.assertEqual(set(["feature_1", "feature_2"]),
+                       set(serving_input_receiver.features.keys()))
+      self.assertEqual(set(["feature_1", "feature_2"]),
+                       set(serving_input_receiver.receiver_tensors.keys()))
+      self.assertEqual(
+          dtypes.string,
+          serving_input_receiver.receiver_tensors["feature_1"].dtype)
+      self.assertEqual(
+          dtypes.int32,
+          serving_input_receiver.receiver_tensors["feature_2"].dtype)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_build_raw_supervised_input_receiver_fn(self):
+    features = {"feature_1": constant_op.constant(["hello"]),
+                "feature_2": constant_op.constant([42])}
+    labels = {"foo": constant_op.constant([5]),
+              "bar": constant_op.constant([6])}
+    input_receiver_fn = export.build_raw_supervised_input_receiver_fn(
+        features, labels)
+    with ops.Graph().as_default():
+      input_receiver = input_receiver_fn()
+      self.assertEqual(set(["feature_1", "feature_2"]),
+                       set(input_receiver.features.keys()))
+      self.assertEqual(set(["foo", "bar"]),
+                       set(input_receiver.labels.keys()))
+      self.assertEqual(set(["feature_1", "feature_2", "foo", "bar"]),
+                       set(input_receiver.receiver_tensors.keys()))
+      self.assertEqual(
+          dtypes.string, input_receiver.receiver_tensors["feature_1"].dtype)
+      self.assertEqual(
+          dtypes.int32, input_receiver.receiver_tensors["feature_2"].dtype)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_build_raw_supervised_input_receiver_fn_raw_tensors(self):
+    features = {"feature_1": constant_op.constant(["hello"]),
+                "feature_2": constant_op.constant([42])}
+    labels = {"foo": constant_op.constant([5]),
+              "bar": constant_op.constant([6])}
+    input_receiver_fn1 = export.build_raw_supervised_input_receiver_fn(
+        features["feature_1"], labels)
+    input_receiver_fn2 = export.build_raw_supervised_input_receiver_fn(
+        features["feature_1"], labels["foo"])
+    with ops.Graph().as_default():
+      input_receiver = input_receiver_fn1()
+      self.assertIsInstance(input_receiver.features, ops.Tensor)
+      self.assertEqual(set(["foo", "bar"]),
+                       set(input_receiver.labels.keys()))
+      self.assertEqual(set(["input", "foo", "bar"]),
+                       set(input_receiver.receiver_tensors.keys()))
+
+      input_receiver = input_receiver_fn2()
+      self.assertIsInstance(input_receiver.features, ops.Tensor)
+      self.assertIsInstance(input_receiver.labels, ops.Tensor)
+      self.assertEqual(set(["input", "label"]),
+                       set(input_receiver.receiver_tensors.keys()))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_build_raw_supervised_input_receiver_fn_batch_size(self):
+    features = {"feature_1": constant_op.constant(["hello"]),
+                "feature_2": constant_op.constant([42])}
+    labels = {"foo": constant_op.constant([5]),
+              "bar": constant_op.constant([6])}
+    input_receiver_fn = export.build_raw_supervised_input_receiver_fn(
+        features, labels, default_batch_size=10)
+    with ops.Graph().as_default():
+      input_receiver = input_receiver_fn()
+      self.assertEqual([10], input_receiver.receiver_tensors["feature_1"].shape)
+      self.assertEqual([10], input_receiver.features["feature_1"].shape)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_build_raw_supervised_input_receiver_fn_overlapping_keys(self):
+    features = {"feature_1": constant_op.constant(["hello"]),
+                "feature_2": constant_op.constant([42])}
+    labels = {"feature_1": constant_op.constant([5]),
+              "bar": constant_op.constant([6])}
+    with self.assertRaises(ValueError):
+      export.build_raw_supervised_input_receiver_fn(features, labels)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_build_supervised_input_receiver_fn_from_input_fn(self):
+    def dummy_input_fn():
+      return ({"x": constant_op.constant([[1], [1]]),
+               "y": constant_op.constant(["hello", "goodbye"])},
+              constant_op.constant([[1], [1]]))
+
+    input_receiver_fn = export.build_supervised_input_receiver_fn_from_input_fn(
+        dummy_input_fn)
+
+    with ops.Graph().as_default():
+      input_receiver = input_receiver_fn()
+      self.assertEqual(set(["x", "y"]),
+                       set(input_receiver.features.keys()))
+      self.assertIsInstance(input_receiver.labels, ops.Tensor)
+      self.assertEqual(set(["x", "y", "label"]),
+                       set(input_receiver.receiver_tensors.keys()))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_build_supervised_input_receiver_fn_from_input_fn_args(self):
+    def dummy_input_fn(feature_key="x"):
+      return ({feature_key: constant_op.constant([[1], [1]]),
+               "y": constant_op.constant(["hello", "goodbye"])},
+              {"my_label": constant_op.constant([[1], [1]])})
+
+    input_receiver_fn = export.build_supervised_input_receiver_fn_from_input_fn(
+        dummy_input_fn, feature_key="z")
+
+    with ops.Graph().as_default():
+      input_receiver = input_receiver_fn()
+      self.assertEqual(set(["z", "y"]),
+                       set(input_receiver.features.keys()))
+      self.assertEqual(set(["my_label"]),
+                       set(input_receiver.labels.keys()))
+      self.assertEqual(set(["z", "y", "my_label"]),
+                       set(input_receiver.receiver_tensors.keys()))
+
+  def test_build_all_signature_defs_without_receiver_alternatives(self):
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    output_1 = constant_op.constant([1.])
+    output_2 = constant_op.constant(["2"])
+    output_3 = constant_op.constant(["3"])
+    export_outputs = {
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+            export_output.RegressionOutput(value=output_1),
+        "head-2": export_output.ClassificationOutput(classes=output_2),
+        "head-3": export_output.PredictOutput(outputs={
+            "some_output_3": output_3
+        }),
+    }
+
+    signature_defs = export.build_all_signature_defs(
+        receiver_tensor, export_outputs)
+
+    expected_signature_defs = {
+        "serving_default":
+            signature_def_utils.regression_signature_def(receiver_tensor,
+                                                         output_1),
+        "head-2":
+            signature_def_utils.classification_signature_def(receiver_tensor,
+                                                             output_2, None),
+        "head-3":
+            signature_def_utils.predict_signature_def({
+                "input": receiver_tensor
+            }, {"some_output_3": output_3})
+    }
+
+    self.assertDictEqual(expected_signature_defs, signature_defs)
+
+  def test_build_all_signature_defs_with_dict_alternatives(self):
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    receiver_tensors_alternative_1 = {
+        "foo": array_ops.placeholder(dtypes.int64),
+        "bar": array_ops.sparse_placeholder(dtypes.float32)}
+    receiver_tensors_alternatives = {"other": receiver_tensors_alternative_1}
+    output_1 = constant_op.constant([1.])
+    output_2 = constant_op.constant(["2"])
+    output_3 = constant_op.constant(["3"])
+    export_outputs = {
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+            export_output.RegressionOutput(value=output_1),
+        "head-2": export_output.ClassificationOutput(classes=output_2),
+        "head-3": export_output.PredictOutput(outputs={
+            "some_output_3": output_3
+        }),
+    }
+
+    signature_defs = export.build_all_signature_defs(
+        receiver_tensor, export_outputs, receiver_tensors_alternatives)
+
+    expected_signature_defs = {
+        "serving_default":
+            signature_def_utils.regression_signature_def(
+                receiver_tensor,
+                output_1),
+        "head-2":
+            signature_def_utils.classification_signature_def(
+                receiver_tensor,
+                output_2, None),
+        "head-3":
+            signature_def_utils.predict_signature_def(
+                {"input": receiver_tensor},
+                {"some_output_3": output_3}),
+        "other:head-3":
+            signature_def_utils.predict_signature_def(
+                receiver_tensors_alternative_1,
+                {"some_output_3": output_3})
+
+        # Note that the alternatives 'other:serving_default' and 'other:head-2'
+        # are invalid, because regession and classification signatures must take
+        # a single string input.  Here we verify that these invalid signatures
+        # are not included in the export.
+    }
+
+    self.assertDictEqual(expected_signature_defs, signature_defs)
+
+  def test_build_all_signature_defs_with_single_alternatives(self):
+    receiver_tensor = array_ops.placeholder(dtypes.string)
+    receiver_tensors_alternative_1 = array_ops.placeholder(dtypes.int64)
+    receiver_tensors_alternative_2 = array_ops.sparse_placeholder(
+        dtypes.float32)
+    # Note we are passing single Tensors as values of
+    # receiver_tensors_alternatives, where normally that is a dict.
+    # In this case a dict will be created using the default receiver tensor
+    # name "input".
+    receiver_tensors_alternatives = {"other1": receiver_tensors_alternative_1,
+                                     "other2": receiver_tensors_alternative_2}
+    output_1 = constant_op.constant([1.])
+    output_2 = constant_op.constant(["2"])
+    output_3 = constant_op.constant(["3"])
+    export_outputs = {
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+            export_output.RegressionOutput(value=output_1),
+        "head-2": export_output.ClassificationOutput(classes=output_2),
+        "head-3": export_output.PredictOutput(outputs={
+            "some_output_3": output_3
+        }),
+    }
+
+    signature_defs = export.build_all_signature_defs(
+        receiver_tensor, export_outputs, receiver_tensors_alternatives)
+
+    expected_signature_defs = {
+        "serving_default":
+            signature_def_utils.regression_signature_def(
+                receiver_tensor,
+                output_1),
+        "head-2":
+            signature_def_utils.classification_signature_def(
+                receiver_tensor,
+                output_2, None),
+        "head-3":
+            signature_def_utils.predict_signature_def(
+                {"input": receiver_tensor},
+                {"some_output_3": output_3}),
+        "other1:head-3":
+            signature_def_utils.predict_signature_def(
+                {"input": receiver_tensors_alternative_1},
+                {"some_output_3": output_3}),
+        "other2:head-3":
+            signature_def_utils.predict_signature_def(
+                {"input": receiver_tensors_alternative_2},
+                {"some_output_3": output_3})
+
+        # Note that the alternatives 'other:serving_default' and 'other:head-2'
+        # are invalid, because regession and classification signatures must take
+        # a single string input.  Here we verify that these invalid signatures
+        # are not included in the export.
+    }
+
+    self.assertDictEqual(expected_signature_defs, signature_defs)
+
+  def test_build_all_signature_defs_export_outputs_required(self):
+    receiver_tensor = constant_op.constant(["11"])
+
+    with self.assertRaises(ValueError) as e:
+      export.build_all_signature_defs(receiver_tensor, None)
+
+    self.assertTrue(str(e.exception).startswith(
+        "export_outputs must be a dict"))
+
+  def test_get_timestamped_export_dir(self):
+    export_dir_base = tempfile.mkdtemp() + "export/"
+    export_dir_1 = export.get_timestamped_export_dir(
+        export_dir_base)
+    time.sleep(2)
+    export_dir_2 = export.get_timestamped_export_dir(
+        export_dir_base)
+    time.sleep(2)
+    export_dir_3 = export.get_timestamped_export_dir(
+        export_dir_base)
+
+    # Export directories should be named using a timestamp that is seconds
+    # since epoch.  Such a timestamp is 10 digits long.
+    time_1 = os.path.basename(export_dir_1)
+    self.assertEqual(10, len(time_1))
+    time_2 = os.path.basename(export_dir_2)
+    self.assertEqual(10, len(time_2))
+    time_3 = os.path.basename(export_dir_3)
+    self.assertEqual(10, len(time_3))
+
+    self.assertTrue(int(time_1) < int(time_2))
+    self.assertTrue(int(time_2) < int(time_3))
+
+  def test_build_all_signature_defs_serving_only(self):
+    receiver_tensor = {"input": array_ops.placeholder(dtypes.string)}
+    output_1 = constant_op.constant([1.])
+    export_outputs = {
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+            export_output.PredictOutput(outputs=output_1),
+        "train": export_output.TrainOutput(loss=output_1),
+    }
+
+    signature_defs = export.build_all_signature_defs(
+        receiver_tensor, export_outputs)
+
+    expected_signature_defs = {
+        "serving_default": signature_def_utils.predict_signature_def(
+            receiver_tensor, {"output": output_1})
+    }
+
+    self.assertDictEqual(expected_signature_defs, signature_defs)
+
+    signature_defs = export.build_all_signature_defs(
+        receiver_tensor, export_outputs, serving_only=False)
+
+    expected_signature_defs.update({
+        "train": signature_def_utils.supervised_train_signature_def(
+            receiver_tensor, loss={"loss": output_1})
+    })
+
+    self.assertDictEqual(expected_signature_defs, signature_defs)
+
+
+class TensorServingReceiverTest(test_util.TensorFlowTestCase):
+
+  def test_tensor_serving_input_receiver_constructor(self):
+    features = constant_op.constant([0])
+    receiver_tensors = {
+        "example0": array_ops.placeholder(dtypes.string, name="example0"),
+        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
+    }
+    r = export.TensorServingInputReceiver(features, receiver_tensors)
+    self.assertTrue(isinstance(r.features, ops.Tensor))
+    self.assertTrue(isinstance(r.receiver_tensors, dict))
+
+  def test_tensor_serving_input_receiver_sparse(self):
+    features = sparse_tensor.SparseTensor(
+        indices=[[0, 0]], values=[1], dense_shape=[1, 1])
+    receiver_tensors = {
+        "example0": array_ops.placeholder(dtypes.string, name="example0"),
+        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
+    }
+    r = export.TensorServingInputReceiver(features, receiver_tensors)
+    self.assertTrue(isinstance(r.features, sparse_tensor.SparseTensor))
+    self.assertTrue(isinstance(r.receiver_tensors, dict))
+
+  def test_serving_input_receiver_features_invalid(self):
+    receiver_tensors = {
+        "example0": array_ops.placeholder(dtypes.string, name="example0"),
+        u"example1": array_ops.placeholder(dtypes.string, name="example1"),
+    }
+
+    with self.assertRaisesRegexp(ValueError, "features must be defined"):
+      export.TensorServingInputReceiver(
+          features=None,
+          receiver_tensors=receiver_tensors)
+
+    with self.assertRaisesRegexp(ValueError, "feature must be a Tensor"):
+      export.TensorServingInputReceiver(
+          features={"1": constant_op.constant([1])},
+          receiver_tensors=receiver_tensors)
+
+  def test_serving_input_receiver_receiver_tensors_invalid(self):
+    features = constant_op.constant([0])
+
+    with self.assertRaisesRegexp(
+        ValueError, "receiver_tensors must be defined"):
+      export.TensorServingInputReceiver(
+          features=features,
+          receiver_tensors=None)
+
+    with self.assertRaisesRegexp(
+        ValueError, "receiver_tensor keys must be strings"):
+      export.TensorServingInputReceiver(
+          features=features,
+          receiver_tensors={
+              1: array_ops.placeholder(dtypes.string, name="example0")})
+
+    with self.assertRaisesRegexp(
+        ValueError, "receiver_tensor example1 must be a Tensor"):
+      export.TensorServingInputReceiver(
+          features=features,
+          receiver_tensors={"example1": [1]})
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py
index 7e14da56bc..b18212cfcd 100644
--- a/tensorflow/python/estimator/exporter.py
+++ b/tensorflow/python/estimator/exporter.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,495 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""exporter python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""`Exporter` class represents different flavors of model export."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator import exporter
+import abc
+import os
+
+from tensorflow.python.estimator import gc
+from tensorflow.python.estimator import util
+from tensorflow.python.estimator.canned import metric_keys
+from tensorflow.python.framework import errors_impl
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import tf_logging
+from tensorflow.python.summary import summary_iterator
+from tensorflow.python.util.tf_export import estimator_export
+
+
+@estimator_export('estimator.Exporter')
+class Exporter(object):
+  """A class representing a type of model export."""
+
+  @abc.abstractproperty
+  def name(self):
+    """Directory name.
+
+    A directory name under the export base directory where exports of
+    this type are written.  Should not be `None` nor empty.
+    """
+    pass
+
+  @abc.abstractmethod
+  def export(self, estimator, export_path, checkpoint_path, eval_result,
+             is_the_final_export):
+    """Exports the given `Estimator` to a specific format.
+
+    Args:
+      estimator: the `Estimator` to export.
+      export_path: A string containing a directory where to write the export.
+      checkpoint_path: The checkpoint path to export.
+      eval_result: The output of `Estimator.evaluate` on this checkpoint.
+      is_the_final_export: This boolean is True when this is an export in the
+        end of training.  It is False for the intermediate exports during
+        the training.
+        When passing `Exporter` to `tf.estimator.train_and_evaluate`
+        `is_the_final_export` is always False if `TrainSpec.max_steps` is
+        `None`.
+
+    Returns:
+      The string path to the exported directory or `None` if export is skipped.
+    """
+    pass
+
+
+class _SavedModelExporter(Exporter):
+  """This class exports the serving graph and checkpoints.
+
+     This class provides a basic exporting functionality and serves as a
+     foundation for specialized `Exporter`s.
+  """
+
+  def __init__(self,
+               name,
+               serving_input_receiver_fn,
+               assets_extra=None,
+               as_text=False,
+               strip_default_attrs=True):
+    """Create an `Exporter` to use with `tf.estimator.EvalSpec`.
+
+    Args:
+      name: unique name of this `Exporter` that is going to be used in the
+        export path.
+      serving_input_receiver_fn: a function that takes no arguments and returns
+        a `ServingInputReceiver`.
+      assets_extra: An optional dict specifying how to populate the assets.extra
+        directory within the exported SavedModel.  Each key should give the
+        destination path (including the filename) relative to the assets.extra
+        directory.  The corresponding value gives the full path of the source
+        file to be copied.  For example, the simple case of copying a single
+        file without renaming it is specified as
+        `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
+      as_text: whether to write the SavedModel proto in text format. Defaults to
+        `False`.
+      strip_default_attrs: Boolean. If set, default attrs in the `GraphDef` will
+        be stripped on write. This is the default behavior and recommended for
+        better forward compatibility of the resulting `SavedModel`.
+
+    Raises:
+      ValueError: if any arguments is invalid.
+    """
+    self._name = name
+    self._serving_input_receiver_fn = serving_input_receiver_fn
+    self._assets_extra = assets_extra
+    self._as_text = as_text
+    self._strip_default_attrs = strip_default_attrs
+
+  @property
+  def name(self):
+    return self._name
+
+  def export(self, estimator, export_path, checkpoint_path, eval_result,
+             is_the_final_export):
+    del is_the_final_export
+
+    export_result = estimator.export_savedmodel(
+        export_path,
+        self._serving_input_receiver_fn,
+        assets_extra=self._assets_extra,
+        as_text=self._as_text,
+        checkpoint_path=checkpoint_path,
+        strip_default_attrs=self._strip_default_attrs)
+
+    return export_result
+
+
+def _loss_smaller(best_eval_result, current_eval_result):
+  """Compares two evaluation results and returns true if the 2nd one is smaller.
+
+  Both evaluation results should have the values for MetricKeys.LOSS, which are
+  used for comparison.
+
+  Args:
+    best_eval_result: best eval metrics.
+    current_eval_result: current eval metrics.
+
+  Returns:
+    True if the loss of current_eval_result is smaller; otherwise, False.
+
+  Raises:
+    ValueError: If input eval result is None or no loss is available.
+  """
+  default_key = metric_keys.MetricKeys.LOSS
+  if not best_eval_result or default_key not in best_eval_result:
+    raise ValueError(
+        'best_eval_result cannot be empty or no loss is found in it.')
+
+  if not current_eval_result or default_key not in current_eval_result:
+    raise ValueError(
+        'current_eval_result cannot be empty or no loss is found in it.')
+
+  return best_eval_result[default_key] > current_eval_result[default_key]
+
+
+def _verify_compare_fn_args(compare_fn):
+  """Verifies compare_fn arguments."""
+  args = set(util.fn_args(compare_fn))
+  if 'best_eval_result' not in args:
+    raise ValueError(
+        'compare_fn (%s) must include best_eval_result argument.' % compare_fn)
+  if 'current_eval_result' not in args:
+    raise ValueError(
+        'compare_fn (%s) must include current_eval_result argument.' %
+        compare_fn)
+  non_valid_args = list(args - set(['best_eval_result', 'current_eval_result']))
+  if non_valid_args:
+    raise ValueError('compare_fn (%s) has following not expected args: %s' %
+                     (compare_fn, non_valid_args))
+
+
+@estimator_export('estimator.BestExporter')
+class BestExporter(Exporter):
+  """This class exports the serving graph and checkpoints of the best models.
+
+  This class performs a model export everytime when the new model is better
+  than any exsiting model.
+  """
+
+  def __init__(self,
+               name='best_exporter',
+               serving_input_receiver_fn=None,
+               event_file_pattern='eval/*.tfevents.*',
+               compare_fn=_loss_smaller,
+               assets_extra=None,
+               as_text=False,
+               exports_to_keep=5):
+    """Create an `Exporter` to use with `tf.estimator.EvalSpec`.
+
+    Example of creating a BestExporter for training and evluation:
+    ```python
+    def make_train_and_eval_fn():
+      # Set up feature columns.
+      categorial_feature_a = (
+          tf.feature_column.categorical_column_with_hash_bucket(...))
+      categorial_feature_a_emb = embedding_column(
+          categorical_column=categorial_feature_a, ...)
+      ...  # other feature columns
+
+      estimator = tf.estimator.DNNClassifier(
+          config=tf.estimator.RunConfig(
+              model_dir='/my_model', save_summary_steps=100),
+          feature_columns=[categorial_feature_a_emb, ...],
+          hidden_units=[1024, 512, 256])
+
+      serving_feature_spec = tf.feature_column.make_parse_example_spec(
+          categorial_feature_a_emb)
+      serving_input_receiver_fn = (
+          tf.estimator.export.build_parsing_serving_input_receiver_fn(
+          serving_feature_spec))
+
+      exporter = tf.estimator.BestExporter(
+          name="best_exporter",
+          serving_input_receiver_fn=serving_input_receiver_fn,
+          exports_to_keep=5)
+
+      train_spec = tf.estimator.TrainSpec(...)
+
+      eval_spec = [tf.estimator.EvalSpec(
+        input_fn=eval_input_fn,
+        steps=100,
+        exporters=exporter,
+        start_delay_secs=0,
+        throttle_secs=5)]
+
+      return tf.estimator.DistributedTrainingSpec(estimator, train_spec,
+                                                  eval_spec)
+    ```
+
+    Args:
+      name: unique name of this `Exporter` that is going to be used in the
+        export path.
+      serving_input_receiver_fn: a function that takes no arguments and returns
+        a `ServingInputReceiver`.
+      event_file_pattern: event file name pattern relative to model_dir. If
+        None, however, the exporter would not be preemption-safe. To be
+        preemption-safe, event_file_pattern should be specified.
+      compare_fn: a function that compares two evaluation results and returns
+        true if current evaluation result is better. Follows the signature:
+        * Args:
+          * `best_eval_result`: This is the evaluation result of the best model.
+          * `current_eval_result`: This is the evaluation result of current
+                 candidate model.
+        * Returns:
+          True if current evaluation result is better; otherwise, False.
+      assets_extra: An optional dict specifying how to populate the assets.extra
+        directory within the exported SavedModel.  Each key should give the
+        destination path (including the filename) relative to the assets.extra
+        directory.  The corresponding value gives the full path of the source
+        file to be copied.  For example, the simple case of copying a single
+        file without renaming it is specified as `{'my_asset_file.txt':
+        '/path/to/my_asset_file.txt'}`.
+      as_text: whether to write the SavedModel proto in text format. Defaults to
+        `False`.
+      exports_to_keep: Number of exports to keep.  Older exports will be
+        garbage-collected.  Defaults to 5.  Set to `None` to disable garbage
+        collection.
+
+    Raises:
+      ValueError: if any arguments is invalid.
+    """
+    self._compare_fn = compare_fn
+    if self._compare_fn is None:
+      raise ValueError('`compare_fn` must not be None.')
+    _verify_compare_fn_args(self._compare_fn)
+
+    self._saved_model_exporter = _SavedModelExporter(
+        name, serving_input_receiver_fn, assets_extra, as_text)
+
+    self._event_file_pattern = event_file_pattern
+    self._model_dir = None
+    self._best_eval_result = None
+
+    self._exports_to_keep = exports_to_keep
+    if exports_to_keep is not None and exports_to_keep <= 0:
+      raise ValueError(
+          '`exports_to_keep`, if provided, must be positive number')
+
+  @property
+  def name(self):
+    return self._saved_model_exporter.name
+
+  def export(self, estimator, export_path, checkpoint_path, eval_result,
+             is_the_final_export):
+    export_result = None
+
+    if self._model_dir != estimator.model_dir and self._event_file_pattern:
+      # Loads best metric from event files.
+      tf_logging.info('Loading best metric from event files.')
+
+      self._model_dir = estimator.model_dir
+      full_event_file_pattern = os.path.join(self._model_dir,
+                                             self._event_file_pattern)
+      self._best_eval_result = self._get_best_eval_result(
+          full_event_file_pattern)
+
+    if self._best_eval_result is None or self._compare_fn(
+        best_eval_result=self._best_eval_result,
+        current_eval_result=eval_result):
+      tf_logging.info('Performing best model export.')
+      self._best_eval_result = eval_result
+      export_result = self._saved_model_exporter.export(
+          estimator, export_path, checkpoint_path, eval_result,
+          is_the_final_export)
+      self._garbage_collect_exports(export_path)
+
+    return export_result
+
+  def _garbage_collect_exports(self, export_dir_base):
+    """Deletes older exports, retaining only a given number of the most recent.
+
+    Export subdirectories are assumed to be named with monotonically increasing
+    integers; the most recent are taken to be those with the largest values.
+
+    Args:
+      export_dir_base: the base directory under which each export is in a
+        versioned subdirectory.
+    """
+    if self._exports_to_keep is None:
+      return
+
+    def _export_version_parser(path):
+      # create a simple parser that pulls the export_version from the directory.
+      filename = os.path.basename(path.path)
+      if not (len(filename) == 10 and filename.isdigit()):
+        return None
+      return path._replace(export_version=int(filename))
+
+    # pylint: disable=protected-access
+    keep_filter = gc._largest_export_versions(self._exports_to_keep)
+    delete_filter = gc._negation(keep_filter)
+    for p in delete_filter(
+        gc._get_paths(export_dir_base, parser=_export_version_parser)):
+      try:
+        gfile.DeleteRecursively(p.path)
+      except errors_impl.NotFoundError as e:
+        tf_logging.warn('Can not delete %s recursively: %s', p.path, e)
+    # pylint: enable=protected-access
+
+  def _get_best_eval_result(self, event_files):
+    """Get the best eval result from event files.
+
+    Args:
+      event_files: Absolute pattern of event files.
+
+    Returns:
+      The best eval result.
+    """
+    if not event_files:
+      return None
+
+    best_eval_result = None
+    for event_file in gfile.Glob(os.path.join(event_files)):
+      for event in summary_iterator.summary_iterator(event_file):
+        if event.HasField('summary'):
+          event_eval_result = {}
+          for value in event.summary.value:
+            if value.HasField('simple_value'):
+              event_eval_result[value.tag] = value.simple_value
+          if event_eval_result:
+            if best_eval_result is None or self._compare_fn(
+                best_eval_result, event_eval_result):
+              best_eval_result = event_eval_result
+    return best_eval_result
+
+
+@estimator_export('estimator.FinalExporter')
+class FinalExporter(Exporter):
+  """This class exports the serving graph and checkpoints in the end.
+
+  This class performs a single export in the end of training.
+  """
+
+  def __init__(self,
+               name,
+               serving_input_receiver_fn,
+               assets_extra=None,
+               as_text=False):
+    """Create an `Exporter` to use with `tf.estimator.EvalSpec`.
+
+    Args:
+      name: unique name of this `Exporter` that is going to be used in the
+        export path.
+      serving_input_receiver_fn: a function that takes no arguments and returns
+        a `ServingInputReceiver`.
+      assets_extra: An optional dict specifying how to populate the assets.extra
+        directory within the exported SavedModel.  Each key should give the
+        destination path (including the filename) relative to the assets.extra
+        directory.  The corresponding value gives the full path of the source
+        file to be copied.  For example, the simple case of copying a single
+        file without renaming it is specified as
+        `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
+      as_text: whether to write the SavedModel proto in text format. Defaults to
+        `False`.
+
+    Raises:
+      ValueError: if any arguments is invalid.
+    """
+    self._saved_model_exporter = _SavedModelExporter(
+        name, serving_input_receiver_fn, assets_extra, as_text)
+
+  @property
+  def name(self):
+    return self._saved_model_exporter.name
+
+  def export(self, estimator, export_path, checkpoint_path, eval_result,
+             is_the_final_export):
+    if not is_the_final_export:
+      return None
+
+    tf_logging.info('Performing the final export in the end of training.')
+
+    return self._saved_model_exporter.export(estimator, export_path,
+                                             checkpoint_path, eval_result,
+                                             is_the_final_export)
+
+
+@estimator_export('estimator.LatestExporter')
+class LatestExporter(Exporter):
+  """This class regularly exports the serving graph and checkpoints.
+
+  In addition to exporting, this class also garbage collects stale exports.
+  """
+
+  def __init__(self,
+               name,
+               serving_input_receiver_fn,
+               assets_extra=None,
+               as_text=False,
+               exports_to_keep=5):
+    """Create an `Exporter` to use with `tf.estimator.EvalSpec`.
+
+    Args:
+      name: unique name of this `Exporter` that is going to be used in the
+        export path.
+      serving_input_receiver_fn: a function that takes no arguments and returns
+        a `ServingInputReceiver`.
+      assets_extra: An optional dict specifying how to populate the assets.extra
+        directory within the exported SavedModel.  Each key should give the
+        destination path (including the filename) relative to the assets.extra
+        directory.  The corresponding value gives the full path of the source
+        file to be copied.  For example, the simple case of copying a single
+        file without renaming it is specified as
+        `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
+      as_text: whether to write the SavedModel proto in text format. Defaults to
+        `False`.
+      exports_to_keep: Number of exports to keep.  Older exports will be
+        garbage-collected.  Defaults to 5.  Set to `None` to disable garbage
+        collection.
+
+    Raises:
+      ValueError: if any arguments is invalid.
+    """
+    self._saved_model_exporter = _SavedModelExporter(
+        name, serving_input_receiver_fn, assets_extra, as_text)
+    self._exports_to_keep = exports_to_keep
+    if exports_to_keep is not None and exports_to_keep <= 0:
+      raise ValueError(
+          '`exports_to_keep`, if provided, must be positive number')
+
+  @property
+  def name(self):
+    return self._saved_model_exporter.name
+
+  def export(self, estimator, export_path, checkpoint_path, eval_result,
+             is_the_final_export):
+    export_result = self._saved_model_exporter.export(
+        estimator, export_path, checkpoint_path, eval_result,
+        is_the_final_export)
+
+    self._garbage_collect_exports(export_path)
+    return export_result
+
+  def _garbage_collect_exports(self, export_dir_base):
+    """Deletes older exports, retaining only a given number of the most recent.
+
+    Export subdirectories are assumed to be named with monotonically increasing
+    integers; the most recent are taken to be those with the largest values.
+
+    Args:
+      export_dir_base: the base directory under which each export is in a
+        versioned subdirectory.
+    """
+    if self._exports_to_keep is None:
+      return
 
-# Include attrs that start with single underscore.
-exporter.__all__ = [s for s in dir(exporter) if not s.startswith('__')]
+    def _export_version_parser(path):
+      # create a simple parser that pulls the export_version from the directory.
+      filename = os.path.basename(path.path)
+      if not (len(filename) == 10 and filename.isdigit()):
+        return None
+      return path._replace(export_version=int(filename))
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.exporter import *
+    # pylint: disable=protected-access
+    keep_filter = gc._largest_export_versions(self._exports_to_keep)
+    delete_filter = gc._negation(keep_filter)
+    for p in delete_filter(
+        gc._get_paths(export_dir_base, parser=_export_version_parser)):
+      try:
+        gfile.DeleteRecursively(p.path)
+      except errors_impl.NotFoundError as e:
+        tf_logging.warn('Can not delete %s recursively: %s', p.path, e)
+    # pylint: enable=protected-access
diff --git a/tensorflow/python/estimator/exporter_test.py b/tensorflow/python/estimator/exporter_test.py
new file mode 100644
index 0000000000..fcccfbde7a
--- /dev/null
+++ b/tensorflow/python/estimator/exporter_test.py
@@ -0,0 +1,400 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `Exporter`s."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+import time
+
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import exporter as exporter_lib
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.util import compat
+
+
+class BestExporterTest(test.TestCase):
+
+  def test_error_out_if_exports_to_keep_is_zero(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    with self.assertRaisesRegexp(ValueError, "positive number"):
+      exporter = exporter_lib.BestExporter(
+          name="best_exporter",
+          serving_input_receiver_fn=_serving_input_receiver_fn,
+          exports_to_keep=0)
+      self.assertEqual("best_exporter", exporter.name)
+
+  def test_best_exporter(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp()
+    gfile.MkDir(export_dir_base)
+    gfile.MkDir(export_dir_base + "/export")
+    gfile.MkDir(export_dir_base + "/eval")
+
+    exporter = exporter_lib.BestExporter(
+        name="best_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        exports_to_keep=5)
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.export_savedmodel.return_value = "export_result_path"
+    estimator.model_dir = export_dir_base
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {}, False)
+
+    self.assertEqual("export_result_path", export_result)
+    estimator.export_savedmodel.assert_called_with(
+        export_dir_base,
+        _serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        checkpoint_path="checkpoint_path",
+        strip_default_attrs=True)
+
+  def test_best_export_is_saved(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp()
+    gfile.MkDir(export_dir_base)
+    gfile.MkDir(export_dir_base + "/export")
+    gfile.MkDir(export_dir_base + "/eval")
+
+    exporter = exporter_lib.BestExporter(
+        name="best_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        exports_to_keep=1)
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.export_savedmodel.return_value = "export_result_path"
+    estimator.model_dir = export_dir_base
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"loss": 0.5}, False)
+
+    self.assertTrue(estimator.export_savedmodel.called)
+    self.assertEqual("export_result_path", export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"loss": 0.6}, False)
+    self.assertEqual(None, export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"loss": 0.4}, False)
+    self.assertEqual("export_result_path", export_result)
+
+  def test_best_exporter_with_preemption(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp()
+    gfile.MkDir(export_dir_base)
+    gfile.MkDir(export_dir_base + "/export")
+    gfile.MkDir(export_dir_base + "/eval")
+
+    eval_dir_base = os.path.join(export_dir_base, "eval_continuous")
+    estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 50}, 1)
+    estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 60}, 2)
+
+    exporter = exporter_lib.BestExporter(
+        name="best_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        event_file_pattern="eval_continuous/*.tfevents.*",
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        exports_to_keep=1)
+
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.model_dir = export_dir_base
+    estimator.export_savedmodel.return_value = "export_result_path"
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"loss": 100}, False)
+    self.assertEqual(None, export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"loss": 10}, False)
+    self.assertEqual("export_result_path", export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"loss": 20}, False)
+    self.assertEqual(None, export_result)
+
+  def test_best_exporter_with_empty_event(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp()
+    gfile.MkDir(export_dir_base)
+    gfile.MkDir(export_dir_base + "/export")
+    gfile.MkDir(export_dir_base + "/eval")
+
+    eval_dir_base = os.path.join(export_dir_base, "eval_continuous")
+    estimator_lib._write_dict_to_summary(eval_dir_base, {}, 1)
+    estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 60}, 2)
+
+    exporter = exporter_lib.BestExporter(
+        name="best_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        event_file_pattern="eval_continuous/*.tfevents.*",
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        exports_to_keep=1)
+
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.model_dir = export_dir_base
+    estimator.export_savedmodel.return_value = "export_result_path"
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"loss": 100}, False)
+    self.assertEqual(None, export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {"loss": 10}, False)
+    self.assertEqual("export_result_path", export_result)
+
+  def test_garbage_collect_exports(self):
+    export_dir_base = tempfile.mkdtemp()
+    gfile.MkDir(export_dir_base)
+    gfile.MkDir(export_dir_base + "/export")
+    gfile.MkDir(export_dir_base + "/eval")
+
+    export_dir_1 = _create_test_export_dir(export_dir_base)
+    export_dir_2 = _create_test_export_dir(export_dir_base)
+    export_dir_3 = _create_test_export_dir(export_dir_base)
+    export_dir_4 = _create_test_export_dir(export_dir_base)
+
+    self.assertTrue(gfile.Exists(export_dir_1))
+    self.assertTrue(gfile.Exists(export_dir_2))
+    self.assertTrue(gfile.Exists(export_dir_3))
+    self.assertTrue(gfile.Exists(export_dir_4))
+
+    def _serving_input_receiver_fn():
+      return array_ops.constant([1]), None
+
+    exporter = exporter_lib.BestExporter(
+        name="best_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        exports_to_keep=2)
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.model_dir = export_dir_base
+    # Garbage collect all but the most recent 2 exports,
+    # where recency is determined based on the timestamp directory names.
+    exporter.export(estimator, export_dir_base, None, None, False)
+
+    self.assertFalse(gfile.Exists(export_dir_1))
+    self.assertFalse(gfile.Exists(export_dir_2))
+    self.assertTrue(gfile.Exists(export_dir_3))
+    self.assertTrue(gfile.Exists(export_dir_4))
+
+
+class LatestExporterTest(test.TestCase):
+
+  def test_error_out_if_exports_to_keep_is_zero(self):
+    def _serving_input_receiver_fn():
+      pass
+
+    with self.assertRaisesRegexp(ValueError, "positive number"):
+      exporter = exporter_lib.LatestExporter(
+          name="latest_exporter",
+          serving_input_receiver_fn=_serving_input_receiver_fn,
+          exports_to_keep=0)
+      self.assertEqual("latest_exporter", exporter.name)
+
+  def test_latest_exporter(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp() + "export/"
+    gfile.MkDir(export_dir_base)
+
+    exporter = exporter_lib.LatestExporter(
+        name="latest_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        exports_to_keep=5)
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.export_savedmodel.return_value = "export_result_path"
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {}, False)
+
+    self.assertEqual("export_result_path", export_result)
+    estimator.export_savedmodel.assert_called_with(
+        export_dir_base,
+        _serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        checkpoint_path="checkpoint_path",
+        strip_default_attrs=True)
+
+  def test_only_the_last_export_is_saved(self):
+
+    def _serving_input_receiver_fn():
+      pass
+
+    export_dir_base = tempfile.mkdtemp() + "export/"
+    gfile.MkDir(export_dir_base)
+
+    exporter = exporter_lib.FinalExporter(
+        name="latest_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False)
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    estimator.export_savedmodel.return_value = "export_result_path"
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {}, False)
+
+    self.assertFalse(estimator.export_savedmodel.called)
+    self.assertEqual(None, export_result)
+
+    export_result = exporter.export(estimator, export_dir_base,
+                                    "checkpoint_path", {}, True)
+
+    self.assertEqual("export_result_path", export_result)
+    estimator.export_savedmodel.assert_called_with(
+        export_dir_base,
+        _serving_input_receiver_fn,
+        assets_extra={"from/path": "to/path"},
+        as_text=False,
+        checkpoint_path="checkpoint_path",
+        strip_default_attrs=True)
+
+  def test_garbage_collect_exports(self):
+    export_dir_base = tempfile.mkdtemp() + "export/"
+    gfile.MkDir(export_dir_base)
+    export_dir_1 = _create_test_export_dir(export_dir_base)
+    export_dir_2 = _create_test_export_dir(export_dir_base)
+    export_dir_3 = _create_test_export_dir(export_dir_base)
+    export_dir_4 = _create_test_export_dir(export_dir_base)
+
+    self.assertTrue(gfile.Exists(export_dir_1))
+    self.assertTrue(gfile.Exists(export_dir_2))
+    self.assertTrue(gfile.Exists(export_dir_3))
+    self.assertTrue(gfile.Exists(export_dir_4))
+
+    def _serving_input_receiver_fn():
+      return array_ops.constant([1]), None
+
+    exporter = exporter_lib.LatestExporter(
+        name="latest_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        exports_to_keep=2)
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    # Garbage collect all but the most recent 2 exports,
+    # where recency is determined based on the timestamp directory names.
+    exporter.export(estimator, export_dir_base, None, None, False)
+
+    self.assertFalse(gfile.Exists(export_dir_1))
+    self.assertFalse(gfile.Exists(export_dir_2))
+    self.assertTrue(gfile.Exists(export_dir_3))
+    self.assertTrue(gfile.Exists(export_dir_4))
+
+  def test_garbage_collect_exports_with_trailing_delimiter(self):
+    export_dir_base = tempfile.mkdtemp() + "export/"
+    gfile.MkDir(export_dir_base)
+    export_dir_1 = _create_test_export_dir(export_dir_base)
+    export_dir_2 = _create_test_export_dir(export_dir_base)
+    export_dir_3 = _create_test_export_dir(export_dir_base)
+    export_dir_4 = _create_test_export_dir(export_dir_base)
+
+    self.assertTrue(gfile.Exists(export_dir_1))
+    self.assertTrue(gfile.Exists(export_dir_2))
+    self.assertTrue(gfile.Exists(export_dir_3))
+    self.assertTrue(gfile.Exists(export_dir_4))
+
+    def _serving_input_receiver_fn():
+      return array_ops.constant([1]), None
+
+    exporter = exporter_lib.LatestExporter(
+        name="latest_exporter",
+        serving_input_receiver_fn=_serving_input_receiver_fn,
+        exports_to_keep=1)
+    estimator = test.mock.Mock(spec=estimator_lib.Estimator)
+    # Garbage collect all but the most recent 2 exports,
+    # where recency is determined based on the timestamp directory names.
+    with test.mock.patch.object(gfile, "ListDirectory") as mock_list_directory:
+      mock_list_directory.return_value = [
+          os.path.basename(export_dir_1) + b"/",
+          os.path.basename(export_dir_2) + b"/",
+          os.path.basename(export_dir_3) + b"/",
+          os.path.basename(export_dir_4) + b"/",
+          ]
+      exporter.export(estimator, export_dir_base, None, None, False)
+
+    self.assertFalse(gfile.Exists(export_dir_1))
+    self.assertFalse(gfile.Exists(export_dir_2))
+    self.assertFalse(gfile.Exists(export_dir_3))
+    self.assertTrue(gfile.Exists(export_dir_4))
+
+
+def _create_test_export_dir(export_dir_base):
+  export_dir = _get_timestamped_export_dir(export_dir_base)
+  gfile.MkDir(export_dir)
+  time.sleep(2)
+  return export_dir
+
+
+def _get_timestamped_export_dir(export_dir_base):
+  # When we create a timestamped directory, there is a small chance that the
+  # directory already exists because another worker is also writing exports.
+  # In this case we just wait one second to get a new timestamp and try again.
+  # If this fails several times in a row, then something is seriously wrong.
+  max_directory_creation_attempts = 10
+
+  attempts = 0
+  while attempts < max_directory_creation_attempts:
+    export_timestamp = int(time.time())
+
+    export_dir = os.path.join(
+        compat.as_bytes(export_dir_base), compat.as_bytes(
+            str(export_timestamp)))
+    if not gfile.Exists(export_dir):
+      # Collisions are still possible (though extremely unlikely): this
+      # directory is not actually created yet, but it will be almost
+      # instantly on return from this function.
+      return export_dir
+    time.sleep(1)
+    attempts += 1
+    logging.warn(
+        "Export directory {} already exists; retrying (attempt {}/{})".format(
+            export_dir, attempts, max_directory_creation_attempts))
+  raise RuntimeError("Failed to obtain a unique export directory name after "
+                     "{} attempts.".format(max_directory_creation_attempts))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/estimator/gc.py b/tensorflow/python/estimator/gc.py
index 10db0a1e2a..03ad33dd6b 100644
--- a/tensorflow/python/estimator/gc.py
+++ b/tensorflow/python/estimator/gc.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,200 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""gc python module.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
+r"""System for specifying garbage collection (GC) of path based data.
+
+This framework allows for GC of data specified by path names, for example files
+on disk.  gc.Path objects each represent a single item stored at a path and may
+be a base directory,
+  /tmp/exports/0/...
+  /tmp/exports/1/...
+  ...
+or a fully qualified file,
+  /tmp/train-1.ckpt
+  /tmp/train-2.ckpt
+  ...
+
+A gc filter function takes and returns a list of gc.Path items.  Filter
+functions are responsible for selecting Path items for preservation or deletion.
+Note that functions should always return a sorted list.
+
+For example,
+  base_dir = "/tmp"
+  # Create the directories.
+  for e in xrange(10):
+    os.mkdir("%s/%d" % (base_dir, e), 0o755)
+
+  # Create a simple parser that pulls the export_version from the directory.
+  path_regex = "^" + re.escape(base_dir) + "/(\\d+)$"
+  def parser(path):
+    match = re.match(path_regex, path.path)
+    if not match:
+      return None
+    return path._replace(export_version=int(match.group(1)))
+
+  path_list = gc._get_paths("/tmp", parser)  # contains all ten Paths
+
+  every_fifth = gc._mod_export_version(5)
+  print(every_fifth(path_list))  # shows ["/tmp/0", "/tmp/5"]
+
+  largest_three = gc.largest_export_versions(3)
+  print(largest_three(all_paths))  # shows ["/tmp/7", "/tmp/8", "/tmp/9"]
+
+  both = gc._union(every_fifth, largest_three)
+  print(both(all_paths))  # shows ["/tmp/0", "/tmp/5",
+                          #        "/tmp/7", "/tmp/8", "/tmp/9"]
+  # Delete everything not in 'both'.
+  to_delete = gc._negation(both)
+  for p in to_delete(all_paths):
+    gfile.DeleteRecursively(p.path)  # deletes:  "/tmp/1", "/tmp/2",
+                                     # "/tmp/3", "/tmp/4", "/tmp/6",
 """
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator import gc
+import collections
+import heapq
+import math
+import os
+
+from tensorflow.python.platform import gfile
+from tensorflow.python.util import compat
+
+Path = collections.namedtuple('Path', 'path export_version')
+
+
+def _largest_export_versions(n):
+  """Creates a filter that keeps the largest n export versions.
+
+  Args:
+    n: number of versions to keep.
+
+  Returns:
+    A filter function that keeps the n largest paths.
+  """
+  def keep(paths):
+    heap = []
+    for idx, path in enumerate(paths):
+      if path.export_version is not None:
+        heapq.heappush(heap, (path.export_version, idx))
+    keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
+    return sorted(keepers)
+
+  return keep
+
+
+def _one_of_every_n_export_versions(n):
+  """Creates a filter that keeps one of every n export versions.
+
+  Args:
+    n: interval size.
+
+  Returns:
+    A filter function that keeps exactly one path from each interval
+    [0, n], (n, 2n], (2n, 3n], etc...  If more than one path exists in an
+    interval the largest is kept.
+  """
+  def keep(paths):
+    """A filter function that keeps exactly one out of every n paths."""
+
+    keeper_map = {}  # map from interval to largest path seen in that interval
+    for p in paths:
+      if p.export_version is None:
+        # Skip missing export_versions.
+        continue
+      # Find the interval (with a special case to map export_version = 0 to
+      # interval 0.
+      interval = math.floor(
+          (p.export_version - 1) / n) if p.export_version else 0
+      existing = keeper_map.get(interval, None)
+      if (not existing) or (existing.export_version < p.export_version):
+        keeper_map[interval] = p
+    return sorted(keeper_map.values())
+
+  return keep
+
+
+def _mod_export_version(n):
+  """Creates a filter that keeps every export that is a multiple of n.
+
+  Args:
+    n: step size.
+
+  Returns:
+    A filter function that keeps paths where export_version % n == 0.
+  """
+  def keep(paths):
+    keepers = []
+    for p in paths:
+      if p.export_version % n == 0:
+        keepers.append(p)
+    return sorted(keepers)
+  return keep
+
+
+def _union(lf, rf):
+  """Creates a filter that keeps the union of two filters.
+
+  Args:
+    lf: first filter
+    rf: second filter
+
+  Returns:
+    A filter function that keeps the n largest paths.
+  """
+  def keep(paths):
+    l = set(lf(paths))
+    r = set(rf(paths))
+    return sorted(list(l|r))
+  return keep
+
+
+def _negation(f):
+  """Negate a filter.
+
+  Args:
+    f: filter function to invert
+
+  Returns:
+    A filter function that returns the negation of f.
+  """
+  def keep(paths):
+    l = set(paths)
+    r = set(f(paths))
+    return sorted(list(l-r))
+  return keep
+
+
+def _get_paths(base_dir, parser):
+  """Gets a list of Paths in a given directory.
 
-# Include attrs that start with single underscore.
-gc.__all__ = [s for s in dir(gc) if not s.startswith('__')]
+  Args:
+    base_dir: directory.
+    parser: a function which gets the raw Path and can augment it with
+      information such as the export_version, or ignore the path by returning
+      None.  An example parser may extract the export version from a path
+      such as "/tmp/exports/100" an another may extract from a full file
+      name such as "/tmp/checkpoint-99.out".
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.gc import *
+  Returns:
+    A list of Paths contained in the base directory with the parsing function
+    applied.
+    By default the following fields are populated,
+      - Path.path
+    The parsing function is responsible for populating,
+      - Path.export_version
+  """
+  raw_paths = gfile.ListDirectory(base_dir)
+  paths = []
+  for r in raw_paths:
+    # ListDirectory() return paths with "/" at the last if base_dir was GCS URL
+    r = compat.as_str_any(r)
+    if r[-1] == '/':
+      r = r[0:len(r)-1]
+    p = parser(Path(os.path.join(compat.as_str_any(base_dir), r), None))
+    if p:
+      paths.append(p)
+  return sorted(paths)
diff --git a/tensorflow/python/estimator/gc_test.py b/tensorflow/python/estimator/gc_test.py
new file mode 100644
index 0000000000..53c3d4ca2a
--- /dev/null
+++ b/tensorflow/python/estimator/gc_test.py
@@ -0,0 +1,156 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for garbage collection utilities."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import re
+
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.python.estimator import gc
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.util import compat
+
+
+def _create_parser(base_dir):
+  # create a simple parser that pulls the export_version from the directory.
+  def parser(path):
+    # Modify the path object for RegEx match for Windows Paths
+    if os.name == "nt":
+      match = re.match(
+          "^" + compat.as_str_any(base_dir).replace("\\", "/") + "/(\\d+)$",
+          compat.as_str_any(path.path).replace("\\", "/"))
+    else:
+      match = re.match("^" + compat.as_str_any(base_dir) + "/(\\d+)$",
+                       compat.as_str_any(path.path))
+    if not match:
+      return None
+    return path._replace(export_version=int(match.group(1)))
+
+  return parser
+
+
+class GcTest(test_util.TensorFlowTestCase):
+
+  def testLargestExportVersions(self):
+    paths = [gc.Path("/foo", 8), gc.Path("/foo", 9), gc.Path("/foo", 10)]
+    newest = gc._largest_export_versions(2)
+    n = newest(paths)
+    self.assertEqual(n, [gc.Path("/foo", 9), gc.Path("/foo", 10)])
+
+  def testLargestExportVersionsDoesNotDeleteZeroFolder(self):
+    paths = [gc.Path("/foo", 0), gc.Path("/foo", 3)]
+    newest = gc._largest_export_versions(2)
+    n = newest(paths)
+    self.assertEqual(n, [gc.Path("/foo", 0), gc.Path("/foo", 3)])
+
+  def testModExportVersion(self):
+    paths = [
+        gc.Path("/foo", 4), gc.Path("/foo", 5), gc.Path("/foo", 6),
+        gc.Path("/foo", 9)
+    ]
+    mod = gc._mod_export_version(2)
+    self.assertEqual(mod(paths), [gc.Path("/foo", 4), gc.Path("/foo", 6)])
+    mod = gc._mod_export_version(3)
+    self.assertEqual(mod(paths), [gc.Path("/foo", 6), gc.Path("/foo", 9)])
+
+  def testOneOfEveryNExportVersions(self):
+    paths = [
+        gc.Path("/foo", 0), gc.Path("/foo", 1), gc.Path("/foo", 3),
+        gc.Path("/foo", 5), gc.Path("/foo", 6), gc.Path("/foo", 7),
+        gc.Path("/foo", 8), gc.Path("/foo", 33)
+    ]
+    one_of = gc._one_of_every_n_export_versions(3)
+    self.assertEqual(
+        one_of(paths), [
+            gc.Path("/foo", 3), gc.Path("/foo", 6), gc.Path("/foo", 8),
+            gc.Path("/foo", 33)
+        ])
+
+  def testOneOfEveryNExportVersionsZero(self):
+    # Zero is a special case since it gets rolled into the first interval.
+    # Test that here.
+    paths = [gc.Path("/foo", 0), gc.Path("/foo", 4), gc.Path("/foo", 5)]
+    one_of = gc._one_of_every_n_export_versions(3)
+    self.assertEqual(one_of(paths), [gc.Path("/foo", 0), gc.Path("/foo", 5)])
+
+  def testUnion(self):
+    paths = []
+    for i in xrange(10):
+      paths.append(gc.Path("/foo", i))
+    f = gc._union(gc._largest_export_versions(3), gc._mod_export_version(3))
+    self.assertEqual(
+        f(paths), [
+            gc.Path("/foo", 0), gc.Path("/foo", 3), gc.Path("/foo", 6),
+            gc.Path("/foo", 7), gc.Path("/foo", 8), gc.Path("/foo", 9)
+        ])
+
+  def testNegation(self):
+    paths = [
+        gc.Path("/foo", 4), gc.Path("/foo", 5), gc.Path("/foo", 6),
+        gc.Path("/foo", 9)
+    ]
+    mod = gc._negation(gc._mod_export_version(2))
+    self.assertEqual(mod(paths), [gc.Path("/foo", 5), gc.Path("/foo", 9)])
+    mod = gc._negation(gc._mod_export_version(3))
+    self.assertEqual(mod(paths), [gc.Path("/foo", 4), gc.Path("/foo", 5)])
+
+  def testPathsWithParse(self):
+    base_dir = os.path.join(test.get_temp_dir(), "paths_parse")
+    self.assertFalse(gfile.Exists(base_dir))
+    for p in xrange(3):
+      gfile.MakeDirs(os.path.join(base_dir, "%d" % p))
+    # add a base_directory to ignore
+    gfile.MakeDirs(os.path.join(base_dir, "ignore"))
+
+    self.assertEqual(
+        gc._get_paths(base_dir, _create_parser(base_dir)),
+        [
+            gc.Path(os.path.join(base_dir, "0"), 0),
+            gc.Path(os.path.join(base_dir, "1"), 1),
+            gc.Path(os.path.join(base_dir, "2"), 2)
+        ])
+
+  def testMixedStrTypes(self):
+    temp_dir = compat.as_bytes(test.get_temp_dir())
+
+    for sub_dir in ["str", b"bytes", u"unicode"]:
+      base_dir = os.path.join(
+          (temp_dir if isinstance(sub_dir, bytes) else temp_dir.decode()),
+          sub_dir)
+      self.assertFalse(gfile.Exists(base_dir))
+      gfile.MakeDirs(os.path.join(compat.as_str_any(base_dir), "42"))
+      gc._get_paths(base_dir, _create_parser(base_dir))
+
+  def testGcsDirWithSeparator(self):
+    base_dir = "gs://bucket/foo"
+    with test.mock.patch.object(gfile, "ListDirectory") as mock_list_directory:
+      # gfile.ListDirectory returns directory names with separator '/'
+      mock_list_directory.return_value = ["0/", "1/"]
+      self.assertEqual(
+          gc._get_paths(base_dir, _create_parser(base_dir)),
+          [
+              gc.Path(os.path.join(base_dir, "0"), 0),
+              gc.Path(os.path.join(base_dir, "1"), 1)
+          ])
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/estimator/inputs/__init__.py b/tensorflow/python/estimator/inputs/__init__.py
index b35091b11f..e69de29bb2 100644
--- a/tensorflow/python/estimator/inputs/__init__.py
+++ b/tensorflow/python/estimator/inputs/__init__.py
@@ -1,32 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""inputs python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow_estimator.python.estimator import inputs
-
-# Include attrs that start with single underscore.
-inputs.__all__ = [s for s in dir(inputs) if not s.startswith('__')]
-
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.inputs import *
diff --git a/tensorflow/python/estimator/inputs/inputs.py b/tensorflow/python/estimator/inputs/inputs.py
index 6084cee72b..6be168ee08 100644
--- a/tensorflow/python/estimator/inputs/inputs.py
+++ b/tensorflow/python/estimator/inputs/inputs.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""inputs python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Utility methods to create simple input_fns."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.inputs import inputs
-
-# Include attrs that start with single underscore.
-inputs.__all__ = [s for s in dir(inputs) if not s.startswith('__')]
+# pylint: disable=unused-import,line-too-long
+from tensorflow.python.estimator.inputs.numpy_io import numpy_input_fn
+from tensorflow.python.estimator.inputs.pandas_io import pandas_input_fn
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.inputs.inputs import *
+# pylint: enable=unused-import,line-too-long
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index d408d6384d..a6cefdece2 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,214 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""numpy_io python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Methods to allow dict of numpy arrays."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.inputs import numpy_io
+import collections
+
+import numpy as np
+from six import string_types
+
+from tensorflow.python.estimator.inputs.queues import feeding_functions
+from tensorflow.python.util.tf_export import estimator_export
+
+# Key name to pack the target into dict of `features`. See
+# `_get_unique_target_key` for details.
+_TARGET_KEY = '__target_key__'
+
+
+def _get_unique_target_key(features):
+  """Returns a key not existed in the input dict `features`.
+
+  Caller of `input_fn` usually provides `features` (dict of numpy arrays) and
+  `target`, but the underlying feeding module expects a single dict of numpy
+  arrays as input. So, the `target` needs to be packed into the `features`
+  temporarily and unpacked after calling the feeding function. Toward this goal,
+  this function returns a key not existed in the `features` to pack the
+  `target`.
+
+  Args:
+    features: OrderedDict of numpy arrays
+
+  Returns:
+    A unique key that can be used to insert the subsequent target into
+      features dict.
+  """
+  target_key = _TARGET_KEY
+  while target_key in features:
+    target_key += '_n'
+  return target_key
+
+
+def _validate_and_convert_features(x):
+  """Type check input data and make a shadow copy as an ordered dict.
+
+  Args:
+    x: numpy array object or dict of numpy array objects. If an array,
+      the array will be treated as a single feature.
+
+  Returns:
+    OrderedDict copy of x.
+
+  Raises:
+    ValueError: if x is empty
+    TypeError: if x is an unknown type.
+  """
+  if isinstance(x, dict):
+    if not x:
+      raise ValueError('x cannot be an empty dict')
+    # Make a shadow copy and also ensure the order of iteration is consistent.
+    ordered_dict_data = collections.OrderedDict(
+        sorted(x.items(), key=lambda t: t[0]))
+  elif isinstance(x, np.ndarray):
+    if x.size == 0:
+      raise ValueError('x cannot be an empty array')
+
+    # Make a shadow copy and convert to dict to align with dict processing.
+    ordered_dict_data = collections.OrderedDict({'__direct_np_input__': x})
+  else:
+    x_type = type(x).__name__
+    raise TypeError('x must be a dict or array; got {}'.format(x_type))
+
+  return ordered_dict_data
+
+
+@estimator_export('estimator.inputs.numpy_input_fn')
+def numpy_input_fn(x,
+                   y=None,
+                   batch_size=128,
+                   num_epochs=1,
+                   shuffle=None,
+                   queue_capacity=1000,
+                   num_threads=1):
+  """Returns input function that would feed dict of numpy arrays into the model.
+
+  This returns a function outputting `features` and `targets` based on the dict
+  of numpy arrays. The dict `features` has the same keys as the `x`. The dict
+  `targets` has the same keys as the `y` if `y` is a dict.
+
+  Example:
+
+  ```python
+  age = np.arange(4) * 1.0
+  height = np.arange(32, 36)
+  x = {'age': age, 'height': height}
+  y = np.arange(-32, -28)
+
+  with tf.Session() as session:
+    input_fn = numpy_io.numpy_input_fn(
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
+  ```
+
+  Args:
+    x: numpy array object or dict of numpy array objects. If an array,
+      the array will be treated as a single feature.
+    y: numpy array object or dict of numpy array object. `None` if absent.
+    batch_size: Integer, size of batches to return.
+    num_epochs: Integer, number of epochs to iterate over data. If `None` will
+      run forever.
+    shuffle: Boolean, if True shuffles the queue. Avoid shuffle at prediction
+      time.
+    queue_capacity: Integer, size of queue to accumulate.
+    num_threads: Integer, number of threads used for reading and enqueueing. In
+      order to have predicted and repeatable order of reading and enqueueing,
+      such as in prediction and evaluation mode, `num_threads` should be 1.
+
+  Returns:
+    Function, that has signature of ()->(dict of `features`, `targets`)
+
+  Raises:
+    ValueError: if the shape of `y` mismatches the shape of values in `x` (i.e.,
+      values in `x` have same shape).
+    ValueError: if duplicate keys are in both `x` and `y` when `y` is a dict.
+    ValueError: if x or y is an empty dict.
+    TypeError: `x` is not a dict or array.
+    ValueError: if 'shuffle' is not provided or a bool.
+  """
+  if not isinstance(shuffle, bool):
+    raise ValueError('shuffle must be provided and explicitly set as boolean '
+                     '(it is recommended to set it as True for training); '
+                     'got {}'.format(shuffle))
+
+  def input_fn():
+    """Numpy input function."""
+
+    # Note that `x` should not be used after conversion to ordered_dict_data,
+    # as type could be either dict or array.
+    ordered_dict_data = _validate_and_convert_features(x)
+
+    # Deep copy keys which is a view in python 3
+    feature_keys = list(ordered_dict_data.keys())
+
+    if y is None:
+      target_keys = None
+    elif isinstance(y, dict):
+      if not y:
+        raise ValueError('y cannot be empty dict, use None instead.')
+
+      ordered_dict_y = collections.OrderedDict(
+          sorted(y.items(), key=lambda t: t[0]))
+      target_keys = list(ordered_dict_y.keys())
+
+      duplicate_keys = set(feature_keys).intersection(set(target_keys))
+      if duplicate_keys:
+        raise ValueError('{} duplicate keys are found in both x and y: '
+                         '{}'.format(len(duplicate_keys), duplicate_keys))
+
+      ordered_dict_data.update(ordered_dict_y)
+    else:
+      target_keys = _get_unique_target_key(ordered_dict_data)
+      ordered_dict_data[target_keys] = y
+
+    if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
+      shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys}
+
+      if target_keys is None:
+        shape_of_y = None
+      elif isinstance(target_keys, string_types):
+        shape_of_y = y.shape
+      else:
+        shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys}
+
+      raise ValueError('Length of tensors in x and y is mismatched. All '
+                       'elements in x and y must have the same length.\n'
+                       'Shapes in x: {}\n'
+                       'Shapes in y: {}\n'.format(shape_dict_of_x, shape_of_y))
+
+    queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
+        ordered_dict_data,
+        queue_capacity,
+        shuffle=shuffle,
+        num_threads=num_threads,
+        enqueue_size=batch_size,
+        num_epochs=num_epochs)
+
+    batch = (
+        queue.dequeue_many(batch_size)
+        if num_epochs is None else queue.dequeue_up_to(batch_size))
+
+    # Remove the first `Tensor` in `batch`, which is the row number.
+    if batch:
+      batch.pop(0)
+
+    if isinstance(x, np.ndarray):
+      # Return as the same type as original array.
+      features = batch[0]
+    else:
+      # Return as the original dict type
+      features = dict(zip(feature_keys, batch[:len(feature_keys)]))
 
-# Include attrs that start with single underscore.
-numpy_io.__all__ = [s for s in dir(numpy_io) if not s.startswith('__')]
+    if target_keys is None:
+      # TODO(martinwicke), return consistent result
+      return features
+    elif isinstance(target_keys, string_types):
+      target = batch[-1]
+      return features, target
+    else:
+      target = dict(zip(target_keys, batch[-len(target_keys):]))
+      return features, target
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.inputs.numpy_io import *
+  return input_fn
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
new file mode 100644
index 0000000000..632908415f
--- /dev/null
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -0,0 +1,620 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for numpy_io."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from tensorflow.python.client import session as session_lib
+from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.feature_column import feature_column_lib as fc
+from tensorflow.python.feature_column.feature_column import _LinearModel
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.platform import test
+from tensorflow.python.training import coordinator
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import queue_runner_impl
+
+
+class NumpyIoTest(test.TestCase):
+
+  def testNumpyInputFn(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = np.arange(-32, -28)
+
+    with self.cached_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+      features, target = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [0, 1])
+      self.assertAllEqual(res[0]['b'], [32, 33])
+      self.assertAllEqual(res[1], [-32, -31])
+
+      session.run([features, target])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithVeryLargeBatchSizeAndMultipleEpochs(self):
+    a = np.arange(2) * 1.0
+    b = np.arange(32, 34)
+    x = {'a': a, 'b': b}
+    y = np.arange(-32, -30)
+
+    with self.cached_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=128, shuffle=False, num_epochs=2)
+      features, target = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [0, 1, 0, 1])
+      self.assertAllEqual(res[0]['b'], [32, 33, 32, 33])
+      self.assertAllEqual(res[1], [-32, -31, -32, -31])
+
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithZeroEpochs(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = np.arange(-32, -28)
+
+    with self.cached_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=0)
+      features, target = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithBatchSizeNotDividedByDataSize(self):
+    batch_size = 2
+    a = np.arange(5) * 1.0
+    b = np.arange(32, 37)
+    x = {'a': a, 'b': b}
+    y = np.arange(-32, -27)
+
+    with self.cached_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
+      features, target = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [0, 1])
+      self.assertAllEqual(res[0]['b'], [32, 33])
+      self.assertAllEqual(res[1], [-32, -31])
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [2, 3])
+      self.assertAllEqual(res[0]['b'], [34, 35])
+      self.assertAllEqual(res[1], [-30, -29])
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [4])
+      self.assertAllEqual(res[0]['b'], [36])
+      self.assertAllEqual(res[1], [-28])
+
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithBatchSizeNotDividedByDataSizeAndMultipleEpochs(self):
+    batch_size = 2
+    a = np.arange(3) * 1.0
+    b = np.arange(32, 35)
+    x = {'a': a, 'b': b}
+    y = np.arange(-32, -29)
+
+    with self.cached_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=batch_size, shuffle=False, num_epochs=3)
+      features, target = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [0, 1])
+      self.assertAllEqual(res[0]['b'], [32, 33])
+      self.assertAllEqual(res[1], [-32, -31])
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [2, 0])
+      self.assertAllEqual(res[0]['b'], [34, 32])
+      self.assertAllEqual(res[1], [-30, -32])
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [1, 2])
+      self.assertAllEqual(res[0]['b'], [33, 34])
+      self.assertAllEqual(res[1], [-31, -30])
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [0, 1])
+      self.assertAllEqual(res[0]['b'], [32, 33])
+      self.assertAllEqual(res[1], [-32, -31])
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [2])
+      self.assertAllEqual(res[0]['b'], [34])
+      self.assertAllEqual(res[1], [-30])
+
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithBatchSizeLargerThanDataSize(self):
+    batch_size = 10
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = np.arange(-32, -28)
+
+    with self.cached_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
+      features, target = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [0, 1, 2, 3])
+      self.assertAllEqual(res[0]['b'], [32, 33, 34, 35])
+      self.assertAllEqual(res[1], [-32, -31, -30, -29])
+
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithDifferentDimensionsOfFeatures(self):
+    a = np.array([[1, 2], [3, 4]])
+    b = np.array([5, 6])
+    x = {'a': a, 'b': b}
+    y = np.arange(-32, -30)
+
+    with self.cached_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+      features, target = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      res = session.run([features, target])
+      self.assertAllEqual(res[0]['a'], [[1, 2], [3, 4]])
+      self.assertAllEqual(res[0]['b'], [5, 6])
+      self.assertAllEqual(res[1], [-32, -31])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithXAsNonDict(self):
+    x = list(range(32, 36))
+    y = np.arange(4)
+    with self.cached_session():
+      with self.assertRaisesRegexp(TypeError, 'x must be a dict or array'):
+        failing_input_fn = numpy_io.numpy_input_fn(
+            x, y, batch_size=2, shuffle=False, num_epochs=1)
+        failing_input_fn()
+
+  def testNumpyInputFnWithXIsEmptyDict(self):
+    x = {}
+    y = np.arange(4)
+    with self.cached_session():
+      with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
+  def testNumpyInputFnWithXIsEmptyArray(self):
+    x = np.array([[], []])
+    y = np.arange(4)
+    with self.cached_session():
+      with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
+  def testNumpyInputFnWithYIsNone(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = None
+
+    with self.cached_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+      features_tensor = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      feature = session.run(features_tensor)
+      self.assertEqual(len(feature), 2)
+      self.assertAllEqual(feature['a'], [0, 1])
+      self.assertAllEqual(feature['b'], [32, 33])
+
+      session.run([features_tensor])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features_tensor])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithNonBoolShuffle(self):
+    x = np.arange(32, 36)
+    y = np.arange(4)
+    with self.cached_session():
+      with self.assertRaisesRegexp(ValueError,
+                                   'shuffle must be provided and explicitly '
+                                   'set as boolean'):
+        # Default shuffle is None.
+        numpy_io.numpy_input_fn(x, y)
+
+  def testNumpyInputFnWithTargetKeyAlreadyInX(self):
+    array = np.arange(32, 36)
+    x = {'__target_key__': array}
+    y = np.arange(4)
+
+    with self.cached_session():
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+      input_fn()
+      self.assertAllEqual(x['__target_key__'], array)
+      # The input x should not be mutated.
+      self.assertItemsEqual(x.keys(), ['__target_key__'])
+
+  def testNumpyInputFnWithMismatchLengthOfInputs(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    x_mismatch_length = {'a': np.arange(1), 'b': b}
+    y_longer_length = np.arange(10)
+
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          ValueError, 'Length of tensors in x and y is mismatched.'):
+        failing_input_fn = numpy_io.numpy_input_fn(
+            x, y_longer_length, batch_size=2, shuffle=False, num_epochs=1)
+        failing_input_fn()
+
+      with self.assertRaisesRegexp(
+          ValueError, 'Length of tensors in x and y is mismatched.'):
+        failing_input_fn = numpy_io.numpy_input_fn(
+            x=x_mismatch_length,
+            y=None,
+            batch_size=2,
+            shuffle=False,
+            num_epochs=1)
+        failing_input_fn()
+
+  def testNumpyInputFnWithYAsDict(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)}
+
+    with self.cached_session() as session:
+      input_fn = numpy_io.numpy_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+      features_tensor, targets_tensor = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      features, targets = session.run([features_tensor, targets_tensor])
+      self.assertEqual(len(features), 2)
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+      self.assertEqual(len(targets), 2)
+      self.assertAllEqual(targets['y1'], [-32, -31])
+      self.assertAllEqual(targets['y2'], [32, 31])
+
+      session.run([features_tensor, targets_tensor])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features_tensor, targets_tensor])
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testNumpyInputFnWithYIsEmptyDict(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = {}
+    with self.cached_session():
+      with self.assertRaisesRegexp(ValueError, 'y cannot be empty'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
+  def testNumpyInputFnWithDuplicateKeysInXAndY(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x = {'a': a, 'b': b}
+    y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b}
+    with self.cached_session():
+      with self.assertRaisesRegexp(
+          ValueError, '2 duplicate keys are found in both x and y'):
+        failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
+        failing_input_fn()
+
+  def testNumpyInputFnWithXIsArray(self):
+    x = np.arange(4) * 1.0
+    y = np.arange(-32, -28)
+
+    input_fn = numpy_io.numpy_input_fn(
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
+    features, target = input_fn()
+
+    with monitored_session.MonitoredSession() as session:
+      res = session.run([features, target])
+      self.assertAllEqual(res[0], [0, 1])
+      self.assertAllEqual(res[1], [-32, -31])
+
+      session.run([features, target])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+  def testNumpyInputFnWithXIsNDArray(self):
+    x = np.arange(16).reshape(4, 2, 2) * 1.0
+    y = np.arange(-48, -32).reshape(4, 2, 2)
+
+    input_fn = numpy_io.numpy_input_fn(
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
+    features, target = input_fn()
+
+    with monitored_session.MonitoredSession() as session:
+      res = session.run([features, target])
+      self.assertAllEqual(res[0], [[[0, 1], [2, 3]], [[4, 5], [6, 7]]])
+      self.assertAllEqual(
+          res[1], [[[-48, -47], [-46, -45]], [[-44, -43], [-42, -41]]])
+
+      session.run([features, target])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features, target])
+
+  def testNumpyInputFnWithXIsArrayYIsDict(self):
+    x = np.arange(4) * 1.0
+    y = {'y1': np.arange(-32, -28)}
+
+    input_fn = numpy_io.numpy_input_fn(
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
+    features_tensor, targets_tensor = input_fn()
+
+    with monitored_session.MonitoredSession() as session:
+      features, targets = session.run([features_tensor, targets_tensor])
+      self.assertEqual(len(features), 2)
+      self.assertAllEqual(features, [0, 1])
+      self.assertEqual(len(targets), 1)
+      self.assertAllEqual(targets['y1'], [-32, -31])
+
+      session.run([features_tensor, targets_tensor])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run([features_tensor, targets_tensor])
+
+  def testArrayAndDictGiveSameOutput(self):
+    a = np.arange(4) * 1.0
+    b = np.arange(32, 36)
+    x_arr = np.vstack((a, b))
+    x_dict = {'feature1': x_arr}
+    y = np.arange(-48, -40).reshape(2, 4)
+
+    input_fn_arr = numpy_io.numpy_input_fn(
+        x_arr, y, batch_size=2, shuffle=False, num_epochs=1)
+    features_arr, targets_arr = input_fn_arr()
+
+    input_fn_dict = numpy_io.numpy_input_fn(
+        x_dict, y, batch_size=2, shuffle=False, num_epochs=1)
+    features_dict, targets_dict = input_fn_dict()
+
+    with monitored_session.MonitoredSession() as session:
+      res_arr, res_dict = session.run([
+          (features_arr, targets_arr), (features_dict, targets_dict)])
+
+      self.assertAllEqual(res_arr[0], res_dict[0]['feature1'])
+      self.assertAllEqual(res_arr[1], res_dict[1])
+
+
+class FeatureColumnIntegrationTest(test.TestCase):
+
+  def _initialized_session(self, config=None):
+    sess = session_lib.Session(config=config)
+    sess.run(variables_lib.global_variables_initializer())
+    sess.run(lookup_ops.tables_initializer())
+    return sess
+
+  def _get_linear_model_bias(self, name='linear_model'):
+    with variable_scope.variable_scope(name, reuse=True):
+      return variable_scope.get_variable('bias_weights')
+
+  def _get_linear_model_column_var(self, column, name='linear_model'):
+    return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES,
+                              name + '/' + column.name)[0]
+
+  def _get_keras_linear_model_predictions(
+      self,
+      features,
+      feature_columns,
+      units=1,
+      sparse_combiner='sum',
+      weight_collections=None,
+      trainable=True,
+      cols_to_vars=None):
+    keras_linear_model = _LinearModel(
+        feature_columns,
+        units,
+        sparse_combiner,
+        weight_collections,
+        trainable,
+        name='linear_model')
+    retval = keras_linear_model(features)  # pylint: disable=not-callable
+    if cols_to_vars is not None:
+      cols_to_vars.update(keras_linear_model.cols_to_vars())
+    return retval
+
+  def test_linear_model_numpy_input_fn(self):
+    price = fc.numeric_column('price')
+    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,])
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'price': np.array([-1., 2., 13., 104.]),
+            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
+        },
+        batch_size=2,
+        shuffle=False)
+    features = input_fn()
+    net = fc.linear_model(features, [price_buckets, body_style])
+    # self.assertEqual(1 + 3 + 5, net.shape[1])
+    with self._initialized_session() as sess:
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
+
+      bias = self._get_linear_model_bias()
+      price_buckets_var = self._get_linear_model_column_var(price_buckets)
+      body_style_var = self._get_linear_model_column_var(body_style)
+
+      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
+      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
+      sess.run(bias.assign([5.]))
+
+      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def test_linear_model_impl_numpy_input_fn(self):
+    price = fc.numeric_column('price')
+    price_buckets = fc.bucketized_column(
+        price, boundaries=[
+            0.,
+            10.,
+            100.,
+        ])
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'price': np.array([-1., 2., 13., 104.]),
+            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
+        },
+        batch_size=2,
+        shuffle=False)
+    features = input_fn()
+    net = self._get_keras_linear_model_predictions(
+        features, [price_buckets, body_style])
+    # self.assertEqual(1 + 3 + 5, net.shape[1])
+    with self._initialized_session() as sess:
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
+
+      bias = self._get_linear_model_bias()
+      price_buckets_var = self._get_linear_model_column_var(price_buckets)
+      body_style_var = self._get_linear_model_column_var(body_style)
+
+      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
+      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
+      sess.run(bias.assign([5.]))
+
+      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def test_functional_input_layer_with_numpy_input_fn(self):
+    embedding_values = (
+        (1., 2., 3., 4., 5.),  # id 0
+        (6., 7., 8., 9., 10.),  # id 1
+        (11., 12., 13., 14., 15.)  # id 2
+    )
+    def _initializer(shape, dtype, partition_info):
+      del shape, dtype, partition_info
+      return embedding_values
+
+    # price has 1 dimension in input_layer
+    price = fc.numeric_column('price')
+    body_style = fc.categorical_column_with_vocabulary_list(
+        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
+    # one_hot_body_style has 3 dims in input_layer.
+    one_hot_body_style = fc.indicator_column(body_style)
+    # embedded_body_style has 5 dims in input_layer.
+    embedded_body_style = fc.embedding_column(body_style, dimension=5,
+                                              initializer=_initializer)
+
+    input_fn = numpy_io.numpy_input_fn(
+        x={
+            'price': np.array([11., 12., 13., 14.]),
+            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
+        },
+        batch_size=2,
+        shuffle=False)
+    features = input_fn()
+    net = fc.input_layer(features,
+                         [price, one_hot_body_style, embedded_body_style])
+    self.assertEqual(1 + 3 + 5, net.shape[1])
+    with self._initialized_session() as sess:
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
+
+      # Each row is formed by concatenating `embedded_body_style`,
+      # `one_hot_body_style`, and `price` in order.
+      self.assertAllEqual(
+          [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
+           [1., 2., 3., 4., 5., 1., 0., 0., 12]],
+          sess.run(net))
+
+      coord.request_stop()
+      coord.join(threads)
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py
index 50bff2f717..616bcb410f 100644
--- a/tensorflow/python/estimator/inputs/pandas_io.py
+++ b/tensorflow/python/estimator/inputs/pandas_io.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,146 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""pandas_io python module.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Methods to allow pandas.DataFrame."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import six
+import uuid
 
-from tensorflow_estimator.python.estimator.inputs import pandas_io
+import numpy as np
+from tensorflow.python.estimator.inputs.queues import feeding_functions
+from tensorflow.python.util.tf_export import estimator_export
 
-# Include attrs that start with single underscore.
-pandas_io.__all__ = [s for s in dir(pandas_io) if not s.startswith('__')]
+try:
+  # pylint: disable=g-import-not-at-top
+  # pylint: disable=unused-import
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.inputs.pandas_io import *
+
+def _get_unique_target_key(features, target_column_name):
+  """Returns a key that does not exist in the input DataFrame `features`.
+
+  Args:
+    features: DataFrame
+    target_column_name: Name of the target column as a `str`
+
+  Returns:
+    A unique key that can be used to insert the target into
+      features.
+  """
+  if target_column_name in features:
+    target_column_name += '_' + str(uuid.uuid4())
+  return target_column_name
+
+
+@estimator_export('estimator.inputs.pandas_input_fn')
+def pandas_input_fn(x,
+                    y=None,
+                    batch_size=128,
+                    num_epochs=1,
+                    shuffle=None,
+                    queue_capacity=1000,
+                    num_threads=1,
+                    target_column='target'):
+  """Returns input function that would feed Pandas DataFrame into the model.
+
+  Note: `y`'s index must match `x`'s index.
+
+  Args:
+    x: pandas `DataFrame` object.
+    y: pandas `Series` object or `DataFrame`. `None` if absent.
+    batch_size: int, size of batches to return.
+    num_epochs: int, number of epochs to iterate over data. If not `None`,
+      read attempts that would exceed this value will raise `OutOfRangeError`.
+    shuffle: bool, whether to read the records in random order.
+    queue_capacity: int, size of the read queue. If `None`, it will be set
+      roughly to the size of `x`.
+    num_threads: Integer, number of threads used for reading and enqueueing. In
+      order to have predicted and repeatable order of reading and enqueueing,
+      such as in prediction and evaluation mode, `num_threads` should be 1.
+    target_column: str, name to give the target column `y`. This parameter
+      is not used when `y` is a `DataFrame`.
+
+  Returns:
+    Function, that has signature of ()->(dict of `features`, `target`)
+
+  Raises:
+    ValueError: if `x` already contains a column with the same name as `y`, or
+      if the indexes of `x` and `y` don't match.
+    ValueError: if 'shuffle' is not provided or a bool.
+  """
+  if not HAS_PANDAS:
+    raise TypeError(
+        'pandas_input_fn should not be called without pandas installed')
+
+  if not isinstance(shuffle, bool):
+    raise ValueError('shuffle must be provided and explicitly set as boolean '
+                     '(it is recommended to set it as True for training); '
+                     'got {}'.format(shuffle))
+
+  if not isinstance(target_column, six.string_types):
+    raise TypeError('target_column must be a string type')
+
+  x = x.copy()
+  if y is not None:
+    if target_column in x:
+      raise ValueError(
+          'Cannot use name %s for target column: DataFrame already has a '
+          'column with that name: %s' % (target_column, x.columns))
+    if not np.array_equal(x.index, y.index):
+      raise ValueError('Index for x and y are mismatched.\nIndex for x: %s\n'
+                       'Index for y: %s\n' % (x.index, y.index))
+    if isinstance(y, pd.DataFrame):
+      y_columns = [(column, _get_unique_target_key(x, column))
+                   for column in list(y)]
+      target_column = [v for _, v in y_columns]
+      x[target_column] = y
+    else:
+      x[target_column] = y
+
+  # TODO(mdan): These are memory copies. We probably don't need 4x slack space.
+  # The sizes below are consistent with what I've seen elsewhere.
+  if queue_capacity is None:
+    if shuffle:
+      queue_capacity = 4 * len(x)
+    else:
+      queue_capacity = len(x)
+  min_after_dequeue = max(queue_capacity / 4, 1)
+
+  def input_fn():
+    """Pandas input function."""
+    queue = feeding_functions._enqueue_data(  # pylint: disable=protected-access
+        x,
+        queue_capacity,
+        shuffle=shuffle,
+        min_after_dequeue=min_after_dequeue,
+        num_threads=num_threads,
+        enqueue_size=batch_size,
+        num_epochs=num_epochs)
+    if num_epochs is None:
+      features = queue.dequeue_many(batch_size)
+    else:
+      features = queue.dequeue_up_to(batch_size)
+    assert len(features) == len(x.columns) + 1, ('Features should have one '
+                                                 'extra element for the index.')
+    features = features[1:]
+    features = dict(zip(list(x.columns), features))
+    if y is not None:
+      if isinstance(target_column, list):
+        keys = [k for k, _ in y_columns]
+        values = [features.pop(column) for column in target_column]
+        target = {k: v for k, v in zip(keys, values)}
+      else:
+        target = features.pop(target_column)
+      return features, target
+    return features
+  return input_fn
diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py
new file mode 100644
index 0000000000..9e69fc72dc
--- /dev/null
+++ b/tensorflow/python/estimator/inputs/pandas_io_test.py
@@ -0,0 +1,320 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for pandas_io."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.estimator.inputs import pandas_io
+from tensorflow.python.framework import errors
+from tensorflow.python.platform import test
+from tensorflow.python.training import coordinator
+from tensorflow.python.training import queue_runner_impl
+
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+
+class PandasIoTest(test.TestCase):
+
+  def makeTestDataFrame(self):
+    index = np.arange(100, 104)
+    a = np.arange(4)
+    b = np.arange(32, 36)
+    x = pd.DataFrame({'a': a, 'b': b}, index=index)
+    y = pd.Series(np.arange(-32, -28), index=index)
+    return x, y
+
+  def makeTestDataFrameWithYAsDataFrame(self):
+    index = np.arange(100, 104)
+    a = np.arange(4)
+    b = np.arange(32, 36)
+    a_label = np.arange(10, 14)
+    b_label = np.arange(50, 54)
+    x = pd.DataFrame({'a': a, 'b': b}, index=index)
+    y = pd.DataFrame({'a_target': a_label, 'b_target': b_label}, index=index)
+    return x, y
+
+  def callInputFnOnce(self, input_fn, session):
+    results = input_fn()
+    coord = coordinator.Coordinator()
+    threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+    result_values = session.run(results)
+    coord.request_stop()
+    coord.join(threads)
+    return result_values
+
+  def testPandasInputFn_IndexMismatch(self):
+    if not HAS_PANDAS:
+      return
+    x, _ = self.makeTestDataFrame()
+    y_noindex = pd.Series(np.arange(-32, -28))
+    with self.assertRaises(ValueError):
+      pandas_io.pandas_input_fn(
+          x, y_noindex, batch_size=2, shuffle=False, num_epochs=1)
+
+  def testPandasInputFn_RaisesWhenTargetColumnIsAList(self):
+    if not HAS_PANDAS:
+      return
+
+    x, y = self.makeTestDataFrame()
+
+    with self.assertRaisesRegexp(TypeError,
+                                 'target_column must be a string type'):
+      pandas_io.pandas_input_fn(x, y, batch_size=2,
+                                shuffle=False,
+                                num_epochs=1,
+                                target_column=['one', 'two'])
+
+  def testPandasInputFn_NonBoolShuffle(self):
+    if not HAS_PANDAS:
+      return
+    x, _ = self.makeTestDataFrame()
+    y_noindex = pd.Series(np.arange(-32, -28))
+    with self.assertRaisesRegexp(ValueError,
+                                 'shuffle must be provided and explicitly '
+                                 'set as boolean'):
+      # Default shuffle is None
+      pandas_io.pandas_input_fn(x, y_noindex)
+
+  def testPandasInputFn_ProducesExpectedOutputs(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      x, y = self.makeTestDataFrame()
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+
+      features, target = self.callInputFnOnce(input_fn, session)
+
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+      self.assertAllEqual(target, [-32, -31])
+
+  def testPandasInputFnWhenYIsDataFrame_ProducesExpectedOutput(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      x, y = self.makeTestDataFrameWithYAsDataFrame()
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+
+      features, targets = self.callInputFnOnce(input_fn, session)
+
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+      self.assertAllEqual(targets['a_target'], [10, 11])
+      self.assertAllEqual(targets['b_target'], [50, 51])
+
+  def testPandasInputFnYIsDataFrame_HandlesOverlappingColumns(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      x, y = self.makeTestDataFrameWithYAsDataFrame()
+      y = y.rename(columns={'a_target': 'a', 'b_target': 'b'})
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+
+      features, targets = self.callInputFnOnce(input_fn, session)
+
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+      self.assertAllEqual(targets['a'], [10, 11])
+      self.assertAllEqual(targets['b'], [50, 51])
+
+  def testPandasInputFnYIsDataFrame_HandlesOverlappingColumnsInTargets(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      x, y = self.makeTestDataFrameWithYAsDataFrame()
+      y = y.rename(columns={'a_target': 'a', 'b_target': 'a_n'})
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+
+      features, targets = self.callInputFnOnce(input_fn, session)
+
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+      self.assertAllEqual(targets['a'], [10, 11])
+      self.assertAllEqual(targets['a_n'], [50, 51])
+
+  def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      index = np.arange(100, 102)
+      a = np.arange(2)
+      b = np.arange(32, 34)
+      x = pd.DataFrame({'a': a, 'b': b}, index=index)
+      y = pd.Series(np.arange(-32, -30), index=index)
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=128, shuffle=False, num_epochs=2)
+
+      results = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      features, target = session.run(results)
+      self.assertAllEqual(features['a'], [0, 1, 0, 1])
+      self.assertAllEqual(features['b'], [32, 33, 32, 33])
+      self.assertAllEqual(target, [-32, -31, -32, -31])
+
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run(results)
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testPandasInputFn_ProducesOutputsWhenDataSizeNotDividedByBatchSize(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      index = np.arange(100, 105)
+      a = np.arange(5)
+      b = np.arange(32, 37)
+      x = pd.DataFrame({'a': a, 'b': b}, index=index)
+      y = pd.Series(np.arange(-32, -27), index=index)
+
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+
+      results = input_fn()
+
+      coord = coordinator.Coordinator()
+      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+
+      features, target = session.run(results)
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+      self.assertAllEqual(target, [-32, -31])
+
+      features, target = session.run(results)
+      self.assertAllEqual(features['a'], [2, 3])
+      self.assertAllEqual(features['b'], [34, 35])
+      self.assertAllEqual(target, [-30, -29])
+
+      features, target = session.run(results)
+      self.assertAllEqual(features['a'], [4])
+      self.assertAllEqual(features['b'], [36])
+      self.assertAllEqual(target, [-28])
+
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run(results)
+
+      coord.request_stop()
+      coord.join(threads)
+
+  def testPandasInputFn_OnlyX(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      x, _ = self.makeTestDataFrame()
+      input_fn = pandas_io.pandas_input_fn(
+          x, y=None, batch_size=2, shuffle=False, num_epochs=1)
+
+      features = self.callInputFnOnce(input_fn, session)
+
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+
+  def testPandasInputFn_ExcludesIndex(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      x, y = self.makeTestDataFrame()
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)
+
+      features, _ = self.callInputFnOnce(input_fn, session)
+
+      self.assertFalse('index' in features)
+
+  def assertInputsCallableNTimes(self, input_fn, session, n):
+    inputs = input_fn()
+    coord = coordinator.Coordinator()
+    threads = queue_runner_impl.start_queue_runners(session, coord=coord)
+    for _ in range(n):
+      session.run(inputs)
+    with self.assertRaises(errors.OutOfRangeError):
+      session.run(inputs)
+    coord.request_stop()
+    coord.join(threads)
+
+  def testPandasInputFn_RespectsEpoch_NoShuffle(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      x, y = self.makeTestDataFrame()
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=4, shuffle=False, num_epochs=1)
+
+      self.assertInputsCallableNTimes(input_fn, session, 1)
+
+  def testPandasInputFn_RespectsEpoch_WithShuffle(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      x, y = self.makeTestDataFrame()
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=4, shuffle=True, num_epochs=1)
+
+      self.assertInputsCallableNTimes(input_fn, session, 1)
+
+  def testPandasInputFn_RespectsEpoch_WithShuffleAutosize(self):
+    if not HAS_PANDAS:
+      return
+    with self.cached_session() as session:
+      x, y = self.makeTestDataFrame()
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=2, shuffle=True, queue_capacity=None, num_epochs=2)
+
+      self.assertInputsCallableNTimes(input_fn, session, 4)
+
+  def testPandasInputFn_RespectsEpochUnevenBatches(self):
+    if not HAS_PANDAS:
+      return
+    x, y = self.makeTestDataFrame()
+    with self.cached_session() as session:
+      input_fn = pandas_io.pandas_input_fn(
+          x, y, batch_size=3, shuffle=False, num_epochs=1)
+
+      # Before the last batch, only one element of the epoch should remain.
+      self.assertInputsCallableNTimes(input_fn, session, 2)
+
+  def testPandasInputFn_Idempotent(self):
+    if not HAS_PANDAS:
+      return
+    x, y = self.makeTestDataFrame()
+    for _ in range(2):
+      pandas_io.pandas_input_fn(
+          x, y, batch_size=2, shuffle=False, num_epochs=1)()
+    for _ in range(2):
+      pandas_io.pandas_input_fn(
+          x, y, batch_size=2, shuffle=True, num_epochs=1)()
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/inputs/queues/__init__.py b/tensorflow/python/estimator/inputs/queues/__init__.py
index 70b95b81c3..e69de29bb2 100644
--- a/tensorflow/python/estimator/inputs/queues/__init__.py
+++ b/tensorflow/python/estimator/inputs/queues/__init__.py
@@ -1,32 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""queues python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow_estimator.python.estimator.inputs import queues
-
-# Include attrs that start with single underscore.
-queues.__all__ = [s for s in dir(queues) if not s.startswith('__')]
-
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.inputs.queues import *
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
index d6b0231501..51a61adb21 100644
--- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py
+++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,502 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""feeding_functions python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Helper functions for enqueuing data from arrays and pandas `DataFrame`s."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.inputs.queues import feeding_functions
+import collections
+import random
+import types as tp
+import numpy as np
+import six
+
+from tensorflow.python.estimator.inputs.queues import feeding_queue_runner as fqr
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.summary import summary
+from tensorflow.python.training import queue_runner
+
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+
+def _fill_array(arr, seq, fillvalue=0):
+  """Recursively fills padded arr with elements from seq.
+
+  If length of seq is less than arr padded length, fillvalue used.
+  Args:
+    arr: Padded tensor of shape [batch_size, ..., max_padded_dim_len].
+    seq: Non-padded list of data samples of shape
+      [batch_size, ..., padded_dim(None)]
+    fillvalue: Default fillvalue to use.
+  """
+  if arr.ndim == 1:
+    try:
+      len_ = len(seq)
+    except TypeError:
+      len_ = 0
+    arr[:len_] = seq
+    arr[len_:] = fillvalue
+  else:
+    for subarr, subseq in six.moves.zip_longest(arr, seq, fillvalue=()):
+      _fill_array(subarr, subseq, fillvalue)
+
+
+def _pad_if_needed(batch_key_item, fillvalue=0):
+  """ Returns padded batch.
+
+  Args:
+    batch_key_item: List of data samples of any type with shape
+      [batch_size, ..., padded_dim(None)].
+    fillvalue: Default fillvalue to use.
+
+  Returns:
+    Padded with zeros tensor of same type and shape
+      [batch_size, ..., max_padded_dim_len].
+
+  Raises:
+    ValueError if data samples have different shapes (except last padded dim).
+  """
+  shapes = [
+      seq.shape[:-1] if len(seq.shape) > 0 else -1 for seq in batch_key_item
+  ]
+  if not all(shapes[0] == x for x in shapes):
+    raise ValueError("Array shapes must match.")
+
+  last_length = [
+      seq.shape[-1] if len(seq.shape) > 0 else 0 for seq in batch_key_item
+  ]
+  if all([x == last_length[0] for x in last_length]):
+    return batch_key_item
+
+  batch_size = len(batch_key_item)
+  max_sequence_length = max(last_length)
+  result_batch = np.zeros(
+      shape=[batch_size] + list(shapes[0]) + [max_sequence_length],
+      dtype=batch_key_item[0].dtype)
+  _fill_array(result_batch, batch_key_item, fillvalue)
+  return result_batch
+
+
+def _get_integer_indices_for_next_batch(batch_indices_start, batch_size,
+                                        epoch_end, array_length, current_epoch,
+                                        total_epochs):
+  """Returns the integer indices for next batch.
+
+  If total epochs is not None and current epoch is the final epoch, the end
+  index of the next batch should not exceed the `epoch_end` (i.e., the final
+  batch might not have size `batch_size` to avoid overshooting the last epoch).
+
+  Args:
+    batch_indices_start: Integer, the index to start next batch.
+    batch_size: Integer, size of batches to return.
+    epoch_end: Integer, the end index of the epoch. The epoch could start from a
+      random position, so `epoch_end` provides the end index for that.
+    array_length: Integer, the length of the array.
+    current_epoch: Integer, the epoch number has been emitted.
+    total_epochs: Integer or `None`, the total number of epochs to emit. If
+      `None` will run forever.
+
+  Returns:
+    A tuple of a list with integer indices for next batch and `current_epoch`
+    value after the next batch.
+
+  Raises:
+    OutOfRangeError if `current_epoch` is not less than `total_epochs`.
+
+  """
+  if total_epochs is not None and current_epoch >= total_epochs:
+    raise errors.OutOfRangeError(None, None,
+                                 "Already emitted %s epochs." % current_epoch)
+
+  batch_indices_end = batch_indices_start + batch_size
+  batch_indices = [
+      j % array_length for j in range(batch_indices_start, batch_indices_end)
+  ]
+  epoch_end_indices = [i for i, x in enumerate(batch_indices) if x == epoch_end]
+  current_epoch += len(epoch_end_indices)
+
+  if total_epochs is None or current_epoch < total_epochs:
+    return (batch_indices, current_epoch)
+
+  # Now we might have emitted more data for expected epochs. Need to trim.
+  final_epoch_end_inclusive = epoch_end_indices[
+      -(current_epoch - total_epochs + 1)]
+  batch_indices = batch_indices[:final_epoch_end_inclusive + 1]
+
+  return (batch_indices, total_epochs)
+
+
+class _ArrayFeedFn(object):
+  """Creates feed dictionaries from numpy arrays."""
+
+  def __init__(self,
+               placeholders,
+               array,
+               batch_size,
+               random_start=False,
+               seed=None,
+               num_epochs=None):
+    if len(placeholders) != 2:
+      raise ValueError("_array_feed_fn expects 2 placeholders; got {}.".format(
+          len(placeholders)))
+    self._placeholders = placeholders
+    self._array = array
+    self._max = len(array)
+    self._batch_size = batch_size
+    self._num_epochs = num_epochs
+    self._epoch = 0
+    random.seed(seed)
+    self._trav = random.randrange(self._max) if random_start else 0
+    self._epoch_end = (self._trav - 1) % self._max
+
+  def __call__(self):
+    integer_indexes, self._epoch = _get_integer_indices_for_next_batch(
+        batch_indices_start=self._trav,
+        batch_size=self._batch_size,
+        epoch_end=self._epoch_end,
+        array_length=self._max,
+        current_epoch=self._epoch,
+        total_epochs=self._num_epochs)
+
+    self._trav = (integer_indexes[-1] + 1) % self._max
+    return {
+        self._placeholders[0]: integer_indexes,
+        self._placeholders[1]: self._array[integer_indexes]
+    }
+
+
+class _OrderedDictNumpyFeedFn(object):
+  """Creates feed dictionaries from `OrderedDict`s of numpy arrays."""
+
+  def __init__(self,
+               placeholders,
+               ordered_dict_of_arrays,
+               batch_size,
+               random_start=False,
+               seed=None,
+               num_epochs=None):
+    if len(placeholders) != len(ordered_dict_of_arrays) + 1:
+      raise ValueError("Expected {} placeholders; got {}.".format(
+          len(ordered_dict_of_arrays), len(placeholders)))
+    self._index_placeholder = placeholders[0]
+    self._col_placeholders = placeholders[1:]
+    self._ordered_dict_of_arrays = ordered_dict_of_arrays
+    self._max = len(next(iter(ordered_dict_of_arrays.values())))
+    for _, v in ordered_dict_of_arrays.items():
+      if len(v) != self._max:
+        raise ValueError("Array lengths must match.")
+    self._batch_size = batch_size
+    self._num_epochs = num_epochs
+    self._epoch = 0
+    random.seed(seed)
+    self._trav = random.randrange(self._max) if random_start else 0
+    self._epoch_end = (self._trav - 1) % self._max
+
+  def __call__(self):
+    integer_indexes, self._epoch = _get_integer_indices_for_next_batch(
+        batch_indices_start=self._trav,
+        batch_size=self._batch_size,
+        epoch_end=self._epoch_end,
+        array_length=self._max,
+        current_epoch=self._epoch,
+        total_epochs=self._num_epochs)
+
+    self._trav = (integer_indexes[-1] + 1) % self._max
+    feed_dict = {self._index_placeholder: integer_indexes}
+    cols = [
+        column[integer_indexes]
+        for column in self._ordered_dict_of_arrays.values()
+    ]
+    feed_dict.update(dict(zip(self._col_placeholders, cols)))
+    return feed_dict
+
+
+class _PandasFeedFn(object):
+  """Creates feed dictionaries from pandas `DataFrames`."""
+
+  def __init__(self,
+               placeholders,
+               dataframe,
+               batch_size,
+               random_start=False,
+               seed=None,
+               num_epochs=None):
+    if len(placeholders) != len(dataframe.columns) + 1:
+      raise ValueError("Expected {} placeholders; got {}.".format(
+          len(dataframe.columns) + 1, len(placeholders)))
+    self._index_placeholder = placeholders[0]
+    self._col_placeholders = placeholders[1:]
+    self._dataframe = dataframe
+    self._max = len(dataframe)
+    self._batch_size = batch_size
+    self._num_epochs = num_epochs
+    self._epoch = 0
+    random.seed(seed)
+    self._trav = random.randrange(self._max) if random_start else 0
+    self._epoch_end = (self._trav - 1) % self._max
+
+  def __call__(self):
+    integer_indexes, self._epoch = _get_integer_indices_for_next_batch(
+        batch_indices_start=self._trav,
+        batch_size=self._batch_size,
+        epoch_end=self._epoch_end,
+        array_length=self._max,
+        current_epoch=self._epoch,
+        total_epochs=self._num_epochs)
+
+    self._trav = (integer_indexes[-1] + 1) % self._max
+    result = self._dataframe.iloc[integer_indexes]
+    cols = [result[col].values for col in result.columns]
+    feed_dict = dict(zip(self._col_placeholders, cols))
+    feed_dict[self._index_placeholder] = result.index.values
+    return feed_dict
+
+
+class _GeneratorFeedFn(object):
+  """Creates feed dictionaries from `Generator` of `dicts` of numpy arrays."""
+
+  def __init__(self,
+               placeholders,
+               generator,
+               batch_size,
+               random_start=False,
+               seed=None,
+               num_epochs=None,
+               pad_value=None):
+    first_sample = next(generator())
+    if len(placeholders) != len(first_sample):
+      raise ValueError("Expected {} placeholders; got {}.".format(
+          len(first_sample), len(placeholders)))
+    self._keys = sorted(list(first_sample.keys()))
+    self._col_placeholders = placeholders
+    self._generator_function = generator
+    self._iterator = generator()
+    self._batch_size = batch_size
+    self._num_epochs = num_epochs
+    self._epoch = 0
+    self._pad_value = pad_value
+    random.seed(seed)
+
+  def __call__(self):
+    if self._num_epochs and self._epoch >= self._num_epochs:
+      raise errors.OutOfRangeError(None, None,
+                                   "Already emitted %s epochs." % self._epoch)
+    list_dict = {}
+    list_dict_size = 0
+    while list_dict_size < self._batch_size:
+      try:
+        data_row = next(self._iterator)
+      except StopIteration:
+        self._epoch += 1
+        self._iterator = self._generator_function()
+        data_row = next(self._iterator)
+      for index, key in enumerate(self._keys):
+        if key not in data_row.keys():
+          raise KeyError("key mismatch between dicts emitted by GenFun "
+                         "Expected {} keys; got {}".format(
+                             self._keys, data_row.keys()))
+        list_dict.setdefault(self._col_placeholders[index], list()).append(
+            data_row[key])
+        list_dict_size += 1
+
+    if self._pad_value is not None:
+      feed_dict = {
+          key: np.asarray(_pad_if_needed(item, self._pad_value))
+          for key, item in list(list_dict.items())
+      }
+    else:
+      feed_dict = {
+          key: np.asarray(item)
+          for key, item in list(list_dict.items())
+      }
+    return feed_dict
+
+
+def _enqueue_data(data,
+                  capacity,
+                  shuffle=False,
+                  min_after_dequeue=None,
+                  num_threads=1,
+                  seed=None,
+                  name="enqueue_input",
+                  enqueue_size=1,
+                  num_epochs=None,
+                  pad_value=None):
+  """Creates a queue filled from a numpy array or pandas `DataFrame`.
+
+    Returns a queue filled with the rows of the given (`OrderedDict` of) array
+    or `DataFrame`. In the case of a pandas `DataFrame`, the first enqueued
+    `Tensor` corresponds to the index of the `DataFrame`. For (`OrderedDict` of)
+    numpy arrays, the first enqueued `Tensor` contains the row number.
+
+  Args:
+    data: a numpy `ndarray`, `OrderedDict` of numpy arrays, or a generator
+       yielding `dict`s of numpy arrays or pandas `DataFrame` that will be read
+       into the queue.
+    capacity: the capacity of the queue.
+    shuffle: whether or not to shuffle the rows of the array.
+    min_after_dequeue: minimum number of elements that can remain in the queue
+    after a dequeue operation. Only used when `shuffle` is true. If not set,
+    defaults to `capacity` / 4.
+    num_threads: number of threads used for reading and enqueueing.
+    seed: used to seed shuffling and reader starting points.
+    name: a scope name identifying the data.
+    enqueue_size: the number of rows to enqueue per step.
+    num_epochs: limit enqueuing to a specified number of epochs, if provided.
+    pad_value: default value for dynamic padding of data samples, if provided.
+
+  Returns:
+    A queue filled with the rows of the given (`OrderedDict` of) array or
+      `DataFrame`.
+
+  Raises:
+    TypeError: `data` is not a Pandas `DataFrame`, an `OrderedDict` of numpy
+      arrays, a numpy `ndarray`, or a generator producing these.
+    NotImplementedError: padding and shuffling data at the same time.
+    NotImplementedError: padding usage with non generator data type.
+  """
+  with ops.name_scope(name):
+    if isinstance(data, np.ndarray):
+      types = [dtypes.int64, dtypes.as_dtype(data.dtype)]
+      queue_shapes = [(), data.shape[1:]]
+      get_feed_fn = _ArrayFeedFn
+    elif isinstance(data, collections.OrderedDict):
+      types = [dtypes.int64
+              ] + [dtypes.as_dtype(col.dtype) for col in data.values()]
+      queue_shapes = [()] + [col.shape[1:] for col in data.values()]
+      get_feed_fn = _OrderedDictNumpyFeedFn
+    elif isinstance(data, tp.FunctionType):
+      x_first_el = six.next(data())
+      x_first_keys = sorted(x_first_el.keys())
+      x_first_values = [x_first_el[key] for key in x_first_keys]
+      types = [dtypes.as_dtype(col.dtype) for col in x_first_values]
+      queue_shapes = [col.shape for col in x_first_values]
+      get_feed_fn = _GeneratorFeedFn
+    elif HAS_PANDAS and isinstance(data, pd.DataFrame):
+      types = [
+          dtypes.as_dtype(dt) for dt in [data.index.dtype] + list(data.dtypes)
+      ]
+      queue_shapes = [() for _ in types]
+      get_feed_fn = _PandasFeedFn
+    else:
+      raise TypeError(
+          "data must be either a numpy array or pandas DataFrame if pandas is "
+          "installed; got {}".format(type(data).__name__))
+
+    pad_data = pad_value is not None
+    if pad_data and get_feed_fn is not _GeneratorFeedFn:
+      raise NotImplementedError(
+          "padding is only available with generator usage")
+    if shuffle and pad_data:
+      raise NotImplementedError(
+          "padding and shuffling data at the same time is not implemented")
+
+    # TODO(jamieas): TensorBoard warnings for all warnings below once available.
+
+    if num_threads > 1 and num_epochs is not None:
+      logging.warning(
+          "enqueue_data was called with num_epochs and num_threads > 1. "
+          "num_epochs is applied per thread, so this will produce more "
+          "epochs than you probably intend. "
+          "If you want to limit epochs, use one thread.")
+
+    if shuffle and num_threads > 1 and num_epochs is not None:
+      logging.warning(
+          "enqueue_data was called with shuffle=True, num_threads > 1, and "
+          "num_epochs. This will create multiple threads, all reading the "
+          "array/dataframe in order adding to the same shuffling queue; the "
+          "results will likely not be sufficiently shuffled.")
+
+    if not shuffle and num_threads > 1:
+      logging.warning(
+          "enqueue_data was called with shuffle=False and num_threads > 1. "
+          "This will create multiple threads, all reading the "
+          "array/dataframe in order. If you want examples read in order, use"
+          " one thread; if you want multiple threads, enable shuffling.")
+
+    if shuffle:
+      min_after_dequeue = int(capacity / 4 if min_after_dequeue is None else
+                              min_after_dequeue)
+      queue = data_flow_ops.RandomShuffleQueue(
+          capacity,
+          min_after_dequeue,
+          dtypes=types,
+          shapes=queue_shapes,
+          seed=seed)
+    elif pad_data:
+      min_after_dequeue = 0  # just for the summary text
+      queue_shapes = list(
+          map(lambda x: tuple(list(x[:-1]) + [None]) if len(x) > 0 else x,
+              queue_shapes))
+      queue = data_flow_ops.PaddingFIFOQueue(
+          capacity, dtypes=types, shapes=queue_shapes)
+    else:
+      min_after_dequeue = 0  # just for the summary text
+      queue = data_flow_ops.FIFOQueue(
+          capacity, dtypes=types, shapes=queue_shapes)
+
+    enqueue_ops = []
+    feed_fns = []
+
+    for i in range(num_threads):
+      # Note the placeholders have no shapes, so they will accept any
+      # enqueue_size.  enqueue_many below will break them up.
+      placeholders = [array_ops.placeholder(t) for t in types]
+
+      enqueue_ops.append(queue.enqueue_many(placeholders))
+      seed_i = None if seed is None else (i + 1) * seed
+
+      if not pad_data:
+        feed_fns.append(
+            get_feed_fn(
+                placeholders,
+                data,
+                enqueue_size,
+                random_start=shuffle,
+                seed=seed_i,
+                num_epochs=num_epochs))
+      else:
+        feed_fns.append(
+            get_feed_fn(
+                placeholders,
+                data,
+                enqueue_size,
+                random_start=shuffle,
+                seed=seed_i,
+                num_epochs=num_epochs,
+                pad_value=pad_value))
 
-# Include attrs that start with single underscore.
-feeding_functions.__all__ = [
-    s for s in dir(feeding_functions) if not s.startswith('__')
-]
+    runner = fqr._FeedingQueueRunner(  # pylint: disable=protected-access
+        queue=queue,
+        enqueue_ops=enqueue_ops,
+        feed_fns=feed_fns)
+    queue_runner.add_queue_runner(runner)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.inputs.queues.feeding_functions import *
+    full = (
+        math_ops.cast(
+            math_ops.maximum(0,
+                             queue.size() - min_after_dequeue), dtypes.float32)
+        * (1. / (capacity - min_after_dequeue)))
+    # Note that name contains a '/' at the end so we intentionally do not place
+    # a '/' after %s below.
+    summary_name = ("queue/%sfraction_over_%d_of_%d_full" %
+                    (queue.name, min_after_dequeue,
+                     capacity - min_after_dequeue))
+    summary.scalar(summary_name, full)
+    return queue
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions_test.py b/tensorflow/python/estimator/inputs/queues/feeding_functions_test.py
new file mode 100644
index 0000000000..30abd82130
--- /dev/null
+++ b/tensorflow/python/estimator/inputs/queues/feeding_functions_test.py
@@ -0,0 +1,391 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests feeding functions using arrays and `DataFrames`."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+import numpy as np
+
+from tensorflow.python.estimator.inputs.queues import feeding_functions as ff
+from tensorflow.python.platform import test
+
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+
+def vals_to_list(a):
+  return {
+      key: val.tolist() if isinstance(val, np.ndarray) else val
+      for key, val in a.items()
+  }
+
+
+class _FeedingFunctionsTestCase(test.TestCase):
+  """Tests for feeding functions."""
+
+  def testArrayFeedFnBatchOne(self):
+    array = np.arange(32).reshape([16, 2])
+    placeholders = ["index_placeholder", "value_placeholder"]
+    aff = ff._ArrayFeedFn(placeholders, array, 1)
+
+    # cycle around a couple times
+    for x in range(0, 100):
+      i = x % 16
+      expected = {
+          "index_placeholder": [i],
+          "value_placeholder": [[2 * i, 2 * i + 1]]
+      }
+      actual = aff()
+      self.assertEqual(expected, vals_to_list(actual))
+
+  def testArrayFeedFnBatchFive(self):
+    array = np.arange(32).reshape([16, 2])
+    placeholders = ["index_placeholder", "value_placeholder"]
+    aff = ff._ArrayFeedFn(placeholders, array, 5)
+
+    # cycle around a couple times
+    for _ in range(0, 101, 2):
+      aff()
+
+    expected = {
+        "index_placeholder": [15, 0, 1, 2, 3],
+        "value_placeholder": [[30, 31], [0, 1], [2, 3], [4, 5], [6, 7]]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testArrayFeedFnBatchTwoWithOneEpoch(self):
+    array = np.arange(5) + 10
+    placeholders = ["index_placeholder", "value_placeholder"]
+    aff = ff._ArrayFeedFn(placeholders, array, batch_size=2, num_epochs=1)
+
+    expected = {
+        "index_placeholder": [0, 1],
+        "value_placeholder": [10, 11]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+    expected = {
+        "index_placeholder": [2, 3],
+        "value_placeholder": [12, 13]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+    expected = {
+        "index_placeholder": [4],
+        "value_placeholder": [14]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testArrayFeedFnBatchOneHundred(self):
+    array = np.arange(32).reshape([16, 2])
+    placeholders = ["index_placeholder", "value_placeholder"]
+    aff = ff._ArrayFeedFn(placeholders, array, 100)
+
+    expected = {
+        "index_placeholder":
+            list(range(0, 16)) * 6 + list(range(0, 4)),
+        "value_placeholder":
+            np.arange(32).reshape([16, 2]).tolist() * 6 +
+            [[0, 1], [2, 3], [4, 5], [6, 7]]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testArrayFeedFnBatchOneHundredWithSmallerArrayAndMultipleEpochs(self):
+    array = np.arange(2) + 10
+    placeholders = ["index_placeholder", "value_placeholder"]
+    aff = ff._ArrayFeedFn(placeholders, array, batch_size=100, num_epochs=2)
+
+    expected = {
+        "index_placeholder": [0, 1, 0, 1],
+        "value_placeholder": [10, 11, 10, 11],
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testPandasFeedFnBatchOne(self):
+    if not HAS_PANDAS:
+      return
+    array1 = np.arange(32, 64)
+    array2 = np.arange(64, 96)
+    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 128))
+    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
+    aff = ff._PandasFeedFn(placeholders, df, 1)
+
+    # cycle around a couple times
+    for x in range(0, 100):
+      i = x % 32
+      expected = {
+          "index_placeholder": [i + 96],
+          "a_placeholder": [32 + i],
+          "b_placeholder": [64 + i]
+      }
+      actual = aff()
+      self.assertEqual(expected, vals_to_list(actual))
+
+  def testPandasFeedFnBatchFive(self):
+    if not HAS_PANDAS:
+      return
+    array1 = np.arange(32, 64)
+    array2 = np.arange(64, 96)
+    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 128))
+    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
+    aff = ff._PandasFeedFn(placeholders, df, 5)
+
+    # cycle around a couple times
+    for _ in range(0, 101, 2):
+      aff()
+
+    expected = {
+        "index_placeholder": [127, 96, 97, 98, 99],
+        "a_placeholder": [63, 32, 33, 34, 35],
+        "b_placeholder": [95, 64, 65, 66, 67]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testPandasFeedFnBatchTwoWithOneEpoch(self):
+    if not HAS_PANDAS:
+      return
+    array1 = np.arange(32, 37)
+    array2 = np.arange(64, 69)
+    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 101))
+    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
+    aff = ff._PandasFeedFn(placeholders, df, batch_size=2, num_epochs=1)
+
+    expected = {
+        "index_placeholder": [96, 97],
+        "a_placeholder": [32, 33],
+        "b_placeholder": [64, 65]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+    expected = {
+        "index_placeholder": [98, 99],
+        "a_placeholder": [34, 35],
+        "b_placeholder": [66, 67]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+    expected = {
+        "index_placeholder": [100],
+        "a_placeholder": [36],
+        "b_placeholder": [68]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testPandasFeedFnBatchOneHundred(self):
+    if not HAS_PANDAS:
+      return
+    array1 = np.arange(32, 64)
+    array2 = np.arange(64, 96)
+    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 128))
+    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
+    aff = ff._PandasFeedFn(placeholders, df, 100)
+
+    expected = {
+        "index_placeholder": list(range(96, 128)) * 3 + list(range(96, 100)),
+        "a_placeholder": list(range(32, 64)) * 3 + list(range(32, 36)),
+        "b_placeholder": list(range(64, 96)) * 3 + list(range(64, 68))
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testPandasFeedFnBatchOneHundredWithSmallDataArrayAndMultipleEpochs(self):
+    if not HAS_PANDAS:
+      return
+    array1 = np.arange(32, 34)
+    array2 = np.arange(64, 66)
+    df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(96, 98))
+    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
+    aff = ff._PandasFeedFn(placeholders, df, batch_size=100, num_epochs=2)
+
+    expected = {
+        "index_placeholder": [96, 97, 96, 97],
+        "a_placeholder": [32, 33, 32, 33],
+        "b_placeholder": [64, 65, 64, 65]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testOrderedDictNumpyFeedFnBatchTwoWithOneEpoch(self):
+    a = np.arange(32, 37)
+    b = np.arange(64, 69)
+    x = {"a": a, "b": b}
+    ordered_dict_x = collections.OrderedDict(
+        sorted(x.items(), key=lambda t: t[0]))
+    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
+    aff = ff._OrderedDictNumpyFeedFn(
+        placeholders, ordered_dict_x, batch_size=2, num_epochs=1)
+
+    expected = {
+        "index_placeholder": [0, 1],
+        "a_placeholder": [32, 33],
+        "b_placeholder": [64, 65]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+    expected = {
+        "index_placeholder": [2, 3],
+        "a_placeholder": [34, 35],
+        "b_placeholder": [66, 67]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+    expected = {
+        "index_placeholder": [4],
+        "a_placeholder": [36],
+        "b_placeholder": [68]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testOrderedDictNumpyFeedFnLargeBatchWithSmallArrayAndMultipleEpochs(self):
+    a = np.arange(32, 34)
+    b = np.arange(64, 66)
+    x = {"a": a, "b": b}
+    ordered_dict_x = collections.OrderedDict(
+        sorted(x.items(), key=lambda t: t[0]))
+    placeholders = ["index_placeholder", "a_placeholder", "b_placeholder"]
+    aff = ff._OrderedDictNumpyFeedFn(
+        placeholders, ordered_dict_x, batch_size=100, num_epochs=2)
+
+    expected = {
+        "index_placeholder": [0, 1, 0, 1],
+        "a_placeholder": [32, 33, 32, 33],
+        "b_placeholder": [64, 65, 64, 65]
+    }
+    actual = aff()
+    self.assertEqual(expected, vals_to_list(actual))
+
+  def testFillArraySmall(self):
+    a = (np.ones(shape=[32, 32], dtype=np.int32).tolist() +
+         np.ones(shape=[32, 36], dtype=np.int32).tolist())
+    actual = np.ones(shape=[64, 36], dtype=np.int32)
+    ff._fill_array(actual, a)
+    expected = np.ones(shape=[64, 36], dtype=np.int32)
+    expected[:32, 32:] = 0
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+  def testFillArrayLarge(self):
+    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
+         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
+    actual = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
+    ff._fill_array(actual, a)
+    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
+    expected[:8, ..., 32:] = 0
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+  def testFillArraySmallWithSpecifiedValue(self):
+    fill_value = 8
+    a = (np.ones(shape=[32, 32], dtype=np.int32).tolist() +
+         np.ones(shape=[32, 36], dtype=np.int32).tolist())
+    actual = np.ones(shape=[64, 36], dtype=np.int32)
+    ff._fill_array(actual, a, fill_value)
+    expected = np.ones(shape=[64, 36], dtype=np.int32)
+    expected[:32, 32:] = fill_value
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+  def testFillArrayLargeWithSpecifiedValue(self):
+    fill_value = 8
+    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
+         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
+    actual = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
+    ff._fill_array(actual, a, fill_value)
+    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
+    expected[:8, ..., 32:] = fill_value
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+  def testPadIfNeededSmall(self):
+    a = (np.ones(shape=[32, 32], dtype=np.int32).tolist() +
+         np.ones(shape=[32, 36], dtype=np.int32).tolist())
+    a = list(map(np.array, a))
+    actual = ff._pad_if_needed(a)
+    expected = np.ones(shape=[64, 36], dtype=np.int32)
+    expected[:32, 32:] = 0
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+  def testPadIfNeededLarge(self):
+    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
+         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
+    a = list(map(np.array, a))
+    actual = ff._pad_if_needed(a)
+    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
+    expected[:8, ..., 32:] = 0
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+  def testPadIfNeededSmallWithSpecifiedValue(self):
+    fill_value = 8
+    a = (np.ones(shape=[32, 32], dtype=np.int32).tolist() +
+         np.ones(shape=[32, 36], dtype=np.int32).tolist())
+    a = list(map(np.array, a))
+    actual = ff._pad_if_needed(a, fill_value)
+    expected = np.ones(shape=[64, 36], dtype=np.int32)
+    expected[:32, 32:] = fill_value
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+  def testPadIfNeededLargeWithSpecifiedValue(self):
+    fill_value = 8
+    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.int32).tolist() +
+         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.int32).tolist())
+    a = list(map(np.array, a))
+    actual = ff._pad_if_needed(a, fill_value)
+    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.int32)
+    expected[:8, ..., 32:] = fill_value
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+  def testPadIfNeededSmallWithSpecifiedNonNumericValue(self):
+    fill_value = False
+    a = (np.ones(shape=[32, 32], dtype=np.bool).tolist() +
+         np.ones(shape=[32, 36], dtype=np.bool).tolist())
+    a = list(map(np.array, a))
+    actual = ff._pad_if_needed(a, fill_value)
+    expected = np.ones(shape=[64, 36], dtype=np.bool)
+    expected[:32, 32:] = fill_value
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+  def testPadIfNeededLargeWithSpecifiedNonNumericValue(self):
+    fill_value = False
+    a = (np.ones(shape=[8, 8, 8, 8, 32], dtype=np.bool).tolist() +
+         np.ones(shape=[8, 8, 8, 8, 36], dtype=np.bool).tolist())
+    a = list(map(np.array, a))
+    actual = ff._pad_if_needed(a, fill_value)
+    expected = np.ones(shape=[16, 8, 8, 8, 36], dtype=np.bool)
+    expected[:8, ..., 32:] = fill_value
+    self.assertEqual(expected.tolist(), actual.tolist())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py b/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py
index c940909def..afbcab596a 100644
--- a/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py
+++ b/tensorflow/python/estimator/inputs/queues/feeding_queue_runner.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,169 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""feeding_queue_runner python module.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""A `QueueRunner` that takes a feed function as an argument."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator.inputs.queues import feeding_queue_runner
+import threading
 
-# Include attrs that start with single underscore.
-feeding_queue_runner.__all__ = [
-    s for s in dir(feeding_queue_runner) if not s.startswith('__')
-]
+from tensorflow.python.framework import errors
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import queue_runner as qr
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.inputs.queues.feeding_queue_runner import *
+
+class _FeedingQueueRunner(qr.QueueRunner):
+  """A queue runner that allows the feeding of values such as numpy arrays."""
+
+  def __init__(self, queue=None, enqueue_ops=None, close_op=None,
+               cancel_op=None, feed_fns=None,
+               queue_closed_exception_types=None):
+    """Initialize the queue runner.
+
+    For further documentation, see `queue_runner.py`. Note that
+    `FeedingQueueRunner` does not support construction from protobuffer nor
+    serialization to protobuffer.
+
+    Args:
+      queue: A `Queue`.
+      enqueue_ops: List of enqueue ops to run in threads later.
+      close_op: Op to close the queue. Pending enqueue ops are preserved.
+      cancel_op: Op to close the queue and cancel pending enqueue ops.
+      feed_fns: a list of functions that return a dictionary mapping fed
+        `Tensor`s to values. Must be the same length as `enqueue_ops`.
+      queue_closed_exception_types: Optional tuple of Exception types that
+        indicate that the queue has been closed when raised during an enqueue
+        operation.  Defaults to
+        `(tf.errors.OutOfRangeError, tf.errors.CancelledError)`.
+
+    Raises:
+      ValueError: `feed_fns` is not `None` and has different length than
+        `enqueue_ops`.
+    """
+    if queue_closed_exception_types is None:
+      queue_closed_exception_types = (
+          errors.OutOfRangeError, errors.CancelledError)
+    super(_FeedingQueueRunner, self).__init__(
+        queue, enqueue_ops, close_op,
+        cancel_op, queue_closed_exception_types=queue_closed_exception_types)
+    if feed_fns is None:
+      self._feed_fns = [None for _ in enqueue_ops]
+    else:
+      if len(feed_fns) != len(enqueue_ops):
+        raise ValueError(
+            "If feed_fns is not None, it must have the same length as "
+            "enqueue_ops.")
+      self._feed_fns = feed_fns
+
+  # pylint: disable=broad-except
+  def _run(self, sess, enqueue_op, feed_fn, coord=None):
+    """Execute the enqueue op in a loop, close the queue in case of error.
+
+    Args:
+      sess: A `Session`.
+      enqueue_op: The `Operation` to run.
+      feed_fn: the feed function to pass to `sess.run`.
+      coord: Optional `Coordinator` object for reporting errors and checking
+        for stop conditions.
+
+    """
+    # TODO(jamieas): Reduce code duplication with `QueueRunner`.
+    if coord:
+      coord.register_thread(threading.current_thread())
+    decremented = False
+    try:
+      while True:
+        if coord and coord.should_stop():
+          break
+        try:
+          feed_dict = None if feed_fn is None else feed_fn()
+          sess.run(enqueue_op, feed_dict=feed_dict)
+        except (errors.OutOfRangeError, errors.CancelledError):
+          # This exception indicates that a queue was closed.
+          with self._lock:
+            self._runs_per_session[sess] -= 1
+            decremented = True
+            if self._runs_per_session[sess] == 0:
+              try:
+                sess.run(self._close_op)
+              except Exception as e:
+                # Intentionally ignore errors from close_op.
+                logging.vlog(1, "Ignored exception: %s", str(e))
+            return
+    except Exception as e:
+      # This catches all other exceptions.
+      if coord:
+        coord.request_stop(e)
+      else:
+        logging.error("Exception in QueueRunner: %s", str(e))
+        with self._lock:
+          self._exceptions_raised.append(e)
+        raise
+    finally:
+      # Make sure we account for all terminations: normal or errors.
+      if not decremented:
+        with self._lock:
+          self._runs_per_session[sess] -= 1
+
+  def create_threads(self, sess, coord=None, daemon=False, start=False):
+    """Create threads to run the enqueue ops for the given session.
+
+    This method requires a session in which the graph was launched.  It creates
+    a list of threads, optionally starting them.  There is one thread for each
+    op passed in `enqueue_ops`.
+
+    The `coord` argument is an optional coordinator, that the threads will use
+    to terminate together and report exceptions.  If a coordinator is given,
+    this method starts an additional thread to close the queue when the
+    coordinator requests a stop.
+
+    If previously created threads for the given session are still running, no
+    new threads will be created.
+
+    Args:
+      sess: A `Session`.
+      coord: Optional `Coordinator` object for reporting errors and checking
+        stop conditions.
+      daemon: Boolean.  If `True` make the threads daemon threads.
+      start: Boolean.  If `True` starts the threads.  If `False` the
+        caller must call the `start()` method of the returned threads.
+
+    Returns:
+      A list of threads.
+    """
+    with self._lock:
+      try:
+        if self._runs_per_session[sess] > 0:
+          # Already started: no new threads to return.
+          return []
+      except KeyError:
+        # We haven't seen this session yet.
+        pass
+      self._runs_per_session[sess] = len(self._enqueue_ops)
+      self._exceptions_raised = []
+
+    ret_threads = [threading.Thread(target=self._run,
+                                    args=(sess, op, feed_fn, coord))
+                   for op, feed_fn in zip(self._enqueue_ops, self._feed_fns)]
+    if coord:
+      ret_threads.append(threading.Thread(target=self._close_on_stop,
+                                          args=(sess, self._cancel_op, coord)))
+    for t in ret_threads:
+      if daemon:
+        t.daemon = True
+      if start:
+        t.start()
+    return ret_threads
+
+  def _init_from_proto(self, queue_runner_def):
+    raise NotImplementedError(
+        "{} does not support initialization from proto.".format(type(
+            self).__name__))
+
+  def to_proto(self):
+    raise NotImplementedError(
+        "{} does not support serialization to proto.".format(type(
+            self).__name__))
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_queue_runner_test.py b/tensorflow/python/estimator/inputs/queues/feeding_queue_runner_test.py
new file mode 100644
index 0000000000..6292eb7da1
--- /dev/null
+++ b/tensorflow/python/estimator/inputs/queues/feeding_queue_runner_test.py
@@ -0,0 +1,140 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests `FeedingQueueRunner` using arrays and `DataFrames`."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.estimator.inputs.queues import feeding_functions as ff
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import test
+from tensorflow.python.training import coordinator
+from tensorflow.python.training import queue_runner_impl
+
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+
+def get_rows(array, row_indices):
+  rows = [array[i] for i in row_indices]
+  return np.vstack(rows)
+
+
+class FeedingQueueRunnerTestCase(test.TestCase):
+  """Tests for `FeedingQueueRunner`."""
+
+  def testArrayFeeding(self):
+    with ops.Graph().as_default():
+      array = np.arange(32).reshape([16, 2])
+      q = ff._enqueue_data(array, capacity=100)
+      batch_size = 3
+      dq_op = q.dequeue_many(batch_size)
+      with session.Session() as sess:
+        coord = coordinator.Coordinator()
+        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
+        for i in range(100):
+          indices = [
+              j % array.shape[0]
+              for j in range(batch_size * i, batch_size * (i + 1))
+          ]
+          expected_dq = get_rows(array, indices)
+          dq = sess.run(dq_op)
+          np.testing.assert_array_equal(indices, dq[0])
+          np.testing.assert_array_equal(expected_dq, dq[1])
+        coord.request_stop()
+        coord.join(threads)
+
+  def testArrayFeedingMultiThread(self):
+    with ops.Graph().as_default():
+      array = np.arange(256).reshape([128, 2])
+      q = ff._enqueue_data(array, capacity=128, num_threads=8, shuffle=True)
+      batch_size = 3
+      dq_op = q.dequeue_many(batch_size)
+      with session.Session() as sess:
+        coord = coordinator.Coordinator()
+        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
+        for _ in range(100):
+          dq = sess.run(dq_op)
+          indices = dq[0]
+          expected_dq = get_rows(array, indices)
+          np.testing.assert_array_equal(expected_dq, dq[1])
+        coord.request_stop()
+        coord.join(threads)
+
+  def testPandasFeeding(self):
+    if not HAS_PANDAS:
+      return
+    with ops.Graph().as_default():
+      array1 = np.arange(32)
+      array2 = np.arange(32, 64)
+      df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(64, 96))
+      q = ff._enqueue_data(df, capacity=100)
+      batch_size = 5
+      dq_op = q.dequeue_many(5)
+      with session.Session() as sess:
+        coord = coordinator.Coordinator()
+        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
+        for i in range(100):
+          indices = [
+              j % array1.shape[0]
+              for j in range(batch_size * i, batch_size * (i + 1))
+          ]
+          expected_df_indices = df.index[indices]
+          expected_rows = df.iloc[indices]
+          dq = sess.run(dq_op)
+          np.testing.assert_array_equal(expected_df_indices, dq[0])
+          for col_num, col in enumerate(df.columns):
+            np.testing.assert_array_equal(expected_rows[col].values,
+                                          dq[col_num + 1])
+        coord.request_stop()
+        coord.join(threads)
+
+  def testPandasFeedingMultiThread(self):
+    if not HAS_PANDAS:
+      return
+    with ops.Graph().as_default():
+      array1 = np.arange(128, 256)
+      array2 = 2 * array1
+      df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(128))
+      q = ff._enqueue_data(df, capacity=128, num_threads=8, shuffle=True)
+      batch_size = 5
+      dq_op = q.dequeue_many(batch_size)
+      with session.Session() as sess:
+        coord = coordinator.Coordinator()
+        threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
+        for _ in range(100):
+          dq = sess.run(dq_op)
+          indices = dq[0]
+          expected_rows = df.iloc[indices]
+          for col_num, col in enumerate(df.columns):
+            np.testing.assert_array_equal(expected_rows[col].values,
+                                          dq[col_num + 1])
+        coord.request_stop()
+        coord.join(threads)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/estimator/keras.py b/tensorflow/python/estimator/keras.py
index 07e57873d5..5d5ed81fbb 100644
--- a/tensorflow/python/estimator/keras.py
+++ b/tensorflow/python/estimator/keras.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,489 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""keras python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
+# pylint: disable=protected-access
+"""Home of estimator related functions.
 """
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator import keras
+import os
+import re
+import six
+
+from tensorflow.python.client import session
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import export as export_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import random_seed
+from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras import metrics
+from tensorflow.python.keras import models
+from tensorflow.python.keras import optimizers
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import metrics as metrics_module
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import distribution_strategy_context
+from tensorflow.python.training import optimizer as tf_optimizer_module
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.training import training_util
+
+
+_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+
+
+def _cast_tensor_to_floatx(x):
+  """Cast tensor to keras's floatx dtype if it is not already the same dtype."""
+  if x.dtype == K.floatx():
+    return x
+  else:
+    return math_ops.cast(x, K.floatx())
+
+
+def _convert_tensor(x):
+  """Create or cast tensor if needed."""
+  if not tensor_util.is_tensor(x):
+    # x is a numpy array
+    x = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(x)
+  if check_ops.is_numeric_tensor(x):
+    # is_numeric_tensor returns False if provided with a numpy array
+    x = _cast_tensor_to_floatx(x)
+  return x
+
+
+def _any_weight_initialized(keras_model):
+  """Check if any weights has been initialized in the Keras model.
+
+  Args:
+    keras_model: An instance of compiled keras model.
+
+  Returns:
+    boolean, True if at least one weight has been initialized, else False.
+    Currently keras initialize all weights at get_session().
+  """
+  if keras_model is None:
+    return False
+  for layer in keras_model.layers:
+    for weight in layer.weights:
+      if hasattr(weight, '_keras_initialized'):
+        return True
+  return False
+
+
+def _convert_estimator_io_to_keras(keras_model, features, labels):
+  """Converts estimator features and labels to keras input and target tensors.
+
+  Args:
+    keras_model: a compiled `tf.keras.Model` instance, used to determine the
+      order of the returned lists.
+    features: Dict of tensors or `None`.
+    labels: Dict of tensors, a single tensor, or `None`.
+
+  Returns:
+    Tuple of (
+      list of input tensors or `None`,
+      list of target tensors or `None`)
+    The order of tensors is determined by the order set in the keras model.
+  """
+
+  def _to_ordered_tensor_list(obj, key_order, obj_name, order_name):
+    """Convert obj to an ordered list of tensors.
+
+    Args:
+      obj: List, dict, or single tensor. May be `None`.
+      key_order: List of strings with the order to return (used if obj is a
+        dict).
+      obj_name: String name of object (e.g. "features" or "labels")
+      order_name: String name of the key order (e.g. "inputs" or "outputs")
+
+    Returns:
+      List of tensors, or `None`
+
+    Raises:
+      KeyError: If obj has invalid keys.
+    """
+    if obj is None:
+      return None
+    elif isinstance(obj, (list, tuple)):
+      return [_convert_tensor(x) for x in obj]
+    elif isinstance(obj, dict):
+      # Ensure that the obj keys and keys in key_order are exactly the same.
+      different_keys = set(obj.keys()) ^ set(key_order)
+
+      if different_keys:
+        raise KeyError(
+            'The dictionary passed into {obj_name} does not have the expected '
+            '{order_name} keys defined in the keras model.'
+            '\n\tExpected keys: {order_keys}'
+            '\n\t{obj_name} keys: {obj_keys}'
+            '\n\tDifference: {different_keys}'.format(
+                order_name=order_name, order_keys=set(key_order),
+                obj_name=obj_name, obj_keys=set(obj.keys()),
+                different_keys=different_keys))
+
+      return [_convert_tensor(obj[key]) for key in key_order]
+    else:  # Assume obj is a tensor.
+      return [_convert_tensor(obj)]
+
+  input_names = None
+  output_names = None
+  if isinstance(features, dict):
+    input_names = (
+        keras_model.input_names if keras_model._is_graph_network else
+        ['input_%d' % i for i in range(1, len(features) + 1)])
+  if isinstance(labels, dict):
+    output_names = (
+        keras_model.output_names if keras_model._is_graph_network else
+        ['output_%d' % i for i in range(1, len(labels) + 1)])
+
+  input_tensors = _to_ordered_tensor_list(
+      features, input_names, 'features', 'inputs')
+  target_tensors = _to_ordered_tensor_list(
+      labels, output_names, 'labels', 'outputs')
+
+  return input_tensors, target_tensors
+
+
+def _clone_and_build_model(mode,
+                           keras_model,
+                           custom_objects,
+                           features=None,
+                           labels=None):
+  """Clone and build the given keras_model.
+
+  Args:
+    mode: training mode.
+    keras_model: an instance of compiled keras model.
+    custom_objects: Dictionary for custom objects.
+    features: Dict of tensors.
+    labels: Dict of tensors, or single tensor instance.
+
+  Returns:
+    The newly built model.
+  """
+  # Set to True during training, False for inference or testing.
+  K.set_learning_phase(mode == model_fn_lib.ModeKeys.TRAIN)
+  input_tensors, target_tensors = _convert_estimator_io_to_keras(
+      keras_model, features, labels)
+
+  compile_clone = (mode != model_fn_lib.ModeKeys.PREDICT)
+
+  global_step = None
+  if compile_clone:
+    # Set iterations to the global step created by tf.train.create_global_step()
+    # which is automatically run in the estimator framework.
+    global_step = training_util.get_or_create_global_step()
+    K.track_variable(global_step)
+
+  clone = models.clone_and_build_model(
+      keras_model, input_tensors, target_tensors, custom_objects,
+      compile_clone=compile_clone,
+      in_place_reset=(not keras_model._is_graph_network),
+      optimizer_iterations=global_step)
+
+  return clone
+
+
+def _convert_keras_metrics_to_estimator(model):
+  """Convert metrics from a Keras model to ops used by the Estimator framework.
+
+  Args:
+    model: A `tf.keras.Model` object.
+
+  Returns:
+    Dictionary mapping metric names to tuples of (value, update) ops. May return
+    `None` if the model does not contain any metrics.
+  """
+  if not getattr(model, 'metrics', None):
+    return None
+
+  eval_metric_ops = {}
+
+  def get_metric_name(metric):
+    if isinstance(metric, metrics.Metric):
+      return metric.name
+    if callable(metric):
+      return metric.__name__
+    assert isinstance(metric, six.string_types)
+    return metric
+
+  # When each metric maps to an output
+  if isinstance(model.metrics, dict):
+    for i, output_name in enumerate(model.metrics.keys()):
+      # `metric` is the user given metric value in `compile`. This can be
+      # metric name (`acc`), metric function (binary_accuracy) or a metric
+      # object (BinaryAccuracy()).
+      metric = model.metrics[output_name]
+      metric_name = get_metric_name(metric)
+      # When some outputs use the same metric
+      if list(model.metrics.values()).count(metric_name) > 1:
+        metric_name += '_' + output_name
+      if isinstance(metric, metrics.Metric):
+        eval_metric_ops[metric_name] = metric
+      else:
+        eval_metric_ops[metric_name] = metrics_module.mean(
+            model.metrics_tensors[i - len(model.metrics)])
+  else:
+    for i, metric in enumerate(model.metrics):
+      metric_name = get_metric_name(metric)
+      if isinstance(metric, metrics.Metric):
+        eval_metric_ops[metric_name] = metric
+      else:
+        eval_metric_ops[metric_name] = metrics_module.mean(
+            model.metrics_tensors[i])
+  return eval_metric_ops
+
+
+def _create_keras_model_fn(keras_model, custom_objects=None):
+  """Creates model_fn for keras Estimator.
+
+  Args:
+    keras_model: an instance of compiled keras model.
+    custom_objects: Dictionary for custom objects.
+
+  Returns:
+    The model_fn for a keras Estimator.
+  """
+
+  def model_fn(features, labels, mode):
+    """model_fn for keras Estimator."""
+    # Raise an error when users use DistributionStrategy with native Keras
+    # optimizers. Currently we only support native TensorFlow optimizers.
+    if distribution_strategy_context.has_distribution_strategy() and \
+        not isinstance(keras_model.optimizer,
+                       (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
+      raise ValueError('Only TensorFlow native optimizers are supported with '
+                       'DistributionStrategy.')
+
+    model = _clone_and_build_model(mode, keras_model, custom_objects, features,
+                                   labels)
+    model_output_names = []
+    # We need to make sure that the output names of the last layer in the model
+    # is the same for each of the cloned models. This is required for mirrored
+    # strategy when we call regroup.
+    if distribution_strategy_context.has_distribution_strategy():
+      for name in model.output_names:
+        name = re.compile(r'_\d$').sub('', name)
+        model_output_names.append(name)
+    else:
+      model_output_names = model.output_names
+
+    # Get inputs to EstimatorSpec
+    predictions = dict(zip(model_output_names, model.outputs))
+
+    loss = None
+    train_op = None
+    eval_metric_ops = None
+
+    # Set loss and metric only during train and evaluate.
+    if mode is not model_fn_lib.ModeKeys.PREDICT:
+      if mode is model_fn_lib.ModeKeys.TRAIN:
+        model._make_train_function()  # pylint: disable=protected-access
+      else:
+        model._make_test_function()  # pylint: disable=protected-access
+      loss = model.total_loss
+
+      eval_metric_ops = _convert_keras_metrics_to_estimator(model)
+
+    # Set train_op only during train.
+    if mode is model_fn_lib.ModeKeys.TRAIN:
+      train_op = model.train_function.updates_op
+
+    if not model._is_graph_network:
+      # Reset model state to original state,
+      # to avoid `model_fn` being destructive for the initial model argument.
+      models.in_place_subclassed_model_state_restoration(keras_model)
+    return model_fn_lib.EstimatorSpec(
+        mode=mode,
+        predictions=predictions,
+        loss=loss,
+        train_op=train_op,
+        eval_metric_ops=eval_metric_ops,
+        export_outputs={
+            _DEFAULT_SERVING_KEY:
+            export_lib.export_output.PredictOutput(predictions)
+        })
+
+  return model_fn
+
+
+def _save_first_checkpoint(keras_model, custom_objects, config):
+  """Save first checkpoint for the keras Estimator.
+
+  Args:
+    keras_model: an instance of compiled keras model.
+    custom_objects: Dictionary for custom objects.
+    config: Estimator config.
+
+  Returns:
+    The path where keras model checkpoint is saved.
+  """
+  # save checkpoint into subdirectory to allow warm start
+  keras_model_dir = os.path.join(config.model_dir, 'keras')
+  # Load weights and save to checkpoint if there is no checkpoint
+  latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
+  if not latest_path:
+    keras_weights = None
+    if _any_weight_initialized(keras_model):
+      keras_weights = keras_model.get_weights()
+    if not gfile.IsDirectory(keras_model_dir):
+      gfile.MakeDirs(keras_model_dir)
+    with ops.Graph().as_default():
+      random_seed.set_random_seed(config.tf_random_seed)
+      training_util.create_global_step()
+      model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN, keras_model,
+                                     custom_objects)
+      # save to checkpoint
+      with session.Session(config=config.session_config) as sess:
+        if keras_weights:
+          model.set_weights(keras_weights)
+        # Make update ops and initialize all variables.
+        if not model.train_function:
+          # pylint: disable=protected-access
+          model._make_train_function()
+          K._initialize_variables(sess)
+          # pylint: enable=protected-access
+        saver = saver_lib.Saver()
+        latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
+        saver.save(sess, latest_path)
+  return latest_path
+
+
+def _get_file_from_google_storage(keras_model_path, model_dir):
+  """Get file from google storage and download to local file.
+
+  Args:
+    keras_model_path: a google storage path for compiled keras model.
+    model_dir: the directory from estimator config.
+
+  Returns:
+    The path where keras model is saved.
+
+  Raises:
+    ValueError: if storage object name does not end with .h5.
+  """
+  try:
+    from google.cloud import storage  # pylint:disable=g-import-not-at-top
+  except ImportError:
+    raise TypeError('Could not save model to Google cloud storage; please '
+                    'install `google-cloud-storage` via '
+                    '`pip install google-cloud-storage`.')
+  storage_client = storage.Client()
+  path, blob_name = os.path.split(keras_model_path)
+  _, bucket_name = os.path.split(path)
+  keras_model_dir = os.path.join(model_dir, 'keras')
+  if not gfile.Exists(keras_model_dir):
+    gfile.MakeDirs(keras_model_dir)
+  file_name = os.path.join(keras_model_dir, 'keras_model.h5')
+  try:
+    blob = storage_client.get_bucket(bucket_name).blob(blob_name)
+    blob.download_to_filename(file_name)
+  except:
+    raise ValueError('Failed to download keras model, please check '
+                     'environment variable GOOGLE_APPLICATION_CREDENTIALS '
+                     'and model path storage.googleapis.com/{bucket}/{object}.')
+  logging.info('Saving model to {}'.format(file_name))
+  del storage_client
+  return file_name
+
+
+def model_to_estimator(keras_model=None,
+                       keras_model_path=None,
+                       custom_objects=None,
+                       model_dir=None,
+                       config=None):
+  """Constructs an `Estimator` instance from given keras model.
+
+  For usage example, please see:
+  [Creating estimators from Keras
+  Models](https://tensorflow.org/guide/estimators#model_to_estimator).
+
+  Args:
+    keras_model: A compiled Keras model object. This argument is mutually
+      exclusive with `keras_model_path`.
+    keras_model_path: Path to a compiled Keras model saved on disk, in HDF5
+      format, which can be generated with the `save()` method of a Keras model.
+      This argument is mutually exclusive with `keras_model`.
+    custom_objects: Dictionary for custom objects.
+    model_dir: Directory to save `Estimator` model parameters, graph, summary
+      files for TensorBoard, etc.
+    config: `RunConfig` to config `Estimator`.
+
+  Returns:
+    An Estimator from given keras model.
+
+  Raises:
+    ValueError: if neither keras_model nor keras_model_path was given.
+    ValueError: if both keras_model and keras_model_path was given.
+    ValueError: if the keras_model_path is a GCS URI.
+    ValueError: if keras_model has not been compiled.
+  """
+  if not (keras_model or keras_model_path):
+    raise ValueError(
+        'Either `keras_model` or `keras_model_path` needs to be provided.')
+  if keras_model and keras_model_path:
+    raise ValueError(
+        'Please specity either `keras_model` or `keras_model_path`, '
+        'but not both.')
+
+  config = estimator_lib.maybe_overwrite_model_dir_and_session_config(
+      config, model_dir)
+  if not keras_model:
+    if keras_model_path.startswith(
+        'gs://') or 'storage.googleapis.com' in keras_model_path:
+      keras_model_path = _get_file_from_google_storage(keras_model_path,
+                                                       config.model_dir)
+    logging.info('Loading models from %s', keras_model_path)
+    keras_model = models.load_model(keras_model_path)
+  else:
+    logging.info('Using the Keras model provided.')
+    keras_model = keras_model
+
+  if not hasattr(keras_model, 'optimizer') or not keras_model.optimizer:
+    raise ValueError(
+        'The given keras model has not been compiled yet. '
+        'Please compile the model with `model.compile()` '
+        'before calling `model_to_estimator()`.')
+
+  keras_model_fn = _create_keras_model_fn(keras_model, custom_objects)
+  if _any_weight_initialized(keras_model):
+    # Warn if config passed to estimator tries to update GPUOptions. If a
+    # session has already been created, the GPUOptions passed to the first
+    # session sticks.
+    if config.session_config.HasField('gpu_options'):
+      logging.warning(
+          'The Keras backend session has already been set. '
+          'The _session_config passed to model_to_estimator will not be used.')
+  else:
+    # Pass the config into keras backend's default session.
+    sess = session.Session(config=config.session_config)
+    K.set_session(sess)
+
+  warm_start_path = None
+  if keras_model._is_graph_network:
+    warm_start_path = _save_first_checkpoint(keras_model, custom_objects,
+                                             config)
+  elif keras_model.built:
+    logging.warning('You are creating an Estimator from a Keras model manually '
+                    'subclassed from `Model`, that was already called on some '
+                    'inputs (and thus already had weights). We are currently '
+                    'unable to preserve the model\'s state (its weights) as '
+                    'part of the estimator in this case. Be warned that the '
+                    'estimator has been created using a freshly initialized '
+                    'version of your model.\n'
+                    'Note that this doesn\'t affect the state of the model '
+                    'instance you passed as `keras_model` argument.')
 
-# Include attrs that start with single underscore.
-keras.__all__ = [s for s in dir(keras) if not s.startswith('__')]
+  estimator = estimator_lib.Estimator(keras_model_fn,
+                                      config=config,
+                                      warm_start_from=warm_start_path)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.keras import *
+  return estimator
diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py
new file mode 100644
index 0000000000..4e285fa25a
--- /dev/null
+++ b/tensorflow/python/estimator/keras_test.py
@@ -0,0 +1,805 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for training routines."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+from math import log10
+import os
+import tempfile
+
+import numpy as np
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python import keras
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import keras as keras_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.keras.optimizers import SGD
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.ops.parsing_ops import gen_parsing_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import rmsprop
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import training_util
+
+
+try:
+  import h5py  # pylint:disable=g-import-not-at-top
+except ImportError:
+  h5py = None
+
+_RANDOM_SEED = 1337
+_TRAIN_SIZE = 200
+_INPUT_SIZE = (10,)
+_NUM_CLASS = 2
+
+_TMP_DIR = '/tmp'
+
+
+def simple_sequential_model():
+  model = keras.models.Sequential()
+  model.add(keras.layers.Dense(16, activation='relu', input_shape=_INPUT_SIZE))
+  model.add(keras.layers.Dropout(0.1))
+  model.add(keras.layers.Dense(_NUM_CLASS, activation='softmax'))
+  return model
+
+
+def simple_functional_model(activation='relu'):
+  a = keras.layers.Input(shape=_INPUT_SIZE)
+  b = keras.layers.Dense(16, activation=activation)(a)
+  b = keras.layers.Dropout(0.1)(b)
+  b = keras.layers.Dense(_NUM_CLASS, activation='softmax')(b)
+  model = keras.models.Model(inputs=[a], outputs=[b])
+  return model
+
+
+def simple_subclassed_model():
+
+  class SimpleModel(keras.Model):
+
+    def __init__(self):
+      super(SimpleModel, self).__init__()
+      self.dense1 = keras.layers.Dense(16, activation='relu')
+      self.dp = keras.layers.Dropout(0.1)
+      self.dense2 = keras.layers.Dense(_NUM_CLASS, activation='softmax')
+
+    def call(self, inputs):
+      x = self.dense1(inputs)
+      x = self.dp(x)
+      return self.dense2(x)
+
+  return SimpleModel()
+
+
+def gen_input_fn(x, y=None, batch_size=128, num_epochs=1, shuffle=False):
+  def input_fn():
+    ds = dataset_ops.Dataset.from_tensor_slices((x, y) if y is not None else x)
+    if shuffle:
+      ds = ds.shuffle(1000)
+    return ds.repeat(num_epochs).batch(batch_size)
+  return input_fn
+
+
+def get_multi_inputs_multi_outputs_data():
+  (a_train, c_train), (a_test, c_test) = testing_utils.get_test_data(
+      train_samples=_TRAIN_SIZE,
+      test_samples=50,
+      input_shape=(16,),
+      num_classes=3,
+      random_seed=_RANDOM_SEED)
+  (b_train, d_train), (b_test, d_test) = testing_utils.get_test_data(
+      train_samples=_TRAIN_SIZE,
+      test_samples=50,
+      input_shape=(16,),
+      num_classes=2,
+      random_seed=_RANDOM_SEED)
+  (m_train, _), (m_test, _) = testing_utils.get_test_data(
+      train_samples=_TRAIN_SIZE,
+      test_samples=50,
+      input_shape=(8,),
+      num_classes=2,
+      random_seed=_RANDOM_SEED)
+
+  c_train = keras.utils.to_categorical(c_train)
+  c_test = keras.utils.to_categorical(c_test)
+  d_train = keras.utils.to_categorical(d_train)
+  d_test = keras.utils.to_categorical(d_test)
+
+  train_data = {
+      'input_a': a_train,
+      'input_b': b_train,
+      'input_m': m_train,
+      'output_c': c_train,
+      'output_d': d_train
+  }
+  test_data = {
+      'input_a': a_test,
+      'input_b': b_test,
+      'input_m': m_test,
+      'output_c': c_test,
+      'output_d': d_test
+  }
+
+  return (train_data, test_data)
+
+
+def get_resource_for_simple_model(model_type='sequential',
+                                  is_evaluate=False,):
+  if model_type == 'sequential':
+    model = simple_sequential_model()
+    model.build()
+  elif model_type == 'subclass':
+    model = simple_subclassed_model()
+  else:
+    assert model_type == 'functional'
+    model = simple_functional_model()
+
+  if model_type == 'subclass':
+    input_name = 'input_1'
+    output_name = 'output_1'
+  else:
+    input_name = model.input_names[0]
+    output_name = model.output_names[0]
+
+  np.random.seed(_RANDOM_SEED)
+  (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
+      train_samples=_TRAIN_SIZE,
+      test_samples=50,
+      input_shape=_INPUT_SIZE,
+      num_classes=_NUM_CLASS)
+  y_train = keras.utils.to_categorical(y_train)
+  y_test = keras.utils.to_categorical(y_test)
+
+  train_input_fn = gen_input_fn(
+      x=randomize_io_type(x_train, input_name),
+      y=randomize_io_type(y_train, output_name),
+      shuffle=False,
+      num_epochs=None,
+      batch_size=16)
+
+  evaluate_input_fn = gen_input_fn(
+      x=randomize_io_type(x_test, input_name),
+      y=randomize_io_type(y_test, output_name),
+      num_epochs=1, shuffle=False)
+
+  predict_input_fn = gen_input_fn(
+      x=randomize_io_type(x_test, input_name), num_epochs=1, shuffle=False)
+
+  inference_input_fn = evaluate_input_fn if is_evaluate else predict_input_fn
+
+  return model, (x_train, y_train), (x_test,
+                                     y_test), train_input_fn, inference_input_fn
+
+
+def randomize_io_type(array, name):
+  switch = np.random.random()
+  if switch > 0.5:
+    return array
+  else:
+    return {name: array}
+
+
+def multi_inputs_multi_outputs_model():
+  input_a = keras.layers.Input(shape=(16,), name='input_a')
+  input_b = keras.layers.Input(shape=(16,), name='input_b')
+  input_m = keras.layers.Input(shape=(8,), dtype='string', name='input_m')
+  dense = keras.layers.Dense(8, name='dense_1')
+
+  interm_a = dense(input_a)
+  # Read m
+  interm_m = keras.layers.Lambda(gen_parsing_ops.string_to_number)(input_m)
+  interm_s = keras.layers.Lambda(lambda k: k[0] * k[1])([interm_m, interm_a])
+  interm_b = dense(input_b)
+  merged = keras.layers.concatenate([interm_s, interm_b], name='merge')
+  output_c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged)
+  output_d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged)
+  model = keras.models.Model(
+      inputs=[input_a, input_b, input_m], outputs=[output_c, output_d])
+  model.compile(
+      loss='categorical_crossentropy',
+      optimizer='rmsprop',
+      metrics={
+          'dense_2': 'categorical_accuracy',
+          'dense_3': 'categorical_accuracy'
+      })
+  return model
+
+
+class MyHook(session_run_hook.SessionRunHook):
+
+  def begin(self):
+    _ = variable_scope.get_variable('temp', [1])
+
+
+class TestKerasEstimator(test_util.TensorFlowTestCase):
+
+  def setUp(self):
+    self._base_dir = os.path.join(self.get_temp_dir(), 'keras_estimator_test')
+    gfile.MakeDirs(self._base_dir)
+    self._config = run_config_lib.RunConfig(
+        tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir)
+    super(TestKerasEstimator, self).setUp()
+
+  def tearDown(self):
+    # Make sure nothing is stuck in limbo.
+    writer_cache.FileWriterCache.clear()
+    if os.path.isdir(self._base_dir):
+      gfile.DeleteRecursively(self._base_dir)
+    super(TestKerasEstimator, self).tearDown()
+
+  def test_train(self):
+    for model_type in ['sequential', 'functional']:
+      keras_model, (_, _), (
+          _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
+              model_type=model_type, is_evaluate=True)
+      keras_model.compile(
+          loss='categorical_crossentropy',
+          optimizer='rmsprop',
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+      with self.cached_session():
+        est_keras = keras_lib.model_to_estimator(
+            keras_model=keras_model, config=self._config)
+        before_eval_results = est_keras.evaluate(
+            input_fn=eval_input_fn, steps=1)
+        est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
+        after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
+        self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+      writer_cache.FileWriterCache.clear()
+      gfile.DeleteRecursively(self._config.model_dir)
+
+  # see b/109935364
+  @test_util.run_in_graph_and_eager_modes
+  def test_train_with_hooks(self):
+    for model_type in ['sequential', 'functional']:
+      keras_model, (_, _), (
+          _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
+              model_type=model_type, is_evaluate=True)
+      keras_model.compile(
+          loss='categorical_crossentropy',
+          optimizer=rmsprop.RMSPropOptimizer(1e-3),
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+      my_hook = MyHook()
+      with self.cached_session():
+        est_keras = keras_lib.model_to_estimator(
+            keras_model=keras_model, config=self._config)
+        before_eval_results = est_keras.evaluate(
+            input_fn=eval_input_fn, steps=1)
+        est_keras.train(input_fn=train_input_fn, hooks=[my_hook],
+                        steps=_TRAIN_SIZE / 16)
+        after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
+        self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+      writer_cache.FileWriterCache.clear()
+      gfile.DeleteRecursively(self._config.model_dir)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_train_with_model_fit_and_hooks(self):
+    keras_model, (x_train, y_train), _, \
+      train_input_fn, eval_input_fn = get_resource_for_simple_model(
+          model_type='sequential', is_evaluate=True)
+
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer=rmsprop.RMSPropOptimizer(1e-3),
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
+    my_hook = MyHook()
+    with self.cached_session():
+      keras_model.fit(x_train, y_train, epochs=1)
+
+      keras_est = keras_lib.model_to_estimator(
+          keras_model=keras_model, config=self._config)
+      before_eval_results = keras_est.evaluate(input_fn=eval_input_fn)
+      keras_est.train(input_fn=train_input_fn, hooks=[my_hook],
+                      steps=_TRAIN_SIZE / 16)
+      after_eval_results = keras_est.evaluate(input_fn=eval_input_fn, steps=1)
+      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_train_with_tf_optimizer(self):
+    for model_type in ['sequential', 'functional']:
+      keras_model, (_, _), (
+          _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
+              model_type=model_type, is_evaluate=True)
+      keras_model.compile(
+          loss='categorical_crossentropy',
+          optimizer=rmsprop.RMSPropOptimizer(1e-3),
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+      with self.cached_session():
+        est_keras = keras_lib.model_to_estimator(
+            keras_model=keras_model,
+            config=self._config)
+        before_eval_results = est_keras.evaluate(
+            input_fn=eval_input_fn, steps=1)
+        est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
+        after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
+        self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+      writer_cache.FileWriterCache.clear()
+      gfile.DeleteRecursively(self._config.model_dir)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_train_with_subclassed_model(self):
+    keras_model, (_, _), (
+        _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
+            model_type='subclass', is_evaluate=True)
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer=rmsprop.RMSPropOptimizer(1e-3),
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+    with self.cached_session():
+      est_keras = keras_lib.model_to_estimator(
+          keras_model=keras_model, config=self._config)
+      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
+      before_eval_results = est_keras.evaluate(
+          input_fn=eval_input_fn, steps=1)
+      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
+      after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
+      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+  def test_train_with_subclassed_model_with_existing_state(self):
+    keras_model, (_, _), (
+        _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model(
+            model_type='subclass', is_evaluate=True)
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer=rmsprop.RMSPropOptimizer(1e-3),
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+    with self.cached_session():
+      # Create state
+      keras_model.train_on_batch(np.random.random((10,) + _INPUT_SIZE),
+                                 np.random.random((10, _NUM_CLASS)))
+      original_preds = keras_model.predict(np.ones((10,) + _INPUT_SIZE))
+
+      est_keras = keras_lib.model_to_estimator(
+          keras_model=keras_model, config=self._config)
+      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
+      before_eval_results = est_keras.evaluate(
+          input_fn=eval_input_fn, steps=1)
+      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
+      after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
+      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+      # Check that original model state was not altered
+      preds = keras_model.predict(np.ones((10,) + _INPUT_SIZE))
+      self.assertAllClose(original_preds, preds, atol=1e-5)
+      # Check that the original model compilation did not break
+      keras_model.train_on_batch(np.random.random((10,) + _INPUT_SIZE),
+                                 np.random.random((10, _NUM_CLASS)))
+
+  def test_evaluate(self):
+    keras_model, (x_train, y_train), (
+        x_test, y_test), _, eval_input_fn = get_resource_for_simple_model(
+            model_type='functional', is_evaluate=True)
+
+    with self.cached_session():
+      metrics = [
+          'binary_accuracy', 'binary_crossentropy', 'categorical_accuracy',
+          'categorical_crossentropy', 'cosine_proximity', 'hinge',
+          'kullback_leibler_divergence', 'mean_absolute_error',
+          'mean_absolute_percentage_error', 'mean_squared_error',
+          'mean_squared_logarithmic_error', 'poisson', 'squared_hinge',
+          'top_k_categorical_accuracy'
+      ]
+      keras_model.compile(
+          loss='categorical_crossentropy', optimizer='adam', metrics=metrics)
+      keras_model.fit(x_train, y_train, epochs=1)
+      keras_eval = keras_model.evaluate(x_test, y_test, batch_size=32)
+
+    with self.cached_session():
+      keras_est = keras_lib.model_to_estimator(
+          keras_model=keras_model, config=self._config)
+      est_eval = keras_est.evaluate(input_fn=eval_input_fn)
+
+    metrics = ['loss'] + metrics
+
+    # Check loss and all metrics match between keras and estimator.
+    def shift(val):
+      if val == 0:
+        return 0
+      else:
+        return val / 10**int(log10(abs(val)))
+
+    for i, metric_name in enumerate(metrics):
+      self.assertAlmostEqual(
+          shift(est_eval[metric_name]),
+          shift(keras_eval[i]),
+          places=4,
+          msg='%s mismatch, keras model: %s, estimator: %s' %
+          (metric_name, est_eval[metric_name], keras_eval[i]))
+
+  def test_predict(self):
+    # Check that predict on a pretrained model yield the same result.
+    keras_model, (x_train, y_train), (
+        x_test, _), _, pred_input_fn = get_resource_for_simple_model(
+            model_type='sequential', is_evaluate=False)
+
+    with self.cached_session():
+      keras_model.compile(
+          loss='categorical_crossentropy',
+          optimizer='adam',
+          metrics=['accuracy'])
+      keras_model.fit(x_train, y_train, epochs=1)
+      keras_pred = [np.argmax(y) for y in keras_model.predict(x_test)]
+
+    with self.cached_session():
+      keras_est = keras_lib.model_to_estimator(
+          keras_model=keras_model, config=self._config)
+      est_pred = [
+          np.argmax(y[keras_model.output_names[0]])
+          for y in keras_est.predict(input_fn=pred_input_fn)
+      ]
+    self.assertAllEqual(est_pred, keras_pred)
+
+  def test_multi_inputs_multi_outputs_with_input_fn_as_dict(self):
+    train_data, test_data = get_multi_inputs_multi_outputs_data()
+
+    def train_input_fn():
+      input_dict = {
+          'input_a': train_data['input_a'],
+          'input_b': train_data['input_b'],
+          'input_m': train_data['input_m'].astype(np.str)
+      }
+      output_dict = {
+          'dense_2': train_data['output_c'],
+          'dense_3': train_data['output_d']
+      }
+      return input_dict, output_dict
+
+    def eval_input_fn():
+      input_dict = {
+          'input_a': test_data['input_a'],
+          'input_b': test_data['input_b'],
+          'input_m': test_data['input_m'].astype(np.str)
+      }
+      output_dict = {
+          'dense_2': test_data['output_c'],
+          'dense_3': test_data['output_d']
+      }
+      return input_dict, output_dict
+
+    def pred_input_fn():
+      input_dict = {
+          'input_a': test_data['input_a'],
+          'input_b': test_data['input_b'],
+          'input_m': test_data['input_m'].astype(np.str)
+      }
+      return input_dict
+
+    self.do_test_multi_inputs_multi_outputs_with_input_fn(
+        train_input_fn, eval_input_fn, pred_input_fn)
+
+  def test_multi_inputs_multi_outputs_with_input_fn_as_list(self):
+    train_data, test_data = get_multi_inputs_multi_outputs_data()
+
+    def train_input_fn():
+      input_list = [
+          train_data['input_a'], train_data['input_b'],
+          train_data['input_m'].astype(np.str)
+      ]
+      output_list = [train_data['output_c'], train_data['output_d']]
+      return input_list, output_list
+
+    def eval_input_fn():
+      input_list = [
+          test_data['input_a'], test_data['input_b'],
+          test_data['input_m'].astype(np.str)
+      ]
+      output_list = [test_data['output_c'], test_data['output_d']]
+      return input_list, output_list
+
+    def pred_input_fn():
+      input_list = [
+          test_data['input_a'], test_data['input_b'],
+          test_data['input_m'].astype(np.str)
+      ]
+      return input_list
+
+    self.do_test_multi_inputs_multi_outputs_with_input_fn(
+        train_input_fn, eval_input_fn, pred_input_fn)
+
+  def do_test_multi_inputs_multi_outputs_with_input_fn(
+      self, train_input_fn, eval_input_fn, pred_input_fn):
+    with self.cached_session():
+      model = multi_inputs_multi_outputs_model()
+      est_keras = keras_lib.model_to_estimator(
+          keras_model=model, config=self._config)
+      baseline_eval_results = est_keras.evaluate(
+          input_fn=eval_input_fn, steps=1)
+      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
+      eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
+      self.assertLess(eval_results['loss'], baseline_eval_results['loss'])
+      est_keras.predict(input_fn=pred_input_fn)
+
+  def test_init_from_file(self):
+    if h5py is None:
+      return  # Skip test if models cannot be saved.
+
+    keras_model, (x_train, y_train), (
+        x_test, _), _, pred_input_fn = get_resource_for_simple_model(
+            model_type='functional', is_evaluate=False)
+
+    with self.cached_session():
+      keras_model.compile(
+          loss='categorical_crossentropy',
+          optimizer='rmsprop',
+          metrics=['categorical_accuracy'])
+      keras_model.fit(x_train, y_train, epochs=1)
+      keras_pred = [np.argmax(y) for y in keras_model.predict(x_test)]
+      fname = os.path.join(self._base_dir, 'keras_model.h5')
+      keras.models.save_model(keras_model, fname)
+
+    with self.cached_session():
+      keras_est = keras_lib.model_to_estimator(
+          keras_model_path=fname, config=self._config)
+      est_pred = [
+          np.argmax(y[keras_model.output_names[0]])
+          for y in keras_est.predict(input_fn=pred_input_fn)
+      ]
+    self.assertAllEqual(est_pred, keras_pred)
+
+  def test_keras_model_init_error(self):
+    with self.assertRaisesRegexp(ValueError, 'Either'):
+      keras_lib.model_to_estimator()
+
+    with self.cached_session():
+      keras_model = simple_sequential_model()
+      with self.assertRaisesRegexp(ValueError, 'not both'):
+        keras_lib.model_to_estimator(
+            keras_model=keras_model,
+            keras_model_path=tempfile.mkdtemp(dir=self._base_dir))
+
+    with self.cached_session():
+      keras_model = simple_sequential_model()
+      with self.assertRaisesRegexp(ValueError, 'compiled'):
+        keras_lib.model_to_estimator(keras_model=keras_model)
+
+  def test_invalid_ionames_error(self):
+    (x_train, y_train), (_, _) = testing_utils.get_test_data(
+        train_samples=_TRAIN_SIZE,
+        test_samples=100,
+        input_shape=(10,),
+        num_classes=2)
+    y_train = keras.utils.to_categorical(y_train)
+
+    def invald_input_name_input_fn():
+      input_dict = {'invalid_input_name': x_train}
+      return input_dict, y_train
+
+    def invald_output_name_input_fn():
+      input_dict = {'input_1': x_train}
+      output_dict = {'invalid_output_name': y_train}
+      return input_dict, output_dict
+    model = simple_functional_model()
+    model.compile(
+        loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
+    with self.cached_session():
+      est_keras = keras_lib.model_to_estimator(
+          keras_model=model, config=self._config)
+    with self.cached_session():
+      with self.assertRaisesRegexp(KeyError,
+                                   'Difference: .*invalid_input_name'):
+        est_keras.train(input_fn=invald_input_name_input_fn, steps=100)
+
+      with self.assertRaisesRegexp(KeyError,
+                                   'Difference: .*invalid_output_name'):
+        est_keras.train(input_fn=invald_output_name_input_fn, steps=100)
+
+  def test_custom_objects(self):
+
+    def relu6(x):
+      return keras.backend.relu(x, max_value=6)
+
+    keras_model = simple_functional_model(activation=relu6)
+    keras_model.compile(loss='categorical_crossentropy', optimizer='adam')
+    custom_objects = {
+        'relu6': relu6
+    }
+
+    (x_train, y_train), _ = testing_utils.get_test_data(
+        train_samples=_TRAIN_SIZE,
+        test_samples=50,
+        input_shape=(10,),
+        num_classes=2)
+    y_train = keras.utils.to_categorical(y_train, 2)
+    input_name = keras_model.input_names[0]
+    output_name = keras_model.output_names[0]
+    train_input_fn = gen_input_fn(
+        x=randomize_io_type(x_train, input_name),
+        y=randomize_io_type(y_train, output_name),
+        shuffle=False,
+        num_epochs=None,
+        batch_size=16)
+    with self.assertRaisesRegexp(ValueError, 'relu6'):
+      with self.cached_session():
+        est = keras_lib.model_to_estimator(
+            keras_model=keras_model,
+            model_dir=tempfile.mkdtemp(dir=self._base_dir))
+        est.train(input_fn=train_input_fn, steps=1)
+
+    with self.cached_session():
+      est = keras_lib.model_to_estimator(
+          keras_model=keras_model,
+          model_dir=tempfile.mkdtemp(dir=self._base_dir),
+          custom_objects=custom_objects)
+      est.train(input_fn=train_input_fn, steps=1)
+
+  def test_tf_config(self):
+    keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer='rmsprop',
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+    tf_config = json.dumps({
+        'cluster': {
+            run_config_lib.TaskType.PS: ['localhost:1234'],
+            run_config_lib.TaskType.WORKER: ['localhost:1236'],
+            run_config_lib.TaskType.MASTER: ['localhost:1238']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      with self.cached_session():
+        keras_lib.model_to_estimator(
+            keras_model=keras_model,
+            model_dir=tempfile.mkdtemp(dir=self._base_dir))
+
+  def test_gpu_config(self):
+    with ops.Graph().as_default():
+      keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
+      keras_model.compile(
+          loss='categorical_crossentropy',
+          optimizer='rmsprop',
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+      gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.3)
+      sess_config = config_pb2.ConfigProto(gpu_options=gpu_options)
+      self._config._session_config = sess_config
+      with self.cached_session():
+        keras_lib.model_to_estimator(
+            keras_model=keras_model, config=self._config)
+        self.assertEqual(
+            keras.backend.get_session()
+            ._config.gpu_options.per_process_gpu_memory_fraction,
+            gpu_options.per_process_gpu_memory_fraction)
+
+  def test_with_empty_config(self):
+    keras_model, _, _, _, _ = get_resource_for_simple_model(
+        model_type='sequential', is_evaluate=True)
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer='rmsprop',
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+    with self.cached_session():
+      est_keras = keras_lib.model_to_estimator(
+          keras_model=keras_model, model_dir=self._base_dir,
+          config=run_config_lib.RunConfig())
+      self.assertEqual(run_config_lib.get_default_session_config(),
+                       est_keras._session_config)
+      self.assertEqual(est_keras._session_config,
+                       est_keras._config.session_config)
+      self.assertEqual(self._base_dir, est_keras._config.model_dir)
+      self.assertEqual(self._base_dir, est_keras._model_dir)
+
+    with self.cached_session():
+      est_keras = keras_lib.model_to_estimator(
+          keras_model=keras_model, model_dir=self._base_dir,
+          config=None)
+      self.assertEqual(run_config_lib.get_default_session_config(),
+                       est_keras._session_config)
+      self.assertEqual(est_keras._session_config,
+                       est_keras._config.session_config)
+      self.assertEqual(self._base_dir, est_keras._config.model_dir)
+      self.assertEqual(self._base_dir, est_keras._model_dir)
+
+  def test_with_empty_config_and_empty_model_dir(self):
+    keras_model, _, _, _, _ = get_resource_for_simple_model(
+        model_type='sequential', is_evaluate=True)
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer='rmsprop',
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+    with self.cached_session():
+      with test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR):
+        est_keras = keras_lib.model_to_estimator(
+            keras_model=keras_model,
+            config=run_config_lib.RunConfig())
+        self.assertEqual(est_keras._model_dir, _TMP_DIR)
+
+  def test_with_conflicting_model_dir_and_config(self):
+    keras_model, _, _, _, _ = get_resource_for_simple_model(
+        model_type='sequential', is_evaluate=True)
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer='rmsprop',
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
+
+    with self.cached_session():
+      with self.assertRaisesRegexp(ValueError, '`model_dir` are set both in '
+                                   'constructor and `RunConfig`'):
+        keras_lib.model_to_estimator(
+            keras_model=keras_model, model_dir=self._base_dir,
+            config=run_config_lib.RunConfig(model_dir=_TMP_DIR))
+
+  def test_pretrained_weights(self):
+    keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer=rmsprop.RMSPropOptimizer(1e-3),
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
+    with self.cached_session():
+      keras_model.train_on_batch(
+          np.random.random((10,) + _INPUT_SIZE),
+          np.random.random((10, _NUM_CLASS)))
+      weights = keras_model.get_weights()
+      keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model()
+      keras_model.set_weights(weights)
+      keras_model.compile(
+          loss='categorical_crossentropy',
+          optimizer=SGD(lr=0.0001, momentum=0.9),
+          metrics=['mse', keras.metrics.CategoricalAccuracy()])
+      keras_lib.model_to_estimator(
+          keras_model=keras_model, config=self._config)
+
+  def assert_increasing_global_step(self, optimizer):
+    keras_model, _, _, train_input_fn, _ = get_resource_for_simple_model(
+        model_type='sequential', is_evaluate=True)
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer=optimizer,
+        metrics=['mse', keras.metrics.CategoricalAccuracy()])
+    with self.cached_session() as sess:
+      keras_model_fn = keras_lib._create_keras_model_fn(keras_model)
+      global_step = training_util.create_global_step()
+      features, labels = train_input_fn().make_one_shot_iterator().get_next()
+      spec = keras_model_fn(features, labels, mode=model_fn_lib.ModeKeys.TRAIN)
+
+      sess.run(variables.global_variables_initializer())
+      sess.run(variables.local_variables_initializer())
+
+      self.assertEqual(global_step.eval(), 0)  # Sanity check
+      sess.run(spec.train_op)
+      self.assertEqual(global_step.eval(), 1)
+
+  def test_model_fn_increments_global_step_tf_optimizer(self):
+    self.assert_increasing_global_step(rmsprop.RMSPropOptimizer(1e-3))
+
+  def test_model_fn_increments_global_step_keras_optimizer(self):
+    self.assert_increasing_global_step('rmsprop')
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 3bb3d5785d..824789467d 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,509 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""model_fn python module.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Classes and methods related to model_fn."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator import model_fn
+import collections
 
-# Include attrs that start with single underscore.
-model_fn.__all__ = [s for s in dir(model_fn) if not s.startswith('__')]
+import six
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.model_fn import *
+from tensorflow.python.estimator.export import export_output as export_output_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.keras.metrics import Metric
+from tensorflow.python.ops import array_ops
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model import tag_constants
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.util import nest
+from tensorflow.python.util.tf_export import estimator_export
+
+
+@estimator_export('estimator.ModeKeys')
+class ModeKeys(object):
+  """Standard names for model modes.
+
+  The following standard keys are defined:
+
+  * `TRAIN`: training mode.
+  * `EVAL`: evaluation mode.
+  * `PREDICT`: inference mode.
+  """
+
+  TRAIN = 'train'
+  EVAL = 'eval'
+  PREDICT = 'infer'
+
+
+LOSS_METRIC_KEY = 'loss'
+AVERAGE_LOSS_METRIC_KEY = 'average_loss'
+
+# Mapping of the modes to appropriate tag_constants that are used for saving.
+EXPORT_TAG_MAP = {
+    ModeKeys.PREDICT: [tag_constants.SERVING],
+    ModeKeys.TRAIN: [tag_constants.TRAINING],
+    ModeKeys.EVAL: [tag_constants.EVAL],
+}
+
+
+@estimator_export('estimator.EstimatorSpec')
+class EstimatorSpec(
+    collections.namedtuple('EstimatorSpec', [
+        'mode', 'predictions', 'loss', 'train_op', 'eval_metric_ops',
+        'export_outputs', 'training_chief_hooks', 'training_hooks', 'scaffold',
+        'evaluation_hooks', 'prediction_hooks'
+    ])):
+  """Ops and objects returned from a `model_fn` and passed to an `Estimator`.
+
+  `EstimatorSpec` fully defines the model to be run by an `Estimator`.
+  """
+
+  def __new__(cls,
+              mode,
+              predictions=None,
+              loss=None,
+              train_op=None,
+              eval_metric_ops=None,
+              export_outputs=None,
+              training_chief_hooks=None,
+              training_hooks=None,
+              scaffold=None,
+              evaluation_hooks=None,
+              prediction_hooks=None):
+    """Creates a validated `EstimatorSpec` instance.
+
+    Depending on the value of `mode`, different arguments are required. Namely
+
+    * For `mode == ModeKeys.TRAIN`: required fields are `loss` and `train_op`.
+    * For `mode == ModeKeys.EVAL`: required field is `loss`.
+    * For `mode == ModeKeys.PREDICT`: required fields are `predictions`.
+
+    model_fn can populate all arguments independent of mode. In this case, some
+    arguments will be ignored by an `Estimator`. E.g. `train_op` will be
+    ignored in eval and infer modes. Example:
+
+    ```python
+    def my_model_fn(features, labels, mode):
+      predictions = ...
+      loss = ...
+      train_op = ...
+      return tf.estimator.EstimatorSpec(
+          mode=mode,
+          predictions=predictions,
+          loss=loss,
+          train_op=train_op)
+    ```
+
+    Alternatively, model_fn can just populate the arguments appropriate to the
+    given mode. Example:
+
+    ```python
+    def my_model_fn(features, labels, mode):
+      if (mode == tf.estimator.ModeKeys.TRAIN or
+          mode == tf.estimator.ModeKeys.EVAL):
+        loss = ...
+      else:
+        loss = None
+      if mode == tf.estimator.ModeKeys.TRAIN:
+        train_op = ...
+      else:
+        train_op = None
+      if mode == tf.estimator.ModeKeys.PREDICT:
+        predictions = ...
+      else:
+        predictions = None
+
+      return tf.estimator.EstimatorSpec(
+          mode=mode,
+          predictions=predictions,
+          loss=loss,
+          train_op=train_op)
+    ```
+
+    Args:
+      mode: A `ModeKeys`. Specifies if this is training, evaluation or
+        prediction.
+      predictions: Predictions `Tensor` or dict of `Tensor`.
+      loss: Training loss `Tensor`. Must be either scalar, or with shape `[1]`.
+      train_op: Op for the training step.
+      eval_metric_ops: Dict of metric results keyed by name.
+        The values of the dict can be one of the following:
+        (1) instance of `Metric` class.
+        (2) Results of calling a metric function, namely a
+        `(metric_tensor, update_op)` tuple. `metric_tensor` should be
+        evaluated without any impact on state (typically is a pure computation
+        results based on variables.). For example, it should not trigger the
+        `update_op` or requires any input fetching.
+      export_outputs: Describes the output signatures to be exported to
+        `SavedModel` and used during serving.
+        A dict `{name: output}` where:
+        * name: An arbitrary name for this output.
+        * output: an `ExportOutput` object such as `ClassificationOutput`,
+            `RegressionOutput`, or `PredictOutput`.
+        Single-headed models only need to specify one entry in this dictionary.
+        Multi-headed models should specify one entry for each head, one of
+        which must be named using
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.
+        If no entry is provided, a default `PredictOutput` mapping to
+        `predictions` will be created.
+      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
+      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        on all workers during training.
+      scaffold: A `tf.train.Scaffold` object that can be used to set
+        initialization, saver, and more to be used in training.
+      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run during evaluation.
+      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run during predictions.
+
+    Returns:
+      A validated `EstimatorSpec` object.
+
+    Raises:
+      ValueError: If validation fails.
+      TypeError: If any of the arguments is not the expected type.
+    """
+    # Validate train_op.
+    if train_op is None:
+      if mode == ModeKeys.TRAIN:
+        raise ValueError('Missing train_op.')
+    else:
+      _check_is_tensor_or_operation(train_op, 'train_op')
+
+    # Validate loss.
+    if loss is None:
+      if mode in (ModeKeys.TRAIN, ModeKeys.EVAL):
+        raise ValueError('Missing loss.')
+    else:
+      loss = _check_is_tensor(loss, 'loss')
+      loss_shape = loss.get_shape()
+      if loss_shape.num_elements() not in (None, 1):
+        raise ValueError('Loss must be scalar, given: {}'.format(loss))
+      if not loss_shape.is_compatible_with(tensor_shape.scalar()):
+        loss = array_ops.reshape(loss, [])
+
+    # Validate predictions.
+    if predictions is None:
+      if mode == ModeKeys.PREDICT:
+        raise ValueError('Missing predictions.')
+      predictions = {}
+    else:
+      if isinstance(predictions, dict):
+        predictions = {
+            k: _check_is_tensor(v, 'predictions[{}]'.format(k))
+            for k, v in six.iteritems(predictions)
+        }
+      else:
+        predictions = _check_is_tensor(predictions, 'predictions')
+
+    # Validate eval_metric_ops.
+    if eval_metric_ops is None:
+      eval_metric_ops = {}
+    else:
+      if not isinstance(eval_metric_ops, dict):
+        raise TypeError(
+            'eval_metric_ops must be a dict, given: {}'.format(eval_metric_ops))
+      for key, value in six.iteritems(eval_metric_ops):
+        # TODO(psv): When we deprecate the old metrics, throw an error here if
+        # the value is not an instance of `Metric` class.
+        if isinstance(value, Metric):
+          if not value.updates:  # Check if metrics updates are available.
+            raise ValueError(
+                'Please call update_state(...) on the "{metric_name}" metric'
+                .format(metric_name=value.name))
+        else:
+          if not isinstance(value, tuple) or len(value) != 2:
+            raise TypeError(
+                'Values of eval_metric_ops must be (metric_value, update_op) '
+                'tuples, given: {} for key: {}'.format(value, key))
+          metric_value, metric_update = value
+          for metric_value_member in nest.flatten(metric_value):
+            # Allow (possibly nested) tuples for metric values, but require that
+            # each of them be Tensors or Operations.
+            _check_is_tensor_or_operation(metric_value_member,
+                                          'eval_metric_ops[{}]'.format(key))
+          _check_is_tensor_or_operation(metric_update,
+                                        'eval_metric_ops[{}]'.format(key))
+
+    # Validate the passed export outputs, or generate defaults.
+    if mode == ModeKeys.PREDICT:
+      export_outputs = _get_export_outputs(export_outputs, predictions)
+
+    # Validate that all tensors and ops are from the default graph.
+    default_graph = ops.get_default_graph()
+
+    # We enumerate possible error causes here to aid in debugging.
+    error_message_template = (
+        '{0} with "{1}" must be from the default graph. '
+        'Possible causes of this error include: \n\n'
+        '1) {0} was created outside the context of the default graph.'
+        '\n\n'
+        '2) The object passed through to EstimatorSpec was not created '
+        'in the most recent call to "model_fn".')
+
+    if isinstance(predictions, dict):
+      for key, value in six.iteritems(predictions):
+        if value.graph is not default_graph:
+          raise ValueError(error_message_template.format(
+              'prediction values',
+              '{0}: {1}'.format(key, value.name)))
+    elif predictions is not None:
+      # 'predictions' must be a single Tensor.
+      if predictions.graph is not default_graph:
+        raise ValueError(error_message_template.format(
+            'prediction values', predictions.name))
+
+    if loss is not None and loss.graph is not default_graph:
+      raise ValueError(error_message_template.format('loss', loss.name))
+    if train_op is not None and train_op.graph is not default_graph:
+      raise ValueError(error_message_template.format('train_op', train_op.name))
+    for key, value in list(six.iteritems(eval_metric_ops)):
+      if isinstance(value, Metric):
+        values_to_check = value.updates[:]
+        values_to_check.append(value.result())
+      else:
+        values_to_check = nest.flatten(value)
+      for val in values_to_check:
+        if val.graph is not default_graph:
+          raise ValueError(error_message_template.format(
+              'eval_metric_ops',
+              '{0}: {1}'.format(key, val.name)))
+
+    # Validate hooks.
+    training_chief_hooks = tuple(training_chief_hooks or [])
+    training_hooks = tuple(training_hooks or [])
+    evaluation_hooks = tuple(evaluation_hooks or [])
+    prediction_hooks = tuple(prediction_hooks or [])
+
+    for hook in (training_hooks + training_chief_hooks + evaluation_hooks +
+                 prediction_hooks):
+      if not isinstance(hook, session_run_hook.SessionRunHook):
+        raise TypeError(
+            'All hooks must be SessionRunHook instances, given: {}'.format(
+                hook))
+
+    # Add metric variables to the `LOCAL_VARIABLES` collection. Metric variables
+    # are by default not added to any collections. We are doing this here, so
+    # that metric variables get initialized.
+    local_vars = set(ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES))
+    vars_to_add = set()
+    for key, value in six.iteritems(eval_metric_ops):
+      if isinstance(value, Metric):
+        vars_to_add.update(value.variables)
+        # Convert Metric instances to (value_tensor, update_op) tuple.
+        eval_metric_ops[key] = (value.result(), value.updates[0])
+    # Remove variables that are in the local variables collection already.
+    vars_to_add = vars_to_add.difference(local_vars)
+    for v in vars_to_add:
+      ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, v)
+
+    scaffold = scaffold or monitored_session.Scaffold()
+    # Validate scaffold.
+    if not isinstance(scaffold, monitored_session.Scaffold):
+      raise TypeError(
+          'scaffold must be tf.train.Scaffold. Given: {}'.format(scaffold))
+
+    return super(EstimatorSpec, cls).__new__(
+        cls,
+        mode=mode,
+        predictions=predictions,
+        loss=loss,
+        train_op=train_op,
+        eval_metric_ops=eval_metric_ops,
+        export_outputs=export_outputs,
+        training_chief_hooks=training_chief_hooks,
+        training_hooks=training_hooks,
+        scaffold=scaffold,
+        evaluation_hooks=evaluation_hooks,
+        prediction_hooks=prediction_hooks)
+
+  def _replace(self, **kwds):
+    """Return a new EstimatorSpec replacing specified fields with new values."""
+    if 'mode' in kwds:
+      if self.mode != kwds['mode']:
+        raise ValueError('mode of EstimatorSpec cannot be changed.')
+    new_fields = map(kwds.pop, self._fields, list(self))
+    return EstimatorSpec(*new_fields)
+
+
+def _get_export_outputs(export_outputs, predictions):
+  """Validate export_outputs or create default export_outputs.
+
+  Args:
+    export_outputs: Describes the output signatures to be exported to
+      `SavedModel` and used during serving. Should be a dict or None.
+    predictions:  Predictions `Tensor` or dict of `Tensor`.
+
+  Returns:
+    Valid export_outputs dict
+
+  Raises:
+    TypeError: if export_outputs is not a dict or its values are not
+      ExportOutput instances.
+  """
+  if export_outputs is None:
+    default_output = export_output_lib.PredictOutput(predictions)
+    export_outputs = {
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: default_output}
+
+  if not isinstance(export_outputs, dict):
+    raise TypeError('export_outputs must be dict, given: {}'.format(
+        export_outputs))
+  for v in six.itervalues(export_outputs):
+    if not isinstance(v, export_output_lib.ExportOutput):
+      raise TypeError(
+          'Values in export_outputs must be ExportOutput objects. '
+          'Given: {}'.format(export_outputs))
+
+  _maybe_add_default_serving_output(export_outputs)
+
+  return export_outputs
+
+
+def _maybe_add_default_serving_output(export_outputs):
+  """Add a default serving output to the export_outputs if not present.
+
+  Args:
+    export_outputs: Describes the output signatures to be exported to
+      `SavedModel` and used during serving. Should be a dict.
+
+  Returns:
+    export_outputs dict with default serving signature added if necessary
+
+  Raises:
+    ValueError: if multiple export_outputs were provided without a default
+      serving key.
+  """
+  if len(export_outputs) == 1:
+    (key, value), = export_outputs.items()
+    if key != signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+      export_outputs[
+          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = value
+  if len(export_outputs) > 1:
+    if (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+        not in export_outputs):
+      raise ValueError(
+          'Multiple export_outputs were provided, but none of them is '
+          'specified as the default.  Do this by naming one of them with '
+          'signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.')
+
+  return export_outputs
+
+
+class _TPUEstimatorSpec(
+    collections.namedtuple('TPUEstimatorSpec', [
+        'mode', 'predictions', 'loss', 'train_op', 'eval_metrics',
+        'export_outputs', 'scaffold_fn', 'host_call', 'training_hooks',
+        'evaluation_hooks', 'prediction_hooks'
+    ])):
+  """Ops and objects returned from a `model_fn` and passed to `TPUEstimator`.
+
+  This is a simplified implementation of `tf.contrib.tpu.EstimatorSpec`. See
+  tensorflow/contrib/tpu/python/tpu/tpu_estimator.py for more detailed
+  documentation.
+  """
+
+  def __new__(cls,
+              mode,
+              predictions=None,
+              loss=None,
+              train_op=None,
+              eval_metrics=None,
+              export_outputs=None,
+              scaffold_fn=None,
+              host_call=None,
+              training_hooks=None,
+              evaluation_hooks=None,
+              prediction_hooks=None):
+    """Creates a `_TPUEstimatorSpec` instance."""
+    return super(_TPUEstimatorSpec, cls).__new__(
+        cls,
+        mode=mode,
+        predictions=predictions,
+        loss=loss,
+        train_op=train_op,
+        eval_metrics=eval_metrics,
+        export_outputs=export_outputs,
+        scaffold_fn=scaffold_fn,
+        host_call=host_call,
+        training_hooks=training_hooks,
+        evaluation_hooks=evaluation_hooks,
+        prediction_hooks=prediction_hooks)
+
+  def as_estimator_spec(self):
+    """Creates an equivalent `EstimatorSpec` used by CPU train/eval."""
+    if not self.eval_metrics:
+      eval_metric_ops = None
+    else:
+      metric_fn, tensors = self.eval_metrics
+      eval_metric_ops = metric_fn(**tensors)
+    return EstimatorSpec(
+        mode=self.mode,
+        predictions=self.predictions,
+        loss=self.loss,
+        train_op=self.train_op,
+        eval_metric_ops=eval_metric_ops,
+        export_outputs=self.export_outputs,
+        training_hooks=self.training_hooks,
+        evaluation_hooks=self.evaluation_hooks,
+        prediction_hooks=self.prediction_hooks)
+
+
+def _check_is_tensor_or_operation(x, name):
+  if not (isinstance(x, ops.Operation) or ops.is_dense_tensor_like(x)):
+    raise TypeError('{} must be Operation or Tensor, given: {}'.format(name, x))
+
+
+def _check_is_tensor(x, tensor_name):
+  """Returns `x` if it is a `Tensor`, raises TypeError otherwise."""
+  if not ops.is_dense_tensor_like(x):
+    raise TypeError('{} must be Tensor, given: {}'.format(tensor_name, x))
+  return x
+
+
+def export_outputs_for_mode(
+    mode, serving_export_outputs=None, predictions=None, loss=None,
+    metrics=None):
+  """Util function for constructing a `ExportOutput` dict given a mode.
+
+  The returned dict can be directly passed to `build_all_signature_defs` helper
+  function as the `export_outputs` argument, used for generating a SignatureDef
+  map.
+
+  Args:
+    mode: A `ModeKeys` specifying the mode.
+    serving_export_outputs: Describes the output signatures to be exported to
+      `SavedModel` and used during serving. Should be a dict or None.
+    predictions: A dict of Tensors or single Tensor representing model
+        predictions. This argument is only used if serving_export_outputs is not
+        set.
+    loss: A dict of Tensors or single Tensor representing calculated loss.
+    metrics: A dict of (metric_value, update_op) tuples, or a single tuple.
+      metric_value must be a Tensor, and update_op must be a Tensor or Op
+
+  Returns:
+    Dictionary mapping the a key to an `tf.estimator.export.ExportOutput` object
+    The key is the expected SignatureDef key for the mode.
+
+  Raises:
+    ValueError: if an appropriate ExportOutput cannot be found for the mode.
+  """
+  # TODO(b/113185250): move all model export helper functions into an util file.
+  if mode == ModeKeys.PREDICT:
+    return _get_export_outputs(serving_export_outputs, predictions)
+  elif mode == ModeKeys.TRAIN:
+    return {mode: export_output_lib.TrainOutput(
+        loss=loss, predictions=predictions, metrics=metrics)}
+  elif mode == ModeKeys.EVAL:
+    return {mode: export_output_lib.EvalOutput(
+        loss=loss, predictions=predictions, metrics=metrics)}
+  else:
+    raise ValueError(
+        'Export output type not found for mode: {}'.format(mode))
diff --git a/tensorflow/python/estimator/model_fn_test.py b/tensorflow/python/estimator/model_fn_test.py
new file mode 100644
index 0000000000..8a3a9f3f51
--- /dev/null
+++ b/tensorflow/python/estimator/model_fn_test.py
@@ -0,0 +1,661 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for model_fn.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.estimator import model_fn
+from tensorflow.python.estimator.export import export_output
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.keras import metrics
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import session_run_hook
+
+
+class _FakeHook(session_run_hook.SessionRunHook):
+  """Fake implementation of `SessionRunHook`."""
+
+
+class _InvalidHook(object):
+  """Invalid hook (not a subclass of `SessionRunHook`)."""
+
+
+class _InvalidScaffold(object):
+  """Invalid scaffold (not a subclass of `Scaffold`)."""
+
+
+class EstimatorSpecTrainTest(test.TestCase):
+  """Tests EstimatorSpec in train mode."""
+
+  def testRequiredArgumentsSet(self):
+    """Tests that no errors are raised when all required arguments are set."""
+    with ops.Graph().as_default(), self.cached_session():
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.TRAIN,
+          loss=constant_op.constant(1.),
+          train_op=control_flow_ops.no_op())
+
+  def testAllArgumentsSet(self):
+    """Tests that no errors are raised when all arguments are set."""
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      predictions = {'loss': loss}
+      classes = constant_op.constant('hello')
+      metric_obj = metrics.Mean()
+      metric_obj.update_state(loss)
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.TRAIN,
+          predictions=predictions,
+          loss=loss,
+          train_op=control_flow_ops.no_op(),
+          eval_metric_ops={
+              'loss': (control_flow_ops.no_op(), loss),
+              'mean': metric_obj,
+          },
+          export_outputs={
+              'head_name': export_output.ClassificationOutput(classes=classes)
+          },
+          training_chief_hooks=[_FakeHook()],
+          training_hooks=[_FakeHook()],
+          scaffold=monitored_session.Scaffold(),
+          evaluation_hooks=[_FakeHook()],
+          prediction_hooks=[_FakeHook()])
+
+  def testLossNumber(self):
+    """Tests that error is raised when loss is a number (not Tensor)."""
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN,
+            loss=1.,
+            train_op=control_flow_ops.no_op())
+
+  def testLoss1DTensor(self):
+    """Tests that no errors are raised when loss is 1D tensor."""
+    with ops.Graph().as_default(), self.cached_session():
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.TRAIN,
+          loss=constant_op.constant([1.]),
+          train_op=control_flow_ops.no_op())
+
+  def testLossMissing(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(ValueError, 'Missing loss'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN, train_op=control_flow_ops.no_op())
+
+  def testLossNotScalar(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(ValueError, 'Loss must be scalar'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN,
+            loss=constant_op.constant([1., 2.]),
+            train_op=control_flow_ops.no_op())
+
+  def testLossSparseTensor(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = sparse_tensor.SparseTensor(
+          indices=[[0]],
+          values=[0.],
+          dense_shape=[1])
+      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN,
+            loss=loss,
+            train_op=control_flow_ops.no_op())
+
+  def testLossFromDifferentGraph(self):
+    with ops.Graph().as_default():
+      loss = constant_op.constant(1.)
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          ValueError, 'must be from the default graph'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN,
+            loss=loss,
+            train_op=control_flow_ops.no_op())
+
+  def testTrainOpMissing(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(ValueError, 'Missing train_op'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN, loss=constant_op.constant(1.))
+
+  def testTrainOpNotOperationAndTensor(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(TypeError,
+                                   'train_op must be Operation or Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN,
+            loss=constant_op.constant(1.),
+            train_op='Not an Operation or Tensor')
+
+  def testTrainOpFromDifferentGraph(self):
+    with ops.Graph().as_default():
+      train_op = control_flow_ops.no_op()
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          ValueError, 'must be from the default graph'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN,
+            loss=constant_op.constant(1.),
+            train_op=train_op)
+
+  def testTrainingChiefHookInvalid(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          TypeError, 'All hooks must be SessionRunHook instances'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN,
+            loss=constant_op.constant(1.),
+            train_op=control_flow_ops.no_op(),
+            training_chief_hooks=[_InvalidHook()])
+
+  def testTrainingHookInvalid(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          TypeError, 'All hooks must be SessionRunHook instances'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN,
+            loss=constant_op.constant(1.),
+            train_op=control_flow_ops.no_op(),
+            training_hooks=[_InvalidHook()])
+
+  def testScaffoldInvalid(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          TypeError, r'scaffold must be tf\.train\.Scaffold'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.TRAIN,
+            loss=constant_op.constant(1.),
+            train_op=control_flow_ops.no_op(),
+            scaffold=_InvalidScaffold())
+
+  def testReturnDefaultScaffold(self):
+    with ops.Graph().as_default(), self.cached_session():
+      estimator_spec = model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.TRAIN,
+          loss=constant_op.constant(1.),
+          train_op=control_flow_ops.no_op())
+      self.assertIsNotNone(estimator_spec.scaffold)
+
+
+class EstimatorSpecEvalTest(test.TestCase):
+  """Tests EstimatorSpec in eval mode."""
+
+  def testRequiredArgumentsSet(self):
+    """Tests that no errors are raised when all required arguments are set."""
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL,
+          predictions={'loss': loss},
+          loss=loss)
+
+  def testAllArgumentsSet(self):
+    """Tests that no errors are raised when all arguments are set."""
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      predictions = {'loss': loss}
+      classes = constant_op.constant('hello')
+      metric_obj = metrics.Mean()
+      metric_obj.update_state(loss)
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL,
+          predictions=predictions,
+          loss=loss,
+          train_op=control_flow_ops.no_op(),
+          eval_metric_ops={
+              'loss': (control_flow_ops.no_op(), loss),
+              'mean': metric_obj,
+          },
+          export_outputs={
+              'head_name': export_output.ClassificationOutput(classes=classes)
+          },
+          training_chief_hooks=[_FakeHook()],
+          training_hooks=[_FakeHook()],
+          scaffold=monitored_session.Scaffold(),
+          evaluation_hooks=[_FakeHook()])
+
+  def testEvaluationHookInvalid(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          TypeError, 'All hooks must be SessionRunHook instances'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            loss=constant_op.constant(1.),
+            evaluation_hooks=[_InvalidHook()])
+
+  def testTupleMetric(self):
+    """Tests that no errors are raised when a metric is tuple-valued."""
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL,
+          loss=loss,
+          eval_metric_ops={
+              'some_metric': ((loss, loss, (constant_op.constant(2), loss)),
+                              control_flow_ops.no_op())})
+
+  def testLoss1DTensor(self):
+    """Tests that no errors are raised when loss is 1D tensor."""
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant([1.])
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL,
+          predictions={'loss': loss},
+          loss=loss)
+
+  def testLossNumber(self):
+    """Tests that error is raised when loss is a number (not Tensor)."""
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(TypeError, 'loss must be Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': constant_op.constant(1.)},
+            loss=1.)
+
+  def testLossMissing(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(ValueError, 'Missing loss'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': constant_op.constant(1.)})
+
+  def testLossNotScalar(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant([1., 2.])
+      with self.assertRaisesRegexp(ValueError, 'Loss must be scalar'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': loss},
+            loss=loss)
+
+  def testLossSparseTensor(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = sparse_tensor.SparseTensor(
+          indices=[[0]],
+          values=[0.],
+          dense_shape=[1])
+      with self.assertRaisesRegexp(
+          TypeError, 'loss must be Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'prediction': constant_op.constant(1.)},
+            loss=loss)
+
+  def testLossFromDifferentGraph(self):
+    with ops.Graph().as_default():
+      loss = constant_op.constant(1.)
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          ValueError, 'must be from the default graph'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'prediction': constant_op.constant(1.)},
+            loss=loss)
+
+  def testReplaceRaisesConstructorChecks(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      spec = model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)
+      with self.assertRaisesRegexp(ValueError, 'Loss must be scalar'):
+        spec._replace(loss=constant_op.constant([1., 2.]))
+
+  def testReplaceDoesReplace(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      spec = model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)
+      new_spec = spec._replace(predictions={'m': loss})
+      self.assertEqual(['m'], list(new_spec.predictions.keys()))
+
+  def testReplaceNotAllowModeChange(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      spec = model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL, predictions={'loss': loss}, loss=loss)
+      spec._replace(mode=model_fn.ModeKeys.EVAL)
+      with self.assertRaisesRegexp(ValueError,
+                                   'mode of EstimatorSpec cannot be changed'):
+        spec._replace(mode=model_fn.ModeKeys.TRAIN)
+
+  def testPredictionsMissingIsOkay(self):
+    with ops.Graph().as_default(), self.cached_session():
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL, loss=constant_op.constant(1.))
+
+  def testPredictionsTensor(self):
+    """Tests that no error is raised when predictions is Tensor (not dict)."""
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.EVAL,
+          predictions=loss,
+          loss=loss)
+
+  def testPredictionsNumber(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          TypeError, r'predictions\[number\] must be Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'number': 1.},
+            loss=constant_op.constant(1.))
+
+  def testPredictionsSparseTensor(self):
+    with ops.Graph().as_default(), self.cached_session():
+      predictions = {
+          'sparse': sparse_tensor.SparseTensor(
+              indices=[[0]],
+              values=[0.],
+              dense_shape=[1])}
+      with self.assertRaisesRegexp(
+          TypeError, r'predictions\[sparse\] must be Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions=predictions,
+            loss=constant_op.constant(1.))
+
+  def testPredictionsFromDifferentGraph(self):
+    with ops.Graph().as_default():
+      predictions = {'loss': constant_op.constant(1.)}
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          ValueError, 'must be from the default graph'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions=predictions,
+            loss=constant_op.constant(1.))
+
+  def testEvalMetricOpsNoDict(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      with self.assertRaisesRegexp(
+          TypeError, 'eval_metric_ops must be a dict'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': loss},
+            loss=loss,
+            eval_metric_ops=loss)
+
+  def testEvalMetricOpsNoTuple(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      with self.assertRaisesRegexp(
+          TypeError,
+          (r'Values of eval_metric_ops must be \(metric_value, update_op\) '
+           'tuples')):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': loss},
+            loss=loss,
+            eval_metric_ops={'loss': loss})
+
+  def testEvalMetricOpsNoTensorOrOperation(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      with self.assertRaisesRegexp(TypeError, 'must be Operation or Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': loss},
+            loss=loss,
+            eval_metric_ops={'loss': ('NonTensor', loss)})
+
+  def testEvalMetricNestedNoTensorOrOperation(self):
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      with self.assertRaisesRegexp(TypeError, 'must be Operation or Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': loss},
+            loss=loss,
+            eval_metric_ops={'loss': ((('NonTensor',),),
+                                      control_flow_ops.no_op())})
+
+  def testEvalMetricOpsFromDifferentGraphWithMetricTuple(self):
+    with ops.Graph().as_default():
+      eval_metric_ops = {
+          'loss': (control_flow_ops.no_op(), constant_op.constant(1.))}
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      with self.assertRaisesRegexp(
+          ValueError, 'must be from the default graph'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': loss},
+            loss=loss,
+            eval_metric_ops=eval_metric_ops)
+
+  def testEvalMetricOpsFromDifferentGraphWithMetricObject(self):
+    with ops.Graph().as_default():
+      metric_obj = metrics.Mean()
+      metric_obj.update_state(constant_op.constant(1.))
+      eval_metric_ops = {'metric': metric_obj}
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      with self.assertRaisesRegexp(
+          ValueError, 'must be from the default graph'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': loss},
+            loss=loss,
+            eval_metric_ops=eval_metric_ops)
+
+  def testEvalMetricOpsWithoutUpdates(self):
+    with ops.Graph().as_default():
+      eval_metric_ops = {'mean': metrics.Mean()}
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      with self.assertRaisesRegexp(ValueError, 'Please call update_state(...)'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.EVAL,
+            predictions={'loss': loss},
+            loss=loss,
+            eval_metric_ops=eval_metric_ops)
+
+
+class EstimatorSpecInferTest(test.TestCase):
+  """Tests EstimatorSpec in infer mode."""
+
+  def testRequiredArgumentsSet(self):
+    """Tests that no errors are raised when all required arguments are set."""
+    with ops.Graph().as_default(), self.cached_session():
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.PREDICT,
+          predictions={'loss': constant_op.constant(1.)})
+
+  def testAllArgumentsSet(self):
+    """Tests that no errors are raised when all arguments are set."""
+    with ops.Graph().as_default(), self.cached_session():
+      loss = constant_op.constant(1.)
+      predictions = {'loss': loss}
+      classes = constant_op.constant('hello')
+      metric_obj = metrics.Mean()
+      metric_obj.update_state(loss)
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.PREDICT,
+          predictions=predictions,
+          loss=loss,
+          train_op=control_flow_ops.no_op(),
+          eval_metric_ops={
+              'loss': (control_flow_ops.no_op(), loss),
+              'mean': metric_obj,
+          },
+          export_outputs={
+              'head_name': export_output.ClassificationOutput(classes=classes)
+          },
+          training_chief_hooks=[_FakeHook()],
+          training_hooks=[_FakeHook()],
+          scaffold=monitored_session.Scaffold(),
+          evaluation_hooks=[_FakeHook()],
+          prediction_hooks=[_FakeHook()])
+
+  def testPredictionHookInvalid(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          TypeError, 'All hooks must be SessionRunHook instances'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.PREDICT,
+            predictions=constant_op.constant(1.),
+            prediction_hooks=[_InvalidHook()])
+
+  def testPredictionsMissing(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(ValueError, 'Missing predictions'):
+        model_fn.EstimatorSpec(mode=model_fn.ModeKeys.PREDICT)
+
+  def testPredictionsTensor(self):
+    """Tests that no error is raised when predictions is Tensor (not dict)."""
+    with ops.Graph().as_default(), self.cached_session():
+      model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.PREDICT, predictions=constant_op.constant(1.))
+
+  def testPredictionsNumber(self):
+    with ops.Graph().as_default(), self.cached_session():
+      with self.assertRaisesRegexp(
+          TypeError, r'predictions\[number\] must be Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.PREDICT, predictions={'number': 1.})
+
+  def testPredictionsSparseTensor(self):
+    with ops.Graph().as_default(), self.cached_session():
+      predictions = {
+          'sparse': sparse_tensor.SparseTensor(
+              indices=[[0]],
+              values=[0.],
+              dense_shape=[1])}
+      with self.assertRaisesRegexp(
+          TypeError, r'predictions\[sparse\] must be Tensor'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.PREDICT, predictions=predictions)
+
+  def testExportOutputsNoDict(self):
+    with ops.Graph().as_default(), self.cached_session():
+      predictions = {'loss': constant_op.constant(1.)}
+      classes = constant_op.constant('hello')
+      with self.assertRaisesRegexp(
+          TypeError, 'export_outputs must be dict'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.PREDICT,
+            predictions=predictions,
+            export_outputs=export_output.ClassificationOutput(classes=classes))
+
+  def testExportOutputsValueNotExportOutput(self):
+    with ops.Graph().as_default(), self.cached_session():
+      predictions = {'loss': constant_op.constant(1.)}
+      with self.assertRaisesRegexp(
+          TypeError,
+          r"Values in export_outputs must be ExportOutput objects. "
+          r"Given: {'head_name': {'loss': <tf.Tensor 'Const:0' shape=\(\) "
+          r"dtype=float32>}}"):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.PREDICT,
+            predictions=predictions,
+            export_outputs={'head_name': predictions})
+
+  def testExportOutputsSingleheadMissingDefault(self):
+    with ops.Graph().as_default(), self.cached_session():
+      predictions = {'loss': constant_op.constant(1.)}
+      output_1 = constant_op.constant([1.])
+      regression_output = export_output.RegressionOutput(value=output_1)
+      export_outputs = {
+          'head-1': regression_output,
+          }
+      estimator_spec = model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.PREDICT,
+          predictions=predictions,
+          export_outputs=export_outputs)
+      expected_export_outputs = {
+          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+          regression_output,
+          'head-1': regression_output,
+      }
+      self.assertEqual(expected_export_outputs, estimator_spec.export_outputs)
+
+  def testExportOutputsMultiheadWithDefault(self):
+    with ops.Graph().as_default(), self.cached_session():
+      predictions = {'loss': constant_op.constant(1.)}
+      output_1 = constant_op.constant([1.])
+      output_2 = constant_op.constant(['2'])
+      output_3 = constant_op.constant(['3'])
+      export_outputs = {
+          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+          export_output.RegressionOutput(value=output_1),
+          'head-2': export_output.ClassificationOutput(classes=output_2),
+          'head-3': export_output.PredictOutput(outputs={
+              'some_output_3': output_3
+          })}
+      estimator_spec = model_fn.EstimatorSpec(
+          mode=model_fn.ModeKeys.PREDICT,
+          predictions=predictions,
+          export_outputs=export_outputs)
+      self.assertEqual(export_outputs, estimator_spec.export_outputs)
+
+  def testExportOutputsMultiheadMissingDefault(self):
+    with ops.Graph().as_default(), self.cached_session():
+      predictions = {'loss': constant_op.constant(1.)}
+      output_1 = constant_op.constant([1.])
+      output_2 = constant_op.constant(['2'])
+      output_3 = constant_op.constant(['3'])
+      export_outputs = {
+          'head-1': export_output.RegressionOutput(value=output_1),
+          'head-2': export_output.ClassificationOutput(classes=output_2),
+          'head-3': export_output.PredictOutput(outputs={
+              'some_output_3': output_3
+          })}
+      with self.assertRaisesRegexp(
+          ValueError,
+          'Multiple export_outputs were provided, but none of them is '
+          'specified as the default.  Do this by naming one of them with '
+          'signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.'):
+        model_fn.EstimatorSpec(
+            mode=model_fn.ModeKeys.PREDICT,
+            predictions=predictions,
+            export_outputs=export_outputs)
+
+  def testDefaultExportOutputCreated(self):
+    """Ensure that a default PredictOutput is created for export."""
+    with ops.Graph().as_default(), self.cached_session():
+      predictions = constant_op.constant(1.)
+      self._assertDefaultExportOutputForPredictions(predictions)
+
+  def testDefaultExportOutputCreatedDict(self):
+    """Ensure that a default PredictOutput is created for export for dicts."""
+    with ops.Graph().as_default(), self.cached_session():
+      predictions = {'loss': constant_op.constant(1.),
+                     'score': constant_op.constant(10.)}
+      self._assertDefaultExportOutputForPredictions(predictions)
+
+  def _assertDefaultExportOutputForPredictions(self, predictions):
+    spec = model_fn.EstimatorSpec(
+        mode=model_fn.ModeKeys.PREDICT, predictions=predictions)
+
+    expected = export_output.PredictOutput(predictions).outputs
+    serving_output = spec.export_outputs[
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
+    self.assertEqual(serving_output.outputs, expected)
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py
index bccad8fe00..3773810a04 100644
--- a/tensorflow/python/estimator/run_config.py
+++ b/tensorflow/python/estimator/run_config.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,904 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""run_config python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Environment configuration object for Estimators."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator import run_config
+import copy
+import json
+import os
+
+import six
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
+from tensorflow.python.distribute import estimator_training as distribute_coordinator_training
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import server_lib
+from tensorflow.python.util import compat_internal
+from tensorflow.python.util import function_utils
+from tensorflow.python.util.tf_export import estimator_export
+
+
+_USE_DEFAULT = object()
+_VALID_DEVICE_FN_ARGS = set(['op'])
+
+# A list of the property names in RunConfig that the user is allowed to change.
+_DEFAULT_REPLACEABLE_LIST = [
+    'model_dir',
+    'tf_random_seed',
+    'save_summary_steps',
+    'save_checkpoints_steps',
+    'save_checkpoints_secs',
+    'session_config',
+    'keep_checkpoint_max',
+    'keep_checkpoint_every_n_hours',
+    'log_step_count_steps',
+    'train_distribute',
+    'device_fn',
+    'protocol',
+    'eval_distribute',
+    'experimental_distribute',
+]
+
+_SAVE_CKPT_ERR = (
+    '`save_checkpoints_steps` and `save_checkpoints_secs` cannot be both set.'
+)
+
+_TF_CONFIG_ENV = 'TF_CONFIG'
+_TASK_ENV_KEY = 'task'
+_TASK_TYPE_KEY = 'type'
+_TASK_ID_KEY = 'index'
+_CLUSTER_KEY = 'cluster'
+_SERVICE_KEY = 'service'
+_SESSION_MASTER_KEY = 'session_master'
+_EVAL_SESSION_MASTER_KEY = 'eval_session_master'
+_MODEL_DIR_KEY = 'model_dir'
+_LOCAL_MASTER = ''
+_GRPC_SCHEME = 'grpc://'
+
+
+def _get_session_master(cluster_spec, task_type, task_id, tf_config):
+  """Returns the appropriate address for TensorFlow master.
+
+  The order of precedence to deteremine the TF session master is as follows:
+  1. If `tf_session_master` is set in TF_CONFIG environment variable, takes it.
+  2. If the cluster has only one node, returns empty string ''.
+  3. Returns the grpc address according to the task type and id in the cluster.
+     This is between-graph replication.
+
+  Note: task_type and task_id must be validated. Typically, validated using
+  `_validate_task_type_and_task_id`.
+
+  Args:
+    cluster_spec: A `ClusterSpec` instance.
+    task_type: String. Task type for current node.
+    task_id: Int. Task id for current node.
+    tf_config: Dict. Python dict for the TF_CONFIG environment variable.
+
+  Raises:
+    RuntimeError: If `cluster_spec` is not set.
+
+  """
+  if _SESSION_MASTER_KEY in tf_config:
+    return tf_config[_SESSION_MASTER_KEY]
+
+  if not cluster_spec:
+    raise RuntimeError('Internal error: `_get_session_master` '
+                       'does not expect empty cluster_spec.')
+
+  jobs = cluster_spec.jobs
+
+  # If there is only one node in the cluster, do things locally by setting
+  # master to ''.  If a service or user sets TF_CONFIG with a single node, it's
+  # more performant to use a direct master rather than an RPC service.
+  if len(jobs) == 1 and len(cluster_spec.job_tasks(jobs[0])) == 1:
+    return _LOCAL_MASTER
+
+  # Lookup the master in cluster_spec using task_type and task_id,
+  # if possible.
+  addresses = cluster_spec.job_tasks(task_type)
+  return _GRPC_SCHEME + addresses[task_id]
+
+
+def _get_eval_session_master(task_type, tf_config):
+  """Returns the appropriate address for TensorFlow evaluation master."""
+  if task_type == TaskType.EVALUATOR:
+    return tf_config.get(_EVAL_SESSION_MASTER_KEY, _LOCAL_MASTER)
+
+  if _EVAL_SESSION_MASTER_KEY in tf_config:
+    raise ValueError('Key ({}) should not be set for task type other than {}. '
+                     'Task type: {}'.format(_EVAL_SESSION_MASTER_KEY,
+                                            TaskType.EVALUATOR, task_type))
+  return _LOCAL_MASTER
+
+
+def _count_ps(cluster_spec):
+  """Counts the number of parameter servers in cluster_spec."""
+  if not cluster_spec:
+    raise RuntimeError(
+        'Internal error: `_count_ps` does not expect empty cluster_spec.')
+
+  return len(cluster_spec.as_dict().get(TaskType.PS, []))
+
+
+def _count_worker(cluster_spec, chief_task_type):
+  """Counts the number of workers (including chief) in cluster_spec."""
+  if not cluster_spec:
+    raise RuntimeError(
+        'Internal error: `_count_worker` does not expect empty cluster_spec.')
+
+  return (len(cluster_spec.as_dict().get(TaskType.WORKER, [])) +
+          len(cluster_spec.as_dict().get(chief_task_type, [])))
+
+
+def _validate_service(service):
+  """Validates the service key."""
+  if service is not None and not isinstance(service, dict):
+    raise TypeError(
+        'If "service" is set in TF_CONFIG, it must be a dict. Given %s' %
+        type(service))
+  return service
+
+
+def _validate_task_type_and_task_id(cluster_spec, task_env, chief_task_type):
+  """Validates the task type and index in `task_env` according to cluster."""
+  if chief_task_type not in cluster_spec.jobs:
+    raise ValueError(
+        'If "cluster" is set in TF_CONFIG, it must have one "%s" node.' %
+        chief_task_type)
+  if len(cluster_spec.job_tasks(chief_task_type)) > 1:
+    raise ValueError(
+        'The "cluster" in TF_CONFIG must have only one "%s" node.' %
+        chief_task_type)
+
+  task_type = task_env.get(_TASK_TYPE_KEY, None)
+  task_id = task_env.get(_TASK_ID_KEY, None)
+
+  if not task_type:
+    raise ValueError(
+        'If "cluster" is set in TF_CONFIG, task type must be set.')
+  if task_id is None:
+    raise ValueError(
+        'If "cluster" is set in TF_CONFIG, task index must be set.')
+
+  task_id = int(task_id)
+
+  # Check the task id bounds. Upper bound is not necessary as
+  # - for evaluator, there is no upper bound.
+  # - for non-evaluator, task id is upper bounded by the number of jobs in
+  # cluster spec, which will be checked later (when retrieving the `master`)
+  if task_id < 0:
+    raise ValueError('Task index must be non-negative number.')
+
+  # Evaluator is not part of the training cluster.
+  if task_type == TaskType.EVALUATOR:
+    return task_type, task_id
+
+  if task_type not in cluster_spec.jobs:
+    raise ValueError(
+        '%s is not a valid task_type in the cluster_spec:\n'
+        '%s\n\n'
+        'Note that these values may be coming from the TF_CONFIG environment '
+        'variable.' % (task_type, cluster_spec))
+  addresses = cluster_spec.job_tasks(task_type)
+  if not 0 <= task_id < len(addresses):
+    raise ValueError(
+        '%d is not a valid task_id for task_type %s in the cluster_spec:\n'
+        '%s\n\n'
+        'Note that these values may be coming from the TF_CONFIG environment '
+        'variable.' % (task_id, task_type, cluster_spec))
+
+  return task_type, task_id
+
+
+def _get_global_id_in_cluster(
+    cluster_spec, task_type, task_id, chief_task_type):
+  """Returns the global id in cluster."""
+  # Note: This is implementation details, which user should not rely on.
+  # The first id is 0, which is always for the `chief` node. All other nodes,
+  # except `ps`, are ordered alphabetical based on task type (alphabetically)
+  # and task id (ascendingly). `ps` are ordered last.
+
+  # Sort task names in cluster
+  task_type_ordered_list = [chief_task_type]
+  task_type_ordered_list.extend([
+      t for t in sorted(cluster_spec.jobs)
+      if t != chief_task_type and t != TaskType.PS
+  ])
+  if TaskType.PS in cluster_spec.jobs:
+    task_type_ordered_list.append(TaskType.PS)
+
+  next_global_id = 0
+  for t in task_type_ordered_list:
+    if t == task_type:
+      return next_global_id + task_id
+    next_global_id += len(cluster_spec.job_tasks(t))
+
+  # This should never happen.
+  raise RuntimeError('Internal Error: `task_type` ({}) is not in '
+                     'cluster_spec ({}).'.format(task_type, cluster_spec))
+
+
+def _validate_save_ckpt_with_replaced_keys(new_copy, replaced_keys):
+  """Validates the save ckpt properties."""
+  # Ensure one (and only one) of save_steps and save_secs is not None.
+  # Also, if user sets one save ckpt property, say steps, the other one (secs)
+  # should be set as None to improve usability.
+
+  save_steps = new_copy.save_checkpoints_steps
+  save_secs = new_copy.save_checkpoints_secs
+
+  if ('save_checkpoints_steps' in replaced_keys and
+      'save_checkpoints_secs' in replaced_keys):
+    # If user sets both properties explicitly, we need to error out if both
+    # are set or neither of them are set.
+    if save_steps is not None and save_secs is not None:
+      raise ValueError(_SAVE_CKPT_ERR)
+  elif 'save_checkpoints_steps' in replaced_keys and save_steps is not None:
+    new_copy._save_checkpoints_secs = None  # pylint: disable=protected-access
+  elif 'save_checkpoints_secs' in replaced_keys and save_secs is not None:
+    new_copy._save_checkpoints_steps = None  # pylint: disable=protected-access
+
+
+def _validate_properties(run_config):
+  """Validates the properties."""
+  def _validate(property_name, cond, message):
+    property_value = getattr(run_config, property_name)
+    if property_value is not None and not cond(property_value):
+      raise ValueError(message)
+
+  _validate('model_dir', lambda dir: dir,
+            message='model_dir should be non-empty')
+
+  _validate('save_summary_steps', lambda steps: steps >= 0,
+            message='save_summary_steps should be >= 0')
+
+  _validate('save_checkpoints_steps', lambda steps: steps >= 0,
+            message='save_checkpoints_steps should be >= 0')
+  _validate('save_checkpoints_secs', lambda secs: secs >= 0,
+            message='save_checkpoints_secs should be >= 0')
+
+  _validate('session_config',
+            lambda sc: isinstance(sc, config_pb2.ConfigProto),
+            message='session_config must be instance of ConfigProto')
+
+  _validate('keep_checkpoint_max', lambda keep_max: keep_max >= 0,
+            message='keep_checkpoint_max should be >= 0')
+  _validate('keep_checkpoint_every_n_hours', lambda keep_hours: keep_hours > 0,
+            message='keep_checkpoint_every_n_hours should be > 0')
+  _validate('log_step_count_steps', lambda num_steps: num_steps > 0,
+            message='log_step_count_steps should be > 0')
+
+  _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types),
+            message='tf_random_seed must be integer.')
+
+  _validate('device_fn', lambda device_fn: six.callable(device_fn) and
+            set(function_utils.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS,
+            message='device_fn must be callable with exactly'
+                    ' one argument "op".')
+
+  _validate('protocol',
+            lambda protocol: protocol in (None, "grpc", "grpc+verbs"),
+            message='protocol should be grpc or grpc+verbs')
+
+
+def get_default_session_config():
+  """Returns tf.ConfigProto instance."""
+
+  rewrite_opts = rewriter_config_pb2.RewriterConfig(
+      meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE)
+  graph_opts = config_pb2.GraphOptions(rewrite_options=rewrite_opts)
+
+  return config_pb2.ConfigProto(allow_soft_placement=True,
+                                graph_options=graph_opts)
+
+
+class TaskType(object):
+  MASTER = 'master'
+  PS = 'ps'
+  WORKER = 'worker'
+  CHIEF = 'chief'
+  EVALUATOR = 'evaluator'
+
+
+@estimator_export('estimator.RunConfig')
+class RunConfig(object):
+  """This class specifies the configurations for an `Estimator` run."""
+
+  def __init__(self,
+               model_dir=None,
+               tf_random_seed=None,
+               save_summary_steps=100,
+               save_checkpoints_steps=_USE_DEFAULT,
+               save_checkpoints_secs=_USE_DEFAULT,
+               session_config=None,
+               keep_checkpoint_max=5,
+               keep_checkpoint_every_n_hours=10000,
+               log_step_count_steps=100,
+               train_distribute=None,
+               device_fn=None,
+               protocol=None,
+               eval_distribute=None,
+               experimental_distribute=None):
+    """Constructs a RunConfig.
+
+    All distributed training related properties `cluster_spec`, `is_chief`,
+    `master` , `num_worker_replicas`, `num_ps_replicas`, `task_id`, and
+    `task_type` are set based on the `TF_CONFIG` environment variable, if the
+    pertinent information is present. The `TF_CONFIG` environment variable is a
+    JSON object with attributes: `cluster` and `task`.
+
+    `cluster` is a JSON serialized version of `ClusterSpec`'s Python dict from
+    `server_lib.py`, mapping task types (usually one of the `TaskType` enums) to
+    a list of task addresses.
+
+    `task` has two attributes: `type` and `index`, where `type` can be any of
+    the task types in `cluster`. When `TF_CONFIG` contains said information,
+    the following properties are set on this class:
+
+    * `cluster_spec` is parsed from `TF_CONFIG['cluster']`. Defaults to {}. If
+      present, must have one and only one node in the `chief` attribute of
+      `cluster_spec`.
+    * `task_type` is set to `TF_CONFIG['task']['type']`. Must set if
+      `cluster_spec` is present; must be `worker` (the default value) if
+      `cluster_spec` is not set.
+    * `task_id` is set to `TF_CONFIG['task']['index']`. Must set if
+      `cluster_spec` is present; must be 0 (the default value) if
+      `cluster_spec` is not set.
+    * `master` is determined by looking up `task_type` and `task_id` in the
+      `cluster_spec`. Defaults to ''.
+    * `num_ps_replicas` is set by counting the number of nodes listed
+      in the `ps` attribute of `cluster_spec`. Defaults to 0.
+    * `num_worker_replicas` is set by counting the number of nodes listed
+      in the `worker` and `chief` attributes of `cluster_spec`. Defaults to 1.
+    * `is_chief` is determined based on `task_type` and `cluster`.
+
+    There is a special node with `task_type` as `evaluator`, which is not part
+    of the (training) `cluster_spec`. It handles the distributed evaluation job.
+
+    Example of non-chief node:
+    ```
+      cluster = {'chief': ['host0:2222'],
+                 'ps': ['host1:2222', 'host2:2222'],
+                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
+      os.environ['TF_CONFIG'] = json.dumps(
+          {'cluster': cluster,
+           'task': {'type': 'worker', 'index': 1}})
+      config = RunConfig()
+      assert config.master == 'host4:2222'
+      assert config.task_id == 1
+      assert config.num_ps_replicas == 2
+      assert config.num_worker_replicas == 4
+      assert config.cluster_spec == server_lib.ClusterSpec(cluster)
+      assert config.task_type == 'worker'
+      assert not config.is_chief
+    ```
+
+    Example of chief node:
+    ```
+      cluster = {'chief': ['host0:2222'],
+                 'ps': ['host1:2222', 'host2:2222'],
+                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
+      os.environ['TF_CONFIG'] = json.dumps(
+          {'cluster': cluster,
+           'task': {'type': 'chief', 'index': 0}})
+      config = RunConfig()
+      assert config.master == 'host0:2222'
+      assert config.task_id == 0
+      assert config.num_ps_replicas == 2
+      assert config.num_worker_replicas == 4
+      assert config.cluster_spec == server_lib.ClusterSpec(cluster)
+      assert config.task_type == 'chief'
+      assert config.is_chief
+    ```
+
+    Example of evaluator node (evaluator is not part of training cluster):
+    ```
+      cluster = {'chief': ['host0:2222'],
+                 'ps': ['host1:2222', 'host2:2222'],
+                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
+      os.environ['TF_CONFIG'] = json.dumps(
+          {'cluster': cluster,
+           'task': {'type': 'evaluator', 'index': 0}})
+      config = RunConfig()
+      assert config.master == ''
+      assert config.evaluator_master == ''
+      assert config.task_id == 0
+      assert config.num_ps_replicas == 0
+      assert config.num_worker_replicas == 0
+      assert config.cluster_spec == {}
+      assert config.task_type == 'evaluator'
+      assert not config.is_chief
+    ```
+
+    N.B.: If `save_checkpoints_steps` or `save_checkpoints_secs` is set,
+    `keep_checkpoint_max` might need to be adjusted accordingly, especially in
+    distributed training. For example, setting `save_checkpoints_secs` as 60
+    without adjusting `keep_checkpoint_max` (defaults to 5) leads to situation
+    that checkpoint would be garbage collected after 5 minutes. In distributed
+    training, the evaluation job starts asynchronously and might fail to load or
+    find the checkpoint due to race condition.
+
+    Args:
+      model_dir: directory where model parameters, graph, etc are saved. If
+        `PathLike` object, the path will be resolved. If `None`, will use a
+        default value set by the Estimator.
+      tf_random_seed: Random seed for TensorFlow initializers.
+        Setting this value allows consistency between reruns.
+      save_summary_steps: Save summaries every this many steps.
+      save_checkpoints_steps: Save checkpoints every this many steps. Can not be
+          specified with `save_checkpoints_secs`.
+      save_checkpoints_secs: Save checkpoints every this many seconds. Can not
+          be specified with `save_checkpoints_steps`. Defaults to 600 seconds if
+          both `save_checkpoints_steps` and `save_checkpoints_secs` are not set
+          in constructor.  If both `save_checkpoints_steps` and
+          `save_checkpoints_secs` are None, then checkpoints are disabled.
+      session_config: a ConfigProto used to set session parameters, or None.
+      keep_checkpoint_max: The maximum number of recent checkpoint files to
+        keep. As new files are created, older files are deleted. If None or 0,
+        all checkpoint files are kept. Defaults to 5 (that is, the 5 most recent
+        checkpoint files are kept.)
+      keep_checkpoint_every_n_hours: Number of hours between each checkpoint
+        to be saved. The default value of 10,000 hours effectively disables
+        the feature.
+      log_step_count_steps: The frequency, in number of global steps, that the
+        global step/sec and the loss will be logged during training.
+      train_distribute: An optional instance of
+        `tf.contrib.distribute.DistributionStrategy`. If specified,
+        then Estimator will distribute the user's model during training,
+        according to the policy specified by that strategy. Setting
+        `experimental_distribute.train_distribute` is preferred.
+      device_fn: A callable invoked for every `Operation` that takes the
+        `Operation` and returns the device string. If `None`, defaults to
+        the device function returned by `tf.train.replica_device_setter`
+        with round-robin strategy.
+      protocol: An optional argument which specifies the protocol used when
+        starting server. None means default to grpc.
+      eval_distribute: An optional instance of
+        `tf.contrib.distribute.DistributionStrategy`. If specified,
+        then Estimator will distribute the user's model during evaluation,
+        according to the policy specified by that strategy. Setting
+        `experimental_distribute.eval_distribute` is preferred.
+      experimental_distribute: an optional
+        `tf.contrib.distribute.DistributeConfig` object specifying
+        DistributionStrategy-related configuration. The `train_distribute` and
+        `eval_distribute` can be passed as parameters to `RunConfig` or set in
+        `experimental_distribute` but not both.
+
+    Raises:
+      ValueError: If both `save_checkpoints_steps` and `save_checkpoints_secs`
+      are set.
+    """
+    if (save_checkpoints_steps == _USE_DEFAULT and
+        save_checkpoints_secs == _USE_DEFAULT):
+      save_checkpoints_steps = None
+      save_checkpoints_secs = 600
+    elif save_checkpoints_secs == _USE_DEFAULT:
+      save_checkpoints_secs = None
+    elif save_checkpoints_steps == _USE_DEFAULT:
+      save_checkpoints_steps = None
+    elif (save_checkpoints_steps is not None and
+          save_checkpoints_secs is not None):
+      raise ValueError(_SAVE_CKPT_ERR)
+
+    tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV, '{}'))
+    if tf_config:
+      logging.info('TF_CONFIG environment variable: %s', tf_config)
+
+    model_dir = _get_model_dir(tf_config,
+                               compat_internal.path_to_str(model_dir))
+
+    RunConfig._replace(
+        self,
+        allowed_properties_list=_DEFAULT_REPLACEABLE_LIST,
+        model_dir=model_dir,
+        tf_random_seed=tf_random_seed,
+        save_summary_steps=save_summary_steps,
+        save_checkpoints_steps=save_checkpoints_steps,
+        save_checkpoints_secs=save_checkpoints_secs,
+        session_config=session_config,
+        keep_checkpoint_max=keep_checkpoint_max,
+        keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
+        log_step_count_steps=log_step_count_steps,
+        train_distribute=train_distribute,
+        device_fn=device_fn,
+        protocol=protocol,
+        eval_distribute=eval_distribute,
+        experimental_distribute=experimental_distribute)
+
+    # TODO(frankchn,priyag): Eventually use distributed coordinator for TPUs.
+    if ((train_distribute and
+         train_distribute.__class__.__name__ != 'TPUStrategy') or
+        (eval_distribute and
+         eval_distribute.__class__.__name__ != 'TPUStrategy') or
+        experimental_distribute):
+      logging.info('Initializing RunConfig with distribution strategies.')
+      distribute_coordinator_training.init_run_config(self, tf_config)
+    else:
+      self._init_distributed_setting_from_environment_var(tf_config)
+      self._maybe_overwrite_session_config_for_distributed_training()
+
+  def _maybe_overwrite_session_config_for_distributed_training(self):
+    """Overwrites the session_config for distributed training.
+
+    The default overwrite is optimized for between-graph training. Subclass
+    should override this method if necessary.
+    """
+    # Get session_config only for between-graph distributed mode (cluster_spec
+    # is present).
+    if not self._session_config and self._cluster_spec:
+      RunConfig._replace(
+          self,
+          allowed_properties_list=_DEFAULT_REPLACEABLE_LIST,
+          session_config=self._get_default_session_config_distributed())
+
+  def _get_default_session_config_distributed(self):
+    """Returns None or tf.ConfigProto instance with default device_filters set.
+
+    Device filters are set such that chief/master and worker communicates with
+    only ps. session_config=None for evaluators or any other TaskType.
+    """
+
+    rewrite_opts = rewriter_config_pb2.RewriterConfig(
+        meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE)
+    graph_opts = config_pb2.GraphOptions(rewrite_options=rewrite_opts)
+
+    device_filters = None
+    if self._task_type == TaskType.MASTER:
+      device_filters = ['/job:ps', '/job:master']
+    elif self._task_type == TaskType.CHIEF:
+      device_filters = ['/job:ps', '/job:chief']
+    elif self._task_type == TaskType.WORKER:
+      device_filters = ['/job:ps', '/job:worker/task:%d' % self._task_id]
+    elif self._task_type == TaskType.PS:
+      device_filters = ['/job:ps', '/job:worker', '/job:master']
+    else:
+      # If the task_type is `EVALUATOR` or something other than the ones in
+      # TaskType then don't set any device filters.
+      return None
+
+    return config_pb2.ConfigProto(
+        allow_soft_placement=True,
+        graph_options=graph_opts,
+        device_filters=device_filters)
+
+  def _init_distributed_setting_from_environment_var(self, tf_config):
+    """Initialize distributed properties based on `tf_config`."""
+
+    self._service = _validate_service(tf_config.get(_SERVICE_KEY))
+    self._cluster_spec = server_lib.ClusterSpec(tf_config.get(_CLUSTER_KEY, {}))
+    task_env = tf_config.get(_TASK_ENV_KEY, {})
+
+    if self._cluster_spec and TaskType.MASTER in self._cluster_spec.jobs:
+      return self._init_distributed_setting_from_environment_var_with_master(
+          tf_config)
+
+    if self._cluster_spec:
+      # Distributed mode.
+      self._task_type, self._task_id = _validate_task_type_and_task_id(
+          self._cluster_spec, task_env, TaskType.CHIEF)
+
+      self._evaluation_master = _get_eval_session_master(
+          self._task_type, tf_config)
+
+      if self._task_type != TaskType.EVALUATOR:
+        self._master = _get_session_master(self._cluster_spec, self._task_type,
+                                           self._task_id, tf_config)
+        self._num_ps_replicas = _count_ps(self._cluster_spec)
+        self._num_worker_replicas = _count_worker(
+            self._cluster_spec, chief_task_type=TaskType.CHIEF)
+        self._global_id_in_cluster = _get_global_id_in_cluster(
+            self._cluster_spec,
+            self._task_type,
+            self._task_id,
+            chief_task_type=TaskType.CHIEF)
+      else:
+        # Evaluator is not part of the training cluster.
+        self._cluster_spec = server_lib.ClusterSpec({})
+        self._master = _LOCAL_MASTER
+        self._num_ps_replicas = 0
+        self._num_worker_replicas = 0
+        self._global_id_in_cluster = None  # undefined
+
+      self._is_chief = self._task_type == TaskType.CHIEF
+    else:
+      # Local mode.
+      self._task_type = task_env.get(_TASK_TYPE_KEY, TaskType.WORKER)
+      self._task_id = int(task_env.get(_TASK_ID_KEY, 0))
+      self._global_id_in_cluster = 0
+
+      if self._task_type != TaskType.WORKER:
+        raise ValueError(
+            'If "cluster" is not set in TF_CONFIG, task type must be WORKER.')
+      if self._task_id != 0:
+        raise ValueError(
+            'If "cluster" is not set in TF_CONFIG, task index must be 0.')
+
+      self._master = tf_config.get(_SESSION_MASTER_KEY, _LOCAL_MASTER)
+      self._evaluation_master = tf_config.get(_EVAL_SESSION_MASTER_KEY,
+                                              _LOCAL_MASTER)
+      self._is_chief = True
+      self._num_ps_replicas = 0
+      self._num_worker_replicas = 1
+
+  def _init_distributed_setting_from_environment_var_with_master(self,
+                                                                 tf_config):
+    """Initialize distributed properties for legacy cluster with `master`."""
+    # There is no tech reason, why user cannot have chief and master in the same
+    # cluster, but it is super confusing (which is really the chief?). So, block
+    # this case.
+    if TaskType.CHIEF in self._cluster_spec.jobs:
+      raise ValueError('If `master` node exists in `cluster`, job '
+                       '`chief` is not supported.')
+
+    task_env = tf_config.get(_TASK_ENV_KEY, {})
+
+    self._task_type, self._task_id = _validate_task_type_and_task_id(
+        self._cluster_spec, task_env, TaskType.MASTER)
+
+    if self._task_type == TaskType.EVALUATOR:
+      raise ValueError('If `master` node exists in `cluster`, task_type '
+                       '`evaluator` is not supported.')
+
+    self._global_id_in_cluster = _get_global_id_in_cluster(
+        self._cluster_spec,
+        self._task_type,
+        self._task_id,
+        chief_task_type=TaskType.MASTER)
+
+    self._master = _get_session_master(self._cluster_spec, self._task_type,
+                                       self._task_id, tf_config)
+    self._evaluation_master = _get_eval_session_master(self._task_type,
+                                                       tf_config)
+    self._num_ps_replicas = _count_ps(self._cluster_spec)
+    self._num_worker_replicas = _count_worker(
+        self._cluster_spec, chief_task_type=TaskType.MASTER)
+
+    self._is_chief = self._task_type == TaskType.MASTER
+
+  @property
+  def cluster_spec(self):
+    return self._cluster_spec
+
+  @property
+  def device_fn(self):
+    """Returns the device_fn.
+
+    If device_fn is not `None`, it overrides the default
+    device function used in `Estimator`.
+    Otherwise the default one is used.
+    """
+    return self._device_fn
+
+  @property
+  def evaluation_master(self):
+    return self._evaluation_master
+
+  @property
+  def is_chief(self):
+    return self._is_chief
+
+  @property
+  def master(self):
+    return self._master
+
+  @property
+  def num_ps_replicas(self):
+    return self._num_ps_replicas
+
+  @property
+  def num_worker_replicas(self):
+    return self._num_worker_replicas
+
+  @property
+  def task_id(self):
+    return self._task_id
+
+  @property
+  def global_id_in_cluster(self):
+    """The global id in the training cluster.
+
+    All global ids in the training cluster are assigned from an increasing
+    sequence of consecutive integers. The first id is 0.
+
+    Note: Task id (the property field `task_id`) is tracking the index of the
+    node among all nodes with the SAME task type. For example, given the cluster
+    definition as follows:
+
+    ```
+      cluster = {'chief': ['host0:2222'],
+                 'ps': ['host1:2222', 'host2:2222'],
+                 'worker': ['host3:2222', 'host4:2222', 'host5:2222']}
+    ```
+
+    Nodes with task type `worker` can have id 0, 1, 2.  Nodes with task type
+    `ps` can have id, 0, 1. So, `task_id` is not unique, but the pair
+    (`task_type`, `task_id`) can uniquely determine a node in the cluster.
+
+    Global id, i.e., this field, is tracking the index of the node among ALL
+    nodes in the cluster. It is uniquely assigned.  For example, for the cluster
+    spec given above, the global ids are assigned as:
+    ```
+      task_type  | task_id  |  global_id
+      --------------------------------
+      chief      | 0        |  0
+      worker     | 0        |  1
+      worker     | 1        |  2
+      worker     | 2        |  3
+      ps         | 0        |  4
+      ps         | 1        |  5
+    ```
+
+    Returns:
+      An integer id.
+    """
+    return self._global_id_in_cluster
+
+  @property
+  def task_type(self):
+    return self._task_type
+
+  @property
+  def tf_random_seed(self):
+    return self._tf_random_seed
+
+  @property
+  def save_summary_steps(self):
+    return self._save_summary_steps
+
+  @property
+  def save_checkpoints_secs(self):
+    return self._save_checkpoints_secs
+
+  @property
+  def session_config(self):
+    return self._session_config
+
+  @property
+  def save_checkpoints_steps(self):
+    return self._save_checkpoints_steps
+
+  @property
+  def keep_checkpoint_max(self):
+    return self._keep_checkpoint_max
+
+  @property
+  def keep_checkpoint_every_n_hours(self):
+    return self._keep_checkpoint_every_n_hours
+
+  @property
+  def log_step_count_steps(self):
+    return self._log_step_count_steps
+
+  @property
+  def model_dir(self):
+    return self._model_dir
+
+  @property
+  def service(self):
+    """Returns the platform defined (in TF_CONFIG) service dict."""
+    return self._service
+
+  @property
+  def train_distribute(self):
+    """Optional `tf.contrib.distribute.DistributionStrategy` for training.
+    """
+    return self._train_distribute
+
+  @property
+  def eval_distribute(self):
+    """Optional `tf.contrib.distribute.DistributionStrategy` for evaluation.
+    """
+    return self._eval_distribute
+
+  @property
+  def protocol(self):
+    """Returns the optional protocol value."""
+    return self._protocol
+
+  def replace(self, **kwargs):
+    """Returns a new instance of `RunConfig` replacing specified properties.
+
+    Only the properties in the following list are allowed to be replaced:
+
+      - `model_dir`,
+      - `tf_random_seed`,
+      - `save_summary_steps`,
+      - `save_checkpoints_steps`,
+      - `save_checkpoints_secs`,
+      - `session_config`,
+      - `keep_checkpoint_max`,
+      - `keep_checkpoint_every_n_hours`,
+      - `log_step_count_steps`,
+      - `train_distribute`,
+      - `device_fn`,
+      - `protocol`.
+      - `eval_distribute`,
+      - `experimental_distribute`,
+
+    In addition, either `save_checkpoints_steps` or `save_checkpoints_secs`
+    can be set (should not be both).
+
+    Args:
+      **kwargs: keyword named properties with new values.
+
+    Raises:
+      ValueError: If any property name in `kwargs` does not exist or is not
+        allowed to be replaced, or both `save_checkpoints_steps` and
+        `save_checkpoints_secs` are set.
+
+    Returns:
+      a new instance of `RunConfig`.
+    """
+    return RunConfig._replace(
+        copy.deepcopy(self),
+        allowed_properties_list=_DEFAULT_REPLACEABLE_LIST,
+        **kwargs)
+
+  @staticmethod
+  def _replace(config, allowed_properties_list=None, **kwargs):
+    """See `replace`.
+
+    N.B.: This implementation assumes that for key named "foo", the underlying
+    property the RunConfig holds is "_foo" (with one leading underscore).
+
+    Args:
+      config: The RunConfig to replace the values of.
+      allowed_properties_list: The property name list allowed to be replaced.
+      **kwargs: keyword named properties with new values.
+
+    Raises:
+      ValueError: If any property name in `kwargs` does not exist or is not
+        allowed to be replaced, or both `save_checkpoints_steps` and
+        `save_checkpoints_secs` are set.
+
+    Returns:
+      a new instance of `RunConfig`.
+    """
+
+    allowed_properties_list = allowed_properties_list or []
+
+    for key, new_value in six.iteritems(kwargs):
+      if key in allowed_properties_list:
+        setattr(config, '_' + key, new_value)
+        continue
+
+      raise ValueError(
+          'Replacing {} is not supported. Allowed properties are {}.'.format(
+              key, allowed_properties_list))
+
+    _validate_save_ckpt_with_replaced_keys(config, kwargs.keys())
+    _validate_properties(config)
+    return config
+
+
+def _get_model_dir(tf_config, model_dir):
+  """Returns `model_dir` based user provided `tf_config` or `model_dir`."""
+  # pylint: disable=g-explicit-bool-comparison
+
+  # Empty string is treated as False in Python condition check, which triggers
+  # some confusing error messages. For example, 'a or b' returns None if a is ''
+  # and b is None. `None` is allowed for model_dir but '' is not allowed. Here,
+  # explicitly check empty string to provide clear error message.
+  if model_dir == '':
+    raise ValueError('model_dir should be non-empty.')
+
+  model_dir_in_tf_config = tf_config.get('model_dir')
+  if model_dir_in_tf_config == '':
+    raise ValueError('model_dir in TF_CONFIG should be non-empty.')
+
+  if model_dir_in_tf_config:
+    if model_dir and model_dir_in_tf_config != model_dir:
+      raise ValueError(
+          '`model_dir` provided in RunConfig construct, if set, '
+          'must have the same value as the model_dir in TF_CONFIG. '
+          'model_dir: {}\nTF_CONFIG["model_dir"]: {}.\n'.format(
+              model_dir, model_dir_in_tf_config))
 
-# Include attrs that start with single underscore.
-run_config.__all__ = [s for s in dir(run_config) if not s.startswith('__')]
+    logging.info('Using model_dir in TF_CONFIG: %s', model_dir_in_tf_config)
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.run_config import *
+  return model_dir or model_dir_in_tf_config
diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py
new file mode 100644
index 0000000000..06df7cb9dd
--- /dev/null
+++ b/tensorflow/python/estimator/run_config_test.py
@@ -0,0 +1,1235 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""RunConfig tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
+from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.platform import test
+
+_TEST_DIR = 'test_dir'
+_MASTER = 'master_'
+_NOT_SUPPORTED_REPLACE_PROPERTY_MSG = 'Replacing .*is not supported'
+_SAVE_CKPT_ERR = (
+    '`save_checkpoints_steps` and `save_checkpoints_secs` cannot be both set.'
+)
+_MODEL_DIR_ERR = 'model_dir should be non-empty'
+_MODEL_DIR_TF_CONFIG_ERR = 'model_dir in TF_CONFIG should be non-empty'
+_MODEL_DIR_MISMATCH_ERR = (
+    '`model_dir` provided in RunConfig construct, if set, '
+    'must have the same value as the model_dir in TF_CONFIG. ')
+_SAVE_SUMMARY_STEPS_ERR = 'save_summary_steps should be >= 0'
+_SAVE_CKPT_STEPS_ERR = 'save_checkpoints_steps should be >= 0'
+_SAVE_CKPT_SECS_ERR = 'save_checkpoints_secs should be >= 0'
+_SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto'
+_KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0'
+_KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0'
+_TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer'
+_DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument "op".'
+_ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.'
+_ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.'
+_INVALID_TASK_TYPE_FOR_EVAL_MASTER = (
+    'Key.*eval.*master.*should not be set for task type other than')
+_MISSING_CHIEF_ERR = 'If "cluster" is set .* it must have one "chief" node'
+_MISSING_TASK_TYPE_ERR = 'If "cluster" is set .* task type must be set'
+_MISSING_TASK_ID_ERR = 'If "cluster" is set .* task index must be set'
+_INVALID_TASK_INDEX_ERR = 'is not a valid task_id'
+_NEGATIVE_TASK_INDEX_ERR = 'Task index must be non-negative number.'
+_INVALID_TASK_TYPE_ERR = 'is not a valid task_type'
+_INVALID_TASK_TYPE_FOR_LOCAL_ERR = (
+    'If "cluster" is not set in TF_CONFIG, task type must be WORKER.')
+_INVALID_TASK_INDEX_FOR_LOCAL_ERR = (
+    'If "cluster" is not set in TF_CONFIG, task index must be 0.')
+_INVALID_EVALUATOR_IN_CLUSTER_WITH_MASTER_ERR = (
+    'If `master` node exists in `cluster`, task_type `evaluator` is not '
+    'supported.')
+_INVALID_CHIEF_IN_CLUSTER_WITH_MASTER_ERR = (
+    'If `master` node exists in `cluster`, job `chief` is not supported.')
+_INVALID_SERVICE_TYPE_ERR = (
+    'If "service" is set in TF_CONFIG, it must be a dict. Given')
+
+
+def _create_run_config_with_cluster_spec(tf_config, **kwargs):
+  with test.mock.patch.dict('os.environ', {'TF_CONFIG': json.dumps(tf_config)}):
+    return run_config_lib.RunConfig(**kwargs)
+
+
+class RunConfigTest(test.TestCase):
+
+  def test_default_property_values(self):
+    config = run_config_lib.RunConfig()
+    self.assertIsNone(config.model_dir)
+    self.assertIsNone(config.session_config)
+    self.assertIsNone(config.tf_random_seed)
+    self.assertEqual(100, config.save_summary_steps)
+    self.assertEqual(600, config.save_checkpoints_secs)
+    self.assertIsNone(config.save_checkpoints_steps)
+    self.assertEqual(5, config.keep_checkpoint_max)
+    self.assertEqual(10000, config.keep_checkpoint_every_n_hours)
+    self.assertIsNone(config.service)
+    self.assertIsNone(config.device_fn)
+
+  def test_model_dir(self):
+    empty_config = run_config_lib.RunConfig()
+    self.assertIsNone(empty_config.model_dir)
+
+    new_config = empty_config.replace(model_dir=_TEST_DIR)
+    self.assertEqual(_TEST_DIR, new_config.model_dir)
+
+  def test_replace_with_allowed_properties(self):
+    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+    device_fn = lambda op: "/cpu:0"
+
+    config = run_config_lib.RunConfig().replace(
+        tf_random_seed=11,
+        save_summary_steps=12,
+        save_checkpoints_secs=14,
+        session_config=session_config,
+        keep_checkpoint_max=16,
+        keep_checkpoint_every_n_hours=17,
+        device_fn=device_fn)
+    self.assertEqual(11, config.tf_random_seed)
+    self.assertEqual(12, config.save_summary_steps)
+    self.assertEqual(14, config.save_checkpoints_secs)
+    self.assertEqual(session_config, config.session_config)
+    self.assertEqual(16, config.keep_checkpoint_max)
+    self.assertEqual(17, config.keep_checkpoint_every_n_hours)
+    self.assertEqual(device_fn, config.device_fn)
+
+  def test_replace_none_value(self):
+    config = run_config_lib.RunConfig().replace(
+        tf_random_seed=None,
+        model_dir=None,
+        save_summary_steps=None,
+        save_checkpoints_secs=None,
+        save_checkpoints_steps=None,
+        session_config=None,
+        keep_checkpoint_max=None,
+        keep_checkpoint_every_n_hours=None,
+        device_fn=None)
+    self.assertIsNone(config.tf_random_seed)
+    self.assertIsNone(config.model_dir)
+    self.assertIsNone(config.save_summary_steps)
+    self.assertIsNone(config.save_checkpoints_secs)
+    self.assertIsNone(config.save_checkpoints_steps)
+    self.assertIsNone(config.session_config)
+    self.assertIsNone(config.keep_checkpoint_max)
+    self.assertIsNone(config.keep_checkpoint_every_n_hours)
+    self.assertIsNone(config.device_fn)
+
+  def test_replace_with_disallowallowed_properties(self):
+    config = run_config_lib.RunConfig()
+    with self.assertRaises(ValueError):
+      # tf_random_seed is not allowed to be replaced.
+      config.replace(master='_master')
+    with self.assertRaises(ValueError):
+      config.replace(some_undefined_property=123)
+
+  def test_replace(self):
+    config = run_config_lib.RunConfig()
+
+    with self.assertRaisesRegexp(
+        ValueError, _NOT_SUPPORTED_REPLACE_PROPERTY_MSG):
+      # master is not allowed to be replaced.
+      config.replace(master=_MASTER)
+
+    with self.assertRaisesRegexp(
+        ValueError, _NOT_SUPPORTED_REPLACE_PROPERTY_MSG):
+      config.replace(some_undefined_property=_MASTER)
+
+  def test_replace_invalid_values(self):
+    config = run_config_lib.RunConfig()
+
+    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
+      config.replace(model_dir='')
+    with self.assertRaisesRegexp(ValueError, _SAVE_SUMMARY_STEPS_ERR):
+      config.replace(save_summary_steps=-1)
+    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_STEPS_ERR):
+      config.replace(save_checkpoints_steps=-1)
+    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_SECS_ERR):
+      config.replace(save_checkpoints_secs=-1)
+    with self.assertRaisesRegexp(ValueError, _SESSION_CONFIG_ERR):
+      config.replace(session_config={})
+    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_MAX_ERR):
+      config.replace(keep_checkpoint_max=-1)
+    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_HOURS_ERR):
+      config.replace(keep_checkpoint_every_n_hours=0)
+    with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
+      config.replace(tf_random_seed=1.0)
+    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
+      config.replace(device_fn=lambda x, y: 0)
+
+  def test_init_with_allowed_properties(self):
+    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+    device_fn = lambda op: "/cpu:0"
+
+    config = run_config_lib.RunConfig(
+        tf_random_seed=11,
+        save_summary_steps=12,
+        save_checkpoints_secs=14,
+        session_config=session_config,
+        keep_checkpoint_max=16,
+        keep_checkpoint_every_n_hours=17,
+        device_fn=device_fn)
+    self.assertEqual(11, config.tf_random_seed)
+    self.assertEqual(12, config.save_summary_steps)
+    self.assertEqual(14, config.save_checkpoints_secs)
+    self.assertEqual(session_config, config.session_config)
+    self.assertEqual(16, config.keep_checkpoint_max)
+    self.assertEqual(17, config.keep_checkpoint_every_n_hours)
+    self.assertEqual(device_fn, config.device_fn)
+
+  def test_init_none_value(self):
+    config = run_config_lib.RunConfig(
+        tf_random_seed=None,
+        model_dir=None,
+        save_summary_steps=None,
+        save_checkpoints_secs=None,
+        save_checkpoints_steps=None,
+        session_config=None,
+        keep_checkpoint_max=None,
+        keep_checkpoint_every_n_hours=None,
+        device_fn=None)
+    self.assertIsNone(config.tf_random_seed)
+    self.assertIsNone(config.model_dir)
+    self.assertIsNone(config.save_summary_steps)
+    self.assertIsNone(config.save_checkpoints_secs)
+    self.assertIsNone(config.save_checkpoints_steps)
+    self.assertIsNone(config.session_config)
+    self.assertIsNone(config.keep_checkpoint_max)
+    self.assertIsNone(config.keep_checkpoint_every_n_hours)
+    self.assertIsNone(config.device_fn)
+
+  def test_init_invalid_values(self):
+    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
+      run_config_lib.RunConfig(model_dir='')
+    with self.assertRaisesRegexp(ValueError, _SAVE_SUMMARY_STEPS_ERR):
+      run_config_lib.RunConfig(save_summary_steps=-1)
+    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_STEPS_ERR):
+      run_config_lib.RunConfig(save_checkpoints_steps=-1)
+    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_SECS_ERR):
+      run_config_lib.RunConfig(save_checkpoints_secs=-1)
+    with self.assertRaisesRegexp(ValueError, _SESSION_CONFIG_ERR):
+      run_config_lib.RunConfig(session_config={})
+    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_MAX_ERR):
+      run_config_lib.RunConfig(keep_checkpoint_max=-1)
+    with self.assertRaisesRegexp(ValueError, _KEEP_CKPT_HOURS_ERR):
+      run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0)
+    with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
+      run_config_lib.RunConfig(tf_random_seed=1.0)
+    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
+      run_config_lib.RunConfig(device_fn=lambda x: "/cpu:0")
+
+
+class RunConfigDistributedSettingTest(test.TestCase):
+
+  def _assert_distributed_properties(self, run_config,
+                                     expected_cluster_spec,
+                                     expected_task_type,
+                                     expected_task_id,
+                                     expected_master,
+                                     expected_evaluation_master,
+                                     expected_is_chief,
+                                     expected_num_worker_replicas,
+                                     expected_num_ps_replicas):
+    self.assertEqual(expected_cluster_spec, run_config.cluster_spec.as_dict())
+    self.assertEqual(expected_task_type, run_config.task_type)
+    self.assertEqual(expected_task_id, run_config.task_id)
+    self.assertEqual(expected_master, run_config.master)
+    self.assertEqual(expected_evaluation_master, run_config.evaluation_master)
+    self.assertEqual(expected_is_chief, run_config.is_chief)
+    self.assertEqual(expected_num_worker_replicas,
+                     run_config.num_worker_replicas)
+    self.assertEqual(expected_num_ps_replicas, run_config.num_ps_replicas)
+
+  def test_default_values(self):
+    self._assert_distributed_properties(
+        run_config=run_config_lib.RunConfig(),
+        expected_cluster_spec={},
+        expected_task_type=run_config_lib.TaskType.WORKER,
+        expected_task_id=0,
+        expected_master='',
+        expected_evaluation_master='',
+        expected_is_chief=True,
+        expected_num_worker_replicas=1,
+        expected_num_ps_replicas=0)
+
+  def test_tf_config_for_local(self):
+    tf_config = {
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 0
+        }
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self._assert_distributed_properties(
+        run_config=run_config,
+        expected_cluster_spec={},
+        expected_task_type=run_config_lib.TaskType.WORKER,
+        expected_task_id=0,
+        expected_master='',
+        expected_evaluation_master='',
+        expected_is_chief=True,
+        expected_num_worker_replicas=1,
+        expected_num_ps_replicas=0)
+    self.assertEqual(0, run_config.global_id_in_cluster)
+    self.assertIsNone(run_config.session_config, None)
+
+  def test_session_master_for_local(self):
+    tf_config = {'session_master': '_my_master'}
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec={},
+        expected_task_type=run_config_lib.TaskType.WORKER,
+        expected_task_id=0,
+        expected_master='_my_master',
+        expected_evaluation_master='',
+        expected_is_chief=True,
+        expected_num_worker_replicas=1,
+        expected_num_ps_replicas=0)
+
+  def test_eval_session_master_for_local(self):
+    tf_config = {'eval_session_master': '_my_eval_master'}
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec={},
+        expected_task_type=run_config_lib.TaskType.WORKER,
+        expected_task_id=0,
+        expected_master='',
+        expected_evaluation_master='_my_eval_master',
+        expected_is_chief=True,
+        expected_num_worker_replicas=1,
+        expected_num_ps_replicas=0)
+
+  def test_invalid_task_type_for_local(self):
+    tf_config = {
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_FOR_LOCAL_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_invalid_task_index_for_local(self):
+    tf_config = {
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 1
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_INDEX_FOR_LOCAL_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_chief_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        }
+    }
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec=tf_config['cluster'],
+        expected_task_type=run_config_lib.TaskType.CHIEF,
+        expected_task_id=0,
+        expected_master='grpc://host0:0',
+        expected_evaluation_master='',
+        expected_is_chief=True,
+        expected_num_worker_replicas=4,
+        expected_num_ps_replicas=2)
+
+  def test_session_master_from_single_node_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        },
+        'session_master': '_my_master'
+    }
+    self.assertEqual('_my_master',
+                     _create_run_config_with_cluster_spec(tf_config).master)
+
+  def test_session_master_from_multiple_nodes_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        },
+        'session_master': '_my_master'
+    }
+    self.assertEqual('_my_master',
+                     _create_run_config_with_cluster_spec(tf_config).master)
+
+  def test_fail_with_eval_session_master_for_non_evaluator(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        },
+        'eval_session_master': 'grpc://123',
+    }
+    with self.assertRaisesRegexp(
+        ValueError, _INVALID_TASK_TYPE_FOR_EVAL_MASTER):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_multiple_chief_nodes(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0', 'host:6:6'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+    }
+    with self.assertRaisesRegexp(ValueError, _ONE_CHIEF_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_missing_chief_node(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+    }
+    with self.assertRaisesRegexp(ValueError, _MISSING_CHIEF_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_single_chief_node(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        }
+    }
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec=tf_config['cluster'],
+        expected_task_type=run_config_lib.TaskType.CHIEF,
+        expected_task_id=0,
+        expected_master='',
+        expected_evaluation_master='',
+        expected_is_chief=True,
+        expected_num_worker_replicas=1,
+        expected_num_ps_replicas=0)
+
+  def test_fail_with_missing_task_type_for_distributed(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host3:3']
+        },
+    }
+    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_TYPE_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_missing_task_index_for_distributed(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host3:3']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_ID_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_index_is_too_large(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host3:3']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 1
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_INDEX_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_invalid_task_index(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host3:3']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': -1
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_invalid_task_type(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host3:3']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 0
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_worker_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 1
+        }
+    }
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec=tf_config['cluster'],
+        expected_task_type=run_config_lib.TaskType.WORKER,
+        expected_task_id=1,
+        expected_master='grpc://host4:4',
+        expected_evaluation_master='',
+        expected_is_chief=False,
+        expected_num_worker_replicas=4,
+        expected_num_ps_replicas=2)
+
+  def test_ps_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.PS,
+            'index': 0
+        }
+    }
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec=tf_config['cluster'],
+        expected_task_type=run_config_lib.TaskType.PS,
+        expected_task_id=0,
+        expected_master='grpc://host1:1',
+        expected_evaluation_master='',
+        expected_is_chief=False,
+        expected_num_worker_replicas=4,
+        expected_num_ps_replicas=2)
+
+  def test_evaluator_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.EVALUATOR,
+            'index': 12
+        }
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self._assert_distributed_properties(
+        run_config=run_config,
+        expected_cluster_spec={},
+        expected_task_type=run_config_lib.TaskType.EVALUATOR,
+        expected_task_id=12,
+        expected_master='',
+        expected_evaluation_master='',
+        expected_is_chief=False,  # evaluator is never chief.
+        expected_num_worker_replicas=0,  # evaluator is not in training cluster.
+        expected_num_ps_replicas=0)
+    self.assertIsNone(run_config.global_id_in_cluster)
+
+  def test_eval_master_for_evaluator(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.EVALUATOR,
+            'index': 12
+        },
+        'eval_session_master': 'grpc://123',
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual('grpc://123', run_config.evaluation_master)
+
+  def test_fail_with_invalid_task_index_for_evaluator(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host3:3']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.EVALUATOR,
+            'index': -1
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_global_id_in_cluster_for_chief(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(0, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_worker(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 2,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(3, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_ps(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.PS,
+            'index': 1,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(5, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_multipe_worker_types(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            'worker': ['host3:3', 'host4:4', 'host5:5'],
+            'other_type': ['host3:1', 'host4:2'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': 'other_type',
+            'index': 1,
+        },
+    }
+    # Though 'other_type' is defined after 'worker', based on alphabetical
+    # order, the task type order should be 'chief', 'other_type', 'worker',
+    # 'ps', where 'chief' and 'ps' are predefined to be the top and last in the
+    # order list.
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(2, run_config.global_id_in_cluster)
+
+
+class RunConfigDistributedSettingWithMasterTest(test.TestCase):
+
+  def _assert_distributed_properties(self, run_config,
+                                     expected_cluster_spec,
+                                     expected_task_type,
+                                     expected_task_id,
+                                     expected_master,
+                                     expected_evaluation_master,
+                                     expected_is_chief,
+                                     expected_num_worker_replicas,
+                                     expected_num_ps_replicas):
+    self.assertEqual(expected_cluster_spec, run_config.cluster_spec.as_dict())
+    self.assertEqual(expected_task_type, run_config.task_type)
+    self.assertEqual(expected_task_id, run_config.task_id)
+    self.assertEqual(expected_master, run_config.master)
+    self.assertEqual(expected_evaluation_master, run_config.evaluation_master)
+    self.assertEqual(expected_is_chief, run_config.is_chief)
+    self.assertEqual(expected_num_worker_replicas,
+                     run_config.num_worker_replicas)
+    self.assertEqual(expected_num_ps_replicas, run_config.num_ps_replicas)
+
+  def test_invalid_task_type_for_local(self):
+    tf_config = {
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_FOR_LOCAL_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_master_node(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        }
+    }
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec=tf_config['cluster'],
+        expected_task_type=run_config_lib.TaskType.MASTER,
+        expected_task_id=0,
+        expected_master='grpc://host0:0',
+        expected_evaluation_master='',
+        expected_is_chief=True,
+        expected_num_worker_replicas=4,
+        expected_num_ps_replicas=2)
+
+  def test_session_master_in_single_node_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        },
+        'session_master': '_my_master'
+    }
+    self.assertEqual('_my_master',
+                     _create_run_config_with_cluster_spec(tf_config).master)
+
+  def test_session_master_in_multiple_nodes_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        },
+        'session_master': '_my_master'
+    }
+    self.assertEqual('_my_master',
+                     _create_run_config_with_cluster_spec(tf_config).master)
+
+  def test_fail_with_eval_session_master(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        },
+        'eval_session_master': 'grpc://123',
+    }
+    with self.assertRaisesRegexp(
+        ValueError, _INVALID_TASK_TYPE_FOR_EVAL_MASTER):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_multiple_master_nodes(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0', 'host:6:6'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+    }
+    with self.assertRaisesRegexp(ValueError, _ONE_MASTER_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_single_master_node(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        }
+    }
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec=tf_config['cluster'],
+        expected_task_type=run_config_lib.TaskType.MASTER,
+        expected_task_id=0,
+        expected_master='',
+        expected_evaluation_master='',
+        expected_is_chief=True,
+        expected_num_worker_replicas=1,
+        expected_num_ps_replicas=0)
+
+  def test_fail_with_missing_task_type_for_distributed(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host3:3']
+        },
+    }
+    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_TYPE_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_missing_task_index_for_distributed(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host3:3']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _MISSING_TASK_ID_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_index_is_too_large(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host3:3']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 1
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_INDEX_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_invalid_task_index(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host3:3']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': -1
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _NEGATIVE_TASK_INDEX_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_invalid_task_type(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host3:3']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 0
+        }
+    }
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_worker_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 1
+        }
+    }
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec=tf_config['cluster'],
+        expected_task_type=run_config_lib.TaskType.WORKER,
+        expected_task_id=1,
+        expected_master='grpc://host4:4',
+        expected_evaluation_master='',
+        expected_is_chief=False,
+        expected_num_worker_replicas=4,
+        expected_num_ps_replicas=2)
+
+  def test_ps_tf_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.PS,
+            'index': 0
+        }
+    }
+    self._assert_distributed_properties(
+        run_config=_create_run_config_with_cluster_spec(tf_config),
+        expected_cluster_spec=tf_config['cluster'],
+        expected_task_type=run_config_lib.TaskType.PS,
+        expected_task_id=0,
+        expected_master='grpc://host1:1',
+        expected_evaluation_master='',
+        expected_is_chief=False,
+        expected_num_worker_replicas=4,
+        expected_num_ps_replicas=2)
+
+  def test_fail_with_evaluator(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.EVALUATOR,
+            'index': 1
+        }
+    }
+    with self.assertRaisesRegexp(ValueError,
+                                 _INVALID_EVALUATOR_IN_CLUSTER_WITH_MASTER_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_fail_with_chief(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.CHIEF: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.PS,
+            'index': 1
+        }
+    }
+    with self.assertRaisesRegexp(ValueError,
+                                 _INVALID_CHIEF_IN_CLUSTER_WITH_MASTER_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+  def test_global_id_in_cluster_for_master(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(0, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_worker(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 2,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(3, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_ps(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.PS,
+            'index': 1,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(5, run_config.global_id_in_cluster)
+
+  def test_global_id_in_cluster_for_multipe_worker_types(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            'worker': ['host3:3', 'host4:4', 'host5:5'],
+            'other_type': ['host3:1', 'host4:2'],
+            run_config_lib.TaskType.PS: ['host6:3', 'host7:4', 'host8:5']
+        },
+        'task': {
+            'type': 'other_type',
+            'index': 1,
+        },
+    }
+    # Though 'other_type' is defined after 'worker', based on alphabetical
+    # order, the task type order should be 'chief', 'other_type', 'worker',
+    # 'ps', where 'chief' and 'ps' are predefined to be the top and last in the
+    # order list.
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(2, run_config.global_id_in_cluster)
+
+
+class RunConfigSaveCheckpointsTest(test.TestCase):
+
+  def test_save_checkpoint(self):
+    empty_config = run_config_lib.RunConfig()
+    self.assertEqual(600, empty_config.save_checkpoints_secs)
+    self.assertIsNone(empty_config.save_checkpoints_steps)
+
+    config_with_steps = empty_config.replace(save_checkpoints_steps=100)
+    del empty_config
+    self.assertEqual(100, config_with_steps.save_checkpoints_steps)
+    self.assertIsNone(config_with_steps.save_checkpoints_secs)
+
+    config_with_secs = config_with_steps.replace(save_checkpoints_secs=200)
+    del config_with_steps
+    self.assertEqual(200, config_with_secs.save_checkpoints_secs)
+    self.assertIsNone(config_with_secs.save_checkpoints_steps)
+
+  def test_save_checkpoint_both_steps_and_secs_are_not_none(self):
+    empty_config = run_config_lib.RunConfig()
+    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_ERR):
+      empty_config.replace(save_checkpoints_steps=100,
+                           save_checkpoints_secs=200)
+
+    with self.assertRaisesRegexp(ValueError, _SAVE_CKPT_ERR):
+      run_config_lib.RunConfig(save_checkpoints_steps=100,
+                               save_checkpoints_secs=200)
+
+  def test_save_checkpoint_both_steps_and_secs_are_none(self):
+    config_with_secs = run_config_lib.RunConfig()
+    config_without_ckpt = config_with_secs.replace(
+        save_checkpoints_steps=None, save_checkpoints_secs=None)
+    self.assertIsNone(config_without_ckpt.save_checkpoints_steps)
+    self.assertIsNone(config_without_ckpt.save_checkpoints_secs)
+
+  def test_save_checkpoint_flip_secs_to_none(self):
+    config_with_secs = run_config_lib.RunConfig()
+    config_without_ckpt = config_with_secs.replace(save_checkpoints_secs=None)
+    self.assertIsNone(config_without_ckpt.save_checkpoints_steps)
+    self.assertIsNone(config_without_ckpt.save_checkpoints_secs)
+
+  def test_save_checkpoint_flip_steps_to_none(self):
+    config_with_steps = run_config_lib.RunConfig().replace(
+        save_checkpoints_steps=100)
+    config_without_ckpt = config_with_steps.replace(save_checkpoints_steps=None)
+    self.assertIsNone(config_without_ckpt.save_checkpoints_steps)
+    self.assertIsNone(config_without_ckpt.save_checkpoints_secs)
+
+
+class RunConfigServiceKeyTest(test.TestCase):
+
+  def test_arbitrary_key_value_pairs(self):
+    tf_config = {
+        'service': {
+            'key1': [1, 2],
+            'key2': {'a': 3, 'b': 4},
+            'key3': 789,
+        },
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual(tf_config['service'], run_config.service)
+
+  def test_missing_service_key(self):
+    tf_config = {
+        'model_dir': '/tmp/123',
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertIsNone(run_config.service)
+
+  def test_fail_with_non_dict(self):
+    tf_config = {
+        'service': 789,
+    }
+    with self.assertRaisesRegexp(TypeError, _INVALID_SERVICE_TYPE_ERR):
+      _create_run_config_with_cluster_spec(tf_config)
+
+
+class RunConfigModelDirTest(test.TestCase):
+
+  def test_default(self):
+    run_config = run_config_lib.RunConfig()
+    self.assertIsNone(run_config.model_dir)
+
+  def test_model_dir_in_constructor(self):
+    run_config = run_config_lib.RunConfig(model_dir='/tmp/123')
+    self.assertEqual('/tmp/123', run_config.model_dir)
+
+  def test_model_dir_in_tf_config(self):
+    tf_config = {
+        'model_dir': '/tmp/123',
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertEqual('/tmp/123', run_config.model_dir)
+
+  def test_model_dir_both_set_in_both_constructor_and_tf_config(self):
+    model_dir = '/tmp/123'
+    tf_config = {'model_dir': model_dir}
+    kwargs = {'model_dir': model_dir}
+    run_config = _create_run_config_with_cluster_spec(tf_config, **kwargs)
+    self.assertEqual('/tmp/123', run_config.model_dir)
+
+  def test_model_dir_different_in_both_constructor_and_tf_config(self):
+    tf_config = {'model_dir': '/tmp/123'}
+    kwargs = {'model_dir': '/tmp/456'}
+    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_MISMATCH_ERR):
+      _create_run_config_with_cluster_spec(tf_config, **kwargs)
+
+  def test_fail_with_empty_string_in_constructor(self):
+    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
+      run_config_lib.RunConfig(model_dir='')
+
+  def test_fail_with_empty_string_in_tf_config(self):
+    with self.assertRaisesRegexp(ValueError, _MODEL_DIR_TF_CONFIG_ERR):
+      tf_config = {'model_dir': ''}
+      _create_run_config_with_cluster_spec(tf_config)
+
+
+class RunConfigSessionConfigTest(test.TestCase):
+
+  def _assert_equal_session_config(self, session_config,
+                                   expected_device_filters):
+
+    rewrite_opts = rewriter_config_pb2.RewriterConfig(
+        meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE)
+    graph_opts = config_pb2.GraphOptions(rewrite_options=rewrite_opts)
+    expected_session_config = config_pb2.ConfigProto(
+        allow_soft_placement=True,
+        graph_options=graph_opts,
+        device_filters=expected_device_filters)
+    self.assertEqual(session_config, expected_session_config)
+
+  def test_master_session_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.MASTER,
+            'index': 0
+        }
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self._assert_equal_session_config(run_config.session_config,
+                                      ['/job:ps', '/job:master'])
+
+  def test_chief_session_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.CHIEF,
+            'index': 0
+        }
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self._assert_equal_session_config(run_config.session_config,
+                                      ['/job:ps', '/job:chief'])
+
+  def test_worker_session_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.WORKER,
+            'index': 1
+        }
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self._assert_equal_session_config(run_config.session_config,
+                                      ['/job:ps', '/job:worker/task:1'])
+
+  def test_ps_session_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.PS,
+            'index': 1
+        }
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self._assert_equal_session_config(run_config.session_config,
+                                      ['/job:ps', '/job:worker', '/job:master'])
+
+  def test_evaluator_session_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': run_config_lib.TaskType.EVALUATOR,
+            'index': 0
+        }
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertIsNone(run_config.session_config)
+
+  def test_other_type_session_config(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.MASTER: ['host0:0'],
+            run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+            'other_type': ['host3:1', 'host4:2'],
+            run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4', 'host5:5']
+        },
+        'task': {
+            'type': 'other_type',
+            'index': 0
+        }
+    }
+    run_config = _create_run_config_with_cluster_spec(tf_config)
+    self.assertIsNone(run_config.session_config)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py
index e621a9531a..240be5dabe 100644
--- a/tensorflow/python/estimator/training.py
+++ b/tensorflow/python/estimator/training.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,1062 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""training python module.
-
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Classes and functions related to train_and_evaluate."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator import training
+import collections
+import json
+import os
+import time
+
+import six
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.distribute import estimator_training as distribute_coordinator_training
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import exporter as exporter_lib
+from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import server_lib
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.util import compat
+from tensorflow.python.util.tf_export import estimator_export
+
+_MAX_DELAY_SECS = 60
+_DELAY_SECS_PER_WORKER = 5
+_TF_CONFIG_ENV = 'TF_CONFIG'
+_ENVIRONMENT_KEY = 'environment'
+_ENVIRONMENT_GOOGLE_VALUE = 'google'
+_TRAINER_JOBS = (run_config_lib.TaskType.CHIEF, run_config_lib.TaskType.MASTER,
+                 run_config_lib.TaskType.WORKER)
+
+
+def _validate_input_fn(input_fn):
+  """Validates the `input_fn`."""
+  if not callable(input_fn):
+    raise TypeError('`input_fn` must be callable, given: {}'.format(input_fn))
+
+
+def _validate_hooks(hooks):
+  """Validates the `hooks`."""
+  hooks = tuple(hooks or [])
+  for hook in hooks:
+    if not isinstance(hook, session_run_hook.SessionRunHook):
+      raise TypeError(
+          'All hooks must be `SessionRunHook` instances, given: {}'.format(
+              hook))
+  return hooks
+
+
+def _validate_exporters(exporters):
+  """Validates `exporters` and returns them as a tuple."""
+  if not exporters:
+    return ()
+
+  if isinstance(exporters, exporter_lib.Exporter):
+    exporters = [exporters]
+
+  unique_names = []  # `Exporter`s should have unique names.
+  try:
+    for exporter in exporters:
+      if not isinstance(exporter, exporter_lib.Exporter):
+        # Error message will be printed out by the outer try/except.
+        raise TypeError
+
+      if not exporter.name:
+        full_list_of_names = [e.name for e in exporters]
+        raise ValueError('An Exporter cannot have a name that is `None` or'
+                         ' empty. All exporter names:'
+                         ' {}'.format(full_list_of_names))
+
+      if not isinstance(exporter.name, six.string_types):
+        raise ValueError('An Exporter must have a string name. Given: '
+                         '{}'.format(type(exporter.name)))
+
+      if exporter.name in unique_names:
+        full_list_of_names = [e.name for e in exporters]
+        raise ValueError(
+            '`exporters` must have unique names. Such a name cannot be `None`.'
+            ' All exporter names: {}'.format(full_list_of_names))
+      unique_names.append(exporter.name)
+  except TypeError:
+    # Two possibilities:
+    # - `exporters` is neither `Exporter` nor iterable.  Python has
+    #   raised a `TypeError` when iterating over `exporters`.
+    # - an `exporter` was None or not of type `Exporter`, so we raised a
+    #   `TypeError`.
+    raise TypeError('`exporters` must be an Exporter,'
+                    ' an iterable of Exporter, or `None`,'
+                    ' found %s.' % exporters)
+
+  return tuple(exporters)
+
+
+def _is_google_env():
+  """Detects whether current environment is google."""
+  tf_config = json.loads(os.environ.get(_TF_CONFIG_ENV) or '{}')
+  if not tf_config:
+    logging.warn('TF_CONFIG should not be empty in distributed environment.')
+  return tf_config.get(_ENVIRONMENT_KEY) == _ENVIRONMENT_GOOGLE_VALUE
+
+
+@estimator_export('estimator.TrainSpec')
+class TrainSpec(
+    collections.namedtuple('TrainSpec', ['input_fn', 'max_steps', 'hooks'])):
+  """Configuration for the "train" part for the `train_and_evaluate` call.
+
+  `TrainSpec` determines the input data for the training, as well as the
+  duration. Optional hooks run at various stages of training.
+  """
+
+  def __new__(cls, input_fn, max_steps=None, hooks=None):
+    """Creates a validated `TrainSpec` instance.
+
+    Args:
+      input_fn: A function that provides input data for training as minibatches.
+        See [Premade Estimators](https://tensorflow.org/guide/premade_estimators#create_input_functions)
+        for more information. The function should construct and return one of
+        the following:
+          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
+            tuple (features, labels) with same constraints as below.
+          * A tuple (features, labels): Where features is a `Tensor` or a
+            dictionary of string feature name to `Tensor` and labels is a
+            `Tensor` or a dictionary of string label name to `Tensor`.
+
+      max_steps: Int. Positive number of total steps for which to train model.
+        If `None`, train forever. The training `input_fn` is not expected to
+        generate `OutOfRangeError` or `StopIteration` exceptions. See the
+        `train_and_evaluate` stop condition section for details.
+      hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        on all workers (including chief) during training.
+
+    Returns:
+      A validated `TrainSpec` object.
+
+    Raises:
+      ValueError: If any of the input arguments is invalid.
+      TypeError: If any of the arguments is not of the expected type.
+    """
+    # Validate input_fn.
+    _validate_input_fn(input_fn)
+
+    # Validate max_steps.
+    if max_steps is not None and max_steps <= 0:
+      raise ValueError(
+          'Must specify max_steps > 0, given: {}'.format(max_steps))
+
+    # Validate hooks.
+    hooks = _validate_hooks(hooks)
+
+    return super(TrainSpec, cls).__new__(
+        cls, input_fn=input_fn, max_steps=max_steps, hooks=hooks)
+
+
+@estimator_export('estimator.EvalSpec')
+class EvalSpec(
+    collections.namedtuple('EvalSpec', [
+        'input_fn', 'steps', 'name', 'hooks', 'exporters', 'start_delay_secs',
+        'throttle_secs'
+    ])):
+  """Configuration for the "eval" part for the `train_and_evaluate` call.
+
+  `EvalSpec` combines details of evaluation of the trained model as well as its
+  export. Evaluation consists of computing metrics to judge the performance of
+  the trained model.  Export writes out the trained model on to external
+  storage.
+  """
+
+  def __new__(cls,
+              input_fn,
+              steps=100,
+              name=None,
+              hooks=None,
+              exporters=None,
+              start_delay_secs=120,
+              throttle_secs=600):
+    """Creates a validated `EvalSpec` instance.
+
+    Args:
+      input_fn: A function that constructs the input data for evaluation.
+        See [Premade Estimators](https://tensorflow.org/api_guides/premade_estimators#create_input_functions)
+        for more information. The function should construct and return one of
+        the following:
+          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
+            tuple (features, labels) with same constraints as below.
+          * A tuple (features, labels): Where features is a `Tensor` or a
+            dictionary of string feature name to `Tensor` and labels is a
+            `Tensor` or a dictionary of string label name to `Tensor`.
+
+      steps: Int. Positive number of steps for which to evaluate model. If
+        `None`, evaluates until `input_fn` raises an end-of-input exception.
+        See `Estimator.evaluate` for details.
+      name: String. Name of the evaluation if user needs to run multiple
+        evaluations on different data sets. Metrics for different evaluations
+        are saved in separate folders, and appear separately in tensorboard.
+      hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        during evaluation.
+      exporters: Iterable of `Exporter`s, or a single one, or `None`.
+        `exporters` will be invoked after each evaluation.
+      start_delay_secs: Int. Start evaluating after waiting for this many
+        seconds.
+      throttle_secs: Int. Do not re-evaluate unless the last evaluation was
+        started at least this many seconds ago. Of course, evaluation does not
+        occur if no new checkpoints are available, hence, this is the minimum.
+
+    Returns:
+      A validated `EvalSpec` object.
+
+    Raises:
+      ValueError: If any of the input arguments is invalid.
+      TypeError: If any of the arguments is not of the expected type.
+    """
+    # Validate input_fn.
+    _validate_input_fn(input_fn)
+
+    # Validate steps.
+    if steps is not None and steps <= 0:
+      raise ValueError('Must specify steps > 0, given: {}'.format(steps))
+
+    # Validate name.
+    if name is not None and not isinstance(name, six.string_types):
+      raise TypeError('`name` must be string, given: {}'.format(name))
+
+    # Validate hooks.
+    hooks = _validate_hooks(hooks)
+
+    # Validate exporters.
+    exporters = _validate_exporters(exporters)
+
+    # Validate start_delay_secs.
+    if start_delay_secs < 0:
+      raise ValueError('Must specify start_delay_secs >= 0, given: {}'.format(
+          start_delay_secs))
+
+    # Validate throttle_secs.
+    if throttle_secs < 0:
+      raise ValueError(
+          'Must specify throttle_secs >= 0, given: {}'.format(throttle_secs))
+
+    return super(EvalSpec, cls).__new__(
+        cls,
+        input_fn=input_fn,
+        steps=steps,
+        name=name,
+        hooks=hooks,
+        exporters=exporters,
+        start_delay_secs=start_delay_secs,
+        throttle_secs=throttle_secs)
+
+
+@estimator_export('estimator.train_and_evaluate')
+def train_and_evaluate(estimator, train_spec, eval_spec):
+  """Train and evaluate the `estimator`.
+
+  This utility function trains, evaluates, and (optionally) exports the model by
+  using the given `estimator`. All training related specification is held in
+  `train_spec`, including training `input_fn` and training max steps, etc. All
+  evaluation and export related specification is held in `eval_spec`, including
+  evaluation `input_fn`, steps, etc.
+
+  This utility function provides consistent behavior for both local
+  (non-distributed) and distributed configurations. The default distribution
+  configuration is parameter server-based between-graph replication. For other
+  types of distribution configurations such as all-reduce training, please use
+  [DistributionStrategies](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/distribute).  # pylint: disable=line-too-long
+
+  Overfitting: In order to avoid overfitting, it is recommended to set up the
+  training `input_fn` to shuffle the training data properly.
+
+  Stop condition: In order to support both distributed and non-distributed
+  configuration reliably, the only supported stop condition for model
+  training is `train_spec.max_steps`. If `train_spec.max_steps` is `None`, the
+  model is trained forever. *Use with care* if model stop condition is
+  different. For example, assume that the model is expected to be trained with
+  one epoch of training data, and the training `input_fn` is configured to throw
+  `OutOfRangeError` after going through one epoch, which stops the
+  `Estimator.train`. For a three-training-worker distributed configuration, each
+  training worker is likely to go through the whole epoch independently. So, the
+  model will be trained with three epochs of training data instead of one epoch.
+
+  Example of local (non-distributed) training:
+
+  ```python
+  # Set up feature columns.
+  categorial_feature_a = categorial_column_with_hash_bucket(...)
+  categorial_feature_a_emb = embedding_column(
+      categorical_column=categorial_feature_a, ...)
+  ...  # other feature columns
+
+  estimator = DNNClassifier(
+      feature_columns=[categorial_feature_a_emb, ...],
+      hidden_units=[1024, 512, 256])
+
+  # Or set up the model directory
+  #   estimator = DNNClassifier(
+  #       config=tf.estimator.RunConfig(
+  #           model_dir='/my_model', save_summary_steps=100),
+  #       feature_columns=[categorial_feature_a_emb, ...],
+  #       hidden_units=[1024, 512, 256])
+
+  # Input pipeline for train and evaluate.
+  def train_input_fn(): # returns x, y
+    # please shuffle the data.
+    pass
+  def eval_input_fn(): # returns x, y
+    pass
+
+  train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=1000)
+  eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
+
+  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
+  ```
+  Note that in current implementation `estimator.evaluate` will be called
+  multiple times. This means that evaluation graph (including eval_input_fn)
+  will be re-created for each `evaluate` call. `estimator.train` will be called
+  only once.
+
+  Example of distributed training:
+
+  Regarding the example of distributed training, the code above can be used
+  without a change (Please do make sure that the `RunConfig.model_dir` for all
+  workers is set to the same directory, i.e., a shared file system all workers
+  can read and write). The only extra work to do is setting the environment
+  variable `TF_CONFIG` properly for each worker correspondingly.
+
+  Also see
+  [Distributed TensorFlow](https://www.tensorflow.org/deploy/distributed).
+
+  Setting environment variable depends on the platform. For example, on Linux,
+  it can be done as follows (`$` is the shell prompt):
+
+  ```
+  $ TF_CONFIG='<replace_with_real_content>' python train_model.py
+  ```
+
+  For the content in `TF_CONFIG`, assume that the training cluster spec looks
+  like:
+
+  ```
+  cluster = {"chief": ["host0:2222"],
+             "worker": ["host1:2222", "host2:2222", "host3:2222"],
+             "ps": ["host4:2222", "host5:2222"]}
+  ```
+
+  Example of `TF_CONFIG` for chief training worker (must have one and only one):
+
+  ```
+  # This should be a JSON string, which is set as environment variable. Usually
+  # the cluster manager handles that.
+  TF_CONFIG='{
+      "cluster": {
+          "chief": ["host0:2222"],
+          "worker": ["host1:2222", "host2:2222", "host3:2222"],
+          "ps": ["host4:2222", "host5:2222"]
+      },
+      "task": {"type": "chief", "index": 0}
+  }'
+  ```
+  Note that the chief worker also does the model training job, similar to other
+  non-chief training workers (see next paragraph). In addition to the model
+  training, it manages some extra work, e.g., checkpoint saving and restoring,
+  writing summaries, etc.
+
+  Example of `TF_CONFIG` for non-chief training worker (optional, could be
+  multiple):
+
+  ```
+  # This should be a JSON string, which is set as environment variable. Usually
+  # the cluster manager handles that.
+  TF_CONFIG='{
+      "cluster": {
+          "chief": ["host0:2222"],
+          "worker": ["host1:2222", "host2:2222", "host3:2222"],
+          "ps": ["host4:2222", "host5:2222"]
+      },
+      "task": {"type": "worker", "index": 0}
+  }'
+  ```
+  where the `task.index` should be set as 0, 1, 2, in this example, respectively
+  for non-chief training workers.
+
+  Example of `TF_CONFIG` for parameter server, aka ps (could be multiple):
+
+  ```
+  # This should be a JSON string, which is set as environment variable. Usually
+  # the cluster manager handles that.
+  TF_CONFIG='{
+      "cluster": {
+          "chief": ["host0:2222"],
+          "worker": ["host1:2222", "host2:2222", "host3:2222"],
+          "ps": ["host4:2222", "host5:2222"]
+      },
+      "task": {"type": "ps", "index": 0}
+  }'
+  ```
+  where the `task.index` should be set as 0 and 1, in this example, respectively
+  for parameter servers.
+
+  Example of `TF_CONFIG` for evaluator task. Evaluator is a special task that is
+  not part of the training cluster. There could be only one. It is used for
+  model evaluation.
+
+  ```
+  # This should be a JSON string, which is set as environment variable. Usually
+  # the cluster manager handles that.
+  TF_CONFIG='{
+      "cluster": {
+          "chief": ["host0:2222"],
+          "worker": ["host1:2222", "host2:2222", "host3:2222"],
+          "ps": ["host4:2222", "host5:2222"]
+      },
+      "task": {"type": "evaluator", "index": 0}
+  }'
+  ```
+
+  When `distribute` or `experimental_distribute.train_distribute` and
+  `experimental_distribute.remote_cluster` is set, this method will start a
+  client running on the current host which connects to the `remote_cluster` for
+  training and evaluation.
+
+  Args:
+    estimator: An `Estimator` instance to train and evaluate.
+    train_spec: A `TrainSpec` instance to specify the training specification.
+    eval_spec: A `EvalSpec` instance to specify the evaluation and export
+      specification.
+
+  Returns:
+    A tuple of the result of the `evaluate` call to the `Estimator` and the
+    export results using the specified `ExportStrategy`.
+    Currently, the return value is undefined for distributed training mode.
+
+  Raises:
+    ValueError: if environment variable `TF_CONFIG` is incorrectly set.
+  """
+  _assert_eval_spec(eval_spec)  # fail fast if eval_spec is invalid.
+
+  executor = _TrainingExecutor(
+      estimator=estimator, train_spec=train_spec, eval_spec=eval_spec)
+  config = estimator.config
+
+  # If `distribute_coordinator_mode` is set and running in distributed
+  # environment, we run `train_and_evaluate` via distribute coordinator.
+  if distribute_coordinator_training.should_run_distribute_coordinator(config):
+    logging.info('Running `train_and_evaluate` with Distribute Coordinator.')
+    distribute_coordinator_training.train_and_evaluate(
+        estimator, train_spec, eval_spec, _TrainingExecutor)
+    return
+
+  if (config.task_type == run_config_lib.TaskType.EVALUATOR and
+      config.task_id > 0):
+    raise ValueError(
+        'For distributed training, there can only be one `evaluator` task '
+        '(with task id 0).  Given task id {}'.format(config.task_id))
+
+  return executor.run()
+
+
+class _StopAtSecsHook(session_run_hook.SessionRunHook):
+  """Stops given secs after begin is called."""
+
+  def __init__(self, stop_after_secs):
+    self._stop_after_secs = stop_after_secs
+    self._start_time = None
+
+  def begin(self):
+    self._start_time = time.time()
+
+  def after_run(self, run_context, run_values):
+    del run_values
+    if time.time() - self._start_time >= self._stop_after_secs:
+      run_context.request_stop()
+
+
+class _NewCheckpointListenerForEvaluate(
+    basic_session_run_hooks.CheckpointSaverListener):
+  """A saver listener to run evaluate with every checkpoint."""
+
+  def __init__(self, evaluator, eval_throttle_secs, continuous_eval_listener):
+    self._evaluator = evaluator
+    self._eval_throttle_secs = eval_throttle_secs
+    self._continuous_eval_listener = continuous_eval_listener
+    self.eval_result, self.export_results = None, None
+
+  def begin(self):
+    self._timer = basic_session_run_hooks.SecondOrStepTimer(
+        every_secs=self._eval_throttle_secs)
+    self._is_first_run = True
+
+  def after_save(self, session, global_step_value):
+    del session  # unused; required by signature.
+    # skip first run model is not trained yet.
+    if self._is_first_run:
+      self._is_first_run = False
+      return
+
+    if not self._continuous_eval_listener.before_eval():
+      logging.info('Exiting training and evaluation loop, as requested by '
+                   '_ContinuousEvalListener.before_eval.')
+      return True
+    if self._timer.should_trigger_for_step(global_step_value):
+      self._evaluate(global_step_value)  # updates self.eval_result
+      if not self._continuous_eval_listener.after_eval(self.eval_result):
+        logging.info('Exiting evaluation, as requested by '
+                     '_ContinuousEvalListener.after_eval.')
+        return True
+    else:
+      # TODO(ispir): add remaining time in the log.
+      logging.info('Skip the current checkpoint eval due to throttle secs '
+                   '({} secs).'.format(self._eval_throttle_secs))
+
+  def end(self, session, global_step_value):
+    # Evaluate if the last step has not been evaluated, yet.
+    if global_step_value != self._timer.last_triggered_step():
+      if self._continuous_eval_listener.before_eval():
+        self._evaluate(global_step_value)
+        self._continuous_eval_listener.after_eval(self.eval_result)
+
+  def _evaluate(self, global_step_value):
+    self._timer.update_last_triggered_step(global_step_value)
+    self.eval_result, self.export_results = (
+        self._evaluator.evaluate_and_export())
+    if self.eval_result.status != _EvalStatus.EVALUATED:
+      #  This is unexpected; should never happen.
+      #  Training should always end with a new checkpoint.
+      raise RuntimeError('There was no new checkpoint after the training. '
+                         'Eval status: {}'.format(self.eval_result.status))
+
+
+class _TrainingExecutor(object):
+  """The executor to run `Estimator` training and evaluation.
+
+  This implementation supports both distributed and non-distributed (aka local)
+  training and evaluation based on the setting in `tf.estimator.RunConfig`.
+  """
+
+  def __init__(self,
+               estimator,
+               train_spec,
+               eval_spec,
+               train_hooks=None,
+               continuous_eval_listener=None):
+    if not isinstance(estimator, estimator_lib.Estimator):
+      raise TypeError(
+          '`estimator` must have type `tf.estimator.Estimator`. '
+          'Got: {}'.format(type(estimator)))
+    self._estimator = estimator
+
+    if not isinstance(train_spec, TrainSpec):
+      raise TypeError(
+          '`train_spec` must have type `tf.estimator.TrainSpec`. '
+          'Got: {}'.format(type(train_spec)))
+    self._train_spec = train_spec
+
+    if eval_spec and not isinstance(eval_spec, EvalSpec):
+      raise TypeError('`eval_spec` must be either `None` or have type '
+                      '`tf.estimator.EvalSpec`. Got: {}'.format(
+                          type(eval_spec)))
+    self._eval_spec = eval_spec
+
+    self._train_hooks = _validate_hooks(train_hooks)
+
+    if (continuous_eval_listener and
+        not isinstance(continuous_eval_listener, _ContinuousEvalListener)):
+      raise TypeError('`continuous_eval_listener` must have type '
+                      '`_ContinuousEvalListener`.')
+    self._continuous_eval_listener = (
+        continuous_eval_listener or _ContinuousEvalListener())
+
+  @property
+  def estimator(self):
+    return self._estimator
+
+  def run(self):
+    """Executes the run_foo for task type `foo`.
+
+    `_TrainingExecutor` predefines the procedure for task type 'chief',
+    'worker', 'ps', and 'evaluator'. For task type `foo`, the corresponding
+    procedure is `run_foo'. This `run` method invoke the procedure base on the
+    `RunConfig.task_type`.
+
+    Returns:
+      A tuple of the result of the `evaluate` call to the `Estimator` and the
+      export results using the specified `ExportStrategy`.
+      Currently undefined for distributed training mode.
+
+    Raises:
+      ValueError: if the estimator.config is mis-configured.
+    """
+    config = self._estimator.config
+
+    if (not config.cluster_spec and
+        config.task_type != run_config_lib.TaskType.EVALUATOR):
+      logging.info('Running training and evaluation locally (non-distributed).')
+      return self.run_local()
+
+    # Distributed case.
+    if not config.task_type:
+      # TODO(xiejw): Improve the error message about how to set the TF_CONFIG
+      # correctly.
+      raise ValueError(
+          '`estimator.config` must have task_type set. This usually means '
+          'TF_CONFIG environment is not set correctly.')
+
+    if config.task_type == 'local':
+      raise ValueError(
+          '`task.type` in TF_CONFIG cannot be `local`. Leaving `cluster` and '
+          '`task` properties in TF_CONFIG absent triggers train and evaluate '
+          '`Estimator` locally (non-distributed).')
+
+    # For task type foo, call executor.run_foo.
+    available_tasks = [
+        x for x in dir(self)
+        if x.startswith('run_') and x != 'run_local' and
+        callable(getattr(self, x))
+    ]
+    task_to_run = 'run_' + config.task_type
+    if task_to_run not in available_tasks:
+      raise ValueError(
+          'Task type {} is not supported. Supported task types are {}'.format(
+              config.task_type, [x[len('run_'):] for x in available_tasks]))
+    getattr(self, task_to_run)()
+
+  def run_chief(self):
+    """Runs task chief."""
+    # TODO(xiejw): To allow execution framework to add train hooks.
+    return self._start_distributed_training()
+
+  def run_worker(self):
+    """Runs task (training) worker."""
+    # TODO(xiejw): To allow execution framework to add train hooks.
+    return self._start_distributed_training()
+
+  def run_master(self):
+    """Runs task master."""
+    _assert_eval_spec(self._eval_spec)
+
+    # Final export signal: For any eval result with global_step >= train
+    # max_steps, the evaluator will send the final export signal. There is a
+    # small chance that the Estimator.train stopping logic sees a different
+    # global_step value (due to global step race condition and the fact the
+    # saver sees a larger value for checkpoint saving), which does not end
+    # the training. When the training ends, a new checkpoint is generated, which
+    # triggers the listener again. So, it could be the case the final export is
+    # triggered twice.
+    #
+    # But here, throttle_secs will skip the next intermediate checkpoint and,
+    # so, the double final export chance is very small.
+    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
+                                             self._train_spec.max_steps)
+
+    # When the underlying `Estimator` object saves a new checkpoint, we would
+    # like this callback to be called so that evaluation and export can trigger.
+    saving_listeners = [
+        _NewCheckpointListenerForEvaluate(evaluator,
+                                          self._eval_spec.throttle_secs,
+                                          _ContinuousEvalListener())
+    ]
+    self._start_distributed_training(saving_listeners=saving_listeners)
+
+  def run_evaluator(self):
+    """Runs task evaluator."""
+    # TODO(xiejw): To allow execution framework to add continuous eval listener.
+    return self._start_continuous_evaluation()
+
+  def run_ps(self):
+    """Runs task parameter server (in training cluster spec)."""
+    config = self._estimator.config
+    server = self._start_std_server(config)
+    server.join()
+
+  def run_local(self):
+    """Runs training and evaluation locally (non-distributed)."""
+    _assert_eval_spec(self._eval_spec)
+
+    train_hooks = list(self._train_spec.hooks) + list(self._train_hooks)
+    logging.info('Start train and evaluate loop. The evaluate will happen '
+                 'after every checkpoint. Checkpoint frequency is determined '
+                 'based on RunConfig arguments: save_checkpoints_steps {} or '
+                 'save_checkpoints_secs {}.'.format(
+                     self._estimator.config.save_checkpoints_steps,
+                     self._estimator.config.save_checkpoints_secs))
+
+    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
+                                             self._train_spec.max_steps)
+
+    listener_for_eval = _NewCheckpointListenerForEvaluate(
+        evaluator, self._eval_spec.throttle_secs,
+        self._continuous_eval_listener)
+    saving_listeners = [listener_for_eval]
+
+    self._estimator.train(
+        input_fn=self._train_spec.input_fn,
+        max_steps=self._train_spec.max_steps,
+        hooks=train_hooks,
+        saving_listeners=saving_listeners)
+
+    eval_result = listener_for_eval.eval_result or _EvalResult(
+        status=_EvalStatus.MISSING_CHECKPOINT)
+    return eval_result.metrics, listener_for_eval.export_results
+
+  def _start_std_server(self, config):
+    """Creates, starts, and returns a server_lib.Server."""
+    if (not config.cluster_spec or not config.task_type or
+        config.task_id is None):
+      raise RuntimeError('Could not start server; be sure to specify '
+                         'cluster_spec, task_type, and task in '
+                         'RunConfig or set the TF_CONFIG environment variable.')
+
+    if not config.master:
+      jobs = config.cluster_spec.jobs
+      if (len(jobs) == 1 and
+          len(config.cluster_spec.job_tasks(jobs[0])) == 1 and
+          config.task_type in _TRAINER_JOBS):
+        # For distributed training, config.master is empty if and only if it has
+        # a single node in the cluster spec. In this case, we should not start
+        # the server.
+        logging.info('Skip starting Tensorflow server as there is only one '
+                     'node in the cluster.')
+        return
+      else:
+        raise RuntimeError(
+            'Could not start server; be sure to specify master in '
+            'RunConfig or set the TF_CONFIG environment variable.')
+
+    logging.info('Start Tensorflow server.')
+
+    if config.session_config is None:
+      session_config = config_pb2.ConfigProto(log_device_placement=False)
+    else:
+      session_config = config_pb2.ConfigProto(
+          log_device_placement=False,
+          gpu_options=config.session_config.gpu_options)
+
+    server = server_lib.Server(
+        config.cluster_spec,
+        job_name=config.task_type,
+        task_index=config.task_id,
+        config=session_config,
+        start=False,
+        protocol=config.protocol)
+    server.start()
+    return server
+
+  def _start_distributed_training(self, saving_listeners=None):
+    """Calls `Estimator` train in a distributed setting."""
+    config = self._estimator.config
+
+    # Start in-process TensorFlow server if needed. It's important to start the
+    # server before we (optionally) sleep. Otherwise, the servers will wait to
+    # connect to each other before starting to train.
+    if not _is_google_env():
+      self._start_std_server(config)
+
+    # Delay worker to start. For asynchronous training, this usually helps model
+    # to converge faster.  Chief starts the training immediately, so, worker
+    # with task id x (0-based) should wait (x+1) * _DELAY_SECS_PER_WORKER.
+    start_delay_secs = 0
+    if config.task_type == run_config_lib.TaskType.WORKER:
+      # TODO(xiejw): Replace the hard code logic (task_id + 1) with unique id in
+      # training cluster.
+      start_delay_secs = min(_MAX_DELAY_SECS,
+                             (config.task_id + 1) * _DELAY_SECS_PER_WORKER)
+    if start_delay_secs > 0:
+      logging.info('Waiting %d secs before starting training.',
+                   start_delay_secs)
+      time.sleep(start_delay_secs)
+
+    self._estimator.train(
+        input_fn=self._train_spec.input_fn,
+        max_steps=self._train_spec.max_steps,
+        hooks=list(self._train_spec.hooks) + list(self._train_hooks),
+        saving_listeners=saving_listeners)
+
+  def _start_continuous_evaluation(self):
+    """Repeatedly calls `Estimator` evaluate and export until training ends."""
+
+    _assert_eval_spec(self._eval_spec)
+
+    start_delay_secs = self._eval_spec.start_delay_secs
+    if start_delay_secs:
+      logging.info('Waiting %f secs before starting eval.', start_delay_secs)
+      time.sleep(start_delay_secs)
+
+    latest_eval_result = None
+    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec,
+                                             self._train_spec.max_steps)
+
+    should_early_stop = False
+    while not should_early_stop:
+      if (latest_eval_result and
+          latest_eval_result.status == _EvalStatus.EVALUATED):
+        global_step = latest_eval_result.metrics.get(ops.GraphKeys.GLOBAL_STEP)
+        if (global_step and self._train_spec.max_steps and
+            global_step >= self._train_spec.max_steps):
+          logging.info(
+              'Exiting evaluation, global_step=%s >= train max_steps=%s',
+              global_step, self._train_spec.max_steps)
+          return
+
+      latest_eval_result, should_early_stop = self._execute_evaluator_once(
+          evaluator, self._continuous_eval_listener,
+          self._eval_spec.throttle_secs)
+
+  def _execute_evaluator_once(self, evaluator, continuous_eval_listener,
+                              throttle_secs):
+    """Executes the `evaluator`."""
+
+    _assert_eval_spec(self._eval_spec)
+
+    start = time.time()
+
+    eval_result = None
+    should_early_stop = False
+
+    if not continuous_eval_listener.before_eval():
+      logging.info('Exiting evaluation, as requested by '
+                   '_ContinuousEvalListener.before_eval.')
+      should_early_stop = True
+      return (eval_result, should_early_stop)
+
+    # Final export signal: For any eval result with global_step >= train
+    # max_steps, the evaluator will send the final export signal. The next
+    # iteration of while loop will end the continuous eval as the stopping
+    # condition is satisfied (both checks use the same global_step value,
+    # i.e., no race condition)
+    eval_result, _ = evaluator.evaluate_and_export()
+
+    if not self._continuous_eval_listener.after_eval(eval_result):
+      logging.info('Exiting evaluation, as requested by '
+                   '_ContinuousEvalListener.after_eval.')
+      should_early_stop = True
+      return (eval_result, should_early_stop)
+
+    # Throttle if necessary.
+    elapsed_time = time.time() - start
+    difference = throttle_secs - elapsed_time
+    if difference > 0:
+      logging.info('Waiting %f secs before starting next eval run.', difference)
+      time.sleep(difference)
+    elif (throttle_secs == 0 and
+          eval_result.status != _EvalStatus.EVALUATED):
+      # Prints a user-actionable warning to avoid unnecessary load on evaluator.
+      logging.warning(
+          'EvalSpec.throttle_secs is set as 0. This might overload the job '
+          'before finding (next) new checkpoint. Please consider to increase '
+          'it.')
+
+    return (eval_result, should_early_stop)
+
+  class _Evaluator(object):
+    """A helper class to call `Estimator.evaluate` and export model."""
+
+    def __init__(self, estimator, eval_spec, max_training_steps):
+      self._estimator = estimator
+
+      _assert_eval_spec(eval_spec)
+      self._eval_spec = eval_spec
+
+      self._is_final_export_triggered = False
+      self._previous_ckpt_path = None
+      self._last_warning_time = 0
+      self._max_training_steps = max_training_steps
+
+    @property
+    def is_final_export_triggered(self):
+      return self._is_final_export_triggered
+
+    def evaluate_and_export(self):
+      """Evaluate and (maybe) export the current model.
+
+      Returns:
+        A tuple of `EvalResult` instance and the export results.
+
+      Raises:
+        RuntimeError: for any unexpected internal error.
+        TypeError: if evaluation result has wrong type.
+      """
+      latest_ckpt_path = self._estimator.latest_checkpoint()
+      if not latest_ckpt_path:
+        self._log_err_msg('Estimator is not trained yet. Will start an '
+                          'evaluation when a checkpoint is ready.')
+        return _EvalResult(status=_EvalStatus.MISSING_CHECKPOINT), []
+
+      if latest_ckpt_path == self._previous_ckpt_path:
+        self._log_err_msg(
+            'No new checkpoint ready for evaluation. Skip the current '
+            'evaluation pass as evaluation results are expected to be same '
+            'for the same checkpoint.')
+        return _EvalResult(status=_EvalStatus.NO_NEW_CHECKPOINT), []
+
+      metrics = self._estimator.evaluate(
+          input_fn=self._eval_spec.input_fn,
+          steps=self._eval_spec.steps,
+          name=self._eval_spec.name,
+          checkpoint_path=latest_ckpt_path,
+          hooks=self._eval_spec.hooks)
+
+      # _EvalResult validates the metrics.
+      eval_result = _EvalResult(
+          status=_EvalStatus.EVALUATED,
+          metrics=metrics,
+          checkpoint_path=latest_ckpt_path)
+
+      is_the_final_export = (
+          eval_result.metrics[ops.GraphKeys.GLOBAL_STEP] >=
+          self._max_training_steps if self._max_training_steps else False)
+      export_results = self._export_eval_result(eval_result,
+                                                is_the_final_export)
+
+      if is_the_final_export:
+        logging.debug('Calling exporter with the `is_the_final_export=True`.')
+        self._is_final_export_triggered = True
+
+      self._last_warning_time = 0
+      self._previous_ckpt_path = latest_ckpt_path
+      return eval_result, export_results
+
+    def _log_err_msg(self, message):
+      """Prints warning `message` every 10 mins."""
+      current_time = time.time()
+      if current_time - self._last_warning_time > 600:
+        logging.warning(message)
+        self._last_warning_time = current_time
+
+    def _export_eval_result(self, eval_result, is_the_final_export):
+      """Export `eval_result` according to exporters in `EvalSpec`."""
+      export_dir_base = os.path.join(
+          compat.as_str_any(self._estimator.model_dir),
+          compat.as_str_any('export'))
+
+      export_results = []
+      for exporter in self._eval_spec.exporters:
+        export_results.append(
+            exporter.export(
+                estimator=self._estimator,
+                export_path=os.path.join(
+                    compat.as_str_any(export_dir_base),
+                    compat.as_str_any(exporter.name)),
+                checkpoint_path=eval_result.checkpoint_path,
+                eval_result=eval_result.metrics,
+                is_the_final_export=is_the_final_export))
+      return export_results
+
+
+class _EvalStatus(object):
+  """The status of an evaluation event.
+
+  For local training and evaluation, the status can only be `EVALUATED` as
+  `Estimator.train` always generates a new checkpoint.
+
+  For distributed training and evaluation, a separated evaluator keeps looking
+  for new checkpoint. So, multiple situations might occur:
+
+  - EVALUATED: A new checkpoint is found since last evaluation.
+      `Estimator.evaluate` will be invoked.
+  - MISSING_CHECKPOINT: No checkpoint can be found. Typically, this means
+      the trainer has not yet produced any checkpoint.
+  - NO_NEW_CHECKPOINT: No new checkpoint can be found since last evaluation.
+      Typically, this means the trainer has not yet produced any new checkpoint.
+  """
+
+  EVALUATED = 'evaluated'
+  MISSING_CHECKPOINT = 'missing checkpoint'
+  NO_NEW_CHECKPOINT = 'no new checkpoint'
+
+
+class _EvalResult(
+    collections.namedtuple('EvalResult',
+                           ['status', 'metrics', 'checkpoint_path'])):
+  """_EvalResult holds the result of an evaluation event."""
+
+  def __new__(cls, status, metrics=None, checkpoint_path=None):
+    """Creates a validated `_EvalResult`.
+
+    Args:
+      status: See `_EvalStatus`.
+      metrics: The evaluation results returned by `Estimator.evaluate`. Only set
+          if status is `EVALUATED`.
+      checkpoint_path: The corresponding checkpoint path for the `metrics`. Only
+          set if status is `EVALUATED`.
+    Returns:
+      A validated `_EvalResult` object.
+
+    Raises:
+      ValueError: If validation fails.
+      TypeError: If any of the arguments is not the expected type.
+    """
+
+    if status != _EvalStatus.EVALUATED:
+      if metrics:
+        raise ValueError(
+            'metrics must be `None` if status is not {}; got status {},'
+            ' metrics {}'.format(_EvalStatus.EVALUATED, status, metrics))
+      if checkpoint_path:
+        raise ValueError(
+            'checkpoint must be `None` if status is not {}; got status {}, '
+            'checkpoint_path {}'.format(_EvalStatus.EVALUATED, status,
+                                        checkpoint_path))
+      return super(_EvalResult, cls).__new__(cls, status, metrics,
+                                             checkpoint_path)
+
+    # Now, evaluated case.
+    assert status == _EvalStatus.EVALUATED
+
+    # Validates metrics.
+    if not metrics:
+      raise ValueError(
+          'Internal error: `Estimator.evaluate` should never return empty '
+          'metrics.')
+    if not isinstance(metrics, dict):
+      raise TypeError(
+          '`Estimator.evaluate` should return dict. Given {}.'.format(
+              type(metrics)))
+    if ops.GraphKeys.GLOBAL_STEP not in metrics:
+      raise ValueError(
+          'Internal error: `Estimator.evaluate` result should have '
+          '`global_step` in result. Given {}'.format(metrics))
+
+    # Validates checkpoint_path.
+    if not checkpoint_path:
+      raise ValueError(
+          'Internal error: `checkpoint_path` should never be empty.')
+
+    return super(_EvalResult, cls).__new__(cls, status, metrics,
+                                           checkpoint_path)
+
+
+class _ContinuousEvalListener(object):
+  """Interface for listeners that take action before or after evaluation."""
+
+  def before_eval(self):
+    """Called before evaluation.
+
+    Returns:
+      `False` if you want to skip the current evaluation and early stop the
+      continuous evaluation; `True` otherwise.
+    """
+    return True
+
+  def after_eval(self, eval_result):
+    """Called after the evaluation is executed.
+
+    Args:
+      eval_result: An `_EvalResult` instance.
+
+    Returns:
+      False if you want to early stop continuous evaluation; `True` otherwise.
+    """
+    del eval_result
+    return True
 
-# Include attrs that start with single underscore.
-training.__all__ = [s for s in dir(training) if not s.startswith('__')]
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.training import *
+def _assert_eval_spec(eval_spec):
+  """Raise error if `eval_spec` is not of the right type."""
+  if not isinstance(eval_spec, EvalSpec):
+    raise TypeError('`eval_spec` must have type `tf.estimator.EvalSpec`. '
+                    'Got: {}'.format(type(eval_spec)))
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
new file mode 100644
index 0000000000..7d46917a6f
--- /dev/null
+++ b/tensorflow/python/estimator/training_test.py
@@ -0,0 +1,2198 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for training.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import glob
+import json
+import os
+import random
+import shutil
+import tempfile
+import time
+
+import numpy as np
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import exporter as exporter_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.estimator import training
+from tensorflow.python.estimator.canned import dnn
+from tensorflow.python.estimator.canned import prediction_keys
+from tensorflow.python.estimator.export import export as export_lib
+from tensorflow.python.feature_column import feature_column
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.ops import state_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.summary import summary_iterator
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import monitored_session
+from tensorflow.python.training import server_lib
+from tensorflow.python.training import session_run_hook
+from tensorflow.python.training import training_util
+from tensorflow.python.util import compat
+
+_DEFAULT_EVAL_STEPS = 100
+_DEFAULT_EVAL_DELAY_SECS = 120
+_DEFAULT_EVAL_THROTTLE_SECS = 600
+_DELAY_SECS_PER_WORKER = 5
+_GLOBAL_STEP_KEY = ops.GraphKeys.GLOBAL_STEP
+_INVALID_INPUT_FN_MSG = '`input_fn` must be callable'
+_INVALID_HOOK_MSG = 'All hooks must be `SessionRunHook` instances'
+_INVALID_MAX_STEPS_MSG = 'Must specify max_steps > 0'
+_INVALID_STEPS_MSG = 'Must specify steps > 0'
+_INVALID_NAME_MSG = '`name` must be string'
+_INVALID_EVAL_DELAY_SECS_MSG = 'Must specify start_delay_secs >= 0'
+_INVALID_EVAL_THROTTLE_SECS_MSG = 'Must specify throttle_secs >= 0'
+_INVALID_ESTIMATOR_MSG = '`estimator` must have type `tf.estimator.Estimator`'
+_STALE_CHECKPOINT_MSG = 'There was no new checkpoint after the training.'
+_INVALID_EXPORTER_MSG = '`exporters` must be an Exporter'
+_INVALID_EXPORTER_NAME_TYPE_MSG = 'An Exporter must have a string name'
+_DUPLICATE_EXPORTER_NAMES_MSG = '`exporters` must have unique names.'
+_NONE_EXPORTER_NAME_MSG = (
+    'An Exporter cannot have a name that is `None` or empty.')
+_INVALID_TRAIN_SPEC_MSG = '`train_spec` must have type `tf.estimator.TrainSpec`'
+_INVALID_EVAL_SPEC_MSG = '`eval_spec` must have type `tf.estimator.EvalSpec`'
+_EVAL_SPEC_OR_NONE_MSG = (
+    '`eval_spec` must be either `None` or have type `tf.estimator.EvalSpec`')
+_INVALID_EVAL_LISTENER_MSG = 'must have type `_ContinuousEvalListener`'
+_INVALID_CONFIG_FOR_STD_SERVER_MSG = 'Could not start server; .*TF_CONFIG'
+_INVALID_LOCAL_TASK_WITH_CLUSTER = '`task.type` in TF_CONFIG cannot be `local`'
+_INVALID_TASK_TYPE = '`estimator.config` must have task_type set.'
+_INPROPER_THROTTL_SECS = (
+    'EvalSpec.throttle_secs is set as 0.*Please consider to increase')
+
+# The message should NOT have 'local' word as part of it. As (?!word) is looking
+# ahead, so, the $ (ending) check is required; otherwise, it will match
+# partially and return successuful.
+_INVALID_TASK_TO_RUN = (
+    'Task type .* is not supported. Supported task types are ((?!local).)*$')
+_INVALID_EMPTY_EVAL_RESULT_ERR = (
+    'Internal error: `Estimator.evaluate` should never return empty metrics')
+_INVALID_EVAL_RESULT_TYPE_ERR = '`Estimator.evaluate` should return dict.'
+_MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR = (
+    'Internal error: `Estimator.evaluate` result should have `global_step`')
+_INVALID_EVAL_TASK_ID_ERR = (
+    'there can only be one `evaluator` task .*with task id 0')
+
+_TF_CONFIG_FOR_CHIEF = {
+    'cluster': {
+        run_config_lib.TaskType.CHIEF: ['host0:0'],
+        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
+    },
+    'task': {
+        'type': run_config_lib.TaskType.CHIEF,
+        'index': 0
+    }
+}
+
+_TF_CONFIG_FOR_MASTER = {
+    'cluster': {
+        run_config_lib.TaskType.MASTER: ['host0:0'],
+        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
+    },
+    'task': {
+        'type': run_config_lib.TaskType.MASTER,
+        'index': 0
+    }
+}
+
+_TF_CONFIG_FOR_WORKER = {
+    'cluster': {
+        run_config_lib.TaskType.CHIEF: ['host0:0'],
+        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
+    },
+    'task': {
+        'type': run_config_lib.TaskType.WORKER,
+        'index': 1
+    }
+}
+
+_TF_CONFIG_FOR_PS = {
+    'cluster': {
+        run_config_lib.TaskType.CHIEF: ['host0:0'],
+        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
+    },
+    'task': {
+        'type': run_config_lib.TaskType.PS,
+        'index': 1
+    }
+}
+
+_TF_CONFIG_FOR_EVALUATOR = {
+    'cluster': {
+        run_config_lib.TaskType.CHIEF: ['host0:0'],
+        run_config_lib.TaskType.PS: ['host1:1', 'host2:2'],
+        run_config_lib.TaskType.WORKER: ['host3:3', 'host4:4']
+    },
+    'task': {
+        'type': run_config_lib.TaskType.EVALUATOR,
+        'index': 0
+    }
+}
+
+_TF_CONFIG_FOR_GOOGLE = {'environment': 'google'}
+
+
+class _FakeHook(session_run_hook.SessionRunHook):
+  """Fake implementation of `SessionRunHook`."""
+
+
+class _InvalidHook(object):
+  """Invalid hook (not a subclass of `SessionRunHook`)."""
+
+
+def _create_exporter(name):
+  class FakeExporter(exporter_lib.Exporter):
+
+    def __init__(self, name):
+      self._name = name
+
+    @property
+    def name(self):
+      return self._name
+
+    def export(self, *args, **kwargs):
+      del args, kwargs
+
+  return FakeExporter(name=name)
+
+
+def _create_run_config_with_cluster_spec(tf_config):
+  with test.mock.patch.dict('os.environ', {'TF_CONFIG': json.dumps(tf_config)}):
+    return run_config_lib.RunConfig()
+
+
+class TrainSpecTest(test.TestCase):
+  """Tests TrainSpec."""
+
+  def testRequiredArgumentsSet(self):
+    """Tests that no errors are raised when all required arguments are set."""
+    spec = training.TrainSpec(input_fn=lambda: 1)
+    self.assertEqual(1, spec.input_fn())
+    self.assertIsNone(spec.max_steps)
+    self.assertEqual(0, len(spec.hooks))
+
+  def testAllArgumentsSet(self):
+    """Tests that no errors are raised when all arguments are set."""
+    hooks = [_FakeHook()]
+    spec = training.TrainSpec(input_fn=lambda: 1, max_steps=2, hooks=hooks)
+    self.assertEqual(1, spec.input_fn())
+    self.assertEqual(2, spec.max_steps)
+    self.assertEqual(tuple(hooks), spec.hooks)
+
+  def testInvalidInputFn(self):
+    with self.assertRaisesRegexp(TypeError, _INVALID_INPUT_FN_MSG):
+      training.TrainSpec(input_fn='invalid')
+
+  def testInvalidMaxStep(self):
+    with self.assertRaisesRegexp(ValueError, _INVALID_MAX_STEPS_MSG):
+      training.TrainSpec(input_fn=lambda: 1, max_steps=0)
+
+  def testInvalidHook(self):
+    with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG):
+      training.TrainSpec(input_fn=lambda: 1, hooks=[_InvalidHook()])
+
+
+class EvalSpecTest(test.TestCase):
+  """Tests EvalSpec."""
+
+  def testRequiredArgumentsSet(self):
+    """Tests that no errors are raised when all required arguments are set."""
+    spec = training.EvalSpec(input_fn=lambda: 1)
+    self.assertEqual(1, spec.input_fn())
+    self.assertEqual(_DEFAULT_EVAL_STEPS, spec.steps)
+    self.assertIsNone(spec.name)
+    self.assertEqual(0, len(spec.hooks))
+    self.assertEqual(0, len(spec.exporters))
+    self.assertEqual(_DEFAULT_EVAL_DELAY_SECS, spec.start_delay_secs)
+    self.assertEqual(_DEFAULT_EVAL_THROTTLE_SECS, spec.throttle_secs)
+
+  def testAllArgumentsSet(self):
+    """Tests that no errors are raised when all arguments are set."""
+    hooks = [_FakeHook()]
+    exporter = _create_exporter('a')
+
+    spec = training.EvalSpec(
+        input_fn=lambda: 1,
+        steps=2,
+        name='name',
+        hooks=hooks,
+        exporters=exporter,
+        start_delay_secs=3,
+        throttle_secs=4)
+    self.assertEqual(1, spec.input_fn())
+    self.assertEqual(2, spec.steps)
+    self.assertEqual('name', spec.name)
+    self.assertEqual(tuple(hooks), spec.hooks)
+    self.assertEqual((exporter,), spec.exporters)
+    self.assertEqual(3, spec.start_delay_secs)
+    self.assertEqual(4, spec.throttle_secs)
+
+  def testListOfExporters(self):
+    """Tests that no errors are raised with multiple exporters."""
+    exporters = [_create_exporter('a'), _create_exporter('b')]
+
+    spec = training.EvalSpec(input_fn=lambda: 1, exporters=exporters)
+    self.assertEqual(1, spec.input_fn())
+    self.assertEqual(tuple(exporters), spec.exporters)
+
+  def testInvalidInputFn(self):
+    with self.assertRaisesRegexp(TypeError, _INVALID_INPUT_FN_MSG):
+      training.EvalSpec(input_fn='invalid')
+
+  def testInvalidMaxStep(self):
+    with self.assertRaisesRegexp(ValueError, _INVALID_STEPS_MSG):
+      training.EvalSpec(input_fn=lambda: 1, steps=0)
+
+  def testInvalidName(self):
+    with self.assertRaisesRegexp(TypeError, _INVALID_NAME_MSG):
+      training.EvalSpec(input_fn=lambda: 1, name=123)
+
+  def testInvalidHook(self):
+    with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG):
+      training.EvalSpec(input_fn=lambda: 1, hooks=[_InvalidHook()])
+
+  def testInvalidDelaySecs(self):
+    with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_DELAY_SECS_MSG):
+      training.EvalSpec(input_fn=lambda: 1, start_delay_secs=-1)
+
+  def testInvalidThrottleSecs(self):
+    with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_THROTTLE_SECS_MSG):
+      training.EvalSpec(input_fn=lambda: 1, throttle_secs=-1)
+
+  def testInvalidTypeOfListOfExporters(self):
+    with self.assertRaisesRegexp(TypeError, _INVALID_EXPORTER_MSG):
+      training.EvalSpec(
+          input_fn=lambda: 1, exporters=[_create_exporter('a'),
+                                         _FakeHook()])
+
+  def testInvalidTypeOfIndividualExporter(self):
+    with self.assertRaisesRegexp(TypeError, _INVALID_EXPORTER_MSG):
+      training.EvalSpec(input_fn=lambda: 1, exporters=_FakeHook())
+
+  def testInvalidTypeOfExporterName(self):
+    with self.assertRaisesRegexp(ValueError, _INVALID_EXPORTER_NAME_TYPE_MSG):
+      training.EvalSpec(input_fn=lambda: 1,
+                        exporters=_create_exporter(name=123))
+
+  def testMultipleExportersWithTheSameName(self):
+    with self.assertRaisesRegexp(ValueError, _DUPLICATE_EXPORTER_NAMES_MSG):
+      training.EvalSpec(
+          input_fn=lambda: 1,
+          exporters=[_create_exporter('a'), _create_exporter('a')])
+
+  def testMultipleExportersAndOneWithoutAName(self):
+    with self.assertRaisesRegexp(ValueError, _NONE_EXPORTER_NAME_MSG):
+      training.EvalSpec(
+          input_fn=lambda: 1,
+          exporters=[_create_exporter('a'),
+                     _create_exporter(None)])
+
+  def testSingleExporterWithoutAName(self):
+    with self.assertRaisesRegexp(ValueError, _NONE_EXPORTER_NAME_MSG):
+      training.EvalSpec(input_fn=lambda: 1, exporters=_create_exporter(None))
+
+
+class TrainAndEvaluateTest(test.TestCase):
+
+  def test_run_task(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    with test.mock.patch.object(training, '_TrainingExecutor') as mock_executor:
+      mock_executor_instance = test.mock.Mock()
+      mock_executor.return_value = mock_executor_instance
+      training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
+      mock_executor.assert_called_with(estimator=mock_est,
+                                       train_spec=mock_train_spec,
+                                       eval_spec=mock_eval_spec)
+      self.assertTrue(mock_executor_instance.run.called)
+
+  def test_error_out_if_evaluator_task_id_is_non_zero(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+        },
+        'task': {
+            'type': run_config_lib.TaskType.EVALUATOR,
+            'index': 1
+        }
+    }
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    with self.assertRaisesRegexp(ValueError, _INVALID_EVAL_TASK_ID_ERR):
+      training.train_and_evaluate(mock_est, mock_train_spec, mock_eval_spec)
+
+  def test_invalid_estimator(self):
+    invalid_estimator = object()
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    with self.assertRaisesRegexp(TypeError, _INVALID_ESTIMATOR_MSG):
+      training.train_and_evaluate(invalid_estimator, mock_train_spec,
+                                  mock_eval_spec)
+
+  def test_fail_fast_if_invalid_eval_spec(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    invalid_eval_spec = object()
+
+    with test.mock.patch.object(training, '_TrainingExecutor') as mock_executor:
+      with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
+        training.train_and_evaluate(mock_est, mock_train_spec,
+                                    invalid_eval_spec)
+
+      mock_executor.assert_not_called()
+
+
+class TrainingExecutorConstructorTest(test.TestCase):
+  """Tests constructor of _TrainingExecutor."""
+
+  def test_required_arguments_set(self):
+    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = training.EvalSpec(input_fn=lambda: 1)
+
+    executor = training._TrainingExecutor(estimator, train_spec, eval_spec)
+    self.assertEqual(estimator, executor.estimator)
+
+  def test_invalid_estimator(self):
+    invalid_estimator = object()
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = training.EvalSpec(input_fn=lambda: 1)
+
+    with self.assertRaisesRegexp(TypeError, _INVALID_ESTIMATOR_MSG):
+      training._TrainingExecutor(invalid_estimator, train_spec, eval_spec)
+
+  def test_invalid_train_spec(self):
+    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
+    invalid_train_spec = object()
+    eval_spec = training.EvalSpec(input_fn=lambda: 1)
+
+    with self.assertRaisesRegexp(TypeError, _INVALID_TRAIN_SPEC_MSG):
+      training._TrainingExecutor(estimator, invalid_train_spec, eval_spec)
+
+  def test_invalid_eval_spec(self):
+    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    invalid_eval_spec = object()
+
+    with self.assertRaisesRegexp(TypeError, _EVAL_SPEC_OR_NONE_MSG):
+      training._TrainingExecutor(estimator, train_spec, invalid_eval_spec)
+
+  def test_eval_spec_none(self):
+    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = None
+
+    # Tests that no error is raised.
+    training._TrainingExecutor(estimator, train_spec, eval_spec)
+
+  def test_invalid_train_hooks(self):
+    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = training.EvalSpec(input_fn=lambda: 1)
+    invalid_train_hooks = [object()]
+
+    with self.assertRaisesRegexp(TypeError, _INVALID_HOOK_MSG):
+      training._TrainingExecutor(
+          estimator, train_spec, eval_spec, train_hooks=invalid_train_hooks)
+
+  def test_invalid_continuous_eval_listener(self):
+    estimator = estimator_lib.Estimator(model_fn=lambda features: features)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = training.EvalSpec(input_fn=lambda: 1)
+    invalid_continuous_eval_listener = object()
+
+    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_LISTENER_MSG):
+      training._TrainingExecutor(
+          estimator,
+          train_spec,
+          eval_spec,
+          continuous_eval_listener=invalid_continuous_eval_listener)
+
+
+class _TrainingExecutorTrainingTest(object):
+  """Tests training of _TrainingExecutor."""
+
+  def __init__(self, run_config):
+    self._run_config = run_config
+
+  def _run_task(self, executor):
+    # We should not call executor.run as the test here is intended to test
+    # run_foo explicitly (foo is the task type).
+    return getattr(executor, 'run_' + self._run_config.task_type)()
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_train_with_train_spec(self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = self._run_config
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+    mock_server_instance = mock_server.return_value
+
+    executor = training._TrainingExecutor(mock_est, train_spec, mock_eval_spec)
+    self._run_task(executor)
+
+    mock_server.assert_called_with(
+        mock_est.config.cluster_spec,
+        job_name=mock_est.config.task_type,
+        task_index=mock_est.config.task_id,
+        config=test.mock.ANY,
+        protocol=None,
+        start=False)
+
+    self.assertTrue(mock_server_instance.start.called)
+
+    mock_est.train.assert_called_with(
+        input_fn=train_spec.input_fn,
+        max_steps=train_spec.max_steps,
+        hooks=list(train_spec.hooks),
+        saving_listeners=test.mock.ANY)
+    mock_est.evaluate.assert_not_called()
+    mock_est.export_savedmodel.assert_not_called()
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_train_with_no_eval_spec(self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = self._run_config
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
+    eval_spec = None
+    mock_server_instance = mock_server.return_value
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    self._run_task(executor)
+
+    mock_server.assert_called_with(
+        mock_est.config.cluster_spec,
+        job_name=mock_est.config.task_type,
+        task_index=mock_est.config.task_id,
+        config=test.mock.ANY,
+        protocol=None,
+        start=False)
+
+    self.assertTrue(mock_server_instance.start.called)
+
+    mock_est.train.assert_called_with(
+        input_fn=train_spec.input_fn,
+        max_steps=train_spec.max_steps,
+        hooks=list(train_spec.hooks),
+        saving_listeners=test.mock.ANY)
+    mock_est.evaluate.assert_not_called()
+    mock_est.export_savedmodel.assert_not_called()
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_train_with_train_hooks(self, unused_mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = self._run_config
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+    extra_hooks = [_FakeHook()]
+
+    executor = training._TrainingExecutor(
+        mock_est, train_spec, mock_eval_spec, train_hooks=extra_hooks)
+    self._run_task(executor)
+
+    mock_est.train.assert_called_with(
+        input_fn=train_spec.input_fn,
+        max_steps=train_spec.max_steps,
+        hooks=list(train_spec.hooks) + extra_hooks,
+        saving_listeners=test.mock.ANY)
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_no_server_startup_in_google(self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = self._run_config
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    tf_config = {'TF_CONFIG': json.dumps(_TF_CONFIG_FOR_GOOGLE)}
+    with test.mock.patch.dict('os.environ', tf_config):
+      self._run_task(executor)
+      mock_server.assert_not_called()
+
+  def test_fail_with_empty_cluster_spec(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = None
+    mock_est.config.master = 'grpc://...'
+    mock_est.config.task_type = 'worker'
+    mock_est.config.task_id = 2
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
+                                                mock_eval_spec))
+
+  def test_fail_with_empty_master(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec(
+        {'worker': ['dummy', 'dummy1']})
+    mock_est.config.master = ''
+    mock_est.config.task_type = 'worker'
+    mock_est.config.task_id = 2
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
+                                                mock_eval_spec))
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_single_worker_node_with_empty_tf_master(
+      self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    # Single node cluster.
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
+    mock_est.config.master = ''
+    mock_est.config.task_type = 'worker'
+    mock_est.config.task_id = 2
+
+    self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
+                                              mock_eval_spec))
+    self.assertTrue(mock_est.train.called)
+    mock_server.assert_not_called()
+
+  def test_fail_with_empty_task_type(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
+    mock_est.config.master = 'grpc://...'
+    mock_est.config.task_type = ''
+    mock_est.config.task_id = 2
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
+                                                mock_eval_spec))
+
+  def test_fail_with_none_task_id(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'worker': ['dummy']})
+    mock_est.config.master = 'grpc://...'
+    mock_est.config.task_type = 'worker'
+    mock_est.config.task_id = None
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
+                                                mock_eval_spec))
+
+
+class TrainingExecutorRunWorkerTest(_TrainingExecutorTrainingTest,
+                                    test.TestCase):
+  """Tests run_worker of _TrainingExecutor."""
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    _TrainingExecutorTrainingTest.__init__(
+        self,
+        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_WORKER))
+
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_delay_for_worker(self, _):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = self._run_config
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+
+    expected_secs = (self._run_config.task_id + 1) * _DELAY_SECS_PER_WORKER
+    with test.mock.patch.object(time, 'sleep') as mock_sleep:
+      mock_sleep.side_effect = lambda s: self.assertEqual(expected_secs, s)
+      self._run_task(executor)
+      self.assertTrue(mock_sleep.called)
+
+
+class TrainingExecutorRunChiefTest(_TrainingExecutorTrainingTest,
+                                   test.TestCase):
+  """Tests run_chief of _TrainingExecutor."""
+
+  def __init__(self, methodName='runTest'):  # pylint: disable=invalid-name
+    test.TestCase.__init__(self, methodName)
+    _TrainingExecutorTrainingTest.__init__(
+        self,
+        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_CHIEF))
+
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_no_delay_for_chief(self, _):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = self._run_config
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+
+    with test.mock.patch.object(time, 'sleep') as mock_sleep:
+      self._run_task(executor)
+      mock_sleep.assert_not_called()
+
+
+class TrainingExecutorRunMasterTest(test.TestCase):
+  """Tests run_chief of _TrainingExecutor."""
+
+  def setUp(self):
+    self._run_config = _create_run_config_with_cluster_spec(
+        _TF_CONFIG_FOR_MASTER)
+
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_no_delay_for_master(self, _):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
+    mock_est.config = self._run_config
+    mock_train_spec = test.mock.Mock(
+        spec=training.TrainSpec, max_steps=123, hooks=[])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+
+    with test.mock.patch.object(time, 'sleep') as mock_sleep:
+      executor.run_master()
+      mock_sleep.assert_not_called()
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_train_with_train_spec(self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
+    mock_est.config = self._run_config
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
+    mock_server_instance = mock_server.return_value
+
+    executor = training._TrainingExecutor(mock_est, train_spec, mock_eval_spec)
+    executor.run_master()
+
+    mock_server.assert_called_with(
+        mock_est.config.cluster_spec,
+        job_name=mock_est.config.task_type,
+        task_index=mock_est.config.task_id,
+        config=test.mock.ANY,
+        protocol=None,
+        start=False)
+
+    self.assertTrue(mock_server_instance.start.called)
+
+    mock_est.train.assert_called_with(
+        input_fn=train_spec.input_fn,
+        max_steps=train_spec.max_steps,
+        hooks=list(train_spec.hooks),
+        saving_listeners=test.mock.ANY)
+    mock_est.export_savedmodel.assert_not_called()
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_train_with_no_eval_spec_fails(self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
+    mock_est.config = self._run_config
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
+    eval_spec = None
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
+      executor.run_master()
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_train_with_train_hooks(self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
+    mock_est.config = self._run_config
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
+    extra_hooks = [_FakeHook()]
+
+    executor = training._TrainingExecutor(
+        mock_est, train_spec, mock_eval_spec, train_hooks=extra_hooks)
+    executor.run_master()
+
+    mock_est.train.assert_called_with(
+        input_fn=train_spec.input_fn,
+        max_steps=train_spec.max_steps,
+        hooks=list(train_spec.hooks) + extra_hooks,
+        saving_listeners=test.mock.ANY)
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_no_server_startup_in_google(self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
+    mock_est.config = self._run_config
+    mock_train_spec = test.mock.Mock(
+        spec=training.TrainSpec, max_steps=123, hooks=[])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    tf_config = {'TF_CONFIG': json.dumps(_TF_CONFIG_FOR_GOOGLE)}
+    with test.mock.patch.dict('os.environ', tf_config):
+      executor.run_master()
+      mock_server.assert_not_called()
+
+  def test_fail_with_empty_cluster_spec(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = None
+    mock_est.config.master = 'grpc://...'
+    mock_est.config.task_type = 'master'
+    mock_est.config.task_id = 2
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      training._TrainingExecutor(
+          mock_est, mock_train_spec, mock_eval_spec).run_master()
+
+  def test_fail_with_empty_master(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec(
+        {'master': ['dummy'], 'worker': ['dummy1']})
+    mock_est.config.master = ''
+    mock_est.config.task_type = 'master'
+    mock_est.config.task_id = 0
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      training._TrainingExecutor(
+          mock_est, mock_train_spec, mock_eval_spec).run_master()
+
+  @test.mock.patch.object(time, 'sleep')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_single_master_node_with_empty_tf_master(
+      self, mock_server, unused_mock_sleep):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123}
+
+    mock_train_spec = test.mock.Mock(
+        spec=training.TrainSpec, max_steps=123, hooks=[])
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[])
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec(
+        {'master': ['dummy']})
+    mock_est.config.master = ''
+    mock_est.config.task_type = 'master'
+    mock_est.config.task_id = 0
+
+    executor = training._TrainingExecutor(
+        mock_est, mock_train_spec, mock_eval_spec)
+    executor.run_master()
+
+    mock_server.assert_not_called()
+    self.assertTrue(mock_est.train.called)
+
+  def test_fail_with_empty_task_type(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'master': ['dummy']})
+    mock_est.config.master = 'grpc://...'
+    mock_est.config.task_type = ''
+    mock_est.config.task_id = 2
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      training._TrainingExecutor(
+          mock_est, mock_train_spec, mock_eval_spec).run_master()
+
+  def test_fail_with_none_task_id(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'master': ['dummy']})
+    mock_est.config.master = 'grpc://...'
+    mock_est.config.task_type = 'master'
+    mock_est.config.task_id = None
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      training._TrainingExecutor(
+          mock_est, mock_train_spec, mock_eval_spec).run_master()
+
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_run_master_triggers_evaluate_and_export(self, _):
+
+    def estimator_train(saving_listeners, *args, **kwargs):
+      #  There shalt be a saving_listener.  Estimator is going to call
+      # `after_save`.
+      del args, kwargs
+      saving_listeners[0].begin()
+      saving_listeners[0].after_save(session=None, global_step_value=0)
+      saving_listeners[0].after_save(session=None, global_step_value=10)
+
+    mock_est = test.mock.Mock(
+        spec=estimator_lib.Estimator, model_dir='path/', train=estimator_train)
+    mock_est.latest_checkpoint.return_value = 'checkpoint_path/'
+    mock_est.config = self._run_config
+
+    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
+    exporter.name = 'see_whether_export_is_called'
+
+    train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300)
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, steps=2, exporters=exporter)
+    eval_result = {_GLOBAL_STEP_KEY: train_spec.max_steps}
+    mock_est.evaluate.return_value = eval_result
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    executor.run_master()
+
+    mock_est.evaluate.assert_called_with(
+        name=eval_spec.name,
+        input_fn=eval_spec.input_fn,
+        steps=eval_spec.steps,
+        checkpoint_path='checkpoint_path/',
+        hooks=eval_spec.hooks)
+    self.assertEqual(1, exporter.export.call_count)
+    exporter.export.assert_called_with(
+        estimator=mock_est,
+        export_path=os.path.join('path/', 'export', exporter.name),
+        checkpoint_path='checkpoint_path/',
+        eval_result=eval_result,
+        is_the_final_export=True)
+
+  @test.mock.patch.object(basic_session_run_hooks, 'SecondOrStepTimer')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_run_master_throttle_eval(self, _, mock_timer_class):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
+
+    mock_timer = test.mock.Mock()
+    mock_timer_class.return_value = mock_timer
+
+    def estimator_train(saving_listeners, *args, **kwargs):
+      del args, kwargs
+      saving_listeners[0].begin()
+
+      # Call four times.
+      mock_timer.should_trigger_for_step.return_value = True
+      saving_listeners[0].after_save(session=None, global_step_value=None)
+
+      mock_timer.should_trigger_for_step.return_value = True
+      saving_listeners[0].after_save(session=None, global_step_value=None)
+
+      mock_timer.should_trigger_for_step.return_value = False
+      saving_listeners[0].after_save(session=None, global_step_value=None)
+
+      mock_timer.should_trigger_for_step.return_value = True
+      saving_listeners[0].after_save(session=None, global_step_value=None)
+
+    mock_est.train = estimator_train
+    mock_est.latest_checkpoint.side_effect = ['ckpt1', 'ckpt2']
+    mock_est.config = self._run_config
+
+    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
+    exporter.name = 'see_whether_export_is_called'
+
+    train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300)
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, steps=2, exporters=exporter, throttle_secs=10)
+
+    mock_est.evaluate.side_effect = [
+        {_GLOBAL_STEP_KEY: train_spec.max_steps //2},
+        {_GLOBAL_STEP_KEY: train_spec.max_steps}
+    ]
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    executor.run_master()
+
+    self.assertEqual(2, mock_est.evaluate.call_count)
+    self.assertEqual(2, exporter.export.call_count)
+
+    is_final_export_list = [call[1]['is_the_final_export']
+                            for call in exporter.export.call_args_list]
+    self.assertEqual([False, True], is_final_export_list)
+
+  @test.mock.patch.object(basic_session_run_hooks, 'SecondOrStepTimer')
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_run_master_throttle_eval_which_skips_final_ckpt(
+      self, _, mock_timer_class):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
+
+    mock_timer = test.mock.Mock()
+    mock_timer_class.return_value = mock_timer
+
+    def estimator_train(saving_listeners, *args, **kwargs):
+      del args, kwargs
+      saving_listeners[0].begin()
+
+      # Call tree times (one for first saving).
+      mock_timer.should_trigger_for_step.return_value = True
+      saving_listeners[0].after_save(session=None, global_step_value=0)
+
+      mock_timer.should_trigger_for_step.return_value = True
+      saving_listeners[0].after_save(session=None, global_step_value=125)
+
+      mock_timer.should_trigger_for_step.return_value = False
+      saving_listeners[0].after_save(session=None, global_step_value=250)
+
+      # At the end evaluate should be called even if throttle secs prevents it.
+      mock_timer.should_trigger_for_step.return_value = False
+      saving_listeners[0].end(session=None, global_step_value=300)
+
+    mock_est.train = estimator_train
+    mock_est.latest_checkpoint.side_effect = ['ckpt1', 'ckpt2']
+    mock_est.config = self._run_config
+
+    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
+    exporter.name = 'see_whether_export_is_called'
+
+    train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300)
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, steps=2, exporters=exporter, throttle_secs=10)
+
+    mock_est.evaluate.side_effect = [
+        {_GLOBAL_STEP_KEY: train_spec.max_steps //2},
+        {_GLOBAL_STEP_KEY: train_spec.max_steps}
+    ]
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    executor.run_master()
+
+    self.assertEqual(2, mock_est.evaluate.call_count)
+    self.assertEqual(2, exporter.export.call_count)
+
+    is_final_export_list = [call[1]['is_the_final_export']
+                            for call in exporter.export.call_args_list]
+    self.assertEqual([False, True], is_final_export_list)
+
+
+class TrainingExecutorRunEvaluatorTest(test.TestCase):
+  """Tests run_evaluator of _TrainingExecutor."""
+
+  def _set_up_mock_est_to_train_and_evaluate_once(self, mock_est,
+                                                  mock_train_spec):
+    """Sets global step in eval result to end the while True eval loop."""
+    training_max_step = 200
+    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: training_max_step}
+    mock_train_spec.max_steps = training_max_step
+
+  def test_evaluate_with_evaluate_spec(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.latest_checkpoint.return_value = 'latest_it_is'
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, steps=2, hooks=[_FakeHook()], name='cont_eval',
+        start_delay_secs=0, throttle_secs=0)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+    executor.run_evaluator()
+
+    mock_est.evaluate.assert_called_with(
+        name='cont_eval',
+        input_fn=eval_spec.input_fn,
+        steps=eval_spec.steps,
+        checkpoint_path='latest_it_is',
+        hooks=eval_spec.hooks)
+    self.assertFalse(mock_est.train.called)
+
+  def test_evaluate_with_no_eval_spec_fails(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.latest_checkpoint.return_value = 'latest_it_is'
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    eval_spec = None
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+
+    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
+      executor.run_evaluator()
+
+  def test_evaluate_with_train_hooks(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.latest_checkpoint.return_value = 'latest_it_is'
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1,
+        steps=2,
+        hooks=[_FakeHook()],
+        name='cont_eval',
+        start_delay_secs=0,
+        throttle_secs=0)
+
+    # The train_hooks will not be called during eval.
+    mock_hook = test.mock.Mock(spec=session_run_hook.SessionRunHook)
+    executor = training._TrainingExecutor(
+        mock_est, mock_train_spec, eval_spec, train_hooks=[mock_hook])
+    executor.run_evaluator()
+
+    mock_hook.begin.assert_not_called()
+
+  def test_evaluate_multiple_times(self):
+    training_max_step = 200
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
+    mock_est.evaluate.side_effect = [
+        {_GLOBAL_STEP_KEY: training_max_step // 2},
+        {_GLOBAL_STEP_KEY: training_max_step}
+    ]
+    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
+
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
+    exporter.name = 'see_how_many_times_export_is_called'
+
+    mock_est.times_export_was_called = 0
+    mock_est.times_final_export_was_called = 0
+    def export(estimator, export_path, checkpoint_path, eval_result,
+               is_the_final_export):
+      del export_path, checkpoint_path, eval_result
+      estimator.times_export_was_called += 1
+      # final_export is happened at the end.
+      self.assertEqual(0, estimator.times_final_export_was_called)
+      if is_the_final_export:
+        estimator.times_final_export_was_called += 1
+
+    exporter.export = export
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1,
+        start_delay_secs=0,
+        throttle_secs=0,
+        exporters=exporter)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+    executor.run_evaluator()
+
+    self.assertEqual(2, mock_est.evaluate.call_count)
+    self.assertEqual(2, mock_est.times_export_was_called)
+    self.assertEqual(1, mock_est.times_final_export_was_called)
+
+  def test_evaluate_listener_before_eval(self):
+    training_max_step = 200
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
+    # Without early stopping, this eval will be run twice.
+    mock_est.evaluate.side_effect = [{
+        _GLOBAL_STEP_KEY: training_max_step // 2
+    }, {
+        _GLOBAL_STEP_KEY: training_max_step
+    }]
+    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
+
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec, hooks=[])
+    mock_train_spec.max_steps = training_max_step
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.call_count = 0
+
+      def before_eval(self):
+        self.call_count += 1
+        return  self.call_count == 1
+
+    listener = _Listener()
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    training._TrainingExecutor(
+        mock_est, mock_train_spec, eval_spec,
+        continuous_eval_listener=listener).run_evaluator()
+
+    # Before_eval returns False during the second time, so, evaluate will be
+    # called once.
+    self.assertEqual(1, mock_est.evaluate.call_count)
+    self.assertEqual(2, listener.call_count)
+
+  def test_evaluate_listener_after_eval(self):
+    training_max_step = 200
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
+    # Without early stopping, this eval will be run twice.
+    expected_eval_metrics = [{
+        _GLOBAL_STEP_KEY: training_max_step // 2
+    }, {
+        _GLOBAL_STEP_KEY: training_max_step
+    }]
+    mock_est.evaluate.side_effect = expected_eval_metrics
+    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
+
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.call_count = 0
+
+      def after_eval(self, eval_result):
+        self.call_count += 1
+        self.eval_result = eval_result
+        return False
+
+    listener = _Listener()
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    training._TrainingExecutor(
+        mock_est, mock_train_spec, eval_spec,
+        continuous_eval_listener=listener).run_evaluator()
+
+    # after_eval returns False during the first time, so, evaluate will be
+    # called once.
+    self.assertEqual(1, mock_est.evaluate.call_count)
+    self.assertEqual(1, listener.call_count)
+    self.assertAllEqual(expected_eval_metrics[0], listener.eval_result.metrics)
+    self.assertEqual('path_1', listener.eval_result.checkpoint_path)
+
+  def test_final_export_is_true_in_the_end(self):
+    training_max_step = 200
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
+    mock_est.evaluate.side_effect = [
+        {_GLOBAL_STEP_KEY: training_max_step // 2},
+        {_GLOBAL_STEP_KEY: training_max_step}
+    ]
+    mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2']
+
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    mock_est.times_export_fn_was_called = 0
+    mock_est.times_the_final_export_was_true = 0
+    def export(estimator, export_path, checkpoint_path, eval_result,
+               is_the_final_export):
+      del export_path, checkpoint_path, eval_result
+      estimator.times_export_fn_was_called += 1
+      if is_the_final_export:
+        estimator.times_the_final_export_was_true += 1
+
+    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
+    exporter.name = 'see_how_many_times_export_is_called'
+    exporter.export = export
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1,
+        start_delay_secs=0,
+        throttle_secs=0,
+        exporters=exporter)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+    executor.run_evaluator()
+
+    self.assertEqual(2, mock_est.evaluate.call_count)
+    self.assertEqual(2, mock_est.times_export_fn_was_called)
+    self.assertEqual(1, mock_est.times_the_final_export_was_true)
+
+  def test_skip_evaluation_due_to_ckpt(self):
+    training_max_step = 200
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate.side_effect = [
+        {_GLOBAL_STEP_KEY: training_max_step // 2},
+        {_GLOBAL_STEP_KEY: training_max_step}
+    ]
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    # First two items are invalid, next two items are same.
+    mock_est.latest_checkpoint.side_effect = [
+        None, '', 'same', 'same', 'path_2'
+    ]
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=2)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+    with test.mock.patch.object(logging, 'warning') as mock_log:
+      executor.run_evaluator()
+
+    # Three checkpoint paths are invalid.
+    self.assertEqual(5, mock_est.latest_checkpoint.call_count)
+    self.assertEqual(2, mock_est.evaluate.call_count)
+
+    # Two warning logs are expected (last warning time is reset after a
+    # successuful evaluation)
+    self.assertEqual(2, mock_log.call_count)
+
+  def test_warning_if_throttle_secs_is_zero(self):
+    training_max_step = 200
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate.side_effect = [
+        {_GLOBAL_STEP_KEY: training_max_step}
+    ]
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    # We need to make the first one invalid, so it will check the
+    # throttle_secs=0.
+    mock_est.latest_checkpoint.side_effect = [None, 'path']
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+    with test.mock.patch.object(logging, 'warning') as mock_log:
+      executor.run_evaluator()
+
+    # First ckpt is invalid.
+    self.assertEqual(2, mock_est.latest_checkpoint.call_count)
+    self.assertEqual(1, mock_est.evaluate.call_count)
+
+    self.assertRegexpMatches(str(mock_log.call_args), _INPROPER_THROTTL_SECS)
+
+  def test_continuous_eval_listener_eval_result(self):
+    training_max_step = 200
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    expected_eval_metrics = [{
+        _GLOBAL_STEP_KEY: training_max_step // 2
+    }, {
+        _GLOBAL_STEP_KEY: training_max_step
+    }]
+    mock_est.evaluate.side_effect = expected_eval_metrics
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.eval_results = []
+
+      def after_eval(self, eval_result):
+        self.eval_results.append(eval_result)
+        return True
+
+    continuous_eval_listener = _Listener()
+
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    # First two items are invalid, next two items are same.
+    mock_est.latest_checkpoint.side_effect = [
+        None, '', 'same', 'same', 'path_2'
+    ]
+    expected_eval_results = [
+        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
+        training._EvalResult(training._EvalStatus.MISSING_CHECKPOINT),
+        training._EvalResult(
+            training._EvalStatus.EVALUATED,
+            metrics=expected_eval_metrics[0],
+            checkpoint_path='same'),
+        training._EvalResult(training._EvalStatus.NO_NEW_CHECKPOINT),
+        training._EvalResult(
+            training._EvalStatus.EVALUATED,
+            metrics=expected_eval_metrics[1],
+            checkpoint_path='path_2'),
+    ]
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=0)
+
+    executor = training._TrainingExecutor(
+        mock_est,
+        mock_train_spec,
+        eval_spec,
+        continuous_eval_listener=continuous_eval_listener)
+    executor.run_evaluator()
+
+    # Three checkpoint paths are invalid.
+    self.assertEqual(5, mock_est.latest_checkpoint.call_count)
+    self.assertEqual(2, mock_est.evaluate.call_count)
+
+    self.assertEqual(5, len(continuous_eval_listener.eval_results))
+    for i, result in enumerate(continuous_eval_listener.eval_results):
+      self.assertEqual(expected_eval_results[i].status, result.status)
+      self.assertAllEqual(expected_eval_results[i].metrics, result.metrics)
+      self.assertEqual(expected_eval_results[i].checkpoint_path,
+                       result.checkpoint_path)
+
+  def test_sleep_start_delay_secs(self):
+    training_max_step = 200
+    start_delay_secs = 123
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: training_max_step}
+    mock_est.model_dir = compat.as_bytes(test.get_temp_dir())
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = training_max_step
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, steps=2, hooks=[_FakeHook()], name='cont_eval',
+        start_delay_secs=start_delay_secs, throttle_secs=0)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+    with test.mock.patch.object(time, 'sleep') as mock_sleep:
+      executor.run_evaluator()
+      mock_sleep.assert_called_with(start_delay_secs)
+      self.assertTrue(mock_est.evaluate.called)
+
+  @test.mock.patch.object(time, 'time')
+  @test.mock.patch.object(time, 'sleep')
+  def test_throttle_secs(self, mock_sleep, mock_time):
+    throttle_secs = 123
+    operation_secs = 12
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1, start_delay_secs=0, throttle_secs=throttle_secs)
+
+    mock_time.side_effect = [921, 921 + operation_secs]
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+    # Disable logging as it calls time.time also.
+    with test.mock.patch.object(logging, 'info'):
+      executor.run_evaluator()
+    mock_sleep.assert_called_with(throttle_secs - operation_secs)
+    self.assertTrue(mock_est.evaluate.called)
+
+  def test_that_export_is_called(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
+
+    def export(estimator, *args, **kwargs):
+      del args, kwargs
+      estimator.export_was_called = True
+
+    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
+    exporter.name = 'see_whether_export_is_called'
+    exporter.export = export
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1,
+        steps=2,
+        start_delay_secs=0,
+        throttle_secs=0,
+        exporters=exporter)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+    executor.run_evaluator()
+
+    # Verify that export was called on the right estimator.
+    self.assertTrue(mock_est.export_was_called)
+
+  def test_errors_out_if_evaluate_returns_empty_dict(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = training.EvalSpec(input_fn=(lambda: 1),
+                                  start_delay_secs=0, throttle_secs=0)
+    mock_est.evaluate.return_value = {}
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR):
+      executor.run_evaluator()
+
+  def test_errors_out_if_evaluate_returns_non_dict(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = training.EvalSpec(input_fn=(lambda: 1),
+                                  start_delay_secs=0, throttle_secs=0)
+    mock_est.evaluate.return_value = 123
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_RESULT_TYPE_ERR):
+      executor.run_evaluator()
+
+  def test_errors_out_if_evaluate_returns_dict_without_global_step(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    train_spec = training.TrainSpec(input_fn=lambda: 1)
+    eval_spec = training.EvalSpec(input_fn=(lambda: 1),
+                                  start_delay_secs=0, throttle_secs=0)
+    mock_est.evaluate.return_value = {'loss': 123}
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    with self.assertRaisesRegexp(ValueError,
+                                 _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR):
+      executor.run_evaluator()
+
+
+class TrainingExecutorRunPsTest(test.TestCase):
+  """Tests run_ps of _TrainingExecutor."""
+
+  @test.mock.patch.object(server_lib, 'Server')
+  def test_std_server(self, mock_server):
+    mock_server_instance = test.mock.Mock()
+    mock_server.return_value = mock_server_instance
+
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = _create_run_config_with_cluster_spec(_TF_CONFIG_FOR_PS)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    executor.run_ps()
+
+    mock_server.assert_called_with(
+        mock_est.config.cluster_spec,
+        job_name=mock_est.config.task_type,
+        task_index=mock_est.config.task_id,
+        config=test.mock.ANY,
+        protocol=None,
+        start=False)
+
+    self.assertTrue(mock_server_instance.start.called)
+    self.assertTrue(mock_server_instance.join.called)
+
+  def test_fail_with_empty_cluster_spec(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = None
+    mock_est.config.master = 'grpc://...'
+    mock_est.config.task_type = 'ps'
+    mock_est.config.task_id = 2
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      training._TrainingExecutor(mock_est, mock_train_spec,
+                                 mock_eval_spec).run_ps()
+
+  def test_fail_with_empty_master(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
+    mock_est.config.master = ''
+    mock_est.config.task_type = 'ps'
+    mock_est.config.task_id = 2
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      training._TrainingExecutor(mock_est, mock_train_spec,
+                                 mock_eval_spec).run_ps()
+
+  def test_fail_with_empty_task_type(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
+    mock_est.config.master = 'grpc://...'
+    mock_est.config.task_type = ''
+    mock_est.config.task_id = 2
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      training._TrainingExecutor(mock_est, mock_train_spec,
+                                 mock_eval_spec).run_ps()
+
+  def test_fail_with_none_task_id(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig)
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'ps': ['dummy']})
+    mock_est.config.master = 'grpc://...'
+    mock_est.config.task_type = 'ps'
+    mock_est.config.task_id = None
+
+    with self.assertRaisesRegexp(RuntimeError,
+                                 _INVALID_CONFIG_FOR_STD_SERVER_MSG):
+      training._TrainingExecutor(mock_est, mock_train_spec,
+                                 mock_eval_spec).run_ps()
+
+
+class StopAtSecsHookTest(test.TestCase):
+  """Tests StopAtSecsHook."""
+
+  @test.mock.patch.object(time, 'time')
+  def test_stops_after_time(self, mock_time):
+    mock_time.return_value = 1484695987.209386
+    hook = training._StopAtSecsHook(1000)
+    with ops.Graph().as_default():
+      no_op = control_flow_ops.no_op()
+      # some time passed before training starts
+      mock_time.return_value += 250
+      with monitored_session.MonitoredSession(hooks=[hook]) as sess:
+        self.assertFalse(sess.should_stop())
+        sess.run(no_op)
+        self.assertFalse(sess.should_stop())
+        mock_time.return_value += 500
+        sess.run(no_op)
+        self.assertFalse(sess.should_stop())
+        mock_time.return_value += 400
+        sess.run(no_op)
+        self.assertFalse(sess.should_stop())
+        mock_time.return_value += 200
+        sess.run(no_op)
+        self.assertTrue(sess.should_stop())
+
+
+class TrainingExecutorRunLocalTest(test.TestCase):
+  """Tests run_local of _TrainingExecutor."""
+
+  def _model_fn(self, features, labels, mode):
+    del labels
+    with ops.control_dependencies([features]):
+      train_op = state_ops.assign_add(training_util.get_global_step(), 1)
+    return model_fn_lib.EstimatorSpec(
+        mode,
+        loss=constant_op.constant(0.),
+        train_op=train_op,
+        predictions=constant_op.constant([[10.]]),
+        eval_metric_ops={'mean_of_features': metrics_lib.mean(features)})
+
+  def _input_fn(self, repeat=True):
+    ds = dataset_ops.Dataset.from_tensors([1])
+    if repeat:
+      return ds.repeat()
+    return ds
+
+  def unique_checkpoint_every_time_fn(self):
+    return 'checkpoint_path_%s/' % random.random()
+
+  def test_runs_evaluate_with_every_new_checkpoint(self):
+    est = estimator_lib.Estimator(
+        model_fn=self._model_fn,
+        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+
+    mock_est.times_export_was_called = 0
+    mock_est.times_final_export_was_called = 0
+    def export(estimator, export_path, checkpoint_path, eval_result,
+               is_the_final_export):
+      del export_path, checkpoint_path, eval_result
+      estimator.times_export_was_called += 1
+      # final_export is happened at the end.
+      self.assertEqual(0, estimator.times_final_export_was_called)
+      if is_the_final_export:
+        estimator.times_final_export_was_called += 1
+
+    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
+    exporter.name = 'see_how_many_times_export_is_called'
+    exporter.export = export
+
+    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=22)
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False),
+        throttle_secs=0,
+        exporters=exporter)
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    executor.run_local()
+
+    self.assertEqual(1, mock_est.train.call_count)
+    self.assertEqual(3, mock_est.evaluate.call_count)
+    self.assertEqual(3, mock_est.times_export_was_called)
+    self.assertEqual(1, mock_est.times_final_export_was_called)
+
+  def test_runs_with_eval_listener_before_eval(self):
+    est = estimator_lib.Estimator(
+        model_fn=self._model_fn,
+        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+    mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn
+
+    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=12)
+    eval_spec = training.EvalSpec(input_fn=lambda: self._input_fn(repeat=False))
+    mock_est.evaluate.side_effect = [{_GLOBAL_STEP_KEY: train_spec.max_steps}]
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.call_count = 0
+
+      def before_eval(self):
+        self.call_count += 1
+        return False  # Will stop the run_local before first eval.
+
+    listener = _Listener()
+
+    executor = training._TrainingExecutor(
+        mock_est, train_spec, eval_spec, continuous_eval_listener=listener)
+    executor.run_local()
+
+    self.assertEqual(1, mock_est.train.call_count)
+    self.assertEqual(0, mock_est.evaluate.call_count)
+
+  def test_runs_with_eval_listener_after_eval(self):
+    est = estimator_lib.Estimator(
+        model_fn=self._model_fn,
+        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+
+    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=3000)
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0)
+
+    class _Listener(training._ContinuousEvalListener):
+
+      def __init__(self):
+        self.call_count = 0
+
+      def after_eval(self, eval_result):
+        self.call_count += 1
+        return False  # Will stop the run_local after first eval.
+
+    listener = _Listener()
+
+    executor = training._TrainingExecutor(
+        mock_est, train_spec, eval_spec, continuous_eval_listener=listener)
+    metrics, _ = executor.run_local()  # pylint: disable=assignment-from-no-return
+
+    self.assertEqual(1, mock_est.train.call_count)
+    self.assertEqual(1, mock_est.evaluate.call_count)
+    self.assertEqual(1, listener.call_count)
+    # Should be less than max_steps since listener did early stopping.
+    self.assertLess(metrics[_GLOBAL_STEP_KEY], train_spec.max_steps)
+
+  def test_handles_no_new_checkpoint_found(self):
+    est = estimator_lib.Estimator(
+        model_fn=self._model_fn,
+        # disable saving checkpoint
+        config=run_config_lib.RunConfig(
+            save_checkpoints_steps=None, save_checkpoints_secs=None))
+    train_spec = training.TrainSpec(
+        input_fn=self._input_fn, max_steps=300, hooks=[_FakeHook()])
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False),
+        hooks=[_FakeHook()],
+        throttle_secs=100)
+
+    executor = training._TrainingExecutor(est, train_spec, eval_spec)
+    with self.assertRaisesRegexp(ValueError,
+                                 'There should be a CheckpointSaverHook'):
+      executor.run_local()
+
+  def test_final_export_is_true_in_the_end(self):
+    est = estimator_lib.Estimator(
+        model_fn=self._model_fn,
+        config=run_config_lib.RunConfig(save_checkpoints_steps=10))
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+
+    mock_est.times_export_fn_was_called = 0
+    mock_est.times_the_final_export_was_true = 0
+    def export(estimator, export_path, checkpoint_path, eval_result,
+               is_the_final_export):
+      del export_path, checkpoint_path, eval_result
+      estimator.times_export_fn_was_called += 1
+      if is_the_final_export:
+        estimator.times_the_final_export_was_true += 1
+
+    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
+    exporter.name = 'see_how_many_times_export_is_called'
+    exporter.export = export
+
+    train_spec = training.TrainSpec(
+        input_fn=self._input_fn, max_steps=12, hooks=[_FakeHook()])
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False),
+        throttle_secs=0,
+        exporters=exporter)
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    executor.run_local()
+
+    self.assertEqual(1, mock_est.train.call_count)
+    self.assertEqual(2, mock_est.evaluate.call_count)
+    self.assertEqual(2, mock_est.times_export_fn_was_called)
+    self.assertEqual(1, mock_est.times_the_final_export_was_true)
+
+  def test_train_and_evaluate_args(self):
+    est = estimator_lib.Estimator(model_fn=self._model_fn)
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+    train_spec = training.TrainSpec(
+        input_fn=self._input_fn, max_steps=300, hooks=[_FakeHook()])
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False),
+        steps=2,
+        hooks=[_FakeHook()],
+        name='local_eval')
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    executor.run_local()
+
+    mock_est.evaluate.assert_called_with(
+        name=eval_spec.name,
+        input_fn=eval_spec.input_fn,
+        steps=eval_spec.steps,
+        checkpoint_path=est.latest_checkpoint(),
+        hooks=eval_spec.hooks)
+
+    train_args = mock_est.train.call_args[1]
+    self.assertEqual(list(train_spec.hooks), list(train_args['hooks']))
+    self.assertEqual(train_spec.input_fn, train_args['input_fn'])
+    self.assertEqual(train_spec.max_steps, train_args['max_steps'])
+
+  def test_train_with_no_eval_spec_fails(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
+    eval_spec = None
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+
+    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_SPEC_MSG):
+      executor.run_local()
+
+  def test_train_hooks(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
+    mock_est.latest_checkpoint.return_value = 'checkpoint_path/'
+    train_spec = training.TrainSpec(
+        input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
+    eval_spec = training.EvalSpec(input_fn=lambda: 1, steps=2)
+    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: train_spec.max_steps}
+    extra_hooks = [_FakeHook()]
+
+    executor = training._TrainingExecutor(
+        mock_est, train_spec, eval_spec, train_hooks=extra_hooks)
+    executor.run_local()
+
+    train_args = mock_est.train.call_args[1]
+    self.assertEqual(
+        list(train_spec.hooks) + extra_hooks, [
+            h for h in train_args['hooks']
+            if not isinstance(h, training._StopAtSecsHook)
+        ])
+
+  def test_that_export_is_called_with_run_local(self):
+    est = estimator_lib.Estimator(model_fn=self._model_fn)
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+    train_spec = training.TrainSpec(input_fn=self._input_fn, max_steps=12)
+    mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: train_spec.max_steps}
+
+    def export(estimator, *args, **kwargs):
+      del args, kwargs
+      estimator.export_was_called = True
+      return 'path_to_export'
+
+    exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter)
+    exporter.name = 'see_whether_export_is_called'
+    exporter.export = export
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False),
+        steps=2,
+        start_delay_secs=0,
+        throttle_secs=213,
+        exporters=exporter)
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    # pylint: disable=assignment-from-no-return
+    _, export_results = executor.run_local()
+    # pylint: enable=assignment-from-no-return
+
+    self.assertTrue(mock_est.export_was_called)
+    self.assertEqual(export_results, ['path_to_export'])
+
+  def test_errors_out_if_evaluate_returns_empty_dict(self):
+    est = estimator_lib.Estimator(
+        model_fn=self._model_fn,
+        config=run_config_lib.RunConfig(save_checkpoints_steps=2))
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+    train_spec = training.TrainSpec(input_fn=self._input_fn)
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0)
+    mock_est.evaluate.return_value = {}
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    with self.assertRaisesRegexp(ValueError, _INVALID_EMPTY_EVAL_RESULT_ERR):
+      executor.run_local()
+
+  def test_errors_out_if_evaluate_returns_non_dict(self):
+    est = estimator_lib.Estimator(
+        model_fn=self._model_fn,
+        config=run_config_lib.RunConfig(save_checkpoints_steps=2))
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+    train_spec = training.TrainSpec(input_fn=self._input_fn)
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0)
+    mock_est.evaluate.return_value = 123
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    with self.assertRaisesRegexp(TypeError, _INVALID_EVAL_RESULT_TYPE_ERR):
+      executor.run_local()
+
+  def test_errors_out_if_evaluate_returns_dict_without_global_step(self):
+    est = estimator_lib.Estimator(
+        model_fn=self._model_fn,
+        config=run_config_lib.RunConfig(save_checkpoints_steps=2))
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+    train_spec = training.TrainSpec(input_fn=self._input_fn)
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False), throttle_secs=0)
+    mock_est.evaluate.return_value = {'loss': 123}
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    with self.assertRaisesRegexp(ValueError,
+                                 _MISSING_GLOBAL_STEP_IN_EVAL_RESULT_ERR):
+      executor.run_local()
+
+  def test_train_and_evaluate_return_metrics(self):
+    est = estimator_lib.Estimator(model_fn=self._model_fn)
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, wraps=est)
+    train_spec = training.TrainSpec(
+        input_fn=self._input_fn, max_steps=12, hooks=[_FakeHook()])
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: self._input_fn(repeat=False),
+        steps=2,
+        hooks=[_FakeHook()],
+        name='local_eval')
+
+    executor = training._TrainingExecutor(mock_est, train_spec, eval_spec)
+    # pylint: disable=assignment-from-no-return
+    metrics, _ = executor.run_local()
+    # pylint: enable=assignment-from-no-return
+    self.assertEqual(metrics['global_step'], 12)
+
+
+class TrainAndEvaluateRunTest(test.TestCase):
+
+  def _test_run_task_and_executor(self, run_config):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = run_config
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+
+    executor.call_task = {}
+
+    def task_fn(name):
+
+      def _fn():
+        executor.call_task[name] = 1
+
+      return _fn
+
+    executor.run_chief = task_fn('chief')
+    executor.run_master = task_fn('master')
+    executor.run_ps = task_fn('ps')
+    executor.run_evaluator = task_fn('evaluator')
+    executor.run_worker = task_fn('worker')
+    executor.run_local = task_fn('local')
+    return executor
+
+  def test_run_chief(self):
+    executor = self._test_run_task_and_executor(
+        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_CHIEF))
+    executor.run()
+    self.assertEqual(1, executor.call_task['chief'])
+
+  def test_run_worker(self):
+    executor = self._test_run_task_and_executor(
+        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_WORKER))
+    executor.run()
+    self.assertEqual(1, executor.call_task['worker'])
+
+  def test_run_ps(self):
+    executor = self._test_run_task_and_executor(
+        run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_PS))
+    executor.run()
+    self.assertEqual(1, executor.call_task['ps'])
+
+  def test_run_evaluator(self):
+    executor = self._test_run_task_and_executor(
+        run_config=_create_run_config_with_cluster_spec(
+            _TF_CONFIG_FOR_EVALUATOR))
+    executor.run()
+    self.assertEqual(1, executor.call_task['evaluator'])
+
+  def test_run_local(self):
+    executor = self._test_run_task_and_executor(
+        run_config=run_config_lib.RunConfig())
+    executor.run()
+    self.assertEqual(1, executor.call_task['local'])
+
+  def test_invalid_local_task(self):
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            'local': ['hos1:1'],
+        },
+        'task': {
+            'type': 'local',  # invalid task type.
+            'index': 0
+        }
+    }
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    with self.assertRaisesRegexp(ValueError, _INVALID_LOCAL_TASK_WITH_CLUSTER):
+      executor.run()
+
+  def test_unsupported_task_due_to_missing_run_task(self):
+    unsupported_task = 'alloc'
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            unsupported_task: ['hos1:1'],
+        },
+        'task': {
+            'type': unsupported_task,
+            'index': 0
+        }
+    }
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN):
+      executor.run()
+
+  def test_unsupported_task_due_to_not_callable(self):
+    unsupported_task = 'alloc'
+    tf_config = {
+        'cluster': {
+            run_config_lib.TaskType.CHIEF: ['host0:0'],
+            unsupported_task: ['hos1:1'],
+        },
+        'task': {
+            'type': unsupported_task,
+            'index': 0
+        }
+    }
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = _create_run_config_with_cluster_spec(tf_config)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    executor.run_alloc = 123  # not callable
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TO_RUN):
+      executor.run()
+
+  def test_invalid_task_type(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est.config = test.mock.Mock()
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_eval_spec = test.mock.Mock(spec=training.EvalSpec)
+
+    mock_est.config = test.mock.Mock()
+    mock_est.config.cluster_spec = server_lib.ClusterSpec({'1': ['dummy']})
+    mock_est.config.task_type = ''
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec,
+                                          mock_eval_spec)
+    with self.assertRaisesRegexp(ValueError, _INVALID_TASK_TYPE):
+      executor.run()
+
+
+class TrainAndEvaluateIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _as_label(self, data_in_float):
+    return np.rint(data_in_float).astype(np.int64)
+
+  def _get_exporter(self, name, fc):
+    feature_spec = feature_column.make_parse_example_spec(fc)
+    serving_input_receiver_fn = (
+        export_lib.build_parsing_serving_input_receiver_fn(feature_spec))
+    return exporter_lib.LatestExporter(
+        name, serving_input_receiver_fn=serving_input_receiver_fn)
+
+  def _extract_loss_and_global_step(self, event_folder):
+    """Returns the loss and global step in last event."""
+    event_paths = glob.glob(os.path.join(event_folder, 'events*'))
+
+    loss = None
+    global_step_count = None
+
+    for e in summary_iterator.summary_iterator(event_paths[-1]):
+      current_loss = None
+      for v in e.summary.value:
+        if v.tag == 'loss':
+          current_loss = v.simple_value
+
+      # If loss is not found, global step is meaningless.
+      if current_loss is None:
+        continue
+
+      current_global_step = e.step
+      if global_step_count is None or current_global_step > global_step_count:
+        global_step_count = current_global_step
+        loss = current_loss
+
+    return (loss, global_step_count)
+
+  def test_complete_flow_with_non_distributed_configuration(self):
+    n_classes = 3
+    input_dimension = 2
+    batch_size = 10
+
+    eval_name = 'foo'
+    exporter_name = 'saved_model_exporter'
+
+    # max_steps should be larger than save_summary_steps
+    max_steps = 10
+    save_summary_steps = 9
+
+    data = np.linspace(
+        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
+    x_data = data.reshape(batch_size, input_dimension)
+    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
+
+    # learn y = x
+    def train_input_fn():
+      return dataset_ops.Dataset.from_tensor_slices(({
+          'x': x_data
+      }, y_data)).batch(batch_size).repeat().shuffle(1000)
+
+    def eval_input_fn():
+      return dataset_ops.Dataset.from_tensor_slices(({
+          'x': x_data
+      }, y_data)).batch(batch_size)
+
+    def predict_input_fn():
+      return dataset_ops.Dataset.from_tensor_slices({
+          'x': x_data
+      }).batch(batch_size)
+
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))]
+
+    est = dnn.DNNClassifier(
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        n_classes=n_classes,
+        config=run_config_lib.RunConfig(save_summary_steps=save_summary_steps),
+        model_dir=self._model_dir)
+
+    train_spec = training.TrainSpec(input_fn=train_input_fn,
+                                    max_steps=max_steps)
+
+    eval_spec = training.EvalSpec(
+        name=eval_name,
+        input_fn=eval_input_fn,
+        steps=None,
+        exporters=self._get_exporter(exporter_name, feature_columns),
+        throttle_secs=0)
+
+    training.train_and_evaluate(est, train_spec, eval_spec)
+
+    # Make sure nothing is stuck in limbo.
+    writer_cache.FileWriterCache.clear()
+
+    # Examine the training events. Use a range to check global step to avoid
+    # flakyness due to global step race condition.
+    training_loss, _ = self._extract_loss_and_global_step(est.model_dir)
+    self.assertIsNotNone(training_loss)
+
+    # Examine the eval events. The global step should be accurate.
+    eval_loss, eval_global_step = self._extract_loss_and_global_step(
+        event_folder=est.eval_dir(eval_name))
+    self.assertIsNotNone(eval_loss)
+    self.assertEqual(max_steps, eval_global_step)
+
+    # Examine the export folder.
+    export_dir = os.path.join(os.path.join(est.model_dir, 'export'),
+                              exporter_name)
+    self.assertTrue(gfile.Exists(export_dir))
+
+    # Examine the ckpt for predict.
+    predicted_proba = np.array([
+        x[prediction_keys.PredictionKeys.PROBABILITIES]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index 9afc4b2c04..fb110c4b7b 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -1,4 +1,4 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,142 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""util python module.
 
-Importing from tensorflow.python.estimator
-is unsupported and will soon break!
-"""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
+"""Utilities for Estimators."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_estimator.python.estimator import util
+import os
+import time
 
-# Include attrs that start with single underscore.
-util.__all__ = [s for s in dir(util) if not s.startswith('__')]
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import training
+from tensorflow.python.util import compat
+from tensorflow.python.util import function_utils
 
-# pylint: disable=g-import-not-at-top
-from tensorflow_estimator.python.estimator.util import *
+fn_args = function_utils.fn_args
+
+# When we create a timestamped directory, there is a small chance that the
+# directory already exists because another process is also creating these
+# directories. In this case we just wait one second to get a new timestamp and
+# try again. If this fails several times in a row, then something is seriously
+# wrong.
+MAX_DIRECTORY_CREATION_ATTEMPTS = 10
+
+
+def get_timestamped_dir(dir_base):
+  """Builds a path to a new subdirectory within the base directory.
+
+  The subdirectory will be named using the current time.
+  This guarantees monotonically increasing directory numbers even across
+  multiple runs of the pipeline.
+  The timestamp used is the number of seconds since epoch UTC.
+
+  Args:
+    dir_base: A string containing a directory to create the subdirectory under.
+
+  Returns:
+    The full path of the new subdirectory (which is not actually created yet).
+
+  Raises:
+    RuntimeError: if repeated attempts fail to obtain a unique timestamped
+      directory name.
+  """
+  attempts = 0
+  while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS:
+    timestamp = int(time.time())
+
+    result_dir = os.path.join(
+        compat.as_bytes(dir_base), compat.as_bytes(str(timestamp)))
+    if not gfile.Exists(result_dir):
+      # Collisions are still possible (though extremely unlikely): this
+      # directory is not actually created yet, but it will be almost
+      # instantly on return from this function.
+      return result_dir
+    time.sleep(1)
+    attempts += 1
+    logging.warn('Directory {} already exists; retrying (attempt {}/{})'.format(
+        result_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS))
+  raise RuntimeError('Failed to obtain a unique export directory name after '
+                     '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS))
+
+
+def parse_input_fn_result(result):
+  """Gets features, labels, and hooks from the result of an Estimator input_fn.
+
+  Args:
+    result: output of an input_fn to an estimator, which should be one of:
+
+      * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
+          tuple (features, labels) with same constraints as below.
+      * A tuple (features, labels): Where `features` is a `Tensor` or a
+        dictionary of string feature name to `Tensor` and `labels` is a
+        `Tensor` or a dictionary of string label name to `Tensor`. Both
+        `features` and `labels` are consumed by `model_fn`. They should
+        satisfy the expectation of `model_fn` from inputs.
+
+  Returns:
+    Tuple of features, labels, and input_hooks, where features are as described
+    above, labels are as described above or None, and input_hooks are a list
+    of SessionRunHooks to be included when running.
+
+  Raises:
+    ValueError: if the result is a list or tuple of length != 2.
+  """
+  input_hooks = []
+  try:
+    # We can't just check whether this is a tf.data.Dataset instance here,
+    # as this is plausibly a PerDeviceDataset. Try treating as a dataset first.
+    iterator = result.make_initializable_iterator()
+  except AttributeError:
+    # Not a dataset or dataset-like-object. Move along.
+    pass
+  else:
+    input_hooks.append(_DatasetInitializerHook(iterator))
+    result = iterator.get_next()
+  return parse_iterator_result(result) + (input_hooks,)
+
+
+def parse_iterator_result(result):
+  """Gets features, labels from result."""
+  if isinstance(result, (list, tuple)):
+    if len(result) != 2:
+      raise ValueError(
+          'input_fn should return (features, labels) as a len 2 tuple.')
+    return result[0], result[1]
+  return result, None
+
+
+class _DatasetInitializerHook(training.SessionRunHook):
+  """Creates a SessionRunHook that initializes the passed iterator."""
+
+  def __init__(self, iterator):
+    self._iterator = iterator
+
+  def begin(self):
+    self._initializer = self._iterator.initializer
+
+  def after_create_session(self, session, coord):
+    del coord
+    session.run(self._initializer)
+
+
+class StrategyInitFinalizeHook(training.SessionRunHook):
+  """Creates a SessionRunHook that initializes and shutsdown devices."""
+
+  def __init__(self, initialization_fn, finalize_fn):
+    self._initialization_fn = initialization_fn
+    self._finalize_fn = finalize_fn
+
+  def begin(self):
+    # We only create the init ops, but don't run it. We rely on SessionManager
+    # to run it for us.
+    self._init_ops = self._initialization_fn()
+    self._finalize_ops = self._finalize_fn()
+
+  def end(self, session):
+    logging.info('Finalize system.')
+    session.run(self._finalize_ops)
diff --git a/tensorflow/python/estimator/util_test.py b/tensorflow/python/estimator/util_test.py
new file mode 100644
index 0000000000..d440c454dc
--- /dev/null
+++ b/tensorflow/python/estimator/util_test.py
@@ -0,0 +1,102 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for util.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import util
+from tensorflow.python.framework import constant_op
+from tensorflow.python.platform import test
+from tensorflow.python.training import training
+
+
+class UtilTest(test.TestCase):
+  """Tests for miscellaneous Estimator utils."""
+
+  def test_parse_input_fn_result_tuple(self):
+    def _input_fn():
+      features = constant_op.constant(np.arange(100))
+      labels = constant_op.constant(np.arange(100, 200))
+      return features, labels
+
+    features, labels, hooks = util.parse_input_fn_result(_input_fn())
+
+    with self.cached_session() as sess:
+      vals = sess.run([features, labels])
+
+    self.assertAllEqual(vals[0], np.arange(100))
+    self.assertAllEqual(vals[1], np.arange(100, 200))
+    self.assertEqual(hooks, [])
+
+  def test_parse_input_fn_result_dataset(self):
+    def _input_fn():
+      features = np.expand_dims(np.arange(100), 0)
+      labels = np.expand_dims(np.arange(100, 200), 0)
+      return dataset_ops.Dataset.from_tensor_slices((features, labels))
+
+    features, labels, hooks = util.parse_input_fn_result(_input_fn())
+
+    with training.MonitoredSession(hooks=hooks) as sess:
+      vals = sess.run([features, labels])
+
+    self.assertAllEqual(vals[0], np.arange(100))
+    self.assertAllEqual(vals[1], np.arange(100, 200))
+    self.assertIsInstance(hooks[0], util._DatasetInitializerHook)
+
+  def test_parse_input_fn_result_features_only(self):
+    def _input_fn():
+      return constant_op.constant(np.arange(100))
+
+    features, labels, hooks = util.parse_input_fn_result(_input_fn())
+
+    with self.cached_session() as sess:
+      vals = sess.run([features])
+
+    self.assertAllEqual(vals[0], np.arange(100))
+    self.assertEqual(labels, None)
+    self.assertEqual(hooks, [])
+
+  def test_parse_input_fn_result_features_only_dataset(self):
+    def _input_fn():
+      features = np.expand_dims(np.arange(100), 0)
+      return dataset_ops.Dataset.from_tensor_slices(features)
+
+    features, labels, hooks = util.parse_input_fn_result(_input_fn())
+
+    with training.MonitoredSession(hooks=hooks) as sess:
+      vals = sess.run([features])
+
+    self.assertAllEqual(vals[0], np.arange(100))
+    self.assertEqual(labels, None)
+    self.assertIsInstance(hooks[0], util._DatasetInitializerHook)
+
+  def test_parse_input_fn_result_invalid(self):
+    def _input_fn():
+      features = np.expand_dims(np.arange(100), 0)
+      labels = np.expand_dims(np.arange(100, 200), 0)
+      return dataset_ops.Dataset.from_tensor_slices((features, labels, labels))
+
+    with self.assertRaisesRegexp(ValueError, 'input_fn should return'):
+      util.parse_input_fn_result(_input_fn())
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index 809a73c278..82acde584e 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -157,7 +157,7 @@ py_test(
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:backprop",
         "//tensorflow/python/eager:context",
-        "//tensorflow/python/estimator:estimator_py",
+        "//tensorflow/python/estimator:numpy_io",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/python/tools/api/generator/BUILD b/tensorflow/python/tools/api/generator/BUILD
index 9fd069c5be..90be2cc4f7 100644
--- a/tensorflow/python/tools/api/generator/BUILD
+++ b/tensorflow/python/tools/api/generator/BUILD
@@ -4,6 +4,7 @@
 licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow/python/tools/api/generator:api_gen.bzl", "ESTIMATOR_API_INIT_FILES")
 load("//tensorflow/python/tools/api/generator:api_init_files.bzl", "TENSORFLOW_API_INIT_FILES")
 load("//tensorflow/python/tools/api/generator:api_init_files_v1.bzl", "TENSORFLOW_API_INIT_FILES_V1")
 
@@ -65,6 +66,23 @@ py_test(
     ],
 )
 
+py_test(
+    name = "estimator_doc_srcs_test",
+    srcs = ["doc_srcs_test.py"],
+    args = [
+        "--package=tensorflow.python.estimator",
+        "--api_name=estimator",
+    ] + ESTIMATOR_API_INIT_FILES,
+    main = "doc_srcs_test.py",
+    srcs_version = "PY2AND3",
+    deps = [
+        ":doc_srcs",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:no_contrib",
+        "//tensorflow/python/estimator:estimator_py",
+    ],
+)
+
 py_test(
     name = "output_init_files_test",
     srcs = ["output_init_files_test.py"],
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-classifier.pbtxt
index 32b84e90ce..082e26b99b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BaselineClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.baseline.BaselineClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.baseline.BaselineClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-regressor.pbtxt
index db7776b5bf..7cc4191eb3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-baseline-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BaselineRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.baseline.BaselineRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.baseline.BaselineRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-best-exporter.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-best-exporter.pbtxt
index 68145735bd..9694268199 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-best-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-best-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BestExporter"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.BestExporter\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.BestExporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
index fa352907c0..ef3409b1b5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.estimator.BoostedTreesClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
index 154b35f306..775130468f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.estimator.BoostedTreesRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-classifier.pbtxt
index ce6040d0f2..718f415a77 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.dnn.DNNClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
index e85007e16e..b23c019d6c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNLinearCombinedClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
index 8a55bb835f..caa9e3f1de 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNLinearCombinedRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-regressor.pbtxt
index 2c4128ec48..1f5e650940 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-d-n-n-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.dnn.DNNRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
index 376becc3f9..aa6ac46613 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.EstimatorSpec\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "eval_metric_ops"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator.pbtxt
index 9d270a87ab..ebd3869c9b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.Estimator"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-eval-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-eval-spec.pbtxt
index 23c2544fe4..db83ba1bd8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-eval-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-eval-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EvalSpec"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.training.EvalSpec\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.training.EvalSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.training.EvalSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.training.EvalSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "exporters"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-exporter.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-exporter.pbtxt
index 6c3f0fd910..035af70e52 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-exporter.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.Exporter"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-exporter.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-exporter.pbtxt
index e030d401ea..ee37b1fa21 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.FinalExporter"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.FinalExporter\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.FinalExporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-latest-exporter.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-latest-exporter.pbtxt
index d67f2bd625..2a9d029029 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-latest-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-latest-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LatestExporter"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.LatestExporter\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.LatestExporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-classifier.pbtxt
index 4b5de2e245..53ec5a0c78 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LinearClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.linear.LinearClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.linear.LinearClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-regressor.pbtxt
index 0d1510e9ab..3791162619 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-linear-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LinearRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.linear.LinearRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.linear.LinearRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt
index bf7c1abcd8..6a1c24fa63 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-mode-keys.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.ModeKeys"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.ModeKeys\'>"
+  is_instance: "<class \'tensorflow.python.estimator.model_fn.ModeKeys\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "EVAL"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-run-config.pbtxt
index 827b1ac5a5..269e18a0a7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-run-config.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-run-config.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.RunConfig"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.run_config.RunConfig\'>"
+  is_instance: "<class \'tensorflow.python.estimator.run_config.RunConfig\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "cluster_spec"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-train-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-train-spec.pbtxt
index 1d9f51a20e..7d2f77438a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-train-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-train-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.TrainSpec"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.training.TrainSpec\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.training.TrainSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.training.TrainSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.training.TrainSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "hooks"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-warm-start-settings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-warm-start-settings.pbtxt
index dca2c1fe11..43f5343359 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-warm-start-settings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-warm-start-settings.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.WarmStartSettings"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.WarmStartSettings\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.WarmStartSettings\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.WarmStartSettings\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.WarmStartSettings\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "ckpt_to_initialize_from"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-classification-output.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-classification-output.pbtxt
index 52874dd9b9..2df1840c4a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-classification-output.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-classification-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.ClassificationOutput"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ClassificationOutput\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ClassificationOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "classes"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-export-output.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-export-output.pbtxt
index 964c315e97..fa62e8ced8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-export-output.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-export-output.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.export.ExportOutput"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-predict-output.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-predict-output.pbtxt
index bb82bc9e58..e0160b10ce 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-predict-output.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-predict-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.PredictOutput"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.PredictOutput\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.PredictOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "outputs"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-regression-output.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-regression-output.pbtxt
index 8522834433..905f0e0553 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-regression-output.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-regression-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.RegressionOutput"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.RegressionOutput\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.RegressionOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "value"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-serving-input-receiver.pbtxt
index a0371a1663..d71b2a4300 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-serving-input-receiver.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-serving-input-receiver.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.ServingInputReceiver"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.ServingInputReceiver\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.ServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export.ServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export.ServingInputReceiver\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "features"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
index da9d05df23..4fe92643bf 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.TensorServingInputReceiver"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.TensorServingInputReceiver\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.TensorServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export.TensorServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export.TensorServingInputReceiver\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "features"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-classifier.pbtxt
index 32b84e90ce..082e26b99b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BaselineClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.baseline.BaselineClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.baseline.BaselineClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-regressor.pbtxt
index db7776b5bf..7cc4191eb3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-baseline-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BaselineRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.baseline.BaselineRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.baseline.BaselineRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-best-exporter.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-best-exporter.pbtxt
index 68145735bd..9694268199 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-best-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-best-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.BestExporter"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.BestExporter\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.BestExporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
index fa352907c0..ef3409b1b5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.estimator.BoostedTreesClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
index 154b35f306..775130468f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt
@@ -1,8 +1,8 @@
 path: "tensorflow.estimator.BoostedTreesRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees.BoostedTreesRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.boosted_trees._BoostedTreesBase\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-classifier.pbtxt
index ce6040d0f2..718f415a77 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.dnn.DNNClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
index e85007e16e..b23c019d6c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNLinearCombinedClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
index 8a55bb835f..caa9e3f1de 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNLinearCombinedRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-regressor.pbtxt
index 2c4128ec48..1f5e650940 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-d-n-n-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.DNNRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.dnn.DNNRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
index 376becc3f9..aa6ac46613 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.EstimatorSpec\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.model_fn.EstimatorSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "eval_metric_ops"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator.pbtxt
index 9d270a87ab..ebd3869c9b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.Estimator"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-eval-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-eval-spec.pbtxt
index 23c2544fe4..db83ba1bd8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-eval-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-eval-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EvalSpec"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.training.EvalSpec\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.training.EvalSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.training.EvalSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.training.EvalSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "exporters"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-exporter.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-exporter.pbtxt
index 6c3f0fd910..035af70e52 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-exporter.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.Exporter"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-exporter.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-exporter.pbtxt
index e030d401ea..ee37b1fa21 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.FinalExporter"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.FinalExporter\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.FinalExporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-latest-exporter.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-latest-exporter.pbtxt
index d67f2bd625..2a9d029029 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-latest-exporter.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-latest-exporter.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LatestExporter"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.LatestExporter\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.exporter.Exporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.LatestExporter\'>"
+  is_instance: "<class \'tensorflow.python.estimator.exporter.Exporter\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "name"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-classifier.pbtxt
index 4b5de2e245..53ec5a0c78 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-classifier.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-classifier.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LinearClassifier"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.linear.LinearClassifier\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.linear.LinearClassifier\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-regressor.pbtxt
index 0d1510e9ab..3791162619 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-regressor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-linear-regressor.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.LinearRegressor"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.canned.linear.LinearRegressor\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.Estimator\'>"
+  is_instance: "<class \'tensorflow.python.estimator.canned.linear.LinearRegressor\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.Estimator\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "config"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
index bf7c1abcd8..6a1c24fa63 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.ModeKeys"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.ModeKeys\'>"
+  is_instance: "<class \'tensorflow.python.estimator.model_fn.ModeKeys\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "EVAL"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-run-config.pbtxt
index 827b1ac5a5..269e18a0a7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-run-config.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-run-config.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.RunConfig"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.run_config.RunConfig\'>"
+  is_instance: "<class \'tensorflow.python.estimator.run_config.RunConfig\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "cluster_spec"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-train-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-train-spec.pbtxt
index 1d9f51a20e..7d2f77438a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-train-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-train-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.TrainSpec"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.training.TrainSpec\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.training.TrainSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.training.TrainSpec\'>"
+  is_instance: "<class \'tensorflow.python.estimator.training.TrainSpec\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "hooks"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-warm-start-settings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-warm-start-settings.pbtxt
index dca2c1fe11..43f5343359 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-warm-start-settings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-warm-start-settings.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.WarmStartSettings"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.WarmStartSettings\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.estimator.WarmStartSettings\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.WarmStartSettings\'>"
+  is_instance: "<class \'tensorflow.python.estimator.estimator.WarmStartSettings\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "ckpt_to_initialize_from"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-classification-output.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-classification-output.pbtxt
index 52874dd9b9..2df1840c4a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-classification-output.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-classification-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.ClassificationOutput"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ClassificationOutput\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ClassificationOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "classes"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-export-output.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-export-output.pbtxt
index 964c315e97..fa62e8ced8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-export-output.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-export-output.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.export.ExportOutput"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-predict-output.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-predict-output.pbtxt
index bb82bc9e58..e0160b10ce 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-predict-output.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-predict-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.PredictOutput"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.PredictOutput\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.PredictOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "outputs"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-regression-output.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-regression-output.pbtxt
index 8522834433..905f0e0553 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-regression-output.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-regression-output.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.RegressionOutput"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.RegressionOutput\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export_output.ExportOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.RegressionOutput\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export_output.ExportOutput\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "value"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-serving-input-receiver.pbtxt
index a0371a1663..d71b2a4300 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-serving-input-receiver.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-serving-input-receiver.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.ServingInputReceiver"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.ServingInputReceiver\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.ServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export.ServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export.ServingInputReceiver\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "features"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
index da9d05df23..4fe92643bf 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.export.TensorServingInputReceiver"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.TensorServingInputReceiver\'>"
-  is_instance: "<class \'tensorflow_estimator.python.estimator.export.export.TensorServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export.TensorServingInputReceiver\'>"
+  is_instance: "<class \'tensorflow.python.estimator.export.export.TensorServingInputReceiver\'>"
   is_instance: "<type \'tuple\'>"
   member {
     name: "features"
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 0a20a85c58..dd1dca9ee8 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -57,7 +57,6 @@ REQUIRED_PACKAGES = [
     'six >= 1.10.0',
     'protobuf >= 3.6.1',
     'tensorboard >= 1.11.0, < 1.12.0',
-    'tensorflow_estimator >= 1.10.0',
     'termcolor >= 1.1.0',
 ]
 
-- 
GitLab


From 779746b736279ade4712f40160fcb8d909190fbf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 11:47:16 -0700
Subject: [PATCH 0789/1085] Symmetric quantization support for
 tf.contrib.quantize.

This quantization is configured so that signed integers can be used for inference without biasing, so narrow_range=false implies the negative limit is slightly larger than the positive limit, as is the case for signed two's-complement representation.

PiperOrigin-RevId: 216734571
---
 .../contrib/quantize/python/quant_ops.py      |  64 +++++++---
 .../contrib/quantize/python/quant_ops_test.py | 109 +++++++++++-------
 .../contrib/quantize/python/quantize.py       |  13 +++
 .../contrib/quantize/python/quantize_graph.py |  12 ++
 .../quantize/python/quantize_graph_test.py    |   8 +-
 5 files changed, 146 insertions(+), 60 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py
index c7c099e1c6..6f659347fb 100644
--- a/tensorflow/contrib/quantize/python/quant_ops.py
+++ b/tensorflow/contrib/quantize/python/quant_ops.py
@@ -67,7 +67,8 @@ def LastValueQuantize(inputs,
                       reuse=None,
                       is_training=True,
                       num_bits=8,
-                      narrow_range=False):
+                      narrow_range=False,
+                      symmetric=False):
   """Adds a layer that collects quantization ranges as last input ranges.
 
   LastValueQuantize creates variables called 'min' and 'max', representing the
@@ -88,6 +89,8 @@ def LastValueQuantize(inputs,
     num_bits: Number of bits to use for quantization, must be between 2 and 8.
     narrow_range: Whether to use the narrow quantization range
       [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
+    symmetric: If true, use symmetric quantization limits instead of training
+      the minimum and maximum of each quantization range separately.
   Returns:
     a tensor containing quantized values.
   """
@@ -140,9 +143,6 @@ def LastValueQuantize(inputs,
         batch_min = inputs
     else:
       batch_min = math_ops.reduce_min(inputs, name='BatchMin')
-    # TFLite requires that 0.0 if always in the [min; max] range.
-    batch_min = math_ops.minimum(batch_min, 0.0)
-    assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast')
 
     if per_channel:
       if input_dim >= 2:
@@ -152,9 +152,26 @@ def LastValueQuantize(inputs,
         batch_max = inputs
     else:
       batch_max = math_ops.reduce_max(inputs, name='BatchMax')
-    # TFLite requires that 0.0 if always in the [min; max] range.
-    batch_max = math_ops.maximum(batch_max, 0.0)
-    assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast')
+
+    if symmetric:
+      if narrow_range:
+        min_max_ratio = -1
+      else:
+        # In two's complement notation, the negative range is slightly larger
+        # than the positive range.
+        min_max_ratio = -((1 << num_bits) - 2) / (1 << num_bits)
+
+      # TFLite requires that 0.0 if always in the [min; max] range. Because
+      # batch_min <= batch_max, it follows that range_min <= 0 <= range_max.
+      range_min = math_ops.minimum(batch_min, batch_max / min_max_ratio)
+      range_max = math_ops.maximum(batch_max, batch_min * min_max_ratio)
+    else:
+      # TFLite requires that 0.0 if always in the [min; max] range.
+      range_min = math_ops.minimum(batch_min, 0.0)
+      range_max = math_ops.maximum(batch_max, 0.0)
+
+    assign_min = state_ops.assign(min_var, range_min, name='AssignMinLast')
+    assign_max = state_ops.assign(max_var, range_max, name='AssignMaxLast')
 
     return _FakeQuantWithMinMaxVars(
         inputs,
@@ -175,7 +192,8 @@ def MovingAvgQuantize(inputs,
                       reuse=None,
                       is_training=True,
                       num_bits=8,
-                      narrow_range=False):
+                      narrow_range=False,
+                      symmetric=False):
   """Adds a layer that collects quantization ranges as EMAs of input ranges.
 
   MovingAvgQuantize creates variables called 'min' and 'max', representing the
@@ -197,6 +215,8 @@ def MovingAvgQuantize(inputs,
     num_bits: Number of bits to use for quantization, must be between 2 and 8.
     narrow_range: Whether to use the narrow quantization range
       [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
+    symmetric: If true, use symmetric quantization limits instead of training
+      the minimum and maximum of each quantization range separately.
   Returns:
     a tensor containing quantized values.
   """
@@ -248,10 +268,6 @@ def MovingAvgQuantize(inputs,
         batch_min = inputs
     else:
       batch_min = math_ops.reduce_min(inputs, name='BatchMin')
-    # B-eng requires that 0.0 if always in the [min; max] range.
-    batch_min = math_ops.minimum(batch_min, 0.0)
-    assign_min = moving_averages.assign_moving_average(
-        min_var, batch_min, ema_decay, name='AssignMinEma')
 
     if per_channel:
       if input_dim >= 2:
@@ -261,10 +277,28 @@ def MovingAvgQuantize(inputs,
         batch_max = inputs
     else:
       batch_max = math_ops.reduce_max(inputs, name='BatchMax')
-    # B-eng requires that 0.0 if always in the [min; max] range.
-    batch_max = math_ops.maximum(batch_max, 0.0)
+
+    if symmetric:
+      if narrow_range:
+        min_max_ratio = -1
+      else:
+        # In two's complement notation, the negative range is slightly larger
+        # than the positive range.
+        min_max_ratio = -((1 << num_bits) - 2) / (1 << num_bits)
+
+      # TFLite requires that 0.0 if always in the [min; max] range. Because
+      # batch_min <= batch_max, it follows that range_min <= 0 <= range_max.
+      range_min = math_ops.minimum(batch_min, batch_max / min_max_ratio)
+      range_max = math_ops.maximum(batch_max, batch_min * min_max_ratio)
+    else:
+      # TFLite requires that 0.0 if always in the [min; max] range.
+      range_min = math_ops.minimum(batch_min, 0.0)
+      range_max = math_ops.maximum(batch_max, 0.0)
+
+    assign_min = moving_averages.assign_moving_average(
+        min_var, range_min, ema_decay, name='AssignMinEma')
     assign_max = moving_averages.assign_moving_average(
-        max_var, batch_max, ema_decay, name='AssignMaxEma')
+        max_var, range_max, ema_decay, name='AssignMaxEma')
 
     return _FakeQuantWithMinMaxVars(
         inputs,
diff --git a/tensorflow/contrib/quantize/python/quant_ops_test.py b/tensorflow/contrib/quantize/python/quant_ops_test.py
index a45840009b..36d2af94e0 100644
--- a/tensorflow/contrib/quantize/python/quant_ops_test.py
+++ b/tensorflow/contrib/quantize/python/quant_ops_test.py
@@ -29,51 +29,55 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 
 _MIN_MAX_VARS = 'min_max_vars'
+_SYMMETRIC_RANGE_RATIO = 0.9921875  # 127 / 128
 
 
 class QuantOpsTest(googletest.TestCase):
 
   def testLastValueQuantizeTrainingAssign(self):
-    g = ops.Graph()
-    with session.Session(graph=g) as sess:
-      x = array_ops.placeholder(dtypes.float32, shape=[2])
-      y = quant_ops.LastValueQuantize(
-          x,
-          init_min=0.0,
-          init_max=0.0,
-          is_training=True,
-          vars_collection=_MIN_MAX_VARS)
+    min_value, max_value = self._GetMinMaxValues(quant_ops.LastValueQuantize,
+                                                 [[-1, 1]])
+    self.assertEqual(min_value, -1.0)
+    self.assertEqual(max_value, 1.0)
 
-      # Run the step.
-      sess.run(variables.global_variables_initializer())
-      sess.run(y, feed_dict={x: [-1.0, 1.0]})
-      # Now check that the min_max_vars were, in fact, updated.
-      min_value, max_value = self._GetMinMaxValues(sess)
-      self.assertEqual(min_value, -1.0)
-      self.assertEqual(max_value, 1.0)
+  def testLastValueSymmetricQuantizeTrainingAssign(self):
+    min_value, max_value = self._GetMinMaxValues(
+        quant_ops.LastValueQuantize,
+        [[-_SYMMETRIC_RANGE_RATIO, _SYMMETRIC_RANGE_RATIO]],
+        symmetric=True,
+        narrow_range=False)
+    self.assertEqual(min_value, -1.0)
+    self.assertEqual(max_value, _SYMMETRIC_RANGE_RATIO)
+
+  def testLastValueSymmetricQuantizeNarrowRangeTrainingAssign(self):
+    min_value, max_value = self._GetMinMaxValues(
+        quant_ops.LastValueQuantize, [[-1, 0.5]],
+        symmetric=True,
+        narrow_range=True)
+    self.assertEqual(min_value, -1.0)
+    self.assertEqual(max_value, 1)
 
   def testMovingAvgQuantizeTrainingAssign(self):
-    g = ops.Graph()
-    with session.Session(graph=g) as sess:
-      x = array_ops.placeholder(dtypes.float32, shape=[2])
-      y = quant_ops.MovingAvgQuantize(
-          x,
-          init_min=0.0,
-          init_max=0.0,
-          is_training=True,
-          vars_collection=_MIN_MAX_VARS)
+    min_value, max_value = self._GetMinMaxValues(quant_ops.MovingAvgQuantize,
+                                                 [[-1, 1], [0, 0]])
+    self.assertAlmostEqual(min_value, -0.5, delta=1e-3)
+    self.assertAlmostEqual(max_value, 0.5, delta=1e-3)
 
-      # Run the step.
-      sess.run(variables.global_variables_initializer())
-      # Do two runs to avoid zero debias.
-      sess.run(y, feed_dict={x: [-1.0, 1.0]})
-      sess.run(y, feed_dict={x: [0.0, 0.0]})
-      # Now check that the min_max_vars were, in fact, updated.
-      min_value, max_value = self._GetMinMaxValues(sess)
-      self.assertGreater(min_value, -1.0)
-      self.assertLess(min_value, 0.0)
-      self.assertGreater(max_value, 0.0)
-      self.assertLess(max_value, 1.0)
+  def testMovingAvgSymmetricQuantizeTrainingAssign(self):
+    min_value, max_value = self._GetMinMaxValues(
+        quant_ops.MovingAvgQuantize, [[-1, 0.5], [0, 0]], symmetric=True)
+    self.assertAlmostEqual(min_value, -0.5, delta=1e-3)
+    self.assertAlmostEqual(max_value, 0.5 * _SYMMETRIC_RANGE_RATIO, delta=1e-3)
+    self.assertAlmostEqual(max_value, min_value * -_SYMMETRIC_RANGE_RATIO)
+
+  def testMovingAvgSymmetricQuantizeNarrowRangeTrainingAssign(self):
+    min_value, max_value = self._GetMinMaxValues(
+        quant_ops.MovingAvgQuantize, [[-1, 0.5], [0, 0]],
+        symmetric=True,
+        narrow_range=True)
+    self.assertAlmostEqual(min_value, -0.5, delta=1e-3)
+    self.assertAlmostEqual(max_value, 0.5, delta=1e-3)
+    self.assertAlmostEqual(max_value, -min_value)
 
   def testVariablesNotPartitioned_LastValue(self):
     # Variables added should not use a default partiioner since they are
@@ -105,14 +109,31 @@ class QuantOpsTest(googletest.TestCase):
             is_training=True,
             vars_collection=_MIN_MAX_VARS)
 
-  def _GetMinMaxValues(self, sess):
-    min_max_vars = ops.get_collection(_MIN_MAX_VARS)
-    self.assertEqual(len(min_max_vars), 2)
-    min_idx = 0 if 'min' in min_max_vars[0].name else 1
-    max_idx = (min_idx + 1) % 2
-    min_var, max_var = min_max_vars[min_idx], min_max_vars[max_idx]
-    min_max_values = sess.run([min_var, max_var])
-    return min_max_values[0], min_max_values[1]
+  def _GetMinMaxValues(self, quantize_fn, input_values, **kwds):
+    g = ops.Graph()
+    with session.Session(graph=g) as sess:
+      x = array_ops.placeholder(dtypes.float32, shape=[2])
+      y = quantize_fn(
+          x,
+          init_min=0.0,
+          init_max=0.0,
+          is_training=True,
+          vars_collection=_MIN_MAX_VARS,
+          **kwds)
+
+      # Run the step.
+      sess.run(variables.global_variables_initializer())
+      for input_elem in input_values:
+        sess.run(y, feed_dict={x: input_elem})
+
+      # Now check that the min_max_vars were, in fact, updated.
+      min_max_vars = ops.get_collection(_MIN_MAX_VARS)
+      self.assertEqual(len(min_max_vars), 2)
+      min_idx = 0 if 'min' in min_max_vars[0].name else 1
+      max_idx = (min_idx + 1) % 2
+      min_var, max_var = min_max_vars[min_idx], min_max_vars[max_idx]
+      min_max_values = sess.run([min_var, max_var])
+      return min_max_values[0], min_max_values[1]
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index fd86a96905..4ab888e0a4 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -41,6 +41,7 @@ def Quantize(graph,
              is_training,
              weight_bits=8,
              activation_bits=8,
+             symmetric=False,
              ema_decay=0.999,
              quant_delay=None,
              vars_collection=ops.GraphKeys.GLOBAL_VARIABLES,
@@ -58,6 +59,8 @@ def Quantize(graph,
     is_training: Whether quantizing training graph or eval graph.
     weight_bits: Number of bits to use for quantizing weights.
     activation_bits: Number of bits to use for quantizing activations.
+    symmetric: (Optional) If true, use symmetric quantization limits instead of
+      training the minimum and maximum of each quantization range separately.
     ema_decay: (Optional) Float, EMA decay parameter.  EMA is used to update
       quantization intervals for quantizing activations (see here about EMA:
       https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average).
@@ -92,6 +95,7 @@ def Quantize(graph,
         narrow_range=True,
         vars_collection=vars_collection,
         bits=weight_bits,
+        symmetric=symmetric,
         consumer_scope=scope)
 
     # Quantize the activations.
@@ -117,6 +121,7 @@ def Quantize(graph,
         quant_delay=quant_delay,
         vars_collection=vars_collection,
         bits=activation_bits,
+        symmetric=symmetric,
         init_min=0.0,
         producer_scope=scope)
 
@@ -135,6 +140,7 @@ def Quantize(graph,
           quant_delay=quant_delay,
           vars_collection=vars_collection,
           bits=activation_bits,
+          symmetric=symmetric,
           producer_scope=scope,
           consumer_scope=scope)
       # Make sure the op following this isn't an activation. In which case, we
@@ -156,6 +162,7 @@ def Quantize(graph,
             quant_delay=quant_delay,
             vars_collection=vars_collection,
             bits=activation_bits,
+            symmetric=symmetric,
             producer_scope=scope,
             consumer_scope=scope)
 
@@ -189,6 +196,7 @@ def Quantize(graph,
             quant_delay=quant_delay,
             vars_collection=vars_collection,
             bits=activation_bits,
+            symmetric=symmetric,
             producer_scope=scope)
 
 
@@ -517,6 +525,7 @@ def _InsertQuantOp(context,
                    init_min=-6.0,
                    init_max=6.0,
                    bits=8,
+                   symmetric=False,
                    ema_decay=0.999,
                    quant_delay=None,
                    vars_collection=ops.GraphKeys.GLOBAL_VARIABLES,
@@ -537,6 +546,8 @@ def _InsertQuantOp(context,
     init_min: Starting minimum value for the new quantization op.
     init_max: Starting maximum value for the new quantization op.
     bits: Number of bits to use for quantization, must be between 2 and 8.
+    symmetric: (Optional) If true, use symmetric quantization limits instead of
+      training the minimum and maximum of each quantization range separately.
     ema_decay: (Optional) Float, EMA decay parameter.  EMA is used to update
       quantization intervals for quantizing activations (see here about EMA:
       https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average).
@@ -603,6 +614,7 @@ def _InsertQuantOp(context,
               ema_decay=ema_decay,
               is_training=is_training,
               num_bits=bits,
+              symmetric=symmetric,
               narrow_range=narrow_range,
               vars_collection=vars_collection,
               name_prefix=name_prefix))
@@ -614,6 +626,7 @@ def _InsertQuantOp(context,
               init_max=init_max,
               is_training=is_training,
               num_bits=bits,
+              symmetric=symmetric,
               narrow_range=narrow_range,
               vars_collection=vars_collection,
               name_prefix=name_prefix))
diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py
index 484493f1b2..2a256a3c51 100644
--- a/tensorflow/contrib/quantize/python/quantize_graph.py
+++ b/tensorflow/contrib/quantize/python/quantize_graph.py
@@ -27,6 +27,7 @@ def _create_graph(input_graph=None,
                   is_training=True,
                   weight_bits=8,
                   activation_bits=8,
+                  symmetric=False,
                   quant_delay=None,
                   freeze_bn_delay=None,
                   scope=None):
@@ -43,6 +44,8 @@ def _create_graph(input_graph=None,
     is_training: Whether quantizing training or eval graph.
     weight_bits: Number of bits to use for quantizing weights.
     activation_bits: Number of bits to use for quantizing activations.
+    symmetric: If true, use symmetric quantization limits instead of training
+      the minimum and maximum of each quantization range separately.
     quant_delay: Number of steps after which weights and activations are
       quantized during training.
     freeze_bn_delay: Number of steps after which moving mean and variance are
@@ -74,6 +77,7 @@ def _create_graph(input_graph=None,
         quant_delay=quant_delay,
         weight_bits=weight_bits,
         activation_bits=activation_bits,
+        symmetric=symmetric,
         scope=scope)
 
 
@@ -142,6 +146,7 @@ def create_eval_graph(input_graph=None):
 def experimental_create_training_graph(input_graph=None,
                                        weight_bits=8,
                                        activation_bits=8,
+                                       symmetric=False,
                                        quant_delay=0,
                                        freeze_bn_delay=None,
                                        scope=None):
@@ -173,6 +178,8 @@ def experimental_create_training_graph(input_graph=None,
       default graph.
     weight_bits: Number of bits to use for quantizing weights.
     activation_bits: Number of bits to use for quantizing activations.
+    symmetric: If true, use symmetric quantization limits instead of training
+      the minimum and maximum of each quantization range separately.
     quant_delay: Number of steps after which weights and activations are
       quantized during training.
     freeze_bn_delay: Number of steps after which moving mean and variance are
@@ -192,6 +199,7 @@ def experimental_create_training_graph(input_graph=None,
       is_training=True,
       weight_bits=weight_bits,
       activation_bits=activation_bits,
+      symmetric=symmetric,
       quant_delay=quant_delay,
       freeze_bn_delay=freeze_bn_delay,
       scope=scope)
@@ -200,6 +208,7 @@ def experimental_create_training_graph(input_graph=None,
 def experimental_create_eval_graph(input_graph=None,
                                    weight_bits=8,
                                    activation_bits=8,
+                                   symmetric=False,
                                    quant_delay=None,
                                    scope=None):
   """Rewrites an eval input_graph in place for simulated quantization.
@@ -219,6 +228,8 @@ def experimental_create_eval_graph(input_graph=None,
       default graph.
     weight_bits: Number of bits to use for quantizing weights.
     activation_bits: Number of bits to use for quantizing activations.
+    symmetric: If true, use symmetric quantization limits instead of training
+      the minimum and maximum of each quantization range separately.
     quant_delay: Number of steps after which weights and activations are
       quantized during eval.
     scope: The scope to be transformed. If it's not None, only the ops which
@@ -233,6 +244,7 @@ def experimental_create_eval_graph(input_graph=None,
       is_training=False,
       weight_bits=weight_bits,
       activation_bits=activation_bits,
+      symmetric=symmetric,
       quant_delay=quant_delay,
       scope=scope)
 
diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py
index f0fd0949dd..623212fc56 100644
--- a/tensorflow/contrib/quantize/python/quantize_graph_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+
 from tensorflow.contrib.layers.python.layers import layers
 from tensorflow.contrib.quantize.python import quantize_graph
 from tensorflow.python import training
@@ -26,8 +28,8 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import template
 from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import template
 from tensorflow.python.platform import googletest
 
 
@@ -49,6 +51,8 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
     rewrite_fns = [
         quantize_graph.create_training_graph,
         quantize_graph.experimental_create_training_graph,
+        functools.partial(
+            quantize_graph.experimental_create_training_graph, symmetric=True),
     ]
     for fn in rewrite_fns:
       test_fn(fn)
@@ -57,6 +61,8 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
     rewrite_fns = [
         quantize_graph.create_eval_graph,
         quantize_graph.experimental_create_eval_graph,
+        functools.partial(
+            quantize_graph.experimental_create_eval_graph, symmetric=True),
     ]
     for fn in rewrite_fns:
       test_fn(fn)
-- 
GitLab


From 5c286cf56d6cdd50092ce131a677dd69ab573061 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 11:58:25 -0700
Subject: [PATCH 0790/1085] Register int64 version of AddN on GPU.

PiperOrigin-RevId: 216736399
---
 tensorflow/core/kernels/aggregate_ops.cc             | 1 +
 tensorflow/core/kernels/aggregate_ops_gpu.cu.cc      | 1 +
 tensorflow/python/kernel_tests/aggregate_ops_test.py | 6 ++++--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc
index 2f125312d0..150e8fe637 100644
--- a/tensorflow/core/kernels/aggregate_ops.cc
+++ b/tensorflow/core/kernels/aggregate_ops.cc
@@ -227,6 +227,7 @@ REGISTER_ADDN_CPU(Variant);
 #if GOOGLE_CUDA
 #define REGISTER_ADDN_GPU(type) REGISTER_ADDN(type, GPU)
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_ADDN_GPU);
+TF_CALL_int64(REGISTER_ADDN_GPU);
 TF_CALL_complex64(REGISTER_ADDN_GPU);
 TF_CALL_complex128(REGISTER_ADDN_GPU);
 TF_CALL_variant(REGISTER_ADDN_GPU);
diff --git a/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc b/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc
index 3f449be754..8fef84305f 100644
--- a/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/aggregate_ops_gpu.cu.cc
@@ -154,6 +154,7 @@ struct Add9Functor<GPUDevice, T> {
   template struct functor::Add9Functor<GPUDevice, type>;
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_FUNCTORS);
+TF_CALL_int64(REGISTER_FUNCTORS);
 TF_CALL_complex64(REGISTER_FUNCTORS);
 TF_CALL_complex128(REGISTER_FUNCTORS);
 
diff --git a/tensorflow/python/kernel_tests/aggregate_ops_test.py b/tensorflow/python/kernel_tests/aggregate_ops_test.py
index 0a08c01dad..72dff6b3da 100644
--- a/tensorflow/python/kernel_tests/aggregate_ops_test.py
+++ b/tensorflow/python/kernel_tests/aggregate_ops_test.py
@@ -39,8 +39,10 @@ class AddNTest(test.TestCase):
 
   def _supported_types(self):
     if test.is_gpu_available():
-      return [dtypes.float16, dtypes.float32, dtypes.float64, dtypes.complex64,
-              dtypes.complex128]
+      return [
+          dtypes.float16, dtypes.float32, dtypes.float64, dtypes.complex64,
+          dtypes.complex128, dtypes.int64
+      ]
     return [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
             dtypes.float16, dtypes.float32, dtypes.float64, dtypes.complex64,
             dtypes.complex128]
-- 
GitLab


From c304bd9bc9165cc3c600c8e77713e884844dc0e3 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Thu, 11 Oct 2018 12:07:19 -0700
Subject: [PATCH 0791/1085] Add Unimplemented visitor for token element types
 to HLO evaluator. The evaluator would crash when encountering an instruction
 which produced a TOKEN shape.

PiperOrigin-RevId: 216737937
---
 tensorflow/compiler/xla/service/hlo_evaluator.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 6cba46135c..bb6806dc0b 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -190,6 +190,11 @@ HloEvaluator::HloEvaluator(int64 max_loop_iterations)
         return Unimplemented(
             "HloEvaluatorTypedVisitor: unhandled primitive type: OPAQUE.");
       });
+  typed_visitors_[TOKEN] =
+      absl::make_unique<FunctionVisitor>([](HloInstruction*) {
+        return Unimplemented(
+            "HloEvaluatorTypedVisitor: unhandled primitive type: TOKEN.");
+      });
 }
 
 template <typename LiteralPtr>
-- 
GitLab


From 028410c7f4b0555c5ec3b818892ff8fdac90fc25 Mon Sep 17 00:00:00 2001
From: Yunxing Dai <yunxing@google.com>
Date: Thu, 11 Oct 2018 12:07:37 -0700
Subject: [PATCH 0792/1085] [Resubmit][XLA] Introduce input/output alias
 config.

- This CL intruduces input/output alias config in HLO module that allows any HLO pass to configure it. Once the alias_config is set, each backend needs to follow the contract during execution time to make sure the input and output are indeed aliased.

- Copy insertion / buffer assignment and alias analysis has been updated to correctly honor the config and avoid any possible liveness interference.

PiperOrigin-RevId: 216737975
---
 tensorflow/compiler/xla/service/BUILD         |  21 ++
 .../compiler/xla/service/buffer_assignment.cc |  34 +--
 .../compiler/xla/service/buffer_value.h       |   3 +
 .../compiler/xla/service/copy_insertion.cc    |  88 +++++++-
 .../xla/service/copy_insertion_test.cc        | 212 ++++++++++++++++++
 tensorflow/compiler/xla/service/hlo.proto     |  29 +++
 .../xla/service/hlo_alias_analysis.cc         |  46 +++-
 .../xla/service/hlo_alias_analysis_test.cc    | 175 +++++++++++++++
 .../xla/service/hlo_dataflow_analysis.cc      |   2 +-
 .../service/hlo_input_output_alias_config.cc  | 182 +++++++++++++++
 .../service/hlo_input_output_alias_config.h   | 102 +++++++++
 .../hlo_input_output_alias_config_test.cc     | 184 +++++++++++++++
 tensorflow/compiler/xla/service/hlo_module.cc |   9 +
 tensorflow/compiler/xla/service/hlo_module.h  |  14 ++
 .../compiler/xla/service/hlo_verifier.cc      |   2 +
 tensorflow/compiler/xla/shape_util.h          |   2 +-
 16 files changed, 1078 insertions(+), 27 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
 create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
 create mode 100644 tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index f9f741aaee..6c3b9764b7 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -294,6 +294,7 @@ cc_library(
     srcs = [
         "dfs_hlo_visitor.cc",
         "hlo_computation.cc",
+        "hlo_input_output_alias_config.cc",
         "hlo_instruction.cc",
         "hlo_instructions.cc",
         "hlo_module.cc",
@@ -308,6 +309,7 @@ cc_library(
         "hlo_clone_context.h",
         "hlo_computation.h",
         "hlo_domain_metadata.h",
+        "hlo_input_output_alias_config.h",
         "hlo_instruction.h",
         "hlo_instructions.h",
         "hlo_module.h",
@@ -1268,6 +1270,25 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "hlo_input_output_alias_config_test",
+    srcs = ["hlo_input_output_alias_config_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_dce",
+        ":hlo_memory_scheduler",
+        ":hlo_ordering",
+        ":hlo_parser",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+        "@com_google_absl//absl/algorithm:container",
+    ],
+)
+
 cc_library(
     name = "hlo_memory_scheduler",
     srcs = ["hlo_memory_scheduler.cc"],
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 2c2d1626c2..d5d6a044a8 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -239,7 +239,7 @@ BufferAllocation::Slice BufferAllocation::GetSlice(
 
 void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset,
                                      int64 size) {
-  VLOG(4) << "Trying to add " << buffer << " to " << this;
+  VLOG(4) << "Trying to add " << buffer << " to allocation #" << index();
   CHECK(assigned_buffers_.count(&buffer) == 0)
       << "LogicalBuffer " << buffer << " already assigned to allocation "
       << index_;
@@ -784,21 +784,6 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
     }
   }
 
-  if (allow_input_output_aliasing_ && allocation->maybe_live_out()) {
-    const HloComputation* entry_computation =
-        assignment->module_->entry_computation();
-    for (auto param : entry_computation->parameter_instructions()) {
-      for (auto& param_buffer :
-           assignment->points_to_analysis().GetBuffersDefinedByInstruction(
-               param)) {
-        if (assignment->liveness().MayInterfere(*param_buffer, buffer)) {
-          VLOG(4) << "Can't assign: Parameter interference with result";
-          return false;
-        }
-      }
-    }
-  }
-
   // If the buffer is live out of the computation then it should only be
   // assigned a buffer which exactly fits the result to avoid wasting memory
   // (result buffers can have arbitrary lifetimes).
@@ -1434,13 +1419,28 @@ BufferAssigner::MergeColocatedBufferSets(
 
 // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated
 // in the same allocation (currently just supports kWhile, kCall, and
-// kConditional).
+// kConditional and input output aliasing).
 void BufferAssigner::BuildColocatedBufferSets(
     const HloModule* module, const BufferLiveness& buffer_liveness,
     const LogicalBuffer::SizeFunction& buffer_size,
     std::vector<ColocatedBufferSet>* colocated_buffer_sets) {
   const TuplePointsToAnalysis& points_to_analysis =
       buffer_liveness.points_to_analysis();
+
+  // Set up colocated buffer set for input and output.
+  module->input_output_alias_config().ForEachAlias(
+      [&](const ShapeIndex& output_index, int64 param_number,
+          const ShapeIndex& param_index) {
+        std::vector<const LogicalBuffer*> colocated_set;
+        AddBufferToColocatedSet(module->entry_computation()->root_instruction(),
+                                output_index, points_to_analysis,
+                                &colocated_set);
+        AddBufferToColocatedSet(
+            module->entry_computation()->parameter_instruction(param_number),
+            param_index, points_to_analysis, &colocated_set);
+        AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets);
+      });
+
   for (const HloComputation* computation : module->MakeComputationPostOrder()) {
     if (computation->IsFusionComputation()) {
       continue;
diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h
index 69b3646356..11d8abc5ba 100644
--- a/tensorflow/compiler/xla/service/buffer_value.h
+++ b/tensorflow/compiler/xla/service/buffer_value.h
@@ -141,6 +141,9 @@ class BufferValue {
   // operator< is required for std::set.
   bool operator<(const BufferValue& other) const { return id_ < other.id_; }
 
+  bool operator==(const BufferValue& other) const { return id_ == other.id_; }
+  bool operator!=(const BufferValue& other) const { return id_ != other.id_; }
+
   virtual string ToString() const = 0;
 
   // TODO(lauj) rename LogicalBufferProto to BufferValueProto.
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index f35324aa35..245db6be2a 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -40,10 +40,12 @@ namespace {
 
 using absl::StrAppend;
 
-bool IsEntryParameterValue(const HloValue& value) {
+bool IsReadonlyEntryParameterValue(const HloValue& value) {
   const HloComputation* computation = value.defining_instruction()->parent();
   return value.defining_instruction()->opcode() == HloOpcode::kParameter &&
-         computation == computation->parent()->entry_computation();
+         computation == computation->parent()->entry_computation() &&
+         !computation->parent()->input_output_alias_config().ParameterHasAlias(
+             value.defining_instruction()->parameter_number(), value.index());
 }
 
 bool IsConstantValue(const HloValue& value) {
@@ -51,7 +53,7 @@ bool IsConstantValue(const HloValue& value) {
 }
 
 bool ValueIsReadOnly(const HloValue& value) {
-  return IsConstantValue(value) || IsEntryParameterValue(value);
+  return IsConstantValue(value) || IsReadonlyEntryParameterValue(value);
 }
 
 // Data structure describing the action which should be taken on parts of a
@@ -79,8 +81,7 @@ SpecialCaseCopyPolicy GetSpecialCaseCopyPolicy(const CallGraphNode& node,
 bool ShouldCopyRootValue(const HloValue& value,
                          const SpecialCaseCopyPolicy& policy) {
   if (policy.copy_parameters_and_constants) {
-    return IsConstantValue(value) ||
-           value.defining_instruction()->opcode() == HloOpcode::kParameter;
+    return ValueIsReadOnly(value);
   }
   return false;
 }
@@ -332,6 +333,81 @@ Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis,
   return Status::OK();
 }
 
+// Conservatively adds copies before root instruction of entry computation and
+// each aliased parameter to resolve interference of aliased input and output
+// buffer. We later rely on the CopyRemover to drop the unnecessary ones.
+Status AddCopiesForAliasedInputOutputs(HloModule* module) {
+  HloComputation* entry = module->entry_computation();
+  HloInstruction* root = entry->root_instruction();
+
+  ShapeTree<bool> output_indices_to_copy(root->shape());
+  std::vector<ShapeTree<HloInstruction*>> copied_parameters;
+  bool has_alias = false;
+  for (auto* param : entry->parameter_instructions()) {
+    bool param_has_alias = false;
+    ShapeTree<bool> param_indices_to_copy(param->shape());
+
+    module->input_output_alias_config().ForEachAlias(
+        [&](const ShapeIndex& output_index, int64 param_number,
+            const ShapeIndex& param_index) {
+          if (param_number == param->parameter_number()) {
+            param_has_alias = true;
+            *(param_indices_to_copy.mutable_element(param_index)) = true;
+            *(output_indices_to_copy.mutable_element(output_index)) = true;
+          }
+        });
+
+    if (!param_has_alias) {
+      continue;
+    }
+
+    has_alias = true;
+    // Store a snapshot of users before DeepCopyInstruction, as
+    // DeepCopyInstruction introduces new users of the instruction.
+    std::vector<HloInstruction*> users = param->users();
+    ShapeTree<HloInstruction*> param_copy_tree(param->shape(),
+                                               /*init_value=*/nullptr);
+    TF_ASSIGN_OR_RETURN(HloInstruction * copied,
+                        entry->DeepCopyInstruction(
+                            param, &param_indices_to_copy, &param_copy_tree));
+    for (HloInstruction* user : users) {
+      TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, copied));
+    }
+
+    copied_parameters.push_back(param_copy_tree);
+  }
+
+  if (!has_alias) {
+    return Status::OK();
+  }
+
+  // Add copies before root instruction.
+  ShapeTree<HloInstruction*> output_copy_tree(root->shape(),
+                                              /*init_value=*/nullptr);
+
+  TF_ASSIGN_OR_RETURN(HloInstruction * root_copied,
+                      root->parent()->DeepCopyInstruction(
+                          root, &output_indices_to_copy, &output_copy_tree));
+
+  // Add control dependencies between the input/output copies.
+  TF_RETURN_IF_ERROR(module->input_output_alias_config().ForEachAliasWithStatus(
+      [&](const ShapeIndex& output_index, int64 param_number,
+          const ShapeIndex& input_index) -> Status {
+        HloInstruction* from =
+            copied_parameters[param_number].element(input_index);
+        HloInstruction* to = output_copy_tree.element(output_index);
+
+        TF_RET_CHECK(from != nullptr);
+        TF_RET_CHECK(to != nullptr);
+        TF_RETURN_IF_ERROR(from->AddControlDependencyTo(to));
+        return Status::OK();
+      }));
+
+  entry->set_root_instruction(root_copied);
+
+  return Status::OK();
+}
+
 // Removes any control dependencies to or from the given instruction.
 Status StripControlDependenciesFrom(HloInstruction* instruction) {
   while (!instruction->control_successors().empty()) {
@@ -953,6 +1029,8 @@ Status CopyInsertion::AddCopiesToResolveInterference(HloModule* module) {
       }
     }
   }
+
+  TF_RETURN_IF_ERROR(AddCopiesForAliasedInputOutputs(module));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index 892d0d7b54..4533ebb99b 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -1351,6 +1351,218 @@ TEST_F(CopyInsertionTest, SwizzlingWhile) {
   EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy()));
 }
 
+TEST_F(CopyInsertionTest, CrossingParameters) {
+  // Test a case where two parameters' dataflow cross with each other while
+  // input and output are aliased with same index:
+  //
+  //  (p0 ,  p1)
+  //   | \   /|
+  //   |  \ / |
+  // alias X  alias
+  //   |  / \ |
+  //   | /   \|
+  //  (p1  ,  p0)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte1, gte0}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 4);
+}
+
+TEST_F(CopyInsertionTest, ParametersAliasing) {
+  // Test a case where two parameters' dataflow don't interfere with each other
+  // while aliased.
+  //
+  //  (p0 ,  p1)
+  //   |      |
+  //   |      |
+  // alias   alias
+  //   |      |
+  //   |      |
+  //  (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 0);
+}
+
+TEST_F(CopyInsertionTest, ParameterWithNoAliasing) {
+  // Test a case where no parameter is aliased with result. In this case, copy
+  // should be added
+  //
+  //  (p0 ,  p1)
+  //   |      |
+  //   |      |
+  //   |      |
+  //   |      |
+  //   |      |
+  //  (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module->AddEntryComputation(builder.Build());
+  InsertCopies(module.get());
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::Copy(op::GetTupleElement(param, 0)),
+                        op::Copy(op::GetTupleElement(param, 1))));
+
+  EXPECT_EQ(CountCopies(*module), 2);
+}
+
+TEST_F(CopyInsertionTest, ParameterWithPartialAliasing) {
+  // Test a case where one parameter is aliased with result while another one
+  // isn't.
+  //
+  //  (p0 ,  p1)
+  //   |      |
+  //   |      |
+  // alias    |
+  //   |      |
+  //   |      |
+  //  (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  InsertCopies(module.get());
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::GetTupleElement(param, 0),
+                        op::Copy(op::GetTupleElement(param, 1))));
+
+  EXPECT_EQ(CountCopies(*module), 1);
+}
+
+TEST_F(CopyInsertionTest, ParameterAndParallelOpsWithPartialAliasing) {
+  // Test a case where one parameter is aliased with result while another one
+  // isn't.
+  //
+  //   +-- (p0 ,  p1)
+  //   |    |      |
+  //   |    |      |
+  // alias Negate  Negate
+  //   |    |      |
+  //   |    |      |
+  //   +-- (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+
+  auto negate0 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
+
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
+  builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 0);
+}
+
+TEST_F(CopyInsertionTest, ParameterAndOpsWithPartialAliasing) {
+  // Test a case where one parameter is aliased with result while another one
+  // isn't.
+  //
+  //   +-- (p0 ,  p1)
+  //   |    |      |
+  //   |    |      |
+  // alias Negate  Negate
+  //   |    |      |
+  //   |    Add----+
+  //   |    |      |
+  //   +-- (p0 ,  p1)
+  auto module = CreateNewModule();
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+
+  auto negate0 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
+
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
+
+  auto add = builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kAdd, negate0, negate1));
+  builder.AddInstruction(HloInstruction::CreateTuple({add, negate1}));
+  module->AddEntryComputation(builder.Build());
+  ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  InsertCopies(module.get());
+
+  EXPECT_EQ(CountCopies(*module), 0);
+}
+
 TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) {
   // Test a while instruction with a body which permutes its tuple parameter
   // elements and applies one operation to one of the elements. The addition of
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index a0eb9e6ddc..82c8fb1904 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -225,6 +225,32 @@ message HloScheduleProto {
   map<int64, InstructionSequence> sequences = 1;
 }
 
+message HloInputOutputAliasProto {
+  // The following proto describes a pair of aliased an input
+  // (described by parameter number and a ShapeIndex of the parameter)
+  // and an output (described by a ShapeIndex of the root
+  // instruction). For example:
+  //
+  // entry = {
+  //  output_shape_index={1},
+  //  parameter_number=0,
+  //  parameter_shape_index={1, 2},
+  // }
+  //
+  // This entry indicates that the first paremter's {1, 2} element is
+  // aliased with the {1} element of the root instruction.
+  message AliasEntryProto {
+    // ShapeIndex of the root hlo.
+    repeated int64 output_shape_index = 1;
+    // Number of the parameter in entry computation.
+    int64 parameter_number = 2;
+    // ShapeIndex of the parameter instruction.
+    repeated int64 parameter_shape_index = 3;
+  }
+
+  repeated AliasEntryProto entries = 1;
+}
+
 // Serialization of HloModule.
 message HloModuleProto {
   string name = 1;
@@ -243,6 +269,9 @@ message HloModuleProto {
 
   // The schedule for this module.
   HloScheduleProto schedule = 7;
+
+  // Describes alias information between inputs and outputs.
+  HloInputOutputAliasProto input_output_alias = 8;
 }
 
 // Serialization of LogicalBuffer.
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index c3da12e273..cf8e6594cb 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -59,8 +59,9 @@ class BufferValueMap {
   // construction process.
   using BufferNumber = int64;
 
-  explicit BufferValueMap(const HloDataflowAnalysis& dataflow)
-      : dataflow_(dataflow) {
+  explicit BufferValueMap(HloModule* module,
+                          const HloDataflowAnalysis& dataflow)
+      : module_(module), dataflow_(dataflow) {
     buffers_.reserve(dataflow_.values().size());
     value_to_buffer_number_.reserve(dataflow_.values().size());
     for (const HloValue* value : dataflow_.values()) {
@@ -171,6 +172,42 @@ class BufferValueMap {
     return value_to_buffer_number_.at(&value);
   }
 
+  void ComputeInputOutputAliasedBuffers(
+      const HloValue& value, std::vector<BufferNumber>* aliased_buffers) {
+    // Get parameter value from an aliased_input object.
+    const auto get_parameter_value =
+        [this](const std::pair<int64, ShapeIndex>& aliased_input)
+        -> const HloValue& {
+      int64 param_number = aliased_input.first;
+      const ShapeIndex& param_index = aliased_input.second;
+      return dataflow_.GetUniqueValueAt(
+          module_->entry_computation()->parameter_instruction(param_number),
+          param_index);
+    };
+
+    // If the value shows up in a root instruction, alias it with parameter
+    // intruction.
+    for (const HloPosition& pos : value.positions()) {
+      if (pos.instruction == module_->entry_computation()->root_instruction()) {
+        ShapeIndex output_index = pos.index;
+
+        auto aliased_input =
+            module_->input_output_alias_config().GetAliasedParameter(
+                output_index);
+        if (aliased_input) {
+          aliased_buffers->push_back(
+              GetBufferForValue(get_parameter_value(*aliased_input)));
+        }
+      }
+    }
+
+    // If the value is parameter instruction itself, alias it with itself.
+    if (value.instruction()->opcode() == HloOpcode::kParameter &&
+        value.instruction()->parent() == module_->entry_computation()) {
+      aliased_buffers->push_back(GetBufferForValue(value));
+    }
+  }
+
   void ComputeWhileAliasedBuffers(const HloValue& value,
                                   std::vector<BufferNumber>* aliased_buffers) {
     VLOG(3) << "Compute kWhile aliases";
@@ -278,6 +315,7 @@ class BufferValueMap {
       VLOG(2) << "Use of value " << value.ToShortString() << ": " << use;
     }
     std::vector<BufferNumber> aliased_buffers;
+    ComputeInputOutputAliasedBuffers(value, &aliased_buffers);
     ComputeWhileAliasedBuffers(value, &aliased_buffers);
     ComputeConditionalAliasedBuffers(value, &aliased_buffers);
     // Uniquify aliased buffers.
@@ -288,6 +326,8 @@ class BufferValueMap {
     return aliased_buffers;
   }
 
+  HloModule* module_;
+
   // Dataflow analysis used to construct the buffer map.
   const HloDataflowAnalysis& dataflow_;
 
@@ -461,7 +501,7 @@ StatusOr<std::unique_ptr<HloAliasAnalysis>> HloAliasAnalysis::Run(
                                                /*bitcast_defines_value=*/false,
                                                fusion_can_share_buffer));
 
-  BufferValueMap buffer_map(alias_analysis->dataflow_analysis());
+  BufferValueMap buffer_map(module, alias_analysis->dataflow_analysis());
   buffer_map.MergeAliasedBuffers();
 
   // Create a vector of HloBuffers, one for each set of values in the
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
index 0cd0ab36fc..5c8d97b2d1 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc
@@ -217,6 +217,181 @@ TEST_F(HloAliasAnalysisTest, NondistinctTuple) {
   EXPECT_FALSE(AnyValuesInSameBufferInterfere());
 }
 
+TEST_F(HloAliasAnalysisTest, ParametersWithAliasing) {
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+
+  auto negate0 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0));
+  auto negate1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1));
+
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1}));
+  module_->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+
+  // Cannot alias an output twice.
+  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
+
+  const HloAliasAnalysis& analysis = RunAnalysis();
+
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
+
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
+}
+
+TEST_F(HloAliasAnalysisTest, ParametersWithCrossAliasing) {
+  // parameter 0 aliased with output 1 and parameter 1 aliased with output 0.
+  //
+  //  (p0 ,  p1)
+  //     \   /
+  //      \ /
+  // alias X
+  //      / \
+  //     /   \
+  //  (p0  ,  p1)
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+  auto gte0 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0));
+  auto gte1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1));
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1}));
+  module_->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{1}));
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0}));
+
+  // Cannot alias an output twice.
+  ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+
+  const HloAliasAnalysis& analysis = RunAnalysis();
+
+  // Every Ops in this graph are aliased with each other.
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte0),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
+
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{0}));
+  EXPECT_EQ(analysis.GetUniqueBufferAt(gte1),
+            analysis.GetUniqueBufferAt(tuple, /*index=*/{1}));
+}
+
+TEST_F(HloAliasAnalysisTest, InputOutputAliasingWithWhile) {
+  // Test a simple single while instruction can be aliased with input and output
+  // of the computation.
+  //
+  // body((F32[], F32[]) %tuple_param):
+  //   %add = Add(%tuple_param{0}, %tuple_param{1})
+  //   return Tuple(%tuple_param{0}, %add)
+  //
+  // condition((F32[], F32[]) %tuple_param):
+  //   return Constant(false)
+  //
+  // entry:
+  //   %param1 = param1
+  //   %while = While(%param1, body, condition)
+  //   %while_1 = GTE(%while, 0)
+  //   %while_2 = GTE(%while, 1)
+  //   %negate_1 = Negate(%while_1)
+  //   %negate_2 = Negate(%while_2)
+  //   return Tuple(negate_1, negate_2)
+  //
+  const Shape tuple_shape =
+      ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_});
+
+  // Element 0 passes transparently through the body.
+  auto body_builder = HloComputation::Builder("body");
+  auto body_param = body_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "param"));
+  auto body_element_0 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0));
+  auto body_element_1 = body_builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1));
+  auto add = body_builder.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1));
+  auto body_tuple = body_builder.AddInstruction(
+      HloInstruction::CreateTuple({body_element_0, add}));
+  HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build());
+
+  // Condition computation trivially returns a constant "false".
+  auto cond_builder = HloComputation::Builder("condition");
+  auto cond_param = cond_builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "param"));
+  cond_builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<bool>(false)));
+  HloComputation* condition =
+      module_->AddEmbeddedComputation(cond_builder.Build());
+
+  auto builder = HloComputation::Builder(TestName());
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, tuple_shape, "p0"));
+
+  auto xla_while = builder.AddInstruction(
+      HloInstruction::CreateWhile(tuple_shape, condition, body, param));
+  auto while_element_1 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 0));
+  auto while_element_2 = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 1));
+  auto negate_1 = builder.AddInstruction(HloInstruction::CreateUnary(
+      scalar_shape_, HloOpcode::kNegate, while_element_1));
+  auto negate_2 = builder.AddInstruction(HloInstruction::CreateUnary(
+      scalar_shape_, HloOpcode::kNegate, while_element_2));
+  auto tuple =
+      builder.AddInstruction(HloInstruction::CreateTuple({negate_1, negate_2}));
+  module_->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0}));
+  TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias(
+      /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1}));
+
+  const HloAliasAnalysis& analysis = RunAnalysis();
+
+  EXPECT_THAT(
+      GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})),
+      UnorderedElementsAre(GetValueDefinedAt(param, {1}),
+                           GetValueDefinedAt(xla_while, /*index=*/{1}),
+                           GetValueDefinedAt(body_param, {1}),
+                           GetValueDefinedAt(cond_param, {1}),
+                           GetValueDefinedAt(add),
+                           GetValueDefinedAt(negate_2)));
+
+  EXPECT_THAT(
+      analysis.GetUniqueBufferAt(xla_while, /*index=*/{1}).ComputePositions(),
+      UnorderedElementsAre(
+          HloPosition{param, {1}}, HloPosition{xla_while, {1}},
+          HloPosition{while_element_2, {}}, HloPosition{body_param, {1}},
+          HloPosition{body_element_1, {}}, HloPosition{add, {}},
+          HloPosition{body_tuple, {1}}, HloPosition{tuple, {1}},
+          HloPosition{cond_param, {1}}, HloPosition{negate_2, {}}));
+
+  EXPECT_FALSE(AnyValuesInSameBufferInterfere());
+}
+
 TEST_F(HloAliasAnalysisTest, SingleCall) {
   // Test a single call of a subcomputation. The subcomputation adds its two
   // array-shaped parameters.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index 71122e73b1..5dcf6bc985 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -126,7 +126,7 @@ bool HloDataflowAnalysis::ValueIsDefinedAt(const HloInstruction* instruction,
 
 const HloValue& HloDataflowAnalysis::GetValueDefinedAt(
     const HloInstruction* instruction, const ShapeIndex& index) const {
-  CHECK(ValueIsDefinedAt(instruction, index));
+  CHECK(ValueIsDefinedAt(instruction, index)) << instruction->ToString();
   return GetUniqueValueAt(instruction, index);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
new file mode 100644
index 0000000000..8128fad07c
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc
@@ -0,0 +1,182 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+
+namespace xla {
+Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index,
+                                             int64 param_number,
+                                             const ShapeIndex& param_index) {
+  TF_RET_CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index))
+      << absl::StrCat("Tring to set up alias at ", output_index.ToString(),
+                      " which is an invalid index for shape ",
+                      ShapeUtil::HumanString(alias_.shape()));
+  // Output can't be aliased with multiple parameters.
+  TF_RET_CHECK(!alias_.element(output_index)) << absl::StrFormat(
+      "Trying to set up output alias for param %lld at %s but failed: output "
+      "index %s is already aliased with param %lld at %s",
+      param_number, param_index.ToString(), output_index.ToString(),
+      alias_.element(output_index)->first,
+      alias_.element(output_index)->second.ToString());
+  (*alias_.mutable_element(output_index)) =
+      std::make_pair(param_number, param_index);
+  return Status::OK();
+}
+
+HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const {
+  HloInputOutputAliasProto result;
+  alias_.ForEachElement(
+      [&](const ShapeIndex& index,
+          const absl::optional<std::pair<int64, ShapeIndex>>& data) {
+        if (data) {
+          HloInputOutputAliasProto::AliasEntryProto entry;
+          for (int64 i : index) {
+            entry.add_output_shape_index(i);
+          }
+          entry.set_parameter_number(data->first);
+          for (int64 i : data->second) {
+            entry.add_parameter_shape_index(i);
+          }
+          result.add_entries()->Swap(&entry);
+        }
+      });
+  return result;
+}
+
+StatusOr<HloInputOutputAliasConfig> HloInputOutputAliasConfig::CreateFromProto(
+    const Shape& output_shape, const HloInputOutputAliasProto& proto) {
+  HloInputOutputAliasConfig result(output_shape);
+  for (const HloInputOutputAliasProto::AliasEntryProto& entry :
+       proto.entries()) {
+    ShapeIndex output_index(entry.output_shape_index().begin(),
+                            entry.output_shape_index().end());
+
+    int64 param_number = entry.parameter_number();
+    ShapeIndex param_index(entry.parameter_shape_index().begin(),
+                           entry.parameter_shape_index().end());
+    TF_RETURN_IF_ERROR(
+        result.SetUpAlias(output_index, param_number, param_index));
+  }
+
+  return result;
+}
+
+string HloInputOutputAliasConfig::ToString() const {
+  std::vector<string> pieces;
+  pieces.push_back("HloInputOutputAliasConfig");
+
+  ForEachAlias([&](const ShapeIndex& output_index, int64 param_number,
+                   const ShapeIndex& param_index) {
+    pieces.push_back(absl::StrFormat(
+        "  OutputIndex %s is aliased with parameter %lld at %s:",
+        output_index.ToString(), param_number, param_index.ToString()));
+  });
+
+  return absl::StrJoin(pieces, "\n");
+}
+
+bool HloInputOutputAliasConfig::ParameterHasAlias(
+    int64 param_number, const ShapeIndex& param_index) const {
+  bool output = false;
+  alias_.ForEachElement(
+      [&](const xla::ShapeIndex&,
+          absl::optional<std::pair<int64, ShapeIndex>> alias) {
+        if (alias && alias->first == param_number &&
+            alias->second == param_index) {
+          output = true;
+        }
+      });
+  return output;
+}
+
+absl::optional<ShapeIndex> HloInputOutputAliasConfig::GetAliasedOutput(
+    int64 param_number, const ShapeIndex& param_index) const {
+  absl::optional<ShapeIndex> output;
+  alias_.ForEachElement(
+      [&](const xla::ShapeIndex& output_index,
+          absl::optional<std::pair<int64, ShapeIndex>> alias) {
+        if (alias && alias->first == param_number &&
+            alias->second == param_index) {
+          output = output_index;
+        }
+      });
+  return output;
+}
+
+absl::optional<std::pair<int64, ShapeIndex>>
+HloInputOutputAliasConfig::GetAliasedParameter(
+    const ShapeIndex& output_index) const {
+  CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index));
+  return alias_.element(output_index);
+}
+
+void HloInputOutputAliasConfig::ForEachAlias(AliasFn fn) const {
+  alias_.ForEachElement(
+      [&](const ShapeIndex& output_index,
+          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
+        if (aliased) {
+          fn(output_index, aliased->first, aliased->second);
+        }
+      });
+}
+
+Status HloInputOutputAliasConfig::ForEachAliasWithStatus(
+    AliasFnWithStatus fn) const {
+  return alias_.ForEachElementWithStatus(
+      [&](const ShapeIndex& output_index,
+          absl::optional<std::pair<int64, ShapeIndex>> aliased) {
+        if (aliased) {
+          TF_RETURN_IF_ERROR(fn(output_index, aliased->first, aliased->second));
+        }
+        return Status::OK();
+      });
+}
+
+Status HloInputOutputAliasConfig::Verify(const HloModule& module) const {
+  std::vector<ShapeTree<bool>> param_has_seen;
+  const HloComputation* entry = module.entry_computation();
+  for (int64 i = 0; i < entry->num_parameters(); ++i) {
+    HloInstruction* param = entry->parameter_instruction(i);
+    param_has_seen.emplace_back(param->shape());
+  }
+  return ForEachAliasWithStatus([&](const ShapeIndex& output_index,
+                                    int64 param_number,
+                                    const ShapeIndex& param_index) -> Status {
+    const HloInstruction* root = entry->root_instruction();
+
+    const Shape& param_shape =
+        entry->parameter_instruction(param_number)->shape();
+    const Shape& output_shape = root->shape();
+    TF_RET_CHECK(entry->num_parameters() > param_number);
+    TF_RET_CHECK(ShapeUtil::IndexIsValid(param_shape, param_index));
+    TF_RET_CHECK(ShapeUtil::IndexIsValid(output_shape, output_index));
+
+    // Check each param_number and param_index pair only show up once. No
+    // input can be aliased with output buffers.
+    TF_RET_CHECK(param_has_seen[param_number].element(param_index) == false);
+
+    *(param_has_seen[param_number].mutable_element(param_index)) = true;
+
+    return Status::OK();
+  });
+}
+
+std::ostream& operator<<(std::ostream& out,
+                         const HloInputOutputAliasConfig& config) {
+  out << config.ToString();
+  return out;
+}
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
new file mode 100644
index 0000000000..0fae75842b
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h
@@ -0,0 +1,102 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
+
+#include <utility>
+
+#include "absl/types/optional.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/shape_tree.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+
+namespace xla {
+
+class HloModule;
+
+// This class specifies the alias map from output index to parameter number and
+// parameter index in the entry computation.
+class HloInputOutputAliasConfig {
+ public:
+  HloInputOutputAliasConfig() = default;
+
+  explicit HloInputOutputAliasConfig(Shape shape) : alias_(shape) {}
+
+  virtual ~HloInputOutputAliasConfig() = default;
+
+  // Sets up alias config from `output_index` to `param_index` at
+  // `param_number`.
+  Status SetUpAlias(const ShapeIndex& output_index, int64 param_number,
+                    const ShapeIndex& param_index);
+
+  // Returns true if the given parameter is aliased with one of the output
+  // buffers.
+  bool ParameterHasAlias(int64 param_number,
+                         const ShapeIndex& param_index) const;
+
+  // (De)Serializes an HloInputOutoutAliasConfig to/from an
+  // HloInputOutoutAliasProto.
+  HloInputOutputAliasProto ToProto() const;
+
+  static StatusOr<HloInputOutputAliasConfig> CreateFromProto(
+      const Shape& output_shape, const HloInputOutputAliasProto& proto);
+
+  // Returns the output index that the given parameter and parameter index is
+  // aliased with. A nullopt is returned if there is no output that is aliased
+  // with the parameter number and index.
+  absl::optional<ShapeIndex> GetAliasedOutput(
+      int64 param_number, const ShapeIndex& param_index) const;
+
+  // Returns the number of parameter and index of the parameter buffer that the
+  // given output buffer index is aliased with. A nullopt is returned if there
+  // is no parameter is aliased with the specific output.
+  absl::optional<std::pair<int64, ShapeIndex>> GetAliasedParameter(
+      const ShapeIndex& output_index) const;
+
+  using AliasFn =
+      std::function<void(const ShapeIndex& output_index, int64 param_number,
+                         const ShapeIndex& param_index)>;
+
+  // Iterates through each aliased output and input.
+  void ForEachAlias(AliasFn fn) const;
+
+  using AliasFnWithStatus =
+      std::function<Status(const ShapeIndex& output_index, int64 param_number,
+                           const ShapeIndex& param_index)>;
+
+  // Verifies that the given config is valid for the given module.
+  // Specifically, the config's input and output should be in-bound and size of
+  // the aliased buffers should match.
+  Status Verify(const HloModule& module) const;
+
+  Status ForEachAliasWithStatus(AliasFnWithStatus fn) const;
+
+  string ToString() const;
+
+ private:
+  // A ShapeTree which indicates the list of buffers that's expected to be
+  // aliased. The key on this shape tree represents the output index. The value
+  // is a pair of parameter number and index into the buffer. If the value is
+  // nullopt, it means there is no parameter aliasing for this output.
+  ShapeTree<absl::optional<std::pair<int64, ShapeIndex>>> alias_;
+};
+
+std::ostream& operator<<(std::ostream& out,
+                         const HloInputOutputAliasConfig& config);
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_
diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
new file mode 100644
index 0000000000..3b61ff04e6
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc
@@ -0,0 +1,184 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
+
+#include <memory>
+#include <string>
+
+#include "absl/algorithm/container.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_dce.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_ordering.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+class HloInputOutputAliasConfigTest : public HloTestBase {
+ protected:
+  void expect_aliased(const ShapeIndex& output_index, int64 param_number,
+                      const ShapeIndex& param_index,
+                      const HloInputOutputAliasConfig& config) {
+    absl::optional<ShapeIndex> aliased_output =
+        config.GetAliasedOutput(param_number, param_index);
+
+    EXPECT_TRUE(aliased_output);
+    EXPECT_EQ(aliased_output.value(), output_index);
+
+    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
+        config.GetAliasedParameter(output_index);
+
+    EXPECT_TRUE(aliased_param);
+    EXPECT_EQ(aliased_param.value(), std::make_pair(param_number, param_index));
+  }
+
+  void expect_not_aliased(const ShapeIndex& output_index, int64 param_number,
+                          const ShapeIndex& param_index,
+                          const HloInputOutputAliasConfig& config) {
+    absl::optional<ShapeIndex> aliased_output =
+        config.GetAliasedOutput(param_number, param_index);
+
+    EXPECT_FALSE(aliased_output && aliased_output == output_index);
+
+    absl::optional<std::pair<int64, ShapeIndex>> aliased_param =
+        config.GetAliasedParameter(output_index);
+
+    EXPECT_FALSE(aliased_param && aliased_param->first == param_number &&
+                 aliased_param->second == param_index);
+  }
+};
+
+TEST_F(HloInputOutputAliasConfigTest, SimpleAliasing) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT root = (f32[], f32[]) tuple(%a, %b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
+                                 /*param_index=*/{}));
+
+  expect_aliased(/*output_index=*/{0}, /*param_number=*/1,
+                 /*param_index=*/{}, config);
+
+  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
+                     /*param_index=*/{}, config);
+
+  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
+                     /*param_index=*/{}, config);
+}
+
+TEST_F(HloInputOutputAliasConfigTest, SimpleAliasingWithTupleInput) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  param = (f32[], f32[]) parameter(0)
+  gte1 = f32[] get-tuple-element(%param), index=0
+  gte2 = f32[] get-tuple-element(%param), index=1
+  ROOT root = (f32[], f32[]) tuple(%gte1, %gte2)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
+                                 /*param_index=*/{0}));
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
+                                 /*param_index=*/{1}));
+
+  expect_aliased(/*output_index=*/{0}, /*param_number=*/0,
+                 /*param_index=*/{0}, config);
+
+  expect_aliased(/*output_index=*/{1}, /*param_number=*/0,
+                 /*param_index=*/{1}, config);
+
+  expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1,
+                     /*param_index=*/{}, config);
+
+  expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0,
+                     /*param_index=*/{}, config);
+}
+
+TEST_F(HloInputOutputAliasConfigTest, InputDoNotAliasTwice) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT root = (f32[], f32[]) tuple(%a, %b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
+                                 /*param_index=*/{}));
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0,
+                                 /*param_index=*/{}));
+
+  ASSERT_IS_NOT_OK(config.Verify(*module));
+}
+
+TEST_F(HloInputOutputAliasConfigTest, OutputDoNotAliasTwice) {
+  const string module_str = R"(
+HloModule TEST
+
+ENTRY main {
+  a = f32[] parameter(0)
+  b = f32[] parameter(1)
+  ROOT root = (f32[], f32[]) tuple(%a, %b)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(module_str));
+
+  HloInputOutputAliasConfig config(
+      module->entry_computation()->root_instruction()->shape());
+
+  TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
+                                 /*param_index=*/{}));
+
+  ASSERT_IS_NOT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1,
+                                     /*param_index=*/{}));
+}
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 4b0612b368..a53aaed49b 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -73,6 +73,8 @@ HloComputation* HloModule::AddComputationInternal(
       config_.SetDefaultComputationLayout(
           entry_computation_->ComputeProgramShape());
     }
+    input_output_alias_config_ = HloInputOutputAliasConfig(
+        entry_computation_->root_instruction()->shape());
   }
 
   if (uniquify_identifiers) {
@@ -252,6 +254,9 @@ HloModuleProto HloModule::ToProto() const {
   if (has_schedule()) {
     *proto.mutable_schedule() = schedule().ToProto().ValueOrDie();
   }
+
+  *proto.mutable_input_output_alias() = input_output_alias_config().ToProto();
+
   return proto;
 }
 
@@ -328,6 +333,10 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
   }
   TF_RET_CHECK(module->entry_computation_ != nullptr);
 
+  TF_ASSIGN_OR_RETURN(module->input_output_alias_config_,
+                      HloInputOutputAliasConfig::CreateFromProto(
+                          result_shape, proto.input_output_alias()));
+
   // Because we didn't uniquify the names or the ids, double-check that the
   // instruction and computation names and ids are unique from the proto.
   absl::flat_hash_set<string> computation_names;
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index de6d3a13bf..5dc795fabe 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -31,6 +31,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_clone_context.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
@@ -222,6 +223,15 @@ class HloModule {
     return result;
   }
 
+  // input_output_alias_config indicates the list of aliased buffers that are
+  // expected from the module.
+  HloInputOutputAliasConfig& input_output_alias_config() {
+    return input_output_alias_config_;
+  }
+  const HloInputOutputAliasConfig& input_output_alias_config() const {
+    return input_output_alias_config_;
+  }
+
   // Returns an id that is unique to this module across all modules created over
   // the lifetime of this process.
   int unique_id() const { return unique_id_; }
@@ -290,6 +300,10 @@ class HloModule {
   // sequential order of instructions for each non-fusion computation in the
   // module.
   absl::optional<HloSchedule> schedule_;
+
+  // alias_config indicates the alias information of input/output buffers that
+  // are expected from the module.
+  HloInputOutputAliasConfig input_output_alias_config_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 912d2dbe75..c3289d2b22 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -1316,6 +1316,8 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     TF_RETURN_IF_ERROR(module->schedule().Verify());
   }
 
+  TF_RETURN_IF_ERROR(module->input_output_alias_config().Verify(*module));
+
   return false;
 }
 
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 73f541d505..51cedce7f0 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -72,7 +72,7 @@ class ShapeIndex {
   void push_back(int64 value) { indices_.push_back(value); }
   void pop_back() { indices_.pop_back(); }
 
-  // push_front is O(n^2), but shapes don't usually have a ton of dimensions.
+  // push_front is O(n), but shapes don't usually have a ton of dimensions.
   void push_front(int64 value) { indices_.insert(indices_.begin(), value); }
 
   using container_type = absl::InlinedVector<int64, 2>;
-- 
GitLab


From 55cf8c0db7bf6bf68380035032c24da2e5fa385b Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 11 Oct 2018 12:13:10 -0700
Subject: [PATCH 0793/1085] Don't segfault if unable to convert non-rectangular
 inputs to tensor

PiperOrigin-RevId: 216738986
---
 tensorflow/python/eager/tensor_test.py      | 7 +++++++
 tensorflow/python/lib/core/py_seq_tensor.cc | 1 +
 2 files changed, 8 insertions(+)

diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py
index 344a9b25bd..cc79f55d3b 100644
--- a/tensorflow/python/eager/tensor_test.py
+++ b/tensorflow/python/eager/tensor_test.py
@@ -347,6 +347,13 @@ class TFETensorUtilTest(test_util.TensorFlowTestCase):
     instance_dir.remove("test_attr")
     self.assertEqual(instance_dir, type_dir)
 
+  def testNonRectangularPackAsConstant(self):
+    l = [array_ops.zeros((10, 1)).numpy(), array_ops.zeros(1).numpy()]
+
+    with self.assertRaisesRegexp(
+        ValueError, "non-rectangular Python sequence"):
+      constant_op.constant(l)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc
index 269142a7c2..f681cff6cf 100644
--- a/tensorflow/python/lib/core/py_seq_tensor.cc
+++ b/tensorflow/python/lib/core/py_seq_tensor.cc
@@ -220,6 +220,7 @@ const char ErrorFoundFloat[] =
       /* Iterate over outer dim, and recursively convert each element. */ \
       const int64 s = shape.dim_size(0);                                  \
       Safe_PyObjectPtr seq = make_safe(PySequence_Fast(obj, ""));         \
+      if (TF_PREDICT_FALSE(seq == nullptr)) return ErrorRectangular;      \
       if (TF_PREDICT_FALSE(s != PySequence_Fast_GET_SIZE(seq.get()))) {   \
         return ErrorRectangular;                                          \
       }                                                                   \
-- 
GitLab


From 2b279edb86bac0057132874329ed07274c2b0cd1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 12:53:55 -0700
Subject: [PATCH 0794/1085] [XLA] More module proto verification added.

PiperOrigin-RevId: 216745236
---
 .../cpu/cpu_hlo_support_checker_test.cc       |  7 +++-
 .../gpu/gpu_hlo_support_checker_test.cc       |  7 +++-
 .../compiler/xla/service/hlo_instruction.cc   |  1 -
 .../compiler/xla/service/hlo_proto_util.cc    |  2 +-
 .../compiler/xla/service/hlo_sharding.cc      |  3 ++
 .../compiler/xla/service/hlo_verifier.cc      | 37 +++++++++++++++----
 .../compiler/xla/service/hlo_verifier.h       |  2 +
 .../compiler/xla/service/layout_assignment.cc |  4 ++
 tensorflow/compiler/xla/service/service.cc    |  9 ++---
 9 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc
index be1208fb2d..e6b6fcdf68 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc
@@ -57,10 +57,13 @@ TEST_F(CpuHloSupportCheckerTest, SparseUnimplemented) {
       HloInstruction::CreateParameter(1, sparse_shape, "param1"));
   builder.AddInstruction(HloInstruction::CreateBinary(
       sparse_shape, HloOpcode::kAdd, param0, param1));
-  auto module = CreateNewModule();
+  // Since verifier is reporting sparse layouts as errors, we should
+  // use a regular HloModule instead of VerifiedHloModule to avoid
+  // verifier errors being triggered in the destructor.
+  auto module = HloTestBase::CreateNewModule();
   module->AddEntryComputation(builder.Build());
 
-  Status status = checker().Run(module).status();
+  Status status = checker().Run(module.get()).status();
   ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED);
   EXPECT_THAT(status.error_message(),
               HasSubstr("CPU backend does not support"));
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc
index 27a4d0b601..7d01eeb025 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc
@@ -57,10 +57,13 @@ TEST_F(GpuHloSupportCheckerTest, SparseUnimplemented) {
       HloInstruction::CreateParameter(1, sparse_shape, "param1"));
   builder.AddInstruction(HloInstruction::CreateBinary(
       sparse_shape, HloOpcode::kAdd, param0, param1));
-  auto module = CreateNewModule();
+  // Since verifier is reporting sparse layouts as errors, we should
+  // use a regular HloModule instead of VerifiedHloModule to avoid
+  // verifier errors being triggered in the destructor.
+  auto module = HloTestBase::CreateNewModule();
   module->AddEntryComputation(builder.Build());
 
-  Status status = checker().Run(module).status();
+  Status status = checker().Run(module.get()).status();
   ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED);
   EXPECT_THAT(status.error_message(),
               HasSubstr("GPU backend does not support"));
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 306d29a2ae..b6df63c983 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2677,7 +2677,6 @@ Status HloInstruction::AcceptOrdered(
 }
 
 const Shape& HloInstruction::shape() const {
-  TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(shape_));
   return shape_;
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc
index 026a0e8fba..7bb65ae665 100644
--- a/tensorflow/compiler/xla/service/hlo_proto_util.cc
+++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc
@@ -42,7 +42,7 @@ StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
                       HloModule::CreateFromProto(proto, module_config));
   TF_RETURN_IF_ERROR(
-      HloVerifier(/*layout_sensitive=*/true, /*allow_mixed_precision=*/false)
+      HloVerifier(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false)
           .Run(module.get())
           .status());
   return std::move(module);
diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index 188f4acc79..70a860c356 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -469,6 +469,9 @@ absl::optional<HloSharding> HloSharding::ExtractSingleSharding() const {
   if (!IsTuple()) {
     return *this;
   }
+  if (tuple_elements_.empty()) {
+    return absl::nullopt;
+  }
   for (int64 i = 1; i < tuple_elements_.size(); ++i) {
     if (tuple_elements_[0] != tuple_elements_[i]) {
       return absl::nullopt;
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index c3289d2b22..ba95cef21d 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -27,6 +27,14 @@ limitations under the License.
 
 namespace xla {
 
+Status ShapeVerifier::Preprocess(HloInstruction* hlo) {
+  if (LayoutUtil::IsSparseArray(hlo->shape())) {
+    return InternalError("Sparse arrays are not yet fully supported: %s",
+                         hlo->ToString());
+  }
+  return Status::OK();
+}
+
 static Status CheckOperandCount(const HloInstruction* hlo, int expected) {
   if (hlo->operand_count() != expected) {
     return InternalError("Expected %d operands for %s instruction: %s",
@@ -286,6 +294,10 @@ Status ShapeVerifier::HandleSort(HloInstruction* sort) {
 
 Status ShapeVerifier::HandleConstant(HloInstruction* constant) {
   TF_RETURN_IF_ERROR(CheckOperandCount(constant, 0));
+  if (!Cast<HloConstantInstruction>(constant)->HasLiteral()) {
+    return InternalError("Constant is required to have a valid literal: %s",
+                         constant->ToString());
+  }
   return CheckShape(constant, constant->literal().shape());
 }
 
@@ -877,14 +889,21 @@ Status VerifyEntryAndExitShapes(const HloModule& module) {
 Status CheckEntryComputationLayout(const HloModule& module) {
   const HloComputation* computation = module.entry_computation();
   const auto& layout = module.entry_computation_layout();
+  const ShapeLayout& result_layout = layout.result_layout();
+
+  if (LayoutUtil::IsSparseArray(result_layout.shape())) {
+    return Unimplemented(
+        "Sparse arrays are not yet fully supported in program result shape: %s",
+        ShapeUtil::HumanStringWithLayout(result_layout.shape()));
+  }
 
   if (!ShapeUtil::Compatible(computation->root_instruction()->shape(),
-                             layout.result_layout().shape())) {
+                             result_layout.shape())) {
     return InternalError(
         "Shape of the root instruction of entry computation (%s) should be "
         "compatible to one specified in module's entry computation layout (%s)",
         ShapeUtil::HumanString(computation->root_instruction()->shape()),
-        ShapeUtil::HumanString(layout.result_layout().shape()));
+        ShapeUtil::HumanString(result_layout.shape()));
   }
 
   if (computation->num_parameters() != layout.parameter_count()) {
@@ -895,15 +914,19 @@ Status CheckEntryComputationLayout(const HloModule& module) {
   }
 
   for (int i = 0; i < computation->num_parameters(); ++i) {
-    if (!ShapeUtil::Compatible(computation->parameter_instruction(i)->shape(),
-                               layout.parameter_shape(i))) {
+    const HloInstruction* parameter = computation->parameter_instruction(i);
+    if (LayoutUtil::IsSparseArray(layout.parameter_shape(i))) {
+      return Unimplemented(
+          "Sparse arrays are not yet fully supported "
+          "in program parameter shape: %s",
+          ShapeUtil::HumanStringWithLayout(layout.parameter_shape(i)));
+    }
+    if (!ShapeUtil::Compatible(parameter->shape(), layout.parameter_shape(i))) {
       return InternalError(
           "Shape of the entry computation parameter %d is %s should be "
           "compatible to the one specified in module's entry computation "
           "layout %s",
-          i,
-          ShapeUtil::HumanString(
-              computation->parameter_instruction(i)->shape()),
+          i, ShapeUtil::HumanString(parameter->shape()),
           ShapeUtil::HumanString(layout.parameter_shape(i)));
     }
   }
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index cb49cb95ba..e1f3402465 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -32,6 +32,8 @@ class ShapeVerifier : public DfsHloVisitor {
       : layout_sensitive_(layout_sensitive),
         allow_mixed_precision_(allow_mixed_precision) {}
 
+  Status Preprocess(HloInstruction* hlo) override;
+
   Status HandleElementwiseUnary(HloInstruction* hlo) override;
   Status HandleElementwiseBinary(HloInstruction* hlo) override;
   Status HandleClamp(HloInstruction* clamp) override;
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index be0351fa6b..232d1dc087 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -1523,6 +1523,10 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
     // Execute extra verification step once the layout has been finalized.
     TF_RETURN_IF_ERROR(Verify(instruction));
 
+    // Shape must be valid.
+    TF_RETURN_IF_ERROR(
+        ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape()));
+
     // Verify all layouts in the shape have been set.
     TF_RET_CHECK(LayoutUtil::HasLayout(instruction->shape()));
   }
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 084df17951..d290c0eb5d 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -345,8 +345,7 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> Service::BuildExecutables(
   for (int64 i = 0; i < module_protos.size(); ++i) {
     const HloModuleProto* proto = module_protos[i];
     const HloModuleConfig& config = *module_configs[i];
-    TF_ASSIGN_OR_RETURN(auto module,
-                        HloModule::CreateFromProto(*proto, config));
+    TF_ASSIGN_OR_RETURN(auto module, CreateModuleFromProto(*proto, config));
     modules.push_back(std::move(module));
   }
 
@@ -810,7 +809,7 @@ StatusOr<std::unique_ptr<Executable>> Service::BuildExecutable(
   }
 
   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
-                      HloModule::CreateFromProto(module_proto, *module_config));
+                      CreateModuleFromProto(module_proto, *module_config));
 
   TF_RETURN_IF_ERROR(MaybeDumpUnoptimizedHloModule(*module));
 
@@ -1081,7 +1080,7 @@ Status Service::ComputeConstantGraph(const ComputeConstantGraphRequest* arg,
   HloModuleConfig config(program_shape);
 
   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
-                      HloModule::CreateFromProto(arg->computation(), config));
+                      CreateModuleFromProto(arg->computation(), config));
 
   HloEvaluator evaluator;
   TF_ASSIGN_OR_RETURN(auto result_literal, evaluator.Evaluate<Literal>(
@@ -1118,7 +1117,7 @@ Status Service::GetComputationGraphStats(
   HloModuleConfig config(arg->computation().program_shape());
   config.set_debug_options(arg->debug_options());
   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
-                      HloModule::CreateFromProto(arg->computation(), config));
+                      CreateModuleFromProto(arg->computation(), config));
 
   hlo_graph_dumper::MaybeDumpHloModule(*module,
                                        "computation statistics subject");
-- 
GitLab


From 2f509fe594d51c74eb4779c537ae94fd0486aa97 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 12:59:15 -0700
Subject: [PATCH 0795/1085] Convert TensorFlow's highwayhash dependency to new
 third party import method.

PiperOrigin-RevId: 216746022
---
 tensorflow/workspace.bzl                          | 13 ++-----------
 third_party/highwayhash/BUILD                     |  1 +
 .../BUILD.bazel}                                  |  2 +-
 third_party/highwayhash/workspace.bzl             | 15 +++++++++++++++
 4 files changed, 19 insertions(+), 12 deletions(-)
 create mode 100644 third_party/highwayhash/BUILD
 rename third_party/{highwayhash.BUILD => highwayhash/BUILD.bazel} (100%)
 create mode 100644 third_party/highwayhash/workspace.bzl

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 6229e01bbe..c697bf6452 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -22,6 +22,7 @@ load(
 )
 load("//third_party/aws:workspace.bzl", aws = "repo")
 load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
+load("//third_party/highwayhash:workspace.bzl", highwayhash = "repo")
 load("//third_party/icu:workspace.bzl", icu = "repo")
 load("//third_party/jpeg:workspace.bzl", jpeg = "repo")
 load("//third_party/nasm:workspace.bzl", nasm = "repo")
@@ -30,6 +31,7 @@ def initialize_third_party():
     """ Load third party repositories.  See above load() statements. """
     aws()
     flatbuffers()
+    highwayhash()
     icu()
     jpeg()
     nasm()
@@ -229,17 +231,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         ],
     )
 
-    tf_http_archive(
-        name = "highwayhash",
-        build_file = clean_dep("//third_party:highwayhash.BUILD"),
-        sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37",
-        strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968",
-        urls = [
-            "http://mirror.bazel.build/github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz",
-            "https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz",
-        ],
-    )
-
     tf_http_archive(
         name = "png_archive",
         build_file = clean_dep("//third_party:png.BUILD"),
diff --git a/third_party/highwayhash/BUILD b/third_party/highwayhash/BUILD
new file mode 100644
index 0000000000..2f5d02becb
--- /dev/null
+++ b/third_party/highwayhash/BUILD
@@ -0,0 +1 @@
+# Dummy BUILD file to make this directory a package.
diff --git a/third_party/highwayhash.BUILD b/third_party/highwayhash/BUILD.bazel
similarity index 100%
rename from third_party/highwayhash.BUILD
rename to third_party/highwayhash/BUILD.bazel
index 08cb84ea2c..39b148bc00 100644
--- a/third_party/highwayhash.BUILD
+++ b/third_party/highwayhash/BUILD.bazel
@@ -9,8 +9,8 @@ cc_library(
     name = "sip_hash",
     srcs = ["highwayhash/sip_hash.cc"],
     hdrs = [
-        "highwayhash/sip_hash.h",
         "highwayhash/endianess.h",
+        "highwayhash/sip_hash.h",
         "highwayhash/state_helpers.h",
     ],
     visibility = ["//visibility:public"],
diff --git a/third_party/highwayhash/workspace.bzl b/third_party/highwayhash/workspace.bzl
new file mode 100644
index 0000000000..793297b9ba
--- /dev/null
+++ b/third_party/highwayhash/workspace.bzl
@@ -0,0 +1,15 @@
+"""loads the highwayhash library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "highwayhash",
+        urls = [
+            "http://mirror.bazel.build/github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz",
+            "https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz",
+        ],
+        sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37",
+        strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968",
+        build_file = "//third_party/highwayhash:BUILD.bazel",
+    )
-- 
GitLab


From 3fd902b26cb09374a19391058ca0b72d997e76b2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 13:02:00 -0700
Subject: [PATCH 0796/1085] Validate the control flow information after
 subgraph rewrite, since the rewrite may remove some special nodes (e.g.
 TPUReplicateMetadata) which break the rule.

PiperOrigin-RevId: 216746513
---
 tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index da27f837e8..da030b3bcc 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -1109,6 +1109,9 @@ Status Encapsulator::Subgraph::BuildFunctionDef(
   function_def_name_ = name;
 
   FunctionDef fdef;
+  // Verify that the graph has well-formed control flow structure.
+  std::vector<ControlFlowInfo> dummy;
+  TF_RETURN_IF_ERROR(BuildControlFlowInfo(graph_.get(), &dummy));
   TF_RETURN_IF_ERROR(GraphToFunctionDef(*graph_, name, &fdef));
 
   if (VLOG_IS_ON(1)) {
@@ -1531,9 +1534,6 @@ Status Encapsulator::SplitIntoSubgraphs(FunctionLibraryDefinition* library) {
   for (auto& entry : subgraphs_) {
     Subgraph& subgraph = entry.second;
     FixupSourceAndSinkEdges(subgraph.GetGraph());
-    // Verify that the graph has well-formed control flow structure.
-    std::vector<ControlFlowInfo> dummy;
-    TF_RETURN_IF_ERROR(BuildControlFlowInfo(subgraph.GetGraph(), &dummy));
   }
 
   if (VLOG_IS_ON(1)) {
-- 
GitLab


From 43b043148b97460b57481cebcc6dc7a53f53e8c6 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Thu, 11 Oct 2018 13:13:23 -0700
Subject: [PATCH 0797/1085] Move ParseHostComputeCore to side_effect_util.cc.
 No functionality change.

PiperOrigin-RevId: 216748472
---
 tensorflow/compiler/tf2xla/BUILD              |  1 +
 .../compiler/tf2xla/side_effect_util.cc       | 25 +++++++++++++++++++
 tensorflow/compiler/tf2xla/side_effect_util.h |  6 +++++
 3 files changed, 32 insertions(+)

diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index 07210dcf37..f0e7791e98 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -662,5 +662,6 @@ cc_library(
     hdrs = ["side_effect_util.h"],
     deps = [
         "//tensorflow/core:core_cpu",
+        "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/compiler/tf2xla/side_effect_util.cc b/tensorflow/compiler/tf2xla/side_effect_util.cc
index 6cd7b24592..b233e6b2c2 100644
--- a/tensorflow/compiler/tf2xla/side_effect_util.cc
+++ b/tensorflow/compiler/tf2xla/side_effect_util.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/tf2xla/side_effect_util.h"
 
+#include "absl/strings/numbers.h"
 #include "tensorflow/core/graph/algorithm.h"
 
 namespace tensorflow {
@@ -64,4 +65,28 @@ bool HasSideEffectingNodes(const Graph& g) {
   return false;
 }
 
+Status ParseHostComputeCoreList(absl::Span<const string> list_from_attr,
+                                std::map<string, int>* host_compute_core) {
+  for (const auto& hc_core : list_from_attr) {
+    std::vector<string> parts = str_util::Split(hc_core, ":");
+    if (parts.size() != 2) {
+      return errors::InvalidArgument(
+          "Malformed host_compute_core entry ", hc_core,
+          " should be <cluster_name>:<core_number>.");
+    }
+    int core;
+    if (!absl::numbers_internal::safe_strto32_base(parts[1], &core, 10)) {
+      return errors::InvalidArgument("Malformed host_compute_core entry ",
+                                     hc_core,
+                                     " part after ':' should be an integer.");
+    }
+    if (host_compute_core->find(parts[0]) != host_compute_core->end()) {
+      return errors::InvalidArgument(
+          "Duplicate host_compute_core entry for cluster ", parts[0]);
+    }
+    (*host_compute_core)[parts[0]] = core;
+  }
+  return Status::OK();
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/side_effect_util.h b/tensorflow/compiler/tf2xla/side_effect_util.h
index ad07624729..f22ddb2f58 100644
--- a/tensorflow/compiler/tf2xla/side_effect_util.h
+++ b/tensorflow/compiler/tf2xla/side_effect_util.h
@@ -42,6 +42,12 @@ std::set<std::string> CalculateTokenInputsForOutputToken(const Graph& g);
 // Returns whether a graph contains side-effecting nodes.
 bool HasSideEffectingNodes(const Graph& g);
 
+// Parse the mapping from outside_compilation_subgraph name to core number,
+// which is specified in an attr as a list of strings
+// <subgraph_name>:<core_index>.
+Status ParseHostComputeCoreList(absl::Span<const string> list_from_attr,
+                                std::map<string, int>* host_compute_core);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_COMPILER_TF2XLA_SIDE_EFFECT_UTIL_H_
-- 
GitLab


From fc2b559e6049537109ac7d7956aaddce2dd297d8 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Thu, 11 Oct 2018 13:27:02 -0700
Subject: [PATCH 0798/1085] In control flow functionalization, mark generated
 If/While with outside compilation attribute if necessary.

PiperOrigin-RevId: 216750679
---
 tensorflow/compiler/tf2xla/functionalize_cond.cc  | 6 ++++++
 tensorflow/compiler/tf2xla/functionalize_while.cc | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc
index db256e577a..46649b8cc4 100644
--- a/tensorflow/compiler/tf2xla/functionalize_cond.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc
@@ -695,6 +695,12 @@ Status Conditional::BuildIfNode(Graph* graph,
   VLOG(3) << "Build output type: " << DataTypeVectorString(out_type);
 
   builder.Attr("Tcond", DT_BOOL);
+  string outside_compilation;
+  if (GetNodeAttr(predicate_.node->def(), kXlaOutsideCompilationAttrName,
+                  &outside_compilation)
+          .ok()) {
+    builder.Attr(kXlaOutsideCompilationAttrName, outside_compilation);
+  }
   builder.Device(predicate_.node->assigned_device_name());
   // Conditional should be the first input ...
   builder.Input(NodeDefBuilder::NodeOut(predicate_.node->name(),
diff --git a/tensorflow/compiler/tf2xla/functionalize_while.cc b/tensorflow/compiler/tf2xla/functionalize_while.cc
index 7c3ad448ef..d87436a7b4 100644
--- a/tensorflow/compiler/tf2xla/functionalize_while.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_while.cc
@@ -523,6 +523,12 @@ Status FunctionalizeLoop(const FunctionLibraryDefinition* lookup_library,
   builder.Attr("T", arg_types);
   builder.Attr("cond", cond_name);
   builder.Attr("body", body_name);
+  string outside_compilation;
+  if (GetNodeAttr(frame->loop_cond->def(), kXlaOutsideCompilationAttrName,
+                  &outside_compilation)
+          .ok()) {
+    builder.Attr(kXlaOutsideCompilationAttrName, outside_compilation);
+  }
   std::vector<NodeDefBuilder::NodeOut> inputs;
   for (int i = 0; i < frame->args.size(); ++i) {
     const Arg& arg = frame->args[i];
-- 
GitLab


From fa343effdeb59b68be0218dee014d510f4c95597 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 11 Oct 2018 13:46:15 -0700
Subject: [PATCH 0799/1085] [tf.data] Performance modeling fixes.

PiperOrigin-RevId: 216753799
---
 tensorflow/core/framework/model.cc            | 32 +++++++++++++------
 .../core/kernels/data/cache_dataset_ops.cc    | 14 ++++----
 tensorflow/python/data/ops/dataset_ops.py     | 29 ++++++++---------
 3 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index bfdb3a6658..9684b736a7 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -59,9 +59,15 @@ int64 Model::Node::ProcessingTimeLocked() {
       return NanosPerElementLocked() + batch_size * ProcessingTimeForInputs();
     }
     case Type::FILTER: {
+      if (inputs_.size() <= 1) {
+        return NanosPerElementLocked();
+      }
       std::shared_ptr<Node> input = inputs_.front();
-      double ratio = static_cast<double>(input->num_elements()) /
-                     static_cast<double>(num_elements_);
+      double ratio = 0.0L;
+      if (num_elements_ > 0) {
+        ratio = static_cast<double>(input->num_elements()) /
+                static_cast<double>(num_elements_);
+      }
       return NanosPerElementLocked() +
              static_cast<int64>(ratio *
                                 static_cast<double>(ProcessingTimeForInputs()));
@@ -115,15 +121,21 @@ int64 Model::Node::OutputTimeLocked(std::vector<int64>* input_times) {
              batch_size * OutputTimeForInputs(input_times);
     }
     case Type::FILTER: {
+      if (inputs_.size() <= 1) {
+        return NanosPerElementLocked();
+      }
       std::shared_ptr<Node> input = inputs_.front();
-      int64 old_value = (*input_times)[input_times->size() - 1];
-      double ratio = static_cast<double>(input->num_elements()) /
-                     static_cast<double>(num_elements_);
-      (*input_times)[input_times->size() - 1] = static_cast<int64>(
-          static_cast<double>(old_value + NanosPerElementLocked()) / ratio);
-      auto cleanup = gtl::MakeCleanup([input_times, old_value]() {
-        (*input_times)[input_times->size() - 1] = old_value;
-      });
+      double ratio = 0.0L;
+      if (num_elements_ > 0) {
+        ratio = static_cast<double>(input->num_elements()) /
+                static_cast<double>(num_elements_);
+        int64 old_value = (*input_times)[input_times->size() - 1];
+        (*input_times)[input_times->size() - 1] = static_cast<int64>(
+            static_cast<double>(old_value + NanosPerElementLocked()) / ratio);
+        auto cleanup = gtl::MakeCleanup([input_times, old_value]() {
+          (*input_times)[input_times->size() - 1] = old_value;
+        });
+      }
       return NanosPerElementLocked() +
              static_cast<int64>(
                  static_cast<double>(OutputTimeForInputs(input_times)) * ratio);
diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc
index 34c6c86538..f2419db3dc 100644
--- a/tensorflow/core/kernels/data/cache_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc
@@ -516,10 +516,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
         // `FileReaderIterator` and seek to the `cur_index`.
         switch (mode_) {
           case Mode::read:
-            iterator_.reset(new FileReaderIterator({dataset(), prefix()}));
+            iterator_.reset(new FileReaderIterator(
+                {dataset(), strings::StrCat(prefix(), "Impl")}));
             break;
           case Mode::write:
-            iterator_.reset(new FileWriterIterator({dataset(), prefix()}));
+            iterator_.reset(new FileWriterIterator(
+                {dataset(), strings::StrCat(prefix(), "Impl")}));
         }
       }
 
@@ -866,12 +868,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
       void InitializeIterator() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
         switch (mode_) {
           case Mode::read:
-            iterator_.reset(
-                new MemoryReaderIterator({dataset(), prefix()}, cache_));
+            iterator_.reset(new MemoryReaderIterator(
+                {dataset(), strings::StrCat(prefix(), "Impl")}, cache_));
             break;
           case Mode::write:
-            iterator_.reset(
-                new MemoryWriterIterator({dataset(), prefix()}, cache_));
+            iterator_.reset(new MemoryWriterIterator(
+                {dataset(), strings::StrCat(prefix(), "Impl")}, cache_));
         }
       }
 
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index cdb883cac9..c0b5027e73 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -99,6 +99,16 @@ class Dataset(object):
         return options
     return Options()
 
+  def _apply_options(self):
+    dataset = self
+    options = self.options()
+    static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
+    if static_optimizations:
+      dataset = _OptimizeDataset(dataset, static_optimizations)
+    if options.experimental_autotune:
+      dataset = _ModelDataset(dataset)
+    return dataset
+
   def make_initializable_iterator(self, shared_name=None):
     """Creates an `Iterator` for enumerating the elements of this dataset.
 
@@ -127,13 +137,7 @@ class Dataset(object):
       raise RuntimeError(
           "dataset.make_initializable_iterator is not supported when eager "
           "execution is enabled.")
-    dataset = self
-    options = self.options()
-    static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
-    if static_optimizations:
-      dataset = _OptimizeDataset(dataset, static_optimizations)
-    if options.experimental_autotune:
-      dataset = _ModelDataset(dataset)
+    dataset = self._apply_options()
     if shared_name is None:
       shared_name = ""
     if compat.forward_compatible(2018, 8, 3):
@@ -163,7 +167,8 @@ class Dataset(object):
       RuntimeError: If eager execution is not enabled.
     """
     if context.executing_eagerly():
-      return iterator_ops.EagerIterator(self)
+      dataset = self._apply_options()
+      return iterator_ops.EagerIterator(dataset)
     else:
       raise RuntimeError("dataset.__iter__() is only supported when eager "
                          "execution is enabled.")
@@ -194,13 +199,7 @@ class Dataset(object):
         core_random_seed.set_random_seed(
             (graph_level_seed + 87654321 * op_level_seed) % (2 ** 63 - 1))
 
-      dataset = self
-      options = self.options()
-      static_optimizations = options._static_optimizations()  # pylint: disable=protected-access
-      if static_optimizations:
-        dataset = _OptimizeDataset(dataset, static_optimizations)
-      if options.experimental_autotune:
-        dataset = _ModelDataset(dataset)
+      dataset = self._apply_options()
       return dataset._as_variant_tensor()  # pylint: disable=protected-access
 
     try:
-- 
GitLab


From 5fede0de07da90d466082c2f552399078ed4f2c0 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 11 Oct 2018 13:47:11 -0700
Subject: [PATCH 0800/1085] Make sure the ops are not created before TF fully
 initializes.

PiperOrigin-RevId: 216753969
---
 .../python/kernel_tests/linalg_ops_test.py    | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py
index ccb3feeaf6..28391aaa87 100644
--- a/tensorflow/python/kernel_tests/linalg_ops_test.py
+++ b/tensorflow/python/kernel_tests/linalg_ops_test.py
@@ -155,17 +155,21 @@ class EyeTest(parameterized.TestCase, test.TestCase):
             num_rows=2, num_columns=3, batch_shape=batch_shape).shape)
 
   @parameterized.named_parameters(
-      ("DynamicRow", array_ops.placeholder_with_default(2, shape=None), None),
+      ("DynamicRow",
+       lambda: array_ops.placeholder_with_default(2, shape=None),
+       lambda: None),
       ("DynamicRowStaticColumn",
-       array_ops.placeholder_with_default(2, shape=None),
-       3),
+       lambda: array_ops.placeholder_with_default(2, shape=None),
+       lambda: 3),
       ("StaticRowDynamicColumn",
-       2,
-       array_ops.placeholder_with_default(3, shape=None)),
+       lambda: 2,
+       lambda: array_ops.placeholder_with_default(3, shape=None)),
       ("DynamicRowDynamicColumn",
-       array_ops.placeholder_with_default(2, shape=None),
-       array_ops.placeholder_with_default(3, shape=None)))
-  def testShapeInferenceStaticBatchWith(self, num_rows, num_columns):
+       lambda: array_ops.placeholder_with_default(2, shape=None),
+       lambda: array_ops.placeholder_with_default(3, shape=None)))
+  def testShapeInferenceStaticBatchWith(self, num_rows_fn, num_columns_fn):
+    num_rows = num_rows_fn()
+    num_columns = num_columns_fn()
     batch_shape = (2, 3)
     identity_matrix = linalg_ops.eye(
         num_rows=num_rows,
-- 
GitLab


From 6a1f3348dc0657690a4bd48560c39ab6e915b16f Mon Sep 17 00:00:00 2001
From: Richard Wei <rxwei@google.com>
Date: Thu, 11 Oct 2018 13:49:53 -0700
Subject: [PATCH 0801/1085] Fix typo

PiperOrigin-RevId: 216754413
---
 tensorflow/contrib/lite/kernels/test_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h
index 84deb0e0e8..670120219f 100644
--- a/tensorflow/contrib/lite/kernels/test_util.h
+++ b/tensorflow/contrib/lite/kernels/test_util.h
@@ -76,7 +76,7 @@ inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
 // A helper struct to construct test tensors. This is particularly useful for
 // quantized tensor which must have their scale and zero_point defined before
 // the actual data is known. This mimics what happens in practice: quantization
-// parameters are calculate during training.
+// parameters are calculated during training.
 struct TensorData {
   TensorType type;
   std::vector<int> shape;
-- 
GitLab


From 397c2c5d4f083ee75b5ae02152fe9fd3dd016c65 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Thu, 11 Oct 2018 13:50:39 -0700
Subject: [PATCH 0802/1085] [XLA] Expose multi-value sort through the
 XlaBuilder interface, and add Evaluator support.

Not exposed through the other interfaces yet, e.g. the local computation builder, or the XlaKeyValueSort TF op.

PiperOrigin-RevId: 216754567
---
 .../compiler/tf2xla/kernels/permute_op.cc     |  2 +-
 .../compiler/tf2xla/kernels/random_ops.cc     |  2 +-
 .../compiler/tf2xla/kernels/sort_ops.cc       |  2 +-
 tensorflow/compiler/xla/client/lib/sorting.cc |  2 +-
 tensorflow/compiler/xla/client/xla_builder.cc | 22 +++++++--------
 tensorflow/compiler/xla/client/xla_builder.h  | 19 +++++++------
 .../xla/python/local_computation_builder.cc   |  4 +--
 .../compiler/xla/service/hlo_evaluator.cc     | 28 +++++++++++++------
 8 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/permute_op.cc b/tensorflow/compiler/tf2xla/kernels/permute_op.cc
index 0764e5503d..3ca5eecf1a 100644
--- a/tensorflow/compiler/tf2xla/kernels/permute_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/permute_op.cc
@@ -78,7 +78,7 @@ class DataFormatVecPermuteOp : public XlaOpKernel {
       keys = xla::BroadcastInDim(
           keys, xla::ShapeUtil::MakeShape(xla::S32, {4, 2}), {0});
     }
-    auto sorted = xla::Sort(keys, ctx->Input(0), 0);
+    auto sorted = xla::Sort(keys, {ctx->Input(0)}, 0);
     auto output = xla::GetTupleElement(sorted, 1);
     ctx->SetOutput(0, output);
   }
diff --git a/tensorflow/compiler/tf2xla/kernels/random_ops.cc b/tensorflow/compiler/tf2xla/kernels/random_ops.cc
index afd5986846..7ef6fa305b 100644
--- a/tensorflow/compiler/tf2xla/kernels/random_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/random_ops.cc
@@ -135,7 +135,7 @@ class RandomShuffleOp : public XlaOpKernel {
       xla::XlaOp curr = input;
       for (int i = 0; i < rounds; ++i) {
         xla::XlaOp keys = xla::RngUniform(zero, max_value, key_shape);
-        xla::XlaOp sorted = xla::Sort(keys, curr);
+        xla::XlaOp sorted = xla::Sort(keys, {curr});
         curr = xla::GetTupleElement(sorted, 1);
       }
 
diff --git a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
index 45f03d8c21..6cfdf4a5ae 100644
--- a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
@@ -38,7 +38,7 @@ class XlaKeyValueSortOp : public XlaOpKernel {
 
   void Compile(XlaOpKernelContext* context) override {
     xla::XlaOp result =
-        xla::Sort(context->Input("keys"), context->Input("values"));
+        xla::Sort(context->Input("keys"), {context->Input("values")});
     context->SetOutput(0, xla::GetTupleElement(result, 0));
     context->SetOutput(1, xla::GetTupleElement(result, 1));
   }
diff --git a/tensorflow/compiler/xla/client/lib/sorting.cc b/tensorflow/compiler/xla/client/lib/sorting.cc
index a904be259a..0475fd9c94 100644
--- a/tensorflow/compiler/xla/client/lib/sorting.cc
+++ b/tensorflow/compiler/xla/client/lib/sorting.cc
@@ -29,7 +29,7 @@ XlaOp TopK(XlaOp input, int64 k) {
     auto input_dims = input_shape.dimensions();
     std::vector<int64> broadcast_dims(input_dims.begin(), input_dims.end() - 1);
     XlaOp broadcast_s32 = Broadcast(iota_s32, broadcast_dims);
-    XlaOp sort_result = Sort(Neg(input), broadcast_s32);
+    XlaOp sort_result = Sort(Neg(input), {broadcast_s32});
     std::vector<int64> start_indices(input_shape.dimensions_size(), 0);
     std::vector<int64> limit_indices(input_dims.begin(), input_dims.end());
     limit_indices[last_dim] = k;
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index e7cf9ae363..ebfd9ac4e4 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -1494,18 +1494,17 @@ XlaOp XlaBuilder::Rev(const XlaOp& operand,
   });
 }
 
-XlaOp XlaBuilder::Sort(XlaOp keys, absl::optional<XlaOp> values,
+XlaOp XlaBuilder::Sort(const XlaOp& keys, absl::Span<const XlaOp> values,
                        int64 dimension) {
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     HloInstructionProto instr;
     std::vector<const Shape*> operand_shape_ptrs;
     TF_ASSIGN_OR_RETURN(const Shape& keys_shape, GetShape(keys));
     operand_shape_ptrs.push_back(&keys_shape);
-    Shape values_shape;
-    if (values.has_value()) {
-      TF_ASSIGN_OR_RETURN(values_shape, GetShape(*values));
-      operand_shape_ptrs.push_back(&values_shape);
-    }
+    TF_ASSIGN_OR_RETURN(std::vector<Shape> values_shapes,
+                        GetOperandShapes(values));
+    absl::c_transform(values_shapes, std::back_inserter(operand_shape_ptrs),
+                      [](const Shape& shape) { return &shape; });
     TF_ASSIGN_OR_RETURN(*instr.mutable_shape(),
                         ShapeInference::InferVariadicOpShape(
                             HloOpcode::kSort, operand_shape_ptrs));
@@ -1514,10 +1513,9 @@ XlaOp XlaBuilder::Sort(XlaOp keys, absl::optional<XlaOp> values,
       dimension = ShapeUtil::Rank(keys_shape) - 1;
     }
     instr.add_dimensions(dimension);
-    return values.has_value()
-               ? AddInstruction(std::move(instr), HloOpcode::kSort,
-                                {keys, *values})
-               : AddInstruction(std::move(instr), HloOpcode::kSort, {keys});
+    std::vector<XlaOp> operands{keys};
+    operands.insert(operands.end(), values.begin(), values.end());
+    return AddInstruction(std::move(instr), HloOpcode::kSort, operands);
   });
 }
 
@@ -2954,8 +2952,8 @@ XlaOp Rev(const XlaOp& operand, absl::Span<const int64> dimensions) {
   return operand.builder()->Rev(operand, dimensions);
 }
 
-XlaOp Sort(XlaOp keys, absl::optional<XlaOp> values, int64 dimension) {
-  return keys.builder()->Sort(keys, std::move(values), dimension);
+XlaOp Sort(const XlaOp& keys, absl::Span<const XlaOp> values, int64 dimension) {
+  return keys.builder()->Sort(keys, values, dimension);
 }
 
 XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max) {
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 2916ac1b2a..5747661c34 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -834,12 +834,12 @@ class XlaBuilder {
   // the last dimension is chosen by default.
   //
   // If both keys and values are provided:
-  // * The keys and the values must tensors with the same dimensions. The
+  // * The keys and all values must be tensors with the same dimensions. The
   // element types of the tensors may be different.
   // * The result is a tuple that consists of a sorted tensor of keys (along the
-  // provided dimension, as above) as the first element, and a tensor with their
-  // corresponding values as the second element.
-  XlaOp Sort(XlaOp keys, absl::optional<XlaOp> values = absl::nullopt,
+  // provided dimension, as above) as the first element, and tensors with their
+  // corresponding values as the other elements.
+  XlaOp Sort(const XlaOp& keys, absl::Span<const XlaOp> values = {},
              int64 dimension = -1);
 
   // Enqueues a clamp instruction onto the computation.
@@ -1311,7 +1311,8 @@ class XlaBuilder {
   friend XlaOp Transpose(const XlaOp& operand,
                          absl::Span<const int64> permutation);
   friend XlaOp Rev(const XlaOp& operand, absl::Span<const int64> dimensions);
-  friend XlaOp Sort(XlaOp keys, absl::optional<XlaOp> values, int64 dimension);
+  friend XlaOp Sort(const XlaOp& keys, absl::Span<const XlaOp> values,
+                    int64 dimension);
   friend XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max);
   friend XlaOp Map(XlaBuilder* builder, absl::Span<const XlaOp> operands,
                    const XlaComputation& computation,
@@ -2002,12 +2003,12 @@ XlaOp Rev(const XlaOp& operand, absl::Span<const int64> dimensions);
 // the last dimension is chosen by default.
 //
 // If both keys and values are provided:
-// * The keys and the values must be tensors with the same dimensions. The
+// * The keys and all values must be tensors with the same dimensions. The
 // element types of the tensors may be different.
 // * The result is a tuple that consists of a sorted tensor of keys (along the
-// provided dimension, as above) as the first element, and a tensor with their
-// corresponding values as the second element.
-XlaOp Sort(XlaOp keys, absl::optional<XlaOp> values = absl::nullopt,
+// provided dimension, as above) as the first element, and tensors with their
+// corresponding values as the other elements.
+XlaOp Sort(const XlaOp& keys, absl::Span<const XlaOp> values = {},
            int64 dimension = -1);
 
 // Enqueues a clamp instruction onto the computation.
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index ffa336f304..92df404b8e 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -572,13 +572,13 @@ StatusOr<bool> LocalComputationBuilder::IsConstant(const LocalOp& operand) {
 }
 
 LocalOp LocalComputationBuilder::Sort(const LocalOp& operand, int64 dimension) {
-  return xla::Sort(operand.op(), absl::nullopt, dimension);
+  return xla::Sort(operand.op(), {}, dimension);
 }
 
 LocalOp LocalComputationBuilder::SortKeyVal(const LocalOp& keys,
                                             const LocalOp& values,
                                             int64 dimension) {
-  return xla::Sort(keys.op(), values.op(), dimension);
+  return xla::Sort(keys.op(), {values.op()}, dimension);
 }
 
 StatusOr<LocalComputation*> LocalComputationBuilder::BuildConstantSubGraph(
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index bb6806dc0b..c299888385 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -1234,7 +1234,7 @@ StatusOr<Literal> EvaluateSortInternal(HloInstruction* sort,
   TF_RET_CHECK(
       ShapeUtil::SameDimensions(keys_literal.shape(), values_literal.shape()))
       << "Sort keys and values must have the same dimensions";
-  TF_RET_CHECK(sort->operand_count() == 2) << "Expected key-value sort";
+  TF_RET_CHECK(sort->operand_count() >= 2) << "Expected key-value sort";
   // We need to sort an array of keys and an array of values, where the
   // sorted order of the values is determined by the keys. The simplest(?)
   // way to do this is to go to an array-of-pairs representation, sort the
@@ -1323,7 +1323,7 @@ template <typename KeyType>
 StatusOr<Literal> EvaluateSortCurried(HloInstruction* sort,
                                       const Literal& keys_literal,
                                       const Literal& values_literal) {
-  switch (sort->operand(1)->shape().element_type()) {
+  switch (values_literal.shape().element_type()) {
     case PRED:
       return EvaluateSortInternal<KeyType, bool>(sort, keys_literal,
                                                  values_literal);
@@ -1366,14 +1366,24 @@ Status HloEvaluator::HandleSort(HloInstruction* sort) {
   if (!ShapeUtil::IsTuple(sort->shape())) {
     return DefaultAction(sort);
   } else {
-    auto result = EvaluateSort(sort, GetEvaluatedLiteralFor(sort->operand(0)),
-                               GetEvaluatedLiteralFor(sort->operand(1)));
-    if (result.ok()) {
-      evaluated_[sort] = std::move(result.ValueOrDie());
-      return Status::OK();
-    } else {
-      return result.status();
+    // This is a really stupid work-around for the fact it's hard to support a
+    // multi-value sort directly, due to the fact we need to template the
+    // evaluation function on all of the value types.
+    std::vector<Literal> sort_results_backing;
+    for (int64 i = 0; i < sort->operand_count(); ++i) {
+      auto result = EvaluateSort(sort, GetEvaluatedLiteralFor(sort->operand(0)),
+                                 GetEvaluatedLiteralFor(sort->operand(i)));
+      if (!result.ok()) {
+        return result.status();
+      }
+      sort_results_backing.push_back(
+          std::move(result.ValueOrDie().DecomposeTuple()[1]));
     }
+    std::vector<const Literal*> sort_results;
+    absl::c_transform(sort_results_backing, std::back_inserter(sort_results),
+                      [](const Literal& literal) { return &literal; });
+    evaluated_[sort] = LiteralUtil::MakeTuple(sort_results);
+    return Status::OK();
   }
 }
 
-- 
GitLab


From 879a5020f0b05026951d463bad47c00d94da6879 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Thu, 11 Oct 2018 14:06:31 -0700
Subject: [PATCH 0803/1085] Add the logging for the Google internal test

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 23587a2d00..f12e376ddc 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -258,6 +258,9 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           const string& current_dir = current_path.first;
           std::vector<string> children;
           Status s = fs->GetChildren(current_dir, &children);
+          std::cout << "Children Num: " << children.size()
+                    << "; Status: " << s.ToString()
+                    << "; Current dir: " << current_dir << std::endl;
           ret.Update(s);
 
           // If GetChildren() fails, continue the next search.
-- 
GitLab


From 6a81e4414ba4a0a1fa77791f7f1aa647b1b8ac10 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 11 Oct 2018 13:51:51 -0700
Subject: [PATCH 0804/1085] Make predicted and output types match, so equal op
 can run.

This will allow https://www.tensorflow.org/tutorials/keras/basic_classification to run in eager mode.

PiperOrigin-RevId: 216754760
---
 tensorflow/python/keras/metrics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 920eaf5596..91a48acbd5 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -666,10 +666,10 @@ def sparse_categorical_accuracy(y_true, y_pred):
     y_true = array_ops.squeeze(y_true, [-1])
   y_pred = math_ops.argmax(y_pred, axis=-1)
 
-  # If the expected labels are float, we need to cast the int returned by
-  # argmax to compare.
-  if K.dtype(y_true) == K.floatx():
-    y_pred = math_ops.cast(y_pred, K.floatx())
+  # If the predicted output and actual output types don't match, force cast them
+  # to match.
+  if K.dtype(y_pred) != K.dtype(y_true):
+    y_pred = math_ops.cast(y_pred, K.dtype(y_true))
 
   return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
 
-- 
GitLab


From 3f750304649c5928638e13dc96380ab5c1005f0e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 13:54:12 -0700
Subject: [PATCH 0805/1085] Remove 'not trainable' int types from NCCL gradient
 tests.

PiperOrigin-RevId: 216755268
---
 tensorflow/contrib/nccl/python/ops/nccl_ops_test.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
index 423a8689ae..a39efedaf0 100644
--- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
+++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from functools import partial
+import os
 import numpy as np
 
 from tensorflow.contrib import nccl
@@ -60,6 +61,7 @@ class NcclTestCase(test.TestCase):
   def _Test(self,
             nccl_reduce,
             numpy_fn,
+            dtypes=[np.float16, np.float32, np.int32, np.int64, np.float64],
             device_sets=(['/device:GPU:1', '/device:GPU:2', '/device:GPU:0'],
                          ['/device:GPU:1', '/device:GPU:0'])):
     """Tests that nccl_reduce does the same as reduction with numpy_fn.
@@ -72,7 +74,10 @@ class NcclTestCase(test.TestCase):
           two.
       device_sets: Tuple of virtual devices to run test on.
     """
-    for dtype in [np.float16, np.float32, np.int32, np.int64, np.float64]:
+    # Enable NCCL printouts.
+    os.environ["NCCL_DEBUG"] = "INFO"
+
+    for dtype in dtypes:
       # Create session inside outer loop to test use of
       # same communicator across multiple sessions.
       with self.test_session(use_gpu=True) as sess:
@@ -124,7 +129,8 @@ class NcclTestCase(test.TestCase):
           reduce_tensors, inputs, losses, colocate_gradients_with_ops=True)
       return [g for g in grads if g is not None]
 
-    self._Test(_Gradient, numpy_fn)
+    # int types are considered not 'trainable' and no gradients are generated.
+    self._Test(_Gradient, numpy_fn, dtypes=[np.float16, np.float32, np.float64])
 
 
 class AllReduceTest(NcclTestCase):
-- 
GitLab


From 6437ceef99796e8c2694b5e91f42d14351e4c435 Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Thu, 11 Oct 2018 13:58:28 -0700
Subject: [PATCH 0806/1085] Calculate batch size correctly when create a
 Dataset from numpy inputs. Also add a warning message when we don't process
 all the input samples when we create a Dataset object from input numpy
 arrays.

PiperOrigin-RevId: 216755953
---
 .../contrib/distribute/python/keras_test.py   | 77 +++++++++++++++++++
 .../engine/distributed_training_utils.py      | 27 +++++++
 tensorflow/python/keras/engine/training.py    |  8 +-
 .../keras/engine/training_distributed.py      |  4 +-
 4 files changed, 109 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 6553642ad3..dfa3891289 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -367,6 +367,83 @@ class TestDistributionStrategyWithNumpyArrays(test.TestCase,
       # Verify that the numpy value is copied to the variable.
       self.assertAllEqual(x, val)
 
+  def test_calculating_batch_params(self):
+    # This verifies that we calculate the number of steps when the batch size
+    # is specified.
+    with self.cached_session():
+      # 64 is the number of input samples.
+      inputs = np.zeros((64, 3), dtype=np.float32)
+      # The number of towers is equal to 3.
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
+                                                     '/device:CPU:0',
+                                                     '/device:GPU:1'])
+
+      with self.assertRaisesRegexp(ValueError, 'Please specify a batch_size '
+                                               'that is smaller than'):
+        # The batch size(128) is larger than the number of input
+        # samples(64).
+        distributed_training_utils.get_input_batch_params(inputs,
+                                                          128,
+                                                          strategy)
+
+      with self.assertRaisesRegexp(ValueError, 'is smaller than the number '
+                                               'of towers'):
+        # The batch size(32) * num_towers(3) is 96 which is greater than the
+        # number of input samples(64).
+        distributed_training_utils.get_input_batch_params(inputs,
+                                                          32,
+                                                          strategy)
+
+      # The number of towers now is equal to 2.
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
+                                                     '/device:CPU:0'])
+      # 32 is the batch size per tower.
+      steps = distributed_training_utils.get_input_batch_params(inputs,
+                                                                32,
+                                                                strategy)
+      # The number of batches is the ratio of input samples(64) to
+      # batch size(32) which is 2. The number of steps(1) is the ratio of
+      # number of batches(2) to the number of towers(2).
+      self.assertEqual(steps, 1)
+
+      # 16 is the batch size per tower.
+      steps = distributed_training_utils.get_input_batch_params(inputs,
+                                                                16,
+                                                                strategy)
+      # The number of batches is the ratio of input samples(64) to
+      # batch size(16) which is 4. The number of steps(2) is the ratio of
+      # number of batches(4) to the number of towers(2).
+      self.assertEqual(steps, 2)
+
+  def test_calculating_batch_size(self):
+    with self.cached_session():
+      # 64 is the number of input samples.
+      inputs = np.zeros((64, 3), dtype=np.float32)
+      targets = np.zeros((64, 4), dtype=np.float32)
+
+      model = get_model()
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
+                                                     '/device:CPU:0'])
+      strategy._require_static_shapes = True
+
+      model.compile(optimizer, loss, distribute=strategy)
+      iterator = model._distribution_standardize_user_data(inputs,
+                                                           targets,
+                                                           batch_size=None,
+                                                           check_steps=True,
+                                                           steps_name='steps',
+                                                           steps=3)
+
+      # The global batch size(21) across all towers is the ratio of the input
+      # samples(64) to the steps(3).
+      # The batch size(10) per device is the ratio of the global batch size(21)
+      # to the number of towers(2).
+      # The global batch size and batch size are rounded integer values.
+      self.assertEqual(10, distributed_training_utils.get_batch_dimension(
+          iterator._iterator))
+
   @combinations.generate(strategy_combinations())
   def test_calling_model_with_numpy_arrays(self, distribution):
     with self.cached_session():
diff --git a/tensorflow/python/keras/engine/distributed_training_utils.py b/tensorflow/python/keras/engine/distributed_training_utils.py
index 050602868a..3c65e5ab0e 100644
--- a/tensorflow/python/keras/engine/distributed_training_utils.py
+++ b/tensorflow/python/keras/engine/distributed_training_utils.py
@@ -380,6 +380,33 @@ def get_batch_dimension(iterator):
   return dims[0] if dims else None
 
 
+def get_batch_size(num_towers, num_samples, steps):
+  """Calculate and return batch size for numpy inputs.
+
+  Args:
+    num_towers: Number of devices over which the model input is distributed.
+    num_samples: Total number of input samples in the input numpy arrays.
+    steps: Number of steps that we run the model for.
+
+  Returns:
+    batch size used to create the Dataset object from the input numpy arrays.
+
+  """
+  if num_samples % steps != 0:
+    logging.warning('The number of input samples %d is not evenly '
+                    'divisible by the number of steps %d. '
+                    'Some samples will not be processed as expected.' %
+                    (num_samples, steps))
+  global_batch_size = num_samples // steps
+  if global_batch_size % num_towers != 0:
+    logging.warning('The total number of batches per step %d is not evenly '
+                    'divisible by the number of towers %d used in '
+                    'DistributionStrategy. Some samples will not be processed '
+                    'as expected.' %
+                    (global_batch_size, num_towers))
+  return global_batch_size // num_towers
+
+
 def get_cpu_device(distribution_strategy):
   """Returns the CPU device of the TPU host or the default CPU device string.
 
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index ff2ae54ad4..3cb9316399 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -815,7 +815,8 @@ class Model(Network):
     if isinstance(first_x_value, np.ndarray):
       x_shape = first_x_value.shape
       if batch_size is None:
-        batch_size = x_shape[0] // steps
+        batch_size = distributed_training_utils.get_batch_size(
+            self._distribution_strategy.num_towers, x_shape[0], steps)
       # We need to use the drop_remainder argument to allow for a static
       # input shape which is required for TPUs.
       drop_remainder = self._distribution_strategy.require_static_shapes
@@ -833,10 +834,7 @@ class Model(Network):
         y = None
       else:
         # This case is for the predict call where the dataset only contains
-        # inputs and no targets i.e it does not return a tuple.
-        # TODO(anjalisridhar): Raise an error if we are not able to process
-        # all the predict samples. This can happen if the number of batches is
-        # not evenly divisible by the number of worker devices.
+        # inputs and no targets, i.e. it does not return a tuple
         var_x = distributed_training_utils.get_var_for_numpy(
             self._distribution_strategy, x)
         x = dataset_ops.Dataset.from_tensor_slices(var_x)
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index ac759ef3aa..d99a1cf075 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -20,13 +20,13 @@ from __future__ import division
 from __future__ import print_function
 import numpy as np
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import callbacks as cbks
+from tensorflow.python.keras import metrics as metrics_module
 from tensorflow.python.keras import optimizers
 from tensorflow.python.keras.engine import distributed_training_utils
-from tensorflow.python.keras import metrics as metrics_module
 from tensorflow.python.keras.utils.generic_utils import Progbar
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import variable_scope
-- 
GitLab


From df9a5f92616411697bd23cd19d0a685f7cdfce05 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 11 Oct 2018 14:14:13 -0700
Subject: [PATCH 0807/1085] Remove GlobalKernelRegistry (returning void*) from
 the op kernel API.

This was used with RegisterKernels which was removed now.

PiperOrigin-RevId: 216758675
---
 tensorflow/core/framework/op_kernel.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 4bbd6c3d7d..3b1f57a457 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -1304,8 +1304,6 @@ class Name : public KernelDefBuilder {
             return new __VA_ARGS__(context);                             \
           });
 
-void* GlobalKernelRegistry();
-
 // If node_def has a corresponding kernel registered on device_type,
 // returns OK and fill in the kernel def and kernel_class_name. <def> and
 // <kernel_class_name> may be null.
-- 
GitLab


From a56e03673153fea350b6cbc3af07701877d982e6 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 11 Oct 2018 14:20:49 -0700
Subject: [PATCH 0808/1085] [tf.data vectorization] Add vectorizer for
 "Reshape"

PiperOrigin-RevId: 216759791
---
 .../optimizers/data/vectorization/BUILD       |   8 ++
 .../data/vectorization/reshape_vectorizer.cc  |  80 ++++++++++++++
 .../data/vectorization_utils_test.cc          | 100 ++++++++++++++++--
 .../optimization/map_vectorization_test.py    |  73 +++++++++----
 4 files changed, 235 insertions(+), 26 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/reshape_vectorizer.cc

diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
index cf84ac710a..60f1df1c52 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -75,6 +75,13 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "reshape_vectorizer",
+    srcs = ["reshape_vectorizer.cc"],
+    deps = VECTORIZER_DEPS,
+    alwayslink = 1,
+)
+
 cc_library(
     name = "unpack_vectorizer",
     srcs = ["unpack_vectorizer.cc"],
@@ -89,6 +96,7 @@ cc_library(
     deps = [
         ":add_vectorizer",
         ":cast_vectorizer",
+        ":reshape_vectorizer",
         ":unpack_vectorizer",
         ":vectorizer",
         ":vectorizer_registry",
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/reshape_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/reshape_vectorizer.cc
new file mode 100644
index 0000000000..dfb855ffa5
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/reshape_vectorizer.cc
@@ -0,0 +1,80 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope_internal.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/cc/ops/math_ops.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+
+namespace tensorflow {
+namespace grappler {
+
+namespace {
+
+const char* const kReshapePrefix = "vectorized/reshape";
+
+// The vectorized shape should be the original shape with an additional leading
+// dimension that is the same as the leading dimension of the stacked
+// input tensor.
+Output GetVectorizedShape(Scope* s, Output tensor, Output original_shape) {
+  Output const_vec_1 = ops::Const(*s, {1});
+  Output shape = ops::Shape(*s, tensor);
+
+  // shape[:1]
+  Output dim_0 =
+      ops::StridedSlice(*s, shape, const_vec_1, const_vec_1, const_vec_1,
+                        ops::StridedSlice::Attrs().BeginMask(1));
+
+  // tf.concat([dim_0, original], 0)
+  return ops::Concat(*s, {dim_0, original_shape}, ops::Const(*s, 0));
+}
+
+class ReshapeVectorizer : public Vectorizer {
+ public:
+  Status Vectorize(const Node& node, Graph* outer_scope,
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
+    if (!inputs[0].stacked || inputs[1].stacked) {
+      return errors::InvalidArgument(
+          "Expecting input 0 (`tensor`) to be stacked and input 1 (`shape`) to "
+          "be unstacked.");
+    }
+
+    Status status;
+    Scope parent = NewInternalScope(outer_scope, &status, nullptr);
+    Scope s = parent.NewSubScope(kReshapePrefix);
+
+    Output tensor = {inputs[0].node, inputs[0].output_index};
+    Output vectorized_reshape =
+        ops::Reshape(s, tensor,
+                     GetVectorizedShape(
+                         &s, tensor, {inputs[1].node, inputs[1].output_index}));
+
+    TF_RETURN_IF_ERROR(status);
+
+    // Add output mappings
+    outputs->push_back({vectorized_reshape.node(), 0, true});
+    return Status::OK();
+  }
+};
+
+REGISTER_VECTORIZER("Reshape", ReshapeVectorizer);
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index be498d150b..767f61226b 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -1007,14 +1007,98 @@ TEST(VectorizeMapDefunTest, VectorizeDefunAdd) {
       !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
 }
 
-// TODO(rachelim): More test cases when we get around to implementing them:
-// [] A badly defined converter, e.g. doesn't produce nodes that have the
-//    same number of outputs/inputs as the nodes to be converted
-// [] Converter where the 'converted' form has multiple nodes.
-// [] Case with dependent nodes, e.g. ops with const inputs that are
-//    broadcasted.
-// [] Python-side tests to actually run the functions to make sure
-//    they work.
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------------------+
+// |               +---+--+                     |
+// |                   |                        |
+// |               +---v--+                     |
+// |   +-----------+ Arg0 +-----------------+   |
+// |   |           +---+--+                 |   |
+// |   |               |                    |   |
+// |   |               |                    |   |
+// |   |               |   (3,3,3)          |   |
+// |   |               |   +-----+          |   |
+// |   |               |   |Const|          |   |
+// |   |               |   +--+--+          |   |
+// |   |               |      |             |   |
+// |   |               | +----+             |   |
+// |   |           +---v-v-+                |   |
+// |   |           |Reshape|                |   |
+// |   |           +---+---+                |   |
+// |   |               |                    |   |
+// |   | MapDefun  +---v--+                 |   |
+// |   +-----------+ Ret0 +-----------------+   |
+// |               +---+--+                     |
+// |                   |                        |
+// |               +---v--+                     |
+// +---------------+ Ret0 +---------------------+
+//                 +------+
+//
+//
+//  After:
+//
+//           +------+
+// +---------+ Arg0 +------------------------+
+// |         +---+--+                        |
+// |             |                           |
+// |             |                           |
+// |             |     +-----+               |
+// |             |     |Const|               |
+// |             |     +--+--+               |
+// |             |        |                  |
+// |             |    +---v---+              |
+// |             |    |Concat*|              |
+// |             |    +---+---+              |
+// |             |        |                  |
+// |             | +------+                  |
+// |             | |                         |
+// |         +---v-v-+                       |
+// |         |Reshape|                       |
+// |         +---+---+                       |
+// |             |                           |
+// |         +---v--+                        |
+// +---------+ Ret0 +------------------------+
+//           +------+
+//
+// (Where Concat* appends the 0th dim of the input to the new shape)
+//
+TEST(VectorizeMapDefunTest, VectorizeReshape) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */},
+      {/* nodes */ FunctionDefHelper::Const("Const",
+                                            gtl::ArraySlice<int>({3, 3, 3})),
+       {{"Reshape"},
+        "Reshape",
+        {"arg0", "Const:output:0"},
+        {{"T", DT_INT32}, {"Tshape", DT_INT32}}}},
+      {{"ret0", "Reshape:output:0"}});
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"},
+      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}},
+                      inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  EXPECT_TRUE(
+      function_utils::ContainsFunctionNodeWithOp("Reshape", *vectorized));
+  auto reshape_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Reshape", *vectorized));
+  EXPECT_EQ(GetRetval(*vectorized, 0),
+            strings::StrCat(reshape_node.name(), ":output:0"));
+}
 
 }  // namespace
 }  // namespace vectorization_utils
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index d1d6cf28ab..bcceab60bb 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -26,6 +26,7 @@ from tensorflow.python.client import session
 from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import nest
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -36,6 +37,36 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
+def _generate_optimization_test_cases():
+
+  def base_dataset_factory():
+    return dataset_ops.Dataset.from_tensors(np.random.rand(10, 3)).repeat(5)
+
+  rand_val = np.random.rand(1, 1, 1, 1, 1, 1)
+
+  test_cases = [
+      ("Basic", lambda x: (x, x + 1), base_dataset_factory),
+      ("Const", lambda x: 2, base_dataset_factory),
+      # Math ops exercise broadcasting capabilities
+      ("Add", lambda x: x + rand_val, base_dataset_factory),
+      ("Cast", lambda x: math_ops.cast(x, dtypes.float64),
+       base_dataset_factory),
+      ("Reshape", lambda x: array_ops.gather(x, 0), base_dataset_factory),
+      ("Unpack", array_ops.unstack, base_dataset_factory),
+  ]
+
+  return [{
+      "testcase_name":
+          x[0] + "Parallel" if num_parallel_calls is not None else x[0],
+      "map_fn":
+          x[1],
+      "base_dataset_factory":
+          x[2],
+      "num_parallel_calls":
+          num_parallel_calls
+  } for x in test_cases for num_parallel_calls in (None, 12)]
+
+
 class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _get_test_datasets(self,
@@ -76,16 +107,9 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
     optimized = optimized.with_options(options)
     return unoptimized, optimized
 
-  @parameterized.named_parameters(
-      ("Basic", lambda x: (x, x + 1), None),
-      ("Const", lambda x: 2, 12),
-      ("Parallel", lambda x: (x, x + 1), 12),
-      ("Broadcast", lambda x: x + np.random.rand(5, 4, 3, 2), None),
-      ("Gather", lambda x: array_ops.gather(x, 0), 12),
-  )
-  def testOptimization(self, map_fn, num_parallel_calls):
-    base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2],
-                                                           [3, 4]]).repeat(5)
+  @parameterized.named_parameters(_generate_optimization_test_cases())
+  def testOptimization(self, map_fn, base_dataset_factory, num_parallel_calls):
+    base_dataset = base_dataset_factory()
     unoptimized, optimized = self._get_test_datasets(base_dataset, map_fn,
                                                      num_parallel_calls)
     self.assertDatasetsEqual(unoptimized, optimized)
@@ -178,8 +202,8 @@ class MapVectorizationBenchmark(test.Benchmark):
     return median_time
 
   def _compare(self, input_dataset, map_fn, batch_size, input_size, str_id):
-    num_elems = np.prod(input_size)
-    name_template = "{}__batch_size_{}_input_size_{}_{}"
+    num_elems = np.sum([np.prod(x) for x in input_size])
+    name_template = "{}__batch_size_{}_input_element_size_{}_{}"
     unoptimized = input_dataset.map(map_fn).batch(batch_size)
     unoptimized_op = unoptimized.make_one_shot_iterator().get_next()
 
@@ -197,7 +221,7 @@ class MapVectorizationBenchmark(test.Benchmark):
         name=name_template.format(str_id, batch_size, num_elems, "optimized"))
 
     print("Batch size: {}\n"
-          "Input size: {}\n"
+          "Input element size: {}\n"
           "Transformation: {}\n"
           "Speedup: {}\n".format(batch_size, input_size, str_id,
                                  (unoptimized_time / optimized_time)))
@@ -220,13 +244,26 @@ class MapVectorizationBenchmark(test.Benchmark):
     self._benchmark_helper(
         lambda *args: [math_ops.cast(x, dtypes.float64) for x in args], "cast")
 
-  def _benchmark_helper(self, map_fn, str_id):
+  def benchmarkReshape(self):
+    self._benchmark_helper(
+        lambda *args: [array_ops.reshape(x, (-1, 30)) for x in args], "reshape")
+
+  def _default_dataset_factory(self):
     input_sizes = [(10, 10, 3), (10, 100, 300)]
+    for sz in input_sizes:
+      yield dataset_ops.Dataset.from_tensor_slices(np.random.rand(*sz)).repeat()
+
+  def _benchmark_helper(self, map_fn, str_id, base_dataset_factory=None):
+    if base_dataset_factory is None:
+      base_dataset_factory = self._default_dataset_factory
+
     batch_size = 1000
-    for input_size in input_sizes:
-      input_dataset = dataset_ops.Dataset.from_tensor_slices(
-          (np.random.rand(*input_size), np.random.rand(*input_size))).repeat()
-      self._compare(input_dataset, map_fn, batch_size, input_size, str_id)
+    for base_dataset in base_dataset_factory():
+      input_size = [
+          tuple(shape.as_list())
+          for shape in nest.flatten(base_dataset.output_shapes)
+      ]
+      self._compare(base_dataset, map_fn, batch_size, input_size, str_id)
 
 
 if __name__ == "__main__":
-- 
GitLab


From d354efcf0d4388d77aa8dc57aeeaab2f9088b608 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 11 Oct 2018 14:30:55 -0700
Subject: [PATCH 0809/1085] Fix use-of-uninitialized-value bug for bfloat16 in
 MatrixBandPart.

MatrixBandPart ends up calling bfloat16() to create a zero value, but the implementation of bfloat16::bfloat16() returned an uninitialized value, not zero.

PiperOrigin-RevId: 216761660
---
 tensorflow/core/framework/bfloat16_test.cc         | 14 ++++++++++++++
 tensorflow/core/lib/bfloat16/bfloat16.h            |  7 ++++++-
 .../kernel_tests/matrix_band_part_op_test.py       |  9 +++++----
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc
index 0a1b5e1975..d71f92151d 100644
--- a/tensorflow/core/framework/bfloat16_test.cc
+++ b/tensorflow/core/framework/bfloat16_test.cc
@@ -23,6 +23,20 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+TEST(Bfloat16Test, DefaultValueIsZero) {
+  EXPECT_EQ(0.0f, static_cast<float>(bfloat16()));
+}
+
+TEST(Bfloat16Test, RepresentableFloatsRoundTripViaBfloat16) {
+  const std::vector<float> values = {
+      -std::numeric_limits<float>::infinity(), -1.0, -0.5, -0.0, 0.0, 0.5, 1.0,
+      std::numeric_limits<float>::infinity(),
+  };
+  for (float v : values) {
+    EXPECT_EQ(v, static_cast<float>(static_cast<bfloat16>(v)));
+  }
+}
+
 TEST(Bfloat16Test, Simple) {
   bfloat16 a(12);
   // Floating point representation of 12: 0x41400000
diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h
index 5c917e80c1..4408546580 100644
--- a/tensorflow/core/lib/bfloat16/bfloat16.h
+++ b/tensorflow/core/lib/bfloat16/bfloat16.h
@@ -43,7 +43,9 @@ typedef std::complex<double> complex128;
 
 // see framework/bfloat16.h for description.
 struct bfloat16 {
-  B16_DEVICE_FUNC bfloat16() {}
+  // The default constructor must yield a zero value, not an uninitialized
+  // value; some TF kernels use T() as a zero value.
+  B16_DEVICE_FUNC bfloat16() : value(ZERO_VALUE) {}
 
   B16_DEVICE_FUNC static bfloat16 truncate_to_bfloat16(const float v) {
     bfloat16 output;
@@ -376,6 +378,9 @@ struct bfloat16 {
   static const uint16_t NAN_VALUE = 0x7FC0;
 
  private:
+  // A value that represents "zero".
+  static const uint16_t ZERO_VALUE = 0;
+
   B16_DEVICE_FUNC static bool float_isnan(const float& x) {
 #ifdef __CUDA_ARCH__
     return ::isnan(x);
diff --git a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
index a0ef3a607e..5660a29493 100644
--- a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
@@ -138,12 +138,13 @@ class MatrixBandPartBenchmark(test_lib.Benchmark):
 
 
 if __name__ == "__main__":
-  dtypes = (np.bool, np.int32, np.int64, np.float32, np.float64, np.complex64,
-            np.complex128)
+  dtypes = (np.bool, np.int32, np.int64, np.float16,
+            dtypes_lib.bfloat16.as_numpy_dtype, np.float32, np.float64,
+            np.complex64, np.complex128)
   for dtype in dtypes:
     for batch_shape in ((), (2,), (1, 3, 2)):
-      for rows in 1, 2, 7:
-        for cols in 1, 2, 7:
+      for rows in 1, 2, 7, 23:
+        for cols in 1, 2, 7, 23:
           shape = (rows, cols)
           name = "%s_%s" % (dtype.__name__,
                             "_".join(map(str, batch_shape + shape)))
-- 
GitLab


From 3aa5ab7fa0f3f54791d0c04083700dd0ee847616 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 11 Oct 2018 14:35:02 -0700
Subject: [PATCH 0810/1085] Disable tests that have data dependencies on
 windows.

Path separators need to be reconfigured on windows before opening files.

PiperOrigin-RevId: 216762475
---
 tensorflow/python/BUILD                | 5 ++++-
 tensorflow/python/feature_column/BUILD | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index c4b5be7a34..e7548c9587 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1242,7 +1242,10 @@ py_test(
     data = ["//tensorflow/python:meta_graph_testdata"],
     main = "framework/meta_graph_test.py",
     srcs_version = "PY2AND3",
-    tags = ["no_pip"],
+    tags = [
+        "no_pip",
+        "no_windows",
+    ],
     deps = [
         ":array_ops",
         ":client_testlib",
diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD
index 82acde584e..8c0b5d9038 100644
--- a/tensorflow/python/feature_column/BUILD
+++ b/tensorflow/python/feature_column/BUILD
@@ -102,6 +102,7 @@ py_test(
     tags = [
         "no_cuda_on_cpu_tap",
         "no_pip",
+        "no_windows",
     ],
     deps = [
         ":feature_column",
@@ -135,6 +136,7 @@ py_test(
     tags = [
         "no_cuda_on_cpu_tap",
         "no_pip",
+        "no_windows",
     ],
     deps = [
         ":feature_column_py",
-- 
GitLab


From 11e9f65ea3d1bda973415363b592e23809a04435 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 11 Oct 2018 14:44:56 -0700
Subject: [PATCH 0811/1085] Don't use hardcoded device string when merging
 device specs.

PiperOrigin-RevId: 216764209
---
 tensorflow/contrib/eager/python/remote_test.py | 14 ++++++++++++++
 tensorflow/python/eager/context.py             |  7 +++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/eager/python/remote_test.py b/tensorflow/contrib/eager/python/remote_test.py
index 7aa4b598b8..9ad3bdaa3c 100644
--- a/tensorflow/contrib/eager/python/remote_test.py
+++ b/tensorflow/contrib/eager/python/remote_test.py
@@ -206,6 +206,20 @@ class RemoteExecutionTest(test.TestCase):
       y = math_ops.matmul(x1, x2)
     np.testing.assert_array_equal([[2, 2], [2, 2]], y.numpy())
 
+  @run_sync_and_async
+  def testContextDeviceUpdated(self):
+    """Tests that the context device is correctly updated."""
+
+    with ops.device("cpu:0"):
+      x1 = array_ops.ones([2, 2])
+      x2 = array_ops.ones([2, 2])
+      y = math_ops.matmul(x1, x2)
+    np.testing.assert_array_equal([[2, 2], [2, 2]], y.numpy())
+
+    # `y` is placed on the local CPU as expected.
+    self.assertEqual(y.device,
+                     "/job:%s/replica:0/task:0/device:CPU:0" % JOB_NAME)
+
 
 if __name__ == "__main__":
   ops.enable_eager_execution()
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index 74e648ee6f..617c3c6f49 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -457,6 +457,10 @@ class Context(object):
     Raises:
       ValueError: If name is not a string or is an invalid device name.
     """
+    devices = self._context_devices
+    if devices is None:
+      self._initialize_handle_and_devices()
+      devices = self._context_devices
     eager_context = self._eager_context
     old_device_name = eager_context.device_name
     old_device_spec = eager_context.device_spec
@@ -477,8 +481,7 @@ class Context(object):
         if old_device_name:
           new_device_spec = copy.copy(old_device_spec)
         else:
-          new_device_spec = pydev.DeviceSpec.from_string(
-              "/job:localhost/replica:0/task:0/device:CPU:0")
+          new_device_spec = pydev.DeviceSpec.from_string(devices[0])
         new_device_spec.merge_from(device_spec)
       else:
         new_device_spec = pydev.DeviceSpec.from_string("")
-- 
GitLab


From e70b97f778cd19949f80eb72ea115c8fe8cde8fe Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Thu, 11 Oct 2018 14:45:32 -0700
Subject: [PATCH 0812/1085] Add utility function to remove FunctionDef.

PiperOrigin-RevId: 216764308
---
 tensorflow/core/framework/function.cc | 10 ++++++++--
 tensorflow/core/framework/function.h  | 11 ++++++++++-
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index aa2f274752..be11f8874c 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -1112,7 +1112,7 @@ Status FunctionLibraryDefinition::ReplaceFunction(const string& func,
                                                   const FunctionDef& fdef) {
   mutex_lock l(mu_);
   bool added;
-  TF_RETURN_IF_ERROR(RemoveFunction(func));
+  TF_RETURN_IF_ERROR(RemoveFunctionHelper(func));
   TF_RETURN_IF_ERROR(AddFunctionDefHelper(fdef, &added));
   return Status::OK();
 }
@@ -1126,6 +1126,12 @@ Status FunctionLibraryDefinition::ReplaceGradient(const GradientDef& grad) {
 }
 
 Status FunctionLibraryDefinition::RemoveFunction(const string& func) {
+  mutex_lock l(mu_);
+  TF_RETURN_IF_ERROR(RemoveFunctionHelper(func));
+  return Status::OK();
+}
+
+Status FunctionLibraryDefinition::RemoveFunctionHelper(const string& func) {
   const auto& i = function_defs_.find(func);
   if (i == function_defs_.end()) {
     return errors::InvalidArgument("Tried to remove non-existent function ",
@@ -1149,7 +1155,7 @@ void FunctionLibraryDefinition::Remove(
     const std::vector<string>& funcs,
     const std::vector<string>& funcs_with_grads) {
   for (const string& f : funcs) {
-    Status s = RemoveFunction(f);
+    Status s = RemoveFunctionHelper(f);
     DCHECK(s.ok());
   }
   for (const string& f : funcs_with_grads) {
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index d4beca7e11..fa58e36a21 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -329,6 +329,8 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
 
   // Replaces the function corresponding to `func` with `fdef`. Returns
   // a non-OK status if "func" was not found in the library, OK otherwise.
+  // Please be careful when replacing function: make sure all previous pointers
+  // returned by `Find()` are no longer in use.
   Status ReplaceFunction(const string& func, const FunctionDef& fdef);
 
   // Replaces the gradient corresponding to `grad.function_name()`. Returns
@@ -336,6 +338,13 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
   // otherwise.
   Status ReplaceGradient(const GradientDef& grad);
 
+  // Removes the function corresponding to 'func'. Returns a non-OK status if
+  // 'func' was not found in the library, OK otherwise.
+  // Please be careful when removing function: make sure there are no other
+  // nodes using the function, and all previous pointers returned by `Find()`
+  // are no longer in use.
+  Status RemoveFunction(const string& func);
+
   // Adds the functions and gradients in 'other' to this function library.
   // Duplicate functions and gradients are ignored.
   // This operation is atomic.
@@ -441,7 +450,7 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
   // Remove `func` from the library. Returns non-OK Status unless `func` is in
   // the library. This should only be called when there is a guarantee that the
   // function being removed hasn't been retrieved with `Find`.
-  Status RemoveFunction(const string& func) EXCLUSIVE_LOCKS_REQUIRED(mu_);
+  Status RemoveFunctionHelper(const string& func) EXCLUSIVE_LOCKS_REQUIRED(mu_);
 
   // Remove gradient of function `func` from the library. Returns non-OK Status
   // unless `func` has a gradient.
-- 
GitLab


From bbc543de4cd0436fb42aec8c46d4f6e4dd5fbddf Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Thu, 11 Oct 2018 15:20:57 -0700
Subject: [PATCH 0813/1085] [tf.data: NUMA-map-and-batch]: Synchronize access
 to internal state.

PiperOrigin-RevId: 216770405
---
 .../experimental/numa_map_and_batch_dataset_op.cc   | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
index d83edb9667..5cf97cdb62 100644
--- a/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
@@ -235,10 +235,15 @@ class NumaMapAndBatchDatasetOp : public UnaryDatasetOpKernel {
           worker = workers_[cur_block_].get();
           cur_block_ = (cur_block_ + 1) % workers_.size();
         }
-        TF_RETURN_IF_ERROR(worker->manager.GetBatch(
-            ctx, dataset()->drop_remainder_, &global_end_of_input_, out_tensors,
-            end_of_sequence));
-        return Status::OK();
+        bool global_end_of_input_local = false;
+        Status s = worker->manager.GetBatch(ctx, dataset()->drop_remainder_,
+                                            &global_end_of_input_local,
+                                            out_tensors, end_of_sequence);
+        if (global_end_of_input_local) {
+          mutex_lock l(*mu_);
+          global_end_of_input_ = global_end_of_input_local;
+        }
+        return s;
       }
 
      protected:
-- 
GitLab


From e48365c24665d5ddf7a10f4ae7aef50b4f2495d4 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Thu, 11 Oct 2018 15:22:19 -0700
Subject: [PATCH 0814/1085] Update tf.keras version number.

PiperOrigin-RevId: 216770622
---
 tensorflow/python/keras/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/__init__.py b/tensorflow/python/keras/__init__.py
index 198c66d9e1..be46a894e1 100644
--- a/tensorflow/python/keras/__init__.py
+++ b/tensorflow/python/keras/__init__.py
@@ -44,7 +44,7 @@ from tensorflow.python.keras.models import Sequential
 
 from tensorflow.python.util.tf_export import tf_export
 
-__version__ = '2.1.6-tf'
+__version__ = '2.2.4-tf'
 
 tf_export('keras.__version__').export_constant(__name__, '__version__')
 
-- 
GitLab


From e4211acd8f45ad56bfc818ecac7ef225e50b72c7 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Thu, 11 Oct 2018 15:23:27 -0700
Subject: [PATCH 0815/1085] Fix Keras support in Python 3.

PiperOrigin-RevId: 216770808
---
 tensorflow/contrib/tpu/python/tpu/keras_support.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index af183b3232..d628258b9d 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -286,7 +286,8 @@ def _cross_replica_concat(tensor, core_id, num_cores, name):
                     '{}.'.format(input_dtype, name))
 
   batch_size = tensor.shape[0]
-  mask = math_ops.to_float(math_ops.equal(range(num_cores), core_id))
+  mask = math_ops.to_float(
+      math_ops.equal(np.arange(num_cores, dtype=np.int32), core_id))
   mask = array_ops.reshape(mask, [num_cores] + [1] * tensor.shape.ndims)
   result = mask * math_ops.to_float(tensor)
   local_tensor_with_holes = array_ops.reshape(result,
-- 
GitLab


From 66286d2d7cf1318bf6f27a9993ff66de06e5f145 Mon Sep 17 00:00:00 2001
From: knight <1004815462@qq.com>
Date: Fri, 12 Oct 2018 06:46:11 +0800
Subject: [PATCH 0816/1085] Update kafka_test.sh

---
 .../kafka/python/kernel_tests/kafka_test.sh      | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh
index def41c670f..69553c3bd1 100644
--- a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh
+++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh
@@ -25,22 +25,22 @@ fi
 action=$1
 container=$2
 if [ "$action" == "start" ]; then
-    echo "pull spotify/kafka"
+    echo pull spotify/kafka
     docker pull spotify/kafka
-    echo "pull spotify/kafka successfully"
+    echo pull spotify/kafka successfully
     docker run -d --rm --net=host --name=$container spotify/kafka
-    echo "Wait 5 secs until kafka is up and running"
+    echo Wait 5 secs until kafka is up and running
     sleep 5
-    echo "Create test topic"
+    echo Create test topic
     docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test'
-    echo "Create test message"
+    echo Create test message
     docker exec $container bash -c 'echo -e "D0\nD1\nD2\nD3\nD4\nD5\nD6\nD7\nD8\nD9" > /test'
-    echo "Produce test message"
+    echo Produce test message
     docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-console-producer.sh --topic test --broker-list 127.0.0.1:9092 < /test'
-    echo "Container $container started successfully"
+    echo Container $container started successfully
 elif [ "$action" == "stop" ]; then
     docker rm -f $container
-    echo "Container $container removed successfully"
+    echo Container $container removed successfully
 else
   echo "Usage: $0 start|stop <kafka container name>" >&2
   exit 1
-- 
GitLab


From 19d8dbb7df90a2caf8f07e407a658640be4a99c2 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 11 Oct 2018 15:47:31 -0700
Subject: [PATCH 0817/1085] 1.12-rc1 cherry-pick request: TPUMirroredVariable
 device and init fixes. (#22907)

* Add 'device' property to TPUMirroredVariable, so tf.train.init_from_checkpoint can be supported.

PiperOrigin-RevId: 215843249

* In TPUMirroredVariable, when setting _initializer_op and _initial_value attributes, set the attributes of all the contained variables. This fixes a bug that tf.train.init_from_checkpoint doesn't overwrite the initialization values correctly for TPUMirroredVariable.

PiperOrigin-RevId: 216429476
---
 tensorflow/contrib/distribute/python/values.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index 18ceba42c2..472cb4230c 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -475,6 +475,11 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
     self._aggregation = aggregation
     # Needed for GradientTape
     self._trainable = self._primary_var.trainable
+    # Typically like `DistributedVariable`, a `TPUMirroredVariable`'s
+    # initializer is composed of the initializers of the components variables.
+    # However, in some cases, such as when restoring from a checkpoint, we may
+    # set the _initializer_op property on the entire `TPUMirroredVariable`.
+    self._initializer_op = None
 
   def _get(self, device=None):
     """Returns the value for the current device or raises a ValueError."""
@@ -571,6 +576,10 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
           ValueError("Device %s not found in %s (current device %s)" %
                      (device, self._index.keys(), device_util.current())), e)
 
+  @property
+  def device(self):
+    return self._get().device
+
   # The arguments to update() are automatically unwrapped so the update()
   # function would normally see regular variables, not MirroredVariables.
   # However, the update function can still operate on wrapped MirroredVariables
@@ -700,8 +709,12 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
 
   @property
   def initializer(self):
-    return control_flow_ops.group(
-        [v.initializer for v in nest.flatten(self._index)])
+    if self._initializer_op:
+      init_op = self._initializer_op
+    else:
+      init_op = control_flow_ops.group(
+          [v.initializer for v in self._index.values()])
+    return init_op
 
   @property
   def graph(self):
-- 
GitLab


From 2b976431322e680b2c70a5a48a2c5de22b863501 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 11 Oct 2018 15:41:51 -0700
Subject: [PATCH 0818/1085] Add support for tracking deleted variables in
 activity analysis. Refactor the field names to be more consistent. Remove the
 unused `returned` field.

PiperOrigin-RevId: 216773791
---
 .../autograph/converters/control_flow.py      |  2 +-
 .../converters/side_effect_guards.py          |  2 +-
 .../pyct/static_analysis/activity.py          | 69 ++++++++++---------
 .../pyct/static_analysis/activity_test.py     | 10 +--
 .../pyct/static_analysis/liveness.py          |  6 +-
 .../static_analysis/reaching_definitions.py   |  6 +-
 6 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py
index e52e98f42a..a7596be291 100644
--- a/tensorflow/python/autograph/converters/control_flow.py
+++ b/tensorflow/python/autograph/converters/control_flow.py
@@ -287,7 +287,7 @@ class ControlFlowTransformer(converter.Base):
     # TODO(mdan): Handle the case above.
     cond_scope = anno.getanno(node, annos.NodeAnno.COND_SCOPE)
     cond_closure = set()
-    for s in cond_scope.used:
+    for s in cond_scope.read:
       cond_closure.update(s.support_set)
     cond_closure -= loop_state
 
diff --git a/tensorflow/python/autograph/converters/side_effect_guards.py b/tensorflow/python/autograph/converters/side_effect_guards.py
index 6e48e57bde..910c470f97 100644
--- a/tensorflow/python/autograph/converters/side_effect_guards.py
+++ b/tensorflow/python/autograph/converters/side_effect_guards.py
@@ -126,7 +126,7 @@ class SideEffectGuardTransformer(converter.Base):
       # In addition, avoid renaming well-known names.
       # TODO(mdan): Move these names into config.
       unguarded_names = (qual_names.QN('self'), qual_names.QN('tf'))
-      guarded_args = tuple(s for s in args_scope.used
+      guarded_args = tuple(s for s in args_scope.read
                            if not s.is_composite() and s not in unguarded_names)
 
       # TODO(mdan): Include all arguments which depended on guarded_args too.
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py
index 0b95b714fb..27c940d4f4 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py
@@ -42,9 +42,20 @@ class Scope(object):
   Note that scopes do not necessarily align with Python's scopes. For example,
   the body of an if statement may be considered a separate scope.
 
+  Caution - the AST references held by this object are weak.
+
   Attributes:
-    modified: identifiers modified in this scope
-    used: identifiers referenced in this scope
+    modified: Set[qual_names.QN], identifiers modified in this scope
+    read: Set[qual_names.QN], identifiers read in this scope
+    deleted: Set[qual_names.QN], identifiers deleted in this scope
+    params: WeakValueDictionary[qual_names.QN, ast.Node], function arguments
+      visible in this scope, mapped to the function node that defines them
+
+  Note - simple statements may never delete and modify a symbol at the same
+  time. However, compound ones like if statements can. In that latter case, it's
+  undefined whether the symbol is actually modified or deleted upon statement
+  exit. Certain analyses like reaching definitions need to be careful about
+  this.
   """
 
   def __init__(self, parent, isolated=True, add_unknown_symbols=False):
@@ -63,19 +74,22 @@ class Scope(object):
     self.parent = parent
     self.add_unknown_symbols = add_unknown_symbols
     self.modified = set()
-    self.used = set()
-    self.params = {}
-    self.returned = set()
+    self.read = set()
+    self.deleted = set()
+    self.params = weakref.WeakValueDictionary()
+
+  @property
+  def affects_parent(self):
+    return not self.isolated and self.parent is not None
 
-  # TODO(mdan): Rename to `reserved`
   @property
   def referenced(self):
-    if not self.isolated and self.parent is not None:
-      return self.used | self.parent.referenced
-    return self.used
+    if self.affects_parent:
+      return self.read | self.parent.referenced
+    return self.read
 
   def __repr__(self):
-    return 'Scope{r=%s, w=%s}' % (tuple(self.used), tuple(self.modified))
+    return 'Scope{r=%s, w=%s}' % (tuple(self.read), tuple(self.modified))
 
   def copy_from(self, other):
     """Recursively copies the contents of this scope from another scope."""
@@ -85,9 +99,8 @@ class Scope(object):
       self.parent.copy_from(other.parent)
     self.isolated = other.isolated
     self.modified = copy.copy(other.modified)
-    self.used = copy.copy(other.used)
+    self.read = copy.copy(other.read)
     self.params = copy.copy(other.params)
-    self.returned = copy.copy(other.returned)
 
   @classmethod
   def copy_of(cls, other):
@@ -105,32 +118,27 @@ class Scope(object):
     if other.parent is not None:
       self.parent.merge_from(other.parent)
     self.modified |= other.modified
-    self.used |= other.used
+    self.read |= other.read
     self.params.update(other.params)
-    self.returned |= other.returned
 
   def mark_read(self, name):
-    self.used.add(name)
+    self.read.add(name)
     if self.parent is not None and name not in self.params:
       self.parent.mark_read(name)
 
   def mark_modified(self, name):
-    """Marks the given symbol as modified in the current scope."""
     self.modified.add(name)
-    if not self.isolated:
-      if self.parent is not None:
-        self.parent.mark_modified(name)
+    if self.affects_parent:
+      self.parent.mark_modified(name)
+
+  def mark_deleted(self, name):
+    self.deleted.add(name)
 
   def mark_param(self, name, owner):
     # Assumption: all AST nodes have the same life span. This lets us use
     # a weak reference to mark the connection between a symbol node and the
     # function node whose argument that symbol is.
-    self.params[name] = weakref.ref(owner)
-
-  def mark_returned(self, name):
-    self.returned.add(name)
-    if not self.isolated and self.parent is not None:
-      self.parent.mark_returned(name)
+    self.params[name] = owner
 
 
 class _Lambda(object):
@@ -157,7 +165,6 @@ class ActivityAnalyzer(transformer.Base):
 
     # Note: all these flags crucially rely on the respective nodes are
     # leaves in the AST, that is, they cannot contain other statements.
-    self._in_return_statement = False
     self._in_aug_assign = False
     self._in_function_def_args = False
 
@@ -212,13 +219,12 @@ class ActivityAnalyzer(transformer.Base):
         # TODO(mdan): Is this case possible at all?
         raise NotImplementedError(
             'Param "{}" outside a function arguments or lambda.'.format(qn))
+    elif isinstance(node.ctx, gast.Del):
+      self.scope.mark_deleted(qn)
     else:
       raise ValueError('Unknown context {} for node "{}".'.format(
           type(node.ctx), qn))
 
-    if self._in_return_statement:
-      self.scope.mark_returned(qn)
-
   def _enter_scope(self, isolated):
     self.scope = Scope(self.scope, isolated=isolated)
 
@@ -242,10 +248,7 @@ class ActivityAnalyzer(transformer.Base):
     return self._process_statement(node)
 
   def visit_Return(self, node):
-    self._in_return_statement = True
-    node = self._process_statement(node)
-    self._in_return_statement = False
-    return node
+    return self._process_statement(node)
 
   def visit_Assign(self, node):
     return self._process_statement(node)
diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
index 65267751c1..997d9a8aff 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity_test.py
@@ -33,19 +33,19 @@ from tensorflow.python.platform import test
 class ScopeTest(test.TestCase):
 
   def assertMissing(self, qn, scope):
-    self.assertNotIn(qn, scope.used)
+    self.assertNotIn(qn, scope.read)
     self.assertNotIn(qn, scope.modified)
 
   def assertReadOnly(self, qn, scope):
-    self.assertIn(qn, scope.used)
+    self.assertIn(qn, scope.read)
     self.assertNotIn(qn, scope.modified)
 
   def assertWriteOnly(self, qn, scope):
-    self.assertNotIn(qn, scope.used)
+    self.assertNotIn(qn, scope.read)
     self.assertIn(qn, scope.modified)
 
   def assertReadWrite(self, qn, scope):
-    self.assertIn(qn, scope.used)
+    self.assertIn(qn, scope.read)
     self.assertIn(qn, scope.modified)
 
   def test_basic(self):
@@ -137,7 +137,7 @@ class ActivityAnalyzerTest(test.TestCase):
 
   def assertScopeIs(self, scope, used, modified):
     """Assert the scope contains specific used, modified & created variables."""
-    self.assertSymbolSetsAre(used, scope.used, 'read')
+    self.assertSymbolSetsAre(used, scope.read, 'read')
     self.assertSymbolSetsAre(modified, scope.modified, 'modified')
 
   def test_print_statement(self):
diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness.py b/tensorflow/python/autograph/pyct/static_analysis/liveness.py
index 36960d0103..ad11057a0b 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/liveness.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/liveness.py
@@ -55,11 +55,11 @@ class Analyzer(cfg.GraphVisitor):
     if anno.hasanno(node.ast_node, anno.Static.SCOPE):
       node_scope = anno.getanno(node.ast_node, anno.Static.SCOPE)
 
-      gen = node_scope.used | self.extra_gen.get(node.ast_node, frozenset())
+      gen = node_scope.read | self.extra_gen.get(node.ast_node, frozenset())
       # TODO(mdan): verify whether composites' parents need to be added.
-      # E.g. if x.y is live whether x needs to be added. Theoretically the
+      # E.g. whether x needs to be added if x.y is live. Theoretically the
       # activity analysis should have both so that wouldn't be needed.
-      kill = node_scope.modified
+      kill = node_scope.modified | node_scope.deleted
 
       live_out = set()
       for n in node.next:
diff --git a/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions.py b/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions.py
index 9aaf318a9f..d1587d8178 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions.py
@@ -28,6 +28,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import weakref
+
 import gast
 
 from tensorflow.python.autograph.pyct import anno
@@ -137,12 +139,12 @@ class Analyzer(cfg.GraphVisitor):
         for s in node_scope.modified:
           def_ = self._definition_factory()
           if s in node_scope.params:
-            def_.param_of = node_scope.params[s]
+            def_.param_of = weakref.ref(node_scope.params[s])
           node_symbols[s] = def_
         self.gen_map[node] = _NodeState(node_symbols)
 
       gen = self.gen_map[node]
-      kill = node_scope.modified
+      kill = node_scope.modified | node_scope.deleted
       defs_out = gen | (defs_in - kill)
 
     else:
-- 
GitLab


From eddd87f0e3b7ae67a4476436b77cf311e0d448d5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 15:41:59 -0700
Subject: [PATCH 0819/1085] Move macros to cc from headers. This will allow us
 to undef the macros.

PiperOrigin-RevId: 216773822
---
 .../reduce_slice_ops/kernels/reduce_slice_ops.cc       | 10 ++++++++++
 .../reduce_slice_ops/kernels/reduce_slice_ops.h        |  5 -----
 .../kernels/reduce_slice_ops_gpu.cu.cc                 | 10 ++++++++++
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.cc b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.cc
index 2def4f3f17..edcef3adea 100644
--- a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.cc
+++ b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.cc
@@ -30,6 +30,11 @@ using thread::ThreadPool;
 
 namespace functor {
 
+#define Sum(a, b) ((a) + (b))
+#define Prod(a, b) ((a) * (b))
+#define Max(a, b) ((a) > (b) ? (a) : (b))
+#define Min(a, b) ((a) < (b) ? (a) : (b))
+
 #define CPUReduceSliceFunctorReduceop(reduceop, beginning)                    \
   template <typename T, typename Index>                                       \
   struct ReduceSliceFunctor##reduceop<CPUDevice, T, Index> {                  \
@@ -234,6 +239,11 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_GPU_REDUCE_SLICE_KERNELS_ALL);
 #undef REGISTER_GPU_REDUCE_SLICE_KERNELS
 #undef REGISTER_GPU_REDUCE_SLICE_KERNELS_ALL
 
+#undef Sum
+#undef Prod
+#undef Min
+#undef Max
+
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h
index 69ef521c01..12bff1e916 100644
--- a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h
+++ b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h
@@ -21,11 +21,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_types.h"
 
-#define Sum(a, b) ((a) + (b))
-#define Prod(a, b) ((a) * (b))
-#define Max(a, b) ((a) > (b) ? (a) : (b))
-#define Min(a, b) ((a) < (b) ? (a) : (b))
-
 namespace tensorflow {
 
 class OpKernelContext;
diff --git a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc
index 9f2be03d71..204b83f7f5 100644
--- a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc
+++ b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc
@@ -29,6 +29,11 @@ using GPUDevice = Eigen::GpuDevice;
 
 namespace functor {
 
+#define Sum(a, b) ((a) + (b))
+#define Prod(a, b) ((a) * (b))
+#define Max(a, b) ((a) > (b) ? (a) : (b))
+#define Min(a, b) ((a) < (b) ? (a) : (b))
+
 #define GPUReduceSliceFunctorReduceop(reduceop, beginning)                     \
   template <typename T, typename Index>                                        \
   __global__ void ReduceSliceDeviceKernel##reduceop(                           \
@@ -94,6 +99,11 @@ TF_CALL_REAL_NUMBER_TYPES(DEFINE_GPU_SPECS)
 #undef DEFINE_GPU_REDUCEOP_SPECS_INDEX
 #undef DEFINE_GPU_SPECS
 
+#undef Sum
+#undef Prod
+#undef Min
+#undef Max
+
 }  // namespace functor
 }  // namespace tensorflow
 
-- 
GitLab


From 15a8a88452041517b7711d099ccd04a130c20d70 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Thu, 11 Oct 2018 16:04:16 -0700
Subject: [PATCH 0820/1085] Rely on call op placement for the default device
 inside a graph function

Device placements inside a function will follow the placement of the call operation unless a device scope is opened inside the function body. This means we no longer need device stacks in defun cache keys, and when functions get serialized in SavedModels the device won't be hard-coded.

Requires adding the current distribution strategy stack to the function cache key, since distribution strategies rely on functions being retraced for each new device (e.g. to access different variables on different devices), and before this CL retracing happened because the function was called with different devices set. This cache key addition does slow things down a bit, but (on my machine at least) the slowdown is more than offset by the gains from not specializing on the device stack.

Baseline before this CL:

entry {
  name: "MicroBenchmarks.benchmark_defun_without_signature"
  iters: 30000
  wall_time: 88.3192300797
  extras {
    key: "examples_per_sec"
    value {
      double_value: 11322.5624714
    }
  }
}

After this CL (includes distribution strategies in cache key):

entry {
  name: "MicroBenchmarks.benchmark_defun_without_signature"
  iters: 30000
  wall_time: 84.1960986455
  extras {
    key: "examples_per_sec"
    value {
      double_value: 11877.0348756
    }
  }
}

Hypothetical world where we didn't have to add distribution strategies to the cache key and also didn't need to add devices (i.e. max speedup to be had by optimizing the distribution strategies cache key addition):

entry {
  name: "MicroBenchmarks.benchmark_defun_without_signature"
  iters: 30000
  wall_time: 72.5416739782
  extras {
    key: "examples_per_sec"
    value {
      double_value: 13785.1795411
    }
  }
}

PiperOrigin-RevId: 216777533
---
 .../common_runtime/graph_execution_state.cc   |  3 +-
 tensorflow/core/common_runtime/placer.cc      | 46 +++++++---
 tensorflow/core/common_runtime/placer.h       | 12 ++-
 tensorflow/core/common_runtime/placer_test.cc |  2 +-
 .../core/kernels/partitioned_function_ops.cc  | 26 +++---
 tensorflow/python/eager/BUILD                 |  1 +
 tensorflow/python/eager/function.py           | 92 +++++++++++--------
 tensorflow/python/eager/function_test.py      | 64 +++++--------
 tensorflow/python/framework/test_ops.cc       | 23 +++++
 tensorflow/python/kernel_tests/BUILD          |  1 +
 .../python/kernel_tests/cond_v2_test.py       | 31 ++++---
 11 files changed, 181 insertions(+), 120 deletions(-)

diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc
index 4475fa979e..afa219cc0b 100644
--- a/tensorflow/core/common_runtime/graph_execution_state.cc
+++ b/tensorflow/core/common_runtime/graph_execution_state.cc
@@ -531,7 +531,8 @@ Status GraphExecutionState::InitBaseGraph(const BuildGraphOptions& options) {
   TF_RETURN_IF_ERROR(OptimizationPassRegistry::Global()->RunGrouping(
       OptimizationPassRegistry::PRE_PLACEMENT, optimization_options));
 
-  Placer placer(new_graph.get(), device_set_, session_options_);
+  Placer placer(new_graph.get(), device_set_, session_options_,
+                /* default_device= */ nullptr);
   // TODO(mrry): Consider making the Placer cancelable.
   TF_RETURN_IF_ERROR(placer.Run());
 
diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc
index 3b59995433..5e1ed13080 100644
--- a/tensorflow/core/common_runtime/placer.cc
+++ b/tensorflow/core/common_runtime/placer.cc
@@ -44,12 +44,18 @@ const StringPiece kColocationGroupPrefixStringPiece(kColocationGroupPrefix);
 // returned list is sorted by preferred type (higher numeric type is preferred).
 std::vector<Device*> FilterSupportedDevices(
     const std::vector<Device*>& devices,
-    const DeviceTypeVector& supported_device_types) {
+    const DeviceTypeVector& supported_device_types,
+    const Device* default_device) {
+  Device* filtered_default_device = nullptr;
   std::vector<Device*> filtered_devices;
   for (const DeviceType& d : supported_device_types) {
     for (Device* device : devices) {
       if (DeviceType(device->attributes().device_type()) == d) {
-        filtered_devices.emplace_back(device);
+        if (device == default_device) {
+          filtered_default_device = device;
+        } else {
+          filtered_devices.emplace_back(device);
+        }
       }
     }
   }
@@ -64,7 +70,16 @@ std::vector<Device*> FilterSupportedDevices(
     }
     return StringPiece(a->name()) < StringPiece(b->name());
   };
-  std::sort(filtered_devices.begin(), filtered_devices.end(), device_sort);
+  std::vector<Device*>::iterator sort_start;
+  if (filtered_default_device != nullptr) {
+    // Put the default device first outside of the normal ordering.
+    filtered_devices.emplace_back(filtered_default_device);
+    std::iter_swap(filtered_devices.begin(), std::prev(filtered_devices.end()));
+    sort_start = std::next(filtered_devices.begin());
+  } else {
+    sort_start = filtered_devices.begin();
+  }
+  std::sort(sort_start, filtered_devices.end(), device_sort);
   return filtered_devices;
 }
 
@@ -99,11 +114,12 @@ std::vector<Device*> FilterSupportedDevices(
 class ColocationGraph {
  public:
   ColocationGraph(Graph* graph, const DeviceSet* device_set,
-                  bool allow_soft_placement)
+                  bool allow_soft_placement, const Device* default_device)
       : graph_(graph),
         device_set_(device_set),
         device_types_(device_set->PrioritizedDeviceTypeList()),
-        allow_soft_placement_(allow_soft_placement) {
+        allow_soft_placement_(allow_soft_placement),
+        default_device_(default_device) {
     members_.resize(graph->num_node_ids());
   }
 
@@ -314,7 +330,8 @@ class ColocationGraph {
         // Filter devices into those that are compatible with the root
         // node (and its children).
         devices = FilterSupportedDevices(
-            devices, members_[node_root].supported_device_types);
+            devices, members_[node_root].supported_device_types,
+            default_device_);
       }
 
       // Perform soft placement if allow_soft_placement_ is set.
@@ -329,7 +346,8 @@ class ColocationGraph {
         device_set_->FindMatchingDevices(soft_device_name, &devices);
         if (!devices.empty()) {
           devices = FilterSupportedDevices(
-              devices, members_[node_root].supported_device_types);
+              devices, members_[node_root].supported_device_types,
+              default_device_);
         }
       }
 
@@ -396,7 +414,8 @@ class ColocationGraph {
         return errors::Internal("No devices are registered");
       }
       devices = FilterSupportedDevices(
-          device_set_->devices(), members_[node_root].supported_device_types);
+          device_set_->devices(), members_[node_root].supported_device_types,
+          default_device_);
 
       if (devices.empty()) {
         return errors::InvalidArgument(
@@ -659,6 +678,7 @@ class ColocationGraph {
   const DeviceSet* device_set_;  // Not owned.
   const std::vector<DeviceType> device_types_;
   const bool allow_soft_placement_;
+  const Device* default_device_;
 };
 
 // Returns true if the node has no inputs and produces outputs
@@ -684,15 +704,16 @@ bool IsExemptFromResourceInputColocation(const Node* node) {
 }  // namespace
 
 Placer::Placer(Graph* graph, const DeviceSet* devices,
-               const SessionOptions* options)
+               const SessionOptions* options, const Device* default_device)
     : graph_(graph),
       devices_(devices),
       options_(options),
       log_device_placement_(options != nullptr &&
-                            options->config.log_device_placement()) {}
+                            options->config.log_device_placement()),
+      default_device_(default_device) {}
 
 Placer::Placer(Graph* graph, const DeviceSet* devices)
-    : Placer(graph, devices, nullptr) {}
+    : Placer(graph, devices, nullptr, nullptr) {}
 
 Placer::~Placer() {}
 
@@ -703,7 +724,8 @@ Status Placer::Run() {
 
   ColocationGraph colocation_graph(
       graph_, devices_,
-      options_ == nullptr || options_->config.allow_soft_placement());
+      options_ == nullptr || options_->config.allow_soft_placement(),
+      default_device_);
 
   TF_RETURN_IF_ERROR(colocation_graph.InitializeMembers());
 
diff --git a/tensorflow/core/common_runtime/placer.h b/tensorflow/core/common_runtime/placer.h
index f97ffe7372..e3e8f3790c 100644
--- a/tensorflow/core/common_runtime/placer.h
+++ b/tensorflow/core/common_runtime/placer.h
@@ -62,9 +62,14 @@ class Placer {
   // Graph "graph" (nodes in which may or may not be assigned) on the
   // given DeviceSet "devices".
   //
-  // The "graph", and "devices" pointer arguments
-  // are borrowed by this Placer, and must outlive it.
-  Placer(Graph* graph, const DeviceSet* devices, const SessionOptions* options);
+  // If non-null, default_device is used where possible as a placement for nodes
+  // which do not have a device specified, ahead of other devices which would
+  // otherwise be higher priority.
+  //
+  // The "graph", "devices", and "default_device" pointer arguments are borrowed
+  // by this Placer, and must outlive it.
+  Placer(Graph* graph, const DeviceSet* devices, const SessionOptions* options,
+         const Device* default_device);
 
   Placer(Graph* graph, const DeviceSet* devices);
 
@@ -92,6 +97,7 @@ class Placer {
   const DeviceSet* const devices_;  // Not owned.
   const SessionOptions* options_;   // Not owned.
   const bool log_device_placement_;
+  const Device* default_device_;  // Not owned.
 
   TF_DISALLOW_COPY_AND_ASSIGN(Placer);
 };
diff --git a/tensorflow/core/common_runtime/placer_test.cc b/tensorflow/core/common_runtime/placer_test.cc
index 9b8a95e3b6..e3d2663b98 100644
--- a/tensorflow/core/common_runtime/placer_test.cc
+++ b/tensorflow/core/common_runtime/placer_test.cc
@@ -208,7 +208,7 @@ class PlacerTest : public ::testing::Test {
   //
   // REQUIRES: "*graph" was produced by the most recent call to BuildGraph.
   Status Place(Graph* graph, DeviceSet* devices, SessionOptions* options) {
-    Placer placer(graph, devices, options);
+    Placer placer(graph, devices, options, nullptr);
     return placer.Run();
   }
 
diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index b2b3cef59b..9efd1deba0 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -85,12 +85,6 @@ class PartitionedCallOp : public AsyncOpKernel {
     {
       mutex_lock l(mu_);
       if (function_handles_.find(lib) == function_handles_.end()) {
-        if (local_device_name_.empty()) {
-          // The full local device name isn't known at kernel construction
-          // time, hence the need to set it here.
-          local_device_name_ = lib->device()->name();
-        }
-
         // TODO(b/37549631): Because this kernel may correspond to a stateful
         // op, it may be shared by multiple subgraphs, which in turn may have
         // different `FunctionLibraryRuntime` objects and therefore different
@@ -153,7 +147,14 @@ class PartitionedCallOp : public AsyncOpKernel {
             OptimizationPassRegistry::Global()->RunGrouping(
                 OptimizationPassRegistry::PRE_PLACEMENT, optimization_options),
             done);
-        Placer placer(graph.get(), &device_set);
+
+        // Make the FunctionLibraryRuntime's device the default device if
+        // nothing else is hard coded. This allows the same function definition
+        // to be specialized to different devices depending on the
+        // PartitionedCallOp's device.
+        Placer placer(graph.get(), &device_set,
+                      nullptr, /* No session options */
+                      lib->device() /* Default device */);
         OP_REQUIRES_OK_ASYNC(ctx, placer.Run(), done);
         OP_REQUIRES_OK_ASYNC(
             ctx,
@@ -392,6 +393,7 @@ class PartitionedCallOp : public AsyncOpKernel {
       return;
     }
 
+    const string& local_device_name = lib->device()->name();
     FunctionLibraryRuntime::Options opts;
     opts.step_id = ctx->step_id();
     opts.step_container = ctx->step_container();
@@ -400,7 +402,7 @@ class PartitionedCallOp : public AsyncOpKernel {
     // TODO(akshayka): Consider selecting a runner on a per-device basis, i.e.,
     // using device-specific threadpools when available.
     opts.runner = ctx->runner();
-    opts.source_device = local_device_name_;
+    opts.source_device = local_device_name;
     opts.allow_dead_tensors = true;
     // TODO(akshayka): Accommodate the multiple-worker scenario by adding the
     // constructed rendezvous to a rendezvous manager.
@@ -428,7 +430,7 @@ class PartitionedCallOp : public AsyncOpKernel {
       const std::vector<int>& ret_indices = indices.second;
       opts.args_alloc_attrs = alloc_attrs.first;
       opts.rets_alloc_attrs = alloc_attrs.second;
-      if (target == local_device_name_) {
+      if (target == local_device_name) {
         opts.remote_execution = false;
         std::vector<Tensor> args = GetArgsForIndices(arg_indices, op_args);
         std::vector<Tensor>* rets = new std::vector<Tensor>;
@@ -530,10 +532,12 @@ class PartitionedCallOp : public AsyncOpKernel {
 
   NameAttrList func_;
   RewriterConfig rewriter_config_;
-  string local_device_name_;
   // Contains maps from device names to handles of function partitions, keyed by
   // FunctionLibraryRuntime pointers. (Because this kernel may be instantiated
-  // for a stateful op, different invocations of it may use different FLRs.)
+  // for a stateful op, different invocations of it may use different
+  // FLRs. Different device placements of PartitionedCallOp also use different
+  // FLRs, and we use this to set the "default" device for the function to
+  // PartitionedCallOp's device.)
   gtl::FlatMap<FunctionLibraryRuntime*,
                std::unique_ptr<gtl::FlatMap<string, FHandle>>>
       function_handles_ GUARDED_BY(mu_);
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index 72cf97dca3..005c7d9e7f 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -150,6 +150,7 @@ cuda_py_test(
         ":function",
         ":tape",
         ":test",
+        "//tensorflow/python:test_ops",
         "//tensorflow/python:clip_ops",
         "//tensorflow/python:init_ops",
         "//tensorflow/python:layers",
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 6ef07b71a9..7b97d3f6f1 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -85,12 +85,10 @@ def _create_substitute_placeholder(value, name=None, dtype=None):
   return placeholder
 
 
-def _get_device_functions(ctx, graph):
-  """Returns a tuple of device functions representing the device stack."""
-  if ctx.executing_eagerly():
-    return (pydev.merge_device(ctx.device_name),)
-  else:
-    return tuple(graph._device_functions_outer_to_inner)  # pylint: disable=protected-access
+def _device_stack_has_callable(device_stack):
+  """Checks whether a device stack contains a callable."""
+  return any(callable(spec._device_name_or_function)  # pylint: disable=protected-access
+             for spec in device_stack.peek_objs())
 
 
 def _parse_func_attrs(attributes):
@@ -157,8 +155,8 @@ class FuncGraph(ops.Graph):
   def __init__(self, name):
     """Construct a new FuncGraph.
 
-    The graph will inherit its graph key, collections, seed, device stack, and
-    distribution strategy stack from the current context or graph.
+    The graph will inherit its graph key, collections, seed, and distribution
+    strategy stack from the current context or graph.
 
     Args:
       name: the name of the function.
@@ -181,26 +179,39 @@ class FuncGraph(ops.Graph):
 
     graph = self.outer_graph
 
+    # pylint: disable=protected-access
+    # TODO(b/112906995, nareshmodi): distribution strategy depends on inheriting
+    # this stack from the default graph even in eager mode. Maybe it should be
+    # part of the eager context? This would also allow us to remove a
+    # get_default_graph() call from the function cache lookup.
+    self._distribution_strategy_stack = graph._distribution_strategy_stack
+    # We ignore device placements from any outer scopes while tracing the
+    # function when possible, to avoid hard-coding them in the function
+    # graph. "Default" placements come from the PartitionedCallOp's placement,
+    # so that the same trace of the Python function may be placed on several
+    # different devices and saved functions may be placed on new devices when
+    # restored.
     if context.executing_eagerly():
       self.seed = context.global_seed()
       self._xla_compile = (context.context().device_spec.device_type == "TPU")
-      self._add_device_to_stack(context.context().device_name)
+      if self._distribution_strategy_stack or self._xla_compile:
+        self._add_device_to_stack(context.context().device_name)
     else:
       self.seed = graph.seed
       self._xla_compile = getattr(graph, "_xla_compile", False)
-      self._device_function_stack = graph._device_function_stack.copy()  # pylint: disable=protected-access
-      self._colocation_stack = graph._colocation_stack.copy()  # pylint: disable=protected-access
-
+      # TODO(allenl): Figure out if we can remove colocation stack
+      # specialization (currently used in cond_v2), here and in the cache key.
+      self._colocation_stack = graph._colocation_stack.copy()
+      if (self._distribution_strategy_stack
+          or self._xla_compile
+          or _device_stack_has_callable(graph._device_function_stack)):
+        # Hard-code devices from device functions in the function body
+        self._device_function_stack = graph._device_function_stack.copy()
     # TODO(b/112165328, b/112906995): summaries depend on inheriting collections
     # from the default graph even in eager mode. It'd be nice to not have a
     # default graph with eager execution, so hopefully this will go away when we
     # remove collections.
-    # pylint: disable=protected-access
     self._collections = graph._collections
-    # TODO(b/112906995): distribution strategy depends on inheriting this stack
-    # from the default graph even in eager mode. Maybe it should be part of the
-    # eager context?
-    self._distribution_strategy_stack = graph._distribution_strategy_stack
     self._variable_creator_stack = graph._variable_creator_stack
     # Inherit the graph key, since this is used for matching variables in
     # optimizers.
@@ -556,8 +567,6 @@ class Function(object):
     self._output_shapes = tuple(
         output.shape for output in self._func_graph.outputs)
     self._attrs = _parse_func_attrs(attrs or {})
-    self._device_functions = tuple(
-        self._func_graph._device_functions_outer_to_inner)  # pylint: disable=protected-access
 
     self._inference_function = _EagerDefinedFunction(
         _inference_name(self._func_graph.name), self._func_graph,
@@ -575,19 +584,9 @@ class Function(object):
       The result of applying the TF function to `args`.
 
     Raises:
-      ValueError: If the current device stack does not match the device stack
-        under which the function was created, or if `args` contains anything
-        other than Tensors or Variables.
+      ValueError: If `args` contains anything other than Tensors or Variables.
     """
     ctx = context.context()
-    device_functions = _get_device_functions(ctx, ops.get_default_graph())
-    if device_functions != self._device_functions:
-      raise ValueError(
-          "The current device stack does not match the device stack under "
-          "which the TensorFlow function '%s' was created.\n"
-          "Current device stack: %s\n%s device stack: %s" %
-          (self._inference_function.name, device_functions,
-           self._inference_function.name, self._device_functions))
 
     for v in self._func_graph.variables:
       if v.trainable:
@@ -1149,17 +1148,34 @@ class PolymorphicFunction(object):
       executing_eagerly = ctx.executing_eagerly()
       execution_context = executing_eagerly or ops.get_default_graph()
 
+    # pylint: disable=protected-access
+    default_graph = ops.get_default_graph()
+    # TODO(b/117617952): The current distribution strategy will affect graph
+    # building (e.g. accessing different variables from different devices) and
+    # so requires retracing for each device.
+    uses_distribution_strategy = bool(
+        default_graph._distribution_strategy_stack)
     if executing_eagerly:
-      device_functions = (pydev.merge_device(ctx.device_name),)
       colocation_stack = ()
+      uses_xla = ctx.device_spec.device_type == "TPU"
+      if uses_distribution_strategy or uses_xla:
+        device_functions = (pydev.merge_device(ctx.device_name),)
+      else:
+        device_functions = ()
     else:
-      default_graph = ops.get_default_graph()
-      # Putting the device in the cache key ensures that call-site device
-      # annotations are respected.
-      device_functions = tuple(default_graph._device_functions_outer_to_inner)  # pylint: disable=protected-access
-      colocation_stack = tuple(default_graph._colocation_stack.peek_objs())  # pylint: disable=protected-access
-
-    return (cache_key, execution_context, device_functions, colocation_stack)
+      colocation_stack = tuple(default_graph._colocation_stack.peek_objs())
+      uses_xla = getattr(default_graph, "_xla_compile", False)
+      if (uses_distribution_strategy
+          or uses_xla
+          or _device_stack_has_callable(default_graph._device_function_stack)):
+        # Putting the device in the cache key ensures that call-site device
+        # annotations are respected.
+        device_functions = tuple(default_graph._device_functions_outer_to_inner)
+      else:
+        device_functions = ()
+    # pylint: enable=protected-access
+    return (cache_key, execution_context, device_functions, colocation_stack,
+            uses_xla)
 
   def _canonicalize_function_inputs(self, *args, **kwargs):
     """Canonicalizes `args` and `kwargs`.
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 7b708622f1..74e4dd3365 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -28,7 +28,6 @@ import numpy
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python import keras
-from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function
@@ -40,6 +39,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_spec
+from tensorflow.python.framework import test_ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras.engine import training as keras_training
 from tensorflow.python.layers import convolutional
@@ -1187,16 +1187,12 @@ class FunctionTest(test.TestCase):
 
     def multi_device_fn():
       with ops.device('/cpu:0'):
-        s0 = iterator_ops.Iterator.from_structure(
-            (dtypes.float32,)).string_handle()
+        s0 = test_ops.device_placement_op()
       with ops.device('/cpu:1'):
-        s1 = iterator_ops.Iterator.from_structure(
-            (dtypes.float32,)).string_handle()
+        s1 = test_ops.device_placement_op()
       with ops.device('/cpu:2'):
-        s2 = iterator_ops.Iterator.from_structure(
-            (dtypes.float32,)).string_handle()
-      s3 = iterator_ops.Iterator.from_structure(
-          (dtypes.float32,)).string_handle()
+        s2 = test_ops.device_placement_op()
+      s3 = test_ops.device_placement_op()
       return s0, s1, s2, s3
 
     defined = function.defun(multi_device_fn)
@@ -1208,24 +1204,24 @@ class FunctionTest(test.TestCase):
 
     with ops.device('/cpu:3'):
       outputs = self.evaluate(defined())
-    self.assertEqual(len(defined._function_cache), 2)
+    # All function definitions are agnostic to call site devices.
+    self.assertEqual(len(defined._function_cache), 1)
     self.assertIn(compat.as_bytes('CPU:0'), outputs[0])
     self.assertIn(compat.as_bytes('CPU:1'), outputs[1])
     self.assertIn(compat.as_bytes('CPU:2'), outputs[2])
     self.assertIn(compat.as_bytes('CPU:3'), outputs[3])
 
-    # This should retrieve the call-site-device agnostic function
-    defined()
-    self.assertEqual(len(defined._function_cache), 2)
-
-    # And this should retrieve the function created for '/cpu:3'
-    with ops.device('/cpu:3'):
-      defined()
-    self.assertEqual(len(defined._function_cache), 2)
+    with ops.device('/cpu:0'):
+      outputs = self.evaluate(defined())
+    self.assertEqual(len(defined._function_cache), 1)
+    self.assertIn(compat.as_bytes('CPU:0'), outputs[0])
+    self.assertIn(compat.as_bytes('CPU:1'), outputs[1])
+    self.assertIn(compat.as_bytes('CPU:2'), outputs[2])
+    self.assertIn(compat.as_bytes('CPU:0'), outputs[3])
 
   @test_util.run_in_graph_and_eager_modes(
       config=config_pb2.ConfigProto(device_count={'CPU': 2}))
-  def testCallingGraphFunctionOnIncompatibleDeviceRaisesError(self):
+  def testCallingGraphFunctionOnDifferentDevice(self):
 
     def func():
       return constant_op.constant(0)
@@ -1238,33 +1234,18 @@ class FunctionTest(test.TestCase):
       self.assertEqual(
           self.evaluate(cpu_graph_function()), self.evaluate(func()))
 
-    with self.assertRaisesRegexp(
-        ValueError,
-        'The current device stack does not match the device stack under '
-        'which the TensorFlow function \'.*func.*\' was created.\n'
-        'Current device stack: .*\n.*func.* device stack.*'):
-      with ops.device('cpu:1'):
-        cpu_graph_function()
+    with ops.device('cpu:1'):
+      self.assertEqual(0., self.evaluate(cpu_graph_function()))
 
-    with self.assertRaisesRegexp(
-        ValueError,
-        'The current device stack does not match the device stack under '
-        'which the TensorFlow function \'.*func.*\' was created.\n'
-        'Current device stack: .*\n.*func.* device stack.*'):
-      with ops.device(None):
-        cpu_graph_function()
+    with ops.device(None):
+      self.assertEqual(0., self.evaluate(cpu_graph_function()))
 
     default_graph_function = defined.get_concrete_function()
     self.assertEqual(
         self.evaluate(default_graph_function()), self.evaluate(func()))
 
-    with self.assertRaisesRegexp(
-        ValueError,
-        'The current device stack does not match the device stack under '
-        'which the TensorFlow function \'.*func.*\' was created.\n'
-        'Current device stack: .*\n.*func.* device stack.*'):
-      with ops.device('cpu:1'):
-        default_graph_function()
+    with ops.device('cpu:1'):
+      self.assertEqual(0., self.evaluate(default_graph_function()))
 
   @test_util.run_in_graph_and_eager_modes
   def testColocateWithRespected(self):
@@ -1280,8 +1261,7 @@ class FunctionTest(test.TestCase):
 
     @function.defun
     def foo():
-      return iterator_ops.Iterator.from_structure(
-          (dtypes.float32,)).string_handle()
+      return test_ops.device_placement_op()
 
     with ops.colocate_with(x):
       self.assertIn(compat.as_bytes('CPU:0'), self.evaluate(foo()))
diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc
index 070b5ac11f..99e184a8ac 100644
--- a/tensorflow/python/framework/test_ops.cc
+++ b/tensorflow/python/framework/test_ops.cc
@@ -657,4 +657,27 @@ REGISTER_OP("ComplexStruct")
     .Attr("t_c: list(type) >= 0")
     .SetShapeFn(shape_inference::UnknownShape);
 
+// An op which returns its own device placement as a string, useful for testing
+// where ops get placed.
+REGISTER_OP("DevicePlacementOp")
+    .Output("device: string")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ScalarShape);
+
+class DevicePlacementOp : public OpKernel {
+ public:
+  using OpKernel::OpKernel;
+
+  void Compute(OpKernelContext* ctx) override {
+    Tensor* output;
+    OP_REQUIRES_OK(ctx,
+                   ctx->allocate_output("device", TensorShape({}), &output));
+    output->scalar<string>()() = ctx->device()->name();
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("DevicePlacementOp").Device(DEVICE_CPU),
+                        DevicePlacementOp);
+REGISTER_KERNEL_BUILDER(Name("DevicePlacementOp").Device(DEVICE_GPU),
+                        DevicePlacementOp);
 }  // end namespace tensorflow
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index cc6fbf26c2..776d1ecc25 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -3262,6 +3262,7 @@ cuda_py_test(
     srcs = ["cond_v2_test.py"],
     additional_deps = [
         "//tensorflow/python:array_ops",
+        "//tensorflow/python:test_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:cond_v2",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 0e7c2f8ae6..833a0d152c 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -24,6 +24,8 @@ from tensorflow.python.eager import function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import cond_v2
 from tensorflow.python.ops import control_flow_ops
@@ -878,22 +880,27 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
       with self.session(graph=g):
 
         def fn():
-          c = constant_op.constant(3.0)
-          self.assertEqual("/device:CPU:0", c.op.device)
-          return c
+          self.assertEqual("", constant_op.constant(3.0).op.device)
+          return test_ops.device_placement_op()
 
         with ops.device("/device:CPU:0"):
-          self.assertEquals(
-              cond_v2.cond_v2(constant_op.constant(True), fn, fn).eval(), 3)
+          self.assertIn(
+              compat.as_bytes("CPU:0"),
+              self.evaluate(cond_v2.cond_v2(constant_op.constant(True),
+                                            fn, fn)))
 
         def fn2():
-          c = constant_op.constant(3.0)
-          self.assertEqual("/device:GPU:0", c.op.device)
-          return c
-
-        with ops.device("/device:GPU:0"):
-          self.assertEquals(
-              cond_v2.cond_v2(constant_op.constant(True), fn2, fn2).eval(), 3)
+          self.assertEqual("", constant_op.constant(3.0).op.device)
+          return test_ops.device_placement_op()
+
+        if test_util.is_gpu_available():
+          with ops.device("/device:GPU:0"):
+            self.assertIn(
+                compat.as_bytes("GPU:0"),
+                self.evaluate(cond_v2.cond_v2(constant_op.constant(True),
+                                              fn2, fn2)))
+        else:
+          self.skipTest("Test requrires a GPU to check GPU device placement.")
 
   def testDeviceInAndOutOfCond(self):
     with ops.Graph().as_default() as g:
-- 
GitLab


From df5efef8beed6437e57954609e729b4b32dd5d70 Mon Sep 17 00:00:00 2001
From: Shimin Guo <smguo2001@gmail.com>
Date: Thu, 11 Oct 2018 16:22:02 -0700
Subject: [PATCH 0821/1085] _create_c_op: copy over device placement

---
 tensorflow/python/framework/ops.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 50ab118fd6..52ad54a44e 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1605,6 +1605,8 @@ def _create_c_op(graph, node_def, inputs, control_inputs):
   op_desc = c_api.TF_NewOperation(graph._c_graph,
                                   compat.as_str(node_def.op),
                                   compat.as_str(node_def.name))
+  if node_def.device:
+    c_api.TF_SetDevice(op_desc, node_def.device)
   # Add inputs
   for op_input in inputs:
     if isinstance(op_input, (list, tuple)):
-- 
GitLab


From 0e525204967f7eaf01e79c9a651a3dd1f1d4d3c1 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 11 Oct 2018 16:36:57 -0700
Subject: [PATCH 0822/1085] Fix Keras support in Python 3. (#22910)

PiperOrigin-RevId: 216770808
---
 tensorflow/contrib/tpu/python/tpu/keras_support.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index af183b3232..d628258b9d 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -286,7 +286,8 @@ def _cross_replica_concat(tensor, core_id, num_cores, name):
                     '{}.'.format(input_dtype, name))
 
   batch_size = tensor.shape[0]
-  mask = math_ops.to_float(math_ops.equal(range(num_cores), core_id))
+  mask = math_ops.to_float(
+      math_ops.equal(np.arange(num_cores, dtype=np.int32), core_id))
   mask = array_ops.reshape(mask, [num_cores] + [1] * tensor.shape.ndims)
   result = mask * math_ops.to_float(tensor)
   local_tensor_with_holes = array_ops.reshape(result,
-- 
GitLab


From 6f0357c4ca7dc8335973837cb9bc9b4f4a034d2b Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 11 Oct 2018 16:42:29 -0700
Subject: [PATCH 0823/1085] Allow the "del" statement in static analysis.

PiperOrigin-RevId: 216783344
---
 .../pyct/static_analysis/activity.py          |  6 ++++
 .../pyct/static_analysis/liveness_test.py     | 28 +++++++++++++++
 .../reaching_definitions_test.py              | 35 +++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py
index 27c940d4f4..4359e0a268 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/activity.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py
@@ -220,6 +220,9 @@ class ActivityAnalyzer(transformer.Base):
         raise NotImplementedError(
             'Param "{}" outside a function arguments or lambda.'.format(qn))
     elif isinstance(node.ctx, gast.Del):
+      # The read matches the Python semantics - attempting to delete an
+      # undefined symbol is illegal.
+      self.scope.mark_read(qn)
       self.scope.mark_deleted(qn)
     else:
       raise ValueError('Unknown context {} for node "{}".'.format(
@@ -261,6 +264,9 @@ class ActivityAnalyzer(transformer.Base):
     self._in_aug_assign = False
     return node
 
+  def visit_Delete(self, node):
+    return self._process_statement(node)
+
   def visit_Name(self, node):
     node = self.generic_visit(node)
     self._track_symbol(node)
diff --git a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py
index 7b67f8f608..4366808d49 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/liveness_test.py
@@ -153,6 +153,20 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveOut(fn_body[0], 'max')
 
+  def test_live_out_deletion(self):
+
+    def test_fn(x, y, a):
+      for _ in a:
+        if x:
+          del y
+        else:
+          y = 0
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveOut(fn_body[0], ())
+
   def test_live_in_stacked_if(self):
 
     def test_fn(x, a, b, c):
@@ -214,6 +228,20 @@ class LivenessTest(test.TestCase):
 
     self.assertHasLiveIn(fn_body[0], ('a', 'y', 'z'))
 
+  def test_live_in_deletion(self):
+
+    def test_fn(x, y, a):
+      for _ in a:
+        if x:
+          del y
+        else:
+          y = 0
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    self.assertHasLiveIn(fn_body[0], ('a', 'x', 'y'))
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions_test.py b/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions_test.py
index 373a2cb38f..8c0d518507 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions_test.py
@@ -238,6 +238,41 @@ class DefinitionInfoTest(test.TestCase):
     self.assertSameDef(creation, mutation)
     self.assertSameDef(creation, use)
 
+  def test_deletion_partial(self):
+
+    def test_fn(a):
+      a = 0
+      if a:
+        del a
+      else:
+        a = 1
+      return a
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    first_def = fn_body[0].targets[0]
+    second_def = fn_body[1].orelse[0].targets[0]
+    use = fn_body[2].value
+    self.assertNotSameDef(use, first_def)
+    self.assertSameDef(use, second_def)
+
+  def test_deletion_total(self):
+
+    def test_fn(a):
+      if a:
+        a = 0
+      else:
+        a = 1
+      del a
+      return a
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body[0].body
+
+    use = fn_body[2].value
+    self.assertHasDefs(use, 0)
+
   def test_replacement(self):
 
     def foo(a):
-- 
GitLab


From 8c078cec59659ea0991f9a8c7be4ae216322b8ca Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 11 Oct 2018 16:48:57 -0700
Subject: [PATCH 0824/1085] Fix use-of-uninitialized-value bug for bfloat16 in
 MatrixBandPart. (#22911)

MatrixBandPart ends up calling bfloat16() to create a zero value, but the implementation of bfloat16::bfloat16() returned an uninitialized value, not zero.

PiperOrigin-RevId: 216761660
---
 tensorflow/core/framework/bfloat16_test.cc         | 14 ++++++++++++++
 tensorflow/core/lib/bfloat16/bfloat16.h            |  7 ++++++-
 .../kernel_tests/matrix_band_part_op_test.py       |  9 +++++----
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc
index 0a1b5e1975..d71f92151d 100644
--- a/tensorflow/core/framework/bfloat16_test.cc
+++ b/tensorflow/core/framework/bfloat16_test.cc
@@ -23,6 +23,20 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+TEST(Bfloat16Test, DefaultValueIsZero) {
+  EXPECT_EQ(0.0f, static_cast<float>(bfloat16()));
+}
+
+TEST(Bfloat16Test, RepresentableFloatsRoundTripViaBfloat16) {
+  const std::vector<float> values = {
+      -std::numeric_limits<float>::infinity(), -1.0, -0.5, -0.0, 0.0, 0.5, 1.0,
+      std::numeric_limits<float>::infinity(),
+  };
+  for (float v : values) {
+    EXPECT_EQ(v, static_cast<float>(static_cast<bfloat16>(v)));
+  }
+}
+
 TEST(Bfloat16Test, Simple) {
   bfloat16 a(12);
   // Floating point representation of 12: 0x41400000
diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h
index 5c917e80c1..4408546580 100644
--- a/tensorflow/core/lib/bfloat16/bfloat16.h
+++ b/tensorflow/core/lib/bfloat16/bfloat16.h
@@ -43,7 +43,9 @@ typedef std::complex<double> complex128;
 
 // see framework/bfloat16.h for description.
 struct bfloat16 {
-  B16_DEVICE_FUNC bfloat16() {}
+  // The default constructor must yield a zero value, not an uninitialized
+  // value; some TF kernels use T() as a zero value.
+  B16_DEVICE_FUNC bfloat16() : value(ZERO_VALUE) {}
 
   B16_DEVICE_FUNC static bfloat16 truncate_to_bfloat16(const float v) {
     bfloat16 output;
@@ -376,6 +378,9 @@ struct bfloat16 {
   static const uint16_t NAN_VALUE = 0x7FC0;
 
  private:
+  // A value that represents "zero".
+  static const uint16_t ZERO_VALUE = 0;
+
   B16_DEVICE_FUNC static bool float_isnan(const float& x) {
 #ifdef __CUDA_ARCH__
     return ::isnan(x);
diff --git a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
index 68d626de2c..b73a73cb0d 100644
--- a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
@@ -137,12 +137,13 @@ class MatrixBandPartBenchmark(test_lib.Benchmark):
 
 
 if __name__ == "__main__":
-  dtypes = (np.bool, np.int32, np.int64, np.float32, np.float64, np.complex64,
-            np.complex128)
+  dtypes = (np.bool, np.int32, np.int64, np.float16,
+            dtypes_lib.bfloat16.as_numpy_dtype, np.float32, np.float64,
+            np.complex64, np.complex128)
   for dtype in dtypes:
     for batch_shape in ((), (2,), (1, 3, 2)):
-      for rows in 1, 2, 7:
-        for cols in 1, 2, 7:
+      for rows in 1, 2, 7, 23:
+        for cols in 1, 2, 7, 23:
           shape = (rows, cols)
           name = "%s_%s" % (dtype.__name__,
                             "_".join(map(str, batch_shape + shape)))
-- 
GitLab


From 3b5d346681f4dbdfc1252d4bbcff1300cbb2ccc9 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Thu, 11 Oct 2018 16:53:21 -0700
Subject: [PATCH 0825/1085] [XLA] Add key-value sort test with int64 values.

Also some small improvements to HloSortInstruction.

PiperOrigin-RevId: 216784851
---
 tensorflow/compiler/tests/sort_ops_test.py         | 9 ++++++---
 tensorflow/compiler/xla/service/hlo_instructions.h | 7 ++++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/tests/sort_ops_test.py b/tensorflow/compiler/tests/sort_ops_test.py
index 57f0ab7a9e..3e499c2fb1 100644
--- a/tensorflow/compiler/tests/sort_ops_test.py
+++ b/tensorflow/compiler/tests/sort_ops_test.py
@@ -57,10 +57,13 @@ class XlaSortOpTest(xla_test.XLATestCase):
           xla.sort, [x], expected=[np.arange(101, dtype=dtype)])
 
   def testKeyValueSort(self):
-    supported_types = set(
+    supported_key_types = set(
         [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
-    for key_type in supported_types.intersection(self.numeric_types):
-      for value_type in supported_types.intersection(self.numeric_types):
+    supported_value_types = set(
+        [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32,
+         dtypes.int64.as_numpy_dtype, dtypes.uint64.as_numpy_dtype])
+    for key_type in supported_key_types.intersection(self.numeric_types):
+      for value_type in supported_value_types.intersection(self.numeric_types):
         x = np.arange(101, dtype=key_type)
         np.random.shuffle(x)
         y = (-x).astype(value_type)
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 3a0b7490dc..5f06dc0932 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -423,9 +423,14 @@ class HloSortInstruction : public HloInstruction {
   const std::vector<int64>& dimensions() const override { return dimensions_; }
   int64 dimensions(int64 index) const override { return dimensions()[index]; }
   // Returns the sort dimension for this instruction
-  int64 sort_dimension() { return dimensions(0); }
+  int64 sort_dimension() const { return dimensions(0); }
   // Returns a serialized representation of this instruction.
   HloInstructionProto ToProto() const override;
+  // Returns the key operand to this instruction.
+  const HloInstruction* keys() const { return operand(0); }
+  HloInstruction* mutable_keys() { return mutable_operand(0); }
+  // Returns the number of value operands.
+  int64 values_count() const { return operand_count() - 1; }
 
  private:
   std::vector<string> ExtraAttributesToStringImpl(
-- 
GitLab


From cb8f301d8e94a393c9bce4c6e1b96ba163b41603 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 11 Oct 2018 17:06:08 -0700
Subject: [PATCH 0826/1085] Implement lazy compilation for XLA

This CL implements lazy compilation for XLA with a very simple policy: compile a
cluster for a specific signature the second time we see the signature.  This
policy is only for bootstrapping; I suspect we will have to substantially tune
this based on real world workloads.

This CL is organized as follows:

 - Mark the compilation_successful output / input in the _XlaCompile/_XlaRun
   kernels as HostMem.  This was a bug in the CL that introduced
   _XlaCompile/_XlaRun, but was unnoticed till now because we never read the
   compilation_successful boolean.
    * Also mark the key output / input as HostMem for symmetry even though
     DT_STRING tensors are always placed on the host.

 - Add a couple of new TF node matchers to make it easier to unit-test the
   updated build_xla_ops pass.

 - Update some tests that relied on JIT compilation to have a "warmup" phase so
   that compilation is guaranteed.

 - The main part of the CL is in kernels/xla_ops and jit/xla_compilation_cache.

PiperOrigin-RevId: 216786799
---
 tensorflow/compiler/jit/BUILD                 |   5 +-
 tensorflow/compiler/jit/build_xla_ops_pass.cc | 212 ++++++++++++++++--
 tensorflow/compiler/jit/build_xla_ops_pass.h  |  11 +
 .../compiler/jit/build_xla_ops_pass_test.cc   | 120 +++++++++-
 tensorflow/compiler/jit/kernels/xla_ops.cc    |  44 ++--
 tensorflow/compiler/jit/kernels/xla_ops.h     |   2 +
 tensorflow/compiler/jit/legacy_flags/BUILD    |  12 +
 .../legacy_flags/build_xla_ops_pass_flags.cc  |  47 ++++
 .../legacy_flags/build_xla_ops_pass_flags.h   |  37 +++
 tensorflow/compiler/jit/node_matchers.cc      | 120 +++++++---
 tensorflow/compiler/jit/node_matchers.h       |  70 ++++--
 tensorflow/compiler/jit/node_matchers_test.cc |  47 +++-
 tensorflow/compiler/jit/ops/xla_ops.cc        |   9 +-
 .../compiler/jit/xla_compilation_cache.cc     |  25 ++-
 .../compiler/jit/xla_compilation_cache.h      |  23 +-
 tensorflow/compiler/jit/xla_device_ops.h      |  12 +-
 tensorflow/compiler/tests/BUILD               |   2 +
 tensorflow/compiler/tests/dense_layer_test.py |  13 +-
 tensorflow/compiler/tests/jit_test.py         | 161 +++++++++----
 tensorflow/compiler/tests/test_utils.py       |  12 +
 20 files changed, 827 insertions(+), 157 deletions(-)
 create mode 100644 tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.cc
 create mode 100644 tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 64adc885bc..ced0cd03f7 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -324,7 +324,6 @@ cc_library(
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -390,6 +389,7 @@ cc_library(
         "//tensorflow/cc:ops",
         "//tensorflow/cc:scope_internal",
         "//tensorflow/compiler/jit/graphcycles",
+        "//tensorflow/compiler/jit/legacy_flags:build_xla_ops_pass_flags",
         "//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags",
         "//tensorflow/compiler/jit/ops:xla_ops",
         "//tensorflow/compiler/tf2xla:dump_graph",
@@ -499,6 +499,7 @@ tf_cc_test(
         ":compilation_passes",
         ":node_matchers",
         ":xla_cluster_util",
+        ":xla_cpu_device",
         ":xla_gpu_device",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:cc_ops_internal",
@@ -622,6 +623,7 @@ cc_library(
     deps = [
         "//tensorflow/cc:ops",
         "//tensorflow/compiler/xla:test",
+        "//tensorflow/core:framework",
         "//tensorflow/core:graph",
         "//tensorflow/core:protos_all_cc",
         "@com_google_absl//absl/algorithm:container",
@@ -637,6 +639,7 @@ tf_cc_test(
     deps = [
         ":node_matchers",
         "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:cc_ops_internal",
         "//tensorflow/cc:ops",
         "//tensorflow/core:ops",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc
index 5974696b77..054f31ba33 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc
@@ -15,10 +15,16 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/build_xla_ops_pass.h"
 #include "absl/algorithm/container.h"
+#include "absl/strings/str_cat.h"
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/framework/scope_internal.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/cc/ops/control_flow_ops.h"
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h"
+#include "tensorflow/compiler/jit/xla_cluster_util.h"
 #include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_ops.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -48,6 +54,88 @@ void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) {
   }
 }
 
+// Returns a data value that is dead iff `control` is dead.
+Output ControlToData(const Scope& scope, Node* control) {
+  Output data = ops::Const(scope.WithOpName("ctrl_as_data"),
+                           Tensor(DT_BOOL, TensorShape({0})));
+  scope.graph()->AddControlEdge(control, data.node());
+  return Output(data.node());
+}
+
+// Returns an operation that can be control-depended on that is dead iff `data`
+// is dead.
+Operation DataToControl(const Scope& scope, Output data) {
+  return Operation(
+      ops::Identity(scope.WithOpName("data_as_ctrl"), data).node());
+}
+
+// Replaces each outgoing edge from `old_node` with a merge node that merges in
+// the corresponding output from `new_node`.
+void MergeOutgoingDataEdges(const Scope& s, Node* old_node, Node* new_node) {
+  if (!s.status().ok()) {
+    return;
+  }
+
+  std::vector<Output> merged_outputs(old_node->num_outputs(), Output(nullptr));
+
+  std::vector<const Edge*> data_edges;
+  absl::c_copy_if(old_node->out_edges(), std::back_inserter(data_edges),
+                  [](const Edge* e) { return !e->IsControlEdge(); });
+
+  for (const Edge* e : data_edges) {
+    int oidx = e->src_output();
+    Output merged_output = merged_outputs[oidx];
+    if (merged_output.node() == nullptr) {
+      ops::Merge merge_op(s.WithOpName(absl::StrCat("merge_oidx_", oidx)),
+                          {Output(old_node, oidx), Output(new_node, oidx)});
+      merged_output = merged_outputs[oidx] = merge_op.output;
+    }
+
+    Node* dst = e->dst();
+    int dst_idx = e->dst_input();
+
+    s.graph()->RemoveEdge(e);
+    s.graph()->AddEdge(merged_output.node(), merged_output.index(), dst,
+                       dst_idx);
+  }
+}
+
+// Replaces each control successor of `old_node` to execute whenever either
+// `old_node` or `new_node` is executed.
+void MergeOutgoingControlEdges(const Scope& s, Node* old_node, Node* new_node) {
+  if (!s.status().ok()) {
+    return;
+  }
+
+  std::vector<const Edge*> ctrl_edges;
+  absl::c_copy_if(old_node->out_edges(), std::back_inserter(ctrl_edges),
+                  [](const Edge* e) { return e->IsControlEdge(); });
+
+  if (ctrl_edges.empty()) {
+    return;
+  }
+
+  // We can't merge control edges directly so we instead first "convert" them to
+  // normal values that can be merged, merge the values and then "convert" the
+  // merged value back into control.
+  //
+  // NB! We need to copy out the outgoing control edges before constructing
+  // old_ctrl_as_data otherwise the control edge from old_node to the constant
+  // in ControlToData will be present in ctrl_edges.
+
+  Output old_ctrl_as_data = ControlToData(s, old_node);
+  Output new_ctrl_as_data = ControlToData(s, new_node);
+
+  ops::Merge ctrl_merge_as_data(s.WithOpName("ctrl_merge"),
+                                {old_ctrl_as_data, new_ctrl_as_data});
+  Operation ctrl_merge = DataToControl(s, ctrl_merge_as_data.output);
+
+  for (const Edge* e : ctrl_edges) {
+    s.graph()->AddControlEdge(ctrl_merge.node(), e->dst());
+    s.graph()->RemoveControlEdge(e);
+  }
+}
+
 struct XlaClusterInfo {
   std::vector<Output> constant_inputs;
   std::vector<Output> non_constant_inputs;
@@ -107,7 +195,38 @@ Status CopyIncomingControlEdges(Graph* g, Node* from, Node* to) {
   return Status::OK();
 }
 
-Status ReplaceNodeWithXlaCompileAndXlaRun(Graph* g, Node* n) {
+void RemoveAllIncomingControlEdges(Graph* g, Node* n) {
+  std::vector<const Edge*> incoming_ctrl_edges;
+  absl::c_copy_if(n->in_edges(), std::back_inserter(incoming_ctrl_edges),
+                  [](const Edge* e) { return e->IsControlEdge(); });
+  for (const Edge* e : incoming_ctrl_edges) {
+    g->RemoveControlEdge(e);
+  }
+}
+
+// Returns true (into `result`) if `node` must be compiled.
+Status NodeRequiresCompilation(Node* n, bool* result) {
+  DeviceType device_type("");
+  TF_RETURN_IF_ERROR(
+      DeviceToDeviceType(n->assigned_device_name(), &device_type));
+  const XlaOpRegistry::DeviceRegistration* registration = nullptr;
+  if (!XlaOpRegistry::GetCompilationDevice(device_type.type(), &registration)) {
+    return errors::Internal("Could not find compilation device ",
+                            device_type.type());
+  }
+  *result = registration->requires_compilation;
+  return Status::OK();
+}
+
+Status ReplaceNodeWithXlaCompileAndXlaRun(
+    const FunctionLibraryDefinition& flib_def, bool lazy_compilation_enabled,
+    Graph* g, Node* n) {
+  bool requires_compilation;
+  TF_RETURN_IF_ERROR(NodeRequiresCompilation(n, &requires_compilation));
+  if (!lazy_compilation_enabled) {
+    requires_compilation = true;
+  }
+
   Status status;
   Scope root = NewInternalScope(g, &status, /*refiner=*/nullptr)
                    .NewSubScope(n->name())
@@ -121,18 +240,63 @@ Status ReplaceNodeWithXlaCompileAndXlaRun(Graph* g, Node* n) {
                                /*constants=*/cluster_info.constant_inputs,
                                /*args=*/cluster_info.non_constant_inputs,
                                /*resources=*/cluster_info.resource_inputs,
+                               /*must_compile=*/requires_compilation,
                                cluster_info.function);
   TF_RETURN_IF_ERROR(
       CopyIncomingControlEdges(g, /*from=*/n, /*to=*/xla_compile.key.node()));
 
-  std::vector<Output> xla_run_args = cluster_info.non_constant_inputs;
-  absl::c_copy(cluster_info.resource_inputs, std::back_inserter(xla_run_args));
-  ops::_XlaRun xla_run(root.WithOpName("xla_run"), xla_run_args,
-                       xla_compile.key, n->output_types());
+  if (requires_compilation) {
+    // "Strict" compilation:  every _XlaCompile invocation must compile the
+    // cluster.
+    std::vector<Output> xla_run_args = cluster_info.non_constant_inputs;
+    absl::c_copy(cluster_info.resource_inputs,
+                 std::back_inserter(xla_run_args));
+    ops::_XlaRun xla_run(root.WithOpName("xla_run"), xla_run_args,
+                         xla_compile.key, n->output_types());
+
+    MoveOutgoingEdges(g, /*old_node=*/n,
+                      /*new_node=*/xla_run.operation.node());
+    g->RemoveNode(n);
+  } else {
+    // "Lazy" compilation: an _XlaCompile invocation may decide not to compile
+    // the cluster based on profitability heuristics.
 
-  MoveOutgoingEdges(g, /*old_node=*/n,
-                    /*new_node=*/xla_run.operation.node());
-  g->RemoveNode(n);
+    // We generate the following graph:
+    //
+    //   (use_tf_call, use_xla_run) =
+    //       Switch(pred=xla_compile.compilation_successful,
+    //              value=xla_compile.key)
+    //
+    //   tf_call_outputs = cluster_N(..., ^use_tf_call)
+    //   xla_run_outputs = _XlaRun(..., key=use_xla_run)
+    //   outputs = Merge(tf_call_outputs, xla_run_outputs).
+    ops::Switch s(root.WithOpName("predicated_compilation_key"),
+                  xla_compile.key, xla_compile.compilation_successful);
+    Output predicated_compilation_key = s.output_true;
+    Output inverse_predicated_compilation_key = s.output_false;
+
+    std::vector<Output> xla_run_args = cluster_info.non_constant_inputs;
+    absl::c_copy(cluster_info.resource_inputs,
+                 std::back_inserter(xla_run_args));
+    ops::_XlaRun xla_run(root.WithOpName("xla_run"), xla_run_args,
+                         predicated_compilation_key, n->output_types());
+
+    MergeOutgoingControlEdges(root, /*old_node=*/n,
+                              /*new_node=*/xla_run.operation.node());
+
+    MergeOutgoingDataEdges(root, /*old_node=*/n,
+                           /*new_node=*/xla_run.operation.node());
+
+    TF_RETURN_IF_ERROR(root.status());
+
+    // We already have a TensorFlow function call into the cluster -- the
+    // original node we set out to rewrite.  We just wire in the correct control
+    // deps and we're done.
+    RemoveAllIncomingControlEdges(g, n);
+    g->AddControlEdge(
+        DataToControl(root, inverse_predicated_compilation_key).node(), n);
+    n->ClearAttr(kXlaCompiledKernelAttr);
+  }
 
   return Status::OK();
 }
@@ -141,22 +305,34 @@ Status ReplaceNodeWithXlaCompileAndXlaRun(Graph* g, Node* n) {
 Status BuildXlaOpsPass::Run(const GraphOptimizationPassOptions& options) {
   Graph* graph = options.graph->get();
 
-  for (Node* n : graph->op_nodes()) {
-    // In all cases, only try to compile computational nodes.
-    if (n->IsSend() || n->IsRecv() || n->IsControlFlow()) {
-      continue;
-    }
+  // Copy out the nodes we want to rewrite to avoid modifying the graph while we
+  // iterate on graph->op_nodes().
+  std::vector<Node*> xla_compiled_kernels;
+  absl::c_copy_if(graph->op_nodes(), std::back_inserter(xla_compiled_kernels),
+                  [](const Node* n) {
+                    if (n->IsSend() || n->IsRecv() || n->IsControlFlow()) {
+                      return false;
+                    }
 
-    // Only compile nodes that are marked for compilation by the
-    // compilation-marking pass (via 'attr_name').
-    if (IsXlaCompiledKernel(*n)) {
-      TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndXlaRun(graph, n));
-    }
+                    // Only compile nodes that are marked for compilation by the
+                    // compilation-marking pass (via 'attr_name').
+                    return IsXlaCompiledKernel(*n);
+                  });
+
+  bool lazy_compilation_enabled = enable_lazy_compilation_
+                                      ? *enable_lazy_compilation_
+                                      : legacy_flags::GetBuildXlaOpsPassFlags()
+                                            .tf_xla_enable_lazy_compilation;
+
+  for (Node* n : xla_compiled_kernels) {
+    TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndXlaRun(
+        *options.flib_def, lazy_compilation_enabled, graph, n));
   }
 
   if (VLOG_IS_ON(1)) {
     dump_graph::DumpGraphToFile("build_xla_ops", *graph, options.flib_def);
   }
+
   return Status::OK();
 }
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.h b/tensorflow/compiler/jit/build_xla_ops_pass.h
index 1dd38fa951..58f7c4b3a0 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass.h
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_JIT_BUILD_XLA_OPS_PASS_H_
 #define TENSORFLOW_COMPILER_JIT_BUILD_XLA_OPS_PASS_H_
 
+#include "absl/types/optional.h"
 #include "tensorflow/core/common_runtime/optimization_registry.h"
 #include "tensorflow/core/lib/core/status.h"
 
@@ -25,7 +26,17 @@ namespace tensorflow {
 // executes (using XLA) TF function calls marked with "_XlaCompiledKernel".
 class BuildXlaOpsPass : public GraphOptimizationPass {
  public:
+  // If enable_lazy_compilation is not nullopt then *enable_lazy_compilation
+  // overrides --tf_xla_enable_lazy_compilation flag in deciding whether lazy
+  // compilation is enabled.
+  explicit BuildXlaOpsPass(
+      absl::optional<bool> enable_lazy_compilation = absl::nullopt)
+      : enable_lazy_compilation_(enable_lazy_compilation) {}
+
   Status Run(const GraphOptimizationPassOptions& options) override;
+
+ private:
+  absl::optional<bool> enable_lazy_compilation_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
index 9d56db7b6b..11df946cc1 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc
@@ -22,18 +22,44 @@ limitations under the License.
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 #include "tensorflow/compiler/jit/node_matchers.h"
+#include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/public/session_options.h"
 
 namespace tensorflow {
 namespace {
 
+class BuildXlaOpsTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // This is needed to register the XLA_* devices.
+    CHECK(DeviceFactory::AddDevices(
+              SessionOptions(), "/job:localhost/replica:0/task:0", &devices_)
+              .ok());
+  }
+
+  void TearDown() override {
+    for (Device* device : devices_) {
+      delete device;
+    }
+  }
+
+ private:
+  std::vector<Device*> devices_;
+};
+
 using ::tensorflow::testing::FindNodeByName;
+using ::tensorflow::testing::matchers::Attr;
 using ::tensorflow::testing::matchers::CtrlDeps;
+using ::tensorflow::testing::matchers::Inputs;
 using ::tensorflow::testing::matchers::NodeWith;
 using ::tensorflow::testing::matchers::Op;
+using ::tensorflow::testing::matchers::Out;
+using ::testing::_;
 
 Status BuildXlaOps(const Scope& s, std::unique_ptr<Graph>* result) {
   auto graph = absl::make_unique<Graph>(OpRegistry::Global());
@@ -42,15 +68,18 @@ Status BuildXlaOps(const Scope& s, std::unique_ptr<Graph>* result) {
   // Assign all nodes to the CPU device.
   static const char* kCpuDevice = "/job:localhost/replica:0/task:0/cpu:0";
   for (Node* n : graph->nodes()) {
-    if (n->assigned_device_name().empty()) {
+    if (n->requested_device().empty()) {
       n->set_assigned_device_name(kCpuDevice);
+    } else {
+      n->set_assigned_device_name(n->requested_device());
     }
   }
 
   GraphOptimizationPassOptions opt_options;
   opt_options.graph = &graph;
-  BuildXlaOpsPass pass;
+  BuildXlaOpsPass pass(/*enable_lazy_compilation=*/true);
   TF_RETURN_IF_ERROR(pass.Run(opt_options));
+  VLOG(3) << graph->ToGraphDefDebug().DebugString();
   *result = std::move(graph);
   return Status::OK();
 }
@@ -76,16 +105,19 @@ Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name,
                                result);
 }
 
-Node* MakeWrite(const Scope& scope, const string& id) {
-  Output var_handle =
-      ops::VarHandleOp(scope.WithOpName("Var" + id), DT_FLOAT, TensorShape({}));
-  Output value_to_write =
-      ops::Const(scope.WithOpName("ValueToAssign" + id), 1.0f);
-  ops::AssignVariableOp assign_op(scope.WithOpName("Assignee" + id), var_handle,
-                                  value_to_write);
+Node* MakeWrite(const Scope& scope, Output value_to_write, const string& id) {
+  Output var_handle = ops::VarHandleOp(scope.WithOpName("Var_" + id), DT_FLOAT,
+                                       TensorShape({}));
+  ops::AssignVariableOp assign_op(scope.WithOpName("Assignee_" + id),
+                                  var_handle, value_to_write);
   return assign_op.operation.node();
 }
 
+Node* MakeWrite(const Scope& scope, const string& id) {
+  return MakeWrite(
+      scope, ops::Const(scope.WithOpName("ValueToAssign" + id), 1.0f), id);
+}
+
 FunctionDefLibrary CreateFunctionDefLibWithConstFunction(const string& name) {
   FunctionDefLibrary flib_def;
   FunctionDef func = FunctionDefHelper::Create(
@@ -97,14 +129,16 @@ FunctionDefLibrary CreateFunctionDefLibWithConstFunction(const string& name) {
   return flib_def;
 }
 
-TEST(BuildXlaOps, ControlDepsPreserved) {
-  Scope root = Scope::NewRootScope().ExitOnError();
+TEST_F(BuildXlaOpsTest, ControlDepsPreserved) {
+  const char* kXlaDeviceName = "/job:worker/replica:0/task:0/device:XLA_CPU:0";
+  Scope root = Scope::NewRootScope().WithDevice(kXlaDeviceName).ExitOnError();
 
   FunctionDefLibrary flib_def =
       CreateFunctionDefLibWithConstFunction("cluster_0");
   TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def));
   Node* call;
   TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call));
+  call->set_requested_device(kXlaDeviceName);
   Node* write_op = MakeWrite(root, "write");
   root.graph()->AddControlEdge(call, write_op);
 
@@ -116,15 +150,17 @@ TEST(BuildXlaOps, ControlDepsPreserved) {
   EXPECT_THAT(write_op_new, NodeWith(CtrlDeps(NodeWith(Op("_XlaRun")))));
 }
 
-TEST(BuildXlaOps, CleanFailureOnBogusAttr) {
+TEST_F(BuildXlaOpsTest, CleanFailureOnBogusAttr) {
   Scope root = Scope::NewRootScope().ExitOnError();
 
   FunctionDefLibrary flib_def =
       CreateFunctionDefLibWithConstFunction("cluster_0");
   TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def));
+
   Node* call;
   TF_ASSERT_OK(
       MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", 100, 100, &call));
+
   Node* write_op = MakeWrite(root, "write");
   root.graph()->AddControlEdge(call, write_op);
 
@@ -134,5 +170,65 @@ TEST(BuildXlaOps, CleanFailureOnBogusAttr) {
   EXPECT_EQ(failure_status.code(), error::INVALID_ARGUMENT);
 }
 
+TEST_F(BuildXlaOpsTest, OnNonXlaDevice) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  FunctionDefLibrary flib_def =
+      CreateFunctionDefLibWithConstFunction("cluster_0");
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def));
+
+  Node* call;
+  TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call));
+  TF_ASSERT_OK(root.DoShapeInference(call));
+
+  Node* write_op = MakeWrite(root, Output(call), "write_result");
+
+  auto xla_compile = NodeWith(Op("_XlaCompile"), Attr("must_compile", false));
+  auto predicated_compilation_key =
+      NodeWith(Op("Switch"), Inputs(Out(0, xla_compile), Out(1, xla_compile)));
+  auto xla_run =
+      NodeWith(Op("_XlaRun"), Inputs(Out(1, predicated_compilation_key)));
+  auto tf_call =
+      NodeWith(Op("cluster_0"),
+               CtrlDeps(NodeWith(Op("Identity"),
+                                 Inputs(Out(0, predicated_compilation_key)))));
+  auto merge = NodeWith(Op("Merge"), Inputs(Out(tf_call), Out(xla_run)));
+  auto assign_var = NodeWith(Op("AssignVariableOp"), Inputs(_, Out(merge)));
+
+  std::unique_ptr<Graph> graph;
+  TF_ASSERT_OK(BuildXlaOps(root, &graph));
+
+  Node* write_op_new = FindNodeByName(graph.get(), write_op->name());
+  ASSERT_NE(write_op_new, nullptr);
+  EXPECT_THAT(write_op_new, assign_var);
+}
+
+TEST_F(BuildXlaOpsTest, OnXlaDevice) {
+  const char* kXlaDeviceName = "/job:worker/replica:0/task:0/device:XLA_CPU:0";
+  Scope root = Scope::NewRootScope().WithDevice(kXlaDeviceName).ExitOnError();
+
+  FunctionDefLibrary flib_def =
+      CreateFunctionDefLibWithConstFunction("cluster_0");
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def));
+
+  Node* call;
+  TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call));
+  call->set_requested_device(kXlaDeviceName);
+  TF_ASSERT_OK(root.DoShapeInference(call));
+
+  Node* write_op = MakeWrite(root, Output(call), "write_result");
+
+  std::unique_ptr<Graph> graph;
+  TF_ASSERT_OK(BuildXlaOps(root, &graph));
+
+  auto xla_op =
+      NodeWith(Op("_XlaRun"), Inputs(Out(NodeWith(Op("_XlaCompile")))));
+  auto assign_var =
+      NodeWith(Op("AssignVariableOp"), Inputs(Out(NodeWith()), Out(xla_op)));
+
+  Node* write_op_new = FindNodeByName(graph.get(), write_op->name());
+  ASSERT_NE(write_op_new, nullptr);
+  EXPECT_THAT(write_op_new, assign_var);
+}
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc
index accc86a86d..2268d90428 100644
--- a/tensorflow/compiler/jit/kernels/xla_ops.cc
+++ b/tensorflow/compiler/jit/kernels/xla_ops.cc
@@ -219,7 +219,7 @@ static Status BuildCompilationCache(OpKernelContext* ctx,
 static Status CompileToLocalExecutable(
     OpKernelContext* ctx, const NameAttrList& function,
     const XlaPlatformInfo& platform_info, absl::Span<const int> resources,
-    absl::Span<const int> constants, xla::LocalClient** client,
+    absl::Span<const int> constants, bool lazy, xla::LocalClient** client,
     std::map<int, OptionalTensor>* variables,
     const XlaCompiler::CompilationResult** kernel,
     xla::LocalExecutable** executable) {
@@ -277,7 +277,10 @@ static Status CompileToLocalExecutable(
   compile_options.always_return_tuple = false;
 
   return cache->Compile(options, function, constant_args, *variables, ctx,
-                        compile_options, kernel, executable);
+                        compile_options,
+                        lazy ? XlaCompilationCache::CompileMode::kLazy
+                             : XlaCompilationCache::CompileMode::kStrict,
+                        kernel, executable);
 }
 
 void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
@@ -291,8 +294,8 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
 
   OP_REQUIRES_OK(
       ctx, CompileToLocalExecutable(ctx, function_, platform_info_, resources_,
-                                    constants_, &client, &variables, &kernel,
-                                    &executable));
+                                    constants_, /*lazy=*/false, &client,
+                                    &variables, &kernel, &executable));
 
   se::Stream* stream =
       ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
@@ -394,9 +397,12 @@ XlaCompileOp::XlaCompileOp(OpKernelConstruction* ctx)
       resources_(ResourcesVector(ctx)),
       function_(FunctionAttr(ctx)) {
   OP_REQUIRES_OK(ctx, PlatformInfoFromContext(ctx, &platform_info_));
+  OP_REQUIRES_OK(ctx, ctx->GetAttr("must_compile", &must_compile_));
 }
 
 void XlaCompileOp::Compute(OpKernelContext* ctx) {
+  VLOG(3) << "XlaCompileOp " << def().name()
+          << (must_compile_ ? "(must-compile)" : "");
   xla::LocalClient* client;
   const XlaCompiler::CompilationResult* kernel;
   xla::LocalExecutable* executable;
@@ -404,8 +410,24 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) {
 
   OP_REQUIRES_OK(
       ctx, CompileToLocalExecutable(ctx, function_, platform_info_, resources_,
-                                    constants_, &client, &variables, &kernel,
-                                    &executable));
+                                    constants_, /*lazy=*/!must_compile_,
+                                    &client, &variables, &kernel, &executable));
+
+  AllocatorAttributes host_alloc_attrs;
+  host_alloc_attrs.set_gpu_compatible(true);
+  host_alloc_attrs.set_on_host(true);
+  Allocator* cpu_allocator = ctx->device()->GetAllocator(host_alloc_attrs);
+
+  if (!executable) {
+    DCHECK(!must_compile_);
+    Tensor compilation_key(cpu_allocator, DT_STRING, TensorShape({}));
+
+    Tensor compilation_successful(cpu_allocator, DT_BOOL, TensorShape({}));
+    compilation_successful.scalar<bool>()() = false;
+    ctx->set_output(0, Tensor(cpu_allocator, DT_STRING, TensorShape({})));
+    ctx->set_output(1, compilation_successful);
+    return;
+  }
 
   // Each execution of an XlaCompile op creates a new XlaExecutableClosure, even
   // if it didn't have to compile the cluster because of a compilation-cache
@@ -415,13 +437,6 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) {
       XlaExecutableClosureStore::Global()->Produce(XlaExecutableClosure(
           client, executable, kernel, std::move(variables), constants_.size()));
 
-  Allocator* cpu_allocator = [&] {
-    AllocatorAttributes host_alloc_attrs;
-    host_alloc_attrs.set_gpu_compatible(true);
-    host_alloc_attrs.set_on_host(true);
-    return ctx->device()->GetAllocator(host_alloc_attrs);
-  }();
-
   Tensor compilation_key(cpu_allocator, DT_STRING, TensorShape({}));
   compilation_key.flat<string>()(0) = key;
 
@@ -437,6 +452,7 @@ XlaRunOp::XlaRunOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
 }
 
 void XlaRunOp::Compute(OpKernelContext* ctx) {
+  VLOG(3) << "XlaRunOp " << def().name();
   Tensor key_tensor = ctx->input(ctx->num_inputs() - 1);
   const XlaExecutableClosureStore::KeyT& key = key_tensor.flat<string>()(0);
 
@@ -491,6 +507,8 @@ REGISTER_KERNEL_BUILDER(Name("_XlaCompile").Device(DEVICE_CPU), XlaCompileOp);
 REGISTER_KERNEL_BUILDER(Name("_XlaCompile")
                             .Device(DEVICE_GPU)
                             .HostMemory("constants")
+                            .HostMemory("key")
+                            .HostMemory("compilation_successful")
                             .HostMemory("resources"),
                         XlaCompileOp);
 
diff --git a/tensorflow/compiler/jit/kernels/xla_ops.h b/tensorflow/compiler/jit/kernels/xla_ops.h
index 489d26eb30..ac90837e0d 100644
--- a/tensorflow/compiler/jit/kernels/xla_ops.h
+++ b/tensorflow/compiler/jit/kernels/xla_ops.h
@@ -151,6 +151,8 @@ class XlaCompileOp : public OpKernel {
   NameAttrList function_;
 
   XlaPlatformInfo platform_info_;
+
+  bool must_compile_;
 };
 
 class XlaRunOp : public OpKernel {
diff --git a/tensorflow/compiler/jit/legacy_flags/BUILD b/tensorflow/compiler/jit/legacy_flags/BUILD
index 07c5b23188..d8fe4026f5 100644
--- a/tensorflow/compiler/jit/legacy_flags/BUILD
+++ b/tensorflow/compiler/jit/legacy_flags/BUILD
@@ -39,3 +39,15 @@ cc_library(
             "//tensorflow/core:lib",
         ],
 )
+
+cc_library(
+    name = "build_xla_ops_pass_flags",
+    srcs = ["build_xla_ops_pass_flags.cc"],
+    hdrs = ["build_xla_ops_pass_flags.h"],
+    deps =
+        [
+            "//tensorflow/compiler/xla/legacy_flags:parse_flags_from_env",
+            "//tensorflow/core:framework_internal",
+            "//tensorflow/core:lib",
+        ],
+)
diff --git a/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.cc b/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.cc
new file mode 100644
index 0000000000..58157d2b98
--- /dev/null
+++ b/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.cc
@@ -0,0 +1,47 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <mutex>  // NOLINT
+
+#include "tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h"
+#include "tensorflow/compiler/xla/legacy_flags/parse_flags_from_env.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+namespace tensorflow {
+namespace legacy_flags {
+namespace {
+
+BuildXlaOpsPassFlags* flags;
+std::vector<Flag>* flag_list;
+std::once_flag flags_init;
+
+void AllocateAndParseFlags() {
+  flags = new BuildXlaOpsPassFlags;
+  flags->tf_xla_enable_lazy_compilation = false;
+  flag_list = new std::vector<Flag>({
+      Flag("tf_xla_enable_lazy_compilation",
+           &flags->tf_xla_enable_lazy_compilation, ""),
+  });
+  xla::legacy_flags::ParseFlagsFromEnv(*flag_list);
+}
+
+}  // namespace
+
+const BuildXlaOpsPassFlags& GetBuildXlaOpsPassFlags() {
+  std::call_once(flags_init, &AllocateAndParseFlags);
+  return *flags;
+}
+}  // namespace legacy_flags
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h b/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h
new file mode 100644
index 0000000000..539314cbf7
--- /dev/null
+++ b/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h
@@ -0,0 +1,37 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_JIT_LEGACY_FLAGS_BUILD_XLA_OPS_PASS_FLAGS_H_
+#define TENSORFLOW_COMPILER_JIT_LEGACY_FLAGS_BUILD_XLA_OPS_PASS_FLAGS_H_
+
+namespace tensorflow {
+namespace legacy_flags {
+
+// Flags for the build_xla_ops pass.
+struct BuildXlaOpsPassFlags {
+  // Enables lazy compilation for TF/XLA (only when auto-clustering) if true.
+  // Defaults to false.
+  bool tf_xla_enable_lazy_compilation;
+};
+
+// Parses the flags in BuildXlaOpsPassFlags from the TF_XLA_FLAGS environment
+// variable and returns a reference to the parsed copy.  Parses TF_XLA_FLAGS
+// only the first time this routine is called.
+const BuildXlaOpsPassFlags& GetBuildXlaOpsPassFlags();
+
+}  // namespace legacy_flags
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_JIT_LEGACY_FLAGS_BUILD_XLA_OPS_PASS_FLAGS_H_
diff --git a/tensorflow/compiler/jit/node_matchers.cc b/tensorflow/compiler/jit/node_matchers.cc
index d8ace628e6..a09a6eb155 100644
--- a/tensorflow/compiler/jit/node_matchers.cc
+++ b/tensorflow/compiler/jit/node_matchers.cc
@@ -19,7 +19,10 @@ limitations under the License.
 #include "absl/algorithm/container.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
+#include "absl/strings/str_replace.h"
 #include "absl/strings/str_split.h"
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 
 namespace tensorflow {
@@ -28,6 +31,7 @@ namespace matchers {
 namespace {
 
 using impl::NodeMatcherProperties;
+using impl::OutEdge;
 
 string IndentAllButFirstLine(absl::string_view text) {
   std::vector<std::string> lines = absl::StrSplit(text, '\n');
@@ -99,8 +103,6 @@ bool MatchAndExplainTensor(const Tensor& tensor, const Tensor& expected_tensor,
   }
 }
 
-using Input = std::pair<const Node*, int>;
-
 struct NodeMatcher : public ::testing::MatcherInterface<const Node*> {
   bool MatchAndExplain(
       const Node* node,
@@ -191,6 +193,29 @@ struct NodeMatcher : public ::testing::MatcherInterface<const Node*> {
       }
       return false;
     }
+
+    const AttrValueMap attr_value_map = node->def().attr();
+    for (const auto& attr_kv_pair : attrs) {
+      auto it = attr_value_map.find(attr_kv_pair.first);
+      if (it == attr_value_map.end()) {
+        if (listener->IsInterested()) {
+          *listener << "did not find attribute named \"" << attr_kv_pair.first
+                    << "\" in node";
+        }
+        return false;
+      }
+      if (!AreAttrValuesEqual(it->second, attr_kv_pair.second)) {
+        if (listener->IsInterested()) {
+          *listener << "attribute named " << attr_kv_pair.first
+                    << " does not match value; expected: \""
+                    << SummarizeAttrValue(attr_kv_pair.second)
+                    << "\", found: \"" << SummarizeAttrValue(it->second)
+                    << "\"";
+        }
+        return false;
+      }
+    }
+
     return true;
   }
 
@@ -232,7 +257,7 @@ struct NodeMatcher : public ::testing::MatcherInterface<const Node*> {
         *os << "matching " << ss.str();
       } else {
         int edge_idx = 0;
-        for (const ::testing::Matcher<Input>& matcher : (*input_matchers)) {
+        for (const ::testing::Matcher<OutEdge>& matcher : (*input_matchers)) {
           *os << "\n  [" << edge_idx << "] matching (";
           ::std::stringstream ss;
           matcher.DescribeTo(&ss);
@@ -250,6 +275,19 @@ struct NodeMatcher : public ::testing::MatcherInterface<const Node*> {
       control_dep_set->DescribeTo(os);
     }
 
+    if (!attrs.empty()) {
+      printed_something = true;
+      std::vector<string> attrs_str;
+      absl::c_transform(attrs, std::back_inserter(attrs_str),
+                        [](const std::pair<string, AttrValue>& attr_kv_pair) {
+                          return absl::StrCat(
+                              attr_kv_pair.first, "->",
+                              SummarizeAttrValue(attr_kv_pair.second));
+                        });
+      *os << " and attr values matching [" << absl::StrJoin(attrs_str, ", ")
+          << "]";
+    }
+
     if (!printed_something) {
       *os << "is any node";
     }
@@ -266,7 +304,7 @@ struct NodeMatcher : public ::testing::MatcherInterface<const Node*> {
     }
 
     ::testing::StringMatchResultListener inner_listener;
-    Input input = {edge->src(), edge->src_output()};
+    OutEdge input = {edge->src(), edge->src_output()};
     if ((*input_matchers)[input_idx].MatchAndExplain(input, &inner_listener)) {
       return true;
     }
@@ -286,22 +324,24 @@ struct NodeMatcher : public ::testing::MatcherInterface<const Node*> {
   absl::optional<string> name;
   absl::optional<string> assigned_device;
   absl::optional<Tensor> constant_value;
-  absl::optional<std::vector<::testing::Matcher<Input>>> input_matchers;
+  absl::optional<std::vector<::testing::Matcher<OutEdge>>> input_matchers;
   absl::optional<::testing::Matcher<absl::Span<const Node* const>>>
       control_dep_set;
+  std::map<string, AttrValue> attrs;
 };
 
 // Matches a dst and dst_output on an input edge.  Today we only use this with
 // dst_output=0 but we will eventually need to support multi-output operations.
-class InputMatcher : public ::testing::MatcherInterface<Input> {
+class OutEdgeMatcher : public ::testing::MatcherInterface<OutEdge> {
  public:
-  InputMatcher(::testing::Matcher<const Node*> src_matcher, int src_output)
-      : src_matcher_(std::move(src_matcher)), src_output_(src_output) {}
+  OutEdgeMatcher(::testing::Matcher<const Node*> src_matcher, int src_oidx)
+      : src_matcher_(std::move(src_matcher)), src_oidx_(src_oidx) {}
 
   bool MatchAndExplain(
-      Input input, ::testing::MatchResultListener* listener) const override {
+      OutEdge out_edge,
+      ::testing::MatchResultListener* listener) const override {
     ::testing::StringMatchResultListener inner_listener;
-    if (!src_matcher_.MatchAndExplain(input.first, &inner_listener)) {
+    if (!src_matcher_.MatchAndExplain(out_edge.first, &inner_listener)) {
       if (listener->IsInterested()) {
         *listener << "\nsource does not match expected ";
         src_matcher_.DescribeTo(listener->stream());
@@ -312,10 +352,10 @@ class InputMatcher : public ::testing::MatcherInterface<Input> {
       }
       return false;
     }
-    if (input.second != src_output_) {
+    if (out_edge.second != src_oidx_) {
       if (listener->IsInterested()) {
-        *listener << "\nexpected output slot to be " << src_output_
-                  << " but found " << input.second;
+        *listener << "\nexpected output slot to be " << src_oidx_
+                  << " but found " << out_edge.second;
       }
       return false;
     }
@@ -324,31 +364,21 @@ class InputMatcher : public ::testing::MatcherInterface<Input> {
   }
 
   void DescribeTo(::std::ostream* os) const override {
-    if (src_output_) {
-      *os << "output slot: " << src_output_ << ", source: (";
+    if (src_oidx_) {
+      *os << "output slot: " << src_oidx_ << ", source: (";
     }
 
     src_matcher_.DescribeTo(os);
 
-    if (src_output_) {
+    if (src_oidx_) {
       *os << ")";
     }
   }
 
  private:
   ::testing::Matcher<const Node*> src_matcher_;
-  int src_output_;
+  int src_oidx_;
 };
-
-std::vector<::testing::Matcher<Input>> NodeMatchersToInputMatchers(
-    absl::Span<const ::testing::Matcher<const Node*>> node_matchers) {
-  std::vector<::testing::Matcher<Input>> result;
-  absl::c_transform(node_matchers, std::back_inserter(result),
-                    [](::testing::Matcher<const Node*> n) {
-                      return ::testing::MakeMatcher(new InputMatcher(n, 0));
-                    });
-  return result;
-}
 }  // namespace
 
 ::testing::Matcher<const Node*> impl::NodeWith(
@@ -375,10 +405,9 @@ std::vector<::testing::Matcher<Input>> NodeMatchersToInputMatchers(
       matcher->assigned_device = prop.assigned_device();
     }
 
-    if (prop.input_nodes()) {
+    if (prop.inputs()) {
       DCHECK(!matcher->input_matchers);
-      matcher->input_matchers =
-          NodeMatchersToInputMatchers(*prop.input_nodes());
+      matcher->input_matchers = *prop.inputs();
     }
 
     if (prop.control_deps()) {
@@ -386,6 +415,11 @@ std::vector<::testing::Matcher<Input>> NodeMatchersToInputMatchers(
       matcher->control_dep_set =
           ::testing::UnorderedElementsAreArray(*prop.control_deps());
     }
+
+    if (prop.attr()) {
+      auto insert_result = matcher->attrs.insert(*prop.attr());
+      DCHECK(insert_result.second);
+    }
   }
 
   return ::testing::MakeMatcher(matcher);
@@ -412,12 +446,12 @@ impl::NodeMatcherProperties AssignedDevice(string assigned_device) {
 }
 
 impl::NodeMatcherProperties impl::Inputs(
-    absl::Span<const ::testing::Matcher<const Node*>> inputs) {
-  std::vector<::testing::Matcher<const Node*>> inputs_vector;
+    absl::Span<const ::testing::Matcher<OutEdge>> inputs) {
+  std::vector<::testing::Matcher<OutEdge>> inputs_vector;
   absl::c_copy(inputs, std::back_inserter(inputs_vector));
 
   impl::NodeMatcherProperties props;
-  props.set_input_nodes(std::move(inputs_vector));
+  props.set_inputs(std::move(inputs_vector));
   return props;
 }
 
@@ -431,6 +465,19 @@ impl::NodeMatcherProperties impl::CtrlDeps(
   return props;
 }
 
+std::pair<string, AttrValue> impl::AttrLiteralHelper(
+    const std::pair<string, bool>& bool_attr) {
+  AttrValue attr_value;
+  attr_value.set_b(bool_attr.second);
+  return {bool_attr.first, attr_value};
+}
+
+impl::NodeMatcherProperties impl::Attr(std::pair<string, AttrValue> attr) {
+  impl::NodeMatcherProperties props;
+  props.set_attr(std::move(attr));
+  return props;
+}
+
 NodeMatcherProperties ConstantValue(
     const ::tensorflow::Input::Initializer& val) {
   TF_CHECK_OK(val.status);
@@ -443,6 +490,10 @@ NodeMatcherProperties ConstantValue(
     const ::tensorflow::Input::Initializer& val) {
   return NodeWith(ConstantValue(val));
 }
+::testing::Matcher<impl::OutEdge> Out(
+    int oidx, ::testing::Matcher<const Node*> node_matcher) {
+  return ::testing::MakeMatcher(new OutEdgeMatcher(node_matcher, oidx));
+}
 }  // namespace matchers
 
 Node* FindNodeByName(Graph* g, absl::string_view name) {
@@ -455,4 +506,7 @@ Node* FindNodeByName(Graph* g, absl::string_view name) {
   return nullptr;
 }
 }  // namespace testing
+
+void PrintTo(const Node* n, ::std::ostream* os) { *os << SummarizeNode(*n); }
+void PrintTo(Node* n, ::std::ostream* os) { *os << SummarizeNode(*n); }
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/node_matchers.h b/tensorflow/compiler/jit/node_matchers.h
index 0437a7e95c..35c2f5fd7b 100644
--- a/tensorflow/compiler/jit/node_matchers.h
+++ b/tensorflow/compiler/jit/node_matchers.h
@@ -19,7 +19,7 @@ limitations under the License.
 //
 //  tensorflow::Node* node = ...;
 //  EXPECT_THAT(node, NodeWith(Name("name"), Op("op"),
-//                             Inputs(NodeWith(Name("input")))))
+//                             Inputs(Out(3, NodeWith(Name("input"))))))
 //
 // Matchable node properties (the expressions that go inside NodeWith(...))
 // are:
@@ -32,7 +32,8 @@ limitations under the License.
 //  - AssignedDevice(string): matches the assigned device exactly.
 //
 //  - Inputs(<ordered list>): matches the list of non-control inputs to the node
-//    exactly (i.e. does not match a suffix or a prefix).
+//    exactly (i.e. does not match a suffix or a prefix) where each element
+//    matches an output of a node (see Out(idx, node) below).
 //
 //  - CtrlDeps(<unordered list>): matches the list of control dependences on the
 //    node exactly but in any order.
@@ -40,10 +41,16 @@ limitations under the License.
 //  - ConstantValue(tensorflow::Input::Initializer init): matches a Const node
 //    with the constant value `init`.  Implies Op("Const").
 //
-// Node properties may not be repeated in a single NodeWith(...)  matcher.
-// E.g. NodeWith(Op("Foo"), Op("Bar")) will CHECK-fail.  Since ConstantValue
-// implies Op("Const"), a single NodeWith matcher can't have both
-// ConstantValue(...) and Op(...).
+//  - Attr(name, value): Matches a single attribute with name `name` and value
+//    `value`.  Right now only boolean values are supported.
+//
+// Overlapping node properties may not be repeated in a single NodeWith(...)
+// matcher.  E.g. NodeWith(Op("Foo"), Op("Bar")) will CHECK-fail.  Since
+// ConstantValue implies Op("Const"), a single NodeWith matcher can't have both
+// ConstantValue(...) and Op(...).  Multiple Attr() values can be combined as
+// long as the attribute names are different.
+//
+// Out(idx, node) matches the `idx`'th output of a node that matches `node`.
 
 #ifndef TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_
 #define TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_
@@ -66,6 +73,8 @@ namespace matchers {
 
 namespace impl {
 
+using OutEdge = std::pair<const Node*, int>;
+
 // -----------------------------------------------------------------------------
 // Implementation details.
 
@@ -74,6 +83,8 @@ namespace impl {
 class NodeMatcherProperties {
  public:
   using NodeSeqMatcher = std::vector<::testing::Matcher<const Node*>>;
+  using InputSeqMatcher = std::vector<::testing::Matcher<OutEdge>>;
+  using AttrKeyValuePair = std::pair<string, AttrValue>;
 
   const absl::optional<string>& name() const { return name_; }
   const absl::optional<string>& op() const { return op_; }
@@ -83,12 +94,13 @@ class NodeMatcherProperties {
   const absl::optional<Tensor>& constant_value() const {
     return constant_value_;
   }
-  const absl::optional<NodeSeqMatcher>& input_nodes() const {
-    return input_nodes_;
+  const absl::optional<InputSeqMatcher>& inputs() const {
+    return input_matchers_;
   }
   const absl::optional<NodeSeqMatcher>& control_deps() const {
     return control_deps_;
   }
+  const absl::optional<AttrKeyValuePair>& attr() const { return attr_; }
 
   void set_name(string name) {
     DCHECK(IsEmpty());
@@ -111,9 +123,9 @@ class NodeMatcherProperties {
     op_ = "Const";
   }
 
-  void set_input_nodes(NodeSeqMatcher input_nodes) {
+  void set_inputs(InputSeqMatcher inputs) {
     DCHECK(IsEmpty());
-    input_nodes_ = std::move(input_nodes);
+    input_matchers_ = std::move(inputs);
   }
 
   void set_control_deps(NodeSeqMatcher control_deps) {
@@ -121,9 +133,14 @@ class NodeMatcherProperties {
     control_deps_ = std::move(control_deps);
   }
 
+  void set_attr(AttrKeyValuePair attr) {
+    DCHECK(IsEmpty());
+    attr_ = std::move(attr);
+  }
+
   bool IsEmpty() const {
-    return !name().has_value() && !op().has_value() &&
-           !input_nodes().has_value() && !control_deps().has_value();
+    return !name().has_value() && !op().has_value() && !inputs().has_value() &&
+           !control_deps().has_value() && !attr().has_value();
   }
 
  private:
@@ -131,18 +148,24 @@ class NodeMatcherProperties {
   absl::optional<string> op_;
   absl::optional<string> assigned_device_;
   absl::optional<Tensor> constant_value_;
-  absl::optional<NodeSeqMatcher> input_nodes_;
+  absl::optional<InputSeqMatcher> input_matchers_;
   absl::optional<NodeSeqMatcher> control_deps_;
+  absl::optional<AttrKeyValuePair> attr_;
 };
 
 ::testing::Matcher<const Node*> NodeWith(
     absl::Span<const NodeMatcherProperties> props);
 
 impl::NodeMatcherProperties Inputs(
-    absl::Span<const ::testing::Matcher<const Node*>> inputs);
+    absl::Span<const ::testing::Matcher<OutEdge>> inputs);
 
 impl::NodeMatcherProperties CtrlDeps(
     absl::Span<const ::testing::Matcher<const Node*>> control_deps);
+
+impl::NodeMatcherProperties Attr(std::pair<string, AttrValue> attrs);
+
+std::pair<string, AttrValue> AttrLiteralHelper(
+    const std::pair<string, bool>& bool_attr);
 }  // namespace impl
 
 // -----------------------------------------------------------------------------
@@ -157,6 +180,13 @@ impl::NodeMatcherProperties Op(string op);
 // Matches a node with assigned device `assigned_device`.
 impl::NodeMatcherProperties AssignedDevice(string assigned_device);
 
+// Matches a node with a boolean typed attrbute named `name` and with value
+// `value`.
+template <typename ValueTy>
+impl::NodeMatcherProperties Attr(const string& name, ValueTy value) {
+  return impl::Attr({impl::AttrLiteralHelper({name, value})});
+}
+
 // Matches a node with inputs `inputs`.
 //
 // `inputs` are ordered; `inputs`[i] must match input i.
@@ -165,6 +195,15 @@ impl::NodeMatcherProperties Inputs(Ts... inputs) {
   return impl::Inputs({inputs...});
 }
 
+// Matches the `idx`'th output of a node that matches `node`.
+::testing::Matcher<impl::OutEdge> Out(int oidx,
+                                      ::testing::Matcher<const Node*> node);
+
+// Matches the first output of a node that matches `node`.
+::testing::Matcher<impl::OutEdge> Out(::testing::Matcher<const Node*> node) {
+  return Out(0, node);
+}
+
 // Matches a node with control dependences `control_deps`.
 //
 // `control_deps` are unordered and will match the control deps of a node in any
@@ -192,6 +231,9 @@ template <typename... Ts>
 // If `g` has a node named `name` returns it, otherwise returns null.
 Node* FindNodeByName(Graph* g, absl::string_view name);
 }  // namespace testing
+
+void PrintTo(const Node* n, ::std::ostream* os);
+void PrintTo(Node* n, ::std::ostream* os);
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_
diff --git a/tensorflow/compiler/jit/node_matchers_test.cc b/tensorflow/compiler/jit/node_matchers_test.cc
index 93a8994307..c3f0dfece8 100644
--- a/tensorflow/compiler/jit/node_matchers_test.cc
+++ b/tensorflow/compiler/jit/node_matchers_test.cc
@@ -18,6 +18,8 @@ limitations under the License.
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/ops/array_ops.h"
 #include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/cc/ops/control_flow_ops.h"
+#include "tensorflow/cc/ops/control_flow_ops_internal.h"
 #include "tensorflow/cc/ops/math_ops.h"
 
 namespace tensorflow {
@@ -27,12 +29,14 @@ namespace {
 using ::testing::_;
 
 using testing::matchers::AssignedDevice;
+using testing::matchers::Attr;
 using testing::matchers::ConstantValue;
 using testing::matchers::CtrlDeps;
 using testing::matchers::Inputs;
 using testing::matchers::Name;
 using testing::matchers::NodeWith;
 using testing::matchers::Op;
+using testing::matchers::Out;
 
 template <typename M, typename T>
 string Explain(const T& t, const M& m) {
@@ -61,7 +65,7 @@ TEST(NodeMatchers, CheckAgainstConstant) {
             "\nexpected op Add but found Placeholder");
   EXPECT_EQ(Explain(placeholder.node(), NodeWith(Name("add"))),
             "\nexpected name add but found placeholder");
-  EXPECT_EQ(Explain(placeholder.node(), NodeWith(Inputs(NodeWith()))),
+  EXPECT_EQ(Explain(placeholder.node(), NodeWith(Inputs(Out(NodeWith())))),
             "\nexpected 1 inputs but node has 0");
 }
 
@@ -74,18 +78,19 @@ TEST(NodeMatchers, CheckAgainstBinary) {
       ops::Placeholder(root.WithOpName("placeholder_b"), DT_FLOAT);
   Output add = ops::Add(root.WithOpName("add"), placeholder_a, placeholder_b);
 
-  EXPECT_THAT(add.node(), NodeWith(Op("Add"), Name("add"),
-                                   Inputs(NodeWith(Name("placeholder_a")),
-                                          NodeWith(Name("placeholder_b")))));
+  EXPECT_THAT(add.node(),
+              NodeWith(Op("Add"), Name("add"),
+                       Inputs(Out(NodeWith(Name("placeholder_a"))),
+                              Out(NodeWith(Name("placeholder_b"))))));
 
   EXPECT_EQ(Explain(add.node(), NodeWith(Inputs())),
             "\nexpected 0 inputs but node has 2");
   EXPECT_EQ(
-      Explain(add.node(), NodeWith(Inputs(NodeWith(Name("blah")), _))),
+      Explain(add.node(), NodeWith(Inputs(Out(NodeWith(Name("blah"))), _))),
       "\ninput 0 does not match expected:\nname: blah, \nsource does not match "
       "expected name: blah\n\t\nexpected name blah but found placeholder_a");
   EXPECT_EQ(
-      Explain(add.node(), NodeWith(Inputs(_, NodeWith(Name("blah"))))),
+      Explain(add.node(), NodeWith(Inputs(_, Out(NodeWith(Name("blah")))))),
       "\ninput 1 does not match expected:\nname: blah, \nsource does not match "
       "expected name: blah\n\t\nexpected name blah but found placeholder_b");
 }
@@ -174,6 +179,36 @@ TEST(NodeMatchers, AssignedDevice) {
             "/job:localhost/replica:0/task:0/device:CPU:0 but found \"\"");
 }
 
+TEST(NodeMatchers, OutputIndices) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+  Output pred = ops::Placeholder(root.WithOpName("pred"), DT_BOOL);
+
+  Output data = ops::Placeholder(root.WithOpName("data"), DT_FLOAT);
+  ops::Switch sw(root.WithOpName("switch"), data, pred);
+  Output add = ops::Add(root.WithOpName("add"), sw.output_true,
+                        ops::Placeholder(root.WithOpName("addend"), DT_FLOAT));
+
+  EXPECT_THAT(add.node(), NodeWith(Inputs(Out(1, NodeWith(Op("Switch"))), _)));
+  EXPECT_EQ(
+      Explain(add.node(), NodeWith(Inputs(Out(0, NodeWith(Op("Switch"))), _))),
+      "\ninput 0 does not match expected:\nop: Switch, \nexpected output slot "
+      "to be 0 but found 1");
+}
+
+TEST(NodeMatchers, Attrs) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+  Output enter = ops::internal::Enter(
+      root.WithOpName("enter"),
+      ops::Placeholder(root.WithOpName("data"), DT_FLOAT), "frame_name",
+      ops::internal::Enter::Attrs{}.IsConstant(true));
+  EXPECT_THAT(enter.node(), NodeWith(Attr("is_constant", true)));
+  EXPECT_EQ(Explain(enter.node(), NodeWith(Attr("is_constant", false))),
+            "attribute named is_constant does not match value; expected: "
+            "\"false\", found: \"true\"");
+  EXPECT_EQ(Explain(enter.node(), NodeWith(Attr("missing_attr", false))),
+            "did not find attribute named \"missing_attr\" in node");
+}
+
 }  // namespace
 }  // namespace testing
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/ops/xla_ops.cc b/tensorflow/compiler/jit/ops/xla_ops.cc
index bcd1a29b1f..95d12e95fd 100644
--- a/tensorflow/compiler/jit/ops/xla_ops.cc
+++ b/tensorflow/compiler/jit/ops/xla_ops.cc
@@ -54,6 +54,7 @@ REGISTER_OP("XlaClusterOutput")
 REGISTER_OP("_XlaCompile")
     .Input("constants: Tconstants")
     .Attr("Tconstants: list(type) >= 0")
+    .Attr("must_compile: bool")
     .Input("args: Targs")
     .Attr("Targs: list(type) >= 0")
     .Input("resources: Nresources * resource")
@@ -71,8 +72,12 @@ that _XlaRun can use to look up the LocalExecutable and execute it.
 key: A key that can be used to look up the local executable compiled by the
    node and associated metadata.
 
-compilation_successful: True iff the compilation was successful.  Always true
-for now.
+compilation_successful: If the `must_compile` attr is false the _XlaCompile op
+   can decide not to compile the clusters based on some profitability
+   heuristics.  In that case `compilation_successful` is false if _XlaCompile
+   chose not to compile the cluster.  If the `must_compile` attr is true then
+   _XlaCompile always attempts to compile the cluster and
+   `compilation_successful` is always true.
 )");
 
 REGISTER_OP("_XlaRun")
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc
index 0471995015..826e98b966 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.cc
+++ b/tensorflow/compiler/jit/xla_compilation_cache.cc
@@ -40,6 +40,7 @@ namespace tensorflow {
 XlaCompilationCache::XlaCompilationCache(xla::LocalClient* client,
                                          DeviceType device_type)
     : client_(client), device_type_(std::move(device_type)) {}
+
 XlaCompilationCache::~XlaCompilationCache() {
   // Ensure any use of our programs have completed by waiting for all stream
   // executors to complete.
@@ -229,10 +230,15 @@ Status XlaCompilationCache::Compile(
     const std::map<int, Tensor>& constant_args,
     const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
     const XlaCompiler::CompileOptions& compile_options,
+    CompileMode compile_mode,
     const XlaCompiler::CompilationResult** out_compilation_result,
     xla::LocalExecutable** out_executable) {
+  // Set the compile threshold to 1 to implement CompileMode::kStrict.
+  int64 compile_threshold =
+      compile_mode == CompileMode::kLazy ? kDefaultCompilationThreshold : 1;
   return CompileImpl(options, function, constant_args, variable_args, ctx,
                      compile_options, /*compile_single_op=*/false,
+                     /*compile_threshold=*/compile_threshold,
                      out_compilation_result, out_executable);
 }
 
@@ -247,9 +253,10 @@ Status XlaCompilationCache::CompileSingleOp(
   NameAttrList name;
   name.set_name(def.op());
   *name.mutable_attr() = def.attr();
-  return CompileImpl(
-      options, name, constant_args, variable_args, ctx, compile_options,
-      /*compile_single_op=*/true, out_compilation_result, out_executable);
+  return CompileImpl(options, name, constant_args, variable_args, ctx,
+                     compile_options,
+                     /*compile_single_op=*/true, /*compile_threshold=*/1,
+                     out_compilation_result, out_executable);
 }
 
 Status XlaCompilationCache::CompileImpl(
@@ -257,6 +264,7 @@ Status XlaCompilationCache::CompileImpl(
     const std::map<int, Tensor>& constant_args,
     const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
     const XlaCompiler::CompileOptions& compile_options, bool compile_single_op,
+    int64 compile_threshold,
     const XlaCompiler::CompilationResult** out_compilation_result,
     xla::LocalExecutable** out_executable) {
   DCHECK_NE(out_executable, nullptr);
@@ -310,9 +318,18 @@ Status XlaCompilationCache::CompileImpl(
   // TODO(phawkins): this locking will need to be restructured when we implement
   // cache eviction.
   mutex_lock entry_lock(entry->mu);
+  int64 current_request_count = ++entry->request_count;
   if (!entry->compiled) {
     VLOG(2) << "Compilation cache miss for signature: "
-            << SignatureDebugString(signature);
+            << SignatureDebugString(signature) << " with request count "
+            << current_request_count << " and compile threshold "
+            << compile_threshold;
+    if (current_request_count < compile_threshold) {
+      *out_compilation_result = nullptr;
+      *out_executable = nullptr;
+      return Status::OK();
+    }
+
     tensorflow::Env* env = tensorflow::Env::Default();
     const uint64 compile_start_us = env->NowMicros();
     // Do the actual JIT compilation without holding the lock (it can take
diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h
index 75c7758f73..f06a991818 100644
--- a/tensorflow/compiler/jit/xla_compilation_cache.h
+++ b/tensorflow/compiler/jit/xla_compilation_cache.h
@@ -50,6 +50,11 @@ class XlaCompilationCache : public ResourceBase {
   XlaCompilationCache(xla::LocalClient* client, DeviceType device_type);
   ~XlaCompilationCache() override;
 
+  enum class CompileMode {
+    kLazy,
+    kStrict,
+  };
+
   // Compiles a function into a XlaCompiler::CompilationResult that can be used
   // to execute an XLA Computation. Compilation results are cached.
   // `function` is the name of a Tensorflow function to compile.
@@ -58,6 +63,14 @@ class XlaCompilationCache : public ResourceBase {
   // `variable_args` is a snapshot of the current values of the
   // resource variable arguments to `function`; uninitialized variables are
   // represented by an absent OptionalTensor.
+  //
+  // `compile_mode` controls the behavior of the compilation cache on a cache
+  // miss.  If `compile_mode` is `kLazy` then, based on some profitability
+  // heuristics, the compilation cache may decide not to compile the cluster at
+  // this time.  In this case it returns null into both `out_compilation_result`
+  // and `out_executable`.  If `compile_mode` is `kStrict` then the compilation
+  // cache always attempts the compilation on a cache miss.
+  //
   // The result of compilation is written to `*compilation_result`, which must
   // be non-null. If `executable` is non-null, also builds an
   // xla::LocalExecutable and sets `executable` to point to it. The resulting
@@ -69,6 +82,7 @@ class XlaCompilationCache : public ResourceBase {
                  const std::map<int, OptionalTensor>& variable_args,
                  OpKernelContext* ctx,
                  const XlaCompiler::CompileOptions& compile_options,
+                 CompileMode compile_mode,
                  const XlaCompiler::CompilationResult** out_compilation_result,
                  xla::LocalExecutable** out_executable);
 
@@ -94,7 +108,7 @@ class XlaCompilationCache : public ResourceBase {
       const std::map<int, Tensor>& constant_args,
       const std::map<int, OptionalTensor>& variable_args, OpKernelContext* ctx,
       const XlaCompiler::CompileOptions& compile_options,
-      bool compile_single_op,
+      bool compile_single_op, int64 compile_threshold,
       const XlaCompiler::CompilationResult** out_compilation_result,
       xla::LocalExecutable** out_executable);
 
@@ -139,6 +153,9 @@ class XlaCompilationCache : public ResourceBase {
     // Have we tried compiling this entry?
     bool compiled = false;
 
+    // The number of times a compilation with this signature has been requested.
+    int64 request_count = 0;
+
     // Did compilation succeed?
     Status compilation_status GUARDED_BY(mu);
 
@@ -167,6 +184,10 @@ class XlaCompilationCache : public ResourceBase {
   absl::flat_hash_map<string, CompileStats> compile_stats_
       GUARDED_BY(compile_stats_mu_);
 
+  // The number of times a lazy compilation must be requested for a specific
+  // signature before  we attempt to compile it.
+  static constexpr int64 kDefaultCompilationThreshold = 2;
+
   TF_DISALLOW_COPY_AND_ASSIGN(XlaCompilationCache);
 };
 
diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h
index 6967ad1f03..14a232b7a8 100644
--- a/tensorflow/compiler/jit/xla_device_ops.h
+++ b/tensorflow/compiler/jit/xla_device_ops.h
@@ -65,11 +65,13 @@ class XlaAssignVariableOp : public AsyncOpKernel {
                               .HostMemory("resources"),   \
                           KERNEL);
 
-#define REGISTER_XLA_COMPILE_KERNEL(DEVICE, KERNEL, TYPES) \
-  REGISTER_KERNEL_BUILDER(Name("_XlaCompile")              \
-                              .Device(DEVICE)              \
-                              .HostMemory("constants")     \
-                              .HostMemory("resources"),    \
+#define REGISTER_XLA_COMPILE_KERNEL(DEVICE, KERNEL, TYPES)          \
+  REGISTER_KERNEL_BUILDER(Name("_XlaCompile")                       \
+                              .Device(DEVICE)                       \
+                              .HostMemory("constants")              \
+                              .HostMemory("key")                    \
+                              .HostMemory("compilation_successful") \
+                              .HostMemory("resources"),             \
                           KERNEL);
 
 #define REGISTER_XLA_RUN_KERNEL(DEVICE, KERNEL, TYPES) \
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index ba2401ed26..a8a9f39e10 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -1089,6 +1089,7 @@ cuda_py_test(
     size = "medium",
     srcs = ["jit_test.py"],
     additional_deps = [
+        ":test_utils",
         "//tensorflow/contrib/compiler:compiler_py",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
@@ -1107,6 +1108,7 @@ cuda_py_test(
     size = "small",
     srcs = ["dense_layer_test.py"],
     additional_deps = [
+        ":test_utils",
         "//tensorflow/contrib/compiler:compiler_py",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/compiler/tests/dense_layer_test.py b/tensorflow/compiler/tests/dense_layer_test.py
index 9390870e07..618996e9d9 100644
--- a/tensorflow/compiler/tests/dense_layer_test.py
+++ b/tensorflow/compiler/tests/dense_layer_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import os
 import numpy as np
 
+from tensorflow.compiler.tests import test_utils
 from tensorflow.contrib.compiler import jit
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.layers import layers
@@ -30,7 +31,6 @@ from tensorflow.python.platform import test
 
 jit_scope = jit.experimental_jit_scope
 
-
 def GetRunMetadataLabels(run_metadata):
   """Returns all labels in run_metadata."""
   labels = []
@@ -74,7 +74,8 @@ class DenseLayerTest(test.TestCase):
 
       sess.run(variables.initialize_all_variables())
       run_metadata = config_pb2.RunMetadata()
-      sess.run(
+      test_utils.RunWithWarmup(
+          sess,
           y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])},
           run_metadata=run_metadata,
           options=config_pb2.RunOptions(
@@ -98,7 +99,8 @@ class DenseLayerTest(test.TestCase):
 
       sess.run(variables.initialize_all_variables())
       run_metadata = config_pb2.RunMetadata()
-      sess.run(
+      test_utils.RunWithWarmup(
+          sess,
           y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])},
           run_metadata=run_metadata,
           options=config_pb2.RunOptions(
@@ -126,7 +128,8 @@ class DenseLayerTest(test.TestCase):
 
       sess.run(variables.initialize_all_variables())
       run_metadata = config_pb2.RunMetadata()
-      sess.run(
+      test_utils.RunWithWarmup(
+          sess,
           y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])},
           run_metadata=run_metadata,
           options=config_pb2.RunOptions(
@@ -138,4 +141,6 @@ class DenseLayerTest(test.TestCase):
 
 
 if __name__ == "__main__":
+  os.environ["TF_XLA_FLAGS"] = ("--tf_xla_enable_lazy_compilation=true " +
+                                os.environ.get("TF_XLA_FLAGS", ""))
   test.main()
diff --git a/tensorflow/compiler/tests/jit_test.py b/tensorflow/compiler/tests/jit_test.py
index de68ff0e32..e31c25b259 100644
--- a/tensorflow/compiler/tests/jit_test.py
+++ b/tensorflow/compiler/tests/jit_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import os
 import numpy as np
 
+from tensorflow.compiler.tests import test_utils
 from tensorflow.contrib.compiler import jit
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
@@ -36,8 +37,8 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.platform import test
 
-jit_scope = jit.experimental_jit_scope
 
+jit_scope = jit.experimental_jit_scope
 
 # Disable rewrites to make sure we don't end up having to update this test
 # whenever we implement new ones.
@@ -77,11 +78,11 @@ def InLabels(labels, substr):
   return any([substr in x for x in labels])
 
 
-def MetadataHasXlaOp(run_metadata):
+def MetadataHasXlaRunOp(run_metadata):
   """Returns true if there are XlaRun kernels in run_metadata's timeline."""
 
   # TODO(phawkins): find a less hacky way to test whether a kernel ran.
-  return InLabels(RunMetadataLabels(run_metadata), "XlaRun")
+  return InLabels(RunMetadataLabels(run_metadata), "_XlaRun")
 
 
 class JitLaunchTest(test.TestCase):
@@ -108,15 +109,14 @@ class JitLaunchTest(test.TestCase):
       direct_op = fn(*placeholders)
 
       run_metadata = config_pb2.RunMetadata()
-      compiled = sess.run(compiled_op,
-                          feeds,
-                          run_metadata=run_metadata,
-                          options=config_pb2.RunOptions(
-                              trace_level=config_pb2.RunOptions.FULL_TRACE))
+      compiled = test_utils.RunWithWarmup(
+          sess, compiled_op, feeds,
+          config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE),
+          run_metadata)
       print("Compiled Result {}".format(compiled))
 
       if require_kernel_launch:
-        self.assert_(MetadataHasXlaOp(run_metadata))
+        self.assert_(MetadataHasXlaRunOp(run_metadata))
 
         direct = sess.run(direct_op, feeds)
         print("Direct Result {}".format(direct))
@@ -137,7 +137,7 @@ class JitLaunchTest(test.TestCase):
         a = constant_op.constant(100)  # pylint: disable=unused-variable
 
       call = KernelWithNoOutputs()  # pylint: disable=assignment-from-no-return
-      sess.run(call, {})
+      test_utils.RunWithWarmup(sess, call, {})
 
   def testAliasing(self):
     """Regression test for compiled functions that return an aliased buffer.
@@ -250,17 +250,21 @@ class JitLaunchTest(test.TestCase):
       dx = np.random.random_sample((batch_size, image_size)).astype(np.float32)
       with session_lib.Session() as sess:
         run_metadata = config_pb2.RunMetadata()
-        output = sess.run(y, {x: dx,
-                              w: dw,
-                              b: db},
-                          run_metadata=run_metadata,
-                          options=config_pb2.RunOptions(
-                              trace_level=config_pb2.RunOptions.FULL_TRACE))
+        output = test_utils.RunWithWarmup(
+            sess,
+            y, {
+                x: dx,
+                w: dw,
+                b: db
+            },
+            run_metadata=run_metadata,
+            options=config_pb2.RunOptions(
+                trace_level=config_pb2.RunOptions.FULL_TRACE))
 
         # TODO(phawkins): really we would like to test that there were exactly
         # two kernel launches. However, we have no reliable way to determine
         # that.
-        self.assert_(MetadataHasXlaOp(run_metadata))
+        self.assert_(MetadataHasXlaRunOp(run_metadata))
 
         expected = np.square(np.dot(dx, dw) + db)
         self.assertAllClose(expected, output, rtol=1e-1)
@@ -284,13 +288,16 @@ class XlaCompilationTest(test.TestCase):
         # statically known as part of the JIT compilation's input graph.
         z = array_ops.reshape(x, y)
       run_metadata = config_pb2.RunMetadata()
-      out = sess.run(z,
-                     {x: np.array([1, 2, 3, 4, 5, 6], np.float32),
-                      y: [-1, 3]},
-                     run_metadata=run_metadata,
-                     options=config_pb2.RunOptions(
-                         trace_level=config_pb2.RunOptions.FULL_TRACE))
-      self.assert_(MetadataHasXlaOp(run_metadata))
+      out = test_utils.RunWithWarmup(
+          sess,
+          z, {
+              x: np.array([1, 2, 3, 4, 5, 6], np.float32),
+              y: [-1, 3]
+          },
+          run_metadata=run_metadata,
+          options=config_pb2.RunOptions(
+              trace_level=config_pb2.RunOptions.FULL_TRACE))
+      self.assert_(MetadataHasXlaRunOp(run_metadata))
       self.assertAllClose(np.array([[1, 2, 3], [4, 5, 6]], np.float32), out)
 
   def testIgnoredArguments(self):
@@ -309,12 +316,16 @@ class XlaCompilationTest(test.TestCase):
           t = math_ops.add(z, z)
 
       run_metadata = config_pb2.RunMetadata()
-      out = sess.run(t, {x: np.int32(7),
-                         y: np.int32(404)},
-                     run_metadata=run_metadata,
-                     options=config_pb2.RunOptions(
-                         trace_level=config_pb2.RunOptions.FULL_TRACE))
-      self.assert_(MetadataHasXlaOp(run_metadata))
+      out = test_utils.RunWithWarmup(
+          sess,
+          t, {
+              x: np.int32(7),
+              y: np.int32(404)
+          },
+          run_metadata=run_metadata,
+          options=config_pb2.RunOptions(
+              trace_level=config_pb2.RunOptions.FULL_TRACE))
+      self.assert_(MetadataHasXlaRunOp(run_metadata))
       self.assertAllClose(28, out)
 
   def testLoops(self):
@@ -332,7 +343,7 @@ class XlaCompilationTest(test.TestCase):
                            run_metadata=run_metadata,
                            options=config_pb2.RunOptions(
                                trace_level=config_pb2.RunOptions.FULL_TRACE))
-      self.assert_(MetadataHasXlaOp(run_metadata))
+      self.assert_(MetadataHasXlaRunOp(run_metadata))
       self.assertAllClose(result, np.float32(95), rtol=1e-1)
 
   def testCond(self):
@@ -351,13 +362,17 @@ class XlaCompilationTest(test.TestCase):
       # deadlock.
 
       run_metadata = config_pb2.RunMetadata()
-      result = session.run(t, {x: np.float32(2),
-                               y: np.float32(4),
-                               c: True},
-                           run_metadata=run_metadata,
-                           options=config_pb2.RunOptions(
-                               trace_level=config_pb2.RunOptions.FULL_TRACE))
-      self.assert_(MetadataHasXlaOp(run_metadata))
+      result = test_utils.RunWithWarmup(
+          session,
+          t, {
+              x: np.float32(2),
+              y: np.float32(4),
+              c: True
+          },
+          run_metadata=run_metadata,
+          options=config_pb2.RunOptions(
+              trace_level=config_pb2.RunOptions.FULL_TRACE))
+      self.assert_(MetadataHasXlaRunOp(run_metadata))
       self.assertAllClose(result, np.float32(6), rtol=1e-1)
 
   def testNestedFunction(self):
@@ -425,11 +440,13 @@ class XlaCompilationTest(test.TestCase):
       cfg.graph_options.optimizer_options.do_function_inlining = True
       with session_lib.Session(graph=g, config=cfg) as sess:
         run_metadata = config_pb2.RunMetadata()
-        dx_val = sess.run(dx,
-                          feed_dict={x: 100.},
-                          run_metadata=run_metadata,
-                          options=config_pb2.RunOptions(
-                              trace_level=config_pb2.RunOptions.FULL_TRACE))
+        dx_val = test_utils.RunWithWarmup(
+            sess,
+            dx,
+            feed_dict={x: 100.},
+            run_metadata=run_metadata,
+            options=config_pb2.RunOptions(
+                trace_level=config_pb2.RunOptions.FULL_TRACE))
       self.assertAllClose(dx_val, 0.01)
       return RunMetadataLabels(run_metadata)
 
@@ -475,7 +492,8 @@ class ElementWiseFusionTest(test.TestCase):
       a7 = a6 + a2
 
       run_metadata = config_pb2.RunMetadata()
-      output = sess.run(
+      output = test_utils.RunWithWarmup(
+          sess,
           a7, {
               a1: arg0,
               a2: arg1
@@ -509,5 +527,60 @@ class ElementWiseFusionTest(test.TestCase):
     self.assertAllClose(tf_op, tfef_op, rtol=1e-1)
 
 
+class LazyCompilationTest(test.TestCase):
+
+  def testLazyCompilation(self):
+
+    @function.Defun(compiled=True)
+    def CompiledFunction(x):
+      return math_ops.log(x)
+
+    with session_lib.Session(config=NoRewriteSessionConfig()) as sess:
+      x = array_ops.placeholder(dtypes.float32)
+      y = CompiledFunction(x)
+
+      run_metadata_before_warmup = config_pb2.RunMetadata()
+      sess.run(
+          y,
+          feed_dict={x: [2., 10.]},
+          run_metadata=run_metadata_before_warmup,
+          options=config_pb2.RunOptions(
+              trace_level=config_pb2.RunOptions.FULL_TRACE))
+      self.assertTrue(
+          InLabels(
+              RunMetadataLabels(run_metadata_before_warmup), "_XlaCompile"))
+      self.assertFalse(
+          InLabels(RunMetadataLabels(run_metadata_before_warmup), "_XlaRun"))
+
+      # We compile when we see the same shape a second time.
+
+      run_metadata_after_warmup = config_pb2.RunMetadata()
+      sess.run(
+          y,
+          feed_dict={x: [2., 10.]},
+          run_metadata=run_metadata_after_warmup,
+          options=config_pb2.RunOptions(
+              trace_level=config_pb2.RunOptions.FULL_TRACE))
+      self.assertTrue(
+          InLabels(RunMetadataLabels(run_metadata_after_warmup), "_XlaCompile"))
+      self.assertTrue(
+          InLabels(RunMetadataLabels(run_metadata_after_warmup), "_XlaRun"))
+
+      run_metadata_for_new_shape = config_pb2.RunMetadata()
+      sess.run(
+          y,
+          feed_dict={x: [2., 10., 12.]},
+          run_metadata=run_metadata_for_new_shape,
+          options=config_pb2.RunOptions(
+              trace_level=config_pb2.RunOptions.FULL_TRACE))
+      self.assertTrue(
+          InLabels(
+              RunMetadataLabels(run_metadata_for_new_shape), "_XlaCompile"))
+      self.assertFalse(
+          InLabels(RunMetadataLabels(run_metadata_for_new_shape), "_XlaRun"))
+
+
 if __name__ == "__main__":
+  os.environ["TF_XLA_FLAGS"] = ("--tf_xla_enable_lazy_compilation=true " +
+                                os.environ.get("TF_XLA_FLAGS", ""))
   test.main()
diff --git a/tensorflow/compiler/tests/test_utils.py b/tensorflow/compiler/tests/test_utils.py
index 6abde18ea9..0e77dbf1a7 100644
--- a/tensorflow/compiler/tests/test_utils.py
+++ b/tensorflow/compiler/tests/test_utils.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
 
 
 def ConvertBetweenDataFormats(x, data_format_src, data_format_dst):
@@ -61,3 +62,14 @@ def PermuteDimsBetweenDataFormats(dims, data_format_src, data_format_dst):
   dim_map = {d: i for i, d in enumerate(data_format_src)}
   permuted_dims = [dims[dim_map[d]] for d in data_format_dst]
   return permuted_dims
+
+
+_JIT_WARMUP_ITERATIONS = 10
+
+
+def RunWithWarmup(sess, op_to_run, feed_dict, options=None, run_metadata=None):
+  """Runs a graph a few times to ensure that its clusters are compiled."""
+  for _ in xrange(0, _JIT_WARMUP_ITERATIONS):
+    sess.run(op_to_run, feed_dict, options=options)
+  return sess.run(
+      op_to_run, feed_dict, options=options, run_metadata=run_metadata)
-- 
GitLab


From 1beadaf06c83241693f969cc8dc08fd8f49feff5 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Thu, 11 Oct 2018 17:09:03 -0700
Subject: [PATCH 0827/1085] Traverse through all enum names even if they have
 duplicate values.

PiperOrigin-RevId: 216787178
---
 tensorflow/tools/common/traverse.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/tools/common/traverse.py b/tensorflow/tools/common/traverse.py
index 9607f80686..1eb9192dc1 100644
--- a/tensorflow/tools/common/traverse.py
+++ b/tensorflow/tools/common/traverse.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import enum
 import sys
 
 from tensorflow.python.util import tf_inspect
@@ -34,6 +35,12 @@ def _traverse_internal(root, visit, stack, path):
 
   try:
     children = tf_inspect.getmembers(root)
+
+    # Add labels for duplicate values in Enum.
+    if tf_inspect.isclass(root) and issubclass(root, enum.Enum):
+      for enum_member in root.__members__.items():
+        if enum_member not in children:
+          children.append(enum_member)
   except ImportError:
     # On some Python installations, some modules do not support enumerating
     # members (six in particular), leading to import errors.
-- 
GitLab


From cbe4e5195ed72f668837df13b759921d5d8cce8d Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 11 Oct 2018 17:16:20 -0700
Subject: [PATCH 0828/1085] r1.12-rc1 cherry-pick request: Query whether to
 enable XLA support on MacOS with no as a default (#22915)

* [XLA] Query whether to enable XLA support on MacOS with no as a default

* Query on Windows as well
---
 configure.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/configure.py b/configure.py
index 7e47175b98..b45b191dc1 100644
--- a/configure.py
+++ b/configure.py
@@ -1566,13 +1566,11 @@ def main():
     # TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on
     # Windows.
     environ_cp['TF_DOWNLOAD_CLANG'] = '0'
-    environ_cp['TF_ENABLE_XLA'] = '0'
     environ_cp['TF_NEED_MPI'] = '0'
     environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0'
 
   if is_macos():
     environ_cp['TF_NEED_TENSORRT'] = '0'
-    environ_cp['TF_ENABLE_XLA'] = '0'
 
   # The numpy package on ppc64le uses OpenBLAS which has multi-threading
   # issues that lead to incorrect answers.  Set OMP_NUM_THREADS=1 at
@@ -1583,8 +1581,9 @@ def main():
 
   set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite',
                 'with_ignite_support', True, 'ignite')
+  xla_enabled_by_default = is_linux()
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
-                True, 'xla')
+                xla_enabled_by_default, 'xla')
 
   set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
   if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
-- 
GitLab


From dc8c87a5ace57c76e58f82b0e24c04465056fb83 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Thu, 11 Oct 2018 17:18:41 -0700
Subject: [PATCH 0829/1085] Removing deprecated endpoints in V2. Also,
 deprecating "saved_model.loader.load" which is replaced by "saved_model.load"
 and saved_model.main_op, which will be replaced by "saved_model.main_op" in
 V2.

PiperOrigin-RevId: 216788275
---
 tensorflow/python/framework/errors_impl.py    |   2 +-
 tensorflow/python/framework/random_seed.py    |   2 +-
 tensorflow/python/lib/io/tf_record.py         |  15 +-
 tensorflow/python/ops/array_ops.py            |  15 +-
 .../python/ops/candidate_sampling_ops.py      |  12 +-
 tensorflow/python/ops/check_ops.py            |  67 +++-
 tensorflow/python/ops/clip_ops.py             |   2 +-
 tensorflow/python/ops/confusion_matrix.py     |   3 +-
 tensorflow/python/ops/data_flow_ops.py        |  15 +-
 tensorflow/python/ops/init_ops.py             |  35 +-
 tensorflow/python/ops/linalg_ops.py           |  11 +-
 tensorflow/python/ops/manip_ops.py            |   2 +-
 tensorflow/python/ops/math_ops.py             |  42 ++-
 tensorflow/python/ops/numerics.py             |   4 +-
 tensorflow/python/ops/parsing_ops.py          |   2 +-
 tensorflow/python/ops/random_ops.py           |   4 +-
 tensorflow/python/ops/sparse_ops.py           |  45 ++-
 tensorflow/python/ops/special_math_ops.py     |   2 +-
 tensorflow/python/ops/string_ops.py           |   5 +-
 tensorflow/python/saved_model/builder_impl.py |   5 +-
 tensorflow/python/saved_model/loader_impl.py  |  12 +-
 tensorflow/python/saved_model/main_op_impl.py |  11 +-
 .../saved_model/signature_def_utils_impl.py   |  40 +-
 tensorflow/python/saved_model/utils_impl.py   |  13 +-
 .../tools/api/generator/api_init_files.bzl    |   6 -
 tensorflow/python/training/input.py           |   4 +-
 .../api/golden/v2/tensorflow.-op-error.pbtxt  |  29 --
 .../tensorflow.-padding-f-i-f-o-queue.pbtxt   |  66 ----
 .../v2/tensorflow.-priority-queue.pbtxt       |  66 ----
 .../golden/v2/tensorflow.-queue-base.pbtxt    |  65 ----
 .../v2/tensorflow.-random-shuffle-queue.pbtxt |  66 ----
 ...flow.-sparse-conditional-accumulator.pbtxt |  46 ---
 ...tensorflow.glorot_normal_initializer.pbtxt |  19 -
 .../api/golden/v2/tensorflow.manip.pbtxt      |   4 -
 .../tools/api/golden/v2/tensorflow.nn.pbtxt   |   8 -
 .../tensorflow.orthogonal_initializer.pbtxt   |  18 -
 .../tools/api/golden/v2/tensorflow.pbtxt      | 344 ------------------
 ...thon_io.-t-f-record-compression-type.pbtxt |  20 -
 ...orflow.python_io.-t-f-record-options.pbtxt |  17 -
 ...sorflow.python_io.-t-f-record-writer.pbtxt |  21 --
 .../api/golden/v2/tensorflow.python_io.pbtxt  |  19 -
 ...d_model.builder.-saved-model-builder.pbtxt |  21 --
 .../v2/tensorflow.saved_model.builder.pbtxt   |   7 -
 .../v2/tensorflow.saved_model.loader.pbtxt    |  11 -
 .../v2/tensorflow.saved_model.main_op.pbtxt   |  11 -
 .../golden/v2/tensorflow.saved_model.pbtxt    |  24 +-
 ...flow.saved_model.signature_def_utils.pbtxt |  23 --
 .../v2/tensorflow.saved_model.utils.pbtxt     |  11 -
 .../api/golden/v2/tensorflow.train.pbtxt      |   4 -
 ...low.uniform_unit_scaling_initializer.pbtxt |  18 -
 ...sorflow.variance_scaling_initializer.pbtxt |  18 -
 51 files changed, 253 insertions(+), 1079 deletions(-)
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-op-error.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-padding-f-i-f-o-queue.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-priority-queue.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-queue-base.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-random-shuffle-queue.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-sparse-conditional-accumulator.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.glorot_normal_initializer.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.orthogonal_initializer.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-compression-type.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-options.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-writer.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.python_io.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.builder.-saved-model-builder.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.builder.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.loader.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.main_op.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.signature_def_utils.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.utils.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.uniform_unit_scaling_initializer.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.variance_scaling_initializer.pbtxt

diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py
index 8b303fa8a9..faa4fa7c6f 100644
--- a/tensorflow/python/framework/errors_impl.py
+++ b/tensorflow/python/framework/errors_impl.py
@@ -30,7 +30,7 @@ from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("errors.OpError", "OpError")
+@tf_export("errors.OpError", v1=["errors.OpError", "OpError"])
 @deprecation.deprecated_endpoints("OpError")
 class OpError(Exception):
   """A generic error that is raised when TensorFlow execution fails.
diff --git a/tensorflow/python/framework/random_seed.py b/tensorflow/python/framework/random_seed.py
index 6f9f347a99..777bb2fe8c 100644
--- a/tensorflow/python/framework/random_seed.py
+++ b/tensorflow/python/framework/random_seed.py
@@ -34,7 +34,7 @@ def _truncate_seed(seed):
   return seed % _MAXINT32  # Truncate to fit into 32-bit integer
 
 
-@tf_export('random.get_seed', 'get_seed')
+@tf_export('random.get_seed', v1=['random.get_seed', 'get_seed'])
 @deprecation.deprecated_endpoints('get_seed')
 def get_seed(op_seed):
   """Returns the local seeds an operation should use given an op-specific seed.
diff --git a/tensorflow/python/lib/io/tf_record.py b/tensorflow/python/lib/io/tf_record.py
index 9ab683d96a..b7fae85295 100644
--- a/tensorflow/python/lib/io/tf_record.py
+++ b/tensorflow/python/lib/io/tf_record.py
@@ -26,7 +26,9 @@ from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("io.TFRecordCompressionType", "python_io.TFRecordCompressionType")
+@tf_export(
+    "io.TFRecordCompressionType",
+    v1=["io.TFRecordCompressionType", "python_io.TFRecordCompressionType"])
 @deprecation.deprecated_endpoints("python_io.TFRecordCompressionType")
 class TFRecordCompressionType(object):
   """The type of compression for the record."""
@@ -35,7 +37,9 @@ class TFRecordCompressionType(object):
   GZIP = 2
 
 
-@tf_export("io.TFRecordOptions", "python_io.TFRecordOptions")
+@tf_export(
+    "io.TFRecordOptions",
+    v1=["io.TFRecordOptions", "python_io.TFRecordOptions"])
 @deprecation.deprecated_endpoints("python_io.TFRecordOptions")
 class TFRecordOptions(object):
   """Options used for manipulating TFRecord files."""
@@ -146,7 +150,9 @@ class TFRecordOptions(object):
     return options
 
 
-@tf_export("io.tf_record_iterator", "python_io.tf_record_iterator")
+@tf_export(
+    "io.tf_record_iterator",
+    v1=["io.tf_record_iterator", "python_io.tf_record_iterator"])
 @deprecation.deprecated_endpoints("python_io.tf_record_iterator")
 def tf_record_iterator(path, options=None):
   """An iterator that read the records from a TFRecords file.
@@ -179,7 +185,8 @@ def tf_record_iterator(path, options=None):
     reader.Close()
 
 
-@tf_export("io.TFRecordWriter", "python_io.TFRecordWriter")
+@tf_export(
+    "io.TFRecordWriter", v1=["io.TFRecordWriter", "python_io.TFRecordWriter"])
 @deprecation.deprecated_endpoints("python_io.TFRecordWriter")
 class TFRecordWriter(object):
   """A class to write records to a TFRecords file.
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index e3e4d5f910..c28b637c08 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1204,7 +1204,7 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
     return _apply_mask_1d(tensor, mask, axis)
 
 
-@tf_export("sparse.mask", "sparse_mask")
+@tf_export("sparse.mask", v1=["sparse.mask", "sparse_mask"])
 @deprecation.deprecated_endpoints("sparse_mask")
 def sparse_mask(a, mask_indices, name=None):
   """Masks elements of `IndexedSlices`.
@@ -1427,7 +1427,7 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
 
 
 # pylint: disable=invalid-name
-@tf_export("linalg.transpose", "matrix_transpose")
+@tf_export("linalg.transpose", v1=["linalg.transpose", "matrix_transpose"])
 @deprecation.deprecated_endpoints("matrix_transpose")
 def matrix_transpose(a, name="matrix_transpose", conjugate=False):
   """Transposes last two dimensions of tensor `a`.
@@ -1763,7 +1763,8 @@ def _normalize_sparse_shape(shape, name):
   return (ops.convert_to_tensor(shape, dtype=dtypes.int64, name=name), rank)
 
 
-@tf_export("sparse.placeholder", "sparse_placeholder")
+@tf_export(
+    "sparse.placeholder", v1=["sparse.placeholder", "sparse_placeholder"])
 @deprecation.deprecated_endpoints("sparse_placeholder")
 def sparse_placeholder(dtype, shape=None, name=None):
   """Inserts a placeholder for a sparse tensor that will be always fed.
@@ -2258,7 +2259,7 @@ def required_space_to_batch_paddings(input_shape,
     return result_paddings, result_crops
 
 
-@tf_export("nn.space_to_batch", "space_to_batch")
+@tf_export("nn.space_to_batch", v1=["nn.space_to_batch", "space_to_batch"])
 @deprecation.deprecated_endpoints("space_to_batch")
 def space_to_batch(input, paddings, block_size, name=None):  # pylint: disable=redefined-builtin
   result = space_to_batch_nd(
@@ -2273,7 +2274,7 @@ def space_to_batch(input, paddings, block_size, name=None):  # pylint: disable=r
 space_to_batch.__doc__ = gen_array_ops.space_to_batch.__doc__
 
 
-@tf_export("nn.space_to_depth", "space_to_depth")
+@tf_export("nn.space_to_depth", v1=["nn.space_to_depth", "space_to_depth"])
 @deprecation.deprecated_endpoints("space_to_depth")
 def space_to_depth(input, block_size, name=None, data_format="NHWC"):  # pylint: disable=redefined-builtin
   return gen_array_ops.space_to_depth(input, block_size, data_format, name=name)
@@ -2282,7 +2283,7 @@ def space_to_depth(input, block_size, name=None, data_format="NHWC"):  # pylint:
 space_to_depth.__doc__ = gen_array_ops.space_to_depth.__doc__
 
 
-@tf_export("nn.depth_to_space", "depth_to_space")
+@tf_export("nn.depth_to_space", v1=["nn.depth_to_space", "depth_to_space"])
 @deprecation.deprecated_endpoints("depth_to_space")
 def depth_to_space(input, block_size, name=None, data_format="NHWC"):  # pylint: disable=redefined-builtin
   return gen_array_ops.depth_to_space(input, block_size, data_format, name=name)
@@ -2787,7 +2788,7 @@ quantize_v2.__doc__ = """Please use `tf.quantize` instead."""
 
 # We want to expose tf.quantize instead of tf.quantize_v2; we can deprecate
 # tf.quantize_v2 in next version of TensorFlow.
-@tf_export("quantization.quantize", "quantize")
+@tf_export("quantization.quantize", v1=["quantization.quantize", "quantize"])
 @deprecation.deprecated_endpoints("quantize")
 def quantize(input,  # pylint: disable=redefined-builtin
              min_range,
diff --git a/tensorflow/python/ops/candidate_sampling_ops.py b/tensorflow/python/ops/candidate_sampling_ops.py
index 98dde995c9..f0bfdb2b7a 100644
--- a/tensorflow/python/ops/candidate_sampling_ops.py
+++ b/tensorflow/python/ops/candidate_sampling_ops.py
@@ -27,7 +27,9 @@ from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('random.uniform_candidate_sampler', 'nn.uniform_candidate_sampler')
+@tf_export(
+    'random.uniform_candidate_sampler',
+    v1=['random.uniform_candidate_sampler', 'nn.uniform_candidate_sampler'])
 @deprecation.deprecated_endpoints('nn.uniform_candidate_sampler')
 def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
                               range_max, seed=None, name=None):
@@ -84,8 +86,12 @@ def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
       seed2=seed2, name=name)
 
 
-@tf_export('random.log_uniform_candidate_sampler',
-           'nn.log_uniform_candidate_sampler')
+@tf_export(
+    'random.log_uniform_candidate_sampler',
+    v1=[
+        'random.log_uniform_candidate_sampler',
+        'nn.log_uniform_candidate_sampler'
+    ])
 @deprecation.deprecated_endpoints('nn.log_uniform_candidate_sampler')
 def log_uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
                                   range_max, seed=None, name=None):
diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py
index d607f1d9fb..67e0514fb1 100644
--- a/tensorflow/python/ops/check_ops.py
+++ b/tensorflow/python/ops/check_ops.py
@@ -92,7 +92,9 @@ def _shape_and_dtype_str(tensor):
   return 'shape=%s dtype=%s' % (tensor.shape, tensor.dtype.name)
 
 
-@tf_export('debugging.assert_proper_iterable', 'assert_proper_iterable')
+@tf_export(
+    'debugging.assert_proper_iterable',
+    v1=['debugging.assert_proper_iterable', 'assert_proper_iterable'])
 @deprecation.deprecated_endpoints('assert_proper_iterable')
 def assert_proper_iterable(values):
   """Static assert that values is a "proper" iterable.
@@ -121,7 +123,9 @@ def assert_proper_iterable(values):
         'Expected argument "values" to be iterable.  Found: %s' % type(values))
 
 
-@tf_export('debugging.assert_negative', 'assert_negative')
+@tf_export(
+    'debugging.assert_negative',
+    v1=['debugging.assert_negative', 'assert_negative'])
 @deprecation.deprecated_endpoints('assert_negative')
 def assert_negative(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x < 0` holds element-wise.
@@ -163,7 +167,9 @@ def assert_negative(x, data=None, summarize=None, message=None, name=None):
     return assert_less(x, zero, data=data, summarize=summarize)
 
 
-@tf_export('debugging.assert_positive', 'assert_positive')
+@tf_export(
+    'debugging.assert_positive',
+    v1=['debugging.assert_positive', 'assert_positive'])
 @deprecation.deprecated_endpoints('assert_positive')
 def assert_positive(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x > 0` holds element-wise.
@@ -204,7 +210,9 @@ def assert_positive(x, data=None, summarize=None, message=None, name=None):
     return assert_less(zero, x, data=data, summarize=summarize)
 
 
-@tf_export('debugging.assert_non_negative', 'assert_non_negative')
+@tf_export(
+    'debugging.assert_non_negative',
+    v1=['debugging.assert_non_negative', 'assert_non_negative'])
 @deprecation.deprecated_endpoints('assert_non_negative')
 def assert_non_negative(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x >= 0` holds element-wise.
@@ -247,7 +255,9 @@ def assert_non_negative(x, data=None, summarize=None, message=None, name=None):
     return assert_less_equal(zero, x, data=data, summarize=summarize)
 
 
-@tf_export('debugging.assert_non_positive', 'assert_non_positive')
+@tf_export(
+    'debugging.assert_non_positive',
+    v1=['debugging.assert_non_positive', 'assert_non_positive'])
 @deprecation.deprecated_endpoints('assert_non_positive')
 def assert_non_positive(x, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x <= 0` holds element-wise.
@@ -390,7 +400,9 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('debugging.assert_none_equal', 'assert_none_equal')
+@tf_export(
+    'debugging.assert_none_equal',
+    v1=['debugging.assert_none_equal', 'assert_none_equal'])
 @deprecation.deprecated_endpoints('assert_none_equal')
 def assert_none_equal(
     x, y, data=None, summarize=None, message=None, name=None):
@@ -442,7 +454,7 @@ def assert_none_equal(
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('debugging.assert_near', 'assert_near')
+@tf_export('debugging.assert_near', v1=['debugging.assert_near', 'assert_near'])
 @deprecation.deprecated_endpoints('assert_near')
 def assert_near(
     x, y, rtol=None, atol=None, data=None, summarize=None, message=None,
@@ -569,7 +581,9 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('debugging.assert_less_equal', 'assert_less_equal')
+@tf_export(
+    'debugging.assert_less_equal',
+    v1=['debugging.assert_less_equal', 'assert_less_equal'])
 @deprecation.deprecated_endpoints('assert_less_equal')
 def assert_less_equal(x, y, data=None, summarize=None, message=None, name=None):
   """Assert the condition `x <= y` holds element-wise.
@@ -666,7 +680,9 @@ def assert_greater(x, y, data=None, summarize=None, message=None, name=None):
     return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('debugging.assert_greater_equal', 'assert_greater_equal')
+@tf_export(
+    'debugging.assert_greater_equal',
+    v1=['debugging.assert_greater_equal', 'assert_greater_equal'])
 @deprecation.deprecated_endpoints('assert_greater_equal')
 def assert_greater_equal(x, y, data=None, summarize=None, message=None,
                          name=None):
@@ -827,7 +843,9 @@ def assert_rank(x, rank, data=None, summarize=None, message=None, name=None):
   return assert_op
 
 
-@tf_export('debugging.assert_rank_at_least', 'assert_rank_at_least')
+@tf_export(
+    'debugging.assert_rank_at_least',
+    v1=['debugging.assert_rank_at_least', 'assert_rank_at_least'])
 @deprecation.deprecated_endpoints('assert_rank_at_least')
 def assert_rank_at_least(
     x, rank, data=None, summarize=None, message=None, name=None):
@@ -959,7 +977,9 @@ def _assert_ranks_condition(
   return control_flow_ops.Assert(condition, data, summarize=summarize)
 
 
-@tf_export('debugging.assert_rank_in', 'assert_rank_in')
+@tf_export(
+    'debugging.assert_rank_in',
+    v1=['debugging.assert_rank_in', 'assert_rank_in'])
 @deprecation.deprecated_endpoints('assert_rank_in')
 def assert_rank_in(
     x, ranks, data=None, summarize=None, message=None, name=None):
@@ -1022,7 +1042,9 @@ def assert_rank_in(
   return assert_op
 
 
-@tf_export('debugging.assert_integer', 'assert_integer')
+@tf_export(
+    'debugging.assert_integer',
+    v1=['debugging.assert_integer', 'assert_integer'])
 @deprecation.deprecated_endpoints('assert_integer')
 def assert_integer(x, message=None, name=None):
   """Assert that `x` is of integer dtype.
@@ -1061,7 +1083,7 @@ def assert_integer(x, message=None, name=None):
     return control_flow_ops.no_op('statically_determined_was_integer')
 
 
-@tf_export('debugging.assert_type', 'assert_type')
+@tf_export('debugging.assert_type', v1=['debugging.assert_type', 'assert_type'])
 @deprecation.deprecated_endpoints('assert_type')
 def assert_type(tensor, tf_type, message=None, name=None):
   """Statically asserts that the given `Tensor` is of the specified type.
@@ -1109,13 +1131,17 @@ def _get_diff_for_monotonic_comparison(x):
   return control_flow_ops.cond(is_shorter_than_two, short_result, diff)
 
 
-@tf_export('debugging.is_numeric_tensor', 'is_numeric_tensor')
+@tf_export(
+    'debugging.is_numeric_tensor',
+    v1=['debugging.is_numeric_tensor', 'is_numeric_tensor'])
 @deprecation.deprecated_endpoints('is_numeric_tensor')
 def is_numeric_tensor(tensor):
   return isinstance(tensor, ops.Tensor) and tensor.dtype in NUMERIC_TYPES
 
 
-@tf_export('debugging.is_non_decreasing', 'is_non_decreasing')
+@tf_export(
+    'debugging.is_non_decreasing',
+    v1=['debugging.is_non_decreasing', 'is_non_decreasing'])
 @deprecation.deprecated_endpoints('is_non_decreasing')
 def is_non_decreasing(x, name=None):
   """Returns `True` if `x` is non-decreasing.
@@ -1143,7 +1169,9 @@ def is_non_decreasing(x, name=None):
     return math_ops.reduce_all(math_ops.less_equal(zero, diff))
 
 
-@tf_export('debugging.is_strictly_increasing', 'is_strictly_increasing')
+@tf_export(
+    'debugging.is_strictly_increasing',
+    v1=['debugging.is_strictly_increasing', 'is_strictly_increasing'])
 @deprecation.deprecated_endpoints('is_strictly_increasing')
 def is_strictly_increasing(x, name=None):
   """Returns `True` if `x` is strictly increasing.
@@ -1219,7 +1247,9 @@ def _assert_same_base_type(items, expected_type=None):
     return expected_type
 
 
-@tf_export('debugging.assert_same_float_dtype', 'assert_same_float_dtype')
+@tf_export(
+    'debugging.assert_same_float_dtype',
+    v1=['debugging.assert_same_float_dtype', 'assert_same_float_dtype'])
 @deprecation.deprecated_endpoints('assert_same_float_dtype')
 def assert_same_float_dtype(tensors=None, dtype=None):
   """Validate and return float type based on `tensors` and `dtype`.
@@ -1249,7 +1279,8 @@ def assert_same_float_dtype(tensors=None, dtype=None):
   return dtype
 
 
-@tf_export('debugging.assert_scalar', 'assert_scalar')
+@tf_export(
+    'debugging.assert_scalar', v1=['debugging.assert_scalar', 'assert_scalar'])
 @deprecation.deprecated_endpoints('assert_scalar')
 def assert_scalar(tensor, name=None):
   with ops.name_scope(name, 'assert_scalar', [tensor]) as name_scope:
diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py
index 45516068f4..cc003c3298 100644
--- a/tensorflow/python/ops/clip_ops.py
+++ b/tensorflow/python/ops/clip_ops.py
@@ -160,7 +160,7 @@ def clip_by_norm(t, clip_norm, axes=None, name=None):
   return tclip
 
 
-@tf_export("linalg.global_norm", "global_norm")
+@tf_export("linalg.global_norm", v1=["linalg.global_norm", "global_norm"])
 @deprecation.deprecated_endpoints("global_norm")
 def global_norm(t_list, name=None):
   """Computes the global norm of multiple tensors.
diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py
index 8259142456..994c7affbd 100644
--- a/tensorflow/python/ops/confusion_matrix.py
+++ b/tensorflow/python/ops/confusion_matrix.py
@@ -90,7 +90,8 @@ def remove_squeezable_dimensions(
     return labels, predictions
 
 
-@tf_export('train.confusion_matrix', 'confusion_matrix')
+@tf_export(
+    'train.confusion_matrix', v1=['train.confusion_matrix', 'confusion_matrix'])
 @deprecation.deprecated_endpoints('confusion_matrix')
 def confusion_matrix(labels, predictions, num_classes=None, dtype=dtypes.int32,
                      name=None, weights=None):
diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py
index 97b6f3bd9c..f4f8cbf560 100644
--- a/tensorflow/python/ops/data_flow_ops.py
+++ b/tensorflow/python/ops/data_flow_ops.py
@@ -113,7 +113,7 @@ def _shape_common(s1, s2):
 
 
 # pylint: disable=protected-access
-@tf_export("io.QueueBase", "QueueBase")
+@tf_export("io.QueueBase", v1=["io.QueueBase", "QueueBase"])
 @deprecation.deprecated_endpoints("QueueBase")
 class QueueBase(object):
   """Base class for queue implementations.
@@ -606,7 +606,8 @@ def _shared_name(shared_name):
   return shared_name
 
 
-@tf_export("io.RandomShuffleQueue", "RandomShuffleQueue")
+@tf_export(
+    "io.RandomShuffleQueue", v1=["io.RandomShuffleQueue", "RandomShuffleQueue"])
 @deprecation.deprecated_endpoints("RandomShuffleQueue")
 class RandomShuffleQueue(QueueBase):
   """A queue implementation that dequeues elements in a random order.
@@ -749,7 +750,8 @@ class FIFOQueue(QueueBase):
     super(FIFOQueue, self).__init__(dtypes, shapes, names, queue_ref)
 
 
-@tf_export("io.PaddingFIFOQueue", "PaddingFIFOQueue")
+@tf_export(
+    "io.PaddingFIFOQueue", v1=["io.PaddingFIFOQueue", "PaddingFIFOQueue"])
 @deprecation.deprecated_endpoints("PaddingFIFOQueue")
 class PaddingFIFOQueue(QueueBase):
   """A FIFOQueue that supports batching variable-sized tensors by padding.
@@ -824,7 +826,7 @@ class PaddingFIFOQueue(QueueBase):
     super(PaddingFIFOQueue, self).__init__(dtypes, shapes, names, queue_ref)
 
 
-@tf_export("io.PriorityQueue", "PriorityQueue")
+@tf_export("io.PriorityQueue", v1=["io.PriorityQueue", "PriorityQueue"])
 @deprecation.deprecated_endpoints("PriorityQueue")
 class PriorityQueue(QueueBase):
   """A queue implementation that dequeues elements in prioritized order.
@@ -1305,8 +1307,9 @@ class ConditionalAccumulator(ConditionalAccumulatorBase):
     return out
 
 
-@tf_export("sparse.SparseConditionalAccumulator",
-           "SparseConditionalAccumulator")
+@tf_export(
+    "sparse.SparseConditionalAccumulator",
+    v1=["sparse.SparseConditionalAccumulator", "SparseConditionalAccumulator"])
 @deprecation.deprecated_endpoints("SparseConditionalAccumulator")
 class SparseConditionalAccumulator(ConditionalAccumulatorBase):
   """A conditional accumulator for aggregating sparse gradients.
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index 65bb77b474..4fe6d05620 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -340,8 +340,11 @@ class TruncatedNormal(Initializer):
     }
 
 
-@tf_export("initializers.uniform_unit_scaling",
-           "uniform_unit_scaling_initializer")
+@tf_export(
+    "initializers.uniform_unit_scaling",
+    v1=[
+        "initializers.uniform_unit_scaling", "uniform_unit_scaling_initializer"
+    ])
 @deprecation.deprecated_endpoints("uniform_unit_scaling_initializer")
 class UniformUnitScaling(Initializer):
   """Initializer that generates tensors without scaling variance.
@@ -401,8 +404,13 @@ class UniformUnitScaling(Initializer):
     return {"factor": self.factor, "seed": self.seed, "dtype": self.dtype.name}
 
 
-@tf_export("keras.initializers.VarianceScaling",
-           "initializers.variance_scaling", "variance_scaling_initializer")
+@tf_export(
+    "keras.initializers.VarianceScaling",
+    "initializers.variance_scaling",
+    v1=[
+        "keras.initializers.VarianceScaling", "initializers.variance_scaling",
+        "variance_scaling_initializer"
+    ])
 @deprecation.deprecated_endpoints("variance_scaling_initializer")
 class VarianceScaling(Initializer):
   """Initializer capable of adapting its scale to the shape of weights tensors.
@@ -495,8 +503,14 @@ class VarianceScaling(Initializer):
     }
 
 
-@tf_export("keras.initializers.Orthogonal", "initializers.orthogonal",
-           "orthogonal_initializer", "keras.initializers.orthogonal")
+@tf_export(
+    "keras.initializers.Orthogonal",
+    "initializers.orthogonal",
+    "keras.initializers.orthogonal",
+    v1=[
+        "keras.initializers.Orthogonal", "initializers.orthogonal",
+        "orthogonal_initializer", "keras.initializers.orthogonal"
+    ])
 @deprecation.deprecated_endpoints("orthogonal_initializer")
 class Orthogonal(Initializer):
   """Initializer that generates an orthogonal matrix.
@@ -1151,8 +1165,13 @@ class GlorotUniform(VarianceScaling):
     }
 
 
-@tf_export("glorot_normal_initializer", "keras.initializers.glorot_normal",
-           "initializers.glorot_normal")
+@tf_export(
+    "keras.initializers.glorot_normal",
+    "initializers.glorot_normal",
+    v1=[
+        "glorot_normal_initializer", "keras.initializers.glorot_normal",
+        "initializers.glorot_normal"
+    ])
 @deprecation.deprecated_endpoints("glorot_normal_initializer")
 class GlorotNormal(VarianceScaling):
   """The Glorot normal initializer, also called Xavier normal initializer.
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index bf4354fa73..bbccc7e036 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -79,7 +79,8 @@ def _RegularizedGramianCholesky(matrix, l2_regularizer, first_kind):
   return gen_linalg_ops.cholesky(gramian)
 
 
-@tf_export('cholesky_solve', 'linalg.cholesky_solve')
+@tf_export(
+    'linalg.cholesky_solve', v1=['linalg.cholesky_solve', 'cholesky_solve'])
 @deprecation.deprecated_endpoints('cholesky_solve')
 def cholesky_solve(chol, rhs, name=None):
   """Solves systems of linear eqns `A X = RHS`, given Cholesky factorizations.
@@ -168,7 +169,7 @@ def eye(num_rows,
                              name=name)
 
 
-@tf_export('linalg.lstsq', 'matrix_solve_ls')
+@tf_export('linalg.lstsq', v1=['linalg.lstsq', 'matrix_solve_ls'])
 @deprecation.deprecated_endpoints('matrix_solve_ls')
 def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
   r"""Solves one or more linear least-squares problems.
@@ -305,7 +306,7 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
         matrix, rhs, l2_regularizer, fast=fast, name=name)
 
 
-@tf_export('linalg.eigh', 'self_adjoint_eig')
+@tf_export('linalg.eigh', v1=['linalg.eigh', 'self_adjoint_eig'])
 @deprecation.deprecated_endpoints('self_adjoint_eig')
 def self_adjoint_eig(tensor, name=None):
   """Computes the eigen decomposition of a batch of self-adjoint matrices.
@@ -328,7 +329,7 @@ def self_adjoint_eig(tensor, name=None):
   return e, v
 
 
-@tf_export('linalg.eigvalsh', 'self_adjoint_eigvals')
+@tf_export('linalg.eigvalsh', v1=['linalg.eigvalsh', 'self_adjoint_eigvals'])
 @deprecation.deprecated_endpoints('self_adjoint_eigvals')
 def self_adjoint_eigvals(tensor, name=None):
   """Computes the eigenvalues of one or more self-adjoint matrices.
@@ -351,7 +352,7 @@ def self_adjoint_eigvals(tensor, name=None):
   return e
 
 
-@tf_export('svd', 'linalg.svd')
+@tf_export('linalg.svd', v1=['linalg.svd', 'svd'])
 @deprecation.deprecated_endpoints('svd')
 def svd(tensor, full_matrices=False, compute_uv=True, name=None):
   r"""Computes the singular value decompositions of one or more matrices.
diff --git a/tensorflow/python/ops/manip_ops.py b/tensorflow/python/ops/manip_ops.py
index d9d0728287..046ea0dfb1 100644
--- a/tensorflow/python/ops/manip_ops.py
+++ b/tensorflow/python/ops/manip_ops.py
@@ -24,7 +24,7 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 # pylint: disable=protected-access
-@tf_export('roll', 'manip.roll')
+@tf_export('roll', v1=['roll', 'manip.roll'])
 @deprecation.deprecated_endpoints('manip.roll')
 def roll(input, shift, axis):  # pylint: disable=redefined-builtin
   return _gen_manip_ops.roll(input, shift, axis)
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 83b8b5a3a4..81f244a423 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -365,7 +365,7 @@ def sqrt(x, name=None):
       return gen_math_ops.sqrt(x, name=name)
 
 
-@tf_export("math.erf", "erf")
+@tf_export("math.erf", v1=["math.erf", "erf"])
 @deprecation.deprecated_endpoints("erf")
 def erf(x, name=None):
   """Computes the Gauss error function of `x` element-wise.
@@ -487,7 +487,7 @@ def complex(real, imag, name=None):
     return gen_math_ops._complex(real, imag, Tout=Tout, name=name)
 
 
-@tf_export("math.real", "real")
+@tf_export("math.real", v1=["math.real", "real"])
 @deprecation.deprecated_endpoints("real")
 def real(input, name=None):
   r"""Returns the real part of a complex (or real) tensor.
@@ -519,7 +519,7 @@ def real(input, name=None):
       return input
 
 
-@tf_export("math.imag", "imag")
+@tf_export("math.imag", v1=["math.imag", "imag"])
 @deprecation.deprecated_endpoints("imag")
 def imag(input, name=None):
   r"""Returns the imaginary part of a complex (or real) tensor.
@@ -550,7 +550,7 @@ def imag(input, name=None):
       return array_ops.zeros_like(input)
 
 
-@tf_export("math.angle", "angle")
+@tf_export("math.angle", v1=["math.angle", "angle"])
 @deprecation.deprecated_endpoints("angle")
 def angle(input, name=None):
   r"""Returns the element-wise argument of a complex (or real) tensor.
@@ -1082,7 +1082,7 @@ mod = gen_math_ops.floor_mod
 
 # TODO(aselle): Deprecate this once all internal functionality uses
 # tf.truncatediv
-@tf_export("math.floordiv", "floordiv")
+@tf_export("math.floordiv", v1=["math.floordiv", "floordiv"])
 @deprecation.deprecated_endpoints("floordiv")
 def floordiv(x, y, name=None):
   """Divides `x / y` elementwise, rounding toward the most negative integer.
@@ -1156,7 +1156,7 @@ _OverrideBinaryOperatorHelper(gen_math_ops.floor_mod, "mod")
 _OverrideBinaryOperatorHelper(pow, "pow")
 
 
-@tf_export("math.logical_xor", "logical_xor")
+@tf_export("math.logical_xor", v1=["math.logical_xor", "logical_xor"])
 @deprecation.deprecated_endpoints("logical_xor")
 def logical_xor(x, y, name="LogicalXor"):
   """x ^ y = (x | y) & ~(x & y)."""
@@ -1833,7 +1833,7 @@ def reduce_logsumexp(input_tensor,
     return _may_reduce_to_scalar(keepdims, axis, reduction_indices, result)
 
 
-@tf_export("linalg.trace", "trace")
+@tf_export("linalg.trace", v1=["linalg.trace", "trace"])
 @deprecation.deprecated_endpoints("trace")
 def trace(x, name=None):
   """Compute the trace of a tensor `x`.
@@ -2173,7 +2173,7 @@ def add_n(inputs, name=None):
   return gen_math_ops.add_n(inputs, name=name)
 
 
-@tf_export("math.accumulate_n", "accumulate_n")
+@tf_export("math.accumulate_n", v1=["math.accumulate_n", "accumulate_n"])
 @deprecation.deprecated_endpoints("accumulate_n")
 def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
   """Returns the element-wise sum of a list of tensors.
@@ -2283,7 +2283,7 @@ def sigmoid(x, name=None):
     return gen_math_ops.sigmoid(x, name=name)
 
 
-@tf_export("math.log_sigmoid", "log_sigmoid")
+@tf_export("math.log_sigmoid", v1=["math.log_sigmoid", "log_sigmoid"])
 @deprecation.deprecated_endpoints("log_sigmoid")
 def log_sigmoid(x, name=None):
   """Computes log sigmoid of `x` element-wise.
@@ -2324,7 +2324,7 @@ def tanh(x, name=None):
       return gen_math_ops.tanh(x, name=name)
 
 
-@tf_export("math.bincount", "bincount")
+@tf_export("math.bincount", v1=["math.bincount", "bincount"])
 @deprecation.deprecated_endpoints("bincount")
 def bincount(arr,
              weights=None,
@@ -2424,7 +2424,7 @@ def cumsum(x, axis=0, exclusive=False, reverse=False, name=None):
         x, axis, exclusive=exclusive, reverse=reverse, name=name)
 
 
-@tf_export("math.cumprod", "cumprod")
+@tf_export("math.cumprod", v1=["math.cumprod", "cumprod"])
 @deprecation.deprecated_endpoints("cumprod")
 def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
   """Compute the cumulative product of the tensor `x` along `axis`.
@@ -2477,7 +2477,7 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
         x, axis, exclusive=exclusive, reverse=reverse, name=name)
 
 
-@tf_export("math.conj", "conj")
+@tf_export("math.conj", v1=["math.conj", "conj"])
 @deprecation.deprecated_endpoints("conj")
 def conj(x, name=None):
   r"""Returns the complex conjugate of a complex number.
@@ -2578,7 +2578,9 @@ def _unsorted_segment_N(data, segment_ids, num_segments):
   return gen_math_ops.maximum(N, 1)
 
 
-@tf_export("math.unsorted_segment_mean", "unsorted_segment_mean")
+@tf_export(
+    "math.unsorted_segment_mean",
+    v1=["math.unsorted_segment_mean", "unsorted_segment_mean"])
 @deprecation.deprecated_endpoints("unsorted_segment_mean")
 def unsorted_segment_mean(data, segment_ids, num_segments, name=None):
   r"""Computes the mean along segments of a tensor.
@@ -2621,7 +2623,9 @@ def unsorted_segment_mean(data, segment_ids, num_segments, name=None):
     return summed / N
 
 
-@tf_export("math.unsorted_segment_sqrt_n", "unsorted_segment_sqrt_n")
+@tf_export(
+    "math.unsorted_segment_sqrt_n",
+    v1=["math.unsorted_segment_sqrt_n", "unsorted_segment_sqrt_n"])
 @deprecation.deprecated_endpoints("unsorted_segment_sqrt_n")
 def unsorted_segment_sqrt_n(data, segment_ids, num_segments, name=None):
   r"""Computes the sum along segments of a tensor divided by the sqrt(N).
@@ -2667,7 +2671,8 @@ def unsorted_segment_sqrt_n(data, segment_ids, num_segments, name=None):
     return summed / gen_math_ops.sqrt(N)
 
 
-@tf_export("sparse.segment_sum", "sparse_segment_sum")
+@tf_export(
+    "sparse.segment_sum", v1=["sparse.segment_sum", "sparse_segment_sum"])
 @deprecation.deprecated_endpoints("sparse_segment_sum")
 def sparse_segment_sum(data, indices, segment_ids, name=None,
                        num_segments=None):
@@ -2741,7 +2746,8 @@ def sparse_segment_sum(data, indices, segment_ids, name=None,
         data=data, indices=indices, segment_ids=segment_ids, name=name)
 
 
-@tf_export("sparse.segment_mean", "sparse_segment_mean")
+@tf_export(
+    "sparse.segment_mean", v1=["sparse.segment_mean", "sparse_segment_mean"])
 @deprecation.deprecated_endpoints("sparse_segment_mean")
 def sparse_segment_mean(data,
                         indices,
@@ -2787,7 +2793,9 @@ def sparse_segment_mean(data,
         data=data, indices=indices, segment_ids=segment_ids, name=name)
 
 
-@tf_export("sparse.segment_sqrt_n", "sparse_segment_sqrt_n")
+@tf_export(
+    "sparse.segment_sqrt_n",
+    v1=["sparse.segment_sqrt_n", "sparse_segment_sqrt_n"])
 @deprecation.deprecated_endpoints("sparse_segment_sqrt_n")
 def sparse_segment_sqrt_n(data,
                           indices,
diff --git a/tensorflow/python/ops/numerics.py b/tensorflow/python/ops/numerics.py
index 002e87b411..0c0d81afb6 100644
--- a/tensorflow/python/ops/numerics.py
+++ b/tensorflow/python/ops/numerics.py
@@ -28,7 +28,9 @@ from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("debugging.assert_all_finite", "verify_tensor_all_finite")
+@tf_export(
+    "debugging.assert_all_finite",
+    v1=["debugging.assert_all_finite", "verify_tensor_all_finite"])
 @deprecation.deprecated_endpoints("verify_tensor_all_finite")
 def verify_tensor_all_finite(t, msg, name=None):
   """Assert that the tensor does not contain any NaN's or Inf's.
diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index a2da6412ed..b111e7f3df 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -1566,7 +1566,7 @@ def _parse_single_sequence_example_raw(serialized,
 
 
 # Swap `name` and `na_value` for backward compatibility.
-@tf_export("io.decode_csv", "decode_csv")
+@tf_export("io.decode_csv", v1=["io.decode_csv", "decode_csv"])
 @deprecation.deprecated_endpoints("decode_csv")
 def decode_csv(records,
                record_defaults,
diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py
index c2eb9dfc5d..f827a20ff8 100644
--- a/tensorflow/python/ops/random_ops.py
+++ b/tensorflow/python/ops/random_ops.py
@@ -357,7 +357,7 @@ def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None):
 ops.NotDifferentiable("Multinomial")
 
 
-@tf_export("random.gamma", "random_gamma")
+@tf_export("random.gamma", v1=["random.gamma", "random_gamma"])
 @deprecation.deprecated_endpoints("random_gamma")
 def random_gamma(shape,
                  alpha,
@@ -441,7 +441,7 @@ def random_gamma(shape,
             shape, alpha_broadcast, seed=seed1, seed2=seed2) / beta)
 
 
-@tf_export("random.poisson", "random_poisson")
+@tf_export("random.poisson", v1=["random.poisson", "random_poisson"])
 @deprecation.deprecated_endpoints("random_poisson")
 def random_poisson(lam, shape, dtype=dtypes.float32, seed=None, name=None):
   """Draws `shape` samples from each of the given Poisson distribution(s).
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index 7e3dbdbad4..14f1263df3 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -318,7 +318,7 @@ def sparse_concat(axis,
   return sparse_tensor.SparseTensor(output_ind, output_val, output_shape)
 
 
-@tf_export("sparse.add", "sparse_add")
+@tf_export("sparse.add", v1=["sparse.add", "sparse_add"])
 @deprecation.deprecated_endpoints("sparse_add")
 def sparse_add(a, b, thresh=0):
   """Adds two tensors, at least one of each is a `SparseTensor`.
@@ -559,7 +559,7 @@ def sparse_dense_cwise_add(sp_t, dense_t):
   return sparse_tensor.SparseTensor(sp_t.indices, result, sp_t.dense_shape)
 
 
-@tf_export("sparse.reorder", "sparse_reorder")
+@tf_export("sparse.reorder", v1=["sparse.reorder", "sparse_reorder"])
 @deprecation.deprecated_endpoints("sparse_reorder")
 def sparse_reorder(sp_input, name=None):
   """Reorders a `SparseTensor` into the canonical, row-major ordering.
@@ -610,7 +610,7 @@ def sparse_reorder(sp_input, name=None):
   return sparse_tensor.SparseTensor(reordered_ind, reordered_val, dense_shape)
 
 
-@tf_export("sparse.reshape", "sparse_reshape")
+@tf_export("sparse.reshape", v1=["sparse.reshape", "sparse_reshape"])
 @deprecation.deprecated_endpoints("sparse_reshape")
 def sparse_reshape(sp_input, shape, name=None):
   """Reshapes a `SparseTensor` to represent values in a new dense shape.
@@ -778,7 +778,7 @@ def sparse_split(keyword_required=KeywordRequired(),
   return sparse_tensors
 
 
-@tf_export("sparse.slice", "sparse_slice")
+@tf_export("sparse.slice", v1=["sparse.slice", "sparse_slice"])
 @deprecation.deprecated_endpoints("sparse_slice")
 def sparse_slice(sp_input, start, size, name=None):
   """Slice a `SparseTensor` based on the `start` and `size.
@@ -1112,7 +1112,7 @@ def sparse_reduce_sum_sparse(sp_input,
   return sparse_tensor.SparseTensor(output_ind, output_val, output_shape)
 
 
-@tf_export("sparse.to_dense", "sparse_tensor_to_dense")
+@tf_export("sparse.to_dense", v1=["sparse.to_dense", "sparse_tensor_to_dense"])
 @deprecation.deprecated_endpoints("sparse_tensor_to_dense")
 def sparse_tensor_to_dense(sp_input,
                            default_value=0,
@@ -1165,7 +1165,8 @@ def sparse_tensor_to_dense(sp_input,
       name=name)
 
 
-@tf_export("sparse.to_indicator", "sparse_to_indicator")
+@tf_export(
+    "sparse.to_indicator", v1=["sparse.to_indicator", "sparse_to_indicator"])
 @deprecation.deprecated_endpoints("sparse_to_indicator")
 def sparse_to_indicator(sp_input, vocab_size, name=None):
   """Converts a `SparseTensor` of ids into a dense bool indicator tensor.
@@ -1229,7 +1230,7 @@ def sparse_to_indicator(sp_input, vocab_size, name=None):
         sp_new, default_value=False, validate_indices=False, name=name)
 
 
-@tf_export("sparse.merge", "sparse_merge")
+@tf_export("sparse.merge", v1=["sparse.merge", "sparse_merge"])
 @deprecation.deprecated_endpoints("sparse_merge")
 def sparse_merge(sp_ids, sp_values, vocab_size, name=None,
                  already_sorted=False):
@@ -1374,7 +1375,7 @@ def sparse_merge(sp_ids, sp_values, vocab_size, name=None,
         sorted_result.indices, sorted_result.values, new_shape)
 
 
-@tf_export("sparse.retain", "sparse_retain")
+@tf_export("sparse.retain", v1=["sparse.retain", "sparse_retain"])
 @deprecation.deprecated_endpoints("sparse_retain")
 def sparse_retain(sp_input, to_retain):
   """Retains specified non-empty values within a `SparseTensor`.
@@ -1419,7 +1420,8 @@ def sparse_retain(sp_input, to_retain):
                                     array_ops.identity(sp_input.dense_shape))
 
 
-@tf_export("sparse.reset_shape", "sparse_reset_shape")
+@tf_export(
+    "sparse.reset_shape", v1=["sparse.reset_shape", "sparse_reset_shape"])
 @deprecation.deprecated_endpoints("sparse_reset_shape")
 def sparse_reset_shape(sp_input, new_shape=None):
   """Resets the shape of a `SparseTensor` with indices and values unchanged.
@@ -1521,7 +1523,9 @@ def sparse_reset_shape(sp_input, new_shape=None):
   return sparse_tensor.SparseTensor(in_indices, in_values, output_shape_tensor)
 
 
-@tf_export("sparse.fill_empty_rows", "sparse_fill_empty_rows")
+@tf_export(
+    "sparse.fill_empty_rows",
+    v1=["sparse.fill_empty_rows", "sparse_fill_empty_rows"])
 @deprecation.deprecated_endpoints("sparse_fill_empty_rows")
 def sparse_fill_empty_rows(sp_input, default_value, name=None):
   """Fills empty rows in the input 2-D `SparseTensor` with a default value.
@@ -1586,7 +1590,8 @@ def sparse_fill_empty_rows(sp_input, default_value, name=None):
         dense_shape=sp_input.dense_shape), empty_row_indicator)
 
 
-@tf_export("io.serialize_sparse", "serialize_sparse")
+@tf_export(
+    "io.serialize_sparse", v1=["io.serialize_sparse", "serialize_sparse"])
 @deprecation.deprecated_endpoints("serialize_sparse")
 def serialize_sparse(sp_input, name=None, out_type=dtypes.string):
   """Serialize a `SparseTensor` into a 3-vector (1-D `Tensor`) object.
@@ -1613,7 +1618,9 @@ def serialize_sparse(sp_input, name=None, out_type=dtypes.string):
       out_type=out_type)
 
 
-@tf_export("io.serialize_many_sparse", "serialize_many_sparse")
+@tf_export(
+    "io.serialize_many_sparse",
+    v1=["io.serialize_many_sparse", "serialize_many_sparse"])
 @deprecation.deprecated_endpoints("serialize_many_sparse")
 def serialize_many_sparse(sp_input, name=None, out_type=dtypes.string):
   """Serialize `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor`.
@@ -1715,7 +1722,9 @@ def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None):
   return sparse_tensor.SparseTensor(output_indices, output_values, output_shape)
 
 
-@tf_export("io.deserialize_many_sparse", "deserialize_many_sparse")
+@tf_export(
+    "io.deserialize_many_sparse",
+    v1=["io.deserialize_many_sparse", "deserialize_many_sparse"])
 @deprecation.deprecated_endpoints("deserialize_many_sparse")
 def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None):
   """Deserialize and concatenate `SparseTensors` from a serialized minibatch.
@@ -1786,7 +1795,7 @@ def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None):
   return sparse_tensor.SparseTensor(output_indices, output_values, output_shape)
 
 
-@tf_export("sparse.matmul", "sparse_tensor_dense_matmul")
+@tf_export("sparse.matmul", v1=["sparse.matmul", "sparse_tensor_dense_matmul"])
 @deprecation.deprecated_endpoints("sparse_tensor_dense_matmul")
 def sparse_tensor_dense_matmul(sp_a,
                                b,
@@ -2004,7 +2013,7 @@ def sparse_tensor_dense_matmul(sp_a,
         adjoint_b=adjoint_b)
 
 
-@tf_export("sparse.softmax", "sparse_softmax")
+@tf_export("sparse.softmax", v1=["sparse.softmax", "sparse_softmax"])
 @deprecation.deprecated_endpoints("sparse_softmax")
 def sparse_softmax(sp_input, name=None):
   """Applies softmax to a batched N-D `SparseTensor`.
@@ -2060,7 +2069,7 @@ def sparse_softmax(sp_input, name=None):
                                       sp_input.dense_shape)
 
 
-@tf_export("sparse.maximum", "sparse_maximum")
+@tf_export("sparse.maximum", v1=["sparse.maximum", "sparse_maximum"])
 @deprecation.deprecated_endpoints("sparse_maximum")
 def sparse_maximum(sp_a, sp_b, name=None):
   """Returns the element-wise max of two SparseTensors.
@@ -2098,7 +2107,7 @@ def sparse_maximum(sp_a, sp_b, name=None):
   return sparse_tensor.SparseTensor(out_indices, out_values, sp_a.dense_shape)
 
 
-@tf_export("sparse.minimum", "sparse_minimum")
+@tf_export("sparse.minimum", v1=["sparse.minimum", "sparse_minimum"])
 @deprecation.deprecated_endpoints("sparse_minimum")
 def sparse_minimum(sp_a, sp_b, name=None):
   """Returns the element-wise min of two SparseTensors.
@@ -2136,7 +2145,7 @@ def sparse_minimum(sp_a, sp_b, name=None):
   return sparse_tensor.SparseTensor(out_indices, out_values, sp_a.dense_shape)
 
 
-@tf_export("sparse.transpose", "sparse_transpose")
+@tf_export("sparse.transpose", v1=["sparse.transpose", "sparse_transpose"])
 @deprecation.deprecated_endpoints("sparse_transpose")
 def sparse_transpose(sp_input, perm=None, name=None):
   """Transposes a `SparseTensor`
diff --git a/tensorflow/python/ops/special_math_ops.py b/tensorflow/python/ops/special_math_ops.py
index cfab943896..e44bafedfc 100644
--- a/tensorflow/python/ops/special_math_ops.py
+++ b/tensorflow/python/ops/special_math_ops.py
@@ -34,7 +34,7 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 # TODO(b/27419586) Change docstring for required dtype of x once int allowed
-@tf_export('math.lbeta', 'lbeta')
+@tf_export('math.lbeta', v1=['math.lbeta', 'lbeta'])
 @deprecation.deprecated_endpoints('lbeta')
 def lbeta(x, name=None):
   r"""Computes \\(ln(|Beta(x)|)\\), reducing along the last dimension.
diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index f26388efea..ed14aa7d90 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -75,7 +75,8 @@ def regex_full_match(input, pattern, name=None):
 regex_full_match.__doc__ = gen_string_ops.regex_full_match.__doc__
 
 
-@tf_export("strings.regex_replace", "regex_replace")
+@tf_export(
+    "strings.regex_replace", v1=["strings.regex_replace", "regex_replace"])
 @deprecation.deprecated_endpoints("regex_replace")
 def regex_replace(input, pattern, rewrite, replace_global=True, name=None):
   r"""Replace elements of `input` matching regex `pattern` with `rewrite`.
@@ -313,7 +314,7 @@ def _reduce_join_reduction_dims(x, axis, reduction_indices):
     return math_ops.range(array_ops.rank(x) - 1, -1, -1)
 
 
-@tf_export("strings.reduce_join", "reduce_join")
+@tf_export("strings.reduce_join", v1=["strings.reduce_join", "reduce_join"])
 @deprecation.deprecated_endpoints("reduce_join")
 def reduce_join(inputs, axis=None,  # pylint: disable=missing-docstring
                 keep_dims=False,
diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py
index 8bf057f69d..8e7bea36de 100644
--- a/tensorflow/python/saved_model/builder_impl.py
+++ b/tensorflow/python/saved_model/builder_impl.py
@@ -40,8 +40,9 @@ from tensorflow.python.util.deprecation import deprecated_endpoints
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("saved_model.Builder",
-           "saved_model.builder.SavedModelBuilder")
+@tf_export(
+    "saved_model.Builder",
+    v1=["saved_model.Builder", "saved_model.builder.SavedModelBuilder"])
 @deprecated_endpoints("saved_model.builder.SavedModelBuilder")
 class SavedModelBuilder(object):
   """Builds the `SavedModel` protocol buffer and saves variables and assets.
diff --git a/tensorflow/python/saved_model/loader_impl.py b/tensorflow/python/saved_model/loader_impl.py
index 895644a030..55ef273fee 100644
--- a/tensorflow/python/saved_model/loader_impl.py
+++ b/tensorflow/python/saved_model/loader_impl.py
@@ -145,8 +145,12 @@ def _get_main_op_tensor(
   return main_op_tensor
 
 
-@tf_export("saved_model.maybe_saved_model_directory",
-           "saved_model.loader.maybe_saved_model_directory")
+@tf_export(
+    "saved_model.maybe_saved_model_directory",
+    v1=[
+        "saved_model.maybe_saved_model_directory",
+        "saved_model.loader.maybe_saved_model_directory"
+    ])
 @deprecation.deprecated_endpoints(
     "saved_model.loader.maybe_saved_model_directory")
 def maybe_saved_model_directory(export_dir):
@@ -169,7 +173,9 @@ def maybe_saved_model_directory(export_dir):
   return file_io.file_exists(txt_path) or file_io.file_exists(pb_path)
 
 
-@tf_export("saved_model.load", "saved_model.loader.load")
+@tf_export("saved_model.load",
+           v1=["saved_model.load", "saved_model.loader.load"])
+@deprecation.deprecated_endpoints("saved_model.loader.load")
 def load(sess, tags, export_dir, import_scope=None, **saver_kwargs):
   """Loads the model from a SavedModel as specified by tags.
 
diff --git a/tensorflow/python/saved_model/main_op_impl.py b/tensorflow/python/saved_model/main_op_impl.py
index ad4511b28e..d567b95795 100644
--- a/tensorflow/python/saved_model/main_op_impl.py
+++ b/tensorflow/python/saved_model/main_op_impl.py
@@ -26,7 +26,8 @@ from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('saved_model.main_op.main_op')
+@tf_export('saved_model.main_op', v1=['saved_model.main_op.main_op'])
+@deprecation.deprecated_endpoints('saved_model.main_op.main_op')
 def main_op():
   """Returns a main op to init variables and tables.
 
@@ -43,8 +44,12 @@ def main_op():
 
 
 # TODO(sukritiramesh): Integrate with Saver for complete restore functionality.
-@tf_export('saved_model.main_op_with_restore',
-           'saved_model.main_op.main_op_with_restore')
+@tf_export(
+    'saved_model.main_op_with_restore',
+    v1=[
+        'saved_model.main_op_with_restore',
+        'saved_model.main_op.main_op_with_restore'
+    ])
 @deprecation.deprecated_endpoints('saved_model.main_op.main_op_with_restore')
 def main_op_with_restore(restore_op_name):
   """Returns a main op to init variables, tables and restore the graph.
diff --git a/tensorflow/python/saved_model/signature_def_utils_impl.py b/tensorflow/python/saved_model/signature_def_utils_impl.py
index a1034416e9..6e5e3bc682 100644
--- a/tensorflow/python/saved_model/signature_def_utils_impl.py
+++ b/tensorflow/python/saved_model/signature_def_utils_impl.py
@@ -28,8 +28,12 @@ from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('saved_model.build_signature_def',
-           'saved_model.signature_def_utils.build_signature_def')
+@tf_export(
+    'saved_model.build_signature_def',
+    v1=[
+        'saved_model.build_signature_def',
+        'saved_model.signature_def_utils.build_signature_def'
+    ])
 @deprecation.deprecated_endpoints(
     'saved_model.signature_def_utils.build_signature_def')
 def build_signature_def(inputs=None, outputs=None, method_name=None):
@@ -57,8 +61,12 @@ def build_signature_def(inputs=None, outputs=None, method_name=None):
   return signature_def
 
 
-@tf_export('saved_model.regression_signature_def',
-           'saved_model.signature_def_utils.regression_signature_def')
+@tf_export(
+    'saved_model.regression_signature_def',
+    v1=[
+        'saved_model.regression_signature_def',
+        'saved_model.signature_def_utils.regression_signature_def'
+    ])
 @deprecation.deprecated_endpoints(
     'saved_model.signature_def_utils.regression_signature_def')
 def regression_signature_def(examples, predictions):
@@ -102,8 +110,12 @@ def regression_signature_def(examples, predictions):
   return signature_def
 
 
-@tf_export('saved_model.classification_signature_def',
-           'saved_model.signature_def_utils.classification_signature_def')
+@tf_export(
+    'saved_model.classification_signature_def',
+    v1=[
+        'saved_model.classification_signature_def',
+        'saved_model.signature_def_utils.classification_signature_def'
+    ])
 @deprecation.deprecated_endpoints(
     'saved_model.signature_def_utils.classification_signature_def')
 def classification_signature_def(examples, classes, scores):
@@ -158,8 +170,12 @@ def classification_signature_def(examples, classes, scores):
   return signature_def
 
 
-@tf_export('saved_model.predict_signature_def',
-           'saved_model.signature_def_utils.predict_signature_def')
+@tf_export(
+    'saved_model.predict_signature_def',
+    v1=[
+        'saved_model.predict_signature_def',
+        'saved_model.signature_def_utils.predict_signature_def'
+    ])
 @deprecation.deprecated_endpoints(
     'saved_model.signature_def_utils.predict_signature_def')
 def predict_signature_def(inputs, outputs):
@@ -252,8 +268,12 @@ def _supervised_signature_def(
   return signature_def
 
 
-@tf_export('saved_model.is_valid_signature',
-           'saved_model.signature_def_utils.is_valid_signature')
+@tf_export(
+    'saved_model.is_valid_signature',
+    v1=[
+        'saved_model.is_valid_signature',
+        'saved_model.signature_def_utils.is_valid_signature'
+    ])
 @deprecation.deprecated_endpoints(
     'saved_model.signature_def_utils.is_valid_signature')
 def is_valid_signature(signature_def):
diff --git a/tensorflow/python/saved_model/utils_impl.py b/tensorflow/python/saved_model/utils_impl.py
index 0bba7b6fac..b3c27dbd81 100644
--- a/tensorflow/python/saved_model/utils_impl.py
+++ b/tensorflow/python/saved_model/utils_impl.py
@@ -34,8 +34,9 @@ from tensorflow.python.util.tf_export import tf_export
 # TensorInfo helpers.
 
 
-@tf_export("saved_model.build_tensor_info",
-           "saved_model.utils.build_tensor_info")
+@tf_export(
+    "saved_model.build_tensor_info",
+    v1=["saved_model.build_tensor_info", "saved_model.utils.build_tensor_info"])
 @deprecation.deprecated_endpoints("saved_model.utils.build_tensor_info")
 def build_tensor_info(tensor):
   """Utility function to build TensorInfo proto.
@@ -60,8 +61,12 @@ def build_tensor_info(tensor):
   return tensor_info
 
 
-@tf_export("saved_model.get_tensor_from_tensor_info",
-           "saved_model.utils.get_tensor_from_tensor_info")
+@tf_export(
+    "saved_model.get_tensor_from_tensor_info",
+    v1=[
+        "saved_model.get_tensor_from_tensor_info",
+        "saved_model.utils.get_tensor_from_tensor_info"
+    ])
 @deprecation.deprecated_endpoints(
     "saved_model.utils.get_tensor_from_tensor_info")
 def get_tensor_from_tensor_info(tensor_info, graph=None, import_scope=None):
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 533a138a39..dcf33f056b 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -68,20 +68,14 @@ TENSORFLOW_API_INIT_FILES = [
     "nn/__init__.py",
     "nn/rnn_cell/__init__.py",
     "profiler/__init__.py",
-    "python_io/__init__.py",
     "quantization/__init__.py",
     "random/__init__.py",
     "resource_loader/__init__.py",
     "strings/__init__.py",
     "saved_model/__init__.py",
-    "saved_model/builder/__init__.py",
     "saved_model/constants/__init__.py",
-    "saved_model/loader/__init__.py",
-    "saved_model/main_op/__init__.py",
     "saved_model/signature_constants/__init__.py",
-    "saved_model/signature_def_utils/__init__.py",
     "saved_model/tag_constants/__init__.py",
-    "saved_model/utils/__init__.py",
     "sets/__init__.py",
     "sparse/__init__.py",
     "spectral/__init__.py",
diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py
index eb131ac9f7..94c815dc8c 100644
--- a/tensorflow/python/training/input.py
+++ b/tensorflow/python/training/input.py
@@ -56,7 +56,9 @@ _restore_sparse = sparse_ops._take_many_sparse_from_tensors_map
 # pylint: enable=protected-access
 
 
-@tf_export("io.match_filenames_once", "train.match_filenames_once")
+@tf_export(
+    "io.match_filenames_once",
+    v1=["io.match_filenames_once", "train.match_filenames_once"])
 @deprecation.deprecated_endpoints("train.match_filenames_once")
 def match_filenames_once(pattern, name=None):
   """Save the list of files matching pattern, so it is only computed once.
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-op-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-op-error.pbtxt
deleted file mode 100644
index 7e59615534..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.-op-error.pbtxt
+++ /dev/null
@@ -1,29 +0,0 @@
-path: "tensorflow.OpError"
-tf_class {
-  is_instance: "<class \'tensorflow.python.framework.errors_impl.OpError\'>"
-  is_instance: "<type \'exceptions.Exception\'>"
-  member {
-    name: "args"
-    mtype: "<type \'getset_descriptor\'>"
-  }
-  member {
-    name: "error_code"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "message"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "node_def"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "op"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'node_def\', \'op\', \'message\', \'error_code\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-padding-f-i-f-o-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-padding-f-i-f-o-queue.pbtxt
deleted file mode 100644
index 8fed133561..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.-padding-f-i-f-o-queue.pbtxt
+++ /dev/null
@@ -1,66 +0,0 @@
-path: "tensorflow.PaddingFIFOQueue"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PaddingFIFOQueue\'>"
-  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "dtypes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "names"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "queue_ref"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "shapes"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'capacity\', \'dtypes\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'padding_fifo_queue\'], "
-  }
-  member_method {
-    name: "close"
-    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "dequeue"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "dequeue_many"
-    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "dequeue_up_to"
-    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "enqueue"
-    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "enqueue_many"
-    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "from_list"
-    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "is_closed"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "size"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-priority-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-priority-queue.pbtxt
deleted file mode 100644
index ebb017e81b..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.-priority-queue.pbtxt
+++ /dev/null
@@ -1,66 +0,0 @@
-path: "tensorflow.PriorityQueue"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PriorityQueue\'>"
-  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "dtypes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "names"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "queue_ref"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "shapes"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'capacity\', \'types\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'priority_queue\'], "
-  }
-  member_method {
-    name: "close"
-    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "dequeue"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "dequeue_many"
-    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "dequeue_up_to"
-    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "enqueue"
-    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "enqueue_many"
-    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "from_list"
-    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "is_closed"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "size"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-queue-base.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-queue-base.pbtxt
deleted file mode 100644
index 761f90989f..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.-queue-base.pbtxt
+++ /dev/null
@@ -1,65 +0,0 @@
-path: "tensorflow.QueueBase"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "dtypes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "names"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "queue_ref"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "shapes"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'dtypes\', \'shapes\', \'names\', \'queue_ref\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "close"
-    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "dequeue"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "dequeue_many"
-    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "dequeue_up_to"
-    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "enqueue"
-    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "enqueue_many"
-    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "from_list"
-    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "is_closed"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "size"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-random-shuffle-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-random-shuffle-queue.pbtxt
deleted file mode 100644
index f3ca841393..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.-random-shuffle-queue.pbtxt
+++ /dev/null
@@ -1,66 +0,0 @@
-path: "tensorflow.RandomShuffleQueue"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.RandomShuffleQueue\'>"
-  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "dtypes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "names"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "queue_ref"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "shapes"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'capacity\', \'min_after_dequeue\', \'dtypes\', \'shapes\', \'names\', \'seed\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'random_shuffle_queue\'], "
-  }
-  member_method {
-    name: "close"
-    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "dequeue"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "dequeue_many"
-    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "dequeue_up_to"
-    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "enqueue"
-    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "enqueue_many"
-    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "from_list"
-    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "is_closed"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "size"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-sparse-conditional-accumulator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-sparse-conditional-accumulator.pbtxt
deleted file mode 100644
index 39ff336c4f..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.-sparse-conditional-accumulator.pbtxt
+++ /dev/null
@@ -1,46 +0,0 @@
-path: "tensorflow.SparseConditionalAccumulator"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.SparseConditionalAccumulator\'>"
-  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.ConditionalAccumulatorBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "accumulator_ref"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'dtype\', \'shape\', \'shared_name\', \'name\', \'reduction_type\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'sparse_conditional_accumulator\', \'MEAN\'], "
-  }
-  member_method {
-    name: "apply_grad"
-    argspec: "args=[\'self\', \'grad_indices\', \'grad_values\', \'grad_shape\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], "
-  }
-  member_method {
-    name: "apply_indexed_slices_grad"
-    argspec: "args=[\'self\', \'grad\', \'local_step\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\'], "
-  }
-  member_method {
-    name: "num_accumulated"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "set_global_step"
-    argspec: "args=[\'self\', \'new_global_step\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "take_grad"
-    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "take_indexed_slices_grad"
-    argspec: "args=[\'self\', \'num_required\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.glorot_normal_initializer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.glorot_normal_initializer.pbtxt
deleted file mode 100644
index 483d1f8ba0..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.glorot_normal_initializer.pbtxt
+++ /dev/null
@@ -1,19 +0,0 @@
-path: "tensorflow.glorot_normal_initializer"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.init_ops.GlorotNormal\'>"
-  is_instance: "<class \'tensorflow.python.ops.init_ops.VarianceScaling\'>"
-  is_instance: "<class \'tensorflow.python.ops.init_ops.Initializer\'>"
-  is_instance: "<type \'object\'>"
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\"], "
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.manip.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.manip.pbtxt
index 9add462396..d6924d26b9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.manip.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.manip.pbtxt
@@ -16,10 +16,6 @@ tf_module {
     name: "reverse"
     argspec: "args=[\'tensor\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "roll"
-    argspec: "args=[\'input\', \'shift\', \'axis\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "scatter_nd"
     argspec: "args=[\'indices\', \'updates\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
index f6c5e42034..5b3f13653f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
@@ -196,10 +196,6 @@ tf_module {
     name: "log_softmax"
     argspec: "args=[\'logits\', \'axis\', \'name\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
   }
-  member_method {
-    name: "log_uniform_candidate_sampler"
-    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "lrn"
     argspec: "args=[\'input\', \'depth_radius\', \'bias\', \'alpha\', \'beta\', \'name\'], varargs=None, keywords=None, defaults=[\'5\', \'1\', \'1\', \'0.5\', \'None\'], "
@@ -344,10 +340,6 @@ tf_module {
     name: "top_k"
     argspec: "args=[\'input\', \'k\', \'sorted\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'True\', \'None\'], "
   }
-  member_method {
-    name: "uniform_candidate_sampler"
-    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "weighted_cross_entropy_with_logits"
     argspec: "args=[\'targets\', \'logits\', \'pos_weight\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.orthogonal_initializer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.orthogonal_initializer.pbtxt
deleted file mode 100644
index 13ec7454f4..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.orthogonal_initializer.pbtxt
+++ /dev/null
@@ -1,18 +0,0 @@
-path: "tensorflow.orthogonal_initializer"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.init_ops.Orthogonal\'>"
-  is_instance: "<class \'tensorflow.python.ops.init_ops.Initializer\'>"
-  is_instance: "<type \'object\'>"
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'gain\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'None\', \"<dtype: \'float32\'>\"], "
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 978afcf985..37e8e654b7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -128,10 +128,6 @@ tf_module {
     name: "NodeDef"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "OpError"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "Operation"
     mtype: "<type \'type\'>"
@@ -140,26 +136,10 @@ tf_module {
     name: "OptimizerOptions"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "PaddingFIFOQueue"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "PriorityQueue"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "QUANTIZED_DTYPES"
     mtype: "<type \'frozenset\'>"
   }
-  member {
-    name: "QueueBase"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "RandomShuffleQueue"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "RegisterGradient"
     mtype: "<type \'type\'>"
@@ -180,10 +160,6 @@ tf_module {
     name: "SessionLog"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "SparseConditionalAccumulator"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "SparseFeature"
     mtype: "<type \'type\'>"
@@ -328,10 +304,6 @@ tf_module {
     name: "gfile"
     mtype: "<type \'module\'>"
   }
-  member {
-    name: "glorot_normal_initializer"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "glorot_uniform_initializer"
     mtype: "<type \'type\'>"
@@ -420,18 +392,10 @@ tf_module {
     name: "ones_initializer"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "orthogonal_initializer"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "profiler"
     mtype: "<type \'module\'>"
   }
-  member {
-    name: "python_io"
-    mtype: "<type \'module\'>"
-  }
   member {
     name: "pywrap_tensorflow"
     mtype: "<type \'module\'>"
@@ -540,18 +504,10 @@ tf_module {
     name: "uint8"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
-  member {
-    name: "uniform_unit_scaling_initializer"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "user_ops"
     mtype: "<type \'module\'>"
   }
-  member {
-    name: "variance_scaling_initializer"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "variant"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
@@ -576,10 +532,6 @@ tf_module {
     name: "abs"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "accumulate_n"
-    argspec: "args=[\'inputs\', \'shape\', \'tensor_dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
   member_method {
     name: "acos"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -608,10 +560,6 @@ tf_module {
     name: "add_to_collections"
     argspec: "args=[\'names\', \'value\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "angle"
-    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "arg_max"
     argspec: "args=[\'input\', \'dimension\', \'output_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int64\'>\", \'None\'], "
@@ -652,74 +600,14 @@ tf_module {
     name: "assert_greater"
     argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
-  member_method {
-    name: "assert_greater_equal"
-    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_integer"
-    argspec: "args=[\'x\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "assert_less"
     argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
-  member_method {
-    name: "assert_less_equal"
-    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_near"
-    argspec: "args=[\'x\', \'y\', \'rtol\', \'atol\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_negative"
-    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_non_negative"
-    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_non_positive"
-    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_none_equal"
-    argspec: "args=[\'x\', \'y\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_positive"
-    argspec: "args=[\'x\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_proper_iterable"
-    argspec: "args=[\'values\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "assert_rank"
     argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
-  member_method {
-    name: "assert_rank_at_least"
-    argspec: "args=[\'x\', \'rank\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_rank_in"
-    argspec: "args=[\'x\', \'ranks\', \'data\', \'summarize\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_same_float_dtype"
-    argspec: "args=[\'tensors\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "assert_scalar"
-    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "assert_type"
-    argspec: "args=[\'tensor\', \'tf_type\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "atan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -752,10 +640,6 @@ tf_module {
     name: "betainc"
     argspec: "args=[\'a\', \'b\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "bincount"
-    argspec: "args=[\'arr\', \'weights\', \'minlength\', \'maxlength\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \"<dtype: \'int32\'>\"], "
-  }
   member_method {
     name: "bitcast"
     argspec: "args=[\'input\', \'type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -796,10 +680,6 @@ tf_module {
     name: "cholesky"
     argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "cholesky_solve"
-    argspec: "args=[\'chol\', \'rhs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "clip_by_average_norm"
     argspec: "args=[\'t\', \'clip_norm\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -832,14 +712,6 @@ tf_module {
     name: "cond"
     argspec: "args=[\'pred\', \'true_fn\', \'false_fn\', \'strict\', \'name\', \'fn1\', \'fn2\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'None\', \'None\', \'None\'], "
   }
-  member_method {
-    name: "confusion_matrix"
-    argspec: "args=[\'labels\', \'predictions\', \'num_classes\', \'dtype\', \'name\', \'weights\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'int32\'>\", \'None\', \'None\'], "
-  }
-  member_method {
-    name: "conj"
-    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "constant"
     argspec: "args=[\'value\', \'dtype\', \'shape\', \'name\', \'verify_shape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'Const\', \'False\'], "
@@ -884,10 +756,6 @@ tf_module {
     name: "cross"
     argspec: "args=[\'a\', \'b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "cumprod"
-    argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
-  }
   member_method {
     name: "cumsum"
     argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], "
@@ -904,10 +772,6 @@ tf_module {
     name: "decode_compressed"
     argspec: "args=[\'bytes\', \'compression_type\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'None\'], "
   }
-  member_method {
-    name: "decode_csv"
-    argspec: "args=[\'records\', \'record_defaults\', \'field_delim\', \'use_quote_delim\', \'name\', \'na_value\', \'select_cols\'], varargs=None, keywords=None, defaults=[\',\', \'True\', \'None\', \'\', \'None\'], "
-  }
   member_method {
     name: "decode_json_example"
     argspec: "args=[\'json_examples\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -920,18 +784,10 @@ tf_module {
     name: "delete_session_tensor"
     argspec: "args=[\'handle\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "depth_to_space"
-    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
-  }
   member_method {
     name: "dequantize"
     argspec: "args=[\'input\', \'min_range\', \'max_range\', \'mode\', \'name\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'None\'], "
   }
-  member_method {
-    name: "deserialize_many_sparse"
-    argspec: "args=[\'serialized_sparse\', \'dtype\', \'rank\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "device"
     argspec: "args=[\'device_name_or_function\'], varargs=None, keywords=None, defaults=None"
@@ -992,10 +848,6 @@ tf_module {
     name: "equal"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "erf"
-    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "erfc"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1080,10 +932,6 @@ tf_module {
     name: "floor_div"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "floordiv"
-    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "floormod"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1120,10 +968,6 @@ tf_module {
     name: "get_default_session"
     argspec: "args=[], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "get_seed"
-    argspec: "args=[\'op_seed\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "get_session_handle"
     argspec: "args=[\'data\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1132,10 +976,6 @@ tf_module {
     name: "get_session_tensor"
     argspec: "args=[\'handle\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "global_norm"
-    argspec: "args=[\'t_list\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "gradients"
     argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\', \'unconnected_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\', \'UnconnectedGradients.NONE\'], "
@@ -1196,10 +1036,6 @@ tf_module {
     name: "igammac"
     argspec: "args=[\'a\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "imag"
-    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "import_graph_def"
     argspec: "args=[\'graph_def\', \'input_map\', \'return_elements\', \'name\', \'op_dict\', \'producer_op_list\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
@@ -1228,22 +1064,6 @@ tf_module {
     name: "is_nan"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "is_non_decreasing"
-    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "is_numeric_tensor"
-    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "is_strictly_increasing"
-    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "lbeta"
-    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "less"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1284,10 +1104,6 @@ tf_module {
     name: "log1p"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "log_sigmoid"
-    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "logical_and"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1300,10 +1116,6 @@ tf_module {
     name: "logical_or"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "logical_xor"
-    argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'LogicalXor\'], "
-  }
   member_method {
     name: "make_ndarray"
     argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
@@ -1356,14 +1168,6 @@ tf_module {
     name: "matrix_solve"
     argspec: "args=[\'matrix\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
-  member_method {
-    name: "matrix_solve_ls"
-    argspec: "args=[\'matrix\', \'rhs\', \'l2_regularizer\', \'fast\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'True\', \'None\'], "
-  }
-  member_method {
-    name: "matrix_transpose"
-    argspec: "args=[\'a\', \'name\', \'conjugate\'], varargs=None, keywords=None, defaults=[\'matrix_transpose\', \'False\'], "
-  }
   member_method {
     name: "matrix_triangular_solve"
     argspec: "args=[\'matrix\', \'rhs\', \'lower\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'False\', \'None\'], "
@@ -1484,10 +1288,6 @@ tf_module {
     name: "qr"
     argspec: "args=[\'input\', \'full_matrices\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
-  member_method {
-    name: "quantize"
-    argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'round_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'HALF_AWAY_FROM_ZERO\', \'None\'], "
-  }
   member_method {
     name: "quantize_v2"
     argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'name\', \'round_mode\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'None\', \'HALF_AWAY_FROM_ZERO\'], "
@@ -1500,18 +1300,10 @@ tf_module {
     name: "random_crop"
     argspec: "args=[\'value\', \'size\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
-  member_method {
-    name: "random_gamma"
-    argspec: "args=[\'shape\', \'alpha\', \'beta\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
-  }
   member_method {
     name: "random_normal"
     argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
   }
-  member_method {
-    name: "random_poisson"
-    argspec: "args=[\'lam\', \'shape\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\', \'None\'], "
-  }
   member_method {
     name: "random_shuffle"
     argspec: "args=[\'value\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
@@ -1532,10 +1324,6 @@ tf_module {
     name: "read_file"
     argspec: "args=[\'filename\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "real"
-    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "realdiv"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1552,10 +1340,6 @@ tf_module {
     name: "reduce_any"
     argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
-  member_method {
-    name: "reduce_join"
-    argspec: "args=[\'inputs\', \'axis\', \'keep_dims\', \'separator\', \'name\', \'reduction_indices\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'\', \'None\', \'None\'], "
-  }
   member_method {
     name: "reduce_logsumexp"
     argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
@@ -1580,10 +1364,6 @@ tf_module {
     name: "reduce_sum"
     argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
-  member_method {
-    name: "regex_replace"
-    argspec: "args=[\'input\', \'pattern\', \'rewrite\', \'replace_global\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
-  }
   member_method {
     name: "register_tensor_conversion_function"
     argspec: "args=[\'base_type\', \'conversion_func\', \'priority\'], varargs=None, keywords=None, defaults=[\'100\'], "
@@ -1684,26 +1464,10 @@ tf_module {
     name: "segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "self_adjoint_eig"
-    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "self_adjoint_eigvals"
-    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "sequence_mask"
     argspec: "args=[\'lengths\', \'maxlen\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'bool\'>\", \'None\'], "
   }
-  member_method {
-    name: "serialize_many_sparse"
-    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
-  }
-  member_method {
-    name: "serialize_sparse"
-    argspec: "args=[\'sp_input\', \'name\', \'out_type\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'string\'>\"], "
-  }
   member_method {
     name: "serialize_tensor"
     argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -1748,54 +1512,18 @@ tf_module {
     name: "slice"
     argspec: "args=[\'input_\', \'begin\', \'size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "space_to_batch"
-    argspec: "args=[\'input\', \'paddings\', \'block_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "space_to_batch_nd"
     argspec: "args=[\'input\', \'block_shape\', \'paddings\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "space_to_depth"
-    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
-  }
-  member_method {
-    name: "sparse_add"
-    argspec: "args=[\'a\', \'b\', \'thresh\'], varargs=None, keywords=None, defaults=[\'0\'], "
-  }
   member_method {
     name: "sparse_concat"
     argspec: "args=[\'axis\', \'sp_inputs\', \'name\', \'expand_nonconcat_dim\', \'concat_dim\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], "
   }
-  member_method {
-    name: "sparse_fill_empty_rows"
-    argspec: "args=[\'sp_input\', \'default_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "sparse_mask"
-    argspec: "args=[\'a\', \'mask_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "sparse_matmul"
     argspec: "args=[\'a\', \'b\', \'transpose_a\', \'transpose_b\', \'a_is_sparse\', \'b_is_sparse\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'False\', \'False\', \'None\'], "
   }
-  member_method {
-    name: "sparse_maximum"
-    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "sparse_merge"
-    argspec: "args=[\'sp_ids\', \'sp_values\', \'vocab_size\', \'name\', \'already_sorted\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], "
-  }
-  member_method {
-    name: "sparse_minimum"
-    argspec: "args=[\'sp_a\', \'sp_b\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "sparse_placeholder"
-    argspec: "args=[\'dtype\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "sparse_reduce_max"
     argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
@@ -1812,66 +1540,14 @@ tf_module {
     name: "sparse_reduce_sum_sparse"
     argspec: "args=[\'sp_input\', \'axis\', \'keepdims\', \'reduction_axes\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
   }
-  member_method {
-    name: "sparse_reorder"
-    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "sparse_reset_shape"
-    argspec: "args=[\'sp_input\', \'new_shape\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "sparse_reshape"
-    argspec: "args=[\'sp_input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "sparse_retain"
-    argspec: "args=[\'sp_input\', \'to_retain\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "sparse_segment_mean"
-    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "sparse_segment_sqrt_n"
-    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "sparse_segment_sum"
-    argspec: "args=[\'data\', \'indices\', \'segment_ids\', \'name\', \'num_segments\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "sparse_slice"
-    argspec: "args=[\'sp_input\', \'start\', \'size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "sparse_softmax"
-    argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "sparse_split"
     argspec: "args=[\'keyword_required\', \'sp_input\', \'num_split\', \'axis\', \'name\', \'split_dim\'], varargs=None, keywords=None, defaults=[\'KeywordRequired()\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
   }
-  member_method {
-    name: "sparse_tensor_dense_matmul"
-    argspec: "args=[\'sp_a\', \'b\', \'adjoint_a\', \'adjoint_b\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
-  }
-  member_method {
-    name: "sparse_tensor_to_dense"
-    argspec: "args=[\'sp_input\', \'default_value\', \'validate_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'True\', \'None\'], "
-  }
   member_method {
     name: "sparse_to_dense"
     argspec: "args=[\'sparse_indices\', \'output_shape\', \'sparse_values\', \'default_value\', \'validate_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'True\', \'None\'], "
   }
-  member_method {
-    name: "sparse_to_indicator"
-    argspec: "args=[\'sp_input\', \'vocab_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "sparse_transpose"
-    argspec: "args=[\'sp_input\', \'perm\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "split"
     argspec: "args=[\'value\', \'num_or_size_splits\', \'axis\', \'num\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \'split\'], "
@@ -1940,10 +1616,6 @@ tf_module {
     name: "subtract"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "svd"
-    argspec: "args=[\'tensor\', \'full_matrices\', \'compute_uv\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'None\'], "
-  }
   member_method {
     name: "tables_initializer"
     argspec: "args=[\'name\'], varargs=None, keywords=None, defaults=[\'init_all_tables\'], "
@@ -1996,10 +1668,6 @@ tf_module {
     name: "to_int64"
     argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'ToInt64\'], "
   }
-  member_method {
-    name: "trace"
-    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "transpose"
     argspec: "args=[\'a\', \'perm\', \'name\', \'conjugate\'], varargs=None, keywords=None, defaults=[\'None\', \'transpose\', \'False\'], "
@@ -2040,10 +1708,6 @@ tf_module {
     name: "unsorted_segment_max"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "unsorted_segment_mean"
-    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "unsorted_segment_min"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -2052,10 +1716,6 @@ tf_module {
     name: "unsorted_segment_prod"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "unsorted_segment_sqrt_n"
-    argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "unsorted_segment_sum"
     argspec: "args=[\'data\', \'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
@@ -2068,10 +1728,6 @@ tf_module {
     name: "variable_axis_size_partitioner"
     argspec: "args=[\'max_shard_bytes\', \'axis\', \'bytes_per_string_element\', \'max_shards\'], varargs=None, keywords=None, defaults=[\'0\', \'16\', \'None\'], "
   }
-  member_method {
-    name: "verify_tensor_all_finite"
-    argspec: "args=[\'t\', \'msg\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "where"
     argspec: "args=[\'condition\', \'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-compression-type.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-compression-type.pbtxt
deleted file mode 100644
index 4941dda50e..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-compression-type.pbtxt
+++ /dev/null
@@ -1,20 +0,0 @@
-path: "tensorflow.python_io.TFRecordCompressionType"
-tf_class {
-  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordCompressionType\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "GZIP"
-    mtype: "<type \'int\'>"
-  }
-  member {
-    name: "NONE"
-    mtype: "<type \'int\'>"
-  }
-  member {
-    name: "ZLIB"
-    mtype: "<type \'int\'>"
-  }
-  member_method {
-    name: "__init__"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-options.pbtxt
deleted file mode 100644
index 614ba42d3e..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-options.pbtxt
+++ /dev/null
@@ -1,17 +0,0 @@
-path: "tensorflow.python_io.TFRecordOptions"
-tf_class {
-  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordOptions\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "compression_type_map"
-    mtype: "<type \'dict\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'compression_type\', \'flush_mode\', \'input_buffer_size\', \'output_buffer_size\', \'window_bits\', \'compression_level\', \'compression_method\', \'mem_level\', \'compression_strategy\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "get_compression_type_string"
-    argspec: "args=[\'cls\', \'options\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-writer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-writer.pbtxt
deleted file mode 100644
index 31775de2d1..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.python_io.-t-f-record-writer.pbtxt
+++ /dev/null
@@ -1,21 +0,0 @@
-path: "tensorflow.python_io.TFRecordWriter"
-tf_class {
-  is_instance: "<class \'tensorflow.python.lib.io.tf_record.TFRecordWriter\'>"
-  is_instance: "<type \'object\'>"
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "close"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "flush"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "write"
-    argspec: "args=[\'self\', \'record\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.python_io.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.python_io.pbtxt
deleted file mode 100644
index 7c9953e5fe..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.python_io.pbtxt
+++ /dev/null
@@ -1,19 +0,0 @@
-path: "tensorflow.python_io"
-tf_module {
-  member {
-    name: "TFRecordCompressionType"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "TFRecordOptions"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "TFRecordWriter"
-    mtype: "<type \'type\'>"
-  }
-  member_method {
-    name: "tf_record_iterator"
-    argspec: "args=[\'path\', \'options\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.builder.-saved-model-builder.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.builder.-saved-model-builder.pbtxt
deleted file mode 100644
index 83bd703540..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.builder.-saved-model-builder.pbtxt
+++ /dev/null
@@ -1,21 +0,0 @@
-path: "tensorflow.saved_model.builder.SavedModelBuilder"
-tf_class {
-  is_instance: "<class \'tensorflow.python.saved_model.builder_impl.SavedModelBuilder\'>"
-  is_instance: "<type \'object\'>"
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'export_dir\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "add_meta_graph"
-    argspec: "args=[\'self\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
-  }
-  member_method {
-    name: "add_meta_graph_and_variables"
-    argspec: "args=[\'self\', \'sess\', \'tags\', \'signature_def_map\', \'assets_collection\', \'legacy_init_op\', \'clear_devices\', \'main_op\', \'strip_default_attrs\', \'saver\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\', \'None\', \'False\', \'None\'], "
-  }
-  member_method {
-    name: "save"
-    argspec: "args=[\'self\', \'as_text\'], varargs=None, keywords=None, defaults=[\'False\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.builder.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.builder.pbtxt
deleted file mode 100644
index adc697ad1c..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.builder.pbtxt
+++ /dev/null
@@ -1,7 +0,0 @@
-path: "tensorflow.saved_model.builder"
-tf_module {
-  member {
-    name: "SavedModelBuilder"
-    mtype: "<type \'type\'>"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.loader.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.loader.pbtxt
deleted file mode 100644
index 511e6b4712..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.loader.pbtxt
+++ /dev/null
@@ -1,11 +0,0 @@
-path: "tensorflow.saved_model.loader"
-tf_module {
-  member_method {
-    name: "load"
-    argspec: "args=[\'sess\', \'tags\', \'export_dir\', \'import_scope\'], varargs=None, keywords=saver_kwargs, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "maybe_saved_model_directory"
-    argspec: "args=[\'export_dir\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.main_op.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.main_op.pbtxt
deleted file mode 100644
index 176cb788c2..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.main_op.pbtxt
+++ /dev/null
@@ -1,11 +0,0 @@
-path: "tensorflow.saved_model.main_op"
-tf_module {
-  member_method {
-    name: "main_op"
-    argspec: "args=[], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "main_op_with_restore"
-    argspec: "args=[\'restore_op_name\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
index 3f4965fc69..a95ab4a3bc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
@@ -4,38 +4,18 @@ tf_module {
     name: "Builder"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "builder"
-    mtype: "<type \'module\'>"
-  }
   member {
     name: "constants"
     mtype: "<type \'module\'>"
   }
-  member {
-    name: "loader"
-    mtype: "<type \'module\'>"
-  }
-  member {
-    name: "main_op"
-    mtype: "<type \'module\'>"
-  }
   member {
     name: "signature_constants"
     mtype: "<type \'module\'>"
   }
-  member {
-    name: "signature_def_utils"
-    mtype: "<type \'module\'>"
-  }
   member {
     name: "tag_constants"
     mtype: "<type \'module\'>"
   }
-  member {
-    name: "utils"
-    mtype: "<type \'module\'>"
-  }
   member_method {
     name: "build_signature_def"
     argspec: "args=[\'inputs\', \'outputs\', \'method_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
@@ -60,6 +40,10 @@ tf_module {
     name: "load"
     argspec: "args=[\'sess\', \'tags\', \'export_dir\', \'import_scope\'], varargs=None, keywords=saver_kwargs, defaults=[\'None\'], "
   }
+  member_method {
+    name: "main_op"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "main_op_with_restore"
     argspec: "args=[\'restore_op_name\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.signature_def_utils.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.signature_def_utils.pbtxt
deleted file mode 100644
index a5602464ee..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.signature_def_utils.pbtxt
+++ /dev/null
@@ -1,23 +0,0 @@
-path: "tensorflow.saved_model.signature_def_utils"
-tf_module {
-  member_method {
-    name: "build_signature_def"
-    argspec: "args=[\'inputs\', \'outputs\', \'method_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "classification_signature_def"
-    argspec: "args=[\'examples\', \'classes\', \'scores\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "is_valid_signature"
-    argspec: "args=[\'signature_def\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "predict_signature_def"
-    argspec: "args=[\'inputs\', \'outputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "regression_signature_def"
-    argspec: "args=[\'examples\', \'predictions\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.utils.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.utils.pbtxt
deleted file mode 100644
index d95c946682..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.utils.pbtxt
+++ /dev/null
@@ -1,11 +0,0 @@
-path: "tensorflow.saved_model.utils"
-tf_module {
-  member_method {
-    name: "build_tensor_info"
-    argspec: "args=[\'tensor\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_tensor_from_tensor_info"
-    argspec: "args=[\'tensor_info\', \'graph\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
index 7e980fe44d..82445eb18d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
@@ -332,10 +332,6 @@ tf_module {
     name: "load_variable"
     argspec: "args=[\'ckpt_dir_or_file\', \'name\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "match_filenames_once"
-    argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "natural_exp_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'decay_rate\', \'staircase\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.uniform_unit_scaling_initializer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.uniform_unit_scaling_initializer.pbtxt
deleted file mode 100644
index e1b18dc92f..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.uniform_unit_scaling_initializer.pbtxt
+++ /dev/null
@@ -1,18 +0,0 @@
-path: "tensorflow.uniform_unit_scaling_initializer"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.init_ops.UniformUnitScaling\'>"
-  is_instance: "<class \'tensorflow.python.ops.init_ops.Initializer\'>"
-  is_instance: "<type \'object\'>"
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'factor\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'None\', \"<dtype: \'float32\'>\"], "
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.variance_scaling_initializer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.variance_scaling_initializer.pbtxt
deleted file mode 100644
index 09d7bc03b4..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.variance_scaling_initializer.pbtxt
+++ /dev/null
@@ -1,18 +0,0 @@
-path: "tensorflow.variance_scaling_initializer"
-tf_class {
-  is_instance: "<class \'tensorflow.python.ops.init_ops.VarianceScaling\'>"
-  is_instance: "<class \'tensorflow.python.ops.init_ops.Initializer\'>"
-  is_instance: "<type \'object\'>"
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'truncated_normal\', \'None\', \"<dtype: \'float32\'>\"], "
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-}
-- 
GitLab


From a3349b208b8c91cebd7a1ab4c8ccd7fa37c76d47 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 17:18:43 -0700
Subject: [PATCH 0830/1085] Internal change.

PiperOrigin-RevId: 216788277
---
 tensorflow/python/kernel_tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 776d1ecc25..51e024b197 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -641,7 +641,7 @@ cuda_py_test(
         "//tensorflow/python:linalg_ops",
         "//tensorflow/python:math_ops",
     ],
-    tags = ["notap"],
+    tags = ["optonly"],
 )
 
 cuda_py_test(
-- 
GitLab


From af09da939381a0a8e2a9413a4a8a188b8d1cd4d0 Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
Date: Thu, 11 Oct 2018 17:30:07 -0700
Subject: [PATCH 0831/1085] Fix bug in MklSlice op when allocating output
 tensor. (#22914)

Wrongly "+1" for output shape, that will cause CopyFrom failure in MklToTf op because of tensor size and shape mismatch.
---
 tensorflow/core/kernels/mkl_slice_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index d63e14adf6..85cabeb92b 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -327,7 +327,7 @@ class MklDnnSliceOp : public OpKernel {
       output_mkl_shape->SetTfLayout(input_mkl_shape.GetDimension(), output_dims,
                                     input_mkl_shape.GetTfDataFormat());
 
-      output_tf_shape.AddDim((output_pd->get_size() / sizeof(T)) + 1);
+      output_tf_shape.AddDim(output_pd->get_size() / sizeof(T));
     } else {
       // If input is not in Mkl layout, then output won't be in Mkl layout.
       output_mkl_shape->SetMklTensor(false);
-- 
GitLab


From 65111abb76a41760abd1460be5724e6cfcf16e51 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 11 Oct 2018 17:21:05 -0700
Subject: [PATCH 0832/1085] Correctly check for while loops in
 AutomaticControlDependencies.

AutomaticControlDependencies isn't implemented for v1 while_loops, but
didn't check parent control flow contexts for WhileLoopContexts.

PiperOrigin-RevId: 216788530
---
 tensorflow/python/eager/function.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 7b97d3f6f1..8b4efef0be 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -46,6 +46,7 @@ from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import cond_v2_impl
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import custom_gradient
 from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gradients_impl
@@ -1919,7 +1920,7 @@ class AutomaticControlDependencies(object):
     # this.
     for op in new_operations:
       # TODO(apassos) make this code safely support while loops.
-      if isinstance(op._control_flow_context, control_flow_ops.WhileContext):  # pylint: disable=protected-access
+      if control_flow_util.IsInWhileLoop(op):
         continue
       control_inputs = set()
       # Ensure stateful ops run
-- 
GitLab


From 988fae336c9146b1534a750edcd3b4905f207814 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Thu, 11 Oct 2018 17:35:13 -0700
Subject: [PATCH 0833/1085] Minor change on testNonExistingDirectory

---
 .../python/data/kernel_tests/matching_files_dataset_op_test.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index d811844cae..2a60b653d2 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -48,7 +48,7 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
   def testNonExistingDirectory(self):
     """Test the MatchingFiles dataset with a non-existing directory"""
 
-    self.tearDown()
+    self.tmp_dir = os.path.join(self.tmp_dir, "nonexistingdir")
     dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
     with self.cached_session() as sess:
       next_element = dataset.make_one_shot_iterator().get_next()
-- 
GitLab


From cbd462590ae0c27b6209079ccfa8bbc2007b9dc0 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 11 Oct 2018 17:21:42 -0700
Subject: [PATCH 0834/1085] [tf.data] Add an experimental dataset that
 introduces a sleep into the pipeline.

This dataset will enable us to experiment with "delay scheduling", which could lessen the interference between input pipeline work and critical path activities (such as launching GPU kernels, etc.).

PiperOrigin-RevId: 216788581
---
 .../api_def_ExperimentalSleepDataset.pbtxt    |   4 +
 .../core/kernels/data/experimental/BUILD      |  10 ++
 .../data/experimental/sleep_dataset_op.cc     | 134 ++++++++++++++++++
 .../core/ops/experimental_dataset_ops.cc      |  14 ++
 .../data/experimental/kernel_tests/BUILD      |  13 ++
 .../experimental/kernel_tests/sleep_test.py   |  52 +++++++
 tensorflow/python/data/experimental/ops/BUILD |  11 ++
 .../python/data/experimental/ops/sleep.py     |  66 +++++++++
 8 files changed, 304 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalSleepDataset.pbtxt
 create mode 100644 tensorflow/core/kernels/data/experimental/sleep_dataset_op.cc
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/sleep_test.py
 create mode 100644 tensorflow/python/data/experimental/ops/sleep.py

diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalSleepDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalSleepDataset.pbtxt
new file mode 100644
index 0000000000..9caff54394
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalSleepDataset.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "ExperimentalSleepDataset"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD
index 4cf5643bc0..441bdc2898 100644
--- a/tensorflow/core/kernels/data/experimental/BUILD
+++ b/tensorflow/core/kernels/data/experimental/BUILD
@@ -139,6 +139,15 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "sleep_dataset_op",
+    srcs = ["sleep_dataset_op.cc"],
+    deps = [
+        "//tensorflow/core:experimental_dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+    ],
+)
+
 tf_kernel_library(
     name = "dataset_kernels",
     deps = [
@@ -150,6 +159,7 @@ tf_kernel_library(
         ":lmdb_dataset_op",
         ":numa_map_and_batch_dataset_op",
         ":prefetching_kernels",
+        ":sleep_dataset_op",
         ":threadpool_dataset_op",
         ":unique_dataset_op",
     ],
diff --git a/tensorflow/core/kernels/data/experimental/sleep_dataset_op.cc b/tensorflow/core/kernels/data/experimental/sleep_dataset_op.cc
new file mode 100644
index 0000000000..fba63056be
--- /dev/null
+++ b/tensorflow/core/kernels/data/experimental/sleep_dataset_op.cc
@@ -0,0 +1,134 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/dataset.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+namespace data {
+namespace {
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
+
+class SleepDatasetOp : public UnaryDatasetOpKernel {
+ public:
+  using UnaryDatasetOpKernel::UnaryDatasetOpKernel;
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    int64 sleep_microseconds;
+    OP_REQUIRES_OK(ctx, ParseScalarArgument<int64>(ctx, "sleep_microseconds",
+                                                   &sleep_microseconds));
+
+    OP_REQUIRES(ctx, sleep_microseconds >= 0,
+                errors::InvalidArgument("`sleep_microseconds` must be >= 0"));
+
+    *output = new Dataset(ctx, input, sleep_microseconds);
+  }
+
+ private:
+  class Dataset : public DatasetBase {
+   public:
+    Dataset(OpKernelContext* ctx, const DatasetBase* input,
+            int64 sleep_microseconds)
+        : DatasetBase(DatasetContext(ctx)),
+          input_(input),
+          sleep_microseconds_(sleep_microseconds) {
+      input_->Ref();
+    }
+
+    ~Dataset() override { input_->Unref(); }
+
+    std::unique_ptr<IteratorBase> MakeIteratorInternal(
+        const string& prefix) const override {
+      return MakeUnique<Iterator>(
+          Iterator::Params{this, strings::StrCat(prefix, "::Sleep")});
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return input_->output_dtypes();
+    }
+    const std::vector<PartialTensorShape>& output_shapes() const override {
+      return input_->output_shapes();
+    }
+
+    string DebugString() const override { return "SleepDatasetOp::Dataset"; }
+
+   protected:
+    Status AsGraphDefInternal(SerializationContext* ctx,
+                              DatasetGraphDefBuilder* b,
+                              Node** output) const override {
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
+
+      Node* sleep_microseconds = nullptr;
+      TF_RETURN_IF_ERROR(
+          b->AddScalar(sleep_microseconds_, &sleep_microseconds));
+
+      return b->AddDataset(this,
+                           {{0, input_graph_node},
+                            {1, sleep_microseconds}},  // Single tensor inputs.
+                           {},                         // Tensor list inputs.
+                           {},                         // Attrs
+                           output);
+    }
+
+   private:
+    class Iterator : public DatasetIterator<Dataset> {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator<Dataset>(params) {}
+
+      Status Initialize(IteratorContext* ctx) override {
+        return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_);
+      }
+
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector<Tensor>* out_tensors,
+                             bool* end_of_sequence) override {
+        RecordStop(ctx);
+        ctx->env()->SleepForMicroseconds(dataset()->sleep_microseconds_);
+        RecordStart(ctx);
+        return input_impl_->GetNext(ctx, out_tensors, end_of_sequence);
+      }
+
+     protected:
+      Status SaveInternal(IteratorStateWriter* writer) override {
+        return SaveInput(writer, input_impl_);
+      }
+
+      Status RestoreInternal(IteratorContext* ctx,
+                             IteratorStateReader* reader) override {
+        return RestoreInput(ctx, reader, input_impl_);
+      }
+
+     private:
+      std::unique_ptr<IteratorBase> input_impl_;
+    };
+
+    const DatasetBase* const input_;
+    // TODO(b/117612213): Investigate autotuning for this value.
+    const int64 sleep_microseconds_;
+  };
+};
+
+REGISTER_KERNEL_BUILDER(Name("ExperimentalSleepDataset").Device(DEVICE_CPU),
+                        SleepDatasetOp);
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/experimental_dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc
index bbbecc50f8..d077954f9e 100644
--- a/tensorflow/core/ops/experimental_dataset_ops.cc
+++ b/tensorflow/core/ops/experimental_dataset_ops.cc
@@ -75,6 +75,20 @@ REGISTER_OP("ExperimentalIgnoreErrorsDataset")
     .Attr("output_shapes: list(shape) >= 1")
     .SetShapeFn(shape_inference::ScalarShape);
 
+REGISTER_OP("ExperimentalSleepDataset")
+    .Input("input_dataset: variant")
+    .Input("sleep_microseconds: int64")
+    .Output("handle: variant")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      shape_inference::ShapeHandle unused;
+      // Both inputs are scalar.
+      TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(0), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(1), 0, &unused));
+      return shape_inference::ScalarShape(c);
+    });
+
 REGISTER_OP("ExperimentalUniqueDataset")
     .Input("input_dataset: variant")
     .Output("handle: variant")
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index 4eef9580ad..e9fda2f6b5 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -565,6 +565,19 @@ py_test(
     ],
 )
 
+py_test(
+    name = "sleep_test",
+    srcs = ["sleep_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:util",
+        "//tensorflow/python/data/experimental/ops:sleep",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 py_library(
     name = "sql_dataset_test_base",
     srcs = ["sql_dataset_test_base.py"],
diff --git a/tensorflow/python/data/experimental/kernel_tests/sleep_test.py b/tensorflow/python/data/experimental/kernel_tests/sleep_test.py
new file mode 100644
index 0000000000..bf53acc82a
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/sleep_test.py
@@ -0,0 +1,52 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.experimental.sleep()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+
+from tensorflow.python.data.experimental.ops import sleep
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+from tensorflow.python.platform import test
+
+_NUMPY_RANDOM_SEED = 42
+
+
+class SleepTest(test_base.DatasetTestBase):
+
+  def testSleep(self):
+    sleep_microseconds = 100
+    dataset = dataset_ops.Dataset.range(10).apply(
+        sleep.sleep(sleep_microseconds))
+    iterator = dataset.make_initializable_iterator()
+    next_element = iterator.get_next()
+
+    with self.cached_session() as sess:
+      sess.run(iterator.initializer)
+      start_time = time.time()
+      for i in range(10):
+        self.assertEqual(i, sess.run(next_element))
+      end_time = time.time()
+      self.assertGreater(end_time - start_time, (10 * sleep_microseconds) / 1e6)
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(next_element)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/ops/BUILD b/tensorflow/python/data/experimental/ops/BUILD
index 46a9552b61..d069ae7293 100644
--- a/tensorflow/python/data/experimental/ops/BUILD
+++ b/tensorflow/python/data/experimental/ops/BUILD
@@ -268,6 +268,16 @@ py_library(
     ],
 )
 
+py_library(
+    name = "sleep",
+    srcs = ["sleep.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:experimental_dataset_ops_gen",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
+
 py_library(
     name = "stats_ops",
     srcs = ["stats_ops.py"],
@@ -366,6 +376,7 @@ py_library(
         ":resampling",
         ":scan_ops",
         ":shuffle_ops",
+        ":sleep",
         ":stats_ops",
         ":threadpool",
         ":unique",
diff --git a/tensorflow/python/data/experimental/ops/sleep.py b/tensorflow/python/data/experimental/ops/sleep.py
new file mode 100644
index 0000000000..7e7d370f70
--- /dev/null
+++ b/tensorflow/python/data/experimental/ops/sleep.py
@@ -0,0 +1,66 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental API for controlling threading in `tf.data` pipelines."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.ops import gen_experimental_dataset_ops
+
+
+class _SleepDataset(dataset_ops.UnaryDataset):
+  """A `Dataset` that sleeps before producing each upstream element."""
+
+  def __init__(self, input_dataset, sleep_microseconds):
+    super(_SleepDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
+    self._sleep_microseconds = sleep_microseconds
+
+  def _as_variant_tensor(self):
+    return gen_experimental_dataset_ops.experimental_sleep_dataset(
+        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        self._sleep_microseconds,
+        **dataset_ops.flat_structure(self))
+
+  @property
+  def output_shapes(self):
+    return self._input_dataset.output_shapes
+
+  @property
+  def output_types(self):
+    return self._input_dataset.output_types
+
+  @property
+  def output_classes(self):
+    return self._input_dataset.output_classes
+
+
+def sleep(sleep_microseconds):
+  """Sleeps for `sleep_microseconds` before producing each input element.
+
+  Args:
+    sleep_microseconds: The number of microseconds to sleep before producing an
+      input element.
+
+  Returns:
+    A `Dataset` transformation function, which can be passed to
+    `tf.data.Dataset.apply`.
+  """
+
+  def _apply_fn(dataset):
+    return _SleepDataset(dataset, sleep_microseconds)
+
+  return _apply_fn
-- 
GitLab


From 7c2fb35f1ae4fcd74199757bccafb1702221352a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 17:46:21 -0700
Subject: [PATCH 0835/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216791162

---
 tensorflow/go/op/wrappers.go | 144 +++++++++++++++++------------------
 1 file changed, 72 insertions(+), 72 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index c6ecd75587..109da6e5c9 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -4615,6 +4615,78 @@ func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output,
 	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
 }
 
+// CTCLossAttr is an optional argument to CTCLoss.
+type CTCLossAttr func(optionalAttr)
+
+// CTCLossPreprocessCollapseRepeated sets the optional preprocess_collapse_repeated attribute to value.
+//
+// value: Scalar, if true then repeated labels are
+// collapsed prior to the CTC calculation.
+// If not specified, defaults to false
+func CTCLossPreprocessCollapseRepeated(value bool) CTCLossAttr {
+	return func(m optionalAttr) {
+		m["preprocess_collapse_repeated"] = value
+	}
+}
+
+// CTCLossCtcMergeRepeated sets the optional ctc_merge_repeated attribute to value.
+//
+// value: Scalar.  If set to false, *during* CTC calculation
+// repeated non-blank labels will not be merged and are interpreted as
+// individual labels.  This is a simplified version of CTC.
+// If not specified, defaults to true
+func CTCLossCtcMergeRepeated(value bool) CTCLossAttr {
+	return func(m optionalAttr) {
+		m["ctc_merge_repeated"] = value
+	}
+}
+
+// CTCLossIgnoreLongerOutputsThanInputs sets the optional ignore_longer_outputs_than_inputs attribute to value.
+//
+// value: Scalar. If set to true, during CTC
+// calculation, items that have longer output sequences than input sequences
+// are skipped: they don't contribute to the loss term and have zero-gradient.
+// If not specified, defaults to false
+func CTCLossIgnoreLongerOutputsThanInputs(value bool) CTCLossAttr {
+	return func(m optionalAttr) {
+		m["ignore_longer_outputs_than_inputs"] = value
+	}
+}
+
+// Calculates the CTC Loss (log probability) for each batch entry.  Also calculates
+//
+// the gradient.  This class performs the softmax operation for you, so inputs
+// should be e.g. linear projections of outputs by an LSTM.
+//
+// Arguments:
+//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
+//	labels_indices: The indices of a `SparseTensor<int32, 2>`.
+// `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for
+// `(batch b, time t)`.
+//	labels_values: The values (labels) associated with the given batch and time.
+//	sequence_length: A vector containing sequence lengths (batch).
+//
+// Returns A vector (batch) containing log-probabilities.The gradient of `loss`.  3-D, shape:
+// `(max_time x batch_size x num_classes)`.
+func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_values tf.Output, sequence_length tf.Output, optional ...CTCLossAttr) (loss tf.Output, gradient tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CTCLoss",
+		Input: []tf.Input{
+			inputs, labels_indices, labels_values, sequence_length,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1)
+}
+
 // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
 type ResourceStridedSliceAssignAttr func(optionalAttr)
 
@@ -33086,75 +33158,3 @@ func CudnnRNNParamsToCanonical(scope *Scope, num_layers tf.Output, num_units tf.
 	}
 	return weights, biases
 }
-
-// CTCLossAttr is an optional argument to CTCLoss.
-type CTCLossAttr func(optionalAttr)
-
-// CTCLossPreprocessCollapseRepeated sets the optional preprocess_collapse_repeated attribute to value.
-//
-// value: Scalar, if true then repeated labels are
-// collapsed prior to the CTC calculation.
-// If not specified, defaults to false
-func CTCLossPreprocessCollapseRepeated(value bool) CTCLossAttr {
-	return func(m optionalAttr) {
-		m["preprocess_collapse_repeated"] = value
-	}
-}
-
-// CTCLossCtcMergeRepeated sets the optional ctc_merge_repeated attribute to value.
-//
-// value: Scalar.  If set to false, *during* CTC calculation
-// repeated non-blank labels will not be merged and are interpreted as
-// individual labels.  This is a simplified version of CTC.
-// If not specified, defaults to true
-func CTCLossCtcMergeRepeated(value bool) CTCLossAttr {
-	return func(m optionalAttr) {
-		m["ctc_merge_repeated"] = value
-	}
-}
-
-// CTCLossIgnoreLongerOutputsThanInputs sets the optional ignore_longer_outputs_than_inputs attribute to value.
-//
-// value: Scalar. If set to true, during CTC
-// calculation, items that have longer output sequences than input sequences
-// are skipped: they don't contribute to the loss term and have zero-gradient.
-// If not specified, defaults to false
-func CTCLossIgnoreLongerOutputsThanInputs(value bool) CTCLossAttr {
-	return func(m optionalAttr) {
-		m["ignore_longer_outputs_than_inputs"] = value
-	}
-}
-
-// Calculates the CTC Loss (log probability) for each batch entry.  Also calculates
-//
-// the gradient.  This class performs the softmax operation for you, so inputs
-// should be e.g. linear projections of outputs by an LSTM.
-//
-// Arguments:
-//	inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
-//	labels_indices: The indices of a `SparseTensor<int32, 2>`.
-// `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for
-// `(batch b, time t)`.
-//	labels_values: The values (labels) associated with the given batch and time.
-//	sequence_length: A vector containing sequence lengths (batch).
-//
-// Returns A vector (batch) containing log-probabilities.The gradient of `loss`.  3-D, shape:
-// `(max_time x batch_size x num_classes)`.
-func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_values tf.Output, sequence_length tf.Output, optional ...CTCLossAttr) (loss tf.Output, gradient tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CTCLoss",
-		Input: []tf.Input{
-			inputs, labels_indices, labels_values, sequence_length,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-- 
GitLab


From 9905d0b0b29bac16a98bc6d1e51e63e27c1d63ee Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 11 Oct 2018 18:42:16 -0700
Subject: [PATCH 0836/1085] Automated rollback of commit
 6aebb0866718cae2c921e875f3fd74573ee9acc8 (#22917)

PiperOrigin-RevId: 216598193
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 3aa5b6efa1..0f6866aa1a 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -1779,19 +1779,18 @@ class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook):
         summary_writer=summary_writer)
 
   def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
-    global_steps_per_sec = elapsed_steps / elapsed_time
-    examples_per_sec = self._batch_size * global_steps_per_sec
+    global_step_per_sec = elapsed_steps / elapsed_time
+    examples_per_sec = self._batch_size * global_step_per_sec
     if self._summary_writer is not None:
       global_step_summary = Summary(value=[
-          Summary.Value(tag='global_steps/sec',
-                        simple_value=global_steps_per_sec)
+          Summary.Value(tag='global_step/sec', simple_value=global_step_per_sec)
       ])
       example_summary = Summary(value=[
           Summary.Value(tag='examples/sec', simple_value=examples_per_sec)
       ])
       self._summary_writer.add_summary(global_step_summary, global_step)
       self._summary_writer.add_summary(example_summary, global_step)
-    logging.info('global_steps/sec: %g', global_steps_per_sec)
+    logging.info('global_step/sec: %g', global_step_per_sec)
     logging.info('examples/sec: %g', examples_per_sec)
 
 
-- 
GitLab


From 612cf572e5b7d9e750dd18c15d8d43777d442553 Mon Sep 17 00:00:00 2001
From: Tayo Oguntebi <tayo@google.com>
Date: Thu, 11 Oct 2018 18:36:55 -0700
Subject: [PATCH 0837/1085]   Changes NMS XLA implementation to match that of
 CPU.

PiperOrigin-RevId: 216796208
---
 tensorflow/compiler/tests/image_ops_test.py   | 336 ++++++++++--------
 .../compiler/tf2xla/kernels/image_ops.cc      | 238 +++++++++----
 tensorflow/python/ops/image_ops_test.py       |  24 ++
 3 files changed, 379 insertions(+), 219 deletions(-)

diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py
index 68fdb5caf4..d67b16f8e9 100644
--- a/tensorflow/compiler/tests/image_ops_test.py
+++ b/tensorflow/compiler/tests/image_ops_test.py
@@ -26,7 +26,6 @@ import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.compiler.tests import xla_test
-from tensorflow.python.compat import compat
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
@@ -605,168 +604,205 @@ class ResizeBilinearTest(xla_test.XLATestCase):
 class NonMaxSuppressionTest(xla_test.XLATestCase):
 
   def testNMS128From1024(self):
-    with compat.forward_compatibility_horizon(2018, 8, 8):
-      num_boxes = 1024
-      boxes_np = np.random.normal(50, 10, (num_boxes, 4)).astype("f4")
-      scores_np = np.random.normal(0.5, 0.1, (num_boxes,)).astype("f4")
-
-      max_output_size = 128
-      iou_threshold_np = np.array(0.5, dtype=np.float32)
-      score_threshold_np = np.array(0.0, dtype=np.float32)
-
-      with self.cached_session() as sess:
-        boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
-        scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
-        iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
-                                              iou_threshold_np.shape)
-        score_threshold = array_ops.placeholder(score_threshold_np.dtype,
-                                                score_threshold_np.shape)
-        with self.test_scope():
-          selected_indices = image_ops.non_max_suppression_padded(
-              boxes=boxes,
-              scores=scores,
-              max_output_size=max_output_size,
-              iou_threshold=iou_threshold,
-              score_threshold=score_threshold,
-              pad_to_max_output_size=True)
-        inputs_feed = {
-            boxes: boxes_np,
-            scores: scores_np,
-            score_threshold: score_threshold_np,
-            iou_threshold: iou_threshold_np
-        }
-        (indices_tf, _) = sess.run(selected_indices, feed_dict=inputs_feed)
-
-        self.assertEqual(indices_tf.size, max_output_size)
+    num_boxes = 1024
+    boxes_np = np.random.normal(50, 10, (num_boxes, 4)).astype("f4")
+    scores_np = np.random.normal(0.5, 0.1, (num_boxes,)).astype("f4")
+
+    max_output_size = 128
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.0, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          score_threshold: score_threshold_np,
+          iou_threshold: iou_threshold_np
+      }
+      (indices_tf, _) = sess.run(selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
 
   def testNMS3From6Boxes(self):
-    with compat.forward_compatibility_horizon(2018, 8, 8):
-      # Three boxes are selected based on IOU.
-      boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
-                    [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
-      boxes_np = np.array(boxes_data, dtype=np.float32)
-
-      scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
-      scores_np = np.array(scores_data, dtype=np.float32)
-
-      max_output_size = 3
-      iou_threshold_np = np.array(0.5, dtype=np.float32)
-      score_threshold_np = np.array(0.0, dtype=np.float32)
-
-      with self.cached_session() as sess:
-        boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
-        scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
-        iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
-                                              iou_threshold_np.shape)
-        score_threshold = array_ops.placeholder(score_threshold_np.dtype,
-                                                score_threshold_np.shape)
-        with self.test_scope():
-          selected_indices = image_ops.non_max_suppression_padded(
-              boxes=boxes,
-              scores=scores,
-              max_output_size=max_output_size,
-              iou_threshold=iou_threshold,
-              score_threshold=score_threshold,
-              pad_to_max_output_size=True)
-        inputs_feed = {
-            boxes: boxes_np,
-            scores: scores_np,
-            score_threshold: score_threshold_np,
-            iou_threshold: iou_threshold_np
-        }
-        (indices_tf, num_valid) = sess.run(
-            selected_indices, feed_dict=inputs_feed)
-
-        self.assertEqual(indices_tf.size, max_output_size)
-        self.assertEqual(num_valid, 3)
-        self.assertAllClose(indices_tf[:num_valid], [3, 0, 5])
+    # Three boxes are selected based on IOU.
+    boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+                  [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+    boxes_np = np.array(boxes_data, dtype=np.float32)
+
+    scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
+    scores_np = np.array(scores_data, dtype=np.float32)
+
+    max_output_size = 3
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.0, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          score_threshold: score_threshold_np,
+          iou_threshold: iou_threshold_np
+      }
+      (indices_tf, num_valid) = sess.run(
+          selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
+      self.assertEqual(num_valid, 3)
+      self.assertAllClose(indices_tf[:num_valid], [3, 0, 5])
 
   def testNMS3Then2WithScoreThresh(self):
     # Three boxes are selected based on IOU.
     # One is filtered out by score threshold.
 
-    with compat.forward_compatibility_horizon(2018, 8, 8):
-      boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
-                    [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
-      boxes_np = np.array(boxes_data, dtype=np.float32)
-
-      scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
-      scores_np = np.array(scores_data, dtype=np.float32)
-      max_output_size = 3
-      iou_threshold_np = np.array(0.5, dtype=np.float32)
-      score_threshold_np = np.array(0.4, dtype=np.float32)
-
-      with self.cached_session() as sess:
-        boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
-        scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
-        iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
-                                              iou_threshold_np.shape)
-        score_threshold = array_ops.placeholder(score_threshold_np.dtype,
-                                                score_threshold_np.shape)
-        with self.test_scope():
-          selected_indices = image_ops.non_max_suppression_padded(
-              boxes=boxes,
-              scores=scores,
-              max_output_size=max_output_size,
-              iou_threshold=iou_threshold,
-              score_threshold=score_threshold,
-              pad_to_max_output_size=True)
-        inputs_feed = {
-            boxes: boxes_np,
-            scores: scores_np,
-            iou_threshold: iou_threshold_np,
-            score_threshold: score_threshold_np
-        }
-        (indices_tf, num_valid) = sess.run(
-            selected_indices, feed_dict=inputs_feed)
-
-        self.assertEqual(indices_tf.size, max_output_size)
-        self.assertEqual(num_valid, 2)
-        self.assertAllClose(indices_tf[:num_valid], [3, 0])
+    boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+                  [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+    boxes_np = np.array(boxes_data, dtype=np.float32)
+
+    scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
+    scores_np = np.array(scores_data, dtype=np.float32)
+    max_output_size = 3
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.4, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          iou_threshold: iou_threshold_np,
+          score_threshold: score_threshold_np
+      }
+      (indices_tf, num_valid) = sess.run(
+          selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
+      self.assertEqual(num_valid, 2)
+      self.assertAllClose(indices_tf[:num_valid], [3, 0])
 
   def testNMS3Then1WithScoreMaxThresh(self):
     # Three boxes are selected based on IOU.
     # One is filtered out by score threshold.
     # One is filtered out by max_output_size.
 
-    with compat.forward_compatibility_horizon(2018, 8, 8):
-      boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
-                    [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
-      boxes_np = np.array(boxes_data, dtype=np.float32)
-
-      scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
-      scores_np = np.array(scores_data, dtype=np.float32)
-      max_output_size = 1
-      iou_threshold_np = np.array(0.5, dtype=np.float32)
-      score_threshold_np = np.array(0.4, dtype=np.float32)
-
-      with self.cached_session() as sess:
-        boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
-        scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
-        iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
-                                              iou_threshold_np.shape)
-        score_threshold = array_ops.placeholder(score_threshold_np.dtype,
-                                                score_threshold_np.shape)
-        with self.test_scope():
-          selected_indices = image_ops.non_max_suppression_padded(
-              boxes=boxes,
-              scores=scores,
-              max_output_size=max_output_size,
-              iou_threshold=iou_threshold,
-              score_threshold=score_threshold,
-              pad_to_max_output_size=True)
-        inputs_feed = {
-            boxes: boxes_np,
-            scores: scores_np,
-            iou_threshold: iou_threshold_np,
-            score_threshold: score_threshold_np
-        }
-        (indices_tf, num_valid) = sess.run(
-            selected_indices, feed_dict=inputs_feed)
-
-        self.assertEqual(indices_tf.size, max_output_size)
-        self.assertEqual(num_valid, 1)
-        self.assertAllClose(indices_tf[:num_valid], [3])
+    boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+                  [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+    boxes_np = np.array(boxes_data, dtype=np.float32)
+
+    scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
+    scores_np = np.array(scores_data, dtype=np.float32)
+    max_output_size = 1
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.4, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          iou_threshold: iou_threshold_np,
+          score_threshold: score_threshold_np
+      }
+      (indices_tf, num_valid) = sess.run(
+          selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
+      self.assertEqual(num_valid, 1)
+      self.assertAllClose(indices_tf[:num_valid], [3])
+
+  def testSelectFromContinuousOverLap(self):
+    # Tests that a suppressed box does not itself suppress other boxes.
+
+    boxes_data = [[0, 0, 1, 1], [0, 0.2, 1, 1.2], [0, 0.4, 1, 1.4],
+                  [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 3]]
+    boxes_np = np.array(boxes_data, dtype=np.float32)
+
+    scores_data = [0.9, 0.75, 0.6, 0.5, 0.4, 0.3]
+    scores_np = np.array(scores_data, dtype=np.float32)
+    max_output_size = 3
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.1, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          iou_threshold: iou_threshold_np,
+          score_threshold: score_threshold_np
+      }
+      (indices_tf, num_valid) = sess.run(
+          selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
+      self.assertEqual(num_valid, 3)
+      self.assertAllClose(indices_tf[:num_valid], [0, 2, 4])
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/image_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
index 921b4340c0..6713d6bc92 100644
--- a/tensorflow/compiler/tf2xla/kernels/image_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h"
 #include "tensorflow/compiler/tf2xla/lib/util.h"
+#include "tensorflow/compiler/tf2xla/lib/while_loop.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -316,6 +318,70 @@ class AdjustHueOp : public XlaOpKernel {
 };
 REGISTER_XLA_OP(Name("AdjustHue"), AdjustHueOp);
 
+struct WhileCondFn {
+  const int64 num_boxes;
+  const int64 output_size;
+
+  explicit WhileCondFn(int64 num_boxes, int64 output_size)
+      : num_boxes(num_boxes), output_size(output_size) {}
+
+  xla::StatusOr<xla::XlaOp> operator()(absl::Span<const xla::XlaOp> values,
+                                       xla::XlaBuilder* cond_builder) const {
+    xla::XlaOp row_idx = values[0];
+    xla::XlaOp row_in_bounds =
+        xla::Lt(row_idx, xla::ConstantR0<int32>(cond_builder, num_boxes));
+    xla::XlaOp num_outputs_so_far = values[1];
+    xla::XlaOp results_not_full = xla::Lt(
+        num_outputs_so_far, xla::ConstantR0<int32>(cond_builder, output_size));
+    return xla::And(row_in_bounds, results_not_full);
+  }
+};
+
+// Process the boxes one-by-one using the iou matrix mask.
+// This implementation uses a correct, but greedy, sequential algorithm
+// to ensure that suppressed boxes cannot themselves suppress other
+// boxes.
+struct SuppressBodyFn {
+  const int64 num_boxes;
+
+  explicit SuppressBodyFn(int64 num_boxes) : num_boxes(num_boxes) {}
+
+  xla::StatusOr<std::vector<xla::XlaOp>> operator()(
+      absl::Span<const xla::XlaOp> values, xla::XlaBuilder* builder) const {
+    auto row_idx = values[0];
+    auto num_outputs_so_far = values[1];
+    auto iou_mask = values[2];
+    auto included_iou = values[3];
+    auto zero_r1 = xla::ConstantR1<int32>(builder, {0});
+    // Determine if current elem is active using a slice.
+    auto row_idx_r1 = xla::Reshape(row_idx, {1});
+    auto active_elem = xla::DynamicSlice(included_iou, row_idx_r1, {1});
+    active_elem = xla::Reshape(active_elem, {});
+    // Increment output count iff current elem is not suppressed.
+    num_outputs_so_far = xla::Select(
+        active_elem, num_outputs_so_far + xla::ConstantR0<int32>(builder, 1),
+        num_outputs_so_far);
+    // Slice out the row_idx.
+    auto starts = xla::ConcatInDim(builder, {row_idx_r1, zero_r1}, 0);
+    auto row_iou = xla::DynamicSlice(iou_mask, starts, {1, num_boxes});
+    // Remove the diagonal from consideration. An elem cannot suppress
+    // itself.
+    auto update_starts = xla::ConcatInDim(builder, {zero_r1, row_idx_r1}, 0);
+    row_iou = xla::DynamicUpdateSlice(
+        row_iou, xla::ConstantR2FromArray2D<bool>(builder, {{false}}),
+        update_starts);
+    // Create a suppression by inverting polarity.
+    row_iou = xla::Reshape(row_iou, {num_boxes});
+    auto supp_mask = xla::Not(row_iou);
+    // Update mask iff current elem is not suppressed.
+    included_iou = xla::Select(xla::Broadcast(active_elem, {num_boxes}),
+                               xla::And(included_iou, supp_mask), included_iou);
+    row_idx = row_idx + xla::ConstantR0<int32>(builder, 1);
+    return std::vector<xla::XlaOp>{row_idx, num_outputs_so_far, iou_mask,
+                                   included_iou};
+  }
+};
+
 class NonMaxSuppressionOp : public XlaOpKernel {
  public:
   explicit NonMaxSuppressionOp(OpKernelConstruction* context)
@@ -326,14 +392,12 @@ class NonMaxSuppressionOp : public XlaOpKernel {
 
   void Compile(XlaOpKernelContext* context) override {
     // TODO(b/111646731): Improve scalability of this op, using blocking.
-    int num_boxes_dim = 0;
-    int coords_dim = 1;
     const TensorShape& boxes_shape = context->InputShape("boxes");
     OP_REQUIRES(context, TensorShapeUtils::IsMatrix(boxes_shape),
                 errors::InvalidArgument("boxes must be 2-D, currently: ",
                                         boxes_shape.DebugString()));
-    const int64 num_boxes = boxes_shape.dim_size(num_boxes_dim);
-    OP_REQUIRES(context, boxes_shape.dim_size(coords_dim) == 4,
+    const int64 num_boxes = boxes_shape.dim_size(0);
+    OP_REQUIRES(context, boxes_shape.dim_size(1) == 4,
                 errors::InvalidArgument("boxes must have 4 columns",
                                         boxes_shape.DebugString()));
     const TensorShape& scores_shape = context->InputShape("scores");
@@ -347,9 +411,13 @@ class NonMaxSuppressionOp : public XlaOpKernel {
     OP_REQUIRES(context, pad_to_max_output_size_,
                 errors::InvalidArgument(
                     "XLA compilation requires pad_to_max_output_size == True"));
+    OP_REQUIRES(context, num_boxes <= kint32max,
+                errors::InvalidArgument("XLA compilation requires number of "
+                                        "boxes to be <= kint32max, got ",
+                                        num_boxes));
 
-    xla::XlaOp boxes = context->Input("boxes");
-    xla::XlaOp scores = context->Input("scores");
+    const xla::XlaOp boxes_input = context->Input("boxes");
+    const xla::XlaOp scores_input = context->Input("scores");
     int64 output_size;
     OP_REQUIRES_OK(context, context->ConstantInputAsIntScalar(2, &output_size));
     OP_REQUIRES(
@@ -358,90 +426,113 @@ class NonMaxSuppressionOp : public XlaOpKernel {
     OP_REQUIRES(context, output_size <= kint32max,
                 errors::InvalidArgument("Need output_size <= kint32Max, got ",
                                         output_size));
-    xla::XlaOp score_thresh = context->Input("score_threshold");
-    xla::XlaOp iou_thresh = context->Input("iou_threshold");
-
+    const xla::XlaOp score_thresh = context->Input("score_threshold");
+    const xla::XlaOp iou_thresh = context->Input("iou_threshold");
     xla::XlaBuilder* const builder = context->builder();
 
     // Choose a more convenient layout.
-    xla::XlaOp boxes_t = xla::Transpose(boxes, {1, 0});
-    coords_dim = 0;
-    num_boxes_dim = 1;
-
-    // Shapes are henceforth [1, num_boxes].
-    xla::XlaOp coord_y0 = xla::SliceInDim(boxes_t,
-                                          /*start_index=*/0,
-                                          /*limit_index=*/1,
-                                          /*stride=*/1,
-                                          /*dimno=*/coords_dim);
-    xla::XlaOp coord_x0 = xla::SliceInDim(boxes_t,
-                                          /*start_index=*/1,
-                                          /*limit_index=*/2,
-                                          /*stride=*/1,
-                                          /*dimno=*/coords_dim);
-    xla::XlaOp coord_y1 = xla::SliceInDim(boxes_t,
-                                          /*start_index=*/2,
-                                          /*limit_index=*/3,
-                                          /*stride=*/1,
-                                          /*dimno=*/coords_dim);
-    xla::XlaOp coord_x1 = xla::SliceInDim(boxes_t,
-                                          /*start_index=*/3,
-                                          /*limit_index=*/4,
-                                          /*stride=*/1,
-                                          /*dimno=*/coords_dim);
-    xla::XlaOp y1 =
-        xla::Select(xla::Le(coord_y0, coord_y1), coord_y0, coord_y1);
-    xla::XlaOp y2 =
-        xla::Select(xla::Le(coord_y0, coord_y1), coord_y1, coord_y0);
-    xla::XlaOp x1 =
-        xla::Select(xla::Le(coord_x0, coord_x1), coord_x0, coord_x1);
-    xla::XlaOp x2 =
-        xla::Select(xla::Le(coord_x0, coord_x1), coord_x1, coord_x0);
+    const xla::XlaOp boxes = xla::Transpose(boxes_input, {1, 0});
+    const xla::XlaOp boxes_sorted = xla::GetTupleElement(
+        xla::Sort(/*keys=*/-xla::Broadcast(scores_input, {4}),
+                  /*values=*/{boxes},
+                  /*dimension=*/1),
+        1);
+    // Track the mapping of indices into sorted domain.
+    const xla::XlaOp iota_indices = xla::Iota(builder, xla::S32, num_boxes);
+    const xla::XlaOp indices_sort = xla::Sort(-scores_input, {iota_indices});
+    const xla::XlaOp indices_sorted = xla::GetTupleElement(indices_sort, 1);
+    const xla::XlaOp scores = xla::Neg(xla::GetTupleElement(indices_sort, 0));
+
+    // Shapes are henceforth [1, num_boxes]. 'c_y0' denotes 'coordinate' y0.
+    const xla::XlaOp c_y0 = xla::Reshape(xla::SliceInDim(boxes_sorted,
+                                                         /*start_index=*/0,
+                                                         /*limit_index=*/1,
+                                                         /*stride=*/1,
+                                                         /*dimno=*/0),
+                                         {num_boxes});
+    const xla::XlaOp c_x0 = xla::Reshape(xla::SliceInDim(boxes_sorted,
+                                                         /*start_index=*/1,
+                                                         /*limit_index=*/2,
+                                                         /*stride=*/1,
+                                                         /*dimno=*/0),
+                                         {num_boxes});
+    const xla::XlaOp c_y1 = xla::Reshape(xla::SliceInDim(boxes_sorted,
+                                                         /*start_index=*/2,
+                                                         /*limit_index=*/3,
+                                                         /*stride=*/1,
+                                                         /*dimno=*/0),
+                                         {num_boxes});
+    const xla::XlaOp c_x1 = xla::Reshape(xla::SliceInDim(boxes_sorted,
+                                                         /*start_index=*/3,
+                                                         /*limit_index=*/4,
+                                                         /*stride=*/1,
+                                                         /*dimno=*/0),
+                                         {num_boxes});
+
+    xla::XlaOp y1 = xla::Select(xla::Le(c_y0, c_y1), c_y0, c_y1);
+    xla::XlaOp y2 = xla::Select(xla::Le(c_y0, c_y1), c_y1, c_y0);
+    xla::XlaOp x1 = xla::Select(xla::Le(c_x0, c_x1), c_x0, c_x1);
+    xla::XlaOp x2 = xla::Select(xla::Le(c_x0, c_x1), c_x1, c_x0);
     xla::XlaOp area = (y2 - y1) * (x2 - x1);
 
-    // Transpose the 1xN tensors, instead of the NxN tensors.
-    xla::XlaOp y1_t = xla::Transpose(y1, {1, 0});
-    xla::XlaOp y2_t = xla::Transpose(y2, {1, 0});
-    xla::XlaOp x1_t = xla::Transpose(x1, {1, 0});
-    xla::XlaOp x2_t = xla::Transpose(x2, {1, 0});
-    xla::XlaOp area_t = xla::Transpose(area, {1, 0});
+    // Shapes are henceforth [1, num_boxes].
+    y1 = xla::Broadcast(y1, {1});
+    y2 = xla::Broadcast(y2, {1});
+    x1 = xla::Broadcast(x1, {1});
+    x2 = xla::Broadcast(x2, {1});
+    area = xla::Broadcast(area, {1});
 
     // Shapes are henceforth [num_boxes, num_boxes].
-    xla::XlaOp i_xmin = xla::Max(x1, x1_t);
-    xla::XlaOp i_ymin = xla::Max(y1, y1_t);
-    xla::XlaOp i_xmax = xla::Min(x2, x2_t);
-    xla::XlaOp i_ymax = xla::Min(y2, y2_t);
+    xla::XlaOp i_xmin = xla::Max(x1, xla::Transpose(x1, {1, 0}));
+    xla::XlaOp i_ymin = xla::Max(y1, xla::Transpose(y1, {1, 0}));
+    xla::XlaOp i_xmax = xla::Min(x2, xla::Transpose(x2, {1, 0}));
+    xla::XlaOp i_ymax = xla::Min(y2, xla::Transpose(y2, {1, 0}));
     auto square_zero = xla::ZerosLike(i_xmin);
 
     xla::XlaOp i_area = xla::Max(i_xmax - i_xmin, square_zero) *
                         xla::Max(i_ymax - i_ymin, square_zero);
-    xla::XlaOp u_area = area + area_t - i_area;
+    xla::XlaOp u_area = area + xla::Transpose(area, {1, 0}) - i_area;
     xla::XlaOp iou = i_area / u_area;
 
     xla::XlaOp iou_thresh_mask = xla::Gt(iou, iou_thresh + square_zero);
-    xla::XlaOp scores_2d = xla::Reshape(scores, {num_boxes, 1});
-    xla::XlaOp score_cmp_mask =
-        xla::Gt(scores_2d, xla::Transpose(scores_2d, {1, 0}));
-    xla::XlaOp suppress = xla::And(iou_thresh_mask, score_cmp_mask);
-
-    // Shapes are [num_boxes] after the reduce.
-    xla::XlaOp included_iou = xla::Not(xla::Reduce(
-        suppress,
-        /*init_value=*/xla::ConstantR0<bool>(builder, false),
-        /*computation=*/CreateScalarOrComputation(xla::PRED, builder),
-        /*dimensions_to_reduce=*/{0}));
+    xla::XlaOp included_iou =
+        xla::Broadcast(xla::ConstantR0<bool>(builder, true), {num_boxes});
+
+    std::vector<xla::XlaOp> init_values;
+    init_values.reserve(4);
+    init_values.push_back(xla::ConstantR0<int32>(builder, 0));  // col_idx
+    init_values.push_back(xla::ConstantR0<int32>(builder, 0));  // num_outputs
+    init_values.push_back(iou_thresh_mask);
+    init_values.push_back(included_iou);
+
+    auto suppress_loop_result =
+        XlaWhileLoop(WhileCondFn(num_boxes, output_size),
+                     SuppressBodyFn(num_boxes), init_values, "suppress_loop",
+                     builder)
+            .ValueOrDie();
+
     xla::XlaOp included_score =
         xla::Gt(scores, xla::Broadcast(score_thresh, {num_boxes}));
-    xla::XlaOp included = xla::And(included_iou, included_score);
+    xla::XlaOp included = xla::And(included_score, suppress_loop_result[3]);
+
+    // Only consider boxes over which we have iterated. This allows for accurate
+    // counting. DynamicSlice would require knowledge of the size of the output.
+    auto valid_elem = xla::Lt(
+        iota_indices, xla::Broadcast(suppress_loop_result[0], {num_boxes}));
+    included = xla::And(included, valid_elem);
+
     xla::XlaOp neg_inf =
         xla::Broadcast(xla::MinValue(builder, xla::F32), {num_boxes});
     xla::XlaOp scores_included = xla::Select(included, scores, neg_inf);
-
+    xla::XlaOp output_tuple = TopK(scores_included, output_size);
+    xla::XlaOp selected_indices_sorted = xla::GetTupleElement(output_tuple, 1);
+    // Calculate num_valid.
+    // Note: num_valid cannot be taken from the loop outputs, because outputs
+    // can be suppressed by score threshold.
     xla::XlaOp ones_included = xla::Select(
         included,
         xla::Broadcast(xla::ConstantR0<int32>(builder, 1), {num_boxes}),
         xla::Broadcast(xla::ConstantR0<int32>(builder, 0), {num_boxes}));
-
     // num_valid is scalar. Value should be bound by output_size.
     xla::XlaOp num_valid_total = xla::Reduce(
         ones_included,
@@ -451,8 +542,17 @@ class NonMaxSuppressionOp : public XlaOpKernel {
     xla::XlaOp num_valid =
         xla::Min(num_valid_total, xla::ConstantR0<int32>(builder, output_size));
 
-    xla::XlaOp output_tuple = TopK(scores_included, output_size);
-    xla::XlaOp selected_indices = xla::GetTupleElement(output_tuple, 1);
+    // Re-index into the original scores input tensor, using a Gather.
+    // Boxes were suppressed in the sorted domain.
+    xla::XlaOp selected_indices;
+    DataType gather_type = context->expected_output_dtype(0);
+    OP_REQUIRES_OK(
+        context,
+        XlaGather(indices_sorted, scores_shape, selected_indices_sorted,
+                  TensorShape({output_size}),
+                  /*axis=*/0,
+                  /*indices_are_nd=*/false,
+                  /*dtype=*/gather_type, DT_INT32, builder, &selected_indices));
 
     context->SetOutput(0, selected_indices);
     context->SetOutput(1, num_valid);
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index ff86df6346..dae71caa81 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -3737,6 +3737,30 @@ class NonMaxSuppressionPaddedTest(test_util.TensorFlowTestCase):
       self.assertAllClose(selected_indices.eval(), [3, 0, 5])
       self.assertEqual(num_valid.eval(), 3)
 
+  def testSelectFromContinuousOverLap(self):
+    boxes_np = [[0, 0, 1, 1], [0, 0.2, 1, 1.2], [0, 0.4, 1, 1.4],
+                [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]]
+    scores_np = [0.9, 0.75, 0.6, 0.5, 0.4, 0.3]
+    max_output_size_np = 3
+    iou_threshold_np = 0.5
+    score_threshold_np = 0.1
+    boxes = constant_op.constant(boxes_np)
+    scores = constant_op.constant(scores_np)
+    max_output_size = constant_op.constant(max_output_size_np)
+    iou_threshold = constant_op.constant(iou_threshold_np)
+    score_threshold = constant_op.constant(score_threshold_np)
+    selected_indices, num_valid = image_ops.non_max_suppression_padded(
+        boxes,
+        scores,
+        max_output_size,
+        iou_threshold,
+        score_threshold)
+    # The output shape of the padded operation must be fully defined.
+    self.assertEqual(selected_indices.shape.is_fully_defined(), False)
+    with self.cached_session():
+      self.assertAllClose(selected_indices.eval(), [0, 2, 4])
+      self.assertEqual(num_valid.eval(), 3)
+
 
 class VerifyCompatibleImageShapesTest(test_util.TensorFlowTestCase):
   """Tests utility function used by ssim() and psnr()."""
-- 
GitLab


From b0a3c8e2688161ab3a480dfeee2def27cd56ae0a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 18:38:41 -0700
Subject: [PATCH 0838/1085] Internal change.

PiperOrigin-RevId: 216796407
---
 tensorflow/contrib/lite/kernels/basic_rnn.cc  |  18 +-
 .../kernels/bidirectional_sequence_rnn.cc     |  50 +--
 .../bidirectional_sequence_rnn_test.cc        |  49 ++-
 .../lite/kernels/internal/kernel_utils.cc     | 346 +++++++++++++-----
 .../lite/kernels/internal/kernel_utils.h      |  34 +-
 tensorflow/contrib/lite/kernels/lstm_eval.cc  | 245 +++++++++----
 .../kernels/unidirectional_sequence_rnn.cc    |  20 +-
 7 files changed, 542 insertions(+), 220 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/basic_rnn.cc b/tensorflow/contrib/lite/kernels/basic_rnn.cc
index 1aa27602e5..74de23df73 100644
--- a/tensorflow/contrib/lite/kernels/basic_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/basic_rnn.cc
@@ -133,6 +133,8 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input,
   const int batch_size = input->dims->data[0];
   const int num_units = input_weights->dims->data[0];
   const int input_size = input->dims->data[1];
+  const int output_batch_leading_dim =
+      output->dims->data[output->dims->size - 1];
 
   // Initialize the pointer to hidden state.
   float* hidden_state_ptr_batch = hidden_state->data.f;
@@ -144,10 +146,10 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input,
   const float* recurrent_weights_ptr = recurrent_weights->data.f;
   const float* bias_ptr = bias->data.f;
 
-  kernel_utils::RnnBatchStep(input_ptr_batch, input_weights_ptr,
-                             recurrent_weights_ptr, bias_ptr, input_size,
-                             num_units, batch_size, params->activation,
-                             hidden_state_ptr_batch, output_ptr_batch);
+  kernel_utils::RnnBatchStep(
+      input_ptr_batch, input_weights_ptr, recurrent_weights_ptr, bias_ptr,
+      input_size, num_units, batch_size, output_batch_leading_dim,
+      params->activation, hidden_state_ptr_batch, output_ptr_batch);
   return kTfLiteOk;
 }
 
@@ -162,6 +164,8 @@ TfLiteStatus EvalHybrid(const TfLiteTensor* input,
   const int batch_size = input->dims->data[0];
   const int num_units = input_weights->dims->data[0];
   const int input_size = input->dims->data[1];
+  const int output_batch_leading_dim =
+      output->dims->data[output->dims->size - 1];
 
   // Initialize the pointer to hidden state.
   float* hidden_state_ptr_batch = hidden_state->data.f;
@@ -187,9 +191,9 @@ TfLiteStatus EvalHybrid(const TfLiteTensor* input,
   kernel_utils::RnnBatchStep(
       input_ptr_batch, input_weights_ptr, input_weights_scale,
       recurrent_weights_ptr, recurrent_weights_scale, bias_ptr, input_size,
-      num_units, batch_size, params->activation, quantized_input_ptr,
-      quantized_hidden_state_ptr, scaling_factors_ptr, hidden_state_ptr_batch,
-      output_ptr_batch);
+      num_units, batch_size, output_batch_leading_dim, params->activation,
+      quantized_input_ptr, quantized_hidden_state_ptr, scaling_factors_ptr,
+      hidden_state_ptr_batch, output_ptr_batch);
   return kTfLiteOk;
 }
 
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
index f544dd5ffa..9f764313ff 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
@@ -14,8 +14,8 @@ limitations under the License.
 ==============================================================================*/
 #include <cassert>
 #include <cmath>
-#include <cstdlib>
 #include <cstdio>
+#include <cstdlib>
 #include <iostream>
 #include <limits>
 
@@ -299,9 +299,6 @@ TfLiteStatus EvalFloat(
   const int bw_output_step =
       params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
   if (time_major) {
-    // TODO(mirkov): add merge_outputs support for time_major inputs.
-    TF_LITE_ASSERT_EQ(params->merge_outputs, false);
-
     // Forward cell.
     float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f;
     for (int s = 0; s < max_time; s++) {
@@ -312,12 +309,12 @@ TfLiteStatus EvalFloat(
               ? aux_input->data.f + s * input_size * batch_size
               : nullptr;
       float* output_ptr_batch =
-          fw_output->data.f + s * fw_num_units * batch_size;
+          fw_output->data.f + s * fw_output_step * batch_size;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch,
           fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr,
-          input_size, aux_input_size, fw_num_units, batch_size,
+          input_size, aux_input_size, fw_num_units, batch_size, fw_output_step,
           params->activation, fw_hidden_state_ptr_batch, output_ptr_batch);
     }
     // Backward cell.
@@ -330,12 +327,14 @@ TfLiteStatus EvalFloat(
               ? aux_input->data.f + s * input_size * batch_size
               : nullptr;
       float* output_ptr_batch =
-          bw_output->data.f + s * bw_num_units * batch_size;
+          (params->merge_outputs ? fw_output->data.f + fw_num_units
+                                 : bw_output->data.f) +
+          s * bw_output_step * batch_size;
 
       kernel_utils::RnnBatchStep(
           input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch,
           bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr,
-          input_size, aux_input_size, bw_num_units, batch_size,
+          input_size, aux_input_size, bw_num_units, batch_size, bw_output_step,
           params->activation, bw_hidden_state_ptr_batch, output_ptr_batch);
     }
   } else {
@@ -358,7 +357,8 @@ TfLiteStatus EvalFloat(
             input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch,
             fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr,
             input_size, aux_input_size, fw_num_units, /*batch_size=*/1,
-            params->activation, fw_hidden_state_ptr_batch, output_ptr_batch);
+            fw_output_step, params->activation, fw_hidden_state_ptr_batch,
+            output_ptr_batch);
       }
       // Backward cell.
       float* bw_hidden_state_ptr_batch =
@@ -380,7 +380,8 @@ TfLiteStatus EvalFloat(
             input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch,
             bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr,
             input_size, aux_input_size, bw_num_units, /*batch_size=*/1,
-            params->activation, bw_hidden_state_ptr_batch, output_ptr_batch);
+            bw_output_step, params->activation, bw_hidden_state_ptr_batch,
+            output_ptr_batch);
       }
     }
   }
@@ -457,9 +458,6 @@ TfLiteStatus EvalHybrid(
       params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units;
   if (time_major) {
     for (int t = 0; t < max_time; t++) {
-      // TODO(mirkov): add merge_outputs support for time_major inputs.
-      TF_LITE_ASSERT_EQ(params->merge_outputs, false);
-
       // Forward cell.
       float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f;
       for (int s = 0; s < max_time; s++) {
@@ -470,16 +468,17 @@ TfLiteStatus EvalHybrid(
                 ? aux_input->data.f + s * input_size * batch_size
                 : nullptr;
         float* output_ptr_batch =
-            fw_output->data.f + s * fw_num_units * batch_size;
+            fw_output->data.f + s * fw_output_step * batch_size;
 
         kernel_utils::RnnBatchStep(
             input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale,
             aux_input_ptr_batch, aux_fw_input_weights_ptr,
             aux_fw_input_weights_scale, fw_recurrent_weights_ptr,
             fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size,
-            fw_num_units, batch_size, params->activation, quantized_input_ptr,
-            aux_quantized_input_ptr, fw_quantized_hidden_state_ptr,
-            scaling_factors_ptr, fw_hidden_state_ptr_batch, output_ptr_batch);
+            fw_num_units, batch_size, fw_output_step, params->activation,
+            quantized_input_ptr, aux_quantized_input_ptr,
+            fw_quantized_hidden_state_ptr, scaling_factors_ptr,
+            fw_hidden_state_ptr_batch, output_ptr_batch);
       }
       // Backward cell.
       float* bw_hidden_state_ptr_batch = bw_hidden_state->data.f;
@@ -491,16 +490,19 @@ TfLiteStatus EvalHybrid(
                 ? aux_input->data.f + s * input_size * batch_size
                 : nullptr;
         float* output_ptr_batch =
-            bw_output->data.f + s * bw_num_units * batch_size;
+            (params->merge_outputs ? fw_output->data.f + fw_num_units
+                                   : bw_output->data.f) +
+            s * bw_output_step * batch_size;
 
         kernel_utils::RnnBatchStep(
             input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale,
             aux_input_ptr_batch, aux_bw_input_weights_ptr,
             aux_bw_input_weights_scale, bw_recurrent_weights_ptr,
             bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size,
-            bw_num_units, batch_size, params->activation, quantized_input_ptr,
-            aux_quantized_input_ptr, bw_quantized_hidden_state_ptr,
-            scaling_factors_ptr, bw_hidden_state_ptr_batch, output_ptr_batch);
+            bw_num_units, batch_size, bw_output_step, params->activation,
+            quantized_input_ptr, aux_quantized_input_ptr,
+            bw_quantized_hidden_state_ptr, scaling_factors_ptr,
+            bw_hidden_state_ptr_batch, output_ptr_batch);
       }
     }
   } else {
@@ -524,7 +526,7 @@ TfLiteStatus EvalHybrid(
             aux_input_ptr_batch, aux_fw_input_weights_ptr,
             aux_fw_input_weights_scale, fw_recurrent_weights_ptr,
             fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size,
-            fw_num_units, /*batch_size=*/1, params->activation,
+            fw_num_units, /*batch_size=*/1, fw_output_step, params->activation,
             quantized_input_ptr, aux_quantized_input_ptr,
             fw_quantized_hidden_state_ptr, scaling_factors_ptr,
             fw_hidden_state_ptr_batch, output_ptr_batch);
@@ -534,7 +536,7 @@ TfLiteStatus EvalHybrid(
           bw_hidden_state->data.f + b * bw_num_units;
       float* bw_output_offset =
           params->merge_outputs
-              ? fw_output->data.f + b * bw_output_step * max_time
+              ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units
               : bw_output->data.f + b * bw_output_step * max_time;
       for (int s = max_time - 1; s >= 0; s--) {
         const float* input_ptr_batch =
@@ -550,7 +552,7 @@ TfLiteStatus EvalHybrid(
             aux_input_ptr_batch, aux_bw_input_weights_ptr,
             aux_bw_input_weights_scale, bw_recurrent_weights_ptr,
             bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size,
-            bw_num_units, /*batch_size=*/1, params->activation,
+            bw_num_units, /*batch_size=*/1, bw_output_step, params->activation,
             quantized_input_ptr, aux_quantized_input_ptr,
             bw_quantized_hidden_state_ptr, scaling_factors_ptr,
             bw_hidden_state_ptr_batch, output_ptr_batch);
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
index 6c179ca05d..d0d04428c9 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc
@@ -14,8 +14,8 @@ limitations under the License.
 ==============================================================================*/
 // Unit test for TFLite Bidirectional RNN op.
 
-#include <vector>
 #include <iomanip>
+#include <vector>
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
@@ -884,6 +884,49 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) {
               ElementsAreArray(ArrayFloatNear(merged_expected)));
 }
 
+// Same as BlackBox test, but input is reshuffled to time_major format.
+TEST(BidirectionalRNNOpTest, BlackBoxTestTimeMajorMergeOutputs) {
+  BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16,
+                              /*fw_units=*/16, /*bw_units=*/16,
+                              /*input_size=*/8, /*time_major=*/true,
+                              /*merge_outputs=*/true);
+  rnn.SetFwWeights(weights);
+  rnn.SetBwWeights(weights);
+  rnn.SetFwBias(biases);
+  rnn.SetBwBias(biases);
+  rnn.SetFwRecurrentWeights(recurrent_weights);
+  rnn.SetBwRecurrentWeights(recurrent_weights);
+
+  // Insert the inputs in time_major format. The batch_major format is:
+  // [b0t0, b0t1, ..., b0t15, b1t0, b1t1, ..., b1t15]. This is reshuffled as:
+  // [b0t0, b1t0, b0t1, b1t1, ..., b0t15, b1t15].
+  for (int i = 0; i < rnn.sequence_len(); i++) {
+    float* batch_start = rnn_input + i * rnn.input_size();
+    float* batch_end = batch_start + rnn.input_size();
+    // The two batches are identical.
+    rnn.SetInput(2 * i * rnn.input_size(), batch_start, batch_end);
+    rnn.SetInput((2 * i + 1) * rnn.input_size(), batch_start, batch_end);
+  }
+
+  rnn.Invoke();
+
+  std::vector<float> merged_expected;
+  for (int step = 0; step < rnn.sequence_len(); step++) {
+    for (int bid = 0; bid < rnn.num_batches(); bid++) {
+      merged_expected.insert(
+          merged_expected.end(),
+          rnn_golden_fw_output + rnn.num_fw_units() * step,
+          rnn_golden_fw_output + rnn.num_fw_units() * (step + 1));
+      merged_expected.insert(
+          merged_expected.end(),
+          rnn_golden_bw_output + rnn.num_bw_units() * step,
+          rnn_golden_bw_output + rnn.num_bw_units() * (step + 1));
+    }
+  }
+  EXPECT_THAT(rnn.GetFwOutput(),
+              ElementsAreArray(ArrayFloatNear(merged_expected)));
+}
+
 // Check that if the input sequence is reversed the outputs are the same just
 // forward and backward are swapped (and reversed).
 TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) {
@@ -954,8 +997,8 @@ TEST(BidirectionalRNNOpTest, EndToEndTest) {
       0.3492105,   0.56452453,   0.4389236,   -0.59929526, -0.19762468,
       -0.36868393, -0.13198286,  -0.53800809, -0.22850353};
 
-  std::initializer_list<float> dnn_biases = {
-    0.29177809, -0.98799044, 0.065919638, 0.68781924};
+  std::initializer_list<float> dnn_biases = {0.29177809, -0.98799044,
+                                             0.065919638, 0.68781924};
 
   rnn.SetFwWeights(weights);
   rnn.SetBwWeights(weights);
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
index 083e5839bd..7875b23979 100644
--- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
+++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc
@@ -22,14 +22,15 @@ namespace kernel_utils {
 void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr,
                   const float* recurrent_weights_ptr, const float* bias_ptr,
                   int input_size, int num_units, int batch_size,
+                  int output_batch_leading_dim,
                   TfLiteFusedActivation activation,
                   float* hidden_state_ptr_batch, float* output_ptr_batch) {
   RnnBatchStep(input_ptr_batch, input_weights_ptr,
                /*aux_input_ptr_batch=*/nullptr,
                /*aux_input_weights_ptr=*/nullptr, recurrent_weights_ptr,
                bias_ptr, input_size, /*aux_input_size=*/0, num_units,
-               batch_size, activation, hidden_state_ptr_batch,
-               output_ptr_batch);
+               batch_size, output_batch_leading_dim, activation,
+               hidden_state_ptr_batch, output_ptr_batch);
 }
 
 void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr,
@@ -37,49 +38,100 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr,
                   const float* aux_input_weights_ptr,
                   const float* recurrent_weights_ptr, const float* bias_ptr,
                   int input_size, int aux_input_size, int num_units,
-                  int batch_size, TfLiteFusedActivation activation,
+                  int batch_size, int output_batch_leading_dim,
+                  TfLiteFusedActivation activation,
                   float* hidden_state_ptr_batch, float* output_ptr_batch) {
-  // Output = bias
-  tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size,
-                                        output_ptr_batch);
-  // Output += input * input_weights
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      input_weights_ptr, num_units, input_size, input_ptr_batch, batch_size,
-      output_ptr_batch, /*result_stride=*/1);
-  // Output += aux_input * aux_input_weights (if they are not empty).
-  if (aux_input_size > 0) {
+  // Since the output batch rows may not be contiguous (output_batch_leading_dim
+  // != n_output), we unroll the batched operations where this is the case.
+  if (output_batch_leading_dim == num_units) {
+    // Output = bias
+    tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size,
+                                          output_ptr_batch);
+
+    // Output += input * input_weights
     tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        aux_input_weights_ptr, num_units, aux_input_size, aux_input_ptr_batch,
+        input_weights_ptr, num_units, input_size, input_ptr_batch, batch_size,
+        output_ptr_batch, /*result_stride=*/1);
+
+    // Output += aux_input * aux_input_weights (if they are not empty).
+    if (aux_input_size > 0) {
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          aux_input_weights_ptr, num_units, aux_input_size, aux_input_ptr_batch,
+          batch_size, output_ptr_batch, /*result_stride=*/1);
+    }
+
+    // Output += recurrent_weights * hidden_state
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+        recurrent_weights_ptr, num_units, num_units, hidden_state_ptr_batch,
         batch_size, output_ptr_batch, /*result_stride=*/1);
+
+    // Output = activation(Output) and update hidden_state
+    tensor_utils::ApplyActivationToVector(
+        output_ptr_batch, num_units * batch_size, activation, output_ptr_batch);
+    tensor_utils::CopyVector(output_ptr_batch, num_units * batch_size,
+                             hidden_state_ptr_batch);
+  } else {
+    // Output = bias
+    for (int k = 0; k < batch_size; k++) {
+      tensor_utils::CopyVector(bias_ptr, num_units,
+                               output_ptr_batch + k * output_batch_leading_dim);
+    }
+
+    // Output += input * input_weights
+    for (int k = 0; k < batch_size; k++) {
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          input_weights_ptr, num_units, input_size,
+          input_ptr_batch + k * input_size, /*n_batch=*/1,
+          output_ptr_batch + k * output_batch_leading_dim, /*result_stride=*/1);
+    }
+
+    // Output += aux_input * aux_input_weights (if they are not empty).
+    if (aux_input_size > 0) {
+      for (int k = 0; k < batch_size; k++) {
+        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+            aux_input_weights_ptr, num_units, aux_input_size,
+            aux_input_ptr_batch + k * aux_input_size,
+            /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim,
+            /*result_stride=*/1);
+      }
+    }
+
+    // Output += recurrent_weights * hidden_state
+    for (int k = 0; k < batch_size; k++) {
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          recurrent_weights_ptr, num_units, num_units,
+          hidden_state_ptr_batch + k * num_units,
+          /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim,
+          /*result_stride=*/1);
+    }
+
+    // Output = activation(Output) and update hidden_state
+    for (int k = 0; k < batch_size; k++) {
+      tensor_utils::ApplyActivationToVector(
+          output_ptr_batch + k * output_batch_leading_dim, num_units,
+          activation, output_ptr_batch + k * output_batch_leading_dim);
+      tensor_utils::CopyVector(output_ptr_batch + k * output_batch_leading_dim,
+                               num_units,
+                               hidden_state_ptr_batch + k * num_units);
+    }
   }
-  // Output += recurrent_weights * hidden_state
-  tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      recurrent_weights_ptr, num_units, num_units, hidden_state_ptr_batch,
-      batch_size, output_ptr_batch, /*result_stride=*/1);
-  // Output = activation(Output) and update hidden_state
-  tensor_utils::ApplyActivationToVector(
-      output_ptr_batch, num_units * batch_size, activation, output_ptr_batch);
-  tensor_utils::VectorBatchVectorAssign(output_ptr_batch, num_units, batch_size,
-                                        hidden_state_ptr_batch);
 }
 
-void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr,
-                  float input_weights_scale,
-                  const int8_t* recurrent_weights_ptr,
-                  float recurrent_weights_scale, const float* bias_ptr,
-                  int input_size, int num_units, int batch_size,
-                  TfLiteFusedActivation activation,
-                  int8_t* quantized_input_ptr_batch,
-                  int8_t* quantized_hidden_state_ptr_batch,
-                  float* scaling_factors, float* hidden_state_ptr_batch,
-                  float* output_ptr_batch) {
+void RnnBatchStep(
+    const float* input_ptr_batch, const int8_t* input_weights_ptr,
+    float input_weights_scale, const int8_t* recurrent_weights_ptr,
+    float recurrent_weights_scale, const float* bias_ptr, int input_size,
+    int num_units, int batch_size, int output_batch_leading_dim,
+    TfLiteFusedActivation activation, int8_t* quantized_input_ptr_batch,
+    int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors,
+    float* hidden_state_ptr_batch, float* output_ptr_batch) {
   RnnBatchStep(input_ptr_batch, input_weights_ptr, input_weights_scale,
                /*aux_input_ptr_batch=*/nullptr,
                /*aux_input_weights_ptr=*/nullptr,
                /*aux_input_weights_scale=*/0.0f, recurrent_weights_ptr,
                recurrent_weights_scale, bias_ptr, input_size,
-               /*aux_input_size=*/0, num_units, batch_size, activation,
-               quantized_input_ptr_batch,
+               /*aux_input_size=*/0, num_units, batch_size,
+               output_batch_leading_dim, activation, quantized_input_ptr_batch,
                /*aux_quantized_input_ptr_batch=*/nullptr,
                quantized_hidden_state_ptr_batch, scaling_factors,
                hidden_state_ptr_batch, output_ptr_batch);
@@ -91,82 +143,178 @@ void RnnBatchStep(
     const int8_t* aux_input_weights_ptr, float aux_input_weights_scale,
     const int8_t* recurrent_weights_ptr, float recurrent_weights_scale,
     const float* bias_ptr, int input_size, int aux_input_size, int num_units,
-    int batch_size, TfLiteFusedActivation activation,
-    int8_t* quantized_input_ptr_batch, int8_t* aux_quantized_input_ptr_batch,
+    int batch_size, int output_batch_leading_dim,
+    TfLiteFusedActivation activation, int8_t* quantized_input_ptr_batch,
+    int8_t* aux_quantized_input_ptr_batch,
     int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors,
     float* hidden_state_ptr_batch, float* output_ptr_batch) {
-  // Output = bias
-  tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size,
-                                        output_ptr_batch);
-
-  // Save quantization and matmul computation for all zero input.
-  if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) {
-    // Quantize input from float to uint8 + quantization params (scaling
-    // factor).
-    float unused_min, unused_max;
-    // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function)
-    // whichever is faster.
-    for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * input_size;
-      tensor_utils::SymmetricQuantizeFloats(
-          input_ptr_batch + offset, input_size,
-          quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-          &scaling_factors[b]);
-      scaling_factors[b] *= input_weights_scale;
+  // Since the output batch rows may not be contiguous (output_batch_leading_dim
+  // != n_output), we unroll the batched operations where this is the case.
+  if (output_batch_leading_dim == num_units) {
+    // Output = bias
+    tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size,
+                                          output_ptr_batch);
+
+    // Save quantization and matmul computation for all zero input.
+    if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) {
+      // Quantize input from float to uint8 + quantization params (scaling
+      // factor).
+      float unused_min, unused_max;
+      // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function)
+      // whichever is faster.
+      for (int b = 0; b < batch_size; ++b) {
+        const int offset = b * input_size;
+        tensor_utils::SymmetricQuantizeFloats(
+            input_ptr_batch + offset, input_size,
+            quantized_input_ptr_batch + offset, &unused_min, &unused_max,
+            &scaling_factors[b]);
+        scaling_factors[b] *= input_weights_scale;
+      }
+
+      // Output += input * input_weights
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          input_weights_ptr, num_units, input_size, quantized_input_ptr_batch,
+          scaling_factors, batch_size, output_ptr_batch, /*result_stride=*/1);
     }
 
-    // Output += input * input_weights
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        input_weights_ptr, num_units, input_size, quantized_input_ptr_batch,
-        scaling_factors, batch_size, output_ptr_batch, /*result_stride=*/1);
-  }
+    if (aux_input_ptr_batch &&
+        !tensor_utils::IsZeroVector(aux_input_ptr_batch,
+                                    batch_size * aux_input_size)) {
+      float unused_min, unused_max;
+      for (int b = 0; b < batch_size; ++b) {
+        const int offset = b * aux_input_size;
+        tensor_utils::SymmetricQuantizeFloats(
+            aux_input_ptr_batch + offset, aux_input_size,
+            aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max,
+            &scaling_factors[b]);
+        scaling_factors[b] *= aux_input_weights_scale;
+      }
 
-  if (aux_input_ptr_batch &&
-      !tensor_utils::IsZeroVector(aux_input_ptr_batch,
-                                  batch_size * aux_input_size)) {
-    float unused_min, unused_max;
-    for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * aux_input_size;
-      tensor_utils::SymmetricQuantizeFloats(
-          aux_input_ptr_batch + offset, aux_input_size,
-          aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max,
-          &scaling_factors[b]);
-      scaling_factors[b] *= aux_input_weights_scale;
+      // Output += aux_input * aux_input_weights
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          aux_input_weights_ptr, num_units, aux_input_size,
+          aux_quantized_input_ptr_batch, scaling_factors, batch_size,
+          output_ptr_batch, /*result_stride=*/1);
     }
 
-    // Output += aux_input * aux_input_weights
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        aux_input_weights_ptr, num_units, aux_input_size,
-        aux_quantized_input_ptr_batch, scaling_factors, batch_size,
-        output_ptr_batch, /*result_stride=*/1);
-  }
+    // Save quantization and matmul computation for all zero input.
+    if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch,
+                                    batch_size * num_units)) {
+      // Quantize hidden_state
+      float unused_min, unused_max;
+      for (int b = 0; b < batch_size; ++b) {
+        const int offset = b * num_units;
+        tensor_utils::SymmetricQuantizeFloats(
+            hidden_state_ptr_batch + offset, num_units,
+            quantized_hidden_state_ptr_batch + offset, &unused_min, &unused_max,
+            &scaling_factors[b]);
+        scaling_factors[b] *= recurrent_weights_scale;
+      }
 
-  // Save quantization and matmul computation for all zero input.
-  if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch,
-                                  batch_size * num_units)) {
-    // Quantize hidden_state
-    float unused_min, unused_max;
-    for (int b = 0; b < batch_size; ++b) {
-      const int offset = b * num_units;
-      tensor_utils::SymmetricQuantizeFloats(
-          hidden_state_ptr_batch + offset, num_units,
-          quantized_hidden_state_ptr_batch + offset, &unused_min, &unused_max,
-          &scaling_factors[b]);
-      scaling_factors[b] *= recurrent_weights_scale;
+      // Output += recurrent_weights * hidden_state
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          recurrent_weights_ptr, num_units, num_units,
+          quantized_hidden_state_ptr_batch, scaling_factors, batch_size,
+          output_ptr_batch, /*result_stride=*/1);
     }
 
-    // Output += recurrent_weights * hidden_state
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        recurrent_weights_ptr, num_units, num_units,
-        quantized_hidden_state_ptr_batch, scaling_factors, batch_size,
-        output_ptr_batch, /*result_stride=*/1);
-  }
+    // Output = activation(Output) and update hidden_state
+    tensor_utils::ApplyActivationToVector(
+        output_ptr_batch, num_units * batch_size, activation, output_ptr_batch);
+    tensor_utils::CopyVector(output_ptr_batch, num_units * batch_size,
+                             hidden_state_ptr_batch);
+  } else {
+    // Output = bias
+    for (int k = 0; k < batch_size; k++) {
+      tensor_utils::CopyVector(bias_ptr, num_units,
+                               output_ptr_batch + k * output_batch_leading_dim);
+    }
 
-  // Output = activation(Output) and update hidden_state
-  tensor_utils::ApplyActivationToVector(
-      output_ptr_batch, num_units * batch_size, activation, output_ptr_batch);
-  tensor_utils::VectorBatchVectorAssign(output_ptr_batch, num_units, batch_size,
-                                        hidden_state_ptr_batch);
+    // Save quantization and matmul computation for all zero input.
+    if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) {
+      // Quantize input from float to uint8 + quantization params (scaling
+      // factor).
+      float unused_min, unused_max;
+      // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function)
+      // whichever is faster.
+      for (int b = 0; b < batch_size; ++b) {
+        const int offset = b * input_size;
+        tensor_utils::SymmetricQuantizeFloats(
+            input_ptr_batch + offset, input_size,
+            quantized_input_ptr_batch + offset, &unused_min, &unused_max,
+            &scaling_factors[b]);
+        scaling_factors[b] *= input_weights_scale;
+      }
+
+      // Output += input * input_weights
+      for (int k = 0; k < batch_size; k++) {
+        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+            input_weights_ptr, num_units, input_size,
+            quantized_input_ptr_batch + k * input_size, &scaling_factors[k],
+            /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim,
+            /*result_stride=*/1);
+      }
+    }
+
+    if (aux_input_ptr_batch &&
+        !tensor_utils::IsZeroVector(aux_input_ptr_batch,
+                                    batch_size * aux_input_size)) {
+      float unused_min, unused_max;
+      for (int b = 0; b < batch_size; ++b) {
+        const int offset = b * aux_input_size;
+        tensor_utils::SymmetricQuantizeFloats(
+            aux_input_ptr_batch + offset, aux_input_size,
+            aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max,
+            &scaling_factors[b]);
+        scaling_factors[b] *= aux_input_weights_scale;
+      }
+
+      // Output += aux_input * aux_input_weights
+      for (int k = 0; k < batch_size; k++) {
+        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+            aux_input_weights_ptr, num_units, aux_input_size,
+            aux_quantized_input_ptr_batch + k * aux_input_size,
+            &scaling_factors[k],
+            /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim,
+            /*result_stride=*/1);
+      }
+    }
+
+    // Save quantization and matmul computation for all zero input.
+    if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch,
+                                    batch_size * num_units)) {
+      // Quantize hidden_state
+      float unused_min, unused_max;
+      for (int b = 0; b < batch_size; ++b) {
+        const int offset = b * num_units;
+        tensor_utils::SymmetricQuantizeFloats(
+            hidden_state_ptr_batch + offset, num_units,
+            quantized_hidden_state_ptr_batch + offset, &unused_min, &unused_max,
+            &scaling_factors[b]);
+        scaling_factors[b] *= recurrent_weights_scale;
+      }
+
+      // Output += recurrent_weights * hidden_state
+      for (int k = 0; k < batch_size; k++) {
+        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+            recurrent_weights_ptr, num_units, num_units,
+            quantized_hidden_state_ptr_batch + k * num_units,
+            &scaling_factors[k],
+            /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim,
+            /*result_stride=*/1);
+      }
+    }
+
+    // Output = activation(Output) and update hidden_state
+    for (int k = 0; k < batch_size; k++) {
+      tensor_utils::ApplyActivationToVector(
+          output_ptr_batch + k * output_batch_leading_dim, num_units,
+          activation, output_ptr_batch + k * output_batch_leading_dim);
+      tensor_utils::CopyVector(output_ptr_batch + k * output_batch_leading_dim,
+                               num_units,
+                               hidden_state_ptr_batch + k * num_units);
+    }
+  }
 }
 
 }  // namespace kernel_utils
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
index 74e0a4a53d..0387d753e5 100644
--- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
+++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h
@@ -29,9 +29,17 @@ namespace kernel_utils {
 // The pointers with the suffix "_batch" point to data aligned in batch_major
 // order, and each step processes batch_size many inputs from input_ptr_batch,
 // and updates batch_size many outputs and hidden states.
+//
+// The output_batch_dim is output.shape[-1], i.e. the outermost dimension of the
+// output tensor, and in most cases will be equal to num_units. It is usually
+// not when we want to store the RNN output into a slice of the output tensor,
+// e.g. for bidirectional RNNs with merge_outputs. In this case, the batched
+// operations cannot be used since they assume that the batched outputs are
+// contiguous, and we manually loop over the batched outputs.
 void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr,
                   const float* recurrent_weights_ptr, const float* bias_ptr,
                   int input_size, int num_units, int batch_size,
+                  int output_batch_leading_dim,
                   TfLiteFusedActivation activation,
                   float* hidden_state_ptr_batch, float* output_ptr_batch);
 
@@ -41,7 +49,8 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr,
                   const float* aux_input_weights_ptr,
                   const float* recurrent_weights_ptr, const float* bias_ptr,
                   int input_size, int aux_input_size, int num_units,
-                  int batch_size, TfLiteFusedActivation activation,
+                  int batch_size, int output_batch_leading_dim,
+                  TfLiteFusedActivation activation,
                   float* hidden_state_ptr_batch, float* output_ptr_batch);
 
 // Performs a quantized RNN batch inference step. Same as above, but for
@@ -54,16 +63,14 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr,
 // batch_size) is used to store the scaling factors of the quantization (used
 // for recovery).
 // {input,recurrent}_weights_scale params are used for dequantization/recovery.
-void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr,
-                  float input_weights_scale,
-                  const int8_t* recurrent_weights_ptr,
-                  float recurrent_weights_scale, const float* bias_ptr,
-                  int input_size, int num_units, int batch_size,
-                  TfLiteFusedActivation activation,
-                  int8_t* quantized_input_ptr_batch,
-                  int8_t* quantized_hidden_state_ptr_batch,
-                  float* scaling_factors, float* hidden_state_ptr_batch,
-                  float* output_ptr_batch);
+void RnnBatchStep(
+    const float* input_ptr_batch, const int8_t* input_weights_ptr,
+    float input_weights_scale, const int8_t* recurrent_weights_ptr,
+    float recurrent_weights_scale, const float* bias_ptr, int input_size,
+    int num_units, int batch_size, int output_batch_leading_dim,
+    TfLiteFusedActivation activation, int8_t* quantized_input_ptr_batch,
+    int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors,
+    float* hidden_state_ptr_batch, float* output_ptr_batch);
 
 void RnnBatchStep(
     const float* input_ptr_batch, const int8_t* input_weights_ptr,
@@ -71,8 +78,9 @@ void RnnBatchStep(
     const int8_t* aux_input_weights_ptr, float aux_input_weights_scale,
     const int8_t* recurrent_weights_ptr, float recurrent_weights_scale,
     const float* bias_ptr, int input_size, int aux_input_size, int num_units,
-    int batch_size, TfLiteFusedActivation activation,
-    int8_t* quantized_input_ptr_batch, int8_t* aux_quantized_input_ptr_batch,
+    int batch_size, int output_batch_leading_dim,
+    TfLiteFusedActivation activation, int8_t* quantized_input_ptr_batch,
+    int8_t* aux_quantized_input_ptr_batch,
     int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors,
     float* hidden_state_ptr_batch, float* output_ptr_batch);
 
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.cc b/tensorflow/contrib/lite/kernels/lstm_eval.cc
index 20a4e30009..f228488c65 100644
--- a/tensorflow/contrib/lite/kernels/lstm_eval.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.cc
@@ -36,12 +36,20 @@ namespace {
 //  - n_cell: number of cells (or units),
 //  - n_input: the input size,
 //  - n_output: the output size.
+//  - output_batch_leading_dim: the leading dimension of the output buffer.
 //
 // The pointers to the cell and output state and the output are updated.
 //
 // The pointers with the suffix "_batch" point to data aligned in batch_major
 // order, and each step processes batch_size many inputs from input_ptr_batch,
 // and updates batch_size many cell and output states.
+//
+// The output_batch_dim is output.shape[-1], i.e. the outermost dimension of the
+// output tensor, and in most cases will be equal to n_output. It is usually not
+// when we want to store the LSTM output into a slice of the output tensor, e.g.
+// for bidirectional LSTMs with merge_outputs. In this case, the batched
+// operations cannot be used since they assume that the batched outputs are
+// contiguous, and we manually loop over the batched outputs.
 inline void LstmStepWithAuxInput(
     const float* input_ptr_batch, const float* input_to_input_weights_ptr,
     const float* input_to_forget_weights_ptr,
@@ -62,7 +70,8 @@ inline void LstmStepWithAuxInput(
     const float* output_gate_bias_ptr, const float* projection_weights_ptr,
     const float* projection_bias_ptr, const TfLiteLSTMParams* params,
     int n_batch, int n_cell, int n_input, int n_aux_input, int n_output,
-    float* output_state_ptr, float* cell_state_ptr, float* input_gate_scratch,
+    int output_batch_leading_dim, float* output_state_ptr,
+    float* cell_state_ptr, float* input_gate_scratch,
     float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
     float* output_ptr_batch) {
   // Since we have already checked that weights are all there or none, we can
@@ -188,29 +197,72 @@ inline void LstmStepWithAuxInput(
   tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
                                          n_batch * n_cell, output_gate_scratch);
 
-  // For each batch: update the projection and output_state.
   const bool use_projection_weight = (projection_weights_ptr != nullptr);
   const bool use_projection_bias = (projection_bias_ptr != nullptr);
-  if (use_projection_weight) {
-    if (use_projection_bias) {
-      tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
-                                            n_batch, output_ptr_batch);
+
+  // For each batch: update the projection and output_state. Note that since
+  // the output batch rows may not be contiguous (output_batch_leading_dim !=
+  // n_output), we unroll the batched operations where this is the case.
+  if (output_batch_leading_dim == n_output) {
+    if (use_projection_weight) {
+      if (use_projection_bias) {
+        tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
+                                              n_batch, output_ptr_batch);
+      } else {
+        tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+      }
+      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+          projection_weights_ptr, n_output, n_cell, output_gate_scratch,
+          n_batch, output_ptr_batch, /*result_stride=*/1);
+      if (params->proj_clip > 0.0) {
+        tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
+                                 params->proj_clip, output_ptr_batch);
+      }
     } else {
-      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
-    }
-    tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-        projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch,
-        output_ptr_batch, /*result_stride=*/1);
-    if (params->proj_clip > 0.0) {
-      tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
-                               params->proj_clip, output_ptr_batch);
+      tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
+                               output_ptr_batch);
     }
+    tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
+                             output_state_ptr);
   } else {
-    tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
-                             output_ptr_batch);
+    if (use_projection_weight) {
+      if (use_projection_bias) {
+        for (int k = 0; k < n_batch; k++) {
+          tensor_utils::CopyVector(
+              projection_bias_ptr, n_output,
+              output_ptr_batch + k * output_batch_leading_dim);
+        }
+      } else {
+        for (int k = 0; k < n_batch; k++) {
+          tensor_utils::ZeroVector(
+              output_ptr_batch + k * output_batch_leading_dim, n_output);
+        }
+      }
+      for (int k = 0; k < n_batch; k++) {
+        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+            projection_weights_ptr, n_output, n_cell,
+            output_gate_scratch + k * n_cell,
+            /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim,
+            /*result_stride=*/1);
+        if (params->proj_clip > 0.0) {
+          tensor_utils::ClipVector(
+              output_ptr_batch + k * output_batch_leading_dim, n_output,
+              params->proj_clip,
+              output_ptr_batch + k * output_batch_leading_dim);
+        }
+      }
+    } else {
+      for (int k = 0; k < n_batch; k++) {
+        tensor_utils::CopyVector(
+            output_gate_scratch + k * n_output, n_output,
+            output_ptr_batch + k * output_batch_leading_dim);
+      }
+    }
+    for (int k = 0; k < n_batch; k++) {
+      tensor_utils::CopyVector(output_ptr_batch + k * output_batch_leading_dim,
+                               n_output, output_state_ptr + k * n_output);
+    }
   }
-  tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
-                           output_state_ptr);
 }
 
 // Same as above but with quantized weight matrices. In detail:
@@ -263,7 +315,7 @@ inline void LstmStepWithAuxInput(
 // Outputs:
 //   output_state_ptr - size 'n_batch * n_output'
 //   cell_state_ptr   - size 'n_batch * n_cell'
-//   output_ptr_batch - size 'n_batch * n_output'
+//   output_ptr_batch - size 'n_batch * output_batch_leading_dim'
 inline void LstmStepWithAuxInput(
     const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr,
     float input_to_input_weights_scale,
@@ -297,13 +349,13 @@ inline void LstmStepWithAuxInput(
     const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
     float projection_weights_scale, const float* projection_bias_ptr,
     const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-    int n_aux_input, int n_output, float* input_gate_scratch,
-    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
-    float* scaling_factors, float* product_scaling_factors,
-    float* recovered_cell_weights, int8_t* quantized_input_ptr_batch,
-    int8_t* quantized_aux_input_ptr_batch, int8_t* quantized_output_state_ptr,
-    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
-    float* cell_state_ptr, float* output_ptr_batch) {
+    int n_aux_input, int n_output, int output_batch_leading_dim,
+    float* input_gate_scratch, float* forget_gate_scratch, float* cell_scratch,
+    float* output_gate_scratch, float* scaling_factors,
+    float* product_scaling_factors, float* recovered_cell_weights,
+    int8_t* quantized_input_ptr_batch, int8_t* quantized_aux_input_ptr_batch,
+    int8_t* quantized_output_state_ptr, int8_t* quantized_cell_state_ptr,
+    float* output_state_ptr, float* cell_state_ptr, float* output_ptr_batch) {
   // Since we have already checked that weights are all there or none, we
   // can check the existense of only one to the get the condition.
   const bool use_cifg = (input_to_input_weights_ptr == nullptr);
@@ -536,45 +588,106 @@ inline void LstmStepWithAuxInput(
   tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch,
                                          n_batch * n_cell, output_gate_scratch);
 
-  // For each batch: update the projection and output_state.
   const bool use_projection_weight = (projection_weights_ptr != nullptr);
   const bool use_projection_bias = (projection_bias_ptr != nullptr);
-  if (use_projection_weight) {
-    if (use_projection_bias) {
-      tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
-                                            n_batch, output_ptr_batch);
+
+  // For each batch: update the projection and output_state. Note that since
+  // the output batch rows may not be contiguous (output_batch_leading_dim !=
+  // n_output), we unroll the batched operations where this is the case.
+  if (output_batch_leading_dim == n_output) {
+    if (use_projection_weight) {
+      if (use_projection_bias) {
+        tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output,
+                                              n_batch, output_ptr_batch);
+      } else {
+        tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+      }
+      if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
+        // Save quantization and matmul computation for all zero input.
+        float unused_min, unused_max;
+        for (int b = 0; b < n_batch; ++b) {
+          const int offset = b * n_cell;
+          tensor_utils::SymmetricQuantizeFloats(
+              output_gate_scratch + offset, n_cell,
+              quantized_cell_state_ptr + offset, &unused_min, &unused_max,
+              &scaling_factors[b]);
+        }
+        for (int b = 0; b < n_batch; ++b) {
+          product_scaling_factors[b] =
+              scaling_factors[b] * projection_weights_scale;
+        }
+        tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+            projection_weights_ptr, n_output, n_cell, quantized_cell_state_ptr,
+            product_scaling_factors, n_batch, output_ptr_batch,
+            /*result_stride=*/1);
+      }
+      if (params->proj_clip > 0.0) {
+        tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
+                                 params->proj_clip, output_ptr_batch);
+      }
     } else {
-      tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output);
+      tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
+                               output_ptr_batch);
     }
-    if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
-      // Save quantization and matmul computation for all zero input.
-      float unused_min, unused_max;
-      for (int b = 0; b < n_batch; ++b) {
-        const int offset = b * n_cell;
-        tensor_utils::SymmetricQuantizeFloats(
-            output_gate_scratch + offset, n_cell,
-            quantized_cell_state_ptr + offset, &unused_min, &unused_max,
-            &scaling_factors[b]);
+    tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
+                             output_state_ptr);
+  } else {
+    if (use_projection_weight) {
+      if (use_projection_bias) {
+        for (int k = 0; k < n_batch; k++) {
+          tensor_utils::CopyVector(
+              projection_bias_ptr, n_output,
+              output_ptr_batch + k * output_batch_leading_dim);
+        }
+      } else {
+        for (int k = 0; k < n_batch; k++) {
+          tensor_utils::ZeroVector(
+              output_ptr_batch + k * output_batch_leading_dim, n_output);
+        }
       }
-      for (int b = 0; b < n_batch; ++b) {
-        product_scaling_factors[b] =
-            scaling_factors[b] * projection_weights_scale;
+      if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
+        // Save quantization and matmul computation for all zero input.
+        float unused_min, unused_max;
+        for (int b = 0; b < n_batch; ++b) {
+          const int offset = b * n_cell;
+          tensor_utils::SymmetricQuantizeFloats(
+              output_gate_scratch + offset, n_cell,
+              quantized_cell_state_ptr + offset, &unused_min, &unused_max,
+              &scaling_factors[b]);
+        }
+        for (int b = 0; b < n_batch; ++b) {
+          product_scaling_factors[b] =
+              scaling_factors[b] * projection_weights_scale;
+        }
+        for (int k = 0; k < n_batch; k++) {
+          tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+              projection_weights_ptr, n_output, n_cell,
+              quantized_cell_state_ptr + k * n_cell,
+              &product_scaling_factors[k],
+              /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim,
+              /*result_stride=*/1);
+        }
+      }
+      if (params->proj_clip > 0.0) {
+        for (int k = 0; k < n_batch; k++) {
+          tensor_utils::ClipVector(
+              output_ptr_batch + k * output_batch_leading_dim, n_output,
+              params->proj_clip,
+              output_ptr_batch + k * output_batch_leading_dim);
+        }
+      }
+    } else {
+      for (int k = 0; k < n_batch; k++) {
+        tensor_utils::CopyVector(
+            output_gate_scratch + k * n_output, n_output,
+            output_ptr_batch + k * output_batch_leading_dim);
       }
-      tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-          projection_weights_ptr, n_output, n_cell, quantized_cell_state_ptr,
-          product_scaling_factors, n_batch, output_ptr_batch,
-          /*result_stride=*/1);
     }
-    if (params->proj_clip > 0.0) {
-      tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output,
-                               params->proj_clip, output_ptr_batch);
+    for (int k = 0; k < n_batch; k++) {
+      tensor_utils::CopyVector(output_ptr_batch + k * output_batch_leading_dim,
+                               n_output, output_state_ptr + k * n_output);
     }
-  } else {
-    tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output,
-                             output_ptr_batch);
   }
-  tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output,
-                           output_state_ptr);
 }
 }  // namespace
 
@@ -664,8 +777,10 @@ TfLiteStatus EvalFloat(
   }
 
   // Loop through the sequence.
+  const int output_batch_leading_dim =
+      output->dims->data[output->dims->size - 1];
   const int input_step = n_batch * n_input;
-  const int output_step = n_batch * output->dims->data[output->dims->size - 1];
+  const int output_step = n_batch * output_batch_leading_dim;
   for (int t = 0; t < max_time; t++) {
     // If this is the forward_sequence, step forward, otherwise step backwards.
     const int t_rel = forward_sequence ? t : max_time - t - 1;
@@ -685,9 +800,9 @@ TfLiteStatus EvalFloat(
         input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
         output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
         params, n_batch, n_cell, n_input, aux_input_size, n_output,
-        activation_state->data.f, cell_state->data.f, input_gate_scratch,
-        forget_gate_scratch, cell_scratch, output_gate_scratch,
-        output_ptr_time);
+        output_batch_leading_dim, activation_state->data.f, cell_state->data.f,
+        input_gate_scratch, forget_gate_scratch, cell_scratch,
+        output_gate_scratch, output_ptr_time);
   }
   return kTfLiteOk;
 }
@@ -868,8 +983,10 @@ TfLiteStatus EvalHybrid(
   }
 
   // Feed the sequence into the LSTM step-by-step.
+  const int output_batch_leading_dim =
+      output->dims->data[output->dims->size - 1];
   const int input_step = n_batch * n_input;
-  const int output_step = n_batch * output->dims->data[output->dims->size - 1];
+  const int output_step = n_batch * output_batch_leading_dim;
   for (int t = 0; t < max_time; t++) {
     // If this is the forward_sequence, step forward, otherwise step backwards.
     const int t_rel = forward_sequence ? t : max_time - t - 1;
@@ -895,9 +1012,9 @@ TfLiteStatus EvalHybrid(
         cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr,
         cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
         projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell,
-        n_input, aux_input_size, n_output, input_gate_scratch,
-        forget_gate_scratch, cell_scratch, output_gate_scratch,
-        scaling_factors_ptr, prod_scaling_factors_ptr,
+        n_input, aux_input_size, n_output, output_batch_leading_dim,
+        input_gate_scratch, forget_gate_scratch, cell_scratch,
+        output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
         recovered_cell_weights_ptr, quantized_input_ptr,
         quantized_aux_input_ptr, quantized_output_state_ptr,
         quantized_cell_state_ptr, output_state_ptr, cell_state_ptr, output_ptr);
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
index 744ee7c109..354b837b3e 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
@@ -166,10 +166,10 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input,
           input->data.f + s * input_size * batch_size;
       float* output_ptr_batch = output->data.f + s * num_units * batch_size;
 
-      kernel_utils::RnnBatchStep(input_ptr_batch, input_weights_ptr,
-                                 recurrent_weights_ptr, bias_ptr, input_size,
-                                 num_units, batch_size, params->activation,
-                                 hidden_state_ptr_batch, output_ptr_batch);
+      kernel_utils::RnnBatchStep(
+          input_ptr_batch, input_weights_ptr, recurrent_weights_ptr, bias_ptr,
+          input_size, num_units, batch_size, num_units, params->activation,
+          hidden_state_ptr_batch, output_ptr_batch);
     }
   } else {
     // For each batch
@@ -185,8 +185,8 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input,
 
         kernel_utils::RnnBatchStep(
             input_ptr_batch, input_weights_ptr, recurrent_weights_ptr, bias_ptr,
-            input_size, num_units, /*batch_size=*/1, params->activation,
-            hidden_state_ptr_batch, output_ptr_batch);
+            input_size, num_units, /*batch_size=*/1, num_units,
+            params->activation, hidden_state_ptr_batch, output_ptr_batch);
       }
     }
   }
@@ -237,8 +237,8 @@ TfLiteStatus EvalHybrid(
       kernel_utils::RnnBatchStep(
           input_ptr_batch, input_weights_ptr, input_weights_scale,
           recurrent_weights_ptr, recurrent_weights_scale, bias_ptr, input_size,
-          num_units, batch_size, params->activation, quantized_input_ptr,
-          quantized_hidden_state_ptr, scaling_factors_ptr,
+          num_units, batch_size, num_units, params->activation,
+          quantized_input_ptr, quantized_hidden_state_ptr, scaling_factors_ptr,
           hidden_state_ptr_batch, output_ptr_batch);
     }
   } else {
@@ -256,8 +256,8 @@ TfLiteStatus EvalHybrid(
         kernel_utils::RnnBatchStep(
             input_ptr_batch, input_weights_ptr, input_weights_scale,
             recurrent_weights_ptr, recurrent_weights_scale, bias_ptr,
-            input_size, num_units, /*batch_size=*/1, params->activation,
-            quantized_input_ptr, quantized_hidden_state_ptr,
+            input_size, num_units, /*batch_size=*/1, num_units,
+            params->activation, quantized_input_ptr, quantized_hidden_state_ptr,
             scaling_factors_ptr, hidden_state_ptr_batch, output_ptr_batch);
       }
     }
-- 
GitLab


From bfdfd1617d712a0b78f12a146cf9f2f4df6d75f7 Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Thu, 11 Oct 2018 18:39:13 -0700
Subject: [PATCH 0839/1085] TPUEstimator: Initialize dataset iterators in
 parallel.

PiperOrigin-RevId: 216796462
---
 tensorflow/contrib/tpu/BUILD                  |  1 +
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 56 +++++++++----------
 .../python/tpu/tpu_estimator_signals_test.py  | 25 ++++-----
 tensorflow/python/estimator/util.py           | 14 +++++
 4 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 8c36d5a297..acdc47482b 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -86,6 +86,7 @@ py_library(
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:variables",
         "//tensorflow/python/estimator:estimator_py",
+        "//tensorflow/python/estimator:util",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 23c30e3f06..a00acdcbce 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -471,8 +471,10 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
 
   def after_create_session(self, session, coord):
     logging.info('Init TPU system')
+    start = time.time()
     session.run(self._init_ops,
                 options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000))
+    logging.info('Initialized TPU in %d seconds', time.time() - start)
 
     self._infeed_controller = self._create_infeed_controller(
         name='InfeedController', target=self._run_infeed, args=(session,))
@@ -700,7 +702,7 @@ def generate_per_host_enqueue_ops_fn_for_host(
   """Generates infeed enqueue ops for per-host input_fn on a single host."""
   captured_infeed_queue = _CapturedObject()
 
-  hooks = []
+  dataset_initializer = None
 
   with ops.device(device):
     user_context = tpu_context.TPUContext(
@@ -722,7 +724,7 @@ def generate_per_host_enqueue_ops_fn_for_host(
           add_padding=True)
 
     if is_dataset:
-      hooks.append(inputs.dataset_initializer_hook())
+      dataset_initializer = inputs.dataset_initializer()
 
     tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
 
@@ -768,14 +770,14 @@ def generate_per_host_enqueue_ops_fn_for_host(
             'signals': signals,
         }
 
-  return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset
+  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
 
 
 def generate_per_host_v2_enqueue_ops_fn_for_host(
     ctx, input_fn, inputs_structure_recorder, device, host_id):
   """Generates infeed enqueue ops for per-host input_fn on a single host."""
   captured_infeed_queue = _CapturedObject()
-  hooks = []
+  dataset_initializer = None
 
   with ops.device(device):
     user_context = tpu_context.TPUContext(
@@ -796,7 +798,7 @@ def generate_per_host_v2_enqueue_ops_fn_for_host(
           add_padding=True,
           num_invocations_per_step=ctx.num_of_replicas_per_host)
 
-    hooks.append(inputs.dataset_initializer_hook())
+    dataset_initializer = inputs.dataset_initializer()
     tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
 
   def enqueue_ops_fn():
@@ -857,14 +859,14 @@ def generate_per_host_v2_enqueue_ops_fn_for_host(
           'signals': signals,
       }
 
-  return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset
+  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
 
 
 def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
                                       num_hosts):
   """Generates infeed enqueue ops for one input_fn on all the hosts."""
   captured_infeed_queue = _CapturedObject()
-  hooks = []
+  dataset_initializer = None
   device_0 = ctx.tpu_host_placement_function(host_id=0)
   with ops.device(device_0):
     user_context = tpu_context.TPUContext(
@@ -884,7 +886,7 @@ def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
           add_padding=True)
 
     if is_dataset:
-      hooks.append(inputs.dataset_initializer_hook())
+      dataset_initializer = inputs.dataset_initializer()
     num_replicas_per_host = ctx.num_of_replicas_per_host
 
   def tpu_ordinal_function_impl(replica_id):
@@ -935,7 +937,7 @@ def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
           'signals': signals,
       }
 
-  return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset
+  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
 
 
 class _InputPipeline(object):
@@ -1139,7 +1141,7 @@ class _InputPipeline(object):
     """Deploys the input pipeline and record input structure."""
     enqueue_ops = []
     infeed_queues = []
-    all_hooks = []
+    all_dataset_initializers = []
     num_hosts = self._ctx.num_hosts
     tpu_host_placement_fn = self._ctx.tpu_host_placement_function
 
@@ -1171,12 +1173,12 @@ class _InputPipeline(object):
     elif self._ctx.is_input_broadcast_with_iterators():
       # Only calls input_fn in host 0.
       host_device = tpu_host_placement_fn(host_id=0)
-      enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = (
+      enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
           generate_broadcast_enqueue_ops_fn(self._ctx, self._input_fn,
                                             self._inputs_structure_recorder,
                                             num_hosts))
-      all_hooks.extend(hooks)
-      if is_dataset:
+      if dataset_initializer:
+        all_dataset_initializers.append(dataset_initializer)
         run_infeed_loop_on_coordinator = False
         wrap_fn = (
             _wrap_computation_in_while_loop
@@ -1192,17 +1194,16 @@ class _InputPipeline(object):
         with ops.device(host_device):
           with ops.name_scope('input_pipeline_task%d' % (host_id)):
             if self._ctx.is_input_per_host_with_iterators():
-              enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = (
+              enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
                   generate_per_host_v2_enqueue_ops_fn_for_host(
                       self._ctx, self._input_fn,
                       self._inputs_structure_recorder, host_device, host_id))
             else:
-              enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = (
+              enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
                   generate_per_host_enqueue_ops_fn_for_host(
                       self._ctx, self._input_fn,
                       self._inputs_structure_recorder, self._batch_axis,
                       host_device, host_id))
-            all_hooks.extend(hooks)
 
             # NOTE(xiejw): We dispatch here based on the return type of the
             # users `input_fn`.
@@ -1216,7 +1217,8 @@ class _InputPipeline(object):
             # handled in TF control flow properly. In this case, we will use
             # python loop to enqueue the data into TPU system.  This may be
             # slow compared to the previous case.
-            if is_dataset:
+            if dataset_initializer:
+              all_dataset_initializers.append(dataset_initializer)
               run_infeed_loop_on_coordinator = False
               wrap_fn = (
                   _wrap_computation_in_while_loop
@@ -1231,7 +1233,9 @@ class _InputPipeline(object):
     # dequeue is dtypes and types. So, any one can be used. Here, grab the
     # first one.
     self._infeed_queue = infeed_queues[0]
-    return enqueue_ops, all_hooks, run_infeed_loop_on_coordinator
+    return enqueue_ops, [
+        estimator_util.MultiHostDatasetInitializerHook(all_dataset_initializers)
+    ], run_infeed_loop_on_coordinator
 
   def _validate_input_pipeline(self):
     """Validates the input pipeline.
@@ -3052,23 +3056,19 @@ class _Inputs(object):
     """Returns True if the return value from input_fn is Dataset."""
     return self._dataset is not None
 
-  def dataset_initializer_hook(self):
-    """Returns a `SessionRunHook` to initialize this dataset.
+  def dataset_initializer(self):
+    """Returns the dataset's initializer.
 
-    This must be called before `features_and_labels`.
+    The initializer must be run before calling `features_and_labels`.
     """
-    iterator = self._dataset.make_initializable_iterator()
-    # pylint: disable=protected-access
-    hook = estimator_util._DatasetInitializerHook(iterator)
-    # pylint: enable=protected-access
-    self._iterator = iterator
-    return hook
+    self._iterator = self._dataset.make_initializable_iterator()
+    return self._iterator.initializer
 
   def features_and_labels(self):
     """Gets `features` and `labels`."""
     if self.is_dataset:
       if self._iterator is None:
-        raise RuntimeError('Internal error: Must call dataset_initializer_hook '
+        raise RuntimeError('Internal error: Must run dataset_initializer '
                            'before calling features_and_labels(). Please file '
                            'a bug!')
       return _Inputs._parse_inputs(self._iterator.get_next())
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
index bd530fdc3a..3786e52b94 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
@@ -100,7 +100,7 @@ class TPUEstimatorStoppingSignalsTest(test.TestCase):
     with ops.Graph().as_default():
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, _ = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -108,8 +108,7 @@ class TPUEstimatorStoppingSignalsTest(test.TestCase):
       self.assertIsNone(features['a'].shape.as_list()[0])
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         result, evaluated_signals = sess.run([features, signals])
         self.assertAllEqual(a[:batch_size], result['a'])
@@ -143,7 +142,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
                                                         add_padding=True)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, _ = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -151,8 +150,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       self.assertEqual(batch_size, features['a'].shape.as_list()[0])
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         result, evaluated_signals = sess.run([features, signals])
         self.assertAllEqual(a[:batch_size], result['a'])
@@ -187,7 +185,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
                                                         add_padding=True)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, labels = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -195,8 +193,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       self.assertEqual(batch_size, features['a'].shape.as_list()[0])
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         evaluated_features, evaluated_labels, evaluated_signals = (
             sess.run([features, labels, signals]))
@@ -255,7 +252,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
                                                         add_padding=True)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, _ = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -264,8 +261,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
               features, signals))
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         result, evaluated_signals = sess.run([sliced_features, signals])
         self.assertAllEqual(a[:batch_size], result['a'])
@@ -297,7 +293,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(
           dataset, batch_size, add_padding=True, num_invocations_per_step=2)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, _ = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -305,8 +301,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
           tpu_estimator._PaddingSignals.slice_tensor_or_dict(features, signals))
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         result, evaluated_signals = sess.run([sliced_features, signals])
         self.assertAllEqual(a[:batch_size], result['a'])
diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index fb110c4b7b..bc621f948d 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -135,6 +135,20 @@ class _DatasetInitializerHook(training.SessionRunHook):
     session.run(self._initializer)
 
 
+class MultiHostDatasetInitializerHook(training.SessionRunHook):
+  """Creates a SessionRunHook that initializes all passed iterators."""
+
+  def __init__(self, dataset_initializers):
+    self._initializers = dataset_initializers
+
+  def after_create_session(self, session, coord):
+    del coord
+    start = time.time()
+    session.run(self._initializers)
+    logging.info('Initialized dataset iterators in %d seconds',
+                 time.time() - start)
+
+
 class StrategyInitFinalizeHook(training.SessionRunHook):
   """Creates a SessionRunHook that initializes and shutsdown devices."""
 
-- 
GitLab


From 5408fbd7f0be820a46d0d245899ef63d721ed96e Mon Sep 17 00:00:00 2001
From: Kay Zhu <kayzhu@google.com>
Date: Thu, 11 Oct 2018 18:58:48 -0700
Subject: [PATCH 0840/1085] [XLA:GPU] Adding a test case for Scatter where GPU
 implementation fails.

PiperOrigin-RevId: 216798034
---
 tensorflow/compiler/xla/tests/scatter_test.cc | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tensorflow/compiler/xla/tests/scatter_test.cc b/tensorflow/compiler/xla/tests/scatter_test.cc
index d0cb93befa..1854224dff 100644
--- a/tensorflow/compiler/xla/tests/scatter_test.cc
+++ b/tensorflow/compiler/xla/tests/scatter_test.cc
@@ -129,6 +129,38 @@ ENTRY main {
   RunTest(hlo_text, &operand, &scatter_indices, &updates);
 }
 
+// TODO(b/117627031): fails on GPU on 2018-10-11.
+XLA_TEST_F(ScatterTest, DISABLED_ON_GPU(SimpleR4)) {
+  const char* hlo_text = R"(
+HloModule SimpleR4
+
+add_f32 (lhs: f32[], rhs: f32[]) -> f32[] {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT add = f32[] add(f32[] lhs, f32[] rhs)
+}
+
+ENTRY main {
+  operand = f32[1,2,2,1] parameter(0)
+  indices = s32[1,3] parameter(1)
+  updates = f32[1,2,2,1] parameter(2)
+  ROOT scatter = f32[1,2,2,1] scatter(operand, indices, updates),
+      to_apply=add_f32,
+      update_window_dims={1,2,3},
+      inserted_window_dims={0},
+      scatter_dims_to_operand_dims={0, 2, 1},
+      index_vector_dim=1
+}
+)";
+
+  Literal operand =
+      LiteralUtil::CreateR4<float>({{{{0.f}, {0.f}}, {{0.f}, {0.f}}}});
+  Literal updates =
+      LiteralUtil::CreateR4<float>({{{{0.12}, {0.28}}, {{0.018}, {0.42}}}});
+  Literal scatter_indices = LiteralUtil::CreateR2<int32>({{0, 0, 0}});
+  RunTest(hlo_text, &operand, &scatter_indices, &updates);
+}
+
 XLA_TEST_F(ScatterTest, TensorFlowScatter_Add) {
   const string hlo_text = R"(
 HloModule TensorFlowScatter_Add
-- 
GitLab


From f9172aae8aff2b0bbfb290097546b524859a7934 Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Thu, 11 Oct 2018 18:59:45 -0700
Subject: [PATCH 0841/1085] LSTMBlockCell Fp16 python change

PiperOrigin-RevId: 216798109
---
 tensorflow/contrib/rnn/python/ops/lstm_ops.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
index f645165efe..f2975b9806 100644
--- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
@@ -346,6 +346,7 @@ class LSTMBlockCell(LayerRNNCell):
                forget_bias=1.0,
                cell_clip=None,
                use_peephole=False,
+               dtype=None,
                reuse=None,
                name="lstm_cell"):
     """Initialize the basic LSTM cell.
@@ -355,6 +356,7 @@ class LSTMBlockCell(LayerRNNCell):
       forget_bias: float, The bias added to forget gates (see above).
       cell_clip: An optional `float`. Defaults to `-1` (no clipping).
       use_peephole: Whether to use peephole connections or not.
+      dtype: the variable dtype of this layer. Default to tf.float32.
       reuse: (optional) boolean describing whether to reuse variables in an
         existing scope.  If not `True`, and the existing scope already has the
         given variables, an error is raised.
@@ -366,7 +368,7 @@ class LSTMBlockCell(LayerRNNCell):
       When restoring from CudnnLSTM-trained checkpoints, must use
       CudnnCompatibleLSTMBlockCell instead.
     """
-    super(LSTMBlockCell, self).__init__(_reuse=reuse, name=name)
+    super(LSTMBlockCell, self).__init__(_reuse=reuse, dtype=dtype, name=name)
     self._num_units = num_units
     self._forget_bias = forget_bias
     self._use_peephole = use_peephole
@@ -417,7 +419,7 @@ class LSTMBlockCell(LayerRNNCell):
       wcf = self._w_f_diag
       wco = self._w_o_diag
     else:
-      wci = wcf = wco = array_ops.zeros([self._num_units])
+      wci = wcf = wco = array_ops.zeros([self._num_units], dtype=self.dtype)
 
     (cs_prev, h_prev) = state
     (_, cs, _, _, _, _, h) = _lstm_block_cell(
@@ -603,7 +605,7 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
     Args:
       num_units: int, The number of units in the LSTM cell.
       forget_bias: float, The bias added to forget gates (see above).
-      cell_clip: clip the cell to this value. Default is no cell clipping.
+      cell_clip: clip the cell to this value. Defaults is no cell clipping.
       use_peephole: Whether to use peephole connections or not.
       reuse: (optional) boolean describing whether to reuse variables in an
         existing scope.  If not `True`, and the existing scope already has the
-- 
GitLab


From f921f0b26b9b41b735738d3d0c23db39e8e91410 Mon Sep 17 00:00:00 2001
From: Anjali Sridhar <anjalisridhar@google.com>
Date: Thu, 11 Oct 2018 19:04:19 -0700
Subject: [PATCH 0842/1085] Removed TODOs that are obsolete.

PiperOrigin-RevId: 216798710
---
 tensorflow/python/keras/engine/training.py | 26 +++++++++++-----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 3cb9316399..275eb0efda 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -802,17 +802,17 @@ class Model(Network):
 
     # Validates `steps` argument right at the beginning since we use it to
     # construct the dataset object.
-    # TODO(anjalisridhar): This may not be a valid error since we now accept
-    # numpy array inputs. We still want to assert that we have a populated steps
-    # parameter.
-    if check_steps:
-      if steps is None:
-        raise ValueError('When using DistributionStrategy, '
-                         'you should specify the `{steps_name}` argument.'
-                         .format(steps_name=steps_name))
+    # TODO(anjalisridhar): Remove this check once we refactor the
+    # _standardize_user_data code path. This check is already present elsewhere
+    # in the codebase.
+    if check_steps and isinstance(x, dataset_ops.Dataset) and steps is None:
+      raise ValueError('When using Datasets as input, '
+                       'you should specify the `{steps_name}` argument.'
+                       .format(steps_name=steps_name))
 
     first_x_value = nest.flatten(x)[0]
     if isinstance(first_x_value, np.ndarray):
+      assert steps is not None
       x_shape = first_x_value.shape
       if batch_size is None:
         batch_size = distributed_training_utils.get_batch_size(
@@ -827,8 +827,11 @@ class Model(Network):
             self._distribution_strategy, y)
 
         x = dataset_ops.Dataset.from_tensor_slices((var_x, var_y))
-        # TODO(anjalisridhar): What should the buffer size be?
-        x = x.shuffle(10000)
+        # 1024 is a good buffer size since it is much larger than the average
+        # batch size provided by the user and provides sufficient randomness.
+        # One thing to keep in mind is the memory usage based on the size of
+        # each sample.
+        x = x.shuffle(1024)
         x = x.repeat()
         x = x.batch(batch_size, drop_remainder=drop_remainder)
         y = None
@@ -841,9 +844,6 @@ class Model(Network):
         x = x.repeat()
         x = x.batch(batch_size, drop_remainder=drop_remainder)
 
-    # TODO(anjalisridhar): Can we use the iterator and getnext op cache?
-    # We require users to pass Datasets since we distribute the dataset across
-    # multiple devices.
     assert isinstance(x, dataset_ops.Dataset)
 
     # TODO(anjalisridhar): We want distribute_dataset() to accept a Dataset or a
-- 
GitLab


From 8fbed5f43ccac490088679c86d6989afae7d9e1a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 19:18:29 -0700
Subject: [PATCH 0843/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 216799716
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 27 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 27 +++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 713ca5a651..80d4528cd7 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -21906,6 +21906,33 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "ExperimentalSleepDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "sleep_microseconds"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "ExperimentalThreadPoolDataset"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 78f796fb7f..d8814355fe 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -10413,6 +10413,33 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "ExperimentalSleepDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "sleep_microseconds"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+}
 op {
   name: "ExperimentalThreadPoolDataset"
   input_arg {
-- 
GitLab


From e37d357b4141c7c2b1f4ea3d3fe7ef6727a3e161 Mon Sep 17 00:00:00 2001
From: Shimin Guo <smguo2001@gmail.com>
Date: Thu, 11 Oct 2018 19:28:40 -0700
Subject: [PATCH 0844/1085] call compat.as_str

---
 tensorflow/python/framework/ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 52ad54a44e..e6ffc3d19c 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -1606,7 +1606,7 @@ def _create_c_op(graph, node_def, inputs, control_inputs):
                                   compat.as_str(node_def.op),
                                   compat.as_str(node_def.name))
   if node_def.device:
-    c_api.TF_SetDevice(op_desc, node_def.device)
+    c_api.TF_SetDevice(op_desc, compat.as_str(node_def.device))
   # Add inputs
   for op_input in inputs:
     if isinstance(op_input, (list, tuple)):
-- 
GitLab


From 856cf9927774b4e348435c784726f31701931415 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 11 Oct 2018 19:44:43 -0700
Subject: [PATCH 0845/1085] TPUEstimator: Initialize dataset iterators in
 parallel. (#22919)

PiperOrigin-RevId: 216796462
---
 tensorflow/contrib/tpu/BUILD                  |  1 +
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 56 +++++++++----------
 .../python/tpu/tpu_estimator_signals_test.py  | 25 ++++-----
 tensorflow/python/estimator/util.py           | 14 +++++
 4 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 8c36d5a297..acdc47482b 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -86,6 +86,7 @@ py_library(
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:variables",
         "//tensorflow/python/estimator:estimator_py",
+        "//tensorflow/python/estimator:util",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 0f6866aa1a..bc0d86a3a5 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -456,8 +456,10 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
 
   def after_create_session(self, session, coord):
     logging.info('Init TPU system')
+    start = time.time()
     session.run(self._init_ops,
                 options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000))
+    logging.info('Initialized TPU in %d seconds', time.time() - start)
 
     self._infeed_controller = self._create_infeed_controller(
         name='InfeedController', target=self._run_infeed, args=(session,))
@@ -679,7 +681,7 @@ def generate_per_host_enqueue_ops_fn_for_host(
   """Generates infeed enqueue ops for per-host input_fn on a single host."""
   captured_infeed_queue = _CapturedObject()
 
-  hooks = []
+  dataset_initializer = None
 
   with ops.device(device):
     user_context = tpu_context.TPUContext(
@@ -701,7 +703,7 @@ def generate_per_host_enqueue_ops_fn_for_host(
           add_padding=True)
 
     if is_dataset:
-      hooks.append(inputs.dataset_initializer_hook())
+      dataset_initializer = inputs.dataset_initializer()
 
     tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
 
@@ -747,14 +749,14 @@ def generate_per_host_enqueue_ops_fn_for_host(
             'signals': signals,
         }
 
-  return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset
+  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
 
 
 def generate_per_host_v2_enqueue_ops_fn_for_host(
     ctx, input_fn, inputs_structure_recorder, device, host_id):
   """Generates infeed enqueue ops for per-host input_fn on a single host."""
   captured_infeed_queue = _CapturedObject()
-  hooks = []
+  dataset_initializer = None
 
   with ops.device(device):
     user_context = tpu_context.TPUContext(
@@ -775,7 +777,7 @@ def generate_per_host_v2_enqueue_ops_fn_for_host(
           add_padding=True,
           num_invocations_per_step=ctx.num_of_replicas_per_host)
 
-    hooks.append(inputs.dataset_initializer_hook())
+    dataset_initializer = inputs.dataset_initializer()
     tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id)
 
   def enqueue_ops_fn():
@@ -836,14 +838,14 @@ def generate_per_host_v2_enqueue_ops_fn_for_host(
           'signals': signals,
       }
 
-  return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset
+  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
 
 
 def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
                                       num_hosts):
   """Generates infeed enqueue ops for one input_fn on all the hosts."""
   captured_infeed_queue = _CapturedObject()
-  hooks = []
+  dataset_initializer = None
   device_0 = ctx.tpu_host_placement_function(host_id=0)
   with ops.device(device_0):
     user_context = tpu_context.TPUContext(
@@ -863,7 +865,7 @@ def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
           add_padding=True)
 
     if is_dataset:
-      hooks.append(inputs.dataset_initializer_hook())
+      dataset_initializer = inputs.dataset_initializer()
     num_replicas_per_host = ctx.num_of_replicas_per_host
 
   def tpu_ordinal_function_impl(replica_id):
@@ -914,7 +916,7 @@ def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder,
           'signals': signals,
       }
 
-  return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset
+  return enqueue_ops_fn, captured_infeed_queue, dataset_initializer
 
 
 class _InputPipeline(object):
@@ -1118,7 +1120,7 @@ class _InputPipeline(object):
     """Deploys the input pipeline and record input structure."""
     enqueue_ops = []
     infeed_queues = []
-    all_hooks = []
+    all_dataset_initializers = []
     num_hosts = self._ctx.num_hosts
     tpu_host_placement_fn = self._ctx.tpu_host_placement_function
 
@@ -1150,12 +1152,12 @@ class _InputPipeline(object):
     elif self._ctx.is_input_broadcast_with_iterators():
       # Only calls input_fn in host 0.
       host_device = tpu_host_placement_fn(host_id=0)
-      enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = (
+      enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
           generate_broadcast_enqueue_ops_fn(self._ctx, self._input_fn,
                                             self._inputs_structure_recorder,
                                             num_hosts))
-      all_hooks.extend(hooks)
-      if is_dataset:
+      if dataset_initializer:
+        all_dataset_initializers.append(dataset_initializer)
         run_infeed_loop_on_coordinator = False
         wrap_fn = (
             _wrap_computation_in_while_loop
@@ -1171,17 +1173,16 @@ class _InputPipeline(object):
         with ops.device(host_device):
           with ops.name_scope('input_pipeline_task%d' % (host_id)):
             if self._ctx.is_input_per_host_with_iterators():
-              enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = (
+              enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
                   generate_per_host_v2_enqueue_ops_fn_for_host(
                       self._ctx, self._input_fn,
                       self._inputs_structure_recorder, host_device, host_id))
             else:
-              enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = (
+              enqueue_ops_fn, captured_infeed_queue, dataset_initializer = (
                   generate_per_host_enqueue_ops_fn_for_host(
                       self._ctx, self._input_fn,
                       self._inputs_structure_recorder, self._batch_axis,
                       host_device, host_id))
-            all_hooks.extend(hooks)
 
             # NOTE(xiejw): We dispatch here based on the return type of the
             # users `input_fn`.
@@ -1195,7 +1196,8 @@ class _InputPipeline(object):
             # handled in TF control flow properly. In this case, we will use
             # python loop to enqueue the data into TPU system.  This may be
             # slow compared to the previous case.
-            if is_dataset:
+            if dataset_initializer:
+              all_dataset_initializers.append(dataset_initializer)
               run_infeed_loop_on_coordinator = False
               wrap_fn = (
                   _wrap_computation_in_while_loop
@@ -1210,7 +1212,9 @@ class _InputPipeline(object):
     # dequeue is dtypes and types. So, any one can be used. Here, grab the
     # first one.
     self._infeed_queue = infeed_queues[0]
-    return enqueue_ops, all_hooks, run_infeed_loop_on_coordinator
+    return enqueue_ops, [
+        estimator_util.MultiHostDatasetInitializerHook(all_dataset_initializers)
+    ], run_infeed_loop_on_coordinator
 
   def _validate_input_pipeline(self):
     """Validates the input pipeline.
@@ -3031,23 +3035,19 @@ class _Inputs(object):
     """Returns True if the return value from input_fn is Dataset."""
     return self._dataset is not None
 
-  def dataset_initializer_hook(self):
-    """Returns a `SessionRunHook` to initialize this dataset.
+  def dataset_initializer(self):
+    """Returns the dataset's initializer.
 
-    This must be called before `features_and_labels`.
+    The initializer must be run before calling `features_and_labels`.
     """
-    iterator = self._dataset.make_initializable_iterator()
-    # pylint: disable=protected-access
-    hook = estimator_util._DatasetInitializerHook(iterator)
-    # pylint: enable=protected-access
-    self._iterator = iterator
-    return hook
+    self._iterator = self._dataset.make_initializable_iterator()
+    return self._iterator.initializer
 
   def features_and_labels(self):
     """Gets `features` and `labels`."""
     if self.is_dataset:
       if self._iterator is None:
-        raise RuntimeError('Internal error: Must call dataset_initializer_hook '
+        raise RuntimeError('Internal error: Must run dataset_initializer '
                            'before calling features_and_labels(). Please file '
                            'a bug!')
       return _Inputs._parse_inputs(self._iterator.get_next())
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
index bd530fdc3a..3786e52b94 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
@@ -100,7 +100,7 @@ class TPUEstimatorStoppingSignalsTest(test.TestCase):
     with ops.Graph().as_default():
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, _ = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -108,8 +108,7 @@ class TPUEstimatorStoppingSignalsTest(test.TestCase):
       self.assertIsNone(features['a'].shape.as_list()[0])
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         result, evaluated_signals = sess.run([features, signals])
         self.assertAllEqual(a[:batch_size], result['a'])
@@ -143,7 +142,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
                                                         add_padding=True)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, _ = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -151,8 +150,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       self.assertEqual(batch_size, features['a'].shape.as_list()[0])
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         result, evaluated_signals = sess.run([features, signals])
         self.assertAllEqual(a[:batch_size], result['a'])
@@ -187,7 +185,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
                                                         add_padding=True)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, labels = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -195,8 +193,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       self.assertEqual(batch_size, features['a'].shape.as_list()[0])
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         evaluated_features, evaluated_labels, evaluated_signals = (
             sess.run([features, labels, signals]))
@@ -255,7 +252,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size,
                                                         add_padding=True)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, _ = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -264,8 +261,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
               features, signals))
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         result, evaluated_signals = sess.run([sliced_features, signals])
         self.assertAllEqual(a[:batch_size], result['a'])
@@ -297,7 +293,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
       dataset = input_fn(params)
       inputs = tpu_estimator._InputsWithStoppingSignals(
           dataset, batch_size, add_padding=True, num_invocations_per_step=2)
-      hook = inputs.dataset_initializer_hook()
+      dataset_initializer = inputs.dataset_initializer()
       features, _ = inputs.features_and_labels()
       signals = inputs.signals()
 
@@ -305,8 +301,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase):
           tpu_estimator._PaddingSignals.slice_tensor_or_dict(features, signals))
 
       with session.Session() as sess:
-        hook.begin()
-        hook.after_create_session(sess, coord=None)
+        sess.run(dataset_initializer)
 
         result, evaluated_signals = sess.run([sliced_features, signals])
         self.assertAllEqual(a[:batch_size], result['a'])
diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py
index fb110c4b7b..bc621f948d 100644
--- a/tensorflow/python/estimator/util.py
+++ b/tensorflow/python/estimator/util.py
@@ -135,6 +135,20 @@ class _DatasetInitializerHook(training.SessionRunHook):
     session.run(self._initializer)
 
 
+class MultiHostDatasetInitializerHook(training.SessionRunHook):
+  """Creates a SessionRunHook that initializes all passed iterators."""
+
+  def __init__(self, dataset_initializers):
+    self._initializers = dataset_initializers
+
+  def after_create_session(self, session, coord):
+    del coord
+    start = time.time()
+    session.run(self._initializers)
+    logging.info('Initialized dataset iterators in %d seconds',
+                 time.time() - start)
+
+
 class StrategyInitFinalizeHook(training.SessionRunHook):
   """Creates a SessionRunHook that initializes and shutsdown devices."""
 
-- 
GitLab


From 1d02ebdb3062ebacfa3b1a4bef5d93214e6ef6a5 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 11 Oct 2018 21:25:39 -0700
Subject: [PATCH 0846/1085] r1.12-rc1 cherry-pick request: Make sure that all
 operands and outputs of Sort have the same layout. (#22922)

* [XLA] Migrate from gtl::FlatSet to absl::flat_hash_set

PiperOrigin-RevId: 215324035

* [XLA] Modify the function that determines whether an instruction can change
layout so that it can be used by the HLO verifier.

Change the function to a static member function of the LayoutAssignment class.

Add an std::function member to LayoutAssignment to store the function object
passed down from the backend compiler class and use it to decide whether an
instruction can change layouts.

Fix affected test cases.

PiperOrigin-RevId: 215515611

* Make sure that all operands and outputs of Sort have the same layout.

Also fix the DotLayout test, it would pass even when commenting out the dot specific logic in GpuLayoutAssignment.

PiperOrigin-RevId: 216726796
---
 tensorflow/compiler/jit/BUILD                 |  2 +
 tensorflow/compiler/jit/deadness_analysis.cc  | 10 ++--
 .../jit/encapsulate_subgraphs_pass.cc         |  7 +--
 .../jit/encapsulate_xla_computations_pass.cc  | 10 ++--
 .../compiler/jit/mark_for_compilation_pass.cc |  6 +--
 .../compiler/jit/partially_decluster_pass.cc  |  7 +--
 .../jit/resource_operation_safety_analysis.cc |  4 +-
 tensorflow/compiler/tests/BUILD               |  1 +
 tensorflow/compiler/tests/randomized_tests.cc | 14 +++---
 tensorflow/compiler/xla/client/BUILD          |  1 +
 tensorflow/compiler/xla/client/xla_builder.cc |  4 +-
 tensorflow/compiler/xla/client/xla_builder.h  |  4 +-
 tensorflow/compiler/xla/service/BUILD         | 27 +++++++++++
 .../xla/service/bfloat16_propagation.cc       |  9 ++--
 .../xla/service/bfloat16_propagation.h        | 11 +++--
 .../compiler/xla/service/buffer_assignment.cc | 48 ++++++++++---------
 .../compiler/xla/service/buffer_assignment.h  | 22 ++++-----
 .../compiler/xla/service/buffer_liveness.h    |  4 +-
 .../xla/service/buffer_value_containers.h     |  4 +-
 tensorflow/compiler/xla/service/call_graph.cc |  9 ++--
 tensorflow/compiler/xla/service/call_graph.h  | 10 ++--
 .../compiler/xla/service/copy_insertion.cc    |  6 +--
 tensorflow/compiler/xla/service/cpu/BUILD     |  1 +
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  3 +-
 .../xla/service/cpu/cpu_layout_assignment.h   |  5 +-
 .../service/cpu/cpu_layout_assignment_test.cc | 10 ++--
 .../compiler/xla/service/cpu/ir_emitter.cc    |  8 ++--
 .../xla/service/cpu/tests/cpu_noalias_test.cc |  2 +-
 tensorflow/compiler/xla/service/gpu/BUILD     |  3 ++
 .../xla/service/gpu/gpu_copy_insertion.cc     |  2 +-
 .../xla/service/gpu/gpu_layout_assignment.cc  | 26 ++++++++++
 .../xla/service/gpu/gpu_layout_assignment.h   |  5 +-
 .../service/gpu/gpu_layout_assignment_test.cc | 48 ++++++++++++++++---
 .../xla/service/gpu/instruction_fusion.cc     |  5 +-
 .../xla/service/gpu/multi_output_fusion.cc    |  6 +--
 .../xla/service/gpu/nvptx_compiler.cc         |  3 +-
 .../compiler/xla/service/heap_simulator.cc    | 13 ++---
 .../compiler/xla/service/heap_simulator.h     |  6 +--
 .../xla/service/hlo_alias_analysis.cc         |  9 ++--
 tensorflow/compiler/xla/service/hlo_buffer.cc |  2 +-
 .../compiler/xla/service/hlo_computation.cc   | 11 ++---
 .../compiler/xla/service/hlo_computation.h    |  2 +-
 tensorflow/compiler/xla/service/hlo_cse.cc    |  6 +--
 .../xla/service/hlo_dataflow_analysis.cc      |  9 ++--
 .../compiler/xla/service/hlo_domain_map.cc    |  3 +-
 .../compiler/xla/service/hlo_domain_map.h     |  4 +-
 .../xla/service/hlo_domain_metadata.h         |  8 ++--
 .../compiler/xla/service/hlo_instruction.cc   |  4 +-
 .../xla/service/hlo_memory_scheduler.cc       |  7 +--
 tensorflow/compiler/xla/service/hlo_module.cc |  9 ++--
 .../xla/service/hlo_module_group_util.cc      |  6 +--
 .../compiler/xla/service/hlo_pass_pipeline.cc |  6 +--
 .../xla/service/hlo_rematerialization.cc      |  3 +-
 .../xla/service/hlo_rematerialization.h       |  3 +-
 .../compiler/xla/service/hlo_schedule.cc      |  5 +-
 tensorflow/compiler/xla/service/hlo_value.cc  |  4 +-
 .../xla/service/indexed_array_analysis.cc     |  2 +-
 .../xla/service/interpreter/compiler.cc       |  3 +-
 .../compiler/xla/service/layout_assignment.cc | 18 ++++---
 .../compiler/xla/service/layout_assignment.h  | 25 ++++++----
 .../xla/service/layout_assignment_test.cc     |  3 +-
 tensorflow/compiler/xla/service/llvm_ir/BUILD |  1 +
 .../xla/service/llvm_ir/alias_analysis.cc     |  6 +--
 .../xla/service/llvm_ir/alias_analysis.h      |  1 -
 .../xla/service/multi_output_fusion.cc        |  6 +--
 .../compiler/xla/service/name_uniquer.h       |  4 +-
 .../compiler/xla/service/shape_inference.cc   |  4 +-
 .../compiler/xla/service/shaped_buffer.cc     |  4 +-
 .../xla/service/tuple_points_to_analysis.h    |  1 -
 .../while_loop_invariant_code_motion.cc       |  8 ++--
 .../xla/service/while_loop_simplifier.cc      |  3 +-
 tensorflow/compiler/xla/tests/BUILD           |  2 +-
 .../compiler/xla/tests/test_utils_test.cc     |  5 +-
 73 files changed, 350 insertions(+), 213 deletions(-)

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index f20270931f..661b444a42 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -325,6 +325,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
@@ -407,6 +408,7 @@ cc_library(
         "//tensorflow/core/kernels:bounds_check",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
diff --git a/tensorflow/compiler/jit/deadness_analysis.cc b/tensorflow/compiler/jit/deadness_analysis.cc
index e63d4b7792..e0b9932d80 100644
--- a/tensorflow/compiler/jit/deadness_analysis.cc
+++ b/tensorflow/compiler/jit/deadness_analysis.cc
@@ -16,11 +16,11 @@ limitations under the License.
 #include "tensorflow/compiler/jit/deadness_analysis.h"
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/jit/deadness_analysis_internal.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 
 // ALGORITHM OVERVIEW
@@ -298,7 +298,7 @@ class SymbolPredicate : public Predicate {
 
 template <typename FunctionTy>
 /*static*/ void Predicate::Visit(Predicate* p, const FunctionTy& func) {
-  gtl::FlatSet<Predicate*> visited;
+  absl::flat_hash_set<Predicate*> visited;
   std::vector<Predicate*> stack;
 
   stack.push_back(p);
@@ -467,7 +467,7 @@ Predicate* PredicateFactory::MakeAndOrImpl(
       is_and ? Predicate::Kind::kAnd : Predicate::Kind::kOr;
   Predicate::Kind other_pred_kind =
       is_and ? Predicate::Kind::kOr : Predicate::Kind::kAnd;
-  gtl::FlatSet<Predicate*> simplified_ops_set;
+  absl::flat_hash_set<Predicate*> simplified_ops_set;
   std::vector<Predicate*> simplified_ops;
   for (Predicate* op : operands) {
     // Simplify A&A => A and  A|A => A.
@@ -492,7 +492,7 @@ Predicate* PredicateFactory::MakeAndOrImpl(
   }
 
   // Simplify "A&~A=>False" and "A|~A=>True".
-  gtl::FlatSet<Predicate*> negated_ops;
+  absl::flat_hash_set<Predicate*> negated_ops;
   for (Predicate* op : simplified_ops) {
     if (op->kind() == Predicate::Kind::kNot) {
       negated_ops.insert(dynamic_cast<NotPredicate&>(*op).operand());
@@ -512,7 +512,7 @@ Predicate* PredicateFactory::MakeAndOrImpl(
   //
   // First find any predicates contained in all subops.
   std::vector<Predicate*> common_inner_operands;
-  gtl::FlatSet<Predicate*> common_inner_operands_set;
+  absl::flat_hash_set<Predicate*> common_inner_operands_set;
   for (Predicate* op : simplified_ops) {
     if (op->kind() != other_pred_kind) {
       common_inner_operands.clear();
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index d165341f21..da27f837e8 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h"
@@ -44,7 +45,6 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/public/session_options.h"
@@ -78,7 +78,8 @@ void SortControlInputs(GraphDef* gdef) {
 namespace {
 
 bool AreAllParentsGuaranteedConst(
-    const Node& n, const gtl::FlatSet<const Node*>& runtime_const_nodes) {
+    const Node& n,
+    const absl::flat_hash_set<const Node*>& runtime_const_nodes) {
   if (n.type_string() == "GuaranteeConst") {
     // If the current node is itself a cast-to-const, no need
     // to look at the incoming edges.
@@ -101,7 +102,7 @@ bool AreAllParentsGuaranteedConst(
 void MarkGuaranteedConstants(
     const Graph& graph,
     const std::vector<std::pair<const Node*, Node*>>& src_arg_pairs) {
-  gtl::FlatSet<const Node*> guaranteed_const_nodes;
+  absl::flat_hash_set<const Node*> guaranteed_const_nodes;
   std::vector<const Node*> srcs;
   srcs.reserve(src_arg_pairs.size());
   for (const auto& src_arg : src_arg_pairs) {
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
index 755c364c62..2ce6fa73fc 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -15,13 +15,13 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/proto_serialization.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -62,7 +62,7 @@ DataType EdgeType(const Edge* edge) {
 }
 
 // Adds the control inputs of `node` to `*deps`.
-void AddControlInputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+void AddControlInputs(const Node& node, absl::flat_hash_set<Node*>* deps) {
   for (const Edge* edge : node.in_edges()) {
     if (edge->IsControlEdge()) {
       deps->insert(edge->src());
@@ -71,7 +71,7 @@ void AddControlInputs(const Node& node, gtl::FlatSet<Node*>* deps) {
 }
 
 // Adds the control outputs of `node` to `*deps`.
-void AddControlOutputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+void AddControlOutputs(const Node& node, absl::flat_hash_set<Node*>* deps) {
   for (const Edge* edge : node.out_edges()) {
     if (edge->IsControlEdge()) {
       deps->insert(edge->dst());
@@ -246,7 +246,7 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
 
     // Data and control inputs to the new XlaLaunch node.
     std::vector<std::pair<Node*, int>> data_inputs(num_inputs);
-    gtl::FlatSet<Node*> control_inputs;
+    absl::flat_hash_set<Node*> control_inputs;
     DataTypeVector arg_types(num_args);
 
     AddControlInputs(*launch, &control_inputs);
@@ -266,7 +266,7 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
 
     // Outputs.
     const int num_outputs = launch->output_types().size();
-    gtl::FlatSet<Node*> control_outputs;
+    absl::flat_hash_set<Node*> control_outputs;
     std::vector<std::vector<std::pair<Node*, int>>> data_outputs(num_outputs);
     DataTypeVector output_types(num_outputs);
 
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index 133d982360..4f0c370e65 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <unordered_map>
 #include <unordered_set>
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/jit/deadness_analysis.h"
 #include "tensorflow/compiler/jit/defs.h"
 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h"
@@ -42,7 +43,6 @@ limitations under the License.
 #include "tensorflow/core/graph/control_flow.h"
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/public/version.h"
 
@@ -371,7 +371,7 @@ bool IsXlaFusable(const NodeDef& node) {
 Status FindCompilationCandidates(
     const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env,
     const std::function<bool(const Node*, const DeviceType&)>& is_compilable_fn,
-    OrderedNodeSet* candidates, gtl::FlatSet<Node*>* isolated_nodes) {
+    OrderedNodeSet* candidates, absl::flat_hash_set<Node*>* isolated_nodes) {
   OptimizerOptions opts;
   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
       new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION,
@@ -849,7 +849,7 @@ Status MarkForCompilationPass::RunImpl(
   Graph* graph = options.graph->get();
 
   OrderedNodeSet compilation_candidates;
-  gtl::FlatSet<Node*> isolated_nodes;
+  absl::flat_hash_set<Node*> isolated_nodes;
   TF_RETURN_IF_ERROR(FindCompilationCandidates(
       *graph, options.flib_def,
       (options.session_options != nullptr) ? options.session_options->env
diff --git a/tensorflow/compiler/jit/partially_decluster_pass.cc b/tensorflow/compiler/jit/partially_decluster_pass.cc
index 10fc9e85d9..b1f9e9088f 100644
--- a/tensorflow/compiler/jit/partially_decluster_pass.cc
+++ b/tensorflow/compiler/jit/partially_decluster_pass.cc
@@ -15,17 +15,18 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/partially_decluster_pass.h"
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/xla_cluster_util.h"
 #include "tensorflow/compiler/tf2xla/const_analysis.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/core/framework/memory_types.h"
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace tensorflow {
 namespace {
-Status FindNodesToDecluster(const Graph& graph, gtl::FlatSet<Node*>* result,
+Status FindNodesToDecluster(const Graph& graph,
+                            absl::flat_hash_set<Node*>* result,
                             absl::Span<Node* const> post_order) {
   // Find nodes that have at least one user outside their cluster that expects
   // hostmem output.  These nodes should be cloned to outside the cluster to
@@ -171,7 +172,7 @@ Status PartiallyDeclusterToRemoveDeviceToHostCopies(Graph* graph) {
   GetPostOrder(*graph, &post_order, /*stable_comparator=*/NodeComparatorName(),
                /*edge_filter=*/NotBackedge);
 
-  gtl::FlatSet<Node*> nodes_to_partially_decluster;
+  absl::flat_hash_set<Node*> nodes_to_partially_decluster;
   TF_RETURN_IF_ERROR(
       FindNodesToDecluster(*graph, &nodes_to_partially_decluster, post_order));
 
diff --git a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
index 657bb409db..e039d46ec8 100644
--- a/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
+++ b/tensorflow/compiler/jit/resource_operation_safety_analysis.cc
@@ -82,6 +82,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/resource_operation_safety_analysis.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_join.h"
 #include "absl/types/optional.h"
@@ -89,7 +90,6 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/tensor_id.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/util/ptr_util.h"
 
@@ -176,7 +176,7 @@ string ResourceOpToString(const ResourceOp& resource_op) {
 // point.
 class ResourceOpSet {
  private:
-  using Impl = gtl::FlatSet<ResourceOp>;
+  using Impl = absl::flat_hash_set<ResourceOp>;
 
  public:
   ResourceOpSet() = default;
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 3cf74fa788..822fedf121 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -1105,6 +1105,7 @@ cc_library(
         "//tensorflow/core:test",
         "//tensorflow/core:testlib",
         "//tensorflow/core/kernels:ops_util",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc
index bddda6f302..7a96f4c25c 100644
--- a/tensorflow/compiler/tests/randomized_tests.cc
+++ b/tensorflow/compiler/tests/randomized_tests.cc
@@ -45,6 +45,7 @@ limitations under the License.
 #include <random>
 #include <unordered_map>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/jit/defs.h"
@@ -63,7 +64,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/public/session.h"
 #include "tensorflow/core/public/session_options.h"
@@ -457,7 +457,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
   Tensor tensor(dtype, TensorShape(shape));
   switch (dtype) {
     case DT_FLOAT: {
-      gtl::FlatSet<float> already_generated;
+      absl::flat_hash_set<float> already_generated;
       std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
       test::FillFn<float>(&tensor, [&](int i) -> float {
         float generated;
@@ -470,7 +470,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_DOUBLE: {
-      gtl::FlatSet<double> already_generated;
+      absl::flat_hash_set<double> already_generated;
       std::uniform_real_distribution<double> distribution(-1.0, 1.0);
       test::FillFn<double>(&tensor, [&](int i) -> double {
         double generated;
@@ -483,7 +483,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_COMPLEX64: {
-      gtl::FlatSet<std::pair<float, float>> already_generated;
+      absl::flat_hash_set<std::pair<float, float>> already_generated;
       std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
       test::FillFn<complex64>(&tensor, [&](int i) {
         complex64 generated;
@@ -500,7 +500,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_INT32: {
-      gtl::FlatSet<int32> already_generated;
+      absl::flat_hash_set<int32> already_generated;
       std::uniform_int_distribution<int32> distribution(-(1 << 20), 1 << 20);
       test::FillFn<int32>(&tensor, [&](int i) -> int32 {
         int32 generated;
@@ -513,7 +513,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_INT64: {
-      gtl::FlatSet<int64> already_generated;
+      absl::flat_hash_set<int64> already_generated;
       std::uniform_int_distribution<int64> distribution(-(1LL << 40),
                                                         1LL << 40);
       test::FillFn<int64>(&tensor, [&](int i) -> int64 {
@@ -527,7 +527,7 @@ Tensor OpTest::RandomTensor(DataType dtype, bool needs_unique_values,
       break;
     }
     case DT_BOOL: {
-      gtl::FlatSet<bool> already_generated;
+      absl::flat_hash_set<bool> already_generated;
       std::bernoulli_distribution distribution;
       test::FillFn<bool>(&tensor, [&](int i) -> bool {
         bool generated;
diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD
index 1191cff109..dc097f3696 100644
--- a/tensorflow/compiler/xla/client/BUILD
+++ b/tensorflow/compiler/xla/client/BUILD
@@ -221,6 +221,7 @@ cc_library(
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 5277de6a85..e0ec91dba1 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
@@ -33,7 +34,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/mutex.h"
 
 namespace xla {
@@ -2290,7 +2290,7 @@ StatusOr<XlaComputation> XlaBuilder::BuildConstantSubGraph(
   // also a valid dependency order). The related ops will be added to the
   // subgraph in the same order.
   std::set<int64> related_ops;
-  tensorflow::gtl::FlatSet<int64> related_calls;  // Related computations.
+  absl::flat_hash_set<int64> related_calls;  // Related computations.
   std::queue<int64> worklist;
   worklist.push(root->id());
   related_ops.insert(root->id());
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index b7295e8a53..cd0d5ca5d3 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/client/padding.h"
@@ -35,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/stacktrace.h"
 #include "tensorflow/core/platform/types.h"
@@ -1035,7 +1035,7 @@ class XlaBuilder {
   std::map<int64, HloComputationProto> embedded_;
 
   // The unique parameter numbers.
-  tensorflow::gtl::FlatSet<int64> parameter_numbers_;
+  absl::flat_hash_set<int64> parameter_numbers_;
 
   // The metadata to attach to each op. This is structured as a "modal"-like
   // operation, in order to simplify client code (and not sprinkle this metadata
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 8da6364786..13803f5ebe 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -147,6 +147,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -183,6 +184,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:span",
@@ -336,6 +338,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -490,6 +493,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -781,6 +785,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -959,6 +964,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -995,6 +1001,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
     ],
@@ -1043,6 +1050,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -1136,6 +1144,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -1230,6 +1239,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
@@ -1275,6 +1285,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -1348,6 +1359,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -1660,6 +1672,7 @@ cc_library(
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
     ],
@@ -2064,6 +2077,7 @@ cc_library(
         ":logical_buffer",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -2099,6 +2113,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
@@ -2120,6 +2135,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -2203,6 +2219,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2225,6 +2242,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
     ],
@@ -2286,6 +2304,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
@@ -2343,6 +2362,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2370,6 +2390,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -2487,6 +2508,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2616,6 +2638,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
@@ -2655,6 +2678,7 @@ cc_library(
         "//tensorflow/compiler/xla:types",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -2730,6 +2754,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
     ],
 )
@@ -3300,6 +3325,7 @@ cc_library(
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
     ],
 )
@@ -3387,6 +3413,7 @@ cc_library(
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
index 58f78f8e24..002be9c970 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/bfloat16_propagation.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -81,7 +82,7 @@ void BFloat16Propagation::RevertIfFusionInternalBF16Changes(
   };
 
   auto root = fusion->fused_instructions_computation()->root_instruction();
-  tensorflow::gtl::FlatSet<const HloValue*> changed_root_buffers;
+  absl::flat_hash_set<const HloValue*> changed_root_buffers;
 
   auto root_changes_it = changes_to_bf16_.find(root);
   if (root_changes_it != changes_to_bf16_.end()) {
@@ -500,7 +501,7 @@ void BFloat16Propagation::AdjustCalledComputationRoot(HloInstruction* hlo) {
 
 bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper(
     HloComputation* computation,
-    tensorflow::gtl::FlatSet<const HloComputation*>* visited_computations) {
+    absl::flat_hash_set<const HloComputation*>* visited_computations) {
   bool parameter_changed = false;
   auto insts = computation->MakeInstructionPostOrder();
   // Do the adjustment on each instruction in the computation in reverse
@@ -560,7 +561,7 @@ bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper(
       // another input parameter. A fixed point will be reached because the
       // parameters can only be changed from BF16 to F32, not the other way
       // around.
-      tensorflow::gtl::FlatSet<const HloComputation*> visited_in_while;
+      absl::flat_hash_set<const HloComputation*> visited_in_while;
       while (ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_condition(),
                                                          &visited_in_while) ||
              ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_body(),
@@ -587,7 +588,7 @@ void BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers(
     HloModule* module) {
   const auto& computations_topological_order =
       module->MakeComputationPostOrder();
-  tensorflow::gtl::FlatSet<const HloComputation*> resolved;
+  absl::flat_hash_set<const HloComputation*> resolved;
   for (auto comp_it = computations_topological_order.rbegin();
        comp_it != computations_topological_order.rend(); ++comp_it) {
     if (ContainsKey(resolved, *comp_it)) {
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h
index c74326f631..5fcaa15c83 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.h
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/bfloat16_support.h"
 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -82,7 +83,7 @@ class BFloat16Propagation : public HloModulePass {
 
   // The set of instructions to consider using bfloat16, computed in the forward
   // pass.
-  tensorflow::gtl::FlatSet<const HloInstruction*> consider_using_bfloat16_;
+  absl::flat_hash_set<const HloInstruction*> consider_using_bfloat16_;
 
   // ***************************
   // Functions called and state produced by the backward pass (from root to
@@ -111,12 +112,12 @@ class BFloat16Propagation : public HloModulePass {
 
   // The set of HloInstructions that have been visited in the
   // opportunity-finding pass.
-  tensorflow::gtl::FlatSet<const HloInstruction*>
+  absl::flat_hash_set<const HloInstruction*>
       instructions_visited_in_backward_pass_;
 
   // The set of HloComputations that have been visited in the
   // opportunity-finding pass.
-  tensorflow::gtl::FlatSet<const HloComputation*>
+  absl::flat_hash_set<const HloComputation*>
       computations_visited_in_backward_pass_;
 
   // ***************************
@@ -132,7 +133,7 @@ class BFloat16Propagation : public HloModulePass {
   // point is reached.
   bool ResolveInconsistencyOfAliasingBuffersHelper(
       HloComputation* computation,
-      tensorflow::gtl::FlatSet<const HloComputation*>* visited_computations);
+      absl::flat_hash_set<const HloComputation*>* visited_computations);
 
   // Makes the parameters of called computations match how they are called by
   // the given HLO.
@@ -183,7 +184,7 @@ class BFloat16Propagation : public HloModulePass {
                                       PrimitiveType target_type);
 
   // The set of F32 HLO values that must be kept in F32.
-  tensorflow::gtl::FlatSet<const HloValue*> values_that_must_be_kept_as_f32_;
+  absl::flat_hash_set<const HloValue*> values_that_must_be_kept_as_f32_;
 
   // Mapping from each HloComputation to the number of callers to it in the
   // module. Populated at the beginning of this pass.
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 3efa0b1dad..2c2d1626c2 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -43,9 +44,9 @@ namespace xla {
 namespace {
 
 using absl::flat_hash_map;
+using absl::flat_hash_set;
 using absl::StrAppend;
 using absl::StrAppendFormat;
-using ::tensorflow::gtl::FlatSet;
 using ::tensorflow::strings::HumanReadableNumBytes;
 
 template <typename T>
@@ -129,8 +130,8 @@ Status GatherComputationsByAllocationType(
 
   // Sets for quickly checking membership. Computations are returned in vectors
   // for stable iteration.
-  FlatSet<const HloComputation*> thread_local_set;
-  FlatSet<const HloComputation*> global_set;
+  flat_hash_set<const HloComputation*> thread_local_set;
+  flat_hash_set<const HloComputation*> global_set;
 
   while (!worklist.empty()) {
     auto worklist_front = worklist.front();
@@ -445,7 +446,7 @@ bool BufferAssignment::SharesSliceAtIndex(
 bool BufferAssignment::HaveDisjointSlices(const HloInstruction* hlo_a,
                                           const HloInstruction* hlo_b) const {
   using SliceSet =
-      FlatSet<BufferAllocation::Slice, BufferAllocation::Slice::Hasher>;
+      flat_hash_set<BufferAllocation::Slice, BufferAllocation::Slice::Hasher>;
   // Gets the slices all of instr's subshapes.  If any subshape doesn't have an
   // assigned slice, returns the empty set.
   auto collect_slices = [&](const HloInstruction* instr) -> SliceSet {
@@ -815,9 +816,9 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
 
 Status BufferAssigner::AssignBuffersForComputation(
     const HloComputation* computation, bool is_thread_local,
-    const FlatSet<const LogicalBuffer*>& colocated_buffers,
-    const FlatSet<BufferAllocation::Index>& colocated_allocations,
-    flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>*
+    const flat_hash_set<const LogicalBuffer*>& colocated_buffers,
+    const flat_hash_set<BufferAllocation::Index>& colocated_allocations,
+    flat_hash_map<const HloComputation*, flat_hash_set<const LogicalBuffer*>>*
         buffers_to_assign_sequentially,
     BufferAssignment* assignment) {
   // Buffers are sorted and assigned to BufferAllocations in decreasing order of
@@ -853,8 +854,8 @@ Status BufferAssigner::AssignBuffersForComputation(
     // buffers_to_assign_sequentially map, even if we end up with an empty set
     // of buffers. This ensures we can correctly determine whether to run
     // whole-module heap simulation.
-    buffers_to_assign_sequentially->emplace(computation,
-                                            FlatSet<const LogicalBuffer*>());
+    buffers_to_assign_sequentially->emplace(
+        computation, flat_hash_set<const LogicalBuffer*>());
   }
 
   // Sort the LogicalBuffers first by size. We assign the larger LogicalBuffers
@@ -1046,11 +1047,11 @@ Status BufferAssigner::AssignBuffersForComputation(
   return Status::OK();
 }
 
-flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+flat_hash_map<LogicalBuffer::Color, flat_hash_set<const LogicalBuffer*>,
               LogicalBuffer::Color::Hasher>
 BufferAssigner::SplitBuffersByColor(
-    const FlatSet<const LogicalBuffer*>& buffers) {
-  flat_hash_map<LogicalBuffer::Color, FlatSet<const LogicalBuffer*>,
+    const flat_hash_set<const LogicalBuffer*>& buffers) {
+  flat_hash_map<LogicalBuffer::Color, flat_hash_set<const LogicalBuffer*>,
                 LogicalBuffer::Color::Hasher>
       color_map;
   for (auto buffer : buffers) {
@@ -1060,7 +1061,8 @@ BufferAssigner::SplitBuffersByColor(
 }
 
 Status BufferAssigner::AssignBuffersWithSequentialOrdering(
-    const flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>&
+    const flat_hash_map<const HloComputation*,
+                        flat_hash_set<const LogicalBuffer*>>&
         buffers_to_assign_sequentially,
     bool run_whole_module_heap_simulation, BufferAssignment* assignment) {
   // Run the sequence of instructions through the heap simulator.  The heuristic
@@ -1086,10 +1088,11 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
     // only live for the duration of their calling instructions.
     VLOG(1) << "Running whole-module heap simulation";
     HloSchedule schedule(&assignment->module());
-    FlatSet<const LogicalBuffer*> all_buffers_to_assign;
+    flat_hash_set<const LogicalBuffer*> all_buffers_to_assign;
     for (const auto& pair : buffers_to_assign_sequentially) {
       const HloComputation* computation = pair.first;
-      const FlatSet<const LogicalBuffer*>& buffers_to_assign = pair.second;
+      const flat_hash_set<const LogicalBuffer*>& buffers_to_assign =
+          pair.second;
       const std::vector<const HloInstruction*>* instruction_sequence =
           hlo_ordering.SequentialOrder(*computation);
       CHECK(instruction_sequence != nullptr) << computation->name();
@@ -1123,7 +1126,8 @@ Status BufferAssigner::AssignBuffersWithSequentialOrdering(
     VLOG(1) << "Running per-computation heap simulation";
     for (const auto& pair : buffers_to_assign_sequentially) {
       const HloComputation* computation = pair.first;
-      const FlatSet<const LogicalBuffer*>& buffers_to_assign = pair.second;
+      const flat_hash_set<const LogicalBuffer*>& buffers_to_assign =
+          pair.second;
       const std::vector<const HloInstruction*>* instruction_sequence =
           hlo_ordering.SequentialOrder(*computation);
       CHECK(instruction_sequence != nullptr) << computation->name();
@@ -1198,7 +1202,7 @@ std::vector<const LogicalBuffer*> ComputePeakMemoryLogicalBuffers(
 
   // Next gather the set of logical buffers live at the earliest point of
   // maximal live set size.
-  tensorflow::gtl::FlatSet<const LogicalBuffer*> live_buffers;
+  absl::flat_hash_set<const LogicalBuffer*> live_buffers;
   live_size = 0;
   for (const auto& event : heap_trace.events()) {
     const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id());
@@ -1588,8 +1592,8 @@ void BufferAssigner::BuildColocatedBufferSets(
 void BufferAssigner::AssignColocatedBufferSets(
     const std::vector<ColocatedBufferSet>& colocated_buffer_sets,
     BufferAssignment* assignment,
-    FlatSet<const LogicalBuffer*>* colocated_buffers,
-    FlatSet<BufferAllocation::Index>* colocated_allocations) {
+    flat_hash_set<const LogicalBuffer*>* colocated_buffers,
+    flat_hash_set<BufferAllocation::Index>* colocated_allocations) {
   for (const ColocatedBufferSet& colocated_buffer_set : colocated_buffer_sets) {
     BufferAllocation* allocation = nullptr;
     // Set 'entry_parameter_number' and 'entry_parameter_shape_idx' if entry
@@ -1662,8 +1666,8 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
   // Once b/32491382 enables module-level liveness analysis, we may be able
   // to assign colocated buffers (or at least reuse their allocation for
   // buffers outside of the set) in AssignBuffersForComputation.
-  FlatSet<const LogicalBuffer*> colocated_buffers;
-  FlatSet<BufferAllocation::Index> colocated_allocations;
+  flat_hash_set<const LogicalBuffer*> colocated_buffers;
+  flat_hash_set<BufferAllocation::Index> colocated_allocations;
   std::vector<ColocatedBufferSet> colocated_buffer_sets;
   BuildColocatedBufferSets(module, assignment->liveness(),
                            assignment->buffer_size_, &colocated_buffer_sets);
@@ -1681,7 +1685,7 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
 
   // First assign buffers for global computatations. Temporary buffers for
   // sequential computations are collected in 'buffers_to_assign_sequentially'.
-  flat_hash_map<const HloComputation*, FlatSet<const LogicalBuffer*>>
+  flat_hash_map<const HloComputation*, flat_hash_set<const LogicalBuffer*>>
       buffers_to_assign_sequentially;
   for (auto* computation : global_computations) {
     TF_RETURN_IF_ERROR(AssignBuffersForComputation(
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h
index 9ba40617a3..899cd36e1f 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.h
+++ b/tensorflow/compiler/xla/service/buffer_assignment.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
@@ -34,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -554,11 +554,10 @@ class BufferAssigner {
   // true.
   Status AssignBuffersForComputation(
       const HloComputation* computation, bool is_thread_local,
-      const tensorflow::gtl::FlatSet<const LogicalBuffer*>& colocated_buffers,
-      const tensorflow::gtl::FlatSet<BufferAllocation::Index>&
-          colocated_allocations,
+      const absl::flat_hash_set<const LogicalBuffer*>& colocated_buffers,
+      const absl::flat_hash_set<BufferAllocation::Index>& colocated_allocations,
       absl::flat_hash_map<const HloComputation*,
-                          tensorflow::gtl::FlatSet<const LogicalBuffer*>>*
+                          absl::flat_hash_set<const LogicalBuffer*>>*
           buffers_to_assign_sequentially,
       BufferAssignment* assignment);
 
@@ -569,7 +568,7 @@ class BufferAssigner {
   // assuming all global computations are sequentially ordered.
   Status AssignBuffersWithSequentialOrdering(
       const absl::flat_hash_map<const HloComputation*,
-                                tensorflow::gtl::FlatSet<const LogicalBuffer*>>&
+                                absl::flat_hash_set<const LogicalBuffer*>>&
           buffers_to_assign_sequentially,
       bool run_whole_module_heap_simulation, BufferAssignment* assignment);
 
@@ -589,7 +588,7 @@ class BufferAssigner {
   // alias. Explicitly handling these colocated buffers is necessary because
   // points-to analysis is computation level scope and does not recognize
   // aliasing across computations (b/32491382).
-  using ColocatedBufferSet = tensorflow::gtl::FlatSet<const LogicalBuffer*>;
+  using ColocatedBufferSet = absl::flat_hash_set<const LogicalBuffer*>;
 
   // Returns a vector of ColocatedBufferSet objects, where each
   // ColocatedBufferSet aggregates a set of related LogicalBuffers from 'module'
@@ -604,8 +603,8 @@ class BufferAssigner {
   void AssignColocatedBufferSets(
       const std::vector<ColocatedBufferSet>& colocated_buffer_sets,
       BufferAssignment* assignment,
-      tensorflow::gtl::FlatSet<const LogicalBuffer*>* colocated_buffers,
-      tensorflow::gtl::FlatSet<BufferAllocation::Index>* colocated_allocations);
+      absl::flat_hash_set<const LogicalBuffer*>* colocated_buffers,
+      absl::flat_hash_set<BufferAllocation::Index>* colocated_allocations);
 
   // Adds the 'colocated_set' of buffers to 'colocated_buffer_sets', maintaining
   // the invariant that all sets in 'colocated_buffer_sets' are disjoint.
@@ -624,10 +623,9 @@ class BufferAssigner {
   // Split a set of buffers into several sets, each of which contains buffers
   // colored with the same color.
   absl::flat_hash_map<LogicalBuffer::Color,
-                      tensorflow::gtl::FlatSet<const LogicalBuffer*>,
+                      absl::flat_hash_set<const LogicalBuffer*>,
                       LogicalBuffer::Color::Hasher>
-  SplitBuffersByColor(
-      const tensorflow::gtl::FlatSet<const LogicalBuffer*>& buffers);
+  SplitBuffersByColor(const absl::flat_hash_set<const LogicalBuffer*>& buffers);
 
   // If true, buffer assignments assumes that input parameter buffers and output
   // buffers can be shared if their sizes match.
diff --git a/tensorflow/compiler/xla/service/buffer_liveness.h b/tensorflow/compiler/xla/service/buffer_liveness.h
index 2911bbcfbf..f939a426ea 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness.h
+++ b/tensorflow/compiler/xla/service/buffer_liveness.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_ordering.h"
@@ -27,7 +28,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -101,7 +101,7 @@ class BufferLiveness {
   // Set of LogicalBuffers which are aliased in the output of other
   // instructions. For example, a LogicalBuffer which is inserted into a tuple
   // is considered to be aliased and will be in this set.
-  tensorflow::gtl::FlatSet<const LogicalBuffer*> aliased_buffers_;
+  absl::flat_hash_set<const LogicalBuffer*> aliased_buffers_;
 
   // LogicalBuffers that may be live out of the entry computation.
   PointsToSet::BufferSet maybe_live_out_buffers_;
diff --git a/tensorflow/compiler/xla/service/buffer_value_containers.h b/tensorflow/compiler/xla/service/buffer_value_containers.h
index 305914fca8..cc46af5eee 100644
--- a/tensorflow/compiler/xla/service/buffer_value_containers.h
+++ b/tensorflow/compiler/xla/service/buffer_value_containers.h
@@ -16,10 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_CONTAINERS_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_VALUE_CONTAINERS_H_
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
 #include "tensorflow/core/lib/gtl/compactptrset.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -38,7 +38,7 @@ BufferValueCompactPointerSet ToBufferValueCompactPointerSet(
   return output;
 }
 
-using BufferValueFlatSet = tensorflow::gtl::FlatSet<const BufferValue*>;
+using BufferValueFlatSet = absl::flat_hash_set<const BufferValue*>;
 template <class LogicalBufferContainerT>
 BufferValueFlatSet ToBufferValueFlatSet(
     const LogicalBufferContainerT& logical_buffer_container) {
diff --git a/tensorflow/compiler/xla/service/call_graph.cc b/tensorflow/compiler/xla/service/call_graph.cc
index 23b2a32709..bdd5069632 100644
--- a/tensorflow/compiler/xla/service/call_graph.cc
+++ b/tensorflow/compiler/xla/service/call_graph.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <queue>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -138,7 +139,7 @@ CallGraphNode& CallGraph::GetNode(const HloComputation* computation) {
 
 bool CallGraph::DominatesHelper(
     const HloComputation* a, const HloComputation* b,
-    tensorflow::gtl::FlatSet<const HloComputation*>* visited) const {
+    absl::flat_hash_set<const HloComputation*>* visited) const {
   if (a == b || ContainsKey(*visited, b)) {
     // The call graph is guaranteed to be acyclic so any previously visited node
     // we encounter was already determined to be dominated.
@@ -163,7 +164,7 @@ bool CallGraph::DominatesHelper(
 
 bool CallGraph::Dominates(const HloComputation* a,
                           const HloComputation* b) const {
-  tensorflow::gtl::FlatSet<const HloComputation*> visited;
+  absl::flat_hash_set<const HloComputation*> visited;
   return DominatesHelper(a, b, &visited);
 }
 
@@ -277,7 +278,7 @@ std::unique_ptr<CallGraph> CallGraph::Build(const HloModule* module) {
 
 Status CallGraph::VisitNodesInternal(
     const VisitorFunction& visitor_func, const CallGraphNode& node,
-    tensorflow::gtl::FlatSet<const CallGraphNode*>* visited) const {
+    absl::flat_hash_set<const CallGraphNode*>* visited) const {
   auto pair = visited->insert(&node);
   if (!pair.second) {
     // Node was not inserted. Node has already been visited.
@@ -294,7 +295,7 @@ Status CallGraph::VisitNodesInternal(
 
 Status CallGraph::VisitNodes(const VisitorFunction& visitor_func,
                              bool visit_unreachable_nodes) const {
-  tensorflow::gtl::FlatSet<const CallGraphNode*> visited;
+  absl::flat_hash_set<const CallGraphNode*> visited;
   if (visit_unreachable_nodes) {
     // Traverse from all roots in the call graph.
     for (const CallGraphNode& node : nodes()) {
diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h
index 0c2e9b99db..cb56f4789d 100644
--- a/tensorflow/compiler/xla/service/call_graph.h
+++ b/tensorflow/compiler/xla/service/call_graph.h
@@ -21,10 +21,10 @@ limitations under the License.
 #include <ostream>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -145,12 +145,12 @@ class CallGraphNode {
   // The computations called by this computation. The vector is used for a
   // stable ordering and the set enables fast membership testing.
   std::vector<HloComputation*> callees_;
-  tensorflow::gtl::FlatSet<HloComputation*> callee_set_;
+  absl::flat_hash_set<HloComputation*> callee_set_;
 
   // The computations which call this computation. The vector is used for a
   // stable ordering and the set enables fast membership testing.
   std::vector<HloComputation*> callers_;
-  tensorflow::gtl::FlatSet<HloComputation*> caller_set_;
+  absl::flat_hash_set<HloComputation*> caller_set_;
 
   // The call sites in this computation
   std::vector<CallSite> callsites_;
@@ -250,14 +250,14 @@ class CallGraph {
   // 'visited'.
   Status VisitNodesInternal(
       const VisitorFunction& visitor_func, const CallGraphNode& node,
-      tensorflow::gtl::FlatSet<const CallGraphNode*>* visited) const;
+      absl::flat_hash_set<const CallGraphNode*>* visited) const;
 
   // Recursive helper for computing whether 'a' dominates 'b' in the call
   // graph. 'b_ancestor' is the currently visited node (which starts at 'b'),
   // and 'visited' is the set of computations which have been visited.
   bool DominatesHelper(
       const HloComputation* a, const HloComputation* b,
-      tensorflow::gtl::FlatSet<const HloComputation*>* visited) const;
+      absl::flat_hash_set<const HloComputation*>* visited) const;
 
   // The HLO module represented by this call graph.
   const HloModule* module_ = nullptr;
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index 7f78412924..f35324aa35 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_alias_analysis.h"
@@ -32,7 +33,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -904,7 +904,7 @@ class CopyRemover {
     // The heads of all the value lists. Each value list represents the HLO
     // values contained in a particular HLO buffer. The values in the list are
     // in dependency order.
-    tensorflow::gtl::FlatSet<const ValueNode*> value_lists_;
+    absl::flat_hash_set<const ValueNode*> value_lists_;
 
     // Copy removal requires fast access to the value list elements
     // corresponding to the source and destination values of the kCopy
@@ -1009,7 +1009,7 @@ Status CopyInsertion::AddSpecialCaseCopies(const CallGraph& call_graph,
     HloInstruction* root = computation->root_instruction();
 
     // Mark nondistinct/ambiguous indices.
-    tensorflow::gtl::FlatSet<const HloBuffer*> seen;
+    absl::flat_hash_set<const HloBuffer*> seen;
     ShapeUtil::ForEachSubshape(
         root->shape(), [&](const Shape& /*subshape*/, const ShapeIndex& index) {
           std::vector<const HloBuffer*> buffers_at_index =
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 6a83909a3b..ae4c6e962d 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -291,6 +291,7 @@ cc_library(
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 18fc144efe..ea8c200dee 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -308,7 +308,8 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
       ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
 
   pipeline.AddPass<CpuLayoutAssignment>(
-      module->mutable_entry_computation_layout(), target_machine_features);
+      module->mutable_entry_computation_layout(),
+      LayoutAssignment::InstructionCanChangeLayout, target_machine_features);
   return pipeline.Run(module).status();
 }
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
index 3c4fe68b83..f4da35dd37 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h
@@ -30,8 +30,11 @@ class CpuLayoutAssignment : public LayoutAssignment {
  public:
   explicit CpuLayoutAssignment(
       ComputationLayout* entry_computation_layout,
+      std::function<bool(const HloInstruction*)>
+          instruction_can_change_layout_func,
       const TargetMachineFeatures* target_machine_features)
-      : LayoutAssignment(entry_computation_layout),
+      : LayoutAssignment(entry_computation_layout,
+                         std::move(instruction_can_change_layout_func)),
         target_machine_features_(*target_machine_features) {}
   ~CpuLayoutAssignment() override {}
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
index 4668f3872d..97659b88a7 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_layout_assignment_test.cc
@@ -54,8 +54,9 @@ class CpuLayoutAssignmentTest : public HloTestBase {
         [](int64 shape_size) {
           return cpu::TargetMachineFeatures::kEigenExpectedTensorAlignment;
         });
-    cpu::CpuLayoutAssignment layout_assignment(entry_computation_layout,
-                                               &target_machine_features);
+    cpu::CpuLayoutAssignment layout_assignment(
+        entry_computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+        &target_machine_features);
     EXPECT_IS_OK(layout_assignment.Run(module).status());
   }
 };
@@ -321,8 +322,9 @@ static StatusOr<DotOutputFusionLayoutAssignmentResult> RunDotOutputFusion(
       [](int64 shape_size) {
         return cpu::TargetMachineFeatures::kEigenExpectedTensorAlignment;
       });
-  cpu::CpuLayoutAssignment layout_assignment(&computation_layout,
-                                             &target_machine_features);
+  cpu::CpuLayoutAssignment layout_assignment(
+      &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+      &target_machine_features);
   TF_ASSIGN_OR_RETURN(result.layout_assignment_changed_something,
                       layout_assignment.Run(module));
 
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 953a75c35f..a70abb117a 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
@@ -68,7 +69,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/core/lib/core/bits.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -1400,8 +1400,8 @@ static bool ReductionPreservesLayout(const HloInstruction& reduce) {
   // [0->0, 3->1].
   absl::flat_hash_map<int64, int64> unreduced_dim_map;
 
-  gtl::FlatSet<int64> reduced_dims(reduce.dimensions().begin(),
-                                   reduce.dimensions().end());
+  absl::flat_hash_set<int64> reduced_dims(reduce.dimensions().begin(),
+                                          reduce.dimensions().end());
 
   const Shape& operand_shape = reduce.operand(0)->shape();
   const Shape& result_shape = reduce.shape();
@@ -1977,7 +1977,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice) {
   //
   // * Implement the memcpy within the innermost loop.
 
-  gtl::FlatSet<int64> inner_dims;
+  absl::flat_hash_set<int64> inner_dims;
   for (int64 dim : LayoutUtil::MinorToMajor(layout)) {
     if (operand->shape().dimensions(dim) != slice->shape().dimensions(dim)) {
       break;
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
index 7af51db55a..b35fd9dad8 100644
--- a/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_noalias_test.cc
@@ -121,7 +121,7 @@ TEST_F(CpuNoAliasTest, Concat) {
     CHECK: %read_concat2_array = load {{.*}} !alias.scope [[concat1_noalias]], !noalias [[concat1_scope]]
     CHECK-DAG: [[buf_size32:![0-9]+]] = !{!"buffer:{{.*}} size:32
     CHECK-DAG: [[buf_size48:![0-9]+]] = !{!"buffer:{{.*}} size:48
-    CHECK-DAG: [[param_x_noalias]] = !{[[buf_size32]], [[buf_size48]]}
+    CHECK-DAG: [[param_x_noalias]] = !{[[buf_size48]], [[buf_size32]]}
     CHECK-DAG: [[concat1_scope]] = !{[[buf_size32]]}
     CHECK-DAG: [[concat1_noalias]] = !{[[buf_size48]]}
   )";
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index e65d3fa332..a838464cae 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -476,6 +476,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:instruction_fusion",
         "//tensorflow/compiler/xla/service:pattern_matcher",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -508,6 +509,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:multi_output_fusion",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
@@ -541,6 +543,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
index 79c74e7e8b..e2ab00ce41 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <set>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/copy_insertion.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
@@ -27,7 +28,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index 74352f26aa..af4d24956f 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -215,6 +215,32 @@ Status GpuLayoutAssignment::AddBackendConstraints(
           constraints->SetOperandLayout(op1_shape, instruction, 1));
       TF_RETURN_IF_ERROR(
           constraints->SetInstructionLayout(output_shape, instruction));
+    } else if (instruction->opcode() == HloOpcode::kSort &&
+               ShapeUtil::Rank(instruction->operand(0)->shape()) > 1) {
+      // Make sure that all the operands and the output(s) have the same layout.
+      Shape keys_shape = instruction->operand(0)->shape();
+      Layout keys_layout =
+          LayoutUtil::GetDefaultLayoutForRank(ShapeUtil::Rank(keys_shape));
+      for (int64 i = 0; i < instruction->operand_count(); ++i) {
+        Shape shape = instruction->operand(i)->shape();
+        *shape.mutable_layout() = keys_layout;
+        TF_RETURN_IF_ERROR(
+            constraints->SetOperandLayout(shape, instruction, i));
+        const LogicalBuffer* output_buffer;
+        if (ShapeUtil::IsArray(instruction->shape())) {
+          TF_ASSIGN_OR_RETURN(
+              output_buffer,
+              constraints->points_to_analysis().GetBufferDefinedAt(instruction,
+                                                                   {}));
+        } else {
+          TF_ASSIGN_OR_RETURN(
+              output_buffer,
+              constraints->points_to_analysis().GetBufferDefinedAt(instruction,
+                                                                   {i}));
+        }
+        TF_RETURN_IF_ERROR(
+            constraints->SetBufferLayout(keys_layout, *output_buffer));
+      }
     }
   }
   return Status::OK();
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
index e2b96a81d4..4ba7989e9c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h
@@ -30,8 +30,11 @@ namespace gpu {
 class GpuLayoutAssignment : public LayoutAssignment {
  public:
   explicit GpuLayoutAssignment(ComputationLayout* entry_computation_layout,
+                               std::function<bool(const HloInstruction*)>
+                                   instruction_can_change_layout_func,
                                se::StreamExecutor* stream_executor)
-      : LayoutAssignment(entry_computation_layout),
+      : LayoutAssignment(entry_computation_layout,
+                         std::move(instruction_can_change_layout_func)),
         stream_executor_(stream_executor) {}
   ~GpuLayoutAssignment() override {}
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
index fbc8ddf599..4822b820f4 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
@@ -75,7 +75,8 @@ TEST_F(LayoutAssignmentTest, Elementwise) {
             ShapeLayout(result_shape_with_layout);
 
         GpuLayoutAssignment layout_assignment(
-            &computation_layout, backend().default_stream_executor());
+            &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+            backend().default_stream_executor());
         EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
         for (const HloInstruction* operand : add->operands()) {
@@ -163,7 +164,8 @@ TEST_F(LayoutAssignmentTest, BatchNormInference) {
       }
 
       GpuLayoutAssignment layout_assignment(
-          &computation_layout, backend().default_stream_executor());
+          &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+          backend().default_stream_executor());
       EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
       // The first operand to batchnorm should have the same layout as the
@@ -233,7 +235,8 @@ TEST_F(LayoutAssignmentTest, BatchNormTraining) {
       }
 
       GpuLayoutAssignment layout_assignment(
-          &computation_layout, backend().default_stream_executor());
+          &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+          backend().default_stream_executor());
       EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
       // The first operand to batchnorm should have the same layout as the
@@ -314,7 +317,8 @@ TEST_F(LayoutAssignmentTest, BatchNormGrad) {
         }
 
         GpuLayoutAssignment layout_assignment(
-            &computation_layout, backend().default_stream_executor());
+            &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+            backend().default_stream_executor());
         EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
         // The first and fourth operands to the batchnorm call should have the
@@ -347,9 +351,11 @@ TEST_F(LayoutAssignmentTest, DotLayout) {
                           ParseHloString(hlo_text));
 
   ComputationLayout computation_layout(
-      module->entry_computation()->ComputeProgramShape());
-  GpuLayoutAssignment layout_assignment(&computation_layout,
-                                        backend().default_stream_executor());
+      module->entry_computation()->ComputeProgramShape(),
+      /*ignore_layouts=*/false);
+  GpuLayoutAssignment layout_assignment(
+      &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+      backend().default_stream_executor());
   EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
 
   Shape expected_shape =
@@ -359,6 +365,34 @@ TEST_F(LayoutAssignmentTest, DotLayout) {
                       op::ShapeWithLayout(expected_shape)));
 }
 
+TEST_F(LayoutAssignmentTest, SortLayout) {
+  const char* hlo_text = R"(
+  HloModule SortLayout
+  ENTRY sort {
+    keys = f32[3,2]{0,1} constant(f32[3,2]{0,1}{{0,1},{0,1},{0,1}})
+    values = f32[2,3]{1,0} parameter(0)
+    transpose = f32[3,2]{1,0} transpose(values), dimensions={1,0}
+    ROOT sort = (f32[3,2]{1,0}, f32[3,2]{1,0}) sort(keys, transpose),
+      dimensions={1}
+  })";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(hlo_text));
+
+  ComputationLayout computation_layout(
+      module->entry_computation()->ComputeProgramShape(),
+      /*ignore_layouts=*/false);
+  GpuLayoutAssignment layout_assignment(
+      &computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+      backend().default_stream_executor());
+  EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie());
+
+  Shape expected_shape = ShapeUtil::MakeShapeWithLayout(F32, {3, 2}, {1, 0});
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Sort(op::ShapeWithLayout(expected_shape),
+                       op::ShapeWithLayout(expected_shape)));
+}
+
 }  // namespace
 }  // namespace gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
index 4d5d8e99f8..b61f038739 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -125,8 +126,8 @@ bool IsIEEEFloatingPointScalarConstant(const HloInstruction* constant) {
   }
 
   // Compute the precise number of operands to the new fusion.
-  tensorflow::gtl::FlatSet<const HloInstruction*> operands(
-      a->operands().begin(), a->operands().end());
+  absl::flat_hash_set<const HloInstruction*> operands(a->operands().begin(),
+                                                      a->operands().end());
   operands.insert(b->operands().begin(), b->operands().end());
   // If there's an edge between `a` and `b`, don't count it: We're fusing that
   // producer -> consumer relationship.
diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
index c21f76f6eb..835924024b 100644
--- a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/service/gpu/gpu_fusible.h"
 #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -101,7 +101,7 @@ bool GpuMultiOutputFusion::IsFusible(HloInstruction* instr) {
 
 int64 GpuMultiOutputFusion::GetProfit(HloInstruction* instr1,
                                       HloInstruction* instr2) {
-  tensorflow::gtl::FlatSet<HloInstruction*> in_list;
+  absl::flat_hash_set<HloInstruction*> in_list;
   for (auto instr : instr1->operands()) {
     if (!IsProfitableOperand(instr)) {
       continue;
@@ -148,7 +148,7 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() {
   bool changed = false;
   RecomputeReachability();
 
-  tensorflow::gtl::FlatSet<HloInstruction*> to_fuse;
+  absl::flat_hash_set<HloInstruction*> to_fuse;
   // Keep a list of the instructions to fuse after making all the fusion
   // decisions. We first aggressively add instructions to potential_fusion_list,
   // then filter out instructions that will be no longer fusible because of
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 0b3b429710..b4ae2e42c7 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -232,7 +232,8 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     // a layout-sensitive verifier!
     HloPassPipeline pipeline("layout assignment");
     pipeline.AddPass<GpuLayoutAssignment>(
-        hlo_module->mutable_entry_computation_layout(), stream_exec);
+        hlo_module->mutable_entry_computation_layout(),
+        LayoutAssignment::InstructionCanChangeLayout, stream_exec);
     TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status());
   }
 
diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index 147776c8c4..b343305554 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -26,7 +27,7 @@ limitations under the License.
 namespace xla {
 
 using absl::flat_hash_map;
-using tensorflow::gtl::FlatSet;
+using absl::flat_hash_set;
 
 /*static*/
 StatusOr<int64> HeapSimulator::MinimumMemoryForModule(
@@ -116,9 +117,9 @@ Status HeapSimulator::RunComputation(
   // 'used_buffers' is the reverse map - it tracks which buffers were used by an
   // instruction, so that we can remove the instructions from a buffer's live
   // set after they are visited.
-  flat_hash_map<const BufferValue*, FlatSet<const HloInstruction*>>
+  flat_hash_map<const BufferValue*, flat_hash_set<const HloInstruction*>>
       live_buffers;
-  flat_hash_map<const HloInstruction*, FlatSet<const BufferValue*>>
+  flat_hash_map<const HloInstruction*, flat_hash_set<const BufferValue*>>
       used_buffers;
   auto add_user_to_buffer = [this, &live_buffers, &used_buffers](
                                 const HloInstruction* user,
@@ -216,7 +217,7 @@ Status HeapSimulator::RunComputation(
       VLOG(4) << "  Removing user " << instruction->name() << " from buffer "
               << operand_buffer->ToString();
       auto it = live_buffers.find(operand_buffer);
-      FlatSet<const HloInstruction*>* live_set = &it->second;
+      flat_hash_set<const HloInstruction*>* live_set = &it->second;
       live_set->erase(instruction);
       if (live_set->empty()) {
         live_buffers.erase(it);
@@ -238,7 +239,7 @@ Status HeapSimulator::RunComputation(
     // that we should assign.
 
     // Make sure each buffer get reused at most once.
-    FlatSet<const BufferValue*> reused_buffers;
+    flat_hash_set<const BufferValue*> reused_buffers;
     for (const BufferValue* buffer : buffers_defined_by_instruction) {
       if (IgnoreBuffer(buffer)) {
         continue;
@@ -326,7 +327,7 @@ Status HeapSimulator::RunComputation(
   to_free.reserve(live_buffers.size());
   for (const auto& buffer_pending : live_buffers) {
     const BufferValue* buffer = buffer_pending.first;
-    const FlatSet<const HloInstruction*>& pending = buffer_pending.second;
+    const flat_hash_set<const HloInstruction*>& pending = buffer_pending.second;
     CHECK_EQ(pending.size(), 1) << *buffer;
     CHECK(*pending.begin() == nullptr) << *buffer;
     to_free.push_back(buffer);
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index a5bb3f81f7..b0295a6163 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/buffer_value.h"
 #include "tensorflow/compiler/xla/service/buffer_value_containers.h"
 #include "tensorflow/compiler/xla/service/hlo.pb.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -197,8 +197,8 @@ class HeapSimulator {
       shared_buffers_;
 
   // Hold some sets for error-checking the sequence of Alloc and Free calls.
-  tensorflow::gtl::FlatSet<const BufferValue*> allocated_buffers_;
-  tensorflow::gtl::FlatSet<const BufferValue*> freed_buffers_;
+  absl::flat_hash_set<const BufferValue*> allocated_buffers_;
+  absl::flat_hash_set<const BufferValue*> freed_buffers_;
 
   // Debugging information filled in while the heap simulator runs.
   HeapSimulatorTrace debug_trace_;
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
index b6e1f52cf5..c3da12e273 100644
--- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -120,7 +121,7 @@ class BufferValueMap {
   }
 
   // Return a set of all the values in the given buffer.
-  const tensorflow::gtl::FlatSet<const HloValue*>& GetValuesInBuffer(
+  const absl::flat_hash_set<const HloValue*>& GetValuesInBuffer(
       BufferNumber buffer_number) const {
     return buffers_.at(buffer_number);
   }
@@ -143,7 +144,7 @@ class BufferValueMap {
   // Move the given value into the given buffer.
   void MoveValueToBuffer(const HloValue& value, BufferNumber buffer_number) {
     BufferNumber old_buffer_number = value_to_buffer_number_.at(&value);
-    tensorflow::gtl::FlatSet<const HloValue*>& old_value_set =
+    absl::flat_hash_set<const HloValue*>& old_value_set =
         buffers_.at(old_buffer_number);
     old_value_set.erase(&value);
     if (old_value_set.empty()) {
@@ -291,7 +292,7 @@ class BufferValueMap {
   const HloDataflowAnalysis& dataflow_;
 
   // A map containing the set of values contained in each buffer.
-  absl::flat_hash_map<BufferNumber, tensorflow::gtl::FlatSet<const HloValue*>>
+  absl::flat_hash_map<BufferNumber, absl::flat_hash_set<const HloValue*>>
       buffers_;
 
   // A map indicating which buffer each value is contained in.
@@ -351,7 +352,7 @@ bool HloAliasAnalysis::InstructionBuffersAreAmbiguous(
 
 bool HloAliasAnalysis::InstructionBuffersAreDistinct(
     const HloInstruction* instruction) const {
-  tensorflow::gtl::FlatSet<const HloBuffer*> buffers_seen;
+  absl::flat_hash_set<const HloBuffer*> buffers_seen;
   for (const auto& pair :
        dataflow_analysis_->GetInstructionValueSet(instruction)) {
     const HloValueSet& value_set = pair.second;
diff --git a/tensorflow/compiler/xla/service/hlo_buffer.cc b/tensorflow/compiler/xla/service/hlo_buffer.cc
index 6c11a073b7..9c3aa0e64d 100644
--- a/tensorflow/compiler/xla/service/hlo_buffer.cc
+++ b/tensorflow/compiler/xla/service/hlo_buffer.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 257dd5876f..6ef67ab0a8 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -25,6 +25,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
@@ -40,7 +41,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -278,10 +278,9 @@ void HloComputation::set_root_instruction(HloInstruction* new_root_instruction,
 namespace {
 
 // Helper which builds a post order of the HLO call graph.
-void ComputeComputationPostOrder(
-    HloComputation* computation,
-    tensorflow::gtl::FlatSet<HloComputation*>* visited,
-    std::vector<HloComputation*>* post_order) {
+void ComputeComputationPostOrder(HloComputation* computation,
+                                 absl::flat_hash_set<HloComputation*>* visited,
+                                 std::vector<HloComputation*>* post_order) {
   if (visited->insert(computation).second) {
     for (auto* instruction : computation->instructions()) {
       for (HloComputation* called_computation :
@@ -416,7 +415,7 @@ std::vector<HloInstruction*> HloComputation::MakeInstructionPostOrder() const {
 
 std::vector<HloComputation*> HloComputation::MakeEmbeddedComputationsList()
     const {
-  tensorflow::gtl::FlatSet<HloComputation*> visited;
+  absl::flat_hash_set<HloComputation*> visited;
   std::vector<HloComputation*> post_order;
 
   // To avoid special handling of this computation, cast away const of
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index af929ac009..d87ab4bda1 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -26,6 +26,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/iterator_util.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -41,7 +42,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/hlo_cse.cc b/tensorflow/compiler/xla/service/hlo_cse.cc
index b59c9ba3ed..e602107cbe 100644
--- a/tensorflow/compiler/xla/service/hlo_cse.cc
+++ b/tensorflow/compiler/xla/service/hlo_cse.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/literal.h"
@@ -34,7 +35,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/hash/hash.h"
 
 namespace xla {
@@ -137,8 +137,8 @@ StatusOr<bool> HloCSE::Run(HloModule* module) {
     // HLO instructions are grouped into equivalency classes by using the
     // cse_equal predicate defined above. This set holds a representative
     // instruction for each class.
-    tensorflow::gtl::FlatSet<HloInstruction*, decltype(&CseHash),
-                             decltype(cse_equal)>
+    absl::flat_hash_set<HloInstruction*, decltype(&CseHash),
+                        decltype(cse_equal)>
         representatives(/*N=*/computation->instruction_count() + 1, &CseHash,
                         cse_equal);
     for (auto instruction : computation->MakeInstructionPostOrder()) {
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index 6a63681996..44cde4a3d2 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <queue>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
@@ -91,7 +92,7 @@ HloDataflowAnalysis::HloDataflowAnalysis(
 
 bool HloDataflowAnalysis::AreTransitiveUsesElementwiseOrTuple(
     const HloInstruction* inst) {
-  tensorflow::gtl::FlatSet<const HloInstruction*> visited;
+  absl::flat_hash_set<const HloInstruction*> visited;
   absl::InlinedVector<const HloInstruction*, 4> stack;
   stack.push_back(inst);
   while (!stack.empty()) {
@@ -159,8 +160,8 @@ void HloDataflowAnalysis::MarkValueForDeletion(HloValue::Id value_id) {
 void HloDataflowAnalysis::DeleteMarkedValues() {
 #ifndef NDEBUG
   // Verify that no marked-for-deletion values are in any of the value sets.
-  tensorflow::gtl::FlatSet<HloValue::Id> id_set(value_ids_to_delete_.begin(),
-                                                value_ids_to_delete_.end());
+  absl::flat_hash_set<HloValue::Id> id_set(value_ids_to_delete_.begin(),
+                                           value_ids_to_delete_.end());
   for (const auto& pair : value_sets_) {
     const HloInstruction* instruction = pair.first;
     const InstructionValueSet& instruction_value_set = pair.second;
@@ -673,7 +674,7 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet(
 
 void HloDataflowAnalysis::Propagate() {
   std::queue<HloInstruction*> worklist;
-  tensorflow::gtl::FlatSet<HloInstruction*> workset;
+  absl::flat_hash_set<HloInstruction*> workset;
   auto add_to_worklist = [&worklist, &workset](HloInstruction* instruction) {
     if (workset.insert(instruction).second) {
       worklist.push(instruction);
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc
index 159c39d557..6ca1255ede 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -217,7 +218,7 @@ bool HloDomainMap::IsDomainInstruction(HloInstruction* instruction) const {
 
 /* static */ std::vector<HloInstruction*>
 HloDomainMap::MakeNonDomainInstructions(
-    const tensorflow::gtl::FlatSet<HloInstruction*>& instruction_set,
+    const absl::flat_hash_set<HloInstruction*>& instruction_set,
     const InstructionOrderMap& instructions_order) {
   std::vector<HloInstruction*> instructions;
   instructions.reserve(instruction_set.size());
diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h b/tensorflow/compiler/xla/service/hlo_domain_map.h
index 8584bc021d..c8d581b746 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_map.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_map.h
@@ -20,13 +20,13 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_domain_metadata.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -110,7 +110,7 @@ class HloDomainMap {
   // Out of an instruction set, returns a vector of all the ones which are not
   // a kDomain kind.
   static std::vector<HloInstruction*> MakeNonDomainInstructions(
-      const tensorflow::gtl::FlatSet<HloInstruction*>& instruction_set,
+      const absl::flat_hash_set<HloInstruction*>& instruction_set,
       const InstructionOrderMap& instructions_order);
 
   // Populates domain_metadata_id_ that maps each HloInstruction to the unique
diff --git a/tensorflow/compiler/xla/service/hlo_domain_metadata.h b/tensorflow/compiler/xla/service/hlo_domain_metadata.h
index 302807f816..d3c83c15ae 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_metadata.h
@@ -20,11 +20,11 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
@@ -42,7 +42,7 @@ class DomainMetadata {
     // operand/user pathways, without crossing a kDomain instruction of a given
     // kind. The reach_set can contain kDomain instructions of other kinds, if
     // two domains of different kind intersect each other.
-    tensorflow::gtl::FlatSet<HloInstruction*> reach_set;
+    absl::flat_hash_set<HloInstruction*> reach_set;
 
     // The same instructions in reach_set, but purged from kDomain instructions
     // and ordered according to their computation graph post-order, i.e.
@@ -55,8 +55,8 @@ class DomainMetadata {
     // whose dataflow enters the reach set (domain), while the exit_domains
     // contains the set of kDomain instructions whose dataflow exit the reach
     // set.
-    tensorflow::gtl::FlatSet<HloInstruction*> enter_domains;
-    tensorflow::gtl::FlatSet<HloInstruction*> exit_domains;
+    absl::flat_hash_set<HloInstruction*> enter_domains;
+    absl::flat_hash_set<HloInstruction*> exit_domains;
   };
 
   virtual ~DomainMetadata() = default;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 5d5c9c7e58..0207f9ae3f 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/ascii.h"
@@ -44,7 +45,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/platform/human_readable_json.h"
 #include "tensorflow/core/platform/logging.h"
@@ -1433,7 +1433,7 @@ int64 HloInstruction::operand_index(const HloInstruction* target) const {
 
 HloInstruction::InstructionVector HloInstruction::unique_operands() const {
   InstructionVector unique;
-  tensorflow::gtl::FlatSet<const HloInstruction*> seen;
+  absl::flat_hash_set<const HloInstruction*> seen;
   for (HloInstruction* operand : operands()) {
     if (seen.insert(operand).second) {
       unique.push_back(operand);
diff --git a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
index 1c2b2868fd..55314d0ae9 100644
--- a/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
+++ b/tensorflow/compiler/xla/service/hlo_memory_scheduler.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/heap_simulator.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
@@ -111,7 +112,7 @@ class ListScheduler {
     // LogicalBuffer is in an operand of the instruction as indicated by
     // points-to analysis.
     for (auto* instruction : computation.instructions()) {
-      tensorflow::gtl::FlatSet<const LogicalBuffer*> instr_uses;
+      absl::flat_hash_set<const LogicalBuffer*> instr_uses;
       for (auto* operand : instruction->operands()) {
         points_to_analysis.GetPointsToSet(operand).ForEachElement(
             [&](const ShapeIndex& /*index*/,
@@ -360,7 +361,7 @@ class ListScheduler {
   std::unordered_map<const LogicalBuffer*, int64> unscheduled_use_count_;
 
   // Set of instructions which have been scheduled.
-  tensorflow::gtl::FlatSet<const HloInstruction*> scheduled_instructions_;
+  absl::flat_hash_set<const HloInstruction*> scheduled_instructions_;
 };
 
 int64 SumLogicalBufferSizes(
@@ -418,7 +419,7 @@ StatusOr<HloInstructionSequence> DFSMemoryScheduler(
         points_to_analysis.GetBuffersDefinedByInstruction(hlo), size_function);
     total_sizes[hlo] = logical_buffer_size;
     cumulative_total_size += logical_buffer_size;
-    tensorflow::gtl::FlatSet<const HloInstruction*> unique_operands(
+    absl::flat_hash_set<const HloInstruction*> unique_operands(
         hlo->operands().begin(), hlo->operands().end());
     for (const HloInstruction* operand : unique_operands) {
       extra_users[hlo] += extra_users[operand];
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 9359e9a8be..7527e35c95 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -24,6 +24,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -328,10 +329,10 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
 
   // Because we didn't uniquify the names or the ids, double-check that the
   // instruction and computation names and ids are unique from the proto.
-  tensorflow::gtl::FlatSet<string> computation_names;
-  tensorflow::gtl::FlatSet<string> instruction_names;
-  tensorflow::gtl::FlatSet<int> computation_ids;
-  tensorflow::gtl::FlatSet<int> instruction_ids;
+  absl::flat_hash_set<string> computation_names;
+  absl::flat_hash_set<string> instruction_names;
+  absl::flat_hash_set<int> computation_ids;
+  absl::flat_hash_set<int> instruction_ids;
   for (HloComputation* computation : module->computations()) {
     TF_RET_CHECK(!ContainsKey(computation_names, computation->name()))
         << "Computation name is not unique: " << computation->name();
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.cc b/tensorflow/compiler/xla/service/hlo_module_group_util.cc
index d83ee71490..fddeb5f0a2 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_util.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_util.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
@@ -32,7 +33,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -42,7 +42,7 @@ std::vector<HloInstruction*> HloModuleGroupUtil::GlobalPredecessors(
     HloInstruction* instruction) {
   std::vector<HloInstruction*>
       predecessors;  // Use a vector to avoid non-determinism.
-  tensorflow::gtl::FlatSet<HloInstruction*> unique;
+  absl::flat_hash_set<HloInstruction*> unique;
 
   // Adds to the unique predecessors list; if the predecessors is a companion
   // instruction, also add companion instructions; if the predecessors is a
@@ -119,7 +119,7 @@ std::vector<HloInstruction*> HloModuleGroupUtil::GlobalSuccessors(
     HloInstruction* instruction) {
   std::vector<HloInstruction*>
       successors;  // Use a vector to avoid non-determinism.
-  tensorflow::gtl::FlatSet<HloInstruction*> unique;
+  absl::flat_hash_set<HloInstruction*> unique;
 
   // Adds to the unique successors list; if the successor is a companion
   // instruction, also add companion instructions; if the successor is a
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
index 59fd01cb58..5e004ce78a 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <functional>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
@@ -25,7 +26,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -75,8 +75,8 @@ StatusOr<bool> HloPassPipeline::RunPassesInternal(
 std::vector<HloPassInterface*> HloPassPipeline::GetEnabledPasses(
     const DebugOptions& debug_options) {
   auto repeated_field = debug_options.xla_disable_hlo_passes();
-  tensorflow::gtl::FlatSet<string> disabled_pass_names(repeated_field.begin(),
-                                                       repeated_field.end());
+  absl::flat_hash_set<string> disabled_pass_names(repeated_field.begin(),
+                                                  repeated_field.end());
   if (!disabled_pass_names.empty()) {
     VLOG(1) << "Passes disabled by --xla_disable_hlo_passes: "
             << absl::StrJoin(disabled_pass_names, ", ");
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index abdd9a9212..5ac43808ee 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -981,7 +982,7 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
   // rematerialization is essentially a move). If the next rematerialization of
   // the instruction is also a move then the rematerialization is added to the
   // blacklist.
-  tensorflow::gtl::FlatSet<const HloInstruction*> remat_move_instructions;
+  absl::flat_hash_set<const HloInstruction*> remat_move_instructions;
 
   // The map from instructions to their rematerializable status.
   absl::flat_hash_map<const HloInstruction*, bool> remat_able;
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h
index 5a02e3a8bb..70d83c04f0 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.h
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h
@@ -16,6 +16,7 @@
 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_REMATERIALIZATION_H_
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_graph.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
@@ -122,7 +123,7 @@ class HloRematerialization : public HloModulePass {
 
   // Set of computations which have had rematerialization
   // applied. Rematerialization is only applied once per computation.
-  tensorflow::gtl::FlatSet<const HloComputation*> rematerialized_computations_;
+  absl::flat_hash_set<const HloComputation*> rematerialized_computations_;
 
   // Count of the total instructions rematerialized.
   int64 instructions_rematerialized_ = 0;
diff --git a/tensorflow/compiler/xla/service/hlo_schedule.cc b/tensorflow/compiler/xla/service/hlo_schedule.cc
index 7c5c98f04e..9972eb2077 100644
--- a/tensorflow/compiler/xla/service/hlo_schedule.cc
+++ b/tensorflow/compiler/xla/service/hlo_schedule.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
 #include "tensorflow/compiler/xla/map_util.h"
@@ -119,7 +120,7 @@ Status HloSchedule::UpdateComputationSchedule(
   }
 
   // Set of all HloInstructions in the schedule.
-  tensorflow::gtl::FlatSet<int> ids_in_schedule;
+  absl::flat_hash_set<int> ids_in_schedule;
   for (int id : sequences_.at(computation->unique_id()).ids()) {
     InsertOrDie(&ids_in_schedule, id);
   }
@@ -210,7 +211,7 @@ Status HloSchedule::Update() {
   if (sequences_.size() > nonfusion_computations.size()) {
     // Schedule contains some computations which have been removed from the
     // HloModule. Remove them from the schedule as well.
-    tensorflow::gtl::FlatSet<int64> nonfusion_computations_ids;
+    absl::flat_hash_set<int64> nonfusion_computations_ids;
     for (const HloComputation* computation : nonfusion_computations) {
       nonfusion_computations_ids.insert(computation->unique_id());
     }
diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc
index 8549487702..59594ab2f0 100644
--- a/tensorflow/compiler/xla/service/hlo_value.cc
+++ b/tensorflow/compiler/xla/service/hlo_value.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -31,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -167,7 +167,7 @@ void HloValue::SetPositionsAndComputeUses(
   positions_.insert(positions_.end(), positions.begin(), positions.end());
 
   // Gather the computation roots at which this value appears.
-  tensorflow::gtl::FlatSet<HloInstruction*> root_positions;
+  absl::flat_hash_set<HloInstruction*> root_positions;
   for (const HloPosition& position : positions_) {
     if (position.instruction ==
         position.instruction->parent()->root_instruction()) {
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
index 7ee789276d..1ebb331977 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -24,7 +25,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/hlo_evaluator.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 namespace gtl = ::tensorflow::gtl;
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index bb69cb9c47..27fe89375d 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -44,7 +44,8 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) {
   HloPassPipeline pipeline("Interpreter");
 
   pipeline.AddPass<LayoutAssignment>(
-      hlo_module->mutable_entry_computation_layout());
+      hlo_module->mutable_entry_computation_layout(),
+      LayoutAssignment::InstructionCanChangeLayout);
   return pipeline.Run(hlo_module).status();
 }
 
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 082bf8bffe..395e01fb59 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -958,10 +958,15 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) {
 
 LayoutAssignment::LayoutAssignment(
     ComputationLayout* entry_computation_layout,
+    std::function<bool(const HloInstruction*)>
+        instruction_can_change_layout_func,
     ChannelLayoutConstraints* channel_constraints)
     : entry_computation_layout_(entry_computation_layout),
+
       saved_entry_computation_layout_(*entry_computation_layout),
-      channel_layout_constraints_(channel_constraints) {
+      channel_layout_constraints_(channel_constraints),
+      instruction_can_change_layout_func_(
+          std::move(instruction_can_change_layout_func)) {
   if (channel_layout_constraints_ != nullptr) {
     // Save a copy of the input ChannelLayoutConstraints so that we can reset it
     // if we have to undo previous operations (ClearPreviousPassSideEffects()).
@@ -982,7 +987,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOperandLayoutFromOutputLayout(
   if (!ShapeUtil::IsScalar(operand->shape()) &&
       ShapeUtil::Rank(operand->shape()) ==
           ShapeUtil::Rank(instruction->shape()) &&
-      InstructionRequiresInputLayoutEqualToOutputLayout(instruction)) {
+      !instruction_can_change_layout_func_(instruction)) {
     // Propagate the result layout to the operand layout if the instruction
     // requires the same layout out for the result and the operand.
     //
@@ -1060,7 +1065,7 @@ std::unique_ptr<Layout> LayoutAssignment::ChooseOutputLayoutFromOperandLayout(
 
   if (!ShapeUtil::IsScalar(operand->shape()) &&
       ShapeUtil::Rank(operand->shape()) == ShapeUtil::Rank(user->shape()) &&
-      InstructionRequiresInputLayoutEqualToOutputLayout(user)) {
+      !instruction_can_change_layout_func_(user)) {
     // Assign users the same layout as the operand.
     return absl::make_unique<Layout>(operand_layout);
   }
@@ -1803,7 +1808,8 @@ StatusOr<bool> LayoutAssignment::Run(HloModule* module) {
   return true;
 }
 
-bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout(
+/* static */
+bool LayoutAssignment::InstructionCanChangeLayout(
     const HloInstruction* instruction) {
   switch (instruction->opcode()) {
     case HloOpcode::kAbs:
@@ -1869,7 +1875,7 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout(
     case HloOpcode::kTanh:
     case HloOpcode::kTupleSelect:
     case HloOpcode::kWhile:
-      return true;
+      return false;
     case HloOpcode::kBatchNormGrad:
     case HloOpcode::kBatchNormInference:
     case HloOpcode::kBatchNormTraining:
@@ -1900,7 +1906,7 @@ bool LayoutAssignment::InstructionRequiresInputLayoutEqualToOutputLayout(
     case HloOpcode::kTrace:
     case HloOpcode::kTranspose:
     case HloOpcode::kTuple:
-      return false;
+      return true;
   }
 }
 
diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h
index 1591256fad..2d48e12263 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.h
+++ b/tensorflow/compiler/xla/service/layout_assignment.h
@@ -26,6 +26,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/computation_layout.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -39,7 +40,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -286,6 +286,11 @@ class LayoutAssignment : public HloModulePass {
   // entry_computation_layout is modified to populate a layout for the result in
   // the case that no particular layout is requested.
   //
+  // instruction_can_change_layout_func is a function object that determines
+  // whether an instruction can change layouts. An instruction not being able to
+  // change layout means that it requires operands with the same rank as the
+  // output to have the same layout as the output.
+  //
   // channel_constraints is both an input and output. Any sends or recvs that
   // are present in channel_constraints will be laid out as constrained. Any
   // unconstrained sends or recvs will be laid out as locally optimal and their
@@ -295,6 +300,8 @@ class LayoutAssignment : public HloModulePass {
   // within any module passed to `Run`.
   explicit LayoutAssignment(
       ComputationLayout* entry_computation_layout,
+      std::function<bool(const HloInstruction*)>
+          instruction_can_change_layout_func = InstructionCanChangeLayout,
       ChannelLayoutConstraints* channel_constraints = nullptr);
   ~LayoutAssignment() override {}
   absl::string_view name() const override { return "layout-assignment"; }
@@ -303,10 +310,10 @@ class LayoutAssignment : public HloModulePass {
   // (any layouts were changed).
   StatusOr<bool> Run(HloModule* module) override;
 
-  // Returns true if the instruction requires that operands with the same rank
-  // as the output have to have the same layout as the output.
-  virtual bool InstructionRequiresInputLayoutEqualToOutputLayout(
-      const HloInstruction* instruction);
+  // Determines whether an instruction can change layouts. An instruction not
+  // being able to change layout means that it requires operands with the same
+  // rank as the output to have the same layout as the output.
+  static bool InstructionCanChangeLayout(const HloInstruction* instruction);
 
  protected:
   // These methods, invoked by PropagateConstraints, propagate a layout
@@ -504,7 +511,7 @@ class LayoutAssignment : public HloModulePass {
 
   // Every copy added to the module by the layout assignment pass is registered
   // here.
-  tensorflow::gtl::FlatSet<HloInstruction*> added_copies_;
+  absl::flat_hash_set<HloInstruction*> added_copies_;
 
   // The pointer to the channel layout constraints passed in with the
   // constructor. If not nullptr, this is an input/output argument.
@@ -521,8 +528,10 @@ class LayoutAssignment : public HloModulePass {
 
   // The set of HLO instructions which lacked any layout constraint, thus
   // receiving propagated default layouts.
-  tensorflow::gtl::FlatSet<const HloInstruction*>
-      unconstrained_layout_instructions_;
+  absl::flat_hash_set<const HloInstruction*> unconstrained_layout_instructions_;
+
+  std::function<bool(const HloInstruction*)>
+      instruction_can_change_layout_func_;
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 752a61476d..5af6068702 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -55,7 +55,8 @@ class LayoutAssignmentTest : public HloVerifiedTestBase {
                      ComputationLayout* entry_computation_layout,
                      ChannelLayoutConstraints* channel_constraints = nullptr) {
     LayoutAssignment layout_assignment(
-        entry_computation_layout, /*channel_constraints=*/channel_constraints);
+        entry_computation_layout, LayoutAssignment::InstructionCanChangeLayout,
+        /*channel_constraints=*/channel_constraints);
     EXPECT_IS_OK(layout_assignment.Run(module).status());
   }
 
diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD
index 3934d2e493..6223a34b12 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/BUILD
+++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -39,6 +39,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:logical_buffer",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@llvm//:core",
     ],
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
index e5370eca56..643ecd0fba 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h"
 
-#include <unordered_set>
+#include <map>
 
 #include "llvm/IR/MDBuilder.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
@@ -164,9 +164,7 @@ llvm::MDNode* AliasAnalysis::GetNoaliasMetadataForBuffer(
     add_buffers_to_worklist(operand);
   }
 
-  tensorflow::gtl::FlatSet<BufferAllocation::Slice,
-                           BufferAllocation::Slice::Hasher>
-      buffers;
+  std::set<BufferAllocation::Slice> buffers;
   for (const LogicalBuffer* buffer : worklist) {
     // Skip buffers which cannot be added to the noalias set.
     if (!assignment.HasAllocation(*buffer) ||
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
index 88cde2d3d9..2b46b3c396 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h
@@ -23,7 +23,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 namespace llvm_ir {
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc
index 95b1c20663..2ca527bc4c 100644
--- a/tensorflow/compiler/xla/service/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc
@@ -15,10 +15,10 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/multi_output_fusion.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/shape_util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace xla {
@@ -50,7 +50,7 @@ StatusOr<bool> MultiOutputFusion::Run(HloModule* module) {
       all_fusion_candidates_.push_back(instruction);
 
       std::vector<HloInstruction*> candidates;
-      tensorflow::gtl::FlatSet<HloInstruction*> candidates_set;
+      absl::flat_hash_set<HloInstruction*> candidates_set;
       VLOG(10) << "Looking at instruction: " << instruction->name();
       for (auto operand : instruction->operands()) {
         // Filter out the non-interesting instructions -- they
@@ -172,7 +172,7 @@ void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) {
   // Update the fusible list for fusion. Variable new_fusibles keeps
   // track of the new or changed entries.
   std::vector<std::pair<HloInstruction*, int64>> new_fusibles;
-  tensorflow::gtl::FlatSet<HloInstruction*> in_list;
+  absl::flat_hash_set<HloInstruction*> in_list;
   auto it = fusion_node.fusibles.begin();
   while (it != fusion_node.fusibles.end()) {
     HloInstruction* instr = it->first;
diff --git a/tensorflow/compiler/xla/service/name_uniquer.h b/tensorflow/compiler/xla/service/name_uniquer.h
index 1ac60f1cf4..8909d0f4fe 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.h
+++ b/tensorflow/compiler/xla/service/name_uniquer.h
@@ -19,9 +19,9 @@ limitations under the License.
 #include <string>
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 
 namespace xla {
@@ -69,7 +69,7 @@ class NameUniquer {
     int64 next_ = 0;
 
     // Set of all the identifiers which has been used.
-    tensorflow::gtl::FlatSet<int64> used_;
+    absl::flat_hash_set<int64> used_;
   };
 
   // The string to use to separate the prefix of the name from the uniquing
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 6ccea9d2b5..e379911462 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
@@ -33,7 +34,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/window_util.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/math/math_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -577,7 +577,7 @@ Status ValidateDotDimensionNumbers(
   // Check that dimension numbers are unique.
   auto dims_unique = [](absl::Span<const int64> contracting_dims,
                         absl::Span<const int64> batch_dims) -> bool {
-    tensorflow::gtl::FlatSet<int64> dim_set;
+    absl::flat_hash_set<int64> dim_set;
     auto is_unique = [&dim_set](int64 i) -> bool {
       return dim_set.insert(i).second;
     };
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc
index 921a984589..56952e3ada 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.cc
+++ b/tensorflow/compiler/xla/service/shaped_buffer.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -26,7 +27,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -147,7 +147,7 @@ void ScopedShapedBuffer::Deallocate() {
   // Deallocate all non-null buffers. A buffer may appear in more than one spot
   // in the shape (eg, a tuple with a repeated element) so keep track of what
   // has been deallocated.
-  tensorflow::gtl::FlatSet<void*> deallocated_ptrs;
+  absl::flat_hash_set<void*> deallocated_ptrs;
   for (auto& pair : buffers_) {
     se::DeviceMemoryBase& memory_base = pair.second;
     if (!memory_base.is_null() &&
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
index 78392d3bb2..64ad1dc80e 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h
@@ -36,7 +36,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/compactptrset.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
index 2590473c77..9795b2830b 100644
--- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
+++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc
@@ -16,17 +16,17 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/while_loop_invariant_code_motion.h"
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/service/tuple_util.h"
 #include "tensorflow/compiler/xla/service/while_util.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace xla {
 
 using absl::flat_hash_map;
+using absl::flat_hash_set;
 using absl::InlinedVector;
-using tensorflow::gtl::FlatSet;
 
 // Copies `to_hoist` to the computation containing `while_instr`, hoisting its
 // operands as needed.  All of its transitive operands are expected to be either
@@ -35,7 +35,7 @@ using tensorflow::gtl::FlatSet;
 // them into `hoisted_instructions`.
 static void CreateLoopInvariantCopy(
     flat_hash_map<HloInstruction*, HloInstruction*>* hoisted_instructions,
-    FlatSet<HloInstruction*>* unhoisted_invariant_instructions,
+    flat_hash_set<HloInstruction*>* unhoisted_invariant_instructions,
     HloInstruction* while_instr, HloInstruction* to_hoist) {
   HloComputation* parent_of_while = while_instr->parent();
   HloComputation* while_body = while_instr->while_body();
@@ -153,7 +153,7 @@ WhileLoopInvariantCodeMotion::TryHoistingInvariantInstructionsFromWhileBody(
   // unprofitable to be hoisted alone by NotWorthHoistingIndividually.  When we
   // hoist an instruction in this set, we move it from
   // unhoisted_invariant_instructions to hoisted_instructions.
-  FlatSet<HloInstruction*> unhoisted_invariant_instructions;
+  flat_hash_set<HloInstruction*> unhoisted_invariant_instructions;
 
   // Invariant GTE's axiomatically satisfy the constraints for
   // unhoisted_invariant_instructions -- they can be legally hoisted, but there
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
index 07de8492ba..630d71e5ca 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/while_loop_simplifier.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/types/optional.h"
@@ -114,7 +115,7 @@ static StatusOr<bool> TryRemoveDeadWhileParams(HloInstruction* while_op) {
     return false;
   }
 
-  tensorflow::gtl::FlatSet<int64> used_tuple_indices;
+  absl::flat_hash_set<int64> used_tuple_indices;
   for (HloComputation* comp : {while_body, while_cond}) {
     // The HLO verifier ensures that while_input's shape matches while_init's
     // shape, which we verified above is a tuple.
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 06b6330321..8a0ae33042 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -2146,11 +2146,11 @@ xla_test(
         ":test_utils",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla/client:xla_builder",
-        "//tensorflow/compiler/xla/client:xla_computation",
         "//tensorflow/compiler/xla/service:hlo_parser",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
+        "@com_google_absl//absl/container:flat_hash_set",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/tests/test_utils_test.cc b/tensorflow/compiler/xla/tests/test_utils_test.cc
index 181e5cbe29..bc433eac8f 100644
--- a/tensorflow/compiler/xla/tests/test_utils_test.cc
+++ b/tensorflow/compiler/xla/tests/test_utils_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/tests/test_utils.h"
 
+#include "absl/container/flat_hash_set.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/shape_util.h"
@@ -145,7 +146,7 @@ ENTRY %sort.148.1589 (parameter.0: f32[1048576], parameter.1: s32[1048576]) -> (
   ASSERT_EQ(args.size(), 2);
   const Literal& key_arg = args[0];
 
-  tensorflow::gtl::FlatSet<uint32> key_set;
+  absl::flat_hash_set<uint32> key_set;
   for (const float& value : key_arg.data<float>()) {
     EXPECT_TRUE(key_set.insert(tensorflow::bit_cast<uint32>(value)).second);
   }
@@ -168,7 +169,7 @@ ENTRY %sort.148.1589 (parameter.0: s32[1048576], parameter.1: s32[1048576]) -> (
   ASSERT_EQ(args.size(), 2);
   const Literal& key_arg = args[0];
 
-  tensorflow::gtl::FlatSet<int32> key_set;
+  absl::flat_hash_set<int32> key_set;
   for (const int32& value : key_arg.data<int32>()) {
     EXPECT_TRUE(key_set.insert(tensorflow::bit_cast<uint32>(value)).second);
   }
-- 
GitLab


From ea46c6d91837b0e4da9a9a4a8f0c1a1ee3a0280b Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Thu, 11 Oct 2018 21:26:22 -0700
Subject: [PATCH 0847/1085] Clang format errors fixed.

---
 tensorflow/core/kernels/mkl_aggregate_ops.cc | 2 +-
 tensorflow/core/kernels/mkl_reshape_op.cc    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc
index ae1e6b6751..8eb334f2b4 100644
--- a/tensorflow/core/kernels/mkl_aggregate_ops.cc
+++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc
@@ -24,8 +24,8 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/logging.h"
 
-#include "tensorflow/core/util/mkl_util.h"
 #include "mkldnn.hpp"
+#include "tensorflow/core/util/mkl_util.h"
 using mkldnn::stream;
 using mkldnn::sum;
 
diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index 358233c913..342e2265ee 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -24,8 +24,8 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
 
-#include "tensorflow/core/util/mkl_util.h"
 #include "mkldnn.hpp"
+#include "tensorflow/core/util/mkl_util.h"
 using mkldnn::stream;
 
 namespace tensorflow {
-- 
GitLab


From 82249722d239c74b3746c65b3696775c7f100e69 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Thu, 11 Oct 2018 21:26:30 -0700
Subject: [PATCH 0848/1085] Sort members when adding enum names in TF API
 traverser.

PiperOrigin-RevId: 216809329
---
 tensorflow/tools/common/traverse.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/common/traverse.py b/tensorflow/tools/common/traverse.py
index 1eb9192dc1..b121a87062 100644
--- a/tensorflow/tools/common/traverse.py
+++ b/tensorflow/tools/common/traverse.py
@@ -41,6 +41,7 @@ def _traverse_internal(root, visit, stack, path):
       for enum_member in root.__members__.items():
         if enum_member not in children:
           children.append(enum_member)
+      children = sorted(children)
   except ImportError:
     # On some Python installations, some modules do not support enumerating
     # members (six in particular), leading to import errors.
-- 
GitLab


From 67b35eb48e90d98b78b08a68379c16dcaa2c3db3 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 11 Oct 2018 22:03:14 -0700
Subject: [PATCH 0849/1085]   Changes NMS XLA implementation to match that of
 CPU. (#22921)

PiperOrigin-RevId: 216796208
---
 tensorflow/compiler/tests/image_ops_test.py   | 336 ++++++++++--------
 .../compiler/tf2xla/kernels/image_ops.cc      | 238 +++++++++----
 tensorflow/python/ops/image_ops_test.py       |  24 ++
 3 files changed, 379 insertions(+), 219 deletions(-)

diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py
index 68fdb5caf4..d67b16f8e9 100644
--- a/tensorflow/compiler/tests/image_ops_test.py
+++ b/tensorflow/compiler/tests/image_ops_test.py
@@ -26,7 +26,6 @@ import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.compiler.tests import xla_test
-from tensorflow.python.compat import compat
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
@@ -605,168 +604,205 @@ class ResizeBilinearTest(xla_test.XLATestCase):
 class NonMaxSuppressionTest(xla_test.XLATestCase):
 
   def testNMS128From1024(self):
-    with compat.forward_compatibility_horizon(2018, 8, 8):
-      num_boxes = 1024
-      boxes_np = np.random.normal(50, 10, (num_boxes, 4)).astype("f4")
-      scores_np = np.random.normal(0.5, 0.1, (num_boxes,)).astype("f4")
-
-      max_output_size = 128
-      iou_threshold_np = np.array(0.5, dtype=np.float32)
-      score_threshold_np = np.array(0.0, dtype=np.float32)
-
-      with self.cached_session() as sess:
-        boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
-        scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
-        iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
-                                              iou_threshold_np.shape)
-        score_threshold = array_ops.placeholder(score_threshold_np.dtype,
-                                                score_threshold_np.shape)
-        with self.test_scope():
-          selected_indices = image_ops.non_max_suppression_padded(
-              boxes=boxes,
-              scores=scores,
-              max_output_size=max_output_size,
-              iou_threshold=iou_threshold,
-              score_threshold=score_threshold,
-              pad_to_max_output_size=True)
-        inputs_feed = {
-            boxes: boxes_np,
-            scores: scores_np,
-            score_threshold: score_threshold_np,
-            iou_threshold: iou_threshold_np
-        }
-        (indices_tf, _) = sess.run(selected_indices, feed_dict=inputs_feed)
-
-        self.assertEqual(indices_tf.size, max_output_size)
+    num_boxes = 1024
+    boxes_np = np.random.normal(50, 10, (num_boxes, 4)).astype("f4")
+    scores_np = np.random.normal(0.5, 0.1, (num_boxes,)).astype("f4")
+
+    max_output_size = 128
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.0, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          score_threshold: score_threshold_np,
+          iou_threshold: iou_threshold_np
+      }
+      (indices_tf, _) = sess.run(selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
 
   def testNMS3From6Boxes(self):
-    with compat.forward_compatibility_horizon(2018, 8, 8):
-      # Three boxes are selected based on IOU.
-      boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
-                    [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
-      boxes_np = np.array(boxes_data, dtype=np.float32)
-
-      scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
-      scores_np = np.array(scores_data, dtype=np.float32)
-
-      max_output_size = 3
-      iou_threshold_np = np.array(0.5, dtype=np.float32)
-      score_threshold_np = np.array(0.0, dtype=np.float32)
-
-      with self.cached_session() as sess:
-        boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
-        scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
-        iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
-                                              iou_threshold_np.shape)
-        score_threshold = array_ops.placeholder(score_threshold_np.dtype,
-                                                score_threshold_np.shape)
-        with self.test_scope():
-          selected_indices = image_ops.non_max_suppression_padded(
-              boxes=boxes,
-              scores=scores,
-              max_output_size=max_output_size,
-              iou_threshold=iou_threshold,
-              score_threshold=score_threshold,
-              pad_to_max_output_size=True)
-        inputs_feed = {
-            boxes: boxes_np,
-            scores: scores_np,
-            score_threshold: score_threshold_np,
-            iou_threshold: iou_threshold_np
-        }
-        (indices_tf, num_valid) = sess.run(
-            selected_indices, feed_dict=inputs_feed)
-
-        self.assertEqual(indices_tf.size, max_output_size)
-        self.assertEqual(num_valid, 3)
-        self.assertAllClose(indices_tf[:num_valid], [3, 0, 5])
+    # Three boxes are selected based on IOU.
+    boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+                  [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+    boxes_np = np.array(boxes_data, dtype=np.float32)
+
+    scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
+    scores_np = np.array(scores_data, dtype=np.float32)
+
+    max_output_size = 3
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.0, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          score_threshold: score_threshold_np,
+          iou_threshold: iou_threshold_np
+      }
+      (indices_tf, num_valid) = sess.run(
+          selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
+      self.assertEqual(num_valid, 3)
+      self.assertAllClose(indices_tf[:num_valid], [3, 0, 5])
 
   def testNMS3Then2WithScoreThresh(self):
     # Three boxes are selected based on IOU.
     # One is filtered out by score threshold.
 
-    with compat.forward_compatibility_horizon(2018, 8, 8):
-      boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
-                    [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
-      boxes_np = np.array(boxes_data, dtype=np.float32)
-
-      scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
-      scores_np = np.array(scores_data, dtype=np.float32)
-      max_output_size = 3
-      iou_threshold_np = np.array(0.5, dtype=np.float32)
-      score_threshold_np = np.array(0.4, dtype=np.float32)
-
-      with self.cached_session() as sess:
-        boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
-        scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
-        iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
-                                              iou_threshold_np.shape)
-        score_threshold = array_ops.placeholder(score_threshold_np.dtype,
-                                                score_threshold_np.shape)
-        with self.test_scope():
-          selected_indices = image_ops.non_max_suppression_padded(
-              boxes=boxes,
-              scores=scores,
-              max_output_size=max_output_size,
-              iou_threshold=iou_threshold,
-              score_threshold=score_threshold,
-              pad_to_max_output_size=True)
-        inputs_feed = {
-            boxes: boxes_np,
-            scores: scores_np,
-            iou_threshold: iou_threshold_np,
-            score_threshold: score_threshold_np
-        }
-        (indices_tf, num_valid) = sess.run(
-            selected_indices, feed_dict=inputs_feed)
-
-        self.assertEqual(indices_tf.size, max_output_size)
-        self.assertEqual(num_valid, 2)
-        self.assertAllClose(indices_tf[:num_valid], [3, 0])
+    boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+                  [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+    boxes_np = np.array(boxes_data, dtype=np.float32)
+
+    scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
+    scores_np = np.array(scores_data, dtype=np.float32)
+    max_output_size = 3
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.4, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          iou_threshold: iou_threshold_np,
+          score_threshold: score_threshold_np
+      }
+      (indices_tf, num_valid) = sess.run(
+          selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
+      self.assertEqual(num_valid, 2)
+      self.assertAllClose(indices_tf[:num_valid], [3, 0])
 
   def testNMS3Then1WithScoreMaxThresh(self):
     # Three boxes are selected based on IOU.
     # One is filtered out by score threshold.
     # One is filtered out by max_output_size.
 
-    with compat.forward_compatibility_horizon(2018, 8, 8):
-      boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
-                    [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
-      boxes_np = np.array(boxes_data, dtype=np.float32)
-
-      scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
-      scores_np = np.array(scores_data, dtype=np.float32)
-      max_output_size = 1
-      iou_threshold_np = np.array(0.5, dtype=np.float32)
-      score_threshold_np = np.array(0.4, dtype=np.float32)
-
-      with self.cached_session() as sess:
-        boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
-        scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
-        iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
-                                              iou_threshold_np.shape)
-        score_threshold = array_ops.placeholder(score_threshold_np.dtype,
-                                                score_threshold_np.shape)
-        with self.test_scope():
-          selected_indices = image_ops.non_max_suppression_padded(
-              boxes=boxes,
-              scores=scores,
-              max_output_size=max_output_size,
-              iou_threshold=iou_threshold,
-              score_threshold=score_threshold,
-              pad_to_max_output_size=True)
-        inputs_feed = {
-            boxes: boxes_np,
-            scores: scores_np,
-            iou_threshold: iou_threshold_np,
-            score_threshold: score_threshold_np
-        }
-        (indices_tf, num_valid) = sess.run(
-            selected_indices, feed_dict=inputs_feed)
-
-        self.assertEqual(indices_tf.size, max_output_size)
-        self.assertEqual(num_valid, 1)
-        self.assertAllClose(indices_tf[:num_valid], [3])
+    boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+                  [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+    boxes_np = np.array(boxes_data, dtype=np.float32)
+
+    scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3]
+    scores_np = np.array(scores_data, dtype=np.float32)
+    max_output_size = 1
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.4, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          iou_threshold: iou_threshold_np,
+          score_threshold: score_threshold_np
+      }
+      (indices_tf, num_valid) = sess.run(
+          selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
+      self.assertEqual(num_valid, 1)
+      self.assertAllClose(indices_tf[:num_valid], [3])
+
+  def testSelectFromContinuousOverLap(self):
+    # Tests that a suppressed box does not itself suppress other boxes.
+
+    boxes_data = [[0, 0, 1, 1], [0, 0.2, 1, 1.2], [0, 0.4, 1, 1.4],
+                  [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 3]]
+    boxes_np = np.array(boxes_data, dtype=np.float32)
+
+    scores_data = [0.9, 0.75, 0.6, 0.5, 0.4, 0.3]
+    scores_np = np.array(scores_data, dtype=np.float32)
+    max_output_size = 3
+    iou_threshold_np = np.array(0.5, dtype=np.float32)
+    score_threshold_np = np.array(0.1, dtype=np.float32)
+
+    with self.cached_session() as sess:
+      boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape)
+      scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape)
+      iou_threshold = array_ops.placeholder(iou_threshold_np.dtype,
+                                            iou_threshold_np.shape)
+      score_threshold = array_ops.placeholder(score_threshold_np.dtype,
+                                              score_threshold_np.shape)
+      with self.test_scope():
+        selected_indices = image_ops.non_max_suppression_padded(
+            boxes=boxes,
+            scores=scores,
+            max_output_size=max_output_size,
+            iou_threshold=iou_threshold,
+            score_threshold=score_threshold,
+            pad_to_max_output_size=True)
+      inputs_feed = {
+          boxes: boxes_np,
+          scores: scores_np,
+          iou_threshold: iou_threshold_np,
+          score_threshold: score_threshold_np
+      }
+      (indices_tf, num_valid) = sess.run(
+          selected_indices, feed_dict=inputs_feed)
+
+      self.assertEqual(indices_tf.size, max_output_size)
+      self.assertEqual(num_valid, 3)
+      self.assertAllClose(indices_tf[:num_valid], [0, 2, 4])
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/image_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
index 921b4340c0..6713d6bc92 100644
--- a/tensorflow/compiler/tf2xla/kernels/image_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h"
 #include "tensorflow/compiler/tf2xla/lib/util.h"
+#include "tensorflow/compiler/tf2xla/lib/while_loop.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
@@ -316,6 +318,70 @@ class AdjustHueOp : public XlaOpKernel {
 };
 REGISTER_XLA_OP(Name("AdjustHue"), AdjustHueOp);
 
+struct WhileCondFn {
+  const int64 num_boxes;
+  const int64 output_size;
+
+  explicit WhileCondFn(int64 num_boxes, int64 output_size)
+      : num_boxes(num_boxes), output_size(output_size) {}
+
+  xla::StatusOr<xla::XlaOp> operator()(absl::Span<const xla::XlaOp> values,
+                                       xla::XlaBuilder* cond_builder) const {
+    xla::XlaOp row_idx = values[0];
+    xla::XlaOp row_in_bounds =
+        xla::Lt(row_idx, xla::ConstantR0<int32>(cond_builder, num_boxes));
+    xla::XlaOp num_outputs_so_far = values[1];
+    xla::XlaOp results_not_full = xla::Lt(
+        num_outputs_so_far, xla::ConstantR0<int32>(cond_builder, output_size));
+    return xla::And(row_in_bounds, results_not_full);
+  }
+};
+
+// Process the boxes one-by-one using the iou matrix mask.
+// This implementation uses a correct, but greedy, sequential algorithm
+// to ensure that suppressed boxes cannot themselves suppress other
+// boxes.
+struct SuppressBodyFn {
+  const int64 num_boxes;
+
+  explicit SuppressBodyFn(int64 num_boxes) : num_boxes(num_boxes) {}
+
+  xla::StatusOr<std::vector<xla::XlaOp>> operator()(
+      absl::Span<const xla::XlaOp> values, xla::XlaBuilder* builder) const {
+    auto row_idx = values[0];
+    auto num_outputs_so_far = values[1];
+    auto iou_mask = values[2];
+    auto included_iou = values[3];
+    auto zero_r1 = xla::ConstantR1<int32>(builder, {0});
+    // Determine if current elem is active using a slice.
+    auto row_idx_r1 = xla::Reshape(row_idx, {1});
+    auto active_elem = xla::DynamicSlice(included_iou, row_idx_r1, {1});
+    active_elem = xla::Reshape(active_elem, {});
+    // Increment output count iff current elem is not suppressed.
+    num_outputs_so_far = xla::Select(
+        active_elem, num_outputs_so_far + xla::ConstantR0<int32>(builder, 1),
+        num_outputs_so_far);
+    // Slice out the row_idx.
+    auto starts = xla::ConcatInDim(builder, {row_idx_r1, zero_r1}, 0);
+    auto row_iou = xla::DynamicSlice(iou_mask, starts, {1, num_boxes});
+    // Remove the diagonal from consideration. An elem cannot suppress
+    // itself.
+    auto update_starts = xla::ConcatInDim(builder, {zero_r1, row_idx_r1}, 0);
+    row_iou = xla::DynamicUpdateSlice(
+        row_iou, xla::ConstantR2FromArray2D<bool>(builder, {{false}}),
+        update_starts);
+    // Create a suppression by inverting polarity.
+    row_iou = xla::Reshape(row_iou, {num_boxes});
+    auto supp_mask = xla::Not(row_iou);
+    // Update mask iff current elem is not suppressed.
+    included_iou = xla::Select(xla::Broadcast(active_elem, {num_boxes}),
+                               xla::And(included_iou, supp_mask), included_iou);
+    row_idx = row_idx + xla::ConstantR0<int32>(builder, 1);
+    return std::vector<xla::XlaOp>{row_idx, num_outputs_so_far, iou_mask,
+                                   included_iou};
+  }
+};
+
 class NonMaxSuppressionOp : public XlaOpKernel {
  public:
   explicit NonMaxSuppressionOp(OpKernelConstruction* context)
@@ -326,14 +392,12 @@ class NonMaxSuppressionOp : public XlaOpKernel {
 
   void Compile(XlaOpKernelContext* context) override {
     // TODO(b/111646731): Improve scalability of this op, using blocking.
-    int num_boxes_dim = 0;
-    int coords_dim = 1;
     const TensorShape& boxes_shape = context->InputShape("boxes");
     OP_REQUIRES(context, TensorShapeUtils::IsMatrix(boxes_shape),
                 errors::InvalidArgument("boxes must be 2-D, currently: ",
                                         boxes_shape.DebugString()));
-    const int64 num_boxes = boxes_shape.dim_size(num_boxes_dim);
-    OP_REQUIRES(context, boxes_shape.dim_size(coords_dim) == 4,
+    const int64 num_boxes = boxes_shape.dim_size(0);
+    OP_REQUIRES(context, boxes_shape.dim_size(1) == 4,
                 errors::InvalidArgument("boxes must have 4 columns",
                                         boxes_shape.DebugString()));
     const TensorShape& scores_shape = context->InputShape("scores");
@@ -347,9 +411,13 @@ class NonMaxSuppressionOp : public XlaOpKernel {
     OP_REQUIRES(context, pad_to_max_output_size_,
                 errors::InvalidArgument(
                     "XLA compilation requires pad_to_max_output_size == True"));
+    OP_REQUIRES(context, num_boxes <= kint32max,
+                errors::InvalidArgument("XLA compilation requires number of "
+                                        "boxes to be <= kint32max, got ",
+                                        num_boxes));
 
-    xla::XlaOp boxes = context->Input("boxes");
-    xla::XlaOp scores = context->Input("scores");
+    const xla::XlaOp boxes_input = context->Input("boxes");
+    const xla::XlaOp scores_input = context->Input("scores");
     int64 output_size;
     OP_REQUIRES_OK(context, context->ConstantInputAsIntScalar(2, &output_size));
     OP_REQUIRES(
@@ -358,90 +426,113 @@ class NonMaxSuppressionOp : public XlaOpKernel {
     OP_REQUIRES(context, output_size <= kint32max,
                 errors::InvalidArgument("Need output_size <= kint32Max, got ",
                                         output_size));
-    xla::XlaOp score_thresh = context->Input("score_threshold");
-    xla::XlaOp iou_thresh = context->Input("iou_threshold");
-
+    const xla::XlaOp score_thresh = context->Input("score_threshold");
+    const xla::XlaOp iou_thresh = context->Input("iou_threshold");
     xla::XlaBuilder* const builder = context->builder();
 
     // Choose a more convenient layout.
-    xla::XlaOp boxes_t = xla::Transpose(boxes, {1, 0});
-    coords_dim = 0;
-    num_boxes_dim = 1;
-
-    // Shapes are henceforth [1, num_boxes].
-    xla::XlaOp coord_y0 = xla::SliceInDim(boxes_t,
-                                          /*start_index=*/0,
-                                          /*limit_index=*/1,
-                                          /*stride=*/1,
-                                          /*dimno=*/coords_dim);
-    xla::XlaOp coord_x0 = xla::SliceInDim(boxes_t,
-                                          /*start_index=*/1,
-                                          /*limit_index=*/2,
-                                          /*stride=*/1,
-                                          /*dimno=*/coords_dim);
-    xla::XlaOp coord_y1 = xla::SliceInDim(boxes_t,
-                                          /*start_index=*/2,
-                                          /*limit_index=*/3,
-                                          /*stride=*/1,
-                                          /*dimno=*/coords_dim);
-    xla::XlaOp coord_x1 = xla::SliceInDim(boxes_t,
-                                          /*start_index=*/3,
-                                          /*limit_index=*/4,
-                                          /*stride=*/1,
-                                          /*dimno=*/coords_dim);
-    xla::XlaOp y1 =
-        xla::Select(xla::Le(coord_y0, coord_y1), coord_y0, coord_y1);
-    xla::XlaOp y2 =
-        xla::Select(xla::Le(coord_y0, coord_y1), coord_y1, coord_y0);
-    xla::XlaOp x1 =
-        xla::Select(xla::Le(coord_x0, coord_x1), coord_x0, coord_x1);
-    xla::XlaOp x2 =
-        xla::Select(xla::Le(coord_x0, coord_x1), coord_x1, coord_x0);
+    const xla::XlaOp boxes = xla::Transpose(boxes_input, {1, 0});
+    const xla::XlaOp boxes_sorted = xla::GetTupleElement(
+        xla::Sort(/*keys=*/-xla::Broadcast(scores_input, {4}),
+                  /*values=*/{boxes},
+                  /*dimension=*/1),
+        1);
+    // Track the mapping of indices into sorted domain.
+    const xla::XlaOp iota_indices = xla::Iota(builder, xla::S32, num_boxes);
+    const xla::XlaOp indices_sort = xla::Sort(-scores_input, {iota_indices});
+    const xla::XlaOp indices_sorted = xla::GetTupleElement(indices_sort, 1);
+    const xla::XlaOp scores = xla::Neg(xla::GetTupleElement(indices_sort, 0));
+
+    // Shapes are henceforth [1, num_boxes]. 'c_y0' denotes 'coordinate' y0.
+    const xla::XlaOp c_y0 = xla::Reshape(xla::SliceInDim(boxes_sorted,
+                                                         /*start_index=*/0,
+                                                         /*limit_index=*/1,
+                                                         /*stride=*/1,
+                                                         /*dimno=*/0),
+                                         {num_boxes});
+    const xla::XlaOp c_x0 = xla::Reshape(xla::SliceInDim(boxes_sorted,
+                                                         /*start_index=*/1,
+                                                         /*limit_index=*/2,
+                                                         /*stride=*/1,
+                                                         /*dimno=*/0),
+                                         {num_boxes});
+    const xla::XlaOp c_y1 = xla::Reshape(xla::SliceInDim(boxes_sorted,
+                                                         /*start_index=*/2,
+                                                         /*limit_index=*/3,
+                                                         /*stride=*/1,
+                                                         /*dimno=*/0),
+                                         {num_boxes});
+    const xla::XlaOp c_x1 = xla::Reshape(xla::SliceInDim(boxes_sorted,
+                                                         /*start_index=*/3,
+                                                         /*limit_index=*/4,
+                                                         /*stride=*/1,
+                                                         /*dimno=*/0),
+                                         {num_boxes});
+
+    xla::XlaOp y1 = xla::Select(xla::Le(c_y0, c_y1), c_y0, c_y1);
+    xla::XlaOp y2 = xla::Select(xla::Le(c_y0, c_y1), c_y1, c_y0);
+    xla::XlaOp x1 = xla::Select(xla::Le(c_x0, c_x1), c_x0, c_x1);
+    xla::XlaOp x2 = xla::Select(xla::Le(c_x0, c_x1), c_x1, c_x0);
     xla::XlaOp area = (y2 - y1) * (x2 - x1);
 
-    // Transpose the 1xN tensors, instead of the NxN tensors.
-    xla::XlaOp y1_t = xla::Transpose(y1, {1, 0});
-    xla::XlaOp y2_t = xla::Transpose(y2, {1, 0});
-    xla::XlaOp x1_t = xla::Transpose(x1, {1, 0});
-    xla::XlaOp x2_t = xla::Transpose(x2, {1, 0});
-    xla::XlaOp area_t = xla::Transpose(area, {1, 0});
+    // Shapes are henceforth [1, num_boxes].
+    y1 = xla::Broadcast(y1, {1});
+    y2 = xla::Broadcast(y2, {1});
+    x1 = xla::Broadcast(x1, {1});
+    x2 = xla::Broadcast(x2, {1});
+    area = xla::Broadcast(area, {1});
 
     // Shapes are henceforth [num_boxes, num_boxes].
-    xla::XlaOp i_xmin = xla::Max(x1, x1_t);
-    xla::XlaOp i_ymin = xla::Max(y1, y1_t);
-    xla::XlaOp i_xmax = xla::Min(x2, x2_t);
-    xla::XlaOp i_ymax = xla::Min(y2, y2_t);
+    xla::XlaOp i_xmin = xla::Max(x1, xla::Transpose(x1, {1, 0}));
+    xla::XlaOp i_ymin = xla::Max(y1, xla::Transpose(y1, {1, 0}));
+    xla::XlaOp i_xmax = xla::Min(x2, xla::Transpose(x2, {1, 0}));
+    xla::XlaOp i_ymax = xla::Min(y2, xla::Transpose(y2, {1, 0}));
     auto square_zero = xla::ZerosLike(i_xmin);
 
     xla::XlaOp i_area = xla::Max(i_xmax - i_xmin, square_zero) *
                         xla::Max(i_ymax - i_ymin, square_zero);
-    xla::XlaOp u_area = area + area_t - i_area;
+    xla::XlaOp u_area = area + xla::Transpose(area, {1, 0}) - i_area;
     xla::XlaOp iou = i_area / u_area;
 
     xla::XlaOp iou_thresh_mask = xla::Gt(iou, iou_thresh + square_zero);
-    xla::XlaOp scores_2d = xla::Reshape(scores, {num_boxes, 1});
-    xla::XlaOp score_cmp_mask =
-        xla::Gt(scores_2d, xla::Transpose(scores_2d, {1, 0}));
-    xla::XlaOp suppress = xla::And(iou_thresh_mask, score_cmp_mask);
-
-    // Shapes are [num_boxes] after the reduce.
-    xla::XlaOp included_iou = xla::Not(xla::Reduce(
-        suppress,
-        /*init_value=*/xla::ConstantR0<bool>(builder, false),
-        /*computation=*/CreateScalarOrComputation(xla::PRED, builder),
-        /*dimensions_to_reduce=*/{0}));
+    xla::XlaOp included_iou =
+        xla::Broadcast(xla::ConstantR0<bool>(builder, true), {num_boxes});
+
+    std::vector<xla::XlaOp> init_values;
+    init_values.reserve(4);
+    init_values.push_back(xla::ConstantR0<int32>(builder, 0));  // col_idx
+    init_values.push_back(xla::ConstantR0<int32>(builder, 0));  // num_outputs
+    init_values.push_back(iou_thresh_mask);
+    init_values.push_back(included_iou);
+
+    auto suppress_loop_result =
+        XlaWhileLoop(WhileCondFn(num_boxes, output_size),
+                     SuppressBodyFn(num_boxes), init_values, "suppress_loop",
+                     builder)
+            .ValueOrDie();
+
     xla::XlaOp included_score =
         xla::Gt(scores, xla::Broadcast(score_thresh, {num_boxes}));
-    xla::XlaOp included = xla::And(included_iou, included_score);
+    xla::XlaOp included = xla::And(included_score, suppress_loop_result[3]);
+
+    // Only consider boxes over which we have iterated. This allows for accurate
+    // counting. DynamicSlice would require knowledge of the size of the output.
+    auto valid_elem = xla::Lt(
+        iota_indices, xla::Broadcast(suppress_loop_result[0], {num_boxes}));
+    included = xla::And(included, valid_elem);
+
     xla::XlaOp neg_inf =
         xla::Broadcast(xla::MinValue(builder, xla::F32), {num_boxes});
     xla::XlaOp scores_included = xla::Select(included, scores, neg_inf);
-
+    xla::XlaOp output_tuple = TopK(scores_included, output_size);
+    xla::XlaOp selected_indices_sorted = xla::GetTupleElement(output_tuple, 1);
+    // Calculate num_valid.
+    // Note: num_valid cannot be taken from the loop outputs, because outputs
+    // can be suppressed by score threshold.
     xla::XlaOp ones_included = xla::Select(
         included,
         xla::Broadcast(xla::ConstantR0<int32>(builder, 1), {num_boxes}),
         xla::Broadcast(xla::ConstantR0<int32>(builder, 0), {num_boxes}));
-
     // num_valid is scalar. Value should be bound by output_size.
     xla::XlaOp num_valid_total = xla::Reduce(
         ones_included,
@@ -451,8 +542,17 @@ class NonMaxSuppressionOp : public XlaOpKernel {
     xla::XlaOp num_valid =
         xla::Min(num_valid_total, xla::ConstantR0<int32>(builder, output_size));
 
-    xla::XlaOp output_tuple = TopK(scores_included, output_size);
-    xla::XlaOp selected_indices = xla::GetTupleElement(output_tuple, 1);
+    // Re-index into the original scores input tensor, using a Gather.
+    // Boxes were suppressed in the sorted domain.
+    xla::XlaOp selected_indices;
+    DataType gather_type = context->expected_output_dtype(0);
+    OP_REQUIRES_OK(
+        context,
+        XlaGather(indices_sorted, scores_shape, selected_indices_sorted,
+                  TensorShape({output_size}),
+                  /*axis=*/0,
+                  /*indices_are_nd=*/false,
+                  /*dtype=*/gather_type, DT_INT32, builder, &selected_indices));
 
     context->SetOutput(0, selected_indices);
     context->SetOutput(1, num_valid);
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 35fdee4fad..71661d606e 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -3739,6 +3739,30 @@ class NonMaxSuppressionPaddedTest(test_util.TensorFlowTestCase):
       self.assertAllClose(selected_indices.eval(), [3, 0, 5])
       self.assertEqual(num_valid.eval(), 3)
 
+  def testSelectFromContinuousOverLap(self):
+    boxes_np = [[0, 0, 1, 1], [0, 0.2, 1, 1.2], [0, 0.4, 1, 1.4],
+                [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]]
+    scores_np = [0.9, 0.75, 0.6, 0.5, 0.4, 0.3]
+    max_output_size_np = 3
+    iou_threshold_np = 0.5
+    score_threshold_np = 0.1
+    boxes = constant_op.constant(boxes_np)
+    scores = constant_op.constant(scores_np)
+    max_output_size = constant_op.constant(max_output_size_np)
+    iou_threshold = constant_op.constant(iou_threshold_np)
+    score_threshold = constant_op.constant(score_threshold_np)
+    selected_indices, num_valid = image_ops.non_max_suppression_padded(
+        boxes,
+        scores,
+        max_output_size,
+        iou_threshold,
+        score_threshold)
+    # The output shape of the padded operation must be fully defined.
+    self.assertEqual(selected_indices.shape.is_fully_defined(), False)
+    with self.cached_session():
+      self.assertAllClose(selected_indices.eval(), [0, 2, 4])
+      self.assertEqual(num_valid.eval(), 3)
+
 
 class VerifyCompatibleImageShapesTest(test_util.TensorFlowTestCase):
   """Tests utility function used by ssim() and psnr()."""
-- 
GitLab


From a24b32c2d212a08eea72313aad2b126287e79c37 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 22:17:34 -0700
Subject: [PATCH 0850/1085] Batched per_image_standardization

PiperOrigin-RevId: 216813273
---
 tensorflow/python/ops/image_ops_impl.py | 12 +++++++-----
 tensorflow/python/ops/image_ops_test.py | 10 ++++++++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 1c75aab578..5a8e5d8399 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1184,7 +1184,8 @@ def per_image_standardization(image):
   away from zero to protect against division by 0 when handling uniform images.
 
   Args:
-    image: 3-D tensor of shape `[height, width, channels]`.
+    image: An n-D Tensor where the last 3 dimensions are
+           `[height, width, channels]`.
 
   Returns:
     The standardized image with same shape as `image`.
@@ -1194,14 +1195,15 @@ def per_image_standardization(image):
   """
   with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
     image = ops.convert_to_tensor(image, name='image')
-    image = _Assert3DImage(image)
-    num_pixels = math_ops.reduce_prod(array_ops.shape(image))
+    image = _AssertAtLeast3DImage(image)
+    num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:])
 
     image = math_ops.cast(image, dtype=dtypes.float32)
-    image_mean = math_ops.reduce_mean(image)
+    image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True)
 
     variance = (
-        math_ops.reduce_mean(math_ops.square(image)) -
+        math_ops.reduce_mean(
+            math_ops.square(image), axis=[-1, -2, -3], keepdims=True) -
         math_ops.square(image_mean))
     variance = gen_nn_ops.relu(variance)
     stddev = math_ops.sqrt(variance)
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index dae71caa81..81c2cc526e 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -1491,6 +1491,16 @@ class PerImageWhiteningTest(test_util.TensorFlowTestCase):
       whiten_np = whiten.eval()
       self.assertFalse(np.any(np.isnan(whiten_np)))
 
+  def testBatchWhitening(self):
+    imgs_np = np.random.uniform(0., 255., [4, 24, 24, 3])
+    whiten_np = [self._NumpyPerImageWhitening(img) for img in imgs_np]
+    with self.test_session(use_gpu=True):
+      imgs = constant_op.constant(imgs_np)
+      whiten = image_ops.per_image_standardization(imgs)
+      whiten_tf = whiten.eval()
+      for w_tf, w_np in zip(whiten_tf, whiten_np):
+        self.assertAllClose(w_tf, w_np, atol=1e-4)
+
 
 class CropToBoundingBoxTest(test_util.TensorFlowTestCase):
 
-- 
GitLab


From b9a6f08935e41d4b711f9434735fc1a7f885d476 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 11 Oct 2018 22:35:30 -0700
Subject: [PATCH 0851/1085] r1.12-rc1 cherry-pick request: Update XlaSort to
 match the underlying HLO. (#22923)

* Remove CHECKs from HloInstruction constructors.
Move these checks to RET_CHECKs in the HloVerifier. Added a new visitor class
InstructionVerifier inside of hlo_verifier.cc for handling these random
non-result-shape verifications.

PiperOrigin-RevId: 215745043

* Update XlaSort to match the underlying HLO.

PiperOrigin-RevId: 215917470
---
 tensorflow/compiler/tests/sort_ops_test.py    |  18 +-
 .../compiler/tf2xla/kernels/sort_ops.cc       |  17 +-
 tensorflow/compiler/tf2xla/ops/xla_ops.cc     |  23 +-
 tensorflow/compiler/tf2xla/python/xla.py      |  12 +-
 .../compiler/xla/service/hlo_instructions.cc  |  12 -
 .../compiler/xla/service/hlo_instructions.h   |   1 -
 .../compiler/xla/service/hlo_verifier.cc      | 458 ++++++++++--------
 .../compiler/xla/service/hlo_verifier.h       |  11 -
 8 files changed, 311 insertions(+), 241 deletions(-)

diff --git a/tensorflow/compiler/tests/sort_ops_test.py b/tensorflow/compiler/tests/sort_ops_test.py
index dbf4beb693..57f0ab7a9e 100644
--- a/tensorflow/compiler/tests/sort_ops_test.py
+++ b/tensorflow/compiler/tests/sort_ops_test.py
@@ -48,13 +48,29 @@ class XlaSortOpTest(xla_test.XLATestCase):
         self.assertAllClose(v, result, rtol=1e-3)
 
   def testSort(self):
-    supported_types = set([dtypes.bfloat16.as_numpy_dtype, np.float32])
+    supported_types = set(
+        [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
     for dtype in supported_types.intersection(self.numeric_types):
       x = np.arange(101, dtype=dtype)
       np.random.shuffle(x)
       self._assertOpOutputMatchesExpected(
           xla.sort, [x], expected=[np.arange(101, dtype=dtype)])
 
+  def testKeyValueSort(self):
+    supported_types = set(
+        [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
+    for key_type in supported_types.intersection(self.numeric_types):
+      for value_type in supported_types.intersection(self.numeric_types):
+        x = np.arange(101, dtype=key_type)
+        np.random.shuffle(x)
+        y = (-x).astype(value_type)
+        self._assertOpOutputMatchesExpected(
+            xla.key_value_sort, [x, y],
+            expected=[
+                np.arange(101, dtype=key_type),
+                -np.arange(101, dtype=value_type)
+            ])
+
   def testTopK(self):
     supported_types = set(
         [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32])
diff --git a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
index aaeeae01cc..45f03d8c21 100644
--- a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc
@@ -25,11 +25,26 @@ class XlaSortOp : public XlaOpKernel {
   explicit XlaSortOp(OpKernelConstruction* context) : XlaOpKernel(context) {}
 
   void Compile(XlaOpKernelContext* context) override {
-    context->SetOutput(0, xla::Sort(context->Input(0)));
+    context->SetOutput(0, xla::Sort(context->Input("input")));
   }
 };
 
 REGISTER_XLA_OP(Name("XlaSort"), XlaSortOp);
 
+class XlaKeyValueSortOp : public XlaOpKernel {
+ public:
+  explicit XlaKeyValueSortOp(OpKernelConstruction* context)
+      : XlaOpKernel(context) {}
+
+  void Compile(XlaOpKernelContext* context) override {
+    xla::XlaOp result =
+        xla::Sort(context->Input("keys"), context->Input("values"));
+    context->SetOutput(0, xla::GetTupleElement(result, 0));
+    context->SetOutput(1, xla::GetTupleElement(result, 1));
+  }
+};
+
+REGISTER_XLA_OP(Name("XlaKeyValueSort"), XlaKeyValueSortOp);
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
index 733eeed3c6..557911553d 100644
--- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc
+++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
@@ -354,12 +354,33 @@ Wraps the XLA Sort operator, documented at
  https://www.tensorflow.org/performance/xla/operation_semantics#sort
 .
 
-Sorts a tensor. Currently only rank 1 sorts in ascending order are supported.
+Sorts a tensor. Currently only sorts in ascending order are supported.
 
 input: A `Tensor` of type T.
 output: A `Tensor` of type T.
 )doc");
 
+REGISTER_OP("XlaKeyValueSort")
+    .Input("keys: K")
+    .Input("values: V")
+    .Output("sorted_keys: K")
+    .Output("sorted_values: V")
+    .Attr("K: realnumbertype")
+    .Attr("V: type")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+Wraps the XLA Sort operator, documented at
+ https://www.tensorflow.org/performance/xla/operation_semantics#sort
+.
+
+Sorts a tensor. Currently only sorts in ascending order are supported.
+
+keys: A `Tensor` of type K.
+values: A `Tensor` of type V.
+sorted_keys: A `Tensor` of type K.
+sorted_values: A `Tensor` of type V.
+)doc");
+
 // TODO(b/37549631) setting the While Op to always be stateful is too
 // conservative.
 REGISTER_OP("XlaWhile")
diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py
index 27dd18a9bb..bc7924c371 100644
--- a/tensorflow/compiler/tf2xla/python/xla.py
+++ b/tensorflow/compiler/tf2xla/python/xla.py
@@ -212,9 +212,9 @@ bitcast_convert_type = array_ops.bitcast
 
 def broadcast(x, dims, name=None):
   x = ops.convert_to_tensor(x)
-  shape = array_ops.concat(
-      [constant_op.constant(dims),
-       array_ops.shape(x)], axis=0)
+  shape = array_ops.concat([constant_op.constant(dims),
+                            array_ops.shape(x)],
+                           axis=0)
   return array_ops.broadcast_to(x, shape, name=name)
 
 
@@ -332,12 +332,13 @@ def reduce_window(operand,
     init: a scalar tensor representing the initial value for the reduction
     reducer: a reduction function that combines a pair of scalars.
     window_dimensions: shape of the window, as a list of integers
-    window_strides: inter-window strides, as a list of integers. Optional;
-      if omitted, defaults to strides of 1.
+    window_strides: inter-window strides, as a list of integers. Optional; if
+      omitted, defaults to strides of 1.
     padding: padding to apply to 'operand'. List of (low, high) pairs of
       integers that specify the padding to apply before and after each
       dimension. Optional; if omitted, defaults to no padding.
     name: the operator name, or None.
+
   Returns:
     A tensor that represents the output of the reduce_window operator.
   """
@@ -377,4 +378,5 @@ def slice(x, start_dims, limit_dims, strides):
 
 
 sort = gen_xla_ops.xla_sort
+key_value_sort = gen_xla_ops.xla_key_value_sort
 while_loop = gen_xla_ops.xla_while
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 1bc168c8b7..0f76c7ab6c 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -641,14 +641,6 @@ HloTransposeInstruction::HloTransposeInstruction(
     absl::Span<const int64> dimensions)
     : HloInstruction(HloOpcode::kTranspose, shape),
       dimensions_(dimensions.begin(), dimensions.end()) {
-  CHECK_EQ(shape.dimensions().size(), dimensions.size());
-  CHECK_EQ(shape.dimensions().size(), operand->shape().dimensions().size());
-  CHECK(std::equal(operand->shape().dimensions().begin(),
-                   operand->shape().dimensions().end(),
-                   Permute(dimensions, shape.dimensions()).begin()))
-      << "shape: " << ShapeUtil::HumanString(shape)
-      << ", operand->shape(): " << ShapeUtil::HumanString(shape)
-      << ", dimensions: {" << StrJoin(dimensions, ", ") << "}";
   AppendOperand(operand);
 }
 
@@ -1489,7 +1481,6 @@ HloParameterInstruction::CloneWithNewOperandsImpl(
 HloGetTupleElementInstruction::HloGetTupleElementInstruction(
     const Shape& shape, HloInstruction* operand, int64 index)
     : HloInstruction(HloOpcode::kGetTupleElement, shape), tuple_index_(index) {
-  CHECK(ShapeUtil::IsTuple(operand->shape()));
   AppendOperand(operand);
 }
 
@@ -1611,9 +1602,6 @@ HloOutfeedInstruction::HloOutfeedInstruction(const Shape& outfeed_shape,
     : HloInstruction(HloOpcode::kOutfeed, ShapeUtil::MakeTokenShape()),
       outfeed_shape_(outfeed_shape),
       outfeed_config_(outfeed_config) {
-  CHECK(ShapeUtil::Compatible(operand->shape(), outfeed_shape))
-      << "Outfeed shape " << outfeed_shape
-      << " must be compatible with operand shape " << operand->shape();
   AppendOperand(operand);
   AppendOperand(token_operand);
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 9c22f5db7e..e2aa50c8d4 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -910,7 +910,6 @@ class HloOutfeedInstruction : public HloInstruction {
                                  absl::string_view outfeed_config);
   // Returns the shape for the Outfeed instruction.
   const Shape& outfeed_shape() const {
-    TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(outfeed_shape_));
     return outfeed_shape_;
   }
   // Returns the config for the Outfeed instruction.
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index a7727824fe..c22ee03388 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -548,6 +548,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) {
     case HloOpcode::kTupleSelect:
     case HloOpcode::kSend:
     case HloOpcode::kSendDone:
+    case HloOpcode::kSort:
     case HloOpcode::kTuple:
     case HloOpcode::kWhile:
       break;
@@ -763,7 +764,136 @@ Status VerifyHloStructure(HloModule* module) {
   return Status::OK();
 }
 
-Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
+namespace {
+
+// Returns true if the given Shape has a TOKEN shape as any subshape.
+bool ShapeContainsToken(const Shape& shape) {
+  bool contains_token = false;
+  ShapeUtil::ForEachSubshape(
+      shape, [&contains_token](const Shape& subshape, const ShapeIndex&) {
+        if (ShapeUtil::IsToken(subshape)) {
+          contains_token = true;
+        }
+      });
+  return contains_token;
+}
+
+// Verifies that all types entering and exiting the entry computation are
+// legal.
+Status VerifyEntryAndExitShapes(const HloModule& module) {
+  // Tokens cannot be passed as entry parameters.
+  // TODO(b/80000000): Remove this constraint.
+  for (int i = 0; i < module.entry_computation()->num_parameters(); ++i) {
+    HloInstruction* param =
+        module.entry_computation()->parameter_instruction(i);
+    if (ShapeContainsToken(param->shape())) {
+      return InternalError(
+          "Entry parameter %d is or contains a token shape: %s", i,
+          ShapeUtil::HumanString(param->shape()));
+    }
+  }
+  return Status::OK();
+}
+
+// Checks if the given two instructions share the same channel id.
+Status CheckSameChannel(const HloInstruction* instr1,
+                        const HloInstruction* instr2) {
+  if (instr1->channel_id() != instr2->channel_id()) {
+    return InternalError(
+        "Expected to have the same channel id, actual channel ids are: %s "
+        "(%d), %s (%d)",
+        instr1->ToString(), instr1->channel_id(), instr2->ToString(),
+        instr2->channel_id());
+  }
+  return Status::OK();
+}
+
+// Checks if the given two instructions have the same is_host_transfer
+// attribute value. Intsructions must be send/recv instructions or their
+// 'done' variant.
+Status CheckSameIsHostTransfer(const HloInstruction* instr1,
+                               const HloInstruction* instr2) {
+  const HloSendRecvInstruction* send_recv1 =
+      DynCast<const HloSendRecvInstruction>(instr1);
+  const HloSendRecvInstruction* send_recv2 =
+      DynCast<const HloSendRecvInstruction>(instr2);
+  TF_RET_CHECK(send_recv1 != nullptr);
+  TF_RET_CHECK(send_recv2 != nullptr);
+  if (send_recv1->is_host_transfer() != send_recv2->is_host_transfer()) {
+    return InternalError(
+        "Expected instructions to have the same is-host-transfer property: "
+        "%s, "
+        "%s ",
+        instr1->ToString(), instr2->ToString());
+  }
+  return Status::OK();
+}
+
+// Checks various invariants of send and recv instructions.
+Status VerifySendsAndRecvs(const HloModule& module) {
+  absl::flat_hash_map<int64, const HloInstruction*> host_channels;
+  // Host send/recv instructions must have their own unique channel.
+  auto check_unique_host_channel = [&](const HloInstruction* instruction) {
+    const HloSendRecvInstruction* sendrecv =
+        DynCast<const HloSendRecvInstruction>(instruction);
+    if (sendrecv->is_host_transfer()) {
+      auto it_inserted =
+          host_channels.insert({sendrecv->channel_id(), sendrecv});
+      if (!it_inserted.second) {
+        return FailedPrecondition(
+            "Channel %d is used for multiple host send/recv instructions: "
+            "%s "
+            "and "
+            "%s",
+            sendrecv->channel_id(), sendrecv->ToString(),
+            it_inserted.first->second->ToString());
+      }
+    }
+
+    return Status::OK();
+  };
+
+  // Send/Recv instruction must have a single user: the corresponding
+  // SendDone/RecvDone. with matching channel.
+  for (const HloComputation* computation : module.computations()) {
+    for (const HloInstruction* instruction : computation->instructions()) {
+      switch (instruction->opcode()) {
+        case HloOpcode::kSend: {
+          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
+          TF_RET_CHECK(instruction->users().size() == 1);
+          const HloInstruction* send_done = instruction->users().front();
+          TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone);
+          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, send_done));
+          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, send_done));
+          break;
+        }
+        case HloOpcode::kRecv: {
+          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
+          TF_RET_CHECK(instruction->users().size() == 1);
+          const HloInstruction* recv_done = instruction->users().front();
+          TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone);
+          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, recv_done));
+          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, recv_done));
+          break;
+        }
+        case HloOpcode::kSendDone:
+          TF_RET_CHECK(instruction->operands().size() == 1);
+          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kSend);
+          break;
+        case HloOpcode::kRecvDone:
+          TF_RET_CHECK(instruction->operands().size() == 1);
+          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kRecv);
+          break;
+        default:
+          break;
+      }
+    }
+  }
+  return Status::OK();
+}
+
+// CHECKs various invariants of a fusion instruction.
+Status CheckFusionInstruction(HloInstruction* fusion) {
   // The parent fusion instruction of the fusion computation must be 'fusion'.
   HloComputation* fused_computation = fusion->fused_instructions_computation();
   if (fusion != fused_computation->FusionInstruction()) {
@@ -866,50 +996,32 @@ Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
     }
   }
 
+  TF_RET_CHECK(fusion->called_computations() ==
+               absl::Span<HloComputation* const>(
+                   {fusion->fused_instructions_computation()}))
+      << "Fusion HLO calls computations other than the "
+         "fused_instructions_computation: "
+      << fusion->ToString() << " fusion->fused_instructions_computation(): "
+      << fusion->fused_instructions_computation()->ToString()
+      << " fusion->called_computations(): "
+      << ComputationsToString(fusion->called_computations());
+
+  for (const auto& fused : fusion->fused_instructions()) {
+    TF_RET_CHECK(fused->parent() == fusion->fused_instructions_computation())
+        << "Fused HLO was missing a parent: " << fused->ToString()
+        << " parent: " << fused->parent()
+        << " computation: " << fusion->parent();
+  }
+
   // TODO(b/65423525): We'd like to check that all operands are distinct.
   // This is currently disabled due to the invariant being violated by
   // multi-output fusion.
   return Status::OK();
 }
 
-Status HloVerifier::CheckWhileInstruction(HloInstruction* instruction) {
-  auto* while_cond = instruction->while_condition();
-  auto* while_body = instruction->while_body();
-  if (while_cond->num_parameters() != 1) {
-    return FailedPrecondition(
-        "While condition must have exactly 1 parameter; had %d : %s",
-        while_cond->num_parameters(), while_cond->ToString());
-  }
-  if (while_body->num_parameters() != 1) {
-    return FailedPrecondition(
-        "While body must have exactly 1 parameter; had %d : %s",
-        while_body->num_parameters(), while_body->ToString());
-  }
-  if (instruction->operand_count() != 1) {
-    return FailedPrecondition(
-        "While loop must have exactly one operand; had %d : %s",
-        instruction->operand_count(), instruction->ToString());
-  }
-  return Status::OK();
-}
-
-Status HloVerifier::CheckConditionalInstruction(HloInstruction* instruction) {
-  if (instruction->true_computation()->num_parameters() != 1) {
-    return FailedPrecondition(
-        "True computation %s of %s must have 1 parameter insted of %d",
-        instruction->true_computation()->name(), instruction->ToString(),
-        instruction->true_computation()->num_parameters());
-  }
-  if (instruction->false_computation()->num_parameters() != 1) {
-    return FailedPrecondition(
-        "False computation %s of %s must have 1 parameter insted of %d",
-        instruction->false_computation()->name(), instruction->ToString(),
-        instruction->false_computation()->num_parameters());
-  }
-  return Status::OK();
-}
-
-Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) {
+// Checks that the non-scalar operand shapes are compatible to the output
+// shape, i.e., that there are no implicit broadcasts of size-one dimensions.
+Status CheckElementwiseInstruction(HloInstruction* instruction) {
   const Shape& out_shape = instruction->shape();
   for (HloInstruction* operand : instruction->operands()) {
     const Shape& operand_shape = operand->shape();
@@ -926,133 +1038,114 @@ Status HloVerifier::CheckElementwiseInstruction(HloInstruction* instruction) {
   return Status::OK();
 }
 
-namespace {
+// Visitor which verifies various fields on the HLO instruction. This class does
+// not check result shape as that is checked in the ShapeVerifier.
+class InstructionVerifier : public DfsHloVisitorWithDefault {
+ public:
+  InstructionVerifier() {}
 
-// Returns true if the given Shape has a TOKEN shape as any subshape.
-bool ShapeContainsToken(const Shape& shape) {
-  bool contains_token = false;
-  ShapeUtil::ForEachSubshape(
-      shape, [&contains_token](const Shape& subshape, const ShapeIndex&) {
-        if (ShapeUtil::IsToken(subshape)) {
-          contains_token = true;
-        }
-      });
-  return contains_token;
-}
+  Status DefaultAction(HloInstruction*) override { return Status::OK(); }
 
-// Verifies that all types entering and exiting the entry computation are
-// legal.
-Status VerifyEntryAndExitShapes(const HloModule& module) {
-  // Tokens cannot be passed as entry parameters.
-  // TODO(b/80000000): Remove this constraint.
-  for (int i = 0; i < module.entry_computation()->num_parameters(); ++i) {
-    HloInstruction* param =
-        module.entry_computation()->parameter_instruction(i);
-    if (ShapeContainsToken(param->shape())) {
-      return InternalError(
-          "Entry parameter %d is or contains a token shape: %s", i,
-          ShapeUtil::HumanString(param->shape()));
-    }
+  Status HandleFusion(HloInstruction* fusion) override {
+    return CheckFusionInstruction(fusion);
   }
-  return Status::OK();
-}
 
-// Checks if the given two instructions share the same channel id.
-Status CheckSameChannel(const HloInstruction* instr1,
-                        const HloInstruction* instr2) {
-  if (instr1->channel_id() != instr2->channel_id()) {
-    return InternalError(
-        "Expected to have the same channel id, actual channel ids are: %s "
-        "(%d), %s (%d)",
-        instr1->ToString(), instr1->channel_id(), instr2->ToString(),
-        instr2->channel_id());
+  Status HandleBroadcast(HloInstruction* broadcast) override {
+    // If you see this failure then someone has confused the difference
+    // between the HLO broadcast op, and the UserComputation broadcast
+    // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I
+    // or ComputationLowerer::Visit()
+    TF_RET_CHECK(broadcast->dimensions().size() ==
+                 ShapeUtil::Rank(broadcast->operand(0)->shape()))
+        << "Broadcast HLO (" << broadcast->ToShortString()
+        << ") has invalid number of dimensions: "
+        << broadcast->dimensions().size()
+        << " != " << ShapeUtil::Rank(broadcast->operand(0)->shape());
+    return Status::OK();
   }
-  return Status::OK();
-}
 
-// Checks if the given two instructions have the same is_host_transfer
-// attribute value. Intsructions must be send/recv instructions or their
-// 'done' variant.
-Status CheckSameIsHostTransfer(const HloInstruction* instr1,
-                               const HloInstruction* instr2) {
-  const HloSendRecvInstruction* send_recv1 =
-      DynCast<const HloSendRecvInstruction>(instr1);
-  const HloSendRecvInstruction* send_recv2 =
-      DynCast<const HloSendRecvInstruction>(instr2);
-  TF_RET_CHECK(send_recv1 != nullptr);
-  TF_RET_CHECK(send_recv2 != nullptr);
-  if (send_recv1->is_host_transfer() != send_recv2->is_host_transfer()) {
-    return InternalError(
-        "Expected instructions to have the same is-host-transfer property: "
-        "%s, "
-        "%s ",
-        instr1->ToString(), instr2->ToString());
+  Status HandleWhile(HloInstruction* xla_while) override {
+    auto* while_cond = xla_while->while_condition();
+    auto* while_body = xla_while->while_body();
+    if (while_cond->num_parameters() != 1) {
+      return FailedPrecondition(
+          "While condition must have exactly 1 parameter; had %d : %s",
+          while_cond->num_parameters(), while_cond->ToString());
+    }
+    if (while_body->num_parameters() != 1) {
+      return FailedPrecondition(
+          "While body must have exactly 1 parameter; had %d : %s",
+          while_body->num_parameters(), while_body->ToString());
+    }
+    if (xla_while->operand_count() != 1) {
+      return FailedPrecondition(
+          "While loop must have exactly one operand; had %d : %s",
+          xla_while->operand_count(), xla_while->ToString());
+    }
+    return Status::OK();
   }
-  return Status::OK();
-}
 
-// Checks various invariants of send and recv instructions.
-Status VerifySendsAndRecvs(const HloModule& module) {
-  absl::flat_hash_map<int64, const HloInstruction*> host_channels;
-  // Host send/recv instructions must have their own unique channel.
-  auto check_unique_host_channel = [&](const HloInstruction* instruction) {
-    const HloSendRecvInstruction* sendrecv =
-        DynCast<const HloSendRecvInstruction>(instruction);
-    if (sendrecv->is_host_transfer()) {
-      auto it_inserted =
-          host_channels.insert({sendrecv->channel_id(), sendrecv});
-      if (!it_inserted.second) {
-        return FailedPrecondition(
-            "Channel %d is used for multiple host send/recv instructions: "
-            "%s "
-            "and "
-            "%s",
-            sendrecv->channel_id(), sendrecv->ToString(),
-            it_inserted.first->second->ToString());
-      }
+  Status HandleConditional(HloInstruction* conditional) override {
+    if (conditional->true_computation()->num_parameters() != 1) {
+      return FailedPrecondition(
+          "True computation %s of %s must have 1 parameter insted of %d",
+          conditional->true_computation()->name(), conditional->ToString(),
+          conditional->true_computation()->num_parameters());
+    }
+    if (conditional->false_computation()->num_parameters() != 1) {
+      return FailedPrecondition(
+          "False computation %s of %s must have 1 parameter insted of %d",
+          conditional->false_computation()->name(), conditional->ToString(),
+          conditional->false_computation()->num_parameters());
     }
+    return Status::OK();
+  }
+
+  Status HandleElementwiseUnary(HloInstruction* instruction) override {
+    return CheckElementwiseInstruction(instruction);
+  }
 
+  Status HandleElementwiseBinary(HloInstruction* instruction) override {
+    return CheckElementwiseInstruction(instruction);
+  }
+
+  Status HandleGetTupleElement(HloInstruction* gte) override {
+    TF_RET_CHECK(ShapeUtil::IsTuple(gte->operand(0)->shape()));
     return Status::OK();
-  };
+  }
 
-  // Send/Recv instruction must have a single user: the corresponding
-  // SendDone/RecvDone. with matching channel.
-  for (const HloComputation* computation : module.computations()) {
-    for (const HloInstruction* instruction : computation->instructions()) {
-      switch (instruction->opcode()) {
-        case HloOpcode::kSend: {
-          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
-          TF_RET_CHECK(instruction->users().size() == 1);
-          const HloInstruction* send_done = instruction->users().front();
-          TF_RET_CHECK(send_done->opcode() == HloOpcode::kSendDone);
-          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, send_done));
-          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, send_done));
-          break;
-        }
-        case HloOpcode::kRecv: {
-          TF_RETURN_IF_ERROR(check_unique_host_channel(instruction));
-          TF_RET_CHECK(instruction->users().size() == 1);
-          const HloInstruction* recv_done = instruction->users().front();
-          TF_RET_CHECK(recv_done->opcode() == HloOpcode::kRecvDone);
-          TF_RETURN_IF_ERROR(CheckSameChannel(instruction, recv_done));
-          TF_RETURN_IF_ERROR(CheckSameIsHostTransfer(instruction, recv_done));
-          break;
-        }
-        case HloOpcode::kSendDone:
-          TF_RET_CHECK(instruction->operands().size() == 1);
-          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kSend);
-          break;
-        case HloOpcode::kRecvDone:
-          TF_RET_CHECK(instruction->operands().size() == 1);
-          TF_RET_CHECK(instruction->operand(0)->opcode() == HloOpcode::kRecv);
-          break;
-        default:
-          break;
-      }
-    }
+  Status HandleTranspose(HloInstruction* transpose) override {
+    const Shape& shape = transpose->shape();
+    const HloInstruction* operand = transpose->operand(0);
+    TF_RET_CHECK(shape.dimensions().size() == transpose->dimensions().size());
+    TF_RET_CHECK(shape.dimensions().size() ==
+                 transpose->operand(0)->shape().dimensions().size());
+    TF_RET_CHECK(std::equal(
+        operand->shape().dimensions().begin(),
+        operand->shape().dimensions().end(),
+        Permute(transpose->dimensions(), shape.dimensions()).begin()))
+        << "shape: " << shape << ", operand->shape(): " << shape
+        << ", dimensions: {" << absl::StrJoin(transpose->dimensions(), ", ")
+        << "}";
+    return Status::OK();
+  }
+
+  Status Preprocess(HloInstruction* instruction) override {
+    auto previous = instructions_by_name_.find(instruction->name());
+    TF_RET_CHECK(previous == instructions_by_name_.end())
+        << "HLO has name that is not unique within module:\n"
+        << instruction->ToString()
+        << " in computation: " << instruction->parent()->name()
+        << "\nPrevious HLO with same name:\n"
+        << previous->second->ToString()
+        << " in computation: " << previous->second->parent()->name();
+    instructions_by_name_[instruction->name()] = instruction;
+    return Status::OK();
   }
-  return Status::OK();
-}
+
+ private:
+  absl::flat_hash_map<string, const HloInstruction*> instructions_by_name_;
+};
 
 }  // namespace
 
@@ -1061,65 +1154,12 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
   TF_RETURN_IF_ERROR(VerifyHloStructure(module));
   TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module));
 
-  absl::flat_hash_map<string, const HloInstruction*> instructions;
-
   for (auto* computation : module->computations()) {
-    for (const auto& instruction : computation->instructions()) {
-      TF_RET_CHECK(instruction->parent() == computation);
-      if (instruction->opcode() == HloOpcode::kFusion) {
-        TF_RETURN_IF_ERROR(CheckFusionInstruction(instruction));
-        TF_RET_CHECK(instruction->called_computations() ==
-                     absl::Span<HloComputation* const>(
-                         {instruction->fused_instructions_computation()}))
-            << "Fusion HLO calls computations other than the "
-               "fused_instructions_computation: "
-            << instruction->ToString()
-            << " instruction->fused_instructions_computation(): "
-            << instruction->fused_instructions_computation()->ToString()
-            << " instruction->called_computations(): "
-            << ComputationsToString(instruction->called_computations());
-
-        for (const auto& fused : instruction->fused_instructions()) {
-          TF_RET_CHECK(fused->parent() ==
-                       instruction->fused_instructions_computation())
-              << "Fused HLO was missing a parent: " << fused->ToString()
-              << " parent: " << fused->parent()
-              << " computation: " << computation;
-        }
-      } else if (instruction->opcode() == HloOpcode::kBroadcast) {
-        // If you see this failure then someone has confused the difference
-        // between the HLO broadcast op, and the UserComputation broadcast
-        // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I
-        // or ComputationLowerer::Visit()
-        TF_RET_CHECK(instruction->dimensions().size() ==
-                     ShapeUtil::Rank(instruction->operand(0)->shape()))
-            << "Broadcast HLO (" << instruction->ToShortString()
-            << ") has invalid number of dimensions: "
-            << instruction->dimensions().size()
-            << " != " << ShapeUtil::Rank(instruction->operand(0)->shape());
-      } else if (instruction->opcode() == HloOpcode::kWhile) {
-        TF_RETURN_IF_ERROR(CheckWhileInstruction(instruction));
-      } else if (instruction->opcode() == HloOpcode::kConditional) {
-        TF_RETURN_IF_ERROR(CheckConditionalInstruction(instruction));
-      } else if (instruction->opcode() !=
-                     HloOpcode::kRng /* Rng operands are always scalar. */
-                 && instruction->IsElementwise()) {
-        TF_RETURN_IF_ERROR(CheckElementwiseInstruction(instruction));
-      }
-
-      auto previous = instructions.find(instruction->name());
-      TF_RET_CHECK(previous == instructions.end())
-          << "HLO has name that is not unique within module:\n"
-          << instruction->ToString()
-          << " in computation: " << computation->name()
-          << "\nPrevious HLO with same name:\n"
-          << previous->second->ToString()
-          << " in computation: " << previous->second->parent()->name();
-      instructions[instruction->name()] = instruction;
-    }
-
     std::unique_ptr<ShapeVerifier> shape_verifier = shape_verifier_factory_();
     TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get()));
+
+    InstructionVerifier instruction_verifier;
+    TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier));
   }
 
   TF_RETURN_IF_ERROR(VerifyEntryAndExitShapes(*module));
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index 0cde4a31af..6d16586c2c 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -172,17 +172,6 @@ class HloVerifier : public HloModulePass {
   StatusOr<bool> Run(HloModule* module) override;
 
  private:
-  // CHECKs various invariants of a fusion instruction.
-  Status CheckFusionInstruction(HloInstruction* fusion) const;
-
-  Status CheckWhileInstruction(HloInstruction* instruction);
-
-  Status CheckConditionalInstruction(HloInstruction* instruction);
-
-  // Checks that the non-scalar operand shapes are compatible to the output
-  // shape, i.e., that there are no implicit broadcasts of size-one dimensions.
-  Status CheckElementwiseInstruction(HloInstruction* instruction);
-
   // Creates a ShapeVerifier that checks that shapes match inferred
   // expectations. This is a factory function because ShapeVerifier,
   // being a DfsHloVisitor, is stateful. We want a clean object
-- 
GitLab


From ea5cd0a4a6c1eac892cdf5fa8782bb05ebe2bf98 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 11 Oct 2018 22:52:15 -0700
Subject: [PATCH 0852/1085] Internal change.

PiperOrigin-RevId: 216815486
---
 tensorflow/contrib/lite/kernels/BUILD         |   1 +
 .../bidirectional_sequence_lstm_test.cc       | 257 ++++++++++--------
 2 files changed, 144 insertions(+), 114 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD
index d2d8073abd..f20bb420a0 100644
--- a/tensorflow/contrib/lite/kernels/BUILD
+++ b/tensorflow/contrib/lite/kernels/BUILD
@@ -586,6 +586,7 @@ tf_cc_test(
         ":builtin_ops",
         "//tensorflow/contrib/lite:framework",
         "//tensorflow/contrib/lite/kernels:test_util",
+        "//tensorflow/contrib/lite/schema:schema_fbs",
         "@com_google_googletest//:gtest",
     ],
 )
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
index 9cc04907e1..db98d6c49d 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 // Unit test for TFLite Bidirectional LSTM op.
 
+#include <initializer_list>
 #include <iomanip>
 #include <memory>
 #include <vector>
@@ -24,6 +25,7 @@ limitations under the License.
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/kernels/test_util.h"
 #include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
 
 namespace tflite {
 namespace {
@@ -37,6 +39,7 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
                            bool use_peephole, bool use_projection_weights,
                            bool use_projection_bias, bool merge_outputs,
                            float cell_clip, float proj_clip,
+                           bool quantize_weights,
                            const std::vector<std::vector<int>>& input_shapes)
       : n_batch_(n_batch),
         n_input_(n_input),
@@ -44,37 +47,40 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
         n_bw_cell_(n_cell),
         n_fw_output_(n_output),
         n_bw_output_(n_output),
-        sequence_length_(sequence_length) {
+        sequence_length_(sequence_length),
+        quantize_weights_(quantize_weights) {
     input_ = AddInput(TensorType_FLOAT32);
+    const auto weight_type =
+        quantize_weights_ ? TensorType_UINT8 : TensorType_FLOAT32;
 
     if (use_cifg) {
       fw_input_to_input_weights_ = AddNullInput();
     } else {
-      fw_input_to_input_weights_ = AddInput(TensorType_FLOAT32);
+      fw_input_to_input_weights_ = AddInput(weight_type);
     }
 
-    fw_input_to_forget_weights_ = AddInput(TensorType_FLOAT32);
-    fw_input_to_cell_weights_ = AddInput(TensorType_FLOAT32);
-    fw_input_to_output_weights_ = AddInput(TensorType_FLOAT32);
+    fw_input_to_forget_weights_ = AddInput(weight_type);
+    fw_input_to_cell_weights_ = AddInput(weight_type);
+    fw_input_to_output_weights_ = AddInput(weight_type);
 
     if (use_cifg) {
       fw_recurrent_to_input_weights_ = AddNullInput();
     } else {
-      fw_recurrent_to_input_weights_ = AddInput(TensorType_FLOAT32);
+      fw_recurrent_to_input_weights_ = AddInput(weight_type);
     }
 
-    fw_recurrent_to_forget_weights_ = AddInput(TensorType_FLOAT32);
-    fw_recurrent_to_cell_weights_ = AddInput(TensorType_FLOAT32);
-    fw_recurrent_to_output_weights_ = AddInput(TensorType_FLOAT32);
+    fw_recurrent_to_forget_weights_ = AddInput(weight_type);
+    fw_recurrent_to_cell_weights_ = AddInput(weight_type);
+    fw_recurrent_to_output_weights_ = AddInput(weight_type);
 
     if (use_peephole) {
       if (use_cifg) {
         fw_cell_to_input_weights_ = AddNullInput();
       } else {
-        fw_cell_to_input_weights_ = AddInput(TensorType_FLOAT32);
+        fw_cell_to_input_weights_ = AddInput(weight_type);
       }
-      fw_cell_to_forget_weights_ = AddInput(TensorType_FLOAT32);
-      fw_cell_to_output_weights_ = AddInput(TensorType_FLOAT32);
+      fw_cell_to_forget_weights_ = AddInput(weight_type);
+      fw_cell_to_output_weights_ = AddInput(weight_type);
     } else {
       fw_cell_to_input_weights_ = AddNullInput();
       fw_cell_to_forget_weights_ = AddNullInput();
@@ -105,31 +111,31 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
     if (use_cifg) {
       bw_input_to_input_weights_ = AddNullInput();
     } else {
-      bw_input_to_input_weights_ = AddInput(TensorType_FLOAT32);
+      bw_input_to_input_weights_ = AddInput(weight_type);
     }
 
-    bw_input_to_forget_weights_ = AddInput(TensorType_FLOAT32);
-    bw_input_to_cell_weights_ = AddInput(TensorType_FLOAT32);
-    bw_input_to_output_weights_ = AddInput(TensorType_FLOAT32);
+    bw_input_to_forget_weights_ = AddInput(weight_type);
+    bw_input_to_cell_weights_ = AddInput(weight_type);
+    bw_input_to_output_weights_ = AddInput(weight_type);
 
     if (use_cifg) {
       bw_recurrent_to_input_weights_ = AddNullInput();
     } else {
-      bw_recurrent_to_input_weights_ = AddInput(TensorType_FLOAT32);
+      bw_recurrent_to_input_weights_ = AddInput(weight_type);
     }
 
-    bw_recurrent_to_forget_weights_ = AddInput(TensorType_FLOAT32);
-    bw_recurrent_to_cell_weights_ = AddInput(TensorType_FLOAT32);
-    bw_recurrent_to_output_weights_ = AddInput(TensorType_FLOAT32);
+    bw_recurrent_to_forget_weights_ = AddInput(weight_type);
+    bw_recurrent_to_cell_weights_ = AddInput(weight_type);
+    bw_recurrent_to_output_weights_ = AddInput(weight_type);
 
     if (use_peephole) {
       if (use_cifg) {
         bw_cell_to_input_weights_ = AddNullInput();
       } else {
-        bw_cell_to_input_weights_ = AddInput(TensorType_FLOAT32);
+        bw_cell_to_input_weights_ = AddInput(weight_type);
       }
-      bw_cell_to_forget_weights_ = AddInput(TensorType_FLOAT32);
-      bw_cell_to_output_weights_ = AddInput(TensorType_FLOAT32);
+      bw_cell_to_forget_weights_ = AddInput(weight_type);
+      bw_cell_to_output_weights_ = AddInput(weight_type);
     } else {
       bw_cell_to_input_weights_ = AddNullInput();
       bw_cell_to_forget_weights_ = AddNullInput();
@@ -146,7 +152,7 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
     bw_output_gate_bias_ = AddInput(TensorType_FLOAT32);
 
     if (use_projection_weights) {
-      bw_projection_weights_ = AddInput(TensorType_FLOAT32);
+      bw_projection_weights_ = AddInput(weight_type);
       if (use_projection_bias) {
         bw_projection_bias_ = AddInput(TensorType_FLOAT32);
       } else {
@@ -198,88 +204,96 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
     BuildInterpreter(input_shapes);
   }
 
+  void PopulateWeightTensor(int tensor_id, const std::vector<float>& f) {
+    if (quantize_weights_) {
+      SymmetricQuantizeAndPopulate(tensor_id, f);
+    } else {
+      PopulateTensor(tensor_id, f);
+    }
+  }
+
   // Set weights in forward and backward cells to be the same.
-  void SetInputToInputWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_input_to_input_weights_, f);
-    PopulateTensor(bw_input_to_input_weights_, f);
+  void SetInputToInputWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_input_to_input_weights_, f);
+    PopulateWeightTensor(bw_input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_input_to_forget_weights_, f);
-    PopulateTensor(bw_input_to_forget_weights_, f);
+  void SetInputToForgetWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_input_to_forget_weights_, f);
+    PopulateWeightTensor(bw_input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_input_to_cell_weights_, f);
-    PopulateTensor(bw_input_to_cell_weights_, f);
+  void SetInputToCellWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_input_to_cell_weights_, f);
+    PopulateWeightTensor(bw_input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_input_to_output_weights_, f);
-    PopulateTensor(bw_input_to_output_weights_, f);
+  void SetInputToOutputWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_input_to_output_weights_, f);
+    PopulateWeightTensor(bw_input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_recurrent_to_input_weights_, f);
-    PopulateTensor(bw_recurrent_to_input_weights_, f);
+  void SetRecurrentToInputWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_recurrent_to_input_weights_, f);
+    PopulateWeightTensor(bw_recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_recurrent_to_forget_weights_, f);
-    PopulateTensor(bw_recurrent_to_forget_weights_, f);
+  void SetRecurrentToForgetWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_recurrent_to_forget_weights_, f);
+    PopulateWeightTensor(bw_recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_recurrent_to_cell_weights_, f);
-    PopulateTensor(bw_recurrent_to_cell_weights_, f);
+  void SetRecurrentToCellWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_recurrent_to_cell_weights_, f);
+    PopulateWeightTensor(bw_recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_recurrent_to_output_weights_, f);
-    PopulateTensor(bw_recurrent_to_output_weights_, f);
+  void SetRecurrentToOutputWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_recurrent_to_output_weights_, f);
+    PopulateWeightTensor(bw_recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_cell_to_input_weights_, f);
-    PopulateTensor(bw_cell_to_input_weights_, f);
+  void SetCellToInputWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_cell_to_input_weights_, f);
+    PopulateWeightTensor(bw_cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_cell_to_forget_weights_, f);
-    PopulateTensor(bw_cell_to_forget_weights_, f);
+  void SetCellToForgetWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_cell_to_forget_weights_, f);
+    PopulateWeightTensor(bw_cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_cell_to_output_weights_, f);
-    PopulateTensor(bw_cell_to_output_weights_, f);
+  void SetCellToOutputWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_cell_to_output_weights_, f);
+    PopulateWeightTensor(bw_cell_to_output_weights_, f);
   }
 
-  void SetInputGateBias(std::initializer_list<float> f) {
+  void SetInputGateBias(const std::vector<float>& f) {
     PopulateTensor(fw_input_gate_bias_, f);
     PopulateTensor(bw_input_gate_bias_, f);
   }
 
-  void SetForgetGateBias(std::initializer_list<float> f) {
+  void SetForgetGateBias(const std::vector<float>& f) {
     PopulateTensor(fw_forget_gate_bias_, f);
     PopulateTensor(bw_forget_gate_bias_, f);
   }
 
-  void SetCellBias(std::initializer_list<float> f) {
+  void SetCellBias(const std::vector<float>& f) {
     PopulateTensor(fw_cell_bias_, f);
     PopulateTensor(bw_cell_bias_, f);
   }
 
-  void SetOutputGateBias(std::initializer_list<float> f) {
+  void SetOutputGateBias(const std::vector<float>& f) {
     PopulateTensor(fw_output_gate_bias_, f);
     PopulateTensor(bw_output_gate_bias_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
-    PopulateTensor(fw_projection_weights_, f);
-    PopulateTensor(bw_projection_weights_, f);
+  void SetProjectionWeights(const std::vector<float>& f) {
+    PopulateWeightTensor(fw_projection_weights_, f);
+    PopulateWeightTensor(bw_projection_weights_, f);
   }
 
-  void SetProjectionBias(std::initializer_list<float> f) {
+  void SetProjectionBias(const std::vector<float>& f) {
     PopulateTensor(fw_projection_bias_, f);
     PopulateTensor(bw_projection_bias_, f);
   }
@@ -370,21 +384,30 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
   int n_fw_output_;
   int n_bw_output_;
   int sequence_length_;
+
+  bool quantize_weights_;
 };
 
-TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
+// Declare LSTMOpTest as a parameterized test, where the parameter is a boolean
+// indicating whether to use quantization or not.
+class LSTMOpTest : public ::testing::TestWithParam<bool> {};
+
+INSTANTIATE_TEST_CASE_P(QuantizationOrNot, LSTMOpTest, ::testing::Bool());
+
+TEST_P(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
   const int n_batch = 1;
   const int n_input = 2;
   // n_cell and n_output have the same size when there is no projection.
   const int n_cell = 4;
   const int n_output = 4;
   const int sequence_length = 3;
+  const bool quantize_weights = GetParam();
 
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0,
+      /*proj_clip=*/0.0, quantize_weights,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -502,9 +525,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
       -0.03716109, 0.12507336, 0.41193449, -0.20860538,
       -0.15053082, 0.09120187, 0.24278517, -0.12222792};
   static float lstm_bw_golden_output[] = {
-      -0.0806187, 0.139077, 0.400476, -0.197842,
-      -0.0332076, 0.123838, 0.309777, -0.17621,
-      -0.0490733, 0.0739237, 0.067706, -0.0208124};
+      -0.0806187, 0.139077, 0.400476,   -0.197842, -0.0332076, 0.123838,
+      0.309777,   -0.17621, -0.0490733, 0.0739237, 0.067706,   -0.0208124};
 
   float* batch0_start = lstm_input;
   float* batch0_end = batch0_start + lstm.num_inputs() * lstm.sequence_length();
@@ -519,7 +541,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
   std::vector<float> fw_expected;
   fw_expected.insert(fw_expected.end(), fw_golden_start, fw_golden_end);
   EXPECT_THAT(lstm.GetFwOutput(),
-              ElementsAreArray(ArrayFloatNear(fw_expected)));
+              ElementsAreArray(
+                  ArrayFloatNear(fw_expected, quantize_weights ? 1e-2 : 1e-5)));
 
   float* bw_golden_start = lstm_bw_golden_output;
   float* bw_golden_end =
@@ -527,23 +550,26 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
   std::vector<float> bw_expected;
   bw_expected.insert(bw_expected.end(), bw_golden_start, bw_golden_end);
   EXPECT_THAT(lstm.GetBwOutput(),
-              ElementsAreArray(ArrayFloatNear(bw_expected)));
+              ElementsAreArray(
+                  ArrayFloatNear(bw_expected, quantize_weights ? 1e-2 : 1e-5)));
 }
 
-// Same as the previous test, yet with a single merged output tensor.
-TEST(LSTMOpTest, BlackBoxTestMergedOutput) {
-  const int n_batch = 1;
+// Same as the previous test, yet with a single merged output tensor and n_batch
+// of 2.
+TEST_P(LSTMOpTest, BlackBoxTestMergedOutput) {
+  const int n_batch = 2;
   const int n_input = 2;
   // n_cell and n_output have the same size when there is no projection.
   const int n_cell = 4;
   const int n_output = 4;
   const int sequence_length = 3;
+  const bool quantize_weights = GetParam();
 
   BidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/true, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0,
+      /*proj_clip=*/0.0, quantize_weights,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -655,24 +681,29 @@ TEST(LSTMOpTest, BlackBoxTestMergedOutput) {
        -0.51818722, -0.15390486, 0.0468148, 0.39922136});
 
   // Input should have n_input * sequence_length many values.
-  static float lstm_input[] = {2., 3., 3., 4., 1., 1.};
+  static float lstm_input[] = {2., 3., 2., 3., 3., 4., 3., 4., 1., 1., 1., 1.};
   static float lstm_fw_golden_output[] = {
-      -0.02973187, 0.1229473,  0.20885126, -0.15358765,
-      -0.03716109, 0.12507336, 0.41193449, -0.20860538,
-      -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+      -0.02973187, 0.1229473,   0.20885126,  -0.15358765, -0.02973187,
+      0.1229473,   0.20885126,  -0.15358765, -0.03716109, 0.12507336,
+      0.41193449,  -0.20860538, -0.03716109, 0.12507336,  0.41193449,
+      -0.20860538, -0.15053082, 0.09120187,  0.24278517,  -0.12222792,
+      -0.15053082, 0.09120187,  0.24278517,  -0.12222792};
   static float lstm_bw_golden_output[] = {
-      -0.0806187, 0.139077, 0.400476,   -0.197842, -0.0332076, 0.123838,
-      0.309777,   -0.17621, -0.0490733, 0.0739237, 0.067706,   -0.0208124};
+      -0.0806187, 0.139077,   0.400476,   -0.197842, -0.0806187, 0.139077,
+      0.400476,   -0.197842,  -0.0332076, 0.123838,  0.309777,   -0.17621,
+      -0.0332076, 0.123838,   0.309777,   -0.17621,  -0.0490733, 0.0739237,
+      0.067706,   -0.0208124, -0.0490733, 0.0739237, 0.067706,   -0.0208124};
 
   float* batch0_start = lstm_input;
-  float* batch0_end = batch0_start + lstm.num_inputs() * lstm.sequence_length();
+  float* batch0_end = batch0_start + lstm.num_inputs() * lstm.num_batches() *
+                                         lstm.sequence_length();
 
   lstm.SetInput(0, batch0_start, batch0_end);
 
   lstm.Invoke();
 
   std::vector<float> merged_expected;
-  for (int k = 0; k < lstm.sequence_length(); k++) {
+  for (int k = 0; k < lstm.sequence_length() * lstm.num_batches(); k++) {
     merged_expected.insert(
         merged_expected.end(),
         lstm_fw_golden_output + k * lstm.num_fw_outputs(),
@@ -683,7 +714,8 @@ TEST(LSTMOpTest, BlackBoxTestMergedOutput) {
         lstm_bw_golden_output + (k + 1) * lstm.num_bw_outputs());
   }
   EXPECT_THAT(lstm.GetFwOutput(),
-              ElementsAreArray(ArrayFloatNear(merged_expected)));
+              ElementsAreArray(ArrayFloatNear(merged_expected,
+                                              quantize_weights ? 1e-2 : 1e-5)));
 }
 
 TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
@@ -698,7 +730,7 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0,
+      /*proj_clip=*/0.0, /*quantize_weights=*/false,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -858,7 +890,7 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
       /*use_peephole=*/true, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0,
+      /*proj_clip=*/0.0, /*quantize_weights=*/false,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -1008,7 +1040,7 @@ TEST(LSTMOpTest,
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
       /*use_peephole=*/true, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0,
+      /*proj_clip=*/0.0, /*quantize_weights=*/false,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -1158,7 +1190,7 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/true, /*use_projection_weights=*/true,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0,
+      /*proj_clip=*/0.0, /*quantize_weights=*/false,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -1779,31 +1811,28 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
        0.0412031,    0.0118723,   0.0239643,   0.0394009}};
 
   static float lstm_combined_golden_output[][64] = {
-    {
-      -0.022014,  0.073544, -0.002235,  0.040068, -0.037136, -0.052788,
-      0.075325, -0.029378,  0.024298, -0.07733 , -0.030674, -0.060229,
-      0.040599,  0.011608,  0.042005,  0.045977, -0.039225,  0.076294,
-      0.000735,  0.032852, -0.069869, -0.053312,  0.073527, -0.028136,
-      0.021585, -0.102679, -0.004327, -0.043304,  0.072861,  0.027077,
-      0.034558,  0.068292, -0.036292,  0.069832, -0.003032,  0.053829,
-      -0.043821, -0.072713,  0.085029, -0.040374,  0.020014, -0.104521,
-      -0.034504, -0.059759,  0.062569,  0.025652,  0.049306,  0.061189,
-      -0.025146,  0.079643, -0.005188,  0.033080, -0.048079, -0.048082,
-      0.069369, -0.028900,  0.024572, -0.077547, -0.022517, -0.054477,
-      0.038857,  0.013336,  0.043234,  0.044788},
-    {
-      -0.039186,  0.070792, -0.005913,  0.02642,  -0.068274, -0.05022,
-      0.061444, -0.031241,  0.014996, -0.094544, -0.004146, -0.03464,
-      0.058981,  0.026097,  0.039781,  0.058408, -0.031887,  0.069252,
-      0.00576,   0.054062, -0.042801, -0.059974,  0.085272, -0.034453,
-      0.026097, -0.0959,   -0.031164, -0.058699,  0.06839,   0.020512,
-      0.044727,  0.063609, -0.039863,  0.084819, -0.003909,  0.028666,
-      -0.075677, -0.045125,  0.070379, -0.033895,  0.022111, -0.097184,
-      -0.004921, -0.040851,  0.062316,  0.017435,  0.041437,  0.064568,
-      -0.039656,  0.060726, -0.003402,  0.036854, -0.056503, -0.058554,
-      0.068588, -0.034879,  0.01352,  -0.09962,  -0.01434,  -0.039505,
-      0.065133,  0.024321,  0.038473,  0.062438
-    }};
+      {-0.022014, 0.073544,  -0.002235, 0.040068,  -0.037136, -0.052788,
+       0.075325,  -0.029378, 0.024298,  -0.07733,  -0.030674, -0.060229,
+       0.040599,  0.011608,  0.042005,  0.045977,  -0.039225, 0.076294,
+       0.000735,  0.032852,  -0.069869, -0.053312, 0.073527,  -0.028136,
+       0.021585,  -0.102679, -0.004327, -0.043304, 0.072861,  0.027077,
+       0.034558,  0.068292,  -0.036292, 0.069832,  -0.003032, 0.053829,
+       -0.043821, -0.072713, 0.085029,  -0.040374, 0.020014,  -0.104521,
+       -0.034504, -0.059759, 0.062569,  0.025652,  0.049306,  0.061189,
+       -0.025146, 0.079643,  -0.005188, 0.033080,  -0.048079, -0.048082,
+       0.069369,  -0.028900, 0.024572,  -0.077547, -0.022517, -0.054477,
+       0.038857,  0.013336,  0.043234,  0.044788},
+      {-0.039186, 0.070792,  -0.005913, 0.02642,   -0.068274, -0.05022,
+       0.061444,  -0.031241, 0.014996,  -0.094544, -0.004146, -0.03464,
+       0.058981,  0.026097,  0.039781,  0.058408,  -0.031887, 0.069252,
+       0.00576,   0.054062,  -0.042801, -0.059974, 0.085272,  -0.034453,
+       0.026097,  -0.0959,   -0.031164, -0.058699, 0.06839,   0.020512,
+       0.044727,  0.063609,  -0.039863, 0.084819,  -0.003909, 0.028666,
+       -0.075677, -0.045125, 0.070379,  -0.033895, 0.022111,  -0.097184,
+       -0.004921, -0.040851, 0.062316,  0.017435,  0.041437,  0.064568,
+       -0.039656, 0.060726,  -0.003402, 0.036854,  -0.056503, -0.058554,
+       0.068588,  -0.034879, 0.01352,   -0.09962,  -0.01434,  -0.039505,
+       0.065133,  0.024321,  0.038473,  0.062438}};
 
   for (int i = 0; i < lstm.sequence_length(); i++) {
     float* batch0_start = lstm_input[0] + i * lstm.num_inputs();
-- 
GitLab


From ca1ded0b88a80c7051a73346458bb075e37b8376 Mon Sep 17 00:00:00 2001
From: Yifei Feng <1192265+yifeif@users.noreply.github.com>
Date: Thu, 11 Oct 2018 23:45:43 -0700
Subject: [PATCH 0853/1085] r1.12-rc1 cherry-pick request: Update to new RBE
 toolchain with Clang 8.0.0 r340178. (#22904)

* Update to new toolchain with Clang 8.0.0 r340178.
 - Also update bazel to 0.16.1 because the new toolchain image requires it.

PiperOrigin-RevId: 216590129

* Fix lstm_test&layer_norm_lstm_test w/ Clang 8.0.0

PiperOrigin-RevId: 216475683

* Fix lite/kernels:add_test for Clang 8.0.0

PiperOrigin-RevId: 216455772

* Fix mul_test with Clang 8.0.0

PiperOrigin-RevId: 216570443

* Add tf.contrib.estimator.python to exclude path
---
 tensorflow/contrib/lite/kernels/add_test.cc   |  36 +++---
 .../lite/kernels/layer_norm_lstm_test.cc      | 116 +++++++++---------
 tensorflow/contrib/lite/kernels/lstm_test.cc  |  92 +++++++-------
 tensorflow/contrib/lite/kernels/mul_test.cc   |  10 +-
 tensorflow/tools/docs/generate_lib.py         |   1 +
 tensorflow/workspace.bzl                      |   8 +-
 third_party/toolchains/BUILD                  |   2 +-
 7 files changed, 131 insertions(+), 134 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/add_test.cc b/tensorflow/contrib/lite/kernels/add_test.cc
index 0b58443211..261dd36ef0 100644
--- a/tensorflow/contrib/lite/kernels/add_test.cc
+++ b/tensorflow/contrib/lite/kernels/add_test.cc
@@ -108,7 +108,7 @@ TEST(FloatAddOpModel, ActivationRELU_N1_TO_1) {
 }
 
 TEST(FloatAddOpModel, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -124,7 +124,7 @@ TEST(FloatAddOpModel, VariousInputShapes) {
 }
 
 TEST(FloatAddOpModel, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -161,7 +161,7 @@ TEST(IntegerAddOpModel, ActivationRELU_N1_TO_1) {
 }
 
 TEST(IntegerAddOpModel, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerAddOpModel m({TensorType_INT32, test_shapes[i]},
@@ -176,7 +176,7 @@ TEST(IntegerAddOpModel, VariousInputShapes) {
 }
 
 TEST(IntegerAddOpModel, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerAddOpModel m({TensorType_INT32, test_shapes[i]},
@@ -193,11 +193,11 @@ TEST(IntegerAddOpModel, WithBroadcast) {
 
 TEST(QuantizedAddOpModel, QuantizedTestsNoActivation) {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
-  std::vector<std::initializer_list<float>> inputs1 = {
+  std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::initializer_list<float>> inputs2 = {
+  std::vector<std::vector<float>> inputs2 = {
       {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::initializer_list<float>> results = {
+  std::vector<std::vector<float>> results = {
       {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
@@ -217,11 +217,11 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
   const float kMin = -1.f;
   const float kMax = 32767.f / 32768.f;
   float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
-  std::vector<std::initializer_list<float>> inputs1 = {
+  std::vector<std::vector<float>> inputs1 = {
       {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::initializer_list<float>> inputs2 = {
+  std::vector<std::vector<float>> inputs2 = {
       {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::initializer_list<float>> results = {
+  std::vector<std::vector<float>> results = {
       {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax},
@@ -240,12 +240,12 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
 
 TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) {
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
-  std::vector<std::initializer_list<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
-                                                       {-0.8, 0.2, 0.7, 0.3}};
-  std::vector<std::initializer_list<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
-                                                       {0.6, 0.4, -0.8, 0.5}};
-  std::vector<std::initializer_list<float>> results = {{-0.2, 0.6, 1.0, -0.1},
-                                                       {-0.2, 0.6, -0.1, 0.8}};
+  std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
+                                             {-0.8, 0.2, 0.7, 0.3}};
+  std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
+                                             {0.6, 0.4, -0.8, 0.5}};
+  std::vector<std::vector<float>> results = {{-0.2, 0.6, 1.0, -0.1},
+                                             {-0.2, 0.6, -0.1, 0.8}};
   for (int i = 0; i < inputs1.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
                           {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0},
@@ -262,7 +262,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) {
 
 TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
@@ -281,7 +281,7 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
 
 TEST(QuantizedAddOpModel, QuantizedWithBroadcast) {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
diff --git a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
index 479f6a7d3c..1535f750f9 100644
--- a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc
@@ -129,87 +129,85 @@ class LayerNormLSTMOpModel : public SingleOpModel {
     BuildInterpreter(input_shapes);
   }
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     PopulateTensor(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     PopulateTensor(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     PopulateTensor(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     PopulateTensor(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     PopulateTensor(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_output_weights_, f);
   }
 
-  void SetInputLayerNormWeights(std::initializer_list<float> f) {
+  void SetInputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(input_layer_norm_weights_, f);
   }
 
-  void SetForgetLayerNormWeights(std::initializer_list<float> f) {
+  void SetForgetLayerNormWeights(std::vector<float> f) {
     PopulateTensor(forget_layer_norm_weights_, f);
   }
 
-  void SetCellLayerNormWeights(std::initializer_list<float> f) {
+  void SetCellLayerNormWeights(std::vector<float> f) {
     PopulateTensor(cell_layer_norm_weights_, f);
   }
 
-  void SetOutputLayerNormWeights(std::initializer_list<float> f) {
+  void SetOutputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(output_layer_norm_weights_, f);
   }
 
-  void SetInputGateBias(std::initializer_list<float> f) {
+  void SetInputGateBias(std::vector<float> f) {
     PopulateTensor(input_gate_bias_, f);
   }
 
-  void SetForgetGateBias(std::initializer_list<float> f) {
+  void SetForgetGateBias(std::vector<float> f) {
     PopulateTensor(forget_gate_bias_, f);
   }
 
-  void SetCellBias(std::initializer_list<float> f) {
-    PopulateTensor(cell_bias_, f);
-  }
+  void SetCellBias(std::vector<float> f) { PopulateTensor(cell_bias_, f); }
 
-  void SetOutputGateBias(std::initializer_list<float> f) {
+  void SetOutputGateBias(std::vector<float> f) {
     PopulateTensor(output_gate_bias_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     PopulateTensor(projection_weights_, f);
   }
 
-  void SetProjectionBias(std::initializer_list<float> f) {
+  void SetProjectionBias(std::vector<float> f) {
     PopulateTensor(projection_bias_, f);
   }
 
@@ -278,67 +276,67 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel {
                              use_projection_bias, cell_clip, proj_clip,
                              input_shapes, TensorType_UINT8) {}
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_output_weights_, f);
   }
 
-  void SetInputLayerNormWeights(std::initializer_list<float> f) {
+  void SetInputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(input_layer_norm_weights_, f);
   }
 
-  void SetForgetLayerNormWeights(std::initializer_list<float> f) {
+  void SetForgetLayerNormWeights(std::vector<float> f) {
     PopulateTensor(forget_layer_norm_weights_, f);
   }
 
-  void SetCellLayerNormWeights(std::initializer_list<float> f) {
+  void SetCellLayerNormWeights(std::vector<float> f) {
     PopulateTensor(cell_layer_norm_weights_, f);
   }
 
-  void SetOutputLayerNormWeights(std::initializer_list<float> f) {
+  void SetOutputLayerNormWeights(std::vector<float> f) {
     PopulateTensor(output_layer_norm_weights_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(projection_weights_, f);
   }
 };
@@ -346,26 +344,26 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel {
 class BaseLayerNormLstmTest : public ::testing::Test {
  protected:
   // Weights of the Layer Norm LSTM model. Some are optional.
-  std::initializer_list<float> input_to_input_weights_;
-  std::initializer_list<float> input_to_cell_weights_;
-  std::initializer_list<float> input_to_forget_weights_;
-  std::initializer_list<float> input_to_output_weights_;
-  std::initializer_list<float> input_gate_bias_;
-  std::initializer_list<float> cell_gate_bias_;
-  std::initializer_list<float> forget_gate_bias_;
-  std::initializer_list<float> output_gate_bias_;
-  std::initializer_list<float> recurrent_to_input_weights_;
-  std::initializer_list<float> recurrent_to_cell_weights_;
-  std::initializer_list<float> recurrent_to_forget_weights_;
-  std::initializer_list<float> recurrent_to_output_weights_;
-  std::initializer_list<float> cell_to_input_weights_;
-  std::initializer_list<float> cell_to_forget_weights_;
-  std::initializer_list<float> cell_to_output_weights_;
-  std::initializer_list<float> input_layer_norm_weights_;
-  std::initializer_list<float> forget_layer_norm_weights_;
-  std::initializer_list<float> cell_layer_norm_weights_;
-  std::initializer_list<float> output_layer_norm_weights_;
-  std::initializer_list<float> projection_weights_;
+  std::vector<float> input_to_input_weights_;
+  std::vector<float> input_to_cell_weights_;
+  std::vector<float> input_to_forget_weights_;
+  std::vector<float> input_to_output_weights_;
+  std::vector<float> input_gate_bias_;
+  std::vector<float> cell_gate_bias_;
+  std::vector<float> forget_gate_bias_;
+  std::vector<float> output_gate_bias_;
+  std::vector<float> recurrent_to_input_weights_;
+  std::vector<float> recurrent_to_cell_weights_;
+  std::vector<float> recurrent_to_forget_weights_;
+  std::vector<float> recurrent_to_output_weights_;
+  std::vector<float> cell_to_input_weights_;
+  std::vector<float> cell_to_forget_weights_;
+  std::vector<float> cell_to_output_weights_;
+  std::vector<float> input_layer_norm_weights_;
+  std::vector<float> forget_layer_norm_weights_;
+  std::vector<float> cell_layer_norm_weights_;
+  std::vector<float> output_layer_norm_weights_;
+  std::vector<float> projection_weights_;
 
   // Layer Norm LSTM input is stored as num_batch x num_inputs vector.
   std::vector<std::vector<float>> layer_norm_lstm_input_;
diff --git a/tensorflow/contrib/lite/kernels/lstm_test.cc b/tensorflow/contrib/lite/kernels/lstm_test.cc
index e7ddfceb45..f8947db724 100644
--- a/tensorflow/contrib/lite/kernels/lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_test.cc
@@ -116,71 +116,69 @@ class LSTMOpModel : public SingleOpModel {
     BuildInterpreter(input_shapes);
   }
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     PopulateTensor(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     PopulateTensor(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     PopulateTensor(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     PopulateTensor(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     PopulateTensor(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     PopulateTensor(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     PopulateTensor(cell_to_output_weights_, f);
   }
 
-  void SetInputGateBias(std::initializer_list<float> f) {
+  void SetInputGateBias(std::vector<float> f) {
     PopulateTensor(input_gate_bias_, f);
   }
 
-  void SetForgetGateBias(std::initializer_list<float> f) {
+  void SetForgetGateBias(std::vector<float> f) {
     PopulateTensor(forget_gate_bias_, f);
   }
 
-  void SetCellBias(std::initializer_list<float> f) {
-    PopulateTensor(cell_bias_, f);
-  }
+  void SetCellBias(std::vector<float> f) { PopulateTensor(cell_bias_, f); }
 
-  void SetOutputGateBias(std::initializer_list<float> f) {
+  void SetOutputGateBias(std::vector<float> f) {
     PopulateTensor(output_gate_bias_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     PopulateTensor(projection_weights_, f);
   }
 
-  void SetProjectionBias(std::initializer_list<float> f) {
+  void SetProjectionBias(std::vector<float> f) {
     PopulateTensor(projection_bias_, f);
   }
 
@@ -243,51 +241,51 @@ class HybridLSTMOpModel : public LSTMOpModel {
                     use_projection_weights, use_projection_bias, cell_clip,
                     proj_clip, input_shapes, TensorType_UINT8) {}
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(cell_to_output_weights_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(std::vector<float> f) {
     SymmetricQuantizeAndPopulate(projection_weights_, f);
   }
 };
@@ -295,22 +293,22 @@ class HybridLSTMOpModel : public LSTMOpModel {
 class BaseLstmTest : public ::testing::Test {
  protected:
   // Weights of the LSTM model. Some are optional.
-  std::initializer_list<float> input_to_input_weights_;
-  std::initializer_list<float> input_to_cell_weights_;
-  std::initializer_list<float> input_to_forget_weights_;
-  std::initializer_list<float> input_to_output_weights_;
-  std::initializer_list<float> input_gate_bias_;
-  std::initializer_list<float> cell_gate_bias_;
-  std::initializer_list<float> forget_gate_bias_;
-  std::initializer_list<float> output_gate_bias_;
-  std::initializer_list<float> recurrent_to_input_weights_;
-  std::initializer_list<float> recurrent_to_cell_weights_;
-  std::initializer_list<float> recurrent_to_forget_weights_;
-  std::initializer_list<float> recurrent_to_output_weights_;
-  std::initializer_list<float> cell_to_input_weights_;
-  std::initializer_list<float> cell_to_forget_weights_;
-  std::initializer_list<float> cell_to_output_weights_;
-  std::initializer_list<float> projection_weights_;
+  std::vector<float> input_to_input_weights_;
+  std::vector<float> input_to_cell_weights_;
+  std::vector<float> input_to_forget_weights_;
+  std::vector<float> input_to_output_weights_;
+  std::vector<float> input_gate_bias_;
+  std::vector<float> cell_gate_bias_;
+  std::vector<float> forget_gate_bias_;
+  std::vector<float> output_gate_bias_;
+  std::vector<float> recurrent_to_input_weights_;
+  std::vector<float> recurrent_to_cell_weights_;
+  std::vector<float> recurrent_to_forget_weights_;
+  std::vector<float> recurrent_to_output_weights_;
+  std::vector<float> cell_to_input_weights_;
+  std::vector<float> cell_to_forget_weights_;
+  std::vector<float> cell_to_output_weights_;
+  std::vector<float> projection_weights_;
 
   // LSTM input is stored as num_batch x num_inputs vector.
   std::vector<std::vector<float>> lstm_input_;
diff --git a/tensorflow/contrib/lite/kernels/mul_test.cc b/tensorflow/contrib/lite/kernels/mul_test.cc
index 2807550a6b..0f9c0c2eee 100644
--- a/tensorflow/contrib/lite/kernels/mul_test.cc
+++ b/tensorflow/contrib/lite/kernels/mul_test.cc
@@ -107,7 +107,7 @@ TEST(FloatMulOpTest, ActivationRELU_N1_TO_1) {
 }
 
 TEST(FloatMulOpTest, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatMulOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -124,7 +124,7 @@ TEST(FloatMulOpTest, VariousInputShapes) {
 }
 
 TEST(FloatMulOpTest, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     FloatMulOpModel m({TensorType_FLOAT32, test_shapes[i]},
@@ -161,7 +161,7 @@ TEST(IntegerMulOpTest, ActivationRELU_N1_TO_1) {
 }
 
 TEST(IntegerMulOpTest, VariousInputShapes) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
@@ -176,7 +176,7 @@ TEST(IntegerMulOpTest, VariousInputShapes) {
 }
 
 TEST(IntegerMulOpTest, WithBroadcast) {
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     IntegerMulOpModel m({TensorType_INT32, test_shapes[i]},
@@ -245,7 +245,7 @@ float GetTolerance(int min, int max) {
 
 TEST(QuantizedMulOpTest, WithBroadcast) {
   float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
-  std::vector<std::initializer_list<int>> test_shapes = {
+  std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
     QuantizedMulOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py
index 77a3ca2052..0e1a682d58 100644
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@@ -230,6 +230,7 @@ def _get_default_private_map():
       'tf.contrib.autograph': ['utils', 'operators'],
       'tf.test': ['mock'],
       'tf.compat': ['v1', 'v2'],
+      'tf.contrib.estimator': ['python'],
   }
 
 
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index bcc89ef729..aaea475c04 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -723,11 +723,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "bazel_toolchains",
-        sha256 = "3b604699685c5c65dd3f6f17425570a4b2f00ddba2f750db15acc72e55bb098b",
-        strip_prefix = "bazel-toolchains-37acf1841ab1475c98a152cb9e446460c8ae29e1",
+        sha256 = "07dfbe80638eb1fe681f7c07e61b34b579c6710c691e49ee90ccdc6e9e75ebbb",
+        strip_prefix = "bazel-toolchains-9a111bd82161c1fbe8ed17a593ca1023fd941c70",
         urls = [
-            "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz",
-            "https://github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz",
+            "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/9a111bd82161c1fbe8ed17a593ca1023fd941c70.tar.gz",
+            "https://github.com/bazelbuild/bazel-toolchains/archive/9a111bd82161c1fbe8ed17a593ca1023fd941c70.tar.gz",
         ],
     )
 
diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD
index bcbc4dda11..6e1416ced1 100644
--- a/third_party/toolchains/BUILD
+++ b/third_party/toolchains/BUILD
@@ -17,7 +17,7 @@ platform(
     remote_execution_properties = """
         properties: {
             name: "container-image"
-            value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:495a025ed5e273cfa5d53357ef93ac20500c008994e0be106c509f51555fb93c"
+            value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:63a0e981a4e7ce5da2a851cf063e430f72947fd999d9336b7e54e2eebe8e0bf5"
         }""",
 )
 
-- 
GitLab


From 690d9e3f465a55cbad43051574dde04189768a0c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 00:56:12 -0700
Subject: [PATCH 0854/1085] Fix typo in documentation.

Eps is defined as '1 + eps != 1', not as '1 + eps != eps'.

PiperOrigin-RevId: 216824143
---
 tensorflow/python/ops/check_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py
index 67e0514fb1..40b111ea0c 100644
--- a/tensorflow/python/ops/check_ops.py
+++ b/tensorflow/python/ops/check_ops.py
@@ -476,7 +476,7 @@ def assert_near(
   If both `x` and `y` are empty, this is trivially satisfied.
 
   The default `atol` and `rtol` is `10 * eps`, where `eps` is the smallest
-  representable positive number such that `1 + eps != eps`.  This is about
+  representable positive number such that `1 + eps != 1`.  This is about
   `1.2e-6` in `32bit`, `2.22e-15` in `64bit`, and `0.00977` in `16bit`.
   See `numpy.finfo`.
 
-- 
GitLab


From 9e0fa9578638f9147c0b180e6ea89d67d5c0bae3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 02:02:06 -0700
Subject: [PATCH 0855/1085] compat: Update forward compatibility horizon to
 2018-10-12

PiperOrigin-RevId: 216829748
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 292b9a8480..676fc869e4 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 11)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 12)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 0668e3f7010771e732d2cbd2a4d616366791ff33 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Fri, 12 Oct 2018 17:57:01 +0800
Subject: [PATCH 0856/1085] [Features] Support more types for
 Partitionedvariable assginment method

---
 .../python/kernel_tests/variables_test.py     | 50 ++++++++++++++++---
 tensorflow/python/ops/variables.py            | 29 +++++++----
 2 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index c2b86089f4..843c47f1a3 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -697,47 +697,81 @@ class PartitionedVariableTest(test.TestCase):
             partitions=partitions)
 
   def testPartitionedVariableAssignments(self):
-    with ops.Graph().as_default(), self.cached_session() as sess:
+    with ops.Graph().as_default(), self.cached_session():
       v0 = variables.Variable(initial_value=[0.0])
       v1 = variables.Variable(initial_value=[1.0])
+      v2 = variables.Variable(initial_value=[20.0])
+      v3 = variables.Variable(initial_value=[30.0])
       v0._set_save_slice_info(
           variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1]))
       v1._set_save_slice_info(
-          variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1]))
+          variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1]))
+      v2._set_save_slice_info(
+          variables.Variable.SaveSliceInfo(v2.name, [2], [0], [1]))
+      v3._set_save_slice_info(
+          variables.Variable.SaveSliceInfo(v3.name, [2], [1], [1]))
+
       partitions = [2]
 
       # Pass variable_list as [v1, v0] to ensure they are properly
       # re-sorted to [v0, v1] based on their slice info offsets.
-      partitioned_variable = variables.PartitionedVariable(
+      pv_0 = variables.PartitionedVariable(
           name="two_vars",
           shape=[2],
           dtype=v0.dtype,
           variable_list=[v0, v1],
           partitions=partitions)
 
+      pv_1 = variables.PartitionedVariable(
+          name="two_vars",
+          shape=[2],
+          dtype=v0.dtype,
+          variable_list=[v2, v3],
+          partitions=partitions)
+      
       deltas_a = constant_op.constant([1.0, 2.0])
       deltas_b = constant_op.constant([3.0, 4.0])
       ones = array_ops.ones([2])
-      plus_delta = partitioned_variable.assign_add(deltas_a)
-      minus_delta = partitioned_variable.assign_sub(deltas_b)
-      assign_ones = partitioned_variable.assign(ones)
+      plus_delta = pv_0.assign_add(deltas_a)
+      minus_delta = pv_0.assign_sub(deltas_b)
+      assign_ones = pv_0.assign(ones)
+      
+      c_0 = constant_op.constant([2.0])
+      c_1 = constant_op.constant([3.0])
+      assign_list = pv_1.assign([c_0, c_1])
+      assign_part_value = pv_1.assign_add(plus_delta)
+      assign_part_var = pv_1.assign_sub(pv_0)
       variables.global_variables_initializer().run()
 
       self.assertEqual([1.0], plus_delta[0].eval())
       self.assertEqual([1.0], v0.eval())
       self.assertEqual([3.0], plus_delta[1].eval())
       self.assertEqual([3.0], v1.eval())
-
+      
       self.assertEqual([-2.0], minus_delta[0].eval())
       self.assertEqual([-2.0], v0.eval())
       self.assertEqual([-1.0], minus_delta[1].eval())
       self.assertEqual([-1.0], v1.eval())
-
+ 
       self.assertEqual([1.0], assign_ones[0].eval())
       self.assertEqual([1.0], v0.eval())
       self.assertEqual([1.0], assign_ones[1].eval())
       self.assertEqual([1.0], v1.eval())
 
+      self.assertEqual([2.0], assign_list[0].eval())
+      self.assertEqual([2.0], v2.eval())
+      self.assertEqual([3.0], assign_list[1].eval())
+      self.assertEqual([3.0], v3.eval())
+
+      self.assertEqual([3.0], assign_part_value[0].eval())
+      self.assertEqual([3.0], v2.eval())
+      self.assertEqual([6.0], assign_part_value[1].eval())
+      self.assertEqual([6.0], v3.eval())
+
+      self.assertEqual([2.0], assign_part_var[0].eval())
+      self.assertEqual([2.0], v2.eval())
+      self.assertEqual([5.0], assign_part_var[1].eval())
+      self.assertEqual([5.0], v3.eval())
 
 class VariableContainerTest(test.TestCase):
 
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 45c8618610..944ebc3412 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -2620,22 +2620,30 @@ class PartitionedVariable(object):
   def _get_partitions(self):
     return self._partitions
 
-  def _apply_assign_fn(self, assign_fn, value):
+  def _apply_assign_fn(self,
+                       assign_fn,
+                       value):
     partition_axes = self._partition_axes()
     if len(partition_axes) > 1:
       raise NotImplementedError(
           "Cannot do assign action along more than one dimension: %s.  "
-          "Multi-axis partition assign action is not supported " %
-          str(partition_axes))
-    partition_ix = partition_axes[0]
-    size_splits_list = [
-        var.shape[partition_ix].value for var in self._variable_list
-    ]
-    value_list = array_ops.split(value, size_splits_list, axis=partition_ix)
+          "Multi-axis partition assign action is not supported "
+          % str(partition_axes))
+    if isinstance(value, list):
+      assert len(value) == len(self._variable_list)
+      value_list = value
+    elif isinstance(value, PartitionedVariable):
+      value_list = [var_part for var_part in value]
+    else:
+      partition_ix = partition_axes[0]
+      size_splits_list = [
+          var.shape[partition_ix].value for var in self._variable_list]
+      value_list = array_ops.split(
+          value, size_splits_list, axis=partition_ix)
+
     op_list = [
         assign_fn(var, value_list[idx], idx)
-        for idx, var in enumerate(self._variable_list)
-    ]
+        for idx, var in enumerate(self._variable_list)]
     return op_list
 
   def assign(self, value, use_locking=False, name=None, read_value=True):
@@ -2665,7 +2673,6 @@ class PartitionedVariable(object):
       return assign_list
     return [assign.op for assign in assign_list]
 
-
 @tf_export(v1=["global_variables"])
 def global_variables(scope=None):
   """Returns global variables.
-- 
GitLab


From e2cdc7dae27658aa9c75605fa0a97f43e1971c26 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Fri, 12 Oct 2018 18:51:21 +0800
Subject: [PATCH 0857/1085] fix ut failure

---
 tensorflow/python/kernel_tests/variables_test.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 843c47f1a3..5e91e32399 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -739,7 +739,7 @@ class PartitionedVariableTest(test.TestCase):
       c_0 = constant_op.constant([2.0])
       c_1 = constant_op.constant([3.0])
       assign_list = pv_1.assign([c_0, c_1])
-      assign_part_value = pv_1.assign_add(plus_delta)
+      assign_part_value = pv_1.assign_add(assign_ones)
       assign_part_var = pv_1.assign_sub(pv_0)
       variables.global_variables_initializer().run()
 
@@ -765,13 +765,13 @@ class PartitionedVariableTest(test.TestCase):
 
       self.assertEqual([3.0], assign_part_value[0].eval())
       self.assertEqual([3.0], v2.eval())
-      self.assertEqual([6.0], assign_part_value[1].eval())
-      self.assertEqual([6.0], v3.eval())
+      self.assertEqual([4.0], assign_part_value[1].eval())
+      self.assertEqual([4.0], v3.eval())
 
       self.assertEqual([2.0], assign_part_var[0].eval())
       self.assertEqual([2.0], v2.eval())
-      self.assertEqual([5.0], assign_part_var[1].eval())
-      self.assertEqual([5.0], v3.eval())
+      self.assertEqual([3.0], assign_part_var[1].eval())
+      self.assertEqual([3.0], v3.eval())
 
 class VariableContainerTest(test.TestCase):
 
-- 
GitLab


From 72bf28cd1f2e1a8bd6e4493ef3e2625816235e76 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Fri, 12 Oct 2018 06:33:28 -0700
Subject: [PATCH 0858/1085] Add a utility function to build node name to node
 index.

PiperOrigin-RevId: 216853788
---
 .../encapsulate_xla_computations_pass_test.cc |  5 +--
 tensorflow/compiler/tf2xla/test_util.cc       |  8 -----
 tensorflow/compiler/tf2xla/test_util.h        |  3 --
 .../common_runtime/constant_folding_test.cc   | 35 +++++++------------
 tensorflow/core/graph/graph.cc                |  8 +++++
 tensorflow/core/graph/graph.h                 |  3 ++
 tensorflow/core/graph/graph_test.cc           | 17 +++++++++
 7 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
index 22531a4ace..192e1c7b32 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
@@ -256,7 +256,7 @@ TEST(EncapsulateXlaComputations, Encapsulate) {
 
   TF_ASSERT_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def));
 
-  std::unordered_map<string, Node*> index = BuildNodeIndex(*graph);
+  std::unordered_map<string, Node*> index = graph->BuildNodeNameIndex();
   string function = index.at("launch0")->type_string();
 
   // Tests the outer graph is as expected.
@@ -291,7 +291,8 @@ TEST(EncapsulateXlaComputations, Encapsulate) {
   // function. Encapsulation should be deterministic to avoid recompilation.
   TF_ASSERT_OK(
       EncapsulateXlaComputationsPass::Encapsulate(&graph_copy, &flib_def));
-  std::unordered_map<string, Node*> index_copy = BuildNodeIndex(*graph_copy);
+  std::unordered_map<string, Node*> index_copy =
+      graph_copy->BuildNodeNameIndex();
   string function_copy = index_copy.at("launch0")->type_string();
   EXPECT_EQ(function, function_copy);
 }
diff --git a/tensorflow/compiler/tf2xla/test_util.cc b/tensorflow/compiler/tf2xla/test_util.cc
index f31bfb45a2..3c6c9a91b6 100644
--- a/tensorflow/compiler/tf2xla/test_util.cc
+++ b/tensorflow/compiler/tf2xla/test_util.cc
@@ -40,12 +40,4 @@ Status InstantiateFunctionForTest(const string& name,
   return Status::OK();
 }
 
-std::unordered_map<string, Node*> BuildNodeIndex(const Graph& graph) {
-  std::unordered_map<string, Node*> index;
-  for (Node* node : graph.nodes()) {
-    index[node->name()] = node;
-  }
-  return index;
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/test_util.h b/tensorflow/compiler/tf2xla/test_util.h
index 350a868568..4ffc94ae3b 100644
--- a/tensorflow/compiler/tf2xla/test_util.h
+++ b/tensorflow/compiler/tf2xla/test_util.h
@@ -44,9 +44,6 @@ Status InstantiateFunctionForTest(const string& name,
                                   const FunctionLibraryDefinition& library,
                                   InstantiationResultForTest* result);
 
-// Builds a map from node name to Node* for `graph`.
-std::unordered_map<string, Node*> BuildNodeIndex(const Graph& graph);
-
 }  // namespace tensorflow
 
 // Variant of TF_EXPECT_GRAPH_EQ that also compares internal attributes for
diff --git a/tensorflow/core/common_runtime/constant_folding_test.cc b/tensorflow/core/common_runtime/constant_folding_test.cc
index 16b61315f2..98aefcde27 100644
--- a/tensorflow/core/common_runtime/constant_folding_test.cc
+++ b/tensorflow/core/common_runtime/constant_folding_test.cc
@@ -70,15 +70,6 @@ class ConstantFoldingTest : public ::testing::Test {
     test::ExpectTensorEqual<T>(t, test::AsTensor(values, shape));
   }
 
-  // Builds a map from node name to Node* for `graph`.
-  std::unordered_map<string, Node*> NodeNameIndex(const Graph& graph) {
-    std::unordered_map<string, Node*> index;
-    for (Node* node : graph.nodes()) {
-      index[node->name()] = node;
-    }
-    return index;
-  }
-
   // Constructs the following graph.
   /*
         s1  s2
@@ -110,7 +101,7 @@ TEST_F(ConstantFoldingTest, Basic) {
                             nullptr, &g, &was_mutated));
   EXPECT_TRUE(was_mutated);
 
-  std::unordered_map<string, Node*> index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> index = g.BuildNodeNameIndex();
   Node* s1 = index.at("s1");
   Node* s2 = index.at("s2");
   // Nodes s1 and s2 now should now have a constant input
@@ -165,7 +156,7 @@ TEST_F(ConstantFoldingTest, DeterministicFolding) {
   Graph g2(OpRegistry::Global());
   TF_ASSERT_OK(build_graph_and_constant_folding(g2, true));
   EXPECT_EQ(g1.num_nodes(), g2.num_nodes());
-  auto index = NodeNameIndex(g2);
+  auto index = g2.BuildNodeNameIndex();
 
   // All the nodes in g1 are expected to be present in g2.
   for (int64 i = 0; i < g1.num_nodes(); ++i) {
@@ -188,7 +179,7 @@ TEST_F(ConstantFoldingTest, ConsiderFunction) {
       ConstantFold(opts, nullptr, Env::Default(), nullptr, &g, &was_mutated));
   EXPECT_TRUE(was_mutated);
 
-  std::unordered_map<string, Node*> index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> index = g.BuildNodeNameIndex();
   Node* s1 = index.at("s1");
   Node* s2 = index.at("s2");
   Node* m2 = index.at("m2");
@@ -217,7 +208,7 @@ TEST_F(ConstantFoldingTest, TestNoReplaceAnotherConstant) {
                             nullptr, &g, &was_mutated));
   EXPECT_TRUE(was_mutated);
 
-  std::unordered_map<string, Node*> index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> index = g.BuildNodeNameIndex();
   Node* d = index.at("d");
   Node* s3 = index.at("s3");
 
@@ -245,7 +236,7 @@ TEST_F(ConstantFoldingTest, TwoOutputs) {
                             nullptr, &g, &was_mutated));
   EXPECT_TRUE(was_mutated);
 
-  std::unordered_map<string, Node*> index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> index = g.BuildNodeNameIndex();
   Node* b0 = index.at("b0");
   Node* b1 = index.at("b1");
 
@@ -277,7 +268,7 @@ TEST_F(ConstantFoldingTest, TwoOutputsFoldOneOutput) {
       ConstantFold(opts, nullptr, Env::Default(), nullptr, &g, &was_mutated));
   EXPECT_TRUE(was_mutated);
 
-  std::unordered_map<string, Node*> index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> index = g.BuildNodeNameIndex();
   Node* b0 = index.at("b0");
   Node* b1 = index.at("b1");
   Node* b1_ident = index.at("b1_ident");
@@ -412,7 +403,7 @@ TEST_F(ConstantFoldingTest, ControlDependencies) {
                             nullptr, &g, &was_mutated));
   EXPECT_TRUE(was_mutated);
 
-  std::unordered_map<string, Node*> index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> index = g.BuildNodeNameIndex();
   Node* recv1 = index.at("recv1");
   Node* recv2 = index.at("recv2");
   Node* send = index.at("send");
@@ -454,7 +445,7 @@ TEST_F(ConstantFoldingTest, SimpleShapeKnown) {
                             "receiver");
     TF_ASSERT_OK(s.ToGraph(&g));
   }
-  std::unordered_map<string, Node*> orig_index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> orig_index = g.BuildNodeNameIndex();
   Node* recv0 = orig_index.at("recv0");
   Node* recv1 = orig_index.at("recv1");
   PartialTensorShape ps0;
@@ -473,7 +464,7 @@ TEST_F(ConstantFoldingTest, SimpleShapeKnown) {
       ConstantFold(opts, nullptr, Env::Default(), nullptr, &g, &was_mutated));
   EXPECT_TRUE(was_mutated);
 
-  std::unordered_map<string, Node*> index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> index = g.BuildNodeNameIndex();
   Node* recv2 = index.at("recv2");
   Node* send0 = index.at("send0");
   Node* send1 = index.at("send1");
@@ -533,7 +524,7 @@ TEST_F(ConstantFoldingTest, PartialShape) {
                             "receiver");
     TF_ASSERT_OK(s.ToGraph(&g));
   }
-  std::unordered_map<string, Node*> orig_index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> orig_index = g.BuildNodeNameIndex();
   Node* recv0 = orig_index.at("recv0");
   Node* recv1 = orig_index.at("recv1");
   PartialTensorShape ps0;
@@ -550,7 +541,7 @@ TEST_F(ConstantFoldingTest, PartialShape) {
       ConstantFold(opts, nullptr, Env::Default(), nullptr, &g, &was_mutated));
   EXPECT_TRUE(was_mutated);
 
-  std::unordered_map<string, Node*> index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> index = g.BuildNodeNameIndex();
   Node* shape = index.at("shape");
   Node* size = index.at("size");
   Node* rank1 = index.at("rank1");
@@ -590,7 +581,7 @@ TEST_F(ConstantFoldingTest, ConstShapeKnown) {
                             "receiver");
     TF_ASSERT_OK(s.ToGraph(&g));
   }
-  std::unordered_map<string, Node*> orig_index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> orig_index = g.BuildNodeNameIndex();
   Node* c0 = orig_index.at("c0");
   PartialTensorShape ps0;
   int c0_dims[] = {};
@@ -604,7 +595,7 @@ TEST_F(ConstantFoldingTest, ConstShapeKnown) {
       ConstantFold(opts, nullptr, Env::Default(), nullptr, &g, &was_mutated));
   EXPECT_TRUE(was_mutated);
 
-  std::unordered_map<string, Node*> index = NodeNameIndex(g);
+  std::unordered_map<string, Node*> index = g.BuildNodeNameIndex();
   Node* recv0 = index.at("recv0");
   Node* send0 = index.at("send0");
 
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index a17491d4f7..bc0a6ae346 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -750,6 +750,14 @@ Status Graph::AddWhileContext(StringPiece frame_name,
   return Status::OK();
 }
 
+std::unordered_map<string, Node*> Graph::BuildNodeNameIndex() const {
+  std::unordered_map<string, Node*> result;
+  for (Node* n : nodes()) {
+    result[n->name()] = n;
+  }
+  return result;
+}
+
 string Edge::DebugString() const {
   return strings::Printf("[id=%d %s:%d -> %s:%d]", id_, src_->name().c_str(),
                          src_output_, dst_->name().c_str(), dst_input_);
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 228b1331d9..027ab522ed 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -614,6 +614,9 @@ class Graph {
                          std::vector<OutputTensor> body_outputs,
                          WhileContext** result);
 
+  // Builds a node name to node pointer index for all nodes in the graph.
+  std::unordered_map<string, Node*> BuildNodeNameIndex() const;
+
   // TODO(josh11b): uint64 hash() const;
 
  private:
diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc
index c8c2b225fe..2ab189fb9c 100644
--- a/tensorflow/core/graph/graph_test.cc
+++ b/tensorflow/core/graph/graph_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 
 #include <set>
+#include <unordered_map>
 #include <vector>
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/function_testlib.h"
@@ -643,6 +644,22 @@ TEST_F(GraphTest, AddFunctionLibrary) {
             "because it already has gradient function 'Undefined'");
 }
 
+TEST_F(GraphTest, BuildNodeNameIndex) {
+  FromGraphDef(
+      "node { name: 'A' op: 'OneOutput' }"
+      "node { name: 'B' op: 'OneInputTwoOutputs' input: [ 'A:0' ] }"
+      "node { name: 'C' op: 'NoOp' } ");
+
+  auto node_name_index = graph_.BuildNodeNameIndex();
+  EXPECT_EQ(node_name_index.size(), 5);
+
+  std::vector<string> node_names{"_SOURCE", "_SINK", "A", "B", "C"};
+  for (const string& node_name : node_names) {
+    EXPECT_NE(node_name_index.find(node_name), node_name_index.end());
+    EXPECT_EQ(node_name_index[node_name], FindNode(node_name));
+  }
+}
+
 REGISTER_OP("Input").Output("o: float");
 REGISTER_OP("In2Out1").Input("a: float").Input("b: float").Output("o: float");
 
-- 
GitLab


From 1e8ee52c70b5b25f343ca74ea821665e7ef0f01b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 08:25:41 -0700
Subject: [PATCH 0859/1085] Add RaggedTensors to tf.core. Moving the
 RaggedRange op kernel.

PiperOrigin-RevId: 216865682
---
 tensorflow/core/BUILD                         |   2 +
 .../base_api/api_def_RaggedRange.pbtxt        |  47 ++++
 tensorflow/core/kernels/BUILD                 |  24 ++
 tensorflow/core/kernels/ragged_range_op.cc    | 127 ++++++++++
 .../core/kernels/ragged_range_op_test.cc      | 224 ++++++++++++++++++
 tensorflow/core/ops/ragged_math_ops.cc        |  80 +++++++
 tensorflow/python/BUILD                       |   8 +
 7 files changed, 512 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_RaggedRange.pbtxt
 create mode 100644 tensorflow/core/kernels/ragged_range_op.cc
 create mode 100644 tensorflow/core/kernels/ragged_range_op_test.cc
 create mode 100644 tensorflow/core/ops/ragged_math_ops.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 25651252a7..7789ea22fc 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1158,12 +1158,14 @@ cc_library(
     name = "ragged_ops",
     deps = [
         ":ragged_array_ops_op_lib",
+        ":ragged_math_ops_op_lib",
     ],
 )
 
 tf_gen_op_libs(
     op_lib_names = [
         "ragged_array_ops",
+        "ragged_math_ops",
     ],
 )
 
diff --git a/tensorflow/core/api_def/base_api/api_def_RaggedRange.pbtxt b/tensorflow/core/api_def/base_api/api_def_RaggedRange.pbtxt
new file mode 100644
index 0000000000..927e839b72
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_RaggedRange.pbtxt
@@ -0,0 +1,47 @@
+op {
+  graph_op_name: "RaggedRange"
+  visibility: HIDDEN
+  in_arg{
+    name: "starts"
+    description: "The starts of each range."
+  }
+  in_arg{
+    name: "limits"
+    description: "The limits of each range."
+  }
+  in_arg{
+    name: "deltas"
+    description: "The deltas of each range."
+  }
+  out_arg{
+    name: "rt_nested_splits"
+    description: "The `row_splits` for the returned `RaggedTensor`."
+  }
+  out_arg{
+    name: "rt_dense_values"
+    description: "The `inner_values` for the returned `RaggedTensor`."
+  }
+  summary: <<END
+Returns a `RaggedTensor` containing the specified sequences of numbers.
+END
+  description: <<END
+
+Returns a `RaggedTensor` `result` composed from `rt_dense_values` and
+`rt_nested_splits`, such that
+`result[i] = range(starts[i], limits[i], deltas[i])`.
+
+```python
+>>> (rt_nested_splits, rt_dense_values) = gen_ragged_ops.ragged_range(
+...     starts=[2, 5, 8], limits=[3, 5, 12], deltas=1)
+>>> result = ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits)
+>>> print result.eval().tolist()
+[[2],               # result[0] = range(2, 3)
+ [],                # result[1] = range(5, 5)
+ [8, 9, 10, 11]]    # result[2] = range(8, 12)
+```
+
+The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors.
+The vector inputs must all have the same size.  Scalar inputs are broadcast
+to match the size of the vector inputs.
+END
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index f0a2924378..1f401b257b 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -962,6 +962,7 @@ cc_library(
     name = "ragged_ops",
     deps = [
         ":ragged_gather_op",
+        ":ragged_range_op",
     ],
 )
 
@@ -974,6 +975,15 @@ tf_kernel_library(
     ],
 )
 
+tf_kernel_library(
+    name = "ragged_range_op",
+    srcs = ["ragged_range_op.cc"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:ragged_math_ops_op_lib",
+    ],
+)
+
 tf_cc_test(
     name = "ragged_gather_op_test",
     size = "small",
@@ -989,6 +999,20 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "ragged_range_op_test",
+    srcs = ["ragged_range_op_test.cc"],
+    deps = [
+        ":ragged_range_op",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:ragged_math_ops_op_lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels:ops_testutil",
+    ],
+)
+
 tf_kernel_library(
     name = "cudnn_rnn_kernels",
     srcs = ["cudnn_rnn_ops.cc"],
diff --git a/tensorflow/core/kernels/ragged_range_op.cc b/tensorflow/core/kernels/ragged_range_op.cc
new file mode 100644
index 0000000000..cb7546c397
--- /dev/null
+++ b/tensorflow/core/kernels/ragged_range_op.cc
@@ -0,0 +1,127 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+
+namespace tensorflow {
+
+using errors::InvalidArgument;
+
+template <typename T>
+class RaggedRangeOp : public OpKernel {
+ public:
+  using OpKernel::OpKernel;
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& starts_in = context->input(0);
+    const Tensor& limits_in = context->input(1);
+    const Tensor& deltas_in = context->input(2);
+
+    // Check input tensor shapes.
+    OP_REQUIRES(context, starts_in.shape().dims() <= 1,
+                InvalidArgument("starts must be a scalar or vector"));
+    OP_REQUIRES(context, limits_in.shape().dims() <= 1,
+                InvalidArgument("limits must be a scalar or vector"));
+    OP_REQUIRES(context, deltas_in.shape().dims() <= 1,
+                InvalidArgument("deltas must be a scalar or vector"));
+
+    // Determine which tensors we need to broadcast.
+    bool broadcast_starts = starts_in.shape().dims() == 0;
+    bool broadcast_limits = limits_in.shape().dims() == 0;
+    bool broadcast_deltas = deltas_in.shape().dims() == 0;
+
+    // nrows (number of output rows) is the size of the non-broadcast inputs,
+    // or 1 if all inputs are scalars.
+    std::vector<int> in_sizes;
+    if (!broadcast_starts) in_sizes.push_back(starts_in.shape().dim_size(0));
+    if (!broadcast_limits) in_sizes.push_back(limits_in.shape().dim_size(0));
+    if (!broadcast_deltas) in_sizes.push_back(deltas_in.shape().dim_size(0));
+    for (int i = 1; i < in_sizes.size(); ++i) {
+      OP_REQUIRES(context, in_sizes[i] == in_sizes[i - 1],
+                  InvalidArgument("starts, limits, and deltas must have the "
+                                  "same shape"));
+    }
+    int64 nrows = in_sizes.empty() ? 1 : in_sizes[0];
+
+    const auto& starts = starts_in.flat<T>();
+    const auto& limits = limits_in.flat<T>();
+    const auto& deltas = deltas_in.flat<T>();
+
+    // Construct the rt_nested_splits tensor.
+    Tensor* rt_nested_splits_out = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape({nrows + 1}),
+                                            &rt_nested_splits_out));
+    auto rt_nested_splits = rt_nested_splits_out->flat<int64>();
+    rt_nested_splits(0) = 0;
+    for (int row = 0; row < nrows; ++row) {
+      T start = broadcast_starts ? starts(0) : starts(row);
+      T limit = broadcast_limits ? limits(0) : limits(row);
+      T delta = broadcast_deltas ? deltas(0) : deltas(row);
+      OP_REQUIRES(context, delta != 0, InvalidArgument("Requires delta != 0"));
+      rt_nested_splits(row + 1) =
+          rt_nested_splits(row) + RangeSize(start, limit, delta);
+    }
+    int64 nvals = rt_nested_splits(nrows);
+
+    // Construct the rt_dense_values tensor.
+    Tensor* rt_dense_values_out = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape({nvals}),
+                                                     &rt_dense_values_out));
+    auto rt_dense_values = rt_dense_values_out->flat<T>();
+    int value_index = 0;
+    for (int row = 0; row < nrows; ++row) {
+      int64 row_size = rt_nested_splits(row + 1) - rt_nested_splits(row);
+      T value = broadcast_starts ? starts(0) : starts(row);
+      T delta = broadcast_deltas ? deltas(0) : deltas(row);
+      for (int64 i = 0; i < row_size; ++i) {
+        rt_dense_values(value_index++) = T(value);
+        value += delta;
+      }
+    }
+  }
+
+ private:
+  // Returns the number of elements in the specified range.
+  int64 RangeSize(T start, T limit, T delta) {
+    if (((delta > 0) && (limit < start)) || ((delta < 0) && (limit > start))) {
+      return 0;
+    }
+    // The following is copied from tensorflow::RangeOp::Compute().
+    return (std::is_integral<T>::value
+                ? ((std::abs(limit - start) + std::abs(delta) - 1) /
+                   std::abs(delta))
+                : std::ceil(std::abs((limit - start) / delta)));
+  }
+};
+
+#define REGISTER_CPU_KERNEL(TYPE)                                       \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("RaggedRange").Device(DEVICE_CPU).TypeConstraint<TYPE>("T"), \
+      RaggedRangeOp<TYPE>);
+TF_CALL_float(REGISTER_CPU_KERNEL);
+TF_CALL_double(REGISTER_CPU_KERNEL);
+TF_CALL_int32(REGISTER_CPU_KERNEL);
+TF_CALL_int64(REGISTER_CPU_KERNEL);
+#undef REGISTER_CPU_KERNEL
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/ragged_range_op_test.cc b/tensorflow/core/kernels/ragged_range_op_test.cc
new file mode 100644
index 0000000000..66f097091e
--- /dev/null
+++ b/tensorflow/core/kernels/ragged_range_op_test.cc
@@ -0,0 +1,224 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/shape_inference_testutil.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+class RaggedRangeOpTest : public ::tensorflow::OpsTestBase {
+ protected:
+  // Indices of output tensors.
+  static constexpr int kSplitsOutput = 0;
+  static constexpr int kValuesOutput = 1;
+
+  // Builds the tensorflow test graph for the RaggedRange op.
+  template <typename T>
+  void BuildRaggedRangeGraph() {
+    const auto& dtype = DataTypeToEnum<T>::v();
+    TF_ASSERT_OK(NodeDefBuilder("tested_op", "RaggedRange")
+                     .Input(FakeInput(dtype))  // starts
+                     .Input(FakeInput(dtype))  // limits
+                     .Input(FakeInput(dtype))  // deltas
+                     .Attr("T", dtype)
+                     .Finalize(node_def()));
+    TF_ASSERT_OK(InitOp());
+  }
+};
+
+TEST_F(RaggedRangeOpTest, IntValues) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({4}), {0, 5, 8, 5});   // starts
+  AddInputFromArray<int>(TensorShape({4}), {8, 7, 8, 1});   // limits
+  AddInputFromArray<int>(TensorShape({4}), {2, 1, 1, -1});  // deltas
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[0, 2, 4, 6], [5, 6], [], [5, 4, 3, 2]]
+  test::ExpectTensorEqual<int64>(*GetOutput(kSplitsOutput),
+                                 test::AsTensor<int64>({0, 4, 6, 6, 10}));
+  test::ExpectTensorEqual<int>(
+      *GetOutput(kValuesOutput),
+      test::AsTensor<int>({0, 2, 4, 6, 5, 6, 5, 4, 3, 2}));
+}
+
+TEST_F(RaggedRangeOpTest, FloatValues) {
+  BuildRaggedRangeGraph<float>();
+  AddInputFromArray<float>(TensorShape({4}), {0, 5, 8, 5});   // starts
+  AddInputFromArray<float>(TensorShape({4}), {8, 7, 8, 1});   // limits
+  AddInputFromArray<float>(TensorShape({4}), {2, 1, 1, -1});  // deltas
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[0, 2, 4, 6], [5, 6], [], [5, 4, 3, 2]]
+  test::ExpectTensorEqual<int64>(*GetOutput(kSplitsOutput),
+                                 test::AsTensor<int64>({0, 4, 6, 6, 10}));
+  test::ExpectTensorNear<float>(
+      *GetOutput(kValuesOutput),
+      test::AsTensor<float>({0, 2, 4, 6, 5, 6, 5, 4, 3, 2}), 0.1);
+}
+
+TEST_F(RaggedRangeOpTest, BroadcastDeltas) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({3}), {0, 5, 8});  // starts
+  AddInputFromArray<int>(TensorShape({3}), {8, 7, 8});  // limits
+  AddInputFromArray<int>(TensorShape({}), {1});         // deltas
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[0, 1, 2, 3, 4, 5, 6, 7], [5, 6], []]
+  test::ExpectTensorEqual<int64>(*GetOutput(kSplitsOutput),
+                                 test::AsTensor<int64>({0, 8, 10, 10}));
+  test::ExpectTensorEqual<int>(
+      *GetOutput(kValuesOutput),
+      test::AsTensor<int>({0, 1, 2, 3, 4, 5, 6, 7, 5, 6}));
+}
+
+TEST_F(RaggedRangeOpTest, BroadcastLimitsAndDeltas) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({}), {0});         // starts
+  AddInputFromArray<int>(TensorShape({3}), {3, 0, 2});  // limits
+  AddInputFromArray<int>(TensorShape({}), {1});         // deltas
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[0, 1, 2], [], [0, 1]]
+  test::ExpectTensorEqual<int64>(*GetOutput(kSplitsOutput),
+                                 test::AsTensor<int64>({0, 3, 3, 5}));
+  test::ExpectTensorEqual<int>(*GetOutput(kValuesOutput),
+                               test::AsTensor<int>({0, 1, 2, 0, 1}));
+}
+
+TEST_F(RaggedRangeOpTest, BroadcastStartsAndLimits) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({}), {0});         // starts
+  AddInputFromArray<int>(TensorShape({}), {12});        // limits
+  AddInputFromArray<int>(TensorShape({3}), {3, 4, 5});  // deltas
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[0, 3, 6, 9], [0, 4, 8], [0, 5, 10]]]
+  test::ExpectTensorEqual<int64>(*GetOutput(kSplitsOutput),
+                                 test::AsTensor<int64>({0, 4, 7, 10}));
+  test::ExpectTensorEqual<int>(
+      *GetOutput(kValuesOutput),
+      test::AsTensor<int>({0, 3, 6, 9, 0, 4, 8, 0, 5, 10}));
+}
+
+TEST_F(RaggedRangeOpTest, AllScalarInputs) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({}), {0});  // starts
+  AddInputFromArray<int>(TensorShape({}), {5});  // limits
+  AddInputFromArray<int>(TensorShape({}), {1});  // deltas
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[0, 1, 2, 3, 4]
+  test::ExpectTensorEqual<int64>(*GetOutput(kSplitsOutput),
+                                 test::AsTensor<int64>({0, 5}));
+  test::ExpectTensorEqual<int>(*GetOutput(kValuesOutput),
+                               test::AsTensor<int>({0, 1, 2, 3, 4}));
+}
+
+TEST_F(RaggedRangeOpTest, InvalidArgsStarts) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({4, 1}), {0, 5, 8, 5});  // starts
+  AddInputFromArray<int>(TensorShape({4}), {8, 7, 8, 1});     // limits
+  AddInputFromArray<int>(TensorShape({4}), {2, 1, 1, -1});    // deltas
+  EXPECT_EQ("starts must be a scalar or vector", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedRangeOpTest, InvalidArgsLimits) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({4}), {0, 5, 8, 5});     // starts
+  AddInputFromArray<int>(TensorShape({4, 1}), {8, 7, 8, 1});  // limits
+  AddInputFromArray<int>(TensorShape({4}), {2, 1, 1, -1});    // deltas
+  EXPECT_EQ("limits must be a scalar or vector", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedRangeOpTest, InvalidArgsDeltas) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({4}), {0, 5, 8, 5});      // starts
+  AddInputFromArray<int>(TensorShape({4}), {8, 7, 8, 1});      // limits
+  AddInputFromArray<int>(TensorShape({4, 1}), {2, 1, 1, -1});  // deltas
+  EXPECT_EQ("deltas must be a scalar or vector", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedRangeOpTest, InvalidArgsShapeMismatch) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({4}), {0, 5, 8, 5});   // starts
+  AddInputFromArray<int>(TensorShape({3}), {7, 8, 1});      // limits
+  AddInputFromArray<int>(TensorShape({4}), {2, 1, 1, -1});  // deltas
+  EXPECT_EQ("starts, limits, and deltas must have the same shape",
+            RunOpKernel().error_message());
+}
+
+TEST_F(RaggedRangeOpTest, InvalidArgsZeroDelta) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({4}), {0, 5, 8, 5});   // starts
+  AddInputFromArray<int>(TensorShape({4}), {7, 8, 8, 1});   // limits
+  AddInputFromArray<int>(TensorShape({4}), {2, 1, 0, -1});  // deltas
+  EXPECT_EQ("Requires delta != 0", RunOpKernel().error_message());
+}
+
+TEST_F(RaggedRangeOpTest, EmptyRangePositiveDelta) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({2}), {0, 5});  // starts
+  AddInputFromArray<int>(TensorShape({2}), {5, 0});  // limits
+  AddInputFromArray<int>(TensorShape({}), {2});      // deltas
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[0, 2, 4], []]
+  test::ExpectTensorEqual<int64>(*GetOutput(kSplitsOutput),
+                                 test::AsTensor<int64>({0, 3, 3}));
+  test::ExpectTensorEqual<int>(*GetOutput(kValuesOutput),
+                               test::AsTensor<int>({0, 2, 4}));
+}
+
+TEST_F(RaggedRangeOpTest, EmptyRangeNegativeDelta) {
+  BuildRaggedRangeGraph<int>();
+  AddInputFromArray<int>(TensorShape({2}), {0, 5});  // starts
+  AddInputFromArray<int>(TensorShape({2}), {5, 0});  // limits
+  AddInputFromArray<int>(TensorShape({}), {-2});     // deltas
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expected: [[], [5, 3, 1]]
+  test::ExpectTensorEqual<int64>(*GetOutput(kSplitsOutput),
+                                 test::AsTensor<int64>({0, 0, 3}));
+  test::ExpectTensorEqual<int>(*GetOutput(kValuesOutput),
+                               test::AsTensor<int>({5, 3, 1}));
+}
+
+TEST_F(RaggedRangeOpTest, ShapeFn) {
+  // RaggedRange(starts, limits, deltas) -> [splits, values]
+  ShapeInferenceTestOp op("RaggedRange");
+  INFER_OK(op, "?;?;?", "[?];[?]");
+  INFER_OK(op, "[3];[3];[3]", "[4];[?]");
+  INFER_OK(op, "[3];[3];[]", "[4];[?]");  // broadcast deltas
+  INFER_OK(op, "[3];[];[3]", "[4];[?]");  // broadcast limits
+  INFER_OK(op, "[];[3];[3]", "[4];[?]");  // broadcast starts
+  INFER_OK(op, "[];[];[]", "[2];[?]");    // degenerate case: all scalar inputs
+  INFER_ERROR("Shape must be at most rank 1 but is rank 2", op,
+              "[5,5];[5];[5]");
+  INFER_ERROR("Shape must be at most rank 1 but is rank 2", op,
+              "[5];[5,5];[5]");
+  INFER_ERROR("Shape must be at most rank 1 but is rank 2", op,
+              "[5];[5];[5,5]");
+  INFER_ERROR("Dimensions must be equal, but are 4 and 3", op, "[3];[4];[3]");
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/ragged_math_ops.cc b/tensorflow/core/ops/ragged_math_ops.cc
new file mode 100644
index 0000000000..d739c69798
--- /dev/null
+++ b/tensorflow/core/ops/ragged_math_ops.cc
@@ -0,0 +1,80 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+Status RaggedRangeShapeFn(InferenceContext* c);
+
+//==============================================================================
+// Registered Ops
+//==============================================================================
+
+REGISTER_OP("RaggedRange")
+    .Input("starts: T")
+    .Input("limits: T")
+    .Input("deltas: T")
+    .Output("rt_nested_splits: int64")
+    .Output("rt_dense_values: T")
+    .Attr("T: {bfloat16, float, double, int32, int64} = DT_INT32")
+    .SetShapeFn(RaggedRangeShapeFn);
+
+//==============================================================================
+// Shape Functions
+//==============================================================================
+
+Status RaggedRangeShapeFn(InferenceContext* c) {
+  // Check that all inputs (starts, limits, and deltas) have rank 0 or 1.
+  ShapeHandle starts = c->input(0);
+  ShapeHandle limits = c->input(1);
+  ShapeHandle deltas = c->input(2);
+  TF_RETURN_IF_ERROR(c->WithRankAtMost(starts, 1, &starts));
+  TF_RETURN_IF_ERROR(c->WithRankAtMost(limits, 1, &limits));
+  TF_RETURN_IF_ERROR(c->WithRankAtMost(deltas, 1, &deltas));
+
+  // For the inputs with rank 1, make sure shapes match.
+  DimensionHandle dim = c->UnknownDim();
+  if (c->Rank(starts) == 1) {
+    TF_RETURN_IF_ERROR(c->Merge(c->Dim(starts, 0), dim, &dim));
+  }
+  if (c->Rank(limits) == 1) {
+    TF_RETURN_IF_ERROR(c->Merge(c->Dim(limits, 0), dim, &dim));
+  }
+  if (c->Rank(deltas) == 1) {
+    TF_RETURN_IF_ERROR(c->Merge(c->Dim(deltas, 0), dim, &dim));
+  }
+
+  // If any input shape is known, then calculate `rt_nested_splits` shape.
+  int64 rt_nested_splits_dim = InferenceContext::kUnknownDim;
+  if (c->ValueKnown(dim)) {
+    rt_nested_splits_dim = c->Value(dim) + 1;
+  } else if (c->Rank(starts) == 0 && c->Rank(limits) == 0 &&
+             c->Rank(deltas) == 0) {
+    rt_nested_splits_dim = 2;
+  }
+  c->set_output(0, c->Vector(rt_nested_splits_dim));
+
+  // `rt_dense_values` is rank 1, but size can't be calculated statically.
+  c->set_output(1, c->UnknownShapeOfRank(1));
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index e7548c9587..d016de3261 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1799,6 +1799,14 @@ tf_gen_op_wrapper_private_py(
     out = "training/gen_training_ops.py",
 )
 
+tf_gen_op_wrapper_private_py(
+    name = "ragged_math_ops_gen",
+    visibility = [
+        "//learning/brain/contrib/text:__pkg__",
+        "//learning/brain/contrib/text/python/ragged:__pkg__",
+    ],
+)
+
 py_library(
     name = "array_grad",
     srcs = ["ops/array_grad.py"],
-- 
GitLab


From 5f8df55dfb95d26189b0443b66fede916928ea27 Mon Sep 17 00:00:00 2001
From: wangsiyu <siyu.wsy@gmail.com>
Date: Fri, 12 Oct 2018 23:37:12 +0800
Subject: [PATCH 0860/1085] fix pylint

---
 tensorflow/python/kernel_tests/variables_test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 5e91e32399..c64fb8426d 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -728,14 +728,14 @@ class PartitionedVariableTest(test.TestCase):
           dtype=v0.dtype,
           variable_list=[v2, v3],
           partitions=partitions)
-      
+
       deltas_a = constant_op.constant([1.0, 2.0])
       deltas_b = constant_op.constant([3.0, 4.0])
       ones = array_ops.ones([2])
       plus_delta = pv_0.assign_add(deltas_a)
       minus_delta = pv_0.assign_sub(deltas_b)
       assign_ones = pv_0.assign(ones)
-      
+
       c_0 = constant_op.constant([2.0])
       c_1 = constant_op.constant([3.0])
       assign_list = pv_1.assign([c_0, c_1])
@@ -747,12 +747,12 @@ class PartitionedVariableTest(test.TestCase):
       self.assertEqual([1.0], v0.eval())
       self.assertEqual([3.0], plus_delta[1].eval())
       self.assertEqual([3.0], v1.eval())
-      
+
       self.assertEqual([-2.0], minus_delta[0].eval())
       self.assertEqual([-2.0], v0.eval())
       self.assertEqual([-1.0], minus_delta[1].eval())
       self.assertEqual([-1.0], v1.eval())
- 
+
       self.assertEqual([1.0], assign_ones[0].eval())
       self.assertEqual([1.0], v0.eval())
       self.assertEqual([1.0], assign_ones[1].eval())
-- 
GitLab


From b94f5bb165cd518956b7ec1070b4b448b0a0935e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 08:44:54 -0700
Subject: [PATCH 0861/1085] Move from deprecated self.test_session() to
 self.session() or self.cached_session().

Move to cached_session() if the session is create more than once per test. Move to session() otherwise.

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to session() instead which slightly changes the semantic of the function:
* the session is not cached anymore (a new session is created).
* the session is closed when exiting the "with" scope.

PiperOrigin-RevId: 216868101
---
 .../converters/builtin_functions_test.py      |  6 +-
 .../autograph/operators/py_builtins_test.py   |  2 +-
 tensorflow/python/client/timeline_test.py     |  4 +-
 tensorflow/python/client/virtual_gpu_test.py  |  4 +-
 .../data/kernel_tests/iterator_ops_test.py    |  2 +-
 .../python/debug/lib/grpc_large_data_test.py  | 12 +--
 tensorflow/python/framework/function_test.py  |  4 +-
 .../python/keras/engine/training_gpu_test.py  |  2 +-
 .../python/keras/layers/convolutional_test.py | 83 +++++++++----------
 .../keras/layers/cudnn_recurrent_test.py      | 20 ++---
 .../python/keras/layers/normalization_test.py |  2 +-
 .../kernel_tests/distributions/util_test.py   |  2 +-
 .../kernel_tests/random/random_gamma_test.py  |  4 +-
 .../kernel_tests/random/random_ops_test.py    | 16 ++--
 .../random/random_poisson_test.py             |  4 +-
 .../python/layers/normalization_test.py       | 20 ++---
 .../tools/optimize_for_inference_test.py      |  9 +-
 tensorflow/python/training/adam_test.py       |  2 +-
 .../training/checkpointable/util_test.py      | 20 ++---
 tensorflow/python/training/rmsprop_test.py    |  8 +-
 tensorflow/python/training/saver_test.py      |  4 +-
 .../python/training/training_ops_test.py      | 12 +--
 22 files changed, 119 insertions(+), 123 deletions(-)

diff --git a/tensorflow/python/autograph/converters/builtin_functions_test.py b/tensorflow/python/autograph/converters/builtin_functions_test.py
index 2ed14c14e7..30cfb13233 100644
--- a/tensorflow/python/autograph/converters/builtin_functions_test.py
+++ b/tensorflow/python/autograph/converters/builtin_functions_test.py
@@ -36,7 +36,7 @@ class BuiltinFunctionsTest(converter_testing.TestCase):
       return len(a)
 
     with self.converted(test_fn, builtin_functions, {'len': len}) as result:
-      with self.test_session() as sess:
+      with self.session() as sess:
         p = array_ops.placeholder(dtype=dtypes.int32, shape=None)
         ops = result.test_fn(p)
         self.assertEqual(sess.run(ops, {p: [0, 0, 0]}), 3)
@@ -50,7 +50,7 @@ class BuiltinFunctionsTest(converter_testing.TestCase):
       return print(a)
 
     with self.converted(test_fn, builtin_functions, {'print': print}) as result:
-      with self.test_session() as sess:
+      with self.session() as sess:
         with self.assertPrints('a\n'):
           sess.run(result.test_fn('a'))
 
@@ -63,7 +63,7 @@ class BuiltinFunctionsTest(converter_testing.TestCase):
       return print(a, b, c)
 
     with self.converted(test_fn, builtin_functions, {'print': print}) as result:
-      with self.test_session() as sess:
+      with self.session() as sess:
         with self.assertPrints('a 1 [2, 3]\n'):
           sess.run(
               result.test_fn(
diff --git a/tensorflow/python/autograph/operators/py_builtins_test.py b/tensorflow/python/autograph/operators/py_builtins_test.py
index c94a918d5a..443e30a475 100644
--- a/tensorflow/python/autograph/operators/py_builtins_test.py
+++ b/tensorflow/python/autograph/operators/py_builtins_test.py
@@ -127,7 +127,7 @@ class PyBuiltinsTest(test.TestCase):
       self.assertAllEqual(sess.run(r), [2, 1])
 
   def test_range_tensor_empty_range(self):
-    with self.test_session() as sess:
+    with self.session() as sess:
       r = py_builtins.range_(constant_op.constant(-3))
       self.assertAllEqual(sess.run(r), [])
       r = py_builtins.range_(5, constant_op.constant(2))
diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py
index 032bbf7c4e..dfd0147643 100644
--- a/tensorflow/python/client/timeline_test.py
+++ b/tensorflow/python/client/timeline_test.py
@@ -62,7 +62,7 @@ class TimelineTest(test.TestCase):
         trace_level=config_pb2.RunOptions.FULL_TRACE)
     run_metadata = config_pb2.RunMetadata()
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       const1 = constant_op.constant(1.0, name='const1')
       const2 = constant_op.constant(2.0, name='const2')
       result = math_ops.add(const1, const2) + const1 * const2
@@ -93,7 +93,7 @@ class TimelineTest(test.TestCase):
         trace_level=config_pb2.RunOptions.FULL_TRACE)
     run_metadata = config_pb2.RunMetadata()
 
-    with self.test_session(force_gpu=True) as sess:
+    with self.session(force_gpu=True) as sess:
       const1 = constant_op.constant(1.0, name='const1')
       const2 = constant_op.constant(2.0, name='const2')
       result = math_ops.add(const1, const2) + const1 * const2
diff --git a/tensorflow/python/client/virtual_gpu_test.py b/tensorflow/python/client/virtual_gpu_test.py
index 52e1b56886..5892e0fc84 100644
--- a/tensorflow/python/client/virtual_gpu_test.py
+++ b/tensorflow/python/client/virtual_gpu_test.py
@@ -199,7 +199,7 @@ class VirtualGpuTest(test_util.TensorFlowTestCase):
     self._util = VirtualGpuTestUtil()
 
   def testStatsContainAllDeviceNames(self):
-    with self.test_session(config=self._util.config) as sess:
+    with self.session(config=self._util.config) as sess:
       # TODO(laigd): b/70811538. The is_gpu_available() call will invoke
       # DeviceFactory::AddDevices() with a default SessionOption, which prevents
       # adding virtual devices in the future, thus must be called within a
@@ -232,7 +232,7 @@ class VirtualGpuTest(test_util.TensorFlowTestCase):
     self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:2' in devices)
 
   def testLargeRandomGraph(self):
-    with self.test_session(config=self._util.config) as sess:
+    with self.session(config=self._util.config) as sess:
       if not test.is_gpu_available(cuda_only=True):
         self.skipTest('No GPU available')
       for _ in range(5):
diff --git a/tensorflow/python/data/kernel_tests/iterator_ops_test.py b/tensorflow/python/data/kernel_tests/iterator_ops_test.py
index 671e5d4812..7cf6f34958 100644
--- a/tensorflow/python/data/kernel_tests/iterator_ops_test.py
+++ b/tensorflow/python/data/kernel_tests/iterator_ops_test.py
@@ -573,7 +573,7 @@ class IteratorTest(test.TestCase):
           f=_remote_fn,
           target=target_placeholder)
 
-    with self.test_session(config=worker_config) as sess:
+    with self.session(config=worker_config) as sess:
       elem = sess.run(
           remote_op,
           feed_dict={
diff --git a/tensorflow/python/debug/lib/grpc_large_data_test.py b/tensorflow/python/debug/lib/grpc_large_data_test.py
index ccc21bcf94..a7fdbebaf5 100644
--- a/tensorflow/python/debug/lib/grpc_large_data_test.py
+++ b/tensorflow/python/debug/lib/grpc_large_data_test.py
@@ -58,7 +58,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
     self.debug_server.clear_data()
 
   def testSendingLargeGraphDefsWorks(self):
-    with self.test_session(
+    with self.session(
         use_gpu=True,
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
       u = variables.VariableV1(42.0, name="original_u")
@@ -86,7 +86,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
       self.assertGreater(max_graph_def_size, 4 * 1024 * 1024)
 
   def testSendingLargeFloatTensorWorks(self):
-    with self.test_session(
+    with self.session(
         use_gpu=True,
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
       u_init_val_array = list(xrange(1200 * 1024))
@@ -110,7 +110,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
           self.debug_server.debug_tensor_values["u_init:0:DebugIdentity"][0])
 
   def testSendingStringTensorWithAlmostTooLargeStringsWorks(self):
-    with self.test_session(
+    with self.session(
         use_gpu=True,
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
       u_init_val = [
@@ -133,7 +133,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
           self.debug_server.debug_tensor_values["u_init:0:DebugIdentity"][0])
 
   def testSendingLargeStringTensorWorks(self):
-    with self.test_session(
+    with self.session(
         use_gpu=True,
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
       strs_total_size_threshold = 5000 * 1024
@@ -162,7 +162,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
           self.debug_server.debug_tensor_values["u_init:0:DebugIdentity"][0])
 
   def testSendingEmptyFloatTensorWorks(self):
-    with self.test_session(
+    with self.session(
         use_gpu=True,
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
       u_init = constant_op.constant(
@@ -184,7 +184,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase):
       self.assertEqual(0, len(u_init_value))
 
   def testSendingEmptyStringTensorWorks(self):
-    with self.test_session(
+    with self.session(
         use_gpu=True,
         config=session_debug_testlib.no_rewrite_session_config()) as sess:
       u_init = constant_op.constant(
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 16d4903d79..2d11a1644d 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -455,7 +455,7 @@ class FunctionTest(test.TestCase):
         _ = MyFn(100.0).eval()
 
   def testWhileLoopCallsFunc(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
 
       @function.Defun(dtypes.float32)
       def Times2(x):
@@ -1077,7 +1077,7 @@ class FunctionTest(test.TestCase):
       self.assertNotEqual("GuaranteeConst", fifth.consumers()[0].node_def.op)
       return output
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sess.run(var.initializer)
       _ = sess.run(CapturesGuaranteedConst(), {also_not_const: 1.0})
 
diff --git a/tensorflow/python/keras/engine/training_gpu_test.py b/tensorflow/python/keras/engine/training_gpu_test.py
index 5825ce814f..596d085f3f 100644
--- a/tensorflow/python/keras/engine/training_gpu_test.py
+++ b/tensorflow/python/keras/engine/training_gpu_test.py
@@ -69,7 +69,7 @@ class TrainingGPUTest(test.TestCase):
       return simple_model
 
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         losses_to_test = ['sparse_categorical_crossentropy',
                           'categorical_crossentropy', 'binary_crossentropy']
 
diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index bdc175b8b9..4afddbc8cc 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -39,7 +39,7 @@ class Convolution1DTest(test.TestCase):
     test_kwargs = copy.copy(kwargs)
     for value in values:
       test_kwargs[arg] = value
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.Conv1D,
             kwargs=test_kwargs,
@@ -74,7 +74,7 @@ class Convolution1DTest(test.TestCase):
         'activity_regularizer': 'l2',
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv1D(**kwargs)
       layer.build((None, 5, 2))
       self.assertEqual(len(layer.losses), 2)
@@ -93,7 +93,7 @@ class Convolution1DTest(test.TestCase):
         'bias_constraint': b_constraint,
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv1D(**kwargs)
       layer.build((None, 5, 2))
       self.assertEqual(layer.kernel.constraint, k_constraint)
@@ -111,7 +111,7 @@ class Conv2DTest(test.TestCase):
     test_kwargs = copy.copy(kwargs)
     for value in values:
       test_kwargs[arg] = value
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.Conv2D,
             kwargs=test_kwargs,
@@ -149,7 +149,7 @@ class Conv2DTest(test.TestCase):
         'activity_regularizer': 'l2',
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv2D(**kwargs)
       layer.build((None, 5, 5, 2))
       self.assertEqual(len(layer.losses), 2)
@@ -168,7 +168,7 @@ class Conv2DTest(test.TestCase):
         'bias_constraint': b_constraint,
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv2D(**kwargs)
       layer.build((None, 5, 5, 2))
       self.assertEqual(layer.kernel.constraint, k_constraint)
@@ -186,7 +186,7 @@ class Conv2DTransposeTest(test.TestCase):
     test_kwargs = copy.copy(kwargs)
     for value in values:
       test_kwargs[arg] = value
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.Conv2DTranspose,
             kwargs=test_kwargs,
@@ -217,7 +217,7 @@ class Conv2DTransposeTest(test.TestCase):
         'activity_regularizer': 'l2',
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv2DTranspose(**kwargs)
       layer.build((None, 5, 5, 2))
       self.assertEqual(len(layer.losses), 2)
@@ -236,7 +236,7 @@ class Conv2DTransposeTest(test.TestCase):
         'bias_constraint': b_constraint,
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv2DTranspose(**kwargs)
       layer.build((None, 5, 5, 2))
       self.assertEqual(layer.kernel.constraint, k_constraint)
@@ -280,7 +280,7 @@ class Conv3DTransposeTest(test.TestCase):
     test_kwargs = copy.copy(kwargs)
     for value in values:
       test_kwargs[arg] = value
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.Conv3DTranspose,
             kwargs=test_kwargs,
@@ -311,7 +311,7 @@ class Conv3DTransposeTest(test.TestCase):
         'activity_regularizer': 'l2',
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv3DTranspose(**kwargs)
       layer.build((None, 5, 5, 5, 2))
       self.assertEqual(len(layer.losses), 2)
@@ -330,7 +330,7 @@ class Conv3DTransposeTest(test.TestCase):
         'bias_constraint': b_constraint,
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv3DTranspose(**kwargs)
       layer.build((None, 5, 5, 5, 2))
       self.assertEqual(layer.kernel.constraint, k_constraint)
@@ -347,7 +347,7 @@ class SeparableConv1DTest(test.TestCase):
     test_kwargs = copy.copy(kwargs)
     for value in values:
       test_kwargs[arg] = value
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.SeparableConv1D,
             kwargs=test_kwargs,
@@ -383,7 +383,7 @@ class SeparableConv1DTest(test.TestCase):
         'activity_regularizer': 'l2',
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.SeparableConv1D(**kwargs)
       layer.build((None, 5, 2))
       self.assertEqual(len(layer.losses), 3)
@@ -404,7 +404,7 @@ class SeparableConv1DTest(test.TestCase):
         'bias_constraint': b_constraint,
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.SeparableConv1D(**kwargs)
       layer.build((None, 5, 2))
       self.assertEqual(layer.depthwise_kernel.constraint, d_constraint)
@@ -423,7 +423,7 @@ class SeparableConv2DTest(test.TestCase):
     test_kwargs = copy.copy(kwargs)
     for value in values:
       test_kwargs[arg] = value
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.SeparableConv2D,
             kwargs=test_kwargs,
@@ -461,7 +461,7 @@ class SeparableConv2DTest(test.TestCase):
         'activity_regularizer': 'l2',
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.SeparableConv2D(**kwargs)
       layer.build((None, 5, 5, 2))
       self.assertEqual(len(layer.losses), 3)
@@ -482,7 +482,7 @@ class SeparableConv2DTest(test.TestCase):
         'bias_constraint': b_constraint,
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.SeparableConv2D(**kwargs)
       layer.build((None, 5, 5, 2))
       self.assertEqual(layer.depthwise_kernel.constraint, d_constraint)
@@ -502,7 +502,7 @@ class Conv3DTest(test.TestCase):
     test_kwargs = copy.copy(kwargs)
     for value in values:
       test_kwargs[arg] = value
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.Conv3D,
             kwargs=test_kwargs,
@@ -531,7 +531,7 @@ class Conv3DTest(test.TestCase):
         'activity_regularizer': 'l2',
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv3D(**kwargs)
       layer.build((None, 5, 5, 5, 2))
       self.assertEqual(len(layer.losses), 2)
@@ -551,7 +551,7 @@ class Conv3DTest(test.TestCase):
         'bias_constraint': b_constraint,
         'strides': 1
     }
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       layer = keras.layers.Conv3D(**kwargs)
       layer.build((None, 5, 5, 5, 2))
       self.assertEqual(layer.kernel.constraint, k_constraint)
@@ -568,8 +568,8 @@ class ZeroPaddingTest(test.TestCase):
     shape = (num_samples, num_steps, input_dim)
     inputs = np.ones(shape)
 
-    # basic test
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
+      # basic test
       testing_utils.layer_test(
           keras.layers.ZeroPadding1D,
           kwargs={'padding': 2},
@@ -579,8 +579,7 @@ class ZeroPaddingTest(test.TestCase):
           kwargs={'padding': (1, 2)},
           input_shape=inputs.shape)
 
-    # correctness test
-    with self.test_session(use_gpu=True):
+      # correctness test
       layer = keras.layers.ZeroPadding1D(padding=2)
       layer.build(shape)
       output = layer(keras.backend.variable(inputs))
@@ -623,7 +622,7 @@ class ZeroPaddingTest(test.TestCase):
       inputs = np.ones((num_samples, stack_size, input_num_row, input_num_col))
 
       # basic test
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.ZeroPadding2D,
             kwargs={'padding': (2, 2),
@@ -636,7 +635,7 @@ class ZeroPaddingTest(test.TestCase):
             input_shape=inputs.shape)
 
       # correctness test
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         layer = keras.layers.ZeroPadding2D(
             padding=(2, 2), data_format=data_format)
         layer.build(inputs.shape)
@@ -702,15 +701,14 @@ class ZeroPaddingTest(test.TestCase):
     inputs = np.ones((num_samples, input_len_dim1, input_len_dim2,
                       input_len_dim3, stack_size))
 
-    # basic test
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
+      # basic test
       testing_utils.layer_test(
           keras.layers.ZeroPadding3D,
           kwargs={'padding': (2, 2, 2)},
           input_shape=inputs.shape)
 
-    # correctness test
-    with self.test_session(use_gpu=True):
+      # correctness test
       layer = keras.layers.ZeroPadding3D(padding=(2, 2, 2))
       layer.build(inputs.shape)
       output = layer(keras.backend.variable(inputs))
@@ -735,7 +733,7 @@ class UpSamplingTest(test.TestCase):
 
   @tf_test_util.run_in_graph_and_eager_modes
   def test_upsampling_1d(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       testing_utils.layer_test(
           keras.layers.UpSampling1D, kwargs={'size': 2}, input_shape=(3, 5, 4))
 
@@ -755,7 +753,7 @@ class UpSamplingTest(test.TestCase):
                                 stack_size)
 
       # basic test
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.UpSampling2D,
             kwargs={'size': (2, 2),
@@ -842,7 +840,7 @@ class UpSamplingTest(test.TestCase):
                                 input_len_dim3, stack_size)
 
       # basic test
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.UpSampling3D,
             kwargs={'size': (2, 2, 2),
@@ -892,7 +890,7 @@ class CroppingTest(test.TestCase):
     input_len_dim1 = 2
     inputs = np.random.rand(num_samples, time_length, input_len_dim1)
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       testing_utils.layer_test(
           keras.layers.Cropping1D,
           kwargs={'cropping': (2, 2)},
@@ -919,15 +917,14 @@ class CroppingTest(test.TestCase):
       else:
         inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2,
                                 stack_size)
-      # basic test
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
+        # basic test
         testing_utils.layer_test(
             keras.layers.Cropping2D,
             kwargs={'cropping': cropping,
                     'data_format': data_format},
             input_shape=inputs.shape)
-      # correctness test
-      with self.test_session(use_gpu=True):
+        # correctness test
         layer = keras.layers.Cropping2D(
             cropping=cropping, data_format=data_format)
         layer.build(inputs.shape)
@@ -953,7 +950,7 @@ class CroppingTest(test.TestCase):
         inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2,
                                 stack_size)
       # another correctness test (no cropping)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         cropping = ((0, 0), (0, 0))
         layer = keras.layers.Cropping2D(
             cropping=cropping, data_format=data_format)
@@ -990,7 +987,7 @@ class CroppingTest(test.TestCase):
           inputs = np.random.rand(num_samples, input_len_dim1, input_len_dim2,
                                   input_len_dim3, stack_size)
         # basic test
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           testing_utils.layer_test(
               keras.layers.Cropping3D,
               kwargs={'cropping': cropping,
@@ -999,7 +996,7 @@ class CroppingTest(test.TestCase):
 
         if len(croppings) == 3 and len(croppings[0]) == 2:
           # correctness test
-          with self.test_session(use_gpu=True):
+          with self.cached_session(use_gpu=True):
             layer = keras.layers.Cropping3D(
                 cropping=cropping, data_format=data_format)
             layer.build(inputs.shape)
@@ -1039,7 +1036,7 @@ class DepthwiseConv2DTest(test.TestCase):
     test_kwargs = copy.copy(kwargs)
     for value in values:
       test_kwargs[arg] = value
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         testing_utils.layer_test(
             keras.layers.DepthwiseConv2D,
             kwargs=test_kwargs,
diff --git a/tensorflow/python/keras/layers/cudnn_recurrent_test.py b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
index 7becbfede1..cc93364aae 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent_test.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
@@ -36,7 +36,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
   @test_util.run_in_graph_and_eager_modes
   def test_cudnn_rnn_basics(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         input_size = 10
         timesteps = 6
         units = 2
@@ -64,7 +64,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
   @test_util.run_in_graph_and_eager_modes
   def test_trainability(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         input_size = 10
         units = 2
         for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
@@ -88,7 +88,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
   )
   def test_regularizer(self, layer_class):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         input_size = 10
         timesteps = 6
         units = 2
@@ -120,7 +120,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
   )
   def test_return_state(self, layer_class):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         input_size = 10
         timesteps = 6
         units = 2
@@ -171,7 +171,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
   )
   def test_specify_initial_state_keras_tensor(self, layer_class):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         input_size = 10
         timesteps = 6
         units = 2
@@ -203,7 +203,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
   )
   def test_statefulness(self, layer_class):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         input_size = 10
         timesteps = 6
         units = 2
@@ -255,7 +255,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
                                              bidirectional, implementation,
                                              model_nest_level, model_type):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         input_size = 10
         timesteps = 6
         input_shape = (timesteps, input_size)
@@ -335,7 +335,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
     # Similar test as test_load_weights_between_noncudnn_rnn() but has different
     # rank of input due to usage of TimeDistributed. Issue: #10356.
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         input_size = 10
         steps = 6
         timesteps = 6
@@ -377,7 +377,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
   @test_util.run_in_graph_and_eager_modes
   def test_cudnnrnn_bidirectional(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         rnn = keras.layers.CuDNNGRU
         samples = 2
         dim = 2
@@ -441,7 +441,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
     Should fail fast with an exception.
     """
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         input_shape = (3, 5)
 
         def gru(cudnn=False, **kwargs):
diff --git a/tensorflow/python/keras/layers/normalization_test.py b/tensorflow/python/keras/layers/normalization_test.py
index 2844b84799..ff705183ef 100644
--- a/tensorflow/python/keras/layers/normalization_test.py
+++ b/tensorflow/python/keras/layers/normalization_test.py
@@ -115,7 +115,7 @@ class NormalizationLayersTest(test.TestCase):
 
   def test_batchnorm_convnet(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         model = keras.models.Sequential()
         norm = keras.layers.BatchNormalization(
             axis=1, input_shape=(3, 4, 4), momentum=0.8)
diff --git a/tensorflow/python/kernel_tests/distributions/util_test.py b/tensorflow/python/kernel_tests/distributions/util_test.py
index 27d652c2c6..f4e651b25b 100644
--- a/tensorflow/python/kernel_tests/distributions/util_test.py
+++ b/tensorflow/python/kernel_tests/distributions/util_test.py
@@ -879,7 +879,7 @@ class SoftplusTest(test.TestCase):
   def _testSoftplus(self, np_features, use_gpu=False):
     np_features = np.asarray(np_features)
     np_softplus = self._npSoftplus(np_features)
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.session(use_gpu=use_gpu) as sess:
       softplus = nn_ops.softplus(np_features)
       softplus_inverse = du.softplus_inverse(softplus)
       [tf_softplus, tf_softplus_inverse] = sess.run([
diff --git a/tensorflow/python/kernel_tests/random/random_gamma_test.py b/tensorflow/python/kernel_tests/random/random_gamma_test.py
index d969944493..606e8862c4 100644
--- a/tensorflow/python/kernel_tests/random/random_gamma_test.py
+++ b/tensorflow/python/kernel_tests/random/random_gamma_test.py
@@ -43,7 +43,7 @@ class RandomGammaTest(test.TestCase):
   def _Sampler(self, num, alpha, beta, dtype, use_gpu, seed=None):
 
     def func():
-      with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+      with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
         rng = random_ops.random_gamma(
             [num], alpha, beta=beta, dtype=dtype, seed=seed)
         ret = np.empty([10, num])
@@ -216,7 +216,7 @@ class RandomGammaTest(test.TestCase):
     """
     for dtype in dtypes.float16, dtypes.float32, dtypes.float64:
       for use_gpu in [False, True]:
-        with self.test_session(use_gpu=use_gpu):
+        with self.cached_session(use_gpu=use_gpu):
           rnd1 = random_ops.random_gamma([24], 2.0, dtype=dtype)
           rnd2 = random_ops.random_gamma([24], 2.0, dtype=dtype)
           diff = rnd2 - rnd1
diff --git a/tensorflow/python/kernel_tests/random/random_ops_test.py b/tensorflow/python/kernel_tests/random/random_ops_test.py
index d199a9d9dd..6de894846b 100644
--- a/tensorflow/python/kernel_tests/random/random_ops_test.py
+++ b/tensorflow/python/kernel_tests/random/random_ops_test.py
@@ -44,7 +44,7 @@ class RandomOpTestCommon(test.TestCase):
                                     use_gpu,
                                     op_seed=None,
                                     graph_seed=None):
-    with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+    with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
       if graph_seed is not None:
         random_seed.set_random_seed(graph_seed)
       x = rng_func([num], min_or_mean, max_or_stddev, dtype=dtype, seed=op_seed)
@@ -64,7 +64,7 @@ class RandomNormalTest(RandomOpTestCommon):
   def _Sampler(self, num, mu, sigma, dtype, use_gpu, seed=None):
 
     def func():
-      with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+      with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
         rng = random_ops.random_normal(
             [num], mean=mu, stddev=sigma, dtype=dtype, seed=seed)
         ret = np.empty([10, num])
@@ -112,7 +112,7 @@ class RandomNormalTest(RandomOpTestCommon):
 
   def testNoCSE(self):
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.session(use_gpu=use_gpu):
         shape = [2, 3, 4]
         rnd1 = random_ops.random_normal(shape, 0.0, 1.0, dtypes.float32)
         rnd2 = random_ops.random_normal(shape, 0.0, 1.0, dtypes.float32)
@@ -155,7 +155,7 @@ class TruncatedNormalTest(test.TestCase):
   def _Sampler(self, num, mu, sigma, dtype, use_gpu, seed=None):
 
     def func():
-      with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+      with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
         rng = random_ops.truncated_normal(
             [num], mean=mu, stddev=sigma, dtype=dtype, seed=seed)
         ret = np.empty([10, num])
@@ -220,14 +220,14 @@ class TruncatedNormalTest(test.TestCase):
       self.assertTrue(abs(np.std(x) / stddev - 0.85) < 0.04)
 
   def testLargeShape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v = variables.Variable(
           array_ops.zeros(dtype=dtypes.float32, shape=[2**33, 1]))
       n = random_ops.truncated_normal(v.shape)
       self.assertEqual([8589934592, 1], n.shape.as_list())
 
   def testNoCSE(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = [2, 3, 4]
       rnd1 = random_ops.truncated_normal(shape, 0.0, 1.0, dtypes.float32)
       rnd2 = random_ops.truncated_normal(shape, 0.0, 1.0, dtypes.float32)
@@ -251,7 +251,7 @@ class RandomUniformTest(RandomOpTestCommon):
   def _Sampler(self, num, minv, maxv, dtype, use_gpu, seed=None):
 
     def func():
-      with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+      with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
         rng = random_ops.random_uniform(
             [num], minval=minv, maxval=maxv, dtype=dtype, seed=seed)
         ret = np.empty([10, num])
@@ -353,7 +353,7 @@ class RandomUniformTest(RandomOpTestCommon):
   def testNoCSE(self):
     shape = [2, 3, 4]
     for dtype in dtypes.float16, dtypes.float32, dtypes.int32:
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         rnd1 = random_ops.random_uniform(shape, 0, 17, dtype=dtype)
         rnd2 = random_ops.random_uniform(shape, 0, 17, dtype=dtype)
         diff = (rnd2 - rnd1).eval()
diff --git a/tensorflow/python/kernel_tests/random/random_poisson_test.py b/tensorflow/python/kernel_tests/random/random_poisson_test.py
index 15ab95cdb7..417588f8a3 100644
--- a/tensorflow/python/kernel_tests/random/random_poisson_test.py
+++ b/tensorflow/python/kernel_tests/random/random_poisson_test.py
@@ -39,7 +39,7 @@ class RandomPoissonTest(test.TestCase):
   def _Sampler(self, num, lam, dtype, use_gpu, seed=None):
 
     def func():
-      with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
+      with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
         rng = random_ops.random_poisson(lam, [num], dtype=dtype, seed=seed)
         ret = np.empty([10, num])
         for i in xrange(10):
@@ -128,7 +128,7 @@ class RandomPoissonTest(test.TestCase):
     merged.
     """
     for dtype in dtypes.float16, dtypes.float32, dtypes.float64:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         rnd1 = random_ops.random_poisson(2.0, [24], dtype=dtype)
         rnd2 = random_ops.random_poisson(2.0, [24], dtype=dtype)
         diff = rnd2 - rnd1
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index a72d147a0b..ba2bf10cf3 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -402,7 +402,7 @@ class BNTest(test.TestCase):
       training = array_ops.placeholder(dtype='bool')
       outputs = bn.apply(inputs, training=training)
 
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         # Test training with placeholder learning phase.
         sess.run(variables.global_variables_initializer())
         np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
@@ -884,7 +884,7 @@ class BNTest(test.TestCase):
     moving_variance = 1.
     renorm_mean = renorm_stddev = 0.
     renorm_weight = 0.
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
@@ -936,7 +936,7 @@ class BNTest(test.TestCase):
 
     moving_mean = 0.
     moving_variance = 1.
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
@@ -989,7 +989,7 @@ class BNTest(test.TestCase):
     moving_variance = 1.
     renorm_mean = renorm_stddev = 0.
     renorm_weight = 0.
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
@@ -1039,7 +1039,7 @@ class BNTest(test.TestCase):
     self.assertListEqual(
         out1.shape.as_list(), out2.shape.as_list())
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
 
       x = np.random.random(shape)
@@ -1061,7 +1061,7 @@ class BNTest(test.TestCase):
     out = normalization_layers.batch_normalization(
         inp, virtual_batch_size=2)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
 
       x = np.random.random(np_shape)
@@ -1092,7 +1092,7 @@ class BNTest(test.TestCase):
                     shape[0] // virtual_batch_size,
                     shape[1]])
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
@@ -1145,7 +1145,7 @@ class BNTest(test.TestCase):
     ghost_shape = ([virtual_batch_size, shape[0] // virtual_batch_size] +
                    shape[1:])
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
@@ -1199,7 +1199,7 @@ class BNTest(test.TestCase):
     ghost_shape = ([virtual_batch_size, shape[0] // virtual_batch_size] +
                    shape[1:])
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
@@ -1349,7 +1349,7 @@ class BNTest(test.TestCase):
     ghost_shape = ([virtual_batch_size, shape[0] // virtual_batch_size] +
                    shape[1:])
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
diff --git a/tensorflow/python/tools/optimize_for_inference_test.py b/tensorflow/python/tools/optimize_for_inference_test.py
index a39c046761..10bfb0dc70 100644
--- a/tensorflow/python/tools/optimize_for_inference_test.py
+++ b/tensorflow/python/tools/optimize_for_inference_test.py
@@ -173,7 +173,7 @@ class OptimizeForInferenceTest(test.TestCase):
 
   def testFoldFusedBatchNorms(self):
     for data_format, use_gpu in [("NHWC", False), ("NCHW", True)]:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
         input_op = constant_op.constant(
             np.array(inputs),
@@ -212,10 +212,9 @@ class OptimizeForInferenceTest(test.TestCase):
       optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(
           original_graph_def)
 
-      with self.test_session(use_gpu=use_gpu) as sess:
-        _ = importer.import_graph_def(
-            optimized_graph_def, input_map={}, name="optimized")
-        optimized_result = sess.run(["optimized/output:0"])
+      _ = importer.import_graph_def(
+          optimized_graph_def, input_map={}, name="optimized")
+      optimized_result = sess.run(["optimized/output:0"])
 
       self.assertAllClose(
           original_result, optimized_result, rtol=1e-04, atol=1e-06)
diff --git a/tensorflow/python/training/adam_test.py b/tensorflow/python/training/adam_test.py
index 48db6e3733..0d42cc7b9c 100644
--- a/tensorflow/python/training/adam_test.py
+++ b/tensorflow/python/training/adam_test.py
@@ -109,7 +109,7 @@ class AdamOptimizerTest(test.TestCase):
 
   def testSparseDevicePlacement(self):
     for index_dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session(force_gpu=test.is_gpu_available()):
+      with self.cached_session(force_gpu=test.is_gpu_available()):
         # If a GPU is available, tests that all optimizer ops can be placed on
         # it (i.e. they have GPU kernels).
         var = variables.Variable([[1.0], [2.0]])
diff --git a/tensorflow/python/training/checkpointable/util_test.py b/tensorflow/python/training/checkpointable/util_test.py
index 14b47a1940..66d5171334 100644
--- a/tensorflow/python/training/checkpointable/util_test.py
+++ b/tensorflow/python/training/checkpointable/util_test.py
@@ -534,7 +534,7 @@ class CheckpointingTests(test.TestCase):
     num_training_steps = 10
     checkpoint_directory = self.get_temp_dir()
     for training_continuation in range(3):
-      with ops.Graph().as_default(), self.test_session(
+      with ops.Graph().as_default(), self.session(
           graph=ops.get_default_graph()), test_util.device(use_gpu=True):
         model = MyModel()
         optimizer = adam.AdamOptimizer(0.001)
@@ -621,7 +621,7 @@ class CheckpointingTests(test.TestCase):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     for training_continuation in range(3):
-      with ops.Graph().as_default(), self.test_session(
+      with ops.Graph().as_default(), self.session(
           graph=ops.get_default_graph()), test_util.device(use_gpu=True):
         model = MyModel()
         # Don't actually train so we can test variable values
@@ -1018,7 +1018,7 @@ class CheckpointingTests(test.TestCase):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     save_graph = ops.Graph()
-    with save_graph.as_default(), self.test_session(save_graph):
+    with save_graph.as_default(), self.session(save_graph):
       first = tracking.Checkpointable()
       first.var1 = variable_scope.get_variable(
           name="outside_var", initializer=0.)
@@ -1029,7 +1029,7 @@ class CheckpointingTests(test.TestCase):
       save_path = checkpointable_utils.CheckpointableSaver(first).save(
           checkpoint_prefix)
     restore_graph = ops.Graph()
-    with restore_graph.as_default(), self.test_session(restore_graph):
+    with restore_graph.as_default(), self.session(restore_graph):
       second = tracking.Checkpointable()
       second.var2 = variable_scope.get_variable(
           name="blah", initializer=0.)
@@ -1248,7 +1248,7 @@ class CheckpointingTests(test.TestCase):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
-    with ops.Graph().as_default(), self.test_session(
+    with ops.Graph().as_default(), self.session(
         graph=ops.get_default_graph()), test_util.device(use_gpu=True):
       model = MyModel()
       optimizer = adam.AdamOptimizer(0.001)
@@ -1276,7 +1276,7 @@ class CheckpointingTests(test.TestCase):
       optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix)
 
     # Restore into a graph with the optimizer
-    with ops.Graph().as_default(), self.test_session(
+    with ops.Graph().as_default(), self.session(
         graph=ops.get_default_graph()), test_util.device(use_gpu=True):
       model = MyModel()
       optimizer = adam.AdamOptimizer(0.001)
@@ -1299,7 +1299,7 @@ class CheckpointingTests(test.TestCase):
         status.assert_consumed()
 
     # Make sure initialization doesn't clobber later restores
-    with ops.Graph().as_default(), self.test_session(
+    with ops.Graph().as_default(), self.session(
         graph=ops.get_default_graph()), test_util.device(use_gpu=True):
       model = MyModel()
       optimizer = adam.AdamOptimizer(0.001, beta1=1.0)
@@ -1483,7 +1483,7 @@ class CheckpointCompatibilityTests(test.TestCase):
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     with context.graph_mode():
       save_graph = ops.Graph()
-      with save_graph.as_default(), self.test_session(
+      with save_graph.as_default(), self.session(
           graph=save_graph) as session:
         root = self._initialized_model()
         name_saver = saver_lib.Saver()
@@ -1539,7 +1539,7 @@ class CheckpointCompatibilityTests(test.TestCase):
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     with context.graph_mode():
       save_graph = ops.Graph()
-      with save_graph.as_default(), self.test_session(
+      with save_graph.as_default(), self.session(
           graph=save_graph) as session:
         root = self._initialized_model()
         save_path = root.save(session=session, file_prefix=checkpoint_prefix)
@@ -1557,7 +1557,7 @@ class CheckpointCompatibilityTests(test.TestCase):
       save_path = root.save(file_prefix=checkpoint_prefix)
     with context.graph_mode():
       save_graph = ops.Graph()
-      with save_graph.as_default(), self.test_session(
+      with save_graph.as_default(), self.session(
           graph=save_graph):
         root = self._initialized_model()
         self._set_sentinels(root)
diff --git a/tensorflow/python/training/rmsprop_test.py b/tensorflow/python/training/rmsprop_test.py
index 4f5f96e2b4..b63abe0529 100644
--- a/tensorflow/python/training/rmsprop_test.py
+++ b/tensorflow/python/training/rmsprop_test.py
@@ -92,7 +92,7 @@ class RMSPropOptimizerTest(test.TestCase):
     # TODO(yori): Use ParameterizedTest when available
     for (dtype, learning_rate, decay, momentum,
          epsilon, centered, use_resource) in _TESTPARAMS:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
         grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
@@ -211,7 +211,7 @@ class RMSPropOptimizerTest(test.TestCase):
     # TODO(yori): Use ParameterizedTest when available
     for (dtype, learning_rate, decay,
          momentum, epsilon, centered, _) in _TESTPARAMS:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
         grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
@@ -285,7 +285,7 @@ class RMSPropOptimizerTest(test.TestCase):
 
   def testWithoutMomentum(self):
     for dtype in [dtypes.half, dtypes.float32]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
         grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
@@ -351,7 +351,7 @@ class RMSPropOptimizerTest(test.TestCase):
 
   def testWithMomentum(self):
     for dtype in [dtypes.half, dtypes.float32]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
         grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 49e6e6546d..efb464410b 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -831,7 +831,7 @@ class SaverTest(test.TestCase):
       orig_vals = sess.run(orig_vars)
 
     restore_graph = ops_lib.Graph()
-    with restore_graph.as_default(), self.test_session(
+    with restore_graph.as_default(), self.session(
         graph=restore_graph) as sess:
       restored_vars = _model()
       save = saver_module.Saver(max_to_keep=1)
@@ -3015,7 +3015,7 @@ class CheckpointableCompatibilityTests(test.TestCase):
           checkpoint_directory, "second"))
 
     restore_graph = ops_lib.Graph()
-    with restore_graph.as_default(), self.test_session(
+    with restore_graph.as_default(), self.session(
         graph=restore_graph) as sess:
       root = self._initialized_model()
       self._set_sentinels(root)
diff --git a/tensorflow/python/training/training_ops_test.py b/tensorflow/python/training/training_ops_test.py
index f410ceaaff..0216482825 100644
--- a/tensorflow/python/training/training_ops_test.py
+++ b/tensorflow/python/training/training_ops_test.py
@@ -50,7 +50,7 @@ class TrainingOpsTest(TensorFlowTestCase):
 
   def _testTypes(self, x, alpha, delta, use_gpu=None):
     self.setUp()
-    with self.test_session(use_gpu=use_gpu):
+    with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       variables.global_variables_initializer().run()
       self.assertAllCloseAccordingToType(x, var.eval())
@@ -69,7 +69,7 @@ class TrainingOpsTest(TensorFlowTestCase):
 
   def _testTypesForAdagrad(self, x, y, lr, grad, use_gpu=None):
     self.setUp()
-    with self.test_session(use_gpu=use_gpu):
+    with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       variables.global_variables_initializer().run()
@@ -93,7 +93,7 @@ class TrainingOpsTest(TensorFlowTestCase):
                         l2=0.0,
                         lr_power=-0.5):
     self.setUp()
-    with self.test_session(use_gpu=use_gpu):
+    with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       linear = variables.VariableV1(z)
@@ -147,7 +147,7 @@ class TrainingOpsTest(TensorFlowTestCase):
 
   def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices):
     self.setUp()
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       variables.global_variables_initializer().run()
@@ -177,7 +177,7 @@ class TrainingOpsTest(TensorFlowTestCase):
                               l2=0.0,
                               lr_power=-0.5):
     self.setUp()
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       linear = variables.VariableV1(z)
@@ -256,7 +256,7 @@ class TrainingOpsTest(TensorFlowTestCase):
 
   def _testTypesForAdam(self, var, m, v, grad, use_gpu):
     self.setUp()
-    with self.test_session(use_gpu=use_gpu):
+    with self.session(use_gpu=use_gpu):
       var_t = variables.VariableV1(var)
       m_t = variables.VariableV1(m)
       v_t = variables.VariableV1(v)
-- 
GitLab


From a8c990335ef8f6f8a0d2d1d79acf573807dd4a7d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 08:45:23 -0700
Subject: [PATCH 0862/1085] Move from deprecated self.test_session() to
 self.session() or self.cached_session().

Move to cached_session() if the session is create more than once per test. Move to session() otherwise.

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to session() instead which slightly changes the semantic of the function:
* the session is not cached anymore (a new session is created).
* the session is closed when exiting the "with" scope.

PiperOrigin-RevId: 216868175
---
 tensorflow/compiler/tests/dense_layer_test.py |  2 +-
 tensorflow/compiler/tests/jit_test.py         | 12 ++++----
 tensorflow/python/ops/bitwise_ops_test.py     | 14 ++++-----
 tensorflow/python/ops/collective_ops_test.py  |  4 +--
 .../python/ops/gradient_checker_test.py       |  6 ++--
 tensorflow/python/ops/gradients_test.py       | 12 ++++----
 tensorflow/python/ops/histogram_ops_test.py   | 10 +++----
 tensorflow/python/ops/image_grad_test.py      | 16 +++++-----
 tensorflow/python/ops/math_grad_test.py       |  4 +--
 tensorflow/python/ops/math_ops_test.py        | 30 +++++++++----------
 tensorflow/python/ops/nn_batchnorm_test.py    | 12 ++++----
 .../python/ops/nn_fused_batchnorm_test.py     |  8 ++---
 tensorflow/python/ops/nn_test.py              | 26 ++++++++--------
 tensorflow/python/ops/nn_xent_test.py         |  8 ++---
 .../python/ops/quantized_conv_ops_test.py     |  2 +-
 tensorflow/python/ops/quantized_ops_test.py   |  4 +--
 .../python/ops/special_math_ops_test.py       | 24 +++++++--------
 17 files changed, 97 insertions(+), 97 deletions(-)

diff --git a/tensorflow/compiler/tests/dense_layer_test.py b/tensorflow/compiler/tests/dense_layer_test.py
index 618996e9d9..d1b90f098d 100644
--- a/tensorflow/compiler/tests/dense_layer_test.py
+++ b/tensorflow/compiler/tests/dense_layer_test.py
@@ -68,7 +68,7 @@ class DenseLayerTest(test.TestCase):
     config.graph_options.optimizer_options.global_jit_level = (
         config_pb2.OptimizerOptions.ON_1)
 
-    with self.test_session(config=config) as sess:
+    with self.session(config=config) as sess:
       x = array_ops.placeholder(shape=[None, None, 3], dtype=np.float32)
       y = layers.dense(x, 3)
 
diff --git a/tensorflow/compiler/tests/jit_test.py b/tensorflow/compiler/tests/jit_test.py
index e31c25b259..8778b54dfa 100644
--- a/tensorflow/compiler/tests/jit_test.py
+++ b/tensorflow/compiler/tests/jit_test.py
@@ -276,7 +276,7 @@ class XlaCompilationTest(test.TestCase):
   def testReshape(self):
     """Tests an operator with compile-time constant and non-constant inputs."""
 
-    with self.test_session(config=NoRewriteSessionConfig()) as sess:
+    with self.session(config=NoRewriteSessionConfig()) as sess:
       x = array_ops.placeholder(dtypes.float32)
       y = array_ops.placeholder(dtypes.int32)
       with jit_scope():
@@ -303,7 +303,7 @@ class XlaCompilationTest(test.TestCase):
   def testIgnoredArguments(self):
     """Tests that JIT computations can ignore formal parameters."""
 
-    with self.test_session(config=NoRewriteSessionConfig()) as sess:
+    with self.session(config=NoRewriteSessionConfig()) as sess:
       x = array_ops.placeholder(dtypes.int32)
       y = array_ops.placeholder(dtypes.int32)
       with jit_scope():
@@ -331,7 +331,7 @@ class XlaCompilationTest(test.TestCase):
   def testLoops(self):
     """Tests that compilation accepts computations containing loops."""
 
-    with self.test_session(config=NoRewriteSessionConfig()) as session:
+    with self.session(config=NoRewriteSessionConfig()) as session:
       x = array_ops.placeholder(dtypes.float32)
       with jit_scope():
         c = lambda i, _: math_ops.less(i, 5)
@@ -349,7 +349,7 @@ class XlaCompilationTest(test.TestCase):
   def testCond(self):
     """Tests that compilation handles switch operators."""
 
-    with self.test_session(config=NoRewriteSessionConfig()) as session:
+    with self.session(config=NoRewriteSessionConfig()) as session:
       x = array_ops.placeholder(dtypes.float32)
       y = array_ops.placeholder(dtypes.float32)
       c = array_ops.placeholder(dtypes.bool)
@@ -394,7 +394,7 @@ class XlaCompilationTest(test.TestCase):
       inp = array_ops.placeholder(dtypes.float32)
       out = Entry(inp)
 
-    with self.test_session(
+    with self.session(
         config=NoRewriteSessionConfig(), graph=g, use_gpu=True) as sess:
       run_metadata = config_pb2.RunMetadata()
       val = sess.run(out,
@@ -407,7 +407,7 @@ class XlaCompilationTest(test.TestCase):
   def testLoopDeadlock(self):
     """Regression test for bug that caused deadlocks in graphs with loops."""
 
-    with self.test_session(config=NoRewriteSessionConfig()) as session:
+    with self.session(config=NoRewriteSessionConfig()) as session:
       x = array_ops.placeholder(dtypes.float32)
       with jit_scope():
         y = x + 1.0
diff --git a/tensorflow/python/ops/bitwise_ops_test.py b/tensorflow/python/ops/bitwise_ops_test.py
index c4cfc0da19..dfb40db2d5 100644
--- a/tensorflow/python/ops/bitwise_ops_test.py
+++ b/tensorflow/python/ops/bitwise_ops_test.py
@@ -38,7 +38,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
     dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
                   dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64]
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for dtype in dtype_list:
         lhs = constant_op.constant([0, 5, 3, 14], dtype=dtype)
         rhs = constant_op.constant([5, 0, 7, 11], dtype=dtype)
@@ -61,7 +61,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
     def count_bits(x):
       return sum([bin(z).count("1") for z in six.iterbytes(x.tobytes())])
     for dtype in dtype_list:
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         print("PopulationCount test: ", dtype)
         inputs = np.array(raw_inputs, dtype=dtype.as_numpy_dtype)
         truth = [count_bits(x) for x in inputs]
@@ -73,7 +73,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
     dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
                   dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64]
     inputs = [0, 5, 3, 14]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for dtype in dtype_list:
         # Because of issues with negative numbers, let's test this indirectly.
         # 1. invert(a) and a = 0
@@ -97,7 +97,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
     dtype_list = [np.int8, np.int16, np.int32, np.int64,
                   np.uint8, np.uint16, np.uint32, np.uint64]
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for dtype in dtype_list:
         lhs = np.array([0, 5, 3, 14], dtype=dtype)
         rhs = np.array([5, 0, 7, 3], dtype=dtype)
@@ -110,7 +110,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
   def testShiftsWithNegativeLHS(self):
     dtype_list = [np.int8, np.int16, np.int32, np.int64]
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for dtype in dtype_list:
         lhs = np.array([-1, -5, -3, -14], dtype=dtype)
         rhs = np.array([5, 0, 7, 11], dtype=dtype)
@@ -123,7 +123,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
   def testImplementationDefinedShiftsDoNotCrash(self):
     dtype_list = [np.int8, np.int16, np.int32, np.int64]
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for dtype in dtype_list:
         lhs = np.array([-1, -5, -3, -14], dtype=dtype)
         rhs = np.array([-2, 64, 101, 32], dtype=dtype)
@@ -139,7 +139,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
     dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
                   dtypes.uint8, dtypes.uint16]
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for dtype in dtype_list:
         lhs = constant_op.constant([[0], [3], [5]], dtype=dtype)
         rhs = constant_op.constant([[1, 2, 4]], dtype=dtype)
diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py
index 78c4b4bfe0..9c772a9354 100644
--- a/tensorflow/python/ops/collective_ops_test.py
+++ b/tensorflow/python/ops/collective_ops_test.py
@@ -32,7 +32,7 @@ class CollectiveOpTest(test.TestCase):
   def _testCollectiveReduce(self, t0, t1, expected, set_graph_key):
     group_key = 1
     instance_key = 1
-    with self.test_session(
+    with self.session(
         config=config_pb2.ConfigProto(device_count={'CPU': 2})) as sess:
       with ops.device('/CPU:0'):
         in0 = constant_op.constant(t0)
@@ -65,7 +65,7 @@ class CollectiveOpTest(test.TestCase):
   def _testCollectiveBroadcast(self, t0):
     group_key = 1
     instance_key = 1
-    with self.test_session(
+    with self.session(
         config=config_pb2.ConfigProto(device_count={'CPU': 2})) as sess:
       with ops.device('/CPU:0'):
         in0 = constant_op.constant(t0)
diff --git a/tensorflow/python/ops/gradient_checker_test.py b/tensorflow/python/ops/gradient_checker_test.py
index fbb84b9018..66c7b9a71b 100644
--- a/tensorflow/python/ops/gradient_checker_test.py
+++ b/tensorflow/python/ops/gradient_checker_test.py
@@ -48,7 +48,7 @@ class GradientCheckerTest(test.TestCase):
 
   def testAddSimple(self):
     np.random.seed(1)  # Fix seed to avoid flakiness
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       # a test case for Add operation
       size = (2, 3)
       x1 = constant_op.constant(2.0, shape=size, name="x1")
@@ -62,7 +62,7 @@ class GradientCheckerTest(test.TestCase):
 
   def testAddSimpleGPU(self):
     np.random.seed(2)  # Fix seed to avoid flakiness
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # a test case for Add operation
       size = (2, 3)
       x1 = constant_op.constant(2.0, shape=size, name="x1")
@@ -216,7 +216,7 @@ class MiniMNISTTest(test.TestCase):
     s = label_data.sum(axis=1)
     label_data /= s[:, None]
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # We treat the inputs as "parameters" here
       inp = constant_op.constant(
           inp_data.tolist(),
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index c93e2493ee..103e3902b6 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -621,7 +621,7 @@ class HessianVectorProductTest(test_util.TensorFlowTestCase):
     hess_value = mat_value + mat_value.T
     hess_v_value = np.dot(hess_value, v_value)
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         mat = constant_op.constant(mat_value)
         v = constant_op.constant(v_value)
         x = constant_op.constant(x_value)
@@ -643,7 +643,7 @@ class HessianTest(test_util.TensorFlowTestCase):
     mat_value = rng.randn(m, m).astype("float32")
     x_value = rng.randn(m).astype("float32")
     hess_value = mat_value + mat_value.T
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       mat = constant_op.constant(mat_value)
       x = constant_op.constant(x_value)
       x_mat_x = math_ops.reduce_sum(x[:, None] * mat * x[None, :])
@@ -659,7 +659,7 @@ class HessianTest(test_util.TensorFlowTestCase):
     mat_values = [rng.randn(m, m).astype("float32") for _ in range(n)]
     x_values = [rng.randn(m).astype("float32") for _ in range(n)]
     hess_values = [mat_value + mat_value.T for mat_value in mat_values]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       mats = [constant_op.constant(mat_value) for mat_value in mat_values]
       xs = [constant_op.constant(x_value) for x_value in x_values]
       xs_mats_xs = [
@@ -673,7 +673,7 @@ class HessianTest(test_util.TensorFlowTestCase):
 
   def testHessianInvalidDimension(self):
     for shape in [(10, 10), None]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = array_ops.placeholder(dtypes.float32, shape)
         # Expect a ValueError because the dimensions are wrong
         with self.assertRaises(ValueError):
@@ -686,7 +686,7 @@ class HessianTest(test_util.TensorFlowTestCase):
     m = 3
     rng = np.random.RandomState([1, 2, 3])
     x_value = rng.randn(m, m).astype("float32")
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant(x_value)
       x_square = math_ops.reduce_sum(
           math_ops.matmul(array_ops.transpose(x), x) * 0.5
@@ -705,7 +705,7 @@ class HessianTest(test_util.TensorFlowTestCase):
     n = 4
     rng = np.random.RandomState([1, 2, 3])
     x_value = rng.randn(m, n).astype("float32")
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant(x_value)
       x_square = math_ops.reduce_sum(
           math_ops.matmul(array_ops.transpose(x), x) * 0.5
diff --git a/tensorflow/python/ops/histogram_ops_test.py b/tensorflow/python/ops/histogram_ops_test.py
index 1ba805dbb4..810dd44611 100644
--- a/tensorflow/python/ops/histogram_ops_test.py
+++ b/tensorflow/python/ops/histogram_ops_test.py
@@ -107,7 +107,7 @@ class HistogramFixedWidthTest(test.TestCase):
     value_range = [0.0, 5.0]
     values = []
     expected_bin_counts = [0, 0, 0, 0, 0]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       hist = histogram_ops.histogram_fixed_width(values, value_range, nbins=5)
       self.assertEqual(dtypes.int32, hist.dtype)
       self.assertAllClose(expected_bin_counts, hist.eval())
@@ -118,7 +118,7 @@ class HistogramFixedWidthTest(test.TestCase):
     value_range = [0.0, 5.0]
     values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]
     expected_bin_counts = [2, 1, 1, 0, 2]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       hist = histogram_ops.histogram_fixed_width(
           values, value_range, nbins=5, dtype=dtypes.int64)
       self.assertEqual(dtypes.int64, hist.dtype)
@@ -130,7 +130,7 @@ class HistogramFixedWidthTest(test.TestCase):
     value_range = np.float64([0.0, 5.0])
     values = np.float64([-1.0, 0.0, 1.5, 2.0, 5.0, 15])
     expected_bin_counts = [2, 1, 1, 0, 2]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       hist = histogram_ops.histogram_fixed_width(values, value_range, nbins=5)
       self.assertEqual(dtypes.int32, hist.dtype)
       self.assertAllClose(expected_bin_counts, hist.eval())
@@ -141,7 +141,7 @@ class HistogramFixedWidthTest(test.TestCase):
     value_range = [0.0, 5.0]
     values = [[-1.0, 0.0, 1.5], [2.0, 5.0, 15]]
     expected_bin_counts = [2, 1, 1, 0, 2]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       hist = histogram_ops.histogram_fixed_width(values, value_range, nbins=5)
       self.assertEqual(dtypes.int32, hist.dtype)
       self.assertAllClose(expected_bin_counts, hist.eval())
@@ -151,7 +151,7 @@ class HistogramFixedWidthTest(test.TestCase):
     values = [[-1.0, 0.0, 1.5], [2.0, 5.0, 15]]
     expected_bin_counts = [2, 1, 1, 0, 2]
     placeholder = array_ops.placeholder(dtypes.int32)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       hist = histogram_ops.histogram_fixed_width(values, value_range, nbins=5)
       self.assertAllEqual(hist.shape.as_list(), (5,))
       self.assertEqual(dtypes.int32, hist.dtype)
diff --git a/tensorflow/python/ops/image_grad_test.py b/tensorflow/python/ops/image_grad_test.py
index fddde75f6b..32c2f37c0b 100644
--- a/tensorflow/python/ops/image_grad_test.py
+++ b/tensorflow/python/ops/image_grad_test.py
@@ -38,7 +38,7 @@ class ResizeNearestNeighborOpTest(test.TestCase):
     for nptype in self.TYPES:
       x = np.arange(0, 4).reshape(in_shape).astype(nptype)
 
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         input_tensor = constant_op.constant(x, shape=in_shape)
         resize_out = image_ops.resize_nearest_neighbor(input_tensor,
                                                        out_shape[1:3])
@@ -54,7 +54,7 @@ class ResizeNearestNeighborOpTest(test.TestCase):
     for nptype in self.TYPES:
       x = np.arange(0, 6).reshape(in_shape).astype(nptype)
 
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         input_tensor = constant_op.constant(x, shape=in_shape)
         resize_out = image_ops.resize_nearest_neighbor(input_tensor,
                                                        out_shape[1:3])
@@ -69,7 +69,7 @@ class ResizeNearestNeighborOpTest(test.TestCase):
     for nptype in self.TYPES:
       x = np.arange(0, 24).reshape(in_shape).astype(nptype)
 
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         input_tensor = constant_op.constant(x, shape=in_shape)
         resize_out = image_ops.resize_nearest_neighbor(input_tensor,
                                                        out_shape[1:3])
@@ -84,14 +84,14 @@ class ResizeNearestNeighborOpTest(test.TestCase):
     for nptype in self.TYPES:
       x = np.arange(0, np.prod(in_shape)).reshape(in_shape).astype(nptype)
       for align_corners in [True, False]:
-        with self.test_session(use_gpu=False):
+        with self.cached_session(use_gpu=False):
           input_tensor = constant_op.constant(x, shape=in_shape)
           resize_out = image_ops.resize_nearest_neighbor(
               input_tensor, out_shape[1:3], align_corners=align_corners)
           grad_cpu = gradient_checker.compute_gradient(
               input_tensor, in_shape, resize_out, out_shape, x_init_value=x)
 
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           input_tensor = constant_op.constant(x, shape=in_shape)
           resize_out = image_ops.resize_nearest_neighbor(
               input_tensor, out_shape[1:3], align_corners=align_corners)
@@ -151,7 +151,7 @@ class ResizeBilinearOpTest(test.TestCase):
     for align_corners in [True, False]:
       grad = {}
       for use_gpu in [False, True]:
-        with self.test_session(use_gpu=use_gpu):
+        with self.cached_session(use_gpu=use_gpu):
           input_tensor = constant_op.constant(x, shape=in_shape)
           resized_tensor = image_ops.resize_bilinear(
               input_tensor, out_shape[1:3], align_corners=align_corners)
@@ -262,7 +262,7 @@ class CropAndResizeOpTest(test.TestCase):
     boxes = np.array([[0, 0, 1, 1], [.1, .2, .7, .8]], dtype=np.float32)
     box_ind = np.array([0, 1], dtype=np.int32)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       crops = image_ops.crop_and_resize(
           constant_op.constant(
               image, shape=image_shape),
@@ -351,7 +351,7 @@ class CropAndResizeOpTest(test.TestCase):
               boxes = np.array(boxes, dtype=np.float32)
               box_ind = np.arange(batch, dtype=np.int32)
 
-              with self.test_session(use_gpu=True):
+              with self.cached_session(use_gpu=True):
                 image_tensor = constant_op.constant(image, shape=image_shape)
                 boxes_tensor = constant_op.constant(boxes, shape=[num_boxes, 4])
                 box_ind_tensor = constant_op.constant(
diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py
index 9cfb050942..d1fe834fc7 100644
--- a/tensorflow/python/ops/math_grad_test.py
+++ b/tensorflow/python/ops/math_grad_test.py
@@ -41,7 +41,7 @@ class SquaredDifferenceOpTest(test.TestCase):
     l = np.random.randn(*left_shape)
     r = np.random.randn(*right_shape)
 
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       left_tensor = constant_op.constant(l, shape=left_shape)
       right_tensor = constant_op.constant(r, shape=right_shape)
       output = math_ops.squared_difference(left_tensor, right_tensor)
@@ -77,7 +77,7 @@ class AbsOpTest(test.TestCase):
           self._biasedRandN(
               shape, bias=bias), dtype=dtype)
 
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       output = math_ops.abs(value)
       error = gradient_checker.compute_gradient_error(
           value, shape, output, output.get_shape().as_list())
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index f051850d92..10b87b3fcc 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -75,7 +75,7 @@ class LogSumExpTest(test_util.TensorFlowTestCase):
   def testReduceLogSumExp(self):
     for dtype in [np.float16, np.float32, np.double]:
       x_np = np.random.rand(5, 5).astype(dtype)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         y_tf_np = math_ops.reduce_logsumexp(x_np).eval()
         y_np = log(np.sum(exp(x_np)))
         self.assertAllClose(y_tf_np, y_np)
@@ -83,7 +83,7 @@ class LogSumExpTest(test_util.TensorFlowTestCase):
   def testReductionIndices(self):
     for dtype in [np.float16, np.float32, np.double]:
       x_np = np.random.rand(5, 5).astype(dtype)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         y_tf = math_ops.reduce_logsumexp(x_np, reduction_indices=[0])
         y_np = log(np.sum(exp(x_np), axis=0))
         self.assertShapeEqual(y_np, y_tf)
@@ -93,7 +93,7 @@ class LogSumExpTest(test_util.TensorFlowTestCase):
   def testReductionIndices2(self):
     for dtype in [np.float16, np.float32, np.double]:
       x_np = np.random.rand(5, 5).astype(dtype)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         y_tf = math_ops.reduce_logsumexp(x_np, reduction_indices=0)
         y_np = log(np.sum(exp(x_np), axis=0))
         self.assertShapeEqual(y_np, y_tf)
@@ -103,7 +103,7 @@ class LogSumExpTest(test_util.TensorFlowTestCase):
   def testKeepDims(self):
     for dtype in [np.float16, np.float32, np.double]:
       x_np = np.random.rand(5, 5).astype(dtype)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         y_tf_np = math_ops.reduce_logsumexp(x_np, keepdims=True).eval()
         self.assertEqual(y_tf_np.ndim, x_np.ndim)
         y_np = log(np.sum(exp(x_np), keepdims=True))
@@ -120,7 +120,7 @@ class LogSumExpTest(test_util.TensorFlowTestCase):
         if out == np.inf:
           raise RuntimeWarning("overflow encountered in exp")
 
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x_tf = constant_op.constant(x_np, shape=x_np.shape)
         y_tf_np = math_ops.reduce_logsumexp(x_tf).eval()
         y_np = log(np.sum(exp(x_np - max_np))) + max_np
@@ -137,14 +137,14 @@ class LogSumExpTest(test_util.TensorFlowTestCase):
         if out == -np.inf:
           raise RuntimeWarning("divide by zero encountered in log")
 
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x_tf = constant_op.constant(x_np, shape=x_np.shape)
         y_tf_np = math_ops.reduce_logsumexp(x_tf).eval()
         y_np = log(np.sum(exp(x_np - max_np))) + max_np
         self.assertAllClose(y_tf_np, y_np)
 
   def testInfinity(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       res = math_ops.reduce_logsumexp(-np.inf).eval()
       self.assertEqual(-np.inf, res)
 
@@ -172,7 +172,7 @@ class ModTest(test_util.TensorFlowTestCase):
       # Test scalar and vector versions.
       for denom in [x[0], [x[0]] * 3]:
         x_np = np.array(x, dtype=dtype)
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           x_tf = constant_op.constant(x_np, shape=x_np.shape)
           y_tf = math_ops.mod(x_tf, denom)
           y_tf_np = y_tf.eval()
@@ -185,7 +185,7 @@ class ModTest(test_util.TensorFlowTestCase):
       # Test scalar and vector versions.
       for denom in [x[0], x]:
         x_np = np.array(x, dtype=dtype)
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           x_tf = constant_op.constant(x_np, shape=x_np.shape)
           y_tf = math_ops.mod(x_tf, denom)
           y_tf_np = y_tf.eval()
@@ -293,7 +293,7 @@ class AccumulateNTest(test_util.TensorFlowTestCase):
     np.random.seed(12345)
     x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)]
     tf_x = ops.convert_n_to_tensor(x)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).eval())
       self.assertAllClose(x[0] * 5, math_ops.accumulate_n([tf_x[0]] * 5).eval())
 
@@ -301,7 +301,7 @@ class AccumulateNTest(test_util.TensorFlowTestCase):
     np.random.seed(54321)
     x = [np.random.randint(-128, 128, (5, 4, 3, 2, 1)) for _ in range(6)]
     tf_x = ops.convert_n_to_tensor(x)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(sum(x), math_ops.accumulate_n(tf_x).eval())
       self.assertAllEqual(x[0] * 6, math_ops.accumulate_n([tf_x[0]] * 6).eval())
 
@@ -318,7 +318,7 @@ class AddNTest(test_util.TensorFlowTestCase):
                         constant_op.constant(1)]))
 
     res = math_ops.add_n(partials) + constant_op.constant(0)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(res.eval(), 100)
 
   def testFloat(self):
@@ -326,7 +326,7 @@ class AddNTest(test_util.TensorFlowTestCase):
     for num_inputs in range(1, 10):
       x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(num_inputs)]
       tf_x = ops.convert_n_to_tensor(x)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         self.assertAllClose(sum(x), math_ops.add_n(tf_x).eval())
         self.assertAllClose(x[0] * num_inputs,
                             math_ops.add_n([tf_x[0]] * num_inputs).eval())
@@ -339,7 +339,7 @@ class AddNTest(test_util.TensorFlowTestCase):
           for _ in range(num_inputs)
       ]
       tf_x = ops.convert_n_to_tensor(x)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         self.assertAllEqual(sum(x), math_ops.add_n(tf_x).eval())
         self.assertAllEqual(x[0] * num_inputs,
                             math_ops.add_n([tf_x[0]] * num_inputs).eval())
@@ -347,7 +347,7 @@ class AddNTest(test_util.TensorFlowTestCase):
   def testGrad(self):
     np.random.seed(42)
     for num_inputs in range(1, 10):
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         input_vars = [
             variables.Variable(10.0 * np.random.random())
             for i in range(0, num_inputs)
diff --git a/tensorflow/python/ops/nn_batchnorm_test.py b/tensorflow/python/ops/nn_batchnorm_test.py
index a7467aa943..c8a5b58e45 100644
--- a/tensorflow/python/ops/nn_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_batchnorm_test.py
@@ -80,7 +80,7 @@ class BatchNormalizationTest(test.TestCase):
     beta_val = np.random.random_sample(param_shape).astype(np.float32)
     gamma_val = np.random.random_sample(param_shape).astype(np.float32)
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         x = constant_op.constant(x_val, name="x")
         m = constant_op.constant(m_val, name="m")
         v = constant_op.constant(v_val, name="v")
@@ -210,7 +210,7 @@ class BatchNormalizationTest(test.TestCase):
     gamma_val = np.random.random_sample(param_shape).astype(np.float32)
     backprop_val = np.random.random_sample(x_shape).astype(np.float32)
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         x = constant_op.constant(x_val, name="x")
         m = constant_op.constant(m_val, name="m")
         v = constant_op.constant(v_val, name="v")
@@ -259,7 +259,7 @@ class BatchNormalizationTest(test.TestCase):
     beta_val = np.random.random_sample(param_shape).astype(np.float32)
     gamma_val = np.random.random_sample(param_shape).astype(np.float32)
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         x = constant_op.constant(x_val, name="x")
         m = constant_op.constant(m_val, name="m")
         v = constant_op.constant(v_val, name="v")
@@ -302,7 +302,7 @@ class BatchNormalizationTest(test.TestCase):
     beta_val = np.random.random_sample(param_shape).astype(numpy_param_dtype)
     gamma_val = np.random.random_sample(param_shape).astype(numpy_param_dtype)
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         x = constant_op.constant(x_val, name="x")
         m = constant_op.constant(m_val, name="m")
         v = constant_op.constant(v_val, name="v")
@@ -365,7 +365,7 @@ class SufficientStatisticsTest(test.TestCase):
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     np_c, np_m, np_v, np_s = self._npSuffStats(x_val, axes, shift, keep_dims)
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         if has_shape:
           x = constant_op.constant(x_val, name="x")
           x.set_shape(x_shape)
@@ -422,7 +422,7 @@ class NormalizeMomentsTest(test.TestCase):
       shift_v = None
     npm, npv = self._npNormalizeMoments(counts, mean_ss, variance_ss, shift_v)
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         tf_counts = constant_op.constant(counts, name="counts")
         tf_mean_ss = constant_op.constant(mean_ss, name="mean_ss")
         tf_variance_ss = constant_op.constant(variance_ss, name="variance_ss")
diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py
index a08b836025..5ac8eba6f7 100644
--- a/tensorflow/python/ops/nn_fused_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py
@@ -66,7 +66,7 @@ class BatchNormalizationTest(test.TestCase):
     mean_val = np.random.random_sample(scale_shape).astype(scale_dtype)
     var_val = np.random.random_sample(scale_shape).astype(scale_dtype)
 
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       x = constant_op.constant(x_val, name='x')
       scale = constant_op.constant(scale_val, name='scale')
       offset = constant_op.constant(offset_val, name='offset')
@@ -115,7 +115,7 @@ class BatchNormalizationTest(test.TestCase):
     x_val = np.random.random_sample(x_shape).astype(x_dtype)
     scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
     offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       x = constant_op.constant(x_val, name='x')
       scale = constant_op.constant(scale_val, name='scale')
       offset = constant_op.constant(offset_val, name='offset')
@@ -190,7 +190,7 @@ class BatchNormalizationTest(test.TestCase):
     scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
     offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
 
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       x = constant_op.constant(x_val, name='x')
       scale = constant_op.constant(scale_val, name='scale')
       offset = constant_op.constant(offset_val, name='offset')
@@ -252,7 +252,7 @@ class BatchNormalizationTest(test.TestCase):
     scale_val = np.random.random_sample(scale_shape).astype(scale_dtype)
     offset_val = np.random.random_sample(scale_shape).astype(scale_dtype)
 
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       x = constant_op.constant(x_val, name='x')
       grad_y = constant_op.constant(grad_y_val, name='grad_y')
       scale = constant_op.constant(scale_val, name='scale')
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index 2fabb2e966..4b9f0a1d48 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -1074,7 +1074,7 @@ class DataFormatDimMapTest(test_lib.TestCase):
   def _test(self, x_val, y_val_expected):
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_dim_map(x)
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.cached_session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, y_val_expected)
 
@@ -1097,7 +1097,7 @@ class DataFormatDimMapTest(test_lib.TestCase):
     y_val_expected = [2, 2, 3]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_dim_map(x, src_format="NHWC", dst_format="NCHW")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, y_val_expected)
 
@@ -1106,7 +1106,7 @@ class DataFormatDimMapTest(test_lib.TestCase):
     y_val_expected = [2, 0, 1, 3, 2, 0, 1, 3]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_dim_map(x, src_format="NHWC", dst_format="HWNC")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, y_val_expected)
 
@@ -1115,7 +1115,7 @@ class DataFormatDimMapTest(test_lib.TestCase):
     y_val_expected = [3, 1, 0, 2, 3, 1, 0, 2]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_dim_map(x, src_format="NHWC", dst_format="WHCN")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, y_val_expected)
 
@@ -1124,7 +1124,7 @@ class DataFormatDimMapTest(test_lib.TestCase):
     y_val_expected = [3, 2, 1, 0, 3, 2, 1, 0]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_dim_map(x, src_format="qwer", dst_format="rewq")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, y_val_expected)
 
@@ -1135,7 +1135,7 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
     x_val = [7, 4, 9, 3]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x)
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [7, 3, 4, 9])
 
@@ -1143,7 +1143,7 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
     x_val = [7, 4, 9, 3]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x, src_format="NCHW", dst_format="NHWC")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [7, 9, 3, 4])
 
@@ -1151,7 +1151,7 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
     x_val = [7, 4, 9, 3]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x, src_format="NHWC", dst_format="HWNC")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [4, 9, 7, 3])
 
@@ -1159,7 +1159,7 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
     x_val = [7, 4, 9, 3]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x, src_format="HWNC", dst_format="NHWC")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [9, 7, 4, 3])
 
@@ -1167,7 +1167,7 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
     x_val = [[7, 4], [9, 3], [4, 5], [5, 1]]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x)
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [[7, 4], [5, 1], [9, 3], [4, 5]])
 
@@ -1175,7 +1175,7 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
     x_val = [[7, 4], [9, 3], [4, 5], [5, 1]]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x, src_format="NHWC", dst_format="HWNC")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [[9, 3], [4, 5], [7, 4], [5, 1]])
 
@@ -1183,7 +1183,7 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
     x_val = [[7, 4], [9, 3], [4, 5], [5, 1]]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x, src_format="HWNC", dst_format="NHWC")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [[4, 5], [7, 4], [9, 3], [5, 1]])
 
@@ -1191,7 +1191,7 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
     x_val = [[7, 4], [9, 3], [4, 5], [5, 1]]
     x = constant_op.constant(x_val)
     y = nn_ops.data_format_vec_permute(x, src_format="NCHW", dst_format="NHWC")
-    with self.test_session(use_gpu=test_lib.is_gpu_available()) as sess:
+    with self.session(use_gpu=test_lib.is_gpu_available()) as sess:
       y_val = sess.run(y)
       self.assertAllEqual(y_val, [[7, 4], [4, 5], [5, 1], [9, 3]])
 
diff --git a/tensorflow/python/ops/nn_xent_test.py b/tensorflow/python/ops/nn_xent_test.py
index 54a0e26bfb..57ce4fd0a9 100644
--- a/tensorflow/python/ops/nn_xent_test.py
+++ b/tensorflow/python/ops/nn_xent_test.py
@@ -63,7 +63,7 @@ class SigmoidCrossEntropyWithLogitsTest(test.TestCase):
   def testLogisticOutput(self):
     for use_gpu in [True, False]:
       for dtype in [dtypes.float32, dtypes.float16]:
-        with self.test_session(use_gpu=use_gpu):
+        with self.cached_session(use_gpu=use_gpu):
           logits, targets, losses = self._Inputs(dtype=dtype)
           loss = nn_impl.sigmoid_cross_entropy_with_logits(
               labels=targets, logits=logits)
@@ -74,7 +74,7 @@ class SigmoidCrossEntropyWithLogitsTest(test.TestCase):
   def testLogisticOutputMultiDim(self):
     for use_gpu in [True, False]:
       for dtype in [dtypes.float32, dtypes.float16]:
-        with self.test_session(use_gpu=use_gpu):
+        with self.cached_session(use_gpu=use_gpu):
           logits, targets, losses = self._Inputs(dtype=dtype, sizes=[2, 2, 2])
           loss = nn_impl.sigmoid_cross_entropy_with_logits(
               labels=targets, logits=logits)
@@ -138,7 +138,7 @@ class WeightedCrossEntropyTest(test.TestCase):
 
   def testOutput(self):
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         logits, targets, pos_weight, losses = self._Inputs(dtype=dtypes.float32)
         loss = nn_impl.weighted_cross_entropy_with_logits(
             targets=targets, logits=logits, pos_weight=pos_weight)
@@ -148,7 +148,7 @@ class WeightedCrossEntropyTest(test.TestCase):
 
   def testOutputMultiDim(self):
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         logits, targets, pos_weight, losses = self._Inputs(
             dtype=dtypes.float32, sizes=[2, 2, 2])
         loss = nn_impl.weighted_cross_entropy_with_logits(
diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/python/ops/quantized_conv_ops_test.py
index 4ac2a8f634..f7fa264461 100644
--- a/tensorflow/python/ops/quantized_conv_ops_test.py
+++ b/tensorflow/python/ops/quantized_conv_ops_test.py
@@ -60,7 +60,7 @@ class Conv2DTest(test.TestCase):
     x2 = x2.astype(np.uint8).reshape(filter_in_sizes)
     x2_min = 0.0
     x2_max = 255.0
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtypes.quint8)
       t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtypes.quint8)
       conv = nn_ops.quantized_conv2d(
diff --git a/tensorflow/python/ops/quantized_ops_test.py b/tensorflow/python/ops/quantized_ops_test.py
index d590bc4be6..0f3b04e4ad 100644
--- a/tensorflow/python/ops/quantized_ops_test.py
+++ b/tensorflow/python/ops/quantized_ops_test.py
@@ -33,7 +33,7 @@ class QuantizedOpsTest(test.TestCase):
 
   def testQuantizeOp(self):
     expected_output = [1, 1, 2, 127, 255, 255]
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       x = constant_op.constant(
           [1.0, 1.25, 1.75, 127.0, 255.0, 500.0],
           shape=[6],
@@ -47,7 +47,7 @@ class QuantizedOpsTest(test.TestCase):
   def testDequantizeOp(self):
     expected_output = [1.0, 2.0, 4.0, 8.0, 16.0, 255.0]
     inp = np.array([1, 2, 4, 8, 16, 255]).astype(np.uint8)
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       x = constant_op.constant(inp, shape=[6], dtype=dtypes.quint8)
       x_min = 0.0
       x_max = 255.0
diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py
index 9bc4098d5b..b9dfc79311 100644
--- a/tensorflow/python/ops/special_math_ops_test.py
+++ b/tensorflow/python/ops/special_math_ops_test.py
@@ -39,7 +39,7 @@ class LBetaTest(test.TestCase):
     # Should evaluate to 1 and 1/2.
     x_one = [1, 1.]
     x_one_half = [2, 1.]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllClose(
           1, self.evaluate(math_ops.exp(special_math_ops.lbeta(x_one))))
       self.assertAllClose(
@@ -50,7 +50,7 @@ class LBetaTest(test.TestCase):
     # Should evaluate to 1 and 1/2.
     x_one = [1, 1.]
     x_one_half = [2, 1.]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ph = array_ops.placeholder(dtypes.float32)
       beta_ph = math_ops.exp(special_math_ops.lbeta(ph))
       self.assertAllClose(1, beta_ph.eval(feed_dict={ph: x_one}))
@@ -65,7 +65,7 @@ class LBetaTest(test.TestCase):
     #     = Gamma(1) * Gamma(1) * Gamma(1) * Gamma(1) / Gamma(1 + 1 + 1 + 1)
     #     = 1 / 6
     expected_beta_x = 1 / 6 * np.ones((3, 2, 3))
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x_ph = array_ops.placeholder(dtypes.float32, [3, 2, 3, None])
       beta_ph = math_ops.exp(special_math_ops.lbeta(x_ph))
       self.assertAllClose(expected_beta_x,
@@ -75,7 +75,7 @@ class LBetaTest(test.TestCase):
   def test_two_dimensional_arg(self):
     # Should evaluate to 1/2.
     x_one_half = [[2, 1.], [2, 1.]]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllClose(
           [0.5, 0.5],
           self.evaluate(math_ops.exp(special_math_ops.lbeta(x_one_half))))
@@ -84,7 +84,7 @@ class LBetaTest(test.TestCase):
   def test_two_dimensional_arg_dynamic(self):
     # Should evaluate to 1/2.
     x_one_half = [[2, 1.], [2, 1.]]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ph = array_ops.placeholder(dtypes.float32)
       beta_ph = math_ops.exp(special_math_ops.lbeta(ph))
       self.assertAllClose([0.5, 0.5],
@@ -94,7 +94,7 @@ class LBetaTest(test.TestCase):
   def test_two_dimensional_proper_shape(self):
     # Should evaluate to 1/2.
     x_one_half = [[2, 1.], [2, 1.]]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllClose(
           [0.5, 0.5],
           self.evaluate(math_ops.exp(special_math_ops.lbeta(x_one_half))))
@@ -107,7 +107,7 @@ class LBetaTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def test_complicated_shape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = ops.convert_to_tensor(np.random.rand(3, 2, 2))
       self.assertAllEqual(
           (3, 2), self.evaluate(array_ops.shape(special_math_ops.lbeta(x))))
@@ -121,7 +121,7 @@ class LBetaTest(test.TestCase):
     # as the answer, always.
     x_a = [5.5]
     x_b = [0.1]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllClose(
           1, self.evaluate(math_ops.exp(special_math_ops.lbeta(x_a))))
       self.assertAllClose(
@@ -130,7 +130,7 @@ class LBetaTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def test_empty_rank1_returns_negative_infinity(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([], shape=[0])
       lbeta_x = special_math_ops.lbeta(x)
       expected_result = constant_op.constant(-np.inf, shape=())
@@ -141,7 +141,7 @@ class LBetaTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def test_empty_rank2_with_zero_last_dim_returns_negative_infinity(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       event_size = 0
       for batch_size in [0, 1, 2]:
         x = constant_op.constant([], shape=[batch_size, event_size])
@@ -154,7 +154,7 @@ class LBetaTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def test_empty_rank2_with_zero_batch_dim_returns_empty(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       batch_size = 0
       for event_size in [0, 1, 2]:
         x = constant_op.constant([], shape=[batch_size, event_size])
@@ -333,7 +333,7 @@ class EinsumTest(test.TestCase):
     input_tensors = [constant_op.constant(val) for val in input_vals]
     output_tensor = special_math_ops.einsum(axes, *input_tensors)
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       output_value = self.evaluate(output_tensor)
 
     correct_value = np.einsum(axes, *input_vals)
-- 
GitLab


From 3075443673a80993e84c576a8a7d099741063aab Mon Sep 17 00:00:00 2001
From: Eddie Zhou <eddz@google.com>
Date: Fri, 12 Oct 2018 08:46:03 -0700
Subject: [PATCH 0863/1085] Make SDCA use VariableV1 (locking defaults change).

PiperOrigin-RevId: 216868252
---
 tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index b5099a0bf6..94ff1dd5b0 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -204,7 +204,7 @@ class SdcaModel(object):
             with ops.colocate_with(v):
               # TODO(andreasst): remove SDCAOptimizer suffix once bug 30843109
               # is fixed.
-              slot_var = var_ops.Variable(
+              slot_var = var_ops.VariableV1(
                   initial_value=array_ops.zeros_like(v.initialized_value(),
                                                      dtypes.float32),
                   name=v.op.name + '_unshrinked/SDCAOptimizer')
@@ -216,7 +216,7 @@ class SdcaModel(object):
             # TODO(andreasst): remove SDCAOptimizer suffix once bug 30843109 is
             # fixed.
             self._slots['unshrinked_' + name].append(
-                var_ops.Variable(
+                var_ops.VariableV1(
                     array_ops.zeros_like(var.initialized_value(),
                                          dtypes.float32),
                     name=var.op.name + '_unshrinked/SDCAOptimizer'))
-- 
GitLab


From 51f0eb5849be0f9ce20e5eb8370158088711f19d Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Fri, 12 Oct 2018 08:51:50 -0700
Subject: [PATCH 0864/1085] Add methods to xla::Compiler which operate on an
 HloModuleGroup. Methods which currently take a std::vector<HloModule> now
 take a HloModuleGroup.

PiperOrigin-RevId: 216869028
---
 tensorflow/compiler/xla/service/BUILD         |  1 +
 .../xla/service/compile_only_service.cc       |  6 +++--
 tensorflow/compiler/xla/service/compiler.cc   |  4 +--
 tensorflow/compiler/xla/service/compiler.h    | 27 ++++++++++++++-----
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  7 +++--
 .../compiler/xla/service/cpu/cpu_compiler.h   |  2 +-
 .../xla/service/gpu/nvptx_compiler.cc         |  5 ++--
 .../compiler/xla/service/gpu/nvptx_compiler.h |  2 +-
 .../compiler/xla/service/hlo_module_group.cc  |  5 ++--
 .../compiler/xla/service/hlo_module_group.h   | 13 ++++++++-
 .../xla/service/hlo_module_group_test.cc      |  2 +-
 .../xla/service/interpreter/compiler.cc       | 23 +++++++++++++---
 .../xla/service/interpreter/compiler.h        | 11 ++++++--
 .../compiler/xla/service/llvm_compiler.cc     | 19 ++++++++++++-
 .../compiler/xla/service/llvm_compiler.h      | 11 +++++++-
 tensorflow/compiler/xla/service/service.cc    |  9 ++++---
 .../compiler/xla/tests/codegen_test_base.cc   |  5 ++--
 .../compiler/xla/tests/llvm_compiler_test.cc  | 13 ++++-----
 18 files changed, 121 insertions(+), 44 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 6c3b9764b7..7d03eba800 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -860,6 +860,7 @@ cc_library(
         ":executable",
         ":hlo",
         ":hlo_module_config",
+        ":hlo_module_group",
         ":logical_buffer",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc
index bd5045b9b9..c9b0e4c08c 100644
--- a/tensorflow/compiler/xla/service/compile_only_service.cc
+++ b/tensorflow/compiler/xla/service/compile_only_service.cc
@@ -103,8 +103,10 @@ CompileOnlyService::CompileAheadOfTime(
     hlo_modules.push_back(std::move(hlo_module));
   }
 
-  return compiler_->CompileAheadOfTime(std::move(hlo_modules), options,
-                                       metadata);
+  return compiler_->CompileAheadOfTime(
+      absl::make_unique<HloModuleGroup>(hlo_modules[0]->name(),
+                                        absl::MakeSpan(hlo_modules)),
+      options, metadata);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc
index 687ecafe0c..80c630c620 100644
--- a/tensorflow/compiler/xla/service/compiler.cc
+++ b/tensorflow/compiler/xla/service/compiler.cc
@@ -45,7 +45,7 @@ Compiler::ComputeDefaultBackendConfig(const HloInstruction& hlo,
 // Define a default version where metadata is not used.
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
 Compiler::CompileAheadOfTime(
-    std::vector<std::unique_ptr<HloModule>> modules,
+    std::unique_ptr<HloModuleGroup> module_group,
     const AotCompilationOptions& options,
     std::unique_ptr<AotCompilationMetadata>* metadata) {
   if (metadata != nullptr) {
@@ -53,7 +53,7 @@ Compiler::CompileAheadOfTime(
         "Populating AotCompilationMetadata is not implemented on this "
         "compiler.");
   }
-  return CompileAheadOfTime(std::move(modules), options);
+  return CompileAheadOfTime(std::move(module_group), options);
 }
 
 /* static */ std::map<se::Platform::Id, Compiler::CompilerFactory>*
diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h
index 1fdda31c34..9d9796239c 100644
--- a/tensorflow/compiler/xla/service/compiler.h
+++ b/tensorflow/compiler/xla/service/compiler.h
@@ -32,6 +32,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
+#include "tensorflow/compiler/xla/service/hlo_module_group.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -135,6 +136,13 @@ class Compiler {
       std::unique_ptr<HloModule> module, se::StreamExecutor* executor,
       DeviceMemoryAllocator* device_allocator) = 0;
 
+  // Overload which optimizes a HLO module group, a set of module which runs
+  // concurrently on multiple devices potentially communicating data between the
+  // modules.
+  virtual Status RunHloPasses(HloModuleGroup* module_group,
+                              se::StreamExecutor* executor,
+                              DeviceMemoryAllocator* device_allocator) = 0;
+
   // Compiles the HLO module for execution on a device given by the executor,
   // and returns an executable object or an error status. No HLO passes are
   // applied to module. Generally a module should be passed through RunHloPasses
@@ -145,12 +153,17 @@ class Compiler {
   // (not just type of device) indicated by the executor.
   //
   // device_allocator is optional; see RunHloPasses.
-  //
-  // Use the overload below to compile computations that run in parallel.
   virtual StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> module, se::StreamExecutor* executor,
       DeviceMemoryAllocator* device_allocator) = 0;
 
+  // Overload which compiles a set of HLO modules that can run in parallel,
+  // potentially communicating data between the modules.
+  virtual StatusOr<std::vector<std::unique_ptr<Executable>>> RunBackend(
+      std::unique_ptr<HloModuleGroup> module_group,
+      std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+      DeviceMemoryAllocator* device_allocator) = 0;
+
   // Compiles a set of HLO modules that can run in parallel, potentially
   // communicating data between the modules, and returns a corresponding
   // sequence of executable objects.
@@ -160,7 +173,7 @@ class Compiler {
   // TODO(b/68666782): Remove this method after adding support for multiple
   // modules to RunHloPasses and RunBackends.
   virtual StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::vector<std::unique_ptr<HloModule>> modules,
+      std::unique_ptr<HloModuleGroup> module_group,
       std::vector<std::vector<se::StreamExecutor*>> stream_exec,
       DeviceMemoryAllocator* device_allocator) = 0;
 
@@ -184,16 +197,16 @@ class Compiler {
   ComputeDefaultBackendConfig(const HloInstruction& hlo,
                               se::StreamExecutor* executor) const;
 
-  // Compiles the HLO module for ahead-of-time execution.  This is intended for
-  // use in static compilation.
+  // Compiles the HLO module group for ahead-of-time execution.  This is
+  // intended for use in static compilation.
   virtual StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      const AotCompilationOptions& options) = 0;
 
   // Similar to CompileAheadOfTime above but AotCompilationMetadata
   // has an argument that can be populated during compilation.
   virtual StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      const AotCompilationOptions& options,
                      std::unique_ptr<AotCompilationMetadata>* metadata);
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 68c715a086..da01c0caf2 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -676,9 +676,12 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
+CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                                 const AotCompilationOptions& aot_options) {
-  TF_RET_CHECK(!modules.empty());
+  TF_RET_CHECK(!module_group->empty());
+  std::vector<std::unique_ptr<HloModule>> modules =
+      module_group->ConsumeModules();
+
   std::call_once(llvm_command_line_options_initialized,
                  &llvm_ir::InitializeLLVMCommandLineOptions,
                  modules[0]->config());
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
index f2af923782..c67307548d 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
@@ -142,7 +142,7 @@ class CpuCompiler : public LLVMCompiler {
       DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      const AotCompilationOptions& options) override;
 
   se::Platform::Id PlatformId() const override;
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 829d1499bc..791d414c91 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -825,9 +825,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(const string& ptx,
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-NVPTXCompiler::CompileAheadOfTime(
-    std::vector<std::unique_ptr<HloModule>> module,
-    const AotCompilationOptions& options) {
+NVPTXCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
+                                  const AotCompilationOptions& options) {
   return Unimplemented(
       "not yet implemented: NVPTXCompiler::CompileAheadOfTime");
 }
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
index c4a0b727cd..f79ae2990a 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
@@ -59,7 +59,7 @@ class NVPTXCompiler : public LLVMCompiler {
       DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> module,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      AotCompilationOptions const& options) override;
 
   se::Platform::Id PlatformId() const override;
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.cc b/tensorflow/compiler/xla/service/hlo_module_group.cc
index f9b56ef464..8999ac9f32 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group.cc
@@ -17,9 +17,8 @@ limitations under the License.
 
 namespace xla {
 
-HloModuleGroup::HloModuleGroup(absl::string_view name,
-                               std::unique_ptr<HloModule> module)
-    : name_(name) {
+HloModuleGroup::HloModuleGroup(std::unique_ptr<HloModule> module)
+    : name_(module->name()) {
   push_back(std::move(module));
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.h b/tensorflow/compiler/xla/service/hlo_module_group.h
index 7338be8b9c..7c39cf1781 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group.h
@@ -35,7 +35,7 @@ class HloModuleGroup {
   explicit HloModuleGroup(absl::string_view name) : name_(name) {}
 
   // Construct a module group containing a single module.
-  HloModuleGroup(absl::string_view name, std::unique_ptr<HloModule> module);
+  explicit HloModuleGroup(std::unique_ptr<HloModule> module);
 
   // Construct a module group containing any number of modules.
   HloModuleGroup(absl::string_view name,
@@ -50,11 +50,16 @@ class HloModuleGroup {
   // Add a module to the back of vector of modules in the group.
   void push_back(std::unique_ptr<HloModule> module);
 
+  // Replaces the existing module at the given index with the given module. The
+  // existing module is discarded.
+  void ReplaceModule(int index, std::unique_ptr<HloModule> module);
+
   // Moves all modules from the group into the returned vector. After this
   // method runs, the module group will be empty.
   std::vector<std::unique_ptr<HloModule>> ConsumeModules();
 
   string name() const { return name_; }
+
   string ToString() const;
 
   // Serialize the module group to/from a proto.
@@ -63,6 +68,12 @@ class HloModuleGroup {
       const HloModuleGroupProto& proto,
       absl::Span<const HloModuleConfig> module_configs);
 
+  // Returns the number of modules in the module group.
+  int size() const { return modules_.size(); }
+
+  // Returns true if there are no modules in the module group.
+  bool empty() const { return modules_.empty(); }
+
  private:
   string name_;
 
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_test.cc b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
index b7b12cb72b..5a9a86af56 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
@@ -46,7 +46,7 @@ ENTRY %entry (x: f32[], y: f32[]) -> f32[] {
 )";
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
                           ParseHloString(text));
-  HloModuleGroup group(TestName(), std::move(module));
+  HloModuleGroup group(std::move(module));
 
   EXPECT_EQ(group.modules().size(), 1);
   EXPECT_THAT(
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 7c79eb7d79..60478e5850 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -57,6 +57,12 @@ StatusOr<std::unique_ptr<HloModule>> InterpreterCompiler::RunHloPasses(
   return std::move(hlo_module);
 }
 
+Status InterpreterCompiler::RunHloPasses(
+    HloModuleGroup* module_group, se::StreamExecutor* executor,
+    DeviceMemoryAllocator* device_allocator) {
+  return Unimplemented("Module group compilation not supported on Interpreter");
+}
+
 StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
     std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
     DeviceMemoryAllocator* /*device_allocator*/) {
@@ -76,17 +82,26 @@ StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
   return std::move(executable);
 }
 
+StatusOr<std::vector<std::unique_ptr<Executable>>>
+InterpreterCompiler::RunBackend(
+    std::unique_ptr<HloModuleGroup> module_group,
+    std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+    DeviceMemoryAllocator* device_allocator) {
+  return Unimplemented(
+      "Module group compilation is not supported on Interpreter.");
+}
+
 StatusOr<std::vector<std::unique_ptr<Executable>>> InterpreterCompiler::Compile(
-    std::vector<std::unique_ptr<HloModule>> /*hlo_modules*/,
+    std::unique_ptr<HloModuleGroup> /*module_group*/,
     std::vector<std::vector<se::StreamExecutor*>> /*stream_execs*/,
     DeviceMemoryAllocator* /*device_allocator*/) {
-  return tensorflow::errors::Unimplemented(
-      "Compilation of multiple HLO modules is not supported on Interpreter.");
+  return Unimplemented(
+      "Module group compilation is not supported on Interpreter.");
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
 InterpreterCompiler::CompileAheadOfTime(
-    std::vector<std::unique_ptr<HloModule>> hlo_modules,
+    std::unique_ptr<HloModuleGroup> module_group,
     const AotCompilationOptions& aot_options) {
   return tensorflow::errors::InvalidArgument(
       "AOT compilation not supported on Interpreter");
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h
index e90ae3e818..f14970f9ff 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.h
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.h
@@ -46,18 +46,25 @@ class InterpreterCompiler : public Compiler {
   StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
       std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
       DeviceMemoryAllocator* device_allocator) override;
+  Status RunHloPasses(HloModuleGroup* module_group,
+                      se::StreamExecutor* executor,
+                      DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
       DeviceMemoryAllocator* device_allocator) override;
+  StatusOr<std::vector<std::unique_ptr<Executable>>> RunBackend(
+      std::unique_ptr<HloModuleGroup> module_group,
+      std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+      DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::vector<std::unique_ptr<HloModule>> hlo_modules,
+      std::unique_ptr<HloModuleGroup> module_group,
       std::vector<std::vector<se::StreamExecutor*>> stream_exec,
       DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> hlo_modules,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      const AotCompilationOptions& aot_options) override;
 
   HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override;
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc
index b17c9d5045..6a63309762 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.cc
+++ b/tensorflow/compiler/xla/service/llvm_compiler.cc
@@ -21,8 +21,23 @@ limitations under the License.
 #endif
 
 namespace xla {
+Status LLVMCompiler::RunHloPasses(HloModuleGroup* module_group,
+                                  se::StreamExecutor* executor,
+                                  DeviceMemoryAllocator* device_allocator) {
+  return Unimplemented(
+      "Model partitioning not implemented for the CPU/GPU compilers!");
+}
+
+StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::RunBackend(
+    std::unique_ptr<HloModuleGroup> module_group,
+    std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+    DeviceMemoryAllocator* device_allocator) {
+  return Unimplemented(
+      "Model partitioning not implemented for the CPU/GPU compilers!");
+}
+
 StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile(
-    std::vector<std::unique_ptr<HloModule>> modules,
+    std::unique_ptr<HloModuleGroup> module_group,
     std::vector<std::vector<se::StreamExecutor*>> stream_execs,
     DeviceMemoryAllocator* device_allocator) {
   // Tensorflow tries to enable the following behaviors in all its threads:
@@ -38,6 +53,8 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile(
   tensorflow::port::ScopedDontFlushDenormal dont_flush_denormals;
 
   std::vector<std::unique_ptr<Executable>> result;
+  std::vector<std::unique_ptr<HloModule>> modules =
+      module_group->ConsumeModules();
   for (size_t i = 0; i < modules.size(); i++) {
     if (stream_execs[i].size() != 1) {
       return Unimplemented(
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h
index f1c623508c..299f670c18 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.h
+++ b/tensorflow/compiler/xla/service/llvm_compiler.h
@@ -69,8 +69,17 @@ class LLVMCompiler : public Compiler {
   using Compiler::RunBackend;
   using Compiler::RunHloPasses;
 
+  Status RunHloPasses(HloModuleGroup* module_group,
+                      se::StreamExecutor* executor,
+                      DeviceMemoryAllocator* device_allocator) override;
+
+  StatusOr<std::vector<std::unique_ptr<Executable>>> RunBackend(
+      std::unique_ptr<HloModuleGroup> module_group,
+      std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+      DeviceMemoryAllocator* device_allocator) override;
+
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::vector<std::unique_ptr<HloModule>> modules,
+      std::unique_ptr<HloModuleGroup> module_group,
       std::vector<std::vector<se::StreamExecutor*>> stream_execs,
       DeviceMemoryAllocator* device_allocator) override;
 
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index d290c0eb5d..cb6a9e6707 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -341,18 +341,19 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> Service::BuildExecutables(
   }
 
   CHECK_EQ(module_protos.size(), module_configs.size());
-  std::vector<std::unique_ptr<HloModule>> modules;
+  auto module_group =
+      absl::make_unique<HloModuleGroup>(module_protos[0]->name());
   for (int64 i = 0; i < module_protos.size(); ++i) {
     const HloModuleProto* proto = module_protos[i];
     const HloModuleConfig& config = *module_configs[i];
     TF_ASSIGN_OR_RETURN(auto module, CreateModuleFromProto(*proto, config));
-    modules.push_back(std::move(module));
+    module_group->push_back(std::move(module));
   }
 
   TF_ASSIGN_OR_RETURN(
       std::vector<std::unique_ptr<Executable>> executables,
-      backend->compiler()->Compile(std::move(modules), std::move(executors),
-                                   device_allocator));
+      backend->compiler()->Compile(std::move(module_group),
+                                   std::move(executors), device_allocator));
 
   for (size_t i = 0; i < module_protos.size(); ++i) {
     if (!module_configs[i]->debug_options().xla_dump_executions_to().empty()) {
diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.cc b/tensorflow/compiler/xla/tests/codegen_test_base.cc
index 022641394f..fbebe04087 100644
--- a/tensorflow/compiler/xla/tests/codegen_test_base.cc
+++ b/tensorflow/compiler/xla/tests/codegen_test_base.cc
@@ -32,11 +32,10 @@ StatusOr<std::unique_ptr<AotCompilationResult>>
 CodegenTestBase::CompileToAotCompilationResult(
     std::unique_ptr<HloModule> hlo_module,
     const AotCompilationOptions& options) {
-  std::vector<std::unique_ptr<HloModule>> hlo_modules;
-  hlo_modules.push_back(std::move(hlo_module));
+  auto module_group = absl::make_unique<HloModuleGroup>(std::move(hlo_module));
   TF_ASSIGN_OR_RETURN(
       std::vector<std::unique_ptr<AotCompilationResult>> results,
-      backend().compiler()->CompileAheadOfTime(std::move(hlo_modules),
+      backend().compiler()->CompileAheadOfTime(std::move(module_group),
                                                options));
   return std::move(results.front());
 }
diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
index 8d65869557..c622b29509 100644
--- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
+++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
@@ -93,15 +93,16 @@ class LLVMCompilerTest : public ::testing::Test {
     std::unique_ptr<HloModule> hlo_module = CreateNewModule();
     hlo_module->AddEntryComputation(builder.Build());
 
-    std::vector<std::unique_ptr<HloModule>> modules;
-    modules.push_back(hlo_module->Clone());
-    modules.push_back(std::move(hlo_module));
+    auto module_group = absl::make_unique<HloModuleGroup>("test_module_group");
+    module_group->push_back(hlo_module->Clone());
+    module_group->push_back(std::move(hlo_module));
 
     std::vector<std::vector<se::StreamExecutor *>> executors;
     executors.push_back({backend_->default_stream_executor()});
     executors.push_back({backend_->default_stream_executor()});
 
-    EXPECT_IS_OK(compiler->Compile(std::move(modules), std::move(executors),
+    EXPECT_IS_OK(compiler->Compile(std::move(module_group),
+                                   std::move(executors),
                                    /*device_allocator=*/nullptr));
   }
 
@@ -150,12 +151,12 @@ TEST_F(GpuCompilerTest, HooksTest) {
   TestCompilerHooks(&compiler);
 }
 
-TEST_F(CpuCompilerTest, MultiModuleCompilation) {
+TEST_F(CpuCompilerTest, CpuMultiModuleCompilation) {
   cpu::CpuCompiler compiler;
   TestMultiModuleCompilation(&compiler);
 }
 
-TEST_F(GpuCompilerTest, MultModuleCompilation) {
+TEST_F(GpuCompilerTest, NVPTXMultiModuleCompilation) {
   gpu::NVPTXCompiler compiler;
   TestMultiModuleCompilation(&compiler);
 }
-- 
GitLab


From 9fa9cd1c1c2cf393e8ff7df3876cd11f086e4821 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 09:18:16 -0700
Subject: [PATCH 0865/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 216872436
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 39 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 39 +++++++++++++++++++
 2 files changed, 78 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 80d4528cd7..98ac343d21 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -44092,6 +44092,45 @@ op {
     has_minimum: true
   }
 }
+op {
+  name: "RaggedRange"
+  input_arg {
+    name: "starts"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "limits"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "deltas"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "rt_nested_splits"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "rt_dense_values"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "RandomCrop"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index d8814355fe..8b5881a850 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -22440,6 +22440,45 @@ op {
     has_minimum: true
   }
 }
+op {
+  name: "RaggedRange"
+  input_arg {
+    name: "starts"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "limits"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "deltas"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "rt_nested_splits"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "rt_dense_values"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "RandomCrop"
   input_arg {
-- 
GitLab


From d14c6ed4ead3685638341b650641940e6190a0d3 Mon Sep 17 00:00:00 2001
From: Guozhong Zhuang <guozhong.zhuang@intel.com>
Date: Fri, 12 Oct 2018 09:45:03 -0700
Subject: [PATCH 0866/1085] Clean out MKL_ML code from backward conv2D ops

---
 .../core/kernels/mkl_conv_grad_filter_ops.cc  | 394 +-----------------
 .../core/kernels/mkl_conv_grad_input_ops.cc   | 324 --------------
 tensorflow/core/kernels/mkl_conv_ops.h        |   8 -
 3 files changed, 1 insertion(+), 725 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index f406ad2ab5..eebd788545 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -38,25 +38,17 @@ limitations under the License.
 #include "tensorflow/core/util/use_cudnn.h"
 #include "tensorflow/core/util/work_sharder.h"
 
-#ifndef INTEL_MKL_ML_ONLY
 #include "mkldnn.hpp"
+#include "tensorflow/core/util/mkl_util.h"
 
 using mkldnn::convolution_backward_weights;
 using mkldnn::memory;
 using mkldnn::prop_kind;
 using mkldnn::stream;
-#else
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
-#endif
-
-#include "tensorflow/core/util/mkl_util.h"
 
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-#ifndef INTEL_MKL_ML_ONLY
-
 struct MklConvBwdFilterParams {
   memory::dims src_dims;
   memory::dims diff_filter_dims;
@@ -358,388 +350,6 @@ class MklConvBwdFilterPrimitiveFactory : public MklPrimitiveFactory<T> {
   }
 };
 
-#endif
-
-#ifdef INTEL_MKL_ML_ONLY
-
-template <typename Device, class T>
-class MklConv2DCustomBackpropFilterOp : public OpKernel {
- public:
-  explicit MklConv2DCustomBackpropFilterOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string data_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
-    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
-                errors::InvalidArgument("Invalid data format"));
-
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    int stride_n = GetTensorDim(strides_, data_format_, 'N');
-    int stride_c = GetTensorDim(strides_, data_format_, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    MklConv2DGradFilterOpContext mkl_context;
-    const Tensor& input = MklGetInput(context, 0);
-    GetMklShape(context, 0, &(mkl_context.input_shape));
-    bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor();
-
-    const Tensor& filter_sizes = MklGetInput(context, 1);
-
-    const Tensor& out_backprop = MklGetInput(context, 2);
-    GetMklShape(context, 2, &(mkl_context.out_backprop_shape));
-    bool out_backprop_in_mkl_format =
-        mkl_context.out_backprop_shape.IsMklTensor();
-
-    TensorShape input_shape, filter_shape, out_backprop_shape;
-
-    OP_REQUIRES(
-        context, TensorShapeUtils::IsVector(filter_sizes.shape()),
-        errors::InvalidArgument(
-            "Conv2DCustomBackpropFilter: filter_sizes input must be 1-dim, "
-            "not ",
-            filter_sizes.dims()));
-    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                                filter_sizes.vec<int32>(), &filter_shape));
-
-    ConvBackpropDimensions backprop_dims;
-
-    // Generate shape for input if input is in MKL format.
-    if (input_in_mkl_format) {
-      OP_REQUIRES(context, mkl_context.input_shape.GetDimension() == 4,
-                  errors::InvalidArgument(
-                      "Conv2DCustomBackpropFilter: input size must be 4-dim"));
-
-      MklSizesToTFSizes(context, data_format_, mkl_context.input_shape,
-                        &input_shape);
-    } else {
-      input_shape = input.shape();
-    }
-
-    // Generate shape for outback prop if input is in MKL format.
-    if (out_backprop_in_mkl_format) {
-      OP_REQUIRES(
-          context, mkl_context.out_backprop_shape.GetDimension() == 4,
-          errors::InvalidArgument(
-              "Conv2DCustomBackpropFilter: outbackprop size must be 4-dim"));
-
-      MklSizesToTFSizes(context, data_format_, mkl_context.out_backprop_shape,
-                        &out_backprop_shape);
-    } else {
-      out_backprop_shape = out_backprop.shape();
-    }
-
-    OP_REQUIRES_OK(context,
-                   ConvBackpropComputeDimensions(
-                       "Conv2DCustomBackpropFilter", /*num_spatial_dims=*/2,
-                       input_shape, filter_shape, out_backprop_shape, strides_,
-                       padding_, data_format_, &backprop_dims));
-
-    int64 pad_top, pad_bottom;
-    int64 pad_left, pad_right;
-    OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
-                                backprop_dims.spatial_dims[0].input_size,
-                                backprop_dims.spatial_dims[0].filter_size,
-                                backprop_dims.spatial_dims[0].stride, padding_,
-                                &backprop_dims.spatial_dims[0].output_size,
-                                &pad_top, &pad_bottom));
-    OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
-                                backprop_dims.spatial_dims[1].input_size,
-                                backprop_dims.spatial_dims[1].filter_size,
-                                backprop_dims.spatial_dims[1].stride, padding_,
-                                &backprop_dims.spatial_dims[1].output_size,
-                                &pad_left, &pad_right));
-
-    // Create MKL primitives for convolution filter grad
-    mkl_context.in_dims = input_in_mkl_format
-                              ? mkl_context.input_shape.GetDimension()
-                              : input.dims();
-    mkl_context.out_dims = out_backprop_in_mkl_format
-                               ? mkl_context.out_backprop_shape.GetDimension()
-                               : out_backprop.dims();
-    mkl_context.in_sizes[0] =
-        static_cast<size_t>(backprop_dims.spatial_dims[1].input_size);
-    mkl_context.in_sizes[1] =
-        static_cast<size_t>(backprop_dims.spatial_dims[0].input_size);
-    mkl_context.in_sizes[2] = static_cast<size_t>(backprop_dims.in_depth);
-    mkl_context.in_sizes[3] = static_cast<size_t>(backprop_dims.batch_size);
-    mkl_context.out_sizes[0] =
-        static_cast<size_t>(backprop_dims.spatial_dims[1].output_size);
-    mkl_context.out_sizes[1] =
-        static_cast<size_t>(backprop_dims.spatial_dims[0].output_size);
-    mkl_context.out_sizes[2] = static_cast<size_t>(backprop_dims.out_depth);
-    mkl_context.out_sizes[3] = static_cast<size_t>(backprop_dims.batch_size);
-    mkl_context.input_offsets[0] = static_cast<int>(-pad_left);
-    mkl_context.input_offsets[1] = static_cast<int>(-pad_top);
-    mkl_context.conv_strides[0] =
-        static_cast<size_t>(backprop_dims.spatial_dims[1].stride);
-    mkl_context.conv_strides[1] =
-        static_cast<size_t>(backprop_dims.spatial_dims[0].stride);
-
-    GetStridesFromSizes(data_format_, mkl_context.in_strides,
-                        mkl_context.in_sizes);
-    GetStridesFromSizes(data_format_, mkl_context.out_strides,
-                        mkl_context.out_sizes);
-
-    // MKL understands dimensions in 0, 1, 2, and 3 indices denotes
-    // filter cols, rows, input channels, and output depth/channels.
-    mkl_context.filter_dims = 4;
-    mkl_context.filter_sizes[0] = backprop_dims.spatial_dims[1].filter_size;
-    mkl_context.filter_sizes[1] = backprop_dims.spatial_dims[0].filter_size;
-    mkl_context.filter_sizes[2] = backprop_dims.in_depth;
-    mkl_context.filter_sizes[3] = backprop_dims.out_depth;
-
-    // We want filter grad to be in TF format, so
-    // make the strides accordingly to reflect this fact.
-    // Note TF filter layout : (rows, cols, in_depth, out_depth),
-    // while row is the innermost dimension.
-    mkl_context.filter_strides[0] =
-        backprop_dims.out_depth * backprop_dims.in_depth;
-    mkl_context.filter_strides[1] = backprop_dims.out_depth *
-                                    backprop_dims.in_depth *
-                                    backprop_dims.spatial_dims[1].filter_size;
-    mkl_context.filter_strides[2] = backprop_dims.out_depth;
-    mkl_context.filter_strides[3] = 1;
-
-    mkl_context.conv_strides[0] = backprop_dims.spatial_dims[1].stride;
-    mkl_context.conv_strides[1] = backprop_dims.spatial_dims[0].stride;
-
-    // Create convolution-grad-filter primitive
-    CHECK_EQ(dnnConvolutionCreateBackwardFilter_F32(
-                 &mkl_context.prim_conv_bwdfilter, nullptr,
-                 dnnAlgorithmConvolutionDirect, mkl_context.in_dims,
-                 mkl_context.in_sizes, mkl_context.out_sizes,
-                 mkl_context.filter_sizes, mkl_context.conv_strides,
-                 mkl_context.input_offsets, dnnBorderZeros),
-             E_SUCCESS);
-
-    // Create the layouts for entities in received context.
-    mkl_context.MklCreateInputLayouts(context);
-
-    // Mkl needs the entities in its native format.
-    // So create temporary tensors along with buffers to
-    // convert the received entities.
-    Tensor mkl_tmp_input_buf_tensor, mkl_tmp_out_backprop_buf_tensor;
-    // This preparation sets (1) dnnResourceSrc (2) dnnResourceDiffDst
-    mkl_context.MklPrepareInputs(context, &mkl_tmp_input_buf_tensor,
-                                 &mkl_tmp_out_backprop_buf_tensor);
-
-    // Final conv-grad-filter should be in TF layout.
-    Tensor* grad_filter;
-    mkl_context.grad_filter_shape.SetMklTensor(false);
-    mkl_context.grad_filter_shape.SetTfLayout(mkl_context.filter_dims,
-                                              mkl_context.filter_sizes,
-                                              mkl_context.filter_strides);
-    AllocateOutputSetMklShape(context, 0, &grad_filter, filter_shape,
-                              mkl_context.grad_filter_shape);
-
-    // Need to set member variable for TF layout
-    mkl_context.lt_grad_filter = mkl_context.grad_filter_shape.GetTfLayout();
-
-    // MKL conv-grad-filter might produce grad in its internal layout
-    Tensor mkl_tmp_grad_filter_buf_tensor;
-    // This preparation sets conversion primitive if required
-    // and allocates temporary tensor and its buffer without doing conversions.
-    // Also sets (3) dnnResourceDiffFilter accordingly
-    mkl_context.MklPrepareGradFilter(context, grad_filter,
-                                     &mkl_tmp_grad_filter_buf_tensor);
-
-    // After setting all the required dnnResources, ready for execution!
-    CHECK_EQ(
-        dnnExecute_F32(mkl_context.prim_conv_bwdfilter, mkl_context.conv_res),
-        E_SUCCESS);
-
-    // Convert grad-filter to TF layout
-    if (mkl_context.convert_bwdfilter != nullptr) {
-      void* mkl_buf_convert_grad_filter =
-          const_cast<void*>(static_cast<const void*>(
-              mkl_tmp_grad_filter_buf_tensor.flat<T>().data()));
-      void* mkl_buf_grad_filter = const_cast<void*>(
-          static_cast<const void*>(grad_filter->flat<T>().data()));
-      CHECK_EQ(dnnConversionExecute_F32(mkl_context.convert_bwdfilter,
-                                        mkl_buf_convert_grad_filter,
-                                        mkl_buf_grad_filter),
-               E_SUCCESS);
-    }
-
-    mkl_context.MklCleanup();
-  }
-
- private:
-  typedef struct {
-    int in_dims;
-    size_t in_sizes[4];
-    size_t in_strides[4];
-    int out_dims;
-    size_t out_sizes[4];
-    size_t out_strides[4];
-    int filter_dims;
-    size_t filter_sizes[4];
-    size_t filter_strides[4];
-    int input_offsets[2];
-    size_t conv_strides[2];
-    MklShape input_shape, grad_filter_shape, out_backprop_shape;
-    dnnPrimitive_t prim_conv_bwdfilter = nullptr;
-    dnnPrimitive_t convert_bwdfilter = nullptr;
-    dnnLayout_t lt_input = nullptr;
-    dnnLayout_t lt_grad_filter = nullptr;
-    dnnLayout_t lt_out_backprop = nullptr;
-    void* conv_res[dnnResourceNumber];
-
-    void MklCleanup() {
-      // Cleanup member layouts and primitives except "lt_grad_filter_"
-      // which points to MklShape's TFLayout
-      bool input_in_mkl_format = input_shape.IsMklTensor();
-      bool out_backprop_in_mkl_format = out_backprop_shape.IsMklTensor();
-      if (!input_in_mkl_format) dnnLayoutDelete_F32(lt_input);
-      if (!out_backprop_in_mkl_format) dnnLayoutDelete_F32(lt_out_backprop);
-      if (convert_bwdfilter != nullptr) dnnDelete_F32(convert_bwdfilter);
-      dnnDelete_F32(prim_conv_bwdfilter);
-    }
-
-    // Create MKL dnnLayout_t objects for tensors coming into the layer
-    void MklCreateInputLayouts(OpKernelContext* context) {
-      bool input_in_mkl_format = input_shape.IsMklTensor();
-      if (input_in_mkl_format) {
-        lt_input = static_cast<dnnLayout_t>(input_shape.GetCurLayout());
-      } else {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_input, in_dims, in_sizes, in_strides),
-                 E_SUCCESS);
-      }
-
-      bool out_backprop_in_mkl_format = out_backprop_shape.IsMklTensor();
-      if (out_backprop_in_mkl_format) {
-        lt_out_backprop =
-            static_cast<dnnLayout_t>(out_backprop_shape.GetCurLayout());
-      } else {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_out_backprop, out_dims, out_sizes,
-                                     out_strides),
-                 E_SUCCESS);
-      }
-    }
-
-    // Compare incoming tensor layouts with MKL preferred layouts and convert
-    // data to the preferred layout if necessary
-    void MklPrepareInputs(OpKernelContext* context,
-                          Tensor* mkl_tmp_input_buf_tensor,
-                          Tensor* mkl_tmp_out_backprop_buf_tensor) {
-      bool mkl_convert_input, mkl_convert_out_backprop;
-      dnnPrimitive_t mkl_prim_convert_input, mkl_prim_convert_out_backprop;
-      dnnLayout_t mkl_lt_internal_input, mkl_lt_internal_out_backprop;
-      void *mkl_buf_convert_input, *mkl_buf_convert_out_backprop;
-
-      mkl_prim_convert_input = nullptr;
-      mkl_prim_convert_out_backprop = nullptr;
-      mkl_lt_internal_input = nullptr;
-      mkl_lt_internal_out_backprop = nullptr;
-      mkl_buf_convert_input = nullptr;
-      mkl_buf_convert_out_backprop = nullptr;
-
-      // Compare with internal layouts and convert if needed
-      const Tensor& input = MklGetInput(context, 0);
-      void* mkl_buf_input =
-          const_cast<void*>(static_cast<const void*>(input.flat<T>().data()));
-      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
-                   &mkl_lt_internal_input, prim_conv_bwdfilter, dnnResourceSrc),
-               E_SUCCESS);
-      mkl_convert_input =
-          !dnnLayoutCompare_F32(mkl_lt_internal_input, lt_input);
-      if (mkl_convert_input) {
-        CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input, lt_input,
-                                         mkl_lt_internal_input),
-                 E_SUCCESS);
-        AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, mkl_lt_internal_input,
-                       &mkl_buf_convert_input);
-        CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_input, mkl_buf_input,
-                                          mkl_buf_convert_input),
-                 E_SUCCESS);
-        dnnDelete_F32(mkl_prim_convert_input);
-      }
-      dnnLayoutDelete_F32(mkl_lt_internal_input);
-
-      conv_res[dnnResourceSrc] =
-          (mkl_convert_input) ? mkl_buf_convert_input : mkl_buf_input;
-
-      const Tensor& out_backprop = MklGetInput(context, 2);
-      void* mkl_buf_out_backprop = const_cast<void*>(
-          static_cast<const void*>(out_backprop.flat<T>().data()));
-
-      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_out_backprop,
-                                                prim_conv_bwdfilter,
-                                                dnnResourceDiffDst),
-               E_SUCCESS);
-      mkl_convert_out_backprop =
-          !dnnLayoutCompare_F32(mkl_lt_internal_out_backprop, lt_out_backprop);
-      if (mkl_convert_out_backprop) {
-        CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_out_backprop,
-                                         lt_out_backprop,
-                                         mkl_lt_internal_out_backprop),
-                 E_SUCCESS);
-        AllocTmpBuffer(context, mkl_tmp_out_backprop_buf_tensor,
-                       lt_out_backprop, &mkl_buf_convert_out_backprop);
-        CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_out_backprop,
-                                          mkl_buf_out_backprop,
-                                          mkl_buf_convert_out_backprop),
-                 E_SUCCESS);
-        dnnDelete_F32(mkl_prim_convert_out_backprop);
-      }
-      dnnLayoutDelete_F32(mkl_lt_internal_out_backprop);
-
-      conv_res[dnnResourceDiffDst] = (mkl_convert_out_backprop)
-                                         ? mkl_buf_convert_out_backprop
-                                         : mkl_buf_out_backprop;
-    }
-
-    void MklPrepareGradFilter(OpKernelContext* context, Tensor* grad_filter,
-                              Tensor* mkl_tmp_grad_filter_buf_tensor) {
-      bool mkl_convert_grad_filter;
-      dnnLayout_t mkl_lt_internal_grad_filter = nullptr;
-      void* mkl_buf_convert_grad_filter = nullptr;
-      void* mkl_buf_grad_filter = const_cast<void*>(
-          static_cast<const void*>(grad_filter->flat<T>().data()));
-      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_grad_filter,
-                                                prim_conv_bwdfilter,
-                                                dnnResourceDiffFilter),
-               E_SUCCESS);
-      mkl_convert_grad_filter =
-          !dnnLayoutCompare_F32(mkl_lt_internal_grad_filter, lt_grad_filter);
-      if (mkl_convert_grad_filter) {
-        CHECK_EQ(dnnConversionCreate_F32(&convert_bwdfilter,
-                                         mkl_lt_internal_grad_filter,
-                                         lt_grad_filter),
-                 E_SUCCESS);
-        AllocTmpBuffer(context, mkl_tmp_grad_filter_buf_tensor,
-                       mkl_lt_internal_grad_filter,
-                       &mkl_buf_convert_grad_filter);
-      }
-      dnnLayoutDelete_F32(mkl_lt_internal_grad_filter);
-
-      conv_res[dnnResourceDiffFilter] = (mkl_convert_grad_filter)
-                                            ? mkl_buf_convert_grad_filter
-                                            : mkl_buf_grad_filter;
-    }
-  } MklConv2DGradFilterOpContext;
-
-  std::vector<int32> strides_;
-  Padding padding_;
-  TensorFormat data_format_;
-};
-
-#define REGISTER_MKL_FILTER_KERNELS(T)                              \
-  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter")          \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DCustomBackpropFilterOp<CPUDevice, T>);
-TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
-#undef REGISTER_MKL_FILTER_KERNELS
-
-#else
-
 template <typename Device, class T, bool biasEnabled>
 class MklConvCustomBackpropFilterOp
     : public MklConvBackpropCommonOp<Device, T> {
@@ -1080,8 +690,6 @@ class MklConvCustomBackpropFilterOp
 TF_CALL_float(REGISTER_MKL_FILTER_KERNELS);
 #undef REGISTER_MKL_FILTER_KERNELS
 
-#endif  // INTEL_MKL_ML_ONLY
-
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index a501ce2c93..1f9e6abe44 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -23,10 +23,6 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 #include <algorithm>
 #include <vector>
-#ifdef INTEL_MKL_ML_ONLY
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
-#endif
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -46,19 +42,15 @@ limitations under the License.
 #include "tensorflow/core/util/use_cudnn.h"
 #include "tensorflow/core/util/work_sharder.h"
 
-#ifndef INTEL_MKL_ML_ONLY
 #include "mkldnn.hpp"
 
 using mkldnn::convolution_backward_data;
 using mkldnn::prop_kind;
 using mkldnn::stream;
-#endif
 
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-#ifndef INTEL_MKL_ML_ONLY
-
 /// utility classes enabling primitive reuse for backward conv ops.
 struct MklConvBwdInputParams {
   memory::dims diff_src_dims;
@@ -293,320 +285,6 @@ class MklConvBwdInputPrimitiveFactory : public MklPrimitiveFactory<T> {
   }
 };
 
-#endif
-
-#ifdef INTEL_MKL_ML_ONLY
-
-template <typename Device, class T>
-class MklConv2DCustomBackpropInputOp : public OpKernel {
- public:
-  ~MklConv2DCustomBackpropInputOp() {}
-  explicit MklConv2DCustomBackpropInputOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    string dataformat;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &dataformat));
-    OP_REQUIRES(context, FormatFromString(dataformat, &data_format),
-                errors::InvalidArgument("Invalid data format"));
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides));
-    int stride_n = GetTensorDim(strides, data_format, 'N');
-    int stride_c = GetTensorDim(strides, data_format, 'C');
-    OP_REQUIRES(
-        context, (stride_n == 1 && stride_c == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    MklConvBackInputOpContext mkl_context;
-    const Tensor& input = MklGetInput(context, 0);
-    const Tensor& filter = MklGetInput(context, 1);
-
-    GetMklShape(context, 1, &(mkl_context.filter_shape));
-    bool filter_in_mkl_format = mkl_context.filter_shape.IsMklTensor();
-
-    const Tensor& out_backprop = MklGetInput(context, 2);
-    GetMklShape(context, 2, &(mkl_context.outback_shape));
-    bool outback_in_mkl_format = mkl_context.outback_shape.IsMklTensor();
-
-    TensorShape input_shape, filter_shape, outback_shape;
-
-    // Generate input shape.
-    OP_REQUIRES(
-        context, TensorShapeUtils::IsVector(input.shape()),
-        errors::InvalidArgument(
-            "Conv2DBackpropInput: input_sizes input must be 1-dim, not ",
-            input.dims()));
-    OP_REQUIRES_OK(
-        context, TensorShapeUtils::MakeShape(input.vec<int32>(), &input_shape));
-
-    // Generate shape for filter prop if input is in MKL format.
-    if (filter_in_mkl_format) {
-      OP_REQUIRES(context, mkl_context.filter_shape.GetDimension() == 4,
-                  errors::InvalidArgument(
-                      "Conv2DCustomBackpropInput: size must be 4-dim"));
-
-      const int64* filter_sizes =
-          (const int64*)mkl_context.filter_shape.GetSizes();
-      const int64 filter_dims = mkl_context.filter_shape.GetDimension();
-
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                                  filter_sizes, filter_dims, &filter_shape));
-    } else {
-      filter_shape = filter.shape();
-    }
-
-    // Generate shape for outback prop if input is in MKL format.
-    if (outback_in_mkl_format) {
-      OP_REQUIRES(context, mkl_context.outback_shape.GetDimension() == 4,
-                  errors::InvalidArgument(
-                      "Conv2DCustomBackpropInput: size must be 4-dim"));
-
-      MklSizesToTFSizes(context, data_format, mkl_context.outback_shape,
-                        &outback_shape);
-    } else {
-      outback_shape = out_backprop.shape();
-    }
-
-    ConvBackpropDimensions dims;
-    OP_REQUIRES_OK(
-        context,
-        ConvBackpropComputeDimensions(
-            "Conv2DCustomBackpropInput", /*num_spatial_dims=*/2, input_shape,
-            filter_shape, outback_shape, strides, padding, data_format, &dims));
-
-    int64 pad_top, pad_bottom;
-    int64 pad_left, pad_right;
-    OP_REQUIRES_OK(
-        context,
-        GetWindowedOutputSizeVerbose(
-            dims.spatial_dims[0].input_size, dims.spatial_dims[0].filter_size,
-            dims.spatial_dims[0].stride, padding,
-            &dims.spatial_dims[0].output_size, &pad_top, &pad_bottom));
-    OP_REQUIRES_OK(
-        context,
-        GetWindowedOutputSizeVerbose(
-            dims.spatial_dims[1].input_size, dims.spatial_dims[1].filter_size,
-            dims.spatial_dims[1].stride, padding,
-            &dims.spatial_dims[1].output_size, &pad_left, &pad_right));
-
-    mkl_context.in_dims = 4;
-
-    mkl_context.in_sizes[0] =
-        static_cast<size_t>(dims.spatial_dims[1].input_size);
-    mkl_context.in_sizes[1] =
-        static_cast<size_t>(dims.spatial_dims[0].input_size);
-    mkl_context.in_sizes[2] = static_cast<size_t>(dims.in_depth);
-    mkl_context.in_sizes[3] = static_cast<size_t>(dims.batch_size);
-
-    mkl_context.out_sizes[0] =
-        static_cast<size_t>(dims.spatial_dims[1].output_size);
-    mkl_context.out_sizes[1] =
-        static_cast<size_t>(dims.spatial_dims[0].output_size);
-    mkl_context.out_sizes[2] = static_cast<size_t>(dims.out_depth);
-    mkl_context.out_sizes[3] = static_cast<size_t>(dims.batch_size);
-
-    mkl_context.input_offset[0] = static_cast<int>(-pad_left);
-    mkl_context.input_offset[1] = static_cast<int>(-pad_top);
-
-    mkl_context.conv_strides[0] =
-        static_cast<size_t>(dims.spatial_dims[1].stride);
-    mkl_context.conv_strides[1] =
-        static_cast<size_t>(dims.spatial_dims[0].stride);
-
-    GetStridesFromSizes(data_format, mkl_context.out_strides,
-                        mkl_context.out_sizes);
-    GetStridesFromSizes(data_format, mkl_context.in_strides,
-                        mkl_context.in_sizes);
-
-    mkl_context.filter_size[0] = dims.spatial_dims[1].filter_size;
-    mkl_context.filter_size[1] = dims.spatial_dims[0].filter_size;
-    mkl_context.filter_size[2] = dims.in_depth;
-    mkl_context.filter_size[3] = dims.out_depth;
-
-    mkl_context.filter_stride[0] =
-        mkl_context.filter_size[2] * mkl_context.filter_size[3];
-    mkl_context.filter_stride[1] = mkl_context.filter_size[2] *
-                                   mkl_context.filter_size[0] *
-                                   mkl_context.filter_size[3];
-    mkl_context.filter_stride[2] = mkl_context.filter_size[3];
-    mkl_context.filter_stride[3] = 1;
-
-    CHECK_EQ(
-        dnnConvolutionCreateBackwardData_F32(
-            &mkl_context.prim_bwddata, NULL, dnnAlgorithmConvolutionDirect,
-            mkl_context.in_dims, mkl_context.in_sizes, mkl_context.out_sizes,
-            mkl_context.filter_size, mkl_context.conv_strides,
-            mkl_context.input_offset, dnnBorderZeros),
-        E_SUCCESS);
-
-    // Allocate output tensor and shape
-    TensorShape mkl_out_shape;
-    MklShape mklOutputShape;
-    mklOutputShape.SetMklTensor(true);
-    mklOutputShape.SetMklLayout(mkl_context.prim_bwddata, dnnResourceDiffSrc);
-    mklOutputShape.SetTfLayout(mkl_context.in_dims, mkl_context.in_sizes,
-                               mkl_context.in_strides);
-    // MKL might change the dimension ordering.
-    // Create mapping to recover the original TF dimension order
-    mklOutputShape.SetTfDimOrder(mkl_context.in_dims, data_format);
-
-    Tensor* in_backprop = nullptr;
-    mkl_out_shape.AddDim(dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>(
-                             mklOutputShape.GetMklLayout())) /
-                         sizeof(T));
-    AllocateOutputSetMklShape(context, 0, &in_backprop, mkl_out_shape,
-                              mklOutputShape);
-
-    mkl_context.conv_res[dnnResourceDiffSrc] =
-        static_cast<void*>(const_cast<T*>(in_backprop->flat<T>().data()));
-
-    mkl_context.MklCreateInputLayouts(context);
-    Tensor mkl_tmp_outbackprop_buf_tensor, mkl_tmp_filter_buf_tensor;
-    mkl_context.MklPrepareConvolutionInputs(
-        context, &mkl_tmp_outbackprop_buf_tensor, &mkl_tmp_filter_buf_tensor);
-
-    CHECK_EQ(dnnExecute_F32(mkl_context.prim_bwddata, mkl_context.conv_res),
-             E_SUCCESS);
-    mkl_context.MklCleanup();
-  }
-
- private:
-  typedef struct {
-    int in_dims;
-    size_t in_sizes[4];
-    size_t in_strides[4];
-    size_t out_sizes[4];
-    size_t out_strides[4];
-    int input_offset[2];
-    size_t filter_size[4];
-    size_t filter_stride[4];
-    size_t conv_strides[2];
-    MklShape filter_shape, outback_shape;
-    dnnPrimitive_t prim_bwddata;
-    void* conv_res[dnnResourceNumber];
-    dnnLayout_t lt_filter, lt_outbackprop;
-
-    // Create MKL dnnLayout_t objects for tensors coming into the layer
-    void MklCreateInputLayouts(OpKernelContext* context) {
-      bool filter_in_mkl_format = filter_shape.IsMklTensor();
-      bool outback_in_mkl_format = outback_shape.IsMklTensor();
-      if (filter_in_mkl_format) {
-        lt_filter = (dnnLayout_t)filter_shape.GetCurLayout();
-      } else {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_filter, in_dims, filter_size,
-                                     filter_stride),
-                 E_SUCCESS);
-      }
-
-      if (outback_in_mkl_format) {
-        lt_outbackprop = (dnnLayout_t)outback_shape.GetCurLayout();
-      } else {
-        CHECK_EQ(dnnLayoutCreate_F32(&lt_outbackprop, in_dims, out_sizes,
-                                     out_strides),
-                 E_SUCCESS);
-      }
-    }
-
-    // Compare incoming input tensor layouts with MKL preferred layouts and
-    // convert data to the preferred layout if necessary
-    void MklPrepareConvolutionInputs(OpKernelContext* context,
-                                     Tensor* mkl_tmp_outbackprop_buf_tensor,
-                                     Tensor* mkl_tmp_filter_buf_tensor) {
-      dnnPrimitive_t mkl_convert_filter = nullptr,
-                     mkl_convert_outbackprop = nullptr;
-      void *mkl_filter_buf = nullptr, *mkl_outbackprop_buf = nullptr;
-      dnnLayout_t mkl_lt_filter_internal = nullptr,
-                  mkl_lt_outbackprop_internal = nullptr;
-      CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(
-                   &mkl_lt_filter_internal, prim_bwddata, dnnResourceFilter),
-               E_SUCCESS);
-
-      const Tensor& filter = MklGetInput(context, 1);
-
-      CHECK_EQ(
-          dnnLayoutCreateFromPrimitive_F32(&mkl_lt_outbackprop_internal,
-                                           prim_bwddata, dnnResourceDiffDst),
-          E_SUCCESS);
-      if (!dnnLayoutCompare_F32(mkl_lt_filter_internal, lt_filter)) {
-        // Create conversion primitive
-        CHECK_EQ(dnnConversionCreate_F32(&mkl_convert_filter, lt_filter,
-                                         mkl_lt_filter_internal),
-                 E_SUCCESS);
-
-        AllocTmpBuffer(context, mkl_tmp_filter_buf_tensor,
-                       mkl_lt_filter_internal, &mkl_filter_buf);
-        CHECK_EQ(
-            dnnConversionExecute_F32(
-                mkl_convert_filter,
-                static_cast<void*>(const_cast<T*>(filter.flat<T>().data())),
-                mkl_filter_buf),
-            E_SUCCESS);
-
-        // Assign filter buf to resources[] for convolution.
-        conv_res[dnnResourceFilter] = mkl_filter_buf;
-        dnnDelete_F32(mkl_convert_filter);
-      } else {
-        // If we do not need any layout conversion for filter, then
-        // we directly assign input filter to resources[].
-        conv_res[dnnResourceFilter] =
-            static_cast<void*>(const_cast<T*>(filter.flat<T>().data()));
-      }
-      dnnLayoutDelete_F32(mkl_lt_filter_internal);
-      const Tensor& out_backprop = MklGetInput(context, 2);
-      // --
-      // We do similar steps as above for outputbackprop.
-      if (!dnnLayoutCompare_F32(mkl_lt_outbackprop_internal, lt_outbackprop)) {
-        CHECK_EQ(
-            dnnConversionCreate_F32(&mkl_convert_outbackprop, lt_outbackprop,
-                                    mkl_lt_outbackprop_internal),
-            E_SUCCESS);
-        AllocTmpBuffer(context, mkl_tmp_outbackprop_buf_tensor,
-                       mkl_lt_outbackprop_internal, &mkl_outbackprop_buf);
-
-        CHECK_EQ(dnnConversionExecute_F32(mkl_convert_outbackprop,
-                                          static_cast<void*>(const_cast<T*>(
-                                              out_backprop.flat<T>().data())),
-                                          mkl_outbackprop_buf),
-                 E_SUCCESS);
-
-        conv_res[dnnResourceDiffDst] = mkl_outbackprop_buf;
-        dnnDelete_F32(mkl_convert_outbackprop);
-      } else {
-        conv_res[dnnResourceDiffDst] =
-            static_cast<void*>(const_cast<T*>(out_backprop.flat<T>().data()));
-      }
-      dnnLayoutDelete_F32(mkl_lt_outbackprop_internal);
-    }
-
-    // Cleanup member layouts and primitives
-    void MklCleanup() {
-      bool filter_in_mkl_format = filter_shape.IsMklTensor();
-      bool outback_in_mkl_format = outback_shape.IsMklTensor();
-      if (!filter_in_mkl_format) dnnLayoutDelete_F32(lt_filter);
-      if (!outback_in_mkl_format) dnnLayoutDelete_F32(lt_outbackprop);
-      dnnDelete_F32(prim_bwddata);
-    }
-  } MklConvBackInputOpContext;
-
-  std::vector<int32> strides;
-  Padding padding;
-  TensorFormat data_format;
-};
-
-#define REGISTER_MKL_CPU_KERNELS(T)                                 \
-  REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropInput")           \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<T>("T")               \
-                              .Label(mkl_op_registry::kMklOpLabel), \
-                          MklConv2DCustomBackpropInputOp<CPUDevice, T>);
-
-TF_CALL_float(REGISTER_MKL_CPU_KERNELS);
-#undef REGISTER_MKL_CPU_KERNELS
-
-#else
-
 template <typename Device, class T>
 class MklConvCustomBackpropInputOp : public MklConvBackpropCommonOp<Device, T> {
  public:
@@ -881,7 +559,5 @@ class MklConvCustomBackpropInputOp : public MklConvBackpropCommonOp<Device, T> {
 TF_CALL_float(REGISTER_MKL_CPU_KERNELS);
 #undef REGISTER_MKL_CPU_KERNELS
 
-#endif  // INTEL_MKL_ML_ONLY
-
 }  // namespace tensorflow
 #endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index 01cc606f41..28d521c9be 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -37,23 +37,16 @@ limitations under the License.
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
-
 #include "tensorflow/core/util/mkl_util.h"
-
-#ifndef INTEL_MKL_ML_ONLY
 #include "mkldnn.hpp"
 
 using mkldnn::prop_kind;
 using mkldnn::stream;
-
 using mkldnn::convolution_direct;
 using mkldnn::convolution_forward;
-#endif
 
 namespace tensorflow {
 
-#ifndef INTEL_MKL_ML_ONLY
-
 class MklDnnConvUtil {
  protected:
   OpKernelContext* context_;  // We don't own this.
@@ -543,7 +536,6 @@ class MklConvBackpropCommonOp : public OpKernel {
   TensorFormat data_format_;  // NCHW or NHWC
 };
 
-#endif  // INTEL_MKL_ML_ONLY
 
 /////////////////////////////////////////////////////////////////////
 ///  Dummy Mkl op that is just used for operators that are intermediate
-- 
GitLab


From 9043defdb01457617fc3520f2e164a0ab9d45eb6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 09:46:34 -0700
Subject: [PATCH 0867/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216876525

---
 tensorflow/go/op/wrappers.go | 382 +++++++++++++++++------------------
 1 file changed, 191 insertions(+), 191 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 109da6e5c9..0bd1e07dae 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -5474,79 +5474,6 @@ func Sign(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
-// ArgMinAttr is an optional argument to ArgMin.
-type ArgMinAttr func(optionalAttr)
-
-// ArgMinOutputType sets the optional output_type attribute to value.
-// If not specified, defaults to DT_INT64
-func ArgMinOutputType(value tf.DataType) ArgMinAttr {
-	return func(m optionalAttr) {
-		m["output_type"] = value
-	}
-}
-
-// Returns the index with the smallest value across dimensions of a tensor.
-//
-// Note that in case of ties the identity of the return value is not guaranteed.
-//
-// Arguments:
-//
-//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
-// Describes which dimension of the input Tensor to reduce across. For vectors,
-// use dimension = 0.
-func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ArgMin",
-		Input: []tf.Input{
-			input, dimension,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Convert the quantized 'input' tensor into a lower-precision 'output', using the
-//
-// output range specified with 'requested_output_min' and 'requested_output_max'.
-//
-// [input_min, input_max] are scalar floats that specify the range for the float
-// interpretation of the 'input' data. For example, if input_min is -1.0f and
-// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
-// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
-//
-// Arguments:
-//
-//	input_min: The float value that the minimum quantized input value represents.
-//	input_max: The float value that the maximum quantized input value represents.
-//	requested_output_min: The float value that the minimum quantized output value represents.
-//	requested_output_max: The float value that the maximum quantized output value represents.
-//	out_type: The type of the output. Should be a lower bit depth than Tinput.
-//
-// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
-func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"out_type": out_type}
-	opspec := tf.OpSpec{
-		Type: "Requantize",
-		Input: []tf.Input{
-			input, input_min, input_max, requested_output_min, requested_output_max,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
 // PreventGradientAttr is an optional argument to PreventGradient.
 type PreventGradientAttr func(optionalAttr)
 
@@ -12631,6 +12558,124 @@ func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatM
 	return op.Output(0)
 }
 
+// CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams.
+type CudnnRNNCanonicalToParamsAttr func(optionalAttr)
+
+// CudnnRNNCanonicalToParamsRnnMode sets the optional rnn_mode attribute to value.
+// If not specified, defaults to "lstm"
+func CudnnRNNCanonicalToParamsRnnMode(value string) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["rnn_mode"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsInputMode sets the optional input_mode attribute to value.
+// If not specified, defaults to "linear_input"
+func CudnnRNNCanonicalToParamsInputMode(value string) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["input_mode"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsDirection sets the optional direction attribute to value.
+// If not specified, defaults to "unidirectional"
+func CudnnRNNCanonicalToParamsDirection(value string) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["direction"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsDropout sets the optional dropout attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNCanonicalToParamsDropout(value float32) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["dropout"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsSeed sets the optional seed attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNCanonicalToParamsSeed(value int64) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// CudnnRNNCanonicalToParamsSeed2 sets the optional seed2 attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNCanonicalToParamsSeed2(value int64) CudnnRNNCanonicalToParamsAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Converts CudnnRNN params from canonical form to usable form.
+//
+// Writes a set of weights into the opaque params buffer so they can be used in
+// upcoming training or inferences.
+//
+// Note that the params buffer may not be compatible across different GPUs. So any
+// save and restoration should be converted to and from the canonical weights and
+// biases.
+//
+// num_layers: Specifies the number of layers in the RNN model.
+// num_units: Specifies the size of the hidden state.
+// input_size: Specifies the size of the input state.
+// weights: the canonical form of weights that can be used for saving
+//     and restoration. They are more likely to be compatible across different
+//     generations.
+// biases: the canonical form of biases that can be used for saving
+//     and restoration. They are more likely to be compatible across different
+//     generations.
+// num_params: number of parameter sets for all layers.
+//     Each layer may contain multiple parameter sets, with each set consisting of
+//     a weight matrix and a bias vector.
+// rnn_mode: Indicates the type of the RNN model.
+// input_mode: Indicate whether there is a linear projection between the input and
+//     The actual computation before the first layer. 'skip_input' is only allowed
+//     when input_size == num_units; 'auto_select' implies 'skip_input' when
+//     input_size == num_units; otherwise, it implies 'linear_input'.
+// direction: Indicates whether a bidirectional model will be used.
+//     dir = (direction == bidirectional) ? 2 : 1
+// dropout: dropout probability. When set to 0., dropout is disabled.
+// seed: the 1st part of a seed to initialize dropout.
+// seed2: the 2nd part of a seed to initialize dropout.
+func CudnnRNNCanonicalToParams(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, weights []tf.Output, biases []tf.Output, optional ...CudnnRNNCanonicalToParamsAttr) (params tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CudnnRNNCanonicalToParams",
+		Input: []tf.Input{
+			num_layers, num_units, input_size, tf.OutputList(weights), tf.OutputList(biases),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Creates a dataset containing elements of first component of `input_dataset` having true in the last component.
+func FilterByLastComponentDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
+	opspec := tf.OpSpec{
+		Type: "FilterByLastComponentDataset",
+		Input: []tf.Input{
+			input_dataset,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Elementwise computes the bitwise AND of `x` and `y`.
 //
 // The result will have those bits set, that are set in both `x` and `y`. The
@@ -18036,124 +18081,6 @@ func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator t
 	return scope.AddOperation(opspec)
 }
 
-// Creates a dataset containing elements of first component of `input_dataset` having true in the last component.
-func FilterByLastComponentDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
-	opspec := tf.OpSpec{
-		Type: "FilterByLastComponentDataset",
-		Input: []tf.Input{
-			input_dataset,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams.
-type CudnnRNNCanonicalToParamsAttr func(optionalAttr)
-
-// CudnnRNNCanonicalToParamsRnnMode sets the optional rnn_mode attribute to value.
-// If not specified, defaults to "lstm"
-func CudnnRNNCanonicalToParamsRnnMode(value string) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["rnn_mode"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsInputMode sets the optional input_mode attribute to value.
-// If not specified, defaults to "linear_input"
-func CudnnRNNCanonicalToParamsInputMode(value string) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["input_mode"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsDirection sets the optional direction attribute to value.
-// If not specified, defaults to "unidirectional"
-func CudnnRNNCanonicalToParamsDirection(value string) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["direction"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsDropout sets the optional dropout attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNCanonicalToParamsDropout(value float32) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["dropout"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsSeed sets the optional seed attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNCanonicalToParamsSeed(value int64) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// CudnnRNNCanonicalToParamsSeed2 sets the optional seed2 attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNCanonicalToParamsSeed2(value int64) CudnnRNNCanonicalToParamsAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Converts CudnnRNN params from canonical form to usable form.
-//
-// Writes a set of weights into the opaque params buffer so they can be used in
-// upcoming training or inferences.
-//
-// Note that the params buffer may not be compatible across different GPUs. So any
-// save and restoration should be converted to and from the canonical weights and
-// biases.
-//
-// num_layers: Specifies the number of layers in the RNN model.
-// num_units: Specifies the size of the hidden state.
-// input_size: Specifies the size of the input state.
-// weights: the canonical form of weights that can be used for saving
-//     and restoration. They are more likely to be compatible across different
-//     generations.
-// biases: the canonical form of biases that can be used for saving
-//     and restoration. They are more likely to be compatible across different
-//     generations.
-// num_params: number of parameter sets for all layers.
-//     Each layer may contain multiple parameter sets, with each set consisting of
-//     a weight matrix and a bias vector.
-// rnn_mode: Indicates the type of the RNN model.
-// input_mode: Indicate whether there is a linear projection between the input and
-//     The actual computation before the first layer. 'skip_input' is only allowed
-//     when input_size == num_units; 'auto_select' implies 'skip_input' when
-//     input_size == num_units; otherwise, it implies 'linear_input'.
-// direction: Indicates whether a bidirectional model will be used.
-//     dir = (direction == bidirectional) ? 2 : 1
-// dropout: dropout probability. When set to 0., dropout is disabled.
-// seed: the 1st part of a seed to initialize dropout.
-// seed2: the 2nd part of a seed to initialize dropout.
-func CudnnRNNCanonicalToParams(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, weights []tf.Output, biases []tf.Output, optional ...CudnnRNNCanonicalToParamsAttr) (params tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CudnnRNNCanonicalToParams",
-		Input: []tf.Input{
-			num_layers, num_units, input_size, tf.OutputList(weights), tf.OutputList(biases),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse.
 type SparseReduceMaxSparseAttr func(optionalAttr)
 
@@ -22027,6 +21954,79 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// ArgMinAttr is an optional argument to ArgMin.
+type ArgMinAttr func(optionalAttr)
+
+// ArgMinOutputType sets the optional output_type attribute to value.
+// If not specified, defaults to DT_INT64
+func ArgMinOutputType(value tf.DataType) ArgMinAttr {
+	return func(m optionalAttr) {
+		m["output_type"] = value
+	}
+}
+
+// Returns the index with the smallest value across dimensions of a tensor.
+//
+// Note that in case of ties the identity of the return value is not guaranteed.
+//
+// Arguments:
+//
+//	dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`.
+// Describes which dimension of the input Tensor to reduce across. For vectors,
+// use dimension = 0.
+func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ArgMin",
+		Input: []tf.Input{
+			input, dimension,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Convert the quantized 'input' tensor into a lower-precision 'output', using the
+//
+// output range specified with 'requested_output_min' and 'requested_output_max'.
+//
+// [input_min, input_max] are scalar floats that specify the range for the float
+// interpretation of the 'input' data. For example, if input_min is -1.0f and
+// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+//
+// Arguments:
+//
+//	input_min: The float value that the minimum quantized input value represents.
+//	input_max: The float value that the maximum quantized input value represents.
+//	requested_output_min: The float value that the minimum quantized output value represents.
+//	requested_output_max: The float value that the maximum quantized output value represents.
+//	out_type: The type of the output. Should be a lower bit depth than Tinput.
+//
+// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output.
+func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"out_type": out_type}
+	opspec := tf.OpSpec{
+		Type: "Requantize",
+		Input: []tf.Input{
+			input, input_min, input_max, requested_output_min, requested_output_max,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
 // Creates a dataset that emits the lines of one or more text files.
 //
 // Arguments:
-- 
GitLab


From 7294f306c0e144db5340b713c903c45e79105e5e Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 12 Oct 2018 10:02:31 -0700
Subject: [PATCH 0868/1085] Auto load kernel libraries under
 tensorflow/core/kernels when TF kernel registry is accessed.

PiperOrigin-RevId: 216878902
---
 tensorflow/core/framework/op_kernel.cc | 53 +++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 3e34bf0418..f21821f116 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/op_kernel.h"
 
+#include <mutex>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -38,6 +39,7 @@ limitations under the License.
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
@@ -924,12 +926,50 @@ struct KernelRegistration {
 // KernelDef.
 typedef std::unordered_multimap<string, KernelRegistration> KernelRegistry;
 
+#if defined(_WIN32)
+static const char kKernelLibPattern[] = "libtfkernel*.dll";
+#elif defined(__APPLE__)
+static const char kKernelLibPattern[] = "libtfkernel*.dylib";
+#else
+static const char kKernelLibPattern[] = "libtfkernel*.so";
+#endif
+
+void LoadDynamicKernelsInternal() {
+  Env* env = Env::Default();
+  string bazel_kernel_dir = io::JoinPath(env->GetRunfilesDir(),
+                                         "tensorflow",
+                                         "core",
+                                         "kernels");
+  std::vector<string> files;
+  Status s_kernel_dir = env->GetChildren(bazel_kernel_dir, &files);
+  if (s_kernel_dir.ok()) {
+    string dll_spec = io::JoinPath(bazel_kernel_dir, kKernelLibPattern);
+    for (const auto&  file : files) {
+      string fullpath =  io::JoinPath(bazel_kernel_dir, file);
+      if (env->MatchPath(fullpath, dll_spec)) {
+        // TODO(gunan): Store the handles to the opened files.
+        void* unused_filehandle;
+        TF_CHECK_OK(env->LoadLibrary(fullpath.c_str(), &unused_filehandle));
+      }
+    }
+  }
+}
+
+// Mechanism for loading existing kernel libraries.
+void LoadDynamicKernels() {
+  // TODO(gunan): As more features are available, add intelligent kernel
+  // selection, and dropping unsuitable kernel logic here.
+  static std::once_flag dll_loader_flag;
+  std::call_once(dll_loader_flag, LoadDynamicKernelsInternal);
+}
+
 void* GlobalKernelRegistry() {
   static KernelRegistry* global_kernel_registry = new KernelRegistry;
   return global_kernel_registry;
 }
 
 static KernelRegistry* GlobalKernelRegistryTyped() {
+  LoadDynamicKernels();
   return reinterpret_cast<KernelRegistry*>(GlobalKernelRegistry());
 }
 
@@ -949,8 +989,17 @@ void OpKernelRegistrar::InitInternal(const KernelDef* kernel_def,
     const string key =
         Key(kernel_def->op(), DeviceType(kernel_def->device_type()),
             kernel_def->label());
-    GlobalKernelRegistryTyped()->insert(std::make_pair(
-        key, KernelRegistration(*kernel_def, kernel_class_name, factory)));
+
+    // To avoid calling LoadDynamicKernels DO NOT CALL GlobalKernelRegistryTyped
+    // here.
+    // InitInternal gets called by static initializers, so it ends up executing
+    // before main. This causes LoadKernelLibraries function to get called
+    // before some file libraries can initialize, which in turn crashes the
+    // program flakily. Until we get rid of static initializers in kernel
+    // registration mechanism, we have this workaround here.
+    reinterpret_cast<KernelRegistry*>(GlobalKernelRegistry())
+        ->insert(std::make_pair(
+            key, KernelRegistration(*kernel_def, kernel_class_name, factory)));
   }
   delete kernel_def;
 }
-- 
GitLab


From e803e276845f3c3d66a8b97b8544d3298428cc7b Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Fri, 12 Oct 2018 10:04:48 -0700
Subject: [PATCH 0869/1085] Automated rollback of commit
 51f0eb5849be0f9ce20e5eb8370158088711f19d

PiperOrigin-RevId: 216879444
---
 tensorflow/compiler/xla/service/BUILD         |  1 -
 .../xla/service/compile_only_service.cc       |  6 ++---
 tensorflow/compiler/xla/service/compiler.cc   |  4 +--
 tensorflow/compiler/xla/service/compiler.h    | 27 +++++--------------
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  7 ++---
 .../compiler/xla/service/cpu/cpu_compiler.h   |  2 +-
 .../xla/service/gpu/nvptx_compiler.cc         |  5 ++--
 .../compiler/xla/service/gpu/nvptx_compiler.h |  2 +-
 .../compiler/xla/service/hlo_module_group.cc  |  5 ++--
 .../compiler/xla/service/hlo_module_group.h   | 13 +--------
 .../xla/service/hlo_module_group_test.cc      |  2 +-
 .../xla/service/interpreter/compiler.cc       | 23 +++-------------
 .../xla/service/interpreter/compiler.h        | 11 ++------
 .../compiler/xla/service/llvm_compiler.cc     | 19 +------------
 .../compiler/xla/service/llvm_compiler.h      | 11 +-------
 tensorflow/compiler/xla/service/service.cc    |  9 +++----
 .../compiler/xla/tests/codegen_test_base.cc   |  5 ++--
 .../compiler/xla/tests/llvm_compiler_test.cc  | 13 +++++----
 18 files changed, 44 insertions(+), 121 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 7d03eba800..6c3b9764b7 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -860,7 +860,6 @@ cc_library(
         ":executable",
         ":hlo",
         ":hlo_module_config",
-        ":hlo_module_group",
         ":logical_buffer",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc
index c9b0e4c08c..bd5045b9b9 100644
--- a/tensorflow/compiler/xla/service/compile_only_service.cc
+++ b/tensorflow/compiler/xla/service/compile_only_service.cc
@@ -103,10 +103,8 @@ CompileOnlyService::CompileAheadOfTime(
     hlo_modules.push_back(std::move(hlo_module));
   }
 
-  return compiler_->CompileAheadOfTime(
-      absl::make_unique<HloModuleGroup>(hlo_modules[0]->name(),
-                                        absl::MakeSpan(hlo_modules)),
-      options, metadata);
+  return compiler_->CompileAheadOfTime(std::move(hlo_modules), options,
+                                       metadata);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc
index 80c630c620..687ecafe0c 100644
--- a/tensorflow/compiler/xla/service/compiler.cc
+++ b/tensorflow/compiler/xla/service/compiler.cc
@@ -45,7 +45,7 @@ Compiler::ComputeDefaultBackendConfig(const HloInstruction& hlo,
 // Define a default version where metadata is not used.
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
 Compiler::CompileAheadOfTime(
-    std::unique_ptr<HloModuleGroup> module_group,
+    std::vector<std::unique_ptr<HloModule>> modules,
     const AotCompilationOptions& options,
     std::unique_ptr<AotCompilationMetadata>* metadata) {
   if (metadata != nullptr) {
@@ -53,7 +53,7 @@ Compiler::CompileAheadOfTime(
         "Populating AotCompilationMetadata is not implemented on this "
         "compiler.");
   }
-  return CompileAheadOfTime(std::move(module_group), options);
+  return CompileAheadOfTime(std::move(modules), options);
 }
 
 /* static */ std::map<se::Platform::Id, Compiler::CompilerFactory>*
diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h
index 9d9796239c..1fdda31c34 100644
--- a/tensorflow/compiler/xla/service/compiler.h
+++ b/tensorflow/compiler/xla/service/compiler.h
@@ -32,7 +32,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
-#include "tensorflow/compiler/xla/service/hlo_module_group.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -136,13 +135,6 @@ class Compiler {
       std::unique_ptr<HloModule> module, se::StreamExecutor* executor,
       DeviceMemoryAllocator* device_allocator) = 0;
 
-  // Overload which optimizes a HLO module group, a set of module which runs
-  // concurrently on multiple devices potentially communicating data between the
-  // modules.
-  virtual Status RunHloPasses(HloModuleGroup* module_group,
-                              se::StreamExecutor* executor,
-                              DeviceMemoryAllocator* device_allocator) = 0;
-
   // Compiles the HLO module for execution on a device given by the executor,
   // and returns an executable object or an error status. No HLO passes are
   // applied to module. Generally a module should be passed through RunHloPasses
@@ -153,17 +145,12 @@ class Compiler {
   // (not just type of device) indicated by the executor.
   //
   // device_allocator is optional; see RunHloPasses.
+  //
+  // Use the overload below to compile computations that run in parallel.
   virtual StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> module, se::StreamExecutor* executor,
       DeviceMemoryAllocator* device_allocator) = 0;
 
-  // Overload which compiles a set of HLO modules that can run in parallel,
-  // potentially communicating data between the modules.
-  virtual StatusOr<std::vector<std::unique_ptr<Executable>>> RunBackend(
-      std::unique_ptr<HloModuleGroup> module_group,
-      std::vector<std::vector<se::StreamExecutor*>> stream_exec,
-      DeviceMemoryAllocator* device_allocator) = 0;
-
   // Compiles a set of HLO modules that can run in parallel, potentially
   // communicating data between the modules, and returns a corresponding
   // sequence of executable objects.
@@ -173,7 +160,7 @@ class Compiler {
   // TODO(b/68666782): Remove this method after adding support for multiple
   // modules to RunHloPasses and RunBackends.
   virtual StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::unique_ptr<HloModuleGroup> module_group,
+      std::vector<std::unique_ptr<HloModule>> modules,
       std::vector<std::vector<se::StreamExecutor*>> stream_exec,
       DeviceMemoryAllocator* device_allocator) = 0;
 
@@ -197,16 +184,16 @@ class Compiler {
   ComputeDefaultBackendConfig(const HloInstruction& hlo,
                               se::StreamExecutor* executor) const;
 
-  // Compiles the HLO module group for ahead-of-time execution.  This is
-  // intended for use in static compilation.
+  // Compiles the HLO module for ahead-of-time execution.  This is intended for
+  // use in static compilation.
   virtual StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
+  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
                      const AotCompilationOptions& options) = 0;
 
   // Similar to CompileAheadOfTime above but AotCompilationMetadata
   // has an argument that can be populated during compilation.
   virtual StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
+  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
                      const AotCompilationOptions& options,
                      std::unique_ptr<AotCompilationMetadata>* metadata);
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index da01c0caf2..68c715a086 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -676,12 +676,9 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
+CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
                                 const AotCompilationOptions& aot_options) {
-  TF_RET_CHECK(!module_group->empty());
-  std::vector<std::unique_ptr<HloModule>> modules =
-      module_group->ConsumeModules();
-
+  TF_RET_CHECK(!modules.empty());
   std::call_once(llvm_command_line_options_initialized,
                  &llvm_ir::InitializeLLVMCommandLineOptions,
                  modules[0]->config());
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
index c67307548d..f2af923782 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
@@ -142,7 +142,7 @@ class CpuCompiler : public LLVMCompiler {
       DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
+  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
                      const AotCompilationOptions& options) override;
 
   se::Platform::Id PlatformId() const override;
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 791d414c91..829d1499bc 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -825,8 +825,9 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(const string& ptx,
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-NVPTXCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
-                                  const AotCompilationOptions& options) {
+NVPTXCompiler::CompileAheadOfTime(
+    std::vector<std::unique_ptr<HloModule>> module,
+    const AotCompilationOptions& options) {
   return Unimplemented(
       "not yet implemented: NVPTXCompiler::CompileAheadOfTime");
 }
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
index f79ae2990a..c4a0b727cd 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
@@ -59,7 +59,7 @@ class NVPTXCompiler : public LLVMCompiler {
       DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
+  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> module,
                      AotCompilationOptions const& options) override;
 
   se::Platform::Id PlatformId() const override;
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.cc b/tensorflow/compiler/xla/service/hlo_module_group.cc
index 8999ac9f32..f9b56ef464 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group.cc
@@ -17,8 +17,9 @@ limitations under the License.
 
 namespace xla {
 
-HloModuleGroup::HloModuleGroup(std::unique_ptr<HloModule> module)
-    : name_(module->name()) {
+HloModuleGroup::HloModuleGroup(absl::string_view name,
+                               std::unique_ptr<HloModule> module)
+    : name_(name) {
   push_back(std::move(module));
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.h b/tensorflow/compiler/xla/service/hlo_module_group.h
index 7c39cf1781..7338be8b9c 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group.h
@@ -35,7 +35,7 @@ class HloModuleGroup {
   explicit HloModuleGroup(absl::string_view name) : name_(name) {}
 
   // Construct a module group containing a single module.
-  explicit HloModuleGroup(std::unique_ptr<HloModule> module);
+  HloModuleGroup(absl::string_view name, std::unique_ptr<HloModule> module);
 
   // Construct a module group containing any number of modules.
   HloModuleGroup(absl::string_view name,
@@ -50,16 +50,11 @@ class HloModuleGroup {
   // Add a module to the back of vector of modules in the group.
   void push_back(std::unique_ptr<HloModule> module);
 
-  // Replaces the existing module at the given index with the given module. The
-  // existing module is discarded.
-  void ReplaceModule(int index, std::unique_ptr<HloModule> module);
-
   // Moves all modules from the group into the returned vector. After this
   // method runs, the module group will be empty.
   std::vector<std::unique_ptr<HloModule>> ConsumeModules();
 
   string name() const { return name_; }
-
   string ToString() const;
 
   // Serialize the module group to/from a proto.
@@ -68,12 +63,6 @@ class HloModuleGroup {
       const HloModuleGroupProto& proto,
       absl::Span<const HloModuleConfig> module_configs);
 
-  // Returns the number of modules in the module group.
-  int size() const { return modules_.size(); }
-
-  // Returns true if there are no modules in the module group.
-  bool empty() const { return modules_.empty(); }
-
  private:
   string name_;
 
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_test.cc b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
index 5a9a86af56..b7b12cb72b 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
@@ -46,7 +46,7 @@ ENTRY %entry (x: f32[], y: f32[]) -> f32[] {
 )";
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
                           ParseHloString(text));
-  HloModuleGroup group(std::move(module));
+  HloModuleGroup group(TestName(), std::move(module));
 
   EXPECT_EQ(group.modules().size(), 1);
   EXPECT_THAT(
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 60478e5850..7c79eb7d79 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -57,12 +57,6 @@ StatusOr<std::unique_ptr<HloModule>> InterpreterCompiler::RunHloPasses(
   return std::move(hlo_module);
 }
 
-Status InterpreterCompiler::RunHloPasses(
-    HloModuleGroup* module_group, se::StreamExecutor* executor,
-    DeviceMemoryAllocator* device_allocator) {
-  return Unimplemented("Module group compilation not supported on Interpreter");
-}
-
 StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
     std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
     DeviceMemoryAllocator* /*device_allocator*/) {
@@ -82,26 +76,17 @@ StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
   return std::move(executable);
 }
 
-StatusOr<std::vector<std::unique_ptr<Executable>>>
-InterpreterCompiler::RunBackend(
-    std::unique_ptr<HloModuleGroup> module_group,
-    std::vector<std::vector<se::StreamExecutor*>> stream_exec,
-    DeviceMemoryAllocator* device_allocator) {
-  return Unimplemented(
-      "Module group compilation is not supported on Interpreter.");
-}
-
 StatusOr<std::vector<std::unique_ptr<Executable>>> InterpreterCompiler::Compile(
-    std::unique_ptr<HloModuleGroup> /*module_group*/,
+    std::vector<std::unique_ptr<HloModule>> /*hlo_modules*/,
     std::vector<std::vector<se::StreamExecutor*>> /*stream_execs*/,
     DeviceMemoryAllocator* /*device_allocator*/) {
-  return Unimplemented(
-      "Module group compilation is not supported on Interpreter.");
+  return tensorflow::errors::Unimplemented(
+      "Compilation of multiple HLO modules is not supported on Interpreter.");
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
 InterpreterCompiler::CompileAheadOfTime(
-    std::unique_ptr<HloModuleGroup> module_group,
+    std::vector<std::unique_ptr<HloModule>> hlo_modules,
     const AotCompilationOptions& aot_options) {
   return tensorflow::errors::InvalidArgument(
       "AOT compilation not supported on Interpreter");
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h
index f14970f9ff..e90ae3e818 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.h
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.h
@@ -46,25 +46,18 @@ class InterpreterCompiler : public Compiler {
   StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
       std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
       DeviceMemoryAllocator* device_allocator) override;
-  Status RunHloPasses(HloModuleGroup* module_group,
-                      se::StreamExecutor* executor,
-                      DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
       DeviceMemoryAllocator* device_allocator) override;
-  StatusOr<std::vector<std::unique_ptr<Executable>>> RunBackend(
-      std::unique_ptr<HloModuleGroup> module_group,
-      std::vector<std::vector<se::StreamExecutor*>> stream_exec,
-      DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::unique_ptr<HloModuleGroup> module_group,
+      std::vector<std::unique_ptr<HloModule>> hlo_modules,
       std::vector<std::vector<se::StreamExecutor*>> stream_exec,
       DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
+  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> hlo_modules,
                      const AotCompilationOptions& aot_options) override;
 
   HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override;
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc
index 6a63309762..b17c9d5045 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.cc
+++ b/tensorflow/compiler/xla/service/llvm_compiler.cc
@@ -21,23 +21,8 @@ limitations under the License.
 #endif
 
 namespace xla {
-Status LLVMCompiler::RunHloPasses(HloModuleGroup* module_group,
-                                  se::StreamExecutor* executor,
-                                  DeviceMemoryAllocator* device_allocator) {
-  return Unimplemented(
-      "Model partitioning not implemented for the CPU/GPU compilers!");
-}
-
-StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::RunBackend(
-    std::unique_ptr<HloModuleGroup> module_group,
-    std::vector<std::vector<se::StreamExecutor*>> stream_exec,
-    DeviceMemoryAllocator* device_allocator) {
-  return Unimplemented(
-      "Model partitioning not implemented for the CPU/GPU compilers!");
-}
-
 StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile(
-    std::unique_ptr<HloModuleGroup> module_group,
+    std::vector<std::unique_ptr<HloModule>> modules,
     std::vector<std::vector<se::StreamExecutor*>> stream_execs,
     DeviceMemoryAllocator* device_allocator) {
   // Tensorflow tries to enable the following behaviors in all its threads:
@@ -53,8 +38,6 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile(
   tensorflow::port::ScopedDontFlushDenormal dont_flush_denormals;
 
   std::vector<std::unique_ptr<Executable>> result;
-  std::vector<std::unique_ptr<HloModule>> modules =
-      module_group->ConsumeModules();
   for (size_t i = 0; i < modules.size(); i++) {
     if (stream_execs[i].size() != 1) {
       return Unimplemented(
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h
index 299f670c18..f1c623508c 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.h
+++ b/tensorflow/compiler/xla/service/llvm_compiler.h
@@ -69,17 +69,8 @@ class LLVMCompiler : public Compiler {
   using Compiler::RunBackend;
   using Compiler::RunHloPasses;
 
-  Status RunHloPasses(HloModuleGroup* module_group,
-                      se::StreamExecutor* executor,
-                      DeviceMemoryAllocator* device_allocator) override;
-
-  StatusOr<std::vector<std::unique_ptr<Executable>>> RunBackend(
-      std::unique_ptr<HloModuleGroup> module_group,
-      std::vector<std::vector<se::StreamExecutor*>> stream_exec,
-      DeviceMemoryAllocator* device_allocator) override;
-
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::unique_ptr<HloModuleGroup> module_group,
+      std::vector<std::unique_ptr<HloModule>> modules,
       std::vector<std::vector<se::StreamExecutor*>> stream_execs,
       DeviceMemoryAllocator* device_allocator) override;
 
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index cb6a9e6707..d290c0eb5d 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -341,19 +341,18 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> Service::BuildExecutables(
   }
 
   CHECK_EQ(module_protos.size(), module_configs.size());
-  auto module_group =
-      absl::make_unique<HloModuleGroup>(module_protos[0]->name());
+  std::vector<std::unique_ptr<HloModule>> modules;
   for (int64 i = 0; i < module_protos.size(); ++i) {
     const HloModuleProto* proto = module_protos[i];
     const HloModuleConfig& config = *module_configs[i];
     TF_ASSIGN_OR_RETURN(auto module, CreateModuleFromProto(*proto, config));
-    module_group->push_back(std::move(module));
+    modules.push_back(std::move(module));
   }
 
   TF_ASSIGN_OR_RETURN(
       std::vector<std::unique_ptr<Executable>> executables,
-      backend->compiler()->Compile(std::move(module_group),
-                                   std::move(executors), device_allocator));
+      backend->compiler()->Compile(std::move(modules), std::move(executors),
+                                   device_allocator));
 
   for (size_t i = 0; i < module_protos.size(); ++i) {
     if (!module_configs[i]->debug_options().xla_dump_executions_to().empty()) {
diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.cc b/tensorflow/compiler/xla/tests/codegen_test_base.cc
index fbebe04087..022641394f 100644
--- a/tensorflow/compiler/xla/tests/codegen_test_base.cc
+++ b/tensorflow/compiler/xla/tests/codegen_test_base.cc
@@ -32,10 +32,11 @@ StatusOr<std::unique_ptr<AotCompilationResult>>
 CodegenTestBase::CompileToAotCompilationResult(
     std::unique_ptr<HloModule> hlo_module,
     const AotCompilationOptions& options) {
-  auto module_group = absl::make_unique<HloModuleGroup>(std::move(hlo_module));
+  std::vector<std::unique_ptr<HloModule>> hlo_modules;
+  hlo_modules.push_back(std::move(hlo_module));
   TF_ASSIGN_OR_RETURN(
       std::vector<std::unique_ptr<AotCompilationResult>> results,
-      backend().compiler()->CompileAheadOfTime(std::move(module_group),
+      backend().compiler()->CompileAheadOfTime(std::move(hlo_modules),
                                                options));
   return std::move(results.front());
 }
diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
index c622b29509..8d65869557 100644
--- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
+++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
@@ -93,16 +93,15 @@ class LLVMCompilerTest : public ::testing::Test {
     std::unique_ptr<HloModule> hlo_module = CreateNewModule();
     hlo_module->AddEntryComputation(builder.Build());
 
-    auto module_group = absl::make_unique<HloModuleGroup>("test_module_group");
-    module_group->push_back(hlo_module->Clone());
-    module_group->push_back(std::move(hlo_module));
+    std::vector<std::unique_ptr<HloModule>> modules;
+    modules.push_back(hlo_module->Clone());
+    modules.push_back(std::move(hlo_module));
 
     std::vector<std::vector<se::StreamExecutor *>> executors;
     executors.push_back({backend_->default_stream_executor()});
     executors.push_back({backend_->default_stream_executor()});
 
-    EXPECT_IS_OK(compiler->Compile(std::move(module_group),
-                                   std::move(executors),
+    EXPECT_IS_OK(compiler->Compile(std::move(modules), std::move(executors),
                                    /*device_allocator=*/nullptr));
   }
 
@@ -151,12 +150,12 @@ TEST_F(GpuCompilerTest, HooksTest) {
   TestCompilerHooks(&compiler);
 }
 
-TEST_F(CpuCompilerTest, CpuMultiModuleCompilation) {
+TEST_F(CpuCompilerTest, MultiModuleCompilation) {
   cpu::CpuCompiler compiler;
   TestMultiModuleCompilation(&compiler);
 }
 
-TEST_F(GpuCompilerTest, NVPTXMultiModuleCompilation) {
+TEST_F(GpuCompilerTest, MultModuleCompilation) {
   gpu::NVPTXCompiler compiler;
   TestMultiModuleCompilation(&compiler);
 }
-- 
GitLab


From 5d82100b6957b96f10427baeb2e68c490d83997e Mon Sep 17 00:00:00 2001
From: Tiezhen WANG <wangtz@google.com>
Date: Fri, 12 Oct 2018 10:09:29 -0700
Subject: [PATCH 0870/1085] FeatureLayer: Nit: Slightly speed it up by
 pre-sorting the columns by name.

PiperOrigin-RevId: 216880147
---
 tensorflow/python/feature_column/feature_column_v2.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 67de174a67..aeb666cf6a 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -374,9 +374,10 @@ class FeatureLayer(Layer):
     super(FeatureLayer, self).__init__(name=name, trainable=trainable, **kwargs)
 
     self._feature_columns = _normalize_feature_columns(feature_columns)
+    self._feature_columns = sorted(self._feature_columns, key=lambda x: x.name)
     self._state_manager = _StateManagerImpl(self, self.trainable)
     self._shared_state_manager = shared_state_manager
-    for column in sorted(self._feature_columns, key=lambda x: x.name):
+    for column in self._feature_columns:
       if not isinstance(column, DenseColumn):
         raise ValueError(
             'Items of feature_columns must be a DenseColumn. '
@@ -388,7 +389,7 @@ class FeatureLayer(Layer):
     return True
 
   def build(self, _):
-    for column in sorted(self._feature_columns, key=lambda x: x.name):
+    for column in self._feature_columns:
       if isinstance(column, SharedEmbeddingColumn):
         column.create_state(self._shared_state_manager)
       else:
@@ -422,7 +423,7 @@ class FeatureLayer(Layer):
     transformation_cache = FeatureTransformationCache(features)
     output_tensors = []
     ordered_columns = []
-    for column in sorted(self._feature_columns, key=lambda x: x.name):
+    for column in self._feature_columns:
       with ops.name_scope(column.name):
         ordered_columns.append(column)
         if isinstance(column, SharedEmbeddingColumn):
@@ -443,7 +444,7 @@ class FeatureLayer(Layer):
 
   def compute_output_shape(self, input_shape):
     total_elements = 0
-    for column in sorted(self._feature_columns, key=lambda x: x.name):
+    for column in self._feature_columns:
       total_elements += column.variable_shape.num_elements()
     return (input_shape[0], total_elements)
 
-- 
GitLab


From 2c93733eba3f9616b61d9de2e18f788a8180d8a5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 10:16:40 -0700
Subject: [PATCH 0871/1085] Call InitTensorflow in benchmark_main.cc

PiperOrigin-RevId: 216881319
---
 tensorflow/contrib/lite/testing/BUILD                     | 1 +
 tensorflow/contrib/lite/tools/benchmark/BUILD             | 2 ++
 tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index af203c5507..8f45d05ce3 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -307,6 +307,7 @@ cc_library(
     visibility = [
         "//tensorflow/contrib/lite/java/src/main/native:__subpackages__",
         "//tensorflow/contrib/lite/testing:__subpackages__",
+        "//tensorflow/contrib/lite/tools/benchmark:__subpackages__",
     ],
     deps = select({
         "//conditions:default": [
diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD
index 71bf61657e..af54421155 100644
--- a/tensorflow/contrib/lite/tools/benchmark/BUILD
+++ b/tensorflow/contrib/lite/tools/benchmark/BUILD
@@ -32,6 +32,7 @@ cc_binary(
     deps = [
         ":benchmark_tflite_model_lib",
         ":logging",
+        "//tensorflow/contrib/lite/testing:init_tensorflow",
     ],
 )
 
@@ -52,6 +53,7 @@ cc_binary(
         ":benchmark_tflite_model_lib",
         ":logging",
         "//tensorflow/contrib/lite/delegates/flex:delegate",
+        "//tensorflow/contrib/lite/testing:init_tensorflow",
     ],
 )
 
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc
index 372d31e838..b9cf6c67d2 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/contrib/lite/testing/init_tensorflow.h"
 #include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h"
 #include "tensorflow/contrib/lite/tools/benchmark/logging.h"
 
@@ -20,6 +21,7 @@ namespace tflite {
 namespace benchmark {
 
 int Main(int argc, char** argv) {
+  ::tflite::InitTensorFlow();
 #ifdef TFLITE_CUSTOM_OPS_HEADER
   TFLITE_LOG(INFO) << "STARTING with custom ops!";
 #else
-- 
GitLab


From 3516b82a48699dd7f0b2464d6e78a84ba32c1564 Mon Sep 17 00:00:00 2001
From: Guozhong Zhuang <guozhong.zhuang@intel.com>
Date: Fri, 12 Oct 2018 10:30:10 -0700
Subject: [PATCH 0872/1085] adjust headers inclusion order per PR review
 recommenation

---
 tensorflow/core/kernels/mkl_conv_ops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index 28d521c9be..ba0fb48b7b 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -35,9 +35,9 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
-#include "tensorflow/core/util/mkl_util.h"
 #include "mkldnn.hpp"
 
 using mkldnn::prop_kind;
-- 
GitLab


From 3b1b3312eb8cf48d15a7963b13b31fd0d1e9492d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 10:28:36 -0700
Subject: [PATCH 0873/1085] Add timeout mechanism for Grappler optimizers.
 Sprinkle calls to GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED() in the most
 expensive optimizers.

PiperOrigin-RevId: 216883291
---
 .../optimizers/arithmetic_optimizer.cc        |  2 +
 .../grappler/optimizers/constant_folding.cc   |  1 +
 .../optimizers/dependency_optimizer.cc        |  1 +
 .../grappler/optimizers/graph_optimizer.h     | 22 +++++++
 .../grappler/optimizers/layout_optimizer.cc   |  1 +
 .../grappler/optimizers/memory_optimizer.cc   |  2 +
 .../grappler/optimizers/meta_optimizer.cc     | 30 ++++++++-
 .../core/grappler/optimizers/meta_optimizer.h |  3 +-
 .../optimizers/meta_optimizer_test.cc         | 61 +++++++++++++++++++
 .../optimizers/pin_to_host_optimizer.cc       |  2 +
 .../core/protobuf/rewriter_config.proto       |  4 ++
 11 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 0c2686a419..e2bcf91950 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -3480,6 +3480,7 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) {
           << str_util::Join(pipeline.StageNames(), ", ");
 
   while (!nodes_to_simplify.Empty()) {
+    GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
     NodeDef* node = nodes_to_simplify.PopBack();
 
     string simplified_tensor = "";
@@ -3549,6 +3550,7 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/,
   if (options_.dedup_computations) {
     DedupComputations();
   }
+  GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
 
   // Perform topological sort on the graph in order to help AddOpsRewrite to
   // optimize larger subgraphs starting from the roots with more inputs.
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 3d0d95bba7..11331c9406 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -3042,6 +3042,7 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
   *optimized_graph = item.graph;
   int64 node_count;
   do {
+    GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
     graph_modified_ = false;
     item_to_optimize.graph.Swap(optimized_graph);
     graph_ = &item_to_optimize.graph;
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index bb14ce310d..6613768a35 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -652,6 +652,7 @@ Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
 
   const int num_iterations = 2;
   for (int iteration = 0; iteration < num_iterations; ++iteration) {
+    GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
     Status topo_sort_status;
     // Perform topological sort to prepare the graph for transitive reduction.
     topo_sort_status = TopologicalSort(optimized_graph_);
diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer.h b/tensorflow/core/grappler/optimizers/graph_optimizer.h
index 765dd13263..ede1c8056b 100644
--- a/tensorflow/core/grappler/optimizers/graph_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/graph_optimizer.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/env.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -29,6 +30,7 @@ struct GrapplerItem;
 // optimization of a GrapplerItem for running on a cluster.
 class GraphOptimizer {
  public:
+  GraphOptimizer() : deadline_usec_(0) {}
   virtual ~GraphOptimizer() {}
 
   virtual string name() const = 0;
@@ -45,8 +47,28 @@ class GraphOptimizer {
   // call to Optimize) performed.  Lower "result" scores are better.
   virtual void Feedback(Cluster* cluster, const GrapplerItem& item,
                         const GraphDef& optimized_graph, double result) = 0;
+
+  // Set deadline in microseconds since epoch. A value of zero means no
+  // deadline.
+  void set_deadline_usec(uint64 deadline_usec) {
+    deadline_usec_ = deadline_usec;
+  }
+  uint64 deadline_usec() const { return deadline_usec_; }
+  bool DeadlineExceeded() const {
+    return deadline_usec_ > 0 && Env::Default()->NowMicros() > deadline_usec_;
+  }
+
+ private:
+  uint64 deadline_usec_;
 };
 
+#define GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED()                              \
+  do {                                                                      \
+    if (this->DeadlineExceeded()) {                                         \
+      return errors::DeadlineExceeded(this->name(), " exceeded deadline."); \
+    }                                                                       \
+  } while (0)
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index 3251e7cb10..7dc62e24df 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -2188,6 +2188,7 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     *output = item.graph;
     return status;
   }
+  GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
 
   TuningConfig config;
   config.no_gemm = true;
diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
index 73f0977242..95079c5e21 100644
--- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
@@ -1299,6 +1299,7 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   // that simply won't fit in memory.
   bool updated_graph = true;
   for (int i = 0; i < 25 && updated_graph; ++i) {
+    GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
     updated_graph = false;
     if ((optimization_level_ == RewriterConfig::DEFAULT_MEM_OPT ||
          optimization_level_ == RewriterConfig::SCHEDULING_HEURISTICS ||
@@ -1307,6 +1308,7 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       updated_graph |= SchedulingPass(cluster, &optimized_item);
     }
 
+    GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
     if ((optimization_level_ == RewriterConfig::DEFAULT_MEM_OPT ||
          optimization_level_ == RewriterConfig::SWAPPING_HEURISTICS ||
          optimization_level_ == RewriterConfig::HEURISTICS ||
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index c3d70a1fdf..889265de64 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -85,6 +85,18 @@ bool IsTPUGraphDef(const GraphDef& def) {
   return false;
 }
 
+uint64 DeadlineMicroSeconds(const RewriterConfig& cfg) {
+  const uint64 kFiveMinutesInUsec = 5 * 60 * 1000 * 1000;
+  if (cfg.meta_optimizer_timeout_ms() < 0) {
+    return 0;
+  } else {
+    return cfg.meta_optimizer_timeout_ms() == 0
+               ? Env::Default()->NowMicros() + kFiveMinutesInUsec
+               : Env::Default()->NowMicros() +
+                     cfg.meta_optimizer_timeout_ms() * 1000;
+  }
+}
+
 }  // namespace
 
 #define MK_OPT(NAME, VALUE) \
@@ -114,6 +126,12 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
 
 #undef MK_OPT
 
+MetaOptimizer::MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg)
+    : cpu_device_(cpu_device), cfg_(cfg) {
+  DCHECK(cpu_device_ == nullptr ||
+         cpu_device_->attributes().device_type() == "CPU");
+}
+
 Status MetaOptimizer::InitializeOptimizers(
     std::vector<std::unique_ptr<GraphOptimizer>>* optimizers) const {
   if (cfg_.disable_meta_optimizer()) {
@@ -309,6 +327,7 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
 
     VLOG(4) << "Starting optimization iteration " << iteration;
     for (const auto& optimizer : optimizers) {
+      GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
       // Some optimizers can run only once.
       if (iteration > 0 && IsRunOnceOptimizer(optimizer->name())) continue;
       // Some must run only on the last iteration.
@@ -367,6 +386,7 @@ Status MetaOptimizer::RunOptimizer(
   // resets optimized_graph to an empty graph.
   optimized_graph->Swap(&optimized_item->graph);
   *optimized_graph = GraphDef();
+  optimizer->set_deadline_usec(this->deadline_usec());
   Status status =
       optimizer->Optimize(cluster, *optimized_item, optimized_graph);
   uint64 end_us = Env::Default()->NowMicros();
@@ -396,6 +416,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   // 1. Optimize main graph
   TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph));
   VLOG(1) << "Optimized main graph.";
+  GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
 
   // Skip optimizing functions if this is a TPU graph. Currently, Grappler
   // passes do not handle TPU functions correctly in a variety of ways (Note
@@ -431,6 +452,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
     optimize_function_library = false;
 
     for (const FunctionDef& func : optimized_graph->library().function()) {
+      GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
+
       const string& func_name = func.signature().name();
 
       // Skip already optimized functions.
@@ -535,7 +558,12 @@ Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg,
                         DeviceBase* cpu_device, Cluster* cluster,
                         GraphDef* optimized_graph) {
   MetaOptimizer optimizer(cpu_device, cfg);
-  return optimizer.Optimize(cluster, item, optimized_graph);
+  optimizer.set_deadline_usec(DeadlineMicroSeconds(cfg));
+  Status status = optimizer.Optimize(cluster, item, optimized_graph);
+  if (!status.ok()) {
+    *optimized_graph = item.graph;
+  }
+  return status;
 }
 
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h
index 99a0a33ffa..e599a9201b 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h
@@ -28,8 +28,7 @@ namespace grappler {
 // Run the other grappler optimizers based on the specified rewriter config.
 class MetaOptimizer : public GraphOptimizer {
  public:
-  MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg)
-      : cpu_device_(cpu_device), cfg_(cfg) {}
+  MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg);
   ~MetaOptimizer() override = default;
 
   string name() const override { return "meta_optimizer"; };
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
index 3f3f43382f..79a0726597 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
@@ -461,6 +461,67 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryWithRestrictions) {
   EXPECT_FALSE(allowed_optimizations_my_mul_2->non_differentiable_rewrites);
 }
 
+class SleepingOptimizer : public CustomGraphOptimizer {
+ public:
+  SleepingOptimizer() {}
+  string name() const override { return "test_optimizer"; }
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* optimized_graph) override {
+    *optimized_graph = item.graph;
+    optimized_graph->add_node();
+    sleep(1);
+    return Status::OK();
+  }
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimized_graph, double result) override {}
+};
+
+REGISTER_GRAPH_OPTIMIZER(SleepingOptimizer);
+
+TEST_F(MetaOptimizerTest, OptimizerTimesOut) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+
+  RewriterConfig rewriter_config;
+  rewriter_config.add_optimizers("SleepingOptimizer");
+  rewriter_config.set_min_graph_nodes(-1);
+  rewriter_config.set_meta_optimizer_timeout_ms(1500);
+  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO);
+
+  GraphDef output;
+  const Status status =
+      RunMetaOptimizer(item, rewriter_config, nullptr, nullptr, &output);
+  EXPECT_EQ(status.error_message(), "meta_optimizer exceeded deadline.");
+  // Make sure the graph was reverted to the original regardless of when the
+  // optimizer timed out.
+  CompareGraphs(item.graph, output);
+}
+
+TEST_F(MetaOptimizerTest, OptimizerDoesNotTimeOut) {
+  TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+
+  RewriterConfig rewriter_config;
+  rewriter_config.add_optimizers("SleepingOptimizer");
+  rewriter_config.set_min_graph_nodes(-1);
+  rewriter_config.set_meta_optimizer_timeout_ms(1500);
+  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::ONE);
+  GraphDef output;
+  const Status status =
+      RunMetaOptimizer(item, rewriter_config, nullptr, nullptr, &output);
+  TF_EXPECT_OK(status);
+  EXPECT_EQ(item.graph.node_size() + 1, output.node_size());
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 8ed4271fa4..8278bf8289 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -325,6 +325,7 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   std::vector<std::pair<NodeDef*, string>> const_nodes;
 
   for (auto& node : *optimized_graph->mutable_node()) {
+    GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
     bool is_candidate = false;
     TF_RETURN_IF_ERROR(
         internal::IsNodeHostCandidate(graph, &properties, node, &is_candidate));
@@ -342,6 +343,7 @@ Status PinToHostOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
 
   // Traverse all `const_nodes`, and map them back to GPU greedily.
   for (auto& it : const_nodes) {
+    GRAPPLER_RETURN_IF_DEADLINE_EXCEEDED();
     NodeDef* node = it.first;
     const string& device = it.second;
 
diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto
index 8c31468ff5..143df115f4 100644
--- a/tensorflow/core/protobuf/rewriter_config.proto
+++ b/tensorflow/core/protobuf/rewriter_config.proto
@@ -128,6 +128,10 @@ message RewriterConfig {
   // "gradients/", the default, it will match node name "gradients/foo",
   // "foo/gradients/bar", but not "foo_gradients/"
   string memory_optimizer_target_node_name_scope = 6;
+  // Maximum number of milliseconds to spend optimizing a single graph before
+  // timing out. If equal to 0 the system picks a default (currently 5 minutes).
+  // If less than 0 the optimizer will never time out.
+  int64 meta_optimizer_timeout_ms = 20;
 
   // Configures AutoParallel optimization passes either through the
   // meta-optimizer or when manually specified through the optimizers field.
-- 
GitLab


From ca6756bd164de8a037513e4f15f395fe9fd128c3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 10:29:05 -0700
Subject: [PATCH 0874/1085] Bugfix for optimized TransposeConv(). Was
 incorrectly interpreting weights as HWIO, instead of HWOI.

PiperOrigin-RevId: 216883371
---
 .../contrib/lite/kernels/internal/optimized/optimized_ops.h   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 77f84e0c1c..aa09a0a9d4 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -5674,12 +5674,12 @@ void TransposeIm2col(const ConvParams& params, uint8 zero_byte,
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
   const int input_height = input_shape.Dims(1);
   const int input_width = input_shape.Dims(2);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
   const int filter_height = filter_shape.Dims(1);
   const int filter_width = filter_shape.Dims(2);
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
-  MatchingDim(output_shape, 3, filter_shape, 3);  // output_depth
+  MatchingDim(output_shape, 3, filter_shape, 0);  // output_depth
 
   // Construct the MxN sized im2col matrix.
   // The rows M, are sub-ordered B x H x W
-- 
GitLab


From fd3b31c81e1e1d926269b6d6e6142195de6ee6f8 Mon Sep 17 00:00:00 2001
From: Shafi Dayatar <shafi.dayatar@gmail.com>
Date: Fri, 12 Oct 2018 10:39:59 -0700
Subject: [PATCH 0875/1085] Update README.md

Provided a link of docker document to start the docker instance.
Added command to access the Notebook on Windows.
---
 tensorflow/examples/udacity/README.md | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md
index f80c56d1c1..d9c5c2a9d4 100644
--- a/tensorflow/examples/udacity/README.md
+++ b/tensorflow/examples/udacity/README.md
@@ -3,6 +3,10 @@ Assignments for Udacity Deep Learning class with TensorFlow
 
 Course information can be found at https://www.udacity.com/course/deep-learning--ud730
 
+Getting Started with Docker
+------------------------------
+If you are new to Docker, follow [Docker document](https://docs.docker.com/machine/get-started/) to start a docker instance. Kindly read the requirements of Windows and Mac carefully. 
+
 Running the Docker container from the Google Cloud repository
 -------------------------------------------------------------
 
@@ -17,11 +21,18 @@ Accessing the Notebooks
 
 On linux, go to: http://127.0.0.1:8888
 
-On mac, find the virtual machine's IP using:
+On mac, go to terminal and find the virtual machine's IP using:
+
+    docker-machine ip default
+
+Then go to: http://(ip address received from the above command):8888 (likely http://192.168.99.100:8888)
+
+On Windows, use powershell to find the virtual machine's IP using:
 
     docker-machine ip default
+    
+Then go to: http://(ip address received from the above command):8888 (likely http://192.168.99.100:8888)
 
-Then go to: http://IP:8888 (likely http://192.168.99.100:8888)
 
 FAQ
 ---
-- 
GitLab


From e19fa8ab03fccba85c102ad3a42dcbe1160a051a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 10:49:26 -0700
Subject: [PATCH 0876/1085] Handle strings in tooling_util.cc

PiperOrigin-RevId: 216886885
---
 tensorflow/contrib/lite/toco/tooling_util.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc
index a770ff8544..0deaf56593 100644
--- a/tensorflow/contrib/lite/toco/tooling_util.cc
+++ b/tensorflow/contrib/lite/toco/tooling_util.cc
@@ -2204,6 +2204,8 @@ ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type) {
       return ArrayDataType::kInt64;
     case BOOL:
       return ArrayDataType::kBool;
+    case STRING:
+      return ArrayDataType::kString;
     default:
       return ArrayDataType::kNone;
   }
-- 
GitLab


From 445c05746531c7249ee819cd0ab18d601bea1eff Mon Sep 17 00:00:00 2001
From: Shimin Guo <smguo2001@gmail.com>
Date: Fri, 12 Oct 2018 11:02:14 -0700
Subject: [PATCH 0877/1085] add a unit test

---
 tensorflow/python/framework/ops_test.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index c3a3437743..b600cd0deb 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -254,6 +254,10 @@ class OperationTest(test_util.TensorFlowTestCase):
     input:'myop1' input:'myop2:1' input:'myop2:1'
     """, op3.node_def)
 
+  def testDevicePresent(self):
+    op = ops.Operation(ops._NodeDef("None", "myop", device='/job:goo/device:GPU:0'), ops.Graph(), [], [])
+    self.assertEqual('/job:goo/device:GPU:0', op.device)
+
   def testDeviceObject(self):
     op = ops.Operation(ops._NodeDef("None", "myop"), ops.Graph(), [], [])
     op._set_device("/job:goo/device:GPU:0")
-- 
GitLab


From a0653f833684787bcba74ee1870b6c81b016f42a Mon Sep 17 00:00:00 2001
From: Guozhong Zhuang <guozhong.zhuang@intel.com>
Date: Fri, 12 Oct 2018 11:05:23 -0700
Subject: [PATCH 0878/1085] adjust headers inclusion order per review
 suggestion

---
 tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc | 5 ++---
 tensorflow/core/kernels/mkl_conv_grad_input_ops.cc  | 3 +--
 tensorflow/core/kernels/mkl_conv_ops.h              | 6 +++---
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index eebd788545..c1b182be4a 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include "mkldnn.hpp"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -33,14 +34,12 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
 #include "tensorflow/core/util/work_sharder.h"
 
-#include "mkldnn.hpp"
-#include "tensorflow/core/util/mkl_util.h"
-
 using mkldnn::convolution_backward_weights;
 using mkldnn::memory;
 using mkldnn::prop_kind;
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index 1f9e6abe44..786a30bb10 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 #include <algorithm>
 #include <vector>
+#include "mkldnn.hpp"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -42,8 +43,6 @@ limitations under the License.
 #include "tensorflow/core/util/use_cudnn.h"
 #include "tensorflow/core/util/work_sharder.h"
 
-#include "mkldnn.hpp"
-
 using mkldnn::convolution_backward_data;
 using mkldnn::prop_kind;
 using mkldnn::stream;
diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h
index ba0fb48b7b..e6989d884d 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_conv_ops.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 #include <memory>
 
+#include "mkldnn.hpp"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -38,12 +39,11 @@ limitations under the License.
 #include "tensorflow/core/util/mkl_util.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
-#include "mkldnn.hpp"
 
-using mkldnn::prop_kind;
-using mkldnn::stream;
 using mkldnn::convolution_direct;
 using mkldnn::convolution_forward;
+using mkldnn::prop_kind;
+using mkldnn::stream;
 
 namespace tensorflow {
 
-- 
GitLab


From 865b2783aad179eaf06161d016a42fbd4c18bfb4 Mon Sep 17 00:00:00 2001
From: Billy Lamberta <blamb@google.com>
Date: Fri, 12 Oct 2018 11:03:07 -0700
Subject: [PATCH 0879/1085] UPdate tflite converter diagram. Rename URL paths.

PiperOrigin-RevId: 216889190
---
 tensorflow/contrib/lite/g3doc/_book.yaml      |  17 +++++++-------
 tensorflow/contrib/lite/g3doc/_index.yaml     |   2 +-
 .../cmdline_examples.md                       |   6 ++---
 .../cmdline_reference.md                      |   2 +-
 .../contrib/lite/g3doc/convert/index.md       |  19 +++++++++++++++
 .../{tflite_convert => convert}/python_api.md |   2 +-
 .../g3doc/images/convert/sample_after.png     | Bin 0 -> 185267 bytes
 .../g3doc/images/convert/sample_before.png    | Bin 0 -> 155610 bytes
 .../lite/g3doc/images/convert/workflow.svg    |   1 +
 .../lite/g3doc/tflite_convert/index.md        |  22 ------------------
 .../g3doc/tflite_convert/toco_landscape.svg   |   1 -
 11 files changed, 35 insertions(+), 37 deletions(-)
 rename tensorflow/contrib/lite/g3doc/{tflite_convert => convert}/cmdline_examples.md (98%)
 rename tensorflow/contrib/lite/g3doc/{tflite_convert => convert}/cmdline_reference.md (99%)
 create mode 100644 tensorflow/contrib/lite/g3doc/convert/index.md
 rename tensorflow/contrib/lite/g3doc/{tflite_convert => convert}/python_api.md (99%)
 create mode 100644 tensorflow/contrib/lite/g3doc/images/convert/sample_after.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/convert/sample_before.png
 create mode 100644 tensorflow/contrib/lite/g3doc/images/convert/workflow.svg
 delete mode 100644 tensorflow/contrib/lite/g3doc/tflite_convert/index.md
 delete mode 100644 tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg

diff --git a/tensorflow/contrib/lite/g3doc/_book.yaml b/tensorflow/contrib/lite/g3doc/_book.yaml
index f6ec387ad2..05c65441c3 100644
--- a/tensorflow/contrib/lite/g3doc/_book.yaml
+++ b/tensorflow/contrib/lite/g3doc/_book.yaml
@@ -38,15 +38,16 @@ upper_tabs:
         path: /lite/ios
       - title: TensorFlow Lite for Raspberry Pi
         path: /lite/rpi
-      - heading: TFLite Converter
+
+      - heading: TF Lite converter
       - title: Overview
-        path: /lite/tflite_convert/
-      - title: Python API
-        path: /lite/tflite_convert/python_api
-      - title: Command Line Examples
-        path: /lite/tflite_convert/cmdline_examples
-      - title: Command Line Reference
-        path: /lite/tflite_convert/cmdline_reference
+        path: /lite/convert/
+      - title: Python API guide
+        path: /lite/convert/python_api
+      - title: Command line examples
+        path: /lite/convert/cmdline_examples
+      - title: Command line reference
+        path: /lite/convert/cmdline_reference
 
       - title: TF Mobile
         style: accordion
diff --git a/tensorflow/contrib/lite/g3doc/_index.yaml b/tensorflow/contrib/lite/g3doc/_index.yaml
index eb32d3e94a..44ee6ba750 100644
--- a/tensorflow/contrib/lite/g3doc/_index.yaml
+++ b/tensorflow/contrib/lite/g3doc/_index.yaml
@@ -132,7 +132,7 @@ landing_page:
         TensorFlow Lite Converter.
       buttons:
       - label: Read the converter guide
-        path: /lite/tflite_convert/python_api
+        path: /lite/convert/
         classname: button button-primary tfo-button-primary
     - heading: Deploy
       icon:
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md b/tensorflow/contrib/lite/g3doc/convert/cmdline_examples.md
similarity index 98%
rename from tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
rename to tensorflow/contrib/lite/g3doc/convert/cmdline_examples.md
index ffb73b77b8..44fb4f19ae 100644
--- a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_examples.md
+++ b/tensorflow/contrib/lite/g3doc/convert/cmdline_examples.md
@@ -1,4 +1,4 @@
-# TensorFlow Lite Converter command-line examples
+# Converter command-line examples
 
 This page shows how to use the TensorFlow Lite Converter in the command line.
 
@@ -306,12 +306,12 @@ Sample output files can be seen here below. Note that it is the same
 <table><tr>
   <td>
     <a target="_blank" href="https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf">
-      <img src="https://www.tensorflow.org/images/tflite_convert/tflite_convert_before.png"/>
+      <img src="../images/convert/sample_before.png"/>
     </a>
   </td>
   <td>
     <a target="_blank" href="https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf">
-      <img src="https://www.tensorflow.org/images/tflite_convert/tflite_convert_after.png"/>
+      <img src="../images/convert/sample_after.png"/>
     </a>
   </td>
 </tr>
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md b/tensorflow/contrib/lite/g3doc/convert/cmdline_reference.md
similarity index 99%
rename from tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
rename to tensorflow/contrib/lite/g3doc/convert/cmdline_reference.md
index eab26f5cb2..d72a46760d 100644
--- a/tensorflow/contrib/lite/g3doc/tflite_convert/cmdline_reference.md
+++ b/tensorflow/contrib/lite/g3doc/convert/cmdline_reference.md
@@ -1,4 +1,4 @@
-# TensorFlow Lite Converter command-line glossary
+# Converter command-line reference
 
 This page is complete reference of command-line flags used by the TensorFlow
 Lite Converter's command line starting from TensorFlow 1.9 up until the most
diff --git a/tensorflow/contrib/lite/g3doc/convert/index.md b/tensorflow/contrib/lite/g3doc/convert/index.md
new file mode 100644
index 0000000000..bc92a1c1a1
--- /dev/null
+++ b/tensorflow/contrib/lite/g3doc/convert/index.md
@@ -0,0 +1,19 @@
+# TensorFlow Lite Converter
+
+The TensorFlow Lite Converter takes a TensorFlow graph file and creates a graph
+file used by the TensorFlow Lite interpreter.
+
+## From model training to device deployment
+
+After a TensorFlow model is trained, the TensorFlow Lite converter uses that
+model to generate a TensorFlow Lite [FlatBuffer](https://google.github.io/flatbuffers/)
+file (`.tflite`). The converter supports as input:
+[SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators),
+frozen graphs (models generated by
+[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)),
+and `tf.keras` models.  The TensorFlow Lite `FlatBuffer` file is deployed to a
+client device (generally a mobile or embedded device), and the TensorFlow Lite
+interpreter uses the compressed model for on-device inference. This conversion
+process is shown in the diagram below:
+
+![TFLite converter workflow](../images/convert/workflow.svg)
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md b/tensorflow/contrib/lite/g3doc/convert/python_api.md
similarity index 99%
rename from tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
rename to tensorflow/contrib/lite/g3doc/convert/python_api.md
index 71a38c7bea..9dcb79187e 100644
--- a/tensorflow/contrib/lite/g3doc/tflite_convert/python_api.md
+++ b/tensorflow/contrib/lite/g3doc/convert/python_api.md
@@ -1,4 +1,4 @@
-# TensorFlow Lite Converter & Interpreter Python API reference
+# Converter Python API guide
 
 This page provides examples on how to use the TensorFlow Lite Converter and the
 TensorFlow Lite interpreter using the Python API.
diff --git a/tensorflow/contrib/lite/g3doc/images/convert/sample_after.png b/tensorflow/contrib/lite/g3doc/images/convert/sample_after.png
new file mode 100644
index 0000000000000000000000000000000000000000..6c451f97903f7f70a9f28dee8abf6daeb7ec5693
GIT binary patch
literal 185267
zcmeAS@N?(olHy`uVBq!ia0y~yU~Xh!U`yj*V_;wqySy-vfkD!>DkP#LD6w3jpeR2r
zGbdG{q_QAYA+;hije()!*4kO=lVmI>wf=uBQq7{#Zo#+Y=Dmde{r8>2f=!pVZ{D^h
zFU`=%n5p1ni~Ekx_Wx_|)_*)+bzUzzcj@#;KPOkeEBxU7dA|Mp>kmHv{#~r`<4ft6
z<&XP<_e}or!e0Jo&6(++D|wqAfBVaAS}!}_UcS7vwyyK-YnS-^Kc$D3XuH?_{1x-t
zDzIz*<A<;BudA=ke<Hu_^Vj;w`B&TiZao$MZ?5*i8^?YIm3&+>!KPk3R&`_3l}i^4
z|9xBeL2>>o>q|dx2mPyk6Z1Rj)B1zIzunf~`1$ecieHu@@3gP1`8{2tR+j(U?;CqL
z{_a@x;qNK?y@{40{~ynnPbz<(_kG^^cW!zvt9GpS{kwJQcWL=KEk=9Sue8znFTMZz
z_sc&sa|$PJ^fgnrKiw*)G;yzDyR6%ehkLGzig};;c;r-ekM^?iHA|(cWArs6?H?ZV
zlG>*1Zm7FynuW~%wW@nwe=FZBDiwaO(f`<ilT7!2-0uH<_u&82=eG;rU2Vm2aJ&B;
z4eo2Ap_g@>XI{Qv)78@QTQs=(W3BzW_FW(CRm>(jRww4=6#V#Ud34K%z1uFGE`C(}
zZ1d;&*EmY+=Qaow<|Z2|PGpRC+7!^V-td}0N3ybg^VH*09;+}P)Oq(nZ>ML8Q?Kr;
zTQ$quzTeFFSS*pDC^(I4?WHRkD%$Cb>$38$T-cCub7thmth2j%Cwb|Gd98GnFwI)E
zGfH>swXkYg-K}@lObv_9-rFlz&2(tZ^tjmkz26OYym)Y>IiKIo@K}%WS)1Hxk?FH*
z`&@sn*^@IhG(5VtSGN4tmj8OYcik<!|G33jb8gL>DZ&2Me|x;w-YxmOH9Bwa+i%wU
zQgv&#zV(mf-k<Vr{${a#o0j!F{l4>;ZMMw1uj_+Z4t8p$r|3L$IJVlz!O{IG&y91I
zTE(YkzMr^kgU7WdO-pNy<P?MX0&m;$cmAHYv;NHN&41*d>i%1O#&YSq|H{RFD}SH5
zbN}<V*=yc@lu+HvvXWagc)s3&W9@D^f7s>zzkIEk8EL!m`_4-~(#w`UkD9&s+{1kC
z;)@!O-yH8--WMBrK}pW~W2@mQ6N#nktPfll5PoshHgfjeNT025LQfpaY_$-)U-5iS
zt#H`s+UYjOVqeD153`;swk+hkZr8H4cPkH1e<Swe_QB%?))70xUMvv4$+=ZE*rWLN
zOe3G~E?4996u--<-H+coNzhK{dFMNyZQE|G3p>WQ`ixxk$y*EU|6aS4%d}2Ca7y7+
z>F+&X=lL<eo%K%5J@sAD<lURwjq`QfE~M`&J$stZ%+V)Ve_x@(Z0VJa0p~iyZbhk{
zxx#TguVv$!*gvJdEOyuVjHf#tJn+DHKCkGOJ*VEyi`u7pW68<hY2C^5f}^+lewJX3
zUw_}c+Wge3iRT((U(8~>YifVenc+}q=FXs-TT*YQ%=C&$Zu6;ie-s(LVX@YNr3_||
zHU;0E+Z!OGy0|9IVrzN5_nq%7xA)%cn#tB<e7)$HRb`HK%x$yR(hK9ZFPd3*(@wr#
zygJ0HR$Xk_{Y_U+F5S=18lV)qQT^^kTh5Fn+0SH8`OW>pzJEv3i-{t#`K4!eSD*M=
zHqH6a`*rOeN2<N%ZV~>kVb&tIdvj>gj9_iQM)TQ^e_0>CGv~<s+mAKnQcm{%`mU}M
zXm^oob#Hb{cy{g+vzv2Ub)LP^%5c84F#XlHYXxq%(%0=4n10dde@6R+*tN&GTxE}2
zyq)xDtMVqr=Gv0@rLJD{J9}4s(A%#dp~Y3QzI0kZutdR9QT?gRaa%Y;w!}&0Gq)~(
zd*a#l&u^q}ytvkPiS^c#o8GfjWJ@|HFL;0BTVnE3JykZPHQT?q27h&YzTw#0<-3==
zDiy3ac9rQwDMye*&E;@0V}6Gc3+CSK$|;jyPI5YZ-r9!!{F5bbp6?1ixW=LQw&dSS
z@_Xl>opelxA@aRx!(o@1!L6KS@(U^_&tfbvvr>{<WOcm6>2cDu9TuJE9JCk;&uaX+
zb?ipWf{XQ{Pj03&Ojcj_I{a47T>tAu<uli^9XAzU;rM%s<dt=`suTK+S4L&T@Xunr
zb|pRk!QBQa9;+ztBh!=8m@jZX;Ag#%&YpW+#D4y+TUB>=X2)M`;azxX&Qjm4yQXeF
zs>>k8>+^3n`^H;^ji*?y_v~X)*X-sBWN1-e{Q3Q|o40dx&u=>ER}t>{yUp&9MYGb^
z1qKP%?^q<9&soYf)kws!e~G_O3FpFWcbDkvpVs()4Qbsu@5b6ntA)p|U$|dx$z=A6
zNu7Hu<1I#JgAjI>L#(HJt2}co92h^8%=zqa+$G~)NMr3az9&oqddhsK_QrPCUSG89
ztC#81!i3LuN7n3pyVUKg>6`9F4nG7JSx01vl&P^hHaacPKf|cv*y>R4w4KrHD)U>0
zlkXgub(n=F<)jws-9OD({N{o;NB9ShX|pao*D00ZTCa3)H)Fi9(BEr}*OF=k?^g3Q
zo}6?y^i0~O*%Qx2^A<jgsARd6bXX@R$w_Gb33f(-mj^nv@*`Q+@^A3xdcQ>_Kt72x
zYW<u8lVUQ~Z;w27Ztm)i0<OP?2MbHG87K5^JNZEI>Z})e1r2>}nuUA+?p*QoS=dsc
z_KWgQmPy)j9F=?_{Jf(w=>7t6U(er%ud=Vp%lVjbzw%4KuWu6-9x8m4&Yk-#bAiqV
z<{+zBUcF{{jpm$!TWn3)`xOhdEIuDfP^g&p?T(23h6g+*?-><3vzJ6nbPG#7ZTY}x
zZhvlKy2544l_D=@uba;$@rHlTN`;%dOt~iYEs}a+J8jXt9|xE_E^lBK4PAO`%86VP
zUa`(!%!k;{Fbc?Sb$AkM9<j^5j)(v48fO`XopKiy4K}F9zE=3cvF8K3RC|g*N=%vT
zFN5%`yK+~!A|Lg%9-0!8$U5PrVKdK>J&Hn7Pdvgld1S~L?=1<7nvt}eXJrJ-g2i8x
z9Bj{orilMk`hV%?g5`HJpSZoS?&*HQbSUyC>&oLEbGhF0Z!!~lWpzaIo6!OLF2ygL
zf{k-0ty4bcv@-6?D__N~;8%BchB6*|eSFr7wKvp7-?3z?<S+!h6}ToBz_Rn5!_u%#
z{>4`ex}99+F`jI?*LQD1#TVTfMQT4}yP9PJo~|t28hu-?lDmF$=mfji)CY^i4=~u=
z(YSQ7bk(;5+!?|@wHLZLTdbNK^Y-NOXl|h?9eS@i#6L}c^6c7^8ab0DwikkaJsB$Y
zvtNXBNY1=JZS||97kcRq41oulG#{HYPF&h~K!!_T#Vv-Z%Xeo*7Z^QHy!up5B&uoM
zgE|!r$L|aVZ}>Uo6VJ75NIthkG-lIMKgYXXN4Gi!Z^>rf*-+!M+G%UZ%D@RJg<@N#
zI!+0%+Pyfuf#b_`-TEKTUL9fm<#OAbKR~rk&xBh=Klh^k+{b*g7InT^D|CO|r$v1Z
zTnAT`2;RQaYpe4_;+?kti-dDsGowzt{{5hG(;}9N;|&4-6t?O2tA3MSoS?4|{gQ=`
zrPrrGPG!#Ku)W>O9OM~__PkY?P`q$C?*q@NKZCwc+Zm89-@=!{@$@0X3ulcI=EWQ@
zgL5}+t*~9tz$YMkwRZ`xn(5uYZf!30#)@xmN!&Qql<+%)+swn@xsQSVTJ|DI--pV&
zwSALs@Jw+F75>>FYJ0&{x5rBFMnc#7%+HZq*G}enGyU?3X&N%k8u!&^JHJ*F`LJki
zM$svQiGr847#2GVPHAJGX*$*RV}!s9HrAI*lxO-z2AW)d{<7f-=S-zf%&S~<rYOj0
zO;2$7%)L^Y!*S{-#WD{u7uky|S1^`2K4E$yW@}~b|3odNJ%P!1r-{#-jhnU~`pkRw
zVA+I)rOB_%Y`p(m^p@N%)U{^rg6b@@pv({lZn@(Zr&KXVzMUb|VmN<s@`Xz-T8F2+
znYS-zy3>9IH_!HWNB2+HD4%j+Aw$6SFE%$099LL>F|a_DbMKe$d(GnK#`c_v)UdUw
znDo=NpSLMx#;jK5=7ba_<84u|Bu^Zh^UzS>#)AY`xhujMs)ypuS}*<(o8#nlHDva#
zJASVmUsar#WfEe4M{7=_Y1099WiOL03$jm5VrYA-e8?++{ml_J&qEtdCRVeFzU~z|
z;NAG=fKq09`IJ9xQVVO6HijxNNX&NPUB0Ox?S|t9wmDX_ekooq$k?kSSRCuY{c6V9
z@7f=yYGtjA{kBtj#=Vfz2VW;0-^ggj&G7JRoXm8#_l5d3|CF~pv9?gvXgqT)r0FK3
z?JVgN8u2>US>3Zjl;85CJ)6sQH=gUBShhi-qGr@m3k9)#1xC+~UhXO|oSN}+vAk!6
z`?{6!E~b*tTyH4-b*nvY64LMZ=Z>p}0jJo0foq;eFSC?r++-;;Tz~TW_WL~FW0)NR
zbNsbW{JC&`f92FAjx(Az#4>rz>F($ae8V|W@!$7_SF&1N-K4Yb7j(=@+a?vIpyA+R
zW62|Z+u>5+`YkFa?ydW@=iv3J3Db0HJ0=%P*WI^OT`|Wk_sERB8y0fSRLD|k+;DMc
zj_f-QjV@JAEn&%ZoVu<5-0mGxTC-&RTz7{^Q_}^{Yb2}~m|k}>J36lHbO@K$xZf4g
zTy|Pnm`BjZJ78gsR&J8TN7rh(oxNSM@3j<5Dw^1p7sQ$FWlOklEArO~`^_^_^ct>;
z?74m0yu*1_#HD7Lw1nx`4w*68PW)5WdwxT8fPKa-u{Cn45gXOE<lD1_>YHCQOs}2P
zl36u**}~f@_0_fx>Bs)*yPVtUw87D8W#qnTyP1!e8Pu>#E>%mIq;-p%b%IOM$*iE3
z;P(w3E23^KJYr!I)wPLR!y%%^f?M`y+nf1YbQk4!v~ApEc0PIA%&Ub{O!Gv07d&j9
zrNM8pMl|!O$K##{7w&!8bNs@}fF{njHupaVd{{6sQ!X-_`NE29C5FEI8EU?nbzIUn
zn)kG6&%bk+Pef>g%IT>*hHC@w&lXc)TAZiy!*$Jix7LZXx}GrGv%M8gvO2YOJLBnw
z%YHWuR3v@(F#Jt(TYgT%LgkY5ALRot{svjiwY_CmQI+p|c<nQ$R)^#6GJ0W4B3f*p
z9Dme0rQ`gxyGte;#!S$z=GimB<!;BK6~_)7nXvVh#%>YSa}&g_sIat6kkc-4F}NbQ
zL|gUg{WC{PgwNE~vUd0v>F9XRR6cy$?Q?1GCV^9t--DafpP86nm~_sxVE;Dfh)E0d
z6VzAq_h+5tJI%CEVMg+1oeCL$#(6rOH#757gij`1P8GMBx$9+QQJ>0V|Gzcmm)E(i
zVb;AmCGbvZ+ZjR68=()PKYQ=%ob^m-i{bxcYcID&vi-m2_W#mi$7iaRQ$(ZuH*@~a
zs?mMLG4V>DMNN#RWk+|JlE;)kDZ#bR=5inK_m1wCn07^$`;~^@jrM0#bdGWdUumej
zsvxYr=;NaXYsbhd(;eSF+j*pE;=0!Iy}Nfgx2?D%&Nju%VLS8L&=Q}v#O|JT*Vb%1
zaKmuHOZm$(KX1BD&Hm&k92mm4WkCSjZuiY>28?qJpG@&*)Stj46=r|`pjFA?1zi^%
z|EapE?|Bit_x(AU_stnjmIcCXndh8$2Su=TT`~|f->mV&c9Po~RjJwu>0YkepQX7r
z?b0h_ke|?Cp6%1ybAy%js&SOqgGHAv{`$M;Bx{t(caw*)VLjJxN;yw%oftcXaY8Yt
zruJQl(%Y9@E&Y4>8f8w&{MBZ#uMwFlls9eefny2NZ^Rp&KBuq4(lPCb!vg*yfyU-1
z20RX`9_ME+>~Ys}Vf-0b>^4icM_Y2=Dx;ldn~o)!yw&8;-@sPpl=*BXE5q!(J?(lH
zFDBe){BY&xg!#hdEgJKs!X7a_joLlEqN)4C#3^<h3?C%+9N>Dnpl#Pu(WvLEwkvO3
zIeFe+!*;`f&`c+p0}+*}4C1;yyB4l0cvRBw`D{6Ngm!qc^J6#FHw!hZT23Vj9%0Mi
zI>x`|)?EjQMaJfhc?X`aS35QRh2JIqDPo2Dc5N2Wmpmst(JUY$wET|>D~H;{Ide@j
zMEXB}%RT&m`;z4yY(|EPOzn9rN(G7%EY?ivN)uMBeaA9CcAvb7!r2f-heDIP3!bJ}
zX9`=1ocA+N<C=DQN;&(Q%k2}G%&G!*_I>wS`@*y0k?8*qj?O_fis#swKVH#%;p2Me
z&VgkMK5}0Y$^Vn%AHQSHGY<QysX9!57HbKdR89#L*yg#JqyO$Vsl|)0e(WeoVKVpG
zq}{lj!7+N%A{obL57>%cDeN~<$?2(SQvNw9yn<Qa*7n#I{u!p3d-HnZ8=iKzOxexV
zC&7K>hWNK^w-TSAQ2X}2v^Sxb(?0Do{jj{U$;X53y5+)UQCl1*+%Ng-72JG+`LgrY
zpX+u%<;dtr?NVmDxV3GytN(N{E7<^LfnRN<yIQx-cInDqY%(Rk<J!cY!h;GC8ae9!
z6V@Ki_A`1sHKgU<&$ohF>sr|kg`0WtP3hOGT+(ynkNo=;sxdnpB_%sDr8Wc~Y`k!}
z<Ffb`&r%UThw98Uw(bil!OUNE_5VKM(P>j&;Fh-Fc$d6%-)X5^9vdFc4D?i;`Yyug
zF1MclmAx<7mc9Hr<C&?H59{Q+HPeN+H*Q$)rLgwXBf%xpE0ZcqB95$kti8BRVv$MT
z8ZT|hDNMikdDnRzD`vFcqBEf{W9IukC$xH3eDVD8Lg)LEy?K6*WU^b5**rg6ir!(6
zZCLI+dDAZM(*<i9(gcH?7-qK5z3B5m-tFi^UcqyzZ%j@1ax1?Q?3&RbeyKn2@LR<b
z{0G)n3(mN(@ONhF+y&A#hntq2PTZ}q+1ym{YD*>OQ33V0kCuMbaChDl)W-Xe@wr#k
z#98IDyNcto_!AZ^ZH(I1W$&2zxpA5F#kj*?xMt=?iOuDC_}J*9^Na5C=&2sAcN-0S
z6z16U<|yANjn?(DYm|Az%)8*wx>+l4nb|I5%9DMO>M{9A5MLPEiwm{e<U~r1EZqyQ
z$?fflTVeBH-QVU*|6We@ihp34I_asp%(v|49$^vlCT6v`TZQiJSRfdts$(2~GCk`9
z`^?2ji#b<6ozz~`<QR8jO^4vs<u-G)|8$8?-0{7sO+%LXVE}ilTKBBilQ>SO7aKAZ
z3%&J`K6pH&EV97-*>W9$bssI)*+1ALEu=WP_F=t9xNOjw6RA6@_MJ)eI2R>r<(6?b
zVuzT9&9+5u|1#2(e#><<tY}=t*d4I0hT(}rp{j*$6~lyS*{AH=xU_GXifZqAzdOO`
zO{v!x#%kjUOsefgwl_ScwU?}s3Ym0GAv5dJ)(SPj3ob4R&l+8r89p>^t?3bRx$q|b
z<-_^$oC*pHJ}OqtQh4<(W?$%`2Oj@s>Cb3gdQWk>h20ws`Fkwdww`n5Eo@oYeusJ0
zv`?3KmMeB0`^vK8upa9kW*x(Y9_0zUGYecT7pnza-NwCBjzK*Bfp@@^-8V`lv+v3u
zTe946>OCh`9?AL4E|2B(nlio4D_lG-kuuT3iE(CHpOmHO^6ncvOwwQMk5rY;z9ela
z`tgB9kKA;zoC?1wUycN^Of-1g`^dw8UR3$QMOpV&RQ`30|MSaA;H7wU&T&Px2}yQu
z-K-``Z8=vxOZy?eRM~0oa>h6Zm7XHU=pVeHEeWx^b{BVa*#EOq)Zcw9B=7Rw6iI(y
z{*@9bj~k1eZVRe=tX^<d>$sq;@5Y>nmU4f6r@4L=;fkM5{`5R@Naw+QbH(Flvui$9
zRy5^J(%dgKCGPG=b~Tlo+ur?OoH42TyS?f4t6%2Db}xE(hVPAX5Z|Md;Y=JJ^KK-t
zF!ja%m)hU^M>p=BJ-2~UOnKIii%yp7=gqxv=K7qHJ@c$TEDU+jQQo1h<@fkoN{Pg|
zidWwPFA8fEc=1b3o{^X5-!;>iaZ~kf+Z*eag+%z-bzX8-miBR&`jXAyR>>WK#mk(z
zB^~c7zAt$!`GPaJIr@mt@=c{5mmU0*vt3d6LG;SY6MtXkk=|K7!S`{D?M_FgK8vl=
zml9uxnO)rzoSo-el`G;BbLlhJ*S>wnKObD?uJcFUsiFKnpGbY$rGU69FXdj%gRXlQ
z^)+{G&b;KFo)LCRx-i2->G8UYw+k=$wRrn$b(~zu&K)7(+RD+W>iPAExvb5r&X$Ir
z>=O&p%Y|kgy}l^&MEbF3zpp5>7lvgjlvJ?_EO)d1eL#dEWS_>*vZw=BKG(f|Vd%@2
zHkWnhX|H8+osMVS9<<(BBdWA#L3O}~Dv2tGuC<8_%lIRhbp+=WoYS+=bT%ngb1Z4R
ze>T?khHFOE-=2wGX}>LGc8JvKU*J&@^iP_b8^rBj#@SQ-+wjr5{(zrX${)Av&8_U`
zjyX_O()Ye<&XV8LWTfiq<gNt#e`zRFG5IOm2AQ)ZOD9d_6}fpK*l2z=U+Co?=4CRU
z+*_4pWQ3TiSJ!zcUB0kW=2N?bd<n0GYo~0CMP1z<oAglC4|6t#o_IMgQ08~a2M=wQ
zN~i0+iUwsFG5rTD0<^EL`5swv+m-*gM5{1s(1QZ!$0wd%m}VC9u;lqprM`=kIdac@
zpD8UH?E3d;$>lEI%y~S#35<5<nOT$^zt(zYe1Go0KJ%WcsignWhSQGxFL{zYmp|Ur
zuf|Z3mo@EOz9sV>PTQ#olNU=na>$kHKc8Xwu#{hB^N*@L>3X{la{~FR0~fZi*ml2p
zT~+Yh&E>Jm>fmR;UDhw$*e{oBa@{6IqP8br=yX<#X_H`uKoMKii!bHkZ=D#vU;MC7
zi;ZUm&-uBL{7*{m+H6~Od`H>+fG3CkcK$YHE^#hTD!c0IVH;_2vR^Ln#IBW>zWkXj
z$9~CH*>APF+(i}#nbj5N7m4>?nBQ6Sf7!B+PP=#iXBFe<7h2!{E}VgZM?KToIl$A|
zSs@_E-_6s<m4TsR&eZAb5y2<L8lF#6QBm3Az_O&}p3BS7tPDn$C0u4sLLM55DlCH5
zn^qi%3fP$78t$5t&6u(=QNKIkJwLa`sa+8p7JgFWT(J6n_514Qzn|T+Pd_)u^7);e
zldTz694|QXC?|xuK_+mYikZm-OGV38&ngxH2L_P^+&&gDD=ie%)Px))cl{5(cemkM
zz`9$fYyaN)AMZN(`1B%1ha`pNAD7&bIl<0&;O>qjPdkPdxfSNKr|Gx(v;;Eca`Fgl
z+BwH~${WS!CaUgQIggSfCj>JFEG)8^#KUmqNueS44#^AsQLHKKg8mv!cX)12*M9Ip
zp}x&VOI=-j(=OAL*IVBIN%?a){niG5KK0}oQ+H~(sCsCKrWY^!KmX~E`;#W?O3ymX
zW46jcNKE^DWZd>OcQ`DA73T{tb#-;V)>PwrBKXGXo!$!E-V$5>ADFr+;Vc6)!<18x
zSzH~=8gjHw^>xWy`Mr$W`No9Aw-+lC?X2T0#h!W=CA0rs+**@aa7Un6rEBLCv(!y%
zS{fYZOS9B`n{a-al4H#zBfl+c9cD<dM)H2L%?yh<vf%7zB}VP66K+b~Q;+k^w`9Ad
zs<_ch`izac)fDk(ssfu9vsoI&3&<~%>GpkCB(m|P)43<!>MndoPd;X6)&6{_>*u1D
zgFiNU71kxZS@1Wku<XnN+iPde^G$v2tGL~Z`%&AmZ_f@NU3NyC{n4Kp6{$Hp%GA_t
z+$5RvxRn^WSs2wO9Gu2h<ftaF?VHJ$1M#Qk2^?S)n!xzuKwW^biIV3<<)-Tv0WANT
zj<8>PSlHMVXQ$EdOaE}x)2nhzw5zy9ls7quKamk&-1IPiX3~_5ift|*Sh8B|&z@`D
zn08q0{cq3XA@Y|5n>!tk7$k9AWV?R6wlLpDDDH1!@gduS^9y(EyD7Yj<86z3%j*`t
zLvL3eN;=6u(fFXn1;%6^$uoLyf4?d4esNWl|48kE`WMpa4}KkxD{nMreNb1B7rx@J
zUD(ZnWB0QxUv$55a674d;$O;ZdF6o3p~bS_l-6JQzBS<3RI?b}YX#@~D>v+0Ezf^X
zxo*o5u_X2ln{RNwvANb`%Ai)plYK-l(fx_d9<zBG*N$Dt+1ORFX@UGh|IdYN&!>d@
zz6yAL;q8aY4g60&bANuS&#fJ96RUJa;E6HwgS@lpn_TZ)Ts*^%udhIS>ifT^);{HR
z-nd-3hx>j><#EOjI?bng@~!ypH1m}+I@EvnP5&mP_j@UWOy=WLDy&|zdiziO54`el
z_9FJ42lrBWjf<G_4oLrKP7vT*<Y?o;7Mw7bwP3E-0TTn^Gt5gD$mp<YAB@@{n8Rp&
zAa8@bNkduz(;P-|N9i4W?+$If#2MER_kr(=GKZ5>28ZlL=N>`sBYcHUHUjY~5}htd
z6H`<~Pdd1C@XYKy<9=$QSBmZ?$(62k?tIGX6Fz%b7fDxklqj#C=&#Wh6qtKOV5`gV
z1#%e-QjNzJPrk@w!msP(ec|YftSxMJTdyrvztH`nbPIEB<F`bH;G+^VIMezo6LfT>
zyboS7<TR7kKD=s!R1W_&mib>o*D$|6X!povPM>jM^^Um1A6VveKYzISBUg>?KIZku
z{U3&ZY_HJ&H`PH|Lf{QY9E({ZlVY<$Z-Cbdr5t4$r8#Zy{nQ`IPV`6-SljXL!(DH+
zG@*0>?T*)-)t$#X-yS)B#7?MliTftypA!~&$^_Z1oU)~DOLs|6N$VBQUketkN?P-1
ziO`}+i<E*i!&U~}Tv-(IwChyx)VfV<H=EwL_Nje7arvb8iRhEQPfkDaE3$W;oaFj(
z;gMh?&CeREHLEq2Ykt?r4+#zR4YdtDzM3oa`Konccb8ZBeO>s<eU;eNX{Q$XEjqQL
zd*$tw$5(D&nV(@YOZDvhX-=n?>n^)`$?uZf1-lg&uez<?c(o#{_{yrQudc3Ju{T70
zb@U4M75;(xLF<F;7t}@Wb#Gpixbono2N`Fst+-~frZ9N1|KxQqS4gg%xj1s+&fw2`
zo*C;-nzl4-gP8wnm+bCqo@<Yn^yImvU)=fS=a;o#E`RaMU9@egS+sTV?wIQBWvk29
z_f6~T_FLq)&~NKG!MS{F$2O#Fki6NW+Vi(Zy2oFc$DO6;L4Slv()kW;<$JIDu6(`z
z^!2ot@2>7$_uI37rTCiZOSk*1cYMFkZe9Lr`?&g9|JeRs{$;DY)xE0!NuQ4N&iW^^
z%vTOvQ@EDUc2KO@o2|CZt!-gj(qYp!b>8Y83AcZ4Gu^a%`lL#wFZb5=wjR^&>6f%m
ze`UZt<K~&GXM)ef2{TVN5T1E5ZF1P;x?t_gESHlm?_Ku&tVWt#+RHTUGf`)%&i0-?
zKJ&cEdgJ}cj@v$L5Qtc_$s+sXy-TNzP8Kz$Ha|7ij@8~4Ix%$aRom=YTj!U~EA9Pi
zD!r(;-FMpAs@Z1ISFT+RONh>`zR9<>c(d@WwcE4hT_v7cdYP<ByW(5Ds<xrFv3Bm?
z!xwulPF&pWZr)R8d923g)Qr^RbA3NwJ$N<Y?S`ul*Daowyd!yI^5n<a>e1@!g125a
zO`lcV@mc1%+w*X}GQFDVyn5SXUaor_BfewXj=I9)^2s~i7QQYsFZVBve_nUl`L^=0
z#fKixQ9o#Wp|~%7^K*B7^Yu0B|Hjo<->y!teE+4+>{^yi{hMnW`)hq_E$n93TGh<l
zw{qX?8tXqke=O=u>-ztk{Zskx_MiD|H&{~G-f&e&z7Wvhv=R9+@9EK5N3G;qSX<eq
zvTkL|W>sfd&s!&uBsEDeNc@uYD=91GFBKAsI!ZZ_H+;7#&OW^^^?Tdy_VZl!qK>^C
z-4W6sH7=P23Eec|lNGni`?;e!P%LoKina`avwd-Lwf4uf)byYAe?0Qh@bT*7<wvcK
zo$IlccJH4pwcYq#Y)!(SNk4XTUgO-<GR@`d5zo%8o$n_%P12fdrPedkAp6kKl)ov7
zMaf*6s)ni2UiXhY=+ss(^DCMgV)gP^%IcG?lh6BKoIlC->4}i9E9b0a58_<uxzak=
zzIc(P>70EgWw&;1iPP)e-SxZa`T6LBub=(?p#9VTa_uGU%kxuzr0AS~A@+lF6^pIY
zw1TxCLenCi9Xs<q@?zxe&1{<{Z~Fdh#giq^WW+K%H@Pe?>Mi0fzJ0XpQBa}#yzqOc
z_QZL%q|6qX=r_&mS@z-nH{M3sH}yBQv+iTP$|}E1)z3O$MWlw-9+SIezZSjp?{?~{
zo85T+!MTi?nMTF;&a`i4O3R(sd&Y0}U8CbUekbRg+qtl~{LJDrr)Pan-{@SJ`tkNp
z&(E{Hc)XuYv-zj`W%{n5twEkI&$8a*Hq$#I&LzH0uPAOy+QsWeaW`9w+CQ!ObYApM
z7x#Xvf4hENZ#??4J9_#K-J2Vx><HQN<(Sd_RBJ6$?QSi3efjNs>R5|iZ+30hUcSyI
z{%O$GLebQzIibg2ow(YYedfAO?B~k032(o?I`C@mYo2SmQQzxq^Z!oe{U`k`$K+1L
zzK@sH9xjWX?QDMidc?JmT*W-i8<)147QViD-==Q$-DUgN*4&={cF|i|>!k8It4i0e
zy_>nA*i$x2ef8^mrkUZl-z~V?Tm9yH&b`w&qWAH?nf$i;U-hf^=HJ}k<Xb){OgQh@
z-Ta#GBLCE@%3m+Pu9jxumF+#*`?mcs|8|o~bJ<_XU*vDk_MNr5|AkCs(UhVsPhK3@
zxG1^y_|fClekpS*?ig)-`tBsB`ue$2^{c*?9C=xs?sRUBjda1c8<z8n&pzLKPJZ8m
z!i5hP-9Gxud);)tzb}71Ul_Li+ON~^rc2+pxjpOF@on;ZAC*5na&^J#^mQB8$L^6U
z-TT+=P;a!k`rS7>j_&^YrtI6>@N%wqU+&MX?k`yXX5X>DtJvB2v;L)iUcXm1Pu5+I
z&34ZhjdvIC9=~&3f1bzuns+)c7nO$En$|`AKJzlNxZI=s_UF#?*6|(jU%pTIxAf=r
zUDL1W^Z$GM|NGK=oO|c4S6R;%7ry6W_2$28Pv4%lZ^l0M_eJ-<?$`gl;Cte6_x}EL
zd)3;iTlQ~_Uw(e2Zx%mmpIgmd^9jZV3eP+&y8Ib=%uH-$8?N7GT%dY5zuk;MLwIVs
zajU|q&r6yP)F(at>8qH>kn^2gAyJQE;s@ze<{YsJD?jM{XX*dWzGCy|0%?)VU7IIg
z5*M8p!6DDac29!IA&cojcS^#7`o9MbOkZno<2HlL`QUk+PnGPO^Q4rQZ#y^rf%{MM
zjC+Mzt@|g`F);8S%?ybsiSYHYO3u&KOH9d6O4X~#Enolv8~cia#N_PM5{0DH^vpb4
zrT4q{D=B2A*eZpa`WpBaIHzW0dQ=sq23ProBv)l8Tc#-4+i}@cSXJZ}<ffJ+Dpcg=
z<P}?0*eZd|waP2D0txFYDS(xfWZNo5_y#CA=NF|anCO}48R)uJWR@8z*>Ne@6s4qD
z1-ZCEjVMY<vsKC{DJihh*Do(G*DE*H%P&gTH?*|0)Hg8FH!{*KN=ef#uFNY*tkBIX
zRt6aXF~cRbIJqdZpd>RtPXT0RVp4u-iLH_nmx6)<)bPxLl4RG461W8*KG^u;k`#T<
zf|6vDirfO%iV}Sz0|N_P10!7{OMPVh6}bhzzHr@n#n4bp&d=4aNG#Ad)HBe}%|+2s
zT;f`Wun((_;*iRMRQ;gT;{4L0<W$$P)Vva9WJ?QjN{f;0EG<q234z_6lai)ikqfaI
zS&zG?Zvb2mC>YZ-a|^&aK&p{drX<7F6_gg`fYqcV>!;?V=BDPA6zd!68KQUyv}6yi
z1I0fe8E_CF8()!IfL%2#yugaV;cDfQpIi#E)YHXQ3FKa@l>Fq(6e}<@Eyc*d(!xAN
zH_gyIN!KLN(o{FeJjGBq#XQ9%$;dR>(jYkz$tcgf;*!L?<W!JR6}bg^nVBh8#wJN7
zCWa<Sx~VA!sk$cSi7C2?#+IqN2B{{7Nk*xrmPQtaNJjV<Wu|B5CFUTz3S?ADW{Op+
zsj-1Ya!QhJYNBO|u8Bpmxo)C)s)cTHnwh0xvZ0}=iJ>LfC{R3DIR<#zDjDe+AVdOk
z5=+wZi*jw1d=rxu{DToPA(^?U!6k{HP%|_&wy-cZHMKM~H8(Y}G(ji|OD!tS%+CWE
zYG|Nm29`)kwsOla%1tb>Rm#jwOi$G>$V&%HfTG1JAhRMhC&DE&H#HBc)4~Gc9)xrx
zlC+_P0YthYx4_D|C^fMpzbGU>KgU)H<Rb+mJwpR<##68XWg*v!5-Xql<U~-`11nBV
zu><EiaNc%GOxFi<ZS?Ue#xN;3BfmTktPzsG@tA{93OB{EsH8l<DBHitKPf9UxdgvC
z*c8J}3QA4SEG{Xk^d#UZG<7zRMD3ATT#{c@iDZ9pY9WLN@m5YIIH(j9z(s{sVlpTc
zZIy}>%TiOo5vTx@O3p~kOHWO)Rf1+nn0O|(#GYuDW?*S(qMK-DW~ytFVv($yWRhg4
zn_^;SWNDI?oSK*fNjGrQi}TY;$`gxH9n(|uN^F(fGjj{T@vNW$Nu!#mjw#Pb1tmZO
zLknF)16>2N5Cd~7LrW`DQ(XgdD+2>1NR%ts=!1%4n3rupg_W<b6>^~1fC@7!Pf(f-
zE-fg?$xJPR1P?gz1g924OtR6(p$<tuIPK*V)Q+qSqT4yYpt2}4J)^|GJP)t+SQJ7`
z@X1V0%`3)hH?k5)upre~c3jY)3UYC?<Fe5QSDT<J6cQGoDvXvGngnTSg@VGUB_xIK
zXmE`N7fB&NlH$?SH5yzbg#by4M^hKof{P2$H&4w=u~jNpvbUQh$t}Xbz`&N|?e4<x
z9|RZdT|SwCfq}EYBeIx*K};2d89jqkJ}@vau$OrHy0SmwV&fAwi}6?&%D|w&;OXKR
zQgQ3e-SU{wuaA#^y#92PIY&qJ`@P@899#+&))hsknf-|}+xP4AnP1y-BeM>hY=7h=
zA~SRGnkzo8#^2*CW-^uk`F9{B#K%`y@BQ<~7at3j7ry%~$N#+c{PUW=gDFOlf?zPa
z=;x<YE)flf|Ns8}e6@PLNJmU^etP<H5f>21ITQc;CtEY)+t1$Hgfa{yrZjbcK<~~%
z`}p5~ij-?r6_h|=QvVHYCa`(8A3uG1w2_%zBX}9lzJ2@l{Qvix%Qb|_6>Qqkr>Cb|
zmb?hqcDqmBp6|fT&FMe?e!su@?(=8Q+;$c{6^XG+*$}mMmTU+}wMxHs<R%sOKAD}%
zpC37POh`s%&4*8tbA>fP7A7%siR;yzKd%|LXGc>orwGXIiF;P$-``jMH=_BV!`7^;
zD?(OILXxWf{_e)@`*)|vhpnD!SNluDWU2#5li<nKTp}6;{?EUEeRDH-;YyY+kOw{D
zuB{GVzrVKUV8VjJ$H%5<1UfY$q;?jk&zU>-{SOIVR#B)a(hr|KbE~NMAuFG3BGuW`
z6B41-2y&-V5_i@2ceTfV-`<wnJ%9fErXYx01)~Logqo&L5AXY)t+8SS#MT=E?(Xip
z%HP-B{yk^ToELX?ZgzqzpLAJg;ascILx&GLGeTX`p__SglWJpQqXbXeZmsr3Lg0{5
zs;pGMf39zyZFSPtsP%WBJ$&exk+I^CAk?AJC7*Q`3JMBZSXydI1VS9M(e>-AtBXrs
zUb39#1QimLE`Iz}A!=*ZQU?}>YFBV9S6+Q{bF-v<olV;2$izEtRaI6D^XAP9SsCQo
z_+&xQQZJpD9UrE3mgG!5SoQekBCzi(JyKFsK0iOde9G$XwjECoq-;zuk@)0$aA!Ds
z&}9Y>503-q&+EtUNtXpXWTK7!vO?J&Ez2VLV{h&q-y^*I^VRBW;%Z={l%N5s1oJ0V
z1;{Y`KOZz)ikQDkRcz6BI`!vi#1uEN?F>DvF~vKKc~Ac|jtKtu_oz`=c3X9KnuW^W
z6FW0sf3bu{-yQ4c$JrlUH()rv`L@OWcT<=c-$e5+i};>ZUej}Qj@z@1_B;2KNNGm<
z+wW;LWO-m)remjCyY6nvz3uVxmuxQ-1b~w7#BA2ZR(4;R7z+4bcR6-4Bt(4^YyZ{%
z^wY~Pub1j4TBobkpZ)p1{^_ZsC2xB4ET*pUJg`|<!(zwL!_gA2BPTsxWU;?c{@VV$
zOYTb`$*0Kvjdjc8oISVLnO{8)^xJ*<`swTQW9}6^sN%HQKmE%5u*nv=Ov~rJKYw!S
z?{#;dU&>LK7b+iDwru{j2|vDi8GT&|P0M%8o2}>RYRCK+bl;<-H05-!;N2h1hQDW~
zzTA9I_2Z(SQDu5lwd|e*RrQ<+sa5;#{2Y=67(!k*F5fR0e5mk)Zr5JH9?|HBugvCO
znKGMAW*2Kh{E_d=f{Fw$?=z14yQ1c3cI(Nik1Wqxmb|{ZGEX|DOYDdP+p^?n&)Zcm
zuFd^&?4d<u`Gil!TzU)7ZY<y7^6Jsm8UL$#v(6Ui1@l<9`{+zRSCD%3SN8Y2XVyg-
z$7qWko-niP<(b5FIlpX=Kb%~?ImdGPrj&Jae2!f{&dtz1Nwnc^`0Zo{zg+2u57G|I
z;+%H5+T->v_2bhwY2O!_eyVXoW>ZhfrTX}0O~+=tP)La<r_jH_O~PQZ#ZT2Uw)>Cl
zU!%>o=VRR-%axNiowv!nzQq3MuPeJ&GUq+N@i{-)jOD@KBN;Y-Hy@iC6}+vu<i_m?
zua<xht#v!9?)_MkH>2<WnU(+RdUjUS+Bn^Q6u8)^Z=Ktj<=Kxe?pD$Zm(O4I<mATn
z`oSNr9xwb_c*u+GK?mc8Z?F6gWuKo|^rr2n53lM6$-i3nQf)WuKcAp-iGS{a6r-ec
zE006f_y4`!H1nOBL$h@s<Gbs1XKrj~W;m{=%<(2h{_)wlhJQ`O)%H6->YaQ-VpVX$
zpUV|uXZM}4G3)60Vd1y;``WaeS_TstaFKmPndLoKUWD*t&s*t=mSwlME>7+Hdh2n)
z<-@w#_n%%^zIwf=Q1-9JHB1}{wx@rJ-&YmhFw4LA+0pV16)gcJG4?SwlXv_$Yr*%m
zzOVSUFvAA@TN9KXvp*2IBYEHLYC(x<5^GPRXyG>hz1w8U&h7irE7`NUU-((Dw8f?M
z^6kI-RbMWWY`)0<fBF1VW-*ulPF=&4;d$fG#k#!~U;YL8G#q@<SbVc<N4QS=;_cek
zc1LWxZIr&g?VIepqrcZb@|Mcn&G+!ibKkC#b^WrLF~6Semw9!)`|x{x1z`gV)<?hJ
zL|;EIyJfZi!jtPCL|zfT{E;{E^qu+>TT=_yNcmjc$h>9UDo7IkQ|xDZJCT3My7*b4
zZ?>ILwlsWg*Idh4V->#dt7r7SXMg5L@*7!9J-B+}^{>BIT|LZ^@@LnKEh5@-=gW^X
zdPINYn||#t<F(ByzCRkY!;cq8bzMDX@Or<D+&%{5$mjPWPd+#k+-&hFQ)Ie%zEa{r
zz8=5uc>A9*VU}C|dEPqtWX<gw?pXU<Vi!2)b49vezGSxUgw}HF^y0L)7gww5Hh+Hn
zyWq|K`H$<wPd|H9_S$A8+xsAktGNx{5&3`a|FOBPs&eVkK9!e60*sDJ89nZQP!+!}
zd*;_&>%|uTZd$ErU=**JFViaf;9HIToVf?G?9R>nUsty^Kz*{w!%)4s=b|K%S1NFA
z(Ox3@P5u(3fI4}fNA$zWaEa_+jCyDGSMkmMsw2Lr{K3WLRt)QMtrE9aeAjgroV>Vh
z$$`~xH!|O8-d&V<IfUVk)2l><-Xk-36T;>DgCfPF%6Y5jK6?Als_9wF$MrKkJ|5n=
zNIi4^(a96#8oPyOd_OvYZ9^)f_%;T%@;Cgd4`R;Eof2=BVta8vn_pabfAB2d>*v1~
z7w&kt;=$L`Rw?hSTDFFMT+<ny6{hmB;rCtr+t*ItytmtMgI??Wm9yWP1s_V(bN5#-
zyeogByi&G$Qy-Vw(Z5B(jq@BY9Tlti#W3$_p>~|m^{&Y05wrO7)E*x@9yNLAmL%<a
z%&*dQ)0@-g#GCrRUhB00)RH*ycVo7AU$=0CX50Jj^@#=B*nJ}2cE`W(oEQ2r>&elh
zzBjuCs>LTw5onpdUvPKO*TT>Lr%n0%^~0)1tjn)WsyV}?+Lf4p#_HF!ZI#+H&3hJY
znPp^n!@9vX<q=cN!AX~S^sl7|{r|4h$?){{<NxO(?l9+Dl%K2kz1;G84omlDy|)Q>
zi-kU46`D}_O-W>R3IFM7fA>b~=RG{~e6^v-hn6KQKJDKUJa<0)9>#UXvLxH+<nF#l
zH@8@@#S~;-;&WeaZBz3%?b7dS$4_z`Q<`e%)F=F~1L7}_b3E@VSW{)+ZeAu`xVq}^
zCbqE6KVMBax~A>v#f949>*Zda4m(qKqc{9}mc)l+n`={p`nMkB^oS9j`*p^a*W$+e
zKOJ__JAQxRJ_)ZsXFp9l@O$dSA0Z!(XW1S9>G^oWwko;(T;{$@ib}Y{gOm%3S$7A$
z{g5YecB$|5jrq~0uV2jh%5w6xpW^{ro!h==()lLtEdO2Wo^LgE&fm3nUmm=cJmb>A
zlcEN<?*H5fZ7xh&{7ClW7xN_B-4m7d{q}tQ_sT!0D8-wnA$DWh4)NL3N=x1!GZNn1
zqaC~YVGhgkTecrp2k@+Z$<evmFIC%uO{Q>NT;u7FyYv6rKE3@Q?h(_;)%tf9GK8rY
zeA3=w(WsipeLVgLgYkoVugaFb<VcVI-Q|?ef9&Rqj`J5hcYOCaa`3pYwr9+|pFg)M
zPA}h^vwB<V{<{_Bp@$l{^-8@a*1S1z*3>9F@T9`u53c{L1b&pe|I$h4zjHTd=_F-e
z9%WDi=;yTP{<Is6C+*kl_{&^&PUYjHuSSn$1*>Ks+S=Sb`+S=1vE@4NT<3b+-_vyF
z_MSQ3AA)!MlFrjD*XKXB-t(vR`yJakjy=r^_KLOs9p$-c)*AD&Rj-3CZ=7{t){gny
zg|9vD?(gP4z3*|ve)SzYUOixr-FDmaZqbil+1GRGPXFaQ%Jt`~htF^G31y{9axvZy
z!`>~Mm88+u{Q9a;g{GtSI!K~8QOoh7EnifA%`)YfKFf;V$0H{1+){IVtNhz+w)<tv
z&S(c_ualj-X_8COnYP#EYDXr{N!S;q@|UCHvu@1kRNnhrwC~Srn!l%Mvtcv0@$+<T
zYxDS~gXfMv{Z%BQHT|poyRU`1HD7Pln9mc=&}h8>f8Md9oR_YhfA(pO{*Dhf)nn%E
zx2X=_Gd-2LV8_E3^M9T@{#a*q(z>$-S>dZB<;=ybomux}i?SUM**|GYi?PbTnh&pv
zEFN85T>kBlgka0#t2z39-8+9tv2OhT`ols$fj#O{d|Kd;-@ss|n&9Jd?PsB++M7ST
z6B73RP1c`tv}S9v<cGumuP(RvcwLp}Us0K6XZ!9)(U&-1uhhNS-V-%>r$xE)yu;Bc
zyn9QuJP%%+ym7|a8?kenKA&pK_qdaO&~C-Ot=U4iM0NU`rBmYO<{X_=^!LW1;>`<1
z*4~n?mOr2U{J7*Ct$>tMITn(KFP^`=F80S=(;S|qhw>t1HcNU}39Yo(e{yF_X!+ip
zwnx4nb7P;~dM(j?{qWjXW<ihmp9U-_FZi{(RHS|rYuS!~2S>{{?D@{7xt#0a?^7$*
zoU9P~|Kv<{H^Ye;U+wim|M9$E<acBGYljCHM9%p{wHUsT<SIR(wO8}>K82^Ro~Pv8
zV`$%>X7E*S$;qU5jR&)~Mu|3qO1CGj6DLkQGv9vx-Q3wpCU>5eD7FcS%=u%s?q=Fz
zlj6)vm$#q!_tGp&h%t0e-dpp|-4@@@{K#2)_`;J)FT0BkOC<k4yu<sP`QOoXPZhep
z<aY1<c+9D+^T^WR9UCg7*PZuSk`Q}8dfxPHi9*ZwN3pY&?>S!A<L`Mr`+MrU|MObn
z?`U3G{_kow_p-v5vp>c}l?$vdPkSg`@!V(9^IkQ@X9w=<&5k`Su4%a7TW<W}E$M>C
z|4sj_D*fiww?zs26Xw`%_WwU~<MV32|L;$~IeJ#0;dzJnhn0akw<ow}|FYCOH|N09
zQufV768|ObT=VDpnPlFpx14@C@UBeH&At7V>VA_aOlX)te?BDTcbuJipnY5V+rHq>
zt5VkdmV7NMmmI&9tIY4f)YB=)-am{hKlb+2^#k_~H~f52b|$>P>*mz;jm^uAm)X}H
zoWm6I!gXV~b)t{`nVH|0Cw|lJ+B;jC&7Ij|$D@cXN^)OM9b72U*Ejd$foX61PRH(H
z`o?%K!T0!{=ik>nJ!80>;mzu4ht6@X2|qV!=H-jW*cI%jIq<TJf_ev)0&7k#S^xWI
zzI^hnpQr7AnNHh#dvm<c-&eWU^*Hla_&qq`Cvoz@qw4>vx${}Sy*v4Bjz~j)+T%p8
z+6AY}jz6{gr7r3I-t6?|8D;aXT78|6kt6{!RWLee50{@`r0|*wmoJC!y<Nr>uus!2
zS?%%L<Y%8Ash-l7zcdljw~j6<5r6q|Zb2badzYigZyS!{ETL&7+GX?RN%Tfuf94%k
zmM`8eetV%Zq_4c~$gbwgi`6GoSTyc`reV6P>cNA@opUuiuiSrZVYo*A`Lq0>u%A=<
zAkC~Bvo`N9`nNUSlK+ON{!G*2j9ZmDoSvuLU}Ft4pT0<JRk)d9u>zD(JD}B8M+dZj
zLR|r9BEU5KiQ=~|2heE7r(K%u{fX%Y8XP;*Kl{xI19x}@Pil9GYO@`1|J|>_von9)
zx^+8t?Rqx*d-jH?wPCeyK!wjF@z|Y3tv`NL=!kV6+R-bl?ss8pc6inIcV~;w+b(w~
z3RvXA$t9*UBmdrdNWgf^d-~>$&)r?6qAq-VayA?e3!Pe5#P8q7B<LV~7!njT2?Cg*
zFq$R-K1dArSeLk-0~D(+pZ4|i@CXSBZMs|b>q{oXq)C%33Lm*_`yCS#^Wp2)(1M>6
z5QU}3y|ATTqGj*z)%u_R`LJC+<Km*$L#NYWeJ3^xOUt{Ti*0OeMdv`%(-YUfzrOB%
zeO!u%?bLLA|FuzDMG#W9RbLLAd-tyF{n{|?+FxG+GoWJ_lfqfq*e-2KJ-xfGW_{dV
zky$XqQp<}=&L<cIEccT=HC2200ccK9(SLnytu!AWUwKVWTT>TQb?=8ypB6c{g9HyI
zEO76a<MLJw0DC#d@%_EM?<?xN#dI}dc5FZz(GnADn?3t>?Q|FDAksveBS(+6u8ZA$
z@0%SvJG+vSlE@^e@i!*T(fbb?0_*SZXMzuMU0N4wT~k||zcbd^(G_Cd5w@GRZ{Ig!
zUeVpnJ=dx<$O77)OX|+Edv$%Ow|Go!tY{B3UEElt<~u83OUA`)`{yO|7Cf>4)}`=$
zQ-sbOn@XY8S63P2bj+VGucxQS<@CvP^UX6WgO_iq-E7Fn$Osw@<JhRF?mzEHj@k2@
zPx-mIlP@jtWC&RoGc);k-_x7lgBCb2%ATxo;0gQ7dTy%U91F*c3=QkDHvtl6$1b1l
zKH4pIaaE{xtK!c~jm+#EnfK@Y{rydoNBZ&e_vbgg$dEG0SYV)X{`83x3$m}TJ96ZR
zMo-M4+qY-$*kSSN`ug<-AxydF*TwGU;NyIF<l|$JHP<B7`yE=7l+P^Fp7XQTjxC$*
z@^b&>>F4Khxqh0e8@=s<oynBq=jQ|?Y?<7*GjSbtn_(1WvANcc?;G2*^RwL}B4#*E
z+_T$mwvUUO+op_*N>Zj-E^DK<e){s;vi_gVym|91EG(Yf{61mQq(f^WHy4zbr{CRG
z8nQafS9_J$g0~U->uhDMOO-S&TTk6p3tF<xt7)yL&CbcW_uVGUtC;oue*OH3%I;bs
zC&k0oL^y8m;J9RbCglB3Q^rthEfMMBmnYW$vD>S@Y)iM#ER#+}=Qa(Ilf`Sov}=EV
zOFh;jS@QZ?Z~p$jV%FBaiziC2iP<^n#0ig@nwlFM5}BW#o*u68-7~vH-i&8@B3
z?yp3RxBfmK%(x|RUhC?M4_02?@pJm~R`IxmBOQWP`d&HruB;4xd1Irp+ME@}6)U+G
z^nTI{oL}YqxZso4F=2n3gW2o%E(=gOFKLz|VVrhmLGABvHGjWe*J{}55fC77d71Cy
zJ3EWFWL|DlcJI@e@_UJL<IW?;<?GLM2r3tpd|w{+pS414<-&lRn6z82+k9W2Tzc4E
zSW@!jmzS4K7dG8kQuz4T$6ML!A3lohiDF({z`tTaKu(O?C&{|sYE4HbTT7T^2uxP<
zJ#;Uw_@d{#?}xbcKOAJ2zp%)aTTCyeWAo<CPA6N_&doU~>~G^Z(<l`b0<B!4K^j_(
zmEBjfQm?EC1dX5X_%(IuxdW{`eoPB6_}OClnalb)&$Bn{GG>|QpPQuW{o&iUFoPuN
z*V}KN*^qd6hEXb)T<fZ!Y6lU&c~-|Q_Zuu#-(36b@tpg=Z5J+FcxJXazqz?NtMblk
zuh;KCcVlC6=7j}}7cX9niZ9-&zyHsq=kx35otmn>x$bWjm&gUnXIhPwE^cmYe}8{Z
z|Muo)h3I<sxBrR@3$Aj_dB2ZiL&ObX+pgr<`<8f3)jD$QSlaGo{T^{sX3g?CKhL(a
zukYCQ9Pcc<Z?fOc&Ery=zj21~O;OW1!TZlH%)7fwCw`wyeSJNv^3HSZ@^vRpOjKSO
zxR^~lY|Vkwr@a?GVH8&LN%->O;+1u=v$tknfAn(Yeq*i1oiRyCN^`Bt)85|N`Xiig
zY1R6C1FILe3V+^QxK;RuxcRh0u3ZA+(%x5Bg^G4P@qB)6uCk6!Pki0a)&sL_SKO*q
zDsy9Jo!tMnTlVxqd9{F5C7EGsV?1XXrHXVtkqTbs^YEnl{3TAUTx()>3fbG+w@zx`
z^YNJUp1<F2n`B;UnW`QB%Bo4Lap(1ulhrft@3XZod$S;;xiN&RUcPy5;6V=c?Pn9O
zH!c5IP@ep9lIoU>i%ttY<`uoTpxD&hd~<ugyy|Rw@15Ts8%XeQxmoOb>HSToR?yzw
ze%kRP0qf&zTb26noSA7{`RC(t&A>%0tHak{+nHK<WkulP85V^~Z*Of?wz7&U)>*;w
z%&z{=M|Q`92W9NbdN#dv3*L3Mo6B{R<^Fu$+v#79o||>B{C=%_O3D(ai8c?n->*|%
zQd=z~B(&nf$6PNHxm5uwvp0Xd_-2MNhq}8m_p|frCMF>ZPbhD$_?V=mq@)qCL1A_H
z`eiPhXHQO;&MR*>XJznmAvw8oO|0A|l2`9vn6v%=qF!n9ODhVK{pMJBPJI`@g5gxI
zsK)iSUH)qhJb2>VFK7GX#o~UEt|wK6g@q>B_vU;S*9%^JzC_ev=h}u<vMY@%G}Y$3
zxUy0>zW(o4r-|;5*1tac^VcsSVd2G08O(M+p9m`{D@Sfh;jI1r?P$_QjVaa!eX`bN
zYmzE{i0Q?6R8&~JoEh->V<bx$ceutRW%ct-B5Q7S7T2#!y1D44imIxhh{%$}(n;FB
zv(0)z^B^0G7k+DC{Slh6IzYu;@1E<&^;hO?-W|T7;2zJLH*ZY7#QqDFHp@}a(cuvn
z7f(t`S`o8zlhZ~<w&unS8w|?c-8p%vmD{rPm58kMt65ttpDlg3fd78_{Muui&)an`
zo^5aX`~2b68M>`X<*LSCYp!i&>Hh;t|1-^UrTY5%xZH|*&F|MtzPQ-^@tfEwx0uBq
z#BG@5G;#N&c@?wnFDp}i8Dity#`CaSf8T+OD?dMeD#*RLsrB00=;VzN9T#`5+_p2<
z(|>vN_PjN*yQc{%yM1`rF0Umr^+5gft65K<PLF?fxBULmtqz%4{4p)rjgz?C-gR0U
z_sVW&pSZ!gZJKWMrizbAzOlOfbB-n@{`l~)=HpTE3YM^i-wtTZu`ylPbVoY4N@#!m
zcM0)t{~eC^$<8!R?@RkOxn0f9Zl0KK)Pz~HPAzin&bYT{r_;v3l`B`4yt>lqJKM}K
z?To~+Ug_llAx!^eXU?3-BV#c^(m3rw=o-GQzs?`N=o`#;y0&g%!p0BBCy879JAFN5
zZPe5oHzK-3G#4d2ndsr|y}11SJ)O8c68DYHuKfAKCzd(j-G6S@VzrN-=Rf=MzIbK)
zemg@$!&!^s<#yEm{&sOqr181=_UG^HEav3o4Ad~wGc=rd;X=T++}p=?zu$MdTR%&W
zZ|Z?|9Zk*7b+NlQRejC+qtzO)_<BfB-><abe><D+??3-<YJR|yjKHvCM_<~o{k$H#
zqW~1aX1TXq5_&mSmHlV3uI3G#R(~vO>n8ngrhMyv&oD|oy~#+g@6u9lZHY>SJ~>;j
z%gcN%3m>t7s$=izdR(gWE$aW-96WgN&&T8PHUEA-xA}S{n9Ei5!$XVGS0Zn3Z%_aI
z?XAel`}0rDwZCG@@U+otW2xBg#chvYzI6O@X{kx}HJ^ni_CNaY@Ni2@%ZKOl>w`Ak
zEI8HLcRj`RQ$qQ*`dMe@y_o*t`}g#tpPpV>8SK9BgniMkFPXDUvyc7z`+KHMrBT1_
zw;4_++kZcEbaa%lshIHc^779=9`|dDOg+G!eSV%T3oCo^n-_tnw$!#7<;I6;d<vNJ
zyV%q^?c32lpR{vxJY8H`oF=ws-QJelvL*lg&CTg61-^-?&f*CMsqBnDb<VQ6uxyih
z)<GuT+mnBnJv_t;s(04K?*8%NFn{Hzlj@>gcBNll1m4<~dwON?@)`E^cCow5&VHZy
z!m5$~gune?m&M0=E$jZUOx<0(j7fukYJBUqE44|MZ0`9=%E~{#T=p03vN2g5wsukE
z=Vx=Q>nBR>UABthlym5YEcI7BQ9jF#f+A_(-*3^aO5b%14JR&GpiuVq*3lh>k3$$f
z?|gK$dvndtqCbEBytuhpT{~<|0OysgkLPvc_RP4Jduzg!Nq%eh6>CQMx6gd~@Uu?T
z<SPHQhn6~I&a3?v>DDXN%J=4Uvsv;n9w%q#lLpc&Z3`4xSHF&ts{B#oWxDL%1q&G)
zF2jTa49EKA*9SaVw{+REOADRZx8>bEwIXn_gk{kZC&5$GbiGSTOy0eFSMv7O)Y8}2
zp1!s*k@+J3>(A%&l6Ex~e0g<OUYYZ+PueQl^=GwF(7BDdQT%%Hcb(?j)y}Xg)#Bsh
z<8u4Aar^zc+2L`Otm*3yU6B%Kbzh*SG->Bc_Fu5I0QKQjk2LFkFO&9MUT#?OBEatV
z8{=5-fOmh)4V9bwWUV(9J@snil|EMee(&-CmDy{fw+l&0aUGatnhomwc`djzJ3PL2
z>i@s*>vxE0xUOa7F8qAiC+$<l4CBi;46_+H>(?yEy}ixF)%D@am72ew{)^BN1BH>A
z-<%HjgI*vfZQicusnp;0(o1Q7{q=Tk_G`j&a_8RM++0{#7-&%RdRLN>prqu<MXud1
zuCJFDR`<K|B(x}6+B{Fi#zsb5T>RSFX!osIS3{VdzE4d{GfFuju*`RMTlmGOD^dpX
z?&kHZ%KcNWCZ&CU^uOTI_577_d#yHZ+$f^8q`Um|-kn8HxoUrXNql&y^~aPvueJL&
zZz%s3Wbi3t{|tjv0sDH1<=f9MPdwbF6TPj6iIq!3V&}fT2%Rr)Z-2j)k{P_*@8~(d
zbX(7JVQZsYzrVZt@#DwD!)?4%^yA}H1B#~2nd9T)!ctgR_#>2Ok=y%xL#thy5+_ah
z*3`8#ufD%1;^U(=J>83wj&^amzB|>*E&k%hMrHfIUxZm#moJTaqVnd=T#=q9Tw$xH
z)}D4Rt~i`NMLWE2s&;tLrW8+BWd@s{PbT}!G;00!_O@~QIhi+a-->qqS*z+j?Zb`a
z{=gjtiDr3sJUWF`Lzte1rJtL#u=e-2oZH)a50%O<_qyI+%Ik1)YG3}$gYw+-kDfj~
zIrZX2lgvvl3s1<(+Ez_@@F3yao14y^YKJxCzfEfk^juf+<hcHi;+xtmEGz=MlUTQA
zT@BLsd52qn&jEJ%8i&)<bR%~ZDEj&R`sDTQ-kQkG5=JQ;OTDM3JwDd^GfiZw*Mrzw
zyUX=K4GYVf8JBY77Uk>?7wvkI_55GV(Qh+rH=56wX<Ka;yDKBiedpe{x3>#l+Go4m
zf9|517ZqM+^==D2)|I6lS1LPK%_N@gE~XpfvAgW8h?m{hU$562m%WLQGRbf#DKW{*
zlbWmg{oQH({ZD>ApTE55>8UmG`|aX+%&#ZB-2LJ6dHc&70+svZEWMtto4<m=Yu<zK
zg|emXM-oauEQ#Kp=QGD5aN!BvegFSe&oawCc60Od7583Ci#nVX<_df7^tQ44gn=|C
zFYnXc@An03{Jg*}UvuEb#$-_2vRh0yh;hnqvy>A8_i8@-mc75{ySwb|j+&pF&hAft
zd}Cws@mGt_-@LTco0FSc`E6IHRR7r}0cZ2LTtD%ouXkbzd3j=|ZJ|!*EZ3?ktK3^#
zE*jV~ZfZN=;^LC9%_?B^|Cm48Tf;RbrSJG5a(sP~L3+7v`oB3ZEi5fTbr<Vof1i0a
zGt=kSPU{lY-c<9msHV17wCj&Due8~btgWEl&b9UN{_ErR=KPuzt<_k+x$))g?fPu)
z`mcCmTJmo`<8u8Z!yYcrlwJ1t(Zb#}k#_T&+4+UU#Fk~}J}rK~@3)@6?N^b$cdVes
z)xD#ywM9-A&$;uLbIrY0?%&roCB2k6H`m(T!=q#0i%o^b>E}`|EO4Bn6FKR|jR-C=
zofQF38Z++gvFz;Zba8e4_<H^RON-t4XU|RXc+T=H;mi!f9(j8?@%xjm$O*L0u8(Y0
zs+?u1JoWA7Uxj(+y+FORkPs0$IXO@pKSHNR!m!ET?kCIlhAxo$&HjO`%9XQTo=V;3
z`~K|FX6cugmU3@SJA3Hn&B%i{798ys7ZwzBJlMn<xwoox->kEN|7;Jp^QXVM0&Z%o
zSfMdnr%9{P{J)~%%!)^y>YyTXo@-W6^?@3hyUZdzH&|95J+sdBd~wm|lB{cMPIifE
zbMWwl82ogps<P_0{U*UJrgLC<NOIP<xvkCJ{Y`?i6AV)~6yKXX=jO%<$9koOWo6eY
zHkqmE>Gk!=T0eUG_Ux&t+Mn;1-hC2(XI_L3kCaJA<>zM?SBLATotcqXe0^>R=N?Bd
zFD^kr!6V0x1ubxj&iXcUT{c6PuhoWE^)eBk&5n9cNl1;65!DI_&@g-V=xBFKd;8|Z
z!)#n(uYI=41()zTY_tqpJ$3KtxTh!j=LauzVtw@JQOS}MZaFtLG%i@6FlX-E&feao
zQCqXPT;;STpLCgTR~xY{N3xlnKkd{MO*7LgF{&R{ZB9QgBqep~+S+K%nC1Lach@z|
z4HTCWy<J-Kx#VBO7n^(c;*RJ<yDfI>73u1=DtjXVZKH_f|4|V=bnjTg@{<dXmdW1x
zAX%OK&->s3het`EOw#6b^6<8tn@+E<t=$-*bL0rLWAeZ2%a;-*C8Zz#e!s8$_w)Ho
zgTyAU1-hU<4XCuRE9tnjHf}-A2Z^;`C+VyF5j@@(`M1CzYNl~I-@}IwxsD!tb7$w|
z2NMMR=Gu7foKQF=>*pM&i8T+ueA9UJod4+6qn)7flV!frt&`Y~9zTA0Pi66?OP3<H
zWC)&}oju9!XE~_iT^X!ymV4{S48vpz^Sn8}d)C+d5B;!y|35EK`=aOv$JAXu0gO{>
zj_waKC~em{oK&(r`}#VcStgzfZ%o*lef`ps%;0l#JQqZr*AHb-jptl*vG*49B9HSX
z0zyKE&df9x^^)71f8Xxbwp{O`BBRY|XM-50xF5QDb!scOc+l!F-Fp>}dGA%fx1C)P
z|MQoSprN5*OW6yDtSGjtzkQY$to2y?a?*Q?YSCl+y(FIh>q!0BG&3bF?Ui(~v_bj1
zm|bOWy_Q{_H|uYjaDb)HLXUlqA4My@j%xaNdWLzm)w*@-K&AS@Cyeg>a!XTBPn%;`
zJL~1;<-%gia%b3B`T6;^w6_aOOP_vobF*f|hJ<<Y)8^#2H`o1qI-N(>N~Bs`KI_}u
z=5o$hW$#bhd!B6mW46`0Q@w6^{QkO{uUEs(s&i$czaM0m|8R&~|G^VUkG21}Kj^IC
z6j7<)UG&jnzwcYOPd9CCKA$nJ`Et?S%>43vUCY{ECcd-Ho*rmq23205^>QL7O<$+&
zE_-WH@j>D8GT+7d_xEWvR-VjWzxUXV!p8+4H67nJ2JZMd{c@Bp(+uOF8+M!JpZ|+-
z_3+^M{QUg!yLV#`J`oICJ$0h8yHVmHmOFn=2d6)9np4}QDIus}{8nhc|NEmyo$W8K
zjW$m^Gh<;k=QF40=jShX?G}4;Yil>Bu-b<s!u}c}Cqw&Wt=(pu<${{mpP!vw9KAhH
zt8wStlPO6rE-VBM?No@Z^LyLKT~MOGVr%3RHnGi}cf033elkrGG?MW2^z`5j3;xZ0
zQugxY%P+65_wOuz{wQ{n;?`ehFPbvA=H1&j^U_Vr@0{n}#Q0oZ<{P=IMAOdhSNWu9
z6*aX>yGpY`EkEh}JqPRmeU@)k+Ii{U-`|{ET#pX5a_dBHYWeo|_G*@AVVatnpkcR~
zpG90#@_l>l?kB!qqAa6infD;9@A-r8?*li5Xoej-8n|vw-;C#B)AeGNY;5LyeSKXx
zV5#!es=C&Tj_wvNdB?Lhet5nq+~U_|6|K-!E-^7ON4s8j{9HV3)-11r0t0FDyeCVi
z$36Q0@BM#~t|wW$%ibzkSj^b6#Uv>?IrHM8R<DG-$rC0xWMpVWZ_8O2@OECv%Wslp
zZtQFKD%C%}p&xv6^AGzUKYnenQEOx4mC~3J^X$jR$1N=_8?vwKb(zXI9rb(vN~%Gr
z{NaUN{x`NK@w9S@tCbz)S{1&2U4Y!gFJDTmN?#p0HC0>EEN8~qGy62Q<-fkRwxz9&
zX+zpssaMy}TNn4-sNu9Jc)$QEA#4g7f^^*j^itMbi<nqFVM8^mbsGPHgL2Q#&h<Xt
zCoAfuCnzuPU*N%^?muTi%!dfCtx^jET;3gAdH?z5?0>;Kc3EvMIVl8Mj~MVTRc&(T
zySuwV;}yTYzCM1WQ`k^~hs*6#Pp`DOkepoK=FP^hudO|89ItWn%k`tP&GVNfE_Rc#
zshH69@cn9rt~>1iPrY4fT%jpu9d1?r?#vX;U=c694S9F1W|`!kT6sBr#jWH`TnkRr
zYJ_aI-pZ!)z)OvVogLJ8YyG6ZDMIJV`}^}DwbsG(`JgUMrQhXczM!H`Q&V$G?(J!N
ztG};evGY1TU4Oaj>1i=LW_X;nyC1mcqQKgvO5acJcBy^E+k5!rY6*Y4pG(eNtvK@R
z>}>ad0D+a^XRf@n??1G)N=szo?~nyK-mh$TGFeY&`ts^(_xJnt@eBWW&#QW+3F@?E
zWN3iKPV{aoR)*~^dppCf)=FG2W<lxeYch5<5vm^oe>|I=4;o<s4~uFthMLy+KAB(i
zS>L^RdHBP~9ai~!KC(5l@g7Ps(wtF$(_OySrC-k0z~X1X_fAm}jq7d;JN`J!wXT8<
zxE>XcPdU;d_~i0wjsMwheKIGfXa;9oT_tLqcIHB9rt10ES5^v7R`-8)q*M61p5nyW
zH}6<BK0em_c<c4J!#8(JY-M+QC>P+Ez4QcgikE-d?=#0O_kXW;a(13+o-db|m&f%?
zadXt#JwG0G*L*n0jy&F}#2<h$@>*C_6lB1E<HpA1%D-Q)XWrPL=sjKU>V1KYn(?(?
zMbF!Qmzk;+n)LVA*B?KBu4J+EN!vU#@o<}A>M0Q$uF$358Z_qEFmF}svSAV1+!-5q
z*z)V`oLAS@p8oXo^vCbt!wvX7k;Xfxub5xT>u{Zk%k4?x8~>EJ#OHSu=k&&VeSUT}
za#ITDy?gh#o-zJ>(9B=)uvL6T<Yu+g)Af&o1~5C${`~Q{zw+zV@Rgyf#j>xjJ3ICD
zPTwg9u3QPJ`u4`Nq{L)jYIM-&kCHd6D>HaSDo=bny6@aVdAlVcFE4@m8#y;OaXn?+
zUGvjuu66mdcKNyk`-?QLR@XLPJm`9`!^S17yVdsMVvB7ry;p^-bjr!maqSihHTZww
z$;ru*RwWvu+F>esdV1I86a_26t=)T7uXUx&^UkaaU0qOG8p>HS0n{LD=ac>M^?E$#
znw!n5?y7_G`~OAB_Vs7ysp>z7J}_PM^z>lRz~rZfIX0D(CQJ}0dw0k4B3ttck$-Y*
z){C12e@>VA^-%fS(S2?Ym0UZzdzWTiUB&g(?(7W1=6$ulL0uqFpKGC0t4P<4J=5ds
zX8POzow6<W_NJVhn_^7&*!WLB;M~q9C?a#_U+r&=n~y9{weDHBvei|tar5q^8<yz`
zeSBp)GwlE0Y7Kt+@9n+4vmZ<l=##g0yScz{O5WclC&5Y<zr5Lp*RlFPe)r7l%cZ5@
zG1ohbUAx5u1qB@w5)`tptyx&{@e!Bnr{45=mCtT1nHm@vxHC;?iA^-q_Po1Zzb^!U
zhDL9El<hLF3(B5(Lb>GS3H?8IYt^59ISp#EJ2tavPWinh@i3cJ*(;Z%B%!IhYuzSV
z?G^1hp<GgT|9j5-j}~Ruc=r8%XRV~HEP5)otGjz~^6|bOKYnZ|dg=w5X$w$^-x#68
zBWE*XXYun(o74T%&dr&rt=FC;V_o*<#-1IHoSFCMEqP+H>ie%YrPALSyuy`7=Irf$
z6s|d?#Xf99!M$<`9=51$IV%(11%`x(Jbd^tCB^CN@pQom-I#?PHc9JPy#g21So<A!
zH%@=Hy`sAM@`glb*3af<IX4{I`D6=9OI6j>+PtP}X^NaI=V4=(wJK4tvzyn-Enf8L
ziKkWBn;$3K^`Gq&vdFz<QuXCU<JEJmpkd1Jg>$dzO^i8oaV<lm>8`@U|NKXEXCFT~
zS)G%Y_bO|{`%BB`*PS}h$h@&!(_yZo_Xo|p5eqw>2>H!p)r>Hm^SL@)cyEE2r>7?i
zE9*;>DKa_t_RM_nAmPoMHywR_%fi-1ak+ldwYIiCbolViO{v_0fq^l*N;)S@nBX4m
z_xV!61@uAF3uXHPKYceS_`GtF_~bQ9?woCZk+k5%*~8twVSUT*{w@;XHV_KAJg>)A
zvZnk?ei$#i^)Iu+1J^TB|N2Y*nO64r^^J>LHZivb`__C|ZR!8?-}J|xH}CJC-OV_E
z_nDiv2RyDPu`Sv*>)jtos~duWr{(k)Po2L#Ws~-0vuo*)AyNzRZ|hZcbb59cKYz9>
za$}M3>Z_-`_4gjBe!sVUb-ZL2e?&)i<D^^rGJh|78?85GW>sVE<*I_0C)R&?b8~X$
z<z)|Fyja0|lNXeSy2W%oyuFh@K00cWeeKOe{opDuu4S|Hipz>0uUmG@%U}Iy$>q&&
z=N1<4`rf%KNKfYZ_OJF2L-q@Q<+i^6^Wd}YKbxMZpG&O@TAypoG$*fS?&LokQ;pU4
zyb`*%z++vkc;58iR~ZV6Pw#6rTE4vC=`;4{6E^=ml(<c1Q^0n=m)7-d&${2-*wlT*
zM_kK%Rr7o0dC|J2rc*zBD5(4OQr)dz?yPkFo{uM{9ktWX@i~)beBS0WPkhZsR=LK|
z6}ARk>i*HQLQgsDFEC$quVn3&BY$7o?0)wCU)HUUZt>xH^LAK$&9FKgcIR*WonyUo
zDi{6wdNuL?NuLMjtUZrvTU^-Xbj@dt>KU=R-v@p67o<nA#{CzcR(FVL#^;cC3^9g=
zRkuX@?iU1~o_4N8e_rkDi4_`i)5GJ=-@JR=<Nve1E4(I6H_!M-`@Y$KSZK=ncMcJ{
z><7-ApKX+z*1N6z(}HgeKi@=sFgZQ#ET|o=6TdI!a^wBHb|d4-jLd8X#m{{9bX{K>
zwtuI^uA4!Apr}fVzHQDP9{-9b?o{8+<{8H6e9Qgkr+s{Mbl1JRp})VqU7UM+n~J)+
zarwJDF@X$<txq3E8L6rL-YzxwUhJXhg{l9Z?c}V9+par5dh`G2{NJ}^>|WJ<-+Xl5
zjUT!Po$v4e_IdT^rvKYm)ZTZdUZ}svvfy}&#IY^hPoC%BOa0JZyxw-%MeX9ELe4$4
z?-;)2x_y3p+;(T7@Yed1(IwK`Req;`dB${)YkkFDVHLUbuT3j^{uJmP>sowc!vd3e
z<q=J%Z;M^1zHnSZrfp+h^j!CO9jjMs2L=YVw6z)4{i(3Av5_#%x}qF8EymtWTUWO?
z@9wUTN5$hGL@%@a+IH+#1S89aPyc#S+Rl4FzQen{YTDN2KkwUYuHkLZvG!QzBK*E~
za*e;W2gB^T^ACPIFRQLPH&y$3`Jzda*ECF>zb$n`PW#JS+wFE1em`;l@C4oOKYwr5
ze|qlg$}jsf**O)`T&0-BVjr&ey*Y1waQ+R)P01DE;db36CvQ12ZHU<U$^7)CWo!Ok
zovSOnJ11xDHe;&*(;}m#Q8wqFot>R|V}&3)zuXnMOTE@NJ3B$%DSqYy8qY63VI-~H
z0FJlWx6KzfbDPS~cHG$33|>xgYm273-<%WQZ*N)b-hXMCuk^XO*2lMGUgqHAJE!XX
z%x!wP+2+5;f7+z&_?PPbrGEOoPx%k032m$`-NqDQSS0g$`|<n1j4T>K>zU(w_iy*)
zc^i9Y{oi`Wc=H3DrxgFrw0s$QDsjiJb4ByNOqr6i(|zw<``dr>_5RGfpRd)xAgXjf
z<@v7n8L1~$sjokKtHPOakH;Nx<rNn#S|)~jZ#a1SV}9S0SvPm3nKH;8XL~%``_soS
zUs7IPS_)cXFg>oS^Su547}XDHpiWQe>uaFy!~rQm&;ZKb&CAc`*-X}-_4DnTr^oD#
zx8#>Bzk1uDJ?rNB&KZR#YvP^r)?|t;TYpyNrQE#YvU;}p>pYV$eqXs~uW2^(&i5;3
zvd%Wz^!CkDHO5rg%Jtig&EqcFq@5Lglz(HkW|YPAGrZr}WEk{PKfNsT-X?qZ@^Yh@
zTQqu=%V+LCc+?{`pE)5lyk?p2y31_MjYV(cmnZ$Gma!;kVB?kElzLihtNnv3D}zn4
zuk~DA9Ui=-u0G4w?({N!ruA=UZ_78^Ub}ho;-KgJ8q#0tj~qXKc|oFc+53BQ^S5U%
z_n*HE)L6AH>+zhdCMY-Wx7t>ts?bGmH?Up5{^rd3$S;pyTH7eM@@}?2Q|5iqYwG)a
zkA;)%c@N*Ze6ZmA?tAB!I{&n*QvAFq+-Qa0HSv~tt=)l(Gqq<)7d_YeGqw5Rm0Yv#
zBhsg1Zv^kT`0H2TdD-9RpZsB3X3`hE*5d2m8SAd@&EIi&qSMu?O+|mX+^&@Gx!tOm
zcT7G(!#!ZJ&`vG&`1trmix+DKFXQpI|7&u2xqmq4pVJQyw`bnjVQByVkFn}3zU5Kp
z*KgRo{jcG7&d!_d8ltNnc}$g&dmVUZo|Jig?<f9aR?_d13qD-?n>Ovu<~t>E9aD?L
zixPM6xw&mEY&kP~|G{lWf2a3k+y9)hReibTeXFZw2jp)2jDDv3zVhHQpE|p2#g}^g
z{8Luv^s_rGYc*BRt=_A5?*(Vpx1W97;=-Bt7O`&Jxbf#MhIcnNw}WCM`8eOHEwxj%
zV(rg~i#J~X=6l;bI%L;Zk5wh}kDLr%o_S{pr?gqliSpZ9K0iNyd6};?pNz!;P{V(|
z-OloBW%rX9Z`S=QI%K5x<chmr&ZDQP=DewA4DSBdH^2M3xR+&$mjBbB*8E$4DuXsn
zb$-Wvy<|rJA@}4pZwoJf+I8{Y%kXu7P2TVKww!v{`%T==hT4dq```KIRrbE}l@7RZ
zT|EB#?$!F!AJ;1#%Q|>t0rytsqWh8sjg5_<5vDB}fowby2bx&9LzwFS|1Fm?%Q|we
z_V<blAL~Ons*UfT+4uCtjIU=ie{HS*@3t#s@89EBw)Qb6sQ&X8uHPSNcwxnwi62&d
zYY8*CwCBUz&n7b&*{u&{TL=Ump11Aj+aj5wfUR35KC}D&&8}P2OepSZW5=p*mU`Y-
zFS$8&r{4|NIWS|Zj_BKyg&XEHH;Re&x?Cwx*t5viHUEa4R7}sFd$*syD&`gWdg9l6
z{q*lCMGBvPxtaedbJvjR*`kxgo@Tu9x6c0`oAct#UYD!L?f+b9x=m?8misH7xF$~Z
zLx&Fi_;x%0b1EZfdNA|K4#VU9^A{8yaXGc6R`dRz^4$AdYBz_vzWTB#Y=7mFC25zo
z<=(EZO@9~Y<>RBGq0w=7ce!!hpNcjf$w{lHn;WZ&zIhe+{#Ak3>3RJfcWZxc>%Y6{
zi}r@q$A9q0?=<`K$?uY||Hks~dEegMJ$=vlxQEaF!{0946Feh+b8oV?b=jL|7fy0p
z@)_<g$k9J8^nR_wvN^WL-rcpnv9a3x_&K%bKXz67r`?@i^w45=`J05}_mZFNxbe)T
z@9o{QeX`a!_I{VpG}kNmRk%;?Vy|SFahTDLIg|XnPJIn8S|@PC`|D=4rj2QJXPa)z
zHk!(&{oB2I<LVzaXT2={ANId>rqA>=<BJO$?RJ*RByLPP+R@p0@c#YzaeJ#aCLQgn
ze_Q^1SJa-Yt6IBC-g@1d%9HhFme;{ov7he8JrXsxmoXQp`P!dx)2dIT_k0~+`@zHW
z+8E0UG$-#64qFv=;^)&<p`{5C!s>Z)8D<8TeRAH2KAW?rw25VV5?{@&JBqJo-&rlU
zJ1OnhACs`<P4x{E{jSQah&&|6&u;OjSnIX^zrTIW2M?ckQ6PTm&X(g>jTI;Ji!Up_
zb^OEwHNGkQ8;)OP^x;(Vy1r>~-h;2QM>nvg+0C4GEZ^K_f2?x)uaNWmtjaSJQ$K&t
z^tqG%^l6;ll!6`mHXCm#zsB_Dzv0x6AH@!wdQ>5-oSnMYZJ}3R$jvF!-r7vd+A8yb
zP4C_LKgXYMmx$@P@?jhEnm4zSPnoaY=6}-dinP@6{QD)z2O1b<?Ca)ST<rdQ_V@i6
z7nQ2MzG^+0wIuBR&L5#U%Rxo#t$jB+_J7J<@%u>8Hs9BNDJd%Q^?wY{+kEcv-T(Z`
zl@KRq=bvx4->>-jbh@Nrl8ao#E0H(RrB7a#O?~<QlL^DUxqI3^^zTmlZ=_*4Z|3~>
z$6kIspAlx(da-<t&nu<E$@h|<q<iP@T>kz?e&xF2yU}T{Z1-FDP2crb-tgM%+hV`X
z4oCcX7d)lJXhPjXi`nLDm8uS^AIUlQq%~6d<^0@=6u<oU2KRfrAIzQcCYkA$-Dktc
z^KzP_A0>wA^d>)@rT?RDbEntg|Hl*B-%DC1UgbS)*cB@O{pkO;=kj+fy8WE*pZd4^
zO_WjM$=O@9Q_tU7erV>0?68-{|NrPY^4?>8aJ6#wi`bjY9~8cHF5L6^^>Kz8(^A#i
zSzpeU&AazgcXdR<)(ekMmi_<pXRkJU<hSbq7phW=ze+`J%ZXgHcrhm@=R(j@yko~M
zEpqK{|5sgnb>e~d`+oObxDa4;#x09KW`;etLW7{$pDBfZ|A?kn@0xn<U*Oj4Ck@;i
zK5ysYyPvo_WP6{m@@$KTO;deW2fehnocXYOo!^BA2`Wn#YuhsCeLm}=$!>Ed%uh99
zUA^*?oF^OKottvhVZPZp6RoLRn_0s5NB+0wbG!HYt&^lV_v`Sbm+FhSy4U+fJ$Uv0
z!0Fh{ZNI1K6qT}bZuq`^dESD?!@rF!KV1(nN>|=HEB3x(*3C<;c30ne+Dtt7BY#q5
z;YCru*^E3_vfmzh%ow(B*4#t-)7RPAUp{00IY?r^#?<`(d-i?2`!kzud49yTzMFjk
z5*+JN_#<xa_&Wdnl_}cwwJWY1TYRl%Yt?S{kOc=fT&-XUOFT2f5H!5J`DV`N_tlS%
zbe6on#=E*%y|n$vlk-d~K*`VS+~y45SG=*U-i?=Re!tn=BW2oUQ=+=AdeXdke$(}0
ze|$VHU-|3ha!KPfpL>x%L{?nc_Ic~)&pz4z`b&>*n!c=g^|KAvvL0@I*ZWcFE1&+f
z1J{{v8+_WlbW82C;=0a$b-Oz2;`M%QMztmNZT@#_PH(o8wVfsUG|C{Q;(oTfs&BU4
z^3N-Fs!Tex%0K@3;q__)n=M}(>|6Xdwx`2b_SxIEhkK-N&pN4Hr(#$4dh64Ozb}h<
zX6=z&lbKVQu5>@&ERc`6zUCX3vi-S~C)V!`7F8c!efp!`{dKlQZ&Uk^?e$;2F#qDq
zsro-Nt8JpMpG_;MWZ&@TK!@MopI@7_=kAX<t=^w{H6ywCdF02-`hT|6**+9qSAREb
zL83(SI=?f?|9-|^d*gTN+rl;TO(ow%UD{drvHp)>{J+otZ(4uev0~M~{{8RoNTxA3
zIyx4)yB6FR{m%Mbclw!mch0K&=6>9`Y2KD!H<#)9FUy^_a%E=NTc6@m*jj+B*6O^O
z+?|<PdI~!q^l@|XoG~bV{_o$8!#oNKT1u}f_s8u^c~|vr@AJBM->c7kuj#iI*%G(&
zk-enT+gn?IKAj$KAi)C~RJ^;p+}_6T!IzhpLCYhn-aE|diEOXGYG$+L#eqrdHoR^6
z&3tde%7b^EQYX9=(rNa6I6YZw-mg8;$?YZ=*Ub3Lu5-q6ovglT;KQdLtBsF%Es7Su
z%&X{eE!%9_8Pm_Nx=fDgot$*_{F3LLzl4mZ&-(ej{N<ffA+fc)r8Jr6^lUPCzhiT0
z)#TP-KW1<F1<M0h9pWwy>fC6tdu1+HxA@G;;>lCZr!6Z!wsUR1r!SAHy=}A8){>|1
z*Pl|j$2MK)ZH-)Rv84Ietd~2dteZ69&7S#Pr#`Ao{o%r%zUirU!3IrlwY?kI<ktNT
zS~O>#mD+<@b4xYL|8?AM+v&WN&$cD>&I@Um%Nt*RSbwe0viRAB{q^;iE?<_v@w)cw
z)$o}HiA?kUg=SclF{^5u3H?(0;``z@b65CFEfe|g(yH2~51aYz5>8A|Oi4+x3AmoI
z{dVP}PW2lb5}B3VdLI1!et-Y^Q|sf-JLGdcJSiTvW14U5uEVl@E9X3Hzpgy-aQw2`
z4RY0XJA3=q&pLd3xABG6-p5-FbLU2>1@FG@vQJ`dZ`Hl87d@`cIq@!~jsN$ASnagW
ze{M{lDgU)uJ9}mA<=bnl5*A;Vj^CX0l7F4gOxA}NE=B5uiK^+&@><=c%BZ}x+V=1f
zsaauif0LA-dtZ5Sw)uC(>cySOD^8p}zEP{nOu72ola=Q8lJ^wf-cjslxI^wRyS|<a
zU-iuEY}*fnvn9XW)l>JF_4!$wWAE!#6HI;u?aNa4o#{2@!2Rpe@%8n4HoMNZKeYHg
zZ^_@P@{3{N&U5}gl3SLyEkWj!#nwIVOE_j6YAk&h5b*!^k~p)hD;-ZyPhYIT6}!6(
zG?~P|P54ED#SDW)ruF-NY1yS_U#z*KR@0RAaYHgw-L0t)<+y{r&MTyMm)&h+JH@a(
z*{GvDZ??4P>X#Rp*h9*WrsSV0o3nYNy>aQ}?LOA?-~66umr~rCzEYEGbJ-5j4Kc3Y
zrE~YUu9_zKDMa<oWXr{SV*hKMewUlYmeJ;`mihc`PKoJ(l`)%B_uZ2!b@~z|F+=Bb
z+O%Xp?sEPaoEqBc{2x!K?>usBjjhN%Bj!2DWxLFaxLz<uH>&M8*WYd$qNQFnS-;m!
zSAE4lhq}LC!>vkQ9GI#dF1I3k%gr1fdAm6;FE0<?l%zbX_c!l*qtMoUoVCKgzBMF&
z(DI#Jzg}rUTKP1cNT=`b?t1$9)hVny`>?>E=0`!@zn|%Bd@?8A+}!-$-(76Q_3|y*
zYvz|;T<#KkwJLYL%lg!fs|^0#kNt2qocoN+`Up;k<8`^4Tm74Sy8pfVI8nMRO<wl)
z#&GTLyQ3GW3Gb^vx;5{+V|l}#@W%Bogl>oBF+Mna@UVnw)|7AgDcW{@myft^t#|(;
zYMtUKe|2HB`zi_5>o=RUGL$AoZqs%z+vr-lK&E#7!kNcqOuIfDl;6?!@$bIio5zix
zDXWB9+dg~MxOMHU<LioV-L>7~I9vY1)qTA?9-Cx-57fx;i`trX^!xq#{+l_=f|vUp
zId<%xt@WF@xD`i}K+9@Vo}QZeBQ)3daH98x*+)`V>rGa;=y%1GeXaLPhR$cYHZ#6=
z_wFnG(s1b2-Ji!buCAT2zkmPhC3BB`{nFra_LS8R_4m2LNpEBFmP)3_*%_s7DAazo
z>xpt;)HR;DKO&^3Zx*?m#@-xf9H;k0rlM9@b%of9*u`IG9eT&&YVG%|(9yd*uRP$w
zk>9H}w=YVcP<DQ{nW2HY<HD)NiHF*h%jX#!6xCX4Q?oG0e8#y=jl2A7dRL|h)aT9i
zt>_lZiP%5y!Gj0C|J@1-3X(8LVA%inn{?`-T><}3=1;WU)3N~^&l41+Z88nMSo8h2
z={!68IjDzqZEf^%<^DVOi)H&|t*5DaPrI-=-T&FSx!r!tcjvEn)w<j9pJ(k+<8Hpj
zsvg$;Z<b#@A#knctLmhOnpNifyE+xCXYIXMSuXra`)XPZFZ2APH*f5+X8YXmEAsBk
zoZHhlvJVS*rfpoc;q83kh?T{MEe_Rq?mhD`nd`-@Pp{e!Ydp`=mO8a!InRw8vki_n
z=jgrl-847OqKu{CnX|g_rcl<LbEjpkN<`RlZZPmH<-V7Z#`XB5c+`epk7XCgJ>Gnx
zc1!&)!LWV%1!Hem+`6h!c>KJnu}Fycwa@!FSkHPc_nRBBB}1^8onK8!sp;+Q?ZHdE
zrm`RB+*9>cYj5@UwCCsMPFZ_>{;vM|PQQGY33F2~mPM6cJ2pw8`HRifh()s)Cp~)W
za`)jz;YoQ9oAZkc4l<?sG(0x5ds9~KeQV`mNgb~<;nTR|bX>K&bAo@Qg+FY${b$Q*
zx9fFoUosS9^vf=0atiz1*sPkpBQ|DXP`doNfR)nowy6XzK7L!mdzyOL)zvNAGY;<B
znw@!L!^7HYU(oDW_4jvw{(iqdCEKd!*55QckzfaoTH#-6Uv!UOzIO7;JA2{alSMK=
z-pbAbt=ITlb?DHccc)*^G|3c-um5W*YgLkPagpnbufKl(;XJwJ$90Z~FG37~^Vg(Q
z-*r24BRBQJ?^=zH-SeiskBVQLu6wR6>zu;<Wx6-}FYvM5SfemAZ}GtZ{hd|3&a<{G
zpIKiQ75u@jes5rQnwMVa+-qy2MNY-8zg}*kDF6P6gqFMg|B3TYhg}vr)bncAV)L(M
z1?^4Wq^|th$H{uu^ZmWOpmTa8Y^%1M7Swsewxs$}P4TNMno-+wPM(`<z4A_$%-4UW
z{uT|D6N>J*_1yfs<Ccjb1K+kc`=*{OSIr7N_dam-950<62dAW!-MN->vMN1t)zgbj
z`ae(FNEKCw2EEsx9Nu2P+sbHbexjAE>BVF5Gd;yF1gb}P&-n2Cp7s0>H)OR~H@3f7
z)RexkXm$FE|4*3o;yoAn-rkmTW5dIJd-`tNh?v#h{`vd;`e4%}qf)jrVtflOm@mxd
zx4fzpx9nT;{NiV4BBQqCoa~deez!Zj;&HF}kNf}sxmQ$J*#G&!?Dn`har;Z{txwm^
zo3(iHt?9;PN3uPN_~o+0H(dDZb3FU?yBRgHtC#s^KZ-u!w!rM^=_2cLbKV1jXCDgh
z^sjXMB*`~_$KFKojr(Pl);B!6+|FP8RqWGl&!w|JzX`~FF@4pV#8XplF39#jf34LZ
zM#)%2<i&+mziMO6=T|)9OglU4=*P#$J3Bfae0qAiwd#*jIn(#|_x(SAC`d|9UU~6x
zu2c2??-$N6MhNg2St_xF)cvsjef(zC!E@_bCHBbIp84y>7QTDq)^$}4=SurduQXq;
z<#_I(Wq6U{iZ|Ood<xG@S~U53@A3IY2PW5T<;u``ZZluU*zS>=_X@XKW##CZ9L7&K
z=j-YG*KT*5lA~_)^4$FO)*)Vrubt(zrb?&2{<ieuim53YRaM{KoIKRZ{eJbQ_qVoo
zi|IxeSv@q_`s@6ns^|Z^+O@v;zL?F>@X43$a={Oypfh1gN=gwsi=Ng7f8)3RGa>Wx
zvWp8GnZwpZG)}+%SNc}oc?QkfCzSK8_<#Fv%RLldF=-a7r(Y7+GGG6*v77TF{d0=W
zRn0$hQuo$xF^Q7NcABQ05zn`6U3K3gc}4ra2fdy0wqIG**IJ&QbYziWt;D{sH3#_~
zP5it3@e3cj$l~OfzhD1)zqET{dM@h9n(bfIPHd5}Q2PB+?zMaEee3V+>yE!GY-`%*
z@VMyL?>KX}Ua6y?)!Y-6-Cx|<X?%Ts{C@kn2YhFnUEJj>trxq?X>a}6#a_4BtN+c3
zc3)JLDJ!9}Db3WsW8+SvGmWR_U%Yv^$tSW^tDA9xM1J|(bRHvv1Ny&rDmiVLuKcs5
zcWdTPX<Oe}nU7NgME0FeUE_Bxg-apu)P%h@S-TE3>a0EQJnzGj$(wB$>x_ciD*}DP
zBDbHNw%>4yrr5k&runBY-Fg##_e6B=7r(a?U)(u#r2a5(g#L|)c}#3KE9d)ItzwcW
z`NDBJHbnSw?vXUlC!+Nq%!JgQ1by44c<bjSmlYxNeuU@xUf%fnLiP02A0Hm>`TcJ9
zch{ZuKc9+QmA&yO^0MM>tMzY9JACx82&lz5q1R~P+{{m3qztB{9B5#y`F1n?=)N=Y
zbw5@0_kIyF&A#^JdVKxTl#{<L%bTa}c(Fg=->zA@J)2$L=B?wI<)PT~BGSIP)Zg>$
ztKQVe<RwRwh3CWtnQ$wAe_k+Q+o#@D3MXwPZm!MZTduczdg(`hUCCa(H>`5Sy{;wu
z4_EicoY_<=C2n+?GhObOh|_*WQ&Znf^~V=K-uG}xaZ_dXCr>focV~Kj{5m5UFgs`Q
zL7SrTDd*y@Mm=7&_0^-OZEv2s&)ocU$LFuxi)wxgMe)|Pi~X-`-gQr?Z1uF@$e({%
zJQ=)%3Jb-Hzt3O2EiKEvocmgLzo}+|lSHlExn+kMb?uoie6-_ZJ$v%=dHeJA|9{8-
z`}aG4SNZ$Cn>l(@9*fNs-jQ-rXjkd$wySEE#!P$y@y#-9LA%rB&WXj&Y!s~0+96dZ
z>yjqLe6noX+~W7557doU^zClFp&j7g?{ISK{dVa!=f3O~+WWim(qVoBkDj;hJx!St
zwi%~4pZyw_n&tWRn48?%CCUeajm^|dt~j-stu6F7h}pa8jh9D)&dSA2pWk<drZUaR
z-xPCbZ_P#d$W4q=DQz}!O*<Fw{rR9Ux9I6-cAc8}EUHYYTGpipS5L9#@%nTz@L1)_
zs`{DR=1rKCko;S;{B-TnC^b9j$7k;t-n;eWRM(+Drn3Sok8PdXr_NUS>0yArT>hfv
zKJR>sbow{$bV>E~Ie6$K6H}^IO4ck#B^~cDr<1|U{4(E8wH8_3p*5dh_0+X1&xCZM
zyRXMp_kMhQ{BeOrS8nD0KcBpReShEYtoQH?$Bj@MmxcLG_gQuZZE|v+Ww)8p|9?Vw
z*_TIF(;qxK+Fki<X8MY#ty&W&PMo3_`)gkrgYdtel!s~BtKJq*TW8mDGc@$?3)7F9
zXVs-F8b3IcKb~y*Q)R#2oTtC`mi{j5i!I&0XiA<@i~h0gED4XRoo8%T6=U5|_o#ds
zPbGiDb#Y&{BN~mdsuzE**+2ika{Kw%gIT7<HNS<F-0GQjeLM5FrsZ_y&N{7m7SF$R
zE<d+RDmzG+Z?m;u@SH23#pj+*z13H7Z|d8SpT%l>qs$i6|Nm!_ce7h^wQkSPKe|#s
zy)?r4gtxk0?#p%W+j-dL>~xE#C3?yAw~tG%?%G$AeLX~+Ve|aA&VP6PeG$H#v-a4&
zZoM3t%Wb9?q%2p3lv*y7Kl7vM<^Ojl7X18u=kvz2!|i#8x13WrY0!6WE7!Fh344W(
z*QMW-O<!Mha?aDc8)ecjn@4G6od2N3KP7L^w_DjZ-|rNImZ{m*{z}*y_5RQ4HDZmQ
zKwI~G=GjP2ey?mI^HbQa|B7Hd<EDC>bl%TL)Ti@*?NUuk&zN$iJ=w-`zh!TKU#9$<
zX}LC)R{b9qZP^@lY;94S!j#LOW89TRi(1TLyiPoryj60}$xUlwjg|Ya8tzc@EZbXh
z{Y}mGV~4hhoPAK0HbMNX_tQ7q%0HYB?abQA<7T%*x3|B~f@969=lf-Sr<{t)IqFz4
zW$O7P5v|rrgSB&y^B#}=8qFp+*Vl5>1Ivg#8oj-lLWiqog7y~d_1Kq?d{9bzrgN-{
z{mg#(?<LEoUgtRyd0OX*t%%$4FNSeH=58=bJAC%o=@&Ds?sRAEe7bqNW{su(+~U$6
zpX^iy5xW_a^`cI3H{30nTg%Npx#SQ3uWxTpgSIPv`Jxk+TJv`6^@{Iz%Rhcv?{qj&
z`-5JvuZ;yLak^aHuPOQL^qc4p7T5j%AHA^9`RB9Q`34d^N0-%ThpkC?aG=rV(+TCA
zJ3A(NP1U-)-~4vbl#{;l>04(lKInXXo_zV)Yfko8jeXW%pFC^-zmtm(My1;C{^`8S
zZo9&D=d!A=)oFa|9v2yWU;l5~%*pA(y|>p)o6Z05wYmC*HH)ktryiQW|HIAc`McB_
z&oBA8VdbCuKaS6hN=yCRlW4PD<M(WplYeJe{bIWO?9Hx)Ht*MTJh~{Qx?#?}vtORi
zf41DBT%}R3gh77qSDCrlQ`Jnrg&$&1kzb|olKsQo^|n8EJ<r&dveSK19mB->eV(Gp
zQi0w7kH2_%dehtMzwPA>3oF8zjGw;zC3r8#wqyRB;|5>m9s0F>e!=?vjovje+lAZj
z#~r)ge@`d!lB$^P^<K%@`6YEvp6PFRH~ZobnJBLx3(6<(JpNI8e+T<Abp!45cE5E%
z%OlI*-&+&0@loF-<GZ2i7akTE6h3lEJ3Gs@^FV-9Z%pIK^-Tf)s$ClTT~y`IHtej>
zeC~3nGQZmPsAG-S!)Z&a7W7T3I<T(DByg#L)i&=R9xZ9pEKfSt<Qdyat4OhXG@Ocm
zo)`C(nW=P&&&kIT+hgoxA3jwznk1WJt$ysZNQC<vNtr*1I^z5~poN@kqPB9?|F716
zTef@d#l`NQ-)_JED0&}buiL*4_fCh82U@@#=CFh*Jtp7uB@aGb{QBjcoyMo9>mNUU
z{Q11;*>`u9etCJB-QM0Fw6Mj)qoX2%^^n)6tFI&8EVy3%`2GCv4=h$&&U~G=*8Sp>
zSgH8;hkO*4O{n?GC1$<AJn+{2>4z+KCHXz`N}K6D?bvmV=i3B3!`CsZa`1fF`%`xJ
ztd6VJ0`bX-Q}idcY`%VBx|aLV148^Myu}tWJ#x0QQcq9&ctqI$!`<@xcfXnFmY?O^
z5~0H*Y1H!c^z_H#cP6nNo|f?8i|{W60ggP$(9q84zf#M$)w^Y6XuQ6@etFT;Q{UIv
ztX#Pgv`?<|^|e5&EyBEP`Yzu-i1B+V2;AAV&r*bM{hSjwU*v=K<7Qu9r>ddxV&2sB
z+uL$W-rf@R_xA_wpP6f=ROPLp;dkS0SsRa}Q%Z^oXw`pw{ok*-syFv&`y~Vg34!*>
z{QmZK%G%r0cQx-53U=UNX|jHH<=gp=*st-Y*jP8_y^WGG$q0Cox})Nwl5N$Og!lLM
zPRX{J5%o8H;pDH69)i?An%{f*XZqvnkX11=d0$@_mXeBkk_uY3lYG37VNK-bwqw20
z-R#N&BF;ueMiLeU3ZUhvKcCM}e|Bc(_j{Z+<?m$XT9rOJF;O`r%W~R<xc7=LOCDu%
zvNSn8Jk%(9<c|TH|77;nO3In{_tj3E`};_za7j<cgD0KBA#Z;OUES>?$nT;cAR?8%
zNW3WJDfbku&`G7)*DtM!+^l5%eW7!E#^q(aFJHd=^Xatymb|-BHUf<cBqq<B=QrQ3
z_QuX)_2A`xskgRdZr@VBusVHy{Xa|hKADAC$ES0=Hg>jXU}{_tz`5%Be7D7634Xp$
zr-sLAUMjBta?yQ`O<~i8dzV|8H3T@g=5qPAUrf01T_tw!sn_fG$1QpO@55n!PcN^e
zkB^QXVie(c$82M38@aE>^5DUPdp@7DUb1Z2y-WXE-z`i%7aO>6<qTzS1%W%Ve^zfh
zxVhx2(`@bMVPDtQJXSyIEWp81-rBH1ucBhQW>KxT|8*ml(pOhJZCDx~*d4rneR|p3
zTMr8?PMkQgW4qr12L%BR7N$lJ)un%kQv}9gX=>+~c*(C}`|_6fIMajciq!tfiJn=I
z9HGtpZ_}x5b0y#G<$k&%I@W(PgKVZJmv_I`A|nnKr~57YI9Zwwxo=F`cB42p_0u=E
zed?)RkIwsA@67qUG;_Hb%e_{Yf8qWzPd?myUBPy3Nn!7$MRFYK0ReL|?jJjH;K7}z
z>beI{?by1lI%u8OLN=>5DTWnmEN<++8h0Z7!pDe~&Qk||*1psc4Rury0JUNj1VpT+
zN0@AsHa3gSk5E2o`y=r=Pga$;Yuu_Ui(gl~vVYHD@Z#}RpSLkLKEGGnxafR9+N!c+
zXKr`RC|FXVc*v1;&AkcdIxI8))astjQ9N0ecGi1uI-5|#(MNjzSzda^GbL2CD(?OJ
zSgUn1qv&};bWet=I0v{O;b19mW@tP2t6TQ8y#mYf&FZZmj!p8Fx%2;@Z6O2WmwBo&
z_0zw8-&e?dwCiTo@8jF&+aB<_+P6gBVf(K|;v04-Rxz>q$*i$RS?;L4Hr49wtV40P
z_v!{F)v#?2T%oWs$L86IjbTZTCg1z<f9|m>fBSA*)pZ-J{dIf2W{$V8sRPXScWk}d
zwmt1kliRcPcGnW_g?<?iY<2rrX7QW3w-wD4YGIpxa^p3xCr85O$nTxF{PombYagq>
z`N!L^L0x%8))W_3jkJz=4vo^))2?OgtPpv*zM?h0&+^1fjor1?!e4z8!t(aN+P-Q*
z?30;`&CWb}$qkF)-97G{XC;HKnryiL`qBv&w(ASR3U*4T7$*MBiMyL~(7%3n)665&
zPM9)$c@wsN$BA_c+>6u$0y0>Byv&G-TP_*Rb!2W}$9dzMjV|4<tWH(G_SkIYVX;(T
z^6_n@t4{q;*Ua(0RJT1RG0A0xn)=eeB`iNyBn0k!l5>-v<H|*ofcArY>3Ui*TbzTs
zW}OxIv{EJbCc}dV=acz9eqdp0ykOqw5U^&W+WhBNW~y9xeei*2z|O88&t{%EdTOWe
z{{qXSeJ7u|&ph??gZZ;}U9q;Z?K@LKwSHyg9lZIL`-Nd9BZInr+|j5=cD4LFyQ3}3
zABkMQ^tR&H6h(sz(gqLbKK=1+Gy9K<a<)l#n!^^V9z87Vx8T7+m*Xw}+-9-d(a@Xj
zmtv6Y%r<?q4)>a@gsgOBJIigp`<4~hoou`peEes9&WhBpCM(t)KhB`O_|lDOPZk8t
zn7I7)&egdNy2A2Tm_)3DpG9@PNL}PyDJe3wM8zhRVd@Fhv}wzCoS!PG)ygqd2$aiM
zm>f5%b$*SnJFvvResAL04co;()ZAA~`O2!hy!?|}|Hd_8o6VWN%2~~8Ti8FL&~)m9
z4+U3*>ji#Z)(LaJT~T<l%<YP{Yu1~fU44HqY(2mI{I@634(lvi=i1vfe!P<x_u6XK
z)m6=Dva>f<f6ntQSJQp=dLnPk@6@)YDxP;*PL8d@8WjP&Oa4ngbxmbn61(Z+J1*_?
zhJAwbzO2;~7I<^ey8qCOcq4`hH7Uy*mu$)}wwf<fY_MUsx~=xfAJZ=VYxHWEFQ~uQ
zB~7I1uB6!15B_ZLmuUW3zMIqk!4o^~;(m=Omzvl0vCE!19ig9Ukk(qZPrbVG;?g}W
zy6!V)TRSiHJ9gW3#T0NJ5a8(2oc<#%B;4-aT*u|<_Rr@U=6-a0cwtq)<d;)SVt$eb
zH!k^EW4mePw#rw6XBn4By?57J<X*t}O~1PLVE$jHCyUN^uw2-h7BT&-%iqoI7U^3W
z+T)6^c!j(_e~$NR#oE^y+g9+Y)oyM({(8!xRVOMYAKIjOenV8&nLWPomvsXAZ4GC2
z3i>C0ypUcTRqnEm^})@KUrHidZqz+wX|i5W+!Dxb9VGJ1idUs!zWLJ4f4kIljw$fZ
z<G;8n>baO)6!%Ww#I3;<burmbdoFWqiaV8XM8^2MuEFQ`?=S6CY&x|}sNS=*_2TWz
zGIKlwJ3WIkLyl@1M;^PVb2sHES4FC&OJ`A})6%|<HI+<_4liaqC}_-+i8pQUFHc{<
zEqy#Y`PQPRAEvClo2?YzvyS(`x_-A!&stn>|J(jPVy)E$b9tB73A(@2|2pa$>}#7Q
zq5bFP&m%HFCQY}WbG&P*{RPKU54ldb)%>lE-Ies^<Kf%4#p~nqJ#3GD%aDH1dv8hR
zB)bio!ppWicpvhe!TtA9<~1jHSl23VYT{B4_`slaamgk%rcCiSZ_aKki=6hQdf_=k
zy|y#Y6j*iEZRP8WEtU#dnzAb0^4Qv*nfs=#+SRkEzglu-E`w=oV)mY1i|;2hBHh=Q
zHg*SSHaaML@l_BIiH1*3aGrR#_Tr-d`(yW;z3E{;kXEdA@YA_j%%|tKz1=eJ(2Ez!
zD(RM8*L_b<P+f8*r0Ehv-`XCDpK8{AF^mh`+70*Iek6E3jnlgRvBWA4R#CqN4;oD;
z+LTT)ezMo@>5fNPjNi3%gBcA{&h;lx%AA*a;mpsJ<rzz@E$U}Ky(yetE*V<)ly{HG
zso9=qzv$eZGEr4_k9EkaEr^U+c(e#KtbRUy@%oV4R}XJ5E$2x)urAAx?VIjx%e=5y
zzT^beTq~}fL6v**x7+;cDyd$z?3vUty>y@VWj+5=RMYoLU)mWL`P}VYzy(7kkr0c<
zCidxn*r%?FIekSh@({}#+liUSJW5{XlyFUS+3Vn|cJ6=i?>6sOmeQB&t{+-#bUtwV
z!cJCaACn@zlTun+gIl~fS(p~)Gl7~g_3iug9nZDNU!Bv)6q0wrEBfh;Q|jIS3*06w
z-(a6*%b>jbZfCNZ@V`Sbc2c(vZB=>odSyv=+;n;Vm@9M6>nvO#>!PuufQgk~Zt5YP
zJr~{HXU#m9!+D<jwP|aG!ov)<__vq88L>(H$eAL(*}XdWxb!6b8$WWszm?Wgtx;Jg
zv(>OHQL0)m)v|B<yh3Kv<};#NQ+1q{!iww|&&0onJT8gq${256uCG7KY;C80fKmy=
z>*sdsEi*PBznF0)MeTe3;VV~Pewp{ha`CLGJDZI(XPk{Infu3d_Ou$G^E!_?|L1>*
zC{wu=yKq4pBQ)?f-LkoQ&SUB%t>ea`Csi)r(tlnil<MQBta;Tou()l}-`Y<brk*|i
zMy6})CW}jFiudmj)_-zj-JP1s<$d%1tnX$ty?b!E_NB_owXLVmeXWw0?N`|8y!4R^
zq5x74|F$OK(i+w)uh#hfOSr#j-i;K~<6$S~+*#DB!ov1*_q11Eqht;(xmo-(E5m%3
zt#$i}vw?feH|5S-xInJbhiiUr<txFqEj5jkxu&eWK2wr?apZ?z1-WypdV;6x?{IQF
z{!L`zf<v4lcg$yO+IHhf;uMd0R&g><JX00y&Ye4^Y&PBT%C@QQ+j6t7m<zBpG;h5<
zS1I&vGp8@l0u7yihmReyH~R63Ut0S7%AeEL#vIiNT2$ViIaPOc=)+xCQ<*{y!Y=KO
zdLu0S=Fk0WJmoh}zl>cNr2on>9JUtVVxdxq7p%1*qRJOEx7Tgblh%7vs{L=?{JY<%
z>PY*G!^<z#zbHNzyEsx}r~U1+>)-RAN{2d5sy5gZ9m=3{{JN0gqu3`Krb}K>obyc9
z>cT?j;wEPACq-)8zrC?7y14hox?P6?`sZAlv_HmH@VMn_7uMtN-}IcFGvkWK<m2-n
zukDNKo-u33eNnDkYCbJZV&z9{8kr0n95`n>^4^IQ*u%uT=SG~;NiH@8-^Ne<CqlR<
z&7FGeVB=#xzGV?N<2;v@F5p}LNp8>8V+X(3znQW+FCl41*`CFLVb`p#E=hf+a%|I?
zDTX&$Pp(_BBqlWC=j_&8&49g|deWDjnQ|qN(Smt}rq*>v{{#A0PDHEPu8>&r_)w^5
zitOZF)gR79<uX0b3pH)r92dxOPDpF3rmIaEs2m2jKwLD!gub~LC2u#|6kaNm{PfaP
zleIz&X3}i>`Z?Eye-vLe63e&aa#+5ubj775YVkey4;s&Zn*tix3eK}!Y~3=c3VCSj
z=7q24`En<fJ$><e%bL%1-;S-8^3A-klz(yCx1~+nk9-aOcx2_Q;8_)~bkbI3+P12C
z1&Vg+9@yk5FYv)h)$Z?or+e`mB^tiZ`!it!@A^v`7g~x``~F57Ib4^!zqxwrngz^O
zL5HJTKxKEpo6JiWj<mE{u4?jhpBvD>r`*W;#%mLkXX|{PSG7#^yZSX`P3oh~QRi}V
z_{?&*Zg{c7dhZAQ{plCJdj`Lo8aV9(_XXos`#X<`&HT0W4pY(>^AF!7W~O#uikSUO
z;Z=*%{RGJgD<o6|_p8r6E5|nP+w_mCpB!vYzLV;|FCqDwzS%$1sOU2Flak<40aR=~
zn6^PCwdnHk$hB!R^BirD9eNoYIpchu?5l9blv_5&v)^mgo<4JO^US|*7}Bh-pFZ1e
z@ct~P$?6w*C9Cefk2gqmUU$5^yIW09Z<eULQN{K<MR9Q@H@51%T3y(3Uiv_n{e6`q
zKR9BS@jjWo@_uu&_WXbRaevE%ZC6=Zh$<C_#h%>IcRKdBR}sJ6-%6$B>R(wBQnsr>
zN|xV^w!3wEZq%x~2mCCY+$wMG8NxqBV<r3QSXH%;#{^a!kIG*AT>X1wM9mq;N_)=y
z<+hKX>IQFK)tL3qh*SGa%^d&iZ|9{i{nQR{XBCdw^+dr=)*<n^UW3k4k3xaRHs@xY
zeL3Z8!;X4zQ3INEaZu1u@<}{nQPCi|y_P}wYqj0kKl2WI?yHjRj+OrY{C)jnj{SeL
zx9BXGyf5-_!z{IR@#d4~t!rHq9VB$~|8f5_Gq(42?VcuRZ2A7)u3LYXeMsNEd}m&x
z&{F1)SKHJ+o$0EzVU9R^a>iM)H!n0LU-S#M9`T=c*(TOMV5`xincV6D9|{%<&7bl8
z#&oH#y<gU-B=4VcaQ5!4JXIe(Bew@k@LPT6{qx4P3}t6ty09<LUno;AD}HPuw3YTQ
zyywR+_D8di-+KLfx>3cg?^kNd?38DRGaWF$eav)4v}@wAJ*+RdA23$@W`8!*<JgP0
z-50H2A53M^W)s$syHwlk!K18Ulw~<t{sr?Fp0%?!zw+F|VQjE^zM$UCAE8Uk#Uah&
z#svrFSZ)w)SRB75>HEvqme-;z#kWrlzh&Q@`RDQ#k;~>bUpnSFI8H45vBl~BCo2bc
zmvnt4k*O)&T$@_f8#_<?I%{s^M9D{K&??iRfIDQxb)g%LS@GvqygPmIYKB+8RH(vR
zuGMDwYzj3us@F)~b-JU|Sir>U>VBtAd%hOmyo@)Fl9QK6y5AF8Fsa5j@%lZUX>!7f
zzR(7mz#T2s*Zk?1`Jc}C|Ma`gy+7)GJ2JwKx;BeAy<a%F^36v%6F#wEbB-2ajUDW7
zIyY`qKgPz^$#qVp%2RXIi8OTqQ0J3{>7Z=Twy^ZLnOA3qi)g2H%U($SWY(24m+2u3
zR~M(*RAaM3X>bN=bWqSJNm0Ic&#h3NrBX@gFuZ@y1adT_;}7cpe^5|5<k;OHWM_3N
zY?|idrGNdrgDtB=A0Dt2H5TeOy<D6!zt{PwLCp%k(3x(jTeda5_T;kev)oa`{kuJ6
z(#y!`^Y_+<ZCfgSXfMx{m+^r~KjXhuo1J_1ZmQ#=;$+)bnbOk2pKqR7@OWcemDee)
z!0N8_y@H_G@iO~Y(Tt`ZFJ?b*boYF8V4;fB>(HlhEFn>6)814{zN->;du%K5aQO_&
z@BTWw%jYjXd0<|M#uQDpHFJKv&gaPU*--y*hYIp|0`Ei?cZ<gF7b0ieIOwu9sB(`+
zm)9i8KQ)fFL3`}VCM`dsUF+NOD^0(8e;MDlT#xj)rdjWg*L_*67R8?0HRV+1-mmB0
zEuDX+W1E}_=Oo9#8$YMd{jsQSUin6zw^G#?)-PmVnqrX|eq+Xq6UDcszVdv@%3u&#
zRL05esqn@3MfAn22^Y){HlMqBfB(ai&&59cyJh=jMTXX+&9fsvXIU@H+xA4+S+V5J
z&Az<9yM!;=d|79;YFe>%W%415u;Z3@gJOfHeS7WMYHhXB=}`RancvTO{}D-lepK}~
z=QsUOj-|dyuJae333=NybNkd|i?q1aKg3;F*V;Mp>z(c2OZH5iy8G>ssE|K@Q>x59
zE9}|RGhe@<`O(*vmp@)h@YgyqOL6)$2L0yC5tB-{ynT1_scPn*S(C2+(`rvLzMy@}
zl&QR+)=KS*`9&24?<bp_j|Lt2{y1-{qRN>A^|vHHp4oJ!ZnDlN!K+)Vrq&!fr}%Ld
zCuk5vJBe55ms-UJ&!oG{?=u?Izs%dU@6NUk<Ls)-`PbyE<nEu~u2mE2<=VAu&*loP
zUZ0gRKdZ&Pm)D+p7JB@~o-Dnh&*Bl^uV%>Y^lmkoy7q9(OYW7%?{(AGSTR-HpZK}J
zHcvU2?N7Vzs@;YsoZ~Houiri|m#{3${SyCz=-P|i6`%DdEVz()NoRHAlZ4Ar#=+({
zZ_HklwC&(amsb7orJv8empu2buG9U`lBD2&({g)4COoWJ)B3UaY|rfUv{zBw*G0;=
zzPxs8!h6Fq+0Y;FG7=+%w8YQd{!x@_wfx;o)ynUu5>5u(+^RhH=kGVHx8gocSsCgz
zyZTK4cnqjk_*d8#DU~n29~MlO+C8)X@s?lZDIaUTsjvRNL`HS>wgvv~Yj^ujVp<_6
z<#SZ(>x)l6B%Zy$e<tO*)zkAc_v|zH^-k2~+tqWoH^)AdIk#EIdD`04vv<U~_padF
zUC*rW<|M}|Db>K4t-=voJmwi4mfm&S<6G{w4I2Fy7A3nI&k$eDAjG)u=+aGITev1Z
z%wk)gSK9AwIXB_CefFAn5?`6NSo{?Avfq5le|L_Hs+Gsi^v^o?_ZrQy%S%t}IW+M}
zQ)^u7l}nGA6N)#yoKtZhJgOtQmW{Jk*ye`Dn}0vqzx%tJa9_Xw+b%QISNXPrqh&+=
zUW>zDi@*0NtK83BZn7b0q0DlZqq|Jjht6}bpYNoXC={)9o#DdfXj?=5Bu0UU3tD}i
zo5}51we{w?^5dWW|9E?6;=v06=cVOb8s_&cds><h?%24)nCs)qosy|myTUIQDjYi}
z+3d1A%ZjPTO6dIUo6}yHoZ}I?>V0QNe$Aw9`xecA(xtwA(c(Rhsh8hNn&-T?NPoWK
zsE2IuJ+`pj25<jlYH}@qpZGQ_CRFW)yL;5E=9bfZNt51{dCzkx-7h^oZo9)to$se7
z&HDW(kyW<v!nR#%*(cMx>ds7X2$1=zd%%BDO5fj@_mO;WcP9w`nkKHcFrSI>)zrOb
z;!0+8EPQPE*RuU;VVT+9&&uk*uP^lSTko`SQqDO`xf>np*QW)}oILqt&h3f4_bSr8
zpCvuL(^<b`PSWv|Pm6t?7X7%Nv`M^nr`DG#C2PWDEx!FZ?7F&Z{pRndXVuILPM_qk
zp1oP!@u}9M+0P~Hw4eLUc@(v4+s^cr3R-Nx?roU5cLMWoZmH^oeN}y%#r2XmzBc?h
zWl>Fm<(^{edpDVD_E<A}zH?lCVbUTaZuJc-Ca##Ss`2~D!JgUavo>qi$fv5Rnq8i8
z$#CKRiE51IT?dy6OP;y0Kz4Jnv9jRRFDyT^78;w@rmVVRyo1-Ir{vD$bF=F;S;EW=
zR1YctJ~-*EPh{3Z9-;oSkBdChirBm7ia($6S!274X8M$nbA_@~S4mfxD*e9EU+w5I
zqw#r^^v9Ehzc1<C{BfV_b*zbw%D=1UyR&XCIw2DFHsHmbQ-`0X++KgH;;H*<-Bg7V
z=_5v8y<aj+b#{OIs8pDv_V?+75AyDroJd?Rb7$HycXt_<uq(z(H{bqbxx=zo{Mn4p
zd+b1ycP#wo?dEz<XN1fRpD*csvgmk(Z8Gz!T1)$XAM@j~;uWitFRwhi<;~}yZK-no
z`;W`7|9(tb`}Tf`vrAmt84XIR`d7@{YsfV1vUTO+;}K!2Z%SXR;+OS5e8K(1j#*y4
zdHomH+&L?AZS#?pQOpzWEKz&fBQ)X2FMa!jOUw7)+wd1U%64<oi5Sa06V2IE<>#|2
znAdmxd~fT#*x~ZtYKd#cI<x+%8Q1kN#RfacR9m!l>6I9q7T@R8QT~l%@|l2u{)4fu
z8X1g&4;Qp{I=Tm33%l6U^!D5DI_c`%TaSP24~#S6oZz`TCHd5x5Vtd)PoGzuRpR9M
z_F%5NY5KX{Q_f{7WXiwT^_8b0CisW-J-@WwAyd`dZb>N^+BFAG6uVxO({-7VCE#aC
z<gRZQ!$h9xZhZ2Sae7Tj%B{TRkM4#AEfwHj<|^^gMYXC}O8ME6Pen^wPrKSIS?pHJ
zAD5ke#Jy}o-P)Og#X&RcBea)?H#8dNzDp};Xx{#8=_Xb0YrCIBNZTJx+3>k*+nbCK
ztHt@39hf*)KIEHvYu<vc@4gCO&Mq)2&iwXWude+W1Cxr}_Kz1C-~M>czx;Eco$Yk+
z)PP*k8}*+b-}XvW^)A}C-lbQsU#MxBbGKdW@vAbkXRj)I{@~e9lhQop_Wf^47`C>p
zS~vN*fy<<et+uXD+-EU+z((-g1cikUmj}zUDimEz_nB?f`qFsuhNsuuC+7xot=?bJ
znVj+7>o;FwMR8}oo2$Xg%keM2ym|gdQrOw&Ox-kJdBy{WG?ix<+_x?J@MvCs=Jy}m
zITLSvdKtA==lAx%Cj>m*etwp2E3KNiH~jt8-5IG@rT@<Om@M+pzy91m=FgFmn<u|y
z&VJbOx39kTGyBckuO{6P+$q+OuKcPyAmAa3nC+xao!BdUeDim0H58kx+!JD!aqh+)
z(Xi!{+3fSZ(szG$&gS{_;HQABeZI%@o4$G4Z=Nu6Cr?{yTs*neKeqXUD$^Xd*^lSU
zjTfG?zQ^&|MXz^ZLYe{X`~{k3&AC4%%;wILG!fjL_V{%2yJ>%y->S6e)#09RIbD6;
z=FeCEE!sRu#eZhuL%9a8FSkmjS@m}0*58hCTRi{&-RZ6Q;ZZvt{JFh-7rXGS*)Ia3
z+&^(@U;cjo=7Ra#65j=APBisb_;Pk_KKJ#=lRnQP_pVcnUJ-EDfwSPlY5AY~>md=C
zr}r-EsQ;?w>Hn^>DJx9neZDeo)(MNgb18QpzBb=CyV1Au=~p`jjllPtKhN1M9=%t{
zf6c3e7cGrZW=Riz<lo;IRk|SGuH)Lls5MoPamQ<C<^H{?N#d*VT3uv0*TFdbpQZ1k
zotI^9{Nv95mVa2=e(BCR=dZJwKlriA5>(#B-mr+1zI<(Ev(eUS-zVJ{w=&)UFJ|A+
z@Kq(g=MP7NVZ6`6TGzEYo9cbl^8Yv$*U3g#zG@DQcz^Qpi34wPN+x;zww`MxpLKOL
zW5e46Cv%Ed-qM##a81+5kew(RV$t~hUdke$mC?8Nd8V$NwLEsiX7l5>QmgN>xo51{
zHrLv)vDyCn?$+m3(;R<9o}FSEnAf~8&bd6s|2Z35&x$`E%kRk-Y6hH7zM6XA;3kjN
zub*<SS?8|3?bJM(-_d7WuQ!PVdHZWJ%s%u>XYGlsuDPnebLQA&KmVO8=^7x!K0m$B
zd{U_i=hfz$=hpC+rx|nwXf|HhzUQm2Z}^SUgFEkp`{&z#ufDA0;U3l0yW)?RI;dCp
zKvBJw_w#e*_WeHVt7bk*TXOmG`rX@pBwXAUBfIFry8fM2%=Hob&u{&6WLtd4wXKW8
zFMrBAyUFr?{QEUOm;cGUu}+iW;>Ajpib5CTk00eq-dviieth3C|H85-X|^*>k{RpI
z$htA4Eec=5vLJc?HkOpzk;`2F*YB>&y&>tJ^+&HRNOnQC|8lm}&HMXPXB&Sr)ctXH
zTa|6$v+9e|FI*$GtFI8e)mW#yI=g#KxMgKvaQeyH%F+4phtA!%Xw!0X%oj9N+hDM<
zS#-{0H<eRAUq6UB7<F3JU-s#zlV0<RVjlJ|iKcwe(hXLeU9Fy|;h?->R?74D*;;44
zJtdmIlzcyxkUOhy+rg-9d-gm(I%&q0yDfqp_i_?*XI1-NUUg3Rwv3GL&hqX1rWLOV
zids~3c*l(U<<UR)a-5!2VA9OHuxb}eX0Fcke;e<9jF1gqF-6H~DT~wnBITm_y?S?h
z8(dB=W7~H>VX<E<v)ziE->0XoZw4=ZeDLsCQH`~6S+wK!)>8Y~Cp^wF*nHUVuE@Gz
z^|OtyZ@$}d{-$^MCOPN!Yf4oY6&Kk>9^I%uTOeuAnO)bF4gdc3&PtQqoff{8CjvYq
zEawuN&pCl}jmV0f*;7wFpY?_1!C%qY9Qvw>j6U~NKYl-FwCg;0Ncr`geL=a$t{Z%a
zoYT8@cXG$|<4=msQ>0w}NZ0*hk%_jf(3-hxYnRo<E}y^0<CBg)cKGBOR`*FXc>g)0
zbB4Ds<dti@kPzgm^6r>-z{$n8yuk3Z&VIM`(b>Hx?kk7yoc6Qsw9t0zkn*kCEuQYT
zt*mNRy;{le=C}3xYu6w4R|OqsVJ}~@^U%UgdzP=8m9nS(`@LluZOzmB`;3k*(95xZ
z{LYkPMY_tNg7o9XtzL6Q7X~pc%-?A>iL)$e&ePAl*Mg;Agaq@awZ;nzg6C~dFt@&t
z7iS1qe6n<z@62OgUS6JQS1Xmj-tMWCct=&w(TTr{N+V`{m0gpY6n}2l-|HLIT|Vwy
zWw5sVa87hs-UYAUhc{037fbKU_D-z7QpMm_cKUAT#3ynr;e1Ez4JH&dHr|wVP|y4s
zw^-lt+>DT`a}J)J_1=r^mw5B@B(+!V{3|YR63@^1ocOR!R;Rdc{u!@p0T=kd!#dwD
z_$dAB`h9g(#QEl1+c%UilU}y9<$3SdSw**X*K@kRol{h|Uw&m;iVw4`2AA|_j~NCV
zri;rqmB{pZJzqSdCdK)h;G1wBxyJQAe{FasSv9(fC?!mgynbQAThrg+`Te);=2(Yt
zCGmGcvi7O%cE|3XxVnDtL<8YDHc6+ABRfHBnVA?@y3g4D!r)lq{LM-CxRR2RV*c~3
zi8$DG|M?D?gDcMOd+MS0aH6WyZG(%J>+=8Iitmj1tG#Q3I0wJk(;ImVpdoYF>~9uQ
zy|d0d`IkFmm(|>RlkL(&Hs6_DnKwh-Kzz@f|M_=in=R+R1r4*WF21yJ>Wtu(JNLdg
zcYU|!)#jS_2c12WF0R<tGw+*j;$gd#>z~3iL)NMMib>Hk7P(^J02(R$Aouz5d&ze*
z3L~F&rkTH=@$SyC6UC+vDxO7WPs*%qKByRYY^Cb8E7qLT3pdz>6-tVfGL_8fKYL>9
z*RAO->b9p}cl3MhPrR^y-JHhr?Xxu&?l^s2Md$a{s7<F%#`4*0(|o7(H=|v8?Gn(U
zwT<&TU%3Xbw|zGfQdyl7J^NVQbK|S*6(79L-mboTXJHUmG-yKbMf5zGKd0ZFDbWeE
zz0x4qdX~LN{<RLMjC5cq__KX}r`yx7`+AG=S7_XwlP>?c$kMmF&VJtMu)31j3%;_Y
z{rO=RY=8Uce)SUN?f*>n#&Q3cV^Xab&->PGd-`*Mot9nS-L{=I7WuMlMpNeH;McNS
zAd8pFHD^wa-r~0DfI_{NIcNFWn;)KEl;$gcn$uMFc9+)=OYYDqkB{qc21ZrreQ>#7
zw(fV5fyG>#>%CrutxbJzH9@nf75#z1Z~jbC-CJ?AO)V~Sj{8=V7ZSf$h&N|Ta(rs)
zb<fdjUYxu7jqYmGSJE6sR)yKyvu{VntaZB8a(btvdTP<k8(G)q@dRzMXx&}p$#HpI
zZ)fAxRD;`{Ud`^WU&Q{ey7v9S<a=&5>nwjf%-_Ab@YTdBwYIdG*O!G&I9F<0sxA6x
z^~`SH@3-%pg<IA%_txCG+iW$j!7(YJrE_`q$vO9?2UdK}cskK}xm&HWEK}(c)f5B$
zg9p#>zFTQ|=$2IG?CB3uGasI|bdS6(@Vr7nP)k+dSDA2a@Qos;!q&oNsyB+yR_J_r
zd(8wi%+q+ZXo}_f{|$O?SZ`g?n0VaHNVna$@Nd_j_xy8CK9$wH|0%G_YnGlz5x-Tc
zC&y!}yC1i@^i+sjzDZ>(;aFbT&R6o{!kh;O?wot8aQLQ%nrGaL&tDJqY^lg;D&C$U
z=JxLVwr{`wt*u$C<x#}%R<^opWmb0h`KRmp_diWATW;8Tw);8rh2Q`Gxpr(We_NF_
z&C`hMpz;;zJNFs+Ys+}IF5mIn=)8JdVb){SZ}J_N|COx^(|=dm+O&;Rc;SNWe|V1Q
zYSh%-p2FAFd2nHsgVLH#_2O);H`YmyAE>fVn(lHof!SB<<wfR?)t4%N>$3*jz&`!(
zZIfK@rd>kfTZ*^(c(;U1Rdfqp`in8|vC5KJazdMr3#6o7dM&i%aNX59XQvrux;{N?
zp01I)SJhT>X6>vMZ*HYK`-a@CIrzXwNI~%8?b%VBTqoyQIWgbK<q17x8+qZtxheU&
z>#ls*?&Pq*r}fMVu4b1R{BuqQ<(XbQw@R3+wt4PMR_)3Uj|{dSxUv4`kCg9omm2Jk
zlw{IQwNq#3zJ4p_=`YQ=#DY@E36f?fj!YF5<4ZO6QN7i=gLOlSq^yPVH&y?cR!g`3
zuFwb*)SvF~;<oRXT;DCdb_b2zLcTpQ>TSNT{mzN#=9Yb&auI8uM}9jNkbA1_lU&5w
zC$>Mfy9mgt$VP{-ulj3!Z_e#5|6@;H>@;pQ_|RGqR=;QRhULFfy*VbIw71^Ed3NgJ
z_m?-F`+ZY7GRtJC<8-?osaw;z)Y#L%uYInYo<C#Nn#9lNman*CCXy_d>$h|^&%^GI
z))mh-&&{dH=O{j<n_ki$5aj>o+!V$4j30h2I_bCkT>r!Qb;rKxC3lqNq@FmU|KsK3
z<(wb=l0RS1H%Q(t>ik}F&yNg=nQ`HF?qyuRFD`uc%9@BretC8qtMBx^`ng$Lv0HfE
z_Y(UK%jDgKWeNI8^)YW}iC>iaYIeSFPT$StQ>TfRIJjq3bO&6B4yy|d<u?77^yo<E
znK_n|w@pqq_!(NWB0n#|a;9OfPl)!^>Z3lp)6%bsac2u{t~X$JlXw{tJ3qD?HmAZo
zp#rqvwphko^Pbn1<&AnzOU-rW@6g<B_$2t`fj95m`11Tt+cj=-e0Df+p1HqW!Zzd7
z!es}Q4fjm%<a_!w$L&gMwm^v2p}fKZm2<~_Nk|)RpOff+{K~_)kkh)sExr#=SH8R%
zw4eV?(xUW&vc>O$oY;)7rQP|aa*S2;@J9pwd->1JPWj1uq`%T@J+O-H-m9E5|8zuK
z^`B=wU3dG&i6v?~`9E3i)Sp|KS+#7d5d)X}8BcNDr;$G%H@AgUAL(vp=NG#PS{8S*
ziIw}sk2I#LV9$m5M?>z<lMHHlwrQ^T{;etX?_2lXJ#g0K#o4=DjZXKw=B&*RT^xDX
z;NOqO{Yuu)L6@s+N<F=!?5z}A^Fzk{htsC@X1y-jv?biO#sB*M3g>U<A8RkakpAkt
zxy-=})l#?bzn*_qaLfF=9?k3YKlnY7ljA-2^r&C&GQS@uYP^^A&p7q}XYG+}`=^s$
zxq{d6p1XSROv$=$@P)d2rb8F%>a;&j^R+p#=YQ>?^wpdE7a70#y0fR=MkZeYyyW)J
zc8BZn0_)yHI_(yG|7=&@CG*)5H}~i)5cqnwyXW5z^Xt2g$N!ymQTI&Gd*i@jw$imP
z<LlJ5{j4R7^R>^LX@9=(>gbP;k0UoEFov&-Ie35n1JBwP;Ru$BYA=@Go>RU}`!J1>
zRY*_6+)zSg6}#7zE6rEe2~Cdpk*pdOa<%Zm4(0odN48&C^guK4w)>&uEYtNYJN&1w
z5IzyBS-np}<rRyEL}Rxn-?=6C_m?hj>F@jSNl5Y}-?q8Jk5gNOHNLEMc;7p}&&s01
zOKVRu7ifFh9EXEv11-vbaiwarJ+rdQ{m-9oTyg8W&#yFI=B!ye>hAhf?J{PuQ=Ry?
z{?Cz~%IlH4vgZ~5>bQLrvNHCTgiHPZKe48(EoZ(?<6dxl_m=(3o*re${uH_{`oQib
z_A@yH*PaaDnY1x@Q%UBT=R!NCty5nS<ys^Eh_P||YnHBp3yU)Hy}~;^_h03o<Mk)j
zOs`C~T;$cqE7I$NRn;!$e({xf;;K7&*7J(VIikVSEXt1@y1iZaQA%*<$)gKah&(!|
zoy&8>n5!T@m9w_((4qM{E;rxB-u3h?-0*QlMLUbr{k)fZJ&T@LTh=7Lj-Hs+eZjnO
zfrN7Kx}=14+3ViNpL&paKlt;_pFKHooDBDKI?b~Kws#%sspQTx`nO&|;^SBTB?~HT
zd++UIsk-ka7GV53eb2F1o7ML0$=g>N6(x0+-9N41^QY})qWk`_{JOVe&79(y<wp~k
zT$k?7Z4uVc2-@Ql)HwIRB|EE6XATreM~H~>H@CPqUQ+aY;cMk3!7#fbkEwA%MK;)3
z6~)gEgfaBhZS1;hn{{DL(zOgB?dOlmKAvoNZs0q^`D~K?)kZmk&6TG$vs&)#SyNb8
z$Wl@r7Xq5}k@E|&i%r|NT-C!SFZqj1;ztgaCU*a5&}z)1v$?)TF<A$^JuP{))^eAW
z@5{i&TuWjTuRc4=(86EcZ@M`84tsd=lGjhxzqE?934h4K1)2~7tqX06(Tuc>U6=`5
z3s7${yH4ZyAN~F5AD`J59&z2Qr6;vSpo)L)ndJWO3ko;Pc;=um$Dz>yq?f7jLF2-l
zwdeL2tu|h-5q3t*dcpB0xgwM2aJL9+z}B4FTsYyjXK(K#JC1jnR)yR!dzl&^JaqiR
z!qwHXE3r!H)afNLTR!`qd7J)+hvUk{z3Ujiww$%xxWz$1KtxWIxpd3A)CDoK=JuCq
zw^qG5bm`#kHpfM=y3D$#SJmu2SazB7MfcyY94tp$R;%+BFHH$``s3?bUljf#^IcxU
z$rKm8R}~>g&+M#Ey1Dnaj4#`_B~Ba9w`t9BXnbHc@x|?1E~b(z^on)NdULnWII=F4
zTX^xSm$9N|{}!ESQWxOh@?YF$pS8$&!fl~V^GX9|1YZ$p<GW}3-2d6zh4VsA%#n`?
z)Hr%}&Vi&FTPDYik8I+<{uDo5wb%Db^<K?a3Ky>Y_}aq%BY)rRD;#efcF&7_d2h9_
zDbxFTi?b}ZPc~QUNiLmpS@WuCYUciVAp#ROerFNn)6x*S6nm`k)YRJsB6@STZhW~x
z_P4IIxsa&M(ZAvxEI$`+DvoDc&9}q!S>N6LF<YMdm40~l@WJ=}Mmr`%s<n45<tT|&
zsF2vPDV@1Hl=b$i$~RtH#DD(uZuzFDmL|S=laH;S*&ACW^EIg_&E<{j5-bWgM*sL#
zzy8AhWZ`G|_SM%<C22*sb6yrV+8OTqTqysdwCWs(gUxvfwL6u1<;x;|d}=S@yL(Kq
zPgO&wchkf=?`lt<noF|{O<hdIIatiE&zN79ddGz2)3>e9&y{{yml*srx1MeG`8!>&
zqkGo7D`=R_xVf!h+vlm4#~w}n{Vve5CgbMb*{{P}9|$y^4qX!W`JBz?V=-pB1>Z&L
zO!By@*RLvHF0n~pH2cb8@AGAgvN+TOK5UpU>rsMl&)gMDtPNJ@ZQQv6vXWKIIh2LT
zkx_Q`>1o#XUU9dAC9EpU4O1k=LnbtAcvNT0ba3ync^kS{&c3@y=<ChxGKQ<aFVl&v
zPf2+{QRjAwfIRP-rGE=tiXWW$x;tvxq{gW(Pc_&&;%>ByHoJTkx+xy1qvh#gGwJc`
z{Ee&UcwYRs?uGi!M>o%_ubeTrK3qrr@`?C%#;HeEbKO37SEFds^1gRdRqO0m?f7<j
z(=@TGrBl9No0wKQp}<Q~ZGWEq^1`c&cKKB1%W7yvC%;)2_Gg-|PY=gcomrRate9#-
zHmr!*lxiQp!1K_YXAfRfiUwFt)?3x)5Fp_7<3&bv<T384M^f0^g=KbrTf_=pZ_8dL
zD$-%SSYZC?YPHH&FXDWUH%eHi^9a3N^^(Kjm4{7!di0`KHq%^A%KVND3O#ot@5+`n
zze=>9p71QSFF(}udSxE($yKdl-O<*0%WPRM9(0)8a(3OMS#M5+dNaNXnYuEie1n?f
z(xe4Gi``2k)@Vid-+I5tJh(CYT=A8!mnWu${(L@V?hFyGb+6ev#ipA2Zr9Wj7ZzP4
zl)q<MgM+LubE(9ef@g~kxmv3%U9t0ybmN16$>(3~T#$XfWqHkyFRwica-J@~-*3Ft
zeXGp-Ia5<Mn;yUCR#R&_)lgl1x6_r>#pd^pt(h6KW{&ko{^`MQX86=iSsBAu|9EX$
z)kMyf;mmb5fBwIjTV5e@^?Jkon>WwqZSquLR5IPdz^M7@B9n(cQxJ<tpktSYMub2}
zNXU^kC41?k8omw_)Gk%O5a8@+^zhDDIE$fCWp>W|_x*F<ESsF*p!B_Vo{fK5*{QiR
ztIy9Xez)^EL&2vrqATK!F8@4Vs4TlB>Vsb7G%v@0mGe$cs+n0VJ}Jif^&ZXWeM0|!
zy<uC~AusN>;6c3dicNhVW%q2#@wQ1!>vcUn;aB8t*H4n%>vJ|e6kqeWd0o}k%Nl>I
z?jOmDOsmn)UnRYH-v0VUz5oAaDe`rim2TZqAK?CUl2n0(MWxN$9cOY^E_=M!uj<dH
zt%g6_nooQBZMi3PPR?kvg5l-qs;T#+A54C_>+Wo;8MgLwH(&j_XTnRlQ&$WPcJe*g
zU|hFs=Inz}c5kx|@NbwQ`h{)!r1YIhsXx3{zmQ>a+~?8rB2{5cPMNB+VwStr<-YWk
zw^t2T_0Kz&TBN1inZBlG-$~J`n=4n&<GZ?b%Gc=(t+(ooLSMZ$I>~JNmVb%y!v&Mm
zcS%%g%zjsxk}eh8XUVy0NotLH|FS|ArC!aA=a{xDU$_1`P4eRP*_R~SXT9l(O?WnI
z`<WSa_okG*f0^>_&6b+f^PfD_d*!b@(`0{r_3FnUZ`+&G4fwZO-q>*PjQ(_y8s?O?
ztycn#J-Yk(-gK6&GQDzP#~Sq&%d3sq@Bf=}PX7M~S^jrVeea%mze9D7eb%$Rv1L12
zWAZQGIpAPAar)6^3$E`>$+>S~oS(CG_j<z>yBo_yQwlVeC_mKQ{p;eZR<&u%J0_}W
zJX<37`1wnBLmT^ULH!>efA?p1I3=Fky;|a0uE|p)>&@p}FD!a49m)6o)pyC=_ue-O
za_>&(Nj_6Ey>Z#=ACeQ}cMF}FasK_&<~-$y)hTDE-AjJpA$3iw{cDw$>24dp%b&K-
zx?NuIF=fs1lOc<l7_Rr_B_5i*ezWfFpHI`y8$RDG{&T;MXUe1-KmMj1^NF4M_|5q`
zo9W^5**z!z7v(D+a(o{w^k_|uROacV7kf9}J=a&8woL9@3+EjB3FowD$NXM;<o1t8
z4X^+2uzk5^(@CGYTeDxesau;bH?+8OB)?EEeyYEu?Bib*f-<X;6TJ1OFJeeIJ*&Xs
z)8tU*fQJDtQq6~hBc<P4wby&jj0!Cc(O>O2eeGYxuU|vVEiCJ<oa&TbRUWDSxguNk
z<MK5%o9?}dv#<CzVdh6}-<q9w4za4Zm6|+gKXmQtN}m5GkF2=*Y9F6#2~Xn#F?-2;
zej}l<Fg4A4r*3=*VLR^^-mB56mdn$!@^`JpmG)z)DVKzDd-(lR3o2Gc+f7;d>ZtnO
z7(b0<&)zJCUFQPV@8er9&G)Zo^W=x(>n}z4XU{!!y*udUv<HE1kG}2TacF3{Y_?3M
zZ0`S07fZG?MqDU+(l+T`!kaTb*S|DPdiCx4q2Qly7hS&l&WP>P6x+@GnqRV3o(|?S
z%UhD}<86CS<IU|ilbRQYtvDa5etBs-|KH{->4A=>42-#wy1l2p=JakZ)Qg-N&o*;;
z)L-ZS>l~NmMb6wJ&9^RvD|U6r@dG<{&8m4?6=160cIEma*Tm<qLfX!oef-Bay?Cup
z$GnA4V}G+=|GFraRnmLW)Pu&pdn}Bn`o8)sQq0oypXuZ6`qbHvQj+?s5;#&{7=-Uw
znl$_9tLDg9|F*1HOCiH{wH+x7Wxg%TV%Q~<*uO{g9D}Eu@GqIw%U4Lc1^4gdaai^F
z=qrz;q@*7|_r@`-nP=bI`S`f;%hw(`JI{WvNWD9)DoI!WTzL4mtXo0r7wOl_|L8RP
z8<#x$%B+>IW1sim+c|}CQqSY>kv6vrbCzvw?mQ!PtI;qx{Nl%_FaO91JNx*UbgT2S
zCEWXQuxFOAs#U826YF;_^~Je!f**7LIeoHa#clpKn<q__Ul7zW?abtwwX3rj*|n4A
z*nI1lcj>@Q597`L){L$9x6Qiy{K~Y8K~JYlO?;>}?bzy5L1yVS%b&&R_3uA>@K(fi
zJMZh!cdl<&iISAbODd}gV?LX=>GNCvOrKk+Pd6XUoD;N_?Y{!=(p;{>&MxbJcjrBO
z;?(8jGpXr`y6~E)DQ2O;-g{HhefJ3OoxUPiS>TELZN923$pdwVkNx=iif7i@gYSHf
z1+dz`xZ>!aD*K?o;Bz>)_?7(n+q2HS%U)OeHFa;r*0#3$W=lVR3Omp9=jP$GHhsV6
zuh*t6liQSTxQXFe?uH45o+3g1(_QVZa!+DV*Dl}5;*of3qI~#Sxu277R{T~z_t;?b
z%3}d}o6?^<de18g_%u!O!VT3adh%w%>#o`R`WAe6mepOQ*Qqy`S7K7kvGkru-p3m)
zBMj~zX;~{~7cu4CwAueu|E<&s?&$J(y!7bHp6G<6%sJ_W$r-`6amPPTd(AfA=i%m8
z-Cg!i1Mbgs`RQ<d^S}LCuN-Gy^VPeRb&%;t$3iuYZz`Lfy;VJtnE$CDboz&^OT`<0
z-+uXd+Y^q;X-%^X-u<1oE>Z9&YqfsmlkOtjKH;v(jf>|kSY%b=m;Z0()-K`P&FYmq
zR%wgvJ2-j0;ihHY^IG=Ww9hykoEd4iTyRF!Po3#PLKgL(47B&npRu&%$PLT)f&TS<
zlFrK*3~G*5+zKn28_&1qY;<&505j*D+m(5rq;w9vG<{gJ^?sMy7u9>R_oAkrHsUXn
zKiiYY&$J=`l)$yQhTZEFdmIk#ZJeB&ekDGk*{40><X+ovZNDDJ1$m2`>Un><d}+xP
zmrI<|^$$56R|!4y{SmPE_xeq5UdC-bT`{HWPJy~oK}y&<o9ox7f00-|v(q}`X}{0A
zM^kvN@7_K|(?whS>ajH|HggNb{`zIGrEN9i>&0tb<_bxsur&Rt)4LT~+B<!7{mo4p
z2HNEjKRz#->)fuB<1sN~4Oin9?Qb%Po!!Q~ccNb1+R3ms{mp;5w}CZzYSx>jB6p-c
z{1zLy{de|D!3T!RW=+33K`dW6NcM4S?(w<m%VpMt#I8Q2(^`9zYn`?J%?`gXdx_Lr
zKX$&;Zr*abY}=B`bG3)39bUJ~His|lSk=70x7MclrP%1rZL_MYynp^H3w!uku1xbc
zZvuJ`R9!VqnV-LR;;J3JAJ*@BT^aDaYWwZSIdep+7A=@jY+S{CS(JTo{7X+amfZ`Z
zs_m+-h<?bu|I5(2$gm^J_S24Pvx1Xu{%RMEt;2<uW;%aY6SX;fWm%tna+<x50`F}T
zMg^@`5+^IS=znV8Sdw>t-n!a(&-SkLW_V#xVDkCe`^199>>j<hj~gB3oVBf_=0)S0
zW3QOcN_^b$+Ab&Cyy#Mj(yQyD3|rpcon;jHZCd8N=at`=`&K`8dA_4vAe-rZaoL~5
z$14RSZH0JWWh>rm6Ir0TCZw(2L21pzX-{vz-IBS=x&P3z<}wxgnL)39_3jWhTbFjp
zDyQs1CZps!g{PSeOfQT?q?&z<?eB1A=&5T5aCwv$S#IysP7hs^wBEAf#Qm)oYq+02
zyI`vR)+^KD+J#+v&Ckkh`uv&A__AK-KLvp&txwbim-PKie!giPe_)VI&E8`V&5oUV
z{mIV%R3^K)Oh6#p{2(LI%`E>tBaK3y9ew3&W>tQbSx9%*iyx_zdv|>;`LS@r=LNpG
z?aMB_-SI)+=6coCPv4Kev(8=~SgWfLxO)>@(v&m5uFgF;@p8po!Pg7IWF;r8hzeuY
zJQu_7>$}1DoBDLc`zu$@o5x#fd99jR#v(J_L03{&Ai(jswDVRO%ap`ZVH@?L1MY0x
zkbUt#!;QVUrgrVBYZ|7n7iYg-Icfi%?Irg8dv032HqZ;4-^l!0!1TYq(*8*2H<x=?
zF5G{r?R$7~#lPw-X<I*q-H$%i-QNE8*_M}{uOw9+)HH60TI;cgADH3Jz4uq`$!XVT
zCHb2+ADk|v`sd^v?`MzV4S(L5f5usMUVG~OhGOgX<o+THt@jFfxgVN))~rmv<tcsp
z`|*UTW$RujedkD0`(Wqpx#{!rUCYibZ|XBx5IjTAyQs<N>Xs>Pi()%?%-`gAFus{r
za`<^))X%!<&-1U@ef|G7EG+I+{oK3q={67J=32iDZBz}szcjb#=ajt(k&n0*^8DSR
zGnd)i=Itf+EXiM6^dC!|SmeyXQu$EsvykZNXa6rx++TjtIJs3vbcKi`AG6*)zn)ib
zKdxW()Nw&_uI>z(HC{Wa-|mn;qHg*ApKGyTYhfqf_u|d>lU8jEGS!Xe`}%Zdo7J0-
zVRmteYSr_mU-33md$%Fs<P(M!*DoBn!^rl}zsJn6P;6?wyHR$pr0>~TG576ec1~6^
z%)F$c);HfHhu1%6a_T`DzGo|rt6%=IF+E24?7lg?7tJ>wcQ=YHG}!2VSf@IyZpG5q
z^3TimMQ-r^dUuI{>`uW3#aO8)E2Xxsi*0rI@6dE=y1ItJYQ_iwFBaCcpL3?YzxQuS
zXmW(;WKqFl)gFg~wnuJd%-(IiC3n^0;tf5v7f-1Ru5u4kH(s=P-l+p056?3&So`a6
ztS!I)HW7u(uf?)+AN5E7bd3GJSbuTytpNFb(rHW92~JvnW5%}qFOIsp+PALk_VwxA
zk`n0SbeQMe5>}>0h2^Q!6GK8d&&4KB_+t`cULoVMPIB9xjduzPst-DHNhjMBtuBwY
zUC3rpeP){K<@AZ$ANOVH3zceRf7<)JnDboqBJDZ;XLieem3r}W#pK3IR@ZkOy7+qS
z9IwD4i6{H&gs%79mN3-sW$v6Qw<hqkxBrXJa}Ek{yes{8Ys;hv&6fQA@rTxH^9c(J
zKmPao>-G4VmYZ6$_PA~=_#1E8_kHcvu8XgB?X!FxzOm}9mdV2Pf?-!D<nG^>Gv}uk
z!^Hc0kNoGY?cV-t%cH$pUw)A=p1aW3`}y4yIi*aWkL;W~qj=@cnuTqV8M6WwuerFl
zul)DF88x!seP>-c7GENm$inqb_EG=syPT{}(yugoovtuSZ`@e6ZpA52k5JtkGC5Y$
zJ|ExjmCP(#TU568W9{CTvF`hp3Z;f_E}CbXrKoi6T)euDPDJjbsZ*!f9Nt`1vFLN)
zIivU5KQm{3*fNpN%0eLb@zVm~*^gxNRCSay8yyr**i9%ne~YE%#fzD?$Nbfee`d<F
zu^xWOqkBN%bAf|j^71XKPDcdg+bh4H*qmi+)9&J-D_bSwBJ+)>O!aQ$m$xVEl2@F!
zXY`(DoHlct`_U@9khSxQ-x)3GIhM;3=d1hR@J{AqSsrekyzQ4|x6S`tB_#WL<*9`?
z4<?nyE=)QfwnFN~jeE2ImtI@A#CqnhY^(fr=DVkhX8bjYy!4!#<tSUR4)^{WP22uD
z%wl;e*L`6}y8ZY6(dzn7bNW6?IZO{+chlkZCH;#0TXys0ueKhDIk)uct~;v9`(~fy
zXWo6`>C=KRcb88$CtUk~Vq(zF!WqB+`|g)N|MBnE?vLvP*K?{VEU;QDS1MY+A^-M?
z&L?j^natdLJH~JKf=%mo3uIqx5!Bd`Ua-m7ZnB8iw#^E0jC0<c+G6{1j{44~N$(zP
zuwD{$xZ&Tul8~uuj2N?c?IRNp`&~b<J$25p1q<WUnhiCMbO-j9J}B6bFuk{O_nMmh
zr;kmGZkaeuYwgKJXF|27>Rygn7TL`%SMWAk-(}fM$AfJypQGfCW<9LhY`uj?P*6+Y
zN!O%z3H43O*%Icf*Gfb<ALF>fam^@mg89<I4NA8TPD?iXek13uQMT8FS-d4vLZ6=d
zb$8w0GkM&OCzB4O#NCg+Wxagq=C?hEZPPA4di&<gCLLGJ5^IM=OZ4QOy_OfMUzqeK
zWnT7@)0rGk6{`>ZDVpBQSy)!?mRcJrTg$vjYL#r(j&sv$jvf28rAe|n?%sc!%Ns3c
zue#vtJFocPgsXq8C*1xkVU)7MuSorCR*d_(!2cK0SZg<2vvr;=kpKMf+a%is3o43}
zuEt!`@{6lB%UAY^UbxfRu_r11+@FqO%Wo?$EE4bc()+1+^6rZnnu<NuJ@exH+YioH
zYTxI1zbw%t`+8s5+gX98C(QZRhb;T}bn1S|v=!erRGSsu54SH==}w&Pbfn3W;meN?
znprQtn$*`!S>@+um|t2lt@v)wy>mK88&%4aJ*H=^%jLXoY%6dvzeP~vNBB*d)Z*%&
z{W2d~YSvk2gxl`m+Wr5;t`Ft^^j06=y?V0bh4a&A9`5mZ_gqV^{^y37@snciuJK#u
zVtjDsHQ}usq3-*2jO%vj7aRXQE0gwQwH;g3YGXdp)0IYNSf_k%Dfhf5r6pn8<-S5*
zLzBzg_}kj0&lktp^}KxGyIJA-R_7?G7xDL&YW!q78Dg^IouS@6O{->y3GCh5Qn(gQ
zGcCz@e|F2meS2<Sb%=~iyr0Hh&*07%bz`kmkDchAA1kJ=zT(p>AiMZ*OxMwm9^JQB
zWD0BTe(KztCFk$cY$UfcP<eXfrqaBfOA8dN(vzAFZCe~<I<tpU|7Q5(X@>6SD!dX*
z-oH8=?*Gf+d1ik2PtMZn`HgPJOecMwW}EuN=E;J|`sR^$BD<@<d|MXX;61JP#be)6
z*@R8P>#k*g<(;&0ji39klvDF`er9T{-R3T>arxHn5AVa(z4I#V7VVt2t4?TYQ;gr<
zE~6!}8;xErIK0;GNtyk#p7p6S1)XbuR85!|m>8F5tFLlq_Rhn-SFZ2;CVBa4ch7>Y
zdarhGkJ)|KAhGem{l||QQ@_mJ5%f3ZWV*lZ(=59;|F%?|Vc9nSyxH0O6$gda?@W8_
zH2?KnRYf1R<==A3em3>hJ9h6XT{-*ZdjI)~K7Z<1#jjY+=ewMG?cD#XwQs)N-QBr&
zUVMflL-R7f;#95Dyz~PvB#)@wJ~+|m$oKc3f7WsDl6x$9TU(?dsF&TqnYZ}vr<)9i
zw$DAlc*kaQ^G9Yo<_CX&<_9|KGphOhn;2+(;mCbXwTqSiR8?F9yS6xedvS5`&tI?C
zXZ{W2X>0I$#3`;EC-=ZxS>O6``fTIYM6W%DS57p)G;`D2^rYh}->PL#=YC`JN}K0c
zY|+`Z#X$F%V}Pmb{IW~W3%;n#-}vE_n)Iak(=7e(E2%c$Jojg^!G^kRn{UYI+<d<v
z{`8xZ$3^R6Gn_8pnSFd!m6TYy!rXp-?HZdC?VD208@x)1Z<+ZaB9TQ*yv#Sx<7<(f
zjAhZ2b+N@KCMcenYd!s&@S}@TUU#OSziaMs`o}xAQ)%b+WEO8K^|t!F`O4=h6^okZ
z^(jV$Cq6#&JI~zs{}$m16(2q2XH7k`>VeA9$c|6fF79knJO1rr^k%7QeZCJ%XXd28
z@lRX#?4NOXL)n1^_w2TCpL!{7yUDm$^SB6GsriL<P3{>HLb*EDtmj)^dU%%=e3>$r
zd1IN<WD&X7b3(t(u}xi^R2n<4NI+r5o@wE?Uay?td(*pT`=_&2+Tsll0`%3koz#0j
zXU)x=rOxWk+isevb3a?2s-+mn(0af>?Tm8Ot7(^>OjvoGKXS&F`|D2q-ShO9wISE!
z>XfYR!za(lihbezViJ6QcG=Nm>p6v(U+v!1n`U`jRQmq9EqA`VJ^0qY>hhLEjooE$
zcV%CHbAP{mx48bZW4+QZN~}IA2XmZR9)5Yp^V^ZzUe2F;?%$i6_GdTlxw=F@d+MBB
z$JF<`U0S^?UF>{aAveG6C#m^s>>Spw+WEqBW#AQF=}Gy!x5%mq86KN`?e4u3Uw`+>
z|2cHs|H<67mnRjp-2DH$Ei+qbb58xYzcYK@h)>JicG>C6t@%HGt$p8dYKmxFo>_yu
zde5Rv)%YO)HK&3mUwGT_%5dV{`CkRI8}lbmRCYHie;2bZcDGwX!U6*(*6j=Lux702
zo_u1-n%k#YGFMD__oRQyo_=1=O94*YH$?l?HAJqg|LhW;FULF0viiiCrU-RWCY7F5
zeUFo;tc>+_|LPF2_rj4+Qx4q^UaG(=$~$4k3l%-v+qFMEZ62sEWIq>M^lYI6SBs!V
z&w-fZVGV5x`5SJCCJBA|Ejf{cbCSkF-4pv<)wjQMu|D;~ljXN(#3j+BNg^uwo$R~<
zlb48ieG_>i#Za$qwKL(jc-%TaB^8HVx-;i}T=RO`bcNH!Wx1Dci}I;y?A$+HMBPx+
zX&a|<z=EoP9VP#y_(Ql(l`i%AGO52Z?U8=Z%&mR<w({QDlc{7I%e&FvQ=#g*k=^{-
z$*#ZCRzFqFlke7?Ia|$m!O;jUf#iMO={hR$`#W#Gi`pz9q2aT;`YMOW#2rli!I~#M
zQ}k{N3(rxx_vprs+b5#JH#g36zv{C@IE24ACMb+iV{Z72%ePN&{<dVo9RGqMyZ-k=
z!6HJzA|D^IHr<wt_VhcaE}G)sx}(L~{M@HW(dCO|Z>F|QS|>0$uHt6!jO;sdLW}2p
zS=jgR+EW>ZEt`%kO-{2rcj8#i@8##c6xELJdUt2r-Jkyy1w`UgPK9%w`Z{~|#qNr<
zNBURlD*2peu`)lHulhF4_f=@&ho5hDW^_ATzApWj(@4x@{c^eg2NWi<Zjn&9#=`Z_
z^i@dF&PiKRxA3f4c5>kn6aK3obMtLgZbeL0n>QUa=g21C9P(g)P@R*k07n<MIA1>d
zgC9MnZ|v)hZ*LdVn|tTnj&1tM5<i)?ew|d2&=ezR+}Kp#yZE`RO_(Ed#)}Il+xhrf
zQ#LbgYT18Kb&HPqo0{EGDLuj*EKN<1SG8&haQvGcvgSV1jpn5Ia~IZ~zIY|aY5S~D
zg||}KN=ghI)!&S-@n-AEO>>ZFY^v^`d{Jzf)toQ+Ph2ybpBVk?5n3>z=J3IN_G@&y
zN;}w9A@i&Yf)*WA*rum>e$M;)M8_lVE&Sa!K1&htFnwY8L09hm^<KA7?@o4A0k;JY
z?mczr>18%FyWp8Mlg-0RGe-!zMX>32JNv7Wq&wT^N<80?V04>Nmu+#}6l*aK<$!=Q
z8#Mp>iP=EreVZP2tvQ}?rTe*phDnm@4(MF4gMz>v_l}tdl6o#iD?Ko~_n=<+PSB&y
zu(&I{R}!{adG30aaJJ4mXhK{2>lYgH)6!4XZJZaNAn?iegn6Sw!ETEUGtS)LUK=ZI
z@T$8yVOi&k+-Qr6XHxt2HBMO>Gke+Dtb<>@Wvm5`9u?|+d38<G3#CIGB7da44r%*K
zt_*!qweD1|c-;Q<3nyzVF3dh<)v6*KVs`3B%d3|kH#jXi8Jd5p=prYFswdpO>Lv4j
z^lsQEFY;^0v*fym#}!MvH@>ZIj6DDS$!ccaibdI6-q+<0Pbz9wIoiJI-?|HzpXL8o
z>TyV9;rgeW!{@+$V~&h{uDaey&rbG|EvsIAvb&jo<K~Cy%yau)JW^$^GRLNhcm2M!
zJF%80<l4cHb6@&cEi)*aFp+JOuxjw$U%Qvdy;RZ464hJM{ZB)H<EPq_*$yA#m)>9T
zJ>&iovwyopuGQLhvHs3oka>vxviqk6`;Xjz-v9aa8vPq(ml`jwwz$8tId-!A^VNUD
z)c#n_J)Ui~a{j%xa&v2q%R9uc@~?d?!gXx-H5qQT-Ete>1WGhE#h5HTx%u)>@4qj^
z7ZqxIUQ1y=ofI-PWB*h?**%qNODgo4r~XNinmUC+{oA5bUtwzj+|TdavPjeW(u>I8
z!#^+k{jz=Qd~^48DL4CEl@I4l?sb=S?SngBKIM)7ZOJ<;w){GINHbn?`stG%ch!1V
ztew6hc)#FVxy+L-ZMHij=H#1wRq?L#bDp&OV?rvk!o})OYFTr4IK6zO`ol7ZdHIyx
zPrEj4s4DT+<G+1~`>S_XU&N7)BL+t$@|jhRN8RNTno!E?yRG1h?(f`&7JtKi&%H!b
z_Jp#kKK?hQL|uFF+dmT*6<n?Pe(Tb<wT*0S2AwN(8XZoUE8b^m`mYeCk{d5qDBa-j
zO|)sIeb(v^H;Uc<`D_)xZSA+WVoGk8u&d|iJG1R3SbcphkfP%`wQSe!Ytw%8-v3zE
zGLvDgMopo*fN|qdvDTflFRz>$^&vh(_2|J%%wp@ZRGpYM)rn7k5|yn~>1~tIz!Uqp
zXp;I#r@L832g}r7KN6K{JrQE+wPdNLjNR)KTlY9G{bclCe1&7X)1gv!Ykf`2Zz~0L
zf{(jDdQfOB{waG;l)J$Rjh*IGIVP;&NYKyNG0Ek#0Iwq(=j>QT)0;E1XB(}%zGqV#
zmm9BH`=R@sd`iMLzqe)NB^2mR56}`_u&`%7Q(CJ_;4>#sTA!*oH|u1?rwO%A`#4#?
z+wI%BYjZ`Mgk!74vTv6^|9H2l@5z%VKG9!)tj>P9VaATU!WCRQdM@67@hvmfzJAi`
zA_LRYuRSb^yR<dSc<SmxIM%fonp>Zkc;;uYxQEg0i&9K0Vr-w+ot%EIKrz(&>8z=r
z_#c);7|c94C8;pqYiH)67U3<&nACS|REd9fr&Z-bmg95Zo1dnBIIlb9a^j<)Kd$SR
z&A6DwJ4GkD@l)eGvA8?7)AN#_F%^7!|2je|`#^$={u{?c7On?>GTEPVuZ&o0o00y7
zL7BHFa{K$+bNqf}sxL@%h|fQ@s`*XDl$l2ubiZn4Yp!JW;fqoTHoW}tZ^5S0QmN1S
zD_3>QKl3B*UdVa1++XUqe%xO#_UOv_x`S5t-oBc|W1AHzx$V-d^wZ7Rm4+La>UAHw
zym0kIzO3K%u^CCBb|)2Hfu|$)zDoUWaOGLTVK(FaDM8b2nV1`F*e9l&uu=0xsov2w
z`EzEPZ@b0GclEPV?fDCu!N(6gv1&Mc?#Ik$e|jSu9SY)4Y#0Ci<HDLHEE-edlO9c1
zX{izB=#s6wvhscS3gs`32V?%;KfEPk^2zGOnTM`yHO`1W$)IAGU%;O>ucR_mO((+f
zX7}%#o8EX&bG><?w7ye(%1xi>*E4@jf0B3R)*nsZW}D>CU*5HTk@=W;tvmbHr>m#@
zUxaLBVffZ<o$>L62g9qL$ODrU`Jdi=cjfVZfp1Sz&a*u)Yb)a`-d_K5-_0xMu6Ow)
zJ^nsf(zrCVFMr<O3cdAq*JkiU8`OPwij(~)xlzL;H>&&ZZ+`}Z7oSu2e9X{TsV?^M
zM2G&}<Nv>WTKtFQ#Ma}R%QOG)-Y)mizK?@_@B4rsS&f~|C%b-sJ}0Uc>d2&e_uv<e
z=?Pvov(AZmF3SuJb!0L)6=I?_Xa9=_tIl5Twfgd5;a0v2nA4W|`ykVnVczUNs&~!b
zw&d8;ujiV72#49lB|U0)F7w}geRf5DXQzJnl8vVFtSbwZZIh#3eRNB_|ICD~eYsJV
zt5ta6*6rT>^1oK+?OTu)$Fp+1YxkjZS6A}P&wcb)R`2l^@Kl}RzbRAwugyLw@IIY?
zrsVz$N2Z_Fb#fHqXsKaV@!FOCRb|zbv;UWeoiJ~Fu)k&AyUAt7PdlwvajbCs@$Kd+
zMvfQSMsJ$0$GDxp!n<XYyK(h3ah>4Q$+>4smaXMazOti`C*zfnUstm1`O<l3SWPXI
z>K;%3BJ)wx_Wr+#doLQw+}YTXXj=N<&bb%!ns3(MRb@VFuD3I~^wasvKc}7l$Xh#S
zZ*_h8<1-ucnvH5M%hlcAJ$>%&{R`8Ud9=<^G0M(hKjstbdVKow&(%IWhK3h)=9x$|
zR6b@m&hneG+V|t@mu8QGlT3R#`+rQnxp}(q&O51R{+YbX{ibr`Bk#+@_k1^Js_#sB
zJ|pt=kH6pd=FGn8EYWa?L*!4eYv#N6VcVN7t*N|b683i0$%e_i6Z!aLwix_bUb5XO
z!GD{v<((td*_R}(Z@cuIygaq4<s`=RpXG8cy<Ew)-ZPmV`QPZ+`S3_mnb6nWruD3c
zmkCT?A~5}pXF!3e=fj#wPCiceN)M{!&t&Y472i{H<4qV_u&~UYowpXX?e;EtnX|wo
z;loFcowHXx60JYw*tIMEr$g^*-OeuSkbAI6L8V>O92A~NFW6vt%dN{-QS;)lP08Cm
z)+m8$*ZIaQ);yJtqMjSi#8xVt-fmcZT{-jjCzsV-HWz*rrs+q%Gplg=?VIxa;mRHf
zgNDM*<)1DtUT0Z%^q+BeS!}~>LmxS7e#<Yu_iC3}{xf>veYIG6n#RUA782{F%4C0L
zB=FDLRzK&Yvh|aEhm&r9{`OAc`f#R(ZQ6(9FW&nnEU^FSnm8{`_+0q!wHZ$FkM2HP
zd-e4Rn;W$z#%;D72li<?N0cAyv3=^+cAdB0wCw8kDp|RyCJT-fy<5eZV*9W0-#gQL
zRr?uw-0GSBwsJ;H=_t7Hd&-o{Ws7{Pq$C{U!`$QTwND4fc)31S;Qi*J9UbbwW69Sw
zyxZ;-q<CFfV30bwRlfT5MJC47ttNU8g_1%K?cvOS5*BK7v><y$EL&L|Q;y%hm^oo9
z%A~m}=TBXgt+{erX}r(1ude63%@$4mYVwJ_$0+wtLUL2{>NH)uG;8Y+)s=Cjym5<8
zbFet=6Ls1rYmxErfMfTBe5QkcS0%7-PYNo}KepC09W=(YBtH3-q^iO3O-a83o!omD
zdbPdTaQiNs|Iu5e93}IQ_8-6Zr>!!(UAX(uzr)XWrgbFFe$?_f-k{(`nNd;NyZig^
z*Iv<YKlia}`Kc2{l2!Y^O77*p`0mj1Erx<$Uvn|s`kv=IbIml>v%hWljbqtfE!$gY
zb4hgy<6GyQF&lSRmwmkTl3PXNr{ye{wxmQBt}cPrr|fG?G9I&fY{^`g{8LTZ^i{@3
zW<J@I&O9l_teTu<emB2dxzxC=WzD*+fgRR~kF(U~9!X=Ld#&`3Xw2)BcWhs^p4#8s
z`+m(_x360pT;86Nns)q#<nPI^?^&B(y}WrLpZ&#17AD2}paPzy)ye<1;^Ty~><{jo
zTcoSL;=1;f*@o4Tt1W&mD&BjCe{<SY|DcssU-Dlab^on9$42J#%7$bH6X_pf&QW1U
zmRW6K<CE#)o4q=0)jE-HbClkmORzgHAGoci=OMdfWbpAVlg+lh??3p*r~X*&Zk~qA
z``4?0&MOIRnW*05a4=T&-;Um3-90PTG5=p>b!bDNxeGTRdvp@Fox)w$xo;Du$Ig9q
zCs6ydaMwX&k5iy%&#c_Kw(*jcN7wdGA05@#e>3Aqg*TW35^AzbpRHXs|I^Pp?$!1Y
z2T$+4r<Qk`2Rv@SbwTlesXaBZ#%Yr-a_j4dS7a2O6p*Xi@zb_&{?FqjKZCa|;!)4l
zP}Am9`1+@AS)IWg*5A2bs_XfxttEaed24(~V$bOg)5h<nT>D*b--%rq9?s0`<)t!z
zWBSUop#eIcepmnN2HNuTZxvCvy!zPwi*Hq4g<N~`Tj!jr#megr=iTS>Ou44%;=07J
zYVXuw&8`pAK&g`Rr<wo<SEu~lxT>S3TyO0+mz?Cf_GgaU_l0kbT5H7SsQEg744eLK
z-i<%vCoA8S|9cU*?*37!`H~CQ-2Q(2$?TeCo|A9gSR^iZtyF`1(W(zuUwPbnf1Ra%
zai7V>NYI$}ZW+sr$6debcP8l8X}tNn*x={_6|dFj)}%~UXITZETi6bsTX_CXw6e47
ztH;4rSBqC!DW;m_slLow<#N0#>zdT&=A*B28k5f3yWP|4(&T1w+6Q)b(ZZ0&=WNt>
zJ~_lMd_Q^Zm5$@`>%uyWRf@}Id*@x{`6S|4D))a@{<XQMHhw?*!nLIKQ>@;s_d!PL
z&Y@AZueAOp+}}1)_@vjoxBObKPw#X6&+Kr!PGRlpU%SdKW_>*QTqtf)#2(P#$FZ0f
zqI>=7?54b2kYXJ@iL0(;ubb7WRnH^$9zOI?x@&hf?~C`o?xAs0IVV)+H}GFgZRNF&
zJ>>9d+RuW9i60h%20pp#>-*+#&9=U}`&X>foV#;(`p1X4-)~s+yL`pNpgEJy--wvc
z*4C5uLdEGyz4Y~o#aHe-z3yCE_ki)b!76bjVYQ@HpzKm08pAK2mGUc??ce*dcmAMi
zR?c<y`{Q$_Jia+|eKpUl<MW?5)~`GezPl_zc(Vxmq;Hb*vf^jH(zV{bKl)Fa{-N!2
z!(5wMFYgw=^l7Gl#=l?e>r_mpai5X;uI1dm@6e^@aPG2ltMxVSbbGZWOSSIoewq32
zheqfV@KCqe;*#B~{}!3Im2e)L@@&%LbKT)P*1h}uJ>K1NrADN$@a{+RWNpgcNikeM
zKj+CL&x&O&f*L0p*UxXO<F9mE<IgJV&+5H>&B~&b{a0hFpS|(4{@KYnpQ}o3%cY>}
z{);M^_2WV>PnN%*oVc9#+51gZdrn!+Wc*t*)AG!TCw1!{otn-+`>yL`?a=E%H#HVs
z_iVX#^=Y)af%D3j5?Z_e=ik#<U9u{~cUjM`t#4)rmR~qIeYQ@-mE<bO05hl5#pAPI
zSsh4GubJ#!Xq<jREHdp#fc!qWl$4Me)1&#?S}KjpcTBX?xD&FS^~$Bsf%@{tN<J)$
zlr&rd3U(E%E7ulX_x$lA`(?~-g(62#xX(NO@WK22N)e(9)Bcs_rN2DItP=gn+{erG
z^|2GP^eR7{T0C#j%bgpaZvJ=X<~Q-{zG8ZA>yvIIgkSvfU3{N|tK=3@xw7f83_spI
zooBM*`s{n9(wB>mEYEO?PkOX@-J%aG3wQs>Ive)wegA{WmDZnmuezA3B!6?;^*;CX
zy~~mNe0%Lr{hO?(Iiu=l{`a3=rg8^*ChA`K#;;Iwn^We%Sz~Jn`v=D1zn|}S<=rc`
zIBM0g${MTKwtYui!sV79HhwR#n(dCi)PJth*Zq$=LYlI6e2cY7GrE8NU(+?)Umu>A
z&VJE;h(jdh*~NADd}?pCA6se?8TVc}w%12GWk+k{(<ez?Ocpmwe_WHBrd7(*Q+w;g
z2Lt^reCofIF3ni7Ox=HP%Hz3b)ACZRdc&q2c&L))Z<YW4b4}sjU6<DeER8LG60L5q
zqfNZHUHWCrLF43zb$yLH5BKu$Uo%*-onOB6(r(t(b^*taAFh=Qj&}Ci)B8Pa_RfSL
z2ldPA=j#`@OL1y1Ry4eK>IZ9DY43zX{ett9PQEaiaz*n^kH_=x*{_mPBbDBE&6U;k
zQ#x;PhbwyNCaFh!OFo~?+_)kV)Y*HuY4@}FPH(3EdD=7~pUKhiK+T-(a#<ej>h~42
zg4rIZ3b$Wa6v#YH&iCZ=2M$@LU!5*}Tl&^{X@2EOO?`XM*J-^KPiB@|gs$cf4uAi!
z(`u>F9*Zqixn{=4ZC_eCbgGKA%B}ZLJpE|8#_}#9=a;9h1n{oh=9~TOwTZXw-mYDx
z(uNh6yuNSyFugi(`#*tg@kWA{k691x>Mgln{e*x0r8Rl6Ve@=Sx4u03{>RVH;TcUw
z%Jsi5b-cT4%NxT<bA9Khef#AxZ$&)Uh09OBzI->UqwjC_&##Y~Th`w@v3*tIYa2<|
z;}`#~$kg$F-s!)C_tMJ$vyb1l&s`d=+Wey1wPlHh*F-knemUD&>-YbgbuIsB0>=uK
zc?E?I>Ywih<nLJ@0qW#!o?LOd=Jy)&I)_h^-1+&Ng`?b<b|3m!!zJ0ylBuy!_5R;e
zEEyahn-70H$5gT6pJnPFm8QQjOdsF7sEZesYIt7JeZOn}3>MYXW_x2FCN!m=2;sUE
zaFL<2)9=wG*YI7l-^{u*;nmX`i=rndVs}5Apy)iqx?JzA`jN$^Qv~!KPi(LGHaSB?
zC(*M=XmU|`Ug(MK|M_m)ukyRxw(!lZt*_U`Zm$3TPbY5A3~BSc1(#nQ@z(HqlU}(x
z(f`~YyM9$?xl?x!<Xr0C+BJFJe4WRO-#y(~c9tiKdFdz3EAI?ci_fN%N!F-(JzRf&
z=W<oc#_W|*avJg#<z8+Hia&q(o+-PK)yu*rRebk()gj*#pY~?Yj?GrlxNNxWHRm0Z
z=KPfkf4(l#etPW0MCF;*<$BlG#VQ+Yc%a$t5@f(QH>&y}`y1<$*P3N(#b@MjvFqQx
zRW2~SAbWjb(xk~pmh3KH->J#fskuFCx06rb@6)-`pt0Y$62(S`6We2+oH$tVblttx
zptj-OlP`I>BUW3?{N-^s`rYhlS?<&Ra&mpz|8(D`i}Mwd|NVaXw^`$Tz4`mF{`v%g
z?HQttj*hH1*p3}o5Z(5dJC8q+<z72W=;e=b(fTQ?Cx5;!vWormgq&nYM-j1&a^VV~
zku(7gmdYf%{nI(+&#36Gu;c$f-@IE*@7U`5=3BJl8MfZ`Uh#|xv_3#U_tlFJmpvCu
zUYW`gaAViiN$EEx8#)xJH+~8d7K#z*4}_U!X|eUM`n5Or`QNA~@h-?b^d|7_XP)C*
z)R#|Ku<qQhil0hqPDkHNf6%8abofsD8{4el%_nz?EK#1A_DucB!ho$t23y+?&Yj?}
zT21B0w$%~(3IZJ9reV{Yko?akzE@gwJWPM29Gq^#duFfBk)s_eZLZXHSU)^vX!`J!
zwRe9vyXKC?9WxL7p3rjHtgZd_iWlmQktdZa8&(CpH4?5?5I>k2exz9h+~8qhax~*P
zm^(xGv%1=3NglVJpD8ITFVu;!^V`kuZqo}XX*6JBE$`sgWDPD6y4>od0&_}}a<Yk-
z&Whmm;+gua<@1&J*-a&4zFO=3-|1r$+vrf>zC*IU;Y;@NyPuxrO)T82-Ci%O@2DVf
zM{wQzTb)-+zOFmHoyR%l{5;!7svImwTefU5`S$KEcf<7Q;S2k8KUp<?{P=N8?)5(7
z<^I82-@d-`;?u#}zeT5hc&2=RA904Ob7u=^F89kym5qNE*xsL2dDeOB6^;uFoFBfr
z&lmPA_`r2@2j}ThMye^Cv#*|%cYk~2&Z2w4j<*9VRIE%dEt<1^qw2yH^Ghzyzx_g2
za6<07JBzLwtZ5bxe0}NWq_XO_6ZV*vtkw+n;<pg-n^z_<b4sqnD;2G@|9`h@#|UiQ
z<zcgJ;$d#~g_A|Y)<if?*Nc63Ypeep3&-8x-tMe<DP+2<#^dU?^_}8DXHMkCIL8^c
zvrg77k5YSnz3)z4@}!_CyMxL@1HQy`#`&F@^zg?IiHPlGx@)7h?#jMySMow&ZS;1w
zgtxc0n&jR2VORBuPe;x8TTb*=HszgD?j0`ttHXZIBJ<9QD8cLZHWxg5Fh5|cU?_99
zS;YDmx8D@B?oO+Vi_)$CzjM~wlf06bR{nas{czFT$?E4OOvz(kD?c?WWO>atrAD#I
znzw&nN-}u$K1laSHS_ALrylp)pS!j;y7KSW>obi~y%zTAe=4}};6Z}Y_qU6qx97>+
z`DcIT<n+o7@5?iu{LWvv_lf%FkC*RR)-5>iw#-oR)=W>$36ghibstMjEwH#zeZeng
z-LaX|IPdaKxwf;1eYeg+)?KmN4J%iw9yjzaG5e4adR=?l4UsQao!@#z`QG|GpPhUz
zX3FHIt&htxE}!VW$GcZU^Kj(uW?j}5ia*}Zi#}X^{#St6_Vqb@)0W+hvyL(`HQ2Jy
z?DT=``x5i^Wu!im(hd)8&HpmlO)8~ZX=BVrgPNtizh`p)u77l+>pBMqPr}<-rY)_l
zcdx#_zACi)zOwSmOG~-k&1)K-K6B0b`a{N3xhO|e+^Y1$vyEIg?@d2^@VN2i%_nv@
z^*p!Mot%HQZ-IdRz84ejeqOzyYGLP7%}24~=I7r1>6`reSn#$OOS3ic{>^N>?_PbC
zHp@A&tMqlnlZoyxuC5lBHp^KMx3}v1@x%VMCp$LOy?de~*%P~C_Q9{-RW7cdShwlS
zK0M>Ut{Y$N<k-`z0!>tZ$Q+UHN%*Fozv0c2xf{95gH1Q2T%7ZhZ|l$ZvBobxJo>(~
z?)$kzcemf|>)x-Qa;12_;Ow`tUuu54+f^*HtU6FrW!hW4Z?11{_Mt_!n^&e!dDpF%
zz5TYNMS()}_Po>^8xm`NKAqlbcU(47SVBbPNT00ri|gy<UtiNbH0zf+@4T7)VehN!
zJxi=37;U)%1z-AfmnL0dH0xhzxT?SLSgMhxiXUrn9^2CGXO8dXox*ZSFRC)LSM<V%
zZwm5Rtgj^gtMtU?1om=FU-UU6RI*@Gm8aC{$qUk$_3v?hUX+-f?-q4w=Cf~M;(KIU
zm%6R!yMFt{$8&!ox>kr?pKDXT*KOAwUWW%YTTHu)!Y`C<$;f-q`fC+aM%V=2N!1L$
z!aqFvdyZ>;&L&1KdmX>hz2(hkcBt=N`C`-Q_t}rk>QB8~J3n}*m#A6p9gnlKO#e?U
z3t1J?X<Pm6!sc}UvbVn&i%*HZvi+&N>0HBud&i#@Xo`K4HdE8tHetyxb>CK*^W|Y`
z;%j6%3&Y&zZBE+cI6I83?Mk7i$n1)<N4Z=BEp|SWJ@<B|Uq*(8w0WM2g~gBfOz+iU
zYk$1me*eY2z1Fr>UlKk)J9}kC;N$)Og|-MePGV{KaCOzrl2xx>|2jL>_}SD+H!PBs
zl9%OvJJB#H&eiC3$>%3x$G+Fj&FxD6Q=Qp;Xu{6o+%}bKHKy9v^HtV|zYN;{BzNoM
zN9$JFERve$dwaXOX43525;0$IetxjK$G^HYX~nhP7ccTo7#(pxv*5P&edc4o)h>2k
ze)*(TJTBqvEK{4WSAtvG+m$_A-y4^osm(Q!iriVGdfxWC%ymDv0);)=*RJe;AFWvb
zZ&7x^?3@&?%Yi$JZXWxteaW)CJN|8)?3XDl1r{@Q9-DQM_eag<qo%TPLC06Hty5I}
znfX%q(XaoX61h)Lj;WsCvu)GcS;7-CF2$X1-hQ#~P|`N7H@7FBo8$aVseW1cyE{Vt
zrWYE|9_%{%@=T{ky5q6?*G%?VzSz-QmhmDop4*Jwd)gACKkCAV5BcVFeV=oB|Ej)q
zb``%md4q!I+}Uy7)N1)*zcZTx=iK{c$t!L4<ok&e^=BE*EJ!&yDd*mvnd<W@np$Rr
z%FOvHvF(e>!n0z^Yfk#<rATGBPSplQ=pI?gT>EJ+MD({>oHF0tdbqo-DDQj;=*G%>
zd#nGOG8;6Xn`^zf;$zaf`2FV=_uF~Z)cpDT*=Y$^h|YWd6DKy!IT`SAQOw?dIk$Gs
zyXd>O>&v7~-<f$L_RK3aJ}&n0?46xv_j~Rg2v^VD^I`wdoT&ZtuC5N>{ogE4(&Ity
z*2hNPalZt2-t=B`<G1?e{eP#48csY@yMKRm|JC>B)vhhtai9MLOQ=Tq{gb`n@wHQr
zc8gyIUDJAYcKb84#S`CU@JtR{o%-y|%sv1A{Z@&$I`jUrRianVUVGKrb?d}iQ-9~U
zs9AWPOrPfdX<Fv$HvOfsTRyu#lb_&S*0(yiEcDe&4c?Nhb-{a&`+ajwxBd7_B6Q(|
zRI6(jL{`aePyI7*o3Z|}gBzbL3J-X9WoNT<X~=cQPpsC_e%3bxUnh7Ui=T7(?M;0j
z>n7)|Q(O~Y&nmg5T6ypPp|3CB?0Z%sJ>Sl#$;V<%<{!)dntHM`{GXqjdvj;8I={__
z1GX2<_Qsvp-~Z>3_WC`CdN=>>nDvq4nMz>Vo{G~?ntGpG>rdv^zkkH0<m2JIeJ2mz
zto`x(!}>2TFSqMPZxav@DDdC1w*37)(88H+as5kcqs{f=_tnVnopHxZYUTd|<5TQx
zLNB7XtlFHvw8~_4aN`1o{Yv{LT3<~Q7Tml{;pE*HB4OR1rfLOl%?kY<|I^`c?pEcm
zN8-`~d*#*ld+^q;{e9!u-($v)e~Rg?_4GWPqkrJ_FMi{^I~Hl@=lLG*la1Vz!g;Gc
z!0XAnGjr;GJZukH6ERUx*-gN7u`xeyl+VdizDM;P7*~cwswF4=y;SMW=WO(D*5qBH
zCJ9qAAF6*$ungV3(b95O?%SwGuIw)x%mY?W<Jrh+zM$r>%Bi&1hH1tTTLP3Wf7_w5
zbOKkGi-+F+YV)%JN|zg-UAdE8C)u^-tWd4=Zky15p<jKZ*Qvj4YmN;}6Pp{B7tOgk
z)bZG)jMG~)zOIt6=HLENNY++5ELf@i*|*2fe%e=jyO}=IvRKXP?%u82A{Jl#aMt{O
zz{Vukdo`bZzny>Kaahs&qjtFeo&!mJ59KDFJhbC?e$u|eiO$8pCVl#D=+-B5a{K+d
z>f87JSRZX*WHy-T(<5Qn^zQEN&4rKMyr%2v#>LHx@bKIA<b&DEuL&C~p0wnAIlnb;
z!C$_4*7h=L!pm}A`=;!bTYI<Z`IV|aSC8mCyKG&le~4$z<4&>H7p~+Sk5*>8xY#-Q
z&;LecYsGI8iHq+o%uG19;h0+NuEYJ+?|;jEJE=Y&bjQ`jMXua+zu#`JkBNG+uJyd#
z|39CnXovTy|N8p)!5O(L)meOwrMoz11~O!YPOv?7^ynP7+ZR9cnO<yk`75BiG<5C3
zqO&uV4I3vdDmck@we*3oig44~11IBWnVWv_@wg^9uj0_5l@79rsWzLteqUQIEEhlH
zre){ys^0;&-F5F1&dRaRjjGxy{QAcF+Y;NaOiET;r*ZpDxbJjj#cqGkneJASnNu^8
zZ><;mx2v~&M`gCv?9V^+Zrs1jH{1D3Im6|vmt|&MZr_rfUh(_w_Mcxa`@etrZQsXZ
z(x6o2*vuv<dT(kw+drR#uV#;2xn|b5r*2wMF{$XOM*FibM{7O{O;+=Lw08TwUFo;2
zYJL<11qVx7m+9=S`kHiiSLqauK&L%NVm#BHX2<@h*?&A@a>@CRpPP2oe0}oz-4f%i
z@ju#F*Oxfd_s{u#XPe{~8FTZ~+Z_M@p6RxH*~e2RS3gY3GAMnlbZgr58y^q<j{H_S
z@6NIG_4$vDeWUk9eN&v9HEq}0<>708SEN1rc=xQz^&%tBPma~|!z}(ko1OpT%Vqz&
zoO8IMnvLw=t-8Ik`un?_``dbj{<$rBZh!da{pTDr0~w5_|MfV1G&F@ZJeL`^7GRsk
zOj+l52j3SwF)Lm=;a$P)$X2g^n<}%d7KcTD=ul8Uz5O=F{j}1`4L)W2c+EvGF_g@G
zzWC<KoYPaj>iul#O*IVrwrb&|<I0yM!#tQjl--(Bb@SZ$>N74fTMOU0UHkn(bMfO|
zMz*e7{83xySn-_o`?jp}U)|?F@-DUaXMTB|KGQ7smTf*m+J`H_{u>hyv+-E{NG^+C
z=h7Xu(n;-p<V+onx)}StvzZ^pZkR1J!@mBXZ7xHaLE0G!yB`mj|NZ&w|Le<3=crjf
z7M_}G#4~5_{F%zpJGOuM^N916{f*9thhH&#ICyBSj_wx5iuCibYkx=lyYq|Lx8Upn
z&s+O=UfgN;=5O!Mt5RZnZ1dX_T(_o~GgQsmv#CXuJN@I&-^mqQJk{RsEw*0#drRWo
znOBtRH--PRVrbYFtG(*w(%<XS&(1RKF27gVers#?=FH2>l!}ks`Ff){?d&YUo%!-p
zSBE^i(RID!_K%4Zq+L4~3w*q|-7t54)yZT1?Qh?`yS#n#JWgBQjKWPn!i%SEHEeyl
zTr_RnivPdcD-2hyZ7f@AqB4J@f#$Do%TI7!PE);m@|lG7=d#Ej358P4>B~31PFj{N
zy>-v6Eic60dEPV9J)OnST6>!-|H;4W9ya?rneFYmQoPfCetNp+%O&rltNy;dy*>T=
zx>y$0sn0$vdTz1#akS#(7N32KjUoeTe*FHx^>4;qY5kiAca^@DFi3FNnEAVfQ@G;q
z*Xx}<Jxk*E*G1@veRTRPaU@Z{?nLpVY=iax>g;BlolCrNX8((yCynN<_<z8*GVd+3
z?hKRU9p3z#^wa<4OV2c|V=264r!(_|N~+OY>4<$=rLW8%U#xTfQFBhQYz^P@kF(~z
z+*z1^MtP%#{7TDdqVeyUSF*+Ly(#*)_S#}G-KZn0*YES%nsxQYuF~v@Z7Kh?8DE%Y
zT~R2yZ};#0|9}0LZ|_q{d+BW<Tc&8;#mW9HvLi5f&a{=Q7H?wgbDtIYzkhDC-?H}s
zo8O$2lDd9rSK2GHoB#Hj-JWwMYVtY$nMPk&Sw-hA`xcTO-gfYbaoC~9Q-AM@kL{Rs
ze*WiQkJGOR3ko{EzqhyhX{|}dg@*ge%ACBss%3wxW#Vo5TmxGk2ynbh2DLW796C5f
zQ})EW*xfI#tPI|`yInJQ*@r{i`Wtd@o3ZgoG~Ctuvh9xM@z1AYgk{bN+kgHwL-R#d
zPkPMy3)R2w@Nbbi<let4DB`TNecSqNkrnm#GP3R@JzE=cb(&AatCN3T+Eh-_Ubg7>
zS?dp<Q>JP9Rn`5G%sqWSe&d4GVQU|?ipMRe`T5Dk#U)@}%*8aPzt_VQ?LQn~esg1E
zvtI13f|n1@%gp+&!nfM<L|@E>z>byqI}5mN^Yvdg9Gc>rcIobX*_<xDSv+$;9$%ZY
z>yc&nqm_%I<UgOkwWoal%;uBp{(oI+5~9;22HNUc9LalUgJI$2zp?C(u1xckiM8wM
z64l<2|2{5iOGe<D_v#iFGxk=0zqBdU`_cC^4?Y}Hw0L`{STE(%+({>!szqG`EjB)j
z^;=UndzNXo(9V2$e%mhr8~4taG*0_)#`ye(yt`KOs^3}8@e7=#VqTN|HRWAK|HA(z
z-?zDjg#7cHcl$(g{yzunlc`c3yPju6?{8pz{j`YZ*9-3J|IfXZu@5&<T)47cH~B+&
z^+o&Ns`4V8g0VF>_EZ|5xBIQL)O)&`l2X&DsoLT{P5)gD3E8)A-<r6+vko@17r#9s
zw&>9AL|<QlOUlybmu%%k=f`Z}Y`*QictVA5;dR9+Pw$9`a^{yRoz4r|ad*jiucqeg
znOo*G%692xKYqtO@55=MuU|#3uKb@ld;9z?JE!idU$x@rwCkVpy!8!k{;c@;SDI~=
zh3ucNuUF5x^(k_i<d*+TjEsyqH#fD~)&BbMeWIw$oU78&zjzfdwD{~(Hi|5G|8PsR
z<)_mI=ibiq`uyzd&E4htp{qhPuYF%~F#|N_*l+ty;`B7#$4{rni%sOK6u6~u<nRAI
z6N@cg@A!URcPiJyZLgjL<!t^@cXuU|{OYwAPS{FkJf3gO{)RJf&z~plFRFY}ZqJT=
zx7y$WsMoc>zP`szdA51}zT~YF|9`DH618@kW%08Q?{>eRRP)ilZvH*a%U6S)s!|@U
zxUDgXW95X;Q?IUk!s)-~&-Ui-RmT?UFI80e&G7wlK-|{aH7EV@%HM6xjjo+`b3=oG
zJ{z0p!LzSY;<8qpzT`UpRiMdJ;eVO4^H-geD?0rlTtiWMNoo;4bJga#C%Mmj&sB41
zH|=%)Jpb9Fe>+Rw+?W`<yG*ol#o_(E2NM!rURv5CW!kmGxZ2`Ft@s&pJ+6gnDMb@c
zryShddUwv5gEwn;2<`2O+gr7<^mUlmRIRV`cbKmYJN@Wr_scswjf<b3dwSOVzKFif
zasE53X?3qomae=}|EzcB?{`0okLUmDeA#t;QH;&I<Zto%D<-Z#dOe(Z*0w9Ff1H1&
zHZ{9l;_gFBzS4^6Yv+H}+!T4`(Sd`_>@RO`*N<8|tykJSXn);b>*rT?hUfiy+;9Kv
z_xt_Fi<IZBkes_yvfhD1aB7RiZtFy^pI$ewFZOJi5)_}jY3|xguHAks3x8Shmey=~
z7rU~_O*e|C{%>jZI%VHVS>~+Fjq%%!R1dw%b8lbQRxhJwxnlRcO`8s#`<m&p`qqSP
zJ>rkE{%>9vC6ZgWqwVz5`O~U?$N8W1Pb<o2zL#b_x!&lxhPmOJLiuCYul{-dd&||5
zsm#XJ1z&HlvGGV8*j4)a!{^Vx{qL=ITl{eM`+dommUu4tQ`_^`*lJSIQ+*aEHSwuF
zVt1k=Cg}@*tref-_x{mQ@%WUVpPp7!SN~uB?&9gw-J;rIA3hxBH?I9vqUJY8BW}-e
zFXd0|8_)Etb^5W4L#?!Y@9*UMw&yoZUitma&Z+yqe|q)$;g!0F?qzFcy}9+G=TR$v
zxX2mHQsX{eK`|9y%|AW=6`Gf8PCI{_FD?9cdB=}s_Ki2+)n7MX99i+@S^cKxM~)o1
zusU2n$4ol=+M0!#m#w3N|IWK)*3K_~u1i$=f<=(!hvagh{@gwCsqT6fH%pIwZS~(U
z?b^)vgbmXIOwB%?V3byS@L;}K(3R`AR|ST#KE24c>h#T&^uoHOGBTCrGs}0sxcDgL
zDx0=@(%N~K_}FyUZ#!k1*=2e8S7Ei>^K~~J{C6wxW_}DyJ8u26YVyt4TZ&t@+MTWy
ze|Lv9zj*nws(bI&JYOjl`QDl9(j_$yW{b#;m&>BxPS}%bSAA>tr2`Y~|Igq1yKmK_
zsNWX<Y(+G=!~}M`wXV2P-K!cXuz|6Qql;B2yT|m&*<Rga895cbdm9xNnYcQ#2)>C8
zaS>b-C>V0UZDQ3udy{9gW<)G1YIweR+Uqz!znMSJ&awP_=HBLVxzJauFK?g7Rx6#E
z5E(zWu(tF?%7Tigdy<cOroH0zv)O;)@vB!|%a<=d<;(P6amTJ*kDfktjXP%RxAv9i
zt*y;P>y{k)_w)Ckk9{Zei{6N=QvRDKb3EK;_XXqhb24FTA{uv9eU$kY8Xi7<!UO?O
zpB&U_d~~$i`+(TL`Ty?M8~ff~T)c0GPXE&1Uv8GH$!fWJMO1%T{0!3q-R=M1%{Bf1
z;%kVP^IzM2-(OFcl-uy~|GsBkyIAe5wjW;5ceMMZ#;0$7=|}fHfB)wPo8rEGEiQAF
z-ufS3oIE?cNJ!Psd*2h`ujMg)Z=`o@eSKan)o$~g*`GBYDv9gc{hmHyg2Ror%$~Ua
z6MkLoj=%k2s&@E`ySvR_zI@5T#&&7x>Gt{A5l?5yfSQLPAtJx3tmb^(^88z8@6mq~
zf-ZW@eYO97%CeOU{%K3OI4vrg7hf56{Nl1Jo8R{@lQQ0wz3|}X&MV=ub!AKo&zw5`
z{LyCP_O4~OdRF{=!*Fq7uUYPCO$JqW{y$&swp`b5Z(3Npb>(W#@A_$*AK#4Rows}Q
z&%b`lyO!Omx%^;#__5u8`~yR;eOSMHZlJHP+P2FqTpxI9!%{TgKYdvL^!wr-D?jF~
znJZ34@B2Pmf3uUj?CfW=uB=|$`SZDz&fC?lyFYQ=GQX74y45U7!%|?@(bd@p>f`uY
z`>ZyzzYO^K^M3E`OyeEheAi^RZhW_*H&$FWDm1LUz4A>>K+X5B$G4v^DoW+w6tsSC
zhHS8g*SR_N&FuVYCMHvYq*z_@Hz-=F1)Q4|dg*n4r2OL<f7DJZh8%Bx?QXGimHJNq
zNgqCZc<}rEe*JH+*Q{UfUr}MfD{Yo?YfI*u$j!?l*5{XHF+E>2@2i@YhtK1G^W6^C
zR6Xag=b6{-bXf2@WAD7$Zr&*4+<OPok2l@<>N8{iXMI+Y6yf?;ce?ja5_<pdYtsem
zecxYCliXyv?Vo#{<;330_39TNRBFfVei;1nf6as$z47&D*R_N^a-RO^*W>;jb+$cv
zy8BYkA9XR3Qy1J;q@;Jc|3d92u4CnE|M}gVSUY>_;@pV;d#qp8%jCSwO>UoicXPd&
znVC=A9_F<Nn=@C%?>Q%(cXwCo)TyGHnwmK`H?eYaclUqxULJDMx=L6`$iUeA`F3gZ
z6`xzRkJ)zJI~cNX@wp&%*)IV?J1if&GJe```eVz&T~}(aYAEmgx#re{qZ{q_-sk_j
zW##JsZ<aJZ53iR~dA32r;<<i!!>%nC(>#iM)74deE?CI-M7%)Zlcw}K(LGO#G-ep>
zFzKz^Up4vTo4Re>8WMNo+qbdl&!3}mH05Kx`{&F%LER_U#@#(t{Mbj#*#Aj#$bON}
zyZ;ojz6tXYyWICIImCVPP1`MT`y?&w=ZC(Y_w>`R?|K!lQ};#rcKfl_+g_}iGpF0V
z!t7#X{ogg_k?z{uv20gr{&L>=$vji1OS|HwO2MfMmrO3TojH<oj79uzpiA&0<&KYK
zPO%e@KRY{n<Hn7e8`Znh&(B-Byp^?l^5m7*U#Dl4N@=HGknfz@%Ez3w>i^sCFMao&
zm;X28PuXe4fX#K9b#H>W{(bxIR^w5Ap1rW{_4JJFOaTdjhMhYrJEQ(Td6HuH`;GCs
zn4O1anPwN1l;}ihfA}J_ZtYu+z>;I{U#!>XT4;agcy^6{UFV<I8s}f-s&A;?%l-6m
z`Gr&8`irlw4}Y7Mviz0JUde+Qr#t67>A#%2^;a^-XL)B)g|AoAcJciFIknrg@^4(*
z_O(YYmsh>Nu3s!_bmQ5rshNuB{qjtz{~x%w^IM3*>=UYwCH^n3=5;?a^X$+6KJ_m@
zGhBHUxiRCIO4XjCb%)F&`8w`fao)c9aQ)qoVy6C!-!B{K|MTnTUoiJAPr&P6=C}R6
z?gtGaKe)9uJLk?0L3Q=z{o%_sLMtMcKY5a(qoZR{{Y@t{+WO*!Do(Dgi!QHy^wssj
zZo`$U`a10<@S9Km-1&F;_p2cno(h|Kt#+<jHtDi?>VwAXD-)Nv?cK!XwbI$IZ<4ep
zYs^WlEqhl)h%|OCH!n6!kooniisk(s&*gq|z5djFyPdy3F)z=rrp9KgRqxZ_)t0KO
zLPJ8k4=k$knWnVi!yTJVv%IZiS4M19+U@L|d2x~J$uke1`C65}a!E>3nm2ErYxcV9
z*Totiwf^CGch^%{S-<;EjPI!_#tT*jJ}f=2bILU#D@*Iyvu8(+9^II9l*_GO?r)%d
z{L=K;H8DGl*!g4*T)Q^Sw%X=o<s_fr_TClKFX=YL&-(s7Z2J2r#XrwU+gXO6{CB!Y
z`Q+^#rRy7_je<{B7w5ibTKDE>=PT1Jfo;|1%a$z@;tE(DrrRfD>9js>uSHdcrN6xT
z<r{gTSyD??<|w&|_MPNUx%aN$Z)Hf4Wr>wJpU%-6M_N8?Fvz;TE;i}&ipb4s)!*Me
z-F)6I`NoFC7q4E4ofQk@y}AAW#H!4mtSMnLk6yf{Gi(3MetG+4`S<r7IeOHv{@<Ve
zI0muK*jRu6{s|KV=FFY@@zbYASE96M@qb<#^0LeIk;5mQ1lHPLuL6uU<$Hs;@{T`y
zQ1AZis$l(k_t`-vSDI}~w~9CWh-}@+I)(M;5mw)a-#+r1<lUKZcXxU4)~wK*JN_?r
zYUO%!cegk@JNveLeg*Zva%bo6xp+ZO`(cg3tn<%bU0xg3H8YJXs7NH^$@9%G6XGV$
zUcb3#qSaxkY=?c5x<dcGuv(=#`)9NEiSw%0@_A0C*Jj?{rhD+<K?y0TX+L6oe0(}O
zIu>|O*E`>zef#H|&F4ev4}PAK!X)x0`PLrZnydTRcFx>+<;@4y-rg;#?cs($GpFCI
zHeoa|-C26%^UuYSB2%*yXU?2?<Hn7GXJ;gr`OZFeWo0n;@_F+ui=X*S-Tz|2y1uPj
zO)V`gU0Qw=zCYy0^J-?G*Y-VL`rDmi9sjP%O0f<6q$2+1>y-<a!wNoE>3zJU9{lI;
zJm>AtW?i`wzIe;BCU^fSOVUL*{P*yicr*V~@n(zdl^Q40YhPYlEB)}{!=k1y<*uE}
z)XK`rmMmFPU|`cQWrsnJkM6|_*CrR(xjc;Z>-CcQ@SszOH`nykw7>;3Ll69uW)<0b
z?%GqW%!Ir}ee0%4b;+pmU!J-|<=F<!g)i262Tq#)x5Zfc?aq3`mV0}vg=J)XZf+`i
z_^jN`&8_C+QSqExTRPW9Z@;uPJA75>>SedM*Pc<14xayPu2o`?+4Lz#Qr_7!O@8LP
za)mOp(aKqKrc{)@?|tfdO_)pT%JSgf(@NgjcjWQ*1bkgPb?)4=2O60z3mz~CtNA>b
z9$%-qXR~kUKJ^cwVQV4|?kIeGK_}Pa^TK^o)?Rel$Lq9W!ksfAi)`6Gy?Exy#w*p*
z-p;<%%TxTg`Nz-a?LW`||A+nUrmGo-N)1L+r93p8R$St|^EhSE-Q3DaBB#USYeUy8
zZ}_pDPtK-e=T6J0sHm2<wz><R>svLrK7IOBP*9+tqQW9BE`DuYZ1jrL!B4sz8gg%M
zGt9W4@M+2`lhO}IHq`Oo+}skt7@4`hu&SANre^pIqg1ZX&(BXk=y&hZv0mxI!a~P|
zk!@*yvo+&)FK$t4;quro?*D}?dY;{$qy6^(PHf4%tTN>~2S5Mv@bz(^vE2_J3he%V
z3FdU2;=#(wYGG~t`1R}43mltoY{?YP%4)rx7pc<i&~Uh&|MKecd^y`HALiLhL-I`{
zz8NTTU3`3`c+STc{(iq|@7%fb<Js(dl_}*SVq$H*YjyoM7iZcO=uVaTJ~^O8!-YRj
z=INT}3S67yH<!JQYU7vJTexT8+uPeO@2{`d3SH&m;=-~u>*}Ncmrl@pz@58yUtV6$
z&&bG_ks<PLgO2Mq4wfAyFNM;cJ-f53a`LXb`<LgHhu=KiqQSCq`uC!|$--HuWXy7J
z9cf@>R+&<-X=OEQo|xF3{q^&!_C60>eOQCjwZo}Ezf9dQdd@|yrG5?j|GUpVvD4z?
zw{K}DCMec?I;sBV=H_;-&{ZB9i;fhOmZsj^lv?us-rNTd61HsF5|XsDt3;4v!jvg4
z=jYjWc6Kg|nw>b~TXf8lXF?*2R(SW^J9M;R<BO@9XV1x6zqqq=bIYfBG5hOejf;!d
z#q2wyK6n4IBT3sz){8z;-c?zmJH5>3=!FXtF2wH6xVma;%cs6;Ya$Pyn5eumeEmEg
zF*UEA`E|cKcNRatw9uLT+nbxs>(;&UDGq<q?NDKMy>0DUT|q&?kemcZVfOMXrll$8
zg&#5R7TY>QJ8|=hsI^iL6BA8xZUi`0o(~ER-nePgqaPn1hvZo`xG{&Fh-q@FVVPcj
z{>d*p^H<l_dPhV^l)k<;HQ?V;*KRSx)Kem}%yPXlGFJ4g((2l(>OJkl{QrO2pPij8
zEGoLR=y&x-4JO8-;$lg=8jE}P?oC;Hv+J4j_MlZ;Vp4@3IR}V|6l5OXe#dItG`C)<
zjn&`tuF5m*;W&Nj)S-L#=E+)@1uV=HpT&E#rES|j7Ud)C0Tbq*Zfz)3am`;?`ubW+
zOABbig!7p#XdY#A+Sx_9x3|5yvvcyLrQY5ei*7`2&y&4h^O<+qvSl(B1q{s0%u}O}
zDH-}dkY0P@^G=q{n>Kaad3t<rx1z)~WB1_gbKZ11?Q#FP)8F#BanBOt=ogoka#w$Q
zGf{X6|C{eSS>E5<dwQWQ<B}Mz<0r0g@vb!z>JxY$7*}SMZp&Wy-pL>=EDYqJb#vyp
z@9~e_U8ZVlD{F6W?>EoJ^Z&oUp-B^X6z@$xJKNki?To~$Yim#MtNm?YWE6Dv*2*0s
zjSO6^O>(wX2?+@Y&N;}xo5#JtY3{j<m*LZFgF3z|m>c(Ilt*9Eb2lt_;K0Ph^hE2O
z`=Vvb+}6kKy>Q_|!uNA)@7T(>99$mn(a)}UZ}J=U^3J&H)7$sU9zS}NH9tRps^o!%
zI}DADl`So2HZ(9KB_(axxKZ=V!%wXh@=Z-m5{5}ES6785|Nr+lWPMz0>FZ^WF3Em5
z&&0%3P*S22ztU0I-Jj=m>2%xRj(SDyw`v#PnWfy>f96Q1aO9SZi9A6;9(V5E{rTy%
ze&FRx%V$pJe`=qpzmVgj{*wFN4|;a%M)KzwB_HGIy7;wlYG^<}!{o`r_wL>M^8UVj
zd3m`ei|a%&Ik~>Mb7h~Np8ov7!DcOc{j`m`OpJVTHXiToYAgPz$Og|&4av6j_!gje
z@8DzW=a!H2yuVG}trNAS!*jBlr$%ee_jh-1+_*Jq?qq-A-(Mw6U+FI7_{b;suWZg{
z>9;4Ve@(c$I{fjYM@kE8R4OYgZ`{1OG55Av8=vf{Pft%jeY8kp5}&M<%l&<|7cX8+
z{QvK-N#31^r_*;vh&M8b>&G2Ab7#(x+uIGzX1mQjzx>2Jts~JMdxehf^mPt+Vt@4X
z>F!2mb}tRBM^{z`M?^$Tnl#DjuifXBH~(pKx^`?Yc<;pVe0z^Wuyf=7`SM|5VM3?e
zr%j&hJm0RCgPS|~%nU=Fhz$--g081tUS7U&%N7;|Bcn;z)<%nWKGpl?s^E~6q$DOL
zHpjkx-mBcXnHS0xxn3429LZLl`}OIb_Zp|BTuXi4CtLFNR;W{;;q`T~htJ%ZvpRfT
z!0yv5J=N!?XuovY(-?i+ydd*;ik3u3LbwM<?yW5o1GqfH!o<?j(qybkI`a4bohE6V
z=Ap63qocQXY5MtjDXFP~Vq#%So_XmBP7T<ab=9Keg+T1?vZvoZew}mIWX7L6IyZ}3
zf3RQwuKV!MG=0srtDWcG+M3#@BO1EgPqt6a(&_cJwLACKb8g(Hta;-6)P$BF?9&g|
zspQ>uv0M3<^LU@!RHh9RUcPu?Q1&LGr?+>eeZ5_`nC_&2mRIubdhyH3-``uZeEH_w
z+uM{v8K%up5^#u*pTA*)!JD_>s{QioUlV4}RIA<~Q2*(-OvCi@^9Qb6nR2kN4>UHk
z$d&uhp+hPPq9GfTT)(}&-9B}ysAu{%p|^God)}xE^$64lO_+bYa{tAc{wSBz({wG%
zzGxgfc1*}eF?w5$WM5xjN_u*7R@SL6FE2;#t=h_>9CGdKZ1atqHYus9w(3T2yRbR^
zywY|))m8o}D}$DT#*`<k`#*b_uWqsNQqP(#vp1=8fBelE6leC_jQ{boiC1gB-AtFU
ztC<nHI!sVP!XqzFZ`!nJXJ!~SH#ISFdLMo9>Xnd)$dT#sb)L7TYWS_)r11M>ET>z?
z^G};DTW-GCx8(BdjT<&3{Qvh?%ep??b76?qOrun;`1-%5$BrFyadq8T{yq*Af+m?l
zv(0iRO=vmg>F0Os)Tyo`M_g=dY(Tku((6E_cbqj-RlTPf6h3m<@<r=n%?$frS7)7w
z)AibNXAS$DuO$x-epTj?FksNvUqAiXiCtA+wf_G8{`}3&&0k(zY(6{NJUK(7_{z#f
zk(<*Di=X+FoVvc`=Ld#U617?jJA`!A|LR?oX8Z8toMjuo{JB}C*(pt*oTkm4dv=1N
z^M}u$7dyA}y}7ZGnVFgSL8P|#a=*DAK0Ze-Ulz{J&gS%4(Q@TV$eFWeJG;A+k9LXn
z$l2~XvCg|wmx-~awpP+8g@d&#$VB!{Yval1o65U|UcPL;?Xvs!ggX6sA0Hoo{OsAc
zQ$J_Tv#UL|qwsOW{yN*MtHY09zdrrj+tfa*&@FX;tK9lzI(LOd&f>l6v@<GKRU}X;
z_0E2$ExUCiGtFn1<x1KA`?0ts=)|Q<mtNf7uHVilyQu#EzlzGrg=^RTy;Qs|W~UR#
z!M?t!udl6j-L!kIMx2~|UC+{`s_EzEB>wyJ(`T;LR*&`mopMT-&z<Z0@S&h&>($P0
z5(nlQN2l<MtDc*@B*Jga8^2$BE1sR7f4=_z@B91bZU~Q#o?ZI-+QWD6{IasNVt17!
zo}8pAw1}&ytSs$#pX`dyQ!c;$Rq)=aqeaPUPP@5(QPEV%oBa8ks=j7vGK3!3nteUv
z{yy7wKG~#8OFV!4`t@%q1B-!~nVOOkQ*CXnjEsy$!GnetbuQnuG&M6bvpJT<ZJC#s
ziT-2~@T%~B@IEOy+3@|%$=02nGiID$QntNw)Anv5#ka5O&Q<cZ{n0VYyK`cJWAnWq
zTMZ2jzr4Rc|MYZyP=o8*`uP5R`}Vc0O7fd$)7iRIHFIU=mG`?RNWGl{3fDXRw|&n%
zOU%e>cz<o}mv?ugoitO_{pKtH<>n<zHl&;s+V}6*>+AcTU0E4i^Y7<#5fKpqA)%m@
zX9>bb-OI|%{{H^1Ze%p6Q&@dd#m7fU8{>jl?8<+C%dPtM=430k_==1FykC8vr(ew3
zm8AP~j{U}ci!WTgs+RG)%<li6x4WI~>;6cjr>Fn<``6IW@ZkOX^VNKx{EG@LetJsu
z%$c)0>)uLj-R*a1$?`J?RlBxFtK00@lB@Jody2Y>#f-AIw?K8s@jh7|DHD#RK_{P`
zo&ECq`uo!Rzg<}9JY(j}g-e&ZW@N1RbFQObO<$j%ot@pu$?3<Z)B2H{(|k8Zny)OH
zw>oaGRXe}@vf}6G?pW=bR(#Lz$#c7@2`(E;9v=uws+nVGWc1|1!RGfN42L$a4FB`z
zPeF0<<72(jTe7eB^-7z29nd;<<Hn3+_x?+(Lbcbuk3Lg*KTzb!b5Das5xa!!+IJkt
zUlF-l&Ane{B2xgXPV_dO?Cfl>>3Y5)Ay=YB8yMzVl_vfE_BJIg?b338c_ASom5`4I
zMMXp&JbL8h;laVj$G0YG>#B&g^S7}4^DHYf^Yioj@#|O6yUArH^1E1l=YA6E(GZ*B
zrTXz;hQo}lu(bgHkDooPy)u5kU1DP5l{Jx{_wM<Aak2Zx&6^*8dwbi$+In-&%}wF=
zo-1<syScHIm6g4@wUxWs-+uYZ#o|&?sVWkIHhwogecrL-t@OJ&w_H9yJIl$-yEJI2
zmynO*|7SjPt){wmizOx{Ib~#Ml)k=J`~3UueYMtqe}7*dwA2eUiMcvVmou`);PJ8E
z!*}n_-B<hj(fjxDZ`?$tDSgQ2i<+$Fn{;xL>XdaqG<Drs7RBysV^Qw<1}=NI_dR*{
zGZ!>*G}pR3Xy2dTdk+-;|5qy`D|>TuI)AUUdD`o1YfUmPI2;OBtgNi;k+Gb_D{ZzQ
zeygUg+mFCiJ##d=7wOuoHy7V?_UydhymP1JzkmOxXex_aa0REOsX4dv9ld}5zkaa8
zAL)Dd?(s+(wM1>r5)>44oT?S7lJoK2&Rx4U<=ix?`tsu7xw+Qyvy4{n2mp7iZ(gZi
z)g#JP_3cgPym@kpw`)D@<U1M*!?%2S;^HadyEBsC@^PZOSB-_4nHdW!>%!&Bzu&W8
zUh?vig_YH!g$o<^@3)sW%UKY#)N4z|MJKbSu7Kd+#rgO5iHL}7P(Lz#_w(|Ei1qAC
zy}~CHxJ;Zfxp4lAuYW5F{u+Q>eEc}OtgI~OGJ(6hO1X`Vjg^#@B@Gf7Zg0=8&u4Bo
z&AimIDs(kyIv^rKV%xTDFJ8U!dhuxQOxtR+ZgKr($;bOzT3SSIKknSZvhUb5-ROd%
zBBzz>d@moIZD8<DYhj1d)0c-`&h5DU=}Df<<72(Qk83L2Ti(ph|LDbw6KkWlOIQ>r
zWM5k|@uG9+nOUaU61G()QdUnh%}TVNW}RA5y(r_P$IeqTdCgLm-35&@r=Oc+DdeSS
zUG+ud+O=zUc9-`{8mAf5{wfjSI$HgI&e3jh<H}Ddke-J|>gj1yKOEiv?Afzp{Puqw
zy2W%my1KNkJ=YCl+1I#Yg~qRMuif3;#CX})Eb-dRDAc7PG*2b|K~Hu4JiA?o_f&o^
zC@uZFV*dPT)7my~Hom<*f4SdWs~<mpTv+BSebYcI<@vd}H+W?{^5je;pZK{3b91_i
zaQ5fTeb&=1yL0ABo%nroE-rTW)@VI*`SRrT`+jkimX@03-SPPU@2~xf=b%L)GmX<P
zE%TLbYHrqy-L)l$<>R?)>tav0a0(lknm#?y$ozik?@c0&M-Tk`{2Wv;FR?B+c==O#
z>+U+`g&b3-mAzGXzDFkTS=xhl@6J6q*vy?9IZZeE*c8oR4GoQib8{?xW*R+hj*%35
z|Ln)d#}QGH9~Is*e*LYx%<c9=p{{_;@A|F6n=3VDWwHMF@ng@|jLDDwYH4U3xO#PJ
zE4O&iVmIEVrlyR0dw#nAe4`n>Y(vIHrCnujy%G`<Hts3+uBn)9p8xE${{AI-cX!Fy
zRBUj$lda|YAbLmXYca^cVBqG%8n3?2(k|xYnmR>B*kjFmkCH9k8*6Wy?J9pC_u_QP
zyE~SkmRre7p=ZyYRa8|arKG4xd%MQ$tC2J=F3yYD?>G1S@}!(^%`HkHm)=gEcO-YV
z!_7^<T3FaGFLY+-Tq<B&_vgn^n=H_pmD}5L<!eK(U0m$GIqRyHj*iZUj~^X_gM-a_
zDv})?9eLzzW~>TbUG(C@LLuW<YeX9JUmk2`2dz|eZs!YJS!gfCAJaUmEv8gx)#L5X
zr#t!o&GVY7z1iAiO4`|3E6;37O-V7xyJKNvW3wV=r%_y7oL1n1uJm{J_I58_s(SI_
z#gJT6iQw1&7cE<-c#vaiVd?ppb?@f7pN-neaeMFVUteE)YqajPT^G01f4*JJo(e&A
zb@gj&B7-$_3%!;KsjI7-<=$#pyH;0DPEP*mz4E{>t*xz~4&ABg`sWuqw;LE5Y8t-Q
zyFX!j$iA9NB_*X7_xIbEpU;2(<yYO4G}|c&E(@#8*dKg9!d`!V`TV+58yCBWu3~6O
zU);WH_wL7U-}<__u|0eC?9cuGf8{GHjS6>atq55u^!xk!<9F`Nv0iRgc<u7MKXZ6E
z-9-L<T&pmjoAL3^UFCAVv(28~xVSioL2T2C6&km;<(^*ZJ$;6Ky`8jqp3ataGq_rr
z`1$$QtX<2=!IALh#zqh6_KLZnZ2jPY!%U;pj*bqEKvo40?+4b}8X5_QS~&mwdcA&%
z;VS=b!N!c~tJMy69J(q0xya(Zb=Ph~#x42x=gqY)U-gIgEa<|dd-vvDUhaQ+Uv0Hn
z{=GBm^J|o(gIS+EefsdxqeI7fr7y0()+2WKuSTfbu6JFBCeELHa_8B@9y>+czQ4Q6
z$;0zUA^1**`SzlxUdQ_7`}_L%?Ck6!A|qEGDSG<k$%W<q@`nx`0+ln|;(AkhRpVY>
zSsASV?e>Wi9#Px%Di<f8wrPAHWveX`5fQOq_3G2>7U<sT<y_JPEsSfP2!C7N-|<bt
z<J8i-UtVADm$fbv=~V314q0(vL*n5RXU-hCe_x)TpTDQK*U;2-YH!quD_5?77Uox0
zR)WS%^~==RzMj!os$I<4)n5}df1=YDwrsf=>k|(SHh=v3b*d!;_vi0w`u&BEj~QlP
z)5*HN?(Dt2)h};sT>NUzl~t>gv$Ib(GP84Va449YPhS_i`&65-zGeMCo4r+EmDJVS
zFI>1FR2{!@3(G!sP!%L?#?#uS=i6Qpyf81F)3rwAX8${Zy$7eMm+Ko<eaV=n8y%+d
zzWK)1Z1KZwypR9={oNyDDP(SLZh0|SaCO+)q&GJ<X1oYl@Y5jv)U^!Fg&qA$#$T;2
zO6Q#L+1s{ywRZ9Ib5jFbxdWr3q?p-wKz-D_ySpAff4+Q^t=~MG%0tiR*PmM%ynKdf
zwpj2opNYI04e8AbzP`BF+|Dn*srb3ynX_lTbHsvJ_NfaB3P#+%F=6J<DQ;z}SKhf2
z<|HT|^6Rd}BJ&%cz62^2KRfdfG|71Bb@B6avTXcvJ|Q6@y;7!0udl7Wq_sk+si|qh
z_U-E19eUN8ZC2bk5aT2$e`WFQbc@G^4R?A(rs>6kx&kUwoJ7RM-RIlY?kIZ7wb-rq
z(e?OxS7+y~?7d#z-iy7b>m50EY*WTXCAVHFRq0JFyDLAd-Q8WTuBpjsZ*QNIljBt)
zXJl}u`qJL&az0rbkE2~|M}G7?ICd~qsOv|9T#WshlcBaJef^IeKD_vZcmqS|>M&I$
zr6$k_+>RX<QCqW)g1SM;s~h&!{uYvy?5vZ_Jy&(B`1g|KElPGXru?v-6PMho^xg2^
zzki^Nci}?7t*zO|d!@}Iw&h4_hp!9y$s{oEZjpeXVB^M(hLhF(kDWg4ZI;G4ZOW9E
zrAt*iJ3A{XDyIBk5?G@8Vejv^+wU8go2%Q|&5M#f{vmVy{H4FUD%QW7cmLeawegGA
zF7-@saM<|fRKbrAi83-WJ1Ree2Hn?0Zf@I@divBc{gjU%KhBsryZH0=`zwAQEx+>i
zW4%yU#d;zC%3{M0*<ZeX<+!(Vvfo@QPmR`$hlg5s?AjHyF2?fDpFcnTd_LcO$Lh|m
z(r&-GR)xR6<(lQ)(NO=d8sO!{Wn2ACMNhA9s&=^7>J1`|@_tcKQhTevKRYo|IV3B|
zIeGWh6`4~eB^=qm|3pEG=tt}Qi<6Faz2V~p4JsZji<P&jXaH5l+j19|y}bphjJM@R
z-{j<qEGjYr4T#uQPg}KiZbrslmeyIjSd{;5U0waWxb(cts_QEpD<UL#rA#ISaC!Rq
z@hQ9a9XWisnT=O!L-KLHp5ET>H@{+b7Pb1#wVF6<)~RW_(Hk~wn9v*JWK;Ep!`s{Y
z+&tUUO|0Bc{$2R5c-PUsflE|tL;3r-HEY-IlomemlK0vZ+gg=_9p%eHv^;NoW&UU#
zV`ygf?8nCBqq`Vd(u<4O*w{AJ{4AO@Y0`@;D~0#}`P6;pZ^#L)@O5Vznb}`lT+CkX
zS5<VZSm3ASOrf5N?~kJ$?ZlbX3rf$+%reP5^fCGP5(XWY?Rj?}Z9Z>z`0!z7Zf@>A
zS?jPR+zt%M$;sc|-R<7D&#tep@6G-F@n$8Q6K2lr?Cs@EKR++^*O!+;4F5W}u+%xQ
zva<Hb+s}J?divt<lT*Z$nJ#EWhc*f3Z<l}iT<EoV+mp@vw&dQP_TWJR4==BG2%Djy
zA&-p31n=p3iD_wV%a^N9)e4<7cd@|JXU`tJdDAm}x_D~#7Es~fHFcfF!H(^d=FfIL
z^X%cBe(tBA&)cWp-j?f{)HVB&mXVQAb#?W-`}_M>uhzEz|EGAOOyRHhbL?to<=x#C
z7!cr4P+$NWfa;C&Nli~r&d&A@2@w%i_e=Ts=%~=C`5xX6+Ihl#ds9zOGc11>Gw;sc
z6CXwIR!*u3Y!b}ZdH*tb(cRyZYpzYVtNkS)DjNET%b`KdcUHl#FPUyV5{?xW7QVC1
zQqRw`-PA0i<m>BeV{7~K-R}1lCEBxuwa=XJ*{gT3!+igQ2{TWv+`4zCym(i0GxNfQ
z3xgQM7FB+JmYA02R#s-#*w}bxw)y%9(FO+H=xr(2*Tss6h+J47Z+~vCwYu~^i+R>h
zpYD~&{c~D>{}iSV`5xX6+#fxCdbC4OIUr+NrP=fZ*_}6I|7?D9{G?0%hKHV?Soi&U
zr43#=v+GXv)5nhwuLxXR@#zU?@N&P@b8{?Rla}myl=Jh`)0)3uuV+j%Tk<miR01n5
z?5LkIv#L3xT-qpjol)tlkh-5w#U)+VPOz!_Gb8bE8)yL2$JdvWhi6IL-m0Tja}OUr
z{PON@^IyMywY0a3|Ng4$$;HJbVOL}E@83TQ3yTL&pN78COY;znV0n9UGkdnQ^rm^w
zF5JlDojNPRTjSBxOYe``&u0GcP`B>SNB3pEv(*;<>Ce8tE-^p<{K?7cpsB}u_wHR;
z8y()H>2iBpZt{f%j!!hNX1;pSnYdR@sH>v%;>3zRW4XU4J@=oVr0OkUU$>_v$U`q~
zPe*Sr@5hfHL1V6ohuOTQ>%}Tvv(?bxh>ni#>FMd{=y)*M-)^GpqK;s#sXfx>eEj_U
zzrMb1pFe+ojLJXuAgOv6+o~@Q9yBsbNKJV@=X^n(^`0On!TOI6js^OyGgAM4Qr0Zz
zh63vY?JFyT+Yh(%8>gNU@tb4eczKyGcmOZBW6j#Npmmc~OSf8VE>!4TzDHH$qqrt#
zwdK}hLaE=Jx8>c{VogxIwX0M+_tut&udc2JjVrH>-ac&xlfb;Q^{-yO^eiee3JVKU
zQB^(Z7kFgnu3e8FK6LDtv%Rq?wOgcNy`pYo>w{yx(lchyo;+c~0rmO4hmU+xf61oI
zutR+J<Qcl>GNuNpAGjaiwy}<R;>3wO(q=u;+w&4rQ;+T{eZ3-P=O&h5553r3Ei-po
zZqK{7B=7tCmACJPItenU%=`8*>4C+UQ&ZAFqrC6#L^^4*R904ky3VTJ(*jlosn*oi
zdVkp#vAaw+zV@r=*H>4Sb#!{VL{l49FM4rYzW&Md__~Flu3LNi;?=9Q7kUV;m=zQp
z{POB*@#B56i;FHZnn-PMdM2dvFi$S_@KV|PNf{X)FV@9w$-3(0RO#F=XPb0qM`6yb
zEu7BI&S7h#TGy^!%M$DXD&1~wRyQ;>49PZ@xm?$sqIXc}UDOk&buH(YMSXqM^7fwZ
z;Wl2?DNY67-^IRp^Twj!0fVen$$|6d`&WggUMVs+H&^qW<uX~#mxGsgY0S=|O`Am&
zyT$d>Zf(ijl5w$V#R?6{gjc+t<rkci5)vFzQdDYxe^dQANB-iwzbC7IMKozPPVrKm
z?wnqrnW0ts`pUx}8<T?=_AT9%div4Rr>*nn%X=-I^zZNQ$4{Odd3^lSTrHl^RUr=_
z9Bc;71FOs{c)n)3^3R=-LOz~0`$R<qw$~bkA4<QxB2c+c-dc?{LD912hk<U?mIntK
znL!!z&F$^}TnC=3`OP`8CUSE}XXn9Nw`P6$Qo^~kBVuzJ@8xB_hp$|j^5H{)Cd2)v
zEi7W2{`{#q)+>E{Rp{yqtM7cc8PoJDRAq+HA_e>D-2ak2tu{}+wl-Q=NJuF*@ZZuU
zOH}5}oy*C~yEN_Wtcr??3u~jzIhP4MdG^ex=10Mvy?ZsIH;bNa&s!Rj>CCxo$)9~s
z?NpDYvwYWYyRg(-ys4>4BYN8!mSB!MckUFFmbwN9Pd3fI_Ta?}jfHy-U0E3%xuZZa
zc2|j_fq_77ZZ4;5Yh`uy=d0oIf!p(9`DCqBwm0oj{W{Y){oE7@uKroGr1n;QbqWri
zeAjwg(f2=PJU;stgh*NjYQ6ikX?{W4V}*C|JfQLA2@?cfHhq$N@$%)v7cV@Xo|?+Z
z#g+8;*H@dWFCC!9s_!HD-|TL&vf0_$M#jdSJYS#RJ1W5QdgkJl+P^C|?q`@V({0Oc
z-OTVspmBl5#zxhw3xc5^9~@+U^Y(4zt`g1C*Vj~SZ05+>raZZ~KRGS!*xlXboE#hv
z{`~wL5gmQ{ijP*-?{9A{tgVCBMw#--SU7CWx~ei|yNYh3R?6dJy%yHii^IJ7)BNnF
ztiS8%q`7R<{5wz2utm$M&pm$hXlv-|u#@~}*$bYY5^dv=Jhbt7>;<#jTP~Z^&Z^9C
z+EDT`NX=)4!^%sdnRaLR|GwHJ+qHVlmf4_j=feL+pkb$nhgyw(L@HWTebM;#?c1F_
zmBQ@o>|yKUd}kV^s?4|^pf#08)=ETQUq2@&=fj5&6Yi&r3X6%g&7CXD#>OUNQz0NM
zEX?^-_JnPtR?3SD3t8CN7q6Ul`sLTZJEV6zJ88CEdaqiL<XSGLJ~#c@nVD;1cZby_
z*a;*jC-cc#9eHzev!{p00^iwYfBt+v|K#~|ZwvXGKR!PG`1y159rrm=$(~+^;y9N|
zRLzk%+&T5`$sP4Gk9Lc1&bX+g$-up^19VyKT<h{@|Nj176S<i!KR;iS;r;}tg$M55
zm6ev3W@2UrZDG0;x5x^#;-I;ixvHwl#l^*g@kjo*(oD_}+vWcAj~%)+>G${d!CRLq
z->SNFeCLi6=W9>Ry*&HG`MNW&uC6XDESwl%H|fQT3{V3;CPqfqswCm@v0e#jX>Wtp
z56t&=`^(GOdoIs9t(Y;@Y(hX+=bkw{@8*1R@}1ede7QO^JKvN5yG>K3h_vxayLotU
ze0_Z#G^&5BSK8a)y@I-W`@Y)Wo2tL(?J9rocY2zx>J+DG^X8pv<CP8w4+o7QUt1gP
zt?|DtXvLD%;p??}eSM}h{LxPPUs75+_29vSn<k4lty!b<w1rc63RkPs%GIk^$LuOu
zxoN=yho+w=^kQ}_SQEFnO@_bgq5a=YsZ#?313^0WR)5!89kzBA%M|A}p1@_xmwRjc
z4@yg0w#j?C-lw}^8;e)`{r&y)xu0?#-V3_oTX$7`T~*?&k+v#qtrqhEkxjL~zZoVS
z;aC^H-|zam*qa*?nYU(NpO#>^Xu1FVqQAesCe_z~#=7+M^n7NU@m5#=PTC*4I()sJ
z^OpJ)P>Da=toP^V=ZjabKK<$G>B6F-NeTDjlQ}s#Ig6wl=QxX62eEJ|dM)|#rDU1U
z%uh)hT3ETo46?3haJuaY7Zel(P2-4ahZ$6T$>4O^6MlVNET{sXV^P=?wl?bF<HyQ(
z@3ac!yRfwhI!*CNJvAlq>Z;I|_V(a?HI;7ta=kNWN^Z})JE@>WK<oVN+q0jYon8Ft
ziRUuExu+&7yDwo7aq<w<Xc5R?<lZlL<k&Hzq$3>P-rRIv?A9Bxziuy!a!Y{c%a<=L
zEG-QyJ|vWsmU41)C;$8NGb1a@%fg96lc`nUk?+gbuQ%8It#Wd5(g<G0qo=1A5gA$d
z{G2SOJI6vJzqwYfpP!xm`03N3OP40im?1IqPb`O`>jD9%Kho2tO^ex4(5UF#wjukv
zo|IWm$MosbS(IBeLcEqv`uX`eXbdUu-k#3cv!xFnJQ%VjBJhwP#J&H+b{0MT@a0R&
z?{9B2GBY1Oc><aOUK$#}5vVa~@?=AE^W#^qPTf`dy6Df3LN%Wm4NeOu6toC{Li)(>
zE8Fwq^<s83?A>c?UG`>yV>8>Dn4Lk(as-`fOzz#gXOeScf^GG;2XEe-d3kwx=Cw65
zS(IBQC<SmR+KDhTGuPDA+}N1R{_EGT9!cZ2^LD>`cI>d=bm#bZP(@w+^7?ptHh#ID
zUAwIQ{rk5jdOKfiY%Hfc#KSc_pib1Iquo8y=6#FZ`!B7DG)_A=$J5WRuSL0~Vtrly
zUv4h0L+8%*b&KmKWoK(2<WO9~A>!20@9Xd1A7B4>>fv_&&o7tHH_E=IGs`sl*h1%a
z6&b}N`@ij9TQ4Og_2c*N&84ryczAhLWfWUP8l5--XWZJF?Osx1vN`?yvs+uUOWxg?
z8MxT(<BN;RAQ$`X*|W#M)KpbNqoa|Tz3AZ~*37@ckN{rg5*$2PzV1ikj2RME<?lcx
zgQ2OZs)0ekAwj1-8<#9m`Ss;x^W@3Gvu4c#*_(P=459bcWq*5BEv>Gt+1Dq{oY}c{
zt?spJ*No=22t3-lXV0FD8ygg@%ikTjcQ5W8%sSUrr-cWO9&O#R!veIs;oIBWsaBkd
zNACZ+e{FxzQZGRXiH`n$el|8Xp*{hpE(gUH4KJ<mbv^6XulG?`Z21u_ZJt+i@t1kV
z1qD#VZr!>S7G+(V!hO{ZlaKMt^mFPEdUI~B^_N#yyAuz$?alamdr#%&E&n(nVf^UP
z`OBVvemw3s$-Nb_Y_h;2mS20n>Z_=*u(Px4#qH5pTMtR@i+23F|59aL{C>U5o*aQq
zDr#!`GX7q^b}jAqx3?A5)xis$SYh!kk`D5-YX9!t*1b}uN-8QW?(Xh-F*_XY@2gdn
zfX2|I6D2}UBK4h}oe@z{pzOD0>sC;Ge0iDgrC;ihbmyf7a{Z&zckj+^<CPYYk~;O^
zVDrw3k4m+_zD!JjB<q%nuO&inZ*PD8<>h6eJ^|M=Z*Omp-}3L}t5=(nkMr%>vqvL-
z-<}p|LOyc3ZoXgAp%%_LR;68Lxwk-@Fy(A26drOceDvw_=b!KE|97ukskw0BLeTsO
zx47Pg-R1c(Z*}ZnzI=K6?Ag*6FJ3HpaiMXlc6iYGINLjS?=J0LA)xi@$H&K8^6t*!
z6jlpZ=)~I8-2Cy&mnqlmA^ugo7c*JS7u2MblatdB;R1F3mix`MD1SGnMY*M`;==>R
z#csWeoZI=196R>uZu$MfkB?l}tXZQ1P0@<?<~c1)cyVFjk>kgW%icuv$=idf5lQ>H
zJ!f<^TogZl`n0I-?=R4%FwnThw>Oc`&du#!w{9KW>-&S(#aMy{;X!?)ySq&H|NEu=
z_0`qGr%!ty5`3iG-Q5j3e#OGla$$(p#}6Ms^Y=4m&4QJ7PJ80d&NhF3Vxsbz=<R)f
z{?yEwGv~#foyHqCZWQVhkdFup6T4sk-xk!F{r~T8&DX2pF}q4S7dp4Y4XCl;YGq1K
zPZtsrk}%B@0ZlwsetzaN%cN7?f1ZbfQw@Wnq9Uja+FATODJ$#L!^7>M&XCV9JxF3W
zavW5&hpme_xiR^8#<ew)d3kwPR)_bWoo()I;j~A6d*0nc$Bwnd?X3b$n~CYiflIOb
z`}V>@z~y1a-Cd@~`{mQGt_n>_Pk;R4#fj(h>-}P4=Cmjuu|9SBbn?YTt}|xM+LU|S
zEGIX2XUR(;VRb*3*w|PmAxP=4XafJ8tSr#f+m9a=bLP&yxjkPVG#Gt5f4{1&Z7h?}
zBV8HGq82gTD1+2fB0W7lpux+3|Ne!n4)cW-;Z9p6q@*_G-L(QOAAWqS7u2@ll`=VS
z{=EMo!AGV+!NG-vg$sk1`yDxc{PSUcdjV<b?$*{;m@itgvahd`RrjBla&uGalI6>j
z)6?5$&XoN4@uN_mK>eZ+El@rE_4W1izrVhgyuQ{uV}?Z9`FXv4eSIt{kbra%w4AYe
zrD55dh)0hfOInp^2nq_CWL|1{zyJTdGiQ7_JvbE1%%+{2YyJ4m8&H8h+blQf{k^?c
zR)_n;{IDp)$jIo>;ls_%?EIG&I5IywH`lwFjhBOmXG(!f=hTT44{pC-_dDs`S;ORG
z1urkD%E-#Xie|;G_C8tbqF-M!r|Co<I^52`aqHHl)!*M4UEHHlxGn#F-^!Jm++sQl
zGA}Qau`cU*dU`r2-NN(WBiGp2x%vBkw*B}~0cvTbq^O*oWt#lv#>OQ}mwGxZis<g|
zKYr>IS9W%`oL$X{_51&w>J(PbxVvjBBmk8JoH`z@EqZ!t$L`(5kB)HO+LC#=OH|vy
z(D32)_<B`+eSapQqo)qH^Ut&>R02)9USAh$VPOIC#IauK_F1#$eB*%R=)k5eSy#K1
z-TO9VUREn9DUq<RvpGA<R9Q*M>5!mnTwR^r>aewkjvQ%G^`3TNO{8(3jAhgI?dGAY
zLMAq}2t2CSnBuiGC`rJnW0I<}a&zV9XB!J1I?c1I^_p*28?mRt5VT&g;NHcR!OM3P
zJw4^!Ki9r~-u(K1GX<61E-ZHMpZ;%lV<Y3;-R15J13W7J2Z08q)O-}w)Y`N{SAF<=
z-rl(6MZn20NKMM|G2J}xPQt4zD}Vg{9lR<;Q_W|_0e<^G4fE#BYxyj8>GI|KnwxLj
zh>$YN(O}%S`1SSmpqa{TxwqS<O%nqh7x4T2{^dS1L8}KL8R60VH~06?2gO72^K*ea
zi&E?U{Y-D;l~&W$%wL#wb(M;$>fWo_n>KB_aP8W(5BC<nm}i{MmzS3ZTB($reE3i+
z_sZDa+gg-c1o9o;af&!`fDX6n=<7?nx+?U^)2E<jCumyd;$nBzIp!u+UtVml+BbRf
zWRZq^&y^vM9zHw>noQbL`FTb7dbx=cC$g}xcra>!D+D8}%cZcj0E#UYujN@xP2Kur
zPCh+7eWpPo6KG#Z^6@?kE2~K}W@s?(ZEJ2m%wAjPS^ngNAjrlK4-cO_dp7mysi`jR
z?w3KmT6Ye`dy`7oMV&Z4f<}Up9v$g?b8qkLOG~{Uzj}4*%F5uSUth^@$-LYayxi~O
zuK8ik&d!r2PYw(ZpMD_@wC14Z_uK71e*9Rla^=Yz8<S@mCb#)~(`ym<-?W8=OR=S5
z>94P^g{7pt3JMHB%Nc)td1;byfuX*>p7VLWg@wfjwd-N*d@>rW53*BIQ$dq1udc3c
z@9*aa4Jg~!%bA;-YcjM#+6Dshlcr7;-MDe1PRx!8?EG>D2?rQ<mA*c<v-r8n9Q%#i
zx7RN|Sh7U>h5hYKsobC<;@O#*SJp&Mj@@0hG5fmStE;QkA3_tiq8*#BudkZ_JRc_~
zCOthp8H<93X}ZxySywcIf`U{QJl6_e_ox5+3QZBNtZQpL+xcWw)`VNs{xX?g`%SW$
zjrY;V$H#Z<+$kt5+zctZz)jE=4IVc)w>7c5rv)$fySUI9)cx%3?BvYL%X{+dnU}+#
zV(s}sE2o4V2;WitT~FDq2h?Gdv90>Dl(Eai)KpbLv8CdCN`i!9i@^Jw*Vop9f)><~
z6&4l-4K=P=yY~Kztvh$_jM-hb_D1Q)>K`8z-Q3*jqeYG%IdWk~Ve-w&hB<R&?%cUE
z!#JJKf1b_4=<Rtrk(*l7d}n!BIB|Sr^z>dJ;M7rWR`9?<FLu`vfBU~KK|w-hW@aUC
zZ%uvi;>GoS-$2vKpo%tn`?`o!(4>NnuI~R&JDe6SNIcvY#PDg2>zD7p|Lyzz&YFjp
zmy?4-!N@3x%Rv!R^tP<JeDUH#`~N@rr|ZWbJ9@MgRDP}s)o$aH{k8ow_dLtuwq9v-
zCWE3U9!<^7-JxphUww*<+1<2k*)q@)>6qBq`!9a7^UE!X-Cf2bWzuowN(iXvjfs&l
zGc)^?D&*AB@3N~Il&sb#B_)AY%vXPV;}{-3{p4iz$FE*_y}YzE-e!69_PjT@x4*y1
zKhddi+BC7>-`+0Hy}j+IQuCTMI$B|CJlx!ldH2WfF6+IuHCudk?%atJ9k=J*wW$2`
zr2Emn$jxc%Yw~JpY}Tz?2U_j1a^=Ytfs1F@)mpW+wS8JD=+x2g6UEdj;It>)cb18w
zjm?}169lrZt~&bW=4L}f!^FS8zCu<}eOkoOlCGO_^XAQnsHjO3CIsjhUATC$^5s2`
zRwu{X+j6@@8E4F%-Mw||*KQ_(iqz}}d#k@o7^m@onv{^SSiP7X2QDr3UdkW>X`(ub
zs85?V?Z}ZM8<LKKw;2=`F0B0gtVh!LnSSw0&8c3OE?qJJFDTv_vF88(vcDl(Q!T8l
z#AfHN4$&$sDoQ#(&sN`7PEO7*B}Ii{=FFLYzFhW~ke1$D^wcYAd*0cHhuc4X|9<?)
z5tn@mP8@+9ORQH2ICb#9`}p|ynVH7zN4v!*PoMt1|9tMXHInRnG7kR!>pj*N$HmD_
z)e2S8)#WucHGOwyr}I*;sa;3E$=KJ~?Em-cwf6t$ZZTbu$Bfg?C>R<FCMPF9J3G65
z?b@|9f-TUDt?F4-Wi`L%6K7^-=AL)UpLgr;Q!p|4qF;O@c6V9tn>TNiHlLfU?ho1(
z$t9{)@Z!S4Ma4x60s^+@-3>o?@%v?ed&AsYTdwbGU${{5@^b(5D=PwxuFn^Ml;w+5
zX4%zRZB9R5_xQQBmDQuCProjD{bG`;_Y8}|MJzuj`pht3G&kQqT`qca8n3c@UrjD!
z<i46pAt50H1A_?)tpZN0nI0@o9Ex^ZtjphRNI2NEy{Z>f)2$9m{rBf5!<~J#(hd4N
zrvgKlhfIw8-o&JumDQ!Rb?=_M`;TvJ&DOUS6A(DCB5-klhRFXLlYamHI~R0>+?g4M
zn?7?xs#-;_MVXhEef;wB^7gvBO|0B2;`UlSKHgvd@kaPot>DmTZ>PTvdxYQb{Zbwh
zBGSypTNU_ZZ=ch`glA`F^2l0!S-R4(MZjqVo1|<AhvE_Tsk3ICYU7o@_i`&&YZIri
z+JooM)g8q^BR^H&-}%PH$>mqEcLuF|^6&3&ov1AyOt927#RN3U@aD!wlZ*=uK}+wg
zlYai*wSDnoWzaZ-f&C`!@O24!d46eWYVVC&4)jPGd-(cJ1qIJhkMaP;76JJw)24}?
zo~FyJU~fNvb@+NvzcVR0d1t{xrswD9t1Bu#%wOIYpf&Z$vuBT9zt&EybMc>V2WlEE
zS+eBMYHo&pIonB$Ni70SM^C(1;KZTG6`Pl*$H~cAugSMQZm*Z4<3aE9cW*7)cSra~
zW2s$HpHWqnP35O2>(%+zOGvl{2MY@e3rm?~I5e~Iu880NulxVit5>Hw!0L`iXH(MC
zg~i2>w{nZmFwd9UxN&0;OC8I6S<COIj(`2~#lY0`ulL6A<$khPu3lBOP;Ajq0;hkD
zsY{nGb={nH7Stq9KQ~9w%q-05XM{S3lvIzL?XRUt>x-VA66%4bHK&e8ZoZ7urcKj{
z-4$|30Ge+*9<9@{N)wy1Hm}7=_{3S2-}iit{N^fH|5Tjy>|BUO?Zn-U?y6df;QC#$
zMZk6E591jI%~G<yaep6Mh!kDE6tFsg@u+DjXjlf4rkzA0L)WY{FK4M*I{)v=Gds@a
z{$IP~oxQVTrt+7n8v&O$2}g-)F-({}S9vC<VvEM6luZ?iEdr{4rf)F*D)v8+;aS}6
z(>oIv@08xtzel}a!(-nW<@NCv??1=~y?e^MU|oD*9Vp<yLr6{{u59)-t@l3H8J==a
z-|=8qOSoV3`ksf!u4kWQ5^1}7!A0>Q$c{<i;OdyP`jo>eQ@!7h&R==^IzE=+-#UdB
z>*wu(%LPUL{A9nng*{a|+;Qe?W=qMGIXsHL^A5(}b2@%TZtff7C2#xPH@oVenib|Y
zv2r?lqjh}EwzkaRe>Sh<4z86bny=w_o@K4ndG&<ww29Kw9U5~y;UTtu`OQ^^OXvNs
zPyH4;sVRToH`D$<=EcTlGh?QlmkY>tKHGXfA;kIrQ+2+-U1w$l2&JuGaNx?US<Te}
z+tPSVwk~)uO*3`PtHhA#??*3ZfBV_weOPVM!WCY<`E`dD96Fsp{n2M#bEd}^-Xypu
zPcN&t*;lA#-lTu>n1~a{$72w03wdvMoNE8?#ObQVVq2TKZ}0g3`^18*xhtPr@-CM8
z_2k~2^E(-9Eld*(%+KAa{`SvTC*#ZMh=3`M=buC}u3)*6WpaGQwx;73FWUH>W^)MK
zrDv-Cg)vp>y8E|h=hnUvpa1i~|42rT3DRcm-M{DWt^c*~X%GV!tkSIrmfGLX#{A^3
z*l+jO_oTJ{-{ttS#bIXoZ0*V7w)KCNgX-qFe43j+*V^>emnCj*nditG%-F&cz;No#
z?U<jp=C$lJ_K?^+rRVuyhpk%E>ObBq7i0POTVl!6U6K30G{<gRX6F^;@h5*qjte}z
z^WXJbGh96#R=Zo?wUv`?8Qb}LPj3|(#c3(XPu%kQO85R#Ti>&BEw;D&`btqw%|5?8
zL~Y{VJs00E6*77G|K{%ZA2d$w?R|XzY@XCY)d>v?J@qfm=A0|QbZNE23MQ^?;Q}Ju
zVlMsdO0H9s5*B{H=Oq5}s|TmYR5isZ9!^4&?w;LOpX0H(bzV{Zy3^j9b{kv&o_+J?
z&h&Ye^IpbS7N7nXwsX~kNUMK*ed1ORB5$yszIea0;=~0lC8b3I8cIqtqhd;uI82^=
zI`qT9w(i+i#qQ60=Wkngdul=J`6=&JWhUEY7WrI_Zdvy){?Gn7AF^&sHR$NRe$X;i
zyE{FDwX37UX+cNFiNhU#v{gI}PS3jFc}DoB;MTdmI}Wv<{5Y|$VD0SZg|<d^Y}4`^
zRx7es)^H{5E}U?IS!}I?pkN?}h@fDl#Kz-(DkdDJGd@k;zx=(kZ~bm%$=_4=RmZz-
ziL-M&vsh=rTNcIoIotH-E9(Db<5OBL-I5>QthAt`LxZKOqr+y6a_~-(`unmD*V<wW
zs{<xqShQBts)i%a++0XdPF~?|*MmT-qbKUxoOT@fZk^1_?R)oA#QPeJSJSt;Fq?Hx
zoOAW`LkEuO&);9!B`he|>L4gM(Y>(pi_W6_=VrlmUVc)3+xC?IuRL?9OhEMi?a6am
z_ZB`7n!bI0U;VMpJ+W14)oZ<<hi~X-k~nx&le=-n5p~^XyU*Q!G41m?_QzpDMK9Re
ze$*xExNjGfTlnyytILy%nHwqu1yA15(|vZr-EWHAC06tFf)5(5CkwkfY<Bfuxcgt(
zOl%H!xI+2YRViolj)>LuJpRijC@9DtY_I-g>B=X%4|<Y2pG|pFK3O<9?ctRgfu&JX
z7#{q(R2akGd3d2$@up+(k|s}eJB`_rHaJe)wBty*LGlMZ&u@G9TH00kE3b?Fb39md
z)*$KWsZ}$h(xJ)VQ-t5yry1ARK9RF%_%pRT*+{ZbJ9h4&rK{B|15K`*>py%ipROty
z@{diNZIAmK_xY377cp+S`o`tytho#uj>bGav?Z}+jcr@-?e&`XH#!{(Ve9vrGWWmq
z^JQ-o<KH*5$R{^z+8;gs_teVrY85_3#)1^S>xX7Pd|5Ez!rZjA`mm&sv#mQu_M=_+
z$;RZFK|3lclg`W%Wmn%2^LzH}{qI%Y$z5)(`z^Mxt;Ql;tL-G`qa>#eyAR=qypzQF
z(%v81z5ngh1%)dkc1XwkcvpQWr~0a54=jyDnj5gcKP2<#gQ8K||D*1&SxrPAHC-26
zI8|9m$-lv>O%M#g=@G;Nr2-I*Q()qAS7(=wj*cV6EAGs)5tZHX=kuZ6{hU8m@cb;O
zU#q)~+4#rz17CmdGx7d>Z-@Aq#;2Fc^z?5f{@=LnjzE9cQbVhzr}w+>+}Yds!ubEA
zd!PRs$jmtSmHkYxi;Ih5fQySt<`3=5J%4O(Jr2FWJo~4}_4+s$o^9#|&t;yx_^QYH
zH~!d77W1U-_v~4$LY6yhpO(U$x9;}ARA;-mAJ6QcXuq5Ir}S-=K(Vb%djFd{9d}lN
zGCEhIl9JL&@A|8n7DWtkteaHq(oF3qu5QrFWe;BU`1HxTjK^pABj#9rJ;8Q^L%HOp
z-yQpZmUaxsifTVQozwp#+a6RW^5V+0IdV7OUVd_B;a3eMCI0W9mkV`tcq~ihetd0Z
z?w_x3Rg{wEy;-x?Twie019rb0i@C2di>_$q$FDiv{=@uAzqr=ZP3^~vr7izH);Xgn
z-g(nz?&R8C#SdL97}l|JXWBxG6OUI;4>+Cb|IF)Pu20-%b2D;w^D-eL^NtnT7gzpq
zUDp@R`8#xP$6V<S?LSO@+vgiEtNF<5#OALQlpWyWBG)*p8B{E2%oo*cT-=?#>G-2{
z@kghrn!8QE-S|n$&|UwI;abJ5?*o_1-JHXyv}4=9`UH)67j&*F&X;GMmHXgNSF)#A
z0{4HJs_vwF_bd)fetW{r#YKe4)y2hVvy3cz;Fd|@n~pCk(^r1f>h?tQfz1DosT1V?
z^>go^U%lnK$1M>P6+WKdon0@RCL1>hSMKUp@ZNZHj)bwc;)lCGR`Ij%SjYehI@hy~
zt}ZTe3UBQ9-muS7zV>5(ZcAa^5s`ELcYAm@w8}ru_<gNkhM{nU-@6~n7Cii2zrEe+
zHs`;{LY14RK^bqU<o*YD&wMZA31JX{7F){|^Nwfu{rShwz;;LT!_5S*x(gf}kIyV`
z?x^WeRF$cDHSup*@P$xoVY&WUp9K$c|MxLVZ!XT;v%B7@HE{mpS+9;QQ&Lg_rxX6S
z&-#^j=DnXXS?B5M$!B>FP3wDDb>eYs|LKSEa?GByn$!eM(+z&J`968|^FvC%wb%-`
znKvJwoaSr)6jq>=-0&-?`s0;Xu9BtPa@v^B=9zwBQt*Y@+h;c|E2&bg-+zDR|K3^d
zA(IyB@GSc?MZ!?RJysCzm6ZQwdCv`)D>eTV?mqro(eIMT7q3an7Mv?(E;u*M=1ydp
zkNS?=7g|<NWN3>KJrNmU{~Ts-$dexra_>y~^EPOwa;3IyRr+mCU&r;bemPuOC)&Ti
zejoJh^!4|16!OfM$IE-C9NN+5m$E+2Q5zP=^DOTF6IuPt_s&sX7XRC`1rB}MYyRcp
ztQ4tJ>SnBW_gZe$T=7=vVY-r`NKVoB2FdO7CM-4=`q#s|dsV42H1Jl6PyONldT*WK
z>fk@Q_B<{HWp6ANP2+z!|MR!Dg>2h5iBzw@7^}YdXq3?Z+&(`+EopTV*1fwj!#6EY
z(u-s6c_MA_?A*EdNq5{clzTd4#kX*|LQ93MK2xP0m+kIdUL1BO=w5RAiR0Y=WgYm^
zJMLJuW~aaL{5W6mv+z6qlGRtIJLT5ApK)$)RAY<%T&{gn-ei{lH~Q%7*e_V7R{5)E
zuTbLr)?!7kKYz4YcWpUfm~S_y);gf%Q2Ub(`NeicpLPEAUdU)vQu-vYzgnxYqa!7X
zuT<mTXZ9x^MU&?roH9S@@brXk%hMnA-U}saxJ}et{NY4iy`A^Z@Pbs~i){H^HCN=G
zw`lfOl>K74zGtzeS)FoZRjvEXW2%$(#$KCscV*OOYx8UUkB<0@pNO@ZZhPWwe7*bn
z1sji_W6&>rea4gL-q916Kb3yk;#1Y1U=lvf@4=fM->80Q3{TQ3jJCL#eEM&Evh~52
zac3=Sk3SZ-{2yn1?tb#76)Tx<SbwcEK6lW5|DQu!*Q6<3f4k%J%LlzO|KIYMhp$oP
zE6@pMzpk{ikl~TizlYD>TVMQY53PPY&GQ6ymI+S$&N=^~wx7zMOul<fmJy-Cf)kG~
zuLk9R7ic-)0xhONG`L^@F+k-uh$bu`*XXG#AUN?$%E6_pD|TC~6z~t6<7+AaZ4RB=
zvv)7!jQW?Wx<2w$R8*|sP*RedaxiW4%bNKe9ZM=VZru3h=4STpqm%NcRldKs7qlp{
z?(eUQt3p?oY`(+C$M@sMj|VSatl;%`QDMF`)j?3uQ+&>xIV&PJr|pVL+kBJ9p;Jh8
zMfCP{VT(FCmT<n3a059SlE6SL!p<i{1@U1sbPd16G1*H&N$Jy%b#Z%7E%ly0$+qm-
z8A%4cxIHHpI=4@<eg5v9-@iXULoEaaE1&V0%|5%#sH3C9X4i@p8aB4Jon2j3D_(-8
z`93~68tNb@Sh?(qOo)ri6T_Xmb|o1|$W>YS`}>RfLsQ@->E?r=`BnL>S<`f*CmE;v
zA*2=`IB;P3uPQ;oiR!N3!7XG(Rj(-zo}8R)^J4kcvuAs?!`D4{^=j3k&W;nt%e5Po
zl$88Eo}ZiRd3NX3Yipym8DO<=)%06x^Rl?Kl$6d-hSaH!p{v7E|Nr~@eUH+MhYt@v
zJw4rUrb)>9jt(37T|yd4N}pI*+1T!F_snSLlNDibb$PNAoB}7RznEYDPqF^rpYJ?P
z^?yFHZ_T<Iw9v)n$<B+Q1Ugau#O>R&tx8|r`MBd?f<pbjKUda72D=JDtF@jb#m~=`
z+oWy2$-%>OB*9>XgObvxn3a|*Iyz1Wi;0Q3&9kXAn0@x7-2eOk|GBqviH0Z$3RbQw
zQ4Mf$dBRy-UcS8W@iESNX&%th&b6S*7hJ_jhF;ppkZ;Z^Dk$j54GC+_j;=1H@^^P!
z+})WAK0Wd564PCEsH5XV@$$y5C+pUPX(uKoYPm=y7)YF$sy&@cQL^i7+T&keU#EQe
zJmKrBtI1zqU9G67n4s*=w=Qn)tcCXZ=jK?NvMuWHsIu61WvYYc%R{Z)SH6FK{G?M@
z%T4Ox>({4enP#7uXS=&g@#VdrpPy&m+M?OcCmUpFG^6I{C(v<5T48HeB!L>SUR8op
zQeJOwZMCfXV=*sv)`KU{RxYwhzj*z8!QU%lSyxxN1_TI1Zb}IZRJm(a@<O0j+Wgu4
z`hTDS(I@BTS|6@|*3rSSDedg6hwW{k<@MgvbPnFU30iga`2G9!ym6oc;L@%Q8w_4u
zU48suGrQ)^Kl?>i{an^H$xSBOtfTn0x{!dt0m#Chpg@(oH@9R8uZ`NOWMwt$<YaZw
z_83vs=AV!I?O$A22wFk<=hJEZl9!iUT?GX_o3B`}VEAUd1LUR$2b)E*Eg~2VdO<c+
zgugy^Yist)3k#XWx?No-mVnk9n`B+#Xjtk!U5g<tU~BgEw7<W;R#aAA+?pNU#w)#S
z5jZrWL}X=sH>aJwvA5d1_4T*K9?kqAQ%sH}7<>qNz46J~3w@wjxW(@Lj~+b=k?<4;
zjn9LQlzMk(C+NV9(%08Q1C2HqrJmYQ`8n;>6wSl#^0h~Hzu&j|@6-;DDC=z@#TU+;
z%`v;YDpWgcZImnH{p*@{i<WdvYC9UK`^R!IcX;2rXT9e45-u(A)aqIiX*Rp}`~CX)
zhRMeQR)uIz(~I>2W%L<7kB;?9@A>s=b<W*gQ$foQil6z!#KeS12u?I{2xl@jTz%Cm
zAwgj^=vZ8>#wl5bPOhG><0BS#a?Pzjyg0(P?$=9oHa?jZi#%qD2n#zO?~|1@PUA7n
zx{~ns*H^BFnLd3omZ04jxwp0?UR>n*<M;3E7mA=vQ#mikOnPtC*QA3@tQAvpJ#TKa
zPx&$Lwb7aB*Pi$+&D(q;@<l2rNcR2t<SpuYX-fV7ztbBU7@nP-?Y=r}ZN$bT)_3pT
zY5Ax<dGqGXmdwi)&*zrk*p@48nsw#Dot?#6J)lm%4gdQ+pZWIx{U*KB+iXeA@|6-Y
zJz8E?NhL*#KV^SD`7U;zqI27ayXE&+YD`iCEq{vInzb?YwAin2Z%==DdAYExEG*H;
zu=yZpr_zsaxAQH_-bmQl*~!>c1hfhYR?hpKc2{`D&zdh6-Fc+Vc)lxbUHRu0Q%~P6
zcjdiZy?ajG^Yq;z`Ab|_-EWEid^@c!8*|VKgTlwhV)j%_T<F~X>EG}7t2HK_`SbHL
z=<1om!a~pn_8&iguG9dv(mrje{e0FOv=>i?b#>6v>p?!}B15N?=&9MX*a}Mv%9c%g
zd3pKeoyF;*uJd-@um3++OgBnERP^W)&&e-tY+T%>^!-klRoNSlk`fb85X`kI4GMH|
znP+Iqe%9q!pX})e2b)h$`>v|Be_H^fL36>mdDFG_PJcY-?rhNE_Z1(LL|x~tto-~;
zP(YyJY+CZiM@P9DisDOOUrW8atMtjaxw8)(aB%AhSe^oQYY1pDb=}`z51&QO(b_xx
z!HZeJt|uL0Y`KD#eh>0lw-c1*9336Cx@?3OyZ0}1Zs)70sMt{UHp=w<<Pgp1Z922f
z^OuFJ466C@u>H&H>;B1{T^%;gS$wlNLGu^~4miXVi7lzRZ))-ULs+2EiVpAg8^42%
zd$vV`rtWTS&0ekXDda|u85^I>i4}p1H3Ap0cu&_`nsBg5tH)-_h7AUBwO>QG<=^-F
z{q615i4(wygY)!s{pF>vug$n)v+`OxZ)A~ZSfJ4k>F3`z<!5=v%;CwowPWI{(ABFn
zKDit{b_}$K>Dif?&HerSpP!#Ue)zC+>qK`UVPWU<^K3y^CxA+Uo10RfoLPBIOG(Lp
z@~q~x1kfJi*xh9kv3FKp%3jHopf+)FLa0{ReCIE^#=Oi-79}qxbSe42Q#sr#ZIX4R
z!?O6<g*}zUVe4X6GHCQ4y?Jxy%gf6@KbxKZ>9~A-z~(gHbMx)j9|Dyvrw-rRmMeWW
zddq>dlBGrGT~p2md%2!;vFW<9%;~vgOZ)vLjr;A@{pJJ&KG_(%r(z-~T~~fiyI1j;
zH|^}K*6!}M857kD3?4l=aAKnJ&tI?CbMo;WJAT|<eS?e3JS9{1vkl+g-}m49>ucw}
z)TKr1k3P})vZ-s5XRO_xo@V3SllANEw&mVkRlV)WL{@fo(0XsZm>mZuC^}166fEdc
z@;`cKXEEpqsdekt>BR1uQuXx}C`J3vSjUoffO%^hBcqGk(SU`wbyyGh==t9io04fh
zkNf%g$IqX;dw6toDe=E86W%*vf`FR;yfq9O{-N!BvY^!mX0v-i1;DwvtHI^#slA{L
zF>L?-wfwDa=9;^F;_3)nQP&gUONEz4)gE6Ny!^xW@6T^-&DQE#a;u4z`@{F|$CEaK
zw&gN0Glz;)zIt_i{rOv4vrkUd7MHayOS!TlPz&6sSW@!v5bLYs@h>&Mzw)n`mbYe+
zM^lsCUd?+yv_Z>b7Q6Lw@biZUK55+d|6esIS>3sNmy?TYkz+HPR+o+O)6>(_FE8_*
zq7ylZk(uqozu)f{TStIeM5=Ka85+FrBeu<ftp$)Wd2r|E<|&!KnJcxoga$t0Ykj@f
zU2s4D4bT#vStg2FT3)UbcW=6xQ}f{<`=0OjsztQJdVFV_1#V1o6?L1pvx${^Q{rJZ
z&<P*5RVH(-N|nsa!dgKcg)?^!9XbT^|BsK4r{oy+{0ZH?LZit4C=*-ohG(;LZg1-a
zEu0q?UcAV|Xye9>B`+_vPE>Xe+FMm>mUF{lp;K#!L}lKcyLTfuCb8;9ZAo}{XD15_
zOTpt~yrOO{E{X=lyDC4cfvU<23!O!{fAlz+{#vE+$zkcN|Kjhn?t->a$Xb^jN!qwV
zgUO+y!eafNPh6m5r7|xsTNAalD`scWN`p^#cJA0=Q2fjXlql=||1HnDw&vu~Zt+kF
zLBZAoXH>1MW-arby{Y<p9@q7^?t=DTuc@#6j#+U|@p*S?srQw2v9o)n%`dGE*9WaE
zTD^KTmvZLwhNA}?FD`aBPB|d}TItf?&+pzR6PO5!4CD42HzGibE2im2YdrXEbu?ab
z(V`R1h87k9->!OAOv^lUW~TAW+u`98jW;iL?|=08@!^Bb>_7i}KL7LI@ArmMy;?m>
zJek?~Qa(L72|BH6bDHnAoSTcl2{SDxZ1vOw2OPrI$Mu#^JqNBcC%XUsB<&l*@kg{j
z>FW%eN+V`Ao`683g=?d?gSL)?8sJMjCrg-QOz2X|j7vyR=oZsGl(qHJhD7IQ=jX47
znCoLUyLX;#^`?S{PM@Y^AN9^Xo@gKuB2oE8w&CPVue)iJH(xjnI?`Kz|DPb&lP+Sq
zQ7!xT+ox@w$;vHuVQIJ3v`JqUgI3ta?iMRAFSjarap1<r<WSIX#(|#abIb1~-rAD+
zL{HRVXJ4>^#fqKNZ0r^Xi)rmQoB!sXsDx1phq8NLNT5-peEpw^Wp8gyRCecUYHH%(
z;0Op*xw|r8A=7NL+(i)^lR&$C8=2X&bQ}dIh8Wz~vfW3VjaTZ(hlhs?^@C=gTCaQH
z#fwFYP6(&=?K(EE(|3ntRS&1I+6%jbJg3q&fBbk{-Z<xmL6?~BsZFV;D?T0-50&t|
zzAkpR+wX60Z*I*NpKDbL+J<`c<VjC(TKCz%@87R%yWek&i$vCVO^x$5v`h~Te6q5G
zlRxqNl4CBh)`!2mynNNhU@>d$=9%+stBsP6@vI74d+Jat_sO}|+qsk_ok~hf?(Qy6
ze|l=_mG$xSRlTQ8u&pkGB=x_bW&D4CeO12Aox1YZF)x|e;#C@-O8A$oo1c7s$+07D
zF`yRAmb|-Bt|u9kl$0`VZ_|B!ZSCW!;c=kwOgTBpwRIxT(X6eY+8(s4>eN*2<Fib&
zKYaTZ22P5X`&R}pzqGqN|Bg*}@Y?rTbMDxLid3p8t)G+deu>(Vqb(b2eiliYW-VE?
zBpI|EG4IX{`~QC?gFKRUMuL}7QS#P@hljyu{BVgVXlQg?TI#(T+<?fuKgY5d)E`kz
zcU<*7wz-&Z?#U_X+K(<Je)G#~Gt`^4V1dG`D=QZ-TH;*)_p5lXl<A{4H#gV(dbu2Q
zCZASU3YT%(nFnjP-&2a-mIFGUA!=(Ds37NR%)GuS^>oJNWxRcPaUpLv&(hh+wOZp-
z%pHsU{k6eS58iOhHp^QQwKYr2%W6~Y@3L9u`R972%_Yq9WbW=R4-Zrc2DNbKT9+S7
z+6XEFZf(m2mFWu<l`hTs`uh6ieYMp+dhbHs?G0fJn73MnbydK|-!XYdBRDdybU3&3
zt<-pV<kC{_%nJ(`x98nWy0@p2t6`F+Y0ixa3l=C8Jvrg{``g=}58LHKz)5>c>g{d0
zTQY76&AvGGl&$#WtE-oEDV?9|J~1lfg;Dc|53@}^e1CqVQ&`JO$~^H9%dIV$&ezsN
zMs7~y?UgnU3skwhDrBV)C~0S0T(sx+yWKoeCLW;HiPHkF31*-rXW_F<4(1pJtUO+`
z>ACj$MIN&z?oM$&Rr+h@>hN_z_s{olPCvg&BlC*-{F+12`FmX#I<?-|QK%fcDkLya
z<#P4MqvD|Dk)VZEKR!Gxd3}v{^UXDfIy{ywt!BF1@Zdp0*7cQ#_nYShtvqg1B)WR}
zB9B=mh5u#x&b)TMxXAV9u2SvF&(A^wFL87qb$Wkq@6Wf}?}H9q0F@t?gKfFGRYYZF
zeXp;Jjoe#h`tRRA(5Aeco10b|C{23#!s|hC*{ds>f22$l-rScCd0`XF7;tGz%~b24
zRaeVF>yg=br3&8OG8I+j-BI~j?d&Ym$J_7MEe>4lwkC438)Lu|y_nco(DkRb)!$P7
z{`v|khP|ijtu#<tB%pEr9O$^A_s`v5JPkNiI-@5&v-hZ4X1wG4dm{Cz8V3&^WMN?u
zkds@t$i{uTe*7}OxmGN!tQYrImxGp?%$_})OSy>OZSlmy$HzeX9sd3K>|aw;V^Q{I
z2A8rDSL3G~zB_;Z{IRHiC$sc+pK|_BGp(*A(yg!m$-DlsO934|mwBmW#R?74r<N9V
ze=I<)c8h`rf4iS8A0Ho2HkuhCF)`%FkH`Iz_H{Pm`f*F#dZlvi?~8Q>cTK7e9XbSB
z8V@>mDWyB{$@*KMe&6zl1ItBL{S>>sEq8I~>aZh6j;zo)f8pF*>y?3v*`A-9`?y`c
z?!je$`_MoaxuEaw?#{HYw=*;}{PFYW#*~vnqOL9?OixdCcXTWW{8N#Al*w*+K<_@S
zt|if`v+KS0W!(j33DDj?P_23Xz^2sGE8_Rt1qKG5nQJ}$dR(=yt6-%IWT8>Vo12?2
zFY}c?)-MmL8Jrf_SYEt%(Iop?k8Sn0fE{u5T|an1WwQ3=DxOO1E$J^WEd?E?xnqZg
z=u^#KFPF~;EjrH2%LDCQSM!~<LPM#4Wzo}9Id^tUJk-kl`P=RM&nMOAuh0O+V!ytb
z*|crBw>PDp7UTN0&++52_*EK{6eVP2G=9Hwu{mv$dTmYQm$$1|-}XP9KEKv&zFn<G
z#0CXv^SmWlS669uoe&OM;xVyhqv`&?-=cSwzhAeJ#Z~38-2Tit2bQ1D+6r1iCZ-qD
zQ9kwDo-6O%dDxn{lqJ39sJ8vxU-sbFNng;pS%Hh)R%-Zn98G%IYkm)OO6<a*rCvHw
zTR8Of^+nyDfCfYsWnW(h@=ZengP2~7$F(()p%Q{814>jsXl}ll^X1LW$*tVtla|M1
z$7NajF$NetJ9h-q$^5y9f3bW2vu^!;0fF-xPMq)n6;=BC|2Q3P<K^Vy3Stnc?Anxa
z^3a~j&!A1`+1J-C^_y$e($ca5oW@sv|8_gyIQN!G(ex~roqdxHEhIukPHyP&i(GLh
z+3x0E?GIPO;|+_Rc!)kVY-wu?+MXACZB3-}biLS!4GE3M7(^<?ej6>G2&z#*Nt2nK
z@4@TWr$0VE9x5Ri$T5Xo+9X3jSk33a;tW}>l6h^-7Xw`<@|=G%X|C2@P(dalU-N-c
zSlurqQ10N#$?6|JeoVZ-uNJfl&vUX`CXd6!^f#CN?Vp0u<(-|yKYsn%RQ%je)YZj^
z<*x9LDLk@PQ_kD}p9Ak*?waplG<`<L={GZPbgalu2c7E{ySpqjFevl;ySpa2x2Av=
z;ijFDsQUV9>7ouB_toL+mt|jHcO%CPwBoN@OgBUX<mxBKdL*6y|NDFLn^WYfeH(?`
zug}ryO7Yr$=-lS&B;_P!J;VBcHMh28E?#7F;M7#@%=`On&1UyzUS4)_OJ=aBs|xRp
z-R1hZx3@k0_4PGq`@LQ5uN4|flk!$tz7RG`Jtgw%`}_Xc>71*+#4Tdbm^rJvnsL>a
zxVFX7c~)g_PRugR4w=IK#y)CGM&s($+N;CYhcSqFI)m!_W4+RvAu9xGe}7B;@Zg|U
zSI3g*T|z%t&d#%)4O&?~X|uxB-|G%qwtp{<S394>HfOS3!p1_yUxLe74=Lri*c~bo
z`CudzaM9(>#)xI}cmG!lUpJ+b?Prfm%r=9s8%&SZ$gBuGCiq^1`~KT)%WmdA%-g#A
zt&_{A^Y5Q!9k6}6Htu_bf8^etEt&}l5A9CZUU%M^q_(QsN<cI%_1nhh@yk0VD8Ko;
zQuEl3jHafh6~W8<$|N*F7nXu{eK@spZ7g{i)FrMzFVQaACq-r26m^a}cRWJPBMl8*
zqJn&Kiw>Us`Ox}i*Pgn@ztP(s$LwXjx;OUS|D<e*-DWw@S6NzL{geIp>6{X~{{<!Q
z<@BDd@H##H^U~G3j*APJZ+}v)k?o_hJ!sz#xzcTM|7Y(_f3zZM>50GB7n=4oSN99#
zl`ZYb7qv0+*q^a*;X=^5VrF)}3ya<PK^?T;rLHP>FUGy*jNFzZ8N0jesAY8AsxNWV
z)R-283%a+N`$)Pzn(?n#L|V?^U+&IVdORyqb|v3(-MlD5Y~ruf6%St@l71@uy!ey6
zalw9pj0K^(;$2dWNp60BM30JXZhWU3yJEiiM-HWDNiq|{oEJ2{)4i)vz1>eHKT_h)
zZCyQ;$`x_8;<v<o>PkfF)Y|?>&)zX<(=O?kfyX;nRrV%q-Bdfjb@KO`diGnU1q%fV
zUOJd;zU<(->1l1}sr9-k_os-NWZs(MD{UUMHp=v^#fE;y8{6|ib&=Q6F44#>8G@Cc
zo}6@;Z!jq|{_5k+Q%|1H+9I)U%6}8hUk#b-4fd_Ff1G=A|LcG8XA*sWZ(14G)E!;F
zH1PhBd0F-|ufJ$i{P1j}>6fbi(hOETaVOJxKX3Z{{NWclQ}c(W`{d7i9#;JDu9V%t
zRN~CGjk2D*x~+D8vETAHEAvz7m$OX&E^i7+TE@Nqz%SPQ1_@VUdY0Nf>1TC4Q8!<8
zM_~eE<KAOQ1vTCG`9n?`L~ofsUEI;p5!8zV?M$06LE!5hZKaoevzqM~UteFpy!Q7u
z&=AC(YR$-^y))FNX@A)=Q*Lse(f=i;H?{KLd|q&QBU{u|Unz!MnOkDM{hs{d42)%e
z^r!Wv>+qZUZn6AWyME@||M%+@rirraa;9&+9P_8xWWs&Z!kzBtJR-|2wy1W=X)-As
zICb^no^3zGw)0=@d)wnB#2{O-+*how;_6j72F6D@cXq`;(8^d6P?K@L$l`5?mV2XC
z$Q!%q(S~A7Ql^hCEOee(cVAFc)OB^(+8I`*TD(#w3K|+3&8!X#=285&56{2-?DEkG
zJKjhBd%4MaA9s06hy8VR?uMmtf4g++<?2>2a7<Vq6}+tGZ^8HcnK8oFpHDr0$Yv3|
ztdKv=?@!AkiAVO4w}gcyXRdk2{r|S|&CmCBZk_xe#NyB}X?@+{h4+^S-%qgD`H^((
znhK*q&jQZBD+3lAGCy+Opt$@p^SUQdH@7F+GMwF?)8g)Sd)3`LGwkc@4lVdp`s&KV
zPft%<DF}i3tVy@G<(lMQWP&#|Q)WdU3;lieh|15~o0kYregC;?eNvxDsn4u!Idd%j
zW+WaGlRdu0H!WzsY_3m?$k9Ey+h<tl&r<%_KhGjP^<ZDbx@9NV9&q^YVE${0$G_63
zoe#y@CokFh=iYa_6r&kD7EFJ#)9%bKJo!qn=;WCrM=#DXU7nX&<9(BP&(nDA=tY;p
zmfCb(S>!Z*CS*XUewkeP>r+#;K|7SDYKIF+NSpw5fo7RL{-%_3CQ$1C_Bg*go6ZJD
z?s;3sT;Trc_?y6!awp?=omp(ES@P}3@|)Twrc3^*pI_=BY^E1~WY5#=m!YcW3^(_3
z2P_P$eI;?}ng8>(mrt*Zsup|bc{p~nx#X+q6<0q$IQg4J)Z0Wn$865--le`jmwexB
z{n2S>-<rFdX8pT5H)yv3%UKJqwnr>K#7<4Pz2#&IXgdRFO!dwE{qphg@qTkG7BVXf
z26CKv@$LQn^V+_%5;j@|tUS)N>ACiNGx0CedT-p`|1s=9#uC@5)$*QO*G@{Yab9fu
z<b%ZbA_=)o7eAiuun3dgw!!$#xrU0bVYjsN_g3kJ6f}Cgo_pFlH$3N1{JWz*Kkg*U
z>1tUgG4Z`MU%un|`D0D8X=QE41QUJlhVM$N`+aIx!I?iE-}CKqcY6N{-gj9)W^Wbm
zel}i<>TfyAZiCv&T=DVo-`?Cje4vqercI?$U!P>XOkh7#+{u6QY^qqVzn|)^_wK=B
zQO#LK$D-u^p4H{IUMsl%kg7)T4eO)t^sV);`8U5l^~pRritkv2Wpvlm(#KP`+xz{<
zzPJ4Jj*|XJmi4g*ICE<5?RzHsStI^+>K<F04<}~rI@~7RzJ2p0vG$KouZ4U&b)$S9
z=%ifGrq<2r=O4X(eR^H&?ve%-C9X!v(1HR3US_6_p00&S-)}zA39<FnU(eZLc-!FU
z|4sWN4o*xe=gx^!TlK_5p!n#a_=hWB8lPy$H&oU=Ju4&Vl!t8o;lKUCYZ?|a>uOtE
zTGzz+zlpCrHz>8Bbb+Vl<HM%gi%bHxPnlRIDw7#`%H?0R%<`$aNlQE(e;0{;FuFB;
z>6Fep-)H19<}2%-wz{-#&C9;?;fJrc%Bj9}J^S=sL%JFBJkNXPUuMh<es-k!)U<u&
zo|pUFUOg>lSUuHL{gGU@ZMt^Gk_pNVGP~u9<NPOw-+Z>`3aI5Cb6~wAvy+pP(b;{;
z=jYk}`1K34kISuJ4s>j;(b;r!ceh4o(^W^!m-kqkwV56?fA{Ow-qXt(oK5%1eJynQ
ze7d^(;e6xcoP2iYK0ZEfDKAxbKiqoeoC8vB9}d>a*hrk2S-<Z7=FXpAKOeW0|5w0h
zWc0=6;6Zjv37IoLFU<Zv=l#d}H|JgB_B(ty*eqjHXBWA;H)YMkIregA4mPuM+SS?p
z^jtoJ+vn~5X_98zW=T5*C7n}J9?gmIVl!B=>+ipU*_B${@5)%OeKP;tzW(^a5c@3g
zvdS~2xlbRog)*4r#{J#*oi|Mc)W3RnXQzv+YvBI6+ORbd3z<~~JwM-ckh=sL=*+&*
zu%SLbVB={Wo33lV;+g-?%-{HA-K!ja>pr!pX=M*~FFx7qbo!Owj0W54vneZ!|DRnP
z6X||$)ytmjQ}WK6r<!Kx89twH?YtzK(f!!fLY3%A$9LV?WRzTI@_I4P72WJZ?^WHo
zo^EMh^!WIq_m>&H7PaluE76-@QyG1I>&EnC6Z?4`%By!zyk{z_`7l+fTRCD~ghtTG
z##NQV=bKHcZ=U+T-*xl<GJ&&cDFxo)UCTTc$*!LvxmBa>M$oyFR(n6{JU-N6)Nu9C
z1I_L4|8Cy+WP|F$$#ccMtUq4rJCe4J;i;D8Bw-HgOuuhG7Vo$#GR5SEh>7ZZFWHU9
zT#8Cg{st|5*NxsbK|6dM&))YDTeC#p-rjyZYpYTAwKYB#4@y*){W@E@J5jQ#=8H7L
zVfEYvPj8r=^#5_k=FFLo2QA9~Hr_g_Bv~{6+>&*y>z|+Id%yYQw3_&|nw{eNYksRu
z-5c}H$XB5GR^7Q%Dwk(S?KwK>Vg9E@n;U1m7QeAMS@yJe&G}QdX>8|idG&u<De+GI
zQ~t90t9^;ulg%SP)R*hTf6Z)&{&9azoY;|Dyuvqj9CvzIzHjOF$8Kyrann;Q?NaXE
z$g5DB`R~iM$Sp5^&lj=&TGbQxi<g&s_Ke?Y-fr)=wcEWu_mE|ejZB08v$qNxe9RQ2
zeeb<Ez2R;$<BU(IqpYsKnfAtRZ+7+djR(Zs+wFMe|Nq+?y*#pTw`RuHq?u{2^DZsn
zTphM{QS9!rEjc$q#{z%)^?Lp51)eUKa?B>Q7ySEE`Reyi!79rsMY3v-BX<S<iu_Ta
zoqb+qo}FI!MZ=p{^_y2uWcw-eU|Or>^G62vmUCuY)jG$s;$@GvS=pNIMaP%S-m}@_
zR^9n|izgmCyZOS!*XLexiIl0#zItq_c424wp4gs}%=b<Hn(n)<m26^nVEFvk+F|F~
zINNZm>E?oUhSS#9?>oufoiCWgY7lVt{M2<HPk&6>_k)3BhK+B?+3W}JC#Rjex9`!*
zq$R;`e+WFyNO4)9?RxjojZJ!8a+8%Jh3;SU7V~$v%X)ldXYj_F%~l(or|;*#lk{PY
zhnrrH=e37N7v9~dGN-)U<kFs;OY7%0x-Io>=w16;r~hwho#j5w7x&e5GdF3zkiFgP
zuiv%bXv%{J2~12(9Q^#}H~j7c?QIm(jRGA%uqt%5Q6huSiN-14@~0Nm&r3c0{7;?T
zPxHTDc3I|ZI{MM|x73pd_tK5^-v900wf<Z3<b{0pYwn$Vc)0y%ara(ksn_dPmVA~!
z@$Ypx+lj(v_RYI0V&m$UM;{9HJl*zj%dY~xhc27)?g+T~N}fGs`|b1(KefZn_S2u{
z|9gFT(!=aOGqzMoxAVVG&Z}9j^ZNK+_w}CD2gJ_aOSIoq*x&j{!s6HA<2(}CX$LkW
ze#w6?ykF+<&(h!|;g`jZ%9$6x+i&?%VRZYy&6$1o(=YFCQ9hEptiD_B&Axf9VZvq~
zYsI(!_;9=b>h1E4wWr0Rruo?1opyS;<laZt&o;i3IoI}}BSky)^cv3x0sUd7Tz22I
z_I(Ub3cm2^-ptoA#a~}t1&v;@a)~?ub<Vr>_g!e5+@Zm;tQ@qPY`r7%x%t-aGq>4K
z*j_WcWa}Tb^oZ@sp6Z4-eYfb&s3>^+dz0|ZhROfCRfTq3kJV=jSB_nEYyRGn4`P%3
zZl7OvpJ(f*ho5Hrkh9@6;Vam=xHI+4{xmlK&L@vu1Ha4_oqOYAcQ<E!I`>|6SEK(i
ze{TK^xznCeV19AolG+<B54DplGuyNmiXD4zsNS;XQ>xD$p3Rr?FZuG_@7n(-cbidi
zl}dbG$HVTM7h^ikAG=w6<MqW=6aRL-yl<WV@$*ZI1IH%uZnu#BZ#8*&r_~LM%A?aR
zB(GG}>f;tP<7;1P^P(r&dD1*5@6Zyj72aX_!l|(h^}1_1EB1Vyebw4C_`>3AC7WZ#
z*8Q#*`S^Rn-{0RaFYy#!v}lphoqdm2uiv*Qe0^L+b@gV@>`hIL(c8K!Wy)fcy$hLs
z|5UU2cJobc=UKzG;xoSgEb6h74_d#0^YmUTm#&|)IoH>+JIv>7UKxJi-pam<i%puZ
z`EM{+?7i)?UQqj8M$*g6xhmFTy^r_qlmB%3TqWn;X;Ef1XS<ryX4M+bblv=1^~;o*
zQtoN>=XqvNWxQ;^IIU5p!C2q4dh$QjH@{CM3tubN%+B0rQ_`@oPH)bEwvSJ@M{G=&
zS#O%_wO)VU)3-ma%l%icOD}kApv>lXIO3JF-P<?bWy%)MwcOw9|2iuEnPvZb>-NpX
zWuHVpZh!xAQ`%AG-O@L0Z3?`%@z*v^yT8r<$7}n`=IU3D7FFuc&)w5nJUv|VaazS0
z)10RdW*u3!KyGG%w0_(ki`izmi;|9Zf%fcwd3RU3`)E@~hf{);h?tn$;Wl2&%110q
zqj=QgubzAPnL+2!p>G#<v^0dgO+DV?yU^3HdPbD!ee0)t`}T5)m?q{+q$ay}DRMme
zXZZQ~+iuG|w^wQ2c5i<wKWkZ?vW-Lge!h2&_<N@9{Si4U|JkH=&wg_@u-9$ho0rFy
z{P@4`lV{I{d6jwhqi0kU2$!Bb^s8k};^X+wZx4%C>*kpY*IqnsnmBKA)vmx3Gk#pv
zd^_{kuJ!ljX5VmLHQP|JW`B_7KZcJF9+y7~TD<0@T)}0>;0sf_SZll%M&)nLy=@k?
zHOtk<$EWQ^e#F|aUteAx7LTuS{Q2oAXx)mc_cWEwn;D))fA!mOT|ey?|C^ruN#(ic
zv(j#?+4HxG;pXG`YIAe{cL|NUd@`>d#vA_JAk}80e|h`dy$g3$rV1Qap8P)e&#bFH
zId!wPn!G>rQ{&;zyX`{jAE$eMRo``J+R@nz5;u2B+g5+u^8b63ubce#_c1xsX3p7@
z)~NDU`jz>&$FAwlhju^h-9M?m`sO;>y9d>xWv(S3epyvL&$jx}<w}=BHqT@%X8)->
z_WO-T*N^SqJD9TLJ{nv9+Zg`+MAYve4f5Mpok{M0KF@P@)3Pb*{&G!CO&`90Pk(h~
zC8#1gal*sJ<r3Fj;Tc?@z5ZSD*VjZGcrLQ$qIzuoJlEf6uGAmB=h<-b@Wj5GNjF1I
zO#I`vLr7+Jf&ZPoQ?v{I7qeUc+ZM2N52r!Et_jR@c74x2|2OqvmT&duD>^rOo*erV
zFn{Ta>9vtb|F&-WJxBDoO7xbQKUUA2W)TxCCw=$1$MFeg9rPC4%FO$7`pi`o_lPv^
zZ{H1^I9A#x=XGz}q^i=ltWx&P!`-WtC!dR4DS2P<%&o=!w>!O)f-ii2xkNL=OLg{N
zcJCaO*Vq1ffhMOnBrvK4|GMky>gsxNW3u}+{dm8>zrN12tv0*4DfKen*Kgl-!c*_P
zk7_etF3)(Ivo7|Rx@o-jxmEMkk_z6qexCmMUiq(XcfR}2Bb7y^_waEabGnnpH<x#L
z%*2%Ct0(QX%3gV^#86^p;^s|HuSh0obbmfvGh=o*r(GgzdsdCjCh2>e&ll{vw0XL6
z+Gfc!6Vy|e)H>uZS5)s~`cQP;aZi}B{j(o;ldrF{Ej*HQKicY<($(uTV!Q-zoS!~*
z@%F~IEcSZ#4tvex5<Y(TYB8m%dzY$QeaEERzK*h9re{lh9fMEJGS%i5(|PcA`~5V~
z{_NG^{h_PF9=?3JQnmq9)|Y}tpL1_bL5x0o{9pKOPHXMMb0H`B5*D<r7p}Z3+h}p^
zP0gRQmgk4$lrGG8%(0VoXKaJTZRh>dZ)VLuma)yK)cwt5%?YX>Umd>rAYxhjz2^?s
zOuuf9-Ei1v@wIa^Hkhl+N<2t-FYv7AY2(4)jqK8%vzqe!-F+Q{wJv9wT)Uw$O>eII
z_Po0@43pXT_+G9*c<Act@XTv#BzKp+U6g#h&u5N>VK5Kl$AX?8Wv>`7+cP9Qzi~82
zH+*K|?&KE+dsy{tp4};~uyt{ES!}KP+WL^2HrsR^{{Lxe&WAz|KMH*vc=vIgdPh)w
z*Rdx#_Ybx_*Ku1Lwa<UatEbVW%YVFhJLkZ$jUPWpPFlWVuk(y~y%m2lCfyL(64@R#
zbCYI<S6^QrXy<N4b+vKQ5ssRgnuy(Hx(<!~_h4%QT)V|W3tgV<3@X0x8+3n+?`*R_
zACJq6thw0!sCU25_W7!nI-3o@J0vBXio4O-WvTdQ&P-nZQpIx)Go8NQZpoR$>szbP
z)74Y`<lqBeIk$ILKdi`$RMxkB@^QQ7zt5t3kB9m<@18jAsQkOv$=s%!-^WKE3SXa-
zSlw>0``c{!>c46xti1m9zK)ZxUOm{j>8<*M$Yq+F)6awM`g{4#c#d7IRpOcr|Nj04
z&0w+~sIEQFbuBR?xcb7zNU=-Xk1v<#@y!U|e!}_99kb#eZ|=4wcC6p&Y})nI_pbu$
z*3VBXcdTyM{ym<du$sfJ_`9M$$9{(oGqd|cmhZnN`!Ft6Z7x6GvtQ10z8gNNnpwVk
z+Q#d5c6A+WKHlhTJL{U(ryts1%7OzH)SXSbbzm~+z6j6~v-ERwJSCJvK&ugWH$|;2
zd3B`|bY5TVQe)NG{NBgiquQ*)N?sTm*Uy~HzU{4bu(;c|IUY8TG-AG->ppqZMpo!q
z<=1D;<_GtDwA~T!Eh!=YtZCZC6SsNa*<8>IJUH`u)~*1pt%u{5R-6}Kz3ajJr{_}T
z?p&LCEp^g%&8<t4D#hP<2R<&~K7Clu+Dg@o)pnlU>?XC<>n^&q9*92~y2oy^y8pAk
z-|zdIpWc^$->#;%*0S`Kh_ZX%lh^C_A4@P$_;`O}guxS!UmeoCyH(@&+x=zIoZEHI
zlPlEsO-%aRb;;#6^=}{T{2|QET3COfVevMDf45Tp=S|Zrd)~3oZ`qamn%`sBbN-Q?
zyeZUK;&Szt1;3wpubkZ(+%=~<hbKAw?#*@oj?Ld(x%BYzm_4c9QJ2`?>a4uD&;8T(
z?CZ0*ZpznN@BMiF-j8Jq{;qhmw#`j*@4sKK&&;!(-O<s}vTPw}L04wS*|f)7ug8JT
zpLwEreb)3_|0b;Ce`535@&V6|R6`Z+TJHZoJSx*(r#)*?<rllz)4ASMozr5Wx-RG5
z6szZ@OV(C@dMPEc{r`{l`G%XVj)tzB;<z&F>yJdOHDS-*Y_p&FAx@8P%IQfp_bg>6
zw{O*T7kb}TsQ>KDg<H#7{1T*o?fTH!uWYE8CgwI_Rm00CSqH>&pC(MwE{nC=^+^7|
zr0w0_nro`uuDxd0atxa_H~N;&v-!3@cxRGZ&AY<a;V(Rm>s4R!{(rL2{e^(q(M{GB
zvxR0gy|c_aD`BlRSy*)UxkG<l<3XbY&*#_Adp6xtL!$$9LfPYEylLm=oSdD%Z{^%f
zBSV?{oB3bwdwr6v_QbEC$1QRuh9b+|FLRwV|K2g%^K!%?``E54UNiEJzqoH9GS8&K
zbwlAXeFhWxOP!C-wTUd$t30)9&*^)Tz0ND^ws`LlH#Ge5*1pQ}<m$y7JMYf@@b=~o
zk>#b+FMZp)G*wOa{;X5?KQf;=-tzzK7e`*nvQ12rZ|!s0wA*0LJGm)(aogW-=(@(^
zQIYemU3cb|mnDC$+qYegP7mDt+5E@8|Ey2%@I)lNX}%g!Z|^M8VA}Kexc>dpe|oN2
zA7{otnz^y@?W$#)oFy*T9$GfFdG<7h2g{c&_<jYl)U0#!W@FG2pI28_YIT81bJ6_$
zf6Xp0_g61p={DuHg=Dj{mG0!lwp+feO0f^9TDV8}?H*zNw!Zhq>O8##3E@c!CB-He
zE?n2Ni@0GvdHU)ZAAEjT_f5=BezaTM{KB5C+P-%f-I;4|?+Cgg*s;YRbFJmW9S^^4
z4BDu5Zi~)~hUKemm+TEbc~7P2@T%L)E35<5raL>m(4LWE@SVxEr{b9WN{P)@6XZKA
z($ijVJ)+&gdTPgqWO?&tUkW$#%jIwVbXu$W^7S=wZd^y@zo(p>d86jagtLuzKF&Y;
zBymZfw$FcMvyGcs-IZhG&YqfbFR!QKs$IpOrOn%C2(xYDklrp`zjJy?TE=M|o{R^l
zPS5*(D=kOICdbbrBdXM*^lYl_-kvA>YtKB(SP(L4!pr$fG+(rJ^nA88u}WPS)L)r$
zJ{+`)(<tdkhso|Wpe2^Rv(1iPyf{%*JM6-yRPVPQ!7K-gcTY3UP3AHCT#>fk#wq2{
zPp<M=U*D&mcX@mN;BoCjc~Rki=BJm{&j0ZJ+1hB!!oOjMO77PGxM+7S{!ZGiFY^y{
z7H~%#zG?BXxb1D<!Hqu?*tY42*0Y^*TX9xS<oO??`48&X2?`6Je7SaW#)WDfiz~^e
ze#mh?e0uEWWSRc@+7~ywIn}fC=5eQeFZ_M);j^WuKlV$ny!?1?%RRrfyN_L1<K_xl
z?=V&Bti-lYVztNkmll;VU&}uD=1$6e^<2v;rHwPwpNO72_I|?usPYDGLo=VBS%wS0
zdvngOzq4C+MZh}KwWq_Dy59f)AmEkmzE@#_4R`d#JUJFBxA{KUzy9~dhtrww?45r%
z$K>6&b+!MW{}AIn{lw#L)Sj|A_K&CAzI#-<PUUUs*K3((K3mOMb6<U4WcKs-hmRi<
z54Z6?IX%7q-d^w#t1C5>KAn-wF`Mn1>ha}ayFBP<Hk+<%3xoQ9O321udYis7>TlTz
zFTGbWJo#(4u}_rR8e6%zqqlMYT`!3jwJHALMfa5t=*$v5x#`D(hfi}K{r@xP<orc1
zuT3r9*b^%|JLkZcU03I3hF_bQqZyTR>`!*$vsKYKoeb-h`u;3m_*r(ZXzTpW#QC$8
zzwUj<6V|@X<>TEz>ASOc2H3}cD!%zpF|96E^l+WAh|bd^;ibV_HY)XrzD~-#aDnx@
zNdNg6az_vDQM3D4teAf3X4K;wn~mG#C-d}P)H*OVtW`L^q$uAc{{7~A-|w8R|DAI1
zuMbb{HM<p@>zL=4o_D&qJGU)OTeyCo>*0HOBBqCyGGDCOb9>gqeUqQ9$bK{T%9M`N
z?vnO(HM4df`gYd*zCp!@gip`UgU*fCh~2fNY>DSxt78kY)^EBr?NLj+v+c9@66yKB
zGIt-SKW)DB%cj^ZIyv`m=;z6JF8=@f>$=Tf{#WoNTTS2k-r0%&<L4C{|LjOg-*mm?
zcMVtm{#TRVG+o^p>;1Fv(R9w{1NXlbZ9gyZ>D%g$zc)#k@x4Cw-`9T9pW~Nq%sOAy
z{^9Bp+44N*?y^gpT*C6^-alJ^zq*<AxNx+F#gv@W<<9^6+fs`PKd(>yvE{GI&l=y{
znLF2LPM-ZOEqIcYq)%?-zSrJyC4EWHdjh}wdw(b8vT6N)>tE&hE!W@1p6^{;6S=y%
z_6E;^jXx8b_Pv|&<B#k*gI#m%55}KQ{q%OhnZ5DWfB#Dv=RBS}@zReX`)fXLeYYuZ
z{`m3(FWd3$KmHzw-X%1HOHok~G>HjXMRh&CelDl5T7robc*ecvS{tA2DbNXQeN9(a
z+^W`eQ#Th?6}8zcsWIcM9E*N@=i?^J()D`<1&^OpuXw7JZN{vcf8JWBZ}LU|w0%eS
z$ec)I(lP66z4h)^&E|&b%s*ovoSJ%N@A~D_nJty(oiPg2Oj79=ma$KLy2LU$FWFFV
z`c31FhJn}pKx+tgmA(CSXZp#kt)I?WzX#nhcWRpM=}D^I6%Sj*w>VeC-S$YaFjvZF
zRJ&8OR7T@(+<I2=?J+6k)sxuog&kTS|9Bd!!OjV%m971g-d#IUu<mZ$Gnt(y-sSAM
z-OaJLjQK>@^y|&mhdX!5dG*W|zd3jQkx#8f?;h=+QB)OtKzp^>iIA?24g~{)37~b9
zdn${cotf!8O(!x0v=E^t3^t|z&&hJto{dc4DSZ#ABJDReyVXtA%>KwfJ3rrlwps4H
z+nYd#RILnF2kkJpyQ_4jaXR14&FR;@geJD9nazHAd%J$|voi~;zP?%$vvbm&oyDsS
zls<*Load16>B-42@9)c-pBD;ww>N?@K!vxcP~Tx{oOfZ#Y2V|0vXPt9&K5`ZWnW+S
z@cHxOp!Hw(_t|<+(+Ok{ndtuP#fuYDG=n7!5*T)uzdv^U`t-}o{lP}3{?cwNes%_Q
zZnOAzr;zu%*`B}2I;T2mnrn+p;(LEz!(*<(A|ftxtxD(Jte$UQFIQh*-zRT>uKNAn
z<L38k7Aq>aJn5Wik}1T@#&h8EWzeC}0V{)4!OMB(x7GaoT>SIY{wpuGGl%y(-BNP7
z)F}P@`{vije}UFQ=|*p}+5Els``vQTax@96l8jwa`vY7q^)L0F4w`<`UcYD2Mi$p6
zD^Gqo>!_@(d~)9UMNA7ktR^jXNR25HQ!q06G;`nkv$MY$z6Pzin5v-U?|OQg?#a2<
z=7NHPT5XWS1v)x9IzR^;fN1b(105Y5C+^M&VLEuxp>BG$y=>WOw<pC?Eqf{)m3`xl
zHr5yOL@#1=XWw<T<jCQS<kR0GzPa0<w6Zx`X|MRSYvzh~FC9W=Ouc!0&8;$5xdT$d
znesdSuG`=6|NOm~?(TtW{{No&a^@M8$t*$&NeYhJ1d=(lthKBc<;1x>{3jAPLAy&w
zYsG>^F)pluA0M&BGdczc2)c$%lx=nBh}*%an8+)j;Ly<GnPznP@_zd&qsfYi_n+3D
zJ8Ajy6$q@F_bX)Y*J+}&U3a=kTA%xsnr-=tYkA%4-yNG`{zX4|^G`mSe~ya9tFl+e
zrcOSkz|o}8!6EYJNSL|tqR0!8k9UOjD!YeVTVfG8qjah4DXABKCrqu@7Arkx@@?jv
zs<`Z#nUSY?97L}kme;;JeZ9=Ogj>_umHW4B3SzbRk<Mtm<Q3N;%bOcN*zV%_z4A)&
z+WWK49`8REKe=Y^t!*=w$5*j8<Ryz6tU3F%%&%;Y<=lA>lF$0iKdHS^JJeBt#gXGh
zD3`hI%-dX*Sxs}l9=EBxSbpg27gyFZ|2I$gyHiT-=z+DOz9nxnk}Do-+ZL_cm|CrP
zlKqcytgG-uzV`Trd8c->7-pFTSXvmo*tE2^m7_cK!tPGFu;nLp7AVZ&-)hybqMOUR
zc$fN*LruH-5`He*lC8$T=yP!4zW@21Tc-9bu-a&?Z&&-h@odeSorW`ZeQ=PS$tuF)
zDB$;7c9N1nSIc$&%si>8bQbR7&}nI_Cag57c2@lQD(XeJ|9sWmx+hQBm*1K>v-WAx
z=DgWnv!a&<E_FCKbMN8_{sAj<E=0_Fxbwsn-aS8N^q%<t%s6NAbED<8PcvllKXH{z
zz4|jKnf2kH&IOgn&l%KeGx+BFD|qePX1OUW`}n8ItrlJyTUSl9f4gRKrT)Fc2k&`T
zs;zwEx^(`|hv5v5<g;B(yB$|GC~!0hG@N1K`se5QvH7x<hjj4VCC{?#1V!s#PhD>%
zm@cu%<L*Ku^TUUaz7x2<XXjL_unnuH+rQPm8S;MRn*CG$-j2DR_>bkG)$x!sPxgMO
zGWZ(Su}aF_>FSHi_S;kbed2VUZrx!JyC%r?Tvz2Uwa0JeJ)9<P*XL+bnDcM5hO5FH
z{oDPOE2P(zt1IOPGSqHcleWgm*GA_|OWH>_tr<Kj%Xi+4FIjlgVe>0Dt%#l-bEH;U
z-+iNh^<t0X{(DI(_Wr5&BeT+zOj}Pp)>_>^-9}t0ZAqqRtANR-V%wavUka{Nuk%dS
z_P=^*{=%4+iCl?N7AAc=&Z*}z_hw6UZjvh4Y3ld1Dx$XS>%`8hB1w1U#L}{kxM<#~
zT`jmdIQgmBc76S&OS8{^-|%(;TgDsxXEW4v!WMQ(9yt8#b-}bCfq(BTiupH{|1UI{
zc|gYJ=nIKM=U<#T>Ur(mT$!`Q$y?n5_Ia61OfPVlyjfqidS=#<me=xonD_fGyt_b?
z;f>{-=FMG#Ys|Xz%Acid{C<J+&a#A4;XQ|zHC#>O7N~m1JX>=2LV@N>ebrf0Cr(#~
z<T$-~H#qitUc4yN>;8W8g$=VhKZP-$lQHL=J!{3I*jty3j(Og9)w^Y~?Ndcig6Zo!
zKFX<;jPpf>jW?Q};mJ)^{Gs!Gy};57E4R(GUOj`u`0%_ZHDU7KSvppx%Wt~0;?c{w
zy1i3ES<f6kV|?^ekmA8pGgur-ji059_nw<n_SVI?_PyrJ{c(Czk9NzRIdh4lH{eBW
z|I8g4!is4H@`b@~^_0JA{Yv+>HVKh5-`Zeb^XS}>-TYCU%yam7yF>MppD%R!bw>9~
zY<LP+*Ni5vL!r+s_V>siUf^G^SGer>2~D0q?zUaoj>q?H-E;L_q*z8{v#IR-ig~-U
z?s_Gj(_Cei)v~Rc_j6i*vSU~G!TbM?1$(AiZmng@o&Wbh`PnP~-p=IG*uMYZt@qZS
z_rFwrVVk9QY{xH?$NXVSRwt}E-%~nyW$x$t`Q1K_&2stL%N8^#a5Qx|JY=#s8)9T;
z*HL|1s&<i<`Wcto7Q$)=cq5d2Lkcr?7$z_^HSw6_%bj_=Riubt#OU30CqK6t5?4-o
zxgK@+JMExN;iYq1ayr&Z9_g6oxGUmw*Wncz!s~4kmhQQ!|3NN4RrO-pf)h@sWGA>Q
zJJ07?I8C%KUF_4jMNWOQMD1RP`c(2<$xl-)En<BCWc8je3#KL{oS)C9_HSv>wUh;C
znGdG>+}<x2lzewRtIv(6iY8u(70(k+%@&n=A;DySYv%i=+gdX@K2Gag+0<E;@#f~k
zLjtLbymm0I;rjpHPPl$eAv4>rt=oj!j%}(eTbAmWv8B^ccCAFH?t`0BRlbhj-xPFf
zu26_E?K4;z9(^oNV@c`%^Oei}gL78o%(S1D=e+F0S84x=g@3g7y@`zPuY08*%Mdj8
z`*YvLu~!4kYQJ0c@3);^dD$*muHj=`)t&uwp3If@{La_HI<sV<Sk&XhYkDisz0R%P
z?DW|n<C4=ZwP{CJYGp(}yHI^4M(XSTNu9M)%v#dR_nrw(=JNloXn*egXEDYL`$J_7
zSw&eK1-x9Zv2ZP2)FPEBle;8xPv=2?cJ>^_jp>V<{lhI@*zRm#y6@3+Y|F~BxEbLN
zpT0Sz<$b)cb{f;FQ@=lR-+b7ld^69^zwoC^Y)})Y!KW?eZ=0iK_uV}BM#udB_LV!%
zt~hqw_|2c!(tqbIzr1H^YgAal{DP0i7CGI0a%t(O+l>A1>!z-evhK?eS<6=Ry!yuc
z<rB;96d&8ddywnyUcN%UyfTBt>GSTb%$XYg%yMt0WW|Fz?Q4&<^nEN`XC01K)0_J8
z@*AG`XHV8jvcCDwb^3!*Rr`5?Wdg57cj~?G^-tY7bN<VNLQmxP^7rbcas8TJ!7AFa
z`sli!!V<d|3MALW$GyKSa`%Gc!#+O!Pp<E+86v(X`is3Qd-1XUZyW0@_Ta;LccMeT
z|BGCHo7+Zyx%n5jzx$PPKUc*4OfV`=JF}?u;g<OcN1ZBCEEMlKdq=RCMO#dKX!h;j
z#6|VyO<ivUMPAwdlKHxoneo2v_lIZCXd1ElimZochYc>XUS2kAnRj-LO4x<tlRo>e
z`15J?+=naU6d!$>@bqftPj;?IwmGMQB=>KcaCoBH-IAa{iF>DyvAn<E;kIw3t=64I
z0@0UUL|w%zrT<H76v%wLbp7^D@0G`NS5>8-+Qwq#J9GD`llA;Netz~>@BV!5@15n#
zWrOd!M6TzEEx)ymXP%Du?4rHwX02(7yA*Vk{x6ul^k<!5^`Yo6*OeK3yy+7=-UvCZ
zpY_XF=T?ZvDZl@FXQ{k0UYEqB#St!Styx^hvLN;OvJj4tY?Hbl@%t95W|zO1?=t=S
z>{RV{(b|)C&w6Rxs;OB&F<NKpgyLrh^Mj<sLeFVi?KV#OB~=u1AgcGdJMV%`TW4s#
zxD|iWK{z<TQGn&)nhwhq0aiQp>>}PA5@NG3G3sZx^4DDCz<NgY@M`y$kGanYYe^f0
zo;jK|YxN7E?d@!#XYCw!>b`%pDT~9n;_lw;KU>WUWc#Kq|ManD|AJi?^a9_`*xWyP
z&F#m_@+NT_6jX~UFUdUmyEFax(ofF~`E^f8IvQTul4`*FzU1@SB=-5KKUwo8YQC^D
z1f`SqWcm6fJI?)iELqKd{`kzFI(Ltr;?2Av?|)?N%i^MUqWd?VzIS%FpTDA(nb&`n
zBVmWiCf@Aax0%&+rYD=!W;fg8YrfC^d~W*s4*|c$jwb!|b^P6-xk7=XsX#16EkI-H
zA7QymebpXkzf71FyNkhQ_r8lqCp-$8xn<pikJ*3fCwKjef4<D&<8_zQN7JtLp4Zu2
zbaZv}Qse&kS<BC@+{}HW;*h?K?nJdJTd`9Utf~%*n6t)$irRwG{mtpN(tfV*8G6(7
zU07G$sqC*iwRzHipZLdfzn)JH{-n?FrRKq-)|oG-srk${p7zVtIdb0AJ^xCq-$_|b
zR(73#C~4UO-6J9)0xXUKf*LUf(=$14imVKH%=3Jfs#b``s>ZH~s|3ZXh5t|amb-Cj
z`W=C^C2k*<+H^LyXr*WCNmXywj}ZvtyZqyewe@^8YfU>rsrx6q*1t$vwqPeX@pK2j
z^q#rL;&A%9okh;KBGVtfzP^8Mfx?zB@w%B70u4|6e-)Ll53l_C@VCUa1r2@8cFV(?
zKZ(y(znZ6RvCcGpsoulGb#rSTuB%jjX`N@PRTmskEDkBjTv)iShxlmO++Ve1Z$^wP
zx9$6lLF;CmSS2p6@j&M2KlABHJKlVL@bcL1ZG1{uGhT|6MY(J~G~>8ah0(c)Uti}~
zzMaBTzhu=_@3cj3tox5&S-kw*cJpu*A77sOGj7_(Id`5c`p;tRZ4??`>Ef<c7kr@v
zoJ<ZErj<#ac|J3B$NIf_^B=^0<i1_(AHR6c>3O2NI$c7Jdc?0dxkS2#?^lY_|L;eb
zZ7$R^eV(TNIqv+jbQ4xytHaAa96h}K?3=U2#pO*;!*1FA){~xI<NMv>@9Zi2D*4Nb
zYL_iIzfByRKO9{a6zrS9quO%nreVOnsH#6#qyOlIDP=`e{I;9CojGNJZ?g7!Q=`W=
zPaZ86*?wN~^jgobc^%)DPkdnf)+Au}6v=%rB-U$&HJ)O-uQO+2`|j2cF21bN0mj#k
ztWa;(>_62%+lFy-+x9z)mdEdIb6ULN(&cma9;U7Tep5&EY)L0a&A%|yuUCr7cSlT#
z-*rR#;zsUo(>6yRZf!ia^^eo1+pc_<x~}B6oJ{AMHA!#Tf~Eq^6@9@+uZF&yIEVdb
z;TP?iH9IR;RtojXzg*s49rUB>-m-=j>gH=xCM`<}xOMS!V-|x&neL*i!S5Fx+3z%6
ztT*GvB39iflO<Up8Jgyw>o5QN@p^;(jkQ0|1pL2M&C@XF?#m}L4oBF(Ex(^I!Sq)i
zTm6S^|92g(|K;QOz8ex=4`uGwX)-Y?n4CHBxct{@XOAhJWr2BOY1!#}mZkBwDm`Ad
zU=vsQjM$uy>txTIF8O%d(D?hb%dCDrO;gVc{rSzYXjRSD4FYFbPxH^*xqnfhc&Kcy
zO3JDyY|<-358AsGMlViScIRc<pQ`@+!)4x*%2ONv9_U%>CuJ|@BhiqwB=mGb)Yc>F
z>#qHMb;Ug6{ACMH7G}Y%I(uT(I%aga{rh7&KiSbsGqd_fLg!?Dxh)9?9gd$nDDo%b
zz@cEad2c0rN{tiBjT1ZnJy;mX6}sn;#ga8nyfJSa%-wZweZC;F@`8!PwTkZD$85v?
z%?w_UZy~=l;O465J2&)POy}CA<Ns=fLd=|B!G*WJfBd|paNmNd*>#VW8}Vw&GwGda
znGyTu?xaIK{mUjcwmD`VkG*<2;M}#j`t$zJeQp<h`qKAZ#)`k#tG2H=y7KPoip9UH
z8TMGd4b7_Z)i3|IF8|WHyz2RCo$uLGl=arxmEXJH8R!3}`IH&6`uBGVv6uI+zgf+n
zuzZ(YfLPSDEQtul8uJVu(YTJMm;3iEi@z_n;`Y7gha<Ot&bt09BH-pGPw{2E?>ry9
zxb<^a_ys-P&dr^WLb8l!*0N0VV#X=G9S0-|*}wkQ*^_x%VO5mC`|X@sUt{W)P2Sh-
zRn?k#&t}H*tf_P6-rl~mA@TJewO;?S4VM-LCG5H5thY?3<Wsx;%}b9mWvm!QPfHb8
z+x(gO&S|BTi_?vlbEWz3`<_k@v}<vjS+TfzYS+=V{;_h?bafM=mhGSQ|M9<se}Px)
z*UL(CTZA3<I=m^}&syu&k%nnvTJx{%w3-xoqFzjNlYB=nKi3WajT*f__lsRx)jRdU
z<wMKsUoWWKE}+HKcr8Khnr^w!Rh=2HxMcopZdfRtqV}OAtlvvzVH9gnaPazDw$<O>
zoSdQ=eB#upMLsi)J_+S9@Tu2(uD<tw=Ys4rviBcv|87;aR(wuQl)=>g_`fY8f9?w|
zO)TFhbFZJx@Mzjkzoipytau)I*=(!SvE3K#Kb+dKw$c9BGQBgA_H$;5F-O<Qcy3QL
zu6^F-7wh)<&-={V<&qhajMqzRGG&~rO;g?-{%&ZscY@vI@Js)NuE}v-XAfN!qG)F~
z&ouj*M<Pd)f>MCXgI>>!8I4hKAIm=NXqRP8U(s`r?Hz})Kw7J^yUiO7g9#F>nK#A5
zT$7c0Tv(egubunx)(j2hskPrN|8LQ5k?i<tclx2S+_&9&{*s{_#s%Ip7`Zo?pFSM8
zAkVZlx=EpS`6<5XSCj=C4{bf_nRm`pCa?Bxd`ikm(Yo1by-z+08cUjFD#qq*FZ#4(
zZWGhqk5->H-Q1F9wpe|B%CTK1@4Zz%*JG<bdomw~uDI)tr(bGr6z;e?Bj;(y7R?5)
zZ~J`wL}sjrux%@d6wpu#5MX&I(_y)y;Hc%)<r8O>=P521%~&ET(!YG-wAzQ?Z<pUW
zyzlYzj>&Hm@4MQZo@mpnTsgh|&XO%9!R064%vXr9kKpL<QToG^?w>d7;A7>qqU!(a
zU)(ehYxvM`qVUQ--_$QpG-sT9JpH<uWGS1wk=P++-Q`c66}6UW1%8^u;waGa*+u3s
zN0S1_A=ORwpS>8i)f&#{{p~le?3!oTFRwO}_x)dgrtgT|mJh0W*BKfKYk(9__jtNQ
z?ZEqtck(a3KH!&l{OYpaJF^q^$NoG$&6siJv8~v*Je^imNS_sCiS%5f(s?O2*JSLI
zvY-6&^!3M{{)u%}ToPHAK6ZvzF3Glj6*hIPG>fAEi(n(DXZB(H!)YyMJd)Sj6jurF
zKFm8~!!4ang}XuHPM{ET6!>>dLoCOvdrC8??Q&PCFA<~{G#Dh1r<9@=up=YqAe+*;
zfaW=Ty~^R)`PQ2ok6jgg*J)~y+S$F>+>3EmuT9bPPNOcjr3D#su(bf6+{B|(^c^0u
zxomyDq4<4X-}9Y}9DLmiYJPUz5fbfdntS&3_Q0KSu}bTmXP=l;`!Xfv<?oX%u15p*
zW|=&<uU|fC_SSCG{Y!(NFDwh$$0KKdb&hS$owp0)?q*#-(Q@VFyDR^luI>uyHHn?Q
z<?7>{dplZV!VNfk*PpLa2B+#BnVj9G#@`%ITWU>kd&kKiy-n08@v`WR&*#>Bc%K)x
zN+fsJnpIPt9usUV^h~q5lgbt{>3S!VNS?vR>5~s`kFPuSBGcR3Hoqs^ar%;)eg75P
zB~KQ2Y>9Qhtz_~2^W7EGD}Mi8+gQ2oYOB7T<SE;W*EV;}G@b1J;f{OpUh%8xZokVy
z_OZP7H!ZjKPOm?@pnsopXML;b=GAix?GI^xF+1rKd+tejTw2+#*$40cbGv$C({+`%
zlk3kt+qnFH^T9jQL+h_iZBv7IWZsF>B^RrkX5X9JywlukyH?rrT{Ab|Yt=q-LiVcf
zoyb7NJ!_mEyB)j}d-v$;;yKeJ7gR?4zwacowRweT9z%cIV>YeBVWwZ6P2YX{tk}w+
zL%c4}@`E$JzhU3^_}}8?7j{<Wo{IFf@RLjVduY9c!7J;1{_M?{-dX)#Xn8{X|9a*5
zOLrR_ySlvcX3Vtw8|&%=xBWk`nP1Ex`)x*^{o)MMz`ys7ONytY9$4|7;kK&H{ldDR
zNi5BFzpsiXX-JoZrZf64(|_8lr@ztZ^N&MKK5f}kwt2lWsts89_`^0AuV7&o#~n#0
zW;r|*y71#?ect`Rhn!PecZbyZ7JvMBhpFn{3x)I5=YkIUygza}=5CA>_vM%+a-Z)M
zsHSs-d1z!am~2~eQfEQI;{uDH-t!quG~UkGlcHRE>$>u(G{-=ZoQYF2Z`Mle|8LMX
zOX7l!q>SClIP;(O8~#Y!d6$1Y<8J@q`JD9<=0DjdO|LntT)1n><twc|5j+0sXll3|
zSG}<1<lO0s=8kuwe$Us5+}j~8%@RBLQ$SPx!r$fH5f+6Xw))@R*d4#@KWp2pJNI{V
znV5ky+@Y^sS4A$Eb4}^EnJ1S1i#6)%?R95<Y}*;J>6-MCJ6UQoIG776Bc`c-_1NK9
zkn-W;e!Gvdeg869c+VwRMJ~J6T=VPU<OQh{N)Oe|wP~39^>TVb?K5w-JMpK#pXBP@
zH+%cOA9H3^q_5<<`&^GJ{9)>e{@x|k(n7M5o+&T3{<P%lUFmzc#mmLDCHC_c|7B}a
z8=pC5-k7<2@sR*e0hWheO7W2oyB-B?Ek4%S8MedvQio*k8=0wBBURV06i8iCH$`x>
z^f&o1%QtP0E2Wl~>-sY+6m*LU4KmQ4P<!F*t2aTP63i~_=$hzdrmPqGxL})Ck>*FC
z)tX0k+3K9RvLf1f;i|I^b^DGzv(@_TJ^O&MbVk@+57utM>A}jS%XeR?@Xda4_xzo|
zd1W4^f_VaEy0cC(t<Rjl^Ktc-RaM^-r+cma=g58dx`@z?ZF6?A-`xG*Vunuw-?F3<
zo`*}<t}K2wVc%zssX~u7tStNX_Ri_T_Sqe$3Xix%?ah4p$mXrlQL#Q>#t`eB-xfuB
zuto=n%?_UY<4?1jYF_G&rFyneQ4-tk6uL3))Q{c2-*l1C(>u?yzleow`u_IGg+lgg
z8#qo)?^ZJUb7}ixi`{QZC(VvtG-YGNftMlP(|)rqeOtfo>}fHk3;U9rSMN~^5@2z3
zS)dT(#&bpMx%}O8wvrj!GC6`P9xqH-b(?AZujhi|@@EUHj!tY;w~ojR^kfWO%)oq*
ztKrz5{I`E5guglDlyUin*X<>buHJH*eU`6n?dNjmS*F`QxlG~|eo&Gbapu>H`suQK
zadFR<IZbg|ek(02+%WJO&yCPo3j%gV*zJClcymw9GSS&bvoGH`_1Lq~cU7@*@06#k
zHPar}mpN~6n!Cx(A^Eh~_7LHRYzNOR6YZ{bXa0Y`_W8FZnX67|-}gBlwEBeLiNZQb
z`!>h<|6-J~`X>fU{OGTW2)McRpLku|A8z6O@oa6gdj2dje?3q7mGA<F=H+^iSMEyv
z`E~I%o|$vyCtR~Go>Zx!Cvv>8)FktJlYLE`(P8u2(zY|tiZb@=WjHl4-Tt`p?rMo!
zE9(#3+LnFr<d>;?te>!oEn%-SkNW<fU-{RTT}_8Hr|;CaIq-tFPBFv4{;c_BVaeNX
zRfF~@{r{H!@9-DKwL3ICU)?zRM=AKiFViJEIhzz>5|7A)?5Gim{w6Gy_T*;!nk&5e
zMNAJ8eX}-lq_rCMvj*RNmiTr3PaD&N=lrer$Zp_DYqMFizQ5C#XOXKa>+^|D^JZl{
z-`Tl5qR8QW$J7}Kn`dUsPEj&H?)#~1dw!XMnr27XJ;&cGvK+to7$rI-f87)OWoFLv
zv!S~p!&{}qs$89?N(l3=s$x}W@LjmcH~Bu})Wv%Bmou*#wk*46adtNQ=ZZ<4cbHpy
z=G1geuP-U^xayh%8rKwjdOq!}#eU5f`7>u-k$!mWKyAxg?LTYvu2k72cW@bcdb(&b
zxX-w<TC~~j@62m|?bB<cq%t#S|H!)%%uw_9Sn!wMTC5FCYqsuUV7Xva@%U!A<*izU
z#qRuz_FYf;e0aI~=J>4_5`VC%+1)=<e*Ajc+Q$ob1Td`GxHj^ut3Si9nq4#6SGCQv
zT)q6;=7ZYnq^|8gE59dOu-D4&JePCqv2(xYZrNfIH2wU7>gLtf!WBiAbKX_%kmoM9
zJ#yXKY|^x8#|{-%-PHY?q97dhZhuqx`UAp~nNRP${JVcriM#Z#FMbRU*5*(D`1dH6
z!-FSf`!zFyw)FUOgQv9eB%G4Y^1M!X`dB@;=2rwquU6IDCK;{$I$95&CpCZlpOL-L
zW`p|8gHK}R93C%E<@;7ux@cun=d!dNyE9}H4gXps&lFY{2vTB~vNjVlcu>v$INa)o
z!Of?upM5l}3f<oJ!}jvrFXxof-i7St%i(cxm%2IQ)spj9n7F!r?c14m%x8mfP90;j
z`fbn6zi0a&cjIc~5WbtGxnR{E`~4GS^(-d6efD|EqTkXUULVS{W*z(O>v&XqMZw_*
z8V?%Ej~!pI<#C_ZHtt<D*Z*+#+zse3+^4_(duG>JZK3xsJ}AY1*~i>aD*r9zVV-z-
ziQLKh`hVNs-*S8`ZuIi%^341D)a89-7Hls&&)8@E<&Vj$pF(eWtEB%Yd~16&pSx^%
z{_%vWIqtK<?W+HU=_#=CdiVdoa4`K@)tvWi`<Hc}D9pdSEK8C>Cva=!y8QdA!@r%g
z?Rjg*#AEt1<DtR3Z|e^C<bG)I7g_#h-=*1)HB74B-M7iTus{CuG5_R~`tcX5ABkD*
zHJ<H0Id5mnmQXYOZ?l`a!UG%yTD1FlwsTJBpWb+Cst=>`!PVk7XT~ILJMFQztE&C&
zHn)R5A97h!4=noe_)Z+-w)vOVZw=u|%3)^~t}ooQ)FJgLpZLq=`gaRg^yr)2t~m5=
ze|}52;j@(0GNm&(3U@?46>Z$QTH5li{``Ya;*SSaIO(cgGqH<#^yKupUviy-qM|}`
z-I^FhS|9Gs*~gbLai*q2YqUttj+5fS0T1&Vo}E;9zJ%-f+dYT<PR)^L%(%HzQ`Apb
zyjT98xX+cdX4BW)b=<wG(_&Au2HR5E@7MZ29^K~RGVKrJ=3}2<aQ8+Sl<Br^UcUKd
z>$E$vc?|As_v`2QtTtyacztu;^9Zhd{-1o!E4hu?w|%kfDSY#*cjHSv1GD9of4v%a
z^6jn)divylm|^tigsdyQZAa5jItl;$zT~3L*$)>s1wRTsadPdS>~FVyUVW09_xApW
zO9}?%HO-3rN8=wcG|y<_f+X~aPyAw+%2&4gSFr{^;eK~c#W;nn>Y{Orq{j2u>)Vre
zcP~EL_BJ-pU^W9QUt&V?uG#Olio_O#8htL(7ykeI^2zqoULVgaXQ^VhEpnLuQ2X8V
zcOlzm9}F)2cy(E0t6%v{%jNfX1?`#lt|W3rDqHXjr@I=lZ!G;+<eUik`bXTBKY^{F
znuASCbY|14BrdHx2{W2CpJ%g3zOl`EK5vE5^P4B{AD7omzsGe{1a+YEJGYeY>EmzP
zif>BZ`{@uav0KbW^Gw{v?%ndM&Tag^E`{;x%nPsjHpk37?*G58PsHTdzaK`Xy8jN&
z-aGr+>-i18%7b1?<(2LKzuuR@B!6N=(%tx+!)0HxD!zUaidrok92aviIQi>dUisG-
z{!F^ABsQ~2>hW#RL<xAdsLh!%Mk_f?)M(L6yX|6enjPPzR`bM$`@G!wcXLklb}=K}
z-#MMTBsOxz<)?l8#>jhR-|16REX*G-v;Of|X_I8|*&_#@g`JxHup&)SIoOuxP0mv%
znI*eJ*6#{7mAU609HTY)+T6Rjf9$(_9QivmR}?fSt~~Hox9PxZ+s`3!i~-N~=%2TL
zeX~F-EKK2UYnr~x(?i#_W@wo{dpj+xO}E@2|A#(L-3tEf{c{_39*(}!Z8Nzc*mIsq
zN8)MOJ;txgZ_U}4c3~<1l?-+6oztvkXXzYdSaWZuyn1KEy28+o^});+TpwINUw?G*
zO6Q$FWE=i0TOhmZE@=E#fg_wVQdlFV%xJslr?Y(Z(_da&lE8M&WSY^hgauVTFGDW~
zEc_7QtGqCgtuMFu<D2V0b8LdvNT(TJ{2s`?jr&aWTPv}9dC5!nL@K9N+Ue_QrKHa*
zySP~KT(Vr3%#>M5qUTahT+s<sG+c8+L}XjOU~s@gx5I8Vf0ylB?0@`>`g5jrdQpdW
z7dcC%-!|P)_xHAbQQAqj%W{89FFyM>)z$l%VfxO=jr}Hei!*MSyiKj@k3AiIf8C>n
zEymnY{#9J<MeF~b*_(ZCzD$U>c#x&#&Tw_%S9NOptS^?Y*t!4V)6Um=c6*hA0~Q>$
z1&sxR#us)h{8>7)`ASmlq}@?bFGEg++|K>FW(%KYr0)F}LO#z~TCeFg|9nwr9M#zF
z|6Jt5DW&%FHCIH^cw(LxpIcbPyf(Yq_LpVIzR!8L7wkM=F0Wm0^>lC9@}0}OcJ_pe
z%*zkGl$;O)9tytnpltoim8&)d`8c*pPq3O>{ZnRQ#kSfrZ@#vlt310-^!{dpyT@wZ
zmTEo&4dHrotcv+oVcYK(y64Bv1x2ON7w;}eSg~>P_qeL&-e+-J_HC|NS-AcD|Ayo4
z`VY6}hkwin+SyQi>tQI5goUvC#a(g!T-yK3Efo4V>!Zu#A7(mjw!ERf<6r*2Z9>oa
zb>zJ_U%OxUG<AX3r~kZZNyl=1A~wwll>Jik_v^|1Gv?O4S`=invv%=ykuR*Ty1?UM
z9S#qV9Vm1YT=wzg8zw%{)mi-ZQfFiv!WL}$+47A0{lD`GQSL9?MU_O_O8FP=TC}lZ
zD|fhJM3S$_&#$M?m3)3bHQiw67O53sKFzK7vcAl$5G%d3##PNbtvUbNy11rA)@7Fd
zS=UX21K&#;y)QRk-F^0z>y-^3zSXL|=ULil*1LAkwG%y`CvYdPxp(K&QIU`xft<c6
zH&&?Ewm+`2KmL6s`?&?;eVc18b(g&QpI`P&D&vob{rm;Ls_rdwP~4EII@v_D`BL9J
z{*cSA(<03^%_W?e#Pojr-#WSY*T1K2r%L253ZAt-aQW3%vk<+bK8D3VmzPKyu@)Z}
zG~qj)`uf`1d;3c+ng2gE;o4uzlJwF^v$ic>7XHPrB2Ptn)8Aa%>ilKjE5j!3dVWJy
zEN3IT&e1&!vo3GjSM%r3FXf&YQ+@h^Sxu+=ezsruwn6jD|3!~g*PhkZ%5P8zez|R~
zy7}2f-_tX<)P2r<*V(t|@9Fu!*;xPGee3-#zTlSD?C?J<4&IR21woBF3V#n8DBHgK
zw0hlyXkQzV+z;OqW3=Au6#YuSd4@~BeETJnLkG5)wlY4vySvS3<Mv*|*so_F^XHWr
zeBN<=W!EBu7Y7x(j(wJ2v8-Ha&he14ga=`(4~EapzW;3frSkAk<z}a*TtD1>?fpUx
zo7>tg{qwh6Tpu`LJ$KmRK!f04H!61AxU^t`mss2VbS1+x4v(!IdDbkq|Ge$~+%;v6
zam%{z_x(&~NfugDbM5%CBX(xBpDRM2u70Z3`SgoT&dqgRPX+TLR+UPeIWN!qq3>(T
zhxh8YFQuM1{%6*%*4oI(H{SP8eTo!*5wl#A`Pjcx7gp}S@c-VaDXXu~)!*~`jBf3E
zYvHgQcUHf5aYv=^-e-PrHgkQI?B*3Y@w3<5KQ!6#m*4(}nb#Vd?F@e&zgDaIDSUOR
zc<c;`tWO!6C-Eg3U9a)n&G`7M|CK%FFK=vAj@@1M^`GsT!~J_tFs-`0{cWtoDbt5v
z#iM^8iCZ7mZ<Kef|BTdsskd=b@4Y`aAD>nH{J_j~(_&lcbIC7rq89S!Rq{P4I3i~e
zoTU*ye`3D<6YF`Vi_CprUsK%ZCoCs#)Ofr4PrJHw*q)Z{uTp-y?KiNmcu>$O(YI#n
zo{N8Nd6pfo{Cd$PeqUSft7ZGuudBpW)XQ1EVqNm~#?eilyEHR?yKVP;EX3ltqf5fX
zclXM-mrl94B=qcP{#7gHmAiu>>2Y(-xp+VI^6iruz5mQ(cx}4j())D|#`Eo`KR0$Z
z{S@V`oyyGCX1hei`lpWXdry<hoToEYSUF1GOsGC_bjMD;d9P%Hl9tq;pU>A9x;1Ef
zPC`Z9vdQ6QnLRHvzMlWHQZ8(LQPRrfg$u)%Je4o{cumMW{~L$i-N>F-iFdWwg^g^E
z&K3d>aZd_Tnjb7?Wf^t)P)GHdhEp>*S&f7%BqMCKCbndWdieV8&HDc4=4SQaWj>19
z+TBK}r|w*N`u@zn?#OuGgfCl`E}fTuHCf_sRr$iSC7KQbh!JLcTd%|KYApB2^Cmel
zs-JfMo^W8xfv`)tUwq`w%sdmkcsfhle*5W(Z&!)uFVK;`G~-}jqDuW8!vfm{yXFU$
zZ%f}*_N`H(`1to9Z};}yW7JhYdHMeG?<?N_5$yT;^LjE@yr2KFu+#6py!fT?{q9tc
zC2yVfE|y8W)xa*X_fF)ao=YEC#r$8zo_P|Rdf9*e=O5C-Opm@+eAZ?wTU&qeTvA`|
zpJ{3Sd!t|Nbeufx;^v3C7Tf;&?A2TU^U&}9c%S*z+K*4foGN}B$5S!=*pGdGjCSj9
zpXYw7zfEs>;P!HZ8NWBZI=|0IIHE@@bd`X-{P|g?*-H#qKogxxAKrWRYAC1rG(;yy
zZ!2rNSi)?&h-Zpnqw#s?sC%lR)k2GTA6)Y|eU0gmPG0Yo9A4)Qhb-1}oVoip<pr1d
zM^T>VCzM;-Bv*(o<Gwcexa}Ja4KDLbD<(A_is4_qImLEC$ASI)t1h2AIaz(<?YSa5
zViNe4)r5SpWSKbm&YZ~>cM^<lcJyqCy=U`O?umhD2jBjl>@#!9ir$pTKt{HY`uXuq
zw|)P-&cfRA_?sPuiQ2-QXN#Dx%T}AsWcN$ZlJ@lpSz)3E8q$q%O%dHzaQ5cJx0d}8
zQr4UFr&nC^&{jO+#r@fAlEQ}FCPC+&_sl(Y+2ibGFV629N@nlLe)zsUX``TI>h1%P
zGTaX*22S6z|BmEmkvi>*)2#!$dp*1h52h<$`h8(}neL;l6T3M&xh%`Ae*P4hvcFw%
zefi^S-8aJ=FP`3g#Qa&-vxh}{)uTm1A1_)W$O&%#w!9XIJfzNX=w_XVZ`!xqh(^VA
z&vq(jTc3!i)z;s)sxj$D^i%L~vY-Zsxyd#TW>;;wW~bheHsh*&!YmJ;9h15}OaAh|
z^2^WRs&stcZT{rbv_Er((%GIj$!g}8Pd2#fX)1sffX83{%#GN6%<<T<AfF|79$jz+
zji`g8nWNsjPo(34Fk}q--@FrPA3=lQpwVs)B_`H~RtIwy6z=If_<gpf{!&E_r_2?8
zZ_0j4{+nYl_j$p`)pF^7C$Z{8Y-r1Qe)W9axp!+*o>=HT{8)T*U!5ZNb$N^VH=d-W
zZb-Ps^v(L{BhZS5G}kkZt*=A=X#Uwfd#aY%`&%}lOLg@6yiS7F0#1&1vpcxp{q}jL
z(+t<y^_yg$Wai(t*fC~jL-F!$y3bxtzsI=X_6=5^?BJ(b&;Bd?zy9+7$Jq9XD*LZb
z<i7d)?!>Lrw*UH?>bOg~;h1y!rFDr*ji+6nbyB(F-Qu11m{y+e`Sx~2&O|8{j-~=*
zCF90JrCse`|LdkyuPa}2Yw^PgPJ%28S;TJbGwt4T<>;Opht_9Q^~^c6$hCWhakf_e
z_t#zOwylxzeao&NJ}a>~A=-g!*@J-l|F^r`T9@^%KR4!3iSJR<dZX8y?uwVJwcMiR
zzwe)6|B;uEcc!1;+4<_b-MJl#clR7OlVlZx1pIYLz24SLy`cWeB|0pQ7W}#qGsSe}
zzsQDpcc`nWwe4uxRZ{oO*G%tE&#xELScBX9p7qD<*IcqS%6i)Q*@xFudROdvch$sZ
z`{rM-{lC<2<yrRs$8*-$wJrC(obDdGva-gm$WPHquW_00yD7VrAk!i1^3QbruU{?F
zuE3Fg@aZS+-A?5XtQ@j)jM?_za<l)iWJ7L!UV}pycoAjBl+`oh%b!`b>3+X$p;B))
zb^jajU4G|%AF>`P+rhPT5ojtViB~Bmo5y5bqROeP1M}V|-ge}@a3emz=ozPi$sv!`
zUR-IcoA`G{&W-hIKbf<jP*izpj$6#CV-IttJ_&lH8W!A9S{*TCW#INUr_kAre`<Qq
zR)>6>zDd2tXUn0x6Wz{aq-=A6)SuBHNIK^du{Yo9HDU)nf*rJK)>O#9wdbJv_<
zmN{FTU=*nty;11m(iy#0#hY$azhKD=Hh8M-mKb~>YxCT|h`bmB!Je}(zn4hNue0R0
z(Vn?COZ@PyQ2T7Vx0b(e?zU$)57P?Q_Yc_cFlXb<>HDTX<}<(6;Wedr_EJ!&aVRmh
zX1hnM=`5GuBE)7PD<!u2-o!#VttIF0Ty|T{z0fyVJ9lzdcqDhOnYF~WbNl<2MJ8+e
ztIRZD^qbyxbj|XcsqM|vr|(<|S^9V@)Mtt*bJ<iijS#(S^Ivzhr+)r4bK29XD-*f$
zZA5&&Uj9|{-{#Sy#g=|$CsvEJyq<8GH@0s>(mkjA#p^t$ellBMaNGaXuK1be)iT^c
zF+Bgina}gmdc9lg^Y(nnA8)Et7F=B5sB4yUgYUrP#p$1me*aMV>Hg`9;1@Yl@#L7=
zSs&T8FNNOyd1}|mE8C_jUGn>T)O>#a(O>I5ZGQVmyxu>@pG#_U-2a*P-o`z-7BpY8
z|LNoV$p@C(&kyIEb8Bzc!nwaTI|*;D0%hYj`zJLOOf|T$e(O{NlWAwIUFWYX`*pTB
zfMLNjqr+*&7tKz(<n_&d@;0w)&yM3NtQSvNZ;%bu?U?7O=995zUU`?@<hOfvdWlaK
z&5C#-${U*GW3u9vXaj>;>nXF|TK15>U4PCl3Oz0`YZfDDMM~4kyw4kcU9XupE8+Qa
z*<CXuJy?yM<@hFrYjRvyJh$hu#oL`vo;}-;|Juv#>#q45HFsS<e|!E5_q`gz3D2+l
z|2yk0e0-CF>wgB8nSYwjPTbj1c7C6C+eSIzY5%96oLsN)v%Q>ghE<Bj{%YBsb8Z>6
z-&_3b(u)7nbLM>(oqy(Mv*!AH=WotgpR#VwVXm`>JNmXBl=5GHkCP!|o#z>E-Lw3g
z=Kee!ZTR5w{LC+B*w^@aE9>e`&A(T-sf2L{)^S7TnVg^{+Mv}rUQBJ8N2-1+>8*eH
ze5H>0XJgxlCNJaMOE)%dY+$@^EwTU7!S<7S@uGeG7gujIJyL)7q)oni_HwUz;U#YH
z@w=`yzIV~b@7#{NabX!SoGv|6YevaN4UR*lN8A0kFU~uxW|Dbn3j6MF$-mgdGbAJC
z|C}>({gzL8M-vtwj*d4<Kc*8U;q^~u*Jr<q|C<H#F85kFuU5ZsFM&n3d9V9d{$uy-
zgTAcy3D{u1&uOpw{Kfmq8k&NN%Ugarr!O?AYOP-VYW3XL<yEK3zkOamJ<0xUSF&`v
zaY~sT+vm%BY#%TlV%!k0yf{Cu$NH~w31~Er&0#)dG|#pyBzNBH+u>U6pqfZ2V24=-
z-&yO~=?@z96u+!mS-wFmS4$-|T~pfHI^@{$Ux)ogisuPDjD0qF^4g*+3n$heI;*vw
zV@>Sc6EBNP*@Q(TCy31pxEbV=QMcpBOKb7Ki!OV+s)Tp04Kxt0x>-{nDpD`JYj4vY
zf5#p7Ui`mkt~1NLd*x&MB#-@aSH4uSmHvs3f5kT^B5CoLOJUq>;XTW%Pgg{hzIT{&
zeaYt=>;9%>H(qaZefgVj-R}+7%FEU(y0fmiT2Xg}K`YvuFKh0dEmO{$P8DY_`P5~(
zAuRjk?YE_ZFXPN6h3@Xo?|Pha_l-YehRK4XnadVAx`gq?l)kv}RXX`{Y(kFT=U<J#
z*|ZWT23dltXV5T+a*S5J+~*66<_L-?`$n@}{@`bRU#ILtWSHj1sSfWKxbwX*u1P$4
z$S-xbq)q7bf?TPH7bVjzAJnXz^K7Hz>9Wn2Qx{H?^hp)b(%NNlXXD+Y7n5#XdCIog
zbN@d9HitP!I1YVv*?D`FaIoo=8LK$2?EPN1>w8U)XS00L8!q*Csq&XIOH!}x{dpkL
zJU#r>-J=r<H#6SYTBj#m#h>h``TF&u_c@NfX_Fd{tqDAD`1<d&J)fUy+g#b_s=3lC
z()h75LtV6C|Aez_T#XAZC<KXH)oyI7-QT!vy~$Pkz&@XAIv390ogDIW&iQ@im#b|5
ze|^7X(piP6;3kZqMvT4B+*pT*?E<f+UbWVo<{`XSj(e$R5#wUBmEu#=d`yLzXGd%1
zw>_TO+T-)pV8iCGcajVO<~K<eG)G^QK6!6#tIz3a<!T(}T+8zX8<!gGz4xQzr}nRj
zjcyO${9MAH6}{{7w4^z=wm16Ue|7i^uimbGKSi^nBP*Bxe9V}1p~PEw*;z^N|LyTA
zth_ebm49DO<$3>u&HLVO^`pJ4ciJb<G`ZhssMj}VYO=({hyLn%dVH5Im#(z<s~uE*
z-@Vy`dBLRj)$2ar2)MU9w5@EDK<Fw^8!Lb-iBBuWSgSp==GR<d>i{3vT7bF!ljq(m
zt+x8uyXH&Rz7Joru5bKMCawMStd!>I&y|INiq@aEbj!Ugko#Wo{5wxbzvkYR5=tU+
zt2EDg2;{Nwq%P_-p7a0Ajg7v>8DA3gmo2|@>_+9uJvX=7Jv7_wKS6Zit+y=4w?12a
zMYZO}`pNO#XQe&w7p`V*xiL+(_@u#=ggZYX&-^Sj&dki*x8Jh(nZd`OYE>@&T=(@p
zNv+=fcN&{!H2>W4f3`0*Wlo7|vx4HsWkJE-l4FN_cu*GxcJK1qB|QB=+%?PKC8fFR
zi&@%FmEXHqdiHJn|MGd3i}zfAXR`gquEV!qMP|wdZTh+AqUwxuoLzeY9Ct8Y@RL)#
zyL;aG1df9tr#J#9tXyGh9r@h!w#n9-re&edUlS&;Jo4*cu5tC}J9p+L?3}vo{Jzu2
zRO2t(`uW?`ewN!mS=^oN{=WSp!Kc+%$cO&=5VF@Vw)Vfi^k>`ZD+j98`lAm{G`5+4
z_xD!g>g~r4PCj(%zbs!z@}c)G$8;~ZG%0|azN;QxD@a^WxOS1&ijQxV*fU-C3HV(P
z4P(CAWqE*m@6{K^A9Y@xi0EEtD7_-^%0jOIj(rU|SJ!1PTXe5F>7JJQZ2ydX%P#$$
ze{f6Rg?=p;4W8F>PNlBNAG4-|%9(Q<MsuGDdD))$QvPf5!@2j(%QL;_91VEX{P}pi
z>!Oprw(<S1Eekqi@?SjEjjnat^g{iLh}5}XnfreqnFm?iSJr8h`@fQl+k{{LOGejQ
zo}Xr+S#OF37VoT2xb<mo!dlPg%k!@vj+tEXe0Ax+Lc`~R-`-vCH(PVVhc94jruBT~
zIh^NvQ=GTeYv@$xFAG;#r&4;}aHR&+?^=&!S=JZ!Ppx-*J$LE_jn-3Pu>$W-d|_bA
z`&xO;IOw-oed*rallET@Tee5;oyIFsO|J9bxE?-M&;61u^2C^Z?M00lnzcDLGOy0N
zd8l$9es4TcB5gt8Tm`mkm#01h578XlV%qv|rpsoh@9P%&yva@W39tS9qj|RM85_Oz
z!2;`awZ6Ui|MBq|)_oIP?aw{iekJ7E|7|TVAKzbIJomTc-aD@+-?-BH@x0k*&;66D
z%YU@Qe!IsUXPAEI+1;{Zo8K18z4)qinBB%{_3`-6oF@bGc@zv+%16&tge<nxj9Y(P
zE7~lVX~98T&lYBOvvbQ%ckFaM^>&T?@#x3zzRK2`XIN-|pS$Rxu=9%dqGjIpf5c<k
zYtH!F`#!%~HFw60cZUKlcg*@@=PN8cvv!Bs`u#U<seh~5S~aC`b6v`{AJ;R!U#f_|
zC2w<Wjy3x^osF`s3LH%zz8_zZdCPZi!h#$ZmSY{(EAFQ+ywbM1^X7>Y>dq@TW-z7w
zwOJGWF2RYd=Z);4%z5W;eqP?39J=<3uC&Cw`7fVL^_5Dtiu!9@zpDM}>NydwA~GWU
z#JhgB$L!g9<m_zQChu~w#Z?j8PaSMYIw!Sg)y#^uOQ&w~O4u_cMScF#PTQ+nlfJHS
z5n2_hU~2kx)!ohM=jZv%G-_S5M(5NNO=bOuyAGV|XP#x4v-9yBL+P>?e#`mhX4dBG
zzW13k`RU?ngM?et4#>L&i$7Z<?{{^wbmhjYm)Cr-YBap|>X5j+O>^-(Z=M+|;xAWS
zc+RtC<IKL=>5bC_C-@zoe#~%7xOY?8*~M|o7cxHAv9(Kgp7{N|l8oe@sQun6<vi}s
zoa}S^!rGo!-+4A){%c&#JO4#*`Yn|p9@7^ellbK_E>E#eI<@eDO69NLU*x{dwHHjT
z{r7doq3M?=ue`d++IE-SiP~~ip-kb)-(L4(9m8lfVBX#$xq+d2vyIH&En2=wTNv+1
zW!{W(m~nLN#93<-Il?5Ki#G1uJ(nl;q1wKty0hDI-u5hhWV)zliecfoilz>Shwob^
z&v`RBVUwU_CO1d%>>L-@TTG9hJ%g^STQ_6JrsvNR$}J9vc!i``oSP$aCsj%FYnh+5
zWtlY}d&{(Lr8`OL>g`GAl{z>?SRSUiZ0itMXX*aQr0+@Wx{VyYTXf#q9NNRH!0|?}
z_sW{$`}OYI+~cMx+$o9Z=;^ZRH?Xy_x%B8Z!#}S??*1cTZ`7upIAQ$0JY%C^rl#JH
z9G6pdpdP!q?IyK=9XkcH{XhjWWX;8&y&GL6G(Jg7h-59<B+?0LNpUn42rE8YaLCtn
z^6xb^lOL@Aw#>s=`S*;IXG@F{CgyyFs8Q(P5NX*jntk2k#ElG%%0p{CAA-7j(6ZtE
ziHS^tHJ}<yf#cBG?lqjC)-0%j;<%%8!c2#Q?yT?QYQ<Zklj{%m{@k<mvG0~uOD2@x
zIJ-o9$IEJ;Er<3_?VZv1C#HAm?K|mrxGxIiEQy`tWXZiCYsvc{Pdh#1>4(m?zbbiD
zuv`C!`q`|_1&{M<^U^POEnV`?&MhNDMRe62zvi~B3hLea&6X7`@UfN4ity9|`%Wof
zhe^`oW{HbBFPhdD%N1Qpf7kmW;OGJScUyyE8M8D}a?YoJeOkw}dE!BS_Q?@u1-wb^
zX>wx!XPiIt=D-VIsfd@Q(=`_aZd=3NFRPJq<j)~vE_cTtzZ28H75i3JH-A2)XM8RF
z7^wFonV0Z&_k7R)ua4a4Ev@)+&}Lu!2k~=zgk`Qft&mv9f7w*dD%fR#0!LGUCuj>t
z(_{BLYi_<ssVh=+uRY|Ed^=6uUd`I`-os@FtP7XjJMjCBs7Up;<mp-rXW6va*}Kjh
zyzAtlldbT*WA>A`>ylD4pEUA?Wr>J4^scpeEZX4L?)LF#{|7I}J(c?-wU!kG3yQ4b
zwp@BpdC$*_;x)hD>g=od?Ro#_5B0W>Z`b6{w0bEV_xA2B>8#xc-OWO;^;EWOl;)DO
z`(1ob%697ZXtrcN&c#lf+gJ7-{5>UH+uHAq-^%VL1&(_66tw`3g!9Lk+Lo`fQ!Ktc
z*Kx}CSN&^e$NZVY|A1-pq2qg-IQJ#a4@v$rr)E}|;iTdNXQz34oj%arf3xFHzTIK7
zN`@6PC0rKqmgVRqqy$#3(|NMfy(u%|97kHKRpJI^fAQA2lYAZo&HS3o$ld=s)9%}X
zslSs;4{!UhDou{3aHB^2s}%*G7noGFiV9Z?ytFNAfA?kX>X@~Bo}3>UuH@cms?O?J
zayh#_?+v@%)N^a{KN)Jg-Lbe!)Zjwp8KJCPdzYWDQhzMFm{gV(p8e$P$vxulA11tC
zYBM<`Olz?M$D#W?GdV?SoY-u1{A|717yodQo_^~6aT6U!^L00`uC(FjzI;bTe*cz|
zS?1e=j&prWaxHf+ZT)lCZ$HBquJf#!s@E-!9-F<Q<JhL3u1o>_zuzums<VFfaqoNq
z*Q}KVpC@d;<lFvEx0XrZz(cJs@*-^8mS&y3-JGt<YW~4^zSV}QqO;h2PajX`OXyTl
z)NIs<j?Bw0{HnubRR6PO;b~cy8BY!?b2NP@cS&Pn{mWh-t!N)5@~@on`kJWM2SgZ6
zm6%!=Jf3o_ZvD$-pB6#xUCF}jhu^!XZJgz_=Jw%3KE1n4MONM7dGb6v=icI9>v#|3
zEQl;s7S(Z!@$0=Z_npwktYca#EfOAgpQZi(>2*IrnLo^ML)PrBzh6GQax(+%e0_88
zCi~qr=e}8IWnQ{*DtPM`zo4BQ%A$u%3Nrqfn9rZ@wR8T~jJ1<aAKqs3_kPq=kvZ-$
zKBx60|35b|{~mWF%GYtn#*H68G)c;A47K_1?Ib+IxK!)mJe#88?)e)8-o<*iU-s(@
z{`&8O=8Z$S`!=qfQ~OdTG^w-OZ1Ub}_4ZG9A`Tx~AtK$pRzx=C{JrD%79TH6R$5e}
zV;H!wQSeGkS^oY0m0OCJFSgofePG`6?R(EyXCCik_c6<v_E|8=Z&TU(r!I^$d<0^)
zDH#SnOy4mdZLDZwo$_Uj-LN`~*jXI~Tv)j3%}t)=?K1GMxf!W-+Nr5xfA)gF6~@Pp
zzP@j5$ra+wvnahO?ktbr)UUry9<7?8k<Io-%Qs*}OirVBxQ)*2>F2lT)JM!d7AEJ<
zwPw}a*_lgp^dD^Isa>X{bM4G){#UYl`?hkuwB7z8=Td)Kr&`p##ARNQ$}6;9-sZZ&
zxFs`TVQ8ta=waT5V{a3=wyVzM)V{Vb=cVb)-rnsWd^XmEdTcOKV+^>v$M9mHQ-@TO
z=)9HbY$nrm^*+p8@Nd_ehWPUnF5f6v{QmEbkd;dhopsJQ^R^}Kf~eS|S87J}C-(Bp
zjL)&V5Ha6hyvFgfgm%H4_48kyI8su}lKJC{{K|?1X7BX+18+pcpYhdJy3Ib5&Mot=
zZ1tApOY7fXQaE+{+S}?GW{(8j#Jia<?D}6XadElmt~WuEcFTB~m)$-XF@Jsi>@#;C
zrf-{Nu{Bq%cUSL6_KL7eum8F2`%}L^&uIJnRK*7y*Lq9dd#f6jF|VOu>BGx4U+)#)
zkxl*Dn)>gXF7y1_ejXDklNCqXcyBXS6n)8w%acF9@~v^k?>FrGegyyCp02gv&Of&7
z{r{g|jqV4n29F5@t#MP?R4(%Al+peNMbAYp9oJ@H3|m{cDfp_jYxnkw6~R}v85nc8
zrshih=3Bw&7aUc}vv8%k-{)rsZRVvYeRoh)R-gaqsl2RAP^n`0{!{~hmzN)v4*d<i
z8FJ0z>E<VvX7|nre`k5gByeD9;FY<Xr<zu=E~x9j<(byxx8hHE`|L%Aziz*Z%*bjx
zGq-im<`WN%?Qefx-fX^a>*s^~=Y*Ixl<k>WXxpT@{`-d$7DaVWqrZmb%gM@JJ5}Z2
zXt7-2jlnB(yBd>&w_CD}5}&55XJFBp`tt8J?p-f_mKc8DX>M3~>Gv7Gi}nude$M}V
zbCKH@X`_~RykGh!EYo}P^7ru#@vL{MC9b|uycd6G5@&avx~}fj_inbV2QK-uefrWO
zl5Zy?<N2L$Rb|kP(i3%M2c`dWWM5zRe0pc|-uVZg`9yrOetmsi>eo~M1<J+pm$#ey
zEseY`9+8-FYI*l0PjBz!%dg(Q%nJy(?!U<W^}#<AA8$Q&BF1HR`hC^I5&fK}|DS$x
z(tg69!@GCgkT#lffAigzYZWuLdaGSNw4lD4%{S`AudmsWhAS6yIDi-XHU-3R?DF_J
zD}JWZ*+0gI>dy(JF6v_1X?^F6N8zRC|2LQ4|7d!*Ugmhbxp}ZsN0)7O_r+Pdp^L(t
zgO#tW*c-JqYHgtF&y`xCQC(fWYggDh31lr%O6xgx&wApCxbNTHl~(vV9sB*WqQd6M
zz1hX*E9XzHKL5OC|MTB-g(rrX9Bn&dR2C(1e{JW2|7Q&T9DXM|t#`GgVT$qcR8Hk1
zM?YSkmEm{nK=12~f8I$?4p}nQJ15oVe?HH@?T<UVdX9ge|3bZt-L~Flmfqz<5mj~{
zSN*JwofDSRv*3EI>)V^o6HDS{4z`?~=yOYc_LqH@YlORwPd0zo`$SS&?)*_}wLkCY
zUq657ME~8lKVH?Y{v9RU`{l)y&bFEJ=Bz$=%;RfJT0`gE=*1?kEhmh{no6z8ynn2J
z$+9{8UG0O&|FN$PXP6&;ySL`ank#c=dQVdowNZ`kN%+0c^Gn9<X^qC)jW4M#oZhTi
z#>oAD--(H>`;G3491s7zZ*kQ>od(fVsam)Fk2qN$f7FcQJgT(q$}7trx6nD67hk8p
z60P@|z3ie`b#?Tn^xfYzA5J(b7hPQ~BlAYy{(a(=$YmveBpcoZw*_d|ANnmJ>t%Q#
zo9n27!yfaw$JZ%xb?hzv{qCvW8{ywOZtabf5^t8A;aBi3O+sSrhVl*1-yNSE`Yh+h
zty_;1)(Zu=sMrRU2Y9lniT!p9-q@#_upq6JTWqbT#uYn_>^@7i8h-UZIR^X9?>T;X
zyW_XpQ<IOMk7+D3-*`yo#I(~L!BZj+y;$s~eAxPC(RTACQ;+Ul{m8ONYNP734Kpf^
z>GIXT`&u_8$FpAa<DcSq@tY4ORL8EJSLxO{qhi`R{^RFPIjlS+7NS*CF6UruSGFzX
z@2unH(Yc}LyVf|In3=I`?)8?BO6UI^4SQf)my>e%fT`e{f2n-!(IH+ltPMV~oUXU`
zI;p>J(S^IGw9L#_J~$b^>Ps#2#gu1{^VN*XbIbh_lFl!5zEPRanh-O)ENA_kimj)&
z^xJl>ty^}3b<UozraxuQo{apcTsP<E#p1Q)Z;JEMU!1&KG2_v}HDdB1Cw?9%ZkrSO
z^5UJg^)`n(BVT(SnBX?$T<D~2=Tvu2S|1qn`qA3U;yd%IE<N|&!>1uADB7shU-mbJ
z=YQDtB`u~Yr>e`Xn2obU%$y>0UdtQ4I9acsKYQKrBCTUxLShN?ZhE*LZ<#!0#qGj3
zmh$aKYu&SX5*P)f`*{VXDMhCqc(jz=$>8Dihy%ISg6+wI+-}#l9^%?iWD+D|@w4N;
z)`Jw!y)kJ8)@_2)$t4Y)Hsab@r7|4ZZH{})x%d8Cx+dGR>2LYQYa-|Uzn-%Db^V|5
zqZNwPk#;MSPM)w0WceX`NKrxTjFxdmjP)lK-YY*f=UPPz2n#bCPMCZn{M*&4mmU9w
z=a~jA^b7wmgX7~p89Q;Oo0swpKZNr$`2F8JDKmCo@>);Vc^w^H4il$qlueWENGver
zKG0JY+LL3E<>sgyV<GnWOsr(`Bcav1Ercg_`qm$tVUhNx&rkm7r<W38bIp&hZ7@uf
zmj5WV+vZtK^j)DtNk8u=l`@M~3YMw9GF)(qzvrg#(N~G5Up}?!m>d^upnY?CR_ogN
zg_>!eldnk|MJHr?{HWjPwROd#3BQ&fKl$jzhYQII#lIP3MwCB$-?{MUg~&InlFzP)
z$_?S^>PRuzr5-TJ_rO7kbE;x_H@hM}UA2DZU~{j1UVX&r711B>?5tjTNO9rOH-Y!c
zje?FT<vuO>apvx;YX|E6U3vSS_2lOChNkEwKX<p=G&R6wo@<)jpFQRiF8CI|ob+@}
zq=4UIzpaH8jY~sUg{Ul)uGpjRexAktm#d<<W!1B5tM6Z_$p3S`)$UQ}5q|g0r$o-C
zo?qL{z4F=y;m@_)A5Uk=w{2U|vv)14RI!(fi|c|Dr{{ET_~nsQuEnkFx3kA3wm0>&
z@%J>74{k}JB0^R7Lo*jooL4Hh?n~G7?TeC2v%RABKgm+Re1&`C`5iLZt6RicZrm>9
z7pZJJ<95t!<1sa(X&Z}`-tUe#a-9Bbro^Kq_K_3Qr|u21wUQS4Rr25Pk@YnxtxJ2{
zCO(gF;z-}FR?{GQzA*pX-uc%44|A^6zhBIq6xZIL@xJ!y)ZF-kURNjm+y8O?V}9HE
z*Oz{_oEO{7l(M-u?fapR3^T0jz0Mx$Y`!{U_U9#rt8`2w_kfbe$p`Zmp18T&+G?e#
zz=IbL)8$_+$Z_Q^x~r>Jer(g<6MOHU4ZG4c@%>D7zQDW%@2s!!tV?sea#dp5T~7(m
z&%d{A^Ld?e`aswDvP+Xp-p|`Anfl(EyUV94`ZDwFn)>T!&ENU;X4I|sFFiMhv)(VY
zTdH#Xgo$CIx+`90RJ`zC=X<neCsXbBd#n3+&D`9!`^|8A^r11LXu8RxS1Em~CtJ!-
zGx^lIn%8ubv_++7>^7ChM%y>koV$O**ZRBudr_0K(Lu-l-P~Px@7C^#lFKZ3p5<oT
zT_EOrYns=Ol#bYj*UybDq$hny6cjC*R>}Oz=vhkTGE;446`vjVRtBs8>WMhhD}Hld
zd)_sU=dX?);a_rOdZhTui6%xb-^D*Guz2$C&=<bwXXlv~eEhyvBTdvhqT<l!kJHx)
z&G@k@+r3X{f{B68BTdIOnk!2`+}PPN?d$<##u&cKDNB`<UOux96+EFF>9ABtOvF1Y
z)i~gHt#@CQd$G|Y&;1hp#}`KIt2!<7=+OtOzdshK2FGc)?N2}S*g<Aycf^IAL5~bx
zTQ9yL$2_M$@whH`R*Juc;mP=0vraDcJ(A>MQSkWDjIDYP;|~7GpA~FSxMlYwKd~pD
zyQ?3W2qw6_e3RyRJ4|7^q{_P2zg2rZ`^B}BSpK&N&;MlkZtuPuv!4~%id~i!KG?uJ
zVQq`3^5%$1_R}RNeoV+^3)!^j<WdQ-_dU-h^-9LRx)pGNNl&KBN9yC{gyfVI?b%0|
zBu<5Gem3!N*qy$_(g}RNCucl5v~F|m)m2O8oA<|_+IQ=x=6rQ-|6Ok;?|s$&MaQo3
zB%|S%vXw1Yh2vhE{t`}m8rhRQb&<=QFHgGpI43XT|GQcGXOCM^=?mwLm7I%PkNq`n
zbbIf&pyJ~r78VwU3$?FK9PxbkenaZB{5Lm+S*yQ&xTmgXQh&=t(B<L&)9N3SuSl<t
zU9K7W`|Of*=kxNuQmrOmPMIDmI5aJ1%Y2iWvR`*SmVWPhyDs>{kEPN_a}M7$s+CCo
z=8|}^g5z(jH&ft^G_M~$J@)^sd9CIy?!2pMv+Yi{<DWv0{5p%w(}(UhYc@5mJ@9D#
zu}O>b8P$5{pZ0(KebVtT-DV}FmyJ<OPZimumG_r!dffeL8vh)FRPBV56UxeXYP_Z&
z+040lr*Mhdw}pDW|3g=7G5s5T`_kT7s?Q2PzPTrKuK(Znb!NM(T{e8Z^f!FIFxP<z
zs}28HsWZ(#a%{_i13#8Lym~m_b|*uLPEup_cb||Q`9VL*U!^^|UL{s@?ULB^J>R<y
zY`U8JV`(4jTItk9zAvk<`xH&z{A{|Fn1hX7`TLKD9sb3n&AB6eaFg)&YnIV*+*MmP
zq^2ZG7r7qWSW_x-|Lwl-f7-XMIq{w6yQ!R=$YiI*3%Zu%{nz_B?NjL7pQmceFC{O#
zk-BAd_}s2qG25r_4>byf>KM*2UmVtIKJQnY{r4=-v#Uf`-cK`F?q!mC>DAmFQ_t}=
zRGur3+)>ANw)_5X9S#nT18ZE2>moPx#+iP)yZ4Ce!lKS{quEy%xvWz^zfruZO4QF!
z^yP=gr<NRF_O<Bf${6=O`t1*mmr7r?d()Z!Xp5O${g;KSCJSpbI~kt;tQWlfkLP)}
zs~x|em(SW0&A9y2`%BfA)Nb*XrXPRdFZ8}~%c6tR_p6`nl-QoGTOX@+c+1s!e{QKR
z{Ay%(U^@S=tqXSK%Dw6>@YeEB@11{of#vL<2K%@2xd~4EQnWAMXr|E@mXyfNsfTr<
zCAG@qmoHYFd1Y;~^Xx@wIT1p~7A<J!J`;cY<^)L&OW~sn)^40~Cb1zx`1tzSa@9uZ
z({DO-p5b(jE(rLe9IafOV!Svb&7#Ea-&Aq$pR#99r1P5R-HF~VbMuVG-!{d>j5Z75
z^z}#Vmx_kYJN`|6u12g1dtF`K`Zu=LWp7Trxw&~~%}=AqO(_@sJh+q28$DRtZ}ItQ
z%(Rj<A3l7Zcw91ICu_Fv#~PDU%@cb+6}%2pnGrqt??&OR#|4Edv=_eqksL36^y}&E
z2InUUw})S;5)&13Du_@?=*ul*m;1XdkR^_vtvFgnCP%(P=h_`H_x5Vm?1%@YP09!L
zb}N4ivzijOw275FXj6*kzkk2;TNRZwo$flSRz^w`YG&0eTi5+;nt)jHq4%FIuXk>_
zXZW!E`N{RW1x3#HUcVCi>Co2-J)5N3Z7Y8-JCR*lb-3o*tVc6L?!R2Qo|nJ(XvUV~
z7cc(PTyK)}{=tdK@u{uJ^K90aM6b<xTezk6)~@J>4_^G>>2F-yth~*t#l<CrrOW1w
zgLrthR=T<JhcKDKnY}5jHMR2>zFu@pLSSNwSY@L4;r5-v?;`r7@^5b_IFh6j?ek=>
z&G`?d=_}W}2nq%YYJB=4WUzVSshvF$j5~`}W&8ro7tQ=LgE28@v*bIzoc5)2C4WB9
zWZu!yVY55<UU9_cbp8$Py4ehyPo4R5=;y}f<44-=Jgj;YD>!{!j8e~}tiM8n6HEM=
z51;ThyxaNsz}g?b{Z=IX{JQ>y`@U~?zpt5j<%U(}2FYS}?tj<s_Qgv_X?O^FiskRS
zKkJkV->(#nV=I0>Uh(L|i%dnm+?g({&-*-?*U5Ox6r_H*^q4Ei#pROeR+dwzr#1R~
z<VauF+nDpL;3Mbvw{L%0PZF+{FEkR%4(d^wq_c*5&v~8y%q^|1O<Q&Sr6#Mc|1AFF
zXKC#b^Tqq5T*1wf;8xce^D<q|NegQIa8X$*xp8alHov)HVN&0MTwILi@!Xfaa_Q2f
z3)#mzIQK7I?5NVqvAc5Ci3i;gQBnIYecu~xuqJxBW~z%y>YvZ&?eFZV3>MdLQE`=Q
z=a;v$QD#p_Ogy;DRGY)*q3^m+XAO+6x94R|SKiFXuFd@I``^t*jz4OXFItEiURp52
z^66(O%_*FDtg|@k6OK!tNdNu7=uNMH(CV7t-6w8Ozwx`ByD(7EXO6|hpru}WH+|QO
z*<m0jCkMKql%1VjD|FQo*-vL4b#QenXsNZ$`2BBY&$Jo#kBU-a9z@pkvrV%0-<Gm3
zO7f_ry?tKvbEVn7pY(o~t}l)`A^L}3=IN1}n`3qgdM$fnxxujh&%Yfq56)jqR=24B
z$F-16x+vx3mq)J!#a$y@t}y>LJC<_fQj9adO}}aOwFl3hwJlw$y8r(_Yu7H3Kn<ly
zN=AQDJ~BAB^QAsHF)>9qq_=DCwB#*kmMAN4mD@10*C#jkaJ~1}&2|rY>?QgS9y@WM
zf30TX|M}JYk00IKXV^6Jxo<;bU2d-C|M~Zl0>c--*k|OXxIF3N!s{RB-kPvsw)#xV
zVl`RovYMx_3!a^ke06p8@spF)e||cxFKL*>!q)6){XEmD|Ki4|?j?>9?_T|X&Sy2{
zbXB}!v7ml`WQ}Q^;ju~gKWulmelt0h^^wK<?X5Ex?>nZf@NMDu+LfyNzm@hqe$aJ4
z(>O(nA#~N0g!A+253Vt?uk?9v_)(->&DXVud;ac{{rvq%Lf9Uoe6}}FS=3gQN+17m
zk5_KvJOAi@jcYt>4jwQS<o|QcI$k%a%3WRd_xTfd@6N6J`|II@2M2bQzFrZtbCZ+c
zM3KeSOpSudZUxWJ$*TFxIPh=h+RELBleTg!3)?8b8<BXZ+?!>x|B>Wu#v=?f3^(=i
z{_Js>)A^y|M9Rd9qCAtuY0lDl|0K_?vR3;SAu}r=BuZNHhS9_FM<)4+tHgu_Tju{h
z-g4LG<of;penoLPcx3S2*;{R{r>EB=Z$Iz!bbaH}S0Oq&Ca2y`-k-~3A;sg6Ah4Pd
zwiY0;F4A=NuGY6g|DN@fyXSEpDn7-rZ`Pid_gv?%nI<v$;o4^AJrbARcyF8au2g)3
ziGSZA+tM|kUz#q?G)@s@d))r#=XZ%yx1>4jUO0X=JN$E5B)iTv-6eT-uUo|3=Ov`9
zHLIBM=x4%(8nrDe8CW*%D35jfcX9r$iDGYlE%Uq7x-C7LUt6m0)53=u$}aPs_WRE^
zuvB1+_Llv2KI_Mahdr{^)0AAh9=v?%d2x}eXjjJ)jjxwIzMR(Ie_?BOIFo<g(+5r8
zjvQYZpO97lFzAq{X9Q0R<NJN-u6D{XKg}o2dzV;aXWaAtntPG;yL%7sEoeKs_kL&N
z1?!J@!XC%JTK{+PyAAni!S7;h53LAX{Nn2B>)EmM&F@t-uZ`Y*X-8qQm~NEIWHsL%
z#n1c7u531Vq8cw(eW2rF#-8;Fn^ekg`rp`5Yx{SVO8(b`Ehjy1AAKG9{GR<xb2ZU5
z#VhRh^F%Q@Y`?#MNlub}htAX4kMDhrHJH1ge!nV<+P*(0E@@WGcx0&k)AIhdl@oqs
zm_^n6);?+=V7i^3oBwXjyRQaPy%N0%Th2PJt8tTke*V`@F=_AZd3PnO%XFq{g&sP7
z+?{nn#|n;`tIPc6dcD53c4uLZ?LY5b$zA>78Y^!yKbK@%C2s7LVA8xNd9s_<-U7R6
z`qy0rf1kKNaf^bUTGQXn(Lyze^380#Ngo~@T$!}jDD~8lPft(3ytY<4dVAi|xV=>|
zJBwVc-^@H$BmZ@Q?$fLg)&6E?zOs|Qucl1juM_J1{HS5{vy^)iuJ1p$+<a}qYE8N3
zkIr^0rhIOEu4S@f{s;4tKk7B-T%4Y8PW$(SwV~KMz39$=&RLsn-p&!e^<Pczj&Dd;
zy86O}3r+Ix&0*yh`|z+`{?XgFzI`&5B0U{@gkJGg)w|}T9h+lW%)!l_{M3uTXWF(Y
zncr47c{1~ur9Ihdu(9&?Vd1OC<NKagED`m)c1m+?XMgXr1An%$%)RNkK5p-ihwbt~
z*GoATPt3f$Y@&I7TpO>n+uK`PBX<_5s(MYiaL!vWu%*T2Z@GR#Dr;ci_g}@*xo_ny
z?Gtji^nQ1W8yz~t@!P+?c=1%xXBz|WL~jxO_ATuE&U1-B@?s}$*mEj>^7U5-^JZPE
z^bWqAa4am?M!J0NlC-7uA3fe&i(KmSuAS@Fqp8`4ru80K^YzJ>32sw1nc6(AI`Mf|
z*;}czvrLo!|NC3=?#|4W!ON4@{nT`Iaea`@bL7sPqut_{R|G2W+4C$RFVnm8*kVy1
zt@N&r#qYNqKO2&vkp1Xw)W%)!_20g^{KJAz_tlxT^?P63*r+V5?&q`hwN=TBfKN|P
zgKjKY5xksFPEIalb(pWU;my_!oAQ~Dz1bPQMpDOWbHYC1{agMRWnP#WbXU!2f4S_M
zpp%=<=gvLk&YW)lA>#K(?iCeTMtjxYd(~`C^7$0wvDW5%;a06zY{$2$#r#+mv-ini
z=3O_QF7is{-uABZQ1xp2<(uaCRNPr3rux~|=8yNjqNk0$0`JfMD^{GMHP^L0hRdSz
zlZt%p7s2P}<|g0RkeKqp@s{fo)4=iz?i)63N_u)~>Ym^4cF(!}(^}p9pZ~%oiEU@n
zo_Opjs(-g+)d7=0hFLy8s%y@b{tw-;bEjeLuP-6?OzF%0=0@zTG7St2+_7WFhN`bw
zZ%?F5Z}v}rcX03X-Jbn5UnWaGJ#}Si_4G>&Jim*Lz31;{@VS{Po_S?H^X&f%j5v>P
zIJieL_x04{J4?g65)Ul;D6mZ~?q3b7f`boBdHRm6TWhYE#(KYbBlJHn=b%Vg{ujU5
z?pxBN*1j$2SZWX-A3taA+?@pvnJzE)4`<X+(qamp?K9J8W8Ph>dv(8a{qiMGOTO>q
zQtkb<pv(1;=k2!=%jR9EotJrVqiB3#>V2h{n3#wS35;&Zw|30joOrk`=k~VVGc%1Z
zuL{*(=0AVk+?j8BLVmxgwvra=>%1ELeB;GpvDiEPA1-R}-#PWvXX<IQ^FQ_M{a(&1
zn5bMP`8lk@DA|MGZ<cxL$q&=^T$r2ijBQ?}(_#Zz%k{T+mA(dDJOA+EL(t*ada=7S
zL<9o`XGo=<oYdMYZ636%B-1ZP;#8%4dF!NO5;A8q<F0TV*<4XN<y`ZnhKADng&*fj
zURf6_&ClO&V*b@>sn^sSd#lY|yF?b|-`~e0ZRTTbdh_YB)yF*;9{rC!Fg^ZnkLzPa
zo)X=etESKWytl8XWFuq3y9w)mq-KjaKmWFP#?)ssecq>?o8x(Un(oOdn!@+)-80F$
z;<2!!qwByGo$7CIDk>{KKAW9iRC6>u=GJzPg-hlNO;|4#mCX7huQR1~hmhx6zubSF
zn{U@I&APg3P1IJd?AKc?XPuv&zwhL`ySrD0tQ2~Dtaowr_PirUGV~@rNeO?Xoc4Y7
z>@UTy-L_83Z2C4OW}B{j{b%;AyqxiS*}BhNshY|8I@#Ce{P8FgL92qe=84MgmsW@C
zbBXDk02vsxw95%JFq-N3?#|Aedn%19KR<o^;?nsY&-QyQT*CM^Hn(6?+hOa&GuPxT
zSb1-!>}1QWnIcb5PftJG#(U%D(zm}S7d<)AxP5zh(DZB1&dwH=l3FD@x5sPRe)IXa
z?#$b4AwB)q^xPA1Q-V6bv&~9MGbp{gQapZ!Z`vm{J#8JUZxd7FGcpb}m>unUuHb&n
z#`C<3O;YtEC1Lk)GZAI~Fm2fceN8j7Y3J>J_kgZ^?-JF1^!Ty!#-yVnJsm4Je7r&4
zn09t{rSfrUnYbIBv;XG>X{?+Pz1Zi<xzuBY^Ev9ig)`2$6?@0Hz5Y7SGM7%FoLgHu
z*F<h!*8O(V)~%^ePfaa(cSllRU;owh_2;){U*B?gvhy9+JM}xRyWSVs-LU>{`SM?>
z^Y2@<-}n8lwc#l9|Ksx<E_dsmnZc|+y~XoYZMmHKll|x0<C6OGow!Tx9jZJTt5f)D
zQbm4WJWu71_a7$YCkf7&KIdjA%f6ls8w~E;xiiDMT#r}EB;oe9Tnj5JEeTL^6%*Bu
zn{zfU?#Augg#~rdO#c7Z1_UWiw=&!D#PsnJe-?YWnh!<WBjvd3^7DNIk50*ovD`3a
z@#)_I8v+!~^6$-gr{--Y)w^T&ZsVLA27EFW4sUO5wJdzpQugO`Xy%VU7v;YTx6asB
z$Zc=twd4G|8~L#xIXHZ`o;HiU?K)q7dElzsCEM0Y&5oGx^U~YjPxUMIZxWpvdrN)I
z*7tULt~37yGygk%?MNkK>?-j?yF8XIam@cFv-8vaO|1;~-!ep)<vSJLUH;?Hy7>K|
zE4*&*C{&iUDoObI>gtVKw;<k|HdQxzTJOAh5z#Y?6V9o#%)h@nAZX(E`<sugT_JcV
z=_4n5xccWM^?FZrXWqEMb^7+lIUyk{AE&&0VfANviF*0FJ2`hZweG3>yzF+atX+-8
z<>mg*54CbzSX*ySKQAZC&|h*tguUM~pU-Ybi{9kFKNDVfta@H}<mv1Clb6N+*!Q$V
z&o1wz*fh`o+fB^X9&ehz@0`GqKaP7XGILIJoV$7=Zb{;$5|NFw+du98epEc_`JAb*
zA14_c7TMlsx?A5i;LN+ojVuh^#}==gd0gtm*>+_q<_$mFzw+mFzq}OpEYEX>a^a2T
zp@#b$%9xh>%{_GU=FGppzZ<8V5J)>aYpLf0R~OfWs=%E^sn^!VcJEZ2R=at2@$;_z
zPM*B153H}1=*<0*nw{+zoi05qWV_$5=V50jotg3B?s`V6pyp`xZ3QpRf3vViNk0AT
ztcYHW#ohek>3XrC3tI&R1x<2pOxT)z{guOy{uR-TGCp${Z2tu*m>-hMx!bj8Pq$*O
zd$K|PVFq(`2A-SRpYQhT3U|$%A^GKRrfDPp-Opl2_gC|){VjQV>G0j6_>!Cc(diwB
z(vNW*d(*k-?h$>>)Y$<cVXuFfo)u>AoOyY*_-m#-+wWG%u|4;McdP0w{@(v)j|}Gl
z)%lfQ=9RPX9R0C0{P2>Uyxuien$NK*FPkCBGk@D`i-sNAXa1~{S2qY;b^E=ER?hc7
zuN8m%{Q2<Bn=?C$pU<!=)lzov3(-(oq-3ynW#neH)6@0aX9RpZo8feWxAcA0TAdyf
zz3)8X(T5*JRNq^&CvNkC&ATlPwwso1GpoqG<#KY8s_~g^yLaq(@b+!*+_|#zYd&!r
zr=7XL$asMF+1GU*N54luoOq+v@ZXR3Caa>ReVTiBCzH#+-CbT^cWt~X(yv*@xVlVb
zORDR<?@3P%9QE|JvMU$Z8ujnoUB3S9-~M$@4tBlOv%`GX=PhyTEG#VzU(au8dUkZ$
z-!G+Y2GjUgI8KijdZ+yV^5xkFjvUH5eP#ZnI{wR?`!avo?SH%H*O}Kf;c?IPBg~Tx
zjy~Fcac`XJEHh2_zj}`iv@iB#zJ3;{eZF>n@3ERMw|7rtJnQ}Nb9DHr>A6k8KX?7v
zq&`cmd71I<^0VwwjPuiNjW&IKef{zRM`kBy=g56EmUnlThifQJS|#bDHu+pf{Gs^z
zzpb-pOHZ3w9K7>4TaFBu#*`^G3AsIgZk=m8GvmkW^9EseD_^Q!+P_!V@0viJX4m=a
zE2bJOZH=*Mm|vZ&(^Hu9?bD*Dtyz*r?UU90gZ9_e`pvgH+rY>Sx_;r;y$2^A^Q^ej
zw6$%P&fUM!LD@4@7Vi+yo3r1`h3CeNy^iy2_-s}_PCN0HJ7%veqnO#AhcnpOZ$EkQ
zO!C;Ld+$Hro#tUDv897Ech05SH=yg$!q>;8oSijwYxecD8|O^~J)^G({g^b%Eceu#
zo0~g%_O;BfPG01+PHRed*`dW8mu6kaK7PY|wPxBfjcaS-<}UP~)+MHU3RH1LY*b1<
z*0a#PU+&1UV`)9T*L8z54`=xQJ9JbtZjZ&!`9Ta!H>{s6ulV>#M1PL`Os)ex+l$?M
zrCiU?v(*e(z_7dgeHf#L(l4!Z9(I4f1Uor9|NQg${Kw+*rA7Uv5h<tEB?P_b@U_U`
zt$C8=+^AhRBU)<53;EvTPt)_StcyN6$!MnHwG6kDM}D5@m3pn<+IrWfcm8hekAVl*
zL~LxjwKY5ahmVY`?9FYt(s_A#XJ(uCuMS_oDdAw#7t2Wx?slGiy!Yq3<1<UD)$I)*
z8j4hG=S!7c`;~ptvZIB+=IhGr&OD^_%;yq=apJ|772S^?y|}yEd~^EwXKT0LOZxlk
z>ytAxgWH=rR&boju>1YS807Y6v-3X`=Q}OU%Qo1)E!;q}K&kSTo6Jn%XvX5biAORT
zj5nUYy!xgCqq}j@Es2iqX7BLF22#F1-i3u<m?9)uB541;zk_r4!9%Uwg@uKI8l5@0
zxshA5MDOk@O}@4!^2+-7`TPI<(*Ao^Gi{>i^Lg_>Pg|-gaQ0>L@s2}{N~T{;r|{X$
z*je&OLOp-xhE1CmeVqQcqi@^O+Us8`*XLJMSj?~cr73NevmkD7l}_|F9&2lBSvI9f
z%U;YokYZNyBEYRr#`9WJ=*k<FtZt`$xLl34ytt74X6CI4A7(F~0U0C-(dhqpxBULc
z{r~^=JGF9cEPCqIC3dxEn$pUXz0&3x*VahZ)zxjuzRp)(Uaq5~Bhu5c#B!Iq!OUI>
z!=}2wzY2ffv41oFcGr&^8m&r_VLmPbb?<yR_E&b@ulw!G`XIPR#&S~T<z<DSyPdDB
zJbZ8Oto9I<troSvOwOG<_v7bJ!;A|G&(6$T%oN~qNoOm|vI%*2cNy0HDk++h<?(ad
zXG7yvrzfZAYz!>_Q2f8+0rziE?@YoZV}etq8|W(O)YH=>c-Wquo$Y?OjaRhmMD~{a
z`*xr-e|EMx=%SSQ_5XG{2~ITe)os*F0bTc*cXyWb@oSIwFR@<sqoP$w(k;e^<#+ma
zl^=)f*m$LuFnzGB{c_Pg=kBhlJ~NFjE^_5gJ2z)#fJ>aPzrX*h>+AhJJUC8I(+vhU
z-JfzEoHf%h{hUlpZ0yde3(d->e=l$gKgsbZd9kIzdS1u$GEjT4?#Dy+)=x~ULRTMK
z7rR>{U;)G4s;`gg|NoxvBv^U$*}1vhvu8_(g@xtZ+|;V(J4-`E&{O=1tVt{A0?!?V
z$!58?TpC}^58U*zpUd@<Zg7nZyZDypzb0NSd2?f;q;Xn;K}OJr>4k4^nM#@EocQ<m
zcjo<lwxA^S?Afb0y-Alq7t8(s_uZaLM5AH(a&=YjY2a!hU|wuRg~iubSCgNgn=4ZI
zZ`Pu(Z5kq;^{fj{w*0>&7jtuahnQ~EhJuGqXU<%?sbJaC)^=%6WwDK|ZRUjqjGv#K
zRSs6`wb7X*^ytx}7q_?T8>gRJQuFiEn)v<m?(8hq1~tAk&g-kFwA|X7y*dBBU6-kZ
z<HM+0k*+1$?$hf}9<5Kiup&_T->=vDy|bpDNlHq3aciqK=r*3Dqg^`D+j?Xyi!>xA
zrijSN`Ry)y8?nF6R=(x~WBuQ+;xmoY!wi%ry?bHwS34;`|NOCDY0cPOChBH9Pxtii
zcA8kCoN!k_@1Joq$mWulL81+s2M!%-y16;sxZ*>?pFe*J%F3R7d3l-3O=aetJ(ZLB
z?f*;wRRh~{qpz)vUd|NY62fxo^yzuF*0as?pPe<o|Kvy4`W;WU2Q2iMRema)Cw=>W
zx2;)ML7m8J*RFA?vi3@ucHOW4Ki4?@oXXiqH6{J$Z*FeBvND)GK0bcm&u7xl&dpuj
z9jP>F-Ad~h*`Q0#@2!oSdwRNF@Vc)#Mbk<{H72Rey=^$N{yNVxr&cad(pwX`S!+so
z%F!;-Em>E&jE#*!3A~w&SETC%_l^Da_P(>to*wI!295K^?XA)j5%m0iMd*i=h(<ue
z+URW`-1^Kd_N*{A@6Qj?nDi{>hVg8W*(oO{<=oj3$ofG0+sEVbn=3!3ty!}s<KiOL
z;%8?9HI&Y0US8%4DvlmJNU*W731R5!=yLFkJlZ9iym`9#w{K-KF&CyCxLmI-;%U52
zCgRY>r?r|PI}B!<=U-z`U4L@-`+cX^#qK^aU0)uQ=bojARPud(etvo7=VuWS5f7d|
zb-ldISG21mMdB6TuUS8T{R+B$bLI+(6*nqXBe_C?G$yTzwcj+YncGe^eBGQkw{}i;
zYUR?Lay@oR{7xs(HOmzr6y{o$CLL<wl<0M1RsJNAe5@z&NQdB)^YiEPN}C1jD#>hB
zQqp32diB=6TI;W`udAO+@LighePYh;HBJ*lUS8|eeKY^^q@2jNH$Q&AKg-5abZ7B%
zKF}q3r%!jc@k$$&zl%Ams_{ww<HwI*USH=gFE0mOdDX@%ePXhDf0N)u9{0PB!Hu&_
zvo|Fj<?7%)obM)lC~r=H#-vv)bMKyQ+_`q+=B)>_FRO_*tn_(yW@hB}JlWja+fx7j
z`U+}J&Nk22l9=csC@2Vu(em>0tn2H}_Q_g9g1K@psFSqFwY!6Nf9w3++JcSGMZ1<r
z9TfjwoKVKSM(6B@I~y9gM71=ggq%2ZsA*^MbEAX<4By_}?cTiEIQ`t5K!ZsSS=rbg
zJ$iI#mT7jz)m5U>W;qA$-HSU2T82>h7gQk5u`KRDGy+5_-@c2nJ@s#<uc5K)#<Z_l
zt3p>V3kWi7K6s$t{@;oG{eO?Wyu7@ytSs&SzrS2=Mr(?np9@^>C;RW`bNOH2-=A+{
z<=(JmONfEeBBg+!{%yIpjZ#mE9AZBI==nRZk{P>-Preo|`z-Q1t;VOOgaedow`N}t
z)5ts`9$(Yg-p+pT;6YGQ^PO!X+M8m-&L^{=?Cq^N7KKfm!fFO(Zz6cunwvm<=Pre>
zFE1`;Pd?tK`sG(4sQH?cvcYL$xSr7qrQdgAJ2`hBoL~R%WV?LbihxTqb~7K1h={nb
zCerxX`T6SuR1V+RQ)vuJ3K|+6pz*Qg{_?F#peAwY>-+odo!j|TpL|mMcD6`a&njl&
z32kB7s@}cEc1n-Fv8J7w(KvhdY$wmfSyxsZ1f}cA>il<im#3eZVaVlrX>TjH_@ynG
z!98xuv(0iZrOyBjLTq{b`~Cjsr}g(g`L?tEhW&QYj~fHIT!UhKDirE2+pGO~rdjv<
zt+`dniv<Cfb_6Z;dieOU^Uk8D5{5}Ed#k^z>FN0$^w?#RcgJE^$xEkmb1ZM}ELJzo
zz80dPr1XDbCDY}GcXxMxJ}zHBVR4A_sqJ(7XI_k1=+V{DyQ}&Bmt!pPch0}KxLDGn
zV1bjTyOWdCj~|cwZ|*A9*3;8FGt-z|T3VV_`K41$jZJ**SJC=EAK6n+O@Ur*=zp-I
z!=@>U>2iZ+@Ujcbe5LbbqWk|(dN<?IW6>^^-8*J*d^?}#_~GbZ9yyx`)R{QUE6
zywVwWcbR5iU-$Gdzx|VIYooc`F6}vS!sFJK%)|2ae-8eBzyJKXxz?c!U7*o~$JQs$
z^xWK>{`uo^`Qon&*xjdncjVnMX%3>MytunO->p|_X}~2-Z*T8s=jWfl9$$ZUo^3T~
zK=S#yxxogL(pXtpA3l0?XjSOyjGLQO=gpg!a&nUE!48lGzt5aK8@VMz@awCqk2k%K
z`4?;KvCw0a!O`P|8Hwt7tU1eqm;3q5FmPPBq?oO_aem#e&Q4+VqEAmeLHV$enO&r3
ziKVz+%mTl;RzH6HxUfFnUQ92hW9rnYO@e}zw{|wX<MfF>bolVf`2BX)Wp5UAe7-KS
z*4$;`iEi-*HQ|5WyNXl)Cq-?`IXS=nU+2u3>v&UDyr=6e&Aq)XA}Z?B^ZE5b%l%|q
zl`;=4bZ*bMw1hK!ecaRM^Xs3T*540`-}4^n)tNrad}c=MF4Gm&4tw%t+QvB_!&dk1
z*^#Wh@6oA$M@_P>bX0zRwvypY!<{>KZfwaEc3V7gneS|n*;*o=U-#_YyE1&e-1fY?
zNf#HnYB9LFgs?2D|Mu-$MO77asP5R$yO%|~Y_>O)I`8;#o@dV7xskg{G{wZkxKx{0
zMQ&EJE`O&IzAmQZ?X9UBlaFhOcy1Nbk2`Z*zP{&R0_d{2X}Zx`B7zg2FL}2h^TyTH
z;VT0dvn}_VyQt@O$fvNJr2$W7&iV9fVsPBzm0@e8;^N{!gA^hUFD>;p$+*B!Uthng
z^!2gx_W##hOY{sq)-Qj){C=(bJe$fJ8xomQPfrWg0F~ax^FBX6AH3I%S1)#t%g$??
zPext~U3fxx?%!%5#_at|k*Y@#PazqZp4F?hZ*R+8T=@9dnR&L-+1c5w%IiaGYHaS;
zeC7>a=CiQm<)xe(8yf%o{H!e@I8l6w_nyp{Mkdx9n<|YhWr_<b0@L&4?k@>=vQh8N
zOy2{y@(W8!mQ;U#r^WD8F>+H%;^Skzp!?ykt`2|v=#f)G!h)uW*VVMNy8P|`PSFfr
z_TgUj`;E!R`9vF(l=MAy8(+5U-*5l^>Y9^tFL2y@^`g;v;&+iQn+K;FRb}Gt^#1wz
zSy)ukGbBXh-@kve0wU#3rWjq^UtfQ$U*7-Yqobhd1y(MR0E16ntgNgTH>G-)y}Q#n
zd$#oFXJ;>q2q-C?bO6~fd9tvyt?kBbM$Zq-@#nDHAHL9I(rfo?kM=KN-&y=T?f16a
zGcydEA3R8~Du1`8;K@WDHfH<(e~cd=?^jn)Xt=dCyZFTgMONkYXO4D@|9sHQe`9B{
z`rhjAY4B8Ysy=jG%*?N^uO;S*9p4vuc1Q8k4Nii2b8nnk*eZKs-rdHv(c1!61Sm?G
zX1TQU$==ves2sf9FVy2x#NS_EK{qtBu(1WLkF(vDcXw5Q%F&;{e+RD))9sVDKj&}%
z_sEe>VN;KZ9UewO<rhja9v|y9$-2VP92%xo9^aqam&B#o%w@H?yViJiQhN4Lv;6#1
z)Ai?niSh8@@bmLyVP!3RaY2zwTyM>RCmZ+d-CI~%s%m31C-d^Mk0;gVYlwIX-rkn`
zc(T8pqLLD*y?1$6X?ClU(xfvbk-D<0!q>}ni|ecDDGAt@hbu9i*Pg<o@#9h5xAU2e
zQ<qB4lQy#nU~oEAGub%pjDm^Dls;MOMXwz{scg-<n)LkK+$U#dPChr++PLmdMXS=u
zyqA}j?%1^}$!KQ8{<^(f4xqZ~lg%uX%tL5>ZJsCRt}cEv|E9=d%~__a!-KbGU3K5L
zF7NKHFE1`O=ic6CSop|=Ro(x|lP4|x{rs%lVo!Fz-?zB_|Gy*0k84YKiV3UxEpcw=
z+w<|5^q;@q?_XIN?7pz0V~Id1yYKU5zO&u><!m<;>o_ib_j<#YTIYpJiWANKH<m{+
zKT=`>B`Ep29}g$T%Zs_~EPA@5=qVSdrmgt+h$~R;^|iIpQ}kkIEm)wCl$2CZTAKRo
z%*<Kd0dX@<rX(F`VDy=1Gjo!v_oJ6DS5C}ubqSgg#dLY+ySuxM^X^zUt^a>|pQLX`
zFHe}p%ZQW<u7B>>E2+Iv;}+Me$ztS4$j&~!CUSE}PtTIt-`|cLJ*w$Zxdt@O{QCNO
z<J40kckbMY*jc37D{Zc(q2clHL~(al*P__nWgshGU0uC7`??+|N3Cxt{PE%8VNvH_
z-`?$h&$OZNtWeXrH%j3@(`HEOiuhOw%U1E;w~y)IusXgba`U{`akH9Oxr=^(%eAqw
zIWa|3*fi@(z=BT`TDirIik^6Y;%0_na>l(qm6r-P=iRl6iHX^;eS3JoC!f6g`+PU2
zo&E6T%aY8?%htr~oOEWUF{r!<cev|lJYoI5UtJ#`AAcOMMJ&HOqG{&NXVG5zNr#wk
zTAyvTzTRh)c}b<}+nb+vPJ=w`)XFu-wtCv$>hF)9JXw;Udh+S%>4il_hYlTLs;{pv
zdU9gompeNOAOHCEdOhgo%+u5Lm*?Ev#N{*Z#iga*HGe)Hm$9vyGS9a9)9dy7v$!|}
zCx&~>-ywP8y#0Tl?Rj@OY+jx{$ojUkzdKapr9;jY*>7iaPR-4Bm$a?=!Y$gsaP8W)
zGc%3bZ`_EuwLRZIXH6gf&gI3=&#|zw8s^^G!p`N;@bB;M&(G)A7kz%_`|8@-)7x@y
zi_EaOHc{DqMd)fV_kOvj$K~r&E-mqd)F577vy4)?a&K*Ucx>ZWnYcT=lij{~Uw)LN
zbi;n*)W`J<3<?aME{-9)Sr3YT2X(I4_+)+ron9TaRV%*!uj%h^Z<DXA2;_=rXklDt
zmUF}5_qVq{e?0D&|2ZpcZPde;FF6}luh#aSt`}PHX~O>h|7L@3J^%Zw^x4_j#~T=#
zMY=jpWG{HPKr$dFM<*{YZ%xF;CY!0Z1DCyAFTumcI{Da<<8GVl^Stv|a~Ar~xAU1{
zpcpw#T2|I~sn=ACx<3|nc6NLIe!I<O&1_xz3N*2D{NTaH!|nW&RlTRF@B4RQp|eTG
zg$C{LbsrA%+g|{+4K)}*Jx{&ZT?v_)C*$k?y58MYDhhJNqG<<sPTT=cvIeh@v(@{3
z?&*EgW)Ux`<gWh57Y_N|;kx0p+;8rWr_<vn<=&h&Z5pV_)$r}@ZDCp2wFit}wYL4J
zuqt`sP*GvAIqhs!79&T%x){rvni`GJRU*yo{Lg0R?{mB^m3nE3CupRFoxOd@H3#s5
ztMxOME>#7k61QHdIrlAFC(lzB^@`GD@Kxi}|Jn1}J?!$PRPSZ}^XI+M-I>4ltJwXz
z-@5<){R7RfEldx1e5^P5c%N*|$D`usO$*CGHQ&x+^{cDH(?2~qxh4C0--;C)tg{^L
z*R%1-oM>QV)`;6<vAg^|s2j@V=JLcexcmZ-o{9=fdV2bwKYui?J*eio{%UnV(9XrD
ze;0n7Khq@d&WzO4(-IRW8eOhEa_m^p+9*>#8H)pbveqlY*3LR~-r>*Rzo415RxZ(`
z|Ns8pxN*bgyOENza^~%Ax~He<E_Umc(uvzM1JvKW!=q$nHEUh$?ne(F9<+YH=kTkm
zt543e-Q5HZhn0>A2@0m!*Pfi1s63@a*1fTOm)4XJt%SGR=lrb}Dt&b&@$Idx5s^#Z
zn8>^L$t+Ad+9jeBF@c$#Z%=@tLZ-iu4^QZ-kcS^09)5FY=j2tPtM`4_^yu;9mzS6G
zPge6?RPyo?D0S_r{H*n)M9;!vhQHm<mbbUJKYsP<)S*`HuRj%)CUvc_Uh&|;gNEtT
z#q-5<Pko&FcPD5}V2RW6J#|ms+@D<Wd~SKf&Z5*V@dk#}Q&S%Pe!m}7;~s2g&sI+n
zxL^O@mYq)~;lY7M{ZIy`88c^oyq&+_Fzt*)pPa4N+NiA-MNc}M{Eq9z@AJ94tMul+
zTI>0>-z1?!O_NToPz{(g$G(2vE7Pokmyc9WZJp<PZqqt0)u|S{|C%#h<6Y>~%2iQa
zZGV|T_N1zv-8?DNtOt)DH*eo=Zkm1V3ZLh3P{(am_<Fyan^JFXPWRtbW*)a8fl=0~
zL?L*Yk51&Kmhbnf{Tr7XAMcYreD&&7@9BDmSywb-ZG=F}{xT1C3ahX9c(3}-j>_&o
zp}P|@r#mm)llHJNeaD`1yQN-JlYW1D3#y~m*T^z4F|7z#$h5Qg`LhQHn{V8_nR$GP
zv!I}0$cli*;^*f!mcNgiGiT1EzqWh!?8&&a#PibMr1g(0rcDtP6Fc_h<z-G@-qd3~
zl3Vic$1Mc4?5AuwbpQVRS?~Az2P7;w<e|zXu6d|$|9>_Ac{b~FW!3!Vc--Gt3+f{V
zFY~$R=h<HM{oULvrJrZ#@AKUF-Ll{T1Fy8%lUG+)bJ@<_xOHpl%}uFS*2l*uLX1|k
zu$VDf-CwKY$_Mu>gMd{bE3eo2_nu5Sbn~WUety1L-klYd`!3DYjo$X*AiI3P-m21V
zxwqS9MxMH7Bqt^1<>1ipOu#3dOH^x#paZDI-Q`hL^z4k}sj1rT+j4GN)cp9Mz1KW2
z$giNlAocV#RRe<w&FuW2p3TnJ5)l-fSmLv|x$A_ncFYcg$H#gfA8O_P@^-bQ>g8^`
z+Fu0^53wHWl?IJQxu`G)crWO%+0Mo*_2A{plc4U;ogEvQJP(JhiEx~(=9_V00psiI
z>%$e4Kuu^@7rC;Jk6h=>nX_Tz#)l^+DqC1sXiPDAajaK*ie_-zjT;ev{{D@BeZW~s
zN$Jz871k>{Y_`|@`~+%<tc%<0b$*_$sE5?mz18J;cXk|na&j`L2iK9Jki{u7QT*Af
zSEp8mu8!DMqIq_<dALI6i90)sGw<)S&CAQviQMGU*Z~Sx5y8q`+1J+vF7uJ(VPkgh
zlR0?tV&K9hjG+Ei&dp6XZrphA{{4EzzpgGWMiQm$q7%iZ8K?WH`OiCZcDDJ;dwX{?
zc{2N`3D31ERkE;<C@(JuwFZ>5m|R`v`JbMqn|W~&t9IC$gnxg2a;Z$6ZBuC!7Z(Sb
zYZez5XH`;Cn#8!$dPT>cEz#TaKu%b+Xb~uHu}OL^^_yei*vchZP*U>b!b0bib8{jW
zc64;4IDq^%eVT1`*)reR$KKxF{_@h&ZYIyxM~@z5Twf<EC@6Slo^3Sif)3DNir~cQ
zk6*p=f{sbPd-sk@WvZlc8jqcw-Iuqwr+>d+9}fzJl!RA&T1qFMeS3S`!qQSuNa)bR
z!|j?88x}ZuvhvB<cuds_)ri`nQCC+N5gn}!N)0?dx{XScayF%%J=G&=yrcLz-(0Iw
zr|UD~_P=<M0h&m8^!V{i>+*G=#wI9FDxF+i_xD%H>ubHb(c26vKc%pKn#C(?HRW(S
zKWL6rD{RdQ2NxF?m%EOxDnDD>+Jd&_M6Qe9f9_y2yJpmu6;74BWp8gCt$x4v`0f1t
zXW!l3ZD?*DuA!u)v}l@xpl4lKS=pIcrrjMK9H6E|P0b#s%A<!49RdXisD7Jk9S#nV
z6&xZHr-ZH!)2;gY>gc-I-4gb7dzdPZ?)&#E+i$MbQGWYB4l9F}ih!;IR|zZ+5cD)e
zDhl%N+j&jbTN@D92wG6Gzpggx>Z+sDbfXO<c(`0$TtF+9PpsZp{XH-1%8G@*FU3v#
zz5Jy`$qRv2F3}*6OFuDyQjf^jbJp)K?60r?_U`WK7Z(?cdT_nIxmkU>UhJdk@pXZW
zAeH?~K%OcL3=9le6EU%&f#KVmo6fCVq9R^gA3l6IaQ1AkTd&l{s;^m~1O!TIPrlyV
zo-ZG|I&5jl%S%6g{YuKrTp2Lw;@0fz1w}=NQj8wHfA7C8W~WG3M@NUvq+QKjC$v9)
zyPXee1o6pQ9r=F0e!at_$m{E3cUFE@1ND!<&hQ60qcSxqDQQL2R;_kE*`(|1VnbF2
zxh_;`o@tzZX=ibI)b_lyO|0DD5CaA7mBaV%&+ih|4qD<N`0f4u{;gZLI#qI}pP#oh
zW@k~#xjB;c_4S)lPcI8_adCOlc||tFB~DXaUA>K8{@lmM$A5mioiBRA+hC>-Xtuz*
z{GCDZGaqnv`s8y(NJD9TOiYZ-)z#t0uV0^@dwW~pVmIE_PfTh)GaUBU{e5w9F?;Yb
zpFmK#vPdbwC1`#9|G(3>->d4jsr*#%;elgLZm#I5VvDjj68Zc8npJ*&7Fq`?RARuc
z=oV7-0=29PUR}`yje@g&l1WNV&b+=(_Tt5hXJ#6M79xU6i-}u6&9^ISA}4DGFMIIx
z=~9Pz7y0e~9Jsf)`sMxo_E%SjgG;tg5Mx(gUhW^fucmTS>gl%q`|ZDd`^NQ@^Ut3@
zpz2p6a+AuwefuIJBQ-??1qCbhKpFe`DvyauJ+ju*xW)A@Y|oEp{lqeH;zUq0t6^vH
z^HmHWm7<MGlgz%pxY%6v^_5}vHJxkgVpluJJ-(g4|LKd1i+QBYdO(A(0w6OLLHT;+
zp`+d6g+)b5!OMJp{Qmv;!Gi?>?>5)|F3Y;U&UdO-=#7oZ>~4z#L8Z$H?m+Ja9ZR@=
zetMdGsD(3TXHl#Ez8_AFchpTxrtryHCFJIwonu)H8V3d!FP`$ChT`L6y^%W#6hYO$
zLg%lquaD0(PM0uF<2l+b4hf7PP#6bIT^+tYXqk^>kDKzpfB!73tkPcCTw5F6-rda&
zY7$*u<}1O&CaVI9lps)auk>0MvvY<`rIC5wodxdwa*|fv(cAMb?krB<lydUWn#j!v
zXUw<z`9v7hQ`=W#$t|XHVuNMPzn{-z_Et@eum3ys&(F`V4+sbfPE1+&Zh_Jyt-E_F
zh1J#77cE}=@%wjm|9Lk2HudlBSb}<}_xIIa&Y6BuSlurnJ>C7?ot^Skc^4PCZppgZ
zrRF>9!}a+3mknT(7K5_r)O!_=c|qC&0t8}rl~g{kEH5wT64g5L;^N|;KcCOP{N$Uf
zo7<;vxAP~f`^zo!o&D`V@pS$8W$EYVSy)-weVGF)X}TN)J-5fi#{T?r**|iBo$b4K
z?_&1V%xvS8zO*&_`jX#|_w3no>B+XYH#Rn3xDdcl_2q@*;Wpl#H9w6QVt17&8W>E-
zyu56pvOAwF2dF+-0}ku0x3}j%e)6Pc?p)dM^>L~e7C#R18#Xu2HqT$S^W;-@9*G0~
z_J6+|NRHf`wls2c8fX-z?CmYX^m8(@3?Nh9fFmHPq{Kwkd)kxx_5W2>RGut+?yDvo
zy)DPH`1aG6FMnRhn1Ar#L6BCTnMQ^+KMLN~2!flxN|N2}?d+h2*5_wuJz4KBUb0nh
z;q~?L&mSCY=FdHUcmCnShbuu1(CvA1`Iw-3UdAURD6Eazdg$!g-USO5sPDU$V6Y+c
z@-maHwdLKnmWy`j>FI$+`>e~~y;uNB+@NewXPR-L!L?hAu|NOs?$aqog}=UJo||hu
zJ#E|j$gXR<Z@+!_&hO$P*PRs~7xD4HESlWi&3(Z6`x#r$s8>R-UcUSpvwnZwUn}#x
zI~N)iA*~XXS}C&}3GMK8D&Es{&i}DKf9a@^sYTtNic28^=ausB?EwvM?D={v+R6Y_
zYq>7ySP~Cf`Pt4V`)F?Yy~<_R^KWhG++y(f?6F?y`M+lE)?Kh}-8vbok`5`;tQVjZ
zBnWc;zo+Nt&wqP+yK%vT1MFRk-9g1~JHNc&*V|uozBA97J^S<R{QVn~j&_-t{|9By
z6&xahm733<J?oLPoy99{Ho>;~oBBSpt@p(xgoQs}vE=O*)oxqAUO&F(V{6$yNb_c5
z52)q^Ssk@C%l4b;%D?*@<mKfL9X>2y^(T)z{r9)GH}}`uKRrGDI-j^8XlO`D2{bnM
z`Po?>c{`crgC7FE2ju7HCnY5Xz5QjsPJ2afFYo7PXOq9Zxq0K(tt|I$XiH-Wzpk-y
z8?W@SXJ=;@mXy>y*!kf@K}}6f(A!=4cXoVSc<Afe?e`uXXk^}!dppcJ98zO;*!Y3g
zsMY*1`1<N<^@ERBZmE9x_VzZY4l|lLiR++U?XL%rg??L{U0qzBOf{Tw>ClW<N-s}b
zT^-)p)fFUg@<@u2Vd^Q7x?eAsFFAg0p6%>c*NUH>n)>DCW%dT0$W0$UpSS;fkX=55
zU2XB5oyEzSnJX0_b}y0JoOX6m@Nz#cCl%HmyLN%5%2*YZls<_(>vDK<)w1{*55s$_
zl!X;(ybP1>JbL7NWR_2aWN^Iy_YE^F*15R`T1@6mOJjSxCdKUcom3_!CXp6!P)xj%
z8PI92nYUaiKxJpne!(RIE-o(fCbG2&{<N?DE*l>D_juu-(w*XZ1%jfY;>*%3jHUSs
zrbG*W*9kx86+P>pOlsE5sf%M$XGKoaK0A?Flszi)osMa_-kcQ*C#F`u|NFP_qf%E_
z8tbn*JGuSG_4S_5`E5JD+TznQ>poC<(%}SZ`*n15Xs~pF`BW7k-S89p4=)kwNMUA^
zzti>PXu<N|N8^u_{VV!r_qFC|!RKAOKbuWk-+12Gv|#@qZt;CTKidDi)8qQ--jd#r
zH`dQNocMS8tt8F1onf*yr*G`ANMkGe8`25(n}^sxN8Xp-6U|cJSJ>aaC~sj{6SvK-
z-21xY6Yc4r`Mwn#PT2I=W9KrycE11Wj0WG`gHMDWs5pK0P3Lq^t6%DYo7E;sJlyg9
z?T3}ljr_YGJ$;?iuKZMgk)tB0aiSRDvZ?#Y<lmjYKO1ZliLZHW*V`^BqmuZy()Obv
zpAD1OgJ3&>#ey=LeL3~}3Kz$#o%<kBd7MYV-7bB?#`*szbDu72=)WnMGij26Xcfz8
zVgE-nZ~L5DDH~(;q&xnr1J8l_AXayoi_F$S(VtVf%4^xuPv;(K)mMBMAtTrL^kd~K
zPMbZ&4<fBPn%>)=T&mf;W6R4kws|K!_1f1T{O6;qv-50%_rZno%7TJ|tqy`G<<_<I
zKD;yM{+xJ;KOa|pHIZBSP^s>u`H{Kh|ElDtE>?84t9^K`y+19qFE-}aQ~e?n{g*tA
z;@^}07Cz;jeYEbk1Zz6avB#$(la}YJxqm%y<=3C`9D6;bg?Ei3_8dGY`Y2pS;oqL-
zAM)Ws#&_H2INKe2FBs2LF=6#*?<2WVey41GFS2jYvtwvHcuoM+faSN*<x!rbJENeW
z^TldLfri&=rz+!wf9kDQdbWny+`s?#a+Z_VU6dEUT$X6~?K{W1KmXJ%lsxuL5|w&4
z^<A;gcT3Ah9W!@H-M+ehul%9aC+v%(&1PJ>cYT4phM?d?_CMy{f+s%(IXy2pxSHi;
z`3C<x-#;t-?<`p_seSFBwt_v|B#+J;j(_?Vo4<J;w^>rIbN7ZktFDbTvU|LqA1_c9
z%6@T6`q{*tQ`a|!cm9!>w0=TKhW<8|4f8F^4i`9YpLF%3R`dbMCzBp|{Hc}Id~Pf;
z<wY>p`(3FXe;%^SJbnEj*2?~y^6VXx9If+xESH|RX*)yvF<SzwfymysdfRiRAC(VI
z-2X+=_?Jl8r-U<aXa4*8Blw1rp0#)rCyxhjB<Br2xzk53oPKgU|8X$a`t(!DpS~R^
zRGsvLH+y63{slb;<%{no+|hk^W3m6F9p7~(d_3e(b|7Gr)wE6gi3RObY9AhXZ&`h~
z?ARyM4?2M?=HFA^Og}eq!?WdE4pbdB-OT#>nT5@(YX|3?X8yf2L#XcTg1IyI@PT?K
ziUCg!KC^JR^PT-&hWPJqsz>7fF3NEJ^u3-j|9<xN4A)a9&(5i=X4*FC{G{cvx_i1r
z4yLgEK50~8#&cx*jP2R4ZF;kdc3l^k6vXVlPEq;9-wu7gXrVHjImH5=j~-1}7t`m_
z+ue5j#OHt7KiGaO%a1w!PUS7<<1;fPZ}8exT*>x2m*p7I^^|$ij-Wk8(~fsr`_J7K
z^Wy&8mfLd+t}gp=wL+-+#&#9=J83s3`)ujXiq*LEu(Ra8-yO4V@6N3!<M#&qlGvVZ
zr+!@Ln82N>O8TL`bK51(xBk1^xw~MtLe#b^rsj_%f)-tJck({}{lj{>8UKnhZqK_B
zncNYz`Fy2S=x*T;L9sp!Z@rY<@}F<;zkM;lk7xeWf4stq?&tYGtd6sJyY-lF$N!(&
z=jwJI`?<AV_S62WobT_X$*3^Oohz=|$NRi}Z=>m<zUZG;PuBeAnP$|uWA;J8XxaPU
z7%Sp}gWQjAbMgNpZ)lxlR4Tjdg?g-u%aizp4;LtX-z~$-S@E8APmR9t->cO!Z$Gh#
z|Fht4&{XI+^uC<6**vb_f_VXR0rv`rz9hCs_5bJoc;S+M;e(#zz3H#5zi0pB+A06>
zsMyAN$#>%(xO|`I^0$<A!_u!=J42T}xN`6Z+mA^_pVIyIe6Vrf{iS5@_T!6rIVA3h
z>V$sCzQ{2@>8C-$;Rg0;$Ga!jJk6RvS+F_%7h@0G_dPqNuW4}qdAv|Rp0W6^xW<P&
zlPx;-*RGFJyPX<uJ8Sa7_DfCo-rbz@&t!`B<zIzW$x-~-H8&*u`Xb-(*zWgxbmf&Y
zLrkR2HB)zEU#`B@isv6RyFb3Jap$Y&_43@`^=kKKZR`J@E&O4<oX4k!1xi)+=Ra=r
z_T1FSs9#i8GkZ?I)WHC*|9Yi{lU%MGnQnP;HTQ9~wWZ%~ELFKXqrTSrVfbE)lj+}u
zH@}^*y8QZPH}5^VYz99jf1fK?@!Txg`m^!z_=?8l*tTNJTWtqtOs`r0<q*RT`><Y_
zr*j!ERNi4Yv+tvdjGa$V(yQzz3g2J9f1GNV^3Dp>Z)$ZA+{rGv!C(4L<Bz9{cYJwl
zSKv}qdFQywk~AIXiRvG1c6M~=*=~$0JSSePGEZewefhaQ>7zHz)hEx`wN^7<k717-
zug6)Fm6PAe6dv)3Y+~lW?tApob5@(P4>rtwa_9EsxeJd79)DVxU-)v(=Z0gCzMd4`
zGkx=3tD=7mqU+Wz+?j8tUcLWJ>j&Q>EoTo`#h#zHF@l%ro}A0Wf6SaO>^q!fr5-(<
zy|XH1SL|1=Nq_z_h0pa-)!J3{MrgTy$<-sDPfBGLzu~d_VG}0P{@CK8{hPgp4HKF7
zZ?{`;V8uz1xrV)ma#t;rE%w(~ZWl9UTj9#*C!;UQyPt}mBWc(xqJ5+M{L?$Dl-CvH
ztc+xrs}kBQna<E(%J;V<XTIN+#+KP<IJYM9&OULn@x9R3mjQn5a-O--dN$f(3`NO%
zcGdaZVNG9ACZq7LWX@;P{Y?^$M{d3^eAK&h-NS|7Uf5fHbuvHKQ_&u*&#SKY?Um(o
z)$--bjZ0;#B2A(LU0j~rUjWJqU*}h9O<ugC*kgJ5<MPTC_pUOQr}Z4$5`W3o`?t{l
z=yh9W6qE>uultr{d2Zq)j<ok%+;wm0sWRM?=x!JI(5L_0Zl=$tp9Ok<<9b>&tR=c0
z1>drqf37S=`sJ_1GKDcwJDX!-wXJW5E@czHen$P{hfEF8j^>I*Y3mu<=bpar<6}KF
zn}6ab@rjqs8;&whnq=@q-+A4x(Ad2}=B46qyKV}%rnB1f-0Pp9Y@vQ}cG7`;Q?;K9
z8;Tn^@8kL*QdrE_Si6FAA1jYaKKK5Ue|)9c_A6CXF1NM5P?)AH*lf2uaot?^50B4z
z<Y+k_5I2t5YCh|--=)PF3T-R)7d&_)`$hVXWM1+92bX5eJ+d~g-~HqMyo|=jrxNwf
z+?;Z4k+)&ye}yOKc;_hmdr@h_T)QFf=gptJHKGR3OZ@LXoaOFQv2*vo+jV*Qw=Avs
z3hHjL>HN*Is^;xxpR63Ow*IiJ;jf*df`Wk@A`^FqE#X*pWoOmL+ht3l6E07j)N}9&
zpPI+Mx}|}S*XHwnoL&BS#gCrP;(y$;0xF7JA02vLlo*qh@p*m6ab2dFkC*4Fl&$Nz
z$#LvZsv%RK_RRQiYEM7#t^2d<+pO?wMNjweAKf)&_t#U?f4<uj8}4MDwElvKdC!k4
znGRo;{S0_jS;IXe_;9zl<*chuKP_MVF)vh>+4G2A<m`_hcOLo~r~k+-f$QJXBCB|v
z2*%Tk^Nzgpynj1WZ_@RdJI~2K`}FU#M2x(r5#OD;w%ar(N<1<v`kCWiByNyo@Jy-v
zIOC6Ne<y#Q-=~sQ&DNaU`!bw;>;HZ8r||hN`J4QpL;2g`>vKPQdfR#L3q1T$^TF)A
z|2(nvXW!g&s7dnOb2i~bw9I#(_;#*FB_$=UMx~<fk1kie7c#4HJ0o}d?Dn%qvSzJ|
zzq8%<;gj!iB6-1*+gCfTC^N6o3VZw|`>^LDvwp<_mmO8&hO^%Nn8?buxSB2Z*UTHo
z&)+d9`O;SVeoHL-^unJt9G0IY4cV?8X#Tll`>L6G+?6(AqUMJSi^4uMpXfbucMGqY
zkL#TsHqV^({Qq>w>|5#Va$m;PY5V`BuV<FrnB#eJf`H?f>G5|y&C%nPYAigHQ1jWc
zdF4h2ORhck?K_?dTmJvM>~_dy+r!z4*B{h9(VNQO{+3C6`JVR0Ve+4*ecIdXugQ4)
z>S^7hZHJ$%`J1~QG>P%^c&6Wi6Se0UBt9JYyfH{JKVTD|-Pd_LHYeMK@ZA1XDD(e*
z`jL4(zSVcPi?G_in!C$t+2yNpTW4={|5utH*4lIX#L+_I4fpj9+{@f8Q+OoLVnWpE
zJx^*DvhVc%nbgmrn)FTY`LSn@p1l6gG_Rrb@TzD%zsGZu%ja+_?<<_U{h)-y!jFHA
z_9#ko+0|U$f34CqzjwDP`>}@0F>mg-1}}`heD%`ukJ?YFw4Tex8T1DxymAM5>-W#M
zE+y5yH++8^eO%EUedSY5O4i?ZZ%aY#N>JQ!$MIG@oO{0DvHqXtstMbpPM(of*~)x;
z`Eib}KkE%$@}4}ptzTI4%4^bky$=t=IUfI<-?@Jl@80fY&OPtKk~NyDUtLyzGyUB1
zw#D@e%U|f-Vx6!_>C9Gv9{;yiJCc>nO7t|d>;2w-{c_;<#YYxG{er^3!k^s|+SwHx
zpYL1!`i`ik$&R<ZGtY!fVXBqB|6y14?e7{F7Fo_)DlPosAIlzHwn@+Wl^30sc6q!1
zR++M#D_8mt$z`7QJlEgOWUE-pH*@bMyWEH3?p=@7e&5&eS$6eOndZBDaqXayYypi=
z30?mGQ=QmqO`jj`uzvq%?OlhsKkju6<qL{E=0~4QKWSfRFz=YkqO|(?|0=fi{C;*v
z=iJd`8BWa|Q~8R%F1cC%cUtiMw_dj|2ztJGwtBT+)r8HlzrQdpE3LI*V4vRkPfdg&
z_@GU=a(LVGqz~Wr<o>?&euDhupSvCui@z<J_aH3koBQ&)abNuOayK>BDjTw2s&Wec
zwod)!v|g`+g=cr(H1BzQ=h@}V_wl<}f8Ms1mrT67LH+I5<JJKWo(eqw5YlWvZQgr9
z0ilk!FSPXBRURJTo?pek`<&sk#osr^?cDXb)c3==&f})53#3j>EBI<+HA%Se^jtIh
zD<KA7f4107{iye(*fcz?>-gvM%dUN53XJ#J+x!~T853c0ec~o2Z!@d>p!U|~M;pX<
zR+Su_qVN9etxTcI`P(m~PNf@&EB~LcQsi%znCty_J=^tF{{0c%*#1W1lzGXyR_30c
z-phC$YgYK&+Wde^<MQl(u(bfJkEWh~P&04-N)}f3^3r=QJa$hi_H-EEm~+pcYl6vh
zXT7;4b_VA}B%B(jM<+1&e2rh6&|w@CF}=R;>mFvFr5kJ}seg^x^Ks3$ie1H}+L7%0
zrTBNp8QFz!zIW%ov&rSy^3@OON}ryLVU4}{wC`3a@9Rlt*DYLDxUe|Jv^V;nG~XQl
z_I}R2d5#jz{Ljo}Ka|z_S)RDLk=L!eKYOxvaNABfU&aS_AJqTYAHaWDK7M)iUF{vp
zNB7Ni-6(l3o!u%&-O@ZwEd2VHk9im6?=PQYYG@OsUTS*1La6ROf7_xR(d+b5CiAaf
zmC4`rT}ya^*srcB$MSq%OMY4N(TMT!@}M`H_x*Tcwk*zhsm7n9F>+G(4DYU-#S}f^
z|5LLa_EY4GZ*SKKx&N~=QoP=E&)1@5kTT`DqzqrXbE?44avoW$DT2yw8+3kndi{!c
z_fFy8S`DLRUwxl<9@UjQ^i%#4zh}wnn{yA%eVw#xnfUR<-7aq~>394}R9|*1KX8ff
z&o4hGddK{!vfjD-n(y_Wj~@t4-*#;B?oIaB%ehtMZ=boDD17PJQprCb-`<{oN3?56
z{SoI!tc%k7m7g6~esF(^Qu4h04{KyPk6ZkGl)w{fw;*8cBguK$r~Vyvsj>Lq_F(Gk
zzy2p>3mzLSnErhK+13lnGqbPrmV3*8@|okt_nA>CPG+8&)};WE|10Gm=H5N?ai8#|
zXXhll9@+m?*12accj{p3x$pIwx*5Admp_;i{$bCulYKYrEk6E=scGj?`L4Wk{p!zU
zO78VOlSDtP^jp}u{sG?($;<oQ&#WnQS%2Djp?>_qnppmOQ$JKLnUde0zwdSSuAbBL
zN>9Y<tbKU0{jlZ!qwP(yTFcLU?|FVH{C#oeePQ#xVzS%3kDQt7@nu%+%zb?S`_k)g
z_U+6t{PyndYU$pNjt-{<Cu+r*-v3&Dm;LhMrv=8$oQ5jzZ=S#Z@1gmR<f<(Sd1?MK
z&%aHQDf}sRTlDx@p1u~3N#AA8DwQ9w&Yr$7Mbt*-w}*|$Y`c`{>l<HQdarW#&7c2y
z#`0RNyZ0Asto!>TF6dnPAG<vMsfS+7=WbS>I7N2<=ki}y?|6kBm~yz`?GgDKL5EjG
zpXA!{yK2&=d~UPsU-lj<?`NK0dH(yOIIHVx*RQXCWEH|8A}F|1xyq&GMDrm*_fzk5
zzyFvqamky$J>|FmzBb*txy+I;PtmaX`rF<0`_9H3*njhkFuR9U;qSL^)cF$Z3-j+?
zuKUn0eePu4hkl;(@&^*ijtiDIEP4C?Ah-%BcPVCab$RlB^U@V;klI|S$llmy%IUBD
z9ntHQX2x2CxVX6dOP1o(Qc@~Ev-$dQ5gx54zizE{5a6#BcKP)0=bS$U8@W`Jloo-8
zD!xx{Isdz`#_^x@{=%cH9zjRVPW(Ml_is*@f|AlC-b_E}a2n|&UnC5?;WRu&S#7Gr
zll#{zf9uI@tXM6e^!>y{?T=FeC0$%xM3`JR&00RcZq}a4&lBcMIiuL_$a?b7n=<S0
zM~^=ilpjBQ*qQa@qoQBGE^SD37IkuQQ4!|hYd^fusH0=a_Vn{|r>5(lzp&7`^84NL
zP|%3_&issl^(-|;EOCb902}@%*+;9f!{!BO<v{-azti4roiRhArly92oBQ#VmBBCf
zJ%9cB^aMrc51&5=e{^wiQMqksA;Z^xN<oR=C_g{{(c{POGv=4Rz81K<ELYUu#l>Zl
zEHAt0ME1DJYQFC>suy4MxVkDd)B-fg!671evd^mQO@;gI@)s8v{r&w#{UFK3Wz*s|
zUg=5N;p=?z@9(Rv7WIXRYnPXo-`bKnd1dbBXJ@_VT9t;Fz>GR)bbI&pb+OhyYPxH5
zl$4ZiP7h-j6=Zx<X3faR2wFcFeOIjh=TmWAE@hDHtSTl~7Y1{4^Lh3Ee$HldXg+wr
zncvo>pkPCnDm2G_Z;s~WEVYo?l7C;W`QQSl&W;X`ZOa}mP*S?i!Nc=p{r-QaHn<mD
zUl%(?KR(V?NKkNMj<0Q_lG61T7Zx7oWxSAJ09uj8>Hu}&&ig4QDqmg&1PBTWPW;{2
z*4)(*^L*v<d7zcBFRNJkWh|XOJ~|pIBPb|%^4k?*4W**F-`?Kd&9vZP!UC_UT3ii~
z)|XPz{;GF7pU13ju#({e?R;3&(a~|D*7>lbtBcWxGsfpXT=utTEco^&^3(J4>miZ{
zmk4#7*t;$1D3_RCOvl~4^V^<Jm@$LHLSD%}<HCZRzm-#TA}9U-`@VjL&#tGSnUX3d
z*C%qbEee%te|>S>K5dPvib~7R&(9yfeY-XZT>88+Hx>N(TZWIFnT;pm&5eyVpH3*B
znQJ}0p@Bj4nf<gmbIvp{GXHqD`~8ZLl|l;_E_`x&x_>JyAgVn*JwcOXmc`EkRtBkx
z>BV@=v#AW3p_6iBLn3HeSxD7OKupZ7Q%E%gIwUgDT3lM%`}8zj&>o;(Dbq){^Y<S;
zecHSAWE1Gro&9xxPs}#g*A82=An9nA7HCGnX+ej_v$>YVZEd{Lo3gL#ftJeu`1sgR
zs#nWLY+K38pj{;|n=WRgJUrBDl5-=#6*Lij;Du$QQc?J~_xJq+0t9xKy-hkXLD6TH
zi6^6hbjGbMnhO^$tf;8Cu%j?J?aYjYps)pH*E_e?@Bim@bycWk=_`@R>i%gT9vsy2
zG82-KIkT(uwMN7Sh1lI?sZUN!1i9-3v&&&e*C&0T<zdg~Rr8sfn}c>kK5V&pW2SL>
z#=Sk3)!*JM3|}8t^XujE9Xns<H!CSAeYgIbt*<oGEcKK~)V7??{(k<|VQYgJ&O8Gh
zC^5@4yW-c&<v;&?KA(AYmFUNhAGLbG)o=#~kEBsc+}^6dEg6A_4j-<3wQ_mLl-~{O
z*Xw&v(^**h`r4llhxxCp3~pbsLPONe#pQ_|D0LTkYieqKd2@5Jq;cAZBf|b0GA=6p
z`TI9?#_yiP4--B-IOsFWq%&x#m!O!K+r>q$p%ReM{mKauYr{Y*EmnoDUJ<@tZgcwi
zV@VrV7<}L8Iav*~Bb7@;;{Zr5U|~}iG%52pojBnkWtw%QPuBXyjg89lYrjdVP4;Z9
zWYCG&&@j)oI>~Hy<emz{(pOgk16^EP-YL%#)=)CHsQF=V@ZiBWH#RmiGP4<!z6xPI
z!0z$-+S<rXDV%K0jcvTrM(O8dL?I)}F7Lu`Z_8D-u$Ym2yw5Q45K9}c^s+@Z4fE=L
zWnNno>AbJz=ZgyqnUjz8K<p3(Rmysxq~tr>?8e(N&_W9-^Sm{SY##W{v+>-Vb{4c<
zFn`ZSw)r)mI_IfsC@D=k`!c>rkTK!cmzO5Fw?sDI^vTFr!L)!uKtSNYt*zOde0*t#
zS~zXK-zip6Q(I*KDPJbd-TVDsHE7K`Xp3;(U8|<1rVx?BlXI=jL5ou(H>cJ7|N9-Z
zrOSJ|-b#?Bn^W4FyKD~Rm`U%g{{F06f8T?R$K^oV14MKp0$L~X+<aRG+DCR}Ww2)W
zI+@jBYn2of9H6;w;u{4!yLr#&Rrf9Ro*uM0%@?#ucK!PGT*})`%ictQ&SQFbsFjnO
zdvVOpB2YpFH49Qw#r5M<R8(4YqqkkymK(kA$0P1LckhNuoZMnr{OrQ&aD5#yZtv-O
zPhT#d9|nq`O`K;rMJf--SeNxYc#yCy=cdzqyV@VGR<9Rfc-pr$|NcDJZn4D8H$my+
ze*OQwuyhc4_S{@+PA;xTA08eCO&U*Bc26>p2$9&SUHa;ZqK;0_ot?#(*Tq_^`OR4Y
z%5^EsQhZvU8X`8Q@#;ozQ_<Aye0g~}Xw|N$>$}e6<9!#`L>lwS+xh(e_gB)kYRkVJ
zpf*zLO7(yzj0V})bfiqPS}ta!yu7rOtD(qWR6DHa!-oQ`&{aonY)pQ6Wu@@RlP6(W
z=#RKtuaxU@zqud2e@{O<%k;{MKxfy9)q4K(Y(R_J)92StySX_XbV`(7{Ju4Zz-43C
ziTy|Owtv2ry&jb8p*tS8Oxgcu@t!?<W|-&8f%d7ty|wkqioj;$^m8jfQ5)IwjHT;N
zyO59&C?9Z(>n*AJ`s&PV^ZuBfMJo+Htp{x@k+3XMaqSjU)zRq*TI#hDI@IvVzoV~j
zS?1+sId^tU6w{3|sQp!PCT%m9^8a0+4E*c$`pL%We$({h{XlgaXw?L3hQ*2#wJJ(V
zO<7l0Z7h8323lvf{eInQ295tS^<sA!<lnP-^!V}0*xhE{({#X@0@SySySca8+|kkT
z&F$^|-|yGY-<W*-(X(apg+KFcf4|ubI{ZgoULLexYl>zttl0j!8?;9K>#M7sJv}ME
zzP$YM_I9}IM4iP+N4r|u+k>}c1cL0Ipy&+JF!BF{XDnSQ#h@*&!OQ(BDk==h-^FBI
zU$=IV$21X1Nzc>MbU_>VVt17!-rABm!$%EVXvHs^=P)rR=g*Iipan?w_V!`x<JK~0
zM9$j3-##%h@yhD(eo5oB3rjqO*MzMGRsNtR_4daF7BBAax3B&E4YY(56#pSB798@J
zmcB6pbQqMZby>>QRiQSY&ltP7xxun*kuj*|dU2t#-~Qi>Q&Y7;dmg&PbXOhn*p_HC
zbB0x^7N|Nr-Y3h#&R+cELn6d|i!2+JZhGY8=$xHxp8ogO*Or!+4`<EqgO+6-^4MmY
zd&}h4x3}K&Y$_u*Cb3S{3Jn6~q<@L8I7LpX`OUZU{rc)EXvGa^tK_>oJGmMrz5V*;
z=H#8l&xORr-OtW4jog^j3d>2qzr9t~(CE0kyWF_sMSz;$oQ{hbA&F1+fxPeJ>})vu
ztV!;zDeL$B@&Z+OfgB>01%E%C)(=@5HTCjxf6z`6Ha?jZi%!(4YiM+U+Hora6nB-s
zKi46s4347u4p0;&c8lvj`}utS@eC8A>Tfx(uCHIe=tS+?Gc%31WL#{zySx1Jqi%hp
z%1<d@tGYWn?j(b}vPjK;p3myAwI9BIRn^h)VGNMtVC5D|`TFWAXqlS+z8_7~bfbmj
z<<~<q(VhGEs$T2HRX!E%64&>usIc(2`?;h`iJvDvKECe9L-t$Sa!+4c>b){_wV1em
zTu36QdJyDg7d`0|6chwnW(V4@=5O~?rCUr_DSls#R*%gD89sLBHlD;kKR&Xsv2CjQ
zn$;z$y$oibJZNp|-Cd@MiHRl|7aC?7rzdTU2$86KQ2Xs>I**KnKz)6E*t(dL!u~d}
zsy`+F`MJ4^0~fpf`F1-Wbb_4cWVJ-2nIRGrL%PLu7gc|MS5Z*`sy|;{UA-JsaA~k~
zrTDk=$u4qiW&`a?acpM$c+UEL%FJ!5QEQjPe4n-asME4*Sw22TW)<DNr{=Ho%sT$%
zMXAzB_v?O%7C&7UqW7fob#Ly)oXGFm;*)AuhHPj1_bj`=cvkK0?S)$DuCqcl=cKOh
z?0GdW%0Wl0_}69MEt3lV9y4FLS@)I1?fzRUe_T1d{U1;6#W%P9sc$?PH-)#3YroDW
zi|6O<|NHDNd%GcOZOz}W*Gt~q2s8&RAltoQ9oxkJ7pCb(b8vDhT3OBVoUC?nZS;1X
zD^}Xt_g0^@UidOREyt>gQ{F_YaCYr^<DE&dYSuX~<JMZgu|BrF(&VzP#nwVL-Z;6}
zzkaR#$(tw5^!VHQMZae&GiI3UO(}mRn(#S%@ym}VZLXUgY~FwRkfOokUCc9H@jRT)
zdA{(i!o$Pfb_=dndta(|Qx^Z=xcO4%OucU^+2S?6`~9CK>ex2S7q{touNx(>xtQ_o
zZvO)h?#-4jx0{-(6};Gu7gV`_es&gAqA&NGd#Tvi)y3sqQy9DG&wicweRHC=W+iTo
z`uT46`%MuHEE_H)T`~Lj<7u^()TLcb)AsJ2GI{$=<7fR_Uos`U-1h$CW%=rN8VpP#
z>qHJsf4{%P{r8MLH8KC^-apSYL-Y0q?#;{hxGJ<=F+N=-wDwv_Y5b4C=_U8&cV-yx
zj~8cP^x7(W@|}Eo(CG(z=JL<p{Bah8gzt^#wJQU*D*g>}-k>=Bcy3iu?wg41Vhw7C
zr5{W``YL2yj3ub(oN1IQVUi)Re*ZtKvfZ)fkk;_THE&Pr?_UzRIjy3y5_GtTlate?
zoakQ5(*fJdMXz7)d-77}-_tj`-!^>ue8>ONUZH?xZ|?GHnmkut@o{sDgi-zKPc=IP
zAD$L7tiPGH%YLdj!+j^$6#;w2{G~TN)75`?!bt1d4CgmnAFeCZce~>E`s<z2A9v33
zPY^oq+0I=2oO4T!&UMrK9~LD1d>--lve&P$Zzpn+Ic9I==V`pcuQyBeb?A<Q#B1wf
zy|-pvonewGlzV%ds-mK!xfr;mIB`wpj}H$)4a|lH2GIWBn4Lw_@;Ao|{x6C5m)<^S
z#q6gOgJ(-@`Lk<bkEdq&|7V)zyVeW-f267zz2`!yZQMSQ)lcVr?V4TG^YLt3LXFY!
z(zjbbywPrsjXzsot(l%z)UkK9cmW5y^X-t`4<-6T*E=cN+yALCD&O6I^RxCsKI>yz
zx)qn~U!RjWF;!a}v}4A#TTCZr$Aqe{uLNaf*GfZL&mPwjH{X15bF=#UeZTcYwL%j9
z{`$(o#+KH-O+9*jL$s>aQoiL|-pij~wbxKGZ+Gp>=?}s>zuQZ`k*cdK?wn&+HDPZx
zf6<2rj!H_&`8&6^pJdLt_~!1`=GfieioTSHY@3}UEm}CC<Ayc&gli`_zP-D9blu+S
z&W@hlFAMqSmDQb>DxI`<ciG0*2`ruFe;*`wTI`q}ySr@T_XL(}+0t_=CcUhUHawkp
z*ip#vn#-DNTa+uOtvY|;)VbCKnUj|zCj|26oHE=ezgKMgrP>vNcRs(5TlywyJ5$31
zwW;}*`3Bksv(MK2e!Kn4tE=6hW#Vh2OqG<BN;V4$3hw09<x&2mzoY0W*V|iLlYf4C
zdSzYgY|G+j6O`Tia!T03e74T|;I(&CiR2evhQ-U*Exy?@?d9_oN8cuXd$M<ib*)#g
z`x>5y&${{J@9duT@a?^ny~=-U^pdB(tNEL#Kkv);GYRtz*tSV|y)gYf<JFgE?HBiI
zq?t{SD4mo$``U(_gDy_nRWH|{bKA8<TCQ^QW$R;SzDF5rnf#q_XxDn*2YaW#O6asb
zc4}$jnz-+l4{rx)e16sbrGS6={T-&)w<I%UtCgtTH~lSt)1rNue*J`hd+%f!<(>3+
zC2DTCKHT_NZfTp{?PnLStT&Qs*#0bkQSD6`2R^C2Ka3V{^J92$dVToig;CE-K7Y~6
zi$BcNKDSK&>hIs5zYFzk`12~@f>(b^xAx_;XL~m$9}n75kl4l}=@bwk06H+I=I5tf
zA3Hia?!3A0Kh@#M{L+VqSV7C~KRrEN`EKX)h%Fg`X50<yWISf>Ive}>bI>*3r^h?K
zEPQ!l*87O}8;sZZh87h)UDj^3{jc=A%-ii>+m`<<d7Ad^&CSE|_LkxcHs;Rsd%o-M
zPu*9?7%VeS2!yfOIiC-=-d&{{e9d>+&xXn)NvUU-l>dBky09hBo9)2ilpTL7m&!a|
zydbS7E_C)v?fS&?^JLF>$4*IoyTts3U8%IiqnmcGj1O&JD$vnbJD+da{g#wpy;83C
z%^X+0l{nk;^>}Q%t)SVoW%~6Fmk(b4@wMOf&5uGoTd!Tswam4%%M!Dvn4h(eKYyfe
z+LaHt+o$f&dw6|*-J-~qTV5;s2PE%Wu6HK+w#}^4YW?NE1L|fOi{8n)wC9y`{cq(y
z%L!&8Up6}3e4XD?`}<q!uP-lM+}wipR+XCN+*mNXAKY&}vH$&xi;F=`642Sd+w<f1
z{rTj5=kDFjPj^}*O){utyR^@L!|Sc(&;Cs^let_r>8JDbL>?)Jvyaq*Cgm3`IFej*
z?`twc+^-rA({79K&nCa7CO)`zey+2}Tj>{eX+L?Lb(*dEQ!g*_-1G4h!@heq_qS$c
zp4#<2a-a6hV~3Y*%y?IG-D~E|kI(OPo+-NV!u+%BD$$diGc>R75U=&NvS0pp{`#jq
z{*rC)@15+pHBaVH<3_LiY5B8Vu1^ry#s7Okc+hs+3M<nEN8cp+{LeR>CU@YrY|}^6
zCQJXyDxTMqqs}=0+3}bqZL@x>{5tN2gU8wJ<GJ_M)kR$LeWUHMng8~VeZLZ(v4zJ5
zoy&bb_XEor+1o+al77sdzo9pF^`*9^U#W+)zwCTIFZtu6qc`4`Ntt9E`1A8~sjv`u
zYW?STP)h`~sr$!=#AAK3r+>fS-+wUS#I@&Bm~5`?Y0q94|LBbW%{jM{%L{LNrCH|e
zxwP4C-^$x*D(^n1iCMgP5H@*N|8xKEw$A^w_T0I}-SFgs0bkVR?`+rdVsvwVtn`<R
z`<pl6b#-+4a@!q;tS_gR%$qN^o;@L_{anfGYiED%Uw2ia{KCv6S<TIsHHNGnufMLj
zy20$|8Mf=YML7zVZCjnYdq=+TYuU1#-|m-RzB1fhs=VT`#C5}cw<~S_*0{Ys8~wB3
z?zMGtH=aC=-}rLN=3Vvc%IxF|3O=j<ynVj-e4DeQanSCv+_3d=z4PbGg9@lWKR)h~
z?Cj`>aW7_a{l~aEeEqW2)6-Pc)s2gv`9y8WD7-&sw&MB8dv31O*_-x{v(NcV#3%Nv
zyVy28>eiIFv-|QAug2(i^{>v`ZvU}v;_=sCT3Ce->}$MxJX!PqhpqSh?dvZrFX6S4
z>N!(%<HyVOi&Z{pJhQf(xr6(SZ3^>tFCMwuGo9A?-SR7W{m!qb$VkQc-)rT!xnb6}
z)fw^(p;o+0vbQo!3a)mwE`8NzeNu&a$8*KYr>|8^TUI+i!Rh#4ratGIZ8_?u@j53S
zUCb`jzB@bJ>(`&R=fA!R2-to}@n)XJi?3QCD?poE&(1PkoPU4c9Gl8XhRMfLe%nLa
ztto+^W1^zB=SAp<fey+3`s(WY&Bi7|e`>^M{Iq8<^N%=l?32GyR<_bxH}j<r?%P!P
z{kN9r$T;J>GU}Z`-r)x?YoF(*8SSXzW!Ap7WYhh%PEDqt+fA*jjgyOQoxZ+0_<hN%
ze$)EK1BLsGIM{!y*PrMA>1`q2^75L(-6WH8^<0gN$7NSHKMlC>T2wb`O6lus7k8Cr
z+x`2I>^I-;Y>%YzhYuev_<Fgx$W=)3X%+F;{&?69YAXNv`FV5M+gra47?!ooX`h$o
z(qa6<B1r60(e&+x64Doc&1^TW7x>NgP3HUyL!Om)3-hK{EOB@y9~7{-;GgwtR-aPV
z1N(aSx*OL$Vcn>Hv--=U*sCuO%(CBk(^+5X+dI`$(|#sPehPWG)h~6f>Grh=Cnk74
z2%9|N761Rg{GdVD1z}s=-_3q)0_w;|Y;2k^LEzcBx!#A{csqN0ul9O@rY%C5Chk9<
zxBc_0)$2c9i_Q;(Hpy;oN_{{3v$rRc^SSK!jREUF&wja$^VQ{fMl&PkEXg>kaJ_#0
zy-oky-pAXN9+IrOBd4_esTq5*OL4;9;x(x^-9O#?kjo$ZEd5mNoxjC>s&V_3GEIzk
zn>=mhO*bl^@Y-tTo8QNc6OYE5=ZJki9y2fZyV-Ifnd{TOnDvC@URM2kduyH}>;3)q
zM+~1G_;zLCg4&8z7yg<$965cuduQ?UOM9!!udR>w@0BvOsQtAiFA6e&RIab6*m!Mi
z^yi!D^FjNOKrQ2iPOa}28~Z$Yuub+xnak4Z7tQ`V+t!Ji7_wYaUE;KBZ_f0`>wJ^f
z{r^(4V^{Wm-^cqB%inB0_}b{<=6!4nPVPQ0oM9Ft{_4{5HO0|$x-agS`p!A~dd!h6
z55C{tSA0FEwso_5?#sio(iL>ArWHOrIpwCwYumZWEBR;m_t+jg)A&`QbO%##^i<`$
zFDm<cnlG)?c>ZdE>Z+^dd(HA4jF+vmpR|j^N6Fpn+`_2#i7fjqdp?OVYj%D-SM|4I
zN(Mu;2|NG1(;t4!;}3s6_g3b4|2Ow{C8>$G7~WONxpiafKX!(!`|8e)`5ouoK&Nr1
zytuIN&8@B73l=DRdwYBN?CIc@*(c_9_w_CFn`;HyEj>*)dQ-|tp`vGJD&tuX2uDx*
zm|LOL_vGXm-oL`DvpJIUq`tp>cg7_*PW6Pn?grIka>*~Z3&+Oi-`FwHIgoSR-?$%N
zjGq_(dn5hg#`j{rqstm}r+v_LmfUJ8_+8EU#r-u~ia(36PvlDv`19`d`<vCjlsY@Z
z`f}zt&y^ECuvRVJ@7*IVeZ%AY#RjFnPdr{8WBl`x-<#Cej_C)wt&8=NjlVr)fSwER
zwN%-^yz=nYTPt%<S4f4|&Y7kg{VC+?u^ukp2TM;XS6sT@@b*!h*?NDb2g_^QFSS)h
zoiDaAjggDkJb7{_pHJ)R&z1c}>psu_5*4g9CEu~zUHj6Pr~C!lcV~aMvgdQSm&Cu@
z>BWq|`JV&N-`euK>N#)!@q$y1htI9IEZ$z7|7z;lJ6oq!d{n4kvh8l2R@dk2>vqj5
zXeqrsw^2i7#f{p(pREcWI4tv-dFbWk<twAMYVH5?sXH%8N$Hb=kp+03sHi-y=A$cU
zQ>ga(J)ooh->tvOF8s}SLr}Dn?91>cN7ky{&!6=~=GRM$lkWYk0ckJAW{H-63z*9#
z@7;VoJ*DEruIBy8HSO+m52_TI$JZJ)o}ZTQx83*o!uU^Kw-=PihN*AUdAM2ga4di0
z{#9Pj%PYQ5-Nq8Z<Dm3#vDN2aXY4QEf6}8{vHW=Cz1h27wtee~*)lUtwbrBK#wqvw
zJtvx)GhdhPnwoX=xN_X`xSKXdWFx!+R&T!~fBj0=O>gHJVwak~UcYi8A?xTS-NTw`
z`wl&eoPXi*;>wrbvOhgL{-t&y<G=9TACkTq?y0hVo@~tUUCo)J`v1-?@9#gbd3@@6
z>igfHXOx+06o1Z5-*<09;6rmaujHwAYi8YRmZ?9dy5g<}&%3=p3)A2K-lYDa3RIbu
zzP|S4%*@FrPI&aVEzV1W<mZ`}mv{<K(~s}FniXpJE@+YIG>L4cghP{_|6*TyKPURb
zmL~_V7wOkmZWoj;o!BCB;EAW4ef+zl7jv&g=*8t0pR@Tft5-7D<;hWr1MKzxZ|k>B
zmJsz^bW}e6^WKbj6?MPZLsM1@xc;e?oAG*G?eA}(xr!e@Dr{_Rzcy<qDe;TgDmguQ
z|4(4%m*g+yGj#X;h^~46b6fejj`j2HzyEr1?97&%83*s*pY(Iq&!5Tr(%*hwUg7zl
z{oSt1i<Mklo;WVE1g&s`41z0x0BArCM1xknfoTkZPw^LTeN+0h9W+XE=Z#wOrz1K4
z^yQA_Du4STSa+{t0@qn7!2~mgf{zcv%;c^7*F6$l8D{q1wq|$6wl%+}a-ZB6vMQRn
zzF6$ff*j<b4|`kXSQi%;#Q>Kl=OQBwe@Ncr^xyGQt<52;-NyN_*!iI4UYh@I)=W8Z
zK7OzJdjDw_`%6|Ixii6yWk&FAs~4$mvOb3rUL80k+swJc-BBa&zt*n(mO>3>kDV%)
z$6eI<?s@c0jKlAD!Uj9OKRZ6x|K%l7LBUFgOp6sAe|*KFeD0jn_dXuX*{ogBJ7@Eh
zsol>$oV;*-#S;skAD{FkFYFbay1V|Q<Zb(#a>A;s$`c+fC|ma}_V;zY`3+ZY&x?-R
z^WTlZXG_%O{i~<<zuy1(cjTrA&r_0*@7l^>B6*L&K6Zzak`lkpSC+05wi7=;Oqg}e
zY_qNnW3!^Xs<-I!j++h>C-n#J5A{y_mV08w2bY}aW5!=@bNRfEdMTgPInhb=^rQS=
z+%w9Z6LNn&<T@q0zs3K+;d0p)p+23(L9aLe`4sJ66CD2;JdEH~$SCh6dVcm-y9F)E
zEoX1Bw>i9G7r)A?t)%pcA=rOG$DLxn<D!>*g-o73{`qflOnlzoRQZknOP+c9ow87h
z`lA`SO{KNw{ipl{&EpdHb2yHv7bon|tNA6a?|Ji4>xr)5w@Kgrc&#|xx6zAx-k0n5
z|GT)j$ff-9b#VDNced}sq!x)g9Hlol=Kt?5?&PaE%a-IX|44^bGxEgTj^=v7xsRMX
zH}OA|3drnMeYAeEi~5p;PiK5D3b}4bnXze&i;Rt5i4)hno!pbvkN$nu>)G?@!ABOa
zDSHez+*~v9Pw5{W&WcIa;c<Vse=a?n=woy8%fGcVo^D7~>6FM;Vi5DbrE`2mM{`86
zozFxbr?jLul6~`g*d}w{*{cz-f@7bg&xD>XixaHLie6%$4oa0ydaS<K<Kr3Mm6KeJ
zX86xM@pAF;og(v_!u$68_#MsQuk%vu@y3nWcexmxW`&e}je|~Op0Gbw_)&yimXG0%
zmWbM`>q~34RH$t}^j-MMXW1tI{=}FS#f39>9Tqd#=bH(tQ!Y=p_o!FcKC36%Xj0YD
z;$3lF*1wdSXYV^!)&5sZ*Fz=geOZ`(^tl}~Pa8~=J8+)cDqgrlPx#-}_C5bjUXs3_
zf6(LAR1;{2nav*7VgBzM?a;ZxS0~zF;)UFkU*>O~sMr5=*E5BSExwY@`~9ciyRhoq
z`|}TapNSeCl}mnl$sla~?>QfGHK$a5b*|dcemVcntS!=Ia~apnyfCw7W&H9v_fFmB
ztrB>(A$Vt5>bACgR)rn+bwFzX-}=IP+%J+u1O+P(fOCxTrN5mojTX-T-|uYUobWU$
z{pX9Vif6d)`zx@S`AZ)>@*{yMSbUnL?DPYj8#bEM{$!rTH}BZ``ZtNk`-@)&UXpzy
z{o?i(?&!sHj(1sanzTS|{f2uVL>^E3duX0*^$kJ0>3vf#e`h%+%klVYS+b;gu_Qy=
zA-U^cnB0ute~sGn?_|&|zQ40%tG{h{-XB_GB5`nEqxo&WXFOuF>i5m-l$~2U<(O7&
z#Lw4ab$>HO?N%TAbaMJ*j=R%%8rn}5oVA{xUbWro%kL7+()ihDrrB2C*!-OTS?v|p
zhWz%Dhga8cZ&aT5cCnpuO*B`d#EtJS6Mp%6EW5uV@Jp`n6Q0`FZpO)f3&VZI`ChuN
zc&NFL>;K&KYIR57Ka3AP{eVsG<^O%+#R|C^FOploY_|uGunA7AK6mQmex(i@*@NqP
zy(R8#Vrv$QDQsK5fU)Moy8yBO|7^GN<ef`+5!R_Htp2g~M~zyoQD@-R+@6>Jb&p+Q
zuGsBtu+Polxu$=AV(pyva+ycRW!2|AT>C$oVUI~+%j)tAZ{p|0?X6y(J<)&VGM#&S
zXSBY*vj3LLbm7zL#!5DgMsw9al->3^r#Sr>!z`YHHvRU$3gT<{&ZNb2*~eU$RI07{
z@OkTl*WwPV%lD^Tyz}!ML)l#A)60$WvW4=NR)7BH?Z2F_*feNeVZ7b@K)wcF`EpgA
zRThWK->10b`tv<J5V6$!^mgAx*&ZBs+iOo~tIl2c?ZL~_D_4HAklwD!Eao%soJIDh
z^+#4aH~qAIcZ+kTS=m}IX|7eFj-UzQdLgli)#vy1?#q>4T75FYvc+!gtqWR978P?W
zTWK&=d*|l5Y^756Z;K@w>Ls(T>K{IO_VOHO>+k)`g17U=$VTieeI0iF<G$OKbMC3?
z8NN#}+HrMB+R{TW@9Q7;ebjgG^)B&6k)N+JF+`cHn~~R-uyfjbgHszAofAS&Me$sJ
zen+|@U7&5dZWNEhTQUFLZH3FWO?WWP_MvzBhS<Na6QxA;-~Gy4+qP9R&1^?sjKTp9
z_Q{u@c~$+Iz<yv~qh4(L=btyOuMD%{SIypgggu>m+1}o^CTJ<~N7_w4$~{1jnX_TJ
zs(RhD*V9c34<A21;o-h?{<q8vyp}hXtQB+ER6Ip>u2t#bxhazv?|d%!(tq`wMd-^y
zu4ta}>lb!x{!@2?EzeXl{>v`aU>(Mmk~bCKT|GJ;Y`>zoNb+8Tyw-7T)w@66Bp69=
zdw1}bqAts%DMhn)mkUaj=^t*=|9_+|K~di{(j(x!AkX`YbE^86UVQ}V$Vzc(LF)TY
z=M5yJ1NW@3uY0zeJ56o#=IwfB*Of!o%hg<d%Xg;y9CyhPmBQ=i=bKB;v90*V>*Aso
zv~S0A+qFlhM%P`xe)aJ44fkGJN^rjVtSYa*&vXCG*$g*q-pWe(RktS=U%U8h<DVF7
zku8P0UqLKvc*fFo!u~<qbC!EcnX~`xdHI<|`P(~HHB-}@{CSUa);xZ9Pvz)Ct}N?s
zb5B=DXNzp?EHD4D+S$cC=kDZ-96{5)rk=X<aIthw#M*l%5*EFs#m5AT&ej|$x88F`
zf1+llvtwh_RTB}=Qa%wTSC#I~@8!?8h}PfzP<2MWxN_Uw+c5>XJ9>F{UuL%u&RpHR
zEal44veKftC!frTW<OK^TKik%dhW^FCfA6Bbie0|IaTqWr(#A>x>%Lia<`6RiJViv
z4EXK{gd|&TyJs15Wxs>k@0T_~|LybSWmkWDID_|-eOQ0s?(K%jFSu7~{t-B?AJJ*K
z!M0Q*Lw?>R@FFA7R*Pzxn;u=oZ8FUVr7!0iyGJqX`Ztjc)X)0+eGlW=8N2>@yb3t!
z0;)B(@+ouH6-%7I#$RX~mXx*r`%S5t*ZxXW{&C6LY_{R?4cF_h-@QBcHrM{$<B~<p
zHq4oKcCRtEUaGSs_W8pc+bTh6f$A#r$G1%qx4l%2T4MKIU)9jA^#5WP<{OJFD?Z4?
zY+iCEf1RAG@xit6rL0%;r~fUCopJX-Njq=zM!{HVJD**GzvLvvKZIOK4X@zc@c+bm
zcD>#ARwv!QE*W_H^3<C0wf`9#Zb70Iw7}l~X3m^ja>qKZu&=bv(dPL7RiShHj3;8n
zPyFk=YIpoTS=-*RD=j+s;j4AW)fs%M)@q8L+P>iEV$sZBpV)UyeRsBcuKe){%eC^q
zuYJu`efQOt#7BRQo|*qitoPoWN4M&8e$TS`cdR_3GwuDl=5uDZ{0p=$SvqV^TAsM>
z;goO8+%K;^k<_yJr4jnt{`~azF2l@B^XsSPRd(CXk&;L^PP_8F=E`QXxBKpSKKXc9
zro<wM`Gw~73+HV_tv~(L4rPq^o#?)7oqYpuhvhG2XRA7n0~cm8bDBtRh`n?A*;h}7
zKU>}h+zN<zUD-FkMC`?ZS##(3lsI&K-fNd*KkdGf@ErNWN1Oj_wBx*!6}0OB>ownU
zK2}~%t!3Gugc!pESvz@WuA1+x4Ov&Ul9H0-9%!iuneAcwE0(f>r7PNZ-+hx#qw8z$
zR4V1D^!_@#H}UiJWosTSeZ?3)nSXYAty6#hv!&~Q&X&LYy_M~U^yPc?pVwS`xxIL`
z%lU6^%g)~NS$RwL<(+SltDe}pufO&;hJVfeRWDDl)?8R)vaNgf%rE=Je-vd;dR=`?
zOLnH&qfjmJPdnB2zDut7TBhT{yWyQL+o!(wMs4Tr&Hnc+E9>YY-NT$F(hV;EIR3VM
zWLDq!cGh?Eu$L#cJX{~NvxC2oeVWjfS8JvF468FaE<06O$j>Z1`}%ZUZRb>3=ZMD}
z3O`&hy>D!mbcsJd_~R+Ardz#7z;l4#!u%hU?kahC===S8|C$<`dGqFJK^A9pI3<K7
zZa!9Y@ztA`pU%u=kKg=JE%J)jl&fA&Oh2A{Jbpb#Rg3G-AD!|qb=Piatl`w0;WKUa
z`|Qm^m4<8|4&MLoH?7!H#Z0TMMI^8Gk9F_EYY}U%u>=`RynLVEIAOC}(W5J>K0CPr
zkKA_`p9Egz*&W6uFj41h#>b=gCv`4J-S!}TpIJ!chuz)p^t;!~Z?Cmp%v%3{H)law
z&EG%o3q&uRnX^m(*=woXEkA<yEZMK80GgC(d<NYm4_a>q-YyR|8tNk~0)OW3{mqxZ
z>d$?Py5*j;oxHSJPiYxVQQY*$D5W#t+ceu#aZMauT}?(TbqWey9Zd>TG*5`7a&6KK
zaB*b3x%$i|Z|Cr1@B5ET@p5%=h`P7_^`~8vCQqJRY-cG`{k-zrX9fn2Mg|53hKBaO
zN+w1dwl|x0skiL^tCc!w{Yjn09|PZZg{Jqv3qDe(f3<GulruG!3=9nd91Q$s40-?7
zY_0OSS$=#g*R++ZrM70Rs=KRoD$&rU<*vormGWOM-bk(YnAFpHFO)-3fPrC&V+lL!
z2YKUHOU<8W{yuebRZ>CxB%Yt96|Zd8#{6tft1*qrUG?Kd!jYn0`CHMZr%Eqf+g!+>
zX#eMH(C#{^8*A2Ni_iNsaiTsu1A_n)<AaAEPKO35hStO|p3&alHAQ1h%+61%QjS`^
zGOt^{?0fc$sEZqeZQW~^zhIx|`Zshzf#+1cX}$W)P|cew<t*o#=hteTToL4ZyDEw+
z_x1J{ORoM5_@-oNn7O-bp9llP1hztdg@%wrE57{S(q$Vp(cS-b)*N58`#q=r`shXG
zy}m5De)DnvS5a$xUmB%^1x#?Av|i`gZR`43sy7#(fBE2L_okamdJeKq(Z29~+t)SW
z=U$!p>D{$?=9AeEHJ_fD?L6(a-M212*}Y5Vv~r)gF12*A`}xyppF-QtSWeMCp?~!7
zMmN`2PnNFUbaF}0%=!Ore`Ns2)TKE)RwzfkEAg1bb1Z$zMf0Bz432L*6WIJtdD8xq
zt2Z6flU?_!l3B%b+h)OE3(~}O%dJAxmh>oHbM=^XYOif{e7VlVH*4E;F5NqO`Rcyo
zGg3}h*KT!MA69cq_KjcmwG;fNSqIBbuHN#=y6#${<<n4uknXf^!FAqEO5fjKY%^VL
z^6K@^NlUK(-n&lUt!`q@Y~Ek~mp5Fv<oR78%0J`Ug=JD(ZGP_UzgpFM`9k>WQ}Zt~
zGBEt%2F3dgLy1eLpKw-|xys&JaIp0L=RI*>|K8%-xAft=E3XW*F4VS2efb~tu=-op
z^<z6<1s#Yk$_bdyCM!~SRP$w<81IXPPunJOMQ@KzoZp`LeOKl>y=8XW|I1DLarpnr
z+J3`T_e&QXtG^Y$^y#hn`f>AD372_xUEg%D*Cgv;+0?gYj@5Oum#J1CHr*7NXvtT%
zI4s!wbl(0&A6V~KFfCB2HT=XKntMC=@4c*dk>CEh%bF&g^}H^8o3mHvcEIBB){IN1
z*52A(KK)tm8wJlO>wmnZ^R`SDW&x+9CAxmUxoSUt4QcI_Ue)L7rLpAW>v>@Z^8TAg
zFP<gp74`F1T<xE8K9byfSMxiaK6zhbrm<+~^n>}k%*!IA%jZr?^2<7X&c#z~W=$;T
zC6SP*_52#k?;36snUqoYb^gc6c2$d8S6$(_ntp%J<6CobC-TkjojgS$d&=%qqu>R*
zMfIDx#I+xAP24|s!qpkEx@`TI7di6U-j=aRUwmxub?00w-b>diJ$0vlz8|}$c+$7C
ze`7Y!E!%6Gy+G>;*MyX7pTAbE@Mp~Tywup4T{>&FC?msx{SM^;4E^%Xb$4%UcbdP>
z{zc@I+qq3<-`17f-di^P|BYo|{n|?sw%jVawP5+3)5=OpHm_@bzxLc4`ZP0jlFD(m
z(icA#s`b5G%T;N-(qU@2>7$x&xAprJb+2qm2|Hh0+@^5;aku#+)t@T^)=twZ_&Y^v
zYW=mZPJcw_?no$=ZF>4mQ;jF?XXuiI1<xxFeC9P-dfNPzQR@6T-|m;_yxLP2y(w;z
ziQ1OAW>e3-;M=rc<I#k(vQGOiPCQ*JDs$wiG$cl*^1PTD-+ZipTJlK-Q;8pQwc~wb
zmo?|cUHZ>7`962WzYW<%-^Fk5Tiz$yG<C*{sC(OT)ho3FJ10Gv>uDCOclFhuIbwb{
z)sG)^t=jreY3f@mANjIM*Q?!24Sw#qYU=QE{^X;-pH+mF^2W^*^E|oY(b?Zwg~gs1
z)wiVFN#XtT>vzig|JvtQEZ=^{A%4j(?MqW?CnYcUld)g=xBcy;@XW9Bm;EgF{Eu0^
z)~fVdSW?oWDH^F$drzA)FetDvd05XmH+xrN>8gyy|D<*%Z~dj)J@xWFC)r(3FTQGB
z|MFUy^<>MrDw@AmriX4!+G(A){mty)FP?v*tE#V=d0zUqo_*D|>&&JV=lrw6bY9!P
z3H7QqSbNK^zG)ZFT*dWG3upa&95&OuKdd{^`~5?$H<Q2I$obIIFl&EE>QqjX0wV^7
z2KK&cCPtfSG5Z4|L-J2BnU<dCe_>(ZSlYfWsD|sce9Y$e`RWt;@9d8ce^VB@_GRej
zz3abTp0jaPw#TkNyqs^>tqt5(edqA%Syxw1+Ql<>q0`%A8Mhbx`xW2EFq7wd)tq<F
z?AD*sns(aXT=nF};NSi6{y%vyF5mk9N2;Fy1A_wx%LKQXpI6-f<ZtUQ{A~YXMec9+
zw#|BcAYE$vqZhfS*T0{+`j)QUg_);+UtRWRwpHTvO=7Fp1@3=;<%{FnSZz!G*H4!2
z|8jp%?xOuq%6}Z4duv^w;5v(2pQcW?U0q!*{qNad&D}N6rl_2JFg4i!t`X})^||V=
zT=sna_Tc8i##{Lo$152jB`jaF;$ENaC1Kxdcg+f`xp(*TpWlkQrnT2qJuexHW>ri(
zU%h>y+FJW(?{-M8PmX@J!s$i9)avXFUB9ZeZ)^;F$e#aare5f(%6wk#LUsM<PX|)(
zm-vO}zhunnwtBf!{=)H;DR13^GHk@3=FQw3e`Vf%zMXQ>!FFb|H7i#{n_Bx!y7K4o
zwNt5@3*2(&6+aESZJzbxR><6rvaJEf+~;3d^L<tJm5GmLU3pUJCj<$!SvO15&il{W
zdG6|_XIs`^IJNb6>Q?oo8<sm)Wh6~s^1*mnZEEf2$5Y<i)KZJK+ZWC(s(;I)w`)pJ
z^E>4yp)X8%bHfh_f3nN6b=&W<Q!R0OyxkJp*TO36Piak2cbF6!I9-32^9{AXg<8ip
z-`(uar_#F5SUkmRy2-B7OWtk0lJ8+}swKCmk^9^3L-)S@4_@D=dZqN%yCsK9asqCb
zFVFga^K{qtd6y?mT%VQW(zNF_f57=yJ5?n^S00u=^?u&u3;P}$U)7Fx>XkMRJNfCW
zNmk{YxVZ*C|35#Eb7WvB_|N0F{IaKH!|&kzb+v1wwt8J(AHQ7U``J~hd*iPiC@tI`
zzo4gdi$&6uPqY8?*giMe8XVi(>+xaD<JkYox&Jj>_MiI~66<!(``_O^Ups$oS-(<Y
zvC3~t-<&Y_>^Wt!R&PAMNj~2H+hOYGdsmJeEB*3x;gOWv71ghlo<`jF%RldTZ%cUY
z2lFgR+t}D+>ptpa-}|3ks+k^oo~^bd!tdnjO=`OGE6?1v3!1!8lV9xepSPS<o0C<4
z-ruC7XXK=O|HZP__DS!8zi8cXna}fj?W<Pf!c#U3vQ{M*npn9-q@}gv<KwxQKp9_w
zWtm%9_7=V9*u+cb3X}hwJtlI!d%N4|tlr7KYOE}*_pFi++jjTwTlBhclBz<0Sn;Jz
zrwh$kQ@AD_)sUU@zi}2%tXA{NPO*I16>F|(cTIkheNv^q`{VmQw~OI=+UL|xK3VP4
z_|NE*s4x>FLj}VOB_@YiiCcbXpLq1Y|Mg7We@6F@R+S%qBfmo7|Ly5dJrY0r9Zx-a
zQse&ug}<MdOFxybFn`U+z;H+U9VbhJ`-vm|`H(hX=FHge{5vJ~+g>?MnL3Gqq2a&C
zeoy8DhlR=`-cQYYG&9PnZ*JAzvPj9EQWN`{Df2cPoKgoDbPl&3&h~e%zqfLsW^Bz_
z76))TZZJ8fv3aJUEhEE$$F0>&j0_A64>+B?6&l#jtgFbLlk>gC^wU(;tJ`NqZcYCi
z_13B`vaYW2)TG~*Q!6(vD0#Z(-;c`W=UAo$9TqPx>SADEa8hX4qWtVZxz_EJ54QW)
z-?@GN)$2ajpu%sqYgfifXS_Lo^y->#yt)zp+8^b|Tn<gIO8s1ZRw;hn|LrXh2Tdq5
zt*>3+<Ylj3|6V%k<IUHzWG8A=PQCVk)AsA0{f^>F{23pwT#Tweq1Wx7Wf!#Dto}|&
zva-;ZhmSb3q7E!f+Nl4!*|aQG^J>8PKf9dVpH6u?@vYyUHD;T2(#|=8z5JnLk8yId
z)s!=D>crMBu5<J3oTQ@_v}?&cW9g6`5uTwdBOT@UU2sSb`h4)1%%wm1d8ZQAEPh`6
z+skOh#pCv?9vu99>W@#uKF!@;?sLDnZ!!M(YoR~u+}E4J?snN8?c`u!$UA(%fI)5p
zi=>Zl$Yo&$)xTf)4o1(P_36dbb7F7%HZHxT+-JEt{`Fiw%em&0`_9U&ICPXt`^2Xy
zpF%#?rTi|+`daZdUVM3vnwu&s1B2#bRe=M{Ps2CNdv;c5^@8e(^Tlh^v;J7-ySlsI
z-*iQ%{`=mNuI;b9_S~&KXmsjP{>Ax%&%K@~#v0kJWwP8OTKZ?jx%(SVnme;St+AS}
zqxn>!dW%y3ylYo2X8Y`uocep_?d#ok)iKMHeNM$HT%WtU;MU{R-pOmsAK7;2#cV&N
z)t<8H&Qk~Tk5<)xmaIGKr8arWyOlYoM4$XOpPzp9W^j7ROSd^bYVo#Hb(a^hsJ(qS
zQ}=S{^S3W2EfE%9r(w5!`jc$I%^{F<(XP?VC4RcV|Gfdb?ON~oVIe<LH+pjK^?Kd?
zR5;Xf`uZiSXWd-5Z<&nO6qQ<w>dsAbJ1^X7P2N(FzG%<uz02~>8Px8yIrq7&d1K$Q
zb&?B>96hJ+1XTo%-vv+F-$8DK8%u}oOY`>(dbmA3yY9G9{qFmQ?E6A*ipi_%7UTph
zmrAw$y+0v(#=PCmU3>P_ueHnhIPIFX?8#};@o&~-%74A^zp2Ldv|sMb#pb)}mmDry
z<ukLXtHVOWGs`|};je4DOJ+0|hi?8YasHQa)zzy!p2wcEY1BBLb}zSAVqmCvb5U00
zK=Oe#Rm$66OipXP%c-|tEtjL1-NEx&LaF4RW!hhAHhSsb{HBrfDns|qORm<=(`qkH
zPfh(=wZASa_}JC2monoQo-?((mic{c#qzLzP$%Qcg8$dK?r#p?ZFVknI@7&L$<4n^
zUM%R)_VWz7vvu>ly;fU(&sbA_BYVG-uc+KU=he&iJm#D-x$e#?*@q8rZAqNN6~R;g
z`yRLb`3tAd-oH}itXjLVXg6rTvF&WJ6}<;Fe)p}+3V$p6_tSH`xlt>v?z+F&5U%MP
zwLLm9v3pDHMC~)@UR;llUYzr(dfUcB^*q6%&9688`*ub2)<)%+*UPV|GcbWO{&8cD
zu(vnPe!Wl`ae4Z`Id87+eW;ykU>dl2dehXcA;GO@<DYQaw9ewcy6pbGfbV9xSM+lx
zw_WvpeyiuL^x3;LQM>kq%kMW1KIZiPRr7_SPv&0Fd|Yi02Aw;6Ysc#;);B^gZ`k%V
zRBg#hyLXXa&oQ3Ln7Dtx>r5lHtu<_HKTbb-zy5$j{oh>uGe3l9-S{Z%d1<vU`-&nx
z`|Hn`=3lPPR@d6kGby>*YD?1J7qGPe^?Q6iPRcP~xAT6+#QnS9uexvC8hZVKDyUJT
zx;n+=wEcznm;cwKwe@U@_#^tlpM`~ip~ZorU-R;1{mc~G@P^xFWo>oacSRi%DUa}0
zdlSA;?(3B2=TlNQS2Fp3C~g&fanp-!@27Vb;S(l_e2iYA_kW}B>$!Y4`Fp}{`^~*_
z@^Adj3rp6`x&N;!sLazSEpYL7yPzA-<yTcTHtfFjzFPK@+Ord8x+PXB>rbr;Q_9|S
zohLFr<omL9QYw1!Yn{WEUrN<pVy4&{vennt|MjD%$;(BbmfU+7EHfv7fkENv{+$|(
z4aeIfr&@oTJdJmnjI>GlTSw8{2#@RE+p>3FUU<>8YuTS|El&*3Ojx-hYTCz(8#*dv
zf4$w%m-feb(wC&$->+<R+8`9W*W)U3TivriiD#<u?{D`WhA#FBd8?%>qi$*T;``)_
zT2Vh(zy7XSsPghg-_luEQWs3K=BxzQSPHx!v?H9~PnBF4DERSL;s>FxSvTU{ulY?0
zUuYg`w*Gx=-cfJ6x^-g4%}3qs?AA@&`eOOK(4$>X<$g=Cu(00SrlV%$6wgx^x>;H1
zYRv_c*6^=aKb%_F0&1z8f5FG~z;Lo-Y20hwlcoOmn|^w4c{6+Vs_QGenBFI6FFh?1
zWcB@r4*T^t!C&sL5#L$Af87oFj|;8LGTxt@Are<`Ui(F8)cLf}*Pq%Jhi)rVKYMSc
zdvg7ab&hAfPv3vEcJ>?{h!1p6gsf__l8%uzR_B*p@w9DHR+Qm=+Z@Fx^=rF-)rfJN
znlWW3U(K>Y?Y?{4zJ0YV4&8l}-Aed*jmq)qyF((+Kb9|-+41)4>GYW9D-wI=g}He*
zZ9DrnWKs%O=4!9X*ya-_Qa;@go+%`+*3Ax1V*h0?%g(>2xBHfFrMBh;liY>R|MHgq
z&75BUFEDs+nWWFcEYV5P=R;OJtjxW)o7Xh+ui<qCgRbivb&Eq3OLm;vYZ~FI%cl8n
z%d&6P&U3B4UTcb-{_fq2J+<!&a?DHrov+lov-;<=)hp~+W7gK*K6dWH`-|J#&Pzv`
zSI<&e`ENqlsr+pxt1f@t_4rI>Y~lLS#QSx!pLlp4@0)Ua&b(^}?fv4Wsl-~=d8Dr`
z^Mh5XH{R~rkUw`{y!iX7cQ+QWde?`Q{F&SAxlH`^isjZCU)M!@J+of3``hAav$oaE
zUl;MXH0}KNSC<Z&X8u}Wc~>N7ijSpsR?_t6wy*o&Uv)UzZ5FgxY3Y>(zd0&Jt9+er
z?^J8;x7-w&JpJ#Ij*e0(yW-2T&pD@9d%WYl|ITvT)`!O@?UgFd+ONd7lWkq6W%<Fq
zdwa6}ADyoCb{^Lv-}#+ZGP#rAT#WjDRiU@5V|w{t#;WCsik@nbmv%F4czl56>Z>pF
z?B3iB33a}IJ#nYggA<H<Lzmom{p+s4f|_@MfzS8;-MEiSdac)2>Fn=QUCuc}S`i#a
zWG_^!#sypztKAyxxi<E5ds)cONk_LP{O`5iV;R7D`p1cctVO3(js8D&uf4RoRcmiV
zkg4<i>ld4PzaHg}zPhGB?fYuCy<gI1Ec=o@@eAAYMA>P<U+@2N$$q|f*_(IwChwbB
zT)k}iikM{wWS?{PSbp2vC8D{KPkDOPG~ZWO16;dGue5Cr%bkD2H}&<GFEYQsoy>o6
zRLOPAmw+B0%Tupcmlj{y92a@B+$Ui1C)JQ86_sEACH3CoowZ0;erw5n=FG6Q+tn(b
zxomOQx&FxbeD)>&)n9j7->~9)8?x2zZ0LbW&Zcc<pII*H+s$41&$upVZp^+p-3$x|
z4nHWdnmdJYhyB;r*RQV%U0w3`S83PB1vmX(>=l=K@gv&UbboT^6y~&-&!XOytT_0g
zRVqxsT`#_Gvd_L#CGDNJ-d@Xy-F9pzQ@n%P&)NC$#&%X-)02Hp%~Jj_(MM7+zxP^c
z_8gVTIy0G4JwG}>E>(TCD`k_0!G%dVGtZ_31|$T<Y>n%@^wr{|e(;1pXXkinzPzxo
zSxhHl!HpXcKYs~LVr1|aTh_twA;M}~*0yQZvcDFkO{|akp4Q{*ws?P2)1Uu^v5Sw!
zJ5IA+pZ<+Mi0{99`I=|DZ>XPKb6~-O-4SY$OwI3ZG;xBvq#x#Yck^6tZ(jZ)Ex<7J
zPrTJxSu20Lv&Ua9l_@Wr`RQR08#p0MSS54U!1=V2l8S4_T_ex9rJ~=ePTVg$zQy%^
zz?9wQzh@Swr-Uj-?Pp+Mc*n$in4v<)=5tZ@^^BR5QckRYxcT@BpM+DFo#sS-Z}n;^
z`NFf~Q+xG+so%wA7EEAZVEErK&4Hnx_5SksYg_j2PThLUbKkW6k}^kC(@(s<wRLqi
zSGb8@CIh(4SlJOMcKUAVE0fuh--H+#9@Jfs6#)$~&r{gK!t~?(jmM8g(>LEOi`*0u
zl=Zax`p4MaOZTWwHUDrhZr7V|zZcIR^G(&7`JV4H)2-5P7wlZ!T(tx5ah>_QhU@vM
zFW<A>=U?7u@26>H7<lg`)71SQiLA5U*V{i@8+P+wka~Rg&xvP}IUu3tAT#5Bysq2d
z896gIUf6wW&xW~w@;T#|m#q!H@nK(7#ffF*&vF9q9at{s^P}5t?N;vPFW>q<-)V0?
zKWNt4^L0PVYwv8Io93=^YPCbAZSjkA*=fP4yL&G#-<$pSscqTYpLtDA>^_rJrfLQU
z2nw>RuD>L;_k;0OO@V^*zXi+RuusjpVE=RW9<S_o+YCgvN?1%}U|>)baM0SxF!QqK
zq<PuqewXjB)r#L8JL%%(U8R37MDb>P0=53;eg82-&gI3<Rab@EU%l18UNKE#-Y3tU
zp}Oxjw1x6rbz1Ix_1F1p=?hg=Vz2JGchxlKll3a0=W9QduYPsjZC~<fyPy{h`Kboi
zF3vf-IONLx1+S~cRyn<BuiEa(Ub%k8>Q_5!-e=m^e7<^do^5H0iP!RLz9Dn(#d~fl
zU}R@t_}*I0)L4Ho!Qe#N=9hndeinHge`2Qja-B&%Zi{=it0*MzT9aj*zckFJ@;PJu
zqW(?dxhv((*M`hDvMY}%zTeat^`hj&>I>Pw6t=WfXS$jGe^C5%y5mRV`xoxq*V`#}
z{LR%H|30gF-c{SPbkV<c2c!b_tXx*2t8dBwdam8)(x>yjAJ$lWQRC!_1#@m%CjL`x
zKfF-cy|3aQFEeO#sNlMkS&oM1s-UG^e6m(9j_<;obdMgb+u8N%=pjqdDTarc|2cGb
zh1LXGGBJz3C~rTVa3@h$VsF?akxNQ0k$Ha^6|xJrau;s>-#dG^@5f1fYk&H^p35iM
zQyQ{1<XXyXt-#gGR8H0<Y}qE?zkB=N9j(2apT>9g_lNH;d;8_}`u(fY&d%yF;s*8C
zcbLz$F28m?zJBfH<^HR)uCBVZF`3=9lH;9B%_C7+*}v2Ouj%UFt`?oP_5SbY$L9R;
z<-Qvmc-m(E-)#z!3rbWygC2dlYL&J%>-n$uDxSwK{SWpl{v@V$e@<)Y(FePp{FA6_
z(f?{+@b;GJ)%Ef5ppCne)qJ;<ybS6Bj~47GkKI*r@%#Py^*3%rTwNLLetNq8a*h6w
zEz|Zzx;{NU{rjKrhfhyhRj9<|Pg(yU^4i1G^F`|Wqx-D>wEy;sjBIVnmp4+|5m9)Z
z`8eC(q8}aqBIYZ2YQ-+R@Rs?^kJR9qm+nnk!u$HW)-3&Z6RnD$^?>d^T^qgq)z|Cs
z*Z=(doXN@pnt-{pe^=@2tix@*n<8|wZf;t7=+GgJQ_7QFa}O(-EKRRGKZ$Me?3Fh@
zU;Uaj{m`T(2aD@2HQn=U-hOB8k=YyP9e)`lVS4Pv>*-&P%v*5!UAx%)DQ!6)C(fV0
zyYzz6va*U6(Bj@xQ?+0B+yA?8VxqEX-Jgoe%8&c)L0Ra){*YxplDW6H`R*)!9=1Mi
zuSv;^fUZv`Bto|x7brE`vFIv){Mk$WtCz1lJ*76wpl0DL(bK;)${b(5FN@W_e`E2c
z%Fk(`>*Hbr0|RH7W>33U{oWQFut7`@4xXHEzkgfq?PdS}zOR=u&6-m6<%Qzx-i0=D
zy>DLaDwv^ivhaiS-ml-(KdrNO_$wv0$i#oe(Qfh8j?HYFZoiG%nACb}OQv%mc#wlX
z?%JBjt%Z->uC9p;Hcmem^7+}>)UB7keAf6OnzP_#?W*c|+t-C#9zA*g{j{@^)!(&m
zFP*(>`PGjCzB3FQ-`?69e5^-uYvyIO(DiY17lF%nxg?2?pD%ApJ$+?gZS~r?y-{1U
zu3kFYEv|V+Ibg+)&GU{MSNmP=te?2e$Y$!{&lYB_Z*M$~VLo_bLcC?Ok=F8<?|&kL
z=X_FfG&J<{4Eo0uneuRTmBt*u<@f6U*M5C_JN)P8=hwfzz5VqNw|;;Gs0cl9INfNb
zN$szaz5oCH{`GWvyoiv{qU7U!fiv1q)<p%o`FL`%Y|Fn=erntAdocn3n%1%R@mdx9
z`u5Sr|NpkjvP{KV*#UZ@D__t0Q2YEE^Y;FKlErR<*21cumY<)cB+O`6P*ilBYgKw_
zN8#g;Z8?#x?d{tB^K3-eKz+Cm8NBj#G4XXjm-flqubVJI;O(ug-HUAIu&j7H`ToTz
zhbv899xs)O)Ss*Jvj1c3)(-(^YI84|$7`NXFc$oM?(fsk)qHR7sCa(Lyyd}qCT;4v
z+xNDrTi0KlS@AsH=z-(f^7EDv+nEkNaqRBv3tQ~gYf}HO#x&!C!|Q8ngO~Zt)ZhWt
zWDoX#yp_E^^Wq}cuD-sowNYEQfHoQz@%!Afn|Iu9*_7KC*1ey(UN8P#=vK$WA&Tx3
zUVWRp;L^KyGo7Pq)?U`kw|tq{_1Adw>T?%<O<FSFZH>=1-sdytn-<T_es=I+Zv2$M
zplg>*LH)etwXM@~ttaa&aH}zopY$a5zjElQht>VR8?7zN-^~Ht@4_o>wjz3aUeun7
zjhB{syC;H^*#EU|y;3ii&##+wGbe0w+F6t0XFgp;;g3o`Jh%0%|1f`N!ELF+^uwkL
z&bk}TaQYgzS!t4r^Ri1)7MohNH>mnuoxQmJ!qokDu8Y_IobYXjdPVu1or}7|p6A}y
zyRl%mm2^*$n%~v-$=i0am28|JeB*$fmCo{cugx64e%ke%`^(w&_a5)^oRs!OZspqa
z$10xb+s+5QkNmCTT-W<H^!K)(T<_EDCv0kF=U?SDRqNN^@ApkhUIh5gGVyd%o}0@E
zDn|P|xkR-xUSC^#YNB#`)t46wCn~!KO|jnb=d+BcW>C;V=lx%GW?xyouuDX9QKofF
zqQScd-?c&)&%S0YTXbvR#qZHEZ%Yf8U(1TvUpeo)?2Mc7Q=@$*tPXzediGUpkZOG1
z($;r+v-+5YL*5@<q;;>dYf|YS6Smdo&!7Gpclr62+M00CNVscP`1KndET-@OZ(a6%
z-IO=~_RUdTpMCVo_o??c2Cl!RvGiDP*UGf%hM%-<xJ(W^6ZBMOyPrxu|L?}zIrsO)
zhOQ2~dUJESY3ZvFUKxuCix?OR#CiOdPo6)g(f!Svo10g~?%oEv*SPfEoyg?leYHx_
z`h4g9JNUNgg>L<K|JCLTO8tNTxw*T(`m;6Sr1e}Q_NM_0p7PzFX&W_h`8jQ~K(FW1
zBQ1+q>?RycNaE8Ax~bNzy8cwuhTJ{#HcQMqQSV&ex9|1Sr%PG6#a8&uwTg*gVA$cU
z=EY!BWm)v(!n3opzn;=wpYiI-N~`*Rd(zI#I9Th`Z~1>g%qRX`r>FM*J2mzC|Hk*t
z?{&Ys+pS#YGjow$?Jo@jg8=QYH32U#Ew!`+N7G7$fFHlJudWJxbzxyM=(cuH!T<13
zD;Ez>O;T1uce(GeePvG^d^eYbpZj8_TJ=0|tJeAZW%ut)K2!H}L*1=1k5AviQhv<;
zJ}D}-nkm~b<%B@5oNd(4Pfstuy1M%7o6YBoniv=g`d{#I{W(6zZ~3opZ&&-<{d8gF
z61i|`srT0}m;LPpkJqIg5BmGzb?VO_JDyc^_xDf#Uwrk-x?`WuEH3ZfaiYL?*5=@6
z44a=kR+d*iA@gTal$~SgrkgphZf;(lc7ER4pru}!KR-R?;^KPr1l*Bc5;wp0+st2I
zUM|kLxheDGqoZ1Jdn{ZTRgNAxvch+^S=8pVUeIwBtlVNJk`ncJL8YOCltKN!nytCF
z!_qcK7C%21I#nxFgKJ(!U|?X@ogEwN{(cQFeRCslvbz7YlSk(AFfja)_7P;XahqkH
zzwYYl@Y4wfFQ$gaUEF@Z&O7kK5v!^%D^`W9TohmbH<X!;Cm<q1f>8ie@J;v@vMwev
zdV5}IK)?jhHMOAYbQeA7+?;(~ueGJ+#g&!8vn&c1wed>31u`&HIK1HFvRS62tPHwl
z>h1mg@wQc8F1)z7`03N5@69}ZhpmlTx-IwisfPtGZs+e0J>15t$;H6HFHj%I)W0%j
zXHn+qX}YT-HZD^0o3o<q?JZ3^yLpEcd}rI$?wVzuuNNO5zczY%9|H%d-mTb_a%M(g
z_Vsn4pyo)*Ng+_{<odeU#ReXe-FhSz&a<u7vapDF`0$}t_&OO^MurE93#>IJa9POk
z?frO6`q!gw{V#Wl&u6~8wDi<WWA#?YSqa;3e|<JPUrR^F=V-V1^;1)|r`lGRaWOL-
zaCY)mm>~6UVO3@3pC2DVHSxb6kNHE_#YF!8_I9y>$JeCf<g1%fy<1ybS4M5ky0xd$
zm{EYCp}l9CgTmCAw$<CL%HC*nA6-=W`Ps{*)8iItcuduc-L+(y@9eDS=jL+p@?PDN
zdHKoPx4wxC3>7CbI9Y;bn&sZgczSB;rrU3`E-Yvi(~t8BobYOnMWGWYkDr@seRX?&
z{MlKi-H^aIaR2A#+cC#_B(JWEwf3E1(6~t9>W#hC+nt=8O1``Z?3Fh6TkPIH?O?(J
z0|o{gr4n}5ppC-nek;<>&ax_bG2vdtV_sf)yE*pDK1Te0xqSYWU8UKoDk>S@-rUT(
zwnh>Zc!G;n1w4*^dUA5{hYtl)b)&-`AL|WX?l*VJgb5CbA0lQMr}s&j=jklIxWY91
zng9bhtL~7WX_R_tSLy333mlox&bN>6l`;(~DKTk1n4EfMhT*GQTUUc_!&)1?eO1s>
zFA-LThW{LQ9UDy!<!yhx^?KapCnqO|ZpjGLjoj3-@Y~1v#TGJE&*zq#WL{FS{r~54
z)Yh!2P0h`kJPZu~6{DCMO%7$4WL;X~DP>i%qVE6S^3at*uI&7BOALOTKXUZwO5fRL
zvrIB4X@#y@vNn3V5CbDa1<wT`jwOzXiHWbSt)2bn@87Kj51pp!Mo(MxVB)T-ud6^g
z<->;p)BJle#_8v#+{{@93Ou$9PL@jx{N`G{Y?rSKsHxfW>e|}Xsi&s}ir5$}_n#lO
zHS4N~oZLF|dlk<2>wfD(a#2J3+OoH|Zf!_x{`2S0l~tkIzB7$l7d=>bdw2PIt?+eg
znA!PWJnpx@mOj7s*cMd=2Ai}Od|XQk-TPz$&(1O}eRIQb@9%fJrEDq;Tn{>fPS)9)
zb2Dh;#*MS=YIj9#&+~<}@qXyLxVo;)ySwYx&*$^6F7uVH{rPmd(M%r^HvTh*TDh+-
z^%np6>sQvvNvf-&wrYW5)zGy(-~p$Ah{zI9jb8Tl)-2oVZL`dBwN_vCO8nr^BX1w~
z_V)JhWj-^vRDMolQea@{w~S(HRPQ*O7Q8+0u1W4Klf54faf8|cO-)T2Jny<MzYO{R
z?=Pr@EUFU`usVEw*t<JBL2(4tcrqm@B4P#`kHi8{QKP7+XjSz^qxGPzw}(eSJD;pc
z-Jgo7`tflOA3g+4#07DJY`YR3U%M2PpA8HGy2bUwPEJzQ<a*~-QnDp#OU6Rq*=8?6
zCD|Oy;wgHuR$R;s4qA@h3Ljct-P^lcOh0bTj2RMFSA}|mHq>kUa0gva|Mk^XP+Ruv
zi;K>nLtGe`KpnaXQJ~}6USC@~dHwqJptyf|d3mtNpS8^Fd@G8cp1QTWe0>|g{5noy
zwG*ei|MxO5Ot^JHR-}PX&Su3d)9hDIPEP*ypqc;6zu)hJMO1|4<kqE~o%Qlquk<Xd
z(p8{q`5<xRarWF?>*cq$W@n$DXZz~P%E@-W-xy2wx^Xe<#_kG{wJw`-GUd{tR&G%F
z*~P@*pyg&S$1rKho12@nKR-LW3UpEHp+i?TrFtJdb}VRy`kf`7lSQPYRy8y*<lfma
z5mFjBL`_n3W?LJv(P`nrg;5)mT0wQk>FN5xGt{pvaBTi^Ejs_@%=CE|x8>fJa1;ZT
z@(o)~+`b*XIqhuF{e87ti=X>}iuNZbCyPAeoHS#`igmHOUrqM63w(KLsi>UXx|*M#
z0wow2E^)l$Wcje&?*E_Sy+5DL=HlYI^yp}J=pq-+u1Uv^9yy|6ViE!>pYQG}HLd?w
zv($h7yhGrg!-pji8xk6CZOK%YmzO^~)7U+9b=Xpk%qy+j;#YPQCcnM6HyYHVUE(=e
zLWYB(!KCBe0)}@hA~&aj>cJkj#h^peb{0Pmnqu<e#6)FK54`OCy|s?bY?-H~Xo8Xr
zM<c_$kf~asm#(dio@$yMR{HXi>-?HeoUWeE|Mq-0HZlsDtnPnpf}*nq7Xw44#9c?m
zJ6l10=diU=x3=fckE{I}+AC@778mzt+LT$KaD01f>tvO)v(5d#zP`R(gMnd5;h(Q;
z57x1=vuD4)wsuv>%1LUzvzD0U-qO(1>pQe0C^0cH>*Au;OUwP&e|>#j7*ZS_c+14Z
z^yP8C{i@2(&!TpftOOlz1iBP%M#!`O|K9(<vN~Mf_V1U=T>SjkyY=^Zfa+7H3$h{y
zY74FAzPh};A9Tm4Q!5v!%HrbVTV~L2x%=+AdwZ+1e|>pb^{`cZOVLxW)U&guLPEJ=
zw{hAT2~bb!=H~S4<@alsE4g+BOgVn#>}>O^+j65p3m8nZu3T8;+WqADb7&j>z}}h0
z>3yKnA4^_d%DlIya#hGmA=k<?F}q4Ud!@~<t&81#Wm|5v?_8_Ud-v{%urV~K3w&o`
z`f)vKd*0fq+To{OmaIxV+!nPZL(sLdN5;M`Mp04m*4FIlzrMa+KGQfIT<$hq6ykW$
zcyn`lf7aDiOFut9zdCri->KQ=`mR6wj~+Xg_36pUT{S;99qX50KhL&0NCec}X>nlS
zKX-Dndg$gfUs;=q2}w3;XVNx5J2P|fv0mw59!R=+aP;``*W2&cO+Nh4W9#vPFE0XH
z+uBwIFZX+OZEf_be|AOB&#le5xv4OTiGjg$Ql}6@#f39zo4uy%&0W=hFlnR5q^9QP
zna=zS3`;b5c$gU=<A)#$v|<!&1&AX6UJU@6@dELPm2y%5sRk`pWnchxH$XhdT2}@V
zrHHkWpgW0njGU>kD#M2*(&l+{>V7_*o^pC!++HouU+;FmFL`vNGi811i3yI*Z9IXX
zuBsdZgGVW3p$vn~Nl*^BapOjH)p>vWze^@e5a?oOU<i`k)y!1!Xz!;}+TXYATfcri
z=p=%N-~ma7pv4z1T&RxPpL%+l(d@G#eBf1d3Z|<fH!oviW`1Aw_uAU%U=wg$G+a5A
zVzjdI^Rx1;dDC<vcZdr>6mQMGy6S4|+UuKAy+H*l8v{d-t{XcmgQwr};GIQJFWuan
zUb_7BqiwmjL$>8aa)ITem>3-<9ZU#V9k%wL8G{rL+tYJ%XM;*pjz)$FVRteQh%dif
zd46#R69a=y-wQr2hV2>=8y4`%+s#SpesFJZ_18b2&j+h8Fico=QHX<KI|ncCUR{te
zqMAW1Fk`MQxuQB{-2w&%1>J?#8VrA2R|YNpa$0}CO17X)-Jczwo}6?pWMFVu<?gM(
z@aO2-SSMb#X3*`#43OznkJVAzax|L{Ds+O(0L5Kq+_yJ3zkWKcUp>z}`&!Q;28ITc
z!!Dc*HXMb8g;6_;mI?_AUq12?bVRDOS<Zx;IoCk9DxUcsvL?b%R4b%ISd5`TT>>=p
zt^n%g7$zSJ=r|s8kcqYA!2?GwMur2zJ<}W*_>Y{OZLaG1Ui{4RGj?_&PYl0Yd(U3-
z^HVApBd8tDAUDS<(pbCurjMeW^Jc%ffhwsC3=cYwA20x=a0d<+5JfO463$HuEKDGi
z@MM{P!D?O%4>l%pOxo#Z`ZLW>U-g*Q#~*e_^KR>=H{PmRHD~&|^AUQ__y6HY{u#!#
zCep_(dd?fe@Y3ST{F~lc`MrgDM|J(Dxl=cLo6hXnuD4b_{kZfd&1N4v_tNBJMK}K+
zP5)6QyYP+Z=TmiGSM4!+HqmiXe5cW_PleU7_p_gSGcYjtAG;qrh2cQG-ipSeeXqD0
zzO7~#{rdB_)z5S7yS6;tkaA8Tf3wPN&tvr;1=~LDow7G6&N5%_%1iV3;3rLg--Y)W
zrE*QuaWqOZ{16LX9>QRg^Q=vO_ioi2rEe!M-1N>$@A%Q?T_3+H%$3<6WaeqadUDR|
zfBKPiQ@eNDJ-m`%<grO#dfDa6VlN-PDcrT`<T{<?W18MipZ_poU~rh_ULL^k-*SGA
zf2Evs&E);h{!}j0Tcf{DCe-Nue|Cu??O46;*BcFAnof#lgRKSFG<`?nLLt?q?@j&c
zUM`(+q+%bWux)5RD>?r~T}p_<bi1y~_mfk;|5Me8Qm&r*&4@L9g3@~R>h<A`U0&f9
zv!`TC7D^J?|6Bd0o*J)wvQB`<xgBSIZC<?mptZv>oywFG8aMgpPT2XHBUM*a#q-m-
zZ#{eFZS$V)e<HDT|NfBNpHY4%c75d6^mbgT9UQXki`miq_*E?)-=9}Z`+cJDS9s+1
zzY{+-MtwRFvu{VuyiF%&Rs?stf77U2UmYOjJi+HD^Z!q67MAP0;v^?e+W7el<IS~m
zj2Rdn{F9vHw|ue@!;kG%-`=d`6jsadUTSnI$?X2+YTvJHTH%`yc{)CSmKpn}WX78I
z#gp~IB8^T|emtfdzCP*HmwMZ$@^9*Ie-rw)?%K}&<);%lf|hN3{A9l2)AOY<z0<;v
z-M;_+3hPms9h&#|_@BO=Gv)D@vRLc+!Hr9c?r!pHT32i8!s)t8Bm3x{lZ$U}%e4A_
zwg2R_rSGjm97R8$RyZk_KmFpHU55=5zt_x_-0kv7YjIBe*FdY!YkwEUP2X?5^g#Hj
zieK-n&*g_dseb%qNB{d3sYmi$XVsK^yrk+E!N4#t;f^rJ1N-#oeIbSyw?Ena<sg@o
zfv4r~{c8E;`sY#@FRbp_<o`<le?t9Dzx-<}zpX9V;~XA)t2l7dli(%Gn(W_bSS~Kv
zd0yJew5BWRuC47`^=q0%4+Q4Q83(1S)hs#TnQgna&-{Ap%PR*It*o>A_x$;jQp9e$
z{=p_LovF@+)!Sd*I)D7>l#~hmMrHEr)`))AdU&8LHcD<C%lGsdr%i*Zye}yj$4&jM
zm3{QjB%zY8pWj(sUE8Z9{^#AT>q5R?LQXQA6kPauec9Ekmpx;Hgq8PQ?o~}L{ktcd
zKY3d1y2;-q86b;e<>b%ZFWLCry=&*Jnpte~o%A)Iw-~VQ`{cCg=`{1(r?2Z+zFl7W
zu$gn6!1JDqy&K=Zo@H$y`T1JFvR5}%)2F;???3VI-WKDJyK<KEX?*-{yGhnc@%F^*
zc{|zXUhh9y_o_kYnqTmhhVP}HmsRilJ82QymdiW)?XO?``2O{$iL>X`c1D^-rQ1m@
zm;QYG^`GzS1l@V7yr!M{IZ1De<lj5}Z#KE_JhUeFaoqI=bE8x0;eOxQkG{BdK+~&W
z{oB}0D~-NNE$q43<8{Y6UR*rqaSS79h?gN#gJa)Kx94?#LbpY4THyFn<id;C)YqRj
zRXgVW6fL@Ioo>JFvhWNW#jK}#=P!TJ-&fLaU6^ft&Bv4fvA%|$(aOKNAA@>cKQ3|J
zC|y=P?KZ#s$1Pe?dnRv7wXX`{G=IA({*L<UYU_LY&;5Qfy}7Z7`(2n*v*G4H-cP3*
z|5|Xaeosp=bSHsr;r*Vll;`Fpndf(=oW8vGN^IlR&z_N9C!h0em$5Q`8?wi`Q?gj>
zUwAQhmBi%!g;OOe5B@!&^7Ee2_6Jv4EQ8N%H!fY~F73MSbLXYfiuj#rXF>}$f2yx$
z+@>qNP+NUcl(g`-KQHo3S-_1UkGGpdrteHyxPQqqjk`a-=+6E5?t4o2U#%(ARqhsg
zm8UPs`RMuZZKBWrydC`+nQQJuTB@H94Vb#9NbX1A#N>VZlQ(RZ+x4vS&F7%iDc>cS
z!0mt~_qs1|mrhyyo#E&6{eO$5Y)EN2muES3OI}}k>Li=Bx&N<Rdfs96D`LxnpC<CF
zmRpBS?!US2^=A7EFZ-9BEK_{=o_SS#XzJQokEdOj85#Mj{Amg#idN1~{;qg4HRRa0
z*q|EQf4A3qI=*|xYpItP_GZ3v=d$zKr{fPVR-C@)^6u@mZy(vd?f#tTBbLuG>A&~S
z*xUOn9j;%KDc0@x_1XA(1y5bos(SXH5^s0?x@vK^Kz?=jI;~Sd-zEJdzzv)Ur($<0
z%)TpHv}s%Y&u!E9O7B@=|Ng}0xi%W@#$Fe1$2y);kZ0YJF}LRLGSer@*B9u9r|XpK
z_xeU1{5PH7WBQf7SJc;Qr&NaLYZR|5ssAmvzQJBSf4gVlcDJ2JM3_>oD*sk8JYzV&
z@y8XtB5&=qpdVlIzO6H2o1J3)Veh)!+v~jJ*MIflOw9bZtHv@J+&ptwRd(98=B4v4
z?TQo9Ug0Y1_iPfG65sB4D)O-D)uzjJQGv<RJtql0t67r$?&lYupRcu|zq=WE7+rm_
z^i65(+PrJ2!l`>-aAm|jeRcXxo~+3;{k32G(>;FuIl(=z=Xva|rJMMF#vIIzI<?**
zPV&D`rrTzpcR#sOIf8y2vb?KkviD5tyw5qOVwSH_e^U8e_X+0$zVhH@2PM|XOjg&f
zc$D*WP4pFEyUXShOKgk}J&9kwd(-u^`pflR9&-O2tN+g|bo#fL!umL~RN2etjmwo7
z7!Le3u$b$op33kc|LdEZ!NO`j7p8foPC1&hZ~1q&o*9dKZt7`&n-lo5+4$bKU2p%$
zx%Nsf{n5QWW*v{?r}v-xjKt1QeREwW{f}zw-?TOB@@>>4i?l!Acz9~U$8Xl`Z=Y{}
zy`}omSDyEN0neUKwln|sUHOylubfTQ-{yE-_{#b^B{R(XUj5B!j~~g!TF>@wJjy)z
z`Ry0BU2jZ-`hw<8T^(-fe|rAqI^XNh&P@&9n#XnGSya~Uys4TBns<-p{quUhJZDPy
zQ@c}FH~M@^-ZKB%O~2a7{{4mGQEqR-mYj4K*N=VF!pZ<@C;pG+FqyVWYyOV8U3aB*
zBR`#3{$sOIWQ~fd#>WGEs&4mo91hj-Sf*6@G<DjX!a%P()qnPVyC3nzb><(H-xE1*
z>e;oNUZm)8PxZ6c_IHb`XDaG>aem)<AG9vHV7(8ht#kI@XMfM~w~_uDihqvW-Ckt5
zIOVtPtNZT{zqiiY9s8lN$W8O(^A+`RYzzz^CKUREdqM(Cpf%MDAXC6e3)HwHueFG$
zNk~wtp+0313xgbszW()mFHv*X6FE=5FFhW(=kCHYGmQeFD>vS6&A#q;ykCC$-8_Hc
z<HEwii)F3LH1hB5IW=8Be%8-<pd%SiaxokbJ_lI?#4rgouGb}|y9zWWabkj^2nz$l
z63H(&!7W%4dwY1ipeR6oprHgcD3MUwD}}fd#aP0Td%!ulz-q4FDTW80*H#9r=ib{B
z`SQ}z&pO^79v6Ooel98?u%MxV;ryo?HzHVBSzpf1-xoN~rcy%=TxmFD2@41;c=6&z
zb=CQ4y3rvEomi22ms_*1zgu_RRD$Q!G~L;tZWO$O8MZd+?$=#@%Y)ZOZJjh-e7Zja
z!vw2~vLXyBPft#E2c779H%6%Z-JO*|OT7Y37#JpKeGE2Z@W{Hm>#IHJK92?I^D5h5
zow{3heWwR3zr6Cog@7(r28JNhUCm4l(G!(ix77dtXT6O>&Z5A9m0N5{1OtP@)kUfT
z40qmLQN5C2u)?KN2sBjXz`@e6MTF@;U#ue|gF}>Kc>u!>*PTUAw<I6$6NzR~R#q03
zk@1-b>KD3sD=^$qTB^5@t=Un3?-wD^(6Rs%qr%$Ei;Irt3Lm^IDgv(f6neS1xPq>(
z3Z1NLJktj>fPWNHb9r?CvvTk`JIhp5Lc${vlIt!lI@T+F`uOqZ`Ob<^5rO*p`m3wM
z&wt*Nc(_f3m4RW2B&cKMSY$O<$}DHbv-9)y+Ye97>VN#}>+4A>^78Ui^<u3;>(yDg
z#jb!>p`4j%T-?OO!0?Qd<--FNH8s!x3}{^W(o*l`4Gj$;vT19>R@c<lrW!51cmK4I
zkWk6fQ=*`w-lO(bZT<B0^zlwE1_s3m0vr$S?=F1Y1{&?#ntgp$#m7glKK_4mx3VHQ
z_20jb<&&0WDok<OSdut#Tfou=)|)AUj*N~67#&?5k2qdDn0Rr1S6$)#QWjR($UBVY
zE~&cR8Xap+DP3T?bSNNd*99?+Mibx3fBt!RM4K@RGQPiFde-#q$w+&d^H;6U7ted%
z$gZ^~T)lp-s`s=HFPG0Rx|urNu>N1ox_!U0VAnr9dGqwilY+;+=AT}#-+yVLGdmlf
zOvRm;>QA7-FxkoK{%RH$GiDm6U)q`-4%c%g_x`@Q>tc6rs{Wprx9?{f@1i2c-*30y
zpJ9;5<hOh>==}TL?{;~E4pILpeJT8_Qqi@rwNb9K&2n#SPUkPbU%Q>@0qZ))X0{$V
z+gTSb1hDZ+9r4!Ry99Iy_#fVtssS$NPQAIgdFR`0w?9<a*nBv^98-EVRO^&$(*J*d
zd!)_#&YbZ9-BSPK<KwHfpj+rcXH-2=^PhKyncv3Y>8YubhDj}MbM|mp%J8ZA%s8+u
z_x6jsyUpj<{nAuaRD|7L;Nc2709}922PQ#5!5%5ou34tpt2D${Ub++nTEGH2qItGi
z?hNB}zN@Rl!$C)=i!iym)HFWa_xs)B!~FJ7KAqN2e{y1COIzD2198js*H4Sb*Bo5r
z+P&jZmv+RC0!7fp98hgf-oN?q@bH({*ZZG5NdevIu_5vBh6o+4Gpe8~R%$*T6?buU
zoj7OCnsOUg(CLb<Pu^$U-&b4m{@&d9x}UAva&H%XzgwPcG&5v|_k~+qvn${4{k|b;
zE$Eoyl2=zeK_Lw~&|?4OFJDSP8~e}AwSIYVF?;Q=FM)x2h0)vdp1$3Ff7zBTQ$T0)
z|NHxUHRyhX=@Px1dP?eFKA*QQK5Ke?L-ut&(3}Rt5A(vOQ^Sp_zGP^HuR9}Ke&?Wc
z{+@-P$cah1BBY^o+|$o*S^oWfjRzBEnPx9r^yjRIh=@Vn9gA=8?w(!|xVYkRuepH)
z4=kKxvh4nRaDKP<dz_P#lZ0Us3+NJ-Q}?etJlrm6UuR=o_U6I1+j);cQ%4OBkb_W)
zG%FvridTF(sm{sI|9oM)oRXH7m+Q~j#XDns=Gn|FeSPiXLTC20v$I^Ut_p?Sv9fc<
z&bxVXH6I++{N{MPy|q=+G)p8gF;T0B;lam>A73u}|NQs+edd)Fg4|*{3TkR@t%BfF
zVJCJ^Jp6D${r`VwW|?+tuirC?UA{(O_0?5}P82>nJNx*VnZ}?~^g((I4Fh2T-R<e<
z$Y@*jCE@F<t6Q?JcGds;JfDS?b)`X3kaqaGCx`j%mv~LpIy299_P4jU!GR4rmc#JJ
z{r~^mudj>!`F8vL9UqTLzr4PFzFYki&;lf}dzH^+_r&Rg66y9kMcuGF3&c0|N}FF=
z8*N@xT)gw|x7#o8?A+X?l%Dea-Ca;laBk-_tou^|@-{4c>bcyn{Vuz=`unmSJ7!eB
z-#cAQH)@4OQP{V)w~yc0n0)f=*{2uX<)2Osj|<G+1U^%^BgSKy|NL`z%kTRZ6d3T^
z{ZI&xDQxXhsz0&NxxM1`+U+-X7OOL}@jPhew>ywywi<LN2Unxgr|{~xTd!BV*?4?K
z*jlN@ZoP}b)<$V{%@OunKDkp^9bBU9EKW~5H^+0TR%i(HDx9C%6*h8iy;4Vie0<#5
z*Y|8;yWFCxudlRv=6wF0^Xke<o#<^nOO~iinmoC)rw4p<0qA~>nj)*YJTev&K$lf4
zaAa=dm-k!hH8n)y=Vef|MQ%<D+@2Q;x+}@a34Vdj^TUnI>@Tja76+~7tNi@zPWAh}
zRpH*}y5;NtO!T+=*|K4S!LxI7Pn%wkSqw_u8Z2EW=4%=oPhJ<hd(x~~r@mf~_y7Oz
zFX&RFo;l|G|NYViop$cFcp~UNhPb^|@JmM0Z){5CHosS)92pt;<?Ze1@9yqit?_9(
zDAe-zeiduymw$G*{QlE@zuyH1LN4oi5)-v8N7DR$jd9uTxz+D>y5IfqM1T71*{5%8
zOuo52Uq1O*&%>Y3=ZC|U&oE48V`k$ykYQq!bHhMLSU6N<r-F#6sA1k6i?a9k&f0#z
zb9jm8WDYK_AXt%;v-j(@=pGr%NgFm8xb?|+f-VFT*||VOT>SI>|No|gZmro{T@E@v
z8Z>(gJuvsg%nwz2ZGOF2{O7s-e?=psN$qk~9a5%QD-4Ppi+10w`SGxQP0Y?o&(6*^
zuK!n~xBHC|=o$~uA%TAGo}Qe|?EKFz``fGP>GcUJyCrPC86vUs!Pz;MlPf<z6BHLe
zeyEkZv%4D}3L#xxT`w*#=l7nbqo}Fb`TPC;`41i>h`Q-X-LL;|dwIG4a<^WoGxP1|
zKRY`co{N-j-n@CI;&E@ztu38=vQ`EK4;;=iYKSlW`ue(Y<)@UUrY6vxIxnuRoej%F
zPjhc?>jg#X#EFjY@9lkgbMx{pCGo9WwwQo!(dg>#o@tcI)y&Qx1`TeOgr}#betB`R
zxtX2cDC2^{t?l{i7oAX5w6>nTG5L5<K)`{|=k3qWFiZyB`XSirFmeC($jxbmzu#_u
z{I#m`K_h#{r6rwRO4Fw>v-5$=#Wyz(|NVad{O$bxy(d#(5%jN4&2P?$4T*<0+|22b
zFl?&&`f8;{k!O*GjL!^%#@yT63O}DU|9mrje&W_BXc;$gf4^b!u@8^B^@Ril4}z{_
zb8%_uQkuR6RA<%yuf4V=^04Xkn8SB=7O#xkyNgR1d?IGi%m0cs@ArQHbkSWtaDAMu
zTaSceM#c&*<$CV8_0#qD|C!XvEk0?&gae0KxmQMSUkB5cx#{*>P?g2XEe5*P>)+S)
z^@$rJLL`3Pj@*(VIKTd%<-~~-Rn*ldPniO)x;0q3Vxm5M`t;)7UhCJ_)-I0SUB)A2
z;?bOZ;{Gk9I_vXU^O*X-U!m71omi@<tJ|v=y9-qPRDFBn`TpMCP!YkJ&&m7#e!D$o
z%9IZm-Q^3P&n;gCs<OBmmBhEtum3mm-QC^65fLXE7@2px+x0p`WMYw#v9Y9ip3Lv>
z@1I`{j|UZTq0s2m^8;O)6TiQ1%Cu=lB`*SIndMd;oHP6M%Ok~Sj5Z%_`nbic?BN%g
z&08<_9L=$L{JG#3_mBJAOqw3e77`b$c{SCx>{8IB4ZGJKQBxDh(%V0u`OV#_ZVOHO
zRzF@HyW#th&R6gB6~wxCK6<)q&W^s=pDLx_ZhXG6^SpP?-dW54nSZ%)^2W^8!?Hd;
zf2(fvh`YsdadS&rm+4f0f0z2>!^0kFbH2qFSG0G4OX3qpAOHP+KmEmpg%NARK!sdQ
z%^%j1GQYZie=7Igda!E4mskJz3$ugH-oN=x;C^pi=h1szFP9X~uQy&0y7sbN^WWR`
zPp0xeW3Ag?`Q!4I&Xpyna=VPfKYnS+XUwpDAXsI-?o?lDab^C$R|l-VR{x!?n42BD
zp!{g(Yrcf6E3=ogZ(H`q!0~4Gz9(nbUk{B~AGv?xoohC$bS_(R-PrWaecAc=Uc+7N
z{=FIJSEk*!-t?M%-rOAw%`e%G?YSa(n>A#vvU{IF-Jc53^1NSPUmw4+GT5|WQb)%L
z$xGoYl<FrwpI6<d=-d{tHp*14{?A8l9R{X|jKbgCd;Xl^xFvN;PW1Ku825wk|1O-j
zaj6qSPu|_>>hX17dzcg)w%$;A_xAeBo^6J#(^;EVnQ<p9yB2!g=*_&@mQvk1ZQSqc
z3M}V}zOvi&f2*k7{?aR(_mucBuykaVYOlBZ$-w(D_KNP|d3xduZ*!uIdXLYq?0xC(
zT(ES({mKv3H@2p7Gx+{C*|~s8!C|S_)QTSu+cWR&u{2IQv!M9-IT?!r2YwOo$$=BO
zr{8{?_Ts`qo4;Q!Us)GBTYX+d(~1=<xW6kV_AU#2zNz6=ss7{XZ`u7fwoZPl{X9HE
zNG|sEn_VKaoICTUxd<(vf9s5(=b!tB7fby6@Hi%XJr~2Z4F#`MR3;_tFp#fYx96+P
z&o^E&PY=gl;{D>Q^ZM7!iN#&Bs=aRXPhaq@{Nf(JpVNYj6Sqcv{xP}#lCIth6aO^%
z*>$IwoK<|KuIx~FooY6F=l_4dcmDhJI&xo)CFo)iW8+|caj4__4?kSsKi>}2fNJBF
z1~u;v9Xh1<<MTS1xWYxhmU5dV*hk7)ZfHJlJ|!gLS?TWo9$9vek4I&=s2B<+9=Cbm
zwBq6ira5_sH}^l;Xl!tW+wxhBZp_uE&kp2g?~6Hp^44x6^YyCIFD`HTSNMCyWC8yf
zXWgDoxXgXY-KaiUIj*NrcZbcFLv!aH$uKE;KDYeR_Wbyu;NZ-=yG*11=+1!Lc~|79
zp`o!L`FJ0w<&?c{XB+6Qv#nA3UB%xmyblO?ui5W<ZnODJ-$-}A9eKZYElucHdcPs=
zPu=fj-OQDiDXC8<OrGrQIPvk6Z1$N=2a?sblf@+^BcI4_n4iKRx<UL&z=IESE|yw#
zbDcPKN<mq>yGiM}=l9vg2foj|9On?`#wKaWy2CHZhMm<iG9@Kt!qnh~&3`m^az$Fb
z^$n}AZIEui+<G~3+wun|+`fF-{-E;i>4@S*ViD)<uARBpWVlP3Q}AH@m-tydy3&ju
zSyIQ37L=F0zc*LWxees3-DPhhw&l!}GgDGh+OO=PD)93&X#MrN*xj3wkMr#+eeHIC
zU#+18kN8DVkz}tNx9P{W&iHY&pW)13yVlk0+gAMOn^nxOEuJcL<zL>q6)#hcX@1K%
zf9uehkC#1`KDrTd?8j?~s@q><<f>{GSWem5dHz`K(PKroC4{YwSN-R8nQ<^xKOsGe
zOZu8#g7dD~v)gRXZY|ZC>-OvRasT^!j?PJDOU}IVRPR^Z{Enj=`+Z-VZ`4fX)O?wj
zwCm2fQb$dn`TH$5tXmoA@5Z0w{?qL8owwSOzYnhII&Nf=^1%6ThDc)4*Y^AgbK+ig
z$7dE-{9I|taL49P<gFtu3}NeDl`t1y5@*=(<@nhxz8T>UZTC7`Z{HxMzt-CH*W>r^
zcYfRWFeCiJkB;tV?{Y3L;|*OM_H=Ify@lS>^?YWV_169U_41&yl2VaS7N^M1$+<T+
zG=fsbmMtdlc0A_$_4Rf6L{<m+{EgFJ9C_CG{qvpZeM!-dNAu?IoE<)~Yr388>*TM~
z=9I*<m+L<N`7zkjH0I>pe!J7F*Y7)Z+W!~Zf{#mdpWdBr_qhJsV|KkblVrC2Zn2)n
z_1>O}QVkaS{OtMa!Urj*7v+1zS<UBtxleD2<MPY1FK+I?>EEAvZhhyU&li_he!VLG
zV|#tTG~I5y#8cm`8O2|27N0!nof_APs4te6C*Li3e}DeFwU7ROopHE-<<G{u&+_LT
zzrmk&am{ssd6hpy<0?KmuIn~xG@rUMS$$$2|Fc&IlXt9MdsMz|_s18;=RcmA>|azG
z;*nq<`e=6H-2bs|{<ma;Hg}y~9=>gD3{zX1i9P%K_p&GTUUNlRpD78vJHNBD6V$Qx
znyOWJJ9j&%dth4wX?C95f3#b?`0du~pf(JsMZ#--r{H5;&*9#SF_X>izGfGz-I?Vc
zS?=Snn7!{-gW*1PUS*#h{$@XWvgPmle)0W%A@hU%4x>Bb6+PQBs%#B!TyMQH`_z+4
z>HgPAlK21l&6Bn4kY4}(<b~&L>~6ZH`9IruCG%!|lRR`H@i4P;>b$-)q1=%VzZP6(
z-kT{9$7v9({gQ2$>rbbEYmao_Y<DR>y;}dG`9&YIgr4_j4xi^=<Qya<`|FNh`u|;)
zzyC6x-Pm4vd0KMMyMy+(t{u-}*}T?F_IJr=^;a_6-f11k*%q`nO__m5QSrtau{CDW
z)42SftuooYH}t{w{Q6r`E4r6wsK+ne!I5>nYlgjzcT;;${OMgyxAYE0Z}FRJW%Kcf
zFzBi^L1niOCzSgaL{5N)*zA?T%Rv_h*vRqo+y61pjoPx~#rl)XSN8tebzpbhQhQyc
zv-gwAGoG(s^P%d+osEWJ2Z|@{-`ib$Iig#fhc~mN@@kiS+p)_lr>>V?QGHmgsIO92
ze`99zrgzJyW$d!Kq5UDUDlPHB^UL+d?*wkw`LQP4tG}Q6>B-4||81R%w$(dGo#*=W
zCQn1$Q+T$DP@<l7)YVt}++SW?sm_<WbYhqI`FlI}NZ)v#GOsQ@FF}5F+4Jy<`4^*W
zK5Ymx-k2K6{ibc{64?)PZ_his?<Zq^Ufoam`Wf*{VjcTK7rXJ!t9Zou=+UDe=j;C+
zZeV2A(A2!h?CIjNL}stB#wYcPr&Gf#{(L;XGH$QcVz*u;HMM7#c3MQ+=H-NU^VgqZ
z$>+0QcYeXatrP$J`dcpHBby->|8>gj_vddFclq~T@qNRRE!JQ@)%K_R3faOO9$jIf
zxen3I6Gh|rZx?-kxJc}o@lLVlikmjsCUh*lKKJcPu@{Ut`}kyicHGS?aDHKU{O{h2
ze*DkU7zOUHQ@?JuLQdkrme}2VGYTHF8QipZTj+oG;+B_j^LsznGw1VlUw{AXlwaZ>
zqiXK97w2LGB|o+Qk7SN@Ts$kTB-XKix2r5uw<v5aK(3iw<r6_r(|h^+I<LGuz4-dS
zrVAG?tg`6r=+I#4niDP8-wsL!-)`p%3kx4!5x96o+}>ZRWmDaxw!CDp_hnCT|D>O@
zzSgq%l6U6a|5^WDw1{6@m3~3+V#Lf9ZE7AL8!Uh1=&4@Fl8Aio^(bfgE4NoRZ|}}(
zs;MpwS?B5G^tU+Z>#p$a>RJEtQ<x*WuYd5)@V(z)AS?Cx_+sCk>lUmP);qOs!Er7=
zzGny7<x@ai;>b;)?OU4@4mNRD`S}S7PW=4+`kanA!bguDy;Jpi?Vi8iZmX!MutYKG
zpJaEhV5*({>-HU+?4JpbrYHC;dOFqAzh3=b$fi%-{vr!&{-<{=O}lz|{*nh>uR_nS
z5xuaU<-qRAYiiYJ&N~$75H|1cnpacyT6RzNtqPnlwe_!0+J+5BFYk=itv_pSlzhy|
zz+_40A<GxLDz|8BOqd~1`0~zjH|1tF-bLl_?}2(`*VaaF&b@7ByT%1{biAs-&&Qw(
zgP)(9JJYgQO*d-G15kfIXsOrqGhcUlFMPD5P)ut3?v;nEzV6;?x%QxE)|GDlKYKK5
z-xto8*Zb1HA#;Ue>go5qABtwY35j2RYDS0F@mi@AtH17d_ME&wvHVU-eOcYCFKqAg
zZr4ZsHjug8eQC4#OjhaQ{^<7}$F$;Re^C8>TK4>h-II4pXI^-9tLyspS6Qzj{{QgU
z?7pe|*RCeF^2eXL_h`T91YNmyu$g`G%$c6E&2l4l7A=*7-Vc(Ve}10rmfYLZ=GXn|
zl+N4H`1}2S`OP<LIAa%fzxfhdQGQdqq=RSWRSSbAwKk7+zN>-_8AQdWbGxS=JS1~b
zVZ+_NpQmp=*05LTYMwH`W^%3h?xnmFpCvoT9+i8;ez~UkqQv6e7mYsXCEeUumO5>I
z%-)vf)_GltXJtG;m;L*0YO5{r{bJCkKf-5r%l!WF@^ko%$-&bVb+bY%Vy5k%_lb-D
z_pQHKY9@R-(SL77?DIeSb${p!rSjii^J<?*zbb$4oXfq#yS91Sj^NK5m@D2qc_K2m
z;QzA@wl97<+NXSVll?y{=GnO3_%dPQFK&j;=^o)1gfs8&vpw9-|NO*6Wl%@$%q&xH
zetpPTnep^lv%FqkTYGa?srL4JRodlu3Xi*5vOieKlu&nzEo{k-54-j!Up?kCr%|uR
zY*yLsbK>WsO4aPPyxA4oSGy*DpTdXgQ~9?~ZrsguJGT1!-F=n+_N@F<eoLrGM(%@!
zN2JkY%a==M%=-Fl*|R`*V=i~)i>p$zPg*4$e_AwYcde?vZzMxo8;|52JApVER?8>9
z)F<m}-H_*4cdp6j(&k$A_c!V<?7wqjs_ZT{H|wuUR6idx?*II3cD~WGng0#ao4fs*
z^Pd*2X6(1^-Xe1U$h+w8&!&`qE)kpfo_~6^`TJ!DSdVRBVJlxFy5nD~eE-vr7av!A
z-e}nKH$klJ#knuJ;o=Qirb<kTCHLmMpYivecTfDc{g0O&|FH4WlG^ZkfxV4xhRd(t
zdb8>IgopO!{tM?eKfSysTVB)t$F3%|qwVpR{_i!*ywu_@UkkbddA?ok44cYLa!E=`
zpSGub$P!YWxVh~2+wGr^ipLk+ul;^;SLti-*N<YVf99O2Fe^HveCWc2hv)e|mH*XA
z`S#*^XXT5?b$uUr+cw#k%B`>Y^O&>z=8I?b#v6L|Wp;Lb+mQ2*Ni5%E^#;G+cFr&E
ztg?GrJ?+`@wU2kdPiIqWwAb1foByjM(|5;F@rdAOK2IjkH`127a-%8QN-p8qB-!J0
z&#zrP@9C=(0x_&F&&>NJm%aIP+$Cd|?I{lT^-Q`}HW{be`uF~RG~@5KlpFuOAGKd!
zHX&-C+p%djf9~kI%$WNo!f^igwDs#YL~edperav3uv32>>+>6@&!ivwz3eQ5&)F*D
zUGc9so$oL1cydvH;?;5s=UM;nDX6#qcsV(I#$)~lQ9t{A(*1X<Wff<xuBbY+UBbuQ
z;Mvl1({sENDtGSOY4iWj=PA>s75(^-*wkd=HVa&ApU8Z5bF=!%lP7&<8o3^|xV`3O
z$%BGj49e3Ut9jYq+?BBH#)hbKsp)L|e7j|*vm4CXcyrD3C97MGzdTU)WzJf&HEEAK
z!xo)*$Dq^F_U~VUg<iGb(rE!}o)s6?xjuPs-?TEw^pLY|{p{a)mzH?0iQLTAe%R5&
zqvM~mh~UKiqP2fDmFhJ&AMLuCWc&Q}JWJ)UXpQGp@7=nmUbt_azPR;*F-JmLw8pdI
zvbu#XE?NI<1O+GV7v3uj8qxp{(SR@Zz<t99{K}7dQC~qtrTQ75v5QIPd)$oEKF5Eo
zoy>l`(Pmrz`GsYhRSx|v*1A!A$@8>j%hyd+Dn~+W+uJu!o;IoNZmz1CVPn8&)*t*&
zH%{JY%-wE1W5%zq8(wBRfIEnS6OEszJQhFyIkfBblhqPRo<6&p)Xq-!=;?oS{;a#+
zHr?NC{Lj~ambae9K6w&{ebu`8`_IqGF_*ag=|%Tm4tsaU6&3H(mihb^Yq-WNx3K)N
zl=z+fS0>)I|9ox=D<lG|c9lXVH(buOwThMRby&Pdx8~C^ld?yw&N|!wy_S!Wdz141
zok>{T-i|4}mP>LXx(xS8_y7HDb?D!gFb&~}oVk)BDbF6?T-`W<>+e0?tGyH69!pr!
z^pt;2^XZpIax`Z@xGNI#tvGb{Q>H)VpEhKl7Gs$8Ewjnjd>wdjrQ?KrV($w7%|{f<
zmhCto;1v?$d+4lyTx9z(-C3V6Zwa3{IjBorcjo%KRfi{JC;wk!Y$zlq=`5NYoA9-;
z@$0MGa!bqp2|BHEpHnJO^mOg>lji)NzIe-S{55^1@}wX8+ZS89a0Cj5>g}mlxNc|m
zcM5Bp$1IZ-3#G$cT%P3n<hOL3*!f)dVDvv#$7GG%yjSu+OlyC~$<&uUE8Ok>=fdU|
zAxUA+8DDke>*v*~^v`Wv7<a;uKR|h*;KU;?EwK~$uPM*mzfn;C*bcE6xvWdJT1rZv
z%$H4b5VX|qxvHnk$t?Zx(ao8s9mONgKCqZ@*m0GhWZDu1_5Q=}1hS5`2)_t;Sr{-O
z{KP{c^JNcCUFqPk_cZrYXSMt(qgN;#v(w7#yyl`Ar~UjKH3cUgXE*EL+_C2m^ArJ}
zr>D%E4(Fu4H`v7$`RnHFH6N-B9vyr5WiNZ?#wmNBX6T<d7^&IxLAb?d`eO#x*7k!b
zOp9l(x_W5iya%kF)sB2+!KO-wbu8YpJ`tBYCL1$z#fcBXd;DKMa)@JWkIg%y7`j3#
zi}6f^7SFNAipi(l7$(g+#u}v$x`_ZZrFTAAUqyVqzCc{u+{4y&iRbjSe`u!af0V5b
z{(m55lKBs((8qSlk{7hPVBOK*hS2V4;7Xl;PjjDsHn&r|y8r)V6??8tWgAofvOW91
zMJ~C7f1cFEeK86}6E@}W&6kl4SoY_m+qcDmOFFO0?{h4wTB#ql^z6f@%bzh^NIt=#
z#J7B7;#(%OCA)WK-nbkSy*%h=clzZ?l|J!X{_WVDbw6JtcJ}e4qBqHxfBEKkMLK!y
znmt2rdvR<4*X8H=d~33^u1vnjd|R-}<d&?*^X}=(Hwfw%b`~yKzJxj8|FU|+C%1Mz
z)vJ}4?ki*M?Z2>B15z9my_3{3X|cTV;qLO3-}T+A3mYuu^%bN$m@d`*Xq(&F=`zvD
zEZ~KXV*O{EV}8FUrtNLJ@OsuS7TKGJ%ir%ked$fevc9=TYLnfB(v2US`8{>{y&obx
z8+mFn6nd4k9%|ik{`CC)CZ)Y=yjK{f8wacFDBqs_*n0n~XGLop%EIbDaou0CBjawm
zhHcc*q;DcdzdNr-KJ-m}6@CBXkJEd1RDF<GrLzBwy7nB2$GboNXNz%G{hny1KgaXa
zypm0uYZXmaIOU&q)|vZ<>*UL&Ca;WUM6ch^#4TrJu4epjvwWmovXG^FTakaaz@>D3
zHCFvVhii+@&&<p4`2O#2;`8%o;wQINpO&ff+1n(?o6W#?t?nnIH1F3P?>iNbw9fh<
zvefwNQOzAbqPwf*AYERcSVvbEmw)fR_$}!8!`Y>I=JfIx4vdnwuN!_o`}{|ZZ|D1k
z!uKaf=f6CAV!HbUfo6RPsievm>vl{0t5=pVNf6j>{>32wR#DGI+aK<?e?{~rY`eNF
zd4Ar$yVb=vcCBXEu=&+xeTEk$*Vi^)b1crt&3kcY=OM{RrP+p`rUv%BxqM`M_oGc=
zhyV26|LEJ*J>y$>Ve#sS`03usrL(^Fe3#ds?d)eTdu_=2n<vDToSimA>PF;V&8$4V
zw@1coPMbvEYL|D%R8^KwPwIGg=Ir(Ohwq<dEeQ)UXDeUBdZ4dz@{O7mY}@zz5~>wG
zo5FnT$2prne-x}#R4*<(|F)#>Q|tCQe~<nPnSK6SiMc@2$@^a~OGvp-xBL2Y{^mcw
zC&(Gq>AvP#6$lC*7u`msqIZ?$Mx}pVi;77;p0s0Qs&<sx(O<g~q*e=xrz@H6o#M7P
z?~zf%g$yaPzGnfq`1h@wTmAggyC-X9-bT!vH&5=y{Z)5=maMF7d6Hu;A$7@MUDVOX
zt#fqV$@~tRJ6G5HI@5(cnR~hJ_gu}oH~U{s>#UDQWQ<=s{}iYU`B3OR=S|daV}?%M
zbsQy+zsy(UjVfu8U&gdSQ@4M$k{)->tQ#s5If`%2Tm5VKwhc08XFd~%U+{UEQQ@cS
zcP{Ve=2coHQ2O-ioic?rZwy|+F3TvgyJPr@^O2UM25XZ}SkAQM!_x)5J07;&mOFEn
zNnu~C|GM0Oxr>==w`E^v6wbTDpeDUja`qp_&3aeQm1Wtb3+~=*n~>r0R%)tNUD`^&
zpxfKh<5|KPcVD`z{`ZDN-0X9KYGUcTZ_b+6x!E_8<IAqQ%kOPBoTDb29P!&(?cUz(
z%KyDvkCeahp5LdN5wkpJt3&vO70cj``=oBDRv1|kn)7V)?K>h*vV)5uUBAiertdu_
zKcjETBg@ytU$;M~4A!)XaC+PEpg5{)QgEG=P{za0V)I{`O02AZEby+xH~p8tHG{|R
zhuU$!A8O}Geq+&n9kSFZ{K7R(h*j?Y{|E|N9`Bp0{y`@-|D4s!nkLrBNSTlFhiA_{
z=h*)J4c{Y8?kwx?Vsk5Gm&>-lep6O&5|rnA{zzrl*QCdb&KJC5<M&(Ms%G2%q5d%2
zq>y*DM`r&2+i<_@PHL=U;}MopR#8afVCR9#Z)%$plHV1c?oV0sXe(!YlKrgeJ=UIQ
zS4ceQY1_1}U+(-z#)}18v{zje$xw{`d(>!CpKsPYr!@P&N~<^Z&#Cw=%whCXe(kf<
zI~J{6Z+T_zC+(ZZS+6F23a-6t=j^OKKg^eD`qMLy?xruB^t|zcRqu=cuRim<y?ige
zU*;3PrRAi(m#>SO?2?|oT6+cLWd9SEeK|Y&`KBdt9Q(4{fBGhd6pJ*k=2@J*%=YX5
z<>=gB{_NLouU#kZ?wgc;|L3${cYhazE>>Ce=XLG>hufEkPu=StzD57Od`HmDt>@#u
zUHB}2YVNF~b>a!@JTL2Q{WE>TG*034Nz=+X_#aqoXn*qYf8x@O!e{?}{<qWH`_Jv?
ziR>S4oII(?bZT8<_%p}<{%Mb*OF3DtK7DQ6$GLf>&ey5~GhEtNXN6U$$VX3J%*Aib
zBp!5j^VBVG9v#@26kWPBf6;#ab+?_}Ug<NatOEOJ%Hnb+*C#Sh?D>C}=RcGF@A9g`
zY|^gBR)^Mi9IBaQ@Z!Yn!)5jJFE;J9ykQ%7<IiiU!$J~wHj2sro4|SCQ2+5w7X&<~
zn78K~5V$*Y-`iJBf>ovYXO{e&eQ@@vC)1Bz-dJ9}>gMI2yY3u$^Z8z7BWrKf=Pw7`
z-kd%0v3*m`H*ry6NjayS-F7OvQs>?FRD55TdP+L`wBHqhlXv?|IC|bhE^XT%y)>Qm
z^Ro~oo!`}A;d|#^l-={>m-=dbtsDK5XGJt<eZ6J0<8+x_%$8=|&Hqn-zv@;Ldst&m
zW%SIC=X4nIDi^R%O#5(SvxoP`s|By`$;i0;+|^;#eM0}s)vWi=?uu+#@4sWe_M0`^
zp9}u2@Ymz#J(hf4QF*Bd4{ztXwDi0D`_?V+{P=K}yQ_=Kk~in2*I$1fD(Q00@BBPl
zPd~r3Q&TkSA8YNn@#Sl=PkeEcVgL`X<i9ry9xn?&asQ9*djIIsJFjBjOZqNvSsZL@
zadTsm-TSj~v59lj?!I3V{adX?>U7`!<fC`GUP~suym9RWzx`6dxi_Y5TRW+P-}Z!i
zz8|+wg#3+o@MIWQe=R?^eeLw6Pu)J79NU_DVr^^(H@D{WxbH@vXMSAsmEA7*a@?17
zTl&_ArG(zuxKVZSyxpxHo8QYAo9U_l-0b@1NOa-<?IntOHZ1Aqs^&?3KU(-(;@kg8
zKOSz?+^1=LO1J*U%-1)+yvxoxC_H1yz3-7i<^8*6R;lj)^ra==a7JFj8SlAQg(`YH
zC#!8tJ1Zr_#|~Q31j!K{P7hiSJuXQo|K*ly7@Zs8e{IW)qNH*wO<h*+rvYoy)XS`I
z<fNOGw_Hv<{_<0TNtEBdwZ}at-erv7;I*CnFeb+04PT6$-`@Ymy$}7^{g!j{yRBJh
zcBbKYq|3j5$}_Y=m2IvplJ-;#aB)#_xa;Wpq$zDn1n1T{ZWoNAH_SZUF3Y`bbH=|-
z5xIZfJ?s&4I<FNmbK|Wu=@wisUdHSa{cNb7yQM`)P;!;Dhl`6#-~V7YRRKW|2;=}w
zut5qM5ML0dz{LF>%Q^%m+BdQ<ug}~6SfY1^Z(7gu*V-EWlbI`LE=iTzv8qz%nfJD5
zdY{VPww+Gu-0<?ZtsrFV;<E(9cHN40{`wO(Pon}0Jx%tl;tW?;75bKbCiR_W%FF4O
zJ?>3-dV1bPTaP^|&I{U`Vpr<SHJ!Wlrd*!XHxt2aUb}dq4byYt%Hel@<j>3gC_R7w
zH^!9}2L(>*I{y+3*PXHG&)%Fk<7QD!>63o<Pb|t$`uY6DK@$#cyW`IjpLTt-<>dU=
zmw#=E<I)A*>;CUnPUnvAoBeF{$BXT|Jj)k!baec=7oV-9RL|sHVJoHfYk#_tOVH{(
z-9v|!S4^MavVQlx^o;eZYv)&%f4Vs5tHrsWm(O~o_}IR>y0Yq}-GQ^Anbv;{<#v8o
zyITO6PgANl^}pGFPiW7)uKsy9roGMRU}PviRrYCa{iVnq{uOT1Jua;Ct?@B+$_=}l
zr=+A*bnA+2h|51EwYu%hD~>xl#woUY-<i>*9jvS*bDDGCuA-a#0R<ln@+QyJ3SFIK
z*mN_=@?G>gg#(W*9Lf>}^UOom$+_-$%aYGoHAm*eegD8hPPJfzsjI`3&y@Y??*7&!
z^JZCr(qSKwr%wwP-0&^%;JBJK=ZB5|_o{OiFRDt-J0ab)m<KVgJ9ED5Kb>X}=e9I?
zQ^dP(Q+xg<gdJ^dYvZX|A>XUi$5-<x>3COuMgBJ{p1_SJPwJPXR38vH-F%*JY1z&@
zX&j~#cK&r&t20=CNN@MO&m41wvRLZQ-`_VWQ~r#SbVql$lC7<D!?(Zgmy|WRrtfW-
zn|b@@rZT<k%XeqYDqAM(9l2-Wos1R7RNhtp347$^n_giH>Y&%#m9mRYw4bjyV}Zko
z&jC|5PhB7P+vG#x;@Pu>;@)LW*<UG_Y(K+wbDVR<v!~&e4^Oi1)H}s;57ZWS*>zW3
zuG_f&;oh6CJKkHhguC%QT{ltRXiIpq!jm2Cv#bA_`}|3eJ8ShV?Lx!s&adUJu|F;v
zefsm)C6;w|#oy-06zQa&&;Rc|Qv1F4$<Dy-j2r%ZKL2y=iL(=HU+P3#wQqg7uWs2p
z-tNdf8|`ANzAnn0WB;Yn%;@LSN|B0lZ>n4%lNnDgJ-B&dk^HocIS0*ss@`zM$$Sr7
zAi5!Ve|=%}>J^(xWvgeH=UN_nA@^r-ZtWU3gWFk`H~O33e|`@%!SbPX_1%~IdL%_;
zDwZ7G_Qpr&w`JZmCubG&`~O+C-@P%bt@pM;!HjwX(eqKCzD@tZH23j_`lkYG#Nroh
zb~!Skg>jGN6ZxW`t#RJ(r0tA8*d>>|-e>dkl)}3F+tcE{+j`z#_HWDU$b+kLf1iH=
zcJDcMXlM($Uvqo5{Ecq)j}oaf!LjQf+%SL3m%8-lGMgLL)63QKpJuq9{PN|?0jt$z
z&oA#S<aKh&Wq32=d;C5_-esRn!xN0--KO8(9q@O?yQRyHWM>4f&sh>s?2+-{`JzWb
z3Ns#T(72`QR=l7pSn02N+Ij6IFK)ap+|B&nxXu0gA+a9^yt?JeHzpnaox*JW{YkNg
z#pf;S-S3}*rBoxu4c}w`e*E)EH936^^MaC>Y@2SY73@44ka)Fu=f0D7AK(7^<QCi2
z!r$)SbFQyrR=cx<(Z}qL!`EHBok^lM54^f5zp2X4IZw7oI9%P-=AFo%qxZNLH_uw_
zI;lnRf)Z!KmRP^@yWJQjOyAb08k`pL<B!Xpyz-;0+iq>?<9;;zOVB3cqLZ8F{`;!I
z(gn>sQhQWItQVGjp1spH*LZ%u2IF}*wSD!Hi{0-yMC=Up{kLvz-zig1j}`T={<fJt
zXbfcjnQ;d+0+X*5@8_ACw)tMqzO|+cm*2TjduZvorV_DpHS<MZ=0$y%JZ0ROwy*Zz
z-21%EsXa5CY7Sl#5OTRNGp4_zqvJ&70{wZePi*XRbWHZ9TIl|Lf4TCz-m|ayX0reP
z$xYa6`Fe@)3GtNF#P1EqUmlUU*|AW+=h->Ev!@>2{gwYC<KZ^rdEy@ej%m%S$~Dq1
zi#v9peg4w&)e&pNgjLfk&u-ip*qFC?c`-N>g4R*KG>ms!@6OIrryFZKcm4Ggx$U#k
z^NqO8p2=<h0~_^^-BA8j#O{Zx_?bkLo&U_@D*iCua{I|va`N1X<$wNsZWB0cSZyTx
zd3X1gsh?}-t87zNR64T%R_2-X2uH)8EBA#RJ8|JudI{XhOaqo=t?ye5+sz+MbiB@9
z7yth&OWmDPj)qyBz09Y>>&>RU4=(<zTcuaDef^W0<@=V_eYd-^twHmM*RQVoOo``Z
z$}G+AUi{v&{K3trh=p6v25-^7aMbuj3X6?}cg_B4F8-pDITse4@hM`Cb*%qCu`=Vj
z+O#U$gq+i+ou=nrRGpi4GDIw+*{5nvcSpyGl`rQy2>ukVu$Zv1RaEoeGu`uUb-tPY
zTh%T`^z54uwD=(}&$}IS`RhMyWjZ(e2b1G>JE!g&;F0Z!eIFig)G=`pdU{q-+Q6lJ
zS^n-_3_CavZZ6mReErCU#i2TBHYsT-3<A>wq_zFZZr3JPSJ~b^dG5W-j{U9c4Yubj
zx%9V{U2&7uv<I8=R;~BD=iWWR=GqkLizVNB`e#lRKD0qSpF`)iQ8tr(+uzw~g{GQk
z?;dd%e$G^Ix>>fiSiWuUzpry-#jDFxWr}qbcd@#<xI9_H6SqEmvq<+G(D5jf=FB-W
zMKd_0he6%G{*z7o(m4z27aMj4p8u+DWKet9ahhLU?~^Z=4rnb)+y41YF7MC#T|f7j
zSC_V)on5+ra^Bn70<ZGl+}d9&xV~lv)4e&#*LHWu$!2YQbT&M4PHZ8=jM?vGJtJQ2
z_KAPEX2YJs`Sr!er}{11XK~bgDRV+c-pL35t_Sa`)0+RoIcD*@=AC<eHR5|E>kl1F
z;<>0Rd1b$JxLe4ofXTAU3vJV1U064Dk3r!zPW#<2@2T%F+qWw3_~aQTnL^pu)*O6T
zARx5pf{qesNOzGyh2sYc*&B2B8Dws|wPBY3v}rLGZ~S89_}#jBXFu&P=`2aqFKW2h
zbT)0}hBL`q1hO_aWb4oW>71~+PWk0^9iH8HS>24yW`A8W*UkTb6=<!c%aikq-z`uo
zI%pKNrX_04@{P+RSD)?qI!ESekM5Nksna!HUUwvV!<?Mgrq|k@+Y|ru(d~FnKHFZ!
z3@<N@m6sag%(tugNCrV_1($RB%_qMcKfLawOJWt5i|gEDw=7fgc^@B>%gE4&w4DSk
z<!$)bUH{GJ{x1GVrOV!6hx)S*Zap<$Lq)@&hUl664i^7->bN)4{`9{eIQ{=Joc-1F
zWLsFE)&ywdG{$R{>6=5``Xx61nu{;!hPZ%d2wk2qrI^hQWf2q<+?kOX!3b{uP24ZI
zm8I*1<+FoNOCDNYKHHO~BmQ?!rJqsI_6^zd=T4Y#DulCNQO}O+4f~DV+PzNMGU3O=
zFYAITX>j?VaW3xo<@~zRW4EvFf9cp36a8E1+`d+w+V{MZ%C9X94KmC&OL?eecSvx3
z>#DH(i?`LSuh)i`pu3_<U-~BHn1oGk?&Y>ibKND)v+;}Moz|!@i-;L#^}U}@nDq9b
zP|A*u^;ZsWtIVG?C+?K)Vz$dhm%QIjdOmaGP3NgMo?ZEKpRI`T^v_)`{oKoM_qeXt
zgO<;8^v&<uw`IQ${rLZxlKByi9gPB6$FC{h;xR0Jb#a4dZJOObw%>cjKOVIzj`$qR
zDKUA4s2q<&_rJxR``#&pW<^DQ{IdQ@mF@GFDKR-SzUlm4U)|(as`b)ML{M<z<BS9e
zLCfP_2X9PLYh>V%`P`P!YLl|3=b5bTpYt#GWvt(^!=O84&jVe}-DaD6XIL4wL}>*t
ze)w~?ol^a0B|HDZMB|qq^^}y9)WPG*%bgZYlKAH!voL*wZ^>+SiIM{?haax^P_ZfM
z?_bGSzc)+eY64Gn9L?gaX-?HYaWIcd-RO)<V4NI#*IkzOmVmWx7Sn{ED)cuu?Q!Cp
zzQd3;e3{#kkFPEHo&-kZEOJY+m=-*D$MM$7tml%#BNa+S9v|U$JDtf_@)$BWyl*Aj
z*{Q39<g#o}enf9fhLyGb-v0l@>&ta43hx)lhVwGd(48@r9ojnoXA_q6D&l+wcX5Wr
zx1Z-fo@tx3skJTQU0sSncpE#jklBea1_qhmo-U3d?U8mb?RCz6+bVYN(*ccDayL#+
z+gcns``xiCTa4#MoO;BYn#hwZ^xd{jyKrIZ+Jc?8!;QW*M~d?2Wt=$B{_CQ~q3flW
ziW9WY@A)IhU}*lg`{DI#wl6Ei-^ZT4Ec|Sm?uTVYKc6mVes2Hg=E5U$lw5CZ-eyqz
z$0S}m$r8R?OG+kU+WuPA%XTwd%>=~7U8}0h7~cNwU*aAla`fivil@z$jbEqN|FJsF
zn0&^*BJGFia=y*b#(h!U%*?9a!84lfr~aCyW8m`Xyy5qY2cF4$9*vpXSG#NDm$^H)
z$eYY!dC#{m!m9mCPW*lE?`=nQ%uJSeudm=+e&#c;!{eJLo=n&N44T0)cl!6NvS;o$
z<+D}13$L4g<+R$={{CL|^rbgf`p8;${yx^DaxQJbkvfHfFw5Pa3xd8cVboIDrF!1x
z@}x&=w(c=X)sv9=bpGPs71@Rlikz1QtE_L{J?--)m-}Yr_C?a+ZS4;>zE6F}EgUg%
z+m!P7Q;!=B%L{j&ZCITkyuRLmn`h&n&z*`U9HtM?M4k_Ryf5MB^ZAz*$}D)Z%%0CY
zuKeTEQ_Urho-V7FUl{uK)D9arR?wu?N{6{EE>D^`n|iHEKK*@h_TllbyR~X|r5|6P
zkmGq;dsQH$wK=Knhf@+}b2D$rJkd3`w#mz2YxQnF@NwHatq<(GU4CuLc8f{!e)nQd
z#Vdx?EeTtrR^)JV>a5CH+j2DPXxip^e;8liI$kbZyS8jD+y7?m!?m}ib1#0WN-_w1
zkR3W_yP)`kG8sE5>y1%0y0fQsNK7(lFmM0k>}N1*W7xK$EpH7@WF1c0n6z1osjH*I
z!!?UjWFmK)p-toM*=(PGwh5J;i9a_lY~z(}4C`X*BByxzTz}^x;~SP&uJEA+)Iz<{
zJyp#1UDDCE?<d*H-kp3NP_S(M>8JO0?y;`ac0Yc`$KRhnXKPlu-CgPH2Qqk;x}LwC
zQONfEt>xi~zL<-YLJclopYq_|hR!+lCaP*WU1gmYWmepq;!(eB^K;Rll2@BA@v1_1
z=qxlYPHm6d@KF21vQ0(BX4_5sA9*~>&pC2hwl`~Tf%f{}GbXy`Zq1YxKVtrJX-fK&
zN$IB%qq3{F!v|&a&iQ}u_#0X6C)&Dl_6^Q!It=+*)t~wUte+KC-7{rs*t4ZnXQoke
zZ$;^D$P(#}m_JU7q%Io2i+;9b`lh;P=Sy<$b<ff5Hgqdj)>Han@;&K@$fMU<Ssa{7
z8Yi~TU0mcVB=jo&gXxB_t+LzCL{2z&Xy*IPN_Tx<+c{UNT|Be>{QkuTpn2GIpZ6?X
zCtAfCcdYu{yHDO$J7VuMakD=mPM7(@ldQY<9zEv9x~n%W|J<R%Nng1(ZQ3yZz1$SY
zs+tdt^)A8AI;N|mET?SjS{^t1OxN7$h94fLzr3p@w)&_`XPy|O9-ZjzIJL@m$GHd9
zHh({S<uQ|bb<s%i5zn^(pSo4+&!?wkJUtL}{$7e$&KcXhx5|>MD{>D_e^Wkp{_E9&
z`gR{aFZ;6hcx~MdgZuJK*#S3$yl$QPxi4eJjlLF+{|Y8*!ppP7c`v1Wgq9BCrxqXk
zu-kv$sx8u&_Uc^Nva3yE^}{XwWymed6*Dhbe7O1Q{IO59S?lU=h=}jJ9scQ!_5Gmz
z)$;!pOj>pR)@V&uuRnAvw%zY_h1%qoBG<zX9dx>Xk>}slGlIM?#Gd|6J^ttEo5CsY
zFTGTFx;&M^1hlv%jsKj`{pa%zHOt7VZ_zNGe8(X?;J>3SOBDO&Z#Nk?JbzknV5MUc
zwEjOQ-rv4xO~|$7Q-0NbU$!ajub<4iVD`ycA3aOHu`gZc`PE>4(p}xnY9Erb)XJ83
zGfTd%s9%)N^gUYcjmHkbo<vTbcM)bQ&K&AL{^`ey!aYB4NF1v(;C3!a|F!M2FIW7W
z-E*JtMtk`CKfFDE+rvM1xl1;tyGK2^W%ho>jY}_dPVk&be*s!HVlRFDUak2?i377w
z)i2joJpPcezj=zP@yy9_T`ME=^RnbvB{s7BX4+8xJ}GYQAIbRoPh$G}KC9mRmwaa4
zN5lT!e=}@Dlny<JF8}(aQYX@E`t$n|$MdRJzH=576y&^cz(DEK^TM}JHyb|gm)?`|
z_4?$9zovJL{zOI{YnhYs_>{|0Hph2YV*<CIdM}rC@3`=c=l^dC+?}-hclqN}9g8PB
zO6+`e?6*WvDrnsDEq{(>3iH`r_dU0N^X2$juPP=Z<tH+MH%3yt^i(C&+{)LLuX^>h
zbbkMPys;*~<LSGL%^XqYbH9uI75ky{w%xzt75k3TE1P3vZ#Go_%a=I)Oh4gc+tHrt
zw8DhF-)h?~><q2_zwvC$)xUXWxgUI)-h19ZaqGpjJD)!&cwP5@I$7T6W7_*Af!`0h
zL>)5tl6&-$<mv;nPc8ac@cFNQL1@K>cdj5G8&vF#<JDC!ng-g^QB-6Uyv!%i@SN#+
zJG<&Xyx;DAVK}^gZS|>!n``cS{4(V}oLAm>vF23gEn7eKvgPle+HbzvcD3fuEIoI@
z_PK6OhjX4U-Q9Bf<$;GKjBYw+($j?HuC(qw!RW?qHhbwIqyCFdE7t0)vwG`l;dL;`
zQe<`H_Dh{UvrIZov#&iUu;Ae240Hs|!MlPc!5mNB-O|)sv2(ZQ+WWEFmmKN6n4xNN
z<=5`lQo3PIy36Cv=kThYsrt9>YR<-_ZGY}Dp1t98cKJpgooyF_w<SOuaVO%>g?^AW
zPS;zivc-(^Yg6RcuUbt}{3?oPzU;m&<)bDV08QI^YCCUF+pK6S>!F^?oY3)f-?>ip
z_t#z)1U&h_v!GtlZ_$-spz^dx%EW3#M~pyL+{I(&QECl}lcN8ZvKMX>_UGPcUn<zY
z-(u$6e~jsy>VC8=m#gm4zm~Fk0;sLK-x=&9gZB877sBHYy)0?GSv2WjU=6&T&0NY2
zn!f>U4ejm}?0)*is&v*_sc)c$|C2ZW9tnXP+k&m2x)jv@{<6Y)MaQ318$EHo{^v86
zy6H?4F8x|EaeKCTjF@-J+dFmjPd7f_SUi9FjVe?Bk7sAJGh5DlB46_0Sj$>B6~l+y
zCa-^M;gA+)$G2TNcth`rD8n0P((C2)mJ6RZEH#Qg^GnSi(ph-&Xk+<Xai1A`cP4G=
ze0MNd`Bt*7_&%<(rJG8wm0U}!-jR9ZvdayvxwAh!*GM>hOMa$Z{nr0GOgGLmnDaLA
z-SYRFl<xK_YE3)wW^M}eN7?r&2YP+`ZXP@QE?(^OeaZXG&}NiG($3^34(qzslpK5h
z@Kp5q8Rq*|?3k!HjeWkXSMJ$vMQBB~(SVheS>W<Ny|!;2N8Ws7IOgzr8f0A7ZB@M%
zN5g_*BZd`oeZBXk{#sPTxFzU9XY|#Woi(5TSg$$iy*TcFQo`+B4Rf!~-*d>^-sgF1
zkns609#1lq<KG>-`KSDtmVw&G2f^EeZ)ZL5I2JXpHeGH0K1<Ha%7-&V<Z9fWC-!+f
zdOCfwfs~^CSL?X{6`TT#!a%WMsr^gZ=?TBV?6W)W=3TmcIkd-q!LQV-8yVad3nuSe
zKIvl2)NP05A5W~#x4QnnsO4(b*~k8NUfXvB*NacvS!S615j>xq`})`PTN!`*cXu#(
zO|L(+s>gZX(Q~#sKXObN=DvI9m%BQD-eIrOD5E9GwI7}t)ZUVMlU^g|*5js}pt7aP
z^sYhug^9}U9^T%E6AXU5)`F}|SpM?<|9|FTtEXO%ub<m1ZN5q&{p9Pi`2{;w_n3V&
zyvngP^880Z-7B6Q>Sxjx7j-;*vU+Dxq;WMjLvNJ2P}IN87j|v&{j&AHZp73!?h|+S
zoV%Wt>9eRX=PK9kRkKS(8RqRR_B^j`x%sok@yA#HesX_vfB*dH@pUtsnwaG3ek3+E
zH;0M{g60P%UcSCD*?pQ`tk+bn(2C!0w@2<OS=seT-J<kE;ll!pzMu1jr=N~YoO!wU
z*>T--=M&cM*UrjWyXrXW$rU@a84B`mxXinJa^02q8vbiHPOmIGqhN6GYEnjr{)=3%
z)0gdE#VcrQcZ<i@OkA@@=U;vGcRA70xgQGu7#~=mu~12~>!G1X(B=bIt{C0E!lqy8
z?G+tlps%U<z*08*ZPBZbVpAJDg0kWqd=GOk(8ycX(G<08^`lu`8Bz+;0w=gE3~cHj
zJl^#(II4r={q^|bpN8qrjEn2i?aq8R%D-n*`{km03lqbN=`(~m{&?QqT^=44HOohB
za@e|<l}$}dpd*eGvV2zZxt$Moa{IY8n&(=riRyvSXz?R2&Ri9G{%N}5tT%t>@2uXw
z=))ubaQO|3qpa`!$X@c8IagJ7s#gE&7{7I!F0Y=~Jlp-XwD^q72MXrq*|O&~u7CXU
zS?lFhx3WEzrq4j90=u}htc%&X=*5eSpTB+yaWO2o_h`Dq0x6p>7o5Gky@RLg#ZH+p
zVL|HYX)7Q7mweRms%-weSwHNh{>S{z-d-YkH+Y?1|BIXZcX&*S<M=Uq+1)h8DJtGK
zr)E#@4KwGvzQxFF&%1=rTX)v+=IyHL4dh+N_tV70q~Oz9$NP1^*V_GjA{-eRscC39
zk%@z00gDi0fNj*aoRv?Wq|EeDTe@V4hLVy~YHBJ=u=Daw?Z5i27`LqozV-e7k{2&7
z-i_MyxoOuAmX*7+Creq*zW3_Qrc*&`->yzszyDtp=wR`Cdv=13TK)I;w=fq&L%yF2
zLx_BJb+xLBO2(NPhE_#SI-H!GLPA1Z3``ew&g1>GQY$jDT2y$m>$TXnY>(?<F2Vev
zVPRQ!ca?(9dbo4<?$l}1)b#cBn;01ygg8R%7hhZ<svY)X?e=>?X=%$qr@P;Y5o21(
z$vbCv#-#)2uCM$4>CK1b&+dkvsCmsEy+nK0+QWtIQ>IVPJ~>JC*0$W)pk?_>mo8ly
zySt2qnSnu7V8>?A5uvTz;#%hB;l<C+T)eqCopUMMma^Q!w?(Hur9A!Dx$WwPz{}aw
z&zeUrx{|bSmqhQu$g{Id!i=4-&e^}|)9kbCrmNq)GPn@7R_02>_E&uxnwo(T5i>4b
zy7cAG=kwXm&&|!cx5x4z1A_x6OO~61L&LT8@#{BjG6J2T`{Lr_mLm1YPg{-iu9sf_
z{_1q>@_!!=n`Ty)YGghwOno~2PC@;@pFOLm%Upe8=WhETa_fVlWBif39=_Pbl$sJU
z)%o~uS<9dI7+YIgXPM{E16@7QDXgCL_*n0z&B?rB3=A5L4!qKKH9N}Q+*sJgE3IW?
z6I1>Do$3_p%M7Ji&$Hg1+8MuopK0*oJ*%c={?yFu<a;vZ-$HkjZ3Vlx-8eIoyLEqF
z>8W!Tik_P)9~D|k_f}n4U4GTrvN()&zm#XxtogxPC+<=4=dYW&D(~jao1im1&ds-9
z|NVY_Jm|RkHEY*$vN14tDHN33{CqOm%hz{mcX#(Rz1USPEi6tud9T;c+`e(&&&=1K
z`hRat`CVBm_iphwU)d+7H~jhI?#`(_+>}4%yY+GhXXnGd@A8$-PYmtul<Btqa=-uM
zY00;~>$9qq=ao#Kx<rlNPR(CZ<#b|g{i+B1xw4Lx|6Ud6Jw5PKMppIg^UV97etCI$
zmUa0$E-o(6wKSlUvRjzIk^dWXkZtYnZ!3$RpS!iAaPgwWi&HmlusQMGSX=w^@$Bnd
zpSbgF9!;P8OUuY_Q&mv&{EXnz#d~MB^}bW|T(!m{%;%7z=zp!e;=;bVJ-ZI>R^1=b
zYJT(L;?U`JRmT#Rp6W`;{qZd1YLnhHr?*k!i*r2n&i^{S`smfaOuf2KZNEi&2Aw)p
zG562Xtd%pGE>~|YI?sCY!J2TN{(BqZAFj3GzjJ4ybNiQ%$K_8aZPc){i+g;mS6PIC
zp{{-5rcIN0S2sw7goG?fJlvM~{M_76pFUk$;yF1bJ3Cr`^ND-={Su}5UQJvW{MBo{
znfaZ1P2<qbiyr0u+TN`8$Jc+i`NgSo-hcIbaVvZK>|=XZrrXXsIj7twzU9b^o`1z%
z##`R4H)sC6a;0A8`BRl%JEHciOMhzTV60}dc-jA}mp*M(S9Ih2T|bHETu^GC%F5#t
zK1%&~;%u(->C@+zAxCxkg10RRTg!9m+`1_XOUrvd%P#qLRyyuKSN{7-53RHR!rr$Y
z^=j*z_3dB8&MS{a)5_l7S~_Efgr$|0mZ4$b-Cd=RAvtr!+;uTKmF(^9*G6u3%gf8-
z;^CRHcdxDLhH2WGr-C1*Zca&^Qad?qQ~n3(W=$Q{Q@?ixq;;p{WUNo~eY-mK1*__Y
zQ*FG`pkv8xe>`aR^75Lrd$+Z^7z0DzL|#D#)AQG^g{=--8}$0x+OH?o=V!dTvy+RD
zZ`$O^!Vj+`mGPZTeO}Pb9({9VW%sN2nU^BYi(9W;vEo9PsP>lh^K!QTemwT_@d@b`
z)BRDv1kTy^%?B6s+y9Hu)6)YTx(+H;7A#QMxog+8`TAd;h892TJ6gXq<#t_BPQXI0
zZ1vJtH=eas7Jr?&^v9x0{^{P=uAQ>q`ux?}rC;+7|J=06sP@;(<)Awk_Uzp&DlNVG
zSg-VFMn;AQW&7KWWJGrSzP2uQwPEtH6Sr<ft>6D|msQD&0R6b~gD;zZ{fpU{le6U0
zr=po>(@cvhUHnV)4zCKmZuoremA)&+MrKA{W@eiLcjn00UfC@76qJHKd?-jwOFMP`
zynna2{<L1+|HoNCh3%cgpqN?c-2Up*)6-Jsd2_11yl~WLSkf)7uVrEqvUBIoS(e4i
zK<9*LFfar#HST)&zinq~YU<0Q;_*|qZ#Q4KAnx>;Ga(TXGrGFFMI|Lyo}8>M%*DVk
z#bH67W&OWBK5CQy{XAd4s{a2!(1i&+Y-R%Pe~X`-5Y&y@;=$JJn0;-{#SMvvIaom@
zNPxMhh{%hd&*xvix3~K0+Gz7$dHXmwW$&d6{&{$MUhERpzOuwq7<4?;-m0%kBH)7W
zZdg>5R(yQ?+W7r(a&mH@W7efivs?^Te17!!G3a>rox65vX=-|cww^5n7tgQQyN@n9
zaKJ$~W=DXYo*pPYEMBa9aKX8{dy5w@o;qjFn&WcSKGECrLRW{a<p39L0sleAHdK8+
zYkqZ2q_OXOyI4Ivy_P1cr>CZBcXf4LdU&|~>h}Ej+#4Gl`S|#tDSLlXQqq(6@Aadj
zqkZREh04jvwKQ2R+_3|68O5m+CoY`U-@oSR>FLJoVj#aWc>el*|NlBcWw#YuwuA_)
z`2=jwyUW4)earm%f03V`pAYx*>$_L+n70;mencBLs9-zrl~2}c2_rL`hPiq8V)y>A
z{dIpiSidhRetu3<N5{w2)s>5vH#8wZ0pjlh<C!yO8qM^vDt$F&&)&UHA3r|4UVX>%
zCCis<8yN+Ob-NlG8h-ltQ4t)j40Yadar4-CrI!5t{e5-%`FWqdf1lpl%X@IaHE&<v
ztD90!r{w3a-?78u>&wgS0SpWaSeSN*s;a71y<9pS)T-%|wboKla40J)V`+}kHZ%;}
zxN&1qVIk<S`7bAw`vt&Bgkk&QMT?w*m-$?LKEHk)yIh6C!Gj00Zf(&#xFBp}gbwJO
zcTkn)H`glj_O{%t%gcBn73>H1+7AcWtKRSZ{^ZG%B@Z4XnB8<+n`kzBYsJT;+V6ME
ztA4#)E-Eg5++GXpuoZmX)Ag=`%FXoj<(Ze4oxFQ@Za_hJ?7o^vZZVw;Cnu{vef)Uw
zix(ND85bH7AX@aIx8+Ri@9)>r(gNKyQCV3j$n~!4&z~C51s0K!k+aNlXE`}Lr>3U5
z8h|neW5{-9cD{@Y3mi|KKR<o_`t|cFpGi77_U<Zp*yQBw92yX?V8R4}y&sQBLn@^M
zb4z}G$(-q<HrKM)t(l$wT8E%A2dn9$Bb~xs9UY)Unm;`~{q_3(e^ViWyMlA)&Ye;g
z1q=A(Y*s9vSLKy_yl?8(t)>Sb^jQ`>XqcuGxu}g-8q_9ub$z`&B*FtG|NHyCKKsdu
ziJ(Ri=++j^;AI;O*xz-$y}kYQY5o0EHgDd%E$^<DzrVi|xasvEPeWh-dJCuUmYSbM
zTeGkGH8wUD6%;5)gmj-e<+V0?yPv10=cf-J0)m1j-P)Gx4e|P&-2DB2x22t*w^mFy
zO2fh;LN|JwipU?$r%#`P&bR;e{yyjc_pHy)&K8xFs7QdaEkjnfYqyxDf`Y@hZ{N7M
zxPo?<z2#u7_we-WR8mqZd31!6SJo;d``Vg`bLYxJB0b=C&aEw;pk$t!x>U`7-WqrL
z+9?4A(Og_yB4T38K0iMXzAfZXD|ciG2Pj81+|n^M4Fw%al9;&gSfA|bdwZ*ex$->2
z!lpSnJ5QZHeR<^Ow9IE`W<q0R?}H<q!bUTFetkSHZ<=$%fLGF}CE&wszqwXRL01`V
z*%IPCO=sf#`OwfU&_8+Z+_df6xA)0dg6=4KF*Q7H;?$|42OqGp^UGa%KEGaX@x>Ki
zUtd3c`}S-|B<>Iw6&1}q(jj<jV{*HVt?k!a+3QbUyEZMM;H>%mn#G~3!%m(&>Dk%Y
z`RdkIZAcsz=o`;I`|0y%(5*yIpFLZ4;DE!;+qZ>zM5LuxA35T(bLUP?O-;|dygY~}
z9AX0^BUefqr@dI*Z#QZ4=FQ9e=j*Aft2eD&uBD~5CFiEm(q+q5WnW+C<>PbZ@Av!u
zkQlsT`tadHP(j$-+}zaE1UlQ_X=S>Xw>PLn|MI1TSH>a$bP3txW4(~JUc;_etJm+F
zw0`~iHEY(02nc{~LJ{<`4UC9bv0;P3T<dbb_4|IUTC`|UN^-I@)Q-}VCr?UQm8|IM
z?tc3I{rb4QRf1l!<sTopg0j^AU)T44IcNR;MKiyh0;B|bH97fs-^tszqwW8Gne5gl
zv$BzyUC2vz>coi)KYS<v<=$>F-AhM0g*m~MN5iidhg!Ko<y+mK9lyT4U43z}`{wBh
zvc*3>B)Yn~o;-EREBpGos}mHRA(c(TuQ$`<>n=`Ebe=M4(xP>-yR#0pa2iSVvIMig
zxwki3JA7Tp<z>EGQ%{S1eQ~iF5`hgNVPR_V@$pBG9a~lV`x~eb09Q8OH~7vr)3mdT
z%e}qr>bJMIukNd@hU6%Q|A`wTL_|cEbaZfR&AS_QdAa{|P;GOk`TgTAQSB*{CMo&*
z`{&->6?*R6IY`C9kf*1j;$ob3CSY^gSrHkTHD<ZD1Wws5TC>I{Ffj1far=J@|NZ@)
z{rA_`UGMk(hEz}tf3|^A&-wHIZi@qhm-}5k%x~|bvGvYU@9CgxXpGa(ow$BIJig{5
zYq2#WsIWe;_V=&X>rHEZ6il5qE$h#ZkFW0SwO)87Xy?wIT)ezjk9LcjCLiNDJJ&j#
zt+^3WCLVZeS@UB98;`_-Idf!srA)of+kTf>c*Q9F{JhYJh#7jZyOywWi@o@IJzgCW
zfghfGwVh14baJwKs?p3ZkGl0gX$R{TzPVxO>gMLPO6t(>L)`k%ir~lRPoF-SRD4Ky
zdwaY8x^?S{ii(t`>^_rYrfq8*%g4vp)!!ff{@z|lOsfhweAU#{oVb7g{%h-Ey(PA`
z?B2cm(W6IKHYU3tVgR*0KnG&3@L$sbq8T(A!IYN*h-whx08tAd$H<c?HN^p>*CF=n
Y{-@dbB|mq!zXI_+UHx3vIVCg!0O{5<+5i9m

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/convert/sample_before.png b/tensorflow/contrib/lite/g3doc/images/convert/sample_before.png
new file mode 100644
index 0000000000000000000000000000000000000000..e5317ef295062e79c66430512ef1c45925858ce0
GIT binary patch
literal 155610
zcmeAS@N?(olHy`uVBq!ia0y~yVCrUIU`yj*V_;x7Q2#E1fkE(ORY*ihP-3}4K~a8M
zW=^U?No7H*LTW{38UsVct+g{Vi)1$Pw*LPtV#yTVV8h2bDRie}-+X>s<&&SLd1*~s
zJJEe@o=`;vx9GY3|0)mL{|NrvvC!<Tk&9COs};)r`(Awf`uyvxq*}ZE_o58<-~aoq
z?t$2O-ydaNt=B*OYMXOCd)A)&`|2EC|Ni^+@81RAekpzb-h1!<j~$60)qhLfyZ-fi
zuUFmP`S<?wKmJ?1`QL`Q-|aVE&$?W<{?qdR;Y%fU7w(_rY^OT)MEb|&B4GhlGyloW
zPZreca-Cm(>~my&{BDJ}dne8B`u_8(|JLr$f9rn6b$!q?ef@8x+xzdqziZ#l<NCW}
z(U-4J^6$?O%m4H7Jpb0+<=?B%XP0m0nmXyjy(`DV=B?jR5P0g-pBaape_sC)HNXF$
zdwRFqakINIH4z@lsan;+zSd_;E?36sN_wyPnex;${q)ny_2OrhZ(pABO8#ZqQeNu|
zi(`7$iQYRmKil(s@$t_#-gDL8ADL=dxcP|xpHG5y-=F>e^s#brwd^j<qyJV3Z%Bx}
zX?WIXPw~H*t1~=yy`Qyh`M&e+KN!tFwjUF-x-rkF+|1%})!n46Us~6G`m(biw%hc`
z^$eA}@1+Bj=G~3((JE2!($e+pnzebEi>&qEnQLsO&k~pQSo?JDp{3@no)_+Hwx2!!
z;+L4&(RF)yw_fN7YYBX5r(v@C8e83#G6R$7Ow%pbmSn!p-J<Eb%0w`*uy0n>maO2w
ztLvQ8_4j_ewlw>F?H9{W8UYc~MkYOW$7W1!_sg^W`SPS_@wz>yR;|{J-1SQ;Tk6Bc
ztg}b1?Bbnm{eF|=f8FTA@2cNaUSjlGKCkS_i<jwhDi7Y$-F~m^>8rQpcYfa7`bF+`
zeyq2B>AUx_-tX5{9)DFlx308zd#HWh#5sPmzW4M!o%Lcz6+1)8w|PhBX_fdt-;-XL
z(zYmd*R)yY>FVz#WY?%aExVllexKF%>-$>6{;%Hm)V@@&;`KlIx!m`5l|Qe0dpm#E
zw&#mAo@P$)UwGYQ8u!M+i)ZHUzwqdLxW(~R=XNidekJT=?$fHW$mUGjw`q^3mCasS
z+0>J|E941B_0pj3dA`%;ELaxE_w~KOElux7wzsxz-FvhCcIc^_T$N8f?VSu;PA)3q
z%?ZD`Ip}_C(%Nb1(Rcn|xmj6c<@M2TXX>rYOr5z)&s;8FoF_EtjNtWMyk4t{Cf}Mh
z<@Yi-UCFA|JIyq^RnJag{iJc?u}#evTl2lC+&peK+H>caR6P+cvM`l>?8Y0Dpt-K^
z;`W|n`<}0Tk#Tj)^1FG#;rB}AZe2X;<oBxd{mxUfdXEQtEPXjw?v2gswu`Lu7^NO0
zeBTu@RdelH&(-TEIWl-Rmd4Gj*4^dUDN?v&7Q>Tg^HwKSEcqTJbb%q;`P<s;#qSq~
zO<=B=IeDMLw%}iFLiz7E&o}$Fv*77IKMwAaixwX$zOKp5zB)}q!vFQZR_Tf7uYdTZ
zbg*sGHHq78r+s_ZoclIm@|ruv%X$)5J$?4>b`<ZG^GuhO-e@<+{t5K{?lATF6W?^J
z&1Da>Ive)%E&g7~7j*ML?BnQU<6@0RR(rWzbCNGjY5ID4wwM0D%*}gLzRzmASbWU*
z>*fy(#n<13Up)KdMAl6c?IN2T1H(0cG7==FJ};~leRcKGrgJ`w!9IzNW!LWQp3xl_
z8MtECLAl)u&t|tWeYbgemp8%i^PRwV>(u?fx}UFUPY`|<dh!B)b+T@icUq^Y_y#xM
zWw%dPYrSi7W6inwB>u|RkpEXDwzu(SZHZoUGHt5Ps>?6B{@pfAzuZ}Vy7XDA$Klq6
zd2*!-92%#6J*IZ%?Y1b9{0sq(uQ3w)_HLafn0k(_ZA-6C?G~PvV<&urXMEvdVO*N@
z;nu!QYqQPVmTbO!_D{Uq)n|)huNNKuR=_^*a(3*VP8X90VmWg*vg({!C~Y=xZshsb
z>yG&*7Os_KTAw>_mjFYsglVUtKtL^<TTpkyg#-(p|M&BXxhr06-_*lsy=~u0m706s
zWC9rHh;3~&?U9(W@5hmU0W9x!f4J!!`{7l?ZZG?sV`i7P{rooT1momee4m(~ew}xF
zu^or!tv8J)bE8eYZEZKSr>=FFbHTEOb;g<7j8ix6*jM59cC(Y%9sZ*&o<DwGeeU&f
z)3Xy2vP<+92sO<8cd1F{txE4^-V}B1tH~*;+q#_|?SF8sYp$(_qIcYE&%7H`CNJ0}
z`N$&Vh^eaQ+u7$tU;JLsca57#StBU>c&aP+<!v=nZOZMK7%jj4&~Qx%+vMN6@ynSD
zd1tR1EEl-oHTP6%mruaXn(ue4m7ipOKF0g=u<7b|yv1K67o5}6$SsR|pW%IF?sc)*
z0^b^PyARFb(U|KHxRJ?w$H%=J_|^#RHEPQJTw*dy_r@AVhPFnP6WiW5-6;!+J@B(&
z@xfrLIYkSqOKc85-s84KG~nLV!w1<)9XHNkwV1Hbb%sP`f$U7?)>nL~ZHn90c}#IC
zDG-U_yn1x+2Z5#CmpgPe&kdf(e5-vsSA*D*^6p!15-}HB{w_!<36$zTa!f;f5oaZ%
zu<41G$mCCqfoWz2zp8J(VB#%gUv=PHq}i(ecc++Uh(C?UkY-Ci$8oRURsM=tOnLIt
zdBv+7*Ksn~p5geN=fj>;ad}o<d<4Vfo~Ip>j124lt?8S#sJ+7D$2*4YJVu2f?n`eu
zyx4xkmm`V$kHJiKzxjT8S1)Yt-B?hkKJU)GgsVH9WP{l|w3|-r9@R@;bSGak=vJa|
zvq@Fx8$Z^bDY5GkN*?fh*Z%6LoWLU=f1reW{=<}OH?E)cnP6c4BG-5}PsHvICvVyv
zH_MAZb6TwNe(L+s%>S!i$ljT>?%_$B_=Q}lee8mq9*2^i1-LCbxFmr21+(J57YX50
z_yu@mqW2wtn0Geyu2@vd_R?zBokl{}e}1~=!79$GcVpge2Zue$I&RyUS;bxlZn56_
z^`WovVY^M5ua3RtuJ*XS-+Fe;DX;h2eS-IJhMait{?)WO`<^d2y;4@pbN7esyDwL{
zMa(Mi7d-G}?K)%Q0~S0|Wph@g9(j_R{8c<H_aVm-SIG-HMc2#T`<$0ZZ0$<<`b6|t
ze*6xBt|iZUE-q{rxpTqAZmrx*(^HKm^6Z-(Z&hkEv)C&HOPIgev{|k0oGVN0;&lfa
zqWgm<8YkR~$lNTUbZE)-3xY~U9zl-|eR-oUtYmY>Y}ul-t3K}FZ)6s$JbYICsneN4
zd7G{C9=vl2jQeqPJI~a_uZ#Nia*n>5p)ue2$6*6a4#RoRg{$S)7e_|C`Y94|X2$z8
zCxwE5tq&4S<aim3$_!4YbZnV_cwz1j4&NBR`4u99kMCSKQ`~jleCtx79XTwsU(N|g
zN<Y4CTJ=L|!zdRfwd_SR9S*5CaPOSDi2dNsb|b6S<9{}BEq{=sEurJDz_>(&!*xFI
zhNF*fJ(-vpm@ig2BWsVZz>?cHUoxh?IXCI>?5xxZR;y!0j*YE}@8XzB@&lUQgw75K
zTzxJ?m7Pt<b{~h;?p76#hs=i+6mFZFpKy7}w`87bo#njz^?B!l3{*~AO;?ov{E=}Y
zZ>4^kx`pWZ57&eOd5#9RKH!&-uQO{731Yr=%E#^{ht1;0yB;{aak6{kelqv4*}5|?
z8Jrt60$IXJ)jg*e-E{fx%^rB>_Egab>(b?xoGUg~waC|d?OdR&$Z@AK>GLeLtjilb
z-u66`lq`8yy+GjTk*1vT7Pe+J^(+~ane2NLyjwQ4@y^RLI1<d*vh{`XOO1)j+pP=N
zGiEH^;PI`z=iiqo!G~UM#mz>XB4_wxtsaE@xp8uXqxj2{)$9j6`Zt(VM7X{E;?$sf
zBFcG3a9UhL%OYits)Guas%77Ix^TPOaC{V*Z5U-?CH5+DW*5T@=Es_an-uk%rzNtV
z<S3{rJACO#LCq@$#Rt__3afoLb~4^jVw-gPuttI0vZ!D8%m0|J6~6DFH`nsOp48dL
z<E}fLdMNgK+0G4X9~*2u87g1ww4~WOTGjSW<KnIN*xItf{HL~yCU)N4rXk~VVnr~Y
zms`i4%Up?4KXe>A-Yd3FNDyjqJ)zmdUd6Q7N-@M<a;ad$aoNs?#&bi%W7rRVEU>xw
z)VY8)tEF|`9P#PVb~|J?^L?4_8xm5*Ft_E_DdpGM6%K)aRIlv~mOXd)>M}#ERcCBc
zycRY1c1Bu==H@vpteMgz*$_KpM^~{bm*5=3ma~6-3ZEQI&C?3hQSe?PDEp*v$svEY
zOrdKEdV9EpdyHA~KQ#QFA-VA_LrR55+b83|1r<%sVQM1HZ=H%{A2L>bcG?l}wktZe
zX|I~1$PfSROl}<=Mwjn!$h>r}E}ZW4@9yLN4QH&F6!y&X)>OM!6c@37a_xmsk>w)(
zITHMLdMD(3inQq$OS#kWFsbeKTi)--8On^ZyuKaSolyAEXx*(kl^F-ur?Dk4$n7!x
zSDx%-vg@JZC$T*W4(i{%TW?e`R-REiueN`|tcr<#(Tn7pa&)FAnttDPiod_NnaSR+
z@wIhuTl`Al{|`(Rt_wYq{PBd1<GS#7!8Ix}e6ET1f*NUkPdu+Vxo+^;5zSI>wB?$F
z>FI!346jzbc6n8s7}H}fH(haUhiq$+^pnc!D>EB5dL7s#^#1!-<tcO6)>R}qpIqCX
z!*KZ`yOV%t`~zDy5&iQ&E~-!ZR$#7lZbo2}|2)+mMIRUY5BUOIJ#U`}W-?FgITb4*
z==f>5sGhIQ+TSgQV|w;q7i%n8)p}_Kk6qrb7p_q(|3lXLF}F7!3r<$w_+y^#&+g+5
zZ5tYxk}dwf<`7wSC`M&T<8qxP$D6PG^t|{<rEjIz?MGR%(nmbrn*4Hjp>X1p&$A8J
zd)n@6-dp}j_3#lM6Vd3VU-9OdUOjDMJoOwxTD>h>k~npw>%-Pb^Xgcd7#%y(al#<2
zL@;PU|FNs>Gpv&iwtsWpu~T^BL0;o<L+hWx-j7(ebjqZdInO-E_)>h)$NRBIPS2b1
zuvy)*Zr1iHKOdEeJiESLo?WHf%BSz`+%E9X{>_Ku*XLf$XggwaT}f?*uurnI?wVo`
z>+NE9B+j(Py-1Y4cv5U{Q|##?_tOE|X4`*XoWR8P$Hv}GCnLuD+4nuidUl<^)jzYv
z+VGpOa9dz^-mJ)<l@sSKRq8N^E-Mm~vkE%&_3}lTJ0gDX?F(mrC_K4%P0P1(k2G-~
zg}JK^`<mEsw!N<73XNsGDb5_TEBHKL!0*C%vAY5j5}pcq6fF8_<MMXnjejAgcRbaf
zJFQYVu}V4qziP*PF{Rjw_!kmtO$+&sJ<qXdJCi7&^iOuhk9FdK%4+Oo!Aj-vcTW6y
z;ilaw{%2dq-qh<8XK$$Xiu?9ZjpK=DjZ-<F&bb*omSjtw7jWS?RPW!NEn?60t@xeO
z)xi2BAxn{to&5pKbp=c3R=4cAYV)0Y8Rs@-9)`rpvK+^}zNlw+_)FQv$1+c!%p_Q!
zmKG{>r&s^LVG*S@u_@gFW!d64+B^>gw6uyU+*{pLYu^;G<#zwZFt+L=70Ys*D-6Re
z4J{QE9-ThJw@%SBS>Ld3@{YcJsXKT-Y>^h+STk3Q;a<{}zWM8;RFoO{D!BcW_}q2a
z(gegzwW7s7N~WC7kh~zuBsOhH<<2QR``8Z8`+MiT+4fk!4L_Y;2y6_!^176x@=u4`
zhj7LF_8fn&Yll<_tZeFBS<ondZTq_wN7lRiH*HAJmWa|(XL&l0ZAEcly3t>ec~7cm
ztlx7XclEQv)4UgsJzQKG@Wyh3=X&>LON39%f7iTQ{@Ioz3ldfquKoQp^i<*0?~DJv
zO?TW^v28c+1OHteKjuzlU^uy#-Nopk*`}YGKU^bbXP2wY-Mq)LTYc%%<tNYZ3N!f5
zDEFV=Y_?_Vry9p8GfWvYmb6<Gu3n-cTUXQYE%~4r8%N;Ns6AZwxUQ;MmrZ3{C}<P$
z{0>Xq5~B&rMErM4rX(G5+U%I9eEqrLNuitkn%o^<@0S>y70?PhcV4nk$i`?@-30rM
zPwi)hJLx8Q>ppYnvTki;*cc)kewF{G$cYKRInOtrjeGRrnW4f7{<sZi8;bSqGt%|8
zKK?u_ywdsaziJ+{>?6~*+O!><@aWh}6&t^k9h;@@o$ELC|9Hprf^m;P4bw{H>vDfK
zEZX^@<L&<alk8rE^-b6>A0t$q5q@M#f{cXa|Gf-+v#&&4z4PU!7H`A<qr2MsjSf6k
z{v-Y1{5j?6yr1+L7<kk(ot*<bot+f|g8bb)eOwtBD&|a`&K?nbQmo<mG!+$<Ee<S8
zTJE{L49&`5WLd&x<|O2yp{T+lXuWC0fvA9u39jL;IoXUU8x!@r6W;T4Yn<8@v0>pS
zHO>XA?^nOCe*XK}J^S=?b1a|V**V#oVa4%+Bad=Im>XmQ_o<kfOt4h6Z1t>S5pZA-
zS-|aM5wp@lK}}7_L2}ps;Cpu)t_7^Sb-MQNo&WK!laEg?VsuDSSpIRz9hnpCj0f)S
zNb<B}XpviCK6{#en@>w1Q!Xcuz^0vZjHkR&d~TxZu9fpBIdVcUW5B{9i%C2TSDq9a
za_^A5&>zK`!Y=5q(R7FB=5*}`9~A1_Y_!zX#W(FTO?kcL{hyRShtqFu@aI!co-uW&
zhKs6)hG=^6vj6j+{<uGBvaa;3!#rlI9E8NQ&qu~>Uvr1UGFWlG@KRS-=W9(hz9)ij
zoZjiJ!0j!u<^O@Hn-b14Ff&X!^_a!g!K@)i>r`Kt%$48ExSelINPK&-BGJw|&Qk2D
zXHhcy-^HypnFV(QidDLHJ~2z(w5FxOalSN5&9@2Xmnk{cOfvG@vesdS1ZyPkC)>=h
zm?I0$epX`C&N|_y)IIe$&wNX^OR9<+y`<0BxLZvTf2Jz1X)&9nQM`csGMR4QheaYA
zUpk$8;;rt&cl6|Ac2@1rhq`_)YB~60qgP>F!kYzu!wSpJEU>+H<~-lj*S?C|y|^E>
z9sBm|@X=*w#MvMHnNg9Nv!hH+-NsFlIgeY3k(-54ZNkB6Y(<W00^7csd^r$*YM#IW
zMxhCeKMvFd7@H`0UQ}+nZV|xpujvT;rH6%$U2%3A4ZrjcM?JkNw?w;&TSR%2gZL8}
z0me-a^JgYa$*9=o@_{9*#s2KM){SY0)!zU1JRTx{NwB%o@rXeZ$3?d5$7>7oZG__f
zCKeyEEjYh$$G)4wyExvqxVOA+;XCwp<)NgL{1c52T3ldE=8-(3_xAUj0`C`BMfs1^
zE~tMYo&Mn00lD%<W7Y?C1$p5s{@R7zEI4*Q%ko9{8wa<O$|wG%yp~rE*c@6c`%P*6
zmG4^vj!iX-(Y;o1zQ1zAzSZ*l_mu0l91%-m->~@x=Np@AJ*Et5WjxtO^b*~l*z7Tz
zr*ZArg`ACD6`K~wKlJ}x$o70nxbLfg=NI07sNBH+<TLl@r~2I5;Wn{KX9S)YGe5{X
zo4(2Q&c($u{P_9`#HYUhdur`dUgwR=m3z4FmsB2S{Gii(swdxy?@lvcIio}Uci;4H
zQhL9aGRR~;KBdCyC9Ajp#Q(r64`(l8?|E=9mDjk4Der*vkLCmczD15U9&Eu0b6E@K
zdL1w^5I)1abb*WxtM<XD4T3q0)(7%7$eT2z6)??V6nB)~!T0Xa)=Qjm4RIg%z9@4z
zIc0FjUUcpe<UYbz=wu@huOiXuk~A?zMf9YDO9#))&NJ?(CVHjlZjxN-TIbHEtUlqh
zhjo#3Wk-qf`icG;Z9###R|K}Y9A6-p!64OmZ1LoaJSP0QPTm)ezR23bcDMD~V)YB%
zFG{yC*EW7jWC%VgF@rO$zcN8bN6P!)B|}a#Y3;+SHb~|0Ut^j7C3Fq*>w|WWT;}u{
z7gq0xJN$uVPWSVNn?G{Z=<Z`)f876J_{a7N{eM#(lqCe-aKy2gB{C^C8}tTvtx(EQ
zmQkA1_TEqZq3lGD6oIuJ?>^l1R!b8~7tro_-C5mvyz}jm<45d-I+wU_QvNw%k*7?M
z-O4Fj+O~9;^pvz-@%*)5(W<01kCq56nzTqMNHc6@(9M-aAy2zb1y8Np#CEgkjccFU
z=M$GtdY_0s>HFmL6Tc#R*U3q)9~T}8Hq!j8v0AfQW4Y#cjr@?%P~TA7(BrGQLZ7c%
z7j}1fmEYHeuiRIOU7dDnk>8?IE4o+SUU_`w_Lcb=CbLw}&Y$LVdb#ehtC##P$z8Bp
zaq+6#>Wx<`vWl;)y87zsx)pmv)K^EZU|-=Ms2{XG$bLax<X-pYHHj+^UV4ym=GuyD
z7HbNF7yD0M_i}~g+L?<Z7w!!Hyyuy*?xbl;!#0TduXf4qzUI01cu7y5Tl&SFUw(dB
z`{nW%zuZOJrkX`t2k(xl-d?u4Y<=IfzHYxoehdA!o)etQ$98N($_B}sJ*qu_d!&2(
zm3iD*dLHygm?WL=&{n?py6?)@>rY=#d-?9_-gUn{`&WvunZ9(p&w9uA`|Q@`ueOh?
zpY@OJ@8w^%%3Iy5`k(aaNbjtFBFlW`z%_+y32g_(n!VX-+uYg~wj~`lZBys1?vZf&
z=Qh(#yQfd8RQhsnZEx!_?Vf%~`}9`^%rkDDxq2q}Oq?+DWCP)uC(|Z}O|A>pzRYqt
z>GIxX-_L5K$)&wa(>@b*rs{0(+2b?Mo2)n9pX|8p!v=wfHJdE5Ki<1^%IIWKV`}qL
zW9?Y&ZJ`rG=U%nVp0#y;>Ace3ucp$AdfR=covoT}7JcQ~)v$!<-0GWrTZ=af-&(sp
zTi#XTsil|6s<bP<)vIb7Y8z|k{yluL=i<c0-R|Z+b(Y6!d``_sO+MH6^VNe_6W(sP
z`f%OiX~{d1HzrShoUI<MzAkv{Wz+Oo#T}nzp1VB{*DKSjna-=XJ?7=Q$1&nNw(Y1Z
zEH0nC<89&VGV^l(()j0fmz{4bA6tCr@f`Jo#utkF(l<YM*Ee5Zv;J>fef90?^vd^N
z>ddZX>D0fuwz0p~r`E!5cCA&-%zZ2O&91Tj<MYR&&a|%o&)Gkf|8D=8&vt_)h3ySj
zh2#qX4Ne=8AM>6bopsbou7$OgZ7S<lwro~)mi4@K0!dPn1cSscNxzb^QvOmQp{S#j
z6M4gTo8s)#>r%hB?QTEMWiRU3+tD2%{ZZqRS&-086FymSyS$$}ssqIW7p-W^5IEZx
zCs%8KOiNAwS^vi)9}OR`K3;y*>e#s+TWR<H*;3n$-^JD>{F(G)H|I6ZO)b-0z8>-H
z+}inma?>QO$yRDTGYzs29ZmV0l30|?rKxI|8trxe$b(L8^)kPrxgl0BkEN_W**f{W
z|Hb)}Y@eP8`MPq>O7<Ymm7XiDgYAnKS(?t-XHs@+*OoZF?%iF#o1UMKKKT0C?+@BP
z{V&&E(!M-D^+$@%`4?h8I9IXQI!!BB`yn(f;@Pn?-y<(Z-rmf%Y4WD;&sIEH@=Qi7
zvvZTn@}k}%?&8}=%N_+4y3Y&0cWO_ZXG_X#k%@lO%${W*?tkNLlzmfwQ#<QE)~l@Y
z%T)cW16D+8Xzek%TlQ<wOaE@CuDaQc=O3KQn3-u*eD6&AW~Q{<dA(=+X5TeBp5u3N
z&bgfni_6a}K6853_w<d<g{dEJ|MdJk+l$Bh*)*Gfs$Zt>3fdav`SL95J#I6-BjQ}*
z+w_X!wxnIWZWMR3wW$5ms!!)d?{snRxB9p1*Y(DuFT10s@6f%uVakq>Enkiq?N7DV
zGS%+ZlGm5tzNe10*!5=DcJ1ZsY~r5=Z7mc{othJR{MCu8z1e53>%@MpT$}Lr>#GB=
z_P*x1rW^IW&Nlz=RNjBm-*QauMC|)`S?%Gn=-JNZ*RMxh3&~Z?)4XwMt7+luoA+(%
zR^MH=e{Id}>2DXkm9<VPpR=lT{o1>k8;U(;qtsWwzGs>le*4{myS>$KzUSOKeIt4w
z|C`BgtN&HMdT;*C{Y}2*gTjRKj@`|#`7ZKLy{i26^6P487GBxjlf7@-5A$y~sWg}U
zmHb8i_H5r-tNUNbL>5gc+VbSZfsKojYmXm2UhS7Mr{a#$)~D}Ia;mSND^<VhYsrz9
z#pzDx=GaIVe7j*ezxeF)z31fjJt$oGaMA6fzr5E?=llEe*Yky8+pqmP{cgJSZJXP(
zZXMqyzxPr3(<4_GtWICIaeeF_xzfFV-469eo2%b_v*YOQuW!n}y$vttdiUl2-0J><
z^>6ka`@4#rjX&#O>gV-)W%Fd+<=AZZe9?G!@$T_E$MxrV%&&Q;^KwyXsI6&T)bBGd
zBa6#D%5Q(}JZ~M}5&z};lz&TqUf(tSnm+%(xBtH{y~nwC?s}E=Y;oaxE>>^;yY}?$
zY5QjEV}D<C@9Td3-wVDc9(V8WPq$aCt-59Z*7)V;SNdl0v-Y{w>@}ZYY@qPW!=lTd
zk;lx$R<_~#ZN>$vhx6Ob7&L^ZrW>~^ocg?^=|Fwb)1SVIc?>z<*%cD?7$$y@PG!y!
zo3QeO-hY<<@9Zl!e=d*~$=tPh@+EQ6c@Z4)Y;5-=m>ja09(1Q9EU5o`;K1~?1~+aq
z$ea(J$N5yrzBx}ydHJ?;(;vA1G|#wKsMWfELLCDG|Iy5lh>{3jAFJg2T)o7U{G?R9
zirfMQ5U{bYC`e4sPAySLN=?tqvsHS(d%u!GW{Ry+xT&v!Z-H}aMy5wqQEG6NUr2IQ
zcCuxPlD!?5O@&oOZb5EpNuokUZcbjYRfVk**j%f;Vk?lazLEl1NlCV?QiN}Sf^&XR
zs)C80iJpP3Yei<6k&+#kf=y9MnpKdC8`OxRlr&qVjFOT9D}DX)@^Za$W4-*MbbUih
zOG|wNBYh(y-J+B<-Qvo;lEez#ykcdL5fC$6Qj3#|G7CyF^YauyW+o=(mzLNnDRC(%
zC_oL*EGS8Kttf$80OEs<FD^;Z_bez$HmS%haIGlOH!?7=&^0j9HL}!4)?blZ;Oh(5
zomUJE#pL{4{ffi_eM3D1{oGs>9mOTCWeEGQ>L?DWEJ)Q4N-fSWElN&xElbTSQAW13
zAg8n#+0N49RFDwZ-8m^~`W3klo00Xnd-?{z^?-sgJu|letOKMPS!GHxTwOtFQ4Uy5
zO0s@xPHJvyUP-aOp`Ia%hd|2);5tzJ1Cjv;0kZKGxdqr&!@>)!7#yxvF8Rr&AWJ=6
zY?VOnwMxlP&P=faGm{LA&5aX{Omq#*%nfx-EX~bylZ;HvbS;d{jf@hFlT3}0(vXbu
z%quQQ%u7xM8C8*6pqH7MVr7tQYMf$XXr^moU}&Oil4_EwYhh?%uA5|(WRhrZ0K&#d
zM)((Hrf23Q<{-NYWK>FKidC9rvSp%)X_9WDNus%~Nt%I)u0@K0xvsHknx&ahvWZc$
zfdSYkP&`;U26);k8R;R31mq-^q~#ao+A8@bCM);{Bh-du=B5UhB!WWC(A3z%!r0W*
z)WXQbz`)oPp(rf1s5mn}4`isJfu0#yA|=_%Ex#x?vBXv>GdD3kRlguF9V`Kg7OQ~F
ziqxD4m(1MMJg80!3y6CV(ve8gh89o{ROA*|ITxiSmgE<O<mczuDuH~YV5Dbg0M2*{
zHlQrzT2W%<lb@Uj%6eeMsVR2gTnEnEPKoLIV6KfmKE)U&1!v@!=Ycgs@;4rH5K7^u
zI2M(Z=ND!B7x^b;r6!l)HwT+yxJf~&>6yhPMU|ceJcXvt29l^fGK)*{iz<=q4^AzF
z@F3pG$pi<Lf&#dxuu4n@g`%xeabj6&3OE84U{c8$iFxU%DYi<`3<(p@#FE&}O)Sk!
zO)QLb%}vwHbWPHXEp#nYObm3>lFSp$ElrY>EiKJaO)t(*D=AMbN_9+6%`350a?i{y
z0LQa}1|*GYqAD-XNChQ8149d4Ljzp{vk(JwD?>{wLknF4b1MS_B}kMj*yw|bVwjh0
zK!ugBuN88j*nkQ%D^F0G4K6Jx$jMACf&>pZ@dT$9LQJyJ$Ds~MKRE5>6V#5Z45HgP
zzo4=xGd-ikzdR4G^;i@_Oz_D}PR%REYd5kINU$K)Saw{{pbBzvv*WVS2UnY*DijhH
zpel@(7@7oWX@!Eqs3jzY?`Uw11{X;oK$7Co)HNDhB!vJ;ibqoy)q;x)(Kk=cOR-fd
zSF*R8dZk5!fq{W7$=lt9;Xep2*t>i(0|NtRfk$L91A~|<2s3&HseE8yU|=ut^mS!_
z#Kp!ZEcnw&+Kz!ifx*+oF{I+wo4e&Pp<m^Xf4nYR(0E$lPPB31LL-(dNA?PRKQp!3
zWQmTp@9C+`zn3bVE1I(G*}ZAXS47<V4)sQ9EmGq+)yf^SWBTvkkChfW<)zll>v!*W
zk6U-Y_}urJ_218apY#5?MvvQKNyQd{pVwYoTzoNWYt6r(&o4<Tg|Cj<swHifvmj`x
z*ComG6Qfp!c<FN}y4Z!S3{w5}?rwLDe8(L<U0vR-QM$=LE8AULofP*fv<UpP(b0?8
z!0^*fpi)t1Rfv}4>GLyP@)cW7JdcTu-C6LksiftEaAIJZsHzT^VoSw2E>W!(r-ceu
zR#8^Y9uXo!VyyyBHfw^GdKvB3P|@My>~`W%+!VGk$(6I)>CmDH8#G!3l(LOBBjldG
zdgWD7v7@AA!Um9_V#^6Dol|E(Dpf!>oCrGFCHnLC`~9BY-jmM>2#K|}xGJ^?eBZov
z>(YXUhhp|tg(6w}=GE2JU*6uH9$)`A)XMpZSNXd;3oAZ8Qc+j;PWOlqapL%>bYgF}
z()P(WLGIyDj8o8DsH~#X#lCVWcZ+~i%<i(@&(F^n|NN9%_xr7R*7bE~<?H`AZcIA*
z#LjKvVo4shHhy_OFE1{wsa)FH+DuGL0jsY@L`NIv-ZJ6rKkt!fHv8-)UMG%;^|H2A
zBJ%R`bLPx3kl^`#_kHMcKUqFG8;!XtMQf%^5iybS-DdCa<#p)TF*mDMk{(yYoH}aG
zoj%>YDfP6FxOjPdK!}EjNzRP`D~~4`*REYFd3VS1^!(CycOsh>f&G8}kL!XEqvB^i
zTcY09uD7<Z2v{Aa`%+=z<FD`U%d@k$w;y{yLD6|h@jT|8Ji09cObq7c=I7>EPX3r~
zZEfv0-_G~vr>9GcUHZ~Yq%;E-FjVced%Ts2l}kWaSoyoQU4Og=I6N3w*x8fQ)2Dw-
z-+WWYG4Kgj*4A0y-rimupurHZGUUsfo53*W1TS{$T@kq0?bv+n&{ZmPm5O@n{{BkL
z&*yLG?d4@=<C)+$5ft^0DrcK!haG#9baz+jQfHTS6Dtj;b$4^i$;n;Pge0B(d-v}B
z`1zCZ!Ry!BvPwm~Q@vcTuZv}rFv*xua-vwpb!V%<BgxkG_Rr7f*FT&2IP=;XN$avV
z53a3^UK6!d%YXjvs;{pCH>G$=9`V;OF`07W#0i=0@k_m?G99>i)6%`~?j5UpDW+Ql
zoNSc8YfUx0v+HO!tIX#!hDVn#d61Ahclr6-AEPgtWL<2U`r7=2LLHOZWX`QozN<f_
z7Twu5^K9BXiJ*dw7oY5?wf^QW;(IUWXjkd$qPv#UXSyo3RJes{i7wWfcJS~?Pb+2d
zn>qQ1FK#h;*|D4>#jCYRa{ctF=PH<HZ!wecH-37mc6v1^r*J5?fTPoiLoqRsL$O7`
z37kq5TR;h2z=@!=%RF`e0zsz^@e|A5PZufCW@)&Yav&#U;gyJgFHMineKf!G{>MY}
z6&GoAf1dGu!`8qAw^paM8}_WZl+d@oM{%h^nW*dz<8QV?>umqtv*4TG<7}vXsYgrX
zh=6giQCr4o;ncO(Jc|p8%Y({loWC5~?7x@y^}NX%7w=45A;_rWcKUKwG4pT#Wr?K;
zLgjo%pDsB6^ts5IM?SLzMY4Do9549DQmB1B;fj6Yge==BPIaamw(UOnH8DQ%RP*zI
zG_h8JN35H){!3{cj(4fw-Jz0fXtGq~&d)Nv@_##eOLubVKbKGZ9Qy9e4W6Z53opi~
zX88#Oc8dvY{b#W1x>nM)Mz7TGXJ%hK8`E%boye?7nRn*dXew(kEoqs@YQ1~8b=-CH
z!i*1649i|$_vqZ?<mJX8+Yy`D%Ckjgwvklufwd-m|744=<f+!%&un>eD8ozCaetxQ
zcK-Vgudh!GDo$T;(JnH2nR2_wLBXZ5_vY452Blv82``^TCOv1LT%_0&vRAODP(p8x
zdg0NMcXRe7R!%-7w>3L;%FO*swtSl=KUK3`>+|a_rHGxO&kj~B(Fx|<n!WRPyidM!
z<T4qqzotHH9=ofBlaGk@h@YEWm0=qwT{cN%>W2^0ox|SRx@q|CTge{Gw9IF-vDfue
z8p8R>H3_F&Uq(rmX14nrPs@MFoGW-GwQJe_uui4J_uZm5H7vFNe(w3P8LTrHT-P#+
z<iBsfb<B}1iZAu<Ue>UNMO);*EtR(~vN16`VDyZ8`tR?>i*(zkMn7$D>bbPl=+z;<
z{+!Dx225%`6CW<g;du6J+y1}+p7veq|2|KDaPa!si-}8)s6K!BS!4P_P^5XyvDclQ
zFxBBsPvi<IzNI1N{%6=k!_H-u*!LX|sb<kzZ#}uny?yFt<+MpVjI%8YSI*V^TBpnN
ze9^_apspE)%$@yfI!<5l-?;s9;v$2vTh@2x?Go@kuV^u?_}GjLhur0k`xQ@oYF)kU
z97h`KF<*t;ha5uLVJE-z#=Ly^ZCOpjuJ=iEyfZy+SoJ^olpnZamhsBzWkQyPoUBas
zOB%iguH)fn3i-|&{e1nh2QOAAAAQo(%k_of+gA;vb>`QWEYOSLc>I#r<$lH7MZT*R
z2Q{8#(GpdWpLJd~SbIxZLf$VsSDA0gb!+8rC1oY;`ysM=yVOFL*3;^f1K$1+{=LmN
zaH7kfw*P{gJ$*N_F5a+j<K&J)?-Yf7TwR-Xm=vg$oz%DZmhIViZmq|RjiL<8KXw;r
zrn|rTnd{>BQ?UKKc)$hId9&x|9eH_K=a}2)S4&*)A6r$RcxL+c!pDoWkIoiR{uyPo
zZT^jDJ9%{z5eaia_T?^T%8mxi6!J7aCS;r|{7wGO{j?|7SO5R3aV#or<=U>OIz_{K
zg-28iE;`R|Z1EP|{3k~x#%Fi^o#&25n>AVl)_*v?rC>kXjK2Iw^Id0q9+f(N&bA|d
z?|c)5uixMAyi?Alc_Zm_l;<0%ipAIFt5+SH?lS)$dn@k^9^L~RU-+)$mp=EkVd0V2
zT!Akhn;m&;(I;sj7k{poH@d_ABY&E(#_8=#zU*<G68Axt^^|DEi@K9Kx1~Q??#eHZ
zxHr3^^T+z<lbM#d{?Gln$@Eoc{NKNaH+$8(Jq~%JHt}#<aq*85o|l`Q%bKebm+$kr
zvc~exUu{Y4?b{;W$M+xPcy`3t<=gvnZ?)M>O#gaB{^0FY^N2BTTfyqxVg0i|b@sE2
zckg(P`*2-6%)DcBo}jsToI|7O%Jg%~-ye+4tN**VXLGpe9_eoftlmi!R@W=M>tftm
z&3&mpR59-lQxL<X_fhB6)r8E?IbYT8-dOl0Jji!JSp37P5B^75;*PA0bKS9FT68kA
zhtbWX-%*|`?zJ&J&FT1AU@>uX`Qj~`l|N2bz3}v*nXH#KqcGQ+!kX{FT`YPFGIYg*
z^xqzNV5@QZd_%0+{N6tTOF&JSBdddUh)Fqw-jRL2Y^tYS)s!8z?jE}*Gc8$gzf_{*
za0*k9;>#bBB6?djw_8Z8wx7OmcL+yrsQ8)beo=X=%+)6pJY%~t#j*IeURqGc{Vmdm
zkBO?p8ZQoLeEFzq-CpB{?B?CV#sAE_W}mtLu61+nd9y<8T^Yu=mdQE<a)>6py~#1v
zIa($ruju>1qugCe78Rn3Z$EW@k^XnNpV@tj=v)J76N{3pcB_9PyPS7>?wZEp5OpN-
z62s<$JlD)LHo7uutkKzQmpN1KyXJe}NxLHSPDz`uQ<fAxXS;jBimOboZ`gZWIjuA!
z{OHO3R$j|riAb7?{Z^6^5jnK9fBDn)%>BjBBXqL9t3_zm7U(R0{%+ZiHOG$~t@mJ_
z^es>2=?%3<<+6&u?(%p1+AXD;rgzAXHFM>Hf&zy|(`$KJ7PWIt4t((sGJE{?-g-|h
zpO4!;tiBnW@B203#QJTC)1Rg<u<0}1^fO1~-0$yaPNiljwp4`XtYsA5^?vz+-QS;v
z-sNA-)Hh8aW%9l(Ndx9*8$x5^=XoqV=Ue#ht7?{7f|to+jkjG~28=7i-#gi|zjIab
zmVT!5|7QREPb;5J_4*#?sib@Rn^4F7YQE)qb|SNbcpv}#yh1yC$&VwKB>MHwY+?Fx
z@Bht3ml~Y@emEq#l<U$WenI2p^k05qPLH3>`FD5j&+L;kGSBn&zw`g>ne^?>0++>M
zQ{Lpe{J$(AnY`ZX<~xfWcOHE^^mR$5tifAr=Y#v^Zm3+W`%2y~fknu?eL~o3W7p-p
z2Ws-THE*O@o4u1h%2hq*nD4=xPmQiKxdj$DPI&s9UA$;}Ge4`{%-*y1UYC^kh3l1i
zog%kI@&qKRbsj0TpHPr*IPH+#?DGxEnVl)WBTv6B3<TG^s)o|O3pV}#E`Klky1}AD
zPrE&DOe=J_S|G8seXCQU@zqcNQj900{50Cc^!gj~M)N-hdJo?9^k9kp&ZXjdk8$?(
z4XZOUL!-}&CLQ+JbKPx?r837X#l<;(Y0E?wKizBi@#c2%++#ax*)`R&bGl}B%rlzz
zBSSSSU3k~j_Wyr_HeQUlWfZUa(aWgo)7Qd+;HE_;S)vi4I(>FVlBHj_7k>Qi5V%Cn
z^;uolwR;Ji%xaJK*G|<`(0J3epk!^QWoyr4FYk>k9<8~8YxZa{PuNt#tf`iLW!uFg
zZ+B}pzZ2b9zgsiY{n~_r0{hP0+mbDIOuNml9FU<Ecwm3aQ{V9Gg5sG+R&5Zwd+Tk&
zzFB8)T$q@~61wn<tN*vJ1v=^Nif_VwT5UI8V<_`mP<s5BA?Mv!rFU*x^q;usIAc@x
zWyi*wGwh}b8P|IVZ~Dper}Ldx&bIlz?8md?7E77u-La6o=EU*w%f8vx(|29->biX+
z<u6;S?YG0n0yk6%PVsVo^D0!)@{75Trl7<c?S}zNh4^}JKX?(?U>$6KXx01O8QhkO
z3UxbIy5!1eiyd8+6FEt5*~3d~t*?o`P6+lnc(>J2<zJCVYqjv4+rN?@|5f8_{r7Nw
zW_97uHR@7pdClymdj2w&e16{k$i1Utt;Mc6r&nK;x10R$=xgVPjQNW$W^Gs(Hm&96
z!6l*93-)qeHc(}H_IgQZweZ=kymxH>h%aNBXJIkt$+z?M3=e+1EQ!&wS{Pup_-mPJ
zohye_yu8fo@<S_o`Lk3L9=lvxX1ii_xO4PpUM`dG2TLjh3(d|Jt_u=b>g;&t)g_A`
zVNR)UJ5K!(KX&Nejq4wdF3gh5(VBYfR9JH<EBm8sbC2J<FWvr2H}I})=f4n@8OjFL
z+YbIZmG(~V-)sBx(G^Yeq|dd!_!L$ydBk5qSGRY)n%|SZ*H5MVEcU&BQslE1li4?w
z)_-l9q8rwC7GHPE*b){IzPkJ8t$c|*DG$T&V;gH&F0ST_JoHc0xU4k$j=9;S`1g&$
zp6Ve>S1k&0nOyQp=au=a<oKn0eqXf~1Y}(Z`QlpnT>g#C-3@D>bvm!D5_xe!vi()x
zow%w;3(X$q+zMJ!ra8q++r)0F;ho$52P(eYI6QwP=VgO8wzHnTUL|%|^GUSa(yhu_
zyQ5X+b)6~ObGJm(=lS+~r%Ip4B$~~B+Qz5YQo(jF@2bSY(8mv-d@140=w6`pvgzlc
zpYGx>!bBWnFS1^4{rvrGk_Ojw=chc!xWnh2pMJPF$3JqJ%#j#H-s%hgqnEjdE;CYz
zJuV{qzhmx3_t2<r%|~Hkwc&hIW6$i*)#wVSc;^zdY`^oJqyOKXJ^TO8qZipzBexb#
z=6R%^U?O!fBD#28=HBN97v2QkQl0S4LGg`yA8)JesY_9pl=2!EajbH>7*RUU^Wma;
zA<H`6Q<qu|ZeLmOqr=LDSI+g-<eu&BHEi>~ZQl6unQq_yxYqe*IcG|r$GnRaaq0;7
zm2Z9Nbmj5h>oTo|-QQ;|UZ2oYSGW6%&28`Q9p9H$)!VuUANhIw&P)~`LC%|B?uvT*
zy0=f8!u6{4(u$@Pb6w(nCKpbXo*~cI)o*HchI7eXY2jxYKHIwkE-dEI3J#rPwcm5q
zMZVop`wD(4tn|0KC-+gZ;@*5y@UX&>(-R729?9N#<K`WOtgb7eXX+Okcdb}27L?Io
z|LyDSCdFwjw%(oh$|8Qb9a<#d#KAb@aHp)a!<sKH|3p7(Z$JJ|>+c7q6ImIcZWpQj
zubn+tK;BPJF`I2T^WY3MfB&<-K1KWHMa{aY^Z2jl@o2reU7J8%y9&3Zr>8A_et!Rg
z5RFec5^MoaHa_+?o-KKEQ!md0^(8?o1GJ`w@N-Tr{MGU}_sZ<t;-{A3GtQ>{yz0QA
z=(0^KWW|AZcXw;X@7q(-c4A{2pKRCe-PX?Sd{fU=oO?14G!pXPbM@6l*IzGHcIgun
z6?L7c<hn!|)a(F_r?j*{hB;JpxIVJv-QA^{7zhf9j&M;iVIiR<$}W9s#@*n4_nt!=
zG>p5I)Qu&0RMgdj7dSA!<N*h=iVj!d{uL`UxWx5zWFei@7FWeheBDKyQ6RH8IJ=!Z
zBBp@4M^1+p2?&XS$Av)SJP5lLA=YA7Dj+20swX5XtBX*S;yaI97u+8PS)~efqDO>?
zl6PTYVMKIva&ofrT)1Q2sdq=C`?yHU+S*!3Sa_*2EDT)4;zhhvrFz|%7EjnP!yhu>
zlk)V+%HWvYWxRXi`WG)&K6vn;h6tCCkdTCBkxE%v*(Fb>LrHcE3}&BAd&#EQBEVnk
zs3CIW)~!Wht3AEF52qL<nn)Q+^@4_tf|h!Ds&|Ix-Pn-0W7jUDoEru&d&FJzoVgWS
zPFz~x*zDoysc2^>w>QqesL04|@x)J0PkRT5Shk(f@dA%ab+NFpEV!8QuG)8P*yXj+
z<}YO?CZD(r@mGcGojZ3l;`jZjU4Qe&jSCx--NCYFAVWdxFC3Sz?+IId_Qz(grHjJW
zMqM&=Is}nox>xmD*Jyon&fQ(6T&+%4ZcjMWCUeg9b3fR`y3`mPW)-42xw$Xz?X}jL
z%Eivk4$|;aWMZ<5yZhreZ+e0>3&DA(BV0{EfuYsOv8c$%cb3USK0(X2n5H_*B^uiW
zoSqmN8XC%27PU-|RJ)#Gl9Ze4>)~-i+qP_~muoW{?}n{gr#336K>fL(_V>4oi{1HS
zVq@jCvOg>R{Pl}TKt|?|SO5QacXz9(sB9>E8>JP#PKQ}Z-pj+|!KJ0%BAP)hv(59p
zAgPS$K#rMl<)@UU=H{Q@Zs%v-+LF2NZTI{A|K;rM?bFW8a9kg^H>Kc-absg+&DX2p
zH*VcZGLYD?Y11N?PN5Z7vySyh9)57JdFf=3?_K^C{r;9)_4So&O^ppZpUeW6PNAQ>
z?0<cG>%BH=>x-+a#eaW$t88H*AuTN}sqwRA{(SkqzP=YFRt!9B%;x#`Tvmn%iHobt
zeqlPnB(B(U;-HhW^T!V#4xByPJHP&)<$d;})pzdPu_$=JP*_;_<JYgEkB?kmPN`WG
zx3@|~Rn_*(uC;4*-|zdqu4DqJs5l{iCJp55k~cRdE?A(T<};%qN>{wuX!G{%&wsz)
zpZ?%L<CV41)92Uy^0ZQw->`Xe^^3aD?Cfkdez}@>#>$U>PVBdF;+S~&$gyLWc9-WT
zB_-Y1UvIzEYw9GwpZv^hJUiZR`f_Hbar~=C@*h8K&Ai;UIsJT+(M*N_t*JGiPO4ur
z)bBW>!=>oLSM=jUqK%DB#ow>jZ|*E!&T{m4$mTR(Cuir*uC7IXbFKb-y&iu_vohlI
zv$Mu&f>XUz*G6w&R`Kx>$b>_GjJQiV6<bbRGrwQsoRFYU{OruZ`}gCmo@nYuY-l(+
zS=~7Qo{d%cJ000c?2<feGP1Iso}LG<U7L2eoj-W78}CaAP`NYF{O;cB>2|fh3LYQh
zou(V@Hq$6|N$|XjpyAl}_x67L{{8vv{Cx*+-HNhO?f>}g+p`-Rldr6co$cJt=h?o;
zp-3`{LosgQV)y<>4<9<N3|e|)YxZ=OqtA~VKHPlaLcqG%-N&T!_Z*z69lj*^(+soR
zTMPzycPt=9-b8a&R@R5_-=CkN8T{kd>-DMMURxFy6(t?*5-oXqOH^K7-YoBq#@tVH
zT3cHKmwJhca2@sjpa15@MusgpH$Sy*xxc$SUr0#kiHb6KxMXQ}clYA@|Nmn4)y$Ny
z|1+`Z=_$|UdlHWK$v%F&{r)oNc0QlkX1v<k+ApW1i^#~FY2g&+;Nf|)cKf|k!N;df
znQ~<L{JK;7YJW#;%@U1^i~IR2=Jc6G0#18e)cxihxO{o?p;m5TAt5ElkE$y}SBouN
zxG-i<g`mB?y^@mBlHhqkIXOCSZ-dj1x{8X*c~;jAB0u=}_|op|DBSb&+3cFnXU(5H
z-wCczKTbL`)3{wsH_D*yPsK9-`Fhjm#v7%lr-!YN^W9zc_QSVtX|J!Xy`<@%@&Di7
z9lLi2Z_9~va&lS`u+V9q-Mv0p>rMIh?XInfbY2~{wxh4_-1O(Mpz`ksx0t;A`2&s2
zoc#RhH#enL{^K??w<vnTQTyx5!szXJJ~NG6=k<N&P@CM@DXhLJ;ULqqb924B#dJSi
z{V3h*rsg;2M3<;`#I_vCUTO1l;m7rjyPY^bGPSn0K79GIbMIc;>Tho*E|}r`<JT`C
zDXCK{gO_JqSim@O;>1guH#>TImIN+#`}6H~eoI^1v)v0g7KE*qw6C+dU-w&gZ}s<Q
zpU>ML&oF@$!8!Z>ev6)EmV4^O#l@U_eAC=7JT+kD7E8Iit8`1w%}G&Pvp$~I-|w;9
zCgJn5vlW$<7ngVno8{m0*_d?nQ)#_P)|HNCcK)J2KMMc+`Lm<yYt}xCJn-QA`I&L+
z(?2{o*wWhi@o~TXqgSuA=1w{+Yf~|y?(eUQyUX*{{N{9cEj@MpZ=!+3j~|cwZ|*A9
z_MWD5@cQ*|Hcz#*%`+c7NLc1S|J<FO#h?F7Zc0fD<WQWq`0{f9$4{TOPMs?H``g>e
z3ubtO3I}F(zJf0=0;SCJ&YYcX{y8^(j(xqHyu5tgy*(!vI=8E6q&Y4O5D*YJ(8eqM
z<IU#t5;heEo}QlIHo%|lN4v!@uLx9j>ybEkXJ_%oty`!1opj|A*E@5pS32{?2E}>v
z=DoSM*E+s0(ca#kjaTZ(rqt5~MMaYuKg!$K*jQA2P_UZY7qvC3@Yk1@HkET$6@Ho{
zqu5fR*4f#~BV#dPYxea`iHF&wOtVzxPV&|i;d*s-b$fd|``=$*mG$)c*m$KD1ZaHv
zQuw;W>esior;qhYM{duPJv-Zc`oRw~Or&_<-r9Qj+_}E(_v@tD+1dB(-TV29;VDpN
zuxT!Se(vGhx4kD*9&Nv0x47u(sY{xb4u5}rwJdnR@caAw<(Ze4-Pu*DeO~VM-sh9E
zw?=(?f8T$qR%i-?O>5lVs*Tm(^Tc!_9G3gd-BJC09k?)lKTR*z>+i3xl4dy)pwt3t
z4ro?-TwLUOb5kmJ>8mRT4<Bx}t^W38<zwU7XF++mzP^6nuUFb>XJ@tc_V&*Cksh!y
z$@Sa2yWSg<juw=cKYwtrx$@DDCq|&zhlNX)sI>9Pp8E9kG}w=`Oeg!DTr9%H`t|ko
z<yBu_eR+FZd~aO;{Q2?g_Up)sin=cMo6Et?o&5RP*&jcCoH$t*chGC;qRP+Drs&7d
zv#b3zVa^<%y;Wa}zRs8ko{L}-*NZuD{5bpGxbuz7>?^|7&MNV;yRafq`P=*Z=ilAk
zoq2JQ>$<u(x7Wq)E+{WQe)w?nT<h|nT_u^82lX9Rhppw{<xPElZmvn@r50)PJkX>>
z$DX{SU7~wF9+T#gG-^58Ex!0_R{X7Plk?{!)FlXfZf=UGQ+V{y{K(wL#}_CxH8DjN
zKdIXG)<x$2xho9}*XHzAKTv0LI`;F^!&#pbb~&GT#mZ75b=>mrXZ@qqe@{d)F*nb!
zuebXwp098z;ilW2*1IgMtb!6-wEmh3o1c59RB$b<gnvt6_W$yleSe;=v(b&)V{v<1
z?%~Uqh57mU>wdr89%EV;931@f%VqzU*VoH;i|IZ(t-t@w_pd7P$7bBzS8H8YS65M0
zwJG<u8MuBC2TkVjNFJIVUw3kvZuE=$`~Q32XP+%Jk4GwEzVdzDlvlq`cbeS&|Ji=}
zm-JWV_X}@rG>onBPP^?l^OR`fdAlz*5=>V`zRER}bFv3mey%E1RTQ89{IVv`9Ge0*
z=73Avc}q>%XWCYqiAHT(IOSMT<++nr#aFj>7p}WLA!wFJUvA8)?eX=ePEF=qA(x?D
za<XPly#AL<=bOEjCcU|_5mXSbi`{Kh`6;F7@Mc}T_<f+*wW$4N!p<+Z#Ic#})4{q(
zNAHy>PfkqKiQhM;{C@3pe)~TYrc4n5`z}0YR|#i)e0)+;5(h8uQny~I^*Qk&QPFQi
zxy~GWnqx8LW#qAw;*mQ&_#3YM`**94Z7<(kzyI&KZwXCVz2Z>RTDE4ny$|DFg#@0h
zH?Y0GfyH;ak{<tY+qrym?TWoj6Ac*En636cY&3uU{&t>=hk-3KUs9!O^Zf(*AKVVU
zlAW2xF05C0C5vP3-YzDotgY{lpO`p5$?BuS?ZWiPvNddL?{@FoRD5wwq%oh2#er*U
zqcg9r5;Zh5v@GNK|K+m(j~_oS>?}^dwl3Ctp;POV=D%E5RtC3&%GRf+L_u@1N4v#A
zy$qMU?Nwj1UR_z~yua>m#I6!eK0dznW$ip}Hg>;fo&S^jBk!61o!!}bN#C=k9Cp7P
zxzJ_4VWObF$AyA>b9c<Bocudr@nw;7FLjI)x|q0_ma{#0_OyF_xbMDuWwT~%-Yt3k
z;#^1Xe>D}mm(5<oC3Eyj<>wcVU!ObDyL@M9jqzctsz;%hd;i7Ota$y}^+TL@1MmHB
zx{S%qGru_6-m#B3wKaQT&EJDTXEZe~@OQ!10yx$f<%*j7HtdylzPI|=LP>68z0&`C
zvt?P2ZBr=rF-^P3yS*>}c~`x8jOqHagAScSsz07ik6#hBHOsB=*&G!^!-<T{Y#)w_
z$8X5Itah*Zz3tC{{YmG}om&&Jv1woJ@1X5@v2{NlvV)U=^wK3uKHRH*Z&>ui!z}O4
zi3N_$MJMJhowPvHbG6)I4{Q6$x$5tkF0PSSyg1(4-^f1aP1Ok<yB`PVMh4qI*}QV~
zk6pjmcGYibtS&AIdsk~&Yt{4KKKmTM^8N?wlfGu$Tw_$YLjG3Pk@LkICRd)${eJ)9
z=JT&Mz3SHQSaoMY73-B1vwafo{W1vSt#5Xft~)MJJi%`<-^K5X%YS4X$>`bd_jq{+
z7tj0)JfTw!&-~pYef08r&Ffje`Ifv*y}6I$0pILncNdgjzAiU)2It>G+rQRVl+^F!
zE)QEh)1pwxIPJ^=NV7@TJ!YC+<tLT6xHuJcbzv!~uHCzL*K8=B(8$aV%IVAe=6b!q
zx7X9d1JofE*m?Bn>FFzjm-CtD-FfifU~@%9#gjF%J_bP>(zMw$LRi+`@prjVrnmR9
zbhtQc=S-%nFXu|H{IpSgnyQ7?o!NVOU%p6G;+ar%{fxV=Eu-kxT|CEb*K}_FuktwO
z{ghM-?%O}PMdt|pEPh{?6k}yrdoANsmTutL-EpBN4Y%aZTo>-xSHsre?ap8DX3?wz
z2VQQ`$@kCla;nOn*%Ws2mQQZ(>1h{lTS$M?Uii;;<;vP4If=hiUo5-I^=!rEPtRuO
zZ>s-acWrI7`)sq^j^5td)Za$c-*RS|WFG1eRQ~a9_xlaow@>FU<9qV#*|C3re{bBh
z>CoZB&BE$_;Iey8-s5AvKYsnXwAh`0ntpuW;>F2*w~wa;H6AMY6`vopy=PJA`d*%Q
zalAY;^>3`&sJ-mbp?TbQc4uE$nNrBG=F<N;NqfH6iGJf^y|!Z0o+sRDLc(XJXob&y
z_V3@1-{#9J7kjB3S#~G;#r^GCYdlS*0#-<0I(J|G|F^dvzvue}HuqiWS)00SVdDOY
zGlHLA+WOq$-mBNYPux5deSCZI(n9l?t(NQ4VnL1coyF>+S|OlTt+IRHo-1$f?<{Wr
z_V)JXijPT7PEIfG?cH6oqS(UP+B)t0ytC8e>pV|S)17HoTh$k1pqd!SG4cJ<Wy?N2
zY?n8x`B89fZS-;L_j@KAmxp9~KlxVtqS1)=*2DDyi(cBw=)1G7(4F4D^;(w0(d^J$
z34d%`_FsM<{3rW-)EcX;%jU41nSRN`Fe)kXQ}5?Dp{2_ML)mlIivAIm<!|Vnx>dF8
zRz_idNOx?qUCQ-V37@vvYq%s1&rDgWS`ih!Eju-L=H4RBAhzOD*ICy66lP{SwEbWI
zO!nl!CF$PHj_y<5-EYW@Uuii1XGTC+-E_8f{O22HK1vP@yRal)W_wK2yNh?{&WZYP
z^wg;?&ERDpzFv>Nye&66?aYjiLEEYy9AM14w#IX&Q7Q)~=R&7euJiB3r#uhbcCw`X
z@6Bn+RgtN)d1T^jyAS;QJX2oe$g-YVQ^9SyoQ{r;XQFB?YQOz{IxY9rm6gK3zr9Vq
zx+?U?@88dNSj_{q{(jy&(8z3A{ETPe!i5!;l}}I9njD>%d~e2o!J<5=`)lmD%tR}z
zE5!G`dDPsMVOC>2+xD4>Q`ou0vmaki$uVrIl#jT0_`R{%`S}N?tKUz#o&J^4blrQi
zq_Fi2&-D2o-Sy|&kny`<&G~m}FZJCGbY>r$Wq)o%>e)08>r<!X%O2dxV|ia0pIUL8
z?aG&|V|Ulhe;~QHUw8lV55M*8i=Vs^G|Ru)tXaceJ=u1~_dx5g{`alZXN%wU-(k3G
zx%AR3lOrYXw55KWQ+&2>#dDv}ad$KCtYvMLw|F*V^X{33>59uf$m&GwogZ<Rb)QXC
z+q68_;y*J!egB^R{@z}ls4X3u!OQk+c)MijQp37G6}w7aIt2s>1TXhHdh6D&oq3OU
zPRR|j4Bl3eW%hJNT-Z%D-mQP{t7M*wdG_x5qv;#hYh_(4e!SaNJoDN6ZnnePnzuu?
z<wTafz2&+!>uN!HIjGZhyeGZY(p;oN$f-kog<~^YkCbWGudlD4?@Tt9{PKeL!HX4V
zKb5Xs{B2|U%P&>3E4F5su3CO;o#T4;wPI&4%kx&v6<oVm?fRGhhg!Ko5yUN~Q}Hf0
zet+FuIoqlQAzG0;3KWlai`N&jPpX-hcrW&5vS0FvKk*V_>qCCM7XNu>xA5&VnI^Ko
zg^hRbNVU3e#iJH_Ou~aN-hHiJ^^F3P8Afd`S8n#~ZTV1o<=v*KrtM<Es(;IvoBi4y
zmmUpVeM*9xZ88@xTgk}>E6$pz@l=(}JQnA2;{ONvhe!4aHy1GGy`Aqnx%`=N=IO*+
zQ@5W<yU+DgJ~?mOlil<FzpF3$;k9S?@`4{O@<wijcjQYx%s%>LareqkOyQ5)TUrcK
zP6+(^^73#Cr*OdPtLsZ-z&+b1iCVqi7A~0a^Q-#KIjgT)*@Xq1$=f|EovVvsRg9=e
z{1V;hZ4X|)^n84*_vWV5)7`vXA7W0KNitsgcIn`1tGLqIm64BYzr}f1yiA?lbuH63
zy>-&Z(|ieD_wTRW%zyFw+28Vae`=jJGtIj*1JtH~_t#!&%jSb}f>TG#p#%eioErvh
zywb<~?f)K8pI_4y^wVhHsrh?zg1vqPH`~P*He62JEfM!xLB8*Pmv89)>CxLfWBc9-
z-<q^3jj=uX?2PsARc|a+-x?z)$`GG-bJ?L*?#*?7t8Q(}?cToKJp1~(so)XQjyI~R
zst;ekKK=Feb<2tm3j6l$(=TVAw=w7P))0rA0{vco3$*-R*Znn=wLQq&{MB3kcF4+`
z(G|1xH>Q0z?8~`#sG<Coebu$!qEm8%G(>7X9AsY;zki;;{og4U7rXDDv~l^g{`mq<
z91}mgy0{1k3x7T}Ph|4WoXd;)-7jWkyNb2%<8Ax!JIU4U=CXNt89T$bX)5i_nS8IP
zHk{edwX4}^Dw7mzU}T<Q@y%tMUamh{tbBWu@2;LrzTcbTuRN8O%)M_eo$>xy_4}&}
z-?!BLS@rJZxpKL^2~R_(<$n6|CFSHK)jj|J{q~t<@^Wc@e41FRfYN-AeIIu(WjXcd
zC(GS%t0$SMT9uz}e94jhuQ*#z{RC$vmzkN_`n1F67v=;@y1v?L!|70Q!aSqx+G2w}
zlQ#Lj%dKX*kWwPOY1d=p<x_L3QlH0xhM=D;UJ!VwT~D{$p!!=*&g;FdyWgCeW3$ai
z)&J7|09{qDf6jMU#nyHo-r5o7=b`s;&ndH;(~Fj0e_em!?_`H(>~#(We>BSYKB_$v
zothgoea+b;$F^qPUGT~I_?+3N&GvExS-EpeeE;<6B};x!{l=j5t94Z&rKdjW@=vq*
zX>R)B;>(jorKUD9i&%J$9Fr<2Dm?R}YoDA^=e6B?EX<as_UW$qYh!!<lI6pP54Ysq
zowZ@ZhC6~rn>BBzTzGA;VCVijXO%r}s#%o25@BZNdvaxEuzv0BqVtOcoSqnS-aIFG
zL|EvH>goJxN2fCD?0<1|m8$Z}M=KQ6a&(*3WR^Di8+G?Bxp;D`K%TFSh3R|mb=!Z=
ztJD?JRrNZPSz~l|VrT7X<LoPCS><a^%m{3csMMJ8>Xt~~pB?l5Ik!LQUNGbSN|UHf
zr^2MxJ{E2_TYK%}%x$YL+P(JK#<e#2pX^p}W?=X~&1R#BqFmeW9abg(R|vU_ZA*xL
zzdqyVWkt4d-@{86%R8z*GgvlLT~&W=f>qWP)mKI}PR4c756|B5-(8q}T%v4R?varH
zYF9jbAFk`{xw>R>w%C+&VoKr@!3*F%#H;!%@l3mP)ct%6>z0r&8;>pCVXS5HeX7mL
zm_<ukqK$*X>K%Ihf_Fr2R-V__RUbY%OXSjB$SS#s#w$03+sIs-lpmvVO3wS_<S*BC
zRQ(Tdu}A61om=>EE@za?k?rEQKL@V%a=7NdGl20`(pJu!n`Zp;%a*wGjMpr-ukz_?
z&!elAwLgNo6D=oH&lH;9NcLm?x_{!?CHoKUF<<^TVrT9&RsU0&A~D>*CvS__TUdQs
z@5~%)qsKG%&h(xqVLVlXlmD*Yj9at$#pfq@B};$%BX??3vgxO{S(pC!g*|$n2Z>AJ
zWjBgh{JGc?-kwO5Dbwwe?aa9=)wJuIgp+mo0o6Pe+dp>6KdisySbtZ$z3eKxk4w(&
z<IlKv-<Wf6+Mz6l)zfxu{54%!qVH~!@9Cc=>n{ofKL|JZu=mXO`}@whueE>N{V~*{
zV6W-hE$TmSEN2egTEuC!Un}Bdvm2M{w{17CbDzmm-RHRc@PdnvW^9y-ay9*b@J{ut
zVo=A;-D2Z=m#trp8b_`<vj2Nbx@0pr)>C|+2DS31uYG!$J#N0$*Zcp!OB*!XS4VpF
zf7$=NRknPRZvFPe*B=W-%+;Et{B-A^nZN2*PxL4D$439k%KPLqX9q5N5)u(zV#Pf7
zmh~c|7eCj|?2TzU_k3QBY8_X*xPQXat%oe)e>Mj5)lIn)wy=Kt^_vzCg(ExlL9;$b
zv^l5pPm%NNusIp{*vg52+riySWXsq6IoQ@c_2+{9hn?%c{}WP`l6h~s<l-ZZSnW*{
z-+KKyaEWJ5Wizk3`p2p>(joa5`WauVKYEwtT(tSrCZYeTkks}^{dd;fNI9EnC+=U{
zRPj`7-J@gT!By!$J4_Vk#eCEL`2X5b$6JAmlFn+xa6Rs2TFw+TVd@Y4si0BumI@YK
zQ|Z!$X^ZtPi9V9FIp*~TRAg=!vS+$2)H~z)>3Iu#j~niZ=khz8_9*U%P^{cmq57jo
z7V>0np5qz)E%8^Ot!*361n&#}p5T(OeP>~relRoJh5hvmjEu^WdkRfIr{}tUm)V}!
zWqW*217pdX8~vH5zuRBg=2<?G_p`b|!j)g{`PY{y=iJ_Y=2pqS$Zs9e>E(hyZXEw9
z|K8u?><Px=otcw1PMK_T)vKmdWV!wigW9hQQDr|{^4m-v-s|PtB=r9&cyQ>1p^r?Q
zNmHY7nPJw&GP{qu%)e&m{M#k_*l)l8`-bORXL_iay>L5P*<JbnlpNEtJ%@Q;Z$0xS
z$nvqS)^Q^dFT2AVz1RJ>l}_$RIXvlW&b+^}uL4Ww7q#=q+}_c&c6Qpvz^{_;?H~VM
z|9@YnEMJ_@q|7CAj5p0MQ{DGdX#f9x?Uu66Mz^x8vN+=_j%*AGd*mG<;?(g+?cJFy
zwsSX*PGH^r@@4dyPm8u$$R5jm^osqqAw$$AZT+M}2|)`FHt0#pY`tk;YyR#_T^IZN
zCFy&U0@jP&KQ5nsVAcJLr7LqHTaqp<HeOYz`A=f&2a}~Q48GW&+5i85f8?Kt_170~
zySV7*`3K7uhW)PF5xOxkw)}ANmn-#q?<=g1D_*cXwzJ+gMcGeYUv15N^^?C+t}K&2
z^CLsPYaVl|>OXH4&}hC=`;MXteGTbL7yceRoqAN#WE->J3l2`HbGwDbjq{7-vr1+i
zvYb0J`nb9An#JmIrgvZ5Kia-)UmVN(UX^DzqVC+eQ}91QbE8Ad4X-<Q?)+F>z}{w}
zGwtH8=Dkd-e}xvXBpteT>0+@>_H_1(xsUurROQmoPxfS;W3)G^$Li>nf6Ng(vLv0e
z!_+Iz$M@$vJy?C=-#x*e*|VkRF3(lec~ZY4^!<TllecfG-LJ|S9k$nOYUui%#vxf-
zX4Y{U6+AgU_08YU8tr~<#eZK_Zd)JGxye;GX45ZGC0mVqzgrJoG~9daQSDdBqn9Uu
zh7eB(KC+ar;#vCITIBSjqM6r@w!T)Jyh%@g(+172Tq|zgPtp0@WL1=TQJ^aC$J~RC
zb0fdr)T_$b^e6d!zzPFjwtDx&LNmLRW-CS7yyXer7T(@+ndgn=yg+vUM~_z=xc*AN
zDyME%^2N<%8S*i1?FKhiGd#Ry|6-d>&%s`;1(i)<aoL7Fq2>CQYrh)Sb($LFe)fu*
zy8Y+*{l%~29;QZqoyDH<LShf+>(JLfOOFN>tn;|c=ltS<)^|D3P<}_v$vz>|IQ86*
z^Y56=(EJl{V`q3=%E`Y^UGLfZv@B$0xc7C{BE~D<686n&U|ex4OJUi8x|Vw{yk^Q)
zXWV(#{X5E5=8xg31H2~Mxz2}XK91SMw_wxa)Z1CFm)lHvlKo~!$YlS!I>o(Hcj-Pj
zpXYx)WNY5y9|@twY1+#S9tk=M@=SZ~e|pvRJ<icx*ZwkR=-PhtItf~yc0w>@%Gx02
zOW%qfuChCQ|LxnZl{!ydFU4=1BF?zFI^XkTNn%)HFIRxbnuF$Rb0tl)wNC4WEe~9F
zKy=?jZL{g}=bwa$>eR<(@@7taVH754|J1moH_gNP*wYpD)0L%5HcHG}wzlPV%#IAD
z($gH*=J)=7|9x3}Q=abXxK{A+M`hs5Hrtm=0}PjC8;F<wXR~*{qIrz>ht%rp(dn^q
zE{nIZxMiO{s-&I$`%4Aqf``5L<|p3SHLYo_befH;YSM9)T{`;57FhLxX1fpD*PWfF
zop$C}=^Lvu*#_&4<_5EcjarYtxp8NE?Hld@=Y>x`|F1p0^9D=Oq55ZcHLEx8J8mEx
zT%&V}YnIRSJv=#oUr9^l+&}#3VZOn_<1tFUH~0U)c2d9_6t%x5xopn5rIj>)Dd%6;
zIv%d1e}8_S;pb8^FzApsZ!54g`+4%V(Ot!quS`r<y^rVI`?JE_K62%<nz%ks&~%fF
z-$KpfCl1fn`1w`6WX0<~|Hs!v9x91{{VMVOE61wcGiH>(e)vhrFmd+HPt{!NBKE~g
zqGZ~)y*Vf%cUnJv_QiEsj&D|*CM>u$Uu>(6ao$Nm`^&E$Pi5GWX(P<gdUf}oU+(6s
zUY@&9?!dh)JS1t~jJ8wDb=d#yaTQ;mllI!@V#}K7^Ya$3l-YX!#4)Gbip_zhY`MSM
zCg-+aetLbU#ueL0d8<$M&oz_Jy;G|==j8M7olZpTq`0|%epDaTaQhoHX>KrgQtVo>
zv}A#`_F5;;U%r3L>d(RNzXhz4Ep6@Y=}lh{Kk3N_XP+1=!O-e@tKGNkW0h}pZOM$9
zWWK9cIpJd2z4aLhuh+<Jk(_*FiEevl_&?5Dq0rIk^`Fkw@B8TKZR%;M)_OC??DPb2
zrzh#WQYHykRs>#IA8&vB&E%5{+}E2Ygk%>KUa|XQ%*;RKTxCeLT1fv+r7I~HCin18
zdu6SqCvShKG<n(Sy9`U3cCqY>Tc4VByX7E@)KsnHZ*J?&7hTP2s5mFbsr>I?SkS_h
zQ2kT8ZGPWrbzILZ?Ot^0(Dhbs@uFvEB*k<hKAg#IZEwH4t2F!2p+iN7I8;+&VwtYJ
z+W795Jzwq3Pln?9GcUaSlCfpxx!_N(ZI$JV&aDcl3Mp-P@|oxJJFibCL~E{J=Jlyc
zdhC-CD<glSjWae!CS$Hqn6-M#`o(3_*RHYLGDE9>Mxq~Id}sOhDPLHV|6e+OEa}u0
zHHnD(`}MB3ze||mzcBpT<K=#HlO7ysoMTn`YH5BnI1%wJxs*{@Qdp^>k@n|I;O~~V
zE9Yh0j_T#Q^!!CoiD1l*iizFd|CTp$#J*zAU2VkbdV2oW@2_m7gns-!)5?Bp$B9;M
z@lHQ8yXkhj^&2|GPstRwJZ&?~iTlB*x!S!z=hZ#SJtz0{$$`r2S(6PNOuDVA3^GnC
zUWiXg`+x9Ev{C=|JJtdM0teQ_)-$RJok<Ibu1hp{{kYR)_Qvhu#eYw5S(VA2e0A0M
zV^MMP&qD8)D#dR!mzBLawdd+;$=K7)jofvv|67iKxUizz`uLT;xij`wNiNQ=-0;Ms
zYNndzad1`U^k=(Q@2-HCGbhIsMty&;KX<14oV#1Z_TOvV6ZTbaugGT3Ti@98zkFuv
zGR}CG@@&hm7NMgt4Z8z==^p#G+WE*eL!Hk2W1al>9#yp({-_U}5NNFB@T&5SD3@RB
zpF`(12cD5Op1r@;J|Rq2R#(bD?d8GX3M;J_58gRV-sikh=ohm%+c(93bI<x@g>hAx
zCPd85^Qq6}K788duc4|-+onHW(SE#dom)Oz-g<9w_Vzy4ow`qFfG0j%1pE(Fv@SI+
z6aIJGe@)!pR^?|V=g!7V-nhagc9~uLj|`4mrEO1Mvs*m9*A(RNXKseU@(Das)<yA(
z=vk!A;tacC9r$LemCya1{leXBk7WLeYwX?VRqr48eObk9eZexrIkMtGA2p&6d}986
zJ>yC4(UmKHq+99y&SK`g`|5DpPvH#Hj|`cD7hk-enw<51-um~g(4PG9vey^FlJ3nv
zyI~ve$90!`UY=G@G|NqYyRYL*c}Awy4*7KE=fWZ~UgGuU4690I9RGg!FDPKqe_-Zr
z$)B;OduA<U$g?Y55xAJ`srQaM;%BZ%UUvw+p)H~jlCVCY;IrSMz2yN5dVc-86zH$0
zS7TfneQsgS_47AvHnKjm2=KI2;{<QC6EA+lzG}Ds!!E_&r4tOrzZsb-G?@0UYMe2*
zCT^0-_nn8kzB@_WIo&U$>J_}NB=gkuFX>_Fx~tvSKTrF$o2zR3^68cK+WEf&R(Ghr
zRdtz{vVNER{IA*W{|h(2n7QxJJsI({a>Y{o*BY*iRnP6pJbt5A=daz-8LQs}UOB!!
zsZ`2*Q`*O4_LsBEUs>$eo4L%?D=|4W(>3?=3-4q6)8e{pHV92!&A)!PM#nBGPCreT
ze?M1GNuC562vM@0wMbO*N1wXh|HAcQ2RE%>d~-9uZ6u4=T~WU{?;q!LBfKW<aTAQ3
zF-iN){O^+8m-IH2hf93_v{&<)&VKhL34XChHgLY~G-6y8wC87HF58}9`()P5|9(}T
zS=s&K#tzHg*S{Y<dv>htusg>r6WNBTo11TxDyk(%cQxHR6K^+-)p?eXMZFy(<Lnt5
z%Y;I=n3ep<QmsAYezEwEO<JGK!MFPL2@Shv>Smic-wF)-x1o8({~xR>&)(#vor&zw
ztYPO4d#`?trFGTTBPDfGrP(|w^A}1iJKR|F@265#N#XhLPiHJC=@U@8f4XyNaU5r}
z++AnYN}m}4sWp*OC00CAxyv>wRn@(E;eP8%Kvjr`^Yk^x4EKhdYin?G`Tn!@VBEAK
zWwpJ17L(b;k5_+GOrN4sIq_58%Q@<z=i4S84*d4Ntt|EGi{7kB%H<#S?>#!x=*aIt
zX5qQ#8jfB3xbTw=+g!fI#{}0td-&GYcct27$Ns>@uFMi&*bYocK4rM~)FsPJQc)a=
zpNua1rtR3CvT>EIb{XHpH{rQ6B_^G@A<F*ZQ<Td5t1lf_R^HB(+dkLwck;%)Tf1cY
z4&VG&>fLu{_8IZ+M-i17hF7<0%EZ|;PQA|cxaKLlpXHChpy<~AV|#lSH8jPC`z2qW
zwN>V*&wR<-FA_IT)_0l|=+@`{`=a`fr8&<Q$C`bq`OBmIB{otxKz*^w$tN{2lT_||
z>^OC4t^0=pYaiEB&VP8z)>(QyR!o+Ate;&_a%NXoav0;KNlLm)XT6KMUtegg%DN_g
zwdc>ANBv%l&;B^-vH0qN<gorTufglQ;+ARo_3vzsu+f?4<mz>y^ozOA9plMdPv;!4
z>wMH}zaV`o$18KIBbV+PYfApLy1lD6`RB!FU+NDti@kN_ossNkRPc<~Y|fk`uMP79
z_Bz_Lt(os($t%Msa&*?_4__ku^rf8UhqhOgKfK*jIM@BwmCP>%zR8)rts%3%a?47U
zDJR~_D{+`KMdhUZo4XM^>g=S=|D3vXv|8g`RR6d6^{$r3#YLQ#giHplI}qq!zoML5
zYL?Z;r%!(GS-X?*S@#<2BH8Oo`-SfB&|G!ensG<T7Y6g-t1N7O%Y7=VO+Nm&%Q`zn
zhCS20v&%I2;!=Zq8U<PlSb3J*F}=Jaur#Eh*LKEZhMpx%YKs{)8m?#5FWY;6N_4ZD
z(Z^;!i=>3_{jXmdrZm{h)1R;L;ujmUUA40nXd$Olhs^P1O11aao?0M!+CycY{{9z-
z!#_70if&l?e9d-GyHtmTc^=mF|9iUg0;lIsn3Q?3YgdZ$v#*=dGaeW6yr155dBe;d
z;(^Y;s;-D6m!*2^1a63Wem*jMlaz9Mo#$uO=Qrgp9V+6q+OcBJ{2GOt$J_6GdFg*x
z$Y0u;M<(vCt$Whh41<IYX5WWK71ur=JihhG<oD<2ndaQsFrUj={nFA^ljkgOpK$ep
z6URjU9l!ZiN?*rJ3;(n{`?}tq%7^J?p;<nqYnhfSr5Y};@qIAM-Y`L-=Jo#>l?Gz&
z+NYl;ZtZ3A37oU<!Qn4gnKN!a72YR#NoB9wJSlC3qZyp7Vb{KWKkKqYMK$Sy>xPsw
z{vzwIH}fv!HjtkF`Ff$I=cN1@$~Qk8m3fx)^RcAV#em(}?r+NV9$3$n{(JAe?^>ma
zU*6wna`f)AE6jeHwJUvpS3GzM?vF?8vFQ(|*37)QPD<m{#a+|&7vHL8O86X={H*p$
z-_^&}51&4B{Vtr>QxklY|8(89DT<NhB4?`V{@3y!w%)d=^7F4NCB=^qKXu<Jx~)}K
zJZPrymY&Je`!`pfmh*WYzVTayj%w|i8CREczF%w6-&-eln0flfs<&Y^yZs&<+cPz2
zlFIxJ%YVl9vwY{w{_K1#I8LtJak+E)iJ1S#_<l2a*?5Z`4Semi_FD7I8#mhSZ=dVT
z-oP1~^M<)zeXs6bzPa!2FL%$5@e0_psAu2tf8U*VIdCX`VrKuf?uS_DuOy-Uf}b5r
z-=7ZZj{oo3^7W5Q?XpXj%*XUn4n&-`Uf2GIe|f*S@U`=kQ)-Q}F7u|<Y~f!$k2$hv
zSMjm1EFHUp?ce`BD4n2uxF=())B?*`-l{KbFJ7%Ux*(s+tgk5Qla1}|=YNkrnm>Kh
zy3qJ(Z-37|xmJszAm?JiuX8tCZ*R0W+SzTL9@hJ8?Ml^~l{pOI^(U&H+A>`GzNmZR
z6c+CvAxGyVy;@MHG0R-J=v>UCd-EMHOi;_{R9AW7J?*|uw>Eo3-No)uZ?~YMf<aH8
zfZ~dy&_488QlVC;nEmxhrL98D?sYLI<JhhF=3WVMnR{gB{cQ_#E{aY6e@`iDt=8#p
zrcs<-{->^8oA&zp`r>D2Bv*y5o~Hfnri9?3eO_;0#NHA<+g|$O>(8vVtpdNcMZneq
zNVyg+xYlr9(!jVSV{NnQ1dSZYt$WP>r@!C#c-5-^7Jd8Qr75#%yt6G@($v);{C&Y4
zX7hJbXU%M#HfvVYUzyatD|f1&%Iy>?n7!=Ty)G`TO*`*={mpC=zO=QV@%-oca<S{a
z&--k7zxe&{y8X|q_wAJb@3(jVtjhNb9^Ck7`=-S1e5Yt~M9~(3$=9zX2A^k0`kj|j
zv$y8LMm6TuTc6gunq7?D+QKg$_TWON-VNgkT#Hv<tjqA^JD)q@W#9jEbFDXS-FkMy
z_i2fs#q<9i+`He`rQNA^E7OlY^2t}daQ)@k57%mYWL^qvWMZ9Rnl08VZT{@IeEk`b
z+ch~iEBfMYoL?2X`bPC939-jPX0nwNeubMy)m?7$Jt$r2qH?=zpYz^w+4-|8*H`2{
zvlU2pOq@RX&d%-&6-V3-`K?=CI4`0s#w1{4Lf@3<79tDFQhApv%N3<PXY;Cm^0S2Z
z71zJHhO)Ltd~2UszB4p5EO~jUHFR~@#)N}SJU7{1MuJWOIREDJ#Vx-TW_hU^g*-nu
zcV*aGsodM!Qs3X(duE0qv$(kU>pM4g80%PHoA&hb?WEP$7k`d!(zBZ&|Ll*`ZmqWk
zOk6*<Ets}Zce1+c2fuE)9gKetpQ%27NmL_#-xXs|x5j3tgLhY09AFHXytqMXlJ1SC
zVTFb7T^})@PU5$C{*@tb--dPFU#;r?Sj6rwTUz(`myB(dh_v)<o|}AN=S@uWF`OP@
zpRMCKf1UT-5`{}qA0HjHEO{Z&$|ZW}_;Juun5d^^aS`_N+Zo?hn)}OfiR<~yvss#X
zdA0sA=f^jy4K<&7@-2ON>3yu3zNP=+S*}6S7w@0n)8FS^^{+RFD|q>~X+Nd?mK{4O
zac;fuqP(9?BAR!39#6E5>Gj;X?c=F?rWsLYODE_(-5L=6oJ&`%yY>Iy_w}GPkn8vV
zn>EuoJuM=@XXPFNr<7u@)}{|13N&4$0xU%jn7-6Gt~Y(c%ySME6&75sx9-b%tiEbg
z`YPnr)z$6^2?~3wz9#+n@K9swwTtW}|1UU2`Q7R)Pq$ZEwf#UW>x@MfJO1l1C`Z@#
z9bd;8QWze*&{JV)-p!BiCg0XrdT!7Bi%I7%Utj(EWsFr8--aI^M$`NH5_g>z|MLIA
z$($<Lv&VIxyInV9>D$>B|Hv@OF5Y9qa&4_P!_VJ1tU~^`DAvl}C|V~Nt#<vvn`!SQ
z$_*vI_C!|H8BQp&T&vD^cYpo-dA8M)rc61qG5NSgzygP3^=?r*VmnVx5KxM~y`xas
z(a{k!Z7XM6mGU}j2E$Z4xxKgMP5Eh{mX!49;!W$1Z~n~q;ofg>)B5q3H_gi`l6tc8
za(_(W%U&h0ZC7koe}LJV#V3!<FieivS)^+J|Bvxht<b8~jV1;L0-JB1`SbJh#}6M)
zl&tm353<dTFaK6^__}%4r?=e3@2k(g{m3u8DkyJz@)gU(|4!`-HA~h0O`iGhTGsb>
zcfY*5JA1(b1-E{=UawSH`<*EgQ5+NBva+xUh>0EBQ~CMDrKQ}3g@rfQv3>8)Pf6JM
ziN$}GOXT@gKXk1;@2oXA#>18NY)W87*qdYPvI_-j`#N=_KK)&Fl%+cK$@0VB87nHX
zcy+aU6VG1C-14_bz02n2c3aT0>^5HMqF-M!L96bzONlUkdUEpcySuwrhOd{~w{Kt0
z?QOb~Lw&xU;h%VA=6Ch}q7UD>@9Zp`xAye3#W@eY_B#qXJpWP@&~jzwk1!b#UN&ZB
z_r4|h_xI^UZ|l+D|L0OxladE${>x_Slqp9RI5ywdU9RsvO=n@u&Z3*?&wVGJ$&Q^b
zDY`GIdj6t2-Q`c7{O0ZamneJpzNz8zG;Oi4zfHEza)FyCw_R4>RZyHNZ@1&Zti{g7
z7f<cBo!|D?PgI>XUHtp9!^dvj+?4)tdVJl&^7r??yuUyH!Gj0ejadGcJ2^YQys(h@
z>@3sdJ39(9w%$4ue&omPoVcBRkHrrkTxwA|+x+Numus<NpMHIP4O$OgUS2My6Vb4G
z_3Ep+i(^2Wu2<&d<jgQm=L5Mq^YXGDIonxzcXy@s&kets*x%&1wx+PW{D|#Ko8U8Q
zT=Dit_hsm&?)y7S^ZgX1^qEX8d3MKtMJ7vrt>wsiALVjq$Jx}k3SO2=_4rqut;;>0
zV`iLp$3jd$?o5}c_KL{OX>B>}>(}e6`_EI+(CE0eHG6ZxL#HlL?P)u^X7;AtTOYS~
zR^s6{K^d7d4-Pi3EwctqbRFTgDtps0eY*JCsI5vSCQ~vmFH6jy|MmBYdC?aSWw5v!
z&NBPo;N`C-XS3^cud#IIr9}1aZ@#I?>oR8PGf$1;ezy9^j02vmDVzTtss8@%;@W6)
z6Di*Pf4@n;d-qOqm#v<j-kRv`eKU>IAH9En9<rQ$itz0&6%~~a7v1FxKRj?Odw0k4
zXqV{Ps_Lh+KqqIk1}*g}{PH5O#7g$+>hS0N_Ww>is?KSy<zt-~xbpRLt3A$@>n`P9
zu*o|v-MM?`e8<Dv3qF4^3Q=7ZptHQGLwfr`OUccb<hl!WclX_8RCpC=A?7V(xNOaR
zzi)4Ex6hw1zcu@M+VgXBCFMlsU)`P`|Lxt~(^E8qIk>n^O}qXmYVrO0|FZG%@eF=*
zEE2yug11Qh;f;=tuKV>;-K|H$aj{!(#HJL_X?L%0j()cBhlf%3fAiMZ`aA44FE%{!
z?vk13c6gQOifcLCw*SRlI*ob_=Fac&|0(z8`txHY8Ag}?YaDF~eE&H&fcccEzJI@D
zdVoc&(vv5Db)xk*-#qi<<KveX7qfSZ>!*EraWSm;_N?@&y3x}fAMbzs;)Tc7tgAoX
zZomKJ$T^42X=fKjZcYOgL#^E6j~+ajpt&ttZ3l<qk=HFNH>ZQvP{zf@`OLL4J$x<0
zw_MHk<HxEiets|J*@^TAruXw*IR5(LQa^jsmybOUsg;^#UE6y)$;E}`?(XvAIcA%a
zj&c<}KQ~u%TlLjdp*#`>4GW#yjS3zdD4Aa~d9H2MmyWu>zXavv&UJ}u8{I8katV~s
z*Dv&$X(VG^#?v5WnpN=dQ0tqGkF{jKoo6{ZrTxwPfBWCf{5@qw$+sJ?Ln5s-+FDv3
zynfx?-_O6h{QWr-N5KWYv&}%ehUVA*n<=InB_JuOsa|Hq!OMH}`0@5fN4twZJUDp9
z{GkVEihqw?b#*l;)lX1#UJ<|FPCIPPgheUBV%@DLC#zrHS)2}vs$;#<>7m~bR>!t-
zi9R~e$Q-gNgi~H#{@dHz>g?Zj4QKlF$lK5Jn`>oQ{SCa--$crH*&T7oHZiA;`iEa$
zUfz;*l`B3ze&5e$vw32-hwh2xm$O;m-Y=)3rnV{RDA%WFXJ@N^%l`iEZpphlGY=ea
zaO;!ljM6pt@YDt!GZei&??s8#G@ZyvoZ(k1=Y7%PQarLhU|o!5pS-<aQj*fvtgDYc
zJv|*XMd!tXgUvsF{R#pd#Ps&|^wcJ$hW-2P)&1u^`SSAemDS<>v(58QG5Rw{Z_7D&
z{(S$jUg^!**Vp;Ti-0q4{euS&4t#og`sMZY^8fz*TNAn2t!t8NgpSz1@B9B>T<FZ6
zc7ER3Bb~y=Pd9wFFDx!jetm82mdwj-Ql?oO>i$+){`fyBZhiWT3kz%hd_10WV?*P4
zyWeND(og)CHsaB35y*V-<mBWnSy#JmZOsO4vnsKgE228N>dOnky;WZyeSCa;P0Y?o
ze|~;W@BV97_s1gl_O_*IXJ`4$v$6br=7&GCxL(Wx@9BC|bRsA1EPnpz&6_z(zxh{z
zmVWSttO#fnR`>hx<74uxD=QCQT^+tuW9Eg4%I+V&etmkk{QlGH@%3lT90ftk|L)iQ
z?p?BENluaSuPqxhE-JmczW#gzBXh*ws?urPT@0%(E_Uyq=B(Qy@PDq~^2NTh%_Mr=
zc9p(%dw8f-Gj!Dy5w*!P>}sv#YrhC?PCE-4+l@@)_~?7P;Pf=zjoY^`_nxk2VPyr1
zfJ~Y70>Z-1@9*set?G~7p0_mda9d<qe5ynghvL1a$H)6Y<I_?m83)4SYg@xsPhAwT
z@_4`e<=y4^N=izgJvpr0VpA3|i0j8K$-TWzL@T62(l~8`vOAyVDdXqm*Vo0ajNfk;
zyv*n1r*h{BpyqM~_dK4Lpa#dT($__gkMU}DO|5(R``53aEg6AMPEINsc8gs)g=Fk%
zW-zgGJ$U^1@IvSIO|`x{V%)*Yd={pjo>ues>-95pERCb@n0dE>4rI~YxN+mc+}qn!
zR8#~+L|iT|a@CC7v_#~X*PjoE`D=bWY_IwM_dAcIk&5@ZvVZ3l9hWW&T<j*I9oC~4
zyKBOfDJ}i|{@Y@-43w0b4jgd!_v0~t*}FTP+qai*yIBs-93=%$P6*!GlIgrT?W}}(
zUd*!ClZx6MeSOQ~_t)LoRoZ=Zb$IZOf<(<z*&O`*%bnZ#7|zT#*Z00-|Dp8ly}h&V
z?XAANx4QgTkL2XHbrB~Fwg@;aY1Rr|_2G8@{=&z{ctLTyHhTLK4M|Ya`|<17B=h{Z
zLx&EjXzXjWsr<Ad{k+_tzkfe|{hE4vTW)0Ce558r>@JhbdY?5wiw2B1#l^+V^6q%N
zzP5H{;9|Du?RirdEm>??{OrR)c6o!sM=tyR|Er!^@+IG?Iq2kyz{L@}%XFRF`BHy;
zc&MVTo_?}+Z{1(3udl8?esr{Zj!osHTU)c!Z$H*M3>so!<dd`UxV<em^X4Yi<YPS(
zODpd$x^W}o*Z245r)q~s?5#3A+9f)9(UQg$D>QbMzdyG%`+COBO{vpvZU6D3;@R2R
z?ga$~&FuW=z8#MOm1Zdu+WF-#tqfLQv}jSpmW;q@QRO$H=GfKF>XWqw)tOr|FQ=r=
z-JWo9k?S1GVzt=11WPNcplvxfxBrXtS~_W4?(LwBNv@U$gh2T!IZ{VV&3D$3D=UL%
znr4fAeSLlUq9qf5etvG8c1B{E|9rpS-`*}&n0e<wBeQ1YCKb?GTuDoeCBF+eb@co>
zx-P&V=?KT4KYudr@3Vb-YwP3B=k2H8-tmY1<HwJnEj#xAet<U2c}>w!bUn%BqZYg^
zCiJ+J$JJJWLtH<8{w#cSgtPAd-*PF_tRr`KmoL@$f9U4s^pBrEA3k`n@o+o8cfc8^
zGijS!z}t`~?%la_=f>LKWm0B29rNeQ?=F8oZPA`a*KV<kI|`GVnww`@m+NiYwk=46
zvs;N5oKPYJTb&M`n5g{m^JirPg9+#6S_{j_=)_N|S5{PPJlM?s`F8&PkFVG72kl(Z
z>}qkH*tX=A6Gvie*40%<JB8IjyUcuNn?3#a_jk~gaEtnXHl|ru9F_(Zet8kd*%fJ{
zb871zP#`+lD2K0$d2?%P_bk)wN3UO>{`dFyQjPNw`)Vvf^J6zQB<jTM5U{qk*7Oon
z6OJ+J;{omKHT}ORTuah8jmI?W%7WnKekJek%@tI3OL?t-7P`A<p>z9<-R1ea?v(qd
z*hscb0}XXQ%K!2Eck!Peg}1inpa1=S|M?k)$(x=QmG;ZqKU>^yx2WdlCmH)Xnd<85
zOc~f|cMNiJa&PYKoo$$W?85f^c%v*|*2yPbLPA8=ty}l<-#x9VU5(7_msW@CX9@_3
zxmryWjcE~Z+B5O@_xHj3YAR=zC||lddFz$iTQZf8dZY_oY!z_YBiQTKd@*B+Q%8xa
za13aSwB>}#m8`8Tt`py-NPtsmhmVLDFI%&S6Ks3;9-p>_t&pfxJQ6=CF=Jw2VqjPS
z_+U3B;Q$R0jj3KfYLi8rmt;pwnFo$?MI+9au_7;h)0{!8JUAwHW|&CLe4=z|>KDj%
z;EA0c=RrsPfpT!mkJ}qGY`c~A9$w@FHFWZX+s`|`s9lFCK4GxMBj1Pzyf@rMjl0{3
zM;E%USTW8M&2g|z=O4Ex2DUA9t!j6j$qUt}c=&?S-jg{Q;5ic`!IzODFMZn<f@;^6
z6Df&-Rbp^oYM(UNGGPO_fNA;R4Yn>#o~!$4(nKQ;#g-rH@pV5{t;^mxEWTLq@DOWE
zOw5Lj8y_BQX4i?|CnGN}uQ|!%vzfT0WT)5Cq??;kSy)&eJbTs_wDQU0e*0rLZ$@^h
zfIYr-Md)g=-{0TYKfT{4W9jtt)YPROO1>*Yw9ce`*8X4i^;IaBxSo!y7yG68+ARW)
zwz9CZKYsDz$EC;X)~yo~IU4=(<?{K9u4cto{kdCyf9e8=bGLr{{@s0TnEm$|UP~9n
z?k)?G`50F6>dMQd(uKEg-n_WnUtY5hJm4Jmy>(&8rLEcFCQ@fF9|!FQzp-(#i2EK<
zRqtsk<?rsCw@ovenQ?#LUJ-Xt;l+`CWpn!Z=T+jRR<fb1!=@hU*uE?p)b`o6{Qvv6
zZ+lm-UM=DVQ5dqTB=ghr^ZZMA9JlA*4(n37x9Y%w1IxFcDJ&@gb@_t8o0SyTME?Kx
zcjM;GhtHk!Yg=$MI(&)8#O3w@J#Na3jErzK2?+|EoSYIoY<u?ZU8?YCEjJezlY)W*
zLv(bsriWFqH)o5$AuClC6@&D1GOJ_4{8wL{WL=)O@9($h)obR0s<_>r6-7lxo72y`
zt-qeWv$ngllTpFFPX@FDHZt#j(UTL70RaMBt%tO>`{$j~Z4n5|`ugfB%eHLy<xW4>
z=+BsOR)cY?sNVi>?j<^$?{^fwI@FqTH4fC1X=dYn5x2x)+MWF$j=0$5<g76Z_$&T-
z{k<oLTDhla1cD3%wSW$JMe2yX5<78R-Id$R@ZN#W;}?BAjnYql3$m?`%haEFVc!dj
zH9Oz?tpklvDH3}EAg=QRBj>GopwuET^W*e={sKFrrm*f6yplCV%fS7<^1JK3N8Z^_
zU-o{fhG0YX^tKyTt-^K%GWolLwjN4X*jbxj{QdRUq6bD@yY1h}@9lSEo$+Nx?}95Y
zgZ`yeomiZIY?8#!Z{Y$Lw@LP`Ud}GISg_T~v}r|%hQY*=uWYlLR4?3WElz)6;q%|@
zhKX!b^;)KgFK!}M9r3^I7xCZooxJT&`r<P6=TkCo+>bxIcKx-~zvrbbn_C2wJ{z{5
zRZe;yoAc>ubN2p%{Bu#0G&-`spFeqbY1FKJ)1O{a?8#D}QkM14cTPc81Z(U~s~fkp
zpBzY?a;J5}$+Ze;Z)+Bvo4N7ibpF#{_uh&BsJ%nfZMt^tBZp6~<U9O}9H*b3`-8W5
zhkd@8VDYp`Ez-A+aH~wdbnYMb<7b*SKXe&q*ZeeS`SZm@W0`C8HM94Pv30By8Xi8=
z{W6h%ew+JpxwV^fR)FU!XHI$fq|&G1<-JNDap#U%yJUqI=*E0GudXQ*FkPed(!bXF
zoOll3n?d@uoEi*wxc>gE`R%kKR#al~<azarq7|>Ga@qdRe=F<ouup6M<N5r%wjbHK
zK#Fyp#r98if9F<TEz`bpaf)uM%enfGsq=%K|2&&{M9!x3pQ8*z+wJ|yHrv{M&(>R{
z7yD^_|LW*dA48^Ue2IB0%IdX9bKdK-x2DF*sO=RrNZeJ+c=_mO34J-Ws?Tgmb^m*x
zh~J)hh_Q6%_ki7&`)>BHe4(PFGD)K9-Ok+VyW4v9I=`|LbDDaKO*GE2q9#CgYp-&|
zqmIA#3zVL{V?VIY-@x*(O<?N9Z_41-P-bQT!?YhQQ}jP)Ee&8i_T$Ht|Ld=^_ivlJ
z;YDah%#|ZLl7|<^hHrdnH7&;U)5IEc&e=73N8Zoy+p@@P=d0<5WqvaB&eoom9nJB7
zF^8P*r-l3Hwd5*R>uBaTAD(b>hsegGi;7)ycOBZhGrjQTEe2`z*sOeC9l^g<lLhh*
zuFJOVVGkD!U&HHXI%`2(g+b~I|3JCxnuiNZ0w<Ofu(t*A7Hrskw)tiJq!+gyK7G3{
zes&G>o(uE4kNkUFV<U63`9^eFCewQ9dEd@Hdbs%@Z@>g2p4=GwM)vx`nBs34P3Qbt
z4G*^5@@rLnC9<8(-{t!E=sWj()gDf?F57kZj)dX!t+%f>KY700`EH4b+`a2NOh3u-
z@y_dvetK(izK5_*`TIqurt{~?KROqF{G{z-dnvikA9knSVEcOb9^WIgde0{lzn`!-
zV|d&c>2i-bps|-}!-Cw;KeM+!-F?_C$@mL<ky!CT0nz4TM>)AWU3~uwh3>wk5jgSl
z<Bu8sZON}g<&K`!y>shj!|LtUTn8S{*7q<FIcC_;cX+Ap9l7l+$IqvI_Unvz_f5Xd
z&TjI*8xw*(I24Waxf8@cSXS<jc4$1%C7~z(=pO%T$3TJYd8UT4|7TWzHcN8o4qnZt
zahm;M;BJe+_^%K03hWPkZ%e+vz<$^L@??X?sM@*K_t!pnnCbq>MDy(9!rC3*k1EK;
zcrxh*?Bic2S|qsl0lU(+xz35<Q|5>s_N-tLORD^Fm*3{j#;HpxxHVQk2s(9WapddD
zEuYIpUM+f(5u{Rjxc>RU3ol>zF3^42+55`s;a)qncWX{-Z@;}SE%nOB=bdvtSx!uU
zw{XM6<oU}^eXl!OxcA^<;qMy-ysxyJyAx9@)#@GjhD+qoNo)RN#<M$QY-g%$wSILy
zH+<o7Mq#d;LP^u<e~iT^?Eku3W74Gg_vKtYi+DK?6#a33$5Mah?D^{scKJo+TOT`m
zSR}aAzhMyk?93?qYx47l*K~b2uUAgEb^CkB!EfvDPP?D^-ucTa<{6BKC$h%8{v~?y
zMpE#RX~(Vg6BK8jP;9O+D3;q;-m77dz&ZQq;msc-dc=3w=J4>VtvSFEA?I#m>n^<g
z%$Y5a5v3GH&*M%8D@r3eo_<{OQsNP#xqeB*sl`lsrQQ=uzGz(h&A(VDih=uNM3DQV
zFVTk$79ZNSLd<x=j(p~m$LrKC&f(WF@7O=(QgyoLT+RiW;Svn9r5G(ljRmVeX>3f-
zPr4NCAn$i*Bl8ix+YOQd`~TZTh4K~E{9@gi_00d}$(48CoL}hkS@4~nRnw)TqNiv2
zs(ikFdv3uWovu0eL}vY$oi(|;bEnGQu1wG1btkm@UdQs3KRzJg_(<1ynX<S+GSg|_
zNBTz=e3kTQ>|9%ElWDUjZFkq*Qoevh#fk0*9q+ulJ@vJ{?7=UOa;7n*ZVYt%^!2vp
z|N74io>c+QPc;5}{&z;9YWnL%D~fh9YRS9~cfY$z@RWA7;hqmcue?(4FVMZ;x*|nk
z=8xF+h8I(EjT?ey&q^-VIv~^-`2YDAiPosE=_|VK`dztMVKP;EVd$Rr<b^4V!sAa~
zdFu@704q)2Y;(i=h>J1nF}4R*AGIenyG_Vh&FXVR=SSBJ^}}1Au1bzJQ(YA~X<p2|
z{EDiD`<1qet2|9q)+k!EF+8DXyThwHAxqtOr$yV$FzQq<^o?F$b*kanX7L=&JB2If
zPv0Ty-LX5F%~_`Ef8P#~)jh?LqW$NW?3t&wKKoRC?XBe9PZT=qH{P9feD#jaCXCm^
zN(*29u*_<z-0*sp%%3f<EPr)BURCvR$Fi=u-$bYJ-7Wm8-F)2qa_1eHuT^hf-Bg=o
z<&tyLyw&=5+>hj>Q~LvV?_v&bVCms(h~gGp-Ldbk)O4xZ6Ro@-#g#UG=26*x(dEnZ
z{d@kn<=>b#^?dIA`#JwtpF4Wx|FN}q&RQok7yREPf9H;GP+GoxQ5T1zi(2aHzn`7|
z94mU4XVLByE6|^B=TiAv%q`@JjH%phroI(fyej_>9Gh|O9>W%kee<nuE~yZlq9RoP
zNF+^pMNF`o&Wy>E112yp4>Jv_5_?nHz&q_kyM~DC$<LYYQ@F}y+We01UB#ZZ_~lN0
z?^b{DH&dopZph*8{JnjmL|Mb}&E-i>t--6$Y5Hs3m>Kgg>X3bW3uE{l`?s9+8}Htd
za<q6ME_83TZ6Rku^ut)ofBz?GS}t`zlT`61eI8f+hgXlY(wWtgPZfq{1Ts8Z@#x8{
z8?%?l+du!+QmawmD*3VME!V#{H;wJ9ESn<dZCQMRSw*$s+-3Exz2XOtv3?Q%&!CWY
zom0f`=LI_+#iU1JK3eWqE~d|mJD%mm#u^oJuFE|CbZ1+7Q@)az!$QX5O?FFjZ|_?C
zQK$7L2Uqp;DaV6$-<y)BdQq&rN&0kUi6vw2Y~hc$%Tj)sEtaya`jTN;+9Gi1*BsmG
zX_NQAS@h7_GVWh;Cqq&F9hqLY&Rr3qQ=VlX$&YE?*kaGu+Vwc(^2`<S?~}qesq~f3
zdKhBJ9w^*Csp1_==PjchS8}^*Q}c_iX!zWjm2YreqNO-4J>=&-*OeVAiLcXimS#T;
zxj9{iaqWh)3t5`Ej_$2hS;3tgzedK=E~91cJDG>CCl%&@(ev6WY}UH<i2GFa`wyZt
zu11~N?sVb))#gKt$L}@XcwXlAGDzh1pFE50tGl)S2=HHD@AmpWm-P2T7E`Kxo;+lc
z@wVT8*Hdq6LH2#Q+>co|o`ij<cxN(6e($em>gAKa$QVVvxVb%kTb6$Kx|oLtmi4mi
z*!AaWu}Iw9lb07tY4r+~Ci-7F*Y9alD}DNEw1!ql<-Z+z{p#VrwN`BLTQTSDM(z*p
z&l8%rt&p1RT^W-V?hw98LhE#;+}VG9E!Q)`)6SpU5qh#cXr*KLQjuR5;tiMfNoUXI
zKe#n3aY~f>m2KgjG3BDat#}JozU{4@yh~&K+XwH?TzOEc%ym2MYR7EziA#P@5BO<%
zcuT$==k!Gvwn_H6FE(0X;+3<f#z1z4dr;r0^;f68uQ3xp-v2B4{i~LBE^$7UdkVGY
zar+)@Qqkc)x~(o%Cfmeiwy}K~>)k+0afih_z3(`e&+(pmU}Ih6m;HGvKPR?6InG+J
zE=i&2+P3;VzUMFPp2)(+`ta4Otf{%)i5nwyK26kV5xACqbycX&k0;5?#Pol>k3Vux
zHuT<{2)%jFcdq^;@@_@<^CN#AJ=U-D+#%|8@`n4;7T8(<XCHo@gwE=Bg;TjD^yBKS
zt^coj>vuQp^3l)rzn@k=Dl`e6bVlai-;!-v`qpJ{5^ByXJe#KV>AmwoX_@QccHd2J
zU!5H=O`}!m71J{DJgIdB#YcoBXWxpGQLuX_zx})Y_SD}_E8fT@RejobJ}<q|so?Xk
z%YU~?TJ+9&R@!p@#{Yi(+b@npZfk1`+?3+EIw!j5>>>fDl+=%cGxXf{{Mn((^mw&g
z{m0CS*W-6iYE%7Wb?ijRy?58|q<wt9_he!1)8hMw42|M#{)@l=@00MmeZh>hT+4q}
z%l6N>nzL`FkXdoh{Pto$QLfz)?@DjZSH0B=o+NYPnCLc1@9u$=o~IxGaL%ktIxP&I
zQ*h##7?xOgFd|R?MAUB;ThOY;76I^rBo0OJk#?X{(ZS;t0#2X<<-muggS(puX(P#<
zpc%x7jU7i^@>JEl6z?5adqjoj*qfW;pB!#^FB6%XaQC<mNK?ehpru}griz{KrsqZ7
z-6q-N_Jxg^Z`-!8?YSE{IZIw$P`tf$`XbkEp-hwL)asw>ebj_Gt8Dwq$|67AxM^+u
z_}#m_t+$tNh|m%H=^^IC@lkDC{{3@TRtDePSG!xpc~7aj-<$>M=jX}T*Tr;AWPAde
z>=JN_Fx`B!$7$gN$SyX;mWtAd5G~bB;FTv}Pk;vnK%NHo&q3Z4f*ckP8|(lD1Y8k#
zZ9hm6G?ZYsE`St40}6Wa0{E19khj6-%5y@(7J6bI=;Zk+pz$eWMO!yS=)mRo%sQDe
z2|5-c@aQUZl!#cPpyNj1ZY{)Hu-hU)M=wJC-r}kV@s^O7>#+cD&Xyn9Cr_UI^2&Mw
zGy9=UPygM%ecSN=amk9xO3!4NgDb*(e0X|inLSVIk^L!k&Shap%6_Rkw%&7h^+xQh
z5$u&Ro#Zz0>|CqTF3^nx>FMq{IXV+3P6W-fw6w5*1`2<Ce7w{Ha%A47_75KlvaaX0
ze3Z@i+CH~MX?C55W!03!|K=)f;eB&r%6fGHkt?xM?q<_w%<zb~@p<o?@bz)54hsW3
z8e3Y9>Ue4CAK^YX*V=vQsjC5Ed7c}W#i!q1@WZ9ADOBTrt@dHN-;>U}_fKV;)LU6y
z{rKIxf1mof+}-VdJ`vWO03K%QSfj6^(vnrTwP3ZJPoq@xQiYXo?sf$(yzZB#8{xlw
zLEH=<HBB$Gt2>L+O{C5i_s{fElQPTEI4TH^rHNbD#_pbW>}*uPnx8qJLUPnuD;uvV
zia*_zpZI*?YLl!_jGUaDK~r*8WM5x*zSXtW$?@~Ev+-P@gg9k`hRZag++XD-?@c?5
z=3e{g_t7e7!u|!<EI;Hu(9(0Wd=$Kwk6+Ga!lD%8)KgOqo;~~a<$r4n3jq-kl~$<j
zrja^gc2eIslhy0aSKF37n6iD3$kX|DCLvi~+h({nb?eWYTDzm{?JN=1*)MNyR=1kV
z$1v5)_3SLur5e!Cmuo)gaQoZbh+T>D@4wz@%ss~xG{rn%$31b2kT>1oq49omEEHK!
zx-l^`2Wp6{2+^`Cd7%*hNGbNvBCn;BK-o#@?~!B2f|kc6^UbUGxw%gI;*F<M7X9J=
zxr!}t!NcuWzU80jleJ!=aMNX>Q>#YQ77ew@oq2b6c{(16PB)tQ#NCr^(r=xvoagWE
z?!L0_c1OzeQ&aM^w;%ht{?2I=$JY<MUVocA<J()8U8S$JTs_rShpj!eAud*3U7g|D
zn#jYmOtUZj{hOPcD``=nU|D>MfA1TIwuQ#i4kc}jSi+;Udq=@TrmcUZAH-}kX3gfG
z+_Yrjj*jj>8<&VS|Nj1<`@FlC<Gn>KEiAvjy*>T#aC_$Ub+K)C1a9Y;2}?_NZ{A$o
zcICmTsoHPu@1GyKIxKK&R%oV3WI#xWh`4^7%gPX=`hPW`p}RXfjX^=t%4PfappuFX
z*Tl3#hYn>3U1+K)b(?&AbF;`3-3!O>J%95kjOEJrhsSRkFHwJ>Yf=7AW^L5gMM+1y
zeCAq-T3heViSE{o-gaSseLd(bunP;FmlmcN-rZH2e6WetzkY&Er?#P?AqzV@Xfc9j
zkkv&0qKUeoxx_YKy9FnH8>?#UbG14iGll=wsc#p!O){@F9XaBn=_M7lr^3)Q>q^3d
z1C3{98mmXMyEdr#&e~A_ziyg-{5f{{ngfrI_b;88qL_PY%fX8mC%$-*v1sw)&dyFn
zH8nPNcJ^6jxmv7V?w2-eY})fa{m+5)`TdK1+C?p%u2x|6WKWyi=q=4Q;c`)Pd|A%4
z*Qfq1Ua~~x*Z24S9v&Q^wHQ7#je@46gAR(3kvX$1_cmzuE-o%EXo}8(43kO9?tG6P
zKb~K0Q~vJGoxRoJTvPg%qz9&C%GZC=oZmnDP_^T2RgGoO*}J`*JH<A46tD7Hdde<l
ze_d?XC;iwx6%(~WR|&|=gHGF0(b(4%w)*Mo_50KQ{P<{+cW1`S%gen3H!*CDI?FC!
z^Wl!^&ZxCvX%D<3M3mmvP29%+=FlV4-77C-tCzhOnY3t$ajTQ#!(Yi8^zSpTU$t+J
zO=VEmr?zRj(QWJ3>w}h1{rmIN!otEM@ZT}e^ufKo)sY(#7(r_})YRO%CY>yOeeLbe
zvh_hLRdmvhJm1OV+`UFkPp?mnQFrIB0N0b(>=$0T;T33f<FB#w&TW+%+2Q(&^X~2n
znoz&s`s<$`4)gzfIz2vOQwrzP)2B~4<wx!JxBJ;*Tm5ZA)z>Vour(U4CztrB3G3<U
z9XWQ4DPUEI=DE4n>aCvXphyYR*jYbc>uOek_x$OX-cIuh*dny^6kEWiyGc=7_);!D
zIvO-#f8o<pqI>r4?d<7MF);~gdbEGR<(D(8O10u^KC*s&eLelznVCUTe7u$hU0miX
zJ;h74tgP(I%ggRvN{N9T_3s}Y?GE|0s!cOLxr^P$b>Wny3)ggdFU*?qdRwQXv%68=
zpYWgw;0d{V_wJRvywrNMTm12h7aFcV`J=YwoP2k8_e{%TwYRsnR^PH#HJYv$yDK3%
zx%p(uqT1izf+p}N9KN9BZBg<<pm$cC!#=C|laFs+EOMmlW%9eCvO?`Bla(jWNM3##
z*R$lzGg+$=joaIDlP@lEHOiTFY|^Ajpp*MR$IWG5TQl+AEfXPG*|RS%FR#39cqnOO
zz~(gHdzH^+udR>&f9?C~l9!iu?AmpTaoPrrO_O@1&4q4Qb_nUFJ&f8kPeeu7K<#Kj
zo~yf-*!h#ULQiklxbft+tvkxzO0A9Ft`@(q1~m73ILzfFU)Z{slOG-)o@tQCWLy1h
zO4A>9&`Ed6$NMTODi&PKxcBS#-F>yv*4DeX*GcP5K6&Kp>+3Vk^W{M2w=(M{2BxrX
zPCJ`c@j>EV?tY%<f2J-`nAveN<-wy}`*vu(|8TnR?2a8fQg%y$vUS|vD#Mx|1;4(&
zR_|4kjoDK%u}{`IXrU8p+WC3Cd-v`YdCGhE$_&Hg8P?@`o72vonk0OUPtL~U`@6fD
z_x4y$R`+*XeKqa(W4*`rN|&WR-t@4O{%rc5HFxrY8ItxjAyQu3eof4%_SXvi_VC%+
z**DYf^Rh8Fv-78&n4q{Oc6V4)l9g8AB9`CZ-@7jjdieC|(VLsomujqkd2Ma<l~tkL
z8#Wj?IXiEDGnaQ&=xVo!2#M?K<B#9IJ==S_-pS`%P3kA!l<0M1vMx>j*Y$SK^<wt2
z_af64r37)cHZ9^ko*2DMdtPXbMd_=MY4@K?o98`v{`~k<?QjlG&V!dP2TM6wZQi;y
z_1T%3JhE0(qPAvzyzFo98TjvD8?W?-PoEY!HnaWt^ZERfQ&Y9Q->>iP<~}`5cX9Uh
zbw7Um*ii8B(3`)Jx`!_)8F%*gFVA%F)^l}OX~rFp`F4p$<dMIZrFU#Am3V!9%>uu<
zR*^;W=DD|A`sHjTj8Zt7*?1>4=dprX636>wS4M5svj6kJd0Tw`Z&y!GPmojI-rl}B
z;b2qF&MA*>ZccxBXQy#oTpTE6MBkH%uM%@zCMYQA7&?DejQ^RlyJe?&2b@VNn)<sz
zXs!4M$J5_*B6pd1zny(CBjv`1M9^ZNsoLRQ0b7)odQX3LppjXEhfOPd-I<@CpD)!o
z&+-yfvPnvMMnptxi!I*mGt;Q0rDeg@td$WPmDK&_oG3F5=#{gr+A>Gz(5={gH8ZpJ
zZ4D6RT9C1rQ$ysFwYv7EQq2bMk5xJ|?ns|uTlM>A+OOJOwZF}JrOnmU)Y^EZ%>vd$
z7+PKkb6b2-KtQ0uZ~5Zl=jSRaDnN&qrM{l^=f~rI(22MY54RgveaR@Xirtp44ANZw
z?oP?8E1XlkTDNciUi>;NJ=KUucg?+t%I+CAp45K2!lRb|^jy${90z&Xb2E?XX39P|
z-}8Q1+O2BCyxZG)^X~34OgO;M#v?f?cVC3jX5;j8po5jOuB<rt{eJ!Yw|TEcgoT~Y
z&$G2Ge<#B&uD2xR<fKa)aRr~B`R*!tdC2<xp2LS)xeJSnCvVVjal5rCmHRF8?w~i#
z@&(#=74F{KK5KcOnCldY#^m3^k<uUk+^=2rA|ouI(}^>kUv1-$XWD+sy^|K~irVu@
z>*hA2#@_Qfatk+v8t&b!TVCgzu-4V}c#%}m>oYYwRXT5HR(D<AIkB&38`l@Fj$Jd)
zY<Krz*z#@r!XQ7^BQt-LMH}&(a7A6e5a^lu=<?jQ=W)pe)z%@$g?@gUWO=u*(NJ?%
z@oDCdI#18dn(A#FdcN&sijsxJjHTYwK}W5w3SF(HUaTf0E`Iz#BQwaa+1J)Abm<hj
znY5o@-p=Rux3{2mox<vVXTI(2tpEKp#pKh|>G7L#ZW@W{$IVGyzq!uo*r%4Ovy7j+
z-ztA~aCf8jWm63%8Jl(K&y_!%Sh;B8{BKJyFLQO}m9q?0iS?^14PY^tvGb2aPN@EO
zwa6Z^L-MnB$mKqJ$1D6@XMy)+J|5P!t5y~sJ@iO`S;}1WXVHOgR~B$=c-A7=KgWx&
zP37~2u%jE&%&zDzQ+v+6dv!rz;5AOYyT;338wT9A2wryCs&0Sk!m71ZHg$h2g16iT
zx8~QK?G#q$;N_jVU<Rk9g~f-<{`MDFhwInW)<*8DshoB%zWeB*Bb~yW92^Ocj&y>K
zqiuR*Uu(DWYLBFGn`QB{3oC=w?S8*8?r~e(;;MKl_`?eQA5T7aPT{}RRovLKbH%SK
zbEgz6DZ6~(-OS&v(<}U)PdNpM`Nf#ne$T%nBPvq-(^zI+w^t=^Gv}QcksJ2HD*K}*
zpIBDV;cZfE{O{>^{b!2~ylrh2ykpYf=4g2QIrEA`-+q2y8+o^F*;CUeOgbCYrv1rd
zncCHM^uNth+tXPW?t7*s>qH3o)@;_Bc$%%E(DiLrs_BZyy>HGnq?S(2(p8N<|9s&(
zn|Yx%J4;_r3*CM@c6ZssmoGi%T9roZuiL95IqhWW`+IYHrOl19u4w%F^0GN@{rRHb
z761R$g4W_ZJ3E_`lXGF_<z+<=?mN7_wUv{b`|*)Z;gI!lbEVDmQqC6Wv`0k<FTUup
zeZjpqKfbDnR=sn0{a{|=?Mcq*4}bY8%duX$^5NQsq}hxC7p?`LooU}QrS|*Hh@}hi
zwWGCeM0B`zuI_Cu%@($p`D^>OxiT5Er40Drc|8d<)aB(`y|Y%M@#6A%?>C-~cRB2S
z)l_1#=nkuc7i~Lb$}S0n1U9eb`l5DW<%4(foi$YlT!ibTXH{qzW=^%3<#Xo$Rgc|Q
za;2|aYny*fweZD^#^X<~PWgN(q2ANqhHKW;kfMsmpAVnSdhqkd>I2(%bnQ-`)Omiv
z;{^d48rL$I%$@fg-`vH&yF71c%+8{)t9fVBj6qG3&(F`NA8cX`Ssxec_e|%}-tYI6
zO-!cL{r&au?c1}nOtUqjwq#7Zv0c`>?84UUaM0qP4ngIBl_5n3{<E5!n_pWOd%FC7
zt^3ncQ)e0`v#pKZelF;@hHdwuEEN@%3mZQEeq&oW`_o<X^o)QSrI*th7xU}h5WVnV
zJ!7gMFRO6xpPJ`C8=eQ>ZCmv|;zyCE(&1@Gif!UNyPuiaL_YogX(7w5bN&z7938ht
zMb&W|erT9JNpiu0Y+Kn*@yRo{S$9@ztcZy{{q^z5J0{GYZcWdRZG1RSvR_H+&K=V&
zddnVNx9!xinZ#qJHS4L_=6|y!ecYPD)e28ANcg<jmByrQntRtV(!tqrb7!yMe}OA8
zAyckISX3OF^6>bghWiXQS^Y=XFSD=y{%-56vg-HuZ2$cI%gM{D3To6Pi9LDo;>4q)
z-7haK<-WQqH2LeRt23+WdDxf_xA87^@0Zhw*)ie4g9Og5%nyspE-Y{aomwTR>~>*w
zxW16Eu&NH%#A!;(${)Y2Jvw#9oGE$xWu{(}x|SWfAS`l5{UHX8m9LjBt4=kY&aYnV
z|MSmh8}3yG_YCdk&MEq^OVGs2D_gXo^}hbhg9&MucTPFpovijN^yld?PrC!pWKDHX
zuiROyIMaD~kLE$43+^*27$hb!_BLJm8Ep4}d-}CU)7=lAJbhATU$WceUpwbNo;m$q
z)rAH**0_05R+X!ER-gLtrSyMSdL-{F73JN+YZlLaQny}JHR`PT0@vuUH4zgZ-rk&k
zUQR1?Rm=MI`qJikNABJQrJFV3>*M?nA824?_VDvNSMs~y=_%2gni|lGBzAteBj?Zi
z^W{jbirA=>n3#Cv*s(`XpB|lQoUWq7H8D&+Zt9*@3(MEKc0Mx>SheQys}2>N1#F)~
znZ+j^SiFa~?pxZln5_xFQ-pmP#Dpg6thgKc#Jl!KTFR@fUH)-8rhD%87%_{z`|tWA
zXhra$rw{*n`rKXn{CDliGy77Pt$F)-{p3TFq`1!h*%5vI%G|1D@BE(43Tu3Oxwhij
z&5dr~+-GdmUa)kV^`DwQ^Q*W;ujs8)`ZUMzaNB}-)%1%}Q(pfv-pgKdX35F>DJRTk
zoJ~8{Bk6p+PZqR}Ykuvw$Yp!?_E&s15D*eN^!E1lk6*t&y|p!aX4>uPda+&Gx0}Dc
zy*>Tvs?aBAW(M=Uh<@<m#fd$YpCxQ63_#1ZpPiX0B-Z*vzfaEAD>dJK&K|a)74JM&
z-`ZBAr&Y3Q%7<;=e7xT5t~+%gSK;Cep=F<TFZ0p*`M6QiL~GSDzm|`hi`yiGkDoo?
z=CvX+ig}`%A48U?M6kU1l7Hrg*L*~{q_%Ba+LnCkYq#;NR~OWpKF+*e`RUJ;4=a^+
zOcV<jG5NTCskf%+F?HMSZCp0<XU^IDvf@;o${mw}7fWro*?gDdIzC15h4XWtu<Lm;
z#nTr$ou6Ct_ihavkCao@Jw_cRr6x9BDbT`2(DCS@tHXA^`#txu^h}dnsmsgGKK`<J
zdV2gl%Rk1Kmz_<zwPdCYr+?Mg?iq_LHe{JKNA*36`NO^Sa{TkjMNcD(4f|enFFZ5f
zY}x|<>w8<}d8+u<&N{awa`Oz+Tq%3`eV@OiSZ;l)zRY*_u`i+R7no-xS!_+^yIfya
zSUR=BtY7lX`Rk7^eU-4Vw%(j`)9BW=T<@!^LT_$JY!0nIckKA_%UiR<kM+r(UgkUd
z#pUJwH9wa$A2{Gp_w%XvvokZDPfyb|J}WD$qQkX%Q|arlKd()f1w3B)Wc8)9tG1mz
z9qc}RlGdm2Ef1CbMaBE2cT7)7`dui@S9zu8;cO${ok_XUQM;UP>{D@i7nyPGSf8Qp
zn~Yh953S*?%DZd$>A;c&uG!Ydwr*X%?%I*-uP0^3-fvsvQ0Fguckfn{bIVz#cFE1-
zx+ddwY(?P3z0J(=*Di0Uu2pLLZX$BI;heMY^G#B(me|=lq^mud$2pxTYTwIT|Lk((
z$>!2O9#7*8$!aUzruyFaw`ld{6Ejt~)BX1f7@2teGTF4SgRSJ9R9k5H<9NsBTM0?k
zYf87t&z7mwl;1M9)VlA|p3vPTGgtlsEhyZW<Z5GUdvi~v@y_Dse&7F|doAhkdeO!|
zKOT!5Tlep6>wdRQJB~m9^W*V%i|e_*Q*G)C?bW7ov)b~!+4-ew=Ikulsa0PVd+<ri
za7|k_;i=AXz2#2}R?oZdd~T6@d--HHm2*ED<@s&|?|n6S+Nwvk`#1fucrWyIR*38J
z{bzs4iyM5M{N6(LyVLI8eJA#8y*Q=d{EX9KzrMacKFc&4)Io4=<2jgM5OKG%=+hHV
zP-$~CNl`_m<<ryCr>FZ{X~pfasQ&)0c9&K_Mn=Z6JN!E}x1SIwets_b#fJp1?a@ws
zO`)64)ZM(T^<hKf>A>^z!}9vL<}{w(xV~~%rpS}>h9erM?c>6Kguj3NY{dbg-&OzZ
zo|-DyeXYtd=d}sY*63<I9ah)7?dFQ-hi=6^+1Q(O{M!7&vNFHV-#0|)_{=b9e82zy
zyqlZTgSY41<+=NO3HRxbS(b8Bwnm72lASsysA|Vzf88VIAF>0kC4D{kUL*Ja^S4vJ
zd^CLFt2JlNo8b6l-MF>iH~o82E^<MALD{DIcUtVin&GxF5tdWTARDS)t)1_4<j9f5
zo6DV+|N4BhYBHB>RnnvbC;!c?{_^s1=f%%oYV+UyY*RZt=VtvPb<4)z?E8-If3I^a
z-|EQkR#}D}`=3j@-OD}E-<Udo-@je&LL(mTpAhOV-)vod>~hI;_2c`pJly=wcXV|v
z@}F<#;^G1t``{AMNDzH~Y)j^43G+Oe`!%0^r_G)_TQc|dw%&h#e_!5TU*FW+Y<zX8
z_NC1lm+nq~y#LYT2ZdAK?^E?=b#3_Ja@yNbP{HWhr(ceO2NE8JU3tyEH+*mY6KR&C
zQ?6G$yDxQMQpR6LL5G#s0#}`7QVM<lDDvoZP<Q_2Yx{~14Q{<s8%tk@IXO8Q*=R7m
z`1+0KZw<@Tv%e#^Jbp7-`NXl8$0g1G23O==u$X3hx$VOH(?8G5xzPGacmJappI55v
z&uUG+>$ETT551>t-%z{#k;T?TzO$#NC4YS1KY!ml;lk~QB#bp?%nWvW7`ah;_qwBn
z|9`l#ZClauygK#ey<J>(p|(Hs;!lOF=Z=16@S^*@#Q%lzqINMy)<!R^`TI~w=j7!d
z6`#D`%~A;8x@=33hU8`om9JbK5oY!E6V~;fvXSmLWH|qy=R$GCNvmDv-<1|W|F~<%
zjtxamy=Ix``#n81_2!<+;<mi=huir<ZSaqej=s6Q-M?wk)Ql@D1Q#w`_~zDD?)v)r
zLx&Hm>TrDwKh`hpep63+L%FuzQGraSg#iI;PP$AgVZE^>GkDhCyo?MDQSGoLIX5@u
z+}|h5&(Ht*MxwV-UuyQ{z3sNUkFS5Wi&Mw^`-kF#VOR8?eYq3-{>{vD?cCZ1wT}|)
zX6~4@{`A`Ae=mGhnf&STqs^&`b1b)ZE5;@A$Zei^s8=f3d11}b*ow!I*O$m!-u{ww
z-~b~_q~C{+KjuF9u%<Ph&ot}*#xpVHT~nQFW^Vt-@vA*?%bza_vF+bdUu;O|*{#0$
zQCeVI&)W7?Up(q0?uqe>F3@xNy^&F6{k*da-NkguzkQ57=dh>f-(jm;7wb3th^TzE
z?|a|XEr%VO*)|qE^#ZLoKQ3QCM^U>9G!D!qsui#$Be3lKJ>Q3iTAwl>>Z+)y*iia9
zY*qMrzolMNZ){5KZgExo_h`24+U7MmikIe{GZatzTpvH}LefTq%u6buA+awnFH2gN
zt(kha{anMr7cKGK7bAMIZ<c(}Qao*bY29r3Z^zQt>t88`?5>?@)v{($Q+>LRJAdJ6
z294~%jZ2JI*ks&VYrSl-`i_2XxufwL*L#}Z%r+A&{VVdDFK)@1?eWG-*NgAEQn&Y1
z-TOt%SNIZk3cEkqIz{gMX?JIfGS9P1JpV6FOl0RdS-3L#+ww_YZw1~gnt64<%;OKo
zd|AG(J!r3MR{sA-*t+YP-kfs}t-6&kRV$Q(m-p$JnZ{SvL{7fFJ%9P>-?Mzw+<K*4
zcbC1LVVKPJ_;|nj;)^HRn@rjCmrfE?cH2<$G6+<9g7(1uGxOQh)Ggp-^Z4Ro_mX#a
zX0r3keR#co|0Ml*yVbYdM5T1tgu9roN`@Y9)Bnw~;G3LhMxE+0Zia6w=fBH(Q?z==
zj_*nz8&fy3&-q-%VD8x|_)+uno6-kw-Z6SHpR1pGsmCoqWO3$a{uN)IM4fOiul%C&
zRn`7-_p%E~2FDfzTz~Xe&37K#{*LU2E;e`D(=Dbft^c1@B73{{?kk6~Z-1A|Pp)#B
zzGkuWdxbS0!!B7CKTA0~%hYG45v#twzL-vg!{IjG`?r4I-BsGnCu_CfY8EFqH)!p8
zky1L-)m5R&Iyyc7{{DXa_U+kIQ?)hY?pV3sS&|;uvHi&2hFOKVxsulpO<Nyz`%~c5
z|KGMQ+YtIOHlN*XO5NnkVFw?4`&JVY<M;J?;g1iAT48HEHYOdtv9ox2D4T1;i|Xs0
z=8}xnQe|=+2OjJz-N=4LN_WQh!rzHrD%wH2B#$Xq?~s~o^rj~Dy_AXW>>!84igl+S
z9GaJZ@$1LKvL5?#+*bQ_R!HTZS|Yr4ulCcOUfUf%?~u(`dH1Z2jrV?2;Br6Cc~M`B
z&!2x8$Kd_xx=&QCkZqM-&cC*miTwdf$`-u%YUpz0kEDS%3k%Z&`8z$+*?;HGtLA?c
z)V4r++SYpkO9C_`tV%Szr|T_EKR>VL*URNId@}2$SMf@lJ$U*QbP3gs9firW*p4pw
zAbe()NpIxkWgAVNJkyqXWI4@e_OUIQlOz6qx#q6FyRt@Gn4LH2tNca%)_=B(R~uD)
ztu%1m_<Vid^C@iaWa17l@w^P$&%XKD`sds1p4u+b|95Ch=H(eCxv8&`)eil?adgSc
z*`1XSP2b7?n0S8LvbVn5Vq;<=w&h61?k)rEYis2e_lgivvhLiGnew#K_~)GviS=rq
zx9YBlhOGs7JMsTF*F?i8j{1y8RVAkkRv&V8U9D7a&$1jg*m0V%lDAjNH0e+ar_T%n
z#;v!k*yG;G-#>ApU(9Il@r*jVI{EbV$G&`c@6YTX-Xo~~BQ5LH^s{R`FI)UQ;c<`c
z@Qj}e+xV{9C;nTrvF6|J|2M7AZ_rq0#&du3=F{&sHFQmk*l;8+An-_A*WScV)*o*I
zJ9WPa8D4Z*eC(e0n`3g-{l}VOLoVFV-niYvMU(sX_um_}ec$kwzI%O2bmm;k^j|j5
zZJ*cr*>2-o(=>Uv`r6O`eY+WxXP7P%znsLraq(vJ<=4)qtT}q*UF&I1-oM>PXFD0h
z|LXQtPuUV<HD{0e@__Ag)J3K8p5E@174-jm^jR`nU+(>57eBk-+)~N^%)5SLczD<S
zUuJ1nH(aZztho57h24JJrvrOkLeGf1J&4X@<DYz#^{M44<2_ZCQa{A}Di1vKa0)w{
z>_21T24?9S8=lKt@B4dl>n2}~Rf3oP+J-K4V&#*yI<m}ncEr}KtG9)VtYq)*DqWm^
zf8Uz8y|eb!{#N41|6a3V)AVPz7ih`MweG)ocY2}qRqZ!V4)OKOv!1?L@9IbW>5peV
zeWDe^@a>JWcb1^WbMN<k73t+{hQdELpS-lKxQbbBp8c`XwNquDN(BX0f~JEeug`P7
z^Y*Z~k%&%v(mf`l&{bW#FFE{ZJ9<X)yREk0@ugi+uPwI}h|T=8f3{+Xqod=WpU>xm
zb`oD(8!ap@K7HN0<datJkM=GuDmkS7L#Sx!XZ89uO;IUn(Wd2mM~xrl+-lP8-t53u
zDOF}Ylf~%8ymKDc7@}ByZNEI>VGfh?8{>@@mfTM!9=@6KG-S=1Wx;ve_jRu>UmY27
zPgHPrmEne1R;$uiU!rbjn@FvT-Q6~M@@4h%yfan0w^GE+I_Ep&@>EUHe495fULb0%
z+|F+9Q`gsoufN*l*8FMd_q#nSL^ei#X}GjRnN3vhRDZhN<Gj1O0#^p9HZ?Wf+p&gQ
zTu(qk!egOR>kPA8DLp;C_j$)l#a}*@Js`Kb_1xxdmlsd6y|r%lA&2*AIa{Mv{QNBM
z^X<Dw*}*L?uCp)hu-oar<<G?}M$<0-&YT(T(9Pv5bzSe>rKnTC^Ak&7G`ZYXul3=n
z3T*CvcJkNxjhSAluT!c7j%_zhzx4P)jMwfHitlvZp5E?oFQ2pccJ(dozq7xEI?XIt
zB<=rf^TpVx#&T|EcfT#lAJ_H%o2@o=`KRm~OkH~p7jEL@<OJ=advkkx|5WYpM^B$l
z)q5AOxk*Fu<3Ty2#&u_9B`-#}irc%_@fR%#RJ;ADtoJI<>eY*?xz(R-y=<^~>#Vx3
zp=VydInpr6NGmMh(ZZ%VOG9|pDIDLvsq18K=#{mL(&p`VPWoxb>hdnutX9k^@X^l>
zud~5Uf(|PSD=QafXmgzP<~o{wb#)Ev!VK-lodE)YT}m3J@>2t9W}lgBJ^j>F?aNEO
z#l>`^zT~wkHQaMq+4xxE#qy+gPu{xR62E_ED_7)v`AoeN7t~Il-BDa(>C4;{xi!lu
zPEJn8I&bqPAtSCH$-Bb*>x-s*);hUkZF*%zow)Pts!wkZP0rjR@NQ$#83nih-=yE3
zsx+!OeDnD&6WiR^UnczJJ}#Wbxwv-zN10u#nbrBN>ce|3czzbzy=(ou81G~AUr(rc
zba+eAjsLd))UW@&SL-x2YHixBEt#MhjH<7%3=<ErTypmD7wQ%glJDr68am~4PU??K
zEWh-x7R6{U&J-4nyyLPeb?U}SSEkx+FD9&J(UZMyT-|^1+3bwA!wakQW-2}kU{ZEE
z8=|SD{;#d^qqOi2#mN)pqs2=UTc=*woc1!s+oR@*i@Uq9fWUz*nU`<uELOi?^Z9J`
zs+%5jjM%?Me>+fAXcX;l&!!Y<XVLbp*L(7w$A^Cz^jOK(r+zrJvgG9_;oZ+m6R()Z
zS|rNrum13-ZF9Q+$KMBT>2S#%vX9gb+#Z&==<<oJdw1rtC4Uipd26pWGkaLd(+P?;
z8h?0ZFEh!V`|aUVc?P5WYiBr*9k-huIMXfm;HK%Szw?;-Z&-ZM%guULJ2C0mh3ao#
za&B$W1l`N<_0`oY>*MF2IN{-`<K<CP$5HWA%<!(^$0HHSUv_bd7-{X`XmBty-(a_4
z5qr{N-6N_I9e&GxKe_R2I-7QS-mF5+RcYJyPntWs#DyVFj_Zk)@b1S;H|r{X-1zwM
z<L2&e?%&_uE)H1e^nUv%3*njG*Oo6<FiJhjm^bg&<c@W@8ZWda{Q0_QuRNnszDSwt
z*QM#cd)5lh*(clmaO2c{rQXa+seh~==Jr1LQ!6`jj?og$bAE4c`0eBQIOk4vL;Je3
zGc3Nk7iC}CmU+qk@HNjZ&kJUjcuuse?EBP{w;=6vf3#(NmU;W1sKB7lljhAkdT;&m
z^mli5-rShXE+;29MI-Rh+8a%K^(K|}wH&clTYLPr?(`m=unD_6cO*YCD=@yqbfRN_
z_LB$6?5j))Rmw$XDEGdpXuSBpE!L_hb&83m;&Sh3cNsC2fOQQXPa>PIu<Cvk*|q!l
zk6KTSlz>IoU!M%#XW2V{@`4XeU#?3=EEBw!&3}2%gVTNAuWU-a(LdvMpU0eM{*P|P
z?5~@9cX#>X5Uu<F|27GpaN7JacgFpEfv?*|5)LlhwqXC1Z5up7#FYF_PRiryRC~JZ
zDPPBu+~_<tyK`OD^FCeqb~-m+?DSoY!;_mPZQU&^FhB8i$?v?VyW721ZR0<->uEYu
z)WHj-?r{>jK3~54Vc-lc+BqljaGPQ3DUlwx$Lu@*DY%NMs@&6bQj^TOy<1;UJ$>T7
zuOS(W&nytooPKiKo+LxdBQtKMPMPS=ocNnJ$9`4v0jt`iFM)bZnvs!<f)u>fCdM;`
z#&BKz_*TnS;qY7TXz?!-xr4;F9J#z=)AXbJ*GXSq?(ZJ^vOZr;gz<@&7kjC4zFTy-
zyy11r+SjZ%=UkjVbHCZFraY;aO-s^Evhw~{GU`q^z1^^`i6?coUByn0&Q$@)+f6Q|
zob_k)w%C0r_p!)Y{`?K@E1Y)T(Th;lk?>ixcVp4Sy-D1wL`&V?Dtz!yS@U*Z$c<ys
zr$eP|R!>cQ{@~u5J%0o?|C7_M=6g2BZ`v~7%bRoN7Hqj2t+sjI3U*I73%}M?#v9fM
zMLx{?vFzdw!{cs>r<ZSWzx-z2JY5?>`^UE(x^%oe_9)M}<HB}*)80Ryeyo`JGWBRn
zL$@2l;?9@1zg)>(vn;VeSyf4~ee=}F)n6Js#9AAwMMdv@+w|zY#EoQulk6|{PBE!l
z@pylP_R~`<Bq!cJ;TxFZuwd3}s|&|xTzD3)U_R@noOUdG=a!Q(D@3lcNcv@6HNJi^
zJNd%wqsxs~Bqv=^_cs#UEO=wJ_oF-KUbCC4Z@OFN^<+zJ$JtvFi_Ut*#7<hIR6b9I
zr-tR5`F!nXE4a@{S~pJMcAv3f6^A`*#pP}N_M)q&x~fe2D!gp-@hrz}No=b>vps(Q
z^z52c`{2c)zK?d7KmT^F*eA+(l9H6=#k;Enuk2Evc5j}{?W*ZN>-i2p{G*qaZ{KD2
zCqC@DRAbF?wGAyBlP<=WWZs+q{u|4s<tz0WY}0a=r#-RzC2w5dbmQ2?q>C+i_nLnO
ziQLw|vgqlhhi~$~Dwm%2brX2K`Og7~JEr9sxxC9<j~ekrZJ8aG=Rc{@C~$Wvcc1+l
z=V>9w|9tqPx9seU_Mel~H-|r6P<prLkI1xc(aU!Ei;F>Rk-zSycFM2kOwZoIof51X
z`5e@)*t&Wqqcm?yk=TXPsXRH44jf|Toa4Q9?e5J7-bTC*e#(*6SYwmB`}0cW$me^Q
zc6D#jf9$_Dlsz-j<m--&5l+kg9NAR8Q}cx5wAhbHPY)$@?U1n8^gul9&MBv5ue&WC
z$IO(OrF2DGH~HL#mnLy_4N(_Yue@B!wW;Z<#Eo)>sl2ma><)Ha{pWeaI^KlR^IQ${
zN@cZ*mY<IQqVc=Z=kL#D*DU?~{Qi7CZ*O5;&-32b*3htO%F0d8=HJlBj4{!dU3xhy
z*<qdM<*etKrL)`CPLAxpwxnw3+sk?)xjme{-JhE0UF?xG7Lu0!eQ?*pr6(4(9AS5x
zJt5}G%{O^6XFu$pRP=5pYnRxR<h|*2UK&iDPKKqH*I2X#FRh$f;rp(|XX$k%|J~<f
z_-)$57P#Jbl98P}dBMr@$oeXkpYp!~?rgV}z0SM5>|KQ3KO^BM6BZXmvx)!VJ^f<i
z7M+a5!p=&OS-a+{g)(<a@9vg3aCDx*hwPJQE}i_R68LCR+YX_rQgNZhnI`KKcvZCK
zi<K^7J1hNFzB?o4$oYR=UddvrDwjmVj;;55@FZDV?x^gFP#?c5mCVfBB$os$f1G!r
zxa>Gv*O$+mq=mXGu4QyvzTpT@UUX3CShI$9Q?}S1!4ne#n^&B1*~a8=w?eI_;ouT>
zlZwADjvsqx$~kLO_pH6M9yo1kVO)BJRafz&A-FWVG&`op)VOl-lo?7N*Llud`HSW5
zU2U6!rqk7Wi*uhgvYnWqEL|X6`Yr3vwOgHfSKBWw-`mJztM>S%8vB>bY5W0ee)6w5
z8`$$e&pxpFq3br;_op{5eVuW0r*hC9Hzy}>1A<l5M#SWE(Sb;o%{NaqSEoJEW7@y|
z;@++6?sTP2-J+@Jp185-;hILy<3Unx7O6rngU&WG^@SbNt*zS|dB>$`l|{k3^(<z4
z^O(Aw4(#umbZOB{N9!K_+Ih3Y1ZVEGdirnb$_0yZ(~NDmODvuJGC6CC%l*{;t4^$y
zlTwVc*ME-Q+xbNH6369L0_%>MEZot3z`pw0f{bfZZ9cK*?2eh%x!Nk|%*oEOo3X;I
z^A!&@HOHJ3-mKy5Wb<o=yN>+U$!UwNHvJ2bv#GKA_##nuTFdkY)6d%^KKSf-x^bzu
zrC_@2%6_*K&rX{&KCS#avAoKuEi5s3qpQz1{XdtY|5hjH1?Po5xE>~VNp<3pMJ-2G
zUtG6Q`Rb-q9Y3OvpFi%yan5L$Nx>24WVi3rUY&Tla7nPHq@p{|6YX~nAA_{2Y`fA=
zO;uq(w!>Xa?lj93t=Gk`OHZpVPiIwqC;9MG_PhQCVlTtbH!?-K?D%XX=+iiD>(nWw
z(<ciCulg&zBFv@l-x?JWd$lux;xo*Oq!_ZBstcApm~J6-ewwc0iPQYg0zRwlaAUu`
z>662+lG3EttyMX)(X49(WvsWgOZqwsonB>`q4-wzyV13zMRyK8og($YzGzD5h0U?N
zeWmj~c;DuKk$mhhS6?qcFG1wh=G!~MwsE>H)p>EAg?DPS$)le1*k?cb6}McU&EYyb
z=jWx}%J(Ga3Lky^vRP1MRmdC5r?0@hC@b?RdhIG=sw#24OmbeUQl}QaFJV&dyXhLd
zruC!tBgwmdo@@`yCLE1aXtvRemSkW}U{OzR?6K0`K7aAlKU}A8?6YaKPihPnQ!5Xf
ztobwAS8+l--~Y_Up7Zygvzz<!?8{6odv5Z^PlV~6R)t7*?K;8jrU%dbDCOjN@!-hm
zv&@O9Klx;Lb7@Pyo?SHK^JGm{bI)L|x{!`_t}kp$dG`kE%G+AKyg$|2?@>y2^tUYb
z-KFQ|PT);>vArSqt@q-~uO`fXZ?y8gUC}w|12@%}g|_Po)y!CI>yu&PwZM$=o6Zde
zao!iJH{W+xEp@C9GoQmfbMMoNr^WT#78qxDc9|_M^fi6IBhtHPTFN(HrK5kW0?+)r
zes)6UmRIu|Yt`p<xY`B=RvGQsrE)jfUuad`<V80pMNFAc;cD}_vtjCo<t=@!lDEIO
zC|=mQQ_6qp)QFUtg_}0z<=Xh!Wo11V=<MA1;E@vZnm6~>9DVcF<Lw%wq&%q?JI@}t
z{`E@DT<&N3eh(a+CVlXdo2ealsWI;9(>pu)cWyrA@cq%tIV@i<pRUnfkrC{3tk*;P
z=cZFP_C}tb^!(f}B?a}?aEq<0R%L{G^u65Jx&Iy457DWG&pP@zm*-?JeR}Nq(}fP}
zrY;ltld}J#=BDtkGO@dw_MYo&nRo4hn*W14|JI&(Wpe22OWlhq-j2S3eL>CLC0VMI
z3ZK6ys_NlgTDDflXa2rZ&pKaxRps|z`|x)Bp%1#Nlm7*BpXI7+bngwC^Yg9xJCi?i
z@4q@?yzY9^nlNtWW4jFg%w9jk>V^L)bKL_E92c(PnCn*9o8Z0T(`2DlDjPKvKc3=v
za$bjd^UUnbp5ME_9&kE(D(h70HlvC%rtOV#x^psaxg2fy>Q=+-vPkds%gQ^aI?nYy
zIjxlZ!cA7y%KA~p23Lh%ha-x$MhiLeT18Wip1HDFj3wQ2?<wmI`Jpo2atE*T%KWr?
zoh{?3X8G3pM8qbAd3W6;7h79TIv>hb9-k|Bac*^K#+AOBxz27o?|dltaOzt3Y0J69
ztxQVamuRxu3r{hbyG^it>gyYOYYbDg9L}oE%?t|Yy5_OR?pk}|m6t!XyZPrY+|unJ
z>>r+0bGcQieOke`vmCGf33@GGBo;F}EY0hiB+t6gSu%pBXLiU`e(U=3bE(+x>D9)c
ze>?p9n{8lS6rQ{#p4Ur)*T(F&lx6$0b&H*+p9?bi%{yUDv5MXGaFL0IVgji<te5Gw
z>?!J=@ot9D@>|?{^O#zB^4By>6&63!Ai7qvy5roTZ{BG!pH6GG^_;r-;QZ#3N2WPF
zVUM@`ZLVIxbYf<-OW({3?%U?~uD>}sT46m$8u#>`ZM7WpWRG9iyx#obpSE19JcY;H
zXU^<Ss%-SwTI$&D70gv<mvJZa!a{ZXez~*fnXMlwbGmk#yHwjVrQFka<Mqz_{H%&-
zWsBP1h75_%^BAPKrj#xS+2+%saq`KO6s3HV8_T9%VZLdbl^AigTj;D?;rf(duiYG-
zu@_9cJ?G5>XBS?jf18<@Sc^_y5}C|trCnq$SpIv`?1|rW&a1p-U&3?#y>*@f<HU1~
zT1TYa{8Xy1v~3M6OpUhTes}B0ic2ZK>&uSLmTm9WXwaBY!^hYEX8rZMN*3HZx*FUZ
z`TL*mm)m0_a-vkN%sZIN%fn-jSieQ3>ZxyXzBLo=HkK4EI@e^^!k2w+3TyC;;-}fa
zu5It#WVSSL632<n)6`QwGg+lX_&#s5%h@9~{mDyB)ez65K&MBgH_ERZsCW3NrS&S^
zE9%{}*mF<#`ew*SJ17R7`xkvQsi?znv-Y`ri?*^~@oC5q@4FUZ%q|yEa8jc{<)WPX
zf_iIrqq}BBERy>wUE8*X1;?bZriAFH-u}ToH8=LVc!&Gyy*JcX-|gANpfp$7;=w|(
z59^mNOP(m+Yq0TF+z!pACZ@g8t2=JYJaFx|7}xI=D<<W{he#dxlX7P1T5hXPO`fls
z>e}zv?w@|q*80*yXaA~iJZT5pLeI?F)jDH)#PPX{vtL}?qIuSeC*|?Q+5Ug?`gR`r
z6}kUH;e}n|*H-uMI&?~td&h#>=E#(@hI4LNwXc-c-}i>?j_v-*hM9Aw*Ylrw+Mb%e
zRQhFea<#nc@`z<mPyC7h5&Yf{+@txwb?;2MdkOsktwlSH3tnIGl={16f>`O_nKs5V
zl*$e+FBABoe(9Q*>CNc(Rkb|R`Tm~N+-bg7d`?10sc5zT{YMurm2Sy-s9EZ`f2y{I
z;z!9OUjyTWN~UJ%6&xG|i#DEXl4EqgcxapM$2GMI3Rzc7cnX%ds5~jyW|Tcu?*FW;
zDI8bUE<I8s{i$QYm!P13HB9b38-7mdd?m`d?Y45>+gU3<1~A53UU;niyZde5y!q#)
z1D}emjP*MEUiFP{^5WK2sxEyp{=2iUpY&bgZz}4=vdT1I@&@b1UO~2P&qP1|KGpay
zSN4Lt)X&1(#g{gV8tuALqOhK6N*>p*@NJ)dGYW5&UZJ^iqtWYCl5fSu{%kt+@<LE=
zVkOfpCoZdLAq^Qz<84(>_s!5;wqaA`tEM{l8J0%-?HwnV8D;#N&!aJ&d&~D5ChH8H
z&zCLtJ-<EJ`_4|$D{qVDS9Y@%mHn)!dlMml^jX2H`l(aSsqA|4i2JVPA!RPR@6D@s
zSNGhXu9N(0lSCNH*807&m+q(iJye=A%TD~~f-e2dj1Q%!ZBBh0V%EX@a^5TRlX|MQ
zcTP>rnQ7hm@c0)2vlc%0;z!S3oY)_|O-N1qaMRx27oj&idc#aR-`|^i+xom==2?%f
z44pN%clG;DFW>aivpDSH#Hro35nKOi{Q6KI8Cx#4)jQ;<O&ZI-r|Ulcp7ZBRuE7Mw
z!(9a*{Fs|g&CIa05Z6}mKX2}~A!=>dOB0Vh{vmySKUm_O)|L2~|6UTvol-0oAh~u<
zZA$Gg^H8nVbHpdOGEO`;eZ94CSl(^n#H>oq6RNj_QYP81O4zpH$Bvgb`8P)?wC|N!
z?xW<(-JRgK^E{VeUyJlkJ@(u~tKVlA6{QL7_^MI6*G|doYf{2)F3`~}U*9qJwq;*m
z_wdP+FOL1nzwU6=Fu7l<{H9m+L*;zcuH#SQ>MxafxyLZ>HDlWw>650-G~=v7*Nm-E
zC99sF@0K=7)sbvw3y4x~_-f|teMSBJslPSBSKd8Lxh9l6sW|(vLex6V(+{uycm8}w
zEP~~&ci)oy7ffC1@4p$pzWB;vY2ZGWmg8r3@umJ;d`u~9?~=w%?=EreiI#X%9BFUa
zS$}!Sr;s|{Xora<6IgGrjW*W`UF8xGa3MSQ{gWq0?En91-n`j(zFJjM(ie@`H`gpL
z)?dC?&TKC)>tz;fbK>EdP4PQp>}E`J4W7rmH;h}#cTuUIPmf%0H@8yS7Q<!BpLhS`
zI61>mdE>^78dK$;MJ&n7;N*0eouK-t@u}v*89#1ocP$DRjK9^QvXJA!0}bWnvpwF4
z{6Bf(-H$&-YfhdjED;OJ&Ht@GeYVH56sM1&-%TxF`fAj9#n^8WPYGT5YNnOaq~84c
zJSVm!j+vL*^A3xDxV`?Z#>E^F@uW2^`m%lQCnvG+b1zHTSu{oa>fB_0pO*9Qo7>yj
z?bq0G9@sh6K4{MsAFi^$_tZofSyUxGghKiK*DlO_nYbp>to6V{Yk^Js&wT&)^4OWP
z&HFE#B#68Y?^~F{dHdfUhhK>;BFP2o|C#dg^YhQ@zk6oogpU;>`G<~O{;&3LURdX(
z*i*;bgv6(-^dH@EdpW1^+NT#DKEAV~xOwt#vklL@5?6`VM(Jn^w<ZNyZgcDW;mL9I
zOT!gjUB!v#Zd5s(zh<uU_@J-w;rv-kCjSf+I(^jn;R&uX)oiKePp0eE-Qkq<T<<#X
zn@1G96FT>U>Al6_fjb=K-TvE5n0a({(Z4#G`z3!G`zAYgT)APlrO5H@g4EoCau2nO
zdT)~&_ZTa>8av&IY>MJdzVPt!lu|*B<v-e|&5nB<u;}oI-|uW@`}e5!|L8d38EW}I
zUMBUf(Y_$V%GX~Ko<7dr5+Qo-n}%(&`C5;Qq4V0ZroIWXE1J_~nts#7ecSu^!;(gU
zyPRU%dbEE2IkBw;)OuG@vE!FH8K-1FjaB`pf4=JJ;E9H|>E$^KJodMK<6qv9-n(ta
zZuy65t`1>sMRIu#HtSejN{SkEOr+%-;y<YUx}JY_o9xU?(Fr<E$-B;F{J(nU*48c^
zhdaHGT`ayV+0eZ<Z|C9HvYub2PCmgkO|6?_LZ@B2-`;PLnqn3maoX9&u@bKTpL4lR
zN_#WmO^{uZ$&@wE{7+{6yR_1S-Q(^BAt525ms}I?xOz3D>QB$lxSaFYI!$!vp65kL
zYE^AAH-onP^ZmA<=#wDZ&YD?4SGP>qn5>qR6E$0>bH9tx{?9Ho+twdC?#uUL;|bXV
zMYAP}FSY+Jy)g4jgwmo5>UxI4Umutj?|l64@h4H^V`u+HmfQW>x!S8tQ%DHZ`uMS;
zph#Hq%AJXxU*~;0B6vmVvX!-9j7yb>>LkwHtJY@dcJT1cI5u_0l7}q6*wZC$70Pt|
z;Wd!uewfvhd*=nDuiCKQWLx!_XOZgej1u#5-i5T3-R;_x`qK5)uV-8Jds1u9SN*xX
zfme0s@wL&<XUf~moGr#1b~&o7?LOm!z$>w9uI)&9^ilG}(}dbN8bQW8o*v7ZbMi;(
zLYJ3Y=S2M!Ha;Z3cY(mvvtp+dE00f~r>m^?;P-R6X<t7+oIcHX=iHq5GaCa>eA0ZI
z6sdo6=^o)PE$e5UodR0tmdzEs%qQ{Gn#eV`sv4pV<&RI8e&w2<dH?^DFXq`s{ylT7
zeVXp!s?8sx=BS7*Wp{9TbXVy60jDJz59YXq&%2uP^qsTu^~JiEIwyHJ&1`?5EdBG}
zYhja>FFf1l9c1~Q<96C{cZS(I`=`=854Oi~MTN<oXo}qb;quF>9NCE0;MdoL(>RmA
zZ4%IH&wsxDv|E}_<YC+08$Nx>c751(S2L@BhwRU_^3F~rIZ}7#z1sRZaD5zud)M!W
zfk*o9x0|Fn9k&w64{BN<nJyT!uHeIy{patb7GGhV<}<@@##b)mWBL!Arf`PdYtxbb
zXK&W~YnoQv(;44(@ZG<D{_mT|X6{0pvgE1{^|Z*|IM8PM`~vesZ#Kh>YCf;0XL_nP
z8b$b?&bUx+`2Ea>4}bL3Km&7^;?KmDHmsO&8Z<DsOKwu^spx&0n!*ANulp>|?3*~*
zEVk*x`p3<7Ia*(}s*I}7U*+GH&VOG1@h|Q@wx#9$8w=i*w4Jw~FhgMK>H@|aoB4Im
z-!S`S^!d75@C9EfP0h&X9(&3!EiOH`dzmA%`@Uly7sP~r&Q(-0n`+CiQl@DInoyjq
zWNf`QwJAy=Udwy_mTSM~{0Lc79P~ncuh`nKe$RDBuDX8yl3x`R_1tE^$(J<2r9VEE
zz7ALz$kY*IT{p#gnTm6TwA+OA<#WRCJ#Q)bk}+{{VAqV~b;5PO%rt_gCWQoUaq={p
zai+uT;=c6@MB+{DX4t(Avg3U%qpNKlk{G@>^~_iOIXk{wnY3?<)aK@iv->an&{jW_
zX1plFY4y~_7jLXf^$9V_FnTcQ%`@ZIXFI$u9D5P4Gb6~Q>GGCVFXqe52K&}pPL^YT
z9(3y|Yo>&Mq;=-c#hIx(i@#qs+TI{0IxFROxq6BUZ&{U~-Kux&{Xz8^uVpk3x;j0w
zO)zb?ncZ~1Y03&$hwP+RL9#!yR<HOxb<S<~N0L>iyaI$xpPGjKb5>b>@x+6vRi8q`
zE~;%ma5SRv9-mrMnDmF$3lHhf=GolLlO7RX(g|A&uz6<WY2kor^;Nc={qlh;54x_b
zJeR*d<d!%0QT3<l>u(AaPd;7#WeLwD>40Y^oOV8ETRNrlsl?+&JBof9SuD{yI$@&C
zJ=Fr=I|svWUw?9ZMd3uN1@|@ErrbTJ`qw%z_l=&}+Z!kEW=GH5_>_ZHphMvG#>{l-
zurkT6fZbx-)6b|Z{H>y9R-Sxt$H_Cdi$oSS@Vpk~PTHB5Gxw%wd8&+Ja9z^Py>~>O
zD4bqbv9T!8|6jD-jCIDnYxrk3rQY_L#XIjT|7xcbyDDuGrxkB`Z#-}2;p)ee|5~2(
z&y29<lw>{D^X}yO<9&=5^3KTfeE7`pMP{neD^*ZG`d;9HTdIXI=axQDk5|2aCE&wh
zM&-EplLvX4qTDLYCB?U8&XmyHv|;U}qH}CrVpE0d?HWI_>+o=fOuFyeSYJ2s?d->$
z1)&%29&b!beR5<;9Dm1zbGwd2F9|TM=h~Ynx$4c*%)1M>lo;8ohFGqAvNqvMspMPk
z69Pwm{n@nH{95#P%PUJ|H^!P9ZklGj<=ayCR)=Lz?)ofFDteZ``uwV;$r=lIm;3kj
z@!SrXyKv_9WtrSt-?wKO6x&)&?s*hnp7h{Rt3<(qMfD6iw-3#-JSbA(8@l|%pSED`
zXF~f@81A$1AKZS(>QvXao?nxK+-yCPL32I*ej*1;?w?p!v7_?h5mjZ2Rf>8Cown54
z%rrgd^Zx0_JGu)}yc%?4QjVOOIXf+;Yt_uR>r<Yq$!?mn?fAFVN_-)w^a_j)9k^)y
zZ27gxEsI)Oj#MA~)sda3_UZXaKQVR2i@QxK?`{fSG-1}=>Ah1<<$k|fa?0B@pEX_X
z$f6>Ki@QyBnlKe@zhsiROzoCH{`S^SUYXpYWi78iZks0*-Ew-0g~*|l^UI80#^t~0
zeTX&>`1i$<o~n~qV!>muFV1?!Fez~PYo0o&E327ncCmP3A4^X1VY4@vHvZbec`?*)
z`nJ9bO_l?P?%b_Xdw;>Wa(iPoA5(kK+HhCJxtX2Y4<G*ED)L?CXzAb0b|?BRHY6(A
zTJ!myF{usxXKOHBe%jore&5_X-rbU%7IpT@r!$$=hiyIerg2+vGJTh`J*hTrvf*@_
zjndYIwk}4`|8%V`yq12;Nweo<`_(;n{Q?gQuh(>$!@|gZCdh3ss6AoFpmajE<;Zcp
z`P?!4bE+Lrx*E(>3|F*_+*tHvM`x4j(-n-ZQO71Yh)#DZt=a9xe_}%B+J+PTUuAwU
zG47Yjx+Oa8vg*~U$dF5!(<`hs_UkwVT)(u#&iK?Lr?_%e#YFQ<bE@7Sy}7E2Pj`;%
zvOn=2>ejKzn@xXN>t*h{&>sH%;?|4bEm(RMh3;IVAv0&vlR2-nSA`cR9Bi9hw*IVy
z-ny%Sn_H|G9L(xHd~KtU=ZPoWCF>{j?{Sq{mdPD@;ViFCqD$u48SmuMpQb9S%$T5^
z^LfFnGrz>EE0%34W7^;rVK5_SQb754rRt+%A!kdQRUfJ@>G@Tzc3xFQ<<gUHxyDlp
z6ISi-I2!5prczc}FXF@cwQBMUz6S-pn$Q>_Vv_T+<$W@@8p|uQpik>P;WJ}5%8xZz
zt?%*LDI;<JCSy_$Ph8>>U+Z>}*Rs7+&+R_)dSQR3qkdMw#T#~u{4BLEt==qI{Hk<{
zRCU>E55E3O6`O^p%AC0+QKBVovPd_2>;M0Ci;o|?^kidw(U!o#1t%v=sr;34Z0h|b
z{l!-Mv^nk@w5uNHd2y&t^+%ne+?-o(I|8OFZZ<Po9~8HS-)}?3$-Fayfq(XGwQ#IE
znqid0YQB$i!PO1fAzmEn&z8IXdGVE1@c)X9F>`#{6AP#8+}JT=zSzahE^bGJ{{DPh
zW}vIhzr;UWeOug$<JUHZPS93<8GNnr+*Mo6D+^5Dv8Kx0*wuZDS^MHGDWC4HREaGa
z=4<c7T)I;Ys=&|BotyFY%6@n2t{tZ|Z>*JaTw9&=^6kluYs+W;e{QhmxLU#5oXIZT
znw+4CG3PD5_Rg>8s6RQQlF+-k==P5uQ4ywhMcUhi9Ttlz9PARiy?^u4O(!dFrp_0h
zzo_)wZKYp#q-vPjgLb^tIA&V@Y`UARMc_K-dY`wx-#YA9T=|x7*8c6;i!GTkD$459
zb(VhV5ar4Iq567xf69qJ8DTAY3w9hkFp+ithDTvyMzQfnB%hypazSLFe?h2P7}t}$
ztwu3imTX4(Y5DW}>O9%2^eo#W)|nc&mp?doM}9}>lWd)O9>vEUwl%>f%KzJXmgHG&
z)Tj)vk-BD?e~rOVGxm*{PeEpCI#by14e5~zUYCWt=cYW;^NrRpj#|sv>&BVpu_y9O
z)D|^S{&Q{nr)|AFd6$X*o9Zu7NdZeY{QP;r<C>+A(0sw|aVn2`Je`7OPuQ}~?4Xy6
z)2Yz-vY9O`4sA)il6Mn$oQ+eSuHJKxg@5JI|4yGLhbt$PmL2;4d)o=8G$Xh8iEsbb
z?40#i%h9>*L+RSmkdK<IpT8@yui^I>Hu<Ud+P`1rzq5ST?`_K;?|N9XbdUR-t6rNI
z6mFTLl^pjmYt9bQ)7wvO4i!FkDP``gc>f=PH?B|h-G0_)W})jQjh|ZrckP%E^K1rB
z!p<z$bs=h8C!Q?H<yKYeJoUu9dxfL&zI|IS2bXG^?(mrI11j`{gyuhdKI@e3uEp7#
zJ6_M-TUqkf;8?7+g=jGM-${2vUK-8{cHTZ&KfciKoo&O#t&+macTI{`Rw(xC+A^#3
zVgl!5$0<{p)<u4BQ*d^x{^B0i7StNkSTSd>YuTLr{=d7*3Kwi{3Qo&<ZrxFT^3j*s
zJ0?xrYV7>w{x-cANt0YpK5zQ@Q1V;+S?|{ee*f9`ieoPGsn2GQQu7br@&r$d2?@<N
zmGru@U~8#TxA>VuS~bTHJew`?`E|e@+1EzbuAXVL(0*5WD){#MlT&Io-s!!xv}4nW
z;*iJNulbd%w3}l0mN(^vp`^dL<%>6qF0lCT`y(k6RG+lzV&cj9PhyRF>uQ;QZayBf
zOpIe=<P?v`hwY?~oH_dPS8?G>7Z1=7<7L-?ZkFGgg`$kzSItys9{RU%bHt53uE+m<
zzaaXjlvi|XakquGqS>u3jm-u%EBkI=Qk_)s?Cfj5!v{?{WbEb^TAvGeJF(TU@o~lu
z8wpkL;(;Fz?!D15>VLJ9ca5Zh)b$63A)EP`Ca-2Ouz&F8=_a3;s}qy_{I;BamnHvP
z)LUY~q}z77Tny}$T{&0V|F>*><|1ZQ{&T9)yqPO+eHBq$dhOz-V)vEYzl5J$xz?J~
ze_^uYN_T%frd#HUv-c`~{O~94*|{k9eR_vK>d$)kRr_LmdcsV%iuJZWpPT$HdDgpw
z6Z876%?|U<6qtk@pC%o4XLHAg^^KP7UuGD0JgMh@eEu$%_D`$*2mHAo{rtUfY6Q!a
z^_f$ZQ-m_-g?GKG(GyvFYlhQP_F&cB4XY;T*L8(m(M&Y<)w{CA{(j~C1^&VDUR75g
z6&_$%wR+vgjFV=d8U9>}h^?T>Gdthc!Mn>uy{alMR2Oo*PCv`&E%AE$+(+kD?vE%s
zb714!+dICtvaemMo)Nf_=k|N<Ie%3fR<6FbIqNX%u72$UD>lSTay_Z;<+-1s>+bCH
zJvX_cYG$j5Fd98S-CuOxZp$7&qj~c#v!B2DrUJG`<Ttm$GpptKM?Ka5>nelRsKiaw
z@nd@4|01IJa?|X^N3VJPJAY{EZm~2u!$<Rd+4yxb^Vcx+OgI|JFn8*Sl*o&qZow*_
zwDR=E+Z+A&`xIv~7}s=8?Ogrn<Lc)f`;$+Ve%gIf;o2mlhpvhbCUU;B-|%Vv+OS`y
zL6aZmFl7fxEcU2b#S#B&Q`R<Hdxv@L@%=UPMXp{JS@60^c+1se4Kb55axZ^;URm<z
z=C)IFCw#=1J*#<s_UW4g>a3NU{^cw_V%6*~x2)Z>)lKu#t23Fi-}*gP+o5rGi(Lq3
z?-ka+or@MatUPt%U!iK{rp<P0Y$kmv+6s1(dlUXUdi$*6$=H0aXNAbcV;O-rH&4xP
z^I07FpXZHrAkWeny$9lA-{h~4xW=F7m$LiqloOh+lHf)yV`ahB6C%I&hpv=cGCQ63
z&-Eh{@1H*1u`_wWb1~-l>%pH(^%}GP9}JH_VIaBogeX(O(=&>)!sljvM05?n-GLpm
z{ZH!e<1*9lPtG+uf5a%xYoBiNYw<tpw$A!+xayy;9^0KKJ=sf+NTnXs?C#LHbmBv4
zl~JYU%TTW;ZcC=tM5eyEx8~{mJ6<*+4w|CPvR`^QOg+Va7(4iUdnk5!1N+5{n0b;H
zEq9f$R?n5?v|W49ciw@Wf1@W}&1`vHtbJvP0JrDNjY?ADE17>?cANd^pQVBJjFZ=k
zgM(|QNlxee)3A17LFQ|jG?@;kzwf>N6}>$COx}BA*%5<htuafgzB;wE#2ivg5UI9K
zON&{5G-9n7TTki2)PKi){%2bCPM2R0_9d&W<;8@%a&1;C6#w(QIWM)4EBU{K=ifzQ
z(@IvH{C8^ejNA93cj>I}01a||oEEvo)n6x}Z?mg-?}4w%R(0H_Nlbb3FMT}wra~{1
zwQG90!t&Wn|Nkicb{2{#Im4heF)h_)(ITIiUC9q->81XBc>L>{y=jb_JLgYR+wdUk
zlg2WK0%cJ?!78>j-V1JjV$q2d`LbGT`LcGVcM_8O3>&@E8k-_hWcaQYpX3p27gAZM
z@li-4(<)b4*=yE~y)}l~X7i7%txDhb{reQZv({?a*Vgn%nQ~2j`MB0kUT&t}_t>Qo
zHoMGY>*d_d9gj4KzCL`|L|WvOdF}2?+yD2^DC>XPuO0c}`}!`0Ys+7+Fl|}?SM=~J
z);lL68twL9andoGG4noq(AD$(T1`CA6(J=G6<;E5U0Lt@-=3}UD_`W@++O+S3mx*e
z>tdWPW^Fa9`%^Jz&YS=ZyE`)LK;uLew<XTz>`zk+R%Bf|F;H=~!wF69iTye=HWoc<
zlH9Q2%@RQu$*Z4kD{04z7A^=XYn&{|)^~5C!1n`ZR_L<%ElGT^b_08|(XuE#i_<5R
z58bWLWx6Dl{cx4woVhG&x|cRRS$E)tm=ss%g6-d}PH*(J)KL5wggWmQ93Ykd!w$58
z<I~gApyNAco9EZHxGzwzIV?XVH%Kg6eKYeq)*StLGqRsEbZ}UE{1CNFKIXIJUASLL
z+3B7&4eUEjm{#3k{V0}`Z?jnOT%1zJDW4SW4^LRr4sl5MY&*$1x9jRs^Xn3OzsW4C
z?-z>NWL9ansQm8V-`_KDY*5@?_V&@2mzPhZ$op*qPlCyQNMFHdt8|}b)dd@aP4>JC
zmtT%#)OO=;Xx+K{tIxfK>7VkLugkE+rA>>8eHFHXQ^$erxq9+}HAhq#7CD?gmUT;f
zx?!!W&GVj15+a<P{hd0In?PG|n3$PY1})`!eSQ7^h$!(+C%@kZoVF-_jrMj)zodL(
z^`#r<`S-8if9(8vv-nrZ9mmt|{M9)k{%=yJT5+?5eI}E#`#;aiYD?{Az1~~=+^?pl
zW<}6auIg`Z4&J@{xA|Q^sPA&G!Tp<@V(_Xv0@-UibIWyhq)aI^bj=7*XukDL|9(<Y
zMNZoHdNGG5$E)~i8XMwyP2X~LzqsSlW)cu&)jN-UvE|YTpFh?I57}Ft2n)y)pKVmM
z<G|kCVOl9u3*XBtmtQg37s7CA$r7eO%?YZL-z;TWb>#Tj!=I}q?`~FKecrvk;a0Nc
zCUEcUY16e?eW!iqb2K^J-&cEcTduToJKs{!y*VmN-fPY-DJ*B4uy|p_T%(v7d*&Di
zhAFgFho6brSJcjvw{xS^F~hX4o?l+<+>&ZDncw~4f^O$&{+anVW>xU42w_rdUYoPm
zu8rMLj7LyR=85OZYi%tRUkn3ZG@f}n`(y0C$!jbl=6G3#zWgU8-RoBO;~{(1mlp?5
zP1TmPD$!W<<Kf|j;56?$XVdAQH;*rS@@abL#nakPnfHF2AvpQwnQOZ}PwO~Lb?fWl
zxbk57%~MUjRdOy8mKFDXmc{V(ykB$h{@Ft}blqa^K6!F_;e*v7@3%ja<9fCGw1ddY
zP=C*6A->Lf_R5npHfT2f)?!^8y+ZPCRPVE8jTaNu&e?C9w6<l|PDU@2v=hN@XUi*7
z|A=R+rc0VCuhRT+C!K4`>8CPwH9scX9W&poQyDx*Ge|{SG11&$>1(6=dWMFEb8ISw
zroMXHA*!#h|L@=Ld>%IDsoLSs?(8gn`DurL?u=s>Jf9oH$DF(n8C|f>z9TPgcShh+
z3*XsS68Bv_v)}z&!Q1Xtf39seT-~<y*y+RFV(K;=>#|dSO_@1E|L@Uni<jRps+J3X
z^Ue32>Z+uD22A&hj4k#&H9r>R^CoV`9PYpgi`O)*e4F**ZrA3ri8Fg&)upbqTvWDe
z{-O7;uCBhaBCt98`ntkTPdrzJtejLQr4_dN>*c&I&n4FINwI%R`Zp(qn)(_TB&^qc
zqrSp5ZENVp<WQUA6;<;xdZQF>dq+Lr!xL%5sZko1rykr8Ifqp#d2K_(ZmXBu%~z`1
z3T|8}&U`y`LGW_F^;>H_JUG@yZ(o*typKo5;zM(vV3k;!!<Fodr%g3nFKv-NxJFg$
zR?jihr!{-bP74XY{+qo>TJVs$tMR_d*cqLjU2MK=F_TimW}i)Sx!4k>z5V?2Y&A8n
z_G1s#_G<Sn4SvDYmCbVdlZ38UFL&qc*=H5HUTlqeBGr5M%kRk2<)LZ69)hk!%DmjR
ztMs)|<|P#w8JT-oKfA^CK^NfgNE)>)Sg@e~k>%CwtqU$KI5J1c^!}Rbi)vp<*)Qkm
zRqM@UI-9Xo&7@P<?{(z;Yz@n{|6Y;PzhqzBQuNddbR|vQzn|&5%HPlXCv|C3$;1g2
zySHRN@<<hD+<EF$(@oF6mz~rky&M{+M)%LV=QlgS_hlzXzRCk7dE33YDM^)@u1}UL
zh}>rb-J15}<KrzE7n?3z2zYjWzW+3x$V*4}{jIJMR^BJsv#eBmn@(iYuKPjLcFB4t
zns3TYikA5ocW(P)zl&G&jTfumPdT{m<djll`^%f2_`W(NRoiCh=q#9Z{n59cnJOjo
zj~scHrz36NDjD4uz|hFCQEoP0&FNG5O@>##Iqy!^s$St@uJG<!qFT@G7Oi3??`y{l
zG9T+leEZ|Kzf*f<++H0;#p6E`Lp9>AhuMUdo81+4+g`cr%$Fme{r+|}7MGX%s~Z@6
zn0w{ow%ll~kQELI2@2KU-X#9|@-pSr6wO%sbg#3*hBG#qyM<&G1n$d{Tq&5_D}1@s
zjs45xqPF@8Q|F)FQ=%oZHA=Vk_cv7?ou0e9%YzrWbe8O#56=ImV|FGV3sx>lTdt?M
z(N!@yA(eGv|JsVHVT(DgZhW##<-#T9o10RTzrVZt=gVb($k|O!H8nMMKk8~<Mlzmf
zQZoLc%E_5|>hJFBnb*vw9Nn!pS=q=wnz?<G$ri2Fa9@{?`?qfScD}d9JJau8zw-U7
zKTGb1b{)>XwdkSXHPcW-o8{8Kp1xY;ajx@i$J<?<6Sv-3xK-a)i^WLwJ=-hM35p$G
zf2OAAui3uH!g2@ev3b%zR<5$X*0%FTRKBFzqsZsid9TlW{UYE>&*53mcqAl`{fmDU
zknl*0i-kvZ$LZuV=a#QD*I4`5XZ4vcjxjMZ_iMk)hOdiJG%=a-=jZ4Bmw$1IXdIZR
z>@HzZpdhLpmU6I(HDpD=!_RdSp3c;~y+EdI$F~wrulZ$`%l+npE(!@*7c)~>-S5GR
z7c=Vomw(BeP;oy(SImdgaDI;Ymxsw!eDdB~FJvTMIaTrU(f(wfkAWPAX1fcx1g1PW
zF)?IiP^($)tqWVT!+(8$|3B<h-QT%IF3k?-pV{r$X}NgMPVo&ld+!+EdgE5~v~=l@
zE6x@3?wx;<C_J<L<^leq=DV9xJh@H_-IcyJ)i~N@vYzzE)4A_Ym7M~ORo2^ds2}ij
z_@d_<xBX3q%;K3Mi;kP0Fk9R<;ihap!(HER5oc;j|B6&Emenvzl$mTKe(b2}yq--)
zSH7I{EKAi}bolwQ!19z^smqwp&9QU_-4YeI*UG(5rZW4nNPJ6kGxKCM-$iL>XMyfM
z^P6k+@$dKh%TF&9dC#7{s{F^iS1(K6r=6Q~5_HI!RjHP4)E1Bbx1xXT@z|4J_2tdc
zL#n<XGxmyws+GTN@z^7}?!6Ra;PVyX`NyKQH~Y=ClCiIwbKrmjpS<0fJ(ZtN%(48;
zZhh=Y<=2yM-R6C{W4oiQ`@&D*mtui4n-?FO^xDle$pDlOdtdF^B5zvoK2YWX%ZZaS
zy!qIKPaK@3b@P_v)K|sHW{nf(v-$2@_GNNmTj2MOOShHgsR=Le+5N8m{QKi`!e+|P
zXyq0+DtQs0=0DG8b=X?F6L!!Go1(U69o<*^J7Plu<IPQ}lj8(lwzTuhU)q`-E@xA5
z;NoKUle5j=yKlT7WVYo<b^4zhfqyP+7gm3MRq@-sng0JOw8H|7PQ{;Hu}o{#joji*
zJ34u?)-4NM_2AX@&`_V;w<oTY^=&z+x~IUBi}AmdmfO`Z6}wA!rB}}od2!+c8{hm^
z(Y~WOk3Ri+U(j*qaOZ~$+uylQQ+DV3_U>-?^5yDwc6KtBMNgPzS91BfHJ)(a_VV(j
zP0}+<9@t2$y<g_s+?Da=`wp43CYGDVy59GetULM8@5-umDF@po-_V(mykke0`&prP
zpuyBdf`0J<P7@3h56f+7c=yiq(3<|vgENI<me!vCdFR@!j@>)^=iSdgx@04#!%ka;
z>0x(+{Wzw0t@c^_uQ^6?ZJ0Oc7!%WMF?N2rKM77BId5)GPyhbz?v+)c-174B+j4KK
z)mB_$WapDvkaV<5L^o=Rb30$HiIbp%j8(~tfR{pEd+HfO8~5xiHpox0mf1Y*z(v#Z
z>eXxF;=J}4UQV7nPg<?uwCeKkRME;<R;6<iI}^V1Xa_s`-cB+S)9XoJ9Jip@S9IEG
zw>f(+#nnZMynMg>i1l)r(mgYEr|ULMEuQXk=W@ptEse)F3WWpbf2d6{%h<VN6ECaL
z>l=GN`x~X5`H>fO)ktz{*40G`2b;F!-WF>pdU|T_^}LtG(a#Ml12wAG+o;rk6cW*j
z=y<fs<5-{E(K$1VZ#=j)>wa~ZvuO1ML1py~)=E+$f;#j6TZ>ltE8V|Z$RlGoH`aRk
z#@7p5duHi~=tOtvc-_ptx^;tls@IPYXY>8sll7x^?*3aYw^ib3xvHG;<Lf8q>|W$L
zb2Y<Ku4fA;<jZ~I<xD$sX6Bqf&(+G`#T+v&*S%Hp>Dh%V@iVRR#XkRUh@Zaw`IRr7
zI<}<~qCei$h~LK{X|#KJU^#Es(#SN{ny#l6>%Twv`T04hA^-E!)8xOuzJhi~$J$6Y
zDFiI_5(VvAUhX${N9AX=ef##kJhb_J@!=dZ<Dw@XZM@RQ4z+T>ytMSRd%3L2Yne{|
zQ~PRvXIwcl|4ymf#Iu+GPqsO>UfSZ4gNe`<!^-3HPP{p%Ri<-&lbM*qu?^??Qo1f&
zF5WIEAR)3u@wO>*|0nAu8~>cVZ?K?QxK-{2=dDR0b?Xj!)qR|52wLoEf3f=bg5D>0
zCNaDU`Iz$X&&?B0-!J$6bZu=k=mw})uTIUeEViq7_VRL>a@v<DVXt1H3yZI6S3Xjj
zbZL{yywC3bg5F-&%NGk4iF|#e9sbk%N~*rO8r$ScsvGBRxp-UG*Tl?w-V&#aSzk}Y
z#suuURA{&S%=aUcRJ}oG=LoC$c;vV&ahJ2N^EuupyR+)6R=1dL)pJgBA<sMeYG;3Y
zdwcQ44AA+W@pV57{rWXaoxN)UYHDjOYZCbXzmq<8mO0pd_G7UtEB}>=o#3+BF*`>~
zf_q1eqIdPq*ONP@HOS1CzxTZ5+x~hB%jIT{YyU)S%J^3;+{FOes{a4qUlF|+33++>
zLx&F8z1S%*byf(I*A{M9hEy)b<yUU}6>=5)QW#@fK2tk<-I5>gKRwlUdF7)YIel-;
zba77C-}`R7^*b`9bx~zwKuy`*^BSLJh2`b_pP!pM(<qhe_xJby*PR3vl8^PQiQPTz
z++6G6#csUQbfeX3Ywj`6HqT#H^z;<ywAKCpe!X5S+NGen!($0^m#Fr!kIC&TqZaE4
zZv1{C{QY(Q*`>b}H=KVt=gZN}LLUz}EzB{xz39s6&0SB}mFy1xR{eFY@SJ}B$sK`*
zU)<Swnf)evtJA?2PT>z<zp5%IG}Qh5wbWbr?(^*JIVLYII94(4Rhq6ByC`&ZSVeWU
zu$<hyb#MKGKwB|E>z@)56ciN|eP$Z5s;jHNT(mE>#7g$y!GoZS=FZMG58jq@v;W_W
zB_|fS>^gAw?%c=6`6eDM>GNKFJK6B1{d!>q|Gx%YJJP2)I=u>1`t~%6yWjrsZ<SAv
z_|EBbr~G`hQ{G2IZF%5gHyPWiDW=)iE-Z58mb0(Zu{|^2Ks#*Bg0i=_Ko=Xkc8k@9
zIDO=llap&{Z-4yejnC08(VxHH?_bLOTWn{=N2PuH_H_o^)Nwfl3Uv#;?BbC$+MxeL
z-=_Gq<@Oz%$6C%mKb(Elk!eAQnrp~Yr5THKk1XA4bL~+5{F{FZ)pHE5o0X?ukB`o{
zu)uNG&Gj~wp9(%aaQyV-r1Mg*sTPHgTD}<6C%q`K+V}gN^|4;*<MV8*UwoM8o^mzI
zwD{SX#49TTO)@XFWL{pTxKw|SCFq<*zqwYRYpw17{a_XspYH$chRQ~biR%KirshQK
zp0uoVkEQ8)zFo)5?R*O}<b|fqh)OWo{C!{R9M>FWXO^u!d$=r$&&~Mu>9fC^W>oIx
zu$o8BV(Z__9etcU*+Q&JA$HppSs9rbMyXtnkM}Rnyu9qpEK~2Q8TNwfVs<vIU$4*B
z-1y-`fsKug-OS|A-@mKt==AhTn_pTJX{@BIyp;Q&l#9Fj<!!mq&(6#|+{n!S;>O0u
z>g7F?BBpqhb2J_JvU0M-4egG}#o5Bu|L>@m*7)TqInP=ZpcL@-|C_s|4p+X*AOHBg
zkeyv-!GfgcZ`j_<yK%#Gw_)4eb7fY^|K6OlycpSW)Mf4>Cbp#OI!E$CewjZv>wv8V
z*zxNxqg8S7Ifi$Cf22P?{j>Lb?fyHj?e4$-{&#J@v-0yJFQe=FpSMj_EtZNj-no@`
z*XyXXcFFtJvfkWP*S4zltFD-<D;X2{H9Y=kNRjELlNk>_UE8<f-TP-QFA_gJJw1KR
z_3fFLm&sTbwSdynzS?RfW#y+0eh1C+?s)V{nZCHY+dTW)8qcZMzdt=Qb8<5~|D#v0
zyjr<LZ|tw%zvtusLmQKiuLxMkG}9<mNkyfliIscVx$~)+bCoA;23_X*?*8}138$x8
zJY6|QY5T*wYm4vc$BS`a33NN>`uB55FYl2fs{iWQyep<`;rQK@^=#RPYnE*-5qi0A
zeD3k6ZOgwqWs%*Atx>wn?0iS=-=7b<adcCPr{#=bfwf_$Cn!2w6g*&<ZI-*prBf(v
z<K5k5Zx@xmzE<-3T5sTDx7v$(%$o1I*PJ}kBWXOtJYNoU7IffZx0O}ZsybaM`U(mS
z$Ik9k4!@h3qIJ*Y=-T?VQD$3CE_@KzGI6f;V@rwelHzHZj{mE|QZ|QAipxy6vg(J3
zq?mA$T=Bu*)_1f;9$t^1w)6)7+OMy#gRX^j>zDK0o_F_2f*<3dLx(c1uao`x`FZ-!
zPfu^$zCHVBnY+9D<sF5|CQ`hdoSdMeg|e<{6+JnjXnj=r-TnRj%a^NL&F$NoeZB6}
zayQQikw?mNZL7t;M}74OUnOp-^>td)lBA9P>x%12jdzvj9i1{?ZP|~vphEuaY;*Pa
zeKlw1+s8*uRY~5rZy)Gf*Nw@?A3c4VYIghYi;K#yuC8`p8MJhUeZ3u5Ytu~QbfeT$
zB5CL5curRHO?mFIDgC@$)wegEpfhM~tG3*<5p+Ma$ip!2?k?A>ueNSH5jEGQRBdLw
zzMJufKi@p{*5sbO(wY3?>gx1WH`o36Q2{z%Wnb;@58u8mi`ba-DNS9OlarH2-foU<
z^|uX$kKJNoW6!4D&cCz6ac$JrjJvx`^X>kA2lZLAu6AjKt}@8IWs;MVbEZuG;>(gA
zS?g(MXPZBM`_^}=R_K|m-%r`ZCnlwwpEvih;z5hd^97l~;XS`tG`+Mw{kSA|Zhc<g
z_fxJbwKSL_qN6ugd`vplEA9UG*Vo9MMN9WQJn2$VVe#(WJDaL69m48<GeQGw%HPRM
z)ec{le}7*-t1E-Kxw+pQi^j0kON*YKirG<cQ0M!^4I2#Z+`YTA?yuEyzqv-cL3b)H
zvFPjTV|jPq`D0E&U1{)_kLQ<atUPe{aW=>8XGi7!zctCZVQ@7s{r0w8PA;ybi;G-A
z>2JmiiOSFW7rni`J$P-Dsa4sVj{biB%u7oQXXo<s@hx-fmHPAN^ZEMfQ+My(lgrP~
zZ{wHuySuA2a$C;K$hXR`udiQT_4QTBt1F#rqqjeL@+4*2@vb>-Vw#gKW*k}VUvHLo
z+xy}-yA_vHCN5eM@R0Fxht>YJzt!av?d$6{-*~oTheg%*cfJJ$2KVmW6VZ!_C~ML8
zSbo{C=!wUlKYwm)NMv5>J>6{e+xr`n*+J)%+`c_~mT7j~hyA;Yo^pY@k)S&L-=CjL
zd}o_oy{4h2*4D_(Ui9XM;kEVg`DW!u_V@SbbWPd$I4WX8!9N+c%F-uSE=9T6@6d4d
zO!RSM4&V0m_@iy|>V2|ivlno5`d^6HS7WK{*0Uh*?yidJ>eGB5Tkq_toIJ}kyXf^b
zUB7uYnPrZJS678rR8?(CKHgU^D#G~b>FMJWmEBLwG*%bY3VCp1qVmd3Zt3UdEKEAu
z6%iFx^yf$6X8tY(<!&X>4{@8@-dFQ4c~HLp!HnIDL{wM}UOd{dWutNBoG))~JWDov
z>v`|#p;m5_{ChIy=H_YV=FC)@Z$2mJ&%fXAD=I4uOI`%9@k*t{#h>$0JJu)Ly?wj6
zb@{s`4vkFzGwbT=>Pp_;n!3ofJ1H}BX4OA4NuD`YrCm2Sr-QDCw#-n`>AI5g{@z}b
z+>5jBd|ftqV??s1htaa`+C!}G->(SUrm#9(*FRL=L|)s>Y}!2AY9T49Q?pF7|9xiP
zSur<mefpUhhChD&+En+q>eN*2^sw9V^78(mmTc*3vFL3%iq363^HcWm%iBG<u`zk&
zyPcO*y{DOU-|G|-YmKz3{bgdBRsA4lo&L>J-={RGI2Q_<TWsGY8GGCG@$<~dOBM1#
z;eNQCKmF7cO`}yGJMZi)ZvXc7_GG<StFZNPxn<@WHjpF6CYtBRP1BF>i&{HvuG63F
zqO*%UOueRPIKH@;kaT|SlUe^Yo>T{X5~_S|vV8j0#6SFhRW$awUtbq{b9=u0<z>Ey
zPoM7o@uNcXRQ-nR>v~$@>wG4w`QF%=+<uew=;rkELF-~HZES2leET+Sfrj0z9-Xd`
z=PxfWzq00XgL|9Kll)B#+*7BDJk_qQsnicQ`<`&M>*yzw4t@byS>NyP?pjuV)46x=
z-jmbQ{kuNtOYpEA>z6<O>+9>t?Rm1I+OzI&Jp+oZ;N^a2W*WD@-}l?EjO*k46DJmV
zl*-7;a$0?0cisGA=YzX4wKYpbo@(!^PLjR8?aPt-`Q0o>`BT!<kKfvw{qpW^bI|Ps
zkB{{RP2n$jf6w;SwY8^r7C*PB{In#J?I`Fd;I+}_pw6FTV2zN`CXFE7#TPx!zyJN<
z_ix9pE#DpAPVnsd)Oz>EfBSj!V_gr({}a)P2;ll8=r`Za_w%!}D?tZ&|Npajj@KXQ
z>hJHCR)2p7YU;?>{W$nWj&s%Z*DtTHmv3xr%(=gB?i|bFQ+|qeoRX0uE6bm6+mLW_
z(a$gMZ%zyL&KCOnVUoyGZbQl5n7f7&d3jHdY&<L^^1oR~)vMsq5l$s#<)7bf=W}v%
zPhPa=areW1`+pwi=h=b+O4+^7WcPv}e|{F**w}FJ^Q&7}L^Lg$utDR<e#1}g56}F(
z?{aS4iu0F091far{>*~z=Z}7UOWDZr_{KyBqx5q+u0Q+VynTCfLn8CMdGkWn$H~UW
z$7^~``u+X=<A)CqZcILIQTa(Fbaj~NZYj_i3S6RE0UHt=x8>eWyH@Mn+TyBc7uEjl
z?Z=xB&Mqx|TUn8}TjYuH#iMQo|E9N1(Vx*)TmNPX|62bur%!{9dpp)6dH6shv!{oL
z!E5kwXI@ja0{2#xmc75%yLt2Gn^zxPSm+$GCPGkOUmtV~ebCA&%&s*>MQ0ZoXoE(w
z+;aJi!XzBF72RE`Fh3yXi0`@ed2a33dS@8cuDUsw?a!on_Vs?x&&}oJ=U-m?`&(M%
z>($}wH&uR4+p~A?%DBC|BHyuRUt5#-;6NkjAfTwNStlD6Rdl#Mu54*<4_;f9yQb*6
za`@Rop)9+`6T8!QZg^&WtY6OSe_icTh4>SjQcqjd{rM3dS?1!xvOVwaudMTDw{G2<
zdSZfN%Bhq(^L2YGO771x&Azd(*1DBjd|I=kM})|u;Ob9LI89%hC^$`=T+{nJap4sA
zbG0S9>)$-y<X;}97wxe*?d%eTv_tpr&yU=k_VE3C{i8pc;%s)k|MvFwk6*v)Ho1T2
z{8;{EmTTg)OG~}oKOEdEaz@}Z>wB>mTLXip^cTE;Qnl{aw+G+f&Q{gn`Z&|J;zL8$
z)m00FR^HfMzMchi_fE(8v&W9H?a!UPGI%3H`n2oL(qA3AKIz}FvFL7Z6t)zRa9HX!
zb%}ygjoZS73qQTEQ%nr(Sns$pWK+#cr(Y%6Ctof#?X0Va?E1v-<E9+`w=Id|_L0uy
z_NP^vigufSetr&0Cue3FOWIa#5pk(0)Y5DeFi892u_^tu*M7&Znpd(+GmEE9U9_j!
zQT%+Wy#J<~t`F-?IM~_QHM<1ndxnIF%$qkaW@l0B;dcJX>i%+?T|cx^QUW_v=3190
zJ<ebK<MZ}yHa73oSu30M-f2Fs&-Hn^XWN_muk{@RJI=p&b93_)y;v!CcXvKnD;38;
zj|dT^ohxE@n=w}Z-TBhK<F@;BZ^ubT?Dw91nOd6hGO*EC`dU(Is;aBtBjd=OMXK-K
zz3Y*)oz==Me(G!Iqt%lyUvS!U@aom67Zx83Ssgd~^~CKbV~f{BURQo~FY;XV_M|r_
z9C>6e#Fq&u+D+cE!(z{#Jsg~z4-Ygli)e*-bSZ&O_l<daZtm>I$N2&mm(}X!zVrOH
z>TrO@nS2GiSN|&4<-R_>>FDQ-o12!tk#MR}o30<ftn%}-DLRppCQJ~>$;|~{n<H>C
z_Sct}mNFs^v&-gMKDqm0^_u3m_0G$CkGp*?owqS3JNtCs+Sx0DmUg`naH>(;|K}68
zb30$^r6ryfRaIV&pzDK!grudVYih66ZhiXx-rm_SFE8I*{XMVJxWyw?Nm&_mRZsf)
zd7!?pl53Yopvxu|^BtgDx8iSHT<l(0SO~hPCHZ)tXMo7bSv@*jiY)^19Tzh|1NWe!
z_-MEI<rRU-nq3n%XtW48ad7bSFZZ6VH^WD*Pu|||+M39v8cMDcE?;osP&|^JVl*>i
zM}cDMsVNKd?(TYXcXxQxB9Fk7lt2!}mLEQc4j+DbYpeFgjT>cbDke0sa(e~3bSW!?
zuCY3zo@O)?G@!ZEd-}4xySrG}*}VfqG^at_GylTZ*VjQ>KYskk!pa)Bz=2V-%i|){
zToxZS;m^;{FVDHTsiLC7BS2(j8${dxiy&>6mU44)a<Z_pf*MttT^^=TZ60cqJ1ajw
z3tZ+SDP@x3kdUx|#dYGV6DJl4IB|RwJap*Li(6Z@t>*SUc#y!^sdRPHL|4TYfk*Kk
z9v%;t&#zmQb90kP&W#Dp?EGE<A}gT*C!g{D-rkDJ%D@c?j!w?bCubTj7jcE$jdkYk
z-QDGq76qUI1yH#D`uf^Cu;nH+<f>nMtx7vH!?9CH^~8L8`{(E9PhX&MrVrwe_W~j!
zN50>$_kVU~re(<sfuEnAPF@7PJ8ft6mluN0Z9IwZ@9jM^!?0OQH_9WhB@&djITTqE
zOr#=r6ezlOi>W%d@x<(@2y9v;0P&(@Mux`Ts;`HxU7NPI`g_opj6lsU0Z&NS+^qWb
zMRRY}*F}kk+rGTLJ)N1I&m%wtay?mby3x#tokgmx+~RI4LyA5=a=o=ZU%yrGk?a$b
zEdnP$IKO#yb@dD%wY>ZLe0_X)rt8PE9k_ULB9Ekz$HXJy58uD{zq_k+<CZNoJJ&}=
zMlQVkvY@o|>5q?(HIuB*&9OZE{eFFa-1_6^&+~_ehgTN2PS~K~a_Zsh*WR1c&TiPe
z`EZI+qS4HP;$meht66g_i-qLm^jwAFT@N;~2CfWwQL^j$GVQ5eUQ@NEzPWC%rPXDa
zd~8G6+o-xfAKh(iZ38t#J{{xw$Pad@&(^G~0jsZq4wn)ZcFxJsxp?tnL}cW}h0g4n
z6MkeBJw4^x$i#YYR$gvy@ABp8n_}~u4<2}Qv|BS^0mH<J6X)31&wF=ww>KnQG$&7<
zEd2EJ^!n^wK`W<h(Ac-@-QC^E$;qE@nN(N*PO;wf;`wv;kB^Q<?ys{2-2nLW)6*sX
z^W*M$fU2a4pX?{Refu_k_B7k-vQN*>Y9AGh4_zI$)<CLPzkb`6ElY}@pF5dn>p$Pl
zw^zy(G)M^AQFDJ^ZBgzb0U@zg4_hhItS`4p6R%ynR`UK{ESK^=tMv5rn3$Nje`|tP
zPP*FheXe!+iz_RIOJ7}C7`ZtObnPV@uha=?b#U<^9vvN>b#2YhPwA6Sy1c%&cBz3<
z<co`o*~`kx7*bA7YHecWuG^UU`fC;FQjtTg-0`#JWM$93xVTs|a1l%7=Cr4uo}T{l
z>MHkCuXm5ZsbuAq)yo%ki)cDc(}`T7p%j^MZH=VWTt02>)%tbG-n+}*TGae7aBkyK
z)Y5wO^sM&J&(EJ9>y@5kQ#r|VvfAEK$##K9k57i{DNb5>YioA0iB#a~t6$#S4CYdr
z#9Lim4VqwdaA3&2z3p$(>9^O{%irFfe>}k;!9?oiy}j1y=jJE|FZ22F^XJ~Osu+%s
zFExDBCVQSzaQSp)OJ;CQZEa^q$AOC%1G$v!ZhXtV{o?j&^;zccV*Z89b9F6#|1p{C
z>AA0w&+-kVPR`bszg_;8E#cFXlRk4SCf>MlWBU4yn>Hog+?4w1iKwETv1Fu(=DhxX
zeo)taZnN{M+h%j)`loui?u*$Oba&fJ-l|Ve&YkVt{5(bd9}gcZ`{!P>J~K(X%P%8#
zZ<%2EHa+@(MYG`gyVVR4JByx1JAQ0!Xkg&%2Hk@8=>1&#`gzxG8eM!odrnY~h>BPK
zf`1n$2X&m+=sz~=kJN0Do$m_Sl&^(_fsV-4*Vlja=+V6@UneIf?eKM=X}p@xXU#vg
zaf532@5Ng-smz@^RYZw(<E9IVxti~1R@g7T=&{(Xx1w<C6tAU+u3Zybe9@z(#%88*
zx?1$MoKGrZpj4^YV&KkMWy`?IC9)u5W73XYyMmT@2y%((c+9h@T%y|tN_hfKGmN(`
zRsG1x8N>n6;~^$;^rXoiY02&s#=VR@6J6SlFEo>Vc)l}x|DRVUUZyHeV$D3I+akbZ
zWWMl2n`cVt*5~i`Oq}!Xi){F|7!}`-cOq_|*t!%H2u>V{*EaFIY|t&dBlGy=V&&-!
z9e3@TH}9Xv-mCU9G&5lL6xX>gzfQ9h^R$l^JEbqL<+Q}9NGgg$@yy(2_P(2;Gv3KO
zy6E~hd%w|}h;vhKi#@nCRmoW@VOKf->NhtZTvODmvVSsrAMb{%8W)dBY`CIf=sM?#
z&-&&kv6uOKPdOa9wz6gI)+GmKDc-nq=k&_N%}ee%Eiw3{8o;4=CYDWIhw<;{-TD8|
zm9)+X&icfc+4JZ}k{-{SqbnauD^#<73adZ!_5!<u$oAvrkH74?tTCr+vROLA^m<O#
zDM#XYSsyw6p0Ocu^U`Y&e@+b2Tyy`k%QPF+4dISC5fzWlC;slZ(sb)$=Z@-&GfMb7
z_vE>hzYt>MVUQ?W|8%C9k!ePI>eaRgdkY_xPbHDifUi8{C%l$5bzYFghd%2G$p={?
zQcq0zu~}29wuduhTIc$@>#V-ZwD0il>URCN>q++X+3x#hFFEJ5BmmS+ygTP^q1+V?
zZ`FhgNmrU%qvrmwJ}j)X%wOT%#pm*GH)L5(`I0x&@a0#HI1`~Tt-u5adC6|AVl|<%
zzy0pfdNp5+WZ3?+)T(*+RH<^!`;c<YKV$MX---JbUs_)~c<iBG`EMVW{};cmasD>n
zT4;ZwNdKR8C;v^2nsKh6`gr9fGi|4NlFb(OAL?IBa$~&wWsdJUCqb8E4aqN0tq@~h
zuwA3!n5W~Ji^U1iOkXz6Flam^#C_m{#mD={*4=fRQ|+I#W?kV=kCrQu9U@+CVy>^I
zX^LN)DdO^L&4ZV&%GYjgO61~yaBx@i1V!^#J@L;3JZ|i0m{H+=<>s8uo10qLPm6yp
zI4Sk$WW%SArW>~YJ<p!}WX;ix+|3KmWj*=mKRJ%gmErPK<rkrR5jR%-``hWzYRq>x
zN@ZPqj>)ax+b`a$pYvn)pVE!%XSsjd9HLNnw?OZQ%+VQpwN%Z11?BvdimAKp*Z1f9
z&I|cWiI?|uUY@e-$D>w}C@YbtN8iR+hj!F#EncwUjPo|Rc~7Lmq*=pKBrj=aF8}iQ
z<JXELrfv0q#6=X3gsUreE0xwu7j05WJMOfIFWzD1I!1<Fx6?1Q$2~sy?avb35QcNH
z2bAg^bsoK2uwQhl$b>f=HuN6rFkV%{^~GF%&z$9(A~<)*w>=EJ5`X$+AOm-{?yu7i
z*FV=eFhx;ZY~w?TEma4Y4!`x5$dhEO*r#m$YQ^dsQ&;``)s<b8IP<pD;hpw|buW(1
z$`h#9XS2SY!DG1oymzW}`@c$?CH1LQZ80lkDs$(GO1?3)?Q)uFFMjOw!x;>>=P4Iz
zNxR?K)GM*Lxxzw6<KvaZix|!^TO|noFb>?bdMVe&*J?#+vMWAKIbgKb^qBSW3w9p2
znDy5@cY6JG1=n<kk56=VEqVF;wr+hYpEqZH@;v^wZ4Vl!gn`;UD&fcec2|7zD)9c3
z|0pwK?|XlnZL3>8e>oa4u|dnZ>dW(*$`W&Z$-CVPLz)gUc&Q$dk2|vW==YNb#VphK
z^%k877y7$RzH-CWhRahH-ni+Oq`*9F)^eM9clcvxK1^8?%JX>f@rX2!{+GMt`-S7;
z`9uF+OwlW!8@+gb62HFtpEcIL3dvJ*RPJ;ix0<bNxck`$lXYr0rNj^Ycwy3Cc6`FZ
zTvm}=zis{%3f%hi_{6J;o$bxl45zMdyT9tGgi&>hv$R~luO9ER8%JK2F)w>K<2c)7
zD_gC@rtkEv`-9E-uWa-BvXL!ho0pHje{uC;<4e<v-568m*-d+KTuO#3RXy)`zm5NG
z!Cu`NSzpwWmw87_)##jLv8ZPG=aki5KHcsKrRC=4$Jb7gHEg}uKDX|@$AW){GBqa%
zJTv>Cq$=?2%(iOloY3o)7U_29*Bwoc+@cuPIQ!zOgCf=QZpTJ_wutH8<&vsXv?J+t
zhDy;nlXp5c4F|7Dy`EQFc%q`;x|XeAdA|Q5rOT7+KJdTT_OJ0AgI1_T`LlJ8JwFs4
zXke0-ao1P=eJK9Ky+sCHKg{P<NAJ;|x&Q3(%O~&dF#lBjj_r<X@NMqdpS?S}SX}=w
zCzYRndUC161$%{W(*zH1w)hhMPvP6yZ2jVGYTfRN^L*c&{JXKXvPM^KO~WHw&v+x{
z^YxJj4$l=2yiwOY`82PRpUnXm5&!yE2hR4U2U>_7eRfS~-kkn}OZFdmd+>4Pt|W0Q
z2H7L}zvVZwb$pdP+AJBobNf>xt~D3t{x{BKo%iv_^Y|2XVJA?ES`xsOZu0X|!lhLk
z^RID)UWruV|L=Hbtp!7mxqbqRPn2*(WOCtaF{@@?NwGDm!8bXS>UQ@o+aSDbb-K{(
z))|XB?drwX6elEq7El)0UeC?BFuL<rv0y-wQeVf!*qv8b3qRC$Hjs#mSG{U(X)v)t
z)9z#X+xP7Ti;gVYAbg(rutIfe4#U*Rx?H}TZ$76h`R?6wB<Evg_UDjwt_Mq+XJ~gW
z=l1n<cAa*#h&ds4sUlw`ughF3w<|xZU%Yc|{-;>Dy+^`HFVQEfaZ!L~{J)I`w!6Ci
zJ+->MfI(zh=i9I^(KQ|JoyoVwG`y}J(|V+`(eRR%D~Do`l_jt4nHjudUIunnXC}|w
zJ(($m@rtXg@c;OquO2Qup2G9!pGCo&i*oIkd{5Y2>^u=6-1X*XAQPKI`D4kDO~+j>
zeeHUGYQ?qA{f2MnpPc)*LFsw)dc&jpe@x#s{qu#~Tiy2#PcC^}&$dqRw~2Y?@1El;
z_8*vAtS@v&`(^NQ_EzC(Qm#p}Coj1l8L{d>N%wK%<@@%?U0Rj)^oa0{Q{Tg1|J~?6
z`%9Ct4d)K;Y5A`wZtb2oqjYhKxVNW_*Mm1#n@d(H=>6OA&3^B_b>C}>((R7jeA@pb
zEr9*flE6jPMg11cE@$5QA1Q6V65S!<;wIL5(?CUqZ|%&I^OFvp<xR>vzF6xTqdLF1
z$hoNnaRws2bIlATBqznqH<^;Ab)m5**XTn^3jaG68<&Zu;=6A9AGG4%_EU&o)pzM1
zfx=lgj`thBa-3=RI6hH3Gv&O*$Fw(H-ZAsuN&d<<XViWpJ=Ob9=sueW0rxW>?GNtg
ze?C2F!<nhQ7V`^j><XB<+1Z8o-quL|(phs=zj>~qq8w{xVMd(l$Jq<rcq5XlzOb}D
zlHSyl(cXGUa(82Kf!=J%#cNF^zI47`w83fOmUUfA&g4v7$Po53CqN^9rQ(7OrN@-&
zIdvLZ;%hw(A{K6{Jo1_Od&+WeO{3}w)pNunV>d6}w(VE``tX#aJ7f61n7^N#9KnBS
zmS5qvSnhSQH9oiA?A&m6V%RHPP+wq@yz5H7l*}L&hTFfCJ+3daVYs-u=3>^=C7G5}
zV(hCE3;%>K+H8K%jaO`^_T*)r=9ivaZO}6JTWT61mgPB3P}%eOw6-4$=DM7hKL5Eu
z_@$e!(&Z(r7O6W9?wv1|V4uLVEo;N73!KZ3Jq_6Yx3PrPRAa-z%s(DiX6?Urw(dYd
z&eVk9dX8_$dhdMR-BJ2<U&`Ds#y5A1sTUnCSv2|GyuMq-T#LU6o#&G__#@ac@3_s~
z88>EMOnkvJr)X8cicgbLFBRxCKNl?26uSM&_T<(_<}#aC9(d3aJ1f|RyZ^|h1?-mf
zGkt%_nD5vh>Um7cQfT|;<mgNMYZ{rfWqy|_{W~A65yE=HYOi@_RDC~hLhN$>GFKgs
z4u;vcYENF<n3QzFgiWy0yZhf2yUeMRCf)h`_}2kNo9j(Sch4&NKl{4)y8gvhjqTCz
z7nTJiTspS!u*;hB6?*d39kGo}LHUY7x6)UfOqtZ(<<e!Ac!))AO_baI{G!?~Hd8FQ
zobJrMe(T|$@6uuanj&oWw)~qa?Ij{vw6JC|@7a25qq%`kiw{-&e0$B3i+4s+oR0a%
zt7q?@({k3+shD&_<EF^pz}GnwmBsJy)bH-?Kg5?>#@i;owpHX@O2h|cPUEVNe@@*u
z|J!-#^^b2SGL@2(Q`P_Rxba9In_Rm8l4A5R{yWb<O)US)^Cs^{LDm!fguu2t-1Q1`
zCr^6uU~UHY)uW!pvp@XreRA?-$M*R7NAeEni)aWP^E`huW%hQd^mNu;yVW^o+w`sY
z{VgJ`r|M?e^Y=$2drWL9J1y3R-Sbi0XCxUZ!ensg&*$bQrG}56=C5aUWw^ZfgTJ^4
zqYwYZ_`33b*Wfd&Pyc;hYG%vje`i~G)1&>d>5q3eDK%(X8EV|Ru6Ua(zes0Z@7c-9
z|0P6Rxe|CDR;`yj^5WpOfXp``4`cGa*Cqwj<UcsmwK>`<$?4yLjH+{I1uu7Y7$nR*
z+R9XHK6$zOPkyd0a53L;MeUc>#_dW5cD85#p1c%p_?6{YbIiY|9AbVi4d;DSeKmjH
z|H&m>zb?*O+-dJFr?SR9S#J%uQ@_x1skoZU>zb~Ff+`D+&B?o5j@;OHN6xIZMMBd(
zYtgx9JPoBwzggV)|JS`OcI_vD+P3A_c_Yjuy!ZX<W$5@i^VCYsB{DlCVQT^ER2VY5
zM5iv|0$-Qza*F#;=U&H)um8>Z=F9(SXLMoa53WZZ{`U%N@Bja`B>2KccfKT+l7)#k
zLf<&JO<!{RrT@J{EGjnB+W&J-yyP+8Rk6jOoG<i*-40Q{tbk{09N+!Z%FFFn`G4e>
z#`|BwrDvzlIH7mrS;6ubEV9p+&96QFI@Dq%@0}a928Hts1yvGUuQ_HH?)fe8`p(Tj
zx46frUcC*<hjuCgmH3O&4@&0Ut!G=}n9Bca#q*eqx4%EBn(s1ua7_FElegCozck!^
zVQ<C`*_X$U>g`YNDROulB+&iC`bg{^-K}EaF{6$tCo2BCF8=!b#bUd6Uw3t%kFWQg
z!dcPx?Ma~K1_8|(F`L07ix4XXz!Alv*dp*qcv7~Q>#6_8k`oW!y8XS=VCla1c5age
z_nvH-+G=z)H@YXzJtSt%n*&au8|XXEuTDQdFXhC9#w$N&eC8I{+mL=<?$_7X)3xsJ
zuKW4vB;TKz5mAwwUVU@^XQE|y`VX(0vejb2?5odzN_ZMbbozTtFc0cTaY*#)oxj0p
ziO}xy_i4YrynJ(SZ*-U9zo{!$XfQD`+1>u*KgZ(Ww%pqi=6QG0qutKUvD6G))Us!$
zQ%$PTCXE(>l@s>vwdIvI`;+r|`=(8gPE1suUn=R;@x(E&K+p-~DIu{|0T81FG@1x@
zI7k%I4^(V{^r<-%K}9iWuoav<S_E)PtAK}V;UNYNZ4Sj2=uj<sfZ~$=ym*oj$HYH2
z)!%p~pL7Wc5wV)vx7>d|Q$lujchuTxOq>uCci!4ns@*GXe(cmKE{4e`U6%RG^ziUd
zD1UdSrM1;_BE*@8Vy0?`^KFgl4O?CNHz9Oo#;q+YMLakZkBF;Y*WsFYCgseGiIY^l
z_nF>3b*js6u9asaRNtMiFD^1$TkpQUEcL{M#zn5(UWG6j)9h<JbN#-5O5YmgdwpH(
zQUfQ*V8elf2OG6QSMB)|xiH|uzS?Td9!TQpaIvlWa^T)QxdxlcPZPAm^)$O+5^{2K
zFTPe?$uhOA`ZA#j8dN{Jw{A6k_wHTB)~KkhS*ompP8=UYLD}k`TSbLM?yW6~CMF`S
zP9Kj>&o_|@)DTHIKkx5q)@zxUmz_MZOu%V|Ve82hBTvbeird%LMt}VBCFQ~b$BL?|
zS*g6BQRFKtg;l+#B%GY2nqeY!)9_3LXfs6AR;AoqTaFw%_UX+{<4q|iokBuHKvS0w
z548rVC<ZA?Mv6S*wVK<Pc)0E1<Hv`O^-62T?%Hzm+M<Gohh!`Y8XTM19zJ_^Y?<%u
zCD#{Q)+ek8iP>NGcWWlUoDIVS=?yI&N1x1cRg5eA{LEJ?eBGI)-qS5gUQCE&KPsM+
zlM}Hafw7sL|Jm*Q{i#R0L^my-p&1??zA5eOuWKdM)zxhLayqO+pv1Cr_Wb$spb`1R
z!)-Y?HZZ!oyKA1(zO^Ou@VeODGwf=uWUb3o%*?{NlG?1w-}OwLdNulT_LUWiM+L#U
zC&#T%e{o^qmseN2K}(4K{rz1(b#`P>Qxhw9(T4|)JZzxl3(p=M?G6$#zP!+xeTvu9
zczGo;u`M|_KW)tfr|KnA<?rq+%(}X2j$Q4n2M-cXP1RO+{mH&<+curZO)Yk{zd*}I
z>;8TX*X)u!IbHvLcKP$$w{Bfp>MgF>1IkPuwSWHpEi5ixT=w=>&b>V|OI}{`4E)#1
zCu{ZL_xt_EnU_>{mA&<fiJ2pE)Tg4VDk(RYS7lqcR{Xv_BCepKeu*4gbEACSkH);a
zyEayR%>sqZ^11dw6O~*!cz9IQ)Y^<vPZj+9l&aY!nQ?g;ui9kJsa~$_e6mk|K*Heo
z-s<lw;`Ul8Dk`$Dvj=ZXa@9Ol51Oy;64eHcv4R%#zrD3JXiA8l_cR^Q;>*2p{>S@d
zmnt|dDY`TzdPcAPzaP!t-rl~vHrl+6SGujWwI=-670?p+-DPhJ%FENw&9Pi@HB0kU
zx{$Q==_e;ATb94O6CU|*srU396(5y)rOp3oo!-82W97~N!QgW8|B(z6qr5v7)AZxd
zZAv}8B5La@k*CI8U0sJxpY8@tp=MrI0}VpE{yefR|GwY*dwW3_Me|CT{K$GOC@b4L
zckbQr$ZvOdm;e0ve17J|MXZz6d=DKw=vc-BDsMi%2hB=eTN|BhA_barlQd47vZzMU
zd%9lgqa&SX=G)JorW@_$XxA9MJ@4Y~@_fCxJtyAW+<f+5x4(-E%VPKbwD0fko|$7A
z{LNU=PS}!PJ!ox114B$~tY-K+8SAn)6Bhhvt*olrlzLjMjbFZR`gCzlPR^hy{Y$RD
zo@rNWrKhJSq8~SB!UO?HMfN#!=782ptqxzGcBqB(%F5vO4I2z@CW4B`BlnG^dO<hc
zndRQvQ1LOztyfC*x_X_eo}L~HJA3i>cd@6Y>;J!2+<MgBy66c<US8gh-@iB4|F3iF
zk@$E{eZ`E;MNdx^zPO+WS|u^lD0PNK;i4K#aFoCQ23o3hW#!>{w$&V5T#Kr{z6zSM
zUdMNq$(qQ`ZDzT*0=DHu*3{N6)sTPi^78VM_xI*5^PO!}@W5eP?rpbuHkC^Z-Y*PV
zDPfSnaPZ(k(0V>0Rj+~v2N)qK`iFNyf`Xo&-i{qRKuLJAWpSFOm(eBAeaJO65*7st
ze}8>-UVhm#@E+*WR#1X!bvpR*@$t-yi&{mVGN+!OH<wr1%pmWM#jWl6{_T9S6<=OV
z6mjF=>^?MSrjJ_Pub1jkTQUy*`T1GWx@?Wja<iF5#l^}N7BgO7Umv_ONHy=y4n^0>
zqfbvyUm3U8$~f&z!sBDTXXaQ=Hp#r?k!a`C%*Ja`_Xo7pN<=fLMb&$n!uMh>#l*lF
z3)TJS1+0&=EqimrF(5#|H2d0=MJmOamzSm9-BoImduxhO>ZuL2zsokS_pGV0S?u1w
z%&C>@%G&7Zkfn-R)~D;2T+EnZQ)zU2TW<2tPftTug}hw-bt!npA^zY(=k|ia!i50~
zoqqiOJz3RTOw)_&(sF<KYwP3v-{0FSX;WcPwfCORYf!VGOHB9F&(F^-tG;N&#l;yJ
z{p%FfkDF8b``g7;q1vm$*Pol9=-koUJM|F9$Hpb+7kL!=s0m+Q?w|ha%S)Yz4Gp_W
zUwZ`xZRqIW*j4`i+~)N2pp{rRH>DbPzui~=-|p}4@9HWlE&u<%ufMV`HoB}$|G~q<
z?L88PO^4h0jq~r>yt=+#-&OFDJZN=G;5@rpufuJ;A3uInG%yHgO454p<jIj^z0w~)
zd{_{&GU&~noyMX0>n~=c+}l$rq8--r=H}+&H#ZD9yW%Ho-fVn&x_<h>CRWf=!DGGB
z#jmgFf>ViWR>7ASfiW>LC+1q4?=E|5G+R)kctL;$C{cm#2e1DAjz`{3rupE6SKB^6
zIXQWXc6eXu>uV3+zd!%u<KvARH%^p&puHk&t(2moqKIykh`6};GXME`+on5leC)4Z
z{M2NN2PbGZ&6AUppKh5bS@GwG;oV)O%6fWzUteEe-jyA=c+nyW(=3tK*VZN<YT?|H
ze?RWti<Bp~wq}D?G#~Gm-<)z%=+rb_ZCAlZqDGrECT&eWKkwoaPvL88B0=kNRWxQb
zetCKM@$1*#y;7zH#l@h#U7A(ZX1TXq7CN<7e0w9gaN$BBk;x15_x)^pdwaWa!2^d`
zCYetCa<&!U-pmwn>rgpW`1n}wWTTEJ8YU)Fq)f9OJbH9!Rp@Gu>3Vm?qrc64d^=*t
z#rcoj&o4e1Kc~lRef|1f#U<L>wcm=r95B@RB%-`}@2j^nCFTk3cp3iBJdJ(lt8mrk
zsgKs*&pa&oEU5Ll>8;6MPa0mU`hRH8TK^S+iv{L?bIgnC%ZWHJ>%inW+<edal&o1d
zTQacu{a%~&_gr(!ot8<in-&_m6jeRfGK;wqur1+N_%p%7eIK27&e$x(D!czo@-q=G
zR#EM+BWKU{>c#F75EOL0x+*mIO0x9&`2Bvjx8;8P`Ze|Aqobg?w&>_+%MFt^D!F!D
z*qH3j#KiRC_ICZ5MyZP`K0XSPNc@(P5~$)`{QTU~xV=@N*{heAm)Czxf0$O9?a`gT
zC}LNMny}ftn#bGwBktz%cuzm4?At3g<z&*|dFzw<o}5{ee1B@W$-J<Yg>S#vR$huq
zp1xUNr_XcFy}vZ)r>=hLbD?s}3eD$%(|7#jju2R}u;}I*{jffvsa=~&eu`#QO>tej
zxA*;}_!*`-dg+Vr@5p=oGj-Z;8}<F0pLM@ypQ;tAq@mGqxSjv;yLadQ{{BAs@4nTS
zU;cQxe7;8PE|aIHr$7Jj@NmeA07t%8B1*oqOwP<Q?f&}ux^eBV63~*)nxC6Q+`vme
zI(mARxOR)JiP+fG(7+&Np7-bKHC69@`3yTO>P6L+{_ooI{pZX3AI&5WERss)-e-4f
zWpAg7k<qiK=g*y+$;hiEnxULrQF*$}$oT%(*Pfi|qF!5tBaPE*lV7qaDJx%;ikrX8
zlz~lmqTwy`O^Ii+Pw(BCub2B<PKnK7a(^86uH-+(VvW}q$M1V~@FVjS`D>G%q*zyM
zzPMz$IPa;5kAHeicNYICKV-X4dB5Ao(0lCZ>FL|@?s_ftnp#j=ntFd<?a8wLygWQh
zTsnnV*w}<5Bu*@FY~HYC%M@m7R;|!gB8`oWSJuVO<`UI1NIN63C2B3$*N?KTDnF^L
zTer@_%4$>oeY>LP=l+IXcVB(gpx{h_mo)!}*Xure1}EPzdu}28yoVtvuCi3*cN+hL
z-}2!bJ}mDO$v2zXJyGoo!?C|6h1zY=t(&H3+?)M(`y$J|shqxDV#!};rt3fd?et}>
zbI*J0Nv+22kDpzdX?uNfWkHQ}?W|po!WL=7Mon4h6lwnK-G1H!s{2*H$XZYL{HqnX
z>HhjVyF|~N|66x-iotBf%h!s}Gf1AkzI^AQrjM(eCoyG<SU;0p7JOG}eU7t@)q}6?
zr!y^I?VYLT>-Ev}wo7zG#D~ND_8UOA9B<7&zN7H5fmH9wwEMN6o^Z~!E??$5+pI^<
zR!Ux8zUbK*P4-f*<sn+2^?vSsG7sO}+zi^mW&iJoGnX=GiI*wpdh)2PS%zhABGS&z
zYK>a^@0WI}*MWU6wwJtImiT^2Z;SMSl4AMjVzcMB_wcRX`{mp|%P-NPlPrUO8Rwj;
zs?&Ss{kn0V$^;9gpNDTLoSS>^a6?f03;R0uov*Jg?mEPDZ`T#WwzG2gA79yIz4!N*
zWvgGZIIF!?TN`bvwKQ~L(fk94&MuF%U!Zil*7<^+rORf)_Ln9b#F^RqUG|qtz0RvU
z+`wrXC?<8ZY2p2ek(?`vE><}?3z_LX^Jw9`JooRJ(kySy<@FM}HFb?w7am<1s1&>D
zhX1P2)ouX+0*s7|S5}2~xAV)_-HknxlJxV_Q&1(}>*k!1q0uX4I*Hk8>d8r}+@Q5@
zudb}DsH(cOt2A5Arb3}rP)MxxhtQ(Mi!-mT66F@x121^ZzNRC&b}rxCdyQVc#}hC8
z-tf0T!f}7@;?U<_8DG}N#jkjJ=bf&I)DyQyp|xIKyUGs~yy4hh(%@<u!?lDlHSJaT
zyQYUXIOQIAJga#xvATP|XT!w<mp@#7V_!2<>HqJUGxvHe^pm=-9lO#X%ORD0ne|1R
zUGpp+Rep^SSQsASGuNuwYUkSvsxInlZ^m<7$#KoR9BTGY@YKcJ+b?ge=K3#kQeNi&
z+z+qrPmEvdVfNl@na|7*pFc15nQ63R$Bu#r2O8IJk`vd9S>QEQODAeehi>#X&^q)t
zZ{I#$;CJ-Zm6f1CiQJsFG~?nTornzzk-Y_=N%19C>F4Ggyn0o%*X{7XzrRn;HqYM_
zWv@DIMhTmUrIzuoU8ddDW=qbCmcL5d?y#Quf=VyXstsqCJSjTxbh`TF%PD7h93EZG
zGMrFy-b3p7nS_ZmP2PKFR@rKN3fk|PY-3}yB6PLb{b$=$9=X2qu6X)PxK-3@uUP9U
zsa_kK4;gnP`1_+$uDC6#-pRXugLc>S@XCKPE=Zm~x-;|Lw14ti3si4Z8|Ow%SJ`R1
z^2VV<O@WKuHfCH@0&V&{(kc8iNH_WMqel{UH5Q?(!<JTle#XMW60pdH({jPtmfl`o
zVKtuv_wUa)&At|}ucp#(u9Yez+OJI0i|yLI+gjPJC*j?loe|N|>bpy8>g?x8G|OEG
zSne#(@ay}kMz(;+sK!G3?ekUKHrcdFE#SZD9&xGH;5w68;>|U7mLi@TbzhczURwL#
z`R-<+a}I|>mBUsYxss8YI{k-{Z&jDD+w*;=cgikS|MiUV7vERs^<k=P{@Pcw*OwQa
zYt($0zAVqCRZMq!nNRw)g&ZQD%=4piRrr<vbInkd-o|>MHig+}|JI||*6TLEI1+rJ
zv@6YL+wY0HcUwC;I{x_mJ9xRDtekz_pUCOof4|=!yrUrT(4j*doSX+QU;f;^{qxS^
z8tLHl^J(ASJe|{9@w-#Gye}{H+Z)$2^L9;`u>0ftd7Te68>PiAZ@+1wcWjT~rmYWo
zw8Q53PG4MEXE{T3v-*Y7BhRj{yYSa!i_A@f_LbdY8#BJ;Y?IsjIq9jz+wGHXZcaB&
zKRofs#X@J5<$tFcq`#h@e@3}Y_VMq{U#CiM-#5jnm1|@5_dKiecRp`#ZGCxvzx~CF
z7r`Aa{{`;-a-d4gw))$LyXE&)PnXsd)JQL8Tbty!y)ei2UxUfgR~OIT?Y@=w@}j-X
z!7q`k86r$d&C->>q^z}_(fU^>Gq_ShTg*4%%*<Uscvm;YZkx2_boHcb&dWAsWZ6G+
z>rE){zclAt>;aR-^Y&_AFZ%I8ukP+P{=-XSbF32`rQge+EG{uCJvD7f4YR#XdWmag
zmH2IEA&!stPhRA9zq!qp;qcPfH~r;_4-dHINZjY!z9?^tUEq|7x!sf2R+#dLs|Rg;
zQ!{Umk=}|^QGN2Or!TI%+LYhY?*8WXhMtCGt3KAeMDGKY>)QXE`)cH?u_jRE^<T~6
zXJ_8r**W>q(eB5O9vxchJ>9O>VRv!+GT+&kw&%w`J2&^V{{BBs$9m$`UA&{0hwr~w
zyLX%4gd6-@oZI<Y+S-H!1sxYUwSM^caiaYvrK=N<|LxR2ckEbOx48bJH*e;=+IDt9
zuc8|_AK&`=ip*rCi;IpqF8+5xQc!rY>(d7fzm%u1?(JmkW&6s$emCRNsFU4dwpq0{
z6C)lk2wOJeSNWS86K9!bU)o#!-T!l)6wjHN#_gVy)h;e}=V#-Wo1>GruWSDo>7}!L
znYy$7NSTO-w;yr%|L2RA*0#TrI;%E*{BY;UHjnl1n%SP7u2uOriGOJn%Y*71J<BZ{
zSIC&GkIhR^_-NGT{hIw!QLrRmZqC#a-*t^APDoslV*hu`lqZAN>q<sv&-1T;W2Bv*
zX#ak8w)XNzi8*b|>NbT;axCW#@4c?1mTT{{_)e$tGyZSB=~wnz>EuR#dHTn^T=THi
z_t2aQOWt>HX1n~g&1>;nH(5t-DhD4QXe-Q<lat$jette#J6!K)vwje(WTZ&^oS>CQ
zjvZ4g&6^$dIV^xBAX~=HMnml!$60o^qu+JI|LQj-J+hgi?_PGrOLc{*rj`cN{4Z>i
zlaKcWt_soImV5iyy}i|vmPIL_c6iHYzMc}&nsV{l%TQ76uqP)bDo@dko))y!%dqlO
zN{Q9nKNrP8gPTgGmH+-&!WIv{yv%NIZx7m@wkIs;mX+%6ZGQR@QBg(}9}<|DnJo(*
zFsOP@Q_;}y;L`z*bp|oZ%gb|#YPI++e+=1Mq&0Kh$}Oc{eYtbvrW&XFndRR*b7G=0
zs0N#BU9P63<;7<-QEBB87AKC0D`Iz-fd+Y2g|1e6{k3NE+ROd6(u=gryB@p|d!c{O
z`nJ_p-L5wA+_`!C?p^czyJ+p?cg?q~cJA9DX7uRZ{Vk<kH8-!lJgOrfxBv9v>9;?0
zE!pNbSFI#%{VLtv)wK!H%jIG{--&yf<lT`dFE4*}eSLp_KmS~-(n-v|ODv2wX&gxx
zsyA<O4Nl*jaZxF4|23&6%6l(edUCjS?=>aOPyCBoS@ZJpPS)8jT^sx<|MeHG>gwt)
z5zVH!_0yYumUw^-NSeEpNB33C;;h@AUt+gTcgy?#vH9)PX=)5EZN<+jtGd#+|DGK(
z@kZ2><p-rIor5~XbLaL&woER3njtlHmAm=(8|B+dPnm2pKbpm#_VDe)Eh}w$827&U
zuscG0e`e^0?)@IS_XKC!rfAu2zNZ{w*mT#~yZF}k^ktG?r^N1@c87QK#_Zd6JkynK
zls$^RRq_AC!EMSbgUiIYQf;rX`MFkBS>JXRvRHWI<VB_PmU}?O2FJJK{s|8cxP6hj
z-^cs-;Hsx3OvgoB>vu`DtrT7v_039}kMFswe~WnT+&A~HUOSmyoD{9TTQ|q^euL1m
zDHpGKc3g3adfuUb?%cV)?Ca}h9$$R%(f=j>FQ-k3m-m=`DD&RWN#@bB_RNu!?EEHM
zuA;upr1Q}B_uFk=z3~6OM17+~l2T<}z~d($u2&v;axLmf@$0+0&7Yo{`uoK-OR3(r
z*Vor4r>9SUm8G0Jci+1A`~T1LnyPhiZM6AM0Rc_W(%O$RUH5<g9U<~Ntv~VX-Z@Xy
zJ50mxMC>oJXq6GN*p$Sy|L&=V<R4n+ZiTC@UM5f)s~cfdC)az8E$H)4H@ElQ#~*#G
zFkx;FuuPO@{vL7J?0x06V_Q;^?|<5pqkCy{dnAkN$L~|Jc=_IYq-XUW-k-H`%8hHD
zf9FqIeJ$hP(_Q;-UE>VMDwwc61U$}IrXMv+#`2;dZ>@&qI-9lZVh>(ic~dgGRQ$g2
z<P*P_C>>o@w(RvY`IS?RyX@6<7(`Rn-f0Z{DV{XPsMqXg@1Lj|N!DjSE-$%N?&ANu
z+2erX%B?5-Pp;Y8A>h<8#Wk-$@X_judhf{_PCQSrId+&sm}@!D6gS6NTcoDE3Xh)k
z;vyqwmF|Lrxl7oitFJG({l<R9#r2$gD*vx3L5JD{l9pFkuQ_^u?;&Svq3BsEzTP>R
zOlFb$QWLGMtHR`ucUg9OUQQQNog4aouD!#=XZ8;^zhIxIBkR0&)8w7G9$C7Vb}gCH
zBnF=F=y-DK%O5>PHRTomx^miP6o_4$+vt3-?R`$>B=4^iil?78NOlf?wMF94|Ee9I
zA91Y@kDkS#a^&cu-aRKz1-?EJvyyk;!EVEs*FStY`BUU-luJ$IrE8uXAM;O`c01Lm
zwRLo#nQJ26;CuLYfl9qwPG{=zzS>USDYMNFzWjMx*ViCcRYj%Yr|<W@bHaNCwO{7!
z{-5eCJY8|^iKt_f_Q~Gmd}+)4eMT$Kj4ao;U&J+U7a5s7FZ0S4{1^wa1+=0v#o=X0
z$G>8;i~oCH1~2>b_5ak}=bnV@K5|Vdc2ZRLotxWRO`5|Bc5T-FX!vmjPx(yC4-7Y_
z^FEB3tf>D#cj3c!i(BsfZj0HnE*h<L_4H=v%~)N$VbRt(O=9n-u4~}FucfnL^TxXO
z*>*eDP6qc(I~f}^YTQkq2fF{Cp7?QY?%clremjnrKbfv?0-hN9<Gi9~?beQIi|_5<
zc>mtC!|5g#s~=g^U*PY)#ameQJATncjr+FpTepY_{`a|M#R(dvo%m=)rd7Fi*lY96
z<!{$Nn9(ogTXjKYe@wN;KAFtdDlMQ@*G0=ty8YFwU8z4+ApU6gHb3q7{MT20b0kjW
z)4y%?w0iB@$z5lDPC3G?f6GepOO%3cp0e$XYrEO_ZR(%6XTDZ(=~5|o=H6WX{ZQSq
zFK@l3>(#!zU2dVhEhpjP99fsTL#cNxcXitN&c3wfw81vHy+5zL=bae-A=adEowC{g
z$3<HO!z1jgS;dq89iD8(J8{mp=tFbX*4MqTfAFr=)~D7zR*psQX}jt36nC$}C2J=O
zfF>ONOEmQ9$UCf^w)f<{G9B;mGl|ch)LG1%b?#8J;QTkuI=}wpUwK*CUG}&w@VsTu
z#Msv#e}DaP-9AjVXoB58n}y-R|H^iLEO(CBSt4hz$M5ogqxKS{aSZzuy(p*ce@;!B
z-~Q?L%opa@{JlS%a<zEotG_##OF4ILi-5eBpELL7uYdlq?K*qMx&LVWg>5;@j&#mC
zV<NeIs`TGF^^Z3--tSbtlEAQ3$k~VI<25Oh`pF-h{_QxrYJuX8ERiX?W|O1qzE((x
z^GEFe(V`Z#{*Y$zwfdO!*E*>$EU$Df&Q{y2{5z|&lRdll*ZbymR?@yzRf$}YM|bhB
znldY>*E)F11XH`UTk}N>U*6i>`sv{OMf=n|qt(IFiT@&hO#GV=*qfz#pT}zEN159!
z*-AIftKUSIw;aov?Xg_*^~(E;?5xe+F?46G`H?IpcDwyT<}*2|zOT=gi>X@fuTxpO
zEG6~%#f2%Gy=HB7@|F7cYeG#?mGC@Srxo}0ZH?33xIK+~Ha|S(_qU7(XBWi3us5Ew
zOis*a=GO_L^=B6Ke7-Mx@8gRGll4BFw@s(FHH!srIGo}(eV5Dax`RGZn>*Z&3cvom
z&hP}g{;l~Zwx(-#Z7CH3CGSV_t{>A2G{k;ZPkbq>EF-0^{_f4}FYm0K{$HPQDcCGv
z!#{>)J+jA|lCJJ3u%G1@TfgA-np&GnH`L`~xTM|IWd08iV0onf_aIj&|2I)T?u*Y~
zTr0Ct=9{~)r@3j<wCfq0-zu9u{b^{kM{mR5BPK<f^FL*znd;f}E$b=1Rc|`mX6x?_
z|8AB><#QahcRIZ#cYmP%_OJJshBYny>-marDyU3ojM=PlW}0qf*Yl$lKSlbtE-PnS
z_<wWx{foX=|H~hrpBWY8p?PA8q5@Zx#%v}gBW-5~4HlPQD`0B@0tB9%p60t%Bgi3S
zTFycR?`=9WQ%?%b+;nTxa*>IV^$ofKQW>|F{QP^-D7CYv@A_kz^9N_oo?U%^r)T`n
zJJt8iA9h^``W`1JwW)ivMRRcGsj4*QOA}T$7=BC-50mQ6Idp2DcE=N=>}%=I_By?M
zuXCwv*1~<=Gq$W_j%<8kTx~J)SpW9B!iQEB&su(aQg-6+Lam5NYup;bKiHQvTx&J5
zySj<X{OwuwKG8oZE1b@{v$^KUH+|Dv^G$uFvs3hP$D=uatlqmnTYtxYTbAt4L)&+F
zDNj0-nGzVJsa$E><@Be<Z_dUCffwG&%lDS7ml9sb@%MzV@%D8bw$;C#lQ%P}b?kl}
zS*c~Rq-(9_;r3Xi;=|uk_WnJQx~%k1A6K?`km79P1l4<%yhr9Mo#K1v*06l{+sK}?
zUVl#Rxcxk$Y3>PcF{9M#u8*@{?6(w5Q-62lxz6AF)<48feYg;rzuo@g&h(Y-x2!ss
zcu7T03DNeS+MkhV@F{h2VT<csOXc%bF9q&zk#0OayHviy^~LM7=`9v7=Ll%UHr$Bz
zFaNwCbh^`;S;cdrrhZoU+v7JOKKI{>@Ah3E#h;&(O^rGraqigc!W+{*c)ws}Sm)CA
zxWGj?wf>I3O?pAhUfCeyl+)eHc}&XB->F>6dU&8|y@Y(#Q$y~;qpS*N@9r00yGF{S
z>(pv<!|VH*m}mJv_!Ij&X}?_OW$9%{Hmv!kuHSUa%J!o0m1~|ZiGfcn4WB9}Trb*}
zu|a(Pr=yv##T^>rm1ljw=df6Ce(<_Hf3><tPM^!G4Qnqk?6ZoPJ-<5i(Sb_}PLG^t
z*pw@)J6+%O&PC=iv-s!mqzzhe&z0}3yZ-ju)p@(s>fHAC8m{@ViT7&Om1Sjb)!t|Y
zvHSO1op}>JKk#i<(2?aDZ;#6#dmwi#|F^!~pYusi4Ac65%@c5!c<k=?JYos&^*>){
zSbuO1<M}zmYw0PYcUivsD(v4Y|JxRq_W01s9Ij5~l-owra;=&~Ip*G-eJ{H>@X^*Y
zPxNA>+H>_vtN)ycFYB<_)p+so@}&Q@vXjO0l#O1o#J4^P{5|=(TkhN|*F05T8d%8i
zwVzVZpEP^6bp4-?>@P1b_qVE_wN^|=OxrA7c;=n6|7*^i+Pk(`?sNOSEb}BIyPw&|
zA9UShNqBPJcK6N%<-{lNw^<aN^Sl}KQ~qQ5<u2CrGiu7;R$Ok8v*FkKuB&=(s?&QB
z<>s?}7gKy<!VM(<=l<D{+_@=nU-4JxNf*{eo1dDh?cUBOJMY!cw9PZ^|Njv-H{U*e
zm+bYvGs~SWA1s-(r}3R=+}_s{_e*>}nH9yhO<DN=-nEyH9e9|MGylRh&f}H)G=JIs
z6wBFLd@egFdU^5bs>?Nnl4Ux-K3r7yw(z^O>_$S>n>l_Hcl-NST-xT>`TN?XZK*$2
z{?ZYvd-BueYuC?OySTGS!uoEzLyy${*ke5{-+9^Ni7e}`y}z`STU0x2N$u}%zmyeD
z_PL3<dhCfZ4qjHza`blp&i}JNbI!NXxEHnWX3CRmvieDj&lM@@^lYg9XMOPIiXHCy
zCsMp(_>Q=`3za=OQhS=?@sZBrI~y#8njft#t({e<l{|^NOY`L8TUMzGUA>L}N+d1j
zcHZB%C}XjrOm~rhRl35wqg{d*>>H9&PX}#tF>lhp$uajC==6(PvA66GPBiTk(KKvq
zn!DeFqwB+gZGODo-_k$cahSyy)35kkc+C&zTUMqyMlB0i4{L;(B-Y9a|CPIO?d0Xl
zlaCz^FG_gqVblF%!=DFp1uM3e3RUhaY3%k8`WzM-fA3fS>4LM7lf-hpSIsy%>6X=9
z|J=DQaeBQvT^%Z#S+Bn+WW82dv_V5jsq>-!!nK#v{N`-5dGK-fEh|+&&y3ec_U6v5
z2#8)T$s_+wwvad5#xZ*N=knN@|Ko32xfVTFn^>l#Z~JuXo$a@**8Z8Z)P22eQP|c}
zp}+_cPqTCHBF_fc+l$#g=#O@Jq0=QIe<<_y=iSGA);dp8-&*RFJJ)5Bz}(FmpKSiW
z*dNPa_Vw7Mm-8DhOxeru)_T9!vgc(?bxE%$^nLx}refZiJGZF!MW_GIM^jdYP0wi&
zOj~)vT-k29hnlspKHG&qcjJDPmpy)-y!B^gg({zpY~k*;!P6IATw^VG{N911SAH+F
zwe0E4oqKNSZ}(_*vuzKrX?DK<@~^W@PVUT`?X$1F&H5C}s4kQEQR4KIr`!LEf7Jc@
zpv?2ziR3f0jeeFzFF(I~`yu6e+h=?>2i*VKr5zQ}xL(Wjc(&cLwU;L?<;<OXXWB*`
zZ`-VJ-7^c1Z1a2Fo~Gve@RCJN<CV7)yG`$JRXO`!TEe_e>)ZWq<H|(6s4J&Vgez9g
zu=mTI>oQMt>$!>IdYaqLRo3bJx$+_QQG4Ff$~b1RnEd#SbADVGv_5uQSz+1<k2lYF
zcHB4ZS2YqkBl)g!o~ahs>We+Qe+qt9-FQt=-^nX;Zn1w|Q1tTp=#45juX(O5R<XQ%
zV$q4u|6HeCJg{M5U5&I1kHhLj`-_{y%9{4c`|QuWK4X8em21v%$y?97c}%qm>;9N{
z`TcD*OT8?3<^KF)Io^){E9E8Ga~FM$HjgwY(m8(e#J`)x8kQM$f2E%MeO}XQw5rsD
zE74?Tq4`Ytk2m&5-P)i3y5#!*<8E`G9%0q~9`|0TTQfZDlwFpi+U2)T?_|Apne}^`
z(Cmpdajqvz{G*pIUnP-NF54gmN-k+JUE7}>_@dcz?w#lBhPsftbj_(+QyMa77qTrn
zpP&BjCBMa$m7?kKmTt|Z?U|7l*7kb7r?*$X{!sSRXN^Vv^Eb0T@@$@#{x*H8<<!q2
zZ-0MX?D&)M{|x`N&QB@>qSYr}@f6?DVUxZwT=vU~P$7e$TicoA`X<Z#-Jd<<(tXP-
zFT-}&-VUFtm~n~Wp8OB#kLmi!hUv?WN$-Ccy@tb>KV?Cfra9l5_|wvoc^m3Kn<*KY
zKH9yp_T+5S)3u*d!(Iyr3Rb3>Y|{9o#PoP=oSH<Tv4ltQ{fhfr?}rB*EVs4KlRMn?
z=K6=3>~79{x6Z}+{=513#~SzGuyT3#!x`*5KTlokHaA+FGe+I!($jrEml<q3;I@@d
zP;g?plXADx`I(PXY&LFdDUcAZn{)P9T8Qoxx$8$h|DGGmJLi9{-La<B3ggL*-s$I@
z+alIXte&~^mz1e>?dfSNb1m$Cs^o|2p4E@9JEgC#v?2O`Qse#UGCTMDzxPv3Heqp8
z`-$#^{fCaVPX3tvh4;9Qf7RM0Q+w}Ed3xFPc&NQ#tE(VmI#=1sQ#Zuwd}hgcOrN*k
zFn6o{O{IRr>~B&X{E7c232YKs*ZTKI!71_F^J1@8-zeuzf1+PsT;U*mOphn&;ilvg
z>FC*$c<q<3zApJkI&4|Q%I`aFEm42}^i;w78f7j66&3!iHdn1&<{3ZM-ZS%XXK!M<
zoOSk*iwS<wG7ZKz_uhIq)p+kK^+`S}O8fQ{tXy5{*A%Vmcjn!*_P%d7*NWw+8LJ)0
z+L+|^B7D*Xwc}6f&WElS^A@p~{r{%l@h5$|H8Z#IRSE_ry`EFQcYWld4GD+t=Ip9g
znt1)q4C9@}=~+qAvTIz97QFw%AOFLCn(*J>LZ^PNOS*XOWrfX?E$p|K3fO&{=kWdh
zvVco}9n<Y^c6RMy`@XezQ?&4{7ZHnecGx}IJXvN%X`f2ERHR5HV~?8gyZ)ca={`JN
z2i`8=NGK}*?_Mpk`R3n!b-m^1(|x8oevIts?tXk}srSi}x9d0SeF(0qv7gR)_jF*<
zfgq(u?%mHXZt!H_7Vld)Wx0Cs_n0S-=82Z%M@$MjYLodoy6sb{w_)DrN+l=p$1h)c
ze)O+X5!z9fU^j2xyd1v%$_GX7N}F>h|M{qMtJh%l-22me4`04Kxzb&KW9cyur$^2i
zkL@R2jjnq(yY1-Dieq&QG85Y+Zf)Qc7S~&n|59kn{6E@(g6$HIug^$Y+V$}H^^Q$X
zPd_qEP@R_Y>zsuQpI*$4hJy==J6sb3T~s1}|9ZV1bhgK2b$>NGJG;oWzgtTWzo=a+
z*SOc}a`pp`B=Pl6{-0wF-h6j^jb*aVT&von>w~Qgmz1jOiL7us+H=k#T>ik3rR)(8
z;u|v^5AK+nccv*$v>?V~$FGaF-eIfCXE>!^UdXPOo`2;o`?5<9Ll;XWC?0!uB%0Ui
z!s-O6Bv0qc`)UuK9DO9uRIz`P`PO^Yv$HLjuSZ_kCwkqQ{nF}S^<!^i9B#e;%X#N@
z-}-z}o@3u%+N2mR^xmghcCo}Sa?<_A4U66TKi#W-U-<V|Y1Y+MOZjJNfa=8b+TY(&
zUte1b+BLt_OZ40O`}2LS-8pq{{`E_1qMXECob~eg<mWifTVa3TB=__qjxXLE-TSd<
z&XWf}TMVt%ceNTYp38f%L!ehw<4axjo8#@9ZEe~Xzcve88^@{hzx<Qp>7C!_J>Rw?
z;z8JK-rH}j_udf77t8G~WKfGMJUzic@t4hOzaxQ4`NvxRm!I|z)feZB-R1WzXJ(<+
z15dfljMw+Del$w)3HyDiSK9o?r_=f?V|SbBMsG`bes1oQQ&YX=jRGS?CW>o6dXd)l
z>*vN7FHA)rOMd+DA>sA4wIy$FiOS2%+x`Djye@wKyd&3d9qY|tS4>}bZ)QsWqfJNW
z3Eaz@uQubtu9w>)ZqKck7Lu}(G!jn@xx3|yob{BOiLNVt?Rxz-Qf}9r+p7huum88O
zPs+_dn{?o8!h`kN7q`!!Y}jvn<kkkpkIN@L*ZjIVeEqVLmzQGpR7_NM@4K)vSY5VB
ztkrd5z4A`kd)}vv_J34c<{Q0X!RspwZ_0ifM~BTe%e5$bBN4WGYS2<IK@pKDE!*$-
ze4I7a(rSzRY5uqaLZ$OEW%sGrJf1Dm!Jq1!ap-4jUx#syQOKg4d*MQEe=n)9SWP#{
zD?e&2Ec)*{_w7|jQ`SYZmLK@@>hQG5jN#K;Zhy1fEmd%KhUg>bjx9Gfw!Tk1*lDpN
z^YBg8SSODy(VcrlTyL4M{o7F^8xy}g>Z*xP{5E-QhCrEkq2}_7cLaocFH0}Gy?9DE
zWBBzYvODx*b~tQKJNx4Da(>W4r+0UDrktMUEAJB+A@XzkscE{PP1`%FzOFjN+i<n%
zYuK@kQ5{{o-ka3zZar~g#S!~u<q?Y)ZLnId?9Nyk%mO+B0(3rP#>GYFb7syoN=>@8
zB@=WqGw7`CHeTtXUteCT?EIT=`oeMjs`Qt)_o$_wzJ6${cf6K}&haHX<BfMNHb1Q9
zd+2<=<zjJuwc97ZuHorZt#~U{CV%&A*VNe?{_pGC>aNUCpdB;w+qu#e(N+upPEy=_
zoq77>hri6V#n*adFq&q|PFyc`bia?3RhYKl<&DuNV=r{(+UF}6g$qVme7m%!CQ~do
z_IQ7GdGcJ_Ij-#M9~o!Zt*v{1*8Q)#k=ldefB&+?)_(qb^n0VQ(6!9(jA6d>ydsij
zuUzs=IrYtrje9<yv!0?6*yP;K7q~Xcv{pm)XuxZk%7;nG$)NK#w8PdUe0y_qiq6bm
z?}V!h&vUVD{U=o1-5+6EZI)?2`|zBcgFW}fJa2V#M?8Obe%AY_Non_O?Bt3X+DeU#
zOU^_t%a?GTnNzmSmn-4S%RLp#ZO{C0-Th*7f%RGLRFA2@gMEB>Y^%Se+}@U}6SwEb
z%(kPuj~+d0m~}<Ntxv{td*0m--@hL}dQ|n~)x%jE8&=P()}OXrNHq26le+TPbB?nt
zII_+3yY64kY01ZvHYS?Q7OLZweE;Xg#v3vJ3q}9mwXe%jD?fAcjnlsg69oSJ`Ez2L
zuC%b4&jN==reBk$i@0z9Jy~4Oca{kY8(YxMqEsof9FO&JdtcnyS-fhM<F`le1NJmb
z{CF)X=gy4@|GDjb7f-YPS$J5YD7J3${MmJf=gbYQsD4<!qh?2o*uKibxX?*||BIiM
zUcT&Nt}|zuPG09sZ40q*MYE}LZT}wk+h<%{#M;Uwn)LqO-t$}S7j<`YGcq!oWMAu9
z7rPsDAl0!xS?^XZ(Mch`K53tump%HYe_K;hUgFif6ic@=X^+>|-!r}0^HZ?-!GE<A
zYOLYw<DRZwzc1<QtE)XSmXn^Ho&EZBY><v@+qCJ+5)Ze5&h@PP{OsZ)SMEiN76qL@
zCVe^aSVR0{u9VYZ4STj-W|+hCv-g5QdK>TWzNnwZf%$LB-1e0=8UD3TywDkOXHGI_
z>^I@mno|=5I4@1moGRBAx2r_+>gw?2pkuA(+y8Ghy>xE2x&G>~wMicy9Ndz7ds?5Y
z^{4Om>t8d^?E+<xDO#ai;^N|cvewHMXmE8aoe%!^=jY9h$?TRzPZret{B&|f=*fEV
z^6UGPbHaGeg|3KS&Q~gBvcPYyRLb_a#OGTW>}J%@?cXnVuE9txX=CzEk@H(`%=mpd
zebvg@SF?`Zzds+eT(0!>wY^&cUxG5?n;V9kQcgBapDu2ke$FNR>yjLG|9MOD?(XW5
zHt&0Sdb)ApBNwBT6AKP?99neZbDGgi3Hv%5=Qf^&b$@^P%rsIJ&DpB`tcOi6-gCzc
zRp)JcERx&}axT`biTGK2)x#~{`qU=F{g<}oRi3%F-$F2{dA6U(y2x3{$<GQkE-|`%
z44*MqI8i+4=)O<S=hp}AttwSgQo6CbTwm2|O2x|8kDom|_Tu7V(6LvW)6On(>y?tR
ztqPHUCGvwg^}~aMA*;iBca^>tl9KY8X_Oiw5f~v-d2yCm?x}Ngt!El0v#pKV>czJA
z?Yw&v=Px(3wwQMFe($sCi^U7i#`kvGzS_K@p4+Q(+M=6r{`+zwTW9Io%(!!LwwtYb
z(L#ovoY1+ORsXyU%3?mNCAx5-zwa{3H!*j%b|wdI6WLp@6TffHs?gP!mibDH>Bsf#
z-d#N@a{HsB-J6S_`$cWZa0Jc%Utce8o_FU$yA|t=9fitqad9%%Wj%j?f4{ugoxjJe
zRAIsu*KlX=Ny5Uyht=oT96Hu3J<~Xy&$Ua$)AV-U_NWaJ2k-v9=E_yJMlAZ#-bHI8
z)-1an-hFT1D^cqu5A5R#3k$!zyxeY@eQiU*Lnp84dUKat|NiJ`H)x4+@^L=5UMbhN
zx3+#==+~wfx5vZ5f#Kl6gEjyEe!sFZ*xhwvh_`b0q$fLe?J}zTl=AC4_&k=GcC}Tj
zZk8=RSrZ<>VxY$6%5d2>*VX6Qk#8EB78>(xtHtv3^YvnO9Jsf)dU8zw%ZYim)}No9
zO@4J{B?}A7g*}zUhYlb9deBep(c{OUqkdCQO?mkBdi?Vf6P3BT7HwD|<PmgrPi1l4
z@3-b^{_}ipZ_C{np`+!c_386@`{12Lsk6-UL2J=Z%rLCHD-Akl^Kd(V@R|t2Q&Y7;
zOTp&7ludhb)4JdR!_`%x$rl&7f@XOaI=4^Ki?tGU6A=q#@;r5~>a}iDbF<;>vmt9D
zK$Y|g4b3@c)07ny9+W*UtFQl`vRhYea_7Ilzd;dI_w%VZ=(OpKj1~8usHj<4&5GMw
zb#Z|svsKxf6WelcYsBo>z@<Ft#E-M)_iGj#c?3Os^vLP&udgR3C^D~&+NxCi?2L$Z
zm`|%`<GK0vpn2fJ!ooGt+xyJ#*BHCzgg-koGxO#q(1BAWFD^6&FZT=FS5p}i8~b(H
z{*=?x^^ae_K0Rh<(Z-seMOtBNIu0fTB)Ys?G||=5ENWZM$!lw)CC&3>WUWdrTwSJl
z=GNBi%I|l}Gw<v$)DBzoU}|`rXW{K17v1Fx-`p_laZ_IEH8tsA6KmDu_(_}7&tKY6
zn9Rl}bK=g<;+dAkYLS~#0ux<C#8h{F{_sITMTKSYMGw%yzIA_gbWJ*QGsg^c>gV$L
zbzT7h0=CuPd@d?mOB$!8Tw3DE!pbTrA>na%SE*&?rzK@A;!m#G?$Zui<8ic06f|!?
zRVy?o(M8U&eWB`2&>>RCkGF5hynJa@sCHcK*Q+5{<J46-`T0S|8^`RgoBQ?k_0QL$
z^Ow%bJwMmlT)zH~Vfeb3g|)xG-Puvtyeakcqc?BX$Ue3N6?Oae?Q?N;1%-{>j|a?O
zeMBmmq#~!Jtj@i?E$7yjPC;e23roGllai7`rldT%#w!2y)m6|+_mp#UEWiGZGM9RJ
zaWVVv@9&?#xVU(VW^kMDY_rVg6~~m6lwRD~X}mUi`?C7~|3EA8ugBNNwh9JDOu2I4
z^5w}FE(AP!^k_xQPNT%cL@h6&^zFAnYj|RJm!)1@<oe~^UF)my0TZmt^O~BPBy6ip
zVt1D<&Aq)XW><-)^=V^+>TfxF_Ut(^!%+EnpX}no$H%m~Iu0#bVwRVecV@2j^mli6
zKYsi6?4eff&?zdf*6;tf$ZxLIo7>y_Ykz<H_-^<6RT||hqqpn1_sgl~-`fLn_R8Sp
zi)QE8&V78mKX`MR?=ru+r(RuM&B?=~Qv2(RR?msx3vd6axPl_0SK1tOaCVP`A=BiO
zOBSgJ&Nj<^^tj)CS@H98U*6spS6BZVRy*ZzJOAaC!Rn75J>uZuS(0&ak=7|;&^ha%
zv&5dAnOXDq>-CbCmt23(I#IvGf5H-tqg|rO@9*se#oemV)kUwaXo@~n?e6bS|M1`-
zC{2T^$@TH}e?QhSfjVrrxApeh|C{ma>uX^F0f&;3EnJ^DUSC@aI#F_S`gzc9pIxP|
zm(}`$#<S;NTJC?oiIw}sg@w#lSA{BTXn43*dV-3oy;Wat>?~FX9j)j!RZHvC{saSw
zjQjg+L5n<ZZOxvd7duN-J8Xr)_n)uV?+3LEK#ibFOFTh~@?|WGRv0L8bu0B>Tk0)-
zZC&i?@c3HSemUEY-rmr_xCLvYw+C%Za@7i5b)=0~`ouij-CduiY)UzKs7KORBVvQX
z-s<niu3w)HS~C4>*NwMjYCba@Zf;7|jM-t(&Myx-==D~j%REEVFOx*prktGg=I-w4
z+j4J%YOS>M^L+Q!{0y1#T*J<8-owM~#;K=7Kqne*-@d);6Avh8Z@ew*le0Zle!tee
zl}q%;m&^V?A2jm^rP_kp-Jqqw6BL<mZ_oD+wY74MGl_kmwDQ=EjmeTG83K=w^)8Ov
zTcvfP7_`gi*4AwIm>3!Rzh5TvvDzs^7RCt+2{~O`6Uq4F^LhJ^-@k)aPp{mhmU(&E
z()9E5w&dIlvhDy?-<v_|Cad|b2wu+D%+7!8=uubKN;l9JpV?-)8CO?{GPChqm}@w1
z;$rvypsiV<*VaTjpPZx`Dj-<-u;kpLC0cQDaiApo`T6<J_p09)etQ!s`qaM2LI%{5
z0&RBinQhj4u$f(3!%jJTecV#VX0{$FQ!mFr7ZEYnxG6o7#+!0(8rl8%;0)T4_Uh{D
z&?(DB1O*)zyY)uwud}WG_GV$)*;!hrjD_Uo{XvIq1~2E+joK36=<;Og#l#HHt7UI*
zxn5r8`|;z)!~+eC7S^w4t*;N>9kw>=;k$R|+IXc)>K++i+nyg^_V(7%U8S$L#CM(W
zUOLg$^D3wo*;o4;bhxEy)|Cf`TDi4OC0oew9qW;F-kf%JhDoN7ot+)%T-n)Ymp!sk
z_MWDbcyCXoPQ(TW$3T}jt6rTh6}AO}%5I?5dxnLNTx9)fJG8^sU0CSMuB5CCI$>@7
z-fvPLKYpzFaFBh^$79kgY;3DM{$HG^?7kvutJdplYZtq83Tbt8>?x6o6j6NWCmCP!
zk=1*e&cXBN``7RL#kKqHx<hwVRa9EIM72JAyPdze^mSNV_1mp2`hx1}pw)7>wr0;X
z&7Q{fRsL{AGaIjjaavE)q7Ie0`zK08itN08zy7~(x0vpZy1!QT|9&h!ca#})Gm%@b
z)X^!L!E(1)e%;uZyj@>HD|XkGvL*GJc6Rf=->;v4sFm9|>xzb`i_0XQUY)Kx>fY0I
zKqaG9>8pT^Nv>u&Hzs6d7k+&eI?FWs*rTJ}H}_V5SAH+{=;6bI{Puqw7Q6NCD0w-l
zY)Spf{QLWE<e0Vb%dbo7@R+n{qU%KW8@o!i&(1PEeD$g*c$>-p_xt}Z3tb)7BVpKN
zl6lGIV)4FTue7)2-8}`0y24|^QCl(u_f~yXQc-C+Gt+prg3_W58k6R@xVwM89$&xo
zRIdMQGu@rV&;Ny=FSL-kvOa$Pp32XleQa+X`av79K-+deYYI0e9Sso>1dSU^(bv-I
zGD<xqARyrI`r6vi3GF+Io^r*-#qmg)bnx5%31AfjjR|_%_Q~7(Jv}uwa%+}oTwI*i
ziP|@1*6;WKw{z_lOFhydsMXP-61nSutINFZ?f2_uYX&d7up&@dFK*9@raR01=i6Dx
z@U`*Dt^)ad+2-@EPj-Ux_KofN^85Df`}6Dd`W-uVtk5{n#L6A9r@~NCQBk7T4HV=i
zvTb4$pPzbI@Zj-d=XEhVCG6{JR;@pNu$leiw{K~O+jvEEqe84ZI#epHcyyJ{e-~eQ
z@W7j!n{VzYRKC32-~Z}X(DnyVCX+Vza}0EubpHDeH!;^It`>!lSl;jZt*0HfW<k!)
zO)K+$zb&)gnss$i%E?JQ(q=x^EgdS-QjsDP#X)zA_{=urjgP+{QhCtZ!GXbT@kB-E
zwv2Rvi7Bq)J33Bq2XD`d{r2{@`(n4=8(T7iSM7X|`tQ$AP}@LAsA=-#y2afdpB5dy
zp!CVfQie~;G|T1No0~H&3X{6--~0RfJLrA~v%EVU%a<=NV-WN#`yw3f68A<wq`5J2
zbK1tz*I~>2=FVEQ-Bd}5NnT$5Sik)G1sX~xzgI^qcPo7|YH4i^+?eD#O+VgmXVKH3
zJKt`;Ez7#LrgQrA>F0t)Kv6ewIcQI&RoNSlf&v4}q9-1Kx34XB=kJrZKesA$b;-O%
z9V*$s*n}ree+Ie?D0sP_N!FE)8#iu<Y>ow8UhwqP)HAcq{Xv0e+pE)c!ux4~#fPt7
zRg0gU@tJ31c{Bg~zS`d_A~&mnPNpw-c&Mt`RpsVo|2V<Qo%<tr*+3hhK*uoGM(nBi
zX~ZjS25L5Obtz3M+PB<vlZKM#!IP8KR|YI(a&&Z@q91Q}bM5}KX^;1Qzqi<_m5WEp
z#3O%2hlWm9qDXTQY%M^CO?LM6bxUi1e`8^1pKP3dE@kVx`E|cE6B85H#Ow@uuG!_$
zwJ6_2(9<aE>MGak>tZM0W!2xDem?E%tE;;fxVi+X^y+kZOuD?pQ~1u^yOUinzq76W
z7O=Z4*DU{@&qAlxFV8wXx)#qrpfu?T=&;BK4<4x0TOaVAu6J=mqI2DkhwQAbf}R^q
zK#sq(GI+U0{63p*as6dUN4pq64Yk+T*H5;s{wBW9?&HUgM~)qP^y(FtO3Z~vZ{PYZ
z^_nVSkkD|?0H#0a-=Ci|O|!*(XPbde?f&!excrk76PwNN*L+^O=KPP3k3DCb>CQIG
zWdbc%|MK$k<^A>bdn1)5Ey_#@beXgz@o<}A?XQwUhYxRz&{^UOIvVjCsASxK;p>Ml
zU!I(ps0<Qp<CO-T#QpB>?voP~oA0RzdR_!OJ}M_i2Xqjim6cTcVaJ#l8RPVGH49%C
z{Q8pl@5f_)hP-=wJOcti2Wcw%&N8WLc5z7zba|4x;bso#0<4A3?MYjs^8M>26AQn+
zxp}8n;MuWW>71LJSpEI|&&{=7Zte}L(_G@VyuQAEbLwd^xsqi)N0So2zq_lS6T8fB
zZk2xmYe#42!z(L;b8c>O&7TH}=Z-xY>-T)(GS9!ar26~2CBC!8nhzHI-Z<Ml-zfLi
z7V%AT?EG>nN=lFBWxl+$RJs1&AJ777G2N&aXNACNeLCo<+Qh?c8`ICrb&2cu-OOP-
zkhlHwWq<p`?YHOuwmLJz@bR}5i!NrU<lfo>>L1>(|3A0r>8Y2#LK8*&!=1Z5Ha#e?
zm|<0_Wn1;-K-Shrj~+RJ_F-mSsaUsurfGJVl9H0a?6aWbY31wxOsx6&=_N0?fSf3<
ztEAMlV#SK%=Ps|0w+A(?ve)nZ7IR+m&ZboE+*?}?-n~25v6(G!d){65`h`+FPtMN%
zKF|4?5~ww%biUzk-tn6^XI_sx`NQS&{f=JMb9w5{x3-+mott;;XyM;9p2$nPAAVtB
z{BURLYM+@#t>50>e$6X7QRHx>s=&l03U+pKizf?9dx?8)yKV62iKLRU^4By_HsFqN
z0fEFo5Gf+&3ZjVTs8||KdGFUZ&#r&QLtUe9f2Lipo6&JQ+pmAV9;o6`npCvox>oba
z%~F|X`FBp={J31___`#MrH4PLTy_OVeo#kF?}-W5C7B~nT-b5PUfz7i!#6*zwmH`e
z{0AvvNuTQA64X_)_Wt2*#flg6%?|DjlAV+O_DI?5gL~vB{C=(W;kqS{<)?>7d}aD)
z=_x5SxQV%{9Icr>_2Ja*vMzpHN=1JidCuie*y#7K<v7pH6H*s=+&_q%`~LRq&dlA6
z%52Wp*!{yz!ERAWEnF8r-{e?N%A3EYH?{Y(HAG*n;ykE-e)ke@_H7-fIgQ)rdP~@z
z`8DCCl2Xz0-=V4k6I0Gl=nc<$qEvNPW|968=NOY~uXHP(`sy21NbRh$O*A{s`<i+F
zzYQ;ylom~N74+nc+*qT1$#U+K_m%d=#iyn6-rYEM_2lb~$w&4UdRvt9Nl#d}O5g3k
z${*4eiAOSae0uQm=BAY^z$svo(w?h5cVxYi3eLLz3X&AMyjc2W^pvBV2K(Zg!*4Hp
z`e{?`gsl>lX=l~TA1`VU{9IirXS3HW_iCt%%0xcrZjVpje%GAaI;;M@iMz@5{lA-D
zfA_g_=w<3ngMHKZe&4(-@!;FfPX5_?5W8ks=q1|E{&8M?rk;`#sNPfpiz_LCBb8Ln
zq^_4`^-7aW>ULULCieF)e|Yfm`M0J?Z_DP)ol{^WG4J%J{+K;yTE4slr^uuN3z=P_
zt2$ITLB}mCDLvY$yFYYw*q0X<o0E=q+0F;mdy8DVA3a&pY^F4+C<9c^^?**OQ1_qr
zd|S=tn`ff)_quipsfOeTPE=nqf4>r$&IT`lfcYDo3CZ;SF9{jGcIQ-qiBncNHnV}w
zohW&ENl;MG@z<A^o|Dyh!`H<m-rZFyq8a4kDkNA5N%NbEpPwuI_2uR5|Medq9libE
zZ@%4LE)}Iu5}@eNjB;=|!0ep(@KEc?>H71zRKW4FGV<=O($4Pg-RV)gN?yLWu&}vH
z86pvTYfEP3|G(eg&rI8X`{#$l{Gl@7GD+~qkH`J*Ek$BCrFd>iJ-zHu2e^oB*uMSy
z+xp!*cfJ(Z0_ya1oRB?t?%a=Gzmg0jI=Z_T2QGHg>Vit--rjcfVFBp;h^=P?1%H~`
z?+9fQtPDJK=n!a+`NoYK_5W<mzgrI~fb8w}Pv7)+sn^s+si&t&@UXq*QJR$Vf=^HB
z)1An*Vf+4mi$2yb@4vh3?TJaMr@bTlXPf6wGRwU+{nIVJ_GVS@X%8MfI<&-d@`o>9
zuJkD?oooWN({}#)dOiN-ot?(5+~R51*Tp_LGt>C$wME(2*ZIu1o11lY)x}+<+53Jz
zlfJand-+@!mq{jbH)|-JKYFxVJoDZj%eS|;gU;4CGuPTYl--q~`Jlt@vbP+3e9!Le
zEEdrU=?GmNmYEI?Do<M(K6YVsza<_Mm7biL2^#4DO%NWsqZ+*}2h?Zd6jpoi{yk_A
z>-qWl;fdh7FlZYyJKu+M*6$6{&&hx`Wj8P~$KA4QI_Wr7E0lweZ&|^^L!g5e0vEd_
zZoeHW0cminT3Jce*VkuVS>d=kY;DB;I$Jq8Iju9o1~Yv?6_W34Gf;cYYpT}KcXxNg
zG%0FmaPaf<D=8^)@bWGVT^*Li`&r~(#be&EbupE{H<W;`(Wv^mic1-kk|){BwJdI{
z`ughPa)0?{{`1c*@tk~Oj^$>q&+<Qi{Q}*9^XTzo&<x?ZIhI;opfus}Nl;w;_=Sbe
znxU&imU>SIg*9k%acgCpQObz}GYpd>HY6}AyZ0TreS7wX4F;lapyFL6^Y*@4YgMl)
z37?;x&AGd4>O9-(M~@yYI%J~`GUN8P+$$>ro6GN2wy)p+PipnmRfjrGa4)sq(QzW#
z)y?hF=5+sS>*LRFNIaZzVL@Zpr>55S_RE{o{XrK<9_bYR`0ZO*pv#li3nvzJ*jxe)
zK!mIi$h^Ev)x;#k@uTX=na1kH&(Eda*pOKB=i_mc{Cjh{#r0PifU|`DanMCgF*}VO
zAMa0pd1)!=hMq4kFRwPxk6atIm4la8HT~S2imEE3#6v8P9zPD15Uk8Ak%)462RaJ|
zG%`3vGx){r?fP$TZ$Ez0bHDcFtOegce?FYH8MKY3OI&~5qK*^B%l9)V72UqIH5)Yi
zk(ikH=HA}fe6m&=8fuAha&l|p_s_ewHX1Z8_2=*3&c41dNTQjy_QQvQn%dfz*Valm
zHa3E`yoJZthPHaL@vwnMO-)RubaZgo*w{quDp?7M&`<YozAa;7X4VW_Bf-igqF`Y$
z<JHyGs|`*DJUcUUrb(vI<>mg%^Y8Bi9Ry*Ver^RM5d76mOhD&F2#AOr(ck~)(DeAa
z&ef||b4@-Ix+0)4c6Zssr%zp11}&AaD$yt`EY#}hurZB&pfu@}i>vF!-R1ey^kPpf
zaBR-Fwq|D6B&WG{wX@`Gs|x=8sZ{fyx5oNy$BE#JCl+~VZAv)EWMgB~(b=g8iW$S!
z)6@0ky{GFf^`CDCI<aC~?rkAK!G(vwCCZ)G{r3MBq@A4w>RwtFKLZ`H5m)(CR8(zh
z#)Sop)!*JI=HA*Oq8Bqm(l~8}hSH}aB@$68tlho6ponH+WBYV7eg4I*+2M2M&W-;N
z6~P&6G5Kx%(S~ZlTU%DP{kH9ozG0TmSFN06kfP~*y#G{U)q$Wl6BBjj*)|JX&c1y9
z`saDam7LN8m)2Tnm$N6_-8f^xg4{h9GZpr=8rfXd-MOLKu-GhK!I@2X$w|Mv{ZbQd
zr#!gT$$KWIxJ4rF>0&?M6S+nSy*$Y`-{gwq-e=?A{P3L9B7WfyC&TQ1F66fC(XX3-
zV}HGUU0vObvuSF+vs&8Q+wC-zK6yw*ig+#rbucb2a^>XXOFP~tTl49py1t?Y(+rKf
zOE~kQ(~MH5$iMc#bDRA_S()<O_P*p>X5uyWH3b1I25G09&&`&${L*07wCoLQ#{Hct
zGkw@TiE8&{UKZ1_*?s?@pnJ1rZsCO0Iku-}mwovA{r+dSwN+2dR9g=4u%3QoRBE+8
zWQ|ka<i;5v64HEr&TG89EpfMf0h@k$?5{s5#{1^{T~zV$QO><RlJfHMpgaBM+tu!<
z{JhK#oRTI*#q6t@2^zfuT^rlT%npij(4P89*R^=lgqp5Cket2Y-n_i@*MBTI<J}~U
zAJ&yOY>Mva>DiSXCw#L=wAtib!&HsabKUR6X`fe@-D}9a%*XO%;DdcCX<G$EQ+K9M
zx|_v*Kq&QK-FsyhSJ#O@47PbI*8F?uqV%@xz>(c=e?M$(Y<<mlztZcAr&Em8r5zz<
z3^B#i7G4&=`|+2%?3y?8Hc#1h);qbT^L^Xo$-=T$B?;f(-34`xcN8RETNk^UzaLb}
zElJ^PZw8%Hbf}dZbohyrv-8T}<$gkuyDNBDvo9Uq&snHlz1h?zanD)7jk9+?wDXaT
zkUzfFx;XxZZGW3DQ^bw}o~1UnGj~5vN_uAU`S*I`7xy;sJUu;ak<0t}J5u($1Uy(?
zA(neS(ECuvqr1IHb^m{||Jf&Z{DHq#(4WcjmgO%6lK)5ruL&{y^G7OhTTSF1wlznX
zT;|_)tlz)SN9JySkm{`ur{@di%r%@e;fKJjAN?uS6+AmCFKx_DEbE(hYU7dX)?ZHx
z8P&y3_WAzA>9+mpg0~u*ox2~!>J(3^2#>4%8k%-?*3lV;$uF+0mF|@`SF^E+kyis%
zJD!<*vQ{pwT%t1!li8qCt253%Q`y?Lak^~9)oh>SX5qc!?-?FEI5cxgZg}^cBX>CW
z3MyX7`K}v(PQrV+U;M|{SNwP~?{@AD<UFWTw_bbx+56|$ElobLy~rT>TI}+mRw0Hv
z^%L$2ZfthjTYhPmdwNyPJ@MP?5C7EE(cZT&)Ij3?^3M~Fw&?wwcEk0uW$MNaTl~b0
zOkX-|e|I<I`3>*$2ctEf-e#GzC*qGYgJ_Z3mxVfi9n(ARpLwI3-l(nfUv19+%IEbt
zyn62<&%U(s)yyzbS^53g?c1{-JV@XY(Qp7|D;}OBdD~w%tAgsllXuU~HvjqO^Lfw#
zJZEQ{ryp+Pt*EL}ySR04V{saP&Hjq^87sMOO4m(UARl*pd4^lg#&SNv=zs53EGi#P
zs+eO|U6{W|M%}XFpVYPpZn^nCCY?_?u=c_3KkI#@{Ni?YYVN(w9B=sQyfVsk_xjxv
zEu+)sa8*n?o^!x7-*FF*q~bH3*^l1FSk^Y2we}CFxWeI9uGec=IOCF7@pBoE=P^$u
zqRe%*fB!pkk$L~7z31FTjP3OpqHfxyYo3)BsbAT?K>pa(t5fU#{@R#$n60U)Nh5d}
zPxZGq7u$VY3c@&DCankEWK#J#&B@8>1Zcx9|8~QGM}NOn@Me3TmYSVu^(%c7(?tFq
z5>L-<wQ#$`)5cy`n|kIk&&F--`{H`eb{*Ww_4W0YowX0oEVVzplkv#>UGGibni=fz
zjGfOmWsm3|rT>p=el+V%F$`Sl%g`4ZfBA{>kF=NTj<X*);(XzL-|2Zv?k{d)<=#~F
zH48Lpx!A4OqW0Goy9}k3bKJySCvBWDL*mqQ{rNmiNm2|ka}G|p*}YBoj6~1-&xuQI
z7;YrJ|5IhU|Cbu;Bj=j}vkK-dQhvlUn`f%VY;)!#ZiTOYt<^49=8O|^ZsuD5_|>Ez
zHQ~$;dbk@lck<l)^r%Dj;0D*<8<U=X4^@`jtUSrY@S?@uM6W+va~^9wn5D_>YUwJy
zKXcCZ8y$ALS@-MLwUuzTf6gt_o;ZC`UjD?}(N|xD9&5RLJ9b6P2S(-D=gvLYQT6-J
z$!>P@qFqtJT(fvjCf#2ZzFuzQ#*M!wbasGRqn4ZF)2>eG<hi|f;->pPi?YxAh2;F>
z{kmzt7vtnc<=6EOPA6J#*gQ9}`17ULi_2%2e9vB0|Kx0M-d&6Trv=^g<$Bt09yprx
zwIlOx+r7SZGx%~8j^9w2mByL=Y=^0U{GmUpiY7+ylP+{d%sKAA=*soCGj1%loBMlv
zLSntg<rJ0KyRClgd(QG^zj*rFJpP{DX#!&MhyE;m{?Z~{A=>_x_u5U#1`_wv?`=zb
zViv!kRk<PUsrTjTYwK&KybYUvB7DWgMUvG@Sw;0B+uh}o?@c+`VsS{qzQ}5R`Tq&Y
zi=SWmv-zs$CxI;;Cl;^J-0opymUTq~bk)d(h0ehbu0?isZAvOC-@pF*B3bV#tY_ZM
zm~T*dbFG+8SV6hD{mC;CI$?9(*TwE;?LI5I`Q^*S^R5c9u6s9Ql4leTKmYQerCz(N
zqFhWsBW6=M%fGX4ePNbVR{lTS+i2FA9kb_Eq@DSGKF7qq(4h9kwHbH6PV4g6w4@kw
zu;Pj2CChH5NjwqYI)1V2j5XH{o~-bBt@hO5Z@a2n%eh5fzYD_N@hRLFz8CoP(Kbz%
zdA05N_MfgqD=DpP0}UNW8hp;%%wHgBKT&?2=<oB6f(O7$&!?*dCk6CaNS(<%y?B;$
z#7(<oq4j%ay5_foYK;yROSf{PYRlkPy?6NeyUMsHP7I9r|C1pwFfii%uDcs=XFvV9
zWU+bLfilgO^Y(K*p2U84-sa+#wDUJ-XL-WPXq(;lxIsm*pl6=mn|bbs)fjbREZTzR
z%{z0q*wFB6tKqU-cC~XePP8Ab{qXs7@|#-%O-%MjH)mA$MJ3&^N>)+)sJP{${K;=0
zyji~HpWni3J>$|E6XX6k)(4N~U7fS;@{}gE<}O{Om0jl|MJA@47Zh|lTXz4`<38sp
zCFT!;ZG^8zmmf<09keND?$Mst?>By*%)j%)vFTS{PM>*i@m`O~Pc?jQ_ZX|4`fb*@
zb2X^&WzCa2mYx~4?{0MacJ1_}qm|YPslVqe-SYNIV0nVz#1MDoZly_YBB3Saj3=AK
zzaKkYIH~#h`oh1bQVWg<G4BamVIX1jFpnkpm-YAJZL0SU+!TAvu>H)HipS<P-J*%l
z8hNg0P5t>zG2{KAnV~s?o~59qaq^Dnw{};xnuT}&xfwr7^RS4C+41msh}1of{fS~+
zF&wH1r?$^%e9^9_&z_sUUhV&z!kTM~HCONUPt0lTowM%Rl%_P<CNc0RpvvFU(zo+6
z#mao84`glE66u(Kr0qg?MM#CH`w>YqNv(^j`D}~M|38)X+)O7kcz(}0-ILQaAO5?r
zPOsCu)bzodNqh{m?|L1PSJb&Tdurd@eM?I|o(Xwb)6lCr?b4oPYq&Kh>t==qyBQnY
z;8OgXvHQ2_rlsD;kA~U{KMI-ka<=`0$=e^V_&3qqd{^n?Kl<(xdn*j&(wN2dWsVD)
z&u_cB>~7qh1CpCNV)n8<a-OkYrDeU8=t5Yw{iMRjpK&O{y*}of+~V6EX*?g#AO2Ig
ziz~_{NWJ;+!-5q&N+&O`2wZGY{EWxBoiEgoVFP&W+Pir-;+|VMGXAsJD(m`p&$)=4
zb4TXHA3t%{QsUCZ*ZUl@4ki^J&GC}3-{R()Xu9)p-X_pSo-*^LtGwFU+d)Uy9Xfn?
zYjo$5BGBZA$dQ)ccNgmY;;~<M%a`+ftjL4X9dhS>xmvHd_VIMR-PMd^HP;KDSvOwp
zxEag7z*XhzrK=Z|CKc_hG%9_mQWSgRZ2#rupHDYtNPUq!QoVlnN;4%Te-BXdS(K_5
z(O9-uYlVQ4lD{WtBIC>r<--vIH`=%5-hTA_`SAu%?g)!ozT-OQKV5n8I%l?>z<edU
znoIRk?Rq!=ef+U5)%Zj9_p^`Vuge{6oL`^t=F{W<nZ0W!ChBm^`FG^@&kH{n^9x_d
zW2(QoRll?2(4vkd2G{%6EQnB*uPa=8?nLKhN$V<(-njio8VvW_T~3zReQZm**wddy
zN!gFY*)Q#Rw&VAq&?+07xYXnmk~#f8InOy~8BI3ad#q67*0(FKg@xtL^l08UF1Tfu
z^S|&P_rcSV)9vhUmUJxvl@}geN%rTD@c(`ol+W?lI!t{sr@<`En9bJ3yHir`%XjK-
zcq^u)ym-UC|0UT@w>G;(Ry<h{b*6Be%hrUN+pngqikPohY<D=({=|L5?QSpLxVT0Y
zoSxdhsy{kCIIqU4bZOj7lkZ>i_H11h=n}c6|8B|kIjJwcxR&pkw;_ao{{2($EWI;t
z@7eOp6123UV~Ng1^>X9miO(mey)UY7mOTIe>*9CPFL!RJmgAkxvZ3IigjZ#iQTch-
zH`tbeiye>m$%$Y(uVklVCBYbz7Ij#t#^yiQ{QA)M-?A-sFFTlU!mBF%fY7lEZN3tl
z3m9vo%#wwlsY_hwQRF*OaDL0{HQaxu$lX7m>h!&6ip+*7CywhLt9mby_+_W;7p<Tv
zN=Nr4e82d%L_cT>WB0fFoQJNl_c-3$wKT!-Zcu>BB(c~BN=oN_q@LIJbxNL{=^-!Z
z^b=dbR#Us$AlJLjieK{BhtJxYcPp%x+pRu*Jb7R0pWn78e`cvY(Ax7&F@AgH^Lc^W
z4`)vce0Tca6XT<sHl-Us`ZMig=+rwFubw`9eyg~0R`Y4`YrKoUZ>@Usca>EiH|vbx
zYCUoL&x)7#@3=bucJ@=rKcCD0{Q0*$CoWSZ`#^uq)961F&+pukZo2%n$<hU$+eJhs
zE^!5==KD8gIuk!0bUgm4Ui~0w&G?e(dv}<FHlNtk_nx_F^f=_eWScbRb(P$YQcukI
z;BDrw`fIA8;E&USNqT>dEqr8pvi7>s;{G$s|0c@2{oKcUae2h9i>I<@3PnAVFnV71
z=<uQMn>KxazSU&)x3EJ+MU}-r9`M-Ado=IDxyn6}MjIG6KbJYa+;QXaeeb?mdS~8S
zu!GOB8=MVn)=6$=+K~7Aq&xeH+sBQ1c1Q{_&EV~@|GHpx$JvVVjKdcbCv(Vbm%VQ_
zuYA+XR^1=HeaVln@E5Yb*|?E2@#@$8aylCem2Wwon7O)K?dIYgB416Cr<Cc<;beA~
zXe=(Xt-5x^`9i5}skfsbXm(lTgk`wvva+u!&sT`)+$#RK&VAkHya@SgcTPAq{s=H@
zQL}m0VR2Z^S@qPjvkZ{-n~nOhW1$DGnkssveamv-K5q8xe{9>On{$NcOFMm>6`T3Z
zwKVD6+{fpcwIytR^LI(jk=U9eET*9L$KOdNeEtI_(eN0COBVO}RvgU^d-mc&osNA4
zqxY4{Wu-qRZJ2Rf^nj#Hc<1iyOGkSbM^4--e|vh(>2-zjn_s;6+J3J7;{!Lp@N6%~
z&3umC9zuWm!Z}@@>^BZt_gN=xpVhQ$YgZX>NC=e@HQ7-AwN%Pq@`!uf&J`G~JfDLz
z4)<`(Y54p7(VDw&8w;=V9D<hbvnm=5&9~a?w(qQ1+R`2M=DB3>Qd@?9yxI5azCO%$
z(tJI4+FIZJ3*Pka)eL^7ThOwx_OqgvN#zN~%-^YIdXcA=O|Wo6Y&(nQt-6q1UHi*y
zy=VM_?`z^$PI&t;ekx=G+f3*N)X(qMo+!(cy!toIs3Ozc=t+weoBFT%7w*TNK3*~H
z;hoyg^(Qu7EYAFQGfTEfY@*7^2L%=~TFoAxOc1TV$>+Rou8q(UTVs}fX6BvuyRB|{
zg-rRn?zf(Oc9@QsblPUiho>Gx_loL$KO-vs+-qTZ*TjQq$1{wW%GQ(_NWMQ}y80;7
zx-=iRAI0bM^lm)Zx3r<y@6@zY|L<SF1!*S+z}kr|?d`9d)h3++&2P@rUCg<8H|J+&
zd6S3l@~Zmu^A6?sMeGyo=<HK@bA;1l^0Rq6lah@monNgnGjF-^WBaCACu`EVL7O-q
zHBJ3JtqHslt$VB4?(F^GLEeetXHC;RqqgY&oIf4BQ8LKVRb}cWRz0LSg#YtE(<wqJ
zswEG7wz*pwFowN0RbZ~{p0+G#>7^7SrfZkKX#Hi3^DeuvW5Mm<y<Kk)i7jhQTb}%P
zyZnzGwpu-+pPy}h)uX<+-Q>t$!^^HNK}w*>W}%ctC6m?mNZGV<O3(S2d%rhrYxjox
z>vcnC`8_ad*xWhk?d}Qoi(d!tKlN{`@Xvp*70%t3|Fh$wEq`QEpFow-w|U#lK(jZX
zIg3w+_Bef<HPuk!&WQxG$!il>|D>0w|8wE^pI+Et$nI<Z(q-fBDur9Rfi7;5@1pyr
zo;acLZz6kgpK<n?K$&ej*YN$_U%zk9U8gCcMo&+CO@U0*sc7yhdmF`dZ&%BMboLGI
z8=r4a;PGSie4q8;z$W<*A9W7BX%1)9zO7ca;N*dqe=K$gEp1zW)alUOIHp^?iLcL1
z{eS*mt-E{R-+Y<+^V8m_&1MnuSkmkvEF9&)q~Yu6;NaM`ph?74r=hFPQB!Hr0wq_G
z4FN7X3p9d4SX=_DxD_TY+HxUV$n56bH}C%Z^H{NHk^PdE=cRqmU!;_!eK+3xc=Pvj
zcf5~s^>02{TNTk;WwHsj7T{)Ve7(oUi<c#9_jhlPo9%u}vf6gi7fIdKJdZZ7T%2KD
zA?LPmf80FjrAwCz^00kn6&Ad^S)<5w_uFl^B@B~T`fWaOfUek!-&eEqHM7)DPL=&G
z>*YR2Y%E}9DDQpY&Z*5JWxlZN|MpGmljgnM^YL1c)mdr(&o8X@guivItAE4yY0a#Y
zdmeq2sCu3oa^h3rpODXw6w`dRUVOKClZf8E{sZl`KkK}`-)RZ+sheH;y3g@T?526M
zm3dd)u3!8&{6Ht~^Z0q(zO&7GpU<oAW0$W9h&K`pJiX||)ips&y;RiIg=J-ZudWK6
zX;W#mGb>2#v%xj#-yfg7;LP!fI=6)JyV2*9);}|XwZhy*SiPU$F?^wOJpHtIK*n2@
zUpCfkJ%?^II2H1K@O|ysS{;`de{@<%=b3rcKac1aSH8b~eujCjRPgh)tPhUrPZwtJ
zdlPeZ+XT5Orz5XVGZxxj{e0GEz4N}HwaB1#O&x;DGYpfDO*J*10BYi#*e@(7c<{tT
z@D<E6Eee%*rOi|<EiI?l?0OR!!o(0J9vXem+xGCG@29uV-y5aSda;xDhuso8XZNqm
z<?{YCtaVW0PWN3<Jnv;mc3Ah!+x`r3V!iQa4xEg?xWLWymC5^4)wS;qS*P-x?wfT}
z(_n^E)PpzQKP*38%sBu2@zeVIefCy;&A7CLGyB>aMMJ~Dcs)>MbJ7>IY;jZSY0zTG
zUMbV00}YHRX=!m+=f8ZgPgSDQ!nFU(d+vSDs*OFia=C7DKDSLl(TrWaF80iCH&Lmp
zI&QCiEv?~CsoX0Q`CZp@Th^CZ8`{EOSx0X9$MDbgzOmtAPF-QIpe--His*)P90}tO
z=)XV1q*&Cf+G5%N;(gIu*&CAGzbDO7Ke1uQx!pFp1*P-NyTx=LJ)51M_V`%un%Ldb
zmd~$~S{-`60#p@yy!+!6tz+};Msm)L4UM1$C|k2a_x*e}d-aFOEFQl;`QEEI-&_B4
zxy|L}{Qg_kczjO3_Th_k@XHw!<}cB|_w4F|`K-eBN#XM<-kF47URPzazGidlrGue1
zKR&SZwg0GC_45CWZ%<wJ-M&1<FZAx7D=#9qt?^QNx#0hmeRYu^lr@)~y!3GHj6K_x
zCjYvB^n+fx!2Lx`vG;Fv2=1IZH8b@5?mx>*+1H&@sQaJ*I#(2O67n4V{eLDca_xTQ
zt)f()Fk|N{Hg!SE%YS};2HhxGSXj8{+pTO7tts8|_jxM{f9LLy31f;gw}`P!R;cpy
z{`{z`=Z|}`65F0U8;e5A2IIJ&>8IC=Ez1N=!Laem`5f<)t$f%jUa&Lf*Cx<Z!HKP>
zUVk-8IU#Vb?sx9I+HaA&%HOZcJC=Q=jaT}`&CTjztEYZ^d|X&ueEA}f@{W|-n^L*&
z?k)!{r{DAQ*=!XRl@%J-Ikw+E``}>n&nJ`pC5%!yVt1DvHQ*6-bJ6Q~7b&vS4YY`u
zm0Rq>?(+OwTQZ&3$L%fp*>I1^Ap4q*kdP4Qh;GnT!u+;hCNwPqC;a)l|Nr~#>F0MW
zY2$<m6Ao<2yc{wmCgc0NyIV3Zw>2`ee>%u6|KT9JynwLqV#tigpS{o}cxh*59PAKO
z&bYV7a{u3Nv$-a7|M&|!Dcx~x)K(84pOnA9zFt`y9o`DAs(wBf5*Ci!oW=`UxE!~)
zYR{ifr+?Jgi9XZXnsd`A>)M)={r3M(fR4w%zFuBYQBkX><InyVjN*cfn|ABYuXx01
zmV4{SiHXWrrAkFXCwTw;{XPBOo=VWVLbKdkD>RgfF0M!obYakl+M*F(_fz$7JOA^G
z?($2EpP!SlsR(G@dB}IJ)znp?s~;8Z^zicw3ji-A`<ZW=eXYlLwprks2*bR6Khy5j
z|F6BmSorkjo=Rg-mb<f~@XS2h*@nr-1SBO_9_l#Zx;!sK@Mo~JdES!f?RhLrjG%oN
zH8p#>ie^^Y$bm+z?^VCIW#f@J@VMW;@93iYf0aJnzbNYF@-GCkgjYCrR|)7gR?s5t
z?YGw+icz~?`(5_xs?f*Z@7JdvZsYy(;-WKT`RtQVMm){!a#bC^v&{<s{3sL>5}E;8
z4qD9l_V)IEcDafM_4zfEw%@Dj&b+(~bonr37W2ueiT3q%YX0-iOwkMmoeiIRYYS+V
zlGd4e4GA8jvNsW{!q<Zq*Zp`pJwD^giiwapxuU15f|v8XzP?`F&~PH?7~P~J9DnNe
za}`fq8@qd&Rq3k_SHt5sR(woq<B@dA$XLOptTcU&T!G-k`nTWj*DtUB{_f4q&FvQ#
zyHB1mW5uFB_Dh#8HO#o6u<yqs?lwNzQ_cK#D;9N}NL@P3Rq*F^uW33bx8JWj-N49P
z@t~2t;?2h67FAzXaDCRV{rB^^ii*mFsZ)<G^PMds;tE=%Rn%^lduzg+IX)E?7NM)d
zQeRzJsddU8bZ$K8)cOsHhi91OO1<CvUGCz=i&{OPgzfQ8P*U>b?EHNv=U5g?nCHoW
zV)OH}vs!23Km4rWkv8kGDt#5OzpnP$+GzK+QCmYGli7E)6%`w&YKMb%G)S3b9O%~H
z*Kz!DVB)!tm;LQ0&X{pxmT7jx-YV1ozwiIwa5D#D*9p^t#ElUzE-q%5Hp_Vc3XG_&
zS!d?k$G0+qrhX6e+xtw{iv`U}g0?hOe0-$U(-E`ri^-ObKhHtS2;^$N1cHisP(|S5
zbH+5B`LYQ(gPUXse7|3BufOky(!+-jtt`Ql^emg6o5<Nz7%X=0Klc1{?6mI!94s3m
zbR^meBfrg`FyX+Bjmaw`HZJnx0M#LaJ1>Eztj+ILG)tLgt&mVs68HG7)78-dE@C<$
z>nF(MfVLWwsrAqzkPA*|wq?#*c1l5MXXxIlub_nkdeghNY%w`>=+K6nIWkry9je~b
z0(KUqih8;{(Z3PGBzW@drcIkHs=jEr_sg*=n3{gQaz80KIaEP#;{Jts8G<LTR(*fx
z>*8`mep>jdkj_J`+^ZeHD`JXtv+wLMY;<_ACN(iJvE=nN-=l({;F*{`_wd67m7kwU
zv=wGWDhMp_oo%Mo2M!yKlDD^}e)#Yq_<!u)s?tS^7l%rK3$lrC)EDVadviLrqGHFc
z1>lufF6VY!h}7AVeVwoSXw&`o&)4mK7Zm6MT2SEP_xj38;e!bi*m$KL6zvQV5S+-p
z#BrzVlTDt!zDpD5TBTIlPEV=NuD|Qk>BQRE)$`~4_o**Ge)_XOe8+xgci9Y(BL#n&
zPnta0b4A2Ney!>IRepbFZElIs^KCzsUd<;Ew%tzHZq6!ZU8Qi-vk#P%riVVXnHX|H
zc;;)n@K2oP*5@SFxw|~cT3U@c`V7igE)ZwBfKwKBj>`4RJbFq+uA9wkn_kaSPf+Zr
z|1RgVe75s>{-f(Jb#%lCWkrfi)DF)n=GwNdGyh^mS*di3wLtgc24UX}eemjqrlmEJ
zYa9%|GR&~{tX*pVt|!7Nx#(Gp*`bX`!;gp+Hx?U6KfG~j%Hc;=>Gr9o61AlY6W_m-
zU!$!wNqU8Jl*_t^gIA?Ll&^AfnXMSn?f3Y>4t<L+k5)$*`Dommqr`ms;JaLw#q$+a
zjGC2&#RLoEjh)WK-}rl?WKsO<Zf&&_59?x2?s=HVwQX(F%dJWMn<jpHsL-dU>+*)1
zU!aLU+NbW1^nQh>U!TU9*J&tbYW`rpXu;i*xJ-59>i7On?91<7U-nX=)8pO$hwFqo
zPKav%mHy`UhtXajY%%ZA`;v}FO5HirM4s<@emilUg!`!j|K8M=8$M{bZ?9N7TRO+j
zz`VHPUfKaJH^U^WtBM<{)PCRp-=*W!k#kk<`r+@wX}8ZEyqI73$DIG$j%O|+yhr!f
z8fxyH|M11O#|%sAOvIj7Ucagu0!~VLs}C$Hd{ei9-+j`zL<_D7?`JBuOk3A^nQ^A)
z>rLUe>VI6R?P)v9W!b+!!l}s0|5M0O#t7~(rB~mID#F&Od~N^QGWA^Z%Tj^D^RYKB
znNQeOVmEVl-P04=`N?4~oDN2FP25^2b$ps-!QA}2cO7LF1W$gw!mFzkzNTZ}`lOjx
zr(fx{_N;!a`NzNK(XLe+UGB)tlVv-6xFJO0^?GxWBzC4HzE!PTCNt{q(`3zWf3R?=
zjHIEydgipFg5eDR_4$%^-{<=`SEk}Ex525TjsH&WlK%eMq0?jCvT3e@JClC$MR!)M
z(Z4q5|2eByj7$}iw+pizdY{qH%ds+g>g4_%5@rWp9bvLqcYc2D!q2it`lr75=ggri
z*0}YZut@D9v6<F3Hy&9F)TXEQ+=%X|{T&%2$ac<s-XT%32RAyNpJe*9=K}BN4d3^?
z?&^3_22QMD4?b}AY?!U_=h^HZm)5k1bspMY$d|eL`Q)#`wd-~-)pAv@@A)6WCUAho
zNNkPbeO^;dqo2u}g1#jtsV>Si<U4!QVWIV7O@kk@K9?*%T~c21dzR3*8tYG6DyP}x
zDinXT==&GeyQu$25}Q%sR%x#t+HBP~_=4JW4wb#z<^3I$n%1SyShiB}w9AuC9)5ny
zD&I!W4Bq+YXL!NUq(XU>7g~*bQ{<X{Ul+D0`86r^$dm(z{FvO|Ex2){Oy>3V&M$Wz
z3yvo7o>nQ)_{)ELvsmA;JD;@E@8xFbhVdDF|5Nz)^NtIp_IEe;%Z9G{{BH_>=>pd$
ztH1$Ww39!r-Re8vCbK^u&p%rz%fhfYEpttT;hg31;sx{Ts~dIM|8KC^@cKG;o%jEg
zSiy;R9KP#xow&Nzr+8g<rSd-K%$EDNUByiWCwBYjb}JQ?-u@)|ySjd<RY%7k_b<xe
zrMX0|$z5k8l^*D#w>r1I-=OaJyO{4g_uZ_Y@oK8+>{T5xe@)D{bi|m|?Vlgs&Zlbn
z^@r-s`)AgtO`H)i_jOXT{>gv0?%eKcX<>nuk2|FXZyg63q$IVP^;nl2%=u*gr{8~b
zZg1ngY*KvnxmMU33BBpw>=EFS`k(Il>)wCad~bX>z23s<aPisaTt8}`6k0}X%rP`H
zG_<+^Uh?|M|L5PjS$FStP3h4Ps{Y@Ucrv!!@J)n4OZ02Agq!EPPs}!7&$pwagJV~E
zpk9E1#FDRjzfay{?l5!O`zN~=a0EQs_Wa}VJ5G0ROBFsk!YNxK2&(^f3M|XKwDGd@
znj0kzMY(Fp8=u`{dnqlo-)GiiHz|{>w&Ra49ux;Doa*rJT(bX{q<`o47bNfQ-*7`_
z=fBU>vdo(H-iw%ZuarMBGSZ3>?3*X&|Ez!i*=PP==BM&A`b?kPstCHVAwuJFlFn>?
zi7i=|=Q$`W5OxGtROd{eJYBfQJe93bHZ7s}+0JeIB*LVhM)@aQownHK!bb0P(rO-H
zGylcA?YC@uxcI*6zQ;aJF+bOz)o<5rk$yk-)OYP|A5JXxf3ZXW<VwMYPGNP!2S>v@
zUt0*fr!;2o61z6R;M9HdUj2QMhm6mEyLwU))Pg9=oo!cZWm|23+*dYDKq}UUl`-d>
z$0Mtnb1nV*o*X^s93lW}P)y0bd}n9z%TFI3AF(x<`{9O>)(Ocsi9b8)KWMw{vz;iY
z?7mV#NvY`U1s*-71B+a{cYNQ+u6h5TuoBm?)e6racI@c;(Pu1avV?P@pkT?%OOdYN
zFhBVhbZb|v?yZ1rXI(yttX|dhhc!JU@R3#B!Qk^|$~JdgZ*R*D6%iCXdG!i!ZLhbN
z*P}kQvzn#fBNlnzTh$c95bmRy_v(Wrf9d;^Z*Fc5g*f$2=I3W;ujF5tA=DeMUcZw~
z>x8{P-~+W&+yA$G<1a~jc&JqioN&F~ZAw1QmvuecVc{<3oqGTDM4!Z;2;Rc=^p@YV
zch$ExmAniR1-Bej%R$@tw_n-U`=@iS#*_<wtUv2JcI^{e8okeXvGx0;x3{)JOgwe<
z?T3el?*#oeQ+i|_w4^Fr^oi1m_-AHb_bi)kOPMKJT1K)83Qml1^>>bU4hjz5xiybD
z=eqgQh_`WCCtO9OulG2Fv2A^An(%Xyyk_L4C0*d;Ui7tK_uVVI_Zsf2G1V-st`~h`
zAF!b)nkiE9PSA6+FumX4_MnE6Qqk35^Dm|cUVpu`p|H5`rg!kFliFG*>Q|gNJNHEJ
zMsAP?oZI=p5p^~Abduil@4qw4!nK2p7HL-ZFI%}=r0edjX4!qhccQj4p00~?xwoe>
zbjs<BdwVK#?(dUr`0}MB?d+_hzrMaU<Y~UPHd<Izbg5Ej#~;?iCceR`C+0RCRQSFn
z^Zcr{nNzZVD~heIlaBG)U;9_);mg%OUbg>iX>VVxFx@jHMMY9lvZbYE&Q=>^<H_^u
z|IM6bn!U<HNvYoP?uDIGGG~>%`68fgCc7YJ_DQa;=?8gx<JHba&SUGH_3&dw$b{2N
z^6&4v@wUv!*!cV}_56E#RMgZ~fg8iummQz?;^?3G^$(Ag$tQ2L1gQ<VY<}Z)@6&V7
zbWff-738Sb{QKSR_xz{VglT`jTP`owt=9<31lJe*|5`im<>vYYf0!?CY-?GmFnwa;
z#(-r<BeuzGDe#~DjfII(^r?_`*cu7Y+5g!yFTY$8yxb4mpnkM9{lsbK%KTG2kF3rd
zcy?xSQw(Rk(dyo!wdxzIP6x`pxWRf?t>$Iv>ubFmHe9x!qol0t>ERKO2+H>Bbr;{4
zIs6))nL{UNzwA!qet4{U`KP~Kx+Smw?cgfv+?sp)Tgd7>4-byH*5zT~_V;>SVRgTx
z1KsO|?iZ@{onX>BA$mee$nEFvWgnkz_kQ`;M_%-a(5d<M_Mxl8&f4*XY0Fxb1O&Rc
z=uMEDc$RgU|9rpm_s$0{ysYE2xI4B*S87ZC8wI;N4Dl1br&rx9J9mG#Noj!Am4-76
zhtq#s)XZCZ#k1go&bea`ug>JTD`E5c>)qQsJM7;*z4((+Zl`TaSF3K#-%PgnrSW%e
zUD<QfvHP`d#pVM%GiuDwAMT$SuxrlTb05ueW}KNdckddvIdi9(M@ib>Exz{LR4Jz?
zFE{<w6rX8-?dJz>v{deVZP4r~JWD`Q@?gWi^RHR<x$G)^eQc7dw?@zsj?`0A0>P!_
zdS1}B=F^A!g|8Q~t($R4Jae_0eC^xhfa?Zb(QCKtGVES>a{1vn7f<W$V%r=Q4(!lB
zFE7GacjwXa$WZHZpL6Q(JKFy=YMLOb^3y^^%vdig_R-^<nC+L({}Hu2CC>eHngqA@
z{<6=>Y2WT||MyBg;CN$wsG5D6)Rz1g51uKReR}l#j$>X{$R=Syj!h4hY|rHIcdM;?
z?BllY?u?F(f_ZPQex7ApZ=Q4Gz`MJ<^-u0Qbm)+Q@8p&imS^YYR%hyMFq1BQb>-m0
z3mq(=KE(8csKsUcleM##I|c7-5%{ED?bmwf_v)LQ|Ep$9DoShEBz?s7;J;?Ic1B~{
z6B8LCm=jv0M2@Mbs;{ZtdEoGLOa0q*Ivf$_Z&mwDag^JCa+`6KRP4eB=UNs8vbp!q
z+;Ki)f5;TaSDcfbBL(749DBS~re^<f{uz7zukFuzJ2zxSPJ65!Ll1|xA&bObtKW9p
z`kz(Kh(C95b?Ldi#)lV#WorL6`*rTt{r}UIA6Y%JK9_P~f|^T9`{k|K)3aaQxw|Qq
zyY|<Y#G9K^j~qW9zv*w-ihxFDc0PlwD@$jd)zH=L1zi{Ns9XQiLT7f~tIA4T-9;}!
zXOHZT+OjZbcIOk#u&?`XcB-E^SE3Pj!0mPtTmD-k!=$$%MQjl}?k@NzX?xE!zwFY^
zrm273eY=iC+}Nfyd3qSTBFECK7Nd=2Wv3r5%$2jVNVuSS(eZ_%{N1!)=k6`P>vVsb
zUs7Yo9<fW?t6%Ovw<JE9oplbIq}*hK<&2qiziQtc)X04PGWzY0TDG6ciBFyQ4{Gtr
zNSI^^y50EWqxR<0<|hdgs#xEc^ndsmYGtw_QGLVR^ZMy;ZdJ|`k_Zetb<20(t%{HB
zdkWn|Ro6WhxB0t2t)+eUj_>!Xi~m;cuK!;LTA<z|X$%@c0qtR3xNzalU&i{eyQcj6
z``ftYM?o8}wA*Ag-x5aQiL1Nnp2f9Jnlx$0V$Wi)trFfPC!_!Gju%qgkhpaEV~4G;
zZU!&ix+?Yg11tI67I$-Vj@+u)v8ngx{M|3jm75K-|4v!;>dC#y!S9y+t}OV&`RpyH
zQ1B_orOkEM<T#U__B)@rvi9wQHL543?3}4{Pgv#EFM)R_zI?tqjX~haY{|TNe-E-x
z3|r+cVRPev<1;b&#bOmV`W?4MzdaK_nRP~um1;BR?oHnflS{=C3Vxq0JK*&9weFuC
zU*7+oD*WipjXQN>DSymO%Z^TYw2W{5#}AA5&aka6TW6bo`SRtL*VoHGJv|+?z`gYK
zwU1ve`#=A+fFoeB8}GC8^Zoz-`&;>H<?<EL+d=z#FSPo&h=@Jizp?Uj+AH%6pH8*I
zC*!vrtlwkKe19QB_cs>BTW-6P!m^GY68Cg;I`_g<Pb!<$XVagX|Myloi^cJr65W=t
z`M|c`n+xAOW{uk;aYrD#K;`wH->YMeblvAS&F53px_o|>uid0eiMI`fnbfi`J-l#^
zA&y~rt5SpU{NCzExl3}sZPYp0Z~lDm&GQ*rKKGxhpX8mmH0Ms}MZdc3^CMr@|Nr@V
z>b9bc?*EI*9Or8<I^a7c|3Bwj`4?vnwQ?6<{rT$iPYKH+m8D)&laBYv>csAvGCjU-
z=86?7vfu5#y(Kd^Y+a1!>uYO4hpDWM+M4wG+S*+!K&xJ2Y^=-PEGXN{CbG(U!s|z~
z;wRO%StdT?l8@tgmm$-&^X74>19kICH=ld4()RPs^x0qEJgxip=O6oW(VCTv$8<gz
z6*cx7cK?0SZ@OuB$J*yEma`dW==?u;_ylX*F`Z9_b>$O;6rbPTbn^M?nue)jon_nB
zp8Iv1p<)wvRpF<4``2Fc7KLhDFEc8?e|eg!_q3R=&sSy6s`+x!9dwZ*=!~Vv&1oOc
zS-%h1l5sKnmvzq8sFzn(3VV8bhO7u^lrqh_u(!ILPsU;a_nZz5om2nM-`iWgbLqF-
zC;o3-7q*m5&)FZzXu$s~vhof~)0yarcWW}vbLPn<d_Txg$CDLuCNASh1jo^ov*#2d
z4$j(~=aZ<tkY(n6Lk~BGoO5s1y-BvzofIiInU}q}C071WvUwCI|JDoJlO;I)m}MLu
zJaIN>3p-G2al(*CYAfH;G8Pft%;#&D@tvsbIF@JJp<S|Tcg^+nb#<qim>8d_vj3L|
ziA{gUEOhGT#>ziB9{WwU_CXtRkC&BiE_-`x^|zS0_Vsq0oSdK$c+duUN#isDaq;E)
zA0?ffok4fBy}q{gaNhRdeKnPAJQ5C7RlB;BmBR1w-xZImc*r{S;i<A89}=^!tn!Zd
z$*vLg`r`k(_A>6h@5+Mg4mouOg&q3-`{#!Zj<(?w%+1+$>|Qr{+4bm+5jr8-9ILOT
zZ9L1rUDaY`=pC<a3+dNl>7O3P?btV?w=4Cnipa)`Z`8EyYi6H(x2Wvaw9Su8A~nQY
zv_2{xvEt<tH;MQZEY^DZ#?FmP=BkOlP09Q8;hG}%|JAP*&ZGrg;4%*{4$wHWpuldj
z|77M6%{+BQyOv+(5kH=N_St92u{~s4@XuueNtc=b?0ElV??g4phhdNQ)ZM#XRu<9I
zzxlGRBFpl#3U=2H{hRA3Wmb^z=T)-bsim{$>FdOMIxX@ts!DL5Wxjjzj2SalKeLJ5
zU8W0K*8l3tN}KO@ia`U?kB)Y~=Kf>8@Z-1J`O8CBht+&MDsJ=jO0b0t->Q(6O;@u*
z8(p4sCCr$$;#=3WS+l&7*6w2AHd}Dy?ca4@E_Vh67W|n}dF@8`x`5^;zZ>P}3e4-5
zdbBY9yX^e-Pw5U*4Jp2v>F=)R1so}BT>oF#P>`)+Q~TqcSH7+NJEgXpZDY;J{wX?<
zpStW`-QJ%6eCP9dpc9us`?vh&S|x7IT+mvde7KF5N5Y^X_x3i!loJBCw&ivwN`Obe
zPt9K)yI!%H$voiIxkRmFrfR82UHWw851xA)z47+j@H@Y)YknAjMzMpJ`z@{h{_e}0
zo5IsiFJ&$l{PW>3|Cjgo<r~7|Yp4GD`g*m7Qqj#9^NwoY`SsOPWbNN6T!p+^MaAlU
z^7eH{?;f8vapJ)T2b(p+*U3aiMuJYQcs{S%&+(%YXzIfL&j)7P>TgdTw#$Q#bz2j?
z-48PNWO-cl3hT@7za`D{WI$!k-(O!r*Ty|O+#VnLJv1)1{oaFl)$bC&zPf6Xd8x&%
zSE}%S?e~qz$N5CvK?Uupkk4uC>$mbfT{>;yo=a9e;Zg0ER=4#A{a@5>Vspyt{j`NW
z`|o>d^6c8q{rv4Q@qG(_u&__w)mm76!}afd<Gr_bR_xlcPwe?D!(=w|yCuS)o&A@W
z`=@_;ax(nS-YGj<@8&&!*e;*;_t)1c8iC12e~81@0yMR>uvC40b#$V#dx#9^%p&d6
zlSJdL{o(;t>|1OmpP&49%Gw7@;-3cy{ZCA=DbzVp*xh)TQ_km`?3c~oD?Y4W95ZEV
znCj-}TYd7}o8AXB$ch)Hi7KD+ic!hAD}UwSInZp5kgzc5#F_Q`erXjyJF~Fp=_#!<
zsRD9x=RTjeKmY3L>XT>AEXldK33P~KPY(|?L?+HMo?86MaB`^hW{HhD+#7;cw>-V#
zao13gP5pgbcc+ntXyG}xH*8Ct9)6JQcdOrd;IOy%dAUEkJX##jtQ5%lQ1UrI%Pn>O
zrrq`D7rp1v|9vjPUhU9=kSmsVw^^>|?>EpW3R_^>(QW(JjxlS_oLm0RcR!xi-+y6C
zX0TPsivx%G?axe7_0G7kppmPXwWGHebY2JOs54N4xU|&!^EvDH0vs$MGJ-$-Gitbh
zhHolzTc<Zg_+D2?q0!2HtvvmnGcG6GHsWtAQar*WSgE@t|NEyWdvqRYw_Py4cx>L^
zfISVYHjZA$ZoZuRLgCAV{ygK_7V%9Vos`)l-J4v*Hx!jjZjY>!F7#WlX>y&P{JbW`
zBTt(n<#+zHxx1ixb%2(o@1>0s?q?c3{>*bIyY<qB4O6#lQJ6SU>_Eyv9dr3#xjlJO
zhtAD)eB>4BpSu{+V*Gb`<==HBnwpwl-rt|Uv-tU^FPHs4KWvv55)xV@{Ew^TFt52m
z>M0S>@jUzM{#HEhH3!WnmEW)3-leQ0J~c}0sr)Iq%#~@j?6uLY9}^<AGplQQB3>Ql
zUy}FH=wCX2HP<c8Y)+%pJhkrcES<aTTEbX!zxePxY|oVBnKSd?^y;RB_WS0)BCAz8
zM1>717IjWOv&84uxie2z$2xLN+qN-v<@B?RH@+3C>HnQ`bo=ei%WF?EW!-zVq{?pc
zLHV>xzn#rj-xZ76R`BWW`N)dK#nF3gEbN*^c=9?n1}xt*x8n@Mdi@U{E`DPuyZv(I
z-)VV!%%aml2X<^eZ|8lqOSH4IQ&Cg1^SFF{jN?Zw(C%E&@i!M1I!hWPFqmdvd-8mK
zJ!HF~o;q`W+Jv>ywLP<+Ejykgzngz1i$Ub9C+RPbF;uSK{OG&$+1bt=r@m~?n%qC7
zCM<^~qDrS)d-?N8W|otTKWr7uTG(<mCG}iMP4T{S4-Rt5@lC$VQ&%+aWyto*S9?`N
zwm-W6SY0*!<R#g<4bdywIA^T!<YQ2K#~{#fb^mY0OXrrXOBA{J?(yX02?EoNx864`
z`#t^o>!(|<$1N^-c?ooDx2Sg5heO=@pxNTff3z-dNOYd27wa`$FLuZKeZN6d<tI;`
zgp`>}`MVM})s>3PSN$1T_U4V@UH@~EZ?`n*n;w|K8t10>!7y#({5^?m2>~x`_z&MW
z_e;F_HTRr--wo7GIsLP*wO5-dCmg+Is*<I5nd~XPS^J9bT|Qa6<o)N*&zSgsH;ZLm
z=YRb2QrN4h+ln-9)}JkLWN&s<^PT1L^3u|uFPG1sVV*CSzwf6SE4SDc%{tS1Yu@Iv
zx3@Nyy^R7LF*4Vx^u?v6+|}RS1Qvp3hV*(TuWgKc?$en5t9V-VCgwYB&5noG@ju)s
zy|$%2>cwrto+2^f4FUU$%BoJkKf~AN`euS4!|clke9SG`LiSxf8M~3~<5Q=Km*)iL
zSpME<#gnv1N5Eq5e&6=ygzZ7LTGf$fXXi{1OZeBh`Tl144N;2wn&uu!Xs{FZoA9Jj
z`qT~0Z1c!zHMu2@=VuzXcZq6$0_{+){FGw%??>{fX}Z7V-(5VtD)X_VX_m-hH9eKL
z{PpFVe;j_Zzkl8?p7--}R0~S9=l50h9lUJ*?_s`sn6KKtOWJQf%*y#!XXRY|CE?ey
zE8YJ*zv=w%Nj&Ukn0LqGnD37!k^FD{ORwmD1{r$Tt+py_=Jyw2cCx08Dc>z8r_8<K
zF<&o#^Lu-nyE1(2{x%<3JUu;K+}(p0I<fxx^3pjgYn5ISsN4E~?vF0TEgETzJhQfz
z%#+hQp0}{vTb+;RUCzrTU9!*qJWy3Vy&&psz_Ahz<(%&JiKq9Rxh=ZD<!OLgDev;Q
zHLOi+zEW11K^K@R-+oz{@!=bje%$h@V!AOlvB!^{nJE|4aQLN~>*_Y6B@3I|SKVT4
zZrk3zA!TCw{BN^OvW1T=n3iaAHrwc(xBl9E{)3H<i`n?z`EV?35$#g5(3q1Nu+~wd
zR@zE4nSayC?1a-?AAI%dxvGwKiC$S5+<tFwwQ<1%2T=Fp>#M8!2j|I#<X&$3Ag}iE
z-3Pfdw~HRF+5f9tcggPy=9Tpu=U)4+_UOmtwGoTE12u{jrTyll^IxC3Ec?5bi#3l|
z$eNbNzvT;xKc*SkZpvh}zN6N<+I;b{WX|McJrCR5&T2e5k;iyGc$w9=?`i_2-mzU4
zJT}$g1|RPl^3>HE{wrpF-ZisEI&f0N>-zYgHkF?YGA<~Dt%*3;$jlD9JNj@t|MAyf
zUp1*IiTe~g$3Oh^Y<Hj6-TFg<g?B@L{_A#;cwp{%(q``&(JX<ZH&6e2vsLruv-^71
z#k*POZg_j6b<QQWrHc~o8&2*CXqGm(Ij!>P)%=g%M?G);l)iZJ;{Ph?=xsR*^Y8Bi
zT{sO|)%N@Se)-i`t2*sFXZuIpJLMf+b7S`2Lz}{9J#9I}z}>!e`SB_78&tnNe*XQP
zt?it^17|N@VOaO#L@;B~=P7w#w%eTC@_U{7S>B23j2EXiFodnK{QYUqmI>^qpP&6V
z`MU2caV2Gc{%Il~*fv<L*>U-fg>r1f`5S&wv!vU#h1x~`u!n!T@HYSR5&!o=s+ZW*
z%`Rzfl-;&``G>saUN`kEH%{~0I^C$|?BY3ALC?&rLS+sGd#ek)x2lY8oh~^yd(-oY
z`IV2J{h9u!=GLA{V`g?f6-~`f@%WmF>~a+h-LAQ8N}sNmOr7vQ#Os&5^Fr&-@0%Z;
zydHOAlkxWIL`Ok|>R9uHw78j$ACqou$rS$h@nguUkj~QA*Dh{MK7Lw2gt18?_1%M&
z>bEDDuGY7IcQ!BIxL+sz)jhF2=0Eh#7*~0G*ZJQuE%%o}?kx|YM~`Og`sMIV-YF(0
z&S0W?)IFcGuQ)^A{pMt6X8)NKqW?&%BsJJ?`umyXo;mYA^VzcPJ2_$A_JEbo+27dh
zXq;#p6==9n$+qf_^VeBdSAKh>bLT+kU2lmik1LP$z3_K>>Q}S<FNeKf&D^O6r<PQG
zDy_Tsy77a>F55C;Z}&artGa)Fe{ZM${(!ZG_ND5-^*YY4)RwJTb>OY&=h$hV7v|jD
z1lr5&Gsj}$_4xX^$K|Sh#5F-v@jIuzy}y6{ks~hi>V9Q9IXmmWuKp-%wDQ^-(X~9>
zwf50W2YzKvUU6@!VC#oDeIMP!irlA9e`#K5(r;tb!1~_ykT$>Fp;=qrB#6a4e)6*a
z`o^4N9$sgp4xPxqyfRk1I&5Y!d!K2)&ck{8yemJfd}nj@cjX`MxTllX8g9vp6+e8d
z<=y(4W2?@`A2Y9I+Q0kmnH`@47u~iw^Vr|UPrR7-JkQU@mTS2Of_l&2d+EH^>1O%)
zu-tpHP2Dp-9oBmwVH|(sH1qcVf4{V|hn-J7J~vnM@s2&kb?RzrMg<QX%HH2wtKSJ)
zIdfvF5_9xE>CC@5yf*KBlh5Ast@(KF>y{N0#P`4dQvBlG-`fX*-d+iQb3Js|+^lD2
z*Oo7N^nK^fjp^B57CeVe-#Nu>-oD{&gj!$6htKZ}bARhI<V61zUcK#>gxMQjsaScx
zeK%?%yp9K+NqwLb*OuiL_u<W>-w6%L){`Q(lyIB#*7pkDU7d6`bJ>?_zwJ@Cxzvre
zWNK`lH~;XmC9HpHp4Wzy{EcP!^67KCaYlW)>KZFamnT|_Bx78jOgj0+;i+2eO0zY_
zvQm1B-ga0XVQ@eH*`Vv~m8&Pcb|lXE{%Pv=8N4<B?C;J${{HBOt54Tizqcv9|M=kB
z>XQsj`{!>y-yq);WRhEG9_q)ofAMdRo7MZGrDd+Xy~1An&EeJ^cKtIE-@Y>%Ea(39
z)O`KrP6efK(XBeCToW!V+&DQrs_j$I>43OQ*;CrO&Xp<$-Tu{ASe0eIc=GmKqDlXc
z_xEzIO8L2&Z+*QXHt44D!5g^=Pgc7<{~arFP3=zfZV86E`!D0yuVY%OAsch_Pw36>
znzP)?KJ5*h{p#rF2L0aqm;LW0R80|`U%g}ZwXYV%e9O;fGHiRJc`%pn(-E=bU-ll`
z@bdMCN2#CRT$kMWL_=|&a713glRr`4ie(Dio-b$Ge|hEc-%ED<opa+Wv+caO%!xPo
zkN#ke>yxvw{or45^VgsEY3$RA`h%9Asf;qqH8bd+zpMV!fnTj}R@|Rvr02Z688l+C
z_1Kf<^|e11traPp;WPJb;=^+ap{uPb_pW{X<?`BkZI4fa9Gj*8ln0($aIe>F4|}s!
z&UCK_0-KNXCg1qf+EbG#lifG#&ZU2>v%Y5eXSN^I|9o%Z{SB%tlj4(ZTv__#Q)u#?
zD~&zx%d8I{>s{>LKeOjY4u|<Jo=@R9ZO`ZWeUUYtCT?Z#bmfJ9#_W3i$;)ea4^FN5
z^V6$t=4U?r+>eHz))wZRjc(uQfA{DX$M2H`raSC5E&P(1Vw85}Hj7#PY4g*QXW0I;
z-NqLF>0^(`t$K?YlTN!|Tv4T(WitD)bAHJ}kB=4a%&%WqmlnG3UhT8V0f8oa_RD^~
zeb4dCPWhi1M!{|UiA#4*-5xjJ;ZvZweg6!v-`8F>iA`L+!PCR}dCbjq^A;BK{3<I8
ze6(PC-sa7>gQD(>9-GVh;IgJ<&0OQ&IM`_OF%uoB?1Mi1lO}Pxt(aOUJEz9-=-H&z
zj&c2-rzHEgwYf`7*>Lcf@w}?C)+TG=E;mV;vz3Q_E(r=`TdD5DH+PPo!Kq`rb7g8w
zkDd(=KX|3?|NVu<VtO$r7C1Kl`1N}I`a_%Vdq>R@cTag@nXJ!Nuzcr1(azUQlfw_Z
zK303~f@S?iqrC0nw`cz5-BI{^?hcv%cCRY$W<EG0*Z17Zp!{8on(wS5Z*FcbT_`$n
z|B`C4r;n^GwOj8x#n^ERB!s(ttN)(+QbnxeM7Cn>?&<2!f4;n{*Hd$xS)w(!Vp^)f
z3v-U70{?9B+_^V{x&QqrWfLxQye_s-WtR_M@=}K|G5uQk1=-H*tTAri>ef1hHE_zO
zZ`!+H?(+PtkH1Sk>2I0JZCsh1ZBWgZ@qMc~`<JUH9aEnCY<sco(XaO6@Mu{*C0X%3
zM(c{hx9!rinDOuB^1{r0xq>^bR!T?dWd)RoY%$)FEAhURZ}A5G1KZ=Pf3U8IzU>(4
zJwdbBIC77=$Ca;&TeLEk#;EAbegAOh&bUj(ZNeKFtgls86x6U;8*0QJXcEj)G`!~Y
zsbB6_-Mhv;W?E^#B5OgzaX-GW^q>DUA#;0C{9OULoCgZU-Ti^wHgAqExV2XC$?*ko
zncBa%#aI_Rbc2@Eotk=>>t1tRn7r9WM$MCUcK1@5*rs+zeEhxe*;z%G)L)TXPafY@
z>-deGC#sAu-9d8l$<_X5-m})+mtyd-i4+LF&^1$X!F6}ZH@a^VLT0}So3FF!`4lzn
z6SGPzHeNsf%Va}aga(tv=bJm!@41~ylRrJ_k4)#Lplf-XzDUhvj&gB%k`!RRB_>7f
zvUr7t%<Dx#R|Dj%ryY#mKigvM18-h~w1X$51$c#$6R#|n+>-lXK~|)?^WxeszpSiJ
z+&ur{6E};`hUk~q?L=Ou@!P7U8*ZB|Y;C#sUizOWmU9G;h%qtxO#A=f;p92h%DoZX
zXSFOhfQAUK2MP%a?hMF^oYH-a-@<A0dt;fescP+wS-f*h`VDRt7<k<Hs%TTTc<P4$
z?po`%i4*So^+&JinK|*~hplxvhGAS5yfRO=NIc7#%pjtjy_iAmbl7jr;EQe=3H+vh
ztp4feUd{<PaHvnWt}pSzic*cO$_p|)t@BMw1Kw6?w@<ptT3cGQ`N!dpJyPa<g^Sw^
z>uwqC`F~zlF4A=753ltb4p%=s|3fz7kKTFTyDJVWg3eR;-{biEnBS>ejTzSKO|}S6
zlT5f!9=9hV<ECBDi?xwUQY1rG%buFSm)Mf~@x!-w3_rUqE0%7Yye)Ojn|m{k-sP2T
zaQqwXv?B1LU!=7K*R2lgN9wZ8X>Si#?wKQM&(=H3S~JJ+VAfLq-2rj$W!!G)$*))a
z`7>uG|NgsP&%Zt7$xBFoxJRe!mhZHc+`qJ*KDt(RcH@52Y2`uMpAu%f$oxOAYqd_>
z_xiigj&nQ1znxen`ljH2Xu+3HpN|x}Og**!u%gm+ky7W=t_hzmUY}W0^6t_7%`eRU
z^%Ultr3b1UV%YELb*`ZF?(z92<ou2&Z_*Vmx+UZvIqQk_*Z0NhJ2aC{*R9pQ(%>_B
zSMOm3frKa1<@#>>1QqR?5w804#V*b6MGq%bo_S)hp(d)~?yIJKRguak!_T@M;&uIJ
zqrrAZ#+Ua&*T2UF)|r`kGV3loUvCj#>v=wpk*VST1Fx$4*3;DMRylRfIlbuji@<`X
zmbNkHPD(}`%VHA~+_`N<YM|cADPJx-eq7>w+>h<({)wf(w!S;_=A+2lumfE4&Rll<
zc;;*Qa~9WzyT?tIWaMp2|LMLZ=W=`IWw%KJ;>WLCWiRrZwzlWZAA@A|zNh!RbU|}Q
zi}y#K6kl8=7`{+@_8qypIX|qlo3&qldGh+tp6?3rJg-ju{yhCd-tJXCw@PFhm{&jF
z^!VJiDTNs@2d#^GxY2Bt+ku6T*<z*I4gO@`UzwnBUF`D-uXis#8OiJ1beQVVEH-t~
z&bJ5McYJ&<`Q(40k?>{d4a=VIeY10+eEGNY?)L?+a-7`Hc`v?&m)$gad*jN@Ki|x?
zn{w`sN4Mm{J$s({ox0ljZ1og|2Tz~#r(gIvF>h8Qc;)CiF?GIQFTZb$Zxy)ndwb*3
zTPv5pSiic&`s$gFYy3Tz-rDG#@aqGE>*@O}ZT%kFGX$4EFaGxYj_$+nGX&=Kx0(NO
zdENEV=I^oPTk>b{2VGAqcf3Ev|0rn7_(X5jeRJmaU3IqDV>Nl9v}yR}Kheh{wtvm|
z#`^Y(=vl8F6Ys{>1fRZlrS7X-6;E++)p@O7dwv_~JvcHyZ@t3Y<!6>YnPOh~Yvr~x
zwU1)mrpKM=`<1KtI7oa8sD&6~Bwfz-@w%nuo8A+xvFn1)M?Fis`CH$}e2?DJ=EpPk
zzhW@IrqxiHqJCd`Yvq;-Ilm=32@}@}&oAHAy=NQW5x4a7{BO5B&$*hJu(_S_0e^+%
zr`uk3(ZbH#-Q$*JEv>oocHWUgr;Ic1uk8jcuuTknvR_OtG2jrJbpOm5GaZAN6L=ZQ
znuGjqOv{^Bes^*VV(vD*cV>jvDTS{sCl~5mwY<(Llk{!3vc#T4+}gh{S5^il9X@1X
zP|c^U8#;5t3QOD5sfD*|w@?1~<MEQ3@7&6E(uaEWnwhKXj-L57W7^mI_FH!z6jm=T
zd3RkP=H&7D>r)Qce!P?aZDH)$RbOQ7`+D`aC2q0g_&qB{u70iTdYj0e{`*&gFDMn|
z8u?tlqP!@4!=9h(76*NwU*Hn+_y6Km#j7W?cS--5l^39Q#Q*xs$S-f-EOJQ`P|~$%
zaGq^zvsuFdM@6|sH%vkMKuf>z&A1EaE^oi}@i)&W_d};zXYSc*T3(W^U$@di)4E>S
z%Ia7A-nxf}T0sX<t>6Fem(<T=6`*Y@9X6IFepc~cDuukCKfDuqr|owB;mMzL*XwJY
z(7wBBuCT}59e>XSJ3p<seCdOgQfCQE!SbJlu2w~%OF?6BOFSpfu&v&vw?*mGbd!?4
zm{s>xuju6~Cok{6o}RYpa?-*pEC28RU77WV^Vu7Iq2Nny69c$AH;PTF|8n7bG5?dT
zfrgjQzj~}DkaBFfdPU8&cAwo-f4^$`kig77GrFcH>OrDC+toZ?$HQvzt7Tl=<{2J)
z)H%1tpxV+?d)mg#=W=ELc1_U~o*uCOVQ+oS%B!VSKCTA@B|XdRwmjRlKm#<w60R_r
zcQ=#EH2&0@Z1(%LPZOmRHXnF5NArhKjb(aDT0@+C>W1xGngeq;{a2ssdQPMv((&4o
z!<~EUt}SmgJ>n#IVCMN_wja%c%zj5qdvaaCYLc!^-}khh#UJK9tuaVsy01F-!Mo>i
ziw=H$xT?3~1nVW+?xK@5miL{*LfLh-Zd5FBmQWPl<bGq@+}~Fk?t1P2wlKoH`h}ro
z_r|+(Zk&8qYxnv2*@ry}+XOfgHvM7epZ|pCRPK?e4(`_8nX7g>>@(W>;?w>pt(J}x
z`<F~}-RZ*fYkTRY%9ZWy%PJYU4bQzf`FD+FqM>&Bt!FDXnQ<j-%Go;KNHn*F>07|#
zD`j&mm*+V#a$9fvVcF)m_}aoNeGQ9L-|o43#j&I5(bR);4=q1jb#YU1`mesK`TWOy
z6ITEJUpaeAIs5<a^#{_U|D}AGeZWx6Pujn~I!?hdJ>Gqr&ZE|Ib6?)s$-da`&xFFq
zX_J>NR#qx<ys}ASde8l=r!%)LeDk5f{OtLUlN6_HXi`4txVlGYmDt9Hs}I(l(79xo
zam{Xy)$!&Zh8b7xoSamv7P0nArueCk0osbYYk2%V6)W4xdGahQ{#~GOVoj#xn!ugw
z85qU(mGQ4{+k2*8IPdW0pV_bOeCA1>RlZ=$Yx%gUOJy7EOzlOx+M3TgPF>d@Wb}SM
z&#si-MInyd+}qzRv2IA$*1vPP`H1KJk4{b&tL<uM>Ty4+d>|;R2wK{=vuVa2qn|90
z>9A?WGUu99-mf&ZyItkxeesk|*Nsijo61_HCS(R}FO*<m>iKi&aN+HjI~O2TfyZJl
z6zxpikX|dM`kv!3>yA@q&u%~5Z=n3tqLlk-y92|Tus!cg3fzj0ZC2gUFQ@w8l=G6q
zTuVN??hliB^~1L;VvE?4-1F`nb0od*W-eh{<KmvyzfeN9`q#_C^9OTl^4WIU9%9|G
z;#L`h{u<d+dh1X2@-cHvC^Hd_xfpY!Scmb_r8^ce>HE|7<Xze$F!QtAzb<oK-t|{q
zmv}3icb%B(fA>O>=T76Z!HysGQ&ZWGPF`Q={4FFe>_f^tzOM9j`=cE{atXUVoc&g1
z|4#v1_wz~f#kuZ(RGF(C^`q=g@sZFq9;eUE^;{7*xy4I&#?F@<OVo_FJhknaY1aCD
zby(V?Pt0#3|8V{luaYoM5eTlo<$7uQ`wL2X^E6-gGslVb*Um6&R{wBt!Y!3Ozs^P`
z=QCMw-#sz6t@U}dn$gGKXITm^Bn9Mq-Qp?8Pt7>Z;#Ozb&dD4t82>TSY{{DRl;>I@
zudJBOGiwarC_d7%@!NF!$&q)eJg+|5B)fl6Us$BR@AkaE%;}%!l)o_i;M?|PPpg-$
zeM0<!v&-iSzp%{FdF=L0Nl&luRBif0-ZzggC)P?@mFdW?4o~_pUAlA@X#d8EqXKf-
zi?di{tnw}_-Pm?)r&Ywt|7UX8w(Fg5D*tt9K^FHi{rml=1%2m=`D*q$em)+T`uNz<
zFZq)WH82)i%=jc6TyFBbzW>XUnRTX>pKtey?AXfu_f*S;^cSn|lzSUKk}_YoE%D?#
z@sByrd$&)!w9(zainogY{gjV?>fYH-+$Qv8zIC59FZbd3JKD8k-8LM2dmyp&^Go6H
zlddM4F<aR?oKK&3$b{c9#OCbmtA}msm^zk3zJ500?;bnjcQsR9nDKx9A>4oYXv|?_
z=VuRXH?{mves9OTc*Qk?zW?UCPpaKixx%7($(qrrzhp^AiGYja#AO|<EPAdlnWirb
z5)pWLiRESJsxSAP-^+R`OxQHRciSC)P7aQ3-kz0p^G|G>`T5+LPj4)Z)9?MLvDUTT
zzWj52RZ_fpkL2z36PG>n*!S|@yf)QG>YY*_U%rlsw`^OuZPS+jOKk7%o}TLW{$5Lj
z>D9HMX275Oi=5wnnN{1quK3-DpQj?eDyYre^TPk|zT%rZ9$2p`X#1BNmwavE?22Vx
z-dWoxP2u!mU$U^Q`<#W}zf))5<lfx1#)NzS|4C8I+aBzZXcM(AbXa5iZwl*TOJ2#(
zDm?eAo~7SASDJFmeyZHveaz0!j?asJ{JAKytt}`0*@QBSGQUMPS0COfQaLTY?5NP3
z@2U(u)3)51z06wAOo~U2;l|^n$MQWpPptQ2-+oR$<?y%3BEPCEly6N}d3HyT!SDUO
zL-T^w+W7vq&9aPNbZoY0!S0>2`EU0xzqg<=<lK`74_?KenEHy3-7M!>+AZ@RX(j%f
zUe;Y!nsnmj9M_3YY8qzy%(1JT)h(_+$u#@g^IdOdY}TGxyHR1SR=RTRy;D8+40`3J
z-cP=G&)ej*bbVphUA`FypFC|msH+_l<8^b{>Ys&&J*K5}EO@9Rw?qDO^11iLF=-_Q
zxo__4B%WBI7190e_`eh0eP?9*x87o(;+MTJW3GSX!@R{|(^uwLh4MC7n^hkQe>i`>
z{fnX-2Pe$7;*}2g6L)}n|FO=)W<1MIU0EOd`(Mg~h0oU|9X`<Y@2%zJ{mPSHJ!fCk
z(N{K6wc5ZmE8V*yRV4gy@(1tVHSZ_9E3mk+A(2^a@;`2YiTjt$bNzXnC(g|-|IOJC
z%kJ^yKRSBAE$-w0yjN#GsLc!cQuQp`;rI4hiQl_^+gF936iPd`^=o2Mc7D~Af3^ZY
zSR^EW%xmq;)#X{A{O@m{(MgY+n^H3`FXMfBYU*P5ez}(RcKhwA)oG^=F5~-qLtM`O
zOqR#%e(?$F-m`v)TYMLoSx|eqGrs=V;>gfBx6_i(M9;dD$=edQgJ)&ZM%9>;Le<~-
zww)-*ad?xdE~jRE@wmD1$$&YtQa1|~W?1iMDm0Ux_wM7~CN+QNm&V6FI{t6{em>P_
zs-CFCrwQt>y;F=Oetde)>%wI2|0c2gQ1+XR4{U^6c{yzB_3Zx4x)ji|dCMz<&eLJm
z7sbslExcW%6S?V8m+ZL<^Z64FAL~qwH!O;_Fu&Hn`1^W&<B#oTOIW}8%xluzVUb-`
zdu-NZ^THeJ43|6W_Z_jhUz_yu($Xn9k&~3&`wR*mIMi+gtzWWn-N&|RdGp-c$q{7-
zUB&8^WBQ-ApEZ+N+4JOE?KSNOkD^x0UA_JPt9|F$r1mE%U5Q+D0JavuR$#*IX8*2T
z8%vJLmfdOjbJS+#bM76|k~c1N&zQXN&Cj!)^=ai+{|^hy5}Dznw(rj;?>;$Oub3Da
zM@L7{VX?c*=O1>N^3Lu_@!RAF^;RGLK6(Fhi-)e*^`HE2wU2!-_$9%!rG7vAeg3x|
z-MLpI?rdaw$$j-mVU0v;_m<SfZ&hWi+df|1)SaUD_S}o_t3G{vb66(QVp^h~_3LG4
z9wn_hbfxU=)3-*gl7CKnezN#}Vbj};WoKh~&fPsTZ(G;ym6wy^uebBrwi*2Vt*?2#
zNJN+I%hHzrr?<#mm%QEoe_>97!e8;X3_3F!V%D1}m%qu-Vq-J<I>&lx%=XCL-=xl_
z@hCh$7kEWS_=tAt^w0myb5Bg#to7LHdV1dfat*<VrtRNZFMZ}UUTpoZBYK`r&V1{6
z3;*7}R2I#!-t_gc<4g4}tPE7`nzrKHTl3vPwwFJ@4860j)*7^r^uhwi9x2nV`1-$7
z4;*l)UEg^kzWnNr7`?f%b32y$O>bQAE!yCBTE)ZHLK7~}-Cir(WA#EsVL$s}t;&L$
z*RpLgWs^)VG^@E;$II6^O(<r6bm0DV21C6=kK}G}?&Wz>m0%^?ptFxn&;9S$b(tTT
zUdL8g_BMWq{uu5XzUqPN>L!DlKe2zjc09HcIo@YsP%(4Hr9)>fMTR*Q&8ob9-tYa{
zk9Wh)N>v3b&+Tp$eqUrInEU@M^X9^zha@j8^ZgxS_x{St;F^CwpTD`WvH86H|2bm1
zQ3)n?yI)2>N<ZE?f7Y?QO%uM|yJKQ_JTtUPWA64IWpzFsejoG48zMK()}6sP`)b?G
zBsQ6dwPiZfj-SbWydiPx><u$|EpoyPqUGcQ%5H3sJUc@+?2vNK4BgDi56LoS+J+Gu
zD&{=Dyy)-Ggx@J#(LDUM@8qYYZ%wTFAa{jxe)8Yh!OtVsPE6SJ=i}!2jgOD_UtaDn
z@8skJ+Jt;|w)ye1zJFPiKJi=0g)sfx?R5T_q1MCu^ZMU7i1QyWTD;IaxcP3!9wG1c
z)Qu(;ce~jldFSkAw7j%CvA;O6$ecmXY1^?6UlZB4uC5BQUM$VWZ(bwIw{L#w^wm4h
z)I^?~yL9uSng0)nShh0X%@oc#rmZtg@Alj;Rd>&L&trRcWXFeZCJsyHI!4Rx2{rg<
z|A5cecIGC9HrYZ3wp&-soFiiXq|0r&_`7kN>oG>bJ{M7u`K>mUpA7Qv*?=w}y1p*<
z=eynSU)<ksU;64wrP}H4^9$Yn$Z_so*HKWmJ5RV>vitsF@d^ofj_PN>SDx#5>RBu#
zb@op6sf=wl-ztq8E;`-XGVQoc?m?y->th~-U(b!Q{P$yLeZ;+FwG@+Cy7k5L+%`n~
znfG(szN)WU%<OzAN4rEz-rt-1^Yim$v)Nx=)s+6vsEa!lY@9o@SL1-7?ET#m>~ro5
z9^Ub`@J8cl_T&u9e|+j|*tpoMKNK}s`xZ&x-11NFa@o<Q<rB@lq&WGv7eDgSc`j!;
zUHi#~xqr>)N)!spW~HC>WPk8vI&0Xj+#XhYW!Jo;8$ZkumMN<`=gHCaF7Hd-ai^ap
zY#+*>-}UKVvt#$f>qfgbR<E}`nzQ=SQt!=$kKOkD`;`q^+qX6A>Wz)b$3I8zcq~=C
zeBIN#<xP>s@2bmUdn-S(cG?`>A}L+@eXhbbox7d7ocXsqIygYvbY~i+f^JS*?l*Ul
z_jEn3E~QWFSEdKfn`gnke@^iwmHF03%`GZb7dKwjp4(C>S@0m~{>C4AvuCnD`mt$F
zr`FELyK{>~K0HuWma&jM{Z!j;k#j6h+_Q=^Ofh$Q9{+P)y*z*Cfrm-)A}z5635Wm2
zePdQIJ>tJn_<72Ko{So!nG$i=CkL_KnOXPsd*O%guVxp{-Ddb~jq%akO;KC30zr4L
z`Oo(|Jxy0LYKz9--`~~i|NS|ixA0hx$=iGCg`$V_`E!MCTm5x5o89|<|NnV{%5DMc
z<7}OroK8&9oV;(k%ai}h=DAj`3d`v~+BR{TukFT`V;f&KK7ZG`V}^wI1Eq7E4^j?U
zNh^onme{E`tyr$jes00<gYo9`J}lon*XCBdXzkT=pS1n<$nVJ7UB;{L<#Oy|Vr)&t
zmvaBcc&5C(J>TzD&#@})@|vpk@mBWwj~|cA7r(t_`u2in!{wJL&(6$bVPyraUo*?Q
zbK*#+uqb#FecYO8PUUUi+^ra51IxnV3Lj0s)%*Cj+>S0*=9NNDQA?fFcDzlv%d4KS
z<oRp4CC44!u8P|;{l14!!^C@~XGH3K3U)v4n9s+`VEmv!wZ)0q?SbDJX4~#KHmzqy
zLAt9S{(C1k=VtGO#UHg}cWx5*-E?TuVLq*=CZFPD^>2y#>}+qjne)v4|4;tqesh%+
z6dHn-dR^R}AFrgOwB@jR(CV<YkEVvlC7zn12|7RgXt#Lq0tZG>H<x)@yw0cXpG`jL
za=+-Kr(Suczg5+3iLKwiU)fO+E>rqyW<=fB#(Vq=l5F#f+$2vV6&h7;>%Fj>ZJUTu
zklkwb_^qECPFPf(?NiQOFf(T6-IEV9A0ONkX>nn0l;qt5cjxM#a=caZE}`UOTxPjU
zq^&{X0yk6sW3fdS6gFG`&wO~SQCncj&ED;6{PLK$^B*;MwPtd|9JwuL#OG_>U-^3O
z1{U_k(=CJleDj@aW$N6{_w=&AJ?M~d5#6XMda=7Mtc$g_v9T#(P+<byZ5O^iuBELF
zbO6Y!>+9Dq>NsJ%cxAEB@A^BP8L#H-U!9YxFD2xCo%bW-r3a}`ev8dpGq1Wm_Q29;
z{`W;<`plC5pI))_fAl{4faudlZin_8R~*b=xK5>f_JQTLrRVk+J~<&+_2q@5hX=>r
zs;`gMZohZwinjiVna1fauCJE|t(uPCU$-Ui?ys4CwJJ(XQf<?gzu4P$&!^`6A@-j<
zI(I)*`!81Pl}<Y#dtJUssp0Fo*9#YL{E2rIRM2_#JwZb3>7xQ6o~};Ug-)#zyGk@c
z>lZIA@tmR?J?-l1@YM$AFMfS}{pXv_=T`(R<x=&U^5DzM%X|L+`_0t_8lv$$y;7TR
zk&K?%+RmSU4~dnV?+Pzaf3vMy$aGJI`D({juSHrRD-L{ndmD83qHWce1z~HW-rU~q
z-&)zWXz^l6lMDe>?`cPlA8((m?*Hkic>IH>PnSOGI8nSf@r8?C@HCl}HUC6kehv>e
z43c==Yy3t=^vv21vw!^fp|Yy9*g^(0i?6@`&!h(r5<vH!o||L2_2RrqJ9b!rX1X>c
z9$pc**9x=}B6=I>IEOs~uOfY(r1{N1v(oi#=Z-Drk=r-F(N1GC|Cm&u?5|bT=-)80
z@qOfs8`lnfuu3Zuu21y+e=LqC=aFxJ_@wV!BMrCf6<0(tAFKHuyNJ6BbhTF5+gpXN
zu4s0NYP&r>H5IfXeBr`{RvG{K^`>`Mets6X$b~a(ecahqp{r-u)$Rfx%W`UR=*pnh
zo5sruQl3S-#9GhExh*M>ptAVz!ROpbH&V=vHY^l3R8PM7dA9KXGw(|3?#_L&y6o4v
z?uT6`d;c(=EWh5{yU*{$mS11qeUy~1`B1kgE<8U!AGDOTQ&|1Or%#JqI)y;PgfCuX
z$nN=np^aC1MZ`v>#qRxShgvxQd^)W!q8k+gUOW3Udu#Ufv`60;TiWb3ytW~w>U$FV
z4cU+BE>CnblwQ4vWy|Y-{5!QgQufsm>&=~a7<TLsd3wa}lTEeKJ3kA>VCfH;yUSl*
z+8I14@_ot6ONK>HJifiVd-~_+=b&;Gv|zVvcP#sdc#D!30$*QWU+&z_2deh&?k*2r
z>Lm(3&gAL*%v)PDmD_lqeCX%hDP3n6x&7-CN9n!h|LaUkQ)C1lK8W-2TgQB%ELx<!
zzNT3$|Jt(p{Yxre?|gG}vvKh=pRjc?C%Z(oHRASIaEs|&VEiK?t{=C=cea^H#)XDo
zUtbr$xnX#(=CdykTP%y35?A*p{jHgoXVuy67BE_Cw{691&BYhrxy7A+bL`#Tle^nb
z36@V!Homu{uGgsM^Vbb3_vbBGpa6=#A0Hn}8YZz!)e23zyv*0Cqj9!*{-wRu<)@}<
zyWiVWX<7TrWPZ&j&Uf$LY4w5T(*M8T+$a0ukEb-B|62Dub7!C2xA*;3^T$@X$!Bgp
zsZJMNZaXb`^X%Y_DSCB3>U=h3MPFSNdUJC+f9$T3hmZU1mn9x<ld-F@aFyF#{{CL-
zj}H$;w8MCoUp~1p`S^?b`|B6&Y4CJ$Ni^(yefKqYlxhBfhnc+_j=QMcF1xnwp=;*}
z3#&EV`=8IRH+oZ1Xem2q#`RfAJA)02e@#iTWXUv;sCd10JLs^Wt=ZR?#qF&EW#yP%
zC7P}>>-pN7HG`K0Eb$QRld(M1Cu=QXoW{e*$oR`hc%q8q_cp&pZTzx6At5H+=_U&U
zx?3e|_e>7`trh-lk!RS9jfWhU&T-o${^h|_Z!<PIA3NI~z3JV;+_xrM$I5Wm&71z_
z{jrDd-uboj$!1(#C3?7xcX7_mO`v<fjZ#mk+%|J}ary9e`~69V$!=o0Q7$WkmVSAv
zs^t0JeZSgMzgvlq=ZdYpx#2q#>+JIf&!+fo%FPToKHKV;#Vx%)WrgMJhgVhGeBxqX
zeEfc@e8BQPb6%mi*;D35UFu}Go>>?B-Med#^6{^)uS=TcNQmpj?D&6UsrPiyB@q7e
zY)(GDzT>3J>1nzvBR4PGw*j;!GLC2YWzWOM@;AQknfk|f`QtR7S#yu_w;kPIlUtkr
zYQmPok7mYKJ*ur%k2YU>`1<v5Ya;<MF}I0IuAuT$)q7fqql=3W=iJRRu9z>FE&ch~
z8I}LuY<{|b1g0Bj@?E~yS(^B%e1ARr^y53r*{5HRD|+mgbLno{^XvQv-)`^CiP`Vv
z_>oc8s$_v{w-{HKl2T_+mDuFY88al}*2Z;b_pN!d);@~WbBSK=&pvDSWgD}Pr^P<_
z&~UU{{PNmpb3Qp6(CvI&;0uDzJC&4}h-!tapIXkcGw%O-u1P0iZkkU05UYG+YppTp
zDq%;VJNs&9A2{HUc7C33E0<^pc<XfKwf`UY1?}TH=VN*zZtHTcNggq`?c1}Ze{D{F
z?N;>k)Y2v!&~Yjim6adwmfv3~p`_F~Vc9v)w4mjFXP3o?2mhMd74zkTYIZB1)+uhW
zsuaoT*S@8_UiOu%t3P6M8ZYP|A{7;vqXL4SOG+BOjh?)Je|}nTcBhTW%+uEDD;4xF
zoDF_$vHOkjwY71M2b)+!C$NKVjRYN6b(r7&g`S#{Qs<1eg_AcHKR*|^EXUO-<c;sL
zNBJui&fi>HeB0vR<=uZaB^joko)+pT_wnWO`AJ)&X4+J4>QYpi)R8G+C3$+HGW+bh
zSm&Eup$1hIlNC42>-sduCFa)gkCwU-7XQ^*j~>6VDV3XBT+e6Y_f9dvz=$a>%Ju*L
ztcl+|=~QU^;TP{*f7M(KoszEeYWrfDxnISMm-#Nqy1Gj1gmO|+QpV+FyvfIU6iZ)S
z(dy{%P+DFr7TmC4fkIv6z8ROfZwuFzb#r~<iOiq#@I-&ULERaSS674m-#z~F_V)Co
zqg{df>uN=vT~s(PSaxfkSik?D*IzeZ%^Nj*E93MQHrb>aZr3X;DAH}?e|7fv`|Y8Q
za>?iC*@Es;ac<{(`2PL+rj8C3##fO(j;lge9(p4gY+?0TchS0+F&mek6f2!OA@l3Y
z&z0(|N7X@>CzZawR`dC+IVg>Bbx(@8x-NEh=i~iPKYTeBaoBIAg1<7$UC!qpuKX(b
z@xj)r{Fe`FWz+wE-}k4SpEq~={kmvS{5hHM%ysgeXVZG_?(d)_U%B>NN(*)Tr@C9W
zYl7z2itoKyHGi)Lh(6(Ta&iJ)oXXB8qY%8z2NW(si&ifTUU6-0w4kW0@BaVSmS^0K
zH~5_v5cp5g{Z4G(*^0?*>shrm{zcA};fvay2Ra!I)YzS*!0Ws`LDss)>$(4}f{HBL
zNAK>0IR0Z?ZaeL<z3K9xm3MmNmsd;up9$Kub!BDn<z>Fo;PAX;u;s$m-_Lz!f97tD
z);;><<?^OKp#dc~EbYGAHf@?6X;RPUwm8tWvT3rqf6%%Z%d+?Pe80W93EENL>gu`b
zR{fnu*VX00D~>w#$lcRAA-p3-&+kmW`@1U>L5U4i_@A18WmV8pFO$qmE&ld@Ls$g`
zJr|kq=rZ*D%@bMA_Wb#cH)$4c%|xGY`su!OiMn1nO+R+Qf*;|(uiV&ZG1t%T=M!PD
zLxayRy7RomD)Yt<-EVU@PTK#n<WA47e=i-=!@jj-zwYSXSoGwCU`<U;$Q1P*1rM22
zy{Dy|o~HZc^mKoajfbvm*4Y1^Cok{JY~#s)^d&8JE|Aq*9_;E_P<J<>XW{G{dy}<6
z8$xWWzl8+;JM#Pee*fuuv7ocKCNX~1IVFGO$dM0OU!!7*Q&jW+Uew$tH+@^p>AA}G
zo3f9mF)=aiD19vkx(R5${rm?H65bm0&oD~mN<P+;`25`5T|eAGrROB=)mOcKdR^`K
zr0KZs`v2KUuL~-#i{!nPI=Sxc?30sJPyaps{OoM^`}=Bd?yt9(lasrZC=|21tarfz
z1<)3ViAt^^0)i7$91dUj6d~5#8h?G=k-x{n&aIc8V$^*8pJhv)?73;ZujRP97(lm%
z<lox^x_Q(3H1n<P`To1h-h#a%_*LiBd@gS8#b2~VKc2bXvqsc<*^IJwxyWO!Q)6%J
z%Cu$W5~+BTy)JGq=$>>?d1PQ<aP+-4=n~Nb4UC``>fW`Weeshf#q6z`8g%ffXY3>4
zO)T4ezrEdCSR}h#d|UG2U7;e{VQ>87trHKifEt|Ja&H$Dd+(I}YMyszLBYdAI&phu
zTw3b=dV!#zr<)0nZoOvkGM}8gTjp($Ro=1d{qmH%*P@?qE6V!E{%2Fo$9Oi^j{UE$
z-@ZLNaIxFc7qv^A+xblL@5ML<x~ND7onQ1P_jtej@|;$!l(m*~cpAS>iv6vm_xACN
zGv<*N`DUT3!~WiWZ(aP1=itGEZ*Fbvma!~avg0pksIuhUotdD*;px+*%&wpa3fjcX
z&KI!rRrV6yOEQx!LpNnkzbttAtF7?Nua8|`U0eIR|Gs_AkAiD!A`id1x;psEKLJ5O
z$Akn0<MeY&l8$y+Ie<3qyC(eq_xH@Kzrud$pKg_Ag{(gXx<YdOz1Ia6pj$sc4I<s>
zZ9ay-oo;W-1>N8E=Eg=V1F+5~=VfdvCP<lP6}-5hXq0kd!kgQ>%ic!a+Lr6>;lZ(Q
z-@aRn5OwRnytz3U)G_#c-d_E6S=8DvE^)mxI|?8FE6ra6T8!1T#KLTL?;_W3P}fV!
zBx6F(?VAh1S2)kKDqUr^3p88Q@kcm%Th7C^+wVOB-7)GlRqN)~{MXmkg3eu(v8$P3
zl6fhkxf?X;al-!V$K&$C($c3}IE5#L?oLWd;^5*+`tabONzRP`=_pXxx%}&0u|k81
znR%vFsaEK!5KptWuP!cTPd`5|_1&GFw@!f4jMAt75gU_OWv$D6%qEAvzq@<-t*zO`
zFD@uDF)?jvbae^hxckokiR8-A)nZG%r!VuLZ|AvBx3I8qOWxgC>-YWg0>yZ1;Q2)-
zj7uIL<7HxIj@*_bsp>t=XVY5=g9HZa@^>jWH>KWM0kO;E>>SI<4-dC*PCqZ_qZT|V
z`oFS!A84Bm=$7AUI+0r(U0s4Cc%8d#lF!aIS1*2chDX{==j7VmhaWB|eSM8b#v%Z0
zVbJ+SCyEPRUQ+d%qH*xq+1V$j>&H)ue7}DGzgeQ%VH?WcM)9yUn=Jtcsf{*hdH?eH
zb)W{Zr&0yzzOC@M%GSBo<&%ul{c;@yCyIC-zMypeq`Q1A%Yhw*kA?p3+7+V*I=Axt
zJX=sX9%S)Vr_07&-EYnUr&g{vcXv;p9$#m9aozJBg^y?0)mlYw%Q<+~cfTfhEzOgi
zpq}FGZMn%mKRxX%`l}bWr{e#gQ-6Mb&Tj7PSW@uHNTBlEuF}`XrfP@JFwG8I6tlnV
z?X8O|gVkAG1qB(eY}WYX1hT@a^p${+kkhX(FNM!mfsV2bT<o?n`}#T+OZ6q-^rRvk
zySwb^tE;Qe%&{~MTys8u|6jL@i(Ek$Njo|^-f{r9sy->*-CI5VP%F1_)t8K{tE;{q
zy1qPcv73la#Dqp>_N>Pp9Vcp~(uFD=_w3m-!yu7KH+tKX4-XIj`1NbwhSHxupU=0f
z`(v>={rt5yLBWabFPOw9im&jRs&!>WV6&WU)t&mB+uL$wtV&+Y)GPn@=ckLiJ9EM3
zXTDZtZ%#ZpIoWXb*;^V)N}mLHoxArWY|p>1W?(Sk<YaZ@>TfyA{N{SC3|e|)YxZ^H
z$a_b-#ZOMtmHzkd-<dg<ljZCG2-erv@B8^oI+sUK@Fz1^|H|Oy&p`JVJ2taf%=KgF
zle6tQ{j^GLJL{bdiOhC(b}g-~7Z*4(8>OE+)5a^kMbX9O$wm_%-A_!dt*wFU<80mf
zWKQ1QUB2;V&iDD&MNdu`M?O0>Rr@`^<Nck*?cL(~LJ|@u7P)qBaRi^-qhk2-_I7>m
z>3UDk&9&yYeOF?2b4%uB<H%>fzP^_Kbvtcy;p=O<Qf4_Petv$wH4vg@ql94+%h_3`
zhi~8hz3}+^dwWlJiE7XL^=VzqPN$TVCx_#i^74NC__3kjq0^@H^Xqz*z`3Hc^=3}m
zvokY$Bn+AK_1Cx0yS=OQb#VN>HePADUB=zhr;A$_KYQ}vV6#ca1qW&H5jU<vi}~$-
zH0VZe6A%z+Xm4j{*tl`y9LwToEA^fq{4Bb6@7~NC8x$246`!1$87%DxE}?`rKRVhC
z>SZ=HH-G&0?byqbO_iUYE#K8s{am}Mx*C)|($3B4T)upHm%fsc|B^~E)yVFyu1Bw~
zuJ+8D8>=U-?B191=f}q<Cnh#e)eg6tdY+e;cWKhmt|k8S=XDCJPqMH7rxGgOebfn*
zk%Pgdl#OlZ>FLRz`(*dbu`0d9z01(>h~?ZRwND;RuRr~+Kq#$Ot+}5+FV8bS;NhFI
z1$r?%9Ima2+?w3oVdFk)O7x_yGpx&J#H<lmdU;8h;iMf_zx(gm-w$$enI{jrjS>=i
zN=l%i9neAJT-_iNbd)z<nUnM0Jy6mNcL-(Lb>;Y74vQR%w-3_Hj@v)q;rSyxzR3K|
z&HKEUAD_)zqCZh^`rW>RQwlt!dL;@H`u8+zKK4Gc?$@jhuI~T;8cCHY)fQZ1pO$#M
z?xl*Lcio{f!&mwBeG3a}|J<+Q_20ib=4Xil-&<EDrIYij#8jVFX<E<Im+Y8hKQsFH
zoQkxQO|So~kNKY|{Nwq7IS+rz$ZfRmKPokut#}&4n(n#>zm|M#`DZX$;*a4y3+8+G
z{(o<j+MU64ZH7rj+Q}1hUF`+771=LST0g1q!1W({{(QH;Tl#~cP|g0%p0C!ypt4_O
z=legIcQ@)uyI=Xnd?WA3{bGxEpA_FzKP#+#>FLAz>F9pF{5l`~-*#W(n(P1EKep$l
zq0NtqYc)HbpZxHlN#|xE|C#^o&1V@N%$GHPpCMyr{op|1PnN#P$2ZQu<9TG0tIL}^
z?SC)6vu@^JcF5K6=kI@ikL`Li{h^ZB@2A2?a%UXfTG?*!pj*}A|NHfy&-ZEOe`^>2
zo-jK`yra8=dH3IqZ|ifSb?rWuo|(mOep>nV1H-MNf)kIp-esPYWNBV={w#a5bb>;i
z#I1cF_TTxS>3<;n@ka%*$@Nul+Qo~WiYrUnuj;*dyW?*k^W^P<*Vi`3{L0~Y_AUN#
zyumc<hl~}o%ZnBs=dalIXA+O-W`Sy5w(~lf8`eqrwk+h5%n~^!S;k!c|I3bZy3gI-
z9Dbd+>hkQKgU@$d`7O5Zz~6%(4&_f?8k`$*eO=<2NvwMBADnuycaO=l<U2dhp3P5x
zc=egG&OL#>ISh%ryFPuI{&hqCzj@WoXV-oB;s054rmdnveCPk&mklmG`En@Lum5CW
zTlW8#ts9TZ-)7k;VZ%K!^%KvX9=Di@?mIrcbl^F=UnTkK^p2e(&Eetld9~JZ!H2K?
zNd0&?bF(VXN9&HLAExHlqBvfBf9Je)uT9OmGB3f2M{ev~B-CLu?>qbZ2f0@6FJ2hl
zu@i2tPOz=XJFi%Czcj|B@b!M(9Y2aW+DsZN($DtA7pT2ypDuJQU&3%1gOBU22^+oS
z@_6%3Z=EM!`1Shgh`8SppBI)Mu{@Asb$)7o{iA2s_BJpx$1QnjJ8}AP-=|O2SPhRH
zvD@R+R+I7~g#Y?I&X|`sr*_vqbDWkCm-X^&uWmS_^)}ntoOAbx79>~~$mqo$DtvBZ
zu9mlFf6ta?x6hxR++1?{x!;#xk}diD4N7l4<fk-ttW4&Nt>kGg*SoYg_H}<Q(}9le
z+jn04(|f-B&EW^r<<GB?^a!=%pEKw9*L|`(UVk{G?Jlxi?Co*ZBRLXhcTQ{G=qB}X
z<!?5-{*`OrpZ;Yf1lkhisl~$>@oCASRO83L-D-UT6<zY;Pv7<Sd$i8A*>=kA>2oZ~
z<{s!|juGEDuj9z#H_KCGFSXouW_Mh-W=F?SQQ`j+w~5{^NIIfDo!#%jyEfM&?;?G!
z%u42De53c!_IAO_TJLlIH(MWHeE57#V}+FBw?9vR?AOmzJI1!`%F2zp_c|tjS&}=6
zJDIKiKu3Nr*9@useYa=EckY(|w5a1&t(dv8K-mQ8%hQ-2#962_%oBb6aQbb>D~)$u
zWG%n7o2l~HHUG6TOw@~UKd-jMzGp+fl~C0-fthZ8hi-G6vnYt;IP9u(&Gh%d(vA;b
z)elY)-QU!|er-pGik(!OSf!5sot_6gx6hpwTrJF$Vt1xd|7bj8&Y#EDi`;mRU0q}P
z^0c_p<M73t(Pi8W;b-<bKH2g6@JTi~iKse*#Z|}bAKx-)(w1g$-gbZf9_jyb!M_+D
z@D#BBmOb!<aba=ijyo+TqV_7x^CJ9P9>y-R)Q|u3ayx7-z)e{fo`~NU6y>*Ps9fLE
zK4E8#!A$%8$DST6E-W^0d@W?|#`82T<@1U-#^1`hyZ!~XtG8s!2Yr?HnYHEljw=s~
zFZIfQ{jhTm=i8#Fw+*}n^*On3+v_i{Q&*aFVnuo&OTm$2-)HW8ly}_z!LEj{MLf$6
z>3T%(mS}7E#M1M&?dWxed-EP_INomC%+7Y8Qk!40ZHsc6ll-~=<;-lC9*b2memJnb
zuHaAlvy+|D<s08wcU<N>_GN!O$K$Sen{yxBm7m=G@$|^uwhd>G++i&4+{DSA=Vx=d
z;cd#FDOMA+za@S+-E{n{ZVO*z<KY+GnvV<uIc>|nPh%9YUTD2t?{>lY`xAKH?{&I!
zRa$~)=ZP;rt#?Q=eRydztKq?)ANH5jf}@z%wP@$XFqA(QVk)%!v#C1SdIw*7eWA^n
zhVoU_8-<fs{dn;r*iOGiPT|ud;Vf+>rOv*M@%!ukhOl|AQ|p)a*EAQOxz}d?4=Lx9
zVkueQxZKLkeJs5ayh>;9R-E2x`~J}G96=q|(-U{OWy)Bq6+VthG};#{wYXjWScur3
z*P%z>JnnSY6Yeu{^$1qwDE@axBKA0^{D0kY8?K4VqGWZJHhIPxvS0kX?@;7M=9Nav
z56xJrkk@7~M>Xm}ycKs|-n$vz&yQ~_74G}N)0Y37amlF{-J0%!GS9CUWoMgA3y)X!
zotmJtb@jutJkjgN)^^o@Y>lqCTHE@<)45}39ar+tT^43O&(5ukuGsUFmGR*BX@MNK
zuk}WR#~+P(EV}3Ow1c<bY6O;FN<0}m;|g!w@!K^$+P2dlg>L`ooV%-3hx=bC_v5rr
zhhOkMi`x3Z>vgaDy^ME9QjaY^puI$EK6mqM$>MoNJMQjka8Hoi)3o|==t9PQFV_{;
z9uRmZQ~Gb=wnT5f|1Y=A`5G;ezcs6M|Ni}DIf4_PpLkHTGe)afCH!fZ_~N}69bcNP
z{Qp;f)ta9V--*g3*4tKYJ!o%z%p}3S@?Rr6v%yo(?$1vO=Ut1e%&YvIyrY!qxboyS
z=|8oFcccBg`6CwpzbSqw`P%=Dxm9mw*A`UYZ2J7D)PN<yN51Xwg^rxVuLbQ^SeJh|
z^6^CapUJnb-K?MDDs3LPA;IzO4JD=X6VFA8{N(QIKK}n(c5LGFjqSF-5;VR|Ug%#s
zcaD0$ed164mUZ<KBJ;QW>(9NMd|#&bbJCZ;X4Z!qg8uDDJo13+XyK`!^;g4OT;g~@
zO`-E`{*_F##1zeRWbT(adw%tDyXxYi!WndaQHPE6#b}Pd=f$o%x~Oypp9dd!P11?k
z9z_d1cdAAjS66=gx9;wqIclMsau?rydE{NeqnGpc@IMbW&r8}Eu_d^>!zP_=`st@Z
zk}5m9dU~E*TIzjrSFf~LPQtr8J9}iU%_6s7PTDxZwz}-?6(yzfGpoc@BR_xsyf|Q?
z)5%@E&(F<0eEqul&F#T3KjCe}5!u#Tv_S*ZCL~fdiZac9PgMcWs-)!Pn_IKR^`>_#
zyY~sm$(?(4b~Ynuy}-`m=V23FT%N2n*b$>Q{glF_Eq}gT_TTgO+wCP@Q@g6$Hylh*
zP*Q5D`S}TSWWK1Ui_1K&uR2{Sj;7hyj^vmL3krU`@Hj3muI9r*cC85=9X6&`>47So
zUURKXpPrif`|xAG<&(GHuZw0C0xcq*IAvYz?y?`he%XDA0Nq|c!*FqzGPw7k8G9FU
zd;S0IN0S!D?k>~n0rw6){`~9&Ex-S}|81Fdue3Sn*b2}|+mH7C|M&ZQ`=ZlRwcTG`
zSs5w<IxkdYV#U?f;m4mAC7R7H72VR&VIw@t%KeGt&a$^s=6QE2o}Jc=+hZ}?ELSQ0
z+?<w{7Mt7pJd->B{QR73AORUA@I9sbX$t6`@$LEXY3Jtrj9KT?*V(D4rNvcW|37`-
zninrJn3$L(EQ_8bFMlT}IPr<QZMTwt=iR*YGcyc9bAWBU(nTL0I9^)nJ$=os7ezZ~
zSeNVN@BeFd@Zdqvy0M&_n=&7Fcc>&=r3XII1Pxg<Ha6-+Z|j+=9d49#q$5RKMDgFW
zCg0g+ssI1|eR6j8^hZa#KOYs37m$@*JJ-QQW#%P=Ehqjzd~tDc$-6r<i=Uq}toV?S
zb#2YeMYrm|z6$;I?d|C{UTM&n#oDN?mwxtu1Gnh&Gv8ldULHPg|KDeO-rXM$n)wZ8
z2EB`v1x=(^e}8u)$IPuy#<P`6bj#$94i#akHnE@Vj~+b&^#fVCL=L<x*_40Z&hFQX
z#UipIjG)1aTU)bV-q@)8e$QvVUTO1Ze|~<xl?d*h1@)z#o+c<Eabi{I>WHmbqVe^A
zO*1bq3r&=}u%j^f+xz?e3!PduA~&hnR((;>((-Br$D8NAJ{e1=UMbTTx3+2@ZsScp
z*u)ANU2Uyo^7r@ORsR0mxw+Omi=J}*{PZ;W(UDGzxqhPVE-IXtEW4Hb3mzZi1x>~t
zYUO@;aWQ-Dtu3A(51;*i?BCztFR!hYE`N6?@yUsaA*(_>T?GX_oj@ZK9Xobd?ECdf
zn@dEa;bcnE|9^kCWMB7l{1^{97&!adnuXre^|s{PoCJ!^Z*L++-N1d3myvra3|YCw
zKv!{q?rTXr+&0mwG)wfEX2z8jg1^7LRo2kxxU|&!^49EdC1qvE3HK9^=&Pu-tO{KX
znri=W*8Dzb5!9zACl?>GNln{4b6xE2P1)D=oSdCc&NkP7e7yfUD9$=(oQs?yqN}6R
zGtahqQ^m(5(6tXy+w*!aW`rcpo6OA42Ri&rNm*IavPdO#RY+i<i;IxtR~@e|5ozhu
z8xjv|1T0{fH*cPdMM1;&`}OgTA1%4LxRS1}3jMCHuquAP-E#l=pqwNk<{CGrm0LV$
zfdeDxMvz}$UrQRLaI6ek8k88f;MdpJo6FzFc}>-FEhsRE-BqHfq~rvNu*y0<84HJ}
zr>26ACIiiMzq_;Z$+@}Ft)41p(v0U?m#3YdXM1N?X?M-fPoOhEWcb><l$Aa)y`1Cf
z`E6bN{&V8-H3vUEJ^k|LW_8`@ZC474g$iC=P~27i-mjv<LfNfnLDbePt*(v}+zaNp
zdTz@xlWu0`Uly>?>Bp~MMGp=z-rAD6c)<)85zU|$(Df3l!}aa{d~kksZSCom!OKG=
z1SdW}b7Ot{{yME@74y%}&MtOrW;=51m{IMo5-Ib%o|`#!t4{CORL=Rh?qBO(>;8n3
z(Jef2{Qtl0+Ei>9{477=`LVk@p6d9d-9PU0>g|rB&noRdmCjF1Ss%<<&wrRX^SjO6
z?ogFQw|1xO+c`VZc-g1Bk7kFg4rAzFF1`AuVY!a6`TOS|uC0wR+~f04r>>#7`Sfzb
z^rvcNu`LUnA`L8+EO!)1757JKOt$~aJU=lt@_(Mp>9Bc!GUvU!_4M@gezV+LA8zOG
zzql<odR^RJuYv-DdGqGoy6EaMkNK;P*CtS2)Cya3qJfcF!n#by{_huI%i?D>YNt=d
zERypray3jm#If&B<$vY#ZH9Yu<<)$C^o3lV6<~E)xT)8W_omUYjsLE++S{cY1@C$5
z{>0~Pr~fvllycd`^Dl2KmlER7wQS-wKDagh`MVh#d+z2ebeehQdsyG1r_3`AUW#1J
zo_;WA_nAYS!IC%sMmsHf%U>UI#8d5Tze0Y))0tOp9X{NA-tKqL-s<l{qN1+*YJUEB
zxBGpG-D1!X=85~CLB3iQs-1Oxo$uvkzBhN3YKv-z)jT_Wc8a<<!|`K2$?;F#FaPf*
z6LXMfI`bW^V72;P?aH%EC!O$k{dYezlM$1ckJ<*;sGailJ(8KXK00@?%`ljCd6H%F
zGxurl6pW07ZZ}tID%x1C30u?uOk?)fg-_#R{_CE#&N<ALaMJnrM)?Oec9YINU-W+e
zvtRdm_gwnx%oV|taL(y4zx}ejOn$k&em_s$6P>gE?vy=ZhmF^6-@o7fe%)_f@9BD}
z2b)+ysqO0O@Yf6ipq^UgrpL$oC#(C*U0UXQx{;Y3G{v5mcRv04wwpR7?=Ck7o$LLx
z<CVvu#bMKW?wvkxaX#}k_Bh$2zXIp}$T*O4?C*Emu04h`k|*lDVK~QYq7yhnDD`x}
zmJ{kZJ4DVNbhW==u)*Lm|J{B6cYb$?zvcFy&sNfWk-2Ku)}*sLGrpaXa7ehg=*x@y
z&5Ke`1uVOv{)VgfJ)_OWKkQat{!Vk0JuQ6Yqq|=0ty3TR>>qW>x?WE#-tnw~XZ@9h
z8xQvWyS!b6<(kH{+U;|vtrxFaz-xGfk>B4WCHZ*|>z2HTNzY5oG7=ILPEXTa3|i)t
zdwW{k-l~@l5MS>BPXPN!vhheXtY82BbL<XnvHJ(Kw&Z0e-d!-?@;$@#qhEE-U7Mk`
z^RHah%c)-%zqHWpxae%R`1b@$9{oKNwp;6~O#2~sQa>&C;fI5!C0aXs&;Qa_KK^>^
zij0D0`#G;NQXSv+d;EUJUi0at`j*1Ci(fCeDzff&Ly*LY_~pXw{Q1w6uc!6>6YG8b
z%nWVE#eBVIi!M*UYAE*N=#4zzlNl^NH!FXit~&D1W}fUry_j&b54pFVP4W3HzPDA-
zedfO999O%RRlmEvuk7tD(20T0?R-#Q{+uj(^R4UKQ-w!v{?)j@LSFB8&9prG)6;Hx
zSF8@L)cMAKUVlqr#aAV+5>3ZHk+p73tF|53Sj|5D5ySRVBB!Tw^+>L~yvnri?(ZFb
zQbx}zed1%!xYm_g?I``HAiZt=k_*=lOJ3ZklD64;$J8Z02@DS$Z`rsRI6m3=L+<?L
z-P<=jpVGO|dH=#RKKVyUi-oVfDgU`@zpLi;%(}SU?Q^}SU0PP_`tQ}I1+S~a3;tbQ
z_QX^==*Bj2K~G5^t;Q4k(~ro8&tZ1H#`UG{>SRXwYllVdbg$LlooLUP;AwHnr(%D~
zlPBxdH=6MuVOnx(j^K_CYlg55%xNB?69n%(^7y)BO4{~jMe|ZAvjso)7PA~)FPK++
zw_eKl$lcS^%B0zXQ(s+QS8M;Hb>42T7k|xc4yaG)x36oR7QZcN?n=wmx0nC#=DKmi
zDw*-@t*ws2LY*Bd{=Yo+KGk>pcxw~ZF1sOM^U>uDHP`k@i5+<Ij`19C%sxr^lSN`H
zx@)gjol}e9D1LUma>xF__BCxg+Us`Bjrec+jQ_MS!;yJqj}p83E8fZ3ebnE)WI0EA
z=G8=@>1)dt>E|tR-v0E4?nn6qFOKE!9M;GNv8EgrwVt!kUj9H^ukh)8Nl&&F@Nmk`
zH;YzTIB|dcU73*emJFfQ*Hj<NN4=4h&$AT@GqbAW*p-&R_LfIU>Ex6uG1q@fUsq4R
zu==z~+3vaOho{`Tq9574E%g0|sLn4poWFm(dLomd#=b;E|LfnDL+k&x7f!Ffu*Rv}
zYU}sJpBt}7+O1KtGdy0Q{ypH{JF6-D#_`Gh4~xUfwT~WbJuLZbOK{pLouVtJ53N#t
zT{wp${p3C^li7w1zGth0HlOj}ITm&1md%uG?tMwyi_IUWuY0~&>EaqI$>aM1Kd<<G
z_;%0hnEBuS-cQV{|4{K++w42%k-7~=eG8dahCGugHvg*d;NEnnX9;Y3q<Zp~{^2*j
zA$aW9PwpS3<ubc##Wrfx#&+!rD_B?J%oW9ROVPz;lIGoa{*(4VTAv(Zx=~Y9u3b;u
zoV2xZ{{6$nLbujTykYP--SGci{zF+?6F+@E{6~L!_U5RANeBOIzkavzVh)SU4Vi>(
z#|1jo>z;|5bL`EzX#~2+Y?7*Xi9tt)4fCr=k;-#DY20=Fp6O}b^3UqOIY#_B^Z$C$
zS+UdhG2g2fUg)^VA))&tb8=UQO14!xXbJ*+KZ^^f$>rkW;xbQZ?q-coAvbRrmTOrb
zj-9bFv72FYeOT9YeczXRX8!-U{I1QeM$Y^C3fJ42GbX(jyX$>z@s&ThCg)y%-hM5$
zc7K<Srb+Ka-%s{?)!gKlxgYl}`SWn`59|FM9UhyO&2yc&{FKj!*}WekZRc@LY~1qe
zNvs!J%(BwO*DLBzy{rF|`LgKgu|Kcf*6cWtvaI5E|B0EmKKnC2TBz$T^zKxWlJiQl
zDW|@a<TqBNUQY`9`Ao&ZF%azVf4uoMLAwKOirN0?WNJn}XJ*^;>g2ko$2(YL_}CK;
zXO*h#x#yUYqGEPjd859O(Cm}HzeoR*G`_Uuak~g_MS_Du;;%W?`FnT#i=I_}qs{am
zugI#ujAu{V-dOxn!N_LL33st)9zSkNWj;NaE_2P}ka8}6>&3aoVi$Hrdfwd#o^p2i
zr~mz3QPbQT8RF}1c>BD&*tW>E`;x&1m4m7F&tL3Z`{n(G$%V>Q!HXiaug&RZ%gOn-
zPx$OV>zOB+cN>04(UFs|*ztm0icKu-*TUN$4;Gvbk=koHqqVp#2vkRL-#mk;SuCVB
zwbk`DRn(tiWQcjF$dma+?eVj7OVdv#ZIRvIcVf0;&zlP~7O8e!T6kD`ie_|MP`utz
zu9%qrvcBx+QXikU%`mE}t_fCp^!`TB+6=4Qc)^JxPS16^Y`E>%zP*V(|0?A8&F{hW
zX>ZP*+wk)(=Z!nfp2wnUc0MzXnSSU-;CtIkebElDoA_*=nJ#<1;Od&3>OTK#V#&An
zh(+vZ&5lb=Dir)trK|N#PPqPx+nZ(Ij=M+h%9ps=yPZQiaOUJ=jU~S}S<OzHnv}DB
z*^G^Wo2%;f9b|DaT>iG&Wo!JQ1+O9>%=N1I<rVDqG{)<-+SHj_4AtMV^QYc;x^zm8
z&CJcQajE~)g-@FQ5|;|g^ZK%3s|!>4_jM0y-Y$D$`npe7NlCM{O3Za$Ylb-6`z?FE
z1o>rsjX1Q8nS13_bKQM*Y&}c6CL1dVyK>FE`KGAg^&dyG9DC1&>)VYN)e7?I{_4EM
z>n&^_yNx|lxNTwYGn4P_OO*X&59sC>{^@1Nd?8XaVH#_&u(&kugo1O6W}4Wo6uS1@
zXtm$5K%MAUEe5kEYc0LQ$=RzE>tJ=~6U(HlNe7Ih<Gdt;nD0$_5jp$l{E)?OS8NS=
zeM7)LCui=)hBV$L6Bb>iNlh!#1D{B01}bzYx&EsV-K4GbP`=Dsq9}NIB6FAeo#>mQ
zb<-rT%eXx0u6z>9e|)pVZr<lFEpjFZXvwy;NB#eh_4}{#-G=^y2ew9EY~W8&Dh(4p
za%&^=xy|)^UZ*cS7H?o*tXNfZqg^VvZ*$V$Y@hqVE&uCgJ4%PUsIY?P;D7Kw<8yI2
z{^y}&Isg3o^-F&)S&UNPCvX4yWL;=ivU0kh&^sojon@yEo(v9W=&*fz<Zy~wsDoKd
zVe-$cXJv<S!}!j<JUqj8`-bDk^pB@?tC(Eso8I1NoEZ3EW^?%J3WLgvjV*D<J^Ie~
zFN~U~BOu%xtyg?V-`#pUqyKr0W#4~w_B@@vvEX$@%(t0yZtm{;yxvT8y{q#DB_++*
z^`?TAT{3TGTF-j9JIQkEZu`ZTzdU{1k{Dh6!me)4KEC5G3zq5W``o_OnqijAFniYb
zC22D!O7v`<v9(g8wfE|ej8Nr!>mp|Csk~$HvM4C+_<Eg9ZiZ(vm!FvuDf0T4oH4u1
zlFIkrBaI}Q>}5~SSa!3+GD7nBg~kxA;y!t`DJfU=tv9~idG5UF{^chR+>FWisdo1F
z{KbFn+PZw3>DK?|`j2I=BV^Q@ex#-|_E;|~IJI;3&CmQ3&s+8RIonp~{xv+c=cj7;
zao#17)5}dkk$y-bMsK=z>VzdZPftw+okH!}Ev9<0M$-Oo@5%H0pPBh@uL=9GCE*z3
zF?AP{*}U3nI%$s&zuz1o_iIi2Jt4`?yKmZ${wWVT+jW(#u1+lHn#YGPhv%HQ^k`G+
z=^2K}Y~Is!E*?}+`osfDTdu~lHvaf)xO1lNv}*l|Pt|ABbo^`X3*Y<Xq4D=~=;ng_
zt7pXQbF~F0ig-LnzFqH6F=(dXT%KI)=^j&egV|?q<d_}sImYBTML)dDXWIUHrv=J?
zL_R-1mv*()!Hqw!#dWEu;KXzX+is=vk-M5gHJ%r@yg#;PQOe|Ldv33gF|YsCas5=?
zIm6fY_uo4+!%SE*xo^Ysk~Ljg;8By4?Sk?0@&+ktxmxy@_QlNjUj6m>(};uTj0N}a
zKa-|vbm`ZL>fDxg_eTr|oE(l_kqD|}Na)?7W;5?VqN<IJQDWTD>V8WhJ1e2tk0<)~
z^{8BWb>W#Hcogi&p(m2|2fudSwPrXm$FA<uJK@vy3!LxgOJ3P%<0xDIWEOLbdHLnV
zXA{hF<Gn5_%@;^2ZB(@VCR<yvJy7}76kX?&=fws1?%Xl%cFV6j+2wjac+m;<H(b2Q
zSHH&#L~POMjoQA<W#*<S99btGEjB+`*lX!*pMGLT;hu1{4#sZ2?86-$C*J=LU&*4R
z-*Aw7&g!3e4+3oX&!zo*`RU8tEiJER`s~Qdd)#||`{&2ECZ_P6KKp6e-x(Vhv-vI-
z-)t2v+ID4~*TfwWdDaIGmx?#P<ZV0m&b{yTeu0qR?CZX)I{H(7|G~d&f9|=gBE2m2
zsO5***1A&H6wjA?ul)YeAj;3oXn8g7uP>2)`=+T&W@(?+kJTzPyBvAz+dJRpSKJzF
zthaV_aDbW(hB4)a64@7*_!T<&TN&)XJi#rsh)JOG9S6hm^8Cc}n!?WQb<SsH>{7Va
z_`SK)CsLVgaJ|cH(T@3y8y-zt&M@-~n-8O<MydzzjFr|^+VM}Vf7*Y0XKc{5D);e^
z`m(akv$+_BmVB4XdU2^cL^{+#fYnvRRYY_NM-!{7qu}lb%?(Y9rYJmRoT4Pl=(y{W
zqm%z8&u!;^D=zGMTlq3(zkbWvcT;|!$y;Zep2oMh{ae}(K7GM5=J07bDO*FAMa7gU
z%o2;8r?w$k@&5ULeM*r*H$NN>>Pyc)8~vVVK||A`j*gU|^Na3$;5nRde_!pAWy^%b
z#L6B&an+tWM^wuAQ?vTY-x>LAqQ54E-#Rw&-S>nWuM5k&#nSWV&E4@^pz+VuNRw=t
z$?djY{nqLqKdqTMWzUmI%xnH`k>`KE>g=ZQc=iAHW~mguJ0!VZzM#q0gyryw=BfKS
z_pq)mcJF4IdEVpMh0c;in|9`PSmoW_C3-kuhp6Drpevg-%=g^dnjO3{NL4Fj#epX$
zC+`$Y+fdRnhui#Sw(S}9{_82vF8r3*|B53ri94b4e!`J(=?@PQs}Fg&>DxP=YOBoI
z68>;yq5a1!W^wyJH7xEPM|S4de(QN(SN5B6pVZz7?NQfM?iA0E&HgiCTIEdd0$tCU
z_IdyRX**e&E=_FK`cwWO_)q!qLZ*3#eJYC-_AiP5|8`&UalWXnSx1j_3ctL)ef>QS
zaORvYCMS39$;rt-f4yG+^T*@<mEr5<R)?+KmAC!-iDNT+e$_Ct)IYduzP!MK;cE51
zGoN|q?76vC@%<l}!@vFSpS`m1yRf{tfBXDL5*IZ2mY+U-S$BT=t1ln_d{6M+6;fdQ
z^>ZuBw)4sFcD>d4n`D#Sls&_sQr`ES{?T?@sf)jEez%O^akINH;lG@Z`XPfCKMU;b
zBreX@J1uRvxIEWu%d69mL%&^G9vXY<xcRe?-RYTge*bzKYE$`1rS|u?rG<}=ndIG>
zp&Pv|V0T&WUO^X^bpnTX3GPaC`KNe#(+rbLp})VsFOS-q1-jGs$?56u+wM$nINp1k
zIWX|~{k+@GL6h1)tkC%P>)TyMn>_n-1?mqDzAY5W%YA%sm2ySZ&EFzw4i#5gw|^|#
zJ^6r~-;T?B^j<&w{u>lVpdA%!qqnEs*Z>+3b!Ok&aYA(2JlCB&_x+V%1Ko^zdwagR
zo?hQ9(`=)X7Xf$jzVmL^UuOJg(E^SKDzEaJl^SMBiWtWUzhim*h4Wg(@(&OHI0vwB
z@b~Y^Yku_S=jWACTeXUxpL_cI{r=~FzuylJ5E0xNRUdtIvxfQRW_JEhe?Fi8{P+8P
z<BAUn@Am)y7yUV@<3QrFo<B2eEgCgmmDf#JwR8P{?uc!<x83&F{k^ffTpx6%GH7_v
zQCDfv292VhYvcCLy0^Fb@z*MlUre*Fg#@lkR`;8e@Z-ZnpV?-;;&By?aeJ#C7FdAB
z0J9wqUnueht%24KTeBel{yv*87o0)E=6kEZuQIqUx&Qw88yk~XhO87?mboeSw%M;Q
zFBd~j!6_;&d2ylf?QQUV3t3lJ9qrcN=i%km#Z@dj!#JHUbX7>=!6sJFY5P60*3;hH
z+`QUAN$L8GwuMipmA<&3`0MNI<4vsG5+)e}bFIsdrCt8Me0A>YYiqx}zTW@$_xI1&
zqVq5AsVo+Cb$Md7Xn#-9=~Hv9&4tx`5}uuz3EE$EYisuAoSR0X&$KMd-bnb{eib=8
z%XD$->1jFl_C!KD7*Bo$?<`8K`}5HqWEtpA+~t0AZ){HQ=PGXfQDb+n^11Az$Bzql
z#^l`I)_ZSnb@KM?Z-L;-=XJ#IZ*OnztF`|4@#B?sv9leU*(T1Jv*u8Y+vfE1YIb(>
zRK2HdD0vwKGGbHe>1Bt&^DuWLV)Vp8TWLQ(J1b*fH-}49%b@O0Mbp6rU7!DNsQjE3
zwJqmlxBk8pH#R1N_V#K-Zd$^n434$rQ&Y9Y=huFd+?;;?*=ha#PrlvGPd_<HRqM?D
zk}od;=T$u7jM|ptxjJm^kDt%yXI@&;3DsD<B4niyXzcC60>>|JZcgTtwc1eqJx}ym
ze#PE6w|+U_ni`vl6DNZ9Yl8-;_PBukH#>N?S?-N(xzblxhcC~&y9;#3!jdH_qR)iV
z&dzdudTMIqz8cHZ)AiH8yttThc2;O>;vtjku(bf-*@&GxE$90E`u_gB{{BBsK|w;I
zS|JZE``cgr*)!8D_m<1&XJ<j1THf#f54x~JL_aPDGA6!r>ytAxC;Qv~owBp|`KPbf
z<AWEw@z(u#*dD!2uCufA%d4y1yWj2V1|1Au_}ER#JZ}wTZSa$8VGEsD#q{IOTv+J*
z^6F~wx3{)Fz8+t{H}%{7#~(jDo1GuDvnX{_+SyZmvep%EHXb+d4Q6i#ji`3axwtA+
zdt2UJueZ0hu8iMr_xIP=$6sDvHvekpKi_Wdj~^A+*2RJr?}0WTotUKB&BrD<arT5~
zZDKonKo=EQ6et*{oq6!{`TXTcN4w0g{!>*|1#K$xn{Rja#l^*$7Zx;1#X`EXYe840
ziO1JWJlxJ7|FLxE-8{efcE0cK>|Fn2m(S%T-qZDL{(LxmBS&fmc$jeF>o_MTCi$8V
zjGUaDT5TO2F=el$1O&m&9}pQBAp)XWK{xV&22)AkP@olJ-Nem2r#I<Lb$oJcQ`y@n
zxymPkN=ix|etyqxY)lr>3gOs(`>bsFor9~_?^~tR*>NH@F`UyS&n9mD^sle4$A7rd
z$jr{r(bpH&=;HEZ(?x?VCwNz+ot^dM#Kgz^{qAmVm$v7}i+Z?##xU|qV|SGt%=`cN
zfx}|A-Vhl`;<{S;_SV!16DAbrr_JsydwVO^$e|08#ga}<)2%)jP!bU#!Eg6t0T*Ou
zbkf4;?RlV;qA_~n;1UlqGGbKn?vCVPgAKRerq_#sO%`O}64i29=+ye*_urDYw?bJ3
z1t*^NOFz^dlh)bUxyR^@DpTG6(8&0VX9i!6zu#Ye_1nh6$;<APp0~})5u6y^kX9hL
z^9*QFz3^SRMd=6kgs0Xzo<1bYAy7GWUDLmpDMFo=Ns-@A>qAsmiMg(8xV1C?apRxL
zyi>K>qLSx+GY0qSC>3>Iu<Qm+SCcbqU33|AJM#60#ZQHw?9rB!cKq|3KWXP38QBj~
z^`^78R2~<8vddh~ea7S~OXUlmOg^f$q@yFI@KvP9&Ra?EmBMe&s;K`lq1LwT5#NzV
zd(Iw>_q!x_al)Ost0tQKaO_)ir`J^?@b2wbygJ}Eg!$ds>q<C%6e@mkL{wY;eWh?`
z<;Ip{5|aJ~N*ku@_S>p`y3_sX$w3u{NM+C3|ExhWz9+-9=WRANX?IWgBONSI{cq09
z)161-_B@ckmj6V*@v%^UcSlD|SKC6@b(<dS^8F#d#wBI<bi>cSsvn#4Eq`5leIrO$
z<96eo+NcCsyPlcm`Z5tG4{kS~yf{6U$LYkcCmc@o_FbDRbl4_jKk44#pQ^qp!fem}
z`3LHvdCZuW<o)ZN5Y)PHw%S2v@0;ciY;}~Uy?D4@ShQ3~e!JkK$kYGQ|9gK~@w}tQ
z%roAQKmMeeON;D-8`g(CZq8Y9Oh{1ht4>!;>B(YClRCpXg*9&0Ts0<LC+4VgESsi~
zK7aRyYqe(%GM?X*|Ka+C?N-~j8e1A=usQCT^WwXNdz{>vkMVaR{=JoXVLj#X{PhYQ
zom=f2_r_LFo|7UJWpUzR`@8i2U+x=x5;^xHOd@!;PUYMk>h`A>J}fp!(h{0hZhU-R
z9lJk+lh?7wM&AowOiTx@Mt4aJd=ka==L6&K-QOPnICfa#-0c(p-kfd`>pq;?RDQm(
zNBwQI*Dm%qk(-@XFf%Ae9}C!;XnuZg<K;aJKg$1eg(<yszr2wz@8ph%W7GNEc-GaJ
zdCWO>&vEgG(E83j&Qb{v&sz8!?U;2~I4rsQT;tYCu{hIya6XH%p7=Lj-Fpt_wYW+5
zs%9x?3mcwrOsO%`IWSMz?DNc(KY|&nPt4bQ-PD~VbFN2$Gr|1dk3Ai2Pvow(7u@HU
zsrcjFG-FQXG-dz3p8D)E1(Q1g%AKIn_{7#nwrN6tEB<d5U;e{y2{WV3?6isVxX%5U
zH8cOrp&g43ZcgT2lG@_%+QrSr#OS=)wxd%NRMOofw$v;?$LhSFzri8Rd|~OKL-%&@
z@$FGLAJMSb_n1|W&c=WR+?LX91sNY$YMv-;`+qn~MNrVvTYN`HOlo7YrbJ@u3F9=L
zdn%JJ?73KBeqJr}^ZSU!`xN~5^F5LN%VuZHc6-}RnR%J&X(#4Nv<TRKQT>sl^YZq$
zWD!lrc@y^*{7*|{T4xo@!CxpGd!$t`!)sgL!WmDyQ(`1ON2v4p{hfKh_UK8&8lioQ
z)f)vieXZ@(2~*kD{Q2XXxz1HV;)_0ARnM=NXOELz4@zRtg7wo(lT4vt_PqzBUAbd^
zaX#F5<4z~bVeUs7b6Ov~w->gkdU*Rn*dCQ*C*Rgr{AqgrM4#jLH-SIq(&BR6lGEHC
zWEi>mI9lv#um8=s;nx$*d;By0{J30Yx~=1cE~qOXKHJ(*`N7Wb4{n#dT+ULS^ZWKu
zmhW$W&c7yGkp3q__U__s0)i9wFD({x1q~l3VvZ;ix>NbdvgML7F6$a{)%`cVN|`ph
z=>D~NK1z2b*SUfAC5lY^4{Akp>IHQ0H-|5OY^Q2E_43J+PfpJKylRd9T~WvOKG~}m
zl$5q}9Nwhkohq=iP*p|c!$EfW2@@tPNb2Y~k-D6Gr-7EfxPU7cklzS-1XVSbkRYf4
zw+(r+Wm6xV2Q|oF8mA?&uAQB=U&<^8RMA~IpafdX{$u|vvs@|uviF{SU9kpl7n+}~
zi*&S4wtsb~<6-m5o1X(sw{(E=<G<+lJ(bfPo#U4JJkReq75r}5?fo%R-z6XLUVC%*
ze}?sYjbeE9l$4a}y`JmDEqP!3SK`wv>nZQ;zAxkHVo1En9r*8IWzYX2y}4HQ9rxdd
zAMWnx=s5H!{^QS@JJ&aI3oqAQbm;JBj<aD8mfgEQ;e*5}yBP-_@wV?={BrJ&yLqxK
zV7FFiYHDhlWL=shUvKGmr+IThOX_~rZ?}0k7ndB@zwx$j;jgz}Evmk(;A0a6Rkb$v
zCn~$Ih+53|+qP1p^uCLa`wQKz|J%+k`C<G1QE#rp(o?2CzFv?2x(?LhX?1OwYh50+
z(u<d=a#rx0br&VfK2KZgda#Z8M9@#qRUzw-&N9vZa!x={aALHz&bp|rU0J5DC#<cC
z=ekz!=gu@?c~&@=nY`~|>siL>=VU;41H0s%-7sUq1c%4V%7a!s-TkC*f4uv_$|NuI
z#Ubtb8TVg0+Wjq=!v*nJ_RkN8`(NC6n&?$N%c*ZtZz)TR(>110&F>d-e(f{pQFLwx
z-OQ`BeM9uCWp|j__@4dsR|{NsyS(MO#-eNgxgQ!H)%gFR=2&0$;eCJEfBrbUamKQh
zhhp3(O`5bphkN_&-t*5v4NiuZwl>h!N=)GPQpB{>yKk%|Ian$-MDst>jq`u`d%1v+
z;Oc4r+yBf<+2;Gw>1LB)&F`<%TiV)IX<Tp2yu9q<GGA#YC#MZM+^08fxS3P2H_mIC
z&dF<QqeE2$1$XM?{Yrds<x0pbll|SUufGL<TgbwCwEpC~o+TyASLRqt9eZ0URrKfx
zr|46wfW1|vF?!;0>&54vZ@>Tk`0KAiLPAcjudNML0L9!N^*&i^x39Nfe|-HiXUeM&
ztZ5!QqW;uddib!bM%8<Gg_vrEt~@!<wmM`=wvPWin<-kMulW0CFY|nUZZ6aW>-~Sf
zNl(?feqi!qxufr8yU%V}z4iZmuDga)J`~+u8S}QxEa%38rkG^V0w)`rKm3ZDTsnoc
z`oKwjYU%51flI4&m9%aB4!SX4JvBY>-mCrz-}?lw<QA}Vi|GV7u3NY*_x7Xjzt8`>
z|Fr1h-s*DD;v*5U|EKm;e$KeCLh|SAy{`AN_*b^BZ<te99=PZ(OVrl4N1h9-uTr?~
zu`TE3im<h}So=#~UGZ!MwYHDif4*7&&-peVf54{0DoNhv;jiNyFScd{eLFbQ=HnNJ
zxXRKNuA<&AFD^EpIN`zIHBHA8bOE!7py19md9$LQ?=g9k>bCc{;nMg2XLa0oly!6U
zk>Xzhv+eVJZ|{~eDm#;rV>Y|1NOb4jyrL&37!RxnTwL+xqB~?co=v=g#FLB1=RLS>
z>8)A(|GLHEyx`SwssFxQ&iT7Yc&hi)C#&_<AC*{zPFcMoaIu?C%#I15B^A2SW}s9q
zB6i-x-+y`NZN8j8Uqw{k&+fP@xl(Lz*uuN-Q;(b5o{%h>^0PP%bYNigF1h&HucFNC
ze4ssF;dX1%?<E`R=)CD#e>!jb<(<XpZ9I}rB_&(<Y6JyakG{U%DXgAx=ZjxoWTj@^
z`|2A$?NOhlHtTfXf5vyf>Wgi+_}Sg6=jTUn&yxl9ygfZV_xyUbI%G}6$7Q$nKY5Z8
zSO2#(?cAJ`3mltY+}x~QwmZ*TRcX<Nin%wp=gXH$n;blIea0c*wnT}xs83d#>TgQ)
zt^Ylt_49sZJ-v6+ZahES&VPA*y#2HD^Us4!xU}@NZ|%Ql`uqPJ0^Jbze*gb-D*_j1
zTw2n}S0gxa_7&dm*K-~p>pc>_)-z+~I+0AP7{`xRQ`TP-z7v+uvGtW%%Ew1X&A;wj
zwtTs9&JBY{j~;b&b|xNf<1Kl6>+7=p7nb|XG*VGf`Ebcw|KjR!{aL2j$F}9(F8Ki3
z))W`rEvCE3@9(iEnP0emW_=5J9eX@a`+i%}TKk9Rs&5&UzPd8INPhpHPu`#vf4{!I
zj@(tEslWHjrJJ{J+|in4o`3Gv)@)F(b8GhXXPxTvF8q0@q+~u*I!&VL(vy>4YWDD}
z-s)A4ei$`9b>_yOZ*3wEGctPEzexVU^}qFKr<vppgBF9s55C0luV2d-Q)F5Cc<vp^
z^yfErnZ(_ev8m_$@<zb#X0V)Gw7`RlZi-XRCnukq5|XACfA80oHLsrBb278B+rT9D
zfA0KqlT0+iE**4EeS2f$jHweoM)X>+ul1T)^CQ%1_qk}dr&4;mQusflG`-)H8~*8`
z?;oR&c5OUwyyq{kx-XJH%QXAihbzJU6KBmj^?LpOb8olbU*|9M;K#?upyitR`~OaZ
zjw!vYaTA<4`vC97%@&WoPPDnVTfQ(hJ!abijtM)?pU=MkyW?bD(3TjEQoUVn%Y2>1
zT{q26`TXm@i3XF-&4=vM8(;J9-)J%C#q$d*6@E13*s?Bs_EgqPUpjqxpo&iX+tlg<
z3i&-J-@mri@jU(G@44JMrkg&O>OXH(IlJJotlq8gQXyf^m*Q^8ucmK$?8y^%?QQp&
z&}m$EpBvRYVgK^(@^tB+e!lH|vWtqJpSx3h-nQiBrPkEb(=;{asaJn{(<5ctWm){p
zp!!?RqeqWs*j8@?53p1`jhN=TUj6rnV>1e;ytnH=aoa2YTj`F;x=k~eW`y0e5ufYi
z`zqx;M;YULSH|Os0^->d9-k>wW4DxJsAuR2Xx_8%`8=!IM61K$DJ*C2s{fZblC+4o
zE6$`ya^BBhH5b_)irenb*y8Auwxah$?(*wz{fcfFH@#f`c~3<{k?8rQamtre+8E;3
zaWjZbe!l$ksUmp`J)X60TRb`2k6t?4csedQyu|KQ`TWyYTI4g!@BjT#vrqb!@1$wd
zj$K&j47##%XYq5;EsrJd?nFL6{jW1<sh45$F`l@}r=p<igl3s!9-5{b4H^fOzEW!K
z_T*zl#{G@ltIg$?)O@eJ5gWPgOjit>JKrykl+u7DJ{(iSmV6Wr*q+95?R}l!-8A(t
zF>OoILdE%~8vp#;;(AX)diS+uhBdJQO#xfnG#<ZBVd+@B`_jyUBMYKS)fc+&{W9a>
zUX6lVFYcdm)e)F^)$LOrf3sNxPtLO^X44(BcDc?t-JoL6yx-%v#+wH-Z}I(!YnBPz
zkQQil(93U*gvnm>o(-&-J-45yS0+vPwcT=u_NCKjQZix=J0@(OAAj!tQ|~E?cIO)J
zzD-YjHC1!7vdJtX{xb<zx4gTOf2GHg^M2g-=ieiz9er|g^2_`C?LkK|-rSV><ox{j
z`BMJ_E-&-Vys|=2zV?gY*H>2;`_484Ep`G8T3RS6nNN7VDgEP@mzS^PUTFDRT{Gq7
zr?*qNyJpU5RJd&Z^MJ|D)q8k1T->zc;2lxBbEi8cHkQe)%t$|IV$QtZ^Xc8MbzNa)
zh4*Jx3#<Qr`74p@Ge29W=Bp)6i{^hl-aO+^;ft+tvg)cij&}d`dY7^{yNGiw^qe(+
zrZtnpgq`<yROW3IU*xmORjMTSjY@9&^zGuyK3lVeW?4skn9RRn#-~cH$KO)dJf2-@
zcVIzaD0{+UM@4Dxus^Z>^Bn&sskN8?D2zX7HJeXd^|tSWfDHzrtFG_UGGFsAer?p&
zE-BM2gRCnW$NJ^Zw{QxJi2R?Pzqri0-}ajXXuagL)WA(Cp7*L=ua#a`9_iwe_|dB9
z!vjaH@YP2HH=C&5>N}pcpDXxyt3=MN7mv#qB-|8uHLYBr^2+h@PVut6&$>++x#ORe
z8oW09`5_^|sGoN?Z&~iSAIv|FI`PZ#bRChB`5tREC-hAAqwmgVS1GzI{~5Xa=;>oA
zH%c!xH5~OnSx}PCz~6A%E$+b7yMC`XmGR|#-&C|%-al<p>747KGxnT6?`G~Iy0y5x
zlHKx7UE1HXM@`b(7KG>U#(WHtHj_RY)z|W;TsVHI+U@Q6``7jD2dxL*nCu?4J<oT0
z-ra~j6^5o+S1za@{U5w7=Vry{v*t7H>+L|xV0~s9otb6o4IZ!jQ@$<drc=$;SvR`3
zEzjt_zV4P~qGY4Ni92r@IktaJD78Ml?|WiP+^?VeJ{w*AnXY*2EVumk&(9zK(G%XY
zoO#dg3x{8{#@&m3rS6{d%+=_bE92=r2C>y*%Z;lRwwKp``1J6f0;{v$)|Bqd?7a;q
z_g<BK6ZQP}?R$rQSg?O9vFDz=J}z2(#dn^Y+20obdRrQ~G3ltO{kNdtU`d+_gQusb
zFL!8US`)v2p5I)nmwS$!UgOd!RP+1o_C5c8y$0RzYm#-P<Lqp6(6~EScYXfLOG~e;
ziJgDLs`*6x?~|*y#c3Z^QaYxhzoBC4^pu9+x_y?MVQKtH3dS<;{COT6U*Pt$$0Ys8
z4UVIamhW{mICzGq(y-FUiE~pz@q{Z3H)16lAE!L~RP$17wor46DA&V}L27~v&3@Sn
zxa~R3pf0e%Xnly80ps_L3hIU#x6GzkO`NcPL-R)6xf3dnZa$nRF#UJTiksWdo?pMO
zhj;$|ul?dzcO24>asQj7=Dt1drs50lld|_450&qApZu)cIQdvlcF~>4X-8W)g+a$^
zfwtTq?GgoTCiw87pw=qJvHVUU`|7Z@iz+`qGs(EnAZJ@u@a09IsGG|Zs}w#-qm&b0
zHz<leULE-2oZ%Cl>hGb^=`L$dKG+?bQhsZN1b6ZFUxm*MKjwbg^rOY`{<eir3MDO+
ze7vL_r@iGWIKj{RIQCB5fhYGYtfi+dGyEtZaw>m$4JZ4@%C~nSkEMLvWAg2J@r8X$
zn8SDUPI6okV%8Ed-Em$F+v({)PXym_TzQCZ*7WiT+f?UnbkDVkKkqr?uEKQA(A5?v
zWz#08?>;(ZEu)rbfk3=j3!7`snYq)B-EmnFdeQNyk>U|G_K2b%XLe2$TPdhkC}%(a
z%`O9-X}!f4l<ioi-(8TirdRp+!+Aw&-TS_8xLW=7wylnP;wF(bhojGLTTs8hZpO08
ztJn4>9p#GJo_F@u)zy{n_kMqIb+ve4VBoKi9Z~l~a?P~D*PUVJw>bb^X5N+<u+(en
z>MEIzjuY{_^YTu~%FUdbbw1~apj55$l-@-b3tlZby7BXO#VZQax%uXA`B$hPd(VUU
zj~M%<e+MVavi($2c>Cw870;(z{~x~o@3JqW`LFu=w}~JAKA1Dds&dYT4I8xgt*ib0
z?c<%|^B*7g+k?)>c(?n#9j|#jd)d1?p8x;-y}2=&UD>_w$-L@!pcVV;+V#I{9sGYN
z>AI_<;DNc*KK|G*dCqpiAN{6z=ac>)ef{<C|0bn^v(1mLJ}TQSX;GjcsvWk(W1<o$
zTky$RX=uc$cZq5ryB=SEc17S~(4G%oDU%1#yIn=ZT-OzB*{);v`;Bpzxc<3yvAZL7
z7O66`@$AsrJLA)P)0`U<rfP>5eZO0Nc}HQgT-}dEAz@+Yo`uuzkIUCD@tUf&=l{Ro
zKJ#p57Ct^UQQcoo^qE!qEH!@nKL*lfISM8wQ!Xv_HcmRiu_tbQm$FjP)R%LPdasDQ
zdU7voP6boKEXnO_EsGAgABwWy{qO1C7Uf00a%<(CHX3xBt5s#(QJk}Bxs8q4^ZB|f
za`sG4*P89~$7U+)^xQJh=H*r`iR*kHpPhT~^&^Kpcb@CKySsaO`uy5ypoNMhF9L3D
z&#%9^{pW*SGks#VNs9}by!gYAx;eID@sj4%X&=6xIy@mSaPjBnkh8U&JByjrBdv0^
z;(aZB&U5MqY5m!wSa@D;-ufEx&}F*I4W(zAr7PP1lRIv`e%qr@ajV}ZKE1*pS)<gV
zBgemU)%J()vML)Gq|^N$w8wf%>|Awmact?FMisMje<vONXJu1ae)99QhiZv07?@()
zipv*voQ``PySwb;A#VK*rLV*8RXpa6+M2c0KVNX-Y8Tt?=^JXcwttOl-eYLCZH-s*
ztuTAlQ#&Po1ZZ59{ht@9sop$ilcYuLgubH>UF_Ta{+2ozFY>)5a*l&{xgVQljmd#|
zr!9Er3+4)mvs%~l-z&+TK55b+Zv8zDOTDIQ1TJEEeSQ6YB}YL8|39%?bSIrTF5mxX
z>H2H&Uv4k1xmRl-U;jrhQflkok^mNi#hMdNA73=L`u`8>-IKlU$}cVm6BlIl3cT>E
zb?a`u^z*aY=THCJ#IAld+C(|`P4%AbCUb6GJ@;kLtSRe7j~}1^Zb@NxmW3_*k8h99
zOZ@#KVYc*#e$17ZvNcw3O^zn7Kcm~;C8m4o%F5u%=X1*?Y$^=CzPg%xxQ+MM2GAzw
zJ!O9B|9tIgFD1QAlV2S6i(&F@?pq4R2FF~a(yJ@LjYylT8xJqY(2%iy-!=Uu_hrd_
zj}FZY`s5~cWDf5_w)1yHy?(yvS$6aP?EZiED=g}h_RR2TUUX4BcFM$`zdAgc_lUDZ
zHSdUc+%#{6RQj_<zkQXrPH)mV)+_D)@zK$nyUX=ghp%5&|Nmc0TU*@4ug~<tSQ^$Q
z|3B>S`P}NIfBv7o&M=0YuPeen9ojj;_MOV4N#|rmHg4S6Q1GVggUU|J>XI2dGw%Nm
zJazn*;oO@iCcl34@H2y_r)R}E`#DDWTv7$Q3*&7R`;XTitop7i<-A;ddV0(R(YT&@
z?`@q5Zf8t;=V$qQj`C}<;Kx7RjjL?tR_^?M@%YZoSscstqrMz0y}xm}Yjsz9{LSvx
zSJ&6qf1Bp+JKJn&^!7X%%OV%~2qmTP39+g_viE&C6EH#Y-lwplkCDvMD_@Ew>NUq|
zU*?>`qbSyKCQG?;d%NA|+9|s7PVZlp@%`gt&D-o)HQ`Vp-><N8p{gt1{gQD?MYkm{
zUFm#ZF(GIB%sDqEozvMr*Frqt;!%g`)4#+h9!YJFoP5{E+w8?nwJBVB-6=Uw3L<Na
zb*FaRcHXJn?->*{VTFct!p4GWvpego-*4ebe1A`&(Dm%*(>wOgIQj7Gz14NX@3d77
z4JR)1o&Bh2=M1A%u8$u-hUeau^SAKle|P-SbT@;<d-@?+((F5C{yb;Zw*T_{Mt@z4
z_cu@KR8<>H<`Z9FaeBrU<9V8oe^2CB_`&@B*z~i1xWfC()Azf5*>#df(xPp*y8hDa
zoofo>x|hC*KQ}x7%#o?Kz2}#!8fG1nSX3|Ldj0yc3!8Gih4y(dMB5(ImGHe9VSPKj
z{>Y7`>KwL4t(AebGp=su$p130A(gfA+gFV?`^)oUOrtIx`_Zv*zW(B;50=ePnf&1$
z*RHxxXDWYB=;Pl}u&s=LwOZX_=l*XGvhK)UH1XY4`r7UGw%p3MTd!-x?lMU}*0V7A
zc;BxBLV`P)u5AACyf&2K%9{5}>Sr75nr7f>^&<5Omww%YFMRy+|7M&!7{ljvqp9X!
ztduuDTc@6g)9jeCs&M|qk5@wC=1e^GrP;CIK>qB%YwYEY9AV;_m+?+1H(Ba$`Vzz9
z_z5vb-@J)<m{`qF_IJjy5AwBVZqGHm^PD;BT3Mj(?xPzzGr!Dzkv8|X+v@7rKi}Qu
z<sKa2<BzrXdCI`%nv`=z_pO4F?tv4nFMaOG9!Rv$mwR!xarUyM*Vab2pG^bpzza9k
zNRv^EQ+oXBbHMikv3-9;tC<qsMjz>6%3Trv<&boZe9RZVz5jz7%j2rMp2yEVB%Qi{
z>EX-wADy+i7B^#0&spjJ-t$kLuFT3{YMo`eyf1P0k#H;X`&%k{a^A`<ye_==McUsZ
zoAu8xe>3-c0BiFk-QOE3KL^Qep0?@fcab%ZPiNk(XN{fz?&RfK_6PAltm5BkJI$Xi
zl{`)AMD_Wt>eD%Ia9?`3boYn4uZr6K5fSY52|nBWLe70N-`l!;`-h*micGZT|6MiB
zPD`r`bWqa6!|k7+PLF@3><wz~M;Uxr!g_P(<<soJcWq`qNc|t!%^;?2w)SQ!I|G9P
zgQtsQ$gjwamJZkEsMXnDStI3CGHd?U=shwg@{-gKbWN7@wltr<<><#VucLYbq*;Bt
zO=n*Icy``ouj_>`Wt5er=gdt1wdJ<eZrzWpK7ZK%%ABZ4IVxvwkrQ%|NvH0X6vLXH
zhsPP#{65Pc$Y=Lj&h6gaxP3O2_iQe#G!OSqJw5HM)Y_Zv=jC13hB<Heb9u(rN9NON
z4JZGWTjRRp|DVs?wmI|c=LVfWl0Hu;A!uQuI8%1mru*zSUIlJ+5i=;h_~~Ff$C{s=
z$}W>v-%$Pa>eBa*wFU=H+DiXSJj!=!&ri0+Rbj_omNcw2y80-oQDBp(@j*V%-SvzK
z*Jq`!xtp1Oq+@$Vlm^p$bJgP0=QJ<=uv-<e`$yKnsIaqhT({3FE%@il+PCI+pDpjL
z;5(JM@4{!=R+rU=bauozO11s#Ucez{zy7n~XQ6|qCjK<PliKvXq@i5=*SUS`GDFo9
zCfq(Kzi!^P_z%sF3vcuM&{Y;V=5l+(zb|Yz#WG)teT$g9=Z6Ku8*3?+oC^J0%Zy)p
z*u6<V<J7&aH0DaCfSXC?jjow8dpp|}tTz>ApQhh1alcu!)DDSnFJf=3=6`a$#_Z2+
z#zV%^NBnN&s#)*5-?%$i=Apwho1G1{Gndcab&%ge(K|<P!_O0+(lyV%XJ4$L`+je`
zSNb_Yo5C5wa<AX4FHHO5#=PXWt;DygJ9ifyz4>7Jj^6^E>E9pP2o~D>tKi+W?~4NW
z+qpCD8y~+Pa^P^k@cxC#{dpNpc7hXUPp%UCe`Mx8hx@LH=TGh7FnJu~*6rKrc!;a@
zLxtw#jsWSu@*InooMp-}Sux||bK`bxWr=5{4p|Iu^q948K2izTxUGA4GViHrJc$+E
z0_S3kv#ofO7Uj;_dF$YGzUV&IgNZ@9pWiu|p1z@|d7x<u_x79oGt{Q&K3`B};IO|?
zGsmZ0>8R|Kf`~PDqym`Z)t1-aZ?=BGekk+SQ%;6?uWoh}9C*jk_<&!=VxHhhuIamk
z|G#qmGp$nb%=-KFy^Bk|KS}T74*K`aHUCk~7O{;D<>3<qs@;whx1`*ebWCnubJ3q;
z&Hi<#7`M-#QrlhmY3I!!hmFM&&hsqiJMg+)XCv!|J^2QYR`XVT*kGHevUvLZ11J7W
zd-zdh;*64auby$0mohDQ*kFC~YrAb(YRl~jXL@~4N;Kt3O%G;o?l|G=W!pVHU|S8x
z_h;u%TrA&KRQPFjN!Txj_TGJKV=N3aE}lBtn15;J-W$$ew=E1=DeN`XYQz8MCULQn
z3zE0=T5Gj^irjf-opk(!XXQIOPGvH;H`hI7-n0F};f6N9cYDocIE+e*j{R&qK1p{~
zDBshkix~TE<TM<V?R9!#tUmWrq|?sLu@X=FoaT9S^DnE)xUC_g&gLs^%_-B+q-ODW
z?vqIAC0>sMe!b_LbYJj^c)w$6e8bL@S&vUYJh5vgPt7y?H5=b5KL6o(Cw8ahUFl_)
zRP5OQ9hom-`OBW6#^6zIUiC)t)Y)I2{$>9ABI0-DuP;3b->#*dsoTpZ_%eFu{udlp
zHr7q<pJme4UY!5)d}!k1?fD<7F8FW#I9anKJnq@q7?*_KoN50;=JyG=Z(MSjVOE%Q
zZ=zZHr%Qq||I$>lo3eitTvC$Lo_=VLtX*Bz@9cMfRxrP}uQj)Azjyx1LV-(pD&FkJ
zAKs7XKhW{;%GzzSS9<>vtbXRK=wI4qy23Bz?j-Sd1rId3m(?Ero?Wx1bH=$ZGUY+{
ze)ZiAeZRD1$1)!g!JUyWm))7}3F-=opSvt0mnCra{`tFe%r8A~*z|RK|J?m|V)Sl&
z?SEEcrXj`K?jN<JO-kfoQuwW};rsue_ucvY%*?|p0vFG)tNq1WxQ4|h?~YtY*RBuq
zTI9-w<mR}vTK|99RIuCj#^OZ<|N38l-EmhyuJ5yael^48OeRJf#)4m$JrB7~`FxAl
zNnJ(7pzx7P+POI^{YwNVYCG6=Pd~Y|X|DCzU+&M@;$Cg77ZW+J^L;+MU3ctE@mmwp
zHhTM(o}GEFV3FR$b8{XB{<qkYs@L&B<N5#Wz2`oBsNeZE-DzRxWtKAqi>@qDQZhFM
z9YKM9T*LH<=OXu<IW?{1!rC;Bg$Fv1C^QOmpWpW}=zE{QqtCZy{0`MJE;aq+d*<+|
zmGd{gYH1bbpLh8|&q@EKQK2iNY|ihGyY*4m{=-N1`vFfbCT`I!n)F=zzU$P?)0x3G
zkK}VtFH$U>5&6u%ColTbr?*L`(tdr7b9&z|cY4N8R~MJOtXGkHdU^sTAAe<Kxy^8{
z+~c5|A`N?PD$G@g?suMX_{1K@&3E?93CX|S)$vpQQ(=${|F>5@Pj4{WTv^4*-yi>Z
z{q$*jJp$+4*FV<FTBbgAm%ZD=HS6~CblB_vRxJ(_v$kAave@h0iPO8<*~8B##HA!A
zfmUXSh&{dEc`{XqYsu0ypZ(^Qepa(D=Z0`Ki$)tg%lG4Ta7j7FyX*XP{cCO)FL29q
zv-bojKi*~|-V)Tj=;E&RcfV@)bgVJcdV10`q{2*VdreCsZ_Ju`d#9=>2yigS-Fath
z?#@49%9NJpi>jAaPnz`M#^H0mKhvKrs(tUSDPMiB!Qt84;&XdS?k_SdsVb^`7WzXi
zHOj_m`R()XFY-(%dBox=zE)?#&-(I7vp?4h7pNC1oh){gh&BBvE%z!S(s)}(#~mdT
zo?1!Ocl;B0rmPh{HbLg)o{d+|&%S)=#KZF|N(7p(<sQy_e`<zT!O1xx$JB3EEZ)lf
z+GRqA#AM~ympwRcRPNZse8=%jfpnR|*K+;l^oo_&F1U8bm@z4uDKRtJ&6qQ-@NmJ-
ze{*I&WwE~}!hL%8{KCJxK6)O~>Pp;jI_KDwm*t<Yu87>RY5B*WTU6Z6N;p4u;+s}?
zq5onv*9DCd<K*&+O2+2i%l1hp4jA6Ct<;P?otk=o)A@?5D{@g=H+;PH!o_aZm3jJA
z{s%TS+5D~&Id5|M*cI{lPan*k|M6q9=mx)>B$w}s66Ge(4&3<e@!GU&`>i8Jc`pL&
zBDeIi@p^5@xLD?NYwxK`2g{3UF7CGZ8!tY$>))Z2a_fCno7m=@o3Zif)K9beBVC?^
zEu44s^hxmtU;kHmuyIW*GyJwNAj&mL=IdPTZ}WIxmi#rCx>rJE_gS}nHf2RK9OwO2
z>(%_-B%+g){@O~OohhM;>C10}R?p}^EqBG7UhfdNC1x7ANu&4wnr}^~mz1V{eIGMj
ziRaq7=p%oAUaow*Fp4L7^8D*=-<E_K3Lnln=iU9xFf&Fy?A}+4o40J8)~i=tJo~FN
zJ$~N#hg08Q%64>B+w)DtHom6$IFGQtSmD`O!N)hG)*Q9o+a}hZ|8VU}=N<nSs=r9n
zO>K{yW~Xm@OviM>_j%d=FE^ED+}feCd?WKopO^m_PTJmo_S0QH^-Py$%f8#~+v~d)
z<=(E6NGq<|_scYU`QE1Qs|)`;EBkyncz(blPw#hczX_Eu{qHAKkaD;DP3FaUkNm#o
zfF{dM{ZILyZ#VD9g|lyB>k4MvsmzxsJ;!Gm{b!FcL&TEbMr~`q=YIR}Nk+dmy7iLj
zeqJZuqQr;n-*Ubc^89#zsz>on(@*23#DzhMS5D<_NSr=Bl)YK&*H?|Bhd53j%iS)z
ztaATMo%<hj&vvIP8yz~ZCaCk!#+{ns$4&-mqqf|pJ%0W&KIrScRVjD$tXpzs9=@>5
z_}7~WUtZ3Y-XOZ(rH50%cfPOm%P1>tyZmD=4<(t8`z=!5l#+Lz@!8uCfg2sJoiT`>
z@hem&$IC~pI;Sk>=%W+zc@MM#=4iQvDfIkc)SYp0wcy?RaSn^`eHUtNmshWIK3=Vr
z!4c*1L~K#wi_<UWrRgbMU;4Rn(a~`3%a&3cOS3sATdC&g{55@S+4uS1DK;jtqh4(W
z3)VI7yua(`PA3W3&(2k8hiA+@ZyMuvOg{ZGLsQM33x_wJNj)OIbt3cYH~*jh5{zHP
z^z_u<k6J0OOP=x9*-N-)R=&3lIW;HxrqxGjVSaXtSf#&<!eb@QU-~Q8wA*N(OG?Wr
z26dsk^%dLxzp=inaLB--#)!3p;Z4ffeD?M7_5y$2_8YTJ&HT)C`O}nz_ZK(sO#WlG
zFZzl~GDnq<pXs`e6S|<$n2U|U7dFi7-ZS&R{S+A%oo{Mv2U8!YJZ74{x&8=qz_J!u
zCdX@PPU|!jGA10I`0=;SO!18_F48wRH~k5>PcT{#$zf@banWE3yVoqPuImrW4cH5;
zRR8ktFKls{^Lv`u#JMP~rk8*3D0P{wKXmQl<eMdjm?l~5F+R{Ym!0v$J1(K5@=2=O
z7^c0qb@Pil$k{);H<-=1?5*dYdp%z(Y&=!eesb~}{`i_<w1ey7f;x`J%kodWDfLOZ
zy`@BI>S^wmnm4*EjQ3mqvSi&6bF#+ngLl!@S0}dH*L6Ide`Nh@u}#YztGn8>K~Z?^
z*J`!tzDb){_1K%=@jw0^q9Ks{_(h)n{W)7c`e>^sTv!msvAoZLll?~V&6C@VIkw-F
zyrDd2@p6XtUb(NbZ$3MgWQyk>iEueCu_k`fu{CSOEW2l3IVFGSgq-4pC&>@QckO!m
zODN&(mGU1oTc^*>Zu{i-rQ2qnWzK8=Kb@TO^;I^!W|@Ch&-=Sr>I;iP32!Uj1({nf
zN~9I5>B~AsZWio6o9)_em8uzc<`|#0$JVL$jqlDq^V-q=>M{Rik2dogWZrPI;;ou7
zbG_47^?S)VznPZguW?T}Qt&G5_E~rN@U(k(LMyJxJzTv^{zBOg@i%X8_4zrM2W#i*
zdN%)e-rv+-`TFK}j-2%e?u0Hi2=7ZzFW`;$`8`wU-S?NqN-b)Z+k)2K^_Hosw~(Fx
za*bk!;Kb;j#K>u-wmBXZDPgN@Hw(IFeCdt$ZFl6Gf7GzuF=orbl}{AdFa0~XuU)ET
zSt^g}aTh5W-!{j)hd0*wq<&%K4*%48_{_tFY&&B%yk1gzS*GJY|GASZCm-kJ=Kg(s
z{aoX8zQu06Nx#0lG|$*~X2;)NTP?2VmHgFr@(NlXs}#JO)6!jOvty(1I}`ny*oq&l
zr*}P2_4NEPH?YO}|H)-L3#%2hv}F3NIb#AX)^FZ2W5R?3M>>UPS{5&}yXfk&j^V3L
zoZr4P0nazznDgU&^Bn85kIvjobDylMkUT@b+-~xewRS;zAOFN=YJK-TTp}d*_4t!#
zJpA^bTkPxCUiSPkf8RU*xS~Lfzps6{pPyM~d3f=aFeRn!3UfDC+%<f0<BVrWkwE#D
z=DJYR7wSq%O4s|U#MWC&8t?ZBQv@wdOZ@mNrDfKa?1w6|IazdV7z?b7)-&+3&73;n
z+$3!)o}3Gtoh?%RD=Tu&+3#;yTC`_!o~W!vlVe%#w-2{o^?0w(4lBL1bB?V0g{0SM
z`uXd=t)94k<74Z(gfrQ~H{4mCN{1YsIcaKN|F$RV*{Xue^q-v#6#kKQQET4M1Udd|
zrxrVPb(|1gp7_GM(?ny2Y}i^4^~CkA_aub-Dt>0HY<Ccwyl7&06|)YX`Y+3pSMT$7
z<X%|THdn?W=b}K>)a{0^cZY<@wNDa_y(7x`^YI*;po_=4Oyl^fcu$3uT--fl*56rP
z_fJl^<{$LtW4NUGyU@JyoTq<VHS24q7CvA&{H|`c_5B|$?~i>^Wzo0S|IOOJN)xo+
zI6U5`<>0ibFKhPdpSZT9=Rnz?hzWDsnf7eI<X*CFzee=499_eSY=(W^i(*$eUVOp*
zK5)AEr%T`0%-&}(_n|gZ`hR)3Ss(NFWjubv%3GSfJ0tR(qu&A#v2K1l|Hr$X9qs16
zx81F&>vd~}wPi1-bnPAs!@0o@TY__Id^aaO(%ZMa$;G9O`C&+&{R~CdZJW;*KU=hd
z!BgFD!V}Bu|CY$4vPT{eR;d5G?#eg6#g`|aJ|8FHs_=QIoVn*aW5G+#L6d&>@4P(s
zNT;`ro|00Lrc~R$mLvCT_RRgc`Sl{Fx3}Lsy|Fj3ChV@;B}aXKwThj~BQ?H1F~3pE
z!}WB-!|#zdWZ4onzC2!X?~cp@S7&kEqMIBoH#&G&ek2r0wRc%mu3Xm~IXQU7w3@uX
zsgGq!R=-pEvSNCA7>neMGbeNPny-{}8%$^Ptc&|+c%XKv@vlYma^5HXF@L${3!_9y
z_QRINcFR<qfApB#Nq66*d+F+)>vOcrHKQcV*tV=X*J*h9MbLhh?|g5&kLM-Ky>X=D
zowLEaj5CcJ-%q@jEs|fZd(eEbeR6sMZ*sh(-`!JBBaBO!rinhed3|xl?0Wgy_&C4k
z@2=kb_R-~<gzHQ5HKw!GYEHkAy{-B6NOjH4#e8aV)As#mS^Rz4vTa{2=cm`tR6QU1
z`1eBF7vHan$w_{eY!$aZ*6=s>=citGjj6u!{7OoT<{kZivR=qWg}HG`Pn^5_hV)dc
z_v=4AWarEAS^m0xWlp*Dl(lkB`y$wLyJzV{PB~T|R(&w_(ymP>JB91E$$#1OdlAd_
z%+26A_J}1azh>@KW?-55yLa|;v1-LrOYV5e-+#yFbl`l0=1j+niOCoKKD=@&&!MOJ
z+)hh<4W}GW_U(cR&(hAS*XHhDB${%eT>iqL4IiHguU&a1S7i1bzd30}!Mp4oiq3DZ
z{CCk^a&r>b!uQ`Eu2p5U;;(4Z+NSlT_NVKvEt#%}vAE@#r_(|<mL+P}%0D(=|8%Pu
zyTs4Ea~bdZBtM;!e)HSGG(YAg@1L#Rs9o{k<UJeHFTytS>rdprTeGF6P^VwKd_&4f
zrfHQ`2H!6q7ZCxSUGVY$QSpR#?-VYo%7h$p>y2USHe<7_GV);Pp1&=vWw)nvO~S1e
z@g|GeHw3H{$a-paW5emwtC}5ygqZitdhm3)eazvc!asYCm(AUxknl0sNan7zMfJQ}
zGb@*Eo_BYh!eO;#7qY{wG9^MZOYd3iIQ*n_SJ|{PUXRtcJ}?!ZxL!@!Y;D4&Y+H^s
zzozu@-b<*z1Z_H{b-Rfys}p6PEi~Eq*+jNKGvjq??)B`>uVQ#3c~H;4@lC_zq*SfI
zJy|iwjv9UASaU#%Z<lVt)F-b)xliu>XZ*|dyPia)QT3VRii$7NIlr@%9{NU2@IDLP
z<?+A%{@M?Z4lc>A7Jt}jH?6=?k6FR?E|X4;V}pXc{uhI)eeD;y_rBpXJn>QdwEh10
z<E-_S@z1*3rv>x<oLOt&5%FSYn%u3MN6%TV?6GD^5U88Hwf%PO<2lXWPha7WtUbhZ
zNb~+D&VALJwWrk!dDnR>f?G^a-vmxD-7mYRzV7QU*7jEW$9r7u8@>x24}QS#J|zFl
zcSYWcw{IP<96vi#b-v%38P!#<f*)H7?AUnAPF2-a-UC#qT|ejfpS8aI{H95N&sa*R
zXszze6))kHN;%1sS$g`$!;ps7<4p}G8`2b?99ZCbI6zJ#bCO)P(Tb&$z549enVT|{
zg+Dv0@261ktj3vH*u&R;eY)Y51%85)-G26NJ{x7*-|HLA)ZloZ{ZNP9gJe~+S7K(8
zRrMOBhw|o`X+>|-Y3G+;<~!T$&7GZ}ciq1yv$Mv==*#WR;>%M`&SULK5;+||jqR(m
zgn6xV+WSJG+sl?-UJ(<w;jrd`d6hzI?TvY=Y&=z#Nx4X{DSn$!e9%DU@#{A2&o?&h
z<_X%S`!sm5kZ<<0=U>ii|B(AoX)|HdyoKv`&fIU**v=Gx>G@<)bC=!KHgceY^X~7f
z&Ahm%)ou-FFX!ADU&NJ)ZdZQ%*0X<`>*`%EWS>2JTl)X{y;}B=$Zz#B_4AKD+`tlY
z#pTW{=_^~#$m}ed);0B+{_U?nI?^tCoxLRI?(eT@wkF3yi~H<eqpd~xcOJR1GK!|k
z@}>X%cR<JY^clt<m46q@yesa1UwXIw{LXh@qqF90J?FVxFYTG?-*0mK7uIykPc`Cg
z`Ca|vO*Z?=GG^_pnHf?K57*EC*2tsYyuVB+>bBR@O=4QVtj<}B#4g{~%9q-cu<^sI
zxdKslnbh=ar6LR5e<U1pxP8s8rsBxi+Em>=A2v+XJ+826%|!OiwsYA0^3SHeVdSsf
z^(*kU_=*?KK1Vm#r}SG!Ob9NyrD<a`=kD%u<FYpqpl!R&{B|c+uixiYP*Bhq6aW9h
zqaAfJUjH3A??fv0|L^@jt=-*cq2mhuUbmXF58dBAUue#E>B_#wo`vzBqLxg5|Nl2%
zwe9PlFZKslJUc1Zay_@P`U}S_bGs8Cru*o8t}@Vzaf%IBnBN<D_FdM&KLxLyQ~que
zuzCHn?wnX$maM1ydygu!b<WHGzjh2g&n8#-CU=+gvdJ~))$a%eG|rN(ogwjCCBLoa
z+=buy+8c8JK{uzqy1M%KudlCH#_Zf=x533_onZ6z*I&h2L_hNLEc^KRV?>T(>#_>p
zs2eZ5<n;B<^G{%#-PteqE%oDe%Qeh>OXkN%)z5A;+LwOb@sH^X7AcEal}~*Gj4m{o
z8~>kqMU|O%%BF)KuWUZQLeFIyukO*8Unc+f`FFG8`7K*a*!bnnoSA71x_H6g<|B(*
zFk^MfqT7v!Q;xnm_w0%!d)WVgKl(f7PE=NAo^JYrfBk%O-!iiwzmxyYK7Foi=Ev=`
z3amFBeC3~5wcyFKv$HFIKAnE@%$X;k%LY<UPx~bzF1YhpiNr6iFE3vmUtasCa@DRz
z%hUDJnI<g~8=1uR)N^z(+$?)v=_vTa-BD0M_w)9I8Pi^-Rx9#!g$JLUq&m|ek;!|S
zPU5{im7opGs@~H?)=V{t{LDP><opX4E_AawWnY-j?#l4`K5tk0^tbt@bN}W)<m@`$
zFaLaI`aDG|t6A;xbu-S+HrLkZ>d?>;i>dD4d9tMX=g;;#_w{l%N>u*T<IY^Z*CZqR
z@qZ;Att-4C%l%}h=|mm^Eh_%^^Ev1+M@~*oEg!|nvuB^4VVIn8WrbkssVRzzijB+X
zRdFp2l#f+Xy54;*au3t$YnBmP-!+NPm2>ku9azXU19XApzS?Tg9Pn(j+!q%Yv#;O#
zO-fNwQOl1}TwMJ4g@w+U7ZxzSzP=u`6;i9K;{@-5_0NNzMCypmtN&M7_VyO5Lr92-
zeBBR4z3JXZH5))XL1&xgR(!u(4%+Q<dAUD$zjI?r<LT0CPx!Rw9`1ecZBv4y-fSjc
zo~-xYncPd{%MBCPH9y`yqjF;>>*RwD*Dl()t=0Rr@L5*ugCfhaH#ZI*Xk_;E@kx1d
zV&azU>wfcDSRIOrjGWu~Qg3cb1+7PQm#+<JpY7_x)xh-GGiPJO37>m$Pq^j8(iZjH
zKC5k9^z&wFl1sSsPR`04i+2lCBDv3R-9BOJlSf~7#PrSVy(1O&<$pxl^0!Byp4y!k
zu6un|X!p@>@z3Y1-y39HPyj7fDSLl!t$(xhk-Y83MNd4WOtV~ETv#|cIc@&^cnrC-
z>iKM=m&M<Fs#~_)?d~o+diO!mbN**xt25P#m(LYkJ-5c>`ng9T{B@~kLl_U7p6J7`
zx5sst%yRzq$*<F8e}0+lzcFg<_0pTN)@3Qbzr6+RvMzgj%dp^qgWo)xmHw@-U);{$
zzqIb}uQ&Jh&hC{qH!6DK!NkNAA^}>tZU5@ZN@07QS*H(L{<YYa_{lf$PfG>6bKTFk
z*Q_P|9`wwvO|RE`6)wN@-JJ95)>nUe8y^4RMsokX38fn%bZmaV+5G0_X7=u*PJe%W
zm9($3@mn4|pYJ;8x<=5^eH%A!^qFfl_5HrzeO#hiD-1xDTm8Di$H&&hel>oXegENw
zOM&L$-Vr`qw6jzG9-IAmQTO84cF#ZjY1H|f$oTzE@p<0(`4=t(fL0uyxBq|U(o*l8
zZ@1l^VPC(`PI29)B}-J~>i-m0eS32<*x&YO8?SW6tt~4d>u`Ur3SE6{+1J&Es=6uf
z*DzMtzMH*B@z<T~qr1-^{LOCTS{Kc)C+T<WSj4sUk&cJkc)73ay1F8;xs_YosO(Jy
zXmR^=z1SN&i<jGJ{#!U_j?BHP*Sb>Xc|J?Mrh0gIJh<pCe>CC3Uk}i=F6$ju1}%-)
z{ww{|PqmG&XSWwvzjfUuXtizYMTNtj|K&gP*G}6aa`x?)9kD7k|MS;mp1!%IuC%dk
zUjFZIZ*OkPm5$w2ve3Dm545LRSl#c1@Z<bXReNjxe!cGE>bkN1f1O$Wy))7IdqIa)
zcY$_)|2WUW!m=UdV$8cWp-pe+mmixMY@%<Iaq*B_KgW!{cYQy5sr@S1^s(gZqH@s{
z*7~z!<G(Kndf<6&O^n~kNvhm`tY)801MS3~XIuT@^Jma8o;}j$e$S2LmX*G~c5zdx
zx0=t416wjL|9Cb#zoVlg0DK7Tdb#eShs@)BAN}98J?XUi+YR%rVwQBthn;M9Up4zc
z%A)dgd%4`?#i@}q<mcCG>{>Br?~Lhlk6k-&b#YUjMYq@)y^_UVii(P$t^K0fVINNG
z@4v9Xk@?x#*~|U=y)CM~X!P5B;@DgLeOdheI?y?=YQD2T>7_Mf;f@F2-Wn=vhOg6c
zZoBJzwtb)Cik!Vlfq$+rSK1gUzQ5F)to7&ijM_&G)@tR?XCALM{IvFAYgP2B@b%~R
zRDK36+yrf6ILvRKa%Dx}m36Vv^Hm(m%FO=%zW@KnkH`H#zwiHlRy@9@Fg9<Ko|4k_
zu!WZQZrU%CkFT+sCp&p}anbw!)@=_gE91_n>h`>O=52iY=yJ{-N6$!KG`ls`Y|`ZL
z3hV1Lm1p0u+GwlXzBlAW^P~4$b8k;u8@(NLx29G3J0Bk(p3KZl^SA9C*I%c;y0X&6
z)wS@)hs13;HxJFu-?wsshEkA=$c*EEW7kd94!^X~+x*m4$#c4a{Hs{GR|r?vCA{xR
zpJ9}#6uYZLbJOk0UtcmoE1;J7%sh0Ukr{Lmjh&sHxv%y5$jxoj<El6(pFDD7WAe_w
z-)>+12}+&$dt!H&UEK85mgRbN&!@BOt3<hHEa?rl-1UW5?9ANC%PsADtG|D`Q+$47
z_H{i_Dz_?s7qevh_Y)J9_k6pR4T_7v#crUD4tLo=_o8iVV4bu?C2Gqq_VSZzA$j&(
z&GVF_6=dSRnMqnN+wkYOYsl)bx65w-Q}v!^Q1HM3v`p^B#l<Uwm-|h;weQXECj!r5
zYXRQm{QG_X|EHJB=L-o7ANJPYyX5d$Pzk{r?&ik!y<|`S;+u{k@BeYSZ4T9~YOb)A
z`*yaP@%77Io#;&smzH|p-?Ft}_uZOLC)If*janwF`#<_xWhlYpn)YOE`uTZ1a<;QJ
zrJi2(7&Oo<R#RD7Y4amM{@M}sqU`nSH{5W#vg*3EjOE)moz-)?4)Q%aJJT^CVL_ME
zzgBrrLfp4+A83`HYtrl<H)XfpM_@ya*`H!&+jro~j-OZF*~;FZP_Xq)>7z52N6)R~
zzpy4TbeZ2jneL;D7R@m&e&!Qb^U-x%&do*Y(>gjl9?EKH2z6dhNVrq|{==yQ>Nh`S
zS{qH?zB+Y1^Y4`|kL)wu*H_Ore*5|)SJ(c@vu1hS-&Z@+By$qzJR6aBb3#Sj#OmMA
z&foX&Wc}CY*W~64M(vy#^+W2U#kQ$)gXJIeF4X`0rs1E~3D$RezsG?N)~NsYSzgrH
z#f8hXIPmrYcKMnKmzH`L=WqYL@<JWY(${6d+VQFC-*uH0-+9{oe!{$H@#4@4+%^Ax
zK37pw6A~9+p48Fdu`v9Jn^^tU+}mbRTi-f#w$*B0|8hU;<!Y76)7w?_>Uu4WAMbd*
zZg<E8ZW*hR4!2&ZjU_LGY}o=p`zX10DZBSw`17tz;&61}%DWtQO@7FG{i)PhZdQM2
zlPWjpf>N#`#{T1n@0Q>9UG6ux;$f?Jh=8D=rcl=xTZ2<mwT~Yw)#luopXh%_xFV)9
z>!bFBdj6x1JHFMQUiNjhsEGc<yXE(fzWxeIwJRl*l!8)3UQRq3Zz;pK>;C=jvz+QX
zz8+jTqqh0Vr|Bmj7fmqut9k0irAU){v7=q0!GU^7$NOYK2T_;4z81L9iB;6a#igrf
z_M>;Z!d6ecdB5I$xmYPEH}~~C&iZ-!!It@H$Cs>g3ES}D@+DWUuIZ~LU-o=|Z?B|j
z))Y_#Y3PXE;x(0;`Qc&3yxM1q+iEVgt+`^G_1Au4^)HEN^?T0OxAD%jtd$B|Ew{8n
z^KculvXWBMak*-rqXL2xm7XuN;Hh2cIa&Sk`v2T_emzie(z>%SuAuSEW9yriPwV0;
zH`?p(U&3wr@64;Kt3hqGj~_p_w6=n_7cSZm5du0a#pV8gfvHw|y(8Y9d&QYP*J?x4
z-TQWjTE5?39KJqI>%{yM=gz4a8BN-Lzi#&Lce_D%D+;zAU6l6u+1V{SmkTeQW#j(i
zt^d6*a}`rxe}6Ogv(EZGUu>G$`LEskDE{l)TW>!<zWADttzC*rO0LsBrrrCoVuc2%
z%Dgs>Qzpjd&9ZkK?qRhjHp<68zRW2vAT8}36g25upZ&&yhfdGV&p*GT@No!_prGcZ
zyjK&C#?Q5>oD`O8U34p^bJh30;~7TxO>V#YT4lK*!(g7Wcf_~HPN$~ppP!w-@8qki
zt0OlgG=7`VZBhD41hmre(h|=tIX8nq2QVm|T_NP~@zK%9y-AW+yVEzmUj4h!a#LQ{
z+iyGOzM1(?<me{zjfH=;PEFV6Kd^fJzEdlMm;d;1nE$G!-lF{b`&``JL5<~(LyyiL
zN!x5#fA0I1%$UyH`K2oQ+Dkq1*Potv@|@<yD_V1ppPFB<qN4KT&igZI#*@{2AAP+Z
zpMHN|?JJr6pTAy@-<*5f%&kvmWfFLA?Ac=}MjwA3>92e?ab3>OC9*d&U5zy?jrum1
zCRJPA32olFdD-LFuYb2^g6gFSip~{39<~?mzWYk%z{cd`72j^A&$Oxh<eIIfq!c6q
zTJihi%jHg+@0SuznRFgZ*(f^m?Wd|3mGyGxiw*q^OHO3h+pWFseS2GO=FLs2>-YV7
z<$GWK*4AwI%gcNvjZ!+w1O!2sz5b~-tTWBEa$W24_V(6CjlbXTUz6D{XY19>#v8FE
zLvZ56iRQWSKOQvmgAPd24qta9#VGSR_~P7&@xF7drcP9L-&FYc*z8Xm>p_Rwdrnqc
z6SH$t*40%jok4zHbfWd${{MAKN=iT8*Z;2$&fR|d?Xz>%@0XOnzxU<!b$?mN<zjlz
zo}8R~Wp#M}`hCB;($3DhYjXSVwdnkhm;LQGW?WR_VQaQ(0&V2%iUD1O_wxFB`Hvqz
zvaqp<{d{|OOQ!JK+uPGGFY}$E8yzMKxv}xzsmuQMy{DfhJv!3qT4h~aTny@EfVv)g
ztG)(-_9`D*bmHlWbLT+!LjQSR|9`RfbUoKH>-qKnD*NPYPi@J(eB}jbH}wjm6Zbb}
zUDaB&cyVWc|MHZRlU(zDPt%Y0dw*{)=p5~Z3m0Y;fI>1v^2z(Er&GgkY)<DlPCKI!
zbbIQ=iH>)7mHvFOxPQm<dDT}gfX&;W5w4`K-|yBd1!{f&`tmY3W!q~|Q!#7nr;G0L
zD<`_TxG-$?QV2hE{CNAx$?BI^g=&M2>2Yy!S$KchOyl%TNk_Rr)drgw=r{u9?oa(a
zlE!VV+~S`e_uGrT`Yo_**|MPZakk6+=le}o^S!b`Nl7VPGL-43dR)ar))+nU_j|v`
zExgyKKEGxXs9)n4=;HE(Nh)$m{Tt9Fad~%UJUu-<cyroWr|tjd*Z;G8e5^P5?yk}Z
z9Wm1t;3$pBonQA$6VxNizP3g)r1ABIh0dUDAxpid2kkD)HC@utae}o(B5GZi`n-y!
z=)9e+=J#thUs$vG_~VBY-Q^x06^{=I0Bv+->D4(EI(gD0r<4>G(1n|YkB_m|)!wgs
zE}M3K-q}s5rwydO?)7v59fY%Pr=)S(hf~_?K}R0_`SXYCP1U;_8=J*+qc-H-wbBY(
zv!YEH6bn;cKYiFPuco2V@$vC-@e^N*t>%K9R`>VU#~+XTS04oL;eT3XRs4*n^!2r;
zll|=$=HA}Mn)f%Z`mO1<{QKwDL~gzk0bYz!q^uLWYl>a%uMMTI!<?L)xXx68cHQUQ
z-6be5?;jB{L&^*6w7e;d%xn{;O*<AIUmL1$P2Jjh_K6c7$9g24udj=}>HszC=G(GY
z*Vmulka)P_K_h$Kk6)lWkl(z?+4tv@w`m{fbb?ENou16vcrypI>>p&}lqoI8A3ywh
zJ^uN--S783JALldDJ2t=DeL$DlWG8ULF)h2Bqb+bH2`hVZ*~)VI;;9|uQ}*e?wK=Z
zzPY)%ea;-2OP4Nb1TH#a8vSgJWijZAhkg6@nPgw<`SGJ-na@n8)6;aXD!{dKb8{!3
zon?CD`0>e;Cp#x3D0olP*}3fW9Mf#EyzR5!yveEi^U?j>982f8xOq~}E-p{<FP&Hv
zV=O5t3EKIXb9dL&H*em2t}Fll_q%cKtu5Kx-Wa8xD){r`<L5ZgJuSh@{c65m4WHqo
zW;y|!jP7{sE_=Hu?d+@>K5BRJzE|$&=Hz^MWo7X7l%1d*m2SOKPo6!S_kGvZtfQ~L
zPMSNn_hwEQb7x0K44Z7*wCrVWy;7hnO|42_{dst?zOu3sG!{8ar~Kj~*ZPWUo9EA$
z=ePYLkbZ7XASkT`Moei^Qc`N-l{OO)7H&S9X8nEF?YC(U54B!TnR$7+zq-0QJHzCY
zE(r;sJDU?99qIh>>(?sK5f=YI*1dkVe16@c=<RtaX=#s2tTHbx;nWUaw`bMqg57sn
zSXn1dp4>ctzWmkI;px}bL{8BT_v2MiQYvCK*`#sZ_w%!}o*o_#*6n_GC~NDctgBiw
ze8)p$kNx`k`sB%z2h%niCLCb6wmyFS0u3dlPy87v8z)5XDNszlFBkmc+~M2xFAS`O
zgm!%1c3VSEsOIXk-P!NszRB4fnae93WMi_V;B60^@?*PyGdemrN^=ASH4RFZ)hm9l
zy;GiYG+)hXkI%xW_t(sXmuH75DwqowdtD2iogZ8;&$D%PVriPQSJ}c<CtuXINA9iK
zdT0@-fIW5f>Y@~lw}-q|2(;cgWPDSrsiQ;00^9M>xKF4+k?FFrJ-AfExJA(B1?!*g
z-%NRX_{8IUB-Z{sJ8ye6U*`OO1#kCHG^v~ubT_N{Fl(Bz+ktM*W=@;P<sWZ(J$O9*
z$i^9hJJw$_@_ArcJZH`P^vORjuRE<DIk#H#XM%N2^S_(FIyyQ;M5Q8iE;)Tz8^!m0
zo9mJq8}U;u$)<1Bzdm01B<1(x@P$`OBe~D7>YNiZy)(bU;@;lRmjC{-1gX~@cxk&c
zg6YypWsb%3f6D7CmlhiBd-$-Rfb+QhJU_jsnjdcjczxgbc+>O8clWU_|2cnsKRD%z
zFzvK3(^YzVx^l<2cZc6H`G-lJYuUPFzkT-|<I10tr><$(8P)pvUZ#d5N5je}R-XO!
zwu`K$trBnXab|dC_0?d0?fIS4|1uvCXj>VhP<i@sA@e+8_5$^H$EI$WJ;$|hPQ;d;
zJ;pnyo9^HL=-wu2^9OeOq{|O+UAiCLA^l-x#LqvK;k!i_|9?MSylv^XwLfpawC(xa
zBOJ4?JJ0@~=!Pr%zW$LtZm&?E-nF9cUyXiXw4mU`LoSEaU1xqbc=>q$jvw!X(kFj1
zW8sl;_E43M%jP`u?yt=M6844uB}ZPRADa63*qii2v*pfDOm|QGd)<1<nwjqBRvP?S
zlWl49B6-E-$sZbJKdPxGbMdz?&i44FHg%H!*2Cr}y2KlNg4%UXolh0Z{`ZMFD)8A<
zu_gbqIreRzSr@&&`^@_rCwiyU{l6b!arWem^&4#OiPzhom}Na@OX*jQ?`h&O`)gc2
z<#4kdm_2v0{Fiq-M1>!mI(P2>&ZEXtDi_$rwR3uR%<R$3FVI@5`tYyRqZ?D({&zo|
zBlCwP?vuffYf67MYfAX4?UA&$5Hn^v?3^F8EHtyI|4CA{VZ)ajoHKUYxdl`R$h9*s
zzRI=Z<ez`CerN9dX}_hm-$25B+peV@9U^*Ck$Yxdt(_$_ajC`UqwGucZnPd;Z?rJ5
z-EylYtNzR-|DPuc?Z4foxWueZP?TfWU$L0m&nKAW@a%kMaHj3m6sa4m?~nOge7n`H
z!q?mU;kM@0KRlMp4|uDoC)+=GvgqfNXHDWCUVqVgYB$H@=lA&sjz6)JRy)7z$R98J
zN5?xwJsY@>x5az>%~5@DY-6gqjAfJca{+a`znb>)%yqGCL1y2$mE^h(?ab#rZvR7$
z=c(q;jN&`e>4i0qa_0OH5V_0!|JnA$bN762?RNIOT_@A5-@vKA-16h6|Aj9;`${zB
z9W`Gha&%T$Yl}rt-248!xrf6HCx4ka`y<oN)5i-xOq`gKK7-@M9ogW!>Hh+5KeC;@
z;(TXE$Du_ca?`A)GJMFr*1A9|&uIG^^<sCUb5C>%mL}OhI=DBFU+&HWUzKWqHap*>
z4vDMHmlxkZBpN6kFZ@K-YL@-sBk?Cn8!Rmtwjbv1Qz<h0<Mb@t_~V?nIXCa`J2$c3
z_S{4F`$r!d%)QU3Z@E7~W^>&kXHKW;kYgdMR1%anyp5f)_;&K0{XEa#r%m3S$m+LQ
zRDb52e;G38au%dGuW4B6qoTMrU$y*i)ww@grp@WKHfXu$*Lpr_W6n&UV{!ZmOyOKR
zGvoGH^Ut~a&*aiCK83Fix_OoPN_A=<EDjc1@ak8l{NEtCC)#qW3FDck9DDvg+A1n2
zsF_nD@k{N|-Vd{zj`G}7o>Nh7GH>(xpIdfv3;(Qq=`=Ih{n6X65~{PVEjm+we4<f`
zW!!-nC7v7nPad3NJYzSZ{GrX9pQrzSHQ16adz}645$=fkxkt_|eLnq{ZhGH0g(WwQ
zryQF8#y0bZX+_5+J@fQ7eam`le;wu0DeWERy91K0czlwRcbRvVy<qakqaH?mbMD@<
zDo?cht9yLevjC-Criy#plb)Ft>4(`kGaPN}RZ;n(;QnjB(?`za^Y+s(UuUTJn6kS;
zW>4N_*X-;k4|dLXb#dWh?$x<x(v`T4DerpzN8Oh!Y|g29l}pXtoFy2~_bRU`l(eba
z_n_=)yLB@^+kwe(Q-c=IQ~N%7q0F!IMLbguoZ%`eK5%Qn$(wy4oc9F^RNfqUT{K@l
zV0G9bmWsK{>$yvBM*jT&&nowp{`5KW4#)Us8?dxKE-qqSx%k;*Gu9-#&2!JhAD(~q
z`F6hAAK8=A-yKTO^Pl;j??A;94n3b1zDn+B7lmbFhEuv<Hu!Ho&RJ07c`4ZT<SEAJ
zl(V0&-<@Zl_VVoB$qTsZEFRS#iQQyp7j|I%jrE2C!5ja)khri#YDu2@=M$<~tNS#R
zlv;Z-Z)`|B9MX0u``(+IKjbZ4?j|`-pQo`vP__NP^nM2Jwh3N0R<TanoTWXF+1<d!
z?9hHM*{1i|{qsx3J>%1ZSEVca*g4MV`uX_!QqgM+@ikhr3)`=#E=g6n7QH&*)4OL^
zwz#IeEqzp9ZtWCxudn|2H3@#bM?ZcAP05NEUTn#6zsv4P%6+ew$D<RsNf?IbFf8Bp
zz+#rXNbN4ZB`%M<Bx;)sZu3nrdwf9t+4MgT7j0%Vkd1zBePWhH*PSVQXUr-)w)c;5
z(YIyJTlJ>YZPXU)7A&56Ov0A^<UHB-)lu3^S|O?{Dh)5y`yE?fRm$}KoliiB!OO+-
zl+Bm0s#bqHviz0ok7;HF&)P+rqOTpx;5+n${TiR_hSpyXgLS)Q(j%F&?l^8-Hq}sK
zZNzG`V>9X`=HCk4c=sOL?bz>v5jHcYJX~Cm=l{n!w}0=im-24q_4yZ98ehr2+jeVf
z_Uh#B4v)sf4^?~PrY`*O>$-5i@U|jL{;As4Pv<U3Vf9gb;AK&A^s?0IyIqR4{O`7`
zzC2gY)ZY1|!=B8UKka|8AGmq(<mqSfZ7Xv$?zP<A-}_1OkU==_(YYIb`yZBRW9WJM
z=cCH9C-;R`xJczWo1fZ$w>3C_uMgk89r`Oy-(UImz~1)CiYpRw6%QDp4V-g}9{5_f
zXuqBB``YtHZRquvL7%O~MIOFqcWrnRa))!J_!$$?>#gf!OCCm62yH&M_xN14J7Ge~
zYCQKs?o`|UU$IR|Nr@G7gW}F0MqkeA1$vd4%Q<SQOpd<OSUjtv!(-y&iLMj1r*3V!
zA|AGaPf00gNoESDB?vlN61>EU0EbKRZu&-!h$%%v^_ypH+HiZqq*I5rnjx*LFAnS5
zElMA;%<gq9pKxQ|?9;n^XJ_7f(x-HH+S=9Emt|gF_Uej~QqTg4J#p*9QztaN`S9>C
zsKtJ`oqx3iXt~$SbCDt>9NO86^Po@4kMk_%iCcetwULM8<QX$OK0iDA@%#7VZ_A25
zJ@Gtr=n$x3%*M7U@2(Z-?n0Nuz`QU?Nl6jyus4hLb8~YmtE#%X3JD7C<SUVg;##s`
zfx@jVnV;LQ=bA~c-}9-9O9j+d)-d0_e16@m0|yR#j{96;^X1LW;G=?qf;;^{de+>z
z6XWFUyz%zi=W*FrRtRQaUl&^F;^MMSxL3!k@#*W=-pBi7&;Nb%{{8vO{`PARfzLea
zdQ$zUQ~jK5SJ}>(lK1ywL7@{EA#(EiuF}`V|M$OMw_C5W6>>b<Dc=(*Muq?XerE<9
zQv(`JQ@TD=uVA9~q<QnsbqFfo*p_?SH2&Jl%gZD8*V&feuQgYj{PNa20XAk?>#`;J
z_xH^>n+7^*O+!aZ=hXd&MLR40|NUP1`K&qU<Q~)PYkTxgms-d~ti6`KPiFh=vt6Rv
zC(fNqyR)NkOaA@1UOmttw^nmZ-_Os_jS~(q{Q3L0v#)Pi&dp7(WzmlxKb~n<Yjt&X
zxVo9yG%3@p70lpv>4~Fn?(CfW@9*!;>F4EkmB07fTlF=s>uuT27|>0JtlVNB9yIeC
zB^}}D64zhHrwFn_i9y|eUdo3D2Wvi`HQ$nd|KF}T#t{Oksi}tP=VZ)sZX9@Uu(`9d
z^I+1(70qBf?#!9vr@nFH#vTd7rZ!&bOFN6xnV6XJe(h3GRTUH$cXxAROFrJWG<dn+
zD;-E2PTyJl+$iAyLsL^zM_=EwY0-IsfqI}mHE4ihs#a)6Z|~BuwNYB&!&tp8Zc9GS
zmz0!rV|%{*#fulOtPFN{{VANcefHDS(~ax@)ht@P_~-fhf6kpks-Vfqz=$b}o*wBG
z-t+6#>LW*wUR>zR9<?Q-FnHedilv)2Z=PwK&iDHI`sY`}<5LebFj~zI1dTl}{ZMQ*
zchC2G)jcwnliYfxF0Kq#|MT~6=nU;CbLRNO#K^pS`4Th<r5C$vg$CFYt(VtCZU&9O
zaq{vmt^WRQie|8z>rSt2`S<-|Vq`?M!&J=7!rDKB)=&j<_2=H+c5!{YJ)f+VOG$~z
z*6izHf$Km+*`J=Co@t&hcXwB5@{<!2r|3imfsYDv)tqtZ(k0NL)%WZF+k#S$Yqyw0
zubb#IsnaJ<CZ3yP37TMWY-R%u7}eC&K>gjMsIT8&e!q5l;9|GJ-*2~H2KnO8pO6`^
z-~9Oa*k``o+$~#7?p43H-BtE>73kj9LyPW2fNVH#_ghERdzy-s)vTbUUMn@sZ}iDp
zcXW3zj@en%BX9q2S!E8mFX+W6Bros3zwR$+>h13C^6R{dxh49KgKo1tGsm)c$M<{H
zlD1VQiHV7jJ3~)xojPmQsnh!V&%C+08FWkK+1cjTD?3>u{QUSnKRcWJ_0`oqUoLrr
zP7#S%8wNRE?dhu`3mMS(Tk@G1hIa~&%a**k5%^qHOi5Ey6Xb;{Q$+67evjQ%{(c?A
z3o){nm;0wbIM6snE41s~-QBPE9C6CIzAm=r``z+8MW=Op<n8CJSfL^63X15br*iJ?
znOXPu*TgAPT>9l~R|GCTW*XV|`HN!Rzf|*x17RB#eOJYQc#~W$q+Nf@OU=(|waL=J
zsT1ta9hrP|-q+U=yVd4~oiADbrqVrX?wc7O)n=dmw)A^$<C~cMeZ1mx-^u55{=TM{
z8a_?Ze);4FTD_S8sw^KS>_}y`?th-ZvGUB_-qlG555G*d{+T*^^0n_SiNEj0oO=^_
z?9k-U$_1W3Ojmwi|J?Z8T<ht6bFB=kzGQe!*IUb{ucTx?#V<?SrBB}8Z@yh^#@$_}
zp{qg?KRh^iT}Oi{qDlM0rl)J)L~olD8NcSylIMT!%m2${+B{wN@rwAh-R!Om$qYxE
zW8Zk#6;6zwr4hDLb!t=XK?#S54G)Db$5wjy+$rW!7hC1xHfj40_GxcEF8#H$)Kn;N
zug>Xd%LA-8X^G8|xwD~3{(0@uRmwZX-^ln0)kQVcM?BbiTe8<JYD>n!4-XGp7CvGz
z&ARen?e=?z9u}<l+zsk2{MmVNu{-GKZNE7dAVuqM_ipVCldoa4(Z0FC{BQY9&DPM<
zpPYNvfBhrvYb{&$=G%QHzkrPb2gD}}&M9M`cfWN1hZDOqgKX{hu(GiJF?@IP?G4%e
z{ww9?$Q^im<Xu6W!N$jXp3b|ma|ZuDp972ux~{K}zdJU0OM~soozIt978jU@T5q?q
zo|L&T>2Mn8kmmP_*MENACZGJYk>k~L{j#&!d7E!`^!24(Sm1bOp6%>YQ?-rr@7cU%
zQBo>$o)OyAq_3&jd9az?IO#~o>-yQE(`R$VRt5e%vHeW6#Ch4P(E==UZfulFZvUNO
zQm^*%dC}ClvfRrze<~?FHQ~UiDP2cwuFiZEcVPGAa`ydm>}M`i>)-Q(;c3zLjW2eu
zTynj>O?}o>`}&>pjMvQEK0)P=N6f~iPqs-rc2{t1PCL3$`KI$zBe!qr)jM{dSGaYR
zN3rsBP~DBWz5d6tw%xM&;B?&JS?y2mxIHzt$KCq-whPCYcDd=K9y`UdXxFu)9dpli
zf4}&qlkd6yFQrtAVuN{CR#<D~?yOtN&&~aKX8OFuv$IS=!;OdA`Lojn1Sh`sQ|@kU
zIR04C!r}+p-Tw2w>nx{K`QCcy8FgUV5gCQap_kv7<OekFx$ir3U$NojdQJcN?6)uf
zJSb5$HTwJy&DTFoz3=^ge_hh{Q|PU5uY^lIXCqF9Ezg^~=}F8I>3KJH&9GdTwI%QN
ztiHLcH>$m}Hp*M+X=<;$?AAq_UvE5;tU?|y@BLknruyNYd3ENsg{HDr_u8v}%Y-eR
z`e>e9sll|p{q9!x{Wb_1Ok3jF|7*v!e<$|o=r4JnH1WS|2#>)ZSKc`)Ya-k~vx(-P
z-~-*5QIfS0)VT6$d|JALv!`Q8>%kL;u2x&j+|fG4%vV!3;YVylNm0scwo_#*I=<#y
z*NT6wXT$!j_|#KYXJ4%;ADNR47tJz1Ui?7dr5`urzHQxYo0X#<Yo-<#R9u(a#4L63
z_ru>BiD%!ho4TqsnbBw81KX+IZnq2>g81G%bl5a;o^5sT_9^;u<^LW&yDuN07yO)0
zt1>uZ_hW@>^W0^wuciJxO?Y_I`NG!ItgDPWI!^eyw=aCEZ&}FPc;{O{@HGFQZHBYG
zKVNz6dvNBT-79^5h!{`UV$6Ab?!ySi{SO2lJh_>#va{m6-2JVh+y@g19;P?;bn9Jg
zR=)0?aZ>9_=bx`!Io8WJpPRS%MbUq=l?Qu{=N?a3<@xu-iQ<F$%3n>=_lW;=OVM?o
zy)AU6K{C_r-tteUPVDDvNas5?IqG%UsTcEZDk&8SUpleqkL7Cqm(i077*z9eJyh7v
z_}&h_n8q-flOcXHr$La`9=myecYADozI@A+yV-{3T}xUN?e9pQ`?l@L+hW7*`=%-!
zNHUu+DUtm^QCVGD_tZ~YwKl%peKN;>Ugz=3S*DX8S-dhzVlsH{pKkJ#NkAZkX??a{
z@pZXlos*yFeva6n-@Uo}=*umW6aT;UIl{#|iy`3ozF#cyI;(O-EM71$2pj^P3-By0
zNh8&6ckvVXro19|*0(xJN=3n-G3ZI^+MP>_pKpA8!|vJoy{+qdwF|@BkA<ub<W!!y
zX8!gWb1Mwj#GTo-LC<8><%#aM{geO9zJFoiR{iKnN|hfEO8wY1X~*U-GD-XA`M#cD
zEc3vQt#qBswT*Arblh)$eL(Ws)o+J4<O)~tZ+JC*?Ut-76V0rXX8w7{!EnpgtRqTt
z_m8aovz^;?-*BH#d%sM!V$!A?FMFO{<m3HObBuk>OvY!o<&!e57u@MI{di{H$qx%N
zzsTR3xgdVOB5Szft}xLY(^G#EXK(JyO~1&0`lHI!^q02;-mQ7<f2UFC%j;ktStl2l
zJg;4v<%*!Arxq<*Bw?K9^N?evA3LA;jhyH5+e@w9>@Y~)9QRN4C||p5PuLo<$D+E|
zUluf6JvBA{$eV+S<;hzk^CE16gM%yo|NZXi<8$Slf|Anpxo%=lR~bjGiP4FUT(fPz
z?5#Cnx9y|W9sT9+Z(@?P^(DiWj*dG=>aT|~fqEN|&cej~zU>R2<`-?sf2;rI^rxhQ
zoyP1sj;b2*d$~fr*#E!XS@}jYCu(cgS@lCN|2n9!r|miUY*X#ayF1RLu07qi-}LX(
zq79YDwpYyhw!m@5`dKk9E_qo}kyG}cbeei>`@`LD-KX6;eQ{52u$HLxN%i-APcF~r
z|MQnGgIo1L<rHR3y`CA6+U&p0f>hbt{;QaGoO$`*tlG$a?dgRjhOD5re5>nDCc7WM
zBv(iT@ys@D=$cqA9DDlEzq7|TO#WZtRPd<aPgTA%f7x!?=dJv)-z+v*D=Mgbi7Dsl
z-!oh7s`I(oXC4;Jsa~(1Y#*FFeZu39F-cYY-`4275!X3W{4KrxZsEbr^XI0d-aKEe
z`}a-pf>Pl;K|w*;wrTZOs(qd_&9c7UeZ978{{EjWOJ{!%Wb-g{6Z#y(d`<U8i|T=s
zU8R<P?ibgtkbUNz_Q*=DZpZu;)l4((>$R8NntIZ1+uWV^eO34VGq$<7#%5lR@|nMB
zjCNPGi!1)|hrAGW{_*dp%m=yDLr-?vT+nIy%e{Q=?0IXH&-^Q4YqQ#Fx%CJ8r(6Hz
z_e!&GtU1Or?Y+mMu(i=QHY#&|e#>vPC0pd$n!h!luZpcQ(K=gx|IOn2(zP1trJM4P
zTFts&yM3Q$C#dv1CHmR4;K2jFZ9ionKX@g%#wK7=-1%)v1?Nqs9dp0*cIvkRqvpTw
zYdZcfK7VP^8u8D&Hu}unI@MfW@9>fD%P&{n(9pWg6Joaf!#_9uf2pg3xBEqkC9ue>
z`Ta(j>0NE_ZtY(a-&A)LYWEv%IsJA+k??uDi>*O!>i>?{&AoSSmcPBy_N&}?;?-GZ
zU3+kP;o}I2Z%n#U*AsrOGl;UiFq!?bx#5$&PnVQ+94bo?zU_7G!NC)^rWEhoSiR@@
zJm%W}i>vrReYQi3Vs^&pv#<oLiIA8WJ8!vfL}8AbhaJx`FHsFqk)Es54l}KvzCqai
zMyk2+zX1Jz`U`m8ZHbjJzk68MjG19s@!ktg4eNRDG<;V1cwG5~%sY06naBE#HyJ)r
zThD&rDa)Guv+qsxtJySr#iaE2r!#at&z@4dmh$9Lyrw{MrnKss2f=$xrk<Fep5c-k
z6TCkm;L%*0Ynx54=CGZ6bEclJB(r-~go_JH$+<;;n5Ukarmm#a)N$#j<42j_H|uTl
z45p~9N_19IR$dvroX^yI*2#~vK34fpEPm&5@V=f(;s0Oq{10qBqK@qhFSxb2Aa(=a
zyZc{L;uUO97k*<Eu2b!)xclx;<kGk$pV>nnZhby~(av06;ro@%y(=T(VbhzJ%v2;(
zS^uZ^<oE4;sk}-}B5FQS=RDilkDrY7`BlaJ<3~fftTo>f<_S}$^2M>2@kg9t3o?3>
z@Kya#!h_kBr!A&ERd`)t6MyDj>%k|J=d0TKZ8CbZ-#1^=^K4XF-K`1<bp}h7^~Hg^
zV>B#^4W}5*fBPoyO+!Dc5r4kw#gNW#{(sxqvm<ZptmoI}cL`WOXCAYzlF}#b%Nr68
zx2@!1jdpWm6ISzC;5*xlH6hnVCSvyl)zy3xx4h?=_aJ&jiMIjUfysp(M>mTVnA~h!
z>XLbCI^W`wtpeZlW*uFWYQHJR)JEWP=cR*7SIU&kTED#Rc)xXBO7I^^o<v5L%Ay;x
z`)4VCJgyO@+hzSu>-@q<<H#SMjq2jWUFN61Q!}~1{=5J4%zb~{a}pP-%$dE<tT!*)
z;ohfC9iLA{R{z8|y{~B4CMu)zr%tM~ldmhBLpk-a{~F63?D-o{h47zDd{b>NEK<`o
zm-A>wfa>I{-)7ypzJjNX>B_&GH*bOtjca6P&jL5R&OP{AwO7meVA+=!fw02-4nx0i
z`{CA$W&buWwDn{6(}~b6c<GqO&(|)!`fK_7FBSzmbR%@d>-#U~9Z%ktxOdHGzPUHv
z>a8)8c3aMDEtWH_$@IazS+h?4e!t&8DQQs|o1oy%cO{M8G1YF|VY8nq=|}&39W5!f
zF-o5!-%b0P{mEjn*>CgL%wfH~{&m{Mq~L7Ooa%?^5ul+hlym8yth#8~U37F)UvB!*
zZHpsx#OC;^Cnzq-sazGl+GWGnPy5?bWbYr{bo906RjpZWsejkKHnZ3qbYn}B`9s~=
z!C~i{!~_L@GWY7766My2o}9HUS?6D(_SC$5x68LzP1^aZ*o|-YF~j5cXZU{%no<5v
z>AThX{hz<o)x~X_A$hv|abPicK;YEX_jYFS@d_gKv*j6&DrZ#2mw(7ftoJ(iK4>QM
z?!SM&yePbJa?|tscGBJUic5QE_@;fZ*kC;;#=tN#vg};-)yJ0B+1BT{C(XN@cl_Ot
z*FVxZ*ZHhIm%DOF8)yZBifr4o)t4upyH~6B^MM_MQ}d_(mumTur}%eo{&+ItY*;jV
zT-6HAb$7dG?z-A;eOe|pG%xwr_HG9I8ymCqZeLsXm?7@ntG`NfUrkZ?{6I_XoZXq+
ziH&FC3MMU&d+YY?*H53=J)K5VwI}bXx0moLu0JDeqVYKK?4DMYhwhKMM5nwLbiUs9
zuzueozwJ`~E-p{}Gg1QoDRNhoZ_0Qkv#;*GXwv@em;au8zCt#qX${N0-)Z`1t+p(4
zKK{EPfBxaFy_s+3ZRs&8U7T@ak2UX{<>zg^W<9X7v8)oQwGK&sI%}^*!KU}qf6jQ9
zadz%m>x)}UnA-hkuPBTbNp1L*TV+)BMPQfPJQck>ZqI0O<Kv;d*K(sTZ!9*wcBJ;W
zV0)AYWO0Lnx`cA3OlV_IivF`@yqOV8MHnnIk282!ZfIvd=~ngajqlwc$B-9NM~~_M
zKdjEZJ$Uuq+#3wxebb{%Pbad})F|3JMs-yED6N|6m-w*fWX=3Hhpw}TMKhQ=tFCHc
zXJT|Qd)uL2X>Dzuzi`UggAZe?Pw-{9yl;QMxJ54teCEH-t1eOPw!3-HZ|Cn<RaamB
zVC@3){s*5*Uwn<Qj@j??X!ln4&&f+}cODFx{A=Pw7qc9XloZ*f&u^yedK<@PSEhgS
z^dkw&5;OJl^Wt6<#aGN**v@S4`*`lvvZ%7^2`^5i#q9t6cKzhpI%x(X$7erR-I5~u
z%Ee7cGP2#u)cf&?BC%K7?Cbtm1TXVh=+w$}OAs6?dU^&16P(-m1m)z;O;B_OExO1n
zKWK6NT*j}vRblxjg>zSL%yiSSe{bC!AHcb1-Oe+y)e`D{leV8pK6p=f&EGuJqpQpx
z-c0OoPFJ<`Q%>XAap&0P&tJ~|zg|3P>*wv4Z8k}TJ<puPU!m|m=f`KcBk#`Cuc^HB
zZtINqYrNmyntJow-IQ(e=Z`$LIXla<P^R?n(Z%O|)eiVMS{EByThGqCyzJw*+xeeA
z9OmDga#H9m2e^aV#w%^+ldAFK&!0apE-mE-Z3MWrCG*TI({3SEF99*JvcuOjl?#<W
zi|MNC8c+3cS9rR*EqGf|XwJ#iGj@MapYyQ%!m6gmotx7)mz)eTV=;O9a@$moyI)wn
zg=!1tdK`${V_DkNw=w6XpV`vcE5ptmxR%OoIb()j!c>n_zva@Nt*LzDvoYCV>4u`8
z=cjb$INW;0`)9*<mbNXrHPt(`&byndtnrED={)_T?q5+|%&w+SbLAHoRvb{het+`R
zBdd;jM@+70xNIrrHC@|%!uj66|DKfXj@*_bDPRA`aQpo_>&<Cr7x~UMo8hAd>CZ#P
z&oyeEotx|Z|KDFuK0Y-ot64^=r$la)ezlObh)LKS`TxsS?>*AX5BDC;Vw__!M{JFX
zVd4oHt3CfD5-T>u8Su9s=5x~y>z;U}`L{>;;h%x0d6y-u3egE&@M=r#@3Kda9)Ye&
z+qiKfXuY^>tcy#Y#a*lA6F_?<-rm}(Y-To1P}!~EcJ6l2@FEiv6W5(xQo3O|)ArZB
z6?%WH=v}6P*cvef<!<ID503R*P5W`!dQI5v{EZjtx*oqg$8EKXyVye@l*^-b(&WjV
z-QADByu1urZd~%}il=OhiwhS|uZ|d_I%Mz`bOqnHx3@2EN<F=(`o^W=Hq+13Cbda^
zSo|!uJkavS@_;<AurRUW=jT%Y{`$%zXES5Q3<({vZmTWemfzNm#n1gfL$15a_3!R3
z2i@JK<;4gJ;9Vt}+~Rsq8rkI%j&uk@Z(I{>b=_(A?EHLxP`L8&ECCIN>PCmTYAQT>
z{CK8eGF#~CFjX_NX|qhTS3yQkug^dIH0jTekD#42zrMb{ywrO-Q>ESGvyX0UOa=||
zojKzJ+8@-;CmSLnC>R(qMf&}fmBFAD+n{BOcYgMyZC;nXD{j-KO*3q(&1ReBCY_j|
z2wFY(RUCBloMuJY`+L4VK0Le2-!F^ap6BA`wo2prgws#0%HxvO$L*D{tFc(Pa3N%n
z((B;1f`?AOzQ6B3o2G1KHS5EN0@0^hw>PD7UtaFN+^tvY$gyKZuh(veI=!|1^wUSp
z{B{p|&F?8_X?5wv?h=rYSaIl&vwOdss-7O-?z`u96g~#6?}NJikhZPuTt(+LgVI+a
zpyigU!`7DA-4#7?;zYu?H#c)`Zfd<AUq5$W?eA56;Gt#L<MV8*U)<kse|vlW^6KyJ
zTwGmOYD_=)v}j|>Ng*-aC>PK|V!Ij((3YtlP<(k^+*0u|DJdywL)6+YFD^PiH~!b@
zJKGF&27A`^b!T@JK3);GcUKp9-C59=op<x>{(Nw5<CS*XTlE!`^Bo->wa!Gk_sJ;g
z>G8E6cFf7q$-TWT6f~)o7?@|b+<*Qv(4w=``umr}?k?MsclXyaJAQp-<>tuEX&Xyl
zhqduaAKU-$>-t5%|KtjS!hdHBXys@9|9?l0AHTdd+FZ&cqaZkU|LLcX+U4sKK0Z2X
z^XY^#=;-qY4-!gwKoKnRbFZrRGy!4Z=94Ll-23G~tMY#4-rG^AY@BvR!N_RR#^mEc
zTeCt-S-`4yifq4q_PBignF)%{D}tBveZN<&f3!<fyXf}J88c3l->+@I{Wk5-kB?ij
zu6p(AgQqp7zdke57-Z*p`~P!_o}Loj=`d&7v|}G09<Ka!QoS?|Jix7`b84!Wk59_~
ze}7%G+YKaE#P8p?jzJu3r=XzV#QjMjoEWVllqM3f9KA39KNv{zizlT<FfcGMc)I$z
JtaD0e0suv#pJf05

literal 0
HcmV?d00001

diff --git a/tensorflow/contrib/lite/g3doc/images/convert/workflow.svg b/tensorflow/contrib/lite/g3doc/images/convert/workflow.svg
new file mode 100644
index 0000000000..3dfcbd67d8
--- /dev/null
+++ b/tensorflow/contrib/lite/g3doc/images/convert/workflow.svg
@@ -0,0 +1 @@
+<svg version="1.1" viewBox="0.0 0.0 720.0 540.0" fill="none" stroke="none" stroke-linecap="square" stroke-miterlimit="10" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg"><clipPath id="p.0"><path d="m0 0l720.0 0l0 540.0l-720.0 0l0 -540.0z" clip-rule="nonzero"/></clipPath><g clip-path="url(#p.0)"><path fill="#000000" fill-opacity="0.0" d="m0 0l720.0 0l0 540.0l-720.0 0z" fill-rule="evenodd"/><path fill="#f3f3f3" d="m19.375328 28.750656l361.6378 0l0 358.01575l-361.6378 0z" fill-rule="evenodd"/><path stroke="#cccccc" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m19.375328 28.750656l361.6378 0l0 358.01575l-361.6378 0z" fill-rule="evenodd"/><path fill="#434343" d="m338.89157 372.61328q0 1.0 -0.75 1.546875q-0.734375 0.53125 -2.078125 0.53125q-1.421875 0 -2.21875 -0.4375l0 -1.015625q0.515625 0.265625 1.109375 0.421875q0.59375 0.140625 1.140625 0.140625q0.84375 0 1.296875 -0.265625q0.453125 -0.265625 0.453125 -0.828125q0 -0.40625 -0.359375 -0.703125q-0.359375 -0.296875 -1.40625 -0.703125q-1.0 -0.375 -1.421875 -0.640625q-0.421875 -0.28125 -0.625 -0.625q-0.203125 -0.359375 -0.203125 -0.84375q0 -0.875 0.703125 -1.375q0.71875 -0.515625 1.953125 -0.515625q1.15625 0 2.25 0.46875l-0.375 0.875q-1.078125 -0.4375 -1.953125 -0.4375q-0.765625 0 -1.15625 0.25q-0.390625 0.234375 -0.390625 0.65625q0 0.28125 0.140625 0.484375q0.15625 0.203125 0.46875 0.390625q0.328125 0.171875 1.25 0.53125q1.28125 0.453125 1.71875 0.921875q0.453125 0.46875 0.453125 1.171875zm4.7644653 2.078125q-1.578125 0 -2.5 -0.953125q-0.90625 -0.96875 -0.90625 -2.671875q0 -1.734375 0.84375 -2.75q0.859375 -1.015625 2.28125 -1.015625q1.34375 0 2.125 0.890625q0.78125 0.875 0.78125 2.328125l0 0.671875l-4.90625 0q0.03125 1.265625 0.625 1.921875q0.609375 0.640625 1.703125 0.640625q1.140625 0 2.265625 -0.484375l0 0.96875q-0.5625 0.25 -1.078125 0.34375q-0.515625 0.109375 -1.234375 0.109375zm-0.296875 -6.484375q-0.859375 0 -1.375 0.5625q-0.5 0.5625 -0.59375 1.546875l3.734375 0q0 -1.015625 -0.453125 -1.5625q-0.453125 -0.546875 -1.3125 -0.546875zm8.024414 -0.90625q0.46875 0 0.84375 0.078125l-0.140625 1.0q-0.453125 -0.09375 -0.78125 -0.09375q-0.875 0 -1.5 0.703125q-0.609375 0.703125 -0.609375 1.75l0 3.828125l-1.078125 0l0 -7.125l0.890625 0l0.125 1.3125l0.0625 0q0.390625 -0.703125 0.953125 -1.078125q0.5625 -0.375 1.234375 -0.375zm3.7374573 7.265625l-2.703125 -7.125l1.15625 0l1.53125 4.21875q0.53125 1.484375 0.625 1.9375l0.046875 0q0.0625 -0.359375 0.4375 -1.4375q0.390625 -1.078125 1.71875 -4.71875l1.15625 0l-2.703125 7.125l-1.265625 0zm8.1302185 0.125q-1.578125 0 -2.5 -0.953125q-0.90625 -0.96875 -0.90625 -2.671875q0 -1.734375 0.84375 -2.75q0.859375 -1.015625 2.28125 -1.015625q1.34375 0 2.125 0.890625q0.78125 0.875 0.78125 2.328125l0 0.671875l-4.90625 0q0.03125 1.265625 0.625 1.921875q0.609375 0.640625 1.703125 0.640625q1.140625 0 2.265625 -0.484375l0 0.96875q-0.5625 0.25 -1.078125 0.34375q-0.515625 0.109375 -1.234375 0.109375zm-0.296875 -6.484375q-0.859375 0 -1.375 0.5625q-0.5 0.5625 -0.59375 1.546875l3.734375 0q0 -1.015625 -0.453125 -1.5625q-0.453125 -0.546875 -1.3125 -0.546875zm8.024414 -0.90625q0.46875 0 0.84375 0.078125l-0.140625 1.0q-0.453125 -0.09375 -0.78125 -0.09375q-0.875 0 -1.5 0.703125q-0.609375 0.703125 -0.609375 1.75l0 3.828125l-1.078125 0l0 -7.125l0.890625 0l0.125 1.3125l0.0625 0q0.390625 -0.703125 0.953125 -1.078125q0.5625 -0.375 1.234375 -0.375z" fill-rule="nonzero"/><path fill="#d9d9d9" d="m25.624672 36.249344l301.88977 0l0 69.98425l-301.88977 0z" fill-rule="evenodd"/><path stroke="#cccccc" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" stroke-dasharray="4.0,3.0" d="m25.624672 36.249344l301.88977 0l0 69.98425l-301.88977 0z" fill-rule="evenodd"/><path fill="#434343" d="m134.67014 56.769344l-1.0 0l0 -7.6875l-2.703125 0l0 -0.875l6.421875 0l0 0.875l-2.71875 0l0 7.6875zm6.576172 0.125q-1.421875 0 -2.25 -0.875q-0.828125 -0.875 -0.828125 -2.40625q0 -1.5625 0.765625 -2.46875q0.765625 -0.921875 2.0625 -0.921875q1.203125 0 1.90625 0.796875q0.703125 0.796875 0.703125 2.09375l0 0.625l-4.421875 0q0.03125 1.125 0.5625 1.71875q0.546875 0.578125 1.53125 0.578125q1.03125 0 2.046875 -0.4375l0 0.875q-0.515625 0.21875 -0.984375 0.3125q-0.453125 0.109375 -1.09375 0.109375zm-0.265625 -5.84375q-0.78125 0 -1.25 0.5q-0.453125 0.5 -0.53125 1.390625l3.359375 0q0 -0.921875 -0.40625 -1.40625q-0.40625 -0.484375 -1.171875 -0.484375zm8.669922 5.71875l0 -4.15625q0 -0.78125 -0.359375 -1.171875q-0.34375 -0.390625 -1.109375 -0.390625q-1.015625 0 -1.484375 0.546875q-0.46875 0.546875 -0.46875 1.796875l0 3.375l-0.96875 0l0 -6.421875l0.796875 0l0.15625 0.875l0.046875 0q0.296875 -0.46875 0.828125 -0.734375q0.546875 -0.265625 1.203125 -0.265625q1.171875 0 1.75 0.5625q0.59375 0.5625 0.59375 1.796875l0 4.1875l-0.984375 0zm7.1152344 -1.75q0 0.890625 -0.671875 1.390625q-0.65625 0.484375 -1.875 0.484375q-1.265625 0 -1.984375 -0.40625l0 -0.90625q0.46875 0.234375 0.984375 0.375q0.53125 0.125 1.03125 0.125q0.765625 0 1.171875 -0.234375q0.40625 -0.25 0.40625 -0.75q0 -0.375 -0.328125 -0.640625q-0.3125 -0.265625 -1.265625 -0.625q-0.890625 -0.34375 -1.28125 -0.59375q-0.375 -0.25 -0.5625 -0.5625q-0.171875 -0.3125 -0.171875 -0.75q0 -0.78125 0.640625 -1.234375q0.640625 -0.46875 1.75 -0.46875q1.03125 0 2.03125 0.421875l-0.359375 0.796875q-0.953125 -0.390625 -1.75 -0.390625q-0.6875 0 -1.046875 0.21875q-0.34375 0.203125 -0.34375 0.59375q0 0.25 0.125 0.4375q0.140625 0.171875 0.421875 0.34375q0.296875 0.15625 1.140625 0.46875q1.140625 0.421875 1.53125 0.84375q0.40625 0.421875 0.40625 1.0625zm7.1308594 -1.46875q0 1.578125 -0.796875 2.46875q-0.78125 0.875 -2.1875 0.875q-0.859375 0 -1.53125 -0.40625q-0.65625 -0.40625 -1.03125 -1.15625q-0.359375 -0.765625 -0.359375 -1.78125q0 -1.5625 0.78125 -2.4375q0.796875 -0.890625 2.1875 -0.890625q1.34375 0 2.140625 0.90625q0.796875 0.890625 0.796875 2.421875zm-4.890625 0q0 1.234375 0.484375 1.875q0.5 0.640625 1.453125 0.640625q0.953125 0 1.4375 -0.640625q0.5 -0.640625 0.5 -1.875q0 -1.21875 -0.5 -1.859375q-0.484375 -0.640625 -1.453125 -0.640625q-0.953125 0 -1.4375 0.640625q-0.484375 0.625 -0.484375 1.859375zm9.529297 -3.328125q0.421875 0 0.765625 0.078125l-0.140625 0.90625q-0.390625 -0.09375 -0.703125 -0.09375q-0.78125 0 -1.34375 0.640625q-0.546875 0.625 -0.546875 1.5625l0 3.453125l-0.96875 0l0 -6.421875l0.796875 0l0.125 1.1875l0.046875 0q0.34375 -0.625 0.84375 -0.96875q0.515625 -0.34375 1.125 -0.34375zm3.1015625 6.546875l-1.0 0l0 -8.5625l4.78125 0l0 0.875l-3.78125 0l0 3.140625l3.546875 0l0 0.890625l-3.546875 0l0 3.65625zm6.0214844 0l-0.96875 0l0 -9.125l0.96875 0l0 9.125zm7.6132812 -3.21875q0 1.578125 -0.796875 2.46875q-0.78125 0.875 -2.1875 0.875q-0.859375 0 -1.53125 -0.40625q-0.65625 -0.40625 -1.03125 -1.15625q-0.359375 -0.765625 -0.359375 -1.78125q0 -1.5625 0.78125 -2.4375q0.796875 -0.890625 2.1875 -0.890625q1.34375 0 2.140625 0.90625q0.796875 0.890625 0.796875 2.421875zm-4.890625 0q0 1.234375 0.484375 1.875q0.5 0.640625 1.453125 0.640625q0.953125 0 1.4375 -0.640625q0.5 -0.640625 0.5 -1.875q0 -1.21875 -0.5 -1.859375q-0.484375 -0.640625 -1.453125 -0.640625q-0.953125 0 -1.4375 0.640625q-0.484375 0.625 -0.484375 1.859375zm11.841797 3.21875l-1.1875 -3.765625q-0.109375 -0.34375 -0.40625 -1.578125l-0.046875 0q-0.234375 1.03125 -0.421875 1.59375l-1.203125 3.75l-1.125 0l-1.75 -6.421875l1.015625 0q0.625 2.421875 0.9375 3.6875q0.328125 1.265625 0.375 1.703125l0.046875 0q0.0625 -0.328125 0.203125 -0.859375q0.15625 -0.53125 0.265625 -0.84375l1.171875 -3.6875l1.046875 0l1.15625 3.6875q0.328125 1.0 0.4375 1.6875l0.046875 0q0.03125 -0.203125 0.125 -0.640625q0.109375 -0.453125 1.234375 -4.734375l1.0 0l-1.765625 6.421875l-1.15625 0zm12.732422 0l-1.0625 -2.71875l-3.4375 0l-1.046875 2.71875l-1.015625 0l3.390625 -8.609375l0.828125 0l3.375 8.609375l-1.03125 0zm-1.375 -3.625l-1.0 -2.65625q-0.1875 -0.5 -0.390625 -1.234375q-0.140625 0.5625 -0.375 1.234375l-1.0 2.65625l2.765625 0zm9.015625 -2.453125q0 1.3125 -0.890625 2.015625q-0.890625 0.6875 -2.53125 0.6875l-1.015625 0l0 3.375l-1.0 0l0 -8.5625l2.234375 0q3.203125 0 3.203125 2.484375zm-4.4375 1.859375l0.90625 0q1.3125 0 1.90625 -0.421875q0.59375 -0.4375 0.59375 -1.390625q0 -0.84375 -0.5625 -1.25q-0.546875 -0.421875 -1.734375 -0.421875l-1.109375 0l0 3.484375zm6.2246094 4.21875l0 -8.5625l1.0 0l0 8.5625l-1.0 0zm7.345703 -1.75q0 0.890625 -0.671875 1.390625q-0.65625 0.484375 -1.875 0.484375q-1.265625 0 -1.984375 -0.40625l0 -0.90625q0.46875 0.234375 0.984375 0.375q0.53125 0.125 1.03125 0.125q0.765625 0 1.171875 -0.234375q0.40625 -0.25 0.40625 -0.75q0 -0.375 -0.328125 -0.640625q-0.3125 -0.265625 -1.265625 -0.625q-0.890625 -0.34375 -1.28125 -0.59375q-0.375 -0.25 -0.5625 -0.5625q-0.171875 -0.3125 -0.171875 -0.75q0 -0.78125 0.640625 -1.234375q0.640625 -0.46875 1.75 -0.46875q1.03125 0 2.03125 0.421875l-0.359375 0.796875q-0.953125 -0.390625 -1.75 -0.390625q-0.6875 0 -1.046875 0.21875q-0.34375 0.203125 -0.34375 0.59375q0 0.25 0.125 0.4375q0.140625 0.171875 0.421875 0.34375q0.296875 0.15625 1.140625 0.46875q1.140625 0.421875 1.53125 0.84375q0.40625 0.421875 0.40625 1.0625z" fill-rule="nonzero"/><path fill="#f3f3f3" d="m396.75067 183.75066l249.00787 0l0 203.02364l-249.00787 0z" fill-rule="evenodd"/><path stroke="#cccccc" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m396.75067 183.75066l249.00787 0l0 203.02364l-249.00787 0z" fill-rule="evenodd"/><path fill="#434343" d="m409.75067 374.69928q-1.546875 0 -2.40625 -0.953125q-0.84375 -0.953125 -0.84375 -2.6875q0 -1.796875 0.859375 -2.765625q0.859375 -0.984375 2.453125 -0.984375q0.515625 0 1.03125 0.109375q0.515625 0.109375 0.8125 0.265625l-0.328125 0.921875q-0.359375 -0.15625 -0.796875 -0.25q-0.421875 -0.09375 -0.734375 -0.09375q-2.171875 0 -2.171875 2.78125q0 1.3125 0.515625 2.015625q0.53125 0.703125 1.578125 0.703125q0.890625 0 1.828125 -0.390625l0 0.96875q-0.71875 0.359375 -1.796875 0.359375zm4.5639343 -0.125l-1.078125 0l0 -10.125l1.078125 0l0 10.125zm3.3710632 0l-1.078125 0l0 -7.125l1.078125 0l0 7.125zm-1.171875 -9.0625q0 -0.375 0.1875 -0.546875q0.1875 -0.171875 0.453125 -0.171875q0.265625 0 0.453125 0.171875q0.1875 0.171875 0.1875 0.546875q0 0.359375 -0.1875 0.546875q-0.1875 0.171875 -0.453125 0.171875q-0.265625 0 -0.453125 -0.171875q-0.1875 -0.1875 -0.1875 -0.546875zm6.4804688 9.1875q-1.578125 0 -2.5 -0.953125q-0.90625 -0.96875 -0.90625 -2.671875q0 -1.734375 0.84375 -2.75q0.859375 -1.015625 2.28125 -1.015625q1.34375 0 2.125 0.890625q0.78125 0.875 0.78125 2.328125l0 0.671875l-4.90625 0q0.03125 1.265625 0.625 1.921875q0.609375 0.640625 1.703125 0.640625q1.140625 0 2.265625 -0.484375l0 0.96875q-0.5625 0.25 -1.078125 0.34375q-0.515625 0.109375 -1.234375 0.109375zm-0.296875 -6.484375q-0.859375 0 -1.375 0.5625q-0.5 0.5625 -0.59375 1.546875l3.734375 0q0 -1.015625 -0.453125 -1.5625q-0.453125 -0.546875 -1.3125 -0.546875zm9.649414 6.359375l0 -4.609375q0 -0.875 -0.40625 -1.296875q-0.390625 -0.4375 -1.234375 -0.4375q-1.125 0 -1.65625 0.609375q-0.515625 0.59375 -0.515625 2.0l0 3.734375l-1.078125 0l0 -7.125l0.890625 0l0.171875 0.96875l0.046875 0q0.328125 -0.53125 0.921875 -0.8125q0.609375 -0.296875 1.34375 -0.296875q1.296875 0 1.9375 0.625q0.65625 0.625 0.65625 1.984375l0 4.65625l-1.078125 0zm5.602295 -0.765625q0.28125 0 0.546875 -0.03125q0.265625 -0.046875 0.421875 -0.09375l0 0.828125q-0.171875 0.078125 -0.515625 0.125q-0.34375 0.0625 -0.609375 0.0625q-2.078125 0 -2.078125 -2.171875l0 -4.25l-1.015625 0l0 -0.515625l1.015625 -0.453125l0.453125 -1.515625l0.625 0l0 1.65625l2.078125 0l0 0.828125l-2.078125 0l0 4.203125q0 0.640625 0.3125 0.984375q0.3125 0.34375 0.84375 0.34375z" fill-rule="nonzero"/><path fill="#f4cccc" d="m206.61942 201.17455l140.47244 0l0 30.992126l-140.47244 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m206.61942 201.17455l140.47244 0l0 30.992126l-140.47244 0z" fill-rule="evenodd"/><path fill="#000000" d="m237.07405 214.53435l-0.015625 0.015625q-0.328125 -0.421875 -0.5625 -0.5625q-0.234375 -0.15625 -0.65625 -0.15625q-0.5625 0 -0.890625 0.34375q-0.328125 0.328125 -0.328125 1.078125l0 0.453125l1.8125 0l0 0.6875l-1.8125 0l0 4.796875l-0.828125 0l0 -4.796875l-1.1875 0l0 -0.6875l1.1875 0l0 -0.453125q0 -1.0625 0.546875 -1.578125q0.546875 -0.53125 1.46875 -0.53125q0.53125 0 0.984375 0.1875q0.453125 0.1875 0.734375 0.5625l-0.453125 0.640625zm-0.140625 0.03125q0 -0.03125 0.046875 0q0.046875 0.015625 0.0625 0.015625l-0.046875 0.046875l-0.0625 -0.046875l0 -0.015625zm0.125 -0.015625q0.078125 0.09375 0.03125 0.0625q-0.03125 -0.03125 -0.046875 -0.03125l0.015625 -0.03125zm5.906296 2.296875l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625zm4.281296 4.421875q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm4.796921 3.40625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm2.1250305 2.6875l3.40625 -4.140625l-3.140625 0l0 -0.75l4.25 0l0 0.59375l-3.328125 4.140625l3.09375 0q0.1875 0 0.28125 -0.015625q0.09375 -0.03125 0.15625 -0.09375l0.09375 0l0 0.859375l-4.8125 0l0 -0.59375zm8.671936 0.71875q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm1.9531555 4.234375l0 -0.71875l5.171875 0l0 0.71875l-5.171875 0zm11.281311 -5.875q-0.109375 -0.015625 -0.3125 -0.015625q-0.578125 0 -0.96875 0.25q0.203125 0.421875 0.203125 0.90625q0 0.546875 -0.234375 0.984375q-0.234375 0.4375 -0.671875 0.6875q-0.4375 0.25 -1.0 0.25q-0.46875 0 -0.859375 -0.171875q-0.21875 0.265625 -0.21875 0.5q0 0.296875 0.359375 0.421875q0.375 0.125 1.328125 0.125q1.15625 0 1.578125 0.359375q0.4375 0.34375 0.4375 1.03125q0 0.703125 -0.578125 1.171875q-0.578125 0.484375 -1.796875 0.484375q-1.1875 0 -1.859375 -0.359375q-0.65625 -0.359375 -0.65625 -1.0625q0 -0.421875 0.234375 -0.734375q0.25 -0.3125 0.625 -0.5q-0.40625 -0.265625 -0.40625 -0.71875q0 -0.484375 0.5 -1.015625q-0.296875 -0.265625 -0.453125 -0.640625q-0.15625 -0.375 -0.15625 -0.8125q0 -0.53125 0.234375 -0.96875q0.234375 -0.4375 0.671875 -0.6875q0.4375 -0.25 1.015625 -0.25q0.875 0 1.390625 0.5625q0.296875 -0.265625 0.59375 -0.390625q0.3125 -0.125 0.6875 -0.125l0.203125 0.015625l0.109375 0.703125zm-2.984375 2.34375q0.515625 0 0.84375 -0.34375q0.34375 -0.359375 0.34375 -0.859375q0 -0.5 -0.34375 -0.84375q-0.328125 -0.359375 -0.84375 -0.359375q-0.515625 0 -0.859375 0.359375q-0.34375 0.34375 -0.34375 0.84375q0 0.5 0.34375 0.859375q0.34375 0.34375 0.859375 0.34375zm1.828125 3.125q0 -0.296875 -0.109375 -0.46875q-0.09375 -0.15625 -0.390625 -0.234375q-0.296875 -0.078125 -0.90625 -0.078125q-0.859375 0 -1.375 -0.15625q-0.296875 0.203125 -0.40625 0.40625q-0.109375 0.203125 -0.109375 0.53125q0 0.40625 0.453125 0.640625q0.453125 0.234375 1.265625 0.234375q0.796875 0 1.1875 -0.234375q0.390625 -0.234375 0.390625 -0.640625zm6.5625305 -4.890625l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625zm3.937561 -1.3125q1.1875 0 1.796875 0.625q0.625 0.609375 0.625 2.0625l0 2.921875l-0.9375 0l0 -0.84375q-0.5 0.96875 -1.875 0.96875q-0.90625 0 -1.421875 -0.40625q-0.515625 -0.421875 -0.515625 -1.09375q0 -0.578125 0.359375 -1.0q0.375 -0.4375 1.0 -0.671875q0.640625 -0.234375 1.390625 -0.234375q0.6875 0 1.234375 0.0625q-0.0625 -0.921875 -0.484375 -1.296875q-0.40625 -0.375 -1.21875 -0.375q-0.421875 0 -0.796875 0.15625q-0.375 0.15625 -0.6875 0.453125l-0.421875 -0.5625q0.765625 -0.765625 1.953125 -0.765625zm-0.3125 5.078125q0.890625 0 1.40625 -0.515625q0.515625 -0.53125 0.5625 -1.515625q-0.53125 -0.078125 -1.15625 -0.078125q-0.90625 0 -1.4375 0.296875q-0.53125 0.296875 -0.53125 0.921875q0 0.890625 1.15625 0.890625zm4.1094055 -4.953125l0.875 0l0 1.078125q0.1875 -0.59375 0.625 -0.890625q0.4375 -0.3125 1.046875 -0.3125q0.625 0 1.140625 0.328125q0.53125 0.3125 0.84375 0.953125q0.3125 0.625 0.3125 1.546875q0 0.921875 -0.328125 1.59375q-0.328125 0.65625 -0.859375 1.0q-0.53125 0.328125 -1.140625 0.328125q-0.484375 0 -0.921875 -0.21875q-0.421875 -0.234375 -0.703125 -0.640625l0 2.71875l-0.890625 0l0 -7.484375zm2.375 4.859375q0.65625 0 1.109375 -0.5q0.453125 -0.5 0.453125 -1.625q0 -1.015625 -0.40625 -1.5625q-0.390625 -0.546875 -1.125 -0.546875q-0.671875 0 -1.109375 0.578125q-0.421875 0.5625 -0.421875 1.71875q0.03125 0.953125 0.421875 1.453125q0.390625 0.484375 1.078125 0.484375zm3.781311 -7.359375l0.953125 0l0 0.09375q-0.078125 0.0625 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 2.9375q0.328125 -0.5 0.8125 -0.796875q0.484375 -0.296875 0.984375 -0.296875q0.78125 0 1.21875 0.546875q0.453125 0.546875 0.453125 1.734375l0 3.328125l-0.84375 0l0 -3.296875q0 -0.8125 -0.28125 -1.1875q-0.265625 -0.375 -0.71875 -0.375q-0.390625 0 -0.765625 0.21875q-0.375 0.203125 -0.625 0.59375q-0.234375 0.390625 -0.234375 0.890625l0 3.15625l-0.84375 0l0 -7.984375zm7.9844055 8.109375q-0.296875 0 -0.5 -0.203125q-0.203125 -0.203125 -0.203125 -0.46875q0 -0.28125 0.203125 -0.484375q0.203125 -0.203125 0.5 -0.203125q0.265625 0 0.46875 0.203125q0.21875 0.203125 0.21875 0.484375q0 0.265625 -0.21875 0.46875q-0.203125 0.203125 -0.46875 0.203125zm3.859436 -5.609375l0.875 0l0 1.078125q0.1875 -0.59375 0.625 -0.890625q0.4375 -0.3125 1.046875 -0.3125q0.625 0 1.140625 0.328125q0.53125 0.3125 0.84375 0.953125q0.3125 0.625 0.3125 1.546875q0 0.921875 -0.328125 1.59375q-0.328125 0.65625 -0.859375 1.0q-0.53125 0.328125 -1.140625 0.328125q-0.484375 0 -0.921875 -0.21875q-0.421875 -0.234375 -0.703125 -0.640625l0 2.71875l-0.890625 0l0 -7.484375zm2.375 4.859375q0.65625 0 1.109375 -0.5q0.453125 -0.5 0.453125 -1.625q0 -1.015625 -0.40625 -1.5625q-0.390625 -0.546875 -1.125 -0.546875q-0.671875 0 -1.109375 0.578125q-0.421875 0.5625 -0.421875 1.71875q0.03125 0.953125 0.421875 1.453125q0.390625 0.484375 1.078125 0.484375zm8.2187805 -4.859375q-0.0625 0.5 -0.21875 0.984375q-0.15625 0.46875 -0.421875 1.265625l-1.375 3.859375q-0.265625 0.765625 -0.71875 1.109375q-0.4375 0.359375 -1.046875 0.359375q-0.78125 0 -1.265625 -0.46875l0.375 -0.59375l0 -0.015625l0.015625 0.015625q0.203125 0.1875 0.40625 0.265625q0.21875 0.078125 0.515625 0.078125q0.4375 0 0.703125 -0.3125q0.265625 -0.3125 0.515625 -1.03125l-2.25 -5.515625l0.90625 0l1.703125 4.390625l0.71875 -2.203125q0.265625 -0.828125 0.390625 -1.265625q0.125 -0.4375 0.1875 -0.921875l0.859375 0zm-4.53125 6.46875q0 0.03125 -0.03125 0.03125l-0.09375 -0.03125l0.046875 -0.0625l0.078125 0.046875l0 0.015625zm-0.140625 0.03125q-0.046875 -0.0625 -0.03125 -0.046875q0.03125 0.015625 0.046875 0.015625l-0.015625 0.03125z" fill-rule="nonzero"/><path fill="#f4cccc" d="m132.49081 319.42978l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m132.49081 319.42978l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m160.73871 332.44586l0 -6.734375l-2.125 0l0 -0.75l5.1875 0l0 0.75l-2.21875 0l0 6.734375l-0.84375 0zm4.437546 0l0 -7.484375l4.3125 0l0 0.734375l-3.46875 0l0 2.34375l2.796875 0l0 0.734375l-2.796875 0l0 3.671875l-0.84375 0zm5.859421 -7.46875l0.984375 0l0 0.078125q-0.078125 0.078125 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 6.234375l3.59375 0l0 0.71875l-4.46875 0l0 -7.46875zm6.406296 7.46875l0 -0.703125l1.40625 0l0 -4.078125l-1.34375 0l0 -0.703125l2.203125 0l0 4.78125l1.28125 0l0 0.703125l-3.546875 0zm1.78125 -6.640625q-0.25 0 -0.4375 -0.171875q-0.171875 -0.1875 -0.171875 -0.4375q0 -0.265625 0.171875 -0.4375q0.171875 -0.1875 0.4375 -0.1875q0.25 0 0.4375 0.1875q0.1875 0.1875 0.1875 0.4375q0 0.25 -0.1875 0.4375q-0.1875 0.171875 -0.4375 0.171875zm8.343796 6.140625q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm3.9375458 0.625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0z" fill-rule="nonzero"/><path fill="#000000" d="m152.62926 345.8521q0.4375 0 0.84375 -0.25q0.40625 -0.25 0.65625 -0.671875l0.625 0.40625q-0.375 0.625 -0.875 0.9375q-0.5 0.296875 -1.21875 0.296875q-0.84375 0 -1.5 -0.40625q-0.65625 -0.421875 -1.046875 -1.265625q-0.390625 -0.859375 -0.390625 -2.15625q0 -1.375 0.421875 -2.234375q0.421875 -0.859375 1.0625 -1.21875q0.65625 -0.375 1.40625 -0.375q0.78125 0 1.359375 0.390625q0.59375 0.390625 0.890625 1.078125l-0.71875 0.34375q-0.015625 0 -0.015625 0q0 -0.015625 0 -0.015625q-0.3125 -0.625 -0.703125 -0.875q-0.375 -0.25 -0.84375 -0.25q-0.9375 0 -1.484375 0.828125q-0.546875 0.8125 -0.546875 2.28125q0 0.921875 0.265625 1.640625q0.28125 0.71875 0.75 1.125q0.484375 0.390625 1.0625 0.390625zm1.375 -5.171875q0.015625 -0.015625 0.015625 -0.015625q0.03125 0 0.109375 0.0625l-0.09375 0.046875l-0.03125 -0.09375zm0.140625 0.046875q0.046875 0.109375 -0.015625 0l0.015625 0zm4.093796 5.8125q-0.734375 0 -1.3125 -0.359375q-0.578125 -0.359375 -0.90625 -1.0q-0.3125 -0.65625 -0.3125 -1.484375q0 -0.828125 0.3125 -1.46875q0.328125 -0.65625 0.90625 -1.015625q0.578125 -0.375 1.3125 -0.375q0.734375 0 1.3125 0.375q0.578125 0.359375 0.890625 1.015625q0.328125 0.640625 0.328125 1.46875q0 0.828125 -0.328125 1.484375q-0.3125 0.640625 -0.890625 1.0q-0.578125 0.359375 -1.3125 0.359375zm0 -0.71875q0.46875 0 0.828125 -0.265625q0.375 -0.28125 0.578125 -0.765625q0.21875 -0.484375 0.21875 -1.109375q0 -0.9375 -0.46875 -1.53125q-0.453125 -0.59375 -1.15625 -0.59375q-0.703125 0 -1.171875 0.59375q-0.453125 0.59375 -0.453125 1.53125q0 0.625 0.203125 1.109375q0.21875 0.484375 0.578125 0.765625q0.375 0.265625 0.84375 0.265625zm3.8594208 -4.859375l0.84375 0l0 0.96875q0.328125 -0.5 0.8125 -0.796875q0.5 -0.296875 1.046875 -0.296875q0.734375 0 1.171875 0.5625q0.4375 0.546875 0.4375 1.71875l0 3.328125l-0.84375 0l0 -3.296875q0 -0.8125 -0.28125 -1.1875q-0.265625 -0.375 -0.71875 -0.375q-0.375 0 -0.75 0.21875q-0.375 0.21875 -0.625 0.609375q-0.25 0.390625 -0.25 0.875l0 3.15625l-0.84375 0l0 -5.484375zm10.593796 0q-0.1875 0.96875 -0.796875 2.40625l-1.328125 3.078125l-0.671875 0l-2.171875 -5.484375l0.859375 0l1.6875 4.296875l0.890625 -2.03125q0.546875 -1.25 0.71875 -2.265625l0.8125 0zm3.8125458 5.609375q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm6.640671 -1.0625l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625zm6.343796 3.796875q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm3.9375458 0.625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm6.640671 -1.0625l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625z" fill-rule="nonzero"/><path fill="#d9ead3" d="m284.12296 319.3983l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m284.12296 319.3983l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m312.37085 332.41437l0 -6.734375l-2.125 0l0 -0.75l5.1875 0l0 0.75l-2.21875 0l0 6.734375l-0.84375 0zm4.4375305 0l0 -7.484375l4.3125 0l0 0.734375l-3.46875 0l0 2.34375l2.796875 0l0 0.734375l-2.796875 0l0 3.671875l-0.84375 0zm5.859436 -7.46875l0.984375 0l0 0.078125q-0.078125 0.078125 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 6.234375l3.59375 0l0 0.71875l-4.46875 0l0 -7.46875zm6.4062805 7.46875l0 -0.703125l1.40625 0l0 -4.078125l-1.34375 0l0 -0.703125l2.203125 0l0 4.78125l1.28125 0l0 0.703125l-3.546875 0zm1.78125 -6.640625q-0.25 0 -0.4375 -0.171875q-0.171875 -0.1875 -0.171875 -0.4375q0 -0.265625 0.171875 -0.4375q0.171875 -0.1875 0.4375 -0.1875q0.25 0 0.4375 0.1875q0.1875 0.1875 0.1875 0.4375q0 0.25 -0.1875 0.4375q-0.1875 0.171875 -0.4375 0.171875zm8.343811 6.140625q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm3.9375305 0.625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0z" fill-rule="nonzero"/><path fill="#000000" d="m298.80826 346.41437l0 -7.484375l4.3125 0l0 0.734375l-3.46875 0l0 2.34375l2.796875 0l0 0.734375l-2.796875 0l0 3.671875l-0.84375 0zm6.0156555 0l0 -0.703125l1.609375 0l0 -6.578125l-1.546875 0l0 -0.703125l2.421875 0l0 7.28125l1.609375 0l0 0.703125l-4.09375 0zm7.968811 -5.609375q1.1875 0 1.796875 0.625q0.625 0.609375 0.625 2.0625l0 2.921875l-0.9375 0l0 -0.84375q-0.5 0.96875 -1.875 0.96875q-0.90625 0 -1.421875 -0.40625q-0.515625 -0.421875 -0.515625 -1.09375q0 -0.578125 0.359375 -1.0q0.375 -0.4375 1.0 -0.671875q0.640625 -0.234375 1.390625 -0.234375q0.6875 0 1.234375 0.0625q-0.0625 -0.921875 -0.484375 -1.296875q-0.40625 -0.375 -1.21875 -0.375q-0.421875 0 -0.796875 0.15625q-0.375 0.15625 -0.6875 0.453125l-0.421875 -0.5625q0.765625 -0.765625 1.953125 -0.765625zm-0.3125 5.078125q0.890625 0 1.40625 -0.515625q0.515625 -0.53125 0.5625 -1.515625q-0.53125 -0.078125 -1.15625 -0.078125q-0.90625 0 -1.4375 0.296875q-0.53125 0.296875 -0.53125 0.921875q0 0.890625 1.15625 0.890625zm8.7187805 0.03125q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm3.859436 0.625q-0.484375 0 -0.90625 -0.21875q-0.421875 -0.21875 -0.703125 -0.625l-0.3125 0.71875l-0.546875 0l0 -7.984375l0.984375 0l0 0.09375q-0.078125 0.0625 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 2.8125q0.265625 -0.453125 0.71875 -0.703125q0.453125 -0.265625 0.921875 -0.265625q1.03125 0 1.640625 0.71875q0.609375 0.71875 0.609375 2.09375q0 0.9375 -0.328125 1.609375q-0.3125 0.65625 -0.84375 0.984375q-0.53125 0.328125 -1.125 0.328125zm-0.109375 -0.765625q0.65625 0 1.078125 -0.5q0.4375 -0.515625 0.4375 -1.609375q0 -1.046875 -0.40625 -1.578125q-0.390625 -0.546875 -1.078125 -0.546875q-0.671875 0 -1.09375 0.609375q-0.421875 0.59375 -0.421875 1.546875q0 2.078125 1.484375 2.078125zm5.5781555 0.765625q-0.875 0 -1.390625 -0.640625q-0.515625 -0.640625 -0.5 -1.90625l0.015625 -3.0625l0.84375 0l0 3.0625q0 0.984375 0.328125 1.421875q0.34375 0.4375 0.921875 0.4375q0.609375 0 1.03125 -0.484375q0.4375 -0.484375 0.4375 -1.40625l0 -3.03125l0.84375 0l0 4.625q0 0.296875 0.015625 0.484375q0.015625 0.1875 0.09375 0.375l-0.828125 0q-0.078125 -0.1875 -0.09375 -0.375q-0.015625 -0.1875 -0.015625 -0.46875q-0.265625 0.453125 -0.71875 0.71875q-0.453125 0.25 -0.984375 0.25zm8.562561 -6.78125l-0.015625 0.015625q-0.328125 -0.421875 -0.5625 -0.5625q-0.234375 -0.15625 -0.65625 -0.15625q-0.5625 0 -0.890625 0.34375q-0.328125 0.328125 -0.328125 1.078125l0 0.453125l1.8125 0l0 0.6875l-1.8125 0l0 4.796875l-0.828125 0l0 -4.796875l-1.1875 0l0 -0.6875l1.1875 0l0 -0.453125q0 -1.0625 0.546875 -1.578125q0.546875 -0.53125 1.46875 -0.53125q0.53125 0 0.984375 0.1875q0.453125 0.1875 0.734375 0.5625l-0.453125 0.640625zm-0.140625 0.03125q0 -0.03125 0.046875 0q0.046875 0.015625 0.0625 0.015625l-0.046875 0.046875l-0.0625 -0.046875l0 -0.015625zm0.125 -0.015625q0.078125 0.09375 0.03125 0.0625q-0.03125 -0.03125 -0.046875 -0.03125l0.015625 -0.03125zm6.0156555 -0.015625l-0.015625 0.015625q-0.328125 -0.421875 -0.5625 -0.5625q-0.234375 -0.15625 -0.65625 -0.15625q-0.5625 0 -0.890625 0.34375q-0.328125 0.328125 -0.328125 1.078125l0 0.453125l1.8125 0l0 0.6875l-1.8125 0l0 4.796875l-0.828125 0l0 -4.796875l-1.1875 0l0 -0.6875l1.1875 0l0 -0.453125q0 -1.0625 0.546875 -1.578125q0.546875 -0.53125 1.46875 -0.53125q0.53125 0 0.984375 0.1875q0.453125 0.1875 0.734375 0.5625l-0.453125 0.640625zm-0.140625 0.03125q0 -0.03125 0.046875 0q0.046875 0.015625 0.0625 0.015625l-0.046875 0.046875l-0.0625 -0.046875l0 -0.015625zm0.125 -0.015625q0.078125 0.09375 0.03125 0.0625q-0.03125 -0.03125 -0.046875 -0.03125l0.015625 -0.03125zm4.062561 6.765625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm6.6406555 -1.0625l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625z" fill-rule="nonzero"/><path fill="#f4cccc" d="m413.02625 319.3983l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m413.02625 319.3983l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m441.27414 332.41437l0 -6.734375l-2.125 0l0 -0.75l5.1875 0l0 0.75l-2.21875 0l0 6.734375l-0.84375 0zm4.437561 0l0 -7.484375l4.3125 0l0 0.734375l-3.46875 0l0 2.34375l2.796875 0l0 0.734375l-2.796875 0l0 3.671875l-0.84375 0zm5.8594055 -7.46875l0.984375 0l0 0.078125q-0.078125 0.078125 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 6.234375l3.59375 0l0 0.71875l-4.46875 0l0 -7.46875zm6.406311 7.46875l0 -0.703125l1.40625 0l0 -4.078125l-1.34375 0l0 -0.703125l2.203125 0l0 4.78125l1.28125 0l0 0.703125l-3.546875 0zm1.78125 -6.640625q-0.25 0 -0.4375 -0.171875q-0.171875 -0.1875 -0.171875 -0.4375q0 -0.265625 0.171875 -0.4375q0.171875 -0.1875 0.4375 -0.1875q0.25 0 0.4375 0.1875q0.1875 0.1875 0.1875 0.4375q0 0.25 -0.1875 0.4375q-0.1875 0.171875 -0.4375 0.171875zm8.3437805 6.140625q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm3.937561 0.625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0z" fill-rule="nonzero"/><path fill="#000000" d="m424.97714 346.41437l0 -0.703125l1.40625 0l0 -4.078125l-1.34375 0l0 -0.703125l2.203125 0l0 4.78125l1.28125 0l0 0.703125l-3.546875 0zm1.78125 -6.640625q-0.25 0 -0.4375 -0.171875q-0.171875 -0.1875 -0.171875 -0.4375q0 -0.265625 0.171875 -0.4375q0.171875 -0.1875 0.4375 -0.1875q0.25 0 0.4375 0.1875q0.1875 0.1875 0.1875 0.4375q0 0.25 -0.1875 0.4375q-0.1875 0.171875 -0.4375 0.171875zm3.875061 1.15625l0.84375 0l0 0.96875q0.328125 -0.5 0.8125 -0.796875q0.5 -0.296875 1.046875 -0.296875q0.734375 0 1.171875 0.5625q0.4375 0.546875 0.4375 1.71875l0 3.328125l-0.84375 0l0 -3.296875q0 -0.8125 -0.28125 -1.1875q-0.265625 -0.375 -0.71875 -0.375q-0.375 0 -0.75 0.21875q-0.375 0.21875 -0.625 0.609375q-0.25 0.390625 -0.25 0.875l0 3.15625l-0.84375 0l0 -5.484375zm10.4687805 4.984375q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm3.937561 0.625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm6.6406555 -1.0625l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625zm1.734436 -1.1875l0.875 0l0 1.078125q0.1875 -0.59375 0.625 -0.890625q0.4375 -0.3125 1.046875 -0.3125q0.625 0 1.140625 0.328125q0.53125 0.3125 0.84375 0.953125q0.3125 0.625 0.3125 1.546875q0 0.921875 -0.328125 1.59375q-0.328125 0.65625 -0.859375 1.0q-0.53125 0.328125 -1.140625 0.328125q-0.484375 0 -0.921875 -0.21875q-0.421875 -0.234375 -0.703125 -0.640625l0 2.71875l-0.890625 0l0 -7.484375zm2.375 4.859375q0.65625 0 1.109375 -0.5q0.453125 -0.5 0.453125 -1.625q0 -1.015625 -0.40625 -1.5625q-0.390625 -0.546875 -1.125 -0.546875q-0.671875 0 -1.109375 0.578125q-0.421875 0.5625 -0.421875 1.71875q0.03125 0.953125 0.421875 1.453125q0.390625 0.484375 1.078125 0.484375zm8.0156555 -3.71875l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625zm4.281311 4.421875q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm6.8594055 2.78125q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm3.937561 0.625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm6.6406555 -1.0625l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m371.61902 334.89435l41.417297 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m371.61902 334.89435l37.990234 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m409.60925 334.89435l-1.1245728 1.1246033l3.0897522 -1.1246033l-3.0897522 -1.1245728z" fill-rule="evenodd"/><path fill="#c9daf8" d="m548.5407 277.52954l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m548.5407 277.52954l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path fill="#000000" d="m584.8932 293.06122l2.90625 0l0 4.15625q-0.6875 0.21875 -1.390625 0.328125q-0.703125 0.125 -1.625 0.125q-1.9375 0 -3.03125 -1.15625q-1.078125 -1.171875 -1.078125 -3.25q0 -1.34375 0.53125 -2.34375q0.546875 -1.0 1.546875 -1.53125q1.015625 -0.53125 2.359375 -0.53125q1.375 0 2.5625 0.5l-0.390625 0.875q-1.15625 -0.484375 -2.234375 -0.484375q-1.5625 0 -2.453125 0.9375q-0.875 0.921875 -0.875 2.578125q0 1.734375 0.84375 2.640625q0.859375 0.890625 2.5 0.890625q0.890625 0 1.734375 -0.21875l0 -2.625l-1.90625 0l0 -0.890625zm10.392578 -1.59375q0 1.3125 -0.890625 2.015625q-0.890625 0.6875 -2.53125 0.6875l-1.015625 0l0 3.375l-1.0 0l0 -8.5625l2.234375 0q3.203125 0 3.203125 2.484375zm-4.4375 1.859375l0.90625 0q1.3125 0 1.90625 -0.421875q0.59375 -0.4375 0.59375 -1.390625q0 -0.84375 -0.5625 -1.25q-0.546875 -0.421875 -1.734375 -0.421875l-1.109375 0l0 3.484375zm12.693359 -4.34375l0 5.53125q0 1.46875 -0.890625 2.3125q-0.875 0.84375 -2.421875 0.84375q-1.546875 0 -2.390625 -0.84375q-0.84375 -0.859375 -0.84375 -2.328125l0 -5.515625l1.0 0l0 5.578125q0 1.078125 0.578125 1.65625q0.59375 0.578125 1.71875 0.578125q1.09375 0 1.671875 -0.578125q0.59375 -0.578125 0.59375 -1.65625l0 -5.578125l0.984375 0z" fill-rule="nonzero"/><path fill="#c9daf8" d="m548.5407 319.3983l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m548.5407 319.3983l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path fill="#000000" d="m585.3668 331.6175q-1.40625 0 -2.234375 0.9375q-0.8125 0.9375 -0.8125 2.578125q0 1.671875 0.78125 2.59375q0.796875 0.921875 2.25 0.921875q0.90625 0 2.046875 -0.328125l0 0.875q-0.890625 0.34375 -2.1875 0.34375q-1.890625 0 -2.921875 -1.15625q-1.03125 -1.15625 -1.03125 -3.265625q0 -1.328125 0.484375 -2.3125q0.5 -1.0 1.4375 -1.53125q0.9375 -0.546875 2.203125 -0.546875q1.34375 0 2.359375 0.484375l-0.421875 0.859375q-0.984375 -0.453125 -1.953125 -0.453125zm9.3359375 1.71875q0 1.3125 -0.890625 2.015625q-0.890625 0.6875 -2.53125 0.6875l-1.015625 0l0 3.375l-1.0 0l0 -8.5625l2.234375 0q3.203125 0 3.203125 2.484375zm-4.4375 1.859375l0.90625 0q1.3125 0 1.90625 -0.421875q0.59375 -0.4375 0.59375 -1.390625q0 -0.84375 -0.5625 -1.25q-0.546875 -0.421875 -1.734375 -0.421875l-1.109375 0l0 3.484375zm12.693359 -4.34375l0 5.53125q0 1.46875 -0.890625 2.3125q-0.875 0.84375 -2.421875 0.84375q-1.546875 0 -2.390625 -0.84375q-0.84375 -0.859375 -0.84375 -2.328125l0 -5.515625l1.0 0l0 5.578125q0 1.078125 0.578125 1.65625q0.59375 0.578125 1.71875 0.578125q1.09375 0 1.671875 -0.578125q0.59375 -0.578125 0.59375 -1.65625l0 -5.578125l0.984375 0z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m219.98688 334.92584l64.12598 -0.03149414" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.98688 334.92584l60.698914 -0.029815674" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m280.68576 334.89603l-1.1240234 1.1251526l3.0892334 -1.1260986l-3.090332 -1.1230774z" fill-rule="evenodd"/><path fill="#d9ead3" d="m413.02625 141.28871l20.53543 0l0 20.53543l-20.53543 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m413.02625 141.28871l20.53543 0l0 20.53543l-20.53543 0z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m437.52493 135.68242l77.480316 0l0 31.748032l-77.480316 0z" fill-rule="evenodd"/><path fill="#000000" d="m454.54056 151.84303q0 2.109375 -1.15625 3.234375q-1.140625 1.125 -3.3125 1.125l-2.375 0l0 -8.5625l2.625 0q2.0 0 3.109375 1.109375q1.109375 1.09375 1.109375 3.09375zm-1.046875 0.03125q0 -1.671875 -0.84375 -2.515625q-0.84375 -0.859375 -2.5 -0.859375l-1.453125 0l0 6.84375l1.21875 0q1.78125 0 2.671875 -0.875q0.90625 -0.875 0.90625 -2.59375zm6.763672 4.328125l-0.203125 -0.921875l-0.046875 0q-0.46875 0.609375 -0.953125 0.828125q-0.46875 0.21875 -1.1875 0.21875q-0.953125 0 -1.5 -0.5q-0.546875 -0.5 -0.546875 -1.40625q0 -1.9375 3.109375 -2.03125l1.09375 -0.03125l0 -0.40625q0 -0.75 -0.328125 -1.109375q-0.3125 -0.359375 -1.03125 -0.359375q-0.8125 0 -1.8125 0.484375l-0.3125 -0.75q0.484375 -0.25 1.046875 -0.390625q0.5625 -0.15625 1.140625 -0.15625q1.140625 0 1.6875 0.515625q0.5625 0.5 0.5625 1.625l0 4.390625l-0.71875 0zm-2.203125 -0.6875q0.90625 0 1.421875 -0.5q0.53125 -0.5 0.53125 -1.390625l0 -0.578125l-0.984375 0.03125q-1.15625 0.046875 -1.671875 0.375q-0.5 0.3125 -0.5 0.984375q0 0.53125 0.3125 0.8125q0.3125 0.265625 0.890625 0.265625zm7.001953 0q0.25 0 0.484375 -0.03125q0.25 -0.046875 0.390625 -0.078125l0 0.734375q-0.15625 0.078125 -0.46875 0.125q-0.296875 0.0625 -0.546875 0.0625q-1.859375 0 -1.859375 -1.96875l0 -3.828125l-0.921875 0l0 -0.46875l0.921875 -0.40625l0.40625 -1.359375l0.5625 0l0 1.484375l1.859375 0l0 0.75l-1.859375 0l0 3.78125q0 0.578125 0.265625 0.890625q0.28125 0.3125 0.765625 0.3125zm6.111328 0.6875l-0.203125 -0.921875l-0.046875 0q-0.46875 0.609375 -0.953125 0.828125q-0.46875 0.21875 -1.1875 0.21875q-0.953125 0 -1.5 -0.5q-0.546875 -0.5 -0.546875 -1.40625q0 -1.9375 3.109375 -2.03125l1.09375 -0.03125l0 -0.40625q0 -0.75 -0.328125 -1.109375q-0.3125 -0.359375 -1.03125 -0.359375q-0.8125 0 -1.8125 0.484375l-0.3125 -0.75q0.484375 -0.25 1.046875 -0.390625q0.5625 -0.15625 1.140625 -0.15625q1.140625 0 1.6875 0.515625q0.5625 0.5 0.5625 1.625l0 4.390625l-0.71875 0zm-2.203125 -0.6875q0.90625 0 1.421875 -0.5q0.53125 -0.5 0.53125 -1.390625l0 -0.578125l-0.984375 0.03125q-1.15625 0.046875 -1.671875 0.375q-0.5 0.3125 -0.5 0.984375q0 0.53125 0.3125 0.8125q0.3125 0.265625 0.890625 0.265625zm10.822266 0.6875l-1.0 0l0 -7.6875l-2.703125 0l0 -0.875l6.421875 0l0 0.875l-2.71875 0l0 7.6875zm2.8417969 -6.421875l1.046875 0l1.40625 3.65625q0.453125 1.265625 0.5625 1.8125l0.046875 0q0.078125 -0.296875 0.3125 -1.015625q0.25 -0.734375 1.609375 -4.453125l1.03125 0l-2.75 7.3125q-0.421875 1.078125 -0.96875 1.53125q-0.546875 0.46875 -1.34375 0.46875q-0.4375 0 -0.875 -0.109375l0 -0.78125q0.328125 0.078125 0.71875 0.078125q1.0 0 1.4375 -1.125l0.359375 -0.921875l-2.59375 -6.453125zm10.046875 6.546875q-0.625 0 -1.140625 -0.234375q-0.515625 -0.234375 -0.875 -0.71875l-0.0625 0q0.0625 0.5625 0.0625 1.0625l0 2.65625l-0.96875 0l0 -9.3125l0.796875 0l0.125 0.875l0.046875 0q0.375 -0.53125 0.875 -0.765625q0.5 -0.234375 1.140625 -0.234375q1.28125 0 1.96875 0.875q0.703125 0.875 0.703125 2.453125q0 1.578125 -0.703125 2.46875q-0.703125 0.875 -1.96875 0.875zm-0.140625 -5.84375q-0.984375 0 -1.421875 0.546875q-0.4375 0.546875 -0.453125 1.734375l0 0.21875q0 1.359375 0.453125 1.9375q0.453125 0.578125 1.453125 0.578125q0.828125 0 1.296875 -0.671875q0.46875 -0.671875 0.46875 -1.859375q0 -1.203125 -0.46875 -1.84375q-0.46875 -0.640625 -1.328125 -0.640625zm7.2285156 5.84375q-1.421875 0 -2.25 -0.875q-0.828125 -0.875 -0.828125 -2.40625q0 -1.5625 0.765625 -2.46875q0.765625 -0.921875 2.0625 -0.921875q1.203125 0 1.90625 0.796875q0.703125 0.796875 0.703125 2.09375l0 0.625l-4.421875 0q0.03125 1.125 0.5625 1.71875q0.546875 0.578125 1.53125 0.578125q1.03125 0 2.046875 -0.4375l0 0.875q-0.515625 0.21875 -0.984375 0.3125q-0.453125 0.109375 -1.09375 0.109375zm-0.265625 -5.84375q-0.78125 0 -1.25 0.5q-0.453125 0.5 -0.53125 1.390625l3.359375 0q0 -0.921875 -0.40625 -1.40625q-0.40625 -0.484375 -1.171875 -0.484375z" fill-rule="nonzero"/><path fill="#f4cccc" d="m519.9029 141.28871l20.5354 0l0 20.53543l-20.5354 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m519.9029 141.28871l20.5354 0l0 20.53543l-20.5354 0z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m544.40155 135.68242l100.0 0l0 31.748032l-100.0 0z" fill-rule="evenodd"/><path fill="#000000" d="m554.5734 156.20241l0 -8.5625l1.0 0l0 8.5625l-1.0 0zm7.595703 0l0 -4.15625q0 -0.78125 -0.359375 -1.171875q-0.34375 -0.390625 -1.109375 -0.390625q-1.015625 0 -1.484375 0.546875q-0.46875 0.546875 -0.46875 1.796875l0 3.375l-0.96875 0l0 -6.421875l0.796875 0l0.15625 0.875l0.046875 0q0.296875 -0.46875 0.828125 -0.734375q0.546875 -0.265625 1.203125 -0.265625q1.171875 0 1.75 0.5625q0.59375 0.5625 0.59375 1.796875l0 4.1875l-0.984375 0zm5.8652344 -5.671875l-1.625 0l0 5.671875l-0.984375 0l0 -5.671875l-1.140625 0l0 -0.4375l1.140625 -0.34375l0 -0.359375q0 -2.375 2.078125 -2.375q0.5 0 1.1875 0.203125l-0.25 0.78125q-0.5625 -0.171875 -0.953125 -0.171875q-0.5625 0 -0.828125 0.375q-0.25 0.359375 -0.25 1.15625l0 0.421875l1.625 0l0 0.75zm4.1132812 -0.875q0.421875 0 0.765625 0.078125l-0.140625 0.90625q-0.390625 -0.09375 -0.703125 -0.09375q-0.78125 0 -1.34375 0.640625q-0.546875 0.625 -0.546875 1.5625l0 3.453125l-0.96875 0l0 -6.421875l0.796875 0l0.125 1.1875l0.046875 0q0.34375 -0.625 0.84375 -0.96875q0.515625 -0.34375 1.125 -0.34375zm5.9140625 6.546875l-0.203125 -0.921875l-0.046875 0q-0.46875 0.609375 -0.953125 0.828125q-0.46875 0.21875 -1.1875 0.21875q-0.953125 0 -1.5 -0.5q-0.546875 -0.5 -0.546875 -1.40625q0 -1.9375 3.109375 -2.03125l1.09375 -0.03125l0 -0.40625q0 -0.75 -0.328125 -1.109375q-0.3125 -0.359375 -1.03125 -0.359375q-0.8125 0 -1.8125 0.484375l-0.3125 -0.75q0.484375 -0.25 1.046875 -0.390625q0.5625 -0.15625 1.140625 -0.15625q1.140625 0 1.6875 0.515625q0.5625 0.5 0.5625 1.625l0 4.390625l-0.71875 0zm-2.203125 -0.6875q0.90625 0 1.421875 -0.5q0.53125 -0.5 0.53125 -1.390625l0 -0.578125l-0.984375 0.03125q-1.15625 0.046875 -1.671875 0.375q-0.5 0.3125 -0.5 0.984375q0 0.53125 0.3125 0.8125q0.3125 0.265625 0.890625 0.265625zm9.064453 -1.0625q0 0.890625 -0.671875 1.390625q-0.65625 0.484375 -1.875 0.484375q-1.265625 0 -1.984375 -0.40625l0 -0.90625q0.46875 0.234375 0.984375 0.375q0.53125 0.125 1.03125 0.125q0.765625 0 1.171875 -0.234375q0.40625 -0.25 0.40625 -0.75q0 -0.375 -0.328125 -0.640625q-0.3125 -0.265625 -1.265625 -0.625q-0.890625 -0.34375 -1.28125 -0.59375q-0.375 -0.25 -0.5625 -0.5625q-0.171875 -0.3125 -0.171875 -0.75q0 -0.78125 0.640625 -1.234375q0.640625 -0.46875 1.75 -0.46875q1.03125 0 2.03125 0.421875l-0.359375 0.796875q-0.953125 -0.390625 -1.75 -0.390625q-0.6875 0 -1.046875 0.21875q-0.34375 0.203125 -0.34375 0.59375q0 0.25 0.125 0.4375q0.140625 0.171875 0.421875 0.34375q0.296875 0.15625 1.140625 0.46875q1.140625 0.421875 1.53125 0.84375q0.40625 0.421875 0.40625 1.0625zm3.6621094 1.0625q0.25 0 0.484375 -0.03125q0.25 -0.046875 0.390625 -0.078125l0 0.734375q-0.15625 0.078125 -0.46875 0.125q-0.296875 0.0625 -0.546875 0.0625q-1.859375 0 -1.859375 -1.96875l0 -3.828125l-0.921875 0l0 -0.46875l0.921875 -0.40625l0.40625 -1.359375l0.5625 0l0 1.484375l1.859375 0l0 0.75l-1.859375 0l0 3.78125q0 0.578125 0.265625 0.890625q0.28125 0.3125 0.765625 0.3125zm5.095703 -5.859375q0.421875 0 0.765625 0.078125l-0.140625 0.90625q-0.390625 -0.09375 -0.703125 -0.09375q-0.78125 0 -1.34375 0.640625q-0.546875 0.625 -0.546875 1.5625l0 3.453125l-0.96875 0l0 -6.421875l0.796875 0l0.125 1.1875l0.046875 0q0.34375 -0.625 0.84375 -0.96875q0.515625 -0.34375 1.125 -0.34375zm2.8828125 0.125l0 4.171875q0 0.78125 0.34375 1.171875q0.359375 0.375 1.125 0.375q1.015625 0 1.46875 -0.546875q0.46875 -0.546875 0.46875 -1.796875l0 -3.375l0.96875 0l0 6.421875l-0.796875 0l-0.140625 -0.859375l-0.046875 0q-0.296875 0.46875 -0.828125 0.734375q-0.53125 0.25 -1.21875 0.25q-1.171875 0 -1.75 -0.5625q-0.578125 -0.5625 -0.578125 -1.78125l0 -4.203125l0.984375 0zm9.005859 6.546875q-1.390625 0 -2.15625 -0.859375q-0.765625 -0.859375 -0.765625 -2.4375q0 -1.609375 0.78125 -2.484375q0.78125 -0.890625 2.203125 -0.890625q0.46875 0 0.921875 0.109375q0.46875 0.09375 0.734375 0.234375l-0.296875 0.828125q-0.328125 -0.140625 -0.703125 -0.21875q-0.375 -0.078125 -0.671875 -0.078125q-1.953125 0 -1.953125 2.484375q0 1.1875 0.46875 1.828125q0.484375 0.625 1.421875 0.625q0.796875 0 1.640625 -0.34375l0 0.859375q-0.640625 0.34375 -1.625 0.34375zm5.2285156 -0.8125q0.25 0 0.484375 -0.03125q0.25 -0.046875 0.390625 -0.078125l0 0.734375q-0.15625 0.078125 -0.46875 0.125q-0.296875 0.0625 -0.546875 0.0625q-1.859375 0 -1.859375 -1.96875l0 -3.828125l-0.921875 0l0 -0.46875l0.921875 -0.40625l0.40625 -1.359375l0.5625 0l0 1.484375l1.859375 0l0 0.75l-1.859375 0l0 3.78125q0 0.578125 0.265625 0.890625q0.28125 0.3125 0.765625 0.3125zm3.0800781 -5.734375l0 4.171875q0 0.78125 0.34375 1.171875q0.359375 0.375 1.125 0.375q1.015625 0 1.46875 -0.546875q0.46875 -0.546875 0.46875 -1.796875l0 -3.375l0.96875 0l0 6.421875l-0.796875 0l-0.140625 -0.859375l-0.046875 0q-0.296875 0.46875 -0.828125 0.734375q-0.53125 0.25 -1.21875 0.25q-1.171875 0 -1.75 -0.5625q-0.578125 -0.5625 -0.578125 -1.78125l0 -4.203125l0.984375 0zm9.380859 -0.125q0.421875 0 0.765625 0.078125l-0.140625 0.90625q-0.390625 -0.09375 -0.703125 -0.09375q-0.78125 0 -1.34375 0.640625q-0.546875 0.625 -0.546875 1.5625l0 3.453125l-0.96875 0l0 -6.421875l0.796875 0l0.125 1.1875l0.046875 0q0.34375 -0.625 0.84375 -0.96875q0.515625 -0.34375 1.125 -0.34375zm4.6796875 6.671875q-1.421875 0 -2.25 -0.875q-0.828125 -0.875 -0.828125 -2.40625q0 -1.5625 0.765625 -2.46875q0.765625 -0.921875 2.0625 -0.921875q1.203125 0 1.90625 0.796875q0.703125 0.796875 0.703125 2.09375l0 0.625l-4.421875 0q0.03125 1.125 0.5625 1.71875q0.546875 0.578125 1.53125 0.578125q1.03125 0 2.046875 -0.4375l0 0.875q-0.515625 0.21875 -0.984375 0.3125q-0.453125 0.109375 -1.09375 0.109375zm-0.265625 -5.84375q-0.78125 0 -1.25 0.5q-0.453125 0.5 -0.53125 1.390625l3.359375 0q0 -0.921875 -0.40625 -1.40625q-0.40625 -0.484375 -1.171875 -0.484375z" fill-rule="nonzero"/><path fill="#d9ead3" d="m31.874912 252.53609l87.49606 0l0 30.992142l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m31.874912 252.53609l87.49606 0l0 30.992142l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m64.2166 265.0834l0.984375 0l0 0.078125q-0.0625 0.078125 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 2.671875l2.9375 0l0 -3.1875l0.984375 0l0 0.078125q-0.078125 0.078125 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 6.953125l-0.890625 0l0 -3.5625l-2.921875 0l0 3.5625l-0.875 0l0 -7.46875zm6.046921 0l1.75 0q0.921875 0 1.453125 0.25q0.546875 0.234375 0.9375 0.765625q0.734375 0.984375 0.734375 2.75q-0.0625 1.8125 -0.84375 2.78125q-0.765625 0.953125 -2.421875 0.9375l-1.609375 0l0 -7.484375zm1.5625 6.828125q2.484375 0 2.484375 -3.0q-0.015625 -1.53125 -0.578125 -2.328125q-0.546875 -0.796875 -1.765625 -0.796875l-0.90625 0l0 6.125l0.765625 0zm4.734421 0.640625l0 -7.484375l4.3125 0l0 0.734375l-3.46875 0l0 2.34375l2.796875 0l0 0.734375l-2.796875 0l0 3.671875l-0.84375 0zm8.140671 -4.859375q0.65625 0 1.15625 0.3125q0.515625 0.296875 0.8125 0.875q0.296875 0.5625 0.296875 1.3125q0 0.765625 -0.3125 1.328125q-0.296875 0.5625 -0.84375 0.859375q-0.53125 0.296875 -1.203125 0.296875q-0.6875 0 -1.265625 -0.296875q-0.578125 -0.296875 -0.953125 -0.84375l0.671875 -0.515625l0.015625 0q0.015625 0 0.015625 0q0 0 0 0q0.3125 0.484375 0.65625 0.71875q0.34375 0.21875 0.90625 0.21875q0.390625 0 0.71875 -0.21875q0.34375 -0.234375 0.53125 -0.625q0.203125 -0.40625 0.203125 -0.953125q0 -0.8125 -0.4375 -1.28125q-0.4375 -0.484375 -1.09375 -0.484375q-0.390625 0 -0.765625 0.1875q-0.359375 0.171875 -0.640625 0.515625l-0.53125 -0.21875l0.25 -3.796875l3.796875 0l0 0.75l-3.078125 0l-0.125 2.140625q0.59375 -0.28125 1.21875 -0.28125zm-1.625 3.328125q-0.0625 -0.09375 0.015625 -0.015625l-0.015625 0.015625zm0.125 0q0 0.0625 -0.109375 -0.015625l0.0625 -0.046875l0.046875 0.0625z" fill-rule="nonzero"/><path fill="#d9ead3" d="m190.14 134.76706l87.49608 0l0 30.992126l-87.49608 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m190.14 134.76706l87.49608 0l0 30.992126l-87.49608 0z" fill-rule="evenodd"/><path fill="#000000" d="m215.40347 151.18938l0 2.9375q-1.0625 0.78125 -2.1875 0.78125q-0.890625 0 -1.546875 -0.46875q-0.65625 -0.46875 -1.0 -1.3125q-0.34375 -0.859375 -0.34375 -1.984375q-0.015625 -1.5 0.421875 -2.359375q0.453125 -0.875 1.09375 -1.21875q0.65625 -0.34375 1.3125 -0.34375q0.671875 0 1.296875 0.359375q0.625 0.34375 0.9375 0.90625l-0.59375 0.46875l-0.015625 0.015625q-0.40625 -0.546875 -0.75 -0.765625q-0.34375 -0.234375 -0.859375 -0.234375q-0.90625 0 -1.46875 0.703125q-0.546875 0.6875 -0.546875 2.28125q0 1.53125 0.546875 2.375q0.546875 0.828125 1.515625 0.828125q0.359375 0 0.75 -0.109375q0.390625 -0.125 0.65625 -0.34375l0 -1.78125l-1.34375 0l0 -0.734375l2.125 0zm-0.75 -2.21875q0 -0.078125 0.109375 0l-0.0625 0.0625l-0.046875 -0.0625zm0.125 0q0.0625 0.078125 -0.015625 0l0.015625 0zm6.218796 1.46875l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625zm3.9375458 -1.3125q1.1875 0 1.796875 0.625q0.625 0.609375 0.625 2.0625l0 2.921875l-0.9375 0l0 -0.84375q-0.5 0.96875 -1.875 0.96875q-0.90625 0 -1.421875 -0.40625q-0.515625 -0.421875 -0.515625 -1.09375q0 -0.578125 0.359375 -1.0q0.375 -0.4375 1.0 -0.671875q0.640625 -0.234375 1.390625 -0.234375q0.6875 0 1.234375 0.0625q-0.0625 -0.921875 -0.484375 -1.296875q-0.40625 -0.375 -1.21875 -0.375q-0.421875 0 -0.796875 0.15625q-0.375 0.15625 -0.6875 0.453125l-0.421875 -0.5625q0.765625 -0.765625 1.953125 -0.765625zm-0.3125 5.078125q0.890625 0 1.40625 -0.515625q0.515625 -0.53125 0.5625 -1.515625q-0.53125 -0.078125 -1.15625 -0.078125q-0.90625 0 -1.4375 0.296875q-0.53125 0.296875 -0.53125 0.921875q0 0.890625 1.15625 0.890625zm4.109421 -4.953125l0.875 0l0 1.078125q0.1875 -0.59375 0.625 -0.890625q0.4375 -0.3125 1.046875 -0.3125q0.625 0 1.140625 0.328125q0.53125 0.3125 0.84375 0.953125q0.3125 0.625 0.3125 1.546875q0 0.921875 -0.328125 1.59375q-0.328125 0.65625 -0.859375 1.0q-0.53125 0.328125 -1.140625 0.328125q-0.484375 0 -0.921875 -0.21875q-0.421875 -0.234375 -0.703125 -0.640625l0 2.71875l-0.890625 0l0 -7.484375zm2.375 4.859375q0.65625 0 1.109375 -0.5q0.453125 -0.5 0.453125 -1.625q0 -1.015625 -0.40625 -1.5625q-0.390625 -0.546875 -1.125 -0.546875q-0.671875 0 -1.109375 0.578125q-0.421875 0.5625 -0.421875 1.71875q0.03125 0.953125 0.421875 1.453125q0.390625 0.484375 1.078125 0.484375zm3.7812958 -7.359375l0.953125 0l0 0.09375q-0.078125 0.0625 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 2.9375q0.328125 -0.5 0.8125 -0.796875q0.484375 -0.296875 0.984375 -0.296875q0.78125 0 1.21875 0.546875q0.453125 0.546875 0.453125 1.734375l0 3.328125l-0.84375 0l0 -3.296875q0 -0.8125 -0.28125 -1.1875q-0.265625 -0.375 -0.71875 -0.375q-0.390625 0 -0.765625 0.21875q-0.375 0.203125 -0.625 0.59375q-0.234375 0.390625 -0.234375 0.890625l0 3.15625l-0.84375 0l0 -7.984375zm5.765671 0.515625l1.75 0q0.921875 0 1.453125 0.25q0.546875 0.234375 0.9375 0.765625q0.734375 0.984375 0.734375 2.75q-0.0625 1.8125 -0.84375 2.78125q-0.765625 0.953125 -2.421875 0.9375l-1.609375 0l0 -7.484375zm1.5625 6.828125q2.484375 0 2.484375 -3.0q-0.015625 -1.53125 -0.578125 -2.328125q-0.546875 -0.796875 -1.765625 -0.796875l-0.90625 0l0 6.125l0.765625 0zm7.062546 0.765625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm6.750061 -3.375l-0.015625 0.015625q-0.328125 -0.421875 -0.5625 -0.5625q-0.234375 -0.15625 -0.65626526 -0.15625q-0.5625 0 -0.890625 0.34375q-0.328125 0.328125 -0.328125 1.078125l0 0.453125l1.8125153 0l0 0.6875l-1.8125153 0l0 4.796875l-0.828125 0l0 -4.796875l-1.1875 0l0 -0.6875l1.1875 0l0 -0.453125q0 -1.0625 0.546875 -1.578125q0.546875 -0.53125 1.46875 -0.53125q0.53126526 0 0.98439026 0.1875q0.453125 0.1875 0.734375 0.5625l-0.453125 0.640625zm-0.140625 0.03125q0 -0.03125 0.046875 0q0.046875 0.015625 0.0625 0.015625l-0.046875 0.046875l-0.0625 -0.046875l0 -0.015625zm0.125 -0.015625q0.078125 0.09375 0.03125 0.0625q-0.03125 -0.03125 -0.046875 -0.03125l0.015625 -0.03125z" fill-rule="nonzero"/><path fill="#d9ead3" d="m233.1085 252.53609l87.49608 0l0 30.992142l-87.49608 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m233.1085 252.53609l87.49608 0l0 30.992142l-87.49608 0z" fill-rule="evenodd"/><path fill="#000000" d="m259.7939 265.55215l0 -7.484375l4.3125 0l0 0.734375l-3.46875 0l0 2.34375l2.796875 0l0 0.734375l-2.796875 0l0 3.671875l-0.84375 0zm10.1719055 -4.34375l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625zm4.015686 4.390625q-0.734375 0 -1.3125 -0.359375q-0.578125 -0.359375 -0.90625 -1.0q-0.3125 -0.65625 -0.3125 -1.484375q0 -0.828125 0.3125 -1.46875q0.328125 -0.65625 0.90625 -1.015625q0.578125 -0.375 1.3125 -0.375q0.734375 0 1.3125 0.375q0.578125 0.359375 0.890625 1.015625q0.328125 0.640625 0.328125 1.46875q0 0.828125 -0.328125 1.484375q-0.3125 0.640625 -0.890625 1.0q-0.578125 0.359375 -1.3125 0.359375zm0 -0.71875q0.46875 0 0.828125 -0.265625q0.375 -0.28125 0.578125 -0.765625q0.21875 -0.484375 0.21875 -1.109375q0 -0.9375 -0.46875 -1.53125q-0.453125 -0.59375 -1.15625 -0.59375q-0.703125 0 -1.171875 0.59375q-0.453125 0.59375 -0.453125 1.53125q0 0.625 0.203125 1.109375q0.21875 0.484375 0.578125 0.765625q0.375 0.265625 0.84375 0.265625zm3.5937805 0.03125l3.40625 -4.140625l-3.140625 0l0 -0.75l4.25 0l0 0.59375l-3.328125 4.140625l3.09375 0q0.1875 0 0.28125 -0.015625q0.09375 -0.03125 0.15625 -0.09375l0.09375 0l0 0.859375l-4.8125 0l0 -0.59375zm8.671936 0.71875q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm2.3906555 -2.203125l0.84375 0l0 0.96875q0.328125 -0.5 0.8125 -0.796875q0.5 -0.296875 1.046875 -0.296875q0.734375 0 1.171875 0.5625q0.4375 0.546875 0.4375 1.71875l0 3.328125l-0.84375 0l0 -3.296875q0 -0.8125 -0.28125 -1.1875q-0.265625 -0.375 -0.71875 -0.375q-0.375 0 -0.75 0.21875q-0.375 0.21875 -0.625 0.609375q-0.25 0.390625 -0.25 0.875l0 3.15625l-0.84375 0l0 -5.484375z" fill-rule="nonzero"/><path fill="#000000" d="m258.37198 275.9584l0 2.9375q-1.0625 0.78125 -2.1875 0.78125q-0.890625 0 -1.546875 -0.46875q-0.65625 -0.46875 -1.0 -1.3125q-0.34375 -0.859375 -0.34375 -1.984375q-0.015625 -1.5 0.421875 -2.359375q0.453125 -0.875 1.09375 -1.21875q0.65625 -0.34375 1.3125 -0.34375q0.671875 0 1.296875 0.359375q0.625 0.34375 0.9375 0.90625l-0.59375 0.46875l-0.015625 0.015625q-0.40625 -0.546875 -0.75 -0.765625q-0.34375 -0.234375 -0.859375 -0.234375q-0.90625 0 -1.46875 0.703125q-0.546875 0.6875 -0.546875 2.28125q0 1.53125 0.546875 2.375q0.546875 0.828125 1.515625 0.828125q0.359375 0 0.75 -0.109375q0.390625 -0.125 0.65625 -0.34375l0 -1.78125l-1.34375 0l0 -0.734375l2.125 0zm-0.75 -2.21875q0 -0.078125 0.109375 0l-0.0625 0.0625l-0.046875 -0.0625zm0.125 0q0.0625 0.078125 -0.015625 0l0.015625 0zm6.218811 1.46875l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625zm3.9375305 -1.3125q1.1875 0 1.796875 0.625q0.625 0.609375 0.625 2.0625l0 2.921875l-0.9375 0l0 -0.84375q-0.5 0.96875 -1.875 0.96875q-0.90625 0 -1.421875 -0.40625q-0.515625 -0.421875 -0.515625 -1.09375q0 -0.578125 0.359375 -1.0q0.375 -0.4375 1.0 -0.671875q0.640625 -0.234375 1.390625 -0.234375q0.6875 0 1.234375 0.0625q-0.0625 -0.921875 -0.484375 -1.296875q-0.40625 -0.375 -1.21875 -0.375q-0.421875 0 -0.796875 0.15625q-0.375 0.15625 -0.6875 0.453125l-0.421875 -0.5625q0.765625 -0.765625 1.953125 -0.765625zm-0.3125 5.078125q0.890625 0 1.40625 -0.515625q0.515625 -0.53125 0.5625 -1.515625q-0.53125 -0.078125 -1.15625 -0.078125q-0.90625 0 -1.4375 0.296875q-0.53125 0.296875 -0.53125 0.921875q0 0.890625 1.15625 0.890625zm4.109436 -4.953125l0.875 0l0 1.078125q0.1875 -0.59375 0.625 -0.890625q0.4375 -0.3125 1.046875 -0.3125q0.625 0 1.140625 0.328125q0.53125 0.3125 0.84375 0.953125q0.3125 0.625 0.3125 1.546875q0 0.921875 -0.328125 1.59375q-0.328125 0.65625 -0.859375 1.0q-0.53125 0.328125 -1.140625 0.328125q-0.484375 0 -0.921875 -0.21875q-0.421875 -0.234375 -0.703125 -0.640625l0 2.71875l-0.890625 0l0 -7.484375zm2.375 4.859375q0.65625 0 1.109375 -0.5q0.453125 -0.5 0.453125 -1.625q0 -1.015625 -0.40625 -1.5625q-0.390625 -0.546875 -1.125 -0.546875q-0.671875 0 -1.109375 0.578125q-0.421875 0.5625 -0.421875 1.71875q0.03125 0.953125 0.421875 1.453125q0.390625 0.484375 1.078125 0.484375zm3.7812805 -7.359375l0.953125 0l0 0.09375q-0.078125 0.0625 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 2.9375q0.328125 -0.5 0.8125 -0.796875q0.484375 -0.296875 0.984375 -0.296875q0.78125 0 1.21875 0.546875q0.453125 0.546875 0.453125 1.734375l0 3.328125l-0.84375 0l0 -3.296875q0 -0.8125 -0.28125 -1.1875q-0.265625 -0.375 -0.71875 -0.375q-0.390625 0 -0.765625 0.21875q-0.375 0.203125 -0.625 0.59375q-0.234375 0.390625 -0.234375 0.890625l0 3.15625l-0.84375 0l0 -7.984375zm5.765686 0.515625l1.75 0q0.921875 0 1.453125 0.25q0.546875 0.234375 0.9375 0.765625q0.734375 0.984375 0.734375 2.75q-0.0625 1.8125 -0.84375 2.78125q-0.765625 0.953125 -2.421875 0.9375l-1.609375 0l0 -7.484375zm1.5625 6.828125q2.484375 0 2.484375 -3.0q-0.015625 -1.53125 -0.578125 -2.328125q-0.546875 -0.796875 -1.765625 -0.796875l-0.90625 0l0 6.125l0.765625 0zm7.0625305 0.765625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm6.750061 -3.375l-0.015625 0.015625q-0.328125 -0.421875 -0.5625 -0.5625q-0.234375 -0.15625 -0.65625 -0.15625q-0.5625 0 -0.890625 0.34375q-0.328125 0.328125 -0.328125 1.078125l0 0.453125l1.8125 0l0 0.6875l-1.8125 0l0 4.796875l-0.828125 0l0 -4.796875l-1.1875 0l0 -0.6875l1.1875 0l0 -0.453125q0 -1.0625 0.546875 -1.578125q0.546875 -0.53125 1.46875 -0.53125q0.53125 0 0.984375 0.1875q0.453125 0.1875 0.734375 0.5625l-0.453125 0.640625zm-0.140625 0.03125q0 -0.03125 0.046875 0q0.046875 0.015625 0.0625 0.015625l-0.046875 0.046875l-0.0625 -0.046875l0 -0.015625zm0.125 -0.015625q0.078125 0.09375 0.03125 0.0625q-0.03125 -0.03125 -0.046875 -0.03125l0.015625 -0.03125z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m276.85565 232.16667l0 20.377945" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m276.85565 232.16667l0 16.950867" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m276.85565 249.11754l-1.1246033 -1.124588l1.1246033 3.0897675l1.1245728 -3.0897675z" fill-rule="evenodd"/><path fill="#f4cccc" d="m31.874016 68.3563l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m31.874016 68.3563l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m56.94999 87.87236q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm5.890671 -6.15625l-0.015625 0.015625q-0.328125 -0.421875 -0.5625 -0.5625q-0.234375 -0.15625 -0.65625 -0.15625q-0.5625 0 -0.890625 0.34375q-0.328125 0.328125 -0.328125 1.078125l0 0.453125l1.8125 0l0 0.6875l-1.8125 0l0 4.796875l-0.828125 0l0 -4.796875l-1.1875 0l0 -0.6875l1.1875 0l0 -0.453125q0 -1.0625 0.546875 -1.578125q0.546875 -0.53125 1.46875 -0.53125q0.53125 0 0.984375 0.1875q0.453125 0.1875 0.734375 0.5625l-0.453125 0.640625zm-0.140625 0.03125q0 -0.03125 0.046875 0q0.046875 0.015625 0.0625 0.015625l-0.046875 0.046875l-0.0625 -0.046875l0 -0.015625zm0.125 -0.015625q0.078125 0.09375 0.03125 0.0625q-0.03125 -0.03125 -0.046875 -0.03125l0.015625 -0.03125zm3.6562958 6.765625q-0.296875 0 -0.5 -0.203125q-0.203125 -0.203125 -0.203125 -0.46875q0 -0.28125 0.203125 -0.484375q0.203125 -0.203125 0.5 -0.203125q0.265625 0 0.46875 0.203125q0.21875 0.203125 0.21875 0.484375q0 0.265625 -0.21875 0.46875q-0.203125 0.203125 -0.46875 0.203125zm5.125046 -3.875l-0.578125 0.65625l0 3.09375l-0.90625 0l0 -7.46875l1.015625 0l0 0.078125q-0.078125 0.078125 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 2.921875l3.125 -3.5q0.296875 0.0625 0.609375 0.0625l0.3125 0l-2.828125 3.21875l3.03125 4.25l-1.078125 0.046875l-2.59375 -3.796875zm7.281296 3.875q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm6.640671 -1.0625l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625zm3.9375458 -1.3125q1.1875 0 1.796875 0.625q0.625 0.609375 0.625 2.0625l0 2.921875l-0.9375 0l0 -0.84375q-0.5 0.96875 -1.875 0.96875q-0.90625 0 -1.421875 -0.40625q-0.515625 -0.421875 -0.515625 -1.09375q0 -0.578125 0.359375 -1.0q0.375 -0.4375 1.0 -0.671875q0.640625 -0.234375 1.390625 -0.234375q0.6875 0 1.234375 0.0625q-0.0625 -0.921875 -0.484375 -1.296875q-0.40625 -0.375 -1.21875 -0.375q-0.421875 0 -0.796875 0.15625q-0.375 0.15625 -0.6875 0.453125l-0.421875 -0.5625q0.765625 -0.765625 1.953125 -0.765625zm-0.3125 5.078125q0.890625 0 1.40625 -0.515625q0.515625 -0.53125 0.5625 -1.515625q-0.53125 -0.078125 -1.15625 -0.078125q-0.90625 0 -1.4375 0.296875q-0.53125 0.296875 -0.53125 0.921875q0 0.890625 1.15625 0.890625zm6.781296 -2.703125q1.03125 0.3125 1.453125 0.6875q0.4375 0.359375 0.4375 0.953125q0 0.734375 -0.59375 1.234375q-0.578125 0.484375 -1.671875 0.484375q-1.390625 0 -2.328125 -0.875l0.46875 -0.8125l0.015625 -0.015625l0.015625 0.015625q0.375 0.484375 0.765625 0.734375q0.40625 0.234375 1.078125 0.234375q0.65625 0 1.015625 -0.234375q0.375 -0.234375 0.375 -0.640625q0 -0.359375 -0.296875 -0.578125q-0.296875 -0.234375 -1.078125 -0.484375q-2.0625 -0.59375 -2.0625 -1.703125q0 -0.640625 0.515625 -1.0q0.53125 -0.375 1.5 -0.375q0.75 0 1.25 0.203125q0.515625 0.203125 0.9375 0.65625l-0.5 0.59375l0 0.015625q-0.265625 -0.390625 -0.734375 -0.609375q-0.453125 -0.21875 -0.921875 -0.21875q-0.515625 0 -0.859375 0.1875q-0.328125 0.171875 -0.328125 0.5q0 0.296875 0.328125 0.546875q0.34375 0.25 1.21875 0.5zm1.15625 -0.875q0 -0.0625 0.09375 0l-0.03125 0.046875l-0.0625 -0.046875zm0.140625 -0.03125q0.03125 0.046875 0.015625 0.0625q0 0.015625 -0.03125 0q-0.015625 -0.015625 -0.03125 -0.03125l0.046875 -0.03125zm-3.375 2.53125q0 0.046875 -0.109375 0l0.03125 -0.0625l0.078125 0.046875l0 0.015625zm-0.140625 0.03125q-0.03125 -0.046875 -0.03125 -0.046875q0.015625 0 0.0625 0.015625l-0.03125 0.03125z" fill-rule="nonzero"/><path fill="#f4cccc" d="m132.49081 68.35761l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m132.49081 68.35761l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m149.94176 88.37367l0 -7.484375l4.59375 0l0 0.734375l-3.796875 0l0 2.46875l3.125 0l0 0.765625l-3.125 0l0 2.765625l3.75 0l0 0.75l-4.546875 0zm8.687546 -3.234375q1.03125 0.3125 1.453125 0.6875q0.4375 0.359375 0.4375 0.953125q0 0.734375 -0.59375 1.234375q-0.578125 0.484375 -1.671875 0.484375q-1.390625 0 -2.328125 -0.875l0.46875 -0.8125l0.015625 -0.015625l0.015625 0.015625q0.375 0.484375 0.765625 0.734375q0.40625 0.234375 1.078125 0.234375q0.65625 0 1.015625 -0.234375q0.375 -0.234375 0.375 -0.640625q0 -0.359375 -0.296875 -0.578125q-0.296875 -0.234375 -1.078125 -0.484375q-2.0625 -0.59375 -2.0625 -1.703125q0 -0.640625 0.515625 -1.0q0.53125 -0.375 1.5 -0.375q0.75 0 1.25 0.203125q0.515625 0.203125 0.9375 0.65625l-0.5 0.59375l0 0.015625q-0.265625 -0.390625 -0.734375 -0.609375q-0.453125 -0.21875 -0.921875 -0.21875q-0.515625 0 -0.859375 0.1875q-0.328125 0.171875 -0.328125 0.5q0 0.296875 0.328125 0.546875q0.34375 0.25 1.21875 0.5zm1.15625 -0.875q0 -0.0625 0.09375 0l-0.03125 0.046875l-0.0625 -0.046875zm0.140625 -0.03125q0.03125 0.046875 0.015625 0.0625q0 0.015625 -0.03125 0q-0.015625 -0.015625 -0.03125 -0.03125l0.046875 -0.03125zm-3.375 2.53125q0 0.046875 -0.109375 0l0.03125 -0.0625l0.078125 0.046875l0 0.015625zm-0.140625 0.03125q-0.03125 -0.046875 -0.03125 -0.046875q0.015625 0 0.0625 0.015625l-0.03125 0.03125zm10.156296 1.078125q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm1.8750458 0.5l0 -0.703125l1.40625 0l0 -4.078125l-1.34375 0l0 -0.703125l2.203125 0l0 4.78125l1.28125 0l0 0.703125l-3.546875 0zm1.78125 -6.640625q-0.25 0 -0.4375 -0.171875q-0.171875 -0.1875 -0.171875 -0.4375q0 -0.265625 0.171875 -0.4375q0.171875 -0.1875 0.4375 -0.1875q0.25 0 0.4375 0.1875q0.1875 0.1875 0.1875 0.4375q0 0.25 -0.1875 0.4375q-0.1875 0.171875 -0.4375 0.171875zm3.4687958 1.15625l0.796875 0l0 0.546875q0.1875 -0.3125 0.484375 -0.484375q0.3125 -0.1875 0.625 -0.1875q0.359375 0 0.625 0.234375q0.28125 0.21875 0.359375 0.5625q0.140625 -0.359375 0.46875 -0.578125q0.34375 -0.21875 0.765625 -0.21875q0.53125 0 0.796875 0.390625q0.28125 0.375 0.25 1.0l0 4.21875l-0.78125 0l0 -3.890625q0 -0.6875 -0.140625 -0.890625q-0.125 -0.203125 -0.390625 -0.203125q-0.203125 0 -0.40625 0.203125q-0.203125 0.203125 -0.34375 0.53125q-0.125 0.3125 -0.125 0.625l0 3.625l-0.796875 0l0 -3.8125q0 -0.671875 -0.125 -0.90625q-0.125 -0.234375 -0.46875 -0.234375q-0.1875 0 -0.375 0.171875q-0.1875 0.15625 -0.3125 0.453125q-0.109375 0.28125 -0.109375 0.671875l0 3.65625l-0.796875 0l0 -5.484375zm8.468796 -0.125q1.1875 0 1.796875 0.625q0.625 0.609375 0.625 2.0625l0 2.921875l-0.9375 0l0 -0.84375q-0.5 0.96875 -1.875 0.96875q-0.90625 0 -1.421875 -0.40625q-0.515625 -0.421875 -0.515625 -1.09375q0 -0.578125 0.359375 -1.0q0.375 -0.4375 1.0 -0.671875q0.640625 -0.234375 1.390625 -0.234375q0.6875 0 1.234375 0.0625q-0.0625 -0.921875 -0.484375 -1.296875q-0.40625 -0.375 -1.21875 -0.375q-0.421875 0 -0.796875 0.15625q-0.375 0.15625 -0.6875 0.453125l-0.421875 -0.5625q0.765625 -0.765625 1.953125 -0.765625zm-0.3125 5.078125q0.890625 0 1.40625 -0.515625q0.515625 -0.53125 0.5625 -1.515625q-0.53125 -0.078125 -1.15625 -0.078125q-0.90625 0 -1.4375 0.296875q-0.53125 0.296875 -0.53125 0.921875q0 0.890625 1.15625 0.890625zm8.718796 0.03125q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm3.6719208 0.59375q-0.734375 0 -1.3125 -0.359375q-0.578125 -0.359375 -0.90625 -1.0q-0.3125 -0.65625 -0.3125 -1.484375q0 -0.828125 0.3125 -1.46875q0.328125 -0.65625 0.90625 -1.015625q0.578125 -0.375 1.3125 -0.375q0.734375 0 1.3125 0.375q0.578125 0.359375 0.890625 1.015625q0.328125 0.640625 0.328125 1.46875q0 0.828125 -0.328125 1.484375q-0.3125 0.640625 -0.890625 1.0q-0.578125 0.359375 -1.3125 0.359375zm0 -0.71875q0.46875 0 0.828125 -0.265625q0.375 -0.28125 0.578125 -0.765625q0.21875 -0.484375 0.21875 -1.109375q0 -0.9375 -0.46875 -1.53125q-0.453125 -0.59375 -1.15625 -0.59375q-0.703125 0 -1.171875 0.59375q-0.453125 0.59375 -0.453125 1.53125q0 0.625 0.203125 1.109375q0.21875 0.484375 0.578125 0.765625q0.375 0.265625 0.84375 0.265625zm8.109421 -3.71875l0 0.015625q-0.34375 -0.328125 -0.578125 -0.4375q-0.234375 -0.109375 -0.546875 -0.109375q-0.4375 0 -0.828125 0.21875q-0.375 0.21875 -0.625 0.65625q-0.234375 0.4375 -0.234375 1.09375l0 2.921875l-0.875 0l0 -5.5l0.90625 0l-0.03125 1.015625q0.234375 -0.546875 0.703125 -0.84375q0.484375 -0.296875 1.046875 -0.296875q0.875 0 1.453125 0.59375l-0.390625 0.671875zm0 0.015625q0.09375 0.0625 0.046875 0.0625q-0.046875 -0.015625 -0.0625 -0.03125l0.015625 -0.03125zm-0.125 0.03125q0 -0.03125 0.03125 -0.015625q0.03125 0 0.078125 0.015625l-0.03125 0.0625l-0.078125 -0.046875l0 -0.015625z" fill-rule="nonzero"/><path fill="#f4cccc" d="m233.1076 68.35761l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#666666" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m233.1076 68.35761l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#999999" d="m259.68378 88.37367l1.8125 -8.5625l1.0 0l-1.625 7.65625l3.3125 0l-0.1875 0.90625l-4.3125 0zm8.080078 0.125q-1.078125 0 -1.703125 -0.640625q-0.609375 -0.640625 -0.609375 -1.78125q0 -1.09375 0.4375 -2.109375q0.4375 -1.015625 1.15625 -1.578125q0.71875 -0.5625 1.578125 -0.5625q0.90625 0 1.359375 0.390625q0.453125 0.390625 0.453125 1.09375q0 1.046875 -0.984375 1.65625q-0.96875 0.59375 -2.78125 0.59375l-0.1875 0l-0.03125 0.46875q0 0.765625 0.359375 1.203125q0.359375 0.4375 1.125 0.4375q0.359375 0 0.75 -0.109375q0.390625 -0.109375 0.96875 -0.390625l0 0.859375q-0.546875 0.25 -0.96875 0.359375q-0.421875 0.109375 -0.921875 0.109375zm0.8125 -5.828125q-0.609375 0 -1.140625 0.5625q-0.53125 0.546875 -0.8125 1.515625l0.078125 0q1.328125 0 2.03125 -0.34375q0.71875 -0.34375 0.71875 -1.015625q0 -0.3125 -0.21875 -0.515625q-0.203125 -0.203125 -0.65625 -0.203125zm8.667969 -0.71875l-0.15625 0.625l-1.234375 0.140625q0.234375 0.359375 0.234375 0.921875q0 1.125 -0.6875 1.796875q-0.671875 0.65625 -1.8125 0.65625q-0.328125 0 -0.5 -0.046875q-0.8125 0.3125 -0.8125 0.765625q0 0.25 0.1875 0.328125q0.203125 0.078125 0.578125 0.125l0.6875 0.078125q1.046875 0.125 1.53125 0.515625q0.484375 0.390625 0.484375 1.140625q0 1.078125 -0.859375 1.671875q-0.859375 0.59375 -2.40625 0.59375q-1.140625 0 -1.78125 -0.4375q-0.65625 -0.4375 -0.65625 -1.21875q0 -0.609375 0.421875 -1.0625q0.421875 -0.453125 1.375 -0.765625q-0.453125 -0.25 -0.453125 -0.71875q0 -0.40625 0.296875 -0.6875q0.296875 -0.296875 0.828125 -0.546875q-0.375 -0.1875 -0.609375 -0.546875q-0.234375 -0.375 -0.234375 -0.859375q0 -1.140625 0.703125 -1.859375q0.703125 -0.734375 1.796875 -0.734375q0.453125 0 0.890625 0.125l2.1875 0zm-6.25 7.5625q0 0.453125 0.375 0.71875q0.390625 0.265625 1.140625 0.265625q1.0625 0 1.65625 -0.375q0.609375 -0.375 0.609375 -1.046875q0 -0.375 -0.3125 -0.59375q-0.3125 -0.203125 -1.078125 -0.28125l-0.9375 -0.09375q-0.703125 0.140625 -1.078125 0.515625q-0.375 0.375 -0.375 0.890625zm1.625 -5.125q0 0.5 0.265625 0.765625q0.265625 0.25 0.734375 0.25q0.46875 0 0.8125 -0.234375q0.34375 -0.25 0.53125 -0.671875q0.1875 -0.4375 0.1875 -0.953125q0 -0.46875 -0.265625 -0.71875q-0.25 -0.265625 -0.734375 -0.265625q-0.453125 0 -0.796875 0.25q-0.34375 0.234375 -0.546875 0.65625q-0.1875 0.421875 -0.1875 0.921875zm7.9335938 -2.5625q0.53125 0 0.953125 0.296875q0.421875 0.296875 0.65625 0.828125l0.0625 0l0.390625 -1.0l0.75 0l-1.359375 6.421875l-0.78125 0l0.15625 -1.21875l-0.046875 0q-1.0625 1.34375 -2.21875 1.34375q-0.8125 0 -1.28125 -0.578125q-0.453125 -0.59375 -0.453125 -1.59375q0 -1.21875 0.40625 -2.265625q0.421875 -1.046875 1.15625 -1.640625q0.734375 -0.59375 1.609375 -0.59375zm-1.125 5.84375q0.578125 0 1.140625 -0.53125q0.5625 -0.546875 0.90625 -1.40625q0.359375 -0.875 0.359375 -1.75q0 -0.609375 -0.328125 -0.96875q-0.328125 -0.359375 -0.859375 -0.359375q-0.609375 0 -1.140625 0.515625q-0.515625 0.5 -0.828125 1.359375q-0.296875 0.859375 -0.296875 1.8125q0 0.671875 0.28125 1.0q0.28125 0.328125 0.765625 0.328125zm7.123047 0.828125q-1.140625 0 -1.765625 -0.625q-0.625 -0.640625 -0.625 -1.78125q0 -1.171875 0.421875 -2.15625q0.4375 -1.0 1.203125 -1.546875q0.765625 -0.5625 1.71875 -0.5625q0.8125 0 1.578125 0.3125l-0.28125 0.8125q-0.703125 -0.296875 -1.28125 -0.296875q-0.65625 0 -1.203125 0.453125q-0.53125 0.453125 -0.84375 1.25q-0.3125 0.796875 -0.3125 1.734375q0 0.75 0.390625 1.171875q0.390625 0.40625 1.078125 0.40625q0.421875 0 0.796875 -0.109375q0.375 -0.125 0.734375 -0.28125l0 0.84375q-0.71875 0.375 -1.609375 0.375zm3.0117188 -6.546875l0.984375 0l0.4375 3.1875q0.046875 0.40625 0.09375 1.203125q0.0625 0.78125 0.0625 1.265625l0.046875 0q0.203125 -0.515625 0.5 -1.171875q0.3125 -0.671875 0.453125 -0.921875l1.90625 -3.5625l1.046875 0l-4.078125 7.515625q-0.546875 1.0 -1.078125 1.390625q-0.53125 0.40625 -1.28125 0.40625q-0.421875 0 -0.828125 -0.125l0 -0.796875q0.375 0.109375 0.765625 0.109375q0.484375 0 0.828125 -0.296875q0.34375 -0.296875 0.671875 -0.875l0.4375 -0.796875l-0.96875 -6.53125z" fill-rule="nonzero"/><path fill="#d9ead3" d="m282.5035 134.76706l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m282.5035 134.76706l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m296.64194 154.18938q0.4375 0 0.84375 -0.25q0.40625 -0.25 0.65625 -0.671875l0.625 0.40625q-0.375 0.625 -0.875 0.9375q-0.5 0.296875 -1.21875 0.296875q-0.84375 0 -1.5 -0.40625q-0.65625 -0.421875 -1.046875 -1.265625q-0.390625 -0.859375 -0.390625 -2.15625q0 -1.375 0.421875 -2.234375q0.421875 -0.859375 1.0625 -1.21875q0.65625 -0.375 1.40625 -0.375q0.78125 0 1.359375 0.390625q0.59375 0.390625 0.890625 1.078125l-0.71875 0.34375q-0.015625 0 -0.015625 0q0 -0.015625 0 -0.015625q-0.3125 -0.625 -0.703125 -0.875q-0.375 -0.25 -0.84375 -0.25q-0.9375 0 -1.484375 0.828125q-0.546875 0.8125 -0.546875 2.28125q0 0.921875 0.265625 1.640625q0.28125 0.71875 0.75 1.125q0.484375 0.390625 1.0625 0.390625zm1.375 -5.171875q0.015625 -0.015625 0.015625 -0.015625q0.03125 0 0.109375 0.0625l-0.09375 0.046875l-0.03125 -0.09375zm0.140625 0.046875q0.046875 0.109375 -0.015625 0l0.015625 0zm1.9687805 -2.265625l0.953125 0l0 0.09375q-0.078125 0.0625 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 2.9375q0.328125 -0.5 0.8125 -0.796875q0.484375 -0.296875 0.984375 -0.296875q0.78125 0 1.21875 0.546875q0.453125 0.546875 0.453125 1.734375l0 3.328125l-0.84375 0l0 -3.296875q0 -0.8125 -0.28125 -1.1875q-0.265625 -0.375 -0.71875 -0.375q-0.390625 0 -0.765625 0.21875q-0.375 0.203125 -0.625 0.59375q-0.234375 0.390625 -0.234375 0.890625l0 3.15625l-0.84375 0l0 -7.984375zm8.390686 8.109375q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm5.0000305 2.640625q0.828125 0 1.421875 -0.671875l0.515625 0.578125q-0.8125 0.859375 -1.984375 0.859375q-0.796875 0 -1.421875 -0.359375q-0.625 -0.375 -0.984375 -1.03125q-0.34375 -0.65625 -0.34375 -1.46875q0 -0.8125 0.34375 -1.453125q0.359375 -0.65625 0.984375 -1.03125q0.625 -0.375 1.40625 -0.375q0.65625 0 1.1875 0.28125q0.546875 0.265625 0.890625 0.734375l-0.546875 0.53125l0 0.015625q-0.359375 -0.453125 -0.71875 -0.640625q-0.359375 -0.1875 -0.90625 -0.1875q-0.46875 0 -0.875 0.265625q-0.390625 0.25 -0.640625 0.71875q-0.234375 0.46875 -0.234375 1.078125q0 0.609375 0.234375 1.109375q0.25 0.484375 0.6875 0.765625q0.4375 0.28125 0.984375 0.28125zm1.328125 -3.375q0 -0.078125 0.109375 0l-0.046875 0.0625l-0.0625 -0.0625zm0.140625 -0.015625q0.046875 0.078125 0.015625 0.0625q-0.015625 -0.03125 -0.046875 -0.046875l0.03125 -0.015625zm3.453186 1.28125l-0.71875 0.671875l0 2.078125l-0.875 0l0 -7.984375l0.984375 0l0 0.09375q-0.078125 0.0625 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0 4.484375l2.75 -2.546875q0.296875 0.0625 0.625 0.0625l0.3125 0l-2.296875 2.171875l2.6875 3.28125l-1.125 0.046875l-2.234375 -2.796875zm4.3281555 -2.734375l0.875 0l0 1.078125q0.1875 -0.59375 0.625 -0.890625q0.4375 -0.3125 1.046875 -0.3125q0.625 0 1.140625 0.328125q0.53125 0.3125 0.84375 0.953125q0.3125 0.625 0.3125 1.546875q0 0.921875 -0.328125 1.59375q-0.328125 0.65625 -0.859375 1.0q-0.53125 0.328125 -1.140625 0.328125q-0.484375 0 -0.921875 -0.21875q-0.421875 -0.234375 -0.703125 -0.640625l0 2.71875l-0.890625 0l0 -7.484375zm2.375 4.859375q0.65625 0 1.109375 -0.5q0.453125 -0.5 0.453125 -1.625q0 -1.015625 -0.40625 -1.5625q-0.390625 -0.546875 -1.125 -0.546875q-0.671875 0 -1.109375 0.578125q-0.421875 0.5625 -0.421875 1.71875q0.03125 0.953125 0.421875 1.453125q0.390625 0.484375 1.078125 0.484375zm5.906311 0.71875q-0.734375 0 -1.3125 -0.359375q-0.578125 -0.359375 -0.90625 -1.0q-0.3125 -0.65625 -0.3125 -1.484375q0 -0.828125 0.3125 -1.46875q0.328125 -0.65625 0.90625 -1.015625q0.578125 -0.375 1.3125 -0.375q0.734375 0 1.3125 0.375q0.578125 0.359375 0.890625 1.015625q0.328125 0.640625 0.328125 1.46875q0 0.828125 -0.328125 1.484375q-0.3125 0.640625 -0.890625 1.0q-0.578125 0.359375 -1.3125 0.359375zm0 -0.71875q0.46875 0 0.828125 -0.265625q0.375 -0.28125 0.578125 -0.765625q0.21875 -0.484375 0.21875 -1.109375q0 -0.9375 -0.46875 -1.53125q-0.453125 -0.59375 -1.15625 -0.59375q-0.703125 0 -1.171875 0.59375q-0.453125 0.59375 -0.453125 1.53125q0 0.625 0.203125 1.109375q0.21875 0.484375 0.578125 0.765625q0.375 0.265625 0.84375 0.265625zm4.2031555 0.625l0 -0.703125l1.40625 0l0 -4.078125l-1.34375 0l0 -0.703125l2.203125 0l0 4.78125l1.28125 0l0 0.703125l-3.546875 0zm1.78125 -6.640625q-0.25 0 -0.4375 -0.171875q-0.171875 -0.1875 -0.171875 -0.4375q0 -0.265625 0.171875 -0.4375q0.171875 -0.1875 0.4375 -0.1875q0.25 0 0.4375 0.1875q0.1875 0.1875 0.1875 0.4375q0 0.25 -0.1875 0.4375q-0.1875 0.171875 -0.4375 0.171875zm3.875061 1.15625l0.84375 0l0 0.96875q0.328125 -0.5 0.8125 -0.796875q0.5 -0.296875 1.046875 -0.296875q0.734375 0 1.171875 0.5625q0.4375 0.546875 0.4375 1.71875l0 3.328125l-0.84375 0l0 -3.296875q0 -0.8125 -0.28125 -1.1875q-0.265625 -0.375 -0.71875 -0.375q-0.375 0 -0.75 0.21875q-0.375 0.21875 -0.625 0.609375q-0.25 0.390625 -0.25 0.875l0 3.15625l-0.84375 0l0 -5.484375zm10.4687805 4.984375q-0.796875 0.578125 -1.734375 0.578125q-0.921875 0 -1.296875 -0.546875q-0.375 -0.546875 -0.375 -1.796875q0 -0.203125 0.015625 -0.703125l0.109375 -1.8125l-1.203125 0l0 -0.703125l1.25 0l0.09375 -1.46875l0.953125 -0.15625l0.125 0l0.015625 0.0625q-0.09375 0.125 -0.140625 0.21875q-0.03125 0.078125 -0.046875 0.25l-0.125 1.09375l1.8125 0l0 0.703125l-1.859375 0l-0.109375 1.859375q-0.03125 0.484375 -0.03125 0.640625q0 0.96875 0.21875 1.3125q0.234375 0.328125 0.71875 0.328125q0.359375 0 0.65625 -0.125q0.3125 -0.140625 0.6875 -0.421875l0.265625 0.6875zm4.062561 -2.734375q1.03125 0.3125 1.453125 0.6875q0.4375 0.359375 0.4375 0.953125q0 0.734375 -0.59375 1.234375q-0.578125 0.484375 -1.671875 0.484375q-1.390625 0 -2.328125 -0.875l0.46875 -0.8125l0.015625 -0.015625l0.015625 0.015625q0.375 0.484375 0.765625 0.734375q0.40625 0.234375 1.078125 0.234375q0.65625 0 1.015625 -0.234375q0.375 -0.234375 0.375 -0.640625q0 -0.359375 -0.296875 -0.578125q-0.296875 -0.234375 -1.078125 -0.484375q-2.0625 -0.59375 -2.0625 -1.703125q0 -0.640625 0.515625 -1.0q0.53125 -0.375 1.5 -0.375q0.75 0 1.25 0.203125q0.515625 0.203125 0.9375 0.65625l-0.5 0.59375l0 0.015625q-0.265625 -0.390625 -0.734375 -0.609375q-0.453125 -0.21875 -0.921875 -0.21875q-0.515625 0 -0.859375 0.1875q-0.328125 0.171875 -0.328125 0.5q0 0.296875 0.328125 0.546875q0.34375 0.25 1.21875 0.5zm1.15625 -0.875q0 -0.0625 0.09375 0l-0.03125 0.046875l-0.0625 -0.046875zm0.140625 -0.03125q0.03125 0.046875 0.015625 0.0625q0 0.015625 -0.03125 0q-0.015625 -0.015625 -0.03125 -0.03125l0.046875 -0.03125zm-3.375 2.53125q0 0.046875 -0.109375 0l0.03125 -0.0625l0.078125 0.046875l0 0.015625zm-0.140625 0.03125q-0.03125 -0.046875 -0.03125 -0.046875q0.015625 0 0.0625 0.015625l-0.03125 0.03125z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m276.85565 99.34974l0 17.70874l-42.960632 0l0 17.724327" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m276.85565 99.34974l0 17.70874l-42.960632 0l0 14.297249" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m233.89502 131.35573l-1.124588 -1.124588l1.124588 3.0897675l1.1245728 -3.0897675z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m276.85565 99.34974l0 17.70874l49.385803 0l0 17.724327" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m276.85565 99.34974l0 17.70874l49.385803 0l0 14.297249" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m326.24146 131.35573l-1.1245728 -1.124588l1.1245728 3.0897675l1.1246033 -3.0897675z" fill-rule="evenodd"/><path fill="#c9daf8" d="m548.5407 235.66077l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m548.5407 235.66077l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path fill="#000000" d="m580.4762 255.67682l-1.140625 0l-4.6875 -7.1875l-0.046875 0q0.09375 1.265625 0.09375 2.3125l0 4.875l-0.921875 0l0 -8.5625l1.125 0l4.671875 7.15625l0.046875 0q0 -0.15625 -0.046875 -1.015625q-0.046875 -0.859375 -0.03125 -1.234375l0 -4.90625l0.9375 0l0 8.5625zm9.046875 0l-1.140625 0l-4.6875 -7.1875l-0.046875 0q0.09375 1.265625 0.09375 2.3125l0 4.875l-0.921875 0l0 -8.5625l1.125 0l4.671875 7.15625l0.046875 0q0 -0.15625 -0.046875 -1.015625q-0.046875 -0.859375 -0.03125 -1.234375l0 -4.90625l0.9375 0l0 8.5625zm10.8515625 0l-1.0625 -2.71875l-3.4375 0l-1.046875 2.71875l-1.015625 0l3.390625 -8.609375l0.828125 0l3.375 8.609375l-1.03125 0zm-1.375 -3.625l-1.0 -2.65625q-0.1875 -0.5 -0.390625 -1.234375q-0.140625 0.5625 -0.375 1.234375l-1.0 2.65625l2.765625 0zm9.015625 -2.453125q0 1.3125 -0.890625 2.015625q-0.890625 0.6875 -2.53125 0.6875l-1.015625 0l0 3.375l-1.0 0l0 -8.5625l2.234375 0q3.203125 0 3.203125 2.484375zm-4.4375 1.859375l0.90625 0q1.3125 0 1.90625 -0.421875q0.59375 -0.4375 0.59375 -1.390625q0 -0.84375 -0.5625 -1.25q-0.546875 -0.421875 -1.734375 -0.421875l-1.109375 0l0 3.484375zm6.2246094 4.21875l0 -8.5625l1.0 0l0 8.5625l-1.0 0z" fill-rule="nonzero"/><path fill="#c9daf8" d="m548.5407 193.79199l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m548.5407 193.79199l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path fill="#000000" d="m588.3893 213.18306q0 -0.390625 0.171875 -0.59375q0.1875 -0.203125 0.515625 -0.203125q0.34375 0 0.53125 0.203125q0.1875 0.203125 0.1875 0.59375q0 0.390625 -0.1875 0.59375q-0.1875 0.203125 -0.53125 0.203125q-0.296875 0 -0.5 -0.1875q-0.1875 -0.1875 -0.1875 -0.609375zm3.1933594 0q0 -0.390625 0.171875 -0.59375q0.1875 -0.203125 0.515625 -0.203125q0.34375 0 0.53125 0.203125q0.1875 0.203125 0.1875 0.59375q0 0.390625 -0.1875 0.59375q-0.1875 0.203125 -0.53125 0.203125q-0.296875 0 -0.5 -0.1875q-0.1875 -0.1875 -0.1875 -0.609375zm3.1933594 0q0 -0.390625 0.171875 -0.59375q0.1875 -0.203125 0.515625 -0.203125q0.34375 0 0.53125 0.203125q0.1875 0.203125 0.1875 0.59375q0 0.390625 -0.1875 0.59375q-0.1875 0.203125 -0.53125 0.203125q-0.296875 0 -0.5 -0.1875q-0.1875 -0.1875 -0.1875 -0.609375z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m75.62294 283.52823l0 17.950958l100.62993 0l0 17.954529" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m75.62295 283.52823l0 17.950928l100.62992 0l0 14.527496" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m176.25287 316.00665l-1.124588 -1.1246033l1.124588 3.0897827l1.124588 -3.0897827z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m276.85654 283.52823l0 17.950958l-100.62991 0l0 17.954529" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m276.85654 283.52823l0 17.950928l-100.62991 0l0 14.527496" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m176.22662 316.00665l-1.124588 -1.1246033l1.124588 3.0897827l1.124588 -3.0897827z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m500.5223 334.89435l24.009003 0l0 0.06298828l24.022522 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m500.5223 334.89435l24.009003 0l0 0.06298828l20.595398 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m545.1267 334.95734l-1.1245728 1.1246033l3.0897827 -1.1246033l-3.0897827 -1.1245728z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m500.5223 334.89435l24.009003 0l0 -41.858246l24.022522 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m500.5223 334.89435l24.009003 0l0 -41.858246l20.595398 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m545.1267 293.0361l-1.1245728 1.1245728l3.0897827 -1.1245728l-3.0897827 -1.1246033z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m500.5223 334.89435l24.009003 0l0 -83.74802l24.022522 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m500.5223 334.89435l24.009003 0l0 -83.74802l20.595398 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m545.1267 251.14633l-1.1245728 1.1245728l3.0897827 -1.1245728l-3.0897827 -1.124588z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m500.5223 334.89435l24.009003 0l0 -125.60629l24.022522 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m500.5223 334.89435l24.009003 0l0 -125.60629l20.595398 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m545.1267 209.28806l-1.1245728 1.124588l3.0897827 -1.124588l-3.0897827 -1.124588z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m233.88803 165.75919l0 17.70752l42.960632 0l0 17.694061" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m233.88805 165.75919l0 17.70752l42.960617 0l0 14.266968" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m276.84866 197.73367l-1.1245728 -1.124588l1.1245728 3.0897675l1.1246033 -3.0897675z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m326.25156 165.75919l0 17.70752l-49.385834 0l0 17.694061" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m326.25156 165.75919l0 17.70752l-49.385834 0l0 14.266968" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m276.86572 197.73367l-1.1245728 -1.124588l1.1245728 3.0897675l1.1246033 -3.0897675z" fill-rule="evenodd"/><path fill="#d9ead3" d="m132.49171 252.53609l87.49606 0l0 30.992142l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m132.49171 252.53609l87.49606 0l0 30.992142l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m149.81764 268.42715q1.0625 0.4375 1.46875 0.90625q0.40625 0.46875 0.40625 1.171875q0 0.5625 -0.265625 1.0625q-0.265625 0.484375 -0.828125 0.796875q-0.5625 0.3125 -1.390625 0.3125q-1.453125 0 -2.34375 -0.953125l0.4375 -0.75l0 -0.015625q0 0 0 0.015625q0 0 0 0q0.328125 0.421875 0.828125 0.6875q0.515625 0.25 1.1875 0.25q0.671875 0 1.109375 -0.359375q0.4375 -0.375 0.4375 -0.921875q0 -0.34375 -0.140625 -0.578125q-0.125 -0.234375 -0.484375 -0.453125q-0.359375 -0.234375 -1.078125 -0.546875q-1.109375 -0.4375 -1.59375 -0.984375q-0.46875 -0.5625 -0.46875 -1.234375q0 -0.84375 0.609375 -1.34375q0.625 -0.515625 1.671875 -0.515625q0.609375 0 1.140625 0.25q0.546875 0.25 0.9375 0.6875l-0.46875 0.625l-0.015625 0.015625q-0.359375 -0.484375 -0.75 -0.671875q-0.390625 -0.203125 -0.96875 -0.203125q-0.578125 0 -0.9375 0.328125q-0.34375 0.3125 -0.34375 0.765625q0 0.34375 0.140625 0.609375q0.15625 0.25 0.546875 0.5q0.390625 0.25 1.15625 0.546875zm1.03125 -1.84375q0 -0.046875 0.046875 -0.015625q0.046875 0.015625 0.0625 0.015625l-0.03125 0.046875l-0.078125 -0.046875l0 0zm0.125 -0.03125q0.078125 0.09375 0.03125 0.0625q-0.03125 -0.03125 -0.046875 -0.03125l0.015625 -0.03125zm-3.546875 4.375q0 0.03125 -0.046875 0.015625q-0.046875 -0.03125 -0.0625 -0.03125l0.03125 -0.046875l0.078125 0.046875l0 0.015625zm-0.125 0.03125q-0.078125 -0.09375 0.015625 -0.046875l-0.015625 0.046875zm7.859421 -4.015625q1.1875 0 1.796875 0.625q0.625 0.609375 0.625 2.0625l0 2.921875l-0.9375 0l0 -0.84375q-0.5 0.96875 -1.875 0.96875q-0.90625 0 -1.421875 -0.40625q-0.515625 -0.421875 -0.515625 -1.09375q0 -0.578125 0.359375 -1.0q0.375 -0.4375 1.0 -0.671875q0.640625 -0.234375 1.390625 -0.234375q0.6875 0 1.234375 0.0625q-0.0625 -0.921875 -0.484375 -1.296875q-0.40625 -0.375 -1.21875 -0.375q-0.421875 0 -0.796875 0.15625q-0.375 0.15625 -0.6875 0.453125l-0.421875 -0.5625q0.765625 -0.765625 1.953125 -0.765625zm-0.3125 5.078125q0.890625 0 1.40625 -0.515625q0.515625 -0.53125 0.5625 -1.515625q-0.53125 -0.078125 -1.15625 -0.078125q-0.90625 0 -1.4375 0.296875q-0.53125 0.296875 -0.53125 0.921875q0 0.890625 1.15625 0.890625zm8.843796 -4.953125q-0.1875 0.96875 -0.796875 2.40625l-1.328125 3.078125l-0.671875 0l-2.171875 -5.484375l0.859375 0l1.6875 4.296875l0.890625 -2.03125q0.546875 -1.25 0.71875 -2.265625l0.8125 0zm3.8125458 5.609375q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm4.343796 3.40625q-0.59375 0 -1.109375 -0.328125q-0.515625 -0.34375 -0.84375 -1.0q-0.3125 -0.65625 -0.3125 -1.59375q0 -0.953125 0.328125 -1.578125q0.34375 -0.640625 0.859375 -0.9375q0.53125 -0.3125 1.125 -0.3125q0.546875 0 0.953125 0.25q0.421875 0.25 0.640625 0.6875l0 -3.296875l0.90625 0l0 0.09375q-0.0625 0.0625 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0.015625 6.59375q0 0.296875 0.015625 0.484375q0.015625 0.1875 0.109375 0.375l-0.859375 0q-0.078125 -0.1875 -0.09375 -0.375q-0.015625 -0.1875 -0.015625 -0.484375q-0.265625 0.46875 -0.6875 0.734375q-0.40625 0.25 -0.921875 0.25zm0.125 -0.765625q0.75 0 1.09375 -0.578125q0.34375 -0.59375 0.34375 -1.546875q0 -0.984375 -0.375 -1.5625q-0.375 -0.59375 -1.125 -0.59375q-0.734375 0 -1.125 0.53125q-0.375 0.53125 -0.375 1.46875q0 1.046875 0.40625 1.671875q0.40625 0.609375 1.15625 0.609375zm3.5469208 0.640625l0 -7.46875l0.671875 0l1.84375 3.65625l1.890625 -3.671875l0.625 0l0 7.484375l-0.78125 0l0 -5.640625l-1.625 3.015625l-0.328125 0l-1.515625 -2.984375l0 5.609375l-0.78125 0zm8.515671 0.09375q-0.734375 0 -1.3125 -0.359375q-0.578125 -0.359375 -0.90625 -1.0q-0.3125 -0.65625 -0.3125 -1.484375q0 -0.828125 0.3125 -1.46875q0.328125 -0.65625 0.90625 -1.015625q0.578125 -0.375 1.3125 -0.375q0.734375 0 1.3125 0.375q0.578125 0.359375 0.890625 1.015625q0.328125 0.640625 0.328125 1.46875q0 0.828125 -0.328125 1.484375q-0.3125 0.640625 -0.890625 1.0q-0.578125 0.359375 -1.3125 0.359375zm0 -0.71875q0.46875 0 0.828125 -0.265625q0.375 -0.28125 0.578125 -0.765625q0.21875 -0.484375 0.21875 -1.109375q0 -0.9375 -0.46875 -1.53125q-0.453125 -0.59375 -1.15625 -0.59375q-0.703125 0 -1.171875 0.59375q-0.453125 0.59375 -0.453125 1.53125q0 0.625 0.203125 1.109375q0.21875 0.484375 0.578125 0.765625q0.375 0.265625 0.84375 0.265625zm5.812546 0.75q-0.59375 0 -1.109375 -0.328125q-0.515625 -0.34375 -0.84375 -1.0q-0.3125 -0.65625 -0.3125 -1.59375q0 -0.953125 0.328125 -1.578125q0.34375 -0.640625 0.859375 -0.9375q0.53125 -0.3125 1.125 -0.3125q0.546875 0 0.953125 0.25q0.421875 0.25 0.640625 0.6875l0 -3.296875l0.90625 0l0 0.09375q-0.0625 0.0625 -0.09375 0.15625q-0.015625 0.078125 -0.015625 0.28125l0.015625 6.59375q0 0.296875 0.015625 0.484375q0.015625 0.1875 0.109375 0.375l-0.859375 0q-0.078125 -0.1875 -0.09375 -0.375q-0.015625 -0.1875 -0.015625 -0.484375q-0.265625 0.46875 -0.6875 0.734375q-0.40625 0.25 -0.921875 0.25zm0.125 -0.765625q0.75 0 1.09375 -0.578125q0.34375 -0.59375 0.34375 -1.546875q0 -0.984375 -0.375 -1.5625q-0.375 -0.59375 -1.125 -0.59375q-0.734375 0 -1.125 0.53125q-0.375 0.53125 -0.375 1.46875q0 1.046875 0.40625 1.671875q0.40625 0.609375 1.15625 0.609375zm6.328171 0.765625q-1.21875 0 -1.953125 -0.75q-0.71875 -0.75 -0.71875 -2.09375q0 -0.90625 0.328125 -1.5625q0.34375 -0.65625 0.90625 -0.984375q0.578125 -0.34375 1.28125 -0.34375q1.0 0 1.609375 0.65625q0.625 0.65625 0.625 1.9375q0 0.140625 -0.03125 0.40625l-3.890625 0q0.046875 1.0 0.5625 1.515625q0.515625 0.515625 1.296875 0.515625q0.875 0 1.421875 -0.609375l0.484375 0.46875q-0.703125 0.84375 -1.921875 0.84375zm1.203125 -3.40625q0 -0.78125 -0.390625 -1.21875q-0.390625 -0.453125 -1.03125 -0.453125q-0.59375 0 -1.046875 0.421875q-0.4375 0.421875 -0.546875 1.25l3.015625 0zm2.4844208 3.28125l0 -0.703125l1.609375 0l0 -6.578125l-1.546875 0l0 -0.703125l2.421875 0l0 7.28125l1.609375 0l0 0.703125l-4.09375 0z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m176.23885 99.34974l0 153.19684" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m176.23885 99.34974l0 149.76978" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m176.23885 249.1195l-1.124588 -1.124588l1.124588 3.0897675l1.124588 -3.0897675z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m176.23975 283.52823l0 17.950958l0.06298828 0l0 17.954529" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m176.23975 283.52823l0 17.950928l0.06298828 0l0 14.527496" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m176.30273 316.00665l-1.1245728 -1.1246033l1.1245728 3.0897827l1.124588 -3.0897827z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m75.62205 99.34843l0 153.19684" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m75.62205 99.34843l0 149.76978" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m75.62205 249.1182l-1.1245804 -1.124588l1.1245804 3.0897675l1.1245804 -3.0897675z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m99.50131 100.0l0 76.0l54.992126 0l0 76.0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m99.50131 100.0l0 76.0l54.992126 0l0 72.57292" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m154.49344 248.5729l-1.124588 -1.1245728l1.124588 3.0897675l1.124588 -3.0897675z" fill-rule="evenodd"/></g></svg>
\ No newline at end of file
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/index.md b/tensorflow/contrib/lite/g3doc/tflite_convert/index.md
deleted file mode 100644
index 12ba0225f6..0000000000
--- a/tensorflow/contrib/lite/g3doc/tflite_convert/index.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# TensorFlow Lite Converter
-
-The TensorFlow Lite Converter converts TensorFlow graphs into
-TensorFlow Lite graphs. There are additional usages that are also detailed in
-the usage documentation.
-
-
-## Where the converter fits in the TensorFlow landscape
-
-Once an application developer has a trained TensorFlow model, the TensorFlow
-Lite Converter will accept
-that model and generate a TensorFlow Lite
-[FlatBuffer](https://google.github.io/flatbuffers/) file. The converter currently supports
-[SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators),
-frozen graphs (models generated via
-[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)),
-and `tf.Keras` model files.  The TensorFlow Lite FlatBuffer file can be shipped
-to client devices, generally mobile devices, where the TensorFlow Lite
-interpreter handles them on-device.  This flow is represented in the diagram
-below.
-
-![drawing](toco_landscape.svg)
diff --git a/tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg b/tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg
deleted file mode 100644
index 335debde57..0000000000
--- a/tensorflow/contrib/lite/g3doc/tflite_convert/toco_landscape.svg
+++ /dev/null
@@ -1 +0,0 @@
-<svg version="1.1" viewBox="0.0 0.0 720.0 540.0" fill="none" stroke="none" stroke-linecap="square" stroke-miterlimit="10" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg"><clipPath id="p.0"><path d="m0 0l720.0 0l0 540.0l-720.0 0l0 -540.0z" clip-rule="nonzero"/></clipPath><g clip-path="url(#p.0)"><path fill="#000000" fill-opacity="0.0" d="m0 0l720.0 0l0 540.0l-720.0 0z" fill-rule="evenodd"/><path fill="#f3f3f3" d="m19.375328 28.750656l361.6378 0l0 358.01575l-361.6378 0z" fill-rule="evenodd"/><path stroke="#cccccc" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m19.375328 28.750656l361.6378 0l0 358.01575l-361.6378 0z" fill-rule="evenodd"/><path fill="#434343" d="m338.49512 374.66016q-0.609375 0 -1.171875 -0.140625q-0.546875 -0.15625 -0.96875 -0.421875q-0.25 -0.15625 -0.359375 -0.296875q-0.09375 -0.140625 -0.09375 -0.34375q0 -0.171875 0.09375 -0.28125q0.109375 -0.109375 0.265625 -0.109375q0.171875 0 0.46875 0.1875q0.40625 0.25 0.796875 0.390625q0.390625 0.140625 0.984375 0.140625q0.71875 0 1.109375 -0.25q0.40625 -0.265625 0.40625 -0.734375q0 -0.296875 -0.15625 -0.46875q-0.140625 -0.1875 -0.5 -0.328125q-0.359375 -0.140625 -1.046875 -0.296875q-1.171875 -0.25 -1.6875 -0.671875q-0.5 -0.421875 -0.5 -1.15625q0 -0.578125 0.3125 -1.015625q0.328125 -0.4375 0.890625 -0.6875q0.5625 -0.265625 1.28125 -0.265625q0.53125 0 1.015625 0.140625q0.484375 0.140625 0.859375 0.390625q0.453125 0.328125 0.453125 0.671875q0 0.171875 -0.109375 0.296875q-0.109375 0.125 -0.25 0.125q-0.15625 0 -0.484375 -0.234375q-0.375 -0.234375 -0.703125 -0.359375q-0.328125 -0.140625 -0.828125 -0.140625q-0.625 0 -1.015625 0.28125q-0.375 0.265625 -0.375 0.734375q0 0.296875 0.140625 0.484375q0.140625 0.171875 0.46875 0.3125q0.328125 0.140625 0.9375 0.28125q0.90625 0.1875 1.40625 0.4375q0.5 0.234375 0.703125 0.578125q0.21875 0.34375 0.21875 0.890625q0 0.828125 -0.703125 1.34375q-0.703125 0.515625 -1.859375 0.515625zm9.241241 -1.59375q0.140625 0 0.25 0.125q0.109375 0.109375 0.109375 0.296875q0 0.328125 -0.46875 0.609375q-0.484375 0.28125 -1.015625 0.421875q-0.53125 0.140625 -1.046875 0.140625q-1.5 0 -2.375 -0.890625q-0.875 -0.890625 -0.875 -2.46875q0 -1.0 0.390625 -1.765625q0.390625 -0.765625 1.078125 -1.1875q0.703125 -0.4375 1.59375 -0.4375q1.265625 0 2.015625 0.828125q0.75 0.828125 0.75 2.25q0 0.265625 -0.109375 0.390625q-0.109375 0.109375 -0.34375 0.109375l-4.296875 0q0.125 2.296875 2.171875 2.296875q0.53125 0 0.890625 -0.140625q0.375 -0.140625 0.8125 -0.390625q0.34375 -0.1875 0.46875 -0.1875zm-2.34375 -4.3125q-0.84375 0 -1.359375 0.53125q-0.515625 0.53125 -0.609375 1.515625l3.765625 0q-0.015625 -1.0 -0.484375 -1.515625q-0.46875 -0.53125 -1.3125 -0.53125zm7.5551147 -0.8125q0.546875 -0.03125 0.546875 0.453125q0 0.21875 -0.125 0.34375q-0.109375 0.125 -0.40625 0.15625l-0.390625 0.03125q-0.890625 0.078125 -1.328125 0.640625q-0.4375 0.546875 -0.4375 1.296875l0 3.234375q0 0.265625 -0.15625 0.40625q-0.140625 0.125 -0.375 0.125q-0.234375 0 -0.390625 -0.140625q-0.15625 -0.140625 -0.15625 -0.390625l0 -5.625q0 -0.25 0.15625 -0.390625q0.15625 -0.140625 0.390625 -0.140625q0.21875 0 0.359375 0.140625q0.140625 0.140625 0.140625 0.375l0 0.75q0.28125 -0.578125 0.796875 -0.890625q0.515625 -0.3125 1.1875 -0.359375l0.1875 -0.015625zm6.157959 0.328125q0.15625 -0.3125 0.46875 -0.3125q0.203125 0 0.359375 0.140625q0.15625 0.125 0.15625 0.328125q0 0.109375 -0.046875 0.203125l-2.59375 5.609375q-0.078125 0.171875 -0.25 0.28125q-0.15625 0.09375 -0.34375 0.09375q-0.171875 0 -0.328125 -0.09375q-0.15625 -0.109375 -0.25 -0.28125l-2.59375 -5.609375q-0.046875 -0.09375 -0.046875 -0.1875q0 -0.203125 0.171875 -0.34375q0.1875 -0.15625 0.390625 -0.15625q0.140625 0 0.265625 0.078125q0.125 0.078125 0.1875 0.234375l2.234375 5.0l2.21875 -4.984375zm7.2099915 4.796875q0.140625 0 0.25 0.125q0.109375 0.109375 0.109375 0.296875q0 0.328125 -0.46875 0.609375q-0.484375 0.28125 -1.015625 0.421875q-0.53125 0.140625 -1.046875 0.140625q-1.5 0 -2.375 -0.890625q-0.875 -0.890625 -0.875 -2.46875q0 -1.0 0.390625 -1.765625q0.390625 -0.765625 1.078125 -1.1875q0.703125 -0.4375 1.59375 -0.4375q1.265625 0 2.015625 0.828125q0.75 0.828125 0.75 2.25q0 0.265625 -0.109375 0.390625q-0.109375 0.109375 -0.34375 0.109375l-4.296875 0q0.125 2.296875 2.171875 2.296875q0.53125 0 0.890625 -0.140625q0.375 -0.140625 0.8125 -0.390625q0.34375 -0.1875 0.46875 -0.1875zm-2.34375 -4.3125q-0.84375 0 -1.359375 0.53125q-0.515625 0.53125 -0.609375 1.515625l3.765625 0q-0.015625 -1.0 -0.484375 -1.515625q-0.46875 -0.53125 -1.3125 -0.53125zm7.5551453 -0.8125q0.546875 -0.03125 0.546875 0.453125q0 0.21875 -0.125 0.34375q-0.109375 0.125 -0.40625 0.15625l-0.390625 0.03125q-0.890625 0.078125 -1.328125 0.640625q-0.4375 0.546875 -0.4375 1.296875l0 3.234375q0 0.265625 -0.15625 0.40625q-0.140625 0.125 -0.375 0.125q-0.234375 0 -0.390625 -0.140625q-0.15625 -0.140625 -0.15625 -0.390625l0 -5.625q0 -0.25 0.15625 -0.390625q0.15625 -0.140625 0.390625 -0.140625q0.21875 0 0.359375 0.140625q0.140625 0.140625 0.140625 0.375l0 0.75q0.28125 -0.578125 0.796875 -0.890625q0.515625 -0.3125 1.1875 -0.359375l0.1875 -0.015625z" fill-rule="nonzero"/><path fill="#d9d9d9" d="m25.624672 36.249344l301.88977 0l0 69.98425l-301.88977 0z" fill-rule="evenodd"/><path stroke="#cccccc" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" stroke-dasharray="4.0,3.0" d="m25.624672 36.249344l301.88977 0l0 69.98425l-301.88977 0z" fill-rule="evenodd"/><path fill="#434343" d="m134.36497 56.831844q-0.234375 0 -0.375 -0.140625q-0.140625 -0.140625 -0.140625 -0.359375l0 -7.1875l-2.578125 0q-0.21875 0 -0.34375 -0.109375q-0.109375 -0.109375 -0.109375 -0.3125q0 -0.203125 0.109375 -0.296875q0.125 -0.109375 0.34375 -0.109375l6.15625 0q0.21875 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.109375 0.109375 -0.328125 0.109375l-2.578125 0l0 7.1875q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.34375 0.140625zm9.004181 -1.421875q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.839676 -0.75q2.09375 0 2.09375 2.3125l0 3.25q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.1875q0 -0.8125 -0.328125 -1.1875q-0.3125 -0.375 -1.0 -0.375q-0.8125 0 -1.296875 0.5q-0.46875 0.484375 -0.46875 1.328125l0 2.921875q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.328125l0 0.609375q0.28125 -0.53125 0.796875 -0.8125q0.53125 -0.28125 1.1875 -0.28125zm5.84729 6.0625q-0.56248474 0 -1.0624847 -0.125q-0.5 -0.140625 -0.875 -0.375q-0.21875 -0.140625 -0.3125 -0.265625q-0.078125 -0.125 -0.078125 -0.3125q0 -0.15625 0.078125 -0.25q0.09375 -0.109375 0.234375 -0.109375q0.15625 0 0.421875 0.1875q0.359375 0.21875 0.71875 0.34375q0.359375 0.125 0.87498474 0.125q0.65625 0 1.015625 -0.21875q0.359375 -0.234375 0.359375 -0.671875q0 -0.265625 -0.140625 -0.421875q-0.125 -0.171875 -0.453125 -0.296875q-0.3125 -0.125 -0.9375 -0.25q-1.0624847 -0.234375 -1.5156097 -0.609375q-0.453125 -0.390625 -0.453125 -1.046875q0 -0.515625 0.28125 -0.90625q0.28125 -0.40625 0.796875 -0.625q0.515625 -0.234375 1.1562347 -0.234375q0.46875 0 0.90625 0.125q0.4375 0.125 0.78125 0.34375q0.40625 0.296875 0.40625 0.609375q0 0.15625 -0.09375 0.265625q-0.09375 0.109375 -0.234375 0.109375q-0.140625 0 -0.4375 -0.203125q-0.328125 -0.21875 -0.625 -0.34375q-0.296875 -0.125 -0.75 -0.125q-0.56248474 0 -0.90623474 0.265625q-0.34375 0.25 -0.34375 0.671875q0 0.25 0.125 0.421875q0.125 0.15625 0.421875 0.28125q0.296875 0.125 0.84373474 0.25q0.828125 0.1875 1.265625 0.40625q0.453125 0.203125 0.640625 0.515625q0.203125 0.3125 0.203125 0.796875q0 0.75 -0.640625 1.21875q-0.640625 0.453125 -1.671875 0.453125zm6.2131653 0q-0.828125 0 -1.46875 -0.359375q-0.625 -0.375 -0.96875 -1.0625q-0.34375 -0.703125 -0.34375 -1.609375q0 -0.90625 0.34375 -1.59375q0.34375 -0.703125 0.96875 -1.0625q0.640625 -0.375 1.46875 -0.375q0.828125 0 1.453125 0.375q0.640625 0.359375 0.984375 1.0625q0.34375 0.6875 0.34375 1.59375q0 0.90625 -0.34375 1.609375q-0.34375 0.6875 -0.984375 1.0625q-0.625 0.359375 -1.453125 0.359375zm0 -0.796875q0.859375 0 1.3125 -0.5625q0.46875 -0.578125 0.46875 -1.671875q0 -1.0625 -0.46875 -1.640625q-0.46875 -0.59375 -1.3125 -0.59375q-0.859375 0 -1.328125 0.59375q-0.46875 0.578125 -0.46875 1.640625q0 1.078125 0.453125 1.65625q0.46875 0.578125 1.34375 0.578125zm7.1288147 -5.25q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm1.970398 6.03125q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l4.375 0q0.203125 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.125 0.109375 -0.328125 0.109375l-3.90625 0l0 2.90625l3.65625 0q0.21875 0 0.328125 0.109375q0.125 0.109375 0.125 0.3125q0 0.1875 -0.125 0.296875q-0.109375 0.109375 -0.328125 0.109375l-3.65625 0l0 3.453125q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.359375 0.140625zm6.5434265 0q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -7.625q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.359375 -0.125q0.203125 0 0.34375 0.125q0.140625 0.125 0.140625 0.34375l0 7.625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125zm4.721527 0.015625q-0.828125 0 -1.46875 -0.359375q-0.625 -0.375 -0.96875 -1.0625q-0.34375 -0.703125 -0.34375 -1.609375q0 -0.90625 0.34375 -1.59375q0.34375 -0.703125 0.96875 -1.0625q0.640625 -0.375 1.46875 -0.375q0.828125 0 1.453125 0.375q0.640625 0.359375 0.984375 1.0625q0.34375 0.6875 0.34375 1.59375q0 0.90625 -0.34375 1.609375q-0.34375 0.6875 -0.984375 1.0625q-0.625 0.359375 -1.453125 0.359375zm0 -0.796875q0.859375 0 1.3125 -0.5625q0.46875 -0.578125 0.46875 -1.671875q0 -1.0625 -0.46875 -1.640625q-0.46875 -0.59375 -1.3125 -0.59375q-0.859375 0 -1.328125 0.59375q-0.46875 0.578125 -0.46875 1.640625q0 1.078125 0.453125 1.65625q0.46875 0.578125 1.34375 0.578125zm12.222534 -4.9375q0.125 -0.28125 0.390625 -0.28125q0.1875 0 0.328125 0.125q0.140625 0.109375 0.140625 0.296875q0 0.078125 -0.03125 0.171875l-1.984375 5.046875q-0.078125 0.15625 -0.21875 0.25q-0.140625 0.078125 -0.296875 0.078125q-0.15625 0 -0.296875 -0.078125q-0.140625 -0.09375 -0.21875 -0.25l-1.65625 -4.21875l-1.640625 4.21875q-0.0625 0.15625 -0.203125 0.25q-0.140625 0.078125 -0.3125 0.078125q-0.15625 0 -0.296875 -0.078125q-0.140625 -0.09375 -0.21875 -0.25l-1.984375 -5.03125q-0.046875 -0.09375 -0.046875 -0.171875q0 -0.1875 0.15625 -0.3125q0.171875 -0.140625 0.359375 -0.140625q0.296875 0 0.40625 0.296875l1.65625 4.421875l1.6875 -4.390625q0.078125 -0.15625 0.203125 -0.234375q0.125 -0.09375 0.265625 -0.09375q0.15625 0 0.28125 0.09375q0.125 0.078125 0.1875 0.234375l1.6875 4.375l1.65625 -4.40625zm12.637604 5.09375q0.046875 0.09375 0.046875 0.203125q0 0.171875 -0.140625 0.296875q-0.140625 0.125 -0.328125 0.125q-0.296875 0 -0.421875 -0.296875l-0.84375 -1.9375l-4.53125 0l-0.859375 1.9375q-0.125 0.296875 -0.421875 0.296875q-0.1875 0 -0.34375 -0.125q-0.140625 -0.125 -0.140625 -0.3125q0 -0.09375 0.046875 -0.1875l3.4375 -7.640625q0.078125 -0.15625 0.21875 -0.234375q0.140625 -0.09375 0.3125 -0.09375q0.171875 0 0.3125 0.09375q0.15625 0.078125 0.21875 0.234375l3.4375 7.640625zm-5.859375 -2.421875l3.8125 0l-1.90625 -4.3125l-1.90625 4.3125zm7.78656 3.046875q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l2.84375 0q1.328125 0 2.0625 0.65625q0.75 0.640625 0.75 1.828125q0 1.1875 -0.75 1.84375q-0.734375 0.65625 -2.0625 0.65625l-2.359375 0l0 3.03125q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.359375 0.140625zm2.765625 -4.34375q1.9375 0 1.9375 -1.6875q0 -1.671875 -1.9375 -1.671875l-2.265625 0l0 3.359375l2.265625 0zm4.9744263 4.34375q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.578125q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.140625 0.125 0.140625 0.359375l0 7.578125q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.359375 0.140625zm4.4157715 0.015625q-0.5625 0 -1.0625 -0.125q-0.5 -0.140625 -0.875 -0.375q-0.21875 -0.140625 -0.3125 -0.265625q-0.078125 -0.125 -0.078125 -0.3125q0 -0.15625 0.078125 -0.25q0.09375 -0.109375 0.234375 -0.109375q0.15625 0 0.421875 0.1875q0.359375 0.21875 0.71875 0.34375q0.359375 0.125 0.875 0.125q0.65625 0 1.015625 -0.21875q0.359375 -0.234375 0.359375 -0.671875q0 -0.265625 -0.140625 -0.421875q-0.125 -0.171875 -0.453125 -0.296875q-0.3125 -0.125 -0.9375 -0.25q-1.0625 -0.234375 -1.515625 -0.609375q-0.453125 -0.390625 -0.453125 -1.046875q0 -0.515625 0.28125 -0.90625q0.28125 -0.40625 0.796875 -0.625q0.515625 -0.234375 1.15625 -0.234375q0.46875 0 0.90625 0.125q0.4375 0.125 0.78125 0.34375q0.40625 0.296875 0.40625 0.609375q0 0.15625 -0.09375 0.265625q-0.09375 0.109375 -0.234375 0.109375q-0.140625 0 -0.4375 -0.203125q-0.328125 -0.21875 -0.625 -0.34375q-0.296875 -0.125 -0.75 -0.125q-0.5625 0 -0.90625 0.265625q-0.34375 0.25 -0.34375 0.671875q0 0.25 0.125 0.421875q0.125 0.15625 0.421875 0.28125q0.296875 0.125 0.84375 0.25q0.828125 0.1875 1.265625 0.40625q0.453125 0.203125 0.640625 0.515625q0.203125 0.3125 0.203125 0.796875q0 0.75 -0.640625 1.21875q-0.640625 0.453125 -1.671875 0.453125z" fill-rule="nonzero"/><path fill="#f3f3f3" d="m396.75067 183.75066l249.00787 0l0 203.02364l-249.00787 0z" fill-rule="evenodd"/><path stroke="#cccccc" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m396.75067 183.75066l249.00787 0l0 203.02364l-249.00787 0z" fill-rule="evenodd"/><path fill="#434343" d="m409.42255 374.66803q-0.90625 0 -1.609375 -0.40625q-0.6875 -0.421875 -1.078125 -1.171875q-0.375 -0.765625 -0.375 -1.765625q0 -1.0 0.390625 -1.765625q0.40625 -0.78125 1.109375 -1.203125q0.703125 -0.4375 1.625 -0.4375q0.5 0 1.0 0.140625q0.5 0.140625 0.875 0.40625q0.234375 0.171875 0.328125 0.328125q0.109375 0.140625 0.109375 0.328125q0 0.1875 -0.109375 0.3125q-0.09375 0.109375 -0.25 0.109375q-0.09375 0 -0.203125 -0.046875q-0.09375 -0.046875 -0.171875 -0.09375q-0.078125 -0.0625 -0.09375 -0.078125q-0.359375 -0.234375 -0.671875 -0.359375q-0.3125 -0.140625 -0.765625 -0.140625q-0.96875 0 -1.515625 0.671875q-0.53125 0.65625 -0.53125 1.828125q0 1.171875 0.53125 1.8125q0.546875 0.640625 1.515625 0.640625q0.453125 0 0.78125 -0.125q0.328125 -0.140625 0.65625 -0.375q0.15625 -0.09375 0.28125 -0.15625q0.140625 -0.0625 0.234375 -0.0625q0.140625 0 0.234375 0.125q0.109375 0.109375 0.109375 0.296875q0 0.171875 -0.09375 0.3125q-0.09375 0.140625 -0.34375 0.3125q-0.375 0.25 -0.90625 0.40625q-0.515625 0.15625 -1.0625 0.15625zm4.2591553 -0.03125q-0.234375 0 -0.390625 -0.140625q-0.15625 -0.140625 -0.15625 -0.390625l0 -8.46875q0 -0.25 0.15625 -0.390625q0.15625 -0.140625 0.390625 -0.140625q0.21875 0 0.375 0.140625q0.15625 0.140625 0.15625 0.390625l0 8.46875q0 0.25 -0.15625 0.390625q-0.15625 0.140625 -0.375 0.140625zm3.092102 0q-0.234375 0 -0.390625 -0.140625q-0.15625 -0.140625 -0.15625 -0.390625l0 -5.625q0 -0.25 0.15625 -0.390625q0.15625 -0.140625 0.390625 -0.140625q0.234375 0 0.375 0.140625q0.15625 0.140625 0.15625 0.390625l0 5.625q0 0.265625 -0.15625 0.40625q-0.140625 0.125 -0.375 0.125zm0 -8.09375q-0.3125 0 -0.515625 -0.171875q-0.203125 -0.1875 -0.203125 -0.5q0 -0.296875 0.203125 -0.484375q0.203125 -0.1875 0.515625 -0.1875q0.328125 0 0.515625 0.1875q0.203125 0.1875 0.203125 0.484375q0 0.3125 -0.203125 0.5q-0.1875 0.171875 -0.515625 0.171875zm7.5765076 6.53125q0.140625 0 0.25 0.125q0.109375 0.109375 0.109375 0.296875q0 0.328125 -0.46875 0.609375q-0.484375 0.28125 -1.015625 0.421875q-0.53125 0.140625 -1.046875 0.140625q-1.5 0 -2.375 -0.890625q-0.875 -0.890625 -0.875 -2.46875q0 -1.0 0.390625 -1.765625q0.390625 -0.765625 1.078125 -1.1875q0.703125 -0.4375 1.59375 -0.4375q1.265625 0 2.015625 0.828125q0.75 0.828125 0.75 2.25q0 0.265625 -0.109375 0.390625q-0.109375 0.109375 -0.34375 0.109375l-4.296875 0q0.125 2.296875 2.171875 2.296875q0.53125 0 0.890625 -0.140625q0.375 -0.140625 0.8125 -0.390625q0.34375 -0.1875 0.46875 -0.1875zm-2.34375 -4.3125q-0.84375 0 -1.359375 0.53125q-0.515625 0.53125 -0.609375 1.515625l3.765625 0q-0.015625 -1.0 -0.484375 -1.515625q-0.46875 -0.53125 -1.3125 -0.53125zm7.6020203 -0.84375q2.328125 0 2.328125 2.578125l0 3.609375q0 0.25 -0.140625 0.390625q-0.140625 0.140625 -0.390625 0.140625q-0.25 0 -0.40625 -0.140625q-0.140625 -0.140625 -0.140625 -0.390625l0 -3.546875q0 -0.90625 -0.359375 -1.3125q-0.34375 -0.421875 -1.125 -0.421875q-0.890625 0 -1.421875 0.546875q-0.53125 0.546875 -0.53125 1.484375l0 3.25q0 0.25 -0.140625 0.390625q-0.140625 0.140625 -0.390625 0.140625q-0.25 0 -0.40625 -0.140625q-0.140625 -0.140625 -0.140625 -0.390625l0 -5.625q0 -0.234375 0.140625 -0.375q0.15625 -0.15625 0.40625 -0.15625q0.234375 0 0.375 0.15625q0.140625 0.140625 0.140625 0.359375l0 0.6875q0.328125 -0.609375 0.890625 -0.921875q0.578125 -0.3125 1.3125 -0.3125zm7.304718 5.875q0.46875 0.03125 0.46875 0.421875q0 0.21875 -0.171875 0.34375q-0.171875 0.109375 -0.5 0.078125l-0.359375 -0.015625q-1.0625 -0.09375 -1.578125 -0.640625q-0.5 -0.5625 -0.5 -1.703125l0 -3.34375l-0.890625 0q-0.234375 0 -0.359375 -0.109375q-0.125 -0.109375 -0.125 -0.296875q0 -0.203125 0.125 -0.3125q0.125 -0.125 0.359375 -0.125l0.890625 0l0 -1.515625q0 -0.25 0.140625 -0.390625q0.15625 -0.140625 0.40625 -0.140625q0.234375 0 0.375 0.140625q0.15625 0.140625 0.15625 0.390625l0 1.515625l1.484375 0q0.203125 0 0.328125 0.125q0.140625 0.109375 0.140625 0.3125q0 0.1875 -0.140625 0.296875q-0.125 0.109375 -0.328125 0.109375l-1.484375 0l0 3.40625q0 0.734375 0.296875 1.0625q0.296875 0.3125 0.90625 0.359375l0.359375 0.03125z" fill-rule="nonzero"/><path fill="#f4cccc" d="m206.61942 201.17455l140.47244 0l0 30.992126l-140.47244 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m206.61942 201.17455l140.47244 0l0 30.992126l-140.47244 0z" fill-rule="evenodd"/><path fill="#000000" d="m237.0857 213.5031q-0.640625 0.046875 -0.96875 0.40625q-0.3125 0.34375 -0.3125 1.046875l0 0.390625l1.328125 0q0.203125 0 0.3125 0.109375q0.109375 0.109375 0.109375 0.28125q0 0.1875 -0.109375 0.28125q-0.109375 0.09375 -0.3125 0.09375l-1.328125 0l0 4.65625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.140625 -0.125 -0.140625 -0.359375l0 -4.65625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -0.21875q0 -1.078125 0.53125 -1.6875q0.546875 -0.625 1.5625 -0.703125l0.3125 -0.015625q0.3125 -0.03125 0.453125 0.0625q0.140625 0.078125 0.140625 0.296875q0 0.34375 -0.421875 0.390625l-0.3125 0.03125zm4.248535 1.71875q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm5.861023 4.609375q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm8.417801 3.875q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm8.199051 4.46875q0.203125 0 0.296875 0.109375q0.109375 0.09375 0.109375 0.265625q0 0.1875 -0.109375 0.296875q-0.09375 0.09375 -0.296875 0.09375l-4.203125 0q-0.203125 0 -0.34375 -0.125q-0.125 -0.125 -0.125 -0.3125q0 -0.1875 0.140625 -0.359375l3.546875 -4.28125l-3.28125 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l4.0625 0q0.21875 0 0.34375 0.125q0.140625 0.125 0.140625 0.3125q0 0.1875 -0.140625 0.359375l-3.5625 4.28125l3.421875 0zm6.2547913 -0.59375q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm3.3865662 5.875q-0.171875 0 -0.28125 -0.09375q-0.109375 -0.09375 -0.109375 -0.21875q0 -0.140625 0.109375 -0.234375q0.109375 -0.09375 0.28125 -0.09375l5.21875 0q0.171875 0 0.28125 0.09375q0.109375 0.09375 0.109375 0.234375q0 0.125 -0.109375 0.21875q-0.109375 0.09375 -0.28125 0.09375l-5.21875 0zm11.2500305 -6.609375q0.234375 0 0.359375 0.140625q0.125 0.125 0.125 0.34375l0 5.09375q0 1.296875 -0.671875 1.96875q-0.671875 0.671875 -1.984375 0.671875q-1.28125 0 -2.140625 -0.515625q-0.421875 -0.234375 -0.421875 -0.546875q0 -0.171875 0.078125 -0.28125q0.09375 -0.109375 0.234375 -0.109375q0.125 0 0.4375 0.171875q0.421875 0.21875 0.828125 0.34375q0.40625 0.140625 0.96875 0.140625q0.859375 0 1.28125 -0.453125q0.4375 -0.453125 0.4375 -1.3125l0 -1.03125q-0.25 0.5625 -0.78125 0.859375q-0.515625 0.296875 -1.21875 0.296875q-0.765625 0 -1.359375 -0.359375q-0.59375 -0.359375 -0.9375 -1.015625q-0.328125 -0.65625 -0.328125 -1.515625q0 -0.875 0.328125 -1.53125q0.34375 -0.65625 0.9375 -1.015625q0.59375 -0.359375 1.359375 -0.359375q0.6875 0 1.203125 0.296875q0.515625 0.296875 0.78125 0.84375l0 -0.640625q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625zm-2.28125 4.984375q0.84375 0 1.3125 -0.546875q0.484375 -0.5625 0.484375 -1.546875q0 -0.984375 -0.46875 -1.53125q-0.46875 -0.5625 -1.328125 -0.5625q-0.84375 0 -1.34375 0.5625q-0.484375 0.546875 -0.484375 1.53125q0 0.984375 0.484375 1.546875q0.5 0.546875 1.34375 0.546875zm7.4695435 -4.984375q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm3.720398 -0.015625q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm7.3131714 -5.296875q0.765625 0 1.34375 0.390625q0.59375 0.375 0.921875 1.0625q0.328125 0.6875 0.328125 1.609375q0 0.90625 -0.328125 1.59375q-0.328125 0.671875 -0.90625 1.046875q-0.578125 0.359375 -1.359375 0.359375q-0.6875 0 -1.203125 -0.296875q-0.5 -0.296875 -0.765625 -0.84375l0 2.8125q0 0.21875 -0.125 0.34375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.140625q-0.125 -0.125 -0.125 -0.328125l0 -7.234375q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.125 0.125 0.125 0.34375l0 0.640625q0.265625 -0.546875 0.765625 -0.84375q0.515625 -0.296875 1.203125 -0.296875zm-0.203125 5.265625q0.859375 0 1.328125 -0.578125q0.46875 -0.578125 0.46875 -1.625q0 -1.0625 -0.46875 -1.65625q-0.46875 -0.59375 -1.328125 -0.59375q-0.84375 0 -1.3125 0.578125q-0.453125 0.578125 -0.453125 1.640625q0 1.0625 0.453125 1.65625q0.46875 0.578125 1.3125 0.578125zm7.20282 -5.265625q1.03125 0 1.546875 0.578125q0.53125 0.578125 0.53125 1.734375l0 3.25q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.21875q0 -0.78125 -0.328125 -1.15625q-0.3125 -0.375 -1.0 -0.375q-0.8125 0 -1.296875 0.5q-0.46875 0.484375 -0.46875 1.328125l0 2.921875q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -7.625q0 -0.203125 0.125 -0.328125q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.125q0.125 0.125 0.125 0.34375l0 3.140625q0.28125 -0.53125 0.796875 -0.796875q0.515625 -0.28125 1.1875 -0.28125zm4.331665 6.046875q-0.28125 0 -0.484375 -0.1875q-0.1875 -0.1875 -0.1875 -0.484375q0 -0.296875 0.1875 -0.484375q0.203125 -0.203125 0.484375 -0.203125q0.28125 0 0.46875 0.203125q0.1875 0.1875 0.1875 0.484375q0 0.296875 -0.1875 0.484375q-0.1875 0.1875 -0.46875 0.1875zm5.2167664 -6.046875q0.765625 0 1.34375 0.390625q0.59375 0.375 0.921875 1.0625q0.328125 0.6875 0.328125 1.609375q0 0.90625 -0.328125 1.59375q-0.328125 0.671875 -0.90625 1.046875q-0.578125 0.359375 -1.359375 0.359375q-0.6875 0 -1.203125 -0.296875q-0.5 -0.296875 -0.765625 -0.84375l0 2.8125q0 0.21875 -0.125 0.34375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.140625q-0.125 -0.125 -0.125 -0.328125l0 -7.234375q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.125 0.125 0.125 0.34375l0 0.640625q0.265625 -0.546875 0.765625 -0.84375q0.515625 -0.296875 1.203125 -0.296875zm-0.203125 5.265625q0.859375 0 1.328125 -0.578125q0.46875 -0.578125 0.46875 -1.625q0 -1.0625 -0.46875 -1.65625q-0.46875 -0.59375 -1.328125 -0.59375q-0.84375 0 -1.3125 0.578125q-0.453125 0.578125 -0.453125 1.640625q0 1.0625 0.453125 1.65625q0.46875 0.578125 1.3125 0.578125zm8.45282 -4.9375q0.140625 -0.296875 0.421875 -0.296875q0.1875 0 0.328125 0.125q0.140625 0.109375 0.140625 0.296875q0 0.109375 -0.046875 0.1875l-3.375 7.28125q-0.0625 0.125 -0.171875 0.1875q-0.109375 0.078125 -0.234375 0.078125q-0.1875 0 -0.328125 -0.109375q-0.125 -0.109375 -0.125 -0.296875q0 -0.09375 0.046875 -0.1875l0.84375 -1.8125l-2.375 -5.140625q-0.046875 -0.078125 -0.046875 -0.171875q0 -0.1875 0.15625 -0.3125q0.15625 -0.140625 0.359375 -0.140625q0.109375 0 0.21875 0.078125q0.125 0.078125 0.1875 0.203125l2.0 4.5l2.0 -4.46875z" fill-rule="nonzero"/><path fill="#f4cccc" d="m132.49081 319.42978l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m132.49081 319.42978l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m163.01448 339.50836q-0.234375 0 -0.375 -0.140625q-0.140625 -0.140625 -0.140625 -0.359375l0 -7.1875l-2.578125 0q-0.21875 0 -0.34375 -0.109375q-0.109375 -0.109375 -0.109375 -0.3125q0 -0.203125 0.109375 -0.296875q0.125 -0.109375 0.34375 -0.109375l6.15625 0q0.21875 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.109375 0.109375 -0.328125 0.109375l-2.578125 0l0 7.1875q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.34375 0.140625zm8.160431 0.03125q-1.171875 0 -2.046875 -0.515625q-0.859375 -0.53125 -1.328125 -1.5q-0.46875 -0.984375 -0.46875 -2.296875q0 -1.34375 0.453125 -2.3125q0.46875 -0.984375 1.328125 -1.5q0.875 -0.53125 2.0625 -0.53125q1.1875 0 2.0625 0.53125q0.875 0.515625 1.328125 1.5q0.46875 0.96875 0.46875 2.296875q0 1.3125 -0.46875 2.296875q-0.46875 0.984375 -1.34375 1.515625q-0.859375 0.515625 -2.046875 0.515625zm0 -0.84375q1.34375 0 2.09375 -0.90625q0.75 -0.90625 0.75 -2.578125q0 -1.6875 -0.75 -2.578125q-0.734375 -0.90625 -2.09375 -0.90625q-1.34375 0 -2.09375 0.90625q-0.75 0.90625 -0.75 2.578125q0 1.671875 0.75 2.578125q0.75 0.90625 2.09375 0.90625zm9.214935 0.84375q-1.1875 0 -2.0625 -0.515625q-0.875 -0.53125 -1.359375 -1.5q-0.46875 -0.984375 -0.46875 -2.3125q0 -1.328125 0.46875 -2.296875q0.484375 -0.984375 1.359375 -1.5q0.875 -0.53125 2.0625 -0.53125q0.8125 0 1.515625 0.265625q0.71875 0.25 1.25 0.734375q0.1875 0.1875 0.1875 0.421875q0 0.171875 -0.09375 0.296875q-0.09375 0.125 -0.21875 0.125q-0.15625 0 -0.359375 -0.140625q-0.609375 -0.46875 -1.109375 -0.65625q-0.5 -0.203125 -1.140625 -0.203125q-1.390625 0 -2.140625 0.90625q-0.75 0.90625 -0.75 2.578125q0 1.671875 0.75 2.578125q0.75 0.90625 2.140625 0.90625q0.640625 0 1.140625 -0.1875q0.5 -0.1875 1.109375 -0.671875q0.203125 -0.125 0.359375 -0.125q0.125 0 0.21875 0.125q0.09375 0.109375 0.09375 0.296875q0 0.234375 -0.1875 0.40625q-0.53125 0.484375 -1.25 0.75q-0.703125 0.25 -1.515625 0.25zm8.077179 0q-1.171875 0 -2.046875 -0.515625q-0.859375 -0.53125 -1.328125 -1.5q-0.46875 -0.984375 -0.46875 -2.296875q0 -1.34375 0.453125 -2.3125q0.46875 -0.984375 1.328125 -1.5q0.875 -0.53125 2.0625 -0.53125q1.1875 0 2.0625 0.53125q0.875 0.515625 1.328125 1.5q0.46875 0.96875 0.46875 2.296875q0 1.3125 -0.46875 2.296875q-0.46875 0.984375 -1.34375 1.515625q-0.859375 0.515625 -2.046875 0.515625zm0 -0.84375q1.34375 0 2.09375 -0.90625q0.75 -0.90625 0.75 -2.578125q0 -1.6875 -0.75 -2.578125q-0.734375 -0.90625 -2.09375 -0.90625q-1.34375 0 -2.09375 0.90625q-0.75 0.90625 -0.75 2.578125q0 1.671875 0.75 2.578125q0.75 0.90625 2.09375 0.90625z" fill-rule="nonzero"/><path fill="#d9ead3" d="m284.12296 319.3983l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m284.12296 319.3983l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m314.7006 332.47687q-0.234375 0 -0.375 -0.140625q-0.140625 -0.140625 -0.140625 -0.359375l0 -7.1875l-2.578125 0q-0.21875 0 -0.34375 -0.109375q-0.109375 -0.109375 -0.109375 -0.3125q0 -0.203125 0.109375 -0.296875q0.125 -0.109375 0.34375 -0.109375l6.15625 0q0.21875 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.109375 0.109375 -0.328125 0.109375l-2.578125 0l0 7.1875q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.34375 0.140625zm5.113556 0q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l4.375 0q0.203125 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.125 0.109375 -0.328125 0.109375l-3.90625 0l0 2.90625l3.65625 0q0.21875 0 0.328125 0.109375q0.125 0.109375 0.125 0.3125q0 0.1875 -0.125 0.296875q-0.109375 0.109375 -0.328125 0.109375l-3.65625 0l0 3.453125q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.359375 0.140625zm6.6840515 -0.0625q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.328125l0 -7.5625q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.140625 0.125 0.140625 0.359375l0 7.171875l3.875 0q0.21875 0 0.328125 0.109375q0.125 0.109375 0.125 0.3125q0 0.203125 -0.125 0.3125q-0.109375 0.109375 -0.328125 0.109375l-4.375 0zm6.3394165 0.0625q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.125 -0.359375q0.140625 -0.125 0.359375 -0.125q0.21875 0 0.34375 0.125q0.140625 0.125 0.140625 0.359375l0 5.0625q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125zm0 -7.28125q-0.296875 0 -0.484375 -0.171875q-0.171875 -0.171875 -0.171875 -0.453125q0 -0.25 0.171875 -0.421875q0.1875 -0.171875 0.484375 -0.171875q0.28125 0 0.453125 0.171875q0.1875 0.171875 0.1875 0.421875q0 0.28125 -0.1875 0.453125q-0.171875 0.171875 -0.453125 0.171875zm4.987152 6.515625q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm5.9081726 -0.65625q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375z" fill-rule="nonzero"/><path fill="#000000" d="m303.37402 346.47687q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l4.375 0q0.203125 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.125 0.109375 -0.328125 0.109375l-3.90625 0l0 2.90625l3.65625 0q0.21875 0 0.328125 0.109375q0.125 0.109375 0.125 0.3125q0 0.1875 -0.125 0.296875q-0.109375 0.109375 -0.328125 0.109375l-3.65625 0l0 3.453125q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.359375 0.140625zm6.5434265 0q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -7.625q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.359375 -0.125q0.203125 0 0.34375 0.125q0.140625 0.125 0.140625 0.34375l0 7.625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125zm4.674652 -6.046875q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm7.0631714 -0.015625q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm4.3300476 -5.28125q0.765625 0 1.34375 0.375q0.59375 0.359375 0.921875 1.046875q0.328125 0.6875 0.328125 1.59375q0 0.90625 -0.328125 1.59375q-0.328125 0.6875 -0.921875 1.078125q-0.578125 0.375 -1.34375 0.375q-0.6875 0 -1.203125 -0.296875q-0.5 -0.296875 -0.765625 -0.84375l0 0.640625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -7.625q0 -0.203125 0.125 -0.328125q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.125q0.125 0.125 0.125 0.34375l0 3.203125q0.265625 -0.546875 0.765625 -0.84375q0.515625 -0.296875 1.203125 -0.296875zm-0.203125 5.265625q0.859375 0 1.328125 -0.59375q0.46875 -0.59375 0.46875 -1.65625q0 -1.046875 -0.46875 -1.625q-0.46875 -0.578125 -1.328125 -0.578125q-0.84375 0 -1.3125 0.578125q-0.453125 0.578125 -0.453125 1.640625q0 1.0625 0.453125 1.65625q0.46875 0.578125 1.3125 0.578125zm8.687164 -5.25q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.34375l0 5.078125q0 0.203125 -0.125 0.34375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.34375 -0.125q-0.125 -0.125 -0.125 -0.328125l0 -0.609375q-0.28125 0.53125 -0.78125 0.8125q-0.5 0.265625 -1.125 0.265625q-1.03125 0 -1.5625 -0.578125q-0.53125 -0.578125 -0.53125 -1.71875l0 -3.265625q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.34375l0 3.234375q0 0.78125 0.3125 1.15625q0.3125 0.359375 0.984375 0.359375q0.765625 0 1.234375 -0.5q0.46875 -0.5 0.46875 -1.3125l0 -2.9375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625zm4.8726807 -1.71875q-0.640625 0.046875 -0.96875 0.40625q-0.3125 0.34375 -0.3125 1.046875l0 0.390625l1.328125 0q0.203125 0 0.3125 0.109375q0.109375 0.109375 0.109375 0.28125q0 0.1875 -0.109375 0.28125q-0.109375 0.09375 -0.3125 0.09375l-1.328125 0l0 4.65625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.140625 -0.125 -0.140625 -0.359375l0 -4.65625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -0.21875q0 -1.078125 0.53125 -1.6875q0.546875 -0.625 1.5625 -0.703125l0.3125 -0.015625q0.3125 -0.03125 0.453125 0.0625q0.140625 0.078125 0.140625 0.296875q0 0.34375 -0.421875 0.390625l-0.3125 0.03125zm3.9360352 0q-0.640625 0.046875 -0.96875 0.40625q-0.3125 0.34375 -0.3125 1.046875l0 0.390625l1.328125 0q0.203125 0 0.3125 0.109375q0.109375 0.109375 0.109375 0.28125q0 0.1875 -0.109375 0.28125q-0.109375 0.09375 -0.3125 0.09375l-1.328125 0l0 4.65625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.140625 -0.125 -0.140625 -0.359375l0 -4.65625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -0.21875q0 -1.078125 0.53125 -1.6875q0.546875 -0.625 1.5625 -0.703125l0.3125 -0.015625q0.3125 -0.03125 0.453125 0.0625q0.140625 0.078125 0.140625 0.296875q0 0.34375 -0.421875 0.390625l-0.3125 0.03125zm5.873535 6.328125q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.7927856 -0.734375q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625z" fill-rule="nonzero"/><path fill="#f4cccc" d="m413.02625 319.3983l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m413.02625 319.3983l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m443.6039 332.47687q-0.234375 0 -0.375 -0.140625q-0.140625 -0.140625 -0.140625 -0.359375l0 -7.1875l-2.578125 0q-0.21875 0 -0.34375 -0.109375q-0.109375 -0.109375 -0.109375 -0.3125q0 -0.203125 0.109375 -0.296875q0.125 -0.109375 0.34375 -0.109375l6.15625 0q0.21875 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.109375 0.109375 -0.328125 0.109375l-2.578125 0l0 7.1875q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.34375 0.140625zm5.113556 0q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l4.375 0q0.203125 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.125 0.109375 -0.328125 0.109375l-3.90625 0l0 2.90625l3.65625 0q0.21875 0 0.328125 0.109375q0.125 0.109375 0.125 0.3125q0 0.1875 -0.125 0.296875q-0.109375 0.109375 -0.328125 0.109375l-3.65625 0l0 3.453125q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.359375 0.140625zm6.6840515 -0.0625q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.328125l0 -7.5625q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.140625 0.125 0.140625 0.359375l0 7.171875l3.875 0q0.21875 0 0.328125 0.109375q0.125 0.109375 0.125 0.3125q0 0.203125 -0.125 0.3125q-0.109375 0.109375 -0.328125 0.109375l-4.375 0zm6.3394165 0.0625q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.125 -0.359375q0.140625 -0.125 0.359375 -0.125q0.21875 0 0.34375 0.125q0.140625 0.125 0.140625 0.359375l0 5.0625q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125zm0 -7.28125q-0.296875 0 -0.484375 -0.171875q-0.171875 -0.171875 -0.171875 -0.453125q0 -0.25 0.171875 -0.421875q0.1875 -0.171875 0.484375 -0.171875q0.28125 0 0.453125 0.171875q0.1875 0.171875 0.1875 0.421875q0 0.28125 -0.1875 0.453125q-0.171875 0.171875 -0.453125 0.171875zm4.987152 6.515625q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm5.908142 -0.65625q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375z" fill-rule="nonzero"/><path fill="#000000" d="m429.9527 346.47687q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.125 -0.359375q0.140625 -0.125 0.359375 -0.125q0.21875 0 0.34375 0.125q0.140625 0.125 0.140625 0.359375l0 5.0625q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125zm0 -7.28125q-0.296875 0 -0.484375 -0.171875q-0.171875 -0.171875 -0.171875 -0.453125q0 -0.25 0.171875 -0.421875q0.1875 -0.171875 0.484375 -0.171875q0.28125 0 0.453125 0.171875q0.1875 0.171875 0.1875 0.421875q0 0.28125 -0.1875 0.453125q-0.171875 0.171875 -0.453125 0.171875zm5.237152 1.234375q2.09375 0 2.09375 2.3125l0 3.25q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.1875q0 -0.8125 -0.328125 -1.1875q-0.3125 -0.375 -1.0 -0.375q-0.8125 0 -1.296875 0.5q-0.46875 0.484375 -0.46875 1.328125l0 2.921875q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.328125l0 0.609375q0.28125 -0.53125 0.796875 -0.8125q0.53125 -0.28125 1.1875 -0.28125zm6.56604 5.28125q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm5.9081726 -0.65625q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.7927856 -0.734375q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm4.282898 -0.015625q0.765625 0 1.34375 0.390625q0.59375 0.375 0.921875 1.0625q0.328125 0.6875 0.328125 1.609375q0 0.90625 -0.328125 1.59375q-0.328125 0.671875 -0.90625 1.046875q-0.578125 0.359375 -1.359375 0.359375q-0.6875 0 -1.203125 -0.296875q-0.5 -0.296875 -0.765625 -0.84375l0 2.8125q0 0.21875 -0.125 0.34375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.140625q-0.125 -0.125 -0.125 -0.328125l0 -7.234375q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.125 0.125 0.125 0.34375l0 0.640625q0.265625 -0.546875 0.765625 -0.84375q0.515625 -0.296875 1.203125 -0.296875zm-0.203125 5.265625q0.859375 0 1.328125 -0.578125q0.46875 -0.578125 0.46875 -1.625q0 -1.0625 -0.46875 -1.65625q-0.46875 -0.59375 -1.328125 -0.59375q-0.84375 0 -1.3125 0.578125q-0.453125 0.578125 -0.453125 1.640625q0 1.0625 0.453125 1.65625q0.46875 0.578125 1.3125 0.578125zm7.14032 -5.25q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm5.861023 4.609375q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.5896606 4.53125q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm5.9081726 -0.65625q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.7927856 -0.734375q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m371.61902 334.89435l41.417297 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m371.61902 334.89435l37.990234 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m409.60925 334.89435l-1.1245728 1.1246033l3.0897522 -1.1246033l-3.0897522 -1.1245728z" fill-rule="evenodd"/><path fill="#c9daf8" d="m548.5407 277.52954l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m548.5407 277.52954l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path fill="#000000" d="m587.0588 293.13934q0.1875 0 0.296875 0.109375q0.109375 0.109375 0.109375 0.296875l0 2.984375q0 0.296875 -0.09375 0.4375q-0.078125 0.140625 -0.328125 0.234375q-0.46875 0.203125 -1.15625 0.328125q-0.6875 0.109375 -1.375 0.109375q-1.25 0 -2.171875 -0.515625q-0.90625 -0.515625 -1.390625 -1.484375q-0.484375 -0.96875 -0.484375 -2.328125q0 -1.328125 0.46875 -2.296875q0.484375 -0.984375 1.375 -1.5q0.90625 -0.53125 2.125 -0.53125q0.84375 0 1.5625 0.265625q0.71875 0.25 1.203125 0.734375q0.21875 0.203125 0.21875 0.421875q0 0.171875 -0.109375 0.296875q-0.09375 0.125 -0.234375 0.125q-0.140625 0 -0.328125 -0.140625q-0.625 -0.484375 -1.140625 -0.671875q-0.5 -0.1875 -1.15625 -0.1875q-1.4375 0 -2.203125 0.90625q-0.75 0.890625 -0.75 2.578125q0 1.71875 0.765625 2.609375q0.78125 0.890625 2.28125 0.890625q1.109375 0 2.03125 -0.328125l0 -2.578125l-1.75 0q-0.203125 0 -0.328125 -0.109375q-0.125 -0.109375 -0.125 -0.265625q0 -0.1875 0.125 -0.28125q0.125 -0.109375 0.328125 -0.109375l2.234375 0zm2.8911743 4.46875q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l2.84375 0q1.328125 0 2.0625 0.65625q0.75 0.640625 0.75 1.828125q0 1.1875 -0.75 1.84375q-0.734375 0.65625 -2.0625 0.65625l-2.359375 0l0 3.03125q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.359375 0.140625zm2.765625 -4.34375q1.9375 0 1.9375 -1.6875q0 -1.671875 -1.9375 -1.671875l-2.265625 0l0 3.359375l2.265625 0zm7.7869263 4.375q-1.65625 0 -2.515625 -0.859375q-0.84375 -0.859375 -0.84375 -2.546875l0 -4.703125q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.359375l0 4.78125q0 1.25 0.609375 1.875q0.609375 0.609375 1.78125 0.609375q1.171875 0 1.765625 -0.609375q0.609375 -0.625 0.609375 -1.875l0 -4.78125q0 -0.234375 0.140625 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.359375l0 4.703125q0 1.671875 -0.859375 2.546875q-0.859375 0.859375 -2.5 0.859375z" fill-rule="nonzero"/><path fill="#c9daf8" d="m548.5407 319.3983l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m548.5407 319.3983l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path fill="#000000" d="m584.63763 339.50812q-1.1875 0 -2.0625 -0.515625q-0.875 -0.53125 -1.359375 -1.5q-0.46875 -0.984375 -0.46875 -2.3125q0 -1.328125 0.46875 -2.296875q0.484375 -0.984375 1.359375 -1.5q0.875 -0.53125 2.0625 -0.53125q0.8125 0 1.515625 0.265625q0.71875 0.25 1.25 0.734375q0.1875 0.1875 0.1875 0.421875q0 0.171875 -0.09375 0.296875q-0.09375 0.125 -0.21875 0.125q-0.15625 0 -0.359375 -0.140625q-0.609375 -0.46875 -1.109375 -0.65625q-0.5 -0.203125 -1.140625 -0.203125q-1.390625 0 -2.140625 0.90625q-0.75 0.90625 -0.75 2.578125q0 1.671875 0.75 2.578125q0.75 0.90625 2.140625 0.90625q0.640625 0 1.140625 -0.1875q0.5 -0.1875 1.109375 -0.671875q0.203125 -0.125 0.359375 -0.125q0.125 0 0.21875 0.125q0.09375 0.109375 0.09375 0.296875q0 0.234375 -0.1875 0.40625q-0.53125 0.484375 -1.25 0.75q-0.703125 0.25 -1.515625 0.25zm5.0302734 -0.03125q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l2.84375 0q1.328125 0 2.0625 0.65625q0.75 0.640625 0.75 1.828125q0 1.1875 -0.75 1.84375q-0.734375 0.65625 -2.0625 0.65625l-2.359375 0l0 3.03125q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.359375 0.140625zm2.765625 -4.34375q1.9375 0 1.9375 -1.6875q0 -1.671875 -1.9375 -1.671875l-2.265625 0l0 3.359375l2.265625 0zm7.7869263 4.375q-1.65625 0 -2.515625 -0.859375q-0.84375 -0.859375 -0.84375 -2.546875l0 -4.703125q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.359375l0 4.78125q0 1.25 0.609375 1.875q0.609375 0.609375 1.78125 0.609375q1.171875 0 1.765625 -0.609375q0.609375 -0.625 0.609375 -1.875l0 -4.78125q0 -0.234375 0.140625 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.359375l0 4.703125q0 1.671875 -0.859375 2.546875q-0.859375 0.859375 -2.5 0.859375z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m219.98688 334.92584l64.12598 -0.03149414" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m219.98688 334.92584l60.698914 -0.029815674" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m280.68576 334.89603l-1.1240234 1.1251526l3.0892334 -1.1260986l-3.090332 -1.1230774z" fill-rule="evenodd"/><path fill="#d9ead3" d="m413.02625 141.28871l20.53543 0l0 20.53543l-20.53543 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m413.02625 141.28871l20.53543 0l0 20.53543l-20.53543 0z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m437.52493 135.68242l73.763794 0l0 31.748032l-73.763794 0z" fill-rule="evenodd"/><path fill="#000000" d="m448.0718 156.20241q-0.234375 0 -0.375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -7.5q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l2.34375 0q2.03125 0 3.140625 1.09375q1.109375 1.09375 1.109375 3.125q0 2.03125 -1.125 3.140625q-1.109375 1.09375 -3.125 1.09375l-2.34375 0zm2.28125 -0.84375q3.28125 0 3.28125 -3.390625q0 -3.390625 -3.28125 -3.390625l-1.796875 0l0 6.78125l1.796875 0zm8.3211975 -5.140625q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm7.0631714 -0.015625q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm3.767517 -5.28125q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm10.15921 0.75q-0.234375 0 -0.375 -0.140625q-0.140625 -0.140625 -0.140625 -0.359375l0 -7.1875l-2.578125 0q-0.21875 0 -0.34375 -0.109375q-0.109375 -0.109375 -0.109375 -0.3125q0 -0.203125 0.109375 -0.296875q0.125 -0.109375 0.34375 -0.109375l6.15625 0q0.21875 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.109375 0.109375 -0.328125 0.109375l-2.578125 0l0 7.1875q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.34375 0.140625zm8.691681 -5.71875q0.140625 -0.296875 0.421875 -0.296875q0.1875 0 0.328125 0.125q0.140625 0.109375 0.140625 0.296875q0 0.109375 -0.046875 0.1875l-3.375 7.28125q-0.0625 0.125 -0.171875 0.1875q-0.109375 0.078125 -0.234375 0.078125q-0.1875 0 -0.328125 -0.109375q-0.125 -0.109375 -0.125 -0.296875q0 -0.09375 0.046875 -0.1875l0.84375 -1.8125l-2.375 -5.140625q-0.046875 -0.078125 -0.046875 -0.171875q0 -0.1875 0.15625 -0.3125q0.15625 -0.140625 0.359375 -0.140625q0.109375 0 0.21875 0.078125q0.125 0.078125 0.1875 0.203125l2.0 4.5l2.0 -4.46875zm4.902405 -0.328125q0.765625 0 1.34375 0.390625q0.59375 0.375 0.921875 1.0625q0.328125 0.6875 0.328125 1.609375q0 0.90625 -0.328125 1.59375q-0.328125 0.671875 -0.90625 1.046875q-0.578125 0.359375 -1.359375 0.359375q-0.6875 0 -1.203125 -0.296875q-0.5 -0.296875 -0.765625 -0.84375l0 2.8125q0 0.21875 -0.125 0.34375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.140625q-0.125 -0.125 -0.125 -0.328125l0 -7.234375q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.125 0.125 0.125 0.34375l0 0.640625q0.265625 -0.546875 0.765625 -0.84375q0.515625 -0.296875 1.203125 -0.296875zm-0.203125 5.265625q0.859375 0 1.328125 -0.578125q0.46875 -0.578125 0.46875 -1.625q0 -1.0625 -0.46875 -1.65625q-0.46875 -0.59375 -1.328125 -0.59375q-0.84375 0 -1.3125 0.578125q-0.453125 0.578125 -0.453125 1.640625q0 1.0625 0.453125 1.65625q0.46875 0.578125 1.3125 0.578125zm8.76532 -0.640625q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375z" fill-rule="nonzero"/><path fill="#f4cccc" d="m519.9029 141.28871l20.5354 0l0 20.53543l-20.5354 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m519.9029 141.28871l20.5354 0l0 20.53543l-20.5354 0z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m544.40155 135.68242l100.0 0l0 31.748032l-100.0 0z" fill-rule="evenodd"/><path fill="#000000" d="m554.9328 156.26491q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.578125q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.140625 0.125 0.140625 0.359375l0 7.578125q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.359375 0.140625zm5.3845215 -6.046875q2.09375 0 2.09375 2.3125l0 3.25q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.1875q0 -0.8125 -0.328125 -1.1875q-0.3125 -0.375 -1.0 -0.375q-0.8125 0 -1.296875 0.5q-0.46875 0.484375 -0.46875 1.328125l0 2.921875q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.328125l0 0.609375q0.28125 -0.53125 0.796875 -0.8125q0.53125 -0.28125 1.1875 -0.28125zm6.456726 -1.703125q-0.640625 0.046875 -0.96875 0.40625q-0.3125 0.34375 -0.3125 1.046875l0 0.390625l1.328125 0q0.203125 0 0.3125 0.109375q0.109375 0.109375 0.109375 0.28125q0 0.1875 -0.109375 0.28125q-0.109375 0.09375 -0.3125 0.09375l-1.328125 0l0 4.65625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.140625 -0.125 -0.140625 -0.359375l0 -4.65625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -0.21875q0 -1.078125 0.53125 -1.6875q0.546875 -0.625 1.5625 -0.703125l0.3125 -0.015625q0.3125 -0.03125 0.453125 0.0625q0.140625 0.078125 0.140625 0.296875q0 0.34375 -0.421875 0.390625l-0.3125 0.03125zm4.248535 1.71875q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm3.720398 -0.015625q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm6.3444214 0.765625q-0.5625 0 -1.0625 -0.125q-0.5 -0.140625 -0.875 -0.375q-0.21875 -0.140625 -0.3125 -0.265625q-0.078125 -0.125 -0.078125 -0.3125q0 -0.15625 0.078125 -0.25q0.09375 -0.109375 0.234375 -0.109375q0.15625 0 0.421875 0.1875q0.359375 0.21875 0.71875 0.34375q0.359375 0.125 0.875 0.125q0.65625 0 1.015625 -0.21875q0.359375 -0.234375 0.359375 -0.671875q0 -0.265625 -0.140625 -0.421875q-0.125 -0.171875 -0.453125 -0.296875q-0.3125 -0.125 -0.9375 -0.25q-1.0625 -0.234375 -1.515625 -0.609375q-0.453125 -0.390625 -0.453125 -1.046875q0 -0.515625 0.28125 -0.90625q0.28125 -0.40625 0.796875 -0.625q0.515625 -0.234375 1.15625 -0.234375q0.46875 0 0.90625 0.125q0.4375 0.125 0.78125 0.34375q0.40625 0.296875 0.40625 0.609375q0 0.15625 -0.09375 0.265625q-0.09375 0.109375 -0.234375 0.109375q-0.140625 0 -0.4375 -0.203125q-0.328125 -0.21875 -0.625 -0.34375q-0.296875 -0.125 -0.75 -0.125q-0.5625 0 -0.90625 0.265625q-0.34375 0.25 -0.34375 0.671875q0 0.25 0.125 0.421875q0.125 0.15625 0.421875 0.28125q0.296875 0.125 0.84375 0.25q0.828125 0.1875 1.265625 0.40625q0.453125 0.203125 0.640625 0.515625q0.203125 0.3125 0.203125 0.796875q0 0.75 -0.640625 1.21875q-0.640625 0.453125 -1.671875 0.453125zm6.47876 -0.78125q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm4.283142 -5.265625q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm5.782898 0q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.34375l0 5.078125q0 0.203125 -0.125 0.34375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.34375 -0.125q-0.125 -0.125 -0.125 -0.328125l0 -0.609375q-0.28125 0.53125 -0.78125 0.8125q-0.5 0.265625 -1.125 0.265625q-1.03125 0 -1.5625 -0.578125q-0.53125 -0.578125 -0.53125 -1.71875l0 -3.265625q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.34375l0 3.234375q0 0.78125 0.3125 1.15625q0.3125 0.359375 0.984375 0.359375q0.765625 0 1.234375 -0.5q0.46875 -0.5 0.46875 -1.3125l0 -2.9375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625zm4.7008057 6.046875q-0.8125 0 -1.453125 -0.359375q-0.625 -0.375 -0.96875 -1.0625q-0.34375 -0.6875 -0.34375 -1.578125q0 -0.90625 0.359375 -1.59375q0.359375 -0.703125 0.984375 -1.078125q0.640625 -0.390625 1.46875 -0.390625q0.453125 0 0.90625 0.125q0.453125 0.125 0.78125 0.359375q0.21875 0.140625 0.3125 0.28125q0.09375 0.140625 0.09375 0.3125q0 0.171875 -0.09375 0.28125q-0.09375 0.09375 -0.234375 0.09375q-0.078125 0 -0.1875 -0.046875q-0.09375 -0.046875 -0.15625 -0.09375q-0.0625 -0.046875 -0.09375 -0.0625q-0.3125 -0.203125 -0.59375 -0.3125q-0.28125 -0.125 -0.6875 -0.125q-0.875 0 -1.359375 0.59375q-0.484375 0.59375 -0.484375 1.65625q0 1.046875 0.484375 1.625q0.484375 0.578125 1.359375 0.578125q0.40625 0 0.703125 -0.109375q0.296875 -0.125 0.59375 -0.328125q0.140625 -0.09375 0.25 -0.15625q0.125 -0.0625 0.203125 -0.0625q0.140625 0 0.21875 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.15625 -0.09375 0.28125q-0.078125 0.125 -0.296875 0.28125q-0.34375 0.234375 -0.8125 0.375q-0.46875 0.125 -0.953125 0.125zm6.029297 -0.78125q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm5.830017 -5.265625q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.34375l0 5.078125q0 0.203125 -0.125 0.34375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.34375 -0.125q-0.125 -0.125 -0.125 -0.328125l0 -0.609375q-0.28125 0.53125 -0.78125 0.8125q-0.5 0.265625 -1.125 0.265625q-1.03125 0 -1.5625 -0.578125q-0.53125 -0.578125 -0.53125 -1.71875l0 -3.265625q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.34375l0 3.234375q0 0.78125 0.3125 1.15625q0.3125 0.359375 0.984375 0.359375q0.765625 0 1.234375 -0.5q0.46875 -0.5 0.46875 -1.3125l0 -2.9375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625zm5.1851807 0q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm5.861023 4.609375q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375z" fill-rule="nonzero"/><path fill="#d9ead3" d="m31.874912 252.53609l87.49606 0l0 30.992142l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m31.874912 252.53609l87.49606 0l0 30.992142l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m67.27695 264.03653q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.359375l0 7.578125q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.34375 0.140625q-0.234375 0 -0.375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -3.4375l-5.062496 0l0 3.4375q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.34375 0.140625q-0.234375 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.578125q0 -0.234375 0.125 -0.359375q0.125 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.359375l0 3.296875l5.062496 0l0 -3.296875q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.375 -0.140625zm3.0648193 8.515625q-0.234375 0 -0.375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -7.5q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l2.34375 0q2.03125 0 3.140625 1.09375q1.109375 1.09375 1.109375 3.125q0 2.03125 -1.125 3.140625q-1.109375 1.09375 -3.125 1.09375l-2.34375 0zm2.28125 -0.84375q3.28125 0 3.28125 -3.390625q0 -3.390625 -3.28125 -3.390625l-1.796875 0l0 6.78125l1.796875 0zm6.5711823 0.90625q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l4.375 0q0.203125 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.125 0.109375 -0.328125 0.109375l-3.90625 0l0 2.90625l3.65625 0q0.21875 0 0.328125 0.109375q0.125 0.109375 0.125 0.3125q0 0.1875 -0.125 0.296875q-0.109375 0.109375 -0.328125 0.109375l-3.65625 0l0 3.453125q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.359375 0.140625zm9.0746765 -5.359375q0.8125 0 1.40625 0.34375q0.609375 0.328125 0.9375 0.9375q0.328125 0.59375 0.328125 1.390625q0 0.78125 -0.359375 1.40625q-0.359375 0.625 -1.0 0.96875q-0.640625 0.328125 -1.484375 0.328125q-0.734375 0 -1.453125 -0.25q-0.703125 -0.265625 -1.1875 -0.734375q-0.203125 -0.171875 -0.203125 -0.40625q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.234375 -0.125q0.171875 0 0.34375 0.140625q0.515625 0.4375 1.046875 0.640625q0.53125 0.203125 1.109375 0.203125q0.890625 0 1.390625 -0.5q0.5 -0.5 0.5 -1.359375q0 -0.84375 -0.5 -1.359375q-0.5 -0.515625 -1.359375 -0.515625q-1.09375 0 -1.78125 0.84375q-0.15625 0.171875 -0.40625 0.171875q-0.15625 0 -0.28125 -0.09375q-0.109375 -0.109375 -0.109375 -0.296875l0 -4.125q0 -0.21875 0.125 -0.34375q0.125 -0.125 0.359375 -0.125l4.21875 0q0.21875 0 0.34375 0.109375q0.125 0.09375 0.125 0.296875q0 0.1875 -0.125 0.296875q-0.125 0.109375 -0.34375 0.109375l-3.734375 0l0 3.015625q0.34375 -0.328125 0.78125 -0.5q0.453125 -0.171875 0.984375 -0.171875z" fill-rule="nonzero"/><path fill="#d9ead3" d="m190.14 134.76706l87.49608 0l0 30.992126l-87.49608 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m190.14 134.76706l87.49608 0l0 30.992126l-87.49608 0z" fill-rule="evenodd"/><path fill="#000000" d="m215.10997 150.37688q0.1875 0 0.296875 0.109375q0.109375 0.109375 0.109375 0.296875l0 2.984375q0 0.296875 -0.09375 0.4375q-0.078125 0.140625 -0.328125 0.234375q-0.46875 0.203125 -1.15625 0.328125q-0.6875 0.109375 -1.375 0.109375q-1.25 0 -2.171875 -0.515625q-0.90625 -0.515625 -1.390625 -1.484375q-0.484375 -0.96875 -0.484375 -2.328125q0 -1.328125 0.46875 -2.296875q0.484375 -0.984375 1.375 -1.5q0.90625 -0.53125 2.125 -0.53125q0.84375 0 1.5625 0.265625q0.71875 0.25 1.203125 0.734375q0.21875 0.203125 0.21875 0.421875q0 0.171875 -0.109375 0.296875q-0.09375 0.125 -0.234375 0.125q-0.140625 0 -0.328125 -0.140625q-0.625 -0.484375 -1.140625 -0.671875q-0.5 -0.1875 -1.15625 -0.1875q-1.4375 0 -2.203125 0.90625q-0.75 0.890625 -0.75 2.578125q0 1.71875 0.765625 2.609375q0.78125 0.890625 2.28125 0.890625q1.109375 0 2.03125 -0.328125l0 -2.578125l-1.75 0q-0.203125 0 -0.328125 -0.109375q-0.125 -0.109375 -0.125 -0.265625q0 -0.1875 0.125 -0.28125q0.125 -0.109375 0.328125 -0.109375l2.234375 0zm5.1568146 -1.5625q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm3.720398 -0.015625q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm7.3131714 -5.296875q0.765625 0 1.34375 0.390625q0.59375 0.375 0.921875 1.0625q0.328125 0.6875 0.328125 1.609375q0 0.90625 -0.328125 1.59375q-0.328125 0.671875 -0.90625 1.046875q-0.578125 0.359375 -1.359375 0.359375q-0.6875 0 -1.203125 -0.296875q-0.5 -0.296875 -0.765625 -0.84375l0 2.8125q0 0.21875 -0.125 0.34375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.140625q-0.125 -0.125 -0.125 -0.328125l0 -7.234375q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.125 0.125 0.125 0.34375l0 0.640625q0.265625 -0.546875 0.765625 -0.84375q0.515625 -0.296875 1.203125 -0.296875zm-0.203125 5.265625q0.859375 0 1.328125 -0.578125q0.46875 -0.578125 0.46875 -1.625q0 -1.0625 -0.46875 -1.65625q-0.46875 -0.59375 -1.328125 -0.59375q-0.84375 0 -1.3125 0.578125q-0.453125 0.578125 -0.453125 1.640625q0 1.0625 0.453125 1.65625q0.46875 0.578125 1.3125 0.578125zm7.2028046 -5.265625q1.03125 0 1.546875 0.578125q0.53125 0.578125 0.53125 1.734375l0 3.25q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.21875q0 -0.78125 -0.328125 -1.15625q-0.3125 -0.375 -1.0 -0.375q-0.8125 0 -1.296875 0.5q-0.46875 0.484375 -0.46875 1.328125l0 2.921875q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -7.625q0 -0.203125 0.125 -0.328125q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.125q0.125 0.125 0.125 0.34375l0 3.140625q0.28125 -0.53125 0.796875 -0.796875q0.515625 -0.28125 1.1875 -0.28125zm4.5035553 5.984375q-0.234375 0 -0.375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -7.5q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l2.34375 0q2.03125 0 3.140625 1.09375q1.109375 1.09375 1.109375 3.125q0 2.03125 -1.125 3.140625q-1.109375 1.09375 -3.125 1.09375l-2.34375 0zm2.28125 -0.84375q3.28125 0 3.28125 -3.390625q0 -3.390625 -3.28125 -3.390625l-1.796875 0l0 6.78125l1.796875 0zm10.461807 -0.515625q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.480301 -2.453125q-0.640625 0.046875 -0.96875 0.40625q-0.3125 0.34375 -0.3125 1.046875l0 0.390625l1.328125 0q0.203125 0 0.3125 0.109375q0.109375 0.109375 0.109375 0.28125q0 0.1875 -0.109375 0.28125q-0.109375 0.09375 -0.3125 0.09375l-1.328125 0l0 4.65625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.140625 -0.125 -0.140625 -0.359375l0 -4.65625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -0.21875q0 -1.078125 0.53125 -1.6875q0.546875 -0.625 1.5625 -0.703125l0.3125 -0.015625q0.3125 -0.03125 0.453125 0.0625q0.140625 0.078125 0.140625 0.296875q0 0.34375 -0.421875 0.390625l-0.3125 0.03125z" fill-rule="nonzero"/><path fill="#d9ead3" d="m233.1085 252.53609l87.49608 0l0 30.992142l-87.49608 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m233.1085 252.53609l87.49608 0l0 30.992142l-87.49608 0z" fill-rule="evenodd"/><path fill="#000000" d="m260.00964 265.61465q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l4.375 0q0.203125 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.203125 -0.125 0.3125q-0.125 0.109375 -0.328125 0.109375l-3.90625 0l0 2.90625l3.65625 0q0.21875 0 0.328125 0.109375q0.125 0.109375 0.125 0.3125q0 0.1875 -0.125 0.296875q-0.109375 0.109375 -0.328125 0.109375l-3.65625 0l0 3.453125q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.359375 0.140625zm8.9496765 -6.03125q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm3.767273 6.046875q-0.828125 0 -1.46875 -0.359375q-0.625 -0.375 -0.96875 -1.0625q-0.34375 -0.703125 -0.34375 -1.609375q0 -0.90625 0.34375 -1.59375q0.34375 -0.703125 0.96875 -1.0625q0.640625 -0.375 1.46875 -0.375q0.828125 0 1.453125 0.375q0.640625 0.359375 0.984375 1.0625q0.34375 0.6875 0.34375 1.59375q0 0.90625 -0.34375 1.609375q-0.34375 0.6875 -0.984375 1.0625q-0.625 0.359375 -1.453125 0.359375zm0 -0.796875q0.859375 0 1.3125 -0.5625q0.46875 -0.578125 0.46875 -1.671875q0 -1.0625 -0.46875 -1.640625q-0.46875 -0.59375 -1.3125 -0.59375q-0.859375 0 -1.328125 0.59375q-0.46875 0.578125 -0.46875 1.640625q0 1.078125 0.453125 1.65625q0.46875 0.578125 1.34375 0.578125zm8.535065 -0.046875q0.203125 0 0.296875 0.109375q0.109375 0.09375 0.109375 0.265625q0 0.1875 -0.109375 0.296875q-0.09375 0.09375 -0.296875 0.09375l-4.203125 0q-0.203125 0 -0.34375 -0.125q-0.125 -0.125 -0.125 -0.3125q0 -0.1875 0.140625 -0.359375l3.546875 -4.28125l-3.28125 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l4.0625 0q0.21875 0 0.34375 0.125q0.140625 0.125 0.140625 0.3125q0 0.1875 -0.140625 0.359375l-3.5625 4.28125l3.421875 0zm6.2547913 -0.59375q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.8396606 -0.75q2.09375 0 2.09375 2.3125l0 3.25q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.1875q0 -0.8125 -0.328125 -1.1875q-0.3125 -0.375 -1.0 -0.375q-0.8125 0 -1.296875 0.5q-0.46875 0.484375 -0.46875 1.328125l0 2.921875q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.328125l0 0.609375q0.28125 -0.53125 0.796875 -0.8125q0.53125 -0.28125 1.1875 -0.28125z" fill-rule="nonzero"/><path fill="#000000" d="m258.07846 275.1459q0.1875 0 0.296875 0.109375q0.109375 0.109375 0.109375 0.296875l0 2.984375q0 0.296875 -0.09375 0.4375q-0.078125 0.140625 -0.328125 0.234375q-0.46875 0.203125 -1.15625 0.328125q-0.6875 0.109375 -1.3749847 0.109375q-1.25 0 -2.171875 -0.515625q-0.90625 -0.515625 -1.390625 -1.484375q-0.484375 -0.96875 -0.484375 -2.328125q0 -1.328125 0.46875 -2.296875q0.484375 -0.984375 1.375 -1.5q0.90625 -0.53125 2.125 -0.53125q0.84373474 0 1.5624847 0.265625q0.71875 0.25 1.203125 0.734375q0.21875 0.203125 0.21875 0.421875q0 0.171875 -0.109375 0.296875q-0.09375 0.125 -0.234375 0.125q-0.140625 0 -0.328125 -0.140625q-0.625 -0.484375 -1.140625 -0.671875q-0.5 -0.1875 -1.1562347 -0.1875q-1.4375 0 -2.203125 0.90625q-0.75 0.890625 -0.75 2.578125q0 1.71875 0.765625 2.609375q0.78125 0.890625 2.28125 0.890625q1.1093597 0 2.0312347 -0.328125l0 -2.578125l-1.7499847 0q-0.203125 0 -0.328125 -0.109375q-0.125 -0.109375 -0.125 -0.265625q0 -0.1875 0.125 -0.28125q0.125 -0.109375 0.328125 -0.109375l2.2343597 0zm5.15683 -1.5625q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm3.720398 -0.015625q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm7.3131714 -5.296875q0.765625 0 1.34375 0.390625q0.59375 0.375 0.921875 1.0625q0.328125 0.6875 0.328125 1.609375q0 0.90625 -0.328125 1.59375q-0.328125 0.671875 -0.90625 1.046875q-0.578125 0.359375 -1.359375 0.359375q-0.6875 0 -1.203125 -0.296875q-0.5 -0.296875 -0.765625 -0.84375l0 2.8125q0 0.21875 -0.125 0.34375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.140625q-0.125 -0.125 -0.125 -0.328125l0 -7.234375q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.125 0.125 0.125 0.34375l0 0.640625q0.265625 -0.546875 0.765625 -0.84375q0.515625 -0.296875 1.203125 -0.296875zm-0.203125 5.265625q0.859375 0 1.328125 -0.578125q0.46875 -0.578125 0.46875 -1.625q0 -1.0625 -0.46875 -1.65625q-0.46875 -0.59375 -1.328125 -0.59375q-0.84375 0 -1.3125 0.578125q-0.453125 0.578125 -0.453125 1.640625q0 1.0625 0.453125 1.65625q0.46875 0.578125 1.3125 0.578125zm7.2027893 -5.265625q1.03125 0 1.546875 0.578125q0.53125 0.578125 0.53125 1.734375l0 3.25q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.21875q0 -0.78125 -0.328125 -1.15625q-0.3125 -0.375 -1.0 -0.375q-0.8125 0 -1.296875 0.5q-0.46875 0.484375 -0.46875 1.328125l0 2.921875q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -7.625q0 -0.203125 0.125 -0.328125q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.125q0.125 0.125 0.125 0.34375l0 3.140625q0.28125 -0.53125 0.796875 -0.796875q0.515625 -0.28125 1.1875 -0.28125zm4.5035706 5.984375q-0.234375 0 -0.375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -7.5q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l2.34375 0q2.03125 0 3.140625 1.09375q1.109375 1.09375 1.109375 3.125q0 2.03125 -1.125 3.140625q-1.109375 1.09375 -3.125 1.09375l-2.34375 0zm2.28125 -0.84375q3.28125 0 3.28125 -3.390625q0 -3.390625 -3.28125 -3.390625l-1.796875 0l0 6.78125l1.796875 0zm10.461792 -0.515625q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.480316 -2.453125q-0.640625 0.046875 -0.96875 0.40625q-0.3125 0.34375 -0.3125 1.046875l0 0.390625l1.328125 0q0.203125 0 0.3125 0.109375q0.109375 0.109375 0.109375 0.28125q0 0.1875 -0.109375 0.28125q-0.109375 0.09375 -0.3125 0.09375l-1.328125 0l0 4.65625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.140625 -0.125 -0.140625 -0.359375l0 -4.65625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -0.21875q0 -1.078125 0.53125 -1.6875q0.546875 -0.625 1.5625 -0.703125l0.3125 -0.015625q0.3125 -0.03125 0.453125 0.0625q0.140625 0.078125 0.140625 0.296875q0 0.34375 -0.421875 0.390625l-0.3125 0.03125z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m276.85565 232.16667l0 20.377945" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m276.85565 232.16667l0 16.950867" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m276.85565 249.11754l-1.1246033 -1.124588l1.1246033 3.0897675l1.1245728 -3.0897675z" fill-rule="evenodd"/><path fill="#f4cccc" d="m31.874016 68.3563l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m31.874016 68.3563l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m58.725647 87.669235q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm3.9706573 -6.984375q-0.640625 0.046875 -0.96875 0.40625q-0.3125 0.34375 -0.3125 1.046875l0 0.390625l1.328125 0q0.203125 0 0.3125 0.109375q0.109375 0.109375 0.109375 0.28125q0 0.1875 -0.109375 0.28125q-0.109375 0.09375 -0.3125 0.09375l-1.328125 0l0 4.65625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.140625 -0.125 -0.140625 -0.359375l0 -4.65625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -0.21875q0 -1.078125 0.53125 -1.6875q0.546875 -0.625 1.5625 -0.703125l0.3125 -0.015625q0.3125 -0.03125 0.453125 0.0625q0.140625 0.078125 0.140625 0.296875q0 0.34375 -0.421875 0.390625l-0.3125 0.03125zm1.8266602 7.75q-0.28125 0 -0.484375 -0.1875q-0.1875 -0.1875 -0.1875 -0.484375q0 -0.296875 0.1875 -0.484375q0.203125 -0.203125 0.484375 -0.203125q0.28125 0 0.46875 0.203125q0.1875 0.1875 0.1875 0.484375q0 0.296875 -0.1875 0.484375q-0.1875 0.1875 -0.46875 0.1875zm8.498016 -0.8125q0.171875 0.15625 0.171875 0.359375q0 0.15625 -0.140625 0.296875q-0.140625 0.140625 -0.3125 0.140625q-0.15625 0 -0.328125 -0.140625l-4.484375 -3.921875l0 3.578125q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.359375 0.140625q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.578125q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.140625 0.125 0.140625 0.359375l0 3.4375l4.28125 -3.796875q0.125 -0.140625 0.3125 -0.140625q0.171875 0 0.296875 0.140625q0.140625 0.140625 0.140625 0.3125q0 0.171875 -0.15625 0.328125l-3.875 3.421875l4.09375 3.5625zm5.8329315 -0.609375q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.792801 -0.734375q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625zm3.720398 -0.015625q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm6.3444214 0.765625q-0.5625 0 -1.0625 -0.125q-0.5 -0.140625 -0.875 -0.375q-0.21875 -0.140625 -0.3125 -0.265625q-0.078125 -0.125 -0.078125 -0.3125q0 -0.15625 0.078125 -0.25q0.09375 -0.109375 0.234375 -0.109375q0.15625 0 0.421875 0.1875q0.359375 0.21875 0.71875 0.34375q0.359375 0.125 0.875 0.125q0.65625 0 1.015625 -0.21875q0.359375 -0.234375 0.359375 -0.671875q0 -0.265625 -0.140625 -0.421875q-0.125 -0.171875 -0.453125 -0.296875q-0.3125 -0.125 -0.9375 -0.25q-1.0625 -0.234375 -1.515625 -0.609375q-0.453125 -0.390625 -0.453125 -1.046875q0 -0.515625 0.28125 -0.90625q0.28125 -0.40625 0.796875 -0.625q0.515625 -0.234375 1.15625 -0.234375q0.46875 0 0.90625 0.125q0.4375 0.125 0.78125 0.34375q0.40625 0.296875 0.40625 0.609375q0 0.15625 -0.09375 0.265625q-0.09375 0.109375 -0.234375 0.109375q-0.140625 0 -0.4375 -0.203125q-0.328125 -0.21875 -0.625 -0.34375q-0.296875 -0.125 -0.75 -0.125q-0.5625 0 -0.90625 0.265625q-0.34375 0.25 -0.34375 0.671875q0 0.25 0.125 0.421875q0.125 0.15625 0.421875 0.28125q0.296875 0.125 0.84375 0.25q0.828125 0.1875 1.265625 0.40625q0.453125 0.203125 0.640625 0.515625q0.203125 0.3125 0.203125 0.796875q0 0.75 -0.640625 1.21875q-0.640625 0.453125 -1.671875 0.453125z" fill-rule="nonzero"/><path fill="#f4cccc" d="m132.49081 68.35761l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m132.49081 68.35761l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m152.20152 88.37367q-0.234375 0 -0.375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -7.5q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l4.484375 0q0.21875 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.1875 -0.125 0.296875q-0.109375 0.109375 -0.328125 0.109375l-4.015625 0l0 2.9375l3.78125 0q0.21875 0 0.328125 0.109375q0.125 0.109375 0.125 0.296875q0 0.1875 -0.125 0.296875q-0.109375 0.109375 -0.328125 0.109375l-3.78125 0l0 3.078125l4.015625 0q0.21875 0 0.328125 0.109375q0.125 0.09375 0.125 0.296875q0 0.1875 -0.125 0.296875q-0.109375 0.109375 -0.328125 0.109375l-4.484375 0zm8.31218 0.078125q-0.5625 0 -1.0625 -0.125q-0.5 -0.140625 -0.875 -0.375q-0.21875 -0.140625 -0.3125 -0.265625q-0.078125 -0.125 -0.078125 -0.3125q0 -0.15625 0.078125 -0.25q0.09375 -0.109375 0.234375 -0.109375q0.15625 0 0.421875 0.1875q0.359375 0.21875 0.71875 0.34375q0.359375 0.125 0.875 0.125q0.65625 0 1.015625 -0.21875q0.359375 -0.234375 0.359375 -0.671875q0 -0.265625 -0.140625 -0.421875q-0.125 -0.171875 -0.453125 -0.296875q-0.3125 -0.125 -0.9375 -0.25q-1.0625 -0.234375 -1.515625 -0.609375q-0.453125 -0.390625 -0.453125 -1.046875q0 -0.515625 0.28125 -0.90625q0.28125 -0.40625 0.796875 -0.625q0.515625 -0.234375 1.15625 -0.234375q0.46875 0 0.90625 0.125q0.4375 0.125 0.78125 0.34375q0.40625 0.296875 0.40625 0.609375q0 0.15625 -0.09375 0.265625q-0.09375 0.109375 -0.234375 0.109375q-0.140625 0 -0.4375 -0.203125q-0.328125 -0.21875 -0.625 -0.34375q-0.296875 -0.125 -0.75 -0.125q-0.5625 0 -0.90625 0.265625q-0.34375 0.25 -0.34375 0.671875q0 0.25 0.125 0.421875q0.125 0.15625 0.421875 0.28125q0.296875 0.125 0.84375 0.25q0.828125 0.1875 1.265625 0.40625q0.453125 0.203125 0.640625 0.515625q0.203125 0.3125 0.203125 0.796875q0 0.75 -0.640625 1.21875q-0.640625 0.453125 -1.671875 0.453125zm6.4787903 -0.78125q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm1.8769073 0.765625q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.125 -0.359375q0.140625 -0.125 0.359375 -0.125q0.21875 0 0.34375 0.125q0.140625 0.125 0.140625 0.359375l0 5.0625q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125zm0 -7.28125q-0.296875 0 -0.484375 -0.171875q-0.171875 -0.171875 -0.171875 -0.453125q0 -0.25 0.171875 -0.421875q0.1875 -0.171875 0.484375 -0.171875q0.28125 0 0.453125 0.171875q0.1875 0.171875 0.1875 0.421875q0 0.28125 -0.1875 0.453125q-0.171875 0.171875 -0.453125 0.171875zm8.799652 1.234375q1.9375 0 1.9375 2.3125l0 3.25q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.328125 0.125q-0.21875 0 -0.359375 -0.125q-0.140625 -0.125 -0.140625 -0.359375l0 -3.21875q0 -0.8125 -0.296875 -1.171875q-0.28125 -0.359375 -0.890625 -0.359375q-0.734375 0 -1.15625 0.5q-0.421875 0.484375 -0.421875 1.328125l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.21875q0 -0.8125 -0.296875 -1.171875q-0.28125 -0.359375 -0.90625 -0.359375q-0.71875 0 -1.140625 0.5q-0.421875 0.484375 -0.421875 1.328125l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.359375 -0.140625q0.203125 0 0.328125 0.125q0.140625 0.125 0.140625 0.34375l0 0.578125q0.265625 -0.515625 0.734375 -0.78125q0.46875 -0.28125 1.078125 -0.28125q1.375 0 1.78125 1.140625q0.265625 -0.515625 0.78125 -0.828125q0.515625 -0.3125 1.171875 -0.3125zm6.0990753 0q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm7.0631714 -0.015625q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm3.8144073 0.78125q-0.828125 0 -1.46875 -0.359375q-0.625 -0.375 -0.96875 -1.0625q-0.34375 -0.703125 -0.34375 -1.609375q0 -0.90625 0.34375 -1.59375q0.34375 -0.703125 0.96875 -1.0625q0.640625 -0.375 1.46875 -0.375q0.828125 0 1.453125 0.375q0.640625 0.359375 0.984375 1.0625q0.34375 0.6875 0.34375 1.59375q0 0.90625 -0.34375 1.609375q-0.34375 0.6875 -0.984375 1.0625q-0.625 0.359375 -1.453125 0.359375zm0 -0.796875q0.859375 0 1.3125 -0.5625q0.46875 -0.578125 0.46875 -1.671875q0 -1.0625 -0.46875 -1.640625q-0.46875 -0.59375 -1.3125 -0.59375q-0.859375 0 -1.328125 0.59375q-0.46875 0.578125 -0.46875 1.640625q0 1.078125 0.453125 1.65625q0.46875 0.578125 1.34375 0.578125zm7.1287994 -5.25q0.5 -0.03125 0.5 0.40625q0 0.203125 -0.109375 0.3125q-0.109375 0.109375 -0.375 0.140625l-0.359375 0.03125q-0.796875 0.078125 -1.1875 0.578125q-0.390625 0.484375 -0.390625 1.15625l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.140625 -0.359375q0.140625 -0.125 0.34375 -0.125q0.1875 0 0.3125 0.125q0.140625 0.125 0.140625 0.34375l0 0.671875q0.25 -0.53125 0.71875 -0.796875q0.46875 -0.28125 1.0625 -0.328125l0.171875 -0.015625z" fill-rule="nonzero"/><path fill="#f4cccc" d="m233.1076 68.35761l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m233.1076 68.35761l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m269.00754 88.46742q-0.90625 0 -1.734375 -0.265625q-0.8125 -0.265625 -1.3125 -0.734375q-0.171875 -0.15625 -0.171875 -0.40625q0 -0.171875 0.09375 -0.296875q0.09375 -0.125 0.234375 -0.125q0.15625 0 0.328125 0.125q1.109375 0.859375 2.546875 0.859375q1.03125 0 1.578125 -0.390625q0.5625 -0.390625 0.5625 -1.125q0 -0.421875 -0.265625 -0.671875q-0.265625 -0.265625 -0.703125 -0.421875q-0.4375 -0.15625 -1.15625 -0.328125q-0.984375 -0.21875 -1.625 -0.46875q-0.625 -0.265625 -1.015625 -0.734375q-0.390625 -0.46875 -0.390625 -1.21875q0 -0.71875 0.390625 -1.265625q0.390625 -0.5625 1.09375 -0.875q0.703125 -0.3125 1.59375 -0.3125q0.84375 0 1.5625 0.265625q0.734375 0.25 1.234375 0.734375q0.1875 0.1875 0.1875 0.421875q0 0.171875 -0.09375 0.296875q-0.09375 0.125 -0.234375 0.125q-0.125 0 -0.34375 -0.140625q-0.59375 -0.46875 -1.09375 -0.65625q-0.5 -0.203125 -1.21875 -0.203125q-0.984375 0 -1.546875 0.421875q-0.546875 0.40625 -0.546875 1.15625q0 0.625 0.484375 0.953125q0.484375 0.3125 1.5 0.5625q1.09375 0.25 1.71875 0.484375q0.625 0.21875 1.03125 0.671875q0.421875 0.4375 0.421875 1.171875q0 0.71875 -0.390625 1.265625q-0.390625 0.53125 -1.109375 0.828125q-0.703125 0.296875 -1.609375 0.296875zm5.0446777 -0.03125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -7.625q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.359375 -0.125q0.203125 0 0.34375 0.125q0.140625 0.125 0.140625 0.34375l0 7.625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125zm2.784027 0q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.125 -0.359375q0.140625 -0.125 0.359375 -0.125q0.21875 0 0.34375 0.125q0.140625 0.125 0.140625 0.359375l0 5.0625q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125zm0 -7.28125q-0.296875 0 -0.484375 -0.171875q-0.171875 -0.171875 -0.171875 -0.453125q0 -0.25 0.171875 -0.421875q0.1875 -0.171875 0.484375 -0.171875q0.28125 0 0.453125 0.171875q0.1875 0.171875 0.1875 0.421875q0 0.28125 -0.1875 0.453125q-0.171875 0.171875 -0.453125 0.171875zm8.799652 1.234375q1.9375 0 1.9375 2.3125l0 3.25q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.328125 0.125q-0.21875 0 -0.359375 -0.125q-0.140625 -0.125 -0.140625 -0.359375l0 -3.21875q0 -0.8125 -0.296875 -1.171875q-0.28125 -0.359375 -0.890625 -0.359375q-0.734375 0 -1.15625 0.5q-0.421875 0.484375 -0.421875 1.328125l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.21875q0 -0.8125 -0.296875 -1.171875q-0.28125 -0.359375 -0.90625 -0.359375q-0.71875 0 -1.140625 0.5q-0.421875 0.484375 -0.421875 1.328125l0 2.921875q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.359375 -0.140625q0.203125 0 0.328125 0.125q0.140625 0.125 0.140625 0.34375l0 0.578125q0.265625 -0.515625 0.734375 -0.78125q0.46875 -0.28125 1.078125 -0.28125q1.375 0 1.78125 1.140625q0.265625 -0.515625 0.78125 -0.828125q0.515625 -0.3125 1.171875 -0.3125z" fill-rule="nonzero"/><path fill="#d9ead3" d="m282.5035 134.76706l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m282.5035 134.76706l87.49606 0l0 30.992126l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m297.8283 154.87688q-1.1875 0 -2.0625 -0.515625q-0.875 -0.53125 -1.359375 -1.5q-0.46875 -0.984375 -0.46875 -2.3125q0 -1.328125 0.46875 -2.296875q0.484375 -0.984375 1.359375 -1.5q0.875 -0.53125 2.0625 -0.53125q0.8125 0 1.515625 0.265625q0.71875 0.25 1.25 0.734375q0.1875 0.1875 0.1875 0.421875q0 0.171875 -0.09375 0.296875q-0.09375 0.125 -0.21875 0.125q-0.15625 0 -0.359375 -0.140625q-0.609375 -0.46875 -1.109375 -0.65625q-0.5 -0.203125 -1.140625 -0.203125q-1.390625 0 -2.140625 0.90625q-0.75 0.90625 -0.75 2.578125q0 1.671875 0.75 2.578125q0.75 0.90625 2.140625 0.90625q0.640625 0 1.140625 -0.1875q0.5 -0.1875 1.109375 -0.671875q0.203125 -0.125 0.359375 -0.125q0.125 0 0.21875 0.125q0.09375 0.109375 0.09375 0.296875q0 0.234375 -0.1875 0.40625q-0.53125 0.484375 -1.25 0.75q-0.703125 0.25 -1.515625 0.25zm7.358429 -6.078125q1.03125 0 1.546875 0.578125q0.53125 0.578125 0.53125 1.734375l0 3.25q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.21875q0 -0.78125 -0.328125 -1.15625q-0.3125 -0.375 -1.0 -0.375q-0.8125 0 -1.296875 0.5q-0.46875 0.484375 -0.46875 1.328125l0 2.921875q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -7.625q0 -0.203125 0.125 -0.328125q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.125q0.125 0.125 0.125 0.34375l0 3.140625q0.28125 -0.53125 0.796875 -0.796875q0.515625 -0.28125 1.1875 -0.28125zm8.37854 4.625q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm6.308441 5.3125q-0.8125 0 -1.453125 -0.359375q-0.625 -0.375 -0.96875 -1.0625q-0.34375 -0.6875 -0.34375 -1.578125q0 -0.90625 0.359375 -1.59375q0.359375 -0.703125 0.984375 -1.078125q0.640625 -0.390625 1.46875 -0.390625q0.453125 0 0.90625 0.125q0.453125 0.125 0.78125 0.359375q0.21875 0.140625 0.3125 0.28125q0.09375 0.140625 0.09375 0.3125q0 0.171875 -0.09375 0.28125q-0.09375 0.09375 -0.234375 0.09375q-0.078125 0 -0.1875 -0.046875q-0.09375 -0.046875 -0.15625 -0.09375q-0.0625 -0.046875 -0.09375 -0.0625q-0.3125 -0.203125 -0.59375 -0.3125q-0.28125 -0.125 -0.6875 -0.125q-0.875 0 -1.359375 0.59375q-0.484375 0.59375 -0.484375 1.65625q0 1.046875 0.484375 1.625q0.484375 0.578125 1.359375 0.578125q0.40625 0 0.703125 -0.109375q0.296875 -0.125 0.59375 -0.328125q0.140625 -0.09375 0.25 -0.15625q0.125 -0.0625 0.203125 -0.0625q0.140625 0 0.21875 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.15625 -0.09375 0.28125q-0.078125 0.125 -0.296875 0.28125q-0.34375 0.234375 -0.8125 0.375q-0.46875 0.125 -0.953125 0.125zm7.998047 -0.84375q0.203125 0.171875 0.203125 0.375q0 0.1875 -0.125 0.328125q-0.125 0.125 -0.3125 0.125q-0.15625 0 -0.328125 -0.140625l-3.125 -2.703125l0 2.359375q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -7.625q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.359375 -0.125q0.203125 0 0.34375 0.125q0.140625 0.125 0.140625 0.34375l0 4.875l2.859375 -2.625q0.15625 -0.140625 0.328125 -0.140625q0.1875 0 0.3125 0.140625q0.140625 0.125 0.140625 0.296875q0 0.203125 -0.171875 0.359375l-2.375 2.109375l2.59375 2.265625zm4.2812805 -5.21875q0.765625 0 1.34375 0.390625q0.59375 0.375 0.921875 1.0625q0.328125 0.6875 0.328125 1.609375q0 0.90625 -0.328125 1.59375q-0.328125 0.671875 -0.90625 1.046875q-0.578125 0.359375 -1.359375 0.359375q-0.6875 0 -1.203125 -0.296875q-0.5 -0.296875 -0.765625 -0.84375l0 2.8125q0 0.21875 -0.125 0.34375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.140625q-0.125 -0.125 -0.125 -0.328125l0 -7.234375q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.125 0.125 0.125 0.34375l0 0.640625q0.265625 -0.546875 0.765625 -0.84375q0.515625 -0.296875 1.203125 -0.296875zm-0.203125 5.265625q0.859375 0 1.328125 -0.578125q0.46875 -0.578125 0.46875 -1.625q0 -1.0625 -0.46875 -1.65625q-0.46875 -0.59375 -1.328125 -0.59375q-0.84375 0 -1.3125 0.578125q-0.453125 0.578125 -0.453125 1.640625q0 1.0625 0.453125 1.65625q0.46875 0.578125 1.3125 0.578125zm6.67157 0.796875q-0.828125 0 -1.46875 -0.359375q-0.625 -0.375 -0.96875 -1.0625q-0.34375 -0.703125 -0.34375 -1.609375q0 -0.90625 0.34375 -1.59375q0.34375 -0.703125 0.96875 -1.0625q0.640625 -0.375 1.46875 -0.375q0.828125 0 1.453125 0.375q0.640625 0.359375 0.984375 1.0625q0.34375 0.6875 0.34375 1.59375q0 0.90625 -0.34375 1.609375q-0.34375 0.6875 -0.984375 1.0625q-0.625 0.359375 -1.453125 0.359375zm0 -0.796875q0.859375 0 1.3125 -0.5625q0.46875 -0.578125 0.46875 -1.671875q0 -1.0625 -0.46875 -1.640625q-0.46875 -0.59375 -1.3125 -0.59375q-0.859375 0 -1.328125 0.59375q-0.46875 0.578125 -0.46875 1.640625q0 1.078125 0.453125 1.65625q0.46875 0.578125 1.34375 0.578125zm4.722534 0.78125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.234375 0.125 -0.359375q0.140625 -0.125 0.359375 -0.125q0.21875 0 0.34375 0.125q0.140625 0.125 0.140625 0.359375l0 5.0625q0 0.234375 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125zm0 -7.28125q-0.296875 0 -0.484375 -0.171875q-0.171875 -0.171875 -0.171875 -0.453125q0 -0.25 0.171875 -0.421875q0.1875 -0.171875 0.484375 -0.171875q0.28125 0 0.453125 0.171875q0.1875 0.171875 0.1875 0.421875q0 0.28125 -0.1875 0.453125q-0.171875 0.171875 -0.453125 0.171875zm5.237152 1.234375q2.09375 0 2.09375 2.3125l0 3.25q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -3.1875q0 -0.8125 -0.328125 -1.1875q-0.3125 -0.375 -1.0 -0.375q-0.8125 0 -1.296875 0.5q-0.46875 0.484375 -0.46875 1.328125l0 2.921875q0 0.234375 -0.125 0.359375q-0.125 0.125 -0.359375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -5.0625q0 -0.21875 0.125 -0.34375q0.125 -0.140625 0.359375 -0.140625q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.328125l0 0.609375q0.28125 -0.53125 0.796875 -0.8125q0.53125 -0.28125 1.1875 -0.28125zm6.5660706 5.28125q0.421875 0.03125 0.421875 0.375q0 0.203125 -0.15625 0.3125q-0.140625 0.09375 -0.4375 0.078125l-0.328125 -0.03125q-0.953125 -0.0625 -1.421875 -0.5625q-0.453125 -0.515625 -0.453125 -1.53125l0 -3.015625l-0.796875 0q-0.203125 0 -0.328125 -0.09375q-0.109375 -0.109375 -0.109375 -0.28125q0 -0.171875 0.109375 -0.28125q0.125 -0.109375 0.328125 -0.109375l0.796875 0l0 -1.359375q0 -0.21875 0.125 -0.34375q0.140625 -0.140625 0.375 -0.140625q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.34375l0 1.359375l1.328125 0q0.1875 0 0.296875 0.109375q0.125 0.109375 0.125 0.28125q0 0.171875 -0.125 0.28125q-0.109375 0.09375 -0.296875 0.09375l-1.328125 0l0 3.0625q0 0.65625 0.265625 0.953125q0.265625 0.296875 0.8125 0.328125l0.3125 0.03125zm3.361267 0.78125q-0.5625 0 -1.0625 -0.125q-0.5 -0.140625 -0.875 -0.375q-0.21875 -0.140625 -0.3125 -0.265625q-0.078125 -0.125 -0.078125 -0.3125q0 -0.15625 0.078125 -0.25q0.09375 -0.109375 0.234375 -0.109375q0.15625 0 0.421875 0.1875q0.359375 0.21875 0.71875 0.34375q0.359375 0.125 0.875 0.125q0.65625 0 1.015625 -0.21875q0.359375 -0.234375 0.359375 -0.671875q0 -0.265625 -0.140625 -0.421875q-0.125 -0.171875 -0.453125 -0.296875q-0.3125 -0.125 -0.9375 -0.25q-1.0625 -0.234375 -1.515625 -0.609375q-0.453125 -0.390625 -0.453125 -1.046875q0 -0.515625 0.28125 -0.90625q0.28125 -0.40625 0.796875 -0.625q0.515625 -0.234375 1.15625 -0.234375q0.46875 0 0.90625 0.125q0.4375 0.125 0.78125 0.34375q0.40625 0.296875 0.40625 0.609375q0 0.15625 -0.09375 0.265625q-0.09375 0.109375 -0.234375 0.109375q-0.140625 0 -0.4375 -0.203125q-0.328125 -0.21875 -0.625 -0.34375q-0.296875 -0.125 -0.75 -0.125q-0.5625 0 -0.90625 0.265625q-0.34375 0.25 -0.34375 0.671875q0 0.25 0.125 0.421875q0.125 0.15625 0.421875 0.28125q0.296875 0.125 0.84375 0.25q0.828125 0.1875 1.265625 0.40625q0.453125 0.203125 0.640625 0.515625q0.203125 0.3125 0.203125 0.796875q0 0.75 -0.640625 1.21875q-0.640625 0.453125 -1.671875 0.453125z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m276.85565 99.34974l0 17.70874l-42.960632 0l0 17.724327" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m276.85565 99.34974l0 17.70874l-42.960632 0l0 14.297249" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m233.89502 131.35573l-1.124588 -1.124588l1.124588 3.0897675l1.1245728 -3.0897675z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m276.85565 99.34974l0 17.70874l49.385803 0l0 17.724327" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m276.85565 99.34974l0 17.70874l49.385803 0l0 14.297249" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m326.24146 131.35573l-1.1245728 -1.124588l1.1245728 3.0897675l1.1246033 -3.0897675z" fill-rule="evenodd"/><path fill="#c9daf8" d="m548.5407 235.66077l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m548.5407 235.66077l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path fill="#000000" d="m579.47955 247.1612q0.203125 0 0.328125 0.140625q0.125 0.125 0.125 0.359375l0 7.578125q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.359375 0.140625q-0.234375 0 -0.390625 -0.203125l-4.984375 -6.65625l0 6.359375q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.34375 0.140625q-0.21875 0 -0.34375 -0.140625q-0.109375 -0.140625 -0.109375 -0.359375l0 -7.578125q0 -0.234375 0.125 -0.359375q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.40625 0.203125l4.96875 6.65625l0 -6.359375q0 -0.234375 0.125 -0.359375q0.125 -0.140625 0.34375 -0.140625zm8.868103 0q0.203125 0 0.328125 0.140625q0.125 0.125 0.125 0.359375l0 7.578125q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.359375 0.140625q-0.234375 0 -0.390625 -0.203125l-4.984375 -6.65625l0 6.359375q0 0.21875 -0.125 0.359375q-0.125 0.140625 -0.34375 0.140625q-0.21875 0 -0.34375 -0.140625q-0.109375 -0.140625 -0.109375 -0.359375l0 -7.578125q0 -0.234375 0.125 -0.359375q0.125 -0.140625 0.359375 -0.140625q0.234375 0 0.40625 0.203125l4.96875 6.65625l0 -6.359375q0 -0.234375 0.125 -0.359375q0.125 -0.140625 0.34375 -0.140625zm12.917175 7.953125q0.046875 0.09375 0.046875 0.203125q0 0.171875 -0.140625 0.296875q-0.140625 0.125 -0.328125 0.125q-0.296875 0 -0.421875 -0.296875l-0.84375 -1.9375l-4.53125 0l-0.859375 1.9375q-0.125 0.296875 -0.421875 0.296875q-0.1875 0 -0.34375 -0.125q-0.140625 -0.125 -0.140625 -0.3125q0 -0.09375 0.046875 -0.1875l3.4375 -7.640625q0.078125 -0.15625 0.21875 -0.234375q0.140625 -0.09375 0.3125 -0.09375q0.171875 0 0.3125 0.09375q0.15625 0.078125 0.21875 0.234375l3.4375 7.640625zm-5.859375 -2.421875l3.8125 0l-1.90625 -4.3125l-1.90625 4.3125zm7.78656 3.046875q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.546875q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.375 -0.125l2.84375 0q1.328125 0 2.0625 0.65625q0.75 0.640625 0.75 1.828125q0 1.1875 -0.75 1.84375q-0.734375 0.65625 -2.0625 0.65625l-2.359375 0l0 3.03125q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.359375 0.140625zm2.765625 -4.34375q1.9375 0 1.9375 -1.6875q0 -1.671875 -1.9375 -1.671875l-2.265625 0l0 3.359375l2.265625 0zm4.9744263 4.34375q-0.21875 0 -0.359375 -0.140625q-0.125 -0.140625 -0.125 -0.359375l0 -7.578125q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.234375 0 0.359375 0.140625q0.140625 0.125 0.140625 0.359375l0 7.578125q0 0.21875 -0.140625 0.359375q-0.125 0.140625 -0.359375 0.140625z" fill-rule="nonzero"/><path fill="#c9daf8" d="m548.5407 193.79199l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m548.5407 193.79199l87.49603 0l0 30.992126l-87.49603 0z" fill-rule="evenodd"/><path fill="#000000" d="m589.5417 213.87056q-0.28125 0 -0.484375 -0.1875q-0.1875 -0.1875 -0.1875 -0.484375q0 -0.296875 0.1875 -0.484375q0.203125 -0.203125 0.484375 -0.203125q0.28125 0 0.46875 0.203125q0.1875 0.1875 0.1875 0.484375q0 0.296875 -0.1875 0.484375q-0.1875 0.1875 -0.46875 0.1875zm2.7480469 0q-0.28125 0 -0.484375 -0.1875q-0.1875 -0.1875 -0.1875 -0.484375q0 -0.296875 0.1875 -0.484375q0.203125 -0.203125 0.484375 -0.203125q0.28125 0 0.46875 0.203125q0.1875 0.1875 0.1875 0.484375q0 0.296875 -0.1875 0.484375q-0.1875 0.1875 -0.46875 0.1875zm2.7479858 0q-0.28125 0 -0.484375 -0.1875q-0.1875 -0.1875 -0.1875 -0.484375q0 -0.296875 0.1875 -0.484375q0.203125 -0.203125 0.484375 -0.203125q0.28125 0 0.46875 0.203125q0.1875 0.1875 0.1875 0.484375q0 0.296875 -0.1875 0.484375q-0.1875 0.1875 -0.46875 0.1875z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m75.62294 283.52823l0 17.950958l100.62993 0l0 17.954529" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m75.62295 283.52823l0 17.950928l100.62992 0l0 14.527496" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m176.25287 316.00665l-1.124588 -1.1246033l1.124588 3.0897827l1.124588 -3.0897827z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m276.85654 283.52823l0 17.950958l-100.62991 0l0 17.954529" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m276.85654 283.52823l0 17.950928l-100.62991 0l0 14.527496" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m176.22662 316.00665l-1.124588 -1.1246033l1.124588 3.0897827l1.124588 -3.0897827z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m500.5223 334.89435l24.009003 0l0 0.06298828l24.022522 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m500.5223 334.89435l24.009003 0l0 0.06298828l20.595398 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m545.1267 334.95734l-1.1245728 1.1246033l3.0897827 -1.1246033l-3.0897827 -1.1245728z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m500.5223 334.89435l24.009003 0l0 -41.858246l24.022522 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m500.5223 334.89435l24.009003 0l0 -41.858246l20.595398 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m545.1267 293.0361l-1.1245728 1.1245728l3.0897827 -1.1245728l-3.0897827 -1.1246033z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m500.5223 334.89435l24.009003 0l0 -83.74802l24.022522 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m500.5223 334.89435l24.009003 0l0 -83.74802l20.595398 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m545.1267 251.14633l-1.1245728 1.1245728l3.0897827 -1.1245728l-3.0897827 -1.124588z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m500.5223 334.89435l24.009003 0l0 -125.60629l24.022522 0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m500.5223 334.89435l24.009003 0l0 -125.60629l20.595398 0" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m545.1267 209.28806l-1.1245728 1.124588l3.0897827 -1.124588l-3.0897827 -1.124588z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m233.88803 165.75919l0 17.70752l42.960632 0l0 17.694061" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m233.88805 165.75919l0 17.70752l42.960617 0l0 14.266968" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m276.84866 197.73367l-1.1245728 -1.124588l1.1245728 3.0897675l1.1246033 -3.0897675z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m326.25156 165.75919l0 17.70752l-49.385834 0l0 17.694061" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m326.25156 165.75919l0 17.70752l-49.385834 0l0 14.266968" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m276.86572 197.73367l-1.1245728 -1.124588l1.1245728 3.0897675l1.1246033 -3.0897675z" fill-rule="evenodd"/><path fill="#d9ead3" d="m132.49171 252.53609l87.49606 0l0 30.992142l-87.49606 0z" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m132.49171 252.53609l87.49606 0l0 30.992142l-87.49606 0z" fill-rule="evenodd"/><path fill="#000000" d="m146.9475 272.6459q-0.90625 0 -1.734375 -0.265625q-0.8125 -0.265625 -1.3125 -0.734375q-0.171875 -0.15625 -0.171875 -0.40625q0 -0.171875 0.09375 -0.296875q0.09375 -0.125 0.234375 -0.125q0.15625 0 0.328125 0.125q1.109375 0.859375 2.546875 0.859375q1.03125 0 1.578125 -0.390625q0.5625 -0.390625 0.5625 -1.125q0 -0.421875 -0.265625 -0.671875q-0.265625 -0.265625 -0.703125 -0.421875q-0.4375 -0.15625 -1.15625 -0.328125q-0.984375 -0.21875 -1.625 -0.46875q-0.625 -0.265625 -1.015625 -0.734375q-0.390625 -0.46875 -0.390625 -1.21875q0 -0.71875 0.390625 -1.265625q0.390625 -0.5625 1.09375 -0.875q0.703125 -0.3125 1.59375 -0.3125q0.84375 0 1.5625 0.265625q0.734375 0.25 1.234375 0.734375q0.1875 0.1875 0.1875 0.421875q0 0.171875 -0.09375 0.296875q-0.09375 0.125 -0.234375 0.125q-0.125 0 -0.34375 -0.140625q-0.59375 -0.46875 -1.09375 -0.65625q-0.5 -0.203125 -1.21875 -0.203125q-0.984375 0 -1.546875 0.421875q-0.546875 0.40625 -0.546875 1.15625q0 0.625 0.484375 0.953125q0.484375 0.3125 1.5 0.5625q1.09375 0.25 1.71875 0.484375q0.625 0.21875 1.03125 0.671875q0.421875 0.4375 0.421875 1.171875q0 0.71875 -0.390625 1.265625q-0.390625 0.53125 -1.109375 0.828125q-0.703125 0.296875 -1.609375 0.296875zm6.9353027 -6.078125q2.203125 0 2.203125 2.296875l0 3.265625q0 0.21875 -0.125 0.359375q-0.125 0.125 -0.34375 0.125q-0.21875 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.578125q-0.21875 0.515625 -0.6875 0.796875q-0.46875 0.28125 -1.078125 0.28125q-0.5625 0 -1.046875 -0.21875q-0.46875 -0.234375 -0.75 -0.640625q-0.265625 -0.40625 -0.265625 -0.90625q0 -0.65625 0.328125 -1.015625q0.34375 -0.375 1.109375 -0.53125q0.765625 -0.15625 2.125 -0.15625l0.265625 0l0 -0.40625q0 -0.71875 -0.296875 -1.046875q-0.28125 -0.34375 -0.953125 -0.34375q-0.8125 0 -1.65625 0.453125q-0.3125 0.203125 -0.453125 0.203125q-0.140625 0 -0.234375 -0.109375q-0.09375 -0.109375 -0.09375 -0.28125q0 -0.171875 0.09375 -0.296875q0.109375 -0.125 0.328125 -0.25q0.421875 -0.25 0.953125 -0.375q0.546875 -0.140625 1.0625 -0.140625zm-0.390625 5.296875q0.71875 0 1.171875 -0.484375q0.46875 -0.484375 0.46875 -1.25l0 -0.34375l-0.21875 0q-1.046875 0 -1.609375 0.09375q-0.546875 0.078125 -0.78125 0.296875q-0.234375 0.203125 -0.234375 0.609375q0 0.46875 0.34375 0.78125q0.34375 0.296875 0.859375 0.296875zm8.578796 -4.96875q0.140625 -0.296875 0.421875 -0.296875q0.1875 0 0.328125 0.125q0.140625 0.109375 0.140625 0.296875q0 0.109375 -0.046875 0.1875l-2.34375 5.046875q-0.0625 0.15625 -0.21875 0.25q-0.140625 0.078125 -0.3125 0.078125q-0.15625 0 -0.296875 -0.078125q-0.140625 -0.09375 -0.21875 -0.25l-2.328125 -5.046875q-0.046875 -0.078125 -0.046875 -0.171875q0 -0.1875 0.15625 -0.3125q0.15625 -0.140625 0.359375 -0.140625q0.109375 0 0.21875 0.078125q0.125 0.078125 0.1875 0.203125l2.0 4.5l2.0 -4.46875zm6.480545 4.296875q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm8.589676 -3.28125q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.328125l0 7.625q0 0.21875 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.640625q-0.265625 0.546875 -0.78125 0.84375q-0.5 0.296875 -1.1875 0.296875q-0.765625 0 -1.359375 -0.375q-0.578125 -0.390625 -0.90625 -1.078125q-0.328125 -0.6875 -0.328125 -1.59375q0 -0.90625 0.328125 -1.59375q0.328125 -0.6875 0.90625 -1.046875q0.59375 -0.375 1.359375 -0.375q0.6875 0 1.1875 0.296875q0.515625 0.296875 0.78125 0.84375l0 -3.203125q0 -0.21875 0.125 -0.34375q0.125 -0.125 0.359375 -0.125zm-2.25 7.796875q0.84375 0 1.296875 -0.578125q0.46875 -0.59375 0.46875 -1.65625q0 -1.0625 -0.46875 -1.640625q-0.453125 -0.578125 -1.296875 -0.578125q-0.859375 0 -1.34375 0.578125q-0.46875 0.578125 -0.46875 1.625q0 1.0625 0.46875 1.65625q0.484375 0.59375 1.34375 0.59375zm12.202805 -7.796875q0.21875 0 0.34375 0.140625q0.125 0.125 0.125 0.359375l0 7.59375q0 0.21875 -0.125 0.359375q-0.109375 0.125 -0.328125 0.125q-0.21875 0 -0.328125 -0.125q-0.109375 -0.140625 -0.109375 -0.359375l0 -6.125l-2.59375 4.984375q-0.171875 0.34375 -0.5 0.34375q-0.3125 0 -0.484375 -0.34375l-2.625 -4.921875l0 6.0625q0 0.21875 -0.109375 0.359375q-0.109375 0.125 -0.328125 0.125q-0.21875 0 -0.34375 -0.125q-0.109375 -0.140625 -0.109375 -0.359375l0 -7.59375q0 -0.234375 0.125 -0.359375q0.140625 -0.140625 0.359375 -0.140625q0.3125 0 0.484375 0.34375l3.046875 5.84375l3.015625 -5.84375q0.09375 -0.1875 0.203125 -0.265625q0.125 -0.078125 0.28125 -0.078125zm4.8576965 8.59375q-0.828125 0 -1.46875 -0.359375q-0.625 -0.375 -0.96875 -1.0625q-0.34375 -0.703125 -0.34375 -1.609375q0 -0.90625 0.34375 -1.59375q0.34375 -0.703125 0.96875 -1.0625q0.640625 -0.375 1.46875 -0.375q0.828125 0 1.453125 0.375q0.640625 0.359375 0.984375 1.0625q0.34375 0.6875 0.34375 1.59375q0 0.90625 -0.34375 1.609375q-0.34375 0.6875 -0.984375 1.0625q-0.625 0.359375 -1.453125 0.359375zm0 -0.796875q0.859375 0 1.3125 -0.5625q0.46875 -0.578125 0.46875 -1.671875q0 -1.0625 -0.46875 -1.640625q-0.46875 -0.59375 -1.3125 -0.59375q-0.859375 0 -1.328125 0.59375q-0.46875 0.578125 -0.46875 1.640625q0 1.078125 0.453125 1.65625q0.46875 0.578125 1.34375 0.578125zm8.925674 -7.796875q0.21875 0 0.34375 0.140625q0.140625 0.125 0.140625 0.328125l0 7.625q0 0.21875 -0.140625 0.359375q-0.125 0.125 -0.34375 0.125q-0.234375 0 -0.359375 -0.125q-0.125 -0.140625 -0.125 -0.359375l0 -0.640625q-0.265625 0.546875 -0.78125 0.84375q-0.5 0.296875 -1.1875 0.296875q-0.765625 0 -1.359375 -0.375q-0.578125 -0.390625 -0.90625 -1.078125q-0.328125 -0.6875 -0.328125 -1.59375q0 -0.90625 0.328125 -1.59375q0.328125 -0.6875 0.90625 -1.046875q0.59375 -0.375 1.359375 -0.375q0.6875 0 1.1875 0.296875q0.515625 0.296875 0.78125 0.84375l0 -3.203125q0 -0.21875 0.125 -0.34375q0.125 -0.125 0.359375 -0.125zm-2.25 7.796875q0.84375 0 1.296875 -0.578125q0.46875 -0.59375 0.46875 -1.65625q0 -1.0625 -0.46875 -1.640625q-0.453125 -0.578125 -1.296875 -0.578125q-0.859375 0 -1.34375 0.578125q-0.46875 0.578125 -0.46875 1.625q0 1.0625 0.46875 1.65625q0.484375 0.59375 1.34375 0.59375zm9.06218 -0.640625q0.140625 0 0.234375 0.109375q0.09375 0.109375 0.09375 0.28125q0 0.296875 -0.421875 0.546875q-0.4375 0.25 -0.921875 0.375q-0.46875 0.125 -0.921875 0.125q-1.359375 0 -2.15625 -0.796875q-0.78125 -0.8125 -0.78125 -2.21875q0 -0.90625 0.34375 -1.59375q0.359375 -0.6875 0.984375 -1.0625q0.640625 -0.390625 1.4375 -0.390625q1.140625 0 1.8125 0.75q0.671875 0.734375 0.671875 2.0q0 0.25 -0.09375 0.359375q-0.09375 0.109375 -0.3125 0.109375l-3.859375 0q0.09375 2.0625 1.953125 2.0625q0.46875 0 0.796875 -0.125q0.34375 -0.125 0.71875 -0.34375q0.3125 -0.1875 0.421875 -0.1875zm-2.09375 -3.875q-0.765625 0 -1.234375 0.484375q-0.46875 0.484375 -0.546875 1.359375l3.390625 0q-0.015625 -0.890625 -0.4375 -1.359375q-0.421875 -0.484375 -1.171875 -0.484375zm4.386551 5.296875q-0.21875 0 -0.359375 -0.125q-0.125 -0.125 -0.125 -0.359375l0 -7.625q0 -0.21875 0.125 -0.34375q0.140625 -0.125 0.359375 -0.125q0.203125 0 0.34375 0.125q0.140625 0.125 0.140625 0.34375l0 7.625q0 0.234375 -0.140625 0.359375q-0.140625 0.125 -0.34375 0.125z" fill-rule="nonzero"/><path fill="#000000" fill-opacity="0.0" d="m176.23885 99.34974l0 153.19684" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m176.23885 99.34974l0 149.76978" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m176.23885 249.1195l-1.124588 -1.124588l1.124588 3.0897675l1.124588 -3.0897675z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m176.23975 283.52823l0 17.950958l0.06298828 0l0 17.954529" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m176.23975 283.52823l0 17.950928l0.06298828 0l0 14.527496" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m176.30273 316.00665l-1.1245728 -1.1246033l1.1245728 3.0897827l1.124588 -3.0897827z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m75.62205 99.34843l0 153.19684" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m75.62205 99.34843l0 149.76978" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m75.62205 249.1182l-1.1245804 -1.124588l1.1245804 3.0897675l1.1245804 -3.0897675z" fill-rule="evenodd"/><path fill="#000000" fill-opacity="0.0" d="m99.50131 100.0l0 76.0l54.992126 0l0 76.0" fill-rule="evenodd"/><path stroke="#000000" stroke-width="1.0" stroke-linejoin="round" stroke-linecap="butt" d="m99.50131 100.0l0 76.0l54.992126 0l0 72.57292" fill-rule="evenodd"/><path fill="#000000" stroke="#000000" stroke-width="1.0" stroke-linecap="butt" d="m154.49344 248.5729l-1.124588 -1.1245728l1.124588 3.0897675l1.124588 -3.0897675z" fill-rule="evenodd"/></g></svg>
\ No newline at end of file
-- 
GitLab


From a4acd49c0dd32ec741877d587a5ec0f7aede64d7 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 12 Oct 2018 11:08:59 -0700
Subject: [PATCH 0880/1085] Fix a piece of lint in the docstring for
 `tf_export` (non-matching quote characters).

PiperOrigin-RevId: 216890307
---
 tensorflow/python/util/tf_export.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/util/tf_export.py b/tensorflow/python/util/tf_export.py
index a5ac430ce7..a1870dd9de 100644
--- a/tensorflow/python/util/tf_export.py
+++ b/tensorflow/python/util/tf_export.py
@@ -34,7 +34,7 @@ tf_export('foo', 'bar.foo')(foo)
 Exporting a constant
 ```python
 foo = 1
-tf_export("consts.foo").export_constant(__name__, 'foo')
+tf_export('consts.foo').export_constant(__name__, 'foo')
 ```
 """
 from __future__ import absolute_import
-- 
GitLab


From a5697d8ae343434a08b69452291bdaf92d60e1e0 Mon Sep 17 00:00:00 2001
From: Shimin Guo <smguo2001@gmail.com>
Date: Fri, 12 Oct 2018 11:13:54 -0700
Subject: [PATCH 0881/1085] line length

---
 tensorflow/python/framework/ops_test.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index b600cd0deb..cdfe6abcc6 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -255,7 +255,9 @@ class OperationTest(test_util.TensorFlowTestCase):
     """, op3.node_def)
 
   def testDevicePresent(self):
-    op = ops.Operation(ops._NodeDef("None", "myop", device='/job:goo/device:GPU:0'), ops.Graph(), [], [])
+    op = ops.Operation(
+      ops._NodeDef("None", "myop", device='/job:goo/device:GPU:0'), ops.Graph(),
+      [], [])
     self.assertEqual('/job:goo/device:GPU:0', op.device)
 
   def testDeviceObject(self):
-- 
GitLab


From 7b63fdf025b8284abeae2679ad47df07c63b7090 Mon Sep 17 00:00:00 2001
From: Shimin Guo <smguo2001@gmail.com>
Date: Fri, 12 Oct 2018 11:17:10 -0700
Subject: [PATCH 0882/1085] indent

---
 tensorflow/python/framework/ops_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index cdfe6abcc6..b804d275d1 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -256,8 +256,8 @@ class OperationTest(test_util.TensorFlowTestCase):
 
   def testDevicePresent(self):
     op = ops.Operation(
-      ops._NodeDef("None", "myop", device='/job:goo/device:GPU:0'), ops.Graph(),
-      [], [])
+        ops._NodeDef("None", "myop", device='/job:goo/device:GPU:0'),
+        ops.Graph(), [], [])
     self.assertEqual('/job:goo/device:GPU:0', op.device)
 
   def testDeviceObject(self):
-- 
GitLab


From 2581dd69782678595276a7fd63c8f932bf0363ab Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 12 Oct 2018 11:12:24 -0700
Subject: [PATCH 0883/1085] [tf.data] Export `tf.data.experimental.AUTOTUNE`.

PiperOrigin-RevId: 216890862
---
 tensorflow/python/data/experimental/__init__.py               | 2 ++
 tensorflow/python/data/experimental/ops/optimization.py       | 3 +++
 .../tools/api/golden/v1/tensorflow.data.experimental.pbtxt    | 4 ++++
 .../tools/api/golden/v2/tensorflow.data.experimental.pbtxt    | 4 ++++
 4 files changed, 13 insertions(+)

diff --git a/tensorflow/python/data/experimental/__init__.py b/tensorflow/python/data/experimental/__init__.py
index 2ac159d38a..d4e7fee921 100644
--- a/tensorflow/python/data/experimental/__init__.py
+++ b/tensorflow/python/data/experimental/__init__.py
@@ -57,6 +57,8 @@ See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
 @@StatsAggregator
 @@unbatch
 @@unique
+
+@@AUTOTUNE
 """
 
 from __future__ import absolute_import
diff --git a/tensorflow/python/data/experimental/ops/optimization.py b/tensorflow/python/data/experimental/ops/optimization.py
index 276dde8383..8e1de136b6 100644
--- a/tensorflow/python/data/experimental/ops/optimization.py
+++ b/tensorflow/python/data/experimental/ops/optimization.py
@@ -21,9 +21,12 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_experimental_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
+
 
 # A constant that can be used to enable auto-tuning.
 AUTOTUNE = -1
+tf_export("data.experimental.AUTOTUNE").export_constant(__name__, "AUTOTUNE")
 
 
 # TODO(jsimsa): Support RE matching for both individual transformation (e.g. to
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
index 2a1f899dc0..f5f96ab98b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
@@ -1,5 +1,9 @@
 path: "tensorflow.data.experimental"
 tf_module {
+  member {
+    name: "AUTOTUNE"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "CheckpointInputPipelineHook"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
index 2a1f899dc0..f5f96ab98b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
@@ -1,5 +1,9 @@
 path: "tensorflow.data.experimental"
 tf_module {
+  member {
+    name: "AUTOTUNE"
+    mtype: "<type \'int\'>"
+  }
   member {
     name: "CheckpointInputPipelineHook"
     mtype: "<type \'type\'>"
-- 
GitLab


From 443084eeaff33764271ea025119b1d8c151d6694 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Fri, 12 Oct 2018 11:15:00 -0700
Subject: [PATCH 0884/1085] Simplify implementation of shared weights support
 to not depend on string names.

Instead we just ensure that all consumers of a target tensor use the inserted FakeQuant tensor instead.

PiperOrigin-RevId: 216891274
---
 .../contrib/quantize/python/quantize.py       | 126 +++++++-----------
 1 file changed, 47 insertions(+), 79 deletions(-)

diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index 4ab888e0a4..c3b9760787 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -87,7 +87,8 @@ def Quantize(graph,
     _InsertQuantOp(
         context,
         'weights_quant',
-        layer_match.weight_tensor.op, [layer_match.layer_op],
+        layer_match.weight_tensor.op,
+        input_to_ops_map.ConsumerOperations(layer_match.weight_tensor.op),
         is_training,
         moving_avg=False,
         ema_decay=ema_decay,
@@ -133,7 +134,8 @@ def Quantize(graph,
       _InsertQuantOp(
           context,
           'conv_quant',
-          layer_match.bias_add_op, [layer_match.bypass_op],
+          layer_match.bias_add_op,
+          input_to_ops_map.ConsumerOperations(layer_match.bias_add_op),
           is_training,
           moving_avg=True,
           ema_decay=ema_decay,
@@ -498,8 +500,8 @@ class _LayerMatch(object):
     return self._bias_add_op
 
 
-def _GetFollowingFakeQuantOp(tensor):
-  """Returns the following FakeQuant op if it exists else None."""
+def _FollowedByFakeQuant(tensor):
+  """Returns True if the tensor is followed by a FakeQuant."""
   fake_quant_ops = set([
       'FakeQuantWithMinMaxVars', 'FakeQuantWithMinMaxArgs',
       'FakeQuantWithMinMaxVarsPerChannel'
@@ -509,11 +511,11 @@ def _GetFollowingFakeQuantOp(tensor):
   while consumers:
     c = consumers.pop()
     if c.type in fake_quant_ops:
-      return c
+      return True
     elif c.type in pass_through_ops:
       for output in c.outputs:
         consumers.extend(output.consumers())
-  return None
+  return False
 
 
 def _InsertQuantOp(context,
@@ -599,80 +601,46 @@ def _InsertQuantOp(context,
   # Prevent ops from being quantized multiple times. Bypass ops can sometimes
   # overlap between multiple matches, so we need to ensure that we don't
   # add duplicate FakeQuant operations.
-  fake_quant_op = _GetFollowingFakeQuantOp(inputs)
-
-  # If we find that we are attempting to insert a fake quant op following
-  # a fake quant, we skip inserting a fake quant op
-
-  if fake_quant_op is None:
-    if moving_avg:
-      quant = (
-          quant_ops.MovingAvgQuantize(
-              inputs,
-              init_min=init_min,
-              init_max=init_max,
-              ema_decay=ema_decay,
-              is_training=is_training,
-              num_bits=bits,
-              symmetric=symmetric,
-              narrow_range=narrow_range,
-              vars_collection=vars_collection,
-              name_prefix=name_prefix))
-    else:
-      quant = (
-          quant_ops.LastValueQuantize(
-              inputs,
-              init_min=init_min,
-              init_max=init_max,
-              is_training=is_training,
-              num_bits=bits,
-              symmetric=symmetric,
-              narrow_range=narrow_range,
-              vars_collection=vars_collection,
-              name_prefix=name_prefix))
-
-    if quant_delay and quant_delay > 0:
-      activate_quant = math_ops.greater_equal(
-          common.CreateOrGetQuantizationStep(),
-          quant_delay,
-          name=name_prefix + '/activate_quant')
-      quant = control_flow_ops.cond(
-          activate_quant,
-          lambda: quant,
-          lambda: inputs,
-          name=name_prefix + '/delayed_quant')
+  if _FollowedByFakeQuant(inputs):
+    return
+
+  if moving_avg:
+    quant = (
+        quant_ops.MovingAvgQuantize(
+            inputs,
+            init_min=init_min,
+            init_max=init_max,
+            ema_decay=ema_decay,
+            is_training=is_training,
+            num_bits=bits,
+            symmetric=symmetric,
+            narrow_range=narrow_range,
+            vars_collection=vars_collection,
+            name_prefix=name_prefix))
   else:
-    #  return
-    # If a fake quant op is present already, make sure that
-    # any downstream use of the tensor reroutes to the appropriate quantized
-    # tensor. If there is no quant_delay, this is simply the output of the
-    # fake quant op. If there is a quant delay, we reroute to the output
-    # of the delayed quant operation, which inserts quantization only after
-    # a specified quant_delay
-
-    quant = fake_quant_op.outputs[0]
-    if quant_delay and quant_delay > 0:
-      name_prefix = '/'.join(quant.name.split('/')[:-1])
-      quant = quant.graph.get_tensor_by_name(name_prefix +
-                                             '/delayed_quant/Merge:0')
-    pruned_consumer_set = set()
-    for consumer in consumers:
-      fake_quant_dest_op = _GetFollowingFakeQuantOp(consumer.outputs[0])
-      if (fake_quant_dest_op is None or
-          fake_quant_dest_op.name != fake_quant_op.name):
-        pruned_consumer_set.add(consumer)
-    consumers = pruned_consumer_set
-
-    # If we have
-    # input->pass_through->fake_quant
-    # there is nothing to reroute.
-    #
-    # If we have
-    #  input-> pass_through->fake_quant
-    #                |-> consumer
-    # Then we reroute such that:
-    # input-> pass_through->fake_quant
-    #                            |-> consumer
+    quant = (
+        quant_ops.LastValueQuantize(
+            inputs,
+            init_min=init_min,
+            init_max=init_max,
+            is_training=is_training,
+            num_bits=bits,
+            symmetric=symmetric,
+            narrow_range=narrow_range,
+            vars_collection=vars_collection,
+            name_prefix=name_prefix))
+
+  if quant_delay and quant_delay > 0:
+    activate_quant = math_ops.greater_equal(
+        common.CreateOrGetQuantizationStep(),
+        quant_delay,
+        name=name_prefix + '/activate_quant')
+    quant = control_flow_ops.cond(
+        activate_quant,
+        lambda: quant,
+        lambda: inputs,
+        name=name_prefix + '/delayed_quant')
+
   if consumers:
     tensors_modified_count = common.RerouteTensor(
         quant, inputs, can_modify=consumers)
-- 
GitLab


From 0ef4085b08976086696f18b2a8d2be832fa41590 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 11:33:27 -0700
Subject: [PATCH 0885/1085] Make XRT handles random 64-bit integers instead of
 increasing dense integers. This somewhat reduces the probability of a client
 accidentally using a stale handle without realizing it.

PiperOrigin-RevId: 216894405
---
 tensorflow/compiler/xrt/xrt_compilation_cache.cc | 12 +++++++++++-
 tensorflow/compiler/xrt/xrt_compilation_cache.h  |  2 --
 tensorflow/compiler/xrt/xrt_state.cc             |  8 +++-----
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xrt/xrt_compilation_cache.cc b/tensorflow/compiler/xrt/xrt_compilation_cache.cc
index 31bb476895..d1405eae46 100644
--- a/tensorflow/compiler/xrt/xrt_compilation_cache.cc
+++ b/tensorflow/compiler/xrt/xrt_compilation_cache.cc
@@ -18,9 +18,19 @@ limitations under the License.
 #include "absl/synchronization/mutex.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/random/random.h"
 
 namespace tensorflow {
 
+namespace {
+
+int64 get_uid() {
+  uint64 unsigned_rand = random::New64() & INT64_MAX;
+  return static_cast<int64>(unsigned_rand);
+}
+
+}  // namespace
+
 const char* kXRTCompilationCacheResourceName = "xrt_compilation_cache";
 
 XRTCompilationCache::EntryRefImpl::EntryRefImpl(XRTCompilationCache* parent,
@@ -153,7 +163,7 @@ XRTCompilationCache::CompiledSubgraph* XRTCompilationCache::InitializeEntry(
   CompiledSubgraph* entry = new CompiledSubgraph();
   entry->parent = this;
   entry->key = key;
-  entry->uid = next_uid_++;
+  entry->uid = get_uid();
   // Add the entry to the cache. Once the computation has been compiled,
   // UpdateEntryAfterCompilation will be called to potentially mark old entries
   // that don't fit any more for eviction.
diff --git a/tensorflow/compiler/xrt/xrt_compilation_cache.h b/tensorflow/compiler/xrt/xrt_compilation_cache.h
index c505299a45..c43d0fc478 100644
--- a/tensorflow/compiler/xrt/xrt_compilation_cache.h
+++ b/tensorflow/compiler/xrt/xrt_compilation_cache.h
@@ -211,8 +211,6 @@ class XRTCompilationCache : public ResourceBase {
   const int max_cache_entries_;
 
   mutable absl::Mutex mu_;
-  // The uid to assign to the next new entry created.
-  int64 next_uid_ GUARDED_BY(mu_) = 0;
   // The total number of entries that are stored and not marked for eviction.
   int cache_entries_ GUARDED_BY(mu_) = 0;
   // The total number of entries that are marked for eviction.
diff --git a/tensorflow/compiler/xrt/xrt_state.cc b/tensorflow/compiler/xrt/xrt_state.cc
index d05a1e7dcb..6e0f1216ca 100644
--- a/tensorflow/compiler/xrt/xrt_state.cc
+++ b/tensorflow/compiler/xrt/xrt_state.cc
@@ -33,6 +33,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/stream_executor/stream_executor.h"
 
@@ -42,12 +43,9 @@ namespace {
 
 const char* kTupleContainer = "tuples";
 
-// Counter used to assign unique handles.
-mutex _uid_mutex(tensorflow::LINKER_INITIALIZED);
-int64 _uid GUARDED_BY(_uid_mutex) = 0;
 int64 get_uid() {
-  mutex_lock l(_uid_mutex);
-  return _uid++;
+  uint64 unsigned_rand = random::New64() & INT64_MAX;
+  return static_cast<int64>(unsigned_rand);
 }
 
 Status AllocateScopedShapedBuffer(
-- 
GitLab


From d30ace44a62a7699b01e567e496906e77cd5e682 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Fri, 12 Oct 2018 11:37:00 -0700
Subject: [PATCH 0886/1085] Copy GPU tensors to host before sending remotely.

AsProtoTensorContent doesn't work if the tensor buffer is on the GPU.

PiperOrigin-RevId: 216895001
---
 tensorflow/contrib/eager/python/BUILD         |  5 ++---
 .../contrib/eager/python/remote_test.py       | 13 ++++++++++++
 .../core/common_runtime/eager/execute.cc      | 20 ++++++++++++++++++-
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index 33a1d572a2..b35ac3abe9 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -249,11 +249,10 @@ py_library(
     ],
 )
 
-py_test(
+cuda_py_test(
     name = "remote_test",
     srcs = ["remote_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
+    additional_deps = [
         ":parameter_server",
         ":remote",
         "//tensorflow/contrib/eager/python:tfe",
diff --git a/tensorflow/contrib/eager/python/remote_test.py b/tensorflow/contrib/eager/python/remote_test.py
index 9ad3bdaa3c..3926de15e7 100644
--- a/tensorflow/contrib/eager/python/remote_test.py
+++ b/tensorflow/contrib/eager/python/remote_test.py
@@ -220,6 +220,19 @@ class RemoteExecutionTest(test.TestCase):
     self.assertEqual(y.device,
                      "/job:%s/replica:0/task:0/device:CPU:0" % JOB_NAME)
 
+  @run_sync_and_async
+  def testGPUToRemoteCopy(self):
+    """Tests that the remote copy happens satisfactorily."""
+    if not context.context().num_gpus():
+      self.skipTest("No GPUs.")
+
+    x1 = array_ops.ones([2, 2]).gpu()
+
+    with ops.device("/job:remote_device/replica:0/task:1/device:CPU:0"):
+      x2 = x1._copy()  # pylint: disable=protected-access
+
+    np.testing.assert_array_equal(x1.numpy(), x2.numpy())
+
 
 if __name__ == "__main__":
   ops.enable_eager_execution()
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index 0f46483ce5..c29a767d23 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -425,8 +425,24 @@ Status EagerRemoteSendTensor(EagerContext* ctx, TensorHandle* h,
   request.set_op_id(ctx->NextId());
   request.set_device_name(recv_device->name());
 
+  Device* tensor_handle_device;
+  TF_RETURN_IF_ERROR(h->Device(&tensor_handle_device));
+
+  // AsProtoTensorContent doesn't work when the tensor is on the GPU, hence copy
+  // it to the CPU before copying it out.
+  // TODO(nareshmodi): this is currently slow, but can be fixed by making tensor
+  // handles aware of more than one device.
+  TensorHandle* actual_handle;
+  if (tensor_handle_device != nullptr &&
+      tensor_handle_device->device_type() != "CPU") {
+    TF_RETURN_IF_ERROR(h->CopyToDevice(ctx, ctx->HostCPU(), &actual_handle));
+  } else {
+    actual_handle = h;
+    actual_handle->Ref();
+  }
+
   const Tensor* tensor;
-  TF_RETURN_IF_ERROR(h->Tensor(&tensor));
+  TF_RETURN_IF_ERROR(actual_handle->Tensor(&tensor));
   tensor->AsProtoTensorContent(request.add_tensors());
 
   const tensorflow::uint64 id = request.op_id();
@@ -450,6 +466,8 @@ Status EagerRemoteSendTensor(EagerContext* ctx, TensorHandle* h,
                              recv_device, recv_device, ctx);
   (*result)->SetRemoteShape(MakeUnique<TensorShape>(tensor->shape()));
 
+  actual_handle->Unref();
+
   return Status::OK();
 #endif
 }
-- 
GitLab


From 0585a79976a209a8cb7b9267a55ef0c530265fc2 Mon Sep 17 00:00:00 2001
From: Shimin Guo <smguo2001@gmail.com>
Date: Fri, 12 Oct 2018 11:45:16 -0700
Subject: [PATCH 0887/1085] better name

---
 tensorflow/python/framework/ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index b804d275d1..f714f31fca 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -254,7 +254,7 @@ class OperationTest(test_util.TensorFlowTestCase):
     input:'myop1' input:'myop2:1' input:'myop2:1'
     """, op3.node_def)
 
-  def testDevicePresent(self):
+  def testDeviceFromNodeDef(self):
     op = ops.Operation(
         ops._NodeDef("None", "myop", device='/job:goo/device:GPU:0'),
         ops.Graph(), [], [])
-- 
GitLab


From 14f86407043fec62890fa00c246f5968b12a15f1 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Fri, 12 Oct 2018 11:44:26 -0700
Subject: [PATCH 0888/1085] Support real custom ops for Toco --allow_eager_ops
 flow.

PiperOrigin-RevId: 216896261
---
 tensorflow/contrib/lite/toco/tflite/BUILD     |  4 +++
 tensorflow/contrib/lite/toco/tflite/export.cc |  2 +-
 .../contrib/lite/toco/tflite/operator.cc      | 27 +++++++++++++++++++
 .../contrib/lite/toco/tflite/operator.h       |  5 ++++
 .../contrib/lite/toco/tflite/operator_test.cc |  6 +++++
 5 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/toco/tflite/BUILD b/tensorflow/contrib/lite/toco/tflite/BUILD
index 71cdb7703e..a0450f3ec1 100644
--- a/tensorflow/contrib/lite/toco/tflite/BUILD
+++ b/tensorflow/contrib/lite/toco/tflite/BUILD
@@ -42,6 +42,7 @@ tf_cc_test(
     deps = [
         ":operator",
         "//tensorflow/contrib/lite/toco:tooling_util",
+        "//tensorflow/core:ops",
         "//tensorflow/core:protos_all_cc",
         "@com_google_googletest//:gtest_main",
         "@flatbuffers",
@@ -71,6 +72,7 @@ tf_cc_test(
     tags = ["no_oss"],
     deps = [
         ":types",
+        "//tensorflow/core:ops",
         "@com_google_googletest//:gtest_main",
     ],
 )
@@ -106,6 +108,7 @@ tf_cc_test(
     deps = [
         ":export",
         "//tensorflow/contrib/lite/schema:schema_fbs",
+        "//tensorflow/core:ops",
         "@com_google_googletest//:gtest_main",
     ],
 )
@@ -141,6 +144,7 @@ tf_cc_test(
         ":import",
         "//tensorflow/contrib/lite:schema_fbs_version",
         "//tensorflow/contrib/lite/schema:schema_fbs",
+        "//tensorflow/core:ops",
         "@com_google_googletest//:gtest_main",
         "@flatbuffers",
     ],
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index 3b34cd6285..02577b317a 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -126,7 +126,7 @@ OperatorKey GetOperatorKey(
 
     // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
     // to populate a regular custom op. We need to find a way to fix this.
-    if (allow_flex_ops) {
+    if (ShouldExportAsFlexOp(allow_flex_ops, unsupported_op.tensorflow_op)) {
       key.is_flex_op = true;
       key.flex_tensorflow_op = tensorflow_op;
       key.custom_code =
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index e08a61d357..1ee71d4341 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -23,6 +23,8 @@ limitations under the License.
 #include "tensorflow/contrib/lite/toco/tflite/types.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace toco {
@@ -1258,6 +1260,16 @@ class TensorFlowUnsupported : public BaseOperator {
       return std::unique_ptr<flexbuffers::Builder>();
     }
 
+    if (ShouldExportAsFlexOp(allow_flex_ops_, node_def.op())) {
+      fbb->Vector([&]() {
+        fbb->String(node_def.op());
+        fbb->String(op.tensorflow_node_def);
+      });
+      fbb->Finish();
+      LOG(INFO) << "Writing flex op: " << node_def.op();
+      return std::unique_ptr<flexbuffers::Builder>(fbb.release());
+    }
+
     bool has_valid_attr = false;
     size_t map_start = fbb->StartMap();
     for (const auto& pair : node_def.attr()) {
@@ -1588,6 +1600,21 @@ std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
   return result;
 }
 
+bool ShouldExportAsFlexOp(bool allow_flex_ops,
+                          const string& tensorflow_op_name) {
+  // If Flex ops aren't allow at all, simply return false.
+  if (!allow_flex_ops) {
+    return false;
+  }
+  // Check if we can find the `OpDef` for the TensorFlow op. If we can find
+  // it, export the op as an Flex op. Otherwise, export it as a regular custom
+  // op.
+  const tensorflow::OpDef* op_def = nullptr;
+  return tensorflow::OpRegistry::Global()
+      ->LookUpOpDef(tensorflow_op_name, &op_def)
+      .ok();
+}
+
 }  // namespace tflite
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h
index 6e4e0a16d1..6e2a41bf53 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.h
+++ b/tensorflow/contrib/lite/toco/tflite/operator.h
@@ -113,6 +113,11 @@ class BaseOperator {
   OperatorType type_;
 };
 
+// Helper function to determine if a unsupported TensorFlow op should be
+// exported as an Flex op or a regular custom op.
+bool ShouldExportAsFlexOp(bool allow_flex_ops,
+                          const string& tensorflow_op_name);
+
 }  // namespace tflite
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
index 0bc591e647..66896a49c0 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
@@ -569,6 +569,12 @@ TEST_F(OperatorTest, TensorFlowUnsupportedWithoutAttr) {
   EXPECT_TRUE(output_node_def.attr().empty());
 }
 
+TEST_F(OperatorTest, TestShouldExportAsFlexOp) {
+  EXPECT_FALSE(ShouldExportAsFlexOp(false, "Conv2D"));
+  EXPECT_TRUE(ShouldExportAsFlexOp(true, "Conv2D"));
+  EXPECT_FALSE(ShouldExportAsFlexOp(true, "MyAwesomeCustomOp"));
+}
+
 }  // namespace
 }  // namespace tflite
 
-- 
GitLab


From 261bcd2394e32ae7353a8812373913d98aa94e9a Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Fri, 12 Oct 2018 11:46:52 -0700
Subject: [PATCH 0889/1085] Have cond_v2 and while_v2 use LazyLoader to resolve
 circular dependencies.

PiperOrigin-RevId: 216896672
---
 tensorflow/python/BUILD                       |  35 +-
 .../data/experimental/kernel_tests/BUILD      |   1 +
 tensorflow/python/eager/BUILD                 |   1 -
 tensorflow/python/eager/function.py           |   5 -
 .../python/framework/function_def_to_graph.py |   6 -
 tensorflow/python/kernel_tests/BUILD          |   1 -
 .../kernel_tests/control_flow_ops_py_test.py  |   1 -
 tensorflow/python/ops/cond_v2.py              | 471 ++++++++++++++++-
 tensorflow/python/ops/cond_v2_impl.py         | 497 ------------------
 tensorflow/python/ops/control_flow_ops.py     |  28 +-
 tensorflow/python/ops/gradients_impl.py       |   5 -
 tensorflow/python/ops/while_v2.py             |   5 +-
 12 files changed, 492 insertions(+), 564 deletions(-)
 delete mode 100644 tensorflow/python/ops/cond_v2_impl.py

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index d016de3261..1a890a7938 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -88,6 +88,7 @@ py_library(
         ":client",
         ":client_testlib",
         ":collective_ops",
+        ":cond_v2",
         ":confusion_matrix",
         ":control_flow_ops",
         ":cudnn_rnn_ops_gen",
@@ -132,6 +133,7 @@ py_library(
         ":training",
         ":util",
         ":weights_broadcast_ops",
+        ":while_v2",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python/compat",
         "//tensorflow/python/data",
@@ -742,7 +744,6 @@ py_library(
     srcs = ["framework/graph_to_function_def.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":cond_v2_impl",
         ":op_def_registry",
         "//tensorflow/core:protos_all_py",
     ],
@@ -1984,7 +1985,6 @@ py_library(
         "tensor_shape",
         ":array_ops",
         ":array_ops_gen",
-        ":cond_v2_impl",
         ":constant_op",
         ":control_flow_ops_gen",
         ":control_flow_util",
@@ -2020,10 +2020,19 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":cond_v2_impl",
+        ":array_ops",
+        ":c_api_util",
+        ":framework_ops",
         ":function",
         ":function_def_to_graph",
+        ":functional_ops_gen",
         ":gradients",
+        ":gradients_impl",
+        ":graph_to_function_def",
+        ":pywrap_tensorflow",
+        ":util",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python/eager:function",
     ],
 )
 
@@ -2035,7 +2044,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":array_ops",
-        ":cond_v2_impl",
+        ":cond_v2",
         ":constant_op",
         ":control_flow_ops",
         ":control_flow_util",
@@ -2051,23 +2060,6 @@ py_library(
     ],
 )
 
-py_library(
-    name = "cond_v2_impl",
-    srcs = [
-        "ops/cond_v2_impl.py",
-    ],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":array_ops",
-        ":c_api_util",
-        ":framework_ops",
-        ":functional_ops_gen",
-        ":pywrap_tensorflow",
-        ":util",
-        "//tensorflow/core:protos_all_py",
-    ],
-)
-
 py_library(
     name = "ctc_ops",
     srcs = ["ops/ctc_ops.py"],
@@ -2165,7 +2157,6 @@ py_library(
         ":array_ops",
         ":bitwise_ops",
         ":check_ops",
-        ":cond_v2_impl",
         ":control_flow_grad",
         ":control_flow_ops",
         ":control_flow_util",
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index e9fda2f6b5..bf531ee78c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -331,6 +331,7 @@ py_test(
     deps = [
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:cond_v2",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index 005c7d9e7f..6a90832b9c 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -246,7 +246,6 @@ py_library(
     visibility = ["//tensorflow:internal"],
     deps = [
         ":graph_only_ops",
-        "//tensorflow/python:cond_v2_impl",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 8b4efef0be..fb6855ed71 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -44,7 +44,6 @@ from tensorflow.python.framework import dtypes as dtypes_module
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import cond_v2_impl
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import custom_gradient
@@ -57,10 +56,6 @@ from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
 
-# This is to avoid a circular dependency with cond_v2_impl
-# (function -> gradients_impl -> control_flow_ops -> cond_v2_impl).
-cond_v2_impl._function = sys.modules[__name__]  # pylint: disable=protected-access
-
 # This is to avoid a circular dependency with gradients_impl
 gradients_impl._function = sys.modules[__name__]  # pylint: disable=protected-access
 
diff --git a/tensorflow/python/framework/function_def_to_graph.py b/tensorflow/python/framework/function_def_to_graph.py
index a04fa369ae..98388c2e43 100644
--- a/tensorflow/python/framework/function_def_to_graph.py
+++ b/tensorflow/python/framework/function_def_to_graph.py
@@ -18,8 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import sys
-
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import types_pb2
 from tensorflow.core.framework import versions_pb2
@@ -27,10 +25,6 @@ from tensorflow.python.eager import function
 from tensorflow.python.framework import importer
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import versions
-from tensorflow.python.ops import cond_v2_impl
-
-# This is to avoid a circular dependency with cond_v2_impl.
-cond_v2_impl._function_def_to_graph = sys.modules[__name__]  # pylint: disable=protected-access
 
 
 def function_def_to_graph(fdef, input_shapes=None):
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 51e024b197..eee03b7ef2 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1490,7 +1490,6 @@ cuda_py_test(
         "//tensorflow/python:array_ops_gen",
         "//tensorflow/python:client",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:cond_v2",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:data_flow_ops",
         "//tensorflow/python:data_flow_ops_gen",
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index f7bab691cd..c74fca49f8 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -41,7 +41,6 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import cond_v2  # pylint: disable=unused-import
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import functional_ops
diff --git a/tensorflow/python/ops/cond_v2.py b/tensorflow/python/ops/cond_v2.py
index 75a1a53eb7..cb3943ce7c 100644
--- a/tensorflow/python/ops/cond_v2.py
+++ b/tensorflow/python/ops/cond_v2.py
@@ -12,21 +12,478 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-"""cond_v2 wrapper module.
+"""cond_v2 and gradient.
 
-This imports the cond_v2 method and all necessary dependencies (this is to avoid
-circular dependencies in the cond_v2 implementation). See cond_v2_impl for more
-information.
+This is a version of cond that emits a single If op, as well as the gradient
+function for If ops produced by cond_v2. This will eventually replace the
+current tf.cond implementation once it reaches feature and performance parity.
 """
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=unused-import
+import collections
+
+from tensorflow.core.framework import attr_value_pb2
 from tensorflow.python.eager import function
 from tensorflow.python.framework import function_def_to_graph
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import gen_functional_ops
 from tensorflow.python.ops import gradients_impl
 
-from tensorflow.python.ops.cond_v2_impl import cond_v2
-# pylint: enable=unused-import
+# NOTE(skyewm): TensorFlow uses protected class methods and fields to signify
+# that they aren't part of the official public API. These protected members
+# often need to be used by implementation code however. Rather than litter the
+# code with pylint comments, we ignore protected access violations for
+# readability.
+# pylint: disable=protected-access
+
+
+def cond_v2(pred, true_fn, false_fn, name="cond"):
+  """Like tf.cond, except emits a single If op."""
+  if isinstance(pred, bool):
+    raise TypeError("pred must not be a Python bool", pred)
+
+  if not name:
+    name = "cond"
+
+  with ops.name_scope(name) as scope:
+    with ops.name_scope(None):
+      # Find the outer most graph for uniquing function names.
+      # TODO(jpienaar): Make this work in eager mode.
+      graph = ops.get_default_graph()
+      while isinstance(graph, function.FuncGraph):
+        graph = graph.outer_graph
+
+      true_name = graph.unique_name(("%strue" % scope).replace("/", "_"))
+      false_name = graph.unique_name(("%sfalse" % scope).replace("/", "_"))
+
+    true_graph = function.func_graph_from_py_func(
+        true_name, true_fn, [], {})
+    false_graph = function.func_graph_from_py_func(
+        false_name, false_fn, [], {})
+    _check_same_outputs(true_graph, false_graph)
+
+    # Add inputs to true_graph and false_graph to make them match. Note that
+    # this modifies true_graph and false_graph.
+    cond_inputs = _make_inputs_match(true_graph, false_graph,
+                                     true_graph.external_captures,
+                                     false_graph.external_captures)
+
+    # Add all intermediate tensors as function outputs so they're available for
+    # the gradient computation.
+
+    true_intermediates = _get_intermediates(true_graph)
+    false_intermediates = _get_intermediates(false_graph)
+
+    # Save the original number of outputs to return to the caller.
+    num_cond_outputs = len(true_graph.outputs)
+
+    # Make the number/type of new intermediate outputs match.
+    extra_true_outputs, extra_false_outputs = _pad_params(
+        true_graph, false_graph, true_intermediates, false_intermediates)
+
+    true_graph.outputs.extend(extra_true_outputs)
+    false_graph.outputs.extend(extra_false_outputs)
+
+    # Create the If op.
+    tensors = gen_functional_ops._if(  # pylint: disable=protected-access
+        pred,
+        cond_inputs, [t.dtype for t in true_graph.outputs],
+        _create_new_tf_function(true_graph),
+        _create_new_tf_function(false_graph),
+        output_shapes=_get_output_shapes(true_graph.outputs,
+                                         false_graph.outputs),
+        name=scope)
+
+    # Set the flag to enable lowering on the `if` op if necessary
+    # Lowering allows cond_v2 to avoid some of the limitations of Functions,
+    # allowing users to specify devices & colocation inside of cond_v2 branches,
+    # and enabling non-strict evaluation & partial pruning of cond_v2 branches.
+    # This brings cond_v2 closer to feature parity with tf.cond.
+    #
+    # However, we do not lower `If` in the XLA context because it is easier for
+    # XLA to apply its own optimizations when dealing with un-lowered `If`
+    # operators than with lowered switch/merge control flow.
+    #
+    # TODO(b/110167197) this approach requires cond_v2 to have at least 1 output
+    if_op = tensors[0].op
+    if not control_flow_util.IsInXLAContext(if_op):
+      # pylint: disable=protected-access
+      if_op._set_attr("_lower_using_switch_merge",
+                      attr_value_pb2.AttrValue(b=True))
+      # pylint: enable=protected-access
+
+    result = tuple(tensors[:num_cond_outputs])
+    if len(result) == 1:
+      return result[0]
+    else:
+      return result
+
+
+@ops.RegisterGradient("If")
+def _IfGrad(op, *grads):  # pylint: disable=invalid-name
+  """The gradient of an If op produced by cond_v2."""
+  true_graph, false_graph = _get_func_graphs(op)
+  # Note: op.graph != ops.get_default_graph() when we are computing the gradient
+  # of a nested cond.
+  assert true_graph.outer_graph == op.graph
+  assert false_graph.outer_graph == op.graph
+
+  # Create grad functions that compute the gradient of the true/false forward
+  # graphs. These functions will capture tensors from the forward pass
+  # functions.
+  true_grad_graph = _create_grad_func(
+      true_graph, grads, _get_grad_fn_name(true_graph))
+  false_grad_graph = _create_grad_func(
+      false_graph, grads, _get_grad_fn_name(false_graph))
+
+  assert ([t.dtype for t in true_grad_graph.outputs] ==
+          [t.dtype for t in false_grad_graph.outputs])
+
+  # Resolve references to forward graph tensors in grad graphs and ensure
+  # they are in-scope, i.e., belong to one of outer graphs of the grad graph.
+  true_grad_inputs = _resolve_grad_inputs(true_graph, true_grad_graph)
+  false_grad_inputs = _resolve_grad_inputs(false_graph, false_grad_graph)
+
+  # Make the inputs to true_grad_graph and false_grad_graph match. Note that
+  # this modifies true_grad_graph and false_grad_graph.
+  grad_inputs = _make_inputs_match(true_grad_graph, false_grad_graph,
+                                   true_grad_inputs, false_grad_inputs)
+
+  # Add all intermediate tensors as function outputs so they're available for
+  # higher-order gradient computations.
+
+  true_grad_intermediates = _get_intermediates(true_grad_graph)
+  false_grad_intermediates = _get_intermediates(false_grad_graph)
+
+  # Save the original number of gradient outputs to return.
+  num_grad_outputs = len(true_grad_graph.outputs)
+
+  # Make the number/type of new intermediate outputs match.
+  extra_true_grad_outputs, extra_false_grad_outputs = _pad_params(
+      true_grad_graph, false_grad_graph,
+      true_grad_intermediates, false_grad_intermediates)
+
+  true_grad_graph.outputs.extend(extra_true_grad_outputs)
+  false_grad_graph.outputs.extend(extra_false_grad_outputs)
+
+  # Create the gradient If op.
+  tensors = gen_functional_ops._if(
+      op.inputs[0],
+      grad_inputs, [t.dtype for t in true_grad_graph.outputs],
+      _create_new_tf_function(true_grad_graph),
+      _create_new_tf_function(false_grad_graph),
+      output_shapes=_get_output_shapes(true_grad_graph.outputs,
+                                       false_grad_graph.outputs))
+
+  # The predicate has no gradient.
+  return [None] + tensors[:num_grad_outputs]
+
+
+def _get_func_graphs(if_op):
+  """Returns `FuncGraph`s for the input op branches.
+
+  Args:
+    if_op: The _If Operation.
+
+  Returns:
+    A 2-tuple of the `FuncGraph`s of the then_branch and else_branch.
+  """
+  def _get_func_graph_for_branch(branch_name):
+    """Generates and returns a FuncGraph for the given branch."""
+    inputs = if_op.inputs[1:]  # First input is pred.
+    input_shapes = [t.shape for t in inputs]
+    func_name = if_op.get_attr(branch_name).name
+    fdef = if_op.graph._get_function(func_name).definition
+    # `if_op.graph` may not be the same as `ops.get_default_graph()` e.g.
+    # in the case of nested if ops or when the gradient is being computed
+    # from inside a Defun. We build the `func_graph` with `if_op.graph` as its
+    # `outer_graph`. This resembles how the `FuncGraph` was built in the
+    # forward pass. We need this so that we can resolve references to tensors
+    # in `func_graph` from its gradient graph in `_resolve_grad_inputs`.
+    with if_op.graph.as_default():
+      func_graph = function_def_to_graph.function_def_to_graph(
+          fdef, input_shapes)
+    func_graph.captures = collections.OrderedDict(zip(inputs,
+                                                      func_graph.inputs))
+    # Set the if op so that the gradient code can use it.
+    func_graph._if = if_op
+    return func_graph
+
+  return (_get_func_graph_for_branch("then_branch"),
+          _get_func_graph_for_branch("else_branch"))
+
+
+def _grad_fn(func_graph, grads):
+  """The gradient function for each conditional branch.
+
+  This function builds the gradient graph of the corresponding forward-pass
+  conditional branch in `func_graph`. This is done by differentiating
+  func_graph's outputs w.r.t. its inputs.
+
+  Args:
+    func_graph: function.FuncGraph. The corresponding forward-pass function.
+    grads: The list of input gradient Tensors.
+
+  Returns:
+    The output gradient Tensors.
+  """
+  # Filter out untrainable function outputs.
+  # NOTE(skyewm): If we don't do this, the untrainable tensors can sometimes
+  # cause _GradientsHelper to raise an exception (e.g. the implementation
+  # doesn't expect 'ys' to contain boolean tensors).
+  assert len(func_graph.outputs) == len(grads)
+  ys = []
+  grad_ys = []
+  for y, grad_y in zip(func_graph.outputs, grads):
+    if not gradients_impl._IsTrainable(y):
+      continue
+    ys.append(y)
+    grad_ys.append(grad_y)
+
+  # Build the gradient graph. Note that this builds the gradient computation of
+  # func_graph in the current graph, which requires capturing tensors from
+  # func_graph. The captured func_graph tensors are resolved to external tensors
+  # in _resolve_grad_inputs.
+  result = gradients_impl._GradientsHelper(
+      ys, func_graph.inputs, grad_ys=grad_ys,
+      src_graph=func_graph)
+
+  # Functions can't return None; replace Nones with zero tensors.
+  # TODO(b/80444525): don't return anything here and make _IfGrad return None if
+  # both branches have zero gradient.
+  for i in range(len(result)):
+    if result[i] is None:
+      result[i] = array_ops.zeros_like(func_graph.inputs[i])
+
+  return result
+
+
+def _create_grad_func(func_graph, grads, name):
+  """Returns the FuncGraph representation of _grad_fn."""
+  return function.func_graph_from_py_func(
+      name, lambda: _grad_fn(func_graph, grads), [], {})
+
+
+def _resolve_grad_inputs(cond_graph, grad_graph):
+  """Returns the tensors to pass as inputs to `grad_graph`.
+
+  The `grad_graph` may have external references to
+  1. Its outer graph containing the input gradients. These references are kept
+     as is.
+  2. Tensors in the forward pass graph. These tensors may not be "live"
+     when the gradient is being computed. We replace such references by their
+     corresponding tensor in `cond_graph.outer_graph`. In the case of nested
+     control flow or functions, the gradient logic handling
+     `grad_graph.outer_graph` will make sure the tensor from
+     `cond_graph.outer_graph` is also correctly captured.
+
+  Args:
+    cond_graph: function.FuncGraph. The forward-pass function.
+    grad_graph: function.FuncGraph. The gradients function.
+
+  Returns:
+    A list of inputs tensors to be passed to grad_graph.
+  """
+  new_inputs = []
+
+  for t in grad_graph.external_captures:
+    # `t` must either be in `grad_graph.outer_graph` or in the forward
+    # `cond_graph`.
+    if t.graph != grad_graph.outer_graph:
+      assert t.graph == cond_graph
+      # `internal_captures` are not treated as intermediates and hence not added
+      # to If op outputs. So we get the outer tensor corresponding to those
+      # from the list of `external_captures`.
+      try:
+        t = t.graph._if.outputs[t.graph.outputs.index(t)]
+      except ValueError:
+        index = t.graph.internal_captures.index(t)
+        t = t.graph.external_captures[index]
+
+      # Note: We rely on the capturing logic of the gradient If op graph to
+      # correctly capture the tensors in `cond_graph.outer_graph`. Both cond_v2
+      # and while_v2 handle this while building their gradient functions.
+      assert t.graph == cond_graph.outer_graph
+    new_inputs.append(t)
+
+  return new_inputs
+
+
+def _create_new_tf_function(func_graph):
+  """Converts func_graph to a TF_Function and adds it to the current graph.
+
+  Args:
+    func_graph: function.FuncGraph
+
+  Returns:
+    The name of the new TF_Function.
+  """
+  func = function._EagerDefinedFunction(
+      func_graph.name, func_graph, func_graph.inputs, func_graph.outputs, {})
+  func.add_to_graph(func_graph.outer_graph)
+  return func_graph.name
+
+
+def _get_intermediates(func_graph):
+  """Returns all tensors in `func_graph` that aren't inputs or outputs."""
+  intermediates = []
+  for op in func_graph.get_operations():
+    for t in op.outputs:
+      if t in func_graph.inputs: continue
+      if t in func_graph.outputs: continue
+      intermediates.append(t)
+  return intermediates
+
+
+def _separate_unique_inputs(true_inputs, false_inputs):
+  """Separates tensors appearing only in true_inputs or false_inputs, or both.
+
+  Args:
+    true_inputs: list of Tensors
+    false_inputs: list of Tensors
+
+  Returns:
+    Three lists of Tensors:
+      1. The tensors that appear in both true_inputs and false_inputs
+      2. The tensors that only appear in true_inputs
+      3. The tensors that only appear in false_inputs
+  """
+  true_inputs = set(true_inputs)
+  false_inputs = set(false_inputs)
+
+  shared_inputs = true_inputs.intersection(false_inputs)
+  true_only_inputs = true_inputs - false_inputs
+  false_only_inputs = false_inputs - true_inputs
+
+  return list(shared_inputs), list(true_only_inputs), list(false_only_inputs)
+
+
+def _pad_params(true_graph, false_graph, true_params, false_params):
+  """Returns new param lists that have matching signatures.
+
+  This is done by mirroring each param list in the other using dummy params.
+  There is no merging of params.
+
+  Args:
+    true_graph: function.FuncGraph
+    false_graph: function.FuncGraph
+    true_params: a list of Tensors from true_graph
+    false_params: a list of Tensors from false_graph
+
+  Returns:
+    A new list of Tensors in true_graph and a new list of Tensors in
+    false_graph. The two lists have the same number of Tensors, with matching
+    types and shapes across the lists.
+  """
+  new_true_params = (true_params +
+                     _create_dummy_params(true_graph, false_params))
+  new_false_inputs = (_create_dummy_params(false_graph, true_params)
+                      + false_params)
+  return new_true_params, new_false_inputs
+
+
+def _make_inputs_match(true_graph, false_graph, true_inputs, false_inputs):
+  """Modifies true_graph and false_graph so they have the same input signature.
+
+  This method reorders and/or adds parameters to true_graph and false_graph so
+  they have the same input signature, and updates the 'inputs' and 'captured'
+  fields of both graphs accordingly. It uses the input tensors from the outer
+  graph to avoid duplicating shared arguments.
+
+  Args:
+    true_graph: function.FuncGraph
+    false_graph: function.FuncGraph
+    true_inputs: a list of Tensors in the outer graph. The inputs for
+      true_graph.
+    false_inputs: a list of Tensors in the outer graph. The inputs for
+      false_graph.
+
+  Returns:
+    A new list of Tensors from the outer graph that are the new inputs for both
+    true_graph and false_graph. This is a deduped version of true_inputs +
+    false_inputs.
+  """
+  shared_inputs, true_only_inputs, false_only_inputs = _separate_unique_inputs(
+      true_inputs, false_inputs)
+
+  new_inputs = shared_inputs + true_only_inputs + false_only_inputs
+
+  true_input_to_param = dict(zip(true_inputs, true_graph.inputs))
+  false_input_to_param = dict(zip(false_inputs, false_graph.inputs))
+
+  true_graph.inputs = (
+      [true_input_to_param[t] for t in shared_inputs] +
+      [true_input_to_param[t] for t in true_only_inputs] +
+      _create_dummy_params(true_graph, false_only_inputs))
+
+  false_graph.inputs = (
+      [false_input_to_param[t] for t in shared_inputs] +
+      _create_dummy_params(false_graph, true_only_inputs) +
+      [false_input_to_param[t] for t in false_only_inputs])
+
+  # Rewrite the FuncGraphs' state to reflect the new inputs.
+  true_graph.captures = collections.OrderedDict(zip(new_inputs,
+                                                    true_graph.inputs))
+  false_graph.captures = collections.OrderedDict(zip(new_inputs,
+                                                     false_graph.inputs))
+
+  return new_inputs
+
+
+def _create_dummy_params(func_graph, template_tensors):
+  """Creates tensors in func_graph to represent template_tensors.
+
+  Args:
+    func_graph: function.FuncGraph.
+    template_tensors: a list of tensors in the outer graph.
+
+  Returns:
+    A list of tensors in func_graph.
+  """
+  with func_graph.as_default():
+    return [gen_functional_ops.fake_param(dtype=t.dtype, shape=t.shape)
+            for t in template_tensors]
+
+
+def _get_grad_fn_name(func_graph):
+  """Returns a unique name to use for the grad function of `func_graph`.
+
+  Ensures this name is unique in the entire hierarchy.
+
+  Args:
+    func_graph: The FuncGraph.
+
+  Returns:
+    A string, the name to use for the gradient function.
+  """
+  name = "%s_grad" % func_graph.name
+  outer_most_graph = func_graph
+  while isinstance(outer_most_graph, function.FuncGraph):
+    outer_most_graph = outer_most_graph.outer_graph
+  return outer_most_graph.unique_name(name)
+
+
+def _check_same_outputs(true_graph, false_graph):
+  """Raises an error if true_graph and false_graph have different outputs."""
+  true_output_types = [t.dtype for t in true_graph.outputs]
+  false_output_types = [t.dtype for t in false_graph.outputs]
+  if (len(true_graph.outputs) != len(false_graph.outputs) or
+      true_output_types != false_output_types):
+    raise ValueError(
+        "true_fn() and false_fn() must return the same number and type of "
+        "arguments, got:\n"
+        "  true_fn: %s\n"
+        "  false_fn: %s" % (true_output_types, false_output_types))
+
+
+def _get_output_shapes(true_graph_outputs, false_graph_outputs):
+  output_shapes = [
+      t_out.shape.most_specific_compatible_shape(f_out.shape)
+      for t_out, f_out in zip(true_graph_outputs, false_graph_outputs)
+  ]
+  return output_shapes
diff --git a/tensorflow/python/ops/cond_v2_impl.py b/tensorflow/python/ops/cond_v2_impl.py
deleted file mode 100644
index 81d9cba042..0000000000
--- a/tensorflow/python/ops/cond_v2_impl.py
+++ /dev/null
@@ -1,497 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-"""cond_v2 and gradient.
-
-This is a version of cond that emits a single If op, as well as the gradient
-function for If ops produced by cond_v2. This will eventually replace the
-current tf.cond implementation once it reaches feature and performance parity.
-
-NOTE: most users of cond_v2 should import cond_v2, not this module! This module
-does not contain all the necessary imports to prevent circular dependencies,
-while cond_v2 does.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-from tensorflow.core.framework import attr_value_pb2
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_util
-from tensorflow.python.ops import gen_functional_ops
-
-
-# The following modules cannot be imported directly because they cause circular
-# dependencies. These are set in each corresponding module.
-_function = None
-_function_def_to_graph = None
-_gradients_impl = None
-
-# NOTE(skyewm): TensorFlow uses protected class methods and fields to signify
-# that they aren't part of the official public API. These protected members
-# often need to be used by implementation code however. Rather than litter the
-# code with pylint comments, we ignore protected access violations for
-# readability.
-# pylint: disable=protected-access
-
-
-def cond_v2(pred, true_fn, false_fn, name="cond"):
-  """Like tf.cond, except emits a single If op."""
-  if isinstance(pred, bool):
-    raise TypeError("pred must not be a Python bool", pred)
-
-  if not name:
-    name = "cond"
-
-  with ops.name_scope(name) as scope:
-    with ops.name_scope(None):
-      # Find the outer most graph for uniquing function names.
-      # TODO(jpienaar): Make this work in eager mode.
-      graph = ops.get_default_graph()
-      while isinstance(graph, _function.FuncGraph):
-        graph = graph.outer_graph
-
-      true_name = graph.unique_name(("%strue" % scope).replace("/", "_"))
-      false_name = graph.unique_name(("%sfalse" % scope).replace("/", "_"))
-
-    true_graph = _function.func_graph_from_py_func(
-        true_name, true_fn, [], {})
-    false_graph = _function.func_graph_from_py_func(
-        false_name, false_fn, [], {})
-    _check_same_outputs(true_graph, false_graph)
-
-    # Add inputs to true_graph and false_graph to make them match. Note that
-    # this modifies true_graph and false_graph.
-    cond_inputs = _make_inputs_match(true_graph, false_graph,
-                                     true_graph.external_captures,
-                                     false_graph.external_captures)
-
-    # Add all intermediate tensors as function outputs so they're available for
-    # the gradient computation.
-
-    true_intermediates = _get_intermediates(true_graph)
-    false_intermediates = _get_intermediates(false_graph)
-
-    # Save the original number of outputs to return to the caller.
-    num_cond_outputs = len(true_graph.outputs)
-
-    # Make the number/type of new intermediate outputs match.
-    extra_true_outputs, extra_false_outputs = _pad_params(
-        true_graph, false_graph, true_intermediates, false_intermediates)
-
-    true_graph.outputs.extend(extra_true_outputs)
-    false_graph.outputs.extend(extra_false_outputs)
-
-    # Create the If op.
-    tensors = gen_functional_ops._if(  # pylint: disable=protected-access
-        pred,
-        cond_inputs, [t.dtype for t in true_graph.outputs],
-        _create_new_tf_function(true_graph),
-        _create_new_tf_function(false_graph),
-        output_shapes=_get_output_shapes(true_graph.outputs,
-                                         false_graph.outputs),
-        name=scope)
-
-    # Set the flag to enable lowering on the `if` op if necessary
-    # Lowering allows cond_v2 to avoid some of the limitations of Functions,
-    # allowing users to specify devices & colocation inside of cond_v2 branches,
-    # and enabling non-strict evaluation & partial pruning of cond_v2 branches.
-    # This brings cond_v2 closer to feature parity with tf.cond.
-    #
-    # However, we do not lower `If` in the XLA context because it is easier for
-    # XLA to apply its own optimizations when dealing with un-lowered `If`
-    # operators than with lowered switch/merge control flow.
-    #
-    # TODO(b/110167197) this approach requires cond_v2 to have at least 1 output
-    if_op = tensors[0].op
-    if not control_flow_util.IsInXLAContext(if_op):
-      # pylint: disable=protected-access
-      if_op._set_attr("_lower_using_switch_merge",
-                      attr_value_pb2.AttrValue(b=True))
-      # pylint: enable=protected-access
-
-    result = tuple(tensors[:num_cond_outputs])
-    if len(result) == 1:
-      return result[0]
-    else:
-      return result
-
-
-@ops.RegisterGradient("If")
-def _IfGrad(op, *grads):  # pylint: disable=invalid-name
-  """The gradient of an If op produced by cond_v2."""
-  true_graph, false_graph = _get_func_graphs(op)
-  # Note: op.graph != ops.get_default_graph() when we are computing the gradient
-  # of a nested cond.
-  assert true_graph.outer_graph == op.graph
-  assert false_graph.outer_graph == op.graph
-
-  # Create grad functions that compute the gradient of the true/false forward
-  # graphs. These functions will capture tensors from the forward pass
-  # functions.
-  true_grad_graph = _create_grad_func(
-      true_graph, grads, _get_grad_fn_name(true_graph))
-  false_grad_graph = _create_grad_func(
-      false_graph, grads, _get_grad_fn_name(false_graph))
-
-  assert ([t.dtype for t in true_grad_graph.outputs] ==
-          [t.dtype for t in false_grad_graph.outputs])
-
-  # Resolve references to forward graph tensors in grad graphs and ensure
-  # they are in-scope, i.e., belong to one of outer graphs of the grad graph.
-  true_grad_inputs = _resolve_grad_inputs(true_graph, true_grad_graph)
-  false_grad_inputs = _resolve_grad_inputs(false_graph, false_grad_graph)
-
-  # Make the inputs to true_grad_graph and false_grad_graph match. Note that
-  # this modifies true_grad_graph and false_grad_graph.
-  grad_inputs = _make_inputs_match(true_grad_graph, false_grad_graph,
-                                   true_grad_inputs, false_grad_inputs)
-
-  # Add all intermediate tensors as function outputs so they're available for
-  # higher-order gradient computations.
-
-  true_grad_intermediates = _get_intermediates(true_grad_graph)
-  false_grad_intermediates = _get_intermediates(false_grad_graph)
-
-  # Save the original number of gradient outputs to return.
-  num_grad_outputs = len(true_grad_graph.outputs)
-
-  # Make the number/type of new intermediate outputs match.
-  extra_true_grad_outputs, extra_false_grad_outputs = _pad_params(
-      true_grad_graph, false_grad_graph,
-      true_grad_intermediates, false_grad_intermediates)
-
-  true_grad_graph.outputs.extend(extra_true_grad_outputs)
-  false_grad_graph.outputs.extend(extra_false_grad_outputs)
-
-  # Create the gradient If op.
-  tensors = gen_functional_ops._if(
-      op.inputs[0],
-      grad_inputs, [t.dtype for t in true_grad_graph.outputs],
-      _create_new_tf_function(true_grad_graph),
-      _create_new_tf_function(false_grad_graph),
-      output_shapes=_get_output_shapes(true_grad_graph.outputs,
-                                       false_grad_graph.outputs))
-
-  # The predicate has no gradient.
-  return [None] + tensors[:num_grad_outputs]
-
-
-def _get_func_graphs(if_op):
-  """Returns `FuncGraph`s for the input op branches.
-
-  Args:
-    if_op: The _If Operation.
-
-  Returns:
-    A 2-tuple of the `FuncGraph`s of the then_branch and else_branch.
-  """
-  def _get_func_graph_for_branch(branch_name):
-    """Generates and returns a FuncGraph for the given branch."""
-    inputs = if_op.inputs[1:]  # First input is pred.
-    input_shapes = [t.shape for t in inputs]
-    func_name = if_op.get_attr(branch_name).name
-    fdef = if_op.graph._get_function(func_name).definition
-    # `if_op.graph` may not be the same as `ops.get_default_graph()` e.g.
-    # in the case of nested if ops or when the gradient is being computed
-    # from inside a Defun. We build the `func_graph` with `if_op.graph` as its
-    # `outer_graph`. This resembles how the `FuncGraph` was built in the
-    # forward pass. We need this so that we can resolve references to tensors
-    # in `func_graph` from its gradient graph in `_resolve_grad_inputs`.
-    with if_op.graph.as_default():
-      func_graph = _function_def_to_graph.function_def_to_graph(
-          fdef, input_shapes)
-    func_graph.captures = collections.OrderedDict(zip(inputs,
-                                                      func_graph.inputs))
-    # Set the if op so that the gradient code can use it.
-    func_graph._if = if_op
-    return func_graph
-
-  return (_get_func_graph_for_branch("then_branch"),
-          _get_func_graph_for_branch("else_branch"))
-
-
-def _grad_fn(func_graph, grads):
-  """The gradient function for each conditional branch.
-
-  This function builds the gradient graph of the corresponding forward-pass
-  conditional branch in `func_graph`. This is done by differentiating
-  func_graph's outputs w.r.t. its inputs.
-
-  Args:
-    func_graph: function.FuncGraph. The corresponding forward-pass function.
-    grads: The list of input gradient Tensors.
-
-  Returns:
-    The output gradient Tensors.
-  """
-  # Filter out untrainable function outputs.
-  # NOTE(skyewm): If we don't do this, the untrainable tensors can sometimes
-  # cause _GradientsHelper to raise an exception (e.g. the implementation
-  # doesn't expect 'ys' to contain boolean tensors).
-  assert len(func_graph.outputs) == len(grads)
-  ys = []
-  grad_ys = []
-  for y, grad_y in zip(func_graph.outputs, grads):
-    if not _gradients_impl._IsTrainable(y):
-      continue
-    ys.append(y)
-    grad_ys.append(grad_y)
-
-  # Build the gradient graph. Note that this builds the gradient computation of
-  # func_graph in the current graph, which requires capturing tensors from
-  # func_graph. The captured func_graph tensors are resolved to external tensors
-  # in _resolve_grad_inputs.
-  result = _gradients_impl._GradientsHelper(
-      ys, func_graph.inputs, grad_ys=grad_ys,
-      src_graph=func_graph)
-
-  # Functions can't return None; replace Nones with zero tensors.
-  # TODO(b/80444525): don't return anything here and make _IfGrad return None if
-  # both branches have zero gradient.
-  for i in range(len(result)):
-    if result[i] is None:
-      result[i] = array_ops.zeros_like(func_graph.inputs[i])
-
-  return result
-
-
-def _create_grad_func(func_graph, grads, name):
-  """Returns the FuncGraph representation of _grad_fn."""
-  return _function.func_graph_from_py_func(
-      name, lambda: _grad_fn(func_graph, grads), [], {})
-
-
-def _resolve_grad_inputs(cond_graph, grad_graph):
-  """Returns the tensors to pass as inputs to `grad_graph`.
-
-  The `grad_graph` may have external references to
-  1. Its outer graph containing the input gradients. These references are kept
-     as is.
-  2. Tensors in the forward pass graph. These tensors may not be "live"
-     when the gradient is being computed. We replace such references by their
-     corresponding tensor in `cond_graph.outer_graph`. In the case of nested
-     control flow or functions, the gradient logic handling
-     `grad_graph.outer_graph` will make sure the tensor from
-     `cond_graph.outer_graph` is also correctly captured.
-
-  Args:
-    cond_graph: function.FuncGraph. The forward-pass function.
-    grad_graph: function.FuncGraph. The gradients function.
-
-  Returns:
-    A list of inputs tensors to be passed to grad_graph.
-  """
-  new_inputs = []
-
-  for t in grad_graph.external_captures:
-    # `t` must either be in `grad_graph.outer_graph` or in the forward
-    # `cond_graph`.
-    if t.graph != grad_graph.outer_graph:
-      assert t.graph == cond_graph
-      # `internal_captures` are not treated as intermediates and hence not added
-      # to If op outputs. So we get the outer tensor corresponding to those
-      # from the list of `external_captures`.
-      try:
-        t = t.graph._if.outputs[t.graph.outputs.index(t)]
-      except ValueError:
-        index = t.graph.internal_captures.index(t)
-        t = t.graph.external_captures[index]
-
-      # Note: We rely on the capturing logic of the gradient If op graph to
-      # correctly capture the tensors in `cond_graph.outer_graph`. Both cond_v2
-      # and while_v2 handle this while building their gradient functions.
-      assert t.graph == cond_graph.outer_graph
-    new_inputs.append(t)
-
-  return new_inputs
-
-
-def _create_new_tf_function(func_graph):
-  """Converts func_graph to a TF_Function and adds it to the current graph.
-
-  Args:
-    func_graph: function.FuncGraph
-
-  Returns:
-    The name of the new TF_Function.
-  """
-  func = _function._EagerDefinedFunction(
-      func_graph.name, func_graph, func_graph.inputs, func_graph.outputs, {})
-  func.add_to_graph(func_graph.outer_graph)
-  return func_graph.name
-
-
-def _get_intermediates(func_graph):
-  """Returns all tensors in `func_graph` that aren't inputs or outputs."""
-  intermediates = []
-  for op in func_graph.get_operations():
-    for t in op.outputs:
-      if t in func_graph.inputs: continue
-      if t in func_graph.outputs: continue
-      intermediates.append(t)
-  return intermediates
-
-
-def _separate_unique_inputs(true_inputs, false_inputs):
-  """Separates tensors appearing only in true_inputs or false_inputs, or both.
-
-  Args:
-    true_inputs: list of Tensors
-    false_inputs: list of Tensors
-
-  Returns:
-    Three lists of Tensors:
-      1. The tensors that appear in both true_inputs and false_inputs
-      2. The tensors that only appear in true_inputs
-      3. The tensors that only appear in false_inputs
-  """
-  true_inputs = set(true_inputs)
-  false_inputs = set(false_inputs)
-
-  shared_inputs = true_inputs.intersection(false_inputs)
-  true_only_inputs = true_inputs - false_inputs
-  false_only_inputs = false_inputs - true_inputs
-
-  return list(shared_inputs), list(true_only_inputs), list(false_only_inputs)
-
-
-def _pad_params(true_graph, false_graph, true_params, false_params):
-  """Returns new param lists that have matching signatures.
-
-  This is done by mirroring each param list in the other using dummy params.
-  There is no merging of params.
-
-  Args:
-    true_graph: function.FuncGraph
-    false_graph: function.FuncGraph
-    true_params: a list of Tensors from true_graph
-    false_params: a list of Tensors from false_graph
-
-  Returns:
-    A new list of Tensors in true_graph and a new list of Tensors in
-    false_graph. The two lists have the same number of Tensors, with matching
-    types and shapes across the lists.
-  """
-  new_true_params = (true_params +
-                     _create_dummy_params(true_graph, false_params))
-  new_false_inputs = (_create_dummy_params(false_graph, true_params)
-                      + false_params)
-  return new_true_params, new_false_inputs
-
-
-def _make_inputs_match(true_graph, false_graph, true_inputs, false_inputs):
-  """Modifies true_graph and false_graph so they have the same input signature.
-
-  This method reorders and/or adds parameters to true_graph and false_graph so
-  they have the same input signature, and updates the 'inputs' and 'captured'
-  fields of both graphs accordingly. It uses the input tensors from the outer
-  graph to avoid duplicating shared arguments.
-
-  Args:
-    true_graph: function.FuncGraph
-    false_graph: function.FuncGraph
-    true_inputs: a list of Tensors in the outer graph. The inputs for
-      true_graph.
-    false_inputs: a list of Tensors in the outer graph. The inputs for
-      false_graph.
-
-  Returns:
-    A new list of Tensors from the outer graph that are the new inputs for both
-    true_graph and false_graph. This is a deduped version of true_inputs +
-    false_inputs.
-  """
-  shared_inputs, true_only_inputs, false_only_inputs = _separate_unique_inputs(
-      true_inputs, false_inputs)
-
-  new_inputs = shared_inputs + true_only_inputs + false_only_inputs
-
-  true_input_to_param = dict(zip(true_inputs, true_graph.inputs))
-  false_input_to_param = dict(zip(false_inputs, false_graph.inputs))
-
-  true_graph.inputs = (
-      [true_input_to_param[t] for t in shared_inputs] +
-      [true_input_to_param[t] for t in true_only_inputs] +
-      _create_dummy_params(true_graph, false_only_inputs))
-
-  false_graph.inputs = (
-      [false_input_to_param[t] for t in shared_inputs] +
-      _create_dummy_params(false_graph, true_only_inputs) +
-      [false_input_to_param[t] for t in false_only_inputs])
-
-  # Rewrite the FuncGraphs' state to reflect the new inputs.
-  true_graph.captures = collections.OrderedDict(zip(new_inputs,
-                                                    true_graph.inputs))
-  false_graph.captures = collections.OrderedDict(zip(new_inputs,
-                                                     false_graph.inputs))
-
-  return new_inputs
-
-
-def _create_dummy_params(func_graph, template_tensors):
-  """Creates tensors in func_graph to represent template_tensors.
-
-  Args:
-    func_graph: function.FuncGraph.
-    template_tensors: a list of tensors in the outer graph.
-
-  Returns:
-    A list of tensors in func_graph.
-  """
-  with func_graph.as_default():
-    return [gen_functional_ops.fake_param(dtype=t.dtype, shape=t.shape)
-            for t in template_tensors]
-
-
-def _get_grad_fn_name(func_graph):
-  """Returns a unique name to use for the grad function of `func_graph`.
-
-  Ensures this name is unique in the entire hierarchy.
-
-  Args:
-    func_graph: The FuncGraph.
-
-  Returns:
-    A string, the name to use for the gradient function.
-  """
-  name = "%s_grad" % func_graph.name
-  outer_most_graph = func_graph
-  while isinstance(outer_most_graph, _function.FuncGraph):
-    outer_most_graph = outer_most_graph.outer_graph
-  return outer_most_graph.unique_name(name)
-
-
-def _check_same_outputs(true_graph, false_graph):
-  """Raises an error if true_graph and false_graph have different outputs."""
-  true_output_types = [t.dtype for t in true_graph.outputs]
-  false_output_types = [t.dtype for t in false_graph.outputs]
-  if (len(true_graph.outputs) != len(false_graph.outputs) or
-      true_output_types != false_output_types):
-    raise ValueError(
-        "true_fn() and false_fn() must return the same number and type of "
-        "arguments, got:\n"
-        "  true_fn: %s\n"
-        "  false_fn: %s" % (true_output_types, false_output_types))
-
-
-def _get_output_shapes(true_graph_outputs, false_graph_outputs):
-  output_shapes = [
-      t_out.shape.most_specific_compatible_shape(f_out.shape)
-      for t_out, f_out in zip(true_graph_outputs, false_graph_outputs)
-  ]
-  return output_shapes
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 5bc217d355..56a4d4b768 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -40,7 +40,6 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import cond_v2_impl
 from tensorflow.python.ops import control_flow_util as util
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gen_control_flow_ops
@@ -58,18 +57,21 @@ from tensorflow.python.util import compat
 from tensorflow.python.util import deprecation
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_should_use
+from tensorflow.python.util.lazy_loader import LazyLoader
 from tensorflow.python.util.tf_export import tf_export
 
-# The while_v2 module.
-_while_v2 = None
+# This is to avoid a circular dependency:
+# cond_v2 -> gradients_impl -> control_flow_ops
+cond_v2 = LazyLoader(
+    "cond_v2", globals(), "tensorflow.python.ops.cond_v2")
+
+# This is to avoid circular dependencies:
+# while_v2 -> control_flow_ops
+# while_v2 -> gradients_impl -> control_flow_ops
+while_v2 = LazyLoader(
+    "while_v2", globals(), "tensorflow.python.ops.while_v2")
 
 ENABLE_COND_V2 = os.getenv("TF_ENABLE_COND_V2", "0") != "0"
-# Note: Setting this to True is not sufficient to switch to the v2 while_loop.
-# Users must also import the while_v2 module to set the _while_v2 module
-# variable above. We do this to avoid a circular dependency:
-# control_flow_ops -> while_v2 -> gradients_impl -> control_flow_ops
-# A ValueError is raised in tf.while_loop if this is set to True and the
-# `_while_v2` module is not set.
 ENABLE_WHILE_V2 = os.getenv("TF_ENABLE_WHILE_V2", "0") != "0"
 
 
@@ -2040,7 +2042,7 @@ def cond(pred,
 
   """
   if ENABLE_COND_V2 and not context.executing_eagerly():
-    return cond_v2_impl.cond_v2(pred, true_fn, false_fn, name)
+    return cond_v2.cond_v2(pred, true_fn, false_fn, name)
 
   # We needed to make true_fn/false_fn keyword arguments for
   # backwards-compatibility. This check exists so that we can convert back to
@@ -3224,11 +3226,7 @@ def while_loop(cond,
 
   """
   if ENABLE_WHILE_V2 and not context.executing_eagerly():
-    if not _while_v2:
-      raise ValueError("The while_v2 module is not set. Did you forget to "
-                       "import tensorflow.python.ops."
-                       "while_v2?")
-    return _while_v2.while_loop(
+    return while_v2.while_loop(
         cond, body, loop_vars, shape_invariants=shape_invariants, name=name)
 
   with ops.name_scope(name, "while", loop_vars):
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index 6909fcaed5..c58ecf4147 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -21,7 +21,6 @@ from __future__ import print_function
 import collections
 import contextlib
 import enum  # pylint: disable=g-bad-import-order
-import sys
 import warnings
 
 import numpy as np
@@ -39,7 +38,6 @@ from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops  # pylint: disable=unused-import
-from tensorflow.python.ops import cond_v2_impl
 from tensorflow.python.ops import control_flow_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
@@ -63,9 +61,6 @@ from tensorflow.python.util.tf_export import tf_export
 # gradients_impl). This is set in eager/function.py.
 _function = None
 
-# This is to avoid a circular dependency with cond_v2_impl.
-cond_v2_impl._gradients_impl = sys.modules[__name__]  # pylint: disable=protected-access
-
 # Warn the user if we convert a sparse representation to dense with at
 # least this number of elements.
 _LARGE_SPARSE_NUM_ELEMENTS = 100000000
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index 0419656143..b805a46583 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -24,7 +24,6 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
-import sys
 
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.python.eager import function
@@ -34,7 +33,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import cond_v2_impl as cond_v2
+from tensorflow.python.ops import cond_v2
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import custom_gradient
@@ -45,8 +44,6 @@ from tensorflow.python.util import nest
 
 # pylint: disable=protected-access
 
-control_flow_ops._while_v2 = sys.modules[__name__]
-
 # TODO(b/79881896): Handle external control dependencies. tf.while_loop allows
 # control dependencies on external nodes with at least 1 output.
 # Another idea is to create const nodes outside the loop and add control edges
-- 
GitLab


From 427b6d2dfdc15c469e9805cb57eb97aba468fa7f Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Fri, 12 Oct 2018 11:59:34 -0700
Subject: [PATCH 0890/1085] Fix a typo in xla.estimator_model_fn docstring

PiperOrigin-RevId: 216898670
---
 tensorflow/contrib/compiler/xla.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py
index 873b03580d..28d7d653ee 100644
--- a/tensorflow/contrib/compiler/xla.py
+++ b/tensorflow/contrib/compiler/xla.py
@@ -606,8 +606,8 @@ class _ModelFnWrapper(object):
 def estimator_model_fn(target_model_fn=None):
   """estimator_model_fn decorates a model_fn to be compiled for execution.
 
-  Currently only it only works with `TPUEstimator`. If you need to use it with
-  base `Estimator`, please add `tf.enable_resource_variables()` at beginning of
+  Currently it only works with `TPUEstimator`. If you need to use it with base
+  `Estimator`, please add `tf.enable_resource_variables()` at the beginning of
   your program.
 
   Example 1, decorating model_fn:
-- 
GitLab


From a98b34414f125db1edc97bd7e62cf93701ecdf73 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 12 Oct 2018 12:01:44 -0700
Subject: [PATCH 0891/1085] Automated rollback of commit
 7294f306c0e144db5340b713c903c45e79105e5e

PiperOrigin-RevId: 216898976
---
 tensorflow/core/framework/op_kernel.cc | 53 +-------------------------
 1 file changed, 2 insertions(+), 51 deletions(-)

diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index f21821f116..3e34bf0418 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/framework/op_kernel.h"
 
-#include <mutex>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -39,7 +38,6 @@ limitations under the License.
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
@@ -926,50 +924,12 @@ struct KernelRegistration {
 // KernelDef.
 typedef std::unordered_multimap<string, KernelRegistration> KernelRegistry;
 
-#if defined(_WIN32)
-static const char kKernelLibPattern[] = "libtfkernel*.dll";
-#elif defined(__APPLE__)
-static const char kKernelLibPattern[] = "libtfkernel*.dylib";
-#else
-static const char kKernelLibPattern[] = "libtfkernel*.so";
-#endif
-
-void LoadDynamicKernelsInternal() {
-  Env* env = Env::Default();
-  string bazel_kernel_dir = io::JoinPath(env->GetRunfilesDir(),
-                                         "tensorflow",
-                                         "core",
-                                         "kernels");
-  std::vector<string> files;
-  Status s_kernel_dir = env->GetChildren(bazel_kernel_dir, &files);
-  if (s_kernel_dir.ok()) {
-    string dll_spec = io::JoinPath(bazel_kernel_dir, kKernelLibPattern);
-    for (const auto&  file : files) {
-      string fullpath =  io::JoinPath(bazel_kernel_dir, file);
-      if (env->MatchPath(fullpath, dll_spec)) {
-        // TODO(gunan): Store the handles to the opened files.
-        void* unused_filehandle;
-        TF_CHECK_OK(env->LoadLibrary(fullpath.c_str(), &unused_filehandle));
-      }
-    }
-  }
-}
-
-// Mechanism for loading existing kernel libraries.
-void LoadDynamicKernels() {
-  // TODO(gunan): As more features are available, add intelligent kernel
-  // selection, and dropping unsuitable kernel logic here.
-  static std::once_flag dll_loader_flag;
-  std::call_once(dll_loader_flag, LoadDynamicKernelsInternal);
-}
-
 void* GlobalKernelRegistry() {
   static KernelRegistry* global_kernel_registry = new KernelRegistry;
   return global_kernel_registry;
 }
 
 static KernelRegistry* GlobalKernelRegistryTyped() {
-  LoadDynamicKernels();
   return reinterpret_cast<KernelRegistry*>(GlobalKernelRegistry());
 }
 
@@ -989,17 +949,8 @@ void OpKernelRegistrar::InitInternal(const KernelDef* kernel_def,
     const string key =
         Key(kernel_def->op(), DeviceType(kernel_def->device_type()),
             kernel_def->label());
-
-    // To avoid calling LoadDynamicKernels DO NOT CALL GlobalKernelRegistryTyped
-    // here.
-    // InitInternal gets called by static initializers, so it ends up executing
-    // before main. This causes LoadKernelLibraries function to get called
-    // before some file libraries can initialize, which in turn crashes the
-    // program flakily. Until we get rid of static initializers in kernel
-    // registration mechanism, we have this workaround here.
-    reinterpret_cast<KernelRegistry*>(GlobalKernelRegistry())
-        ->insert(std::make_pair(
-            key, KernelRegistration(*kernel_def, kernel_class_name, factory)));
+    GlobalKernelRegistryTyped()->insert(std::make_pair(
+        key, KernelRegistration(*kernel_def, kernel_class_name, factory)));
   }
   delete kernel_def;
 }
-- 
GitLab


From c665c2669974167672f9dd33d51ff9490d3042cf Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 12 Oct 2018 12:01:49 -0700
Subject: [PATCH 0892/1085] [tf.data vectorization] Handle negative "axis" attr
 values for Unpack vectorizer

PiperOrigin-RevId: 216898989
---
 .../data/vectorization/unpack_vectorizer.cc   | 19 +++++++++++++++----
 .../optimization/map_vectorization_test.py    |  5 ++++-
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
index f1ba741821..13b8500eda 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/unpack_vectorizer.cc
@@ -35,15 +35,26 @@ class UnpackVectorizer : public Vectorizer {
     auto new_unpack_node = outer_scope->AddNode(node.def(), &s);
     TF_RETURN_IF_ERROR(s);
 
-    // Increment "axis" attr by 1:
-    int new_axis = node.def().attr().at("axis").i() + 1;
-    new_unpack_node->AddAttr("axis", new_axis);
+    int axis = 0;
+    if (HasNodeAttr(node.def(), "axis")) {
+      TF_RETURN_IF_ERROR(GetNodeAttr(node.attrs(), "axis", &axis));
+    }
+
+    if (axis >= 0) {
+      // Since the vectorized input has an extra leading dimension, we need
+      // to increment `axis` attr by 1 for non-negative axis values.
+      // Note: negative axis values wrap around.
+      axis += 1;
+    }
+    new_unpack_node->AddAttr("axis", axis);
 
     outer_scope->AddEdge(inputs[0].node, inputs[0].output_index,
                          new_unpack_node, 0);
 
+    int num;
+    TF_RETURN_IF_ERROR(GetNodeAttr(node.attrs(), "num", &num));
+
     // Add the output mappings
-    int num = node.def().attr().at("num").i();
     for (int i = 0; i < num; ++i) {
       outputs->push_back({new_unpack_node, i, true});
     }
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index bcceab60bb..eb24fa6462 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -51,8 +51,11 @@ def _generate_optimization_test_cases():
       ("Add", lambda x: x + rand_val, base_dataset_factory),
       ("Cast", lambda x: math_ops.cast(x, dtypes.float64),
        base_dataset_factory),
-      ("Reshape", lambda x: array_ops.gather(x, 0), base_dataset_factory),
+      ("Reshape", lambda x: array_ops.reshape(x, (-1, 30)),
+       base_dataset_factory),
       ("Unpack", array_ops.unstack, base_dataset_factory),
+      ("UnpackNegativeAxis", lambda x: array_ops.unstack(x, axis=-1),
+       base_dataset_factory),
   ]
 
   return [{
-- 
GitLab


From b4018576d536c67b67e2bfdc0044941b6c4f3dd2 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Fri, 12 Oct 2018 12:05:27 -0700
Subject: [PATCH 0893/1085] Internal change.

PiperOrigin-RevId: 216899775
---
 tensorflow/contrib/lite/build_def.bzl         |  9 ++---
 tensorflow/contrib/lite/testing/BUILD         | 29 ----------------
 .../contrib/lite/testing/model_coverage/BUILD | 33 +++++++++++++++++++
 .../model_coverage/model_coverage_lib_test.py | 11 ++-----
 4 files changed, 41 insertions(+), 41 deletions(-)
 create mode 100644 tensorflow/contrib/lite/testing/model_coverage/BUILD

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index f962a138f7..e62c192dfc 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -421,14 +421,14 @@ def gen_selected_ops(name, model):
         tools = [tool],
     )
 
-def gen_full_model_test(conversion_modes, models, data, test_suite_tag):
+def gen_full_model_test(conversion_modes, models, data, tags):
     """Generates Python test targets for testing TFLite models.
 
     Args:
       conversion_modes: List of conversion modes to test the models on.
       models: List of models to test.
       data: List of BUILD targets linking the data.
-      test_suite_tag: Tag identifying the model test suite.
+      tags: Any additional tags including the test_suite tag.
     """
     options = [
         (conversion_mode, model)
@@ -451,10 +451,11 @@ def gen_full_model_test(conversion_modes, models, data, test_suite_tag):
                 "no_oss",
                 "no_windows",
                 "notap",
+                # TODO(nupurgarg): Remove manual tag when this test is running without the BUILD flag.
                 "manual",
-            ] + [test_suite_tag],
+            ] + tags,
             deps = [
-                "//tensorflow/contrib/lite/testing:model_coverage_lib",
+                "//tensorflow/contrib/lite/testing/model_coverage:model_coverage_lib",
                 "//tensorflow/contrib/lite/python:lite",
                 "//tensorflow/python:client_testlib",
             ],
diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD
index 8f45d05ce3..891d44d2b6 100644
--- a/tensorflow/contrib/lite/testing/BUILD
+++ b/tensorflow/contrib/lite/testing/BUILD
@@ -387,33 +387,4 @@ cc_binary(
     ],
 )
 
-py_binary(
-    name = "model_coverage_lib",
-    srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib.py"],
-    srcs_version = "PY2AND3",
-    tags = ["no_pip"],
-    visibility = ["//tensorflow/contrib/lite:__subpackages__"],
-    deps = [
-        "//tensorflow/contrib/lite/python:lite",
-        "//tensorflow/python:platform",
-    ],
-)
-
-py_test(
-    name = "model_coverage_lib_test",
-    srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "manual",
-        "no_oss",
-        "no_pip",
-        "no_windows",
-        "notap",
-    ],
-    deps = [
-        ":model_coverage_lib",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
 tflite_portable_test_suite()
diff --git a/tensorflow/contrib/lite/testing/model_coverage/BUILD b/tensorflow/contrib/lite/testing/model_coverage/BUILD
new file mode 100644
index 0000000000..c8359bab06
--- /dev/null
+++ b/tensorflow/contrib/lite/testing/model_coverage/BUILD
@@ -0,0 +1,33 @@
+package(default_visibility = [
+    "//tensorflow/contrib/lite:__subpackages__",
+])
+
+licenses(["notice"])  # Apache 2.0
+
+py_binary(
+    name = "model_coverage_lib",
+    srcs = ["model_coverage_lib.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/contrib/lite/python:lite",
+        "//tensorflow/python:platform",
+    ],
+)
+
+py_test(
+    name = "model_coverage_lib_test",
+    srcs = ["model_coverage_lib_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "manual",
+        "no_oss",
+        "no_pip",
+        "no_windows",
+        "notap",
+    ],
+    deps = [
+        ":model_coverage_lib",
+        "//tensorflow/python:client_testlib",
+    ],
+)
diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
index e07202b1a6..6989a9ef01 100644
--- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
+++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py
@@ -86,22 +86,17 @@ class EvaluateFrozenGraph(test.TestCase):
 
   def testQuantized(self):
     filename = self._getQuantizedModel()
-    model_coverage.test_frozen_graph_quant(filename, ['inputA', 'inputB'],
-                                           ['output'])
+    model_coverage.test_frozen_graph_quant(filename, ['inputA'], ['output'])
 
   def testQuantizedInputShapes(self):
     filename = self._getQuantizedModel()
     model_coverage.test_frozen_graph_quant(
-        filename, ['inputA', 'inputB'], ['output'],
-        input_shapes={
-            'inputA': [33, 33],
-            'inputB': [33, 33],
-        })
+        filename, ['inputA'], ['output'], input_shapes={'inputA': [33, 33]})
 
   def testQuantizedFlexAll(self):
     filename = self._getQuantizedModel()
     model_coverage.test_frozen_graph_quant(
-        filename, ['inputA', 'inputB'], ['output'],
+        filename, ['inputA'], ['output'],
         converter_mode=lite.ConverterMode.TOCO_FLEX_ALL)
 
 
-- 
GitLab


From daf58076af9d28ca2e7e618c6e4e9a695ecc6975 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 12:32:23 -0700
Subject: [PATCH 0894/1085] Automated rollback of commit
 14f86407043fec62890fa00c246f5968b12a15f1

PiperOrigin-RevId: 216903862
---
 tensorflow/contrib/lite/toco/tflite/BUILD     |  4 ---
 tensorflow/contrib/lite/toco/tflite/export.cc |  2 +-
 .../contrib/lite/toco/tflite/operator.cc      | 27 -------------------
 .../contrib/lite/toco/tflite/operator.h       |  5 ----
 .../contrib/lite/toco/tflite/operator_test.cc |  6 -----
 5 files changed, 1 insertion(+), 43 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/tflite/BUILD b/tensorflow/contrib/lite/toco/tflite/BUILD
index a0450f3ec1..71cdb7703e 100644
--- a/tensorflow/contrib/lite/toco/tflite/BUILD
+++ b/tensorflow/contrib/lite/toco/tflite/BUILD
@@ -42,7 +42,6 @@ tf_cc_test(
     deps = [
         ":operator",
         "//tensorflow/contrib/lite/toco:tooling_util",
-        "//tensorflow/core:ops",
         "//tensorflow/core:protos_all_cc",
         "@com_google_googletest//:gtest_main",
         "@flatbuffers",
@@ -72,7 +71,6 @@ tf_cc_test(
     tags = ["no_oss"],
     deps = [
         ":types",
-        "//tensorflow/core:ops",
         "@com_google_googletest//:gtest_main",
     ],
 )
@@ -108,7 +106,6 @@ tf_cc_test(
     deps = [
         ":export",
         "//tensorflow/contrib/lite/schema:schema_fbs",
-        "//tensorflow/core:ops",
         "@com_google_googletest//:gtest_main",
     ],
 )
@@ -144,7 +141,6 @@ tf_cc_test(
         ":import",
         "//tensorflow/contrib/lite:schema_fbs_version",
         "//tensorflow/contrib/lite/schema:schema_fbs",
-        "//tensorflow/core:ops",
         "@com_google_googletest//:gtest_main",
         "@flatbuffers",
     ],
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index 02577b317a..3b34cd6285 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -126,7 +126,7 @@ OperatorKey GetOperatorKey(
 
     // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
     // to populate a regular custom op. We need to find a way to fix this.
-    if (ShouldExportAsFlexOp(allow_flex_ops, unsupported_op.tensorflow_op)) {
+    if (allow_flex_ops) {
       key.is_flex_op = true;
       key.flex_tensorflow_op = tensorflow_op;
       key.custom_code =
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 1ee71d4341..e08a61d357 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -23,8 +23,6 @@ limitations under the License.
 #include "tensorflow/contrib/lite/toco/tflite/types.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace toco {
@@ -1260,16 +1258,6 @@ class TensorFlowUnsupported : public BaseOperator {
       return std::unique_ptr<flexbuffers::Builder>();
     }
 
-    if (ShouldExportAsFlexOp(allow_flex_ops_, node_def.op())) {
-      fbb->Vector([&]() {
-        fbb->String(node_def.op());
-        fbb->String(op.tensorflow_node_def);
-      });
-      fbb->Finish();
-      LOG(INFO) << "Writing flex op: " << node_def.op();
-      return std::unique_ptr<flexbuffers::Builder>(fbb.release());
-    }
-
     bool has_valid_attr = false;
     size_t map_start = fbb->StartMap();
     for (const auto& pair : node_def.attr()) {
@@ -1600,21 +1588,6 @@ std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
   return result;
 }
 
-bool ShouldExportAsFlexOp(bool allow_flex_ops,
-                          const string& tensorflow_op_name) {
-  // If Flex ops aren't allow at all, simply return false.
-  if (!allow_flex_ops) {
-    return false;
-  }
-  // Check if we can find the `OpDef` for the TensorFlow op. If we can find
-  // it, export the op as an Flex op. Otherwise, export it as a regular custom
-  // op.
-  const tensorflow::OpDef* op_def = nullptr;
-  return tensorflow::OpRegistry::Global()
-      ->LookUpOpDef(tensorflow_op_name, &op_def)
-      .ok();
-}
-
 }  // namespace tflite
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h
index 6e2a41bf53..6e4e0a16d1 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.h
+++ b/tensorflow/contrib/lite/toco/tflite/operator.h
@@ -113,11 +113,6 @@ class BaseOperator {
   OperatorType type_;
 };
 
-// Helper function to determine if a unsupported TensorFlow op should be
-// exported as an Flex op or a regular custom op.
-bool ShouldExportAsFlexOp(bool allow_flex_ops,
-                          const string& tensorflow_op_name);
-
 }  // namespace tflite
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
index 66896a49c0..0bc591e647 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
@@ -569,12 +569,6 @@ TEST_F(OperatorTest, TensorFlowUnsupportedWithoutAttr) {
   EXPECT_TRUE(output_node_def.attr().empty());
 }
 
-TEST_F(OperatorTest, TestShouldExportAsFlexOp) {
-  EXPECT_FALSE(ShouldExportAsFlexOp(false, "Conv2D"));
-  EXPECT_TRUE(ShouldExportAsFlexOp(true, "Conv2D"));
-  EXPECT_FALSE(ShouldExportAsFlexOp(true, "MyAwesomeCustomOp"));
-}
-
 }  // namespace
 }  // namespace tflite
 
-- 
GitLab


From 2f5e8e189c3b70840d7a835d4e36fd146380abc4 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Fri, 12 Oct 2018 12:56:09 -0700
Subject: [PATCH 0895/1085] [XLA:GPU] Adapt test for cudnn conv picker pass
 rename

This now fails in some configurations and is obviously doing the wrong thing.

PiperOrigin-RevId: 216907441
---
 tensorflow/compiler/xla/tests/convolution_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc
index b851db14ec..3aebf78466 100644
--- a/tensorflow/compiler/xla/tests/convolution_test.cc
+++ b/tensorflow/compiler/xla/tests/convolution_test.cc
@@ -883,7 +883,7 @@ XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) {
 // (We run this test on all platforms, because, what the heck.)
 XLA_TEST_F(ConvolutionTest, NoCudnnAlgorithmPicker) {
   execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes(
-      "cudnn-convolution-algorithm-picker");
+      "cudnn-conv-algorithm-picker");
 
   XlaBuilder builder(TestName());
   Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
-- 
GitLab


From 85f4f6b7ced7afab7e77e65c2b21448cfbf2d6f2 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Fri, 12 Oct 2018 13:11:06 -0700
Subject: [PATCH 0896/1085] Fix shape inference segfault in
 ScatterNdUpdateShape.

Fixes #22013.

PiperOrigin-RevId: 216909792
---
 tensorflow/core/framework/common_shape_fns.cc         | 9 +++++++--
 tensorflow/python/kernel_tests/scatter_nd_ops_test.py | 9 +++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index 50403b4004..e1be16a39c 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -1457,7 +1457,11 @@ Status ValidateSparseTensor(InferenceContext* c, ShapeHandle indices_shape,
 Status ScatterNdUpdateShape(InferenceContext* c) {
   ShapeHandle input_shape = c->input(0);
   if (c->input_handle_shapes_and_types(0) != nullptr) {
-    input_shape = (*c->input_handle_shapes_and_types(0))[0].shape;
+    // This is called for tf.scatter_nd_update; input is a Variable handle.
+    const auto& shape_and_type = *(c->input_handle_shapes_and_types(0));
+    if (shape_and_type.size() == 1) {
+      input_shape = shape_and_type[0].shape;
+    }
   }
   ShapeHandle indices_shape;
   TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(1), 1, &indices_shape));
@@ -1514,7 +1518,8 @@ Status ScatterNdUpdateShape(InferenceContext* c) {
     }
   }
 
-  if (c->input_handle_shapes_and_types(0) == nullptr) {
+  if (c->input_handle_shapes_and_types(0) == nullptr && c->num_outputs() > 0) {
+    // This is called for tf.scatter_nd; output is a tensor with this shape.
     c->set_output(0, input_shape);
   }
   return Status::OK();
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index 4b92309e4d..9843bf4be0 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -285,6 +285,15 @@ class StatefulScatterNdTest(test.TestCase):
         state_ops.scatter_nd_update(ref, indices,
                                     updates).get_shape().as_list(), shape)
 
+  def testResVarInvalidOutputShape(self):
+    res = variables.Variable(
+        initial_value=lambda: array_ops.zeros(shape=[], dtype=dtypes.float32),
+        dtype=dtypes.float32)
+    with self.cached_session():
+      res.initializer.run()
+      with self.assertRaisesOpError("Output must be at least 1-D"):
+        state_ops.scatter_nd_update(res, [[0]], [0.22]).eval()
+
   def testExtraIndicesDimensions(self):
     indices = array_ops.zeros([1, 1, 2], dtypes.int32)
     updates = array_ops.zeros([1, 1], dtypes.int32)
-- 
GitLab


From a45b907f6b03ec76f65512657d7b6231eb3c7152 Mon Sep 17 00:00:00 2001
From: Deepak B <dksb29@gmail.com>
Date: Fri, 12 Oct 2018 13:20:20 -0700
Subject: [PATCH 0897/1085] Create ISSUES.md

---
 ISSUES.md | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 ISSUES.md

diff --git a/ISSUES.md b/ISSUES.md
new file mode 100644
index 0000000000..f44363c8b3
--- /dev/null
+++ b/ISSUES.md
@@ -0,0 +1,6 @@
+If you open a GitHub Issue, here is our policy:
+1. It must be a bug/performance issue or a feature request or a build issue or a documentation issue (for small doc fixes please send a PR instead).
+2. Make sure the Issue Template is filled out.
+3. The issue should be related to the repo it is created in.
+
+**Here's why we have this policy:** We want to focus on the work that benefits the whole community, e.g., fixing bugs and adding features. Individual support should be seeked on StackOverflow or other non-GitHub channels. It helps us to address bugs and feature requests in a timely manner.
-- 
GitLab


From 0608a3f0dbbac03aeb84966d2ea9a24a694ebbce Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Fri, 12 Oct 2018 13:29:55 -0700
Subject: [PATCH 0898/1085] [Grappler] Enable RemoveStackStridedSliceSameAxis
 optimizer.

PiperOrigin-RevId: 216912523
---
 tensorflow/core/grappler/optimizers/arithmetic_optimizer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
index bb56f61e30..15e5ad9df5 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
@@ -80,7 +80,7 @@ class ArithmeticOptimizer : public GraphOptimizer {
     bool convert_log1p = true;
     bool convert_expm1 = true;
     bool unary_ops_composition = true;
-    bool remove_stack_strided_slice_same_axis = false;
+    bool remove_stack_strided_slice_same_axis = true;
 
     // Choose which arithmetic optimizer stages will be enabled for a given
     // optimization level by default.
-- 
GitLab


From fe52c06bd6e0a380487fc8e2dec3831377403384 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Fri, 12 Oct 2018 13:39:20 -0700
Subject: [PATCH 0899/1085] Rollforward with build fix to MLIR TPU compiler.
 Also renamed some methods to avoid "hides overloaded virtual function"
 compilation error which only appears in the "Builder" analysis in Critique.
 See b/11765370.

*** Original change description ***

Automated rollback of commit 51f0eb5849be0f9ce20e5eb8370158088711f19d

PiperOrigin-RevId: 216914046
---
 tensorflow/compiler/xla/service/BUILD         |  1 +
 .../xla/service/compile_only_service.cc       |  6 +++--
 tensorflow/compiler/xla/service/compiler.cc   |  4 +--
 tensorflow/compiler/xla/service/compiler.h    | 27 ++++++++++++++-----
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  7 +++--
 .../compiler/xla/service/cpu/cpu_compiler.h   |  2 +-
 .../xla/service/gpu/nvptx_compiler.cc         |  5 ++--
 .../compiler/xla/service/gpu/nvptx_compiler.h |  2 +-
 .../compiler/xla/service/hlo_module_group.cc  |  5 ++--
 .../compiler/xla/service/hlo_module_group.h   | 13 ++++++++-
 .../xla/service/hlo_module_group_test.cc      |  2 +-
 .../xla/service/interpreter/compiler.cc       | 23 +++++++++++++---
 .../xla/service/interpreter/compiler.h        | 11 ++++++--
 .../compiler/xla/service/llvm_compiler.cc     | 20 +++++++++++++-
 .../compiler/xla/service/llvm_compiler.h      | 11 +++++++-
 tensorflow/compiler/xla/service/service.cc    |  9 ++++---
 .../compiler/xla/tests/codegen_test_base.cc   |  5 ++--
 .../compiler/xla/tests/llvm_compiler_test.cc  | 13 ++++-----
 18 files changed, 122 insertions(+), 44 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 6c3b9764b7..7d03eba800 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -860,6 +860,7 @@ cc_library(
         ":executable",
         ":hlo",
         ":hlo_module_config",
+        ":hlo_module_group",
         ":logical_buffer",
         "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:types",
diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc
index bd5045b9b9..c9b0e4c08c 100644
--- a/tensorflow/compiler/xla/service/compile_only_service.cc
+++ b/tensorflow/compiler/xla/service/compile_only_service.cc
@@ -103,8 +103,10 @@ CompileOnlyService::CompileAheadOfTime(
     hlo_modules.push_back(std::move(hlo_module));
   }
 
-  return compiler_->CompileAheadOfTime(std::move(hlo_modules), options,
-                                       metadata);
+  return compiler_->CompileAheadOfTime(
+      absl::make_unique<HloModuleGroup>(hlo_modules[0]->name(),
+                                        absl::MakeSpan(hlo_modules)),
+      options, metadata);
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc
index 687ecafe0c..80c630c620 100644
--- a/tensorflow/compiler/xla/service/compiler.cc
+++ b/tensorflow/compiler/xla/service/compiler.cc
@@ -45,7 +45,7 @@ Compiler::ComputeDefaultBackendConfig(const HloInstruction& hlo,
 // Define a default version where metadata is not used.
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
 Compiler::CompileAheadOfTime(
-    std::vector<std::unique_ptr<HloModule>> modules,
+    std::unique_ptr<HloModuleGroup> module_group,
     const AotCompilationOptions& options,
     std::unique_ptr<AotCompilationMetadata>* metadata) {
   if (metadata != nullptr) {
@@ -53,7 +53,7 @@ Compiler::CompileAheadOfTime(
         "Populating AotCompilationMetadata is not implemented on this "
         "compiler.");
   }
-  return CompileAheadOfTime(std::move(modules), options);
+  return CompileAheadOfTime(std::move(module_group), options);
 }
 
 /* static */ std::map<se::Platform::Id, Compiler::CompilerFactory>*
diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h
index 1fdda31c34..9ab179303b 100644
--- a/tensorflow/compiler/xla/service/compiler.h
+++ b/tensorflow/compiler/xla/service/compiler.h
@@ -32,6 +32,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
+#include "tensorflow/compiler/xla/service/hlo_module_group.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -135,6 +136,12 @@ class Compiler {
       std::unique_ptr<HloModule> module, se::StreamExecutor* executor,
       DeviceMemoryAllocator* device_allocator) = 0;
 
+  // Optimizes a HLO module group, a set of module which runs concurrently on
+  // multiple devices potentially communicating data between the modules.
+  virtual Status RunHloPassesOnModuleGroup(
+      HloModuleGroup* module_group, se::StreamExecutor* executor,
+      DeviceMemoryAllocator* device_allocator) = 0;
+
   // Compiles the HLO module for execution on a device given by the executor,
   // and returns an executable object or an error status. No HLO passes are
   // applied to module. Generally a module should be passed through RunHloPasses
@@ -145,12 +152,18 @@ class Compiler {
   // (not just type of device) indicated by the executor.
   //
   // device_allocator is optional; see RunHloPasses.
-  //
-  // Use the overload below to compile computations that run in parallel.
   virtual StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> module, se::StreamExecutor* executor,
       DeviceMemoryAllocator* device_allocator) = 0;
 
+  // Compiles a set of HLO modules that can run in parallel, potentially
+  // communicating data between the modules.
+  virtual StatusOr<std::vector<std::unique_ptr<Executable>>>
+  RunBackendOnModuleGroup(
+      std::unique_ptr<HloModuleGroup> module_group,
+      std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+      DeviceMemoryAllocator* device_allocator) = 0;
+
   // Compiles a set of HLO modules that can run in parallel, potentially
   // communicating data between the modules, and returns a corresponding
   // sequence of executable objects.
@@ -160,7 +173,7 @@ class Compiler {
   // TODO(b/68666782): Remove this method after adding support for multiple
   // modules to RunHloPasses and RunBackends.
   virtual StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::vector<std::unique_ptr<HloModule>> modules,
+      std::unique_ptr<HloModuleGroup> module_group,
       std::vector<std::vector<se::StreamExecutor*>> stream_exec,
       DeviceMemoryAllocator* device_allocator) = 0;
 
@@ -184,16 +197,16 @@ class Compiler {
   ComputeDefaultBackendConfig(const HloInstruction& hlo,
                               se::StreamExecutor* executor) const;
 
-  // Compiles the HLO module for ahead-of-time execution.  This is intended for
-  // use in static compilation.
+  // Compiles the HLO module group for ahead-of-time execution.  This is
+  // intended for use in static compilation.
   virtual StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      const AotCompilationOptions& options) = 0;
 
   // Similar to CompileAheadOfTime above but AotCompilationMetadata
   // has an argument that can be populated during compilation.
   virtual StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      const AotCompilationOptions& options,
                      std::unique_ptr<AotCompilationMetadata>* metadata);
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 68c715a086..da01c0caf2 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -676,9 +676,12 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
+CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                                 const AotCompilationOptions& aot_options) {
-  TF_RET_CHECK(!modules.empty());
+  TF_RET_CHECK(!module_group->empty());
+  std::vector<std::unique_ptr<HloModule>> modules =
+      module_group->ConsumeModules();
+
   std::call_once(llvm_command_line_options_initialized,
                  &llvm_ir::InitializeLLVMCommandLineOptions,
                  modules[0]->config());
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
index f2af923782..c67307548d 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h
@@ -142,7 +142,7 @@ class CpuCompiler : public LLVMCompiler {
       DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      const AotCompilationOptions& options) override;
 
   se::Platform::Id PlatformId() const override;
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 829d1499bc..791d414c91 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -825,9 +825,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(const string& ptx,
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-NVPTXCompiler::CompileAheadOfTime(
-    std::vector<std::unique_ptr<HloModule>> module,
-    const AotCompilationOptions& options) {
+NVPTXCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
+                                  const AotCompilationOptions& options) {
   return Unimplemented(
       "not yet implemented: NVPTXCompiler::CompileAheadOfTime");
 }
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
index c4a0b727cd..f79ae2990a 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
@@ -59,7 +59,7 @@ class NVPTXCompiler : public LLVMCompiler {
       DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> module,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      AotCompilationOptions const& options) override;
 
   se::Platform::Id PlatformId() const override;
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.cc b/tensorflow/compiler/xla/service/hlo_module_group.cc
index f9b56ef464..8999ac9f32 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group.cc
@@ -17,9 +17,8 @@ limitations under the License.
 
 namespace xla {
 
-HloModuleGroup::HloModuleGroup(absl::string_view name,
-                               std::unique_ptr<HloModule> module)
-    : name_(name) {
+HloModuleGroup::HloModuleGroup(std::unique_ptr<HloModule> module)
+    : name_(module->name()) {
   push_back(std::move(module));
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.h b/tensorflow/compiler/xla/service/hlo_module_group.h
index 7338be8b9c..7c39cf1781 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group.h
+++ b/tensorflow/compiler/xla/service/hlo_module_group.h
@@ -35,7 +35,7 @@ class HloModuleGroup {
   explicit HloModuleGroup(absl::string_view name) : name_(name) {}
 
   // Construct a module group containing a single module.
-  HloModuleGroup(absl::string_view name, std::unique_ptr<HloModule> module);
+  explicit HloModuleGroup(std::unique_ptr<HloModule> module);
 
   // Construct a module group containing any number of modules.
   HloModuleGroup(absl::string_view name,
@@ -50,11 +50,16 @@ class HloModuleGroup {
   // Add a module to the back of vector of modules in the group.
   void push_back(std::unique_ptr<HloModule> module);
 
+  // Replaces the existing module at the given index with the given module. The
+  // existing module is discarded.
+  void ReplaceModule(int index, std::unique_ptr<HloModule> module);
+
   // Moves all modules from the group into the returned vector. After this
   // method runs, the module group will be empty.
   std::vector<std::unique_ptr<HloModule>> ConsumeModules();
 
   string name() const { return name_; }
+
   string ToString() const;
 
   // Serialize the module group to/from a proto.
@@ -63,6 +68,12 @@ class HloModuleGroup {
       const HloModuleGroupProto& proto,
       absl::Span<const HloModuleConfig> module_configs);
 
+  // Returns the number of modules in the module group.
+  int size() const { return modules_.size(); }
+
+  // Returns true if there are no modules in the module group.
+  bool empty() const { return modules_.empty(); }
+
  private:
   string name_;
 
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_test.cc b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
index b7b12cb72b..5a9a86af56 100644
--- a/tensorflow/compiler/xla/service/hlo_module_group_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
@@ -46,7 +46,7 @@ ENTRY %entry (x: f32[], y: f32[]) -> f32[] {
 )";
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
                           ParseHloString(text));
-  HloModuleGroup group(TestName(), std::move(module));
+  HloModuleGroup group(std::move(module));
 
   EXPECT_EQ(group.modules().size(), 1);
   EXPECT_THAT(
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 7c79eb7d79..26643667c8 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -57,6 +57,12 @@ StatusOr<std::unique_ptr<HloModule>> InterpreterCompiler::RunHloPasses(
   return std::move(hlo_module);
 }
 
+Status InterpreterCompiler::RunHloPassesOnModuleGroup(
+    HloModuleGroup* module_group, se::StreamExecutor* executor,
+    DeviceMemoryAllocator* device_allocator) {
+  return Unimplemented("Module group compilation not supported on Interpreter");
+}
+
 StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
     std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
     DeviceMemoryAllocator* /*device_allocator*/) {
@@ -76,17 +82,26 @@ StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
   return std::move(executable);
 }
 
+StatusOr<std::vector<std::unique_ptr<Executable>>>
+InterpreterCompiler::RunBackendOnModuleGroup(
+    std::unique_ptr<HloModuleGroup> module_group,
+    std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+    DeviceMemoryAllocator* device_allocator) {
+  return Unimplemented(
+      "Module group compilation is not supported on Interpreter.");
+}
+
 StatusOr<std::vector<std::unique_ptr<Executable>>> InterpreterCompiler::Compile(
-    std::vector<std::unique_ptr<HloModule>> /*hlo_modules*/,
+    std::unique_ptr<HloModuleGroup> /*module_group*/,
     std::vector<std::vector<se::StreamExecutor*>> /*stream_execs*/,
     DeviceMemoryAllocator* /*device_allocator*/) {
-  return tensorflow::errors::Unimplemented(
-      "Compilation of multiple HLO modules is not supported on Interpreter.");
+  return Unimplemented(
+      "Module group compilation is not supported on Interpreter.");
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
 InterpreterCompiler::CompileAheadOfTime(
-    std::vector<std::unique_ptr<HloModule>> hlo_modules,
+    std::unique_ptr<HloModuleGroup> module_group,
     const AotCompilationOptions& aot_options) {
   return tensorflow::errors::InvalidArgument(
       "AOT compilation not supported on Interpreter");
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h
index e90ae3e818..d8cb32c0be 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.h
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.h
@@ -46,18 +46,25 @@ class InterpreterCompiler : public Compiler {
   StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
       std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
       DeviceMemoryAllocator* device_allocator) override;
+  Status RunHloPassesOnModuleGroup(
+      HloModuleGroup* module_group, se::StreamExecutor* executor,
+      DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::unique_ptr<Executable>> RunBackend(
       std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
       DeviceMemoryAllocator* device_allocator) override;
+  StatusOr<std::vector<std::unique_ptr<Executable>>> RunBackendOnModuleGroup(
+      std::unique_ptr<HloModuleGroup> module_group,
+      std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+      DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::vector<std::unique_ptr<HloModule>> hlo_modules,
+      std::unique_ptr<HloModuleGroup> module_group,
       std::vector<std::vector<se::StreamExecutor*>> stream_exec,
       DeviceMemoryAllocator* device_allocator) override;
 
   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> hlo_modules,
+  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                      const AotCompilationOptions& aot_options) override;
 
   HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override;
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc
index b17c9d5045..d287aa4ec7 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.cc
+++ b/tensorflow/compiler/xla/service/llvm_compiler.cc
@@ -21,8 +21,24 @@ limitations under the License.
 #endif
 
 namespace xla {
+Status LLVMCompiler::RunHloPassesOnModuleGroup(
+    HloModuleGroup* module_group, se::StreamExecutor* executor,
+    DeviceMemoryAllocator* device_allocator) {
+  return Unimplemented(
+      "Model partitioning not implemented for the CPU/GPU compilers!");
+}
+
+StatusOr<std::vector<std::unique_ptr<Executable>>>
+LLVMCompiler::RunBackendOnModuleGroup(
+    std::unique_ptr<HloModuleGroup> module_group,
+    std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+    DeviceMemoryAllocator* device_allocator) {
+  return Unimplemented(
+      "Model partitioning not implemented for the CPU/GPU compilers!");
+}
+
 StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile(
-    std::vector<std::unique_ptr<HloModule>> modules,
+    std::unique_ptr<HloModuleGroup> module_group,
     std::vector<std::vector<se::StreamExecutor*>> stream_execs,
     DeviceMemoryAllocator* device_allocator) {
   // Tensorflow tries to enable the following behaviors in all its threads:
@@ -38,6 +54,8 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> LLVMCompiler::Compile(
   tensorflow::port::ScopedDontFlushDenormal dont_flush_denormals;
 
   std::vector<std::unique_ptr<Executable>> result;
+  std::vector<std::unique_ptr<HloModule>> modules =
+      module_group->ConsumeModules();
   for (size_t i = 0; i < modules.size(); i++) {
     if (stream_execs[i].size() != 1) {
       return Unimplemented(
diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h
index f1c623508c..86abd5da01 100644
--- a/tensorflow/compiler/xla/service/llvm_compiler.h
+++ b/tensorflow/compiler/xla/service/llvm_compiler.h
@@ -69,8 +69,17 @@ class LLVMCompiler : public Compiler {
   using Compiler::RunBackend;
   using Compiler::RunHloPasses;
 
+  Status RunHloPassesOnModuleGroup(
+      HloModuleGroup* module_group, se::StreamExecutor* executor,
+      DeviceMemoryAllocator* device_allocator) override;
+
+  StatusOr<std::vector<std::unique_ptr<Executable>>> RunBackendOnModuleGroup(
+      std::unique_ptr<HloModuleGroup> module_group,
+      std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+      DeviceMemoryAllocator* device_allocator) override;
+
   StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::vector<std::unique_ptr<HloModule>> modules,
+      std::unique_ptr<HloModuleGroup> module_group,
       std::vector<std::vector<se::StreamExecutor*>> stream_execs,
       DeviceMemoryAllocator* device_allocator) override;
 
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index d290c0eb5d..cb6a9e6707 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -341,18 +341,19 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> Service::BuildExecutables(
   }
 
   CHECK_EQ(module_protos.size(), module_configs.size());
-  std::vector<std::unique_ptr<HloModule>> modules;
+  auto module_group =
+      absl::make_unique<HloModuleGroup>(module_protos[0]->name());
   for (int64 i = 0; i < module_protos.size(); ++i) {
     const HloModuleProto* proto = module_protos[i];
     const HloModuleConfig& config = *module_configs[i];
     TF_ASSIGN_OR_RETURN(auto module, CreateModuleFromProto(*proto, config));
-    modules.push_back(std::move(module));
+    module_group->push_back(std::move(module));
   }
 
   TF_ASSIGN_OR_RETURN(
       std::vector<std::unique_ptr<Executable>> executables,
-      backend->compiler()->Compile(std::move(modules), std::move(executors),
-                                   device_allocator));
+      backend->compiler()->Compile(std::move(module_group),
+                                   std::move(executors), device_allocator));
 
   for (size_t i = 0; i < module_protos.size(); ++i) {
     if (!module_configs[i]->debug_options().xla_dump_executions_to().empty()) {
diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.cc b/tensorflow/compiler/xla/tests/codegen_test_base.cc
index 022641394f..fbebe04087 100644
--- a/tensorflow/compiler/xla/tests/codegen_test_base.cc
+++ b/tensorflow/compiler/xla/tests/codegen_test_base.cc
@@ -32,11 +32,10 @@ StatusOr<std::unique_ptr<AotCompilationResult>>
 CodegenTestBase::CompileToAotCompilationResult(
     std::unique_ptr<HloModule> hlo_module,
     const AotCompilationOptions& options) {
-  std::vector<std::unique_ptr<HloModule>> hlo_modules;
-  hlo_modules.push_back(std::move(hlo_module));
+  auto module_group = absl::make_unique<HloModuleGroup>(std::move(hlo_module));
   TF_ASSIGN_OR_RETURN(
       std::vector<std::unique_ptr<AotCompilationResult>> results,
-      backend().compiler()->CompileAheadOfTime(std::move(hlo_modules),
+      backend().compiler()->CompileAheadOfTime(std::move(module_group),
                                                options));
   return std::move(results.front());
 }
diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
index 8d65869557..c622b29509 100644
--- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
+++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc
@@ -93,15 +93,16 @@ class LLVMCompilerTest : public ::testing::Test {
     std::unique_ptr<HloModule> hlo_module = CreateNewModule();
     hlo_module->AddEntryComputation(builder.Build());
 
-    std::vector<std::unique_ptr<HloModule>> modules;
-    modules.push_back(hlo_module->Clone());
-    modules.push_back(std::move(hlo_module));
+    auto module_group = absl::make_unique<HloModuleGroup>("test_module_group");
+    module_group->push_back(hlo_module->Clone());
+    module_group->push_back(std::move(hlo_module));
 
     std::vector<std::vector<se::StreamExecutor *>> executors;
     executors.push_back({backend_->default_stream_executor()});
     executors.push_back({backend_->default_stream_executor()});
 
-    EXPECT_IS_OK(compiler->Compile(std::move(modules), std::move(executors),
+    EXPECT_IS_OK(compiler->Compile(std::move(module_group),
+                                   std::move(executors),
                                    /*device_allocator=*/nullptr));
   }
 
@@ -150,12 +151,12 @@ TEST_F(GpuCompilerTest, HooksTest) {
   TestCompilerHooks(&compiler);
 }
 
-TEST_F(CpuCompilerTest, MultiModuleCompilation) {
+TEST_F(CpuCompilerTest, CpuMultiModuleCompilation) {
   cpu::CpuCompiler compiler;
   TestMultiModuleCompilation(&compiler);
 }
 
-TEST_F(GpuCompilerTest, MultModuleCompilation) {
+TEST_F(GpuCompilerTest, NVPTXMultiModuleCompilation) {
   gpu::NVPTXCompiler compiler;
   TestMultiModuleCompilation(&compiler);
 }
-- 
GitLab


From 2ad4975e11b96b5b9f28c46c1a6d0b5d330b2bcd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 14:06:06 -0700
Subject: [PATCH 0900/1085] Speed up Grappler by using FlatMap/FlatSet instead
 of std::unordered_map/std::unordered_set in NodeMap and SetVector. Fix a
 latent bug in DedupComputations where we depended on pointer stability in
 NodeMap.

Benchmark numbers for running Grappler on the Transformer model:

name                                old time/op             new time/op             delta
BM_OptimizeTransformer              6.55s ? 0%              5.65s ? 0%   ~     (p=1.000 n=1+1)

name                                old allocs/op           new allocs/op           delta
BM_OptimizeTransformer              24.4M ? 0%              23.3M ? 0%   ~     (p=1.000 n=1+1)

name                                old peak-mem(Bytes)/op  new peak-mem(Bytes)/op  delta
BM_OptimizeTransformer               123M ? 0%               123M ? 0%   ~     (p=1.000 n=1+1)

PiperOrigin-RevId: 216918669
---
 .../core/grappler/optimizers/arithmetic_optimizer.cc |  3 ++-
 tensorflow/core/grappler/utils.h                     | 12 ++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index e2bcf91950..3614b00a50 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -3363,7 +3363,8 @@ void ArithmeticOptimizer::DedupComputations() {
       }
       VLOG(3) << "Remove duplicated node: node=" << node->name()
               << " representative=" << rep->name();
-      const std::set<NodeDef*>& fanouts = node_map_->GetOutputs(node->name());
+      const std::set<NodeDef*>& tmp = node_map_->GetOutputs(node->name());
+      std::vector<NodeDef*> fanouts(tmp.begin(), tmp.end());
       for (NodeDef* fanout : fanouts) {
         for (int i = 0; i < fanout->input_size(); ++i) {
           string* fanout_input = fanout->mutable_input(i);
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index 95126d470c..0168ab1da3 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -17,8 +17,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPPLER_UTILS_H_
 
 #include <functional>
-#include <unordered_map>
-#include <unordered_set>
 #include <vector>
 
 #include "tensorflow/core/framework/graph.pb.h"
@@ -28,6 +26,8 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/lib/gtl/flatmap.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 
 namespace tensorflow {
@@ -57,8 +57,8 @@ class NodeMap {
 
  private:
   const std::set<NodeDef*> empty_set_;
-  std::unordered_map<string, NodeDef*> nodes_;
-  std::unordered_map<string, std::set<NodeDef*>> outputs_;
+  gtl::FlatMap<string, NodeDef*> nodes_;
+  gtl::FlatMap<string, std::set<NodeDef*>> outputs_;
 };
 
 // A vector with a set. The set stores the same elements as the vector, and
@@ -90,7 +90,7 @@ class SetVector {
   void Reserve(int64 size) { vector_.reserve(size); }
 
  private:
-  std::unordered_set<T, Hash> set_;
+  gtl::FlatSet<T, Hash> set_;
   std::vector<T> vector_;
 };
 
@@ -331,7 +331,7 @@ class SimpleGraphView {
  private:
   const GraphDef* graph_;  // Not owned.
   std::vector<string> index_to_name_;
-  std::unordered_map<string, int> name_to_index_;
+  gtl::FlatMap<string, int> name_to_index_;
   std::vector<gtl::InlinedVector<int, 4>> inputs_;
   std::vector<gtl::InlinedVector<int, 2>> outputs_;
 };
-- 
GitLab


From 5e740d80c3e3c5151cc4760bdfef75d80fa409db Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Fri, 12 Oct 2018 14:13:31 -0700
Subject: [PATCH 0901/1085] [XLA] Allow whitespace in shapes and layouts in HLO
 text.

PiperOrigin-RevId: 216919890
---
 tensorflow/compiler/xla/service/hlo_lexer.cc       |  2 +-
 tensorflow/compiler/xla/service/hlo_parser.cc      |  3 ++-
 tensorflow/compiler/xla/service/hlo_parser_test.cc | 12 ++++++++++++
 tensorflow/compiler/xla/shape_util.cc              |  3 ++-
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_lexer.cc b/tensorflow/compiler/xla/service/hlo_lexer.cc
index d9be841dd7..971a9a2063 100644
--- a/tensorflow/compiler/xla/service/hlo_lexer.cc
+++ b/tensorflow/compiler/xla/service/hlo_lexer.cc
@@ -204,7 +204,7 @@ TokKind HloLexer::LexIdentifier() {
     auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
     // 'consumable' will be advanced iff its prefix matches the pattern.
     static LazyRE2 shape_pattern = {
-        R"(^(\w*\d*)\[([\d,]*)\](?:(dense|sparse)?{([\d,]+)})?)"};
+        R"(^(\w*\d*)\[([\d,\s]*)\](?:(dense|sparse)?{([\d,\s]+)})?)"};
     if (RE2::Consume(&consumable, *shape_pattern)) {
       auto status_or_shape = ShapeUtil::ParseShapeString(
           StringPieceFromPointers(token_start_, consumable.begin()));
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 128113f7a5..81f091238e 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -2982,7 +2982,8 @@ bool HloParser::ParseShape(Shape* result) {
   }
 
   if (lexer_.GetKind() != TokKind::kShape) {
-    return TokenError("expects shape");
+    return TokenError(absl::StrCat("expected shape, saw ",
+                                   TokKindToString(lexer_.GetKind())));
   }
   *result = lexer_.GetShapeVal();
   lexer_.Lex();
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index ef2e74588c..19f84d8bd2 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -2149,6 +2149,18 @@ ENTRY %CustomCallIncompatibleOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]
                   "operand 1 is not compatible with operand shape");
 }
 
+TEST_F(HloParserTest, AllowShapeWhitespace) {
+  const string text = R"(
+HloModule module
+
+ENTRY entry {
+  ROOT root = f32[ 1, 2,3, 4, 5]{0, 1, 2,3, 4 } parameter(0)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseHloString(text));
+}
+
 // custom call incompatible shape.
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 9267de3cfc..7a34c0fb26 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -597,7 +597,8 @@ StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
   // we convert in to the RE2-consumable type and then consume the corresponding
   // amount from our string_view type.
   static LazyRE2 shape_pattern = {
-      "^(\\w*\\d*)\\[([\\d,]*)\\](?:\\s*(dense|sparse)?\\s*{([\\d,]+)})?"};
+      "^(\\w*\\d*)\\[([\\d,\\s]*)\\](?:\\s*(dense|sparse)?\\s*{([\\d,\\s]+)})"
+      "?"};
   tensorflow::RegexpStringPiece s_consumable(s->data(), s->size());
   if (RE2::Consume(&s_consumable, *shape_pattern, &element_type_string,
                    &dimensions_string, &format_string, &layout_string)) {
-- 
GitLab


From d9b77fa640df3c2c079fffa314312e11814478bb Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Fri, 12 Oct 2018 14:19:47 -0700
Subject: [PATCH 0902/1085] Replacing legacy_init_op argument in
 SavedModelBuilder with main_op.

PiperOrigin-RevId: 216920955
---
 tensorflow/contrib/learn/python/learn/estimators/estimator.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index c1de42782e..3efceab337 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -1433,13 +1433,12 @@ class Estimator(BaseEstimator):
                            'must specify no transforms.')
         untransformed_tags = graph_rewrite_specs[0].tags
 
-        # TODO(soergel): switch to main_op or otherwise update when dust settles
         builder.add_meta_graph_and_variables(
             session,
             untransformed_tags,
             signature_def_map=signature_def_map,
             assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS),
-            legacy_init_op=init_op,
+            main_op=init_op,
             strip_default_attrs=strip_default_attrs)
 
     # pylint: disable=protected-access
-- 
GitLab


From 0500c1e453dda481adb2e8638de234ed3f1cd551 Mon Sep 17 00:00:00 2001
From: Anna Revinskaya <annarev@google.com>
Date: Fri, 12 Oct 2018 14:25:56 -0700
Subject: [PATCH 0903/1085] Merging hlo_verifier.cc

---
 .../compiler/xla/service/hlo_verifier.cc      | 27 -------------------
 1 file changed, 27 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index b694c62648..ba95cef21d 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -884,7 +884,6 @@ Status VerifyEntryAndExitShapes(const HloModule& module) {
   return Status::OK();
 }
 
-<<<<<<< HEAD
 // Verifies that entry computation layout matches characteristics of
 // entry computation.
 Status CheckEntryComputationLayout(const HloModule& module) {
@@ -935,8 +934,6 @@ Status CheckEntryComputationLayout(const HloModule& module) {
   return Status::OK();
 }
 
-=======
->>>>>>> upstream/r1.12
 // Checks if the given two instructions share the same channel id.
 Status CheckSameChannel(const HloInstruction* instr1,
                         const HloInstruction* instr2) {
@@ -1184,14 +1181,10 @@ Status CheckElementwiseInstruction(HloInstruction* instruction) {
 // not check result shape as that is checked in the ShapeVerifier.
 class InstructionVerifier : public DfsHloVisitorWithDefault {
  public:
-<<<<<<< HEAD
   explicit InstructionVerifier(std::function<bool(const HloInstruction*)>
                                    instruction_can_change_layout_func)
       : instruction_can_change_layout_func_(
             instruction_can_change_layout_func) {}
-=======
-  InstructionVerifier() {}
->>>>>>> upstream/r1.12
 
   Status DefaultAction(HloInstruction*) override { return Status::OK(); }
 
@@ -1240,7 +1233,6 @@ class InstructionVerifier : public DfsHloVisitorWithDefault {
           "True computation %s of %s must have 1 parameter insted of %d",
           conditional->true_computation()->name(), conditional->ToString(),
           conditional->true_computation()->num_parameters());
-<<<<<<< HEAD
     }
     if (conditional->false_computation()->num_parameters() != 1) {
       return FailedPrecondition(
@@ -1248,15 +1240,6 @@ class InstructionVerifier : public DfsHloVisitorWithDefault {
           conditional->false_computation()->name(), conditional->ToString(),
           conditional->false_computation()->num_parameters());
     }
-=======
-    }
-    if (conditional->false_computation()->num_parameters() != 1) {
-      return FailedPrecondition(
-          "False computation %s of %s must have 1 parameter insted of %d",
-          conditional->false_computation()->name(), conditional->ToString(),
-          conditional->false_computation()->num_parameters());
-    }
->>>>>>> upstream/r1.12
     return Status::OK();
   }
 
@@ -1300,7 +1283,6 @@ class InstructionVerifier : public DfsHloVisitorWithDefault {
         << " in computation: " << previous->second->parent()->name();
     instructions_by_name_[instruction->name()] = instruction;
     return Status::OK();
-<<<<<<< HEAD
   }
 
   Status Postprocess(HloInstruction* instruction) override {
@@ -1324,18 +1306,13 @@ class InstructionVerifier : public DfsHloVisitorWithDefault {
     }
 
     return Status::OK();
-=======
->>>>>>> upstream/r1.12
   }
 
  private:
   absl::flat_hash_map<string, const HloInstruction*> instructions_by_name_;
-<<<<<<< HEAD
   // Determines whether an instruction can change layouts.
   std::function<bool(const HloInstruction*)>
       instruction_can_change_layout_func_;
-=======
->>>>>>> upstream/r1.12
 };
 
 }  // namespace
@@ -1349,12 +1326,8 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     std::unique_ptr<ShapeVerifier> shape_verifier = shape_verifier_factory_();
     TF_RETURN_IF_ERROR(computation->Accept(shape_verifier.get()));
 
-<<<<<<< HEAD
     InstructionVerifier instruction_verifier(
         instruction_can_change_layout_func_);
-=======
-    InstructionVerifier instruction_verifier;
->>>>>>> upstream/r1.12
     TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier));
   }
 
-- 
GitLab


From 2ce9f9046b269ed30bda0f8a7374e76cc9956529 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Fri, 12 Oct 2018 14:23:12 -0700
Subject: [PATCH 0904/1085] [XLA:GPU] Fix scatter oob check computation

This was comparing the index after adding it to the window, and then comparing
against the window dimension. This means that the bounds check was only correct
for the first element of a window. Instead compare the scatter index, which is
the same for all elements of a window.

PiperOrigin-RevId: 216921512
---
 .../xla/service/gpu/ir_emitter_unnested.cc        | 15 ++++++++-------
 tensorflow/compiler/xla/tests/scatter_test.cc     |  3 +--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index fd624fda08..008398328c 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -2134,20 +2134,21 @@ Status IrEmitterUnnested::EmitScatter(
           scatter_indices_gen(raw_scatter_index_index.SourceIndexOfReshape(
               scatter_indices_shape, scatter_indices->shape(), &b_)));
       // And add the index to our window index. This yields the output index.
+      llvm::Value* casted_scatter_index =
+          IntCast(loaded_scatter_index, index.GetType(),
+                  /*isSigned=*/true);
       llvm::Value* dim_offset =
-          Add(input_window_multidim[operand_dim],
-              IntCast(loaded_scatter_index, index.GetType(),
-                      /*isSigned=*/true));
+          Add(input_window_multidim[operand_dim], casted_scatter_index);
       input_window_multidim[operand_dim] = dim_offset;
 
       // Also do the bounds check now.
       int64 max_index = operand->shape().dimensions(operand_dim) -
                         input_window_bounds[operand_dim] + 1;
-      // is_in_bounds = dim_offset >= 0 && dim_offset < dim_size-window_size+1
-      //   --> dim_offset u< dim_size-window_size+1
+      // is_in_bounds = index >= 0 && index < dim_size-window_size+1
+      //   --> index u< dim_size-window_size+1
       is_in_bounds =
-          And(is_in_bounds,
-              ICmpULT(dim_offset, index.GetConstantWithIndexType(max_index)));
+          And(is_in_bounds, ICmpULT(casted_scatter_index,
+                                    index.GetConstantWithIndexType(max_index)));
     }
 
     llvm_ir::LlvmIfData if_window_in_bounds_data = llvm_ir::EmitIfThenElse(
diff --git a/tensorflow/compiler/xla/tests/scatter_test.cc b/tensorflow/compiler/xla/tests/scatter_test.cc
index 1854224dff..7e1f4aa0eb 100644
--- a/tensorflow/compiler/xla/tests/scatter_test.cc
+++ b/tensorflow/compiler/xla/tests/scatter_test.cc
@@ -129,8 +129,7 @@ ENTRY main {
   RunTest(hlo_text, &operand, &scatter_indices, &updates);
 }
 
-// TODO(b/117627031): fails on GPU on 2018-10-11.
-XLA_TEST_F(ScatterTest, DISABLED_ON_GPU(SimpleR4)) {
+XLA_TEST_F(ScatterTest, SimpleR4) {
   const char* hlo_text = R"(
 HloModule SimpleR4
 
-- 
GitLab


From 8e4b6875a11969672e40d866a936eb1937167856 Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Fri, 12 Oct 2018 14:34:05 -0700
Subject: [PATCH 0905/1085] Fix a tiny bug

PiperOrigin-RevId: 216923314
---
 tensorflow/contrib/compiler/xla.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py
index 28d7d653ee..f2636e190c 100644
--- a/tensorflow/contrib/compiler/xla.py
+++ b/tensorflow/contrib/compiler/xla.py
@@ -272,7 +272,7 @@ def _compile_internal(computation, inputs=None):
     raise TypeError(
         'Supplied computation cannot be called with the specified inputs. You '
         'specified %d inputs: %s, but the computation needs %s' %
-        (input_arity, str([i.name for i in inputs[0]]), arg_error))
+        (input_arity, str([i.name for i in inputs]), arg_error))
 
   cluster_name = ops.get_default_graph().unique_name('cluster')
   pivot = control_flow_ops.no_op(name=cluster_name + '/pivot')
-- 
GitLab


From a1fc91b0ad454b9d78c0aedd9dad511ce1a27a2c Mon Sep 17 00:00:00 2001
From: Peter Ma <pcma@google.com>
Date: Fri, 12 Oct 2018 14:42:26 -0700
Subject: [PATCH 0906/1085] Slight change to the API of VirtualScheduler;
 Include RunMetadata in AnalyticalCostEstimator; Add more fields in
 CostGraphDef in AnalyticalCostEstimator

PiperOrigin-RevId: 216924665
---
 .../core/grappler/clusters/virtual_cluster.cc |   4 +-
 tensorflow/core/grappler/costs/BUILD          |   2 +
 .../costs/analytical_cost_estimator.cc        | 175 +++++++++++++-----
 .../costs/analytical_cost_estimator.h         |  25 ++-
 .../core/grappler/costs/virtual_scheduler.cc  |  87 ++++++---
 .../core/grappler/costs/virtual_scheduler.h   |  26 ++-
 .../grappler/costs/virtual_scheduler_test.cc  |  68 ++++---
 7 files changed, 260 insertions(+), 127 deletions(-)

diff --git a/tensorflow/core/grappler/clusters/virtual_cluster.cc b/tensorflow/core/grappler/clusters/virtual_cluster.cc
index f543dca49e..295b3c12e6 100644
--- a/tensorflow/core/grappler/clusters/virtual_cluster.cc
+++ b/tensorflow/core/grappler/clusters/virtual_cluster.cc
@@ -70,8 +70,8 @@ Status VirtualCluster::Run(const GraphDef& graph,
   item.graph = graph;
   item.feed = feed;
   item.fetch = fetch;
-  VirtualScheduler scheduler(&item, true, this, node_manager_.get());
-  TF_RETURN_IF_ERROR(scheduler.Init());
+  VirtualScheduler scheduler(true, this, node_manager_.get());
+  TF_RETURN_IF_ERROR(scheduler.Init(&item));
 
   if (metadata) {
     metadata->clear_step_stats();
diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD
index 46eacd3a06..01e8f2b185 100644
--- a/tensorflow/core/grappler/costs/BUILD
+++ b/tensorflow/core/grappler/costs/BUILD
@@ -312,6 +312,8 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:op_types",
+        "//tensorflow/core/grappler:utils",
     ] + tf_protos_grappler(),
 )
 
diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator.cc
index 0690640ffa..8a6d575956 100644
--- a/tensorflow/core/grappler/costs/analytical_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/analytical_cost_estimator.cc
@@ -27,90 +27,152 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/virtual_placer.h"
 #include "tensorflow/core/grappler/costs/virtual_scheduler.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/utils.h"
 
 namespace tensorflow {
 namespace grappler {
 
+namespace {
+
+// Helper function in PredictCosts() to add cost node to cost_graph.
+void AddCostNode(ReadyNodeManager* node_manager, const OpContext& op_context,
+                 int node_id, const Costs& node_costs,
+                 gtl::FlatMap<string, CostGraphDef::Node*>* name_to_cost_node,
+                 gtl::FlatMap<string, int>* name_to_id,
+                 CostGraphDef* cost_graph) {
+  const string& op_name = op_context.name;
+  auto it = name_to_cost_node->find(op_name);
+  CostGraphDef::Node* node;
+  if (it != name_to_cost_node->end()) {
+    node = it->second;
+    node->clear_input_info();
+    node->clear_output_info();
+  } else {
+    node = cost_graph->add_node();
+    (*name_to_cost_node)[op_name] = node;
+    node->set_name(op_name);
+    node->set_id(node_id);
+    (*name_to_id)[node->name()] = node->id();
+  }
+  // For nodes we have seen before (e.g. Merge nodes are executed twice by
+  // VirtualScheduler), the following fields will be overwritten/updated
+  node->set_device(op_context.device_name);
+  node->set_compute_cost(node_costs.execution_time.asMicroSeconds().count());
+  node->set_compute_time(node_costs.compute_time.asMicroSeconds().count());
+  node->set_memory_time(node_costs.memory_time.asMicroSeconds().count());
+  node->set_inaccurate(node_costs.inaccurate);
+
+  for (const string& input : node_manager->GetCurrNode()->input()) {
+    int input_port;
+    string input_name = ParseNodeName(input, &input_port);
+
+    // All inputs should have been seen already unless this is a Merge node
+    if (name_to_id->find(input_name) == name_to_id->end()) {
+      if (!IsMerge(*node_manager->GetCurrNode()))
+        LOG(ERROR) << "input: " << input
+                   << " not found for non-Merge node: " << op_name;
+
+      // For Merge node, some of inputs may not be seen before
+      // For example, for a typical while loop in tensorflow, Merge node
+      // will be executed twice by VirtualScheduler (one for Enter, the
+      // other for NextIteration), so eventually both inputs will be added
+      continue;
+    }
+
+    if (IsControlInput(input)) {
+      node->add_control_input(name_to_id->at(input_name));
+    } else {
+      auto* input_info = node->add_input_info();
+      input_info->set_preceding_node(name_to_id->at(input_name));
+      input_info->set_preceding_port(input_port);
+    }
+  }
+
+  for (const auto& output : op_context.op_info.outputs()) {
+    auto output_info = node->add_output_info();
+    output_info->set_alias_input_port(-1);
+    output_info->set_dtype(output.dtype());
+    auto shape = output_info->mutable_shape();
+    *shape = output.shape();
+  }
+}
+
+}  // namespace
+
 AnalyticalCostEstimator::AnalyticalCostEstimator(Cluster* cluster,
                                                  bool use_static_shapes)
-    : cluster_(cluster),
-      node_estimator_(new OpLevelCostEstimator()),
-      node_manager_(VirtualScheduler::ReadyNodeManagerFactory("FirstReady")),
-      use_static_shapes_(use_static_shapes) {}
+    : AnalyticalCostEstimator(
+          cluster, absl::make_unique<OpLevelCostEstimator>(),
+          std::unique_ptr<ReadyNodeManager>(
+              VirtualScheduler::ReadyNodeManagerFactory("FirstReady")),
+          use_static_shapes, nullptr) {}
 
 AnalyticalCostEstimator::AnalyticalCostEstimator(
-    Cluster* cluster, OpLevelCostEstimator* node_estimator,
-    ReadyNodeManager* node_manager, bool use_static_shapes)
+    Cluster* cluster, std::unique_ptr<OpLevelCostEstimator> node_estimator,
+    std::unique_ptr<ReadyNodeManager> node_manager, bool use_static_shapes,
+    RunMetadata* run_metadata)
     : cluster_(cluster),
-      node_estimator_(node_estimator),
-      node_manager_(node_manager),
-      use_static_shapes_(use_static_shapes) {}
+      node_estimator_(std::move(node_estimator)),
+      node_manager_(std::move(node_manager)),
+      use_static_shapes_(use_static_shapes),
+      run_metadata_(run_metadata) {
+  scheduler_ = absl::make_unique<VirtualScheduler>(use_static_shapes_, cluster_,
+                                                   node_manager_.get());
+}
 
 Status AnalyticalCostEstimator::Initialize(const GrapplerItem& item) {
   item_ = item;
   return Status::OK();
 }
 
+// TODO(b/67607683): unify logic with VirtualCluster logic
 Status AnalyticalCostEstimator::PredictCosts(const GraphDef& optimized_graph,
                                              CostGraphDef* cost_graph,
                                              Costs* costs) const {
   GrapplerItem item = item_;
   item.graph = optimized_graph;
 
-  std::unordered_map<string, CostGraphDef::Node*> name_to_cost;
+  auto status = scheduler_->Init(&item);
+  if (!status.ok()) {
+    costs->execution_time = Costs::Duration::max();
+    return status;
+  }
+
+  gtl::FlatMap<string, CostGraphDef::Node*> name_to_cost_node;
   if (cost_graph) {
+    // TODO(pcma): Clear nodes in cost_graph after we make sure we always pass
+    // in an empty cost_graph (a non-empty but incomplete cost_graph will cause
+    // problems, e.g., no node_id in cost_graph)
     for (auto& node : *cost_graph->mutable_node()) {
-      name_to_cost[node.name()] = &node;
+      name_to_cost_node[node.name()] = &node;
     }
   }
   std::vector<string> inaccurate_nodes;
   int nodes_executed = 0;
-  VirtualScheduler scheduler(&item, use_static_shapes_, cluster_,
-                             node_manager_.get());
-  auto status = scheduler.Init();
-  if (!status.ok()) {
-    costs->execution_time = Costs::Duration::max();
-    return status;
-  }
+  int node_id = 0;
+  gtl::FlatMap<string, int> name_to_id;
 
   Costs node_costs;
   do {
     ++nodes_executed;
-    OpContext op_context = scheduler.GetCurrNode();
-    const string& op_name = op_context.name;
-
+    OpContext op_context = scheduler_->GetCurrNode();
     node_costs = node_estimator_->PredictCosts(op_context);
+
     if (node_costs.inaccurate) {
-      inaccurate_nodes.push_back(op_name);
+      inaccurate_nodes.push_back(op_context.name);
+      if (node_costs.num_ops_with_unknown_shapes > 0)
+        VLOG(4) << op_context.name << " has "
+                << node_costs.num_ops_with_unknown_shapes << " unknown shapes";
     }
+
+    // TODO(pcma): Add unit tests for generating CostGraphDef
     if (cost_graph) {
-      auto it = name_to_cost.find(op_name);
-      CostGraphDef::Node* cost_node;
-      if (it != name_to_cost.end()) {
-        cost_node = it->second;
-      } else {
-        cost_node = cost_graph->add_node();
-        cost_node->set_name(op_name);
-      }
-      cost_node->set_device(op_context.device_name);
-      cost_node->set_compute_cost(
-          node_costs.execution_time.asMicroSeconds().count());
-      cost_node->set_compute_time(
-          node_costs.compute_time.asMicroSeconds().count());
-      cost_node->set_memory_time(
-          node_costs.memory_time.asMicroSeconds().count());
-      cost_node->set_inaccurate(node_costs.inaccurate);
-      for (const auto& output : op_context.op_info.outputs()) {
-        auto output_info = cost_node->add_output_info();
-        output_info->set_dtype(output.dtype());
-        auto shape = output_info->mutable_shape();
-        *shape = output.shape();
-      }
+      AddCostNode(node_manager_.get(), op_context, node_id++, node_costs,
+                  &name_to_cost_node, &name_to_id, cost_graph);
     }
-  } while (scheduler.MarkCurrNodeExecuted(node_costs));
+  } while (scheduler_->MarkCurrNodeExecuted(node_costs));
 
-  RunMetadata run_metadata;
-  *costs = scheduler.Summary(&run_metadata);
   VLOG(1) << inaccurate_nodes.size() << " out of " << nodes_executed
           << " nodes have inaccurate time estimation";
   if (VLOG_IS_ON(3)) {
@@ -119,10 +181,25 @@ Status AnalyticalCostEstimator::PredictCosts(const GraphDef& optimized_graph,
     }
   }
 
+  *costs = scheduler_->Summary(run_metadata_);
+  // run_metadata_ gets step_stats and parition_graphs from Summary.
+  // Note that cost_graph could already point to the cost_graph field of
+  // run_metadata_, since both are set by the caller.
+  if (run_metadata_ && cost_graph &&
+      run_metadata_->mutable_cost_graph() != cost_graph)
+    *run_metadata_->mutable_cost_graph() = *cost_graph;
+
   if (VLOG_IS_ON(1)) {
-    bool verbosity = VLOG_IS_ON(2);
-    VLOG(1) << GetStatsStringFromRunMetadata(run_metadata, verbosity);
+    bool verbose = VLOG_IS_ON(2);
+    if (run_metadata_) {
+      VLOG(1) << GetStatsStringFromRunMetadata(*run_metadata_, verbose);
+    } else {
+      RunMetadata run_metadata;
+      scheduler_->GenerateRunMetadata(&run_metadata);
+      VLOG(1) << GetStatsStringFromRunMetadata(run_metadata, verbose);
+    }
   }
+
   return Status::OK();
 }
 
diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator.h b/tensorflow/core/grappler/costs/analytical_cost_estimator.h
index dd2738e088..2629672459 100644
--- a/tensorflow/core/grappler/costs/analytical_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/analytical_cost_estimator.h
@@ -39,12 +39,16 @@ class AnalyticalCostEstimator : public CostEstimator {
  public:
   // Does not take ownership of cluster.
   AnalyticalCostEstimator(Cluster* cluster, bool use_static_shapes);
-  // Does not take ownership of the cluster, but takes ownership of the
-  // node_estimator and the node_manager
+  // Does not take ownership of cluster or run_metadata
+  //
+  // When metadata is provided, step_stats and partition_graphs fields will
+  // always be filled during PredictCosts, and the cost_graph field of metadata
+  // will be filled only when cost_graph is not nullptr when invoking
+  // PredictCosts.
   AnalyticalCostEstimator(Cluster* cluster,
-                          OpLevelCostEstimator* node_estimator,
-                          ReadyNodeManager* node_manager,
-                          bool use_static_shapes);
+                          std::unique_ptr<OpLevelCostEstimator> node_estimator,
+                          std::unique_ptr<ReadyNodeManager> node_manager,
+                          bool use_static_shapes, RunMetadata* run_metadata);
   ~AnalyticalCostEstimator() override {}
 
   // Initializes the estimator for the specified grappler item.
@@ -53,16 +57,21 @@ class AnalyticalCostEstimator : public CostEstimator {
 
   // Predict the performance of each node of the optimized graph and annotate
   // the CostGraphDef with the corresponding estimates. Also returns the
-  // expected latency for the whole graph.
+  // expected cost for the whole graph.
   Status PredictCosts(const GraphDef& optimized_graph, CostGraphDef* cost_graph,
-                      Costs* overall_latency) const override;
+                      Costs* cost) const override;
+
+  const VirtualScheduler* GetScheduler() const { return scheduler_.get(); }
 
  private:
-  Cluster* cluster_;  // Not owned.
+  Cluster* cluster_;
   GrapplerItem item_;
   std::unique_ptr<OpLevelCostEstimator> node_estimator_;
   std::unique_ptr<ReadyNodeManager> node_manager_;
   bool use_static_shapes_;
+  std::unique_ptr<VirtualScheduler> scheduler_;
+
+  RunMetadata* run_metadata_;
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 5c5bdad1cb..d3c4686b32 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -275,13 +275,28 @@ bool CompositeNodeManager::Empty() const {
   return empty && send_manager_.Empty() && recv_manager_.Empty();
 }
 
+// TODO(pcma): Modify to return unique_ptr instead
+ReadyNodeManager* VirtualScheduler::ReadyNodeManagerFactory(
+    const string& ready_node_manager) {
+  if (ready_node_manager == "FIFO") {
+    return new FIFOManager();
+  } else if (ready_node_manager == "LIFO") {
+    return new LIFOManager();
+  } else if (ready_node_manager == "FirstReady") {
+    return new FirstReadyManager();
+  } else if (ready_node_manager == "Composite") {
+    return new CompositeNodeManager();
+  }
+  LOG(FATAL) << "Not a valid ready node manager: " << ready_node_manager;
+}
+
 VirtualScheduler::VirtualScheduler(const GrapplerItem* grappler_item,
                                    const bool use_static_shapes,
                                    Cluster* cluster,
                                    ReadyNodeManager* ready_nodes)
     : ready_nodes_(ready_nodes),
       graph_costs_(Costs::ZeroCosts()),
-      graph_properties_(*grappler_item),
+      graph_properties_(new GraphProperties(*grappler_item)),
       cluster_(cluster),
       grappler_item_(grappler_item),
       use_static_shapes_(use_static_shapes),
@@ -290,35 +305,54 @@ VirtualScheduler::VirtualScheduler(const GrapplerItem* grappler_item,
   initialized_ = false;
 }
 
-ReadyNodeManager* VirtualScheduler::ReadyNodeManagerFactory(
-    const string& ready_node_manager) {
-  if (ready_node_manager == "FIFO") {
-    return new FIFOManager();
-  } else if (ready_node_manager == "LIFO") {
-    return new LIFOManager();
-  } else if (ready_node_manager == "FirstReady") {
-    return new FirstReadyManager();
-  } else if (ready_node_manager == "Composite") {
-    return new CompositeNodeManager();
-  }
-  LOG(FATAL) << "Not a valid ready node manager: " << ready_node_manager;
+VirtualScheduler::VirtualScheduler(const bool use_static_shapes,
+                                   Cluster* cluster,
+                                   ReadyNodeManager* ready_nodes)
+    : ready_nodes_(ready_nodes),
+      graph_costs_(Costs::ZeroCosts()),
+      cluster_(cluster),
+      use_static_shapes_(use_static_shapes),
+      placer_(cluster) {
+  graph_costs_.num_ops_total = 0;
+  initialized_ = false;
+}
+
+Status VirtualScheduler::Init(const GrapplerItem* item) {
+  grappler_item_ = item;
+  graph_properties_ = absl::make_unique<GraphProperties>(*item);
+
+  return Init();
 }
 
+// TODO(pcma): Merge with Init(const GrapplerItem* item) when this
+// deprecated API is deleted
 Status VirtualScheduler::Init() {
+  initialized_ = false;
+
+  // Clear all internal states so that the VirtualScheduler is reusable for
+  // different GrapplerItems
+  node_map_.clear();
+  device_.clear();
+  additional_nodes_.clear();
+
+  graph_costs_ = Costs::ZeroCosts();
+  graph_costs_.num_ops_total = 0;
+  op_to_cost_.clear();
+
+  op_counts_.clear();
+  op_costs_.clear();
+
   // Init() preprocesses the input grappler_item and graph_properties to extract
   // necessary information for emulating tensorflow op scheduling and
   // construct internal data structures (NodeState and DeviceState) for virtual
   // scheduling.
   ready_nodes_->Init(GetNodeStates());
+
   // Construct graph properties.
-  Status status;
   if (use_static_shapes_) {
-    status = graph_properties_.InferStatically(true);
+    TF_RETURN_IF_ERROR(graph_properties_->InferStatically(true));
   } else {
-    status = graph_properties_.InferDynamically(cluster_);
-  }
-  if (!status.ok()) {
-    return status;
+    TF_RETURN_IF_ERROR(graph_properties_->InferDynamically(cluster_));
   }
 
   const auto& graph = grappler_item_->graph;
@@ -513,7 +547,7 @@ void VirtualScheduler::MaybeUpdateInputOutput(const NodeDef* node) {
       outputs.push_back(control_message);
     } else {
       auto output_properties =
-          graph_properties_.GetOutputProperties(NodeName(input_source_name));
+          graph_properties_->GetOutputProperties(NodeName(input_source_name));
       // Like with HasInputProperties, if a node does not have output
       // properties, it's likely it was pruned during the shape inference run.
       if (!output_properties.empty()) {
@@ -666,9 +700,9 @@ NodeState& VirtualScheduler::GetNodeStateOrCreateIt(const NodeDef* node) {
   it = node_map_.emplace(node, NodeState()).first;
   auto& node_state = it->second;
   node_state.input_properties =
-      graph_properties_.GetInputProperties(node->name());
+      graph_properties_->GetInputProperties(node->name());
   node_state.output_properties =
-      graph_properties_.GetOutputProperties(node->name());
+      graph_properties_->GetOutputProperties(node->name());
 
   // Some ops may need further processing to the input / output properties:
   // _Send and _Recv.
@@ -982,10 +1016,11 @@ Costs VirtualScheduler::Summary() const {
 }
 
 Costs VirtualScheduler::Summary(RunMetadata* metadata) {
-  if (!metadata) {
-    return Summary();
-  }
+  if (metadata) GenerateRunMetadata(metadata);
+  return Summary();
+}
 
+void VirtualScheduler::GenerateRunMetadata(RunMetadata* metadata) {
   // Fill RunMetadata's step_stats and partition_graphs fields.
   StepStats* stepstats = metadata->mutable_step_stats();
   for (const auto& device : device_) {
@@ -1034,8 +1069,6 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
       *device_partition_graph->add_node() = *node_def;
     }
   }
-
-  return Summary();
 }
 
 const std::unordered_map<string, int64> VirtualScheduler::GetPeakMemoryUsage()
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h
index bead84af29..59ab0a67a8 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.h
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.h
@@ -252,13 +252,27 @@ class CompositeNodeManager : public ReadyNodeManager {
 // dependencies, device, etc.
 class VirtualScheduler {
  public:
+  // TODO(pcma): Modify power_analyzer.cc to use new API's.
+  // DEPRECATED
   VirtualScheduler(const GrapplerItem* grappler_item,
                    const bool use_static_shapes, Cluster* cluster,
                    ReadyNodeManager* ready_nodes);
-  // Initializes NodeState and DeviceState from grappler_item_ and
-  // graph_properties_.
+  // DEPRECATED
   Status Init();
 
+  // Does not take ownership of cluster or ready_nodes.
+  VirtualScheduler(bool use_static_shapes, Cluster* cluster,
+                   ReadyNodeManager* ready_nodes);
+  // Initializes the scheduler for the specific grappler item.
+  // Should be called immediately after the c'tor or when the scheduler will be
+  // reused for a new grappler item. All internal states of the scheduler
+  // related to the previous grappler item will be reset/cleared.
+  //
+  // This function should be called at least once after the scheduler is
+  // constructed. An uninitialized or failed-to-initialize scheduler will cause
+  // undefined behavior.
+  Status Init(const GrapplerItem* item);
+
   OpContext GetCurrNode() const;
 
   // Returns true if there is any node to be scheduled.
@@ -269,6 +283,10 @@ class VirtualScheduler {
   // Like the above, but writes detailed stats to RunMetadata.
   // If metadata is nullptr, then just calls and return Summary().
   Costs Summary(RunMetadata* metadata);
+  // Generate RunMetadata's step_stats and partition_graphs fields from results
+  // of the virtual execution of the graph.
+  void GenerateRunMetadata(RunMetadata* metadata);
+
   // Methods called from constructor.
   static ReadyNodeManager* ReadyNodeManagerFactory(
       const string& ready_node_manager);
@@ -326,8 +344,8 @@ class VirtualScheduler {
   std::map<string, Costs> op_to_cost_;  // Per-op cost.
 
   // Auxiliary data structures for constructing NodeState and DeviceState.
-  GraphProperties graph_properties_;
-  Cluster* cluster_;  // Not owned.
+  std::unique_ptr<GraphProperties> graph_properties_;  // Initialized in Init().
+  Cluster* cluster_;                                   // Not owned.
 
   const GrapplerItem* grappler_item_;  // Not owned.
   bool use_static_shapes_;
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
index 99272dd7e9..0a695458e1 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
@@ -30,10 +30,8 @@ namespace grappler {
 // Class for testing virtual scheduler.
 class TestVirtualScheduler : public VirtualScheduler {
  public:
-  TestVirtualScheduler(const GrapplerItem* grappler_item,
-                       const bool use_static_shapes, Cluster* cluster)
-      : VirtualScheduler(grappler_item, use_static_shapes, cluster,
-                         &ready_node_manager_) {}
+  TestVirtualScheduler(const bool use_static_shapes, Cluster* cluster)
+      : VirtualScheduler(use_static_shapes, cluster, &ready_node_manager_) {}
 
   FRIEND_TEST(VirtualSchedulerTest, MemoryUsage);
   FRIEND_TEST(VirtualSchedulerTest, ControlDependency);
@@ -47,6 +45,30 @@ class TestVirtualScheduler : public VirtualScheduler {
 
 class VirtualSchedulerTest : public ::testing::Test {
  protected:
+  VirtualSchedulerTest() {
+    // node1_ to node6_ on kCPU0, with time_ready in reverse_order.
+    NodeSetUp("Node1", kConv2D, kCPU0, 6000, &node1_);
+    NodeSetUp("Node2", kConv2D, kCPU0, 5000, &node2_);
+    NodeSetUp("Node3", kConv2D, kCPU0, 4000, &node3_);
+    NodeSetUp("Node4", kConv2D, kCPU0, 3000, &node4_);
+    NodeSetUp("Node5", kConv2D, kCPU0, 2000, &node5_);
+    NodeSetUp("Node6", kConv2D, kCPU0, 1000, &node6_);
+
+    // Initializes cluster_ and scheduler_.
+    std::unordered_map<string, DeviceProperties> devices;
+
+    // Set some dummy CPU properties
+    DeviceProperties cpu_device = GetDummyCPUDevice();
+
+    // IMPORTANT: Device is not actually ever used in the test case since
+    // force_cpu_type is defaulted to "Haswell"
+    devices[kCPU0] = cpu_device;
+    devices[kCPU1] = cpu_device;
+    cluster_ = absl::make_unique<VirtualCluster>(devices);
+    scheduler_ = absl::make_unique<TestVirtualScheduler>(
+        /* use_static_shapes = */ true, cluster_.get());
+  }
+
   NodeDef node1_, node2_, node3_, node4_, node5_, node6_;
   std::unordered_map<const NodeDef*, NodeState> node_states_;
 
@@ -84,29 +106,6 @@ class VirtualSchedulerTest : public ::testing::Test {
     node_states_[node].device_name = device_name;
   }
 
-  void SetUp() override {
-    // node1_ to node6_ on kCPU0, with time_ready in reverse_order.
-    NodeSetUp("Node1", kConv2D, kCPU0, 6000, &node1_);
-    NodeSetUp("Node2", kConv2D, kCPU0, 5000, &node2_);
-    NodeSetUp("Node3", kConv2D, kCPU0, 4000, &node3_);
-    NodeSetUp("Node4", kConv2D, kCPU0, 3000, &node4_);
-    NodeSetUp("Node5", kConv2D, kCPU0, 2000, &node5_);
-    NodeSetUp("Node6", kConv2D, kCPU0, 1000, &node6_);
-
-    // Initializes cluster_ and placer_.
-    std::unordered_map<string, DeviceProperties> devices;
-
-    // Set some dummy CPU properties
-    DeviceProperties cpu_device = GetDummyCPUDevice();
-
-    // IMPORTANT: Device is not actually ever used in the test case since
-    // force_cpu_type is defaulted to "Haswell"
-    devices[kCPU0] = cpu_device;
-    devices[kCPU1] = cpu_device;
-    cluster_.reset(new VirtualCluster(devices));
-    placer_.reset(new VirtualPlacer(cluster_.get()));
-  }
-
   // Three Conv2Ds with only two in fetch nodes.
   void CreateGrapplerItemWithConv2Ds() {
     Scope s = Scope::NewRootScope().WithDevice(kCPU0);
@@ -919,11 +918,7 @@ versions {
   }
 
   // Call this after creating grappler_item_ and setting up dependency_.
-  void InitScheduler() {
-    scheduler_.reset(new TestVirtualScheduler(
-        grappler_item_.get(), true /* use_static_shapes */, cluster_.get()));
-    TF_CHECK_OK(scheduler_->Init());
-  }
+  void InitScheduler() { TF_ASSERT_OK(scheduler_->Init(grappler_item_.get())); }
 
   // Returns cost based on op.
   Costs SimplePredictCosts(const OpContext& op_context) const {
@@ -1035,14 +1030,12 @@ versions {
     }
   }
 
-  // SetUp() inits cluster_ and placer_.
+  // cluster_ and scheduler_ are initialized in the c'tor.
   std::unique_ptr<VirtualCluster> cluster_;
-  std::unique_ptr<VirtualPlacer> placer_;
+  std::unique_ptr<TestVirtualScheduler> scheduler_;
 
-  // grappler_item_ and scheduler_ will be initialized differently for each test
-  // case.
+  // grappler_item_ will be initialized differently for each test case.
   std::unique_ptr<GrapplerItem> grappler_item_;
-  std::unique_ptr<TestVirtualScheduler> scheduler_;
   // Node name -> its preceding nodes map for testing scheduling order.
   std::unordered_map<string, std::vector<string>> dependency_;
 
@@ -2077,5 +2070,6 @@ TEST_F(VirtualSchedulerTest, GraphWihtOnlyRecv) {
   // Recv without Send will be treated as initially ready node.
   EXPECT_GT(ops_executed.count("Recv"), 0);
 }
+
 }  // end namespace grappler
 }  // end namespace tensorflow
-- 
GitLab


From e60168f8b8cfd86c56f98fd2ca3cb29b679dfd38 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Fri, 12 Oct 2018 14:49:16 -0700
Subject: [PATCH 0907/1085] Blacklist `HashTableV2` for Flex in Toco converter.

PiperOrigin-RevId: 216925873
---
 tensorflow/contrib/lite/toco/tflite/export.cc    | 16 +++++++++++++++-
 .../contrib/lite/toco/tflite/export_test.cc      | 15 +++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index 3b34cd6285..c23043789c 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -63,6 +63,20 @@ bool IsControlFlowOp(const string& tensorflow_op) {
   return false;
 }
 
+// Check if a TensorFlow Op is unsupportred by the Flex runtime.
+bool IsUnsupportedFlexOp(const string& tensorflow_op) {
+  if (IsControlFlowOp(tensorflow_op)) {
+    return true;
+  }
+  // `HashTableV2` isn't supported for now since it requires an additinonal
+  // initialization step.
+  // TODO(b/117651199): Support `HashTableV2` with Flex runtime.
+  if (tensorflow_op == "HashTableV2") {
+    return true;
+  }
+  return false;
+}
+
 // Map from operator name to TF Lite enum value, for all builtins.
 const std::map<string, BuiltinOperator>& GetBuiltinOpsMap() {
   static std::map<string, BuiltinOperator>* builtin_ops = nullptr;
@@ -150,7 +164,7 @@ OperatorKey GetOperatorKey(
   }
 
   if (key.is_flex_op) {
-    if (IsControlFlowOp(key.flex_tensorflow_op)) {
+    if (IsUnsupportedFlexOp(key.flex_tensorflow_op)) {
       key.is_unsupported_flex_op = true;
     }
   }
diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc
index eda1aa78a3..13e3ba6d5f 100644
--- a/tensorflow/contrib/lite/toco/tflite/export_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc
@@ -383,6 +383,21 @@ TEST(OperatorKeyTest, TestFlexWithControlFlowOp) {
   EXPECT_TRUE(key.is_unsupported_flex_op);
 }
 
+TEST(OperatorKeyTest, TestFlexWithUnsupportedOp) {
+  auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
+  op->tensorflow_op = "HashTableV2";
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+  const auto key = details::GetOperatorKey(*op, ops_by_type, true);
+
+  EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM);
+  EXPECT_EQ(key.custom_code, "FlexHashTableV2");
+  EXPECT_EQ(key.version, 1);
+  EXPECT_TRUE(key.is_flex_op);
+  // The control flow ops should be marked as unsupported.
+  EXPECT_TRUE(key.is_unsupported_flex_op);
+}
+
 TEST(OperatorKeyTest, TestFlexWithPartiallySupportedOps) {
   // Test Toco-supported/TFLite-unsupported operators.
   // TODO(ycling): The test will be broken if Range is implemented in TFLite.
-- 
GitLab


From 13857215d436d36d9231cbdc41e7dd96584e7ae4 Mon Sep 17 00:00:00 2001
From: Anna Revinskaya <annarev@google.com>
Date: Fri, 12 Oct 2018 14:55:28 -0700
Subject: [PATCH 0908/1085] Fix lint error

---
 tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index d4d521c469..c74fca49f8 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -23,7 +23,6 @@ from __future__ import print_function
 import collections
 import math
 import time
-import unittest
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
-- 
GitLab


From 43c31f7a02b44dc24e1d8b220f937dd1a34af5fe Mon Sep 17 00:00:00 2001
From: Jonathan Shen <jonathanasdf@google.com>
Date: Fri, 12 Oct 2018 14:49:31 -0700
Subject: [PATCH 0909/1085] Register Assert op for GPU.

PiperOrigin-RevId: 216925919
---
 tensorflow/core/kernels/logging_ops.cc         |  8 ++++++++
 tensorflow/python/framework/test_util_test.py  |  1 +
 tensorflow/python/ops/control_flow_ops_test.py | 13 +++++++++++++
 3 files changed, 22 insertions(+)

diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc
index 1ded012f3c..cddfa359fc 100644
--- a/tensorflow/core/kernels/logging_ops.cc
+++ b/tensorflow/core/kernels/logging_ops.cc
@@ -52,6 +52,14 @@ class AssertOp : public OpKernel {
 
 REGISTER_KERNEL_BUILDER(Name("Assert").Device(DEVICE_CPU), AssertOp);
 
+#if GOOGLE_CUDA
+REGISTER_KERNEL_BUILDER(Name("Assert")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("condition")
+                            .HostMemory("data"),
+                        AssertOp);
+#endif  // GOOGLE_CUDA
+
 class PrintOp : public OpKernel {
  public:
   explicit PrintOp(OpKernelConstruction* ctx)
diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py
index 22189afa59..79b1979925 100644
--- a/tensorflow/python/framework/test_util_test.py
+++ b/tensorflow/python/framework/test_util_test.py
@@ -361,6 +361,7 @@ class TestUtilTest(test_util.TensorFlowTestCase):
     b = [1, 2]
     self.assertArrayNear(a, b, 0.001)
 
+  @test_util.skip_if(True)  # b/117665998
   def testForceGPU(self):
     with self.assertRaises(errors.InvalidArgumentError):
       with self.test_session(force_gpu=True):
diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index 2c42176158..f6d6376a39 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -981,5 +981,18 @@ class WhileLoopTestCase(test_util.TensorFlowTestCase):
     self.assertEqual(self.evaluate(r), [10, []])
 
 
+class AssertTest(test_util.TensorFlowTestCase):
+
+  def testAssert(self):
+    i = constant_op.constant(0)
+    c = control_flow_ops.Assert(i < 10, [i, [10], [i + 1]])
+    self.evaluate(c)
+
+    i = constant_op.constant(10)
+    c = control_flow_ops.Assert(i < 10, [i, [10], [i + 1]])
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(c)
+
+
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From d1757bde2551873810149693f5e77620559fbb26 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 14:54:52 -0700
Subject: [PATCH 0910/1085] Internal change.

PiperOrigin-RevId: 216926812
---
 tensorflow/contrib/lite/c/c_api_internal.c    |  9 +++++--
 tensorflow/contrib/lite/c/c_api_internal.h    |  5 +++-
 .../kernels/bidirectional_sequence_lstm.cc    | 24 ++++++++++---------
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/lite/c/c_api_internal.c b/tensorflow/contrib/lite/c/c_api_internal.c
index 8a0c177b19..8be37945ca 100644
--- a/tensorflow/contrib/lite/c/c_api_internal.c
+++ b/tensorflow/contrib/lite/c/c_api_internal.c
@@ -28,10 +28,15 @@ int TfLiteIntArrayGetSizeInBytes(int size) {
 int TfLiteIntArrayEqual(TfLiteIntArray* a, TfLiteIntArray* b) {
   if (a == b) return 1;
   if (a == NULL || b == NULL) return 0;
-  if (a->size != b->size) return 0;
+  return TfLiteIntArrayEqualsArray(a, b->size, b->data);
+}
+
+int TfLiteIntArrayEqualsArray(TfLiteIntArray* a, int b_size, int b_data[]) {
+  if (a == NULL) return (b_size == 0);
+  if (a->size != b_size) return 0;
   int i = 0;
   for (; i < a->size; i++)
-    if (a->data[i] != b->data[i]) return 0;
+    if (a->data[i] != b_data[i]) return 0;
   return 1;
 }
 
diff --git a/tensorflow/contrib/lite/c/c_api_internal.h b/tensorflow/contrib/lite/c/c_api_internal.h
index ee3dff6792..fdc9ff634a 100644
--- a/tensorflow/contrib/lite/c/c_api_internal.h
+++ b/tensorflow/contrib/lite/c/c_api_internal.h
@@ -88,9 +88,12 @@ int TfLiteIntArrayGetSizeInBytes(int size);
 // This returns a pointer, that you must free using TfLiteIntArrayFree().
 TfLiteIntArray* TfLiteIntArrayCreate(int size);
 
-// Check if two tensors are equal. Returns 1 if they are equal, 0 otherwise.
+// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise.
 int TfLiteIntArrayEqual(TfLiteIntArray* a, TfLiteIntArray* b);
 
+// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise.
+int TfLiteIntArrayEqualsArray(TfLiteIntArray* a, int b_size, int b_data[]);
+
 // Create a copy of an array passed as `src`.
 // You are expected to free memory with TfLiteIntArrayFree
 TfLiteIntArray* TfLiteIntArrayCopy(TfLiteIntArray* src);
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index 0d9863ae8d..ca6b6a9a41 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -678,9 +678,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, kScalingFactors);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
-    scaling_factors_size->data[0] = n_batch;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    int scaling_dims[1] = {n_batch};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = n_batch;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
@@ -690,10 +691,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, kProductScalingFactors);
     prod_scaling_factors->type = kTfLiteFloat32;
     prod_scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1);
-    prod_scaling_factors_size->data[0] = n_batch;
-    if (!TfLiteIntArrayEqual(prod_scaling_factors->dims,
-                             prod_scaling_factors_size)) {
+    if (!TfLiteIntArrayEqualsArray(prod_scaling_factors->dims, 1,
+                                   scaling_dims)) {
+      TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1);
+      prod_scaling_factors_size->data[0] = n_batch;
       TF_LITE_ENSURE_OK(context,
                         context->ResizeTensor(context, prod_scaling_factors,
                                               prod_scaling_factors_size));
@@ -707,10 +708,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, kRecoveredCellWeights);
     recovered_cell_weights->type = kTfLiteFloat32;
     recovered_cell_weights->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1);
-    recovered_cell_weights_size->data[0] = n_fw_cell;
-    if (!TfLiteIntArrayEqual(recovered_cell_weights->dims,
-                             recovered_cell_weights_size)) {
+    int recovered_cell_dims[1] = {n_fw_cell};
+    if (!TfLiteIntArrayEqualsArray(recovered_cell_weights->dims, 1,
+                                   recovered_cell_dims)) {
+      TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1);
+      recovered_cell_weights_size->data[0] = n_fw_cell;
       TF_LITE_ENSURE_OK(context,
                         context->ResizeTensor(context, recovered_cell_weights,
                                               recovered_cell_weights_size));
-- 
GitLab


From 757acceb7c421cd2a0ba96704055ab4a9dcb5ef8 Mon Sep 17 00:00:00 2001
From: Raghuraman Krishnamoorthi <raghuramank@google.com>
Date: Fri, 12 Oct 2018 15:06:37 -0700
Subject: [PATCH 0911/1085]  Enable quantization of activations after add and
 mul operations in a graph. This change allows for full quantization of
 mobilenet ssd models.

PiperOrigin-RevId: 216928860
---
 .../contrib/quantize/python/quantize.py       | 121 +++++++++++++++++-
 .../quantize/python/quantize_graph_test.py    |  92 +++++++++++++
 2 files changed, 212 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index c3b9760787..92ca3f2039 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -36,6 +36,12 @@ _ACTIVATION_TYPES = {'Relu', 'Relu6', 'Identity'}
 
 _RELU_TYPES = {'Relu', 'Relu6'}
 
+_QUANTIZATION_OP = {'FakeQuantWithMinMaxVars'}
+_VALID_SRC_OP = {'Add', 'Mul'}
+_INTERMEDIATE_OP = {'Add', 'Mul'}
+_PASS_THROUGH_OP = {'Reshape', 'Identity', 'BatchToSpaceND', 'SpaceToBatchND'}
+_VALID_ACTIVATION_OP = {'Relu', 'Relu6'}
+
 
 def Quantize(graph,
              is_training,
@@ -78,6 +84,7 @@ def Quantize(graph,
     scope += '/'
 
   input_to_ops_map = input_to_ops.InputToOps(graph)
+  quantized_ops = set()
   for layer_match in _FindLayersToQuantize(graph):
     # Quantize the weights.
     context = _GetContextFromOp(layer_match.layer_op)
@@ -125,6 +132,7 @@ def Quantize(graph,
         symmetric=symmetric,
         init_min=0.0,
         producer_scope=scope)
+    quantized_ops.add(layer_match.activation_op)
 
     # Quantize the inputs and output to the bypass (if it exists). The input to
     # the bypass is the bias add, and the output is the activation.
@@ -145,6 +153,7 @@ def Quantize(graph,
           symmetric=symmetric,
           producer_scope=scope,
           consumer_scope=scope)
+      quantized_ops.add(layer_match.bias_add_op)
       # Make sure the op following this isn't an activation. In which case, we
       # shouldn't quantize it, since the activation will be Fused into the
       # Add at inference time.
@@ -167,6 +176,7 @@ def Quantize(graph,
             symmetric=symmetric,
             producer_scope=scope,
             consumer_scope=scope)
+        quantized_ops.add(layer_match.bypass_op)
 
     # Quantize bypass ops that occur after the activation.
     if layer_match.post_activation_bypass_op is not None:
@@ -200,6 +210,115 @@ def Quantize(graph,
             bits=activation_bits,
             symmetric=symmetric,
             producer_scope=scope)
+        quantized_ops.add(layer_match.post_activation_bypass_op)
+
+  _QuantizeActivationLayers(
+      quantized_ops,
+      graph,
+      is_training,
+      activation_bits,
+      ema_decay,
+      quant_delay,
+      vars_collection,
+      scope=scope)
+
+
+def _QuantizeActivationLayers(quantized_ops,
+                              graph,
+                              is_training,
+                              activation_bits=8,
+                              ema_decay=0.999,
+                              quant_delay=None,
+                              vars_collection=ops.GraphKeys.GLOBAL_VARIABLES,
+                              scope=None):
+  """Quantize intermediate activation tensors after addition and multiplication.
+
+  Args:
+    quantized_ops: Set of previously quantized activation ops.
+    graph: Graph to modify.
+    is_training: Whether quantizing training graph or eval graph.
+    activation_bits: Number of bits to use for quantizing activations.
+    ema_decay: (Optional) Float, EMA decay parameter.  EMA is used to update
+      quantization intervals for quantizing activations (see here about EMA:
+      https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average).
+    quant_delay: (Optional, default None) Int, count of global steps for which
+      to delay quantization.  This helps weights stabilize at the start of
+      training.
+    vars_collection: (Optional) Collection where to store the variables for
+      quantization interval ends.
+    scope: The scope to be transformed. If it's not None, only the ops which are
+      in this scope will be transformed.
+
+  Raises:
+    ValueError: When quantization fails.
+  """
+  input_to_ops_map = input_to_ops.InputToOps(graph)
+  for op in (op for op in graph.get_operations()):
+    if _CheckIfQuantizableOp(op, quantized_ops):
+      logging.info('Inserting fake quant op activation_%s_quant after %s',
+                   op.type, op.name)
+      consumers = input_to_ops_map.ConsumerOperations(op)
+      _InsertQuantOp(
+          op.name,
+          'activation_' + op.type + '_quant',
+          op,
+          consumers,
+          is_training,
+          moving_avg=True,
+          ema_decay=ema_decay,
+          quant_delay=quant_delay,
+          vars_collection=vars_collection,
+          bits=activation_bits,
+          producer_scope=scope)
+
+
+def _CheckIfQuantizableOp(src_op, quantized_ops):
+  """Check if the output of an op should be quantized.
+
+  Args:
+    src_op: op to be checked
+    quantized_ops: Set of previously quantized activation ops.
+
+  Returns:
+    Boolean specifying if output should be quantized or not.
+  """
+  src_op_name = set([src_op.type])
+  if src_op in quantized_ops:
+    return False
+  if not src_op_name.intersection(_VALID_SRC_OP):
+    return False
+
+  # If src op is an add or a mul and the output is immediately
+  # followed by an activation skip
+  if len(src_op.outputs) == 1 and len(src_op.outputs[0].consumers()) == 1:
+    op_consumers = src_op.outputs[0].consumers()
+    if set([op_consumers[0].type]).intersection(_VALID_ACTIVATION_OP):
+      logging.info('Skipping quant after %s', src_op.name)
+      return False
+  # Is an Add or a Mul
+  input_ops = src_op.inputs
+
+  for op in input_ops:
+    curr_op = op.op
+    curr_op_type = set([curr_op.type])
+    while curr_op_type.intersection(_PASS_THROUGH_OP):
+      # Walk back through pass through ops
+      curr_op = curr_op.inputs[0].op
+      curr_op_type = set([curr_op.type])
+      # Now at a valid or quantizable op, need to check if
+      # atleast one of the inputs to a valid op is connected
+      # to a quantizable op via pass through ops
+
+    if (curr_op_type.intersection(_QUANTIZATION_OP) or
+        curr_op.name.find('delayed_quant/Merge') > 0):
+      return True
+
+    if curr_op_type.intersection(_INTERMEDIATE_OP):
+      # Check if atleast one input to intermediate_op are quantizable
+      for input_op in curr_op.inputs:
+        if _CheckIfQuantizableOp(input_op.op, quantized_ops):
+          return True
+  return False
 
 
 def _FindLayersToQuantize(graph):
@@ -457,7 +576,7 @@ def _IsSkipLayer(activation_op):
       if consumer.type == 'FusedBatchNorm':
         skip_layer = True
         logging.info(
-            'Skipping quantizing %s, because it is the output of a conv/fc'
+            'Skipping quantizing %s, because it is the output of a conv/fc '
             'followed by a identity, feeding a fused batch norm.',
             activation_op.name)
   return skip_layer
diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py
index 623212fc56..9aa6e2c24d 100644
--- a/tensorflow/contrib/quantize/python/quantize_graph_test.py
+++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py
@@ -295,6 +295,30 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
     self._AssertOutputGoesToOps(conv_out_identity, graph,
                                 ['test/BatchNorm/FusedBatchNorm'])
 
+  def testActivationQuantization(self):
+    self._RunTestOverAllRewrites(self._TestActivationQuantization)
+
+  def _TestActivationQuantization(self, rewrite_fn):
+    graph = ops.Graph()
+    with graph.as_default():
+      _ = self._LayerWithActivationProcessing()
+
+    rewrite_fn(graph)
+    # Check if outputs of multipliers and adds are quantized.
+
+    mul_op = graph.get_operation_by_name('test/Mul')
+    self._AssertOutputGoesToOps(
+        mul_op, graph,
+        ['test/Mul/activation_Mul_quant/FakeQuantWithMinMaxVars'])
+    mul_op = graph.get_operation_by_name('test/Mul_1')
+    self._AssertOutputGoesToOps(
+        mul_op, graph,
+        ['test/Mul_1/activation_Mul_quant/FakeQuantWithMinMaxVars'])
+    add_op = graph.get_operation_by_name('test/add')
+    self._AssertOutputGoesToOps(
+        add_op, graph,
+        ['test/add/activation_Add_quant/FakeQuantWithMinMaxVars'])
+
   def testRewriteWithScope(self):
     self._RunTestOverExperimentalRewritesWithScope(
         self._TestRewriteWithScope, 'scope1')
@@ -334,6 +358,46 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
     # No ops should be inserted or removed.
     self.assertEqual(op_names_before_rewrite, op_names_after_rewrite)
 
+  def testActivationRewriteWithScope(self):
+    self._RunTestOverExperimentalRewritesWithScope(
+        self._TestActivationRewriteWithScope, 'scope1')
+
+  def _TestActivationRewriteWithScope(self, rewrite_fn):
+    graph = ops.Graph()
+    with graph.as_default():
+      output = self._LayerWithIdentity(scope='scope1')
+      with ops.name_scope('scope2'):
+        output = nn_ops.relu6(output)
+        scaled_output1 = math_ops.mul(2.0, output)
+        scaled_output2 = math_ops.mul(3.0, output)
+        output = scaled_output1 + scaled_output2
+      rewrite_fn(graph)
+
+      op_names = [op.name for op in graph.get_operations()]
+      # The weights and activation of scope1 is quantized, but not scope2.
+      self.assertTrue(any('scope1/Conv/act_quant' in name for name in op_names))
+      self.assertTrue(
+          any('scope1/Conv/weights_quant' in name for name in op_names))
+
+      for op_name in op_names:
+        if op_name.startswith('scope2'):
+          self.assertTrue('FakeQuant' not in op_name)
+
+  def testActivationRewriteWithNonMatchingScope(self):
+    self._RunTestOverExperimentalRewritesWithScope(
+        self._TestActivationRewriteWithNonMatchingScope, 'NonExistingScope')
+
+  def _TestActivationRewriteWithNonMatchingScope(self, rewrite_fn):
+    graph = ops.Graph()
+    with graph.as_default():
+      self._LayerWithActivationProcessing()
+
+    rewrite_fn(graph)
+    op_types_after_rewrite = set([op.type for op in graph.get_operations()])
+    self.assertFalse(
+        op_types_after_rewrite.intersection('FakeQuantWithMinMaxVars'))
+    # No fake quant ops should be inserted.
+
   def testWithSharedWeights(self):
 
     self._RunTestOverAllRewrites(self._TestWithSharedWeights)
@@ -420,6 +484,34 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase):
         output += input_tensor
     return output
 
+  def _LayerWithActivationProcessing(self,
+                                     input_tensor=None,
+                                     scope='test',
+                                     post_activation_bypass=False):
+
+    batch_size, height, width, depth = 5, 128, 128, 3
+    if input_tensor is None:
+      input_tensor = array_ops.zeros((batch_size, height, width, depth))
+    weight_init = init_ops.truncated_normal_initializer
+    with ops.name_scope(scope):
+      output = layers.conv2d(
+          input_tensor,
+          depth, [5, 5],
+          padding='SAME',
+          weights_initializer=weight_init(0.09),
+          activation_fn=None,
+          normalizer_fn=None,
+          biases_initializer=None)
+
+      output = layers.batch_norm(
+          output, center=True, scale=True, decay=1.0 - 0.003, fused=True)
+
+      output = nn_ops.relu6(output)
+      scaled_output1 = math_ops.mul(2.0, output)
+      scaled_output2 = math_ops.mul(3.0, output)
+      output = scaled_output1 + scaled_output2
+    return output
+
   def _AssertInputOpsAre(self, op, in_op_names):
     """Asserts that all inputs to op come from in_op_names (disregarding order).
 
-- 
GitLab


From 64628112e2557cae43b7d5f479bf5be598663712 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 15:24:44 -0700
Subject: [PATCH 0912/1085] Automated rollback of commit
 0608a3f0dbbac03aeb84966d2ea9a24a694ebbce

PiperOrigin-RevId: 216931739
---
 tensorflow/core/grappler/optimizers/arithmetic_optimizer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
index 15e5ad9df5..bb56f61e30 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
@@ -80,7 +80,7 @@ class ArithmeticOptimizer : public GraphOptimizer {
     bool convert_log1p = true;
     bool convert_expm1 = true;
     bool unary_ops_composition = true;
-    bool remove_stack_strided_slice_same_axis = true;
+    bool remove_stack_strided_slice_same_axis = false;
 
     // Choose which arithmetic optimizer stages will be enabled for a given
     // optimization level by default.
-- 
GitLab


From 3437098ba5b111817ef6ac5906d86934168704b7 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 12 Oct 2018 15:26:01 -0700
Subject: [PATCH 0913/1085] Make AWS, GCP, HDFS, Kafka and Ignite default on,
 but introduce the ability to disable them

Fixes #22819

PiperOrigin-RevId: 216931905
---
 .bazelrc                                      |  7 ++
 configure.py                                  | 30 +++----
 tensorflow/BUILD                              | 32 +++++++-
 tensorflow/contrib/BUILD                      | 80 ++++++++++++++++---
 .../core/platform/default/build_config.bzl    | 80 ++++++++++++-------
 tensorflow/tools/lib_package/BUILD            | 54 +++++++++----
 tensorflow/tools/pip_package/BUILD            | 39 ++++++---
 7 files changed, 238 insertions(+), 84 deletions(-)

diff --git a/.bazelrc b/.bazelrc
index d5d20309df..1945078789 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -66,6 +66,13 @@ build:gdr --define=with_gdr_support=true
 build:ngraph --define=with_ngraph_support=true
 build:verbs --define=with_verbs_support=true
 
+# Options to disable default on features
+build:noaws --define=no_aws_support=true
+build:nogcp --define=no_gcp_support=true
+build:nohdfs --define=no_hdfs_support=true
+build:nokafka --define=no_kafka_support=true
+build:noignite --define=no_ignite_support=true
+
 build --define=use_fast_cpp_protos=true
 build --define=allow_oversize_protos=true
 build --define=grpc_no_ares=true
diff --git a/configure.py b/configure.py
index e7709cc56c..5a4207476f 100644
--- a/configure.py
+++ b/configure.py
@@ -1573,8 +1573,6 @@ def main():
   if is_ppc64le():
     write_action_env_to_bazelrc('OMP_NUM_THREADS', 1)
 
-  set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite',
-                'with_ignite_support', True, 'ignite')
   xla_enabled_by_default = is_linux()
   set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
                 xla_enabled_by_default, 'xla')
@@ -1669,19 +1667,23 @@ def main():
     create_android_ndk_rule(environ_cp)
     create_android_sdk_rule(environ_cp)
 
-  # On Windows, we don't have MKL support and the build is always monolithic.
-  # So no need to print the following message.
-  # TODO(pcloudy): remove the following if check when they make sense on Windows
-  if not is_windows():
-    print('Preconfigured Bazel build configs. You can use any of the below by '
-          'adding "--config=<>" to your build command. See .bazelrc for more '
-          'details.')
-    config_info_line('mkl', 'Build with MKL support.')
-    config_info_line('monolithic', 'Config for mostly static monolithic build.')
-    config_info_line('gdr', 'Build with GDR support.')
-    config_info_line('verbs', 'Build with libverbs support.')
-    config_info_line('ngraph', 'Build with Intel nGraph support.')
+  print('Preconfigured Bazel build configs. You can use any of the below by '
+        'adding "--config=<>" to your build command. See .bazelrc for more '
+        'details.')
+  config_info_line('mkl', 'Build with MKL support.')
+  config_info_line('monolithic', 'Config for mostly static monolithic build.')
+  config_info_line('gdr', 'Build with GDR support.')
+  config_info_line('verbs', 'Build with libverbs support.')
+  config_info_line('ngraph', 'Build with Intel nGraph support.')
+
+  print('Preconfigured Bazel build configs to DISABLE default on features:')
+  config_info_line('noaws', 'Disable AWS S3 filesystem support.')
+  config_info_line('nogcp', 'Disable GCP support.')
+  config_info_line('nohdfs', 'Disable HDFS support.')
+  config_info_line('noignite', 'Disable Apacha Ignite support.')
+  config_info_line('nokafka', 'Disable Apache Kafka support.')
 
 
 if __name__ == '__main__':
   main()
+
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 9b62a50452..77e3baaff1 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -209,12 +209,40 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+# Features that are default ON are handled differently below.
+#
+config_setting(
+    name = "no_aws_support",
+    define_values = {"no_aws_support": "false"},
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "no_gcp_support",
+    define_values = {"no_gcp_support": "false"},
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "no_hdfs_support",
+    define_values = {"no_hdfs_support": "false"},
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "no_ignite_support",
+    define_values = {"no_ignite_support": "false"},
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
-    name = "with_ignite_support",
-    define_values = {"with_ignite_support": "true"},
+    name = "no_kafka_support",
+    define_values = {"no_kafka_support": "false"},
     visibility = ["//visibility:public"],
 )
 
+# Crosses between platforms and file system libraries not supported on those
+# platforms due to limitations in nested select() statements.
 config_setting(
     name = "with_cuda_support_windows_override",
     define_values = {"using_cuda_nvcc": "true"},
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index fa06d351d4..78ad19a4ab 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -113,22 +113,52 @@ py_library(
         "//tensorflow/python:util",
         "//tensorflow/python/estimator:estimator_py",
     ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
         "//tensorflow:linux_s390x": [],
         "//tensorflow:windows": [],
+        "//tensorflow:no_kafka_support": [],
         "//conditions:default": [
-            "//tensorflow/contrib/bigtable",
-            "//tensorflow/contrib/cloud:cloud_py",
-            "//tensorflow/contrib/fused_conv:fused_conv_py",  # unresolved symbols, need to export more symbols
             "//tensorflow/contrib/kafka",
+        ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_aws_support": [],
+        "//conditions:default": [
             "//tensorflow/contrib/kinesis",
+        ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//conditions:default": [
+            "//tensorflow/contrib/fused_conv:fused_conv_py",
             "//tensorflow/contrib/tensorrt:init_py",
             "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
         ],
     }) + select({
-        "//tensorflow:with_ignite_support": [
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_gcp_support": [],
+        "//conditions:default": [
+            "//tensorflow/contrib/bigtable",
+            "//tensorflow/contrib/cloud:cloud_py",
+        ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_ignite_support": [],
+        "//conditions:default": [
             "//tensorflow/contrib/ignite",
         ],
-        "//conditions:default": [],
     }),
 )
 
@@ -152,14 +182,26 @@ cc_library(
     ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_cuda([
         "//tensorflow/contrib/nccl:nccl_kernels",
     ]) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
         "//tensorflow:linux_s390x": [],
         "//tensorflow:windows": [],
+        "//tensorflow:no_kafka_support": [],
         "//conditions:default": [
             "//tensorflow/contrib/kafka:dataset_kernels",
+        ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_aws_support": [],
+        "//conditions:default": [
             "//tensorflow/contrib/kinesis:dataset_kernels",
-            "//tensorflow/contrib/tensorrt:trt_engine_op_kernel",
         ],
-    }),
+    }) + if_not_windows([
+        "//tensorflow/contrib/tensorrt:trt_engine_op_kernel",
+    ]),
 )
 
 cc_library(
@@ -183,17 +225,33 @@ cc_library(
         "//tensorflow/contrib/text:all_ops",
         "//tensorflow/contrib/tpu:all_ops",
     ] + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
         "//tensorflow:linux_s390x": [],
         "//tensorflow:windows": [],
+        "//tensorflow:no_kafka_support": [],
         "//conditions:default": [
             "//tensorflow/contrib/kafka:dataset_ops_op_lib",
-            "//tensorflow/contrib/kinesis:dataset_ops_op_lib",
-            "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib",
         ],
     }) + select({
-        "//tensorflow:with_ignite_support": [
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_aws_support": [],
+        "//conditions:default": [
+            "//tensorflow/contrib/kinesis:dataset_ops_op_lib",
+        ],
+    }) + if_not_windows([
+        "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib",
+    ]) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_ignite_support": [],
+        "//conditions:default": [
             "//tensorflow/contrib/ignite:dataset_ops_op_lib",
         ],
-        "//conditions:default": [],
     }),
 )
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index d884c1aa7c..927dbbc5b3 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -166,10 +166,10 @@ def cc_proto_library(
         proto_gen(
             name = name + "_genproto",
             srcs = srcs,
-            deps = [s + "_genproto" for s in deps],
             includes = includes,
             protoc = protoc,
             visibility = ["//visibility:public"],
+            deps = [s + "_genproto" for s in deps],
         )
 
         # An empty cc_library to make rule dependency consistent.
@@ -193,15 +193,15 @@ def cc_proto_library(
     proto_gen(
         name = name + "_genproto",
         srcs = srcs,
-        deps = [s + "_genproto" for s in deps],
+        outs = outs,
+        gen_cc = 1,
         includes = includes,
-        protoc = protoc,
         plugin = grpc_cpp_plugin,
         plugin_language = "grpc",
         plugin_options = plugin_options,
-        gen_cc = 1,
-        outs = outs,
+        protoc = protoc,
         visibility = ["//visibility:public"],
+        deps = [s + "_genproto" for s in deps],
     )
 
     if use_grpc_plugin:
@@ -286,14 +286,14 @@ def py_proto_library(
     proto_gen(
         name = name + "_genproto",
         srcs = srcs,
-        deps = [s + "_genproto" for s in deps],
-        includes = includes,
-        protoc = protoc,
-        gen_py = 1,
         outs = outs,
-        visibility = ["//visibility:public"],
+        gen_py = 1,
+        includes = includes,
         plugin = grpc_python_plugin,
         plugin_language = "grpc",
+        protoc = protoc,
+        visibility = ["//visibility:public"],
+        deps = [s + "_genproto" for s in deps],
     )
 
     if default_runtime and not default_runtime in py_libs + deps:
@@ -345,14 +345,13 @@ def tf_proto_library_cc(
         # libraries containing all the sources.
         proto_gen(
             name = cc_name + "_genproto",
-            deps = [s + "_genproto" for s in cc_deps],
             protoc = "@protobuf_archive//:protoc",
             visibility = ["//visibility:public"],
+            deps = [s + "_genproto" for s in cc_deps],
         )
         native.cc_library(
             name = cc_name,
-            deps = cc_deps + ["@protobuf_archive//:protobuf_headers"] +
-                   if_static([name + "_cc_impl"]),
+            deps = cc_deps + ["@protobuf_archive//:protobuf_headers"] + if_static([name + "_cc_impl"]),
             testonly = testonly,
             visibility = visibility,
         )
@@ -365,8 +364,8 @@ def tf_proto_library_cc(
 
     cc_proto_library(
         name = cc_name,
+        testonly = testonly,
         srcs = srcs,
-        deps = cc_deps + ["@protobuf_archive//:cc_wkt_protos"],
         cc_libs = cc_libs + if_static(
             ["@protobuf_archive//:protobuf"],
             ["@protobuf_archive//:protobuf_headers"],
@@ -376,11 +375,11 @@ def tf_proto_library_cc(
             "-Wno-unused-but-set-variable",
             "-Wno-sign-compare",
         ]),
+        default_header = default_header,
         protoc = "@protobuf_archive//:protoc",
         use_grpc_plugin = use_grpc_plugin,
-        testonly = testonly,
         visibility = visibility,
-        default_header = default_header,
+        deps = cc_deps + ["@protobuf_archive//:cc_wkt_protos"],
     )
 
 def tf_proto_library_py(
@@ -399,9 +398,9 @@ def tf_proto_library_py(
         # libraries containing all the sources.
         proto_gen(
             name = py_name + "_genproto",
-            deps = [s + "_genproto" for s in py_deps],
             protoc = "@protobuf_archive//:protoc",
             visibility = ["//visibility:public"],
+            deps = [s + "_genproto" for s in py_deps],
         )
         native.py_library(
             name = py_name,
@@ -413,14 +412,14 @@ def tf_proto_library_py(
 
     py_proto_library(
         name = py_name,
+        testonly = testonly,
         srcs = srcs,
-        srcs_version = srcs_version,
-        deps = deps + py_deps + ["@protobuf_archive//:protobuf_python"],
-        protoc = "@protobuf_archive//:protoc",
         default_runtime = "@protobuf_archive//:protobuf_python",
-        visibility = visibility,
-        testonly = testonly,
+        protoc = "@protobuf_archive//:protoc",
+        srcs_version = srcs_version,
         use_grpc_plugin = use_grpc_plugin,
+        visibility = visibility,
+        deps = deps + py_deps + ["@protobuf_archive//:protobuf_python"],
     )
 
 def tf_jspb_proto_library(**kwargs):
@@ -452,23 +451,23 @@ def tf_proto_library(
 
     tf_proto_library_cc(
         name = name,
+        testonly = testonly,
         srcs = srcs,
-        protodeps = protodeps,
         cc_grpc_version = cc_grpc_version,
         cc_libs = cc_libs,
-        testonly = testonly,
-        visibility = visibility,
         default_header = default_header,
+        protodeps = protodeps,
+        visibility = visibility,
     )
 
     tf_proto_library_py(
         name = name,
+        testonly = testonly,
         srcs = srcs,
         protodeps = protodeps,
         srcs_version = "PY2AND3",
-        testonly = testonly,
-        visibility = visibility,
         use_grpc_plugin = has_services,
+        visibility = visibility,
     )
 
 # A list of all files under platform matching the pattern in 'files'. In
@@ -632,23 +631,41 @@ def tf_additional_lib_deps():
 def tf_additional_core_deps():
     return select({
         "//tensorflow:android": [],
-        "//tensorflow:windows": [],
         "//tensorflow:ios": [],
         "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_gcp_support": [],
         "//conditions:default": [
             "//tensorflow/core/platform/cloud:gcs_file_system",
-            "//tensorflow/core/platform/s3:s3_file_system",
+        ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_hdfs_support": [],
+        "//conditions:default": [
             "//tensorflow/core/platform/hadoop:hadoop_file_system",
         ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_aws_support": [],
+        "//conditions:default": [
+            "//tensorflow/core/platform/s3:s3_file_system",
+        ],
     })
 
 # TODO(jart, jhseu): Delete when GCP is default on.
 def tf_additional_cloud_op_deps():
     return select({
         "//tensorflow:android": [],
-        "//tensorflow:windows": [],
         "//tensorflow:ios": [],
         "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_gcp_support": [],
         "//conditions:default": [
             "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib",
             "//tensorflow/contrib/cloud:gcs_config_ops_op_lib",
@@ -659,9 +676,10 @@ def tf_additional_cloud_op_deps():
 def tf_additional_cloud_kernel_deps():
     return select({
         "//tensorflow:android": [],
-        "//tensorflow:windows": [],
         "//tensorflow:ios": [],
         "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_gcp_support": [],
         "//conditions:default": [
             "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops",
             "//tensorflow/contrib/cloud/kernels:gcs_config_ops",
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index 85514b8629..aff26bf0fb 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -137,6 +137,24 @@ genrule(
         "@snappy//:COPYING",
         "@zlib_archive//:zlib.h",
     ] + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_aws_support": [],
+        "//conditions:default": [
+            "@aws//:LICENSE",
+        ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_gcp_support": [],
+        "//conditions:default": [
+            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
+        ],
+    }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
         ],
@@ -153,14 +171,7 @@ genrule(
             "@grpc//third_party/nanopb:LICENSE.txt",
             "@grpc//third_party/address_sorting:LICENSE",
         ],
-    ) + select({
-        "//tensorflow:linux_s390x": [],
-        "//tensorflow:windows": [],
-        "//conditions:default": [
-            "@aws//:LICENSE",
-            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
-        ],
-    }),
+    ),
     outs = ["include/tensorflow/c/LICENSE"],
     cmd = "$(location :concat_licenses.sh) $(SRCS) >$@",
     tools = [":concat_licenses.sh"],
@@ -194,6 +205,24 @@ genrule(
         "@snappy//:COPYING",
         "@zlib_archive//:zlib.h",
     ] + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_aws_support": [],
+        "//conditions:default": [
+            "@aws//:LICENSE",
+        ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_gcp_support": [],
+        "//conditions:default": [
+            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
+        ],
+    }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
         ],
@@ -203,14 +232,7 @@ genrule(
     ]) + if_mkl([
         "//third_party/mkl:LICENSE",
         "//third_party/mkl_dnn:LICENSE",
-    ]) + select({
-        "//tensorflow:linux_s390x": [],
-        "//tensorflow:windows": [],
-        "//conditions:default": [
-            "@aws//:LICENSE",
-            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
-        ],
-    }),
+    ]),
     outs = ["include/tensorflow/jni/LICENSE"],
     cmd = "$(location :concat_licenses.sh) $(SRCS) >$@",
     tools = [":concat_licenses.sh"],
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 164b3d8303..970dd49e11 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -167,6 +167,34 @@ filegroup(
         "@zlib_archive//:zlib.h",
         "@org_python_pypi_backports_weakref//:LICENSE",
     ] + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_aws_support": [],
+        "//conditions:default": [
+            "@aws//:LICENSE",
+        ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_gcp_support": [],
+        "//conditions:default": [
+            "@com_github_googleapis_googleapis//:LICENSE",
+            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
+        ],
+    }) + select({
+        "//tensorflow:android": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_s390x": [],
+        "//tensorflow:windows": [],
+        "//tensorflow:no_kafka_support": [],
+        "//conditions:default": [
+            "@kafka//:LICENSE",
+        ],
+    }) + select({
         "//tensorflow/core/kernels:xsmm": [
             "@libxsmm_archive//:LICENSE.md",
         ],
@@ -189,16 +217,7 @@ filegroup(
         "@ngraph_tf//:LICENSE",
         "@nlohmann_json_lib//:LICENSE.MIT",
         "@tbb//:LICENSE",
-    ]) + tf_additional_license_deps() + select({
-        "//tensorflow:linux_s390x": [],
-        "//tensorflow:windows": [],
-        "//conditions:default": [
-            "@aws//:LICENSE",
-            "@com_github_googleapis_googleapis//:LICENSE",
-            "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE",
-            "@kafka//:LICENSE",
-        ],
-    }),
+    ]) + tf_additional_license_deps(),
 )
 
 sh_binary(
-- 
GitLab


From a5d494395ec9a4a1e4ea6a30e06734b303adb864 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 15:32:29 -0700
Subject: [PATCH 0914/1085] Performance improvement for distillation of DNNs to
 trees. * Move the DNN network to the training-only graph so it is not in the
 eval if we're doing distillation. This makes the inference much faster
 because we no longer run the DNN. * Simplify use_core_version logic. Now we
 always return model_fn for contrib and estimator_spec for core.

PiperOrigin-RevId: 216932818
---
 .../dnn_tree_combined_estimator.py            | 163 +++++++++---------
 .../dnn_tree_combined_estimator_test.py       |  79 +++++----
 2 files changed, 128 insertions(+), 114 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py
index 194a5c8754..ca73e4af2f 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py
@@ -28,7 +28,6 @@ import six
 from tensorflow.contrib import layers
 from tensorflow.contrib.boosted_trees.estimator_batch import model
 from tensorflow.contrib.boosted_trees.estimator_batch import distillation_loss
-from tensorflow.contrib.boosted_trees.estimator_batch import estimator_utils
 from tensorflow.contrib.boosted_trees.estimator_batch import trainer_hooks
 from tensorflow.contrib.boosted_trees.python.ops import model_ops
 from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch
@@ -170,6 +169,7 @@ def _dnn_tree_combined_model_fn(
   if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and
       not use_core_versions):
     raise ValueError("You must use core versions with Estimator Spec")
+  global_step = training_util.get_global_step()
 
   with variable_scope.variable_scope(
       dnn_parent_scope,
@@ -191,46 +191,58 @@ def _dnn_tree_combined_model_fn(
             feature_columns=dnn_feature_columns,
             weight_collections=[dnn_parent_scope],
             scope=input_layer_scope)
-    previous_layer = input_layer
-    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
+    def dnn_logits_fn():
+      """Builds the logits from the input layer."""
+      previous_layer = input_layer
+      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
+        with variable_scope.variable_scope(
+            "hiddenlayer_%d" % layer_id,
+            values=(previous_layer,)) as hidden_layer_scope:
+          net = layers.fully_connected(
+              previous_layer,
+              num_hidden_units,
+              activation_fn=dnn_activation_fn,
+              variables_collections=[dnn_parent_scope],
+              scope=hidden_layer_scope)
+          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
+            net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
+        _add_hidden_layer_summary(net, hidden_layer_scope.name)
+        previous_layer = net
       with variable_scope.variable_scope(
-          "hiddenlayer_%d" % layer_id,
-          values=(previous_layer,)) as hidden_layer_scope:
-        net = layers.fully_connected(
+          "logits", values=(previous_layer,)) as logits_scope:
+        dnn_logits = layers.fully_connected(
             previous_layer,
-            num_hidden_units,
-            activation_fn=dnn_activation_fn,
+            head.logits_dimension,
+            activation_fn=None,
             variables_collections=[dnn_parent_scope],
-            scope=hidden_layer_scope)
-        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
-          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
-      _add_hidden_layer_summary(net, hidden_layer_scope.name)
-      previous_layer = net
-    with variable_scope.variable_scope(
-        "logits", values=(previous_layer,)) as logits_scope:
-      dnn_logits = layers.fully_connected(
-          previous_layer,
-          head.logits_dimension,
-          activation_fn=None,
-          variables_collections=[dnn_parent_scope],
-          scope=logits_scope)
-    _add_hidden_layer_summary(dnn_logits, logits_scope.name)
-
-    def _dnn_train_op_fn(loss):
-      """Returns the op to optimize the loss."""
-      return optimizers.optimize_loss(
-          loss=loss,
-          global_step=training_util.get_global_step(),
-          learning_rate=_DNN_LEARNING_RATE,
-          optimizer=_get_optimizer(dnn_optimizer),
-          name=dnn_parent_scope,
-          variables=ops.get_collection(
-              ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope),
-          # Empty summaries to prevent optimizers from logging training_loss.
-          summaries=[])
+            scope=logits_scope)
+      _add_hidden_layer_summary(dnn_logits, logits_scope.name)
+      return dnn_logits
+    if predict_with_tree_only and mode == model_fn.ModeKeys.INFER:
+      dnn_logits = array_ops.constant(0.0)
+      dnn_train_op_fn = control_flow_ops.no_op
+    elif predict_with_tree_only and mode == model_fn.ModeKeys.EVAL:
+      dnn_logits = control_flow_ops.cond(
+          global_step > dnn_steps_to_train,
+          lambda: array_ops.constant(0.0),
+          dnn_logits_fn)
+      dnn_train_op_fn = control_flow_ops.no_op
+    else:
+      dnn_logits = dnn_logits_fn()
+      def dnn_train_op_fn(loss):
+        """Returns the op to optimize the loss."""
+        return optimizers.optimize_loss(
+            loss=loss,
+            global_step=training_util.get_global_step(),
+            learning_rate=_DNN_LEARNING_RATE,
+            optimizer=_get_optimizer(dnn_optimizer),
+            name=dnn_parent_scope,
+            variables=ops.get_collection(
+                ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope),
+            # Empty summaries to prevent optimizers from logging training_loss.
+            summaries=[])
 
   # Build Tree Logits.
-  global_step = training_util.get_global_step()
   with ops.device(global_step.device):
     ensemble_handle = model_ops.tree_ensemble_variable(
         stamp_token=0,
@@ -261,8 +273,13 @@ def _dnn_tree_combined_model_fn(
       """Returns the op to optimize the loss."""
       if dnn_to_tree_distillation_param:
         loss_weight, loss_fn = dnn_to_tree_distillation_param
-        weight_tensor = head_lib._weight_tensor(  # pylint: disable=protected-access
-            features, head.weight_column_name)
+        # pylint: disable=protected-access
+        if use_core_versions:
+          weight_tensor = head_lib._weight_tensor(features, head._weight_column)
+        else:
+          weight_tensor = head_lib._weight_tensor(
+              features, head.weight_column_name)
+        # pylint: enable=protected-access
         dnn_logits_fixed = array_ops.stop_gradient(dnn_logits)
 
         if loss_fn is None:
@@ -305,52 +322,26 @@ def _dnn_tree_combined_model_fn(
   finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()
 
   if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS:
-    if use_core_versions:
-      model_fn_ops = head.create_estimator_spec(
-          features=features,
-          mode=mode,
-          labels=labels,
-          train_op_fn=_no_train_op_fn,
-          logits=tree_train_logits)
-      dnn_train_op = head.create_estimator_spec(
-          features=features,
-          mode=mode,
-          labels=labels,
-          train_op_fn=_dnn_train_op_fn,
-          logits=dnn_logits)
-      dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
-          dnn_train_op).train_op
-
-      tree_train_op = head.create_estimator_spec(
-          features=tree_features,
-          mode=mode,
-          labels=labels,
-          train_op_fn=_tree_train_op_fn,
-          logits=tree_train_logits)
-      tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
-          tree_train_op).train_op
-
-      model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops(
-          model_fn_ops)
-    else:
-      model_fn_ops = head.create_model_fn_ops(
-          features=features,
-          mode=mode,
-          labels=labels,
-          train_op_fn=_no_train_op_fn,
-          logits=tree_train_logits)
-      dnn_train_op = head.create_model_fn_ops(
-          features=features,
-          mode=mode,
-          labels=labels,
-          train_op_fn=_dnn_train_op_fn,
-          logits=dnn_logits).train_op
-      tree_train_op = head.create_model_fn_ops(
-          features=tree_features,
-          mode=mode,
-          labels=labels,
-          train_op_fn=_tree_train_op_fn,
-          logits=tree_train_logits).train_op
+    model_fn_ops = head.create_model_fn_ops(
+        features=features,
+        mode=mode,
+        labels=labels,
+        train_op_fn=_no_train_op_fn,
+        logits=tree_train_logits)
+    if mode != model_fn.ModeKeys.TRAIN:
+      return model_fn_ops
+    dnn_train_op = head.create_model_fn_ops(
+        features=features,
+        mode=mode,
+        labels=labels,
+        train_op_fn=dnn_train_op_fn,
+        logits=dnn_logits).train_op
+    tree_train_op = head.create_model_fn_ops(
+        features=tree_features,
+        mode=mode,
+        labels=labels,
+        train_op_fn=_tree_train_op_fn,
+        logits=tree_train_logits).train_op
 
     # Add the hooks
     model_fn_ops.training_hooks.extend([
@@ -369,11 +360,13 @@ def _dnn_tree_combined_model_fn(
         labels=labels,
         train_op_fn=_no_train_op_fn,
         logits=tree_train_logits)
+    if mode != model_fn.ModeKeys.TRAIN:
+      return fusion_spec
     dnn_spec = head.create_estimator_spec(
         features=features,
         mode=mode,
         labels=labels,
-        train_op_fn=_dnn_train_op_fn,
+        train_op_fn=dnn_train_op_fn,
         logits=dnn_logits)
     tree_spec = head.create_estimator_spec(
         features=tree_features,
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
index 839eedd3a8..dea19b7c62 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py
@@ -18,13 +18,17 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 import tempfile
 from tensorflow.contrib.boosted_trees.estimator_batch import dnn_tree_combined_estimator as estimator
 from tensorflow.contrib.boosted_trees.proto import learner_pb2
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
 from tensorflow.contrib.learn.python.learn.estimators import run_config
+from tensorflow.python.estimator import exporter
 from tensorflow.python.estimator.canned import head as head_lib
+from tensorflow.python.estimator.export import export
+from tensorflow.python.ops import parsing_ops
 from tensorflow.python.feature_column import feature_column_lib as core_feature_column
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -34,6 +38,7 @@ from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import googletest
 from tensorflow.python.training import checkpoint_utils
 
+
 def _train_input_fn():
   features = {
       "x": constant_op.constant([[2.], [1.], [1.]])
@@ -103,35 +108,6 @@ class DNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase):
     classifier.fit(input_fn=_train_input_fn, steps=15)
     classifier.evaluate(input_fn=_eval_input_fn, steps=1)
 
-  def testFitAndEvaluateDontThrowExceptionWithCore(self):
-    learner_config = learner_pb2.LearnerConfig()
-    learner_config.num_classes = 2
-    learner_config.constraints.max_tree_depth = 1
-    model_dir = tempfile.mkdtemp()
-    config = run_config.RunConfig()
-
-    # Use core head
-    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
-        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)
-
-    classifier = estimator.DNNBoostedTreeCombinedEstimator(
-        head=head_fn,
-        dnn_hidden_units=[1],
-        # Use core feature columns
-        dnn_feature_columns=[core_feature_column.numeric_column("x")],
-        tree_learner_config=learner_config,
-        num_trees=1,
-        tree_examples_per_layer=3,
-        model_dir=model_dir,
-        config=config,
-        dnn_steps_to_train=10,
-        dnn_input_layer_to_tree=True,
-        tree_feature_columns=[],
-        use_core_versions=True)
-
-    classifier.fit(input_fn=_train_input_fn, steps=15)
-    classifier.evaluate(input_fn=_eval_input_fn, steps=1)
-
   def testFitAndEvaluateWithDistillation(self):
     learner_config = learner_pb2.LearnerConfig()
     learner_config.num_classes = 2
@@ -223,6 +199,51 @@ class CoreDNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase):
     self.assertLess(0.5, res["auc"])
     est.predict(input_fn=_eval_input_fn)
 
+  def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
+    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
+        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)
+
+    learner_config = learner_pb2.LearnerConfig()
+    learner_config.num_classes = 2
+    learner_config.constraints.max_tree_depth = 3
+    model_dir = tempfile.mkdtemp()
+    config = run_config.RunConfig()
+
+    est = estimator.CoreDNNBoostedTreeCombinedEstimator(
+        head=head_fn,
+        dnn_hidden_units=[1],
+        dnn_feature_columns=[core_feature_column.numeric_column("x")],
+        tree_learner_config=learner_config,
+        num_trees=1,
+        tree_examples_per_layer=3,
+        model_dir=model_dir,
+        config=config,
+        dnn_steps_to_train=10,
+        dnn_input_layer_to_tree=True,
+        predict_with_tree_only=True,
+        dnn_to_tree_distillation_param=(0.5, None),
+        tree_feature_columns=[])
+
+    # Train for a few steps.
+    est.train(input_fn=_train_input_fn, steps=1000)
+    res = est.evaluate(input_fn=_eval_input_fn, steps=1)
+    self.assertLess(0.5, res["auc"])
+    est.predict(input_fn=_eval_input_fn)
+    serving_input_fn = (
+        export.build_parsing_serving_input_receiver_fn(
+            feature_spec={"x": parsing_ops.FixedLenFeature(
+                [1], dtype=dtypes.float32)}))
+    base_exporter = exporter.FinalExporter(
+        name="Servo",
+        serving_input_receiver_fn=serving_input_fn,
+        assets_extra=None)
+    export_path = os.path.join(model_dir, "export")
+    base_exporter.export(
+        est,
+        export_path=export_path,
+        checkpoint_path=None,
+        eval_result={},
+        is_the_final_export=True)
 
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From 7f77295b8aab2841627ceef6d70a5799df93f10f Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Fri, 12 Oct 2018 15:43:07 -0700
Subject: [PATCH 0915/1085] Refactor TransposeTensor and PrepareTensor for
 shape. Add more checks to reshape and transpose functions and converters.
 Improve tests.

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 297 +++++++++++-------
 .../tensorrt/test/reshape_transpose_test.py   | 182 ++++++++++-
 2 files changed, 362 insertions(+), 117 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index e2ed372f12..b3f5fcd4f9 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -650,8 +650,11 @@ using OpConverter =
 class Converter {
  public:
   explicit Converter(nvinfer1::INetworkDefinition* trt_network,
-                     TRTWeightStore* ws, bool fp16)
-      : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) {
+                     TRTWeightStore* ws, bool fp16, int max_batch_size)
+      : trt_network_(trt_network),
+        weight_store_(ws),
+        fp16_(fp16),
+        max_batch_size_(max_batch_size) {
     this->register_op_converters();
   }
 
@@ -669,6 +672,8 @@ class Converter {
   // TODO(aaroey): fix all the namings.
   bool isFP16() { return fp16_; }
 
+  int GetMaxBatchSize() { return max_batch_size_; }
+
   TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) {
     return this->get_temp_weights(weights.type_, weights.shape_);
   }
@@ -726,18 +731,23 @@ class Converter {
     return trt_tensors_.insert({name, TRT_TensorOrWeights(tensor)}).second;
   }
 
-  nvinfer1::ITensor* TransposeTensor(nvinfer1::ITensor* input_tensor,
-                                     const std::vector<int>& order) {
+  tensorflow::Status TransposeTensor(nvinfer1::ITensor* input_tensor,
+                                     const std::vector<int>& order,
+                                     const nvinfer1::ITensor** output_tensor) {
     const auto dims = input_tensor->getDimensions();
 
-    // TODO(jie): change the return to status and properly exit
-    if (order.size() - 1 != size_t(dims.nbDims))
-      LOG(ERROR) << "Dimension does not match, fail gracefully";
+    if (order.size() - 1 != size_t(dims.nbDims)) {
+      return tensorflow::errors::InvalidArgument(
+        "Rank of perm for transpose does not match with that of the input.");
+    }
+    if (order[0] != 0) {
+      return tensorflow::errors::Unimplemented(
+        "Transpose at batch dimension is not supported.");
+    }
 
     nvinfer1::IShuffleLayer* layer = this->network()->addShuffle(*input_tensor);
-    if (layer == nullptr) {
-      return nullptr;
-    }
+    TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Transpose");
+
     nvinfer1::Permutation permutation;
     for (int32_t i = 0; i < dims.nbDims; ++i) {
       permutation.order[i] = order[i + 1] - 1;
@@ -751,7 +761,52 @@ class Converter {
       reshape_dims.type[i] = dims.type[i];
     }
     layer->setReshapeDimensions(reshape_dims);
-    return layer->getOutput(0);
+
+    *output_tensor = layer->getOutput(0);
+    return tensorflow::Status::OK();
+  }
+
+  // Helper function converts input into tensor with shape specified by dims.
+  tensorflow::Status PrepareTensorForShape(const TRT_TensorOrWeights& input,
+                                           const nvinfer1::Dims& dims,
+                                           const nvinfer1::ITensor** tensor) {
+    // If -1 is not used for one of the dims, we can check if the shapes are
+    // compatible.
+    bool can_check_shapes = true;
+    for (int i = 0; i < dims.nbDims; i++) {
+      if (dims.d[i] == -1) {
+        can_check_shapes = false;
+        break;
+      }
+    }
+    if (can_check_shapes &&
+        GetShapeSize(input.shape()) != GetShapeSize(dims)) {
+      return tensorflow::errors::InvalidArgument(
+          "Reshape shapes are not compatible.");
+    }
+
+    if (input.is_tensor()) {
+      if (DimsEqual(input.shape(), dims)) {
+        *tensor = input.tensor();
+      } else {
+        nvinfer1::IShuffleLayer* layer = this->network()->addShuffle(
+            *const_cast<nvinfer1::ITensor*>(input.tensor()));
+        TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Reshape");
+        layer->setReshapeDimensions(dims);
+        *tensor = layer->getOutput(0);
+      }
+    } else {
+#if NV_TENSORRT_MAJOR > 3
+      nvinfer1::IConstantLayer* layer =
+          this->network()->addConstant(dims, input.weights());
+      TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Reshape");
+      *tensor = layer->getOutput(0);
+#else
+      return tensorflow::errors::Unimplemented(
+        "Can't reshape constant. Please upgrade to TRT 4 or above.");
+#endif
+    }
+    return tensorflow::Status::OK();
   }
 
  private:
@@ -767,6 +822,8 @@ class Converter {
 
   bool fp16_;
 
+  int max_batch_size_;
+
   void register_op_converters();
 
   tensorflow::Status get_inputs(const tensorflow::NodeDef& node_def,
@@ -1140,9 +1197,10 @@ tensorflow::Status BinaryTensorOpWeight(
       }
       permutation[1] = dims_t.nbDims;
       permutation[dims_t.nbDims] = 1;
-      tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor),
-                                   permutation);
-      TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name());
+      TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+          const_cast<nvinfer1::ITensor*>(tensor),
+          permutation,
+          &tensor));
     } else {
       return tensorflow::errors::InvalidArgument(
           "Transpose cannot be applied, " + node_def.name());
@@ -1203,15 +1261,18 @@ tensorflow::Status BinaryTensorOpWeight(
       scale_weights, power_weights);
   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
 
-  nvinfer1::ITensor* output_tensor = layer->getOutput(0);
+  const nvinfer1::ITensor* output_tensor = layer->getOutput(0);
   // transpose back dimension
   if (permutation_flag) {
-    output_tensor = ctx.TransposeTensor(output_tensor, permutation);
-    TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        const_cast<nvinfer1::ITensor*>(output_tensor),
+        permutation,
+        &output_tensor));
   }
 
   // Pass the output
-  outputs->push_back(TRT_TensorOrWeights(output_tensor));
+  outputs->push_back(TRT_TensorOrWeights(
+      const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
 }
 
@@ -1229,9 +1290,10 @@ tensorflow::Status ConvertConv2DHelper(
   int w_index = 3;
   auto data_format = attrs.get<string>("data_format");
   if (data_format == "NHWC") {
-    tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor),
-                                 {0, 3, 1, 2});
-    TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        const_cast<nvinfer1::ITensor*>(tensor),
+        {0, 3, 1, 2},
+        &tensor));
     h_index = 1;
     w_index = 2;
     // TODO(jie): transpose it
@@ -1309,15 +1371,18 @@ tensorflow::Status ConvertConv2DHelper(
   layer->setPadding({padding[0].first, padding[1].first});
   layer->setName(node_def.name().c_str());
   layer->setNbGroups(num_groups);
-  nvinfer1::ITensor* output_tensor = layer->getOutput(0);
+  const nvinfer1::ITensor* output_tensor = layer->getOutput(0);
   VLOG(2) << "TENSOR out: " << DebugString(output_tensor->getDimensions());
   VLOG(2) << "data_format: " << data_format;
   if (data_format == "NHWC") {
     // TODO(jie): transpose it back!
-    output_tensor = ctx.TransposeTensor(output_tensor, {0, 2, 3, 1});
-    TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        const_cast<nvinfer1::ITensor*>(output_tensor),
+        {0, 2, 3, 1},
+        &output_tensor));
   }
-  outputs->push_back(TRT_TensorOrWeights(output_tensor));
+  outputs->push_back(TRT_TensorOrWeights(
+      const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
 }
 
@@ -1335,39 +1400,6 @@ tensorflow::Status ConvertConv2DHelper(
                                            node_def.name());
 }
 
-// Helper function converts input into tensor with shape specified by dims.
-bool PrepareTensorForShape(Converter& ctx, const TRT_TensorOrWeights& input,
-                           const nvinfer1::Dims& dims,
-                           const nvinfer1::ITensor** tensor) {
-  if (input.is_tensor()) {
-    if (DimsEqual(input.shape(), dims)) {
-      *tensor = input.tensor();
-    } else {
-      nvinfer1::IShuffleLayer* layer = ctx.network()->addShuffle(
-          *const_cast<nvinfer1::ITensor*>(input.tensor()));
-      if (layer != nullptr) {
-        layer->setReshapeDimensions(dims);
-        *tensor = layer->getOutput(0);
-      } else {
-        return false;
-      }
-    }
-  } else {
-#if NV_TENSORRT_MAJOR > 3
-    nvinfer1::IConstantLayer* layer =
-        ctx.network()->addConstant(dims, input.weights());
-    if (layer != nullptr) {
-      *tensor = layer->getOutput(0);
-    } else {
-      return false;
-    }
-#else
-    return false;
-#endif
-  }
-  return true;
-}
-
 tensorflow::Status BinaryTensorOpTensor(
     Converter& ctx, const tensorflow::NodeDef& node_def,
     const TRT_TensorOrWeights& operand_l, const TRT_TensorOrWeights& operand_r,
@@ -1396,10 +1428,8 @@ tensorflow::Status BinaryTensorOpTensor(
         node_def.op() + ", at: " + node_def.name());
   }
 
-  TFTRT_RETURN_ERROR_IF_FALSE(
-      PrepareTensorForShape(ctx, operand_l, dim_l, &tensor_l), node_def.name());
-  TFTRT_RETURN_ERROR_IF_FALSE(
-      PrepareTensorForShape(ctx, operand_r, dim_r, &tensor_r), node_def.name());
+  TF_RETURN_IF_ERROR(ctx.PrepareTensorForShape(operand_l, dim_l, &tensor_l));
+  TF_RETURN_IF_ERROR(ctx.PrepareTensorForShape(operand_r, dim_r, &tensor_r));
 
   // get trt type & shape
   TFAttrs attrs(node_def);
@@ -1487,8 +1517,15 @@ tensorflow::Status ConvertTranspose(
     perm[i] = weights_ptr[i];
   }
 
-  nvinfer1::ITensor* output_tensor = ctx.TransposeTensor(input_tensor, perm);
-  outputs->push_back(TRT_TensorOrWeights(output_tensor));
+  if (perm[0] != 0) {
+    return tensorflow::errors::Unimplemented(
+        "Transpose at batch dimension is not supported, at", node_def.name());
+  }
+
+  const nvinfer1::ITensor* output_tensor = nullptr;
+  TF_RETURN_IF_ERROR(ctx.TransposeTensor(input_tensor, perm, &output_tensor));
+  outputs->push_back(TRT_TensorOrWeights(
+      const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
 }
 
@@ -1502,6 +1539,12 @@ tensorflow::Status ConvertReshape(
   }
 
   TRT_ShapedWeights weights = inputs.at(1).weights();
+  if (weights.count() == 0) {
+    return tensorflow::errors::Unimplemented(
+        "Reshape to shape=[] is not supported, at", node_def.name());
+  }
+
+  // Get new_shape
   const int* weights_ptr = static_cast<int*>(const_cast<void*>(
       weights.GetValues()));
   nvinfer1::Dims new_shape;
@@ -1511,10 +1554,22 @@ tensorflow::Status ConvertReshape(
     new_shape.d[i-1] = weights_ptr[i];
   }
 
-  const nvinfer1::ITensor* output_tensor;
-  TFTRT_RETURN_ERROR_IF_FALSE(
-      PrepareTensorForShape(ctx, inputs.at(0), new_shape, &output_tensor),
-      node_def.name());
+  // Check that batch dimension doesn't change
+  const nvinfer1::Dims input_shape = inputs.at(0).shape();
+  if (weights_ptr[0] == -1) {
+    // Product of input shape should equal product of new_shape
+    if (GetShapeSize(input_shape) != GetShapeSize(new_shape)) {
+      return tensorflow::errors::Unimplemented(
+        "Reshape on the batch dimension is not supported.");
+    }
+  } else if (weights_ptr[0] != ctx.GetMaxBatchSize()) {
+    return tensorflow::errors::Unimplemented(
+        "Reshape on the batch dimension is not supported.");
+  }
+
+  const nvinfer1::ITensor* output_tensor = nullptr;
+  TF_RETURN_IF_ERROR(
+      ctx.PrepareTensorForShape(inputs.at(0), new_shape, &output_tensor));
   outputs->push_back(TRT_TensorOrWeights(
       const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
@@ -1549,9 +1604,10 @@ tensorflow::Status ConvertPool(Converter& ctx,
   if (data_format == "NHWC") {
     h_index = 1;
     w_index = 2;
-    tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor),
-                                 {0, 3, 1, 2});
-    TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        const_cast<nvinfer1::ITensor*>(tensor),
+        {0, 3, 1, 2},
+        &tensor));
   }
 
   nvinfer1::PoolingType type;
@@ -1607,13 +1663,16 @@ tensorflow::Status ConvertPool(Converter& ctx,
   layer->setStride(stride);
   layer->setPadding({padding[0].first, padding[1].first});
   layer->setName(node_def.name().c_str());
-  nvinfer1::ITensor* output_tensor = layer->getOutput(0);
+  const nvinfer1::ITensor* output_tensor = layer->getOutput(0);
 
   if (data_format == "NHWC") {
-    output_tensor = ctx.TransposeTensor(output_tensor, {0, 2, 3, 1});
-    TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        const_cast<nvinfer1::ITensor*>(output_tensor),
+        {0, 2, 3, 1},
+        &output_tensor));
   }
-  outputs->push_back(TRT_TensorOrWeights(output_tensor));
+  outputs->push_back(TRT_TensorOrWeights(
+      const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
 }
 
@@ -1937,10 +1996,9 @@ tensorflow::Status ConvertUnary(Converter& ctx,
 #endif
 
   // TODO(jie): check type
-  const nvinfer1::ITensor* tensor;
-  TFTRT_RETURN_ERROR_IF_FALSE(
-      PrepareTensorForShape(ctx, inputs.at(0), inputs.at(0).shape(), &tensor),
-      node_def.name());
+  const nvinfer1::ITensor* tensor = nullptr;
+  TF_RETURN_IF_ERROR(
+      ctx.PrepareTensorForShape(inputs.at(0), inputs.at(0).shape(), &tensor));
 
   nvinfer1::IUnaryLayer* layer;
   if (node_def.op() == "Rsqrt") {
@@ -1960,7 +2018,8 @@ tensorflow::Status ConvertUnary(Converter& ctx,
 
   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
-  outputs->push_back(TRT_TensorOrWeights(output_tensor));
+  outputs->push_back(TRT_TensorOrWeights(
+      const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
 }
 
@@ -2030,16 +2089,17 @@ tensorflow::Status ConvertReducePool(
     permutation_order[1] = permuted_index;
 
     // Apply permutation before extracting dimension for pool_kernel
-    tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor),
-                                 permutation_order);
-    TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        const_cast<nvinfer1::ITensor*>(tensor),
+        permutation_order,
+        &tensor));
   }
 
   // Apply permutation before extracting dimension for pool_kernel
   pool_kernel.d[0] = (idx_set.count(2) || permuted_index == 2) ? dims.d[1] : 1;
   pool_kernel.d[1] = (idx_set.count(3) || permuted_index == 3) ? dims.d[2] : 1;
 
-  nvinfer1::ITensor* output_tensor;
+  const nvinfer1::ITensor* output_tensor = nullptr;
 
   if (node_def.op() == "Mean") {
     nvinfer1::IPoolingLayer* layer =
@@ -2053,11 +2113,13 @@ tensorflow::Status ConvertReducePool(
   }
   if (permuted_index != -1) {
     // Apply permutation before extracting dimension for pool_kernel
-    output_tensor = ctx.TransposeTensor(
-        const_cast<nvinfer1::ITensor*>(output_tensor), permutation_order);
-    TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        const_cast<nvinfer1::ITensor*>(output_tensor),
+        permutation_order,
+        &output_tensor));
   }
-  outputs->push_back(TRT_TensorOrWeights(output_tensor));
+  outputs->push_back(TRT_TensorOrWeights(
+      const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
 }
 #elif NV_TENSORRT_MAJOR > 3
@@ -2205,9 +2267,10 @@ tensorflow::Status ConvertPad(Converter& ctx,
   std::vector<int32_t> permuted_pad_index(pad_index);
   if (pad_index[0] == 1) {
     legit_pad = false;
-    tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor),
-                                 {0, 3, 2, 1});
-    TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        const_cast<nvinfer1::ITensor*>(tensor),
+        {0, 3, 2, 1},
+        &tensor));
     permuted_pad_index[0] = 3;
   }
 
@@ -2225,15 +2288,17 @@ tensorflow::Status ConvertPad(Converter& ctx,
   nvinfer1::IPaddingLayer* layer = ctx.network()->addPadding(
       *const_cast<nvinfer1::ITensor*>(tensor), pre_padding, post_padding);
   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
-  nvinfer1::ITensor* output_tensor = layer->getOutput(0);
+  const nvinfer1::ITensor* output_tensor = layer->getOutput(0);
 
   if (!legit_pad) {
-    output_tensor = ctx.TransposeTensor(
-        const_cast<nvinfer1::ITensor*>(output_tensor), {0, 3, 2, 1});
-    TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        const_cast<nvinfer1::ITensor*>(output_tensor),
+        {0, 3, 2, 1},
+        &output_tensor));
   }
 
-  outputs->push_back(TRT_TensorOrWeights(output_tensor));
+  outputs->push_back(TRT_TensorOrWeights(
+      const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
 }
 
@@ -2314,9 +2379,10 @@ tensorflow::Status ConvertConcat(Converter& ctx,
 #if NV_TENSORRT_MAJOR == 3
     // TRT3 does concatenation only on channel!
     if (index != 1) {
-      tensor_i = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor_i),
-                                     permutation_order);
-      TFTRT_RETURN_ERROR_IF_NULLPTR(tensor_i, node_def.name());
+      TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+          const_cast<nvinfer1::ITensor*>(tensor_i),
+          permutation_order,
+          &tensor_i));
     }
 #endif
     inputs_vec.push_back(tensor_i);
@@ -2334,8 +2400,10 @@ tensorflow::Status ConvertConcat(Converter& ctx,
 
 #if NV_TENSORRT_MAJOR == 3
   if (index != 1) {
-    output_tensor = ctx.TransposeTensor(output_tensor, permutation_order);
-    TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name());
+    TF_RETURN_IF_ERROR(ctx.TransposeTensor(
+        output_tensor,
+        permutation_order,
+        &output_tensor));
   }
 #endif
   outputs->push_back(TRT_TensorOrWeights(output_tensor));
@@ -2489,21 +2557,20 @@ tensorflow::Status ConvertMatMulHelper(
   while (input_dim.nbDims != 3) {
     input_dim.d[input_dim.nbDims++] = 1;
   }
-  TFTRT_RETURN_ERROR_IF_FALSE(
-      PrepareTensorForShape(ctx, tensor_input, input_dim, &tensor), node_name);
+  TF_RETURN_IF_ERROR(
+      ctx.PrepareTensorForShape(tensor_input, input_dim, &tensor));
 
   nvinfer1::IFullyConnectedLayer* layer = ctx.network()->addFullyConnected(
       *const_cast<nvinfer1::ITensor*>(tensor), noutput, weights, biases);
   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_name);
   output_tensor = layer->getOutput(0);
 
-  const nvinfer1::ITensor* temp_tensor;
+  const nvinfer1::ITensor* temp_tensor = nullptr;
   auto output_dim = output_tensor->getDimensions();
   output_dim.nbDims = 1;
-  TFTRT_RETURN_ERROR_IF_FALSE(
-      PrepareTensorForShape(ctx, TRT_TensorOrWeights(output_tensor), output_dim,
-                            &temp_tensor),
-      node_name);
+  TF_RETURN_IF_ERROR(
+      ctx.PrepareTensorForShape(TRT_TensorOrWeights(output_tensor), output_dim,
+                                &temp_tensor));
   output_tensor = const_cast<nvinfer1::ITensor*>(temp_tensor);
   outputs->push_back(TRT_TensorOrWeights(output_tensor));
   return tensorflow::Status::OK();
@@ -2604,13 +2671,10 @@ tensorflow::Status ConvertBatchMatMul(
       dims_r.nbDims--;
     }
   }
-
-  TFTRT_RETURN_ERROR_IF_FALSE(
-      PrepareTensorForShape(ctx, inputs.at(0), dims_l, &tensor_l),
-      node_def.name());
-  TFTRT_RETURN_ERROR_IF_FALSE(
-      PrepareTensorForShape(ctx, inputs.at(1), dims_r, &tensor_r),
-      node_def.name());
+  TF_RETURN_IF_ERROR(
+      ctx.PrepareTensorForShape(inputs.at(0), dims_l, &tensor_l));
+  TF_RETURN_IF_ERROR(
+      ctx.PrepareTensorForShape(inputs.at(1), dims_r, &tensor_r));
 
   nvinfer1::IMatrixMultiplyLayer* layer = ctx.network()->addMatrixMultiply(
       *const_cast<nvinfer1::ITensor*>(tensor_l), transpose_a,
@@ -2782,7 +2846,8 @@ tensorflow::Status ConvertGraphDefToEngine(
 
   // Build the network
   VLOG(1) << "Starting engine conversion ";
-  Converter converter(trt_network.get(), ws.get(), precision_mode == FP16MODE);
+  Converter converter(trt_network.get(), ws.get(), precision_mode == FP16MODE,
+                      max_batch_size);
   std::vector<std::pair<string, string>> output_tensors;
   // Graph nodes are already topologically sorted during construction
   for (const auto& node_def : gdef.node()) {
diff --git a/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py b/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
index 81dad5e1a4..61d95bb242 100644
--- a/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
+++ b/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
@@ -46,8 +46,9 @@ class SimpleReshapeTest(trt_test.TfTrtIntegrationTestBase):
           dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
       with g.device("/GPU:0"):
         reshape = array_ops.reshape(inp, [-1, 24*24*2])
-        print('RESHAPE SHAPE', reshape.get_shape().as_list())
+        # Add identities to ensure we have at least min_segment_size=3 nodes
         identity = array_ops.identity(reshape, "identity")
+        identity = array_ops.identity(identity, "identity2")
       array_ops.identity(identity, name=output_name)
     return trt_test.TfTrtIntegrationTestParams(
         gdef=g.as_graph_def(),
@@ -60,6 +61,150 @@ class SimpleReshapeTest(trt_test.TfTrtIntegrationTestBase):
     """Return the expected engines to build."""
     return ["my_trt_op_0"]
 
+class ReshapeToScalarTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [1]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=input_dims, name=input_name)
+      with g.device("/GPU:0"):
+        reshape = array_ops.reshape(inp, [])
+        # Add identities to ensure we have at least min_segment_size=3 nodes
+        identity = array_ops.identity(reshape, "identity")
+        identity = array_ops.identity(identity, "identity2")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[()])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return []
+
+  def ShouldRunTest(self, run_params):
+    """Whether to run the test."""
+    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
+    # a calib graph. Doesn't seem to contain any calibration nodes.""
+    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
+            not run_params.dynamic_engine)
+
+class ReshapeBatchDimensionTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [100, 24, 24, 2]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device("/GPU:0"):
+        reshape = array_ops.reshape(inp, [2, 50, 24, 24, 2])
+        # Add identities to ensure we have at least min_segment_size=3 nodes
+        identity = array_ops.identity(reshape, "identity")
+        identity = array_ops.identity(identity, "identity2")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(2, 50, 24, 24, 2)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return []
+
+  def ShouldRunTest(self, run_params):
+    """Whether to run the test."""
+    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
+    # a calib graph. Doesn't seem to contain any calibration nodes.""
+    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
+            not run_params.dynamic_engine)
+
+class ReshapeBatchDimensionTest2(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [100, 24, 24, 2]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device("/GPU:0"):
+        reshape = array_ops.reshape(inp, [-1, 50, 24, 24, 2])
+        # Add identities to ensure we have at least min_segment_size=3 nodes
+        identity = array_ops.identity(reshape, "identity")
+        identity = array_ops.identity(identity, "identity2")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(2, 50, 24, 24, 2)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return []
+
+  def ShouldRunTest(self, run_params):
+    """Whether to run the test."""
+    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
+    # a calib graph. Doesn't seem to contain any calibration nodes.""
+    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
+            not run_params.dynamic_engine)
+
+class ReshapeBatchDimensionTest3(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [100, 24, 24, 2]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device("/GPU:0"):
+        reshape = array_ops.reshape(inp, [2, 50, -1, 24, 2])
+        # Add identities to ensure we have at least min_segment_size=3 nodes
+        identity = array_ops.identity(reshape, "identity")
+        identity = array_ops.identity(identity, "identity2")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(2, 50, 24, 24, 2)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return []
+
+  def ShouldRunTest(self, run_params):
+    """Whether to run the test."""
+    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
+    # a calib graph. Doesn't seem to contain any calibration nodes.""
+    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
+            not run_params.dynamic_engine)
+
 class ReshapeInverseTest(trt_test.TfTrtIntegrationTestBase):
 
   def GetParams(self):
@@ -149,6 +294,41 @@ class SimpleTransposeTest(trt_test.TfTrtIntegrationTestBase):
     """Return the expected engines to build."""
     return ["my_trt_op_0"]
 
+class TransposeBatchDimensionTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """Create a graph containing single segment."""
+    dtype = dtypes.float32
+    input_name = "input"
+    input_dims = [100, 24, 24, 2]
+    output_name = "output"
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device("/GPU:0"):
+        # to NCHW
+        transpose = array_ops.transpose(inp, [2, 1, 0, 3])
+        identity = array_ops.identity(transpose, "identity")
+      array_ops.identity(identity, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(24, 24, 100, 2)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """Return the expected engines to build."""
+    return []
+
+  def ShouldRunTest(self, run_params):
+    """Whether to run the test."""
+    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
+    # a calib graph. Doesn't seem to contain any calibration nodes.""
+    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
+            not run_params.dynamic_engine)
+
 class TransposeInverseTest(trt_test.TfTrtIntegrationTestBase):
 
   def GetParams(self):
-- 
GitLab


From edcffca64658e445f3aff6e990b0d271e092eeca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 15:33:39 -0700
Subject: [PATCH 0916/1085] Add some benchmarks for parallal_for.

PiperOrigin-RevId: 216932970
---
 .../ops/parallel_for/control_flow_ops_test.py | 75 ++++++++++++++++---
 .../python/ops/parallel_for/gradients_test.py | 26 +++++++
 2 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
index 6e276dee55..3ad9f9ac58 100644
--- a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
+++ b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
@@ -35,6 +35,7 @@ from tensorflow.python.ops import bitwise_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gradients as gradient_ops
 from tensorflow.python.ops import logging_ops
 from tensorflow.python.ops import math_ops
@@ -1358,15 +1359,78 @@ class Benchmarks(test.Benchmark):
     with sess:
       init = variables.global_variables_initializer()
       sess.run(init)
-      sess.run(targets)
+      run_fn = sess.make_callable(targets)
+      run_fn()  # Warm up
       begin = time.time()
       for _ in range(iters):
-        sess.run(targets)
+        run_fn()
       end = time.time()
     avg_time_ms = 1000 * (end - begin) / iters
     self.report_benchmark(iters=iters, wall_time=avg_time_ms, name=name)
     return avg_time_ms
 
+  def benchmark_sess_run_overhead(self):
+    with ops.Graph().as_default():
+      x = constant_op.constant(1.0)
+      self._run(x, 10000, name="session_run_overhead")
+
+  def benchmark_add(self):
+    with ops.Graph().as_default():
+      n = 256
+      params = 1000
+      x = random_ops.random_normal([n, params])
+      y = random_ops.random_normal([n, params])
+
+      def loop_fn(i):
+        x_i = array_ops.gather(x, i)
+        y_i = array_ops.gather(y, i)
+        return x_i + y_i
+
+      pfor_outputs = pfor_control_flow_ops.pfor(loop_fn, n)
+      while_outputs = pfor_control_flow_ops.for_loop(loop_fn, dtypes.float32, n)
+      manual = x + y
+
+      self._run(manual, 1000, name="manual_add")
+      self._run(pfor_outputs, 1000, name="pfor_add")
+      self._run(while_outputs, 100, name="while_add")
+
+  def benchmark_matmul(self):
+    with ops.Graph().as_default():
+      n = 1024
+      params = 1000
+      x = random_ops.random_normal([n, params])
+      y = random_ops.random_normal([params, params])
+
+      def loop_fn(i):
+        x_i = array_ops.expand_dims(array_ops.gather(x, i), 0)
+        return math_ops.matmul(x_i, y)
+
+      pfor_outputs = pfor_control_flow_ops.pfor(loop_fn, n)
+      while_outputs = pfor_control_flow_ops.for_loop(loop_fn, dtypes.float32, n)
+      manual = math_ops.matmul(x, y)
+
+      self._run(manual, 1000, name="manual_matmul")
+      self._run(pfor_outputs, 1000, name="pfor_matmul")
+      self._run(while_outputs, 100, name="while_matmul")
+
+  def benchmark_map_fn(self):
+    with ops.Graph().as_default():
+      b = 256
+      params = 1000
+      inp = random_ops.random_normal((b, params))
+      map_fn = lambda x: x * x
+
+      def pfor_map_fn(f, x):
+        return pfor_control_flow_ops.pfor(
+            lambda i: f(array_ops.gather(x, i)),
+            array_ops.shape(x)[0])
+
+      map_output = functional_ops.map_fn(map_fn, inp)
+      pfor_output = pfor_map_fn(map_fn, inp)
+
+      self._run(map_output, 100, name="tf_map_fn")
+      self._run(pfor_output, 100, name="pfor_map_fn")
+
   def benchmark_basic_while(self):
     with ops.Graph().as_default():
 
@@ -1391,13 +1455,6 @@ class Benchmarks(test.Benchmark):
       self._run(pfor_outputs, 100, name="pfor_rnn")
       self._run(tf_outputs, 100, name="tf_rnn")
 
-  def benchmark_dynamic_lstm(self):
-    with ops.Graph().as_default():
-      pfor_outputs, tf_outputs = create_dynamic_lstm(rnn_cell.BasicLSTMCell,
-                                                     128, 512, 16)
-      self._run(pfor_outputs, 100, name="pfor_lstm")
-      self._run(tf_outputs, 100, name="tf_lstm")
-
 
 class SparseTest(PForTest):
 
diff --git a/tensorflow/python/ops/parallel_for/gradients_test.py b/tensorflow/python/ops/parallel_for/gradients_test.py
index 5467f55af6..5a058bae82 100644
--- a/tensorflow/python/ops/parallel_for/gradients_test.py
+++ b/tensorflow/python/ops/parallel_for/gradients_test.py
@@ -258,6 +258,22 @@ class Mnist(keras_training.Model):
     return self.fc2(y)
 
 
+def create_mnist_autobatch(batch_size, data_format, training):
+  images = random_ops.random_uniform([batch_size, 28, 28])
+  model = Mnist(data_format)
+  manual = model(images, training=training)
+
+  def loop_fn(i):
+    image = array_ops.gather(images, i)
+    return model(image, training=training)
+
+  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
+  while_outputs = control_flow_ops.for_loop(
+      loop_fn, dtypes.float32, batch_size)
+
+  return pfor_outputs, while_outputs, manual
+
+
 def create_mnist_per_eg_grad(batch_size, data_format, training):
   images = random_ops.random_uniform([batch_size, 28, 28])
   sparse_labels = np.random.randint(
@@ -577,6 +593,16 @@ class GradientsBenchmarks(test.Benchmark):
       self._run(pfor_outputs, 100, name="lstm_per_eg_grad_pfor")
       self._run(while_outputs, 20, name="lstm_per_eg_grad_while")
 
+  def benchmark_mnist_autobatch(self):
+    with ops.Graph().as_default():
+      data_format = ("channels_first"
+                     if test.is_gpu_available() else "channels_last")
+      pfor_outputs, while_outputs, manual = create_mnist_autobatch(
+          100, data_format, training=False)
+      self._run(pfor_outputs, 100, name="mnist_pfor")
+      self._run(while_outputs, 20, name="mnist_while")
+      self._run(manual, 100, name="mnist_manual")
+
   def benchmark_mnist_per_eg_grad(self):
     with ops.Graph().as_default():
       data_format = ("channels_first"
-- 
GitLab


From 68bca8740782f354ac12cc5bf40a76eabaec0b39 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Fri, 12 Oct 2018 15:45:30 -0700
Subject: [PATCH 0917/1085] [TF:XLA] Bump open source abseil revision to
 a00bdd176d66ef0b417d9576052a19091fbdf891

PiperOrigin-RevId: 216934705
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index c697bf6452..dc2118694a 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -119,11 +119,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "2809f7d97d126ad341c3126711df7bae6336278d959144db293a9b2756b726a8",
-        strip_prefix = "abseil-cpp-f340f773edab951656b19b6f1a77c964a78ec4c2",
+        sha256 = "225b683f2f866cd12b868e43b5af00e032e70e3f78ff332108b0ce15d41f6091",
+        strip_prefix = "abseil-cpp-a00bdd176d66ef0b417d9576052a19091fbdf891",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f340f773edab951656b19b6f1a77c964a78ec4c2.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/f340f773edab951656b19b6f1a77c964a78ec4c2.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/a00bdd176d66ef0b417d9576052a19091fbdf891.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/a00bdd176d66ef0b417d9576052a19091fbdf891.tar.gz",
         ],
     )
 
-- 
GitLab


From df414e1360551dd6783276808b0fc37f7e974876 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Fri, 12 Oct 2018 15:47:14 -0700
Subject: [PATCH 0918/1085] Update Keras RNN to support nested
 input/output/states.

PiperOrigin-RevId: 216934951
---
 tensorflow/python/keras/backend.py            | 233 ++++++++++++------
 tensorflow/python/keras/engine/base_layer.py  |   2 +-
 tensorflow/python/keras/layers/recurrent.py   | 197 +++++++++++----
 .../python/keras/layers/recurrent_test.py     | 224 +++++++++++++++++
 tensorflow/python/keras/layers/wrappers.py    |   6 +-
 ...sorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt |   2 +-
 ...rflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt |   2 +-
 .../v1/tensorflow.keras.layers.-g-r-u.pbtxt   |   4 +-
 .../v1/tensorflow.keras.layers.-l-s-t-m.pbtxt |   4 +-
 .../v1/tensorflow.keras.layers.-r-n-n.pbtxt   |   4 +-
 ...ensorflow.keras.layers.-simple-r-n-n.pbtxt |   4 +-
 ...sorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt |   2 +-
 ...rflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt |   2 +-
 .../v2/tensorflow.keras.layers.-g-r-u.pbtxt   |   4 +-
 .../v2/tensorflow.keras.layers.-l-s-t-m.pbtxt |   4 +-
 .../v2/tensorflow.keras.layers.-r-n-n.pbtxt   |   4 +-
 ...ensorflow.keras.layers.-simple-r-n-n.pbtxt |   4 +-
 17 files changed, 550 insertions(+), 152 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 7509ef9c59..708a442e71 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -57,6 +57,7 @@ from tensorflow.python.ops import tensor_array_grad  # pylint: disable=unused-im
 from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variables as variables_module
 
+from tensorflow.python.util import nest
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.tf_export import tf_export
@@ -3133,11 +3134,13 @@ def rnn(step_function,
                   as 'states'. The first state in the list must be the
                   output tensor at the previous timestep.
       inputs: Tensor of temporal data of shape `(samples, time, ...)`
-          (at least 3D).
-      initial_states: Tensor with shape `(samples, output_dim)`
-          (no time dimension),
-          containing the initial values for the states used in
-          the step function.
+          (at least 3D), or nested tensors, and each of which has shape
+          `(samples, time, ...)`.
+      initial_states: Tensor with shape `(samples, state_size)`
+          (no time dimension), containing the initial values for the states used
+          in the step function. In the case that state_size is in a nested
+          shape, the shape of initial_states will also follow the nested
+          structure.
       go_backwards: Boolean. If True, do the iteration over the time
           dimension in reverse order and return the reversed sequence.
       mask: Binary tensor with shape `(samples, time, 1)`,
@@ -3169,21 +3172,31 @@ def rnn(step_function,
       ValueError: if `mask` is provided (not `None`) but states is not provided
           (`len(states)` == 0).
   """
-  ndim = len(inputs.shape)
-  if ndim < 3:
-    raise ValueError('Input should be at least 3D.')
-  inputs_shape = inputs.shape
+
+  def swap_batch_timestep(input_t):
+    # Swap the batch and timestep dim for the incoming tensor.
+    axes = list(range(len(input_t.shape)))
+    axes[0], axes[1] = 1, 0
+    return array_ops.transpose(input_t, axes)
+
   if not time_major:
-    axes = [1, 0] + list(range(2, ndim))
-    inputs = array_ops.transpose(inputs, axes)
+    inputs = nest.map_structure(swap_batch_timestep, inputs)
+
+  flatted_inputs = nest.flatten(inputs)
+  time_steps = flatted_inputs[0].shape[0]
+  batch = flatted_inputs[0].shape[1]
+  time_steps_t = array_ops.shape(flatted_inputs[0])[0]
+
+  for input_ in flatted_inputs:
+    input_.get_shape().with_rank_at_least(3)
 
   if mask is not None:
     if mask.dtype != dtypes_module.bool:
       mask = math_ops.cast(mask, dtypes_module.bool)
-    if len(mask.shape) == ndim - 1:
+    if len(mask.shape) == 2:
       mask = expand_dims(mask)
     if not time_major:
-      mask = array_ops.transpose(mask, axes)
+      mask = swap_batch_timestep(mask)
 
   if constants is None:
     constants = []
@@ -3191,40 +3204,61 @@ def rnn(step_function,
   global uses_learning_phase  # pylint: disable=global-variable-undefined
   uses_learning_phase = False
 
+  # tf.where needs its condition tensor to be the same shape as its two
+  # result tensors, but in our case the condition (mask) tensor is
+  # (nsamples, 1), and inputs are (nsamples, ndimensions) or even more.
+  # So we need to broadcast the mask to match the shape of inputs.
+  # That's what the tile call does, it just repeats the mask along its
+  # second dimension n times.
+  def _expand_mask(mask_t, input_t):
+    assert not nest.is_sequence(mask_t)
+    assert not nest.is_sequence(input_t)
+    rank_diff = len(input_t.shape) - len(mask_t.shape)
+    for _ in range(rank_diff):
+      mask_t = array_ops.expand_dims(mask_t)
+    expand_dims = [1] + input_t.shape.as_list()[1:]
+    return array_ops.tile(mask_t, expand_dims)
+
   if unroll:
-    if not inputs.shape[0]:
+    if not time_steps:
       raise ValueError('Unrolling requires a fixed number of timesteps.')
     states = initial_states
     successive_states = []
     successive_outputs = []
 
-    input_list = array_ops.unstack(inputs)
-    if go_backwards:
-      input_list.reverse()
+    # Process the input tensors. The input tensor need to be split on the
+    # time_step dim, and reverse if go_backwards is True. In the case of nested
+    # input, the input is flattened and then transformed individually.
+    # The result of this will be a tuple of lists, each of the item in tuple is
+    # list of the tensor with shape (batch, feature)
+    def _process_single_input_t(input_t):
+      input_t = array_ops.unstack(input_t)  # unstack for time_step dim
+      if go_backwards:
+        input_t.reverse()
+      return input_t
+
+    if nest.is_sequence(inputs):
+      processed_input = nest.map_structure(_process_single_input_t, inputs)
+    else:
+      processed_input = (_process_single_input_t(inputs),)
+
+    def _get_input_tensor(time):
+      inp = [t_[time] for t_ in processed_input]
+      return nest.pack_sequence_as(inputs, inp)
 
     if mask is not None:
       mask_list = array_ops.unstack(mask)
       if go_backwards:
         mask_list.reverse()
 
-      for inp, mask_t in zip(input_list, mask_list):
+      for i in range(time_steps):
+        inp = _get_input_tensor(i)
+        mask_t = mask_list[i]
         output, new_states = step_function(inp, states + constants)
         if getattr(output, '_uses_learning_phase', False):
           uses_learning_phase = True
 
-        # tf.where needs its condition tensor
-        # to be the same shape as its two
-        # result tensors, but in our case
-        # the condition (mask) tensor is
-        # (nsamples, 1), and A and B are (nsamples, ndimensions).
-        # So we need to
-        # broadcast the mask to match the shape of A and B.
-        # That's what the tile call does,
-        # it just repeats the mask along its second dimension
-        # n times.
-        tiled_mask_t = array_ops.tile(mask_t,
-                                      array_ops.stack(
-                                          [1, array_ops.shape(output)[1]]))
+        tiled_mask_t = _expand_mask(mask_t, output)
 
         if not successive_outputs:
           prev_output = zeros_like(output)
@@ -3236,10 +3270,7 @@ def rnn(step_function,
         return_states = []
         for state, new_state in zip(states, new_states):
           # (see earlier comment for tile explanation)
-          tiled_mask_t = array_ops.tile(mask_t,
-                                        array_ops.stack(
-                                            [1,
-                                             array_ops.shape(new_state)[1]]))
+          tiled_mask_t = _expand_mask(mask_t, new_state)
           return_states.append(array_ops.where(tiled_mask_t, new_state, state))
         states = return_states
         successive_outputs.append(output)
@@ -3248,7 +3279,8 @@ def rnn(step_function,
       new_states = successive_states[-1]
       outputs = array_ops.stack(successive_outputs)
     else:
-      for inp in input_list:
+      for i in range(time_steps):
+        inp = _get_input_tensor(i)
         output, states = step_function(inp, states + constants)
         if getattr(output, '_uses_learning_phase', False):
           uses_learning_phase = True
@@ -3259,18 +3291,38 @@ def rnn(step_function,
       outputs = array_ops.stack(successive_outputs)
 
   else:
-    if go_backwards:
-      inputs = reverse(inputs, 0)
-
     states = tuple(initial_states)
 
-    time_steps = array_ops.shape(inputs)[0]
-    outputs, _ = step_function(inputs[0], initial_states + constants)
-    output_ta = tensor_array_ops.TensorArray(
-        dtype=outputs.dtype, size=time_steps, tensor_array_name='output_ta')
-    input_ta = tensor_array_ops.TensorArray(
-        dtype=inputs.dtype, size=time_steps, tensor_array_name='input_ta')
-    input_ta = input_ta.unstack(inputs)
+    # Create input tensor array, if the inputs is nested tensors, then it will
+    # be flattened first, and tensor array will be created one per flattened
+    # tensor.
+    input_ta = tuple(
+        tensor_array_ops.TensorArray(
+            dtype=inp.dtype,
+            size=time_steps_t,
+            tensor_array_name='input_ta_%s' % i)
+        for i, inp in enumerate(flatted_inputs))
+    input_ta = tuple(
+        ta.unstack(input_) if not go_backwards else ta
+        .unstack(reverse(input_, 0))
+        for ta, input_ in zip(input_ta, flatted_inputs))
+
+    # Get the time(0) input and compute the output for that, the output will be
+    # used to determine the dtype of output tensor array. Don't read from
+    # input_ta due to TensorArray clear_after_read default to True.
+    input_time_zero = nest.pack_sequence_as(inputs,
+                                            [inp[0] for inp in flatted_inputs])
+    # output_time_zero is used to determine the cell output shape and its dtype.
+    # the value is discarded.
+    output_time_zero, _ = step_function(input_time_zero,
+                                        initial_states + constants)
+    output_ta = tuple(
+        tensor_array_ops.TensorArray(
+            dtype=out.dtype,
+            size=time_steps_t,
+            tensor_array_name='output_ta_%s' % i)
+        for i, out in enumerate(nest.flatten(output_time_zero)))
+
     time = constant_op.constant(0, dtype='int32', name='time')
 
     if mask is not None:
@@ -3286,7 +3338,7 @@ def rnn(step_function,
 
       mask_ta = tensor_array_ops.TensorArray(
           dtype=dtypes_module.bool,
-          size=time_steps,
+          size=time_steps_t,
           tensor_array_name='mask_ta')
       mask_ta = mask_ta.unstack(mask)
 
@@ -3301,30 +3353,38 @@ def rnn(step_function,
         Returns:
             Tuple: `(time + 1,output_ta_t) + tuple(new_states)`
         """
-        current_input = input_ta.read(time)
+        current_input = tuple(ta.read(time) for ta in input_ta)
+        # maybe set shape.
+        current_input = nest.pack_sequence_as(inputs, current_input)
         mask_t = mask_ta.read(time)
         output, new_states = step_function(current_input,
                                            tuple(states) + tuple(constants))
         if getattr(output, '_uses_learning_phase', False):
           global uses_learning_phase  # pylint: disable=global-variable-undefined
           uses_learning_phase = True
-        for state, new_state in zip(states, new_states):
+
+        flat_output = nest.flatten(output)
+        # This assume the state[0] is same shape as the output
+        flat_previous_output = nest.flatten(states[0])
+        tiled_mask_t = tuple(_expand_mask(mask_t, o) for o in flat_output)
+        flat_new_output = tuple(
+            array_ops.where(m, o, po) for m, o, po in zip(
+                tiled_mask_t, flat_output, flat_previous_output))
+
+        # mask states
+        flat_state = nest.flatten(states)
+        flat_new_state = nest.flatten(new_states)
+        for state, new_state in zip(flat_state, flat_new_state):
           new_state.set_shape(state.shape)
-        tiled_mask_t = array_ops.tile(mask_t,
-                                      array_ops.stack(
-                                          [1, array_ops.shape(output)[1]]))
-        output = array_ops.where(tiled_mask_t, output, states[0])
-
-        masked_states = []
-        for i in range(len(states)):
-          states_dim = array_ops.shape(new_states[i])[1]
-          stacked_states_dim = array_ops.stack([1, states_dim])
-          tiled_mask = array_ops.tile(mask_t, stacked_states_dim)
-          masked_state = array_ops.where(tiled_mask, new_states[i], states[i])
-          masked_states.append(masked_state)
-        new_states = masked_states
-
-        output_ta_t = output_ta_t.write(time, output)
+        tiled_mask_t = tuple(_expand_mask(mask_t, s) for s in flat_state)
+        flat_final_state = tuple(
+            array_ops.where(m, o, po)
+            for m, o, po in zip(tiled_mask_t, flat_new_state, flat_state))
+        new_states = nest.pack_sequence_as(new_states, flat_final_state)
+
+        output_ta_t = tuple(
+            ta.write(time, out)
+            for ta, out in zip(output_ta_t, flat_new_output))
         return (time + 1, output_ta_t) + tuple(new_states)
     else:
 
@@ -3339,19 +3399,26 @@ def rnn(step_function,
         Returns:
             Tuple: `(time + 1,output_ta_t) + tuple(new_states)`
         """
-        current_input = input_ta.read(time)
+        current_input = tuple(ta.read(time) for ta in input_ta)
+        current_input = nest.pack_sequence_as(inputs, current_input)
         output, new_states = step_function(current_input,
                                            tuple(states) + tuple(constants))
         if getattr(output, '_uses_learning_phase', False):
           global uses_learning_phase  # pylint: disable=global-variable-undefined
           uses_learning_phase = True
-        for state, new_state in zip(states, new_states):
+
+        flat_state = nest.flatten(states)
+        flat_new_state = nest.flatten(new_states)
+        for state, new_state in zip(flat_state, flat_new_state):
           new_state.set_shape(state.shape)
-        output_ta_t = output_ta_t.write(time, output)
+
+        flat_output = nest.flatten(output)
+        output_ta_t = tuple(
+            ta.write(time, out) for ta, out in zip(output_ta_t, flat_output))
         return (time + 1, output_ta_t) + tuple(new_states)
 
     final_outputs = control_flow_ops.while_loop(
-        cond=lambda time, *_: time < time_steps,
+        cond=lambda time, *_: time < time_steps_t,
         body=_step,
         loop_vars=(time, output_ta) + states,
         maximum_iterations=input_length,
@@ -3361,21 +3428,27 @@ def rnn(step_function,
     output_ta = final_outputs[1]
     new_states = final_outputs[2:]
 
-    outputs = output_ta.stack()
-    last_output = output_ta.read(last_time - 1)
+    outputs = tuple(o.stack() for o in output_ta)
+    outputs = nest.pack_sequence_as(output_time_zero, outputs)
+    last_output = tuple(o.read(last_time - 1) for o in output_ta)
+    if not context.executing_eagerly():
+      for o in last_output:
+        o._uses_learning_phase = uses_learning_phase
+    last_output = nest.pack_sequence_as(output_time_zero, last_output)
 
-  if not time_major:
-    axes = [1, 0] + list(range(2, len(outputs.shape)))
-    outputs = array_ops.transpose(outputs, axes)
+  # static shape inference
+  def set_shape(output_):
+    shape = output_.shape.as_list()
+    shape[0] = time_steps
+    shape[1] = batch
+    output_.set_shape(shape)
+    return output_
 
-  # Static shape inference: (samples, time, ...) or (time, sample, ...)
-  outputs_shape = outputs.shape.as_list()
-  outputs_shape[0] = inputs_shape[0]
-  outputs_shape[1] = inputs_shape[1]
-  outputs.set_shape(outputs_shape)
+  outputs = nest.map_structure(set_shape, outputs)
+
+  if not time_major:
+    outputs = nest.map_structure(swap_batch_timestep, outputs)
 
-  if not context.executing_eagerly():
-    last_output._uses_learning_phase = uses_learning_phase
   return last_output, outputs, new_states
 
 
diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index a75ce30d31..d42962158f 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -1847,7 +1847,7 @@ def have_all_keras_metadata(iterable_or_element):
   if not isinstance(iterable_or_element, (list, tuple)):
     iterable = [iterable_or_element]
   else:
-    iterable = iterable_or_element
+    iterable = nest.flatten(iterable_or_element)
   return all([hasattr(x, '_keras_history') for x in iterable])
 
 
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 31933070c6..26ec24ce87 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -482,11 +482,14 @@ class RNN(Layer):
     self.time_major = time_major
 
     self.supports_masking = True
-    self.input_spec = [None]  # The input shape is unknown yet, at least rank 3.
+    # The input shape is unknown yet, it could have nested tensor inputs, and
+    # the input spec will be the list of specs for flattened inputs.
+    self.input_spec = None
     self.state_spec = None
     self._states = None
     self.constants_spec = None
     self._num_constants = None
+    self._num_inputs = None
 
   @property
   def states(self):
@@ -499,40 +502,54 @@ class RNN(Layer):
   def states(self, states):
     self._states = states
 
-  @tf_utils.shape_type_conversion
   def compute_output_shape(self, input_shape):
     if isinstance(input_shape, list):
       input_shape = input_shape[0]
+    # Check whether the input shape contains any nested shapes. It could be
+    # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from numpy
+    # inputs.
+    try:
+      input_shape = tensor_shape.as_shape(input_shape)
+    except (ValueError, TypeError):
+      # A nested tensor input
+      input_shape = nest.flatten(input_shape)[0]
+
+    batch = input_shape[0]
+    time_step = input_shape[1]
+    if self.time_major:
+      batch, time_step = time_step, batch
 
     if _is_multiple_state(self.cell.state_size):
       state_size = self.cell.state_size
     else:
       state_size = [self.cell.state_size]
 
+    def _get_output_shape(flat_output_size):
+      output_dim = tensor_shape.as_shape(flat_output_size).as_list()
+      if self.return_sequences:
+        if self.time_major:
+          output_shape = tensor_shape.as_shape([time_step, batch] + output_dim)
+        else:
+          output_shape = tensor_shape.as_shape([batch, time_step] + output_dim)
+      else:
+        output_shape = tensor_shape.as_shape([batch] + output_dim)
+      return output_shape
+
     if getattr(self.cell, 'output_size', None) is not None:
-      output_dim = tensor_shape.as_shape(self.cell.output_size).as_list()
+      # cell.output_size could be nested structure.
+      output_shape = nest.flatten(nest.map_structure(
+          _get_output_shape, self.cell.output_size))
+      output_shape = output_shape[0] if len(output_shape) == 1 else output_shape
     else:
       # Note that state_size[0] could be a tensor_shape or int.
-      output_dim = tensor_shape.as_shape(state_size[0]).as_list()
-
-    batch = input_shape[0]
-    time_step = input_shape[1]
-    if self.time_major:
-      batch, time_step = time_step, batch
-    if self.return_sequences:
-      if self.time_major:
-        output_shape = tuple([time_step, batch] + output_dim)
-      else:
-        output_shape = tuple([batch, time_step] + output_dim)
-    else:
-      output_shape = tuple([batch] + output_dim)
+      output_shape = _get_output_shape(state_size[0])
 
     if self.return_state:
-      state_shape = [
-          tuple([batch] + tensor_shape.as_shape(dim).as_list())
-          for dim in state_size
-      ]
-      return [output_shape] + state_shape
+      def _get_state_shape(flat_state):
+        state_shape = [batch] + tensor_shape.as_shape(flat_state).as_list()
+        return tensor_shape.as_shape(state_shape)
+      state_shape = nest.map_structure(_get_state_shape, state_size)
+      return generic_utils.to_list(output_shape) + nest.flatten(state_shape)
     else:
       return output_shape
 
@@ -546,28 +563,66 @@ class RNN(Layer):
     else:
       return output_mask
 
-  @tf_utils.shape_type_conversion
   def build(self, input_shape):
     # Note input_shape will be list of shapes of initial states and
     # constants if these are passed in __call__.
     if self._num_constants is not None:
       constants_shape = input_shape[-self._num_constants:]  # pylint: disable=invalid-unary-operand-type
+      constants_shape = nest.map_structure(
+          lambda s: tuple(tensor_shape.TensorShape(s).as_list()),
+          constants_shape)
     else:
       constants_shape = None
 
     if isinstance(input_shape, list):
       input_shape = input_shape[0]
+      # The input_shape here could be a nest structure.
 
-    input_spec_shape = list(input_shape)
-    batch_index, time_step_index = (1, 0) if self.time_major else (0, 1)
-    if not self.stateful:
-      input_spec_shape[batch_index] = None
-    input_spec_shape[time_step_index] = None
-    self.input_spec[0] = InputSpec(shape=tuple(input_spec_shape))
+    # do the tensor_shape to shapes here. The input could be single tensor, or a
+    # nested structure of tensors.
+    def get_input_spec(shape):
+      if isinstance(shape, tensor_shape.TensorShape):
+        input_spec_shape = shape.as_list()
+      else:
+        input_spec_shape = list(shape)
+      batch_index, time_step_index = (1, 0) if self.time_major else (0, 1)
+      if not self.stateful:
+        input_spec_shape[batch_index] = None
+      input_spec_shape[time_step_index] = None
+      return InputSpec(shape=tuple(input_spec_shape))
+
+    def get_step_input_shape(shape):
+      if isinstance(shape, tensor_shape.TensorShape):
+        shape = tuple(shape.as_list())
+      # remove the timestep from the input_shape
+      return shape[1:] if self.time_major else (shape[0],) + shape[2:]
+
+    # Check whether the input shape contains any nested shapes. It could be
+    # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from numpy
+    # inputs.
+    try:
+      input_shape = tensor_shape.as_shape(input_shape)
+    except (ValueError, TypeError):
+      # A nested tensor input
+      pass
+
+    if not nest.is_sequence(input_shape):
+      # This indicates the there is only one input.
+      if self.input_spec is not None:
+        self.input_spec[0] = get_input_spec(input_shape)
+      else:
+        self.input_spec = [get_input_spec(input_shape)]
+      step_input_shape = get_step_input_shape(input_shape)
+    else:
+      flat_input_shapes = nest.flatten(input_shape)
+      flat_input_shapes = nest.map_structure(get_input_spec, flat_input_shapes)
+      assert len(flat_input_shapes) == self._num_inputs
+      if self.input_spec is not None:
+        self.input_spec[:self._num_inputs] = flat_input_shapes
+      else:
+        self.input_spec = flat_input_shapes
+      step_input_shape = nest.map_structure(get_step_input_shape, input_shape)
 
-    batch = input_shape[batch_index]
-    input_dim = input_shape[2:]
-    step_input_shape = (batch,) + input_dim
     # allow cell (if layer) to build before we set or validate state_spec
     if isinstance(self.cell, Layer):
       if constants_shape is not None:
@@ -623,6 +678,11 @@ class RNN(Layer):
   def get_initial_state(self, inputs):
     get_initial_state_fn = getattr(self.cell, 'get_initial_state', None)
 
+    if nest.is_sequence(inputs):
+      # The input are nested sequences. Use the first element in the seq to get
+      # batch size and dtype.
+      inputs = nest.flatten(inputs)[0]
+
     input_shape = array_ops.shape(inputs)
     batch_size = input_shape[1] if self.time_major else input_shape[0]
     dtype = inputs.dtype
@@ -642,7 +702,13 @@ class RNN(Layer):
     inputs, initial_state, constants = _standardize_args(inputs,
                                                          initial_state,
                                                          constants,
-                                                         self._num_constants)
+                                                         self._num_constants,
+                                                         self._num_inputs)
+    # in case the real inputs is a nested structure, set the size of flatten
+    # input so that we can distinguish between real inputs, initial_state and
+    # constants.
+    self._num_inputs = len(nest.flatten(inputs))
+
     if initial_state is None and constants is None:
       return super(RNN, self).__call__(inputs, **kwargs)
 
@@ -678,7 +744,10 @@ class RNN(Layer):
     if is_keras_tensor:
       # Compute the full input spec, including state and constants
       full_input = [inputs] + additional_inputs
-      full_input_spec = self.input_spec + additional_specs
+      # The original input_spec is None since there could be a nested tensor
+      # input. Update the input_spec to match the inputs.
+      full_input_spec = [None for _ in range(len(nest.flatten(inputs)))
+                        ] + additional_specs
       # Perform the call with temporarily replaced input_spec
       original_input_spec = self.input_spec
       self.input_spec = full_input_spec
@@ -726,7 +795,12 @@ class RNN(Layer):
       raise ValueError(
           'Layer has ' + str(len(self.states)) + ' states but was passed ' +
           str(len(initial_state)) + ' initial states.')
-    input_shape = K.int_shape(inputs)
+
+    if nest.is_sequence(inputs):
+      # In the case of nested input, use the first element for shape check.
+      input_shape = K.int_shape(nest.flatten(inputs)[0])
+    else:
+      input_shape = K.int_shape(inputs)
     timesteps = input_shape[0] if self.time_major else input_shape[1]
     if self.unroll and timesteps in [None, 1]:
       raise ValueError('Cannot unroll a RNN if the '
@@ -802,7 +876,7 @@ class RNN(Layer):
         states = [states]
       else:
         states = list(states)
-      return [output] + states
+      return generic_utils.to_list(output) + states
     else:
       return output
 
@@ -1208,6 +1282,7 @@ class SimpleRNN(RNN):
         unroll=unroll,
         **kwargs)
     self.activity_regularizer = regularizers.get(activity_regularizer)
+    self.input_spec = [InputSpec(ndim=3)]
 
   def call(self, inputs, mask=None, training=None, initial_state=None):
     self.cell._dropout_mask = None
@@ -1737,6 +1812,7 @@ class GRU(RNN):
         unroll=unroll,
         **kwargs)
     self.activity_regularizer = regularizers.get(activity_regularizer)
+    self.input_spec = [InputSpec(ndim=3)]
 
   def call(self, inputs, mask=None, training=None, initial_state=None):
     self.cell._dropout_mask = None
@@ -2129,7 +2205,8 @@ class LSTMCell(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
   def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
-    return _generate_zero_filled_state_for_cell(self, inputs, batch_size, dtype)
+    return list(_generate_zero_filled_state_for_cell(
+        self, inputs, batch_size, dtype))
 
 
 @tf_export('keras.layers.LSTM')
@@ -2265,6 +2342,7 @@ class LSTM(RNN):
         unroll=unroll,
         **kwargs)
     self.activity_regularizer = regularizers.get(activity_regularizer)
+    self.input_spec = [InputSpec(ndim=3)]
 
   def call(self, inputs, mask=None, training=None, initial_state=None):
     self.cell._dropout_mask = None
@@ -2402,7 +2480,8 @@ def _generate_dropout_mask(ones, rate, training=None, count=1):
   return K.in_train_phase(dropped_inputs, ones, training=training)
 
 
-def _standardize_args(inputs, initial_state, constants, num_constants):
+def _standardize_args(
+    inputs, initial_state, constants, num_constants, num_inputs=1):
   """Standardizes `__call__` to a single list of tensor inputs.
 
   When running a model loaded from a file, the input tensors
@@ -2418,20 +2497,40 @@ def _standardize_args(inputs, initial_state, constants, num_constants):
       constants: Tensor or list of tensors or None, constant tensors.
       num_constants: Expected number of constants (if constants are passed as
         part of the `inputs` list.
+      num_inputs: Expected number of real input tensors (exclude initial_states
+        and constants).
 
   Returns:
-      inputs: Single tensor.
+      inputs: Single tensor or tuple of tensors.
       initial_state: List of tensors or None.
       constants: List of tensors or None.
   """
   if isinstance(inputs, list):
+    # There are several situations here:
+    # In the graph mode, __call__ will be only called once. The initial_state
+    # and constants could be in inputs (from file loading).
+    # In the eager mode, __call__ will be called twice, once during
+    # rnn_layer(inputs=input_t, constants=c_t, ...), and second time will be
+    # model.fit/train_on_batch/predict with real np data. In the second case,
+    # the inputs will contain initial_state and constants, and more importantly,
+    # the real inputs will be in a flat list, instead of nested tuple.
+    #
+    # For either case, we will use num_inputs to split the input list, and
+    # restructure the real input into tuple.
     assert initial_state is None and constants is None
     if num_constants is not None:
       constants = inputs[-num_constants:]
       inputs = inputs[:-num_constants]
+    if num_inputs is None:
+      num_inputs = 1
+    if len(inputs) > num_inputs:
+      initial_state = inputs[num_inputs:]
+      inputs = inputs[:num_inputs]
+
     if len(inputs) > 1:
-      initial_state = inputs[1:]
-    inputs = inputs[0]
+      inputs = tuple(inputs)
+    else:
+      inputs = inputs[0]
 
   def to_list_or_none(x):
     if x is None or isinstance(x, list):
@@ -2465,15 +2564,13 @@ def _generate_zero_filled_state(batch_size_tensor, state_size, dtype):
     raise ValueError(
         'batch_size and dtype cannot be None while constructing initial state: '
         'batch_size={}, dtype={}'.format(batch_size_tensor, dtype))
-  if _is_multiple_state(state_size):
-    states = []
-    for dims in state_size:
-      flat_dims = tensor_shape.as_shape(dims).as_list()
-      init_state_size = [batch_size_tensor] + flat_dims
-      init_state = array_ops.zeros(init_state_size, dtype=dtype)
-      states.append(init_state)
-    return states
-  else:
-    flat_dims = tensor_shape.as_shape(state_size).as_list()
+
+  def create_zeros(unnested_state_size):
+    flat_dims = tensor_shape.as_shape(unnested_state_size).as_list()
     init_state_size = [batch_size_tensor] + flat_dims
     return array_ops.zeros(init_state_size, dtype=dtype)
+
+  if nest.is_sequence(state_size):
+    return nest.map_structure(create_zeros, state_size)
+  else:
+    return create_zeros(state_size)
diff --git a/tensorflow/python/keras/layers/recurrent_test.py b/tensorflow/python/keras/layers/recurrent_test.py
index c343c03a8c..4132dcbdca 100644
--- a/tensorflow/python/keras/layers/recurrent_test.py
+++ b/tensorflow/python/keras/layers/recurrent_test.py
@@ -21,6 +21,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
+
 import numpy as np
 
 from tensorflow.python import keras
@@ -34,6 +36,11 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training import rmsprop
 from tensorflow.python.training.checkpointable import util as checkpointable_util
+from tensorflow.python.util import nest
+
+# Used for nested input/output/state RNN test.
+NestedInput = collections.namedtuple('NestedInput', ['t1', 't2'])
+NestedState = collections.namedtuple('NestedState', ['s1', 's2'])
 
 
 @test_util.run_all_in_graph_and_eager_modes
@@ -822,6 +829,173 @@ class RNNTest(test.TestCase):
       self.assertEqual(initial_state.shape.as_list(), [batch, 5])
       self.assertEqual(initial_state.dtype, inputs.dtype)
 
+  def test_nested_input_output(self):
+    batch = 10
+    t = 5
+    i1, i2, i3 = 3, 4, 5
+    o1, o2, o3 = 2, 3, 4
+
+    cell = NestedCell(o1, o2, o3)
+    rnn = keras.layers.RNN(cell)
+
+    input_1 = keras.Input((t, i1))
+    input_2 = keras.Input((t, i2, i3))
+
+    outputs = rnn((input_1, input_2))
+
+    self.assertEqual(len(outputs), 2)
+    self.assertEqual(outputs[0].shape.as_list(), [None, o1])
+    self.assertEqual(outputs[1].shape.as_list(), [None, o2, o3])
+
+    model = keras.models.Model((input_1, input_2), outputs)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((batch, t, i1)), np.zeros((batch, t, i2, i3))],
+        [np.zeros((batch, o1)), np.zeros((batch, o2, o3))])
+    self.assertEqual(model.output_shape, [(None, o1), (None, o2, o3)])
+
+    cell = NestedCell(o1, o2, o3, use_tuple=True)
+
+    rnn = keras.layers.RNN(cell)
+
+    input_1 = keras.Input((t, i1))
+    input_2 = keras.Input((t, i2, i3))
+
+    outputs = rnn(NestedInput(t1=input_1, t2=input_2))
+
+    self.assertEqual(len(outputs), 2)
+    self.assertEqual(outputs[0].shape.as_list(), [None, o1])
+    self.assertEqual(outputs[1].shape.as_list(), [None, o2, o3])
+
+    model = keras.models.Model([input_1, input_2], outputs)
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((batch, t, i1)),
+         np.zeros((batch, t, i2, i3))],
+        [np.zeros((batch, o1)), np.zeros((batch, o2, o3))])
+    self.assertEqual(model.output_shape, [(None, o1), (None, o2, o3)])
+
+  def test_nested_input_output_with_state(self):
+    batch = 10
+    t = 5
+    i1, i2, i3 = 3, 4, 5
+    o1, o2, o3 = 2, 3, 4
+
+    cell = NestedCell(o1, o2, o3)
+    rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True)
+
+    input_1 = keras.Input((t, i1))
+    input_2 = keras.Input((t, i2, i3))
+
+    output1, output2, s1, s2 = rnn((input_1, input_2))
+
+    self.assertEqual(output1.shape.as_list(), [None, t, o1])
+    self.assertEqual(output2.shape.as_list(), [None, t, o2, o3])
+    self.assertEqual(s1.shape.as_list(), [None, o1])
+    self.assertEqual(s2.shape.as_list(), [None, o2, o3])
+
+    model = keras.models.Model([input_1, input_2], [output1, output2])
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((batch, t, i1)),
+         np.zeros((batch, t, i2, i3))],
+        [np.zeros((batch, t, o1)),
+         np.zeros((batch, t, o2, o3))])
+    self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)])
+
+    cell = NestedCell(o1, o2, o3, use_tuple=True)
+
+    rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True)
+
+    input_1 = keras.Input((t, i1))
+    input_2 = keras.Input((t, i2, i3))
+
+    output1, output2, s1, s2 = rnn(NestedInput(t1=input_1, t2=input_2))
+
+    self.assertEqual(output1.shape.as_list(), [None, t, o1])
+    self.assertEqual(output2.shape.as_list(), [None, t, o2, o3])
+    self.assertEqual(s1.shape.as_list(), [None, o1])
+    self.assertEqual(s2.shape.as_list(), [None, o2, o3])
+
+    model = keras.models.Model([input_1, input_2], [output1, output2])
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((batch, t, i1)),
+         np.zeros((batch, t, i2, i3))],
+        [np.zeros((batch, t, o1)),
+         np.zeros((batch, t, o2, o3))])
+    self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)])
+
+  def test_nest_input_output_with_init_state(self):
+    batch = 10
+    t = 5
+    i1, i2, i3 = 3, 4, 5
+    o1, o2, o3 = 2, 3, 4
+
+    cell = NestedCell(o1, o2, o3)
+    rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True)
+
+    input_1 = keras.Input((t, i1))
+    input_2 = keras.Input((t, i2, i3))
+    init_s1 = keras.Input((o1,))
+    init_s2 = keras.Input((o2, o3))
+
+    output1, output2, s1, s2 = rnn((input_1, input_2),
+                                   initial_state=(init_s1, init_s2))
+
+    self.assertEqual(output1.shape.as_list(), [None, t, o1])
+    self.assertEqual(output2.shape.as_list(), [None, t, o2, o3])
+    self.assertEqual(s1.shape.as_list(), [None, o1])
+    self.assertEqual(s2.shape.as_list(), [None, o2, o3])
+
+    model = keras.models.Model([input_1, input_2, init_s1, init_s2],
+                               [output1, output2])
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((batch, t, i1)),
+         np.zeros((batch, t, i2, i3)),
+         np.zeros((batch, o1)),
+         np.zeros((batch, o2, o3))],
+        [np.zeros((batch, t, o1)),
+         np.zeros((batch, t, o2, o3))])
+    self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)])
+
+    cell = NestedCell(o1, o2, o3, use_tuple=True)
+
+    rnn = keras.layers.RNN(cell, return_sequences=True, return_state=True)
+
+    input_1 = keras.Input((t, i1))
+    input_2 = keras.Input((t, i2, i3))
+    init_s1 = keras.Input((o1,))
+    init_s2 = keras.Input((o2, o3))
+    init_state = NestedState(s1=init_s1, s2=init_s2)
+
+    output1, output2, s1, s2 = rnn(NestedInput(t1=input_1, t2=input_2),
+                                   initial_state=init_state)
+
+    self.assertEqual(output1.shape.as_list(), [None, t, o1])
+    self.assertEqual(output2.shape.as_list(), [None, t, o2, o3])
+    self.assertEqual(s1.shape.as_list(), [None, o1])
+    self.assertEqual(s2.shape.as_list(), [None, o2, o3])
+
+    model = keras.models.Model([input_1, input_2, init_s1, init_s2],
+                               [output1, output2])
+    model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
+                  loss='mse')
+    model.train_on_batch(
+        [np.zeros((batch, t, i1)),
+         np.zeros((batch, t, i2, i3)),
+         np.zeros((batch, o1)),
+         np.zeros((batch, o2, o3))],
+        [np.zeros((batch, t, o1)),
+         np.zeros((batch, t, o2, o3))])
+    self.assertEqual(model.output_shape, [(None, t, o1), (None, t, o2, o3)])
+
 
 class Minimal2DRNNCell(keras.layers.Layer):
   """The minimal 2D RNN cell is a simple combination of 2 1-D RNN cell.
@@ -877,5 +1051,55 @@ class PlusOneRNNCell(keras.layers.Layer):
     return inputs + 1, [states[0] + 1]
 
 
+class NestedCell(keras.layers.Layer):
+
+  def __init__(self, unit_1, unit_2, unit_3, use_tuple=False, **kwargs):
+    self.unit_1 = unit_1
+    self.unit_2 = unit_2
+    self.unit_3 = unit_3
+    self.use_tuple = use_tuple
+    super(NestedCell, self).__init__(**kwargs)
+    # A nested state.
+    if use_tuple:
+      self.state_size = NestedState(
+          s1=unit_1, s2=tensor_shape.TensorShape([unit_2, unit_3]))
+    else:
+      self.state_size = (unit_1, tensor_shape.TensorShape([unit_2, unit_3]))
+    self.output_size = (unit_1, tensor_shape.TensorShape([unit_2, unit_3]))
+
+  def build(self, inputs_shape):
+    # expect input_shape to contain 2 items, [(batch, i1), (batch, i2, i3)]
+    if self.use_tuple:
+      input_1 = inputs_shape.t1[1]
+      input_2, input_3 = inputs_shape.t2[1:]
+    else:
+      input_1 = inputs_shape[0][1]
+      input_2, input_3 = inputs_shape[1][1:]
+
+    self.kernel_1 = self.add_weight(
+        shape=(input_1, self.unit_1), initializer='uniform', name='kernel_1')
+    self.kernel_2_3 = self.add_weight(
+        shape=(input_2, input_3, self.unit_2, self.unit_3),
+        initializer='uniform',
+        name='kernel_2_3')
+
+  def call(self, inputs, states):
+    # inputs should be in [(batch, input_1), (batch, input_2, input_3)]
+    # state should be in shape [(batch, unit_1), (batch, unit_2, unit_3)]
+    flatten_inputs = nest.flatten(inputs)
+    s1, s2 = states
+
+    output_1 = math_ops.matmul(flatten_inputs[0], self.kernel_1)
+    output_2_3 = special_math_ops.einsum('bij,ijkl->bkl', flatten_inputs[1],
+                                         self.kernel_2_3)
+    state_1 = s1 + output_1
+    state_2_3 = s2 + output_2_3
+
+    output = [output_1, output_2_3]
+    new_states = NestedState(s1=state_1, s2=state_2_3)
+
+    return output, new_states
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py
index d19d0b5f8c..c795b2aa7e 100644
--- a/tensorflow/python/keras/layers/wrappers.py
+++ b/tensorflow/python/keras/layers/wrappers.py
@@ -29,6 +29,7 @@ from tensorflow.python.keras.layers.recurrent import _standardize_args
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
+from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -517,7 +518,10 @@ class Bidirectional(Wrapper):
     if is_keras_tensor:
       # Compute the full input spec, including state
       full_input = [inputs] + additional_inputs
-      full_input_spec = self.input_spec + additional_specs
+      # The original input_spec is None since there could be a nested tensor
+      # input. Update the input_spec to match the inputs.
+      full_input_spec = [None for _ in range(len(nest.flatten(inputs)))
+                        ] + additional_specs
 
       # Perform the call with temporarily replaced input_spec
       original_input_spec = self.input_spec
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
index 64e7a9046b..b207c68000 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
@@ -128,7 +128,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
index 6fdffef776..2d7a09ceda 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
@@ -128,7 +128,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt
index a0fe598ab9..9e24bb8ae6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt
@@ -179,7 +179,7 @@ tf_class {
   }
   member_method {
     name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "call"
@@ -191,7 +191,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt
index ecdbf48157..1b1ccbe118 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt
@@ -179,7 +179,7 @@ tf_class {
   }
   member_method {
     name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "call"
@@ -191,7 +191,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
index 68b6678d48..3358f26aeb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -110,7 +110,7 @@ tf_class {
   }
   member_method {
     name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "call"
@@ -122,7 +122,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt
index 182efb83b8..3d70cf8b65 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt
@@ -167,7 +167,7 @@ tf_class {
   }
   member_method {
     name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "call"
@@ -179,7 +179,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
index 64e7a9046b..b207c68000 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
@@ -128,7 +128,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
index 6fdffef776..2d7a09ceda 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
@@ -128,7 +128,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
index a0fe598ab9..9e24bb8ae6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
@@ -179,7 +179,7 @@ tf_class {
   }
   member_method {
     name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "call"
@@ -191,7 +191,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
index ecdbf48157..1b1ccbe118 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
@@ -179,7 +179,7 @@ tf_class {
   }
   member_method {
     name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "call"
@@ -191,7 +191,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
index 68b6678d48..3358f26aeb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -110,7 +110,7 @@ tf_class {
   }
   member_method {
     name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "call"
@@ -122,7 +122,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt
index 182efb83b8..3d70cf8b65 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt
@@ -167,7 +167,7 @@ tf_class {
   }
   member_method {
     name: "build"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "call"
@@ -179,7 +179,7 @@ tf_class {
   }
   member_method {
     name: "compute_output_shape"
-    argspec: "args=[\'instance\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "count_params"
-- 
GitLab


From 3930c0c521c01d30599430f7178c6e9d98c7283b Mon Sep 17 00:00:00 2001
From: Deepak B <dksb29@gmail.com>
Date: Fri, 12 Oct 2018 15:55:48 -0700
Subject: [PATCH 0919/1085] Create bug_template.md

---
 .github/ISSUE_TEMPLATE/bug_template.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/bug_template.md

diff --git a/.github/ISSUE_TEMPLATE/bug_template.md b/.github/ISSUE_TEMPLATE/bug_template.md
new file mode 100644
index 0000000000..c8ab3e2a2c
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_template.md
@@ -0,0 +1,24 @@
+Please make sure that this is a bug. As per our GitHub Policy [link] we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub.
+
+Please fill the following Bug_Template:
+### System information
+- Have I written custom code (as opposed to using a stock example script provided in TensorFlow):
+- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
+- Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device:
+- TensorFlow installed from (source or binary):
+- TensorFlow version (use command below):
+- Python version:
+- Bazel version (if compiling from source):
+- GCC/Compiler version (if compiling from source):
+- CUDA/cuDNN version:
+- GPU model and memory:
+- Docker Image:
+
+You can use [this script](https://github.com/tensorflow/tensorflow/tree/master/tools/tf_env_collect.sh) to collect some of the information asked above.
+
+### Describe the current behavior
+
+### Describe the expected behavior
+
+### Code to reproduce the issue
+Provide a reproducible test case that is the bare minimum necessary to generate the problem.
-- 
GitLab


From fbce5de12c2ca254febaf05b6913a1e8c6da7cbd Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 12 Oct 2018 16:14:39 -0700
Subject: [PATCH 0920/1085] Use fs->FileExists to update the searching status
 for the NOT_FOUNT case

---
 .../kernels/data/matching_files_dataset_op.cc | 23 ++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index f12e376ddc..f6c9860f03 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -260,14 +260,25 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           Status s = fs->GetChildren(current_dir, &children);
           std::cout << "Children Num: " << children.size()
                     << "; Status: " << s.ToString()
-                    << "; Current dir: " << current_dir << std::endl;
+                    << "; Current dir: " << current_dir
+                    << "; FileExist status: "
+                    << fs->FileExists(current_dir).ToString() << std::endl;
           ret.Update(s);
 
-          // If GetChildren() fails, continue the next search.
-          if (ret.code() == error::NOT_FOUND) {
-            continue;
-          } else if (!ret.ok()) {
-            return ret;
+          // When the children is empty, 1) return the non-ok status immediately
+          // if it is not NOT_FOUND; 2) continue the search if the status is ok
+          // or NOT_FOUND;
+          if (children.empty()) {
+            if (ret.code() != error::NOT_FOUND || !ret.ok()) {
+              return ret;
+            } else {
+              // On some platforms, fs.GetChildren() return the OK status even
+              // if the path isn't found. fs->FileExists() is used to make
+              // different platforms return the same status when searching a
+              // non-existing path.
+              ret.Update(fs->FileExists(current_dir));
+              continue;
+            }
           }
 
           // children_dir_status holds is_dir status for children. It can have
-- 
GitLab


From de845ea2d797ced500a586120c6b1ec0cd0c50ff Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 16:16:39 -0700
Subject: [PATCH 0921/1085] This CL switches the custom-registered-gradient of
 nested Functions from using the C++ SymGrad to the correct forward & backward
 passes. This allows gradients of defuns nested > 2 levels deep to work
 correctly.

This CL also adds a number of unit tests to make sure various nestings of gradient & defuns work. (Note: unit tests that combine multiple levels of gradient tapes & multiple levels of defun still need to be added).

PiperOrigin-RevId: 216939066
---
 tensorflow/python/eager/BUILD            |   1 +
 tensorflow/python/eager/function.py      |  82 +++--
 tensorflow/python/eager/function_test.py | 368 +++++++++++++++++++++++
 tensorflow/python/ops/gradients_impl.py  |   3 +-
 4 files changed, 423 insertions(+), 31 deletions(-)

diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index 6a90832b9c..a30737fbec 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -143,6 +143,7 @@ cuda_py_test(
 
 cuda_py_test(
     name = "function_test",
+    size = "medium",
     srcs = ["function_test.py"],
     additional_deps = [
         ":backprop",
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index fb6855ed71..f27e5558ed 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -423,8 +423,9 @@ class _EagerDefinedFunction(object):
       proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_)
     function_def = function_pb2.FunctionDef()
     function_def.ParseFromString(compat.as_bytes(proto_data))
-    if context.executing_eagerly():
-      _register(fn)
+    with ops.init_scope():
+      if context.executing_eagerly():
+        _register(fn)
     self.definition = function_def
     self.name = compat.as_bytes(function_def.signature.name)
     self.signature = function_def.signature
@@ -569,6 +570,7 @@ class Function(object):
         self._func_graph.inputs, self._func_graph.outputs, self._attrs)
     self._backward_graph_function = None
     self._signature = signature
+    self._gradient_name = None
 
   def __call__(self, *args):
     """Executes the wrapped function.
@@ -614,45 +616,56 @@ class Function(object):
     if context.executing_eagerly() or not self.outputs:
       outputs = self._inference_function.call(ctx, args)
     else:
-      name = "PartitionedCall-%s" % ops.uid()
-
-      @ops.RegisterGradient(name)
-      def grad_fn(op, *doutputs):  # pylint: disable=unused-variable
-        """Gradients of this function."""
-        if op.graph is not ops.get_default_graph():
-          # TODO(apassos) this will still emit SymbolicGradient ops when
-          # nested defuns are being differentiated. We need to somehow figure
-          # out a way to update the FunctionDef corresponding to the calling
-          # function when mutating a call to the forward pass.
-          return gradients_impl._SymGrad(op, list(doutputs))  # pylint: disable=protected-access
-        if self._backward_graph_function is None:
-          self._construct_backprop_function()
-        self._forward_function.add_to_graph(op.graph)
+      if not self._gradient_name:
+        self._gradient_name = "PartitionedCall-%s" % ops.uid()
+        self._register_gradient(self._gradient_name)
+      with ops.get_default_graph().gradient_override_map(
+          {"PartitionedCall": self._gradient_name,
+           "StatefulPartitionedCall": self._gradient_name}):
+        outputs = self._inference_function.call(ctx, args)
+    return self._build_call_outputs(outputs)
+
+  def _register_gradient(self, name):
+    """Registers the gradient for the current Function under the given name.
+
+    The gradient rewrites an inference call op to a forward call op, but does
+    not modify a pre-existing forward call op. It then computes the gradient
+    from the output's gradients and the side outputs of the forward op.
+
+    Args:
+      name: The name to register the gradient as.
+    """
+    @ops.RegisterGradient(name)
+    def grad_fn(op, *doutputs):  # pylint: disable=unused-variable
+      """Gradients of this function."""
+      if self._backward_graph_function is None:
+        self._construct_backprop_function()
+
+      # pylint: disable=protected-access
+      self._forward_function.add_to_graph(op.graph)
+      num_inference_outputs = self._inference_function._num_outputs
+
+      # Rewrite an inference call op to be a forward call op
+      if op.get_attr("f").name.encode() == self._inference_function.name:
         func = attr_value_pb2.AttrValue(
             func=attr_value_pb2.NameAttrList(
                 name=self._forward_function.name))
-        # pylint: disable=protected-access
         op._set_attr("f", func)
         types = attr_value_pb2.AttrValue.ListValue(
             type=self._forward_function._output_types)
         op._set_attr("Tout", attr_value_pb2.AttrValue(list=types))
         for i in range(
-            len(outputs), len(self._forward_function._output_types)):
+            num_inference_outputs, len(self._forward_function._output_types)):
           t = ops.Tensor(op, i, self._forward_function._output_types[i])
           t.set_shape(self._forward_function._output_shapes[i])
           func_graph_output = self._forward_function._func_graph_outputs[i]
           custom_gradient.copy_handle_data(func_graph_output, t)
           op._outputs.append(t)
-        # pylint: enable=protected-access
-        side_outputs = op.outputs[len(outputs):]
-        return self._backward_graph_function(
-            *(list(doutputs) + list(side_outputs)))
-
-      with ops.get_default_graph().gradient_override_map(
-          {"PartitionedCall": name}):
-        outputs = self._inference_function.call(ctx, args)
-
-    return self._build_call_outputs(outputs)
+      # pylint: enable=protected-access
+      # Compute the gradients using the side outputs
+      side_outputs = op.outputs[num_inference_outputs:]
+      return self._backward_graph_function(
+          *(list(doutputs[:num_inference_outputs]) + list(side_outputs)))
 
   @property
   def name(self):
@@ -732,7 +745,8 @@ class Function(object):
           self._func_graph.outputs,
           self._func_graph.inputs,
           grad_ys=gradients_wrt_outputs,
-          src_graph=self._func_graph)
+          src_graph=self._func_graph,
+          unconnected_gradients=gradients_impl.UnconnectedGradients.ZERO)
 
     backwards_graph_captures = list(backwards_graph.captures.keys())
 
@@ -777,7 +791,15 @@ class Function(object):
       self._construct_backprop_function()
 
     ctx = context.context()
-    outputs = self._forward_function.call(ctx, args)
+
+    if not self._gradient_name:
+      self._gradient_name = "PartitionedCall-%s" % ops.uid()
+      self._register_gradient(self._gradient_name)
+    with ops.get_default_graph().gradient_override_map(
+        {"PartitionedCall": self._gradient_name,
+         "StatefulPartitionedCall": self._gradient_name}):
+      outputs = self._forward_function.call(ctx, args)
+
     if isinstance(outputs, ops.Operation) or outputs is None:
       return outputs
 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 74e4dd3365..8344c014b9 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -990,6 +990,7 @@ class FunctionTest(test.TestCase):
     self.assertAllEqual(r, 25.0)
     self.assertAllEqual(g, 2 * 5.0)
 
+  @test_util.run_in_graph_and_eager_modes
   def testNestedDifferentiableFunction(self):
     @function.defun
     def inner_fn(a, b):
@@ -1007,6 +1008,373 @@ class FunctionTest(test.TestCase):
 
     self.assertAllEqual(grad, 2 * 5.0 + 1.0)
 
+  @test_util.run_in_graph_and_eager_modes
+  def testDeeplyNestedDifferentiableFunction(self):
+    @function.defun
+    def inner_inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def inner_fn(a, b):
+      return inner_inner_fn(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return a * inner_fn(a, b)
+
+    @function.defun
+    def outer_fn(x):
+      return middle_fn(x, 1.0)
+
+    x = constant_op.constant(5.0)
+    with backprop.GradientTape() as tp:
+      tp.watch(x)
+      result = outer_fn(x)
+    grad = tp.gradient(result, x)
+
+    self.assertAllEqual(grad, 2 * 5.0 + 1.0)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeeplyNestedDifferentiableFunctionWithMultipleGradCalls(self):
+    @function.defun
+    def inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return math_ops.mul(a, inner_fn(a, b))
+
+    @function.defun
+    def outer_fn(x):
+      return middle_fn(x, 3.0)
+
+    x = constant_op.constant(5.0)
+    self.assertAllEqual(outer_fn(x), 5.0 * (5.0 + 3.0))
+
+    with backprop.GradientTape() as tp:
+      tp.watch(x)
+      result = outer_fn(x)
+    grad = tp.gradient(result, x)
+
+    self.assertAllEqual(grad, 2 * 5.0 + 3.0)
+    self.assertAllEqual(outer_fn(x), 5.0 * (5.0 + 3.0))
+    self.assertAllEqual(middle_fn(3.0, x), 3.0 * (3.0 + 5.0))
+
+    with backprop.GradientTape() as tp:
+      tp.watch(x)
+      result = outer_fn(x)
+    grad = tp.gradient(result, x)
+
+    self.assertAllEqual(grad, 2 * 5.0 + 3.0)
+
+    y = constant_op.constant(4.0)
+    with backprop.GradientTape() as tp:
+      tp.watch(y)
+      result = outer_fn(y)
+    grad = tp.gradient(result, y)
+
+    self.assertAllEqual(grad, 2 * 4.0 + 3.0)
+
+    with backprop.GradientTape() as tp:
+      tp.watch(y)
+      result = inner_fn(y, y)
+    grad = tp.gradient(result, y)
+
+    self.assertAllEqual(grad, 2.0)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeeplyNestedDifferentiableFunctionGradientTapeInDefun(self):
+    @function.defun
+    def inner_inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def inner_fn(a, b):
+      return inner_inner_fn(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return a * inner_fn(a, b)
+
+    @function.defun
+    def outer_fn(x):
+      with backprop.GradientTape() as tp:
+        tp.watch(x)
+        result = middle_fn(x, 1.0)
+      grad = tp.gradient(result, x)
+      return grad
+
+    x = constant_op.constant(5.0)
+    grad = outer_fn(x)
+    self.assertAllEqual(grad, 2 * 5.0 + 1.0)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeeplyNestedDifferentiableFunctionGradientTapeInNestedDefun(self):
+    @function.defun
+    def inner_inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def inner_fn(a, b):
+      return inner_inner_fn(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return a * inner_fn(a, b)
+
+    @function.defun
+    def almost_outer_fn(x):
+      with backprop.GradientTape() as tp:
+        tp.watch(x)
+        result = middle_fn(x, 1.0)
+      grad = tp.gradient(result, x)
+      return grad
+
+    @function.defun
+    def outer_fn(x):
+      return almost_outer_fn(x)
+
+    x = constant_op.constant(5.0)
+    grad = outer_fn(x)
+    self.assertAllEqual(grad, 2 * 5.0 + 1.0)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeeplyNestedDifferentiableFunctionGradientTapeInMultNestedDefun(self):
+    @function.defun
+    def inner_inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def inner_fn(a, b):
+      return inner_inner_fn(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return a * inner_fn(a, b)
+
+    @function.defun
+    def almost_outer_fn(x):
+      with backprop.GradientTape() as tp:
+        tp.watch(x)
+        result = middle_fn(x, 1.0)
+      grad = tp.gradient(result, x)
+      return grad
+
+    @function.defun
+    def outer_fn(x):
+      return almost_outer_fn(x)
+
+    @function.defun
+    def outer_outer_fn(x):
+      return outer_fn(x)
+
+    x = constant_op.constant(5.0)
+    grad = outer_outer_fn(x)
+    self.assertAllEqual(grad, 2 * 5.0 + 1.0)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeeplyNestedDifferentiableFunctionTFGradientInDefun(self):
+    @function.defun
+    def inner_inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def inner_fn(a, b):
+      return inner_inner_fn(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return a * inner_fn(a, b)
+
+    @function.defun
+    def outer_fn(x):
+      result = middle_fn(x, 1.0)
+      return gradients_impl.gradients(result, [x])[0]
+
+    x = constant_op.constant(5.0)
+    grad = outer_fn(x)
+    self.assertAllEqual(grad, 2 * 5.0 + 1.0)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeeplyNestedDifferentiableFunctionTFGradientInNestedDefun(self):
+    @function.defun
+    def inner_inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def inner_fn(a, b):
+      return inner_inner_fn(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return a * inner_fn(a, b)
+
+    @function.defun
+    def almost_outer_fn(x):
+      result = middle_fn(x, 1.0)
+      return gradients_impl.gradients(result, [x])[0]
+
+    @function.defun
+    def outer_fn(x):
+      return almost_outer_fn(x)
+
+    x = constant_op.constant(5.0)
+    grad = outer_fn(x)
+    self.assertAllEqual(grad, 2 * 5.0 + 1.0)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeeplyNestedDifferentiableFunctionTFGradientInMultNestedDefun(self):
+    @function.defun
+    def inner_inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def inner_fn(a, b):
+      return inner_inner_fn(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return a * inner_fn(a, b)
+
+    @function.defun
+    def almost_outer_fn(x):
+      result = middle_fn(x, 1.0)
+      return gradients_impl.gradients(result, [x])[0]
+
+    @function.defun
+    def outer_fn(x):
+      return almost_outer_fn(x)
+
+    @function.defun
+    def outer_outer_fn(x):
+      return outer_fn(x)
+
+    x = constant_op.constant(5.0)
+    grad = outer_outer_fn(x)
+    self.assertAllEqual(grad, 2 * 5.0 + 1.0)
+
+  def testDeeplyNestedDifferentiableFunctionWithVariable(self):
+    var = variables.Variable(constant_op.constant(1.0))
+
+    @function.defun
+    def inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return a * inner_fn(a, b)
+
+    @function.defun
+    def outer_fn(x):
+      return middle_fn(x, var)
+
+    x = constant_op.constant(5.0)
+    with backprop.GradientTape() as tp:
+      tp.watch(x)
+      result = outer_fn(x)
+    grad = tp.gradient(result, x)
+
+    self.assertAllEqual(grad, 2 * 5.0 + 1.0)
+
+  def testDeeplyNestedDifferentiableFunctionWithVariableMultipleGradCalls(self):
+    v = variables.Variable(constant_op.constant(3.0))
+
+    @function.defun
+    def inner_fn(a, b):
+      return math_ops.add(a, b)
+
+    @function.defun
+    def middle_fn(a, b):
+      return math_ops.mul(a, inner_fn(a, b))
+
+    @function.defun
+    def outer_fn(x):
+      return middle_fn(x, v)
+
+    x = constant_op.constant(5.0)
+    self.assertAllEqual(outer_fn(x), 5.0 * (5.0 + 3.0))
+
+    with backprop.GradientTape() as tp:
+      tp.watch(x)
+      result = outer_fn(x)
+    grad = tp.gradient(result, x)
+
+    self.assertAllEqual(grad, 2 * 5.0 + 3.0)
+    self.assertAllEqual(outer_fn(x), 5.0 * (5.0 + 3.0))
+    self.assertAllEqual(middle_fn(v, x), 3.0 * (3.0 + 5.0))
+
+    with backprop.GradientTape() as tp:
+      tp.watch(x)
+      result = outer_fn(x)
+    grad = tp.gradient(result, x)
+
+    self.assertAllEqual(grad, 2 * 5.0 + 3.0)
+
+    y = constant_op.constant(4.0)
+    with backprop.GradientTape() as tp:
+      tp.watch(y)
+      result = outer_fn(y)
+    grad = tp.gradient(result, y)
+
+    self.assertAllEqual(grad, 2 * 4.0 + 3.0)
+
+    v.assign(constant_op.constant(1.5))
+    with backprop.GradientTape() as tp:
+      tp.watch(y)
+      result = outer_fn(y)
+    grad = tp.gradient(result, y)
+
+    self.assertAllEqual(grad, 2 * 4.0 + 1.5)
+
+    with backprop.GradientTape() as tp:
+      tp.watch(y)
+      result = inner_fn(y, v)
+    grad = tp.gradient(result, y)
+
+    self.assertAllEqual(grad, 1.0)
+
+  def testDeeplyNestedDifferentiableFunctionWithVariableMultipleTFGrads(self):
+    with context.graph_mode(), self.cached_session():
+      v = resource_variable_ops.ResourceVariable(3.0)
+      v.initializer.run()
+
+      @function.defun
+      def inner_fn(a, b):
+        return math_ops.add(a, b)
+
+      @function.defun
+      def middle_fn(a, b):
+        return math_ops.mul(a, inner_fn(a, b))
+
+      @function.defun
+      def outer_fn(x):
+        return middle_fn(x, v)
+
+      x = constant_op.constant(5.0)
+      self.assertAllEqual(outer_fn(x).eval(), 5.0 * (5.0 + 3.0))
+
+      grad, = gradients_impl.gradients(outer_fn(x), x)
+
+      self.assertAllEqual(grad, 2 * 5.0 + 3.0)
+      self.assertAllEqual(outer_fn(x), 5.0 * (5.0 + 3.0))
+      self.assertAllEqual(middle_fn(v, x), 3.0 * (3.0 + 5.0))
+
+      grad, = gradients_impl.gradients(outer_fn(x), x)
+
+      self.assertAllEqual(grad, 2 * 5.0 + 3.0)
+
+      y = constant_op.constant(4.0)
+      grad, = gradients_impl.gradients(outer_fn(y), y)
+      self.assertAllEqual(grad, 2 * 4.0 + 3.0)
+
+      self.evaluate(v.assign(constant_op.constant(1.5)))
+      grad, = gradients_impl.gradients(outer_fn(y), y)
+
+      self.assertAllEqual(grad, 2 * 4.0 + 1.5)
+
+      grad, = gradients_impl.gradients(inner_fn(y, v), y)
+      self.assertAllEqual(grad, 1.0)
+
   def testNestedDifferentiableFunctionNoneOutputs(self):
     @function.defun
     def foo(a, b):
diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py
index c58ecf4147..0b3853a37c 100644
--- a/tensorflow/python/ops/gradients_impl.py
+++ b/tensorflow/python/ops/gradients_impl.py
@@ -973,7 +973,8 @@ def _GetGrad(grads, t, unconnected_gradients):
   op_grads = grads.get(op)
   if not op_grads:
     if unconnected_gradients == UnconnectedGradients.ZERO:
-      return array_ops.zeros_like(t)
+      t_dtype = t.dtype if t.dtype != dtypes.resource else dtypes.float32
+      return array_ops.zeros_like(t, dtype=t_dtype)
     elif unconnected_gradients == UnconnectedGradients.NONE:
       return None
     else:
-- 
GitLab


From 6a6113a7ba3fc6a931a2f1cb7653a6ad56692192 Mon Sep 17 00:00:00 2001
From: Jingyue Wu <jingyue@google.com>
Date: Fri, 12 Oct 2018 16:16:45 -0700
Subject: [PATCH 0922/1085] Expose QuantizeAndDequantizeV2 to open-source
 Python API.

PiperOrigin-RevId: 216939077
---
 .../api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt  | 4 +++-
 tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt  | 4 ++++
 tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt  | 4 ++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt
index 15e181be20..a30b42010a 100644
--- a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt
+++ b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt
@@ -1,4 +1,6 @@
 op {
   graph_op_name: "QuantizeAndDequantizeV2"
-  visibility: HIDDEN
+  endpoint {
+    name: "quantization.quantize_and_dequantize"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
index 77c92aeb0d..2948b7318e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.quantization.pbtxt
@@ -32,6 +32,10 @@ tf_module {
     name: "quantize"
     argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'round_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'HALF_AWAY_FROM_ZERO\', \'None\'], "
   }
+  member_method {
+    name: "quantize_and_dequantize"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'None\'], "
+  }
   member_method {
     name: "quantized_concat"
     argspec: "args=[\'concat_dim\', \'values\', \'input_mins\', \'input_maxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
index 77c92aeb0d..2948b7318e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.quantization.pbtxt
@@ -32,6 +32,10 @@ tf_module {
     name: "quantize"
     argspec: "args=[\'input\', \'min_range\', \'max_range\', \'T\', \'mode\', \'round_mode\', \'name\'], varargs=None, keywords=None, defaults=[\'MIN_COMBINED\', \'HALF_AWAY_FROM_ZERO\', \'None\'], "
   }
+  member_method {
+    name: "quantize_and_dequantize"
+    argspec: "args=[\'input\', \'input_min\', \'input_max\', \'signed_input\', \'num_bits\', \'range_given\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'8\', \'False\', \'None\'], "
+  }
   member_method {
     name: "quantized_concat"
     argspec: "args=[\'concat_dim\', \'values\', \'input_mins\', \'input_maxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-- 
GitLab


From 9fa8939c4ad08d8783a3336aa28552febd2c08df Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 12 Oct 2018 16:18:46 -0700
Subject: [PATCH 0923/1085] Remove tensorflow_no_py_deps from configure script.

This config breaks out of the box usage of "bazel test" and hurts development.

PiperOrigin-RevId: 216939335
---
 configure.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/configure.py b/configure.py
index 5a4207476f..b564da2722 100644
--- a/configure.py
+++ b/configure.py
@@ -1498,14 +1498,6 @@ def set_windows_build_flags(environ_cp):
   # TODO(pcloudy): Remove this flag when upgrading Bazel to 0.16.0
   # Short object file path will be enabled by default.
   write_to_bazelrc('build --experimental_shortened_obj_file_path=true')
-  # When building zip file for some py_binary and py_test targets, don't
-  # include its dependencies. This is for:
-  #   1. Running python tests against the system installed TF pip package.
-  #   2. Avoiding redundant files in
-  #      //tensorflow/tools/pip_package:simple_console_windows,
-  #      which is a py_binary used during creating TF pip package.
-  #      See https://github.com/tensorflow/tensorflow/issues/22390
-  write_to_bazelrc('build --define=no_tensorflow_py_deps=true')
 
   if get_var(
       environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline',
-- 
GitLab


From 3d63e7aa199e81e2d89155c8826a27dd4ef51758 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddw@google.com>
Date: Fri, 12 Oct 2018 17:08:57 -0700
Subject: [PATCH 0924/1085] Explicitly set jdk8 in ci_parameterized_build.sh

PiperOrigin-RevId: 216946217
---
 tensorflow/tools/ci_build/ci_parameterized_build.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 489722c0e9..bc9cb4e9a1 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -149,8 +149,12 @@ BAZEL_TEST_FLAGS=""\
 "--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB}"
 BAZEL_BUILD_FLAGS="--keep_going"
 
-BAZEL_CMD="bazel test ${BAZEL_TEST_FLAGS}"
-BAZEL_BUILD_ONLY_CMD="bazel build ${BAZEL_BUILD_FLAGS}"
+# Explicitly set jdk8 since that's what's installed in our images. Note that
+# bazel 0.16 and higher defaults to jdk9, which causes failures. See b/117634064
+BAZEL_JAVA_FLAGS="--java_toolchain=@bazel_tools//tools/jdk:toolchain_hostjdk8"
+
+BAZEL_CMD="bazel test ${BAZEL_TEST_FLAGS} ${BAZEL_JAVA_FLAGS}"
+BAZEL_BUILD_ONLY_CMD="bazel build ${BAZEL_BUILD_FLAGS} ${BAZEL_JAVA_FLAGS}"
 BAZEL_CLEAN_CMD="bazel clean"
 
 PIP_CMD="${CI_BUILD_DIR}/builds/pip.sh"
-- 
GitLab


From 779d1e8f127de5b8039d1cf1da95c9606857d89a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 17:17:51 -0700
Subject: [PATCH 0925/1085] Internal change.

PiperOrigin-RevId: 216947070
---
 tensorflow/contrib/lite/kernels/basic_rnn.cc  |  7 +++---
 .../kernels/bidirectional_sequence_rnn.cc     |  7 +++---
 tensorflow/contrib/lite/kernels/conv.cc       |  8 ++++---
 .../contrib/lite/kernels/layer_norm_lstm.cc   | 23 ++++++++++--------
 tensorflow/contrib/lite/kernels/lstm.cc       | 24 ++++++++++---------
 tensorflow/contrib/lite/kernels/svdf.cc       |  7 +++---
 .../kernels/unidirectional_sequence_lstm.cc   | 24 ++++++++++---------
 .../kernels/unidirectional_sequence_rnn.cc    |  7 +++---
 8 files changed, 60 insertions(+), 47 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/basic_rnn.cc b/tensorflow/contrib/lite/kernels/basic_rnn.cc
index 74de23df73..53f524cc6c 100644
--- a/tensorflow/contrib/lite/kernels/basic_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/basic_rnn.cc
@@ -114,9 +114,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
-    scaling_factors_size->data[0] = batch_size;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    int scaling_dims[1] = {batch_size};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = batch_size;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
index 9f764313ff..6b702af476 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc
@@ -213,9 +213,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, kScalingFactors);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
-    scaling_factors_size->data[0] = batch_size;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    int scaling_dims[1] = {batch_size};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = batch_size;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc
index dbcadbee14..6695282a92 100644
--- a/tensorflow/contrib/lite/kernels/conv.cc
+++ b/tensorflow/contrib/lite/kernels/conv.cc
@@ -387,12 +387,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, data->scaling_factors_index);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
     // Only one scale factor per batch is typically necessary. See optimized
     // implementation for why we need to allocate for the height of the inputs
     // flattened to 2D.
-    scaling_factors_size->data[0] = NumElements(input) / channels_in;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    const int height = NumElements(input) / channels_in;
+    int scaling_dims[1] = {height};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = height;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
diff --git a/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc b/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc
index 9739fd4514..48dd03e7ae 100644
--- a/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc
@@ -409,9 +409,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/4);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
-    scaling_factors_size->data[0] = n_batch;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    int scaling_dims[1] = {n_batch};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = n_batch;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
@@ -420,10 +421,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, /*index=*/5);
     prod_scaling_factors->type = kTfLiteFloat32;
     prod_scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1);
-    prod_scaling_factors_size->data[0] = n_batch;
-    if (!TfLiteIntArrayEqual(prod_scaling_factors->dims,
-                             prod_scaling_factors_size)) {
+    if (!TfLiteIntArrayEqualsArray(prod_scaling_factors->dims, 1,
+                                   scaling_dims)) {
+      TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1);
+      prod_scaling_factors_size->data[0] = n_batch;
       TF_LITE_ENSURE_OK(context,
                         context->ResizeTensor(context, prod_scaling_factors,
                                               prod_scaling_factors_size));
@@ -435,9 +436,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     TfLiteTensor* recovered_weights = GetTemporary(context, node, /*index=*/6);
     recovered_weights->type = kTfLiteFloat32;
     recovered_weights->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* recovered_weights_size = TfLiteIntArrayCreate(1);
-    recovered_weights_size->data[0] = n_cell;
-    if (!TfLiteIntArrayEqual(recovered_weights->dims, recovered_weights_size)) {
+    int recovered_dims[1] = {n_cell};
+    if (!TfLiteIntArrayEqualsArray(recovered_weights->dims, 1,
+                                   recovered_dims)) {
+      TfLiteIntArray* recovered_weights_size = TfLiteIntArrayCreate(1);
+      recovered_weights_size->data[0] = n_cell;
       TF_LITE_ENSURE_OK(context,
                         context->ResizeTensor(context, recovered_weights,
                                               recovered_weights_size));
diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc
index 16d67a1a93..f08a1a80c0 100644
--- a/tensorflow/contrib/lite/kernels/lstm.cc
+++ b/tensorflow/contrib/lite/kernels/lstm.cc
@@ -386,9 +386,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/4);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
-    scaling_factors_size->data[0] = n_batch;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    int scaling_dims[1] = {n_batch};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = n_batch;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
@@ -397,10 +398,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, /*index=*/5);
     prod_scaling_factors->type = kTfLiteFloat32;
     prod_scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1);
-    prod_scaling_factors_size->data[0] = n_batch;
-    if (!TfLiteIntArrayEqual(prod_scaling_factors->dims,
-                             prod_scaling_factors_size)) {
+    if (!TfLiteIntArrayEqualsArray(prod_scaling_factors->dims, 1,
+                                   scaling_dims)) {
+      TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1);
+      prod_scaling_factors_size->data[0] = n_batch;
       TF_LITE_ENSURE_OK(context,
                         context->ResizeTensor(context, prod_scaling_factors,
                                               prod_scaling_factors_size));
@@ -413,10 +414,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, /*index=*/6);
     recovered_cell_weights->type = kTfLiteFloat32;
     recovered_cell_weights->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1);
-    recovered_cell_weights_size->data[0] = n_cell;
-    if (!TfLiteIntArrayEqual(recovered_cell_weights->dims,
-                             recovered_cell_weights_size)) {
+    int recovered_cell_dims[1] = {n_cell};
+    if (!TfLiteIntArrayEqualsArray(recovered_cell_weights->dims, 1,
+                                   recovered_cell_dims)) {
+      TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1);
+      recovered_cell_weights_size->data[0] = n_cell;
       TF_LITE_ENSURE_OK(context,
                         context->ResizeTensor(context, recovered_cell_weights,
                                               recovered_cell_weights_size));
diff --git a/tensorflow/contrib/lite/kernels/svdf.cc b/tensorflow/contrib/lite/kernels/svdf.cc
index 9903fd5c35..e133f37c0c 100644
--- a/tensorflow/contrib/lite/kernels/svdf.cc
+++ b/tensorflow/contrib/lite/kernels/svdf.cc
@@ -216,9 +216,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
-    scaling_factors_size->data[0] = batch_size;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    int scaling_dims[1] = {batch_size};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = batch_size;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
index 89d57e4599..40029779e0 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
@@ -389,9 +389,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, kScalingFactors);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
-    scaling_factors_size->data[0] = n_batch;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    int scaling_dims[1] = {n_batch};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = n_batch;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
@@ -401,10 +402,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, kProductScalingFactors);
     prod_scaling_factors->type = kTfLiteFloat32;
     prod_scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1);
-    prod_scaling_factors_size->data[0] = n_batch;
-    if (!TfLiteIntArrayEqual(prod_scaling_factors->dims,
-                             prod_scaling_factors_size)) {
+    if (!TfLiteIntArrayEqualsArray(prod_scaling_factors->dims, 1,
+                                   scaling_dims)) {
+      TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1);
+      prod_scaling_factors_size->data[0] = n_batch;
       TF_LITE_ENSURE_OK(context,
                         context->ResizeTensor(context, prod_scaling_factors,
                                               prod_scaling_factors_size));
@@ -418,10 +419,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, kRecoveredCellWeights);
     recovered_cell_weights->type = kTfLiteFloat32;
     recovered_cell_weights->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1);
-    recovered_cell_weights_size->data[0] = n_cell;
-    if (!TfLiteIntArrayEqual(recovered_cell_weights->dims,
-                             recovered_cell_weights_size)) {
+    int recovered_cell_dims[1] = {n_cell};
+    if (!TfLiteIntArrayEqualsArray(recovered_cell_weights->dims, 1,
+                                   recovered_cell_dims)) {
+      TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1);
+      recovered_cell_weights_size->data[0] = n_cell;
       TF_LITE_ENSURE_OK(context,
                         context->ResizeTensor(context, recovered_cell_weights,
                                               recovered_cell_weights_size));
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
index 354b837b3e..5b9e8dce22 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc
@@ -125,9 +125,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
-    scaling_factors_size->data[0] = batch_size;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    int scaling_dims[1] = {batch_size};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = batch_size;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
-- 
GitLab


From 1a77f1876e8138c84f96e639e464096294e9f389 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Fri, 12 Oct 2018 17:19:01 -0700
Subject: [PATCH 0926/1085] Internal change

PiperOrigin-RevId: 216947172
---
 tensorflow/contrib/lite/BUILD | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index 787a85644c..a725c73193 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -7,6 +7,13 @@ licenses(["notice"])  # Apache 2.0
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops")
 
+package_group(
+    name = "internal",
+    packages = [
+        "//tensorflow/contrib/lite/...",
+    ],
+)
+
 exports_files(glob([
     "testdata/*.bin",
     "testdata/*.pb",
@@ -32,7 +39,6 @@ config_setting(
 config_setting(
     name = "with_tflite_flex",
     define_values = {"with_tflite_flex": "true"},
-    visibility = ["//visibility:public"],
 )
 
 cc_library(
@@ -80,12 +86,14 @@ cc_library(
 cc_library(
     name = "graph_info",
     hdrs = ["graph_info.h"],
+    visibility = ["//visibility:private"],
     deps = ["//tensorflow/contrib/lite/c:c_api_internal"],
 )
 
 cc_library(
     name = "memory_planner",
     hdrs = ["memory_planner.h"],
+    visibility = ["//visibility:private"],
     deps = ["//tensorflow/contrib/lite/c:c_api_internal"],
 )
 
@@ -93,6 +101,7 @@ cc_library(
     name = "simple_memory_arena",
     srcs = ["simple_memory_arena.cc"],
     hdrs = ["simple_memory_arena.h"],
+    visibility = ["//visibility:private"],
     deps = ["//tensorflow/contrib/lite/c:c_api_internal"],
 )
 
@@ -323,6 +332,7 @@ cc_library(
     name = "util",
     srcs = ["util.cc"],
     hdrs = ["util.h"],
+    visibility = [":internal"],
     deps = [
         "//tensorflow/contrib/lite/c:c_api_internal",
     ],
-- 
GitLab


From 9ea44816685002922459109591f7dff36f541761 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 17:26:01 -0700
Subject: [PATCH 0927/1085] Open source mid-level TPU embedding API.

PiperOrigin-RevId: 216947827
---
 tensorflow/contrib/tpu/BUILD                  |   19 +
 .../contrib/tpu/python/tpu/tpu_embedding.py   | 1099 +++++++++++++++++
 2 files changed, 1118 insertions(+)
 create mode 100644 tensorflow/contrib/tpu/python/tpu/tpu_embedding.py

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index acdc47482b..401afcb0f4 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -192,6 +192,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":keras_support",  # split out to avoid cycle with tpu_strategy
+        ":tpu_embedding",
         ":tpu_estimator",
         ":tpu_lib",
     ],
@@ -396,3 +397,21 @@ tf_py_test(
         "//tensorflow/python:framework_test_lib",
     ],
 )
+
+py_library(
+    name = "tpu_embedding",
+    srcs = ["python/tpu/tpu_embedding.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/tpu:tpu_ops",
+        "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:partitioned_variables",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "@six_archive//:six",
+    ],
+)
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py
new file mode 100644
index 0000000000..741d2b759f
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py
@@ -0,0 +1,1099 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""TPU embedding APIs."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import copy
+import math
+import re
+import six
+
+from tensorflow.contrib.framework.python.framework import experimental
+from tensorflow.contrib.tpu.ops import gen_tpu_ops
+from tensorflow.contrib.tpu.proto import tpu_embedding_configuration_pb2 as elc
+from tensorflow.contrib.tpu.python.ops import tpu_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+
+TRAINING = elc.TPUEmbeddingConfiguration.TRAINING
+INFERENCE = elc.TPUEmbeddingConfiguration.INFERENCE
+
+# TODO(shizhiw): A better interface is to make `num_hosts` and
+# `num_cores_per_host` optional parameters for `TPUEmbedding`
+# constructor. Usually they can be automatically detected, but
+# user can also specify them for debugging (b/112112496).
+# Auto-detection can be done with `tpu_system_metadata.py`.
+_MASTER_JOB = 'tpu_worker'
+_HOST_PATTERN = '/job:tpu_worker/task:{}/device:CPU:0'
+_NUM_CORES_PER_HOST = 8
+
+_TEST_MASTER_JOB = None
+_TEST_HOST = '/replica:0/task:0/device:CPU:0'
+_TEST_NUM_CORES_PER_HOST = 2
+
+
+class TableConfig(
+    collections.namedtuple(
+        'TableConfig',
+        ['vocabulary_size', 'dimension', 'initializer', 'combiner'])):
+  """Embedding table configuration."""
+
+  @experimental
+  def __new__(cls,
+              vocabulary_size,
+              dimension,
+              initializer=None,
+              combiner='mean'):
+    """Embedding table configuration.
+
+    Args:
+      vocabulary_size: Number of vocabulary (/rows) in the table.
+      dimension: The embedding dimension.
+      initializer: A variable initializer function to be used in embedding
+        variable initialization. If not specified, defaults to
+        `tf.truncated_normal_initializer` with mean `0.0` and standard deviation
+        `1/sqrt(dimension)`.
+      combiner: A string specifying how to reduce if there are multiple entries
+        in a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with
+        'mean' the default. 'sqrtn' often achieves good accuracy, in particular
+        with bag-of-words columns. For more information, see
+        `tf.nn.embedding_lookup_sparse`.
+
+    Returns:
+      `TableConfig`.
+
+    Raises:
+      ValueError: if `vocabulary_size` is not positive integer.
+      ValueError: if `dimension` is not positive integer.
+      ValueError: if `initializer` is specified and is not callable.
+      ValueError: if `combiner` is not supported.
+    """
+    if not isinstance(vocabulary_size, int) or vocabulary_size < 1:
+      raise ValueError('Invalid vocabulary_size {}.'.format(vocabulary_size))
+
+    if not isinstance(dimension, int) or dimension < 1:
+      raise ValueError('Invalid dimension {}.'.format(dimension))
+
+    if (initializer is not None) and (not callable(initializer)):
+      raise ValueError('initializer must be callable if specified.')
+    if initializer is None:
+      initializer = init_ops.truncated_normal_initializer(
+          mean=0.0, stddev=1 / math.sqrt(dimension))
+
+    if combiner not in ('mean', 'sum', 'sqrtn'):
+      raise ValueError('Invalid combiner {}'.format(combiner))
+
+    return super(TableConfig, cls).__new__(cls, vocabulary_size, dimension,
+                                           initializer, combiner)
+
+
+# TODO(shizhiw): Factor `use_gradient_accumulation` and
+# `pipeline_execution_with_tensor_core` out of `_OptimizationParameters`.
+class _OptimizationParameters(object):
+  """Parameters common to all optimizations."""
+
+  def __init__(self, learning_rate, use_gradient_accumulation,
+               pipeline_execution_with_tensor_core):
+    self.learning_rate = learning_rate
+    self.use_gradient_accumulation = use_gradient_accumulation
+    self.pipeline_execution_with_tensor_core = (
+        pipeline_execution_with_tensor_core)
+
+
+class AdagradParameters(_OptimizationParameters):
+  """Optimization parameters for Adagrad."""
+
+  def __init__(self, learning_rate, initial_accumulator,
+               use_gradient_accumulation=False,
+               pipeline_execution_with_tensor_core=True):
+    """Optimization parameters for Adagrad.
+
+    Args:
+      learning_rate: used for updating embedding table.
+      initial_accumulator: initial accumulator for Adagrad.
+      use_gradient_accumulation: setting this to `True` makes embedding
+         gradients calculation more accurate but slower. Please see
+         `optimization_parameters.proto` for details.
+         for details.
+      pipeline_execution_with_tensor_core: setting this to `True` makes training
+        faster, but trained model will be different if step N and step N+1
+        involve the same set of embedding ID. Please see
+        `tpu_embedding_configuration.proto` for details.
+    """
+    super(AdagradParameters, self).__init__(learning_rate,
+                                            use_gradient_accumulation,
+                                            pipeline_execution_with_tensor_core)
+    self.initial_accumulator = initial_accumulator
+
+
+class AdamParameters(_OptimizationParameters):
+  """Optimization parameters for Adam."""
+
+  def __init__(self, learning_rate,
+               beta1=0.9,
+               beta2=0.999,
+               epsilon=1e-08,
+               use_gradient_accumulation=False,
+               pipeline_execution_with_tensor_core=True):
+    """Optimization parameters for Adam.
+
+    Args:
+      learning_rate: a floating point value. The learning rate.
+      beta1: A float value.
+        The exponential decay rate for the 1st moment estimates.
+      beta2: A float value.
+        The exponential decay rate for the 2nd moment estimates.
+      epsilon: A small constant for numerical stability.
+      use_gradient_accumulation: setting this to `True` makes embedding
+         gradients calculation more accurate but slower. Please see
+         `optimization_parameters.proto` for details.
+         for details.
+      pipeline_execution_with_tensor_core: setting this to `True` makes training
+        faster, but trained model will be different if step N and step N+1
+        involve the same set of embedding ID. Please see
+        `tpu_embedding_configuration.proto` for details.
+    """
+    super(AdamParameters, self).__init__(learning_rate,
+                                         use_gradient_accumulation,
+                                         pipeline_execution_with_tensor_core)
+    self.beta1 = beta1
+    self.beta2 = beta2
+    self.epsilon = epsilon
+
+
+class StochasticGradientDescentParameters(_OptimizationParameters):
+  """Optimization parameters for stochastic gradient descent.
+
+  Args:
+    learning_rate: a floating point value. The learning rate.
+    use_gradient_accumulation: setting this to `True` makes embedding
+      gradients calculation more accurate but slower. Please see
+         `optimization_parameters.proto` for details.
+    pipeline_execution_with_tensor_core: setting this to `True` makes training
+      faster, but trained model will be different if step N and step N+1
+      involve the same set of embedding ID. Please see
+      `tpu_embedding_configuration.proto` for details.
+    """
+
+  def __init__(self, learning_rate, use_gradient_accumulation=False,
+               pipeline_execution_with_tensor_core=True):
+    super(StochasticGradientDescentParameters, self).__init__(
+        learning_rate, use_gradient_accumulation,
+        pipeline_execution_with_tensor_core)
+
+
+class TPUEmbedding(object):
+  """API for using TPU for embedding.
+
+    Example:
+    ```
+    table_config_user = tpu_embedding.TableConfig(
+        vocabulary_size=4, dimension=2,
+        initializer=initializer, combiner='mean')
+    table_to_config_dict = {'video': table_config_video,
+                          'user': table_config_user}
+    feature_to_table_dict = {'watched': 'video',
+                             'favorited': 'video',
+                             'friends': 'user'}
+    batch_size = 4
+    num_hosts = 1
+    optimization_parameters = tpu_embedding.AdagradParameters(1., 1.)
+    mode = tpu_embedding.TRAINING
+    embedding = tpu_embedding.TPUEmbedding(
+        table_to_config_dict, feature_to_table_dict,
+        batch_size, num_hosts, mode, optimization_parameters)
+
+    batch_size_per_core = embedding.batch_size_per_core
+    sparse_features_list = []
+    for host in hosts:
+      with ops.device(host):
+        for _ in range(embedding.num_cores_per_host):
+          sparse_features = {}
+          sparse_features['watched'] = sparse_tensor.SparseTensor(...)
+          sparse_features['favorited'] = sparse_tensor.SparseTensor(...)
+          sparse_features['friends'] = sparse_tensor.SparseTensor(...)
+          sparse_features_list.append(sparse_features)
+
+    enqueue_ops = embedding.generate_enqueue_ops(sparse_features_list)
+
+    def computation():
+      activations = embedding.get_activations()
+      loss = compute_loss(activations)
+
+      base_optimizer = gradient_descent.GradientDescentOptimizer(
+          learning_rate=1)
+      cross_shard_optimizer = tpu_optimizer.CrossShardOptimizer(
+          base_optimizer)
+
+      train_op = cross_shard_optimizer.minimize(loss)
+      # `train_op` and `send_gradients_op` must happen in order.
+      with ops.control_dependencies([train_op]):
+        send_gradients_op = embedding.generate_send_gradients_op()
+      with ops.control_dependencies([send_gradients_op]):
+        loss = array_ops.identity(loss)
+
+    loss = tpu.shard(computation,
+                     num_shards=embedding.num_cores)
+
+    with self.test_session() as sess:
+      sess.run(tpu.initialize_system(embedding_config=
+                                     embedding.config_proto))
+      sess.run(variables.global_variables_initializer())
+      sess.run(embedding.init_ops)
+      sess.run(enqueue_ops)
+      loss_val = sess.run(loss)
+    ```
+  """
+
+  # TODO(shizhiw): Instead of `feature_to_table_dict` which maps to table
+  # name, consider `feature_to_config_dict` which maps to `FeatureConfig`.
+  # `FeatureConfig` could have fields other than table name. For example, it
+  # could have a field to indicate that the feature should not be used to
+  # update embedding table (cr/204852758, cr/204940540). Also, this can support
+  # different combiners for different features within the same table.
+
+  # TODO(shizhiw): will it be cleaner to make `table_to_config_dict` and
+  # `feature_to_table_dict` lists of `TableSpec` and `FeatureSpec` respectively?
+
+  # TODO(shizhiw): Consider adding `input_fn` as an option to remove boilerplate
+  # for-loops around construction of inputs.
+
+  # `optimization_parameter` applies to all tables. If the need arises,
+  # we can add `optimization_parameters` to `TableConfig` to override this
+  # global setting.
+  @experimental
+  def __init__(self,
+               table_to_config_dict,
+               feature_to_table_dict,
+               batch_size,
+               num_hosts,
+               mode,
+               optimization_parameters=None,
+               tpu_embedding_test=False):
+    """API for using TPU for embedding lookups.
+
+    Args:
+      table_to_config_dict: A dictionary mapping from string of table name to
+        `TableConfig`. Table refers to an embedding table, e.g. `params`
+        argument to `tf.nn.embedding_lookup_sparse()`.
+      feature_to_table_dict: A dictionary mapping from string of feature name
+        to string of table name. Feature refers to ids to lookup in embedding
+        table, e.g. `sp_ids` argument to `tf.nn.embedding_lookup_sparse()`.
+      batch_size: An `int` representing the global batch size.
+      num_hosts: An `int` representing the number of TPU hosts.
+      mode: `TRAINING` or `INFERENCE`.
+      optimization_parameters: `AdagradParameters`, `AdamParameters`,
+        `Stochasticgradientdescentparameters`. Must be set in training and must
+        not be `None` in inference.
+      tpu_embedding_test: A `bool`. Only used for testing.
+
+    Raises:
+      ValueError: if any input is invalid.
+    """
+    _validate_table_to_config_dict(table_to_config_dict)
+    # Avoid nondeterminism from `Dict` iteration order by using `OrderedDict`.
+    self._table_to_config_dict = _create_ordered_dict(table_to_config_dict)
+    self._combiners = _create_combiners(self._table_to_config_dict)
+
+    _validate_feature_to_table_dict(table_to_config_dict, feature_to_table_dict)
+    self._feature_to_table_dict = _create_ordered_dict(feature_to_table_dict)
+    self._table_to_features_dict = _create_table_to_features_dict(
+        self._feature_to_table_dict)
+
+    self._batch_size = batch_size
+
+    if tpu_embedding_test:
+      self._num_hosts = 1
+      self._hosts = [_TEST_HOST]
+      self._num_cores_per_host = _TEST_NUM_CORES_PER_HOST
+    else:
+      self._num_hosts = num_hosts
+      self._hosts = [_HOST_PATTERN.format(i) for i in range(self._num_hosts)]
+      self._num_cores_per_host = _NUM_CORES_PER_HOST
+    self._num_cores = self._num_cores_per_host * self._num_hosts
+
+    _validate_batch_size(self._batch_size, self._num_cores)
+    self._batch_size_per_core = self._batch_size // self._num_cores
+
+    self._init_ops = []
+
+    # TODO(shizhiw): remove `mode`?
+    if mode == TRAINING:
+      _validate_optimization_parameters(optimization_parameters)
+      self._optimization_parameters = optimization_parameters
+    elif mode == INFERENCE:
+      if optimization_parameters is not None:
+        raise ValueError('`optimization_parameters` should be `None` '
+                         'for inference mode.')
+      self._optimization_parameters = (
+          StochasticGradientDescentParameters(1.))
+    else:
+      raise ValueError('`mode` only supports {} and {}; got {}.'
+                       .format(TRAINING, INFERENCE, mode))
+    self._mode = mode
+
+    # TODO(shizhiw): move `optimization_parameters` into `_optimizer_handler`
+    # and create special handler for inference that inherits from
+    # StochasticGradientDescentHandler with more user-friendly error message
+    # on get_slot().
+    self._optimizer_handler = _get_optimization_handler(
+        self._optimization_parameters)
+
+    dummy_table_variables_init_op = self._create_dummy_table_variables()
+    self._init_ops.append(dummy_table_variables_init_op)
+
+    self._config_proto = self._create_config_proto()
+
+    self._create_variables_and_ops()
+    self._init_ops.extend(self._load_parameters_ops)
+
+  @property
+  def hosts(self):
+    """A list of device names for CPU hosts.
+
+    Returns:
+      A list of device names for CPU hosts.
+    """
+    return self._hosts
+
+  # TODO(shizhiw): change to num_tensor_cores_per_host to be more explicit and
+  # to be consistent with `tpu_embedding_configuration.proto`.
+  @property
+  def num_cores_per_host(self):
+    """Number of TPU cores on a CPU host.
+
+    Returns:
+      Number of TPU cores on a CPU host.
+    """
+    return self._num_cores_per_host
+
+  @property
+  def num_cores(self):
+    """Total number of TPU cores on all hosts.
+
+    Returns:
+      Total number of TPU cores on all hosts.
+    """
+    return self._num_cores
+
+  @property
+  def batch_size_per_core(self):
+    """Batch size for each TPU core.
+
+    The sparse tensors in `sparse_features_list` to `generate_enqueue_ops`
+       must have batch dimension equal to this.
+
+    Returns:
+      Batch size for each TPU core.
+    """
+    return self._batch_size_per_core
+
+  @property
+  def config_proto(self):
+    """Create embedding config proto for `tpu.initialize_system()`.
+
+    Returns:
+      an `TPUEmbeddingConfiguration` proto describing the desired
+         configuration of the hardware embedding lookup tables, which
+         is passed to `tpu.initialize_system()`.
+    """
+    return self._config_proto
+
+  @property
+  def init_ops(self):
+    """Initialization ops for TPU embedding.
+
+    It must be called after all global variables have been initialized,
+    i.e. after `global_variables_initializer()`, as it loads embedding
+    tables into TPU.
+
+    Returns:
+      A list of ops.
+    """
+    return self._init_ops
+
+  # TODO(shizhiw): get table variables the same way as getting slot variables.
+  @property
+  def table_to_table_variables_dict(self):
+    return copy.copy(self._table_to_table_variables_dict)
+
+  def get_slot_names(self):
+    """Return a list of the names of slots created by `TPUEmbedding`."""
+    return self._optimizer_handler.get_slot_names()
+
+  def get_slot(self, table, name):
+    """Return a slot named `name` create for `table` by `TPUEmbedding`."""
+    return self._optimizer_handler.get_slot(table, name)
+
+  # TODO(shizhiw): expose load to user too?
+  @property
+  def retrieve_parameters_ops(self):
+    return self._retrieve_parameters_ops
+
+  def _create_config_proto(self):
+    """Create `TPUEmbeddingConfiguration`."""
+    config_proto = elc.TPUEmbeddingConfiguration()
+    for table in self._table_to_config_dict:
+      table_descriptor = config_proto.table_descriptor.add()
+      table_descriptor.name = table
+
+      table_config = self._table_to_config_dict[table]
+      table_descriptor.vocabulary_size = table_config.vocabulary_size
+      table_descriptor.dimension = table_config.dimension
+
+      features_for_table = self._table_to_features_dict[table]
+      table_descriptor.num_features = len(features_for_table)
+
+      table_descriptor.optimization_parameters.learning_rate.constant = (
+          self._optimization_parameters.learning_rate)
+      table_descriptor.optimization_parameters.use_gradient_accumulation = (
+          self._optimization_parameters.use_gradient_accumulation)
+      self._optimizer_handler.set_optimization_parameters(table_descriptor)
+
+    config_proto.mode = self._mode
+    config_proto.batch_size_per_tensor_core = self._batch_size_per_core
+    config_proto.num_hosts = self._num_hosts
+    config_proto.num_tensor_cores = self._num_cores
+    config_proto.sharding_strategy = elc.TPUEmbeddingConfiguration.DIV_DEFAULT
+    config_proto.pipeline_execution_with_tensor_core = (
+        self._optimization_parameters.pipeline_execution_with_tensor_core)
+
+    return config_proto
+
+  def _create_variables_and_ops(self):
+    """Create embedding variables and return ops to load them into TPU."""
+    self._load_parameters_ops = []
+    self._retrieve_parameters_ops = []
+    self._table_to_table_variables_dict = {}
+    for table in self._table_to_config_dict:
+      device_fn = _create_device_fn(self._hosts)
+      with ops.device(device_fn):
+        # TODO(shizhiw): allow user to specify variable name so that
+        # they could make the name consistent with CPU etc.
+        variable_name = table
+        table_variables = _create_partitioned_variables(
+            name=variable_name,
+            num_hosts=self._num_hosts,
+            vocabulary_size=self._table_to_config_dict[table].vocabulary_size,
+            embedding_dimension=self._table_to_config_dict[table].dimension,
+            initializer=self._table_to_config_dict[table].initializer,
+            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
+        self._table_to_table_variables_dict[table] = table_variables
+
+        self._optimizer_handler.create_variables_and_ops(
+            table, variable_name, self._num_hosts,
+            self._table_to_config_dict[table], table_variables,
+            self._load_parameters_ops, self._retrieve_parameters_ops)
+
+  def _create_dummy_table_variables(self):
+    """Create dummy embedding table variables.
+
+    The sole purpose of these dummy variables are to trigger gradient
+    calcuation wrt them so that the gradients wrt activation can be captured
+    and later sent to TPU embedding.
+
+    Returns:
+      Initializer for these variables.
+
+    Raises:
+      RuntimeError: if collection to store gradients already exists and is not
+      empty.
+    """
+    self._dummy_table_variables = []
+    # TODO(shizhiw): remove table id.
+    for table_id, table in enumerate(self._table_to_features_dict):
+      self._dummy_table_variables.append(
+          variable_scope.get_variable(
+              'tpu_embedding_dummy_table_variable_%s' % table,
+              dtype=dtypes.float32,
+              shape=[1],
+              use_resource=True,
+              trainable=True,
+              # TODO(shizhiw): Remove these dummy variables as
+              # tensorflow optimizer creates slot variable for them which
+              # is undesirable.
+              # e.g. tpu_embedding_dummy_table_variable_mlp_user/Adam{_1}.
+              # Explicitly specifying collections prevents this variable from
+              # being added to the GLOBAL_VARIABLES collection, so that Saver()
+              # ignores it.
+              collections=['tpu_embedding_dummy_table_variables']))
+
+      g = ops.get_default_graph()
+      table_gradients = g.get_collection_ref(
+          'tpu_embedding_gradients_table_%d' % table_id)
+      if table_gradients:
+        raise RuntimeError(
+            'tpu_embedding_gradients_table_%d is not empty.' % table_id)
+      table_gradients.extend([None] * len(self._table_to_features_dict[table]))
+
+    return variables.variables_initializer(
+        self._dummy_table_variables,
+        name='tpu_embedding_dummy_table_variables_init')
+
+  def generate_enqueue_ops(self, sparse_features_list):
+    """Generate enqueue ops.
+
+    Args:
+      sparse_features_list: a list of dictionary mapping from string
+        of feature names to sparse tensor. Each dictionary is for one
+        TPU core. Dictionaries for the same core should be contiguous
+        on the list.
+
+    Returns:
+      Ops to enqueue to TPU for embedding.
+    """
+    self._validate_generate_enqueue_ops_sparse_features_list(
+        sparse_features_list)
+    return [
+        self._generate_enqueue_op(
+            sparse_features, device_ordinal=i % self._num_cores_per_host)
+        for i, sparse_features in enumerate(sparse_features_list)
+    ]
+
+  def _validate_generate_enqueue_ops_sparse_features_list(
+      self, sparse_features_list):
+    """Validate `sparse_features_list`."""
+    if len(sparse_features_list) != self._num_cores:
+      raise ValueError('Length of `sparse_features_list` should match the '
+                       'number of cores; '
+                       '`len(sparse_features_list)` is {}, '
+                       'number of cores is {}.'.format(
+                           len(sparse_features_list), self._num_cores))
+
+    feature_set = set(self._feature_to_table_dict.keys())
+    contiguous_device = None
+    for i, sparse_features in enumerate(sparse_features_list):
+      used_feature_set = set(sparse_features.keys())
+
+      # Check features are valid.
+      missing_feature_set = feature_set - used_feature_set
+      if missing_feature_set:
+        raise ValueError('`sparse_features_list[{}]` misses a feature that is '
+                         'in `feature_to_config_dict`: {}.'.format(
+                             i, missing_feature_set))
+
+      extra_feature_set = used_feature_set - feature_set
+      if extra_feature_set:
+        raise ValueError('`sparse_features_list[{}]` has a feature that is not '
+                         'in `feature_to_config_dict`: {}.'.format(
+                             i, extra_feature_set))
+
+      device = None
+      device_feature = None
+      for feature, tensor in six.iteritems(sparse_features):
+        if not isinstance(tensor, sparse_tensor.SparseTensor):
+          raise ValueError('`sparse_features_list[{}]` has a feature that is '
+                           'not mapped to `SparseTensor`. '
+                           '`feature`: {}, type: {}'.format(
+                               i, feature, type(tensor)))
+
+        # Check all features are on the same device.
+        if device is None:
+          device = tensor.op.device
+          device_feature = feature
+        else:
+          if device != tensor.op.device:
+            raise ValueError('Devices are different between features in '
+                             '`sparse_features_list[{}]`; '
+                             'devices: {}, {}; features: {}, {}.'.format(
+                                 i, device, tensor.op.device, feature,
+                                 device_feature))
+
+      if i % self._num_cores_per_host:
+        if device != contiguous_device:
+          raise ValueError('We expect the `sparse_features` which are on the '
+                           'same host to be contiguous in '
+                           '`sparse_features_list`, '
+                           '`sparse_features_list[{}]` is on device {}, '
+                           'but is expected to be on device {}.'.format(
+                               i, device, contiguous_device))
+      else:
+        contiguous_device = device
+
+  def _generate_enqueue_op(self, sparse_features, device_ordinal):
+    with ops.colocate_with(sparse_features.values()[0]):
+      sample_idcs, embedding_idcs, aggregation_weights = (
+          self._format_for_tpu_embedding_sparse_batch(sparse_features))
+      return tpu_ops.enqueue_tpu_embedding_sparse_batch(
+          sample_idcs,
+          embedding_idcs,
+          aggregation_weights,
+          combiners=self._combiners,
+          device_ordinal=device_ordinal)
+
+  def _format_for_tpu_embedding_sparse_batch(self, sparse_features):
+    """Format sparse features for `enqueue_tpu_embedding_sparse_batch()`.
+
+    Args:
+      sparse_features: a `Dict` of `SparseTensor`s for embedding.
+
+    Returns:
+      Arguments for `enqueue_tpu_embedding_sparse_batch()`.
+    """
+
+    sample_idcs, embedding_idcs, aggregation_weights = list(), list(), list()
+    for table in self._table_to_features_dict:
+      sample_t, indices_t, weights_t = list(), list(), list()
+
+      features = self._table_to_features_dict[table]
+      for i, feature in enumerate(features):
+        tensor = sparse_features[feature]
+        sample_indices = tensor.indices[:, 0]
+        embedding_indices = tensor.values
+        weights = array_ops.ones_like(embedding_indices)
+        sample_t.append(i * self._batch_size_per_core + sample_indices)
+        indices_t.append(embedding_indices)
+        weights_t.append(weights)
+
+      sample_idcs.append(
+          math_ops.cast(array_ops.concat(sample_t, axis=0), dtype=dtypes.int32))
+      embedding_idcs.append(
+          math_ops.cast(
+              array_ops.concat(indices_t, axis=0), dtype=dtypes.int32))
+      aggregation_weights.append(
+          math_ops.cast(
+              array_ops.concat(weights_t, axis=0), dtype=dtypes.float32))
+
+    return sample_idcs, embedding_idcs, aggregation_weights
+
+  def get_activations(self):
+    """Get activations for features.
+
+    This should be called within `computation` that is passed to
+      `tpu.replicate` and friends.
+
+    Returns:
+      A dictionary mapping from `String` of feature name to `Tensor`
+        of activation.
+    """
+    recv_activations = tpu_ops.recv_tpu_embedding_activations(
+        num_outputs=len(self._table_to_config_dict),
+        config=self._config_proto.SerializeToString())
+
+    activations = collections.OrderedDict()
+    for table_id, table in enumerate(self._table_to_features_dict):
+      features = self._table_to_features_dict[table]
+      for lookup_id, feature in enumerate(features):
+        start_row = lookup_id * self._batch_size_per_core
+        end_row = start_row + self._batch_size_per_core
+        activations[feature] = gen_tpu_ops.tpu_embedding_activations(
+            self._dummy_table_variables[table_id],
+            recv_activations[table_id][start_row:end_row, :],
+            table_id=table_id,
+            lookup_id=lookup_id)
+    return activations
+
+  # TODO(shizhiw): Make `gradient_multiplier` per feature. Setting it to 0 would
+  # have the effect of `tf.stop_gradients()`.
+  # TODO(shizhiw): Consider alternative ways to capture gradients wrt embedding
+  # layer outputs to remove `_dummy_table_variables`,
+  # `_embedding_activation_grad` and `tpu_embedding_gradients_table_%d'.
+  def generate_send_gradients_op(self, gradient_multipliers=None):
+    """Retrieve gradients from collections and send them to TPU embedding.
+
+    Args:
+      gradient_multipliers: None, or dict mapping table names to gradient
+        multiplier Tensors.
+
+    Returns:
+      SendTPUEmbeddingGradients Op.
+
+    Raises:
+      ValueError: If required gradients have not been defined.
+      RuntimeError: If `mode` is not `TRAINING`.
+    """
+    if self._mode != TRAINING:
+      raise RuntimeError('Only in training mode gradients need to '
+                         'be sent to TPU embedding; got mode {}.'
+                         .format(self._mode))
+
+    g = ops.get_default_graph()
+    gradients = list()
+    for table_id, table in enumerate(self._table_to_config_dict):
+      table_gradients = g.get_collection(
+          'tpu_embedding_gradients_table_%d' % table_id)
+      if any(gradient is None for gradient in table_gradients):
+        raise ValueError(
+            'Table {}/{} has undefined gradients: this is probably because the '
+            'model asked TPUEmbedding to compute activations that were not '
+            'used.'.format(table_id, table))
+      concat_table_grads = array_ops.concat(table_gradients, axis=0)
+      if gradient_multipliers is not None:
+        concat_table_grads *= gradient_multipliers[table.name]
+      gradients.append(concat_table_grads)
+
+    return tpu_ops.send_tpu_embedding_gradients(
+        inputs=gradients, config=self.config_proto.SerializeToString())
+
+
+def _validate_table_to_config_dict(table_to_config_dict):
+  """Validate `table_to_config_dict`."""
+  for k, v in six.iteritems(table_to_config_dict):
+    if not isinstance(v, TableConfig):
+      raise ValueError('Value of `table_to_config_dict` must be of type '
+                       '`TableConfig`, got {} for {}.'.format(type(v), k))
+
+
+def _validate_feature_to_table_dict(table_to_config_dict,
+                                    feature_to_table_dict):
+  """Validate `feature_to_table_dict`."""
+  used_table_set = set(feature_to_table_dict.values())
+  table_set = set(table_to_config_dict.keys())
+
+  unused_table_set = table_set - used_table_set
+  if unused_table_set:
+    raise ValueError('`table_to_config_dict` specifies table that is not '
+                     'used in `feature_to_table_dict`: {}.'
+                     .format(unused_table_set))
+
+  extra_table_set = used_table_set - table_set
+  if extra_table_set:
+    raise ValueError('`feature_to_table_dict` refers to a table that is not '
+                     'specified in `table_to_config_dict`: {}.'
+                     .format(extra_table_set))
+
+
+def _validate_batch_size(batch_size, num_cores):
+  if batch_size % num_cores:
+    raise ValueError('`batch_size` is not a multiple of number of '
+                     'cores. `batch_size`={}, `_num_cores`={}.'.format(
+                         batch_size, num_cores))
+
+
+def _validate_optimization_parameters(optimization_parameters):
+  if not isinstance(optimization_parameters, _OptimizationParameters):
+    raise ValueError('`optimization_parameters` must inherit from '
+                     '`_OptimizationPramaters`. '
+                     '`type(optimization_parameters)`={}'.format(
+                         type(optimization_parameters)))
+
+
+class _OptimizerHandler(object):
+  """Interface class for handling optimizer specific logic."""
+
+  def __init__(self, optimization_parameters):
+    self._optimization_parameters = optimization_parameters
+
+  def set_optimization_parameters(self, table_descriptor):
+    raise NotImplementedError()
+
+  def create_variables_and_ops(self, table, variable_name):
+    raise NotImplementedError()
+
+  def get_slot_names(self):
+    raise NotImplementedError()
+
+  def get_slot(self, table, name):
+    raise NotImplementedError()
+
+
+class _AdagradHandler(_OptimizerHandler):
+  """Handles Adagrad specific logic."""
+
+  def __init__(self, optimization_parameters):
+    super(_AdagradHandler, self).__init__(optimization_parameters)
+    self._table_to_accumulator_variables_dict = {}
+
+  def set_optimization_parameters(self, table_descriptor):
+    table_descriptor.optimization_parameters.adagrad.SetInParent()
+
+  def create_variables_and_ops(self, table, variable_name, num_hosts,
+                               table_config, table_variables,
+                               load_parameters_ops, retrieve_parameters_ops):
+    optimizer_name = 'Adagrad'
+    accumulator_initializer = init_ops.constant_initializer(
+        self._optimization_parameters.initial_accumulator)
+    accumulator_variables = _create_partitioned_variables(
+        name='%s/%s' % (variable_name, optimizer_name),
+        num_hosts=num_hosts,
+        vocabulary_size=table_config.vocabulary_size,
+        embedding_dimension=table_config.dimension,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
+        initializer=accumulator_initializer)
+
+    self._table_to_accumulator_variables_dict[table] = accumulator_variables
+    for host_id, table_variable, accumulator_variable in (zip(
+        range(num_hosts), table_variables, accumulator_variables)):
+      with ops.colocate_with(table_variable):
+        load_parameters_op = (
+            tpu_ops.load_tpu_embedding_adagrad_parameters(
+                parameters=table_variable,
+                accumulators=accumulator_variable,
+                table_name=table,
+                num_shards=num_hosts,
+                shard_id=host_id))
+        retrieved_table, retrieved_accumulator = (
+            tpu_ops.retrieve_tpu_embedding_adagrad_parameters(
+                table_name=table,
+                num_shards=num_hosts,
+                shard_id=host_id))
+        retrieve_parameters_op = control_flow_ops.group(
+            state_ops.assign(table_variable, retrieved_table),
+            state_ops.assign(accumulator_variable, retrieved_accumulator))
+
+      load_parameters_ops.append(load_parameters_op)
+      retrieve_parameters_ops.append(retrieve_parameters_op)
+
+  def get_slot_names(self):
+    return ['accumulator']
+
+  def get_slot(self, table, name):
+    if name not in self.get_slot_names():
+      raise ValueError('Adagrad has {} as slot names; got {}.'
+                       .format(self.get_slot_names(), name))
+    return self._table_to_accumulator_variables_dict[table]
+
+
+class _AdamHandler(_OptimizerHandler):
+  """Handles Adam specific logic."""
+
+  def __init__(self, optimization_parameters):
+    super(_AdamHandler, self).__init__(optimization_parameters)
+    self._table_to_m_variables_dict = {}
+    self._table_to_v_variables_dict = {}
+
+  def set_optimization_parameters(self, table_descriptor):
+    table_descriptor.optimization_parameters.adam.beta1 = (
+        self._optimization_parameters.beta1)
+    table_descriptor.optimization_parameters.adam.beta2 = (
+        self._optimization_parameters.beta2)
+    table_descriptor.optimization_parameters.adam.epsilon = (
+        self._optimization_parameters.epsilon)
+
+  def create_variables_and_ops(self, table, variable_name, num_hosts,
+                               table_config, table_variables,
+                               load_parameters_ops, retrieve_parameters_ops):
+    optimizer_name = 'Adam'
+    m_initializer = init_ops.zeros_initializer()
+    m_variables = _create_partitioned_variables(
+        name='%s/%s/m' % (variable_name, optimizer_name),
+        num_hosts=num_hosts,
+        vocabulary_size=table_config.vocabulary_size,
+        embedding_dimension=table_config.dimension,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
+        initializer=m_initializer)
+    v_initializer = init_ops.zeros_initializer()
+    v_variables = _create_partitioned_variables(
+        name='%s/%s/v' % (variable_name, optimizer_name),
+        num_hosts=num_hosts,
+        vocabulary_size=table_config.vocabulary_size,
+        embedding_dimension=table_config.dimension,
+        collections=[ops.GraphKeys.GLOBAL_VARIABLES],
+        initializer=v_initializer)
+
+    self._table_to_m_variables_dict[table] = m_variables
+    self._table_to_v_variables_dict[table] = v_variables
+
+    for host_id, table_variable, m_variable, v_variable in (zip(
+        range(num_hosts), table_variables,
+        m_variables, v_variables)):
+      with ops.colocate_with(table_variable):
+        load_parameters_op = (
+            tpu_ops.load_tpu_embedding_adam_parameters(
+                parameters=table_variable,
+                momenta=m_variable,
+                velocities=v_variable,
+                table_name=table,
+                num_shards=num_hosts,
+                shard_id=host_id))
+        retrieved_table, retrieved_m, retrieved_v = (
+            tpu_ops.retrieve_tpu_embedding_adam_parameters(
+                table_name=table,
+                num_shards=num_hosts,
+                shard_id=host_id))
+        retrieve_parameters_op = control_flow_ops.group(
+            state_ops.assign(table_variable, retrieved_table),
+            state_ops.assign(m_variable, retrieved_m),
+            state_ops.assign(v_variable, retrieved_v))
+
+      load_parameters_ops.append(load_parameters_op)
+      retrieve_parameters_ops.append(retrieve_parameters_op)
+
+  def get_slot_names(self):
+    return ['m', 'v']
+
+  def get_slot(self, table, name):
+    if name == 'm':
+      return self._table_to_m_variables_dict[table]
+    elif name == 'v':
+      return self._table_to_v_variables_dict[table]
+    else:
+      raise ValueError('Adam has {} as slot names; got {}.'
+                       .format(self.get_slot_names(), name))
+
+
+class _StochasticGradientDescentHandler(_OptimizerHandler):
+  """Handles stochastic gradient descent specific logic."""
+
+  def set_optimization_parameters(self, table_descriptor):
+    (table_descriptor.optimization_parameters.stochastic_gradient_descent
+     .SetInParent())
+
+  def create_variables_and_ops(self, table, variable_name, num_hosts,
+                               table_config, table_variables,
+                               load_parameters_ops, retrieve_parameters_ops):
+    del table_config
+
+    for host_id, table_variable in (zip(
+        range(num_hosts), table_variables)):
+      with ops.colocate_with(table_variable):
+        load_parameters_op = (
+            tpu_ops
+            .load_tpu_embedding_stochastic_gradient_descent_parameters(
+                parameters=table_variable,
+                table_name=table,
+                num_shards=num_hosts,
+                shard_id=host_id))
+        retrieved_table = (
+            tpu_ops
+            .retrieve_tpu_embedding_stochastic_gradient_descent_parameters(
+                table_name=table,
+                num_shards=num_hosts,
+                shard_id=host_id))
+        retrieve_parameters_op = control_flow_ops.group(
+            state_ops.assign(table_variable, retrieved_table))
+
+      load_parameters_ops.append(load_parameters_op)
+      retrieve_parameters_ops.append(retrieve_parameters_op)
+
+  def get_slot_names(self):
+    return []
+
+  def get_slot(self, table, name):
+    raise ValueError('Stochastic gradient descent does not have slot variable.')
+
+
+def _get_optimization_handler(optimization_parameters):
+  if isinstance(optimization_parameters, AdagradParameters):
+    return _AdagradHandler(optimization_parameters)
+  elif isinstance(optimization_parameters, AdamParameters):
+    return _AdamHandler(optimization_parameters)
+  elif isinstance(optimization_parameters, StochasticGradientDescentParameters):
+    return _StochasticGradientDescentHandler(optimization_parameters)
+  else:
+    return NotImplementedError()
+
+
+def _create_ordered_dict(d):
+  """Create an OrderedDict from Dict."""
+  return collections.OrderedDict((k, d[k]) for k in sorted(d))
+
+
+def _create_combiners(table_to_config_dict):
+  return [table_to_config_dict[t].combiner for t in table_to_config_dict]
+
+
+def _create_table_to_features_dict(feature_to_table_dict):
+  """Create mapping from table to a list of its features."""
+  table_to_features_dict_tmp = {}
+  for feature, table in six.iteritems(feature_to_table_dict):
+    if table in table_to_features_dict_tmp:
+      table_to_features_dict_tmp[table].append(feature)
+    else:
+      table_to_features_dict_tmp[table] = [feature]
+
+  table_to_features_dict = collections.OrderedDict()
+  for table in sorted(table_to_features_dict_tmp):
+    table_to_features_dict[table] = sorted(table_to_features_dict_tmp[table])
+  return table_to_features_dict
+
+
+def _create_device_fn(hosts):
+  """Create device_fn() to use with _create_partitioned_variables()."""
+
+  def device_fn(op):
+    """Returns the `device` for `op`."""
+    part_match = re.match(r'.*/part_(\d+)(/|$)', op.name)
+
+    if part_match:
+      idx = int(part_match.group(1))
+    else:
+      raise RuntimeError('Internal Error: '
+                         'Expected %s to contain /part_*.' % op.name)
+
+    device = hosts[idx]
+    return device
+
+  return device_fn
+
+
+def _create_partitioned_variables(name,
+                                  num_hosts,
+                                  vocabulary_size,
+                                  embedding_dimension,
+                                  initializer,
+                                  collections=None):  # pylint: disable=redefined-outer-name
+  """Creates ParitionedVariables based on `num_hosts` for `table`."""
+  # TODO(shizhiw): automatically place embedding lookup elsewhere?
+  if vocabulary_size < num_hosts:
+    raise ValueError('`vocabulary_size`({}) is smaller than `num_hosts`({}). '
+                     'As TPU embedding is not optimized for small tables, '
+                     'please consider other ways for this embedding lookup.')
+
+  slicing = [num_hosts, 1]
+
+  # TODO(shizhiw): deprecated, use tf.get_variable()?
+  return partitioned_variables.create_partitioned_variables(
+      name=name,
+      slicing=slicing,
+      shape=(vocabulary_size, embedding_dimension),
+      dtype=dtypes.float32,
+      initializer=initializer,
+      collections=collections,
+      trainable=False)
+
+
+@ops.RegisterGradient('TPUEmbeddingActivations')
+def _embedding_activations_grad(activations_op, grad_wrt_activations):
+  """Saves the gradient of embedding activations ops in a graph collection."""
+  g = ops.get_default_graph()
+  table_id = activations_op.get_attr('table_id')
+  lookup_id = activations_op.get_attr('lookup_id')
+  table_gradients = g.get_collection_ref(
+      'tpu_embedding_gradients_table_%d' % table_id)
+
+  if not table_gradients:
+    raise RuntimeError(
+        'Gradients for TPUEmbedding have been generated in non-training mode. '
+        'This is not expected. Consider putting your Optimizer.minimize code '
+        'behind the training mode condition check. For Estimator, you can '
+        'do \n\n'
+        '    if mode == tf.estimator.ModeKeys.TRAIN:\n'
+        '        train_op = opt.minimize(loss)\n'
+        '\n')
+
+  table_gradients[lookup_id] = array_ops.identity(grad_wrt_activations)
+  return [
+      # RegisterGradient requires that value be returned for all inputs. Since
+      # the first argument (tpu_gradient_variable_{table_name}) has shape [1],
+      # we will return zeros(shape=[1]). The actual gradient w.r.t. the
+      # embedding activations (grad_wrt_activations) has the same shape as the
+      # activations returned by  embedding_activations.
+      array_ops.zeros(arg.shape, dtype=dtypes.float32)
+      for arg in activations_op.inputs
+  ]
-- 
GitLab


From 5664e8c71afa735b3da29bc1a651caf1f5c4d634 Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Fri, 12 Oct 2018 17:36:23 -0700
Subject: [PATCH 0928/1085] Fix TensorFlow Lite Convert on Windows

NamedTemporaryFile's need to be used differently because Windows
cannot have multiple open handles to a single file.

PiperOrigin-RevId: 216948856
---
 tensorflow/contrib/lite/python/convert.py | 56 ++++++++++++++++-------
 tensorflow/tools/pip_package/MANIFEST.in  |  1 +
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 1bf42d7551..de936f1fc6 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -102,20 +102,34 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str):
     return _toco_python.TocoConvert(
         model_flags_str, toco_flags_str, input_data_str)
 
-  with _tempfile.NamedTemporaryFile() as fp_toco, \
-           _tempfile.NamedTemporaryFile() as fp_model, \
-           _tempfile.NamedTemporaryFile() as fp_input, \
-           _tempfile.NamedTemporaryFile() as fp_output:
-    fp_model.write(model_flags_str)
-    fp_toco.write(toco_flags_str)
-    fp_input.write(input_data_str)
-    fp_model.flush()
-    fp_toco.flush()
-    fp_input.flush()
-
+  # Windows and TemporaryFile are not that useful together,
+  # since you cannot have two readers/writers. So we have to
+  # make the temporaries and close and delete them explicitly.
+  toco_filename, model_filename, input_filename, output_filename = (
+      None, None, None, None)
+  try:
+    # Build all input files
+    with _tempfile.NamedTemporaryFile(delete=False) as fp_toco, \
+             _tempfile.NamedTemporaryFile(delete=False) as fp_model, \
+             _tempfile.NamedTemporaryFile(delete=False) as fp_input:
+      toco_filename = fp_toco.name
+      input_filename = fp_input.name
+      model_filename = fp_model.name
+      fp_model.write(model_flags_str)
+      fp_toco.write(toco_flags_str)
+      fp_input.write(input_data_str)
+      fp_model.flush()
+      fp_toco.flush()
+      fp_input.flush()
+
+    # Reserve an output file
+    with _tempfile.NamedTemporaryFile(delete=False) as fp:
+      output_filename = fp.name
+
+    # Run
     cmd = [
-        _toco_from_proto_bin, fp_model.name, fp_toco.name, fp_input.name,
-        fp_output.name
+        _toco_from_proto_bin, model_filename, toco_filename, input_filename,
+        output_filename
     ]
     cmdline = " ".join(cmd)
     is_windows = _platform.system() == "Windows"
@@ -128,11 +142,19 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str):
     stdout, stderr = proc.communicate()
     exitcode = proc.returncode
     if exitcode == 0:
-      stuff = fp_output.read()
-      return stuff
+      with open(output_filename, "rb") as fp:
+        return fp.read()
     else:
-      raise RuntimeError("TOCO failed see console for info.\n%s\n%s\n" %
-                         (stdout, stderr))
+      raise RuntimeError(
+          "TOCO failed see console for info.\n%s\n%s\n" % (stdout, stderr))
+  finally:
+    # Must manually cleanup files.
+    for filename in [
+        toco_filename, input_filename, model_filename, output_filename]:
+      try:
+        _os.unlink(filename)
+      except (OSError, TypeError):
+        pass
 
 
 def tensor_name(x):
diff --git a/tensorflow/tools/pip_package/MANIFEST.in b/tensorflow/tools/pip_package/MANIFEST.in
index c4b4af93b8..272ff4735c 100644
--- a/tensorflow/tools/pip_package/MANIFEST.in
+++ b/tensorflow/tools/pip_package/MANIFEST.in
@@ -1,5 +1,6 @@
 include README
 recursive-include * *.py
+recursive-include * *.pyd
 recursive-include * *.pd
 recursive-include * *.so
 recursive-include * *.dll
-- 
GitLab


From 493a1ce9579698a5c543b2cc130003654acac042 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 17:42:28 -0700
Subject: [PATCH 0929/1085] Switches defun gradients to output None for
 unconnected gradients instead of 0.0.

PiperOrigin-RevId: 216949450
---
 tensorflow/python/eager/function.py      | 2 +-
 tensorflow/python/eager/function_test.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f27e5558ed..59c4e4cb30 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -746,7 +746,7 @@ class Function(object):
           self._func_graph.inputs,
           grad_ys=gradients_wrt_outputs,
           src_graph=self._func_graph,
-          unconnected_gradients=gradients_impl.UnconnectedGradients.ZERO)
+          unconnected_gradients=gradients_impl.UnconnectedGradients.NONE)
 
     backwards_graph_captures = list(backwards_graph.captures.keys())
 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 8344c014b9..4f947e91bf 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -637,7 +637,7 @@ class FunctionTest(test.TestCase):
       op = call()
       self.assertAllEqual(sess.run(op), 2.0)
 
-  def testSymbolicGradientVariableZerosLike(self):
+  def testSymbolicGradientVariableNoneNotZerosLike(self):
     with ops.Graph().as_default():
       v = resource_variable_ops.ResourceVariable(1.0)
 
@@ -651,7 +651,7 @@ class FunctionTest(test.TestCase):
       _, dv = gradients_impl.gradients(l, [x, v])
       with self.cached_session():
         v.initializer.run()
-        self.assertAllEqual(dv.eval(), 0.0)
+        self.assertEqual(dv, None)
 
   def testGraphModeManyFunctions(self):
     with context.graph_mode(), self.cached_session():
-- 
GitLab


From d6729a6bebd9c899a0f746d1dec7ccfac3aefa88 Mon Sep 17 00:00:00 2001
From: Kay Zhu <kayzhu@google.com>
Date: Fri, 12 Oct 2018 17:47:23 -0700
Subject: [PATCH 0930/1085] [XLA]Enable more DataTypes for running on the
 XLA:INTERPRETER backend.

PiperOrigin-RevId: 216949811
---
 tensorflow/compiler/jit/xla_interpreter_device.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_interpreter_device.cc b/tensorflow/compiler/jit/xla_interpreter_device.cc
index 19e681af0c..8a80639b63 100644
--- a/tensorflow/compiler/jit/xla_interpreter_device.cc
+++ b/tensorflow/compiler/jit/xla_interpreter_device.cc
@@ -25,8 +25,9 @@ namespace tensorflow {
 const char* const DEVICE_XLA_INTERPRETER = "XLA_INTERPRETER";
 const char* const DEVICE_INTERPRETER_XLA_JIT = "XLA_INTERPRETER_JIT";
 
-constexpr std::array<DataType, 6> kExecAllTypes = {
-    {DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}};
+constexpr std::array<DataType, 9> kExecAllTypes = {
+    {DT_INT8, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64,
+     DT_BOOL, DT_BFLOAT16}};
 
 class XlaInterpreterDeviceFactory : public DeviceFactory {
  public:
-- 
GitLab


From ca6c218675c77c7a9bb5c80385470ec8308cae0e Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Fri, 12 Oct 2018 18:03:24 -0700
Subject: [PATCH 0931/1085] Automated rollback of commit
 1a77f1876e8138c84f96e639e464096294e9f389

PiperOrigin-RevId: 216951012
---
 tensorflow/contrib/lite/BUILD | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index a725c73193..787a85644c 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -7,13 +7,6 @@ licenses(["notice"])  # Apache 2.0
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts", "gen_selected_ops")
 
-package_group(
-    name = "internal",
-    packages = [
-        "//tensorflow/contrib/lite/...",
-    ],
-)
-
 exports_files(glob([
     "testdata/*.bin",
     "testdata/*.pb",
@@ -39,6 +32,7 @@ config_setting(
 config_setting(
     name = "with_tflite_flex",
     define_values = {"with_tflite_flex": "true"},
+    visibility = ["//visibility:public"],
 )
 
 cc_library(
@@ -86,14 +80,12 @@ cc_library(
 cc_library(
     name = "graph_info",
     hdrs = ["graph_info.h"],
-    visibility = ["//visibility:private"],
     deps = ["//tensorflow/contrib/lite/c:c_api_internal"],
 )
 
 cc_library(
     name = "memory_planner",
     hdrs = ["memory_planner.h"],
-    visibility = ["//visibility:private"],
     deps = ["//tensorflow/contrib/lite/c:c_api_internal"],
 )
 
@@ -101,7 +93,6 @@ cc_library(
     name = "simple_memory_arena",
     srcs = ["simple_memory_arena.cc"],
     hdrs = ["simple_memory_arena.h"],
-    visibility = ["//visibility:private"],
     deps = ["//tensorflow/contrib/lite/c:c_api_internal"],
 )
 
@@ -332,7 +323,6 @@ cc_library(
     name = "util",
     srcs = ["util.cc"],
     hdrs = ["util.h"],
-    visibility = [":internal"],
     deps = [
         "//tensorflow/contrib/lite/c:c_api_internal",
     ],
-- 
GitLab


From 869e5ef81fea6d95138b6d940c7fdccccb561403 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 18:05:34 -0700
Subject: [PATCH 0932/1085] Export the host program shape (with layouts) in the
 HLO module proto.

PiperOrigin-RevId: 216951291
---
 tensorflow/compiler/xla/client/lib/testing.cc |  4 +-
 tensorflow/compiler/xla/client/xla_builder.cc |  4 +-
 .../compiler/xla/client/xla_computation.cc    |  4 +-
 .../xla/service/compile_only_service.cc       |  4 +-
 .../xla/service/computation_layout.cc         | 10 +++
 .../compiler/xla/service/computation_layout.h |  4 ++
 tensorflow/compiler/xla/service/hlo.proto     |  4 +-
 tensorflow/compiler/xla/service/hlo_module.cc | 15 ++---
 .../compiler/xla/service/hlo_proto_util.cc    | 10 +--
 .../compiler/xla/service/local_service.cc     |  4 +-
 tensorflow/compiler/xla/service/service.cc    | 25 ++++----
 .../compiler/xla/tools/replay_computation.cc  |  3 +-
 .../compiler/xrt/kernels/xrt_compile_ops.cc   |  5 +-
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 64 +++++++++++++++++++
 tensorflow/compiler/xrt/xrt_state.cc          |  3 +
 15 files changed, 122 insertions(+), 41 deletions(-)

diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc
index ff0ec76a7f..a44681f586 100644
--- a/tensorflow/compiler/xla/client/lib/testing.cc
+++ b/tensorflow/compiler/xla/client/lib/testing.cc
@@ -93,9 +93,9 @@ std::unique_ptr<GlobalData> MakeFakeDataOrDie(const Shape& shape,
 
 std::vector<std::unique_ptr<GlobalData>> MakeFakeArgumentsOrDie(
     const XlaComputation& computation, Client* client) {
-  CHECK(computation.proto().has_program_shape())
+  CHECK(computation.proto().has_host_program_shape())
       << "Computation should have progran shape.";
-  auto program_shape = computation.proto().program_shape();
+  auto program_shape = computation.proto().host_program_shape();
 
   std::vector<std::unique_ptr<GlobalData>> results;
   for (const Shape& shape : program_shape.parameters()) {
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index ebfd9ac4e4..7d081b2722 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -278,7 +278,7 @@ StatusOr<XlaComputation> XlaBuilder::Build(int64 root_id) {
   module->set_id(entry.id());
   module->set_entry_computation_name(entry.name());
   module->set_entry_computation_id(entry.id());
-  *module->mutable_program_shape() = entry.program_shape();
+  *module->mutable_host_program_shape() = entry.program_shape();
   for (auto& e : embedded_) {
     module->add_computations()->Swap(&e.second);
   }
@@ -2357,7 +2357,7 @@ StatusOr<XlaComputation> XlaBuilder::BuildConstantSubGraph(
   module->set_id(entry.id());
   module->set_entry_computation_name(entry.name());
   module->set_entry_computation_id(entry.id());
-  *module->mutable_program_shape() = *program_shape;
+  *module->mutable_host_program_shape() = *program_shape;
   for (auto& e : embedded_) {
     if (related_calls.find(e.second.id()) != related_calls.end()) {
       *module->add_computations() = e.second;
diff --git a/tensorflow/compiler/xla/client/xla_computation.cc b/tensorflow/compiler/xla/client/xla_computation.cc
index 22c9e83bb2..c9870b65b9 100644
--- a/tensorflow/compiler/xla/client/xla_computation.cc
+++ b/tensorflow/compiler/xla/client/xla_computation.cc
@@ -24,8 +24,8 @@ limitations under the License.
 namespace xla {
 
 StatusOr<ProgramShape> XlaComputation::GetProgramShape() const {
-  TF_RET_CHECK(proto_.has_program_shape());
-  return proto_.program_shape();
+  TF_RET_CHECK(proto_.has_host_program_shape());
+  return proto_.host_program_shape();
 }
 
 StatusOr<std::unique_ptr<HloSnapshot>> XlaComputation::Snapshot() const {
diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc
index c9b0e4c08c..6d67f97002 100644
--- a/tensorflow/compiler/xla/service/compile_only_service.cc
+++ b/tensorflow/compiler/xla/service/compile_only_service.cc
@@ -67,7 +67,7 @@ CompileOnlyService::CompileAheadOfTime(
     std::unique_ptr<AotCompilationMetadata>* metadata) {
   std::vector<std::unique_ptr<HloModule>> hlo_modules;
   for (const AotXlaComputationInstance& instance : computations) {
-    TF_RET_CHECK(instance.computation.has_program_shape());
+    TF_RET_CHECK(instance.computation.has_host_program_shape());
 
     const DebugOptions& debug_options = options.debug_options();
 
@@ -86,7 +86,7 @@ CompileOnlyService::CompileAheadOfTime(
           Executable::DumpToDirectory(per_host_path, filename, hlo_snapshot));
     }
 
-    const auto& program_shape = instance.computation.program_shape();
+    const auto& program_shape = instance.computation.host_program_shape();
     ExecutionOptions execution_options;
     *execution_options.mutable_debug_options() = debug_options;
     *execution_options.mutable_shape_with_output_layout() =
diff --git a/tensorflow/compiler/xla/service/computation_layout.cc b/tensorflow/compiler/xla/service/computation_layout.cc
index af8f7f1027..efc893818d 100644
--- a/tensorflow/compiler/xla/service/computation_layout.cc
+++ b/tensorflow/compiler/xla/service/computation_layout.cc
@@ -56,4 +56,14 @@ string ComputationLayout::ToString() const {
                       result_layout_.ToString());
 }
 
+ProgramShape ComputationLayout::ComputeProgramShape() const {
+  ProgramShape program_shape;
+  for (int64 i = 0; i < parameter_layouts_.size(); ++i) {
+    *program_shape.add_parameters() = parameter_layouts_[i].shape();
+    *program_shape.add_parameter_names() = absl::StrCat("p", i);
+  }
+  *program_shape.mutable_result() = result_layout_.shape();
+  return program_shape;
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/computation_layout.h b/tensorflow/compiler/xla/service/computation_layout.h
index 6975f387b4..a2fb656677 100644
--- a/tensorflow/compiler/xla/service/computation_layout.h
+++ b/tensorflow/compiler/xla/service/computation_layout.h
@@ -83,6 +83,10 @@ class ComputationLayout {
   // Returns a string representation of this object.
   string ToString() const;
 
+  // Create a ProgramShape proto based on the parameter and result shapes held
+  // within this object.
+  ProgramShape ComputeProgramShape() const;
+
  private:
   std::vector<ShapeLayout> parameter_layouts_;
   ShapeLayout result_layout_;
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 82c8fb1904..dbab62f847 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -261,8 +261,8 @@ message HloModuleProto {
   // callees appear before their callers.
   repeated HloComputationProto computations = 3;
 
-  // The program shape (with layout) of the entry computation.
-  xla.ProgramShape program_shape = 4;
+  // The host program shape (with layout) of the entry computation.
+  xla.ProgramShape host_program_shape = 4;
 
   // The id of this module.
   int64 id = 5;
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index a53aaed49b..6845c27a91 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -246,17 +246,14 @@ HloModuleProto HloModule::ToProto() const {
   proto.set_entry_computation_id(entry_computation_->unique_id());
   for (const HloComputation* computation : MakeComputationPostOrder()) {
     HloComputationProto computation_proto = computation->ToProto();
-    if (computation->name() == entry_computation_->name()) {
-      *proto.mutable_program_shape() = computation_proto.program_shape();
-    }
     proto.add_computations()->Swap(&computation_proto);
   }
   if (has_schedule()) {
     *proto.mutable_schedule() = schedule().ToProto().ValueOrDie();
   }
-
+  *proto.mutable_host_program_shape() =
+      entry_computation_layout().ComputeProgramShape();
   *proto.mutable_input_output_alias() = input_output_alias_config().ToProto();
-
   return proto;
 }
 
@@ -268,9 +265,9 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
 
   // The ProgramShape in the passed in module config must match the shapes of
   // the entry parameters and root.
-  TF_RET_CHECK(proto.has_program_shape())
+  TF_RET_CHECK(proto.has_host_program_shape())
       << "No program shape found in the proto";
-  const auto& expected_program_shape = proto.program_shape();
+  const auto& expected_program_shape = proto.host_program_shape();
   TF_RET_CHECK(expected_program_shape.parameters_size() ==
                module_config.entry_computation_layout().parameter_count());
   for (int i = 0; i < expected_program_shape.parameters_size(); ++i) {
@@ -375,9 +372,9 @@ StatusOr<std::unique_ptr<HloModule>> HloModule::CreateFromProto(
 /* static */
 StatusOr<HloModuleConfig> HloModule::CreateModuleConfigFromProto(
     const HloModuleProto& module, const DebugOptions& debug_options) {
-  TF_RET_CHECK(module.has_program_shape())
+  TF_RET_CHECK(module.has_host_program_shape())
       << "No program shape found in the proto";
-  const auto& program_shape = module.program_shape();
+  const auto& program_shape = module.host_program_shape();
 
   HloModuleConfig module_config(program_shape);
   module_config.set_debug_options(debug_options);
diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc
index 7bb65ae665..cf33668f5b 100644
--- a/tensorflow/compiler/xla/service/hlo_proto_util.cc
+++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc
@@ -53,12 +53,12 @@ StatusOr<std::vector<const Shape*>> EntryComputationParameterShapes(
   if (!hlo_proto.has_hlo_module()) {
     return NotFound("HloProto missing HloModuleProto.");
   }
-  if (!hlo_proto.hlo_module().has_program_shape()) {
+  if (!hlo_proto.hlo_module().has_host_program_shape()) {
     return NotFound("HloProto missing program shape.");
   }
 
   std::vector<const Shape*> parameter_shapes;
-  const auto& program_shape = hlo_proto.hlo_module().program_shape();
+  const auto& program_shape = hlo_proto.hlo_module().host_program_shape();
   for (const Shape& shape : program_shape.parameters()) {
     parameter_shapes.push_back(&shape);
   }
@@ -69,14 +69,14 @@ StatusOr<const Shape*> EntryComputationOutputShape(const HloProto& hlo_proto) {
   if (!hlo_proto.has_hlo_module()) {
     return NotFound("HloProto missing HloModuleProto.");
   }
-  if (!hlo_proto.hlo_module().has_program_shape()) {
+  if (!hlo_proto.hlo_module().has_host_program_shape()) {
     return NotFound("HloProto missing program shape.");
   }
-  if (!hlo_proto.hlo_module().program_shape().has_result()) {
+  if (!hlo_proto.hlo_module().host_program_shape().has_result()) {
     return NotFound("HloProto missing result in its program shape");
   }
 
-  return &hlo_proto.hlo_module().program_shape().result();
+  return &hlo_proto.hlo_module().host_program_shape().result();
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc
index 0d0fb7946a..cca3755617 100644
--- a/tensorflow/compiler/xla/service/local_service.cc
+++ b/tensorflow/compiler/xla/service/local_service.cc
@@ -144,8 +144,8 @@ StatusOr<std::unique_ptr<Executable>> LocalService::CompileExecutable(
     const absl::Span<const Shape* const> argument_layouts,
     const ExecutableBuildOptions& build_options) {
   const HloModuleProto& proto = computation.proto();
-  TF_RET_CHECK(proto.has_program_shape());
-  const ProgramShape& program_shape = proto.program_shape();
+  TF_RET_CHECK(proto.has_host_program_shape());
+  const ProgramShape& program_shape = proto.host_program_shape();
 
   // Validate incoming layouts.
   if (argument_layouts.size() != program_shape.parameters_size()) {
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index cb6a9e6707..75465359f8 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -634,7 +634,7 @@ Status Service::ExecuteGraphParallel(const ExecuteGraphParallelRequest* arg,
         arg->requests(i).execution_options();
     const ExecuteGraphRequest& request = arg->requests(i);
     TF_RET_CHECK(request.has_computation()) << "computations may not be empty";
-    TF_RET_CHECK(request.computation().has_program_shape())
+    TF_RET_CHECK(request.computation().has_host_program_shape())
         << "programe shape may not be empty";
 
     // Get the executors.
@@ -651,7 +651,7 @@ Status Service::ExecuteGraphParallel(const ExecuteGraphParallelRequest* arg,
     // replica 0.
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<HloModuleConfig> module_config,
-        CreateModuleConfig(request.computation().program_shape(),
+        CreateModuleConfig(request.computation().host_program_shape(),
                            replicated_arguments.front(),
                            request.execution_options()));
     VLOG(3)
@@ -836,7 +836,7 @@ Status Service::ExecuteGraph(const ExecuteGraphRequest* arg,
   if (!arg->has_computation()) {
     return InvalidArgument("computations may not be empty");
   }
-  if (!arg->computation().has_program_shape()) {
+  if (!arg->computation().has_host_program_shape()) {
     return InvalidArgument("programe shape may not be empty");
   }
 
@@ -851,10 +851,11 @@ Status Service::ExecuteGraph(const ExecuteGraphRequest* arg,
       std::vector<std::vector<const ShapedBuffer*>> replicated_arguments,
       ResolveAndValidateArguments(arg->arguments(), replicas));
 
-  TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModuleConfig> module_config,
-                      CreateModuleConfig(arg->computation().program_shape(),
-                                         replicated_arguments.front(),
-                                         arg->execution_options()));
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<HloModuleConfig> module_config,
+      CreateModuleConfig(arg->computation().host_program_shape(),
+                         replicated_arguments.front(),
+                         arg->execution_options()));
 
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<Executable> executable,
@@ -1063,15 +1064,15 @@ Status Service::ComputeConstantGraph(const ComputeConstantGraphRequest* arg,
   if (!arg->has_computation()) {
     return InvalidArgument("computations may not be empty");
   }
-  if (!arg->computation().has_program_shape()) {
+  if (!arg->computation().has_host_program_shape()) {
     return InvalidArgument("program shape may not be empty");
   }
-  if (arg->computation().program_shape().parameters_size() != 0) {
+  if (arg->computation().host_program_shape().parameters_size() != 0) {
     return InvalidArgument(
         "constant computation may not depend on any parameters.");
   }
 
-  ProgramShape program_shape = arg->computation().program_shape();
+  ProgramShape program_shape = arg->computation().host_program_shape();
   TF_DCHECK_OK(ShapeUtil::ValidateShape(program_shape.result()));
   if (arg->has_output_layout()) {
     TF_RETURN_IF_ERROR(LayoutUtil::ValidateLayoutForShape(
@@ -1111,11 +1112,11 @@ Status Service::GetComputationGraphStats(
   if (!arg->has_computation()) {
     return InvalidArgument("Computations may not be empty.");
   }
-  if (!arg->computation().has_program_shape()) {
+  if (!arg->computation().has_host_program_shape()) {
     return InvalidArgument("Program shape may not be empty.");
   }
 
-  HloModuleConfig config(arg->computation().program_shape());
+  HloModuleConfig config(arg->computation().host_program_shape());
   config.set_debug_options(arg->debug_options());
   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
                       CreateModuleFromProto(arg->computation(), config));
diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc
index 0c41f227b3..f910e98053 100644
--- a/tensorflow/compiler/xla/tools/replay_computation.cc
+++ b/tensorflow/compiler/xla/tools/replay_computation.cc
@@ -83,7 +83,8 @@ std::unique_ptr<LocalExecutable> CompileExecutable(const HloSnapshot& module,
                                                    LocalClient* client) {
   XlaComputation computation(module.hlo().hlo_module());
   std::vector<const Shape*> argument_layouts;
-  for (const auto& param : computation.proto().program_shape().parameters()) {
+  for (const auto& param :
+       computation.proto().host_program_shape().parameters()) {
     argument_layouts.push_back(&param);
   }
   return client
diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
index 1ab836a496..dc62cf7a6b 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc
@@ -176,8 +176,9 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) {
   xla::LocalExecutable* executable = entry->get().get_executable();
   xla::ProgramShape program_shape = executable->executable()
                                         ->module()
-                                        .entry_computation()
-                                        ->ComputeProgramShape();
+                                        .config()
+                                        .entry_computation_layout()
+                                        .ComputeProgramShape();
   Tensor program_shape_output(DT_STRING, TensorShape({1}));
   program_shape_output.vec<string>()(0) = program_shape.SerializeAsString();
   ctx->set_output(1, program_shape_output);
diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc
index ee6734020d..ad42148ce3 100644
--- a/tensorflow/compiler/xrt/tests/raw_api_test.cc
+++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc
@@ -132,6 +132,16 @@ xla::XlaComputation AddAndScale() {
   return builder.Build().ValueOrDie();
 }
 
+xla::XlaComputation AddS64() {
+  xla::XlaBuilder builder("AddS64");
+  auto p0 = xla::Parameter(&builder, 0, xla::ShapeUtil::MakeShape(xla::S64, {}),
+                           "P0");
+  auto p1 = xla::Parameter(&builder, 1, xla::ShapeUtil::MakeShape(xla::S64, {}),
+                           "P1");
+  xla::Add(p0, p1);
+  return builder.Build().ValueOrDie();
+}
+
 xla::XlaComputation AddAndTuple() {
   xla::XlaBuilder builder("AddAndTuple");
   auto p0 = xla::Parameter(&builder, 0,
@@ -551,6 +561,60 @@ TEST(RawApiTest, LeakCompilationReference) {
   TF_EXPECT_OK(session.Run({c_handle.handle}, &outputs));
 }
 
+TEST(RawApiTest, CompileAndExecuteWithS64Argument) {
+  xrt::XLAAllocation p0;
+  p0.set_device_ordinal(0);
+  *p0.mutable_value() = xla::LiteralUtil::CreateR0<int64>(11031965).ToProto();
+  xrt::XLAAllocation p1;
+  p1.set_device_ordinal(0);
+  *p1.mutable_value() = xla::LiteralUtil::CreateR0<int64>(4091934).ToProto();
+
+  xrt::XLAComputation c;
+  auto config = c.mutable_config();
+  auto shapes = config->mutable_program_shape();
+  *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::S64, {});
+  *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::S64, {});
+  *shapes->mutable_result() = xla::ShapeUtil::MakeShape(xla::S64, {});
+  StoreComputationSnapshot(AddS64(), c.mutable_hlo_snapshot());
+
+  xrt::XRTExecutionConfig e;
+  e.set_release_input_handles(true);
+  e.set_release_compilation_handle(true);
+
+  Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
+  auto e_config =
+      ops::Const(root.WithDevice("/device:CPU:0"), e.SerializeAsString());
+  auto computation =
+      ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString());
+  auto c_handle = ops::XRTCompile(root, computation);
+  auto p0_value =
+      ops::Const(root.WithDevice("/device:CPU:0"), p0.SerializeAsString());
+  auto p0_handle = ops::XRTAllocate(root, p0_value);
+  auto p1_value =
+      ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString());
+  auto p1_handle = ops::XRTAllocate(root, p1_value);
+  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
+                                {Output(p0_handle), Output(p1_handle)});
+  auto read_back = ops::XRTReadLiteralAndRelease(root, result);
+  TF_ASSERT_OK(root.status());
+
+  ClientSession session(root);
+  std::vector<Tensor> outputs;
+  TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs));
+
+  xla::LiteralProto response;
+  EXPECT_TRUE(response.ParseFromString(outputs[0].scalar<string>()()));
+
+  auto expected = xla::LiteralUtil::CreateR0<int64>(15123899);
+  EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
+
+  xla::ProgramShape program_shape;
+  EXPECT_TRUE(program_shape.ParseFromString(outputs[1].vec<string>()(0)));
+  EXPECT_EQ(program_shape.parameters_size(), 2);
+  EXPECT_TRUE(
+      xla::ShapeUtil::HasPrimitiveType(program_shape.result(), xla::S64));
+}
+
 }  // namespace
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/xrt/xrt_state.cc b/tensorflow/compiler/xrt/xrt_state.cc
index 6e0f1216ca..3a99820d7a 100644
--- a/tensorflow/compiler/xrt/xrt_state.cc
+++ b/tensorflow/compiler/xrt/xrt_state.cc
@@ -65,6 +65,9 @@ Status AllocateScopedShapedBuffer(
   // requests the host-shape sub-buffer at index i, that will correspond to the
   // right device-shape sub-buffer at the same index.
   xla::Shape on_device_shape = transfer_manager->HostShapeToDeviceShape(shape);
+  VLOG(3) << "Allocating literal buffer: host_shape="
+          << xla::ShapeUtil::HumanStringWithLayout(shape) << " device_shape="
+          << xla::ShapeUtil::HumanStringWithLayout(on_device_shape);
 
   // The ScopedShapedBuffer frees the buffers that have so far been allocated if
   // it goes out of scope. That's useful if we return early as the result of an
-- 
GitLab


From dc1aab20a12e2d32a10f019a5de3f65f56345fa8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 18:13:58 -0700
Subject: [PATCH 0933/1085]   Extend the PrintV2 op to write tensors to file.  
 If the output_stream attribute is started with "file://", the string that
 follows will be interpreted as a file path. Then PrintV2 will write the
 tensors to that file.

PiperOrigin-RevId: 216952073
---
 tensorflow/core/kernels/logging_ops.cc        | 50 ++++++++++++++++++-
 tensorflow/core/ops/logging_ops.cc            |  4 +-
 .../python/kernel_tests/logging_ops_test.py   | 27 +++++++++-
 tensorflow/python/ops/logging_ops.py          | 32 ++++++++----
 tensorflow/python/tools/saved_model_cli.py    |  2 +-
 5 files changed, 98 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc
index cddfa359fc..2599340d78 100644
--- a/tensorflow/core/kernels/logging_ops.cc
+++ b/tensorflow/core/kernels/logging_ops.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include <iostream>
+#include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/status.h"
@@ -22,6 +23,31 @@ limitations under the License.
 
 namespace tensorflow {
 
+namespace {
+
+// If the following string is found at the beginning of an output stream, it
+// will be interpreted as a file path.
+const char kOutputStreamEscapeStr[] = "file://";
+
+// A mutex that guards appending strings to files.
+static mutex* file_mutex = new mutex();
+
+// Appends the given data to the specified file. It will create the file if it
+// doesn't already exist.
+Status AppendStringToFile(const std::string& fname, StringPiece data,
+                          Env* env) {
+  // TODO(ckluk): If opening and closing on every log causes performance issues,
+  // we can reimplement using reference counters.
+  mutex_lock l(*file_mutex);
+  std::unique_ptr<WritableFile> file;
+  TF_RETURN_IF_ERROR(env->NewAppendableFile(fname, &file));
+  Status a = file->Append(absl::StrCat(data, "\n"));
+  Status c = file->Close();
+  return a.ok() ? c : a;
+}
+
+}  // namespace
+
 class AssertOp : public OpKernel {
  public:
   explicit AssertOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
@@ -104,6 +130,9 @@ class PrintV2Op : public OpKernel {
   explicit PrintV2Op(OpKernelConstruction* ctx) : OpKernel(ctx) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_stream", &output_stream_));
 
+    SetFilePathIfAny();
+    if (!file_path_.empty()) return;
+
     auto output_stream_index =
         std::find(std::begin(valid_output_streams_),
                   std::end(valid_output_streams_), output_stream_);
@@ -123,6 +152,11 @@ class PrintV2Op : public OpKernel {
     OP_REQUIRES_OK(ctx, ctx->input("input", &input_));
     const string& msg = input_->scalar<string>()();
 
+    if (!file_path_.empty()) {
+      // Outputs to a file at the specified path.
+      OP_REQUIRES_OK(ctx, AppendStringToFile(file_path_, msg, ctx->env()));
+      return;
+    }
     if (output_stream_ == "stdout") {
       std::cout << msg << std::endl;
     } else if (output_stream_ == "stderr") {
@@ -139,15 +173,29 @@ class PrintV2Op : public OpKernel {
       for (auto valid_stream : valid_output_streams_) {
         strings::StrAppend(&error_msg, " ", valid_stream);
       }
+      strings::StrAppend(&error_msg, ", or file://<filename>");
       OP_REQUIRES(ctx, false, errors::InvalidArgument(error_msg));
     }
   }
 
-  const char* valid_output_streams_[6] = {"stdout", "stderr", "log(info)",
+  const char* valid_output_streams_[5] = {"stdout", "stderr", "log(info)",
                                           "log(warning)", "log(error)"};
 
  private:
+  // Either output_stream_ or file_path_ (but not both) will be non-empty.
   string output_stream_;
+  string file_path_;
+
+  // If output_stream_ is a file path, extracts it to file_path_ and clears
+  // output_stream_; otherwise sets file_paths_ to "".
+  void SetFilePathIfAny() {
+    if (absl::StartsWith(output_stream_, kOutputStreamEscapeStr)) {
+      file_path_ = output_stream_.substr(strlen(kOutputStreamEscapeStr));
+      output_stream_ = "";
+    } else {
+      file_path_ = "";
+    }
+  }
 };
 
 REGISTER_KERNEL_BUILDER(Name("PrintV2").Device(DEVICE_CPU), PrintV2Op);
diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc
index 2034d3601b..42a1b1d7e3 100644
--- a/tensorflow/core/ops/logging_ops.cc
+++ b/tensorflow/core/ops/logging_ops.cc
@@ -49,9 +49,7 @@ WHITELIST_STATEFUL_OP_FOR_DATASET_FUNCTIONS("Print");
 REGISTER_OP("PrintV2")
     .Input("input: string")
     .SetIsStateful()
-    .Attr(
-        "output_stream: {'stdout', 'stderr', 'log(info)', "
-        "'log(warning)', 'log(error)'} = 'stderr'")
+    .Attr("output_stream: string = 'stderr'")
     .SetShapeFn([](InferenceContext* c) {
       // Make sure that the input is a scalar.
       if (c->Rank(c->input(0)) != 0) {
diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py
index 2f19ecc0e6..8e9b87f651 100644
--- a/tensorflow/python/kernel_tests/logging_ops_test.py
+++ b/tensorflow/python/kernel_tests/logging_ops_test.py
@@ -18,7 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 import sys
+import tempfile
 
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function
@@ -35,7 +37,6 @@ from tensorflow.python.ops import string_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
-
 class LoggingOpsTest(test.TestCase):
 
   def testAssertDivideByZero(self):
@@ -271,6 +272,30 @@ class PrintV2Test(test.TestCase):
       expected = "[0 1 2 ... 7 8 9]"
       self.assertTrue((expected + "\n") in printed.contents())
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testPrintTensorsToFile(self):
+    tmpfile_name = tempfile.mktemp(".printv2_test")
+    tensor_0 = math_ops.range(0, 10)
+    print_op_0 = logging_ops.print_v2(tensor_0,
+                                      output_stream="file://"+tmpfile_name)
+    self.evaluate(print_op_0)
+    tensor_1 = math_ops.range(11, 20)
+    print_op_1 = logging_ops.print_v2(tensor_1,
+                                      output_stream="file://"+tmpfile_name)
+    self.evaluate(print_op_1)
+    try:
+      f = open(tmpfile_name, "r")
+      line_0 = f.readline()
+      expected_0 = "[0 1 2 ... 7 8 9]"
+      self.assertTrue(expected_0 in line_0)
+      line_1 = f.readline()
+      expected_1 = "[11 12 13 ... 17 18 19]"
+      self.assertTrue(expected_1 in line_1)
+      f.close()
+      os.remove(tmpfile_name)
+    except IOError as e:
+      self.fail(e)
+
   @test_util.run_in_graph_and_eager_modes()
   def testInvalidOutputStreamRaisesError(self):
     with self.cached_session():
diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py
index 4c53f33af1..fd532a9be2 100644
--- a/tensorflow/python/ops/logging_ops.py
+++ b/tensorflow/python/ops/logging_ops.py
@@ -114,6 +114,11 @@ def _generate_placeholder_string(x, default_placeholder="{}"):
   return placeholder
 
 
+def _is_filepath(output_stream):
+  """Returns True if output_stream is a file path."""
+  return isinstance(output_stream, str) and output_stream.startswith("file://")
+
+
 # Temporarily disable pylint g-doc-args error to allow giving more context
 # about what the kwargs are.
 # Because we are using arbitrary-length positional arguments, python 2
@@ -198,9 +203,11 @@ def print_v2(*inputs, **kwargs):
       primitives, tensors, data structures such as dicts and lists that
       may contain tensors (with the data structures possibly nested in
       arbitrary ways), and printable python objects.
-    output_stream: The output stream or logging level to print to. Defaults to
-      sys.stderr, but sys.stdout, tf.logging.info, tf.logging.warning, and
-      tf.logging.error are also supported.
+    output_stream: The output stream, logging level, or file to print to.
+      Defaults to sys.stderr, but sys.stdout, tf.logging.info,
+      tf.logging.warning, and tf.logging.error are also supported. To print to
+      a file, pass a string started with "file://" followed by the file path,
+      e.g., "file:///tmp/foo.out".
     summarize: The first and last `summarize` elements within each dimension are
       recursively printed per Tensor. If None, then the first 3 and last 3
       elements of each dimension are printed for each tensor. If set to -1, it
@@ -241,13 +248,17 @@ def print_v2(*inputs, **kwargs):
       tf_logging.error: "log(error)",
   }
 
-  output_stream_string = output_stream_to_constant.get(output_stream)
-  if not output_stream_string:
-    raise ValueError(
-        "Unsupported output stream or logging level " +
-        str(output_stream) + ". Supported streams are sys.stdout, "
-                             "sys.stderr, tf.logging.info, "
-                             "tf.logging.warning, tf.logging.error")
+  if _is_filepath(output_stream):
+    output_stream_string = output_stream
+  else:
+    output_stream_string = output_stream_to_constant.get(output_stream)
+    if not output_stream_string:
+      raise ValueError(
+          "Unsupported output stream, logging level, or file." +
+          str(output_stream) + ". Supported streams are sys.stdout, "
+          "sys.stderr, tf.logging.info, "
+          "tf.logging.warning, tf.logging.error. " +
+          "File needs to be in the form of 'file://<filepath>'.")
 
   # If we are only printing a single string scalar, there is no need to format
   if (len(inputs) == 1 and tensor_util.is_tensor(inputs[0])
@@ -612,7 +623,6 @@ def scalar_summary(tags, values, collections=None, name=None):
     _Collect(val, collections, [ops.GraphKeys.SUMMARIES])
   return val
 
-
 ops.NotDifferentiable("HistogramSummary")
 ops.NotDifferentiable("ImageSummary")
 ops.NotDifferentiable("AudioSummary")
diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index 2fcb0fa029..c4c3756c04 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -45,7 +45,7 @@ from tensorflow.python.saved_model import loader
 from tensorflow.python.tools import saved_model_utils
 
 # Set of ops to blacklist.
-_OP_BLACKLIST = set(['WriteFile', 'ReadFile'])
+_OP_BLACKLIST = set(['WriteFile', 'ReadFile', 'PrintV2'])
 
 
 def _show_tag_sets(saved_model_dir):
-- 
GitLab


From 989b82109f52cf1726b5f02f5fdffc41252cabf6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 18:35:07 -0700
Subject: [PATCH 0934/1085] Internal change.

PiperOrigin-RevId: 216953764
---
 .../lite/kernels/bidirectional_sequence_lstm.cc   |  8 ++++----
 tensorflow/contrib/lite/kernels/lstm_eval.cc      | 15 +++++++++++----
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index ca6b6a9a41..5325507d96 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -522,11 +522,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   const TfLiteTensor* fw_input_to_input_weights =
       GetOptionalInputTensor(context, node, kFwInputToInputWeightsTensor);
-  if (has_aux_input) {
+  const bool fw_use_cifg = (fw_input_to_input_weights == nullptr);
+  if (has_aux_input && !fw_use_cifg) {
     TF_LITE_ENSURE_EQ(context, fw_aux_input_to_input_weights->dims->data[0],
                       fw_input_to_input_weights->dims->data[0]);
   }
-  const bool fw_use_cifg = (fw_input_to_input_weights == nullptr);
   TfLiteIntArray* fw_scratch_buffer_size = TfLiteIntArrayCreate(2);
   fw_scratch_buffer_size->data[0] = n_batch;
   if (fw_use_cifg) {
@@ -579,11 +579,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   const TfLiteTensor* bw_input_to_input_weights =
       GetOptionalInputTensor(context, node, kBwInputToInputWeightsTensor);
-  if (has_aux_input) {
+  const bool bw_use_cifg = (bw_input_to_input_weights == nullptr);
+  if (has_aux_input && !bw_use_cifg) {
     TF_LITE_ENSURE_EQ(context, bw_aux_input_to_input_weights->dims->data[0],
                       bw_input_to_input_weights->dims->data[0]);
   }
-  const bool bw_use_cifg = (bw_input_to_input_weights == nullptr);
   TfLiteIntArray* bw_scratch_buffer_size = TfLiteIntArrayCreate(2);
   bw_scratch_buffer_size->data[0] = n_batch;
   if (bw_use_cifg) {
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.cc b/tensorflow/contrib/lite/kernels/lstm_eval.cc
index f228488c65..dc0a0b455b 100644
--- a/tensorflow/contrib/lite/kernels/lstm_eval.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.cc
@@ -770,7 +770,9 @@ TfLiteStatus EvalFloat(
   float* aux_input_to_output_weights_ptr = nullptr;
   if (aux_input_size > 0) {
     aux_input_ptr = aux_input->data.f;
-    aux_input_to_input_weights_ptr = aux_input_to_input_weights->data.f;
+    if (!use_cifg) {
+      aux_input_to_input_weights_ptr = aux_input_to_input_weights->data.f;
+    }
     aux_input_to_forget_weights_ptr = aux_input_to_forget_weights->data.f;
     aux_input_to_cell_weights_ptr = aux_input_to_cell_weights->data.f;
     aux_input_to_output_weights_ptr = aux_input_to_output_weights->data.f;
@@ -966,15 +968,20 @@ TfLiteStatus EvalHybrid(
   float aux_input_to_output_weights_scale = 0.0f;
   if (aux_input_size > 0) {
     aux_input_ptr = aux_input->data.f;
-    aux_input_to_input_weights_ptr =
-        reinterpret_cast<int8_t*>(aux_input_to_input_weights->data.uint8);
+    if (!use_cifg) {
+      aux_input_to_input_weights_ptr =
+          reinterpret_cast<int8_t*>(aux_input_to_input_weights->data.uint8);
+    }
     aux_input_to_forget_weights_ptr =
         reinterpret_cast<int8_t*>(aux_input_to_forget_weights->data.uint8);
     aux_input_to_cell_weights_ptr =
         reinterpret_cast<int8_t*>(aux_input_to_cell_weights->data.uint8);
     aux_input_to_output_weights_ptr =
         reinterpret_cast<int8_t*>(aux_input_to_output_weights->data.uint8);
-    aux_input_to_input_weights_scale = aux_input_to_input_weights->params.scale;
+    if (!use_cifg) {
+      aux_input_to_input_weights_scale =
+          aux_input_to_input_weights->params.scale;
+    }
     aux_input_to_forget_weights_scale =
         aux_input_to_forget_weights->params.scale;
     aux_input_to_cell_weights_scale = aux_input_to_cell_weights->params.scale;
-- 
GitLab


From c788dbba395603308c2f06cbd734d2b75b63895d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 19:19:22 -0700
Subject: [PATCH 0935/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 216956589
---
 tensorflow/core/ops/compat/ops_history.v1.pbtxt | 15 +++++++++++++++
 tensorflow/core/ops/ops.pbtxt                   |  9 ---------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 98ac343d21..d2e937dd2c 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -40156,6 +40156,21 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "PrintV2"
+  input_arg {
+    name: "input"
+    type: DT_STRING
+  }
+  attr {
+    name: "output_stream"
+    type: "string"
+    default_value {
+      s: "stderr"
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "PriorityQueue"
   output_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 8b5881a850..d034ea27a1 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -20375,15 +20375,6 @@ op {
     default_value {
       s: "stderr"
     }
-    allowed_values {
-      list {
-        s: "stdout"
-        s: "stderr"
-        s: "log(info)"
-        s: "log(warning)"
-        s: "log(error)"
-      }
-    }
   }
   is_stateful: true
 }
-- 
GitLab


From 24b80ceee29e72b9c7baf25d7c1a7da87998aead Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Fri, 12 Oct 2018 20:59:24 -0700
Subject: [PATCH 0936/1085] By default the _Retval Op returns INT32 tensors in
 Host memory. This causes a problem for RemoteCall ops which when run on a GPU
 expect to put their results always (barring for strings and resources) in
 device memory. As a result, when we run a remote call function via
 GeneratorDataset (placed on GPU) in the MultiDeviceIterator, the _Retval ends
 up copying the INT32 tensors on to Host memory causing downstream consumers
 of the data to misbehave (treat them as garbage / NaN's).

We fix this by conditionally (based on an experimental attr) replacing the _Retval op with a new Op that doesn't do the HostMemory annotation for INT32 types, thereby making sure that INT32's are placed in device memory. We also make sure that the MultiDeviceIterator implementation makes use of this experimental attr.

PiperOrigin-RevId: 216961804
---
 tensorflow/core/framework/function.cc           | 14 +++++++++++---
 tensorflow/core/framework/function.h            |  1 +
 tensorflow/core/framework/function_test.cc      | 17 +++++++++++++++++
 tensorflow/core/kernels/function_ops.cc         |  3 +++
 tensorflow/core/kernels/function_ops.h          |  1 +
 tensorflow/core/ops/function_ops.cc             | 15 +++++++++++++++
 .../data/ops/multi_device_iterator_ops.py       |  2 +-
 7 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index be11f8874c..4ad6fd00da 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -303,7 +303,7 @@ class FunctionInstantiationHelper {
   Status AddReturnNode(
       const OpDef::ArgDef& ret_def, AttrSlice attrs,
       const ::tensorflow::protobuf::Map<string, string>& ret_map,
-      int* ret_index) {
+      bool ints_on_device, int* ret_index) {
     auto ret_iter = ret_map.find(ret_def.name());
     if (ret_iter == ret_map.end()) {
       return errors::InvalidArgument("Return ", ret_def.name(), " missing.");
@@ -329,7 +329,11 @@ class FunctionInstantiationHelper {
         strings::StrAppend(&name, "_", i);
       }
       NodeDef* gnode = AddNode(name);
-      gnode->set_op(FunctionLibraryDefinition::kRetOp);
+      if (ints_on_device && dtypes[i] == DataType::DT_INT32) {
+        gnode->set_op(FunctionLibraryDefinition::kDeviceRetOp);
+      } else {
+        gnode->set_op(FunctionLibraryDefinition::kRetOp);
+      }
       AddInput(nodes_.size() - 1, item->nid, item->idx + i);
       AddAttr("T", dtypes[i], gnode);
       AddAttr("index", (*ret_index)++, gnode);
@@ -686,10 +690,14 @@ Status InstantiateFunction(const FunctionDef& fdef, AttrSlice attr_values,
     }
   }
 
+  bool ints_on_device = fdef.attr().count("experimental_ints_on_device") != 0 &&
+                        fdef.attr().at("experimental_ints_on_device").b();
+
   // Emits nodes for the function's return values.
   int ret_index = 0;
   for (const OpDef::ArgDef& ret_def : sig.output_arg()) {
-    s = helper.AddReturnNode(ret_def, attr_values, fdef.ret(), &ret_index);
+    s = helper.AddReturnNode(ret_def, attr_values, fdef.ret(), ints_on_device,
+                             &ret_index);
     if (!s.ok()) {
       errors::AppendToMessage(&s, "In function output ", Print(ret_def));
       return s;
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index fa58e36a21..fcc6203729 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -380,6 +380,7 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
   // created for return values bear the name given by `kRetOp`.
   static constexpr const char* const kArgOp = "_Arg";
   static constexpr const char* const kRetOp = "_Retval";
+  static constexpr const char* const kDeviceRetOp = "_DeviceRetval";
 
   static constexpr const char* const kGradientOp = "SymbolicGradient";
   static constexpr const char* const kFuncAttr = "f";
diff --git a/tensorflow/core/framework/function_test.cc b/tensorflow/core/framework/function_test.cc
index 44e1383719..10392a9f32 100644
--- a/tensorflow/core/framework/function_test.cc
+++ b/tensorflow/core/framework/function_test.cc
@@ -495,6 +495,23 @@ MySelect(x:float) -> (z:float) {
   EXPECT_EQ(DebugString(result.nodes), e2);
 }
 
+TEST(TFunc, IntsOnDeviceArgNotSet) {
+  auto fdef = test::function::XTimesTwoInt32();
+  InstantiationResult result;
+  TF_ASSERT_OK(InstantiateFunction(fdef, AttrSlice(), GetOpSig, &result));
+  EXPECT_EQ(5, result.nodes.size());
+  EXPECT_EQ("_Retval", result.nodes[4].op());
+}
+
+TEST(TFunc, IntsOnDeviceArgSet) {
+  auto fdef = test::function::XTimesTwoInt32();
+  (*fdef.mutable_attr())["experimental_ints_on_device"].set_b(true);
+  InstantiationResult result;
+  TF_ASSERT_OK(InstantiateFunction(fdef, AttrSlice(), GetOpSig, &result));
+  EXPECT_EQ(5, result.nodes.size());
+  EXPECT_EQ("_DeviceRetval", result.nodes[4].op());
+}
+
 static void HasError(const Status& s, const string& substr) {
   EXPECT_TRUE(str_util::StrContains(s.ToString(), substr))
       << ">>" << s << "<<, expected substring >>" << substr << "<<";
diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc
index bfdabc3a9f..a5374e0268 100644
--- a/tensorflow/core/kernels/function_ops.cc
+++ b/tensorflow/core/kernels/function_ops.cc
@@ -70,6 +70,7 @@ void RetvalOp::Compute(OpKernelContext* ctx) {
 
 REGISTER_SYSTEM_KERNEL_BUILDER(Name(kArgOp).Device(DEVICE_CPU), ArgOp);
 REGISTER_SYSTEM_KERNEL_BUILDER(Name(kRetOp).Device(DEVICE_CPU), RetvalOp);
+REGISTER_SYSTEM_KERNEL_BUILDER(Name(kDeviceRetOp).Device(DEVICE_CPU), RetvalOp);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER(type)     \
@@ -126,6 +127,8 @@ TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kRetOp)
                                                    .HostMemory("input")
                                                    .TypeConstraint<int32>("T"),
                                                RetvalOp);
+REGISTER_KERNEL_BUILDER(
+    Name(kDeviceRetOp).Device(DEVICE_GPU).TypeConstraint<int32>("T"), RetvalOp);
 REGISTER_KERNEL_BUILDER(Name(kRetOp)
                             .Device(DEVICE_GPU)
                             .TypeConstraint<ResourceHandle>("T")
diff --git a/tensorflow/core/kernels/function_ops.h b/tensorflow/core/kernels/function_ops.h
index 9e88cc6d8c..0f51eca163 100644
--- a/tensorflow/core/kernels/function_ops.h
+++ b/tensorflow/core/kernels/function_ops.h
@@ -23,6 +23,7 @@ namespace tensorflow {
 
 static const char* const kArgOp = FunctionLibraryDefinition::kArgOp;
 static const char* const kRetOp = FunctionLibraryDefinition::kRetOp;
+static const char* const kDeviceRetOp = FunctionLibraryDefinition::kDeviceRetOp;
 
 class ArgOp : public OpKernel {
  public:
diff --git a/tensorflow/core/ops/function_ops.cc b/tensorflow/core/ops/function_ops.cc
index a6914d9383..6edd86b3ad 100644
--- a/tensorflow/core/ops/function_ops.cc
+++ b/tensorflow/core/ops/function_ops.cc
@@ -50,6 +50,21 @@ input: The return value.
 index: This return value is the index-th return value of the function.
 )doc");
 
+REGISTER_SYSTEM_OP("_DeviceRetval")
+    .Input("input: T")
+    .Attr("T: type")
+    .Attr("index: int >= 0")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext* context) {
+      return Status::OK();
+    })
+    .Doc(R"doc(
+A graph node which represents a return value of a function.
+
+input: The return value.
+index: This return value is the index-th return value of the function.
+)doc");
+
 REGISTER_OP("_ListToArray")
     .Input("input: Tin")
     .Output("output: N * T")
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index b7d3aac206..2086614b7c 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -78,7 +78,7 @@ class _PerDeviceGenerator(dataset_ops.Dataset):
           output_types=self._flat_output_types,
           output_shapes=self._flat_output_shapes)
 
-    @function.Defun(dtypes.string)
+    @function.Defun(dtypes.string, experimental_ints_on_device=True)
     def _remote_next_func(string_handle):
       return functional_ops.remote_call(
           target=source_device,
-- 
GitLab


From d931fbf77127e718199be6d9c9997c83f388fe2a Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 12 Oct 2018 21:25:14 -0700
Subject: [PATCH 0937/1085] [tf.data vectorization] Add vectorizers for parsing
 ops (DecodeCSV, ParseSingleExample)

PiperOrigin-RevId: 216963411
---
 .../core/grappler/optimizers/data/BUILD       |   1 +
 .../optimizers/data/vectorization/BUILD       |  16 +
 .../vectorization/decode_csv_vectorizer.cc    |  70 ++
 .../parse_single_example_vectorizer.cc        | 109 +++
 .../data/vectorization_utils_test.cc          | 722 ++++++++++++------
 .../kernel_tests/optimization/BUILD           |   2 +
 .../optimization/map_vectorization_test.py    |  95 ++-
 7 files changed, 766 insertions(+), 249 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/decode_csv_vectorizer.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/vectorization/parse_single_example_vectorizer.cc

diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 1c553044a8..1a648da5da 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -591,6 +591,7 @@ tf_cc_test(
         "//tensorflow/core/kernels/data:dataset_ops",
         "//tensorflow/core/kernels:cast_op",
         "//tensorflow/core/kernels:logging_ops",
+        "//tensorflow/core/kernels:parsing",
         "//tensorflow/tools/graph_transforms:transform_utils",
     ] + tf_protos_all(),
 )
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
index 60f1df1c52..5d98b0d97c 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/BUILD
@@ -75,6 +75,20 @@ cc_library(
     alwayslink = 1,
 )
 
+cc_library(
+    name = "decode_csv_vectorizer",
+    srcs = ["decode_csv_vectorizer.cc"],
+    deps = VECTORIZER_DEPS,
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "parse_single_example_vectorizer",
+    srcs = ["parse_single_example_vectorizer.cc"],
+    deps = VECTORIZER_DEPS,
+    alwayslink = 1,
+)
+
 cc_library(
     name = "reshape_vectorizer",
     srcs = ["reshape_vectorizer.cc"],
@@ -96,6 +110,8 @@ cc_library(
     deps = [
         ":add_vectorizer",
         ":cast_vectorizer",
+        ":decode_csv_vectorizer",
+        ":parse_single_example_vectorizer",
         ":reshape_vectorizer",
         ":unpack_vectorizer",
         ":vectorizer",
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/decode_csv_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/decode_csv_vectorizer.cc
new file mode 100644
index 0000000000..c4460387bb
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/decode_csv_vectorizer.cc
@@ -0,0 +1,70 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+
+namespace tensorflow {
+namespace grappler {
+
+namespace {
+
+// DecodeCSV is the vectorized version of itself.
+class DecodeCSVVectorizer : public Vectorizer {
+ public:
+  Status Vectorize(const Node& node, Graph* outer_scope,
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
+    if (!inputs[0].stacked) {
+      return errors::InvalidArgument("Expecting input 0 to be stacked.");
+    }
+    for (size_t i = 1; i < inputs.size(); ++i) {
+      if (inputs[i].stacked) {
+        // Record defaults should not be stacked
+        return errors::InvalidArgument("Expecting input ", i,
+                                       "to be unstacked.");
+      }
+    }
+
+    std::vector<NodeBuilder::NodeOut> defaults;
+    defaults.reserve(inputs.size() - 1);
+    for (size_t i = 1; i < inputs.size(); ++i) {
+      defaults.emplace_back(inputs[i].node, inputs[i].output_index);
+    }
+
+    Node* new_node;
+    auto node_builder =
+        NodeBuilder(node.type_string(), node.type_string())
+            .Input(inputs[0].node, inputs[0].output_index)  // records;
+            .Input(defaults);                               // defaults
+
+    for (const auto& attr : node.attrs()) {
+      node_builder = node_builder.Attr(attr.first, attr.second);
+    }
+    TF_RETURN_IF_ERROR(node_builder.Finalize(outer_scope, &new_node));
+
+    // Add output mappings
+    for (int i = 0; i < node.num_outputs(); ++i) {
+      outputs->emplace_back(new_node, i, true);
+    }
+    return Status::OK();
+  }
+};
+
+REGISTER_VECTORIZER("DecodeCSV", DecodeCSVVectorizer);
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/parse_single_example_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/parse_single_example_vectorizer.cc
new file mode 100644
index 0000000000..7d0edfb386
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/vectorization/parse_single_example_vectorizer.cc
@@ -0,0 +1,109 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/framework/scope_internal.h"
+#include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
+
+namespace tensorflow {
+namespace grappler {
+
+namespace {
+
+// ParseExample is the vectorized version of ParseSingleExample.
+class ParseSingleExampleVectorizer : public Vectorizer {
+ public:
+  Status Vectorize(const Node& node, Graph* outer_scope,
+                   std::vector<WrappedTensor>&& inputs,
+                   std::vector<WrappedTensor>* outputs) override {
+    if (!inputs[0].stacked) {
+      return errors::InvalidArgument("Expecting input 0 to be stacked.");
+    }
+    for (size_t i = 1; i < inputs.size(); ++i) {
+      if (inputs[i].stacked) {
+        // Dense defaults should not be stacked
+        return errors::InvalidArgument("Expecting input ", i,
+                                       "to be unstacked.");
+      }
+    }
+
+    std::vector<NodeBuilder::NodeOut> dense_defaults;
+    dense_defaults.reserve(inputs.size() - 1);
+    for (size_t i = 1; i < inputs.size(); ++i) {
+      dense_defaults.emplace_back(inputs[i].node, inputs[i].output_index);
+    }
+
+    Status scope_status;
+    Scope parent = NewInternalScope(outer_scope, &scope_status, nullptr);
+    Scope s = parent.NewSubScope("vectorize/parse_single_example");
+
+    // Empty string vector
+    Node* names = ops::Const(s, std::initializer_list<string>({})).node();
+
+    // sparse_keys and dense_keys are attrs on ParseSingleExample, but are
+    // inputs on ParseExample. We have to add const input nodes for these.
+    auto make_list_input_from_attr =
+        [&s, &node](StringPiece attr_name,
+                    std::vector<NodeBuilder::NodeOut>* result) {
+          std::vector<string> attr_vals;
+          TF_RETURN_IF_ERROR(GetNodeAttr(node.attrs(), attr_name, &attr_vals));
+          result->reserve(attr_vals.size());
+
+          for (const auto& val : attr_vals) {
+            result->push_back(ops::Const(s, val).node());
+          }
+          return Status::OK();
+        };
+
+    std::vector<NodeBuilder::NodeOut> sparse_keys;
+    TF_RETURN_IF_ERROR(make_list_input_from_attr("sparse_keys", &sparse_keys));
+
+    std::vector<NodeBuilder::NodeOut> dense_keys;
+    TF_RETURN_IF_ERROR(make_list_input_from_attr("dense_keys", &dense_keys));
+
+    TF_RETURN_IF_ERROR(scope_status);
+
+    Node* new_node;
+    auto node_builder =
+        NodeBuilder(strings::StrCat("vectorized/", node.name()), "ParseExample")
+            .Input(inputs[0].node, inputs[0].output_index)  // serialized
+            .Input(names)                                   // names
+            .Input(sparse_keys)                             // sparse_keys
+            .Input(dense_keys)                              // dense_keys
+            .Input(dense_defaults);                         // dense_defaults
+
+    for (const auto& attr : {"sparse_types", "dense_shapes"}) {
+      // Copy attrs if they exist
+      const AttrValue* val;
+      TF_RETURN_IF_ERROR(node.attrs().Find(attr, &val));
+      node_builder = node_builder.Attr(attr, *val);
+    }
+
+    TF_RETURN_IF_ERROR(node_builder.Finalize(outer_scope, &new_node));
+
+    // Add output mappings
+    for (size_t i = 0; i < node.num_outputs(); ++i) {
+      outputs->emplace_back(new_node, i, true);
+    }
+    return Status::OK();
+  }
+};
+
+REGISTER_VECTORIZER("ParseSingleExample", ParseSingleExampleVectorizer);
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index 767f61226b..e82bfb702a 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -49,7 +49,7 @@ NodeDef* AddUnstackNode(const string& name, const std::vector<string>& inputs,
 NodeDef* AddMapDefunNode(const string& name, const std::vector<string>& inputs,
                          const std::vector<DataType>& t_arguments,
                          const std::vector<DataType>& output_types,
-                         const std::vector<TensorShape>& output_shapes,
+                         const std::vector<PartialTensorShape>& output_shapes,
                          const string& function_name, FunctionDef* fn) {
   NameAttrList func;
   func.set_name(function_name);
@@ -92,6 +92,9 @@ FunctionDef CreateFunction(
   return func;
 }
 
+///==================================//
+// Tests for vectorization framework //
+///==================================//
 
 // Before:
 //
@@ -124,7 +127,7 @@ FunctionDef CreateFunction(
 // +---------------+ Ret0 +---+ Ret1 +--------+
 //                 +------+   +------+
 //
-TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
+TEST(VectorizeMapDefunTest, VectorizeWithNoOps) {
   FunctionDef inner =
       CreateFunction("inner_function", {{"arg0", DT_INT32}, {"arg1", DT_INT32}},
                      {{"ret0", DT_INT32}, {"ret1", DT_INT32}},
@@ -178,7 +181,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
 // +---------------+ Ret0 +---+ Ret1 +--------+
 //                 +------+   +------+
 //
-//   where XOp1 is not convertible.
+//   where XOp1 does not have a vectorizer defined.
 //
 // After:
 //
@@ -207,7 +210,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunNoOps) {
 // +---------------+ Ret0 +---+ Ret1 +--------+
 //                 +------+   +------+
 //
-TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) {
+TEST(VectorizeMapDefunTest, VectorizeWithUnvectorizableOp) {
   FunctionDef inner =
       CreateFunction("inner_function", {{"arg0", DT_INT32}, {"arg1", DT_INT32}},
                      {{"ret0", DT_INT32}, {"ret1", DT_INT32}},
@@ -250,78 +253,6 @@ TEST(VectorizeMapDefunTest, VectorizeDefunUnconvertible) {
       lib_def.Find(map_defun_node.attr().at("f").func().name());
   EXPECT_EQ(map_defun_fn->signature().output_arg_size(), 1);
 }
-
-// Before:
-//
-//
-//                 +------+
-// +---------------+ Arg0 +---------+
-// |               +---+--+         |
-// |                   |            |
-// |               +---v--+         |
-// |   +-----------+ Arg0 +-----+   |
-// |   |           +---+--+     |   |
-// |   |               |        |   |
-// |   |               |        |   |
-// |   |           +---v--+     |   |
-// |   |           | Cast |     |   |
-// |   |           +---+--+     |   |
-// |   |               |        |   |
-// |   | MapDefun  +---v--+     |   |
-// |   +-----------+ Ret0 +-----+   |
-// |               +---+--+         |
-// |                   |            |
-// |               +---v--+         |
-// +---------------+ Ret0 +---------+
-//                 +------+
-//
-//
-//  After:
-//
-//                 +------+
-// +---------------+ Arg0 +---------+
-// |               +---+--+         |
-// |                   |            |
-// |               +---v--+         |
-// |               | Cast |         |
-// |               +---+--+         |
-// |                   |            |
-// |               +---v--+         |
-// +---------------+ Ret0 +---------+
-//                 +------+
-//
-TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) {
-  FunctionDef inner =
-      CreateFunction("inner_function", {{"arg0", DT_INT32}},
-                     {{"ret0", DT_INT64}}, {{"ret0", "Cast:y:0"}});
-  NodeDef* cast_op =
-      AddCastNode("Cast", {"arg0"}, DT_INT32, DT_INT64, false, &inner);
-  CHECK_NOTNULL(cast_op);
-
-  FunctionDef outer = CreateFunction("outer_function", {{"x", DT_INT32}},
-                                     {{"mapdefun", DT_INT64}},
-                                     {{"mapdefun", "MapDefun:output:0"}});
-
-  NodeDef* map_defun =
-      AddMapDefunNode("MapDefun", {"x"}, {DT_INT32}, {DT_INT64}, {{}},
-                      inner.signature().name(), &outer);
-  CHECK_NOTNULL(map_defun);
-
-  FunctionDefLibrary lib;
-  *lib.add_function() = outer;
-  *lib.add_function() = inner;
-  FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
-  EXPECT_TRUE(
-      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
-  const NodeDef& cast_node = vectorized->node_def(
-      function_utils::FindFunctionNodeWithOp("Cast", *vectorized));
-  EXPECT_EQ(cast_node.input(0), "x");
-  EXPECT_EQ(GetRetval(*vectorized, 0),
-            strings::StrCat(cast_node.name(), ":y:0"));
-  EXPECT_EQ(vectorized->node_def_size(), 1);
-}
-
 // Before:
 //
 //                 +------+
@@ -365,7 +296,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunSimpleCast) {
 // +---------------+ Ret0 +---+ Ret1 +--------+
 //                 +------+   +------+
 //
-TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) {
+TEST(VectorizeMapDefunTest, VectorizeWithOutputUsedTwice) {
   // Tests that behavior is correct when an output is used more than once.
   FunctionDef inner =
       CreateFunction("inner_function", {{"arg0", DT_INT32}},
@@ -401,98 +332,6 @@ TEST(VectorizeMapDefunTest, VectorizeDefunCastUsedTwice) {
             strings::StrCat(cast_node.name(), ":y:0"));
   EXPECT_EQ(vectorized->node_def_size(), 1);
 }
-
-// Before:
-//
-//                        +------+
-// +----------------------+ Arg0 +----------------------+
-// |                      +---+--+                      |
-// |                          |                         |
-// |                      +---v--+                      |
-// |   +------------------+ Arg0 +------------------+   |
-// |   |                  +---+--+                  |   |
-// |   |                      |                     |   |
-// |   |                      |                     |   |
-// |   |                  +---v---+ num=3           |   |
-// |   |                  |Unstack| axis=0          |   |
-// |   |                  ++--+--++                 |   |
-// |   |                   |  |  |                  |   |
-// |   |              +----+  |  +-------+          |   |
-// |   |              |       |          |          |   |
-// |   | MapDefun +---v--+  +-v----+  +--v---+      |   |
-// |   +----------+ Ret0 +--+ Ret1 +--+ Ret2 +------+   |
-// |              +---+--+  +--+---+  +--+---+          |
-// |                  |        |         |              |
-// |              +---v--+  +--v---+  +--v---+          |
-// +--------------+ Ret0 +--+ Ret1 +--+ Ret2 +----------+
-//                +------+  +------+  +------+
-//
-//
-//  After:
-//
-//                        +------+
-// +----------------------+ Arg0 +----------------------+
-// |                      +---+--+                      |
-// |                          |                         |
-// |                          |                         |
-// |                          |                         |
-// |                      +---v---+ num=3               |
-// |                      |Unstack| axis=1              |
-// |                      ++--+--++                     |
-// |                       |  |  |                      |
-// |                  +----+  |  +-------+              |
-// |                  |       |          |              |
-// |                  |       |          |              |
-// |              +---v--+  +-v----+  +--v---+          |
-// +--------------+ Ret0 +--+ Ret1 +--+ Ret2 +----------+
-//                +------+  +------+  +------+
-//
-TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) {
-  FunctionDef inner = CreateFunction(
-      "inner_function", {{"arg0", DT_INT32}},
-      {{"ret0", DT_INT32}, {"ret1", DT_INT32}, {"ret2", DT_INT32}},
-      {{"ret0", "MyUnstack:output:0"},
-       {"ret1", "MyUnstack:output:1"},
-       {"ret2", "MyUnstack:output:2"}});
-  NodeDef* unstack_op =
-      AddUnstackNode("MyUnstack", {"arg0"}, DT_INT32, 0, 3, &inner);
-  CHECK_NOTNULL(unstack_op);
-
-  FunctionDef outer = CreateFunction("outer_function", {{"x", DT_INT32}},
-                                     {{"mapdefun", DT_INT32},
-                                      {"mapdefun_0", DT_INT32},
-                                      {"mapdefun_1", DT_INT32}},
-                                     {{"mapdefun", "MapDefun:output:0"},
-                                      {"mapdefun_0", "MapDefun:output:1"},
-                                      {"mapdefun_1", "MapDefun:output:2"}});
-
-  NodeDef* map_defun = AddMapDefunNode(
-      "MapDefun", {"x"}, {DT_INT32}, {DT_INT32, DT_INT32, DT_INT32},
-      {{1}, {1}, {1}}, inner.signature().name(), &outer);
-  CHECK_NOTNULL(map_defun);
-
-  FunctionDefLibrary lib;
-  *lib.add_function() = outer;
-  *lib.add_function() = inner;
-  FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
-  EXPECT_TRUE(
-      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
-  const NodeDef& unpack_node = vectorized->node_def(
-      function_utils::FindFunctionNodeWithOp("Unpack", *vectorized));
-  EXPECT_EQ(unpack_node.input(0), "x");
-  EXPECT_EQ(unpack_node.attr().at("axis").i(), 1);
-  EXPECT_EQ(unpack_node.attr().at("T").type(), DT_INT32);
-  EXPECT_EQ(unpack_node.attr().at("num").i(), 3);
-  EXPECT_EQ(GetRetval(*vectorized, 0),
-            strings::StrCat(unpack_node.name(), ":output:0"));
-  EXPECT_EQ(GetRetval(*vectorized, 1),
-            strings::StrCat(unpack_node.name(), ":output:1"));
-  EXPECT_EQ(GetRetval(*vectorized, 2),
-            strings::StrCat(unpack_node.name(), ":output:2"));
-  EXPECT_EQ(vectorized->node_def_size(), 1);
-}
-
 // Before:
 //
 //                        +------+
@@ -543,7 +382,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunOpWithMultipleOutputs) {
 // +--------------+ Ret0 +--+ Ret1 +--+ Ret2 +----------+
 //                +------+  +------+  +------+
 //
-TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) {
+TEST(VectorizeMapDefunTest, VectorizeWithChainedConvertibleOps) {
   FunctionDef inner = CreateFunction(
       "inner_function", {{"arg0", DT_INT32}},
       {{"ret0", DT_INT32}, {"ret1", DT_INT32}, {"ret2", DT_INT32}},
@@ -627,7 +466,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunChainedConvertibleOps) {
 //
 //  No change because we don't deal with control inputs for now.
 //
-TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
+TEST(VectorizeMapDefunTest, VectorizeWithControlInputs) {
   FunctionDef inner =
       CreateFunction("inner_function", {{"arg0", DT_INT32}},
                      {{"ret0", DT_INT64}}, {{"ret0", "Cast:y:0"}});
@@ -673,77 +512,6 @@ TEST(VectorizeMapDefunTest, VectorizeDefunWithControlInputs) {
               cast_node.input(1) == control_input);
 }
 
-// Before:
-//
-//
-//                 +------+
-// +---------------+ Arg0 +---------+
-// |               +---+--+         |
-// |                   |            |
-// |               +---v--+         |
-// |   +-----------+ Arg0 +-----+   |
-// |   |           +------+     |   |
-// |   |                        |   |
-// |   |                        |   |
-// |   |           +------+     |   |
-// |   |           |Const |     |   |
-// |   |           +---+--+     |   |
-// |   |               |        |   |
-// |   | MapDefun  +---v--+     |   |
-// |   +-----------+ Ret0 +-----+   |
-// |               +---+--+         |
-// |                   |            |
-// |               +---v--+         |
-// +---------------+ Ret0 +---------+
-//                 +------+
-//
-//
-//  After:
-//
-//                 +------+
-// +---------------+ Arg0 +---------+
-// |               +------+         |
-// |                                |
-// |               +------+         |
-// |               |Const |         |
-// |               +---+--+         |
-// |                   |            |
-// |                   |            |
-// |                   |            |
-// |               +---v--+         |
-// |               |Stack*|         |
-// |               +---+--+         |
-// |                   |            |
-// |                   |            |
-// |                   |            |
-// |               +---v--+         |
-// +---------------+ Ret0 +---------+
-//                 +------+
-// *Not actually a Stack node, but does the equivalent.
-//
-TEST(VectorizeMapDefunTest, VectorizeConst) {
-  FunctionDef inner = FunctionDefHelper::Create(
-      "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */},
-      {/* nodes */ FunctionDefHelper::Const("Const", 2)},
-      {{"ret0", "Const:output:0"}});
-  FunctionDef outer = FunctionDefHelper::Create(
-      "outer_function", {"outer_arg0: int32"}, {"mapdefun: int32"},
-      {/* attrs */}, {/* nodes */}, {{"mapdefun", "MapDefun:output:0"}});
-
-  NodeDef* map_defun =
-      AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_INT32}, {DT_INT32}, {{}},
-                      inner.signature().name(), &outer);
-
-  FunctionDefLibrary lib;
-  *lib.add_function() = outer;
-  *lib.add_function() = inner;
-  FunctionDef* vectorized;
-  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
-  EXPECT_TRUE(
-      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
-  EXPECT_TRUE(function_utils::ContainsFunctionNodeWithOp("Const", *vectorized));
-}
-
 // Before:
 //
 //
@@ -798,7 +566,7 @@ TEST(VectorizeMapDefunTest, VectorizeConst) {
 //                 +------+
 // *Not actually a Stack node, but does the equivalent.
 //
-TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) {
+TEST(VectorizeMapDefunTest, VectorizeWithUnstackedOutput) {
   FunctionDef inner = FunctionDefHelper::Create(
       "inner_function", {"arg0: int32"}, {"ret0: int64"}, {/* attrs */},
       {/* nodes */ FunctionDefHelper::Const("Const", 2)},
@@ -880,7 +648,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedOutput) {
 //                 +------+
 // *Not actually a Stack node, but does the equivalent.
 //
-TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
+TEST(VectorizeMapDefunTest, VectorizeWithUnstackedControl) {
   FunctionDef inner = FunctionDefHelper::Create(
       "inner_function", {"arg0: int32"}, {"ret0: int64"}, {/* attrs */},
       {/* nodes */ FunctionDefHelper::Const("Const", 2),
@@ -924,6 +692,172 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
   EXPECT_EQ(cast_node.input(1), strings::StrCat("^", const_dep_node->name()));
 }
 
+///==================================//
+// Tests for specific op vectorizers //
+///==================================//
+
+// Before:
+//
+//                        +------+
+// +----------------------+ Arg0 +----------------------+
+// |                      +---+--+                      |
+// |                          |                         |
+// |                      +---v--+                      |
+// |   +------------------+ Arg0 +------------------+   |
+// |   |                  +---+--+                  |   |
+// |   |                      |                     |   |
+// |   |                      |                     |   |
+// |   |                  +---v---+ num=3           |   |
+// |   |                  |Unstack| axis=0          |   |
+// |   |                  ++--+--++                 |   |
+// |   |                   |  |  |                  |   |
+// |   |              +----+  |  +-------+          |   |
+// |   |              |       |          |          |   |
+// |   | MapDefun +---v--+  +-v----+  +--v---+      |   |
+// |   +----------+ Ret0 +--+ Ret1 +--+ Ret2 +------+   |
+// |              +---+--+  +--+---+  +--+---+          |
+// |                  |        |         |              |
+// |              +---v--+  +--v---+  +--v---+          |
+// +--------------+ Ret0 +--+ Ret1 +--+ Ret2 +----------+
+//                +------+  +------+  +------+
+//
+//
+//  After:
+//
+//                        +------+
+// +----------------------+ Arg0 +----------------------+
+// |                      +---+--+                      |
+// |                          |                         |
+// |                          |                         |
+// |                          |                         |
+// |                      +---v---+ num=3               |
+// |                      |Unstack| axis=1              |
+// |                      ++--+--++                     |
+// |                       |  |  |                      |
+// |                  +----+  |  +-------+              |
+// |                  |       |          |              |
+// |                  |       |          |              |
+// |              +---v--+  +-v----+  +--v---+          |
+// +--------------+ Ret0 +--+ Ret1 +--+ Ret2 +----------+
+//                +------+  +------+  +------+
+//
+TEST(VectorizerTest, VectorizeUnstack) {
+  FunctionDef inner = CreateFunction(
+      "inner_function", {{"arg0", DT_INT32}},
+      {{"ret0", DT_INT32}, {"ret1", DT_INT32}, {"ret2", DT_INT32}},
+      {{"ret0", "MyUnstack:output:0"},
+       {"ret1", "MyUnstack:output:1"},
+       {"ret2", "MyUnstack:output:2"}});
+  NodeDef* unstack_op =
+      AddUnstackNode("MyUnstack", {"arg0"}, DT_INT32, 0, 3, &inner);
+  CHECK_NOTNULL(unstack_op);
+
+  FunctionDef outer = CreateFunction("outer_function", {{"x", DT_INT32}},
+                                     {{"mapdefun", DT_INT32},
+                                      {"mapdefun_0", DT_INT32},
+                                      {"mapdefun_1", DT_INT32}},
+                                     {{"mapdefun", "MapDefun:output:0"},
+                                      {"mapdefun_0", "MapDefun:output:1"},
+                                      {"mapdefun_1", "MapDefun:output:2"}});
+
+  NodeDef* map_defun = AddMapDefunNode(
+      "MapDefun", {"x"}, {DT_INT32}, {DT_INT32, DT_INT32, DT_INT32},
+      {{1}, {1}, {1}}, inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  const NodeDef& unpack_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Unpack", *vectorized));
+  EXPECT_EQ(unpack_node.input(0), "x");
+  EXPECT_EQ(unpack_node.attr().at("axis").i(), 1);
+  EXPECT_EQ(unpack_node.attr().at("T").type(), DT_INT32);
+  EXPECT_EQ(unpack_node.attr().at("num").i(), 3);
+  EXPECT_EQ(GetRetval(*vectorized, 0),
+            strings::StrCat(unpack_node.name(), ":output:0"));
+  EXPECT_EQ(GetRetval(*vectorized, 1),
+            strings::StrCat(unpack_node.name(), ":output:1"));
+  EXPECT_EQ(GetRetval(*vectorized, 2),
+            strings::StrCat(unpack_node.name(), ":output:2"));
+  EXPECT_EQ(vectorized->node_def_size(), 1);
+}
+
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |   +-----------+ Arg0 +-----+   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   |               |        |   |
+// |   |           +---v--+     |   |
+// |   |           | Cast |     |   |
+// |   |           +---+--+     |   |
+// |   |               |        |   |
+// |   | MapDefun  +---v--+     |   |
+// |   +-----------+ Ret0 +-----+   |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+//
+//  After:
+//
+//                 +------+
+// +---------------+ Arg0 +---------+
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// |               | Cast |         |
+// |               +---+--+         |
+// |                   |            |
+// |               +---v--+         |
+// +---------------+ Ret0 +---------+
+//                 +------+
+//
+TEST(VectorizerTest, VectorizeCast) {
+  FunctionDef inner =
+      CreateFunction("inner_function", {{"arg0", DT_INT32}},
+                     {{"ret0", DT_INT64}}, {{"ret0", "Cast:y:0"}});
+  NodeDef* cast_op =
+      AddCastNode("Cast", {"arg0"}, DT_INT32, DT_INT64, false, &inner);
+  CHECK_NOTNULL(cast_op);
+
+  FunctionDef outer = CreateFunction("outer_function", {{"x", DT_INT32}},
+                                     {{"mapdefun", DT_INT64}},
+                                     {{"mapdefun", "MapDefun:output:0"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"x"}, {DT_INT32}, {DT_INT64}, {{}},
+                      inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  const NodeDef& cast_node = vectorized->node_def(
+      function_utils::FindFunctionNodeWithOp("Cast", *vectorized));
+  EXPECT_EQ(cast_node.input(0), "x");
+  EXPECT_EQ(GetRetval(*vectorized, 0),
+            strings::StrCat(cast_node.name(), ":y:0"));
+  EXPECT_EQ(vectorized->node_def_size(), 1);
+}
+
 // Before:
 //
 //                   +------+
@@ -977,7 +911,7 @@ TEST(VectorizeMapDefunTest, VectorizeUnstackedControl) {
 // +-----------------------+ Ret  +-----------+
 //                         +------+
 //
-TEST(VectorizeMapDefunTest, VectorizeDefunAdd) {
+TEST(VectorizerTest, VectorizeAdd) {
   // Note that this checks that the "Add" vectorizer is successful, but does not
   // check that the transformed function is correct (i.e. produces the same
   // output as the unvectorized map defun). For the latter, the tests are in
@@ -1065,7 +999,7 @@ TEST(VectorizeMapDefunTest, VectorizeDefunAdd) {
 //
 // (Where Concat* appends the 0th dim of the input to the new shape)
 //
-TEST(VectorizeMapDefunTest, VectorizeReshape) {
+TEST(VectorizerTest, VectorizeReshape) {
   FunctionDef inner = FunctionDefHelper::Create(
       "inner_function", {"arg0: int32"}, {"ret0: int32"}, {/* attrs */},
       {/* nodes */ FunctionDefHelper::Const("Const",
@@ -1100,6 +1034,302 @@ TEST(VectorizeMapDefunTest, VectorizeReshape) {
             strings::StrCat(reshape_node.name(), ":output:0"));
 }
 
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------------------+
+// |               +---+--+                     |
+// |                   |                        |
+// |               +---v--+                     |
+// |   +-----------+ Arg0 +-----------------+   |
+// |   |           +---+--+                 |   |
+// |   |               |                    |   |
+// |   |               |   record_defaults  |   |
+// |   |               |   +-----+  +-----+ |   |
+// |   |               |   |Const|  |Const| |   |
+// |   |               |   +--+--+  +--+--+ |   |
+// |   |               |      |        |    |   |
+// |   |               | +----+        |    |   |
+// |   |               | |             |    |   |
+// |   |               | | +-----------+    |   |
+// |   |               | | |                |   |
+// |   |           +---v-v-v-+              |   |
+// |   |           |DecodeCSV|              |   |
+// |   |           +---+---+-+              |   |
+// |   |               |   |                |   |
+// |   |               |   +------+         |   |
+// |   |               |          |         |   |
+// |   | MapDefun  +---v--+   +---v--+      |   |
+// |   +-----------+ Ret0 +---+ Ret1 +------+   |
+// |               +---+--+   +---+--+          |
+// |                   |          |             |
+// |               +---v--+   +---v--+          |
+// +---------------+ Ret0 +---+ Ret1 +----------+
+//                 +------+   +------+
+//
+//  After:
+//
+//           +------+
+// +---------+ Arg0 +------------------------+
+// |         +---+--+                        |
+// |             |                           |
+// |             |                           |
+// |             |     +-----+ +-----+       |
+// |             |     |Const| |Const|       |
+// |             |     +--+--+ +--+--+       |
+// |             |        |       |          |
+// |             |        |       |          |
+// |             | +------+       |          |
+// |             | | +------------+          |
+// |             | | |                       |
+// |             | | |                       |
+// |         +---v-v-v-+                     |
+// |         |DecodeCSV|                     |
+// |         +---+---+-+                     |
+// |             |   |                       |
+// |             |   +-------+               |
+// |             |           |               |
+// |           +-v----+   +--v---+           |
+// +-----------+ Ret0 +---+ Ret1 +-----------+
+//             +------+   +------+
+//
+//
+TEST(VectorizerTest, VectorizeDecodeCSV) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: string"}, {"ret0: int32", "ret1: string"},
+      {/* attrs */},
+      {FunctionDefHelper::Const("Default0", gtl::ArraySlice<int>({2})),
+       FunctionDefHelper::Const("Default1", gtl::ArraySlice<string>({})),
+       {{"DecodeCSV"},
+        "DecodeCSV",
+        {"arg0", "Default0:output:0", "Default1:output:0"},
+        {{"OUT_TYPE", DataTypeVector({DT_INT32, DT_STRING})}}}},
+      {{"ret0", "DecodeCSV:output:0"}, {"ret1", "DecodeCSV:output:1"}});
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: string"},
+      {"mapdefun: int32", "mapdefun_0: string"}, {/* attrs */}, {/* nodes */},
+      {{"mapdefun", "MapDefun:output:0"}, {"mapdefun_0", "MapDefun:output:1"}});
+
+  NodeDef* map_defun = AddMapDefunNode("MapDefun", {"outer_arg0"}, {DT_STRING},
+                                       {DT_INT32, DT_STRING}, {{}, {}},
+                                       inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+}
+
+TEST(VectorizerTest, VectorizeDecodeCSVWithStackedDefaults) {
+  // When the `record_defaults` input to DecodeCSV are stacked,
+  // the node should not be vectorized.
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: string", "arg1: int32", "arg2: string"},
+      {"ret0: int32", "ret1: string"}, {/* attrs */},
+      {{{"DecodeCSV"},
+        "DecodeCSV",
+        {"arg0", "arg1", "arg2"},  // Inputs come from args, which are "stacked"
+        {{"OUT_TYPE", DataTypeVector({DT_INT32, DT_STRING})}}}},
+      {{"ret0", "DecodeCSV:output:0"}, {"ret1", "DecodeCSV:output:1"}});
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function",
+      {"outer_arg0: string", "outer_arg1: int32", "outer_arg2: string"},
+      {"mapdefun: int32", "mapdefun_0: string"}, {/* attrs */}, {/* nodes */},
+      {{"mapdefun", "MapDefun:output:0"}, {"mapdefun_0", "MapDefun:output:1"}});
+
+  NodeDef* map_defun =
+      AddMapDefunNode("MapDefun", {"outer_arg0", "outer_arg1", "outer_arg2"},
+                      {DT_STRING, DT_INT32, DT_STRING}, {DT_INT32, DT_STRING},
+                      {{}, {}}, inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+}
+
+// Before:
+//
+//
+//                 +------+
+// +---------------+ Arg0 +---------------------+
+// |               +---+--+                     |
+// |                   |                        |
+// |               +---v--+                     |
+// |   +-----------+ Arg0 +-----------------+   |
+// |   |           +---+--+                 |   |
+// |   |               |                    |   |
+// |   |               |   dense_defaults   |   |
+// |   |               |   +-----+  +-----+ |   |
+// |   |               |   |Const|  |Const| |   |
+// |   |               |   +--+--+  +--+--+ |   |
+// |   |               |      |        |    |   |
+// |   |               | +----+        |    |   |
+// |   |               | |             |    |   |
+// |   |               | | +-----------+    |   |
+// |   |               | | |                |   |
+// |   |           +---v-v-v----------+     |   |
+// |   |           |ParseSingleExample|     |   |
+// |   |           +---+---+----------+     |   |
+// |   |               |                    |   |
+// |   |             (...)                  |   |
+// |   |               |                    |   |
+// |   | MapDefun  +---v--+                 |   |
+// |   +-----------+ Rets*+-----------------+   |
+// |               +---+--+                     |
+// |                   |                        |
+// |               +---v--+                     |
+// +---------------+ Rets*+---------------------+
+//                 +------+
+//
+//  After:
+//
+//           +------+
+// +---------+ Arg0 +------------------------------------+
+// |         +---+--+                                    |
+// |             |                                       |
+// |             |   names                               |
+// |             |   sparse_types                        |
+// |             |   dense_types   dense_defaults        |
+// |             |  +============+ +-----+ +-----+       |
+// |             |  |  Consts*   | |Const| |Const|       |
+// |             |  +============+ +--+--+ +--+--+       |
+// |             |       |            |       |          |
+// |             |     (...)          |       |          |
+// |             |       |     +------+       |          |
+// |             |       |     | +------------+          |
+// |             |       |     | |                       |
+// |             |       |     | |                       |
+// |         +---v-------v-----v-v-+                     |
+// |         |  ParseExample       |                     |
+// |         +---+-----------------+                     |
+// |             |                                       |
+// |           (...)                                     |
+// |             |                                       |
+// |           +-v----+                                  |
+// +-----------+ Rets*+----------------------------------+
+//             +------+
+//
+// *Multiple nodes. Only one drawn for brevity.
+//
+TEST(VectorizerTest, VectorizeParseSingleExample) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: string"},
+      {"si0: int64", "si1: int64", "sv0: int64", "sv1: string", "ss0: int64",
+       "ss1: int64", "dv0: int64", "dv1: string"},
+      {/* attrs */},
+      {FunctionDefHelper::Const("DenseIntDefault", static_cast<int64>(0)),
+       FunctionDefHelper::Const("DenseStrDefault", string("")),
+       {{"Parse"},
+        "ParseSingleExample",
+        {"arg0", "DenseIntDefault:output:0", "DenseStrDefault:output:0"},
+        {
+            {"Tdense", DataTypeVector({DT_INT64, DT_STRING})},
+            {"dense_keys", gtl::ArraySlice<string>({"dense_int", "dense_str"})},
+            {"dense_shapes", gtl::ArraySlice<TensorShape>({}, {})},
+            {"num_sparse", 2},
+            {"sparse_keys", gtl::ArraySlice<string>({"spar_int", "spar_str"})},
+            {"sparse_types", DataTypeVector({DT_INT64, DT_STRING})},
+        }}},
+      {
+          {"si0", "Parse:sparse_indices:0"},
+          {"si1", "Parse:sparse_indices:1"},
+          {"sv0", "Parse:sparse_values:0"},
+          {"sv1", "Parse:sparse_values:1"},
+          {"ss0", "Parse:sparse_shapes:0"},
+          {"ss1", "Parse:sparse_shapes:1"},
+          {"dv0", "Parse:dense_values:0"},
+          {"dv1", "Parse:dense_values:1"},
+      });
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: string"},
+      {"si0: int64", "si1: int64", "sv0: int64", "sv1: string", "ss0: int64",
+       "ss1: int64", "dv0: int64", "dv1: string"},
+      {/* attrs */}, {/* nodes */},
+      {
+          {"si0", "MapDefun:output:0"},
+          {"si1", "MapDefun:output:1"},
+          {"sv0", "MapDefun:output:2"},
+          {"sv1", "MapDefun:output:3"},
+          {"ss0", "MapDefun:output:4"},
+          {"ss1", "MapDefun:output:5"},
+          {"dv0", "MapDefun:output:6"},
+          {"dv1", "MapDefun:output:7"},
+      });
+
+  NodeDef* map_defun = AddMapDefunNode(
+      "MapDefun", {"outer_arg0"}, {DT_STRING},
+      {DT_INT64, DT_INT64, DT_INT64, DT_STRING, DT_INT64, DT_INT64, DT_INT64,
+       DT_STRING},
+      std::vector<PartialTensorShape>(8), inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      !function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+  EXPECT_TRUE(
+      function_utils::ContainsFunctionNodeWithOp("ParseExample", *vectorized));
+}
+
+TEST(VectorizerTest, VectorizeParseSingleExampleWithStackedDefaults) {
+  FunctionDef inner = FunctionDefHelper::Create(
+      "inner_function", {"arg0: string", "arg1: string"},
+      {"dv0: int64", "dv1: string"}, {/* attrs */},
+      {FunctionDefHelper::Const("DenseIntDefault", static_cast<int64>(0)),
+       {{"Parse"},
+        "ParseSingleExample",
+        {"arg0", "DenseIntDefault:output:0", "arg1"},
+        {
+            {"Tdense", DataTypeVector({DT_INT64, DT_STRING})},
+            {"dense_keys", gtl::ArraySlice<string>({"dense_int", "dense_str"})},
+            {"dense_shapes", gtl::ArraySlice<TensorShape>({}, {})},
+            {"num_sparse", 0},
+            {"sparse_keys", gtl::ArraySlice<string>({})},
+            {"sparse_types", DataTypeVector({})},
+        }}},
+      {
+          {"dv0", "Parse:dense_values:0"},
+          {"dv1", "Parse:dense_values:1"},
+      });
+
+  FunctionDef outer = FunctionDefHelper::Create(
+      "outer_function", {"outer_arg0: string", "outer_arg1: string"},
+      {"dv0: int64", "dv1: string"}, {/* attrs */}, {/* nodes */},
+      {
+          {"dv0", "MapDefun:output:0"},
+          {"dv1", "MapDefun:output:1"},
+      });
+
+  NodeDef* map_defun = AddMapDefunNode(
+      "MapDefun", {"outer_arg0", "outer_arg1"}, {DT_STRING, DT_STRING},
+      {DT_INT64, DT_STRING}, std::vector<PartialTensorShape>(8),
+      inner.signature().name(), &outer);
+  CHECK_NOTNULL(map_defun);
+
+  FunctionDefLibrary lib;
+  *lib.add_function() = outer;
+  *lib.add_function() = inner;
+  FunctionDef* vectorized;
+  TF_EXPECT_OK(VectorizeMapDefun(outer, *map_defun, &lib, &vectorized));
+  EXPECT_TRUE(
+      function_utils::ContainsFunctionNodeWithOp("MapDefun", *vectorized));
+}
 }  // namespace
 }  // namespace vectorization_utils
 }  // namespace grappler
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
index 5a0a73fd83..8c54fe967e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
@@ -79,6 +79,7 @@ py_test(
         "no_windows",
     ],
     deps = [
+        "//tensorflow/core:protos_all_py",
         "//tensorflow/python:check_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
@@ -86,6 +87,7 @@ py_test(
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
+        "//tensorflow/python:parsing_ops",
         "//tensorflow/python:session",
         "//tensorflow/python/data/experimental/ops:optimization",
         "//tensorflow/python/data/kernel_tests:test_base",
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index eb24fa6462..91b4e401a9 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -22,6 +22,8 @@ import time
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
 from tensorflow.python.client import session
 from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
@@ -31,12 +33,69 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import parsing_ops
 from tensorflow.python.platform import test
 
 
+def _generate_csv_test_case():
+
+  def csv_factory():
+    return dataset_ops.Dataset.from_tensor_slices(["1.0:2:a",
+                                                   "2.4:5:c"]).repeat(5)
+
+  def decode_csv_fn(x):
+    return parsing_ops.decode_csv(
+        x,
+        record_defaults=[
+            constant_op.constant([], dtypes.float32),
+            constant_op.constant([], dtypes.int32),
+            constant_op.constant([], dtypes.string)
+        ],
+        field_delim=":")
+
+  return decode_csv_fn, csv_factory
+
+
+def _generate_parse_single_example_test_case():
+
+  def parse_example_factory():
+
+    def _int64_feature(*values):
+      return feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=values))
+
+    def _bytes_feature(*values):
+      return feature_pb2.Feature(
+          bytes_list=feature_pb2.BytesList(
+              value=[v.encode("utf-8") for v in values]))
+
+    return dataset_ops.Dataset.from_tensor_slices(
+        constant_op.constant([
+            example_pb2.Example(
+                features=feature_pb2.Features(
+                    feature={
+                        "dense_int": _int64_feature(i),
+                        "dense_str": _bytes_feature(str(i)),
+                        "sparse_int": _int64_feature(i, i * 2, i * 4, i * 8),
+                        "sparse_str": _bytes_feature(*["abc"] * i)
+                    })).SerializeToString() for i in range(10)
+        ]))
+
+  def parse_single_example_fn(x):
+    features = {
+        "dense_int": parsing_ops.FixedLenFeature((), dtypes.int64, 0),
+        "dense_str": parsing_ops.FixedLenFeature((), dtypes.string, ""),
+        "sparse_int": parsing_ops.VarLenFeature(dtypes.int64),
+        "sparse_str": parsing_ops.VarLenFeature(dtypes.string),
+    }
+    return parsing_ops.parse_single_example(x, features)
+
+  return parse_single_example_fn, parse_example_factory
+
+
 def _generate_optimization_test_cases():
 
   def base_dataset_factory():
@@ -44,6 +103,18 @@ def _generate_optimization_test_cases():
 
   rand_val = np.random.rand(1, 1, 1, 1, 1, 1)
 
+  csv_test_case = _generate_csv_test_case()
+  parse_fn, parse_base = _generate_parse_single_example_test_case()
+
+  def dense_output_only_parse_fn(x):
+    # Since we haven't implemented a vectorizer for SerializeSparse, any
+    # function with sparse outputs will only be naively vectorized.
+    parse_result = parse_fn(x)
+    return [
+        y for y in parse_result if not isinstance(y, sparse_tensor.SparseTensor)
+    ]
+
+  # Misc test cases
   test_cases = [
       ("Basic", lambda x: (x, x + 1), base_dataset_factory),
       ("Const", lambda x: 2, base_dataset_factory),
@@ -56,6 +127,11 @@ def _generate_optimization_test_cases():
       ("Unpack", array_ops.unstack, base_dataset_factory),
       ("UnpackNegativeAxis", lambda x: array_ops.unstack(x, axis=-1),
        base_dataset_factory),
+      # Parsing ops
+      ("DecodeCSV", csv_test_case[0], csv_test_case[1]),
+      ("ParseSingleExample", parse_fn, parse_base),
+      ("ParseSingleExampleDenseOutputOnly", dense_output_only_parse_fn,
+       parse_base),
   ]
 
   return [{
@@ -79,7 +155,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
                          expect_optimized=True):
     """Given base dataset and map fn, creates test datasets.
 
-    Returns a tuple of (unoptimized, dataset, optimized dataset). The
+    Returns a tuple of (unoptimized dataset, optimized dataset). The
     unoptimized dataset has the assertion that Batch follows Map. The optimized
     dataset has the assertion that Map follows Batch, and has the
     "map_vectorization" optimization applied.
@@ -205,7 +281,7 @@ class MapVectorizationBenchmark(test.Benchmark):
     return median_time
 
   def _compare(self, input_dataset, map_fn, batch_size, input_size, str_id):
-    num_elems = np.sum([np.prod(x) for x in input_size])
+    num_elems = int(np.sum([np.prod(x) for x in input_size]))
     name_template = "{}__batch_size_{}_input_element_size_{}_{}"
     unoptimized = input_dataset.map(map_fn).batch(batch_size)
     unoptimized_op = unoptimized.make_one_shot_iterator().get_next()
@@ -251,10 +327,22 @@ class MapVectorizationBenchmark(test.Benchmark):
     self._benchmark_helper(
         lambda *args: [array_ops.reshape(x, (-1, 30)) for x in args], "reshape")
 
+  def benchmarkDecodeCSV(self):
+    csv_fn, csv_factory = _generate_csv_test_case()
+    self._benchmark_helper(csv_fn, "decode_csv", lambda: [csv_factory()])
+
+  def benchmarkParseSingleExample(self):
+    # NOTE: Since we haven't implemented a vectorizer for "SerializeSparse",
+    # this function is only naively vectorized.
+    parse_fn, parse_factory = _generate_parse_single_example_test_case()
+
+    self._benchmark_helper(parse_fn, "parse_single_example",
+                           lambda: [parse_factory()])
+
   def _default_dataset_factory(self):
     input_sizes = [(10, 10, 3), (10, 100, 300)]
     for sz in input_sizes:
-      yield dataset_ops.Dataset.from_tensor_slices(np.random.rand(*sz)).repeat()
+      yield dataset_ops.Dataset.from_tensor_slices(np.random.rand(*sz))
 
   def _benchmark_helper(self, map_fn, str_id, base_dataset_factory=None):
     if base_dataset_factory is None:
@@ -262,6 +350,7 @@ class MapVectorizationBenchmark(test.Benchmark):
 
     batch_size = 1000
     for base_dataset in base_dataset_factory():
+      base_dataset = base_dataset.repeat()
       input_size = [
           tuple(shape.as_list())
           for shape in nest.flatten(base_dataset.output_shapes)
-- 
GitLab


From 0875270b8fbb40739654a84f3c19673ef885a70e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 21:43:06 -0700
Subject: [PATCH 0938/1085] Change default value of initial_sparsity from 0 to
 0.0

Otherwise initial_sparsity would be interpreted as int, which would cause type
mismatch when specified by an external float value.

PiperOrigin-RevId: 216964332
---
 tensorflow/contrib/model_pruning/python/pruning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py
index 67e58ff15d..d2b8116417 100644
--- a/tensorflow/contrib/model_pruning/python/pruning.py
+++ b/tensorflow/contrib/model_pruning/python/pruning.py
@@ -210,7 +210,7 @@ def get_pruning_hparams():
       block_height=1,
       block_width=1,
       block_pooling_function='AVG',
-      initial_sparsity=0,
+      initial_sparsity=0.0,
       target_sparsity=0.5,
       sparsity_function_begin_step=0,
       sparsity_function_end_step=100,
-- 
GitLab


From 3e94e19e703feb10a1b113917389b062bd25afb9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 12 Oct 2018 21:46:31 -0700
Subject: [PATCH 0939/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 216964494

---
 tensorflow/go/op/wrappers.go | 312 +++++++++++++++++------------------
 1 file changed, 156 insertions(+), 156 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 0bd1e07dae..2031e60773 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -4687,6 +4687,162 @@ func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_va
 	return op.Output(0), op.Output(1)
 }
 
+// ShapeNAttr is an optional argument to ShapeN.
+type ShapeNAttr func(optionalAttr)
+
+// ShapeNOutType sets the optional out_type attribute to value.
+// If not specified, defaults to DT_INT32
+func ShapeNOutType(value tf.DataType) ShapeNAttr {
+	return func(m optionalAttr) {
+		m["out_type"] = value
+	}
+}
+
+// Returns shape of tensors.
+//
+// This operation returns N 1-D integer tensors representing shape of `input[i]s`.
+func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ShapeN",
+		Input: []tf.Input{
+			tf.OutputList(input),
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
+		scope.UpdateErr("ShapeN", err)
+		return
+	}
+	return output
+}
+
+// CudnnRNNParamsToCanonicalAttr is an optional argument to CudnnRNNParamsToCanonical.
+type CudnnRNNParamsToCanonicalAttr func(optionalAttr)
+
+// CudnnRNNParamsToCanonicalRnnMode sets the optional rnn_mode attribute to value.
+// If not specified, defaults to "lstm"
+func CudnnRNNParamsToCanonicalRnnMode(value string) CudnnRNNParamsToCanonicalAttr {
+	return func(m optionalAttr) {
+		m["rnn_mode"] = value
+	}
+}
+
+// CudnnRNNParamsToCanonicalInputMode sets the optional input_mode attribute to value.
+// If not specified, defaults to "linear_input"
+func CudnnRNNParamsToCanonicalInputMode(value string) CudnnRNNParamsToCanonicalAttr {
+	return func(m optionalAttr) {
+		m["input_mode"] = value
+	}
+}
+
+// CudnnRNNParamsToCanonicalDirection sets the optional direction attribute to value.
+// If not specified, defaults to "unidirectional"
+func CudnnRNNParamsToCanonicalDirection(value string) CudnnRNNParamsToCanonicalAttr {
+	return func(m optionalAttr) {
+		m["direction"] = value
+	}
+}
+
+// CudnnRNNParamsToCanonicalDropout sets the optional dropout attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNParamsToCanonicalDropout(value float32) CudnnRNNParamsToCanonicalAttr {
+	return func(m optionalAttr) {
+		m["dropout"] = value
+	}
+}
+
+// CudnnRNNParamsToCanonicalSeed sets the optional seed attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNParamsToCanonicalSeed(value int64) CudnnRNNParamsToCanonicalAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// CudnnRNNParamsToCanonicalSeed2 sets the optional seed2 attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNParamsToCanonicalSeed2(value int64) CudnnRNNParamsToCanonicalAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Retrieves CudnnRNN params in canonical form.
+//
+// Retrieves a set of weights from the opaque params buffer that can be saved and
+// restored in a way compatible with future runs.
+//
+// Note that the params buffer may not be compatible across different GPUs. So any
+// save and restoration should be converted to and from the canonical weights and
+// biases.
+//
+// num_layers: Specifies the number of layers in the RNN model.
+// num_units: Specifies the size of the hidden state.
+// input_size: Specifies the size of the input state.
+// num_params: number of parameter sets for all layers.
+//     Each layer may contain multiple parameter sets, with each set consisting of
+//     a weight matrix and a bias vector.
+// weights: the canonical form of weights that can be used for saving
+//     and restoration. They are more likely to be compatible across different
+//     generations.
+// biases: the canonical form of biases that can be used for saving
+//     and restoration. They are more likely to be compatible across different
+//     generations.
+// rnn_mode: Indicates the type of the RNN model.
+// input_mode: Indicate whether there is a linear projection between the input and
+//     The actual computation before the first layer. 'skip_input' is only allowed
+//     when input_size == num_units; 'auto_select' implies 'skip_input' when
+//     input_size == num_units; otherwise, it implies 'linear_input'.
+// direction: Indicates whether a bidirectional model will be used.
+//     dir = (direction == bidirectional) ? 2 : 1
+// dropout: dropout probability. When set to 0., dropout is disabled.
+// seed: the 1st part of a seed to initialize dropout.
+// seed2: the 2nd part of a seed to initialize dropout.
+func CudnnRNNParamsToCanonical(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, params tf.Output, num_params int64, optional ...CudnnRNNParamsToCanonicalAttr) (weights []tf.Output, biases []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"num_params": num_params}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CudnnRNNParamsToCanonical",
+		Input: []tf.Input{
+			num_layers, num_units, input_size, params,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if weights, idx, err = makeOutputList(op, idx, "weights"); err != nil {
+		scope.UpdateErr("CudnnRNNParamsToCanonical", err)
+		return
+	}
+	if biases, idx, err = makeOutputList(op, idx, "biases"); err != nil {
+		scope.UpdateErr("CudnnRNNParamsToCanonical", err)
+		return
+	}
+	return weights, biases
+}
+
 // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
 type ResourceStridedSliceAssignAttr func(optionalAttr)
 
@@ -33002,159 +33158,3 @@ func CudnnRNNV2(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Out
 	op := scope.AddOperation(opspec)
 	return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4)
 }
-
-// ShapeNAttr is an optional argument to ShapeN.
-type ShapeNAttr func(optionalAttr)
-
-// ShapeNOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_INT32
-func ShapeNOutType(value tf.DataType) ShapeNAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// Returns shape of tensors.
-//
-// This operation returns N 1-D integer tensors representing shape of `input[i]s`.
-func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "ShapeN",
-		Input: []tf.Input{
-			tf.OutputList(input),
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if output, idx, err = makeOutputList(op, idx, "output"); err != nil {
-		scope.UpdateErr("ShapeN", err)
-		return
-	}
-	return output
-}
-
-// CudnnRNNParamsToCanonicalAttr is an optional argument to CudnnRNNParamsToCanonical.
-type CudnnRNNParamsToCanonicalAttr func(optionalAttr)
-
-// CudnnRNNParamsToCanonicalRnnMode sets the optional rnn_mode attribute to value.
-// If not specified, defaults to "lstm"
-func CudnnRNNParamsToCanonicalRnnMode(value string) CudnnRNNParamsToCanonicalAttr {
-	return func(m optionalAttr) {
-		m["rnn_mode"] = value
-	}
-}
-
-// CudnnRNNParamsToCanonicalInputMode sets the optional input_mode attribute to value.
-// If not specified, defaults to "linear_input"
-func CudnnRNNParamsToCanonicalInputMode(value string) CudnnRNNParamsToCanonicalAttr {
-	return func(m optionalAttr) {
-		m["input_mode"] = value
-	}
-}
-
-// CudnnRNNParamsToCanonicalDirection sets the optional direction attribute to value.
-// If not specified, defaults to "unidirectional"
-func CudnnRNNParamsToCanonicalDirection(value string) CudnnRNNParamsToCanonicalAttr {
-	return func(m optionalAttr) {
-		m["direction"] = value
-	}
-}
-
-// CudnnRNNParamsToCanonicalDropout sets the optional dropout attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNParamsToCanonicalDropout(value float32) CudnnRNNParamsToCanonicalAttr {
-	return func(m optionalAttr) {
-		m["dropout"] = value
-	}
-}
-
-// CudnnRNNParamsToCanonicalSeed sets the optional seed attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNParamsToCanonicalSeed(value int64) CudnnRNNParamsToCanonicalAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// CudnnRNNParamsToCanonicalSeed2 sets the optional seed2 attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNParamsToCanonicalSeed2(value int64) CudnnRNNParamsToCanonicalAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Retrieves CudnnRNN params in canonical form.
-//
-// Retrieves a set of weights from the opaque params buffer that can be saved and
-// restored in a way compatible with future runs.
-//
-// Note that the params buffer may not be compatible across different GPUs. So any
-// save and restoration should be converted to and from the canonical weights and
-// biases.
-//
-// num_layers: Specifies the number of layers in the RNN model.
-// num_units: Specifies the size of the hidden state.
-// input_size: Specifies the size of the input state.
-// num_params: number of parameter sets for all layers.
-//     Each layer may contain multiple parameter sets, with each set consisting of
-//     a weight matrix and a bias vector.
-// weights: the canonical form of weights that can be used for saving
-//     and restoration. They are more likely to be compatible across different
-//     generations.
-// biases: the canonical form of biases that can be used for saving
-//     and restoration. They are more likely to be compatible across different
-//     generations.
-// rnn_mode: Indicates the type of the RNN model.
-// input_mode: Indicate whether there is a linear projection between the input and
-//     The actual computation before the first layer. 'skip_input' is only allowed
-//     when input_size == num_units; 'auto_select' implies 'skip_input' when
-//     input_size == num_units; otherwise, it implies 'linear_input'.
-// direction: Indicates whether a bidirectional model will be used.
-//     dir = (direction == bidirectional) ? 2 : 1
-// dropout: dropout probability. When set to 0., dropout is disabled.
-// seed: the 1st part of a seed to initialize dropout.
-// seed2: the 2nd part of a seed to initialize dropout.
-func CudnnRNNParamsToCanonical(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, params tf.Output, num_params int64, optional ...CudnnRNNParamsToCanonicalAttr) (weights []tf.Output, biases []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"num_params": num_params}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CudnnRNNParamsToCanonical",
-		Input: []tf.Input{
-			num_layers, num_units, input_size, params,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if weights, idx, err = makeOutputList(op, idx, "weights"); err != nil {
-		scope.UpdateErr("CudnnRNNParamsToCanonical", err)
-		return
-	}
-	if biases, idx, err = makeOutputList(op, idx, "biases"); err != nil {
-		scope.UpdateErr("CudnnRNNParamsToCanonical", err)
-		return
-	}
-	return weights, biases
-}
-- 
GitLab


From 0f13eb912c48a7d9c5f4122a18a5e617222ae0ce Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Fri, 12 Oct 2018 23:08:36 -0700
Subject: [PATCH 0940/1085] [XLA:GPU] Elide tuple roots of the entry
 computation

The tuple buffer is never read, so stop emitting code to fill it. A typical
root tuple consists of a H2D memcpy and a host callback, both of which are
somewhat slow.

This helps tiny models and inference benchmarks, where the host/device syncs
can be a significant part of the runtime of the entire computation.

PiperOrigin-RevId: 216968475
---
 .../compiler/xla/service/gpu/ir_emitter_unnested.cc       | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 008398328c..eb8aaaea4f 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -1728,6 +1728,14 @@ Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) {
 }
 
 Status IrEmitterUnnested::HandleTuple(HloInstruction* tuple) {
+  // For the root node of the entry computation we can elide writing the tuple
+  // buffer. We can always figure out the contents of the tuples from buffer
+  // assignment because we insert copies to ensure non-ambiguous output buffers.
+  // GpuExecutable never reads the tuple buffer.
+  if (tuple ==
+      tuple->parent()->parent()->entry_computation()->root_instruction()) {
+    return Status::OK();
+  }
   bool all_tuple_elements_have_buffer =
       absl::c_all_of(tuple->operands(), [&](HloInstruction* tuple_element) {
         return ir_emitter_context_->buffer_assignment()
-- 
GitLab


From 573985cacba7ba36032fe3ac55f4ca9b52e08033 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 13 Oct 2018 02:02:22 -0700
Subject: [PATCH 0941/1085] compat: Update forward compatibility horizon to
 2018-10-13

PiperOrigin-RevId: 216976797
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 676fc869e4..15b34abc96 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 12)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 13)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From d84bfa45172da3af2b487593fb0cac1756f4fc0d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 13 Oct 2018 12:41:01 -0700
Subject: [PATCH 0942/1085] Removed #includes of protobuf full headers when
 TENSORFLOW_LITE_PROTOS is defined. This requires #ifdef'ing out the code to
 use JSON parsing/serialization.

PiperOrigin-RevId: 217003132
---
 .../core/platform/default/human_readable_json.cc      |  9 +++++++++
 tensorflow/core/platform/default/protobuf.h           | 11 +++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/platform/default/human_readable_json.cc b/tensorflow/core/platform/default/human_readable_json.cc
index 6bf2106f6e..9f97c8272c 100644
--- a/tensorflow/core/platform/default/human_readable_json.cc
+++ b/tensorflow/core/platform/default/human_readable_json.cc
@@ -22,6 +22,10 @@ namespace tensorflow {
 
 Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto,
                                 string* result) {
+#ifdef TENSORFLOW_LITE_PROTOS
+  *result = "[human readable output not available on Android]";
+  return Status::OK();
+#else
   result->clear();
 
   auto status = google::protobuf::util::MessageToJsonString(proto, result);
@@ -34,10 +38,14 @@ Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto,
                         StringPiece(error_msg.data(), error_msg.length())));
   }
   return Status::OK();
+#endif
 }
 
 Status HumanReadableJsonToProto(const string& str,
                                 ::google::protobuf::Message* proto) {
+#ifdef TENSORFLOW_LITE_PROTOS
+  return errors::Internal("Cannot parse JSON protos on Android");
+#else
   proto->Clear();
   auto status = google::protobuf::util::JsonStringToMessage(str, proto);
   if (!status.ok()) {
@@ -49,6 +57,7 @@ Status HumanReadableJsonToProto(const string& str,
                         StringPiece(error_msg.data(), error_msg.length())));
   }
   return Status::OK();
+#endif
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/default/protobuf.h b/tensorflow/core/platform/default/protobuf.h
index bd9d41c62b..2708d6ebda 100644
--- a/tensorflow/core/platform/default/protobuf.h
+++ b/tensorflow/core/platform/default/protobuf.h
@@ -19,18 +19,21 @@ limitations under the License.
 // IWYU pragma: private, include "third_party/tensorflow/core/platform/protobuf.h"
 // IWYU pragma: friend third_party/tensorflow/core/platform/protobuf.h
 
-#include "google/protobuf/arena.h"
+#ifndef TENSORFLOW_LITE_PROTOS
 #include "google/protobuf/descriptor.h"
 #include "google/protobuf/descriptor.pb.h"
 #include "google/protobuf/dynamic_message.h"
+#include "google/protobuf/text_format.h"
+#include "google/protobuf/util/json_util.h"
+#include "google/protobuf/util/type_resolver_util.h"
+#endif
+
+#include "google/protobuf/arena.h"
 #include "google/protobuf/io/coded_stream.h"
 #include "google/protobuf/io/zero_copy_stream.h"
 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
 #include "google/protobuf/map.h"
 #include "google/protobuf/repeated_field.h"
-#include "google/protobuf/text_format.h"
-#include "google/protobuf/util/json_util.h"
-#include "google/protobuf/util/type_resolver_util.h"
 
 namespace tensorflow {
 namespace protobuf = ::google::protobuf;
-- 
GitLab


From f667188daefab3650a07eaa578c78af7c96ff14b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 13 Oct 2018 15:58:09 -0700
Subject: [PATCH 0943/1085] In TPUEstimator, allow input_fn to return features
 and labels of any arbitrary nested structure rather than just tensor or dict
 or tensors.

The current restriction seems to be historical.  The code in _ModelFnWrapper uses nest flattens already, and then in _call_model_fn, 'features' and 'labels' are passed directly to the user code model_fn, so any nested structure should be supported.  Before entering ModelFnWrapper, the _InputPipeline and _Inputs classes also both handle nested structure as well.

PiperOrigin-RevId: 217010800
---
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 315 +++++++++---------
 1 file changed, 153 insertions(+), 162 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index a00acdcbce..1a34d017eb 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -76,7 +76,6 @@ from tensorflow.python.util import function_utils
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_inspect
 
-
 _INITIAL_LOSS = 1e7
 _ZERO_LOSS = 0.
 _TPU_ESTIMATOR = 'tpu_estimator'
@@ -96,7 +95,6 @@ _REWRITE_FOR_INFERENCE_MODE = '_rewrite_for_inference'
 # off of using _USE_TPU_KEY.
 _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY, _CTX_KEY]
 
-
 # TODO(b/65703635): Flip the value and remove all dead code. Currently, this is
 # only used for per-core based deployments. For per-host based pipelines, if a
 # user returns a Dataset instance it will be automatically wrapped in a
@@ -104,7 +102,6 @@ _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY, _CTX_KEY]
 # explicitly).
 _WRAP_INPUT_FN_INTO_WHILE_LOOP = False
 
-
 ops.register_proto_function(
     '{}_{}'.format(_TPU_ESTIMATOR, _ITERATIONS_PER_LOOP_VAR),
     proto_type=variable_pb2.VariableDef,
@@ -206,8 +203,8 @@ def _increase_eval_step_op(iterations_per_loop):
   """Returns an op to increase the eval step for TPU evaluation.
 
   Args:
-    iterations_per_loop: Tensor. The number of eval steps running in TPU
-        system before returning to CPU host for each `Session.run`.
+    iterations_per_loop: Tensor. The number of eval steps running in TPU system
+      before returning to CPU host for each `Session.run`.
 
   Returns:
     An operation
@@ -298,9 +295,8 @@ class TPUEstimatorSpec(model_fn_lib._TPUEstimatorSpec):  # pylint: disable=prote
 
     for hook in training_hooks + evaluation_hooks + prediction_hooks:
       if not isinstance(hook, session_run_hook.SessionRunHook):
-        raise TypeError(
-            'All hooks must be SessionRunHook instances, given: {}'.format(
-                hook))
+        raise TypeError('All hooks must be SessionRunHook instances, given: {}'
+                        .format(hook))
 
     return super(TPUEstimatorSpec, cls).__new__(
         cls,
@@ -372,7 +368,7 @@ class _OpQueueContext(object):
       yield iterations
 
   def join(self):
-    logging.info('Shutting down %s thread.' % self._name)
+    logging.info('Shutting down %s thread.', self._name)
     self.stop()
     self._thread.join()
 
@@ -518,7 +514,10 @@ class TPUInfeedOutfeedSessionHookForPrediction(TPUInfeedOutfeedSessionHook):
 
   def __init__(self, ctx, enqueue_ops, dequeue_ops, rendezvous=None):
     super(TPUInfeedOutfeedSessionHookForPrediction, self).__init__(
-        ctx, enqueue_ops, dequeue_ops, run_infeed_loop_on_coordinator=False,
+        ctx,
+        enqueue_ops,
+        dequeue_ops,
+        run_infeed_loop_on_coordinator=False,
         rendezvous=rendezvous)
 
   def _create_infeed_controller(self, name, target, args):
@@ -668,8 +667,7 @@ def generate_per_core_enqueue_ops_fn_for_host(
         user_context = tpu_context.TPUContext(
             internal_ctx=ctx,
             input_device=host_device,
-            invocation_index=host_id * ctx.num_of_cores_per_host + core_ordinal
-        )
+            invocation_index=host_id * ctx.num_of_cores_per_host + core_ordinal)
         inputs = _Inputs.from_input_fn(input_fn(user_context))
         if inputs.is_dataset:
           raise TypeError(
@@ -706,9 +704,7 @@ def generate_per_host_enqueue_ops_fn_for_host(
 
   with ops.device(device):
     user_context = tpu_context.TPUContext(
-        internal_ctx=ctx,
-        input_device=device,
-        invocation_index=host_id)
+        internal_ctx=ctx, input_device=device, invocation_index=host_id)
     inputs = _Inputs.from_input_fn(input_fn(user_context))
 
     is_dataset = inputs.is_dataset
@@ -720,7 +716,8 @@ def generate_per_host_enqueue_ops_fn_for_host(
       if batch_axis is not None:
         raise TypeError('For mode PREDICT, batch_axis is not supported yet.')
       inputs = _InputsWithStoppingSignals(
-          dataset=inputs.dataset, batch_size=ctx.batch_size_for_input_fn,
+          dataset=inputs.dataset,
+          batch_size=ctx.batch_size_for_input_fn,
           add_padding=True)
 
     if is_dataset:
@@ -781,9 +778,7 @@ def generate_per_host_v2_enqueue_ops_fn_for_host(
 
   with ops.device(device):
     user_context = tpu_context.TPUContext(
-        internal_ctx=ctx,
-        input_device=device,
-        invocation_index=host_id)
+        internal_ctx=ctx, input_device=device, invocation_index=host_id)
     inputs = _Inputs.from_input_fn(input_fn(user_context))
 
     is_dataset = inputs.is_dataset
@@ -964,12 +959,12 @@ class _InputPipeline(object):
   may expect multiple `features` and `labels` tuples one for each core.
 
   TPUEstimator allows various different structures for inputs (namely `features`
-  and `labels`).  `features` can be `Tensor`, dict of string name to `Tensor`,
-  or nested tuples and `labels` could be `None`, `Tensor`, or dict of string
-  name to `Tensor`. TPU infeed/outfeed library expects flattened tensor list.
-  So, `features` and `labels` need to be flattened, before infeed enqueue, and
-  the structure of them needs to be recorded, in order to restore them after
-  infeed dequeue.
+  and `labels`).  Both `features` and `labels` can be any nested sturcture
+  supported by TF nest (namely, dict, tuples, namedtuples or any nested
+  structure of such of Tensors).  `labels` could be `None` as well.
+
+  These are flattened before they are passed to the infeed/outfeed library
+  as that expectes flattend lists.
   """
 
   class InputsStructureRecorder(object):
@@ -1456,18 +1451,16 @@ class _ModelFnWrapper(object):
       if tensor.shape[0].value is None:
         raise ValueError(
             'The tensor with key ({}) in TPUEstimatorSpec.predictions has '
-            'dynamic shape (should be static). Tensor: {}'.format(
-                key, tensor))
+            'dynamic shape (should be static). Tensor: {}'.format(key, tensor))
     return predictions
 
-  def _validate_model_features_and_labels(self,
-                                          features,
-                                          labels,
+  def _validate_model_features_and_labels(self, features, labels,
                                           is_export_mode):
     """Validates that the features and labels for the model function are valid.
 
     A valid features/labels object is the one with:
-    - Type: Tensor or a dictionary of Tensors
+    - Type: A tensor or any nested structure of tensors supported by TF nest,
+        namely nested dictionary, tuple, namedtuple, or sequence of tensors.
     - Static shape if is_export_mode is False.
 
     Args:
@@ -1482,11 +1475,6 @@ class _ModelFnWrapper(object):
 
     def validate(obj, obj_name):
       """Helper validate function."""
-      if not isinstance(obj, ops.Tensor) and not isinstance(obj, dict):
-        raise TypeError(
-            'The {} to the model returned by input_fn must be either a Tensor '
-            'or a dictionary of Tensors. {}: {}'.format(obj_name, obj_name,
-                                                        obj))
       if is_export_mode or self._ctx.is_running_on_cpu(is_export_mode):
         return
       if isinstance(obj, ops.Tensor):
@@ -1495,14 +1483,11 @@ class _ModelFnWrapper(object):
               'The {} to the model returned by input_fn must have static shape.'
               ' Tensor: {}'.format(obj_name, obj))
       else:
-        for (key, value) in obj.items():
-          flattened_tensors = data_nest.flatten(value)
-          for tensor in flattened_tensors:
-            if not tensor.get_shape().is_fully_defined():
-              raise ValueError(
-                  'The {} to the model returned by input_fn must have static '
-                  'shape. Key: \'{}\', Tensor: {}'.format(
-                      obj_name, key, tensor))
+        for tensor in data_nest.flatten(obj):
+          if not tensor.get_shape().is_fully_defined():
+            raise ValueError(
+                ('The {} to the model returned by input_fn must have static '
+                 'shape. Tensor: {}').format(obj_name, tensor))
 
     validate(features, 'features')
     if labels is not None:
@@ -1723,7 +1708,8 @@ class _OutfeedHostCall(object):
     dequeue_ops_by_name = {}
     pos = 0
     for name in self._names:
-      dequeue_ops_by_name[name] = dequeue_ops[pos:pos+len(self._tensors[name])]
+      dequeue_ops_by_name[name] = dequeue_ops[pos:pos +
+                                              len(self._tensors[name])]
       pos += len(self._tensors[name])
 
     # It is assumed evaluation always happens on single host TPU system. So,
@@ -1867,7 +1853,7 @@ class TPUEstimator(estimator_lib.Estimator):
   the following discussion on TPU evaluation does not apply.
 
   `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`, where
-  `tensors` could be a list of `Tensor`s or dict of names to `Tensor`s. (See
+  `tensors` could be a list of any nested structure of `Tensor`s (See
   `TPUEstimatorSpec` for details).  `metric_fn` takes the `tensors` and returns
   a dict from metric string name to the result of calling a metric function,
   namely a `(metric_tensor, update_op)` tuple.
@@ -2052,8 +2038,9 @@ class TPUEstimator(estimator_lib.Estimator):
 
     Args:
       model_fn: Model function as required by `Estimator` which returns
-      EstimatorSpec or TPUEstimatorSpec. `training_hooks`, 'evaluation_hooks',
-      and `prediction_hooks` must not capure any TPU Tensor inside the model_fn.
+        EstimatorSpec or TPUEstimatorSpec. `training_hooks`, 'evaluation_hooks',
+        and `prediction_hooks` must not capure any TPU Tensor inside the
+        model_fn.
       model_dir: Directory to save model parameters, graph and etc. This can
         also be used to load checkpoints from the directory into a estimator to
         continue training a previously saved model. If `None`, the model_dir in
@@ -2064,19 +2051,18 @@ class TPUEstimator(estimator_lib.Estimator):
         `input_fn` and `model_fn`.  Keys are names of parameters, values are
         basic python types. There are reserved keys for `TPUEstimator`,
         including 'batch_size'.
-      use_tpu: A bool indicating whether TPU support is enabled. Currently,
-        - TPU training and evaluation respect this bit, but eval_on_tpu can
-          override execution of eval. See below.
-        - Predict still happens on CPU.
+      use_tpu: A bool indicating whether TPU support is enabled. Currently, -
+        TPU training and evaluation respect this bit, but eval_on_tpu can
+        override execution of eval. See below. - Predict still happens on CPU.
       train_batch_size: An int representing the global training batch size.
         TPUEstimator transforms this global batch size to a per-shard batch
         size, as params['batch_size'], when calling `input_fn` and `model_fn`.
-        Cannot be `None` if `use_tpu` is `True`.
-        Must be divisible by total number of replicas.
-      eval_batch_size: An int representing evaluation batch size.
-        Must be divisible by total number of replicas.
-      predict_batch_size: An int representing the prediction batch size.
-        Must be divisible by total number of replicas.
+        Cannot be `None` if `use_tpu` is `True`. Must be divisible by total
+        number of replicas.
+      eval_batch_size: An int representing evaluation batch size. Must be
+        divisible by total number of replicas.
+      predict_batch_size: An int representing the prediction batch size. Must be
+        divisible by total number of replicas.
       batch_axis: A python tuple of int values describing how each tensor
         produced by the Estimator `input_fn` should be split across the TPU
         compute shards. For example, if your input_fn produced (images, labels)
@@ -2092,11 +2078,10 @@ class TPUEstimator(estimator_lib.Estimator):
       export_to_tpu: If True, `export_savedmodel()` exports a metagraph for
         serving on TPU besides the one on CPU.
       warm_start_from: Optional string filepath to a checkpoint or SavedModel to
-                       warm-start from, or a `tf.estimator.WarmStartSettings`
-                       object to fully configure warm-starting.  If the string
-                       filepath is provided instead of a `WarmStartSettings`,
-                       then all variables are warm-started, and it is assumed
-                       that vocabularies and Tensor names are unchanged.
+        warm-start from, or a `tf.estimator.WarmStartSettings` object to fully
+        configure warm-starting.  If the string filepath is provided instead of
+        a `WarmStartSettings`, then all variables are warm-started, and it is
+        assumed that vocabularies and Tensor names are unchanged.
 
     Raises:
       ValueError: `params` has reserved keys already.
@@ -2157,10 +2142,8 @@ class TPUEstimator(estimator_lib.Estimator):
     # All properties passed to _InternalTPUContext are immutable.
     # pylint: disable=protected-access
     self._ctx = tpu_context._get_tpu_context(
-        self._config, train_batch_size,
-        eval_batch_size, predict_batch_size,
-        use_tpu,
-        eval_on_tpu)
+        self._config, train_batch_size, eval_batch_size, predict_batch_size,
+        use_tpu, eval_on_tpu)
 
     self._export_to_tpu = export_to_tpu
 
@@ -2182,38 +2165,39 @@ class TPUEstimator(estimator_lib.Estimator):
           'when `export_to_tpu` is `True`; '
           'got {}.'.format(mode))
 
-    (super(TPUEstimator, self).
-     _add_meta_graph_for_mode(builder,
-                              input_receiver_fn_map,
-                              checkpoint_path,
-                              strip_default_attrs,
-                              save_variables,
-                              mode=mode,
-                              export_tags=export_tags,
-                              check_variables=check_variables))
+    (super(TPUEstimator, self)._add_meta_graph_for_mode(
+        builder,
+        input_receiver_fn_map,
+        checkpoint_path,
+        strip_default_attrs,
+        save_variables,
+        mode=mode,
+        export_tags=export_tags,
+        check_variables=check_variables))
 
     if self._export_to_tpu:
-      input_receiver_fn_map = {_REWRITE_FOR_INFERENCE_MODE:
-                               input_receiver_fn_map[mode]}
+      input_receiver_fn_map = {
+          _REWRITE_FOR_INFERENCE_MODE: input_receiver_fn_map[mode]
+      }
       export_tags = [tag_constants.SERVING, tag_constants.TPU]
       mode = _REWRITE_FOR_INFERENCE_MODE
       # See b/110052256 for why `check_variables` is `False`.
-      (super(TPUEstimator, self).
-       _add_meta_graph_for_mode(builder,
-                                input_receiver_fn_map,
-                                checkpoint_path,
-                                strip_default_attrs,
-                                save_variables=False,
-                                mode=mode,
-                                export_tags=export_tags,
-                                check_variables=False))
+      (super(TPUEstimator, self)._add_meta_graph_for_mode(
+          builder,
+          input_receiver_fn_map,
+          checkpoint_path,
+          strip_default_attrs,
+          save_variables=False,
+          mode=mode,
+          export_tags=export_tags,
+          check_variables=False))
 
   def _call_model_fn(self, features, labels, mode, config):
     if mode == _REWRITE_FOR_INFERENCE_MODE:
       return self._call_model_fn_for_inference(features, labels, mode, config)
     else:
-      return super(TPUEstimator, self)._call_model_fn(
-          features, labels, mode, config)
+      return super(TPUEstimator, self)._call_model_fn(features, labels, mode,
+                                                      config)
 
   def _call_model_fn_for_inference(self, features, labels, mode, config):
     """Wraps `_call_model_fn` for `export_savedmodel`."""
@@ -2243,8 +2227,7 @@ class TPUEstimator(estimator_lib.Estimator):
       # from `computation` for rewriting.
       tensors_dict = collections.OrderedDict(
           (k, _export_output_to_tensors(v))
-          for k, v in six.iteritems(estimator_spec.export_outputs)
-      )
+          for k, v in six.iteritems(estimator_spec.export_outputs))
       tensors = nest.flatten(tensors_dict)
       tpu_tensors = [t for t in tensors if _is_tpu_tensor(t)]
 
@@ -2267,9 +2250,10 @@ class TPUEstimator(estimator_lib.Estimator):
       else:
         # Only fetching `tpu_tensors_on_cpu` does not trigger
         # TPU computation and blocks, so we add the control dependency here.
-        control_inputs = (tpu_tensors_on_cpu
-                          if isinstance(tpu_tensors_on_cpu, (list, tuple))
-                          else (tpu_tensors_on_cpu,))
+        control_inputs = (
+            tpu_tensors_on_cpu if isinstance(tpu_tensors_on_cpu,
+                                             (list, tuple)) else
+            (tpu_tensors_on_cpu,))
         with ops.control_dependencies(control_inputs):
           new_tensors.append(array_ops.identity(t))
 
@@ -2279,8 +2263,7 @@ class TPUEstimator(estimator_lib.Estimator):
     export_outputs = estimator_spec.export_outputs
     new_export_outputs = collections.OrderedDict(
         (k, _clone_export_output_with_tensors(export_outputs[k], v))
-        for k, v in six.iteritems(new_tensors_dict)
-    )
+        for k, v in six.iteritems(new_tensors_dict))
 
     return estimator_spec._replace(export_outputs=new_export_outputs)
 
@@ -2344,9 +2327,9 @@ class TPUEstimator(estimator_lib.Estimator):
       mode: ModeKeys
 
     Returns:
-      Either features or (features, labels) where features and labels are:
-        features - `Tensor` or dictionary of string feature name to `Tensor`.
-        labels - `Tensor` or dictionary of `Tensor` with labels.
+      In TPU mode, returns an input_fn to be called later in model_fn.
+      Otherwise, calls the input_fn and returns either fatures or
+        (features, labels).
 
     Raises:
       ValueError: if input_fn takes invalid arguments or does not have `params`.
@@ -2374,8 +2357,8 @@ class TPUEstimator(estimator_lib.Estimator):
       # input_fn for use_tpu=True/False.
       batch_size_for_input_fn = ctx.batch_size_for_input_fn
       if batch_size_for_input_fn is not None:
-        _add_item_to_params(kwargs['params'],
-                            _BATCH_SIZE_KEY, batch_size_for_input_fn)
+        _add_item_to_params(kwargs['params'], _BATCH_SIZE_KEY,
+                            batch_size_for_input_fn)
 
       # For export_savedmodel, input_fn is never passed to Estimator. So,
       # `is_export_mode` must be False.
@@ -2420,24 +2403,32 @@ class TPUEstimator(estimator_lib.Estimator):
     self._rendezvous[model_fn_lib.ModeKeys.TRAIN] = rendezvous
     try:
       return super(TPUEstimator, self).train(
-          input_fn=input_fn, hooks=hooks, steps=steps, max_steps=max_steps,
-          saving_listeners=saving_listeners
-      )
+          input_fn=input_fn,
+          hooks=hooks,
+          steps=steps,
+          max_steps=max_steps,
+          saving_listeners=saving_listeners)
     except Exception:  # pylint: disable=broad-except
       rendezvous.record_error('training_loop', sys.exc_info())
     finally:
       rendezvous.record_done('training_loop')
       rendezvous.raise_errors()
 
-  def evaluate(self, input_fn, steps=None, hooks=None, checkpoint_path=None,
+  def evaluate(self,
+               input_fn,
+               steps=None,
+               hooks=None,
+               checkpoint_path=None,
                name=None):
     rendezvous = error_handling.ErrorRendezvous(num_sources=3)
     self._rendezvous[model_fn_lib.ModeKeys.EVAL] = rendezvous
     try:
       return super(TPUEstimator, self).evaluate(
-          input_fn, steps=steps, hooks=hooks, checkpoint_path=checkpoint_path,
-          name=name
-      )
+          input_fn,
+          steps=steps,
+          hooks=hooks,
+          checkpoint_path=checkpoint_path,
+          name=name)
     except Exception:  # pylint: disable=broad-except
       rendezvous.record_error('evaluation_loop', sys.exc_info())
     finally:
@@ -2531,25 +2522,25 @@ class TPUEstimator(estimator_lib.Estimator):
           if shutdown_mode:
             if shutdown_mode == 'shutdown_worker':
               finalizer_hooks = [
-                  session_support.ShutdownLameWorkers(timeout_ms=60*1000),
+                  session_support.ShutdownLameWorkers(timeout_ms=60 * 1000),
               ]
             elif shutdown_mode == 'shutdown_computation':
               finalizer_hooks = [
-                  session_support.RestartComputation(timeout_ms=60*1000),
+                  session_support.RestartComputation(timeout_ms=60 * 1000),
               ]
             else:
-              raise ValueError('Unknown TF_TPU_GRACEFUL_SHUTDOWN_MODE "%s"' %
-                               shutdown_mode)
+              raise ValueError(
+                  'Unknown TF_TPU_GRACEFUL_SHUTDOWN_MODE "%s"' % shutdown_mode)
 
-            shutdown_hooks.append(session_support.GracefulShutdownHook(
-                checkpoint_prefix=self.model_dir + '/model.ckpt',
-                on_shutdown_hooks=finalizer_hooks
-            ))
+            shutdown_hooks.append(
+                session_support.GracefulShutdownHook(
+                    checkpoint_prefix=self.model_dir + '/model.ckpt',
+                    on_shutdown_hooks=finalizer_hooks))
 
           with ops.control_dependencies([loss]):
             global_step = array_ops.identity(training.get_global_step())
           hooks = input_hooks + shutdown_hooks
-          logging_hook_frequency = (    # Divide and round up
+          logging_hook_frequency = (  # Divide and round up
               (self._log_every_n_steps +
                self._config.tpu_config.iterations_per_loop - 1) //
               self._config.tpu_config.iterations_per_loop)
@@ -2563,14 +2554,13 @@ class TPUEstimator(estimator_lib.Estimator):
                   rendezvous=self._rendezvous[mode],
               ),
               InstallSignalHandlerHook(),
-              training.LoggingTensorHook(
-                  {
-                      'loss': array_ops.identity(loss),
-                      'step': global_step,
-                  },
-                  every_n_iter=logging_hook_frequency)
+              training.LoggingTensorHook({
+                  'loss': array_ops.identity(loss),
+                  'step': global_step,
+              },
+                                         every_n_iter=logging_hook_frequency)
           ])
-          examples_hook._set_steps_per_run(   # pylint: disable=protected-access
+          examples_hook._set_steps_per_run(  # pylint: disable=protected-access
               self._config.tpu_config.iterations_per_loop)
           hooks.append(examples_hook)
 
@@ -2585,7 +2575,7 @@ class TPUEstimator(estimator_lib.Estimator):
                 save_secs=self._config.save_checkpoints_secs,
                 save_steps=self._config.save_checkpoints_steps,
                 scaffold=scaffold)
-            checkpoint_hook._set_steps_per_run(   # pylint: disable=protected-access
+            checkpoint_hook._set_steps_per_run(  # pylint: disable=protected-access
                 self._config.tpu_config.iterations_per_loop)
             chief_hooks.append(checkpoint_hook)
 
@@ -2611,10 +2601,9 @@ class TPUEstimator(estimator_lib.Estimator):
           total_loss, host_calls, scaffold, eval_hooks = _eval_on_tpu_system(
               ctx, model_fn_wrapper, dequeue_fn)
           iterations_per_loop_var = _create_or_get_iterations_per_loop()
-          mean_loss = math_ops.div(total_loss,
-                                   math_ops.cast(
-                                       iterations_per_loop_var,
-                                       dtype=total_loss.dtype))
+          mean_loss = math_ops.div(
+              total_loss,
+              math_ops.cast(iterations_per_loop_var, dtype=total_loss.dtype))
 
           # Creates a dummy metric update_op for all metrics. Estimator expects
           # all metrics in eval_metric_ops have update_op and calls them one by
@@ -2705,7 +2694,8 @@ class TPUEstimator(estimator_lib.Estimator):
 
         predictions = host_call_ret['predictions']
         _verify_cross_hosts_transfer_size(
-            predictions, message=(
+            predictions,
+            message=(
                 'The estimated size for TPUEstimatorSpec.predictions is too '
                 'large.'))
         signals = host_call_ret['signals']
@@ -2751,7 +2741,8 @@ def _export_output_to_tensors(export_output):
 
   Args:
     export_output: an `ExportOutput` object such as `ClassificationOutput`,
-            `RegressionOutput`, or `PredictOutput`.
+      `RegressionOutput`, or `PredictOutput`.
+
   Returns:
     a list of tensors used in export_output.
 
@@ -2776,7 +2767,7 @@ def _clone_export_output_with_tensors(export_output, tensors):
 
   Args:
     export_output: an `ExportOutput` object such as `ClassificationOutput`,
-            `RegressionOutput`, or `PredictOutput`.
+      `RegressionOutput`, or `PredictOutput`.
     tensors: a list of `Tensors` used to construct a new `export_output`.
 
   Returns:
@@ -2813,9 +2804,8 @@ def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
   ) = model_fn_wrapper.convert_to_single_tpu_eval_step(dequeue_fn)
 
   def multi_tpu_eval_steps_on_single_shard():
-    return training_loop.repeat(
-        iterations_per_loop_var,
-        single_tpu_eval_step, [_ZERO_LOSS])
+    return training_loop.repeat(iterations_per_loop_var, single_tpu_eval_step,
+                                [_ZERO_LOSS])
 
   (loss,) = tpu.shard(
       multi_tpu_eval_steps_on_single_shard,
@@ -2837,9 +2827,8 @@ def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
        model_fn_wrapper.convert_to_single_tpu_train_step(dequeue_fn))
 
   def multi_tpu_train_steps_on_single_shard():
-    return training_loop.repeat(
-        iterations_per_loop_var,
-        single_tpu_train_step, [_INITIAL_LOSS])
+    return training_loop.repeat(iterations_per_loop_var, single_tpu_train_step,
+                                [_INITIAL_LOSS])
 
   (loss,) = tpu.shard(
       multi_tpu_train_steps_on_single_shard,
@@ -3185,8 +3174,8 @@ class _InputsWithStoppingSignals(_Inputs):
 
       if add_padding:
         padding_mask, features, labels = (
-            _PaddingSignals.pad_features_and_labels(
-                features, labels, batch_size))
+            _PaddingSignals.pad_features_and_labels(features, labels,
+                                                    batch_size))
 
         new_input_dict['features'] = features
         if labels is not None:
@@ -3199,7 +3188,8 @@ class _InputsWithStoppingSignals(_Inputs):
         padding_mask = None
 
       new_input_dict['signals'] = _StopSignals(
-          stop=stop, batch_size=batch_size, padding_mask=padding_mask).as_dict()
+          stop=stop, batch_size=batch_size,
+          padding_mask=padding_mask).as_dict()
 
       return new_input_dict
 
@@ -3242,8 +3232,8 @@ class _StopSignals(object):
     if isinstance(scalar_stopping_signal, ops.Tensor):
       # STOPPING_SIGNAL is a constant True. Here, the logical_and is just the TF
       # way to express the bool check whether scalar_stopping_signal is True.
-      return math_ops.logical_and(
-          scalar_stopping_signal, _StopSignals.STOPPING_SIGNAL)
+      return math_ops.logical_and(scalar_stopping_signal,
+                                  _StopSignals.STOPPING_SIGNAL)
     else:
       # For non Tensor case, it is used in SessionRunHook. So, we cannot modify
       # the graph anymore. Here, we use pure Python.
@@ -3262,7 +3252,8 @@ class _PaddingSignals(object):
     batch_size_tensor = constant_op.constant(batch_size, dtypes.int32)
 
     check_greater = check_ops.assert_greater_equal(
-        batch_size_tensor, real_batch_size,
+        batch_size_tensor,
+        real_batch_size,
         data=(batch_size_tensor, real_batch_size),
         message='The real batch size should not be greater than batch_size.')
 
@@ -3286,8 +3277,8 @@ class _PaddingSignals(object):
     if labels is not None:
       labels = nest_pad(labels)
 
-    padding_mask = _PaddingSignals._padding_mask(
-        real_batch_size, missing_count, batch_size)
+    padding_mask = _PaddingSignals._padding_mask(real_batch_size, missing_count,
+                                                 batch_size)
 
     return padding_mask, features, labels
 
@@ -3335,20 +3326,20 @@ class _PaddingSignals(object):
 
   @staticmethod
   def _find_any_tensor(batch_features):
-    tensors = [x for x in nest.flatten(batch_features)
-               if isinstance(x, ops.Tensor)]
+    tensors = [
+        x for x in nest.flatten(batch_features) if isinstance(x, ops.Tensor)
+    ]
     if not tensors:
       raise ValueError('Cannot find any Tensor in features dict.')
     return tensors[0]
 
   @staticmethod
   def _padding_mask(real_batch_size, missing_count, batch_size):
-    padding_mask = array_ops.concat(
-        [
-            array_ops.zeros((real_batch_size,), dtype=dtypes.int32),
-            array_ops.ones((missing_count,), dtype=dtypes.int32)
-        ],
-        axis=0)
+    padding_mask = array_ops.concat([
+        array_ops.zeros((real_batch_size,), dtype=dtypes.int32),
+        array_ops.ones((missing_count,), dtype=dtypes.int32)
+    ],
+                                    axis=0)
     padding_mask.set_shape((batch_size,))
     return padding_mask
 
@@ -3366,9 +3357,11 @@ def _verify_cross_hosts_transfer_size(tensor_dict, message):
         '{} The transfer size is larger than the protobuf limit. Please '
         'consider to use Tensors with smaller shapes or reduce batch '
         'size. Given:\n'
-        '{}'.format(message, '\n'.join([
-            ' -- Key: {}, Shape: {}'.format(k, v)
-            for k, v in tensor_structure.items()])))
+        '{}'.format(
+            message, '\n'.join([
+                ' -- Key: {}, Shape: {}'.format(k, v)
+                for k, v in tensor_structure.items()
+            ])))
 
 
 def _add_item_to_params(params, key, value):
@@ -3397,8 +3390,8 @@ def export_estimator_savedmodel(estimator,
     estimator: `Estimator` with which model has been trained.
     export_dir_base: A string containing a directory in which to create
       timestamped subdirectories containing exported SavedModels.
-    serving_input_receiver_fn: A function that takes no argument and
-      returns a `ServingInputReceiver` or `TensorServingInputReceiver`.
+    serving_input_receiver_fn: A function that takes no argument and returns a
+      `ServingInputReceiver` or `TensorServingInputReceiver`.
     assets_extra: A dict specifying how to populate the assets.extra directory
       within the exported SavedModel, or `None` if no extra assets are needed.
     as_text: whether to write the SavedModel proto in text format.
@@ -3422,7 +3415,5 @@ def export_estimator_savedmodel(estimator,
       eval_batch_size=2048,  # Does not matter.
   )
   return est.export_savedmodel(export_dir_base, serving_input_receiver_fn,
-                               assets_extra,
-                               as_text,
-                               checkpoint_path,
+                               assets_extra, as_text, checkpoint_path,
                                strip_default_attrs)
-- 
GitLab


From 47b04fdb3ee2491dfb0306ea56617d0e95b38c27 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Sat, 13 Oct 2018 17:42:53 -0700
Subject: [PATCH 0944/1085] Rolling forward changing distribution strategies to
 use MultiDeviceIterator. The underlying issue with NaN's has now been
 resolved.

PiperOrigin-RevId: 217014692
---
 tensorflow/contrib/distribute/python/BUILD    |  28 +--
 .../distribute/python/metrics_v1_test.py      |   3 +-
 .../distribute/python/minimize_loss_test.py   |  26 +-
 .../python/mirrored_strategy_multigpu_test.py |  12 +-
 .../contrib/distribute/python/monitor.py      |   1 +
 .../distribute/python/optimizer_v2_test.py    |   8 +-
 .../distribute/python/prefetching_ops_v2.py   | 232 ------------------
 .../python/prefetching_ops_v2_test.py         |  90 -------
 .../contrib/distribute/python/step_fn.py      |   7 +-
 .../contrib/distribute/python/step_fn_test.py |   1 +
 .../contrib/distribute/python/values.py       |  40 ++-
 .../contrib/distribute/python/values_test.py  |  23 +-
 .../data/ops/multi_device_iterator_ops.py     |  12 +
 13 files changed, 90 insertions(+), 393 deletions(-)
 delete mode 100644 tensorflow/contrib/distribute/python/prefetching_ops_v2.py
 delete mode 100644 tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 76d5b59ce1..dc2964568b 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -22,7 +22,6 @@ py_library(
     visibility = ["//tensorflow:internal"],
     deps = [
         ":input_ops",
-        ":prefetching_ops_v2",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:device_util",
@@ -31,6 +30,7 @@ py_library(
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:training",
         "//tensorflow/python:util",
+        "//tensorflow/python/data/ops:multi_device_iterator_ops",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/training/checkpointable:base",
         "@six_archive//:six",
@@ -666,32 +666,6 @@ cuda_py_test(
     ],
 )
 
-py_library(
-    name = "prefetching_ops_v2",
-    srcs = ["prefetching_ops_v2.py"],
-    deps = [
-        "//tensorflow/contrib/data/python/ops:prefetching_ops",
-        "//tensorflow/python:experimental_dataset_ops_gen",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/util:nest",
-        "//tensorflow/python/data/util:sparse",
-    ],
-)
-
-cuda_py_test(
-    name = "prefetching_ops_v2_test",
-    srcs = ["prefetching_ops_v2_test.py"],
-    additional_deps = [
-        ":prefetching_ops_v2",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/data/ops:iterator_ops",
-    ],
-)
-
 py_library(
     name = "input_ops",
     srcs = ["input_ops.py"],
diff --git a/tensorflow/contrib/distribute/python/metrics_v1_test.py b/tensorflow/contrib/distribute/python/metrics_v1_test.py
index ae4189eb1c..2c79a8bfd3 100644
--- a/tensorflow/contrib/distribute/python/metrics_v1_test.py
+++ b/tensorflow/contrib/distribute/python/metrics_v1_test.py
@@ -96,7 +96,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
   def _test_metric(self, distribution, dataset_fn, metric_fn, expected_fn):
     with ops.Graph().as_default(), distribution.scope():
       iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+          dataset_fn).make_initializable_iterator()
       if isinstance(distribution, tpu_strategy.TPUStrategy):
         def step_fn(ctx, inputs):
           value, update = distribution.call_for_each_tower(
@@ -120,6 +120,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
         # replace "distribution.num_towers" with "1".
         batches_per_update = distribution.num_towers
 
+      self.evaluate(iterator.initializer)
       self.evaluate(distribution.initialize())
       self.evaluate(variables.local_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py
index 60e134055f..3c4544a39e 100644
--- a/tensorflow/contrib/distribute/python/minimize_loss_test.py
+++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py
@@ -41,6 +41,14 @@ from tensorflow.python.ops.losses import losses_impl
 
 class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
 
+  def _get_iterator(self, ds):
+    if context.executing_eagerly():
+      iterator = ds.make_one_shot_iterator()
+    else:
+      iterator = ds.make_initializable_iterator()
+      self.evaluate(iterator.initializer)
+    return iterator
+
   @combinations.generate(
       combinations.times(
           combinations.distributions_and_v1_optimizers(),
@@ -62,8 +70,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             distribution.call_for_each_tower(
                 model_fn, *inputs, run_concurrently=layer.built))
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -99,8 +106,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       model_fn, dataset_fn, layer = minimize_loss_example(
           optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss)
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.group(
@@ -159,8 +165,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             distribution.call_for_each_tower(
                 model_fn, *inputs, run_concurrently=layer.built))
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -239,8 +244,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
           fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS)
         return control_flow_ops.group(fetches)
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -333,8 +337,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             distribution.call_for_each_tower(
                 model_fn, x, y, run_concurrently=False))
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         return distribution.run_steps_on_dataset(
@@ -427,8 +430,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             output=loss)
         return distribution.group(train_op)
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
       def run_step():
         initial_loss = lambda: constant_op.constant(1e7)
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index ed36639ce8..fd833c772d 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -307,9 +307,15 @@ class MirroredStrategyVariableCreationTest(test.TestCase):
 
     dist = mirrored_strategy.MirroredStrategy(
         ["/device:GPU:0", "/device:CPU:0"])
-    features = dist.distribute_dataset(
-        lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10)
-    ).make_one_shot_iterator().get_next()
+    ds = dist.distribute_dataset(
+        lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10))
+    if context.executing_eagerly():
+      iterator = ds.make_one_shot_iterator()
+    else:
+      iterator = ds.make_initializable_iterator()
+      self.evaluate([iterator.initializer])
+
+    features = iterator.get_next()
 
     with dist.scope():
       result = dist.call_for_each_tower(
diff --git a/tensorflow/contrib/distribute/python/monitor.py b/tensorflow/contrib/distribute/python/monitor.py
index 7644acedc9..17b7ab74f6 100644
--- a/tensorflow/contrib/distribute/python/monitor.py
+++ b/tensorflow/contrib/distribute/python/monitor.py
@@ -51,6 +51,7 @@ class Monitor(object):
     else:
       if session is None:
         raise ValueError("Should provide a `session` in Graph mode.")
+      session.run(step_callable._iterator.initializer)  # pylint: disable=protected-access
       self._run_step = session.make_callable(step_callable())
       session.run(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/optimizer_v2_test.py b/tensorflow/contrib/distribute/python/optimizer_v2_test.py
index 6e9ba37a19..3064433129 100644
--- a/tensorflow/contrib/distribute/python/optimizer_v2_test.py
+++ b/tensorflow/contrib/distribute/python/optimizer_v2_test.py
@@ -42,8 +42,11 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase):
       model_fn, dataset_fn, layer = minimize_loss_example(
           optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss)
 
-      iterator = distribution.distribute_dataset(
-          dataset_fn).make_one_shot_iterator()
+      ds = distribution.distribute_dataset(dataset_fn)
+      if context.executing_eagerly():
+        iterator = ds.make_one_shot_iterator()
+      else:
+        iterator = ds.make_initializable_iterator()
 
       def run_step():
         return control_flow_ops.group(distribution.unwrap(
@@ -52,6 +55,7 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase):
 
       if not context.executing_eagerly():
         with self.cached_session() as sess:
+          sess.run(iterator.initializer)
           run_step = sess.make_callable(run_step())
         self.evaluate(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
deleted file mode 100644
index d48aa9c89b..0000000000
--- a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Extension of prefetching_ops to support more than one device."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import warnings
-
-from tensorflow.python.data.experimental.ops import prefetching_ops
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.data.util import nest as data_nest
-from tensorflow.python.data.util import sparse
-from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import function
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
-from tensorflow.python.util import nest
-
-
-# pylint: disable=protected-access
-class _PrefetchToDeviceIterator(object):
-  """A replacement for `tf.data.Iterator` that prefetches to another device.
-
-  Args:
-    input_dataset: The input dataset.
-    one_shot: If true, we make a one shot iterator that's already initialized.
-    devices: Devices on which to prefetch.
-    buffer_size: Size of the prefetching buffer.
-    shared_name: (Optional.) If non-empty, the returned iterator will be shared
-      under the given name across multiple sessions that share the same devices
-      (e.g. when using a remote server). Only used if one_shot is False.
-
-  Returns:
-    An Iterator type object.
-  """
-
-  def __init__(self,
-               input_dataset,
-               one_shot,
-               devices,
-               buffer_size,
-               shared_name=None):
-    self._input_dataset = input_dataset
-    self._get_next_call_count = 0
-    self._one_shot = one_shot
-    if shared_name is None:
-      shared_name = ""
-    self._devices = devices
-
-    if self._one_shot:
-      self._input_iterator = input_dataset.make_one_shot_iterator()
-    else:
-      self._input_iterator = iterator_ops.Iterator.from_structure(
-          self._input_dataset.output_types, self._input_dataset.output_shapes,
-          shared_name, self._input_dataset.output_classes)
-    input_iterator_handle = self._input_iterator.string_handle()
-
-    @function.Defun(dtypes.string)
-    def _prefetch_fn(handle):
-      """Prefetches one element from `input_iterator`."""
-      remote_iterator = iterator_ops.Iterator.from_string_handle(
-          handle, self._input_iterator.output_types,
-          self._input_iterator.output_shapes,
-          self._input_iterator.output_classes)
-      ret = remote_iterator.get_next()
-      return nest.flatten(sparse.serialize_sparse_tensors(ret))
-
-    target_device = ged_ops.experimental_iterator_get_device(
-        self._input_iterator._iterator_resource)
-    self._buffering_resources = []
-    for device in nest.flatten(self._devices):
-      with ops.device(device):
-        buffer_resource_handle = prefetching_ops.function_buffering_resource(
-            f=_prefetch_fn,
-            output_types=data_nest.flatten(
-                sparse.as_dense_types(self._input_dataset.output_types,
-                                      self._input_dataset.output_classes)),
-            target_device=target_device,
-            string_arg=input_iterator_handle,
-            buffer_size=buffer_size,
-            shared_name=shared_name)
-        self._buffering_resources.append(buffer_resource_handle)
-
-    if not self._one_shot:
-      reset_ops = []
-      for buffer_resource in self._buffering_resources:
-        reset_ops.append(
-            ged_ops.experimental_function_buffering_resource_reset(
-                buffer_resource))
-      with ops.control_dependencies(reset_ops):
-        self._initializer = self._input_iterator.make_initializer(
-            self._input_dataset)
-
-  def get_next(self, name=None):
-    """See `tf.data.Iterator.get_next`."""
-    self._get_next_call_count += 1
-    if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD:
-      warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE)
-
-    flat_result = []
-    # TODO(priyag): This will fail if the input size (typically number of
-    # batches) is not divisible by number of devices.
-    # How do we handle that more gracefully / let the user know?
-    for buffer_resource in self._buffering_resources:
-      flat_ret = ged_ops.experimental_function_buffering_resource_get_next(
-          buffer_resource,
-          output_types=data_nest.flatten(
-              sparse.as_dense_types(self.output_types, self.output_classes)),
-          name=name)
-
-      ret = sparse.deserialize_sparse_tensors(
-          data_nest.pack_sequence_as(self.output_types, flat_ret),
-          self.output_types, self.output_shapes, self.output_classes)
-
-      for tensor, shape in zip(
-          data_nest.flatten(ret), data_nest.flatten(self.output_shapes)):
-        if isinstance(tensor, ops.Tensor):
-          tensor.set_shape(shape)
-      flat_result.append(ret)
-
-    return nest.pack_sequence_as(self._devices, flat_result)
-
-  @property
-  def initializer(self):
-    if self._one_shot:
-      raise NotImplementedError("Can't initialize a one_shot_iterator")
-    return self._initializer
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-
-# pylint: enable=protected-access
-
-
-class _PrefetchToDeviceDataset(dataset_ops.UnaryDataset):
-  """A `Dataset` whose iterator prefetches elements to other device(s)."""
-
-  def __init__(self, input_dataset, devices, buffer_size):
-    super(_PrefetchToDeviceDataset, self).__init__(input_dataset)
-    self._input_dataset = input_dataset
-    self._devices = devices
-    self._buffer_size = buffer_size if buffer_size is not None else 1
-
-  def make_one_shot_iterator(self):
-    return _PrefetchToDeviceIterator(
-        self._input_dataset,
-        one_shot=True,
-        devices=self._devices,
-        buffer_size=self._buffer_size)
-
-  def make_initializable_iterator(self, shared_name=None):
-    if context.executing_eagerly():
-      raise RuntimeError(
-          "make_initializable_iterator is not supported when eager "
-          "execution is enabled.")
-
-    return _PrefetchToDeviceIterator(
-        self._input_dataset,
-        one_shot=False,
-        devices=self._devices,
-        buffer_size=self._buffer_size,
-        shared_name=shared_name)
-
-  def _as_variant_tensor(self):
-    # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset
-    # transformation methods is called.
-    # TODO(mrry): Investigate support for chaining further transformations after
-    # the prefetch, including GPU support.
-    raise NotImplementedError("`prefetch_to_devices()` must be the last "
-                              "transformation in a dataset pipeline.")
-
-  # TODO(priyag): Fix the output types, shapes and classes to match the result
-  # of get_next (which has the additional nesting layer of devices now).
-  @property
-  def output_types(self):
-    return self._input_dataset.output_types
-
-  @property
-  def output_shapes(self):
-    return self._input_dataset.output_shapes
-
-  @property
-  def output_classes(self):
-    return self._input_dataset.output_classes
-
-
-def prefetch_to_devices(devices, buffer_size=None):
-  """A transformation that prefetches dataset values to the given `devices`.
-
-  NOTE: Although the transformation creates a `tf.data.Dataset`, the
-  transformation must be the final `Dataset` in the input pipeline.
-
-  Args:
-    devices: A nested structure of devices on which to prefetch the data. It can
-      be a single device name, or a tuple or list of device names.
-    buffer_size: (Optional.) The number of elements to buffer on each device.
-      Defaults to an automatically chosen value.
-
-  Returns:
-    A `Dataset` transformation function, which can be passed to
-    `tf.data.Dataset.apply`.
-  """
-
-  def _apply_fn(dataset):
-    return _PrefetchToDeviceDataset(dataset, devices, buffer_size)
-
-  return _apply_fn
diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
deleted file mode 100644
index 16799104e8..0000000000
--- a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for prefetching_ops_v2."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.distribute.python import prefetching_ops_v2
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import test_util
-from tensorflow.python.platform import test
-
-
-class PrefetchingOpsV2Test(test.TestCase):
-
-  def testPrefetchToOneDevice(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops_v2.prefetch_to_devices("/gpu:0"))
-
-    iterator = device_dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  def testPrefetchToTwoDevicesInAList(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
-
-    iterator = device_dataset.make_one_shot_iterator()
-    next_element = iterator.get_next()
-
-    output = []
-    # TODO(rohanj): Modify test to go till the end of the dataset when we
-    # switch to MultiDeviceIterator.
-    with self.cached_session() as sess:
-      for _ in range(4):
-        result = sess.run(next_element)
-        self.assertEqual(2, len(result))
-        output.extend(result)
-      self.assertEquals(set(range(8)), set(output))
-
-  def testPrefetchToTwoDevicesWithReinit(self):
-    if not test_util.is_gpu_available():
-      self.skipTest("No GPU available")
-
-    host_dataset = dataset_ops.Dataset.range(10)
-    device_dataset = host_dataset.apply(
-        prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"]))
-
-    iterator = device_dataset.make_initializable_iterator()
-    next_element = iterator.get_next()
-
-    # TODO(rohanj): Modify test to go till the end of the dataset when we
-    # switch to MultiDeviceIterator.
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer)
-      for _ in range(4):
-        sess.run(next_element)
-      sess.run(iterator.initializer)
-      for _ in range(4):
-        sess.run(next_element)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/distribute/python/step_fn.py b/tensorflow/contrib/distribute/python/step_fn.py
index 1b5a4f64e5..23bf36184f 100644
--- a/tensorflow/contrib/distribute/python/step_fn.py
+++ b/tensorflow/contrib/distribute/python/step_fn.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
 from tensorflow.python.training import optimizer as optimizer_lib
 
 
@@ -50,7 +51,11 @@ class StandardInputStep(Step):
   def __init__(self, dataset_fn, distribution):
     super(StandardInputStep, self).__init__(distribution)
     self._distributed_input = distribution.distribute_dataset(dataset_fn)
-    self._iterator = self._distributed_input.make_one_shot_iterator()
+    if context.executing_eagerly():
+      self._iterator = self._distributed_input.make_one_shot_iterator()
+    else:
+      # TODO(priyag): Expose initializer via some initializer property.
+      self._iterator = self._distributed_input.make_initializable_iterator()
 
 
 class StandardSingleLossStep(StandardInputStep):
diff --git a/tensorflow/contrib/distribute/python/step_fn_test.py b/tensorflow/contrib/distribute/python/step_fn_test.py
index f1ada49fa3..1ff9b9ceec 100644
--- a/tensorflow/contrib/distribute/python/step_fn_test.py
+++ b/tensorflow/contrib/distribute/python/step_fn_test.py
@@ -50,6 +50,7 @@ class SingleLossStepTest(test.TestCase, parameterized.TestCase):
         run_step = single_loss_step
       else:
         with self.cached_session() as sess:
+          sess.run(single_loss_step._iterator.initializer)
           run_step = sess.make_callable(single_loss_step())
       self.evaluate(variables.global_variables_initializer())
 
diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py
index 472cb4230c..c555dc8a71 100644
--- a/tensorflow/contrib/distribute/python/values.py
+++ b/tensorflow/contrib/distribute/python/values.py
@@ -27,7 +27,7 @@ import weakref
 import six
 
 from tensorflow.contrib.distribute.python import input_ops
-from tensorflow.contrib.distribute.python import prefetching_ops_v2
+from tensorflow.python.data.ops import multi_device_iterator_ops
 from tensorflow.python.eager import context
 from tensorflow.python.eager import tape
 from tensorflow.python.framework import device as tf_device
@@ -1089,7 +1089,7 @@ class PerDeviceDataIterator(object):
   def get_next(self, name=None):
     """Scatter the input across devices."""
     if self._prefetch_on_device:
-      data_list = self._iterator.get_next(name=name)
+      data_list = self._iterator.get_next()
       index = dict(zip(self._devices, data_list))
     else:
       batch = self._iterator.get_next(name=name)
@@ -1113,17 +1113,15 @@ class PerDeviceDataset(object):
     self._devices = devices
 
     # Default to using prefetching in graph mode, unless specified.
-    # TODO(priyag): Enable prefetching in eager mode.
+    # TODO(rohanj): Enable prefetching in eager mode.
     self._prefetch_on_device = prefetch_on_device
     if self._prefetch_on_device is None:
       self._prefetch_on_device = not context.executing_eagerly()
     assert not (self._prefetch_on_device and context.executing_eagerly()), (
         "Prefetching is only supported in graph mode currently")
 
-    if self._prefetch_on_device:
-      self._dataset = dataset.apply(
-          prefetching_ops_v2.prefetch_to_devices(self._devices))
-    else:
+    self._dataset = dataset
+    if not self._prefetch_on_device:
       # TODO(priyag): If dropping remainder is not appropriate, find another
       # approach to distributing the dataset when not possible to divide evenly.
       # Possibly not an issue when we start using PartitionedDataset.
@@ -1131,15 +1129,33 @@ class PerDeviceDataset(object):
 
   def make_one_shot_iterator(self):
     """Get a one time use iterator for the distributed PerDeviceDataset."""
+    # Graph mode with one shot iterator is disabled.
+    if not context.executing_eagerly():
+      raise ValueError("Cannot create a one shot iterator. Please use "
+                       "`make_initializable_iterator()` instead.")
+    # Eager mode prefetching would error out in constructor. Only remaining
+    # case is non-prefetching in eager mode. We delegate to
+    # PerDeviceDataIterator to handle that case.
     dataset_iterator = self._dataset.make_one_shot_iterator()
-    return PerDeviceDataIterator(dataset_iterator, self._devices,
-                                 self._prefetch_on_device)
+    return PerDeviceDataIterator(
+        dataset_iterator, self._devices, prefetch_on_device=False)
 
   def make_initializable_iterator(self):
     """Get an initializable iterator for the distributed PerDeviceDataset."""
-    dataset_iterator = self._dataset.make_initializable_iterator()
-    return PerDeviceDataIterator(dataset_iterator, self._devices,
-                                 self._prefetch_on_device)
+    # Eager mode generates already initialized iterators. Hence we cannot create
+    # an initializable iterator.
+    if context.executing_eagerly():
+      raise ValueError("Cannot create initializable iterator in Eager mode. "
+                       "Please use `make_one_shot_iterator` instead.")
+    if self._prefetch_on_device:
+      dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+          self._dataset, self._devices)
+    else:
+      dataset_iterator = self._dataset.make_initializable_iterator()
+    return PerDeviceDataIterator(
+        dataset_iterator,
+        self._devices,
+        prefetch_on_device=self._prefetch_on_device)
 
 
 class MultiWorkerDataIterator(object):
diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py
index 121d2fbb3f..7ef4776ac6 100644
--- a/tensorflow/contrib/distribute/python/values_test.py
+++ b/tensorflow/contrib/distribute/python/values_test.py
@@ -349,7 +349,11 @@ class PerDeviceDatasetTest(test.TestCase):
   def _test_iterator_no_prefetch(self, devices, dataset, expected_values):
     per_device_dataset = values.PerDeviceDataset(
         dataset, devices, prefetch_on_device=False)
-    iterator = per_device_dataset.make_one_shot_iterator()
+    if context.executing_eagerly():
+      iterator = per_device_dataset.make_one_shot_iterator()
+    else:
+      iterator = per_device_dataset.make_initializable_iterator()
+      self.evaluate([iterator.initializer])
 
     for expected_value in expected_values:
       next_element = iterator.get_next()
@@ -366,21 +370,14 @@ class PerDeviceDatasetTest(test.TestCase):
     if not context.executing_eagerly():
       per_device_dataset = values.PerDeviceDataset(
           dataset, devices, prefetch_on_device=True)
-      iterator = per_device_dataset.make_one_shot_iterator()
+      iterator = per_device_dataset.make_initializable_iterator()
+      self.evaluate([iterator.initializer])
 
-      # With prefetching, we cannot guarantee which input ends up on which
-      # device, so we verify that the complete set seen on all devices is
-      # correct, and equal numbers are distributed to each device.
-      combined_actual = []
-      combined_expected = []
       for expected_value in expected_values:
         next_element = iterator.get_next()
-        combined_actual.extend(
-            self.evaluate(
-                [values.select_device(d, next_element) for d in devices]))
-        combined_expected.extend(expected_value)
-
-      self.assertEqual(set(combined_expected), set(combined_actual))
+        computed_value = self.evaluate(
+            [values.select_device(d, next_element) for d in devices])
+        self.assertEqual(expected_value, computed_value)
 
       with self.assertRaises(errors.OutOfRangeError):
         next_element = iterator.get_next()
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index 2086614b7c..b7033cc4ce 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -229,3 +229,15 @@ class MultiDeviceIterator(object):
   @property
   def initializer(self):
     return self._initializer
+
+  @property
+  def output_types(self):
+    return self._dataset.output_types
+
+  @property
+  def output_shapes(self):
+    return self._dataset.output_shapes
+
+  @property
+  def output_classes(self):
+    return self._dataset.output_classes
-- 
GitLab


From 109a0c1b15d5ba7389bb82facd48aeb8477e89af Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 13 Oct 2018 20:16:31 -0700
Subject: [PATCH 0945/1085] Disables logging and summaries of steps, steps/s,
 loss etc. when RunConfig.log_step_count_steps is None.

This is already how Estimator works (as one can see in line 2136 where we explicitly replace log_step_count_steps as None).  It should be backward compatible as current code would have crashed if log_step_count_steps is None.

PiperOrigin-RevId: 217019985
---
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 1a34d017eb..b2fa9eb45c 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -2480,17 +2480,19 @@ class TPUEstimator(estimator_lib.Estimator):
 
         # examples_hook is added to training_hooks for both CPU and TPU
         # execution.
-        examples_hook = ExamplesPerSecondHook(
-            ctx.global_batch_size,
-            output_dir=self.model_dir,
-            every_n_steps=self._log_every_n_steps)
+        if self._log_every_n_steps is not None:
+          examples_hook = ExamplesPerSecondHook(
+              ctx.global_batch_size,
+              output_dir=self.model_dir,
+              every_n_steps=self._log_every_n_steps)
 
         if ctx.is_running_on_cpu(is_export_mode=is_export_mode):
           logging.info('Running %s on CPU', mode)
           estimator_spec = model_fn_wrapper.call_without_tpu(
               features, labels, is_export_mode=is_export_mode)
-          estimator_spec = estimator_spec._replace(
-              training_hooks=estimator_spec.training_hooks + (examples_hook,))
+          if self._log_every_n_steps is not None:
+            estimator_spec = estimator_spec._replace(
+                training_hooks=estimator_spec.training_hooks + (examples_hook,))
           return estimator_spec
 
         assert labels is None, '`labels` passed to `model_fn` must be `None`.'
@@ -2540,10 +2542,6 @@ class TPUEstimator(estimator_lib.Estimator):
           with ops.control_dependencies([loss]):
             global_step = array_ops.identity(training.get_global_step())
           hooks = input_hooks + shutdown_hooks
-          logging_hook_frequency = (  # Divide and round up
-              (self._log_every_n_steps +
-               self._config.tpu_config.iterations_per_loop - 1) //
-              self._config.tpu_config.iterations_per_loop)
           hooks.extend([
               TPUInfeedOutfeedSessionHook(
                   ctx,
@@ -2553,16 +2551,22 @@ class TPUEstimator(estimator_lib.Estimator):
                       run_infeed_loop_on_coordinator),
                   rendezvous=self._rendezvous[mode],
               ),
-              InstallSignalHandlerHook(),
-              training.LoggingTensorHook({
-                  'loss': array_ops.identity(loss),
-                  'step': global_step,
-              },
-                                         every_n_iter=logging_hook_frequency)
+              InstallSignalHandlerHook()
           ])
-          examples_hook._set_steps_per_run(  # pylint: disable=protected-access
-              self._config.tpu_config.iterations_per_loop)
-          hooks.append(examples_hook)
+          if self._log_every_n_steps is not None:
+            logging_hook_frequency = (  # Divide and round up
+                (self._log_every_n_steps +
+                 self._config.tpu_config.iterations_per_loop - 1) //
+                self._config.tpu_config.iterations_per_loop)
+            hooks.append(
+                training.LoggingTensorHook({
+                    'loss': array_ops.identity(loss),
+                    'step': global_step,
+                },
+                                           every_n_iter=logging_hook_frequency))
+            examples_hook._set_steps_per_run(  # pylint: disable=protected-access
+                self._config.tpu_config.iterations_per_loop)
+            hooks.append(examples_hook)
 
           if training_hooks:
             hooks.extend(training_hooks)
-- 
GitLab


From e4b1832849ad57a8a1acb57f56d5c3fc56852671 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Sat, 13 Oct 2018 20:26:25 -0700
Subject: [PATCH 0946/1085] Support real custom ops for Toco --allow_eager_ops
 flow.

PiperOrigin-RevId: 217020295
---
 tensorflow/contrib/lite/toco/tflite/BUILD     |  5 ++++
 tensorflow/contrib/lite/toco/tflite/export.cc |  2 +-
 .../contrib/lite/toco/tflite/operator.cc      | 27 +++++++++++++++++++
 .../contrib/lite/toco/tflite/operator.h       |  5 ++++
 .../contrib/lite/toco/tflite/operator_test.cc |  6 +++++
 5 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/toco/tflite/BUILD b/tensorflow/contrib/lite/toco/tflite/BUILD
index 71cdb7703e..fcb628fec8 100644
--- a/tensorflow/contrib/lite/toco/tflite/BUILD
+++ b/tensorflow/contrib/lite/toco/tflite/BUILD
@@ -26,6 +26,7 @@ cc_library(
         "//tensorflow/contrib/lite/schema:schema_fbs",
         "//tensorflow/contrib/lite/toco:graph_transformations",
         "//tensorflow/contrib/lite/toco:model",
+        "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/memory",
@@ -42,6 +43,7 @@ tf_cc_test(
     deps = [
         ":operator",
         "//tensorflow/contrib/lite/toco:tooling_util",
+        "//tensorflow/core:ops",
         "//tensorflow/core:protos_all_cc",
         "@com_google_googletest//:gtest_main",
         "@flatbuffers",
@@ -71,6 +73,7 @@ tf_cc_test(
     tags = ["no_oss"],
     deps = [
         ":types",
+        "//tensorflow/core:ops",
         "@com_google_googletest//:gtest_main",
     ],
 )
@@ -106,6 +109,7 @@ tf_cc_test(
     deps = [
         ":export",
         "//tensorflow/contrib/lite/schema:schema_fbs",
+        "//tensorflow/core:ops",
         "@com_google_googletest//:gtest_main",
     ],
 )
@@ -141,6 +145,7 @@ tf_cc_test(
         ":import",
         "//tensorflow/contrib/lite:schema_fbs_version",
         "//tensorflow/contrib/lite/schema:schema_fbs",
+        "//tensorflow/core:ops",
         "@com_google_googletest//:gtest_main",
         "@flatbuffers",
     ],
diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc
index c23043789c..8dcb795738 100644
--- a/tensorflow/contrib/lite/toco/tflite/export.cc
+++ b/tensorflow/contrib/lite/toco/tflite/export.cc
@@ -140,7 +140,7 @@ OperatorKey GetOperatorKey(
 
     // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way
     // to populate a regular custom op. We need to find a way to fix this.
-    if (allow_flex_ops) {
+    if (ShouldExportAsFlexOp(allow_flex_ops, unsupported_op.tensorflow_op)) {
       key.is_flex_op = true;
       key.flex_tensorflow_op = tensorflow_op;
       key.custom_code =
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index e08a61d357..1ee71d4341 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -23,6 +23,8 @@ limitations under the License.
 #include "tensorflow/contrib/lite/toco/tflite/types.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace toco {
@@ -1258,6 +1260,16 @@ class TensorFlowUnsupported : public BaseOperator {
       return std::unique_ptr<flexbuffers::Builder>();
     }
 
+    if (ShouldExportAsFlexOp(allow_flex_ops_, node_def.op())) {
+      fbb->Vector([&]() {
+        fbb->String(node_def.op());
+        fbb->String(op.tensorflow_node_def);
+      });
+      fbb->Finish();
+      LOG(INFO) << "Writing flex op: " << node_def.op();
+      return std::unique_ptr<flexbuffers::Builder>(fbb.release());
+    }
+
     bool has_valid_attr = false;
     size_t map_start = fbb->StartMap();
     for (const auto& pair : node_def.attr()) {
@@ -1588,6 +1600,21 @@ std::map<string, std::unique_ptr<BaseOperator>> BuildOperatorByNameMap(
   return result;
 }
 
+bool ShouldExportAsFlexOp(bool allow_flex_ops,
+                          const string& tensorflow_op_name) {
+  // If Flex ops aren't allow at all, simply return false.
+  if (!allow_flex_ops) {
+    return false;
+  }
+  // Check if we can find the `OpDef` for the TensorFlow op. If we can find
+  // it, export the op as an Flex op. Otherwise, export it as a regular custom
+  // op.
+  const tensorflow::OpDef* op_def = nullptr;
+  return tensorflow::OpRegistry::Global()
+      ->LookUpOpDef(tensorflow_op_name, &op_def)
+      .ok();
+}
+
 }  // namespace tflite
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h
index 6e4e0a16d1..6e2a41bf53 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.h
+++ b/tensorflow/contrib/lite/toco/tflite/operator.h
@@ -113,6 +113,11 @@ class BaseOperator {
   OperatorType type_;
 };
 
+// Helper function to determine if a unsupported TensorFlow op should be
+// exported as an Flex op or a regular custom op.
+bool ShouldExportAsFlexOp(bool allow_flex_ops,
+                          const string& tensorflow_op_name);
+
 }  // namespace tflite
 
 }  // namespace toco
diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
index 0bc591e647..66896a49c0 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc
@@ -569,6 +569,12 @@ TEST_F(OperatorTest, TensorFlowUnsupportedWithoutAttr) {
   EXPECT_TRUE(output_node_def.attr().empty());
 }
 
+TEST_F(OperatorTest, TestShouldExportAsFlexOp) {
+  EXPECT_FALSE(ShouldExportAsFlexOp(false, "Conv2D"));
+  EXPECT_TRUE(ShouldExportAsFlexOp(true, "Conv2D"));
+  EXPECT_FALSE(ShouldExportAsFlexOp(true, "MyAwesomeCustomOp"));
+}
+
 }  // namespace
 }  // namespace tflite
 
-- 
GitLab


From f868981c38c31309bc1c3c230ed9aefe5efcdaa5 Mon Sep 17 00:00:00 2001
From: Tom Hennigan <tomhennigan@google.com>
Date: Sat, 13 Oct 2018 22:29:43 -0700
Subject: [PATCH 0947/1085] Update error messages to reference post TF 1.7
 symbol for `Input`.

PiperOrigin-RevId: 217025180
---
 tensorflow/python/keras/engine/network.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 266c48d304..4d0626cc66 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -197,7 +197,7 @@ class Network(base_layer.Layer):
       if not hasattr(x, '_keras_history'):
         cls_name = self.__class__.__name__
         raise ValueError('Input tensors to a ' + cls_name + ' ' +
-                         'must come from `tf.layers.Input`. '
+                         'must come from `tf.keras.Input`. '
                          'Received: ' + str(x) +
                          ' (missing previous layer metadata).')
       # Check that x is an input tensor.
@@ -207,14 +207,14 @@ class Network(base_layer.Layer):
           layer._inbound_nodes and layer._inbound_nodes[0].inbound_layers):
         cls_name = self.__class__.__name__
         logging.warning(cls_name + ' inputs must come from '
-                        '`tf.layers.Input` (thus holding past layer metadata), '
+                        '`tf.keras.Input` (thus holding past layer metadata), '
                         'they cannot be the output of '
                         'a previous non-Input layer. '
                         'Here, a tensor specified as '
                         'input to "' + self.name + '" was not an Input tensor, '
                         'it was generated by layer ' + layer.name + '.\n'
                         'Note that input tensors are '
-                        'instantiated via `tensor = tf.layers.Input(shape)`.\n'
+                        'instantiated via `tensor = tf.keras.Input(shape)`.\n'
                         'The tensor that caused the issue was: ' + str(x.name))
     for x in self.outputs:
       if not hasattr(x, '_keras_history'):
-- 
GitLab


From c11b5458f81c143a0de6905d32cacd893ad2bcd7 Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Sun, 14 Oct 2018 13:46:59 +0800
Subject: [PATCH 0948/1085] Revert "Move bazel.rc to workspace root to support
 bazel-0.18.0"

This reverts commit a74a3217f7ff2dbee2fb618aa658cf666861545c.

Bazel-0.18.0 is changing where it searches for .bazelrc files.
Originally it was removing /tools/bazel.rc and only using /.bazelrc.
This causes issues for gitignoring /.bazelrc and 0.18.0 has temporarily
added tools/bazel.rc back to the list until 0.19. The long term solution
is to use try-import but that statement is new in 0.18 and we are not
going to bump TF's minimum right away. When 0.19 is out things will need
to be changed back and the minimum bumped to 0.18.

Fixes: https://github.com/tensorflow/tensorflow/issues/22762
Fixes: https://github.com/tensorflow/tensorflow/pull/22906
Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 .gitignore                 | 1 +
 .bazelrc => tools/bazel.rc | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)
 rename .bazelrc => tools/bazel.rc (98%)

diff --git a/.gitignore b/.gitignore
index cb65f447d4..1ef4c297ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 .DS_Store
 .ipynb_checkpoints
 node_modules
+/.bazelrc
 /.tf_configure.bazelrc
 /bazel-*
 /bazel_pip
diff --git a/.bazelrc b/tools/bazel.rc
similarity index 98%
rename from .bazelrc
rename to tools/bazel.rc
index 1945078789..cee0b0b064 100644
--- a/.bazelrc
+++ b/tools/bazel.rc
@@ -91,5 +91,3 @@ build:dynamic_kernels --define=dynamic_loaded_kernels=true
 build --define=PREFIX=/usr
 build --define=LIBDIR=$(PREFIX)/lib
 build --define=INCLUDEDIR=$(PREFIX)/include
-
-# Do not commit the tf_configure.bazelrc line
-- 
GitLab


From 5b85449b8d5818358ef1223814fff3f6a70f30cc Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Sat, 13 Oct 2018 23:15:00 -0700
Subject: [PATCH 0949/1085] Return NotFound if the input patterns result in
 empty match

---
 .../kernels/data/matching_files_dataset_op.cc | 38 +++++++++++--------
 .../matching_files_dataset_op_test.py         |  2 +-
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index f6c9860f03..08953ee390 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -130,6 +130,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                   std::move(current_path.first);
               out_tensors->emplace_back(std::move(filepath_tensor));
               *end_of_sequence = false;
+              hasMatch_ = true;
               return Status::OK();
             }
 
@@ -171,7 +172,11 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
         }
 
         *end_of_sequence = true;
-        return Status::OK();
+        if (hasMatch_) {
+          return Status::OK();
+        } else {
+          return errors::NotFound("Don't find any matched files");
+        }
       }
 
      protected:
@@ -182,6 +187,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
         TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("current_pattern"),
                                                current_pattern_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("hasMatch"), hasMatch_));
 
         if (!filepath_queue_.empty()) {
           TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("queue_size"),
@@ -212,6 +219,10 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
 
         TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("current_pattern"),
                                               &current_pattern_));
+        int64 hasMatch;
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("hasMatch"), &hasMatch));
+        hasMatch_ = static_cast<bool>(hasMatch);
 
         if (reader->Contains(full_name("queue_size"))) {
           int64 queue_size;
@@ -224,7 +235,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                 full_name(strings::StrCat("path_", i)), &path));
             TF_RETURN_IF_ERROR(reader->ReadScalar(
                 full_name(strings::StrCat("path_status_", i)), &path_status));
-            filepath_queue_.push(PathStatus(path, path_status));
+            filepath_queue_.push(
+                PathStatus(path, static_cast<bool>(path_status)));
           }
         }
 
@@ -265,20 +277,13 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                     << fs->FileExists(current_dir).ToString() << std::endl;
           ret.Update(s);
 
-          // When the children is empty, 1) return the non-ok status immediately
-          // if it is not NOT_FOUND; 2) continue the search if the status is ok
-          // or NOT_FOUND;
-          if (children.empty()) {
-            if (ret.code() != error::NOT_FOUND || !ret.ok()) {
-              return ret;
-            } else {
-              // On some platforms, fs.GetChildren() return the OK status even
-              // if the path isn't found. fs->FileExists() is used to make
-              // different platforms return the same status when searching a
-              // non-existing path.
-              ret.Update(fs->FileExists(current_dir));
-              continue;
-            }
+          // Handle the error cases: 1) continue the search if the status is ok
+          // or NOT_FOUND; 2) return the non-ok status immediately if it is not
+          // NOT_FOUND.
+          if (ret.code() == error::NOT_FOUND) {
+            continue;
+          } else if (!ret.ok()) {
+            return ret;
           }
 
           // children_dir_status holds is_dir status for children. It can have
@@ -345,6 +350,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           filepath_queue_ GUARDED_BY(mu_);
       size_t current_pattern_index_ GUARDED_BY(mu_) = 0;
       string current_pattern_ GUARDED_BY(mu_);
+      bool hasMatch_ GUARDED_BY(mu_) = false;
     };
 
     const std::vector<string> patterns_;
diff --git a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
index 2a60b653d2..05f8958d2f 100644
--- a/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/matching_files_dataset_op_test.py
@@ -61,7 +61,7 @@ class MatchingFilesDatasetTest(test_base.DatasetTestBase):
     dataset = MatchingFilesDataset(os.path.join(self.tmp_dir, '*'))
     with self.cached_session() as sess:
       next_element = dataset.make_one_shot_iterator().get_next()
-      with self.assertRaises(errors.OutOfRangeError):
+      with self.assertRaises(errors.NotFoundError):
         sess.run(next_element)
 
   def testSimpleDirectory(self):
-- 
GitLab


From b47a10d0a2adb1fe6c9563269f6ecb4f765aea4d Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Sat, 13 Oct 2018 23:58:31 -0700
Subject: [PATCH 0950/1085] Make gen_build_info genrule to use a py_binary.

This makes the script more platform independent.

PiperOrigin-RevId: 217028598
---
 tensorflow/python/BUILD                       | 5 ++++-
 tensorflow/tensorflow.bzl                     | 4 ++--
 tensorflow/tools/build_info/BUILD             | 8 ++++----
 tensorflow/tools/build_info/gen_build_info.py | 1 -
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 1a890a7938..00577426a0 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1113,7 +1113,10 @@ py_test(
     ],
     main = "platform/build_info_test.py",
     srcs_version = "PY2AND3",
-    tags = ["notap"],
+    tags = [
+        "no_pip",
+        "notap",
+    ],
     deps = [
         ":client_testlib",
         ":platform",
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 53a382bd49..34d6fe825f 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1980,9 +1980,9 @@ def tf_py_build_info_genrule():
         name = "py_build_info_gen",
         outs = ["platform/build_info.py"],
         cmd =
-            "$(location //tensorflow/tools/build_info:gen_build_info.py) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"),
+            "$(location //tensorflow/tools/build_info:gen_build_info) --raw_generate \"$@\" --build_config " + if_cuda("cuda", "cpu"),
         local = 1,
-        tools = [clean_dep("//tensorflow/tools/build_info:gen_build_info.py")],
+        tools = [clean_dep("//tensorflow/tools/build_info:gen_build_info")],
     )
 
 def cc_library_with_android_deps(
diff --git a/tensorflow/tools/build_info/BUILD b/tensorflow/tools/build_info/BUILD
index 7307417805..680e68b0b9 100644
--- a/tensorflow/tools/build_info/BUILD
+++ b/tensorflow/tools/build_info/BUILD
@@ -4,8 +4,8 @@ package(default_visibility = ["//tensorflow:internal"])
 
 licenses(["notice"])  # Apache 2.0
 
-exports_files(
-    glob(["gen/*"]) + [
-        "gen_build_info.py",
-    ],
+py_binary(
+    name = "gen_build_info",
+    srcs = ["gen_build_info.py"],
+    srcs_version = "PY2AND3",
 )
diff --git a/tensorflow/tools/build_info/gen_build_info.py b/tensorflow/tools/build_info/gen_build_info.py
index 690214abfb..9ebd168d78 100755
--- a/tensorflow/tools/build_info/gen_build_info.py
+++ b/tensorflow/tools/build_info/gen_build_info.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
-- 
GitLab


From c9e3160160c013fe9f72e23796d5365b1475e3db Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 14 Oct 2018 02:02:07 -0700
Subject: [PATCH 0951/1085] compat: Update forward compatibility horizon to
 2018-10-14

PiperOrigin-RevId: 217034982
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 15b34abc96..ffe686e962 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 13)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 14)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 8b8c3accfb6f536db84860a2302e19c9d632bed4 Mon Sep 17 00:00:00 2001
From: Seunghoon Park <pclove1@gmail.com>
Date: Sun, 14 Oct 2018 10:19:29 -0700
Subject: [PATCH 0952/1085] fix non_max_suppression_with_overlaps() function
 call

- add a simple unit test case
---
 tensorflow/python/ops/image_ops_impl.py |  2 +-
 tensorflow/python/ops/image_ops_test.py | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 5a8e5d8399..7f41814b75 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -2210,7 +2210,7 @@ def non_max_suppression_with_overlaps(overlaps,
     overlap_threshold = ops.convert_to_tensor(
         overlap_threshold, name='overlap_threshold')
     # pylint: disable=protected-access
-    return gen_image_ops._non_max_suppression_v3(
+    return gen_image_ops.non_max_suppression_with_overlaps(
         overlaps, scores, max_output_size, overlap_threshold, score_threshold)
     # pylint: enable=protected-access
 
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 81c2cc526e..438ecc05e0 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -3771,6 +3771,29 @@ class NonMaxSuppressionPaddedTest(test_util.TensorFlowTestCase):
       self.assertAllClose(selected_indices.eval(), [0, 2, 4])
       self.assertEqual(num_valid.eval(), 3)
 
+class NonMaxSuppressionWithOverlapsTest(test_util.TensorFlowTestCase):
+
+  def testSelectOneFromThree(self):
+    overlaps_np = [
+        [1.0, 0.7, 0.2],
+        [0.7, 1.0, 0.0],
+        [0.2, 0.0, 1.0],
+    ]
+    scores_np = [0.7, 0.9, 0.1]
+    max_ouput_size_np = 3
+
+    overlaps = constant_op.constant(overlaps_np)
+    scores = constant_op.constant(scores_np)
+    max_output_size = constant_op.constant(max_ouput_size_np)
+    overlap_threshold = 0.6
+    score_threshold = 0.4
+
+    selected_indices = image_ops.non_max_suppression_with_overlaps(
+        overlaps, scores, max_output_size, overlap_threshold, score_threshold)
+
+    with self.cached_session():
+      self.assertAllClose(selected_indices.eval(), [1])
+
 
 class VerifyCompatibleImageShapesTest(test_util.TensorFlowTestCase):
   """Tests utility function used by ssim() and psnr()."""
-- 
GitLab


From 6582efdc5e40b9196664878cb20e4f6590fcd0ea Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 00:49:13 -0700
Subject: [PATCH 0953/1085] [TF CTC] Fix Returns documentation stanzas to be
 properly formatted (see
 https://www.tensorflow.org/api_docs/python/tf/nn/ctc_greedy_decoder)

PiperOrigin-RevId: 217100215
---
 tensorflow/python/ops/ctc_ops.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py
index 4b0f528dfb..e1071afd8e 100644
--- a/tensorflow/python/ops/ctc_ops.py
+++ b/tensorflow/python/ops/ctc_ops.py
@@ -212,14 +212,19 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
 
   Returns:
     A tuple `(decoded, neg_sum_logits)` where
+
     decoded: A single-element list. `decoded[0]`
       is an `SparseTensor` containing the decoded outputs s.t.:
+
       `decoded.indices`: Indices matrix `(total_decoded_outputs, 2)`.
         The rows store: `[batch, time]`.
+
       `decoded.values`: Values vector, size `(total_decoded_outputs)`.
         The vector stores the decoded classes.
+
       `decoded.dense_shape`: Shape vector, size `(2)`.
         The shape values are: `[batch_size, max_decoded_length]`
+
     neg_sum_logits: A `float` matrix `(batch_size x 1)` containing, for the
         sequence found, the negative of the sum of the greatest logit at each
         timeframe.
@@ -259,14 +264,19 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100,
 
   Returns:
     A tuple `(decoded, log_probabilities)` where
+
     decoded: A list of length top_paths, where `decoded[j]`
       is a `SparseTensor` containing the decoded outputs:
+
       `decoded[j].indices`: Indices matrix `(total_decoded_outputs[j] x 2)`
         The rows store: [batch, time].
+
       `decoded[j].values`: Values vector, size `(total_decoded_outputs[j])`.
         The vector stores the decoded classes for beam j.
+
       `decoded[j].dense_shape`: Shape vector, size `(2)`.
         The shape values are: `[batch_size, max_decoded_length[j]]`.
+
     log_probability: A `float` matrix `(batch_size x top_paths)` containing
         sequence log-probabilities.
   """
@@ -301,14 +311,19 @@ def ctc_beam_search_decoder_v2(inputs, sequence_length, beam_width=100,
 
   Returns:
     A tuple `(decoded, log_probabilities)` where
+
     decoded: A list of length top_paths, where `decoded[j]`
       is a `SparseTensor` containing the decoded outputs:
+
       `decoded[j].indices`: Indices matrix `[total_decoded_outputs[j], 2]`;
         The rows store: `[batch, time]`.
+
       `decoded[j].values`: Values vector, size `[total_decoded_outputs[j]]`.
         The vector stores the decoded classes for beam `j`.
+
       `decoded[j].dense_shape`: Shape vector, size `(2)`.
         The shape values are: `[batch_size, max_decoded_length[j]]`.
+
     log_probability: A `float` matrix `[batch_size, top_paths]` containing
         sequence log-probabilities.
   """
-- 
GitLab


From 63923f0bb1268acb88c0752f8c7f06c821c01094 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 02:02:21 -0700
Subject: [PATCH 0954/1085] compat: Update forward compatibility horizon to
 2018-10-15

PiperOrigin-RevId: 217108509
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index ffe686e962..0a1dd17b89 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 14)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 15)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 55e97b31a21804f6bc2e1f3381e25c47fca4970c Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Mon, 15 Oct 2018 08:09:07 -0700
Subject: [PATCH 0955/1085] Add fuzzer for StringSplitV2.

PiperOrigin-RevId: 217144630
---
 tensorflow/core/kernels/fuzzing/BUILD         |  2 +
 .../kernels/fuzzing/string_split_v2_fuzz.cc   | 65 +++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc

diff --git a/tensorflow/core/kernels/fuzzing/BUILD b/tensorflow/core/kernels/fuzzing/BUILD
index f2e0b2558f..1a254da50e 100644
--- a/tensorflow/core/kernels/fuzzing/BUILD
+++ b/tensorflow/core/kernels/fuzzing/BUILD
@@ -25,6 +25,8 @@ tf_ops_fuzz_target_lib("string_to_number")
 
 tf_ops_fuzz_target_lib("string_split")
 
+tf_ops_fuzz_target_lib("string_split_v2")
+
 tf_ops_fuzz_target_lib("encode_base64")
 
 tf_ops_fuzz_target_lib("decode_base64")
diff --git a/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc b/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc
new file mode 100644
index 0000000000..787bccc15b
--- /dev/null
+++ b/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc
@@ -0,0 +1,65 @@
+/* Copyright 2017 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/kernels/fuzzing/fuzz_session.h"
+
+namespace tensorflow {
+namespace fuzzing {
+
+class FuzzStringSplitV2 : public FuzzSession {
+  void BuildGraph(const Scope& scope) override {
+    auto input =
+        tensorflow::ops::Placeholder(scope.WithOpName("input1"), DT_STRING);
+    auto separator =
+        tensorflow::ops::Placeholder(scope.WithOpName("input2"), DT_STRING);
+    (void)tensorflow::ops::StringSplitV2(scope.WithOpName("output"),
+                                               input, separator);
+  }
+
+  void FuzzImpl(const uint8_t* data, size_t size) final {
+    Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
+    Tensor separator_tensor(tensorflow::DT_STRING, TensorShape({}));
+
+    if (size > 0) {
+      // The spec for split is that the separator should be 0 or 1 characters.
+      // Naturally, fuzz it with something that might be larger. But don't split
+      // on a separator that is too large. Let's say we're picking a separator
+      // of size 0, 1, 2 up to MaxSepSize (a static limit that has been picked
+      // arbitrarily).
+      size_t sep_len = static_cast<size_t>(data[0]) % kMaxSepSize;
+
+      // We still have to handle the case when fuzzing input is shorter than the
+      // minimum length required to get the separator
+      if (sep_len > size) {
+        sep_len = size - 1;
+      }
+      separator_tensor.scalar<string>()() =
+          string(reinterpret_cast<const char*>(data), sep_len);
+      input_tensor.scalar<string>()() = string(
+          reinterpret_cast<const char*>(data + sep_len), size - sep_len);
+    }
+
+    RunTwoInputs(input_tensor, separator_tensor).IgnoreError();
+  }
+
+ private:
+  static const size_t kMaxSepSize = 4;
+};
+
+STANDARD_TF_FUZZ_FUNCTION(FuzzStringSplitV2);
+
+}  // end namespace fuzzing
+}  // end namespace tensorflow
-- 
GitLab


From 61081e22ca0275ee54269fa349be8645f189f621 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 08:18:55 -0700
Subject: [PATCH 0956/1085] Avoid depending on init_tensorflow in
 benchmark_main.

PiperOrigin-RevId: 217145701
---
 tensorflow/contrib/lite/tools/benchmark/BUILD |  2 +-
 .../lite/tools/benchmark/benchmark_main.cc    |  2 -
 .../benchmark/benchmark_plus_flex_main.cc     | 39 +++++++++++++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/contrib/lite/tools/benchmark/benchmark_plus_flex_main.cc

diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD
index af54421155..166eaeea75 100644
--- a/tensorflow/contrib/lite/tools/benchmark/BUILD
+++ b/tensorflow/contrib/lite/tools/benchmark/BUILD
@@ -39,7 +39,7 @@ cc_binary(
 cc_binary(
     name = "benchmark_model_plus_flex",
     srcs = [
-        "benchmark_main.cc",
+        "benchmark_plus_flex_main.cc",
     ],
     copts = common_copts,
     linkopts = tflite_linkopts() + select({
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc
index b9cf6c67d2..372d31e838 100644
--- a/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/contrib/lite/testing/init_tensorflow.h"
 #include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h"
 #include "tensorflow/contrib/lite/tools/benchmark/logging.h"
 
@@ -21,7 +20,6 @@ namespace tflite {
 namespace benchmark {
 
 int Main(int argc, char** argv) {
-  ::tflite::InitTensorFlow();
 #ifdef TFLITE_CUSTOM_OPS_HEADER
   TFLITE_LOG(INFO) << "STARTING with custom ops!";
 #else
diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_plus_flex_main.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_plus_flex_main.cc
new file mode 100644
index 0000000000..b9cf6c67d2
--- /dev/null
+++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_plus_flex_main.cc
@@ -0,0 +1,39 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/testing/init_tensorflow.h"
+#include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h"
+#include "tensorflow/contrib/lite/tools/benchmark/logging.h"
+
+namespace tflite {
+namespace benchmark {
+
+int Main(int argc, char** argv) {
+  ::tflite::InitTensorFlow();
+#ifdef TFLITE_CUSTOM_OPS_HEADER
+  TFLITE_LOG(INFO) << "STARTING with custom ops!";
+#else
+  TFLITE_LOG(INFO) << "STARTING!";
+#endif
+  BenchmarkTfLiteModel benchmark;
+  BenchmarkLoggingListener listener;
+  benchmark.AddListener(&listener);
+  benchmark.Run(argc, argv);
+  return 0;
+}
+}  // namespace benchmark
+}  // namespace tflite
+
+int main(int argc, char** argv) { return tflite::benchmark::Main(argc, argv); }
-- 
GitLab


From c0ebb859f4a47f16917e713396cd77826bfde307 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 08:29:58 -0700
Subject: [PATCH 0957/1085] Add arbitrary axis support to ArgMax

PiperOrigin-RevId: 217147034
---
 .../contrib/lite/kernels/arg_min_max.cc       | 31 ++++++++---
 .../contrib/lite/kernels/arg_min_max_test.cc  | 30 ++++++++---
 .../internal/reference/reference_ops.h        | 52 ++++++++++++-------
 .../contrib/lite/testing/generate_examples.py |  6 +--
 .../testing/generated_examples_zip_test.cc    |  8 ---
 5 files changed, 81 insertions(+), 46 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/arg_min_max.cc b/tensorflow/contrib/lite/kernels/arg_min_max.cc
index b91e348c27..531f4e1f1b 100644
--- a/tensorflow/contrib/lite/kernels/arg_min_max.cc
+++ b/tensorflow/contrib/lite/kernels/arg_min_max.cc
@@ -29,6 +29,19 @@ constexpr int kInputTensor = 0;
 constexpr int kAxis = 1;
 constexpr int kOutputTensor = 0;
 
+TfLiteStatus ResizeOutput(TfLiteContext* context, const TfLiteTensor* input,
+                          const TfLiteTensor* axis, TfLiteTensor* output) {
+  int axis_value = *GetTensorData<int>(axis);
+  if (axis_value < 0) {
+    axis_value += NumDimensions(input);
+  }
+
+  // Copy the input dimensions to output except make the axis dimension 1.
+  TfLiteIntArray* output_dims = TfLiteIntArrayCopy(input->dims);
+  output_dims->data[axis_value] = 1;
+  return context->ResizeTensor(context, output, output_dims);
+}
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -37,10 +50,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* axis = GetInput(context, node, kAxis);
   // Make sure the axis is only 1 dimension.
   TF_LITE_ENSURE_EQ(context, NumElements(axis), 1);
-
   // Make sure the axis is only either int32 or int64.
   TF_LITE_ENSURE(context,
                  axis->type == kTfLiteInt32 || axis->type == kTfLiteInt64);
+
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
   auto* params = reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data);
@@ -72,12 +85,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteError;
   }
 
-  // Copy the input dimensions to output except make the last dimension 1.
   TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
-  TfLiteIntArray* output_size = TfLiteIntArrayCopy(input->dims);
-  output_size->data[NumDimensions(input) - 1] = 1;
 
-  return context->ResizeTensor(context, output, output_size);
+  if (IsConstantTensor(axis)) {
+    TF_LITE_ENSURE_STATUS(ResizeOutput(context, input, axis, output));
+  } else {
+    SetTensorToDynamic(output);
+  }
+
+  return kTfLiteOk;
 }
 
 template <typename T>
@@ -89,12 +105,13 @@ std::function<bool(T, T)> GetComparefunction(bool is_arg_max) {
   }
 }
 
-// The current impl actually ignores the axis argument.
-// Only determine the index of the maximum value in the last dimension.
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* axis = GetInput(context, node, kAxis);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  if (IsDynamicTensor(output)) {
+    TF_LITE_ENSURE_STATUS(ResizeOutput(context, input, axis, output));
+  }
 
 #define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
   optimized_ops::ArgMinMax(                                    \
diff --git a/tensorflow/contrib/lite/kernels/arg_min_max_test.cc b/tensorflow/contrib/lite/kernels/arg_min_max_test.cc
index 90e5fdc532..c8181efc36 100644
--- a/tensorflow/contrib/lite/kernels/arg_min_max_test.cc
+++ b/tensorflow/contrib/lite/kernels/arg_min_max_test.cc
@@ -79,7 +79,6 @@ TEST(ArgMaxOpTest, GetMaxArgFloat) {
   ArgMaxOpModel<int32_t> model({1, 1, 1, 4}, TensorType_FLOAT32,
                                TensorType_INT32, TensorType_INT32);
   model.PopulateTensor<float>(model.input(), {0.1, 0.9, 0.7, 0.3});
-  // Currently only support the last dimension.
   model.PopulateTensor<int>(model.axis(), {3});
   model.Invoke();
 
@@ -91,7 +90,6 @@ TEST(ArgMaxOpTest, GetMaxArgInt) {
   ArgMaxOpModel<int32_t> model({1, 1, 1, 4}, TensorType_INT32, TensorType_INT32,
                                TensorType_INT32);
   model.PopulateTensor<int>(model.input(), {1, 9, 7, 3});
-  // Currently only support the last dimension.
   model.PopulateTensor<int>(model.axis(), {3});
   model.Invoke();
 
@@ -103,7 +101,6 @@ TEST(ArgMaxOpTest, GetMaxArgMulDimensions) {
   ArgMaxOpModel<int32_t> model({1, 1, 2, 4}, TensorType_INT32, TensorType_INT32,
                                TensorType_INT32);
   model.PopulateTensor<int>(model.input(), {1, 2, 7, 8, 1, 9, 7, 3});
-  // Currently only support the last dimension.
   model.PopulateTensor<int>(model.axis(), {3});
   model.Invoke();
 
@@ -111,11 +108,21 @@ TEST(ArgMaxOpTest, GetMaxArgMulDimensions) {
   EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 2, 1}));
 }
 
+TEST(ArgMaxOpTest, GetMaxArgNegativeAxis) {
+  ArgMaxOpModel<int32_t> model({1, 1, 2, 4}, TensorType_INT32, TensorType_INT32,
+                               TensorType_INT32);
+  model.PopulateTensor<int>(model.input(), {1, 2, 7, 8, 1, 9, 7, 3});
+  model.PopulateTensor<int>(model.axis(), {-2});
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({0, 1, 0, 0}));
+  EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4}));
+}
+
 TEST(ArgMaxOpTest, GetMaxArgOutput64) {
   ArgMaxOpModel<int64_t> model({1, 1, 2, 4}, TensorType_INT32, TensorType_INT64,
                                TensorType_INT64);
   model.PopulateTensor<int>(model.input(), {10, 2, 7, 8, 1, 9, 7, 3});
-  // Currently only support the last dimension.
   model.PopulateTensor<int>(model.axis(), {3});
   model.Invoke();
 
@@ -127,7 +134,6 @@ TEST(ArgMinOpTest, GetMinArgFloat) {
   ArgMinOpModel<int32_t> model({1, 1, 1, 4}, TensorType_FLOAT32,
                                TensorType_INT32, TensorType_INT32);
   model.PopulateTensor<float>(model.input(), {0.1, 0.9, 0.7, 0.3});
-  // Currently only support the last dimension.
   model.PopulateTensor<int>(model.axis(), {3});
   model.Invoke();
 
@@ -139,7 +145,6 @@ TEST(ArgMinOpTest, GetMinArgInt) {
   ArgMinOpModel<int32_t> model({1, 1, 1, 4}, TensorType_INT32, TensorType_INT32,
                                TensorType_INT32);
   model.PopulateTensor<int>(model.input(), {1, 9, 7, 3});
-  // Currently only support the last dimension.
   model.PopulateTensor<int>(model.axis(), {3});
   model.Invoke();
 
@@ -151,7 +156,6 @@ TEST(ArgMinOpTest, GetMinArgMulDimensions) {
   ArgMinOpModel<int32_t> model({1, 1, 2, 4}, TensorType_INT32, TensorType_INT32,
                                TensorType_INT32);
   model.PopulateTensor<int>(model.input(), {1, 2, 7, 8, 1, 9, 7, 3});
-  // Currently only support the last dimension.
   model.PopulateTensor<int>(model.axis(), {3});
   model.Invoke();
 
@@ -159,11 +163,21 @@ TEST(ArgMinOpTest, GetMinArgMulDimensions) {
   EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 2, 1}));
 }
 
+TEST(ArgMinOpTest, GetMinArgNegativeAxis) {
+  ArgMinOpModel<int32_t> model({1, 1, 2, 4}, TensorType_INT32, TensorType_INT32,
+                               TensorType_INT32);
+  model.PopulateTensor<int>(model.input(), {1, 2, 7, 8, 1, 9, 7, 3});
+  model.PopulateTensor<int>(model.axis(), {-2});
+  model.Invoke();
+
+  EXPECT_THAT(model.GetOutput(), ElementsAreArray({0, 0, 0, 1}));
+  EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4}));
+}
+
 TEST(ArgMinOpTest, GetMinArgOutput64) {
   ArgMinOpModel<int64_t> model({1, 1, 2, 4}, TensorType_INT32, TensorType_INT64,
                                TensorType_INT64);
   model.PopulateTensor<int>(model.input(), {10, 2, 7, 8, 1, 9, 7, 3});
-  // Currently only support the last dimension.
   model.PopulateTensor<int>(model.axis(), {3});
   model.Invoke();
 
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 19d23fa80b..5d97a2a336 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -3727,32 +3727,48 @@ template <typename T1, typename T2, typename T3, typename Cmp>
 void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
                const T3* input2_data, const RuntimeShape& output_shape,
                T2* output_data, const Cmp& cmp) {
-  // The current ArgMax implemention can only determine the index of the maximum
-  // value in the last dimension. So the axis argument is ignored.
-
   // For ArgMax, the number of output dimensions = (number of input dimensions -
   // 1). For the sake of simplicity, the output dimensions are equal to the
-  // input dimensions here. We enforce the constraint that the last dimension
+  // input dimensions here. We enforce the constraint that the axis dimension
   // must always be 1.
-  const int trailing_dim = output_shape.DimensionsCount() - 1;
   TFLITE_DCHECK_EQ(input1_shape.DimensionsCount(),
                    output_shape.DimensionsCount());
-  TFLITE_DCHECK_EQ(output_shape.Dims(trailing_dim), 1);
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input1_shape, trailing_dim, output_shape);
-  const int depth = input1_shape.Dims(trailing_dim);
 
-  for (int i = 0; i < outer_size; ++i) {
-    auto min_max_value = input1_data[i * depth];
-    int min_max_index = 0;
-    for (int d = 1; d < depth; ++d) {
-      const auto& curr_value = input1_data[i * depth + d];
-      if (cmp(curr_value, min_max_value)) {
-        min_max_value = curr_value;
-        min_max_index = d;
+  int axis = input2_data[0];
+  if (axis < 0) {
+    axis += input1_shape.DimensionsCount();
+  }
+
+  const int axis_size = input1_shape.Dims(axis);
+  TFLITE_DCHECK_EQ(output_shape.Dims(axis), 1);
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
+    outer_size *= input1_shape.Dims(i);
+  }
+
+  int inner_size = 1;
+  const int dims_count = input1_shape.DimensionsCount();
+  for (int i = axis + 1; i < dims_count; ++i) {
+    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
+    inner_size *= input1_shape.Dims(i);
+  }
+
+  for (int outer = 0; outer < outer_size; ++outer) {
+    for (int inner = 0; inner < inner_size; ++inner) {
+      auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
+      int min_max_index = 0;
+      for (int i = 1; i < axis_size; ++i) {
+        const auto& curr_value =
+            input1_data[(outer * axis_size + i) * inner_size + inner];
+        if (cmp(curr_value, min_max_value)) {
+          min_max_value = curr_value;
+          min_max_index = i;
+        }
       }
+      output_data[outer * inner_size + inner] = min_max_index;
     }
-    output_data[i] = min_max_index;
   }
 }
 
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 3f2255c454..6227215b35 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -2552,7 +2552,6 @@ def make_arg_min_max_tests(zip_path):
       "input_dtype": [tf.float32, tf.int32],
       "input_shape": [[], [1, 1, 1, 3], [2, 3, 4, 5], [2, 3, 3], [5, 5], [10]],
       "output_type": [tf.int32, tf.int64],
-      "axis_is_last_dim": [True, False],
       "is_arg_max": [True],
   }]
 
@@ -2562,10 +2561,7 @@ def make_arg_min_max_tests(zip_path):
         dtype=parameters["input_dtype"],
         name="input",
         shape=parameters["input_shape"])
-    if parameters["axis_is_last_dim"]:
-      axis = len(parameters["input_shape"]) - 1
-    else:
-      axis = random.randint(0, max(len(parameters["input_shape"]) - 2, 0))
+    axis = random.randint(0, max(len(parameters["input_shape"]) - 1, 0))
     if parameters["is_arg_max"]:
       out = tf.arg_max(input_value, axis, output_type=parameters["output_type"])
     else:
diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
index 349aa5a3b4..42c7aaa1c9 100644
--- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
+++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc
@@ -88,14 +88,6 @@ std::map<string, string> kBrokenTests = {
     // No support for axis!=0 in GatherV2.
     {R"(^\/gather.*axis=1)", "76910444"},
 
-    // No support for arbitrary dimensions in ArgMax.
-    {R"(^\/arg_min_max.*axis_is_last_dim=False.*input_shape=\[.,.,.,.\])",
-     "77546240"},
-    {R"(^\/arg_min_max.*axis_is_last_dim=False.*input_shape=\[.,.,.\])",
-     "77546240"},
-    {R"(^\/arg_min_max.*axis_is_last_dim=False.*input_shape=\[.,.\])",
-     "77546240"},
-
     // No Support for float.
     {R"(^\/floor_div.*dtype=tf\.float32)", "112859002"},
 
-- 
GitLab


From a7d26471d32f983aa4dacaef8d6362fa8fed07fe Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 15 Oct 2018 08:34:35 -0700
Subject: [PATCH 0958/1085] New toolchains for remote build execution on
 windows.

PiperOrigin-RevId: 217147663
---
 .../toolchains/preconfig/win_1803/BUILD       |   26 +
 .../preconfig/win_1803/bazel_6f8e36b/BUILD    |  155 +++
 .../win_1803/bazel_6f8e36b/CROSSTOOL          | 1212 +++++++++++++++++
 .../bazel_6f8e36b/dummy_toolchain.bzl         |   23 +
 .../toolchains/preconfig/win_1803/py36/BUILD  |  191 +++
 5 files changed, 1607 insertions(+)
 create mode 100644 third_party/toolchains/preconfig/win_1803/BUILD
 create mode 100644 third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/BUILD
 create mode 100644 third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/CROSSTOOL
 create mode 100644 third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/dummy_toolchain.bzl
 create mode 100644 third_party/toolchains/preconfig/win_1803/py36/BUILD

diff --git a/third_party/toolchains/preconfig/win_1803/BUILD b/third_party/toolchains/preconfig/win_1803/BUILD
new file mode 100644
index 0000000000..6b798dfc3a
--- /dev/null
+++ b/third_party/toolchains/preconfig/win_1803/BUILD
@@ -0,0 +1,26 @@
+licenses(["restricted"])
+
+package(default_visibility = ["//visibility:public"])
+
+java_runtime(
+    name = "windows_jdk8",
+    srcs = [],
+    java_home = "C:/openjdk",
+)
+
+platform(
+    name = "rbe_windows_1803",
+    constraint_values = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:windows",
+    ],
+    remote_execution_properties = """
+        properties:{
+          name:"container-image"
+          value:"docker://gcr.io/tensorflow-testing/tf-rbe-win@sha256:b2eeb661e0134ef96a4736677e8f96a90970bc206dea93739cd711031b62a0e5"
+        }
+        properties:{
+          name: "OSFamily" value: "Windows"
+        }
+        """,
+)
diff --git a/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/BUILD b/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/BUILD
new file mode 100644
index 0000000000..c00f005e46
--- /dev/null
+++ b/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/BUILD
@@ -0,0 +1,155 @@
+# Copyright 2018 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This becomes the BUILD file for @local_config_cc// under Windows.
+
+licenses(["restricted"])
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "malloc",
+)
+
+cc_library(
+    name = "stl",
+)
+
+filegroup(
+    name = "empty",
+    srcs = [],
+)
+
+# Hardcoded toolchain, legacy behaviour.
+cc_toolchain_suite(
+    name = "toolchain",
+    toolchains = {
+        "armeabi-v7a|compiler": ":cc-compiler-armeabi-v7a",
+        "x64_windows|msvc-cl": ":cc-compiler-x64_windows",
+        "x64_windows|msys-gcc": ":cc-compiler-x64_windows_msys",
+        "x64_windows|mingw-gcc": ":cc-compiler-x64_windows_mingw",
+    },
+)
+
+cc_toolchain(
+    name = "cc-compiler-x64_windows_msys",
+    all_files = ":empty",
+    compiler_files = ":empty",
+    cpu = "local",
+    dwp_files = ":empty",
+    dynamic_runtime_libs = [":empty"],
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    static_runtime_libs = [":empty"],
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+toolchain(
+    name = "cc-toolchain-x64_windows_msys",
+    exec_compatible_with = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:windows",
+        "@bazel_tools//tools/cpp:msys",
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:windows",
+    ],
+    toolchain = ":cc-compiler-x64_windows_msys",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+cc_toolchain(
+    name = "cc-compiler-x64_windows_mingw",
+    all_files = ":empty",
+    compiler_files = ":empty",
+    cpu = "x64_windows",
+    dwp_files = ":empty",
+    dynamic_runtime_libs = [":empty"],
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    static_runtime_libs = [":empty"],
+    strip_files = ":empty",
+    supports_param_files = 0,
+)
+
+toolchain(
+    name = "cc-toolchain-x64_windows_mingw",
+    exec_compatible_with = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:windows",
+        "@bazel_tools//tools/cpp:mingw",
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:windows",
+    ],
+    toolchain = ":cc-compiler-x64_windows_mingw",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+cc_toolchain(
+    name = "cc-compiler-x64_windows",
+    all_files = ":empty",
+    compiler_files = ":empty",
+    cpu = "x64_windows",
+    dwp_files = ":empty",
+    dynamic_runtime_libs = [":empty"],
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    static_runtime_libs = [":empty"],
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+toolchain(
+    name = "cc-toolchain-x64_windows",
+    exec_compatible_with = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:windows",
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:windows",
+    ],
+    toolchain = ":cc-compiler-x64_windows",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
+
+cc_toolchain(
+    name = "cc-compiler-armeabi-v7a",
+    all_files = ":empty",
+    compiler_files = ":empty",
+    cpu = "local",
+    dwp_files = ":empty",
+    dynamic_runtime_libs = [":empty"],
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    static_runtime_libs = [":empty"],
+    strip_files = ":empty",
+    supports_param_files = 1,
+)
+
+toolchain(
+    name = "cc-toolchain-armeabi-v7a",
+    exec_compatible_with = [
+    ],
+    target_compatible_with = [
+        "@bazel_tools//platforms:arm",
+        "@bazel_tools//platforms:android",
+    ],
+    toolchain = ":cc-compiler-armeabi-v7a",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
diff --git a/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/CROSSTOOL b/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/CROSSTOOL
new file mode 100644
index 0000000000..04c8bcae45
--- /dev/null
+++ b/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/CROSSTOOL
@@ -0,0 +1,1212 @@
+# Copyright 2016 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+major_version: "local"
+minor_version: ""
+default_target_cpu: "same_as_host"
+
+default_toolchain {
+  cpu: "x64_windows"
+  toolchain_identifier: "msvc_x64"
+}
+
+default_toolchain {
+  cpu: "local"
+  toolchain_identifier: "stub_armeabi-v7a"
+}
+
+default_toolchain {
+  cpu: "armeabi-v7a"
+  toolchain_identifier: "stub_armeabi-v7a"
+}
+
+default_toolchain {
+  cpu: "x64_windows"
+  toolchain_identifier: "msvc_x64"
+}
+
+default_toolchain {
+  cpu: "x64_windows_msvc"
+  toolchain_identifier: "msvc_x64"
+}
+
+default_toolchain {
+  cpu: "x64_windows_msys"
+  toolchain_identifier: "msys_x64"
+}
+
+default_toolchain {
+  cpu: "s390x"
+  toolchain_identifier: "msys_x64"
+}
+
+# Android tooling requires a default toolchain for the armeabi-v7a cpu.
+toolchain {
+  abi_version: "armeabi-v7a"
+  abi_libc_version: "armeabi-v7a"
+  builtin_sysroot: ""
+  compiler: "compiler"
+  host_system_name: "armeabi-v7a"
+  needsPic: true
+  supports_gold_linker: false
+  supports_incremental_linker: false
+  supports_fission: false
+  supports_interface_shared_objects: false
+  supports_normalizing_ar: false
+  supports_start_end_lib: false
+  target_libc: "armeabi-v7a"
+  target_cpu: "armeabi-v7a"
+  target_system_name: "armeabi-v7a"
+  toolchain_identifier: "stub_armeabi-v7a"
+
+  tool_path { name: "ar" path: "/bin/false" }
+  tool_path { name: "compat-ld" path: "/bin/false" }
+  tool_path { name: "cpp" path: "/bin/false" }
+  tool_path { name: "dwp" path: "/bin/false" }
+  tool_path { name: "gcc" path: "/bin/false" }
+  tool_path { name: "gcov" path: "/bin/false" }
+  tool_path { name: "ld" path: "/bin/false" }
+
+  tool_path { name: "nm" path: "/bin/false" }
+  tool_path { name: "objcopy" path: "/bin/false" }
+  tool_path { name: "objdump" path: "/bin/false" }
+  tool_path { name: "strip" path: "/bin/false" }
+  linking_mode_flags { mode: DYNAMIC }
+}
+
+toolchain {
+  toolchain_identifier: "msys_x64"
+   abi_version: "local"
+   abi_libc_version: "local"
+   builtin_sysroot: ""
+   compiler: "msys-gcc"
+   host_system_name: "local"
+   needsPic: false
+   target_libc: "msys"
+   target_cpu: "x64_windows"
+   target_system_name: "local"
+   tool_path { name: "ar" path: "c:/tools/msys64/usr/bin/ar" }
+   tool_path { name: "compat-ld" path: "c:/tools/msys64/usr/bin/ld" }
+   tool_path { name: "cpp" path: "c:/tools/msys64/usr/bin/cpp" }
+   tool_path { name: "dwp" path: "c:/tools/msys64/usr/bin/dwp" }
+   tool_path { name: "gcc" path: "c:/tools/msys64/usr/bin/gcc" }
+   artifact_name_pattern { category_name: "executable" prefix: "" extension: ".exe"}
+   cxx_flag: "-std=gnu++0x"
+   linker_flag: "-lstdc++"
+   cxx_builtin_include_directory: "c:/tools/msys64/usr/"
+   tool_path { name: "gcov" path: "c:/tools/msys64/usr/bin/gcov" }
+   tool_path { name: "ld" path: "c:/tools/msys64/usr/bin/ld" }
+   tool_path { name: "nm" path: "c:/tools/msys64/usr/bin/nm" }
+   tool_path { name: "objcopy" path: "c:/tools/msys64/usr/bin/objcopy" }
+   objcopy_embed_flag: "-I"
+   objcopy_embed_flag: "binary"
+   tool_path { name: "objdump" path: "c:/tools/msys64/usr/bin/objdump" }
+   tool_path { name: "strip" path: "c:/tools/msys64/usr/bin/strip" }   feature { name: "targets_windows" implies: "copy_dynamic_libraries_to_binary" enabled: true }   feature { name: "copy_dynamic_libraries_to_binary" }
+
+  compilation_mode_flags {
+    mode: DBG
+
+  }
+  compilation_mode_flags {
+    mode: OPT
+
+  }
+  linking_mode_flags { mode: DYNAMIC }
+
+
+
+  feature {
+    name: 'fdo_optimize'
+    provides: 'profile'
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      expand_if_all_available: 'fdo_profile_path'
+      flag_group {
+        flag: '-fprofile-use=%{fdo_profile_path}'
+        flag: '-fprofile-correction',
+      }
+    }
+  }
+}
+
+toolchain {
+  toolchain_identifier: "msys_x64_mingw"
+  abi_version: "local"
+  abi_libc_version: "local"
+  builtin_sysroot: ""
+  compiler: "mingw-gcc"
+  host_system_name: "local"
+  needsPic: false
+  target_libc: "mingw"
+  target_cpu: "x64_windows"
+  target_system_name: "local"
+
+  artifact_name_pattern {
+     category_name: 'executable'
+     prefix: ''
+     extension: '.exe'
+  }
+
+   tool_path { name: "ar" path: "c:/tools/msys64/mingw64/bin/ar" }
+   tool_path { name: "compat-ld" path: "c:/tools/msys64/mingw64/bin/ld" }
+   tool_path { name: "cpp" path: "c:/tools/msys64/mingw64/bin/cpp" }
+   tool_path { name: "dwp" path: "c:/tools/msys64/mingw64/bin/dwp" }
+   tool_path { name: "gcc" path: "c:/tools/msys64/mingw64/bin/gcc" }
+   artifact_name_pattern { category_name: "executable" prefix: "" extension: ".exe"}
+   cxx_flag: "-std=gnu++0x"
+   linker_flag: "-lstdc++"
+   cxx_builtin_include_directory: "c:/tools/msys64/mingw64/"
+   tool_path { name: "gcov" path: "c:/tools/msys64/mingw64/bin/gcov" }
+   tool_path { name: "ld" path: "c:/tools/msys64/mingw64/bin/ld" }
+   tool_path { name: "nm" path: "c:/tools/msys64/mingw64/bin/nm" }
+   tool_path { name: "objcopy" path: "c:/tools/msys64/mingw64/bin/objcopy" }
+   objcopy_embed_flag: "-I"
+   objcopy_embed_flag: "binary"
+   tool_path { name: "objdump" path: "c:/tools/msys64/mingw64/bin/objdump" }
+   tool_path { name: "strip" path: "c:/tools/msys64/mingw64/bin/strip" }   feature { name: "targets_windows" implies: "copy_dynamic_libraries_to_binary" enabled: true }   feature { name: "copy_dynamic_libraries_to_binary" }
+
+  linking_mode_flags { mode: DYNAMIC }
+}
+
+toolchain {
+  toolchain_identifier: "msvc_x64"
+  # This is a workaround for https://github.com/bazelbuild/bazel/issues/5087.
+  cxx_builtin_include_directory: "C:\\botcode\\w"
+  host_system_name: "local"
+  target_system_name: "local"
+
+  abi_version: "local"
+  abi_libc_version: "local"
+  target_cpu: "x64_windows"
+  compiler: "msvc-cl"
+  target_libc: "msvcrt"
+  default_python_version: "python2.7"
+
+cxx_builtin_include_directory: "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\INCLUDE"
+cxx_builtin_include_directory: "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.10240.0\\ucrt"
+cxx_builtin_include_directory: "C:\\Program Files (x86)\\Windows Kits\\8.1\\include\\shared"
+cxx_builtin_include_directory: "C:\\Program Files (x86)\\Windows Kits\\8.1\\include\\um"
+cxx_builtin_include_directory: "C:\\Program Files (x86)\\Windows Kits\\8.1\\include\\winrt"
+
+  tool_path {
+    name: "ar"
+    path: "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/lib.exe"
+  }
+  tool_path {
+    name: "ml"
+    path: "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/ml64.exe"
+  }
+  tool_path {
+    name: "cpp"
+    path: "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/cl.exe"
+  }
+  tool_path {
+    name: "gcc"
+    path: "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/cl.exe"
+  }
+  tool_path {
+    name: "gcov"
+    path: "wrapper/bin/msvc_nop.bat"
+  }
+  tool_path {
+    name: "ld"
+    path: "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/link.exe"
+  }
+  tool_path {
+    name: "nm"
+    path: "wrapper/bin/msvc_nop.bat"
+  }
+  tool_path {
+    name: "objcopy"
+    path: "wrapper/bin/msvc_nop.bat"
+  }
+  tool_path {
+    name: "objdump"
+    path: "wrapper/bin/msvc_nop.bat"
+  }
+  tool_path {
+    name: "strip"
+    path: "wrapper/bin/msvc_nop.bat"
+  }
+  supports_gold_linker: false
+  supports_start_end_lib: false
+  supports_interface_shared_objects: true
+  supports_incremental_linker: false
+  supports_normalizing_ar: true
+  needsPic: false
+
+  # TODO(pcloudy): Review those flags below, they should be defined by cl.exe
+  compiler_flag: "/DCOMPILER_MSVC"
+
+  # Don't define min/max macros in windows.h.
+  compiler_flag: "/DNOMINMAX"
+
+  # Platform defines.
+  compiler_flag: "/D_WIN32_WINNT=0x0600"
+  # Turn off warning messages.
+  compiler_flag: "/D_CRT_SECURE_NO_DEPRECATE"
+  compiler_flag: "/D_CRT_SECURE_NO_WARNINGS"
+
+  # Useful options to have on for compilation.
+  # Increase the capacity of object files to 2^32 sections.
+  compiler_flag: "/bigobj"
+  # Allocate 500MB for precomputed headers.
+  compiler_flag: "/Zm500"
+  # Catch C++ exceptions only and tell the compiler to assume that functions declared
+  # as extern "C" never throw a C++ exception.
+  compiler_flag: "/EHsc"
+
+  # Globally disabled warnings.
+  # Don't warn about elements of array being be default initialized.
+  compiler_flag: "/wd4351"
+  # Don't warn about no matching delete found.
+  compiler_flag: "/wd4291"
+  # Don't warn about diamond inheritance patterns.
+  compiler_flag: "/wd4250"
+  # Don't warn about insecure functions (e.g. non _s functions).
+  compiler_flag: "/wd4996"
+
+  linker_flag: "/MACHINE:X64"
+
+  feature {
+    name: "no_legacy_features"
+  }
+
+  artifact_name_pattern {
+     category_name: 'object_file'
+     prefix: ''
+     extension: '.obj'
+  }
+
+  artifact_name_pattern {
+     category_name: 'static_library'
+     prefix: ''
+     extension: '.lib'
+  }
+
+  artifact_name_pattern {
+     category_name: 'alwayslink_static_library'
+     prefix: ''
+     extension: '.lo.lib'
+  }
+
+  artifact_name_pattern {
+     category_name: 'executable'
+     prefix: ''
+     extension: '.exe'
+  }
+
+  artifact_name_pattern {
+     category_name: 'dynamic_library'
+     prefix: ''
+     extension: '.dll'
+  }
+
+  artifact_name_pattern {
+     category_name: 'interface_library'
+     prefix: ''
+     extension: '.if.lib'
+  }
+
+  # Suppress startup banner.
+  feature {
+    name: "nologo"
+    flag_set {
+      action: "c-compile"
+      action: "c++-compile"
+      action: "c++-module-compile"
+      action: "c++-module-codegen"
+      action: "c++-header-parsing"
+      action: "assemble"
+      action: "preprocess-assemble"
+      action: "c++-link-executable"
+      action: "c++-link-dynamic-library"
+      action: "c++-link-nodeps-dynamic-library"
+      action: "c++-link-static-library"
+      flag_group {
+        flag: "/nologo"
+      }
+    }
+  }
+
+  feature {
+    name: 'has_configured_linker_path'
+  }
+
+  # This feature indicates strip is not supported, building stripped binary will just result a copy of orignial binary
+  feature {
+    name: 'no_stripping'
+  }
+
+  # This feature indicates this is a toolchain targeting Windows.
+  feature {
+    name: 'targets_windows'
+    implies: 'copy_dynamic_libraries_to_binary'
+    enabled: true
+  }
+
+  feature {
+    name: 'copy_dynamic_libraries_to_binary'
+  }
+
+  action_config {
+    config_name: 'assemble'
+    action_name: 'assemble'
+    tool {
+      tool_path: 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/ml64.exe'
+    }
+    implies: 'compiler_input_flags'
+    implies: 'compiler_output_flags'
+    implies: 'nologo'
+    implies: 'msvc_env'
+    implies: 'sysroot'
+  }
+
+  action_config {
+    config_name: 'preprocess-assemble'
+    action_name: 'preprocess-assemble'
+    tool {
+      tool_path: 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/ml64.exe'
+    }
+    implies: 'compiler_input_flags'
+    implies: 'compiler_output_flags'
+    implies: 'nologo'
+    implies: 'msvc_env'
+    implies: 'sysroot'
+  }
+
+  action_config {
+    config_name: 'c-compile'
+    action_name: 'c-compile'
+    tool {
+      tool_path: 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/cl.exe'
+    }
+    implies: 'compiler_input_flags'
+    implies: 'compiler_output_flags'
+    implies: 'legacy_compile_flags'
+    implies: 'nologo'
+    implies: 'msvc_env'
+    implies: 'parse_showincludes'
+    implies: 'user_compile_flags'
+    implies: 'sysroot'
+    implies: 'unfiltered_compile_flags'
+  }
+
+  action_config {
+    config_name: 'c++-compile'
+    action_name: 'c++-compile'
+    tool {
+      tool_path: 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/cl.exe'
+    }
+    implies: 'compiler_input_flags'
+    implies: 'compiler_output_flags'
+    implies: 'legacy_compile_flags'
+    implies: 'nologo'
+    implies: 'msvc_env'
+    implies: 'parse_showincludes'
+    implies: 'user_compile_flags'
+    implies: 'sysroot'
+    implies: 'unfiltered_compile_flags'
+  }
+
+  action_config {
+    config_name: 'c++-link-executable'
+    action_name: 'c++-link-executable'
+    tool {
+      tool_path: 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/link.exe'
+    }
+    implies: 'nologo'
+    implies: 'linkstamps'
+    implies: 'output_execpath_flags'
+    implies: 'input_param_flags'
+    implies: 'user_link_flags'
+    implies: 'legacy_link_flags'
+    implies: 'linker_subsystem_flag'
+    implies: 'linker_param_file'
+    implies: 'msvc_env'
+    implies: 'no_stripping'
+  }
+
+  action_config {
+    config_name: 'c++-link-dynamic-library'
+    action_name: 'c++-link-dynamic-library'
+    tool {
+      tool_path: 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/link.exe'
+    }
+    implies: 'nologo'
+    implies: 'shared_flag'
+    implies: 'linkstamps'
+    implies: 'output_execpath_flags'
+    implies: 'input_param_flags'
+    implies: 'user_link_flags'
+    implies: 'legacy_link_flags'
+    implies: 'linker_subsystem_flag'
+    implies: 'linker_param_file'
+    implies: 'msvc_env'
+    implies: 'no_stripping'
+    implies: 'has_configured_linker_path'
+    implies: 'def_file'
+  }
+
+  action_config {
+      config_name: 'c++-link-nodeps-dynamic-library'
+      action_name: 'c++-link-nodeps-dynamic-library'
+      tool {
+        tool_path: 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/link.exe'
+      }
+      implies: 'nologo'
+      implies: 'shared_flag'
+      implies: 'linkstamps'
+      implies: 'output_execpath_flags'
+      implies: 'input_param_flags'
+      implies: 'user_link_flags'
+      implies: 'legacy_link_flags'
+      implies: 'linker_subsystem_flag'
+      implies: 'linker_param_file'
+      implies: 'msvc_env'
+      implies: 'no_stripping'
+      implies: 'has_configured_linker_path'
+      implies: 'def_file'
+    }
+
+  action_config {
+    config_name: 'c++-link-static-library'
+    action_name: 'c++-link-static-library'
+    tool {
+      tool_path: 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64/lib.exe'
+    }
+    implies: 'nologo'
+    implies: 'archiver_flags'
+    implies: 'input_param_flags'
+    implies: 'linker_param_file'
+    implies: 'msvc_env'
+  }
+
+  # TODO(b/65151735): Remove legacy_compile_flags feature when legacy fields are
+  # not used in this crosstool
+  feature {
+    name: 'legacy_compile_flags'
+    flag_set {
+      expand_if_all_available: 'legacy_compile_flags'
+      action: 'preprocess-assemble'
+      action: 'c-compile'
+      action: 'c++-compile'
+      action: 'c++-header-parsing'
+      action: 'c++-module-compile'
+      action: 'c++-module-codegen'
+      flag_group {
+        iterate_over: 'legacy_compile_flags'
+        flag: '%{legacy_compile_flags}'
+      }
+    }
+  }
+
+  feature {
+    name: "msvc_env"
+    env_set {
+      action: "c-compile"
+      action: "c++-compile"
+      action: "c++-module-compile"
+      action: "c++-module-codegen"
+      action: "c++-header-parsing"
+      action: "assemble"
+      action: "preprocess-assemble"
+      action: "c++-link-executable"
+      action: "c++-link-dynamic-library"
+      action: "c++-link-nodeps-dynamic-library"
+      action: "c++-link-static-library"
+      env_entry {
+        key: "PATH"
+        value: "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\BIN\\amd64;C:\\Windows\\Microsoft.NET\\Framework64\\v4.0.30319;C:\\Windows\\Microsoft.NET\\Framework64\\;C:\\Program Files (x86)\\Windows Kits\\8.1\\bin\\x64;C:\\Program Files (x86)\\Windows Kits\\8.1\\bin\\x86;;C:\\Windows\\system32"
+      }
+      env_entry {
+        key: "TMP"
+        value: "C:\\Users\\ContainerAdministrator\\AppData\\Local\\Temp"
+      }
+      env_entry {
+        key: "TEMP"
+        value: "C:\\Users\\ContainerAdministrator\\AppData\\Local\\Temp"
+      }
+    }
+    implies: 'msvc_compile_env'
+    implies: 'msvc_link_env'
+  }
+
+  feature {
+    name: "msvc_compile_env"
+    env_set {
+      action: "c-compile"
+      action: "c++-compile"
+      action: "c++-module-compile"
+      action: "c++-module-codegen"
+      action: "c++-header-parsing"
+      action: "assemble"
+      action: "preprocess-assemble"
+      env_entry {
+        key: "INCLUDE"
+        value: "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\INCLUDE;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.10240.0\\ucrt;C:\\Program Files (x86)\\Windows Kits\\8.1\\include\\shared;C:\\Program Files (x86)\\Windows Kits\\8.1\\include\\um;C:\\Program Files (x86)\\Windows Kits\\8.1\\include\\winrt;"
+      }
+    }
+  }
+
+  feature {
+    name: "msvc_link_env"
+    env_set {
+      action: "c++-link-executable"
+      action: "c++-link-dynamic-library"
+      action: "c++-link-nodeps-dynamic-library"
+      action: "c++-link-static-library"
+      env_entry {
+        key: "LIB"
+        value: "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\LIB\\amd64;C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.10240.0\\ucrt\\x64;C:\\Program Files (x86)\\Windows Kits\\8.1\\lib\\winv6.3\\um\\x64;"
+      }
+    }
+  }
+
+  feature {
+    name: 'include_paths'
+    flag_set {
+      action: "assemble"
+      action: 'preprocess-assemble'
+      action: 'c-compile'
+      action: 'c++-compile'
+      action: 'c++-header-parsing'
+      action: 'c++-module-compile'
+      flag_group {
+        iterate_over: 'quote_include_paths'
+        flag: '/I%{quote_include_paths}'
+      }
+      flag_group {
+        iterate_over: 'include_paths'
+        flag: '/I%{include_paths}'
+      }
+      flag_group {
+        iterate_over: 'system_include_paths'
+        flag: '/I%{system_include_paths}'
+      }
+    }
+  }
+
+  feature {
+    name: "preprocessor_defines"
+    flag_set {
+      action: "assemble"
+      action: "preprocess-assemble"
+      action: "c-compile"
+      action: "c++-compile"
+      action: "c++-header-parsing"
+      action: "c++-module-compile"
+      flag_group {
+        flag: "/D%{preprocessor_defines}"
+        iterate_over: "preprocessor_defines"
+      }
+    }
+  }
+
+  # Tell Bazel to parse the output of /showIncludes
+  feature {
+    name: 'parse_showincludes'
+    flag_set {
+      action: 'preprocess-assemble'
+      action: 'c-compile'
+      action: 'c++-compile'
+      action: 'c++-module-compile'
+      action: 'c++-header-parsing'
+      flag_group {
+        flag: "/showIncludes"
+      }
+    }
+  }
+
+
+  feature {
+    name: 'generate_pdb_file'
+    requires: {
+      feature: 'dbg'
+    }
+    requires: {
+      feature: 'fastbuild'
+    }
+  }
+
+  feature {
+    name: 'shared_flag'
+    flag_set {
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: '/DLL'
+      }
+    }
+  }
+
+  feature {
+    name: 'linkstamps'
+    flag_set {
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      expand_if_all_available: 'linkstamp_paths'
+      flag_group {
+        iterate_over: 'linkstamp_paths'
+        flag: '%{linkstamp_paths}'
+      }
+    }
+  }
+
+  feature {
+    name: 'output_execpath_flags'
+    flag_set {
+      expand_if_all_available: 'output_execpath'
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: '/OUT:%{output_execpath}'
+      }
+    }
+  }
+
+  feature {
+    name: 'archiver_flags'
+    flag_set {
+      expand_if_all_available: 'output_execpath'
+      action: 'c++-link-static-library'
+      flag_group {
+        flag: '/OUT:%{output_execpath}'
+      }
+    }
+  }
+
+  feature {
+    name: 'input_param_flags'
+    flag_set {
+      expand_if_all_available: 'interface_library_output_path'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: "/IMPLIB:%{interface_library_output_path}"
+      }
+    }
+    flag_set {
+      expand_if_all_available: 'libopts'
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        iterate_over: 'libopts'
+        flag: '%{libopts}'
+      }
+    }
+    flag_set {
+      expand_if_all_available: 'libraries_to_link'
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      action: 'c++-link-static-library'
+      flag_group {
+        iterate_over: 'libraries_to_link'
+        flag_group {
+          expand_if_equal: {
+            variable: 'libraries_to_link.type'
+            value: 'object_file_group'
+          }
+          iterate_over: 'libraries_to_link.object_files'
+          flag_group {
+            flag: '%{libraries_to_link.object_files}'
+          }
+        }
+        flag_group {
+          expand_if_equal: {
+            variable: 'libraries_to_link.type'
+            value: 'object_file'
+          }
+          flag_group {
+            flag: '%{libraries_to_link.name}'
+          }
+        }
+        flag_group {
+          expand_if_equal: {
+            variable: 'libraries_to_link.type'
+            value: 'interface_library'
+          }
+          flag_group {
+            flag: '%{libraries_to_link.name}'
+          }
+        }
+        flag_group {
+          expand_if_equal: {
+            variable: 'libraries_to_link.type'
+            value: 'static_library'
+          }
+          flag_group {
+            expand_if_false: 'libraries_to_link.is_whole_archive'
+            flag: '%{libraries_to_link.name}'
+          }
+          flag_group {
+            expand_if_true: 'libraries_to_link.is_whole_archive'
+            flag: '/WHOLEARCHIVE:%{libraries_to_link.name}'
+          }
+        }
+      }
+    }
+  }
+
+  # Since this feature is declared earlier in the CROSSTOOL than
+  # "user_link_flags", this feature will be applied prior to it anwyhere they
+  # are both implied. And since "user_link_flags" contains the linkopts from
+  # the build rule, this allows the user to override the /SUBSYSTEM in the BUILD
+  # file.
+  feature {
+    name: 'linker_subsystem_flag'
+    flag_set {
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: '/SUBSYSTEM:CONSOLE'
+      }
+    }
+  }
+
+  # The "user_link_flags" contains user-defined linkopts (from build rules)
+  # so it should be defined after features that declare user-overridable flags.
+  # For example the "linker_subsystem_flag" defines a default "/SUBSYSTEM" flag
+  # but we want to let the user override it, therefore "link_flag_subsystem" is
+  # defined earlier in the CROSSTOOL file than "user_link_flags".
+  feature {
+    name: 'user_link_flags'
+    flag_set {
+      expand_if_all_available: 'user_link_flags'
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        iterate_over: 'user_link_flags'
+        flag: '%{user_link_flags}'
+      }
+    }
+  }
+  feature {
+    name: 'legacy_link_flags'
+    flag_set {
+      expand_if_all_available: 'legacy_link_flags'
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        iterate_over: 'legacy_link_flags'
+        flag: '%{legacy_link_flags}'
+      }
+    }
+  }
+
+  feature {
+    name: 'linker_param_file'
+    flag_set {
+      expand_if_all_available: 'linker_param_file'
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      action: 'c++-link-static-library'
+      flag_group {
+        flag: '@%{linker_param_file}'
+      }
+    }
+  }
+
+  feature {
+    name: 'static_link_msvcrt'
+  }
+
+  feature {
+    name: 'static_link_msvcrt_no_debug'
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      flag_group {
+        flag: "/MT"
+      }
+    }
+    flag_set {
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: "/DEFAULTLIB:libcmt.lib"
+      }
+    }
+    requires: { feature: 'fastbuild'}
+    requires: { feature: 'opt'}
+  }
+
+  feature {
+    name: 'dynamic_link_msvcrt_no_debug'
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      flag_group {
+        flag: "/MD"
+      }
+    }
+    flag_set {
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: "/DEFAULTLIB:msvcrt.lib"
+      }
+    }
+    requires: { feature: 'fastbuild'}
+    requires: { feature: 'opt'}
+  }
+
+  feature {
+    name: 'static_link_msvcrt_debug'
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      flag_group {
+        flag: "/MTd"
+      }
+    }
+    flag_set {
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: "/DEFAULTLIB:libcmtd.lib"
+      }
+    }
+    requires: { feature: 'dbg'}
+  }
+
+  feature {
+    name: 'dynamic_link_msvcrt_debug'
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      flag_group {
+        flag: "/MDd"
+      }
+    }
+    flag_set {
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: "/DEFAULTLIB:msvcrtd.lib"
+      }
+    }
+    requires: { feature: 'dbg'}
+  }
+
+  feature {
+    name: 'dbg'
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      flag_group {
+        flag: "/Od"
+        flag: "/Z7"
+      }
+    }
+    flag_set {
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: "/DEBUG:FULL"
+        flag: "/INCREMENTAL:NO"
+      }
+    }
+    implies: 'generate_pdb_file'
+  }
+
+  feature {
+    name: 'fastbuild'
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      flag_group {
+        flag: "/Od"
+        flag: "/Z7"
+      }
+    }
+    flag_set {
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: "/DEBUG:FASTLINK"
+        flag: "/INCREMENTAL:NO"
+      }
+    }
+    implies: 'generate_pdb_file'
+  }
+
+  feature {
+    name: 'opt'
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      flag_group {
+        flag: "/O2" # Implies /Og /Oi /Ot /Oy /Ob2 /Gs /GF /Gy
+      }
+    }
+    implies: 'frame_pointer'
+  }
+
+  # Keep stack frames for debugging, even in opt mode.
+  # Must come after /O1, /O2 and /Ox.
+  feature {
+    name: "frame_pointer"
+    flag_set {
+      action: "c-compile"
+      action: "c++-compile"
+      flag_group {
+        flag: "/Oy-"
+      }
+    }
+  }
+
+  # Remove assert/DCHECKs in opt mode.
+  # You can have them back with --features=-disable_assertions.
+  feature {
+    name: 'disable_assertions'
+    enabled: true
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      with_feature: {
+        feature: 'opt'
+      }
+      flag_group {
+        flag: "/DNDEBUG"
+      }
+    }
+  }
+
+  feature {
+    name: "determinism"
+    enabled: true
+    flag_set {
+      action: "c-compile"
+      action: "c++-compile"
+      flag_group {
+        # Make C++ compilation deterministic. Use linkstamping instead of these
+        # compiler symbols.
+        # TODO: detect clang on Windows and use "-Wno-builtin-macro-redefined"
+        flag: "/wd4117" # Trying to define or undefine a predefined macro
+        flag: "-D__DATE__=\"redacted\""
+        flag: "-D__TIMESTAMP__=\"redacted\""
+        flag: "-D__TIME__=\"redacted\""
+      }
+    }
+  }
+
+  feature {
+    name: 'treat_warnings_as_errors'
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      flag_group {
+        flag: "/WX"
+      }
+    }
+  }
+
+  # Trade slower build time for smaller binary
+  feature {
+    name: 'smaller_binary'
+    enabled: true
+    flag_set {
+      action: 'c-compile'
+      action: 'c++-compile'
+      with_feature: {
+        feature: 'opt'
+      }
+      flag_group {
+        flag: "/Gy" # Enable function-level linking (-ffunction-sections)
+        flag: "/Gw" # Optimize global data (-fdata-sections)
+      }
+    }
+    flag_set {
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library',
+      action: 'c++-link-nodeps-dynamic-library'
+      with_feature: {
+        feature: 'opt'
+      }
+      flag_group {
+        flag: '/OPT:ICF' # Fold identical functions
+        flag: '/OPT:REF' # Eliminate unreferenced functions and data
+      }
+    }
+  }
+
+  # Suppress warnings that most users do not care
+  feature {
+    name: 'ignore_noisy_warnings'
+    enabled: true
+    flag_set {
+      action: 'c++-link-static-library'
+      flag_group {
+        # Suppress 'object file does not define any public symbols' warning
+        flag: '/ignore:4221'
+      }
+    }
+  }
+
+  feature {
+    name: 'user_compile_flags'
+    flag_set {
+      expand_if_all_available: 'user_compile_flags'
+      action: 'preprocess-assemble'
+      action: 'c-compile'
+      action: 'c++-compile'
+      action: 'c++-header-parsing'
+      action: 'c++-module-compile'
+      action: 'c++-module-codegen'
+      flag_group {
+        iterate_over: 'user_compile_flags'
+        flag: '%{user_compile_flags}'
+      }
+    }
+  }
+
+  feature {
+    name: 'sysroot'
+    flag_set {
+      expand_if_all_available: 'sysroot'
+      action: 'assemble'
+      action: 'preprocess-assemble'
+      action: 'c-compile'
+      action: 'c++-compile'
+      action: 'c++-header-parsing'
+      action: 'c++-module-compile'
+      action: 'c++-module-codegen'
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        iterate_over: 'sysroot'
+        flag: '--sysroot=%{sysroot}'
+      }
+    }
+  }
+
+  feature {
+    name: 'unfiltered_compile_flags'
+    flag_set {
+      expand_if_all_available: 'unfiltered_compile_flags'
+      action: 'preprocess-assemble'
+      action: 'c-compile'
+      action: 'c++-compile'
+      action: 'c++-header-parsing'
+      action: 'c++-module-compile'
+      action: 'c++-module-codegen'
+      flag_group {
+        iterate_over: 'unfiltered_compile_flags'
+        flag: '%{unfiltered_compile_flags}'
+      }
+    }
+  }
+
+  feature {
+    name: 'compiler_output_flags'
+    flag_set {
+      action: 'assemble'
+      flag_group {
+        expand_if_all_available: 'output_file'
+        expand_if_none_available: 'output_assembly_file'
+        expand_if_none_available: 'output_preprocess_file'
+        flag: '/Fo%{output_file}'
+        flag: '/Zi'
+      }
+    }
+    flag_set {
+      action: 'preprocess-assemble'
+      action: 'c-compile'
+      action: 'c++-compile'
+      action: 'c++-header-parsing'
+      action: 'c++-module-compile'
+      action: 'c++-module-codegen'
+      flag_group {
+        expand_if_all_available: 'output_file'
+        expand_if_none_available: 'output_assembly_file'
+        expand_if_none_available: 'output_preprocess_file'
+        flag: '/Fo%{output_file}'
+      }
+      flag_group {
+        expand_if_all_available: 'output_file'
+        expand_if_all_available: 'output_assembly_file'
+        flag: '/Fa%{output_file}'
+      }
+      flag_group {
+        expand_if_all_available: 'output_file'
+        expand_if_all_available: 'output_preprocess_file'
+        flag: '/P'
+        flag: '/Fi%{output_file}'
+      }
+    }
+  }
+
+  feature {
+    name: 'compiler_input_flags'
+    flag_set {
+      action: 'assemble'
+      action: 'preprocess-assemble'
+      action: 'c-compile'
+      action: 'c++-compile'
+      action: 'c++-header-parsing'
+      action: 'c++-module-compile'
+      action: 'c++-module-codegen'
+      flag_group {
+        expand_if_all_available: 'source_file'
+        flag: '/c'
+        flag: '%{source_file}'
+      }
+    }
+  }
+
+  feature {
+    name : 'def_file',
+    flag_set {
+      expand_if_all_available: 'def_file_path'
+      action: 'c++-link-executable'
+      action: 'c++-link-dynamic-library'
+      action: "c++-link-nodeps-dynamic-library"
+      flag_group {
+        flag: "/DEF:%{def_file_path}"
+        # We can specify a different DLL name in DEF file, /ignore:4070 suppresses
+        # the warning message about DLL name doesn't match the default one.
+        # See https://msdn.microsoft.com/en-us/library/sfkk2fz7.aspx
+        flag: "/ignore:4070"
+      }
+    }
+  }
+
+  feature {
+    name: 'windows_export_all_symbols'
+  }
+
+  feature {
+    name: 'no_windows_export_all_symbols'
+  }
+
+  linking_mode_flags { mode: DYNAMIC }
+}
+
diff --git a/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/dummy_toolchain.bzl b/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/dummy_toolchain.bzl
new file mode 100644
index 0000000000..45c0285d23
--- /dev/null
+++ b/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/dummy_toolchain.bzl
@@ -0,0 +1,23 @@
+# pylint: disable=g-bad-file-header
+# Copyright 2017 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Skylark rule that stubs a toolchain."""
+
+def _dummy_toolchain_impl(ctx):
+    ctx = ctx  # unused argument
+    toolchain = platform_common.ToolchainInfo()
+    return [toolchain]
+
+dummy_toolchain = rule(_dummy_toolchain_impl, attrs = {})
diff --git a/third_party/toolchains/preconfig/win_1803/py36/BUILD b/third_party/toolchains/preconfig/win_1803/py36/BUILD
new file mode 100644
index 0000000000..7b2e84bb38
--- /dev/null
+++ b/third_party/toolchains/preconfig/win_1803/py36/BUILD
@@ -0,0 +1,191 @@
+licenses(["restricted"])
+
+package(default_visibility = ["//visibility:public"])
+
+# To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib
+# See https://docs.python.org/3/extending/windows.html
+cc_import(
+    name = "python_lib",
+    interface_library = select({
+        ":windows": ":python_import_lib",
+        # A placeholder for Unix platforms which makes --no_build happy.
+        "//conditions:default": "not-existing.lib",
+    }),
+    system_provided = 1,
+)
+
+cc_library(
+    name = "python_headers",
+    hdrs = [":python_include"],
+    includes = ["python_include"],
+    deps = select({
+        ":windows": [":python_lib"],
+        "//conditions:default": [],
+    }),
+)
+
+cc_library(
+    name = "numpy_headers",
+    hdrs = [":numpy_include"],
+    includes = ["numpy_include"],
+)
+
+config_setting(
+    name = "windows",
+    values = {"cpu": "x64_windows"},
+    visibility = ["//visibility:public"],
+)
+
+genrule(
+    name = "python_include",
+    outs = [
+        "python_include/Python-ast.h",
+        "python_include/Python.h",
+        "python_include/abstract.h",
+        "python_include/accu.h",
+        "python_include/asdl.h",
+        "python_include/ast.h",
+        "python_include/bitset.h",
+        "python_include/bltinmodule.h",
+        "python_include/boolobject.h",
+        "python_include/bytearrayobject.h",
+        "python_include/bytes_methods.h",
+        "python_include/bytesobject.h",
+        "python_include/cellobject.h",
+        "python_include/ceval.h",
+        "python_include/classobject.h",
+        "python_include/code.h",
+        "python_include/codecs.h",
+        "python_include/compile.h",
+        "python_include/complexobject.h",
+        "python_include/datetime.h",
+        "python_include/descrobject.h",
+        "python_include/dictobject.h",
+        "python_include/dtoa.h",
+        "python_include/dynamic_annotations.h",
+        "python_include/enumobject.h",
+        "python_include/errcode.h",
+        "python_include/eval.h",
+        "python_include/fileobject.h",
+        "python_include/fileutils.h",
+        "python_include/floatobject.h",
+        "python_include/frameobject.h",
+        "python_include/funcobject.h",
+        "python_include/genobject.h",
+        "python_include/graminit.h",
+        "python_include/grammar.h",
+        "python_include/import.h",
+        "python_include/intrcheck.h",
+        "python_include/iterobject.h",
+        "python_include/listobject.h",
+        "python_include/longintrepr.h",
+        "python_include/longobject.h",
+        "python_include/marshal.h",
+        "python_include/memoryobject.h",
+        "python_include/metagrammar.h",
+        "python_include/methodobject.h",
+        "python_include/modsupport.h",
+        "python_include/moduleobject.h",
+        "python_include/namespaceobject.h",
+        "python_include/node.h",
+        "python_include/object.h",
+        "python_include/objimpl.h",
+        "python_include/odictobject.h",
+        "python_include/opcode.h",
+        "python_include/osdefs.h",
+        "python_include/osmodule.h",
+        "python_include/parsetok.h",
+        "python_include/patchlevel.h",
+        "python_include/pgen.h",
+        "python_include/pgenheaders.h",
+        "python_include/py_curses.h",
+        "python_include/pyarena.h",
+        "python_include/pyatomic.h",
+        "python_include/pycapsule.h",
+        "python_include/pyconfig.h",
+        "python_include/pyctype.h",
+        "python_include/pydebug.h",
+        "python_include/pydtrace.h",
+        "python_include/pyerrors.h",
+        "python_include/pyexpat.h",
+        "python_include/pyfpe.h",
+        "python_include/pygetopt.h",
+        "python_include/pyhash.h",
+        "python_include/pylifecycle.h",
+        "python_include/pymacconfig.h",
+        "python_include/pymacro.h",
+        "python_include/pymath.h",
+        "python_include/pymem.h",
+        "python_include/pyport.h",
+        "python_include/pystate.h",
+        "python_include/pystrcmp.h",
+        "python_include/pystrhex.h",
+        "python_include/pystrtod.h",
+        "python_include/pythonrun.h",
+        "python_include/pythread.h",
+        "python_include/pytime.h",
+        "python_include/rangeobject.h",
+        "python_include/setobject.h",
+        "python_include/sliceobject.h",
+        "python_include/structmember.h",
+        "python_include/structseq.h",
+        "python_include/symtable.h",
+        "python_include/sysmodule.h",
+        "python_include/token.h",
+        "python_include/traceback.h",
+        "python_include/tupleobject.h",
+        "python_include/typeslots.h",
+        "python_include/ucnhash.h",
+        "python_include/unicodeobject.h",
+        "python_include/warnings.h",
+        "python_include/weakrefobject.h",
+    ],
+    cmd = """
+cp -f "C:/Python36/include/Python-ast.h" "$(@D)/python_include/Python-ast.h" && cp -f "C:/Python36/include/Python.h" "$(@D)/python_include/Python.h" && cp -f "C:/Python36/include/abstract.h" "$(@D)/python_include/abstract.h" && cp -f "C:/Python36/include/accu.h" "$(@D)/python_include/accu.h" && cp -f "C:/Python36/include/asdl.h" "$(@D)/python_include/asdl.h" && cp -f "C:/Python36/include/ast.h" "$(@D)/python_include/ast.h" && cp -f "C:/Python36/include/bitset.h" "$(@D)/python_include/bitset.h" && cp -f "C:/Python36/include/bltinmodule.h" "$(@D)/python_include/bltinmodule.h" && cp -f "C:/Python36/include/boolobject.h" "$(@D)/python_include/boolobject.h" && cp -f "C:/Python36/include/bytearrayobject.h" "$(@D)/python_include/bytearrayobject.h" && cp -f "C:/Python36/include/bytes_methods.h" "$(@D)/python_include/bytes_methods.h" && cp -f "C:/Python36/include/bytesobject.h" "$(@D)/python_include/bytesobject.h" && cp -f "C:/Python36/include/cellobject.h" "$(@D)/python_include/cellobject.h" && cp -f "C:/Python36/include/ceval.h" "$(@D)/python_include/ceval.h" && cp -f "C:/Python36/include/classobject.h" "$(@D)/python_include/classobject.h" && cp -f "C:/Python36/include/code.h" "$(@D)/python_include/code.h" && cp -f "C:/Python36/include/codecs.h" "$(@D)/python_include/codecs.h" && cp -f "C:/Python36/include/compile.h" "$(@D)/python_include/compile.h" && cp -f "C:/Python36/include/complexobject.h" "$(@D)/python_include/complexobject.h" && cp -f "C:/Python36/include/datetime.h" "$(@D)/python_include/datetime.h" && cp -f "C:/Python36/include/descrobject.h" "$(@D)/python_include/descrobject.h" && cp -f "C:/Python36/include/dictobject.h" "$(@D)/python_include/dictobject.h" && cp -f "C:/Python36/include/dtoa.h" "$(@D)/python_include/dtoa.h" && cp -f "C:/Python36/include/dynamic_annotations.h" "$(@D)/python_include/dynamic_annotations.h" && cp -f "C:/Python36/include/enumobject.h" "$(@D)/python_include/enumobject.h" && cp -f "C:/Python36/include/errcode.h" "$(@D)/python_include/errcode.h" && cp -f "C:/Python36/include/eval.h" "$(@D)/python_include/eval.h" && cp -f "C:/Python36/include/fileobject.h" "$(@D)/python_include/fileobject.h" && cp -f "C:/Python36/include/fileutils.h" "$(@D)/python_include/fileutils.h" && cp -f "C:/Python36/include/floatobject.h" "$(@D)/python_include/floatobject.h" && cp -f "C:/Python36/include/frameobject.h" "$(@D)/python_include/frameobject.h" && cp -f "C:/Python36/include/funcobject.h" "$(@D)/python_include/funcobject.h" && cp -f "C:/Python36/include/genobject.h" "$(@D)/python_include/genobject.h" && cp -f "C:/Python36/include/graminit.h" "$(@D)/python_include/graminit.h" && cp -f "C:/Python36/include/grammar.h" "$(@D)/python_include/grammar.h" && cp -f "C:/Python36/include/import.h" "$(@D)/python_include/import.h" && cp -f "C:/Python36/include/intrcheck.h" "$(@D)/python_include/intrcheck.h" && cp -f "C:/Python36/include/iterobject.h" "$(@D)/python_include/iterobject.h" && cp -f "C:/Python36/include/listobject.h" "$(@D)/python_include/listobject.h" && cp -f "C:/Python36/include/longintrepr.h" "$(@D)/python_include/longintrepr.h" && cp -f "C:/Python36/include/longobject.h" "$(@D)/python_include/longobject.h" && cp -f "C:/Python36/include/marshal.h" "$(@D)/python_include/marshal.h" && cp -f "C:/Python36/include/memoryobject.h" "$(@D)/python_include/memoryobject.h" && cp -f "C:/Python36/include/metagrammar.h" "$(@D)/python_include/metagrammar.h" && cp -f "C:/Python36/include/methodobject.h" "$(@D)/python_include/methodobject.h" && cp -f "C:/Python36/include/modsupport.h" "$(@D)/python_include/modsupport.h" && cp -f "C:/Python36/include/moduleobject.h" "$(@D)/python_include/moduleobject.h" && cp -f "C:/Python36/include/namespaceobject.h" "$(@D)/python_include/namespaceobject.h" && cp -f "C:/Python36/include/node.h" "$(@D)/python_include/node.h" && cp -f "C:/Python36/include/object.h" "$(@D)/python_include/object.h" && cp -f "C:/Python36/include/objimpl.h" "$(@D)/python_include/objimpl.h" && cp -f "C:/Python36/include/odictobject.h" "$(@D)/python_include/odictobject.h" && cp -f "C:/Python36/include/opcode.h" "$(@D)/python_include/opcode.h" && cp -f "C:/Python36/include/osdefs.h" "$(@D)/python_include/osdefs.h" && cp -f "C:/Python36/include/osmodule.h" "$(@D)/python_include/osmodule.h" && cp -f "C:/Python36/include/parsetok.h" "$(@D)/python_include/parsetok.h" && cp -f "C:/Python36/include/patchlevel.h" "$(@D)/python_include/patchlevel.h" && cp -f "C:/Python36/include/pgen.h" "$(@D)/python_include/pgen.h" && cp -f "C:/Python36/include/pgenheaders.h" "$(@D)/python_include/pgenheaders.h" && cp -f "C:/Python36/include/py_curses.h" "$(@D)/python_include/py_curses.h" && cp -f "C:/Python36/include/pyarena.h" "$(@D)/python_include/pyarena.h" && cp -f "C:/Python36/include/pyatomic.h" "$(@D)/python_include/pyatomic.h" && cp -f "C:/Python36/include/pycapsule.h" "$(@D)/python_include/pycapsule.h" && cp -f "C:/Python36/include/pyconfig.h" "$(@D)/python_include/pyconfig.h" && cp -f "C:/Python36/include/pyctype.h" "$(@D)/python_include/pyctype.h" && cp -f "C:/Python36/include/pydebug.h" "$(@D)/python_include/pydebug.h" && cp -f "C:/Python36/include/pydtrace.h" "$(@D)/python_include/pydtrace.h" && cp -f "C:/Python36/include/pyerrors.h" "$(@D)/python_include/pyerrors.h" && cp -f "C:/Python36/include/pyexpat.h" "$(@D)/python_include/pyexpat.h" && cp -f "C:/Python36/include/pyfpe.h" "$(@D)/python_include/pyfpe.h" && cp -f "C:/Python36/include/pygetopt.h" "$(@D)/python_include/pygetopt.h" && cp -f "C:/Python36/include/pyhash.h" "$(@D)/python_include/pyhash.h" && cp -f "C:/Python36/include/pylifecycle.h" "$(@D)/python_include/pylifecycle.h" && cp -f "C:/Python36/include/pymacconfig.h" "$(@D)/python_include/pymacconfig.h" && cp -f "C:/Python36/include/pymacro.h" "$(@D)/python_include/pymacro.h" && cp -f "C:/Python36/include/pymath.h" "$(@D)/python_include/pymath.h" && cp -f "C:/Python36/include/pymem.h" "$(@D)/python_include/pymem.h" && cp -f "C:/Python36/include/pyport.h" "$(@D)/python_include/pyport.h" && cp -f "C:/Python36/include/pystate.h" "$(@D)/python_include/pystate.h" && cp -f "C:/Python36/include/pystrcmp.h" "$(@D)/python_include/pystrcmp.h" && cp -f "C:/Python36/include/pystrhex.h" "$(@D)/python_include/pystrhex.h" && cp -f "C:/Python36/include/pystrtod.h" "$(@D)/python_include/pystrtod.h" && cp -f "C:/Python36/include/pythonrun.h" "$(@D)/python_include/pythonrun.h" && cp -f "C:/Python36/include/pythread.h" "$(@D)/python_include/pythread.h" && cp -f "C:/Python36/include/pytime.h" "$(@D)/python_include/pytime.h" && cp -f "C:/Python36/include/rangeobject.h" "$(@D)/python_include/rangeobject.h" && cp -f "C:/Python36/include/setobject.h" "$(@D)/python_include/setobject.h" && cp -f "C:/Python36/include/sliceobject.h" "$(@D)/python_include/sliceobject.h" && cp -f "C:/Python36/include/structmember.h" "$(@D)/python_include/structmember.h" && cp -f "C:/Python36/include/structseq.h" "$(@D)/python_include/structseq.h" && cp -f "C:/Python36/include/symtable.h" "$(@D)/python_include/symtable.h" && cp -f "C:/Python36/include/sysmodule.h" "$(@D)/python_include/sysmodule.h" && cp -f "C:/Python36/include/token.h" "$(@D)/python_include/token.h" && cp -f "C:/Python36/include/traceback.h" "$(@D)/python_include/traceback.h" && cp -f "C:/Python36/include/tupleobject.h" "$(@D)/python_include/tupleobject.h" && cp -f "C:/Python36/include/typeslots.h" "$(@D)/python_include/typeslots.h" && cp -f "C:/Python36/include/ucnhash.h" "$(@D)/python_include/ucnhash.h" && cp -f "C:/Python36/include/unicodeobject.h" "$(@D)/python_include/unicodeobject.h" && cp -f "C:/Python36/include/warnings.h" "$(@D)/python_include/warnings.h" && cp -f "C:/Python36/include/weakrefobject.h" "$(@D)/python_include/weakrefobject.h"
+   """,
+)
+
+genrule(
+    name = "numpy_include",
+    outs = [
+        "numpy_include/numpy/__multiarray_api.h",
+        "numpy_include/numpy/__ufunc_api.h",
+        "numpy_include/numpy/_neighborhood_iterator_imp.h",
+        "numpy_include/numpy/_numpyconfig.h",
+        "numpy_include/numpy/arrayobject.h",
+        "numpy_include/numpy/arrayscalars.h",
+        "numpy_include/numpy/halffloat.h",
+        "numpy_include/numpy/multiarray_api.txt",
+        "numpy_include/numpy/ndarrayobject.h",
+        "numpy_include/numpy/ndarraytypes.h",
+        "numpy_include/numpy/noprefix.h",
+        "numpy_include/numpy/npy_1_7_deprecated_api.h",
+        "numpy_include/numpy/npy_3kcompat.h",
+        "numpy_include/numpy/npy_common.h",
+        "numpy_include/numpy/npy_cpu.h",
+        "numpy_include/numpy/npy_endian.h",
+        "numpy_include/numpy/npy_interrupt.h",
+        "numpy_include/numpy/npy_math.h",
+        "numpy_include/numpy/npy_no_deprecated_api.h",
+        "numpy_include/numpy/npy_os.h",
+        "numpy_include/numpy/numpyconfig.h",
+        "numpy_include/numpy/old_defines.h",
+        "numpy_include/numpy/oldnumeric.h",
+        "numpy_include/numpy/ufunc_api.txt",
+        "numpy_include/numpy/ufuncobject.h",
+        "numpy_include/numpy/utils.h",
+    ],
+    cmd = """
+cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/__multiarray_api.h" "$(@D)/numpy_include/numpy/__multiarray_api.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/__ufunc_api.h" "$(@D)/numpy_include/numpy/__ufunc_api.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h" "$(@D)/numpy_include/numpy/_neighborhood_iterator_imp.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/_numpyconfig.h" "$(@D)/numpy_include/numpy/_numpyconfig.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/arrayobject.h" "$(@D)/numpy_include/numpy/arrayobject.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/arrayscalars.h" "$(@D)/numpy_include/numpy/arrayscalars.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/halffloat.h" "$(@D)/numpy_include/numpy/halffloat.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/multiarray_api.txt" "$(@D)/numpy_include/numpy/multiarray_api.txt" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/ndarrayobject.h" "$(@D)/numpy_include/numpy/ndarrayobject.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/ndarraytypes.h" "$(@D)/numpy_include/numpy/ndarraytypes.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/noprefix.h" "$(@D)/numpy_include/numpy/noprefix.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h" "$(@D)/numpy_include/numpy/npy_1_7_deprecated_api.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/npy_3kcompat.h" "$(@D)/numpy_include/numpy/npy_3kcompat.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/npy_common.h" "$(@D)/numpy_include/numpy/npy_common.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/npy_cpu.h" "$(@D)/numpy_include/numpy/npy_cpu.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/npy_endian.h" "$(@D)/numpy_include/numpy/npy_endian.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/npy_interrupt.h" "$(@D)/numpy_include/numpy/npy_interrupt.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/npy_math.h" "$(@D)/numpy_include/numpy/npy_math.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h" "$(@D)/numpy_include/numpy/npy_no_deprecated_api.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/npy_os.h" "$(@D)/numpy_include/numpy/npy_os.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/numpyconfig.h" "$(@D)/numpy_include/numpy/numpyconfig.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/old_defines.h" "$(@D)/numpy_include/numpy/old_defines.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/oldnumeric.h" "$(@D)/numpy_include/numpy/oldnumeric.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/ufunc_api.txt" "$(@D)/numpy_include/numpy/ufunc_api.txt" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/ufuncobject.h" "$(@D)/numpy_include/numpy/ufuncobject.h" && cp -f "C:/Python36/lib/site-packages/numpy/core/include/numpy/utils.h" "$(@D)/numpy_include/numpy/utils.h"
+   """,
+)
+
+genrule(
+    name = "python_import_lib",
+    outs = [
+        "python36.lib",
+    ],
+    cmd = """
+cp -f "C:/Python36/libs/python36.lib" "$(@D)/python36.lib"
+   """,
+)
-- 
GitLab


From 67e7aa1c0c960f2f5ea47bb39bb7aa1830c1d268 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 08:43:20 -0700
Subject: [PATCH 0959/1085] Internal Change

PiperOrigin-RevId: 217148772
---
 tensorflow/core/BUILD | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 7789ea22fc..841291e6d8 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -237,7 +237,6 @@ tf_proto_library(
     srcs = [],
     cc_api_version = 2,
     default_header = True,
-    js_api_version = 2,
     protodeps = [
         ":protos_all_proto",
         ":error_codes_proto",
@@ -2406,7 +2405,6 @@ tf_proto_library(
     srcs = ERROR_CODES_PROTO_SRCS,
     cc_api_version = 2,
     default_header = True,
-    js_api_version = 2,
     provide_cc_alias = True,
 )
 
@@ -2426,7 +2424,6 @@ tf_proto_library(
     srcs = COMMON_PROTO_SRCS + ADDITIONAL_CORE_PROTO_SRCS,
     cc_api_version = 2,
     default_header = True,
-    js_api_version = 2,
     protodeps = [
         ":error_codes_proto",
     ],
-- 
GitLab


From 3d9d0033846c6668d660bbfcab4cbc794ad73c7b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 09:03:11 -0700
Subject: [PATCH 0960/1085] Remove incorrect automatic comment for namespace
 lookup

PiperOrigin-RevId: 217151569
---
 tensorflow/core/kernels/lookup_table_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/lookup_table_op.cc b/tensorflow/core/kernels/lookup_table_op.cc
index 0bc1ea77d6..3d1ee50c95 100644
--- a/tensorflow/core/kernels/lookup_table_op.cc
+++ b/tensorflow/core/kernels/lookup_table_op.cc
@@ -774,7 +774,7 @@ class MutableDenseHashTable final : public LookupInterface {
   uint64 empty_key_hash_;
   PersistentTensor deleted_key_;
   uint64 deleted_key_hash_;
-};  // namespace lookup
+};
 
 }  // namespace lookup
 
-- 
GitLab


From e3fb5d037cf0ced745679feff87f6f0abc022205 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Mon, 15 Oct 2018 09:04:50 -0700
Subject: [PATCH 0961/1085] Enable DT_INT64 for the XLA implementation of
 DataFormatVecPermute.

PiperOrigin-RevId: 217151877
---
 tensorflow/compiler/tests/permute_test.py     | 48 +++++++++++--------
 .../compiler/tf2xla/kernels/permute_op.cc     |  6 +--
 2 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/tensorflow/compiler/tests/permute_test.py b/tensorflow/compiler/tests/permute_test.py
index dbb9274df4..e2f6de821b 100644
--- a/tensorflow/compiler/tests/permute_test.py
+++ b/tensorflow/compiler/tests/permute_test.py
@@ -40,40 +40,48 @@ class XlaPermuteOpTest(xla_test.XLATestCase):
     self.assertAllEqual(result, expected)
 
   def testNHWCToNCHW(self):
-    x = np.array([7, 4, 9, 3], dtype=np.int32)
-    self._runPermuteAndCompare(x, "NHWC", "NCHW", [7, 3, 4, 9])
+    for dtype in {np.int32, np.int64}:
+      x = np.array([7, 4, 9, 3], dtype=dtype)
+      self._runPermuteAndCompare(x, "NHWC", "NCHW", [7, 3, 4, 9])
 
   def testNCHWToNHWC(self):
-    x = np.array([7, 4, 9, 3], dtype=np.int32)
-    self._runPermuteAndCompare(x, "NCHW", "NHWC", [7, 9, 3, 4])
+    for dtype in {np.int32, np.int64}:
+      x = np.array([7, 4, 9, 3], dtype=dtype)
+      self._runPermuteAndCompare(x, "NCHW", "NHWC", [7, 9, 3, 4])
 
   def testNHWCToHWNC(self):
-    x = np.array([7, 4, 9, 3], dtype=np.int32)
-    self._runPermuteAndCompare(x, "NHWC", "HWNC", [4, 9, 7, 3])
+    for dtype in {np.int32, np.int64}:
+      x = np.array([7, 4, 9, 3], dtype=dtype)
+      self._runPermuteAndCompare(x, "NHWC", "HWNC", [4, 9, 7, 3])
 
   def testHWNCToNHWC(self):
-    x = np.array([7, 4, 9, 3], dtype=np.int32)
-    self._runPermuteAndCompare(x, "HWNC", "NHWC", [9, 7, 4, 3])
+    for dtype in {np.int32, np.int64}:
+      x = np.array([7, 4, 9, 3], dtype=dtype)
+      self._runPermuteAndCompare(x, "HWNC", "NHWC", [9, 7, 4, 3])
 
   def testNHWCToNCHW2D(self):
-    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
-    self._runPermuteAndCompare(x, "NHWC", "NCHW",
-                               [[7, 4], [5, 1], [9, 3], [4, 5]])
+    for dtype in {np.int32, np.int64}:
+      x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=dtype)
+      self._runPermuteAndCompare(x, "NHWC", "NCHW",
+                                 [[7, 4], [5, 1], [9, 3], [4, 5]])
 
   def testNHWCToHWNC2D(self):
-    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
-    self._runPermuteAndCompare(x, "NHWC", "HWNC",
-                               [[9, 3], [4, 5], [7, 4], [5, 1]])
+    for dtype in {np.int32, np.int64}:
+      x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=dtype)
+      self._runPermuteAndCompare(x, "NHWC", "HWNC",
+                                 [[9, 3], [4, 5], [7, 4], [5, 1]])
 
   def testHWNCToNHWC2D(self):
-    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
-    self._runPermuteAndCompare(x, "HWNC", "NHWC",
-                               [[4, 5], [7, 4], [9, 3], [5, 1]])
+    for dtype in {np.int32, np.int64}:
+      x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=dtype)
+      self._runPermuteAndCompare(x, "HWNC", "NHWC",
+                                 [[4, 5], [7, 4], [9, 3], [5, 1]])
 
   def testNCHWToNHWC2D(self):
-    x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=np.int32)
-    self._runPermuteAndCompare(x, "NCHW", "NHWC",
-                               [[7, 4], [4, 5], [5, 1], [9, 3]])
+    for dtype in {np.int32, np.int64}:
+      x = np.array([[7, 4], [9, 3], [4, 5], [5, 1]], dtype=dtype)
+      self._runPermuteAndCompare(x, "NCHW", "NHWC",
+                                 [[7, 4], [4, 5], [5, 1], [9, 3]])
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/compiler/tf2xla/kernels/permute_op.cc b/tensorflow/compiler/tf2xla/kernels/permute_op.cc
index 3ca5eecf1a..94b51e1a58 100644
--- a/tensorflow/compiler/tf2xla/kernels/permute_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/permute_op.cc
@@ -90,9 +90,9 @@ class DataFormatVecPermuteOp : public XlaOpKernel {
   TF_DISALLOW_COPY_AND_ASSIGN(DataFormatVecPermuteOp);
 };
 
-// TODO(b/115384656): Support DT_INT64.
-REGISTER_XLA_OP(Name("DataFormatVecPermute").TypeConstraint("T", DT_INT32),
-                DataFormatVecPermuteOp);
+REGISTER_XLA_OP(
+    Name("DataFormatVecPermute").TypeConstraint("T", {DT_INT32, DT_INT64}),
+    DataFormatVecPermuteOp);
 
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From a57b7902bb7219674ad63e6d82796468e830f40e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 09:14:11 -0700
Subject: [PATCH 0962/1085] Internal change

PiperOrigin-RevId: 217153168
---
 tensorflow/c/BUILD                         | 15 +++++-
 tensorflow/c/c_api.cc                      |  3 ++
 tensorflow/c/c_api_test.cc                 | 42 ++++++++++++----
 tensorflow/contrib/coder/BUILD             |  2 +-
 tensorflow/contrib/hadoop/BUILD            |  2 +-
 tensorflow/contrib/image/BUILD             | 58 +++++++++++++++++-----
 tensorflow/contrib/periodic_resample/BUILD |  2 +-
 tensorflow/contrib/resampler/BUILD         | 12 ++++-
 tensorflow/contrib/rnn/BUILD               | 40 ++++++++-------
 tensorflow/contrib/text/BUILD              |  2 +-
 tensorflow/examples/adding_an_op/BUILD     |  5 +-
 tensorflow/python/BUILD                    |  1 +
 tensorflow/python/kernel_tests/BUILD       | 15 ++++--
 tensorflow/tensorflow.bzl                  |  2 +-
 14 files changed, 149 insertions(+), 52 deletions(-)

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 17e2e292eb..56f5e6767a 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -6,11 +6,12 @@ licenses(["notice"])  # Apache 2.0
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
-    "tf_cuda_cc_test",
     "tf_copts",
     "tf_cuda_library",
     "tf_custom_op_library",
+    "tf_kernel_library",
 )
+load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
 
 # -----------------------------------------------------------------------------
 # Public targets
@@ -197,9 +198,9 @@ tf_cuda_cc_test(
     size = "small",
     srcs = ["c_api_test.cc"],
     data = [
-        ":test_op.so",
         "//tensorflow/cc/saved_model:saved_model_half_plus_two",
     ],
+    kernels = [":test_op_kernel"],
     linkopts = select({
         "//tensorflow:darwin": ["-headerpad_max_install_names"],
         "//conditions:default": [],
@@ -285,6 +286,16 @@ tf_custom_op_library(
     srcs = ["test_op.cc"],
 )
 
+tf_kernel_library(
+    name = "test_op_kernel",
+    srcs = ["test_op.cc"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+    alwayslink = 1,
+)
+
 # -----------------------------------------------------------------------------
 # Python API target
 
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 79811ceae5..1726db12fa 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -2770,6 +2770,9 @@ TF_Buffer* TF_ApiDefMapGet(TF_ApiDefMap* api_def_map, const char* name,
   }
   string name_str(name, name_len);
   const auto* api_def = api_def_map->api_def_map.GetApiDef(name_str);
+  if (api_def == nullptr) {
+    return nullptr;
+  }
 
   TF_Buffer* ret = TF_NewBuffer();
   status->status = MessageToBuffer(*api_def, ret);
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index 03516c39dc..c4746b4990 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -33,6 +33,7 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def.pb_text.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -195,12 +196,31 @@ TEST(CAPI, LibraryLoadFunctions) {
   TF_DeleteStatus(status);
   ASSERT_EQ(TF_OK, code) << status_msg;
 
-  // Test op list.
-  TF_Buffer op_list_buf = TF_GetOpList(lib);
-  tensorflow::OpList op_list;
-  EXPECT_TRUE(op_list.ParseFromArray(op_list_buf.data, op_list_buf.length));
-  ASSERT_EQ(op_list.op_size(), 1);
-  EXPECT_EQ("TestCApi", op_list.op(0).name());
+  {
+    TF_Buffer* op_list_buffer = TF_GetAllOpList();
+    tensorflow::OpList op_list;
+    op_list.ParseFromArray(op_list_buffer->data, op_list_buffer->length);
+    ASSERT_GE(op_list.op_size(), 1);
+    typedef tensorflow::protobuf::RepeatedPtrField<tensorflow::OpDef> OpDefs;
+    const OpDefs& ops = op_list.op();
+    bool found = std::find_if(ops.begin(), ops.end(),
+                              [](const tensorflow::OpDef& op_def) {
+                                return op_def.name() == "TestCApi";
+                              }) != ops.end();
+    EXPECT_TRUE(found);
+    TF_DeleteBuffer(op_list_buffer);
+  }
+
+#if !defined(TENSORFLOW_NO_SHARED_OBJECTS)
+  {
+    // Test op list.
+    TF_Buffer op_list_buf = TF_GetOpList(lib);
+    tensorflow::OpList op_list;
+    EXPECT_TRUE(op_list.ParseFromArray(op_list_buf.data, op_list_buf.length));
+    ASSERT_EQ(op_list.op_size(), 1);
+    EXPECT_EQ("TestCApi", op_list.op(0).name());
+  }
+#endif  // !defined(TENSORFLOW_NO_SHARED_OBJECTS)
 
   TF_DeleteLibraryHandle(lib);
 }
@@ -2335,9 +2355,9 @@ TEST(TestApiDef, TestCreateApiDef) {
   EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TF_DeleteStatus(status);
 
-  TF_Buffer op_list_buf = TF_GetOpList(lib);
+  TF_Buffer* op_list_buf = TF_GetAllOpList();
   status = TF_NewStatus();
-  auto* api_def_map = TF_NewApiDefMap(&op_list_buf, status);
+  auto* api_def_map = TF_NewApiDefMap(op_list_buf, status);
   EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TF_DeleteStatus(status);
 
@@ -2355,6 +2375,7 @@ TEST(TestApiDef, TestCreateApiDef) {
 
   TF_DeleteBuffer(api_def_buf);
   TF_DeleteApiDefMap(api_def_map);
+  TF_DeleteBuffer(op_list_buf);
   TF_DeleteLibraryHandle(lib);
 }
 
@@ -2369,9 +2390,9 @@ TEST(TestApiDef, TestCreateApiDefWithOverwrites) {
   EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TF_DeleteStatus(status);
 
-  TF_Buffer op_list_buf = TF_GetOpList(lib);
+  TF_Buffer* op_list_buf = TF_GetAllOpList();
   status = TF_NewStatus();
-  auto* api_def_map = TF_NewApiDefMap(&op_list_buf, status);
+  auto* api_def_map = TF_NewApiDefMap(op_list_buf, status);
   EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TF_DeleteStatus(status);
 
@@ -2400,6 +2421,7 @@ TEST(TestApiDef, TestCreateApiDefWithOverwrites) {
 
   TF_DeleteBuffer(api_def_buf);
   TF_DeleteApiDefMap(api_def_map);
+  TF_DeleteBuffer(op_list_buf);
   TF_DeleteLibraryHandle(lib);
 }
 
diff --git a/tensorflow/contrib/coder/BUILD b/tensorflow/contrib/coder/BUILD
index 4bfd753bb1..7f96a103d4 100644
--- a/tensorflow/contrib/coder/BUILD
+++ b/tensorflow/contrib/coder/BUILD
@@ -13,12 +13,12 @@ load(
     "//tensorflow:tensorflow.bzl",
     "tf_cc_test",
     "tf_custom_op_library",
-    "tf_custom_op_py_library",
     "tf_gen_op_libs",
     "tf_gen_op_wrapper_py",
     "tf_kernel_library",
     "tf_py_test",
 )
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 
 cc_library(
     name = "range_coder",
diff --git a/tensorflow/contrib/hadoop/BUILD b/tensorflow/contrib/hadoop/BUILD
index ccad31efa1..178a8a6f08 100644
--- a/tensorflow/contrib/hadoop/BUILD
+++ b/tensorflow/contrib/hadoop/BUILD
@@ -7,12 +7,12 @@ exports_files(["LICENSE"])
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_custom_op_library",
-    "tf_custom_op_py_library",
     "tf_gen_op_libs",
     "tf_gen_op_wrapper_py",
     "tf_kernel_library",
     "tf_py_test",
 )
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 
 filegroup(
     name = "test_data",
diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD
index da450480b3..c9d917fe20 100755
--- a/tensorflow/contrib/image/BUILD
+++ b/tensorflow/contrib/image/BUILD
@@ -49,6 +49,7 @@ tf_kernel_library(
         "kernels/image_ops.h",
     ],
     deps = [
+        ":image_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//third_party/eigen3",
@@ -74,7 +75,6 @@ tf_custom_op_py_library(
     dso = [":python/ops/_image_ops.so"],
     kernels = [
         ":image_ops_kernels",
-        ":image_ops_op_lib",
     ],
     srcs_version = "PY2AND3",
     deps = [
@@ -128,6 +128,26 @@ tf_custom_op_library(
     ],
 )
 
+tf_kernel_library(
+    name = "distort_image_ops_kernels",
+    srcs = [
+        "kernels/adjust_hsv_in_yiq_op.cc",
+        "kernels/adjust_hsv_in_yiq_op.h",
+    ],
+    gpu_srcs = [
+        "kernels/adjust_hsv_in_yiq_op_gpu.cu.cc",
+        "kernels/adjust_hsv_in_yiq_op.h",
+    ],
+    deps = [
+        ":distort_image_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/kernels:gpu_util_hdrs",
+        "//third_party/eigen3",
+    ],
+    alwayslink = 1,
+)
+
 tf_cc_test(
     name = "adjust_hsv_in_yiq_op_test",
     size = "small",
@@ -155,13 +175,16 @@ tf_gen_op_wrapper_py(
     deps = [":distort_image_ops_op_lib"],
 )
 
-py_library(
+tf_custom_op_py_library(
     name = "distort_image_py",
     srcs = [
         "__init__.py",
         "python/ops/distort_image_ops.py",
     ],
-    data = [":python/ops/_distort_image_ops.so"],
+    dso = [":python/ops/_distort_image_ops.so"],
+    kernels = [
+        ":distort_image_ops_kernels",
+    ],
     srcs_version = "PY2AND3",
     deps = [
         ":distort_image_ops",
@@ -338,25 +361,36 @@ tf_gen_op_libs(
     op_lib_names = ["single_image_random_dot_stereograms_ops"],
 )
 
+tf_kernel_library(
+    name = "single_image_random_dot_stereograms_kernels",
+    srcs = [
+        "kernels/single_image_random_dot_stereograms_ops.cc",
+    ],
+    deps = [
+        ":single_image_random_dot_stereograms_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//third_party/eigen3",
+    ],
+)
+
 tf_gen_op_wrapper_py(
     name = "single_image_random_dot_stereograms_ops",
     deps = [":single_image_random_dot_stereograms_ops_op_lib"],
 )
 
-cc_library(
+alias(
     name = "image_ops_cc",
-    srcs = ["ops/image_ops.cc"],
-    deps = [
-        ":image_ops_kernels",
-        "//tensorflow/core:framework",
-    ],
-    alwayslink = 1,
+    actual = ":image_ops_op_lib",
 )
 
-py_library(
+tf_custom_op_py_library(
     name = "single_image_random_dot_stereograms_py",
     srcs = glob(["python/ops/single*.py"]) + ["__init__.py"],
-    data = [":python/ops/_single_image_random_dot_stereograms.so"],
+    dso = [":python/ops/_single_image_random_dot_stereograms.so"],
+    kernels = [
+        ":single_image_random_dot_stereograms_kernels",
+    ],
     srcs_version = "PY2AND3",
     deps = [
         ":image_py",
diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD
index f2171efc95..c980a9342e 100644
--- a/tensorflow/contrib/periodic_resample/BUILD
+++ b/tensorflow/contrib/periodic_resample/BUILD
@@ -9,10 +9,10 @@ load(
     "tf_cc_test",
     "tf_gen_op_libs",
     "tf_custom_op_library",
-    "tf_custom_op_py_library",
     "tf_gen_op_wrapper_py",
 )
 load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 
 cc_library(
     name = "all_ops",
diff --git a/tensorflow/contrib/resampler/BUILD b/tensorflow/contrib/resampler/BUILD
index 48345d7030..b3f32b8f34 100644
--- a/tensorflow/contrib/resampler/BUILD
+++ b/tensorflow/contrib/resampler/BUILD
@@ -7,12 +7,12 @@ package(default_visibility = ["//visibility:public"])
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_custom_op_library",
-    "tf_custom_op_py_library",
     "tf_gen_op_libs",
     "tf_gen_op_wrapper_py",
     "tf_kernel_library",
 )
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 
 tf_custom_op_py_library(
     name = "resampler_py",
@@ -20,7 +20,6 @@ tf_custom_op_py_library(
     dso = [":python/ops/_resampler_ops.so"],
     kernels = [
         ":resampler_ops_kernels",
-        ":resampler_ops_op_lib",
     ],
     visibility = ["//visibility:public"],
     deps = [
@@ -40,8 +39,17 @@ tf_custom_op_py_library(
 
 tf_kernel_library(
     name = "resampler_ops_kernels",
+    srcs = [
+        "kernels/resampler_ops.cc",
+        "kernels/resampler_ops.h",
+    ],
+    gpu_srcs = [
+        "kernels/resampler_ops_gpu.cu.cc",
+        "kernels/resampler_ops.h",
+    ],
     prefix = "resampler_ops",
     deps = [
+        ":resampler_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
     ],
diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD
index 1385a9ddc1..391df8cdb4 100644
--- a/tensorflow/contrib/rnn/BUILD
+++ b/tensorflow/contrib/rnn/BUILD
@@ -4,10 +4,10 @@
 
 licenses(["notice"])  # Apache 2.0
 
-exports_files(["LICENSE"])
-
 package(default_visibility = ["//visibility:public"])
 
+exports_files(["LICENSE"])
+
 load("//tensorflow:tensorflow.bzl", "cuda_py_tests")
 load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 load(
@@ -19,10 +19,6 @@ load(
     "tf_kernel_library",
     "tf_gen_op_wrapper_py",
 )
-load(
-    "//tensorflow/core:platform/default/build_config.bzl",
-    "tf_kernel_tests_linkstatic",
-)
 
 cc_library(
     name = "all_ops",
@@ -290,7 +286,7 @@ tf_cc_test(
     name = "ops/gru_ops_test",
     size = "small",
     srcs = ["ops/gru_ops_test.cc"],
-    data = [":python/ops/_gru_ops.so"],
+    kernels = [":gru_ops_kernels"],
     tags = ["noasan"],
     # We must ensure that the dependencies can be dynamically linked since
     # the shared library must be able to use core:framework.
@@ -310,7 +306,9 @@ tf_cc_test(
     name = "ops/lstm_ops_test",
     size = "small",
     srcs = ["ops/lstm_ops_test.cc"],
-    data = [":python/ops/_lstm_ops.so"],
+    kernels = [
+        ":lstm_ops_kernels",
+    ],
     tags = ["noasan"],
     # We must ensure that the dependencies can be dynamically linked since
     # the shared library must be able to use core:framework.
@@ -334,16 +332,29 @@ tf_gen_op_libs(
 )
 
 tf_kernel_library(
-    name = "gru_ops_kernels",
+    name = "blas_gemm",
     srcs = [
         "kernels/blas_gemm.cc",
-        "kernels/blas_gemm.h",
     ],
+    hdrs = ["kernels/blas_gemm.h"],
     gpu_srcs = [
         "kernels/blas_gemm.h",
     ],
+    visibility = ["//visibility:private"],
+    deps = [
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/kernels:eigen_helpers",
+        "//third_party/eigen3",
+    ],
+)
+
+tf_kernel_library(
+    name = "gru_ops_kernels",
     prefix = "kernels/gru_ops",
     deps = [
+        ":blas_gemm",
+        ":gru_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/kernels:eigen_helpers",
@@ -353,15 +364,10 @@ tf_kernel_library(
 
 tf_kernel_library(
     name = "lstm_ops_kernels",
-    srcs = [
-        "kernels/blas_gemm.cc",
-        "kernels/blas_gemm.h",
-    ],
-    gpu_srcs = [
-        "kernels/blas_gemm.h",
-    ],
     prefix = "kernels/lstm_ops",
     deps = [
+        ":blas_gemm",
+        ":lstm_ops_op_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/kernels:eigen_helpers",
diff --git a/tensorflow/contrib/text/BUILD b/tensorflow/contrib/text/BUILD
index 38d91f7e49..a434c12039 100644
--- a/tensorflow/contrib/text/BUILD
+++ b/tensorflow/contrib/text/BUILD
@@ -13,11 +13,11 @@ load(
     "//tensorflow:tensorflow.bzl",
     "py_test",
     "tf_custom_op_library",
-    "tf_custom_op_py_library",
     "tf_gen_op_libs",
     "tf_gen_op_wrapper_py",
     "tf_kernel_library",
 )
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
 
 tf_custom_op_py_library(
     name = "text_py",
diff --git a/tensorflow/examples/adding_an_op/BUILD b/tensorflow/examples/adding_an_op/BUILD
index cf8054be6a..2b39b3683f 100644
--- a/tensorflow/examples/adding_an_op/BUILD
+++ b/tensorflow/examples/adding_an_op/BUILD
@@ -68,6 +68,7 @@ py_test(
     size = "small",
     srcs = ["zero_out_1_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["notap"],
     deps = [
         ":zero_out_op_1",
         "//tensorflow:tensorflow_py",
@@ -79,6 +80,7 @@ py_test(
     size = "small",
     srcs = ["zero_out_2_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["notap"],
     deps = [
         ":zero_out_grad_2",
         ":zero_out_op_2",
@@ -91,6 +93,7 @@ py_test(
     size = "small",
     srcs = ["zero_out_3_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["notap"],
     deps = [
         ":zero_out_op_3",
         "//tensorflow:tensorflow_py",
@@ -116,7 +119,7 @@ py_test(
     size = "small",
     srcs = ["cuda_op_test.py"],
     srcs_version = "PY2AND3",
-    tags = tf_cuda_tests_tags(),
+    tags = tf_cuda_tests_tags() + ["notap"],
     deps = [
         ":cuda_op",
         "//tensorflow:tensorflow_py",
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 00577426a0..c749017627 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -468,6 +468,7 @@ py_test(
     tags = [
         "no_pip",  # Path issues due to test environment
         "no_windows",
+        "notap",
     ],
     deps = [
         ":client_testlib",
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index eee03b7ef2..bbadc9907b 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -3214,7 +3214,10 @@ tf_py_test(
         "//tensorflow/python:platform",
     ],
     data = [":ackermann_op.so"],
-    tags = ["no_pip"],
+    tags = [
+        "no_pip",
+        "notap",
+    ],
 )
 
 tf_custom_op_library(
@@ -3233,7 +3236,10 @@ tf_py_test(
         "//tensorflow/python:platform",
     ],
     data = [":duplicate_op.so"],
-    tags = ["no_pip"],
+    tags = [
+        "no_pip",
+        "notap",
+    ],
 )
 
 tf_custom_op_library(
@@ -3252,7 +3258,10 @@ tf_py_test(
         "//tensorflow/python:platform",
     ],
     data = [":invalid_op.so"],
-    tags = ["no_pip"],
+    tags = [
+        "no_pip",
+        "notap",
+    ],
 )
 
 cuda_py_test(
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 34d6fe825f..a886dcf5d7 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1543,7 +1543,7 @@ def tf_custom_op_py_library(
         srcs_version = "PY2AND3",
         visibility = None,
         deps = []):
-    kernels = kernels  # unused argument
+    _ignore = [kernels]
     native.py_library(
         name = name,
         data = dso,
-- 
GitLab


From aba365a8e13ca6a4cf4fccb60226995433849302 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 15 Oct 2018 09:27:23 -0700
Subject: [PATCH 0963/1085] Add a missing PySequence_Fast in the eager fast
 path

Correctly handles things that pretend to be lists, like the ListWrappers we use for variable tracking.

Fixes #22853.

PiperOrigin-RevId: 217154951
---
 tensorflow/python/eager/pywrap_tfe_src.cc      | 15 +++++++++++----
 .../checkpointable/data_structures_test.py     | 18 ++++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 9789dbadee..dcbe6d42bd 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -2547,7 +2547,12 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) {
     if (!input_arg.number_attr().empty()) {
       // The item is a homogeneous list.
       if (!RaiseIfNotPySequence(input, input_arg.number_attr())) return nullptr;
-      Py_ssize_t len = PySequence_Fast_GET_SIZE(input);
+      tensorflow::Safe_PyObjectPtr fast_input(
+          PySequence_Fast(input, "Could not parse sequence."));
+      if (fast_input.get() == nullptr) {
+        return nullptr;
+      }
+      Py_ssize_t len = PySequence_Fast_GET_SIZE(fast_input.get());
 
       TFE_OpSetAttrInt(op, input_arg.number_attr().data(), len);
       if (op_exec_info.run_callbacks) {
@@ -2559,15 +2564,17 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) {
 
       if (len > 0) {
         // First item adds the type attr.
-        if (!AddInputToOp(&op_exec_info, PySequence_Fast_GET_ITEM(input, 0),
-                          true, input_arg, flattened_attrs.get(),
+        if (!AddInputToOp(&op_exec_info,
+                          PySequence_Fast_GET_ITEM(fast_input.get(), 0), true,
+                          input_arg, flattened_attrs.get(),
                           flattened_inputs.get(), op, status)) {
           return nullptr;
         }
 
         for (Py_ssize_t j = 1; j < len; j++) {
           // Since the list is homogeneous, we don't need to re-add the attr.
-          if (!AddInputToOp(&op_exec_info, PySequence_Fast_GET_ITEM(input, j),
+          if (!AddInputToOp(&op_exec_info,
+                            PySequence_Fast_GET_ITEM(fast_input.get(), j),
                             false, input_arg, nullptr /* flattened_attrs */,
                             flattened_inputs.get(), op, status)) {
             return nullptr;
diff --git a/tensorflow/python/training/checkpointable/data_structures_test.py b/tensorflow/python/training/checkpointable/data_structures_test.py
index 5597c7c772..ff7d1f1d2d 100644
--- a/tensorflow/python/training/checkpointable/data_structures_test.py
+++ b/tensorflow/python/training/checkpointable/data_structures_test.py
@@ -24,6 +24,7 @@ import six
 
 from tensorflow.python.eager import context
 from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras.engine import training
 from tensorflow.python.keras.layers import core
@@ -156,6 +157,23 @@ class ListTests(test.TestCase):
     with self.assertRaises(AttributeError):
       data_structures.List().pop()
 
+  @test_util.run_in_graph_and_eager_modes
+  def testTensorConversion(self):
+
+    class ListToTensor(training.Model):
+
+      def __init__(self):
+        super(ListToTensor, self).__init__()
+        self.l = [1., 2., 3.]
+
+    self.assertAllEqual(
+        [1., 2., 3.],
+        self.evaluate(constant_op.constant(ListToTensor().l)))
+
+    self.assertAllEqual(
+        [1., 2., 3.],
+        self.evaluate(array_ops.pack(ListToTensor().l)))
+
   def testNesting(self):
     with context.graph_mode():
       inner = data_structures.List()
-- 
GitLab


From bb271165c1ee1a2b40964e9330ab686257077d36 Mon Sep 17 00:00:00 2001
From: Ben Zinberg <bzinberg@google.com>
Date: Mon, 15 Oct 2018 09:43:18 -0700
Subject: [PATCH 0964/1085] Fix copy/paste error in the docstrings of
 assertAll{Less,Greater}{,Equal}.

PiperOrigin-RevId: 217157127
---
 tensorflow/python/framework/test_util.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 95925bb471..b975ba5023 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1702,7 +1702,7 @@ class TensorFlowTestCase(googletest.TestCase):
     self.assertGreater(np.min(a), comparison_target)
 
   def assertAllLess(self, a, comparison_target):
-    """Assert element values are all greater than a target value.
+    """Assert element values are all less than a target value.
 
     Args:
       a: The numpy `ndarray`, or anything that can be converted into a
@@ -1713,7 +1713,7 @@ class TensorFlowTestCase(googletest.TestCase):
     self.assertLess(np.max(a), comparison_target)
 
   def assertAllGreaterEqual(self, a, comparison_target):
-    """Assert element values are all greater than a target value.
+    """Assert element values are all greater than or equal to a target value.
 
     Args:
       a: The numpy `ndarray`, or anything that can be converted into a
@@ -1724,7 +1724,7 @@ class TensorFlowTestCase(googletest.TestCase):
     self.assertGreaterEqual(np.min(a), comparison_target)
 
   def assertAllLessEqual(self, a, comparison_target):
-    """Assert element values are all greater than a target value.
+    """Assert element values are all less than or equal to a target value.
 
     Args:
       a: The numpy `ndarray`, or anything that can be converted into a
-- 
GitLab


From ab5ba2aa0c3817f472a8336bba4cbb18fdeda258 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 15 Oct 2018 09:59:00 -0700
Subject: [PATCH 0965/1085] Allow empty GCS tokens to be cached.

PiperOrigin-RevId: 217159671
---
 tensorflow/core/platform/cloud/google_auth_provider.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index 6ffe51e897..e15400780a 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -135,8 +135,7 @@ Status GoogleAuthProvider::GetToken(string* t) {
   mutex_lock lock(mu_);
   const uint64 now_sec = env_->NowSeconds();
 
-  if (!current_token_.empty() &&
-      now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) {
+  if (now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) {
     *t = current_token_;
     return Status::OK();
   }
-- 
GitLab


From 0e193212fe7bf09f8f19af3ac87ca807bf84a615 Mon Sep 17 00:00:00 2001
From: Deepak B <dbhadauria@google.com>
Date: Mon, 15 Oct 2018 10:06:07 -0700
Subject: [PATCH 0966/1085] Delete bug_template.md

Removing the bug template. Will add back once Issue Policy is pushed in so that I can add a link to it.
---
 .github/ISSUE_TEMPLATE/bug_template.md | 24 ------------------------
 1 file changed, 24 deletions(-)
 delete mode 100644 .github/ISSUE_TEMPLATE/bug_template.md

diff --git a/.github/ISSUE_TEMPLATE/bug_template.md b/.github/ISSUE_TEMPLATE/bug_template.md
deleted file mode 100644
index c8ab3e2a2c..0000000000
--- a/.github/ISSUE_TEMPLATE/bug_template.md
+++ /dev/null
@@ -1,24 +0,0 @@
-Please make sure that this is a bug. As per our GitHub Policy [link] we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub.
-
-Please fill the following Bug_Template:
-### System information
-- Have I written custom code (as opposed to using a stock example script provided in TensorFlow):
-- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
-- Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device:
-- TensorFlow installed from (source or binary):
-- TensorFlow version (use command below):
-- Python version:
-- Bazel version (if compiling from source):
-- GCC/Compiler version (if compiling from source):
-- CUDA/cuDNN version:
-- GPU model and memory:
-- Docker Image:
-
-You can use [this script](https://github.com/tensorflow/tensorflow/tree/master/tools/tf_env_collect.sh) to collect some of the information asked above.
-
-### Describe the current behavior
-
-### Describe the expected behavior
-
-### Code to reproduce the issue
-Provide a reproducible test case that is the bare minimum necessary to generate the problem.
-- 
GitLab


From 70a979701d4b03284a4ca5c45bc7ce07831e2c65 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 15 Oct 2018 10:06:54 -0700
Subject: [PATCH 0967/1085] Fix broken call and add test coverage for it.

PiperOrigin-RevId: 217161539
---
 .../python/autograph/converters/call_trees.py | 11 ++++----
 .../autograph/converters/call_trees_test.py   | 28 ++++++-------------
 2 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index 09072833d2..ca6945266e 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -85,14 +85,15 @@ class FunctionNamer(object):
 class CallTreeTransformer(converter.Base):
   """Transforms the call tree by renaming transformed symbols."""
 
-  def _resolve_name(self, node):
+  def _resolve_decorator_name(self, node):
     """Used to resolve decorator info."""
     if isinstance(node, gast.Call):
-      return self._resolve_name(node.func)
+      return self._resolve_decorator_name(node.func)
     if isinstance(node, gast.Name):
-      return self.ctx.namespace.get(node.id)
+      # TODO(mdan): Add test coverage for this branch.
+      return self.ctx.info.namespace.get(node.id)
     if isinstance(node, gast.Attribute):
-      parent = self._resolve_name(node.value)
+      parent = self._resolve_decorator_name(node.value)
       if parent is not None:
         return getattr(parent, node.attr)
       return None
@@ -170,7 +171,7 @@ class CallTreeTransformer(converter.Base):
         return True
 
       for dec in target_node.decorator_list:
-        decorator_fn = self._resolve_name(dec)
+        decorator_fn = self._resolve_decorator_name(dec)
         if (decorator_fn is not None and
             decorator_fn in self.ctx.program.options.strip_decorators):
           return False
diff --git a/tensorflow/python/autograph/converters/call_trees_test.py b/tensorflow/python/autograph/converters/call_trees_test.py
index dbc82a674a..8dbb91da3e 100644
--- a/tensorflow/python/autograph/converters/call_trees_test.py
+++ b/tensorflow/python/autograph/converters/call_trees_test.py
@@ -133,29 +133,19 @@ class CallTreesTest(converter_testing.TestCase):
         result_tensor = result.test_fn(constant_op.constant(1))
         self.assertEquals(sess.run(result_tensor), 3)
 
-  def test_decorated_callee(self):
+  def test_call_to_decotated_function(self):
 
-    # Using this trick to prevent the Python loader from automatically expanding
-    # the decorator. This simulates the situation found when converting a
-    # function from within an actual decorator.
+    def decorator(f):
+      return f
 
-    def wrapper_fn():
-
-      def dec(f):
-        return f
-
-      @dec
-      def called_fn(a):
-        return a
-
-      @dec
-      def test_fn(a):
-        return called_fn(a)
+    @decorator
+    def called_fn(a):
+      return a
 
-      return test_fn
+    def test_fn(a):
+      return called_fn(a)
 
-    node, ctx = self.prepare(wrapper_fn, {})
-    node = node.body[2]
+    node, ctx = self.prepare(test_fn, {'called_fn': called_fn})
     node = call_trees.transform(node, ctx)
 
 
-- 
GitLab


From 3febecc59017ccaf4dd30434168e5b900ef84e31 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Mon, 15 Oct 2018 10:16:57 -0700
Subject: [PATCH 0968/1085] Automated rollback of commit
 64628112e2557cae43b7d5f479bf5be598663712

PiperOrigin-RevId: 217163302
---
 tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc | 5 +++--
 tensorflow/core/grappler/optimizers/arithmetic_optimizer.h  | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 3614b00a50..9b94d2706a 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -3000,10 +3000,11 @@ class RemoveStackStridedSliceSameAxis : public ArithmeticOptimizerStage {
     *pack_axis = pack->attr().at("axis").i();
     auto slice_properties =
         ctx().graph_properties->GetInputProperties(node->name());
-    *pack_output_shape = slice_properties[0].shape();
-    if (pack_output_shape->unknown_rank()) {
+    if (slice_properties.empty() ||
+        slice_properties[0].shape().unknown_rank()) {
       return Status::OK();
     }
+    *pack_output_shape = slice_properties[0].shape();
     const int pack_input_rank = pack_output_shape->dims() - 1;
     if (*pack_axis < 0) {
       // The ndims of any input into Pack op is its output ndims - 1.
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
index bb56f61e30..15e5ad9df5 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h
@@ -80,7 +80,7 @@ class ArithmeticOptimizer : public GraphOptimizer {
     bool convert_log1p = true;
     bool convert_expm1 = true;
     bool unary_ops_composition = true;
-    bool remove_stack_strided_slice_same_axis = false;
+    bool remove_stack_strided_slice_same_axis = true;
 
     // Choose which arithmetic optimizer stages will be enabled for a given
     // optimization level by default.
-- 
GitLab


From 788bb3b9e87af4abda37434b94cba6aa554a3de6 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Mon, 15 Oct 2018 10:22:32 -0700
Subject: [PATCH 0969/1085] [XLA] Take padded dimensions into account when
 verifying shape size.

PiperOrigin-RevId: 217164296
---
 tensorflow/compiler/xla/shape_util.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 7a34c0fb26..6c4b1485d2 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -931,7 +931,12 @@ StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
       return dense_shape_size;
     }
 
-    for (int64 dim : shape.dimensions()) {
+    bool is_padded =
+        LayoutUtil::IsDenseArray(shape) && LayoutUtil::IsPadded(shape);
+    absl::Span<const int64> shape_max_dimensions =
+        is_padded ? LayoutUtil::PaddedDimensions(shape)
+                  : AsInt64Slice(shape.dimensions());
+    for (int64 dim : shape_max_dimensions) {
       dense_shape_size = MultiplyWithoutOverflow(dense_shape_size, dim);
       if (dense_shape_size < 0) {
         return dense_shape_size;
-- 
GitLab


From 1573f9f20d703c7a30ddd34d89ff7e1a5a363391 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 10:32:34 -0700
Subject: [PATCH 0970/1085] Internal change.

PiperOrigin-RevId: 217166408
---
 .../unidirectional_sequence_lstm_test.cc      | 90 +++++++++----------
 1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
index c97b0fdd61..7b9d66c19b 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
@@ -119,71 +119,71 @@ class UnidirectionalLSTMOpModel : public SingleOpModel {
     BuildInterpreter(input_shapes);
   }
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(const std::vector<float>& f) {
     PopulateTensor(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(const std::vector<float>& f) {
     PopulateTensor(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(const std::vector<float>& f) {
     PopulateTensor(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(const std::vector<float>& f) {
     PopulateTensor(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(const std::vector<float>& f) {
     PopulateTensor(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(const std::vector<float>& f) {
     PopulateTensor(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(const std::vector<float>& f) {
     PopulateTensor(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(const std::vector<float>& f) {
     PopulateTensor(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(const std::vector<float>& f) {
     PopulateTensor(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(const std::vector<float>& f) {
     PopulateTensor(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(const std::vector<float>& f) {
     PopulateTensor(cell_to_output_weights_, f);
   }
 
-  void SetInputGateBias(std::initializer_list<float> f) {
+  void SetInputGateBias(const std::vector<float>& f) {
     PopulateTensor(input_gate_bias_, f);
   }
 
-  void SetForgetGateBias(std::initializer_list<float> f) {
+  void SetForgetGateBias(const std::vector<float>& f) {
     PopulateTensor(forget_gate_bias_, f);
   }
 
-  void SetCellBias(std::initializer_list<float> f) {
+  void SetCellBias(const std::vector<float>& f) {
     PopulateTensor(cell_bias_, f);
   }
 
-  void SetOutputGateBias(std::initializer_list<float> f) {
+  void SetOutputGateBias(const std::vector<float>& f) {
     PopulateTensor(output_gate_bias_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(const std::vector<float>& f) {
     PopulateTensor(projection_weights_, f);
   }
 
-  void SetProjectionBias(std::initializer_list<float> f) {
+  void SetProjectionBias(const std::vector<float>& f) {
     PopulateTensor(projection_bias_, f);
   }
 
@@ -249,51 +249,51 @@ class HybridUnidirectionalLSTMOpModel : public UnidirectionalLSTMOpModel {
             use_peephole, use_projection_weights, use_projection_bias,
             cell_clip, proj_clip, input_shapes, TensorType_UINT8) {}
 
-  void SetInputToInputWeights(std::initializer_list<float> f) {
+  void SetInputToInputWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(input_to_input_weights_, f);
   }
 
-  void SetInputToForgetWeights(std::initializer_list<float> f) {
+  void SetInputToForgetWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(input_to_forget_weights_, f);
   }
 
-  void SetInputToCellWeights(std::initializer_list<float> f) {
+  void SetInputToCellWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(input_to_cell_weights_, f);
   }
 
-  void SetInputToOutputWeights(std::initializer_list<float> f) {
+  void SetInputToOutputWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(input_to_output_weights_, f);
   }
 
-  void SetRecurrentToInputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToInputWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f);
   }
 
-  void SetRecurrentToForgetWeights(std::initializer_list<float> f) {
+  void SetRecurrentToForgetWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f);
   }
 
-  void SetRecurrentToCellWeights(std::initializer_list<float> f) {
+  void SetRecurrentToCellWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f);
   }
 
-  void SetRecurrentToOutputWeights(std::initializer_list<float> f) {
+  void SetRecurrentToOutputWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f);
   }
 
-  void SetCellToInputWeights(std::initializer_list<float> f) {
+  void SetCellToInputWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(cell_to_input_weights_, f);
   }
 
-  void SetCellToForgetWeights(std::initializer_list<float> f) {
+  void SetCellToForgetWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f);
   }
 
-  void SetCellToOutputWeights(std::initializer_list<float> f) {
+  void SetCellToOutputWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(cell_to_output_weights_, f);
   }
 
-  void SetProjectionWeights(std::initializer_list<float> f) {
+  void SetProjectionWeights(const std::vector<float>& f) {
     SymmetricQuantizeAndPopulate(projection_weights_, f);
   }
 };
@@ -301,22 +301,22 @@ class HybridUnidirectionalLSTMOpModel : public UnidirectionalLSTMOpModel {
 class BaseLstmTest : public ::testing::Test {
  protected:
   // Weights of the LSTM model. Some are optional.
-  std::initializer_list<float> input_to_input_weights_;
-  std::initializer_list<float> input_to_cell_weights_;
-  std::initializer_list<float> input_to_forget_weights_;
-  std::initializer_list<float> input_to_output_weights_;
-  std::initializer_list<float> input_gate_bias_;
-  std::initializer_list<float> cell_gate_bias_;
-  std::initializer_list<float> forget_gate_bias_;
-  std::initializer_list<float> output_gate_bias_;
-  std::initializer_list<float> recurrent_to_input_weights_;
-  std::initializer_list<float> recurrent_to_cell_weights_;
-  std::initializer_list<float> recurrent_to_forget_weights_;
-  std::initializer_list<float> recurrent_to_output_weights_;
-  std::initializer_list<float> cell_to_input_weights_;
-  std::initializer_list<float> cell_to_forget_weights_;
-  std::initializer_list<float> cell_to_output_weights_;
-  std::initializer_list<float> projection_weights_;
+  std::vector<float> input_to_input_weights_;
+  std::vector<float> input_to_cell_weights_;
+  std::vector<float> input_to_forget_weights_;
+  std::vector<float> input_to_output_weights_;
+  std::vector<float> input_gate_bias_;
+  std::vector<float> cell_gate_bias_;
+  std::vector<float> forget_gate_bias_;
+  std::vector<float> output_gate_bias_;
+  std::vector<float> recurrent_to_input_weights_;
+  std::vector<float> recurrent_to_cell_weights_;
+  std::vector<float> recurrent_to_forget_weights_;
+  std::vector<float> recurrent_to_output_weights_;
+  std::vector<float> cell_to_input_weights_;
+  std::vector<float> cell_to_forget_weights_;
+  std::vector<float> cell_to_output_weights_;
+  std::vector<float> projection_weights_;
 
   // LSTM input is stored as num_batch x num_inputs vector.
   std::vector<std::vector<float>> lstm_input_;
-- 
GitLab


From aa3a7408e6d22cd2cb7c176778474e290bb9ac32 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Mon, 15 Oct 2018 10:38:05 -0700
Subject: [PATCH 0971/1085] Remove the temporary logging code and add a flag
 for Windowns FS

---
 .../kernels/data/matching_files_dataset_op.cc | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 08953ee390..32aaf6a573 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -120,8 +120,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
               Tensor filepath_tensor(ctx->allocator({}), DT_STRING, {});
 
               // Replace the forward slash with the backslash for Windows path
-              if (dataset()->patterns_[current_pattern_index_ - 1].find('\\') !=
-                  std::string::npos) {
+              if (isWindows_) {
                 std::replace(current_path.first.begin(),
                              current_path.first.end(), '/', '\\');
               }
@@ -149,6 +148,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             // the API expects backslash as an escape character, but no code
             // appears to rely on this behavior
             if (current_pattern_.find('\\') != std::string::npos) {
+              isWindows_ = true;
               std::replace(current_pattern_.begin(), current_pattern_.end(),
                            '\\', '/');
             }
@@ -189,6 +189,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
                                                current_pattern_));
         TF_RETURN_IF_ERROR(
             writer->WriteScalar(full_name("hasMatch"), hasMatch_));
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(full_name("isWindows"), isWindows_));
 
         if (!filepath_queue_.empty()) {
           TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("queue_size"),
@@ -224,6 +226,11 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
             reader->ReadScalar(full_name("hasMatch"), &hasMatch));
         hasMatch_ = static_cast<bool>(hasMatch);
 
+        int64 isWindows;
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(full_name("isWindows"), &isWindows));
+        isWindows_ = static_cast<bool>(isWindows);
+
         if (reader->Contains(full_name("queue_size"))) {
           int64 queue_size;
           TF_RETURN_IF_ERROR(
@@ -269,16 +276,10 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
           // If current_path is a directory, search its children.
           const string& current_dir = current_path.first;
           std::vector<string> children;
-          Status s = fs->GetChildren(current_dir, &children);
-          std::cout << "Children Num: " << children.size()
-                    << "; Status: " << s.ToString()
-                    << "; Current dir: " << current_dir
-                    << "; FileExist status: "
-                    << fs->FileExists(current_dir).ToString() << std::endl;
-          ret.Update(s);
-
-          // Handle the error cases: 1) continue the search if the status is ok
-          // or NOT_FOUND; 2) return the non-ok status immediately if it is not
+          ret.Update(fs->GetChildren(current_dir, &children));
+
+          // Handle the error cases: 1) continue the search if the status is
+          // NOT_FOUND; 2) return the non-ok status immediately if it is not
           // NOT_FOUND.
           if (ret.code() == error::NOT_FOUND) {
             continue;
@@ -351,6 +352,7 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
       size_t current_pattern_index_ GUARDED_BY(mu_) = 0;
       string current_pattern_ GUARDED_BY(mu_);
       bool hasMatch_ GUARDED_BY(mu_) = false;
+      bool isWindows_ GUARDED_BY(mu_) = false;
     };
 
     const std::vector<string> patterns_;
-- 
GitLab


From 4371a68427242633ab9d1a99ee2f1ab95a5166dd Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 15 Oct 2018 10:43:00 -0700
Subject: [PATCH 0972/1085] [TF:XLA] Fix memory leak in XLA on-demand mode.

PiperOrigin-RevId: 217168404
---
 tensorflow/compiler/jit/xla_compile_on_demand_op.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
index 79976c85df..129528bb44 100644
--- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
+++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
@@ -16,6 +16,8 @@ limitations under the License.
 // Defines the XlaCompileOnDemandOp.
 
 #include "tensorflow/compiler/jit/xla_compile_on_demand_op.h"
+
+#include "absl/memory/memory.h"
 #include "tensorflow/compiler/jit/xla_device.h"
 #include "tensorflow/compiler/jit/xla_launch_util.h"
 #include "tensorflow/compiler/tf2xla/tf2xla_util.h"
@@ -164,8 +166,9 @@ Status XlaCompileOnDemandOp::Compile(
   XlaCompiler::Options options;
   options.device_type = metadata.jit_device_type();
   options.client = metadata.client();
-  options.flib_def =
-      new FunctionLibraryDefinition(OpRegistry::Global(), FunctionDefLibrary{});
+  auto flib_def = absl::make_unique<FunctionLibraryDefinition>(
+      OpRegistry::Global(), FunctionDefLibrary{});
+  options.flib_def = flib_def.get();
   options.shape_representation_fn = metadata.shape_representation_fn();
 
   XlaCompiler::CompileOptions compile_options;
-- 
GitLab


From 09c208bd4a4a6489717eab89b1469882c15297d0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 10:44:19 -0700
Subject: [PATCH 0973/1085] Bugfixes.

PiperOrigin-RevId: 217168657
---
 .../contrib/lite/kernels/bidirectional_sequence_lstm.cc   | 6 +++---
 tensorflow/contrib/lite/kernels/lstm_eval.cc              | 8 ++++++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index 5325507d96..60abfbc85e 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -955,9 +955,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           bw_recurrent_to_input_weights, bw_recurrent_to_forget_weights,
           bw_recurrent_to_cell_weights, bw_recurrent_to_output_weights,
           bw_cell_to_input_weights, bw_cell_to_forget_weights,
-          bw_cell_to_output_weights, aux_input, fw_aux_input_to_input_weights,
-          fw_aux_input_to_forget_weights, fw_aux_input_to_cell_weights,
-          fw_aux_input_to_output_weights, bw_input_gate_bias,
+          bw_cell_to_output_weights, aux_input, bw_aux_input_to_input_weights,
+          bw_aux_input_to_forget_weights, bw_aux_input_to_cell_weights,
+          bw_aux_input_to_output_weights, bw_input_gate_bias,
           bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
           bw_projection_weights, bw_projection_bias, &lstm_params,
           /*forward_sequence=*/false, bw_output_offset, bw_scratch_buffer,
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.cc b/tensorflow/contrib/lite/kernels/lstm_eval.cc
index dc0a0b455b..2ef70aa933 100644
--- a/tensorflow/contrib/lite/kernels/lstm_eval.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.cc
@@ -769,7 +769,6 @@ TfLiteStatus EvalFloat(
   float* aux_input_to_cell_weights_ptr = nullptr;
   float* aux_input_to_output_weights_ptr = nullptr;
   if (aux_input_size > 0) {
-    aux_input_ptr = aux_input->data.f;
     if (!use_cifg) {
       aux_input_to_input_weights_ptr = aux_input_to_input_weights->data.f;
     }
@@ -787,6 +786,9 @@ TfLiteStatus EvalFloat(
     // If this is the forward_sequence, step forward, otherwise step backwards.
     const int t_rel = forward_sequence ? t : max_time - t - 1;
     const float* input_ptr = input->data.f + t_rel * input_step;
+    if (aux_input) {
+      aux_input_ptr = aux_input->data.f + t_rel * input_step;
+    }
     float* output_ptr_time =
         output->data.f + t_rel * output_step + output_offset;
 
@@ -967,7 +969,6 @@ TfLiteStatus EvalHybrid(
   float aux_input_to_cell_weights_scale = 0.0f;
   float aux_input_to_output_weights_scale = 0.0f;
   if (aux_input_size > 0) {
-    aux_input_ptr = aux_input->data.f;
     if (!use_cifg) {
       aux_input_to_input_weights_ptr =
           reinterpret_cast<int8_t*>(aux_input_to_input_weights->data.uint8);
@@ -998,6 +999,9 @@ TfLiteStatus EvalHybrid(
     // If this is the forward_sequence, step forward, otherwise step backwards.
     const int t_rel = forward_sequence ? t : max_time - t - 1;
     const float* input_ptr = input->data.f + t_rel * input_step;
+    if (aux_input) {
+      aux_input_ptr = aux_input->data.f + t_rel * input_step;
+    }
     float* output_ptr = output->data.f + t_rel * output_step + output_offset;
 
     LstmStepWithAuxInput(
-- 
GitLab


From 6fa6bd045c98bdc89424a3425e15b5161586a9a7 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 15 Oct 2018 10:55:40 -0700
Subject: [PATCH 0974/1085] Replace references to tensorflow::StringPiece with
 absl::string_view. No functional changes.

PiperOrigin-RevId: 217170781
---
 tensorflow/c/BUILD                            |   3 +
 tensorflow/c/c_api.cc                         |   8 +-
 tensorflow/c/c_api_function.cc                |   5 +-
 tensorflow/c/c_api_test.cc                    |   3 +-
 tensorflow/c/eager/BUILD                      |   1 +
 tensorflow/c/eager/c_api.cc                   |  10 +-
 tensorflow/cc/BUILD                           |   4 +
 tensorflow/cc/framework/cc_op_gen.cc          |  23 +--
 tensorflow/cc/framework/cc_op_gen_main.cc     |   4 +-
 tensorflow/cc/framework/cc_op_gen_test.cc     |   5 +-
 tensorflow/cc/framework/scope.cc              |   3 +-
 tensorflow/cc/saved_model/BUILD               |   1 +
 tensorflow/cc/saved_model/loader.cc           |  10 +-
 tensorflow/cc/tutorials/example_trainer.cc    |   7 +-
 .../jit/encapsulate_xla_computations_pass.cc  |   5 +-
 tensorflow/compiler/tf2xla/kernels/BUILD      |   1 +
 .../tf2xla/kernels/conv_op_helpers.cc         |  23 +--
 .../compiler/tf2xla/kernels/conv_op_helpers.h |   9 +-
 .../xla/tools/hex_floats_to_packed_literal.cc |   4 +-
 .../android/asset_manager_filesystem.cc       |  11 +-
 .../bigtable/kernels/bigtable_kernels.cc      |   4 +-
 .../kernels/bigtable_range_helpers.cc         |   7 +-
 .../bigtable/kernels/bigtable_range_helpers.h |   4 +-
 tensorflow/contrib/cloud/kernels/BUILD        |   3 +
 .../cloud/kernels/bigquery_table_accessor.cc  |  11 +-
 .../kernels/bigquery_table_accessor_test.cc   |   3 +-
 .../contrib/cloud/kernels/gcs_config_ops.cc   |   5 +-
 tensorflow/contrib/ffmpeg/BUILD               |   2 +
 tensorflow/contrib/ffmpeg/decode_audio_op.cc  |   8 +-
 tensorflow/contrib/ffmpeg/decode_video_op.cc  |   3 +-
 tensorflow/contrib/ffmpeg/default/BUILD       |   1 +
 .../contrib/ffmpeg/default/ffmpeg_lib.cc      |   3 +-
 tensorflow/contrib/ffmpeg/ffmpeg_lib.h        |   3 +-
 tensorflow/contrib/gdr/BUILD                  |   1 +
 tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc  |   3 +-
 tensorflow/contrib/layers/kernels/BUILD       |   1 +
 .../kernels/sparse_feature_cross_kernel.cc    |  10 +-
 .../libsvm/kernels/decode_libsvm_op.cc        |   7 +-
 tensorflow/contrib/lite/delegates/flex/BUILD  |   2 +
 .../contrib/lite/delegates/flex/delegate.cc   |   3 +-
 .../lite/delegates/flex/kernel_test.cc        |   7 +-
 .../contrib/nccl/kernels/nccl_rewrite.cc      |   7 +-
 .../contrib/saved_model/cc/saved_model/BUILD  |   1 +
 .../cc/saved_model/signature_def_utils.cc     |   4 +-
 tensorflow/contrib/session_bundle/BUILD       |   4 +
 .../contrib/session_bundle/bundle_shim.cc     |   4 +-
 .../contrib/session_bundle/session_bundle.cc  |  32 +++--
 .../contrib/session_bundle/session_bundle.h   |   8 +-
 .../contrib/session_bundle/signature_test.cc  |   4 +-
 tensorflow/contrib/tensorboard/db/BUILD       |   1 +
 .../tensorboard/db/summary_db_writer.cc       |  15 +-
 tensorflow/contrib/tpu/profiler/BUILD         |   1 +
 .../contrib/tpu/profiler/dump_tpu_profile.cc  |   8 +-
 tensorflow/contrib/verbs/BUILD                |   1 +
 tensorflow/contrib/verbs/verbs_util.cc        |   4 +-
 tensorflow/core/BUILD                         |  26 ++++
 .../common_runtime/constant_folding_test.cc   |   3 +-
 tensorflow/core/common_runtime/copy_tensor.cc |   8 +-
 tensorflow/core/common_runtime/copy_tensor.h  |   4 +-
 tensorflow/core/common_runtime/device_mgr.cc  |   9 +-
 tensorflow/core/common_runtime/device_mgr.h   |   8 +-
 tensorflow/core/common_runtime/device_set.cc  |   4 +-
 .../core/common_runtime/direct_session.cc     |   3 +-
 .../core/common_runtime/direct_session.h      |   4 +-
 .../common_runtime/direct_session_test.cc     |   5 +-
 tensorflow/core/common_runtime/eager/BUILD    |   3 +
 .../core/common_runtime/eager/attr_builder.cc |  16 ++-
 .../core/common_runtime/eager/attr_builder.h  |  17 +--
 .../core/common_runtime/eager/context.cc      |   3 +-
 .../core/common_runtime/eager/execute.cc      |   5 +-
 tensorflow/core/common_runtime/function.cc    |   3 +-
 tensorflow/core/common_runtime/function.h     |   3 +-
 .../core/common_runtime/function_test.cc      |   3 +-
 .../common_runtime/gpu/gpu_device_test.cc     |   3 +-
 .../gpu/gpu_util_platform_specific.cc         |   3 +-
 .../core/common_runtime/gpu_device_context.h  |   7 +-
 .../common_runtime/lower_if_while_test.cc     |   3 +-
 tensorflow/core/common_runtime/placer.cc      |  17 +--
 .../core/common_runtime/profile_handler.h     |   7 +-
 .../core/common_runtime/shape_refiner.cc      |   5 +-
 .../common_runtime/step_stats_collector.cc    |  15 +-
 tensorflow/core/debug/BUILD                   |   2 +
 tensorflow/core/debug/debug_graph_utils.cc    |   3 +-
 tensorflow/core/debug/debug_io_utils.cc       |   3 +-
 tensorflow/core/distributed_runtime/BUILD     |   4 +
 .../base_rendezvous_mgr.cc                    |   5 +-
 .../distributed_runtime/master_session.cc     |  21 +--
 .../core/distributed_runtime/remote_device.cc |   5 +-
 tensorflow/core/distributed_runtime/rpc/BUILD |   7 +
 .../rpc/grpc_master_service.cc                |   7 +-
 .../rpc/grpc_remote_master.cc                 |   3 +-
 .../rpc/grpc_session_test.cc                  |   3 +-
 .../rpc/grpc_tensor_coding.cc                 |   5 +-
 .../rpc/grpc_tensorflow_server.cc             |   3 +-
 .../rpc/grpc_testlib_server.cc                |   3 +-
 .../rpc/rpc_rendezvous_mgr.cc                 |   3 +-
 .../core/distributed_runtime/tensor_coding.cc |   3 +-
 tensorflow/core/example/feature_util.h        |   4 +-
 tensorflow/core/framework/attr_value_util.cc  |  18 +--
 tensorflow/core/framework/attr_value_util.h   |  11 +-
 tensorflow/core/framework/dataset.cc          |   3 +-
 tensorflow/core/framework/dataset.h           |  30 ++--
 tensorflow/core/framework/device_base.h       |   7 +-
 tensorflow/core/framework/function.cc         |  13 +-
 tensorflow/core/framework/function.h          |   8 +-
 tensorflow/core/framework/function_testlib.cc |   4 +-
 tensorflow/core/framework/function_testlib.h  |   4 +-
 tensorflow/core/framework/node_def_builder.cc |  37 ++---
 tensorflow/core/framework/node_def_builder.h  |  79 +++++-----
 .../core/framework/node_def_builder_test.cc   |   4 +-
 tensorflow/core/framework/node_def_util.cc    | 131 +++++++++--------
 tensorflow/core/framework/node_def_util.h     | 136 +++++++++---------
 .../core/framework/node_def_util_test.cc      |   4 +-
 tensorflow/core/framework/op.h                |  13 +-
 tensorflow/core/framework/op_def_builder.cc   |  74 +++++-----
 .../core/framework/op_def_builder_test.cc     |   6 +-
 tensorflow/core/framework/op_def_util.cc      |  16 +--
 tensorflow/core/framework/op_def_util.h       |   9 +-
 tensorflow/core/framework/op_gen_lib.cc       |  49 +++----
 tensorflow/core/framework/op_gen_lib.h        |  10 +-
 tensorflow/core/framework/op_kernel.cc        |  57 ++++----
 tensorflow/core/framework/op_kernel.h         |  67 +++++----
 tensorflow/core/framework/rendezvous.cc       |  33 ++---
 tensorflow/core/framework/rendezvous.h        |  11 +-
 tensorflow/core/framework/resource_mgr.cc     |   5 +-
 tensorflow/core/framework/resource_mgr.h      |   3 +-
 tensorflow/core/framework/shape_inference.cc  |   7 +-
 tensorflow/core/framework/shape_inference.h   |  12 +-
 .../framework/shape_inference_testutil.cc     |   7 +-
 .../core/framework/shape_inference_testutil.h |   4 +-
 tensorflow/core/framework/tensor.cc           |  12 +-
 tensorflow/core/framework/tensor.h            |   4 +-
 tensorflow/core/framework/tensor_util.cc      |  14 +-
 tensorflow/core/framework/types.cc            |   3 +-
 tensorflow/core/framework/types.h             |   7 +-
 .../core/framework/variant_op_registry.cc     |  10 +-
 .../core/framework/variant_op_registry.h      |  23 +--
 tensorflow/core/graph/costmodel.h             |   4 +-
 tensorflow/core/graph/graph.cc                |   7 +-
 tensorflow/core/graph/graph.h                 |   6 +-
 tensorflow/core/graph/graph_constructor.cc    |  57 ++++----
 .../core/graph/graph_constructor_test.cc      |   3 +-
 tensorflow/core/graph/graph_def_builder.cc    |  11 +-
 tensorflow/core/graph/graph_def_builder.h     |  18 +--
 tensorflow/core/graph/graph_partition.cc      |   5 +-
 tensorflow/core/graph/node_builder.cc         |  11 +-
 tensorflow/core/graph/node_builder.h          |  21 +--
 tensorflow/core/graph/quantize_training.cc    |   9 +-
 tensorflow/core/graph/subgraph.cc             |   4 +-
 tensorflow/core/graph/subgraph_test.cc        |   3 +-
 tensorflow/core/graph/tensor_id.cc            |  10 +-
 tensorflow/core/graph/tensor_id.h             |   8 +-
 tensorflow/core/graph/while_context.cc        |   3 +-
 tensorflow/core/graph/while_context.h         |   3 +-
 tensorflow/core/grappler/BUILD                |   2 +
 tensorflow/core/grappler/optimizers/BUILD     |   4 +
 .../optimizers/arithmetic_optimizer.cc        |  20 +--
 .../grappler/optimizers/constant_folding.cc   |   6 +-
 .../grappler/optimizers/constant_folding.h    |   5 +-
 .../core/grappler/optimizers/data/BUILD       |  10 ++
 .../optimizers/data/function_utils.cc         |  31 ++--
 .../grappler/optimizers/data/function_utils.h |  27 ++--
 .../grappler/optimizers/data/fusion_utils.cc  |  14 +-
 .../grappler/optimizers/data/fusion_utils.h   |  14 +-
 .../optimizers/data/graph_test_utils.cc       |  21 +--
 .../optimizers/data/graph_test_utils.h        |  22 +--
 .../grappler/optimizers/data/graph_utils.cc   |  22 +--
 .../grappler/optimizers/data/graph_utils.h    |  23 +--
 .../optimizers/data/graph_utils_test.cc       |   3 +-
 .../optimizers/data/latency_all_edges.cc      |   5 +-
 .../data/map_and_batch_fusion_test.cc         |   7 +-
 .../optimizers/data/map_vectorization_test.cc |  24 ++--
 .../optimizers/data/noop_elimination_test.cc  |   9 +-
 .../optimizers/data/vectorization_utils.cc    |   3 +-
 .../data/vectorization_utils_test.cc          |   5 +-
 .../optimizers/dependency_optimizer.cc        |   6 +-
 .../optimizers/scoped_allocator_optimizer.cc  |   3 +-
 .../optimizers/scoped_allocator_optimizer.h   |   4 +-
 tensorflow/core/grappler/utils.cc             |   8 +-
 tensorflow/core/grappler/utils.h              |  30 ++--
 tensorflow/core/grappler/utils/BUILD          |   1 +
 tensorflow/core/grappler/utils/functions.cc   |   5 +-
 tensorflow/core/grappler/utils_test.cc        |  22 +--
 tensorflow/core/kernels/BUILD                 |  17 ++-
 tensorflow/core/kernels/conv_grad_ops.cc      |  22 +--
 tensorflow/core/kernels/conv_grad_ops.h       |  19 ++-
 tensorflow/core/kernels/data/BUILD            |   4 +
 .../core/kernels/data/cache_dataset_ops.cc    |   5 +-
 tensorflow/core/kernels/data/dataset_utils.cc |   5 +-
 tensorflow/core/kernels/data/dataset_utils.h  |   5 +-
 .../core/kernels/data/experimental/BUILD      |   1 +
 .../data/experimental/csv_dataset_op.cc       |  37 ++---
 .../data/experimental/indexed_dataset.h       |   4 +-
 tensorflow/core/kernels/data/iterator_ops.cc  |  23 +--
 tensorflow/core/kernels/data/writer_ops.cc    |   4 +-
 tensorflow/core/kernels/decode_bmp_op.cc      |   3 +-
 tensorflow/core/kernels/decode_csv_op.cc      |   5 +-
 tensorflow/core/kernels/decode_image_op.cc    |  13 +-
 tensorflow/core/kernels/deep_conv2d.cc        |   3 +-
 .../core/kernels/extract_jpeg_shape_op.cc     |   3 +-
 tensorflow/core/kernels/gpu_utils.h           |   3 +-
 tensorflow/core/kernels/hexagon/BUILD         |   2 +
 .../kernels/hexagon/graph_transfer_utils.cc   |   3 +-
 .../core/kernels/hexagon/graph_transferer.cc  |   5 +-
 .../kernels/hexagon/graph_transferer_test.cc  |   5 +-
 .../hexagon/hexagon_control_wrapper.cc        |   3 +-
 .../kernels/immutable_constant_op_test.cc     |   7 +-
 tensorflow/core/kernels/list_kernels.cc       |   3 +-
 tensorflow/core/kernels/reduce_join_op.cc     |   4 +-
 .../remote_fused_graph_execute_op_test.cc     |   3 +-
 tensorflow/core/kernels/restore_v2_op_test.cc |   3 +-
 tensorflow/core/kernels/shape_op_test.cc      |   3 +-
 tensorflow/core/kernels/sparse_cross_op.cc    |  10 +-
 .../sparse_dense_binary_op_shared_test.cc     |   3 +-
 tensorflow/core/kernels/sparse_reduce_op.cc   |   5 +-
 .../core/kernels/spectrogram_test_utils.cc    |  31 ++--
 tensorflow/core/kernels/string_join_op.cc     |   3 +-
 tensorflow/core/kernels/string_split_op.cc    |  52 +++----
 tensorflow/core/kernels/string_strip_op.cc    |   3 +-
 .../core/kernels/string_to_hash_bucket_op.h   |   3 +-
 tensorflow/core/kernels/string_util.h         |   5 +-
 tensorflow/core/kernels/substr_op.cc          |  35 ++---
 tensorflow/core/kernels/tensor_array_ops.cc   |   5 +-
 tensorflow/core/kernels/word2vec_kernels.cc   |   8 +-
 tensorflow/core/lib/core/coding.cc            |   9 +-
 tensorflow/core/lib/core/coding.h             |   6 +-
 tensorflow/core/lib/core/status.cc            |   3 +-
 tensorflow/core/lib/core/status.h             |   4 +-
 tensorflow/core/lib/core/stringpiece_test.cc  |  17 ++-
 tensorflow/core/lib/db/BUILD                  |   2 +
 tensorflow/core/lib/db/sqlite.cc              |  10 +-
 tensorflow/core/lib/db/sqlite.h               |  26 ++--
 tensorflow/core/lib/db/sqlite_test.cc         |   4 +-
 tensorflow/core/lib/hash/hash.h               |   8 +-
 tensorflow/core/lib/hash/hash_test.cc         |  17 +--
 tensorflow/core/lib/io/block.cc               |  20 +--
 tensorflow/core/lib/io/block_builder.cc       |  12 +-
 tensorflow/core/lib/io/block_builder.h        |   6 +-
 tensorflow/core/lib/io/format.cc              |  17 +--
 tensorflow/core/lib/io/format.h               |   8 +-
 tensorflow/core/lib/io/inputbuffer.cc         |   3 +-
 tensorflow/core/lib/io/inputbuffer_test.cc    |  19 +--
 tensorflow/core/lib/io/iterator.cc            |  11 +-
 tensorflow/core/lib/io/iterator.h             |   8 +-
 tensorflow/core/lib/io/path.cc                |  61 ++++----
 tensorflow/core/lib/io/path.h                 |  22 +--
 tensorflow/core/lib/io/path_test.cc           |   5 +-
 tensorflow/core/lib/io/proto_encode_helper.h  |   6 +-
 tensorflow/core/lib/io/random_inputstream.cc  |   7 +-
 tensorflow/core/lib/io/record_writer.cc       |   7 +-
 tensorflow/core/lib/io/record_writer.h        |   4 +-
 tensorflow/core/lib/io/recordio_test.cc       |  11 +-
 .../core/lib/io/snappy/snappy_buffers_test.cc |  13 +-
 .../core/lib/io/snappy/snappy_inputbuffer.cc  |   3 +-
 .../core/lib/io/snappy/snappy_outputbuffer.cc |   7 +-
 .../core/lib/io/snappy/snappy_outputbuffer.h  |   5 +-
 tensorflow/core/lib/io/table.cc               |  17 +--
 tensorflow/core/lib/io/table.h                |  12 +-
 tensorflow/core/lib/io/table_builder.cc       |  22 +--
 tensorflow/core/lib/io/table_builder.h        |   5 +-
 tensorflow/core/lib/io/table_test.cc          |  37 ++---
 tensorflow/core/lib/io/two_level_iterator.cc  |  13 +-
 tensorflow/core/lib/io/two_level_iterator.h   |   4 +-
 tensorflow/core/lib/io/zlib_buffers_test.cc   |   9 +-
 tensorflow/core/lib/io/zlib_outputbuffer.cc   |   7 +-
 tensorflow/core/lib/io/zlib_outputbuffer.h    |   5 +-
 tensorflow/core/lib/jpeg/jpeg_mem.h           |   4 +-
 .../lib/monitoring/collection_registry.cc     |   3 +-
 .../core/lib/monitoring/collection_registry.h |   4 +-
 tensorflow/core/lib/monitoring/metric_def.h   |  11 +-
 tensorflow/core/lib/png/png_io.cc             |   5 +-
 tensorflow/core/lib/png/png_io.h              |   6 +-
 tensorflow/core/lib/strings/base64.cc         |   8 +-
 tensorflow/core/lib/strings/base64.h          |   8 +-
 tensorflow/core/lib/strings/numbers.cc        |  23 +--
 tensorflow/core/lib/strings/numbers.h         |  32 ++---
 tensorflow/core/lib/strings/numbers_test.cc   |  27 ++--
 tensorflow/core/lib/strings/ordered_code.cc   |  15 +-
 tensorflow/core/lib/strings/ordered_code.h    |  10 +-
 .../core/lib/strings/ordered_code_test.cc     |  40 +++---
 .../core/lib/strings/proto_text_util.cc       |   5 +-
 tensorflow/core/lib/strings/proto_text_util.h |   6 +-
 tensorflow/core/lib/strings/scanner.cc        |   6 +-
 tensorflow/core/lib/strings/scanner.h         |  16 ++-
 tensorflow/core/lib/strings/scanner_test.cc   |  31 ++--
 tensorflow/core/lib/strings/str_util.cc       |  57 ++++----
 tensorflow/core/lib/strings/str_util.h        |  74 +++++-----
 tensorflow/core/lib/strings/str_util_test.cc  |  55 +++----
 tensorflow/core/lib/strings/strcat.cc         |  16 ++-
 tensorflow/core/lib/strings/strcat.h          |  16 ++-
 tensorflow/core/lib/strings/strcat_test.cc    |   5 +-
 tensorflow/core/platform/cloud/BUILD          |  12 ++
 .../cloud/compute_engine_zone_provider.cc     |   3 +-
 .../core/platform/cloud/curl_http_request.cc  |  20 +--
 .../core/platform/cloud/curl_http_request.h   |   6 +-
 .../platform/cloud/curl_http_request_test.cc  |   5 +-
 .../core/platform/cloud/gcs_file_system.cc    |  49 +++----
 .../platform/cloud/gcs_file_system_test.cc    |  32 +++--
 .../platform/cloud/google_auth_provider.cc    |   5 +-
 .../cloud/google_auth_provider_test.cc        |   8 +-
 .../core/platform/cloud/http_request_fake.h   |   4 +-
 .../core/platform/cloud/oauth_client.cc       |  30 ++--
 tensorflow/core/platform/cloud/oauth_client.h |  14 +-
 .../core/platform/cloud/oauth_client_test.cc  |   3 +-
 .../platform/cloud/retrying_file_system.h     |   5 +-
 .../cloud/retrying_file_system_test.cc        |  13 +-
 .../core/platform/default/device_tracer.cc    |   6 +-
 .../core/platform/default/fingerprint.h       |   6 +-
 .../platform/default/human_readable_json.cc   |  19 ++-
 .../core/platform/default/string_coding.h     |   3 +-
 .../core/platform/default/test_benchmark.cc   |   3 +-
 tensorflow/core/platform/env.cc               |  13 +-
 tensorflow/core/platform/env.h                |   4 +-
 tensorflow/core/platform/env_test.cc          |  10 +-
 tensorflow/core/platform/file_system.cc       |   7 +-
 tensorflow/core/platform/file_system.h        |   6 +-
 tensorflow/core/platform/file_system_test.cc  |   7 +-
 tensorflow/core/platform/fingerprint.h        |   6 +-
 tensorflow/core/platform/hadoop/BUILD         |   2 +
 .../platform/hadoop/hadoop_file_system.cc     |  15 +-
 .../core/platform/hadoop/hadoop_file_system.h |   3 +-
 .../hadoop/hadoop_file_system_test.cc         |   7 +-
 .../core/platform/posix/posix_file_system.cc  |   9 +-
 .../core/platform/posix/posix_file_system.h   |   3 +-
 tensorflow/core/platform/s3/BUILD             |   1 +
 tensorflow/core/platform/s3/s3_file_system.cc |  16 ++-
 tensorflow/core/platform/tensor_coding.cc     |   7 +-
 tensorflow/core/platform/tensor_coding.h      |   5 +-
 tensorflow/core/platform/tracing.cc           |   6 +-
 tensorflow/core/platform/tracing.h            |  25 ++--
 .../platform/windows/windows_file_system.cc   |   9 +-
 .../platform/windows/windows_file_system.h    |   3 +-
 tensorflow/core/profiler/internal/BUILD       |   1 +
 .../core/profiler/internal/tfprof_utils.cc    |   5 +-
 tensorflow/core/util/command_line_flags.cc    |  12 +-
 tensorflow/core/util/device_name_utils.cc     |  28 ++--
 tensorflow/core/util/device_name_utils.h      |  19 +--
 .../core/util/device_name_utils_test.cc       |   5 +-
 tensorflow/core/util/env_var.cc               |   9 +-
 tensorflow/core/util/env_var.h                |  10 +-
 tensorflow/core/util/events_writer.cc         |   3 +-
 tensorflow/core/util/events_writer.h          |   3 +-
 .../core/util/example_proto_fast_parsing.cc   |  60 ++++----
 tensorflow/core/util/memmapped_file_system.cc |   9 +-
 .../core/util/memmapped_file_system_test.cc   |   5 +-
 .../core/util/memmapped_file_system_writer.cc |   5 +-
 tensorflow/core/util/mirror_pad_mode.cc       |   3 +-
 tensorflow/core/util/mirror_pad_mode.h        |   3 +-
 tensorflow/core/util/padding.cc               |   3 +-
 tensorflow/core/util/padding.h                |   3 +-
 tensorflow/core/util/reporter_test.cc         |   3 +-
 .../core/util/saved_tensor_slice_util.cc      |   3 +-
 tensorflow/core/util/semver_test.cc           |  12 +-
 tensorflow/core/util/tensor_bundle/BUILD      |   7 +-
 tensorflow/core/util/tensor_bundle/naming.cc  |   6 +-
 tensorflow/core/util/tensor_bundle/naming.h   |   6 +-
 .../core/util/tensor_bundle/tensor_bundle.cc  |  46 +++---
 .../core/util/tensor_bundle/tensor_bundle.h   |  37 ++---
 .../util/tensor_bundle/tensor_bundle_test.cc  |   9 +-
 tensorflow/core/util/tensor_slice_reader.cc   |   3 +-
 tensorflow/core/util/tensor_slice_writer.cc   |   3 +-
 tensorflow/core/util/tensor_slice_writer.h    |   4 +-
 tensorflow/core/util/util.cc                  |  17 +--
 tensorflow/core/util/util.h                   |   6 +-
 tensorflow/examples/label_image/BUILD         |   2 +-
 tensorflow/examples/label_image/main.cc       |   4 +-
 tensorflow/java/BUILD                         |   1 +
 tensorflow/java/src/gen/cc/op_specs.cc        |   3 +-
 tensorflow/java/src/gen/cc/source_writer.cc   |   5 +-
 tensorflow/java/src/gen/cc/source_writer.h    |  12 +-
 tensorflow/js/BUILD                           |   1 +
 tensorflow/js/ops/ts_op_gen_test.cc           |   5 +-
 tensorflow/python/BUILD                       |   6 +
 tensorflow/python/eager/pywrap_tfe_src.cc     |  13 +-
 tensorflow/python/framework/python_op_gen.cc  |   5 +-
 .../framework/python_op_gen_internal.cc       |  17 +--
 .../python/framework/python_op_gen_main.cc    |   7 +-
 .../python/framework/test_file_system.cc      |   5 +-
 tensorflow/python/lib/core/py_func.cc         |   5 +-
 tensorflow/python/lib/core/strings.i          |  26 ++--
 tensorflow/python/lib/io/py_record_writer.cc  |   4 +-
 tensorflow/python/lib/io/py_record_writer.h   |   4 +-
 tensorflow/python/util/util.cc                |   7 +-
 tensorflow/tools/graph_transforms/BUILD       |   4 +
 .../graph_transforms/fold_constants_lib.cc    |  11 +-
 .../graph_transforms/fold_constants_test.cc   |   3 +-
 .../freeze_requantization_ranges.cc           |   3 +-
 .../graph_transforms/sparsify_gather_test.cc  |   5 +-
 .../tools/graph_transforms/transform_graph.cc |  11 +-
 .../tools/graph_transforms/transform_utils.cc |   7 +-
 .../gen_proto_text_functions_lib.cc           |   4 +-
 391 files changed, 2459 insertions(+), 1914 deletions(-)

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 56f5e6767a..762afaee87 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -78,10 +78,12 @@ tf_cuda_library(
     deps = select({
         "//tensorflow:android": [
             ":c_api_internal",
+            "@com_google_absl//absl/strings",
             "//tensorflow/core:android_tensorflow_lib_lite",
         ],
         "//conditions:default": [
             ":c_api_internal",
+            "@com_google_absl//absl/strings",
             "//tensorflow/cc/saved_model:loader",
             "//tensorflow/cc:gradients",
             "//tensorflow/cc:ops",
@@ -228,6 +230,7 @@ tf_cuda_cc_test(
         "//tensorflow/core/kernels:array",
         "//tensorflow/core/kernels:control_flow_ops",
         "//tensorflow/core/kernels:math",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 1726db12fa..7cd7e198c4 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <limits>
 #include <memory>
 #include <vector>
+#include "absl/strings/string_view.h"
 
 #ifndef __ANDROID__
 #include "tensorflow/cc/framework/gradients.h"
@@ -51,7 +52,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -120,7 +120,7 @@ void TF_SetStatus(TF_Status* s, TF_Code code, const char* msg) {
     s->status = Status::OK();
     return;
   }
-  s->status = Status(static_cast<Code>(code), tensorflow::StringPiece(msg));
+  s->status = Status(static_cast<Code>(code), absl::string_view(msg));
 }
 
 TF_Code TF_GetCode(const TF_Status* s) {
@@ -1160,7 +1160,7 @@ void TF_ColocateWith(TF_OperationDescription* desc, TF_Operation* op) {
 
 void TF_SetAttrString(TF_OperationDescription* desc, const char* attr_name,
                       const void* value, size_t length) {
-  tensorflow::StringPiece s(static_cast<const char*>(value), length);
+  absl::string_view s(static_cast<const char*>(value), length);
   desc->node_builder.Attr(attr_name, s);
 }
 
@@ -1174,7 +1174,7 @@ void TF_SetAttrStringList(TF_OperationDescription* desc, const char* attr_name,
                                            lengths[i]);
     }
   } else {
-    std::vector<tensorflow::StringPiece> v;
+    std::vector<absl::string_view> v;
     v.reserve(num_values);
     for (int i = 0; i < num_values; ++i) {
       v.emplace_back(static_cast<const char*>(values[i]), lengths[i]);
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index f68f8a3e90..604e12eef6 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_api_internal.h"
 
 #include <algorithm>
@@ -324,7 +325,7 @@ Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
     TF_RETURN_IF_ERROR(
         NameRangesForNode(*node, node->op_def(), nullptr, &output_ranges));
     for (const auto& output : output_ranges) {
-      const StringPiece& output_name = output.first;
+      const absl::string_view& output_name = output.first;
       int index_start = output.second.first;
       int index_end = output.second.second;
       for (int i = index_start; i < index_end; ++i) {
@@ -364,7 +365,7 @@ Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
     const uint64 hash = FunctionDefHash(*fdef);
     string encoded;
     TF_RETURN_IF_ERROR(Base64Encode(
-        StringPiece(reinterpret_cast<const char*>(&hash), sizeof(hash)),
+        absl::string_view(reinterpret_cast<const char*>(&hash), sizeof(hash)),
         &encoded));
     // Besides letters and digits our Base64 encoding uses '_' and '-'.
     // Dash is invalid in operation names and multiple underscores in random
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index c4746b4990..a8ecd7b726 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <memory>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_test_util.h"
 #include "tensorflow/cc/saved_model/signature_constants.h"
 #include "tensorflow/cc/saved_model/tag_constants.h"
@@ -55,7 +56,7 @@ Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst);
 
 namespace {
 
-static void ExpectHasSubstr(StringPiece s, StringPiece expected) {
+static void ExpectHasSubstr(absl::string_view s, absl::string_view expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index 3ee31a6a7a..8e38a4675b 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -50,6 +50,7 @@ tf_cuda_library(
         ],
         "//conditions:default": [],
     }) + [
+        "@com_google_absl//absl/strings",
         "//tensorflow/core/common_runtime/eager:eager_operation",
         "//tensorflow/core/distributed_runtime/eager:eager_client",
         "//tensorflow/core/distributed_runtime/rpc/eager:grpc_eager_client",
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 3554ec0bf3..f4215ab77a 100755
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/c/eager/c_api_internal.h"
@@ -46,7 +47,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/refcount.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -526,8 +526,7 @@ TF_AttrType TFE_OpNameGetAttrType(TFE_Context* ctx,
 void TFE_OpSetAttrString(TFE_Op* op, const char* attr_name, const void* value,
                          size_t length) {
   op->operation.MutableAttrs()->Set(
-      attr_name,
-      tensorflow::StringPiece(static_cast<const char*>(value), length));
+      attr_name, absl::string_view(static_cast<const char*>(value), length));
 }
 
 void TFE_OpSetAttrInt(TFE_Op* op, const char* attr_name, int64_t value) {
@@ -596,10 +595,9 @@ void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor,
 void TFE_OpSetAttrStringList(TFE_Op* op, const char* attr_name,
                              const void* const* values, const size_t* lengths,
                              int num_values) {
-  std::vector<tensorflow::StringPiece> v(num_values);
+  std::vector<absl::string_view> v(num_values);
   for (int i = 0; i < num_values; ++i) {
-    v[i] = tensorflow::StringPiece(static_cast<const char*>(values[i]),
-                                   lengths[i]);
+    v[i] = absl::string_view(static_cast<const char*>(values[i]), lengths[i]);
   }
   op->operation.MutableAttrs()->Set(attr_name, v);
 }
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index c18b07603a..e3859014cf 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -170,6 +170,7 @@ cc_library_with_android_deps(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -603,6 +604,7 @@ cc_library_with_android_deps(
         "//tensorflow/core:op_gen_lib",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -622,6 +624,7 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -683,6 +686,7 @@ tf_cc_binary(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index 39593370d1..aeec058030 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/framework/cc_op_gen.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
@@ -107,7 +108,7 @@ string ToTitle(const string& name) {
 //   ABC         /// ABC
 //               ///
 //   DEF         /// DEF
-string MakeComment(StringPiece text, StringPiece indent) {
+string MakeComment(absl::string_view text, absl::string_view indent) {
   string ret;
   while (!text.empty()) {
     int last_non_space = -1;
@@ -302,9 +303,9 @@ string ToCamelCase(const string& str) {
 // attr_type when defining an object of that type. The bool is a flag to
 // indicate whether to treat the type as const when accepting the C++ type as an
 // argument to a function.
-std::pair<const char*, bool> AttrTypeName(StringPiece attr_type) {
+std::pair<const char*, bool> AttrTypeName(absl::string_view attr_type) {
   static const auto* attr_type_map =
-      new std::unordered_map<StringPiece, std::pair<const char*, bool>,
+      new std::unordered_map<absl::string_view, std::pair<const char*, bool>,
                              StringPieceHasher>{
           {"string", {"StringPiece", false}},
           {"list(string)", {"gtl::ArraySlice<string>", true}},
@@ -331,9 +332,9 @@ std::pair<const char*, bool> AttrTypeName(StringPiece attr_type) {
   return entry->second;
 }
 
-const char* ListElementTypeName(StringPiece attr_type) {
+const char* ListElementTypeName(absl::string_view attr_type) {
   static const auto* attr_list_type_map =
-      new std::unordered_map<StringPiece, const char*, StringPieceHasher>{
+      new std::unordered_map<absl::string_view, const char*, StringPieceHasher>{
           {"list(string)", "string"},
           {"list(int)", "int"},
           {"list(float)", "float"},
@@ -351,8 +352,8 @@ const char* ListElementTypeName(StringPiece attr_type) {
   return entry->second;
 }
 
-bool IsCPPKeyword(StringPiece name) {
-  static const std::unordered_set<StringPiece, StringPieceHasher>
+bool IsCPPKeyword(absl::string_view name) {
+  static const std::unordered_set<absl::string_view, StringPieceHasher>
       // Keywords obtained from http://en.cppreference.com/w/cpp/keyword
       kCPPReserved{
           "alignas",
@@ -462,7 +463,7 @@ bool IsCPPKeyword(StringPiece name) {
   return kCPPReserved.count(name) > 0;
 }
 
-string AvoidCPPKeywords(StringPiece name) {
+string AvoidCPPKeywords(absl::string_view name) {
   if (IsCPPKeyword(name)) {
     return strings::StrCat(name, "_");
   }
@@ -516,7 +517,7 @@ struct OpInfo {
   explicit OpInfo(const OpDef& graph_op_def, const ApiDef& api_def,
                   const std::vector<string>& aliases);
   string GetOpAttrStruct() const;
-  string GetConstructorDecl(StringPiece op_name_prefix,
+  string GetConstructorDecl(absl::string_view op_name_prefix,
                             bool include_attr) const;
   void WriteClassDecl(WritableFile* h) const;
   void GetOutput(string* out) const;
@@ -574,7 +575,7 @@ OpInfo::OpInfo(const OpDef& graph_op_def, const ApiDef& api_def,
     arg_names.push_back(AvoidCPPKeywords(api_def_arg.rename_to()));
 
     // TODO(keveman): Include input type information.
-    StringPiece description = api_def_arg.description();
+    absl::string_view description = api_def_arg.description();
     if (!description.empty()) {
       ConsumeEquals(&description);
       strings::StrAppend(&comment, "* ",
@@ -768,7 +769,7 @@ string OpInfo::GetOpAttrStruct() const {
   return struct_decl;
 }
 
-string OpInfo::GetConstructorDecl(StringPiece op_name_prefix,
+string OpInfo::GetConstructorDecl(absl::string_view op_name_prefix,
                                   bool include_attr) const {
   const string prefix = strings::StrCat(op_name_prefix, op_name, "(");
   string c_decl;
diff --git a/tensorflow/cc/framework/cc_op_gen_main.cc b/tensorflow/cc/framework/cc_op_gen_main.cc
index 3157792e15..a9759fcee9 100644
--- a/tensorflow/cc/framework/cc_op_gen_main.cc
+++ b/tensorflow/cc/framework/cc_op_gen_main.cc
@@ -13,11 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/framework/cc_op_gen.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
@@ -69,7 +69,7 @@ int main(int argc, char* argv[]) {
     exit(1);
   }
 
-  bool include_internal = tensorflow::StringPiece("1") == argv[3];
+  bool include_internal = absl::string_view("1") == argv[3];
   std::vector<tensorflow::string> api_def_dirs = tensorflow::str_util::Split(
       argv[4], ",", tensorflow::str_util::SkipEmpty());
   tensorflow::PrintAllCCOps(argv[1], argv[2], include_internal, api_def_dirs);
diff --git a/tensorflow/cc/framework/cc_op_gen_test.cc b/tensorflow/cc/framework/cc_op_gen_test.cc
index 5d9dfd95a5..327e3c6bac 100644
--- a/tensorflow/cc/framework/cc_op_gen_test.cc
+++ b/tensorflow/cc/framework/cc_op_gen_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/cc/framework/cc_op_gen.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -61,12 +62,12 @@ op {
 }
 )";
 
-void ExpectHasSubstr(StringPiece s, StringPiece expected) {
+void ExpectHasSubstr(absl::string_view s, absl::string_view expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
 
-void ExpectDoesNotHaveSubstr(StringPiece s, StringPiece expected) {
+void ExpectDoesNotHaveSubstr(absl::string_view s, absl::string_view expected) {
   EXPECT_FALSE(str_util::StrContains(s, expected))
       << "'" << s << "' contains '" << expected << "'";
 }
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
index 6abc9e268e..3bc2e5fa2b 100644
--- a/tensorflow/cc/framework/scope.cc
+++ b/tensorflow/cc/framework/scope.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/framework/scope_internal.h"
 #include "tensorflow/core/common_runtime/shape_refiner.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -247,7 +248,7 @@ std::unordered_set<string> Scope::Impl::GetColocationConstraints(
   std::vector<string> node_constraints;
   if (GetNodeAttr(attrs, kColocationAttrName, &node_constraints).ok()) {
     for (const string& entry : node_constraints) {
-      StringPiece s(entry);
+      absl::string_view s(entry);
       if (str_util::ConsumePrefix(&s, kColocationGroupPrefix)) {
         current_constraints.emplace(s);
       }
diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD
index 3d3895c8fa..43106c82c9 100644
--- a/tensorflow/cc/saved_model/BUILD
+++ b/tensorflow/cc/saved_model/BUILD
@@ -95,6 +95,7 @@ cc_library(
     deps = [
         ":constants",
         ":reader",
+        "@com_google_absl//absl/strings",
     ] + if_not_mobile([
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
diff --git a/tensorflow/cc/saved_model/loader.cc b/tensorflow/cc/saved_model/loader.cc
index c6abe2f41b..e33c584ed7 100644
--- a/tensorflow/cc/saved_model/loader.cc
+++ b/tensorflow/cc/saved_model/loader.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <unordered_set>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/saved_model/constants.h"
 #include "tensorflow/cc/saved_model/reader.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -60,7 +61,7 @@ Tensor CreateStringTensor(const string& value) {
   return tensor;
 }
 
-void AddAssetsTensorsToInputs(const StringPiece export_dir,
+void AddAssetsTensorsToInputs(const absl::string_view export_dir,
                               const std::vector<AssetFileDef>& asset_file_defs,
                               std::vector<std::pair<string, Tensor>>* inputs) {
   if (asset_file_defs.empty()) {
@@ -147,7 +148,8 @@ Status RunMainOp(const RunOptions& run_options, const string& export_dir,
     std::vector<std::pair<string, Tensor>> inputs;
     AddAssetsTensorsToInputs(export_dir, asset_file_defs, &inputs);
     RunMetadata run_metadata;
-    const StringPiece main_op_name = main_op_it->second.node_list().value(0);
+    const absl::string_view main_op_name =
+        main_op_it->second.node_list().value(0);
     return RunOnce(run_options, inputs, {}, {string(main_op_name)},
                    nullptr /* outputs */, &run_metadata, session);
   }
@@ -155,8 +157,8 @@ Status RunMainOp(const RunOptions& run_options, const string& export_dir,
 }
 
 Status RunRestore(const RunOptions& run_options, const string& export_dir,
-                  const StringPiece restore_op_name,
-                  const StringPiece variable_filename_const_op_name,
+                  const absl::string_view restore_op_name,
+                  const absl::string_view variable_filename_const_op_name,
                   const std::vector<AssetFileDef>& asset_file_defs,
                   Session* session) {
   LOG(INFO) << "Restoring SavedModel bundle.";
diff --git a/tensorflow/cc/tutorials/example_trainer.cc b/tensorflow/cc/tutorials/example_trainer.cc
index 5dbc4f5f6a..1a76a1a3a2 100644
--- a/tensorflow/cc/tutorials/example_trainer.cc
+++ b/tensorflow/cc/tutorials/example_trainer.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -165,8 +166,7 @@ void ConcurrentSessions(const Options& opts) {
 
 namespace {
 
-bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
-                    int32* dst) {
+bool ParseInt32Flag(absl::string_view arg, absl::string_view flag, int32* dst) {
   if (tensorflow::str_util::ConsumePrefix(&arg, flag) &&
       tensorflow::str_util::ConsumePrefix(&arg, "=")) {
     char extra;
@@ -176,8 +176,7 @@ bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
   return false;
 }
 
-bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
-                   bool* dst) {
+bool ParseBoolFlag(absl::string_view arg, absl::string_view flag, bool* dst) {
   if (tensorflow::str_util::ConsumePrefix(&arg, flag)) {
     if (arg.empty()) {
       *dst = true;
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
index 2ce6fa73fc..c2d58552e7 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
 #include "tensorflow/compiler/xla/status_macros.h"
@@ -123,8 +124,8 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
     bool a_is_resource = (a->output_type(0) == DT_RESOURCE);
     bool b_is_resource = (b->output_type(0) == DT_RESOURCE);
     // Uses the name as a tiebreaker so the output is deterministic.
-    StringPiece a_name(a->name());
-    StringPiece b_name(b->name());
+    absl::string_view a_name(a->name());
+    absl::string_view b_name(b->name());
     return std::tie(a_is_resource, a_name) < std::tie(b_is_resource, b_name);
   });
 
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 224e5ea123..4a7318359d 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -194,6 +194,7 @@ cc_library(
         "//tensorflow/core/kernels:bounds_check",
         "//tensorflow/core/kernels:conv_ops",
         "//tensorflow/core/kernels:ops_util",
+        "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
     ],
 )
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
index c9a1be4940..4be61e23b2 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
@@ -16,6 +16,7 @@ limitations under the License.
 // XLA-specific Ops for 2D convolution.
 
 #include "tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h"
+#include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
@@ -198,10 +199,11 @@ Status CheckConvAttrs(const ConvOpAttrs& attrs) {
 // Wrapper around ConvBackpropComputeDimensions that converts from XLA shapes
 // to TensorShapes.
 Status ConvBackpropComputeDimensionsV2XlaShapes(
-    StringPiece label, int num_spatial_dims, const xla::Shape& input_shape,
-    const xla::Shape& filter_shape, const xla::Shape& out_backprop_shape,
-    absl::Span<const int32> dilations, const std::vector<int32>& strides,
-    Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) {
+    absl::string_view label, int num_spatial_dims,
+    const xla::Shape& input_shape, const xla::Shape& filter_shape,
+    const xla::Shape& out_backprop_shape, absl::Span<const int32> dilations,
+    const std::vector<int32>& strides, Padding padding,
+    TensorFormat data_format, ConvBackpropDimensions* dims) {
   TensorShape input_tensor_shape, filter_tensor_shape,
       out_backprop_tensor_shape;
   TF_RETURN_IF_ERROR(XLAShapeToTensorShape(input_shape, &input_tensor_shape));
@@ -235,10 +237,9 @@ xla::StatusOr<ConvOpAttrs> ConvOpAttrs::Create(int num_spatial_dims,
   return attrs;
 }
 
-xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(StringPiece /*type_string*/,
-                                               xla::XlaOp conv_input,
-                                               xla::XlaOp filter,
-                                               const ConvOpAttrs& attrs) {
+xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(
+    absl::string_view /*type_string*/, xla::XlaOp conv_input, xla::XlaOp filter,
+    const ConvOpAttrs& attrs) {
   TF_RETURN_IF_ERROR(CheckConvAttrs(attrs));
 
   auto* builder = conv_input.builder();
@@ -309,8 +310,8 @@ xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(StringPiece /*type_string*/,
 }
 
 xla::StatusOr<xla::XlaOp> MakeXlaBackpropInputConvOp(
-    StringPiece type_string, const xla::Shape& input_shape, xla::XlaOp filter,
-    xla::XlaOp out_backprop, const ConvOpAttrs& attrs) {
+    absl::string_view type_string, const xla::Shape& input_shape,
+    xla::XlaOp filter, xla::XlaOp out_backprop, const ConvOpAttrs& attrs) {
   TF_RETURN_IF_ERROR(CheckConvAttrs(attrs));
 
   int num_dims = attrs.num_spatial_dims + 2;
@@ -380,7 +381,7 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropInputConvOp(
 }
 
 xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
-    StringPiece type_string, xla::XlaOp activations,
+    absl::string_view type_string, xla::XlaOp activations,
     const xla::Shape& filter_shape, xla::XlaOp gradients,
     const ConvOpAttrs& attrs) {
   TF_RETURN_IF_ERROR(CheckConvAttrs(attrs));
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h
index 6e1b70a478..bcd846dd02 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -52,15 +53,15 @@ struct ConvOpAttrs {
 
 // Creates a new XLA forward or backward convolution with the given inputs and
 // attributes.
-xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(StringPiece type_string,
+xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(absl::string_view type_string,
                                                xla::XlaOp conv_input,
                                                xla::XlaOp filter,
                                                const ConvOpAttrs& attrs);
 xla::StatusOr<xla::XlaOp> MakeXlaBackpropInputConvOp(
-    StringPiece type_string, const xla::Shape& input_shape, xla::XlaOp filter,
-    xla::XlaOp out_backprop, const ConvOpAttrs& attrs);
+    absl::string_view type_string, const xla::Shape& input_shape,
+    xla::XlaOp filter, xla::XlaOp out_backprop, const ConvOpAttrs& attrs);
 xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
-    StringPiece type_string, xla::XlaOp activations,
+    absl::string_view type_string, xla::XlaOp activations,
     const xla::Shape& filter_shape, xla::XlaOp gradients,
     const ConvOpAttrs& attrs);
 
diff --git a/tensorflow/compiler/xla/tools/hex_floats_to_packed_literal.cc b/tensorflow/compiler/xla/tools/hex_floats_to_packed_literal.cc
index 0c3ec5934e..23ce1d235b 100644
--- a/tensorflow/compiler/xla/tools/hex_floats_to_packed_literal.cc
+++ b/tensorflow/compiler/xla/tools/hex_floats_to_packed_literal.cc
@@ -67,8 +67,8 @@ int main(int argc, char** argv) {
     floats.push_back(value);
   }
 
-  tensorflow::StringPiece content(absl::bit_cast<const char*>(floats.data()),
-                                  floats.size() * sizeof(float));
+  absl::string_view content(absl::bit_cast<const char*>(floats.data()),
+                            floats.size() * sizeof(float));
   TF_CHECK_OK(tensorflow::WriteStringToFile(tensorflow::Env::Default(),
                                             output_file, content));
   return 0;
diff --git a/tensorflow/contrib/android/asset_manager_filesystem.cc b/tensorflow/contrib/android/asset_manager_filesystem.cc
index d14b2126a0..ed1f88b160 100644
--- a/tensorflow/contrib/android/asset_manager_filesystem.cc
+++ b/tensorflow/contrib/android/asset_manager_filesystem.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <unistd.h>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/file_system_helper.h"
@@ -26,7 +27,7 @@ namespace {
 
 string RemoveSuffix(const string& name, const string& suffix) {
   string output(name);
-  StringPiece piece(output);
+  absl::string_view piece(output);
   str_util::ConsumeSuffix(&piece, suffix);
   return string(piece);
 }
@@ -87,7 +88,7 @@ class RandomAccessFileFromAsset : public RandomAccessFile {
       : asset_manager_(asset_manager), file_name_(name) {}
   ~RandomAccessFileFromAsset() override = default;
 
-  Status Read(uint64 offset, size_t to_read, StringPiece* result,
+  Status Read(uint64 offset, size_t to_read, absl::string_view* result,
               char* scratch) const override {
     auto asset = ScopedAsset(AAssetManager_open(
         asset_manager_, file_name_.c_str(), AASSET_MODE_RANDOM));
@@ -98,7 +99,7 @@ class RandomAccessFileFromAsset : public RandomAccessFile {
     off64_t new_offset = AAsset_seek64(asset.get(), offset, SEEK_SET);
     off64_t length = AAsset_getLength64(asset.get());
     if (new_offset < 0) {
-      *result = StringPiece(scratch, 0);
+      *result = absl::string_view(scratch, 0);
       return errors::OutOfRange("Read after file end.");
     }
     const off64_t region_left =
@@ -107,7 +108,7 @@ class RandomAccessFileFromAsset : public RandomAccessFile {
     if (read < 0) {
       return errors::Internal("Error reading from asset.");
     }
-    *result = StringPiece(scratch, region_left);
+    *result = absl::string_view(scratch, region_left);
     return (region_left == to_read)
                ? Status::OK()
                : errors::OutOfRange("Read less bytes than requested.");
@@ -229,7 +230,7 @@ string AssetManagerFileSystem::NormalizeDirectoryPath(const string& fname) {
 }
 
 string AssetManagerFileSystem::RemoveAssetPrefix(const string& name) {
-  StringPiece piece(name);
+  absl::string_view piece(name);
   str_util::ConsumePrefix(&piece, prefix_);
   return string(piece);
 }
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc b/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc
index 6138d79126..ce77a4c01c 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/bigtable/kernels/bigtable_lib.h"
 
 #include "tensorflow/core/framework/op_kernel.h"
@@ -342,7 +343,8 @@ class ToBigtableOp : public AsyncOpKernel {
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
-                             const StringPiece& argument_name, T* output) {
+                             const absl::string_view& argument_name,
+                             T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.cc b/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.cc
index 51965f6214..1b4be1cbcd 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
@@ -55,11 +56,11 @@ const string& MultiModeKeyRange::begin_key() const { return begin_; }
 
 const string& MultiModeKeyRange::end_key() const { return end_; }
 
-bool MultiModeKeyRange::contains_key(StringPiece key) const {
-  if (StringPiece(begin_) > key) {
+bool MultiModeKeyRange::contains_key(absl::string_view key) const {
+  if (absl::string_view(begin_) > key) {
     return false;
   }
-  if (StringPiece(end_) <= key && !end_.empty()) {
+  if (absl::string_view(end_) <= key && !end_.empty()) {
     return false;
   }
   return true;
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h b/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h
index 44c628e366..d55c6d8f6a 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h
@@ -18,7 +18,7 @@ limitations under the License.
 
 #include <string>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -52,7 +52,7 @@ class MultiModeKeyRange {
   // The first invalid key after the valid range.
   const string& end_key() const;
   // Returns true if the provided key is a part of the range, false otherwise.
-  bool contains_key(StringPiece key) const;
+  bool contains_key(absl::string_view key) const;
 
  private:
   MultiModeKeyRange(string begin, string end)
diff --git a/tensorflow/contrib/cloud/kernels/BUILD b/tensorflow/contrib/cloud/kernels/BUILD
index 1311063ec0..84cf6a80f1 100644
--- a/tensorflow/contrib/cloud/kernels/BUILD
+++ b/tensorflow/contrib/cloud/kernels/BUILD
@@ -46,6 +46,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/platform/cloud:curl_http_request",
         "//tensorflow/core/platform/cloud:google_auth_provider",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -65,6 +66,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core/platform/cloud:http_request_fake",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -85,6 +87,7 @@ tf_kernel_library(
         "//tensorflow/core/platform/cloud:curl_http_request",
         "//tensorflow/core/platform/cloud:gcs_file_system",
         "//tensorflow/core/platform/cloud:oauth_client",
+        "@com_google_absl//absl/strings",
         "@jsoncpp_git//:jsoncpp",
     ],
 )
diff --git a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc
index e57a66b99f..ba7678d5a8 100644
--- a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc
+++ b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/contrib/cloud/kernels/bigquery_table_accessor.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/example/feature.pb.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 
@@ -31,7 +32,7 @@ bool IsPartitionEmpty(const BigQueryTablePartition& partition) {
   return false;
 }
 
-Status ParseJson(StringPiece json, Json::Value* result) {
+Status ParseJson(absl::string_view json, Json::Value* result) {
   Json::Reader reader;
   if (!reader.parse(string(json), *result)) {
     return errors::Internal("Couldn't parse JSON response from BigQuery.");
@@ -183,8 +184,8 @@ Status BigQueryTableAccessor::ReadRow(int64* row_id, Example* example) {
                                     FullTableName());
 
     // Parse the returned row.
-    StringPiece response_piece =
-        StringPiece(&output_buffer[0], output_buffer.size());
+    absl::string_view response_piece =
+        absl::string_view(&output_buffer[0], output_buffer.size());
     Json::Value root;
     TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
     for (unsigned int i = 0; i < root["rows"].size(); ++i) {
@@ -261,8 +262,8 @@ Status BigQueryTableAccessor::ReadSchema() {
                                   FullTableName());
 
   // Parse the schema.
-  StringPiece response_piece =
-      StringPiece(&output_buffer[0], output_buffer.size());
+  absl::string_view response_piece =
+      absl::string_view(&output_buffer[0], output_buffer.size());
 
   Json::Value root;
   TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
diff --git a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc
index 7416eb19d3..c85f240c2c 100644
--- a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc
+++ b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/contrib/cloud/kernels/bigquery_table_accessor.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test_data.h"
 #include "tensorflow/core/example/feature.pb.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -29,7 +30,7 @@ constexpr char kTestProject[] = "test-project";
 constexpr char kTestDataset[] = "test-dataset";
 constexpr char kTestTable[] = "test-table";
 
-bool HasSubstr(StringPiece base, StringPiece substr) {
+bool HasSubstr(absl::string_view base, absl::string_view substr) {
   bool ok = str_util::StrContains(base, substr);
   EXPECT_TRUE(ok) << base << ", expected substring " << substr;
   return ok;
diff --git a/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc b/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc
index 648a219fb8..fc8197c762 100644
--- a/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc
+++ b/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <sstream>
 
+#include "absl/strings/string_view.h"
 #include "include/json/json.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -63,8 +64,8 @@ Status RetrieveGcsFs(OpKernelContext* ctx, RetryingGcsFileSystem** fs) {
 }
 
 template <typename T>
-Status ParseScalarArgument(OpKernelContext* ctx, StringPiece argument_name,
-                           T* output) {
+Status ParseScalarArgument(OpKernelContext* ctx,
+                           absl::string_view argument_name, T* output) {
   const Tensor* argument_t;
   TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
   if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
diff --git a/tensorflow/contrib/ffmpeg/BUILD b/tensorflow/contrib/ffmpeg/BUILD
index f7b3273a4d..1eb3e358b2 100644
--- a/tensorflow/contrib/ffmpeg/BUILD
+++ b/tensorflow/contrib/ffmpeg/BUILD
@@ -29,6 +29,7 @@ cc_library(
         "//tensorflow/contrib/ffmpeg/default:ffmpeg_lib",
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -57,6 +58,7 @@ cc_library(
         "//tensorflow/contrib/ffmpeg/default:ffmpeg_lib",
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/contrib/ffmpeg/decode_audio_op.cc b/tensorflow/contrib/ffmpeg/decode_audio_op.cc
index 5ab57ca4cd..f4905a02de 100644
--- a/tensorflow/contrib/ffmpeg/decode_audio_op.cc
+++ b/tensorflow/contrib/ffmpeg/decode_audio_op.cc
@@ -18,6 +18,7 @@
 #include <cstdio>
 #include <set>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -41,8 +42,7 @@ const char* kValidFileFormats[] = {"mp3", "mp4", "ogg", "wav"};
  * Decoding implementation, shared across V1 and V2 ops. Creates a new
  * output in the context.
  */
-void Decode(OpKernelContext* context,
-            const tensorflow::StringPiece& file_contents,
+void Decode(OpKernelContext* context, const absl::string_view& file_contents,
             const string& file_format, const int32 samples_per_second,
             const int32 channel_count, const string& stream) {
   // Write the input data to a temp file.
@@ -135,7 +135,7 @@ class DecodeAudioOpV2 : public OpKernel {
                     "channel_count must be a rank-0 tensor but got shape ",
                     channel_count_tensor.shape().DebugString()));
 
-    const tensorflow::StringPiece contents = contents_tensor.scalar<string>()();
+    const absl::string_view contents = contents_tensor.scalar<string>()();
     const string file_format =
         str_util::Lowercase(file_format_tensor.scalar<string>()());
     const int32 samples_per_second =
@@ -245,7 +245,7 @@ class DecodeAudioOp : public OpKernel {
         errors::InvalidArgument("contents must be scalar but got shape ",
                                 contents.shape().DebugString()));
 
-    const tensorflow::StringPiece file_contents = contents.scalar<string>()();
+    const absl::string_view file_contents = contents.scalar<string>()();
     Decode(context, file_contents, file_format_, samples_per_second_,
            channel_count_, "");
   }
diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc
index 6f8ad486d1..e9c1e783de 100644
--- a/tensorflow/contrib/ffmpeg/decode_video_op.cc
+++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc
@@ -18,6 +18,7 @@
 #include <cstdio>
 #include <set>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -45,7 +46,7 @@ class DecodeVideoOp : public OpKernel {
                 errors::InvalidArgument(
                     "contents must be a rank-0 tensor but got shape ",
                     contents_tensor.shape().DebugString()));
-    const tensorflow::StringPiece contents = contents_tensor.scalar<string>()();
+    const absl::string_view contents = contents_tensor.scalar<string>()();
 
     // Write the input data to a temp file.
     string extension;
diff --git a/tensorflow/contrib/ffmpeg/default/BUILD b/tensorflow/contrib/ffmpeg/default/BUILD
index 59bad8982d..0c15541412 100644
--- a/tensorflow/contrib/ffmpeg/default/BUILD
+++ b/tensorflow/contrib/ffmpeg/default/BUILD
@@ -20,6 +20,7 @@ cc_library(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
         "@protobuf_archive//:protobuf_headers",
     ],
 )
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index cca1a05419..ccbf48b317 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -25,6 +25,7 @@
 
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -303,7 +304,7 @@ FileDeleter::~FileDeleter() {
   env.DeleteFile(filename_).IgnoreError();
 }
 
-Status WriteFile(const string& filename, StringPiece contents) {
+Status WriteFile(const string& filename, absl::string_view contents) {
   Env& env = *Env::Default();
   std::unique_ptr<WritableFile> file;
   TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file));
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
index bf2aa75545..0b81848668 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
@@ -19,6 +19,7 @@
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -35,7 +36,7 @@ class FileDeleter {
 };
 
 // Writes binary data to a file.
-Status WriteFile(const string& filename, tensorflow::StringPiece contents);
+Status WriteFile(const string& filename, absl::string_view contents);
 
 // Reads an audio file using ffmpeg and converts it into an array of samples in
 // [-1.0, 1.0]. If there are multiple channels in the audio then each frame will
diff --git a/tensorflow/contrib/gdr/BUILD b/tensorflow/contrib/gdr/BUILD
index e534fdc177..0fc8cd7ebd 100644
--- a/tensorflow/contrib/gdr/BUILD
+++ b/tensorflow/contrib/gdr/BUILD
@@ -97,6 +97,7 @@ cc_library(
         "//tensorflow/core/distributed_runtime:worker_cache",
         "//tensorflow/core/distributed_runtime:worker_env",
         "//tensorflow/core/distributed_runtime:worker_interface",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
index 94f522c04e..cf4218c716 100644
--- a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
+++ b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/contrib/gdr/gdr_rendezvous_mgr.h"
 
 #include "google/protobuf/any.pb.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/gdr/gdr_memory_manager.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
@@ -41,7 +42,7 @@ class GdrRecvTensorCall : public BaseRecvTensorCall {
   GdrRecvTensorCall(WorkerInterface* wi, Device* dst_device,
                     RemoteMemoryManager* remote_memory_manager,
                     const Rendezvous::Args& recv_args, int64 step_id,
-                    StringPiece key)
+                    absl::string_view key)
       : wi_(wi),
         dst_device_(dst_device),
         remote_memory_manager_(remote_memory_manager),
diff --git a/tensorflow/contrib/layers/kernels/BUILD b/tensorflow/contrib/layers/kernels/BUILD
index 7aae09ff3e..d97d518bb6 100644
--- a/tensorflow/contrib/layers/kernels/BUILD
+++ b/tensorflow/contrib/layers/kernels/BUILD
@@ -13,6 +13,7 @@ cc_library(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
         "@farmhash_archive//:farmhash",
         "@protobuf_archive//:protobuf_headers",
     ],
diff --git a/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc b/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc
index 01893d6061..61852df9ad 100644
--- a/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc
+++ b/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_def_builder.h"
@@ -26,7 +27,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/fingerprint.h"
 #include "tensorflow/core/util/work_sharder.h"
@@ -92,8 +92,8 @@ string SparseTensorColumn<string>::Feature(int64 batch, int64 n) const {
 }
 
 template <>
-StringPiece SparseTensorColumn<StringPiece>::Feature(int64 batch,
-                                                     int64 n) const {
+absl::string_view SparseTensorColumn<absl::string_view>::Feature(
+    int64 batch, int64 n) const {
   const int64 start = feature_start_indices_[batch];
   return values_.vec<string>().data()[start + n];
 }
@@ -130,8 +130,8 @@ string DenseTensorColumn<string>::Feature(int64 batch, int64 n) const {
 }
 
 template <>
-StringPiece DenseTensorColumn<StringPiece>::Feature(int64 batch,
-                                                    int64 n) const {
+absl::string_view DenseTensorColumn<absl::string_view>::Feature(int64 batch,
+                                                                int64 n) const {
   return tensor_.matrix<string>()(batch, n);
 }
 
diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
index 720c74e3de..2e9f609682 100644
--- a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -46,10 +47,10 @@ class DecodeLibsvmOp : public OpKernel {
     std::vector<T> out_values;
     std::vector<std::pair<int64, int64>> out_indices;
     for (int i = 0; i < input_flat.size(); ++i) {
-      StringPiece line(input_flat(i));
+      absl::string_view line(input_flat(i));
       str_util::RemoveWhitespaceContext(&line);
 
-      StringPiece piece;
+      absl::string_view piece;
       OP_REQUIRES(ctx, str_util::ConsumeNonWhitespace(&line, &piece),
                   errors::InvalidArgument("No label found for input[", i,
                                           "]: \"", input_flat(i), "\""));
@@ -64,7 +65,7 @@ class DecodeLibsvmOp : public OpKernel {
       str_util::RemoveLeadingWhitespace(&line);
       while (str_util::ConsumeNonWhitespace(&line, &piece)) {
         size_t p = piece.find(':');
-        OP_REQUIRES(ctx, (p != StringPiece::npos),
+        OP_REQUIRES(ctx, (p != absl::string_view::npos),
                     errors::InvalidArgument("Invalid feature \"", piece, "\""));
 
         int64 feature_index;
diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD
index 2f866eaecb..55f1e8a412 100644
--- a/tensorflow/contrib/lite/delegates/flex/BUILD
+++ b/tensorflow/contrib/lite/delegates/flex/BUILD
@@ -79,6 +79,7 @@ cc_library(
         ":delegate_data",
         ":kernel",
         ":util",
+        "@com_google_absl//absl/strings",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "//tensorflow/contrib/lite:kernel_api",
         "//tensorflow/contrib/lite:util",
@@ -176,6 +177,7 @@ tf_cc_test(
         ":kernel",
         ":test_util",
         "@com_google_googletest//:gtest",
+        "@com_google_absl//absl/strings",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib",
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate.cc b/tensorflow/contrib/lite/delegates/flex/delegate.cc
index c72b0cf513..aa749ab0e5 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate.cc
@@ -16,6 +16,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/lite/context_util.h"
 #include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
 #include "tensorflow/contrib/lite/delegates/flex/kernel.h"
@@ -68,7 +69,7 @@ TfLiteStatus CopyFromBufferHandle(TfLiteContext* context,
   }
 
   tensorflow::Tensor t = buffer_map->GetTensor(buffer_handle);
-  tensorflow::StringPiece t_data = t.tensor_data();
+  absl::string_view t_data = t.tensor_data();
 
   if (size != t_data.size()) {
     context->ReportError(
diff --git a/tensorflow/contrib/lite/delegates/flex/kernel_test.cc b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
index 94a6f8b61a..c084c35e4f 100644
--- a/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
 #include "tensorflow/contrib/lite/delegates/flex/test_util.h"
 
@@ -60,9 +61,9 @@ class KernelTest : public testing::FlexModelTest {
                                         TfLiteBufferHandle buffer_handle,
                                         void* data, size_t size) {
       auto* delegate_data = reinterpret_cast<DelegateData*>(delegate->data_);
-      tensorflow::StringPiece values = delegate_data->GetBufferMap(context)
-                                           ->GetTensor(buffer_handle)
-                                           .tensor_data();
+      absl::string_view values = delegate_data->GetBufferMap(context)
+                                     ->GetTensor(buffer_handle)
+                                     .tensor_data();
       memcpy(data, values.data(), values.size());
       return kTfLiteOk;
     };
diff --git a/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc b/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc
index 06ff86e6d8..82320a5402 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #if GOOGLE_CUDA
 
@@ -35,7 +36,7 @@ Status ReplaceReduce(Graph* graph, Node* node) {
   TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "T", &dtype));
   int num_devices = node->num_inputs();
   string shared_name = node->name();
-  auto make_builder = [&](StringPiece op_name, StringPiece suffix) {
+  auto make_builder = [&](absl::string_view op_name, absl::string_view suffix) {
     return NodeBuilder(strings::StrCat(shared_name, suffix), op_name)
         .Attr("reduction", reduction)
         .Attr("num_devices", num_devices)
@@ -159,7 +160,7 @@ Status ReplaceBroadcast(Graph* graph, Node* node) {
   }
 
   string shared_name = node->name();
-  auto make_builder = [&](StringPiece op_name, StringPiece suffix) {
+  auto make_builder = [&](absl::string_view op_name, absl::string_view suffix) {
     return NodeBuilder(strings::StrCat(shared_name, suffix), op_name)
         .Attr("num_devices", num_devices)
         .Attr("shared_name", shared_name)
@@ -255,7 +256,7 @@ class NcclReplacePass : public GraphOptimizationPass {
     }
     // Find reduction and broadcast ops and replace them with Send/Recv ops.
     for (Node* node : graph->op_nodes()) {
-      StringPiece type = node->type_string();
+      absl::string_view type = node->type_string();
       if (!str_util::StartsWith(type, "Nccl")) {
         continue;
       }
diff --git a/tensorflow/contrib/saved_model/cc/saved_model/BUILD b/tensorflow/contrib/saved_model/cc/saved_model/BUILD
index ea4d41d43b..da27789272 100644
--- a/tensorflow/contrib/saved_model/cc/saved_model/BUILD
+++ b/tensorflow/contrib/saved_model/cc/saved_model/BUILD
@@ -35,6 +35,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_proto_parsing",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.cc b/tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.cc
index e87e497e5f..79c0b1b0d5 100644
--- a/tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.cc
+++ b/tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.cc
@@ -15,17 +15,17 @@ limitations under the License.
 
 #include "tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/saved_model/signature_constants.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace tensorflow {
 
 namespace {
 template <class T>
-Status FindInProtobufMap(StringPiece description,
+Status FindInProtobufMap(absl::string_view description,
                          const protobuf::Map<string, T>& map, const string& key,
                          const T** value) {
   const auto it = map.find(key);
diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD
index 9c08859180..9e0f4eb974 100644
--- a/tensorflow/contrib/session_bundle/BUILD
+++ b/tensorflow/contrib/session_bundle/BUILD
@@ -190,6 +190,7 @@ cc_library(
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -206,6 +207,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":signature_lite",
+        "@com_google_absl//absl/strings",
         "//tensorflow/core:lib_internal",
     ] + if_not_mobile([
         ":manifest_proto_cc",
@@ -342,6 +344,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -370,6 +373,7 @@ cc_library(
     deps = [
         ":session_bundle",
         ":signature",
+        "@com_google_absl//absl/strings",
         "//tensorflow/cc/saved_model:loader",
         "//tensorflow/cc/saved_model:signature_constants",
     ] + if_not_mobile([
diff --git a/tensorflow/contrib/session_bundle/bundle_shim.cc b/tensorflow/contrib/session_bundle/bundle_shim.cc
index c669ced997..5f2b1e2a67 100644
--- a/tensorflow/contrib/session_bundle/bundle_shim.cc
+++ b/tensorflow/contrib/session_bundle/bundle_shim.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/contrib/session_bundle/bundle_shim.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/saved_model/loader.h"
 #include "tensorflow/cc/saved_model/signature_constants.h"
 #include "tensorflow/contrib/session_bundle/manifest.pb.h"
@@ -23,7 +24,6 @@ limitations under the License.
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
 #include "tensorflow/core/public/session.h"
 #include "tensorflow/core/public/session_options.h"
@@ -129,7 +129,7 @@ Status MaybeBuildPredictSignatureDef(
 
 Status LoadSavedModelFromLegacySessionBundlePath(
     const SessionOptions& session_options, const RunOptions& run_options,
-    const StringPiece session_bundle_export_dir,
+    const absl::string_view session_bundle_export_dir,
     SavedModelBundle* saved_model_bundle) {
   if (session_bundle_export_dir.empty()) {
     return Status(error::Code::NOT_FOUND, "Export directory path is empty.");
diff --git a/tensorflow/contrib/session_bundle/session_bundle.cc b/tensorflow/contrib/session_bundle/session_bundle.cc
index a690d9b129..3f36704405 100644
--- a/tensorflow/contrib/session_bundle/session_bundle.cc
+++ b/tensorflow/contrib/session_bundle/session_bundle.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "google/protobuf/any.pb.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/session_bundle/manifest.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/graph_def_util.h"
@@ -62,7 +63,7 @@ Status CreateSessionFromGraphDef(const SessionOptions& options,
   return (*session)->Create(graph);
 }
 
-Status GetMetaGraphDefFromExport(const StringPiece export_dir,
+Status GetMetaGraphDefFromExport(const absl::string_view export_dir,
                                  MetaGraphDef* meta_graph_def) {
   const string meta_graph_def_path =
       io::JoinPath(export_dir, kMetaGraphDefFilename);
@@ -77,7 +78,7 @@ Tensor CreateStringTensor(const string& value) {
 }
 
 // Adds Assets related tensors (assets_dir and asset files) to the inputs.
-void AddAssetsTensorsToInputs(const StringPiece export_dir,
+void AddAssetsTensorsToInputs(const absl::string_view export_dir,
                               const std::vector<AssetFile>& asset_files,
                               std::vector<std::pair<string, Tensor>>* inputs) {
   if (asset_files.empty()) {
@@ -108,7 +109,7 @@ void AddAssetsTensorsToInputs(const StringPiece export_dir,
 // prefix.data-* are present in the filesystem. So if we see export.index
 // present in the export_dir, we know the export is in V2 format and we return
 // <export_dir>/export as this prefix.
-string GetVariablesFilename(const StringPiece export_dir) {
+string GetVariablesFilename(const absl::string_view export_dir) {
   const char kVariablesFilename[] = "export";
   const string kVariablesIndexFilename = MetaFilename("export");  // V2 ckpts
   const char kVariablesFilenamePattern[] = "export-\?\?\?\?\?-of-\?\?\?\?\?";
@@ -128,10 +129,11 @@ string GetVariablesFilename(const StringPiece export_dir) {
   }
 }
 
-Status RunRestoreOp(const RunOptions& run_options, const StringPiece export_dir,
+Status RunRestoreOp(const RunOptions& run_options,
+                    const absl::string_view export_dir,
                     const std::vector<AssetFile>& asset_files,
-                    const StringPiece restore_op_name,
-                    const StringPiece variables_filename_const_op_name,
+                    const absl::string_view restore_op_name,
+                    const absl::string_view variables_filename_const_op_name,
                     Session* session) {
   LOG(INFO) << "Running restore op for SessionBundle: " << restore_op_name
             << ", " << variables_filename_const_op_name;
@@ -145,9 +147,10 @@ Status RunRestoreOp(const RunOptions& run_options, const StringPiece export_dir,
                       nullptr /* outputs */, &run_metadata);
 }
 
-Status RunInitOp(const RunOptions& run_options, const StringPiece export_dir,
+Status RunInitOp(const RunOptions& run_options,
+                 const absl::string_view export_dir,
                  const std::vector<AssetFile>& asset_files,
-                 const StringPiece init_op_name, Session* session) {
+                 const absl::string_view init_op_name, Session* session) {
   LOG(INFO) << "Running init op for SessionBundle";
   std::vector<std::pair<string, Tensor>> inputs;
   AddAssetsTensorsToInputs(export_dir, asset_files, &inputs);
@@ -158,7 +161,7 @@ Status RunInitOp(const RunOptions& run_options, const StringPiece export_dir,
 
 Status LoadSessionBundleFromPathUsingRunOptionsInternal(
     const SessionOptions& options, const RunOptions& run_options,
-    const StringPiece export_dir, SessionBundle* const bundle) {
+    const absl::string_view export_dir, SessionBundle* const bundle) {
   LOG(INFO) << "Attempting to load a SessionBundle from: " << export_dir;
   LOG(INFO) << "Using RunOptions: " << DebugStringIfAvailable(run_options);
   TF_RETURN_IF_ERROR(
@@ -227,17 +230,16 @@ Status LoadSessionBundleFromPathUsingRunOptionsInternal(
 }  // namespace
 
 Status LoadSessionBundleFromPath(const SessionOptions& options,
-                                 const StringPiece export_dir,
+                                 const absl::string_view export_dir,
                                  SessionBundle* const bundle) {
   TF_RETURN_IF_ERROR(LoadSessionBundleFromPathUsingRunOptions(
       options, RunOptions(), export_dir, bundle));
   return Status::OK();
 }
 
-Status LoadSessionBundleFromPathUsingRunOptions(const SessionOptions& options,
-                                                const RunOptions& run_options,
-                                                const StringPiece export_dir,
-                                                SessionBundle* const bundle) {
+Status LoadSessionBundleFromPathUsingRunOptions(
+    const SessionOptions& options, const RunOptions& run_options,
+    const absl::string_view export_dir, SessionBundle* const bundle) {
   const uint64 start_microseconds = Env::Default()->NowMicros();
   const Status status = LoadSessionBundleFromPathUsingRunOptionsInternal(
       options, run_options, export_dir, bundle);
@@ -263,7 +265,7 @@ Status LoadSessionBundleFromPathUsingRunOptions(const SessionOptions& options,
   return status;
 }
 
-bool IsPossibleExportDirectory(const StringPiece directory) {
+bool IsPossibleExportDirectory(const absl::string_view directory) {
   const string meta_graph_def_path =
       io::JoinPath(directory, kMetaGraphDefFilename);
   return Env::Default()->FileExists(meta_graph_def_path).ok();
diff --git a/tensorflow/contrib/session_bundle/session_bundle.h b/tensorflow/contrib/session_bundle/session_bundle.h
index b2be46efa6..f25123c388 100644
--- a/tensorflow/contrib/session_bundle/session_bundle.h
+++ b/tensorflow/contrib/session_bundle/session_bundle.h
@@ -20,10 +20,10 @@ limitations under the License.
 
 #include <memory>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/session_bundle/manifest.pb.h"
 #include "tensorflow/contrib/session_bundle/signature.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
 #include "tensorflow/core/protobuf/saver.pb.h"
 #include "tensorflow/core/public/session.h"
@@ -60,7 +60,7 @@ struct SessionBundle {
 
 // Loads a manifest and initialized session using the output of an Exporter.
 Status LoadSessionBundleFromPath(const SessionOptions& options,
-                                 const StringPiece export_dir,
+                                 const absl::string_view export_dir,
                                  SessionBundle* bundle);
 
 // Similar to the LoadSessionBundleFromPath(), but also allows the session run
@@ -70,14 +70,14 @@ Status LoadSessionBundleFromPath(const SessionOptions& options,
 // This method is EXPERIMENTAL and may change or be removed.
 Status LoadSessionBundleFromPathUsingRunOptions(
     const SessionOptions& session_options, const RunOptions& run_options,
-    const StringPiece export_dir, SessionBundle* bundle);
+    const absl::string_view export_dir, SessionBundle* bundle);
 
 // Sanity checks whether the directory looks like an export directory. Note that
 // we don't try to load any data in this method.
 //
 // If the method returns false this is definitely not an export directory, if it
 // returns true, it is no guarantee that the model will load.
-bool IsPossibleExportDirectory(const StringPiece export_dir);
+bool IsPossibleExportDirectory(const absl::string_view export_dir);
 
 }  // namespace serving
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/session_bundle/signature_test.cc b/tensorflow/contrib/session_bundle/signature_test.cc
index b1ff55552e..65be9c9a8b 100644
--- a/tensorflow/contrib/session_bundle/signature_test.cc
+++ b/tensorflow/contrib/session_bundle/signature_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <memory>
 
 #include "google/protobuf/any.pb.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/session_bundle/manifest.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -25,7 +26,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/public/session.h"
@@ -34,7 +34,7 @@ namespace tensorflow {
 namespace serving {
 namespace {
 
-static bool HasSubstr(StringPiece base, StringPiece substr) {
+static bool HasSubstr(absl::string_view base, absl::string_view substr) {
   bool ok = str_util::StrContains(base, substr);
   EXPECT_TRUE(ok) << base << ", expected substring " << substr;
   return ok;
diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD
index 6507546ee9..03f18183ad 100644
--- a/tensorflow/contrib/tensorboard/db/BUILD
+++ b/tensorflow/contrib/tensorboard/db/BUILD
@@ -47,6 +47,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:summary_interface",
         "//tensorflow/core/lib/db:sqlite",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
index cfdc884277..c45be444e0 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
@@ -16,12 +16,12 @@ limitations under the License.
 
 #include <deque>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/tensorboard/db/summary_converter.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/summary.pb.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/db/sqlite.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/util/event.pb.h"
@@ -136,7 +136,7 @@ void PatchPluginName(SummaryMetadata* metadata, const char* name) {
   }
 }
 
-Status SetDescription(Sqlite* db, int64 id, const StringPiece& markdown) {
+Status SetDescription(Sqlite* db, int64 id, const absl::string_view& markdown) {
   const char* sql = R"sql(
     INSERT OR REPLACE INTO Descriptions (id, description) VALUES (?, ?)
   )sql";
@@ -260,12 +260,12 @@ class GraphWriter {
     for (int node_id = 0; node_id < graph_->node_size(); ++node_id) {
       const NodeDef& node = graph_->node(node_id);
       for (int idx = 0; idx < node.input_size(); ++idx) {
-        StringPiece name = node.input(idx);
+        absl::string_view name = node.input(idx);
         int64 input_node_id;
         int64 input_node_idx = 0;
         int64 is_control = 0;
         size_t i = name.rfind(':');
-        if (i != StringPiece::npos) {
+        if (i != absl::string_view::npos) {
           if (!strings::safe_strto64(name.substr(i + 1, name.size() - i - 1),
                                      &input_node_idx)) {
             return errors::DataLoss("Bad NodeDef.input: ", name);
@@ -369,7 +369,8 @@ class GraphWriter {
   const uint64 now_;
   const int64 graph_id_;
   std::vector<string> name_copies_;
-  std::unordered_map<StringPiece, int64, StringPieceHasher> name_to_node_id_;
+  std::unordered_map<absl::string_view, int64, StringPieceHasher>
+      name_to_node_id_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(GraphWriter);
 };
@@ -680,7 +681,7 @@ class SeriesWriter {
       } else {
         SqliteTransaction txn(*db);
         TF_RETURN_IF_ERROR(
-            Update(db, step, computed_time, t, StringPiece(), rowid));
+            Update(db, step, computed_time, t, absl::string_view(), rowid));
         TF_RETURN_IF_ERROR(UpdateNdString(db, t, rowid));
         return txn.Commit();
       }
@@ -690,7 +691,7 @@ class SeriesWriter {
   }
 
   Status Update(Sqlite* db, int64 step, double computed_time, const Tensor& t,
-                const StringPiece& data, int64 rowid) {
+                const absl::string_view& data, int64 rowid) {
     const char* sql = R"sql(
       UPDATE OR REPLACE
         Tensors
diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD
index 38d1c3049e..adaa7fbfbd 100644
--- a/tensorflow/contrib/tpu/profiler/BUILD
+++ b/tensorflow/contrib/tpu/profiler/BUILD
@@ -29,6 +29,7 @@ cc_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
index b4b06a40a2..49baacceed 100644
--- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <ctime>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/tpu/profiler/op_profile.pb.h"
 #include "tensorflow/contrib/tpu/profiler/trace_events.pb.h"
 #include "tensorflow/contrib/tpu/profiler/trace_events_to_json.h"
@@ -63,7 +64,8 @@ Status WriteGzippedDataToFile(const string& filename, const string& data) {
   return Status::OK();
 }
 
-Status DumpTraceToLogDirectory(StringPiece run_dir, const string& host_prefix,
+Status DumpTraceToLogDirectory(absl::string_view run_dir,
+                               const string& host_prefix,
                                const string& encoded_trace, std::ostream* os) {
   string proto_path =
       JoinPath(run_dir, StrCat(host_prefix, kProtoTraceFileName));
@@ -86,7 +88,7 @@ Status DumpTraceToLogDirectory(StringPiece run_dir, const string& host_prefix,
   return Status::OK();
 }
 
-Status DumpOpProfileToLogDirectory(StringPiece run_dir,
+Status DumpOpProfileToLogDirectory(absl::string_view run_dir,
                                    const string& host_prefix,
                                    const tpu::op_profile::Profile& profile,
                                    std::ostream* os) {
@@ -107,7 +109,7 @@ Status DumpOpProfileToLogDirectory(StringPiece run_dir,
   return Status::OK();
 }
 
-Status DumpToolDataToLogDirectory(StringPiece run_dir,
+Status DumpToolDataToLogDirectory(absl::string_view run_dir,
                                   const string& host_prefix,
                                   const tensorflow::ProfileToolData& tool,
                                   std::ostream* os) {
diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD
index 19cb8983b6..83aae915be 100644
--- a/tensorflow/contrib/verbs/BUILD
+++ b/tensorflow/contrib/verbs/BUILD
@@ -42,6 +42,7 @@ cc_library(
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/verbs/verbs_util.cc b/tensorflow/contrib/verbs/verbs_util.cc
index a6333d9f36..3662347256 100644
--- a/tensorflow/contrib/verbs/verbs_util.cc
+++ b/tensorflow/contrib/verbs/verbs_util.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <vector>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -32,7 +32,7 @@ string VerbsUtil::AppendStepidToKey(const string& key, int64 step_id) {
 // static
 void VerbsUtil::GetKeyAndStepId(const string& key_with_step_id, string& key,
                                 int64& step_id) {
-  StringPiece s(key_with_step_id);
+  absl::string_view s(key_with_step_id);
   // a key (with step_id) has exact 6 parts if split by ";"
   // part 1: src_device;
   // part 2: src_incarnation;
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 841291e6d8..8cdc629640 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -485,6 +485,7 @@ cc_library(
         ":platform_port",
         ":platform_protobuf",
         "//tensorflow/core/platform/default/build_config:env",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -514,6 +515,7 @@ cc_library(
         ":lib",
         ":lib_platform",
         ":platform_env",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -592,6 +594,7 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:other",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "//tensorflow/core/platform/default/build_config:port",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -978,6 +981,7 @@ cc_library(
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -1478,6 +1482,7 @@ cc_library(
         "//tensorflow/core/kernels:ops_testutil",
         "//tensorflow/core/kernels:ops_util",
         "//tensorflow/core/kernels:random_ops",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -2193,6 +2198,7 @@ cc_library(
         ":lib_proto_parsing",
         ":abi",
         ":core_stringpiece",
+        "@com_google_absl//absl/strings",
         "//third_party/eigen3",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "@snappy",
@@ -2618,6 +2624,7 @@ tf_cuda_library(
         ":protos_all_cc",
         ":stats_calculator_portable",
         ":version_lib",
+        "@com_google_absl//absl/strings",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "//tensorflow/core/kernels:bounds_check",
         "//third_party/eigen3",
@@ -2748,6 +2755,7 @@ tf_cuda_library(
         ":proto_text",
         ":protos_all_cc",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -2792,6 +2800,7 @@ tf_cuda_library(
         ":function_ops_op_lib",
         ":functional_grad",
         ":functional_ops_op_lib",
+        "@com_google_absl//absl/strings",
         "//tensorflow/core/kernels:bounds_check",
         "//tensorflow/core/kernels:required",
         ":core_cpu_impl",
@@ -2921,6 +2930,7 @@ tf_cuda_library(
         ":lib_internal",
         ":proto_text",
         ":protos_all_cc",
+        "@com_google_absl//absl/strings",
         "//third_party/eigen3",
         "//tensorflow/core/grappler:grappler_item",
     ] + mkl_deps(),
@@ -3000,6 +3010,7 @@ tf_cuda_library(
         ":protos_all_cc",
         "//tensorflow/core/debug:debug_graph_utils",
         "//tensorflow/core/kernels:function_ops",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -3123,6 +3134,7 @@ tf_cuda_library(
         ":protos_all_cc",
         ":stream_executor",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -3252,6 +3264,7 @@ cc_library(
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3367,6 +3380,7 @@ tf_cc_tests(
         ":test",
         ":test_main",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
         "@zlib_archive//:zlib",
     ],
 )
@@ -3398,6 +3412,7 @@ tf_cc_test(
         ":test",
         ":test_main",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3469,6 +3484,7 @@ tf_cc_test(
         ":protos_all_cc",
         ":test",
         ":test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3518,6 +3534,7 @@ tf_cc_test(
         ":lib_internal",
         ":test",
         ":test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3679,6 +3696,7 @@ tf_cc_tests(
         "//tensorflow/cc:while_loop",
         "//tensorflow/core/kernels:ops_util",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3715,6 +3733,7 @@ tf_cc_tests(
         "//tensorflow/cc:sendrecv_ops",
         "//tensorflow/core/kernels:ops_util",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3898,6 +3917,7 @@ tf_cc_tests_gpu(
         ":testlib",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/core/kernels:ops_util",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3946,6 +3966,7 @@ tf_cuda_cc_test(
         ":testlib",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/core/kernels:ops_util",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -4062,6 +4083,7 @@ tf_cc_test(
         "//tensorflow/core/kernels:immutable_constant_op",
         "//tensorflow/core/kernels:matmul_op",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -4172,6 +4194,7 @@ tf_cuda_cc_test(
         ":test",
         ":test_main",
         ":testlib",
+        "@com_google_absl//absl/strings",
         "//third_party/eigen3",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/core/kernels:collective_ops",
@@ -4212,6 +4235,7 @@ tf_cc_test(
         ":test",
         ":test_main",
         ":testlib",
+        "@com_google_absl//absl/strings",
         "//third_party/eigen3",
         "//tensorflow/cc:cc_ops",
         # Link with support for TensorFlow Debugger (tfdbg).
@@ -4352,6 +4376,7 @@ tf_cc_test(
         "//tensorflow/core/kernels:random_ops",
         "//tensorflow/core/kernels:shape_ops",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -4744,6 +4769,7 @@ tf_cc_tests(
         "//tensorflow/cc:client_session",
         "//tensorflow/cc:function_ops",
         "//tensorflow/cc:ops",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/common_runtime/constant_folding_test.cc b/tensorflow/core/common_runtime/constant_folding_test.cc
index 98aefcde27..2227705ab5 100644
--- a/tensorflow/core/common_runtime/constant_folding_test.cc
+++ b/tensorflow/core/common_runtime/constant_folding_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/constant_folding.h"
 
 #include "tensorflow/cc/ops/array_ops_internal.h"
@@ -640,7 +641,7 @@ class TestTFFileSystem : public ::tensorflow::NullFileSystem {
       return ::tensorflow::errors::Unimplemented(
           "NewReadOnlyMemoryRegionFromFile unimplemented");
     }
-    const ::tensorflow::StringPiece sp = data_tensor_.tensor_data();
+    const ::absl::string_view sp = data_tensor_.tensor_data();
     *result = std::unique_ptr<::tensorflow::ReadOnlyMemoryRegion>(
         new TestReadOnlyMemoryRegion(sp.data(), sp.size()));
     return ::tensorflow::Status::OK();
diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc
index 6e2eb66b94..56b80cccaa 100644
--- a/tensorflow/core/common_runtime/copy_tensor.cc
+++ b/tensorflow/core/common_runtime/copy_tensor.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <atomic>
 #include <utility>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -48,7 +49,7 @@ std::vector<RegistrationInfo>* MutableRegistry() {
 }
 
 void CopyHostToDevice(const Tensor* input, Allocator* cpu_allocator,
-                      Allocator* out_allocator, StringPiece edge_name,
+                      Allocator* out_allocator, absl::string_view edge_name,
                       Device* dst, Tensor* output,
                       DeviceContext* recv_dev_context, StatusCallback done) {
   if (input->dtype() == DT_VARIANT) {
@@ -113,7 +114,7 @@ void CopyHostToDevice(const Tensor* input, Allocator* cpu_allocator,
 }
 
 void CopyDeviceToHost(const Tensor* input, Allocator* cpu_allocator,
-                      Allocator* out_allocator, StringPiece edge_name,
+                      Allocator* out_allocator, absl::string_view edge_name,
                       Device* src, Tensor* output,
                       DeviceContext* send_dev_context, StatusCallback done) {
   if (input->dtype() == DT_VARIANT) {
@@ -246,7 +247,8 @@ void CopyDeviceToDevice(CopyTensor::CopyFunction copy_function,
 }  // namespace
 
 // static
-void CopyTensor::ViaDMA(StringPiece edge_name, DeviceContext* send_dev_context,
+void CopyTensor::ViaDMA(absl::string_view edge_name,
+                        DeviceContext* send_dev_context,
                         DeviceContext* recv_dev_context, Device* src,
                         Device* dst, const AllocatorAttributes src_alloc_attr,
                         const AllocatorAttributes dst_alloc_attr,
diff --git a/tensorflow/core/common_runtime/copy_tensor.h b/tensorflow/core/common_runtime/copy_tensor.h
index 9cd5ac2a37..f7a416fffc 100644
--- a/tensorflow/core/common_runtime/copy_tensor.h
+++ b/tensorflow/core/common_runtime/copy_tensor.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_COPY_TENSOR_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_COPY_TENSOR_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/device_base.h"
@@ -40,7 +41,8 @@ class CopyTensor {
   // the type of devices and memory in use, the copy may be performed
   // synchronously or asynchronously.  'done' will be invoked only
   // after the copy is actually complete.
-  static void ViaDMA(StringPiece edge_name, DeviceContext* send_dev_context,
+  static void ViaDMA(absl::string_view edge_name,
+                     DeviceContext* send_dev_context,
                      DeviceContext* recv_dev_context, Device* src, Device* dst,
                      const AllocatorAttributes src_alloc_attr,
                      const AllocatorAttributes dst_alloc_attr,
diff --git a/tensorflow/core/common_runtime/device_mgr.cc b/tensorflow/core/common_runtime/device_mgr.cc
index 470abc1431..068e944fec 100644
--- a/tensorflow/core/common_runtime/device_mgr.cc
+++ b/tensorflow/core/common_runtime/device_mgr.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device_mgr.h"
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/local_device.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -51,11 +52,11 @@ DeviceMgr::~DeviceMgr() {
   for (Device* p : devices_) delete p;
 }
 
-StringPiece DeviceMgr::CopyToBackingStore(StringPiece s) {
+absl::string_view DeviceMgr::CopyToBackingStore(absl::string_view s) {
   size_t n = s.size();
   char* space = name_backing_store_.Alloc(n);
   memcpy(space, s.data(), n);
-  return StringPiece(space, n);
+  return absl::string_view(space, n);
 }
 
 void DeviceMgr::ListDeviceAttributes(
@@ -89,11 +90,11 @@ string DeviceMgr::DeviceMappingString() const {
   return out;
 }
 
-Status DeviceMgr::LookupDevice(StringPiece name, Device** device) const {
+Status DeviceMgr::LookupDevice(absl::string_view name, Device** device) const {
   Status s;
   auto iter = device_map_.find(name);
   if (iter == device_map_.end()) {
-    std::vector<StringPiece> device_names;
+    std::vector<absl::string_view> device_names;
     for (auto&& itr : device_map_) {
       device_names.push_back(itr.first);
     }
diff --git a/tensorflow/core/common_runtime/device_mgr.h b/tensorflow/core/common_runtime/device_mgr.h
index c1ff10d9b5..7a4235d3bc 100644
--- a/tensorflow/core/common_runtime/device_mgr.h
+++ b/tensorflow/core/common_runtime/device_mgr.h
@@ -21,10 +21,10 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/lib/core/arena.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/macros.h"
 
@@ -53,7 +53,7 @@ class DeviceMgr {
 
   // Assigns *device with pointer to Device of the given name.
   // Accepts either a full device name, or just the replica-local suffix.
-  Status LookupDevice(StringPiece name, Device** device) const;
+  Status LookupDevice(absl::string_view name, Device** device) const;
 
   // Clears given containers of all devices if 'container' is
   // non-empty. Otherwise, clears default containers of all devices.
@@ -66,9 +66,9 @@ class DeviceMgr {
   typedef gtl::InlinedVector<Device*, 8> DeviceVec;
   DeviceVec devices_;
 
-  StringPiece CopyToBackingStore(StringPiece s);
+  absl::string_view CopyToBackingStore(absl::string_view s);
 
-  std::unordered_map<StringPiece, Device*, StringPieceHasher> device_map_;
+  std::unordered_map<absl::string_view, Device*, StringPieceHasher> device_map_;
   core::Arena name_backing_store_;  // Storage for keys in device_map_
   std::unordered_map<string, int> device_type_counts_;
 
diff --git a/tensorflow/core/common_runtime/device_set.cc b/tensorflow/core/common_runtime/device_set.cc
index f6b4115cbf..a1e162ed4b 100644
--- a/tensorflow/core/common_runtime/device_set.cc
+++ b/tensorflow/core/common_runtime/device_set.cc
@@ -19,9 +19,9 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 
 namespace tensorflow {
@@ -68,7 +68,7 @@ static bool DeviceTypeComparator(const DeviceType& a, const DeviceType& b) {
     return a_priority > b_priority;
   }
 
-  return StringPiece(a.type()) < StringPiece(b.type());
+  return absl::string_view(a.type()) < absl::string_view(b.type());
 }
 
 std::vector<DeviceType> DeviceSet::PrioritizedDeviceTypeList() const {
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 52c1cd2691..139fed6e6b 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/collective_executor_mgr.h"
 #include "tensorflow/core/common_runtime/collective_param_resolver_local.h"
 #include "tensorflow/core/common_runtime/constant_folding.h"
@@ -1192,7 +1193,7 @@ Status DirectSession::CreateExecutors(
 
   if (run_state_args->is_partial_run) {
     ek->graph = std::move(run_state_args->graph);
-    std::unordered_set<StringPiece, StringPieceHasher> names;
+    std::unordered_set<absl::string_view, StringPieceHasher> names;
     for (const string& input : callable_options.feed()) {
       TensorId id(ParseTensorName(input));
       names.emplace(id.first);
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index 3a168bbe3f..5e64f9cee6 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/costmodel_manager.h"
 #include "tensorflow/core/common_runtime/debugger_state_interface.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
@@ -65,7 +66,8 @@ class DirectSession : public Session {
   ~DirectSession() override;
 
   typedef std::vector<std::pair<string, Tensor>> NamedTensorList;
-  typedef std::unordered_map<StringPiece, Node*, StringPieceHasher> NameNodeMap;
+  typedef std::unordered_map<absl::string_view, Node*, StringPieceHasher>
+      NameNodeMap;
 
   ::tensorflow::Status Create(const GraphDef& graph) override;
   ::tensorflow::Status Extend(const GraphDef& graph) override;
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index a6440c55ad..e33b1cb74e 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/function_testlib.h"
@@ -2040,8 +2041,8 @@ void TestFeedAndFetchTensorsInDeviceMemory(
         << DataType_Name(dtype);
     TF_ASSERT_OK(session->ReleaseCallable(handle)) << DataType_Name(dtype);
     ASSERT_EQ(1, outputs.size());
-    const StringPiece actual_data = outputs[0].tensor_data();
-    const StringPiece expected_data = host_tensor.tensor_data();
+    const absl::string_view actual_data = outputs[0].tensor_data();
+    const absl::string_view expected_data = host_tensor.tensor_data();
     EXPECT_EQ(expected_data.size(), actual_data.size()) << DataType_Name(dtype);
     EXPECT_EQ(0, memcmp(expected_data.data(), actual_data.data(),
                         std::min(expected_data.size(), actual_data.size())))
diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD
index 7b74c67c85..ebc9e28c3a 100644
--- a/tensorflow/core/common_runtime/eager/BUILD
+++ b/tensorflow/core/common_runtime/eager/BUILD
@@ -49,6 +49,7 @@ tf_cuda_library(
     deps = [
         ":eager_executor",
         ":kernel_and_device",
+        "@com_google_absl//absl/strings",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib_lite",
@@ -197,6 +198,7 @@ cc_library(
         ":eager_operation",
         ":kernel_and_device",
         ":tensor_handle",
+        "@com_google_absl//absl/strings",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib_lite",
@@ -221,6 +223,7 @@ tf_cuda_library(
     deps = [
         ":kernel_and_device",
         "@farmhash_archive//:farmhash",
+        "@com_google_absl//absl/strings",
         # Only the TF_AttrType enum is required, so pull in just the C headers.
         # TODO(b/113535673): Break this dependency and avoid the C header completely.
         "//tensorflow/c:c_api_headers",
diff --git a/tensorflow/core/common_runtime/eager/attr_builder.cc b/tensorflow/core/common_runtime/eager/attr_builder.cc
index 5c8369de87..d77372684d 100644
--- a/tensorflow/core/common_runtime/eager/attr_builder.cc
+++ b/tensorflow/core/common_runtime/eager/attr_builder.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/eager/attr_builder.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/eager/kernel_and_device.h"
 #include "tensorflow/core/common_runtime/rendezvous_mgr.h"
@@ -96,11 +97,12 @@ Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out) {
   return Status::OK();
 }
 
-#define DEFINE_SET_ATTR(value_type, value_field)                             \
-  template <>                                                                \
-  AttrBuilder& AttrBuilder::Set(StringPiece attr_name, value_type&& value) { \
-    value_field.push_back(std::make_pair(attr_name, value));                 \
-    return *this;                                                            \
+#define DEFINE_SET_ATTR(value_type, value_field)             \
+  template <>                                                \
+  AttrBuilder& AttrBuilder::Set(absl::string_view attr_name, \
+                                value_type&& value) {        \
+    value_field.push_back(std::make_pair(attr_name, value)); \
+    return *this;                                            \
   }
 
 DEFINE_SET_ATTR(float, float_attrs_);
@@ -194,13 +196,13 @@ void CombineUnordered(const tensorflow::Fprint128& a,
   b->high64 += a.high64;
 }
 
-inline tensorflow::Fprint128 CacheKeyHelper(StringPiece s,
+inline tensorflow::Fprint128 CacheKeyHelper(absl::string_view s,
                                             const tensorflow::Fprint128& b) {
   tensorflow::Fprint128 a = tensorflow::Fingerprint128(s);
   return FingerprintCat128(a, b);
 }
 
-inline tensorflow::Fprint128 CacheKeyHelper(StringPiece s, uint64 b) {
+inline tensorflow::Fprint128 CacheKeyHelper(absl::string_view s, uint64 b) {
   return CacheKeyHelper(s, {b, b});
 }
 
diff --git a/tensorflow/core/common_runtime/eager/attr_builder.h b/tensorflow/core/common_runtime/eager/attr_builder.h
index c114ea4ba0..4dbf6da49b 100644
--- a/tensorflow/core/common_runtime/eager/attr_builder.h
+++ b/tensorflow/core/common_runtime/eager/attr_builder.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include <memory>
 #include <unordered_map>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/eager/kernel_and_device.h"
@@ -94,7 +95,7 @@ class AttrBuilder {
   AttrBuilder& NumInputs(int n);
 
   template <class T>
-  AttrBuilder& Set(StringPiece attr_name, T&& value) {
+  AttrBuilder& Set(absl::string_view attr_name, T&& value) {
     MayBeInitializeNodeDef();
     SetInAttrValueMap(node_def_->mutable_attr(), attr_name, value);
     return *this;
@@ -107,7 +108,8 @@ class AttrBuilder {
 
  private:
   template <class T>
-  using AttrVec = tensorflow::gtl::InlinedVector<std::pair<StringPiece, T>, 2>;
+  using AttrVec =
+      tensorflow::gtl::InlinedVector<std::pair<absl::string_view, T>, 2>;
 
   void MayBeInitializeNodeDef();
   // Fill `m` with the attr-value pairs set via AttrBuilder::Set() so far, as
@@ -119,7 +121,7 @@ class AttrBuilder {
   void FillAttrValueMap(AttrValueMap* m, bool include_those_in_node_def) const;
 
   template <class T>
-  void SetInAttrValueMap(AttrValueMap* m, StringPiece attr_name,
+  void SetInAttrValueMap(AttrValueMap* m, absl::string_view attr_name,
                          T&& value) const {
     DCHECK(!node_def_finalized_)
         << "Calling SetInAttrValueMap after BuildNodeDef.";
@@ -148,16 +150,15 @@ class AttrBuilder {
 };  // namespace tensorflow
 
 template <>
-AttrBuilder& AttrBuilder::Set(StringPiece attr_name, int&& value);
+AttrBuilder& AttrBuilder::Set(absl::string_view attr_name, int&& value);
 template <>
-AttrBuilder& AttrBuilder::Set(StringPiece attr_name, float&& value);
+AttrBuilder& AttrBuilder::Set(absl::string_view attr_name, float&& value);
 template <>
-AttrBuilder& AttrBuilder::Set(StringPiece attr_name, bool&& value);
+AttrBuilder& AttrBuilder::Set(absl::string_view attr_name, bool&& value);
 template <>
-AttrBuilder& AttrBuilder::Set(StringPiece attr_name,
+AttrBuilder& AttrBuilder::Set(absl::string_view attr_name,
                               tensorflow::DataType&& value);
 
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_ATTR_BUILDER_H_
diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc
index f23cefb33d..9fc15e832d 100644
--- a/tensorflow/core/common_runtime/eager/context.cc
+++ b/tensorflow/core/common_runtime/eager/context.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/eager/context.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/framework/resource_mgr.h"
@@ -24,7 +25,7 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-bool ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val) {
+bool ReadBoolFromEnvVar(absl::string_view env_var_name, bool default_val) {
   bool val;
   if (tensorflow::ReadBoolFromEnvVar(env_var_name, default_val, &val).ok()) {
     return val;
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index c29a767d23..bf42172ffe 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
@@ -820,7 +821,7 @@ Status FindDeviceFromName(EagerContext* ctx, const char* device_name,
 }
 
 Status ExecuteSend(EagerContext* ctx, tensorflow::Device* device,
-                   TensorHandle* h, StringPiece wire_id,
+                   TensorHandle* h, absl::string_view wire_id,
                    const string& recv_device) {
   const tensorflow::AttrTypeMap* types;
   TF_RETURN_IF_ERROR(tensorflow::AttrTypeMapForOp("_Send", &types));
@@ -847,7 +848,7 @@ Status ExecuteSend(EagerContext* ctx, tensorflow::Device* device,
 }
 
 Status ExecuteRecv(EagerContext* ctx, tensorflow::Device* device,
-                   DataType dtype, StringPiece wire_id,
+                   DataType dtype, absl::string_view wire_id,
                    const string& send_device, int64 send_device_incarnation,
                    TensorHandle** result) {
   const tensorflow::AttrTypeMap* types;
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index e0e5f4a215..327e22a2ea 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <deque>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/executor.h"
 #include "tensorflow/core/common_runtime/executor_factory.h"
@@ -583,7 +584,7 @@ Status FunctionLibraryRuntimeImpl::ReleaseHandle(Handle handle) {
   return Status::OK();
 }
 
-void DumpGraph(StringPiece label, const Graph* g) {
+void DumpGraph(absl::string_view label, const Graph* g) {
   // TODO(zhifengc): Change Graph to record #nodes.
   VLOG(1) << "Graph " << label << " #nodes " << g->num_nodes() << " #edges "
           << g->num_edges();
diff --git a/tensorflow/core/common_runtime/function.h b/tensorflow/core/common_runtime/function.h
index eeca66f5d0..877fb08306 100644
--- a/tensorflow/core/common_runtime/function.h
+++ b/tensorflow/core/common_runtime/function.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <functional>
 #include <memory>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/process_function_library_runtime.h"
@@ -124,7 +125,7 @@ bool ExpandInlineFunctions(FunctionLibraryRuntime* lib, Graph* graph);
 
 // Dump the contents of the "graph" to log files if the logging level is
 // sufficiently high.
-void DumpGraph(StringPiece label, const Graph* g);
+void DumpGraph(absl::string_view label, const Graph* g);
 
 // Applies graph rewrite optimization such as inlining, dead code
 // removal, etc.
diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc
index 716167132b..7182b1bb7b 100644
--- a/tensorflow/core/common_runtime/function_test.cc
+++ b/tensorflow/core/common_runtime/function_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <atomic>
 #include <utility>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/array_ops_internal.h"
 #include "tensorflow/cc/ops/function_ops.h"
 #include "tensorflow/cc/ops/functional_ops.h"
@@ -55,7 +56,7 @@ Status GetOpSig(const string& op, const OpDef** sig) {
   return OpRegistry::Global()->LookUpOpDef(op, sig);
 }
 
-void HasError(const Status& s, StringPiece substr) {
+void HasError(const Status& s, absl::string_view substr) {
   EXPECT_TRUE(str_util::StrContains(s.ToString(), substr))
       << s << ", expected substring " << substr;
 }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
index 36294094e9..281f6216c4 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #if GOOGLE_CUDA
 
 #include "tensorflow/core/common_runtime/gpu/gpu_device.h"
@@ -53,7 +54,7 @@ Status GetComputeCapability(PlatformGpuId gpu_id, int* cc_major,
   return Status::OK();
 }
 
-void ExpectErrorMessageSubstr(const Status& s, StringPiece substr) {
+void ExpectErrorMessageSubstr(const Status& s, absl::string_view substr) {
   EXPECT_TRUE(str_util::StrContains(s.ToString(), substr))
       << s << ", expected substring " << substr;
 }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc b/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc
index 4bc88ffc8c..6061fd50c6 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
@@ -31,7 +32,7 @@ void GPUDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
 }
 
 void GPUDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
-                                             StringPiece tensor_name,
+                                             absl::string_view tensor_name,
                                              Device* device, Tensor* cpu_tensor,
                                              StatusCallback done) {
   GPUUtil::CopyGPUTensorToCPU(device, this, device_tensor, cpu_tensor, done);
diff --git a/tensorflow/core/common_runtime/gpu_device_context.h b/tensorflow/core/common_runtime/gpu_device_context.h
index 3603808152..5cd5b61be0 100644
--- a/tensorflow/core/common_runtime/gpu_device_context.h
+++ b/tensorflow/core/common_runtime/gpu_device_context.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_DEVICE_CONTEXT_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_DEVICE_CONTEXT_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -53,9 +54,9 @@ class GPUDeviceContext : public DeviceContext {
                              Tensor* device_tensor,
                              StatusCallback done) const override;
 
-  void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name,
-                             Device* device, Tensor* cpu_tensor,
-                             StatusCallback done) override;
+  void CopyDeviceTensorToCPU(const Tensor* device_tensor,
+                             absl::string_view edge_name, Device* device,
+                             Tensor* cpu_tensor, StatusCallback done) override;
 
   void MaintainLifetimeOnStream(const Tensor* t,
                                 se::Stream* stream) const override {}
diff --git a/tensorflow/core/common_runtime/lower_if_while_test.cc b/tensorflow/core/common_runtime/lower_if_while_test.cc
index 07bcecf168..f9f6805660 100644
--- a/tensorflow/core/common_runtime/lower_if_while_test.cc
+++ b/tensorflow/core/common_runtime/lower_if_while_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/lower_if_while.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/client/client_session.h"
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/ops/array_ops.h"
@@ -38,7 +39,7 @@ namespace {
 
 typedef FunctionDefHelper FDH;
 
-static void AssertHasSubstr(StringPiece s, StringPiece expected) {
+static void AssertHasSubstr(absl::string_view s, absl::string_view expected) {
   ASSERT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc
index 5e1ed13080..d0583284a2 100644
--- a/tensorflow/core/common_runtime/placer.cc
+++ b/tensorflow/core/common_runtime/placer.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
@@ -37,8 +37,9 @@ namespace {
 
 // We hoist the conversion from C-style string literal to StringPiece here,
 // so that we can avoid the many repeated calls to strlen().
-const StringPiece kColocationAttrNameStringPiece(kColocationAttrName);
-const StringPiece kColocationGroupPrefixStringPiece(kColocationGroupPrefix);
+const absl::string_view kColocationAttrNameStringPiece(kColocationAttrName);
+const absl::string_view kColocationGroupPrefixStringPiece(
+    kColocationGroupPrefix);
 
 // Returns a list of devices having type in supported_device_types.  The
 // returned list is sorted by preferred type (higher numeric type is preferred).
@@ -68,7 +69,7 @@ std::vector<Device*> FilterSupportedDevices(
     if (a_priority != b_priority) {
       return a_priority > b_priority;
     }
-    return StringPiece(a->name()) < StringPiece(b->name());
+    return absl::string_view(a->name()) < absl::string_view(b->name());
   };
   std::vector<Device*>::iterator sort_start;
   if (filtered_default_device != nullptr) {
@@ -144,7 +145,7 @@ class ColocationGraph {
     // 'string' values stored in NodeDef attribute lists, as well as StringPiece
     // values that refer to 'string' values from NodeDef::name(), without
     // performing any string allocations.
-    std::unordered_map<StringPiece, const Node*, StringPieceHasher>
+    std::unordered_map<absl::string_view, const Node*, StringPieceHasher>
         colocation_group_root;
 
     for (Node* node : graph_->op_nodes()) {
@@ -161,7 +162,7 @@ class ColocationGraph {
           node->attrs().Find(kColocationAttrNameStringPiece);
       if (attr_value != nullptr && attr_value->has_list()) {
         for (const string& class_spec : attr_value->list().s()) {
-          StringPiece spec(class_spec);
+          absl::string_view spec(class_spec);
           if (str_util::ConsumePrefix(&spec,
                                       kColocationGroupPrefixStringPiece)) {
             found_spec = true;
@@ -183,9 +184,9 @@ class ColocationGraph {
   }
 
   Status ColocateNodeToGroup(
-      std::unordered_map<StringPiece, const Node*, StringPieceHasher>*
+      std::unordered_map<absl::string_view, const Node*, StringPieceHasher>*
           colocation_group_root,
-      Node* node, StringPiece colocation_group) {
+      Node* node, absl::string_view colocation_group) {
     const Node*& root_node = (*colocation_group_root)[colocation_group];
     if (root_node == nullptr) {
       // This is the first node of the colocation group, so
diff --git a/tensorflow/core/common_runtime/profile_handler.h b/tensorflow/core/common_runtime/profile_handler.h
index 391dc8c198..cfdf8caf5c 100644
--- a/tensorflow/core/common_runtime/profile_handler.h
+++ b/tensorflow/core/common_runtime/profile_handler.h
@@ -16,10 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_PROFILE_HANDLER_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_PROFILE_HANDLER_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/graph/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 
@@ -41,8 +41,9 @@ class ProfileHandler {
   // - op_type: String name of the Op.
   // - details: Main content for timeline click text.
   virtual void RecordOneOp(const string& device, const NodeExecStats& stats,
-                           bool is_copy, StringPiece label, StringPiece op_type,
-                           StringPiece details) = 0;
+                           bool is_copy, absl::string_view label,
+                           absl::string_view op_type,
+                           absl::string_view details) = 0;
 
   // Records that the current step finished.
   //
diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index 9488a44778..6852e12609 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/eval_const_tensor.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -68,7 +69,7 @@ Status InferShapesForFunctionSubNode(const Node* node, ShapeRefiner* refiner,
   TF_RETURN_IF_ERROR(refiner->AddNode(node));
   InferenceContext* node_context = CHECK_NOTNULL(refiner->GetContext(node));
 
-  if (StringPiece(node->type_string()) == kArgOp) {
+  if (absl::string_view(node->type_string()) == kArgOp) {
     // Handle special node: function input.
     // Shapes for these nodes are provided in the outer inference
     // context.
@@ -88,7 +89,7 @@ Status InferShapesForFunctionSubNode(const Node* node, ShapeRefiner* refiner,
     if (resource) {
       node_context->set_output_handle_shapes_and_types(0, *resource);
     }
-  } else if (StringPiece(node->type_string()) == kRetvalOp) {
+  } else if (absl::string_view(node->type_string()) == kRetvalOp) {
     // Handle special node: function output.
     // Shapes inferred for these nodes go into the outer inference
     // context.
diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc
index a70ab93d4a..d82cb3d495 100644
--- a/tensorflow/core/common_runtime/step_stats_collector.cc
+++ b/tensorflow/core/common_runtime/step_stats_collector.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/common_runtime/step_stats_collector.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/costmodel_manager.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -22,7 +23,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tracking_allocator.h"
 #include "tensorflow/core/graph/costmodel.h"
 #include "tensorflow/core/graph/graph.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
@@ -212,7 +212,7 @@ static int ExtractGpuWithStreamAll(string device_name) {
   scanner.RestartCapture().Many(strings::Scanner::DIGIT).StopCapture();
   // Check that the digits are preceded by the 'device:GPU:' string
   scanner.OneLiteral(":UPG:ecived");
-  StringPiece capture;
+  absl::string_view capture;
   bool matched = scanner.GetResult(nullptr, &capture);
 
   if (!matched) {
@@ -241,7 +241,7 @@ static int ExtractGpuWithoutStream(string device_name) {
   scanner.RestartCapture().Many(strings::Scanner::DIGIT).StopCapture();
   // Check that the digits are preceded by the 'device:GPU:' string
   scanner.OneLiteral(":UPG:ecived");
-  StringPiece capture;
+  absl::string_view capture;
   bool matched = scanner.GetResult(nullptr, &capture);
 
   if (!matched) {
@@ -276,7 +276,7 @@ void StepStatsCollector::BuildCostModel(
     const DeviceStepStats* hardware_stats;
   };
 
-  std::unordered_map<StringPiece, DeviceStats, StringPieceHasher>
+  std::unordered_map<absl::string_view, DeviceStats, StringPieceHasher>
       per_device_stats;
   std::unordered_map<int, const DeviceStepStats*> gpu_hardware_stats;
 
@@ -295,7 +295,7 @@ void StepStatsCollector::BuildCostModel(
   }
 
   for (auto& itr : per_device_stats) {
-    const StringPiece device_name = itr.first;
+    const absl::string_view device_name = itr.first;
     const int gpu_id = ExtractGpuWithoutStream(string(device_name));
     if (gpu_id >= 0) {
       // Reference the gpu hardware stats in addition to the regular stats
@@ -307,7 +307,7 @@ void StepStatsCollector::BuildCostModel(
   }
 
   for (auto itr : device_map) {
-    const StringPiece device = itr.first;
+    const absl::string_view device = itr.first;
     if (per_device_stats.find(device) == per_device_stats.end()) {
       continue;
     }
@@ -316,7 +316,8 @@ void StepStatsCollector::BuildCostModel(
     CostModel* cm = cost_model_manager->FindOrCreateCostModel(graph);
     cm->IncrementUpdateTimes();
 
-    std::unordered_map<StringPiece, Node*, StringPieceHasher> name_to_node;
+    std::unordered_map<absl::string_view, Node*, StringPieceHasher>
+        name_to_node;
     for (Node* n : graph->nodes()) {
       name_to_node.emplace(n->name(), n);
     }
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD
index 591c22b8f6..60f57e3fd3 100644
--- a/tensorflow/core/debug/BUILD
+++ b/tensorflow/core/debug/BUILD
@@ -109,6 +109,7 @@ tf_cuda_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -132,6 +133,7 @@ tf_cuda_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/core/debug/debug_graph_utils.cc b/tensorflow/core/debug/debug_graph_utils.cc
index 5fc95a8f20..f35035ce92 100644
--- a/tensorflow/core/debug/debug_graph_utils.cc
+++ b/tensorflow/core/debug/debug_graph_utils.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/debug/debug_graph_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/memory_types.h"
 #include "tensorflow/core/framework/kernel_def.pb.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -344,7 +345,7 @@ Status DebugNodeInserter::ParseDebugOpName(
 
     std::vector<string> attribute_segs = str_util::Split(arguments, ";");
     for (const string& attribute_seg : attribute_segs) {
-      StringPiece seg(attribute_seg);
+      absl::string_view seg(attribute_seg);
       str_util::RemoveWhitespaceContext(&seg);
       if (seg.empty()) {
         continue;
diff --git a/tensorflow/core/debug/debug_io_utils.cc b/tensorflow/core/debug/debug_io_utils.cc
index 6994dec3b5..4b5a545f5c 100644
--- a/tensorflow/core/debug/debug_io_utils.cc
+++ b/tensorflow/core/debug/debug_io_utils.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <limits>
 #include <utility>
 #include <vector>
+#include "absl/strings/string_view.h"
 
 #ifndef PLATFORM_WINDOWS
 #include "grpcpp/create_channel.h"
@@ -308,7 +309,7 @@ Status ReadEventFromFile(const string& dump_file_path, Event* event) {
     return s;
   }
 
-  StringPiece result;
+  absl::string_view result;
   s = file->Read(0, file_size, &result, &(content)[0]);
   if (!s.ok()) {
     return s;
diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD
index 37029f3f1a..d2c63d0a3b 100644
--- a/tensorflow/core/distributed_runtime/BUILD
+++ b/tensorflow/core/distributed_runtime/BUILD
@@ -172,6 +172,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:worker_proto_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -288,6 +289,7 @@ cc_library(
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:worker_proto_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -344,6 +346,7 @@ cc_library(
         "//tensorflow/core:master_proto_cc",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/debug:debug_graph_utils",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -397,6 +400,7 @@ cc_library(
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc
index de6e4b4a7c..97e1639a42 100644
--- a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc
+++ b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/copy_tensor.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
@@ -144,8 +145,8 @@ BaseRemoteRendezvous::~BaseRemoteRendezvous() {
 // Returns true if "device_name" is a valid full name of local device
 // of the "worker".  This helper is purely based on the worker name
 // and device name and does no lookups in the worker->device_mgr.
-static bool IsLocalDevice(const StringPiece worker_name,
-                          const StringPiece device_name) {
+static bool IsLocalDevice(const absl::string_view worker_name,
+                          const absl::string_view device_name) {
   return str_util::StartsWith(device_name, worker_name);
 }
 
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 8e9eec1ed9..6c9bda15d1 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/common_runtime/profile_handler.h"
 #include "tensorflow/core/common_runtime/stats_publisher_interface.h"
@@ -235,7 +236,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted {
   const bool is_partial_;
   const CallableOptions callable_opts_;
   WorkerCacheInterface* const worker_cache_;  // Not owned.
-  std::unordered_map<StringPiece, Node*, StringPieceHasher> name_to_node_;
+  std::unordered_map<absl::string_view, Node*, StringPieceHasher> name_to_node_;
   const bool should_deregister_;
   std::atomic<int64> execution_count_ = {0};
 
@@ -296,12 +297,14 @@ class MasterSession::ReffedClientGraph : public core::RefCounted {
   // This is a generic method that handles Run, PartialRun, and RunCallable.
   template <class FetchListType, class ClientRequestType,
             class ClientResponseType>
-  Status RunPartitionsHelper(
-      const std::unordered_map<StringPiece, size_t, StringPieceHasher>& feeds,
-      const FetchListType& fetches, const MasterEnv* env, int64 step_id,
-      int64 execution_count, PerStepState* pss, CallOptions* call_opts,
-      const ClientRequestType& req, ClientResponseType* resp,
-      CancellationManager* cm, bool is_last_partial_run);
+  Status RunPartitionsHelper(const std::unordered_map<absl::string_view, size_t,
+                                                      StringPieceHasher>& feeds,
+                             const FetchListType& fetches, const MasterEnv* env,
+                             int64 step_id, int64 execution_count,
+                             PerStepState* pss, CallOptions* call_opts,
+                             const ClientRequestType& req,
+                             ClientResponseType* resp, CancellationManager* cm,
+                             bool is_last_partial_run);
 
   // Deregisters the partitions on the workers.  Called in the
   // destructor and does not wait for the rpc completion.
@@ -729,7 +732,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
   VLOG(2) << "RunPartitions step_id " << step_id << " execution_count "
           << execution_count;
   // Maps the names of fed tensors to their index in `req`.
-  std::unordered_map<StringPiece, size_t, StringPieceHasher> feeds(3);
+  std::unordered_map<absl::string_view, size_t, StringPieceHasher> feeds(3);
   for (size_t i = 0; i < req.num_feeds(); ++i) {
     if (!feeds.insert({req.feed_name(i), i}).second) {
       return errors::InvalidArgument("Duplicated feeds: ", req.feed_name(i));
@@ -753,7 +756,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
   VLOG(2) << "RunPartitions step_id " << step_id << " execution_count "
           << execution_count;
   // Maps the names of fed tensors to their index in `req`.
-  std::unordered_map<StringPiece, size_t, StringPieceHasher> feeds(3);
+  std::unordered_map<absl::string_view, size_t, StringPieceHasher> feeds(3);
   for (size_t i = 0; i < callable_opts_.feed_size(); ++i) {
     if (!feeds.insert({callable_opts_.feed(i), i}).second) {
       // MakeCallable will fail if there are two feeds with the same name.
diff --git a/tensorflow/core/distributed_runtime/remote_device.cc b/tensorflow/core/distributed_runtime/remote_device.cc
index a043c5dee6..26c145a5ba 100644
--- a/tensorflow/core/distributed_runtime/remote_device.cc
+++ b/tensorflow/core/distributed_runtime/remote_device.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/distributed_runtime/worker_cache.h"
@@ -33,9 +34,9 @@ namespace tensorflow {
 // parsing into one place.
 //
 // Parses and returns the local device part (e.g., cpu:0, gpu:4).
-string GetLocalDeviceName(StringPiece fullname) {
+string GetLocalDeviceName(absl::string_view fullname) {
   auto pos = fullname.rfind('/');
-  CHECK_NE(pos, StringPiece::npos);
+  CHECK_NE(pos, absl::string_view::npos);
   fullname.remove_prefix(pos + 1);
   return string(fullname);
 }
diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD
index 4a10d99a60..60e34c48f7 100644
--- a/tensorflow/core/distributed_runtime/rpc/BUILD
+++ b/tensorflow/core/distributed_runtime/rpc/BUILD
@@ -119,6 +119,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:worker_proto_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -208,6 +209,7 @@ cc_library(
         "//tensorflow/core:master_proto_cc",
         "//tensorflow/core/distributed_runtime:call_options",
         "//tensorflow/core/distributed_runtime:master_interface",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -226,6 +228,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:master_proto_cc",
         "//tensorflow/core/distributed_runtime:master",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -254,6 +257,7 @@ cc_library(
         "//tensorflow/core/distributed_runtime:worker_cache",
         "//tensorflow/core/distributed_runtime:worker_env",
         "//tensorflow/core/distributed_runtime:worker_interface",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -316,6 +320,7 @@ tf_cc_binary(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/distributed_runtime:server_lib",
         "//tensorflow/core/kernels:data_flow",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -341,6 +346,7 @@ tf_cc_binary(
         "//tensorflow/core/kernels:matmul_op",
         "//tensorflow/core/kernels:reduction_ops",
         "//tensorflow/core/kernels:variable_ops",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -479,6 +485,7 @@ tf_cuda_cc_test(
         "//tensorflow/core/kernels:dense_update_ops",
         "//tensorflow/core/kernels:matmul_op",
         "//tensorflow/core/kernels:variable_ops",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
index 127dea2882..23cbba58c5 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
@@ -33,6 +33,7 @@ limitations under the License.
 #include "grpcpp/alarm.h"
 #include "grpcpp/server_builder.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/distributed_runtime/master.h"
 #include "tensorflow/core/distributed_runtime/rpc/async_service_interface.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_call.h"
@@ -286,12 +287,12 @@ class GrpcMasterService : public AsyncServiceInterface {
 
   // Start tracing, including the ID attached to the RPC.
   tracing::ScopedActivity* TraceRpc(
-      StringPiece name,
+      absl::string_view name,
       const std::multimap<::grpc::string_ref, ::grpc::string_ref>& metadata) {
-    StringPiece id;
+    absl::string_view id;
     auto it = metadata.find(GrpcIdKey());
     if (it != metadata.end()) {
-      id = StringPiece(it->second.data(), it->second.size());
+      id = absl::string_view(it->second.data(), it->second.size());
     }
     return new tracing::ScopedActivity(name, id);
   }
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
index b832a2115c..bc6a97af08 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <utility>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/distributed_runtime/call_options.h"
 #include "tensorflow/core/distributed_runtime/master_interface.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h"
@@ -119,7 +120,7 @@ class GrpcRemoteMaster : public MasterInterface {
 
  private:
   // Start tracing, attaching a unique ID to both the trace and the RPC.
-  tracing::ScopedActivity TraceRpc(StringPiece name,
+  tracing::ScopedActivity TraceRpc(absl::string_view name,
                                    ::grpc::ClientContext* ctx) {
     string trace_id = strings::StrCat(tracing::GetUniqueArg());
     ctx->AddMetadata(GrpcIdKey(), trace_id);
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc b/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
index fc601991a2..358db1e240 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/distributed_runtime/rpc/grpc_session.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_testlib.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -667,7 +668,7 @@ TEST(GrpcSessionTest, LongErrorMessage) {
     auto a = test::graph::Constant(&g, Tensor());
     a->set_assigned_device_name(dev_a);
     std::vector<char> long_string_buffer(1024 * 1024, 'x');
-    StringPiece long_string(long_string_buffer.data(), 1024 * 1024);
+    absl::string_view long_string(long_string_buffer.data(), 1024 * 1024);
     string name = strings::StrCat(long_string, "fantasia!");
     auto a_err = test::graph::Error(&g, a, name);
     a_err->set_assigned_device_name(dev_a);
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc
index 159435fd7d..951f2d026c 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.h"
 #include "grpcpp/support/byte_buffer.h"
 #include "grpcpp/support/slice.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor.pb.h"
@@ -158,7 +159,7 @@ void EncodeTensorToByteBuffer(bool is_dead, const Tensor& val,
     io::ProtoEncodeHelper e_skeleton(skeleton.data(), skeleton.size());
     EncodeSkeleton(val, &e_skeleton);
 
-    StringPiece tdata = val.tensor_data();
+    absl::string_view tdata = val.tensor_data();
     uint32 overall_tensor_proto_bytesize =
         (e_skeleton.size() +
          VarLengthEncodingSize(TensorProto::kTensorContentFieldNumber,
@@ -197,7 +198,7 @@ void EncodeTensorToByteBuffer(bool is_dead, const Tensor& val,
     e.WriteVarlengthBeginning(RecvTensorResponse::kTensorFieldNumber,
                               overall_tensor_proto_bytesize);
     // (C)
-    e.WriteRawBytes(StringPiece(e_skeleton.data(), e_skeleton.size()));
+    e.WriteRawBytes(absl::string_view(e_skeleton.data(), e_skeleton.size()));
     // (D1) & (D2)
     e.WriteVarlengthBeginning(TensorProto::kTensorContentFieldNumber,
                               tdata.size());
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc
index e52b257411..a011cad596 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "grpcpp/security/credentials.h"
 #include "grpcpp/server_builder.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/distributed_runtime/server_lib.h"
 
 #include "tensorflow/core/lib/core/errors.h"
@@ -56,7 +57,7 @@ Status FillServerDef(const string& cluster_spec, const string& job_name,
     const string& job_name = job_pieces[0];
     job_def->set_name(job_name);
     // Does a bit more validation of the tasks_per_replica.
-    const StringPiece spec = job_pieces[1];
+    const absl::string_view spec = job_pieces[1];
     // job_str is of form <job_name>|<host_ports>.
     const std::vector<string> host_ports = str_util::Split(spec, ';');
     for (size_t i = 0; i < host_ports.size(); ++i) {
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc b/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc
index 33cbadda0a..1268863562 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "grpcpp/security/credentials.h"
 #include "grpcpp/server_builder.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/distributed_runtime/server_lib.h"
 
 #include "tensorflow/core/lib/core/errors.h"
@@ -50,7 +51,7 @@ Status FillServerDef(const string& job_spec, const string& job_name,
     CHECK_EQ(2, job_pieces.size()) << job_str;
     job_def->set_name(job_pieces[0]);
     // Does a bit more validation of the tasks_per_replica.
-    const StringPiece spec = job_pieces[1];
+    const absl::string_view spec = job_pieces[1];
     // job_str is of form <job_name>|<host_ports>.
     const std::vector<string> host_ports = str_util::Split(spec, ';');
     uint32 tasks_per_replica = host_ports.size();
diff --git a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
index b8cb538503..0e7484ed03 100644
--- a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
+++ b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <unordered_set>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
@@ -58,7 +59,7 @@ class RpcRecvTensorCall : public BaseRecvTensorCall {
  public:
   RpcRecvTensorCall() : wi_(nullptr), dst_device_(nullptr) {}
 
-  void Init(WorkerInterface* wi, int64 step_id, StringPiece key,
+  void Init(WorkerInterface* wi, int64 step_id, absl::string_view key,
             AllocatorAttributes alloc_attrs, Device* dst_device,
             const Rendezvous::Args& recv_args, Rendezvous::DoneCallback done) {
     wi_ = wi;
diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc
index fe2d1a1293..a61dca6f95 100644
--- a/tensorflow/core/distributed_runtime/tensor_coding.cc
+++ b/tensorflow/core/distributed_runtime/tensor_coding.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "google/protobuf/any.pb.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
@@ -196,7 +197,7 @@ bool TensorResponse::ParseTensorSubmessage(
         seen_tensor_content = true;
         TensorShape shape(tensor_meta->tensor_shape());
         Tensor t(allocator_, tensor_meta->dtype(), shape);
-        StringPiece buf = t.tensor_data();
+        absl::string_view buf = t.tensor_data();
         if (static_cast<size_t>(num_bytes) != buf.size()) return false;
         // TODO(jeff,sanjay): Figure out a way to avoid this copy if
         // the underlying ZeroCopyInputStream data is properly aligned
diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h
index 016d1a92c1..49cd28a6ed 100644
--- a/tensorflow/core/example/feature_util.h
+++ b/tensorflow/core/example/feature_util.h
@@ -104,9 +104,9 @@ limitations under the License.
 #include <type_traits>
 
 #include "absl/base/macros.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -170,7 +170,7 @@ template <>
 struct is_string<string> : std::true_type {};
 
 template <>
-struct is_string<::tensorflow::StringPiece> : std::true_type {};
+struct is_string<::absl::string_view> : std::true_type {};
 
 template <typename ValueType>
 struct FeatureTrait<
diff --git a/tensorflow/core/framework/attr_value_util.cc b/tensorflow/core/framework/attr_value_util.cc
index 79966f0692..aa6e6f38a7 100644
--- a/tensorflow/core/framework/attr_value_util.cc
+++ b/tensorflow/core/framework/attr_value_util.cc
@@ -18,13 +18,13 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb_text.h"
 #include "tensorflow/core/framework/tensor.pb_text.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb_text.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/proto_serialization.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -187,8 +187,8 @@ string SummarizeString(const string& str) {
   // If the string is long, replace the middle with ellipses.
   constexpr int kMaxStringSummarySize = 80;
   if (escaped.size() >= kMaxStringSummarySize) {
-    StringPiece prefix(escaped);
-    StringPiece suffix = prefix;
+    absl::string_view prefix(escaped);
+    absl::string_view suffix = prefix;
     prefix.remove_suffix(escaped.size() - 10);
     suffix.remove_prefix(escaped.size() - 10);
     return strings::StrCat("\"", prefix, "...", suffix, "\"");
@@ -288,7 +288,7 @@ string SummarizeAttrValue(const AttrValue& attr_value) {
   return "<Unknown AttrValue type>";  // Prevent missing return warning
 }
 
-Status AttrValueHasType(const AttrValue& attr_value, StringPiece type) {
+Status AttrValueHasType(const AttrValue& attr_value, absl::string_view type) {
   int num_set = 0;
 
 #define VALIDATE_FIELD(name, type_string, oneof_case)                         \
@@ -386,7 +386,8 @@ Status AttrValueHasType(const AttrValue& attr_value, StringPiece type) {
   return Status::OK();
 }
 
-bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out) {
+bool ParseAttrValue(absl::string_view type, absl::string_view text,
+                    AttrValue* out) {
   // Parse type.
   string field_name;
   bool is_list = str_util::ConsumePrefix(&type, "list(");
@@ -420,7 +421,7 @@ bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out) {
   if (is_list) {
     // TextFormat parser considers "i: 7" to be the same as "i: [7]",
     // but we only want to allow list values with [].
-    StringPiece cleaned = text;
+    absl::string_view cleaned = text;
     str_util::RemoveLeadingWhitespace(&cleaned);
     str_util::RemoveTrailingWhitespace(&cleaned);
     if (cleaned.size() < 2 || cleaned[0] != '[' ||
@@ -473,11 +474,12 @@ DEFINE_SET_ATTR_VALUE_LIST(const std::vector<bool>&, b)
 DEFINE_SET_ATTR_VALUE_LIST(std::initializer_list<bool>, b)
 DEFINE_SET_ATTR_VALUE_BOTH(DataType, type)
 
-void SetAttrValue(StringPiece value, AttrValue* out) {
+void SetAttrValue(absl::string_view value, AttrValue* out) {
   out->set_s(value.data(), value.size());
 }
 
-void SetAttrValue(const gtl::ArraySlice<StringPiece> value, AttrValue* out) {
+void SetAttrValue(const gtl::ArraySlice<absl::string_view> value,
+                  AttrValue* out) {
   out->mutable_list()->Clear();  // Create list() even if value empty.
   for (const auto& v : value) {
     out->mutable_list()->add_s(v.data(), v.size());
diff --git a/tensorflow/core/framework/attr_value_util.h b/tensorflow/core/framework/attr_value_util.h
index 9fce488793..c9f2b334c8 100644
--- a/tensorflow/core/framework/attr_value_util.h
+++ b/tensorflow/core/framework/attr_value_util.h
@@ -20,12 +20,12 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
@@ -39,7 +39,7 @@ class NameAttrList;
 string SummarizeAttrValue(const AttrValue& attr_value);
 
 // Generates an error if attr_value doesn't have the indicated attr type.
-Status AttrValueHasType(const AttrValue& attr_value, StringPiece type);
+Status AttrValueHasType(const AttrValue& attr_value, absl::string_view type);
 
 // Converts a text proto value from "text" into the field of *out
 // indicated by "type" (e.g. from the type field of an AttrDef).
@@ -48,12 +48,13 @@ Status AttrValueHasType(const AttrValue& attr_value, StringPiece type);
 // * If type:"list(string)" and text:"['foo', 'bar']",
 //   then *out is set to "list { s: ['foo', 'bar'] }"
 // Returns true on success.
-bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out);
+bool ParseAttrValue(absl::string_view type, absl::string_view text,
+                    AttrValue* out);
 
 // Sets *out based on the type of value.
 void SetAttrValue(const string& value, AttrValue* out);
 void SetAttrValue(const char* value, AttrValue* out);
-void SetAttrValue(StringPiece value, AttrValue* out);
+void SetAttrValue(absl::string_view value, AttrValue* out);
 void SetAttrValue(int64 value, AttrValue* out);
 void SetAttrValue(int32 value, AttrValue* out);
 void SetAttrValue(float value, AttrValue* out);
@@ -69,7 +70,7 @@ void SetAttrValue(const NameAttrList& value, AttrValue* out);
 
 void SetAttrValue(gtl::ArraySlice<string> value, AttrValue* out);
 void SetAttrValue(gtl::ArraySlice<const char*> value, AttrValue* out);
-void SetAttrValue(gtl::ArraySlice<StringPiece> value, AttrValue* out);
+void SetAttrValue(gtl::ArraySlice<absl::string_view> value, AttrValue* out);
 void SetAttrValue(gtl::ArraySlice<int64> value, AttrValue* out);
 void SetAttrValue(gtl::ArraySlice<int32> value, AttrValue* out);
 void SetAttrValue(gtl::ArraySlice<float> value, AttrValue* out);
diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc
index 284dafb886..d3dcc0377a 100644
--- a/tensorflow/core/framework/dataset.cc
+++ b/tensorflow/core/framework/dataset.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/framework/dataset.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/graph/node_builder.h"
@@ -77,7 +78,7 @@ Status GraphDefBuilderWrapper::AddDataset(
     const DatasetBase* dataset,
     const std::vector<std::pair<size_t, Node*>>& inputs,
     const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
-    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
+    const std::vector<std::pair<absl::string_view, AttrValue>>& attrs,
     Node** output) {
   const string& name = dataset->name();
   std::unique_ptr<const GraphDefBuilder::Options> opts(
diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 964a7d5f8c..4bb0f326cb 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -18,6 +18,7 @@ limitations under the License.
 #include <deque>
 #include <memory>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/dataset_stateful_op_whitelist.h"
@@ -57,10 +58,10 @@ class SerializationContext;
 // Used for restoring iterator state.
 class IteratorStateReader {
  public:
-  virtual Status ReadScalar(StringPiece key, int64* val) = 0;
-  virtual Status ReadScalar(StringPiece key, string* val) = 0;
-  virtual Status ReadTensor(StringPiece key, Tensor* val) = 0;
-  virtual bool Contains(StringPiece key) = 0;
+  virtual Status ReadScalar(absl::string_view key, int64* val) = 0;
+  virtual Status ReadScalar(absl::string_view key, string* val) = 0;
+  virtual Status ReadTensor(absl::string_view key, Tensor* val) = 0;
+  virtual bool Contains(absl::string_view key) = 0;
 
   virtual ~IteratorStateReader() {}
 };
@@ -69,9 +70,9 @@ class IteratorStateReader {
 // Used for saving iterator state.
 class IteratorStateWriter {
  public:
-  virtual Status WriteScalar(StringPiece key, const int64 val) = 0;
-  virtual Status WriteScalar(StringPiece key, const string& val) = 0;
-  virtual Status WriteTensor(StringPiece key, const Tensor& val) = 0;
+  virtual Status WriteScalar(absl::string_view key, const int64 val) = 0;
+  virtual Status WriteScalar(absl::string_view key, const string& val) = 0;
+  virtual Status WriteTensor(absl::string_view key, const Tensor& val) = 0;
 
   virtual ~IteratorStateWriter() {}
 };
@@ -155,10 +156,10 @@ class GraphDefBuilderWrapper {
   // `*output` contains a pointer to the output `Node`. It is guaranteed to be
   // non-null if the method returns with an OK status.
   // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  Status AddDataset(const DatasetBase* dataset,
-                    const std::vector<Node*>& inputs,
-                    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
-                    Node** output) {
+  Status AddDataset(
+      const DatasetBase* dataset, const std::vector<Node*>& inputs,
+      const std::vector<std::pair<absl::string_view, AttrValue>>& attrs,
+      Node** output) {
     std::vector<std::pair<size_t, Node*>> enumerated_inputs(inputs.size());
     for (int i = 0; i < inputs.size(); i++) {
       enumerated_inputs[i] = std::make_pair(i, inputs[i]);
@@ -170,7 +171,7 @@ class GraphDefBuilderWrapper {
       const DatasetBase* dataset,
       const std::vector<std::pair<size_t, Node*>>& inputs,
       const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
-      const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
+      const std::vector<std::pair<absl::string_view, AttrValue>>& attrs,
       Node** output);
 
   // Adds a user-defined function with name `function_name` to the graph and
@@ -726,7 +727,8 @@ class DatasetOpKernel : public OpKernel {
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
-                             const StringPiece& argument_name, T* output) {
+                             const absl::string_view& argument_name,
+                             T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
@@ -738,7 +740,7 @@ class DatasetOpKernel : public OpKernel {
 
   template <typename T>
   Status ParseVectorArgument(OpKernelContext* ctx,
-                             const StringPiece& argument_name,
+                             const absl::string_view& argument_name,
                              std::vector<T>* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index 446c31b17f..f6c4632efa 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -21,11 +21,11 @@ limitations under the License.
 #include <vector>
 
 #include "absl/base/macros.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace Eigen {
@@ -86,8 +86,9 @@ class DeviceContext : public core::RefCounted {
   // device_tensor into "cpu_tensor".  "cpu_tensor" must be allocated
   // to be of the same size as "device_tensor".
   virtual void CopyDeviceTensorToCPU(const Tensor* device_tensor,
-                                     StringPiece tensor_name, Device* device,
-                                     Tensor* cpu_tensor, StatusCallback done) {
+                                     absl::string_view tensor_name,
+                                     Device* device, Tensor* cpu_tensor,
+                                     StatusCallback done) {
     done(errors::Internal("Unrecognized device type in device-to-CPU Copy"));
   }
 
diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index 4ad6fd00da..df1c797a31 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/function.pb_text.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -504,9 +505,9 @@ string Print(const NodeDef& n) {
     strings::StrAppend(&out, "[", str_util::Join(entries, ", "), "]");
   }
   strings::StrAppend(&out, "(");
-  std::vector<StringPiece> dat;
+  std::vector<absl::string_view> dat;
   std::vector<string> dep;
-  for (StringPiece s : n.input()) {
+  for (absl::string_view s : n.input()) {
     if (str_util::ConsumePrefix(&s, "^")) {
       dep.emplace_back(s);
     } else {
@@ -647,7 +648,7 @@ Status InstantiateFunction(const FunctionDef& fdef, AttrSlice attr_values,
     }
   }
 
-  auto substitute = [attr_values](StringPiece name, AttrValue* val) {
+  auto substitute = [attr_values](absl::string_view name, AttrValue* val) {
     if (const AttrValue* v = attr_values.Find(name)) {
       *val = *v;
       return true;
@@ -1192,7 +1193,8 @@ Status FunctionLibraryDefinition::LookUp(
   return default_registry_->LookUp(op, op_reg_data);
 }
 
-string FunctionLibraryDefinition::UniqueFunctionName(StringPiece prefix) const {
+string FunctionLibraryDefinition::UniqueFunctionName(
+    absl::string_view prefix) const {
   tf_shared_lock l(mu_);
   int index = 0;
   string name = strings::StrCat(prefix, index);
@@ -1270,7 +1272,8 @@ GET_ATTR(string)
 GET_ATTR(bool)
 #undef GET_ATTR
 
-void FunctionDefHelper::AttrValueWrapper::InitFromString(StringPiece val) {
+void FunctionDefHelper::AttrValueWrapper::InitFromString(
+    absl::string_view val) {
   if (val.size() >= 2 && val[0] == '$') {
     proto.set_placeholder(val.data() + 1, val.size() - 1);
   } else {
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index fcc6203729..b2a4416774 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_FRAMEWORK_FUNCTION_H_
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -92,7 +93,7 @@ class FunctionDefHelper {
     }
 
    private:
-    void InitFromString(StringPiece val);
+    void InitFromString(absl::string_view val);
   };
 
   // Constructs an AttrValue.func given the "name" and "attrs".
@@ -181,7 +182,8 @@ inline FunctionDefHelper::AttrValueWrapper::AttrValueWrapper(
 }
 
 template <>
-inline FunctionDefHelper::AttrValueWrapper::AttrValueWrapper(StringPiece val) {
+inline FunctionDefHelper::AttrValueWrapper::AttrValueWrapper(
+    absl::string_view val) {
   InitFromString(val);
 }
 
@@ -374,7 +376,7 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
 
   // Generates new function name with the specified prefix that is unique
   // across this library.
-  string UniqueFunctionName(StringPiece prefix) const LOCKS_EXCLUDED(mu_);
+  string UniqueFunctionName(absl::string_view prefix) const LOCKS_EXCLUDED(mu_);
 
   // Ops created for function arguments bear the name given by `kArgOp`; those
   // created for return values bear the name given by `kRetOp`.
diff --git a/tensorflow/core/framework/function_testlib.cc b/tensorflow/core/framework/function_testlib.cc
index 0445c242e9..2ea4dee9ed 100644
--- a/tensorflow/core/framework/function_testlib.cc
+++ b/tensorflow/core/framework/function_testlib.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/function_testlib.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -45,7 +46,8 @@ GraphDef GDef(gtl::ArraySlice<NodeDef> nodes,
 }
 
 // Helper to construct a NodeDef.
-NodeDef NDef(StringPiece name, StringPiece op, gtl::ArraySlice<string> inputs,
+NodeDef NDef(absl::string_view name, absl::string_view op,
+             gtl::ArraySlice<string> inputs,
              gtl::ArraySlice<std::pair<string, FDH::AttrValueWrapper>> attrs,
              const string& device) {
   NodeDef n;
diff --git a/tensorflow/core/framework/function_testlib.h b/tensorflow/core/framework/function_testlib.h
index a01743423b..aacd555cb3 100644
--- a/tensorflow/core/framework/function_testlib.h
+++ b/tensorflow/core/framework/function_testlib.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -48,7 +49,8 @@ class Attrs {
 
 // Helper to construct a NodeDef.
 NodeDef NDef(
-    StringPiece name, StringPiece op, gtl::ArraySlice<string> inputs,
+    absl::string_view name, absl::string_view op,
+    gtl::ArraySlice<string> inputs,
     gtl::ArraySlice<std::pair<string, FunctionDefHelper::AttrValueWrapper>>
         attrs = {},
     const string& device = "");
diff --git a/tensorflow/core/framework/node_def_builder.cc b/tensorflow/core/framework/node_def_builder.cc
index 348a825af9..898a7e716b 100644
--- a/tensorflow/core/framework/node_def_builder.cc
+++ b/tensorflow/core/framework/node_def_builder.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def_builder.h"
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -23,20 +24,21 @@ limitations under the License.
 
 namespace tensorflow {
 
-NodeDefBuilder::NodeOut::NodeOut(StringPiece n, int i, DataType dt)
+NodeDefBuilder::NodeOut::NodeOut(absl::string_view n, int i, DataType dt)
     : node(n), index(i), data_type(dt) {}
 
 NodeDefBuilder::NodeOut::NodeOut() {
   // uninitialized, call Reset() before use.
 }
 
-void NodeDefBuilder::NodeOut::Reset(StringPiece n, int i, DataType dt) {
+void NodeDefBuilder::NodeOut::Reset(absl::string_view n, int i, DataType dt) {
   node = string(n);
   index = i;
   data_type = dt;
 }
 
-NodeDefBuilder::NodeDefBuilder(StringPiece name, StringPiece op_name,
+NodeDefBuilder::NodeDefBuilder(absl::string_view name,
+                               absl::string_view op_name,
                                const OpRegistryInterface* op_registry) {
   node_def_.set_name(string(name));
   const Status status = op_registry->LookUpOpDef(string(op_name), &op_def_);
@@ -48,7 +50,7 @@ NodeDefBuilder::NodeDefBuilder(StringPiece name, StringPiece op_name,
   }
 }
 
-NodeDefBuilder::NodeDefBuilder(StringPiece name, const OpDef* op_def)
+NodeDefBuilder::NodeDefBuilder(absl::string_view name, const OpDef* op_def)
     : op_def_(op_def) {
   node_def_.set_name(string(name));
   Initialize();
@@ -84,7 +86,7 @@ NodeDefBuilder& NodeDefBuilder::Input(FakeInputFunctor fake_input) {
   return *this;
 }
 
-NodeDefBuilder& NodeDefBuilder::Input(StringPiece src_node, int src_index,
+NodeDefBuilder& NodeDefBuilder::Input(absl::string_view src_node, int src_index,
                                       DataType dt) {
   const OpDef::ArgDef* arg = NextArgDef();
   if (arg != nullptr) SingleInput(arg, src_node, src_index, dt);
@@ -104,7 +106,7 @@ NodeDefBuilder& NodeDefBuilder::Input(gtl::ArraySlice<NodeOut> src_list) {
 }
 
 void NodeDefBuilder::SingleInput(const OpDef::ArgDef* input_arg,
-                                 StringPiece src_node, int src_index,
+                                 absl::string_view src_node, int src_index,
                                  DataType dt) {
   AddInput(src_node, src_index);
 
@@ -161,7 +163,7 @@ void NodeDefBuilder::ListInput(const OpDef::ArgDef* input_arg,
   }
 }
 
-void NodeDefBuilder::AddInput(StringPiece src_node, int src_index) {
+void NodeDefBuilder::AddInput(absl::string_view src_node, int src_index) {
   if (src_node.empty()) {
     errors_.push_back("Empty input node name");
   } else if (src_node[0] == '^') {
@@ -192,12 +194,12 @@ void NodeDefBuilder::VerifyInputRef(const OpDef::ArgDef* input_arg,
   }
 }
 
-NodeDefBuilder& NodeDefBuilder::ControlInput(StringPiece src_node) {
+NodeDefBuilder& NodeDefBuilder::ControlInput(absl::string_view src_node) {
   control_inputs_.emplace_back(src_node);
   return *this;
 }
 
-NodeDefBuilder& NodeDefBuilder::Device(StringPiece device_spec) {
+NodeDefBuilder& NodeDefBuilder::Device(absl::string_view device_spec) {
   node_def_.set_device(string(device_spec));
   return *this;
 }
@@ -248,7 +250,8 @@ Status NodeDefBuilder::Finalize(NodeDef* node_def) const {
   }
 }
 
-NodeDefBuilder& NodeDefBuilder::Attr(StringPiece name, const AttrValue& value) {
+NodeDefBuilder& NodeDefBuilder::Attr(absl::string_view name,
+                                     const AttrValue& value) {
   if (const AttrValue* found = AttrSlice(node_def_).Find(name)) {
     if (!AreAttrValuesEqual(*found, value)) {
       errors_.push_back(strings::StrCat("Inconsistent values for attr '", name,
@@ -261,13 +264,13 @@ NodeDefBuilder& NodeDefBuilder::Attr(StringPiece name, const AttrValue& value) {
   return *this;
 }
 
-#define ATTR(T)                                                     \
-  NodeDefBuilder& NodeDefBuilder::Attr(StringPiece name, T value) { \
-    AttrValue attr_value;                                           \
-    SetAttrValue(value, &attr_value);                               \
-    return Attr(name, attr_value);                                  \
+#define ATTR(T)                                                           \
+  NodeDefBuilder& NodeDefBuilder::Attr(absl::string_view name, T value) { \
+    AttrValue attr_value;                                                 \
+    SetAttrValue(value, &attr_value);                                     \
+    return Attr(name, attr_value);                                        \
   }
-ATTR(StringPiece)
+ATTR(absl::string_view)
 ATTR(const char*)
 ATTR(int32)
 ATTR(int64)
@@ -279,7 +282,7 @@ ATTR(const PartialTensorShape&)
 ATTR(const Tensor&)
 ATTR(const TensorProto&)
 ATTR(const NameAttrList&)
-ATTR(gtl::ArraySlice<StringPiece>)
+ATTR(gtl::ArraySlice<absl::string_view>)
 ATTR(gtl::ArraySlice<const char*>)
 ATTR(gtl::ArraySlice<string>)
 ATTR(gtl::ArraySlice<int32>)
diff --git a/tensorflow/core/framework/node_def_builder.h b/tensorflow/core/framework/node_def_builder.h
index ad07ec5480..f8af6c0710 100644
--- a/tensorflow/core/framework/node_def_builder.h
+++ b/tensorflow/core/framework/node_def_builder.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <functional>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -50,9 +51,9 @@ class NodeDefBuilder {
  public:
   // To specify an output to be consumed by one of the Input() methods below.
   struct NodeOut {
-    NodeOut(StringPiece n, int i, DataType dt);
+    NodeOut(absl::string_view n, int i, DataType dt);
     NodeOut();  // uninitialized, call Reset() before use.
-    void Reset(StringPiece n, int i, DataType dt);
+    void Reset(absl::string_view n, int i, DataType dt);
     string node;
     int index;
     DataType data_type;
@@ -62,16 +63,16 @@ class NodeDefBuilder {
   // the Op plus a registry) for the NodeDef.  Other fields are
   // specified by calling the methods below.
   // REQUIRES: The OpDef must satisfy ValidateOpDef().
-  NodeDefBuilder(StringPiece name, StringPiece op_name,
+  NodeDefBuilder(absl::string_view name, absl::string_view op_name,
                  const OpRegistryInterface* op_registry = OpRegistry::Global());
   // REQUIRES: in addition, *op_def must outlive *this.
-  NodeDefBuilder(StringPiece name, const OpDef* op_def);
+  NodeDefBuilder(absl::string_view name, const OpDef* op_def);
 
   // You must call one Input() function per input_arg in the Op,
   // *and in the same order as the input_args appear in the OpDef.*
 
   // For inputs that take a single tensor.
-  NodeDefBuilder& Input(StringPiece src_node, int src_index, DataType dt);
+  NodeDefBuilder& Input(absl::string_view src_node, int src_index, DataType dt);
   NodeDefBuilder& Input(const NodeOut& src);
 
   // For inputs that take a list of tensors.
@@ -81,45 +82,49 @@ class NodeDefBuilder {
   NodeDefBuilder& Input(FakeInputFunctor fake_input);
 
   // Specify that this node must only run after src_node.
-  NodeDefBuilder& ControlInput(StringPiece src_node);
+  NodeDefBuilder& ControlInput(absl::string_view src_node);
 
   // Constrains what devices this node may be scheduled on.
-  NodeDefBuilder& Device(StringPiece device_spec);
+  NodeDefBuilder& Device(absl::string_view device_spec);
 
   // Sets the attr, if not already set.  If already set with a different
   // value, an error will be returned from Finalize().
-  NodeDefBuilder& Attr(StringPiece name, const AttrValue& value);
-  NodeDefBuilder& Attr(StringPiece name, StringPiece value);
-  NodeDefBuilder& Attr(StringPiece name, const char* value);
-  NodeDefBuilder& Attr(StringPiece name, int32 value);
-  NodeDefBuilder& Attr(StringPiece name, int64 value);
-  NodeDefBuilder& Attr(StringPiece name, float value);
-  NodeDefBuilder& Attr(StringPiece name, double value);
-  NodeDefBuilder& Attr(StringPiece name, bool value);
-  NodeDefBuilder& Attr(StringPiece name, DataType value);
-  NodeDefBuilder& Attr(StringPiece name, const PartialTensorShape& value);
-  NodeDefBuilder& Attr(StringPiece name, const Tensor& value);
-  NodeDefBuilder& Attr(StringPiece name, const TensorProto& value);
-  NodeDefBuilder& Attr(StringPiece name, const NameAttrList& value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<StringPiece> value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<const char*> value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<string> value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<int32> value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<int64> value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<float> value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<bool> value);
-  NodeDefBuilder& Attr(StringPiece name, const std::vector<bool>& value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<DataType> value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<TensorShape> value);
-  NodeDefBuilder& Attr(StringPiece name,
+  NodeDefBuilder& Attr(absl::string_view name, const AttrValue& value);
+  NodeDefBuilder& Attr(absl::string_view name, absl::string_view value);
+  NodeDefBuilder& Attr(absl::string_view name, const char* value);
+  NodeDefBuilder& Attr(absl::string_view name, int32 value);
+  NodeDefBuilder& Attr(absl::string_view name, int64 value);
+  NodeDefBuilder& Attr(absl::string_view name, float value);
+  NodeDefBuilder& Attr(absl::string_view name, double value);
+  NodeDefBuilder& Attr(absl::string_view name, bool value);
+  NodeDefBuilder& Attr(absl::string_view name, DataType value);
+  NodeDefBuilder& Attr(absl::string_view name, const PartialTensorShape& value);
+  NodeDefBuilder& Attr(absl::string_view name, const Tensor& value);
+  NodeDefBuilder& Attr(absl::string_view name, const TensorProto& value);
+  NodeDefBuilder& Attr(absl::string_view name, const NameAttrList& value);
+  NodeDefBuilder& Attr(absl::string_view name,
+                       gtl::ArraySlice<absl::string_view> value);
+  NodeDefBuilder& Attr(absl::string_view name,
+                       gtl::ArraySlice<const char*> value);
+  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<string> value);
+  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<int32> value);
+  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<int64> value);
+  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<float> value);
+  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<bool> value);
+  NodeDefBuilder& Attr(absl::string_view name, const std::vector<bool>& value);
+  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<DataType> value);
+  NodeDefBuilder& Attr(absl::string_view name,
+                       gtl::ArraySlice<TensorShape> value);
+  NodeDefBuilder& Attr(absl::string_view name,
                        gtl::ArraySlice<PartialTensorShape> value);
-  NodeDefBuilder& Attr(StringPiece name,
+  NodeDefBuilder& Attr(absl::string_view name,
                        gtl::ArraySlice<TensorShapeProto> value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<Tensor> value);
-  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<NameAttrList> value);
+  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<Tensor> value);
+  NodeDefBuilder& Attr(absl::string_view name,
+                       gtl::ArraySlice<NameAttrList> value);
 
   template <class T>
-  NodeDefBuilder& Attr(StringPiece name, std::initializer_list<T> value) {
+  NodeDefBuilder& Attr(absl::string_view name, std::initializer_list<T> value) {
     return Attr(name, gtl::ArraySlice<T>(value));
   }
 
@@ -146,13 +151,13 @@ class NodeDefBuilder {
   bool NextArgAvailable();
 
   // These do the main work of the Input() methods.
-  void SingleInput(const OpDef::ArgDef* input_arg, StringPiece src_node,
+  void SingleInput(const OpDef::ArgDef* input_arg, absl::string_view src_node,
                    int src_index, DataType dt);
   void ListInput(const OpDef::ArgDef* input_arg,
                  gtl::ArraySlice<NodeOut> src_list);
 
   // Add "src_node:src_index" to the list of inputs in the node_def_.
-  void AddInput(StringPiece src_node, int src_index);
+  void AddInput(absl::string_view src_node, int src_index);
 
   // Generate an error if you can't pass dt when expected is expected.
   void VerifyInputType(const OpDef::ArgDef* input_arg, DataType expected,
diff --git a/tensorflow/core/framework/node_def_builder_test.cc b/tensorflow/core/framework/node_def_builder_test.cc
index cc583df348..45168e9580 100644
--- a/tensorflow/core/framework/node_def_builder_test.cc
+++ b/tensorflow/core/framework/node_def_builder_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <memory>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op_def_builder.h"
@@ -50,7 +51,8 @@ class NodeDefBuilderTest : public ::testing::Test {
   // expectations.
   void ExpectSuccess(const NodeDefBuilder& builder,
                      DataTypeSlice expected_in_types,
-                     DataTypeSlice expected_out_types, StringPiece proto) {
+                     DataTypeSlice expected_out_types,
+                     absl::string_view proto) {
     NodeDef node_def;
     Status status = builder.Finalize(&node_def);
     TF_EXPECT_OK(status);
diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc
index 43ac1d0ada..497da45d3d 100644
--- a/tensorflow/core/framework/node_def_util.cc
+++ b/tensorflow/core/framework/node_def_util.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/graph.pb_text.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -50,7 +51,7 @@ AttrSlice::AttrSlice(const NodeDef& node_def)
 
 AttrSlice::AttrSlice(const AttrValueMap* a) : ndef_(nullptr), attrs_(a) {}
 
-static string SummarizeAttrsHelper(AttrSlice attrs, StringPiece device) {
+static string SummarizeAttrsHelper(AttrSlice attrs, absl::string_view device) {
   string ret;
 
   // We sort the attrs so the output is deterministic.
@@ -78,9 +79,10 @@ static string SummarizeAttrsHelper(AttrSlice attrs, StringPiece device) {
 }
 
 string AttrSlice::SummarizeNode() const {
-  return ndef_ ? SummarizeNodeDef(*ndef_)
-               : strings::StrCat(
-                     "[", SummarizeAttrsHelper(*this, StringPiece()), "]");
+  return ndef_
+             ? SummarizeNodeDef(*ndef_)
+             : strings::StrCat(
+                   "[", SummarizeAttrsHelper(*this, absl::string_view()), "]");
 }
 
 string SummarizeNode(const Node& node) { return SummarizeNodeDef(node.def()); }
@@ -110,7 +112,7 @@ string FormatNodeDefForError(const NodeDef& node_def) {
   return errors::FormatNodeNameForError(node_def.name());
 }
 
-const AttrValue* AttrSlice::Find(StringPiece attr_name) const {
+const AttrValue* AttrSlice::Find(absl::string_view attr_name) const {
   // Currently, the collection used for NodeDef::attr() (google::protobuf::Map)
   // requires that the keys used for lookups have type 'const string&'. Because
   // this method takes a StringPiece, it is necessary to allocate a temporary
@@ -131,7 +133,7 @@ const AttrValue* AttrSlice::Find(StringPiece attr_name) const {
   return nullptr;
 }
 
-Status AttrSlice::Find(StringPiece attr_name,
+Status AttrSlice::Find(absl::string_view attr_name,
                        const AttrValue** attr_value) const {
   *attr_value = Find(attr_name);
   if (*attr_value != nullptr) {
@@ -166,7 +168,7 @@ bool AttrSlice::EqualAttrs(AttrSlice other, Scratch* scratch) const {
 // The ... is to allow the caller to inject some value validation code.  Use
 // just ; if no additional validation code is needed.
 #define DEFINE_GET_ATTR(TYPE, FIELD, ATTR_TYPE, APPEND_OP, CAST, ...)         \
-  Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,           \
+  Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,     \
                      TYPE* value) {                                           \
     const AttrValue* attr_value;                                              \
     TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));                   \
@@ -176,7 +178,7 @@ bool AttrSlice::EqualAttrs(AttrSlice other, Scratch* scratch) const {
     *value = CAST;                                                            \
     return Status::OK();                                                      \
   }                                                                           \
-  Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,           \
+  Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,     \
                      std::vector<TYPE>* value) {                              \
     const AttrValue* attr_value;                                              \
     TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));                   \
@@ -188,37 +190,37 @@ bool AttrSlice::EqualAttrs(AttrSlice other, Scratch* scratch) const {
     return Status::OK();                                                      \
   }
 
-#define DEFINE_GET_ATTR_SIMPLE(TYPE, FIELD, ATTR_TYPE, APPEND_OP, CAST, ...) \
-  bool GetNodeAttrSimple(const AttrSlice& attrs, StringPiece attr_name,      \
-                         TYPE* value) {                                      \
-    const AttrValue* attr_value = attrs.Find(attr_name);                     \
-    if (attr_value == nullptr) {                                             \
-      return false;                                                          \
-    }                                                                        \
-    Status s = AttrValueHasType(*attr_value, ATTR_TYPE);                     \
-    if (!s.ok()) {                                                           \
-      return false;                                                          \
-    }                                                                        \
-    const auto& v = attr_value->FIELD();                                     \
-    __VA_ARGS__;                                                             \
-    *value = CAST;                                                           \
-    return true;                                                             \
-  }                                                                          \
-  bool GetNodeAttrSimple(const AttrSlice& attrs, StringPiece attr_name,      \
-                         std::vector<TYPE>* value) {                         \
-    const AttrValue* attr_value = attrs.Find(attr_name);                     \
-    if (attr_value == nullptr) {                                             \
-      return false;                                                          \
-    }                                                                        \
-    Status s = AttrValueHasType(*attr_value, "list(" ATTR_TYPE ")");         \
-    if (!s.ok()) {                                                           \
-      return false;                                                          \
-    }                                                                        \
-    for (const auto& v : attr_value->list().FIELD()) {                       \
-      __VA_ARGS__;                                                           \
-      value->APPEND_OP(CAST);                                                \
-    }                                                                        \
-    return true;                                                             \
+#define DEFINE_GET_ATTR_SIMPLE(TYPE, FIELD, ATTR_TYPE, APPEND_OP, CAST, ...)  \
+  bool GetNodeAttrSimple(const AttrSlice& attrs, absl::string_view attr_name, \
+                         TYPE* value) {                                       \
+    const AttrValue* attr_value = attrs.Find(attr_name);                      \
+    if (attr_value == nullptr) {                                              \
+      return false;                                                           \
+    }                                                                         \
+    Status s = AttrValueHasType(*attr_value, ATTR_TYPE);                      \
+    if (!s.ok()) {                                                            \
+      return false;                                                           \
+    }                                                                         \
+    const auto& v = attr_value->FIELD();                                      \
+    __VA_ARGS__;                                                              \
+    *value = CAST;                                                            \
+    return true;                                                              \
+  }                                                                           \
+  bool GetNodeAttrSimple(const AttrSlice& attrs, absl::string_view attr_name, \
+                         std::vector<TYPE>* value) {                          \
+    const AttrValue* attr_value = attrs.Find(attr_name);                      \
+    if (attr_value == nullptr) {                                              \
+      return false;                                                           \
+    }                                                                         \
+    Status s = AttrValueHasType(*attr_value, "list(" ATTR_TYPE ")");          \
+    if (!s.ok()) {                                                            \
+      return false;                                                           \
+    }                                                                         \
+    for (const auto& v : attr_value->list().FIELD()) {                        \
+      __VA_ARGS__;                                                            \
+      value->APPEND_OP(CAST);                                                 \
+    }                                                                         \
+    return true;                                                              \
   }
 
 DEFINE_GET_ATTR(string, s, "string", emplace_back, v, ;)
@@ -253,13 +255,14 @@ DEFINE_GET_ATTR(Tensor, tensor, "tensor", emplace_back, t, Tensor t;
 DEFINE_GET_ATTR(NameAttrList, func, "func", emplace_back, v, ;);
 #undef DEFINE_GET_ATTR
 
-bool HasNodeAttr(const NodeDef& node_def, StringPiece attr_name) {
+bool HasNodeAttr(const NodeDef& node_def, absl::string_view attr_name) {
   return node_def.attr().find(string(attr_name)) != node_def.attr().end();
 }
 
 static const string& kEmptyString = *new string();
 
-const string& GetNodeAttrString(const AttrSlice& attrs, StringPiece attr_name) {
+const string& GetNodeAttrString(const AttrSlice& attrs,
+                                absl::string_view attr_name) {
   const AttrValue* attr_value = attrs.Find(attr_name);
   if (attr_value == nullptr) {
     return kEmptyString;
@@ -271,7 +274,7 @@ const string& GetNodeAttrString(const AttrSlice& attrs, StringPiece attr_name) {
   return attr_value->s();
 }
 
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    DataTypeVector* value) {
   const AttrValue* attr_value;
   TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));
@@ -282,7 +285,7 @@ Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
   return Status::OK();
 }
 
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    const TensorProto** value) {
   const AttrValue* attr_value;
   TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));
@@ -291,7 +294,7 @@ Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
   return Status::OK();
 }
 
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    const NameAttrList** value) {
   const AttrValue* attr_value;
   TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));
@@ -574,7 +577,7 @@ namespace {
 
 using ::tensorflow::strings::Scanner;
 
-bool IsValidOpName(StringPiece sp) {
+bool IsValidOpName(absl::string_view sp) {
   return Scanner(sp)
       .One(Scanner::LETTER_DIGIT_DOT)
       .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE)
@@ -582,7 +585,7 @@ bool IsValidOpName(StringPiece sp) {
       .GetResult();
 }
 
-bool IsValidDataInputName(StringPiece sp) {
+bool IsValidDataInputName(absl::string_view sp) {
   // Data inputs are op_name, op_name:0, or op_name:12345.
   Scanner scan(sp);
   scan.One(Scanner::LETTER_DIGIT_DOT)
@@ -600,7 +603,7 @@ bool IsValidDataInputName(StringPiece sp) {
   return scan.GetResult();
 }
 
-bool IsValidControlInputName(StringPiece sp) {
+bool IsValidControlInputName(absl::string_view sp) {
   return Scanner(sp)
       .OneLiteral("^")
       .One(Scanner::LETTER_DIGIT_DOT)
@@ -665,18 +668,19 @@ Status AttachDef(const Status& status, const Node& node) {
   return AttachDef(status, node.def());
 }
 
-void AddNodeAttr(StringPiece name, const AttrValue& value, NodeDef* node_def) {
+void AddNodeAttr(absl::string_view name, const AttrValue& value,
+                 NodeDef* node_def) {
   node_def->mutable_attr()->insert(
       AttrValueMap::value_type(string(name), value));
 }
 
-#define ADD_NODE_ATTR(T)                                           \
-  void AddNodeAttr(StringPiece name, T value, NodeDef* node_def) { \
-    AttrValue attr_value;                                          \
-    SetAttrValue(value, &attr_value);                              \
-    AddNodeAttr(name, attr_value, node_def);                       \
+#define ADD_NODE_ATTR(T)                                                 \
+  void AddNodeAttr(absl::string_view name, T value, NodeDef* node_def) { \
+    AttrValue attr_value;                                                \
+    SetAttrValue(value, &attr_value);                                    \
+    AddNodeAttr(name, attr_value, node_def);                             \
   }
-ADD_NODE_ATTR(StringPiece)
+ADD_NODE_ATTR(absl::string_view)
 ADD_NODE_ATTR(const char*)
 ADD_NODE_ATTR(int32)
 ADD_NODE_ATTR(int64)
@@ -688,7 +692,7 @@ ADD_NODE_ATTR(const PartialTensorShape&)
 ADD_NODE_ATTR(const Tensor&)
 ADD_NODE_ATTR(const TensorProto&)
 ADD_NODE_ATTR(const NameAttrList&)
-ADD_NODE_ATTR(gtl::ArraySlice<StringPiece>)
+ADD_NODE_ATTR(gtl::ArraySlice<absl::string_view>)
 ADD_NODE_ATTR(gtl::ArraySlice<const char*>)
 ADD_NODE_ATTR(gtl::ArraySlice<string>)
 ADD_NODE_ATTR(gtl::ArraySlice<int32>)
@@ -704,21 +708,22 @@ ADD_NODE_ATTR(gtl::ArraySlice<Tensor>)
 ADD_NODE_ATTR(gtl::ArraySlice<NameAttrList>)
 #undef ADD_NODE_ATTR
 
-void AddAttr(StringPiece name, const AttrValue& value, AttrValueMap* map) {
+void AddAttr(absl::string_view name, const AttrValue& value,
+             AttrValueMap* map) {
   map->insert(AttrValueMap::value_type(string(name), value));
 }
 
-#define ADD_ATTR(T)                                            \
-  void AddAttr(StringPiece name, T value, AttrValueMap* map) { \
-    AttrValue attr_value;                                      \
-    SetAttrValue(value, &attr_value);                          \
-    AddAttr(name, attr_value, map);                            \
+#define ADD_ATTR(T)                                                  \
+  void AddAttr(absl::string_view name, T value, AttrValueMap* map) { \
+    AttrValue attr_value;                                            \
+    SetAttrValue(value, &attr_value);                                \
+    AddAttr(name, attr_value, map);                                  \
   }
 ADD_ATTR(bool)
 #undef ADD_ATTR
 
-Status AddPrefixAndSuffixToNode(StringPiece prefix, StringPiece suffix,
-                                NodeDef* node_def) {
+Status AddPrefixAndSuffixToNode(absl::string_view prefix,
+                                absl::string_view suffix, NodeDef* node_def) {
   node_def->set_name(strings::StrCat(prefix, node_def->name(), suffix));
   if (node_def->op() == "Enter" || node_def->op() == "RefEnter") {
     string frame_name;
diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h
index 0ff67554eb..631f358741 100644
--- a/tensorflow/core/framework/node_def_util.h
+++ b/tensorflow/core/framework/node_def_util.h
@@ -19,9 +19,9 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -59,61 +59,65 @@ typedef protobuf::Map<string, AttrValue> AttrValueMap;
 
 // Adds an attr with name <name> and value <value> to *node_def.
 // The type of the attr is based on the type of value.
-void AddNodeAttr(StringPiece name, const AttrValue& value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, StringPiece value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, const char* value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, int32 value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, int64 value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, float value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, double value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, bool value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, DataType value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, const PartialTensorShape& value,
+void AddNodeAttr(absl::string_view name, const AttrValue& value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, const Tensor& value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, const TensorProto& value, NodeDef* node_def);
-void AddNodeAttr(StringPiece name, const NameAttrList& value,
+void AddNodeAttr(absl::string_view name, absl::string_view value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<StringPiece> value,
+void AddNodeAttr(absl::string_view name, const char* value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, int32 value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, int64 value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, float value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, double value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, bool value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, DataType value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, const PartialTensorShape& value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<const char*> value,
+void AddNodeAttr(absl::string_view name, const Tensor& value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<string> value,
+void AddNodeAttr(absl::string_view name, const TensorProto& value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<int32> value,
+void AddNodeAttr(absl::string_view name, const NameAttrList& value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<int64> value,
+void AddNodeAttr(absl::string_view name,
+                 gtl::ArraySlice<absl::string_view> value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<const char*> value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<float> value,
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<string> value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<bool> value,
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<int32> value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, const std::vector<bool>& value,
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<int64> value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<DataType> value,
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<float> value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<TensorShape> value,
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<bool> value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<PartialTensorShape> value,
+void AddNodeAttr(absl::string_view name, const std::vector<bool>& value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<TensorShapeProto> value,
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<DataType> value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<Tensor> value,
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<TensorShape> value,
                  NodeDef* node_def);
-void AddNodeAttr(StringPiece name, gtl::ArraySlice<NameAttrList> value,
+void AddNodeAttr(absl::string_view name,
+                 gtl::ArraySlice<PartialTensorShape> value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name,
+                 gtl::ArraySlice<TensorShapeProto> value, NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<Tensor> value,
+                 NodeDef* node_def);
+void AddNodeAttr(absl::string_view name, gtl::ArraySlice<NameAttrList> value,
                  NodeDef* node_def);
 
 // Version to workaround C++'s "perfect" forwarding not being able to
 // forward {...} initialization.
 template <class T>
-void AddNodeAttr(StringPiece name, std::initializer_list<T> value,
+void AddNodeAttr(absl::string_view name, std::initializer_list<T> value,
                  NodeDef* node_def) {
   AddNodeAttr(name, gtl::ArraySlice<T>(value), node_def);
 }
 
 // Adds an attr to an attr value map.
-void AddAttr(StringPiece name, const AttrValue& value, AttrValueMap* map);
-void AddAttr(StringPiece name, bool value, AttrValueMap* map);
+void AddAttr(absl::string_view name, const AttrValue& value, AttrValueMap* map);
+void AddAttr(absl::string_view name, bool value, AttrValueMap* map);
 
 class AttrSlice {
  public:
@@ -126,11 +130,11 @@ class AttrSlice {
 
   // Returns the attr with attr_name if found.  Otherwise, returns
   // nullptr.
-  const AttrValue* Find(StringPiece attr_name) const;
+  const AttrValue* Find(absl::string_view attr_name) const;
 
   // Returns the attr_value for attr_name if found. Otherwise, returns a
   // NotFound status.
-  Status Find(StringPiece attr_name, const AttrValue** attr_value) const;
+  Status Find(absl::string_view attr_name, const AttrValue** attr_value) const;
 
   // Helper class to avoid allocations in EqualAttrs.
   // TODO(irving): Will go away once NodeInfo is used.
@@ -165,84 +169,85 @@ class AttrSlice {
 };
 
 // Return true if the attr with the name attr_name is defined in node_def.
-bool HasNodeAttr(const NodeDef& node_def, StringPiece attr_name);
+bool HasNodeAttr(const NodeDef& node_def, absl::string_view attr_name);
 
 // Look up the attr with name attr_name and set *value to its value.  If no
 // attr with attr_name is found in node_def, or the attr does not have
 // a matching type, a non-ok status will be returned.
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    string* value);  // type: "string"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    int64* value);  // type: "int"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    int32* value);  // type: "int"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    float* value);  // type: "float"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    bool* value);  // type: "bool"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    DataType* value);  // type: "type"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    TensorShapeProto* value);  // type: "shape"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    TensorShape* value);  // type: "shape"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    PartialTensorShape* value);  // type: "shape"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    Tensor* value);  // type: "tensor"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<string>* value);  // type "list(string)"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<int64>* value);  // type "list(int)"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<int32>* value);  // type "list(int)"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<float>* value);  // type "list(float)"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<bool>* value);  // type "list(bool)"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<DataType>* value);  // type "list(type)"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    DataTypeVector* value);  // type "list(type)"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<TensorShapeProto>* value);  // type "list(shape)"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<TensorShape>* value);  // type "list(shape)"
 Status GetNodeAttr(
-    const AttrSlice& attrs, StringPiece attr_name,
+    const AttrSlice& attrs, absl::string_view attr_name,
     std::vector<PartialTensorShape>* value);  // type "list(shape)"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<Tensor>* value);  // type: "list(tensor)"
 
 // This version avoids copying the TensorProto.
 // REQUIRES: Must not use *value beyond the lifetime of node_def.
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    const TensorProto** value);  // type: "tensor"
 
 // This version avoids copying the NameAttrList.
 // REQUIRES: Must not use *value beyond the lifetime of node_def.
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    const NameAttrList** value);  // type: "func"
 
 // These versions copies the NameAttrList(s).
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    NameAttrList* value);  // type: "func"
-Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
                    std::vector<NameAttrList>* value);  // type: "list(func)"
 
 // Look up the attr with name attr_name and set *value to its value.  If no
 // attr with attr_name is found in node_def, or the attr does not have
 // a matching type, false is returned.
-bool GetNodeAttrSimple(const AttrSlice& attrs, StringPiece attr_name,
+bool GetNodeAttrSimple(const AttrSlice& attrs, absl::string_view attr_name,
                        string* value);  // type: "string"
-bool GetNodeAttrSimple(const AttrSlice& attrs, StringPiece attr_name,
+bool GetNodeAttrSimple(const AttrSlice& attrs, absl::string_view attr_name,
                        std::vector<string>* value);  // type: "string"
 
 // Look up the attr with name attr_name and return a reference to its value.
 // If no attr with attr_name is found in node_def, or the attr does not have
 // a matching type, a reference to an empty string is returned.
 // REQUIRES: Must not use the returned value beyond the lifetime of node_def.
-const string& GetNodeAttrString(const AttrSlice& attrs, StringPiece attr_name);
+const string& GetNodeAttrString(const AttrSlice& attrs,
+                                absl::string_view attr_name);
 
 // Computes the input type for a specific node input.
 // REQUIRES: ValidateOpDef(op_def).ok()
@@ -284,7 +289,8 @@ Status ValidateNodeDef(const NodeDef& node_def, const OpDef& op_def);
 // space, the returned `NameRangeMap` objects borrow the input/output
 // argument names from `op_def`. The `op_def` must outlive the
 // returned `NameRangeMap` objects.
-typedef gtl::FlatMap<StringPiece, std::pair<int, int>, hash<StringPiece>>
+typedef gtl::FlatMap<absl::string_view, std::pair<int, int>,
+                     hash<absl::string_view>>
     NameRangeMap;
 Status NameRangesForNode(const NodeDef& node_def, const OpDef& op_def,
                          NameRangeMap* inputs, NameRangeMap* outputs);
@@ -315,8 +321,8 @@ Status AttachDef(const Status& status, const Node& node);
 // Appends the given prefix and suffix to the original node name in order to
 // make the name unique. If it's an "Enter" node, use the same way to reset
 // attribute "frame_name".
-Status AddPrefixAndSuffixToNode(StringPiece prefix, StringPiece suffix,
-                                NodeDef* node_def);
+Status AddPrefixAndSuffixToNode(absl::string_view prefix,
+                                absl::string_view suffix, NodeDef* node_def);
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_FRAMEWORK_NODE_DEF_UTIL_H_
diff --git a/tensorflow/core/framework/node_def_util_test.cc b/tensorflow/core/framework/node_def_util_test.cc
index d9d437024a..56fb66bd09 100644
--- a/tensorflow/core/framework/node_def_util_test.cc
+++ b/tensorflow/core/framework/node_def_util_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/node_def_util.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"  // NOLINT
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -270,7 +271,8 @@ void ExpectInvalidSyntax(const NodeDef& bad, const string& message) {
   EXPECT_TRUE(errors::IsInvalidArgument(status))
       << status << "; NodeDef: " << SummarizeNodeDef(bad);
 
-  EXPECT_TRUE(str_util::StrContains(StringPiece(status.ToString()), message))
+  EXPECT_TRUE(
+      str_util::StrContains(absl::string_view(status.ToString()), message))
       << "NodeDef: " << SummarizeNodeDef(bad) << ", " << status << ", "
       << message;
 }
diff --git a/tensorflow/core/framework/op.h b/tensorflow/core/framework/op.h
index 81ed5f95f0..b3db276278 100644
--- a/tensorflow/core/framework/op.h
+++ b/tensorflow/core/framework/op.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <unordered_map>
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_def_builder.h"
 #include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/framework/selective_registration.h"
@@ -261,15 +262,17 @@ template <>
 class OpDefBuilderWrapper<false> {
  public:
   constexpr OpDefBuilderWrapper(const char name[]) {}
-  OpDefBuilderWrapper<false>& Attr(StringPiece spec) { return *this; }
-  OpDefBuilderWrapper<false>& Input(StringPiece spec) { return *this; }
-  OpDefBuilderWrapper<false>& Output(StringPiece spec) { return *this; }
+  OpDefBuilderWrapper<false>& Attr(absl::string_view spec) { return *this; }
+  OpDefBuilderWrapper<false>& Input(absl::string_view spec) { return *this; }
+  OpDefBuilderWrapper<false>& Output(absl::string_view spec) { return *this; }
   OpDefBuilderWrapper<false>& SetIsCommutative() { return *this; }
   OpDefBuilderWrapper<false>& SetIsAggregate() { return *this; }
   OpDefBuilderWrapper<false>& SetIsStateful() { return *this; }
   OpDefBuilderWrapper<false>& SetAllowsUninitializedInput() { return *this; }
-  OpDefBuilderWrapper<false>& Deprecated(int, StringPiece) { return *this; }
-  OpDefBuilderWrapper<false>& Doc(StringPiece text) { return *this; }
+  OpDefBuilderWrapper<false>& Deprecated(int, absl::string_view) {
+    return *this;
+  }
+  OpDefBuilderWrapper<false>& Doc(absl::string_view text) { return *this; }
   OpDefBuilderWrapper<false>& SetShapeFn(
       Status (*fn)(shape_inference::InferenceContext*)) {
     return *this;
diff --git a/tensorflow/core/framework/op_def_builder.cc b/tensorflow/core/framework/op_def_builder.cc
index 8a9bb63182..e46d91a602 100644
--- a/tensorflow/core/framework/op_def_builder.cc
+++ b/tensorflow/core/framework/op_def_builder.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <limits>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/op_def_util.h"
@@ -33,11 +34,11 @@ namespace tensorflow {
 
 namespace {
 
-string AttrError(StringPiece orig, const string& op_name) {
+string AttrError(absl::string_view orig, const string& op_name) {
   return strings::StrCat(" from Attr(\"", orig, "\") for Op ", op_name);
 }
 
-bool ConsumeAttrName(StringPiece* sp, StringPiece* out) {
+bool ConsumeAttrName(absl::string_view* sp, absl::string_view* out) {
   return Scanner(*sp)
       .One(Scanner::LETTER)
       .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
@@ -48,7 +49,7 @@ bool ConsumeAttrName(StringPiece* sp, StringPiece* out) {
       .GetResult(sp, out);
 }
 
-bool ConsumeListPrefix(StringPiece* sp) {
+bool ConsumeListPrefix(absl::string_view* sp) {
   return Scanner(*sp)
       .OneLiteral("list")
       .AnySpace()
@@ -57,7 +58,8 @@ bool ConsumeListPrefix(StringPiece* sp) {
       .GetResult(sp);
 }
 
-bool ConsumeQuotedString(char quote_ch, StringPiece* sp, StringPiece* out) {
+bool ConsumeQuotedString(char quote_ch, absl::string_view* sp,
+                         absl::string_view* out) {
   const string quote_str(1, quote_ch);
   return Scanner(*sp)
       .OneLiteral(quote_str.c_str())
@@ -69,7 +71,7 @@ bool ConsumeQuotedString(char quote_ch, StringPiece* sp, StringPiece* out) {
       .GetResult(sp, out);
 }
 
-bool ConsumeAttrType(StringPiece* sp, StringPiece* out) {
+bool ConsumeAttrType(absl::string_view* sp, absl::string_view* out) {
   return Scanner(*sp)
       .Many(Scanner::LOWERLETTER_DIGIT)
       .StopCapture()
@@ -77,10 +79,10 @@ bool ConsumeAttrType(StringPiece* sp, StringPiece* out) {
       .GetResult(sp, out);
 }
 
-bool ConsumeAttrNumber(StringPiece* sp, int64* out) {
+bool ConsumeAttrNumber(absl::string_view* sp, int64* out) {
   Scanner scan(*sp);
-  StringPiece match;
-  StringPiece remaining;
+  absl::string_view match;
+  absl::string_view remaining;
 
   scan.AnySpace().RestartCapture();
   if (scan.Peek() == '-') {
@@ -110,20 +112,21 @@ bool ConsumeAttrNumber(StringPiece* sp, int64* out) {
     }                                                                     \
   } while (false)
 
-bool ConsumeCompoundAttrType(StringPiece* sp, StringPiece* out) {
+bool ConsumeCompoundAttrType(absl::string_view* sp, absl::string_view* out) {
   auto capture_begin = sp->begin();
   if (str_util::ConsumePrefix(sp, "numbertype") ||
       str_util::ConsumePrefix(sp, "numerictype") ||
       str_util::ConsumePrefix(sp, "quantizedtype") ||
       str_util::ConsumePrefix(sp, "realnumbertype") ||
       str_util::ConsumePrefix(sp, "realnumberictype")) {
-    *out = StringPiece(capture_begin, sp->begin() - capture_begin);
+    *out = absl::string_view(capture_begin, sp->begin() - capture_begin);
     return true;
   }
   return false;
 }
 
-bool ProcessCompoundType(const StringPiece type_string, AttrValue* allowed) {
+bool ProcessCompoundType(const absl::string_view type_string,
+                         AttrValue* allowed) {
   if (type_string == "numbertype" || type_string == "numerictype") {
     for (DataType dt : NumberTypes()) {
       allowed->mutable_list()->add_type(dt);
@@ -143,20 +146,20 @@ bool ProcessCompoundType(const StringPiece type_string, AttrValue* allowed) {
   return true;
 }
 
-void FinalizeAttr(StringPiece spec, OpDef* op_def,
+void FinalizeAttr(absl::string_view spec, OpDef* op_def,
                   std::vector<string>* errors) {
   OpDef::AttrDef* attr = op_def->add_attr();
-  StringPiece orig(spec);
+  absl::string_view orig(spec);
 
   // Parse "<name>:" at the beginning.
-  StringPiece tmp_name;
+  absl::string_view tmp_name;
   VERIFY(ConsumeAttrName(&spec, &tmp_name), "Trouble parsing '<name>:'");
   attr->set_name(tmp_name.data(), tmp_name.size());
 
   // Read "<type>" or "list(<type>)".
   bool is_list = ConsumeListPrefix(&spec);
   string type;
-  StringPiece type_string;  // Used if type == "type"
+  absl::string_view type_string;  // Used if type == "type"
   if (str_util::ConsumePrefix(&spec, "string")) {
     type = "string";
   } else if (str_util::ConsumePrefix(&spec, "int")) {
@@ -185,7 +188,7 @@ void FinalizeAttr(StringPiece spec, OpDef* op_def,
     if (str_util::StartsWith(spec, "\"") || str_util::StartsWith(spec, "'")) {
       type = "string";  // "{ \"foo\", \"bar\" }" or "{ 'foo', 'bar' }"
       while (true) {
-        StringPiece escaped_string;
+        absl::string_view escaped_string;
         VERIFY(ConsumeQuotedString('"', &spec, &escaped_string) ||
                    ConsumeQuotedString('\'', &spec, &escaped_string),
                "Trouble parsing allowed string at '", spec, "'");
@@ -266,12 +269,13 @@ void FinalizeAttr(StringPiece spec, OpDef* op_def,
 
 #undef VERIFY
 
-string InOutError(bool is_output, StringPiece orig, const string& op_name) {
+string InOutError(bool is_output, absl::string_view orig,
+                  const string& op_name) {
   return strings::StrCat(" from ", is_output ? "Output" : "Input", "(\"", orig,
                          "\") for Op ", op_name);
 }
 
-bool ConsumeInOutName(StringPiece* sp, StringPiece* out) {
+bool ConsumeInOutName(absl::string_view* sp, absl::string_view* out) {
   return Scanner(*sp)
       .One(Scanner::LOWERLETTER)
       .Any(Scanner::LOWERLETTER_DIGIT_UNDERSCORE)
@@ -282,7 +286,7 @@ bool ConsumeInOutName(StringPiece* sp, StringPiece* out) {
       .GetResult(sp, out);
 }
 
-bool ConsumeInOutRefOpen(StringPiece* sp) {
+bool ConsumeInOutRefOpen(absl::string_view* sp) {
   return Scanner(*sp)
       .OneLiteral("Ref")
       .AnySpace()
@@ -291,11 +295,11 @@ bool ConsumeInOutRefOpen(StringPiece* sp) {
       .GetResult(sp);
 }
 
-bool ConsumeInOutRefClose(StringPiece* sp) {
+bool ConsumeInOutRefClose(absl::string_view* sp) {
   return Scanner(*sp).OneLiteral(")").AnySpace().GetResult(sp);
 }
 
-bool ConsumeInOutNameOrType(StringPiece* sp, StringPiece* out) {
+bool ConsumeInOutNameOrType(absl::string_view* sp, absl::string_view* out) {
   return Scanner(*sp)
       .One(Scanner::LETTER)
       .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
@@ -304,7 +308,7 @@ bool ConsumeInOutNameOrType(StringPiece* sp, StringPiece* out) {
       .GetResult(sp, out);
 }
 
-bool ConsumeInOutTimesType(StringPiece* sp, StringPiece* out) {
+bool ConsumeInOutTimesType(absl::string_view* sp, absl::string_view* out) {
   return Scanner(*sp)
       .OneLiteral("*")
       .AnySpace()
@@ -325,15 +329,15 @@ bool ConsumeInOutTimesType(StringPiece* sp, StringPiece* out) {
     }                                                                 \
   } while (false)
 
-void FinalizeInputOrOutput(StringPiece spec, bool is_output, OpDef* op_def,
-                           std::vector<string>* errors) {
+void FinalizeInputOrOutput(absl::string_view spec, bool is_output,
+                           OpDef* op_def, std::vector<string>* errors) {
   OpDef::ArgDef* arg =
       is_output ? op_def->add_output_arg() : op_def->add_input_arg();
 
-  StringPiece orig(spec);
+  absl::string_view orig(spec);
 
   // Parse "<name>:" at the beginning.
-  StringPiece tmp_name;
+  absl::string_view tmp_name;
   VERIFY(ConsumeInOutName(&spec, &tmp_name), "Trouble parsing 'name:'");
   arg->set_name(tmp_name.data(), tmp_name.size());
 
@@ -343,7 +347,7 @@ void FinalizeInputOrOutput(StringPiece spec, bool is_output, OpDef* op_def,
   }
 
   {  // Parse "<name|type>" or "<name>*<name|type>".
-    StringPiece first, second, type_or_attr;
+    absl::string_view first, second, type_or_attr;
     VERIFY(ConsumeInOutNameOrType(&spec, &first),
            "Trouble parsing either a type or an attr name at '", spec, "'");
     if (ConsumeInOutTimesType(&spec, &second)) {
@@ -409,7 +413,7 @@ void FinalizeInputOrOutput(StringPiece spec, bool is_output, OpDef* op_def,
 
 #undef VERIFY
 
-int num_leading_spaces(StringPiece s) {
+int num_leading_spaces(absl::string_view s) {
   size_t i = 0;
   while (i < s.size() && s[i] == ' ') {
     ++i;
@@ -417,7 +421,7 @@ int num_leading_spaces(StringPiece s) {
   return i;
 }
 
-bool ConsumeDocNameColon(StringPiece* sp, StringPiece* out) {
+bool ConsumeDocNameColon(absl::string_view* sp, absl::string_view* out) {
   return Scanner(*sp)
       .One(Scanner::LETTER)
       .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
@@ -428,7 +432,7 @@ bool ConsumeDocNameColon(StringPiece* sp, StringPiece* out) {
       .GetResult(sp, out);
 }
 
-bool IsDocNameColon(StringPiece s) {
+bool IsDocNameColon(absl::string_view s) {
   return ConsumeDocNameColon(&s, nullptr /* out */);
 }
 
@@ -465,8 +469,8 @@ void FinalizeDoc(const string& text, OpDef* op_def,
   // name: description
   //   possibly continued on the next line
   //   if so, we remove the minimum indent
-  StringPiece name;
-  std::vector<StringPiece> description;
+  absl::string_view name;
+  std::vector<absl::string_view> description;
   while (static_cast<size_t>(l) < lines.size()) {
     description.clear();
     description.push_back(lines[l]);
@@ -605,13 +609,13 @@ Status OpDefBuilder::Finalize(OpRegistrationData* op_reg_data) const {
   *op_reg_data = op_reg_data_;
 
   OpDef* op_def = &op_reg_data->op_def;
-  for (StringPiece attr : attrs_) {
+  for (absl::string_view attr : attrs_) {
     FinalizeAttr(attr, op_def, &errors);
   }
-  for (StringPiece input : inputs_) {
+  for (absl::string_view input : inputs_) {
     FinalizeInputOrOutput(input, false, op_def, &errors);
   }
-  for (StringPiece output : outputs_) {
+  for (absl::string_view output : outputs_) {
     FinalizeInputOrOutput(output, true, op_def, &errors);
   }
   FinalizeDoc(doc_, op_def, &errors);
diff --git a/tensorflow/core/framework/op_def_builder_test.cc b/tensorflow/core/framework/op_def_builder_test.cc
index 9b24e3aa00..1e6a62e767 100644
--- a/tensorflow/core/framework/op_def_builder_test.cc
+++ b/tensorflow/core/framework/op_def_builder_test.cc
@@ -15,11 +15,11 @@ limitations under the License.
 
 #include "tensorflow/core/framework/op_def_builder.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -40,7 +40,7 @@ class OpDefBuilderTest : public ::testing::Test {
  protected:
   OpDefBuilder b() { return OpDefBuilder("Test"); }
 
-  void ExpectSuccess(const OpDefBuilder& builder, StringPiece proto,
+  void ExpectSuccess(const OpDefBuilder& builder, absl::string_view proto,
                      OpShapeInferenceFn* shape_fn_out = nullptr) {
     OpRegistrationData op_reg_data;
     Status status = builder.Finalize(&op_reg_data);
@@ -61,7 +61,7 @@ class OpDefBuilderTest : public ::testing::Test {
     }
   }
 
-  void ExpectOrdered(const OpDefBuilder& builder, StringPiece proto) {
+  void ExpectOrdered(const OpDefBuilder& builder, absl::string_view proto) {
     OpRegistrationData op_reg_data;
     Status status = builder.Finalize(&op_reg_data);
     TF_EXPECT_OK(status);
diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc
index 3597f43d51..aa37d40824 100644
--- a/tensorflow/core/framework/op_def_util.cc
+++ b/tensorflow/core/framework/op_def_util.cc
@@ -18,12 +18,12 @@ limitations under the License.
 #include <set>
 #include <unordered_map>
 #include <unordered_set>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/op_def.pb_text.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/proto_serialization.h"
@@ -145,7 +145,7 @@ Status ValidateAttrValue(const AttrValue& attr_value,
   return Status::OK();
 }
 
-const OpDef::AttrDef* FindAttr(StringPiece name, const OpDef& op_def) {
+const OpDef::AttrDef* FindAttr(absl::string_view name, const OpDef& op_def) {
   for (int i = 0; i < op_def.attr_size(); ++i) {
     if (op_def.attr(i).name() == name) {
       return &op_def.attr(i);
@@ -154,7 +154,7 @@ const OpDef::AttrDef* FindAttr(StringPiece name, const OpDef& op_def) {
   return nullptr;
 }
 
-OpDef::AttrDef* FindAttrMutable(StringPiece name, OpDef* op_def) {
+OpDef::AttrDef* FindAttrMutable(absl::string_view name, OpDef* op_def) {
   for (int i = 0; i < op_def->attr_size(); ++i) {
     if (op_def->attr(i).name() == name) {
       return op_def->mutable_attr(i);
@@ -163,7 +163,7 @@ OpDef::AttrDef* FindAttrMutable(StringPiece name, OpDef* op_def) {
   return nullptr;
 }
 
-const OpDef::ArgDef* FindInputArg(StringPiece name, const OpDef& op_def) {
+const OpDef::ArgDef* FindInputArg(absl::string_view name, const OpDef& op_def) {
   for (int i = 0; i < op_def.input_arg_size(); ++i) {
     if (op_def.input_arg(i).name() == name) {
       return &op_def.input_arg(i);
@@ -172,7 +172,7 @@ const OpDef::ArgDef* FindInputArg(StringPiece name, const OpDef& op_def) {
   return nullptr;
 }
 
-const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def) {
+const ApiDef::Arg* FindInputArg(absl::string_view name, const ApiDef& api_def) {
   for (int i = 0; i < api_def.in_arg_size(); ++i) {
     if (api_def.in_arg(i).name() == name) {
       return &api_def.in_arg(i);
@@ -268,11 +268,11 @@ Status ValidateOpDef(const OpDef& op_def) {
              attr.name(), " that matches a data type");
 
     // Validate type
-    StringPiece type(attr.type());
+    absl::string_view type(attr.type());
     bool is_list = str_util::ConsumePrefix(&type, "list(");
     bool found = false;
-    for (StringPiece valid : {"string", "int", "float", "bool", "type", "shape",
-                              "tensor", "func"}) {
+    for (absl::string_view valid : {"string", "int", "float", "bool", "type",
+                                    "shape", "tensor", "func"}) {
       if (str_util::ConsumePrefix(&type, valid)) {
         found = true;
         break;
diff --git a/tensorflow/core/framework/op_def_util.h b/tensorflow/core/framework/op_def_util.h
index 85afe2bdea..a5f0608861 100644
--- a/tensorflow/core/framework/op_def_util.h
+++ b/tensorflow/core/framework/op_def_util.h
@@ -20,6 +20,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_FRAMEWORK_OP_DEF_UTIL_H_
 
 #include <string>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/lib/core/status.h"
@@ -41,16 +42,16 @@ Status ValidateAttrValue(const AttrValue& attr_value,
 
 // The following search through op_def for an attr with the indicated name.
 // Returns nullptr if no such attr is found.
-const OpDef::AttrDef* FindAttr(StringPiece name, const OpDef& op_def);
-OpDef::AttrDef* FindAttrMutable(StringPiece name, OpDef* op_def);
+const OpDef::AttrDef* FindAttr(absl::string_view name, const OpDef& op_def);
+OpDef::AttrDef* FindAttrMutable(absl::string_view name, OpDef* op_def);
 
 // Searches op_def for input argument with the indicated name.
 // Returns nullptr if no such attr is found.
-const OpDef::ArgDef* FindInputArg(StringPiece name, const OpDef& op_def);
+const OpDef::ArgDef* FindInputArg(absl::string_view name, const OpDef& op_def);
 
 // Searches api_def for input argument with the indicated name.
 // Returns nullptr if no such attr is found.
-const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def);
+const ApiDef::Arg* FindInputArg(absl::string_view name, const ApiDef& api_def);
 
 // Produce a human-readable version of an op_def that is more concise
 // than a text-format proto.  Excludes descriptions.
diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc
index 505ab54775..81c67ed34c 100644
--- a/tensorflow/core/framework/op_gen_lib.cc
+++ b/tensorflow/core/framework/op_gen_lib.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <algorithm>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -26,7 +27,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-string WordWrap(StringPiece prefix, StringPiece str, int width) {
+string WordWrap(absl::string_view prefix, absl::string_view str, int width) {
   const string indent_next_line = "\n" + Spaces(prefix.size());
   width -= prefix.size();
   string result;
@@ -39,16 +40,16 @@ string WordWrap(StringPiece prefix, StringPiece str, int width) {
       break;
     }
     auto space = str.rfind(' ', width);
-    if (space == StringPiece::npos) {
+    if (space == absl::string_view::npos) {
       // Rather make a too-long line and break at a space.
       space = str.find(' ');
-      if (space == StringPiece::npos) {
+      if (space == absl::string_view::npos) {
         strings::StrAppend(&result, str);
         break;
       }
     }
     // Breaking at character at position <space>.
-    StringPiece to_append = str.substr(0, space);
+    absl::string_view to_append = str.substr(0, space);
     str.remove_prefix(space + 1);
     // Remove spaces at break.
     while (str_util::EndsWith(to_append, " ")) {
@@ -65,7 +66,7 @@ string WordWrap(StringPiece prefix, StringPiece str, int width) {
   return result;
 }
 
-bool ConsumeEquals(StringPiece* description) {
+bool ConsumeEquals(absl::string_view* description) {
   if (str_util::ConsumePrefix(description, "=")) {
     while (str_util::ConsumePrefix(description,
                                    " ")) {  // Also remove spaces after "=".
@@ -80,12 +81,12 @@ bool ConsumeEquals(StringPiece* description) {
 // contains the maximum prefix of the input `*orig` that doesn't
 // contain `split_ch`, and `*orig` contains everything after the
 // first `split_ch`.
-static bool SplitAt(char split_ch, StringPiece* orig,
-                    StringPiece* before_split) {
+static bool SplitAt(char split_ch, absl::string_view* orig,
+                    absl::string_view* before_split) {
   auto pos = orig->find(split_ch);
-  if (pos == StringPiece::npos) {
+  if (pos == absl::string_view::npos) {
     *before_split = *orig;
-    *orig = StringPiece();
+    *orig = absl::string_view();
     return false;
   } else {
     *before_split = orig->substr(0, pos);
@@ -96,9 +97,9 @@ static bool SplitAt(char split_ch, StringPiece* orig,
 
 // Does this line start with "<spaces><field>:" where "<field>" is
 // in multi_line_fields? Sets *colon_pos to the position of the colon.
-static bool StartsWithFieldName(StringPiece line,
+static bool StartsWithFieldName(absl::string_view line,
                                 const std::vector<string>& multi_line_fields) {
-  StringPiece up_to_colon;
+  absl::string_view up_to_colon;
   if (!SplitAt(':', &line, &up_to_colon)) return false;
   while (str_util::ConsumePrefix(&up_to_colon, " "))
     ;  // Remove leading spaces.
@@ -110,7 +111,7 @@ static bool StartsWithFieldName(StringPiece line,
   return false;
 }
 
-static bool ConvertLine(StringPiece line,
+static bool ConvertLine(absl::string_view line,
                         const std::vector<string>& multi_line_fields,
                         string* ml) {
   // Is this a field we should convert?
@@ -118,8 +119,8 @@ static bool ConvertLine(StringPiece line,
     return false;
   }
   // Has a matching field name, so look for "..." after the colon.
-  StringPiece up_to_colon;
-  StringPiece after_colon = line;
+  absl::string_view up_to_colon;
+  absl::string_view after_colon = line;
   SplitAt(':', &after_colon, &up_to_colon);
   while (str_util::ConsumePrefix(&after_colon, " "))
     ;  // Remove leading spaces.
@@ -128,12 +129,12 @@ static bool ConvertLine(StringPiece line,
     return false;
   }
   auto last_quote = after_colon.rfind('\"');
-  if (last_quote == StringPiece::npos) {
+  if (last_quote == absl::string_view::npos) {
     // Error: we don't see the expected matching quote, abort the conversion.
     return false;
   }
-  StringPiece escaped = after_colon.substr(0, last_quote);
-  StringPiece suffix = after_colon.substr(last_quote + 1);
+  absl::string_view escaped = after_colon.substr(0, last_quote);
+  absl::string_view suffix = after_colon.substr(last_quote + 1);
   // We've now parsed line into '<up_to_colon>: "<escaped>"<suffix>'
 
   string unescaped;
@@ -159,13 +160,13 @@ static bool ConvertLine(StringPiece line,
   return true;
 }
 
-string PBTxtToMultiline(StringPiece pbtxt,
+string PBTxtToMultiline(absl::string_view pbtxt,
                         const std::vector<string>& multi_line_fields) {
   string ml;
   // Probably big enough, since the input and output are about the
   // same size, but just a guess.
   ml.reserve(pbtxt.size() * (17. / 16));
-  StringPiece line;
+  absl::string_view line;
   while (!pbtxt.empty()) {
     // Split pbtxt into its first line and everything after.
     SplitAt('\n', &pbtxt, &line);
@@ -180,8 +181,8 @@ string PBTxtToMultiline(StringPiece pbtxt,
 // Given a single line of text `line` with first : at `colon`, determine if
 // there is an "<<END" expression after the colon and if so return true and set
 // `*end` to everything after the "<<".
-static bool FindMultiline(StringPiece line, size_t colon, string* end) {
-  if (colon == StringPiece::npos) return false;
+static bool FindMultiline(absl::string_view line, size_t colon, string* end) {
+  if (colon == absl::string_view::npos) return false;
   line.remove_prefix(colon + 1);
   while (str_util::ConsumePrefix(&line, " ")) {
   }
@@ -192,12 +193,12 @@ static bool FindMultiline(StringPiece line, size_t colon, string* end) {
   return false;
 }
 
-string PBTxtFromMultiline(StringPiece multiline_pbtxt) {
+string PBTxtFromMultiline(absl::string_view multiline_pbtxt) {
   string pbtxt;
   // Probably big enough, since the input and output are about the
   // same size, but just a guess.
   pbtxt.reserve(multiline_pbtxt.size() * (33. / 32));
-  StringPiece line;
+  absl::string_view line;
   while (!multiline_pbtxt.empty()) {
     // Split multiline_pbtxt into its first line and everything after.
     if (!SplitAt('\n', &multiline_pbtxt, &line)) {
@@ -237,7 +238,7 @@ string PBTxtFromMultiline(StringPiece multiline_pbtxt) {
         unescaped.push_back('\n');
       }
       strings::StrAppend(&unescaped, line);
-      line = StringPiece();
+      line = absl::string_view();
     }
 
     // Escape what we extracted and then output it in quotes.
diff --git a/tensorflow/core/framework/op_gen_lib.h b/tensorflow/core/framework/op_gen_lib.h
index c269e2df04..b7660207f8 100644
--- a/tensorflow/core/framework/op_gen_lib.h
+++ b/tensorflow/core/framework/op_gen_lib.h
@@ -18,10 +18,10 @@ limitations under the License.
 
 #include <string>
 #include <unordered_map>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/env.h"
 
 namespace tensorflow {
@@ -35,17 +35,17 @@ inline string Spaces(int n) { return string(n, ' '); }
 // after the first by prefix.size() spaces.  Intended use case is something
 // like prefix = "  Foo(" and str is a list of arguments (terminated by a ")").
 // TODO(josh11b): Option to wrap on ", " instead of " " when possible.
-string WordWrap(StringPiece prefix, StringPiece str, int width);
+string WordWrap(absl::string_view prefix, absl::string_view str, int width);
 
 // Looks for an "=" at the beginning of *description.  If found, strips it off
 // (and any following spaces) from *description and return true.  Otherwise
 // returns false.
-bool ConsumeEquals(StringPiece* description);
+bool ConsumeEquals(absl::string_view* description);
 
 // Convert text-serialized protobufs to/from multiline format.
-string PBTxtToMultiline(StringPiece pbtxt,
+string PBTxtToMultiline(absl::string_view pbtxt,
                         const std::vector<string>& multi_line_fields);
-string PBTxtFromMultiline(StringPiece multiline_pbtxt);
+string PBTxtFromMultiline(absl::string_view multiline_pbtxt);
 
 // Takes a list of files with ApiDefs text protos, and allows you to
 // look up the specific ApiDef for any given op.
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 3e34bf0418..65aa005bf6 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/framework/graph.pb_text.h"
@@ -33,7 +34,6 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/notification.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -118,7 +118,7 @@ const string& OpKernel::type_string() const { return def_->op(); }
 const string& OpKernel::requested_device() const { return def_->device(); }
 const string& OpKernel::requested_input(int i) const { return def_->input(i); }
 
-Status OpKernel::InputRange(StringPiece input_name, int* start,
+Status OpKernel::InputRange(absl::string_view input_name, int* start,
                             int* stop) const {
   const auto result = input_name_map_.find(input_name);
   if (result == input_name_map_.end()) {
@@ -130,7 +130,7 @@ Status OpKernel::InputRange(StringPiece input_name, int* start,
   }
 }
 
-Status OpKernel::OutputRange(StringPiece output_name, int* start,
+Status OpKernel::OutputRange(absl::string_view output_name, int* start,
                              int* stop) const {
   const auto result = output_name_map_.find(output_name);
   if (result == output_name_map_.end()) {
@@ -200,7 +200,7 @@ OpKernelConstruction::OpKernelConstruction(
       graph_def_version_(graph_def_version),
       status_(status) {}
 
-bool OpKernelConstruction::HasAttr(StringPiece attr_name) const {
+bool OpKernelConstruction::HasAttr(absl::string_view attr_name) const {
   return HasNodeAttr(def(), attr_name);
 }
 
@@ -320,7 +320,7 @@ void OpKernelContext::really_record_tensor_reference(const Tensor& tensor) {
   referenced_tensors_->Add(tensor);
 }
 
-Status OpKernelContext::input(StringPiece name, const Tensor** tensor) {
+Status OpKernelContext::input(absl::string_view name, const Tensor** tensor) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -338,7 +338,8 @@ Status OpKernelContext::input(StringPiece name, const Tensor** tensor) {
   return Status::OK();
 }
 
-Status OpKernelContext::input_dtype(StringPiece name, DataType* dtype) const {
+Status OpKernelContext::input_dtype(absl::string_view name,
+                                    DataType* dtype) const {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -356,7 +357,8 @@ Status OpKernelContext::input_dtype(StringPiece name, DataType* dtype) const {
   return Status::OK();
 }
 
-Status OpKernelContext::input_ref_mutex(StringPiece name, mutex** out_mutex) {
+Status OpKernelContext::input_ref_mutex(absl::string_view name,
+                                        mutex** out_mutex) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -438,7 +440,7 @@ bool OpKernelContext::forward_input_to_output_with_shape(
 }
 
 Status OpKernelContext::forward_input_to_output_with_shape(
-    StringPiece input_name, StringPiece output_name,
+    absl::string_view input_name, absl::string_view output_name,
     const TensorShape& output_shape, Tensor** output) {
   int input_index, output_index, stop;
   TF_RETURN_IF_ERROR(
@@ -562,7 +564,7 @@ void OpKernelContext::delete_ref_input(int index, bool lock_held) {
   }
 }
 
-Status OpKernelContext::mutable_input(StringPiece name, Tensor* tensor,
+Status OpKernelContext::mutable_input(absl::string_view name, Tensor* tensor,
                                       bool lock_held) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
@@ -586,7 +588,7 @@ Status OpKernelContext::mutable_input(StringPiece name, Tensor* tensor,
   return Status::OK();
 }
 
-Status OpKernelContext::replace_ref_input(StringPiece name,
+Status OpKernelContext::replace_ref_input(absl::string_view name,
                                           const Tensor& tensor,
                                           bool lock_held) {
   int start, stop;
@@ -604,14 +606,14 @@ Status OpKernelContext::replace_ref_input(StringPiece name,
   return Status::OK();
 }
 
-Status OpKernelContext::input_list(StringPiece name, OpInputList* list) {
+Status OpKernelContext::input_list(absl::string_view name, OpInputList* list) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
   *list = OpInputList(this, start, stop);
   return Status::OK();
 }
 
-Status OpKernelContext::mutable_input_list(StringPiece name,
+Status OpKernelContext::mutable_input_list(absl::string_view name,
                                            OpMutableInputList* list) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
@@ -619,7 +621,8 @@ Status OpKernelContext::mutable_input_list(StringPiece name,
   return Status::OK();
 }
 
-Status OpKernelContext::output_list(StringPiece name, OpOutputList* list) {
+Status OpKernelContext::output_list(absl::string_view name,
+                                    OpOutputList* list) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->OutputRange(name, &start, &stop));
   *list = OpOutputList(this, start, stop);
@@ -642,7 +645,7 @@ Status OpKernelContext::allocate_output(int index, const TensorShape& shape,
   return allocate_output(index, shape, output, attr);
 }
 
-Status OpKernelContext::allocate_output(StringPiece name,
+Status OpKernelContext::allocate_output(absl::string_view name,
                                         const TensorShape& shape,
                                         Tensor** tensor) {
   int start, stop;
@@ -656,7 +659,7 @@ Status OpKernelContext::allocate_output(StringPiece name,
   return allocate_output(start, shape, tensor);
 }
 
-Status OpKernelContext::allocate_output(StringPiece name,
+Status OpKernelContext::allocate_output(absl::string_view name,
                                         const TensorShape& shape,
                                         Tensor** tensor,
                                         AllocatorAttributes attr) {
@@ -752,7 +755,8 @@ Status OpKernelContext::allocate_persistent(DataType type,
   return s;
 }
 
-Status OpKernelContext::set_output(StringPiece name, const Tensor& tensor) {
+Status OpKernelContext::set_output(absl::string_view name,
+                                   const Tensor& tensor) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->OutputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -799,7 +803,7 @@ void OpKernelContext::set_output_ref(int index, mutex* mu,
   outputs_[index] = TensorValue(mu, tensor_for_ref);
 }
 
-Status OpKernelContext::set_output_ref(StringPiece name, mutex* mu,
+Status OpKernelContext::set_output_ref(absl::string_view name, mutex* mu,
                                        Tensor* tensor_for_ref) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->OutputRange(name, &start, &stop));
@@ -813,7 +817,8 @@ Status OpKernelContext::set_output_ref(StringPiece name, mutex* mu,
   return Status::OK();
 }
 
-Status OpKernelContext::mutable_output(StringPiece name, Tensor** tensor) {
+Status OpKernelContext::mutable_output(absl::string_view name,
+                                       Tensor** tensor) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->OutputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -911,7 +916,7 @@ void OpKernelContext::clear_recorded_memory() {
 // OpKernel registration ------------------------------------------------------
 
 struct KernelRegistration {
-  KernelRegistration(const KernelDef& d, StringPiece c,
+  KernelRegistration(const KernelDef& d, absl::string_view c,
                      kernel_factory::OpKernelRegistrar::Factory f)
       : def(d), kernel_class_name(c), factory(f) {}
   const KernelDef def;
@@ -933,8 +938,8 @@ static KernelRegistry* GlobalKernelRegistryTyped() {
   return reinterpret_cast<KernelRegistry*>(GlobalKernelRegistry());
 }
 
-static string Key(StringPiece op_type, const DeviceType& device_type,
-                  StringPiece label) {
+static string Key(absl::string_view op_type, const DeviceType& device_type,
+                  absl::string_view label) {
   return strings::StrCat(op_type, ":", DeviceTypeString(device_type), ":",
                          label);
 }
@@ -942,7 +947,7 @@ static string Key(StringPiece op_type, const DeviceType& device_type,
 namespace kernel_factory {
 
 void OpKernelRegistrar::InitInternal(const KernelDef* kernel_def,
-                                     StringPiece kernel_class_name,
+                                     absl::string_view kernel_class_name,
                                      Factory factory) {
   // See comments in register_kernel::Name in header for info on _no_register.
   if (kernel_def->op() != "_no_register") {
@@ -959,7 +964,7 @@ void OpKernelRegistrar::InitInternal(const KernelDef* kernel_def,
 
 namespace {
 
-static const StringPiece kKernelAttr("_kernel");
+static const absl::string_view kKernelAttr("_kernel");
 
 // TODO(irving): Replace with const Node& version below.
 Status FindKernelRegistration(const DeviceType& device_type,
@@ -1072,12 +1077,12 @@ KernelList GetFilteredRegisteredKernels(
   return kernel_list;
 }
 
-KernelList GetRegisteredKernelsForOp(StringPiece op_name) {
+KernelList GetRegisteredKernelsForOp(absl::string_view op_name) {
   auto op_pred = [op_name](const KernelDef& k) { return k.op() == op_name; };
   return GetFilteredRegisteredKernels(op_pred);
 }
 
-string KernelsRegisteredForOp(StringPiece op_name) {
+string KernelsRegisteredForOp(absl::string_view op_name) {
   KernelList kernel_list = GetRegisteredKernelsForOp(op_name);
   if (kernel_list.kernel_size() == 0) return "  <no registered kernels>\n";
   string ret;
@@ -1175,7 +1180,7 @@ Status CreateOpKernel(DeviceType device_type, DeviceBase* device,
 
 namespace {
 
-bool FindArgInOp(StringPiece arg_name,
+bool FindArgInOp(absl::string_view arg_name,
                  const protobuf::RepeatedPtrField<OpDef::ArgDef>& args) {
   for (const auto& arg : args) {
     if (arg_name == arg.name()) {
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 3b1f57a457..7d48d35356 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -20,6 +20,7 @@ limitations under the License.
 
 #include <utility>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/cancellation.h"
 #include "tensorflow/core/framework/control_flow.h"
@@ -142,8 +143,9 @@ class OpKernel {
     return output_memory_types_;
   }
 
-  Status InputRange(StringPiece input_name, int* start, int* stop) const;
-  Status OutputRange(StringPiece output_name, int* start, int* stop) const;
+  Status InputRange(absl::string_view input_name, int* start, int* stop) const;
+  Status OutputRange(absl::string_view output_name, int* start,
+                     int* stop) const;
 
   // We allow legacy scalars within Google up until GraphDef version 6.
   // TODO(irving): Remove when we can drop support for GraphDef version 5.
@@ -311,10 +313,10 @@ class OpKernelConstruction {
   // attr with attr_name is found in def(), or the attr does not have
   // a matching type, a non-ok status will be returned.
   template <class T>
-  Status GetAttr(StringPiece attr_name, T* value) const;
+  Status GetAttr(absl::string_view attr_name, T* value) const;
 
   // Return true if the attr_name is defined in def().
-  bool HasAttr(StringPiece attr_name) const;
+  bool HasAttr(absl::string_view attr_name) const;
 
   // Return the device type.
   const DeviceType& device_type() const { return device_type_; }
@@ -615,7 +617,7 @@ class OpKernelContext {
 
   int num_inputs() const { return params_->inputs->size(); }
   DataType input_dtype(int index) const;
-  Status input_dtype(StringPiece name, DataType* dtype) const;
+  Status input_dtype(absl::string_view name, DataType* dtype) const;
   MemoryType input_memory_type(int index) const;
 
   int num_outputs() const { return outputs_.size(); }
@@ -635,14 +637,14 @@ class OpKernelContext {
   // use mutable_input below.
   // REQUIRES: !IsRefType(input_dtype(index))
   // REQUIRES: the named input must not be a list.
-  Status input(StringPiece name, const Tensor** tensor);
+  Status input(absl::string_view name, const Tensor** tensor);
 
   // Returns the named list-valued immutable input in "list", as
   // defined in the OpDef.  If the named output is not list-valued,
   // returns a one-element list. May only be used for non-Ref
   // inputs. For Ref inputs use mutable_input below.
   // REQUIRES: !IsRefType(input_dtype(index))
-  Status input_list(StringPiece name, OpInputList* list);
+  Status input_list(absl::string_view name, OpInputList* list);
 
   // For mutable inputs, use the following together to make sure there
   // is no concurrent access to mutable_input(), e.g.:
@@ -652,7 +654,7 @@ class OpKernelContext {
   //   // modify the values in t
   // }
   // REQUIRES: IsRefType(input_dtype(index))
-  Status input_ref_mutex(StringPiece name, mutex** out_mutex);
+  Status input_ref_mutex(absl::string_view name, mutex** out_mutex);
 
   // Returns a mutable input tensor. Must be used to access Ref
   // inputs.  REQUIRES: IsRefType(input_dtype(index)). The caller may
@@ -670,7 +672,7 @@ class OpKernelContext {
   // the input mutex will be acquired before returning the Tensor.
   // REQUIRES: the named input must not be a list.
   // REQUIRES: the named input must be a ref tensor.
-  Status mutable_input(StringPiece name, Tensor* tensor, bool lock_held);
+  Status mutable_input(absl::string_view name, Tensor* tensor, bool lock_held);
 
   // Returns the named list-valued mutable input in "list", as defined
   // in the OpDef.  If the named input is not list-valued, returns a
@@ -678,7 +680,7 @@ class OpKernelContext {
   // stored in the Tensor buffer may be modified, and modifications
   // will be visible to other Ops reading the same ref tensor.
   // REQUIRES: the named input must be a ref tensor.
-  Status mutable_input_list(StringPiece name, OpMutableInputList* list);
+  Status mutable_input_list(absl::string_view name, OpMutableInputList* list);
 
   // Replace the corresponding Ref Input to use the storage buffer
   // used by tensor. If !lock_held the input mutex will be acquired
@@ -690,7 +692,7 @@ class OpKernelContext {
   // buffer used by tensor. If !lock_held the input mutex will be
   // acquired before returning the Tensor.
   // REQUIRES: IsRefType(input_dtype(index)).
-  Status replace_ref_input(StringPiece name, const Tensor& tensor,
+  Status replace_ref_input(absl::string_view name, const Tensor& tensor,
                            bool lock_held);
 
   // Deletes the Tensor object used as the Ref Input at
@@ -728,8 +730,8 @@ class OpKernelContext {
   bool forward_input_to_output_with_shape(int input_index, int output_index,
                                           const TensorShape& output_shape,
                                           Tensor** output) TF_MUST_USE_RESULT;
-  Status forward_input_to_output_with_shape(StringPiece input_name,
-                                            StringPiece output_name,
+  Status forward_input_to_output_with_shape(absl::string_view input_name,
+                                            absl::string_view output_name,
                                             const TensorShape& output_shape,
                                             Tensor** output) TF_MUST_USE_RESULT;
 
@@ -773,8 +775,8 @@ class OpKernelContext {
       gtl::ArraySlice<int> candidate_input_indices, int output_index,
       const TensorShape& output_shape, Tensor** output) TF_MUST_USE_RESULT;
   Status forward_input_or_allocate_output(
-      gtl::ArraySlice<StringPiece> candidate_input_names,
-      StringPiece output_name, const TensorShape& output_shape,
+      gtl::ArraySlice<absl::string_view> candidate_input_names,
+      absl::string_view output_name, const TensorShape& output_shape,
       Tensor** output) TF_MUST_USE_RESULT;
 
   // Tries to reuse one of the inputs given in input_indices as a temporary.
@@ -796,7 +798,7 @@ class OpKernelContext {
 
   // Returns the named list-valued output in "list", as defined in the OpDef.
   // If the named output is not list-valued, returns a one-element list.
-  Status output_list(StringPiece name, OpOutputList* list);
+  Status output_list(absl::string_view name, OpOutputList* list);
 
   // If output_required(index) returns true, the OpKernel's Compute() method
   // should call allocate_output(index, ...), set_output(index, ...),
@@ -861,7 +863,7 @@ class OpKernelContext {
   // REQUIRES: !IsRefType(expected_output_dtype(index))
   Status allocate_output(int index, const TensorShape& shape,
                          Tensor** tensor) TF_MUST_USE_RESULT;
-  Status allocate_output(StringPiece name, const TensorShape& shape,
+  Status allocate_output(absl::string_view name, const TensorShape& shape,
                          Tensor** tensor) TF_MUST_USE_RESULT;
   // The following methods use the supplied attributes instead of
   // those in output_attr_array. The caller is responsible for
@@ -870,7 +872,7 @@ class OpKernelContext {
   // device. See comment above.
   Status allocate_output(int index, const TensorShape& shape, Tensor** tensor,
                          AllocatorAttributes attr) TF_MUST_USE_RESULT;
-  Status allocate_output(StringPiece name, const TensorShape& shape,
+  Status allocate_output(absl::string_view name, const TensorShape& shape,
                          Tensor** tensor,
                          AllocatorAttributes attr) TF_MUST_USE_RESULT;
 
@@ -913,15 +915,16 @@ class OpKernelContext {
   // index.  REQUIRES: !IsRefType(expected_output_dtype(index))
   // REQUIRES: 'tensor' must have the same MemoryType as
   // output_memory_types[index]. See comment above.
-  Status set_output(StringPiece name, const Tensor& tensor);
+  Status set_output(absl::string_view name, const Tensor& tensor);
 
   // To output a reference.  Caller retains ownership of mu and tensor_for_ref,
   // and they must outlive all uses within the step. See comment above.
   // REQUIRES: IsRefType(expected_output_dtype(index))
-  Status set_output_ref(StringPiece name, mutex* mu, Tensor* tensor_for_ref);
+  Status set_output_ref(absl::string_view name, mutex* mu,
+                        Tensor* tensor_for_ref);
 
   // Returns nullptr if allocate_output() or set_output() have not been called.
-  Status mutable_output(StringPiece name, Tensor** tensor);
+  Status mutable_output(absl::string_view name, Tensor** tensor);
 
   // Records device specific state about how the input tensors were
   // computed.
@@ -1224,7 +1227,7 @@ Status SupportedDeviceTypesForNode(
 
 // Returns a message with a description of the kernels registered for op
 // `op_name`.
-string KernelsRegisteredForOp(StringPiece op_name);
+string KernelsRegisteredForOp(absl::string_view op_name);
 
 // Call once after Op registration has completed.
 Status ValidateKernelRegistrations(const OpRegistryInterface& op_registry);
@@ -1322,7 +1325,7 @@ KernelList GetFilteredRegisteredKernels(
     const std::function<bool(const KernelDef&)>& predicate);
 
 // Gets a list of all registered kernels for a given op
-KernelList GetRegisteredKernelsForOp(StringPiece op_name);
+KernelList GetRegisteredKernelsForOp(absl::string_view op_name);
 
 namespace kernel_factory {
 
@@ -1330,8 +1333,8 @@ class OpKernelRegistrar {
  public:
   typedef OpKernel* (*Factory)(OpKernelConstruction*);
 
-  OpKernelRegistrar(const KernelDef* kernel_def, StringPiece kernel_class_name,
-                    Factory factory) {
+  OpKernelRegistrar(const KernelDef* kernel_def,
+                    absl::string_view kernel_class_name, Factory factory) {
     // Perform the check in the header to allow compile-time optimization
     // to a no-op, allowing the linker to remove the kernel symbols.
     if (kernel_def != nullptr) {
@@ -1340,8 +1343,8 @@ class OpKernelRegistrar {
   }
 
  private:
-  void InitInternal(const KernelDef* kernel_def, StringPiece kernel_class_name,
-                    Factory factory);
+  void InitInternal(const KernelDef* kernel_def,
+                    absl::string_view kernel_class_name, Factory factory);
 };
 
 }  // namespace kernel_factory
@@ -1350,7 +1353,8 @@ class OpKernelRegistrar {
 // Template and inline method implementations, please ignore
 
 template <class T>
-Status OpKernelConstruction::GetAttr(StringPiece attr_name, T* value) const {
+Status OpKernelConstruction::GetAttr(absl::string_view attr_name,
+                                     T* value) const {
   return GetNodeAttr(def(), attr_name, value);
 }
 
@@ -1453,9 +1457,10 @@ inline Status OpKernelContext::forward_input_or_allocate_output(
 }
 
 inline Status OpKernelContext::forward_input_or_allocate_output(
-    gtl::ArraySlice<StringPiece> candidate_input_names, StringPiece output_name,
-    const TensorShape& output_shape, Tensor** output) {
-  for (const StringPiece& input_name : candidate_input_names) {
+    gtl::ArraySlice<absl::string_view> candidate_input_names,
+    absl::string_view output_name, const TensorShape& output_shape,
+    Tensor** output) {
+  for (const absl::string_view& input_name : candidate_input_names) {
     if (forward_input_to_output_with_shape(input_name, output_name,
                                            output_shape, output)
             .ok()) {
diff --git a/tensorflow/core/framework/rendezvous.cc b/tensorflow/core/framework/rendezvous.cc
index e84143f1b9..3d14f221a3 100644
--- a/tensorflow/core/framework/rendezvous.cc
+++ b/tensorflow/core/framework/rendezvous.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
@@ -36,15 +37,15 @@ namespace tensorflow {
 Rendezvous::ParsedKey& Rendezvous::ParsedKey::operator=(const ParsedKey& b) {
   const char* b_base = b.buf_.data();
   buf_ = b.buf_;
-  src_device = StringPiece(buf_.data() + (b.src_device.data() - b_base),
-                           b.src_device.size());
+  src_device = absl::string_view(buf_.data() + (b.src_device.data() - b_base),
+                                 b.src_device.size());
   src = b.src;
   src_incarnation = b.src_incarnation;
-  dst_device = StringPiece(buf_.data() + (b.dst_device.data() - b_base),
-                           b.dst_device.size());
+  dst_device = absl::string_view(buf_.data() + (b.dst_device.data() - b_base),
+                                 b.dst_device.size());
   dst = b.dst;
-  edge_name = StringPiece(buf_.data() + (b.edge_name.data() - b_base),
-                          b.edge_name.size());
+  edge_name = absl::string_view(buf_.data() + (b.edge_name.data() - b_base),
+                                b.edge_name.size());
   return *this;
 }
 
@@ -68,22 +69,22 @@ string Rendezvous::CreateKey(const string& src_device, uint64 src_incarnation,
 // Return the prefix of "*s" up to the next occurrence of "delim", or
 // the whole remaining string if "delim" is not found.  "*s" is advanced
 // past the string returned plus the delimiter (if found).
-static StringPiece ConsumeNextPart(StringPiece* s, char delim) {
+static absl::string_view ConsumeNextPart(absl::string_view* s, char delim) {
   for (size_t offset = 0; offset < s->size(); offset++) {
     if ((*s)[offset] == delim) {
-      StringPiece result(s->data(), offset);
+      absl::string_view result(s->data(), offset);
       s->remove_prefix(offset + 1);  // +1: remove delim, as well
       return result;
     }
   }
   // No delimiter found: return rest of string
-  StringPiece result(s->data(), s->size());
+  absl::string_view result(s->data(), s->size());
   s->remove_prefix(s->size());
   return result;
 }
 
 /* static */
-Status Rendezvous::ParseKey(StringPiece key, ParsedKey* out) {
+Status Rendezvous::ParseKey(absl::string_view key, ParsedKey* out) {
   if (key.data() == out->buf_.data()) {
     // Caller used our buf_ string directly, so we don't need to copy.  (The
     // SendOp and RecvOp implementations do this, for example).
@@ -93,8 +94,8 @@ Status Rendezvous::ParseKey(StringPiece key, ParsedKey* out) {
     // for the lifetime of the ParsedKey object.
     out->buf_.assign(key.data(), key.size());
   }
-  StringPiece s(out->buf_);
-  StringPiece parts[5];
+  absl::string_view s(out->buf_);
+  absl::string_view parts[5];
   for (int i = 0; i < 5; i++) {
     parts[i] = ConsumeNextPart(&s, ';');
   }
@@ -104,9 +105,9 @@ Status Rendezvous::ParseKey(StringPiece key, ParsedKey* out) {
       strings::HexStringToUint64(parts[1], &out->src_incarnation) &&
       DeviceNameUtils::ParseFullName(parts[2], &out->dst) &&
       !parts[3].empty()) {
-    out->src_device = StringPiece(parts[0].data(), parts[0].size());
-    out->dst_device = StringPiece(parts[2].data(), parts[2].size());
-    out->edge_name = StringPiece(parts[3].data(), parts[3].size());
+    out->src_device = absl::string_view(parts[0].data(), parts[0].size());
+    out->dst_device = absl::string_view(parts[2].data(), parts[2].size());
+    out->edge_name = absl::string_view(parts[3].data(), parts[3].size());
     return Status::OK();
   }
   return errors::InvalidArgument("Invalid  rendezvous key: ", key);
@@ -277,7 +278,7 @@ class LocalRendezvousImpl : public Rendezvous {
   };
 
   // We key the hash table by KeyHash of the Rendezvous::CreateKey string
-  static uint64 KeyHash(const StringPiece& k) {
+  static uint64 KeyHash(const absl::string_view& k) {
     return Hash64(k.data(), k.size());
   }
 
diff --git a/tensorflow/core/framework/rendezvous.h b/tensorflow/core/framework/rendezvous.h
index 01e43e44e3..1e796a443d 100644
--- a/tensorflow/core/framework/rendezvous.h
+++ b/tensorflow/core/framework/rendezvous.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/control_flow.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -60,18 +61,18 @@ class Rendezvous : public core::RefCounted {
   // Parses the key constructed by CreateKey and parse src/dst device
   // names into structures respectively.
   struct ParsedKey {
-    StringPiece src_device;
+    absl::string_view src_device;
     DeviceNameUtils::ParsedName src;
     uint64 src_incarnation = 0;
-    StringPiece dst_device;
+    absl::string_view dst_device;
     DeviceNameUtils::ParsedName dst;
-    StringPiece edge_name;
+    absl::string_view edge_name;
 
     ParsedKey() {}
     ParsedKey(const ParsedKey& b) { *this = b; }
 
     ParsedKey& operator=(const ParsedKey& b);
-    StringPiece FullKey() const { return buf_; }
+    absl::string_view FullKey() const { return buf_; }
 
    private:
     friend class Rendezvous;
@@ -79,7 +80,7 @@ class Rendezvous : public core::RefCounted {
     friend class RecvOp;
     string buf_;
   };
-  static Status ParseKey(StringPiece key, ParsedKey* out);
+  static Status ParseKey(absl::string_view key, ParsedKey* out);
 
   // The caller is a tensor producer and it sends a message (a tensor
   // "val" and a bool "is_dead") under the given "key".
diff --git a/tensorflow/core/framework/resource_mgr.cc b/tensorflow/core/framework/resource_mgr.cc
index 508a8d3149..31a5f8bd6d 100644
--- a/tensorflow/core/framework/resource_mgr.cc
+++ b/tensorflow/core/framework/resource_mgr.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/resource_mgr.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -223,7 +224,7 @@ Status ResourceMgr::Cleanup(const string& container) {
   return Status::OK();
 }
 
-static bool IsValidContainerName(StringPiece s) {
+static bool IsValidContainerName(absl::string_view s) {
   using ::tensorflow::strings::Scanner;
   return Scanner(s)
       .One(Scanner::LETTER_DIGIT_DOT)
@@ -275,7 +276,7 @@ const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
   return ctx->input(input).flat<ResourceHandle>()(0);
 }
 
-Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
+Status HandleFromInput(OpKernelContext* ctx, absl::string_view input,
                        ResourceHandle* handle) {
   const Tensor* tensor;
   TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h
index 4a531648d9..703e2b6922 100644
--- a/tensorflow/core/framework/resource_mgr.h
+++ b/tensorflow/core/framework/resource_mgr.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <typeinfo>
 #include <unordered_map>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/resource_handle.h"
@@ -244,7 +245,7 @@ ResourceHandle MakePerStepResourceHandle(OpKernelContext* ctx,
 
 // Returns a resource handle from a numbered op input.
 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input);
-Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
+Status HandleFromInput(OpKernelContext* ctx, absl::string_view input,
                        ResourceHandle* handle);
 
 // Create a resource pointed by a given resource handle.
diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index 4dcc80680f..174b2d6bdf 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/framework/shape_inference.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb_text.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
@@ -172,7 +173,7 @@ Status InferenceContext::Run(
   return s;
 }
 
-Status InferenceContext::set_output(StringPiece output_name,
+Status InferenceContext::set_output(absl::string_view output_name,
                                     const std::vector<ShapeHandle>& shapes) {
   auto result = output_name_map_.find(output_name);
   if (result == output_name_map_.end()) {
@@ -191,7 +192,7 @@ Status InferenceContext::set_output(StringPiece output_name,
   return Status::OK();
 }
 
-Status InferenceContext::input(StringPiece input_name,
+Status InferenceContext::input(absl::string_view input_name,
                                std::vector<ShapeHandle>* output) const {
   const auto result = input_name_map_.find(input_name);
   if (result == input_name_map_.end()) {
@@ -205,7 +206,7 @@ Status InferenceContext::input(StringPiece input_name,
   return Status::OK();
 }
 
-Status InferenceContext::output(StringPiece output_name,
+Status InferenceContext::output(absl::string_view output_name,
                                 std::vector<ShapeHandle>* output) const {
   const auto result = output_name_map_.find(output_name);
   if (result == output_name_map_.end()) {
diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h
index e3885b7d9e..7fe27d1d9f 100644
--- a/tensorflow/core/framework/shape_inference.h
+++ b/tensorflow/core/framework/shape_inference.h
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -288,7 +289,8 @@ class InferenceContext {
   void SetInput(int idx, ShapeHandle shape) { inputs_[idx] = shape; }
 
   ShapeHandle input(int64 idx) const { return inputs_[idx]; }
-  Status input(StringPiece input_name, std::vector<ShapeHandle>* output) const;
+  Status input(absl::string_view input_name,
+               std::vector<ShapeHandle>* output) const;
   int num_inputs() const { return inputs_.size(); }
 
   // Returns the input tensor at index <idx>, or nullptr if the input tensor is
@@ -325,12 +327,12 @@ class InferenceContext {
 
   ShapeHandle output(int64 idx) const { return outputs_.at(idx); }
   void set_output(int idx, ShapeHandle shape) { outputs_.at(idx) = shape; }
-  Status set_output(StringPiece output_name,
+  Status set_output(absl::string_view output_name,
                     const std::vector<ShapeHandle>& shapes);
 
   int num_outputs() const { return outputs_.size(); }
   ShapeHandle output(int idx) const { return outputs_.at(idx); }
-  Status output(StringPiece output_name,
+  Status output(absl::string_view output_name,
                 std::vector<ShapeHandle>* output) const;
 
   AttrSlice attrs() const { return AttrSlice(*node_def_); }
@@ -525,7 +527,7 @@ class InferenceContext {
   // set *value to its value.  If no attr with attr_name is found in def(), or
   // the attr does not have a matching type, a non-ok status will be returned.
   template <class T>
-  Status GetAttr(StringPiece attr_name, T* value) const;
+  Status GetAttr(absl::string_view attr_name, T* value) const;
 
   // Returns in <out> the result of dividing <dividend> by <divisor>.
   // Returns an error if <divisor>  is not positive or if <evenly_divisible>
@@ -816,7 +818,7 @@ inline DimensionOrConstant::DimensionOrConstant(int64 val) : val(val) {
 }
 
 template <class T>
-Status InferenceContext::GetAttr(StringPiece attr_name, T* value) const {
+Status InferenceContext::GetAttr(absl::string_view attr_name, T* value) const {
   return GetNodeAttr(*node_def_, attr_name, value);
 }
 
diff --git a/tensorflow/core/framework/shape_inference_testutil.cc b/tensorflow/core/framework/shape_inference_testutil.cc
index b54dd220ab..214a4aee44 100644
--- a/tensorflow/core/framework/shape_inference_testutil.cc
+++ b/tensorflow/core/framework/shape_inference_testutil.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/framework/shape_inference_testutil.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -86,7 +87,7 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op,
                    " but should list ", num_outputs);
   }
   for (int i = 0; i < num_outputs; ++i) {
-    StringPiece expected(expected_outs_v[i]);
+    absl::string_view expected(expected_outs_v[i]);
     shape_inference::ShapeHandle out = c.output(i);
 
     string err_prefix = strings::StrCat("Output ", i);
@@ -153,7 +154,7 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op,
     }
     for (int j = 0; j < expected_dims.size(); ++j) {
       err_prefix = strings::StrCat("Output dim ", i, ",", j);
-      StringPiece expected_dim(expected_dims[j]);
+      absl::string_view expected_dim(expected_dims[j]);
       DimensionHandle out_dim = c.Dim(out, j);
 
       std::pair<int, int> in_dim_idx(-1, -1);
@@ -245,7 +246,7 @@ Status ShapeInferenceTestutil::MakeShapeFromString(
       dims.push_back(manager->MakeDim(InferenceContext::kUnknownDim));
     } else {
       scanner.RestartCapture().Many(strings::Scanner::DIGIT);
-      StringPiece match;
+      absl::string_view match;
       int64 dim_size = 0;
 
       if (!scanner.GetResult(nullptr, &match) ||
diff --git a/tensorflow/core/framework/shape_inference_testutil.h b/tensorflow/core/framework/shape_inference_testutil.h
index bb4dc25da4..deba5bb6f1 100644
--- a/tensorflow/core/framework/shape_inference_testutil.h
+++ b/tensorflow/core/framework/shape_inference_testutil.h
@@ -16,10 +16,10 @@ limitations under the License.
 #define TENSORFLOW_CORE_FRAMEWORK_SHAPE_INFERENCE_TESTUTIL_H_
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/shape_inference.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/version.h"
@@ -32,7 +32,7 @@ class Tensor;
 
 struct ShapeInferenceTestOp {
   typedef std::pair<string, DataType> ShapeAndType;
-  explicit ShapeInferenceTestOp(StringPiece name) : name(string(name)) {}
+  explicit ShapeInferenceTestOp(absl::string_view name) : name(string(name)) {}
   string name;
   NodeDef node_def;
   std::vector<const Tensor*> input_tensors;
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index 1dea6da911..50b39eb2ff 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -29,6 +29,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/tensor.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/resource_handle.pb.h"
@@ -133,8 +134,8 @@ struct Helper {
   template <typename Destination>
   static void Encode(TensorBuffer* in, int64 n, Destination* out) {
     DCHECK_EQ(in->size(), sizeof(T) * n);
-    port::AssignRefCounted(StringPiece(in->base<const char>(), in->size()), in,
-                           out);
+    port::AssignRefCounted(
+        absl::string_view(in->base<const char>(), in->size()), in, out);
   }
 
   // Decoder of simple type T. Copy the bytes from "in" into the
@@ -1148,9 +1149,10 @@ string Tensor::SummarizeValue(int64 max_entries, bool print_v2) const {
   }
 }
 
-StringPiece Tensor::tensor_data() const {
-  if (buf_ == nullptr) return StringPiece();  // Don't die for empty tensors
-  return StringPiece(static_cast<char*>(buf_->data()), TotalBytes());
+absl::string_view Tensor::tensor_data() const {
+  if (buf_ == nullptr)
+    return absl::string_view();  // Don't die for empty tensors
+  return absl::string_view(static_cast<char*>(buf_->data()), TotalBytes());
 }
 
 bool Tensor::SharesBufferWith(const Tensor& b) const {
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index d0f9eb56e2..c753b12698 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_FRAMEWORK_TENSOR_H_
 #define TENSORFLOW_CORE_FRAMEWORK_TENSOR_H_
 
+#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -24,7 +25,6 @@ limitations under the License.
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -469,7 +469,7 @@ class Tensor {
   /// not get destroyed while the `StringPiece` is still used.
   ///
   /// REQUIRES: `DataTypeCanUseMemcpy(dtype())`.
-  StringPiece tensor_data() const;
+  absl::string_view tensor_data() const;
 
   /// Copy the other tensor into this tensor and reshape it and reinterpret the
   /// buffer's datatype.
diff --git a/tensorflow/core/framework/tensor_util.cc b/tensorflow/core/framework/tensor_util.cc
index 65f6dc1c00..37ab67fcbc 100644
--- a/tensorflow/core/framework/tensor_util.cc
+++ b/tensorflow/core/framework/tensor_util.cc
@@ -16,9 +16,9 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_util.h"
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 namespace tensor {
@@ -27,12 +27,12 @@ Tensor DeepCopy(const Tensor& other) {
   Tensor tmp = Tensor(other.dtype(), other.shape());
   if (DataTypeCanUseMemcpy(other.dtype())) {
     if (other.NumElements() > 0) {
-      StringPiece other_data = other.tensor_data();
+      absl::string_view other_data = other.tensor_data();
 
       // We use StringPiece as a convenient map over the tensor buffer,
       // but we cast the type to get to the underlying buffer to do the
       // copy.
-      StringPiece tmp_data = tmp.tensor_data();
+      absl::string_view tmp_data = tmp.tensor_data();
       memcpy(const_cast<char*>(tmp_data.data()), other_data.data(),
              other_data.size());
     }
@@ -72,12 +72,12 @@ Status Concat(const gtl::ArraySlice<Tensor>& tensors, Tensor* result) {
   // We use StringPiece as a convenient map over the tensor buffer,
   // but we cast the type to get to the underlying buffer to do the
   // copy.
-  StringPiece to_data = result->tensor_data();
+  absl::string_view to_data = result->tensor_data();
 
   if (DataTypeCanUseMemcpy(dtype)) {
     int64 offset = 0;
     for (const Tensor& tensor : tensors) {
-      StringPiece from_data = tensor.tensor_data();
+      absl::string_view from_data = tensor.tensor_data();
       CHECK_LE(offset + from_data.size(), to_data.size());
       memcpy(const_cast<char*>(to_data.data()) + offset, from_data.data(),
              from_data.size());
@@ -121,7 +121,7 @@ Status Split(const Tensor& tensor, const gtl::ArraySlice<int64>& sizes,
         "'tensor'");
   }
 
-  StringPiece from_data = tensor.tensor_data();
+  absl::string_view from_data = tensor.tensor_data();
 
   if (DataTypeCanUseMemcpy(tensor.dtype())) {
     int64 offset = 0;
@@ -134,7 +134,7 @@ Status Split(const Tensor& tensor, const gtl::ArraySlice<int64>& sizes,
       // We use StringPiece as a convenient map over the tensor buffer,
       // but we cast the type to get to the underlying buffer to do the
       // copy.
-      StringPiece to_data = split->tensor_data();
+      absl::string_view to_data = split->tensor_data();
       CHECK_LE(offset + to_data.size(), from_data.size());
       memcpy(const_cast<char*>(to_data.data()), from_data.data() + offset,
              to_data.size());
diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc
index 2280114de5..944555f683 100644
--- a/tensorflow/core/framework/types.cc
+++ b/tensorflow/core/framework/types.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/framework/types.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/register_types.h"
 
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -113,7 +114,7 @@ string DataTypeString(DataType dtype) {
   return DataTypeStringInternal(dtype);
 }
 
-bool DataTypeFromString(StringPiece sp, DataType* dt) {
+bool DataTypeFromString(absl::string_view sp, DataType* dt) {
   if (str_util::EndsWith(sp, "_ref")) {
     sp.remove_suffix(4);
     DataType non_ref;
diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h
index 2e96b05787..e07877aa09 100644
--- a/tensorflow/core/framework/types.h
+++ b/tensorflow/core/framework/types.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <set>
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 // Disable clang-format to prevent 'FixedPoint' header from being included
 // before 'Tensor' header on which it depends.
@@ -30,7 +31,6 @@ limitations under the License.
 #include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/framework/resource_handle.h"
 #include "tensorflow/core/framework/types.pb.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/logging.h"
@@ -56,7 +56,8 @@ class DeviceType {
   DeviceType(const char* type)  // NOLINT(runtime/explicit)
       : type_(type) {}
 
-  explicit DeviceType(StringPiece type) : type_(type.data(), type.size()) {}
+  explicit DeviceType(absl::string_view type)
+      : type_(type.data(), type.size()) {}
 
   const char* type() const { return type_.c_str(); }
   const string& type_string() const { return type_; }
@@ -228,7 +229,7 @@ class DataTypeSet {
 
 // If "sp" names a valid type, store it in "*dt" and return true.  Otherwise,
 // return false.
-bool DataTypeFromString(StringPiece sp, DataType* dt);
+bool DataTypeFromString(absl::string_view sp, DataType* dt);
 
 constexpr inline DataTypeSet ToSet(DataType dt) {
   return DataTypeSet(1u << static_cast<uint32>(dt));
diff --git a/tensorflow/core/framework/variant_op_registry.cc b/tensorflow/core/framework/variant_op_registry.cc
index ef5b240aea..f089114919 100644
--- a/tensorflow/core/framework/variant_op_registry.cc
+++ b/tensorflow/core/framework/variant_op_registry.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/type_index.h"
 #include "tensorflow/core/framework/variant.h"
@@ -89,7 +90,7 @@ REGISTER_VARIANT_SHAPE_TYPE(double);
 #undef REGISTER_VARIANT_SHAPE_TYPE
 
 UnaryVariantOpRegistry::VariantDecodeFn* UnaryVariantOpRegistry::GetDecodeFn(
-    StringPiece type_name) {
+    absl::string_view type_name) {
   auto found = decode_fns.find(type_name);
   if (found == decode_fns.end()) return nullptr;
   return &found->second;
@@ -102,7 +103,7 @@ void UnaryVariantOpRegistry::RegisterDecodeFn(
   CHECK_EQ(existing, nullptr)
       << "Unary VariantDecodeFn for type_name: " << type_name
       << " already registered";
-  decode_fns.insert(std::pair<StringPiece, VariantDecodeFn>(
+  decode_fns.insert(std::pair<absl::string_view, VariantDecodeFn>(
       GetPersistentStringPiece(type_name), decode_fn));
 }
 
@@ -179,7 +180,7 @@ Status VariantDeviceCopy(
 
 // Special casing UnaryOpFn per op and per device.
 UnaryVariantOpRegistry::VariantUnaryOpFn* UnaryVariantOpRegistry::GetUnaryOpFn(
-    VariantUnaryOp op, StringPiece device, const TypeIndex& type_index) {
+    VariantUnaryOp op, absl::string_view device, const TypeIndex& type_index) {
   auto found = unary_op_fns.find({op, device, type_index});
   if (found == unary_op_fns.end()) return nullptr;
   return &found->second;
@@ -221,7 +222,8 @@ REGISTER_VARIANT_ZEROS_LIKE_TYPE(bool);
 
 // Special casing BinaryOpFn per op and per device.
 UnaryVariantOpRegistry::VariantBinaryOpFn*
-UnaryVariantOpRegistry::GetBinaryOpFn(VariantBinaryOp op, StringPiece device,
+UnaryVariantOpRegistry::GetBinaryOpFn(VariantBinaryOp op,
+                                      absl::string_view device,
                                       const TypeIndex& type_index) {
   auto found = binary_op_fns.find({op, device, type_index});
   if (found == binary_op_fns.end()) return nullptr;
diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h
index 7eb37e859f..dbbd6cebfc 100644
--- a/tensorflow/core/framework/variant_op_registry.h
+++ b/tensorflow/core/framework/variant_op_registry.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <string>
 #include <unordered_set>
 #include <vector>
+#include "absl/strings/string_view.h"
 
 #define EIGEN_USE_THREADS
 
@@ -105,7 +106,7 @@ class UnaryVariantOpRegistry {
                         const VariantDecodeFn& decode_fn);
 
   // Returns nullptr if no decode function was found for the given TypeName.
-  VariantDecodeFn* GetDecodeFn(StringPiece type_name);
+  VariantDecodeFn* GetDecodeFn(absl::string_view type_name);
 
   // Add a copy-to-GPU function to the registry.
   void RegisterDeviceCopyFn(const VariantDeviceCopyDirection direction,
@@ -124,7 +125,7 @@ class UnaryVariantOpRegistry {
 
   // Returns nullptr if no unary op function was found for the given
   // op, device, and TypeName.
-  VariantUnaryOpFn* GetUnaryOpFn(VariantUnaryOp op, StringPiece device,
+  VariantUnaryOpFn* GetUnaryOpFn(VariantUnaryOp op, absl::string_view device,
                                  const TypeIndex& type_index);
 
   // Add a binary op function to the registry.
@@ -134,7 +135,7 @@ class UnaryVariantOpRegistry {
 
   // Returns nullptr if no binary op function was found for the given
   // op, device and TypeName.
-  VariantBinaryOpFn* GetBinaryOpFn(VariantBinaryOp op, StringPiece device,
+  VariantBinaryOpFn* GetBinaryOpFn(VariantBinaryOp op, absl::string_view device,
                                    const TypeIndex& type_index);
 
   // Get a pointer to a global UnaryVariantOpRegistry object
@@ -155,7 +156,8 @@ class UnaryVariantOpRegistry {
   };
 
   gtl::FlatMap<TypeIndex, VariantShapeFn, TypeIndexHash> shape_fns;
-  gtl::FlatMap<StringPiece, VariantDecodeFn, StringPieceHasher> decode_fns;
+  gtl::FlatMap<absl::string_view, VariantDecodeFn, StringPieceHasher>
+      decode_fns;
 
   // Map std::pair<Direction, type_name> to function.
   struct PairHash {
@@ -179,10 +181,11 @@ class UnaryVariantOpRegistry {
   // and references therein
   template <typename Op>
   struct FuncTuple {
-    FuncTuple(const Op& op, const StringPiece& dev, const TypeIndex& type_index)
+    FuncTuple(const Op& op, const absl::string_view& dev,
+              const TypeIndex& type_index)
         : op_type_(op), device_(dev), type_index_(type_index) {}
     Op op_type_;
-    StringPiece device_;
+    absl::string_view device_;
     TypeIndex type_index_;
   };
   // friend declaration for operator==
@@ -192,7 +195,7 @@ class UnaryVariantOpRegistry {
   struct TupleHash {
     template <typename Op>
     std::size_t operator()(
-        const std::tuple<Op, StringPiece, TypeIndex>& x) const {
+        const std::tuple<Op, absl::string_view, TypeIndex>& x) const {
       // The hash of an enum is just its value as a std::size_t.
       std::size_t ret = static_cast<std::size_t>(std::get<0>(x));
       ret = Hash64Combine(ret, sp_hasher_(std::get<1>(x)));
@@ -218,14 +221,14 @@ class UnaryVariantOpRegistry {
   // Find or insert a string into a persistent string storage
   // container; return the StringPiece pointing to the permanent string
   // location.
-  static StringPiece GetPersistentStringPiece(const string& str) {
+  static absl::string_view GetPersistentStringPiece(const string& str) {
     const auto string_storage = PersistentStringStorage();
     auto found = string_storage->find(str);
     if (found == string_storage->end()) {
       auto inserted = string_storage->insert(str);
-      return StringPiece(*inserted.first);
+      return absl::string_view(*inserted.first);
     } else {
-      return StringPiece(*found);
+      return absl::string_view(*found);
     }
   }
 };
diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h
index 2d94dd5cdc..102cbb18b2 100644
--- a/tensorflow/core/graph/costmodel.h
+++ b/tensorflow/core/graph/costmodel.h
@@ -19,18 +19,18 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/cost_graph.pb.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/types.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace tensorflow {
-typedef std::unordered_map<StringPiece, int32, StringPieceHasher>
+typedef std::unordered_map<absl::string_view, int32, StringPieceHasher>
     NodeNameToCostIdMap;
 
 class StepStats;
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index bc0a6ae346..91b0cdab8e 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -549,7 +550,7 @@ Status Graph::AddFunctionLibrary(const FunctionDefLibrary& fdef_lib) {
 
 namespace {
 
-void AddInput(NodeDef* dst, StringPiece src_name, int src_slot) {
+void AddInput(NodeDef* dst, absl::string_view src_name, int src_slot) {
   if (src_slot == Graph::kControlSlot) {
     dst->add_input(strings::StrCat("^", src_name));
   } else if (src_slot == 0) {
@@ -635,7 +636,7 @@ void Graph::ToGraphDefSubRange(GraphDef* graph_def, int from_node_id) const {
   }
 }
 
-string Graph::NewName(StringPiece prefix) {
+string Graph::NewName(absl::string_view prefix) {
   return strings::StrCat(prefix, "/_", name_counter_++);
 }
 
@@ -729,7 +730,7 @@ int Graph::InternDeviceName(const string& device_name) {
   return index;
 }
 
-Status Graph::AddWhileContext(StringPiece frame_name,
+Status Graph::AddWhileContext(absl::string_view frame_name,
                               std::vector<Node*> enter_nodes,
                               std::vector<Node*> exit_nodes,
                               OutputTensor cond_output,
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 027ab522ed..1993d073de 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -40,6 +40,7 @@ limitations under the License.
 #include <functional>
 #include <string>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/types.h"
@@ -534,7 +535,7 @@ class Graph {
 
   // Generate new node name with the specified prefix that is unique
   // across this graph.
-  string NewName(StringPiece prefix);
+  string NewName(absl::string_view prefix);
 
   // Access to the list of all nodes.  Example usage:
   //   for (Node* node : graph.nodes()) { ... }
@@ -607,7 +608,8 @@ class Graph {
   // Create and return a new WhileContext owned by this graph. This is called
   // when a new while loop is created. `frame_name` must be unique among
   // WhileContexts in this graph.
-  Status AddWhileContext(StringPiece frame_name, std::vector<Node*> enter_nodes,
+  Status AddWhileContext(absl::string_view frame_name,
+                         std::vector<Node*> enter_nodes,
                          std::vector<Node*> exit_nodes,
                          OutputTensor cond_output,
                          std::vector<OutputTensor> body_inputs,
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index eeb5c14eaa..d70fb6a182 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/shape_refiner.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -53,7 +54,7 @@ inline bool IsNextIteration(const NodeDef& node_def) {
          node_def.op() == "RefNextIteration";
 }
 
-bool IsValidNodeName(StringPiece s, bool allow_internal_ops) {
+bool IsValidNodeName(absl::string_view s, bool allow_internal_ops) {
   using ::tensorflow::strings::Scanner;
   return Scanner(s)
       .One(allow_internal_ops ? Scanner::LETTER_DIGIT_DOT_UNDERSCORE
@@ -217,15 +218,15 @@ class GraphConstructor {
 
   // Returns true if `name` already exists in `g_` (either as a node name or
   // prefix).
-  bool NameExistsInGraph(StringPiece name);
+  bool NameExistsInGraph(absl::string_view name);
 
   // Returns true if `name` already exists in the GraphDef being imported
   // (either as a node name or prefix).
-  bool NameExistsInGraphDef(StringPiece name);
+  bool NameExistsInGraphDef(absl::string_view name);
 
   // Returns a unique version of `original_name`, or `original_name` if it's
   // already unique in the graph.
-  string FindUniqueName(StringPiece original_name);
+  string FindUniqueName(absl::string_view original_name);
 
   // Decrement pending count for users of `processed` and add the ones that now
   // have all of their pending inputs satisfied to `ready_`.
@@ -267,16 +268,18 @@ class GraphConstructor {
   };
   // TODO(vrv): Profile this data structure to see if we should use an
   // alternative implementation of std::unordered_map.
-  std::unordered_map<StringPiece, NodeInfo, StringPieceHasher> gdef_nodes_;
+  std::unordered_map<absl::string_view, NodeInfo, StringPieceHasher>
+      gdef_nodes_;
 
   // Prefixes already used in the GraphDef being imported.
-  std::unordered_set<StringPiece, StringPieceHasher> gdef_prefixes_;
+  std::unordered_set<absl::string_view, StringPieceHasher> gdef_prefixes_;
 
   // Mapping from node name to the existing node in g_.
-  std::unordered_map<StringPiece, Node*, StringPieceHasher> existing_nodes_;
+  std::unordered_map<absl::string_view, Node*, StringPieceHasher>
+      existing_nodes_;
 
   // Prefixes already used in the graph.
-  std::unordered_set<StringPiece, StringPieceHasher> existing_prefixes_;
+  std::unordered_set<absl::string_view, StringPieceHasher> existing_prefixes_;
 
   // Imported node names that have been uniquified. The key is the original
   // name, the value is the new unique name.
@@ -341,7 +344,7 @@ void GraphConstructor::UpdatePendingCountAndReady(int processed) {
 // This could be expensive but we don't expect to call it often, if at all (only
 // if there are multiple nodes in g_ with the same name)
 bool NodeNameInValues(const std::map<TensorId, TensorId>& input_map,
-                      const StringPiece& node_name) {
+                      const absl::string_view& node_name) {
   for (auto iter = input_map.begin(); iter != input_map.end(); ++iter) {
     if (iter->second.first == node_name) return true;
   }
@@ -349,17 +352,18 @@ bool NodeNameInValues(const std::map<TensorId, TensorId>& input_map,
 }
 
 bool NodeNameInValues(const std::vector<string>& control_dependencies,
-                      const StringPiece& node_name) {
+                      const absl::string_view& node_name) {
   return std::find(control_dependencies.begin(), control_dependencies.end(),
                    node_name) != control_dependencies.end();
 }
 
 // Adds any prefixes of `node_name` (not including the full name itself) to
 // `prefixes`.
-void AddPrefixes(StringPiece node_name,
-                 std::unordered_set<StringPiece, StringPieceHasher>* prefixes) {
+void AddPrefixes(
+    absl::string_view node_name,
+    std::unordered_set<absl::string_view, StringPieceHasher>* prefixes) {
   size_t idx = -1;
-  while ((idx = node_name.find('/', idx + 1)) != StringPiece::npos) {
+  while ((idx = node_name.find('/', idx + 1)) != absl::string_view::npos) {
     prefixes->insert(node_name.substr(0, idx));
   }
 }
@@ -393,7 +397,7 @@ Status GraphConstructor::EnsureNoNameCollisions() {
       }
     }
   } else if (!prefix_.empty()) {
-    StringPiece prefix_no_slash(prefix_);
+    absl::string_view prefix_no_slash(prefix_);
     prefix_no_slash.remove_suffix(1);
     if (!IsValidNodeName(prefix_no_slash, false)) {
       return errors::InvalidArgument("Imported node name prefix '", prefix_,
@@ -443,7 +447,8 @@ Status GraphConstructor::BuildNodeIndex() {
           "': Node name contains invalid characters");
     }
     if (!gdef_nodes_
-             .insert(std::make_pair(StringPiece(node_def.name()), NodeInfo(n)))
+             .insert(std::make_pair(absl::string_view(node_def.name()),
+                                    NodeInfo(n)))
              .second) {
       return errors::InvalidArgument("Node '", node_def.name(),
                                      "' is not unique");
@@ -460,7 +465,7 @@ Status GraphConstructor::BuildNodeIndex() {
     // Validate control edges at end
     bool in_control_dependence = false;
     for (int i = 0; i < node_def.input_size(); ++i) {
-      StringPiece input_name = node_def.input(i);
+      absl::string_view input_name = node_def.input(i);
       if (!input_name.empty() && str_util::StartsWith(input_name, "^")) {
         in_control_dependence = true;
       } else if (in_control_dependence) {
@@ -508,7 +513,7 @@ Status GraphConstructor::InitFromEdges() {
       int32 num_control_edges = 0;
       bool has_loop_back_edge = false;
       for (int i = 0; i < node_def.input_size(); ++i) {
-        StringPiece input_name(node_def.input(i));
+        absl::string_view input_name(node_def.input(i));
         if (str_util::StartsWith(input_name, "^")) {
           num_control_edges++;
         } else {
@@ -524,7 +529,7 @@ Status GraphConstructor::InitFromEdges() {
       }
     }
     for (int i = 0; i < node_def.input_size(); ++i) {
-      StringPiece input_name = node_def.input(i);
+      absl::string_view input_name = node_def.input(i);
       TensorId id(ParseTensorName(input_name));
       if (opts_.input_map.count(id) == 0) {
         // If an input is not mapped, then the input should appear in the graph
@@ -558,7 +563,7 @@ Status GraphConstructor::ValidateColocationConstraints(
   const auto iter = node_def.attr().find(kColocationAttrName);
   if (iter == node_def.attr().end()) return Status::OK();
   for (const string& c : iter->second.list().s()) {
-    StringPiece s(c);
+    absl::string_view s(c);
     if (str_util::ConsumePrefix(&s, kColocationGroupPrefix) &&
         gdef_nodes_.find(s) == gdef_nodes_.end()) {
       return errors::InvalidArgument(
@@ -797,7 +802,7 @@ void GraphConstructor::AddPrefixToNodeDef(
     // Skip remapped inputs (which already exist in g_ and are not being
     // imported).
     if (input_already_exists[i]) continue;
-    StringPiece input(node_def->input(i));
+    absl::string_view input(node_def->input(i));
     if (str_util::ConsumePrefix(&input, "^")) {
       node_def->set_input(i, strings::StrCat("^", prefix_, input));
     } else {
@@ -809,7 +814,7 @@ void GraphConstructor::AddPrefixToNodeDef(
     auto* list =
         node_def->mutable_attr()->at(kColocationAttrName).mutable_list();
     for (int i = 0; i < list->s_size(); ++i) {
-      StringPiece v(list->s(i));
+      absl::string_view v(list->s(i));
       if (str_util::ConsumePrefix(&v, kColocationGroupPrefix)) {
         list->set_s(i, strings::StrCat(kColocationGroupPrefix, prefix_, v));
       }
@@ -852,7 +857,7 @@ void GraphConstructor::UpdateUniquifiedColocationNames() {
     if (!status.ok()) continue;
     bool updated = false;
     for (int i = 0; i < coloc_values.size(); ++i) {
-      StringPiece val(coloc_values[i]);
+      absl::string_view val(coloc_values[i]);
       if (str_util::ConsumePrefix(&val, kColocationGroupPrefix)) {
         const auto& name_pair = uniquified_names_.find(string(val));
         if (name_pair == uniquified_names_.end()) continue;
@@ -867,19 +872,19 @@ void GraphConstructor::UpdateUniquifiedColocationNames() {
   }
 }
 
-bool GraphConstructor::NameExistsInGraph(StringPiece name) {
+bool GraphConstructor::NameExistsInGraph(absl::string_view name) {
   if (existing_nodes_.find(name) != existing_nodes_.end()) return true;
   if (existing_prefixes_.find(name) != existing_prefixes_.end()) return true;
   return false;
 }
 
-bool GraphConstructor::NameExistsInGraphDef(StringPiece name) {
+bool GraphConstructor::NameExistsInGraphDef(absl::string_view name) {
   if (gdef_nodes_.find(name) != gdef_nodes_.end()) return true;
   if (gdef_prefixes_.find(name) != gdef_prefixes_.end()) return true;
   return false;
 }
 
-string GraphConstructor::FindUniqueName(StringPiece original_name) {
+string GraphConstructor::FindUniqueName(absl::string_view original_name) {
   string name(original_name);
   int count = 0;
   // Check that any generated names don't collide with imported NodeDefs (as
@@ -1132,7 +1137,7 @@ Status GraphConstructor::PopulateReturnTensors() {
 
 Status GraphConstructor::PopulateReturnNodes() {
   if (opts_.return_nodes.empty()) return Status::OK();
-  for (StringPiece name : opts_.return_nodes) {
+  for (absl::string_view name : opts_.return_nodes) {
     auto iter = gdef_nodes_.find(name);
     if (iter == gdef_nodes_.end()) {
       return errors::InvalidArgument("Requested return node '", name,
diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc
index 3eef6bd2bd..395e88a59d 100644
--- a/tensorflow/core/graph/graph_constructor_test.cc
+++ b/tensorflow/core/graph/graph_constructor_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/graph/graph_constructor.h"
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/shape_refiner.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -155,7 +156,7 @@ class GraphConstructorTest : public ::testing::Test {
              "value for the _class attribute. Update it and its callers";
       return "";
     }
-    StringPiece loc(value[0]);
+    absl::string_view loc(value[0]);
     return str_util::ConsumePrefix(&loc, kColocationGroupPrefix) ? string(loc)
                                                                  : "";
   }
diff --git a/tensorflow/core/graph/graph_def_builder.cc b/tensorflow/core/graph/graph_def_builder.cc
index 6d5df7efba..02256ae6f9 100644
--- a/tensorflow/core/graph/graph_def_builder.cc
+++ b/tensorflow/core/graph/graph_def_builder.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <utility>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/lib/core/errors.h"
 
@@ -27,11 +28,11 @@ GraphDefBuilder::Options::Options(Graph* graph, Status* status)
 GraphDefBuilder::Options::~Options() {}
 
 GraphDefBuilder::Options GraphDefBuilder::Options::WithName(
-    StringPiece name) const {
+    absl::string_view name) const {
   return Options(*this).WithNameImpl(name);
 }
 GraphDefBuilder::Options GraphDefBuilder::Options::WithDevice(
-    StringPiece device) const {
+    absl::string_view device) const {
   return Options(*this).WithDeviceImpl(device);
 }
 GraphDefBuilder::Options GraphDefBuilder::Options::WithControlInput(
@@ -43,12 +44,12 @@ GraphDefBuilder::Options GraphDefBuilder::Options::WithControlInputs(
   return Options(*this).WithControlInputsImpl(control_inputs);
 }
 GraphDefBuilder::Options GraphDefBuilder::Options::WithNameImpl(
-    StringPiece name) {
+    absl::string_view name) {
   name_ = string(name);
   return *this;
 }
 GraphDefBuilder::Options GraphDefBuilder::Options::WithDeviceImpl(
-    StringPiece device) {
+    absl::string_view device) {
   device_ = string(device);
   return *this;
 }
@@ -71,7 +72,7 @@ Status GraphDefBuilder::ToGraphDef(GraphDef* graph_def) const {
   return status_;
 }
 
-string GraphDefBuilder::Options::GetNameForOp(StringPiece op) const {
+string GraphDefBuilder::Options::GetNameForOp(absl::string_view op) const {
   if (name_.empty()) return graph_->NewName(op);
   return name_;
 }
diff --git a/tensorflow/core/graph/graph_def_builder.h b/tensorflow/core/graph/graph_def_builder.h
index 400d8b6c84..e0528742d4 100644
--- a/tensorflow/core/graph/graph_def_builder.h
+++ b/tensorflow/core/graph/graph_def_builder.h
@@ -17,12 +17,12 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPH_GRAPH_DEF_BUILDER_H_
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
@@ -76,19 +76,19 @@ class GraphDefBuilder {
 
     // Methods for setting options.  These are const methods: they
     // return a copy of *this with the option set.
-    Options WithName(StringPiece name) const;
-    Options WithDevice(StringPiece device) const;
+    Options WithName(absl::string_view name) const;
+    Options WithDevice(absl::string_view device) const;
     Options WithControlInput(Node* control_input) const;
     Options WithControlInputs(gtl::ArraySlice<Node*> control_inputs) const;
 
     // Override the default value for an optional attr.
     template <class T>
-    Options WithAttr(StringPiece attr_name, T&& value) const {
+    Options WithAttr(absl::string_view attr_name, T&& value) const {
       return Options(*this).WithAttrImpl(attr_name, std::forward<T>(value));
     }
     // Note: overload needed to allow {...} expressions for value.
     template <class T>
-    Options WithAttr(StringPiece attr_name,
+    Options WithAttr(absl::string_view attr_name,
                      std::initializer_list<T> value) const {
       return WithAttr<std::initializer_list<T>>(attr_name, std::move(value));
     }
@@ -106,7 +106,7 @@ class GraphDefBuilder {
     // Given the Op type name, return a name for a node of that type.
     // Uses the value set in WithName() if that has been called.  Otherwise,
     // returns a name built out of the Op type name.
-    string GetNameForOp(StringPiece op) const;
+    string GetNameForOp(absl::string_view op) const;
 
     // Sets the device, adds control inputs, adds attrs, and calls Finalize().
     // If Finalize returns an error, it is saved and this function returns
@@ -122,12 +122,12 @@ class GraphDefBuilder {
     }
 
    private:
-    Options WithNameImpl(StringPiece name);
-    Options WithDeviceImpl(StringPiece device);
+    Options WithNameImpl(absl::string_view name);
+    Options WithDeviceImpl(absl::string_view device);
     Options WithControlInputImpl(Node* control_input);
     Options WithControlInputsImpl(gtl::ArraySlice<Node*> control_inputs);
     template <class T>
-    Options WithAttrImpl(StringPiece name, T&& value) {
+    Options WithAttrImpl(absl::string_view name, T&& value) {
       attrs_.emplace_back(string(name), AttrValue());
       SetAttrValue(std::forward<T>(value), &attrs_.back().second);
       return *this;
diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index 1dbcebab59..9446e7cf6b 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/memory_types.h"
 #include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/framework/tensor.pb.h"
@@ -155,7 +156,7 @@ bool IsDstInputOnHost(const Edge* edge, const GraphInfo& info) {
 
 // Add an input to dst that comes from the "src_slot" output of the
 // node named by "src_name".
-void AddInput(NodeDef* dst, StringPiece src_name, int src_slot) {
+void AddInput(NodeDef* dst, absl::string_view src_name, int src_slot) {
   if (src_slot == Graph::kControlSlot) {
     dst->add_input(strings::StrCat("^", src_name));
   } else if (src_slot == 0) {
@@ -911,7 +912,7 @@ Status AddControlEdges(const PartitionOptions& opts,
 // If 'ndef' is a Send or Recv, fills its attr send_device_incarnation
 // if possible.
 void SetIncarnation(const PartitionOptions& opts, NodeDef* ndef) {
-  StringPiece op(ndef->op());
+  absl::string_view op(ndef->op());
   if (op != "_Send" && op != "_Recv") {
     // Not related to send/recv.
     return;
diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc
index 68a20fcc5f..5f5587cef6 100644
--- a/tensorflow/core/graph/node_builder.cc
+++ b/tensorflow/core/graph/node_builder.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/graph/node_builder.h"
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -29,17 +30,17 @@ NodeBuilder::NodeOut::NodeOut(Node* n, int32 i)  // NOLINT(runtime/explicit)
       index(i),
       dt(SafeGetOutput(node, i, &error)) {}
 
-NodeBuilder::NodeOut::NodeOut(StringPiece n, int32 i, DataType t)
+NodeBuilder::NodeOut::NodeOut(absl::string_view n, int32 i, DataType t)
     : node(nullptr), error(false), name(n), index(i), dt(t) {}
 
 NodeBuilder::NodeOut::NodeOut()
     : node(nullptr), error(true), index(0), dt(DT_FLOAT) {}
 
-NodeBuilder::NodeBuilder(StringPiece name, StringPiece op_name,
+NodeBuilder::NodeBuilder(absl::string_view name, absl::string_view op_name,
                          const OpRegistryInterface* op_registry)
     : def_builder_(name, op_name, op_registry) {}
 
-NodeBuilder::NodeBuilder(StringPiece name, const OpDef* op_def)
+NodeBuilder::NodeBuilder(absl::string_view name, const OpDef* op_def)
     : def_builder_(name, op_def) {}
 
 NodeBuilder::NodeBuilder(const NodeDefBuilder& def_builder)
@@ -94,12 +95,12 @@ NodeBuilder& NodeBuilder::ControlInputs(gtl::ArraySlice<Node*> src_nodes) {
   return *this;
 }
 
-NodeBuilder& NodeBuilder::Device(StringPiece device_spec) {
+NodeBuilder& NodeBuilder::Device(absl::string_view device_spec) {
   def_builder_.Device(device_spec);
   return *this;
 }
 
-NodeBuilder& NodeBuilder::AssignedDevice(StringPiece device) {
+NodeBuilder& NodeBuilder::AssignedDevice(absl::string_view device) {
   assigned_device_ = string(device);
   return *this;
 }
diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h
index d576985a23..f2baea606a 100644
--- a/tensorflow/core/graph/node_builder.h
+++ b/tensorflow/core/graph/node_builder.h
@@ -17,12 +17,12 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPH_NODE_BUILDER_H_
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
@@ -55,7 +55,7 @@ class NodeBuilder {
     // useful when preparing a graph for ExtendSession or creating a
     // back edge to a node that hasn't been added to the graph yet,
     // but will be.
-    NodeOut(StringPiece name, int32 i, DataType t);
+    NodeOut(absl::string_view name, int32 i, DataType t);
 
     // Default constructor for std::vector<NodeOut>.
     NodeOut();
@@ -75,9 +75,9 @@ class NodeBuilder {
   // the Op plus a registry) for the Node.  Other fields are
   // specified by calling the methods below.
   // REQUIRES: The OpDef must satisfy ValidateOpDef().
-  NodeBuilder(StringPiece name, StringPiece op_name,
+  NodeBuilder(absl::string_view name, absl::string_view op_name,
               const OpRegistryInterface* op_registry = OpRegistry::Global());
-  NodeBuilder(StringPiece name, const OpDef* op_def);
+  NodeBuilder(absl::string_view name, const OpDef* op_def);
 
   // Create a NodeBuilder from an existing NodeDefBuilder.
   NodeBuilder(const NodeDefBuilder& def_builder);
@@ -98,10 +98,10 @@ class NodeBuilder {
 
   // Sets the "requested device spec" in the NodeDef (not the
   // "assigned device" in the Node).
-  NodeBuilder& Device(StringPiece device_spec);
+  NodeBuilder& Device(absl::string_view device_spec);
 
   // Sets the device name in the "assigned device" field in tensorflow::Node.
-  NodeBuilder& AssignedDevice(StringPiece device);
+  NodeBuilder& AssignedDevice(absl::string_view device);
 
   // Set the value of an attr.  attr_name must match the name of one of
   // attrs defined by the Op, and value must have the corresponding type
@@ -109,9 +109,10 @@ class NodeBuilder {
   // types for value).  Note that attrs will be set automatically if
   // they can be determined by the inputs.
   template <class T>
-  NodeBuilder& Attr(StringPiece attr_name, T&& value);
+  NodeBuilder& Attr(absl::string_view attr_name, T&& value);
   template <class T>
-  NodeBuilder& Attr(StringPiece attr_name, std::initializer_list<T> value);
+  NodeBuilder& Attr(absl::string_view attr_name,
+                    std::initializer_list<T> value);
 
   // Validates the described node and adds it to *graph, adding edges
   // for all (non-back) inputs.  If created_node is not nullptr,
@@ -150,13 +151,13 @@ class NodeBuilder {
 // IMPLEMENTATION -------------------------------------------------------------
 
 template <class T>
-NodeBuilder& NodeBuilder::Attr(StringPiece attr_name, T&& value) {
+NodeBuilder& NodeBuilder::Attr(absl::string_view attr_name, T&& value) {
   def_builder_.Attr(attr_name, std::forward<T>(value));
   return *this;
 }
 
 template <class T>
-NodeBuilder& NodeBuilder::Attr(StringPiece attr_name,
+NodeBuilder& NodeBuilder::Attr(absl::string_view attr_name,
                                std::initializer_list<T> value) {
   def_builder_.Attr(attr_name, value);
   return *this;
diff --git a/tensorflow/core/graph/quantize_training.cc b/tensorflow/core/graph/quantize_training.cc
index 3b6e8cc233..91ed6d6082 100644
--- a/tensorflow/core/graph/quantize_training.cc
+++ b/tensorflow/core/graph/quantize_training.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/graph/quantize_training.h"
 
 #include "tensorflow/core/common_runtime/executor.h"
@@ -153,7 +154,7 @@ Status FindSaveOp(const Graph* graph, Node** save_op,
   return Status::OK();
 }
 
-Node* FindRestoreAllOp(const Graph* graph, StringPiece save_prefix) {
+Node* FindRestoreAllOp(const Graph* graph, absl::string_view save_prefix) {
   for (Node* node : graph->op_nodes()) {
     // The restore_all op should have the same prefix of the save_op.
     if (node->name() == strings::StrCat(save_prefix, "/restore_all")) {
@@ -166,8 +167,8 @@ Node* FindRestoreAllOp(const Graph* graph, StringPiece save_prefix) {
 // Strips the last "/suffix" from a name.
 // We use this to construct the name of restore ops in the same way they are
 // constructed by the Saver.
-StringPiece GetNodeNamePrefix(const Node* node) {
-  StringPiece name = node->name();
+absl::string_view GetNodeNamePrefix(const Node* node) {
+  absl::string_view name = node->name();
   return name.substr(0, name.rfind('/'));
 }
 
@@ -251,7 +252,7 @@ Status AddRestoreVariableSubgraphs(Graph* graph, Node* save_op,
                                    const std::vector<const Edge*>& in_edges,
                                    const std::vector<Node*>& variables) {
   Node* prefix_op = in_edges[0]->src();
-  StringPiece name_prefix = GetNodeNamePrefix(save_op);
+  absl::string_view name_prefix = GetNodeNamePrefix(save_op);
   Node* restore_all = FindRestoreAllOp(graph, name_prefix);
   if (restore_all == nullptr) {
     return errors::InvalidArgument("graph has SaveOp, but no restore_all NoOp");
diff --git a/tensorflow/core/graph/subgraph.cc b/tensorflow/core/graph/subgraph.cc
index 60337e30aa..96c5191a0a 100644
--- a/tensorflow/core/graph/subgraph.cc
+++ b/tensorflow/core/graph/subgraph.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/types.h"
@@ -44,7 +45,8 @@ namespace subgraph {
 
 namespace {
 
-typedef std::unordered_map<StringPiece, Node*, StringPieceHasher> NameIndex;
+typedef std::unordered_map<absl::string_view, Node*, StringPieceHasher>
+    NameIndex;
 
 // Rewrite graph by replacing the output tensors specified in
 // "fed_outputs" with special feed nodes for each specified output
diff --git a/tensorflow/core/graph/subgraph_test.cc b/tensorflow/core/graph/subgraph_test.cc
index 6c014a8d44..cb3e7771ae 100644
--- a/tensorflow/core/graph/subgraph_test.cc
+++ b/tensorflow/core/graph/subgraph_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/graph/graph.h"
@@ -312,7 +313,7 @@ TEST_F(SubgraphTest, ChainOfFools) {
   EXPECT_TRUE(HasEdge("e", 0, "_send_e_0", 0));
 }
 
-static bool HasSubstr(StringPiece base, StringPiece substr) {
+static bool HasSubstr(absl::string_view base, absl::string_view substr) {
   bool ok = str_util::StrContains(base, substr);
   EXPECT_TRUE(ok) << base << ", expected substring " << substr;
   return ok;
diff --git a/tensorflow/core/graph/tensor_id.cc b/tensorflow/core/graph/tensor_id.cc
index 5a5b85e727..3a8317d757 100644
--- a/tensorflow/core/graph/tensor_id.cc
+++ b/tensorflow/core/graph/tensor_id.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <string>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
@@ -28,10 +28,10 @@ SafeTensorId::SafeTensorId(const TensorId& id)
     : SafeTensorId(string(id.first), id.second) {}
 
 TensorId ParseTensorName(const string& name) {
-  return ParseTensorName(StringPiece(name.data(), name.size()));
+  return ParseTensorName(absl::string_view(name.data(), name.size()));
 }
 
-TensorId ParseTensorName(StringPiece name) {
+TensorId ParseTensorName(absl::string_view name) {
   // Parse either a name, ^name, or name:digits.  To do so, we go backwards from
   // the end of the string, skipping over a run of digits.  If we hit a ':'
   // character, then we know we are in the 'name:digits' regime.  Otherwise, we
@@ -49,11 +49,11 @@ TensorId ParseTensorName(StringPiece name) {
   }
   TensorId id;
   if (p > base && *p == ':' && mul > 1) {
-    id.first = StringPiece(base, p - base);
+    id.first = absl::string_view(base, p - base);
     id.second = index;
   } else if (str_util::StartsWith(name, "^")) {
     // Control edge
-    id.first = StringPiece(base + 1);
+    id.first = absl::string_view(base + 1);
     id.second = Graph::kControlSlot;
   } else {
     id.first = name;
diff --git a/tensorflow/core/graph/tensor_id.h b/tensorflow/core/graph/tensor_id.h
index 0ba3942618..fb1d7f5082 100644
--- a/tensorflow/core/graph/tensor_id.h
+++ b/tensorflow/core/graph/tensor_id.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/graph/graph.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 
@@ -30,8 +30,8 @@ struct SafeTensorId;
 // Identifier for a tensor within a step.
 // first == operation_name, second == output_index
 // Note: does not own backing storage for name.
-struct TensorId : public std::pair<StringPiece, int> {
-  typedef std::pair<StringPiece, int> Base;
+struct TensorId : public std::pair<absl::string_view, int> {
+  typedef std::pair<absl::string_view, int> Base;
 
   // Inherit the set of constructors.
   using Base::pair;
@@ -55,7 +55,7 @@ struct TensorId : public std::pair<StringPiece, int> {
 };
 
 TensorId ParseTensorName(const string& name);
-TensorId ParseTensorName(StringPiece name);
+TensorId ParseTensorName(absl::string_view name);
 
 // Same as TensorId, except owns the backing storage for the op name. This makes
 // the memory management simpler at the expense of a copy.
diff --git a/tensorflow/core/graph/while_context.cc b/tensorflow/core/graph/while_context.cc
index 8e89bc4c75..31b3c56f9d 100644
--- a/tensorflow/core/graph/while_context.cc
+++ b/tensorflow/core/graph/while_context.cc
@@ -14,10 +14,11 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/graph/while_context.h"
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 
-WhileContext::WhileContext(StringPiece frame_name,
+WhileContext::WhileContext(absl::string_view frame_name,
                            std::vector<Node*> enter_nodes,
                            std::vector<Node*> exit_nodes,
                            OutputTensor cond_output,
diff --git a/tensorflow/core/graph/while_context.h b/tensorflow/core/graph/while_context.h
index 5405e62be2..310dcd712e 100644
--- a/tensorflow/core/graph/while_context.h
+++ b/tensorflow/core/graph/while_context.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPH_WHILE_CONTEXT_H_
 #define TENSORFLOW_CORE_GRAPH_WHILE_CONTEXT_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/graph/graph.h"
 
 namespace tensorflow {
@@ -34,7 +35,7 @@ namespace tensorflow {
 // differentiable. Figure out backwards compatibility story.
 class WhileContext {
  public:
-  WhileContext(StringPiece frame_name, std::vector<Node*> enter_nodes,
+  WhileContext(absl::string_view frame_name, std::vector<Node*> enter_nodes,
                std::vector<Node*> exit_nodes, OutputTensor cond_output,
                std::vector<OutputTensor> body_inputs,
                std::vector<OutputTensor> body_outputs);
diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index 7c6fe56e1f..bcb3bced95 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -26,6 +26,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -41,6 +42,7 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 43a7d6a70b..c014bcb489 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -103,6 +103,7 @@ cc_library(
         "//tensorflow/core/grappler/clusters:cluster",
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:symbolic_shapes",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -268,6 +269,7 @@ cc_library(
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:symbolic_shapes",
         "//tensorflow/core/grappler/utils:topological_sort",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -315,6 +317,7 @@ cc_library(
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:topological_sort",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -769,6 +772,7 @@ cc_library(
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:frame",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 9b94d2706a..069de8b93f 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -39,7 +40,6 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/symbolic_shapes.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -285,7 +285,7 @@ class ArithmeticOptimizerStage : public GraphOptimizerStage<string> {
     for (const NodeDef* output : ctx().node_map->GetOutputs(node.name())) {
       for (int i = 0; i < output->input_size(); ++i) {
         auto input = output->input(i);
-        StringPiece name = ParseNodeNameAsStringPiece(input, &position);
+        absl::string_view name = ParseNodeNameAsStringPiece(input, &position);
         if (name == node.name() && /*control input*/ position < 0) {
           return true;
         }
@@ -431,12 +431,12 @@ class ArithmeticNodesGroupOptimizerStage : public ArithmeticOptimizerStage {
     return signature;
   }
 
-  void MarkWithTag(const StringPiece tag, NodeDef* node) {
+  void MarkWithTag(const absl::string_view tag, NodeDef* node) {
     AddNodeAttr(tag, true, node);
   }
 
   void MarkAllMembersWithTag(const OptimizedNodesGroup& group,
-                             const StringPiece tag) const {
+                             const absl::string_view tag) const {
     AddNodeAttr(tag, true, group.root_node);
     for (NodeDef* optimized_node : group.optimized_nodes) {
       AddNodeAttr(tag, true, optimized_node);
@@ -453,12 +453,12 @@ class ArithmeticNodesGroupOptimizerStage : public ArithmeticOptimizerStage {
            ctx().nodes_to_preserve->end();
   }
 
-  bool IsMarkedWithTag(const NodeDef& node, const StringPiece tag) const {
+  bool IsMarkedWithTag(const NodeDef& node, const absl::string_view tag) const {
     return HasNodeAttr(node, tag);
   }
 
-  bool IsMarkedWithAnyTag(const NodeDef& node, const StringPiece tag1,
-                          const StringPiece tag2) const {
+  bool IsMarkedWithAnyTag(const NodeDef& node, const absl::string_view tag1,
+                          const absl::string_view tag2) const {
     return IsMarkedWithTag(node, tag1) || IsMarkedWithTag(node, tag2);
   }
 };
@@ -1577,7 +1577,7 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
       for (NodeDef* output : outputs) {
         if (IsControlInput(output->input(0))) continue;
         int port;
-        const StringPiece node_name =
+        const absl::string_view node_name =
             ParseNodeNameAsStringPiece(output->input(0), &port);
         if (node_name == node.name()) {
           tails->insert(ChainLink(output, port));
@@ -1628,7 +1628,7 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
       } else {
         for (NodeDef* new_tail : ctx().node_map->GetOutputs(tail->name())) {
           int port;
-          const StringPiece node_name =
+          const absl::string_view node_name =
               ParseNodeNameAsStringPiece(new_tail->input(0), &port);
           if (node_name != tail->name()) {
             return Status::OK();
@@ -3219,7 +3219,7 @@ uint64 UniqueNodes::ComputeSignature(const NodeDef& node) const {
 
   for (const auto& input : node.input()) {
     int pos;
-    const StringPiece node_name = ParseNodeNameAsStringPiece(input, &pos);
+    const absl::string_view node_name = ParseNodeNameAsStringPiece(input, &pos);
     h = Hash64CombineUnordered(Hash64(node_name.data(), node_name.size()), h);
     h = Hash64CombineUnordered(std::hash<int>()(pos), h);
   }
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 11331c9406..ff32bec5de 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #define EIGEN_USE_THREADS
 
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
@@ -34,7 +35,6 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/evaluation_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/symbolic_shapes.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/numbers.h"
@@ -258,12 +258,12 @@ static Status ConvertShapeToConstant(const string& op, const DataType& type,
 
 // TODO(rmlarsen): Perhaps we should move this to the GraphOptimizer base class.
 bool ConstantFolding::OptimizedNodeExists(const NodeDef& node,
-                                          StringPiece suffix) const {
+                                          absl::string_view suffix) const {
   return node_map_->NodeExists(OptimizedNodeName(node, suffix));
 }
 
 string ConstantFolding::OptimizedNodeName(const NodeDef& node,
-                                          StringPiece suffix) const {
+                                          absl::string_view suffix) const {
   return AddPrefixToNodeName(strings::StrCat(node.name(), suffix),
                              kConstantFoldingConst);
 }
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index 8593b3e0b8..7bf4b90012 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_CONSTANT_FOLDING_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_CONSTANT_FOLDING_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/resource_mgr.h"
@@ -52,8 +53,8 @@ class ConstantFolding : public GraphOptimizer {
                 const GraphDef& optimize_output, double result) override;
 
  private:
-  string OptimizedNodeName(const NodeDef& node, StringPiece suffix) const;
-  bool OptimizedNodeExists(const NodeDef& node, StringPiece suffix) const;
+  string OptimizedNodeName(const NodeDef& node, absl::string_view suffix) const;
+  bool OptimizedNodeExists(const NodeDef& node, absl::string_view suffix) const;
 
   bool IsReallyConstant(const NodeDef& node) const;
 
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 1a648da5da..a4bd3566e5 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -52,6 +52,7 @@ cc_library(
     deps = [
         ":graph_utils",
         ":function_utils",
+        "@com_google_absl//absl/strings",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -90,6 +91,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_utils",
+        "@com_google_absl//absl/strings",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
@@ -124,6 +126,7 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "@com_google_absl//absl/strings",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -147,6 +150,7 @@ tf_cc_test(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -159,6 +163,7 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "@com_google_absl//absl/strings",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
@@ -215,6 +220,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_utils",
+        "@com_google_absl//absl/strings",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:grappler_item",
@@ -261,6 +267,7 @@ tf_cc_test(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/grappler:grappler_item",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -295,6 +302,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core/grappler:grappler_item",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -479,6 +487,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core/grappler:grappler_item",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -580,6 +589,7 @@ tf_cc_test(
         ":graph_utils",
         ":function_utils",
         ":vectorization_utils",
+        "@com_google_absl//absl/strings",
         "//tensorflow/core:framework",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/core/grappler/optimizers/data/function_utils.cc b/tensorflow/core/grappler/optimizers/data/function_utils.cc
index 311df15bc2..ad2b8167cb 100644
--- a/tensorflow/core/grappler/optimizers/data/function_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/function_utils.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/function_utils.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
 #include "tensorflow/core/framework/device_base.h"
@@ -34,8 +35,8 @@ FunctionDefTensorDesc::FunctionDefTensorDesc(const string& node_name,
 FunctionDefTensorDesc::FunctionDefTensorDesc(const string& input) {
   // Parses node_name:node_output:position string into its components.
   full_str = input;
-  StringPiece capture;
-  StringPiece remaining;
+  absl::string_view capture;
+  absl::string_view remaining;
 
   // Parse "node_name"
   if (strings::Scanner(input)
@@ -86,8 +87,8 @@ void ReplaceReferences(const string& from, const string& to,
   }
 }
 
-void AddFunctionOutputWithUniqueName(StringPiece prefix,
-                                     StringPiece output_tensor_name,
+void AddFunctionOutputWithUniqueName(absl::string_view prefix,
+                                     absl::string_view output_tensor_name,
                                      FunctionDef* function, DataType dt) {
   string name = string(prefix);
   int id = function->signature().output_arg_size();
@@ -102,7 +103,7 @@ void AddFunctionOutputWithUniqueName(StringPiece prefix,
   (*function->mutable_ret())[name] = string(output_tensor_name);
 }
 
-NodeDef* AddNode(StringPiece name, StringPiece op,
+NodeDef* AddNode(absl::string_view name, absl::string_view op,
                  const std::vector<string>& inputs,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  FunctionDef* fd) {
@@ -122,45 +123,49 @@ NodeDef* AddNode(StringPiece name, StringPiece op,
   return node;
 }
 
-bool ContainsFunctionNodeWithName(StringPiece name,
+bool ContainsFunctionNodeWithName(absl::string_view name,
                                   const FunctionDef& function) {
   return FindFunctionNodeWithName(name, function) != -1;
 }
 
-bool ContainsFunctionNodeWithOp(StringPiece op, const FunctionDef& function) {
+bool ContainsFunctionNodeWithOp(absl::string_view op,
+                                const FunctionDef& function) {
   return FindFunctionNodeWithOp(op, function) != -1;
 }
 
-bool ContainsFunctionOutputWithName(StringPiece name,
+bool ContainsFunctionOutputWithName(absl::string_view name,
                                     const FunctionDef& function) {
   return FindFunctionOutputWithName(name, function) != -1;
 }
 
-int FindFunctionInputWithName(StringPiece name, const FunctionDef& function) {
+int FindFunctionInputWithName(absl::string_view name,
+                              const FunctionDef& function) {
   return graph_utils::GetFirstElementIndexWithPredicate(
       [&name](const OpDef_ArgDef& arg) { return arg.name() == name; },
       function.signature().input_arg());
 }
 
-int FindFunctionOutputWithName(StringPiece name, const FunctionDef& function) {
+int FindFunctionOutputWithName(absl::string_view name,
+                               const FunctionDef& function) {
   return graph_utils::GetFirstElementIndexWithPredicate(
       [&name](const OpDef_ArgDef& arg) { return arg.name() == name; },
       function.signature().output_arg());
 }
 
-int FindFunctionNodeWithName(StringPiece name, const FunctionDef& function) {
+int FindFunctionNodeWithName(absl::string_view name,
+                             const FunctionDef& function) {
   return graph_utils::GetFirstElementIndexWithPredicate(
       [&name](const NodeDef& node) { return node.name() == name; },
       function.node_def());
 }
 
-int FindFunctionNodeWithOp(StringPiece op, const FunctionDef& function) {
+int FindFunctionNodeWithOp(absl::string_view op, const FunctionDef& function) {
   return graph_utils::GetFirstElementIndexWithPredicate(
       [&op](const NodeDef& node) { return node.op() == op; },
       function.node_def());
 }
 
-void SetUniqueFunctionNodeName(StringPiece prefix, FunctionDef* function,
+void SetUniqueFunctionNodeName(absl::string_view prefix, FunctionDef* function,
                                NodeDef* node) {
   string name = string(prefix);
   int id = function->node_def_size();
diff --git a/tensorflow/core/grappler/optimizers/data/function_utils.h b/tensorflow/core/grappler/optimizers/data/function_utils.h
index d4ce824652..714f7d30d6 100644
--- a/tensorflow/core/grappler/optimizers/data/function_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/function_utils.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_FUNCTION_UTILS_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_FUNCTION_UTILS_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -59,46 +60,50 @@ void ReplaceReferences(const string& from, const string& to, FunctionDef* func);
 
 // Adds a function output to the function def, ensuring that the output key
 // is unique, and maps to output_tensor_name in the ret dict.
-void AddFunctionOutputWithUniqueName(StringPiece prefix,
-                                     StringPiece output_tensor_name,
+void AddFunctionOutputWithUniqueName(absl::string_view prefix,
+                                     absl::string_view output_tensor_name,
                                      FunctionDef* function, DataType dt);
 
 // Adds a node to a FunctionDef.
-NodeDef* AddNode(StringPiece name, StringPiece op,
+NodeDef* AddNode(absl::string_view name, absl::string_view op,
                  const std::vector<string>& inputs,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  FunctionDef* fd);
 
 // Checks whether the function contains a node with the given name.
-bool ContainsFunctionNodeWithName(StringPiece name,
+bool ContainsFunctionNodeWithName(absl::string_view name,
                                   const FunctionDef& function);
 
 // Checks whether the function contains a node with the given op.
-bool ContainsFunctionNodeWithOp(StringPiece op, const FunctionDef& function);
+bool ContainsFunctionNodeWithOp(absl::string_view op,
+                                const FunctionDef& function);
 
 // Checks whether the function contains an output with the given name.
-bool ContainsFunctionOutputWithName(StringPiece name,
+bool ContainsFunctionOutputWithName(absl::string_view name,
                                     const FunctionDef& function);
 
 // Returns the index of the function input with the given name or -1 if the
 // function node does not exist.
-int FindFunctionInputWithName(StringPiece name, const FunctionDef& function);
+int FindFunctionInputWithName(absl::string_view name,
+                              const FunctionDef& function);
 
 // Returns the index of the function output with the given name or -1 if the
 // function node does not exist.
-int FindFunctionOutputWithName(StringPiece name, const FunctionDef& function);
+int FindFunctionOutputWithName(absl::string_view name,
+                               const FunctionDef& function);
 
 // Returns the index of the function node with the given name or -1 if the
 // function node does not exist.
-int FindFunctionNodeWithName(StringPiece name, const FunctionDef& function);
+int FindFunctionNodeWithName(absl::string_view name,
+                             const FunctionDef& function);
 
 // Returns the index of the function node with the given op or -1 if the
 // function node does not exist.
-int FindFunctionNodeWithOp(StringPiece op, const FunctionDef& function);
+int FindFunctionNodeWithOp(absl::string_view op, const FunctionDef& function);
 
 // Sets the function node name using the `prefix` as a prefix while guaranteeing
 // the name is unique across the functions nodes.
-void SetUniqueFunctionNodeName(StringPiece prefix, FunctionDef* function,
+void SetUniqueFunctionNodeName(absl::string_view prefix, FunctionDef* function,
                                NodeDef* node);
 
 }  // end namespace function_utils
diff --git a/tensorflow/core/grappler/optimizers/data/fusion_utils.cc b/tensorflow/core/grappler/optimizers/data/fusion_utils.cc
index b3bfee138f..db6e4d8b51 100644
--- a/tensorflow/core/grappler/optimizers/data/fusion_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/fusion_utils.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/fusion_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/framework/op_def.pb.h"
@@ -423,11 +424,14 @@ void LazyConjunctionOutput(const protobuf::Map<string, string>& first_ret,
   *fused_ret = first_ret;
 }
 
-FunctionDef* FuseFunctions(
-    const FunctionDef& first_function, const FunctionDef& second_function,
-    StringPiece fused_name_prefix, const SetFunctionSignatureFn& set_signature,
-    const SetInputFn& set_input, const SetOutputFn& set_output,
-    const SetNodesFn& set_nodes, FunctionDefLibrary* library) {
+FunctionDef* FuseFunctions(const FunctionDef& first_function,
+                           const FunctionDef& second_function,
+                           absl::string_view fused_name_prefix,
+                           const SetFunctionSignatureFn& set_signature,
+                           const SetInputFn& set_input,
+                           const SetOutputFn& set_output,
+                           const SetNodesFn& set_nodes,
+                           FunctionDefLibrary* library) {
   if (first_function.attr_size() != 0 || second_function.attr_size() != 0)
     return nullptr;  // Functions with attributes are currently not supported
 
diff --git a/tensorflow/core/grappler/optimizers/data/fusion_utils.h b/tensorflow/core/grappler/optimizers/data/fusion_utils.h
index 19b7002dcd..0350a2c08e 100644
--- a/tensorflow/core/grappler/optimizers/data/fusion_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/fusion_utils.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_FUSION_UTILS_H_
 
 #include <functional>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/op_types.h"
@@ -122,11 +123,14 @@ void LazyConjunctionNodes(const FunctionDef& first_function,
 // that are not conflicting with first function.  This means that copied nodes
 // from  second function can end up having different names.  For explanation of
 // set up functions see the documentation of the functions types.
-FunctionDef* FuseFunctions(
-    const FunctionDef& first_function, const FunctionDef& second_function,
-    StringPiece fused_name_prefix, const SetFunctionSignatureFn& set_signature,
-    const SetInputFn& set_input, const SetOutputFn& set_output,
-    const SetNodesFn& set_nodes, FunctionDefLibrary* library);
+FunctionDef* FuseFunctions(const FunctionDef& first_function,
+                           const FunctionDef& second_function,
+                           absl::string_view fused_name_prefix,
+                           const SetFunctionSignatureFn& set_signature,
+                           const SetInputFn& set_input,
+                           const SetOutputFn& set_output,
+                           const SetNodesFn& set_nodes,
+                           FunctionDefLibrary* library);
 
 }  // namespace fusion_utils
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
index 1f03c6515c..034a4070d0 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -25,8 +26,8 @@ namespace tensorflow {
 namespace grappler {
 namespace graph_tests_utils {
 
-NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
-                    StringPiece function_name) {
+NodeDef MakeMapNode(absl::string_view name, absl::string_view input_node_name,
+                    absl::string_view function_name) {
   return test::function::NDef(
       name, "MapDataset", {string(input_node_name)},
       {{"f", FunctionDefHelper::FunctionRef(string(function_name))},
@@ -35,8 +36,9 @@ NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
        {"output_types", gtl::ArraySlice<DataType>{}}});
 }
 
-NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
-                       StringPiece function_name) {
+NodeDef MakeFilterNode(absl::string_view name,
+                       absl::string_view input_node_name,
+                       absl::string_view function_name) {
   return test::function::NDef(
       name, "FilterDataset", {string(input_node_name)},
       {{"predicate", FunctionDefHelper::FunctionRef(string(function_name))},
@@ -45,11 +47,12 @@ NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
        {"output_types", gtl::ArraySlice<TensorShape>{}}});
 }
 
-NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
-                            StringPiece batch_size_node_name,
-                            StringPiece num_parallel_calls_node_name,
-                            StringPiece drop_remainder_node_name,
-                            StringPiece function_name) {
+NodeDef MakeMapAndBatchNode(absl::string_view name,
+                            absl::string_view input_node_name,
+                            absl::string_view batch_size_node_name,
+                            absl::string_view num_parallel_calls_node_name,
+                            absl::string_view drop_remainder_node_name,
+                            absl::string_view function_name) {
   return test::function::NDef(
       name, "MapAndBatchDatasetV2",
       {string(input_node_name), "", string(batch_size_node_name),
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
index f7891d5e1f..8014a4f305 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
@@ -16,24 +16,26 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_TEST_UTILS_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_TEST_UTILS_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 namespace grappler {
 namespace graph_tests_utils {
 
-NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
-                    StringPiece function_name = "XTimesTwo");
+NodeDef MakeMapNode(absl::string_view name, absl::string_view input_node_name,
+                    absl::string_view function_name = "XTimesTwo");
 
-NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
-                       StringPiece function_name = "IsZero");
+NodeDef MakeFilterNode(absl::string_view name,
+                       absl::string_view input_node_name,
+                       absl::string_view function_name = "IsZero");
 
-NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
-                            StringPiece batch_size_node_name,
-                            StringPiece num_parallel_calls_node_name,
-                            StringPiece drop_remainder_node_name,
-                            StringPiece function_name = "XTimesTwo");
+NodeDef MakeMapAndBatchNode(absl::string_view name,
+                            absl::string_view input_node_name,
+                            absl::string_view batch_size_node_name,
+                            absl::string_view num_parallel_calls_node_name,
+                            absl::string_view drop_remainder_node_name,
+                            absl::string_view function_name = "XTimesTwo");
 
 }  // end namespace graph_tests_utils
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index b863a25dc5..1cc6fca037 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -99,7 +100,7 @@ NodeDef* AddScalarPlaceholder(DataType dtype, MutableGraphView* graph) {
   return graph->AddNode(std::move(node));
 }
 
-NodeDef* AddNode(StringPiece name, StringPiece op,
+NodeDef* AddNode(absl::string_view name, absl::string_view op,
                  const std::vector<string>& inputs,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  MutableGraphView* graph) {
@@ -150,7 +151,7 @@ NodeDef* AddScalarConstNode(int64 v, MutableGraphView* graph) {
 }
 
 template <>
-NodeDef* AddScalarConstNode(StringPiece v, MutableGraphView* graph) {
+NodeDef* AddScalarConstNode(absl::string_view v, MutableGraphView* graph) {
   return AddScalarConstNodeHelper(
       DT_STRING,
       [v](TensorProto* proto) { proto->add_string_val(v.data(), v.size()); },
@@ -187,20 +188,20 @@ bool Compare(const GraphDef& g1, const GraphDef& g2) {
   return true;
 }
 
-bool ContainsGraphFunctionWithName(StringPiece name,
+bool ContainsGraphFunctionWithName(absl::string_view name,
                                    const FunctionDefLibrary& library) {
   return FindGraphFunctionWithName(name, library) != -1;
 }
 
-bool ContainsGraphNodeWithName(StringPiece name, const GraphDef& graph) {
+bool ContainsGraphNodeWithName(absl::string_view name, const GraphDef& graph) {
   return FindGraphNodeWithName(name, graph) != -1;
 }
 
-bool ContainsNodeWithOp(StringPiece op, const GraphDef& graph) {
+bool ContainsNodeWithOp(absl::string_view op, const GraphDef& graph) {
   return FindGraphNodeWithOp(op, graph) != -1;
 }
 
-int FindGraphFunctionWithName(StringPiece name,
+int FindGraphFunctionWithName(absl::string_view name,
                               const FunctionDefLibrary& library) {
   return GetFirstElementIndexWithPredicate(
       [&name](const FunctionDef& function) {
@@ -209,13 +210,13 @@ int FindGraphFunctionWithName(StringPiece name,
       library.function());
 }
 
-int FindGraphNodeWithName(StringPiece name, const GraphDef& graph) {
+int FindGraphNodeWithName(absl::string_view name, const GraphDef& graph) {
   return GetFirstElementIndexWithPredicate(
       [&name](const NodeDef& node) { return node.name() == name; },
       graph.node());
 }
 
-int FindGraphNodeWithOp(StringPiece op, const GraphDef& graph) {
+int FindGraphNodeWithOp(absl::string_view op, const GraphDef& graph) {
   return GetFirstElementIndexWithPredicate(
       [&op](const NodeDef& node) { return node.op() == op; }, graph.node());
 }
@@ -232,7 +233,7 @@ NodeDef* GetInputNode(const NodeDef& node, const MutableGraphView& graph) {
   return graph.GetRegularFanin(input_port).node;
 }
 
-void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph,
+void SetUniqueGraphNodeName(absl::string_view prefix, GraphDef* graph,
                             NodeDef* node) {
   string name = string(prefix);
   int id = graph->node_size();
@@ -248,7 +249,8 @@ void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph,
   node->set_name(std::move(name));
 }
 
-void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
+void SetUniqueGraphFunctionName(absl::string_view prefix,
+                                FunctionDefLibrary* library,
                                 FunctionDef* function) {
   string name = string(prefix);
   int id = library->function_size();
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h
index d130fee204..4397f77cba 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_UTILS_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_UTILS_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -48,7 +49,7 @@ int GetFirstElementIndexWithPredicate(const Predicate& predicate,
 }
 
 // Adds a node to the graph.
-NodeDef* AddNode(StringPiece name, StringPiece op,
+NodeDef* AddNode(absl::string_view name, absl::string_view op,
                  const std::vector<string>& inputs,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  MutableGraphView* graph);
@@ -77,33 +78,33 @@ NodeDef* AddScalarConstNode(int v, MutableGraphView* graph);
 template <>
 NodeDef* AddScalarConstNode(int64 v, MutableGraphView* graph);
 template <>
-NodeDef* AddScalarConstNode(StringPiece v, MutableGraphView* graph);
+NodeDef* AddScalarConstNode(absl::string_view v, MutableGraphView* graph);
 
 // Checks whether the two graphs are the same.
 bool Compare(const GraphDef& g1, const GraphDef& g2);
 
 // Checks whether the graph contains a node with the given name.
-bool ContainsGraphNodeWithName(StringPiece name, const GraphDef& graph);
+bool ContainsGraphNodeWithName(absl::string_view name, const GraphDef& graph);
 
 // Checks whether the library contains a function with the given name.
-bool ContainsGraphFunctionWithName(StringPiece name,
+bool ContainsGraphFunctionWithName(absl::string_view name,
                                    const FunctionDefLibrary& library);
 
 // Checks whether the graph contains a node with the given op.
-bool ContainsNodeWithOp(StringPiece op, const GraphDef& graph);
+bool ContainsNodeWithOp(absl::string_view op, const GraphDef& graph);
 
 // Returns the index of the node with the given name or -1 if the node does
 // not exist.
-int FindGraphNodeWithName(StringPiece name, const GraphDef& graph);
+int FindGraphNodeWithName(absl::string_view name, const GraphDef& graph);
 
 // Returns the index of the function with the given name or -1 if the function
 // does not exist.
-int FindGraphFunctionWithName(StringPiece name,
+int FindGraphFunctionWithName(absl::string_view name,
                               const FunctionDefLibrary& library);
 
 // Returns the index of the first node with the given op or -1 if no such  node
 // exists.
-int FindGraphNodeWithOp(StringPiece op, const GraphDef& graph);
+int FindGraphNodeWithOp(absl::string_view op, const GraphDef& graph);
 
 // Gets the 0th input to a node in the graph.
 NodeDef* GetInputNode(const NodeDef& node, const MutableGraphView& graph);
@@ -115,11 +116,13 @@ std::vector<int> FindAllGraphNodesWithOp(const string& op,
 
 // Sets the node name using `prefix` as a prefix while guaranteeing the name
 // is unique across the graph.
-void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph, NodeDef* node);
+void SetUniqueGraphNodeName(absl::string_view prefix, GraphDef* graph,
+                            NodeDef* node);
 
 // Sets the function name using the `prefix` name as a prefix while guaranteeing
 // the name is unique across the function library.
-void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
+void SetUniqueGraphFunctionName(absl::string_view prefix,
+                                FunctionDefLibrary* library,
                                 FunctionDef* function);
 
 // Copies attribute having name `attribute_name` from node `from` to node
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
index 4ab6d71532..c77fff1656 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -81,7 +82,7 @@ TEST(GraphUtilsTest, AddScalarConstNodeInt64) {
 TEST(GraphUtilsTest, AddScalarConstNodeString) {
   GraphDef graph_def;
   MutableGraphView graph(&graph_def);
-  NodeDef* string_node = AddScalarConstNode<StringPiece>("hello", &graph);
+  NodeDef* string_node = AddScalarConstNode<absl::string_view>("hello", &graph);
   EXPECT_TRUE(
       ContainsGraphNodeWithName(string_node->name(), *graph.GetGraph()));
   EXPECT_EQ(string_node->attr().at("value").tensor().string_val(0), "hello");
diff --git a/tensorflow/core/grappler/optimizers/data/latency_all_edges.cc b/tensorflow/core/grappler/optimizers/data/latency_all_edges.cc
index 9e382aeef9..7954d124a2 100644
--- a/tensorflow/core/grappler/optimizers/data/latency_all_edges.cc
+++ b/tensorflow/core/grappler/optimizers/data/latency_all_edges.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/latency_all_edges.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/clusters/cluster.h"
@@ -42,8 +43,8 @@ NodeDef MakeLatencyNode(const NodeDef& node, MutableGraphView* graph) {
   // Set the input of LatencyDataset node as `node`
   new_node.add_input(node.name());
 
-  NodeDef* tag = graph_utils::AddScalarConstNode<StringPiece>(
-      StringPiece("record_latency_" + node.name()), graph);
+  NodeDef* tag = graph_utils::AddScalarConstNode<absl::string_view>(
+      absl::string_view("record_latency_" + node.name()), graph);
   new_node.add_input(tag->name());
 
   // Set `output_types` and `output_shapes` attributes.
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc
index b676246b31..0db08d0181 100644
--- a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
@@ -41,7 +42,7 @@ TEST(MapAndBatchFusionTest, FuseMapAndBatchNodesIntoOne) {
   NodeDef *range_node = graph_utils::AddNode("", "RangeDataset", range_inputs,
                                              range_attrs, &graph);
   NodeDef *captured_input_node =
-      graph_utils::AddScalarConstNode<StringPiece>("hello", &graph);
+      graph_utils::AddScalarConstNode<absl::string_view>("hello", &graph);
 
   NodeDef *map_node;
   {
@@ -123,7 +124,7 @@ TEST(MapAndBatchFusionTest, FuseMapAndBatchV2NodesIntoOne) {
   NodeDef *range_node = graph_utils::AddNode("", "RangeDataset", range_inputs,
                                              range_attrs, &graph);
   NodeDef *captured_input_node =
-      graph_utils::AddScalarConstNode<StringPiece>("hello", &graph);
+      graph_utils::AddScalarConstNode<absl::string_view>("hello", &graph);
 
   NodeDef *map_node;
   {
@@ -206,7 +207,7 @@ TEST(MapAndBatchFusionTest, FuseParallelMapAndBatchNodesIntoOne) {
   NodeDef *range_node = graph_utils::AddNode("", "RangeDataset", range_inputs,
                                              range_attrs, &graph);
   NodeDef *captured_input_node =
-      graph_utils::AddScalarConstNode<StringPiece>("hello", &graph);
+      graph_utils::AddScalarConstNode<absl::string_view>("hello", &graph);
   NodeDef *num_parallel_calls_node =
       graph_utils::AddScalarConstNode<int>(2, &graph);
 
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc
index f4faf41549..c242a74359 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/map_vectorization.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -30,8 +31,10 @@ namespace {
 using test::function::GDef;
 using test::function::NDef;
 
-NodeDef MakeMapNodeHelper(StringPiece name, StringPiece input_node_name,
-                          StringPiece function_name, StringPiece map_op_name,
+NodeDef MakeMapNodeHelper(absl::string_view name,
+                          absl::string_view input_node_name,
+                          absl::string_view function_name,
+                          absl::string_view map_op_name,
                           gtl::ArraySlice<PartialTensorShape> output_shapes,
                           gtl::ArraySlice<DataType> output_types) {
   return test::function::NDef(
@@ -42,16 +45,16 @@ NodeDef MakeMapNodeHelper(StringPiece name, StringPiece input_node_name,
        {"output_types", output_types}});
 }
 
-NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
-                    StringPiece function_name,
+NodeDef MakeMapNode(absl::string_view name, absl::string_view input_node_name,
+                    absl::string_view function_name,
                     gtl::ArraySlice<PartialTensorShape> output_shapes,
                     gtl::ArraySlice<DataType> output_types) {
   return MakeMapNodeHelper(name, input_node_name, function_name, "MapDataset",
                            output_shapes, output_types);
 }
 
-NodeDef MakeBatchNode(StringPiece name, StringPiece input_node_name,
-                      StringPiece input_batch_size_name,
+NodeDef MakeBatchNode(absl::string_view name, absl::string_view input_node_name,
+                      absl::string_view input_batch_size_name,
                       gtl::ArraySlice<PartialTensorShape> output_shapes,
                       gtl::ArraySlice<DataType> output_types) {
   return NDef(
@@ -60,9 +63,10 @@ NodeDef MakeBatchNode(StringPiece name, StringPiece input_node_name,
       {{"output_types", output_types}, {"output_shapes", output_shapes}});
 }
 
-NodeDef MakeBatchV2Node(StringPiece name, StringPiece input_node_name,
-                        StringPiece input_batch_size_name,
-                        StringPiece input_drop_remainder_name,
+NodeDef MakeBatchV2Node(absl::string_view name,
+                        absl::string_view input_node_name,
+                        absl::string_view input_batch_size_name,
+                        absl::string_view input_drop_remainder_name,
                         gtl::ArraySlice<PartialTensorShape> output_shapes,
                         gtl::ArraySlice<DataType> output_types) {
   return NDef(
@@ -72,7 +76,7 @@ NodeDef MakeBatchV2Node(StringPiece name, StringPiece input_node_name,
       {{"output_types", output_types}, {"output_shapes", output_shapes}});
 }
 
-NodeDef MakeRangeNode(StringPiece name, gtl::ArraySlice<string> inputs) {
+NodeDef MakeRangeNode(absl::string_view name, gtl::ArraySlice<string> inputs) {
   return NDef(name, "RangeDataset", inputs,
               {{"output_shapes", gtl::ArraySlice<TensorShape>({{}})},
                {"output_types", gtl::ArraySlice<DataType>({DT_INT64})}});
diff --git a/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc b/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc
index be1a66df75..2f3da39845 100644
--- a/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/noop_elimination.h"
 #include <tuple>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
@@ -35,15 +36,15 @@ std::vector<std::pair<string, AttrValue>> GetCommonAttributes() {
   return commonAttributes;
 }
 
-NodeDef *MakeUnaryNode(StringPiece node_type, int count, string input_node,
-                       MutableGraphView *graph) {
+NodeDef *MakeUnaryNode(absl::string_view node_type, int count,
+                       string input_node, MutableGraphView *graph) {
   NodeDef *node_count = graph_utils::AddScalarConstNode<int64>(count, graph);
   return graph_utils::AddNode("", node_type,
                               {std::move(input_node), node_count->name()},
                               GetCommonAttributes(), graph);
 }
 
-NodeDef *MakeUnaryNonConstNode(StringPiece node_type, string input_node,
+NodeDef *MakeUnaryNonConstNode(absl::string_view node_type, string input_node,
                                MutableGraphView *graph) {
   NodeDef *node_count = graph_utils::AddScalarPlaceholder(DT_INT32, graph);
   return graph_utils::AddNode("", node_type,
@@ -53,7 +54,7 @@ NodeDef *MakeUnaryNonConstNode(StringPiece node_type, string input_node,
 
 NodeDef *MakeCacheNode(string input_node, MutableGraphView *graph) {
   NodeDef *node_filename =
-      graph_utils::AddScalarConstNode<StringPiece>("", graph);
+      graph_utils::AddScalarConstNode<absl::string_view>("", graph);
   return graph_utils::AddNode("", "CacheDataset",
                               {std::move(input_node), node_filename->name()},
                               GetCommonAttributes(), graph);
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index 8b93b1f2b8..799500350a 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
 
 #include "absl/strings/str_join.h"
@@ -414,7 +415,7 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked,
   }
 
   Graph* g = outer_scope_.get();
-  auto node_builder = [](StringPiece op) {
+  auto node_builder = [](absl::string_view op) {
     return NodeBuilder(strings::StrCat("vectorized/stack/", op), op);
   };
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index e82bfb702a..4d802e7a57 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/grappler/optimizers/data/function_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
@@ -69,7 +70,8 @@ string GetRetval(const FunctionDef& function_def, int index) {
 
 // TODO(rachelim): Use FunctionDefHelper::Create instead
 FunctionDef CreateFunction(
-    StringPiece name, const std::vector<std::pair<string, DataType>>& inputs,
+    absl::string_view name,
+    const std::vector<std::pair<string, DataType>>& inputs,
     const std::vector<std::pair<string, DataType>>& outputs,
     const std::map<string, string>& rets) {
   FunctionDef func;
@@ -91,7 +93,6 @@ FunctionDef CreateFunction(
 
   return func;
 }
-
 ///==================================//
 // Tests for vectorization framework //
 ///==================================//
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 6613768a35..75e32e0b7f 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <unordered_map>
 #include <unordered_set>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -27,7 +28,6 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -204,7 +204,7 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
       bool data_connection = false;
       for (int i = fanout->input_size() - 1; i >= 0; --i) {
         int pos;
-        StringPiece input_name =
+        absl::string_view input_name =
             ParseNodeNameAsStringPiece(fanout->input(i), &pos);
         if (input_name == node_name) {
           if (pos < 0) {
@@ -352,7 +352,7 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
           for (int j = 0; j < consumer->input_size(); ++j) {
             const string& old_input = consumer->input(j);
             int old_input_pos;
-            StringPiece old_input_node_name =
+            absl::string_view old_input_node_name =
                 ParseNodeNameAsStringPiece(old_input, &old_input_pos);
             if (old_input_node_name == node_name) {
               if (old_input_pos >= 0) {
diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc
index 0d4aaf6462..f41a2d7c15 100644
--- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/scoped_allocator.h"
 #include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -198,7 +199,7 @@ Status RemoveEdge(const string& input_edge_name, const string& from_node_name,
 }
 }  // namespace
 
-void ScopedAllocatorOptimizer::ExtendNodeAttr(StringPiece name,
+void ScopedAllocatorOptimizer::ExtendNodeAttr(absl::string_view name,
                                               const std::vector<int32>& values,
                                               NodeDef* node_def) {
   if (HasNodeAttr(*node_def, name)) {
diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h
index 13589f536c..e7a1ec154b 100644
--- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h
@@ -18,6 +18,7 @@ limitations under the License.
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
 
@@ -66,7 +67,8 @@ class ScopedAllocatorOptimizer : public GraphOptimizer {
 
   // Appends values to the attr value under name in node_def, if present.
   // If not present does an assignment.
-  static void ExtendNodeAttr(StringPiece name, const std::vector<int32>& values,
+  static void ExtendNodeAttr(absl::string_view name,
+                             const std::vector<int32>& values,
                              NodeDef* node_def);
 
   // Class that knows how to do graph rewriting for a particular kind of Op in
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 5867d01324..a890b56b4d 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -20,12 +20,12 @@ limitations under the License.
 #include <queue>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -146,9 +146,9 @@ bool IsSameInput(const string& name1, const string& name2) {
     return true;
   }
   int position1;
-  StringPiece node1 = ParseNodeNameAsStringPiece(name1, &position1);
+  absl::string_view node1 = ParseNodeNameAsStringPiece(name1, &position1);
   int position2;
-  StringPiece node2 = ParseNodeNameAsStringPiece(name2, &position2);
+  absl::string_view node2 = ParseNodeNameAsStringPiece(name2, &position2);
   return (position1 == position2) && (node1 == node2);
 }
 
@@ -251,7 +251,7 @@ int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map) {
       if (node_as_input == node.name()) {
         ++num_outputs;
       } else {
-        const StringPiece name =
+        const absl::string_view name =
             ParseNodeNameAsStringPiece(node_as_input, &pos);
         if (name == node.name()) {
           ++num_outputs;
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index 0168ab1da3..c7a8a517bb 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -19,12 +19,12 @@ limitations under the License.
 #include <functional>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
@@ -125,8 +125,8 @@ inline int NodePositionIfSameNode(const string& input_name,
   if (input_it == input_name.end()) {
     return is_ctrl ? -1 : 0;
   } else if (*input_it++ == ':') {
-    StringPiece remaining(&(*input_it),
-                          std::distance(input_it, input_name.end()));
+    absl::string_view remaining(&(*input_it),
+                                std::distance(input_it, input_name.end()));
     int position;
     if (!strings::safe_strto32(remaining, &position)) {
       return -2;
@@ -139,18 +139,18 @@ inline int NodePositionIfSameNode(const string& input_name,
 
 // Return the node name corresponding to 'name' if name is valid, or the empty
 // string otherwise.
-inline StringPiece NodeNameAsStringPiece(const string& name) {
+inline absl::string_view NodeNameAsStringPiece(const string& name) {
   static const string empty;
-  if (name.empty()) return StringPiece(empty);
+  if (name.empty()) return absl::string_view(empty);
   const auto begin_it = name[0] == '^' ? name.begin() + 1 : name.begin();
   auto end_it = begin_it;
   while (end_it != name.end() && *end_it != ':') {
     ++end_it;
   }
   if (end_it != name.end() && *end_it != ':') {
-    return StringPiece(empty);
+    return absl::string_view(empty);
   }
-  return StringPiece(&(*begin_it), std::distance(begin_it, end_it));
+  return absl::string_view(&(*begin_it), std::distance(begin_it, end_it));
 }
 
 // Return the node name corresponding to 'name' if name is valid, or the empty
@@ -160,12 +160,12 @@ inline string NodeName(const string& name) {
 }
 
 // Returns the node name and position in a single call.
-inline StringPiece ParseNodeNameAsStringPiece(const string& name,
-                                              int* position) {
+inline absl::string_view ParseNodeNameAsStringPiece(const string& name,
+                                                    int* position) {
   static const string empty;
   if (name.empty()) {
     *position = 0;
-    return StringPiece(empty);
+    return absl::string_view(empty);
   }
   const bool is_ctrl = name[0] == '^';
   const auto begin_it = is_ctrl ? name.begin() + 1 : name.begin();
@@ -174,15 +174,17 @@ inline StringPiece ParseNodeNameAsStringPiece(const string& name,
   while (end_it != name.end() && *end_it != ':') {
     ++end_it;
   }
-  const StringPiece node_name(&(*begin_it), std::distance(begin_it, end_it));
+  const absl::string_view node_name(&(*begin_it),
+                                    std::distance(begin_it, end_it));
   if (end_it != name.end()) {
     if (*end_it != ':') {
-      return StringPiece(empty);
+      return absl::string_view(empty);
     } else if (!is_ctrl) {
       ++end_it;
-      StringPiece remaining(&(*end_it), std::distance(end_it, name.end()));
+      absl::string_view remaining(&(*end_it),
+                                  std::distance(end_it, name.end()));
       if (!strings::safe_strto32(remaining, position)) {
-        return StringPiece(empty);
+        return absl::string_view(empty);
       }
     }
   }
diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD
index bdbb8836e1..ae251577c7 100644
--- a/tensorflow/core/grappler/utils/BUILD
+++ b/tensorflow/core/grappler/utils/BUILD
@@ -170,6 +170,7 @@ cc_library(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc
index 6861fb423c..e3b2984d85 100644
--- a/tensorflow/core/grappler/utils/functions.cc
+++ b/tensorflow/core/grappler/utils/functions.cc
@@ -16,6 +16,7 @@ limitations under the License.
 
 #include <unordered_map>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -108,8 +109,8 @@ Status GrapplerFunctionConnectivity::ExpandFunctionDefInput(
   string node_output;
   int position = -1;
 
-  StringPiece capture;
-  StringPiece remaining;
+  absl::string_view capture;
+  absl::string_view remaining;
 
   // Parse "node_name"
   if (Scanner(func_def_input)
diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc
index 9b6c1f690b..a81d8e7bcd 100644
--- a/tensorflow/core/grappler/utils_test.cc
+++ b/tensorflow/core/grappler/utils_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/utils.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/grappler_item.h"
@@ -371,16 +372,17 @@ BM_NodePositionIfSameNode("^foo/bar/baz", "foo/bar/baz", Match_Ctrl);
 BM_NodePositionIfSameNode("blah", "foo/bar/baz", NoMatch_0);
 BM_NodePositionIfSameNode("foo/bar/baz/gnu", "foo/bar/baz", NoMatch_end);
 
-#define BM_ParseNodeNameAsStringPiece(I, NAME)                               \
-  static void BM_ParseNodeNameAsStringPiece_##NAME(int iters) {              \
-    string input = I;                                                        \
-    for (int i = 0; i < iters; ++i) {                                        \
-      int position;                                                          \
-      const StringPiece name = ParseNodeNameAsStringPiece(input, &position); \
-      CHECK_GE(position, -1);                                                \
-      CHECK(!name.empty());                                                  \
-    }                                                                        \
-  }                                                                          \
+#define BM_ParseNodeNameAsStringPiece(I, NAME)                  \
+  static void BM_ParseNodeNameAsStringPiece_##NAME(int iters) { \
+    string input = I;                                           \
+    for (int i = 0; i < iters; ++i) {                           \
+      int position;                                             \
+      const absl::string_view name =                            \
+          ParseNodeNameAsStringPiece(input, &position);         \
+      CHECK_GE(position, -1);                                   \
+      CHECK(!name.empty());                                     \
+    }                                                           \
+  }                                                             \
   BENCHMARK(BM_ParseNodeNameAsStringPiece_##NAME)
 
 BM_ParseNodeNameAsStringPiece("foo", foo);
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 1f401b257b..5567288f1e 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -578,6 +578,7 @@ ARRAY_DEPS = [
     ":gather_functor",
     ":ops_util",
     ":transpose_functor",
+    "@com_google_absl//absl/strings",
     "//tensorflow/core:array_grad",
     "//tensorflow/core:array_ops_op_lib",
     "//tensorflow/core:core_cpu",
@@ -1745,6 +1746,7 @@ DATA_FLOW_DEPS = [
     ":tensor_array",
     ":typed_conditional_accumulator_base",
     ":typed_queue",
+    "@com_google_absl//absl/strings",
     "//third_party/eigen3",
     "//tensorflow/core:core_cpu",
     "//tensorflow/core:data_flow_ops_op_lib",
@@ -2104,6 +2106,7 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:list_ops_op_lib",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -2187,6 +2190,7 @@ IMAGE_DEPS = [
     ":bounds_check",
     ":eigen_helpers",
     ":image_resizer_state",
+    "@com_google_absl//absl/strings",
     "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:gif_internal",
@@ -2618,6 +2622,7 @@ tf_cc_tests(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/util/tensor_bundle",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3170,6 +3175,7 @@ tf_cc_tests(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3316,6 +3322,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3337,6 +3344,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3461,6 +3469,7 @@ tf_kernel_library(
         ":image_resizer_state",
         ":fill_functor",
         ":ops_util",
+        "@com_google_absl//absl/strings",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -3925,6 +3934,7 @@ cc_library(
 )
 
 PARSING_DEPS = [
+    "@com_google_absl//absl/strings",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:parsing_ops_op_lib",
@@ -4110,6 +4120,7 @@ cc_library(
 )
 
 SPARSE_DEPS = [
+    "@com_google_absl//absl/strings",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:sparse_ops_op_lib",
@@ -4528,6 +4539,7 @@ cc_library(
 STRING_DEPS = [
     ":bounds_check",
     ":string_util",
+    "@com_google_absl//absl/strings",
     "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
@@ -4550,7 +4562,7 @@ tf_kernel_library(
 tf_kernel_library(
     name = "string_format_op",
     prefix = "string_format_op",
-    deps = STRING_DEPS + ["@com_google_absl//absl/strings"],
+    deps = STRING_DEPS,
 )
 
 tf_cc_test(
@@ -4803,6 +4815,7 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:word2vec_ops",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -4841,6 +4854,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -6324,6 +6338,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/kernels/conv_grad_ops.cc b/tensorflow/core/kernels/conv_grad_ops.cc
index 507720c998..00abebd895 100644
--- a/tensorflow/core/kernels/conv_grad_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_ops.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 // See docs in ../ops/nn_ops.cc.
 
+#include "absl/strings/string_view.h"
 #define USE_EIGEN_TENSOR
 #define EIGEN_USE_THREADS
 
@@ -56,7 +57,7 @@ int ConvBackpropDimensions::SpatialPadding(const Padding& padding,
 // while the original version only handles the cases where dilation_rates equal
 // to 1.
 Status ConvBackpropExtractAndVerifyDimensionV2(
-    StringPiece label, const TensorShape& input_shape,
+    absl::string_view label, const TensorShape& input_shape,
     const TensorShape& filter_shape, const TensorShape& output_shape,
     const gtl::ArraySlice<int32>& dilations, const std::vector<int32>& strides,
     Padding padding, int spatial_dim, int filter_spatial_dim,
@@ -95,7 +96,7 @@ Status ConvBackpropExtractAndVerifyDimensionV2(
 }
 
 Status ConvBackpropExtractAndVerifyDimension(
-    StringPiece label, const TensorShape& input_shape,
+    absl::string_view label, const TensorShape& input_shape,
     const TensorShape& filter_shape, const TensorShape& output_shape,
     const std::vector<int32>& strides, Padding padding, int spatial_dim,
     int filter_spatial_dim, ConvBackpropSpatialDimension* dim) {
@@ -106,8 +107,9 @@ Status ConvBackpropExtractAndVerifyDimension(
 }
 
 Status ConvBackpropComputeDimensionsV2(
-    StringPiece label, int num_spatial_dims, const TensorShape& input_shape,
-    const TensorShape& filter_shape, const TensorShape& out_backprop_shape,
+    absl::string_view label, int num_spatial_dims,
+    const TensorShape& input_shape, const TensorShape& filter_shape,
+    const TensorShape& out_backprop_shape,
     const gtl::ArraySlice<int32>& dilations, const std::vector<int32>& strides,
     Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) {
   // The + 2 in the following line is for the batch and feature dimensions.
@@ -159,13 +161,11 @@ Status ConvBackpropComputeDimensionsV2(
   return Status::OK();
 }
 
-Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims,
-                                     const TensorShape& input_shape,
-                                     const TensorShape& filter_shape,
-                                     const TensorShape& out_backprop_shape,
-                                     const std::vector<int32>& strides,
-                                     Padding padding, TensorFormat data_format,
-                                     ConvBackpropDimensions* dims) {
+Status ConvBackpropComputeDimensions(
+    absl::string_view label, int num_spatial_dims,
+    const TensorShape& input_shape, const TensorShape& filter_shape,
+    const TensorShape& out_backprop_shape, const std::vector<int32>& strides,
+    Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) {
   static constexpr std::array<int32, 5> one_dilations = {{1, 1, 1, 1, 1}};
   return ConvBackpropComputeDimensionsV2(
       label, num_spatial_dims, input_shape, filter_shape, out_backprop_shape,
diff --git a/tensorflow/core/kernels/conv_grad_ops.h b/tensorflow/core/kernels/conv_grad_ops.h
index 9551959463..24a4deb8d3 100644
--- a/tensorflow/core/kernels/conv_grad_ops.h
+++ b/tensorflow/core/kernels/conv_grad_ops.h
@@ -161,8 +161,8 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 
@@ -249,19 +249,18 @@ struct ConvBackpropDimensions {
 // Common code between implementations of Conv?DBackpropInput and
 // Conv?DBackpropFilter. Verifies that the dimensions all match, and computes
 // sizes/padding for the spatial dimensions.
-Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims,
-                                     const TensorShape& input_shape,
-                                     const TensorShape& filter_shape,
-                                     const TensorShape& out_backprop_shape,
-                                     const std::vector<int32>& strides,
-                                     Padding padding, TensorFormat data_format,
-                                     ConvBackpropDimensions* dims);
+Status ConvBackpropComputeDimensions(
+    absl::string_view label, int num_spatial_dims,
+    const TensorShape& input_shape, const TensorShape& filter_shape,
+    const TensorShape& out_backprop_shape, const std::vector<int32>& strides,
+    Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims);
 
 // The V2 version computes the same outputs with arbitrary dilation rate.
 // TODO(b/67112639): Merge V2 versions and the original versions eventually.
 Status ConvBackpropComputeDimensionsV2(
-    StringPiece label, int num_spatial_dims, const TensorShape& input_shape,
-    const TensorShape& filter_shape, const TensorShape& out_backprop_shape,
+    absl::string_view label, int num_spatial_dims,
+    const TensorShape& input_shape, const TensorShape& filter_shape,
+    const TensorShape& out_backprop_shape,
     const gtl::ArraySlice<int32>& dilations, const std::vector<int32>& strides,
     Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims);
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 37c1c54786..6f6ba6943b 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -42,6 +42,7 @@ cc_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -639,6 +640,7 @@ tf_kernel_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:session_options",
         "//tensorflow/core/kernels:ops_util",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -680,6 +682,7 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core/util/tensor_bundle",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -794,6 +797,7 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core/kernels:ops_util",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc
index f2419db3dc..ced5fb1971 100644
--- a/tensorflow/core/kernels/data/cache_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/dataset.h"
@@ -339,7 +340,7 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
           if (dataset()->env_->FileExists(lockfile_).ok()) {
             // Attempt to read the contents of the lockfile.
             char contents_scratch[151] = {0};  // Initialize all to 0.
-            StringPiece contents;
+            absl::string_view contents;
             std::unique_ptr<RandomAccessFile> file;
             if (dataset()->env_->NewRandomAccessFile(lockfile_, &file).ok()) {
               file->Read(0, 150, &contents, contents_scratch).IgnoreError();
@@ -458,7 +459,7 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
               *end_of_sequence = true;
               return Status::OK();
             }
-            StringPiece key = reader_.key();
+            absl::string_view key = reader_.key();
             DCHECK_EQ(key, dataset()->FormatName(cur_index_, i));
             TF_RETURN_IF_ERROR(reader_.ReadCurrent(&(*out_tensors)[i]));
             TF_RETURN_IF_ERROR(reader_.status());
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index a40f7f2146..f6e45bf0f5 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/kernels/data/dataset_utils.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -68,8 +69,8 @@ std::vector<bool> ComputeMoveVector(const std::vector<int>& indices) {
 
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
-    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
-    std::unique_ptr<IteratorBase>* out_iterator) {
+    int64 thread_index, CapturedFunction* captured_func,
+    absl::string_view prefix, std::unique_ptr<IteratorBase>* out_iterator) {
   std::vector<Tensor> return_values;
 
   TF_RETURN_IF_ERROR(
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index d777062293..fbc26872f3 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
 #define TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
@@ -44,8 +45,8 @@ std::vector<bool> ComputeMoveVector(const std::vector<int>& indices);
 
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
-    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
-    std::unique_ptr<IteratorBase>* out_iterator);
+    int64 thread_index, CapturedFunction* captured_func,
+    absl::string_view prefix, std::unique_ptr<IteratorBase>* out_iterator);
 
 // Returns Status::OK() if `expected` and `received` types match,
 // errors::InvalidArgument otherwise.
diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD
index 441bdc2898..07437e0c5f 100644
--- a/tensorflow/core/kernels/data/experimental/BUILD
+++ b/tensorflow/core/kernels/data/experimental/BUILD
@@ -66,6 +66,7 @@ tf_kernel_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc b/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc
index 7451ca4cb1..8114a5cf6e 100644
--- a/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 // See docs in ../ops/parsing_ops.cc.
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/op.h"
@@ -379,7 +380,7 @@ class CSVDatasetOp : public DatasetOpKernel {
             // Reached EOF, and last field is empty
             *end_of_record = true;
             if (include) {
-              return FieldToOutput(ctx, StringPiece(), out_tensors);
+              return FieldToOutput(ctx, absl::string_view(), out_tensors);
             } else {
               return Status::OK();
             }
@@ -460,8 +461,9 @@ class CSVDatasetOp : public DatasetOpKernel {
               if (errors::IsOutOfRange(s)) {
                 // This was the last field. We are done
                 *end_of_record = true;
-                parse_result.Update(QuotedFieldToOutput(
-                    ctx, StringPiece(), out_tensors, earlier_pieces, include));
+                parse_result.Update(
+                    QuotedFieldToOutput(ctx, absl::string_view(), out_tensors,
+                                        earlier_pieces, include));
                 return parse_result;
               } else if (!s.ok()) {
                 return s;
@@ -472,14 +474,14 @@ class CSVDatasetOp : public DatasetOpKernel {
             pos_++;
             if (next == dataset()->delim_) {
               parse_result.Update(QuotedFieldToOutput(
-                  ctx, StringPiece(&buffer_[start], pos_ - 1 - start),
+                  ctx, absl::string_view(&buffer_[start], pos_ - 1 - start),
                   out_tensors, earlier_pieces, include));
               return parse_result;
 
             } else if (next == '\n' || next == '\r') {
               *end_of_record = true;
               parse_result.Update(QuotedFieldToOutput(
-                  ctx, StringPiece(&buffer_[start], pos_ - 1 - start),
+                  ctx, absl::string_view(&buffer_[start], pos_ - 1 - start),
                   out_tensors, earlier_pieces, include));
               if (next == '\r') SkipNewLineIfNecessary();
               return parse_result;
@@ -500,7 +502,7 @@ class CSVDatasetOp : public DatasetOpKernel {
       // Converts quoted field to an output tensor, removing the starting
       // and ending quotes from it and unescaping double quotations if
       // necessary.
-      Status QuotedFieldToOutput(IteratorContext* ctx, StringPiece field,
+      Status QuotedFieldToOutput(IteratorContext* ctx, absl::string_view field,
                                  std::vector<Tensor>* out_tensors,
                                  const std::vector<Piece>& earlier_pieces,
                                  bool include) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
@@ -529,17 +531,17 @@ class CSVDatasetOp : public DatasetOpKernel {
         // the opening quotation mark of the quoted field.
         bool skip_next_quote = true;
         for (const Piece& p : earlier_pieces) {
-          AppendUnescapedPiece(StringPiece(&p.buffer[p.start], p.len),
+          AppendUnescapedPiece(absl::string_view(&p.buffer[p.start], p.len),
                                &field_complete, &skip_next_quote);
         }
         AppendUnescapedPiece(field, &field_complete, &skip_next_quote);
-        StringPiece result = StringPiece(field_complete);
+        absl::string_view result = absl::string_view(field_complete);
         result.remove_suffix(1);  // Skip final quote
 
         return FieldToOutput(ctx, result, out_tensors);
       }
 
-      void AppendUnescapedPiece(StringPiece piece, string* field_complete,
+      void AppendUnescapedPiece(absl::string_view piece, string* field_complete,
                                 bool* skip_next_quote) {
         size_t from = 0;
         size_t found = piece.find('\"', from);
@@ -578,8 +580,8 @@ class CSVDatasetOp : public DatasetOpKernel {
               // Whatever we have is the last field of the last record
               *end_of_record = true;
               parse_result.Update(UnquotedFieldToOutput(
-                  ctx, StringPiece(&buffer_[start], pos_ - start), out_tensors,
-                  earlier_pieces, include));
+                  ctx, absl::string_view(&buffer_[start], pos_ - start),
+                  out_tensors, earlier_pieces, include));
               return parse_result;
             } else if (!s.ok()) {
               return s;  // Surface all other errors to caller
@@ -590,8 +592,8 @@ class CSVDatasetOp : public DatasetOpKernel {
 
           if (ch == dataset()->delim_) {
             parse_result.Update(UnquotedFieldToOutput(
-                ctx, StringPiece(&buffer_[start], pos_ - start), out_tensors,
-                earlier_pieces, include));
+                ctx, absl::string_view(&buffer_[start], pos_ - start),
+                out_tensors, earlier_pieces, include));
             pos_++;
             return parse_result;
           }
@@ -599,8 +601,8 @@ class CSVDatasetOp : public DatasetOpKernel {
             // need special case to skip over first \n of record if the line
             // breaks are \r\n
             parse_result.Update(UnquotedFieldToOutput(
-                ctx, StringPiece(&buffer_[start], pos_ - start), out_tensors,
-                earlier_pieces, include));
+                ctx, absl::string_view(&buffer_[start], pos_ - start),
+                out_tensors, earlier_pieces, include));
             *end_of_record = true;
             pos_++;
             if (ch == '\r') SkipNewLineIfNecessary();
@@ -630,7 +632,7 @@ class CSVDatasetOp : public DatasetOpKernel {
       }
 
       // Given a field, converts it to the right output tensor type
-      Status FieldToOutput(IteratorContext* ctx, StringPiece field,
+      Status FieldToOutput(IteratorContext* ctx, absl::string_view field,
                            std::vector<Tensor>* out_tensors) {
         size_t output_idx = out_tensors->size();
         if (output_idx >= dataset()->out_type_.size()) {
@@ -749,7 +751,8 @@ class CSVDatasetOp : public DatasetOpKernel {
       // Given a string field, and its index in the output,
       // converts it to a Tensor of the right type and adds it to the
       // out_tensors vector.
-      Status UnquotedFieldToOutput(IteratorContext* ctx, StringPiece field,
+      Status UnquotedFieldToOutput(IteratorContext* ctx,
+                                   absl::string_view field,
                                    std::vector<Tensor>* out_tensors,
                                    const std::vector<Piece>& earlier_pieces,
                                    bool include) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
diff --git a/tensorflow/core/kernels/data/experimental/indexed_dataset.h b/tensorflow/core/kernels/data/experimental/indexed_dataset.h
index 27a8360cbc..66bdbc5593 100644
--- a/tensorflow/core/kernels/data/experimental/indexed_dataset.h
+++ b/tensorflow/core/kernels/data/experimental/indexed_dataset.h
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_DATA_EXPERIMENTAL_INDEXED_DATASET_H_
 #define TENSORFLOW_CORE_KERNELS_DATA_EXPERIMENTAL_INDEXED_DATASET_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/op_kernel.h"
 
@@ -84,7 +85,8 @@ class IndexedDatasetOpKernel : public OpKernel {
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
-                             const StringPiece& argument_name, T* output) {
+                             const absl::string_view& argument_name,
+                             T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index 7a833668ac..fca61d4e8f 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/kernels/data/iterator_ops.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
 #include "tensorflow/core/common_runtime/threadpool_device.h"
@@ -213,19 +214,19 @@ class VariantTensorDataReader : public IteratorStateReader {
   // pre-processing did not have errors.
   Status status() const { return status_; }
 
-  Status ReadScalar(StringPiece key, int64* val) override {
+  Status ReadScalar(absl::string_view key, int64* val) override {
     return ReadScalarInternal(key, val);
   }
 
-  Status ReadScalar(StringPiece key, string* val) override {
+  Status ReadScalar(absl::string_view key, string* val) override {
     return ReadScalarInternal(key, val);
   }
 
-  Status ReadTensor(StringPiece key, Tensor* val) override {
+  Status ReadTensor(absl::string_view key, Tensor* val) override {
     return ReadTensorInternal(key, val);
   }
 
-  bool Contains(StringPiece key) override {
+  bool Contains(absl::string_view key) override {
     return map_.find(string(key)) != map_.end();
   }
 
@@ -246,7 +247,7 @@ class VariantTensorDataReader : public IteratorStateReader {
   }
 
   template <typename T>
-  Status ReadScalarInternal(StringPiece key, T* val) {
+  Status ReadScalarInternal(absl::string_view key, T* val) {
     if (map_.find(string(key)) == map_.end()) {
       return errors::NotFound(key);
     }
@@ -254,7 +255,7 @@ class VariantTensorDataReader : public IteratorStateReader {
     return Status::OK();
   }
 
-  Status ReadTensorInternal(StringPiece key, Tensor* val) {
+  Status ReadTensorInternal(absl::string_view key, Tensor* val) {
     if (map_.find(string(key)) == map_.end()) {
       return errors::NotFound(key);
     }
@@ -273,15 +274,15 @@ class VariantTensorDataWriter : public IteratorStateWriter {
   // Does not take ownership of data.
   explicit VariantTensorDataWriter(VariantTensorData* data) : data_(data) {}
 
-  Status WriteScalar(StringPiece key, const int64 val) override {
+  Status WriteScalar(absl::string_view key, const int64 val) override {
     return WriteScalarInternal(key, val);
   }
 
-  Status WriteScalar(StringPiece key, const string& val) override {
+  Status WriteScalar(absl::string_view key, const string& val) override {
     return WriteScalarInternal(key, val);
   }
 
-  Status WriteTensor(StringPiece key, const Tensor& val) override {
+  Status WriteTensor(absl::string_view key, const Tensor& val) override {
     return WriteTensorInternal(key, val);
   }
 
@@ -297,13 +298,13 @@ class VariantTensorDataWriter : public IteratorStateWriter {
 
  private:
   template <typename T>
-  Status WriteScalarInternal(StringPiece key, const T& val) {
+  Status WriteScalarInternal(absl::string_view key, const T& val) {
     Tensor val_t = Tensor(DataTypeToEnum<T>::v(), TensorShape({}));
     val_t.scalar<T>()() = val;
     return WriteTensorInternal(key, val_t);
   }
 
-  Status WriteTensorInternal(StringPiece key, const Tensor& val) {
+  Status WriteTensorInternal(absl::string_view key, const Tensor& val) {
     // Write key to the metadata proto. This gets written to `data_`
     // when `Flush()` is called. We do this lazily to avoid multiple
     // serialization calls.
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index 3f76695bb1..b3f7ab9fd7 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/kernels/data/dataset_utils.h"
@@ -36,7 +37,8 @@ class ToTFRecordOp : public AsyncOpKernel {
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
-                             const StringPiece& argument_name, T* output) {
+                             const absl::string_view& argument_name,
+                             T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc
index ae451be7e2..94c21f4da3 100644
--- a/tensorflow/core/kernels/decode_bmp_op.cc
+++ b/tensorflow/core/kernels/decode_bmp_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 // See docs in ../ops/image_ops.cc
 
 #include <memory>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -54,7 +55,7 @@ class DecodeBmpOp : public OpKernel {
                                         contents.shape().DebugString()));
 
     // Start decoding image to get shape details
-    const StringPiece input = contents.scalar<string>()();
+    const absl::string_view input = contents.scalar<string>()();
 
     OP_REQUIRES(context, (32 <= input.size()),
                 errors::InvalidArgument("Incomplete bmp content, requires at "
diff --git a/tensorflow/core/kernels/decode_csv_op.cc b/tensorflow/core/kernels/decode_csv_op.cc
index 6bfb5bd5bc..d2999102b6 100644
--- a/tensorflow/core/kernels/decode_csv_op.cc
+++ b/tensorflow/core/kernels/decode_csv_op.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 // See docs in ../ops/parsing_ops.cc.
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -82,7 +83,7 @@ class DecodeCSVOp : public OpKernel {
     }
 
     for (int64 i = 0; i < records_size; ++i) {
-      const StringPiece record(records_t(i));
+      const absl::string_view record(records_t(i));
       std::vector<string> fields;
       ExtractFields(ctx, record, &fields);
       OP_REQUIRES(ctx, fields.size() == out_type_.size(),
@@ -205,7 +206,7 @@ class DecodeCSVOp : public OpKernel {
   bool select_all_cols_;
   string na_value_;
 
-  void ExtractFields(OpKernelContext* ctx, StringPiece input,
+  void ExtractFields(OpKernelContext* ctx, absl::string_view input,
                      std::vector<string>* result) {
     int64 current_idx = 0;
     int64 num_fields_parsed = 0;
diff --git a/tensorflow/core/kernels/decode_image_op.cc b/tensorflow/core/kernels/decode_image_op.cc
index 2cafa44f37..ed1b85042a 100644
--- a/tensorflow/core/kernels/decode_image_op.cc
+++ b/tensorflow/core/kernels/decode_image_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 // See docs in ../ops/image_ops.cc
 
 #include <memory>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -39,7 +40,7 @@ enum FileFormat {
 };
 
 // Classify the contents of a file based on starting bytes (the magic number).
-FileFormat ClassifyFileFormat(StringPiece data) {
+FileFormat ClassifyFileFormat(absl::string_view data) {
   // The 4th byte of JPEG is '\xe0' or '\xe1', so check just the first three
   if (str_util::StartsWith(data, "\xff\xd8\xff")) return kJpgFormat;
   if (str_util::StartsWith(data, "\x89PNG\r\n\x1a\n")) return kPngFormat;
@@ -47,7 +48,7 @@ FileFormat ClassifyFileFormat(StringPiece data) {
   return kUnknownFormat;
 }
 
-string FileFormatString(FileFormat magic, StringPiece data) {
+string FileFormatString(FileFormat magic, absl::string_view data) {
   switch (magic) {
     case kPngFormat:
       return "PNG";
@@ -152,7 +153,7 @@ class DecodeImageOp : public OpKernel {
                                         contents.shape().DebugString()));
 
     // Determine format
-    const StringPiece input = contents.scalar<string>()();
+    const absl::string_view input = contents.scalar<string>()();
     const auto magic = ClassifyFileFormat(input);
     OP_REQUIRES(
         context,
@@ -183,7 +184,7 @@ class DecodeImageOp : public OpKernel {
     }
   }
 
-  void DecodeJpeg(OpKernelContext* context, StringPiece input) {
+  void DecodeJpeg(OpKernelContext* context, absl::string_view input) {
     OP_REQUIRES(context, channels_ == 0 || channels_ == 1 || channels_ == 3,
                 errors::InvalidArgument(
                     "channels must be 0, 1, or 3 for JPEG, got ", channels_));
@@ -231,7 +232,7 @@ class DecodeImageOp : public OpKernel {
                                 input.size()));
   }
 
-  void DecodePng(OpKernelContext* context, StringPiece input) {
+  void DecodePng(OpKernelContext* context, absl::string_view input) {
     // Start decoding png to get shape details
     png::DecodeContext decode;
     OP_REQUIRES(context,
@@ -287,7 +288,7 @@ class DecodeImageOp : public OpKernel {
     }
   }
 
-  void DecodeGif(OpKernelContext* context, StringPiece input) {
+  void DecodeGif(OpKernelContext* context, absl::string_view input) {
     OP_REQUIRES(context, channels_ == 0 || channels_ == 3,
                 errors::InvalidArgument("channels must be 0 or 3 for GIF, got ",
                                         channels_));
diff --git a/tensorflow/core/kernels/deep_conv2d.cc b/tensorflow/core/kernels/deep_conv2d.cc
index 1aa8c72d66..8d6ab69f62 100644
--- a/tensorflow/core/kernels/deep_conv2d.cc
+++ b/tensorflow/core/kernels/deep_conv2d.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #define USE_EIGEN_TENSOR
 #define EIGEN_USE_THREADS
 
@@ -81,7 +82,7 @@ static int64 GetDirectConvCost(int filter_rows, int filter_cols, int in_depth,
 static bool ReadBoolFromEnvVar(const char* env_var_name, bool default_val) {
   const char* tf_env_var_val = getenv(env_var_name);
   if (tf_env_var_val != nullptr) {
-    StringPiece tf_env_var_val_str(tf_env_var_val);
+    absl::string_view tf_env_var_val_str(tf_env_var_val);
     if (tf_env_var_val_str == "0") {
       return false;
     }
diff --git a/tensorflow/core/kernels/extract_jpeg_shape_op.cc b/tensorflow/core/kernels/extract_jpeg_shape_op.cc
index 60d798af56..e36f8c37e7 100644
--- a/tensorflow/core/kernels/extract_jpeg_shape_op.cc
+++ b/tensorflow/core/kernels/extract_jpeg_shape_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 // See docs in ../ops/image_ops.cc
 
 #include <memory>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -41,7 +42,7 @@ class ExtractJpegShapeOp : public OpKernel {
     OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents.shape()),
                 errors::InvalidArgument("contents must be scalar, got shape ",
                                         contents.shape().DebugString()));
-    const StringPiece input = contents.scalar<string>()();
+    const absl::string_view input = contents.scalar<string>()();
     OP_REQUIRES(context, input.size() <= std::numeric_limits<int>::max(),
                 errors::InvalidArgument("JPEG contents are too large for int: ",
                                         input.size()));
diff --git a/tensorflow/core/kernels/gpu_utils.h b/tensorflow/core/kernels/gpu_utils.h
index 86146f75f4..a28247c1f4 100644
--- a/tensorflow/core/kernels/gpu_utils.h
+++ b/tensorflow/core/kernels/gpu_utils.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
 #define TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
 
+#include "absl/strings/string_view.h"
 #if GOOGLE_CUDA
 
 #include <unordered_map>
@@ -120,7 +121,7 @@ class AutoTuneMap {
     }
   };
 
-  string GetActionSummary(StringPiece action, const Parameters& params,
+  string GetActionSummary(absl::string_view action, const Parameters& params,
                           const Config& config) {
     return strings::Printf("autotune_map %s %s: %s -> (%s)", name_.c_str(),
                            string(action).c_str(), params.ToString().c_str(),
diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD
index 4870d9ae20..f332c1fbe2 100644
--- a/tensorflow/core/kernels/hexagon/BUILD
+++ b/tensorflow/core/kernels/hexagon/BUILD
@@ -40,6 +40,7 @@ tf_cc_test(
         "//tensorflow/core/kernels:remote_fused_graph_ops",
         "//tensorflow/core/kernels:reshape_op",
         "//tensorflow/core/kernels:softmax_op",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -70,6 +71,7 @@ tf_kernel_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:remote_fused_graph_execute_utils",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc b/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc
index 40bf5a4dc7..655b4b255d 100644
--- a/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h"
 
@@ -153,7 +154,7 @@ GraphTransferUtils::BuildRemoteFusedGraphExecuteInfo(
                      .Attr("Tinputs", input_types)
                      .Attr("Toutputs", output_types)
                      .Attr("serialized_remote_fused_graph_execute_info",
-                           StringPiece(execute_info.SerializeAsString()));
+                           absl::string_view(execute_info.SerializeAsString()));
   CHECK(scope.ok());
   scope.UpdateBuilder(&builder);
   scope.UpdateStatus(builder.Finalize(scope.graph(), &node));
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer.cc b/tensorflow/core/kernels/hexagon/graph_transferer.cc
index 477e729dcb..345d228561 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transferer.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 #include <cinttypes>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/graph_transfer_info.pb.h"
 #include "tensorflow/core/framework/op.h"
@@ -587,7 +588,7 @@ bool GraphTransferer::HasPaddingAndStrides(const Node& node) {
 }
 
 bool GraphTransferer::NeedsToAddRank(const Node& node) {
-  const StringPiece op_type(node.type_string());
+  const absl::string_view op_type(node.type_string());
   if (op_type == "Transpose" || op_type == "ExpandDims") {
     return true;
   }
@@ -595,7 +596,7 @@ bool GraphTransferer::NeedsToAddRank(const Node& node) {
 }
 
 bool GraphTransferer::IsPadNode(const Node& node) {
-  const StringPiece op_type(node.type_string());
+  const absl::string_view op_type(node.type_string());
   if (op_type == "Pad") {
     return true;
   }
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
index 765795b1f4..5d1734e477 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <memory>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/core/framework/graph_transfer_info.pb.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -99,7 +100,7 @@ static Output BuildSoftmaxOps(const Scope& scope, const Input& logits) {
 static Output BuildConv2DOps(const Scope& scope, const Input& input,
                              const Input& filter,
                              const gtl::ArraySlice<int>& strides,
-                             const StringPiece& padding) {
+                             const absl::string_view& padding) {
   EXPECT_TRUE(scope.ok());
   auto _input = ops::AsNodeOut(scope, input);
   EXPECT_TRUE(scope.ok());
@@ -123,7 +124,7 @@ static Output BuildConv2DOps(const Scope& scope, const Input& input,
 static Output BuildMaxPoolOps(const Scope& scope, const Input& input,
                               const gtl::ArraySlice<int>& ksize,
                               const gtl::ArraySlice<int>& strides,
-                              const StringPiece& padding) {
+                              const absl::string_view& padding) {
   EXPECT_TRUE(scope.ok());
   auto _input = ops::AsNodeOut(scope, input);
   EXPECT_TRUE(scope.ok());
diff --git a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
index cc469f6dba..9949b3d89c 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
+++ b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph_transfer_info.pb.h"
 #include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
@@ -410,7 +411,7 @@ Status HexagonControlWrapper::FuseRemoteGraph(
 
 bool HexagonControlWrapper::FillInputNode(const string& node_name,
                                           const Tensor& tensor) {
-  StringPiece tensor_data = tensor.tensor_data();
+  absl::string_view tensor_data = tensor.tensor_data();
   const ConstByteArray ba =
       ConstByteArray(reinterpret_cast<const uint8*>(tensor_data.data()),
                      tensor_data.size(), tensor.dtype());
diff --git a/tensorflow/core/kernels/immutable_constant_op_test.cc b/tensorflow/core/kernels/immutable_constant_op_test.cc
index b2dc16d5d7..7a23fef135 100644
--- a/tensorflow/core/kernels/immutable_constant_op_test.cc
+++ b/tensorflow/core/kernels/immutable_constant_op_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <algorithm>
 #include <tuple>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -64,7 +65,7 @@ class TestFileSystem : public NullFileSystem {
       const string& fname,
       std::unique_ptr<ReadOnlyMemoryRegion>* result) override {
     float val = 0;
-    StringPiece scheme, host, path;
+    absl::string_view scheme, host, path;
     io::ParseURI(fname, &scheme, &host, &path);
     // For the tests create in-memory regions with float values equal to the
     // region name.
@@ -148,8 +149,8 @@ Status CreateTempFile(Env* env, float value, uint64 size, string* filename) {
   std::unique_ptr<WritableFile> file;
   TF_RETURN_IF_ERROR(env->NewWritableFile(*filename, &file));
   for (uint64 i = 0; i < size; ++i) {
-    StringPiece sp(static_cast<char*>(static_cast<void*>(&value)),
-                   sizeof(value));
+    absl::string_view sp(static_cast<char*>(static_cast<void*>(&value)),
+                         sizeof(value));
     TF_RETURN_IF_ERROR(file->Append(sp));
   }
   TF_RETURN_IF_ERROR(file->Close());
diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc
index 2088c13586..ed95793285 100644
--- a/tensorflow/core/kernels/list_kernels.cc
+++ b/tensorflow/core/kernels/list_kernels.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include <limits>
+#include "absl/strings/string_view.h"
 
 #define EIGEN_USE_THREADS
 #if GOOGLE_CUDA
@@ -99,7 +100,7 @@ bool TensorList::Decode(const VariantTensorData& data) {
   string metadata;
   data.get_metadata(&metadata);
   uint64 scratch;
-  StringPiece iter(metadata);
+  absl::string_view iter(metadata);
   core::GetVarint64(&iter, &scratch);
   element_dtype = static_cast<DataType>(scratch);
   std::vector<int64> dims;
diff --git a/tensorflow/core/kernels/reduce_join_op.cc b/tensorflow/core/kernels/reduce_join_op.cc
index e2a3b861e5..6a571dfca2 100644
--- a/tensorflow/core/kernels/reduce_join_op.cc
+++ b/tensorflow/core/kernels/reduce_join_op.cc
@@ -17,13 +17,13 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
@@ -160,7 +160,7 @@ class ReduceJoinOp : public OpKernel {
 
     const int64 reduction_iter_size =
         GetReductionIterSize(reduced_indices, input_shape);
-    gtl::InlinedVector<StringPiece, 8> curr_strings(reduction_iter_size);
+    gtl::InlinedVector<absl::string_view, 8> curr_strings(reduction_iter_size);
     for (int64 output_index = 0; output_index < output_shape.num_elements();
          ++output_index) {
       int64 output_full_index = LinearSubIndexToFullIndex(
diff --git a/tensorflow/core/kernels/remote_fused_graph_execute_op_test.cc b/tensorflow/core/kernels/remote_fused_graph_execute_op_test.cc
index ec769d41f9..c0ae6eb4b8 100644
--- a/tensorflow/core/kernels/remote_fused_graph_execute_op_test.cc
+++ b/tensorflow/core/kernels/remote_fused_graph_execute_op_test.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/framework/scope.h"
 #include "tensorflow/cc/ops/const_op.h"
@@ -117,7 +118,7 @@ static Output BuildRemoteFusedGraphExecuteOp(
                      .Attr("Tinputs", input_types)
                      .Attr("Toutputs", output_types)
                      .Attr("serialized_remote_fused_graph_execute_info",
-                           StringPiece(execute_info.SerializeAsString()));
+                           absl::string_view(execute_info.SerializeAsString()));
   CHECK(scope.ok());
   scope.UpdateBuilder(&builder);
   scope.UpdateStatus(builder.Finalize(scope.graph(), &ret));
diff --git a/tensorflow/core/kernels/restore_v2_op_test.cc b/tensorflow/core/kernels/restore_v2_op_test.cc
index 36631570c7..2b67984564 100644
--- a/tensorflow/core/kernels/restore_v2_op_test.cc
+++ b/tensorflow/core/kernels/restore_v2_op_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/framework/allocator.h"
@@ -57,7 +58,7 @@ class RestoreV2OpTest : public OpsTestBase {
     TF_ASSERT_OK(InitOp());
   }
 
-  void RunTest(StringPiece save_op_to_use) {
+  void RunTest(absl::string_view save_op_to_use) {
     const string filename =
         io::JoinPath(testing::TmpDir(), "tensor_simple-", save_op_to_use);
     const std::vector<string> tensor_names = {
diff --git a/tensorflow/core/kernels/shape_op_test.cc b/tensorflow/core/kernels/shape_op_test.cc
index 30cb1e0a7f..f720ec9926 100644
--- a/tensorflow/core/kernels/shape_op_test.cc
+++ b/tensorflow/core/kernels/shape_op_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include <functional>
 #include <memory>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/client/client_session.h"
 #include "tensorflow/cc/ops/array_ops.h"
 #include "tensorflow/cc/ops/const_op.h"
@@ -63,7 +64,7 @@ REGISTER_UNARY_VARIANT_DECODE_FUNCTION(KnownVecSize, "KNOWN VECTOR SIZE TYPE");
 
 REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(KnownVecSize, GetShapeFromKnownVecSize);
 
-static void ExpectHasError(const Status& s, StringPiece substr) {
+static void ExpectHasError(const Status& s, absl::string_view substr) {
   EXPECT_TRUE(str_util::StrContains(s.ToString(), substr))
       << ">>" << s << "<<, expected substring >>" << substr << "<<";
 }
diff --git a/tensorflow/core/kernels/sparse_cross_op.cc b/tensorflow/core/kernels/sparse_cross_op.cc
index 4ebb7fbcc7..644a75501f 100644
--- a/tensorflow/core/kernels/sparse_cross_op.cc
+++ b/tensorflow/core/kernels/sparse_cross_op.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_def_builder.h"
@@ -26,7 +27,6 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/fingerprint.h"
 #include "tensorflow/core/util/work_sharder.h"
@@ -92,8 +92,8 @@ string SparseTensorColumn<string>::Feature(int64 batch, int64 n) const {
 }
 
 template <>
-StringPiece SparseTensorColumn<StringPiece>::Feature(int64 batch,
-                                                     int64 n) const {
+absl::string_view SparseTensorColumn<absl::string_view>::Feature(
+    int64 batch, int64 n) const {
   const int64 start = feature_start_indices_[batch];
   return values_.vec<string>().data()[start + n];
 }
@@ -130,8 +130,8 @@ string DenseTensorColumn<string>::Feature(int64 batch, int64 n) const {
 }
 
 template <>
-StringPiece DenseTensorColumn<StringPiece>::Feature(int64 batch,
-                                                    int64 n) const {
+absl::string_view DenseTensorColumn<absl::string_view>::Feature(int64 batch,
+                                                                int64 n) const {
   return tensor_.matrix<string>()(batch, n);
 }
 
diff --git a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
index 29577ebb4e..7098983fde 100644
--- a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
+++ b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
@@ -32,7 +33,7 @@ namespace tensorflow {
 
 namespace {
 
-static void ExpectHasSubstr(StringPiece s, StringPiece expected) {
+static void ExpectHasSubstr(absl::string_view s, absl::string_view expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
diff --git a/tensorflow/core/kernels/sparse_reduce_op.cc b/tensorflow/core/kernels/sparse_reduce_op.cc
index a465564739..0908979fa2 100644
--- a/tensorflow/core/kernels/sparse_reduce_op.cc
+++ b/tensorflow/core/kernels/sparse_reduce_op.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 // See docs in ../ops/sparse_ops.cc.
 
+#include "absl/strings/string_view.h"
 #define EIGEN_USE_THREADS
 
 #include "tensorflow/core/framework/op_kernel.h"
@@ -135,7 +136,7 @@ struct SumOp {
   static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s, const typename TTypes<T>::UnalignedVec &v) {
       s.device(ctx->eigen_cpu_device()) = v.sum();
   }
-  static StringPiece Name() {
+  static absl::string_view Name() {
       return "sum";
   }
 };
@@ -145,7 +146,7 @@ struct MaxOp {
   static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s, const typename TTypes<T>::UnalignedVec &v) {
       s.device(ctx->eigen_cpu_device()) = v.maximum();
   }
-  static StringPiece Name() {
+  static absl::string_view Name() {
       return "max";
   }
 };
diff --git a/tensorflow/core/kernels/spectrogram_test_utils.cc b/tensorflow/core/kernels/spectrogram_test_utils.cc
index 872a6e9d1b..227f225d90 100644
--- a/tensorflow/core/kernels/spectrogram_test_utils.cc
+++ b/tensorflow/core/kernels/spectrogram_test_utils.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <math.h>
 #include <stddef.h>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -175,8 +176,9 @@ bool WriteDoubleVectorToFile(const string& file_name,
     return false;
   }
   for (int i = 0; i < data.size(); ++i) {
-    if (!file->Append(StringPiece(reinterpret_cast<const char*>(&(data[i])),
-                                  sizeof(data[i])))
+    if (!file
+             ->Append(absl::string_view(
+                 reinterpret_cast<const char*>(&(data[i])), sizeof(data[i])))
              .ok()) {
       LOG(ERROR) << "Failed to append to file " << file_name;
       return false;
@@ -197,8 +199,9 @@ bool WriteFloatVectorToFile(const string& file_name,
     return false;
   }
   for (int i = 0; i < data.size(); ++i) {
-    if (!file->Append(StringPiece(reinterpret_cast<const char*>(&(data[i])),
-                                  sizeof(data[i])))
+    if (!file
+             ->Append(absl::string_view(
+                 reinterpret_cast<const char*>(&(data[i])), sizeof(data[i])))
              .ok()) {
       LOG(ERROR) << "Failed to append to file " << file_name;
       return false;
@@ -219,8 +222,9 @@ bool WriteDoubleArrayToFile(const string& file_name, int size,
     return false;
   }
   for (int i = 0; i < size; ++i) {
-    if (!file->Append(StringPiece(reinterpret_cast<const char*>(&(data[i])),
-                                  sizeof(data[i])))
+    if (!file
+             ->Append(absl::string_view(
+                 reinterpret_cast<const char*>(&(data[i])), sizeof(data[i])))
              .ok()) {
       LOG(ERROR) << "Failed to append to file " << file_name;
       return false;
@@ -241,8 +245,9 @@ bool WriteFloatArrayToFile(const string& file_name, int size,
     return false;
   }
   for (int i = 0; i < size; ++i) {
-    if (!file->Append(StringPiece(reinterpret_cast<const char*>(&(data[i])),
-                                  sizeof(data[i])))
+    if (!file
+             ->Append(absl::string_view(
+                 reinterpret_cast<const char*>(&(data[i])), sizeof(data[i])))
              .ok()) {
       LOG(ERROR) << "Failed to append to file " << file_name;
       return false;
@@ -266,16 +271,18 @@ bool WriteComplexVectorToRawFloatFile(
   for (int i = 0; i < data.size(); ++i) {
     for (int j = 0; j < data[i].size(); ++j) {
       const float real_part(real(data[i][j]));
-      if (!file->Append(StringPiece(reinterpret_cast<const char*>(&real_part),
-                                    sizeof(real_part)))
+      if (!file->Append(
+                   absl::string_view(reinterpret_cast<const char*>(&real_part),
+                                     sizeof(real_part)))
                .ok()) {
         LOG(ERROR) << "Failed to append to file " << file_name;
         return false;
       }
 
       const float imag_part(imag(data[i][j]));
-      if (!file->Append(StringPiece(reinterpret_cast<const char*>(&imag_part),
-                                    sizeof(imag_part)))
+      if (!file->Append(
+                   absl::string_view(reinterpret_cast<const char*>(&imag_part),
+                                     sizeof(imag_part)))
                .ok()) {
         LOG(ERROR) << "Failed to append to file " << file_name;
         return false;
diff --git a/tensorflow/core/kernels/string_join_op.cc b/tensorflow/core/kernels/string_join_op.cc
index 28cca9f448..3cbb4c7249 100644
--- a/tensorflow/core/kernels/string_join_op.cc
+++ b/tensorflow/core/kernels/string_join_op.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -62,7 +63,7 @@ class StringJoinOp : public OpKernel {
                                                      &output_tensor));
     auto output_flat = output_tensor->flat<string>();
 
-    std::vector<StringPiece> strings(input_list.size());
+    std::vector<absl::string_view> strings(input_list.size());
     for (size_t i = 0; i < input_shape.num_elements(); ++i) {
       for (int j = 0; j < input_list.size(); ++j) {
         strings[j] = (is_scalar[j]) ? inputs[j](0) : inputs[j](i);
diff --git a/tensorflow/core/kernels/string_split_op.cc b/tensorflow/core/kernels/string_split_op.cc
index 3884370a6c..82ac01caea 100644
--- a/tensorflow/core/kernels/string_split_op.cc
+++ b/tensorflow/core/kernels/string_split_op.cc
@@ -17,12 +17,12 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
@@ -36,11 +36,11 @@ namespace {
 template <typename Predicate>
 std::vector<StringPiece> SplitOnChar(const string& str, const char delim,
                                      Predicate p) {
-  std::vector<StringPiece> result;
-  StringPiece text(str);
+  std::vector<absl::string_view> result;
+  absl::string_view text(str);
   auto f = text.find(delim);
-  while (f != StringPiece::npos) {
-    StringPiece token = text.substr(0, f);
+  while (f != absl::string_view::npos) {
+    absl::string_view token = text.substr(0, f);
     if (p(token)) {
       result.emplace_back(token);
     }
@@ -60,13 +60,14 @@ std::vector<StringPiece> SplitOnChar(const string& str, const char delim,
 template <typename Predicate>
 std::vector<StringPiece> SplitOnCharSet(const string& str,
                                         const string& delim_set, Predicate p) {
-  std::vector<StringPiece> result;
-  StringPiece text(str);
-  StringPiece delims(delim_set);
+  std::vector<absl::string_view> result;
+  absl::string_view text(str);
+  absl::string_view delims(delim_set);
   size_t token_start = 0;
   for (size_t i = 0; i < text.size() + 1; i++) {
-    if ((i == text.size()) || (delims.find(text[i]) != StringPiece::npos)) {
-      StringPiece token(text.data() + token_start, i - token_start);
+    if ((i == text.size()) ||
+        (delims.find(text[i]) != absl::string_view::npos)) {
+      absl::string_view token(text.data() + token_start, i - token_start);
       if (p(token)) {
         result.emplace_back(token);
       }
@@ -83,13 +84,13 @@ template <typename Predicate>
 std::vector<StringPiece> Split(const string& str, const string& delimiter,
                                Predicate predicate) {
   if (str.empty()) {
-    return std::vector<StringPiece>();
+    return std::vector<absl::string_view>();
   }
   if (delimiter.empty()) {
-    std::vector<StringPiece> result;
+    std::vector<absl::string_view> result;
     result.resize(str.size());
     for (size_t i = 0; i < str.size(); ++i) {
-      result[i] = StringPiece(str.data() + i, 1);
+      result[i] = absl::string_view(str.data() + i, 1);
     }
     return result;
   }
@@ -99,8 +100,8 @@ std::vector<StringPiece> Split(const string& str, const string& delimiter,
   return SplitOnCharSet(str, delimiter, predicate);
 }
 
-std::vector<StringPiece> SplitV2(const string& str, StringPiece sep,
-                                 int maxsplit) {
+std::vector<absl::string_view> SplitV2(const string& str, absl::string_view sep,
+                                       int maxsplit) {
   // This SplitV2 method matches the behavior of python's str.split:
   //   If sep is given, consecutive delimiters are not grouped together
   //   and are deemed to delimit empty strings (for example, '1,,2'.split(',')
@@ -115,16 +116,16 @@ std::vector<StringPiece> SplitV2(const string& str, StringPiece sep,
   //   splitting an empty string or a string consisting of just whitespace
   //   with a None separator returns [].
 
-  std::vector<StringPiece> result;
+  std::vector<absl::string_view> result;
 
-  StringPiece text(str);
+  absl::string_view text(str);
   if (maxsplit == 0) {
     result.emplace_back(text);
     return result;
   }
 
   if (sep.empty()) {
-    StringPiece token;
+    absl::string_view token;
     // Remove leading whitespaces.
     str_util::RemoveLeadingWhitespace(&text);
     int split = 0;
@@ -142,13 +143,13 @@ std::vector<StringPiece> SplitV2(const string& str, StringPiece sep,
   auto p = std::search(text.begin(), text.end(), sep.begin(), sep.end());
   int split = 0;
   while (p != text.end()) {
-    StringPiece token = text.substr(0, p - text.begin());
+    absl::string_view token = text.substr(0, p - text.begin());
     result.push_back(token);
     text.remove_prefix(token.size());
     text.remove_prefix(sep.size());
     ++split;
     if (maxsplit > 0 && split == maxsplit) {
-      result.push_back(StringPiece(text));
+      result.push_back(absl::string_view(text));
       return result;
     }
     p = std::search(text.begin(), text.end(), sep.begin(), sep.end());
@@ -190,7 +191,7 @@ class StringSplitOp : public OpKernel {
     const auto delimiter_vec = delimiter_tensor->flat<string>();
     const string& delimiter = delimiter_vec(0);
     // Empty delimiter means split the input character by character.
-    std::vector<StringPiece> tokens;
+    std::vector<absl::string_view> tokens;
     // Guess that we'll be unpacking a handful of tokens per example.
     static constexpr int kReserveSize = 4;
     tokens.reserve(batch_size * kReserveSize);
@@ -199,7 +200,7 @@ class StringSplitOp : public OpKernel {
     int64 max_num_entries = 0;
     std::vector<int64> num_indices(batch_size);
     for (int64 i = 0; i < batch_size; ++i) {
-      std::vector<StringPiece> parts =
+      std::vector<absl::string_view> parts =
           skip_empty_ ? Split(input_vec(i), delimiter, str_util::SkipEmpty())
                       : Split(input_vec(i), delimiter, str_util::AllowEmpty());
       int64 n_entries = parts.size();
@@ -262,8 +263,8 @@ class StringSplitV2Op : public OpKernel {
                 errors::InvalidArgument("sep must be a scalar, got shape: ",
                                         sep_tensor->shape().DebugString()));
     const auto sep_vec = sep_tensor->flat<string>();
-    StringPiece sep(sep_vec(0));
-    std::vector<StringPiece> tokens;
+    absl::string_view sep(sep_vec(0));
+    std::vector<absl::string_view> tokens;
     // Guess that we'll be unpacking a handful of tokens per example.
     static constexpr int kReserveSize = 4;
     tokens.reserve(batch_size * kReserveSize);
@@ -272,7 +273,8 @@ class StringSplitV2Op : public OpKernel {
     int64 max_num_entries = 0;
     std::vector<int64> num_indices(batch_size);
     for (int64 i = 0; i < batch_size; ++i) {
-      std::vector<StringPiece> parts = SplitV2(input_vec(i), sep, maxsplit_);
+      std::vector<absl::string_view> parts =
+          SplitV2(input_vec(i), sep, maxsplit_);
       int64 n_entries = parts.size();
       num_indices[i] = n_entries;
       output_size += n_entries;
diff --git a/tensorflow/core/kernels/string_strip_op.cc b/tensorflow/core/kernels/string_strip_op.cc
index 544dca96ba..8b7b0bd250 100644
--- a/tensorflow/core/kernels/string_strip_op.cc
+++ b/tensorflow/core/kernels/string_strip_op.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -41,7 +42,7 @@ class StringStripOp : public OpKernel {
     auto output = output_tensor->flat<string>();
 
     for (int64 i = 0; i < input.size(); ++i) {
-      StringPiece entry(input(i));
+      absl::string_view entry(input(i));
       str_util::RemoveWhitespaceContext(&entry);
       output(i) = string(entry);
     }
diff --git a/tensorflow/core/kernels/string_to_hash_bucket_op.h b/tensorflow/core/kernels/string_to_hash_bucket_op.h
index 62ef35bbba..d2c13719e6 100644
--- a/tensorflow/core/kernels/string_to_hash_bucket_op.h
+++ b/tensorflow/core/kernels/string_to_hash_bucket_op.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -26,7 +27,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-template <uint64 hash(StringPiece)>
+template <uint64 hash(absl::string_view)>
 class StringToHashBucketOp : public OpKernel {
  public:
   explicit StringToHashBucketOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
diff --git a/tensorflow/core/kernels/string_util.h b/tensorflow/core/kernels/string_util.h
index d40e93ea33..0af3449d8d 100644
--- a/tensorflow/core/kernels/string_util.h
+++ b/tensorflow/core/kernels/string_util.h
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_STRING_UTIL_H_
 #define TENSORFLOW_CORE_KERNELS_STRING_UTIL_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -49,7 +50,7 @@ int32 UTF8StrLen(const string& string);
 // the end of the string is reached before the requested characters, then the
 // position will point to the end of string and this function will return false.
 template <typename T>
-bool ForwardNUTF8CharPositions(const StringPiece in,
+bool ForwardNUTF8CharPositions(const absl::string_view in,
                                const T num_utf8_chars_to_shift, T* pos) {
   const size_t size = in.size();
   T utf8_chars_counted = 0;
@@ -70,7 +71,7 @@ bool ForwardNUTF8CharPositions(const StringPiece in,
 // the string is reached before the requested character, then the position will
 // point to the beginning of the string and this function will return false.
 template <typename T>
-bool BackNUTF8CharPositions(const StringPiece in,
+bool BackNUTF8CharPositions(const absl::string_view in,
                             const T num_utf8_chars_to_shift, T* pos) {
   const size_t start = 0;
   T utf8_chars_counted = 0;
diff --git a/tensorflow/core/kernels/substr_op.cc b/tensorflow/core/kernels/substr_op.cc
index 93c427039d..030c2ff37c 100644
--- a/tensorflow/core/kernels/substr_op.cc
+++ b/tensorflow/core/kernels/substr_op.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <cstdlib>
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op.h"
@@ -28,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/string_util.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/bcast.h"
 
@@ -73,7 +73,7 @@ class SubstrOp : public OpKernel {
         const T len =
             tensorflow::internal::SubtleMustCopy(len_tensor.scalar<T>()());
         for (size_t i = 0; i < input_tensor.NumElements(); ++i) {
-          StringPiece in(input(i));
+          absl::string_view in(input(i));
           T byte_pos = pos;
           T byte_len = len;
           switch (unit_) {
@@ -90,7 +90,7 @@ class SubstrOp : public OpKernel {
                   errors::InvalidArgument("pos ", pos, " out of range for ",
                                           "string b'", in, "' at index ", i));
           }
-          StringPiece sub_in = in.substr(byte_pos, byte_len);
+          absl::string_view sub_in = in.substr(byte_pos, byte_len);
           output(i).assign(sub_in.data(), sub_in.size());
         }
       } else {
@@ -98,7 +98,7 @@ class SubstrOp : public OpKernel {
         auto pos_flat = pos_tensor.flat<T>();
         auto len_flat = len_tensor.flat<T>();
         for (size_t i = 0; i < input_tensor.NumElements(); ++i) {
-          StringPiece in(input(i));
+          absl::string_view in(input(i));
           const T pos = tensorflow::internal::SubtleMustCopy(pos_flat(i));
           const T len = tensorflow::internal::SubtleMustCopy(len_flat(i));
           T byte_pos = pos;
@@ -117,7 +117,7 @@ class SubstrOp : public OpKernel {
                   errors::InvalidArgument("pos ", pos, " out of range for ",
                                           "string b'", in, "' at index ", i));
           }
-          StringPiece sub_in = in.substr(byte_pos, byte_len);
+          absl::string_view sub_in = in.substr(byte_pos, byte_len);
           output(i).assign(sub_in.data(), sub_in.size());
         }
       }
@@ -177,7 +177,7 @@ class SubstrOp : public OpKernel {
 
           // Iterate through broadcasted tensors and perform substr
           for (int i = 0; i < output_shape.dim_size(0); ++i) {
-            StringPiece in(input_bcast(i));
+            absl::string_view in(input_bcast(i));
             const T pos = tensorflow::internal::SubtleMustCopy(pos_bcast(i));
             const T len = tensorflow::internal::SubtleMustCopy(len_bcast(i));
             T byte_pos = pos;
@@ -197,7 +197,7 @@ class SubstrOp : public OpKernel {
                     errors::InvalidArgument("pos ", pos, " out of range for ",
                                             "string b'", in, "' at index ", i));
             }
-            StringPiece sub_in = in.substr(byte_pos, byte_len);
+            absl::string_view sub_in = in.substr(byte_pos, byte_len);
             output(i).assign(sub_in.data(), sub_in.size());
           }
           break;
@@ -241,7 +241,7 @@ class SubstrOp : public OpKernel {
           // Iterate through broadcasted tensors and perform substr
           for (int i = 0; i < output_shape.dim_size(0); ++i) {
             for (int j = 0; j < output_shape.dim_size(1); ++j) {
-              StringPiece in(input_bcast(i, j));
+              absl::string_view in(input_bcast(i, j));
               const T pos =
                   tensorflow::internal::SubtleMustCopy(pos_bcast(i, j));
               const T len =
@@ -263,7 +263,7 @@ class SubstrOp : public OpKernel {
                                               "string b'", in, "' at index (",
                                               i, ", ", j, ")"));
               }
-              StringPiece sub_in = in.substr(byte_pos, byte_len);
+              absl::string_view sub_in = in.substr(byte_pos, byte_len);
               output(i, j).assign(sub_in.data(), sub_in.size());
             }
           }
@@ -280,7 +280,8 @@ class SubstrOp : public OpKernel {
  private:
   // This adjusts the requested position. Note it does not perform any bound
   // checks.
-  static inline T AdjustedPosIndex(const T pos_requested, const StringPiece s) {
+  static inline T AdjustedPosIndex(const T pos_requested,
+                                   const absl::string_view s) {
     if (pos_requested < 0) {
       return s.size() + pos_requested;
     }
@@ -289,7 +290,7 @@ class SubstrOp : public OpKernel {
 
   // Return true if successful; otherwise, return false if the `pos` argument
   // is out of range in the string.
-  static inline bool UpdatePosAndLenForUtf8(const StringPiece in, T* pos,
+  static inline bool UpdatePosAndLenForUtf8(const absl::string_view in, T* pos,
                                             T* len) {
     if (*pos >= 0) {
       return UpdatePositivePosAndLenForUtf8(in, *pos, *len, pos, len);
@@ -298,9 +299,9 @@ class SubstrOp : public OpKernel {
     }
   }
 
-  static bool UpdatePositivePosAndLenForUtf8(const StringPiece in, const T pos,
-                                             const T len, T* char_pos,
-                                             T* char_len) {
+  static bool UpdatePositivePosAndLenForUtf8(const absl::string_view in,
+                                             const T pos, const T len,
+                                             T* char_pos, T* char_len) {
     *char_pos = 0;
     // Determine byte position of the substring start.
     if (!ForwardNUTF8CharPositions(in, pos, char_pos)) {
@@ -319,9 +320,9 @@ class SubstrOp : public OpKernel {
   // This function expects a negative position relative to the end of the
   // string, but will update the character position to a positive number
   // relative to the beginning of the string.
-  static bool UpdateNegativePosAndLenForUtf8(const StringPiece in, const T pos,
-                                             const T len, T* char_pos,
-                                             T* char_len) {
+  static bool UpdateNegativePosAndLenForUtf8(const absl::string_view in,
+                                             const T pos, const T len,
+                                             T* char_pos, T* char_len) {
     // Initially treat the length as position of the end of the substring.
     *char_len = in.size();
     // This is the number of character to skip from the end of the string to
diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc
index a97a71b344..3bb8e50e76 100644
--- a/tensorflow/core/kernels/tensor_array_ops.cc
+++ b/tensorflow/core/kernels/tensor_array_ops.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 // See docs in ../ops/data_flow_ops.cc.
 
+#include "absl/strings/string_view.h"
 #define EIGEN_USE_THREADS
 
 #include <limits>
@@ -292,13 +293,13 @@ class TensorArrayGradOp : public TensorArrayCreationOp {
     } else {
       container = "_tensor_arrays";
       const auto& resource = ctx->input(0).flat<ResourceHandle>()(0);
-      if (StringPiece(resource.name()).substr(0, container.size()) !=
+      if (absl::string_view(resource.name()).substr(0, container.size()) !=
           container) {
         return errors::InvalidArgument("Wrong input container. ",
                                        resource.name());
       }
       tensor_array_name =
-          string(StringPiece(resource.name()).substr(container.size()));
+          string(absl::string_view(resource.name()).substr(container.size()));
     }
 
     auto output_handle = tensor_array_output_handle->flat<string>();
diff --git a/tensorflow/core/kernels/word2vec_kernels.cc b/tensorflow/core/kernels/word2vec_kernels.cc
index 3477445197..10a3a36842 100644
--- a/tensorflow/core/kernels/word2vec_kernels.cc
+++ b/tensorflow/core/kernels/word2vec_kernels.cc
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/random/distribution_sampler.h"
 #include "tensorflow/core/lib/random/philox_random.h"
@@ -33,9 +33,9 @@ const int kSentenceSize = 1000;
 
 namespace {
 
-bool ScanWord(StringPiece* input, string* word) {
+bool ScanWord(absl::string_view* input, string* word) {
   str_util::RemoveLeadingWhitespace(input);
-  StringPiece tmp;
+  absl::string_view tmp;
   if (str_util::ConsumeNonWhitespace(input, &tmp)) {
     word->assign(tmp.data(), tmp.size());
     return true;
@@ -179,7 +179,7 @@ class SkipgramOp : public OpKernel {
   Status Init(Env* env, const string& filename) {
     string data;
     TF_RETURN_IF_ERROR(ReadFileToString(env, filename, &data));
-    StringPiece input = data;
+    absl::string_view input = data;
     string w;
     corpus_size_ = 0;
     std::unordered_map<string, int32> word_freq;
diff --git a/tensorflow/core/lib/core/coding.cc b/tensorflow/core/lib/core/coding.cc
index 50872eef83..60828d423a 100644
--- a/tensorflow/core/lib/core/coding.cc
+++ b/tensorflow/core/lib/core/coding.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/coding.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/byte_order.h"
 
 namespace tensorflow {
@@ -151,14 +152,14 @@ const char* GetVarint32PtrFallback(const char* p, const char* limit,
   return nullptr;
 }
 
-bool GetVarint32(StringPiece* input, uint32* value) {
+bool GetVarint32(absl::string_view* input, uint32* value) {
   const char* p = input->data();
   const char* limit = p + input->size();
   const char* q = GetVarint32Ptr(p, limit, value);
   if (q == nullptr) {
     return false;
   } else {
-    *input = StringPiece(q, limit - q);
+    *input = absl::string_view(q, limit - q);
     return true;
   }
 }
@@ -180,14 +181,14 @@ const char* GetVarint64Ptr(const char* p, const char* limit, uint64* value) {
   return nullptr;
 }
 
-bool GetVarint64(StringPiece* input, uint64* value) {
+bool GetVarint64(absl::string_view* input, uint64* value) {
   const char* p = input->data();
   const char* limit = p + input->size();
   const char* q = GetVarint64Ptr(p, limit, value);
   if (q == nullptr) {
     return false;
   } else {
-    *input = StringPiece(q, limit - q);
+    *input = absl::string_view(q, limit - q);
     return true;
   }
 }
diff --git a/tensorflow/core/lib/core/coding.h b/tensorflow/core/lib/core/coding.h
index 4a70ffa619..9add2b1afc 100644
--- a/tensorflow/core/lib/core/coding.h
+++ b/tensorflow/core/lib/core/coding.h
@@ -21,8 +21,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_CORE_CODING_H_
 #define TENSORFLOW_CORE_LIB_CORE_CODING_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/raw_coding.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -46,8 +46,8 @@ extern void PutFixed64(string* dst, uint64 value);
 extern void PutVarint32(string* dst, uint32 value);
 extern void PutVarint64(string* dst, uint64 value);
 
-extern bool GetVarint32(StringPiece* input, uint32* value);
-extern bool GetVarint64(StringPiece* input, uint64* value);
+extern bool GetVarint32(absl::string_view* input, uint32* value);
+extern bool GetVarint64(absl::string_view* input, uint64* value);
 
 extern const char* GetVarint32Ptr(const char* p, const char* limit, uint32* v);
 extern const char* GetVarint64Ptr(const char* p, const char* limit, uint64* v);
diff --git a/tensorflow/core/lib/core/status.cc b/tensorflow/core/lib/core/status.cc
index cb2a06e620..f21600328b 100644
--- a/tensorflow/core/lib/core/status.cc
+++ b/tensorflow/core/lib/core/status.cc
@@ -15,10 +15,11 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/status.h"
 #include <stdio.h>
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 
-Status::Status(tensorflow::error::Code code, StringPiece msg) {
+Status::Status(tensorflow::error::Code code, absl::string_view msg) {
   assert(code != tensorflow::error::OK);
   state_ = std::unique_ptr<State>(new State);
   state_->code = code;
diff --git a/tensorflow/core/lib/core/status.h b/tensorflow/core/lib/core/status.h
index eb0ff555a5..4227a8e11b 100644
--- a/tensorflow/core/lib/core/status.h
+++ b/tensorflow/core/lib/core/status.h
@@ -20,8 +20,8 @@ limitations under the License.
 #include <iosfwd>
 #include <memory>
 #include <string>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -42,7 +42,7 @@ class Status {
 
   /// \brief Create a status with the specified error code and msg as a
   /// human-readable string containing more detailed information.
-  Status(tensorflow::error::Code code, tensorflow::StringPiece msg);
+  Status(tensorflow::error::Code code, absl::string_view msg);
 
   /// Copy the specified status.
   Status(const Status& s);
diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc
index e4b489fe17..420ae67708 100644
--- a/tensorflow/core/lib/core/stringpiece_test.cc
+++ b/tensorflow/core/lib/core/stringpiece_test.cc
@@ -13,9 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/lib/core/stringpiece.h"
-
 #include <unordered_map>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -24,24 +23,24 @@ TEST(StringPiece, Ctor) {
   {
     // const char* without size.
     const char* hello = "hello";
-    StringPiece s20(hello);
+    absl::string_view s20(hello);
     EXPECT_TRUE(s20.data() == hello);
     EXPECT_EQ(5, s20.size());
 
     // const char* with size.
-    StringPiece s21(hello, 4);
+    absl::string_view s21(hello, 4);
     EXPECT_TRUE(s21.data() == hello);
     EXPECT_EQ(4, s21.size());
 
     // Not recommended, but valid C++
-    StringPiece s22(hello, 6);
+    absl::string_view s22(hello, 6);
     EXPECT_TRUE(s22.data() == hello);
     EXPECT_EQ(6, s22.size());
   }
 
   {
     string hola = "hola";
-    StringPiece s30(hola);
+    absl::string_view s30(hola);
     EXPECT_TRUE(s30.data() == hola.data());
     EXPECT_EQ(4, s30.size());
 
@@ -49,15 +48,15 @@ TEST(StringPiece, Ctor) {
     hola.push_back('\0');
     hola.append("h2");
     hola.push_back('\0');
-    StringPiece s31(hola);
+    absl::string_view s31(hola);
     EXPECT_TRUE(s31.data() == hola.data());
     EXPECT_EQ(8, s31.size());
   }
 }
 
 TEST(StringPiece, ConversionToString) {
-  EXPECT_EQ("", string(StringPiece("")));
-  EXPECT_EQ("foo", string(StringPiece("foo")));
+  EXPECT_EQ("", string(absl::string_view("")));
+  EXPECT_EQ("foo", string(absl::string_view("foo")));
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/db/BUILD b/tensorflow/core/lib/db/BUILD
index 7a64306c6e..efba522005 100644
--- a/tensorflow/core/lib/db/BUILD
+++ b/tensorflow/core/lib/db/BUILD
@@ -16,6 +16,7 @@ cc_library(
         ":snapfn",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
         "@org_sqlite",
     ],
 )
@@ -41,5 +42,6 @@ tf_cc_test(
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/core/lib/db/sqlite.cc b/tensorflow/core/lib/db/sqlite.cc
index cf11f3a331..83c419988f 100644
--- a/tensorflow/core/lib/db/sqlite.cc
+++ b/tensorflow/core/lib/db/sqlite.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/lib/db/sqlite.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 
@@ -81,7 +82,8 @@ sqlite3_stmt* PrepareRawOrDie(sqlite3* db, const char* sql) {
   return stmt;
 }
 
-Status SetPragma(Sqlite* db, const char* pragma, const StringPiece& value) {
+Status SetPragma(Sqlite* db, const char* pragma,
+                 const absl::string_view& value) {
   if (value.empty()) return Status::OK();
   for (auto p = value.begin(); p < value.end(); ++p) {
     if (!(('0' <= *p && *p <= '9') || ('A' <= *p && *p <= 'Z') ||
@@ -96,9 +98,9 @@ Status SetPragma(Sqlite* db, const char* pragma, const StringPiece& value) {
   return stmt.Step(&unused_done);
 }
 
-const StringPiece GetEnv(const char* var) {
+const absl::string_view GetEnv(const char* var) {
   const char* val = std::getenv(var);
-  return (val == nullptr) ? StringPiece() : StringPiece(val);
+  return (val == nullptr) ? absl::string_view() : absl::string_view(val);
 }
 
 Status EnvPragma(Sqlite* db, const char* pragma, const char* var) {
@@ -160,7 +162,7 @@ Sqlite::~Sqlite() {
   CHECK_EQ(SQLITE_OK, sqlite3_close(db_));
 }
 
-Status Sqlite::Prepare(const StringPiece& sql, SqliteStatement* stmt) {
+Status Sqlite::Prepare(const absl::string_view& sql, SqliteStatement* stmt) {
   SqliteLock lock(*this);
   sqlite3_stmt* ps = nullptr;
   int rc = sqlite3_prepare_v2(db_, sql.data(), static_cast<int>(sql.size()),
diff --git a/tensorflow/core/lib/db/sqlite.h b/tensorflow/core/lib/db/sqlite.h
index efe97f78d2..d54a5f7ed2 100644
--- a/tensorflow/core/lib/db/sqlite.h
+++ b/tensorflow/core/lib/db/sqlite.h
@@ -17,10 +17,10 @@ limitations under the License.
 
 #include <mutex>
 
+#include "absl/strings/string_view.h"
 #include "sqlite3.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
@@ -87,8 +87,8 @@ class LOCKABLE Sqlite : public core::RefCounted {
   /// routine will retry automatically and then possibly fail.
   ///
   /// The returned statement holds a reference to this object.
-  Status Prepare(const StringPiece& sql, SqliteStatement* stmt);
-  SqliteStatement PrepareOrDie(const StringPiece& sql);
+  Status Prepare(const absl::string_view& sql, SqliteStatement* stmt);
+  SqliteStatement PrepareOrDie(const absl::string_view& sql);
 
   /// \brief Returns extended result code of last error.
   ///
@@ -228,22 +228,22 @@ class SqliteStatement {
   ///
   /// When using the unsafe methods, the data must not be changed or
   /// freed until this statement is Reset() or finalized.
-  void BindText(int parameter, const StringPiece& text) {
+  void BindText(int parameter, const absl::string_view& text) {
     Update(sqlite3_bind_text64(stmt_, parameter, text.data(), text.size(),
                                SQLITE_TRANSIENT, SQLITE_UTF8),
            parameter);
     size_ += text.size();
   }
-  void BindText(const char* parameter, const StringPiece& text) {
+  void BindText(const char* parameter, const absl::string_view& text) {
     BindText(GetParameterIndex(parameter), text);
   }
-  void BindTextUnsafe(int parameter, const StringPiece& text) {
+  void BindTextUnsafe(int parameter, const absl::string_view& text) {
     Update(sqlite3_bind_text64(stmt_, parameter, text.data(), text.size(),
                                SQLITE_STATIC, SQLITE_UTF8),
            parameter);
     size_ += text.size();
   }
-  void BindTextUnsafe(const char* parameter, const StringPiece& text) {
+  void BindTextUnsafe(const char* parameter, const absl::string_view& text) {
     BindTextUnsafe(GetParameterIndex(parameter), text);
   }
 
@@ -251,22 +251,22 @@ class SqliteStatement {
   ///
   /// When using the unsafe methods, the data must not be changed or
   /// freed until this statement is Reset() or finalized.
-  void BindBlob(int parameter, const StringPiece& blob) {
+  void BindBlob(int parameter, const absl::string_view& blob) {
     Update(sqlite3_bind_blob64(stmt_, parameter, blob.data(), blob.size(),
                                SQLITE_TRANSIENT),
            parameter);
     size_ += blob.size();
   }
-  void BindBlob(const char* parameter, const StringPiece& blob) {
+  void BindBlob(const char* parameter, const absl::string_view& blob) {
     BindBlob(GetParameterIndex(parameter), blob);
   }
-  void BindBlobUnsafe(int parameter, const StringPiece& blob) {
+  void BindBlobUnsafe(int parameter, const absl::string_view& blob) {
     Update(sqlite3_bind_blob64(stmt_, parameter, blob.data(), blob.size(),
                                SQLITE_STATIC),
            parameter);
     size_ += blob.size();
   }
-  void BindBlobUnsafe(const char* parameter, const StringPiece& text) {
+  void BindBlobUnsafe(const char* parameter, const absl::string_view& text) {
     BindBlobUnsafe(GetParameterIndex(parameter), text);
   }
 
@@ -309,7 +309,7 @@ class SqliteStatement {
   /// Empty values are returned as NULL. The returned memory will no
   /// longer be valid the next time Step() or Reset() is called. No NUL
   /// terminator is added.
-  StringPiece ColumnStringUnsafe(int column) const TF_MUST_USE_RESULT {
+  absl::string_view ColumnStringUnsafe(int column) const TF_MUST_USE_RESULT {
     return {static_cast<const char*>(sqlite3_column_blob(stmt_, column)),
             static_cast<size_t>(ColumnSize(column))};
   }
@@ -438,7 +438,7 @@ class SCOPED_LOCKABLE SqliteTransaction {
   EXCLUSIVE_LOCKS_REQUIRED(__VA_ARGS__)
 #define SQLITE_TRANSACTIONS_EXCLUDED(...) LOCKS_EXCLUDED(__VA_ARGS__)
 
-inline SqliteStatement Sqlite::PrepareOrDie(const StringPiece& sql) {
+inline SqliteStatement Sqlite::PrepareOrDie(const absl::string_view& sql) {
   SqliteStatement stmt;
   TF_CHECK_OK(Prepare(sql, &stmt));
   return stmt;
diff --git a/tensorflow/core/lib/db/sqlite_test.cc b/tensorflow/core/lib/db/sqlite_test.cc
index 1590055960..2826fe73c8 100644
--- a/tensorflow/core/lib/db/sqlite_test.cc
+++ b/tensorflow/core/lib/db/sqlite_test.cc
@@ -17,8 +17,8 @@ limitations under the License.
 #include <array>
 #include <climits>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/test.h"
@@ -169,7 +169,7 @@ TEST_F(SqliteTest, UnsafeColumn) {
   TF_ASSERT_OK(stmt.StepAndReset());
   stmt = db_->PrepareOrDie("SELECT b FROM T ORDER BY a");
   TF_ASSERT_OK(stmt.Step(&is_done_));
-  StringPiece p = stmt.ColumnStringUnsafe(0);
+  absl::string_view p = stmt.ColumnStringUnsafe(0);
   EXPECT_EQ('h', *p.data());
   TF_ASSERT_OK(stmt.Step(&is_done_));
   // This will actually happen, but it's not safe to test this behavior.
diff --git a/tensorflow/core/lib/hash/hash.h b/tensorflow/core/lib/hash/hash.h
index 675bab7191..8560f517dd 100644
--- a/tensorflow/core/lib/hash/hash.h
+++ b/tensorflow/core/lib/hash/hash.h
@@ -24,7 +24,7 @@ limitations under the License.
 #include <functional>
 #include <string>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -94,12 +94,12 @@ struct hash<string> {
 };
 
 template <>
-struct hash<StringPiece> {
-  size_t operator()(StringPiece sp) const {
+struct hash<absl::string_view> {
+  size_t operator()(absl::string_view sp) const {
     return static_cast<size_t>(Hash64(sp.data(), sp.size()));
   }
 };
-using StringPieceHasher = ::tensorflow::hash<StringPiece>;
+using StringPieceHasher = ::tensorflow::hash<absl::string_view>;
 
 template <typename T, typename U>
 struct hash<std::pair<T, U>> {
diff --git a/tensorflow/core/lib/hash/hash_test.cc b/tensorflow/core/lib/hash/hash_test.cc
index 7d58313132..3223ef81fe 100644
--- a/tensorflow/core/lib/hash/hash_test.cc
+++ b/tensorflow/core/lib/hash/hash_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
@@ -86,10 +87,10 @@ BENCHMARK(BM_Hash32)->Range(1, 1024);
 TEST(StringPieceHasher, Equality) {
   StringPieceHasher hasher;
 
-  StringPiece s1("foo");
-  StringPiece s2("bar");
-  StringPiece s3("baz");
-  StringPiece s4("zot");
+  absl::string_view s1("foo");
+  absl::string_view s2("bar");
+  absl::string_view s3("baz");
+  absl::string_view s4("zot");
 
   EXPECT_TRUE(hasher(s1) != hasher(s2));
   EXPECT_TRUE(hasher(s1) != hasher(s3));
@@ -109,11 +110,11 @@ TEST(StringPieceHasher, HashMap) {
   string s2("bar");
   string s3("baz");
 
-  StringPiece p1(s1);
-  StringPiece p2(s2);
-  StringPiece p3(s3);
+  absl::string_view p1(s1);
+  absl::string_view p2(s2);
+  absl::string_view p3(s3);
 
-  std::unordered_map<StringPiece, int, StringPieceHasher> map;
+  std::unordered_map<absl::string_view, int, StringPieceHasher> map;
 
   map.insert(std::make_pair(p1, 0));
   map.insert(std::make_pair(p2, 1));
diff --git a/tensorflow/core/lib/io/block.cc b/tensorflow/core/lib/io/block.cc
index 4c30486cc4..3e4ce3c08b 100644
--- a/tensorflow/core/lib/io/block.cc
+++ b/tensorflow/core/lib/io/block.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/lib/io/block.h"
 
 #include <algorithm>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/format.h"
@@ -95,10 +96,11 @@ class Block::Iter : public Iterator {
   uint32 current_;
   uint32 restart_index_;  // Index of restart block in which current_ falls
   string key_;
-  StringPiece value_;
+  absl::string_view value_;
   Status status_;
 
-  inline int Compare(const StringPiece& a, const StringPiece& b) const {
+  inline int Compare(const absl::string_view& a,
+                     const absl::string_view& b) const {
     return a.compare(b);
   }
 
@@ -119,7 +121,7 @@ class Block::Iter : public Iterator {
 
     // ParseNextKey() starts at the end of value_, so set value_ accordingly
     uint32 offset = GetRestartPoint(index);
-    value_ = StringPiece(data_ + offset, 0);
+    value_ = absl::string_view(data_ + offset, 0);
   }
 
  public:
@@ -134,11 +136,11 @@ class Block::Iter : public Iterator {
 
   bool Valid() const override { return current_ < restarts_; }
   Status status() const override { return status_; }
-  StringPiece key() const override {
+  absl::string_view key() const override {
     assert(Valid());
     return key_;
   }
-  StringPiece value() const override {
+  absl::string_view value() const override {
     assert(Valid());
     return value_;
   }
@@ -148,7 +150,7 @@ class Block::Iter : public Iterator {
     ParseNextKey();
   }
 
-  void Seek(const StringPiece& target) override {
+  void Seek(const absl::string_view& target) override {
     // Binary search in restart array to find the last restart point
     // with a key < target
     uint32 left = 0;
@@ -164,7 +166,7 @@ class Block::Iter : public Iterator {
         CorruptionError();
         return;
       }
-      StringPiece mid_key(key_ptr, non_shared);
+      absl::string_view mid_key(key_ptr, non_shared);
       if (Compare(mid_key, target) < 0) {
         // Key at "mid" is smaller than "target".  Therefore all
         // blocks before "mid" are uninteresting.
@@ -199,7 +201,7 @@ class Block::Iter : public Iterator {
     restart_index_ = num_restarts_;
     status_ = errors::DataLoss("bad entry in block");
     key_.clear();
-    value_ = StringPiece();
+    value_ = absl::string_view();
   }
 
   bool ParseNextKey() {
@@ -222,7 +224,7 @@ class Block::Iter : public Iterator {
     } else {
       key_.resize(shared);
       key_.append(p, non_shared);
-      value_ = StringPiece(p + non_shared, value_length);
+      value_ = absl::string_view(p + non_shared, value_length);
       while (restart_index_ + 1 < num_restarts_ &&
              GetRestartPoint(restart_index_ + 1) < current_) {
         ++restart_index_;
diff --git a/tensorflow/core/lib/io/block_builder.cc b/tensorflow/core/lib/io/block_builder.cc
index b2921c076c..44e564bf58 100644
--- a/tensorflow/core/lib/io/block_builder.cc
+++ b/tensorflow/core/lib/io/block_builder.cc
@@ -41,6 +41,7 @@ limitations under the License.
 
 #include <assert.h>
 #include <algorithm>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/io/table_builder.h"
 
@@ -68,7 +69,7 @@ size_t BlockBuilder::CurrentSizeEstimate() const {
           sizeof(uint32));                     // Restart array length
 }
 
-StringPiece BlockBuilder::Finish() {
+absl::string_view BlockBuilder::Finish() {
   // Append restart array
   CHECK_LE(restarts_.size(), std::numeric_limits<uint32_t>::max());
   for (const auto r : restarts_) {
@@ -77,11 +78,12 @@ StringPiece BlockBuilder::Finish() {
   // Downcast safe because of the CHECK.
   core::PutFixed32(&buffer_, static_cast<uint32_t>(restarts_.size()));
   finished_ = true;
-  return StringPiece(buffer_);
+  return absl::string_view(buffer_);
 }
 
-void BlockBuilder::Add(const StringPiece& key, const StringPiece& value) {
-  StringPiece last_key_piece(last_key_);
+void BlockBuilder::Add(const absl::string_view& key,
+                       const absl::string_view& value) {
+  absl::string_view last_key_piece(last_key_);
   assert(!finished_);
   assert(counter_ <= options_->block_restart_interval);
   assert(buffer_.empty()  // No values yet?
@@ -117,7 +119,7 @@ void BlockBuilder::Add(const StringPiece& key, const StringPiece& value) {
   // Update state
   last_key_.resize(shared);
   last_key_.append(key.data() + shared, non_shared);
-  assert(StringPiece(last_key_) == key);
+  assert(absl::string_view(last_key_) == key);
   counter_++;
 }
 
diff --git a/tensorflow/core/lib/io/block_builder.h b/tensorflow/core/lib/io/block_builder.h
index 117b6a0bb8..1237f7fb7f 100644
--- a/tensorflow/core/lib/io/block_builder.h
+++ b/tensorflow/core/lib/io/block_builder.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include <vector>
 
 #include <stdint.h>
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -36,12 +36,12 @@ class BlockBuilder {
 
   // REQUIRES: Finish() has not been called since the last call to Reset().
   // REQUIRES: key is larger than any previously added key
-  void Add(const StringPiece& key, const StringPiece& value);
+  void Add(const absl::string_view& key, const absl::string_view& value);
 
   // Finish building the block and return a slice that refers to the
   // block contents.  The returned slice will remain valid for the
   // lifetime of this builder or until Reset() is called.
-  StringPiece Finish();
+  absl::string_view Finish();
 
   // Returns an estimate of the current (uncompressed) size of the block
   // we are building.
diff --git a/tensorflow/core/lib/io/format.cc b/tensorflow/core/lib/io/format.cc
index 0c24c660a2..81c9d59d4a 100644
--- a/tensorflow/core/lib/io/format.cc
+++ b/tensorflow/core/lib/io/format.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <limits>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/format.h"
 
 #include "tensorflow/core/lib/core/coding.h"
@@ -35,7 +36,7 @@ void BlockHandle::EncodeTo(string* dst) const {
   core::PutVarint64(dst, size_);
 }
 
-Status BlockHandle::DecodeFrom(StringPiece* input) {
+Status BlockHandle::DecodeFrom(absl::string_view* input) {
   if (core::GetVarint64(input, &offset_) && core::GetVarint64(input, &size_)) {
     return Status::OK();
   } else {
@@ -55,7 +56,7 @@ void Footer::EncodeTo(string* dst) const {
   assert(dst->size() == original_size + kEncodedLength);
 }
 
-Status Footer::DecodeFrom(StringPiece* input) {
+Status Footer::DecodeFrom(absl::string_view* input) {
   const char* magic_ptr = input->data() + kEncodedLength - 8;
   const uint32 magic_lo = core::DecodeFixed32(magic_ptr);
   const uint32 magic_hi = core::DecodeFixed32(magic_ptr + 4);
@@ -72,14 +73,14 @@ Status Footer::DecodeFrom(StringPiece* input) {
   if (result.ok()) {
     // We skip over any leftover data (just padding for now) in "input"
     const char* end = magic_ptr + 8;
-    *input = StringPiece(end, input->data() + input->size() - end);
+    *input = absl::string_view(end, input->data() + input->size() - end);
   }
   return result;
 }
 
 Status ReadBlock(RandomAccessFile* file, const BlockHandle& handle,
                  BlockContents* result) {
-  result->data = StringPiece();
+  result->data = absl::string_view();
   result->cachable = false;
   result->heap_allocated = false;
 
@@ -92,7 +93,7 @@ Status ReadBlock(RandomAccessFile* file, const BlockHandle& handle,
   }
 
   char* buf = new char[n + kBlockTrailerSize];
-  StringPiece contents;
+  absl::string_view contents;
   Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf);
   if (!s.ok()) {
     delete[] buf;
@@ -124,11 +125,11 @@ Status ReadBlock(RandomAccessFile* file, const BlockHandle& handle,
         // Use it directly under the assumption that it will be live
         // while the file is open.
         delete[] buf;
-        result->data = StringPiece(data, n);
+        result->data = absl::string_view(data, n);
         result->heap_allocated = false;
         result->cachable = false;  // Do not double-cache
       } else {
-        result->data = StringPiece(buf, n);
+        result->data = absl::string_view(buf, n);
         result->heap_allocated = true;
         result->cachable = true;
       }
@@ -148,7 +149,7 @@ Status ReadBlock(RandomAccessFile* file, const BlockHandle& handle,
         return errors::DataLoss("corrupted compressed block contents");
       }
       delete[] buf;
-      result->data = StringPiece(ubuf, ulength);
+      result->data = absl::string_view(ubuf, ulength);
       result->heap_allocated = true;
       result->cachable = true;
       break;
diff --git a/tensorflow/core/lib/io/format.h b/tensorflow/core/lib/io/format.h
index fd5c3470f6..96a92f1209 100644
--- a/tensorflow/core/lib/io/format.h
+++ b/tensorflow/core/lib/io/format.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <stdint.h>
 #include <string>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/table_builder.h"
 
 namespace tensorflow {
@@ -43,7 +43,7 @@ class BlockHandle {
   void set_size(uint64 size) { size_ = size; }
 
   void EncodeTo(string* dst) const;
-  Status DecodeFrom(StringPiece* input);
+  Status DecodeFrom(absl::string_view* input);
 
   // Maximum encoding length of a BlockHandle
   enum { kMaxEncodedLength = 10 + 10 };
@@ -68,7 +68,7 @@ class Footer {
   void set_index_handle(const BlockHandle& h) { index_handle_ = h; }
 
   void EncodeTo(string* dst) const;
-  Status DecodeFrom(StringPiece* input);
+  Status DecodeFrom(absl::string_view* input);
 
   // Encoded length of a Footer.  Note that the serialization of a
   // Footer will always occupy exactly this many bytes.  It consists
@@ -89,7 +89,7 @@ static const uint64 kTableMagicNumber = 0xdb4775248b80fb57ull;
 static const size_t kBlockTrailerSize = 5;
 
 struct BlockContents {
-  StringPiece data;     // Actual contents of data
+  absl::string_view data;  // Actual contents of data
   bool cachable;        // True iff data can be cached
   bool heap_allocated;  // True iff caller should delete[] data.data()
 };
diff --git a/tensorflow/core/lib/io/inputbuffer.cc b/tensorflow/core/lib/io/inputbuffer.cc
index 4d35af49b2..a12cf97a9c 100644
--- a/tensorflow/core/lib/io/inputbuffer.cc
+++ b/tensorflow/core/lib/io/inputbuffer.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/inputbuffer.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -31,7 +32,7 @@ InputBuffer::InputBuffer(RandomAccessFile* file, size_t buffer_bytes)
 InputBuffer::~InputBuffer() { delete[] buf_; }
 
 Status InputBuffer::FillBuffer() {
-  StringPiece data;
+  absl::string_view data;
   Status s = file_->Read(file_pos_, size_, &data, buf_);
   if (data.data() != buf_) {
     memmove(buf_, data.data(), data.size());
diff --git a/tensorflow/core/lib/io/inputbuffer_test.cc b/tensorflow/core/lib/io/inputbuffer_test.cc
index 3608008b30..2404d5e678 100644
--- a/tensorflow/core/lib/io/inputbuffer_test.cc
+++ b/tensorflow/core/lib/io/inputbuffer_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/lib/io/inputbuffer.h"
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/env.h"
 
 #include "tensorflow/core/lib/core/coding.h"
@@ -192,31 +193,31 @@ TEST(InputBuffer, ReadNBytes) {
 
     EXPECT_EQ(0, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(3, read, &bytes_read));
-    EXPECT_EQ(StringPiece(read, 3), "012");
+    EXPECT_EQ(absl::string_view(read, 3), "012");
 
     EXPECT_EQ(3, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(0, read, &bytes_read));
-    EXPECT_EQ(StringPiece(read, 3), "012");
+    EXPECT_EQ(absl::string_view(read, 3), "012");
 
     EXPECT_EQ(3, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(4, read, &bytes_read));
-    EXPECT_EQ(StringPiece(read, 4), "3456");
+    EXPECT_EQ(absl::string_view(read, 4), "3456");
 
     EXPECT_EQ(7, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(0, read, &bytes_read));
-    EXPECT_EQ(StringPiece(read, 4), "3456");
+    EXPECT_EQ(absl::string_view(read, 4), "3456");
 
     EXPECT_EQ(7, in.Tell());
     EXPECT_TRUE(errors::IsOutOfRange(in.ReadNBytes(5, read, &bytes_read)));
-    EXPECT_EQ(StringPiece(read, 3), "789");
+    EXPECT_EQ(absl::string_view(read, 3), "789");
 
     EXPECT_EQ(10, in.Tell());
     EXPECT_TRUE(errors::IsOutOfRange(in.ReadNBytes(5, read, &bytes_read)));
-    EXPECT_EQ(StringPiece(read, 3), "789");
+    EXPECT_EQ(absl::string_view(read, 3), "789");
 
     EXPECT_EQ(10, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(0, read, &bytes_read));
-    EXPECT_EQ(StringPiece(read, 3), "789");
+    EXPECT_EQ(absl::string_view(read, 3), "789");
     EXPECT_EQ(10, in.Tell());
   }
 }
@@ -312,7 +313,7 @@ TEST(InputBuffer, ReadVarint32) {
     for (uint32 number : data) {
       varint.clear();
       core::PutVarint32(&varint, number);
-      TF_CHECK_OK(file->Append(StringPiece(varint)));
+      TF_CHECK_OK(file->Append(absl::string_view(varint)));
     }
   }
 
@@ -351,7 +352,7 @@ TEST(InputBuffer, ReadVarint64) {
     for (uint64 number : data) {
       varint.clear();
       core::PutVarint64(&varint, number);
-      TF_CHECK_OK(file->Append(StringPiece(varint)));
+      TF_CHECK_OK(file->Append(absl::string_view(varint)));
     }
   }
 
diff --git a/tensorflow/core/lib/io/iterator.cc b/tensorflow/core/lib/io/iterator.cc
index 50eaa50b5c..5ba6c2d884 100644
--- a/tensorflow/core/lib/io/iterator.cc
+++ b/tensorflow/core/lib/io/iterator.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/iterator.h"
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace table {
@@ -55,16 +56,16 @@ class EmptyIterator : public Iterator {
  public:
   explicit EmptyIterator(const Status& s) : status_(s) {}
   bool Valid() const override { return false; }
-  void Seek(const StringPiece& target) override {}
+  void Seek(const absl::string_view& target) override {}
   void SeekToFirst() override {}
   void Next() override { assert(false); }
-  StringPiece key() const override {
+  absl::string_view key() const override {
     assert(false);
-    return StringPiece();
+    return absl::string_view();
   }
-  StringPiece value() const override {
+  absl::string_view value() const override {
     assert(false);
-    return StringPiece();
+    return absl::string_view();
   }
   Status status() const override { return status_; }
 
diff --git a/tensorflow/core/lib/io/iterator.h b/tensorflow/core/lib/io/iterator.h
index 2b9326d7dd..8532b0f4e6 100644
--- a/tensorflow/core/lib/io/iterator.h
+++ b/tensorflow/core/lib/io/iterator.h
@@ -26,8 +26,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LIB_IO_ITERATOR_H_
 #define TENSORFLOW_LIB_IO_ITERATOR_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 namespace table {
@@ -48,7 +48,7 @@ class Iterator {
   // Position at the first key in the source that is at or past target.
   // The iterator is Valid() after this call iff the source contains
   // an entry that comes at or past target.
-  virtual void Seek(const StringPiece& target) = 0;
+  virtual void Seek(const absl::string_view& target) = 0;
 
   // Moves to the next entry in the source.  After this call, Valid() is
   // true iff the iterator was not positioned at the last entry in the source.
@@ -59,13 +59,13 @@ class Iterator {
   // the returned slice is valid only until the next modification of
   // the iterator.
   // REQUIRES: Valid()
-  virtual StringPiece key() const = 0;
+  virtual absl::string_view key() const = 0;
 
   // Return the value for the current entry.  The underlying storage for
   // the returned slice is valid only until the next modification of
   // the iterator.
   // REQUIRES: Valid()
-  virtual StringPiece value() const = 0;
+  virtual absl::string_view value() const = 0;
 
   // If an error has occurred, return it.  Else return an ok status.
   virtual Status status() const = 0;
diff --git a/tensorflow/core/lib/io/path.cc b/tensorflow/core/lib/io/path.cc
index b75dcecadf..f64a7e1d86 100644
--- a/tensorflow/core/lib/io/path.cc
+++ b/tensorflow/core/lib/io/path.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <stdlib.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include "absl/strings/string_view.h"
 #if !defined(PLATFORM_WINDOWS)
 #include <unistd.h>
 #endif
@@ -35,10 +36,10 @@ namespace tensorflow {
 namespace io {
 namespace internal {
 
-string JoinPathImpl(std::initializer_list<StringPiece> paths) {
+string JoinPathImpl(std::initializer_list<absl::string_view> paths) {
   string result;
 
-  for (StringPiece path : paths) {
+  for (absl::string_view path : paths) {
     if (path.empty()) continue;
 
     if (result.empty()) {
@@ -68,62 +69,65 @@ string JoinPathImpl(std::initializer_list<StringPiece> paths) {
 // no "/" in the path, the first part of the output is the scheme and host, and
 // the second is the path. If the only "/" in the path is the first character,
 // it is included in the first part of the output.
-std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) {
-  StringPiece scheme, host, path;
+std::pair<absl::string_view, absl::string_view> SplitPath(
+    absl::string_view uri) {
+  absl::string_view scheme, host, path;
   ParseURI(uri, &scheme, &host, &path);
 
   auto pos = path.rfind('/');
 #ifdef PLATFORM_WINDOWS
-  if (pos == StringPiece::npos) pos = path.rfind('\\');
+  if (pos == absl::string_view::npos) pos = path.rfind('\\');
 #endif
   // Handle the case with no '/' in 'path'.
-  if (pos == StringPiece::npos)
-    return std::make_pair(StringPiece(uri.begin(), host.end() - uri.begin()),
-                          path);
+  if (pos == absl::string_view::npos)
+    return std::make_pair(
+        absl::string_view(uri.begin(), host.end() - uri.begin()), path);
 
   // Handle the case with a single leading '/' in 'path'.
   if (pos == 0)
     return std::make_pair(
-        StringPiece(uri.begin(), path.begin() + 1 - uri.begin()),
-        StringPiece(path.data() + 1, path.size() - 1));
+        absl::string_view(uri.begin(), path.begin() + 1 - uri.begin()),
+        absl::string_view(path.data() + 1, path.size() - 1));
 
   return std::make_pair(
-      StringPiece(uri.begin(), path.begin() + pos - uri.begin()),
-      StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
+      absl::string_view(uri.begin(), path.begin() + pos - uri.begin()),
+      absl::string_view(path.data() + pos + 1, path.size() - (pos + 1)));
 }
 
 // Return the parts of the basename of path, split on the final ".".
 // If there is no "." in the basename or "." is the final character in the
 // basename, the second value will be empty.
-std::pair<StringPiece, StringPiece> SplitBasename(StringPiece path) {
+std::pair<absl::string_view, absl::string_view> SplitBasename(
+    absl::string_view path) {
   path = Basename(path);
 
   auto pos = path.rfind('.');
-  if (pos == StringPiece::npos)
-    return std::make_pair(path, StringPiece(path.data() + path.size(), 0));
+  if (pos == absl::string_view::npos)
+    return std::make_pair(path,
+                          absl::string_view(path.data() + path.size(), 0));
   return std::make_pair(
-      StringPiece(path.data(), pos),
-      StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
+      absl::string_view(path.data(), pos),
+      absl::string_view(path.data() + pos + 1, path.size() - (pos + 1)));
 }
 }  // namespace internal
 
-bool IsAbsolutePath(StringPiece path) {
+bool IsAbsolutePath(absl::string_view path) {
   return !path.empty() && path[0] == '/';
 }
 
-StringPiece Dirname(StringPiece path) {
+absl::string_view Dirname(absl::string_view path) {
   return internal::SplitPath(path).first;
 }
 
-StringPiece Basename(StringPiece path) {
+absl::string_view Basename(absl::string_view path) {
   return internal::SplitPath(path).second;
 }
 
-StringPiece Extension(StringPiece path) {
+absl::string_view Extension(absl::string_view path) {
   return internal::SplitBasename(path).second;
 }
 
-string CleanPath(StringPiece unclean_path) {
+string CleanPath(absl::string_view unclean_path) {
   string path(unclean_path);
   const char* src = path.c_str();
   string::iterator dst = path.begin();
@@ -204,8 +208,8 @@ string CleanPath(StringPiece unclean_path) {
   return path;
 }
 
-void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
-              StringPiece* path) {
+void ParseURI(absl::string_view remaining, absl::string_view* scheme,
+              absl::string_view* host, absl::string_view* path) {
   // 0. Parse scheme
   // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]*
   // TODO(keveman): Allow "+" and "-" in the scheme.
@@ -217,8 +221,8 @@ void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
            .OneLiteral("://")
            .GetResult(&remaining, scheme)) {
     // If there's no scheme, assume the entire string is a path.
-    *scheme = StringPiece(remaining.begin(), 0);
-    *host = StringPiece(remaining.begin(), 0);
+    *scheme = absl::string_view(remaining.begin(), 0);
+    *host = absl::string_view(remaining.begin(), 0);
     *path = remaining;
     return;
   }
@@ -227,7 +231,7 @@ void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
   if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) {
     // No path, so the rest of the URI is the host.
     *host = remaining;
-    *path = StringPiece(remaining.end(), 0);
+    *path = absl::string_view(remaining.end(), 0);
     return;
   }
 
@@ -235,7 +239,8 @@ void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
   *path = remaining;
 }
 
-string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
+string CreateURI(absl::string_view scheme, absl::string_view host,
+                 absl::string_view path) {
   if (scheme.empty()) {
     return string(path);
   }
diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h
index 38fb0c5d86..288b6ee951 100644
--- a/tensorflow/core/lib/io/path.h
+++ b/tensorflow/core/lib/io/path.h
@@ -16,13 +16,13 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_IO_PATH_H_
 #define TENSORFLOW_CORE_LIB_IO_PATH_H_
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace io {
 namespace internal {
-string JoinPathImpl(std::initializer_list<tensorflow::StringPiece> paths);
+string JoinPathImpl(std::initializer_list<absl::string_view> paths);
 }
 
 // Utility routines for processing filenames
@@ -49,20 +49,20 @@ string JoinPath(const T&... args) {
 #endif /* SWIG */
 
 // Return true if path is absolute.
-bool IsAbsolutePath(tensorflow::StringPiece path);
+bool IsAbsolutePath(absl::string_view path);
 
 // Returns the part of the path before the final "/".  If there is a single
 // leading "/" in the path, the result will be the leading "/".  If there is
 // no "/" in the path, the result is the empty prefix of the input.
-tensorflow::StringPiece Dirname(tensorflow::StringPiece path);
+absl::string_view Dirname(absl::string_view path);
 
 // Returns the part of the path after the final "/".  If there is no
 // "/" in the path, the result is the same as the input.
-tensorflow::StringPiece Basename(tensorflow::StringPiece path);
+absl::string_view Basename(absl::string_view path);
 
 // Returns the part of the basename of path after the final ".".  If
 // there is no "." in the basename, the result is empty.
-tensorflow::StringPiece Extension(tensorflow::StringPiece path);
+absl::string_view Extension(absl::string_view path);
 
 // Collapse duplicate "/"s, resolve ".." and "." path elements, remove
 // trailing "/".
@@ -71,7 +71,7 @@ tensorflow::StringPiece Extension(tensorflow::StringPiece path);
 // invoke any system calls (getcwd(2)) in order to resolve relative
 // paths with respect to the actual working directory.  That is, this is purely
 // string manipulation, completely independent of process state.
-string CleanPath(tensorflow::StringPiece path);
+string CleanPath(absl::string_view path);
 
 // Populates the scheme, host, and path from a URI. scheme, host, and path are
 // guaranteed by this function to point into the contents of uri, even if
@@ -81,13 +81,13 @@ string CleanPath(tensorflow::StringPiece path);
 // - If the URI is invalid, scheme and host are set to empty strings and the
 //   passed string is assumed to be a path
 // - If the URI omits the path (e.g. file://host), then the path is left empty.
-void ParseURI(tensorflow::StringPiece uri, tensorflow::StringPiece* scheme,
-              tensorflow::StringPiece* host, tensorflow::StringPiece* path);
+void ParseURI(absl::string_view uri, absl::string_view* scheme,
+              absl::string_view* host, absl::string_view* path);
 
 // Creates a URI from a scheme, host, and path. If the scheme is empty, we just
 // return the path.
-string CreateURI(tensorflow::StringPiece scheme, tensorflow::StringPiece host,
-                 tensorflow::StringPiece path);
+string CreateURI(absl::string_view scheme, absl::string_view host,
+                 absl::string_view path);
 
 // Creates a temporary file name with an extension.
 string GetTempFilename(const string& extension);
diff --git a/tensorflow/core/lib/io/path_test.cc b/tensorflow/core/lib/io/path_test.cc
index 0090b9100c..e22b03f0b0 100644
--- a/tensorflow/core/lib/io/path_test.cc
+++ b/tensorflow/core/lib/io/path_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/path.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -101,8 +102,8 @@ TEST(PathTest, CleanPath) {
 
 #define EXPECT_PARSE_URI(uri, scheme, host, path)  \
   do {                                             \
-    StringPiece u(uri);                            \
-    StringPiece s, h, p;                           \
+    absl::string_view u(uri);                      \
+    absl::string_view s, h, p;                     \
     ParseURI(u, &s, &h, &p);                       \
     EXPECT_EQ(scheme, s);                          \
     EXPECT_EQ(host, h);                            \
diff --git a/tensorflow/core/lib/io/proto_encode_helper.h b/tensorflow/core/lib/io/proto_encode_helper.h
index 34905520f1..7e989e921f 100644
--- a/tensorflow/core/lib/io/proto_encode_helper.h
+++ b/tensorflow/core/lib/io/proto_encode_helper.h
@@ -16,8 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_IO_PROTO_ENCODE_HELPER_H_
 #define TENSORFLOW_CORE_LIB_IO_PROTO_ENCODE_HELPER_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 // A helper class for appending various kinds of values in protocol
@@ -48,7 +48,7 @@ class ProtoEncodeHelper {
     Encode32(combine(tag, WIRETYPE_VARINT));
     EncodeBool(v);
   }
-  void WriteString(int tag, StringPiece v) {
+  void WriteString(int tag, absl::string_view v) {
     Encode32(combine(tag, WIRETYPE_LENGTH_DELIMITED));
     Encode32(v.size());
     EncodeBytes(v.data(), v.size());
@@ -57,7 +57,7 @@ class ProtoEncodeHelper {
     Encode32(combine(tag, WIRETYPE_LENGTH_DELIMITED));
     Encode32(len);
   }
-  void WriteRawBytes(StringPiece v) { EncodeBytes(v.data(), v.size()); }
+  void WriteRawBytes(absl::string_view v) { EncodeBytes(v.data(), v.size()); }
 
  private:
   // Note: this module's behavior must match the protocol buffer wire encoding
diff --git a/tensorflow/core/lib/io/random_inputstream.cc b/tensorflow/core/lib/io/random_inputstream.cc
index e85367df9c..4d9fe75497 100644
--- a/tensorflow/core/lib/io/random_inputstream.cc
+++ b/tensorflow/core/lib/io/random_inputstream.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/random_inputstream.h"
 #include <memory>
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace io {
@@ -37,7 +38,7 @@ Status RandomAccessInputStream::ReadNBytes(int64 bytes_to_read,
   result->clear();
   result->resize(bytes_to_read);
   char* result_buffer = &(*result)[0];
-  StringPiece data;
+  absl::string_view data;
   Status s = file_->Read(pos_, bytes_to_read, &data, result_buffer);
   if (data.data() != result_buffer) {
     memmove(result_buffer, data.data(), data.size());
@@ -61,7 +62,7 @@ Status RandomAccessInputStream::SkipNBytes(int64 bytes_to_skip) {
   // Try to read 1 bytes first, if we could complete the read then EOF is
   // not reached yet and we could return.
   if (bytes_to_skip > 0) {
-    StringPiece data;
+    absl::string_view data;
     Status s = file_->Read(pos_ + bytes_to_skip - 1, 1, &data, scratch.get());
     if ((s.ok() || errors::IsOutOfRange(s)) && data.size() == 1) {
       pos_ += bytes_to_skip;
@@ -71,7 +72,7 @@ Status RandomAccessInputStream::SkipNBytes(int64 bytes_to_skip) {
   // Read kDefaultSkipSize at a time till bytes_to_skip.
   while (bytes_to_skip > 0) {
     int64 bytes_to_read = std::min<int64>(kMaxSkipSize, bytes_to_skip);
-    StringPiece data;
+    absl::string_view data;
     Status s = file_->Read(pos_, bytes_to_read, &data, scratch.get());
     if (s.ok() || errors::IsOutOfRange(s)) {
       pos_ += data.size();
diff --git a/tensorflow/core/lib/io/record_writer.cc b/tensorflow/core/lib/io/record_writer.cc
index 2c6db2487e..0aead25457 100644
--- a/tensorflow/core/lib/io/record_writer.cc
+++ b/tensorflow/core/lib/io/record_writer.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/record_writer.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/hash/crc32c.h"
 #include "tensorflow/core/lib/io/compression.h"
@@ -88,7 +89,7 @@ RecordWriter::~RecordWriter() {
   }
 }
 
-Status RecordWriter::WriteRecord(StringPiece data) {
+Status RecordWriter::WriteRecord(absl::string_view data) {
   if (dest_ == nullptr) {
     return Status(::tensorflow::error::FAILED_PRECONDITION,
                   "Writer not initialized or previously closed");
@@ -102,9 +103,9 @@ Status RecordWriter::WriteRecord(StringPiece data) {
   char footer[kFooterSize];
   PopulateHeader(header, data.data(), data.size());
   PopulateFooter(footer, data.data(), data.size());
-  TF_RETURN_IF_ERROR(dest_->Append(StringPiece(header, sizeof(header))));
+  TF_RETURN_IF_ERROR(dest_->Append(absl::string_view(header, sizeof(header))));
   TF_RETURN_IF_ERROR(dest_->Append(data));
-  return dest_->Append(StringPiece(footer, sizeof(footer)));
+  return dest_->Append(absl::string_view(footer, sizeof(footer)));
 }
 
 Status RecordWriter::Close() {
diff --git a/tensorflow/core/lib/io/record_writer.h b/tensorflow/core/lib/io/record_writer.h
index 1212e1fafb..d797ab82f3 100644
--- a/tensorflow/core/lib/io/record_writer.h
+++ b/tensorflow/core/lib/io/record_writer.h
@@ -16,9 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_IO_RECORD_WRITER_H_
 #define TENSORFLOW_CORE_LIB_IO_RECORD_WRITER_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/crc32c.h"
 #if !defined(IS_SLIM_BUILD)
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
@@ -69,7 +69,7 @@ class RecordWriter {
   // implicit Close() call in the destructor.
   ~RecordWriter();
 
-  Status WriteRecord(StringPiece slice);
+  Status WriteRecord(absl::string_view slice);
 
   // Flushes any buffered data held by underlying containers of the
   // RecordWriter to the WritableFile. Does *not* flush the
diff --git a/tensorflow/core/lib/io/recordio_test.cc b/tensorflow/core/lib/io/recordio_test.cc
index 946d7188d3..eda3316381 100644
--- a/tensorflow/core/lib/io/recordio_test.cc
+++ b/tensorflow/core/lib/io/recordio_test.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -58,7 +59,7 @@ class StringDest : public WritableFile {
   Status Close() override { return Status::OK(); }
   Status Flush() override { return Status::OK(); }
   Status Sync() override { return Status::OK(); }
-  Status Append(StringPiece slice) override {
+  Status Append(absl::string_view slice) override {
     contents_->append(slice.data(), slice.size());
     return Status::OK();
   }
@@ -72,7 +73,7 @@ class StringSource : public RandomAccessFile {
   explicit StringSource(string* contents)
       : contents_(contents), force_error_(false) {}
 
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     if (force_error_) {
       force_error_ = false;
@@ -86,7 +87,7 @@ class StringSource : public RandomAccessFile {
     if (contents_->size() < offset + n) {
       n = contents_->size() - offset;
     }
-    *result = StringPiece(contents_->data() + offset, n);
+    *result = absl::string_view(contents_->data() + offset, n);
     return Status::OK();
   }
 
@@ -123,7 +124,7 @@ class RecordioTest : public ::testing::Test {
 
   void Write(const string& msg) {
     ASSERT_TRUE(!reading_) << "Write() after starting to read";
-    TF_ASSERT_OK(writer_->WriteRecord(StringPiece(msg)));
+    TF_ASSERT_OK(writer_->WriteRecord(absl::string_view(msg)));
   }
 
   size_t WrittenBytes() const { return contents_.size(); }
@@ -267,7 +268,7 @@ TEST_F(RecordioTest, NonSequentialReadsWithCompression) {
 }
 
 // Tests of all the error paths in log_reader.cc follow:
-void AssertHasSubstr(StringPiece s, StringPiece expected) {
+void AssertHasSubstr(absl::string_view s, absl::string_view expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << s << " does not contain " << expected;
 }
diff --git a/tensorflow/core/lib/io/snappy/snappy_buffers_test.cc b/tensorflow/core/lib/io/snappy/snappy_buffers_test.cc
index e0918c70a7..c82b424390 100644
--- a/tensorflow/core/lib/io/snappy/snappy_buffers_test.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_buffers_test.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/inputbuffer.h"
 #include "tensorflow/core/lib/io/snappy/snappy_inputbuffer.h"
@@ -68,7 +69,7 @@ Status TestMultipleWrites(size_t compress_input_buf_size,
                              compress_output_buf_size);
 
   for (int i = 0; i < num_writes; i++) {
-    TF_RETURN_IF_ERROR(out.Write(StringPiece(data)));
+    TF_RETURN_IF_ERROR(out.Write(absl::string_view(data)));
     if (with_flush) {
       TF_RETURN_IF_ERROR(out.Flush());
     }
@@ -87,7 +88,7 @@ Status TestMultipleWrites(size_t compress_input_buf_size,
     std::unique_ptr<RandomAccessFile> file_reader;
     TF_RETURN_IF_ERROR(env->NewRandomAccessFile(fname, &file_reader));
 
-    StringPiece data;
+    absl::string_view data;
     size_t file_pos = 0;
     size_t bytes_to_read = 256;
     char* scratch = new char[bytes_to_read];
@@ -97,14 +98,14 @@ Status TestMultipleWrites(size_t compress_input_buf_size,
     while ((file_reader->Read(file_pos, bytes_to_read, &data, scratch)).ok()) {
       file_pos += data.size();
       TF_CHECK_OK(
-          corrupt_file_writer->Append(StringPiece(buffer, buffer_size)));
+          corrupt_file_writer->Append(absl::string_view(buffer, buffer_size)));
       memcpy(buffer, data.data(), data.size());
       buffer_size = data.size();
     }
 
     // Drop the last byte. File is now corrupt.
-    TF_CHECK_OK(
-        corrupt_file_writer->Append(StringPiece(buffer, buffer_size - 1)));
+    TF_CHECK_OK(corrupt_file_writer->Append(
+        absl::string_view(buffer, buffer_size - 1)));
     TF_CHECK_OK(corrupt_file_writer->Flush());
     TF_CHECK_OK(corrupt_file_writer->Close());
     delete[] scratch;
@@ -136,7 +137,7 @@ Status TestMultipleWrites(size_t compress_input_buf_size,
 
 static bool SnappyCompressionSupported() {
   string out;
-  StringPiece in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+  absl::string_view in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
   return port::Snappy_Compress(in.data(), in.size(), &out);
 }
 
diff --git a/tensorflow/core/lib/io/snappy/snappy_inputbuffer.cc b/tensorflow/core/lib/io/snappy/snappy_inputbuffer.cc
index 853d86cb23..35572e6ae1 100644
--- a/tensorflow/core/lib/io/snappy/snappy_inputbuffer.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_inputbuffer.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/snappy/snappy_inputbuffer.h"
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace io {
@@ -159,7 +160,7 @@ Status SnappyInputBuffer::ReadFromFile() {
     bytes_to_read -= avail_in_;
     read_location += avail_in_;
   }
-  StringPiece data;
+  absl::string_view data;
   // Try to read enough data to fill up input_buffer_.
   Status s = file_->Read(file_pos_, bytes_to_read, &data, read_location);
   if (data.data() != read_location) {
diff --git a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
index 3c31016732..d7c1deeb0b 100644
--- a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/snappy/snappy_outputbuffer.h"
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace io {
@@ -30,7 +31,7 @@ SnappyOutputBuffer::SnappyOutputBuffer(WritableFile* file,
       next_out_(output_buffer_.get()),
       avail_out_(output_buffer_bytes) {}
 
-Status SnappyOutputBuffer::Write(StringPiece data) {
+Status SnappyOutputBuffer::Write(absl::string_view data) {
   //
   // The deflated output is accumulated in output_buffer_ and gets written to
   // file as and when needed.
@@ -80,7 +81,7 @@ int32 SnappyOutputBuffer::AvailableInputSpace() const {
   return input_buffer_capacity_ - avail_in_;
 }
 
-void SnappyOutputBuffer::AddToInputBuffer(StringPiece data) {
+void SnappyOutputBuffer::AddToInputBuffer(absl::string_view data) {
   size_t bytes_to_write = data.size();
   DCHECK_LE(bytes_to_write, AvailableInputSpace());
 
@@ -140,7 +141,7 @@ Status SnappyOutputBuffer::DeflateBuffered() {
 Status SnappyOutputBuffer::FlushOutputBufferToFile() {
   size_t bytes_to_write = output_buffer_capacity_ - avail_out_;
   if (bytes_to_write > 0) {
-    Status s = file_->Append(StringPiece(
+    Status s = file_->Append(absl::string_view(
         reinterpret_cast<char*>(output_buffer_.get()), bytes_to_write));
     if (s.ok()) {
       next_out_ = output_buffer_.get();
diff --git a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.h b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.h
index 5aea503846..7a0a3b0e19 100644
--- a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.h
+++ b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_LIB_IO_SNAPPY_OUTPUTBUFFER_H_
 
 #include <string>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/macros.h"
@@ -61,7 +62,7 @@ class SnappyOutputBuffer {
   // to file when the buffer is full.
   //
   // To immediately write contents to file call `Flush()`.
-  Status Write(StringPiece data);
+  Status Write(absl::string_view data);
 
   // Compresses any cached input and writes all output to file. This must be
   // called before the destructor to avoid any data loss.
@@ -70,7 +71,7 @@ class SnappyOutputBuffer {
  private:
   // Appends `data` to `input_buffer_`.
   // Throws if `data.size()` > AvailableInputSpace().
-  void AddToInputBuffer(StringPiece data);
+  void AddToInputBuffer(absl::string_view data);
 
   // Appends `data` to `output_buffer_`. Flushes buffer contents to file when
   // buffer gets full.
diff --git a/tensorflow/core/lib/io/table.cc b/tensorflow/core/lib/io/table.cc
index 1ef7bb6ccd..2c982f85dd 100644
--- a/tensorflow/core/lib/io/table.cc
+++ b/tensorflow/core/lib/io/table.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/table.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/block.h"
@@ -46,7 +47,7 @@ Status Table::Open(const Options& options, RandomAccessFile* file, uint64 size,
   }
 
   char footer_space[Footer::kEncodedLength];
-  StringPiece footer_input;
+  absl::string_view footer_input;
   Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
                         &footer_input, footer_space);
   if (!s.ok()) return s;
@@ -91,14 +92,14 @@ static void DeleteBlock(void* arg, void* ignored) {
 
 // Convert an index iterator value (i.e., an encoded BlockHandle)
 // into an iterator over the contents of the corresponding block.
-Iterator* Table::BlockReader(void* arg, const StringPiece& index_value) {
+Iterator* Table::BlockReader(void* arg, const absl::string_view& index_value) {
   Table* table = reinterpret_cast<Table*>(arg);
   //  Cache* block_cache = table->rep_->options.block_cache;
   Block* block = nullptr;
   //  Cache::Handle* cache_handle = NULL;
 
   BlockHandle handle;
-  StringPiece input = index_value;
+  absl::string_view input = index_value;
   Status s = handle.DecodeFrom(&input);
   // We intentionally allow extra stuff in index_value so that we
   // can add more features in the future.
@@ -126,9 +127,9 @@ Iterator* Table::NewIterator() const {
                              &Table::BlockReader, const_cast<Table*>(this));
 }
 
-Status Table::InternalGet(const StringPiece& k, void* arg,
-                          void (*saver)(void*, const StringPiece&,
-                                        const StringPiece&)) {
+Status Table::InternalGet(const absl::string_view& k, void* arg,
+                          void (*saver)(void*, const absl::string_view&,
+                                        const absl::string_view&)) {
   Status s;
   Iterator* iiter = rep_->index_block->NewIterator();
   iiter->Seek(k);
@@ -149,13 +150,13 @@ Status Table::InternalGet(const StringPiece& k, void* arg,
   return s;
 }
 
-uint64 Table::ApproximateOffsetOf(const StringPiece& key) const {
+uint64 Table::ApproximateOffsetOf(const absl::string_view& key) const {
   Iterator* index_iter = rep_->index_block->NewIterator();
   index_iter->Seek(key);
   uint64 result;
   if (index_iter->Valid()) {
     BlockHandle handle;
-    StringPiece input = index_iter->value();
+    absl::string_view input = index_iter->value();
     Status s = handle.DecodeFrom(&input);
     if (s.ok()) {
       result = handle.offset();
diff --git a/tensorflow/core/lib/io/table.h b/tensorflow/core/lib/io/table.h
index b9c6b8d9d2..aa1ae0ecbf 100644
--- a/tensorflow/core/lib/io/table.h
+++ b/tensorflow/core/lib/io/table.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_LIB_IO_TABLE_H_
 
 #include <stdint.h>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/iterator.h"
 
 namespace tensorflow {
@@ -60,21 +61,22 @@ class Table {
   // bytes, and so includes effects like compression of the underlying data.
   // E.g., the approximate offset of the last key in the table will
   // be close to the file length.
-  uint64 ApproximateOffsetOf(const StringPiece& key) const;
+  uint64 ApproximateOffsetOf(const absl::string_view& key) const;
 
  private:
   struct Rep;
   Rep* rep_;
 
   explicit Table(Rep* rep) { rep_ = rep; }
-  static Iterator* BlockReader(void*, const StringPiece&);
+  static Iterator* BlockReader(void*, const absl::string_view&);
 
   // Calls (*handle_result)(arg, ...) with the entry found after a call
   // to Seek(key).  May not make such a call if filter policy says
   // that key is not present.
-  Status InternalGet(const StringPiece& key, void* arg,
-                     void (*handle_result)(void* arg, const StringPiece& k,
-                                           const StringPiece& v));
+  Status InternalGet(const absl::string_view& key, void* arg,
+                     void (*handle_result)(void* arg,
+                                           const absl::string_view& k,
+                                           const absl::string_view& v));
 
   // No copying allowed
   Table(const Table&);
diff --git a/tensorflow/core/lib/io/table_builder.cc b/tensorflow/core/lib/io/table_builder.cc
index 81333a7b22..c127f3779e 100644
--- a/tensorflow/core/lib/io/table_builder.cc
+++ b/tensorflow/core/lib/io/table_builder.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/lib/io/table_builder.h"
 
 #include <assert.h>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/hash/crc32c.h"
@@ -30,7 +31,7 @@ namespace table {
 
 namespace {
 
-void FindShortestSeparator(string* start, const StringPiece& limit) {
+void FindShortestSeparator(string* start, const absl::string_view& limit) {
   // Find length of common prefix
   size_t min_length = std::min(start->size(), limit.size());
   size_t diff_index = 0;
@@ -47,7 +48,7 @@ void FindShortestSeparator(string* start, const StringPiece& limit) {
         diff_byte + 1 < static_cast<uint8>(limit[diff_index])) {
       (*start)[diff_index]++;
       start->resize(diff_index + 1);
-      assert(StringPiece(*start).compare(limit) < 0);
+      assert(absl::string_view(*start).compare(limit) < 0);
     }
   }
 }
@@ -115,12 +116,13 @@ TableBuilder::~TableBuilder() {
   delete rep_;
 }
 
-void TableBuilder::Add(const StringPiece& key, const StringPiece& value) {
+void TableBuilder::Add(const absl::string_view& key,
+                       const absl::string_view& value) {
   Rep* r = rep_;
   assert(!r->closed);
   if (!ok()) return;
   if (r->num_entries > 0) {
-    assert(key.compare(StringPiece(r->last_key)) > 0);
+    assert(key.compare(absl::string_view(r->last_key)) > 0);
     // See if this key+value would make our current block overly large.  If
     // so, emit the current block before adding this key/value
     const int kOverlyLargeBlockRatio = 2;
@@ -135,7 +137,7 @@ void TableBuilder::Add(const StringPiece& key, const StringPiece& value) {
     FindShortestSeparator(&r->last_key, key);
     string handle_encoding;
     r->pending_handle.EncodeTo(&handle_encoding);
-    r->index_block.Add(r->last_key, StringPiece(handle_encoding));
+    r->index_block.Add(r->last_key, absl::string_view(handle_encoding));
     r->pending_index_entry = false;
   }
 
@@ -169,9 +171,9 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
   //    crc: uint32
   assert(ok());
   Rep* r = rep_;
-  StringPiece raw = block->Finish();
+  absl::string_view raw = block->Finish();
 
-  StringPiece block_contents;
+  absl::string_view block_contents;
   CompressionType type = r->options.compression;
   // TODO(postrelease): Support more compression options: zlib?
   switch (type) {
@@ -198,7 +200,7 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
   block->Reset();
 }
 
-void TableBuilder::WriteRawBlock(const StringPiece& block_contents,
+void TableBuilder::WriteRawBlock(const absl::string_view& block_contents,
                                  CompressionType type, BlockHandle* handle) {
   Rep* r = rep_;
   handle->set_offset(r->offset);
@@ -210,7 +212,7 @@ void TableBuilder::WriteRawBlock(const StringPiece& block_contents,
     uint32 crc = crc32c::Value(block_contents.data(), block_contents.size());
     crc = crc32c::Extend(crc, trailer, 1);  // Extend crc to cover block type
     core::EncodeFixed32(trailer + 1, crc32c::Mask(crc));
-    r->status = r->file->Append(StringPiece(trailer, kBlockTrailerSize));
+    r->status = r->file->Append(absl::string_view(trailer, kBlockTrailerSize));
     if (r->status.ok()) {
       r->offset += block_contents.size() + kBlockTrailerSize;
     }
@@ -240,7 +242,7 @@ Status TableBuilder::Finish() {
       FindShortSuccessor(&r->last_key);
       string handle_encoding;
       r->pending_handle.EncodeTo(&handle_encoding);
-      r->index_block.Add(r->last_key, StringPiece(handle_encoding));
+      r->index_block.Add(r->last_key, absl::string_view(handle_encoding));
       r->pending_index_entry = false;
     }
     WriteBlock(&r->index_block, &index_block_handle);
diff --git a/tensorflow/core/lib/io/table_builder.h b/tensorflow/core/lib/io/table_builder.h
index 0e37e0a77f..c0c9afbf66 100644
--- a/tensorflow/core/lib/io/table_builder.h
+++ b/tensorflow/core/lib/io/table_builder.h
@@ -25,6 +25,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_LIB_IO_TABLE_BUILDER_H_
 
 #include <stdint.h>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/table_options.h"
 
@@ -48,7 +49,7 @@ class TableBuilder {
   // Add key,value to the table being constructed.
   // REQUIRES: key is after any previously added key in lexicographic order.
   // REQUIRES: Finish(), Abandon() have not been called
-  void Add(const StringPiece& key, const StringPiece& value);
+  void Add(const absl::string_view& key, const absl::string_view& value);
 
   // Advanced operation: writes any buffered key/value pairs to file.
   // Can be used to ensure that two adjacent entries never live in
@@ -82,7 +83,7 @@ class TableBuilder {
  private:
   bool ok() const { return status().ok(); }
   void WriteBlock(BlockBuilder* block, BlockHandle* handle);
-  void WriteRawBlock(const StringPiece& data, CompressionType,
+  void WriteRawBlock(const absl::string_view& data, CompressionType,
                      BlockHandle* handle);
 
   struct Rep;
diff --git a/tensorflow/core/lib/io/table_test.cc b/tensorflow/core/lib/io/table_test.cc
index 9cebbf40c6..4821240964 100644
--- a/tensorflow/core/lib/io/table_test.cc
+++ b/tensorflow/core/lib/io/table_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <map>
 #include <string>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/block.h"
 #include "tensorflow/core/lib/io/block_builder.h"
@@ -35,17 +36,17 @@ namespace tensorflow {
 namespace table {
 
 namespace {
-typedef std::pair<StringPiece, StringPiece> StringPiecePair;
+typedef std::pair<absl::string_view, absl::string_view> StringPiecePair;
 }
 
 namespace test {
-static StringPiece RandomString(random::SimplePhilox* rnd, int len,
-                                string* dst) {
+static absl::string_view RandomString(random::SimplePhilox* rnd, int len,
+                                      string* dst) {
   dst->resize(len);
   for (int i = 0; i < len; i++) {
     (*dst)[i] = static_cast<char>(' ' + rnd->Uniform(95));  // ' ' .. '~'
   }
-  return StringPiece(*dst);
+  return absl::string_view(*dst);
 }
 static string RandomKey(random::SimplePhilox* rnd, int len) {
   // Make sure to generate a wide variety of characters so we
@@ -58,9 +59,9 @@ static string RandomKey(random::SimplePhilox* rnd, int len) {
   }
   return result;
 }
-static StringPiece CompressibleString(random::SimplePhilox* rnd,
-                                      double compressed_fraction, size_t len,
-                                      string* dst) {
+static absl::string_view CompressibleString(random::SimplePhilox* rnd,
+                                            double compressed_fraction,
+                                            size_t len, string* dst) {
   int raw = static_cast<int>(len * compressed_fraction);
   if (raw < 1) raw = 1;
   string raw_data;
@@ -72,7 +73,7 @@ static StringPiece CompressibleString(random::SimplePhilox* rnd,
     dst->append(raw_data);
   }
   dst->resize(len);
-  return StringPiece(*dst);
+  return absl::string_view(*dst);
 }
 }  // namespace test
 
@@ -83,7 +84,7 @@ namespace {
 struct STLLessThan {
   STLLessThan() {}
   bool operator()(const string& a, const string& b) const {
-    return StringPiece(a).compare(StringPiece(b)) < 0;
+    return absl::string_view(a).compare(absl::string_view(b)) < 0;
   }
 };
 }  // namespace
@@ -98,7 +99,7 @@ class StringSink : public WritableFile {
   Status Flush() override { return Status::OK(); }
   Status Sync() override { return Status::OK(); }
 
-  Status Append(StringPiece data) override {
+  Status Append(absl::string_view data) override {
     contents_.append(data.data(), data.size());
     return Status::OK();
   }
@@ -109,14 +110,14 @@ class StringSink : public WritableFile {
 
 class StringSource : public RandomAccessFile {
  public:
-  explicit StringSource(const StringPiece& contents)
+  explicit StringSource(const absl::string_view& contents)
       : contents_(contents.data(), contents.size()), bytes_read_(0) {}
 
   ~StringSource() override {}
 
   uint64 Size() const { return contents_.size(); }
 
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     if (offset > contents_.size()) {
       return errors::InvalidArgument("invalid Read offset");
@@ -125,7 +126,7 @@ class StringSource : public RandomAccessFile {
       n = contents_.size() - offset;
     }
     memcpy(scratch, &contents_[offset], n);
-    *result = StringPiece(scratch, n);
+    *result = absl::string_view(scratch, n);
     bytes_read_ += n;
     return Status::OK();
   }
@@ -146,7 +147,7 @@ class Constructor {
   explicit Constructor() : data_(STLLessThan()) {}
   virtual ~Constructor() {}
 
-  void Add(const string& key, const StringPiece& value) {
+  void Add(const string& key, const absl::string_view& value) {
     data_[key] = string(value);
   }
 
@@ -229,7 +230,7 @@ class TableConstructor : public Constructor {
 
   Iterator* NewIterator() const override { return table_->NewIterator(); }
 
-  uint64 ApproximateOffsetOf(const StringPiece& key) const {
+  uint64 ApproximateOffsetOf(const absl::string_view& key) const {
     return table_->ApproximateOffsetOf(key);
   }
 
@@ -347,7 +348,7 @@ class Harness : public ::testing::Test {
           model_iter = data.lower_bound(key);
           if (kVerbose)
             fprintf(stderr, "Seek '%s'\n", str_util::CEscape(key).c_str());
-          iter->Seek(StringPiece(key));
+          iter->Seek(absl::string_view(key));
           ASSERT_EQ(ToStringPiecePair(data, model_iter),
                     ToStringPiecePair(iter));
           break;
@@ -433,7 +434,7 @@ TEST_F(Harness, ZeroRestartPointsInBlock) {
   char data[sizeof(uint32)];
   memset(data, 0, sizeof(data));
   BlockContents contents;
-  contents.data = StringPiece(data, sizeof(data));
+  contents.data = absl::string_view(data, sizeof(data));
   contents.cachable = false;
   contents.heap_allocated = false;
   Block block(contents);
@@ -566,7 +567,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
 
 static bool SnappyCompressionSupported() {
   string out;
-  StringPiece in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+  absl::string_view in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
   return port::Snappy_Compress(in.data(), in.size(), &out);
 }
 
diff --git a/tensorflow/core/lib/io/two_level_iterator.cc b/tensorflow/core/lib/io/two_level_iterator.cc
index ad66ae40d8..9898d2ab42 100644
--- a/tensorflow/core/lib/io/two_level_iterator.cc
+++ b/tensorflow/core/lib/io/two_level_iterator.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/two_level_iterator.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/block.h"
 #include "tensorflow/core/lib/io/format.h"
 #include "tensorflow/core/lib/io/iterator.h"
@@ -25,7 +26,7 @@ namespace table {
 
 namespace {
 
-typedef Iterator* (*BlockFunction)(void*, const StringPiece&);
+typedef Iterator* (*BlockFunction)(void*, const absl::string_view&);
 
 class TwoLevelIterator : public Iterator {
  public:
@@ -34,18 +35,18 @@ class TwoLevelIterator : public Iterator {
 
   ~TwoLevelIterator() override;
 
-  void Seek(const StringPiece& target) override;
+  void Seek(const absl::string_view& target) override;
   void SeekToFirst() override;
   void Next() override;
 
   bool Valid() const override {
     return (data_iter_ == nullptr) ? false : data_iter_->Valid();
   }
-  StringPiece key() const override {
+  absl::string_view key() const override {
     assert(Valid());
     return data_iter_->key();
   }
-  StringPiece value() const override {
+  absl::string_view value() const override {
     assert(Valid());
     return data_iter_->value();
   }
@@ -91,7 +92,7 @@ TwoLevelIterator::~TwoLevelIterator() {
   delete data_iter_;
 }
 
-void TwoLevelIterator::Seek(const StringPiece& target) {
+void TwoLevelIterator::Seek(const absl::string_view& target) {
   index_iter_->Seek(target);
   InitDataBlock();
   if (data_iter_ != nullptr) data_iter_->Seek(target);
@@ -136,7 +137,7 @@ void TwoLevelIterator::InitDataBlock() {
   if (!index_iter_->Valid()) {
     SetDataIterator(nullptr);
   } else {
-    StringPiece handle = index_iter_->value();
+    absl::string_view handle = index_iter_->value();
     if (data_iter_ != nullptr && handle.compare(data_block_handle_) == 0) {
       // data_iter_ is already constructed with this iterator, so
       // no need to change anything
diff --git a/tensorflow/core/lib/io/two_level_iterator.h b/tensorflow/core/lib/io/two_level_iterator.h
index ce972bc68b..7472ec5965 100644
--- a/tensorflow/core/lib/io/two_level_iterator.h
+++ b/tensorflow/core/lib/io/two_level_iterator.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LIB_IO_TWO_LEVEL_ITERATOR_H_
 #define TENSORFLOW_LIB_IO_TWO_LEVEL_ITERATOR_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/iterator.h"
 
 namespace tensorflow {
@@ -32,7 +33,8 @@ namespace table {
 // an iterator over the contents of the corresponding block.
 extern Iterator* NewTwoLevelIterator(
     Iterator* index_iter,
-    Iterator* (*block_function)(void* arg, const StringPiece& index_value),
+    Iterator* (*block_function)(void* arg,
+                                const absl::string_view& index_value),
     void* arg);
 
 }  // namespace table
diff --git a/tensorflow/core/lib/io/zlib_buffers_test.cc b/tensorflow/core/lib/io/zlib_buffers_test.cc
index 156c712db8..2ea197e9c8 100644
--- a/tensorflow/core/lib/io/zlib_buffers_test.cc
+++ b/tensorflow/core/lib/io/zlib_buffers_test.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/random_inputstream.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
@@ -75,7 +76,7 @@ void TestAllCombinations(CompressionOptions input_options,
                              output_options);
         TF_ASSERT_OK(out.Init());
 
-        TF_ASSERT_OK(out.Append(StringPiece(data)));
+        TF_ASSERT_OK(out.Append(absl::string_view(data)));
         TF_ASSERT_OK(out.Close());
         TF_ASSERT_OK(file_writer->Flush());
         TF_ASSERT_OK(file_writer->Close());
@@ -124,7 +125,7 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size,
   TF_ASSERT_OK(out.Init());
 
   for (int i = 0; i < num_writes; i++) {
-    TF_ASSERT_OK(out.Append(StringPiece(data)));
+    TF_ASSERT_OK(out.Append(absl::string_view(data)));
     if (with_flush) {
       TF_ASSERT_OK(out.Flush());
     }
@@ -176,7 +177,7 @@ TEST(ZlibInputStream, FailsToReadIfWindowBitsAreIncompatible) {
                        output_options);
   TF_ASSERT_OK(out.Init());
 
-  TF_ASSERT_OK(out.Append(StringPiece(data)));
+  TF_ASSERT_OK(out.Append(absl::string_view(data)));
   TF_ASSERT_OK(out.Close());
   TF_ASSERT_OK(file_writer->Flush());
   TF_ASSERT_OK(file_writer->Close());
@@ -203,7 +204,7 @@ void WriteCompressedFile(Env* env, const string& fname, int input_buf_size,
                        output_options);
   TF_ASSERT_OK(out.Init());
 
-  TF_ASSERT_OK(out.Append(StringPiece(data)));
+  TF_ASSERT_OK(out.Append(absl::string_view(data)));
   TF_ASSERT_OK(out.Close());
   TF_ASSERT_OK(file_writer->Flush());
   TF_ASSERT_OK(file_writer->Close());
diff --git a/tensorflow/core/lib/io/zlib_outputbuffer.cc b/tensorflow/core/lib/io/zlib_outputbuffer.cc
index cba139e6ad..ccede3afe0 100644
--- a/tensorflow/core/lib/io/zlib_outputbuffer.cc
+++ b/tensorflow/core/lib/io/zlib_outputbuffer.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/zlib_outputbuffer.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -72,7 +73,7 @@ int32 ZlibOutputBuffer::AvailableInputSpace() const {
   return input_buffer_capacity_ - z_stream_->avail_in;
 }
 
-void ZlibOutputBuffer::AddToInputBuffer(StringPiece data) {
+void ZlibOutputBuffer::AddToInputBuffer(absl::string_view data) {
   size_t bytes_to_write = data.size();
   CHECK_LE(bytes_to_write, AvailableInputSpace());
 
@@ -132,7 +133,7 @@ Status ZlibOutputBuffer::DeflateBuffered(bool last) {
 Status ZlibOutputBuffer::FlushOutputBufferToFile() {
   uint32 bytes_to_write = output_buffer_capacity_ - z_stream_->avail_out;
   if (bytes_to_write > 0) {
-    Status s = file_->Append(StringPiece(
+    Status s = file_->Append(absl::string_view(
         reinterpret_cast<char*>(z_stream_output_.get()), bytes_to_write));
     if (s.ok()) {
       z_stream_->next_out = z_stream_output_.get();
@@ -143,7 +144,7 @@ Status ZlibOutputBuffer::FlushOutputBufferToFile() {
   return Status::OK();
 }
 
-Status ZlibOutputBuffer::Append(StringPiece data) {
+Status ZlibOutputBuffer::Append(absl::string_view data) {
   // If there is sufficient free space in z_stream_input_ to fit data we
   // add it there and return.
   // If there isn't enough space we deflate the existing contents of
diff --git a/tensorflow/core/lib/io/zlib_outputbuffer.h b/tensorflow/core/lib/io/zlib_outputbuffer.h
index ccad2fda44..7dfc6e781d 100644
--- a/tensorflow/core/lib/io/zlib_outputbuffer.h
+++ b/tensorflow/core/lib/io/zlib_outputbuffer.h
@@ -20,6 +20,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
 #include "tensorflow/core/platform/env.h"
@@ -62,7 +63,7 @@ class ZlibOutputBuffer : public WritableFile {
   // to file when the buffer is full.
   //
   // To immediately write contents to file call `Flush()`.
-  Status Append(StringPiece data) override;
+  Status Append(absl::string_view data) override;
 
   // Deflates any cached input and writes all output to file.
   Status Flush() override;
@@ -111,7 +112,7 @@ class ZlibOutputBuffer : public WritableFile {
 
   // Adds `data` to `z_stream_input_`.
   // Throws if `data.size()` > AvailableInputSpace().
-  void AddToInputBuffer(StringPiece data);
+  void AddToInputBuffer(absl::string_view data);
 
   // Returns the total space available in z_input_stream_ buffer.
   int32 AvailableInputSpace() const;
diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.h b/tensorflow/core/lib/jpeg/jpeg_mem.h
index 03437a4e78..865983a146 100644
--- a/tensorflow/core/lib/jpeg/jpeg_mem.h
+++ b/tensorflow/core/lib/jpeg/jpeg_mem.h
@@ -24,7 +24,7 @@ limitations under the License.
 #include <functional>
 #include <string>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/jpeg.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -136,7 +136,7 @@ struct CompressFlags {
   int y_density = 300;
 
   // If not empty, embed this XMP metadata in the image header
-  StringPiece xmp_metadata;
+  absl::string_view xmp_metadata;
 
   // The distance in bytes from one scanline to the other.  Should be at least
   // equal to width*components*sizeof(JSAMPLE).  If 0 is passed, the stride
diff --git a/tensorflow/core/lib/monitoring/collection_registry.cc b/tensorflow/core/lib/monitoring/collection_registry.cc
index fface033cb..8d9fd5a721 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.cc
+++ b/tensorflow/core/lib/monitoring/collection_registry.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/monitoring/collection_registry.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
@@ -45,7 +46,7 @@ void Collector::CollectMetricDescriptor(
   metric_descriptor->name = string(metric_def->name());
   metric_descriptor->description = string(metric_def->description());
 
-  for (const StringPiece label_name : metric_def->label_descriptions()) {
+  for (const absl::string_view label_name : metric_def->label_descriptions()) {
     metric_descriptor->label_names.emplace_back(label_name);
   }
 
diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h
index 9e4e1989dd..c1493710db 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.h
+++ b/tensorflow/core/lib/monitoring/collection_registry.h
@@ -19,8 +19,8 @@ limitations under the License.
 #include <map>
 #include <memory>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/summary.pb.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/monitoring/collected_metrics.h"
 #include "tensorflow/core/lib/monitoring/metric_def.h"
 #include "tensorflow/core/platform/env.h"
@@ -186,7 +186,7 @@ class CollectionRegistry {
     CollectionFunction collection_function;
     uint64 registration_time_millis;
   };
-  std::map<StringPiece, CollectionInfo> registry_ GUARDED_BY(mu_);
+  std::map<absl::string_view, CollectionInfo> registry_ GUARDED_BY(mu_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(CollectionRegistry);
 };
diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h
index bc4365e439..2d15547440 100644
--- a/tensorflow/core/lib/monitoring/metric_def.h
+++ b/tensorflow/core/lib/monitoring/metric_def.h
@@ -19,8 +19,8 @@ limitations under the License.
 #include <array>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/summary.pb.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -82,9 +82,9 @@ class AbstractMetricDef {
 
   ValueType value_type() const { return value_type_; }
 
-  StringPiece name() const { return name_; }
+  absl::string_view name() const { return name_; }
 
-  StringPiece description() const { return description_; }
+  absl::string_view description() const { return description_; }
 
   const std::vector<string>& label_descriptions() const {
     return label_descriptions_;
@@ -95,7 +95,8 @@ class AbstractMetricDef {
   friend class MetricDef;
 
   AbstractMetricDef(const MetricKind kind, const ValueType value_type,
-                    const StringPiece name, const StringPiece description,
+                    const absl::string_view name,
+                    const absl::string_view description,
                     const std::vector<string>& label_descriptions)
       : kind_(kind),
         value_type_(value_type),
@@ -127,7 +128,7 @@ template <MetricKind metric_kind, typename Value, int NumLabels>
 class MetricDef : public AbstractMetricDef {
  public:
   template <typename... LabelDesc>
-  MetricDef(const StringPiece name, const StringPiece description,
+  MetricDef(const absl::string_view name, const absl::string_view description,
             const LabelDesc&... label_descriptions)
       : AbstractMetricDef(metric_kind, internal::GetValueType<Value>(), name,
                           description, {label_descriptions...}) {
diff --git a/tensorflow/core/lib/png/png_io.cc b/tensorflow/core/lib/png/png_io.cc
index e226a15ccc..5aa5809bae 100644
--- a/tensorflow/core/lib/png/png_io.cc
+++ b/tensorflow/core/lib/png/png_io.cc
@@ -24,6 +24,7 @@ limitations under the License.
 // NOTE(skal): we don't '#include <setjmp.h>' before png.h as it otherwise
 // provokes a compile error. We instead let png.h include what is needed.
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/casts.h"
 #include "tensorflow/core/lib/png/png_io.h"
 #include "tensorflow/core/platform/byte_order.h"
@@ -129,7 +130,7 @@ void CommonFreeDecode(DecodeContext* context) {
   }
 }
 
-bool DecodeHeader(StringPiece png_string, int* width, int* height,
+bool DecodeHeader(absl::string_view png_string, int* width, int* height,
                   int* components, int* channel_bit_depth,
                   std::vector<std::pair<string, string> >* metadata) {
   DecodeContext context;
@@ -190,7 +191,7 @@ bool DecodeHeader(StringPiece png_string, int* width, int* height,
   return true;
 }
 
-bool CommonInitDecode(StringPiece png_string, int desired_channels,
+bool CommonInitDecode(absl::string_view png_string, int desired_channels,
                       int desired_channel_bits, DecodeContext* context) {
   CHECK(desired_channel_bits == 8 || desired_channel_bits == 16)
       << "desired_channel_bits = " << desired_channel_bits;
diff --git a/tensorflow/core/lib/png/png_io.h b/tensorflow/core/lib/png/png_io.h
index c876c5156a..bbe4325399 100644
--- a/tensorflow/core/lib/png/png_io.h
+++ b/tensorflow/core/lib/png/png_io.h
@@ -35,7 +35,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/png.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -58,7 +58,7 @@ struct DecodeContext {
   DecodeContext() : png_ptr(NULL), info_ptr(NULL) {}
 };
 
-bool DecodeHeader(StringPiece png_string, int* width, int* height,
+bool DecodeHeader(absl::string_view png_string, int* width, int* height,
                   int* components, int* channel_bit_depth,
                   std::vector<std::pair<string, string> >* metadata);
 
@@ -73,7 +73,7 @@ bool DecodeHeader(StringPiece png_string, int* width, int* height,
 //
 // desired_channels may be 0 to detected it from the input.
 
-bool CommonInitDecode(StringPiece png_string, int desired_channels,
+bool CommonInitDecode(absl::string_view png_string, int desired_channels,
                       int desired_channel_bits, DecodeContext* context);
 
 bool CommonFinishDecode(png_bytep data, int row_bytes, DecodeContext* context);
diff --git a/tensorflow/core/lib/strings/base64.cc b/tensorflow/core/lib/strings/base64.cc
index c5a521f18a..cd12afc171 100644
--- a/tensorflow/core/lib/strings/base64.cc
+++ b/tensorflow/core/lib/strings/base64.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <cstring>
 #include <memory>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -73,7 +74,7 @@ Status DecodeThreeChars(const char* codes, char* result) {
 }
 }  // namespace
 
-Status Base64Decode(StringPiece data, string* decoded) {
+Status Base64Decode(absl::string_view data, string* decoded) {
   if (decoded == nullptr) {
     return errors::Internal("'decoded' cannot be nullptr.");
   }
@@ -135,11 +136,12 @@ Status Base64Decode(StringPiece data, string* decoded) {
   return Status::OK();
 }
 
-Status Base64Encode(StringPiece source, string* encoded) {
+Status Base64Encode(absl::string_view source, string* encoded) {
   return Base64Encode(source, false, encoded);
 }
 
-Status Base64Encode(StringPiece source, bool with_padding, string* encoded) {
+Status Base64Encode(absl::string_view source, bool with_padding,
+                    string* encoded) {
   const char* const base64_chars = kBase64UrlSafeChars;
   if (encoded == nullptr) {
     return errors::Internal("'encoded' cannot be nullptr.");
diff --git a/tensorflow/core/lib/strings/base64.h b/tensorflow/core/lib/strings/base64.h
index 48a7f42b81..3ea41f9ce3 100644
--- a/tensorflow/core/lib/strings/base64.h
+++ b/tensorflow/core/lib/strings/base64.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_LIB_STRINGS_B64_H_
 
 #include <string>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -24,13 +25,14 @@ namespace tensorflow {
 /// \brief Converts data into web-safe base64 encoding.
 ///
 /// See https://en.wikipedia.org/wiki/Base64
-Status Base64Encode(StringPiece data, bool with_padding, string* encoded);
-Status Base64Encode(StringPiece data, string* encoded);  // with_padding=false.
+Status Base64Encode(absl::string_view data, bool with_padding, string* encoded);
+Status Base64Encode(absl::string_view data,
+                    string* encoded);  // with_padding=false.
 
 /// \brief Converts data from web-safe base64 encoding.
 ///
 /// See https://en.wikipedia.org/wiki/Base64
-Status Base64Decode(StringPiece data, string* decoded);
+Status Base64Decode(absl::string_view data, string* decoded);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc
index fff6f1fedc..ba7e554499 100644
--- a/tensorflow/core/lib/strings/numbers.cc
+++ b/tensorflow/core/lib/strings/numbers.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include <locale>
 #include <unordered_map>
 
+#include "absl/strings/string_view.h"
 #include "double-conversion/double-conversion.h"
 
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -211,16 +212,16 @@ size_t DoubleToBuffer(double value, char* buffer) {
 }
 
 namespace {
-char SafeFirstChar(StringPiece str) {
+char SafeFirstChar(absl::string_view str) {
   if (str.empty()) return '\0';
   return str[0];
 }
-void SkipSpaces(StringPiece* str) {
+void SkipSpaces(absl::string_view* str) {
   while (isspace(SafeFirstChar(*str))) str->remove_prefix(1);
 }
 }  // namespace
 
-bool safe_strto64(StringPiece str, int64* value) {
+bool safe_strto64(absl::string_view str, int64* value) {
   SkipSpaces(&str);
 
   int64 vlimit = kint64max;
@@ -261,7 +262,7 @@ bool safe_strto64(StringPiece str, int64* value) {
   return true;
 }
 
-bool safe_strtou64(StringPiece str, uint64* value) {
+bool safe_strtou64(absl::string_view str, uint64* value) {
   SkipSpaces(&str);
   if (!isdigit(SafeFirstChar(str))) return false;
 
@@ -282,7 +283,7 @@ bool safe_strtou64(StringPiece str, uint64* value) {
   return true;
 }
 
-bool safe_strto32(StringPiece str, int32* value) {
+bool safe_strto32(absl::string_view str, int32* value) {
   SkipSpaces(&str);
 
   int64 vmax = kint32max;
@@ -312,7 +313,7 @@ bool safe_strto32(StringPiece str, int32* value) {
   return true;
 }
 
-bool safe_strtou32(StringPiece str, uint32* value) {
+bool safe_strtou32(absl::string_view str, uint32* value) {
   SkipSpaces(&str);
   if (!isdigit(SafeFirstChar(str))) return false;
 
@@ -332,7 +333,7 @@ bool safe_strtou32(StringPiece str, uint32* value) {
   return true;
 }
 
-bool safe_strtof(StringPiece str, float* value) {
+bool safe_strtof(absl::string_view str, float* value) {
   int processed_characters_count = -1;
   auto len = str.size();
 
@@ -345,7 +346,7 @@ bool safe_strtof(StringPiece str, float* value) {
   return processed_characters_count > 0;
 }
 
-bool safe_strtod(StringPiece str, double* value) {
+bool safe_strtod(absl::string_view str, double* value) {
   int processed_characters_count = -1;
   auto len = str.size();
 
@@ -400,7 +401,7 @@ bool StringToFp(const string& s, Fprint* fp) {
   }
 }
 
-StringPiece Uint64ToHexString(uint64 v, char* buf) {
+absl::string_view Uint64ToHexString(uint64 v, char* buf) {
   static const char* hexdigits = "0123456789abcdef";
   const int num_byte = 16;
   buf[num_byte] = '\0';
@@ -408,10 +409,10 @@ StringPiece Uint64ToHexString(uint64 v, char* buf) {
     buf[i] = hexdigits[v & 0xf];
     v >>= 4;
   }
-  return StringPiece(buf, num_byte);
+  return absl::string_view(buf, num_byte);
 }
 
-bool HexStringToUint64(const StringPiece& s, uint64* result) {
+bool HexStringToUint64(const absl::string_view& s, uint64* result) {
   uint64 v = 0;
   if (s.empty()) {
     return false;
diff --git a/tensorflow/core/lib/strings/numbers.h b/tensorflow/core/lib/strings/numbers.h
index 959290ba8c..f28cb0c28c 100644
--- a/tensorflow/core/lib/strings/numbers.h
+++ b/tensorflow/core/lib/strings/numbers.h
@@ -18,7 +18,7 @@ limitations under the License.
 
 #include <string>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -84,66 +84,66 @@ bool StringToFp(const string& s, Fprint* fp);
 // Convert a 64-bit fingerprint value to an ASCII representation that
 // is terminated by a '\0'.
 // Buf must point to an array of at least kFastToBufferSize characters
-StringPiece Uint64ToHexString(uint64 v, char* buf);
+absl::string_view Uint64ToHexString(uint64 v, char* buf);
 
 // Attempt to parse a uint64 in the form encoded by FastUint64ToHexString.  If
 // successful, stores the value in *v and returns true.  Otherwise,
 // returns false.
-bool HexStringToUint64(const StringPiece& s, uint64* v);
+bool HexStringToUint64(const absl::string_view& s, uint64* v);
 
 // Convert strings to 32bit integer values.
 // Leading and trailing spaces are allowed.
 // Return false with overflow or invalid input.
-bool safe_strto32(StringPiece str, int32* value);
+bool safe_strto32(absl::string_view str, int32* value);
 
 // Convert strings to unsigned 32bit integer values.
 // Leading and trailing spaces are allowed.
 // Return false with overflow or invalid input.
-bool safe_strtou32(StringPiece str, uint32* value);
+bool safe_strtou32(absl::string_view str, uint32* value);
 
 // Convert strings to 64bit integer values.
 // Leading and trailing spaces are allowed.
 // Return false with overflow or invalid input.
-bool safe_strto64(StringPiece str, int64* value);
+bool safe_strto64(absl::string_view str, int64* value);
 
 // Convert strings to unsigned 64bit integer values.
 // Leading and trailing spaces are allowed.
 // Return false with overflow or invalid input.
-bool safe_strtou64(StringPiece str, uint64* value);
+bool safe_strtou64(absl::string_view str, uint64* value);
 
 // Convert strings to floating point values.
 // Leading and trailing spaces are allowed.
 // Values may be rounded on over- and underflow.
 // Returns false on invalid input or if `strlen(value) >= kFastToBufferSize`.
-bool safe_strtof(StringPiece str, float* value);
+bool safe_strtof(absl::string_view str, float* value);
 
 // Convert strings to double precision floating point values.
 // Leading and trailing spaces are allowed.
 // Values may be rounded on over- and underflow.
 // Returns false on invalid input or if `strlen(value) >= kFastToBufferSize`.
-bool safe_strtod(StringPiece str, double* value);
+bool safe_strtod(absl::string_view str, double* value);
 
-inline bool ProtoParseNumeric(StringPiece s, int32* value) {
+inline bool ProtoParseNumeric(absl::string_view s, int32* value) {
   return safe_strto32(s, value);
 }
 
-inline bool ProtoParseNumeric(StringPiece s, uint32* value) {
+inline bool ProtoParseNumeric(absl::string_view s, uint32* value) {
   return safe_strtou32(s, value);
 }
 
-inline bool ProtoParseNumeric(StringPiece s, int64* value) {
+inline bool ProtoParseNumeric(absl::string_view s, int64* value) {
   return safe_strto64(s, value);
 }
 
-inline bool ProtoParseNumeric(StringPiece s, uint64* value) {
+inline bool ProtoParseNumeric(absl::string_view s, uint64* value) {
   return safe_strtou64(s, value);
 }
 
-inline bool ProtoParseNumeric(StringPiece s, float* value) {
+inline bool ProtoParseNumeric(absl::string_view s, float* value) {
   return safe_strtof(s, value);
 }
 
-inline bool ProtoParseNumeric(StringPiece s, double* value) {
+inline bool ProtoParseNumeric(absl::string_view s, double* value) {
   return safe_strtod(s, value);
 }
 
@@ -151,7 +151,7 @@ inline bool ProtoParseNumeric(StringPiece s, double* value) {
 // Leading and trailing spaces are allowed.
 // Values may be rounded on over- and underflow.
 template <typename T>
-bool SafeStringToNumeric(StringPiece s, T* value) {
+bool SafeStringToNumeric(absl::string_view s, T* value) {
   return ProtoParseNumeric(s, value);
 }
 
diff --git a/tensorflow/core/lib/strings/numbers_test.cc b/tensorflow/core/lib/strings/numbers_test.cc
index 5b595f9847..0e5e59a66b 100644
--- a/tensorflow/core/lib/strings/numbers_test.cc
+++ b/tensorflow/core/lib/strings/numbers_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <cmath>
 #include <string>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -47,7 +48,7 @@ TEST(Uint64ToHexString, Ints) {
     for (int delta = -1; delta <= 1; delta++) {
       uint64 fp = (1ull << s) + delta;
       char buf[kFastToBufferSize];
-      StringPiece s = Uint64ToHexString(fp, buf);
+      absl::string_view s = Uint64ToHexString(fp, buf);
       uint64 fp2;
       EXPECT_TRUE(HexStringToUint64(s, &fp2));
       EXPECT_EQ(fp, fp2) << s;
@@ -144,11 +145,11 @@ TEST(safe_strto32, Int32s) {
   EXPECT_EQ(false, safe_strto32("-2147483649", &result));
 
   // Check that the StringPiece's length is respected.
-  EXPECT_EQ(true, safe_strto32(StringPiece("123", 1), &result));
+  EXPECT_EQ(true, safe_strto32(absl::string_view("123", 1), &result));
   EXPECT_EQ(1, result);
-  EXPECT_EQ(true, safe_strto32(StringPiece(" -123", 4), &result));
+  EXPECT_EQ(true, safe_strto32(absl::string_view(" -123", 4), &result));
   EXPECT_EQ(-12, result);
-  EXPECT_EQ(false, safe_strto32(StringPiece(nullptr, 0), &result));
+  EXPECT_EQ(false, safe_strto32(absl::string_view(nullptr, 0), &result));
 }
 
 TEST(safe_strtou32, UInt32s) {
@@ -177,11 +178,11 @@ TEST(safe_strtou32, UInt32s) {
   EXPECT_FALSE(safe_strtou32("-1", &result));
 
   // Check that the StringPiece's length is respected.
-  EXPECT_TRUE(safe_strtou32(StringPiece("123", 1), &result));
+  EXPECT_TRUE(safe_strtou32(absl::string_view("123", 1), &result));
   EXPECT_EQ(1, result);
-  EXPECT_TRUE(safe_strtou32(StringPiece(" 123", 3), &result));
+  EXPECT_TRUE(safe_strtou32(absl::string_view(" 123", 3), &result));
   EXPECT_EQ(12, result);
-  EXPECT_FALSE(safe_strtou32(StringPiece(nullptr, 0), &result));
+  EXPECT_FALSE(safe_strtou32(absl::string_view(nullptr, 0), &result));
 }
 
 TEST(safe_strto64, Int64s) {
@@ -213,11 +214,11 @@ TEST(safe_strto64, Int64s) {
   EXPECT_EQ(false, safe_strto64("-9223372036854775809", &result));
 
   // Check that the StringPiece's length is respected.
-  EXPECT_EQ(true, safe_strto64(StringPiece("123", 1), &result));
+  EXPECT_EQ(true, safe_strto64(absl::string_view("123", 1), &result));
   EXPECT_EQ(1, result);
-  EXPECT_EQ(true, safe_strto64(StringPiece(" -123", 4), &result));
+  EXPECT_EQ(true, safe_strto64(absl::string_view(" -123", 4), &result));
   EXPECT_EQ(-12, result);
-  EXPECT_EQ(false, safe_strto64(StringPiece(nullptr, 0), &result));
+  EXPECT_EQ(false, safe_strto64(absl::string_view(nullptr, 0), &result));
 }
 
 TEST(safe_strtou64, UInt64s) {
@@ -248,11 +249,11 @@ TEST(safe_strtou64, UInt64s) {
   EXPECT_FALSE(safe_strtou64("-1", &result));
 
   // Check that the StringPiece's length is respected.
-  EXPECT_TRUE(safe_strtou64(StringPiece("123", 1), &result));
+  EXPECT_TRUE(safe_strtou64(absl::string_view("123", 1), &result));
   EXPECT_EQ(1, result);
-  EXPECT_TRUE(safe_strtou64(StringPiece(" 123", 3), &result));
+  EXPECT_TRUE(safe_strtou64(absl::string_view(" 123", 3), &result));
   EXPECT_EQ(12, result);
-  EXPECT_FALSE(safe_strtou64(StringPiece(nullptr, 0), &result));
+  EXPECT_FALSE(safe_strtou64(absl::string_view(nullptr, 0), &result));
 }
 
 TEST(safe_strtof, Float) {
diff --git a/tensorflow/core/lib/strings/ordered_code.cc b/tensorflow/core/lib/strings/ordered_code.cc
index ef90050b4f..5b8ef93431 100644
--- a/tensorflow/core/lib/strings/ordered_code.cc
+++ b/tensorflow/core/lib/strings/ordered_code.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include <assert.h>
 #include <stddef.h>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
@@ -160,7 +160,7 @@ const char* OrderedCode::TEST_SkipToNextSpecialByte(const char* start,
 
 // Helper routine to encode "s" and append to "*dest", escaping special
 // characters.
-inline static void EncodeStringFragment(string* dest, StringPiece s) {
+inline static void EncodeStringFragment(string* dest, absl::string_view s) {
   const char* p = s.data();
   const char* limit = p + s.size();
   const char* copy_start = p;
@@ -187,7 +187,7 @@ inline static void EncodeStringFragment(string* dest, StringPiece s) {
   }
 }
 
-void OrderedCode::WriteString(string* dest, StringPiece s) {
+void OrderedCode::WriteString(string* dest, absl::string_view s) {
   EncodeStringFragment(dest, s);
   AppendBytes(dest, kEscape1_Separator, 2);
 }
@@ -212,7 +212,7 @@ void OrderedCode::WriteNumIncreasing(string* dest, uint64 val) {
 // If parse succeeds, return true, consume encoding from
 // "*src", and if result != NULL append the decoded string to "*result".
 // Otherwise, return false and leave both undefined.
-inline static bool ReadStringInternal(StringPiece* src, string* result) {
+inline static bool ReadStringInternal(absl::string_view* src, string* result) {
   const char* start = src->data();
   const char* string_limit = src->data() + src->size();
 
@@ -267,11 +267,11 @@ inline static bool ReadStringInternal(StringPiece* src, string* result) {
   return false;
 }
 
-bool OrderedCode::ReadString(StringPiece* src, string* result) {
+bool OrderedCode::ReadString(absl::string_view* src, string* result) {
   return ReadStringInternal(src, result);
 }
 
-bool OrderedCode::ReadNumIncreasing(StringPiece* src, uint64* result) {
+bool OrderedCode::ReadNumIncreasing(absl::string_view* src, uint64* result) {
   if (src->empty()) {
     return false;  // Not enough bytes
   }
@@ -485,7 +485,8 @@ void OrderedCode::WriteSignedNumIncreasing(string* dest, int64 val) {
   dest->append(begin, len);
 }
 
-bool OrderedCode::ReadSignedNumIncreasing(StringPiece* src, int64* result) {
+bool OrderedCode::ReadSignedNumIncreasing(absl::string_view* src,
+                                          int64* result) {
   if (src->empty()) return false;
   const uint64 xor_mask = (!((*src)[0] & 0x80)) ? ~0ULL : 0ULL;
   const unsigned char first_byte = (*src)[0] ^ (xor_mask & 0xff);
diff --git a/tensorflow/core/lib/strings/ordered_code.h b/tensorflow/core/lib/strings/ordered_code.h
index 91870cfec6..54c7326011 100644
--- a/tensorflow/core/lib/strings/ordered_code.h
+++ b/tensorflow/core/lib/strings/ordered_code.h
@@ -39,7 +39,7 @@ limitations under the License.
 #define TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__
 
 #include <string>
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -53,7 +53,7 @@ class OrderedCode {
   // Encoding routines: each one of the following routines append
   // one item to "*dest" in an encoding where larger values are
   // ordered lexicographically after smaller values.
-  static void WriteString(string* dest, StringPiece str);
+  static void WriteString(string* dest, absl::string_view str);
   static void WriteNumIncreasing(string* dest, uint64 num);
   static void WriteSignedNumIncreasing(string* dest, int64 num);
 
@@ -65,9 +65,9 @@ class OrderedCode {
   // result.  In case of string result, the decoded string is appended to
   // "*result".  Returns true if the next item was read successfully, false
   // otherwise.
-  static bool ReadString(StringPiece* src, string* result);
-  static bool ReadNumIncreasing(StringPiece* src, uint64* result);
-  static bool ReadSignedNumIncreasing(StringPiece* src, int64* result);
+  static bool ReadString(absl::string_view* src, string* result);
+  static bool ReadNumIncreasing(absl::string_view* src, uint64* result);
+  static bool ReadSignedNumIncreasing(absl::string_view* src, int64* result);
 
   // Helper for testing: corrupt "*str" by changing the kth item separator
   // in the string.
diff --git a/tensorflow/core/lib/strings/ordered_code_test.cc b/tensorflow/core/lib/strings/ordered_code_test.cc
index ede9f4d390..53457b8ce7 100644
--- a/tensorflow/core/lib/strings/ordered_code_test.cc
+++ b/tensorflow/core/lib/strings/ordered_code_test.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include <limits>
 #include <vector>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/random/simple_philox.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/logging.h"
@@ -47,7 +47,7 @@ string RandomString(random::SimplePhilox* rnd, size_t len) {
 template <typename T>
 void OCWriteIncreasing(string* dest, const T& val);
 template <typename T>
-bool OCReadIncreasing(StringPiece* src, T* result);
+bool OCReadIncreasing(absl::string_view* src, T* result);
 
 // Read/WriteIncreasing<string>
 template <>
@@ -55,7 +55,7 @@ void OCWriteIncreasing<string>(string* dest, const string& val) {
   OrderedCode::WriteString(dest, val);
 }
 template <>
-bool OCReadIncreasing<string>(StringPiece* src, string* result) {
+bool OCReadIncreasing<string>(absl::string_view* src, string* result) {
   return OrderedCode::ReadString(src, result);
 }
 
@@ -65,7 +65,7 @@ void OCWriteIncreasing<uint64>(string* dest, const uint64& val) {
   OrderedCode::WriteNumIncreasing(dest, val);
 }
 template <>
-bool OCReadIncreasing<uint64>(StringPiece* src, uint64* result) {
+bool OCReadIncreasing<uint64>(absl::string_view* src, uint64* result) {
   return OrderedCode::ReadNumIncreasing(src, result);
 }
 
@@ -75,7 +75,7 @@ void OCWriteIncreasing<int64>(string* dest, const int64& val) {
   OrderedCode::WriteSignedNumIncreasing(dest, val);
 }
 template <>
-bool OCReadIncreasing<int64>(StringPiece* src, int64* result) {
+bool OCReadIncreasing<int64>(absl::string_view* src, int64* result) {
   return OrderedCode::ReadSignedNumIncreasing(src, result);
 }
 
@@ -92,7 +92,7 @@ void OCWriteToString(string* result, T val) {
 }
 
 template <typename T>
-bool OCRead(StringPiece* s, T* val) {
+bool OCRead(absl::string_view* s, T* val) {
   return OCReadIncreasing<T>(s, val);
 }
 
@@ -103,12 +103,12 @@ template <typename T>
 T TestRead(const string& a) {
   // gracefully reject any proper prefix of an encoding
   for (int i = 0; i < a.size() - 1; ++i) {
-    StringPiece s(a.data(), i);
+    absl::string_view s(a.data(), i);
     CHECK(!OCRead<T>(&s, nullptr));
     CHECK_EQ(s, a.substr(0, i));
   }
 
-  StringPiece s(a);
+  absl::string_view s(a);
   T v;
   CHECK(OCRead<T>(&s, &v));
   CHECK(s.empty());
@@ -304,7 +304,7 @@ inline string StrNot(const string& s) {
 
 template <typename T>
 void TestInvalidEncoding(const string& s) {
-  StringPiece p(s);
+  absl::string_view p(s);
   EXPECT_FALSE(OCRead<T>(&p, nullptr));
   EXPECT_EQ(s, p);
 }
@@ -338,7 +338,7 @@ TEST(OrderedCodeInvalidEncodingsDeathTest, NonCanonical) {
 
     EXPECT_NE(OCWrite<uint64>(0), non_minimal);
 #ifndef NDEBUG
-    StringPiece s(non_minimal);
+    absl::string_view s(non_minimal);
     EXPECT_DEATH(OrderedCode::ReadNumIncreasing(&s, nullptr),
                  "invalid encoding");
 #else
@@ -357,7 +357,7 @@ TEST(OrderedCodeInvalidEncodingsDeathTest, NonCanonical) {
 
     EXPECT_NE(OCWrite<int64>(0), non_minimal);
 #ifndef NDEBUG
-    StringPiece s(non_minimal);
+    absl::string_view s(non_minimal);
     EXPECT_DEATH(OrderedCode::ReadSignedNumIncreasing(&s, nullptr),
                  "invalid encoding")
         << n;
@@ -409,7 +409,7 @@ void BM_ReadNum(int n, T multiplier) {
   uint32 index = 0;
   while (n-- > 0) {
     T val;
-    StringPiece s = values[index++ % kValues];
+    absl::string_view s = values[index++ % kValues];
     OCRead<T>(&s, &val);
   }
 }
@@ -446,8 +446,8 @@ TEST(String, EncodeDecode) {
       OCWriteToString<string>(&out, b);
 
       string a2, b2, dummy;
-      StringPiece s = out;
-      StringPiece s2 = out;
+      absl::string_view s = out;
+      absl::string_view s2 = out;
       CHECK(OCRead<string>(&s, &a2));
       CHECK(OCRead<string>(&s2, nullptr));
       CHECK_EQ(s, s2);
@@ -467,9 +467,9 @@ TEST(String, EncodeDecode) {
 }
 
 // 'str' is a string literal that may contain '\0'.
-#define STATIC_STR(str) StringPiece((str), sizeof(str) - 1)
+#define STATIC_STR(str) absl::string_view((str), sizeof(str) - 1)
 
-string EncodeStringIncreasing(StringPiece value) {
+string EncodeStringIncreasing(absl::string_view value) {
   string encoded;
   OrderedCode::WriteString(&encoded, value);
   return encoded;
@@ -523,7 +523,7 @@ TEST(EncodingIsExpected, String) {
     OrderedCode::WriteString(&result, t.first);
     EXPECT_EQ(t.second, result);
 
-    StringPiece in = result;
+    absl::string_view in = result;
     string decoded;
     EXPECT_TRUE(OrderedCode::ReadString(&in, &decoded));
     EXPECT_EQ(t.first, decoded);
@@ -755,7 +755,7 @@ TEST(EncodingIsExpected, Unsigned) {
     OrderedCode::WriteNumIncreasing(&result, num);
     EXPECT_EQ(t.second, result) << std::hex << num;
 
-    StringPiece in = result;
+    absl::string_view in = result;
     uint64 decoded;
     EXPECT_TRUE(OrderedCode::ReadNumIncreasing(&in, &decoded));
     EXPECT_EQ(num, decoded);
@@ -1202,7 +1202,7 @@ TEST(EncodingIsExpected, Signed) {
     OrderedCode::WriteSignedNumIncreasing(&result, num);
     EXPECT_EQ(t.second, result) << std::hex << num;
 
-    StringPiece in = result;
+    absl::string_view in = result;
     int64 decoded;
     EXPECT_TRUE(OrderedCode::ReadSignedNumIncreasing(&in, &decoded));
     EXPECT_EQ(num, decoded);
@@ -1244,7 +1244,7 @@ void BM_ReadString(int n, int len) {
   testing::StartTiming();
   while (n-- > 0) {
     result.clear();
-    StringPiece s = data;
+    absl::string_view s = data;
     OCRead<string>(&s, &result);
   }
 }
diff --git a/tensorflow/core/lib/strings/proto_text_util.cc b/tensorflow/core/lib/strings/proto_text_util.cc
index 5e9fa24a87..906c1150d8 100644
--- a/tensorflow/core/lib/strings/proto_text_util.cc
+++ b/tensorflow/core/lib/strings/proto_text_util.cc
@@ -14,12 +14,13 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/strings/proto_text_util.h"
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace strings {
 
 bool ProtoParseBoolFromScanner(Scanner* scanner, bool* value) {
-  StringPiece bool_str;
+  absl::string_view bool_str;
   if (!scanner->RestartCapture()
            .Many(Scanner::LETTER_DIGIT)
            .GetResult(nullptr, &bool_str)) {
@@ -41,7 +42,7 @@ bool ProtoParseStringLiteralFromScanner(Scanner* scanner, string* value) {
   const char quote = scanner->Peek();
   if (quote != '\'' && quote != '"') return false;
 
-  StringPiece value_sp;
+  absl::string_view value_sp;
   if (!scanner->One(Scanner::ALL)
            .RestartCapture()
            .ScanEscapedUntil(quote)
diff --git a/tensorflow/core/lib/strings/proto_text_util.h b/tensorflow/core/lib/strings/proto_text_util.h
index 05dbda6e15..3a2bcef6d5 100644
--- a/tensorflow/core/lib/strings/proto_text_util.h
+++ b/tensorflow/core/lib/strings/proto_text_util.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_STRINGS_PROTO_TEXT_UTIL_H_
 #define TENSORFLOW_CORE_LIB_STRINGS_PROTO_TEXT_UTIL_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -100,7 +101,8 @@ class ProtoTextOutput {
   }
 
  private:
-  void AppendFieldAndValue(const char field_name[], StringPiece value_text) {
+  void AppendFieldAndValue(const char field_name[],
+                           absl::string_view value_text) {
     StrAppend(output_, level_empty_ ? "" : field_separator_, indent_,
               field_name, kColonSeparator, value_text);
     level_empty_ = false;
@@ -131,7 +133,7 @@ inline void ProtoSpaceAndComments(Scanner* scanner) {
 // failed.
 template <typename T>
 bool ProtoParseNumericFromScanner(Scanner* scanner, T* value) {
-  StringPiece numeric_str;
+  absl::string_view numeric_str;
   scanner->RestartCapture();
   if (!scanner->Many(Scanner::LETTER_DIGIT_DOT_PLUS_MINUS)
            .GetResult(nullptr, &numeric_str)) {
diff --git a/tensorflow/core/lib/strings/scanner.cc b/tensorflow/core/lib/strings/scanner.cc
index 39a2265aa2..46d3858e30 100644
--- a/tensorflow/core/lib/strings/scanner.cc
+++ b/tensorflow/core/lib/strings/scanner.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/strings/scanner.h"
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace strings {
@@ -41,7 +42,8 @@ void Scanner::ScanUntilImpl(char end_ch, bool escaped) {
   }
 }
 
-bool Scanner::GetResult(StringPiece* remaining, StringPiece* capture) {
+bool Scanner::GetResult(absl::string_view* remaining,
+                        absl::string_view* capture) {
   if (error_) {
     return false;
   }
@@ -50,7 +52,7 @@ bool Scanner::GetResult(StringPiece* remaining, StringPiece* capture) {
   }
   if (capture != nullptr) {
     const char* end = capture_end_ == nullptr ? cur_.data() : capture_end_;
-    *capture = StringPiece(capture_start_, end - capture_start_);
+    *capture = absl::string_view(capture_start_, end - capture_start_);
   }
   return true;
 }
diff --git a/tensorflow/core/lib/strings/scanner.h b/tensorflow/core/lib/strings/scanner.h
index c82e771368..4a28cbf5aa 100644
--- a/tensorflow/core/lib/strings/scanner.h
+++ b/tensorflow/core/lib/strings/scanner.h
@@ -17,7 +17,7 @@ limitations under the License.
 #define TENSORFLOW_LIB_STRINGS_SCANNER_H_
 
 #include <string>
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/macros.h"
 
@@ -61,7 +61,9 @@ class Scanner {
     UPPERLETTER,
   };
 
-  explicit Scanner(StringPiece source) : cur_(source) { RestartCapture(); }
+  explicit Scanner(absl::string_view source) : cur_(source) {
+    RestartCapture();
+  }
 
   // Consume the next character of the given class from input. If the next
   // character is not in the class, then GetResult will ultimately return false.
@@ -75,14 +77,14 @@ class Scanner {
 
   // Consume the next s.size() characters of the input, if they match <s>. If
   // they don't match <s>, this is a no-op.
-  Scanner& ZeroOrOneLiteral(StringPiece s) {
+  Scanner& ZeroOrOneLiteral(absl::string_view s) {
     str_util::ConsumePrefix(&cur_, s);
     return *this;
   }
 
   // Consume the next s.size() characters of the input, if they match <s>. If
   // they don't match <s>, then GetResult will ultimately return false.
-  Scanner& OneLiteral(StringPiece s) {
+  Scanner& OneLiteral(absl::string_view s) {
     if (!str_util::ConsumePrefix(&cur_, s)) {
       error_ = true;
     }
@@ -159,8 +161,8 @@ class Scanner {
   // Returns true if the input string successfully matched. When true is
   // returned, the remaining string is returned in <remaining> and the captured
   // string returned in <capture>, if non-NULL.
-  bool GetResult(StringPiece* remaining = nullptr,
-                 StringPiece* capture = nullptr);
+  bool GetResult(absl::string_view* remaining = nullptr,
+                 absl::string_view* capture = nullptr);
 
  private:
   void ScanUntilImpl(char end_ch, bool escaped);
@@ -226,7 +228,7 @@ class Scanner {
     return false;
   }
 
-  StringPiece cur_;
+  absl::string_view cur_;
   const char* capture_start_ = nullptr;
   const char* capture_end_ = nullptr;
   bool error_ = false;
diff --git a/tensorflow/core/lib/strings/scanner_test.cc b/tensorflow/core/lib/strings/scanner_test.cc
index b0f568a03e..79084ed4d6 100644
--- a/tensorflow/core/lib/strings/scanner_test.cc
+++ b/tensorflow/core/lib/strings/scanner_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/lib/strings/scanner.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -36,7 +37,7 @@ class ScannerTest : public ::testing::Test {
 };
 
 TEST_F(ScannerTest, Any) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
   EXPECT_TRUE(Scanner("   horse0123")
                   .Any(Scanner::SPACE)
                   .Any(Scanner::DIGIT)
@@ -63,7 +64,7 @@ TEST_F(ScannerTest, Any) {
 }
 
 TEST_F(ScannerTest, AnySpace) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
   EXPECT_TRUE(Scanner("  a b ")
                   .AnySpace()
                   .One(Scanner::LETTER)
@@ -74,7 +75,7 @@ TEST_F(ScannerTest, AnySpace) {
 }
 
 TEST_F(ScannerTest, AnyEscapedNewline) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
   EXPECT_TRUE(Scanner("\\\n")
                   .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
                   .GetResult(&remaining, &match));
@@ -83,7 +84,7 @@ TEST_F(ScannerTest, AnyEscapedNewline) {
 }
 
 TEST_F(ScannerTest, AnyEmptyString) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
   EXPECT_TRUE(Scanner("")
                   .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
                   .GetResult(&remaining, &match));
@@ -99,7 +100,7 @@ TEST_F(ScannerTest, Eos) {
 }
 
 TEST_F(ScannerTest, Many) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
   EXPECT_TRUE(Scanner("abc").Many(Scanner::LETTER).GetResult());
   EXPECT_FALSE(Scanner("0").Many(Scanner::LETTER).GetResult());
   EXPECT_FALSE(Scanner("").Many(Scanner::LETTER).GetResult());
@@ -115,7 +116,7 @@ TEST_F(ScannerTest, Many) {
 }
 
 TEST_F(ScannerTest, One) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
   EXPECT_TRUE(Scanner("abc").One(Scanner::LETTER).GetResult());
   EXPECT_FALSE(Scanner("0").One(Scanner::LETTER).GetResult());
   EXPECT_FALSE(Scanner("").One(Scanner::LETTER).GetResult());
@@ -137,7 +138,7 @@ TEST_F(ScannerTest, OneLiteral) {
 }
 
 TEST_F(ScannerTest, ScanUntil) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
   EXPECT_TRUE(Scanner(R"(' \1 \2 \3 \' \\'rest)")
                   .OneLiteral("'")
                   .ScanUntil('\'')
@@ -164,7 +165,7 @@ TEST_F(ScannerTest, ScanUntil) {
 }
 
 TEST_F(ScannerTest, ScanEscapedUntil) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
   EXPECT_TRUE(Scanner(R"(' \1 \2 \3 \' \\'rest)")
                   .OneLiteral("'")
                   .ScanEscapedUntil('\'')
@@ -184,7 +185,7 @@ TEST_F(ScannerTest, ScanEscapedUntil) {
 }
 
 TEST_F(ScannerTest, ZeroOrOneLiteral) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
   EXPECT_TRUE(
       Scanner("abc").ZeroOrOneLiteral("abC").GetResult(&remaining, &match));
   EXPECT_EQ("abc", remaining);
@@ -205,7 +206,7 @@ TEST_F(ScannerTest, ZeroOrOneLiteral) {
 // Test output of GetResult (including the forms with optional params),
 // and that it can be called multiple times.
 TEST_F(ScannerTest, CaptureAndGetResult) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
 
   Scanner scan("  first    second");
   EXPECT_TRUE(scan.Any(Scanner::SPACE)
@@ -238,7 +239,7 @@ TEST_F(ScannerTest, CaptureAndGetResult) {
 // Tests that if StopCapture is not called, then calling GetResult, then
 // scanning more, then GetResult again will update the capture.
 TEST_F(ScannerTest, MultipleGetResultExtendsCapture) {
-  StringPiece remaining, match;
+  absl::string_view remaining, match;
 
   Scanner scan("one2three");
   EXPECT_TRUE(scan.Many(Scanner::LETTER).GetResult(&remaining, &match));
@@ -255,8 +256,8 @@ TEST_F(ScannerTest, MultipleGetResultExtendsCapture) {
 TEST_F(ScannerTest, FailedMatchDoesntChangeResult) {
   // A failed match doesn't change pointers passed to GetResult.
   Scanner scan("name");
-  StringPiece remaining = "rem";
-  StringPiece match = "match";
+  absl::string_view remaining = "rem";
+  absl::string_view match = "match";
   EXPECT_FALSE(scan.One(Scanner::SPACE).GetResult(&remaining, &match));
   EXPECT_EQ("rem", remaining);
   EXPECT_EQ("match", match);
@@ -265,8 +266,8 @@ TEST_F(ScannerTest, FailedMatchDoesntChangeResult) {
 TEST_F(ScannerTest, DefaultCapturesAll) {
   // If RestartCapture() is not called, the whole string is used.
   Scanner scan("a b");
-  StringPiece remaining = "rem";
-  StringPiece match = "match";
+  absl::string_view remaining = "rem";
+  absl::string_view match = "match";
   EXPECT_TRUE(scan.Any(Scanner::LETTER)
                   .AnySpace()
                   .Any(Scanner::LETTER)
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
index 3aba5ec80e..ba40916b94 100644
--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <ctype.h>
 #include <algorithm>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
@@ -27,7 +28,7 @@ namespace str_util {
 
 static char hex_char[] = "0123456789abcdef";
 
-string CEscape(StringPiece src) {
+string CEscape(absl::string_view src) {
   string dest;
 
   for (unsigned char c : src) {
@@ -86,7 +87,7 @@ inline int hex_digit_to_int(char c) {
   return x & 0xf;
 }
 
-bool CUnescapeInternal(StringPiece source, string* dest,
+bool CUnescapeInternal(absl::string_view source, string* dest,
                        string::size_type* dest_len, string* error) {
   const char* p = source.data();
   const char* end = source.end();
@@ -216,8 +217,8 @@ bool CUnescapeInternal(StringPiece source, string* dest,
 }
 
 template <typename T>
-bool SplitAndParseAsInts(StringPiece text, char delim,
-                         std::function<bool(StringPiece, T*)> converter,
+bool SplitAndParseAsInts(absl::string_view text, char delim,
+                         std::function<bool(absl::string_view, T*)> converter,
                          std::vector<T>* result) {
   result->clear();
   std::vector<string> num_strings = Split(text, delim);
@@ -231,7 +232,7 @@ bool SplitAndParseAsInts(StringPiece text, char delim,
 
 }  // namespace
 
-bool CUnescape(StringPiece source, string* dest, string* error) {
+bool CUnescape(absl::string_view source, string* dest, string* error) {
   dest->resize(source.size());
   string::size_type dest_size;
   if (!CUnescapeInternal(source, dest, &dest_size, error)) {
@@ -249,7 +250,7 @@ void StripTrailingWhitespace(string* s) {
 }
 
 // Return lower-cased version of s.
-string Lowercase(StringPiece s) {
+string Lowercase(absl::string_view s) {
   string result(s.data(), s.size());
   for (char& c : result) {
     c = tolower(c);
@@ -258,7 +259,7 @@ string Lowercase(StringPiece s) {
 }
 
 // Return upper-cased version of s.
-string Uppercase(StringPiece s) {
+string Uppercase(absl::string_view s) {
   string result(s.data(), s.size());
   for (char& c : result) {
     c = toupper(c);
@@ -266,7 +267,7 @@ string Uppercase(StringPiece s) {
   return result;
 }
 
-string ArgDefCase(StringPiece s) {
+string ArgDefCase(absl::string_view s) {
   const size_t n = s.size();
 
   // Compute the size of resulting string.
@@ -318,18 +319,18 @@ string ArgDefCase(StringPiece s) {
   return result;
 }
 
-void TitlecaseString(string* s, StringPiece delimiters) {
+void TitlecaseString(string* s, absl::string_view delimiters) {
   bool upper = true;
   for (string::iterator ss = s->begin(); ss != s->end(); ++ss) {
     if (upper) {
       *ss = toupper(*ss);
     }
-    upper = (delimiters.find(*ss) != StringPiece::npos);
+    upper = (delimiters.find(*ss) != absl::string_view::npos);
   }
 }
 
-string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
-                     bool replace_all) {
+string StringReplace(absl::string_view s, absl::string_view oldsub,
+                     absl::string_view newsub, bool replace_all) {
   // TODO(jlebar): We could avoid having to shift data around in the string if
   // we had a StringPiece::find() overload that searched for a StringPiece.
   string res(s);
@@ -347,7 +348,7 @@ string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
   return res;
 }
 
-size_t RemoveLeadingWhitespace(StringPiece* text) {
+size_t RemoveLeadingWhitespace(absl::string_view* text) {
   size_t count = 0;
   const char* ptr = text->data();
   while (count < text->size() && isspace(*ptr)) {
@@ -358,7 +359,7 @@ size_t RemoveLeadingWhitespace(StringPiece* text) {
   return count;
 }
 
-size_t RemoveTrailingWhitespace(StringPiece* text) {
+size_t RemoveTrailingWhitespace(absl::string_view* text) {
   size_t count = 0;
   const char* ptr = text->data() + text->size() - 1;
   while (count < text->size() && isspace(*ptr)) {
@@ -369,12 +370,12 @@ size_t RemoveTrailingWhitespace(StringPiece* text) {
   return count;
 }
 
-size_t RemoveWhitespaceContext(StringPiece* text) {
+size_t RemoveWhitespaceContext(absl::string_view* text) {
   // use RemoveLeadingWhitespace() and RemoveTrailingWhitespace() to do the job
   return (RemoveLeadingWhitespace(text) + RemoveTrailingWhitespace(text));
 }
 
-bool ConsumePrefix(StringPiece* s, StringPiece expected) {
+bool ConsumePrefix(absl::string_view* s, absl::string_view expected) {
   if (StartsWith(*s, expected)) {
     s->remove_prefix(expected.size());
     return true;
@@ -382,7 +383,7 @@ bool ConsumePrefix(StringPiece* s, StringPiece expected) {
   return false;
 }
 
-bool ConsumeSuffix(StringPiece* s, StringPiece expected) {
+bool ConsumeSuffix(absl::string_view* s, absl::string_view expected) {
   if (EndsWith(*s, expected)) {
     s->remove_suffix(expected.size());
     return true;
@@ -390,7 +391,7 @@ bool ConsumeSuffix(StringPiece* s, StringPiece expected) {
   return false;
 }
 
-bool ConsumeLeadingDigits(StringPiece* s, uint64* val) {
+bool ConsumeLeadingDigits(absl::string_view* s, uint64* val) {
   const char* p = s->data();
   const char* limit = p + s->size();
   uint64 v = 0;
@@ -415,7 +416,7 @@ bool ConsumeLeadingDigits(StringPiece* s, uint64* val) {
   }
 }
 
-bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val) {
+bool ConsumeNonWhitespace(absl::string_view* s, absl::string_view* val) {
   const char* p = s->data();
   const char* limit = p + s->size();
   while (p < limit) {
@@ -425,29 +426,29 @@ bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val) {
   }
   const size_t n = p - s->data();
   if (n > 0) {
-    *val = StringPiece(s->data(), n);
+    *val = absl::string_view(s->data(), n);
     s->remove_prefix(n);
     return true;
   } else {
-    *val = StringPiece();
+    *val = absl::string_view();
     return false;
   }
 }
 
-bool SplitAndParseAsInts(StringPiece text, char delim,
+bool SplitAndParseAsInts(absl::string_view text, char delim,
                          std::vector<int32>* result) {
   return SplitAndParseAsInts<int32>(text, delim, strings::safe_strto32, result);
 }
 
-bool SplitAndParseAsInts(StringPiece text, char delim,
+bool SplitAndParseAsInts(absl::string_view text, char delim,
                          std::vector<int64>* result) {
   return SplitAndParseAsInts<int64>(text, delim, strings::safe_strto64, result);
 }
 
-bool SplitAndParseAsFloats(StringPiece text, char delim,
+bool SplitAndParseAsFloats(absl::string_view text, char delim,
                            std::vector<float>* result) {
   return SplitAndParseAsInts<float>(text, delim,
-                                    [](StringPiece str, float* value) {
+                                    [](absl::string_view str, float* value) {
                                       return strings::safe_strtof(str, value);
                                     },
                                     result);
@@ -461,18 +462,18 @@ size_t Strnlen(const char* str, const size_t string_max_len) {
   return len;
 }
 
-bool StrContains(StringPiece haystack, StringPiece needle) {
+bool StrContains(absl::string_view haystack, absl::string_view needle) {
   return std::search(haystack.begin(), haystack.end(), needle.begin(),
                      needle.end()) != haystack.end();
 }
 
-bool StartsWith(StringPiece text, StringPiece prefix) {
+bool StartsWith(absl::string_view text, absl::string_view prefix) {
   return prefix.empty() ||
          (text.size() >= prefix.size() &&
           memcmp(text.data(), prefix.data(), prefix.size()) == 0);
 }
 
-bool EndsWith(StringPiece text, StringPiece suffix) {
+bool EndsWith(absl::string_view text, absl::string_view suffix) {
   return suffix.empty() || (text.size() >= suffix.size() &&
                             memcmp(text.data() + (text.size() - suffix.size()),
                                    suffix.data(), suffix.size()) == 0);
diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h
index 9f52cf29fc..a00434534e 100644
--- a/tensorflow/core/lib/strings/str_util.h
+++ b/tensorflow/core/lib/strings/str_util.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include <functional>
 #include <string>
 #include <vector>
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -29,7 +29,7 @@ namespace str_util {
 
 // Returns a version of 'src' where unprintable characters have been
 // escaped using C-style escape sequences.
-string CEscape(StringPiece src);
+string CEscape(absl::string_view src);
 
 // Copies "source" to "dest", rewriting C-style escape sequences --
 // '\n', '\r', '\\', '\ooo', etc -- to their ASCII equivalents.
@@ -38,47 +38,47 @@ string CEscape(StringPiece src);
 // 'error'. To disable error reporting, set 'error' to NULL.
 //
 // NOTE: Does not support \u or \U!
-bool CUnescape(StringPiece source, string* dest, string* error);
+bool CUnescape(absl::string_view source, string* dest, string* error);
 
 // Removes any trailing whitespace from "*s".
 void StripTrailingWhitespace(string* s);
 
 // Removes leading ascii_isspace() characters.
 // Returns number of characters removed.
-size_t RemoveLeadingWhitespace(StringPiece* text);
+size_t RemoveLeadingWhitespace(absl::string_view* text);
 
 // Removes trailing ascii_isspace() characters.
 // Returns number of characters removed.
-size_t RemoveTrailingWhitespace(StringPiece* text);
+size_t RemoveTrailingWhitespace(absl::string_view* text);
 
 // Removes leading and trailing ascii_isspace() chars.
 // Returns number of chars removed.
-size_t RemoveWhitespaceContext(StringPiece* text);
+size_t RemoveWhitespaceContext(absl::string_view* text);
 
 // Consume a leading positive integer value.  If any digits were
 // found, store the value of the leading unsigned number in "*val",
 // advance "*s" past the consumed number, and return true.  If
 // overflow occurred, returns false.  Otherwise, returns false.
-bool ConsumeLeadingDigits(StringPiece* s, uint64* val);
+bool ConsumeLeadingDigits(absl::string_view* s, uint64* val);
 
 // Consume a leading token composed of non-whitespace characters only.
 // If *s starts with a non-zero number of non-whitespace characters, store
 // them in *val, advance *s past them, and return true.  Else return false.
-bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val);
+bool ConsumeNonWhitespace(absl::string_view* s, absl::string_view* val);
 
 // If "*s" starts with "expected", consume it and return true.
 // Otherwise, return false.
-bool ConsumePrefix(StringPiece* s, StringPiece expected);
+bool ConsumePrefix(absl::string_view* s, absl::string_view expected);
 
 // If "*s" ends with "expected", remove it and return true.
 // Otherwise, return false.
-bool ConsumeSuffix(StringPiece* s, StringPiece expected);
+bool ConsumeSuffix(absl::string_view* s, absl::string_view expected);
 
 // Return lower-cased version of s.
-string Lowercase(StringPiece s);
+string Lowercase(absl::string_view s);
 
 // Return upper-cased version of s.
-string Uppercase(StringPiece s);
+string Uppercase(absl::string_view s);
 
 // Converts "^2ILoveYou!" to "i_love_you_". More specifically:
 // - converts all non-alphanumeric characters to underscores
@@ -89,16 +89,16 @@ string Uppercase(StringPiece s);
 // This method is useful for producing strings matching "[a-z][a-z0-9_]*"
 // as required by OpDef.ArgDef.name. The resulting string is either empty or
 // matches this regex.
-string ArgDefCase(StringPiece s);
+string ArgDefCase(absl::string_view s);
 
 // Capitalize first character of each word in "*s".  "delimiters" is a
 // set of characters that can be used as word boundaries.
-void TitlecaseString(string* s, StringPiece delimiters);
+void TitlecaseString(string* s, absl::string_view delimiters);
 
 // Replaces the first occurrence (if replace_all is false) or all occurrences
 // (if replace_all is true) of oldsub in s with newsub.
-string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
-                     bool replace_all);
+string StringReplace(absl::string_view s, absl::string_view oldsub,
+                     absl::string_view newsub, bool replace_all);
 
 // Join functionality
 template <typename T>
@@ -111,13 +111,13 @@ template <typename T, typename Formatter>
 string Join(const T& s, const char* sep, Formatter f);
 
 struct AllowEmpty {
-  bool operator()(StringPiece sp) const { return true; }
+  bool operator()(absl::string_view sp) const { return true; }
 };
 struct SkipEmpty {
-  bool operator()(StringPiece sp) const { return !sp.empty(); }
+  bool operator()(absl::string_view sp) const { return !sp.empty(); }
 };
 struct SkipWhitespace {
-  bool operator()(StringPiece sp) const {
+  bool operator()(absl::string_view sp) const {
     RemoveTrailingWhitespace(&sp);
     return !sp.empty();
   }
@@ -125,35 +125,36 @@ struct SkipWhitespace {
 
 // Split strings using any of the supplied delimiters. For example:
 // Split("a,b.c,d", ".,") would return {"a", "b", "c", "d"}.
-std::vector<string> Split(StringPiece text, StringPiece delims);
+std::vector<string> Split(absl::string_view text, absl::string_view delims);
 
 template <typename Predicate>
-std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p);
+std::vector<string> Split(absl::string_view text, absl::string_view delims,
+                          Predicate p);
 
 // Split "text" at "delim" characters, and parse each component as
 // an integer.  If successful, adds the individual numbers in order
 // to "*result" and returns true.  Otherwise returns false.
-bool SplitAndParseAsInts(StringPiece text, char delim,
+bool SplitAndParseAsInts(absl::string_view text, char delim,
                          std::vector<int32>* result);
-bool SplitAndParseAsInts(StringPiece text, char delim,
+bool SplitAndParseAsInts(absl::string_view text, char delim,
                          std::vector<int64>* result);
-bool SplitAndParseAsFloats(StringPiece text, char delim,
+bool SplitAndParseAsFloats(absl::string_view text, char delim,
                            std::vector<float>* result);
 
 // StartsWith()
 //
 // Returns whether a given string `text` begins with `prefix`.
-bool StartsWith(StringPiece text, StringPiece prefix);
+bool StartsWith(absl::string_view text, absl::string_view prefix);
 
 // EndsWith()
 //
 // Returns whether a given string `text` ends with `suffix`.
-bool EndsWith(StringPiece text, StringPiece suffix);
+bool EndsWith(absl::string_view text, absl::string_view suffix);
 
 // StrContains()
 //
 // Returns whether a given string `haystack` contains the substring `needle`.
-bool StrContains(StringPiece haystack, StringPiece needle);
+bool StrContains(absl::string_view haystack, absl::string_view needle);
 
 // ------------------------------------------------------------------
 // Implementation details below
@@ -192,18 +193,21 @@ string Join(const T& s, const char* sep, Formatter f) {
   return result;
 }
 
-inline std::vector<string> Split(StringPiece text, StringPiece delims) {
+inline std::vector<string> Split(absl::string_view text,
+                                 absl::string_view delims) {
   return Split(text, delims, AllowEmpty());
 }
 
 template <typename Predicate>
-std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p) {
+std::vector<string> Split(absl::string_view text, absl::string_view delims,
+                          Predicate p) {
   std::vector<string> result;
   size_t token_start = 0;
   if (!text.empty()) {
     for (size_t i = 0; i < text.size() + 1; i++) {
-      if ((i == text.size()) || (delims.find(text[i]) != StringPiece::npos)) {
-        StringPiece token(text.data() + token_start, i - token_start);
+      if ((i == text.size()) ||
+          (delims.find(text[i]) != absl::string_view::npos)) {
+        absl::string_view token(text.data() + token_start, i - token_start);
         if (p(token)) {
           result.emplace_back(token);
         }
@@ -214,13 +218,13 @@ std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p) {
   return result;
 }
 
-inline std::vector<string> Split(StringPiece text, char delim) {
-  return Split(text, StringPiece(&delim, 1));
+inline std::vector<string> Split(absl::string_view text, char delim) {
+  return Split(text, absl::string_view(&delim, 1));
 }
 
 template <typename Predicate>
-std::vector<string> Split(StringPiece text, char delims, Predicate p) {
-  return Split(text, StringPiece(&delims, 1), p);
+std::vector<string> Split(absl::string_view text, char delims, Predicate p) {
+  return Split(text, absl::string_view(&delims, 1), p);
 }
 
 // Returns the length of the given null-terminated byte string 'str'.
diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
index 3bf3e99825..38157d60a5 100644
--- a/tensorflow/core/lib/strings/str_util_test.cc
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/str_util.h"
 
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -28,7 +29,7 @@ TEST(CEscape, Basic) {
   EXPECT_EQ(str_util::CEscape("\320hi\200"), "\\320hi\\200");
 }
 
-string ExpectCUnescapeSuccess(StringPiece source) {
+string ExpectCUnescapeSuccess(absl::string_view source) {
   string dest;
   string error;
   EXPECT_TRUE(str_util::CUnescape(source, &dest, &error)) << error;
@@ -49,7 +50,7 @@ TEST(CUnescape, HandlesCopyOnWriteStrings) {
   // For std::string, read and dest now share the same buffer.
 
   string error;
-  StringPiece source = "llohe";
+  absl::string_view source = "llohe";
   // CUnescape is going to write "llohe" to dest, so dest's buffer will be
   // reallocated, and read's buffer remains untouched.
   EXPECT_TRUE(str_util::CUnescape(source, &dest, &error));
@@ -81,71 +82,71 @@ TEST(StripTrailingWhitespace, Basic) {
 
 TEST(RemoveLeadingWhitespace, Basic) {
   string text = "  \t   \n  \r Quick\t";
-  StringPiece data(text);
+  absl::string_view data(text);
   // check that all whitespace is removed
   EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 11);
-  EXPECT_EQ(data, StringPiece("Quick\t"));
+  EXPECT_EQ(data, absl::string_view("Quick\t"));
   // check that non-whitespace is not removed
   EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 0);
-  EXPECT_EQ(data, StringPiece("Quick\t"));
+  EXPECT_EQ(data, absl::string_view("Quick\t"));
 }
 
 TEST(RemoveLeadingWhitespace, TerminationHandling) {
   // check termination handling
   string text = "\t";
-  StringPiece data(text);
+  absl::string_view data(text);
   EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 1);
-  EXPECT_EQ(data, StringPiece(""));
+  EXPECT_EQ(data, absl::string_view(""));
 
   // check termination handling again
   EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 0);
-  EXPECT_EQ(data, StringPiece(""));
+  EXPECT_EQ(data, absl::string_view(""));
 }
 
 TEST(RemoveTrailingWhitespace, Basic) {
   string text = "  \t   \n  \r Quick \t";
-  StringPiece data(text);
+  absl::string_view data(text);
   // check that all whitespace is removed
   EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 2);
-  EXPECT_EQ(data, StringPiece("  \t   \n  \r Quick"));
+  EXPECT_EQ(data, absl::string_view("  \t   \n  \r Quick"));
   // check that non-whitespace is not removed
   EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 0);
-  EXPECT_EQ(data, StringPiece("  \t   \n  \r Quick"));
+  EXPECT_EQ(data, absl::string_view("  \t   \n  \r Quick"));
 }
 
 TEST(RemoveTrailingWhitespace, TerminationHandling) {
   // check termination handling
   string text = "\t";
-  StringPiece data(text);
+  absl::string_view data(text);
   EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 1);
-  EXPECT_EQ(data, StringPiece(""));
+  EXPECT_EQ(data, absl::string_view(""));
 
   // check termination handling again
   EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 0);
-  EXPECT_EQ(data, StringPiece(""));
+  EXPECT_EQ(data, absl::string_view(""));
 }
 
 TEST(RemoveWhitespaceContext, Basic) {
   string text = "  \t   \n  \r Quick \t";
-  StringPiece data(text);
+  absl::string_view data(text);
   // check that all whitespace is removed
   EXPECT_EQ(str_util::RemoveWhitespaceContext(&data), 13);
-  EXPECT_EQ(data, StringPiece("Quick"));
+  EXPECT_EQ(data, absl::string_view("Quick"));
   // check that non-whitespace is not removed
   EXPECT_EQ(str_util::RemoveWhitespaceContext(&data), 0);
-  EXPECT_EQ(data, StringPiece("Quick"));
+  EXPECT_EQ(data, absl::string_view("Quick"));
 
   // Test empty string
   text = "";
   data = text;
   EXPECT_EQ(str_util::RemoveWhitespaceContext(&data), 0);
-  EXPECT_EQ(data, StringPiece(""));
+  EXPECT_EQ(data, absl::string_view(""));
 }
 
-void TestConsumeLeadingDigits(StringPiece s, int64 expected,
-                              StringPiece remaining) {
+void TestConsumeLeadingDigits(absl::string_view s, int64 expected,
+                              absl::string_view remaining) {
   uint64 v;
-  StringPiece input(s);
+  absl::string_view input(s);
   if (str_util::ConsumeLeadingDigits(&input, &v)) {
     EXPECT_EQ(v, static_cast<uint64>(expected));
     EXPECT_EQ(input, remaining);
@@ -178,10 +179,10 @@ TEST(ConsumeLeadingDigits, Basic) {
                            "184467440737095516159yz");
 }
 
-void TestConsumeNonWhitespace(StringPiece s, StringPiece expected,
-                              StringPiece remaining) {
-  StringPiece v;
-  StringPiece input(s);
+void TestConsumeNonWhitespace(absl::string_view s, absl::string_view expected,
+                              absl::string_view remaining) {
+  absl::string_view v;
+  absl::string_view input(s);
   if (str_util::ConsumeNonWhitespace(&input, &v)) {
     EXPECT_EQ(v, expected);
     EXPECT_EQ(input, remaining);
@@ -200,7 +201,7 @@ TEST(ConsumeNonWhitespace, Basic) {
 
 TEST(ConsumePrefix, Basic) {
   string s("abcdef");
-  StringPiece input(s);
+  absl::string_view input(s);
   EXPECT_FALSE(str_util::ConsumePrefix(&input, "abcdefg"));
   EXPECT_EQ(input, "abcdef");
 
@@ -228,7 +229,7 @@ TEST(JoinStrings, Basic) {
   s = {"hi", "there", "strings"};
   EXPECT_EQ(str_util::Join(s, " "), "hi there strings");
 
-  std::vector<StringPiece> sp;
+  std::vector<absl::string_view> sp;
   sp = {"hi"};
   EXPECT_EQ(str_util::Join(sp, ",,"), "hi");
   sp = {"hi", "there", "strings"};
diff --git a/tensorflow/core/lib/strings/strcat.cc b/tensorflow/core/lib/strings/strcat.cc
index f140ec3d26..fd708b837d 100644
--- a/tensorflow/core/lib/strings/strcat.cc
+++ b/tensorflow/core/lib/strings/strcat.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <stdio.h>
 #include <string.h>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -41,7 +42,7 @@ AlphaNum::AlphaNum(Hex hex) {
     value >>= 4;
     mask >>= 4;
   } while (mask != 0);
-  piece_ = StringPiece(writer, end - writer);
+  piece_ = absl::string_view(writer, end - writer);
 }
 
 // ----------------------------------------------------------------------
@@ -117,15 +118,15 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
 namespace internal {
 
 // Do not call directly - these are not part of the public API.
-string CatPieces(std::initializer_list<StringPiece> pieces) {
+string CatPieces(std::initializer_list<absl::string_view> pieces) {
   string result;
   size_t total_size = 0;
-  for (const StringPiece piece : pieces) total_size += piece.size();
+  for (const absl::string_view piece : pieces) total_size += piece.size();
   gtl::STLStringResizeUninitialized(&result, total_size);
 
   char *const begin = &*result.begin();
   char *out = begin;
-  for (const StringPiece piece : pieces) {
+  for (const absl::string_view piece : pieces) {
     const size_t this_size = piece.size();
     memcpy(out, piece.data(), this_size);
     out += this_size;
@@ -141,10 +142,11 @@ string CatPieces(std::initializer_list<StringPiece> pieces) {
 #define DCHECK_NO_OVERLAP(dest, src) \
   DCHECK_GE(uintptr_t((src).data() - (dest).data()), uintptr_t((dest).size()))
 
-void AppendPieces(string *result, std::initializer_list<StringPiece> pieces) {
+void AppendPieces(string *result,
+                  std::initializer_list<absl::string_view> pieces) {
   size_t old_size = result->size();
   size_t total_size = old_size;
-  for (const StringPiece piece : pieces) {
+  for (const absl::string_view piece : pieces) {
     DCHECK_NO_OVERLAP(*result, piece);
     total_size += piece.size();
   }
@@ -152,7 +154,7 @@ void AppendPieces(string *result, std::initializer_list<StringPiece> pieces) {
 
   char *const begin = &*result->begin();
   char *out = begin + old_size;
-  for (const StringPiece piece : pieces) {
+  for (const absl::string_view piece : pieces) {
     const size_t this_size = piece.size();
     memcpy(out, piece.data(), this_size);
     out += this_size;
diff --git a/tensorflow/core/lib/strings/strcat.h b/tensorflow/core/lib/strings/strcat.h
index a620f59447..1c7cb68c40 100644
--- a/tensorflow/core/lib/strings/strcat.h
+++ b/tensorflow/core/lib/strings/strcat.h
@@ -22,7 +22,7 @@ limitations under the License.
 
 #include <string>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -121,19 +121,20 @@ class AlphaNum {
   AlphaNum(Hex hex);               // NOLINT(runtime/explicit)
 
   AlphaNum(const char *c_str) : piece_(c_str) {}   // NOLINT(runtime/explicit)
-  AlphaNum(const StringPiece &pc) : piece_(pc) {}  // NOLINT(runtime/explicit)
+  AlphaNum(const absl::string_view &pc)
+      : piece_(pc) {}                              // NOLINT(runtime/explicit)
   AlphaNum(const tensorflow::string &str)          // NOLINT(runtime/explicit)
       : piece_(str) {}
   template <typename A>
   AlphaNum(const std::basic_string<char, std::char_traits<char>, A> &str)
       : piece_(str) {}  // NOLINT(runtime/explicit)
 
-  StringPiece::size_type size() const { return piece_.size(); }
+  absl::string_view::size_type size() const { return piece_.size(); }
   const char *data() const { return piece_.data(); }
-  StringPiece Piece() const { return piece_; }
+  absl::string_view Piece() const { return piece_; }
 
  private:
-  StringPiece piece_;
+  absl::string_view piece_;
   char digits_[kFastToBufferSize];
 
   // Use ":" not ':'
@@ -176,8 +177,9 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
 namespace internal {
 
 // Do not call directly - this is not part of the public API.
-string CatPieces(std::initializer_list<StringPiece> pieces);
-void AppendPieces(string *dest, std::initializer_list<StringPiece> pieces);
+string CatPieces(std::initializer_list<absl::string_view> pieces);
+void AppendPieces(string *dest,
+                  std::initializer_list<absl::string_view> pieces);
 
 }  // namespace internal
 
diff --git a/tensorflow/core/lib/strings/strcat_test.cc b/tensorflow/core/lib/strings/strcat_test.cc
index 6c4e5526b1..2e06bd0eb8 100644
--- a/tensorflow/core/lib/strings/strcat_test.cc
+++ b/tensorflow/core/lib/strings/strcat_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -66,7 +67,7 @@ TEST(StrCat, Basics) {
 
   string strs[] = {"Hello", "Cruel", "World"};
 
-  StringPiece pieces[] = {"Hello", "Cruel", "World"};
+  absl::string_view pieces[] = {"Hello", "Cruel", "World"};
 
   const char *c_strs[] = {"Hello", "Cruel", "World"};
 
@@ -208,7 +209,7 @@ TEST(StrAppend, Basics) {
 
   string strs[] = {"Hello", "Cruel", "World"};
 
-  StringPiece pieces[] = {"Hello", "Cruel", "World"};
+  absl::string_view pieces[] = {"Hello", "Cruel", "World"};
 
   const char *c_strs[] = {"Hello", "Cruel", "World"};
 
diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD
index 647a797b82..4726505119 100644
--- a/tensorflow/core/platform/cloud/BUILD
+++ b/tensorflow/core/platform/cloud/BUILD
@@ -89,6 +89,7 @@ cc_library(
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
         "@jsoncpp_git//:jsoncpp",
     ],
     alwayslink = 1,
@@ -115,6 +116,7 @@ cc_library(
         ":http_request",
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
         "@curl",
     ],
 )
@@ -132,6 +134,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:test",
+        "@com_google_absl//absl/strings",
         "@curl",
     ],
 )
@@ -151,6 +154,7 @@ cc_library(
         ":retrying_utils",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
         "@jsoncpp_git//:jsoncpp",
     ],
 )
@@ -189,6 +193,7 @@ cc_library(
         ":compute_engine_metadata_client",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -220,6 +225,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@boringssl//:crypto",
+        "@com_google_absl//absl/strings",
         "@jsoncpp_git//:jsoncpp",
     ],
 )
@@ -249,6 +255,7 @@ cc_library(
         ":retrying_utils",
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -304,6 +311,7 @@ tf_cc_test(
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -342,6 +350,7 @@ tf_cc_test(
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -361,6 +370,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "@boringssl//:crypto",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -380,6 +390,7 @@ tf_cc_test(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -421,6 +432,7 @@ tf_cc_test(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/platform/cloud/compute_engine_zone_provider.cc b/tensorflow/core/platform/cloud/compute_engine_zone_provider.cc
index e147d88371..c406ba0eae 100644
--- a/tensorflow/core/platform/cloud/compute_engine_zone_provider.cc
+++ b/tensorflow/core/platform/cloud/compute_engine_zone_provider.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/platform/cloud/compute_engine_zone_provider.h"
 
 #include <utility>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 namespace tensorflow {
 
@@ -35,7 +36,7 @@ Status ComputeEngineZoneProvider::GetZone(string* zone) {
   std::vector<char> response_buffer;
   TF_RETURN_IF_ERROR(google_metadata_client_->GetMetadata(kGceMetadataZonePath,
                                                           &response_buffer));
-  StringPiece location(&response_buffer[0], response_buffer.size());
+  absl::string_view location(&response_buffer[0], response_buffer.size());
 
   std::vector<string> elems = str_util::Split(location, "/");
   if (elems.size() == 4) {
diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc
index 5e1eabee5b..6f85514fa8 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <algorithm>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
 
 #include "tensorflow/core/lib/core/errors.h"
@@ -275,7 +276,7 @@ void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) {
                                            reinterpret_cast<void*>(this)));
   CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION,
                                            &CurlHttpRequest::ReadCallback));
-  post_body_buffer_ = StringPiece(buffer, size);
+  post_body_buffer_ = absl::string_view(buffer, size);
 }
 
 void CurlHttpRequest::SetPostEmptyBody() {
@@ -386,8 +387,8 @@ size_t CurlHttpRequest::HeaderCallback(const void* ptr, size_t size,
                                        size_t nmemb, void* this_object) {
   CHECK(ptr);
   auto that = reinterpret_cast<CurlHttpRequest*>(this_object);
-  StringPiece header(reinterpret_cast<const char*>(ptr), size * nmemb);
-  StringPiece name, value;
+  absl::string_view header(reinterpret_cast<const char*>(ptr), size * nmemb);
+  absl::string_view name, value;
   // The supplied header has the form "<name>: <value>", parse it.
   if (strings::Scanner(header)
           .ScanEscapedUntil(':')
@@ -446,7 +447,7 @@ Status CurlHttpRequest::Send() {
   auto get_error_message = [this]() -> string {
     string error_message = strings::StrCat(
         "Error executing an HTTP request: HTTP response code ", response_code_);
-    StringPiece body = GetResponse();
+    absl::string_view body = GetResponse();
     if (!body.empty()) {
       return strings::StrCat(
           error_message, " with body '",
@@ -542,13 +543,14 @@ void CurlHttpRequest::CheckNotSent() const {
   CHECK(!is_sent_) << "The request has already been sent.";
 }
 
-StringPiece CurlHttpRequest::GetResponse() const {
-  StringPiece response;
+absl::string_view CurlHttpRequest::GetResponse() const {
+  absl::string_view response;
   if (IsDirectResponse()) {
-    response = StringPiece(direct_response_.buffer_,
-                           direct_response_.bytes_transferred_);
+    response = absl::string_view(direct_response_.buffer_,
+                                 direct_response_.bytes_transferred_);
   } else {
-    response = StringPiece(response_buffer_->data(), response_buffer_->size());
+    response =
+        absl::string_view(response_buffer_->data(), response_buffer_->size());
   }
   return response;
 }
diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h
index 1b2029926d..22ae5d9687 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.h
+++ b/tensorflow/core/platform/cloud/curl_http_request.h
@@ -19,10 +19,10 @@ limitations under the License.
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include <curl/curl.h>
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/cloud/http_request.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/macros.h"
@@ -165,7 +165,7 @@ class CurlHttpRequest : public HttpRequest {
                               curl_off_t ulnow);
   void CheckMethodNotSet() const;
   void CheckNotSent() const;
-  StringPiece GetResponse() const;
+  absl::string_view GetResponse() const;
 
   /// Helper to convert the given CURLcode and error buffer, representing the
   /// result of performing a transfer, into a Status with an error message.
@@ -176,7 +176,7 @@ class CurlHttpRequest : public HttpRequest {
 
   FILE* put_body_ = nullptr;
 
-  StringPiece post_body_buffer_;
+  absl::string_view post_body_buffer_;
   size_t post_body_read_ = 0;
 
   std::vector<char>* response_buffer_ = nullptr;
diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc
index eb9023d708..e5f92d6ec8 100644
--- a/tensorflow/core/platform/cloud/curl_http_request_test.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
 #include <fstream>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/mem.h"
@@ -144,8 +145,8 @@ class FakeLibCurl : public LibCurl {
       posted_content_ = "";
       do {
         bytes_read = read_callback_(buffer, 1, sizeof(buffer), read_data_);
-        posted_content_ =
-            strings::StrCat(posted_content_, StringPiece(buffer, bytes_read));
+        posted_content_ = strings::StrCat(
+            posted_content_, absl::string_view(buffer, bytes_read));
       } while (bytes_read > 0);
     }
     if (write_data_ || write_callback_) {
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index c61b68aeeb..1b8c738edb 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include <cstring>
 #include <fstream>
 #include <vector>
+#include "absl/strings/string_view.h"
 #ifdef _WIN32
 #include <io.h>  // for _mktemp
 #endif
@@ -172,9 +173,9 @@ Status GetTmpFilename(string* filename) {
 /// "bucket-name" and "path/to/file.txt".
 /// If fname only contains the bucket and empty_object_ok = true, the returned
 /// object is empty.
-Status ParseGcsPath(StringPiece fname, bool empty_object_ok, string* bucket,
-                    string* object) {
-  StringPiece scheme, bucketp, objectp;
+Status ParseGcsPath(absl::string_view fname, bool empty_object_ok,
+                    string* bucket, string* object) {
+  absl::string_view scheme, bucketp, objectp;
   io::ParseURI(fname, &scheme, &bucketp, &objectp);
   if (scheme != "gs") {
     return errors::InvalidArgument("GCS path doesn't start with 'gs://': ",
@@ -223,7 +224,7 @@ std::set<string> AddAllSubpaths(const std::vector<string>& paths) {
   std::set<string> result;
   result.insert(paths.begin(), paths.end());
   for (const string& path : paths) {
-    StringPiece subpath = io::Dirname(path);
+    absl::string_view subpath = io::Dirname(path);
     while (!subpath.empty()) {
       result.emplace(string(subpath));
       subpath = io::Dirname(subpath);
@@ -232,7 +233,7 @@ std::set<string> AddAllSubpaths(const std::vector<string>& paths) {
   return result;
 }
 
-Status ParseJson(StringPiece json, Json::Value* result) {
+Status ParseJson(absl::string_view json, Json::Value* result) {
   Json::Reader reader;
   if (!reader.parse(json.data(), json.data() + json.size(), *result)) {
     return errors::Internal("Couldn't parse JSON response from GCS.");
@@ -241,7 +242,7 @@ Status ParseJson(StringPiece json, Json::Value* result) {
 }
 
 Status ParseJson(const std::vector<char>& json, Json::Value* result) {
-  return ParseJson(StringPiece{json.data(), json.size()}, result);
+  return ParseJson(absl::string_view{json.data(), json.size()}, result);
 }
 
 /// Reads a JSON value with the given name from a parent JSON value.
@@ -305,13 +306,13 @@ class GcsRandomAccessFile : public RandomAccessFile {
  public:
   using ReadFn =
       std::function<Status(const string& filename, uint64 offset, size_t n,
-                           StringPiece* result, char* scratch)>;
+                           absl::string_view* result, char* scratch)>;
 
   GcsRandomAccessFile(const string& filename, ReadFn read_fn)
       : filename_(filename), read_fn_(std::move(read_fn)) {}
 
   /// The implementation of reads with an LRU block cache. Thread safe.
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     return read_fn_(filename_, offset, n, result, scratch);
   }
@@ -372,7 +373,7 @@ class GcsWritableFile : public WritableFile {
 
   ~GcsWritableFile() override { Close().IgnoreError(); }
 
-  Status Append(StringPiece data) override {
+  Status Append(absl::string_view data) override {
     TF_RETURN_IF_ERROR(CheckWritable());
     sync_needed_ = true;
     outfile_ << data;
@@ -530,7 +531,7 @@ class GcsWritableFile : public WritableFile {
       // This means GCS doesn't have any bytes of the file yet.
       *uploaded = 0;
     } else {
-      StringPiece range_piece(received_range);
+      absl::string_view range_piece(received_range);
       str_util::ConsumePrefix(&range_piece,
                               "bytes=");  // May or may not be present.
       std::vector<int64> range_parts;
@@ -604,7 +605,7 @@ class GcsReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
 // Helper function to extract an environment variable and convert it into a
 // value of type T.
 template <typename T>
-bool GetEnvVar(const char* varname, bool (*convert)(StringPiece, T*),
+bool GetEnvVar(const char* varname, bool (*convert)(absl::string_view, T*),
                T* value) {
   const char* env_value = std::getenv(varname);
   if (!env_value) {
@@ -613,14 +614,14 @@ bool GetEnvVar(const char* varname, bool (*convert)(StringPiece, T*),
   return convert(env_value, value);
 }
 
-bool StringPieceIdentity(StringPiece str, StringPiece* value) {
+bool StringPieceIdentity(absl::string_view str, absl::string_view* value) {
   *value = str;
   return true;
 }
 
 /// \brief Utility function to split a comma delimited list of strings to an
 /// unordered set, lowercasing all values.
-bool SplitByCommaToLowercaseSet(StringPiece list,
+bool SplitByCommaToLowercaseSet(absl::string_view list,
                                 std::unordered_set<string>* set) {
   std::vector<string> vector =
       str_util::Split(tensorflow::str_util::Lowercase(list), ",");
@@ -713,14 +714,14 @@ GcsFileSystem::GcsFileSystem() {
   }
 
   // Get the additional header
-  StringPiece add_header_contents;
+  absl::string_view add_header_contents;
   if (GetEnvVar(kAdditionalRequestHeader, StringPieceIdentity,
                 &add_header_contents)) {
     size_t split = add_header_contents.find(':', 0);
 
-    if (split != StringPiece::npos) {
-      StringPiece header_name = add_header_contents.substr(0, split);
-      StringPiece header_value = add_header_contents.substr(split + 1);
+    if (split != absl::string_view::npos) {
+      absl::string_view header_name = add_header_contents.substr(0, split);
+      absl::string_view header_value = add_header_contents.substr(split + 1);
 
       if (!header_name.empty() && !header_value.empty()) {
         additional_header_.reset(new std::pair<const string, const string>(
@@ -817,7 +818,7 @@ Status GcsFileSystem::NewRandomAccessFile(
   result->reset(new GcsRandomAccessFile(fname, [this, bucket, object](
                                                    const string& fname,
                                                    uint64 offset, size_t n,
-                                                   StringPiece* result,
+                                                   absl::string_view* result,
                                                    char* scratch) {
     tf_shared_lock l(block_cache_lock_);
     if (file_block_cache_->IsCacheEnabled()) {
@@ -834,11 +835,11 @@ Status GcsFileSystem::NewRandomAccessFile(
             << fname;
       }
     }
-    *result = StringPiece();
+    *result = absl::string_view();
     size_t bytes_transferred;
     TF_RETURN_IF_ERROR(
         file_block_cache_->Read(fname, offset, n, scratch, &bytes_transferred));
-    *result = StringPiece(scratch, bytes_transferred);
+    *result = absl::string_view(scratch, bytes_transferred);
     if (bytes_transferred < n) {
       return errors::OutOfRange("EOF reached, ", result->size(),
                                 " bytes were read out of ", n,
@@ -954,7 +955,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname,
   std::unique_ptr<char[]> buffer(new char[kReadAppendableFileBufferSize]);
   Status status;
   uint64 offset = 0;
-  StringPiece read_chunk;
+  absl::string_view read_chunk;
 
   // Read the file from GCS in chunks and save it to a tmp file.
   string old_content_filename;
@@ -994,7 +995,7 @@ Status GcsFileSystem::NewReadOnlyMemoryRegionFromFile(
   std::unique_ptr<RandomAccessFile> file;
   TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &file));
 
-  StringPiece piece;
+  absl::string_view piece;
   TF_RETURN_IF_ERROR(file->Read(0, size, &piece, data.get()));
 
   result->reset(new GcsReadOnlyMemoryRegion(std::move(data), size));
@@ -1320,7 +1321,7 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
         // The names should be relative to the 'dirname'. That means the
         // 'object_prefix', which is part of 'dirname', should be removed from
         // the beginning of 'name'.
-        StringPiece relative_path(name);
+        absl::string_view relative_path(name);
         if (!str_util::ConsumePrefix(&relative_path, object_prefix)) {
           return errors::Internal(strings::StrCat(
               "Unexpected response: the returned file name ", name,
@@ -1349,7 +1350,7 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
               "response.");
         }
         const string& prefix_str = prefix.asString();
-        StringPiece relative_path(prefix_str);
+        absl::string_view relative_path(prefix_str);
         if (!str_util::ConsumePrefix(&relative_path, object_prefix)) {
           return errors::Internal(
               "Unexpected response: the returned folder name ", prefix_str,
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 702802b185..4903e68f3f 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/gcs_file_system.h"
 #include <fstream>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/cloud/http_request_fake.h"
@@ -79,7 +80,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char scratch[6];
-  StringPiece result;
+  absl::string_view result;
 
   // Read the first chunk.
   TF_EXPECT_OK(file->Read(0, sizeof(scratch), &result, scratch));
@@ -233,7 +234,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_DifferentN) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char small_scratch[3];
-  StringPiece result;
+  absl::string_view result;
 
   // Read the first chunk.
   TF_EXPECT_OK(file->Read(0, sizeof(small_scratch), &result, small_scratch));
@@ -290,7 +291,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
       nullptr /* gcs additional header */);
 
   char scratch[100];
-  StringPiece result;
+  absl::string_view result;
   {
     // We are instantiating this in an enclosed scope to make sure after the
     // unique ptr goes out of scope, we can still access result.
@@ -379,7 +380,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_Flush) {
       nullptr /* gcs additional header */);
 
   char scratch[100];
-  StringPiece result;
+  absl::string_view result;
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
   // Read the first chunk. The cache will be populated with the first block of
@@ -428,7 +429,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
                    kTestTimeoutConfig, *kAllowedLocationsDefault,
                    nullptr /* gcs additional header */);
   char scratch[100];
-  StringPiece result;
+  absl::string_view result;
   // There should only be two HTTP requests issued to GCS even though we iterate
   // this loop 10 times.  This shows that the underlying FileBlockCache persists
   // across file close/open boundaries.
@@ -502,7 +503,7 @@ TEST(GcsFileSystemTest,
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char scratch[5];
-  StringPiece result;
+  absl::string_view result;
 
   // First read.
   TF_EXPECT_OK(file->Read(0, sizeof(scratch), &result, scratch));
@@ -568,7 +569,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_InconsistentRead) {
   TF_ASSERT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char scratch[6];
-  StringPiece result;
+  absl::string_view result;
 
   EXPECT_EQ(errors::Code::INTERNAL,
             file->Read(0, sizeof(scratch), &result, scratch).code());
@@ -631,7 +632,7 @@ TEST(GcsFileSystemTest, NewWritableFile) {
   std::unique_ptr<RandomAccessFile> rfile;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/writeable", &rfile));
   char scratch[100];
-  StringPiece result;
+  absl::string_view result;
   TF_EXPECT_OK(rfile->Read(0, 4, &result, scratch));
   EXPECT_EQ("0123", result);
   // Open the writable file.
@@ -791,7 +792,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) {
   std::unique_ptr<RandomAccessFile> rfile;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/writeable", &rfile));
   char scratch[100];
-  StringPiece result;
+  absl::string_view result;
   TF_EXPECT_OK(rfile->Read(0, 4, &result, scratch));
   EXPECT_EQ("0123", result);
   // Now write to the same file. Once the write succeeds, the cached block will
@@ -1029,7 +1030,7 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
   std::unique_ptr<RandomAccessFile> rfile;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/appendable", &rfile));
   char scratch[100];
-  StringPiece result;
+  absl::string_view result;
   TF_EXPECT_OK(rfile->Read(0, 8, &result, scratch));
   EXPECT_EQ("content1", result);
   // Closing the appendable file will flush its contents to GCS, triggering HTTP
@@ -1092,8 +1093,9 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile(
       "gs://bucket/path/random_access.txt", &region));
 
-  EXPECT_EQ(content, StringPiece(reinterpret_cast<const char*>(region->data()),
-                                 region->length()));
+  EXPECT_EQ(content,
+            absl::string_view(reinterpret_cast<const char*>(region->data()),
+                              region->length()));
 }
 
 TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile_NoObjectName) {
@@ -1808,7 +1810,7 @@ TEST(GcsFileSystemTest, DeleteFile) {
 
   // Do an initial read of the file to load its contents into the block cache.
   char scratch[100];
-  StringPiece result;
+  absl::string_view result;
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/file1.txt", &file));
   TF_EXPECT_OK(file->Read(0, 8, &result, scratch));
@@ -2199,7 +2201,7 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
   // Do an initial read of the source and destination files to load their
   // contents into the block cache.
   char scratch[100];
-  StringPiece result;
+  absl::string_view result;
   std::unique_ptr<RandomAccessFile> src;
   std::unique_ptr<RandomAccessFile> dst;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/src.txt", &src));
@@ -3310,7 +3312,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_StatsRecording) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char scratch[6];
-  StringPiece result;
+  absl::string_view result;
 
   TF_EXPECT_OK(file->Read(0, sizeof(scratch), &result, scratch));
   EXPECT_EQ("012345", result);
diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index e15400780a..b8f3e0c07c 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
+#include "absl/strings/string_view.h"
 #ifndef _WIN32
 #include <pwd.h>
 #include <unistd.h>
@@ -206,8 +207,8 @@ Status GoogleAuthProvider::GetTokenFromGce() {
 
   TF_RETURN_IF_ERROR(compute_engine_metadata_client_->GetMetadata(
       kGceTokenPath, &response_buffer));
-  StringPiece response =
-      StringPiece(&response_buffer[0], response_buffer.size());
+  absl::string_view response =
+      absl::string_view(&response_buffer[0], response_buffer.size());
 
   TF_RETURN_IF_ERROR(oauth_client_->ParseOAuthResponse(
       response, request_timestamp_sec, &current_token_,
diff --git a/tensorflow/core/platform/cloud/google_auth_provider_test.cc b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
index ec31c5ee8c..17345665b6 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider_test.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
 #include <stdlib.h>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/cloud/http_request_fake.h"
@@ -37,8 +38,9 @@ class FakeEnv : public EnvWrapper {
 class FakeOAuthClient : public OAuthClient {
  public:
   Status GetTokenFromServiceAccountJson(
-      Json::Value json, StringPiece oauth_server_uri, StringPiece scope,
-      string* token, uint64* expiration_timestamp_sec) override {
+      Json::Value json, absl::string_view oauth_server_uri,
+      absl::string_view scope, string* token,
+      uint64* expiration_timestamp_sec) override {
     provided_credentials_json = json;
     *token = return_token;
     *expiration_timestamp_sec = return_expiration_timestamp;
@@ -47,7 +49,7 @@ class FakeOAuthClient : public OAuthClient {
 
   /// Retrieves a bearer token using a refresh token.
   Status GetTokenFromRefreshTokenJson(
-      Json::Value json, StringPiece oauth_server_uri, string* token,
+      Json::Value json, absl::string_view oauth_server_uri, string* token,
       uint64* expiration_timestamp_sec) override {
     provided_credentials_json = json;
     *token = return_token;
diff --git a/tensorflow/core/platform/cloud/http_request_fake.h b/tensorflow/core/platform/cloud/http_request_fake.h
index 0a1164b64a..0566ef89ff 100644
--- a/tensorflow/core/platform/cloud/http_request_fake.h
+++ b/tensorflow/core/platform/cloud/http_request_fake.h
@@ -20,11 +20,11 @@ limitations under the License.
 #include <fstream>
 #include <string>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include <curl/curl.h>
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -101,7 +101,7 @@ class FakeHttpRequest : public CurlHttpRequest {
       *captured_post_body_ = string(buffer, size);
     } else {
       actual_request_ +=
-          strings::StrCat("Post body: ", StringPiece(buffer, size), "\n");
+          strings::StrCat("Post body: ", absl::string_view(buffer, size), "\n");
     }
   }
   void SetPutEmptyBody() override { actual_request_ += "Put: yes\n"; }
diff --git a/tensorflow/core/platform/cloud/oauth_client.cc b/tensorflow/core/platform/cloud/oauth_client.cc
index 9b85cae9b9..97682c3e36 100644
--- a/tensorflow/core/platform/cloud/oauth_client.cc
+++ b/tensorflow/core/platform/cloud/oauth_client.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/oauth_client.h"
+#include "absl/strings/string_view.h"
 #ifndef _WIN32
 #include <pwd.h>
 #include <sys/types.h>
@@ -84,7 +85,7 @@ Status ReadJsonInt(const Json::Value& json, const string& name, int64* value) {
   return Status::OK();
 }
 
-Status CreateSignature(RSA* private_key, StringPiece to_sign,
+Status CreateSignature(RSA* private_key, absl::string_view to_sign,
                        string* signature) {
   if (!private_key || !signature) {
     return errors::FailedPrecondition(
@@ -120,13 +121,14 @@ Status CreateSignature(RSA* private_key, StringPiece to_sign,
     return errors::Internal("DigestFinal (signature compute) failed.");
   }
   EVP_MD_CTX_cleanup(md_ctx.get());
-  return Base64Encode(StringPiece(reinterpret_cast<char*>(sig.get()), sig_len),
-                      signature);
+  return Base64Encode(
+      absl::string_view(reinterpret_cast<char*>(sig.get()), sig_len),
+      signature);
 }
 
 /// Encodes a claim for a JSON web token (JWT) to make an OAuth request.
-Status EncodeJwtClaim(StringPiece client_email, StringPiece scope,
-                      StringPiece audience, uint64 request_timestamp_sec,
+Status EncodeJwtClaim(absl::string_view client_email, absl::string_view scope,
+                      absl::string_view audience, uint64 request_timestamp_sec,
                       string* encoded) {
   // Step 1: create the JSON with the claim.
   Json::Value root;
@@ -148,7 +150,7 @@ Status EncodeJwtClaim(StringPiece client_email, StringPiece scope,
 }
 
 /// Encodes a header for a JSON web token (JWT) to make an OAuth request.
-Status EncodeJwtHeader(StringPiece key_id, string* encoded) {
+Status EncodeJwtHeader(absl::string_view key_id, string* encoded) {
   // Step 1: create the JSON with the header.
   Json::Value root;
   root["alg"] = kCryptoAlgorithm;
@@ -174,8 +176,8 @@ OAuthClient::OAuthClient(
     : http_request_factory_(std::move(http_request_factory)), env_(env) {}
 
 Status OAuthClient::GetTokenFromServiceAccountJson(
-    Json::Value json, StringPiece oauth_server_uri, StringPiece scope,
-    string* token, uint64* expiration_timestamp_sec) {
+    Json::Value json, absl::string_view oauth_server_uri,
+    absl::string_view scope, string* token, uint64* expiration_timestamp_sec) {
   if (!token || !expiration_timestamp_sec) {
     return errors::FailedPrecondition(
         "'token' and 'expiration_timestamp_sec' cannot be nullptr.");
@@ -221,15 +223,15 @@ Status OAuthClient::GetTokenFromServiceAccountJson(
   request->SetResultBuffer(&response_buffer);
   TF_RETURN_IF_ERROR(request->Send());
 
-  StringPiece response =
-      StringPiece(response_buffer.data(), response_buffer.size());
+  absl::string_view response =
+      absl::string_view(response_buffer.data(), response_buffer.size());
   TF_RETURN_IF_ERROR(ParseOAuthResponse(response, request_timestamp_sec, token,
                                         expiration_timestamp_sec));
   return Status::OK();
 }
 
 Status OAuthClient::GetTokenFromRefreshTokenJson(
-    Json::Value json, StringPiece oauth_server_uri, string* token,
+    Json::Value json, absl::string_view oauth_server_uri, string* token,
     uint64* expiration_timestamp_sec) {
   if (!token || !expiration_timestamp_sec) {
     return errors::FailedPrecondition(
@@ -253,14 +255,14 @@ Status OAuthClient::GetTokenFromRefreshTokenJson(
   request->SetResultBuffer(&response_buffer);
   TF_RETURN_IF_ERROR(request->Send());
 
-  StringPiece response =
-      StringPiece(response_buffer.data(), response_buffer.size());
+  absl::string_view response =
+      absl::string_view(response_buffer.data(), response_buffer.size());
   TF_RETURN_IF_ERROR(ParseOAuthResponse(response, request_timestamp_sec, token,
                                         expiration_timestamp_sec));
   return Status::OK();
 }
 
-Status OAuthClient::ParseOAuthResponse(StringPiece response,
+Status OAuthClient::ParseOAuthResponse(absl::string_view response,
                                        uint64 request_timestamp_sec,
                                        string* token,
                                        uint64* expiration_timestamp_sec) {
diff --git a/tensorflow/core/platform/cloud/oauth_client.h b/tensorflow/core/platform/cloud/oauth_client.h
index 519d69acf9..5598811258 100644
--- a/tensorflow/core/platform/cloud/oauth_client.h
+++ b/tensorflow/core/platform/cloud/oauth_client.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_PLATFORM_CLOUD_OAUTH_CLIENT_H_
 
 #include <memory>
+#include "absl/strings/string_view.h"
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/cloud/http_request.h"
@@ -37,17 +38,16 @@ class OAuthClient {
   /// Retrieves the authentication bearer token using a JSON file
   /// with the client's private key.
   virtual Status GetTokenFromServiceAccountJson(
-      Json::Value json, StringPiece oauth_server_uri, StringPiece scope,
-      string* token, uint64* expiration_timestamp_sec);
+      Json::Value json, absl::string_view oauth_server_uri,
+      absl::string_view scope, string* token, uint64* expiration_timestamp_sec);
 
   /// Retrieves a bearer token using a refresh token.
-  virtual Status GetTokenFromRefreshTokenJson(Json::Value json,
-                                              StringPiece oauth_server_uri,
-                                              string* token,
-                                              uint64* expiration_timestamp_sec);
+  virtual Status GetTokenFromRefreshTokenJson(
+      Json::Value json, absl::string_view oauth_server_uri, string* token,
+      uint64* expiration_timestamp_sec);
 
   /// Parses the JSON response with the token from an OAuth 2.0 server.
-  virtual Status ParseOAuthResponse(StringPiece response,
+  virtual Status ParseOAuthResponse(absl::string_view response,
                                     uint64 request_timestamp_sec, string* token,
                                     uint64* expiration_timestamp_sec);
 
diff --git a/tensorflow/core/platform/cloud/oauth_client_test.cc b/tensorflow/core/platform/cloud/oauth_client_test.cc
index 1cd0641cd3..d72d23c0ce 100644
--- a/tensorflow/core/platform/cloud/oauth_client_test.cc
+++ b/tensorflow/core/platform/cloud/oauth_client_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/oauth_client.h"
 #include <fstream>
+#include "absl/strings/string_view.h"
 #include <openssl/bio.h>
 #include <openssl/evp.h>
 #include <openssl/pem.h>
@@ -115,7 +116,7 @@ TEST(OAuthClientTest, GetTokenFromServiceAccountJson) {
   EXPECT_EQ(13920, expiration_timestamp);
 
   // Now look at the JWT claim that was sent to the OAuth server.
-  StringPiece grant_type, assertion;
+  absl::string_view grant_type, assertion;
   ASSERT_TRUE(strings::Scanner(post_body)
                   .OneLiteral("grant_type=")
                   .RestartCapture()
diff --git a/tensorflow/core/platform/cloud/retrying_file_system.h b/tensorflow/core/platform/cloud/retrying_file_system.h
index 5ce6670dc7..5c454a2b91 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system.h
+++ b/tensorflow/core/platform/cloud/retrying_file_system.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -150,7 +151,7 @@ class RetryingRandomAccessFile : public RandomAccessFile {
                            const RetryConfig& retry_config)
       : base_file_(std::move(base_file)), retry_config_(retry_config) {}
 
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     return RetryingUtils::CallWithRetries(
         [this, offset, n, result, scratch]() {
@@ -175,7 +176,7 @@ class RetryingWritableFile : public WritableFile {
     Close().IgnoreError();
   }
 
-  Status Append(StringPiece data) override {
+  Status Append(absl::string_view data) override {
     return RetryingUtils::CallWithRetries(
         [this, &data]() { return base_file_->Append(data); }, retry_config_);
   }
diff --git a/tensorflow/core/platform/cloud/retrying_file_system_test.cc b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
index 868eea096c..fbce4a38f7 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/retrying_file_system.h"
 #include <fstream>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/test.h"
@@ -60,7 +61,7 @@ class MockCallSequence {
 class MockRandomAccessFile : public RandomAccessFile {
  public:
   explicit MockRandomAccessFile(const ExpectedCalls& calls) : calls_(calls) {}
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     return calls_.ConsumeNextCall("Read");
   }
@@ -72,7 +73,7 @@ class MockRandomAccessFile : public RandomAccessFile {
 class MockWritableFile : public WritableFile {
  public:
   explicit MockWritableFile(const ExpectedCalls& calls) : calls_(calls) {}
-  Status Append(StringPiece data) override {
+  Status Append(absl::string_view data) override {
     return calls_.ConsumeNextCall("Append");
   }
   Status Close() override { return calls_.ConsumeNextCall("Close"); }
@@ -192,7 +193,7 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_ImmediateSuccess) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file));
 
   // Use it and check the results.
-  StringPiece result;
+  absl::string_view result;
   char scratch[10];
   TF_EXPECT_OK(random_access_file->Read(0, 10, &result, scratch));
 }
@@ -220,7 +221,7 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_SuccessWith3rdTry) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file));
 
   // Use it and check the results.
-  StringPiece result;
+  absl::string_view result;
   char scratch[10];
   TF_EXPECT_OK(random_access_file->Read(0, 10, &result, scratch));
 }
@@ -245,7 +246,7 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_AllRetriesFailed) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file));
 
   // Use it and check the results.
-  StringPiece result;
+  absl::string_view result;
   char scratch[10];
   const auto& status = random_access_file->Read(0, 10, &result, scratch);
   EXPECT_TRUE(
@@ -276,7 +277,7 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_NoRetriesForSomeErrors) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file));
 
   // Use it and check the results.
-  StringPiece result;
+  absl::string_view result;
   char scratch[10];
   EXPECT_EQ("Failed precondition",
             random_access_file->Read(0, 10, &result, scratch).error_message());
diff --git a/tensorflow/core/platform/default/device_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc
index 83c65dbfa9..10487e0bae 100644
--- a/tensorflow/core/platform/default/device_tracer.cc
+++ b/tensorflow/core/platform/default/device_tracer.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <stdlib.h>
 #include <memory>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/step_stats_collector.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -301,7 +302,7 @@ class DeviceTracerImpl : public DeviceTracer,
 
   // tracing::TraceCollector interface:
   virtual std::unique_ptr<Handle> CreateAnnotationHandle(
-      StringPiece name_part1, StringPiece name_part2) const {
+      absl::string_view name_part1, absl::string_view name_part2) const {
     struct Impl : public tracing::TraceCollector::Handle {
       string annotation;
       explicit Impl(string &&name_scope) : annotation(name_scope) {
@@ -315,7 +316,8 @@ class DeviceTracerImpl : public DeviceTracer,
         new Impl{ConcatenateNames(name_part1, name_part2)});
   }
 
-  virtual std::unique_ptr<Handle> CreateActivityHandle(StringPiece, StringPiece,
+  virtual std::unique_ptr<Handle> CreateActivityHandle(absl::string_view,
+                                                       absl::string_view,
                                                        bool) const {
     // We don't do anything with 'Activities' yet.
     return nullptr;
diff --git a/tensorflow/core/platform/default/fingerprint.h b/tensorflow/core/platform/default/fingerprint.h
index f901befc16..11af54eac1 100644
--- a/tensorflow/core/platform/default/fingerprint.h
+++ b/tensorflow/core/platform/default/fingerprint.h
@@ -18,15 +18,15 @@ limitations under the License.
 
 #include <farmhash.h>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 
-inline uint64 Fingerprint64(StringPiece s) {
+inline uint64 Fingerprint64(absl::string_view s) {
   return ::util::Fingerprint64(s.data(), s.size());
 }
 
-inline Fprint128 Fingerprint128(StringPiece s) {
+inline Fprint128 Fingerprint128(absl::string_view s) {
   const auto fingerprint = ::util::Fingerprint128(s.data(), s.size());
   return {::util::Uint128Low64(fingerprint),
           ::util::Uint128High64(fingerprint)};
diff --git a/tensorflow/core/platform/default/human_readable_json.cc b/tensorflow/core/platform/default/human_readable_json.cc
index 9f97c8272c..6cf55c5314 100644
--- a/tensorflow/core/platform/default/human_readable_json.cc
+++ b/tensorflow/core/platform/default/human_readable_json.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/human_readable_json.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -30,12 +31,11 @@ Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto,
 
   auto status = google::protobuf::util::MessageToJsonString(proto, result);
   if (!status.ok()) {
-    // Convert error_msg google::protobuf::StringPiece to
-    // tensorflow::StringPiece.
+    // Convert error_msg google::protobuf::StringPiece to absl::string_view.
     auto error_msg = status.error_message();
-    return errors::Internal(
-        strings::StrCat("Could not convert proto to JSON string: ",
-                        StringPiece(error_msg.data(), error_msg.length())));
+    return errors::Internal(strings::StrCat(
+        "Could not convert proto to JSON string: ",
+        absl::string_view(error_msg.data(), error_msg.length())));
   }
   return Status::OK();
 #endif
@@ -49,12 +49,11 @@ Status HumanReadableJsonToProto(const string& str,
   proto->Clear();
   auto status = google::protobuf::util::JsonStringToMessage(str, proto);
   if (!status.ok()) {
-    // Convert error_msg google::protobuf::StringPiece to
-    // tensorflow::StringPiece.
+    // Convert error_msg google::protobuf::StringPiece to absl::string_view.
     auto error_msg = status.error_message();
-    return errors::Internal(
-        strings::StrCat("Could not convert JSON string to proto: ",
-                        StringPiece(error_msg.data(), error_msg.length())));
+    return errors::Internal(strings::StrCat(
+        "Could not convert JSON string to proto: ",
+        absl::string_view(error_msg.data(), error_msg.length())));
   }
   return Status::OK();
 #endif
diff --git a/tensorflow/core/platform/default/string_coding.h b/tensorflow/core/platform/default/string_coding.h
index 70b8ab0144..d7ee5f1187 100644
--- a/tensorflow/core/platform/default/string_coding.h
+++ b/tensorflow/core/platform/default/string_coding.h
@@ -18,6 +18,7 @@ limitations under the License.
 // IWYU pragma: private, include "third_party/tensorflow/core/platform/tensor_coding.h"
 // IWYU pragma: friend third_party/tensorflow/core/platform/tensor_coding.h
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -86,7 +87,7 @@ class StringListDecoder {
   }
 
  private:
-  StringPiece reader_;
+  absl::string_view reader_;
 };
 
 std::unique_ptr<StringListEncoder> NewStringListEncoder(string* out);
diff --git a/tensorflow/core/platform/default/test_benchmark.cc b/tensorflow/core/platform/default/test_benchmark.cc
index dedab42bd7..4f872542bb 100644
--- a/tensorflow/core/platform/default/test_benchmark.cc
+++ b/tensorflow/core/platform/default/test_benchmark.cc
@@ -20,6 +20,7 @@ limitations under the License.
 
 #include <algorithm>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
@@ -117,7 +118,7 @@ void Benchmark::Run(const char* pattern) {
   // specified by clients, but we keep this here to match the internal
   // Google implementation, should we ever enable user-specified
   // pattern specification.
-  if (StringPiece(pattern) == "all") {
+  if (absl::string_view(pattern) == "all") {
     pattern = ".*";
   }
 
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
index afc4201e53..39051f885e 100644
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <deque>
 #include <utility>
 #include <vector>
+#include "absl/strings/string_view.h"
 #if defined(__APPLE__)
 #include <mach-o/dyld.h>
 #endif
@@ -90,7 +91,7 @@ Status FileSystemRegistryImpl::GetRegisteredFileSystemSchemes(
 Env::Env() : file_system_registry_(new FileSystemRegistryImpl) {}
 
 Status Env::GetFileSystemForFile(const string& fname, FileSystem** result) {
-  StringPiece scheme, host, path;
+  absl::string_view scheme, host, path;
   io::ParseURI(fname, &scheme, &host, &path);
   FileSystem* file_system = file_system_registry_->Lookup(string(scheme));
   if (!file_system) {
@@ -164,7 +165,7 @@ bool Env::FilesExist(const std::vector<string>& files,
                      std::vector<Status>* status) {
   std::unordered_map<string, std::vector<string>> files_per_fs;
   for (const auto& file : files) {
-    StringPiece scheme, host, path;
+    absl::string_view scheme, host, path;
     io::ParseURI(file, &scheme, &host, &path);
     files_per_fs[string(scheme)].push_back(file);
   }
@@ -389,7 +390,7 @@ Status ReadFileToString(Env* env, const string& fname, string* data) {
   }
   gtl::STLStringResizeUninitialized(data, file_size);
   char* p = gtl::string_as_array(data);
-  StringPiece result;
+  absl::string_view result;
   s = file->Read(0, file_size, &result, p);
   if (!s.ok()) {
     data->clear();
@@ -406,7 +407,7 @@ Status ReadFileToString(Env* env, const string& fname, string* data) {
 }
 
 Status WriteStringToFile(Env* env, const string& fname,
-                         const StringPiece& data) {
+                         const absl::string_view& data) {
   std::unique_ptr<WritableFile> file;
   Status s = env->NewWritableFile(fname, &file);
   if (!s.ok()) {
@@ -431,7 +432,7 @@ Status FileSystemCopyFile(FileSystem* src_fs, const string& src,
   std::unique_ptr<char[]> scratch(new char[kCopyFileBufferSize]);
   Status s = Status::OK();
   while (s.ok()) {
-    StringPiece result;
+    absl::string_view result;
     s = src_file->Read(offset, kCopyFileBufferSize, &result, scratch.get());
     if (!(s.ok() || s.code() == error::OUT_OF_RANGE)) {
       return s;
@@ -457,7 +458,7 @@ class FileStream : public ::tensorflow::protobuf::io::ZeroCopyInputStream {
   Status status() const { return status_; }
 
   bool Next(const void** data, int* size) override {
-    StringPiece result;
+    absl::string_view result;
     Status s = file_->Read(pos_, kBufSize, &result, scratch_);
     if (result.empty()) {
       status_ = s;
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 5732271f15..508da1ea0b 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -21,9 +21,9 @@ limitations under the License.
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/env_time.h"
 #include "tensorflow/core/platform/file_system.h"
 #include "tensorflow/core/platform/macros.h"
@@ -408,7 +408,7 @@ Status ReadFileToString(Env* env, const string& fname, string* data);
 /// A utility routine: write contents of `data` to file named `fname`
 /// (overwriting existing contents, if any).
 Status WriteStringToFile(Env* env, const string& fname,
-                         const StringPiece& data);
+                         const absl::string_view& data);
 
 /// Write binary representation of "proto" to the named file.
 Status WriteBinaryProto(Env* env, const string& fname,
diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index 2e32abdffb..d37a722b5f 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -17,10 +17,10 @@ limitations under the License.
 
 #include <sys/stat.h>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -75,7 +75,7 @@ TEST_F(DefaultEnvTest, IncompleteReadOutOfRange) {
   TF_EXPECT_OK(env_->NewRandomAccessFile(filename, &f));
 
   // Reading past EOF should give an OUT_OF_RANGE error
-  StringPiece result;
+  absl::string_view result;
   char scratch[3];
   EXPECT_EQ(error::OUT_OF_RANGE, f->Read(0, 3, &result, scratch).code());
   EXPECT_EQ(input, result);
@@ -280,7 +280,7 @@ TEST_F(DefaultEnvTest, SleepForMicroseconds) {
 class TmpDirFileSystem : public NullFileSystem {
  public:
   Status FileExists(const string& dir) override {
-    StringPiece scheme, host, path;
+    absl::string_view scheme, host, path;
     io::ParseURI(dir, &scheme, &host, &path);
     if (path.empty()) return errors::NotFound(dir, " not found");
     // The special "flushed" file exists only if the filesystem's caches have
@@ -296,7 +296,7 @@ class TmpDirFileSystem : public NullFileSystem {
   }
 
   Status CreateDir(const string& dir) override {
-    StringPiece scheme, host, path;
+    absl::string_view scheme, host, path;
     io::ParseURI(dir, &scheme, &host, &path);
     if (scheme != "tmpdirfs") {
       return errors::FailedPrecondition("scheme must be tmpdirfs");
@@ -359,7 +359,7 @@ TEST_F(DefaultEnvTest, LocalTempFilename) {
   // Read from the temporary file and check content.
   std::unique_ptr<RandomAccessFile> file_to_read;
   TF_CHECK_OK(env->NewRandomAccessFile(filename, &file_to_read));
-  StringPiece content;
+  absl::string_view content;
   char scratch[1024];
   CHECK_EQ(error::OUT_OF_RANGE,
            file_to_read->Read(0 /* offset */, 1024 /* n */, &content, scratch)
diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc
index 3ab542a5d8..780cdad39f 100644
--- a/tensorflow/core/platform/file_system.cc
+++ b/tensorflow/core/platform/file_system.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <algorithm>
 #include <deque>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -136,9 +137,9 @@ Status FileSystem::DeleteRecursively(const string& dirname,
 }
 
 Status FileSystem::RecursivelyCreateDir(const string& dirname) {
-  StringPiece scheme, host, remaining_dir;
+  absl::string_view scheme, host, remaining_dir;
   io::ParseURI(dirname, &scheme, &host, &remaining_dir);
-  std::vector<StringPiece> sub_dirs;
+  std::vector<absl::string_view> sub_dirs;
   while (!remaining_dir.empty()) {
     Status status = FileExists(io::CreateURI(scheme, host, remaining_dir));
     if (status.ok()) {
@@ -159,7 +160,7 @@ Status FileSystem::RecursivelyCreateDir(const string& dirname) {
 
   // Now create the directories.
   string built_path(remaining_dir);
-  for (const StringPiece sub_dir : sub_dirs) {
+  for (const absl::string_view sub_dir : sub_dirs) {
     built_path = io::JoinPath(built_path, sub_dir);
     Status status = CreateDir(io::CreateURI(scheme, host, built_path));
     if (!status.ok() && status.code() != tensorflow::error::ALREADY_EXISTS) {
diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h
index 156af6cdea..e57454b71b 100644
--- a/tensorflow/core/platform/file_system.h
+++ b/tensorflow/core/platform/file_system.h
@@ -21,9 +21,9 @@ limitations under the License.
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/cord.h"
 #include "tensorflow/core/platform/file_statistics.h"
 #include "tensorflow/core/platform/macros.h"
@@ -236,7 +236,7 @@ class RandomAccessFile {
   /// because of EOF.
   ///
   /// Safe for concurrent use by multiple threads.
-  virtual Status Read(uint64 offset, size_t n, StringPiece* result,
+  virtual Status Read(uint64 offset, size_t n, absl::string_view* result,
                       char* scratch) const = 0;
 
  private:
@@ -253,7 +253,7 @@ class WritableFile {
   virtual ~WritableFile();
 
   /// \brief Append 'data' to the file.
-  virtual Status Append(StringPiece data) = 0;
+  virtual Status Append(absl::string_view data) = 0;
 
   // TODO(ebrevdo): Remove this ifdef when absl is updated.
 #if defined(PLATFORM_GOOGLE)
diff --git a/tensorflow/core/platform/file_system_test.cc b/tensorflow/core/platform/file_system_test.cc
index a637d42a92..5acf71dc6d 100644
--- a/tensorflow/core/platform/file_system_test.cc
+++ b/tensorflow/core/platform/file_system_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <sys/stat.h>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -120,7 +121,7 @@ class InterPlanetaryFileSystem : public NullFileSystem {
   }
 
   void ParsePath(const string& name, string* parsed_path) {
-    StringPiece scheme, host, path;
+    absl::string_view scheme, host, path;
     io::ParseURI(name, &scheme, &host, &path);
     ASSERT_EQ(scheme, "ipfs");
     ASSERT_EQ(host, "solarsystem");
@@ -156,10 +157,10 @@ string Match(InterPlanetaryFileSystem* ipfs, const string& suffix_pattern) {
   if (!s.ok()) {
     return s.ToString();
   } else {
-    std::vector<StringPiece> trimmed_results;
+    std::vector<absl::string_view> trimmed_results;
     std::sort(results.begin(), results.end());
     for (const string& result : results) {
-      StringPiece trimmed_result(result);
+      absl::string_view trimmed_result(result);
       EXPECT_TRUE(str_util::ConsumePrefix(&trimmed_result,
                                           strings::StrCat(kPrefix, "/")));
       trimmed_results.push_back(trimmed_result);
diff --git a/tensorflow/core/platform/fingerprint.h b/tensorflow/core/platform/fingerprint.h
index 720dc4c3d6..71280fbd1d 100644
--- a/tensorflow/core/platform/fingerprint.h
+++ b/tensorflow/core/platform/fingerprint.h
@@ -16,7 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_FINGERPRINT_H_
 #define TENSORFLOW_CORE_PLATFORM_FINGERPRINT_H_
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -39,10 +39,10 @@ struct Fprint128Hasher {
 
 // This is a portable fingerprint interface for strings that will never change.
 // However, it is not suitable for cryptography.
-uint64 Fingerprint64(StringPiece s);
+uint64 Fingerprint64(absl::string_view s);
 
 // 128-bit variant of Fingerprint64 above (same properties and caveats apply).
-Fprint128 Fingerprint128(StringPiece s);
+Fprint128 Fingerprint128(absl::string_view s);
 
 namespace internal {
 // Mixes some of the bits that got propagated to the high bits back into the
diff --git a/tensorflow/core/platform/hadoop/BUILD b/tensorflow/core/platform/hadoop/BUILD
index 7c38c399bd..6c23f5a61e 100644
--- a/tensorflow/core/platform/hadoop/BUILD
+++ b/tensorflow/core/platform/hadoop/BUILD
@@ -20,6 +20,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//third_party/hadoop:hdfs",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -59,5 +60,6 @@ tf_cc_test(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
index eb35531e9f..48b272d2ba 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <errno.h>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -139,10 +140,10 @@ HadoopFileSystem::~HadoopFileSystem() {}
 // We rely on HDFS connection caching here. The HDFS client calls
 // org.apache.hadoop.fs.FileSystem.get(), which caches the connection
 // internally.
-Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) {
+Status HadoopFileSystem::Connect(absl::string_view fname, hdfsFS* fs) {
   TF_RETURN_IF_ERROR(hdfs_->status());
 
-  StringPiece scheme, namenode, path;
+  absl::string_view scheme, namenode, path;
   io::ParseURI(fname, &scheme, &namenode, &path);
   const string nn(namenode);
 
@@ -152,7 +153,7 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) {
   } else if (scheme == "viewfs") {
     char* defaultFS = nullptr;
     hdfs_->hdfsConfGetStr("fs.defaultFS", &defaultFS);
-    StringPiece defaultScheme, defaultCluster, defaultPath;
+    absl::string_view defaultScheme, defaultCluster, defaultPath;
     io::ParseURI(defaultFS, &defaultScheme, &defaultCluster, &defaultPath);
 
     if (scheme != defaultScheme || namenode != defaultCluster) {
@@ -181,7 +182,7 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) {
 }
 
 string HadoopFileSystem::TranslateName(const string& name) const {
-  StringPiece scheme, namenode, path;
+  absl::string_view scheme, namenode, path;
   io::ParseURI(name, &scheme, &namenode, &path);
   return string(path);
 }
@@ -203,7 +204,7 @@ class HDFSRandomAccessFile : public RandomAccessFile {
     }
   }
 
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     Status s;
     char* dst = scratch;
@@ -242,7 +243,7 @@ class HDFSRandomAccessFile : public RandomAccessFile {
         s = IOError(filename_, errno);
       }
     }
-    *result = StringPiece(scratch, dst - scratch);
+    *result = absl::string_view(scratch, dst - scratch);
     return s;
   }
 
@@ -282,7 +283,7 @@ class HDFSWritableFile : public WritableFile {
     }
   }
 
-  Status Append(StringPiece data) override {
+  Status Append(absl::string_view data) override {
     if (hdfs_->hdfsWrite(fs_, file_, data.data(),
                          static_cast<tSize>(data.size())) == -1) {
       return IOError(filename_, errno);
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.h b/tensorflow/core/platform/hadoop/hadoop_file_system.h
index 6af7a698ff..3655ee1076 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system.h
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
 #define TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/env.h"
 
 extern "C" {
@@ -67,7 +68,7 @@ class HadoopFileSystem : public FileSystem {
   string TranslateName(const string& name) const override;
 
  private:
-  Status Connect(StringPiece fname, hdfsFS* fs);
+  Status Connect(absl::string_view fname, hdfsFS* fs);
   LibHDFS* hdfs_;
 };
 
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc b/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc
index b207d34749..d3659ce248 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/platform/hadoop/hadoop_file_system.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -54,7 +55,7 @@ class HadoopFileSystemTest : public ::testing::Test {
     TF_RETURN_IF_ERROR(hdfs.GetFileSize(fname, &file_size));
 
     content->resize(file_size);
-    StringPiece result;
+    absl::string_view result;
     TF_RETURN_IF_ERROR(
         reader->Read(0, file_size, &result, gtl::string_as_array(content)));
     if (file_size != result.size()) {
@@ -77,7 +78,7 @@ TEST_F(HadoopFileSystemTest, RandomAccessFile) {
 
   string got;
   got.resize(content.size());
-  StringPiece result;
+  absl::string_view result;
   TF_EXPECT_OK(
       reader->Read(0, content.size(), &result, gtl::string_as_array(&got)));
   EXPECT_EQ(content.size(), result.size());
@@ -213,7 +214,7 @@ TEST_F(HadoopFileSystemTest, WriteWhileReading) {
 
   string got;
   got.resize(content1.size());
-  StringPiece result;
+  absl::string_view result;
   TF_EXPECT_OK(
       reader->Read(0, content1.size(), &result, gtl::string_as_array(&got)));
   EXPECT_EQ(content1, result);
diff --git a/tensorflow/core/platform/posix/posix_file_system.cc b/tensorflow/core/platform/posix/posix_file_system.cc
index c7afab9583..2f8526c9b3 100644
--- a/tensorflow/core/platform/posix/posix_file_system.cc
+++ b/tensorflow/core/platform/posix/posix_file_system.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <fcntl.h>
 #include <stdio.h>
 #include <sys/mman.h>
+#include "absl/strings/string_view.h"
 #if !defined(__APPLE__)
 #include <sys/sendfile.h>
 #endif
@@ -52,7 +53,7 @@ class PosixRandomAccessFile : public RandomAccessFile {
       : filename_(fname), fd_(fd) {}
   ~PosixRandomAccessFile() override { close(fd_); }
 
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     Status s;
     char* dst = scratch;
@@ -70,7 +71,7 @@ class PosixRandomAccessFile : public RandomAccessFile {
         s = IOError(filename_, errno);
       }
     }
-    *result = StringPiece(scratch, dst - scratch);
+    *result = absl::string_view(scratch, dst - scratch);
     return s;
   }
 };
@@ -91,7 +92,7 @@ class PosixWritableFile : public WritableFile {
     }
   }
 
-  Status Append(StringPiece data) override {
+  Status Append(absl::string_view data) override {
     size_t r = fwrite(data.data(), 1, data.size(), file_);
     if (r != data.size()) {
       return IOError(filename_, errno);
@@ -217,7 +218,7 @@ Status PosixFileSystem::GetChildren(const string& dir,
   }
   struct dirent* entry;
   while ((entry = readdir(d)) != nullptr) {
-    StringPiece basename = entry->d_name;
+    absl::string_view basename = entry->d_name;
     if ((basename != ".") && (basename != "..")) {
       result->push_back(entry->d_name);
     }
diff --git a/tensorflow/core/platform/posix/posix_file_system.h b/tensorflow/core/platform/posix/posix_file_system.h
index 752eccea66..78e403859e 100644
--- a/tensorflow/core/platform/posix/posix_file_system.h
+++ b/tensorflow/core/platform/posix/posix_file_system.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_POSIX_POSIX_FILE_SYSTEM_H_
 #define TENSORFLOW_CORE_PLATFORM_POSIX_POSIX_FILE_SYSTEM_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 
@@ -68,7 +69,7 @@ Status IOError(const string& context, int err_number);
 class LocalPosixFileSystem : public PosixFileSystem {
  public:
   string TranslateName(const string& name) const override {
-    StringPiece scheme, host, path;
+    absl::string_view scheme, host, path;
     io::ParseURI(name, &scheme, &host, &path);
     return string(path);
   }
diff --git a/tensorflow/core/platform/s3/BUILD b/tensorflow/core/platform/s3/BUILD
index 41184b6fd9..f7a0d40083 100644
--- a/tensorflow/core/platform/s3/BUILD
+++ b/tensorflow/core/platform/s3/BUILD
@@ -34,6 +34,7 @@ tf_cc_binary(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "@aws",
+        "@com_google_absl//absl/strings",
         "@curl",
         "@protobuf_archive//:protobuf_headers",
     ],
diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc
index e0b8e37745..ed05b2171c 100644
--- a/tensorflow/core/platform/s3/s3_file_system.cc
+++ b/tensorflow/core/platform/s3/s3_file_system.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/platform/s3/s3_file_system.h"
+
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/file_system_helper.h"
@@ -144,7 +146,7 @@ Status ParseS3Path(const string& fname, bool empty_object_ok, string* bucket,
   if (!bucket || !object) {
     return errors::Internal("bucket and object cannot be null.");
   }
-  StringPiece scheme, bucketp, objectp;
+  absl::string_view scheme, bucketp, objectp;
   io::ParseURI(fname, &scheme, &bucketp, &objectp);
   if (scheme != "s3") {
     return errors::InvalidArgument("S3 path doesn't start with 's3://': ",
@@ -170,7 +172,7 @@ class S3RandomAccessFile : public RandomAccessFile {
                      std::shared_ptr<Aws::S3::S3Client> s3_client)
       : bucket_(bucket), object_(object), s3_client_(s3_client) {}
 
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     Aws::S3::Model::GetObjectRequest getObjectRequest;
     getObjectRequest.WithBucket(bucket_.c_str()).WithKey(object_.c_str());
@@ -182,13 +184,13 @@ class S3RandomAccessFile : public RandomAccessFile {
     auto getObjectOutcome = this->s3_client_->GetObject(getObjectRequest);
     if (!getObjectOutcome.IsSuccess()) {
       n = 0;
-      *result = StringPiece(scratch, n);
+      *result = absl::string_view(scratch, n);
       return Status(error::OUT_OF_RANGE, "Read less bytes than requested");
     }
     n = getObjectOutcome.GetResult().GetContentLength();
     getObjectOutcome.GetResult().GetBody().read(scratch, n);
 
-    *result = StringPiece(scratch, n);
+    *result = absl::string_view(scratch, n);
     return Status::OK();
   }
 
@@ -211,7 +213,7 @@ class S3WritableFile : public WritableFile {
             std::ios_base::binary | std::ios_base::trunc | std::ios_base::in |
                 std::ios_base::out)) {}
 
-  Status Append(StringPiece data) override {
+  Status Append(absl::string_view data) override {
     if (!outfile_) {
       return errors::FailedPrecondition(
           "The internal temporary file is not writable.");
@@ -339,7 +341,7 @@ Status S3FileSystem::NewAppendableFile(const string& fname,
   std::unique_ptr<char[]> buffer(new char[kS3ReadAppendableFileBufferSize]);
   Status status;
   uint64 offset = 0;
-  StringPiece read_chunk;
+  absl::string_view read_chunk;
 
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object));
@@ -372,7 +374,7 @@ Status S3FileSystem::NewReadOnlyMemoryRegionFromFile(
   std::unique_ptr<RandomAccessFile> file;
   TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &file));
 
-  StringPiece piece;
+  absl::string_view piece;
   TF_RETURN_IF_ERROR(file->Read(0, size, &piece, data.get()));
 
   result->reset(new S3ReadOnlyMemoryRegion(std::move(data), size));
diff --git a/tensorflow/core/platform/tensor_coding.cc b/tensorflow/core/platform/tensor_coding.cc
index 84601de39a..2fbf9f6ee3 100644
--- a/tensorflow/core/platform/tensor_coding.cc
+++ b/tensorflow/core/platform/tensor_coding.cc
@@ -17,13 +17,14 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 namespace port {
 
-void AssignRefCounted(StringPiece src, core::RefCounted* obj, string* out) {
+void AssignRefCounted(absl::string_view src, core::RefCounted* obj,
+                      string* out) {
   out->assign(src.data(), src.size());
 }
 
@@ -39,7 +40,7 @@ void EncodeStringList(const string* strings, int64 n, string* out) {
 
 bool DecodeStringList(const string& src, string* strings, int64 n) {
   std::vector<uint32> sizes(n);
-  StringPiece reader(src);
+  absl::string_view reader(src);
   int64 tot = 0;
   for (auto& v : sizes) {
     if (!core::GetVarint32(&reader, &v)) return false;
diff --git a/tensorflow/core/platform/tensor_coding.h b/tensorflow/core/platform/tensor_coding.h
index 6c6d75830d..6987de56b6 100644
--- a/tensorflow/core/platform/tensor_coding.h
+++ b/tensorflow/core/platform/tensor_coding.h
@@ -18,8 +18,8 @@ limitations under the License.
 #define TENSORFLOW_PLATFORM_TENSOR_CODING_H_
 
 #include <string>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/refcount.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/platform.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -35,7 +35,8 @@ namespace port {
 // Store src contents in *out.  If backing memory for src is shared with *out,
 // will ref obj during the call and will arrange to unref obj when no
 // longer needed.
-void AssignRefCounted(StringPiece src, core::RefCounted* obj, string* out);
+void AssignRefCounted(absl::string_view src, core::RefCounted* obj,
+                      string* out);
 
 // Copy contents of src to dst[0,src.size()-1].
 inline void CopyToArray(const string& src, char* dst) {
diff --git a/tensorflow/core/platform/tracing.cc b/tensorflow/core/platform/tracing.cc
index c0386c0a3f..c1678a2dd9 100644
--- a/tensorflow/core/platform/tracing.cc
+++ b/tensorflow/core/platform/tracing.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <map>
 #include <string>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -57,11 +58,12 @@ uint64 GetUniqueArg() {
   return unique_arg.fetch_add(1, std::memory_order_relaxed);
 }
 
-uint64 GetArgForName(StringPiece name) {
+uint64 GetArgForName(absl::string_view name) {
   return Hash64(name.data(), name.size());
 }
 
-string TraceCollector::ConcatenateNames(StringPiece first, StringPiece second) {
+string TraceCollector::ConcatenateNames(absl::string_view first,
+                                        absl::string_view second) {
   std::string result;
   bool has_two_parts = !first.empty() && !second.empty();
   result.reserve(first.size() + second.size() +
diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h
index aefbe64425..8c0d8666a6 100644
--- a/tensorflow/core/platform/tracing.h
+++ b/tensorflow/core/platform/tracing.h
@@ -23,7 +23,7 @@ limitations under the License.
 #include <map>
 #include <memory>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
@@ -82,7 +82,7 @@ inline const EventCollector* GetEventCollector(EventCategory category) {
 uint64 GetUniqueArg();
 
 // Returns an id for name to pass to RecordEvent/ScopedRegion.
-uint64 GetArgForName(StringPiece name);
+uint64 GetArgForName(absl::string_view name);
 
 // Records an atomic event through the currently registered EventCollector.
 inline void RecordEvent(EventCategory category, uint64 arg) {
@@ -121,7 +121,7 @@ class ScopedRegion {
 
   // Same as ScopedRegion(category, GetArgForName(name)), but faster if
   // EventCollector::IsEnaled() returns false.
-  ScopedRegion(EventCategory category, StringPiece name)
+  ScopedRegion(EventCategory category, absl::string_view name)
       : collector_(GetEventCollector(category)) {
     if (collector_) {
       collector_->StartRegion(GetArgForName(name));
@@ -150,9 +150,9 @@ class TraceCollector {
 
   virtual ~TraceCollector() {}
   virtual std::unique_ptr<Handle> CreateAnnotationHandle(
-      StringPiece name_part1, StringPiece name_part2) const = 0;
+      absl::string_view name_part1, absl::string_view name_part2) const = 0;
   virtual std::unique_ptr<Handle> CreateActivityHandle(
-      StringPiece name_part1, StringPiece name_part2,
+      absl::string_view name_part1, absl::string_view name_part2,
       bool is_expensive) const = 0;
 
   // Returns true if this annotation tracing is enabled for any op.
@@ -163,7 +163,8 @@ class TraceCollector {
   virtual bool IsEnabledForActivities(bool is_expensive) const = 0;
 
  protected:
-  static string ConcatenateNames(StringPiece first, StringPiece second);
+  static string ConcatenateNames(absl::string_view first,
+                                 absl::string_view second);
 
  private:
   friend void SetTraceCollector(const TraceCollector*);
@@ -185,14 +186,14 @@ const TraceCollector* GetTraceCollector();
 // This will add 'my kernels' to both kernels in the profiler UI
 class ScopedAnnotation {
  public:
-  explicit ScopedAnnotation(StringPiece name)
-      : ScopedAnnotation(name, StringPiece()) {}
+  explicit ScopedAnnotation(absl::string_view name)
+      : ScopedAnnotation(name, absl::string_view()) {}
 
   // If tracing is enabled, add a name scope of
   // "<name_part1>:<name_part2>".  This can be cheaper than the
   // single-argument constructor because the concatenation of the
   // label string is only done if tracing is enabled.
-  ScopedAnnotation(StringPiece name_part1, StringPiece name_part2)
+  ScopedAnnotation(absl::string_view name_part1, absl::string_view name_part2)
       : handle_([&] {
           auto trace_collector = GetTraceCollector();
           return trace_collector ? trace_collector->CreateAnnotationHandle(
@@ -211,14 +212,14 @@ class ScopedAnnotation {
 // the object is destroyed.
 class ScopedActivity {
  public:
-  explicit ScopedActivity(StringPiece name, bool is_expensive = true)
-      : ScopedActivity(name, StringPiece(), is_expensive) {}
+  explicit ScopedActivity(absl::string_view name, bool is_expensive = true)
+      : ScopedActivity(name, absl::string_view(), is_expensive) {}
 
   // If tracing is enabled, set up an activity with a label of
   // "<name_part1>:<name_part2>".  This can be cheaper than the
   // single-argument constructor because the concatenation of the
   // label string is only done if tracing is enabled.
-  ScopedActivity(StringPiece name_part1, StringPiece name_part2,
+  ScopedActivity(absl::string_view name_part1, absl::string_view name_part2,
                  bool is_expensive = true)
       : handle_([&] {
           auto trace_collector = GetTraceCollector();
diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc
index 6cf79634d7..8d180272fe 100644
--- a/tensorflow/core/platform/windows/windows_file_system.cc
+++ b/tensorflow/core/platform/windows/windows_file_system.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include <sys/types.h>
 #include <time.h>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/env.h"
@@ -112,7 +113,7 @@ class WindowsRandomAccessFile : public RandomAccessFile {
     }
   }
 
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     Status s;
     char* dst = scratch;
@@ -130,7 +131,7 @@ class WindowsRandomAccessFile : public RandomAccessFile {
         s = IOError(filename_, errno);
       }
     }
-    *result = StringPiece(scratch, dst - scratch);
+    *result = absl::string_view(scratch, dst - scratch);
     return s;
   }
 };
@@ -150,7 +151,7 @@ class WindowsWritableFile : public WritableFile {
     }
   }
 
-  Status Append(StringPiece data) override {
+  Status Append(absl::string_view data) override {
     DWORD bytes_written = 0;
     DWORD data_size = static_cast<DWORD>(data.size());
     BOOL write_result =
@@ -413,7 +414,7 @@ Status WindowsFileSystem::GetChildren(const string& dir,
 
   do {
     string file_name = WideCharToUtf8(find_data.cFileName);
-    const StringPiece basename = file_name;
+    const absl::string_view basename = file_name;
     if (basename != "." && basename != "..") {
       result->push_back(file_name);
     }
diff --git a/tensorflow/core/platform/windows/windows_file_system.h b/tensorflow/core/platform/windows/windows_file_system.h
index 1f4c535f24..7ba471eec4 100644
--- a/tensorflow/core/platform/windows/windows_file_system.h
+++ b/tensorflow/core/platform/windows/windows_file_system.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_WINDOWS_WINDOWS_FILE_SYSTEM_H_
 #define TENSORFLOW_CORE_PLATFORM_WINDOWS_WINDOWS_FILE_SYSTEM_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/file_system.h"
 
@@ -69,7 +70,7 @@ class WindowsFileSystem : public FileSystem {
 class LocalWinFileSystem : public WindowsFileSystem {
  public:
   string TranslateName(const string& name) const override {
-    StringPiece scheme, host, path;
+    absl::string_view scheme, host, path;
     io::ParseURI(name, &scheme, &host, &path);
     return string(path);
   }
diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD
index 8dcfde9a2a..c0afedabf9 100644
--- a/tensorflow/core/profiler/internal/BUILD
+++ b/tensorflow/core/profiler/internal/BUILD
@@ -254,6 +254,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:tfprof_options",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/profiler/internal/tfprof_utils.cc b/tensorflow/core/profiler/internal/tfprof_utils.cc
index 7712ebd926..6ae1c1533a 100644
--- a/tensorflow/core/profiler/internal/tfprof_utils.cc
+++ b/tensorflow/core/profiler/internal/tfprof_utils.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <memory>
 #include <set>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -91,12 +92,12 @@ tensorflow::Status ReturnError(const std::vector<string>& pieces, int idx) {
       strings::StrCat("Invalid option '", pieces[idx], "' value: '", val, "'"));
 }
 
-bool CaseEqual(StringPiece s1, StringPiece s2) {
+bool CaseEqual(absl::string_view s1, absl::string_view s2) {
   if (s1.size() != s2.size()) return false;
   return str_util::Lowercase(s1) == str_util::Lowercase(s2);
 }
 
-bool StringToBool(StringPiece str, bool* value) {
+bool StringToBool(absl::string_view str, bool* value) {
   CHECK(value != nullptr) << "NULL output boolean given.";
   if (CaseEqual(str, "true") || CaseEqual(str, "t") || CaseEqual(str, "yes") ||
       CaseEqual(str, "y") || CaseEqual(str, "1")) {
diff --git a/tensorflow/core/util/command_line_flags.cc b/tensorflow/core/util/command_line_flags.cc
index f1196fdfec..e253b3fae1 100644
--- a/tensorflow/core/util/command_line_flags.cc
+++ b/tensorflow/core/util/command_line_flags.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
@@ -26,7 +26,7 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-bool ParseStringFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
+bool ParseStringFlag(absl::string_view arg, absl::string_view flag,
                      const std::function<bool(string)>& hook,
                      bool* value_parsing_ok) {
   *value_parsing_ok = true;
@@ -40,7 +40,7 @@ bool ParseStringFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
   return false;
 }
 
-bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
+bool ParseInt32Flag(absl::string_view arg, absl::string_view flag,
                     const std::function<bool(int32)>& hook,
                     bool* value_parsing_ok) {
   *value_parsing_ok = true;
@@ -62,7 +62,7 @@ bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
   return false;
 }
 
-bool ParseInt64Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
+bool ParseInt64Flag(absl::string_view arg, absl::string_view flag,
                     const std::function<bool(int64)>& hook,
                     bool* value_parsing_ok) {
   *value_parsing_ok = true;
@@ -84,7 +84,7 @@ bool ParseInt64Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
   return false;
 }
 
-bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
+bool ParseBoolFlag(absl::string_view arg, absl::string_view flag,
                    const std::function<bool(bool)>& hook,
                    bool* value_parsing_ok) {
   *value_parsing_ok = true;
@@ -112,7 +112,7 @@ bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
   return false;
 }
 
-bool ParseFloatFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
+bool ParseFloatFlag(absl::string_view arg, absl::string_view flag,
                     const std::function<bool(float)>& hook,
                     bool* value_parsing_ok) {
   *value_parsing_ok = true;
diff --git a/tensorflow/core/util/device_name_utils.cc b/tensorflow/core/util/device_name_utils.cc
index 8c24076aa9..33167d7662 100644
--- a/tensorflow/core/util/device_name_utils.cc
+++ b/tensorflow/core/util/device_name_utils.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/util/device_name_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -29,7 +30,7 @@ static bool IsAlpha(char c) {
 static bool IsAlphaNum(char c) { return IsAlpha(c) || (c >= '0' && c <= '9'); }
 
 // Returns true iff "in" is a valid job name.
-static bool IsJobName(StringPiece in) {
+static bool IsJobName(absl::string_view in) {
   if (in.empty()) return false;
   if (!IsAlpha(in[0])) return false;
   for (size_t i = 1; i < in.size(); ++i) {
@@ -39,7 +40,7 @@ static bool IsJobName(StringPiece in) {
 }
 
 // Returns true and fills in "*job" iff "*in" starts with a job name.
-static bool ConsumeJobName(StringPiece* in, string* job) {
+static bool ConsumeJobName(absl::string_view* in, string* job) {
   if (in->empty()) return false;
   if (!IsAlpha((*in)[0])) return false;
   size_t i = 1;
@@ -57,7 +58,7 @@ static bool ConsumeJobName(StringPiece* in, string* job) {
 
 // Returns true and fills in "*device_type" iff "*in" starts with a device type
 // name.
-static bool ConsumeDeviceType(StringPiece* in, string* device_type) {
+static bool ConsumeDeviceType(absl::string_view* in, string* device_type) {
   if (in->empty()) return false;
   if (!IsAlpha((*in)[0])) return false;
   size_t i = 1;
@@ -75,7 +76,7 @@ static bool ConsumeDeviceType(StringPiece* in, string* device_type) {
 
 // Returns true and fills in "*val" iff "*in" starts with a decimal
 // number.
-static bool ConsumeNumber(StringPiece* in, int* val) {
+static bool ConsumeNumber(absl::string_view* in, int* val) {
   uint64 tmp;
   if (str_util::ConsumeLeadingDigits(in, &tmp)) {
     *val = tmp;
@@ -111,7 +112,7 @@ string LegacyName(const string& job, int replica, int task, const string& type,
 }
 }  // anonymous namespace
 
-bool DeviceNameUtils::ParseFullName(StringPiece fullname, ParsedName* p) {
+bool DeviceNameUtils::ParseFullName(absl::string_view fullname, ParsedName* p) {
   p->Clear();
   if (fullname == "/") {
     return true;
@@ -213,8 +214,8 @@ void CompleteName(const DeviceNameUtils::ParsedName& parsed_basename,
 }  // namespace
 
 /* static */
-Status DeviceNameUtils::CanonicalizeDeviceName(StringPiece fullname,
-                                               StringPiece basename,
+Status DeviceNameUtils::CanonicalizeDeviceName(absl::string_view fullname,
+                                               absl::string_view basename,
                                                string* canonical_name) {
   *canonical_name = "";
   ParsedName parsed_basename;
@@ -390,7 +391,8 @@ bool DeviceNameUtils::IsSameAddressSpace(const ParsedName& a,
 }
 
 /* static */
-bool DeviceNameUtils::IsSameAddressSpace(StringPiece src, StringPiece dst) {
+bool DeviceNameUtils::IsSameAddressSpace(absl::string_view src,
+                                         absl::string_view dst) {
   ParsedName x;
   ParsedName y;
   return ParseFullName(src, &x) && ParseFullName(dst, &y) &&
@@ -398,27 +400,27 @@ bool DeviceNameUtils::IsSameAddressSpace(StringPiece src, StringPiece dst) {
 }
 
 /* static */
-string DeviceNameUtils::LocalName(StringPiece type, int id) {
+string DeviceNameUtils::LocalName(absl::string_view type, int id) {
   return strings::StrCat("/device:", type, ":", id);
 }
 
 namespace {
 // Returns the legacy local device name given its "type" and "id" (which is
 // '/device:type:id').
-string LegacyLocalName(StringPiece type, int id) {
+string LegacyLocalName(absl::string_view type, int id) {
   return strings::StrCat(type, ":", id);
 }
 }  // anonymous namespace
 
 /* static */
-string DeviceNameUtils::LocalName(StringPiece fullname) {
+string DeviceNameUtils::LocalName(absl::string_view fullname) {
   ParsedName x;
   CHECK(ParseFullName(fullname, &x)) << fullname;
   return LocalName(x.type, x.id);
 }
 
 /* static */
-bool DeviceNameUtils::ParseLocalName(StringPiece name, ParsedName* p) {
+bool DeviceNameUtils::ParseLocalName(absl::string_view name, ParsedName* p) {
   if (!ConsumeDeviceType(&name, &p->type)) {
     return false;
   }
@@ -434,7 +436,7 @@ bool DeviceNameUtils::ParseLocalName(StringPiece name, ParsedName* p) {
 }
 
 /* static */
-bool DeviceNameUtils::SplitDeviceName(StringPiece name, string* task,
+bool DeviceNameUtils::SplitDeviceName(absl::string_view name, string* task,
                                       string* device) {
   ParsedName pn;
   if (ParseFullName(name, &pn) && pn.has_type && pn.has_id) {
diff --git a/tensorflow/core/util/device_name_utils.h b/tensorflow/core/util/device_name_utils.h
index 3f0bc60562..21074223a2 100644
--- a/tensorflow/core/util/device_name_utils.h
+++ b/tensorflow/core/util/device_name_utils.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 
@@ -86,15 +86,15 @@ class DeviceNameUtils {
     int id = 0;
   };
   // Parses "fullname" into "*parsed". Returns true iff succeeds.
-  static bool ParseFullName(StringPiece fullname, ParsedName* parsed);
+  static bool ParseFullName(absl::string_view fullname, ParsedName* parsed);
 
   // Canonicalizes "fullname" into "*canonical_name". Uses a fully specified
   // basename to fill in fields that are missing. Accepts both legacy, newer
   // and local versions of the device spec. Returns the newer version of the
   // device spec. If we were unable to interpret / parse "fullname" returns
   // an error and *canonical_name is set to "".
-  static Status CanonicalizeDeviceName(StringPiece fullname,
-                                       StringPiece basename,
+  static Status CanonicalizeDeviceName(absl::string_view fullname,
+                                       absl::string_view basename,
                                        string* canonical_name);
 
   // Returns true if "name" specifies any non-trivial constraint on the device.
@@ -133,20 +133,20 @@ class DeviceNameUtils {
 
   // Returns true iff devices identified by 'src' and 'dst' are in the
   // same address space.
-  static bool IsSameAddressSpace(StringPiece src, StringPiece dst);
+  static bool IsSameAddressSpace(absl::string_view src, absl::string_view dst);
   static bool IsSameAddressSpace(const ParsedName& src, const ParsedName& dst);
 
   // Returns the local device given its "type" and "id".
-  static string LocalName(StringPiece type, int id);
+  static string LocalName(absl::string_view type, int id);
 
   // Returns a short local device name (cpu:0, gpu:1, etc) based on
   // the given fullname.
-  static string LocalName(StringPiece fullname);
+  static string LocalName(absl::string_view fullname);
 
   // If "name" is a valid local device name (cpu:0, gpu:1, etc.),
   // fills in parsed.type and parsed.id accordingly. Returns true iff
   // succeeds.
-  static bool ParseLocalName(StringPiece name, ParsedName* parsed);
+  static bool ParseLocalName(absl::string_view name, ParsedName* parsed);
 
   // Splits a fully-qualified device name into a task identifier and a
   // relative device identifier. It first parses "name" using
@@ -155,7 +155,8 @@ class DeviceNameUtils {
   // component into *device.  This function will still return true if
   // the task component is empty, but it requires the relative device
   // component to be fully specified.
-  static bool SplitDeviceName(StringPiece name, string* task, string* device);
+  static bool SplitDeviceName(absl::string_view name, string* task,
+                              string* device);
 
   static string ParsedNameToString(const ParsedName& pn);
 
diff --git a/tensorflow/core/util/device_name_utils_test.cc b/tensorflow/core/util/device_name_utils_test.cc
index dafb3b20b9..11c8171f27 100644
--- a/tensorflow/core/util/device_name_utils_test.cc
+++ b/tensorflow/core/util/device_name_utils_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/util/device_name_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -278,7 +279,7 @@ TEST(DeviceNameUtilsTest, Basic) {
   }
 }
 
-static bool IsCSHelper(StringPiece pattern, StringPiece actual) {
+static bool IsCSHelper(absl::string_view pattern, absl::string_view actual) {
   DeviceNameUtils::ParsedName p, a;
   EXPECT_TRUE(DeviceNameUtils::ParseFullName(pattern, &p));
   EXPECT_TRUE(DeviceNameUtils::ParseFullName(actual, &a));
@@ -303,7 +304,7 @@ TEST(DeviceNameUtilsTest, IsCompleteSpecification) {
       IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
 }
 
-static bool IsSpecHelper(StringPiece pattern, StringPiece actual) {
+static bool IsSpecHelper(absl::string_view pattern, absl::string_view actual) {
   DeviceNameUtils::ParsedName p, a;
   EXPECT_TRUE(DeviceNameUtils::ParseFullName(pattern, &p));
   EXPECT_TRUE(DeviceNameUtils::ParseFullName(actual, &a));
diff --git a/tensorflow/core/util/env_var.cc b/tensorflow/core/util/env_var.cc
index 2604a5d66a..1753d22417 100644
--- a/tensorflow/core/util/env_var.cc
+++ b/tensorflow/core/util/env_var.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <stdlib.h>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -25,7 +26,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
+Status ReadBoolFromEnvVar(absl::string_view env_var_name, bool default_val,
                           bool* value) {
   *value = default_val;
   const char* tf_env_var_val = getenv(string(env_var_name).c_str());
@@ -45,7 +46,7 @@ Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
       tf_env_var_val, ". Use the default value: ", default_val));
 }
 
-Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
+Status ReadInt64FromEnvVar(absl::string_view env_var_name, int64 default_val,
                            int64* value) {
   *value = default_val;
   const char* tf_env_var_val = getenv(string(env_var_name).c_str());
@@ -60,8 +61,8 @@ Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
       tf_env_var_val, ". Use the default value: ", default_val));
 }
 
-Status ReadStringFromEnvVar(StringPiece env_var_name, StringPiece default_val,
-                            string* value) {
+Status ReadStringFromEnvVar(absl::string_view env_var_name,
+                            absl::string_view default_val, string* value) {
   const char* tf_env_var_val = getenv(string(env_var_name).c_str());
   if (tf_env_var_val != nullptr) {
     *value = tf_env_var_val;
diff --git a/tensorflow/core/util/env_var.h b/tensorflow/core/util/env_var.h
index 724ca35729..89c591844f 100644
--- a/tensorflow/core/util/env_var.h
+++ b/tensorflow/core/util/env_var.h
@@ -16,8 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_ENV_VAR_H_
 #define TENSORFLOW_CORE_UTIL_ENV_VAR_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -27,19 +27,19 @@ namespace tensorflow {
 // case insensitive "false" is interpreted as false. A string "1" or a case
 // insensitive "true" is interpreted as true. Otherwise, an error status is
 // returned.
-Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
+Status ReadBoolFromEnvVar(absl::string_view env_var_name, bool default_val,
                           bool* value);
 
 // Returns an int64 into "value" from the environmental variable "env_var_name".
 // If it is unset, the default value is used.
 // If the string cannot be parsed into int64, an error status is returned.
-Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
+Status ReadInt64FromEnvVar(absl::string_view env_var_name, int64 default_val,
                            int64* value);
 
 // Returns a string into "value" from the environmental variable "env_var_name".
 // If it is unset, the default value is used.
-Status ReadStringFromEnvVar(StringPiece env_var_name, StringPiece default_val,
-                            string* value);
+Status ReadStringFromEnvVar(absl::string_view env_var_name,
+                            absl::string_view default_val, string* value);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/util/events_writer.cc b/tensorflow/core/util/events_writer.cc
index aaaba913a7..a0605a758d 100644
--- a/tensorflow/core/util/events_writer.cc
+++ b/tensorflow/core/util/events_writer.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <stddef.h>  // for NULL
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -102,7 +103,7 @@ string EventsWriter::FileName() {
   return filename_;
 }
 
-void EventsWriter::WriteSerializedEvent(StringPiece event_str) {
+void EventsWriter::WriteSerializedEvent(absl::string_view event_str) {
   if (recordio_writer_ == nullptr) {
     if (!InitIfNeeded().ok()) {
       LOG(ERROR) << "Write failed because file could not be opened.";
diff --git a/tensorflow/core/util/events_writer.h b/tensorflow/core/util/events_writer.h
index d5952c3cbd..9a60ba59fc 100644
--- a/tensorflow/core/util/events_writer.h
+++ b/tensorflow/core/util/events_writer.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <memory>
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/record_writer.h"
 #include "tensorflow/core/platform/env.h"
@@ -66,7 +67,7 @@ class EventsWriter {
   // Append "event_str", a serialized Event, to the file.
   // Note that this function does NOT check that de-serializing event_str
   // results in a valid Event proto.  The tensorflow:: bit makes SWIG happy.
-  void WriteSerializedEvent(tensorflow::StringPiece event_str);
+  void WriteSerializedEvent(absl::string_view event_str);
 
   // EventWriter automatically flushes and closes on destruction, but
   // these two methods are provided for users who want to write to disk sooner
diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc
index e52d55e2ff..3f77843518 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.cc
+++ b/tensorflow/core/util/example_proto_fast_parsing.cc
@@ -16,6 +16,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb_text.h"
 #include "tensorflow/core/framework/numeric_op.h"
@@ -66,7 +67,7 @@ namespace parsed {
 class Feature {
  public:
   Feature() {}
-  explicit Feature(StringPiece serialized) : serialized_(serialized) {}
+  explicit Feature(absl::string_view serialized) : serialized_(serialized) {}
 
   Status ParseDataType(DataType* dtype) {
     DCHECK(dtype != nullptr);
@@ -223,14 +224,14 @@ class Feature {
     return true;
   }
 
-  StringPiece GetSerialized() const { return serialized_; }
+  absl::string_view GetSerialized() const { return serialized_; }
 
  private:
   // TODO(lew): Pair of uint8* would be more natural.
-  StringPiece serialized_;
+  absl::string_view serialized_;
 };
 
-using FeatureMapEntry = std::pair<StringPiece, Feature>;
+using FeatureMapEntry = std::pair<absl::string_view, Feature>;
 using Example = std::vector<FeatureMapEntry>;
 
 }  // namespace parsed
@@ -260,13 +261,14 @@ inline bool SkipExtraneousTag(protobuf::io::CodedInputStream* stream) {
   return false;  // unrecognized tag type
 }
 
-bool ParseString(protobuf::io::CodedInputStream* stream, StringPiece* result) {
+bool ParseString(protobuf::io::CodedInputStream* stream,
+                 absl::string_view* result) {
   DCHECK(stream != nullptr);
   DCHECK(result != nullptr);
   uint32 length;
   if (!stream->ReadVarint32(&length)) return false;
   if (length == 0) {
-    *result = StringPiece(nullptr, 0);
+    *result = absl::string_view(nullptr, 0);
     return true;
   }
   const void* stream_alias;
@@ -275,7 +277,7 @@ bool ParseString(protobuf::io::CodedInputStream* stream, StringPiece* result) {
     return false;
   }
   if (static_cast<uint32>(stream_size) < length) return false;
-  *result = StringPiece(static_cast<const char*>(stream_alias), length);
+  *result = absl::string_view(static_cast<const char*>(stream_alias), length);
   stream->Skip(length);
   return true;
 }
@@ -290,7 +292,7 @@ bool ParseFeatureMapEntry(protobuf::io::CodedInputStream* stream,
   if (!stream->ExpectTag(kDelimitedTag(1))) return false;
   if (!ParseString(stream, &feature_map_entry->first)) return false;
   if (!stream->ExpectTag(kDelimitedTag(2))) return false;
-  StringPiece feature_string_piece;
+  absl::string_view feature_string_piece;
   if (!ParseString(stream, &feature_string_piece)) return false;
   feature_map_entry->second = parsed::Feature(feature_string_piece);
   if (!stream->ExpectAtEnd()) return false;
@@ -332,7 +334,7 @@ bool ParseExample(protobuf::io::CodedInputStream* stream,
   return true;
 }
 
-bool ParseExample(StringPiece serialized, parsed::Example* example) {
+bool ParseExample(absl::string_view serialized, parsed::Example* example) {
   DCHECK(example != nullptr);
   protobuf::io::CodedInputStream stream(
       reinterpret_cast<const uint8*>(serialized.data()), serialized.size());
@@ -439,7 +441,7 @@ struct SparseBuffer {
 };
 
 struct SeededHasher {
-  uint64 operator()(StringPiece s) const {
+  uint64 operator()(absl::string_view s) const {
     return Hash64(s.data(), s.size(), seed);
   }
   uint64 seed{0xDECAFCAFFE};
@@ -467,7 +469,7 @@ class LimitedArraySlice {
   T* end_;
 };
 
-void LogDenseFeatureDataLoss(StringPiece feature_name) {
+void LogDenseFeatureDataLoss(absl::string_view feature_name) {
   LOG(WARNING) << "Data loss! Feature '" << feature_name
                << "' is present in multiple concatenated "
                   "tf.Examples. Ignoring all but last one.";
@@ -478,7 +480,7 @@ void LogDenseFeatureDataLoss(StringPiece feature_name) {
   duplicated_dense_feature->GetCell()->IncrementBy(1);
 }
 
-void LogSparseFeatureDataLoss(StringPiece feature_name) {
+void LogSparseFeatureDataLoss(absl::string_view feature_name) {
   LOG(WARNING) << "Data loss! Feature '" << feature_name
                << "' is present in multiple concatenated "
                   "tf.Examples. Ignoring all but last one.";
@@ -523,7 +525,7 @@ Status FastParseSerializedExample(
     parsed::FeatureMapEntry& name_and_feature =
         parsed_example[parsed_example_size - i - 1];
 
-    const StringPiece feature_name = name_and_feature.first;
+    const absl::string_view feature_name = name_and_feature.first;
     parsed::Feature& feature = name_and_feature.second;
 
     std::pair<size_t, Type> d_and_type;
@@ -542,7 +544,7 @@ Status FastParseSerializedExample(
       if (feature_name != config_feature_name) continue;
     }
 
-    auto example_error = [&](StringPiece suffix) {
+    auto example_error = [&](absl::string_view suffix) {
       return errors::InvalidArgument("Name: ", example_name,
                                      ", Key: ", feature_name,
                                      ", Index: ", example_index, ".  ", suffix);
@@ -585,7 +587,7 @@ Status FastParseSerializedExample(
 
         const std::size_t offset = example_index * num_elements;
 
-        auto shape_error = [&](size_t size, StringPiece type_str) {
+        auto shape_error = [&](size_t size, absl::string_view type_str) {
           return example_error(strings::StrCat(
               "Number of ", type_str,
               " values != expected.  "
@@ -637,7 +639,7 @@ Status FastParseSerializedExample(
               "Expected type: ", DataTypeString(config.dense[d].dtype)));
         }
 
-        auto shape_error = [&](size_t size, StringPiece type_str) {
+        auto shape_error = [&](size_t size, absl::string_view type_str) {
           return example_error(strings::StrCat(
               "Number of ", type_str,
               " values is not a multiple of stride length. Saw ", size,
@@ -1275,7 +1277,7 @@ Status FastParseSingleExample(const Config& config, const string& serialized,
     parsed::FeatureMapEntry& name_and_feature =
         parsed_example[parsed_example_size - i - 1];
 
-    const StringPiece feature_name = name_and_feature.first;
+    const absl::string_view feature_name = name_and_feature.first;
     parsed::Feature& feature = name_and_feature.second;
 
     std::pair<size_t, Type> d_and_type;
@@ -1294,7 +1296,7 @@ Status FastParseSingleExample(const Config& config, const string& serialized,
       if (feature_name != config_feature_name) continue;
     }
 
-    auto example_error = [feature_name](StringPiece suffix) {
+    auto example_error = [feature_name](absl::string_view suffix) {
       return errors::InvalidArgument("Key: ", feature_name, ".  ", suffix);
     };
 
@@ -1727,8 +1729,8 @@ Status FastParseSequenceExample(
   DCHECK(context_result != nullptr);
   DCHECK(feature_list_result != nullptr);
   DCHECK(dense_feature_lengths != nullptr);
-  std::map<StringPiece, bool> context_is_sparse;
-  std::map<StringPiece, std::pair<DataType, size_t>>
+  std::map<absl::string_view, bool> context_is_sparse;
+  std::map<absl::string_view, std::pair<DataType, size_t>>
       context_feature_type_and_lengths;
   if (!example_names.empty() && example_names.size() != num_examples) {
     return errors::InvalidArgument(
@@ -1759,8 +1761,8 @@ Status FastParseSequenceExample(
     }
     context_is_sparse[c.feature_name] = false;
   }
-  std::map<StringPiece, bool> sequence_is_sparse;
-  std::map<StringPiece, std::pair<DataType, size_t>>
+  std::map<absl::string_view, bool> sequence_is_sparse;
+  std::map<absl::string_view, std::pair<DataType, size_t>>
       sequence_feature_type_and_lengths;
   for (auto& c : feature_list_config.sparse) {
     TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
@@ -1779,10 +1781,10 @@ Status FastParseSequenceExample(
     sequence_is_sparse[c.feature_name] = false;
   }
 
-  std::vector<std::map<StringPiece, StringPiece>> all_context_features(
-      num_examples);
-  std::vector<std::map<StringPiece, StringPiece>> all_sequence_features(
-      num_examples);
+  std::vector<std::map<absl::string_view, absl::string_view>>
+      all_context_features(num_examples);
+  std::vector<std::map<absl::string_view, absl::string_view>>
+      all_sequence_features(num_examples);
   const string kUnknown = "<unknown>";
   for (int d = 0; d < num_examples; d++) {
     const string& example = serialized[d];
@@ -1798,8 +1800,8 @@ Status FastParseSequenceExample(
 
     // Extract pointers to all features within this serialized example.
     while (!stream.ExpectAtEnd()) {
-      std::map<StringPiece, StringPiece>* features = nullptr;
-      const std::map<StringPiece, std::pair<DataType, size_t>>* config =
+      std::map<absl::string_view, absl::string_view>* features = nullptr;
+      const std::map<absl::string_view, std::pair<DataType, size_t>>* config =
           nullptr;
       if (stream.ExpectTag(kDelimitedTag(1))) {
         // Context
@@ -1821,7 +1823,7 @@ Status FastParseSequenceExample(
         }
         auto limit = stream.PushLimit(length);
         while (!stream.ExpectAtEnd()) {
-          StringPiece key, value;
+          absl::string_view key, value;
           uint32 length;
           if (!stream.ExpectTag(kDelimitedTag(1)) ||
               !stream.ReadVarint32(&length)) {
diff --git a/tensorflow/core/util/memmapped_file_system.cc b/tensorflow/core/util/memmapped_file_system.cc
index d3439cbc93..35f0062556 100644
--- a/tensorflow/core/util/memmapped_file_system.cc
+++ b/tensorflow/core/util/memmapped_file_system.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/util/memmapped_file_system.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -56,16 +57,16 @@ class RandomAccessFileFromMemmapped : public RandomAccessFile {
 
   ~RandomAccessFileFromMemmapped() override = default;
 
-  Status Read(uint64 offset, size_t to_read, StringPiece* result,
+  Status Read(uint64 offset, size_t to_read, absl::string_view* result,
               char* scratch) const override {
     if (offset >= length_) {
-      *result = StringPiece(scratch, 0);
+      *result = absl::string_view(scratch, 0);
       return Status(error::OUT_OF_RANGE, "Read after file end");
     }
     const uint64 region_left =
         std::min(length_ - offset, static_cast<uint64>(to_read));
-    *result =
-        StringPiece(reinterpret_cast<const char*>(data_) + offset, region_left);
+    *result = absl::string_view(reinterpret_cast<const char*>(data_) + offset,
+                                region_left);
     return (region_left == to_read)
                ? Status::OK()
                : Status(error::OUT_OF_RANGE, "Read less bytes than requested");
diff --git a/tensorflow/core/util/memmapped_file_system_test.cc b/tensorflow/core/util/memmapped_file_system_test.cc
index 504d2d353f..b5608ed6cc 100644
--- a/tensorflow/core/util/memmapped_file_system_test.cc
+++ b/tensorflow/core/util/memmapped_file_system_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/util/memmapped_file_system.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
@@ -86,8 +87,8 @@ TEST(MemmappedFileSystemTest, SimpleTest) {
   // The memory region can be bigger but not less than Tensor size.
   ASSERT_GE(memory_region->length(), test_tensor.TotalBytes());
   EXPECT_EQ(test_tensor.tensor_data(),
-            StringPiece(static_cast<const char*>(memory_region->data()),
-                        test_tensor.TotalBytes()));
+            absl::string_view(static_cast<const char*>(memory_region->data()),
+                              test_tensor.TotalBytes()));
   // Check that GetFileSize works.
   uint64 file_size = 0;
   TF_ASSERT_OK(memmapped_env.GetFileSize(kTensor2FileName, &file_size));
diff --git a/tensorflow/core/util/memmapped_file_system_writer.cc b/tensorflow/core/util/memmapped_file_system_writer.cc
index 9556ee385f..a312a0d4d3 100644
--- a/tensorflow/core/util/memmapped_file_system_writer.cc
+++ b/tensorflow/core/util/memmapped_file_system_writer.cc
@@ -15,6 +15,7 @@ limitations under the License.
 #include "tensorflow/core/util/memmapped_file_system_writer.h"
 
 #include <algorithm>
+#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 
@@ -80,7 +81,7 @@ Status MemmappedFileSystemWriter::SaveProtobuf(
 
 namespace {
 
-StringPiece EncodeUint64LittleEndian(uint64 val, char* output_buffer) {
+absl::string_view EncodeUint64LittleEndian(uint64 val, char* output_buffer) {
   for (unsigned int i = 0; i < sizeof(uint64); ++i) {
     output_buffer[i] = (val >> i * 8);
   }
@@ -116,7 +117,7 @@ Status MemmappedFileSystemWriter::AdjustAlignment(uint64 alignment) {
   static constexpr uint64 kFillerBufferSize = 16;
   const char kFillerBuffer[kFillerBufferSize] = {};
   for (uint64 rest = to_write_for_alignment; rest > 0;) {
-    StringPiece sp(kFillerBuffer, std::min(rest, kFillerBufferSize));
+    absl::string_view sp(kFillerBuffer, std::min(rest, kFillerBufferSize));
     TF_RETURN_IF_ERROR(output_file_->Append(sp));
     rest -= sp.size();
     output_file_offset_ += sp.size();
diff --git a/tensorflow/core/util/mirror_pad_mode.cc b/tensorflow/core/util/mirror_pad_mode.cc
index 433d8aad55..629a0b2de7 100644
--- a/tensorflow/core/util/mirror_pad_mode.cc
+++ b/tensorflow/core/util/mirror_pad_mode.cc
@@ -15,13 +15,14 @@ limitations under the License.
 
 #include "tensorflow/core/util/mirror_pad_mode.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
 
-Status GetNodeAttr(const NodeDef& node_def, StringPiece attr_name,
+Status GetNodeAttr(const NodeDef& node_def, absl::string_view attr_name,
                    MirrorPadMode* value) {
   string str_value;
   TF_RETURN_IF_ERROR(GetNodeAttr(node_def, attr_name, &str_value));
diff --git a/tensorflow/core/util/mirror_pad_mode.h b/tensorflow/core/util/mirror_pad_mode.h
index ceee9b06b0..026fd62503 100644
--- a/tensorflow/core/util/mirror_pad_mode.h
+++ b/tensorflow/core/util/mirror_pad_mode.h
@@ -21,6 +21,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -44,7 +45,7 @@ string GetMirrorPadModeAttrString();
 class NodeDef;
 
 // Specialization to parse an attribute directly into a MirrorPadMode enum.
-Status GetNodeAttr(const NodeDef& node_def, StringPiece attr_name,
+Status GetNodeAttr(const NodeDef& node_def, absl::string_view attr_name,
                    MirrorPadMode* value);
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/util/padding.cc b/tensorflow/core/util/padding.cc
index 117de5ee4b..1941d394c3 100644
--- a/tensorflow/core/util/padding.cc
+++ b/tensorflow/core/util/padding.cc
@@ -15,13 +15,14 @@ limitations under the License.
 
 #include "tensorflow/core/util/padding.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
 
-Status GetNodeAttr(const NodeDef& node_def, StringPiece attr_name,
+Status GetNodeAttr(const NodeDef& node_def, absl::string_view attr_name,
                    Padding* value) {
   string str_value;
   TF_RETURN_IF_ERROR(GetNodeAttr(node_def, attr_name, &str_value));
diff --git a/tensorflow/core/util/padding.h b/tensorflow/core/util/padding.h
index 76f9b4dd9a..4f94a8c4e4 100644
--- a/tensorflow/core/util/padding.h
+++ b/tensorflow/core/util/padding.h
@@ -21,6 +21,7 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -45,7 +46,7 @@ enum Padding {
 string GetPaddingAttrString();
 
 // Specialization to parse an attribute directly into a Padding enum.
-Status GetNodeAttr(const NodeDef& node_def, StringPiece attr_name,
+Status GetNodeAttr(const NodeDef& node_def, absl::string_view attr_name,
                    Padding* value);
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/util/reporter_test.cc b/tensorflow/core/util/reporter_test.cc
index 0972b86ea5..3d026f9238 100644
--- a/tensorflow/core/util/reporter_test.cc
+++ b/tensorflow/core/util/reporter_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "tensorflow/core/util/reporter.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -28,7 +29,7 @@ namespace tensorflow {
 namespace {
 
 // Tests of all the error paths in log_reader.cc follow:
-static void ExpectHasSubstr(StringPiece s, StringPiece expected) {
+static void ExpectHasSubstr(absl::string_view s, absl::string_view expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << s << " does not contain " << expected;
 }
diff --git a/tensorflow/core/util/saved_tensor_slice_util.cc b/tensorflow/core/util/saved_tensor_slice_util.cc
index 2040eac7e5..9b753fca6e 100644
--- a/tensorflow/core/util/saved_tensor_slice_util.cc
+++ b/tensorflow/core/util/saved_tensor_slice_util.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/util/saved_tensor_slice_util.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/ordered_code.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -44,7 +45,7 @@ string EncodeTensorNameSlice(const string& name, const TensorSlice& slice) {
 
 Status DecodeTensorNameSlice(const string& code, string* name,
                              tensorflow::TensorSlice* slice) {
-  StringPiece src(code);
+  absl::string_view src(code);
   uint64 x;
   if (!tensorflow::strings::OrderedCode::ReadNumIncreasing(&src, &x)) {
     return errors::Internal("Failed to parse the leading number: src = ", src);
diff --git a/tensorflow/core/util/semver_test.cc b/tensorflow/core/util/semver_test.cc
index fdc34fa58b..667e746444 100644
--- a/tensorflow/core/util/semver_test.cc
+++ b/tensorflow/core/util/semver_test.cc
@@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/public/version.h"
 
 #include <string>
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -32,14 +32,14 @@ bool IsDotOrIdentifierChar(char c) {
   return false;
 }
 
-bool ConsumeDotSeparatedIdentifiers(StringPiece* s, const string& prefix,
-                                    StringPiece* val) {
+bool ConsumeDotSeparatedIdentifiers(absl::string_view* s, const string& prefix,
+                                    absl::string_view* val) {
   if (!str_util::ConsumePrefix(s, prefix)) return false;
   size_t i;
   for (i = 0; i < s->size() && IsDotOrIdentifierChar((*s)[i]); ++i) {
     // Intentionally empty
   }
-  *val = StringPiece(s->data(), i);
+  *val = absl::string_view(s->data(), i);
   s->remove_prefix(i);
   return i > 0;
 }
@@ -50,8 +50,8 @@ TEST(SemverTest, VersionStringFollowsSemver) {
   // free to refine further (for example, check for leading 0s in numbers), but
   // avoid adding dependencies.
   uint64 major, minor, patch;
-  StringPiece prerelease, metadata;
-  StringPiece semver(TF_VERSION_STRING);
+  absl::string_view prerelease, metadata;
+  absl::string_view semver(TF_VERSION_STRING);
 
   ASSERT_TRUE(str_util::ConsumeLeadingDigits(&semver, &major));
   ASSERT_TRUE(str_util::ConsumePrefix(&semver, "."));
diff --git a/tensorflow/core/util/tensor_bundle/BUILD b/tensorflow/core/util/tensor_bundle/BUILD
index f40ec9b752..dfb6492f05 100644
--- a/tensorflow/core/util/tensor_bundle/BUILD
+++ b/tensorflow/core/util/tensor_bundle/BUILD
@@ -45,6 +45,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -58,7 +59,10 @@ cc_library(
     name = "naming",
     srcs = ["naming.cc"],
     hdrs = ["naming.h"],
-    deps = ["//tensorflow/core:lib"],
+    deps = [
+        "//tensorflow/core:lib",
+        "@com_google_absl//absl/strings",
+    ],
 )
 
 tf_cc_test(
@@ -78,5 +82,6 @@ tf_cc_test(
         "//tensorflow/core:tensor_testutil",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/core/util/tensor_bundle/naming.cc b/tensorflow/core/util/tensor_bundle/naming.cc
index db3d7ec3ac..fa6ce785b9 100644
--- a/tensorflow/core/util/tensor_bundle/naming.cc
+++ b/tensorflow/core/util/tensor_bundle/naming.cc
@@ -15,17 +15,19 @@ limitations under the License.
 
 #include "tensorflow/core/util/tensor_bundle/naming.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
 
-string MetaFilename(StringPiece prefix) {
+string MetaFilename(absl::string_view prefix) {
   return strings::Printf("%.*s.index", static_cast<int>(prefix.size()),
                          prefix.data());
 }
 
-string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards) {
+string DataFilename(absl::string_view prefix, int32 shard_id,
+                    int32 num_shards) {
   DCHECK_GT(num_shards, 0);
   DCHECK_LT(shard_id, num_shards);
   return strings::Printf("%.*s.data-%05d-of-%05d",
diff --git a/tensorflow/core/util/tensor_bundle/naming.h b/tensorflow/core/util/tensor_bundle/naming.h
index 7b101971a8..53c44ec375 100644
--- a/tensorflow/core/util/tensor_bundle/naming.h
+++ b/tensorflow/core/util/tensor_bundle/naming.h
@@ -34,13 +34,13 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_NAMING_H_
 #define TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_NAMING_H_
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
-string MetaFilename(StringPiece prefix);
-string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards);
+string MetaFilename(absl::string_view prefix);
+string DataFilename(absl::string_view prefix, int32 shard_id, int32 num_shards);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
index 2dcb57a1f9..5a14ac5f60 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include <memory>
 #include <utility>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb_text.h"
@@ -197,7 +198,7 @@ string* GetStringBackingBuffer(const Tensor& val) {
   return const_cast<string*>(val.flat<string>().data());
 }
 
-Status ParseEntryProto(StringPiece key, StringPiece value,
+Status ParseEntryProto(absl::string_view key, absl::string_view value,
                        protobuf::MessageLite* out) {
   if (!out->ParseFromArray(value.data(), value.size())) {
     return errors::DataLoss("Entry for key ", key, " not parseable.");
@@ -216,7 +217,7 @@ Status WriteTensor(const Tensor& val, FileOutputBuffer* out,
   *bytes_written = val.TotalBytes();
   char* buf = GetBackingBuffer(val);
   VLOG(1) << "Appending " << *bytes_written << " bytes to file";
-  return out->Append(StringPiece(buf, *bytes_written));
+  return out->Append(absl::string_view(buf, *bytes_written));
 }
 
 // Serializes string tensor "val".  "bytes_written" is treated in the same
@@ -260,7 +261,7 @@ Status WriteStringTensor(const Tensor& val, FileOutputBuffer* out,
 
   // Writes the length checksum.
   const uint32 length_checksum = crc32c::Mask(*crc32c);
-  TF_RETURN_IF_ERROR(out->Append(StringPiece(
+  TF_RETURN_IF_ERROR(out->Append(absl::string_view(
       reinterpret_cast<const char*>(&length_checksum), sizeof(uint32))));
   *crc32c = crc32c::Extend(
       *crc32c, reinterpret_cast<const char*>(&length_checksum), sizeof(uint32));
@@ -313,7 +314,7 @@ Status WriteVariantTensor(const Tensor& val, FileOutputBuffer* out,
 
     // Write the checksum.
     const uint32 length_checksum = crc32c::Mask(*crc32c);
-    TF_RETURN_IF_ERROR(out->Append(StringPiece(
+    TF_RETURN_IF_ERROR(out->Append(absl::string_view(
         reinterpret_cast<const char*>(&length_checksum), sizeof(uint32))));
     *crc32c =
         crc32c::Extend(*crc32c, reinterpret_cast<const char*>(&length_checksum),
@@ -385,7 +386,8 @@ Status PadAlignment(FileOutputBuffer* out, int alignment, int64* size) {
 
 }  // namespace
 
-BundleWriter::BundleWriter(Env* env, StringPiece prefix, const Options& options)
+BundleWriter::BundleWriter(Env* env, absl::string_view prefix,
+                           const Options& options)
     : env_(env),
       options_(options),
       prefix_(prefix),
@@ -409,7 +411,7 @@ BundleWriter::BundleWriter(Env* env, StringPiece prefix, const Options& options)
   VLOG(1) << "Writing to file " << tmp_data_path_;
 }
 
-Status BundleWriter::Add(StringPiece key, const Tensor& val) {
+Status BundleWriter::Add(absl::string_view key, const Tensor& val) {
   if (!status_.ok()) return status_;
   CHECK_NE(key, kHeaderEntryKey);
   const string key_string(key);
@@ -446,7 +448,7 @@ Status BundleWriter::Add(StringPiece key, const Tensor& val) {
   return status_;
 }
 
-Status BundleWriter::AddSlice(StringPiece full_tensor_key,
+Status BundleWriter::AddSlice(absl::string_view full_tensor_key,
                               const TensorShape& full_tensor_shape,
                               const TensorSlice& slice_spec,
                               const Tensor& slice_tensor) {
@@ -563,7 +565,7 @@ struct MergeState {
 
 // Merges entries of "prefix" into the accumulator state "merge".
 // Returns OK iff the merge succeeds.
-static Status MergeOneBundle(Env* env, StringPiece prefix,
+static Status MergeOneBundle(Env* env, absl::string_view prefix,
                              MergeState* merge_state) {
   VLOG(1) << "Merging bundle:" << prefix;
   const string filename = MetaFilename(prefix);
@@ -663,7 +665,7 @@ static Status MergeOneBundle(Env* env, StringPiece prefix,
 }
 
 Status MergeBundles(Env* env, gtl::ArraySlice<string> prefixes,
-                    StringPiece merged_prefix) {
+                    absl::string_view merged_prefix) {
   // Merges all metadata tables.
   // TODO(zhifengc): KeyValue sorter if it becomes too big.
   MergeState merge;
@@ -713,7 +715,7 @@ Status MergeBundles(Env* env, gtl::ArraySlice<string> prefixes,
 
 // Interface for reading a tensor bundle.
 
-BundleReader::BundleReader(Env* env, StringPiece prefix)
+BundleReader::BundleReader(Env* env, absl::string_view prefix)
     : env_(env),
       prefix_(prefix),
       metadata_(nullptr),
@@ -772,7 +774,7 @@ BundleReader::~BundleReader() {
   gtl::STLDeleteValues(&tensor_slices_);
 }
 
-Status BundleReader::GetBundleEntryProto(StringPiece key,
+Status BundleReader::GetBundleEntryProto(absl::string_view key,
                                          BundleEntryProto* entry) {
   entry->Clear();
   TF_CHECK_OK(status_);
@@ -841,7 +843,7 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) {
     char* backing_buffer = const_cast<char*>((ret->tensor_data().data()));
     size_t unused_bytes_read;
     if (entry.size() > kBufferSize) {
-      StringPiece sp;
+      absl::string_view sp;
       TF_RETURN_IF_ERROR(buffered_file->file()->Read(
           entry.offset(), entry.size(), &sp, backing_buffer));
       if (sp.data() != backing_buffer) {
@@ -876,7 +878,7 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) {
   return Status::OK();
 }
 
-Status BundleReader::Lookup(StringPiece key, Tensor* val) {
+Status BundleReader::Lookup(absl::string_view key, Tensor* val) {
   CHECK(val != nullptr);
   BundleEntryProto entry;
   TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry));
@@ -908,7 +910,7 @@ Status BundleReader::ReadCurrent(Tensor* val) {
   }
 }
 
-Status BundleReader::LookupTensorSlices(StringPiece key,
+Status BundleReader::LookupTensorSlices(absl::string_view key,
                                         std::vector<TensorSlice>* slices) {
   slices->clear();
   BundleEntryProto entry;
@@ -920,7 +922,7 @@ Status BundleReader::LookupTensorSlices(StringPiece key,
   return Status::OK();
 }
 
-Status BundleReader::LookupSlice(StringPiece full_tensor_key,
+Status BundleReader::LookupSlice(absl::string_view full_tensor_key,
                                  const TensorSlice& slice_spec, Tensor* val) {
   CHECK(val != nullptr);
   BundleEntryProto entry;
@@ -928,7 +930,7 @@ Status BundleReader::LookupSlice(StringPiece full_tensor_key,
   return GetSliceValue(full_tensor_key, entry, slice_spec, val);
 }
 
-Status BundleReader::GetSliceValue(StringPiece full_tensor_key,
+Status BundleReader::GetSliceValue(absl::string_view full_tensor_key,
                                    const BundleEntryProto& full_tensor_entry,
                                    const TensorSlice& slice_spec, Tensor* val) {
   using checkpoint::RegisterTensorSlice;
@@ -1042,12 +1044,12 @@ Status BundleReader::GetSliceValue(StringPiece full_tensor_key,
   return Status::OK();
 }
 
-bool BundleReader::Contains(StringPiece key) {
+bool BundleReader::Contains(absl::string_view key) {
   Seek(key);
   return Valid() && (this->key() == key);
 }
 
-Status BundleReader::LookupDtypeAndShape(StringPiece key, DataType* dtype,
+Status BundleReader::LookupDtypeAndShape(absl::string_view key, DataType* dtype,
                                          TensorShape* shape) {
   BundleEntryProto entry;
   TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry));
@@ -1056,7 +1058,8 @@ Status BundleReader::LookupDtypeAndShape(StringPiece key, DataType* dtype,
   return Status::OK();
 }
 
-Status BundleReader::LookupTensorShape(StringPiece key, TensorShape* shape) {
+Status BundleReader::LookupTensorShape(absl::string_view key,
+                                       TensorShape* shape) {
   DataType ignored;
   return LookupDtypeAndShape(key, &ignored, shape);
 }
@@ -1080,7 +1083,7 @@ string BundleReader::DebugString() {
 
 FileOutputBuffer::~FileOutputBuffer() { delete file_; }
 
-Status FileOutputBuffer::Append(StringPiece data) {
+Status FileOutputBuffer::Append(absl::string_view data) {
   // In the below, it is critical to calculate the checksum on the actually
   // copied bytes, not the source bytes.  This is because "data" typically
   // points to tensor buffers, which may be concurrently written.
@@ -1117,7 +1120,8 @@ Status FileOutputBuffer::Close() {
 
 Status FileOutputBuffer::FlushBuffer() {
   if (position_ > 0) {
-    TF_RETURN_IF_ERROR(file_->Append(StringPiece(&buffer_[0], position_)));
+    TF_RETURN_IF_ERROR(
+        file_->Append(absl::string_view(&buffer_[0], position_)));
     position_ = 0;
   }
   return Status::OK();
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.h b/tensorflow/core/util/tensor_bundle/tensor_bundle.h
index 3a2ffbb495..f9a628fc33 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle.h
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.h
@@ -61,6 +61,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_TENSOR_BUNDLE_H_
 #define TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_TENSOR_BUNDLE_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/protobuf/tensor_bundle.pb.h"
 
 #include <map>
@@ -113,12 +114,12 @@ class BundleWriter {
     // Must be >= 1. The default size of 1 densely packs tensors.
     int data_alignment{1};
   };
-  BundleWriter(Env* env, StringPiece prefix,
+  BundleWriter(Env* env, absl::string_view prefix,
                const Options& options = Options());
 
   // Adds the tensor "val" under key "key".
   // Across calls "key" must be unique but can be added in any order.
-  Status Add(StringPiece key, const Tensor& val);
+  Status Add(absl::string_view key, const Tensor& val);
 
   // Partitioned variables support.
   // A slice of a full tensor is stored in two entries in the metadata table:
@@ -136,7 +137,7 @@ class BundleWriter {
   // consistent entry for "full_tensor_key" is produced.
   //
   // Returns an error if the same slice is added the second time.
-  Status AddSlice(StringPiece full_tensor_key,
+  Status AddSlice(absl::string_view full_tensor_key,
                   const TensorShape& full_tensor_shape,
                   const TensorSlice& slice_spec, const Tensor& slice_tensor);
 
@@ -173,7 +174,7 @@ class BundleWriter {
 // Once merged, makes a best effort to delete the old metadata files.
 // Returns OK iff all bundles are successfully merged.
 Status MergeBundles(Env* env, gtl::ArraySlice<string> prefixes,
-                    StringPiece merged_prefix);
+                    absl::string_view merged_prefix);
 
 // On construction, silently attempts to read the metadata associated with
 // "prefix".  If caller intends to call any function afterwards, "status()"
@@ -181,7 +182,7 @@ Status MergeBundles(Env* env, gtl::ArraySlice<string> prefixes,
 // All threads accessing the same BundleReader must synchronize.
 class BundleReader {
  public:
-  BundleReader(Env* const env, StringPiece prefix);
+  BundleReader(Env* const env, absl::string_view prefix);
   ~BundleReader();
 
   // Is ok() iff the reader construction is successful (completed the read of
@@ -191,17 +192,17 @@ class BundleReader {
   // Queries whether the bundle contains an entry keyed by "key".  Calls Seek()
   // internally, so this call invalidates the reader's current position.
   // REQUIRES: status().ok()
-  bool Contains(StringPiece key);
+  bool Contains(absl::string_view key);
 
   // Looks up the dtype and the shape of the tensor keyed by "key".
   // REQUIRES: status().ok()
-  Status LookupDtypeAndShape(StringPiece key, DataType* dtype,
+  Status LookupDtypeAndShape(absl::string_view key, DataType* dtype,
                              TensorShape* shape) TF_MUST_USE_RESULT;
 
   // Looks up the shape of the tensor keyed by "key".
   // Clears "shape" if not found.
   // REQUIRES: status().ok()
-  Status LookupTensorShape(StringPiece key,
+  Status LookupTensorShape(absl::string_view key,
                            TensorShape* shape) TF_MUST_USE_RESULT;
 
   // Looks up the tensor keyed by "key".  If "key" refers to a partitioned
@@ -216,7 +217,7 @@ class BundleReader {
   //
   // Validates the stored crc32c checksum against the restored bytes.
   // REQUIRES: status().ok()
-  Status Lookup(StringPiece key, Tensor* val) TF_MUST_USE_RESULT;
+  Status Lookup(absl::string_view key, Tensor* val) TF_MUST_USE_RESULT;
 
   // Looks up the tensor pointed to by the internal iterator.
   //
@@ -233,19 +234,21 @@ class BundleReader {
   // a slice with a larger start index in some dimension could come before
   // another slice with a smaller start index in the same dimension.
   // REQUIRES: status().ok()
-  Status LookupTensorSlices(StringPiece key, std::vector<TensorSlice>* slices)
+  Status LookupTensorSlices(absl::string_view key,
+                            std::vector<TensorSlice>* slices)
       TF_MUST_USE_RESULT;
 
   // Looks up a specific slice of a partitioned tensor.
   // It is only required that the stored slices cover the requested slice,
   // namely "slice_spec" is a subset of the union of the stored slices.
   // REQUIRES: status().ok()
-  Status LookupSlice(StringPiece full_tensor_key, const TensorSlice& slice_spec,
+  Status LookupSlice(absl::string_view full_tensor_key,
+                     const TensorSlice& slice_spec,
                      Tensor* val) TF_MUST_USE_RESULT;
 
   // Seeks to the first position in the bundle whose key is no less than "key".
   // REQUIRES: status().ok()
-  void Seek(StringPiece key) { return iter_->Seek(key); }
+  void Seek(absl::string_view key) { return iter_->Seek(key); }
   // Moves to the next position in the bundle.
   // REQUIRES: status().ok()
   void Next() const { iter_->Next(); }
@@ -255,10 +258,10 @@ class BundleReader {
 
   // Returns the key at the current position.
   // REQUIRES: status().ok() && Valid()
-  StringPiece key() const { return iter_->key(); }
+  absl::string_view key() const { return iter_->key(); }
   // Returns the raw value at the current position.
   // REQUIRES: status().ok() && Valid()
-  StringPiece value() const { return iter_->value(); }
+  absl::string_view value() const { return iter_->value(); }
 
   string DebugString();
 
@@ -266,7 +269,7 @@ class BundleReader {
   // Seeks for "key" and reads the metadata proto.
   // On non-OK return, clears "entry" for the caller.
   // REQUIRES: status().ok()
-  Status GetBundleEntryProto(StringPiece key,
+  Status GetBundleEntryProto(absl::string_view key,
                              BundleEntryProto* entry) TF_MUST_USE_RESULT;
 
   // Reads the tensor value described by the metadata proto "entry".
@@ -277,7 +280,7 @@ class BundleReader {
   // Reads the slice described by "slice_spec".  The corresponding full tensor
   // has key "ful_tensor_key" and metadata proto "full_tensor_entry".
   // REQUIRES: full_tensor_entry.slices_size() > 0
-  Status GetSliceValue(StringPiece full_tensor_key,
+  Status GetSliceValue(absl::string_view full_tensor_key,
                        const BundleEntryProto& full_tensor_entry,
                        const TensorSlice& slice_spec,
                        Tensor* val) TF_MUST_USE_RESULT;
@@ -318,7 +321,7 @@ class FileOutputBuffer {
   ~FileOutputBuffer();
 
   // Buffered append.
-  Status Append(StringPiece data);
+  Status Append(absl::string_view data);
 
   // Returns the running crc32c checksum of all currently appended bytes.
   uint32 crc32c() { return crc32c_; }
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
index 9567e4750b..eecf97fde5 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <random>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/framework/variant.h"
@@ -281,7 +282,7 @@ void TestNonStandardShapes() {
 }
 
 // Writes a bundle to disk with a bad "version"; checks for "expected_error".
-void VersionTest(const VersionDef& version, StringPiece expected_error) {
+void VersionTest(const VersionDef& version, absl::string_view expected_error) {
   const string path = Prefix("version_test");
   {
     // Prepare an empty bundle with the given version information.
@@ -610,7 +611,7 @@ TEST(TensorBundleTest, DirectoryStructure) {
   // Ensures we have the expected files.
   auto CheckDirFiles = [env](const string& bundle_prefix,
                              gtl::ArraySlice<string> expected_files) {
-    StringPiece dir = io::Dirname(bundle_prefix);
+    absl::string_view dir = io::Dirname(bundle_prefix);
     for (const string& expected_file : expected_files) {
       TF_EXPECT_OK(env->FileExists(io::JoinPath(dir, expected_file)));
     }
@@ -754,8 +755,8 @@ TEST(TensorBundleTest, TruncatedTensorContents) {
   string data;
   TF_ASSERT_OK(ReadFileToString(env, datafile, &data));
   ASSERT_TRUE(!data.empty());
-  TF_ASSERT_OK(WriteStringToFile(env, datafile,
-                                 StringPiece(data.data(), data.size() - 1)));
+  TF_ASSERT_OK(WriteStringToFile(
+      env, datafile, absl::string_view(data.data(), data.size() - 1)));
 
   BundleReader reader(env, Prefix("end"));
   TF_ASSERT_OK(reader.status());
diff --git a/tensorflow/core/util/tensor_slice_reader.cc b/tensorflow/core/util/tensor_slice_reader.cc
index c6dda2ec29..f77150cf0d 100644
--- a/tensorflow/core/util/tensor_slice_reader.cc
+++ b/tensorflow/core/util/tensor_slice_reader.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <utility>
 #include <vector>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/types.pb_text.h"
 #include "tensorflow/core/framework/versions.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -53,7 +54,7 @@ class TensorSliceReaderTable : public TensorSliceReader::Table {
     std::unique_ptr<table::Iterator> iter(table_->NewIterator());
     iter->Seek(key);
     if (iter->Valid() && iter->key() == key) {
-      StringPiece v = iter->value();
+      absl::string_view v = iter->value();
       value->assign(v.data(), v.size());
       return true;
     } else {
diff --git a/tensorflow/core/util/tensor_slice_writer.cc b/tensorflow/core/util/tensor_slice_writer.cc
index 7ebde002e1..d0d6b6ced8 100644
--- a/tensorflow/core/util/tensor_slice_writer.cc
+++ b/tensorflow/core/util/tensor_slice_writer.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <utility>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/table_builder.h"
@@ -40,7 +41,7 @@ class TableBuilder : public TensorSliceWriter::Builder {
     option.compression = table::kNoCompression;
     builder_.reset(new table::TableBuilder(option, f));
   }
-  void Add(StringPiece key, StringPiece val) override {
+  void Add(absl::string_view key, absl::string_view val) override {
     builder_->Add(key, val);
   }
   Status Finish(int64* file_size) override {
diff --git a/tensorflow/core/util/tensor_slice_writer.h b/tensorflow/core/util/tensor_slice_writer.h
index 0db2fb4804..de98629654 100644
--- a/tensorflow/core/util/tensor_slice_writer.h
+++ b/tensorflow/core/util/tensor_slice_writer.h
@@ -21,12 +21,12 @@ limitations under the License.
 
 #include <unordered_map>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_slice.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
@@ -46,7 +46,7 @@ class TensorSliceWriter {
   class Builder {
    public:
     virtual ~Builder() {}
-    virtual void Add(StringPiece key, StringPiece value) = 0;
+    virtual void Add(absl::string_view key, absl::string_view value) = 0;
     virtual Status Finish(int64* file_size) = 0;
   };
   typedef std::function<Status(const string&, Builder**)> CreateBuilderFunction;
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 489999d1e8..6e1e86ff94 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -15,29 +15,30 @@ limitations under the License.
 
 #include "tensorflow/core/util/util.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
 
-StringPiece NodeNamePrefix(const StringPiece& op_name) {
-  StringPiece sp(op_name);
+absl::string_view NodeNamePrefix(const absl::string_view& op_name) {
+  absl::string_view sp(op_name);
   auto p = sp.find('/');
-  if (p == StringPiece::npos || p == 0) {
+  if (p == absl::string_view::npos || p == 0) {
     return "";
   } else {
-    return StringPiece(sp.data(), p);
+    return absl::string_view(sp.data(), p);
   }
 }
 
-StringPiece NodeNameFullPrefix(const StringPiece& op_name) {
-  StringPiece sp(op_name);
+absl::string_view NodeNameFullPrefix(const absl::string_view& op_name) {
+  absl::string_view sp(op_name);
   auto p = sp.rfind('/');
-  if (p == StringPiece::npos || p == 0) {
+  if (p == absl::string_view::npos || p == 0) {
     return "";
   } else {
-    return StringPiece(sp.data(), p);
+    return absl::string_view(sp.data(), p);
   }
 }
 
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index 4aa47aa48a..2e913e17cf 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -16,18 +16,18 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_UTIL_H_
 #define TENSORFLOW_CORE_UTIL_UTIL_H_
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 
 // If op_name has '/' in it, then return everything before the first '/'.
 // Otherwise return empty string.
-StringPiece NodeNamePrefix(const StringPiece& op_name);
+absl::string_view NodeNamePrefix(const absl::string_view& op_name);
 
 // If op_name has '/' in it, then return everything before the last '/'.
 // Otherwise return empty string.
-StringPiece NodeNameFullPrefix(const StringPiece& op_name);
+absl::string_view NodeNameFullPrefix(const absl::string_view& op_name);
 
 class MovingAverage {
  public:
diff --git a/tensorflow/examples/label_image/BUILD b/tensorflow/examples/label_image/BUILD
index c50fd93d03..c9421f35ca 100644
--- a/tensorflow/examples/label_image/BUILD
+++ b/tensorflow/examples/label_image/BUILD
@@ -50,7 +50,7 @@ tf_cc_binary(
             "//tensorflow/core:protos_all_cc",
             "//tensorflow/core:tensorflow",
         ],
-    }),
+    }) + ["@com_google_absl//absl/strings"],
 )
 
 py_binary(
diff --git a/tensorflow/examples/label_image/main.cc b/tensorflow/examples/label_image/main.cc
index ee2927d0a5..dbab8c3e5a 100644
--- a/tensorflow/examples/label_image/main.cc
+++ b/tensorflow/examples/label_image/main.cc
@@ -38,6 +38,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/cc/ops/image_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
@@ -46,7 +47,6 @@ limitations under the License.
 #include "tensorflow/core/graph/default_device.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -99,7 +99,7 @@ static Status ReadEntireFile(tensorflow::Env* env, const string& filename,
   std::unique_ptr<tensorflow::RandomAccessFile> file;
   TF_RETURN_IF_ERROR(env->NewRandomAccessFile(filename, &file));
 
-  tensorflow::StringPiece data;
+  absl::string_view data;
   TF_RETURN_IF_ERROR(file->Read(0, file_size, &data, &(contents)[0]));
   if (data.size() != file_size) {
     return tensorflow::errors::DataLoss("Truncated read of '", filename,
diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD
index 9dce78b9a3..f1b9005965 100644
--- a/tensorflow/java/BUILD
+++ b/tensorflow/java/BUILD
@@ -121,6 +121,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:op_gen_lib",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
         "@com_googlesource_code_re2//:re2",
     ],
 )
diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc
index 4f5a491d25..2324a36f90 100644
--- a/tensorflow/java/src/gen/cc/op_specs.cc
+++ b/tensorflow/java/src/gen/cc/op_specs.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "re2/re2.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/types.h"
@@ -129,7 +130,7 @@ std::pair<Type, Type> TypeResolver::TypesOf(const OpDef_AttrDef& attr_def,
                                             bool* iterable_out) {
   std::pair<Type, Type> types = MakeTypePair(Type::Wildcard());
   *iterable_out = false;
-  StringPiece attr_type = attr_def.type();
+  absl::string_view attr_type = attr_def.type();
   if (str_util::ConsumePrefix(&attr_type, "list(")) {
     attr_type.remove_suffix(1);  // remove closing brace
     *iterable_out = true;
diff --git a/tensorflow/java/src/gen/cc/source_writer.cc b/tensorflow/java/src/gen/cc/source_writer.cc
index a71b367691..757b7ae552 100644
--- a/tensorflow/java/src/gen/cc/source_writer.cc
+++ b/tensorflow/java/src/gen/cc/source_writer.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <algorithm>
 #include <list>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/java/src/gen/cc/source_writer.h"
 
 namespace tensorflow {
@@ -48,7 +49,7 @@ SourceWriter& SourceWriter::Prefix(const char* line_prefix) {
   return *this;
 }
 
-SourceWriter& SourceWriter::Write(const StringPiece& str) {
+SourceWriter& SourceWriter::Write(const absl::string_view& str) {
   size_t line_pos = 0;
   do {
     size_t start_pos = line_pos;
@@ -71,7 +72,7 @@ SourceWriter& SourceWriter::WriteFromFile(const string& fname, Env* env) {
   return Write(data_);
 }
 
-SourceWriter& SourceWriter::Append(const StringPiece& str) {
+SourceWriter& SourceWriter::Append(const absl::string_view& str) {
   if (!str.empty()) {
     if (newline_) {
       DoAppend(left_margin_ + line_prefix_);
diff --git a/tensorflow/java/src/gen/cc/source_writer.h b/tensorflow/java/src/gen/cc/source_writer.h
index de0113bd5b..ce444cd836 100644
--- a/tensorflow/java/src/gen/cc/source_writer.h
+++ b/tensorflow/java/src/gen/cc/source_writer.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include <list>
 #include <set>
 
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/java/src/gen/cc/java_defs.h"
 
@@ -61,7 +61,7 @@ class SourceWriter {
   // The data might potentially contain newline characters, therefore it will
   // be scanned to ensure that each line is indented and prefixed properly,
   // making it a bit slower than Append().
-  SourceWriter& Write(const StringPiece& str);
+  SourceWriter& Write(const absl::string_view& str);
 
   // Writes a source code snippet read from a file.
   //
@@ -74,7 +74,7 @@ class SourceWriter {
   //
   // It is expected that no newline character is present in the data provided,
   // otherwise Write() must be used.
-  SourceWriter& Append(const StringPiece& str);
+  SourceWriter& Append(const absl::string_view& str);
 
   // Appends a type to the current line.
   //
@@ -153,7 +153,7 @@ class SourceWriter {
                            const Javadoc* javadoc = nullptr);
 
  protected:
-  virtual void DoAppend(const StringPiece& str) = 0;
+  virtual void DoAppend(const absl::string_view& str) = 0;
 
  private:
   // A utility base class for visiting elements of a type.
@@ -223,7 +223,7 @@ class SourceFileWriter : public SourceWriter {
   virtual ~SourceFileWriter() = default;
 
  protected:
-  void DoAppend(const StringPiece& str) override {
+  void DoAppend(const absl::string_view& str) override {
     TF_CHECK_OK(file_->Append(str));
   }
 
@@ -243,7 +243,7 @@ class SourceBufferWriter : public SourceWriter {
   const string& str() { return *buffer_; }
 
  protected:
-  void DoAppend(const StringPiece& str) override {
+  void DoAppend(const absl::string_view& str) override {
     buffer_->append(str.begin(), str.end());
   }
 
diff --git a/tensorflow/js/BUILD b/tensorflow/js/BUILD
index ad0dc44f54..6866e4e9fc 100644
--- a/tensorflow/js/BUILD
+++ b/tensorflow/js/BUILD
@@ -48,5 +48,6 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/js/ops/ts_op_gen_test.cc b/tensorflow/js/ops/ts_op_gen_test.cc
index 03241689b5..1c4061e6ee 100644
--- a/tensorflow/js/ops/ts_op_gen_test.cc
+++ b/tensorflow/js/ops/ts_op_gen_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/js/ops/ts_op_gen.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -26,12 +27,12 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-void ExpectContainsStr(StringPiece s, StringPiece expected) {
+void ExpectContainsStr(absl::string_view s, absl::string_view expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
 
-void ExpectDoesNotContainStr(StringPiece s, StringPiece expected) {
+void ExpectDoesNotContainStr(absl::string_view s, absl::string_view expected) {
   EXPECT_FALSE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index c749017627..ae4d67363d 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -336,6 +336,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//third_party/python_runtime:headers",
         "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -358,6 +359,7 @@ cc_library(
         "//tensorflow/python/eager:pywrap_tfe_lib",
         "//third_party/py/numpy:headers",
         "//third_party/python_runtime:headers",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -437,6 +439,7 @@ cc_library(
         "//tensorflow/c:tf_status_helper",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -454,6 +457,7 @@ tf_cc_shared_object(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
+        "@com_google_absl//absl/strings",
         "@protobuf_archive//:protobuf_headers",
     ],
 )
@@ -542,6 +546,7 @@ cc_library(
         "//tensorflow/core:op_gen_lib",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -557,6 +562,7 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:op_gen_lib",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index dcbe6d42bd..cb8ac33f0a 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <thread>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/python/eager/pywrap_tfe.h"
 
 #include "absl/strings/str_cat.h"
@@ -207,12 +208,12 @@ bool ParseDimensionValue(const string& key, PyObject* py_value,
 }
 
 bool ParseStringValue(const string& key, PyObject* py_value, TF_Status* status,
-                      tensorflow::StringPiece* value) {
+                      absl::string_view* value) {
   if (PyBytes_Check(py_value)) {
     Py_ssize_t size = 0;
     char* buf = nullptr;
     if (PyBytes_AsStringAndSize(py_value, &buf, &size) < 0) return false;
-    *value = tensorflow::StringPiece(buf, size);
+    *value = absl::string_view(buf, size);
     return true;
   }
 #if PY_MAJOR_VERSION >= 3
@@ -286,7 +287,7 @@ bool SetOpAttrList(
     std::unique_ptr<const void*[]> values(new const void*[num_values]);
     std::unique_ptr<size_t[]> lengths(new size_t[num_values]);
     for (int i = 0; i < num_values; ++i) {
-      tensorflow::StringPiece value;
+      absl::string_view value;
       tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i));
       if (!ParseStringValue(key, py_value.get(), status, &value)) return false;
       values[i] = value.data();
@@ -489,7 +490,7 @@ bool SetOpAttrScalar(
     tensorflow::gtl::FlatMap<string, tensorflow::int64>* attr_list_sizes,
     TF_Status* status) {
   if (type == TF_ATTR_STRING) {
-    tensorflow::StringPiece value;
+    absl::string_view value;
     if (!ParseStringValue(key, py_value, status, &value)) return false;
     TFE_OpSetAttrString(op, key, value.data(), value.size());
   } else if (type == TF_ATTR_INT) {
@@ -552,7 +553,7 @@ bool SetOpAttrScalar(
     //     (which is what the various "defun" or "Defun" decorators do).
     // And in the future also allow an object that can encapsulate
     // the function name and its attribute values.
-    tensorflow::StringPiece func_name;
+    absl::string_view func_name;
     if (!ParseStringValue(key, py_value, status, &func_name)) {
       PyObject* name_attr = PyObject_GetAttrString(py_value, "name");
       if (name_attr == nullptr ||
@@ -2484,7 +2485,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) {
   for (int i = kFastPathExecuteInputStartIndex + op_def->input_arg_size();
        i < args_size; i += 2) {
     PyObject* py_attr_name = PyTuple_GET_ITEM(args, i);
-    const tensorflow::StringPiece attr_name(TFE_GetPythonString(py_attr_name));
+    const absl::string_view attr_name(TFE_GetPythonString(py_attr_name));
     PyObject* py_attr_value = PyTuple_GET_ITEM(args, i + 1);
 
     // Not creating an index since most of the time there are not more than a
diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc
index 2022fbcbaa..f21900ba67 100644
--- a/tensorflow/python/framework/python_op_gen.cc
+++ b/tensorflow/python/framework/python_op_gen.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <stdio.h>
 #include <sstream>
 #include <unordered_map>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
@@ -160,7 +161,7 @@ class GenEagerPythonOp : public python_op_gen_internal::GenPythonOp {
   string FlattenInputs(const std::vector<int>* input_indices,
                        std::vector<string>* output_sizes) const;
 
-  StringPiece op_name_;
+  absl::string_view op_name_;
   typedef std::unordered_map<string, std::vector<int>> AttrToArgMap;
   AttrToArgMap attr_to_args_;
   std::unordered_map<string, string> attr_expressions_;
@@ -473,7 +474,7 @@ bool GenEagerPythonOp::GetEagerFunctionSetup(const string& indentation,
     const auto& param = param_names_[i + op_def_.input_arg_size()];
     const auto& attr = *FindAttr(attr_name, op_def_);
     const string& attr_api_name = param.GetRenameTo();
-    StringPiece attr_type = attr.type();
+    absl::string_view attr_type = attr.type();
     attr_expressions_[attr_name] = attr_api_name;
     const int default_index = i - (attrs_.size() - params_with_default_.size());
     if (default_index >= 0) {
diff --git a/tensorflow/python/framework/python_op_gen_internal.cc b/tensorflow/python/framework/python_op_gen_internal.cc
index f6aef5bc50..5024cf0625 100644
--- a/tensorflow/python/framework/python_op_gen_internal.cc
+++ b/tensorflow/python/framework/python_op_gen_internal.cc
@@ -20,15 +20,16 @@ limitations under the License.
 #include <iomanip>
 #include <sstream>
 #include <unordered_map>
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_def.pb_text.h"
+#include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
-#include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor.pb_text.h"
+#include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
@@ -110,7 +111,7 @@ string AvoidPythonReserved(const string& s) {
 
 // Indent the first line by "initial" spaces and all following lines
 // by "rest" spaces.
-string Indent(int initial, int rest, StringPiece in) {
+string Indent(int initial, int rest, absl::string_view in) {
   // TODO(josh11b): Also word-wrapping?
   string copy(in.data(), in.size());
   str_util::StripTrailingWhitespace(&copy);
@@ -135,7 +136,7 @@ string Indent(int initial, int rest, StringPiece in) {
 
 // Adds append to *dest, with a space if the first line will be <= width,
 // or a newline otherwise.
-void AppendWithinWidth(string* dest, StringPiece append, int width) {
+void AppendWithinWidth(string* dest, absl::string_view append, int width) {
   auto first_line = append.find('\n');
   if (first_line == string::npos) first_line = append.size();
   if (dest->size() + first_line + 1 /* space */ > static_cast<size_t>(width)) {
@@ -283,7 +284,7 @@ string GetReturns(const OpDef& op_def,
     strings::StrAppend(&result, "    The created Operation.\n");
   } else {
     if (num_outs == 1) {
-      StringPiece description = op_def.output_arg(0).description();
+      absl::string_view description = op_def.output_arg(0).description();
       if (ConsumeEquals(&description)) {  // Skip the generated type info.
         strings::StrAppend(&result, Indent(4, 4, description));
       } else {
@@ -319,7 +320,7 @@ string GetReturns(const OpDef& op_def,
                          str_util::Join(out_names, ", "), ").\n\n");
       for (int i = 0; i < num_outs; ++i) {
         string desc = strings::StrCat(out_names[i], ": ");
-        StringPiece description = op_def.output_arg(i).description();
+        absl::string_view description = op_def.output_arg(i).description();
         if (ConsumeEquals(&description)) {  // Skip the generated type info.
           strings::StrAppend(&desc, description);
         } else {
@@ -481,7 +482,7 @@ static void AddDelimiter(string* append_to, const string& delim) {
   if (!append_to->empty()) strings::StrAppend(append_to, delim);
 }
 
-const ApiDef::Attr* FindAttr(StringPiece name, const ApiDef& api_def) {
+const ApiDef::Attr* FindAttr(absl::string_view name, const ApiDef& api_def) {
   for (int i = 0; i < api_def.attr_size(); ++i) {
     if (api_def.attr(i).name() == name) {
       return &api_def.attr(i);
@@ -659,7 +660,7 @@ void GenPythonOp::AddDocStringInputs() {
   for (int i = 0; i < api_def_.arg_order_size(); ++i) {
     const auto& arg = *FindInputArg(api_def_.arg_order(i), op_def_);
     const auto& api_def_arg = *FindInputArg(api_def_.arg_order(i), api_def_);
-    StringPiece description = api_def_arg.description();
+    absl::string_view description = api_def_arg.description();
     string desc;
     if (ConsumeEquals(&description)) {  // Skip the generated type info.
       desc = strings::StrCat(param_names_[i].GetRenameTo(), ": ");
diff --git a/tensorflow/python/framework/python_op_gen_main.cc b/tensorflow/python/framework/python_op_gen_main.cc
index e20ad5fd33..5a9b009500 100644
--- a/tensorflow/python/framework/python_op_gen_main.cc
+++ b/tensorflow/python/framework/python_op_gen_main.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/python/framework/python_op_gen.h"
 
 #include <memory>
@@ -47,8 +48,8 @@ Status ReadOpListFromFile(const string& filename,
     // The parser assumes that the op name is the first string on each
     // line with no preceding whitespace, and ignores lines that do
     // not start with an op name as a comment.
-    strings::Scanner scanner{StringPiece(line_contents)};
-    StringPiece op_name;
+    strings::Scanner scanner{absl::string_view(line_contents)};
+    absl::string_view op_name;
     if (scanner.One(strings::Scanner::LETTER_DIGIT_DOT)
             .Any(strings::Scanner::LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE)
             .GetResult(nullptr, &op_name)) {
@@ -89,7 +90,7 @@ Status ParseOpListCommandLine(const char* arg, std::vector<string>* op_list) {
 // Returns an empty string if the current executable's name does not
 // follow a known pattern.
 string InferSourceFileName(const char* argv_zero) {
-  StringPiece command_str = io::Basename(argv_zero);
+  absl::string_view command_str = io::Basename(argv_zero);
 
   // For built-in ops, the Bazel build creates a separate executable
   // with the name gen_<op type>_ops_py_wrappers_cc containing the
diff --git a/tensorflow/python/framework/test_file_system.cc b/tensorflow/python/framework/test_file_system.cc
index 6e9915adbb..13d05c6fd0 100644
--- a/tensorflow/python/framework/test_file_system.cc
+++ b/tensorflow/python/framework/test_file_system.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/null_file_system.h"
 
@@ -20,7 +21,7 @@ namespace tensorflow {
 
 class TestRandomAccessFile : public RandomAccessFile {
   // The file contents is 10 bytes of all A's
-  Status Read(uint64 offset, size_t n, StringPiece* result,
+  Status Read(uint64 offset, size_t n, absl::string_view* result,
               char* scratch) const override {
     Status s;
     for (int i = 0; i < n; ++i) {
@@ -31,7 +32,7 @@ class TestRandomAccessFile : public RandomAccessFile {
       }
       scratch[i] = 'A';
     }
-    *result = StringPiece(scratch, n);
+    *result = absl::string_view(scratch, n);
     return s;
   }
 };
diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc
index 6189503d8f..4d920750cd 100644
--- a/tensorflow/python/lib/core/py_func.cc
+++ b/tensorflow/python/lib/core/py_func.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <Python.h>
 
 #include "numpy/arrayobject.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/c/eager/c_api.h"
 #include "tensorflow/c/eager/c_api_internal.h"
 #include "tensorflow/c/tf_status_helper.h"
@@ -401,7 +402,7 @@ Status ConvertNdarrayToTensor(PyObject* obj, Tensor* ret) {
               std::max(1, EIGEN_MAX_ALIGN_BYTES) !=
           0) {
         Tensor t(dtype, shape);
-        StringPiece p = t.tensor_data();
+        absl::string_view p = t.tensor_data();
         memcpy(const_cast<char*>(p.data()), PyArray_DATA(input), p.size());
         *ret = t;
       } else {
@@ -464,7 +465,7 @@ Status ConvertTensorToNdarray(const Tensor& t, PyObject** ret) {
     }
   } else {
     CHECK(DataTypeCanUseMemcpy(t.dtype()));
-    StringPiece p = t.tensor_data();
+    absl::string_view p = t.tensor_data();
     memcpy(PyArray_DATA(np_array), p.data(), p.size());
   }
   *ret = PyArray_Return(np_array);
diff --git a/tensorflow/python/lib/core/strings.i b/tensorflow/python/lib/core/strings.i
index 9d807e51be..8b894ab387 100644
--- a/tensorflow/python/lib/core/strings.i
+++ b/tensorflow/python/lib/core/strings.i
@@ -34,13 +34,13 @@ limitations under the License.
 //       as it comes up.
 
 %{
-#include "tensorflow/core/lib/core/stringpiece.h"
+#include "absl/strings/string_view.h"
 
 // Handles str in Python 2, bytes in Python 3.
 // Returns true on success, false on failure.
-bool _BytesToStringPiece(PyObject* obj, tensorflow::StringPiece* result) {
+bool _BytesToStringPiece(PyObject* obj, absl::string_view* result) {
   if (obj == Py_None) {
-    *result = tensorflow::StringPiece();
+    *result = absl::string_view();
   } else {
     char* ptr;
     Py_ssize_t len;
@@ -48,30 +48,30 @@ bool _BytesToStringPiece(PyObject* obj, tensorflow::StringPiece* result) {
       // Python has raised an error (likely TypeError or UnicodeEncodeError).
       return false;
     }
-    *result = tensorflow::StringPiece(ptr, len);
+    *result = absl::string_view(ptr, len);
   }
   return true;
 }
 %}
 
-%typemap(typecheck) tensorflow::StringPiece = char *;
-%typemap(typecheck) const tensorflow::StringPiece & = char *;
+%typemap(typecheck) absl::string_view = char *;
+%typemap(typecheck) const absl::string_view & = char *;
 
-// "tensorflow::StringPiece" arguments must be specified as a 'str' or 'bytes' object.
-%typemap(in) tensorflow::StringPiece {
+// "absl::string_view" arguments must be specified as a 'str' or 'bytes' object.
+%typemap(in) absl::string_view {
   if (!_BytesToStringPiece($input, &$1)) SWIG_fail;
 }
 
-// "const tensorflow::StringPiece&" arguments can be provided the same as
-// "tensorflow::StringPiece", whose typemap is defined above.
-%typemap(in) const tensorflow::StringPiece & (tensorflow::StringPiece temp) {
+// "const absl::string_view&" arguments can be provided the same as
+// "absl::string_view", whose typemap is defined above.
+%typemap(in) const absl::string_view & (absl::string_view temp) {
   if (!_BytesToStringPiece($input, &temp)) SWIG_fail;
   $1 = &temp;
 }
 
-// C++ functions returning tensorflow::StringPiece will simply return bytes in
+// C++ functions returning absl::string_view will simply return bytes in
 // Python, or None if the StringPiece contained a NULL pointer.
-%typemap(out) tensorflow::StringPiece {
+%typemap(out) absl::string_view {
   if ($1.data()) {
     $result = PyBytes_FromStringAndSize($1.data(), $1.size());
   } else {
diff --git a/tensorflow/python/lib/io/py_record_writer.cc b/tensorflow/python/lib/io/py_record_writer.cc
index faf20df868..ee4eac27f1 100644
--- a/tensorflow/python/lib/io/py_record_writer.cc
+++ b/tensorflow/python/lib/io/py_record_writer.cc
@@ -15,8 +15,8 @@ limitations under the License.
 
 #include "tensorflow/python/lib/io/py_record_writer.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/c/tf_status_helper.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/record_writer.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
 #include "tensorflow/core/platform/env.h"
@@ -48,7 +48,7 @@ PyRecordWriter::~PyRecordWriter() {
   file_.reset();
 }
 
-void PyRecordWriter::WriteRecord(tensorflow::StringPiece record,
+void PyRecordWriter::WriteRecord(absl::string_view record,
                                  TF_Status* out_status) {
   if (writer_ == nullptr) {
     TF_SetStatus(out_status, TF_FAILED_PRECONDITION,
diff --git a/tensorflow/python/lib/io/py_record_writer.h b/tensorflow/python/lib/io/py_record_writer.h
index 9b0792c6db..0aa7b75334 100644
--- a/tensorflow/python/lib/io/py_record_writer.h
+++ b/tensorflow/python/lib/io/py_record_writer.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <memory>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_api.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/record_writer.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -42,7 +42,7 @@ class PyRecordWriter {
                              TF_Status* out_status);
   ~PyRecordWriter();
 
-  void WriteRecord(tensorflow::StringPiece record, TF_Status* out_status);
+  void WriteRecord(absl::string_view record, TF_Status* out_status);
   void Flush(TF_Status* out_status);
   void Close(TF_Status* out_status);
 
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index 11eb9ce947..0889950e16 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/memory/memory.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
@@ -112,7 +113,7 @@ PyObject* MappingKeys(PyObject* o) {
 // Note that '__class__' attribute is set only in new-style classes.
 // A lot of tensorflow code uses __class__ without checks, so it seems like
 // we only support new-style classes.
-StringPiece GetClassName(PyObject* o) {
+absl::string_view GetClassName(PyObject* o) {
   // __class__ is equivalent to type() for new style classes.
   // type() is equivalent to PyObject_Type()
   // (https://docs.python.org/3.5/c-api/object.html#c.PyObject_Type)
@@ -122,9 +123,9 @@ StringPiece GetClassName(PyObject* o) {
 
   // __name__ is the value of `tp_name` after the last '.'
   // (https://docs.python.org/2/c-api/typeobj.html#c.PyTypeObject.tp_name)
-  StringPiece name(type->tp_name);
+  absl::string_view name(type->tp_name);
   size_t pos = name.rfind('.');
-  if (pos != StringPiece::npos) {
+  if (pos != absl::string_view::npos) {
     name.remove_prefix(pos + 1);
   }
   return name;
diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD
index 1ad1895269..4d33cdf0cf 100644
--- a/tensorflow/tools/graph_transforms/BUILD
+++ b/tensorflow/tools/graph_transforms/BUILD
@@ -32,6 +32,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -120,6 +121,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":transform_utils",
+        "@com_google_absl//absl/strings",
         "//tensorflow/c:checkpoint_reader",
         "//tensorflow/core/util/tensor_bundle",
         "//tensorflow/core:core_cpu",
@@ -181,6 +183,7 @@ tf_cc_test(
         "//tensorflow/core/kernels:quantization_utils",
         "//tensorflow/core/kernels:quantized_ops",
         "//tensorflow/core/util/tensor_bundle",
+        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -198,6 +201,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/tools/graph_transforms/fold_constants_lib.cc b/tensorflow/tools/graph_transforms/fold_constants_lib.cc
index 6df2718e61..5bdc529a15 100644
--- a/tensorflow/tools/graph_transforms/fold_constants_lib.cc
+++ b/tensorflow/tools/graph_transforms/fold_constants_lib.cc
@@ -24,12 +24,12 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/constant_folding.h"
 #include "tensorflow/core/common_runtime/shape_refiner.h"
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/graph/subgraph.h"
-#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/public/session.h"
@@ -39,9 +39,10 @@ limitations under the License.
 namespace tensorflow {
 namespace graph_transforms {
 namespace {
-using StringPieceSet = std::unordered_set<StringPiece, StringPieceHasher>;
+using StringPieceSet = std::unordered_set<absl::string_view, StringPieceHasher>;
 template <typename T>
-using StringPieceMap = std::unordered_map<StringPiece, T, StringPieceHasher>;
+using StringPieceMap =
+    std::unordered_map<absl::string_view, T, StringPieceHasher>;
 }  // namespace
 
 Status ReplaceSendRecvs(const GraphDef& original_graph_def,
@@ -109,7 +110,7 @@ Status ReplaceSendRecvs(const GraphDef& original_graph_def,
 
   // Some input nodes are removed in rewrite_graph_def. Add those nodes to
   // output_graph_def.
-  for (StringPiece name : input_nodes) {
+  for (absl::string_view name : input_nodes) {
     const NodeDef& removed_node = *CHECK_NOTNULL(original_map[name]);
     output_graph_def->add_node()->MergeFrom(removed_node);
   }
@@ -163,7 +164,7 @@ Status RemoveUnusedNodes(const GraphDef& input_graph_def,
   }
   while (!current_nodes.empty()) {
     StringPieceSet next_nodes;
-    for (StringPiece node_name : current_nodes) {
+    for (absl::string_view node_name : current_nodes) {
       if (node_map.count(node_name) == 0) {
         LOG(ERROR) << "Bad graph structure, no node named '" << node_name
                    << "' found for input lookup";
diff --git a/tensorflow/tools/graph_transforms/fold_constants_test.cc b/tensorflow/tools/graph_transforms/fold_constants_test.cc
index dcdc3c2906..262314c079 100644
--- a/tensorflow/tools/graph_transforms/fold_constants_test.cc
+++ b/tensorflow/tools/graph_transforms/fold_constants_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include <utility>
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/cc/ops/image_ops.h"
 #include "tensorflow/cc/ops/nn_ops.h"
@@ -208,7 +209,7 @@ class ConstantFoldingTest : public ::testing::Test {
     }
 
     for (const NodeDef& node : graph_def.node()) {
-      const StringPiece name(node.name());
+      const absl::string_view name(node.name());
       const int occurrence_count = folded_node_map.count(node.name());
       if (str_util::EndsWith(name, "expect_removed")) {
         EXPECT_EQ(0, occurrence_count) << "node.name()=" << node.name();
diff --git a/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc b/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc
index d97496cbeb..1d586e2cba 100644
--- a/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc
+++ b/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
@@ -88,7 +89,7 @@ Status ExtractMinMaxRecords(const string& log_file_name,
     if (!strings::safe_strtof(max_number_string.c_str(), &max)) {
       continue;
     }
-    StringPiece name_string = line_parts[min_max_index - 1];
+    absl::string_view name_string = line_parts[min_max_index - 1];
     if (!str_util::EndsWith(name_string, print_suffix)) {
       continue;
     }
diff --git a/tensorflow/tools/graph_transforms/sparsify_gather_test.cc b/tensorflow/tools/graph_transforms/sparsify_gather_test.cc
index b8d6ba00de..5d3da9c59d 100644
--- a/tensorflow/tools/graph_transforms/sparsify_gather_test.cc
+++ b/tensorflow/tools/graph_transforms/sparsify_gather_test.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/cc/ops/sendrecv_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
@@ -38,7 +39,7 @@ Status ReadTensorFromCheckpoint(
 
 class SparsifyGatherTest : public ::testing::Test {
  protected:
-  NodeDef* CreateNode(const StringPiece name, const StringPiece op,
+  NodeDef* CreateNode(const absl::string_view name, const absl::string_view op,
                       const std::vector<NodeDef*>& inputs, GraphDef* graph_def,
                       bool control_dep = false) {
     NodeDef* node_def = graph_def->add_node();
@@ -56,7 +57,7 @@ class SparsifyGatherTest : public ::testing::Test {
     return node_def;
   }
 
-  void MakeGather(StringPiece name, bool gather_v2, NodeDef* params,
+  void MakeGather(absl::string_view name, bool gather_v2, NodeDef* params,
                   NodeDef* indices, GraphDef* graph_def) {
     if (gather_v2) {
       NodeDef* axis_node =
diff --git a/tensorflow/tools/graph_transforms/transform_graph.cc b/tensorflow/tools/graph_transforms/transform_graph.cc
index 7efe450710..9a2b317850 100644
--- a/tensorflow/tools/graph_transforms/transform_graph.cc
+++ b/tensorflow/tools/graph_transforms/transform_graph.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/tools/graph_transforms/transform_graph.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -41,11 +42,11 @@ Status ParseTransformParameters(const string& transforms_string,
     TRANSFORM_PARAM_NAME,
     TRANSFORM_PARAM_VALUE,
   } state = TRANSFORM_NAME;
-  StringPiece remaining(transforms_string);
-  StringPiece match;
-  StringPiece transform_name;
-  StringPiece parameter_name;
-  StringPiece parameter_value;
+  absl::string_view remaining(transforms_string);
+  absl::string_view match;
+  absl::string_view transform_name;
+  absl::string_view parameter_name;
+  absl::string_view parameter_value;
   TransformFuncParameters func_parameters;
   while (!remaining.empty()) {
     if (state == TRANSFORM_NAME) {
diff --git a/tensorflow/tools/graph_transforms/transform_utils.cc b/tensorflow/tools/graph_transforms/transform_utils.cc
index c715380aae..3097adcb0b 100644
--- a/tensorflow/tools/graph_transforms/transform_utils.cc
+++ b/tensorflow/tools/graph_transforms/transform_utils.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/tools/graph_transforms/transform_utils.h"
 
+#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/lib/hash/hash.h"
@@ -87,7 +88,7 @@ void NodeNamePartsFromInput(const string& input_name, string* prefix,
   } else {
     *suffix = ":" + input_parts[1];
   }
-  StringPiece node_name_piece(input_parts[0]);
+  absl::string_view node_name_piece(input_parts[0]);
   if (str_util::ConsumePrefix(&node_name_piece, "^")) {
     *prefix = "^";
   } else {
@@ -640,7 +641,7 @@ Status TransformFuncContext::GetOneInt32Parameter(const string& name,
   }
   string string_value;
   TF_RETURN_IF_ERROR(GetOneStringParameter(name, "", &string_value));
-  if (!strings::safe_strto32(StringPiece(string_value), result)) {
+  if (!strings::safe_strto32(absl::string_view(string_value), result)) {
     return errors::InvalidArgument("Couldn't interpret the ", name,
                                    " argument as a number:", string_value);
   }
@@ -657,7 +658,7 @@ Status TransformFuncContext::GetOneInt64Parameter(const string& name,
   }
   string string_value;
   TF_RETURN_IF_ERROR(GetOneStringParameter(name, "", &string_value));
-  if (!strings::safe_strto64(StringPiece(string_value), result)) {
+  if (!strings::safe_strto64(absl::string_view(string_value), result)) {
     return errors::InvalidArgument("Couldn't interpret the ", name,
                                    " argument as a number:", string_value);
   }
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
index 15d7c70281..da50cae484 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
@@ -447,7 +447,7 @@ void Generator::AppendParseMessageFunction(const Descriptor& md) {
   Print("scanner->RestartCapture()");
   Print("    .Many(Scanner::LETTER_DIGIT_UNDERSCORE)");
   Print("    .StopCapture();");
-  Print("StringPiece identifier;");
+  Print("absl::string_view identifier;");
   Print("if (!scanner->GetResult(nullptr, &identifier)) return false;");
   Print("bool parsed_colon = false;");
   Print("(void)parsed_colon;"); // Avoid "set but not used" compiler warning
@@ -528,7 +528,7 @@ void Generator::AppendParseMessageFunction(const Descriptor& md) {
       Print("SetProtobufStringSwapAllowed(&str_value, ", mutable_value_expr,
             ");");
     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
-      Print("StringPiece value;");
+      Print("absl::string_view value;");
       Print(
           "if (!parsed_colon || "
           "!scanner->RestartCapture().Many("
-- 
GitLab


From ec16f981be86fa6ee0b846edf2d698fd1b647638 Mon Sep 17 00:00:00 2001
From: Martin Wicke <wicke@google.com>
Date: Mon, 15 Oct 2018 10:56:12 -0700
Subject: [PATCH 0975/1085] Fix export for bipartite_match.

PiperOrigin-RevId: 217170873
---
 tensorflow/contrib/image/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py
index f230d93da4..91b8e8d0f9 100755
--- a/tensorflow/contrib/image/__init__.py
+++ b/tensorflow/contrib/image/__init__.py
@@ -58,6 +58,7 @@ from tensorflow.contrib.image.python.ops.distort_image_ops import adjust_hsv_in_
 from tensorflow.contrib.image.python.ops.distort_image_ops import random_hsv_in_yiq
 
 from tensorflow.contrib.image.python.ops.image_ops import angles_to_projective_transforms
+from tensorflow.contrib.image.python.ops.image_ops import bipartite_match
 from tensorflow.contrib.image.python.ops.image_ops import compose_transforms
 from tensorflow.contrib.image.python.ops.image_ops import connected_components
 from tensorflow.contrib.image.python.ops.image_ops import flat_transforms_to_matrices
-- 
GitLab


From 7797c8ef36cd8aeb820d9259ea95e12eb80c9497 Mon Sep 17 00:00:00 2001
From: Yifei Feng <1192265+yifeif@users.noreply.github.com>
Date: Mon, 15 Oct 2018 11:13:11 -0700
Subject: [PATCH 0976/1085] Remove stale description on old PR process

---
 CONTRIBUTING.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 05e970e8cc..b4d7e58374 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -33,8 +33,6 @@ just getting started, Github has a [howto](https://help.github.com/articles/usin
 
 TensorFlow team members will be assigned to review your pull requests. Once the pull requests are approved and pass continuous integration checks, a TensorFlow team member will apply `ready to pull` label to your change. This means we are working on getting your pull request submitted to our internal repository. After the change has been submitted internally, your pull request will be merged automatically on GitHub.
 
-For some pull requests, we will apply the patch for each pull request to our internal version control system first, and export the change out as a new commit later, at which point the original pull request will be closed. The commits in the pull request will be squashed into a single commit with the pull request creator as the author. These pull requests will be labeled as pending merge internally.
-
 If you want to contribute but you're not sure where to start, take a look at the
 [issues with the "contributions welcome" label](https://github.com/tensorflow/tensorflow/labels/stat%3Acontributions%20welcome).
 These are issues that we believe are particularly well suited for outside
-- 
GitLab


From 333c4ff5569a7d9a5b5b3ab8fda638588d008fba Mon Sep 17 00:00:00 2001
From: Jian Li <jianlijianli@google.com>
Date: Mon, 15 Oct 2018 10:59:37 -0700
Subject: [PATCH 0977/1085] Add speech frontend.

PiperOrigin-RevId: 217171519
---
 .../lite/experimental/microfrontend/lib/BUILD | 188 +++++++++++++++
 .../experimental/microfrontend/lib/README     |   9 +
 .../experimental/microfrontend/lib/bits.h     | 102 ++++++++
 .../lite/experimental/microfrontend/lib/fft.c |  54 +++++
 .../lite/experimental/microfrontend/lib/fft.h |  50 ++++
 .../experimental/microfrontend/lib/fft_io.c   |  33 +++
 .../experimental/microfrontend/lib/fft_io.h   |  34 +++
 .../microfrontend/lib/fft_test.cc             |  49 ++++
 .../experimental/microfrontend/lib/fft_util.c |  71 ++++++
 .../experimental/microfrontend/lib/fft_util.h |  34 +++
 .../microfrontend/lib/filterbank.c            | 134 +++++++++++
 .../microfrontend/lib/filterbank.h            |  63 +++++
 .../microfrontend/lib/filterbank_io.c         |  66 +++++
 .../microfrontend/lib/filterbank_io.h         |  35 +++
 .../microfrontend/lib/filterbank_test.cc      | 194 +++++++++++++++
 .../microfrontend/lib/filterbank_util.c       | 225 ++++++++++++++++++
 .../microfrontend/lib/filterbank_util.h       |  50 ++++
 .../experimental/microfrontend/lib/frontend.c |  72 ++++++
 .../experimental/microfrontend/lib/frontend.h |  64 +++++
 .../microfrontend/lib/frontend_io.c           |  69 ++++++
 .../microfrontend/lib/frontend_io.h           |  31 +++
 .../microfrontend/lib/frontend_main.c         |  70 ++++++
 .../lib/frontend_memmap_generator.c           |  47 ++++
 .../microfrontend/lib/frontend_memmap_main.c  |  58 +++++
 .../microfrontend/lib/frontend_test.cc        | 120 ++++++++++
 .../microfrontend/lib/frontend_util.c         |  87 +++++++
 .../microfrontend/lib/frontend_util.h         |  52 ++++
 .../experimental/microfrontend/lib/log_lut.c  |  30 +++
 .../experimental/microfrontend/lib/log_lut.h  |  40 ++++
 .../microfrontend/lib/log_scale.c             |  83 +++++++
 .../microfrontend/lib/log_scale.h             |  39 +++
 .../microfrontend/lib/log_scale_io.c          |  21 ++
 .../microfrontend/lib/log_scale_io.h          |  33 +++
 .../microfrontend/lib/log_scale_test.cc       |  58 +++++
 .../microfrontend/lib/log_scale_util.c        |  27 +++
 .../microfrontend/lib/log_scale_util.h        |  45 ++++
 .../microfrontend/lib/noise_reduction.c       |  51 ++++
 .../microfrontend/lib/noise_reduction.h       |  46 ++++
 .../microfrontend/lib/noise_reduction_io.c    |  34 +++
 .../microfrontend/lib/noise_reduction_io.h    |  36 +++
 .../microfrontend/lib/noise_reduction_test.cc |  70 ++++++
 .../microfrontend/lib/noise_reduction_util.c  |  45 ++++
 .../microfrontend/lib/noise_reduction_util.h  |  50 ++++
 .../microfrontend/lib/pcan_gain_control.c     |  56 +++++
 .../microfrontend/lib/pcan_gain_control.h     |  46 ++++
 .../lib/pcan_gain_control_test.cc             |  59 +++++
 .../lib/pcan_gain_control_util.c              |  90 +++++++
 .../lib/pcan_gain_control_util.h              |  57 +++++
 .../experimental/microfrontend/lib/window.c   |  70 ++++++
 .../experimental/microfrontend/lib/window.h   |  49 ++++
 .../microfrontend/lib/window_io.c             |  42 ++++
 .../microfrontend/lib/window_io.h             |  34 +++
 .../microfrontend/lib/window_test.cc          | 157 ++++++++++++
 .../microfrontend/lib/window_util.c           |  71 ++++++
 .../microfrontend/lib/window_util.h           |  45 ++++
 tensorflow/workspace.bzl                      |   2 +
 third_party/kissfft/BUILD                     |   1 +
 third_party/kissfft/BUILD.bazel               |  23 ++
 third_party/kissfft/workspace.bzl             |  15 ++
 59 files changed, 3586 insertions(+)
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/BUILD
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/README
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/bits.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/fft.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/fft_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_main.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_memmap_generator.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_memmap_main.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/window.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/window.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.h
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/window_test.cc
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.c
 create mode 100644 tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.h
 create mode 100644 third_party/kissfft/BUILD
 create mode 100644 third_party/kissfft/BUILD.bazel
 create mode 100644 third_party/kissfft/workspace.bzl

diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/BUILD b/tensorflow/contrib/lite/experimental/microfrontend/lib/BUILD
new file mode 100644
index 0000000000..3fd4b9fe82
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/BUILD
@@ -0,0 +1,188 @@
+# Library for generating feature vectors from audio data
+
+package(
+    default_visibility = ["//visibility:private"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+cc_library(
+    name = "bits",
+    hdrs = ["bits.h"],
+)
+
+cc_library(
+    name = "fft",
+    srcs = [
+        "fft.c",
+        "fft_util.c",
+    ],
+    hdrs = [
+        "fft.h",
+        "fft_util.h",
+    ],
+    deps = ["@kissfft//:kiss_fftr_16"],
+)
+
+cc_library(
+    name = "filterbank",
+    srcs = [
+        "filterbank.c",
+        "filterbank_util.c",
+    ],
+    hdrs = [
+        "filterbank.h",
+        "filterbank_util.h",
+    ],
+    deps = [
+        ":bits",
+        ":fft",
+    ],
+)
+
+cc_library(
+    name = "frontend",
+    srcs = [
+        "frontend.c",
+        "frontend_util.c",
+    ],
+    hdrs = [
+        "frontend.h",
+        "frontend_util.h",
+    ],
+    deps = [
+        ":bits",
+        ":fft",
+        ":filterbank",
+        ":log_scale",
+        ":noise_reduction",
+        ":pcan_gain_control",
+        ":window",
+    ],
+)
+
+cc_library(
+    name = "log_scale",
+    srcs = [
+        "log_lut.c",
+        "log_scale.c",
+        "log_scale_util.c",
+    ],
+    hdrs = [
+        "log_lut.h",
+        "log_scale.h",
+        "log_scale_util.h",
+    ],
+    deps = [
+        ":bits",
+    ],
+)
+
+cc_library(
+    name = "noise_reduction",
+    srcs = [
+        "noise_reduction.c",
+        "noise_reduction_util.c",
+    ],
+    hdrs = [
+        "noise_reduction.h",
+        "noise_reduction_util.h",
+    ],
+)
+
+cc_library(
+    name = "pcan_gain_control",
+    srcs = [
+        "pcan_gain_control.c",
+        "pcan_gain_control_util.c",
+    ],
+    hdrs = [
+        "pcan_gain_control.h",
+        "pcan_gain_control_util.h",
+    ],
+    deps = [
+        ":bits",
+    ],
+)
+
+cc_library(
+    name = "window",
+    srcs = [
+        "window.c",
+        "window_util.c",
+    ],
+    hdrs = [
+        "window.h",
+        "window_util.h",
+    ],
+)
+
+cc_test(
+    name = "fft_test",
+    size = "small",
+    srcs = ["fft_test.cc"],
+    deps = [
+        ":fft",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "filterbank_test",
+    size = "small",
+    srcs = ["filterbank_test.cc"],
+    deps = [
+        ":filterbank",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "frontend_test",
+    size = "small",
+    srcs = ["frontend_test.cc"],
+    deps = [
+        ":frontend",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "log_scale_test",
+    size = "small",
+    srcs = ["log_scale_test.cc"],
+    deps = [
+        ":log_scale",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "noise_reduction_test",
+    size = "small",
+    srcs = ["noise_reduction_test.cc"],
+    deps = [
+        ":noise_reduction",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "pcan_gain_control_test",
+    size = "small",
+    srcs = ["pcan_gain_control_test.cc"],
+    deps = [
+        ":pcan_gain_control",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "window_test",
+    size = "small",
+    srcs = ["window_test.cc"],
+    deps = [
+        ":window",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/README b/tensorflow/contrib/lite/experimental/microfrontend/lib/README
new file mode 100644
index 0000000000..731d88c5bd
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/README
@@ -0,0 +1,9 @@
+The binary frontend_main shows sample usage of the frontend, printing out
+coefficients when it has processed enough data.
+
+The binary frontend_memmap_main shows a sample usage of how to avoid all the
+init code in your runtime, by first running "frontend_generate_memmap" to
+create a header/source file that uses a baked in frontend state. This command
+could be automated as part of your build process, or you can just use the output
+directly.
+
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/bits.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/bits.h
new file mode 100644
index 0000000000..f81bc2b023
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/bits.h
@@ -0,0 +1,102 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
+
+#ifdef __cplusplus
+#include <cstdint>
+
+extern "C" {
+#endif
+
+static inline int CountLeadingZeros32Slow(uint64_t n) {
+  int zeroes = 28;
+  if (n >> 16) zeroes -= 16, n >>= 16;
+  if (n >> 8) zeroes -= 8, n >>= 8;
+  if (n >> 4) zeroes -= 4, n >>= 4;
+  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
+}
+
+static inline int CountLeadingZeros32(uint32_t n) {
+#if defined(_MSC_VER)
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if (_BitScanReverse(&result, n)) {
+    return 31 - result;
+  }
+  return 32;
+#elif defined(__GNUC__)
+
+  // Handle 0 as a special case because __builtin_clz(0) is undefined.
+  if (n == 0) {
+    return 32;
+  }
+  return __builtin_clz(n);
+#else
+  return CountLeadingZeros32Slow(n);
+#endif
+}
+
+static inline int MostSignificantBit32(uint32_t n) {
+  return 32 - CountLeadingZeros32(n);
+}
+
+static inline int CountLeadingZeros64Slow(uint64_t n) {
+  int zeroes = 60;
+  if (n >> 32) zeroes -= 32, n >>= 32;
+  if (n >> 16) zeroes -= 16, n >>= 16;
+  if (n >> 8) zeroes -= 8, n >>= 8;
+  if (n >> 4) zeroes -= 4, n >>= 4;
+  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
+}
+
+static inline int CountLeadingZeros64(uint64_t n) {
+#if defined(_MSC_VER) && defined(_M_X64)
+  // MSVC does not have __buitin_clzll. Use _BitScanReverse64.
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if (_BitScanReverse64(&result, n)) {
+    return 63 - result;
+  }
+  return 64;
+#elif defined(_MSC_VER)
+  // MSVC does not have __buitin_clzll. Compose two calls to _BitScanReverse
+  unsigned long result = 0;  // NOLINT(runtime/int)
+  if ((n >> 32) && _BitScanReverse(&result, n >> 32)) {
+    return 31 - result;
+  }
+  if (_BitScanReverse(&result, n)) {
+    return 63 - result;
+  }
+  return 64;
+#elif defined(__GNUC__)
+
+  // Handle 0 as a special case because __builtin_clzll(0) is undefined.
+  if (n == 0) {
+    return 64;
+  }
+  return __builtin_clzll(n);
+#else
+  return CountLeadingZeros64Slow(n);
+#endif
+}
+
+static inline int MostSignificantBit64(uint64_t n) {
+  return 64 - CountLeadingZeros64(n);
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/fft.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft.c
new file mode 100644
index 0000000000..1ecbb30b51
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft.c
@@ -0,0 +1,54 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h"
+
+#include <string.h>
+
+#define FIXED_POINT 16
+#include "kiss_fft.h"
+// Internal test dependency placeholder1
+// Internal test dependency placeholder2
+#include "tools/kiss_fftr.h"
+// Internal test dependency placeholder3
+
+void FftCompute(struct FftState* state, const int16_t* input,
+                int input_scale_shift) {
+  const size_t input_size = state->input_size;
+  const size_t fft_size = state->fft_size;
+
+  int16_t* fft_input = state->input;
+  // First, scale the input by the given shift.
+  int i;
+  for (i = 0; i < input_size; ++i) {
+    *fft_input++ = (*input++) << input_scale_shift;
+  }
+  // Zero out whatever else remains in the top part of the input.
+  for (; i < fft_size; ++i) {
+    *fft_input++ = 0;
+  }
+
+  // Apply the FFT.
+  kiss_fftr((const kiss_fftr_cfg)state->scratch, state->input,
+            (kiss_fft_cpx*)state->output);
+}
+
+void FftInit(struct FftState* state) {
+  // All the initialization is done in FftPopulateState()
+}
+
+void FftReset(struct FftState* state) {
+  memset(state->input, 0, state->fft_size * sizeof(*state->input));
+  memset(state->output, 0, (state->fft_size / 2 + 1) * sizeof(*state->output));
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h
new file mode 100644
index 0000000000..e7644bf2a7
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h
@@ -0,0 +1,50 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct complex_int16_t {
+  int16_t real;
+  int16_t imag;
+};
+
+struct FftState {
+  int16_t* input;
+  struct complex_int16_t* output;
+  size_t fft_size;
+  size_t input_size;
+  void* scratch;
+  size_t scratch_size;
+};
+
+void FftCompute(struct FftState* state, const int16_t* input,
+                int input_scale_shift);
+
+void FftInit(struct FftState* state);
+
+void FftReset(struct FftState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.c
new file mode 100644
index 0000000000..cc1ce209d8
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.c
@@ -0,0 +1,33 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.h"
+
+void FftWriteMemmapPreamble(FILE* fp, const struct FftState* state) {
+  fprintf(fp, "static int16_t fft_input[%zu];\n", state->fft_size);
+  fprintf(fp, "static struct complex_int16_t fft_output[%zu];\n",
+          state->fft_size / 2 + 1);
+  fprintf(fp, "static char fft_scratch[%zu];\n", state->scratch_size);
+  fprintf(fp, "\n");
+}
+
+void FftWriteMemmap(FILE* fp, const struct FftState* state,
+                       const char* variable) {
+  fprintf(fp, "%s->input = fft_input;\n", variable);
+  fprintf(fp, "%s->output = fft_output;\n", variable);
+  fprintf(fp, "%s->fft_size = %zu;\n", variable, state->fft_size);
+  fprintf(fp, "%s->input_size = %zu;\n", variable, state->input_size);
+  fprintf(fp, "%s->scratch = fft_scratch;\n", variable);
+  fprintf(fp, "%s->scratch_size = %zu;\n", variable, state->scratch_size);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.h
new file mode 100644
index 0000000000..4d10c3a92a
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_IO_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_IO_H_
+
+#include <stdio.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void FftWriteMemmapPreamble(FILE* fp, const struct FftState* state);
+void FftWriteMemmap(FILE* fp, const struct FftState* state,
+                    const char* variable);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_IO_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_test.cc b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_test.cc
new file mode 100644
index 0000000000..b8684a0b5c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_test.cc
@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace {
+
+const int16_t kFakeWindow[] = {
+    0, 1151,   0, -5944, 0, 13311,  0, -21448, 0, 28327, 0, -32256, 0, 32255,
+    0, -28328, 0, 21447, 0, -13312, 0, 5943,   0, -1152, 0};
+const int kScaleShift = 0;
+
+TEST(FftTest, CheckOutputValues) {
+  struct FftState state;
+  ASSERT_TRUE(
+      FftPopulateState(&state, sizeof(kFakeWindow) / sizeof(kFakeWindow[0])));
+
+  FftInit(&state);
+  FftCompute(&state, kFakeWindow, kScaleShift);
+
+  const struct complex_int16_t expected[] = {
+      {0, 0},    {-10, 9},     {-20, 0},   {-9, -10},     {0, 25},  {-119, 119},
+      {-887, 0}, {3000, 3000}, {0, -6401}, {-3000, 3000}, {886, 0}, {118, 119},
+      {0, 25},   {9, -10},     {19, 0},    {9, 9},        {0, 0}};
+  ASSERT_EQ(state.fft_size / 2 + 1, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i <= state.fft_size / 2; ++i) {
+    EXPECT_EQ(state.output[i].real, expected[i].real);
+    EXPECT_EQ(state.output[i].imag, expected[i].imag);
+  }
+
+  FftFreeStateContents(&state);
+}
+
+}  // namespace
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.c
new file mode 100644
index 0000000000..55494422f3
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.c
@@ -0,0 +1,71 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.h"
+
+#include <stdio.h>
+
+#define FIXED_POINT 16
+#include "kiss_fft.h"
+#include "tools/kiss_fftr.h"
+
+int FftPopulateState(struct FftState* state, size_t input_size) {
+  state->input_size = input_size;
+  state->fft_size = 1;
+  while (state->fft_size < state->input_size) {
+    state->fft_size <<= 1;
+  }
+
+  state->input = malloc(state->fft_size * sizeof(*state->input));
+  if (state->input == NULL) {
+    fprintf(stderr, "Failed to alloc fft input buffer\n");
+    return 0;
+  }
+
+  state->output =
+      malloc((state->fft_size / 2 + 1) * sizeof(*state->output) * 2);
+  if (state->output == NULL) {
+    fprintf(stderr, "Failed to alloc fft output buffer\n");
+    return 0;
+  }
+
+  // Ask kissfft how much memory it wants.
+  size_t scratch_size = 0;
+  kiss_fftr_cfg kfft_cfg =
+      kiss_fftr_alloc(state->fft_size, 0, NULL, &scratch_size);
+  if (kfft_cfg != NULL) {
+    fprintf(stderr, "Kiss memory sizing failed.\n");
+    return 0;
+  }
+  state->scratch = malloc(scratch_size);
+  if (state->scratch == NULL) {
+    fprintf(stderr, "Failed to alloc fft scratch buffer\n");
+    return 0;
+  }
+  state->scratch_size = scratch_size;
+  // Let kissfft configure the scratch space we just allocated
+  kfft_cfg = kiss_fftr_alloc(state->fft_size, 0, state->scratch, &scratch_size);
+  if (kfft_cfg != state->scratch) {
+    fprintf(stderr, "Kiss memory preallocation strategy failed.\n");
+    return 0;
+  }
+  return 1;
+}
+
+void FftFreeStateContents(struct FftState* state) {
+  free(state->input);
+  free(state->output);
+  free(state->scratch);
+}
+
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.h
new file mode 100644
index 0000000000..4935e87fc1
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Prepares and FFT for the given input size.
+int FftPopulateState(struct FftState* state, size_t input_size);
+
+// Frees any allocated buffers.
+void FftFreeStateContents(struct FftState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.c
new file mode 100644
index 0000000000..944eb1a737
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.c
@@ -0,0 +1,134 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.h"
+
+#include <string.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/bits.h"
+
+void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
+                                         struct complex_int16_t* fft_output,
+                                         int32_t* energy) {
+  const int end_index = state->end_index;
+  int i;
+  energy += state->start_index;
+  fft_output += state->start_index;
+  for (i = state->start_index; i < end_index; ++i) {
+    const int32_t real = fft_output->real;
+    const int32_t imag = fft_output->imag;
+    fft_output++;
+    const uint32_t mag_squared = (real * real) + (imag * imag);
+    *energy++ = mag_squared;
+  }
+}
+
+void FilterbankAccumulateChannels(struct FilterbankState* state,
+                                  const int32_t* energy) {
+  uint64_t* work = state->work;
+  uint64_t weight_accumulator = 0;
+  uint64_t unweight_accumulator = 0;
+
+  const int16_t* channel_frequency_starts = state->channel_frequency_starts;
+  const int16_t* channel_weight_starts = state->channel_weight_starts;
+  const int16_t* channel_widths = state->channel_widths;
+
+  int num_channels_plus_1 = state->num_channels + 1;
+  int i;
+  for (i = 0; i < num_channels_plus_1; ++i) {
+    const int32_t* magnitudes = energy + *channel_frequency_starts++;
+    const int16_t* weights = state->weights + *channel_weight_starts;
+    const int16_t* unweights = state->unweights + *channel_weight_starts++;
+    const int width = *channel_widths++;
+    int j;
+    for (j = 0; j < width; ++j) {
+      weight_accumulator += *weights++ * ((uint64_t) *magnitudes);
+      unweight_accumulator += *unweights++ * ((uint64_t) *magnitudes);
+      ++magnitudes;
+    }
+    *work++ = weight_accumulator;
+    weight_accumulator = unweight_accumulator;
+    unweight_accumulator = 0;
+  }
+}
+
+static uint16_t Sqrt32(uint32_t num) {
+  if (num == 0) {
+    return 0;
+  }
+  uint32_t res = 0;
+  int max_bit_number = 32 - MostSignificantBit32(num);
+  max_bit_number |= 1;
+  uint32_t bit = 1U << (31 - max_bit_number);
+  int iterations = (31 - max_bit_number) / 2 + 1;
+  while (iterations--) {
+    if (num >= res + bit) {
+      num -= res + bit;
+      res = (res >> 1U) + bit;
+    } else {
+      res >>= 1U;
+    }
+    bit >>= 2U;
+  }
+  // Do rounding - if we have the bits.
+  if (num > res && res != 0xFFFF) {
+    ++res;
+  }
+  return res;
+}
+
+static uint32_t Sqrt64(uint64_t num) {
+  // Take a shortcut and just use 32 bit operations if the upper word is all
+  // clear. This will cause a slight off by one issue for numbers close to 2^32,
+  // but it probably isn't going to matter (and gives us a big performance win).
+  if ((num >> 32) == 0) {
+    return Sqrt32((uint32_t) num);
+  }
+  uint64_t res = 0;
+  int max_bit_number = 64 - MostSignificantBit64(num);
+  max_bit_number |= 1;
+  uint64_t bit = 1ULL << (63 - max_bit_number);
+  int iterations = (63 - max_bit_number) / 2 + 1;
+  while (iterations--) {
+    if (num >= res + bit) {
+      num -= res + bit;
+      res = (res >> 1U) + bit;
+    } else {
+      res >>= 1U;
+    }
+    bit >>= 2U;
+  }
+  // Do rounding - if we have the bits.
+  if (num > res && res != 0xFFFFFFFFLL) {
+    ++res;
+  }
+  return res;
+}
+
+uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift) {
+  const int num_channels = state->num_channels;
+  const int64_t* work = state->work + 1;
+  // Reuse the work buffer since we're fine clobbering it at this point to hold
+  // the output.
+  uint32_t* output = (uint32_t*) state->work;
+  int i;
+  for (i = 0; i < num_channels; ++i) {
+    *output++ = Sqrt64(*work++) >> scale_down_shift;
+  }
+  return (uint32_t*) state->work;
+}
+
+void FilterbankReset(struct FilterbankState* state) {
+  memset(state->work, 0, (state->num_channels + 1) * sizeof(*state->work));
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.h
new file mode 100644
index 0000000000..0dd9c3fa65
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.h
@@ -0,0 +1,63 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h"
+
+#define kFilterbankBits 12
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FilterbankState {
+  int num_channels;
+  int start_index;
+  int end_index;
+  int16_t* channel_frequency_starts;
+  int16_t* channel_weight_starts;
+  int16_t* channel_widths;
+  int16_t* weights;
+  int16_t* unweights;
+  uint64_t* work;
+};
+
+// Converts the relevant complex values of an FFT output into energy (the
+// square magnitude).
+void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
+                                         struct complex_int16_t* fft_output,
+                                         int32_t* energy);
+
+// Computes the mel-scale filterbank on the given energy array. Output is cached
+// internally - to fetch it, you need to call FilterbankSqrt.
+void FilterbankAccumulateChannels(struct FilterbankState* state,
+                                  const int32_t* energy);
+
+// Applies an integer square root to the 64 bit intermediate values of the
+// filterbank, and returns a pointer to them. Memory will be invalidated the
+// next time FilterbankAccumulateChannels is called.
+uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift);
+
+void FilterbankReset(struct FilterbankState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.c
new file mode 100644
index 0000000000..672ddd530f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.c
@@ -0,0 +1,66 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.h"
+
+static void PrintArray(FILE* fp, const char* name, const int16_t* values,
+                       size_t size) {
+  fprintf(fp, "static int16_t filterbank_%s[] = {", name);
+  for (int i = 0; i < size; ++i) {
+    fprintf(fp, "%d", values[i]);
+    if (i < size - 1) {
+      fprintf(fp, ", ");
+    }
+  }
+  fprintf(fp, "};\n");
+}
+
+void FilterbankWriteMemmapPreamble(FILE* fp,
+                                   const struct FilterbankState* state) {
+  const int num_channels_plus_1 = state->num_channels + 1;
+
+  PrintArray(fp, "channel_frequency_starts", state->channel_frequency_starts,
+             num_channels_plus_1);
+  PrintArray(fp, "channel_weight_starts", state->channel_weight_starts,
+             num_channels_plus_1);
+  PrintArray(fp, "channel_widths", state->channel_widths, num_channels_plus_1);
+  int num_weights = 0;
+  int i;
+  for (i = 0; i < num_channels_plus_1; ++i) {
+    num_weights += state->channel_widths[i];
+  }
+  PrintArray(fp, "weights", state->weights, num_weights);
+  PrintArray(fp, "unweights", state->unweights, num_weights);
+
+  fprintf(fp, "static uint64_t filterbank_work[%d];\n", num_channels_plus_1);
+  fprintf(fp, "\n");
+}
+
+void FilterbankWriteMemmap(FILE* fp, const struct FilterbankState* state,
+                           const char* variable) {
+  fprintf(fp, "%s->num_channels = %d;\n", variable, state->num_channels);
+  fprintf(fp, "%s->start_index = %d;\n", variable, state->start_index);
+  fprintf(fp, "%s->end_index = %d;\n", variable, state->end_index);
+
+  fprintf(
+      fp,
+      "%s->channel_frequency_starts = filterbank_channel_frequency_starts;\n",
+      variable);
+  fprintf(fp, "%s->channel_weight_starts = filterbank_channel_weight_starts;\n",
+          variable);
+  fprintf(fp, "%s->channel_widths = filterbank_channel_widths;\n", variable);
+  fprintf(fp, "%s->weights = filterbank_weights;\n", variable);
+  fprintf(fp, "%s->unweights = filterbank_unweights;\n", variable);
+  fprintf(fp, "%s->work = filterbank_work;\n", variable);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.h
new file mode 100644
index 0000000000..1ddc314df2
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.h
@@ -0,0 +1,35 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_IO_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_IO_H_
+
+#include <stdio.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void FilterbankWriteMemmapPreamble(FILE* fp,
+                                   const struct FilterbankState* state);
+void FilterbankWriteMemmap(FILE* fp, const struct FilterbankState* state,
+                           const char* variable);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_IO_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_test.cc b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_test.cc
new file mode 100644
index 0000000000..88d8de4b8f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_test.cc
@@ -0,0 +1,194 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.h"
+
+#include <cstring>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace {
+
+const int kSampleRate = 1000;
+const int kSpectrumSize = 17;
+const int kStartIndex = 1;
+const int kEndIndex = 15;
+const int32_t kEnergy[] = {-1,     181,      400,      181,      625,    28322,
+                           786769, 18000000, 40972801, 18000000, 784996, 28085,
+                           625,    181,      361,      -1,       -1};
+const uint64_t kWork[] = {1835887, 61162970173, 258694800000};
+const int kScaleShift = 0;
+
+// Test filterbank generation using scaled-down defaults.
+class FilterbankTest : public ::testing::Test {
+ protected:
+  FilterbankTest() {
+    config_.num_channels = 2;
+    config_.lower_band_limit = 8.0;
+    config_.upper_band_limit = 450.0;
+  }
+
+  struct FilterbankConfig config_;
+};
+
+TEST_F(FilterbankTest, CheckStartIndex) {
+  struct FilterbankState state;
+  ASSERT_TRUE(
+      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+
+  EXPECT_EQ(state.start_index, kStartIndex);
+
+  FilterbankFreeStateContents(&state);
+}
+
+TEST_F(FilterbankTest, CheckEndIndex) {
+  struct FilterbankState state;
+  ASSERT_TRUE(
+      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+
+  EXPECT_EQ(state.end_index, kEndIndex);
+
+  FilterbankFreeStateContents(&state);
+}
+
+TEST_F(FilterbankTest, CheckChannelFrequencyStarts) {
+  struct FilterbankState state;
+  ASSERT_TRUE(
+      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {0, 4, 8};
+  ASSERT_EQ(state.num_channels + 1, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i <= state.num_channels; ++i) {
+    EXPECT_EQ(state.channel_frequency_starts[i], expected[i]);
+  }
+
+  FilterbankFreeStateContents(&state);
+}
+
+TEST_F(FilterbankTest, CheckChannelWeightStarts) {
+  struct FilterbankState state;
+  ASSERT_TRUE(
+      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {0, 8, 16};
+  ASSERT_EQ(state.num_channels + 1, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i <= state.num_channels; ++i) {
+    EXPECT_EQ(state.channel_weight_starts[i], expected[i]);
+  }
+
+  FilterbankFreeStateContents(&state);
+}
+
+TEST_F(FilterbankTest, CheckChannelWidths) {
+  struct FilterbankState state;
+  ASSERT_TRUE(
+      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {8, 8, 8};
+  ASSERT_EQ(state.num_channels + 1, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i <= state.num_channels; ++i) {
+    EXPECT_EQ(state.channel_widths[i], expected[i]);
+  }
+
+  FilterbankFreeStateContents(&state);
+}
+
+TEST_F(FilterbankTest, CheckWeights) {
+  struct FilterbankState state;
+  ASSERT_TRUE(
+      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {0, 3277, 2217, 1200, 222,  0,   0,   0,
+                              0, 3376, 2468, 1591, 744,  0,   0,   0,
+                              0, 4020, 3226, 2456, 1708, 983, 277, 0};
+  ASSERT_EQ(state.channel_weight_starts[state.num_channels] +
+                state.channel_widths[state.num_channels],
+            sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+    EXPECT_EQ(state.weights[i], expected[i]);
+  }
+
+  FilterbankFreeStateContents(&state);
+}
+
+TEST_F(FilterbankTest, CheckUnweights) {
+  struct FilterbankState state;
+  ASSERT_TRUE(
+      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+
+  const int16_t expected[] = {0, 819, 1879, 2896, 3874, 0,    0,    0,
+                              0, 720, 1628, 2505, 3352, 0,    0,    0,
+                              0, 76,  870,  1640, 2388, 3113, 3819, 0};
+  ASSERT_EQ(state.channel_weight_starts[state.num_channels] +
+                state.channel_widths[state.num_channels],
+            sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+    EXPECT_EQ(state.unweights[i], expected[i]);
+  }
+
+  FilterbankFreeStateContents(&state);
+}
+
+TEST_F(FilterbankTest, CheckConvertFftComplexToEnergy) {
+  struct FilterbankState state;
+  state.start_index = kStartIndex;
+  state.end_index = kEndIndex;
+
+  struct complex_int16_t fake_fft[] = {
+      {0, 0},    {-10, 9},     {-20, 0},   {-9, -10},     {0, 25},  {-119, 119},
+      {-887, 0}, {3000, 3000}, {0, -6401}, {-3000, 3000}, {886, 0}, {118, 119},
+      {0, 25},   {9, -10},     {19, 0},    {9, 9},        {0, 0}};
+  int32_t* energy = reinterpret_cast<int32_t*>(fake_fft);
+  FilterbankConvertFftComplexToEnergy(&state, fake_fft, energy);
+
+  for (int i = state.start_index; i < state.end_index; ++i) {
+    EXPECT_EQ(energy[i], kEnergy[i]);
+  }
+}
+
+TEST_F(FilterbankTest, CheckAccumulateChannels) {
+  struct FilterbankState state;
+  ASSERT_TRUE(
+      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+
+  FilterbankAccumulateChannels(&state, kEnergy);
+
+  ASSERT_EQ(state.num_channels + 1, sizeof(kWork) / sizeof(kWork[0]));
+  for (int i = 0; i <= state.num_channels; ++i) {
+    EXPECT_EQ(state.work[i], kWork[i]);
+  }
+
+  FilterbankFreeStateContents(&state);
+}
+
+TEST_F(FilterbankTest, CheckSqrt) {
+  struct FilterbankState state;
+  ASSERT_TRUE(
+      FilterbankPopulateState(&config_, &state, kSampleRate, kSpectrumSize));
+  std::memcpy(state.work, kWork, sizeof(kWork));
+
+  uint32_t* scaled_filterbank = FilterbankSqrt(&state, kScaleShift);
+
+  const uint32_t expected[] = {247311, 508620};
+  ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < state.num_channels; ++i) {
+    EXPECT_EQ(scaled_filterbank[i], expected[i]);
+  }
+
+  FilterbankFreeStateContents(&state);
+}
+
+}  // namespace
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.c
new file mode 100644
index 0000000000..53b5e45073
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.c
@@ -0,0 +1,225 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#define kFilterbankIndexAlignment 4
+#define kFilterbankChannelBlockSize 4
+
+void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config) {
+  config->num_channels = 32;
+  config->lower_band_limit = 125.0f;
+  config->upper_band_limit = 7500.0f;
+  config->output_scale_shift = 7;
+}
+
+static float FreqToMel(float freq) {
+  return 1127.0 * log(1.0 + (freq / 700.0));
+}
+
+static void CalculateCenterFrequencies(const int num_channels,
+                                       const float lower_frequency_limit,
+                                       const float upper_frequency_limit,
+                                       float* center_frequencies) {
+  assert(lower_frequency_limit >= 0.0f);
+  assert(upper_frequency_limit > lower_frequency_limit);
+
+  const float mel_low = FreqToMel(lower_frequency_limit);
+  const float mel_hi = FreqToMel(upper_frequency_limit);
+  const float mel_span = mel_hi - mel_low;
+  const float mel_spacing = mel_span / ((float) num_channels);
+  int i;
+  for (i = 0; i < num_channels; ++i) {
+    center_frequencies[i] = mel_low + (mel_spacing * (i + 1));
+  }
+}
+
+static void QuantizeFilterbankWeights(const float float_weight,
+                                      int16_t* weight, int16_t* unweight) {
+  *weight = floor(float_weight * (1 << kFilterbankBits) + 0.5);
+  *unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5);
+}
+
+int FilterbankPopulateState(const struct FilterbankConfig* config,
+                            struct FilterbankState* state,
+                            int sample_rate, int spectrum_size) {
+  state->num_channels = config->num_channels;
+  const int num_channels_plus_1 = config->num_channels + 1;
+
+  // How should we align things to index counts given the byte alignment?
+  const int index_alignment =
+      (kFilterbankIndexAlignment < sizeof(int16_t)
+           ? 1
+           : kFilterbankIndexAlignment / sizeof(int16_t));
+
+  state->channel_frequency_starts =
+      malloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts));
+  state->channel_weight_starts =
+      malloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts));
+  state->channel_widths =
+      malloc(num_channels_plus_1 * sizeof(*state->channel_widths));
+  state->work = malloc(num_channels_plus_1 * sizeof(*state->work));
+
+  float* center_mel_freqs =
+      malloc(num_channels_plus_1 * sizeof(*center_mel_freqs));
+  int16_t* actual_channel_starts =
+      malloc(num_channels_plus_1 * sizeof(*actual_channel_starts));
+  int16_t* actual_channel_widths =
+      malloc(num_channels_plus_1 * sizeof(*actual_channel_widths));
+
+  if (state->channel_frequency_starts == NULL ||
+      state->channel_weight_starts == NULL ||
+      state->channel_widths == NULL ||
+      center_mel_freqs == NULL ||
+      actual_channel_starts == NULL ||
+      actual_channel_widths == NULL) {
+    free(center_mel_freqs);
+    free(actual_channel_starts);
+    free(actual_channel_widths);
+    fprintf(stderr, "Failed to allocate channel buffers\n");
+    return 0;
+  }
+
+  CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit,
+                             config->upper_band_limit, center_mel_freqs);
+
+  // Always exclude DC.
+  const float hz_per_sbin = 0.5 * sample_rate / ((float) spectrum_size - 1);
+  state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin;
+  state->end_index = 0;  // Initialized to zero here, but actually set below.
+
+  // For each channel, we need to figure out what frequencies belong to it, and
+  // how much padding we need to add so that we can efficiently multiply the
+  // weights and unweights for accumulation. To simplify the multiplication
+  // logic, all channels will have some multiplication to do (even if there are
+  // no frequencies that accumulate to that channel) - they will be directed to
+  // a set of zero weights.
+  int chan_freq_index_start = state->start_index;
+  int weight_index_start = 0;
+  int needs_zeros = 0;
+
+  int chan;
+  for (chan = 0; chan < num_channels_plus_1; ++chan) {
+    // Keep jumping frequencies until we overshoot the bound on this channel.
+    int freq_index = chan_freq_index_start;
+    while (FreqToMel((freq_index) * hz_per_sbin) <= center_mel_freqs[chan]) {
+      ++freq_index;
+    }
+
+    const int width = freq_index - chan_freq_index_start;
+    actual_channel_starts[chan] = chan_freq_index_start;
+    actual_channel_widths[chan] = width;
+
+    if (width == 0) {
+      // This channel doesn't actually get anything from the frequencies, it's
+      // always zero. We need then to insert some 'zero' weights into the
+      // output, and just redirect this channel to do a single multiplication at
+      // this point. For simplicity, the zeros are placed at the beginning of
+      // the weights arrays, so we have to go and update all the other
+      // weight_starts to reflect this shift (but only once).
+      state->channel_frequency_starts[chan] = 0;
+      state->channel_weight_starts[chan] = 0;
+      state->channel_widths[chan] = kFilterbankChannelBlockSize;
+      if (!needs_zeros) {
+        needs_zeros = 1;
+        int j;
+        for (j = 0; j < chan; ++j) {
+          state->channel_weight_starts[j] += kFilterbankChannelBlockSize;
+        }
+        weight_index_start += kFilterbankChannelBlockSize;
+      }
+    } else {
+      // How far back do we need to go to ensure that we have the proper
+      // alignment?
+      const int aligned_start =
+          (chan_freq_index_start / index_alignment) * index_alignment;
+      const int aligned_width =
+          (chan_freq_index_start - aligned_start + width);
+      const int padded_width =
+          (((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) *
+          kFilterbankChannelBlockSize;
+
+      state->channel_frequency_starts[chan] = aligned_start;
+      state->channel_weight_starts[chan] = weight_index_start;
+      state->channel_widths[chan] = padded_width;
+      weight_index_start += padded_width;
+    }
+    chan_freq_index_start = freq_index;
+  }
+
+  // Allocate the two arrays to store the weights - weight_index_start contains
+  // the index of what would be the next set of weights that we would need to
+  // add, so that's how many weights we need to allocate.
+  state->weights = calloc(weight_index_start, sizeof(*state->weights));
+  state->unweights = calloc(weight_index_start, sizeof(*state->unweights));
+
+  // If the alloc failed, we also need to nuke the arrays.
+  if (state->weights == NULL || state->unweights == NULL) {
+    free(center_mel_freqs);
+    free(actual_channel_starts);
+    free(actual_channel_widths);
+    fprintf(stderr, "Failed to allocate weights or unweights\n");
+    return 0;
+  }
+
+  // Next pass, compute all the weights. Since everything has been memset to
+  // zero, we only need to fill in the weights that correspond to some frequency
+  // for a channel.
+  const float mel_low = FreqToMel(config->lower_band_limit);
+  for (chan = 0; chan < num_channels_plus_1; ++chan) {
+    int frequency = actual_channel_starts[chan];
+    const int num_frequencies = actual_channel_widths[chan];
+    const int frequency_offset =
+        frequency - state->channel_frequency_starts[chan];
+    const int weight_start = state->channel_weight_starts[chan];
+    const float denom_val = (chan == 0) ? mel_low : center_mel_freqs[chan - 1];
+
+    int j;
+    for (j = 0; j < num_frequencies; ++j, ++frequency) {
+      const float weight =
+          (center_mel_freqs[chan] - FreqToMel(frequency * hz_per_sbin)) /
+          (center_mel_freqs[chan] - denom_val);
+
+      // Make the float into an integer for the weights (and unweights).
+      const int weight_index = weight_start + frequency_offset + j;
+      QuantizeFilterbankWeights(weight, state->weights + weight_index,
+                                state->unweights + weight_index);
+    }
+    if (frequency > state->end_index) {
+      state->end_index = frequency;
+    }
+  }
+
+  free(center_mel_freqs);
+  free(actual_channel_starts);
+  free(actual_channel_widths);
+  if (state->end_index >= spectrum_size) {
+    fprintf(stderr, "Filterbank end_index is above spectrum size.\n");
+    return 0;
+  }
+  return 1;
+}
+
+void FilterbankFreeStateContents(struct FilterbankState* state) {
+  free(state->channel_frequency_starts);
+  free(state->channel_weight_starts);
+  free(state->channel_widths);
+  free(state->weights);
+  free(state->unweights);
+  free(state->work);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.h
new file mode 100644
index 0000000000..9ec9bc9302
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.h
@@ -0,0 +1,50 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FilterbankConfig {
+  // number of frequency channel buckets for filterbank
+  int num_channels;
+  // maximum frequency to include
+  float upper_band_limit;
+  // minimum frequency to include
+  float lower_band_limit;
+  // unused
+  int output_scale_shift;
+};
+
+// Fills the frontendConfig with "sane" defaults.
+void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config);
+
+// Allocates any buffers.
+int FilterbankPopulateState(const struct FilterbankConfig* config,
+                            struct FilterbankState* state, int sample_rate,
+                            int spectrum_size);
+
+// Frees any allocated buffers.
+void FilterbankFreeStateContents(struct FilterbankState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.c
new file mode 100644
index 0000000000..de7a60b56f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.c
@@ -0,0 +1,72 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h"
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/bits.h"
+
+struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
+                                             const int16_t* samples,
+                                             size_t num_samples,
+                                             size_t* num_samples_read) {
+  struct FrontendOutput output;
+  output.values = NULL;
+  output.size = 0;
+
+  // Try to apply the window - if it fails, return and wait for more data.
+  if (!WindowProcessSamples(&state->window, samples, num_samples,
+                            num_samples_read)) {
+    return output;
+  }
+
+  // Apply the FFT to the window's output (and scale it so that the fixed point
+  // FFT can have as much resolution as possible).
+  int input_shift =
+      15 - MostSignificantBit32(state->window.max_abs_output_value);
+  FftCompute(&state->fft, state->window.output, input_shift);
+
+  // We can re-ruse the fft's output buffer to hold the energy.
+  int32_t* energy = (int32_t*) state->fft.output;
+
+  FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output,
+                                      energy);
+
+  FilterbankAccumulateChannels(&state->filterbank, energy);
+  uint32_t* scaled_filterbank = FilterbankSqrt(&state->filterbank, input_shift);
+
+  // Apply noise reduction.
+  NoiseReductionApply(&state->noise_reduction, scaled_filterbank);
+
+  if (state->pcan_gain_control.enable_pcan) {
+    PcanGainControlApply(&state->pcan_gain_control, scaled_filterbank);
+  }
+
+  // Apply the log and scale.
+  int correction_bits =
+      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
+  uint16_t* logged_filterbank =
+      LogScaleApply(&state->log_scale, scaled_filterbank,
+                    state->filterbank.num_channels, correction_bits);
+
+  output.size = state->filterbank.num_channels;
+  output.values = logged_filterbank;
+  return output;
+}
+
+void FrontendReset(struct FrontendState* state) {
+  WindowReset(&state->window);
+  FftReset(&state->fft);
+  FilterbankReset(&state->filterbank);
+  NoiseReductionReset(&state->noise_reduction);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h
new file mode 100644
index 0000000000..71ae81024c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h
@@ -0,0 +1,64 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FrontendState {
+  struct WindowState window;
+  struct FftState fft;
+  struct FilterbankState filterbank;
+  struct NoiseReductionState noise_reduction;
+  struct PcanGainControlState pcan_gain_control;
+  struct LogScaleState log_scale;
+};
+
+struct FrontendOutput {
+  const uint16_t* values;
+  size_t size;
+};
+
+// Main entry point to processing frontend samples. Updates num_samples_read to
+// contain the number of samples that have been consumed from the input array.
+// Returns a struct containing the generated output. If not enough samples were
+// added to generate a feature vector, the returned size will be 0 and the
+// values pointer will be NULL. Note that the output pointer will be invalidated
+// as soon as FrontendProcessSamples is called again, so copy the contents
+// elsewhere if you need to use them later.
+struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
+                                             const int16_t* samples,
+                                             size_t num_samples,
+                                             size_t* num_samples_read);
+
+void FrontendReset(struct FrontendState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.c
new file mode 100644
index 0000000000..40bcf24749
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.c
@@ -0,0 +1,69 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.h"
+
+#include <stdio.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft_io.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_io.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.h"
+
+int WriteFrontendStateMemmap(const char* header, const char* source,
+                             const struct FrontendState* state) {
+  // Write a header that just has our init function.
+  FILE* fp = fopen(header, "w");
+  if (!fp) {
+    fprintf(stderr, "Failed to open header '%s' for write\n", header);
+    return 0;
+  }
+  fprintf(fp, "#ifndef FRONTEND_STATE_MEMMAP_H_\n");
+  fprintf(fp, "#define FRONTEND_STATE_MEMMAP_H_\n");
+  fprintf(fp, "\n");
+  fprintf(fp, "#include \"frontend.h\"\n");
+  fprintf(fp, "\n");
+  fprintf(fp, "struct FrontendState* GetFrontendStateMemmap();\n");
+  fprintf(fp, "\n");
+  fprintf(fp, "#endif  // FRONTEND_STATE_MEMMAP_H_\n");
+  fclose(fp);
+
+  // Write out the source file that actually has everything in it.
+  fp = fopen(source, "w");
+  if (!fp) {
+    fprintf(stderr, "Failed to open source '%s' for write\n", source);
+    return 0;
+  }
+  fprintf(fp, "#include \"%s\"\n", header);
+  fprintf(fp, "\n");
+  WindowWriteMemmapPreamble(fp, &state->window);
+  FftWriteMemmapPreamble(fp, &state->fft);
+  FilterbankWriteMemmapPreamble(fp, &state->filterbank);
+  NoiseReductionWriteMemmapPreamble(fp, &state->noise_reduction);
+  fprintf(fp, "static struct FrontendState state;\n");
+  fprintf(fp, "struct FrontendState* GetFrontendStateMemmap() {\n");
+  WindowWriteMemmap(fp, &state->window, "  (&state.window)");
+  FftWriteMemmap(fp, &state->fft, "  (&state.fft)");
+  FilterbankWriteMemmap(fp, &state->filterbank, "  (&state.filterbank)");
+  NoiseReductionWriteMemmap(fp, &state->noise_reduction,
+                            "  (&state.noise_reduction)");
+  LogScaleWriteMemmap(fp, &state->log_scale, "  (&state.log_scale)");
+  fprintf(fp, "  FftInit(&state.fft);\n");
+  fprintf(fp, "  FrontendReset(&state);\n");
+  fprintf(fp, "  return &state;\n");
+  fprintf(fp, "}\n");
+  fclose(fp);
+  return 1;
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.h
new file mode 100644
index 0000000000..4f45577cae
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.h
@@ -0,0 +1,31 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_IO_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_IO_H_
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int WriteFrontendStateMemmap(const char* header, const char* source,
+                             const struct FrontendState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_IO_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_main.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_main.c
new file mode 100644
index 0000000000..46caebeec9
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_main.c
@@ -0,0 +1,70 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <stdio.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.h"
+
+int main(int argc, char** argv) {
+  struct FrontendConfig frontend_config;
+  FrontendFillConfigWithDefaults(&frontend_config);
+
+  char* filename = argv[1];
+  int sample_rate = 16000;
+
+  struct FrontendState frontend_state;
+  if (!FrontendPopulateState(&frontend_config, &frontend_state, sample_rate)) {
+    fprintf(stderr, "Failed to populate frontend state\n");
+    FrontendFreeStateContents(&frontend_state);
+    return 1;
+  }
+
+
+  FILE* fp = fopen(filename, "r");
+  if (fp == NULL) {
+    fprintf(stderr, "Failed to open %s for read\n", filename);
+    return 1;
+  }
+  fseek(fp, 0L, SEEK_END);
+  size_t audio_file_size = ftell(fp) / sizeof(int16_t);
+  fseek(fp, 0L, SEEK_SET);
+  int16_t* audio_data = malloc(audio_file_size * sizeof(int16_t));
+  int16_t* original_audio_data = audio_data;
+  if (audio_file_size !=
+      fread(audio_data, sizeof(int16_t), audio_file_size, fp)) {
+    fprintf(stderr, "Failed to read in all audio data\n");
+    return 1;
+  }
+
+  while (audio_file_size > 0) {
+    size_t num_samples_read;
+    struct FrontendOutput output = FrontendProcessSamples(
+        &frontend_state, audio_data, audio_file_size, &num_samples_read);
+    audio_data += num_samples_read;
+    audio_file_size -= num_samples_read;
+
+    if (output.values != NULL) {
+      int i;
+      for (i = 0; i < output.size; ++i) {
+        printf("%d ", output.values[i]);
+      }
+      printf("\n");
+    }
+  }
+
+  FrontendFreeStateContents(&frontend_state);
+  free(original_audio_data);
+  return 0;
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_memmap_generator.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_memmap_generator.c
new file mode 100644
index 0000000000..a4c59b0ccc
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_memmap_generator.c
@@ -0,0 +1,47 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <stdio.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_io.h"
+
+int main(int argc, char** argv) {
+  if (argc != 3) {
+    fprintf(stderr,
+            "%s requires exactly two parameters - the names of the header and "
+            "source files to save\n");
+    return 1;
+  }
+  struct FrontendConfig frontend_config;
+  FrontendFillConfigWithDefaults(&frontend_config);
+
+  int sample_rate = 16000;
+  struct FrontendState frontend_state;
+  if (!FrontendPopulateState(&frontend_config, &frontend_state, sample_rate)) {
+    fprintf(stderr, "Failed to populate frontend state\n");
+    FrontendFreeStateContents(&frontend_state);
+    return 1;
+  }
+
+  if (!WriteFrontendStateMemmap(argv[1], argv[2], &frontend_state)) {
+    fprintf(stderr, "Failed to write memmap\n");
+    FrontendFreeStateContents(&frontend_state);
+    return 1;
+  }
+
+  FrontendFreeStateContents(&frontend_state);
+  return 0;
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_memmap_main.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_memmap_main.c
new file mode 100644
index 0000000000..a4264922b9
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_memmap_main.c
@@ -0,0 +1,58 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <stdio.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h"
+#include "memmap.h"
+
+int main(int argc, char** argv) {
+  struct FrontendState* frontend_state = GetFrontendStateMemmap();
+
+  char* filename = argv[1];
+  FILE* fp = fopen(filename, "r");
+  if (fp == NULL) {
+    fprintf(stderr, "Failed to open %s for read\n", filename);
+    return 1;
+  }
+  fseek(fp, 0L, SEEK_END);
+  size_t audio_file_size = ftell(fp) / sizeof(int16_t);
+  fseek(fp, 0L, SEEK_SET);
+  int16_t* audio_data = malloc(audio_file_size * sizeof(int16_t));
+  int16_t* original_audio_data = audio_data;
+  if (audio_file_size !=
+      fread(audio_data, sizeof(int16_t), audio_file_size, fp)) {
+    fprintf(stderr, "Failed to read in all audio data\n");
+    return 1;
+  }
+
+  while (audio_file_size > 0) {
+    size_t num_samples_read;
+    struct FrontendOutput output = FrontendProcessSamples(
+        frontend_state, audio_data, audio_file_size, &num_samples_read);
+    audio_data += num_samples_read;
+    audio_file_size -= num_samples_read;
+
+    if (output.values != NULL) {
+      int i;
+      for (i = 0; i < output.size; ++i) {
+        printf("%d ", output.values[i]);
+      }
+      printf("\n");
+    }
+  }
+
+  free(original_audio_data);
+  return 0;
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_test.cc b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_test.cc
new file mode 100644
index 0000000000..f06e2565c2
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_test.cc
@@ -0,0 +1,120 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace {
+
+const int kSampleRate = 1000;
+const int kWindowSamples = 25;
+const int kStepSamples = 10;
+const int16_t kFakeAudioData[] = {
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768,
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768,
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768};
+
+// Test end-to-end frontend behaviors.
+class FrontendTest : public ::testing::Test {
+ protected:
+  FrontendTest() {
+    config_.window.size_ms = 25;
+    config_.window.step_size_ms = 10;
+    config_.noise_reduction.smoothing_bits = 10;
+    config_.filterbank.num_channels = 2;
+    config_.filterbank.lower_band_limit = 8.0;
+    config_.filterbank.upper_band_limit = 450.0;
+    config_.noise_reduction.smoothing_bits = 10;
+    config_.noise_reduction.even_smoothing = 0.025;
+    config_.noise_reduction.odd_smoothing = 0.06;
+    config_.noise_reduction.min_signal_remaining = 0.05;
+    config_.pcan_gain_control.enable_pcan = true;
+    config_.pcan_gain_control.strength = 0.95;
+    config_.pcan_gain_control.offset = 80.0;
+    config_.pcan_gain_control.gain_bits = 21;
+    config_.log_scale.enable_log = true;
+    config_.log_scale.scale_shift = 6;
+  }
+
+  struct FrontendConfig config_;
+};
+
+TEST_F(FrontendTest, CheckOutputValues) {
+  struct FrontendState state;
+  ASSERT_TRUE(FrontendPopulateState(&config_, &state, kSampleRate));
+  size_t num_samples_read;
+
+  struct FrontendOutput output = FrontendProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read);
+
+  const uint16_t expected[] = {479, 425};
+  ASSERT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < output.size; ++i) {
+    EXPECT_EQ(output.values[i], expected[i]);
+  }
+
+  FrontendFreeStateContents(&state);
+}
+
+TEST_F(FrontendTest, CheckConsecutiveWindow) {
+  struct FrontendState state;
+  ASSERT_TRUE(FrontendPopulateState(&config_, &state, kSampleRate));
+  size_t num_samples_read;
+
+  FrontendProcessSamples(&state, kFakeAudioData,
+                         sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]),
+                         &num_samples_read);
+  struct FrontendOutput output = FrontendProcessSamples(
+      &state, kFakeAudioData + kWindowSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
+      &num_samples_read);
+
+  const int16_t expected[] = {436, 378};
+  ASSERT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < output.size; ++i) {
+    EXPECT_EQ(output.values[i], expected[i]);
+  }
+
+  FrontendFreeStateContents(&state);
+}
+
+TEST_F(FrontendTest, CheckNotEnoughSamples) {
+  struct FrontendState state;
+  ASSERT_TRUE(FrontendPopulateState(&config_, &state, kSampleRate));
+  size_t num_samples_read;
+
+  FrontendProcessSamples(&state, kFakeAudioData,
+                         sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]),
+                         &num_samples_read);
+  FrontendProcessSamples(
+      &state, kFakeAudioData + kWindowSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
+      &num_samples_read);
+  struct FrontendOutput output = FrontendProcessSamples(
+      &state, kFakeAudioData + kWindowSamples + kStepSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples -
+          kStepSamples,
+      &num_samples_read);
+
+  EXPECT_EQ(output.size, 0);
+  EXPECT_EQ(output.values, nullptr);
+
+  FrontendFreeStateContents(&state);
+}
+
+}  // namespace
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.c
new file mode 100644
index 0000000000..ae2d9ae6c4
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.c
@@ -0,0 +1,87 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/bits.h"
+
+void FrontendFillConfigWithDefaults(struct FrontendConfig* config) {
+  WindowFillConfigWithDefaults(&config->window);
+  FilterbankFillConfigWithDefaults(&config->filterbank);
+  NoiseReductionFillConfigWithDefaults(&config->noise_reduction);
+  PcanGainControlFillConfigWithDefaults(&config->pcan_gain_control);
+  LogScaleFillConfigWithDefaults(&config->log_scale);
+}
+
+int FrontendPopulateState(const struct FrontendConfig* config,
+                          struct FrontendState* state, int sample_rate) {
+  memset(state, 0, sizeof(*state));
+
+  if (!WindowPopulateState(&config->window, &state->window, sample_rate)) {
+    fprintf(stderr, "Failed to populate window state\n");
+    return 0;
+  }
+
+  if (!FftPopulateState(&state->fft, state->window.size)) {
+    fprintf(stderr, "Failed to populate fft state\n");
+    return 0;
+  }
+  FftInit(&state->fft);
+
+  if (!FilterbankPopulateState(&config->filterbank, &state->filterbank,
+                               sample_rate, state->fft.fft_size / 2 + 1)) {
+    fprintf(stderr, "Failed to populate filterbank state\n");
+    return 0;
+  }
+
+  if (!NoiseReductionPopulateState(&config->noise_reduction,
+                                   &state->noise_reduction,
+                                   state->filterbank.num_channels)) {
+    fprintf(stderr, "Failed to populate noise reduction state\n");
+    return 0;
+  }
+
+  int input_correction_bits =
+      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
+  if (!PcanGainControlPopulateState(&config->pcan_gain_control,
+                                    &state->pcan_gain_control,
+                                    state->noise_reduction.estimate,
+                                    state->filterbank.num_channels,
+                                    state->noise_reduction.smoothing_bits,
+                                    input_correction_bits)) {
+    fprintf(stderr, "Failed to populate pcan gain control state\n");
+    return 0;
+  }
+
+  if (!LogScalePopulateState(&config->log_scale, &state->log_scale)) {
+    fprintf(stderr, "Failed to populate log scale state\n");
+    return 0;
+  }
+
+  FrontendReset(state);
+
+  // All good, return a true value.
+  return 1;
+}
+
+void FrontendFreeStateContents(struct FrontendState* state) {
+  WindowFreeStateContents(&state->window);
+  FftFreeStateContents(&state->fft);
+  FilterbankFreeStateContents(&state->filterbank);
+  NoiseReductionFreeStateContents(&state->noise_reduction);
+  PcanGainControlFreeStateContents(&state->pcan_gain_control);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.h
new file mode 100644
index 0000000000..a958b610ea
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/frontend_util.h
@@ -0,0 +1,52 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/fft_util.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/filterbank_util.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/frontend.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FrontendConfig {
+  struct WindowConfig window;
+  struct FilterbankConfig filterbank;
+  struct NoiseReductionConfig noise_reduction;
+  struct PcanGainControlConfig pcan_gain_control;
+  struct LogScaleConfig log_scale;
+};
+
+// Fills the frontendConfig with "sane" defaults.
+void FrontendFillConfigWithDefaults(struct FrontendConfig* config);
+
+// Allocates any buffers.
+int FrontendPopulateState(const struct FrontendConfig* config,
+                          struct FrontendState* state, int sample_rate);
+
+// Frees any allocated buffers.
+void FrontendFreeStateContents(struct FrontendState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.c
new file mode 100644
index 0000000000..f8d3210233
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.c
@@ -0,0 +1,30 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.h"
+const uint16_t kLogLut[]
+#ifndef _MSC_VER
+    __attribute__((aligned(4)))
+#endif  // _MSV_VER
+    = {0,    224,  442,  654,  861,  1063, 1259, 1450, 1636, 1817, 1992, 2163,
+       2329, 2490, 2646, 2797, 2944, 3087, 3224, 3358, 3487, 3611, 3732, 3848,
+       3960, 4068, 4172, 4272, 4368, 4460, 4549, 4633, 4714, 4791, 4864, 4934,
+       5001, 5063, 5123, 5178, 5231, 5280, 5326, 5368, 5408, 5444, 5477, 5507,
+       5533, 5557, 5578, 5595, 5610, 5622, 5631, 5637, 5640, 5641, 5638, 5633,
+       5626, 5615, 5602, 5586, 5568, 5547, 5524, 5498, 5470, 5439, 5406, 5370,
+       5332, 5291, 5249, 5203, 5156, 5106, 5054, 5000, 4944, 4885, 4825, 4762,
+       4697, 4630, 4561, 4490, 4416, 4341, 4264, 4184, 4103, 4020, 3935, 3848,
+       3759, 3668, 3575, 3481, 3384, 3286, 3186, 3084, 2981, 2875, 2768, 2659,
+       2549, 2437, 2323, 2207, 2090, 1971, 1851, 1729, 1605, 1480, 1353, 1224,
+       1094, 963,  830,  695,  559,  421,  282,  142,  0,    0};
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.h
new file mode 100644
index 0000000000..53dd1fa405
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.h
@@ -0,0 +1,40 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Number of segments in the log lookup table. The table will be kLogSegments+1
+// in length (with some padding).
+#define kLogSegments 128
+#define kLogSegmentsLog2 7
+
+// Scale used by lookup table.
+#define kLogScale 65536
+#define kLogScaleLog2 16
+#define kLogCoeff 45426
+
+extern const uint16_t kLogLut[];
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.c
new file mode 100644
index 0000000000..4b12461871
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.c
@@ -0,0 +1,83 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.h"
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/bits.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_lut.h"
+
+#define kuint16max 0x0000FFFF
+
+// The following functions implement integer logarithms of various sizes. The
+// approximation is calculated according to method described in
+//       www.inti.gob.ar/electronicaeinformatica/instrumentacion/utic/
+//       publicaciones/SPL2007/Log10-spl07.pdf
+// It first calculates log2 of the input and then converts it to natural
+// logarithm.
+
+static uint32_t Log2FractionPart(const uint32_t x, const uint32_t log2x) {
+  // Part 1
+  int32_t frac = x - (1LL << log2x);
+  if (log2x < kLogScaleLog2) {
+    frac <<= kLogScaleLog2 - log2x;
+  } else {
+    frac >>= log2x - kLogScaleLog2;
+  }
+  // Part 2
+  const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2);
+  const uint32_t seg_unit =
+      (((uint32_t) 1) << kLogScaleLog2) >> kLogSegmentsLog2;
+
+  const int32_t c0 = kLogLut[base_seg];
+  const int32_t c1 = kLogLut[base_seg + 1];
+  const int32_t seg_base = seg_unit * base_seg;
+  const int32_t rel_pos = ((c1 - c0) * (frac - seg_base)) >> kLogScaleLog2;
+  return frac + c0 + rel_pos;
+}
+
+static uint32_t Log(const uint32_t x, const uint32_t scale_shift) {
+  const uint32_t integer = MostSignificantBit32(x) - 1;
+  const uint32_t fraction = Log2FractionPart(x, integer);
+  const uint32_t log2 = (integer << kLogScaleLog2) + fraction;
+  const uint32_t round = kLogScale / 2;
+  const uint32_t loge =
+      (((uint64_t) kLogCoeff) * log2 + round) >> kLogScaleLog2;
+  // Finally scale to our output scale
+  const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2;
+  return loge_scaled;
+}
+
+uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
+                        int signal_size, int correction_bits) {
+  const int scale_shift = state->scale_shift;
+  uint16_t* output = (uint16_t*) signal;
+  uint16_t* ret = output;
+  for (int i = 0; i < signal_size; ++i) {
+    uint32_t value = *signal++;
+    if (state->enable_log) {
+      if (correction_bits < 0) {
+        value >>= -correction_bits;
+      } else {
+        value <<= correction_bits;
+      }
+      if (value > 1) {
+        value = Log(value, scale_shift);
+      } else {
+        value = 0;
+      }
+    }
+    *output++ = (value < kuint16max) ? value : kuint16max;
+  }
+  return ret;
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.h
new file mode 100644
index 0000000000..8fd6099933
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.h
@@ -0,0 +1,39 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct LogScaleState {
+  int enable_log;
+  int scale_shift;
+};
+
+// Applies a fixed point logarithm to the signal and converts it to 16 bit. Note
+// that the signal array will be modified.
+uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
+                        int signal_size, int correction_bits);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.c
new file mode 100644
index 0000000000..f59cde951c
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.c
@@ -0,0 +1,21 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.h"
+
+void LogScaleWriteMemmap(FILE* fp, const struct LogScaleState* state,
+                         const char* variable) {
+  fprintf(fp, "%s->enable_log = %d;\n", variable, state->enable_log);
+  fprintf(fp, "%s->scale_shift = %d;\n", variable, state->scale_shift);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.h
new file mode 100644
index 0000000000..5444303b24
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_io.h
@@ -0,0 +1,33 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_IO_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_IO_H_
+
+#include <stdio.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void LogScaleWriteMemmap(FILE* fp, const struct LogScaleState* state,
+                         const char* variable);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_IO_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_test.cc b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_test.cc
new file mode 100644
index 0000000000..312d7ea740
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_test.cc
@@ -0,0 +1,58 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace {
+
+const int kScaleShift = 6;
+const int kCorrectionBits = -1;
+
+TEST(LogScaleTest, CheckOutputValues) {
+  struct LogScaleState state;
+  state.enable_log = true;
+  state.scale_shift = kScaleShift;
+
+  uint32_t fake_signal[] = {3578, 1533};
+  uint16_t* output = LogScaleApply(&state, fake_signal,
+                                   sizeof(fake_signal) / sizeof(fake_signal[0]),
+                                   kCorrectionBits);
+
+  const uint16_t expected[] = {479, 425};
+  for (int i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+    EXPECT_EQ(output[i], expected[i]);
+  }
+}
+
+TEST(LogScaleTest, CheckOutputValuesNoLog) {
+  struct LogScaleState state;
+  state.enable_log = false;
+  state.scale_shift = kScaleShift;
+
+  uint32_t fake_signal[] = {85964, 45998};
+  uint16_t* output = LogScaleApply(&state, fake_signal,
+                                   sizeof(fake_signal) / sizeof(fake_signal[0]),
+                                   kCorrectionBits);
+
+  const uint16_t expected[] = {65535, 45998};
+  for (int i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+    EXPECT_EQ(output[i], expected[i]);
+  }
+}
+
+}  // namespace
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.c
new file mode 100644
index 0000000000..8a025fbf72
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.c
@@ -0,0 +1,27 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.h"
+
+void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config) {
+  config->enable_log = 1;
+  config->scale_shift = 6;
+}
+
+int LogScalePopulateState(const struct LogScaleConfig* config,
+                          struct LogScaleState* state) {
+  state->enable_log = config->enable_log;
+  state->scale_shift = config->scale_shift;
+  return 1;
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.h
new file mode 100644
index 0000000000..33b21f30b1
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale_util.h
@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/log_scale.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct LogScaleConfig {
+  // set to false (0) to disable this module
+  int enable_log;
+  // scale results by 2^(scale_shift)
+  int scale_shift;
+};
+
+// Populates the LogScaleConfig with "sane" default values.
+void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config);
+
+// Allocates any buffers.
+int LogScalePopulateState(const struct LogScaleConfig* config,
+                          struct LogScaleState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.c
new file mode 100644
index 0000000000..92f8b58d74
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.c
@@ -0,0 +1,51 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.h"
+
+#include <string.h>
+
+void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) {
+  int i;
+  for (i = 0; i < state->num_channels; ++i) {
+    const uint32_t smoothing =
+        ((i & 1) == 0) ? state->even_smoothing : state->odd_smoothing;
+    const uint32_t one_minus_smoothing = (1 << kNoiseReductionBits) - smoothing;
+
+    // Update the estimate of the noise.
+    const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits;
+    uint32_t estimate =
+        (((uint64_t) signal_scaled_up * smoothing) +
+         ((uint64_t) state->estimate[i] * one_minus_smoothing)) >>
+        kNoiseReductionBits;
+    state->estimate[i] = estimate;
+
+    // Make sure that we can't get a negative value for the signal - estimate.
+    if (estimate > signal_scaled_up) {
+      estimate = signal_scaled_up;
+    }
+
+    const uint32_t floor =
+        ((uint64_t) signal[i] * state->min_signal_remaining) >>
+        kNoiseReductionBits;
+    const uint32_t subtracted = (signal_scaled_up - estimate) >>
+        state->smoothing_bits;
+    const uint32_t output = subtracted > floor ? subtracted : floor;
+    signal[i] = output;
+  }
+}
+
+void NoiseReductionReset(struct NoiseReductionState* state) {
+  memset(state->estimate, 0, sizeof(*state->estimate) * state->num_channels);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.h
new file mode 100644
index 0000000000..cc2cf2d9b7
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.h
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
+
+#define kNoiseReductionBits 14
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct NoiseReductionState {
+  int smoothing_bits;
+  uint16_t even_smoothing;
+  uint16_t odd_smoothing;
+  uint16_t min_signal_remaining;
+  int num_channels;
+  uint32_t* estimate;
+};
+
+// Removes stationary noise from each channel of the signal using a low pass
+// filter.
+void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal);
+
+void NoiseReductionReset(struct NoiseReductionState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.c
new file mode 100644
index 0000000000..1cba410436
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.c
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.h"
+
+void NoiseReductionWriteMemmapPreamble(
+    FILE* fp, const struct NoiseReductionState* state) {
+  fprintf(fp, "static uint32_t noise_reduction_estimate[%zu];\n",
+          state->num_channels);
+  fprintf(fp, "\n");
+}
+
+void NoiseReductionWriteMemmap(FILE* fp,
+                               const struct NoiseReductionState* state,
+                               const char* variable) {
+  fprintf(fp, "%s->even_smoothing = %d;\n", variable, state->even_smoothing);
+  fprintf(fp, "%s->odd_smoothing = %d;\n", variable, state->odd_smoothing);
+  fprintf(fp, "%s->min_signal_remaining = %d;\n", variable,
+          state->min_signal_remaining);
+  fprintf(fp, "%s->num_channels = %d;\n", variable, state->num_channels);
+
+  fprintf(fp, "%s->estimate = noise_reduction_estimate;\n", variable);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.h
new file mode 100644
index 0000000000..afeedfce99
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_io.h
@@ -0,0 +1,36 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_IO_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_IO_H_
+
+#include <stdio.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void NoiseReductionWriteMemmapPreamble(FILE* fp,
+                                       const struct NoiseReductionState* state);
+void NoiseReductionWriteMemmap(FILE* fp,
+                               const struct NoiseReductionState* state,
+                               const char* variable);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_IO_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_test.cc b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_test.cc
new file mode 100644
index 0000000000..f4cf486227
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_test.cc
@@ -0,0 +1,70 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace {
+
+const int kNumChannels = 2;
+
+// Test noise reduction using default config values.
+class NoiseReductionTest : public ::testing::Test {
+ protected:
+  NoiseReductionTest() {
+    config_.smoothing_bits = 10;
+    config_.even_smoothing = 0.025;
+    config_.odd_smoothing = 0.06;
+    config_.min_signal_remaining = 0.05;
+  }
+
+  struct NoiseReductionConfig config_;
+};
+
+TEST_F(NoiseReductionTest, TestNoiseReductionEstimate) {
+  struct NoiseReductionState state;
+  ASSERT_TRUE(NoiseReductionPopulateState(&config_, &state, kNumChannels));
+
+  uint32_t signal[] = {247311, 508620};
+  NoiseReductionApply(&state, signal);
+
+  const uint32_t expected[] = {6321887, 31248341};
+  ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < state.num_channels; ++i) {
+    EXPECT_EQ(state.estimate[i], expected[i]);
+  }
+
+  NoiseReductionFreeStateContents(&state);
+}
+
+TEST_F(NoiseReductionTest, TestNoiseReduction) {
+  struct NoiseReductionState state;
+  ASSERT_TRUE(NoiseReductionPopulateState(&config_, &state, kNumChannels));
+
+  uint32_t signal[] = {247311, 508620};
+  NoiseReductionApply(&state, signal);
+
+  const uint32_t expected[] = {241137, 478104};
+  ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < state.num_channels; ++i) {
+    EXPECT_EQ(signal[i], expected[i]);
+  }
+
+  NoiseReductionFreeStateContents(&state);
+}
+
+}  // namespace
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.c
new file mode 100644
index 0000000000..46f475352e
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.c
@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.h"
+
+#include <stdio.h>
+
+void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config) {
+  config->smoothing_bits = 10;
+  config->even_smoothing = 0.025;
+  config->odd_smoothing = 0.06;
+  config->min_signal_remaining = 0.05;
+}
+
+int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
+                                struct NoiseReductionState* state,
+                                int num_channels) {
+  state->smoothing_bits = config->smoothing_bits;
+  state->odd_smoothing = config->odd_smoothing * (1 << kNoiseReductionBits);
+  state->even_smoothing = config->even_smoothing * (1 << kNoiseReductionBits);
+  state->min_signal_remaining =
+      config->min_signal_remaining * (1 << kNoiseReductionBits);
+  state->num_channels = num_channels;
+  state->estimate = calloc(state->num_channels, sizeof(*state->estimate));
+  if (state->estimate == NULL) {
+    fprintf(stderr, "Failed to alloc estimate buffer\n");
+    return 0;
+  }
+  return 1;
+}
+
+void NoiseReductionFreeStateContents(struct NoiseReductionState* state) {
+  free(state->estimate);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.h
new file mode 100644
index 0000000000..207b8a679d
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction_util.h
@@ -0,0 +1,50 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/noise_reduction.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct NoiseReductionConfig {
+  // scale the signal up by 2^(smoothing_bits) before reduction
+  int smoothing_bits;
+  // smoothing coefficient for even-numbered channels
+  float even_smoothing;
+  // smoothing coefficient for odd-numbered channels
+  float odd_smoothing;
+  // fraction of signal to preserve (1.0 disables this module)
+  float min_signal_remaining;
+};
+
+// Populates the NoiseReductionConfig with "sane" default values.
+void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config);
+
+// Allocates any buffers.
+int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
+                                struct NoiseReductionState* state,
+                                int num_channels);
+
+// Frees any allocated buffers.
+void NoiseReductionFreeStateContents(struct NoiseReductionState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.c
new file mode 100644
index 0000000000..551d552e8f
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.c
@@ -0,0 +1,56 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.h"
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/bits.h"
+
+int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut) {
+  if (x <= 2) {
+    return lut[x];
+  }
+
+  const int16_t interval = MostSignificantBit32(x);
+  lut += 4 * interval - 6;
+
+  const int16_t frac = ((interval < 11)
+                        ? (x << (11 - interval))
+                        : (x >> (interval - 11))
+                       ) & 0x3FF;
+
+  int32_t result = ((int32_t) lut[2] * frac) >> 5;
+  result += ((int32_t) lut[1]) << 5;
+  result *= frac;
+  result = (result + (1 << 14)) >> 15;
+  result += lut[0];
+  return (int16_t) result;
+}
+
+uint32_t PcanShrink(const uint32_t x) {
+  if (x < (2 << kPcanSnrBits)) {
+    return (x * x) >> (2 + 2 * kPcanSnrBits - kPcanOutputBits);
+  } else {
+    return (x >> (kPcanSnrBits - kPcanOutputBits)) - (1 << kPcanOutputBits);
+  }
+}
+
+void PcanGainControlApply(struct PcanGainControlState* state,
+                          uint32_t* signal) {
+  for (int i = 0; i < state->num_channels; ++i) {
+    const uint32_t gain = WideDynamicFunction(state->noise_estimate[i],
+                                              state->gain_lut);
+    const uint32_t snr = ((uint64_t) signal[i] * gain) >> state->snr_shift;
+    signal[i] = PcanShrink(snr);
+  }
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.h
new file mode 100644
index 0000000000..cab74f49db
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.h
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#define kPcanSnrBits 12
+#define kPcanOutputBits 6
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct PcanGainControlState {
+  int enable_pcan;
+  uint32_t* noise_estimate;
+  int num_channels;
+  int16_t* gain_lut;
+  int32_t snr_shift;
+};
+
+int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut);
+
+uint32_t PcanShrink(const uint32_t x);
+
+void PcanGainControlApply(struct PcanGainControlState* state, uint32_t* signal);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc
new file mode 100644
index 0000000000..bbc36d6eac
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc
@@ -0,0 +1,59 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace {
+
+const int kNumChannels = 2;
+const int kSmoothingBits = 10;
+const int kCorrectionBits = -1;
+
+// Test pcan auto gain control using default config values.
+class PcanGainControlTest : public ::testing::Test {
+ protected:
+  PcanGainControlTest() {
+    config_.enable_pcan = 1;
+    config_.strength = 0.95;
+    config_.offset = 80.0;
+    config_.gain_bits = 21;
+  }
+
+  struct PcanGainControlConfig config_;
+};
+
+TEST_F(PcanGainControlTest, TestPcanGainControl) {
+  uint32_t estimate[] = {6321887, 31248341};
+  struct PcanGainControlState state;
+  ASSERT_TRUE(PcanGainControlPopulateState(&config_, &state, estimate,
+                                           kNumChannels, kSmoothingBits,
+                                           kCorrectionBits));
+
+  uint32_t signal[] = {241137, 478104};
+  PcanGainControlApply(&state, signal);
+
+  const uint32_t expected[] = {3578, 1533};
+  ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < state.num_channels; ++i) {
+    EXPECT_EQ(signal[i], expected[i]);
+  }
+
+  PcanGainControlFreeStateContents(&state);
+}
+
+}  // namespace
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.c
new file mode 100644
index 0000000000..4226b390bc
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.c
@@ -0,0 +1,90 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#define kint16max 0x00007FFF
+
+void PcanGainControlFillConfigWithDefaults(
+    struct PcanGainControlConfig* config) {
+  config->enable_pcan = 0;
+  config->strength = 0.95;
+  config->offset = 80.0;
+  config->gain_bits = 21;
+}
+
+int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
+                               int32_t input_bits, uint32_t x) {
+  const float x_as_float = ((float) x) / ((uint32_t) 1 << input_bits);
+  const float gain_as_float = ((uint32_t) 1 << config->gain_bits) *
+      powf(x_as_float + config->offset, -config->strength);
+
+  if (gain_as_float > kint16max) {
+    return kint16max;
+  }
+  return (int16_t) (gain_as_float + 0.5f);
+}
+
+int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
+                                 struct PcanGainControlState* state,
+                                 uint32_t* noise_estimate,
+                                 const int num_channels,
+                                 const uint16_t smoothing_bits,
+                                 const int32_t input_correction_bits) {
+  state->enable_pcan = config->enable_pcan;
+  if (!state->enable_pcan) {
+    return 1;
+  }
+  state->noise_estimate = noise_estimate;
+  state->num_channels = num_channels;
+  state->gain_lut = malloc(kWideDynamicFunctionLUTSize * sizeof(int16_t));
+  if (state->gain_lut == NULL) {
+    fprintf(stderr, "Failed to allocate gain LUT\n");
+    return 0;
+  }
+  state->snr_shift = config->gain_bits - input_correction_bits - kPcanSnrBits;
+
+  const int32_t input_bits = smoothing_bits - input_correction_bits;
+  state->gain_lut[0] = PcanGainLookupFunction(config, input_bits, 0);
+  state->gain_lut[1] = PcanGainLookupFunction(config, input_bits, 1);
+  state->gain_lut -= 6;
+  for (int interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
+    const uint32_t x0 = (uint32_t) 1 << (interval - 1);
+    const uint32_t x1 = x0 + (x0 >> 1);
+    const uint32_t x2 = (interval == kWideDynamicFunctionBits)
+        ? x0 + (x0 - 1) : 2 * x0;
+
+    const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0);
+    const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1);
+    const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2);
+
+    const int32_t diff1 = (int32_t) y1 - y0;
+    const int32_t diff2 = (int32_t) y2 - y0;
+    const int32_t a1 = 4 * diff1 - diff2;
+    const int32_t a2 = diff2 - a1;
+
+    state->gain_lut[4 * interval] = y0;
+    state->gain_lut[4 * interval + 1] = (int16_t) a1;
+    state->gain_lut[4 * interval + 2] = (int16_t) a2;
+  }
+  state->gain_lut += 6;
+  return 1;
+}
+
+void PcanGainControlFreeStateContents(struct PcanGainControlState* state) {
+  free(state->gain_lut);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.h
new file mode 100644
index 0000000000..79c0b1da69
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control_util.h
@@ -0,0 +1,57 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/pcan_gain_control.h"
+
+#define kWideDynamicFunctionBits 32
+#define kWideDynamicFunctionLUTSize (4 * kWideDynamicFunctionBits - 3)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct PcanGainControlConfig {
+  // set to false (0) to disable this module
+  int enable_pcan;
+  // gain normalization exponent (0.0 disables, 1.0 full strength)
+  float strength;
+  // positive value added in the normalization denominator
+  float offset;
+  // number of fractional bits in the gain
+  int gain_bits;
+};
+
+void PcanGainControlFillConfigWithDefaults(
+    struct PcanGainControlConfig* config);
+
+int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
+                               int32_t input_bits, uint32_t x);
+
+int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
+                                 struct PcanGainControlState* state,
+                                 uint32_t* noise_estimate,
+                                 const int num_channels,
+                                 const uint16_t smoothing_bits,
+                                 const int32_t input_correction_bits);
+
+void PcanGainControlFreeStateContents(struct PcanGainControlState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/window.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/window.c
new file mode 100644
index 0000000000..0fdc040a7a
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/window.c
@@ -0,0 +1,70 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window.h"
+
+#include <string.h>
+
+int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
+                         size_t num_samples, size_t* num_samples_read) {
+  const int size = state->size;
+
+  // Copy samples from the samples buffer over to our local input.
+  size_t max_samples_to_copy = state->size - state->input_used;
+  if (max_samples_to_copy > num_samples) {
+    max_samples_to_copy = num_samples;
+  }
+  memcpy(state->input + state->input_used, samples,
+         max_samples_to_copy * sizeof(*samples));
+  *num_samples_read = max_samples_to_copy;
+  state->input_used += max_samples_to_copy;
+
+  if (state->input_used < state->size) {
+    // We don't have enough samples to compute a window.
+    return 0;
+  }
+
+  // Apply the window to the input.
+  const int16_t* coefficients = state->coefficients;
+  const int16_t* input = state->input;
+  int16_t* output = state->output;
+  int i;
+  int16_t max_abs_output_value = 0;
+  for (i = 0; i < size; ++i) {
+    int16_t new_value =
+        (((int32_t) *input++) * *coefficients++) >> kFrontendWindowBits;
+    *output++ = new_value;
+    if (new_value < 0) {
+      new_value = -new_value;
+    }
+    if (new_value > max_abs_output_value) {
+      max_abs_output_value = new_value;
+    }
+  }
+  // Shuffle the input down by the step size, and update how much we have used.
+  memmove(state->input, state->input + state->step,
+          sizeof(*state->input) * (state->size - state->step));
+  state->input_used -= state->step;
+  state->max_abs_output_value = max_abs_output_value;
+
+  // Indicate that the output buffer is valid for the next stage.
+  return 1;
+}
+
+void WindowReset(struct WindowState* state) {
+  memset(state->input, 0, state->size * sizeof(*state->input));
+  memset(state->output, 0, state->size * sizeof(*state->output));
+  state->input_used = 0;
+  state->max_abs_output_value = 0;
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/window.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/window.h
new file mode 100644
index 0000000000..90291e5c72
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/window.h
@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#define kFrontendWindowBits 12
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct WindowState {
+  size_t size;
+  int16_t* coefficients;
+  size_t step;
+
+  int16_t* input;
+  size_t input_used;
+  int16_t* output;
+  int16_t max_abs_output_value;
+};
+
+// Applies a window to the samples coming in, stepping forward at the given
+// rate.
+int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
+                         size_t num_samples, size_t* num_samples_read);
+
+void WindowReset(struct WindowState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.c
new file mode 100644
index 0000000000..f1fee7c1ed
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.c
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.h"
+
+void WindowWriteMemmapPreamble(FILE* fp, const struct WindowState* state) {
+  fprintf(fp, "static int16_t window_coefficients[] = {\n");
+  for (int i = 0; i < state->size; ++i) {
+    fprintf(fp, "%d", state->coefficients[i]);
+    if (i < state->size - 1) {
+      fprintf(fp, ", ");
+    }
+  }
+  fprintf(fp, "};\n");
+  fprintf(fp, "static int16_t window_input[%zu];\n", state->size);
+  fprintf(fp, "static int16_t window_output[%zu];\n", state->size);
+  fprintf(fp, "\n");
+}
+
+void WindowWriteMemmap(FILE* fp, const struct WindowState* state,
+                       const char* variable) {
+  fprintf(fp, "%s->size = %zu;\n", variable, state->size);
+  fprintf(fp, "%s->coefficients = window_coefficients;\n", variable);
+  fprintf(fp, "%s->step = %zu;\n", variable, state->step);
+
+  fprintf(fp, "%s->input = window_input;\n", variable);
+  fprintf(fp, "%s->input_used = %zu;\n", variable, state->input_used);
+  fprintf(fp, "%s->output = window_output;\n", variable);
+  fprintf(fp, "%s->max_abs_output_value = %d;\n", variable,
+          state->max_abs_output_value);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.h
new file mode 100644
index 0000000000..2bab9064c1
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_io.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_IO_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_IO_H_
+
+#include <stdio.h>
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void WindowWriteMemmapPreamble(FILE* fp, const struct WindowState* state);
+void WindowWriteMemmap(FILE* fp, const struct WindowState* state,
+                       const char* variable);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_IO_H_
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/window_test.cc b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_test.cc
new file mode 100644
index 0000000000..a6c0879faa
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_test.cc
@@ -0,0 +1,157 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window.h"
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace {
+
+const int kSampleRate = 1000;
+const int kWindowSamples = 25;
+const int kStepSamples = 10;
+const int16_t kFakeAudioData[] = {
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768,
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768,
+    0, 32767, 0, -32768, 0, 32767, 0, -32768, 0, 32767, 0, -32768};
+
+// Test window function behaviors using default config values.
+class WindowTest : public ::testing::Test {
+ protected:
+  WindowTest() {
+    config_.size_ms = 25;
+    config_.step_size_ms = 10;
+  }
+
+  struct WindowConfig config_;
+};
+
+TEST_F(WindowTest, CheckCoefficients) {
+  struct WindowState state;
+  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+
+  const int16_t expected[] = {16,   144,  391,  743,  1176, 1664, 2177,
+                              2681, 3145, 3541, 3843, 4032, 4096, 4032,
+                              3843, 3541, 3145, 2681, 2177, 1664, 1176,
+                              743,  391,  144,  16};
+  ASSERT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < state.size; ++i) {
+    EXPECT_EQ(state.coefficients[i], expected[i]);
+  }
+
+  WindowFreeStateContents(&state);
+}
+
+TEST_F(WindowTest, CheckResidualInput) {
+  struct WindowState state;
+  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  size_t num_samples_read;
+
+  ASSERT_TRUE(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+
+  for (int i = kStepSamples; i < kWindowSamples; ++i) {
+    EXPECT_EQ(state.input[i - kStepSamples], kFakeAudioData[i]);
+  }
+
+  WindowFreeStateContents(&state);
+}
+
+TEST_F(WindowTest, CheckOutputValues) {
+  struct WindowState state;
+  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  size_t num_samples_read;
+
+  ASSERT_TRUE(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+
+  const int16_t expected[] = {
+      0, 1151,   0, -5944, 0, 13311,  0, -21448, 0, 28327, 0, -32256, 0, 32255,
+      0, -28328, 0, 21447, 0, -13312, 0, 5943,   0, -1152, 0};
+  ASSERT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < state.size; ++i) {
+    EXPECT_EQ(state.output[i], expected[i]);
+  }
+
+  WindowFreeStateContents(&state);
+}
+
+TEST_F(WindowTest, CheckMaxAbsValue) {
+  struct WindowState state;
+  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  size_t num_samples_read;
+
+  ASSERT_TRUE(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+
+  EXPECT_EQ(state.max_abs_output_value, 32256);
+
+  WindowFreeStateContents(&state);
+}
+
+TEST_F(WindowTest, CheckConsecutiveWindow) {
+  struct WindowState state;
+  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  size_t num_samples_read;
+
+  ASSERT_TRUE(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+  ASSERT_TRUE(WindowProcessSamples(
+      &state, kFakeAudioData + kWindowSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
+      &num_samples_read));
+
+  const int16_t expected[] = {
+      0, -1152, 0, 5943,   0, -13312, 0, 21447, 0, -28328, 0, 32255, 0, -32256,
+      0, 28327, 0, -21448, 0, 13311,  0, -5944, 0, 1151,   0};
+  ASSERT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
+  for (int i = 0; i < state.size; ++i) {
+    EXPECT_EQ(state.output[i], expected[i]);
+  }
+
+  WindowFreeStateContents(&state);
+}
+
+TEST_F(WindowTest, CheckNotEnoughSamples) {
+  struct WindowState state;
+  ASSERT_TRUE(WindowPopulateState(&config_, &state, kSampleRate));
+  size_t num_samples_read;
+
+  ASSERT_TRUE(WindowProcessSamples(
+      &state, kFakeAudioData,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
+  ASSERT_TRUE(WindowProcessSamples(
+      &state, kFakeAudioData + kWindowSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples,
+      &num_samples_read));
+  ASSERT_FALSE(WindowProcessSamples(
+      &state, kFakeAudioData + kWindowSamples + kStepSamples,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - kWindowSamples -
+          kStepSamples,
+      &num_samples_read));
+
+  EXPECT_EQ(
+      state.input_used,
+      sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]) - 2 * kStepSamples);
+
+  WindowFreeStateContents(&state);
+}
+
+}  // namespace
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.c b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.c
new file mode 100644
index 0000000000..3adde0fb0a
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.c
@@ -0,0 +1,71 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+void WindowFillConfigWithDefaults(struct WindowConfig* config) {
+  config->size_ms = 25;
+  config->step_size_ms = 10;
+}
+
+int WindowPopulateState(const struct WindowConfig* config,
+                        struct WindowState* state, int sample_rate) {
+  state->size = config->size_ms * sample_rate / 1000;
+  state->step = config->step_size_ms * sample_rate / 1000;
+
+  state->coefficients = malloc(
+      state->size * sizeof(*state->coefficients));
+  if (state->coefficients == NULL) {
+    fprintf(stderr, "Failed to allocate window coefficients\n");
+    return 0;
+  }
+
+  // Populate the window values.
+  const float arg = M_PI * 2.0 / ((float) state->size);
+  int i;
+  for (i = 0; i < state->size; ++i) {
+    float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5)));
+    // Scale it to fixed point and round it.
+    state->coefficients[i] =
+        floor(float_value * (1 << kFrontendWindowBits) + 0.5);
+  }
+
+  state->input_used = 0;
+  state->input = malloc(
+      state->size * sizeof(*state->input));
+  if (state->input == NULL) {
+    fprintf(stderr, "Failed to allocate window input\n");
+    return 0;
+  }
+
+  state->output = malloc(
+      state->size * sizeof(*state->output));
+  if (state->output == NULL) {
+    fprintf(stderr, "Failed to allocate window output\n");
+    return 0;
+  }
+
+  return 1;
+}
+
+void WindowFreeStateContents(struct WindowState* state) {
+  free(state->coefficients);
+  free(state->input);
+  free(state->output);
+}
diff --git a/tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.h b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.h
new file mode 100644
index 0000000000..52dc8f38cc
--- /dev/null
+++ b/tensorflow/contrib/lite/experimental/microfrontend/lib/window_util.h
@@ -0,0 +1,45 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
+
+#include "tensorflow/contrib/lite/experimental/microfrontend/lib/window.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct WindowConfig {
+  // length of window frame in milliseconds
+  size_t size_ms;
+  // length of step for next frame in milliseconds
+  size_t step_size_ms;
+};
+
+// Populates the WindowConfig with "sane" default values.
+void WindowFillConfigWithDefaults(struct WindowConfig* config);
+
+// Allocates any buffers.
+int WindowPopulateState(const struct WindowConfig* config,
+                        struct WindowState* state, int sample_rate);
+
+// Frees any allocated buffers.
+void WindowFreeStateContents(struct WindowState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index dc2118694a..81e6676a97 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -26,6 +26,7 @@ load("//third_party/highwayhash:workspace.bzl", highwayhash = "repo")
 load("//third_party/icu:workspace.bzl", icu = "repo")
 load("//third_party/jpeg:workspace.bzl", jpeg = "repo")
 load("//third_party/nasm:workspace.bzl", nasm = "repo")
+load("//third_party/kissfft:workspace.bzl", kissfft = "repo")
 
 def initialize_third_party():
     """ Load third party repositories.  See above load() statements. """
@@ -33,6 +34,7 @@ def initialize_third_party():
     flatbuffers()
     highwayhash()
     icu()
+    kissfft()
     jpeg()
     nasm()
 
diff --git a/third_party/kissfft/BUILD b/third_party/kissfft/BUILD
new file mode 100644
index 0000000000..82bab3ffd9
--- /dev/null
+++ b/third_party/kissfft/BUILD
@@ -0,0 +1 @@
+# This empty BUILD file is required to make Bazel treat this directory as a package.
diff --git a/third_party/kissfft/BUILD.bazel b/third_party/kissfft/BUILD.bazel
new file mode 100644
index 0000000000..a57cb6ebda
--- /dev/null
+++ b/third_party/kissfft/BUILD.bazel
@@ -0,0 +1,23 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "kiss_fftr_16",
+    srcs = [
+        "kiss_fft.c",
+        "tools/kiss_fftr.c",
+    ],
+    hdrs = [
+        "_kiss_fft_guts.h",
+        "kiss_fft.h",
+        "tools/kiss_fftr.h",
+    ],
+    copts = [
+        "-DFIXED_POINT=16",
+    ],
+)
diff --git a/third_party/kissfft/workspace.bzl b/third_party/kissfft/workspace.bzl
new file mode 100644
index 0000000000..1754eb1a90
--- /dev/null
+++ b/third_party/kissfft/workspace.bzl
@@ -0,0 +1,15 @@
+"""Loads the kissfft library, used by TF Lite."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "kissfft",
+        strip_prefix = "kissfft-cddf3833fdf24fa84b79be37efdcd348cae0e39c",
+        sha256 = "7ba83a3da1636350472e501e3e6c3418df72466990530ea273c05fa7e3dd8635",
+        urls = [
+            "https://mirror.bazel.build/github.com/mborgerding/kissfft/archive/cddf3833fdf24fa84b79be37efdcd348cae0e39c.tar.gz",
+            "https://github.com/mborgerding/kissfft/archive/cddf3833fdf24fa84b79be37efdcd348cae0e39c.tar.gz",
+        ],
+        build_file = "//third_party/kissfft:BUILD.bazel",
+    )
-- 
GitLab


From 3f23f4ddeabbdc0704444d84c158bd6c348a9f10 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 15 Oct 2018 11:07:58 -0700
Subject: [PATCH 0978/1085] Automated rollback of commit
 6fa6bd045c98bdc89424a3425e15b5161586a9a7

PiperOrigin-RevId: 217173355
---
 tensorflow/c/BUILD                            |   3 -
 tensorflow/c/c_api.cc                         |   8 +-
 tensorflow/c/c_api_function.cc                |   5 +-
 tensorflow/c/c_api_test.cc                    |   3 +-
 tensorflow/c/eager/BUILD                      |   1 -
 tensorflow/c/eager/c_api.cc                   |  10 +-
 tensorflow/cc/BUILD                           |   4 -
 tensorflow/cc/framework/cc_op_gen.cc          |  23 ++-
 tensorflow/cc/framework/cc_op_gen_main.cc     |   4 +-
 tensorflow/cc/framework/cc_op_gen_test.cc     |   5 +-
 tensorflow/cc/framework/scope.cc              |   3 +-
 tensorflow/cc/saved_model/BUILD               |   1 -
 tensorflow/cc/saved_model/loader.cc           |  10 +-
 tensorflow/cc/tutorials/example_trainer.cc    |   7 +-
 .../jit/encapsulate_xla_computations_pass.cc  |   5 +-
 tensorflow/compiler/tf2xla/kernels/BUILD      |   1 -
 .../tf2xla/kernels/conv_op_helpers.cc         |  23 ++-
 .../compiler/tf2xla/kernels/conv_op_helpers.h |   9 +-
 .../xla/tools/hex_floats_to_packed_literal.cc |   4 +-
 .../android/asset_manager_filesystem.cc       |  11 +-
 .../bigtable/kernels/bigtable_kernels.cc      |   4 +-
 .../kernels/bigtable_range_helpers.cc         |   7 +-
 .../bigtable/kernels/bigtable_range_helpers.h |   4 +-
 tensorflow/contrib/cloud/kernels/BUILD        |   3 -
 .../cloud/kernels/bigquery_table_accessor.cc  |  11 +-
 .../kernels/bigquery_table_accessor_test.cc   |   3 +-
 .../contrib/cloud/kernels/gcs_config_ops.cc   |   5 +-
 tensorflow/contrib/ffmpeg/BUILD               |   2 -
 tensorflow/contrib/ffmpeg/decode_audio_op.cc  |   8 +-
 tensorflow/contrib/ffmpeg/decode_video_op.cc  |   3 +-
 tensorflow/contrib/ffmpeg/default/BUILD       |   1 -
 .../contrib/ffmpeg/default/ffmpeg_lib.cc      |   3 +-
 tensorflow/contrib/ffmpeg/ffmpeg_lib.h        |   3 +-
 tensorflow/contrib/gdr/BUILD                  |   1 -
 tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc  |   3 +-
 tensorflow/contrib/layers/kernels/BUILD       |   1 -
 .../kernels/sparse_feature_cross_kernel.cc    |  10 +-
 .../libsvm/kernels/decode_libsvm_op.cc        |   7 +-
 tensorflow/contrib/lite/delegates/flex/BUILD  |   2 -
 .../contrib/lite/delegates/flex/delegate.cc   |   3 +-
 .../lite/delegates/flex/kernel_test.cc        |   7 +-
 .../contrib/nccl/kernels/nccl_rewrite.cc      |   7 +-
 .../contrib/saved_model/cc/saved_model/BUILD  |   1 -
 .../cc/saved_model/signature_def_utils.cc     |   4 +-
 tensorflow/contrib/session_bundle/BUILD       |   4 -
 .../contrib/session_bundle/bundle_shim.cc     |   4 +-
 .../contrib/session_bundle/session_bundle.cc  |  32 ++---
 .../contrib/session_bundle/session_bundle.h   |   8 +-
 .../contrib/session_bundle/signature_test.cc  |   4 +-
 tensorflow/contrib/tensorboard/db/BUILD       |   1 -
 .../tensorboard/db/summary_db_writer.cc       |  15 +-
 tensorflow/contrib/tpu/profiler/BUILD         |   1 -
 .../contrib/tpu/profiler/dump_tpu_profile.cc  |   8 +-
 tensorflow/contrib/verbs/BUILD                |   1 -
 tensorflow/contrib/verbs/verbs_util.cc        |   4 +-
 tensorflow/core/BUILD                         |  26 ----
 .../common_runtime/constant_folding_test.cc   |   3 +-
 tensorflow/core/common_runtime/copy_tensor.cc |   8 +-
 tensorflow/core/common_runtime/copy_tensor.h  |   4 +-
 tensorflow/core/common_runtime/device_mgr.cc  |   9 +-
 tensorflow/core/common_runtime/device_mgr.h   |   8 +-
 tensorflow/core/common_runtime/device_set.cc  |   4 +-
 .../core/common_runtime/direct_session.cc     |   3 +-
 .../core/common_runtime/direct_session.h      |   4 +-
 .../common_runtime/direct_session_test.cc     |   5 +-
 tensorflow/core/common_runtime/eager/BUILD    |   3 -
 .../core/common_runtime/eager/attr_builder.cc |  16 +--
 .../core/common_runtime/eager/attr_builder.h  |  17 ++-
 .../core/common_runtime/eager/context.cc      |   3 +-
 .../core/common_runtime/eager/execute.cc      |   5 +-
 tensorflow/core/common_runtime/function.cc    |   3 +-
 tensorflow/core/common_runtime/function.h     |   3 +-
 .../core/common_runtime/function_test.cc      |   3 +-
 .../common_runtime/gpu/gpu_device_test.cc     |   3 +-
 .../gpu/gpu_util_platform_specific.cc         |   3 +-
 .../core/common_runtime/gpu_device_context.h  |   7 +-
 .../common_runtime/lower_if_while_test.cc     |   3 +-
 tensorflow/core/common_runtime/placer.cc      |  17 ++-
 .../core/common_runtime/profile_handler.h     |   7 +-
 .../core/common_runtime/shape_refiner.cc      |   5 +-
 .../common_runtime/step_stats_collector.cc    |  15 +-
 tensorflow/core/debug/BUILD                   |   2 -
 tensorflow/core/debug/debug_graph_utils.cc    |   3 +-
 tensorflow/core/debug/debug_io_utils.cc       |   3 +-
 tensorflow/core/distributed_runtime/BUILD     |   4 -
 .../base_rendezvous_mgr.cc                    |   5 +-
 .../distributed_runtime/master_session.cc     |  21 ++-
 .../core/distributed_runtime/remote_device.cc |   5 +-
 tensorflow/core/distributed_runtime/rpc/BUILD |   7 -
 .../rpc/grpc_master_service.cc                |   7 +-
 .../rpc/grpc_remote_master.cc                 |   3 +-
 .../rpc/grpc_session_test.cc                  |   3 +-
 .../rpc/grpc_tensor_coding.cc                 |   5 +-
 .../rpc/grpc_tensorflow_server.cc             |   3 +-
 .../rpc/grpc_testlib_server.cc                |   3 +-
 .../rpc/rpc_rendezvous_mgr.cc                 |   3 +-
 .../core/distributed_runtime/tensor_coding.cc |   3 +-
 tensorflow/core/example/feature_util.h        |   4 +-
 tensorflow/core/framework/attr_value_util.cc  |  18 ++-
 tensorflow/core/framework/attr_value_util.h   |  11 +-
 tensorflow/core/framework/dataset.cc          |   3 +-
 tensorflow/core/framework/dataset.h           |  30 ++--
 tensorflow/core/framework/device_base.h       |   7 +-
 tensorflow/core/framework/function.cc         |  13 +-
 tensorflow/core/framework/function.h          |   8 +-
 tensorflow/core/framework/function_testlib.cc |   4 +-
 tensorflow/core/framework/function_testlib.h  |   4 +-
 tensorflow/core/framework/node_def_builder.cc |  37 +++--
 tensorflow/core/framework/node_def_builder.h  |  79 +++++-----
 .../core/framework/node_def_builder_test.cc   |   4 +-
 tensorflow/core/framework/node_def_util.cc    | 131 ++++++++---------
 tensorflow/core/framework/node_def_util.h     | 136 +++++++++---------
 .../core/framework/node_def_util_test.cc      |   4 +-
 tensorflow/core/framework/op.h                |  13 +-
 tensorflow/core/framework/op_def_builder.cc   |  74 +++++-----
 .../core/framework/op_def_builder_test.cc     |   6 +-
 tensorflow/core/framework/op_def_util.cc      |  16 +--
 tensorflow/core/framework/op_def_util.h       |   9 +-
 tensorflow/core/framework/op_gen_lib.cc       |  49 ++++---
 tensorflow/core/framework/op_gen_lib.h        |  10 +-
 tensorflow/core/framework/op_kernel.cc        |  57 ++++----
 tensorflow/core/framework/op_kernel.h         |  67 ++++-----
 tensorflow/core/framework/rendezvous.cc       |  33 +++--
 tensorflow/core/framework/rendezvous.h        |  11 +-
 tensorflow/core/framework/resource_mgr.cc     |   5 +-
 tensorflow/core/framework/resource_mgr.h      |   3 +-
 tensorflow/core/framework/shape_inference.cc  |   7 +-
 tensorflow/core/framework/shape_inference.h   |  12 +-
 .../framework/shape_inference_testutil.cc     |   7 +-
 .../core/framework/shape_inference_testutil.h |   4 +-
 tensorflow/core/framework/tensor.cc           |  12 +-
 tensorflow/core/framework/tensor.h            |   4 +-
 tensorflow/core/framework/tensor_util.cc      |  14 +-
 tensorflow/core/framework/types.cc            |   3 +-
 tensorflow/core/framework/types.h             |   7 +-
 .../core/framework/variant_op_registry.cc     |  10 +-
 .../core/framework/variant_op_registry.h      |  23 ++-
 tensorflow/core/graph/costmodel.h             |   4 +-
 tensorflow/core/graph/graph.cc                |   7 +-
 tensorflow/core/graph/graph.h                 |   6 +-
 tensorflow/core/graph/graph_constructor.cc    |  57 ++++----
 .../core/graph/graph_constructor_test.cc      |   3 +-
 tensorflow/core/graph/graph_def_builder.cc    |  11 +-
 tensorflow/core/graph/graph_def_builder.h     |  18 +--
 tensorflow/core/graph/graph_partition.cc      |   5 +-
 tensorflow/core/graph/node_builder.cc         |  11 +-
 tensorflow/core/graph/node_builder.h          |  21 ++-
 tensorflow/core/graph/quantize_training.cc    |   9 +-
 tensorflow/core/graph/subgraph.cc             |   4 +-
 tensorflow/core/graph/subgraph_test.cc        |   3 +-
 tensorflow/core/graph/tensor_id.cc            |  10 +-
 tensorflow/core/graph/tensor_id.h             |   8 +-
 tensorflow/core/graph/while_context.cc        |   3 +-
 tensorflow/core/graph/while_context.h         |   3 +-
 tensorflow/core/grappler/BUILD                |   2 -
 tensorflow/core/grappler/optimizers/BUILD     |   4 -
 .../optimizers/arithmetic_optimizer.cc        |  20 +--
 .../grappler/optimizers/constant_folding.cc   |   6 +-
 .../grappler/optimizers/constant_folding.h    |   5 +-
 .../core/grappler/optimizers/data/BUILD       |  10 --
 .../optimizers/data/function_utils.cc         |  31 ++--
 .../grappler/optimizers/data/function_utils.h |  27 ++--
 .../grappler/optimizers/data/fusion_utils.cc  |  14 +-
 .../grappler/optimizers/data/fusion_utils.h   |  14 +-
 .../optimizers/data/graph_test_utils.cc       |  21 ++-
 .../optimizers/data/graph_test_utils.h        |  22 ++-
 .../grappler/optimizers/data/graph_utils.cc   |  22 ++-
 .../grappler/optimizers/data/graph_utils.h    |  23 ++-
 .../optimizers/data/graph_utils_test.cc       |   3 +-
 .../optimizers/data/latency_all_edges.cc      |   5 +-
 .../data/map_and_batch_fusion_test.cc         |   7 +-
 .../optimizers/data/map_vectorization_test.cc |  24 ++--
 .../optimizers/data/noop_elimination_test.cc  |   9 +-
 .../optimizers/data/vectorization_utils.cc    |   3 +-
 .../data/vectorization_utils_test.cc          |   5 +-
 .../optimizers/dependency_optimizer.cc        |   6 +-
 .../optimizers/scoped_allocator_optimizer.cc  |   3 +-
 .../optimizers/scoped_allocator_optimizer.h   |   4 +-
 tensorflow/core/grappler/utils.cc             |   8 +-
 tensorflow/core/grappler/utils.h              |  30 ++--
 tensorflow/core/grappler/utils/BUILD          |   1 -
 tensorflow/core/grappler/utils/functions.cc   |   5 +-
 tensorflow/core/grappler/utils_test.cc        |  22 ++-
 tensorflow/core/kernels/BUILD                 |  17 +--
 tensorflow/core/kernels/conv_grad_ops.cc      |  22 +--
 tensorflow/core/kernels/conv_grad_ops.h       |  19 +--
 tensorflow/core/kernels/data/BUILD            |   4 -
 .../core/kernels/data/cache_dataset_ops.cc    |   5 +-
 tensorflow/core/kernels/data/dataset_utils.cc |   5 +-
 tensorflow/core/kernels/data/dataset_utils.h  |   5 +-
 .../core/kernels/data/experimental/BUILD      |   1 -
 .../data/experimental/csv_dataset_op.cc       |  37 +++--
 .../data/experimental/indexed_dataset.h       |   4 +-
 tensorflow/core/kernels/data/iterator_ops.cc  |  23 ++-
 tensorflow/core/kernels/data/writer_ops.cc    |   4 +-
 tensorflow/core/kernels/decode_bmp_op.cc      |   3 +-
 tensorflow/core/kernels/decode_csv_op.cc      |   5 +-
 tensorflow/core/kernels/decode_image_op.cc    |  13 +-
 tensorflow/core/kernels/deep_conv2d.cc        |   3 +-
 .../core/kernels/extract_jpeg_shape_op.cc     |   3 +-
 tensorflow/core/kernels/gpu_utils.h           |   3 +-
 tensorflow/core/kernels/hexagon/BUILD         |   2 -
 .../kernels/hexagon/graph_transfer_utils.cc   |   3 +-
 .../core/kernels/hexagon/graph_transferer.cc  |   5 +-
 .../kernels/hexagon/graph_transferer_test.cc  |   5 +-
 .../hexagon/hexagon_control_wrapper.cc        |   3 +-
 .../kernels/immutable_constant_op_test.cc     |   7 +-
 tensorflow/core/kernels/list_kernels.cc       |   3 +-
 tensorflow/core/kernels/reduce_join_op.cc     |   4 +-
 .../remote_fused_graph_execute_op_test.cc     |   3 +-
 tensorflow/core/kernels/restore_v2_op_test.cc |   3 +-
 tensorflow/core/kernels/shape_op_test.cc      |   3 +-
 tensorflow/core/kernels/sparse_cross_op.cc    |  10 +-
 .../sparse_dense_binary_op_shared_test.cc     |   3 +-
 tensorflow/core/kernels/sparse_reduce_op.cc   |   5 +-
 .../core/kernels/spectrogram_test_utils.cc    |  31 ++--
 tensorflow/core/kernels/string_join_op.cc     |   3 +-
 tensorflow/core/kernels/string_split_op.cc    |  52 ++++---
 tensorflow/core/kernels/string_strip_op.cc    |   3 +-
 .../core/kernels/string_to_hash_bucket_op.h   |   3 +-
 tensorflow/core/kernels/string_util.h         |   5 +-
 tensorflow/core/kernels/substr_op.cc          |  35 +++--
 tensorflow/core/kernels/tensor_array_ops.cc   |   5 +-
 tensorflow/core/kernels/word2vec_kernels.cc   |   8 +-
 tensorflow/core/lib/core/coding.cc            |   9 +-
 tensorflow/core/lib/core/coding.h             |   6 +-
 tensorflow/core/lib/core/status.cc            |   3 +-
 tensorflow/core/lib/core/status.h             |   4 +-
 tensorflow/core/lib/core/stringpiece_test.cc  |  17 +--
 tensorflow/core/lib/db/BUILD                  |   2 -
 tensorflow/core/lib/db/sqlite.cc              |  10 +-
 tensorflow/core/lib/db/sqlite.h               |  26 ++--
 tensorflow/core/lib/db/sqlite_test.cc         |   4 +-
 tensorflow/core/lib/hash/hash.h               |   8 +-
 tensorflow/core/lib/hash/hash_test.cc         |  17 ++-
 tensorflow/core/lib/io/block.cc               |  20 ++-
 tensorflow/core/lib/io/block_builder.cc       |  12 +-
 tensorflow/core/lib/io/block_builder.h        |   6 +-
 tensorflow/core/lib/io/format.cc              |  17 ++-
 tensorflow/core/lib/io/format.h               |   8 +-
 tensorflow/core/lib/io/inputbuffer.cc         |   3 +-
 tensorflow/core/lib/io/inputbuffer_test.cc    |  19 ++-
 tensorflow/core/lib/io/iterator.cc            |  11 +-
 tensorflow/core/lib/io/iterator.h             |   8 +-
 tensorflow/core/lib/io/path.cc                |  61 ++++----
 tensorflow/core/lib/io/path.h                 |  22 +--
 tensorflow/core/lib/io/path_test.cc           |   5 +-
 tensorflow/core/lib/io/proto_encode_helper.h  |   6 +-
 tensorflow/core/lib/io/random_inputstream.cc  |   7 +-
 tensorflow/core/lib/io/record_writer.cc       |   7 +-
 tensorflow/core/lib/io/record_writer.h        |   4 +-
 tensorflow/core/lib/io/recordio_test.cc       |  11 +-
 .../core/lib/io/snappy/snappy_buffers_test.cc |  13 +-
 .../core/lib/io/snappy/snappy_inputbuffer.cc  |   3 +-
 .../core/lib/io/snappy/snappy_outputbuffer.cc |   7 +-
 .../core/lib/io/snappy/snappy_outputbuffer.h  |   5 +-
 tensorflow/core/lib/io/table.cc               |  17 ++-
 tensorflow/core/lib/io/table.h                |  12 +-
 tensorflow/core/lib/io/table_builder.cc       |  22 ++-
 tensorflow/core/lib/io/table_builder.h        |   5 +-
 tensorflow/core/lib/io/table_test.cc          |  37 +++--
 tensorflow/core/lib/io/two_level_iterator.cc  |  13 +-
 tensorflow/core/lib/io/two_level_iterator.h   |   4 +-
 tensorflow/core/lib/io/zlib_buffers_test.cc   |   9 +-
 tensorflow/core/lib/io/zlib_outputbuffer.cc   |   7 +-
 tensorflow/core/lib/io/zlib_outputbuffer.h    |   5 +-
 tensorflow/core/lib/jpeg/jpeg_mem.h           |   4 +-
 .../lib/monitoring/collection_registry.cc     |   3 +-
 .../core/lib/monitoring/collection_registry.h |   4 +-
 tensorflow/core/lib/monitoring/metric_def.h   |  11 +-
 tensorflow/core/lib/png/png_io.cc             |   5 +-
 tensorflow/core/lib/png/png_io.h              |   6 +-
 tensorflow/core/lib/strings/base64.cc         |   8 +-
 tensorflow/core/lib/strings/base64.h          |   8 +-
 tensorflow/core/lib/strings/numbers.cc        |  23 ++-
 tensorflow/core/lib/strings/numbers.h         |  32 ++---
 tensorflow/core/lib/strings/numbers_test.cc   |  27 ++--
 tensorflow/core/lib/strings/ordered_code.cc   |  15 +-
 tensorflow/core/lib/strings/ordered_code.h    |  10 +-
 .../core/lib/strings/ordered_code_test.cc     |  40 +++---
 .../core/lib/strings/proto_text_util.cc       |   5 +-
 tensorflow/core/lib/strings/proto_text_util.h |   6 +-
 tensorflow/core/lib/strings/scanner.cc        |   6 +-
 tensorflow/core/lib/strings/scanner.h         |  16 +--
 tensorflow/core/lib/strings/scanner_test.cc   |  31 ++--
 tensorflow/core/lib/strings/str_util.cc       |  57 ++++----
 tensorflow/core/lib/strings/str_util.h        |  74 +++++-----
 tensorflow/core/lib/strings/str_util_test.cc  |  55 ++++---
 tensorflow/core/lib/strings/strcat.cc         |  16 +--
 tensorflow/core/lib/strings/strcat.h          |  16 +--
 tensorflow/core/lib/strings/strcat_test.cc    |   5 +-
 tensorflow/core/platform/cloud/BUILD          |  12 --
 .../cloud/compute_engine_zone_provider.cc     |   3 +-
 .../core/platform/cloud/curl_http_request.cc  |  20 ++-
 .../core/platform/cloud/curl_http_request.h   |   6 +-
 .../platform/cloud/curl_http_request_test.cc  |   5 +-
 .../core/platform/cloud/gcs_file_system.cc    |  49 ++++---
 .../platform/cloud/gcs_file_system_test.cc    |  32 ++---
 .../platform/cloud/google_auth_provider.cc    |   5 +-
 .../cloud/google_auth_provider_test.cc        |   8 +-
 .../core/platform/cloud/http_request_fake.h   |   4 +-
 .../core/platform/cloud/oauth_client.cc       |  30 ++--
 tensorflow/core/platform/cloud/oauth_client.h |  14 +-
 .../core/platform/cloud/oauth_client_test.cc  |   3 +-
 .../platform/cloud/retrying_file_system.h     |   5 +-
 .../cloud/retrying_file_system_test.cc        |  13 +-
 .../core/platform/default/device_tracer.cc    |   6 +-
 .../core/platform/default/fingerprint.h       |   6 +-
 .../platform/default/human_readable_json.cc   |  19 +--
 .../core/platform/default/string_coding.h     |   3 +-
 .../core/platform/default/test_benchmark.cc   |   3 +-
 tensorflow/core/platform/env.cc               |  13 +-
 tensorflow/core/platform/env.h                |   4 +-
 tensorflow/core/platform/env_test.cc          |  10 +-
 tensorflow/core/platform/file_system.cc       |   7 +-
 tensorflow/core/platform/file_system.h        |   6 +-
 tensorflow/core/platform/file_system_test.cc  |   7 +-
 tensorflow/core/platform/fingerprint.h        |   6 +-
 tensorflow/core/platform/hadoop/BUILD         |   2 -
 .../platform/hadoop/hadoop_file_system.cc     |  15 +-
 .../core/platform/hadoop/hadoop_file_system.h |   3 +-
 .../hadoop/hadoop_file_system_test.cc         |   7 +-
 .../core/platform/posix/posix_file_system.cc  |   9 +-
 .../core/platform/posix/posix_file_system.h   |   3 +-
 tensorflow/core/platform/s3/BUILD             |   1 -
 tensorflow/core/platform/s3/s3_file_system.cc |  16 +--
 tensorflow/core/platform/tensor_coding.cc     |   7 +-
 tensorflow/core/platform/tensor_coding.h      |   5 +-
 tensorflow/core/platform/tracing.cc           |   6 +-
 tensorflow/core/platform/tracing.h            |  25 ++--
 .../platform/windows/windows_file_system.cc   |   9 +-
 .../platform/windows/windows_file_system.h    |   3 +-
 tensorflow/core/profiler/internal/BUILD       |   1 -
 .../core/profiler/internal/tfprof_utils.cc    |   5 +-
 tensorflow/core/util/command_line_flags.cc    |  12 +-
 tensorflow/core/util/device_name_utils.cc     |  28 ++--
 tensorflow/core/util/device_name_utils.h      |  19 ++-
 .../core/util/device_name_utils_test.cc       |   5 +-
 tensorflow/core/util/env_var.cc               |   9 +-
 tensorflow/core/util/env_var.h                |  10 +-
 tensorflow/core/util/events_writer.cc         |   3 +-
 tensorflow/core/util/events_writer.h          |   3 +-
 .../core/util/example_proto_fast_parsing.cc   |  60 ++++----
 tensorflow/core/util/memmapped_file_system.cc |   9 +-
 .../core/util/memmapped_file_system_test.cc   |   5 +-
 .../core/util/memmapped_file_system_writer.cc |   5 +-
 tensorflow/core/util/mirror_pad_mode.cc       |   3 +-
 tensorflow/core/util/mirror_pad_mode.h        |   3 +-
 tensorflow/core/util/padding.cc               |   3 +-
 tensorflow/core/util/padding.h                |   3 +-
 tensorflow/core/util/reporter_test.cc         |   3 +-
 .../core/util/saved_tensor_slice_util.cc      |   3 +-
 tensorflow/core/util/semver_test.cc           |  12 +-
 tensorflow/core/util/tensor_bundle/BUILD      |   7 +-
 tensorflow/core/util/tensor_bundle/naming.cc  |   6 +-
 tensorflow/core/util/tensor_bundle/naming.h   |   6 +-
 .../core/util/tensor_bundle/tensor_bundle.cc  |  46 +++---
 .../core/util/tensor_bundle/tensor_bundle.h   |  37 +++--
 .../util/tensor_bundle/tensor_bundle_test.cc  |   9 +-
 tensorflow/core/util/tensor_slice_reader.cc   |   3 +-
 tensorflow/core/util/tensor_slice_writer.cc   |   3 +-
 tensorflow/core/util/tensor_slice_writer.h    |   4 +-
 tensorflow/core/util/util.cc                  |  17 ++-
 tensorflow/core/util/util.h                   |   6 +-
 tensorflow/examples/label_image/BUILD         |   2 +-
 tensorflow/examples/label_image/main.cc       |   4 +-
 tensorflow/java/BUILD                         |   1 -
 tensorflow/java/src/gen/cc/op_specs.cc        |   3 +-
 tensorflow/java/src/gen/cc/source_writer.cc   |   5 +-
 tensorflow/java/src/gen/cc/source_writer.h    |  12 +-
 tensorflow/js/BUILD                           |   1 -
 tensorflow/js/ops/ts_op_gen_test.cc           |   5 +-
 tensorflow/python/BUILD                       |   6 -
 tensorflow/python/eager/pywrap_tfe_src.cc     |  13 +-
 tensorflow/python/framework/python_op_gen.cc  |   5 +-
 .../framework/python_op_gen_internal.cc       |  17 ++-
 .../python/framework/python_op_gen_main.cc    |   7 +-
 .../python/framework/test_file_system.cc      |   5 +-
 tensorflow/python/lib/core/py_func.cc         |   5 +-
 tensorflow/python/lib/core/strings.i          |  26 ++--
 tensorflow/python/lib/io/py_record_writer.cc  |   4 +-
 tensorflow/python/lib/io/py_record_writer.h   |   4 +-
 tensorflow/python/util/util.cc                |   7 +-
 tensorflow/tools/graph_transforms/BUILD       |   4 -
 .../graph_transforms/fold_constants_lib.cc    |  11 +-
 .../graph_transforms/fold_constants_test.cc   |   3 +-
 .../freeze_requantization_ranges.cc           |   3 +-
 .../graph_transforms/sparsify_gather_test.cc  |   5 +-
 .../tools/graph_transforms/transform_graph.cc |  11 +-
 .../tools/graph_transforms/transform_utils.cc |   7 +-
 .../gen_proto_text_functions_lib.cc           |   4 +-
 391 files changed, 1914 insertions(+), 2459 deletions(-)

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 762afaee87..56f5e6767a 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -78,12 +78,10 @@ tf_cuda_library(
     deps = select({
         "//tensorflow:android": [
             ":c_api_internal",
-            "@com_google_absl//absl/strings",
             "//tensorflow/core:android_tensorflow_lib_lite",
         ],
         "//conditions:default": [
             ":c_api_internal",
-            "@com_google_absl//absl/strings",
             "//tensorflow/cc/saved_model:loader",
             "//tensorflow/cc:gradients",
             "//tensorflow/cc:ops",
@@ -230,7 +228,6 @@ tf_cuda_cc_test(
         "//tensorflow/core/kernels:array",
         "//tensorflow/core/kernels:control_flow_ops",
         "//tensorflow/core/kernels:math",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 7cd7e198c4..1726db12fa 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <limits>
 #include <memory>
 #include <vector>
-#include "absl/strings/string_view.h"
 
 #ifndef __ANDROID__
 #include "tensorflow/cc/framework/gradients.h"
@@ -52,6 +51,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -120,7 +120,7 @@ void TF_SetStatus(TF_Status* s, TF_Code code, const char* msg) {
     s->status = Status::OK();
     return;
   }
-  s->status = Status(static_cast<Code>(code), absl::string_view(msg));
+  s->status = Status(static_cast<Code>(code), tensorflow::StringPiece(msg));
 }
 
 TF_Code TF_GetCode(const TF_Status* s) {
@@ -1160,7 +1160,7 @@ void TF_ColocateWith(TF_OperationDescription* desc, TF_Operation* op) {
 
 void TF_SetAttrString(TF_OperationDescription* desc, const char* attr_name,
                       const void* value, size_t length) {
-  absl::string_view s(static_cast<const char*>(value), length);
+  tensorflow::StringPiece s(static_cast<const char*>(value), length);
   desc->node_builder.Attr(attr_name, s);
 }
 
@@ -1174,7 +1174,7 @@ void TF_SetAttrStringList(TF_OperationDescription* desc, const char* attr_name,
                                            lengths[i]);
     }
   } else {
-    std::vector<absl::string_view> v;
+    std::vector<tensorflow::StringPiece> v;
     v.reserve(num_values);
     for (int i = 0; i < num_values; ++i) {
       v.emplace_back(static_cast<const char*>(values[i]), lengths[i]);
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index 604e12eef6..f68f8a3e90 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_api_internal.h"
 
 #include <algorithm>
@@ -325,7 +324,7 @@ Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
     TF_RETURN_IF_ERROR(
         NameRangesForNode(*node, node->op_def(), nullptr, &output_ranges));
     for (const auto& output : output_ranges) {
-      const absl::string_view& output_name = output.first;
+      const StringPiece& output_name = output.first;
       int index_start = output.second.first;
       int index_end = output.second.second;
       for (int i = index_start; i < index_end; ++i) {
@@ -365,7 +364,7 @@ Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
     const uint64 hash = FunctionDefHash(*fdef);
     string encoded;
     TF_RETURN_IF_ERROR(Base64Encode(
-        absl::string_view(reinterpret_cast<const char*>(&hash), sizeof(hash)),
+        StringPiece(reinterpret_cast<const char*>(&hash), sizeof(hash)),
         &encoded));
     // Besides letters and digits our Base64 encoding uses '_' and '-'.
     // Dash is invalid in operation names and multiple underscores in random
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index a8ecd7b726..c4746b4990 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -21,7 +21,6 @@ limitations under the License.
 #include <memory>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_test_util.h"
 #include "tensorflow/cc/saved_model/signature_constants.h"
 #include "tensorflow/cc/saved_model/tag_constants.h"
@@ -56,7 +55,7 @@ Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst);
 
 namespace {
 
-static void ExpectHasSubstr(absl::string_view s, absl::string_view expected) {
+static void ExpectHasSubstr(StringPiece s, StringPiece expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index 8e38a4675b..3ee31a6a7a 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -50,7 +50,6 @@ tf_cuda_library(
         ],
         "//conditions:default": [],
     }) + [
-        "@com_google_absl//absl/strings",
         "//tensorflow/core/common_runtime/eager:eager_operation",
         "//tensorflow/core/distributed_runtime/eager:eager_client",
         "//tensorflow/core/distributed_runtime/rpc/eager:grpc_eager_client",
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index f4215ab77a..3554ec0bf3 100755
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -21,7 +21,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/c/eager/c_api_internal.h"
@@ -47,6 +46,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/refcount.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -526,7 +526,8 @@ TF_AttrType TFE_OpNameGetAttrType(TFE_Context* ctx,
 void TFE_OpSetAttrString(TFE_Op* op, const char* attr_name, const void* value,
                          size_t length) {
   op->operation.MutableAttrs()->Set(
-      attr_name, absl::string_view(static_cast<const char*>(value), length));
+      attr_name,
+      tensorflow::StringPiece(static_cast<const char*>(value), length));
 }
 
 void TFE_OpSetAttrInt(TFE_Op* op, const char* attr_name, int64_t value) {
@@ -595,9 +596,10 @@ void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor,
 void TFE_OpSetAttrStringList(TFE_Op* op, const char* attr_name,
                              const void* const* values, const size_t* lengths,
                              int num_values) {
-  std::vector<absl::string_view> v(num_values);
+  std::vector<tensorflow::StringPiece> v(num_values);
   for (int i = 0; i < num_values; ++i) {
-    v[i] = absl::string_view(static_cast<const char*>(values[i]), lengths[i]);
+    v[i] = tensorflow::StringPiece(static_cast<const char*>(values[i]),
+                                   lengths[i]);
   }
   op->operation.MutableAttrs()->Set(attr_name, v);
 }
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index e3859014cf..c18b07603a 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -170,7 +170,6 @@ cc_library_with_android_deps(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -604,7 +603,6 @@ cc_library_with_android_deps(
         "//tensorflow/core:op_gen_lib",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -624,7 +622,6 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -686,7 +683,6 @@ tf_cc_binary(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:tensorflow",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index aeec058030..39593370d1 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/framework/cc_op_gen.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
@@ -108,7 +107,7 @@ string ToTitle(const string& name) {
 //   ABC         /// ABC
 //               ///
 //   DEF         /// DEF
-string MakeComment(absl::string_view text, absl::string_view indent) {
+string MakeComment(StringPiece text, StringPiece indent) {
   string ret;
   while (!text.empty()) {
     int last_non_space = -1;
@@ -303,9 +302,9 @@ string ToCamelCase(const string& str) {
 // attr_type when defining an object of that type. The bool is a flag to
 // indicate whether to treat the type as const when accepting the C++ type as an
 // argument to a function.
-std::pair<const char*, bool> AttrTypeName(absl::string_view attr_type) {
+std::pair<const char*, bool> AttrTypeName(StringPiece attr_type) {
   static const auto* attr_type_map =
-      new std::unordered_map<absl::string_view, std::pair<const char*, bool>,
+      new std::unordered_map<StringPiece, std::pair<const char*, bool>,
                              StringPieceHasher>{
           {"string", {"StringPiece", false}},
           {"list(string)", {"gtl::ArraySlice<string>", true}},
@@ -332,9 +331,9 @@ std::pair<const char*, bool> AttrTypeName(absl::string_view attr_type) {
   return entry->second;
 }
 
-const char* ListElementTypeName(absl::string_view attr_type) {
+const char* ListElementTypeName(StringPiece attr_type) {
   static const auto* attr_list_type_map =
-      new std::unordered_map<absl::string_view, const char*, StringPieceHasher>{
+      new std::unordered_map<StringPiece, const char*, StringPieceHasher>{
           {"list(string)", "string"},
           {"list(int)", "int"},
           {"list(float)", "float"},
@@ -352,8 +351,8 @@ const char* ListElementTypeName(absl::string_view attr_type) {
   return entry->second;
 }
 
-bool IsCPPKeyword(absl::string_view name) {
-  static const std::unordered_set<absl::string_view, StringPieceHasher>
+bool IsCPPKeyword(StringPiece name) {
+  static const std::unordered_set<StringPiece, StringPieceHasher>
       // Keywords obtained from http://en.cppreference.com/w/cpp/keyword
       kCPPReserved{
           "alignas",
@@ -463,7 +462,7 @@ bool IsCPPKeyword(absl::string_view name) {
   return kCPPReserved.count(name) > 0;
 }
 
-string AvoidCPPKeywords(absl::string_view name) {
+string AvoidCPPKeywords(StringPiece name) {
   if (IsCPPKeyword(name)) {
     return strings::StrCat(name, "_");
   }
@@ -517,7 +516,7 @@ struct OpInfo {
   explicit OpInfo(const OpDef& graph_op_def, const ApiDef& api_def,
                   const std::vector<string>& aliases);
   string GetOpAttrStruct() const;
-  string GetConstructorDecl(absl::string_view op_name_prefix,
+  string GetConstructorDecl(StringPiece op_name_prefix,
                             bool include_attr) const;
   void WriteClassDecl(WritableFile* h) const;
   void GetOutput(string* out) const;
@@ -575,7 +574,7 @@ OpInfo::OpInfo(const OpDef& graph_op_def, const ApiDef& api_def,
     arg_names.push_back(AvoidCPPKeywords(api_def_arg.rename_to()));
 
     // TODO(keveman): Include input type information.
-    absl::string_view description = api_def_arg.description();
+    StringPiece description = api_def_arg.description();
     if (!description.empty()) {
       ConsumeEquals(&description);
       strings::StrAppend(&comment, "* ",
@@ -769,7 +768,7 @@ string OpInfo::GetOpAttrStruct() const {
   return struct_decl;
 }
 
-string OpInfo::GetConstructorDecl(absl::string_view op_name_prefix,
+string OpInfo::GetConstructorDecl(StringPiece op_name_prefix,
                                   bool include_attr) const {
   const string prefix = strings::StrCat(op_name_prefix, op_name, "(");
   string c_decl;
diff --git a/tensorflow/cc/framework/cc_op_gen_main.cc b/tensorflow/cc/framework/cc_op_gen_main.cc
index a9759fcee9..3157792e15 100644
--- a/tensorflow/cc/framework/cc_op_gen_main.cc
+++ b/tensorflow/cc/framework/cc_op_gen_main.cc
@@ -13,11 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/framework/cc_op_gen.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
@@ -69,7 +69,7 @@ int main(int argc, char* argv[]) {
     exit(1);
   }
 
-  bool include_internal = absl::string_view("1") == argv[3];
+  bool include_internal = tensorflow::StringPiece("1") == argv[3];
   std::vector<tensorflow::string> api_def_dirs = tensorflow::str_util::Split(
       argv[4], ",", tensorflow::str_util::SkipEmpty());
   tensorflow::PrintAllCCOps(argv[1], argv[2], include_internal, api_def_dirs);
diff --git a/tensorflow/cc/framework/cc_op_gen_test.cc b/tensorflow/cc/framework/cc_op_gen_test.cc
index 327e3c6bac..5d9dfd95a5 100644
--- a/tensorflow/cc/framework/cc_op_gen_test.cc
+++ b/tensorflow/cc/framework/cc_op_gen_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/cc/framework/cc_op_gen.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -62,12 +61,12 @@ op {
 }
 )";
 
-void ExpectHasSubstr(absl::string_view s, absl::string_view expected) {
+void ExpectHasSubstr(StringPiece s, StringPiece expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
 
-void ExpectDoesNotHaveSubstr(absl::string_view s, absl::string_view expected) {
+void ExpectDoesNotHaveSubstr(StringPiece s, StringPiece expected) {
   EXPECT_FALSE(str_util::StrContains(s, expected))
       << "'" << s << "' contains '" << expected << "'";
 }
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
index 3bc2e5fa2b..6abc9e268e 100644
--- a/tensorflow/cc/framework/scope.cc
+++ b/tensorflow/cc/framework/scope.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/framework/scope_internal.h"
 #include "tensorflow/core/common_runtime/shape_refiner.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -248,7 +247,7 @@ std::unordered_set<string> Scope::Impl::GetColocationConstraints(
   std::vector<string> node_constraints;
   if (GetNodeAttr(attrs, kColocationAttrName, &node_constraints).ok()) {
     for (const string& entry : node_constraints) {
-      absl::string_view s(entry);
+      StringPiece s(entry);
       if (str_util::ConsumePrefix(&s, kColocationGroupPrefix)) {
         current_constraints.emplace(s);
       }
diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD
index 43106c82c9..3d3895c8fa 100644
--- a/tensorflow/cc/saved_model/BUILD
+++ b/tensorflow/cc/saved_model/BUILD
@@ -95,7 +95,6 @@ cc_library(
     deps = [
         ":constants",
         ":reader",
-        "@com_google_absl//absl/strings",
     ] + if_not_mobile([
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
diff --git a/tensorflow/cc/saved_model/loader.cc b/tensorflow/cc/saved_model/loader.cc
index e33c584ed7..c6abe2f41b 100644
--- a/tensorflow/cc/saved_model/loader.cc
+++ b/tensorflow/cc/saved_model/loader.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <unordered_set>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/saved_model/constants.h"
 #include "tensorflow/cc/saved_model/reader.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -61,7 +60,7 @@ Tensor CreateStringTensor(const string& value) {
   return tensor;
 }
 
-void AddAssetsTensorsToInputs(const absl::string_view export_dir,
+void AddAssetsTensorsToInputs(const StringPiece export_dir,
                               const std::vector<AssetFileDef>& asset_file_defs,
                               std::vector<std::pair<string, Tensor>>* inputs) {
   if (asset_file_defs.empty()) {
@@ -148,8 +147,7 @@ Status RunMainOp(const RunOptions& run_options, const string& export_dir,
     std::vector<std::pair<string, Tensor>> inputs;
     AddAssetsTensorsToInputs(export_dir, asset_file_defs, &inputs);
     RunMetadata run_metadata;
-    const absl::string_view main_op_name =
-        main_op_it->second.node_list().value(0);
+    const StringPiece main_op_name = main_op_it->second.node_list().value(0);
     return RunOnce(run_options, inputs, {}, {string(main_op_name)},
                    nullptr /* outputs */, &run_metadata, session);
   }
@@ -157,8 +155,8 @@ Status RunMainOp(const RunOptions& run_options, const string& export_dir,
 }
 
 Status RunRestore(const RunOptions& run_options, const string& export_dir,
-                  const absl::string_view restore_op_name,
-                  const absl::string_view variable_filename_const_op_name,
+                  const StringPiece restore_op_name,
+                  const StringPiece variable_filename_const_op_name,
                   const std::vector<AssetFileDef>& asset_file_defs,
                   Session* session) {
   LOG(INFO) << "Restoring SavedModel bundle.";
diff --git a/tensorflow/cc/tutorials/example_trainer.cc b/tensorflow/cc/tutorials/example_trainer.cc
index 1a76a1a3a2..5dbc4f5f6a 100644
--- a/tensorflow/cc/tutorials/example_trainer.cc
+++ b/tensorflow/cc/tutorials/example_trainer.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -166,7 +165,8 @@ void ConcurrentSessions(const Options& opts) {
 
 namespace {
 
-bool ParseInt32Flag(absl::string_view arg, absl::string_view flag, int32* dst) {
+bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
+                    int32* dst) {
   if (tensorflow::str_util::ConsumePrefix(&arg, flag) &&
       tensorflow::str_util::ConsumePrefix(&arg, "=")) {
     char extra;
@@ -176,7 +176,8 @@ bool ParseInt32Flag(absl::string_view arg, absl::string_view flag, int32* dst) {
   return false;
 }
 
-bool ParseBoolFlag(absl::string_view arg, absl::string_view flag, bool* dst) {
+bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
+                   bool* dst) {
   if (tensorflow::str_util::ConsumePrefix(&arg, flag)) {
     if (arg.empty()) {
       *dst = true;
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
index c2d58552e7..2ce6fa73fc 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
 #include "tensorflow/compiler/xla/status_macros.h"
@@ -124,8 +123,8 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
     bool a_is_resource = (a->output_type(0) == DT_RESOURCE);
     bool b_is_resource = (b->output_type(0) == DT_RESOURCE);
     // Uses the name as a tiebreaker so the output is deterministic.
-    absl::string_view a_name(a->name());
-    absl::string_view b_name(b->name());
+    StringPiece a_name(a->name());
+    StringPiece b_name(b->name());
     return std::tie(a_is_resource, a_name) < std::tie(b_is_resource, b_name);
   });
 
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 4a7318359d..224e5ea123 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -194,7 +194,6 @@ cc_library(
         "//tensorflow/core/kernels:bounds_check",
         "//tensorflow/core/kernels:conv_ops",
         "//tensorflow/core/kernels:ops_util",
-        "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
     ],
 )
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
index 4be61e23b2..c9a1be4940 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
@@ -16,7 +16,6 @@ limitations under the License.
 // XLA-specific Ops for 2D convolution.
 
 #include "tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h"
-#include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
@@ -199,11 +198,10 @@ Status CheckConvAttrs(const ConvOpAttrs& attrs) {
 // Wrapper around ConvBackpropComputeDimensions that converts from XLA shapes
 // to TensorShapes.
 Status ConvBackpropComputeDimensionsV2XlaShapes(
-    absl::string_view label, int num_spatial_dims,
-    const xla::Shape& input_shape, const xla::Shape& filter_shape,
-    const xla::Shape& out_backprop_shape, absl::Span<const int32> dilations,
-    const std::vector<int32>& strides, Padding padding,
-    TensorFormat data_format, ConvBackpropDimensions* dims) {
+    StringPiece label, int num_spatial_dims, const xla::Shape& input_shape,
+    const xla::Shape& filter_shape, const xla::Shape& out_backprop_shape,
+    absl::Span<const int32> dilations, const std::vector<int32>& strides,
+    Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) {
   TensorShape input_tensor_shape, filter_tensor_shape,
       out_backprop_tensor_shape;
   TF_RETURN_IF_ERROR(XLAShapeToTensorShape(input_shape, &input_tensor_shape));
@@ -237,9 +235,10 @@ xla::StatusOr<ConvOpAttrs> ConvOpAttrs::Create(int num_spatial_dims,
   return attrs;
 }
 
-xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(
-    absl::string_view /*type_string*/, xla::XlaOp conv_input, xla::XlaOp filter,
-    const ConvOpAttrs& attrs) {
+xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(StringPiece /*type_string*/,
+                                               xla::XlaOp conv_input,
+                                               xla::XlaOp filter,
+                                               const ConvOpAttrs& attrs) {
   TF_RETURN_IF_ERROR(CheckConvAttrs(attrs));
 
   auto* builder = conv_input.builder();
@@ -310,8 +309,8 @@ xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(
 }
 
 xla::StatusOr<xla::XlaOp> MakeXlaBackpropInputConvOp(
-    absl::string_view type_string, const xla::Shape& input_shape,
-    xla::XlaOp filter, xla::XlaOp out_backprop, const ConvOpAttrs& attrs) {
+    StringPiece type_string, const xla::Shape& input_shape, xla::XlaOp filter,
+    xla::XlaOp out_backprop, const ConvOpAttrs& attrs) {
   TF_RETURN_IF_ERROR(CheckConvAttrs(attrs));
 
   int num_dims = attrs.num_spatial_dims + 2;
@@ -381,7 +380,7 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropInputConvOp(
 }
 
 xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
-    absl::string_view type_string, xla::XlaOp activations,
+    StringPiece type_string, xla::XlaOp activations,
     const xla::Shape& filter_shape, xla::XlaOp gradients,
     const ConvOpAttrs& attrs) {
   TF_RETURN_IF_ERROR(CheckConvAttrs(attrs));
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h
index bcd846dd02..6e1b70a478 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h
@@ -18,7 +18,6 @@ limitations under the License.
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -53,15 +52,15 @@ struct ConvOpAttrs {
 
 // Creates a new XLA forward or backward convolution with the given inputs and
 // attributes.
-xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(absl::string_view type_string,
+xla::StatusOr<xla::XlaOp> MakeXlaForwardConvOp(StringPiece type_string,
                                                xla::XlaOp conv_input,
                                                xla::XlaOp filter,
                                                const ConvOpAttrs& attrs);
 xla::StatusOr<xla::XlaOp> MakeXlaBackpropInputConvOp(
-    absl::string_view type_string, const xla::Shape& input_shape,
-    xla::XlaOp filter, xla::XlaOp out_backprop, const ConvOpAttrs& attrs);
+    StringPiece type_string, const xla::Shape& input_shape, xla::XlaOp filter,
+    xla::XlaOp out_backprop, const ConvOpAttrs& attrs);
 xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
-    absl::string_view type_string, xla::XlaOp activations,
+    StringPiece type_string, xla::XlaOp activations,
     const xla::Shape& filter_shape, xla::XlaOp gradients,
     const ConvOpAttrs& attrs);
 
diff --git a/tensorflow/compiler/xla/tools/hex_floats_to_packed_literal.cc b/tensorflow/compiler/xla/tools/hex_floats_to_packed_literal.cc
index 23ce1d235b..0c3ec5934e 100644
--- a/tensorflow/compiler/xla/tools/hex_floats_to_packed_literal.cc
+++ b/tensorflow/compiler/xla/tools/hex_floats_to_packed_literal.cc
@@ -67,8 +67,8 @@ int main(int argc, char** argv) {
     floats.push_back(value);
   }
 
-  absl::string_view content(absl::bit_cast<const char*>(floats.data()),
-                            floats.size() * sizeof(float));
+  tensorflow::StringPiece content(absl::bit_cast<const char*>(floats.data()),
+                                  floats.size() * sizeof(float));
   TF_CHECK_OK(tensorflow::WriteStringToFile(tensorflow::Env::Default(),
                                             output_file, content));
   return 0;
diff --git a/tensorflow/contrib/android/asset_manager_filesystem.cc b/tensorflow/contrib/android/asset_manager_filesystem.cc
index ed1f88b160..d14b2126a0 100644
--- a/tensorflow/contrib/android/asset_manager_filesystem.cc
+++ b/tensorflow/contrib/android/asset_manager_filesystem.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <unistd.h>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/file_system_helper.h"
@@ -27,7 +26,7 @@ namespace {
 
 string RemoveSuffix(const string& name, const string& suffix) {
   string output(name);
-  absl::string_view piece(output);
+  StringPiece piece(output);
   str_util::ConsumeSuffix(&piece, suffix);
   return string(piece);
 }
@@ -88,7 +87,7 @@ class RandomAccessFileFromAsset : public RandomAccessFile {
       : asset_manager_(asset_manager), file_name_(name) {}
   ~RandomAccessFileFromAsset() override = default;
 
-  Status Read(uint64 offset, size_t to_read, absl::string_view* result,
+  Status Read(uint64 offset, size_t to_read, StringPiece* result,
               char* scratch) const override {
     auto asset = ScopedAsset(AAssetManager_open(
         asset_manager_, file_name_.c_str(), AASSET_MODE_RANDOM));
@@ -99,7 +98,7 @@ class RandomAccessFileFromAsset : public RandomAccessFile {
     off64_t new_offset = AAsset_seek64(asset.get(), offset, SEEK_SET);
     off64_t length = AAsset_getLength64(asset.get());
     if (new_offset < 0) {
-      *result = absl::string_view(scratch, 0);
+      *result = StringPiece(scratch, 0);
       return errors::OutOfRange("Read after file end.");
     }
     const off64_t region_left =
@@ -108,7 +107,7 @@ class RandomAccessFileFromAsset : public RandomAccessFile {
     if (read < 0) {
       return errors::Internal("Error reading from asset.");
     }
-    *result = absl::string_view(scratch, region_left);
+    *result = StringPiece(scratch, region_left);
     return (region_left == to_read)
                ? Status::OK()
                : errors::OutOfRange("Read less bytes than requested.");
@@ -230,7 +229,7 @@ string AssetManagerFileSystem::NormalizeDirectoryPath(const string& fname) {
 }
 
 string AssetManagerFileSystem::RemoveAssetPrefix(const string& name) {
-  absl::string_view piece(name);
+  StringPiece piece(name);
   str_util::ConsumePrefix(&piece, prefix_);
   return string(piece);
 }
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc b/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc
index ce77a4c01c..6138d79126 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_kernels.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/bigtable/kernels/bigtable_lib.h"
 
 #include "tensorflow/core/framework/op_kernel.h"
@@ -343,8 +342,7 @@ class ToBigtableOp : public AsyncOpKernel {
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
-                             const absl::string_view& argument_name,
-                             T* output) {
+                             const StringPiece& argument_name, T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.cc b/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.cc
index 1b4be1cbcd..51965f6214 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.cc
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
@@ -56,11 +55,11 @@ const string& MultiModeKeyRange::begin_key() const { return begin_; }
 
 const string& MultiModeKeyRange::end_key() const { return end_; }
 
-bool MultiModeKeyRange::contains_key(absl::string_view key) const {
-  if (absl::string_view(begin_) > key) {
+bool MultiModeKeyRange::contains_key(StringPiece key) const {
+  if (StringPiece(begin_) > key) {
     return false;
   }
-  if (absl::string_view(end_) <= key && !end_.empty()) {
+  if (StringPiece(end_) <= key && !end_.empty()) {
     return false;
   }
   return true;
diff --git a/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h b/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h
index d55c6d8f6a..44c628e366 100644
--- a/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h
+++ b/tensorflow/contrib/bigtable/kernels/bigtable_range_helpers.h
@@ -18,7 +18,7 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -52,7 +52,7 @@ class MultiModeKeyRange {
   // The first invalid key after the valid range.
   const string& end_key() const;
   // Returns true if the provided key is a part of the range, false otherwise.
-  bool contains_key(absl::string_view key) const;
+  bool contains_key(StringPiece key) const;
 
  private:
   MultiModeKeyRange(string begin, string end)
diff --git a/tensorflow/contrib/cloud/kernels/BUILD b/tensorflow/contrib/cloud/kernels/BUILD
index 84cf6a80f1..1311063ec0 100644
--- a/tensorflow/contrib/cloud/kernels/BUILD
+++ b/tensorflow/contrib/cloud/kernels/BUILD
@@ -46,7 +46,6 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/platform/cloud:curl_http_request",
         "//tensorflow/core/platform/cloud:google_auth_provider",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -66,7 +65,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core/platform/cloud:http_request_fake",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -87,7 +85,6 @@ tf_kernel_library(
         "//tensorflow/core/platform/cloud:curl_http_request",
         "//tensorflow/core/platform/cloud:gcs_file_system",
         "//tensorflow/core/platform/cloud:oauth_client",
-        "@com_google_absl//absl/strings",
         "@jsoncpp_git//:jsoncpp",
     ],
 )
diff --git a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc
index ba7678d5a8..e57a66b99f 100644
--- a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc
+++ b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/contrib/cloud/kernels/bigquery_table_accessor.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/example/feature.pb.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 
@@ -32,7 +31,7 @@ bool IsPartitionEmpty(const BigQueryTablePartition& partition) {
   return false;
 }
 
-Status ParseJson(absl::string_view json, Json::Value* result) {
+Status ParseJson(StringPiece json, Json::Value* result) {
   Json::Reader reader;
   if (!reader.parse(string(json), *result)) {
     return errors::Internal("Couldn't parse JSON response from BigQuery.");
@@ -184,8 +183,8 @@ Status BigQueryTableAccessor::ReadRow(int64* row_id, Example* example) {
                                     FullTableName());
 
     // Parse the returned row.
-    absl::string_view response_piece =
-        absl::string_view(&output_buffer[0], output_buffer.size());
+    StringPiece response_piece =
+        StringPiece(&output_buffer[0], output_buffer.size());
     Json::Value root;
     TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
     for (unsigned int i = 0; i < root["rows"].size(); ++i) {
@@ -262,8 +261,8 @@ Status BigQueryTableAccessor::ReadSchema() {
                                   FullTableName());
 
   // Parse the schema.
-  absl::string_view response_piece =
-      absl::string_view(&output_buffer[0], output_buffer.size());
+  StringPiece response_piece =
+      StringPiece(&output_buffer[0], output_buffer.size());
 
   Json::Value root;
   TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
diff --git a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc
index c85f240c2c..7416eb19d3 100644
--- a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc
+++ b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/contrib/cloud/kernels/bigquery_table_accessor.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test_data.h"
 #include "tensorflow/core/example/feature.pb.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -30,7 +29,7 @@ constexpr char kTestProject[] = "test-project";
 constexpr char kTestDataset[] = "test-dataset";
 constexpr char kTestTable[] = "test-table";
 
-bool HasSubstr(absl::string_view base, absl::string_view substr) {
+bool HasSubstr(StringPiece base, StringPiece substr) {
   bool ok = str_util::StrContains(base, substr);
   EXPECT_TRUE(ok) << base << ", expected substring " << substr;
   return ok;
diff --git a/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc b/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc
index fc8197c762..648a219fb8 100644
--- a/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc
+++ b/tensorflow/contrib/cloud/kernels/gcs_config_ops.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include <sstream>
 
-#include "absl/strings/string_view.h"
 #include "include/json/json.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -64,8 +63,8 @@ Status RetrieveGcsFs(OpKernelContext* ctx, RetryingGcsFileSystem** fs) {
 }
 
 template <typename T>
-Status ParseScalarArgument(OpKernelContext* ctx,
-                           absl::string_view argument_name, T* output) {
+Status ParseScalarArgument(OpKernelContext* ctx, StringPiece argument_name,
+                           T* output) {
   const Tensor* argument_t;
   TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
   if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
diff --git a/tensorflow/contrib/ffmpeg/BUILD b/tensorflow/contrib/ffmpeg/BUILD
index 1eb3e358b2..f7b3273a4d 100644
--- a/tensorflow/contrib/ffmpeg/BUILD
+++ b/tensorflow/contrib/ffmpeg/BUILD
@@ -29,7 +29,6 @@ cc_library(
         "//tensorflow/contrib/ffmpeg/default:ffmpeg_lib",
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -58,7 +57,6 @@ cc_library(
         "//tensorflow/contrib/ffmpeg/default:ffmpeg_lib",
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/contrib/ffmpeg/decode_audio_op.cc b/tensorflow/contrib/ffmpeg/decode_audio_op.cc
index f4905a02de..5ab57ca4cd 100644
--- a/tensorflow/contrib/ffmpeg/decode_audio_op.cc
+++ b/tensorflow/contrib/ffmpeg/decode_audio_op.cc
@@ -18,7 +18,6 @@
 #include <cstdio>
 #include <set>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -42,7 +41,8 @@ const char* kValidFileFormats[] = {"mp3", "mp4", "ogg", "wav"};
  * Decoding implementation, shared across V1 and V2 ops. Creates a new
  * output in the context.
  */
-void Decode(OpKernelContext* context, const absl::string_view& file_contents,
+void Decode(OpKernelContext* context,
+            const tensorflow::StringPiece& file_contents,
             const string& file_format, const int32 samples_per_second,
             const int32 channel_count, const string& stream) {
   // Write the input data to a temp file.
@@ -135,7 +135,7 @@ class DecodeAudioOpV2 : public OpKernel {
                     "channel_count must be a rank-0 tensor but got shape ",
                     channel_count_tensor.shape().DebugString()));
 
-    const absl::string_view contents = contents_tensor.scalar<string>()();
+    const tensorflow::StringPiece contents = contents_tensor.scalar<string>()();
     const string file_format =
         str_util::Lowercase(file_format_tensor.scalar<string>()());
     const int32 samples_per_second =
@@ -245,7 +245,7 @@ class DecodeAudioOp : public OpKernel {
         errors::InvalidArgument("contents must be scalar but got shape ",
                                 contents.shape().DebugString()));
 
-    const absl::string_view file_contents = contents.scalar<string>()();
+    const tensorflow::StringPiece file_contents = contents.scalar<string>()();
     Decode(context, file_contents, file_format_, samples_per_second_,
            channel_count_, "");
   }
diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc
index e9c1e783de..6f8ad486d1 100644
--- a/tensorflow/contrib/ffmpeg/decode_video_op.cc
+++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc
@@ -18,7 +18,6 @@
 #include <cstdio>
 #include <set>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -46,7 +45,7 @@ class DecodeVideoOp : public OpKernel {
                 errors::InvalidArgument(
                     "contents must be a rank-0 tensor but got shape ",
                     contents_tensor.shape().DebugString()));
-    const absl::string_view contents = contents_tensor.scalar<string>()();
+    const tensorflow::StringPiece contents = contents_tensor.scalar<string>()();
 
     // Write the input data to a temp file.
     string extension;
diff --git a/tensorflow/contrib/ffmpeg/default/BUILD b/tensorflow/contrib/ffmpeg/default/BUILD
index 0c15541412..59bad8982d 100644
--- a/tensorflow/contrib/ffmpeg/default/BUILD
+++ b/tensorflow/contrib/ffmpeg/default/BUILD
@@ -20,7 +20,6 @@ cc_library(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
         "@protobuf_archive//:protobuf_headers",
     ],
 )
diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
index ccbf48b317..cca1a05419 100644
--- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
+++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc
@@ -25,7 +25,6 @@
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -304,7 +303,7 @@ FileDeleter::~FileDeleter() {
   env.DeleteFile(filename_).IgnoreError();
 }
 
-Status WriteFile(const string& filename, absl::string_view contents) {
+Status WriteFile(const string& filename, StringPiece contents) {
   Env& env = *Env::Default();
   std::unique_ptr<WritableFile> file;
   TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file));
diff --git a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
index 0b81848668..bf2aa75545 100644
--- a/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
+++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h
@@ -19,7 +19,6 @@
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -36,7 +35,7 @@ class FileDeleter {
 };
 
 // Writes binary data to a file.
-Status WriteFile(const string& filename, absl::string_view contents);
+Status WriteFile(const string& filename, tensorflow::StringPiece contents);
 
 // Reads an audio file using ffmpeg and converts it into an array of samples in
 // [-1.0, 1.0]. If there are multiple channels in the audio then each frame will
diff --git a/tensorflow/contrib/gdr/BUILD b/tensorflow/contrib/gdr/BUILD
index 0fc8cd7ebd..e534fdc177 100644
--- a/tensorflow/contrib/gdr/BUILD
+++ b/tensorflow/contrib/gdr/BUILD
@@ -97,7 +97,6 @@ cc_library(
         "//tensorflow/core/distributed_runtime:worker_cache",
         "//tensorflow/core/distributed_runtime:worker_env",
         "//tensorflow/core/distributed_runtime:worker_interface",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
index cf4218c716..94f522c04e 100644
--- a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
+++ b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/contrib/gdr/gdr_rendezvous_mgr.h"
 
 #include "google/protobuf/any.pb.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/gdr/gdr_memory_manager.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
@@ -42,7 +41,7 @@ class GdrRecvTensorCall : public BaseRecvTensorCall {
   GdrRecvTensorCall(WorkerInterface* wi, Device* dst_device,
                     RemoteMemoryManager* remote_memory_manager,
                     const Rendezvous::Args& recv_args, int64 step_id,
-                    absl::string_view key)
+                    StringPiece key)
       : wi_(wi),
         dst_device_(dst_device),
         remote_memory_manager_(remote_memory_manager),
diff --git a/tensorflow/contrib/layers/kernels/BUILD b/tensorflow/contrib/layers/kernels/BUILD
index d97d518bb6..7aae09ff3e 100644
--- a/tensorflow/contrib/layers/kernels/BUILD
+++ b/tensorflow/contrib/layers/kernels/BUILD
@@ -13,7 +13,6 @@ cc_library(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
         "@farmhash_archive//:farmhash",
         "@protobuf_archive//:protobuf_headers",
     ],
diff --git a/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc b/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc
index 61852df9ad..01893d6061 100644
--- a/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc
+++ b/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_def_builder.h"
@@ -27,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/fingerprint.h"
 #include "tensorflow/core/util/work_sharder.h"
@@ -92,8 +92,8 @@ string SparseTensorColumn<string>::Feature(int64 batch, int64 n) const {
 }
 
 template <>
-absl::string_view SparseTensorColumn<absl::string_view>::Feature(
-    int64 batch, int64 n) const {
+StringPiece SparseTensorColumn<StringPiece>::Feature(int64 batch,
+                                                     int64 n) const {
   const int64 start = feature_start_indices_[batch];
   return values_.vec<string>().data()[start + n];
 }
@@ -130,8 +130,8 @@ string DenseTensorColumn<string>::Feature(int64 batch, int64 n) const {
 }
 
 template <>
-absl::string_view DenseTensorColumn<absl::string_view>::Feature(int64 batch,
-                                                                int64 n) const {
+StringPiece DenseTensorColumn<StringPiece>::Feature(int64 batch,
+                                                    int64 n) const {
   return tensor_.matrix<string>()(batch, n);
 }
 
diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
index 2e9f609682..720c74e3de 100644
--- a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -47,10 +46,10 @@ class DecodeLibsvmOp : public OpKernel {
     std::vector<T> out_values;
     std::vector<std::pair<int64, int64>> out_indices;
     for (int i = 0; i < input_flat.size(); ++i) {
-      absl::string_view line(input_flat(i));
+      StringPiece line(input_flat(i));
       str_util::RemoveWhitespaceContext(&line);
 
-      absl::string_view piece;
+      StringPiece piece;
       OP_REQUIRES(ctx, str_util::ConsumeNonWhitespace(&line, &piece),
                   errors::InvalidArgument("No label found for input[", i,
                                           "]: \"", input_flat(i), "\""));
@@ -65,7 +64,7 @@ class DecodeLibsvmOp : public OpKernel {
       str_util::RemoveLeadingWhitespace(&line);
       while (str_util::ConsumeNonWhitespace(&line, &piece)) {
         size_t p = piece.find(':');
-        OP_REQUIRES(ctx, (p != absl::string_view::npos),
+        OP_REQUIRES(ctx, (p != StringPiece::npos),
                     errors::InvalidArgument("Invalid feature \"", piece, "\""));
 
         int64 feature_index;
diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD
index 55f1e8a412..2f866eaecb 100644
--- a/tensorflow/contrib/lite/delegates/flex/BUILD
+++ b/tensorflow/contrib/lite/delegates/flex/BUILD
@@ -79,7 +79,6 @@ cc_library(
         ":delegate_data",
         ":kernel",
         ":util",
-        "@com_google_absl//absl/strings",
         "//tensorflow/contrib/lite/c:c_api_internal",
         "//tensorflow/contrib/lite:kernel_api",
         "//tensorflow/contrib/lite:util",
@@ -177,7 +176,6 @@ tf_cc_test(
         ":kernel",
         ":test_util",
         "@com_google_googletest//:gtest",
-        "@com_google_absl//absl/strings",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib",
diff --git a/tensorflow/contrib/lite/delegates/flex/delegate.cc b/tensorflow/contrib/lite/delegates/flex/delegate.cc
index aa749ab0e5..c72b0cf513 100644
--- a/tensorflow/contrib/lite/delegates/flex/delegate.cc
+++ b/tensorflow/contrib/lite/delegates/flex/delegate.cc
@@ -16,7 +16,6 @@ limitations under the License.
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/lite/context_util.h"
 #include "tensorflow/contrib/lite/delegates/flex/buffer_map.h"
 #include "tensorflow/contrib/lite/delegates/flex/kernel.h"
@@ -69,7 +68,7 @@ TfLiteStatus CopyFromBufferHandle(TfLiteContext* context,
   }
 
   tensorflow::Tensor t = buffer_map->GetTensor(buffer_handle);
-  absl::string_view t_data = t.tensor_data();
+  tensorflow::StringPiece t_data = t.tensor_data();
 
   if (size != t_data.size()) {
     context->ReportError(
diff --git a/tensorflow/contrib/lite/delegates/flex/kernel_test.cc b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
index c084c35e4f..94a6f8b61a 100644
--- a/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
+++ b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc
@@ -16,7 +16,6 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/lite/delegates/flex/delegate_data.h"
 #include "tensorflow/contrib/lite/delegates/flex/test_util.h"
 
@@ -61,9 +60,9 @@ class KernelTest : public testing::FlexModelTest {
                                         TfLiteBufferHandle buffer_handle,
                                         void* data, size_t size) {
       auto* delegate_data = reinterpret_cast<DelegateData*>(delegate->data_);
-      absl::string_view values = delegate_data->GetBufferMap(context)
-                                     ->GetTensor(buffer_handle)
-                                     .tensor_data();
+      tensorflow::StringPiece values = delegate_data->GetBufferMap(context)
+                                           ->GetTensor(buffer_handle)
+                                           .tensor_data();
       memcpy(data, values.data(), values.size());
       return kTfLiteOk;
     };
diff --git a/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc b/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc
index 82320a5402..06ff86e6d8 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #if GOOGLE_CUDA
 
@@ -36,7 +35,7 @@ Status ReplaceReduce(Graph* graph, Node* node) {
   TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "T", &dtype));
   int num_devices = node->num_inputs();
   string shared_name = node->name();
-  auto make_builder = [&](absl::string_view op_name, absl::string_view suffix) {
+  auto make_builder = [&](StringPiece op_name, StringPiece suffix) {
     return NodeBuilder(strings::StrCat(shared_name, suffix), op_name)
         .Attr("reduction", reduction)
         .Attr("num_devices", num_devices)
@@ -160,7 +159,7 @@ Status ReplaceBroadcast(Graph* graph, Node* node) {
   }
 
   string shared_name = node->name();
-  auto make_builder = [&](absl::string_view op_name, absl::string_view suffix) {
+  auto make_builder = [&](StringPiece op_name, StringPiece suffix) {
     return NodeBuilder(strings::StrCat(shared_name, suffix), op_name)
         .Attr("num_devices", num_devices)
         .Attr("shared_name", shared_name)
@@ -256,7 +255,7 @@ class NcclReplacePass : public GraphOptimizationPass {
     }
     // Find reduction and broadcast ops and replace them with Send/Recv ops.
     for (Node* node : graph->op_nodes()) {
-      absl::string_view type = node->type_string();
+      StringPiece type = node->type_string();
       if (!str_util::StartsWith(type, "Nccl")) {
         continue;
       }
diff --git a/tensorflow/contrib/saved_model/cc/saved_model/BUILD b/tensorflow/contrib/saved_model/cc/saved_model/BUILD
index da27789272..ea4d41d43b 100644
--- a/tensorflow/contrib/saved_model/cc/saved_model/BUILD
+++ b/tensorflow/contrib/saved_model/cc/saved_model/BUILD
@@ -35,7 +35,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_proto_parsing",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.cc b/tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.cc
index 79c0b1b0d5..e87e497e5f 100644
--- a/tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.cc
+++ b/tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.cc
@@ -15,17 +15,17 @@ limitations under the License.
 
 #include "tensorflow/contrib/saved_model/cc/saved_model/signature_def_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/saved_model/signature_constants.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace tensorflow {
 
 namespace {
 template <class T>
-Status FindInProtobufMap(absl::string_view description,
+Status FindInProtobufMap(StringPiece description,
                          const protobuf::Map<string, T>& map, const string& key,
                          const T** value) {
   const auto it = map.find(key);
diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD
index 9e0f4eb974..9c08859180 100644
--- a/tensorflow/contrib/session_bundle/BUILD
+++ b/tensorflow/contrib/session_bundle/BUILD
@@ -190,7 +190,6 @@ cc_library(
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -207,7 +206,6 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":signature_lite",
-        "@com_google_absl//absl/strings",
         "//tensorflow/core:lib_internal",
     ] + if_not_mobile([
         ":manifest_proto_cc",
@@ -344,7 +342,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -373,7 +370,6 @@ cc_library(
     deps = [
         ":session_bundle",
         ":signature",
-        "@com_google_absl//absl/strings",
         "//tensorflow/cc/saved_model:loader",
         "//tensorflow/cc/saved_model:signature_constants",
     ] + if_not_mobile([
diff --git a/tensorflow/contrib/session_bundle/bundle_shim.cc b/tensorflow/contrib/session_bundle/bundle_shim.cc
index 5f2b1e2a67..c669ced997 100644
--- a/tensorflow/contrib/session_bundle/bundle_shim.cc
+++ b/tensorflow/contrib/session_bundle/bundle_shim.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/contrib/session_bundle/bundle_shim.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/saved_model/loader.h"
 #include "tensorflow/cc/saved_model/signature_constants.h"
 #include "tensorflow/contrib/session_bundle/manifest.pb.h"
@@ -24,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
 #include "tensorflow/core/public/session.h"
 #include "tensorflow/core/public/session_options.h"
@@ -129,7 +129,7 @@ Status MaybeBuildPredictSignatureDef(
 
 Status LoadSavedModelFromLegacySessionBundlePath(
     const SessionOptions& session_options, const RunOptions& run_options,
-    const absl::string_view session_bundle_export_dir,
+    const StringPiece session_bundle_export_dir,
     SavedModelBundle* saved_model_bundle) {
   if (session_bundle_export_dir.empty()) {
     return Status(error::Code::NOT_FOUND, "Export directory path is empty.");
diff --git a/tensorflow/contrib/session_bundle/session_bundle.cc b/tensorflow/contrib/session_bundle/session_bundle.cc
index 3f36704405..a690d9b129 100644
--- a/tensorflow/contrib/session_bundle/session_bundle.cc
+++ b/tensorflow/contrib/session_bundle/session_bundle.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <vector>
 
 #include "google/protobuf/any.pb.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/session_bundle/manifest.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/graph_def_util.h"
@@ -63,7 +62,7 @@ Status CreateSessionFromGraphDef(const SessionOptions& options,
   return (*session)->Create(graph);
 }
 
-Status GetMetaGraphDefFromExport(const absl::string_view export_dir,
+Status GetMetaGraphDefFromExport(const StringPiece export_dir,
                                  MetaGraphDef* meta_graph_def) {
   const string meta_graph_def_path =
       io::JoinPath(export_dir, kMetaGraphDefFilename);
@@ -78,7 +77,7 @@ Tensor CreateStringTensor(const string& value) {
 }
 
 // Adds Assets related tensors (assets_dir and asset files) to the inputs.
-void AddAssetsTensorsToInputs(const absl::string_view export_dir,
+void AddAssetsTensorsToInputs(const StringPiece export_dir,
                               const std::vector<AssetFile>& asset_files,
                               std::vector<std::pair<string, Tensor>>* inputs) {
   if (asset_files.empty()) {
@@ -109,7 +108,7 @@ void AddAssetsTensorsToInputs(const absl::string_view export_dir,
 // prefix.data-* are present in the filesystem. So if we see export.index
 // present in the export_dir, we know the export is in V2 format and we return
 // <export_dir>/export as this prefix.
-string GetVariablesFilename(const absl::string_view export_dir) {
+string GetVariablesFilename(const StringPiece export_dir) {
   const char kVariablesFilename[] = "export";
   const string kVariablesIndexFilename = MetaFilename("export");  // V2 ckpts
   const char kVariablesFilenamePattern[] = "export-\?\?\?\?\?-of-\?\?\?\?\?";
@@ -129,11 +128,10 @@ string GetVariablesFilename(const absl::string_view export_dir) {
   }
 }
 
-Status RunRestoreOp(const RunOptions& run_options,
-                    const absl::string_view export_dir,
+Status RunRestoreOp(const RunOptions& run_options, const StringPiece export_dir,
                     const std::vector<AssetFile>& asset_files,
-                    const absl::string_view restore_op_name,
-                    const absl::string_view variables_filename_const_op_name,
+                    const StringPiece restore_op_name,
+                    const StringPiece variables_filename_const_op_name,
                     Session* session) {
   LOG(INFO) << "Running restore op for SessionBundle: " << restore_op_name
             << ", " << variables_filename_const_op_name;
@@ -147,10 +145,9 @@ Status RunRestoreOp(const RunOptions& run_options,
                       nullptr /* outputs */, &run_metadata);
 }
 
-Status RunInitOp(const RunOptions& run_options,
-                 const absl::string_view export_dir,
+Status RunInitOp(const RunOptions& run_options, const StringPiece export_dir,
                  const std::vector<AssetFile>& asset_files,
-                 const absl::string_view init_op_name, Session* session) {
+                 const StringPiece init_op_name, Session* session) {
   LOG(INFO) << "Running init op for SessionBundle";
   std::vector<std::pair<string, Tensor>> inputs;
   AddAssetsTensorsToInputs(export_dir, asset_files, &inputs);
@@ -161,7 +158,7 @@ Status RunInitOp(const RunOptions& run_options,
 
 Status LoadSessionBundleFromPathUsingRunOptionsInternal(
     const SessionOptions& options, const RunOptions& run_options,
-    const absl::string_view export_dir, SessionBundle* const bundle) {
+    const StringPiece export_dir, SessionBundle* const bundle) {
   LOG(INFO) << "Attempting to load a SessionBundle from: " << export_dir;
   LOG(INFO) << "Using RunOptions: " << DebugStringIfAvailable(run_options);
   TF_RETURN_IF_ERROR(
@@ -230,16 +227,17 @@ Status LoadSessionBundleFromPathUsingRunOptionsInternal(
 }  // namespace
 
 Status LoadSessionBundleFromPath(const SessionOptions& options,
-                                 const absl::string_view export_dir,
+                                 const StringPiece export_dir,
                                  SessionBundle* const bundle) {
   TF_RETURN_IF_ERROR(LoadSessionBundleFromPathUsingRunOptions(
       options, RunOptions(), export_dir, bundle));
   return Status::OK();
 }
 
-Status LoadSessionBundleFromPathUsingRunOptions(
-    const SessionOptions& options, const RunOptions& run_options,
-    const absl::string_view export_dir, SessionBundle* const bundle) {
+Status LoadSessionBundleFromPathUsingRunOptions(const SessionOptions& options,
+                                                const RunOptions& run_options,
+                                                const StringPiece export_dir,
+                                                SessionBundle* const bundle) {
   const uint64 start_microseconds = Env::Default()->NowMicros();
   const Status status = LoadSessionBundleFromPathUsingRunOptionsInternal(
       options, run_options, export_dir, bundle);
@@ -265,7 +263,7 @@ Status LoadSessionBundleFromPathUsingRunOptions(
   return status;
 }
 
-bool IsPossibleExportDirectory(const absl::string_view directory) {
+bool IsPossibleExportDirectory(const StringPiece directory) {
   const string meta_graph_def_path =
       io::JoinPath(directory, kMetaGraphDefFilename);
   return Env::Default()->FileExists(meta_graph_def_path).ok();
diff --git a/tensorflow/contrib/session_bundle/session_bundle.h b/tensorflow/contrib/session_bundle/session_bundle.h
index f25123c388..b2be46efa6 100644
--- a/tensorflow/contrib/session_bundle/session_bundle.h
+++ b/tensorflow/contrib/session_bundle/session_bundle.h
@@ -20,10 +20,10 @@ limitations under the License.
 
 #include <memory>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/session_bundle/manifest.pb.h"
 #include "tensorflow/contrib/session_bundle/signature.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
 #include "tensorflow/core/protobuf/saver.pb.h"
 #include "tensorflow/core/public/session.h"
@@ -60,7 +60,7 @@ struct SessionBundle {
 
 // Loads a manifest and initialized session using the output of an Exporter.
 Status LoadSessionBundleFromPath(const SessionOptions& options,
-                                 const absl::string_view export_dir,
+                                 const StringPiece export_dir,
                                  SessionBundle* bundle);
 
 // Similar to the LoadSessionBundleFromPath(), but also allows the session run
@@ -70,14 +70,14 @@ Status LoadSessionBundleFromPath(const SessionOptions& options,
 // This method is EXPERIMENTAL and may change or be removed.
 Status LoadSessionBundleFromPathUsingRunOptions(
     const SessionOptions& session_options, const RunOptions& run_options,
-    const absl::string_view export_dir, SessionBundle* bundle);
+    const StringPiece export_dir, SessionBundle* bundle);
 
 // Sanity checks whether the directory looks like an export directory. Note that
 // we don't try to load any data in this method.
 //
 // If the method returns false this is definitely not an export directory, if it
 // returns true, it is no guarantee that the model will load.
-bool IsPossibleExportDirectory(const absl::string_view export_dir);
+bool IsPossibleExportDirectory(const StringPiece export_dir);
 
 }  // namespace serving
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/session_bundle/signature_test.cc b/tensorflow/contrib/session_bundle/signature_test.cc
index 65be9c9a8b..b1ff55552e 100644
--- a/tensorflow/contrib/session_bundle/signature_test.cc
+++ b/tensorflow/contrib/session_bundle/signature_test.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <memory>
 
 #include "google/protobuf/any.pb.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/session_bundle/manifest.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -26,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/public/session.h"
@@ -34,7 +34,7 @@ namespace tensorflow {
 namespace serving {
 namespace {
 
-static bool HasSubstr(absl::string_view base, absl::string_view substr) {
+static bool HasSubstr(StringPiece base, StringPiece substr) {
   bool ok = str_util::StrContains(base, substr);
   EXPECT_TRUE(ok) << base << ", expected substring " << substr;
   return ok;
diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD
index 03f18183ad..6507546ee9 100644
--- a/tensorflow/contrib/tensorboard/db/BUILD
+++ b/tensorflow/contrib/tensorboard/db/BUILD
@@ -47,7 +47,6 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:summary_interface",
         "//tensorflow/core/lib/db:sqlite",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
index c45be444e0..cfdc884277 100644
--- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
+++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc
@@ -16,12 +16,12 @@ limitations under the License.
 
 #include <deque>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/tensorboard/db/summary_converter.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/summary.pb.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/db/sqlite.h"
 #include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/util/event.pb.h"
@@ -136,7 +136,7 @@ void PatchPluginName(SummaryMetadata* metadata, const char* name) {
   }
 }
 
-Status SetDescription(Sqlite* db, int64 id, const absl::string_view& markdown) {
+Status SetDescription(Sqlite* db, int64 id, const StringPiece& markdown) {
   const char* sql = R"sql(
     INSERT OR REPLACE INTO Descriptions (id, description) VALUES (?, ?)
   )sql";
@@ -260,12 +260,12 @@ class GraphWriter {
     for (int node_id = 0; node_id < graph_->node_size(); ++node_id) {
       const NodeDef& node = graph_->node(node_id);
       for (int idx = 0; idx < node.input_size(); ++idx) {
-        absl::string_view name = node.input(idx);
+        StringPiece name = node.input(idx);
         int64 input_node_id;
         int64 input_node_idx = 0;
         int64 is_control = 0;
         size_t i = name.rfind(':');
-        if (i != absl::string_view::npos) {
+        if (i != StringPiece::npos) {
           if (!strings::safe_strto64(name.substr(i + 1, name.size() - i - 1),
                                      &input_node_idx)) {
             return errors::DataLoss("Bad NodeDef.input: ", name);
@@ -369,8 +369,7 @@ class GraphWriter {
   const uint64 now_;
   const int64 graph_id_;
   std::vector<string> name_copies_;
-  std::unordered_map<absl::string_view, int64, StringPieceHasher>
-      name_to_node_id_;
+  std::unordered_map<StringPiece, int64, StringPieceHasher> name_to_node_id_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(GraphWriter);
 };
@@ -681,7 +680,7 @@ class SeriesWriter {
       } else {
         SqliteTransaction txn(*db);
         TF_RETURN_IF_ERROR(
-            Update(db, step, computed_time, t, absl::string_view(), rowid));
+            Update(db, step, computed_time, t, StringPiece(), rowid));
         TF_RETURN_IF_ERROR(UpdateNdString(db, t, rowid));
         return txn.Commit();
       }
@@ -691,7 +690,7 @@ class SeriesWriter {
   }
 
   Status Update(Sqlite* db, int64 step, double computed_time, const Tensor& t,
-                const absl::string_view& data, int64 rowid) {
+                const StringPiece& data, int64 rowid) {
     const char* sql = R"sql(
       UPDATE OR REPLACE
         Tensors
diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD
index adaa7fbfbd..38d1c3049e 100644
--- a/tensorflow/contrib/tpu/profiler/BUILD
+++ b/tensorflow/contrib/tpu/profiler/BUILD
@@ -29,7 +29,6 @@ cc_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
index 49baacceed..b4b06a40a2 100644
--- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <ctime>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/contrib/tpu/profiler/op_profile.pb.h"
 #include "tensorflow/contrib/tpu/profiler/trace_events.pb.h"
 #include "tensorflow/contrib/tpu/profiler/trace_events_to_json.h"
@@ -64,8 +63,7 @@ Status WriteGzippedDataToFile(const string& filename, const string& data) {
   return Status::OK();
 }
 
-Status DumpTraceToLogDirectory(absl::string_view run_dir,
-                               const string& host_prefix,
+Status DumpTraceToLogDirectory(StringPiece run_dir, const string& host_prefix,
                                const string& encoded_trace, std::ostream* os) {
   string proto_path =
       JoinPath(run_dir, StrCat(host_prefix, kProtoTraceFileName));
@@ -88,7 +86,7 @@ Status DumpTraceToLogDirectory(absl::string_view run_dir,
   return Status::OK();
 }
 
-Status DumpOpProfileToLogDirectory(absl::string_view run_dir,
+Status DumpOpProfileToLogDirectory(StringPiece run_dir,
                                    const string& host_prefix,
                                    const tpu::op_profile::Profile& profile,
                                    std::ostream* os) {
@@ -109,7 +107,7 @@ Status DumpOpProfileToLogDirectory(absl::string_view run_dir,
   return Status::OK();
 }
 
-Status DumpToolDataToLogDirectory(absl::string_view run_dir,
+Status DumpToolDataToLogDirectory(StringPiece run_dir,
                                   const string& host_prefix,
                                   const tensorflow::ProfileToolData& tool,
                                   std::ostream* os) {
diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD
index 83aae915be..19cb8983b6 100644
--- a/tensorflow/contrib/verbs/BUILD
+++ b/tensorflow/contrib/verbs/BUILD
@@ -42,7 +42,6 @@ cc_library(
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/contrib/verbs/verbs_util.cc b/tensorflow/contrib/verbs/verbs_util.cc
index 3662347256..a6333d9f36 100644
--- a/tensorflow/contrib/verbs/verbs_util.cc
+++ b/tensorflow/contrib/verbs/verbs_util.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -32,7 +32,7 @@ string VerbsUtil::AppendStepidToKey(const string& key, int64 step_id) {
 // static
 void VerbsUtil::GetKeyAndStepId(const string& key_with_step_id, string& key,
                                 int64& step_id) {
-  absl::string_view s(key_with_step_id);
+  StringPiece s(key_with_step_id);
   // a key (with step_id) has exact 6 parts if split by ";"
   // part 1: src_device;
   // part 2: src_incarnation;
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 8cdc629640..841291e6d8 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -485,7 +485,6 @@ cc_library(
         ":platform_port",
         ":platform_protobuf",
         "//tensorflow/core/platform/default/build_config:env",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -515,7 +514,6 @@ cc_library(
         ":lib",
         ":lib_platform",
         ":platform_env",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -594,7 +592,6 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:other",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "//tensorflow/core/platform/default/build_config:port",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -981,7 +978,6 @@ cc_library(
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -1482,7 +1478,6 @@ cc_library(
         "//tensorflow/core/kernels:ops_testutil",
         "//tensorflow/core/kernels:ops_util",
         "//tensorflow/core/kernels:random_ops",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -2198,7 +2193,6 @@ cc_library(
         ":lib_proto_parsing",
         ":abi",
         ":core_stringpiece",
-        "@com_google_absl//absl/strings",
         "//third_party/eigen3",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "@snappy",
@@ -2624,7 +2618,6 @@ tf_cuda_library(
         ":protos_all_cc",
         ":stats_calculator_portable",
         ":version_lib",
-        "@com_google_absl//absl/strings",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "//tensorflow/core/kernels:bounds_check",
         "//third_party/eigen3",
@@ -2755,7 +2748,6 @@ tf_cuda_library(
         ":proto_text",
         ":protos_all_cc",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -2800,7 +2792,6 @@ tf_cuda_library(
         ":function_ops_op_lib",
         ":functional_grad",
         ":functional_ops_op_lib",
-        "@com_google_absl//absl/strings",
         "//tensorflow/core/kernels:bounds_check",
         "//tensorflow/core/kernels:required",
         ":core_cpu_impl",
@@ -2930,7 +2921,6 @@ tf_cuda_library(
         ":lib_internal",
         ":proto_text",
         ":protos_all_cc",
-        "@com_google_absl//absl/strings",
         "//third_party/eigen3",
         "//tensorflow/core/grappler:grappler_item",
     ] + mkl_deps(),
@@ -3010,7 +3000,6 @@ tf_cuda_library(
         ":protos_all_cc",
         "//tensorflow/core/debug:debug_graph_utils",
         "//tensorflow/core/kernels:function_ops",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -3134,7 +3123,6 @@ tf_cuda_library(
         ":protos_all_cc",
         ":stream_executor",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -3264,7 +3252,6 @@ cc_library(
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3380,7 +3367,6 @@ tf_cc_tests(
         ":test",
         ":test_main",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
         "@zlib_archive//:zlib",
     ],
 )
@@ -3412,7 +3398,6 @@ tf_cc_test(
         ":test",
         ":test_main",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3484,7 +3469,6 @@ tf_cc_test(
         ":protos_all_cc",
         ":test",
         ":test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3534,7 +3518,6 @@ tf_cc_test(
         ":lib_internal",
         ":test",
         ":test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3696,7 +3679,6 @@ tf_cc_tests(
         "//tensorflow/cc:while_loop",
         "//tensorflow/core/kernels:ops_util",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3733,7 +3715,6 @@ tf_cc_tests(
         "//tensorflow/cc:sendrecv_ops",
         "//tensorflow/core/kernels:ops_util",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3917,7 +3898,6 @@ tf_cc_tests_gpu(
         ":testlib",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/core/kernels:ops_util",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3966,7 +3946,6 @@ tf_cuda_cc_test(
         ":testlib",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/core/kernels:ops_util",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -4083,7 +4062,6 @@ tf_cc_test(
         "//tensorflow/core/kernels:immutable_constant_op",
         "//tensorflow/core/kernels:matmul_op",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -4194,7 +4172,6 @@ tf_cuda_cc_test(
         ":test",
         ":test_main",
         ":testlib",
-        "@com_google_absl//absl/strings",
         "//third_party/eigen3",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/core/kernels:collective_ops",
@@ -4235,7 +4212,6 @@ tf_cc_test(
         ":test",
         ":test_main",
         ":testlib",
-        "@com_google_absl//absl/strings",
         "//third_party/eigen3",
         "//tensorflow/cc:cc_ops",
         # Link with support for TensorFlow Debugger (tfdbg).
@@ -4376,7 +4352,6 @@ tf_cc_test(
         "//tensorflow/core/kernels:random_ops",
         "//tensorflow/core/kernels:shape_ops",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -4769,7 +4744,6 @@ tf_cc_tests(
         "//tensorflow/cc:client_session",
         "//tensorflow/cc:function_ops",
         "//tensorflow/cc:ops",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/common_runtime/constant_folding_test.cc b/tensorflow/core/common_runtime/constant_folding_test.cc
index 2227705ab5..98aefcde27 100644
--- a/tensorflow/core/common_runtime/constant_folding_test.cc
+++ b/tensorflow/core/common_runtime/constant_folding_test.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/constant_folding.h"
 
 #include "tensorflow/cc/ops/array_ops_internal.h"
@@ -641,7 +640,7 @@ class TestTFFileSystem : public ::tensorflow::NullFileSystem {
       return ::tensorflow::errors::Unimplemented(
           "NewReadOnlyMemoryRegionFromFile unimplemented");
     }
-    const ::absl::string_view sp = data_tensor_.tensor_data();
+    const ::tensorflow::StringPiece sp = data_tensor_.tensor_data();
     *result = std::unique_ptr<::tensorflow::ReadOnlyMemoryRegion>(
         new TestReadOnlyMemoryRegion(sp.data(), sp.size()));
     return ::tensorflow::Status::OK();
diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc
index 56b80cccaa..6e2eb66b94 100644
--- a/tensorflow/core/common_runtime/copy_tensor.cc
+++ b/tensorflow/core/common_runtime/copy_tensor.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <atomic>
 #include <utility>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -49,7 +48,7 @@ std::vector<RegistrationInfo>* MutableRegistry() {
 }
 
 void CopyHostToDevice(const Tensor* input, Allocator* cpu_allocator,
-                      Allocator* out_allocator, absl::string_view edge_name,
+                      Allocator* out_allocator, StringPiece edge_name,
                       Device* dst, Tensor* output,
                       DeviceContext* recv_dev_context, StatusCallback done) {
   if (input->dtype() == DT_VARIANT) {
@@ -114,7 +113,7 @@ void CopyHostToDevice(const Tensor* input, Allocator* cpu_allocator,
 }
 
 void CopyDeviceToHost(const Tensor* input, Allocator* cpu_allocator,
-                      Allocator* out_allocator, absl::string_view edge_name,
+                      Allocator* out_allocator, StringPiece edge_name,
                       Device* src, Tensor* output,
                       DeviceContext* send_dev_context, StatusCallback done) {
   if (input->dtype() == DT_VARIANT) {
@@ -247,8 +246,7 @@ void CopyDeviceToDevice(CopyTensor::CopyFunction copy_function,
 }  // namespace
 
 // static
-void CopyTensor::ViaDMA(absl::string_view edge_name,
-                        DeviceContext* send_dev_context,
+void CopyTensor::ViaDMA(StringPiece edge_name, DeviceContext* send_dev_context,
                         DeviceContext* recv_dev_context, Device* src,
                         Device* dst, const AllocatorAttributes src_alloc_attr,
                         const AllocatorAttributes dst_alloc_attr,
diff --git a/tensorflow/core/common_runtime/copy_tensor.h b/tensorflow/core/common_runtime/copy_tensor.h
index f7a416fffc..9cd5ac2a37 100644
--- a/tensorflow/core/common_runtime/copy_tensor.h
+++ b/tensorflow/core/common_runtime/copy_tensor.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_COPY_TENSOR_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_COPY_TENSOR_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/device_base.h"
@@ -41,8 +40,7 @@ class CopyTensor {
   // the type of devices and memory in use, the copy may be performed
   // synchronously or asynchronously.  'done' will be invoked only
   // after the copy is actually complete.
-  static void ViaDMA(absl::string_view edge_name,
-                     DeviceContext* send_dev_context,
+  static void ViaDMA(StringPiece edge_name, DeviceContext* send_dev_context,
                      DeviceContext* recv_dev_context, Device* src, Device* dst,
                      const AllocatorAttributes src_alloc_attr,
                      const AllocatorAttributes dst_alloc_attr,
diff --git a/tensorflow/core/common_runtime/device_mgr.cc b/tensorflow/core/common_runtime/device_mgr.cc
index 068e944fec..470abc1431 100644
--- a/tensorflow/core/common_runtime/device_mgr.cc
+++ b/tensorflow/core/common_runtime/device_mgr.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device_mgr.h"
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/local_device.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -52,11 +51,11 @@ DeviceMgr::~DeviceMgr() {
   for (Device* p : devices_) delete p;
 }
 
-absl::string_view DeviceMgr::CopyToBackingStore(absl::string_view s) {
+StringPiece DeviceMgr::CopyToBackingStore(StringPiece s) {
   size_t n = s.size();
   char* space = name_backing_store_.Alloc(n);
   memcpy(space, s.data(), n);
-  return absl::string_view(space, n);
+  return StringPiece(space, n);
 }
 
 void DeviceMgr::ListDeviceAttributes(
@@ -90,11 +89,11 @@ string DeviceMgr::DeviceMappingString() const {
   return out;
 }
 
-Status DeviceMgr::LookupDevice(absl::string_view name, Device** device) const {
+Status DeviceMgr::LookupDevice(StringPiece name, Device** device) const {
   Status s;
   auto iter = device_map_.find(name);
   if (iter == device_map_.end()) {
-    std::vector<absl::string_view> device_names;
+    std::vector<StringPiece> device_names;
     for (auto&& itr : device_map_) {
       device_names.push_back(itr.first);
     }
diff --git a/tensorflow/core/common_runtime/device_mgr.h b/tensorflow/core/common_runtime/device_mgr.h
index 7a4235d3bc..c1ff10d9b5 100644
--- a/tensorflow/core/common_runtime/device_mgr.h
+++ b/tensorflow/core/common_runtime/device_mgr.h
@@ -21,10 +21,10 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/lib/core/arena.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/macros.h"
 
@@ -53,7 +53,7 @@ class DeviceMgr {
 
   // Assigns *device with pointer to Device of the given name.
   // Accepts either a full device name, or just the replica-local suffix.
-  Status LookupDevice(absl::string_view name, Device** device) const;
+  Status LookupDevice(StringPiece name, Device** device) const;
 
   // Clears given containers of all devices if 'container' is
   // non-empty. Otherwise, clears default containers of all devices.
@@ -66,9 +66,9 @@ class DeviceMgr {
   typedef gtl::InlinedVector<Device*, 8> DeviceVec;
   DeviceVec devices_;
 
-  absl::string_view CopyToBackingStore(absl::string_view s);
+  StringPiece CopyToBackingStore(StringPiece s);
 
-  std::unordered_map<absl::string_view, Device*, StringPieceHasher> device_map_;
+  std::unordered_map<StringPiece, Device*, StringPieceHasher> device_map_;
   core::Arena name_backing_store_;  // Storage for keys in device_map_
   std::unordered_map<string, int> device_type_counts_;
 
diff --git a/tensorflow/core/common_runtime/device_set.cc b/tensorflow/core/common_runtime/device_set.cc
index a1e162ed4b..f6b4115cbf 100644
--- a/tensorflow/core/common_runtime/device_set.cc
+++ b/tensorflow/core/common_runtime/device_set.cc
@@ -19,9 +19,9 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 
 namespace tensorflow {
@@ -68,7 +68,7 @@ static bool DeviceTypeComparator(const DeviceType& a, const DeviceType& b) {
     return a_priority > b_priority;
   }
 
-  return absl::string_view(a.type()) < absl::string_view(b.type());
+  return StringPiece(a.type()) < StringPiece(b.type());
 }
 
 std::vector<DeviceType> DeviceSet::PrioritizedDeviceTypeList() const {
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 139fed6e6b..52c1cd2691 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/collective_executor_mgr.h"
 #include "tensorflow/core/common_runtime/collective_param_resolver_local.h"
 #include "tensorflow/core/common_runtime/constant_folding.h"
@@ -1193,7 +1192,7 @@ Status DirectSession::CreateExecutors(
 
   if (run_state_args->is_partial_run) {
     ek->graph = std::move(run_state_args->graph);
-    std::unordered_set<absl::string_view, StringPieceHasher> names;
+    std::unordered_set<StringPiece, StringPieceHasher> names;
     for (const string& input : callable_options.feed()) {
       TensorId id(ParseTensorName(input));
       names.emplace(id.first);
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index 5e64f9cee6..3a168bbe3f 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -23,7 +23,6 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/costmodel_manager.h"
 #include "tensorflow/core/common_runtime/debugger_state_interface.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
@@ -66,8 +65,7 @@ class DirectSession : public Session {
   ~DirectSession() override;
 
   typedef std::vector<std::pair<string, Tensor>> NamedTensorList;
-  typedef std::unordered_map<absl::string_view, Node*, StringPieceHasher>
-      NameNodeMap;
+  typedef std::unordered_map<StringPiece, Node*, StringPieceHasher> NameNodeMap;
 
   ::tensorflow::Status Create(const GraphDef& graph) override;
   ::tensorflow::Status Extend(const GraphDef& graph) override;
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index e33b1cb74e..a6440c55ad 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/function_testlib.h"
@@ -2041,8 +2040,8 @@ void TestFeedAndFetchTensorsInDeviceMemory(
         << DataType_Name(dtype);
     TF_ASSERT_OK(session->ReleaseCallable(handle)) << DataType_Name(dtype);
     ASSERT_EQ(1, outputs.size());
-    const absl::string_view actual_data = outputs[0].tensor_data();
-    const absl::string_view expected_data = host_tensor.tensor_data();
+    const StringPiece actual_data = outputs[0].tensor_data();
+    const StringPiece expected_data = host_tensor.tensor_data();
     EXPECT_EQ(expected_data.size(), actual_data.size()) << DataType_Name(dtype);
     EXPECT_EQ(0, memcmp(expected_data.data(), actual_data.data(),
                         std::min(expected_data.size(), actual_data.size())))
diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD
index ebc9e28c3a..7b74c67c85 100644
--- a/tensorflow/core/common_runtime/eager/BUILD
+++ b/tensorflow/core/common_runtime/eager/BUILD
@@ -49,7 +49,6 @@ tf_cuda_library(
     deps = [
         ":eager_executor",
         ":kernel_and_device",
-        "@com_google_absl//absl/strings",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib_lite",
@@ -198,7 +197,6 @@ cc_library(
         ":eager_operation",
         ":kernel_and_device",
         ":tensor_handle",
-        "@com_google_absl//absl/strings",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:android_tensorflow_lib_lite",
@@ -223,7 +221,6 @@ tf_cuda_library(
     deps = [
         ":kernel_and_device",
         "@farmhash_archive//:farmhash",
-        "@com_google_absl//absl/strings",
         # Only the TF_AttrType enum is required, so pull in just the C headers.
         # TODO(b/113535673): Break this dependency and avoid the C header completely.
         "//tensorflow/c:c_api_headers",
diff --git a/tensorflow/core/common_runtime/eager/attr_builder.cc b/tensorflow/core/common_runtime/eager/attr_builder.cc
index d77372684d..5c8369de87 100644
--- a/tensorflow/core/common_runtime/eager/attr_builder.cc
+++ b/tensorflow/core/common_runtime/eager/attr_builder.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/eager/attr_builder.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/common_runtime/eager/kernel_and_device.h"
 #include "tensorflow/core/common_runtime/rendezvous_mgr.h"
@@ -97,12 +96,11 @@ Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out) {
   return Status::OK();
 }
 
-#define DEFINE_SET_ATTR(value_type, value_field)             \
-  template <>                                                \
-  AttrBuilder& AttrBuilder::Set(absl::string_view attr_name, \
-                                value_type&& value) {        \
-    value_field.push_back(std::make_pair(attr_name, value)); \
-    return *this;                                            \
+#define DEFINE_SET_ATTR(value_type, value_field)                             \
+  template <>                                                                \
+  AttrBuilder& AttrBuilder::Set(StringPiece attr_name, value_type&& value) { \
+    value_field.push_back(std::make_pair(attr_name, value));                 \
+    return *this;                                                            \
   }
 
 DEFINE_SET_ATTR(float, float_attrs_);
@@ -196,13 +194,13 @@ void CombineUnordered(const tensorflow::Fprint128& a,
   b->high64 += a.high64;
 }
 
-inline tensorflow::Fprint128 CacheKeyHelper(absl::string_view s,
+inline tensorflow::Fprint128 CacheKeyHelper(StringPiece s,
                                             const tensorflow::Fprint128& b) {
   tensorflow::Fprint128 a = tensorflow::Fingerprint128(s);
   return FingerprintCat128(a, b);
 }
 
-inline tensorflow::Fprint128 CacheKeyHelper(absl::string_view s, uint64 b) {
+inline tensorflow::Fprint128 CacheKeyHelper(StringPiece s, uint64 b) {
   return CacheKeyHelper(s, {b, b});
 }
 
diff --git a/tensorflow/core/common_runtime/eager/attr_builder.h b/tensorflow/core/common_runtime/eager/attr_builder.h
index 4dbf6da49b..c114ea4ba0 100644
--- a/tensorflow/core/common_runtime/eager/attr_builder.h
+++ b/tensorflow/core/common_runtime/eager/attr_builder.h
@@ -21,7 +21,6 @@ limitations under the License.
 #include <memory>
 #include <unordered_map>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/eager/kernel_and_device.h"
@@ -95,7 +94,7 @@ class AttrBuilder {
   AttrBuilder& NumInputs(int n);
 
   template <class T>
-  AttrBuilder& Set(absl::string_view attr_name, T&& value) {
+  AttrBuilder& Set(StringPiece attr_name, T&& value) {
     MayBeInitializeNodeDef();
     SetInAttrValueMap(node_def_->mutable_attr(), attr_name, value);
     return *this;
@@ -108,8 +107,7 @@ class AttrBuilder {
 
  private:
   template <class T>
-  using AttrVec =
-      tensorflow::gtl::InlinedVector<std::pair<absl::string_view, T>, 2>;
+  using AttrVec = tensorflow::gtl::InlinedVector<std::pair<StringPiece, T>, 2>;
 
   void MayBeInitializeNodeDef();
   // Fill `m` with the attr-value pairs set via AttrBuilder::Set() so far, as
@@ -121,7 +119,7 @@ class AttrBuilder {
   void FillAttrValueMap(AttrValueMap* m, bool include_those_in_node_def) const;
 
   template <class T>
-  void SetInAttrValueMap(AttrValueMap* m, absl::string_view attr_name,
+  void SetInAttrValueMap(AttrValueMap* m, StringPiece attr_name,
                          T&& value) const {
     DCHECK(!node_def_finalized_)
         << "Calling SetInAttrValueMap after BuildNodeDef.";
@@ -150,15 +148,16 @@ class AttrBuilder {
 };  // namespace tensorflow
 
 template <>
-AttrBuilder& AttrBuilder::Set(absl::string_view attr_name, int&& value);
+AttrBuilder& AttrBuilder::Set(StringPiece attr_name, int&& value);
 template <>
-AttrBuilder& AttrBuilder::Set(absl::string_view attr_name, float&& value);
+AttrBuilder& AttrBuilder::Set(StringPiece attr_name, float&& value);
 template <>
-AttrBuilder& AttrBuilder::Set(absl::string_view attr_name, bool&& value);
+AttrBuilder& AttrBuilder::Set(StringPiece attr_name, bool&& value);
 template <>
-AttrBuilder& AttrBuilder::Set(absl::string_view attr_name,
+AttrBuilder& AttrBuilder::Set(StringPiece attr_name,
                               tensorflow::DataType&& value);
 
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_ATTR_BUILDER_H_
diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc
index 9fc15e832d..f23cefb33d 100644
--- a/tensorflow/core/common_runtime/eager/context.cc
+++ b/tensorflow/core/common_runtime/eager/context.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/eager/context.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/framework/resource_mgr.h"
@@ -25,7 +24,7 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-bool ReadBoolFromEnvVar(absl::string_view env_var_name, bool default_val) {
+bool ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val) {
   bool val;
   if (tensorflow::ReadBoolFromEnvVar(env_var_name, default_val, &val).ok()) {
     return val;
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index bf42172ffe..c29a767d23 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
@@ -821,7 +820,7 @@ Status FindDeviceFromName(EagerContext* ctx, const char* device_name,
 }
 
 Status ExecuteSend(EagerContext* ctx, tensorflow::Device* device,
-                   TensorHandle* h, absl::string_view wire_id,
+                   TensorHandle* h, StringPiece wire_id,
                    const string& recv_device) {
   const tensorflow::AttrTypeMap* types;
   TF_RETURN_IF_ERROR(tensorflow::AttrTypeMapForOp("_Send", &types));
@@ -848,7 +847,7 @@ Status ExecuteSend(EagerContext* ctx, tensorflow::Device* device,
 }
 
 Status ExecuteRecv(EagerContext* ctx, tensorflow::Device* device,
-                   DataType dtype, absl::string_view wire_id,
+                   DataType dtype, StringPiece wire_id,
                    const string& send_device, int64 send_device_incarnation,
                    TensorHandle** result) {
   const tensorflow::AttrTypeMap* types;
diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc
index 327e22a2ea..e0e5f4a215 100644
--- a/tensorflow/core/common_runtime/function.cc
+++ b/tensorflow/core/common_runtime/function.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <deque>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/executor.h"
 #include "tensorflow/core/common_runtime/executor_factory.h"
@@ -584,7 +583,7 @@ Status FunctionLibraryRuntimeImpl::ReleaseHandle(Handle handle) {
   return Status::OK();
 }
 
-void DumpGraph(absl::string_view label, const Graph* g) {
+void DumpGraph(StringPiece label, const Graph* g) {
   // TODO(zhifengc): Change Graph to record #nodes.
   VLOG(1) << "Graph " << label << " #nodes " << g->num_nodes() << " #edges "
           << g->num_edges();
diff --git a/tensorflow/core/common_runtime/function.h b/tensorflow/core/common_runtime/function.h
index 877fb08306..eeca66f5d0 100644
--- a/tensorflow/core/common_runtime/function.h
+++ b/tensorflow/core/common_runtime/function.h
@@ -19,7 +19,6 @@ limitations under the License.
 #include <functional>
 #include <memory>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/process_function_library_runtime.h"
@@ -125,7 +124,7 @@ bool ExpandInlineFunctions(FunctionLibraryRuntime* lib, Graph* graph);
 
 // Dump the contents of the "graph" to log files if the logging level is
 // sufficiently high.
-void DumpGraph(absl::string_view label, const Graph* g);
+void DumpGraph(StringPiece label, const Graph* g);
 
 // Applies graph rewrite optimization such as inlining, dead code
 // removal, etc.
diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc
index 7182b1bb7b..716167132b 100644
--- a/tensorflow/core/common_runtime/function_test.cc
+++ b/tensorflow/core/common_runtime/function_test.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <atomic>
 #include <utility>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/array_ops_internal.h"
 #include "tensorflow/cc/ops/function_ops.h"
 #include "tensorflow/cc/ops/functional_ops.h"
@@ -56,7 +55,7 @@ Status GetOpSig(const string& op, const OpDef** sig) {
   return OpRegistry::Global()->LookUpOpDef(op, sig);
 }
 
-void HasError(const Status& s, absl::string_view substr) {
+void HasError(const Status& s, StringPiece substr) {
   EXPECT_TRUE(str_util::StrContains(s.ToString(), substr))
       << s << ", expected substring " << substr;
 }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
index 281f6216c4..36294094e9 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #if GOOGLE_CUDA
 
 #include "tensorflow/core/common_runtime/gpu/gpu_device.h"
@@ -54,7 +53,7 @@ Status GetComputeCapability(PlatformGpuId gpu_id, int* cc_major,
   return Status::OK();
 }
 
-void ExpectErrorMessageSubstr(const Status& s, absl::string_view substr) {
+void ExpectErrorMessageSubstr(const Status& s, StringPiece substr) {
   EXPECT_TRUE(str_util::StrContains(s.ToString(), substr))
       << s << ", expected substring " << substr;
 }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc b/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc
index 6061fd50c6..4bc88ffc8c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
@@ -32,7 +31,7 @@ void GPUDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
 }
 
 void GPUDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
-                                             absl::string_view tensor_name,
+                                             StringPiece tensor_name,
                                              Device* device, Tensor* cpu_tensor,
                                              StatusCallback done) {
   GPUUtil::CopyGPUTensorToCPU(device, this, device_tensor, cpu_tensor, done);
diff --git a/tensorflow/core/common_runtime/gpu_device_context.h b/tensorflow/core/common_runtime/gpu_device_context.h
index 5cd5b61be0..3603808152 100644
--- a/tensorflow/core/common_runtime/gpu_device_context.h
+++ b/tensorflow/core/common_runtime/gpu_device_context.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_DEVICE_CONTEXT_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_DEVICE_CONTEXT_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
@@ -54,9 +53,9 @@ class GPUDeviceContext : public DeviceContext {
                              Tensor* device_tensor,
                              StatusCallback done) const override;
 
-  void CopyDeviceTensorToCPU(const Tensor* device_tensor,
-                             absl::string_view edge_name, Device* device,
-                             Tensor* cpu_tensor, StatusCallback done) override;
+  void CopyDeviceTensorToCPU(const Tensor* device_tensor, StringPiece edge_name,
+                             Device* device, Tensor* cpu_tensor,
+                             StatusCallback done) override;
 
   void MaintainLifetimeOnStream(const Tensor* t,
                                 se::Stream* stream) const override {}
diff --git a/tensorflow/core/common_runtime/lower_if_while_test.cc b/tensorflow/core/common_runtime/lower_if_while_test.cc
index f9f6805660..07bcecf168 100644
--- a/tensorflow/core/common_runtime/lower_if_while_test.cc
+++ b/tensorflow/core/common_runtime/lower_if_while_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/lower_if_while.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/client/client_session.h"
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/ops/array_ops.h"
@@ -39,7 +38,7 @@ namespace {
 
 typedef FunctionDefHelper FDH;
 
-static void AssertHasSubstr(absl::string_view s, absl::string_view expected) {
+static void AssertHasSubstr(StringPiece s, StringPiece expected) {
   ASSERT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
diff --git a/tensorflow/core/common_runtime/placer.cc b/tensorflow/core/common_runtime/placer.cc
index d0583284a2..5e1ed13080 100644
--- a/tensorflow/core/common_runtime/placer.cc
+++ b/tensorflow/core/common_runtime/placer.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -29,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
@@ -37,9 +37,8 @@ namespace {
 
 // We hoist the conversion from C-style string literal to StringPiece here,
 // so that we can avoid the many repeated calls to strlen().
-const absl::string_view kColocationAttrNameStringPiece(kColocationAttrName);
-const absl::string_view kColocationGroupPrefixStringPiece(
-    kColocationGroupPrefix);
+const StringPiece kColocationAttrNameStringPiece(kColocationAttrName);
+const StringPiece kColocationGroupPrefixStringPiece(kColocationGroupPrefix);
 
 // Returns a list of devices having type in supported_device_types.  The
 // returned list is sorted by preferred type (higher numeric type is preferred).
@@ -69,7 +68,7 @@ std::vector<Device*> FilterSupportedDevices(
     if (a_priority != b_priority) {
       return a_priority > b_priority;
     }
-    return absl::string_view(a->name()) < absl::string_view(b->name());
+    return StringPiece(a->name()) < StringPiece(b->name());
   };
   std::vector<Device*>::iterator sort_start;
   if (filtered_default_device != nullptr) {
@@ -145,7 +144,7 @@ class ColocationGraph {
     // 'string' values stored in NodeDef attribute lists, as well as StringPiece
     // values that refer to 'string' values from NodeDef::name(), without
     // performing any string allocations.
-    std::unordered_map<absl::string_view, const Node*, StringPieceHasher>
+    std::unordered_map<StringPiece, const Node*, StringPieceHasher>
         colocation_group_root;
 
     for (Node* node : graph_->op_nodes()) {
@@ -162,7 +161,7 @@ class ColocationGraph {
           node->attrs().Find(kColocationAttrNameStringPiece);
       if (attr_value != nullptr && attr_value->has_list()) {
         for (const string& class_spec : attr_value->list().s()) {
-          absl::string_view spec(class_spec);
+          StringPiece spec(class_spec);
           if (str_util::ConsumePrefix(&spec,
                                       kColocationGroupPrefixStringPiece)) {
             found_spec = true;
@@ -184,9 +183,9 @@ class ColocationGraph {
   }
 
   Status ColocateNodeToGroup(
-      std::unordered_map<absl::string_view, const Node*, StringPieceHasher>*
+      std::unordered_map<StringPiece, const Node*, StringPieceHasher>*
           colocation_group_root,
-      Node* node, absl::string_view colocation_group) {
+      Node* node, StringPiece colocation_group) {
     const Node*& root_node = (*colocation_group_root)[colocation_group];
     if (root_node == nullptr) {
       // This is the first node of the colocation group, so
diff --git a/tensorflow/core/common_runtime/profile_handler.h b/tensorflow/core/common_runtime/profile_handler.h
index cfdf8caf5c..391dc8c198 100644
--- a/tensorflow/core/common_runtime/profile_handler.h
+++ b/tensorflow/core/common_runtime/profile_handler.h
@@ -16,10 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_PROFILE_HANDLER_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_PROFILE_HANDLER_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/graph/types.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 
@@ -41,9 +41,8 @@ class ProfileHandler {
   // - op_type: String name of the Op.
   // - details: Main content for timeline click text.
   virtual void RecordOneOp(const string& device, const NodeExecStats& stats,
-                           bool is_copy, absl::string_view label,
-                           absl::string_view op_type,
-                           absl::string_view details) = 0;
+                           bool is_copy, StringPiece label, StringPiece op_type,
+                           StringPiece details) = 0;
 
   // Records that the current step finished.
   //
diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc
index 6852e12609..9488a44778 100644
--- a/tensorflow/core/common_runtime/shape_refiner.cc
+++ b/tensorflow/core/common_runtime/shape_refiner.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/eval_const_tensor.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -69,7 +68,7 @@ Status InferShapesForFunctionSubNode(const Node* node, ShapeRefiner* refiner,
   TF_RETURN_IF_ERROR(refiner->AddNode(node));
   InferenceContext* node_context = CHECK_NOTNULL(refiner->GetContext(node));
 
-  if (absl::string_view(node->type_string()) == kArgOp) {
+  if (StringPiece(node->type_string()) == kArgOp) {
     // Handle special node: function input.
     // Shapes for these nodes are provided in the outer inference
     // context.
@@ -89,7 +88,7 @@ Status InferShapesForFunctionSubNode(const Node* node, ShapeRefiner* refiner,
     if (resource) {
       node_context->set_output_handle_shapes_and_types(0, *resource);
     }
-  } else if (absl::string_view(node->type_string()) == kRetvalOp) {
+  } else if (StringPiece(node->type_string()) == kRetvalOp) {
     // Handle special node: function output.
     // Shapes inferred for these nodes go into the outer inference
     // context.
diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc
index d82cb3d495..a70ab93d4a 100644
--- a/tensorflow/core/common_runtime/step_stats_collector.cc
+++ b/tensorflow/core/common_runtime/step_stats_collector.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/common_runtime/step_stats_collector.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/costmodel_manager.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -23,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tracking_allocator.h"
 #include "tensorflow/core/graph/costmodel.h"
 #include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
@@ -212,7 +212,7 @@ static int ExtractGpuWithStreamAll(string device_name) {
   scanner.RestartCapture().Many(strings::Scanner::DIGIT).StopCapture();
   // Check that the digits are preceded by the 'device:GPU:' string
   scanner.OneLiteral(":UPG:ecived");
-  absl::string_view capture;
+  StringPiece capture;
   bool matched = scanner.GetResult(nullptr, &capture);
 
   if (!matched) {
@@ -241,7 +241,7 @@ static int ExtractGpuWithoutStream(string device_name) {
   scanner.RestartCapture().Many(strings::Scanner::DIGIT).StopCapture();
   // Check that the digits are preceded by the 'device:GPU:' string
   scanner.OneLiteral(":UPG:ecived");
-  absl::string_view capture;
+  StringPiece capture;
   bool matched = scanner.GetResult(nullptr, &capture);
 
   if (!matched) {
@@ -276,7 +276,7 @@ void StepStatsCollector::BuildCostModel(
     const DeviceStepStats* hardware_stats;
   };
 
-  std::unordered_map<absl::string_view, DeviceStats, StringPieceHasher>
+  std::unordered_map<StringPiece, DeviceStats, StringPieceHasher>
       per_device_stats;
   std::unordered_map<int, const DeviceStepStats*> gpu_hardware_stats;
 
@@ -295,7 +295,7 @@ void StepStatsCollector::BuildCostModel(
   }
 
   for (auto& itr : per_device_stats) {
-    const absl::string_view device_name = itr.first;
+    const StringPiece device_name = itr.first;
     const int gpu_id = ExtractGpuWithoutStream(string(device_name));
     if (gpu_id >= 0) {
       // Reference the gpu hardware stats in addition to the regular stats
@@ -307,7 +307,7 @@ void StepStatsCollector::BuildCostModel(
   }
 
   for (auto itr : device_map) {
-    const absl::string_view device = itr.first;
+    const StringPiece device = itr.first;
     if (per_device_stats.find(device) == per_device_stats.end()) {
       continue;
     }
@@ -316,8 +316,7 @@ void StepStatsCollector::BuildCostModel(
     CostModel* cm = cost_model_manager->FindOrCreateCostModel(graph);
     cm->IncrementUpdateTimes();
 
-    std::unordered_map<absl::string_view, Node*, StringPieceHasher>
-        name_to_node;
+    std::unordered_map<StringPiece, Node*, StringPieceHasher> name_to_node;
     for (Node* n : graph->nodes()) {
       name_to_node.emplace(n->name(), n);
     }
diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD
index 60f57e3fd3..591c22b8f6 100644
--- a/tensorflow/core/debug/BUILD
+++ b/tensorflow/core/debug/BUILD
@@ -109,7 +109,6 @@ tf_cuda_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -133,7 +132,6 @@ tf_cuda_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/core/debug/debug_graph_utils.cc b/tensorflow/core/debug/debug_graph_utils.cc
index f35035ce92..5fc95a8f20 100644
--- a/tensorflow/core/debug/debug_graph_utils.cc
+++ b/tensorflow/core/debug/debug_graph_utils.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/debug/debug_graph_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/memory_types.h"
 #include "tensorflow/core/framework/kernel_def.pb.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -345,7 +344,7 @@ Status DebugNodeInserter::ParseDebugOpName(
 
     std::vector<string> attribute_segs = str_util::Split(arguments, ";");
     for (const string& attribute_seg : attribute_segs) {
-      absl::string_view seg(attribute_seg);
+      StringPiece seg(attribute_seg);
       str_util::RemoveWhitespaceContext(&seg);
       if (seg.empty()) {
         continue;
diff --git a/tensorflow/core/debug/debug_io_utils.cc b/tensorflow/core/debug/debug_io_utils.cc
index 4b5a545f5c..6994dec3b5 100644
--- a/tensorflow/core/debug/debug_io_utils.cc
+++ b/tensorflow/core/debug/debug_io_utils.cc
@@ -23,7 +23,6 @@ limitations under the License.
 #include <limits>
 #include <utility>
 #include <vector>
-#include "absl/strings/string_view.h"
 
 #ifndef PLATFORM_WINDOWS
 #include "grpcpp/create_channel.h"
@@ -309,7 +308,7 @@ Status ReadEventFromFile(const string& dump_file_path, Event* event) {
     return s;
   }
 
-  absl::string_view result;
+  StringPiece result;
   s = file->Read(0, file_size, &result, &(content)[0]);
   if (!s.ok()) {
     return s;
diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD
index d2c63d0a3b..37029f3f1a 100644
--- a/tensorflow/core/distributed_runtime/BUILD
+++ b/tensorflow/core/distributed_runtime/BUILD
@@ -172,7 +172,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:worker_proto_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -289,7 +288,6 @@ cc_library(
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:worker_proto_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -346,7 +344,6 @@ cc_library(
         "//tensorflow/core:master_proto_cc",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/debug:debug_graph_utils",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -400,7 +397,6 @@ cc_library(
         "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc
index 97e1639a42..de6e4b4a7c 100644
--- a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc
+++ b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/copy_tensor.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
@@ -145,8 +144,8 @@ BaseRemoteRendezvous::~BaseRemoteRendezvous() {
 // Returns true if "device_name" is a valid full name of local device
 // of the "worker".  This helper is purely based on the worker name
 // and device name and does no lookups in the worker->device_mgr.
-static bool IsLocalDevice(const absl::string_view worker_name,
-                          const absl::string_view device_name) {
+static bool IsLocalDevice(const StringPiece worker_name,
+                          const StringPiece device_name) {
   return str_util::StartsWith(device_name, worker_name);
 }
 
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 6c9bda15d1..8e9eec1ed9 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/common_runtime/profile_handler.h"
 #include "tensorflow/core/common_runtime/stats_publisher_interface.h"
@@ -236,7 +235,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted {
   const bool is_partial_;
   const CallableOptions callable_opts_;
   WorkerCacheInterface* const worker_cache_;  // Not owned.
-  std::unordered_map<absl::string_view, Node*, StringPieceHasher> name_to_node_;
+  std::unordered_map<StringPiece, Node*, StringPieceHasher> name_to_node_;
   const bool should_deregister_;
   std::atomic<int64> execution_count_ = {0};
 
@@ -297,14 +296,12 @@ class MasterSession::ReffedClientGraph : public core::RefCounted {
   // This is a generic method that handles Run, PartialRun, and RunCallable.
   template <class FetchListType, class ClientRequestType,
             class ClientResponseType>
-  Status RunPartitionsHelper(const std::unordered_map<absl::string_view, size_t,
-                                                      StringPieceHasher>& feeds,
-                             const FetchListType& fetches, const MasterEnv* env,
-                             int64 step_id, int64 execution_count,
-                             PerStepState* pss, CallOptions* call_opts,
-                             const ClientRequestType& req,
-                             ClientResponseType* resp, CancellationManager* cm,
-                             bool is_last_partial_run);
+  Status RunPartitionsHelper(
+      const std::unordered_map<StringPiece, size_t, StringPieceHasher>& feeds,
+      const FetchListType& fetches, const MasterEnv* env, int64 step_id,
+      int64 execution_count, PerStepState* pss, CallOptions* call_opts,
+      const ClientRequestType& req, ClientResponseType* resp,
+      CancellationManager* cm, bool is_last_partial_run);
 
   // Deregisters the partitions on the workers.  Called in the
   // destructor and does not wait for the rpc completion.
@@ -732,7 +729,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
   VLOG(2) << "RunPartitions step_id " << step_id << " execution_count "
           << execution_count;
   // Maps the names of fed tensors to their index in `req`.
-  std::unordered_map<absl::string_view, size_t, StringPieceHasher> feeds(3);
+  std::unordered_map<StringPiece, size_t, StringPieceHasher> feeds(3);
   for (size_t i = 0; i < req.num_feeds(); ++i) {
     if (!feeds.insert({req.feed_name(i), i}).second) {
       return errors::InvalidArgument("Duplicated feeds: ", req.feed_name(i));
@@ -756,7 +753,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
   VLOG(2) << "RunPartitions step_id " << step_id << " execution_count "
           << execution_count;
   // Maps the names of fed tensors to their index in `req`.
-  std::unordered_map<absl::string_view, size_t, StringPieceHasher> feeds(3);
+  std::unordered_map<StringPiece, size_t, StringPieceHasher> feeds(3);
   for (size_t i = 0; i < callable_opts_.feed_size(); ++i) {
     if (!feeds.insert({callable_opts_.feed(i), i}).second) {
       // MakeCallable will fail if there are two feeds with the same name.
diff --git a/tensorflow/core/distributed_runtime/remote_device.cc b/tensorflow/core/distributed_runtime/remote_device.cc
index 26c145a5ba..a043c5dee6 100644
--- a/tensorflow/core/distributed_runtime/remote_device.cc
+++ b/tensorflow/core/distributed_runtime/remote_device.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/distributed_runtime/worker_cache.h"
@@ -34,9 +33,9 @@ namespace tensorflow {
 // parsing into one place.
 //
 // Parses and returns the local device part (e.g., cpu:0, gpu:4).
-string GetLocalDeviceName(absl::string_view fullname) {
+string GetLocalDeviceName(StringPiece fullname) {
   auto pos = fullname.rfind('/');
-  CHECK_NE(pos, absl::string_view::npos);
+  CHECK_NE(pos, StringPiece::npos);
   fullname.remove_prefix(pos + 1);
   return string(fullname);
 }
diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD
index 60e34c48f7..4a10d99a60 100644
--- a/tensorflow/core/distributed_runtime/rpc/BUILD
+++ b/tensorflow/core/distributed_runtime/rpc/BUILD
@@ -119,7 +119,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:worker_proto_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -209,7 +208,6 @@ cc_library(
         "//tensorflow/core:master_proto_cc",
         "//tensorflow/core/distributed_runtime:call_options",
         "//tensorflow/core/distributed_runtime:master_interface",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -228,7 +226,6 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:master_proto_cc",
         "//tensorflow/core/distributed_runtime:master",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -257,7 +254,6 @@ cc_library(
         "//tensorflow/core/distributed_runtime:worker_cache",
         "//tensorflow/core/distributed_runtime:worker_env",
         "//tensorflow/core/distributed_runtime:worker_interface",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -320,7 +316,6 @@ tf_cc_binary(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/distributed_runtime:server_lib",
         "//tensorflow/core/kernels:data_flow",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -346,7 +341,6 @@ tf_cc_binary(
         "//tensorflow/core/kernels:matmul_op",
         "//tensorflow/core/kernels:reduction_ops",
         "//tensorflow/core/kernels:variable_ops",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -485,7 +479,6 @@ tf_cuda_cc_test(
         "//tensorflow/core/kernels:dense_update_ops",
         "//tensorflow/core/kernels:matmul_op",
         "//tensorflow/core/kernels:variable_ops",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
index 23cbba58c5..127dea2882 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
@@ -33,7 +33,6 @@ limitations under the License.
 #include "grpcpp/alarm.h"
 #include "grpcpp/server_builder.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/distributed_runtime/master.h"
 #include "tensorflow/core/distributed_runtime/rpc/async_service_interface.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_call.h"
@@ -287,12 +286,12 @@ class GrpcMasterService : public AsyncServiceInterface {
 
   // Start tracing, including the ID attached to the RPC.
   tracing::ScopedActivity* TraceRpc(
-      absl::string_view name,
+      StringPiece name,
       const std::multimap<::grpc::string_ref, ::grpc::string_ref>& metadata) {
-    absl::string_view id;
+    StringPiece id;
     auto it = metadata.find(GrpcIdKey());
     if (it != metadata.end()) {
-      id = absl::string_view(it->second.data(), it->second.size());
+      id = StringPiece(it->second.data(), it->second.size());
     }
     return new tracing::ScopedActivity(name, id);
   }
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
index bc6a97af08..b832a2115c 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <utility>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/distributed_runtime/call_options.h"
 #include "tensorflow/core/distributed_runtime/master_interface.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_master_service_impl.h"
@@ -120,7 +119,7 @@ class GrpcRemoteMaster : public MasterInterface {
 
  private:
   // Start tracing, attaching a unique ID to both the trace and the RPC.
-  tracing::ScopedActivity TraceRpc(absl::string_view name,
+  tracing::ScopedActivity TraceRpc(StringPiece name,
                                    ::grpc::ClientContext* ctx) {
     string trace_id = strings::StrCat(tracing::GetUniqueArg());
     ctx->AddMetadata(GrpcIdKey(), trace_id);
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc b/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
index 358db1e240..fc601991a2 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/distributed_runtime/rpc/grpc_session.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/distributed_runtime/rpc/grpc_testlib.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -668,7 +667,7 @@ TEST(GrpcSessionTest, LongErrorMessage) {
     auto a = test::graph::Constant(&g, Tensor());
     a->set_assigned_device_name(dev_a);
     std::vector<char> long_string_buffer(1024 * 1024, 'x');
-    absl::string_view long_string(long_string_buffer.data(), 1024 * 1024);
+    StringPiece long_string(long_string_buffer.data(), 1024 * 1024);
     string name = strings::StrCat(long_string, "fantasia!");
     auto a_err = test::graph::Error(&g, a, name);
     a_err->set_assigned_device_name(dev_a);
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc
index 951f2d026c..159435fd7d 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.h"
 #include "grpcpp/support/byte_buffer.h"
 #include "grpcpp/support/slice.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor.pb.h"
@@ -159,7 +158,7 @@ void EncodeTensorToByteBuffer(bool is_dead, const Tensor& val,
     io::ProtoEncodeHelper e_skeleton(skeleton.data(), skeleton.size());
     EncodeSkeleton(val, &e_skeleton);
 
-    absl::string_view tdata = val.tensor_data();
+    StringPiece tdata = val.tensor_data();
     uint32 overall_tensor_proto_bytesize =
         (e_skeleton.size() +
          VarLengthEncodingSize(TensorProto::kTensorContentFieldNumber,
@@ -198,7 +197,7 @@ void EncodeTensorToByteBuffer(bool is_dead, const Tensor& val,
     e.WriteVarlengthBeginning(RecvTensorResponse::kTensorFieldNumber,
                               overall_tensor_proto_bytesize);
     // (C)
-    e.WriteRawBytes(absl::string_view(e_skeleton.data(), e_skeleton.size()));
+    e.WriteRawBytes(StringPiece(e_skeleton.data(), e_skeleton.size()));
     // (D1) & (D2)
     e.WriteVarlengthBeginning(TensorProto::kTensorContentFieldNumber,
                               tdata.size());
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc
index a011cad596..e52b257411 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include "grpcpp/security/credentials.h"
 #include "grpcpp/server_builder.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/distributed_runtime/server_lib.h"
 
 #include "tensorflow/core/lib/core/errors.h"
@@ -57,7 +56,7 @@ Status FillServerDef(const string& cluster_spec, const string& job_name,
     const string& job_name = job_pieces[0];
     job_def->set_name(job_name);
     // Does a bit more validation of the tasks_per_replica.
-    const absl::string_view spec = job_pieces[1];
+    const StringPiece spec = job_pieces[1];
     // job_str is of form <job_name>|<host_ports>.
     const std::vector<string> host_ports = str_util::Split(spec, ';');
     for (size_t i = 0; i < host_ports.size(); ++i) {
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc b/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc
index 1268863562..33cbadda0a 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib_server.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include "grpcpp/security/credentials.h"
 #include "grpcpp/server_builder.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/distributed_runtime/server_lib.h"
 
 #include "tensorflow/core/lib/core/errors.h"
@@ -51,7 +50,7 @@ Status FillServerDef(const string& job_spec, const string& job_name,
     CHECK_EQ(2, job_pieces.size()) << job_str;
     job_def->set_name(job_pieces[0]);
     // Does a bit more validation of the tasks_per_replica.
-    const absl::string_view spec = job_pieces[1];
+    const StringPiece spec = job_pieces[1];
     // job_str is of form <job_name>|<host_ports>.
     const std::vector<string> host_ports = str_util::Split(spec, ';');
     uint32 tasks_per_replica = host_ports.size();
diff --git a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
index 0e7484ed03..b8cb538503 100644
--- a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
+++ b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <unordered_set>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
@@ -59,7 +58,7 @@ class RpcRecvTensorCall : public BaseRecvTensorCall {
  public:
   RpcRecvTensorCall() : wi_(nullptr), dst_device_(nullptr) {}
 
-  void Init(WorkerInterface* wi, int64 step_id, absl::string_view key,
+  void Init(WorkerInterface* wi, int64 step_id, StringPiece key,
             AllocatorAttributes alloc_attrs, Device* dst_device,
             const Rendezvous::Args& recv_args, Rendezvous::DoneCallback done) {
     wi_ = wi;
diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc
index a61dca6f95..fe2d1a1293 100644
--- a/tensorflow/core/distributed_runtime/tensor_coding.cc
+++ b/tensorflow/core/distributed_runtime/tensor_coding.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include "google/protobuf/any.pb.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
@@ -197,7 +196,7 @@ bool TensorResponse::ParseTensorSubmessage(
         seen_tensor_content = true;
         TensorShape shape(tensor_meta->tensor_shape());
         Tensor t(allocator_, tensor_meta->dtype(), shape);
-        absl::string_view buf = t.tensor_data();
+        StringPiece buf = t.tensor_data();
         if (static_cast<size_t>(num_bytes) != buf.size()) return false;
         // TODO(jeff,sanjay): Figure out a way to avoid this copy if
         // the underlying ZeroCopyInputStream data is properly aligned
diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h
index 49cd28a6ed..016d1a92c1 100644
--- a/tensorflow/core/example/feature_util.h
+++ b/tensorflow/core/example/feature_util.h
@@ -104,9 +104,9 @@ limitations under the License.
 #include <type_traits>
 
 #include "absl/base/macros.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -170,7 +170,7 @@ template <>
 struct is_string<string> : std::true_type {};
 
 template <>
-struct is_string<::absl::string_view> : std::true_type {};
+struct is_string<::tensorflow::StringPiece> : std::true_type {};
 
 template <typename ValueType>
 struct FeatureTrait<
diff --git a/tensorflow/core/framework/attr_value_util.cc b/tensorflow/core/framework/attr_value_util.cc
index aa6e6f38a7..79966f0692 100644
--- a/tensorflow/core/framework/attr_value_util.cc
+++ b/tensorflow/core/framework/attr_value_util.cc
@@ -18,13 +18,13 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb_text.h"
 #include "tensorflow/core/framework/tensor.pb_text.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb_text.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/proto_serialization.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -187,8 +187,8 @@ string SummarizeString(const string& str) {
   // If the string is long, replace the middle with ellipses.
   constexpr int kMaxStringSummarySize = 80;
   if (escaped.size() >= kMaxStringSummarySize) {
-    absl::string_view prefix(escaped);
-    absl::string_view suffix = prefix;
+    StringPiece prefix(escaped);
+    StringPiece suffix = prefix;
     prefix.remove_suffix(escaped.size() - 10);
     suffix.remove_prefix(escaped.size() - 10);
     return strings::StrCat("\"", prefix, "...", suffix, "\"");
@@ -288,7 +288,7 @@ string SummarizeAttrValue(const AttrValue& attr_value) {
   return "<Unknown AttrValue type>";  // Prevent missing return warning
 }
 
-Status AttrValueHasType(const AttrValue& attr_value, absl::string_view type) {
+Status AttrValueHasType(const AttrValue& attr_value, StringPiece type) {
   int num_set = 0;
 
 #define VALIDATE_FIELD(name, type_string, oneof_case)                         \
@@ -386,8 +386,7 @@ Status AttrValueHasType(const AttrValue& attr_value, absl::string_view type) {
   return Status::OK();
 }
 
-bool ParseAttrValue(absl::string_view type, absl::string_view text,
-                    AttrValue* out) {
+bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out) {
   // Parse type.
   string field_name;
   bool is_list = str_util::ConsumePrefix(&type, "list(");
@@ -421,7 +420,7 @@ bool ParseAttrValue(absl::string_view type, absl::string_view text,
   if (is_list) {
     // TextFormat parser considers "i: 7" to be the same as "i: [7]",
     // but we only want to allow list values with [].
-    absl::string_view cleaned = text;
+    StringPiece cleaned = text;
     str_util::RemoveLeadingWhitespace(&cleaned);
     str_util::RemoveTrailingWhitespace(&cleaned);
     if (cleaned.size() < 2 || cleaned[0] != '[' ||
@@ -474,12 +473,11 @@ DEFINE_SET_ATTR_VALUE_LIST(const std::vector<bool>&, b)
 DEFINE_SET_ATTR_VALUE_LIST(std::initializer_list<bool>, b)
 DEFINE_SET_ATTR_VALUE_BOTH(DataType, type)
 
-void SetAttrValue(absl::string_view value, AttrValue* out) {
+void SetAttrValue(StringPiece value, AttrValue* out) {
   out->set_s(value.data(), value.size());
 }
 
-void SetAttrValue(const gtl::ArraySlice<absl::string_view> value,
-                  AttrValue* out) {
+void SetAttrValue(const gtl::ArraySlice<StringPiece> value, AttrValue* out) {
   out->mutable_list()->Clear();  // Create list() even if value empty.
   for (const auto& v : value) {
     out->mutable_list()->add_s(v.data(), v.size());
diff --git a/tensorflow/core/framework/attr_value_util.h b/tensorflow/core/framework/attr_value_util.h
index c9f2b334c8..9fce488793 100644
--- a/tensorflow/core/framework/attr_value_util.h
+++ b/tensorflow/core/framework/attr_value_util.h
@@ -20,12 +20,12 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
@@ -39,7 +39,7 @@ class NameAttrList;
 string SummarizeAttrValue(const AttrValue& attr_value);
 
 // Generates an error if attr_value doesn't have the indicated attr type.
-Status AttrValueHasType(const AttrValue& attr_value, absl::string_view type);
+Status AttrValueHasType(const AttrValue& attr_value, StringPiece type);
 
 // Converts a text proto value from "text" into the field of *out
 // indicated by "type" (e.g. from the type field of an AttrDef).
@@ -48,13 +48,12 @@ Status AttrValueHasType(const AttrValue& attr_value, absl::string_view type);
 // * If type:"list(string)" and text:"['foo', 'bar']",
 //   then *out is set to "list { s: ['foo', 'bar'] }"
 // Returns true on success.
-bool ParseAttrValue(absl::string_view type, absl::string_view text,
-                    AttrValue* out);
+bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out);
 
 // Sets *out based on the type of value.
 void SetAttrValue(const string& value, AttrValue* out);
 void SetAttrValue(const char* value, AttrValue* out);
-void SetAttrValue(absl::string_view value, AttrValue* out);
+void SetAttrValue(StringPiece value, AttrValue* out);
 void SetAttrValue(int64 value, AttrValue* out);
 void SetAttrValue(int32 value, AttrValue* out);
 void SetAttrValue(float value, AttrValue* out);
@@ -70,7 +69,7 @@ void SetAttrValue(const NameAttrList& value, AttrValue* out);
 
 void SetAttrValue(gtl::ArraySlice<string> value, AttrValue* out);
 void SetAttrValue(gtl::ArraySlice<const char*> value, AttrValue* out);
-void SetAttrValue(gtl::ArraySlice<absl::string_view> value, AttrValue* out);
+void SetAttrValue(gtl::ArraySlice<StringPiece> value, AttrValue* out);
 void SetAttrValue(gtl::ArraySlice<int64> value, AttrValue* out);
 void SetAttrValue(gtl::ArraySlice<int32> value, AttrValue* out);
 void SetAttrValue(gtl::ArraySlice<float> value, AttrValue* out);
diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc
index d3dcc0377a..284dafb886 100644
--- a/tensorflow/core/framework/dataset.cc
+++ b/tensorflow/core/framework/dataset.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/framework/dataset.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/graph/node_builder.h"
@@ -78,7 +77,7 @@ Status GraphDefBuilderWrapper::AddDataset(
     const DatasetBase* dataset,
     const std::vector<std::pair<size_t, Node*>>& inputs,
     const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
-    const std::vector<std::pair<absl::string_view, AttrValue>>& attrs,
+    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
     Node** output) {
   const string& name = dataset->name();
   std::unique_ptr<const GraphDefBuilder::Options> opts(
diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 4bb0f326cb..964a7d5f8c 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -18,7 +18,6 @@ limitations under the License.
 #include <deque>
 #include <memory>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/dataset_stateful_op_whitelist.h"
@@ -58,10 +57,10 @@ class SerializationContext;
 // Used for restoring iterator state.
 class IteratorStateReader {
  public:
-  virtual Status ReadScalar(absl::string_view key, int64* val) = 0;
-  virtual Status ReadScalar(absl::string_view key, string* val) = 0;
-  virtual Status ReadTensor(absl::string_view key, Tensor* val) = 0;
-  virtual bool Contains(absl::string_view key) = 0;
+  virtual Status ReadScalar(StringPiece key, int64* val) = 0;
+  virtual Status ReadScalar(StringPiece key, string* val) = 0;
+  virtual Status ReadTensor(StringPiece key, Tensor* val) = 0;
+  virtual bool Contains(StringPiece key) = 0;
 
   virtual ~IteratorStateReader() {}
 };
@@ -70,9 +69,9 @@ class IteratorStateReader {
 // Used for saving iterator state.
 class IteratorStateWriter {
  public:
-  virtual Status WriteScalar(absl::string_view key, const int64 val) = 0;
-  virtual Status WriteScalar(absl::string_view key, const string& val) = 0;
-  virtual Status WriteTensor(absl::string_view key, const Tensor& val) = 0;
+  virtual Status WriteScalar(StringPiece key, const int64 val) = 0;
+  virtual Status WriteScalar(StringPiece key, const string& val) = 0;
+  virtual Status WriteTensor(StringPiece key, const Tensor& val) = 0;
 
   virtual ~IteratorStateWriter() {}
 };
@@ -156,10 +155,10 @@ class GraphDefBuilderWrapper {
   // `*output` contains a pointer to the output `Node`. It is guaranteed to be
   // non-null if the method returns with an OK status.
   // The returned Node pointer is owned by the backing Graph of GraphDefBuilder.
-  Status AddDataset(
-      const DatasetBase* dataset, const std::vector<Node*>& inputs,
-      const std::vector<std::pair<absl::string_view, AttrValue>>& attrs,
-      Node** output) {
+  Status AddDataset(const DatasetBase* dataset,
+                    const std::vector<Node*>& inputs,
+                    const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
+                    Node** output) {
     std::vector<std::pair<size_t, Node*>> enumerated_inputs(inputs.size());
     for (int i = 0; i < inputs.size(); i++) {
       enumerated_inputs[i] = std::make_pair(i, inputs[i]);
@@ -171,7 +170,7 @@ class GraphDefBuilderWrapper {
       const DatasetBase* dataset,
       const std::vector<std::pair<size_t, Node*>>& inputs,
       const std::vector<std::pair<size_t, gtl::ArraySlice<Node*>>>& list_inputs,
-      const std::vector<std::pair<absl::string_view, AttrValue>>& attrs,
+      const std::vector<std::pair<StringPiece, AttrValue>>& attrs,
       Node** output);
 
   // Adds a user-defined function with name `function_name` to the graph and
@@ -727,8 +726,7 @@ class DatasetOpKernel : public OpKernel {
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
-                             const absl::string_view& argument_name,
-                             T* output) {
+                             const StringPiece& argument_name, T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
@@ -740,7 +738,7 @@ class DatasetOpKernel : public OpKernel {
 
   template <typename T>
   Status ParseVectorArgument(OpKernelContext* ctx,
-                             const absl::string_view& argument_name,
+                             const StringPiece& argument_name,
                              std::vector<T>* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index f6c4632efa..446c31b17f 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -21,11 +21,11 @@ limitations under the License.
 #include <vector>
 
 #include "absl/base/macros.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace Eigen {
@@ -86,9 +86,8 @@ class DeviceContext : public core::RefCounted {
   // device_tensor into "cpu_tensor".  "cpu_tensor" must be allocated
   // to be of the same size as "device_tensor".
   virtual void CopyDeviceTensorToCPU(const Tensor* device_tensor,
-                                     absl::string_view tensor_name,
-                                     Device* device, Tensor* cpu_tensor,
-                                     StatusCallback done) {
+                                     StringPiece tensor_name, Device* device,
+                                     Tensor* cpu_tensor, StatusCallback done) {
     done(errors::Internal("Unrecognized device type in device-to-CPU Copy"));
   }
 
diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index df1c797a31..4ad6fd00da 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/function.pb_text.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -505,9 +504,9 @@ string Print(const NodeDef& n) {
     strings::StrAppend(&out, "[", str_util::Join(entries, ", "), "]");
   }
   strings::StrAppend(&out, "(");
-  std::vector<absl::string_view> dat;
+  std::vector<StringPiece> dat;
   std::vector<string> dep;
-  for (absl::string_view s : n.input()) {
+  for (StringPiece s : n.input()) {
     if (str_util::ConsumePrefix(&s, "^")) {
       dep.emplace_back(s);
     } else {
@@ -648,7 +647,7 @@ Status InstantiateFunction(const FunctionDef& fdef, AttrSlice attr_values,
     }
   }
 
-  auto substitute = [attr_values](absl::string_view name, AttrValue* val) {
+  auto substitute = [attr_values](StringPiece name, AttrValue* val) {
     if (const AttrValue* v = attr_values.Find(name)) {
       *val = *v;
       return true;
@@ -1193,8 +1192,7 @@ Status FunctionLibraryDefinition::LookUp(
   return default_registry_->LookUp(op, op_reg_data);
 }
 
-string FunctionLibraryDefinition::UniqueFunctionName(
-    absl::string_view prefix) const {
+string FunctionLibraryDefinition::UniqueFunctionName(StringPiece prefix) const {
   tf_shared_lock l(mu_);
   int index = 0;
   string name = strings::StrCat(prefix, index);
@@ -1272,8 +1270,7 @@ GET_ATTR(string)
 GET_ATTR(bool)
 #undef GET_ATTR
 
-void FunctionDefHelper::AttrValueWrapper::InitFromString(
-    absl::string_view val) {
+void FunctionDefHelper::AttrValueWrapper::InitFromString(StringPiece val) {
   if (val.size() >= 2 && val[0] == '$') {
     proto.set_placeholder(val.data() + 1, val.size() - 1);
   } else {
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index b2a4416774..fcc6203729 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -17,7 +17,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_FRAMEWORK_FUNCTION_H_
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -93,7 +92,7 @@ class FunctionDefHelper {
     }
 
    private:
-    void InitFromString(absl::string_view val);
+    void InitFromString(StringPiece val);
   };
 
   // Constructs an AttrValue.func given the "name" and "attrs".
@@ -182,8 +181,7 @@ inline FunctionDefHelper::AttrValueWrapper::AttrValueWrapper(
 }
 
 template <>
-inline FunctionDefHelper::AttrValueWrapper::AttrValueWrapper(
-    absl::string_view val) {
+inline FunctionDefHelper::AttrValueWrapper::AttrValueWrapper(StringPiece val) {
   InitFromString(val);
 }
 
@@ -376,7 +374,7 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
 
   // Generates new function name with the specified prefix that is unique
   // across this library.
-  string UniqueFunctionName(absl::string_view prefix) const LOCKS_EXCLUDED(mu_);
+  string UniqueFunctionName(StringPiece prefix) const LOCKS_EXCLUDED(mu_);
 
   // Ops created for function arguments bear the name given by `kArgOp`; those
   // created for return values bear the name given by `kRetOp`.
diff --git a/tensorflow/core/framework/function_testlib.cc b/tensorflow/core/framework/function_testlib.cc
index 2ea4dee9ed..0445c242e9 100644
--- a/tensorflow/core/framework/function_testlib.cc
+++ b/tensorflow/core/framework/function_testlib.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/framework/function_testlib.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -46,8 +45,7 @@ GraphDef GDef(gtl::ArraySlice<NodeDef> nodes,
 }
 
 // Helper to construct a NodeDef.
-NodeDef NDef(absl::string_view name, absl::string_view op,
-             gtl::ArraySlice<string> inputs,
+NodeDef NDef(StringPiece name, StringPiece op, gtl::ArraySlice<string> inputs,
              gtl::ArraySlice<std::pair<string, FDH::AttrValueWrapper>> attrs,
              const string& device) {
   NodeDef n;
diff --git a/tensorflow/core/framework/function_testlib.h b/tensorflow/core/framework/function_testlib.h
index aacd555cb3..a01743423b 100644
--- a/tensorflow/core/framework/function_testlib.h
+++ b/tensorflow/core/framework/function_testlib.h
@@ -18,7 +18,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -49,8 +48,7 @@ class Attrs {
 
 // Helper to construct a NodeDef.
 NodeDef NDef(
-    absl::string_view name, absl::string_view op,
-    gtl::ArraySlice<string> inputs,
+    StringPiece name, StringPiece op, gtl::ArraySlice<string> inputs,
     gtl::ArraySlice<std::pair<string, FunctionDefHelper::AttrValueWrapper>>
         attrs = {},
     const string& device = "");
diff --git a/tensorflow/core/framework/node_def_builder.cc b/tensorflow/core/framework/node_def_builder.cc
index 898a7e716b..348a825af9 100644
--- a/tensorflow/core/framework/node_def_builder.cc
+++ b/tensorflow/core/framework/node_def_builder.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def_builder.h"
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -24,21 +23,20 @@ limitations under the License.
 
 namespace tensorflow {
 
-NodeDefBuilder::NodeOut::NodeOut(absl::string_view n, int i, DataType dt)
+NodeDefBuilder::NodeOut::NodeOut(StringPiece n, int i, DataType dt)
     : node(n), index(i), data_type(dt) {}
 
 NodeDefBuilder::NodeOut::NodeOut() {
   // uninitialized, call Reset() before use.
 }
 
-void NodeDefBuilder::NodeOut::Reset(absl::string_view n, int i, DataType dt) {
+void NodeDefBuilder::NodeOut::Reset(StringPiece n, int i, DataType dt) {
   node = string(n);
   index = i;
   data_type = dt;
 }
 
-NodeDefBuilder::NodeDefBuilder(absl::string_view name,
-                               absl::string_view op_name,
+NodeDefBuilder::NodeDefBuilder(StringPiece name, StringPiece op_name,
                                const OpRegistryInterface* op_registry) {
   node_def_.set_name(string(name));
   const Status status = op_registry->LookUpOpDef(string(op_name), &op_def_);
@@ -50,7 +48,7 @@ NodeDefBuilder::NodeDefBuilder(absl::string_view name,
   }
 }
 
-NodeDefBuilder::NodeDefBuilder(absl::string_view name, const OpDef* op_def)
+NodeDefBuilder::NodeDefBuilder(StringPiece name, const OpDef* op_def)
     : op_def_(op_def) {
   node_def_.set_name(string(name));
   Initialize();
@@ -86,7 +84,7 @@ NodeDefBuilder& NodeDefBuilder::Input(FakeInputFunctor fake_input) {
   return *this;
 }
 
-NodeDefBuilder& NodeDefBuilder::Input(absl::string_view src_node, int src_index,
+NodeDefBuilder& NodeDefBuilder::Input(StringPiece src_node, int src_index,
                                       DataType dt) {
   const OpDef::ArgDef* arg = NextArgDef();
   if (arg != nullptr) SingleInput(arg, src_node, src_index, dt);
@@ -106,7 +104,7 @@ NodeDefBuilder& NodeDefBuilder::Input(gtl::ArraySlice<NodeOut> src_list) {
 }
 
 void NodeDefBuilder::SingleInput(const OpDef::ArgDef* input_arg,
-                                 absl::string_view src_node, int src_index,
+                                 StringPiece src_node, int src_index,
                                  DataType dt) {
   AddInput(src_node, src_index);
 
@@ -163,7 +161,7 @@ void NodeDefBuilder::ListInput(const OpDef::ArgDef* input_arg,
   }
 }
 
-void NodeDefBuilder::AddInput(absl::string_view src_node, int src_index) {
+void NodeDefBuilder::AddInput(StringPiece src_node, int src_index) {
   if (src_node.empty()) {
     errors_.push_back("Empty input node name");
   } else if (src_node[0] == '^') {
@@ -194,12 +192,12 @@ void NodeDefBuilder::VerifyInputRef(const OpDef::ArgDef* input_arg,
   }
 }
 
-NodeDefBuilder& NodeDefBuilder::ControlInput(absl::string_view src_node) {
+NodeDefBuilder& NodeDefBuilder::ControlInput(StringPiece src_node) {
   control_inputs_.emplace_back(src_node);
   return *this;
 }
 
-NodeDefBuilder& NodeDefBuilder::Device(absl::string_view device_spec) {
+NodeDefBuilder& NodeDefBuilder::Device(StringPiece device_spec) {
   node_def_.set_device(string(device_spec));
   return *this;
 }
@@ -250,8 +248,7 @@ Status NodeDefBuilder::Finalize(NodeDef* node_def) const {
   }
 }
 
-NodeDefBuilder& NodeDefBuilder::Attr(absl::string_view name,
-                                     const AttrValue& value) {
+NodeDefBuilder& NodeDefBuilder::Attr(StringPiece name, const AttrValue& value) {
   if (const AttrValue* found = AttrSlice(node_def_).Find(name)) {
     if (!AreAttrValuesEqual(*found, value)) {
       errors_.push_back(strings::StrCat("Inconsistent values for attr '", name,
@@ -264,13 +261,13 @@ NodeDefBuilder& NodeDefBuilder::Attr(absl::string_view name,
   return *this;
 }
 
-#define ATTR(T)                                                           \
-  NodeDefBuilder& NodeDefBuilder::Attr(absl::string_view name, T value) { \
-    AttrValue attr_value;                                                 \
-    SetAttrValue(value, &attr_value);                                     \
-    return Attr(name, attr_value);                                        \
+#define ATTR(T)                                                     \
+  NodeDefBuilder& NodeDefBuilder::Attr(StringPiece name, T value) { \
+    AttrValue attr_value;                                           \
+    SetAttrValue(value, &attr_value);                               \
+    return Attr(name, attr_value);                                  \
   }
-ATTR(absl::string_view)
+ATTR(StringPiece)
 ATTR(const char*)
 ATTR(int32)
 ATTR(int64)
@@ -282,7 +279,7 @@ ATTR(const PartialTensorShape&)
 ATTR(const Tensor&)
 ATTR(const TensorProto&)
 ATTR(const NameAttrList&)
-ATTR(gtl::ArraySlice<absl::string_view>)
+ATTR(gtl::ArraySlice<StringPiece>)
 ATTR(gtl::ArraySlice<const char*>)
 ATTR(gtl::ArraySlice<string>)
 ATTR(gtl::ArraySlice<int32>)
diff --git a/tensorflow/core/framework/node_def_builder.h b/tensorflow/core/framework/node_def_builder.h
index f8af6c0710..ad07ec5480 100644
--- a/tensorflow/core/framework/node_def_builder.h
+++ b/tensorflow/core/framework/node_def_builder.h
@@ -18,7 +18,6 @@ limitations under the License.
 
 #include <functional>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -51,9 +50,9 @@ class NodeDefBuilder {
  public:
   // To specify an output to be consumed by one of the Input() methods below.
   struct NodeOut {
-    NodeOut(absl::string_view n, int i, DataType dt);
+    NodeOut(StringPiece n, int i, DataType dt);
     NodeOut();  // uninitialized, call Reset() before use.
-    void Reset(absl::string_view n, int i, DataType dt);
+    void Reset(StringPiece n, int i, DataType dt);
     string node;
     int index;
     DataType data_type;
@@ -63,16 +62,16 @@ class NodeDefBuilder {
   // the Op plus a registry) for the NodeDef.  Other fields are
   // specified by calling the methods below.
   // REQUIRES: The OpDef must satisfy ValidateOpDef().
-  NodeDefBuilder(absl::string_view name, absl::string_view op_name,
+  NodeDefBuilder(StringPiece name, StringPiece op_name,
                  const OpRegistryInterface* op_registry = OpRegistry::Global());
   // REQUIRES: in addition, *op_def must outlive *this.
-  NodeDefBuilder(absl::string_view name, const OpDef* op_def);
+  NodeDefBuilder(StringPiece name, const OpDef* op_def);
 
   // You must call one Input() function per input_arg in the Op,
   // *and in the same order as the input_args appear in the OpDef.*
 
   // For inputs that take a single tensor.
-  NodeDefBuilder& Input(absl::string_view src_node, int src_index, DataType dt);
+  NodeDefBuilder& Input(StringPiece src_node, int src_index, DataType dt);
   NodeDefBuilder& Input(const NodeOut& src);
 
   // For inputs that take a list of tensors.
@@ -82,49 +81,45 @@ class NodeDefBuilder {
   NodeDefBuilder& Input(FakeInputFunctor fake_input);
 
   // Specify that this node must only run after src_node.
-  NodeDefBuilder& ControlInput(absl::string_view src_node);
+  NodeDefBuilder& ControlInput(StringPiece src_node);
 
   // Constrains what devices this node may be scheduled on.
-  NodeDefBuilder& Device(absl::string_view device_spec);
+  NodeDefBuilder& Device(StringPiece device_spec);
 
   // Sets the attr, if not already set.  If already set with a different
   // value, an error will be returned from Finalize().
-  NodeDefBuilder& Attr(absl::string_view name, const AttrValue& value);
-  NodeDefBuilder& Attr(absl::string_view name, absl::string_view value);
-  NodeDefBuilder& Attr(absl::string_view name, const char* value);
-  NodeDefBuilder& Attr(absl::string_view name, int32 value);
-  NodeDefBuilder& Attr(absl::string_view name, int64 value);
-  NodeDefBuilder& Attr(absl::string_view name, float value);
-  NodeDefBuilder& Attr(absl::string_view name, double value);
-  NodeDefBuilder& Attr(absl::string_view name, bool value);
-  NodeDefBuilder& Attr(absl::string_view name, DataType value);
-  NodeDefBuilder& Attr(absl::string_view name, const PartialTensorShape& value);
-  NodeDefBuilder& Attr(absl::string_view name, const Tensor& value);
-  NodeDefBuilder& Attr(absl::string_view name, const TensorProto& value);
-  NodeDefBuilder& Attr(absl::string_view name, const NameAttrList& value);
-  NodeDefBuilder& Attr(absl::string_view name,
-                       gtl::ArraySlice<absl::string_view> value);
-  NodeDefBuilder& Attr(absl::string_view name,
-                       gtl::ArraySlice<const char*> value);
-  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<string> value);
-  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<int32> value);
-  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<int64> value);
-  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<float> value);
-  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<bool> value);
-  NodeDefBuilder& Attr(absl::string_view name, const std::vector<bool>& value);
-  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<DataType> value);
-  NodeDefBuilder& Attr(absl::string_view name,
-                       gtl::ArraySlice<TensorShape> value);
-  NodeDefBuilder& Attr(absl::string_view name,
+  NodeDefBuilder& Attr(StringPiece name, const AttrValue& value);
+  NodeDefBuilder& Attr(StringPiece name, StringPiece value);
+  NodeDefBuilder& Attr(StringPiece name, const char* value);
+  NodeDefBuilder& Attr(StringPiece name, int32 value);
+  NodeDefBuilder& Attr(StringPiece name, int64 value);
+  NodeDefBuilder& Attr(StringPiece name, float value);
+  NodeDefBuilder& Attr(StringPiece name, double value);
+  NodeDefBuilder& Attr(StringPiece name, bool value);
+  NodeDefBuilder& Attr(StringPiece name, DataType value);
+  NodeDefBuilder& Attr(StringPiece name, const PartialTensorShape& value);
+  NodeDefBuilder& Attr(StringPiece name, const Tensor& value);
+  NodeDefBuilder& Attr(StringPiece name, const TensorProto& value);
+  NodeDefBuilder& Attr(StringPiece name, const NameAttrList& value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<StringPiece> value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<const char*> value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<string> value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<int32> value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<int64> value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<float> value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<bool> value);
+  NodeDefBuilder& Attr(StringPiece name, const std::vector<bool>& value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<DataType> value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<TensorShape> value);
+  NodeDefBuilder& Attr(StringPiece name,
                        gtl::ArraySlice<PartialTensorShape> value);
-  NodeDefBuilder& Attr(absl::string_view name,
+  NodeDefBuilder& Attr(StringPiece name,
                        gtl::ArraySlice<TensorShapeProto> value);
-  NodeDefBuilder& Attr(absl::string_view name, gtl::ArraySlice<Tensor> value);
-  NodeDefBuilder& Attr(absl::string_view name,
-                       gtl::ArraySlice<NameAttrList> value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<Tensor> value);
+  NodeDefBuilder& Attr(StringPiece name, gtl::ArraySlice<NameAttrList> value);
 
   template <class T>
-  NodeDefBuilder& Attr(absl::string_view name, std::initializer_list<T> value) {
+  NodeDefBuilder& Attr(StringPiece name, std::initializer_list<T> value) {
     return Attr(name, gtl::ArraySlice<T>(value));
   }
 
@@ -151,13 +146,13 @@ class NodeDefBuilder {
   bool NextArgAvailable();
 
   // These do the main work of the Input() methods.
-  void SingleInput(const OpDef::ArgDef* input_arg, absl::string_view src_node,
+  void SingleInput(const OpDef::ArgDef* input_arg, StringPiece src_node,
                    int src_index, DataType dt);
   void ListInput(const OpDef::ArgDef* input_arg,
                  gtl::ArraySlice<NodeOut> src_list);
 
   // Add "src_node:src_index" to the list of inputs in the node_def_.
-  void AddInput(absl::string_view src_node, int src_index);
+  void AddInput(StringPiece src_node, int src_index);
 
   // Generate an error if you can't pass dt when expected is expected.
   void VerifyInputType(const OpDef::ArgDef* input_arg, DataType expected,
diff --git a/tensorflow/core/framework/node_def_builder_test.cc b/tensorflow/core/framework/node_def_builder_test.cc
index 45168e9580..cc583df348 100644
--- a/tensorflow/core/framework/node_def_builder_test.cc
+++ b/tensorflow/core/framework/node_def_builder_test.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <memory>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op_def_builder.h"
@@ -51,8 +50,7 @@ class NodeDefBuilderTest : public ::testing::Test {
   // expectations.
   void ExpectSuccess(const NodeDefBuilder& builder,
                      DataTypeSlice expected_in_types,
-                     DataTypeSlice expected_out_types,
-                     absl::string_view proto) {
+                     DataTypeSlice expected_out_types, StringPiece proto) {
     NodeDef node_def;
     Status status = builder.Finalize(&node_def);
     TF_EXPECT_OK(status);
diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc
index 497da45d3d..43ac1d0ada 100644
--- a/tensorflow/core/framework/node_def_util.cc
+++ b/tensorflow/core/framework/node_def_util.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/graph.pb_text.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -51,7 +50,7 @@ AttrSlice::AttrSlice(const NodeDef& node_def)
 
 AttrSlice::AttrSlice(const AttrValueMap* a) : ndef_(nullptr), attrs_(a) {}
 
-static string SummarizeAttrsHelper(AttrSlice attrs, absl::string_view device) {
+static string SummarizeAttrsHelper(AttrSlice attrs, StringPiece device) {
   string ret;
 
   // We sort the attrs so the output is deterministic.
@@ -79,10 +78,9 @@ static string SummarizeAttrsHelper(AttrSlice attrs, absl::string_view device) {
 }
 
 string AttrSlice::SummarizeNode() const {
-  return ndef_
-             ? SummarizeNodeDef(*ndef_)
-             : strings::StrCat(
-                   "[", SummarizeAttrsHelper(*this, absl::string_view()), "]");
+  return ndef_ ? SummarizeNodeDef(*ndef_)
+               : strings::StrCat(
+                     "[", SummarizeAttrsHelper(*this, StringPiece()), "]");
 }
 
 string SummarizeNode(const Node& node) { return SummarizeNodeDef(node.def()); }
@@ -112,7 +110,7 @@ string FormatNodeDefForError(const NodeDef& node_def) {
   return errors::FormatNodeNameForError(node_def.name());
 }
 
-const AttrValue* AttrSlice::Find(absl::string_view attr_name) const {
+const AttrValue* AttrSlice::Find(StringPiece attr_name) const {
   // Currently, the collection used for NodeDef::attr() (google::protobuf::Map)
   // requires that the keys used for lookups have type 'const string&'. Because
   // this method takes a StringPiece, it is necessary to allocate a temporary
@@ -133,7 +131,7 @@ const AttrValue* AttrSlice::Find(absl::string_view attr_name) const {
   return nullptr;
 }
 
-Status AttrSlice::Find(absl::string_view attr_name,
+Status AttrSlice::Find(StringPiece attr_name,
                        const AttrValue** attr_value) const {
   *attr_value = Find(attr_name);
   if (*attr_value != nullptr) {
@@ -168,7 +166,7 @@ bool AttrSlice::EqualAttrs(AttrSlice other, Scratch* scratch) const {
 // The ... is to allow the caller to inject some value validation code.  Use
 // just ; if no additional validation code is needed.
 #define DEFINE_GET_ATTR(TYPE, FIELD, ATTR_TYPE, APPEND_OP, CAST, ...)         \
-  Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,     \
+  Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,           \
                      TYPE* value) {                                           \
     const AttrValue* attr_value;                                              \
     TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));                   \
@@ -178,7 +176,7 @@ bool AttrSlice::EqualAttrs(AttrSlice other, Scratch* scratch) const {
     *value = CAST;                                                            \
     return Status::OK();                                                      \
   }                                                                           \
-  Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,     \
+  Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,           \
                      std::vector<TYPE>* value) {                              \
     const AttrValue* attr_value;                                              \
     TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));                   \
@@ -190,37 +188,37 @@ bool AttrSlice::EqualAttrs(AttrSlice other, Scratch* scratch) const {
     return Status::OK();                                                      \
   }
 
-#define DEFINE_GET_ATTR_SIMPLE(TYPE, FIELD, ATTR_TYPE, APPEND_OP, CAST, ...)  \
-  bool GetNodeAttrSimple(const AttrSlice& attrs, absl::string_view attr_name, \
-                         TYPE* value) {                                       \
-    const AttrValue* attr_value = attrs.Find(attr_name);                      \
-    if (attr_value == nullptr) {                                              \
-      return false;                                                           \
-    }                                                                         \
-    Status s = AttrValueHasType(*attr_value, ATTR_TYPE);                      \
-    if (!s.ok()) {                                                            \
-      return false;                                                           \
-    }                                                                         \
-    const auto& v = attr_value->FIELD();                                      \
-    __VA_ARGS__;                                                              \
-    *value = CAST;                                                            \
-    return true;                                                              \
-  }                                                                           \
-  bool GetNodeAttrSimple(const AttrSlice& attrs, absl::string_view attr_name, \
-                         std::vector<TYPE>* value) {                          \
-    const AttrValue* attr_value = attrs.Find(attr_name);                      \
-    if (attr_value == nullptr) {                                              \
-      return false;                                                           \
-    }                                                                         \
-    Status s = AttrValueHasType(*attr_value, "list(" ATTR_TYPE ")");          \
-    if (!s.ok()) {                                                            \
-      return false;                                                           \
-    }                                                                         \
-    for (const auto& v : attr_value->list().FIELD()) {                        \
-      __VA_ARGS__;                                                            \
-      value->APPEND_OP(CAST);                                                 \
-    }                                                                         \
-    return true;                                                              \
+#define DEFINE_GET_ATTR_SIMPLE(TYPE, FIELD, ATTR_TYPE, APPEND_OP, CAST, ...) \
+  bool GetNodeAttrSimple(const AttrSlice& attrs, StringPiece attr_name,      \
+                         TYPE* value) {                                      \
+    const AttrValue* attr_value = attrs.Find(attr_name);                     \
+    if (attr_value == nullptr) {                                             \
+      return false;                                                          \
+    }                                                                        \
+    Status s = AttrValueHasType(*attr_value, ATTR_TYPE);                     \
+    if (!s.ok()) {                                                           \
+      return false;                                                          \
+    }                                                                        \
+    const auto& v = attr_value->FIELD();                                     \
+    __VA_ARGS__;                                                             \
+    *value = CAST;                                                           \
+    return true;                                                             \
+  }                                                                          \
+  bool GetNodeAttrSimple(const AttrSlice& attrs, StringPiece attr_name,      \
+                         std::vector<TYPE>* value) {                         \
+    const AttrValue* attr_value = attrs.Find(attr_name);                     \
+    if (attr_value == nullptr) {                                             \
+      return false;                                                          \
+    }                                                                        \
+    Status s = AttrValueHasType(*attr_value, "list(" ATTR_TYPE ")");         \
+    if (!s.ok()) {                                                           \
+      return false;                                                          \
+    }                                                                        \
+    for (const auto& v : attr_value->list().FIELD()) {                       \
+      __VA_ARGS__;                                                           \
+      value->APPEND_OP(CAST);                                                \
+    }                                                                        \
+    return true;                                                             \
   }
 
 DEFINE_GET_ATTR(string, s, "string", emplace_back, v, ;)
@@ -255,14 +253,13 @@ DEFINE_GET_ATTR(Tensor, tensor, "tensor", emplace_back, t, Tensor t;
 DEFINE_GET_ATTR(NameAttrList, func, "func", emplace_back, v, ;);
 #undef DEFINE_GET_ATTR
 
-bool HasNodeAttr(const NodeDef& node_def, absl::string_view attr_name) {
+bool HasNodeAttr(const NodeDef& node_def, StringPiece attr_name) {
   return node_def.attr().find(string(attr_name)) != node_def.attr().end();
 }
 
 static const string& kEmptyString = *new string();
 
-const string& GetNodeAttrString(const AttrSlice& attrs,
-                                absl::string_view attr_name) {
+const string& GetNodeAttrString(const AttrSlice& attrs, StringPiece attr_name) {
   const AttrValue* attr_value = attrs.Find(attr_name);
   if (attr_value == nullptr) {
     return kEmptyString;
@@ -274,7 +271,7 @@ const string& GetNodeAttrString(const AttrSlice& attrs,
   return attr_value->s();
 }
 
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    DataTypeVector* value) {
   const AttrValue* attr_value;
   TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));
@@ -285,7 +282,7 @@ Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
   return Status::OK();
 }
 
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    const TensorProto** value) {
   const AttrValue* attr_value;
   TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));
@@ -294,7 +291,7 @@ Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
   return Status::OK();
 }
 
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    const NameAttrList** value) {
   const AttrValue* attr_value;
   TF_RETURN_IF_ERROR(attrs.Find(attr_name, &attr_value));
@@ -577,7 +574,7 @@ namespace {
 
 using ::tensorflow::strings::Scanner;
 
-bool IsValidOpName(absl::string_view sp) {
+bool IsValidOpName(StringPiece sp) {
   return Scanner(sp)
       .One(Scanner::LETTER_DIGIT_DOT)
       .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE)
@@ -585,7 +582,7 @@ bool IsValidOpName(absl::string_view sp) {
       .GetResult();
 }
 
-bool IsValidDataInputName(absl::string_view sp) {
+bool IsValidDataInputName(StringPiece sp) {
   // Data inputs are op_name, op_name:0, or op_name:12345.
   Scanner scan(sp);
   scan.One(Scanner::LETTER_DIGIT_DOT)
@@ -603,7 +600,7 @@ bool IsValidDataInputName(absl::string_view sp) {
   return scan.GetResult();
 }
 
-bool IsValidControlInputName(absl::string_view sp) {
+bool IsValidControlInputName(StringPiece sp) {
   return Scanner(sp)
       .OneLiteral("^")
       .One(Scanner::LETTER_DIGIT_DOT)
@@ -668,19 +665,18 @@ Status AttachDef(const Status& status, const Node& node) {
   return AttachDef(status, node.def());
 }
 
-void AddNodeAttr(absl::string_view name, const AttrValue& value,
-                 NodeDef* node_def) {
+void AddNodeAttr(StringPiece name, const AttrValue& value, NodeDef* node_def) {
   node_def->mutable_attr()->insert(
       AttrValueMap::value_type(string(name), value));
 }
 
-#define ADD_NODE_ATTR(T)                                                 \
-  void AddNodeAttr(absl::string_view name, T value, NodeDef* node_def) { \
-    AttrValue attr_value;                                                \
-    SetAttrValue(value, &attr_value);                                    \
-    AddNodeAttr(name, attr_value, node_def);                             \
+#define ADD_NODE_ATTR(T)                                           \
+  void AddNodeAttr(StringPiece name, T value, NodeDef* node_def) { \
+    AttrValue attr_value;                                          \
+    SetAttrValue(value, &attr_value);                              \
+    AddNodeAttr(name, attr_value, node_def);                       \
   }
-ADD_NODE_ATTR(absl::string_view)
+ADD_NODE_ATTR(StringPiece)
 ADD_NODE_ATTR(const char*)
 ADD_NODE_ATTR(int32)
 ADD_NODE_ATTR(int64)
@@ -692,7 +688,7 @@ ADD_NODE_ATTR(const PartialTensorShape&)
 ADD_NODE_ATTR(const Tensor&)
 ADD_NODE_ATTR(const TensorProto&)
 ADD_NODE_ATTR(const NameAttrList&)
-ADD_NODE_ATTR(gtl::ArraySlice<absl::string_view>)
+ADD_NODE_ATTR(gtl::ArraySlice<StringPiece>)
 ADD_NODE_ATTR(gtl::ArraySlice<const char*>)
 ADD_NODE_ATTR(gtl::ArraySlice<string>)
 ADD_NODE_ATTR(gtl::ArraySlice<int32>)
@@ -708,22 +704,21 @@ ADD_NODE_ATTR(gtl::ArraySlice<Tensor>)
 ADD_NODE_ATTR(gtl::ArraySlice<NameAttrList>)
 #undef ADD_NODE_ATTR
 
-void AddAttr(absl::string_view name, const AttrValue& value,
-             AttrValueMap* map) {
+void AddAttr(StringPiece name, const AttrValue& value, AttrValueMap* map) {
   map->insert(AttrValueMap::value_type(string(name), value));
 }
 
-#define ADD_ATTR(T)                                                  \
-  void AddAttr(absl::string_view name, T value, AttrValueMap* map) { \
-    AttrValue attr_value;                                            \
-    SetAttrValue(value, &attr_value);                                \
-    AddAttr(name, attr_value, map);                                  \
+#define ADD_ATTR(T)                                            \
+  void AddAttr(StringPiece name, T value, AttrValueMap* map) { \
+    AttrValue attr_value;                                      \
+    SetAttrValue(value, &attr_value);                          \
+    AddAttr(name, attr_value, map);                            \
   }
 ADD_ATTR(bool)
 #undef ADD_ATTR
 
-Status AddPrefixAndSuffixToNode(absl::string_view prefix,
-                                absl::string_view suffix, NodeDef* node_def) {
+Status AddPrefixAndSuffixToNode(StringPiece prefix, StringPiece suffix,
+                                NodeDef* node_def) {
   node_def->set_name(strings::StrCat(prefix, node_def->name(), suffix));
   if (node_def->op() == "Enter" || node_def->op() == "RefEnter") {
     string frame_name;
diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h
index 631f358741..0ff67554eb 100644
--- a/tensorflow/core/framework/node_def_util.h
+++ b/tensorflow/core/framework/node_def_util.h
@@ -19,9 +19,9 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -59,65 +59,61 @@ typedef protobuf::Map<string, AttrValue> AttrValueMap;
 
 // Adds an attr with name <name> and value <value> to *node_def.
 // The type of the attr is based on the type of value.
-void AddNodeAttr(absl::string_view name, const AttrValue& value,
+void AddNodeAttr(StringPiece name, const AttrValue& value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, StringPiece value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, const char* value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, int32 value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, int64 value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, float value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, double value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, bool value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, DataType value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, const PartialTensorShape& value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, absl::string_view value,
+void AddNodeAttr(StringPiece name, const Tensor& value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, const TensorProto& value, NodeDef* node_def);
+void AddNodeAttr(StringPiece name, const NameAttrList& value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, const char* value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, int32 value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, int64 value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, float value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, double value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, bool value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, DataType value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, const PartialTensorShape& value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<StringPiece> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, const Tensor& value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<const char*> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, const TensorProto& value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<string> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, const NameAttrList& value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<int32> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name,
-                 gtl::ArraySlice<absl::string_view> value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<const char*> value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<int64> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<string> value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<float> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<int32> value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<bool> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<int64> value,
+void AddNodeAttr(StringPiece name, const std::vector<bool>& value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<float> value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<DataType> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<bool> value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<TensorShape> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, const std::vector<bool>& value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<PartialTensorShape> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<DataType> value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<TensorShapeProto> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<TensorShape> value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<Tensor> value,
                  NodeDef* node_def);
-void AddNodeAttr(absl::string_view name,
-                 gtl::ArraySlice<PartialTensorShape> value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name,
-                 gtl::ArraySlice<TensorShapeProto> value, NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<Tensor> value,
-                 NodeDef* node_def);
-void AddNodeAttr(absl::string_view name, gtl::ArraySlice<NameAttrList> value,
+void AddNodeAttr(StringPiece name, gtl::ArraySlice<NameAttrList> value,
                  NodeDef* node_def);
 
 // Version to workaround C++'s "perfect" forwarding not being able to
 // forward {...} initialization.
 template <class T>
-void AddNodeAttr(absl::string_view name, std::initializer_list<T> value,
+void AddNodeAttr(StringPiece name, std::initializer_list<T> value,
                  NodeDef* node_def) {
   AddNodeAttr(name, gtl::ArraySlice<T>(value), node_def);
 }
 
 // Adds an attr to an attr value map.
-void AddAttr(absl::string_view name, const AttrValue& value, AttrValueMap* map);
-void AddAttr(absl::string_view name, bool value, AttrValueMap* map);
+void AddAttr(StringPiece name, const AttrValue& value, AttrValueMap* map);
+void AddAttr(StringPiece name, bool value, AttrValueMap* map);
 
 class AttrSlice {
  public:
@@ -130,11 +126,11 @@ class AttrSlice {
 
   // Returns the attr with attr_name if found.  Otherwise, returns
   // nullptr.
-  const AttrValue* Find(absl::string_view attr_name) const;
+  const AttrValue* Find(StringPiece attr_name) const;
 
   // Returns the attr_value for attr_name if found. Otherwise, returns a
   // NotFound status.
-  Status Find(absl::string_view attr_name, const AttrValue** attr_value) const;
+  Status Find(StringPiece attr_name, const AttrValue** attr_value) const;
 
   // Helper class to avoid allocations in EqualAttrs.
   // TODO(irving): Will go away once NodeInfo is used.
@@ -169,85 +165,84 @@ class AttrSlice {
 };
 
 // Return true if the attr with the name attr_name is defined in node_def.
-bool HasNodeAttr(const NodeDef& node_def, absl::string_view attr_name);
+bool HasNodeAttr(const NodeDef& node_def, StringPiece attr_name);
 
 // Look up the attr with name attr_name and set *value to its value.  If no
 // attr with attr_name is found in node_def, or the attr does not have
 // a matching type, a non-ok status will be returned.
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    string* value);  // type: "string"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    int64* value);  // type: "int"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    int32* value);  // type: "int"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    float* value);  // type: "float"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    bool* value);  // type: "bool"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    DataType* value);  // type: "type"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    TensorShapeProto* value);  // type: "shape"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    TensorShape* value);  // type: "shape"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    PartialTensorShape* value);  // type: "shape"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    Tensor* value);  // type: "tensor"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<string>* value);  // type "list(string)"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<int64>* value);  // type "list(int)"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<int32>* value);  // type "list(int)"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<float>* value);  // type "list(float)"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<bool>* value);  // type "list(bool)"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<DataType>* value);  // type "list(type)"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    DataTypeVector* value);  // type "list(type)"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<TensorShapeProto>* value);  // type "list(shape)"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<TensorShape>* value);  // type "list(shape)"
 Status GetNodeAttr(
-    const AttrSlice& attrs, absl::string_view attr_name,
+    const AttrSlice& attrs, StringPiece attr_name,
     std::vector<PartialTensorShape>* value);  // type "list(shape)"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<Tensor>* value);  // type: "list(tensor)"
 
 // This version avoids copying the TensorProto.
 // REQUIRES: Must not use *value beyond the lifetime of node_def.
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    const TensorProto** value);  // type: "tensor"
 
 // This version avoids copying the NameAttrList.
 // REQUIRES: Must not use *value beyond the lifetime of node_def.
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    const NameAttrList** value);  // type: "func"
 
 // These versions copies the NameAttrList(s).
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    NameAttrList* value);  // type: "func"
-Status GetNodeAttr(const AttrSlice& attrs, absl::string_view attr_name,
+Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name,
                    std::vector<NameAttrList>* value);  // type: "list(func)"
 
 // Look up the attr with name attr_name and set *value to its value.  If no
 // attr with attr_name is found in node_def, or the attr does not have
 // a matching type, false is returned.
-bool GetNodeAttrSimple(const AttrSlice& attrs, absl::string_view attr_name,
+bool GetNodeAttrSimple(const AttrSlice& attrs, StringPiece attr_name,
                        string* value);  // type: "string"
-bool GetNodeAttrSimple(const AttrSlice& attrs, absl::string_view attr_name,
+bool GetNodeAttrSimple(const AttrSlice& attrs, StringPiece attr_name,
                        std::vector<string>* value);  // type: "string"
 
 // Look up the attr with name attr_name and return a reference to its value.
 // If no attr with attr_name is found in node_def, or the attr does not have
 // a matching type, a reference to an empty string is returned.
 // REQUIRES: Must not use the returned value beyond the lifetime of node_def.
-const string& GetNodeAttrString(const AttrSlice& attrs,
-                                absl::string_view attr_name);
+const string& GetNodeAttrString(const AttrSlice& attrs, StringPiece attr_name);
 
 // Computes the input type for a specific node input.
 // REQUIRES: ValidateOpDef(op_def).ok()
@@ -289,8 +284,7 @@ Status ValidateNodeDef(const NodeDef& node_def, const OpDef& op_def);
 // space, the returned `NameRangeMap` objects borrow the input/output
 // argument names from `op_def`. The `op_def` must outlive the
 // returned `NameRangeMap` objects.
-typedef gtl::FlatMap<absl::string_view, std::pair<int, int>,
-                     hash<absl::string_view>>
+typedef gtl::FlatMap<StringPiece, std::pair<int, int>, hash<StringPiece>>
     NameRangeMap;
 Status NameRangesForNode(const NodeDef& node_def, const OpDef& op_def,
                          NameRangeMap* inputs, NameRangeMap* outputs);
@@ -321,8 +315,8 @@ Status AttachDef(const Status& status, const Node& node);
 // Appends the given prefix and suffix to the original node name in order to
 // make the name unique. If it's an "Enter" node, use the same way to reset
 // attribute "frame_name".
-Status AddPrefixAndSuffixToNode(absl::string_view prefix,
-                                absl::string_view suffix, NodeDef* node_def);
+Status AddPrefixAndSuffixToNode(StringPiece prefix, StringPiece suffix,
+                                NodeDef* node_def);
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_FRAMEWORK_NODE_DEF_UTIL_H_
diff --git a/tensorflow/core/framework/node_def_util_test.cc b/tensorflow/core/framework/node_def_util_test.cc
index 56fb66bd09..d9d437024a 100644
--- a/tensorflow/core/framework/node_def_util_test.cc
+++ b/tensorflow/core/framework/node_def_util_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/framework/node_def_util.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"  // NOLINT
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -271,8 +270,7 @@ void ExpectInvalidSyntax(const NodeDef& bad, const string& message) {
   EXPECT_TRUE(errors::IsInvalidArgument(status))
       << status << "; NodeDef: " << SummarizeNodeDef(bad);
 
-  EXPECT_TRUE(
-      str_util::StrContains(absl::string_view(status.ToString()), message))
+  EXPECT_TRUE(str_util::StrContains(StringPiece(status.ToString()), message))
       << "NodeDef: " << SummarizeNodeDef(bad) << ", " << status << ", "
       << message;
 }
diff --git a/tensorflow/core/framework/op.h b/tensorflow/core/framework/op.h
index b3db276278..81ed5f95f0 100644
--- a/tensorflow/core/framework/op.h
+++ b/tensorflow/core/framework/op.h
@@ -20,7 +20,6 @@ limitations under the License.
 #include <unordered_map>
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_def_builder.h"
 #include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/framework/selective_registration.h"
@@ -262,17 +261,15 @@ template <>
 class OpDefBuilderWrapper<false> {
  public:
   constexpr OpDefBuilderWrapper(const char name[]) {}
-  OpDefBuilderWrapper<false>& Attr(absl::string_view spec) { return *this; }
-  OpDefBuilderWrapper<false>& Input(absl::string_view spec) { return *this; }
-  OpDefBuilderWrapper<false>& Output(absl::string_view spec) { return *this; }
+  OpDefBuilderWrapper<false>& Attr(StringPiece spec) { return *this; }
+  OpDefBuilderWrapper<false>& Input(StringPiece spec) { return *this; }
+  OpDefBuilderWrapper<false>& Output(StringPiece spec) { return *this; }
   OpDefBuilderWrapper<false>& SetIsCommutative() { return *this; }
   OpDefBuilderWrapper<false>& SetIsAggregate() { return *this; }
   OpDefBuilderWrapper<false>& SetIsStateful() { return *this; }
   OpDefBuilderWrapper<false>& SetAllowsUninitializedInput() { return *this; }
-  OpDefBuilderWrapper<false>& Deprecated(int, absl::string_view) {
-    return *this;
-  }
-  OpDefBuilderWrapper<false>& Doc(absl::string_view text) { return *this; }
+  OpDefBuilderWrapper<false>& Deprecated(int, StringPiece) { return *this; }
+  OpDefBuilderWrapper<false>& Doc(StringPiece text) { return *this; }
   OpDefBuilderWrapper<false>& SetShapeFn(
       Status (*fn)(shape_inference::InferenceContext*)) {
     return *this;
diff --git a/tensorflow/core/framework/op_def_builder.cc b/tensorflow/core/framework/op_def_builder.cc
index e46d91a602..8a9bb63182 100644
--- a/tensorflow/core/framework/op_def_builder.cc
+++ b/tensorflow/core/framework/op_def_builder.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <limits>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/op_def_util.h"
@@ -34,11 +33,11 @@ namespace tensorflow {
 
 namespace {
 
-string AttrError(absl::string_view orig, const string& op_name) {
+string AttrError(StringPiece orig, const string& op_name) {
   return strings::StrCat(" from Attr(\"", orig, "\") for Op ", op_name);
 }
 
-bool ConsumeAttrName(absl::string_view* sp, absl::string_view* out) {
+bool ConsumeAttrName(StringPiece* sp, StringPiece* out) {
   return Scanner(*sp)
       .One(Scanner::LETTER)
       .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
@@ -49,7 +48,7 @@ bool ConsumeAttrName(absl::string_view* sp, absl::string_view* out) {
       .GetResult(sp, out);
 }
 
-bool ConsumeListPrefix(absl::string_view* sp) {
+bool ConsumeListPrefix(StringPiece* sp) {
   return Scanner(*sp)
       .OneLiteral("list")
       .AnySpace()
@@ -58,8 +57,7 @@ bool ConsumeListPrefix(absl::string_view* sp) {
       .GetResult(sp);
 }
 
-bool ConsumeQuotedString(char quote_ch, absl::string_view* sp,
-                         absl::string_view* out) {
+bool ConsumeQuotedString(char quote_ch, StringPiece* sp, StringPiece* out) {
   const string quote_str(1, quote_ch);
   return Scanner(*sp)
       .OneLiteral(quote_str.c_str())
@@ -71,7 +69,7 @@ bool ConsumeQuotedString(char quote_ch, absl::string_view* sp,
       .GetResult(sp, out);
 }
 
-bool ConsumeAttrType(absl::string_view* sp, absl::string_view* out) {
+bool ConsumeAttrType(StringPiece* sp, StringPiece* out) {
   return Scanner(*sp)
       .Many(Scanner::LOWERLETTER_DIGIT)
       .StopCapture()
@@ -79,10 +77,10 @@ bool ConsumeAttrType(absl::string_view* sp, absl::string_view* out) {
       .GetResult(sp, out);
 }
 
-bool ConsumeAttrNumber(absl::string_view* sp, int64* out) {
+bool ConsumeAttrNumber(StringPiece* sp, int64* out) {
   Scanner scan(*sp);
-  absl::string_view match;
-  absl::string_view remaining;
+  StringPiece match;
+  StringPiece remaining;
 
   scan.AnySpace().RestartCapture();
   if (scan.Peek() == '-') {
@@ -112,21 +110,20 @@ bool ConsumeAttrNumber(absl::string_view* sp, int64* out) {
     }                                                                     \
   } while (false)
 
-bool ConsumeCompoundAttrType(absl::string_view* sp, absl::string_view* out) {
+bool ConsumeCompoundAttrType(StringPiece* sp, StringPiece* out) {
   auto capture_begin = sp->begin();
   if (str_util::ConsumePrefix(sp, "numbertype") ||
       str_util::ConsumePrefix(sp, "numerictype") ||
       str_util::ConsumePrefix(sp, "quantizedtype") ||
       str_util::ConsumePrefix(sp, "realnumbertype") ||
       str_util::ConsumePrefix(sp, "realnumberictype")) {
-    *out = absl::string_view(capture_begin, sp->begin() - capture_begin);
+    *out = StringPiece(capture_begin, sp->begin() - capture_begin);
     return true;
   }
   return false;
 }
 
-bool ProcessCompoundType(const absl::string_view type_string,
-                         AttrValue* allowed) {
+bool ProcessCompoundType(const StringPiece type_string, AttrValue* allowed) {
   if (type_string == "numbertype" || type_string == "numerictype") {
     for (DataType dt : NumberTypes()) {
       allowed->mutable_list()->add_type(dt);
@@ -146,20 +143,20 @@ bool ProcessCompoundType(const absl::string_view type_string,
   return true;
 }
 
-void FinalizeAttr(absl::string_view spec, OpDef* op_def,
+void FinalizeAttr(StringPiece spec, OpDef* op_def,
                   std::vector<string>* errors) {
   OpDef::AttrDef* attr = op_def->add_attr();
-  absl::string_view orig(spec);
+  StringPiece orig(spec);
 
   // Parse "<name>:" at the beginning.
-  absl::string_view tmp_name;
+  StringPiece tmp_name;
   VERIFY(ConsumeAttrName(&spec, &tmp_name), "Trouble parsing '<name>:'");
   attr->set_name(tmp_name.data(), tmp_name.size());
 
   // Read "<type>" or "list(<type>)".
   bool is_list = ConsumeListPrefix(&spec);
   string type;
-  absl::string_view type_string;  // Used if type == "type"
+  StringPiece type_string;  // Used if type == "type"
   if (str_util::ConsumePrefix(&spec, "string")) {
     type = "string";
   } else if (str_util::ConsumePrefix(&spec, "int")) {
@@ -188,7 +185,7 @@ void FinalizeAttr(absl::string_view spec, OpDef* op_def,
     if (str_util::StartsWith(spec, "\"") || str_util::StartsWith(spec, "'")) {
       type = "string";  // "{ \"foo\", \"bar\" }" or "{ 'foo', 'bar' }"
       while (true) {
-        absl::string_view escaped_string;
+        StringPiece escaped_string;
         VERIFY(ConsumeQuotedString('"', &spec, &escaped_string) ||
                    ConsumeQuotedString('\'', &spec, &escaped_string),
                "Trouble parsing allowed string at '", spec, "'");
@@ -269,13 +266,12 @@ void FinalizeAttr(absl::string_view spec, OpDef* op_def,
 
 #undef VERIFY
 
-string InOutError(bool is_output, absl::string_view orig,
-                  const string& op_name) {
+string InOutError(bool is_output, StringPiece orig, const string& op_name) {
   return strings::StrCat(" from ", is_output ? "Output" : "Input", "(\"", orig,
                          "\") for Op ", op_name);
 }
 
-bool ConsumeInOutName(absl::string_view* sp, absl::string_view* out) {
+bool ConsumeInOutName(StringPiece* sp, StringPiece* out) {
   return Scanner(*sp)
       .One(Scanner::LOWERLETTER)
       .Any(Scanner::LOWERLETTER_DIGIT_UNDERSCORE)
@@ -286,7 +282,7 @@ bool ConsumeInOutName(absl::string_view* sp, absl::string_view* out) {
       .GetResult(sp, out);
 }
 
-bool ConsumeInOutRefOpen(absl::string_view* sp) {
+bool ConsumeInOutRefOpen(StringPiece* sp) {
   return Scanner(*sp)
       .OneLiteral("Ref")
       .AnySpace()
@@ -295,11 +291,11 @@ bool ConsumeInOutRefOpen(absl::string_view* sp) {
       .GetResult(sp);
 }
 
-bool ConsumeInOutRefClose(absl::string_view* sp) {
+bool ConsumeInOutRefClose(StringPiece* sp) {
   return Scanner(*sp).OneLiteral(")").AnySpace().GetResult(sp);
 }
 
-bool ConsumeInOutNameOrType(absl::string_view* sp, absl::string_view* out) {
+bool ConsumeInOutNameOrType(StringPiece* sp, StringPiece* out) {
   return Scanner(*sp)
       .One(Scanner::LETTER)
       .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
@@ -308,7 +304,7 @@ bool ConsumeInOutNameOrType(absl::string_view* sp, absl::string_view* out) {
       .GetResult(sp, out);
 }
 
-bool ConsumeInOutTimesType(absl::string_view* sp, absl::string_view* out) {
+bool ConsumeInOutTimesType(StringPiece* sp, StringPiece* out) {
   return Scanner(*sp)
       .OneLiteral("*")
       .AnySpace()
@@ -329,15 +325,15 @@ bool ConsumeInOutTimesType(absl::string_view* sp, absl::string_view* out) {
     }                                                                 \
   } while (false)
 
-void FinalizeInputOrOutput(absl::string_view spec, bool is_output,
-                           OpDef* op_def, std::vector<string>* errors) {
+void FinalizeInputOrOutput(StringPiece spec, bool is_output, OpDef* op_def,
+                           std::vector<string>* errors) {
   OpDef::ArgDef* arg =
       is_output ? op_def->add_output_arg() : op_def->add_input_arg();
 
-  absl::string_view orig(spec);
+  StringPiece orig(spec);
 
   // Parse "<name>:" at the beginning.
-  absl::string_view tmp_name;
+  StringPiece tmp_name;
   VERIFY(ConsumeInOutName(&spec, &tmp_name), "Trouble parsing 'name:'");
   arg->set_name(tmp_name.data(), tmp_name.size());
 
@@ -347,7 +343,7 @@ void FinalizeInputOrOutput(absl::string_view spec, bool is_output,
   }
 
   {  // Parse "<name|type>" or "<name>*<name|type>".
-    absl::string_view first, second, type_or_attr;
+    StringPiece first, second, type_or_attr;
     VERIFY(ConsumeInOutNameOrType(&spec, &first),
            "Trouble parsing either a type or an attr name at '", spec, "'");
     if (ConsumeInOutTimesType(&spec, &second)) {
@@ -413,7 +409,7 @@ void FinalizeInputOrOutput(absl::string_view spec, bool is_output,
 
 #undef VERIFY
 
-int num_leading_spaces(absl::string_view s) {
+int num_leading_spaces(StringPiece s) {
   size_t i = 0;
   while (i < s.size() && s[i] == ' ') {
     ++i;
@@ -421,7 +417,7 @@ int num_leading_spaces(absl::string_view s) {
   return i;
 }
 
-bool ConsumeDocNameColon(absl::string_view* sp, absl::string_view* out) {
+bool ConsumeDocNameColon(StringPiece* sp, StringPiece* out) {
   return Scanner(*sp)
       .One(Scanner::LETTER)
       .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
@@ -432,7 +428,7 @@ bool ConsumeDocNameColon(absl::string_view* sp, absl::string_view* out) {
       .GetResult(sp, out);
 }
 
-bool IsDocNameColon(absl::string_view s) {
+bool IsDocNameColon(StringPiece s) {
   return ConsumeDocNameColon(&s, nullptr /* out */);
 }
 
@@ -469,8 +465,8 @@ void FinalizeDoc(const string& text, OpDef* op_def,
   // name: description
   //   possibly continued on the next line
   //   if so, we remove the minimum indent
-  absl::string_view name;
-  std::vector<absl::string_view> description;
+  StringPiece name;
+  std::vector<StringPiece> description;
   while (static_cast<size_t>(l) < lines.size()) {
     description.clear();
     description.push_back(lines[l]);
@@ -609,13 +605,13 @@ Status OpDefBuilder::Finalize(OpRegistrationData* op_reg_data) const {
   *op_reg_data = op_reg_data_;
 
   OpDef* op_def = &op_reg_data->op_def;
-  for (absl::string_view attr : attrs_) {
+  for (StringPiece attr : attrs_) {
     FinalizeAttr(attr, op_def, &errors);
   }
-  for (absl::string_view input : inputs_) {
+  for (StringPiece input : inputs_) {
     FinalizeInputOrOutput(input, false, op_def, &errors);
   }
-  for (absl::string_view output : outputs_) {
+  for (StringPiece output : outputs_) {
     FinalizeInputOrOutput(output, true, op_def, &errors);
   }
   FinalizeDoc(doc_, op_def, &errors);
diff --git a/tensorflow/core/framework/op_def_builder_test.cc b/tensorflow/core/framework/op_def_builder_test.cc
index 1e6a62e767..9b24e3aa00 100644
--- a/tensorflow/core/framework/op_def_builder_test.cc
+++ b/tensorflow/core/framework/op_def_builder_test.cc
@@ -15,11 +15,11 @@ limitations under the License.
 
 #include "tensorflow/core/framework/op_def_builder.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -40,7 +40,7 @@ class OpDefBuilderTest : public ::testing::Test {
  protected:
   OpDefBuilder b() { return OpDefBuilder("Test"); }
 
-  void ExpectSuccess(const OpDefBuilder& builder, absl::string_view proto,
+  void ExpectSuccess(const OpDefBuilder& builder, StringPiece proto,
                      OpShapeInferenceFn* shape_fn_out = nullptr) {
     OpRegistrationData op_reg_data;
     Status status = builder.Finalize(&op_reg_data);
@@ -61,7 +61,7 @@ class OpDefBuilderTest : public ::testing::Test {
     }
   }
 
-  void ExpectOrdered(const OpDefBuilder& builder, absl::string_view proto) {
+  void ExpectOrdered(const OpDefBuilder& builder, StringPiece proto) {
     OpRegistrationData op_reg_data;
     Status status = builder.Finalize(&op_reg_data);
     TF_EXPECT_OK(status);
diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc
index aa37d40824..3597f43d51 100644
--- a/tensorflow/core/framework/op_def_util.cc
+++ b/tensorflow/core/framework/op_def_util.cc
@@ -18,12 +18,12 @@ limitations under the License.
 #include <set>
 #include <unordered_map>
 #include <unordered_set>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/op_def.pb_text.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/proto_serialization.h"
@@ -145,7 +145,7 @@ Status ValidateAttrValue(const AttrValue& attr_value,
   return Status::OK();
 }
 
-const OpDef::AttrDef* FindAttr(absl::string_view name, const OpDef& op_def) {
+const OpDef::AttrDef* FindAttr(StringPiece name, const OpDef& op_def) {
   for (int i = 0; i < op_def.attr_size(); ++i) {
     if (op_def.attr(i).name() == name) {
       return &op_def.attr(i);
@@ -154,7 +154,7 @@ const OpDef::AttrDef* FindAttr(absl::string_view name, const OpDef& op_def) {
   return nullptr;
 }
 
-OpDef::AttrDef* FindAttrMutable(absl::string_view name, OpDef* op_def) {
+OpDef::AttrDef* FindAttrMutable(StringPiece name, OpDef* op_def) {
   for (int i = 0; i < op_def->attr_size(); ++i) {
     if (op_def->attr(i).name() == name) {
       return op_def->mutable_attr(i);
@@ -163,7 +163,7 @@ OpDef::AttrDef* FindAttrMutable(absl::string_view name, OpDef* op_def) {
   return nullptr;
 }
 
-const OpDef::ArgDef* FindInputArg(absl::string_view name, const OpDef& op_def) {
+const OpDef::ArgDef* FindInputArg(StringPiece name, const OpDef& op_def) {
   for (int i = 0; i < op_def.input_arg_size(); ++i) {
     if (op_def.input_arg(i).name() == name) {
       return &op_def.input_arg(i);
@@ -172,7 +172,7 @@ const OpDef::ArgDef* FindInputArg(absl::string_view name, const OpDef& op_def) {
   return nullptr;
 }
 
-const ApiDef::Arg* FindInputArg(absl::string_view name, const ApiDef& api_def) {
+const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def) {
   for (int i = 0; i < api_def.in_arg_size(); ++i) {
     if (api_def.in_arg(i).name() == name) {
       return &api_def.in_arg(i);
@@ -268,11 +268,11 @@ Status ValidateOpDef(const OpDef& op_def) {
              attr.name(), " that matches a data type");
 
     // Validate type
-    absl::string_view type(attr.type());
+    StringPiece type(attr.type());
     bool is_list = str_util::ConsumePrefix(&type, "list(");
     bool found = false;
-    for (absl::string_view valid : {"string", "int", "float", "bool", "type",
-                                    "shape", "tensor", "func"}) {
+    for (StringPiece valid : {"string", "int", "float", "bool", "type", "shape",
+                              "tensor", "func"}) {
       if (str_util::ConsumePrefix(&type, valid)) {
         found = true;
         break;
diff --git a/tensorflow/core/framework/op_def_util.h b/tensorflow/core/framework/op_def_util.h
index a5f0608861..85afe2bdea 100644
--- a/tensorflow/core/framework/op_def_util.h
+++ b/tensorflow/core/framework/op_def_util.h
@@ -20,7 +20,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_FRAMEWORK_OP_DEF_UTIL_H_
 
 #include <string>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/lib/core/status.h"
@@ -42,16 +41,16 @@ Status ValidateAttrValue(const AttrValue& attr_value,
 
 // The following search through op_def for an attr with the indicated name.
 // Returns nullptr if no such attr is found.
-const OpDef::AttrDef* FindAttr(absl::string_view name, const OpDef& op_def);
-OpDef::AttrDef* FindAttrMutable(absl::string_view name, OpDef* op_def);
+const OpDef::AttrDef* FindAttr(StringPiece name, const OpDef& op_def);
+OpDef::AttrDef* FindAttrMutable(StringPiece name, OpDef* op_def);
 
 // Searches op_def for input argument with the indicated name.
 // Returns nullptr if no such attr is found.
-const OpDef::ArgDef* FindInputArg(absl::string_view name, const OpDef& op_def);
+const OpDef::ArgDef* FindInputArg(StringPiece name, const OpDef& op_def);
 
 // Searches api_def for input argument with the indicated name.
 // Returns nullptr if no such attr is found.
-const ApiDef::Arg* FindInputArg(absl::string_view name, const ApiDef& api_def);
+const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def);
 
 // Produce a human-readable version of an op_def that is more concise
 // than a text-format proto.  Excludes descriptions.
diff --git a/tensorflow/core/framework/op_gen_lib.cc b/tensorflow/core/framework/op_gen_lib.cc
index 81c67ed34c..505ab54775 100644
--- a/tensorflow/core/framework/op_gen_lib.cc
+++ b/tensorflow/core/framework/op_gen_lib.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <algorithm>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -27,7 +26,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-string WordWrap(absl::string_view prefix, absl::string_view str, int width) {
+string WordWrap(StringPiece prefix, StringPiece str, int width) {
   const string indent_next_line = "\n" + Spaces(prefix.size());
   width -= prefix.size();
   string result;
@@ -40,16 +39,16 @@ string WordWrap(absl::string_view prefix, absl::string_view str, int width) {
       break;
     }
     auto space = str.rfind(' ', width);
-    if (space == absl::string_view::npos) {
+    if (space == StringPiece::npos) {
       // Rather make a too-long line and break at a space.
       space = str.find(' ');
-      if (space == absl::string_view::npos) {
+      if (space == StringPiece::npos) {
         strings::StrAppend(&result, str);
         break;
       }
     }
     // Breaking at character at position <space>.
-    absl::string_view to_append = str.substr(0, space);
+    StringPiece to_append = str.substr(0, space);
     str.remove_prefix(space + 1);
     // Remove spaces at break.
     while (str_util::EndsWith(to_append, " ")) {
@@ -66,7 +65,7 @@ string WordWrap(absl::string_view prefix, absl::string_view str, int width) {
   return result;
 }
 
-bool ConsumeEquals(absl::string_view* description) {
+bool ConsumeEquals(StringPiece* description) {
   if (str_util::ConsumePrefix(description, "=")) {
     while (str_util::ConsumePrefix(description,
                                    " ")) {  // Also remove spaces after "=".
@@ -81,12 +80,12 @@ bool ConsumeEquals(absl::string_view* description) {
 // contains the maximum prefix of the input `*orig` that doesn't
 // contain `split_ch`, and `*orig` contains everything after the
 // first `split_ch`.
-static bool SplitAt(char split_ch, absl::string_view* orig,
-                    absl::string_view* before_split) {
+static bool SplitAt(char split_ch, StringPiece* orig,
+                    StringPiece* before_split) {
   auto pos = orig->find(split_ch);
-  if (pos == absl::string_view::npos) {
+  if (pos == StringPiece::npos) {
     *before_split = *orig;
-    *orig = absl::string_view();
+    *orig = StringPiece();
     return false;
   } else {
     *before_split = orig->substr(0, pos);
@@ -97,9 +96,9 @@ static bool SplitAt(char split_ch, absl::string_view* orig,
 
 // Does this line start with "<spaces><field>:" where "<field>" is
 // in multi_line_fields? Sets *colon_pos to the position of the colon.
-static bool StartsWithFieldName(absl::string_view line,
+static bool StartsWithFieldName(StringPiece line,
                                 const std::vector<string>& multi_line_fields) {
-  absl::string_view up_to_colon;
+  StringPiece up_to_colon;
   if (!SplitAt(':', &line, &up_to_colon)) return false;
   while (str_util::ConsumePrefix(&up_to_colon, " "))
     ;  // Remove leading spaces.
@@ -111,7 +110,7 @@ static bool StartsWithFieldName(absl::string_view line,
   return false;
 }
 
-static bool ConvertLine(absl::string_view line,
+static bool ConvertLine(StringPiece line,
                         const std::vector<string>& multi_line_fields,
                         string* ml) {
   // Is this a field we should convert?
@@ -119,8 +118,8 @@ static bool ConvertLine(absl::string_view line,
     return false;
   }
   // Has a matching field name, so look for "..." after the colon.
-  absl::string_view up_to_colon;
-  absl::string_view after_colon = line;
+  StringPiece up_to_colon;
+  StringPiece after_colon = line;
   SplitAt(':', &after_colon, &up_to_colon);
   while (str_util::ConsumePrefix(&after_colon, " "))
     ;  // Remove leading spaces.
@@ -129,12 +128,12 @@ static bool ConvertLine(absl::string_view line,
     return false;
   }
   auto last_quote = after_colon.rfind('\"');
-  if (last_quote == absl::string_view::npos) {
+  if (last_quote == StringPiece::npos) {
     // Error: we don't see the expected matching quote, abort the conversion.
     return false;
   }
-  absl::string_view escaped = after_colon.substr(0, last_quote);
-  absl::string_view suffix = after_colon.substr(last_quote + 1);
+  StringPiece escaped = after_colon.substr(0, last_quote);
+  StringPiece suffix = after_colon.substr(last_quote + 1);
   // We've now parsed line into '<up_to_colon>: "<escaped>"<suffix>'
 
   string unescaped;
@@ -160,13 +159,13 @@ static bool ConvertLine(absl::string_view line,
   return true;
 }
 
-string PBTxtToMultiline(absl::string_view pbtxt,
+string PBTxtToMultiline(StringPiece pbtxt,
                         const std::vector<string>& multi_line_fields) {
   string ml;
   // Probably big enough, since the input and output are about the
   // same size, but just a guess.
   ml.reserve(pbtxt.size() * (17. / 16));
-  absl::string_view line;
+  StringPiece line;
   while (!pbtxt.empty()) {
     // Split pbtxt into its first line and everything after.
     SplitAt('\n', &pbtxt, &line);
@@ -181,8 +180,8 @@ string PBTxtToMultiline(absl::string_view pbtxt,
 // Given a single line of text `line` with first : at `colon`, determine if
 // there is an "<<END" expression after the colon and if so return true and set
 // `*end` to everything after the "<<".
-static bool FindMultiline(absl::string_view line, size_t colon, string* end) {
-  if (colon == absl::string_view::npos) return false;
+static bool FindMultiline(StringPiece line, size_t colon, string* end) {
+  if (colon == StringPiece::npos) return false;
   line.remove_prefix(colon + 1);
   while (str_util::ConsumePrefix(&line, " ")) {
   }
@@ -193,12 +192,12 @@ static bool FindMultiline(absl::string_view line, size_t colon, string* end) {
   return false;
 }
 
-string PBTxtFromMultiline(absl::string_view multiline_pbtxt) {
+string PBTxtFromMultiline(StringPiece multiline_pbtxt) {
   string pbtxt;
   // Probably big enough, since the input and output are about the
   // same size, but just a guess.
   pbtxt.reserve(multiline_pbtxt.size() * (33. / 32));
-  absl::string_view line;
+  StringPiece line;
   while (!multiline_pbtxt.empty()) {
     // Split multiline_pbtxt into its first line and everything after.
     if (!SplitAt('\n', &multiline_pbtxt, &line)) {
@@ -238,7 +237,7 @@ string PBTxtFromMultiline(absl::string_view multiline_pbtxt) {
         unescaped.push_back('\n');
       }
       strings::StrAppend(&unescaped, line);
-      line = absl::string_view();
+      line = StringPiece();
     }
 
     // Escape what we extracted and then output it in quotes.
diff --git a/tensorflow/core/framework/op_gen_lib.h b/tensorflow/core/framework/op_gen_lib.h
index b7660207f8..c269e2df04 100644
--- a/tensorflow/core/framework/op_gen_lib.h
+++ b/tensorflow/core/framework/op_gen_lib.h
@@ -18,10 +18,10 @@ limitations under the License.
 
 #include <string>
 #include <unordered_map>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/env.h"
 
 namespace tensorflow {
@@ -35,17 +35,17 @@ inline string Spaces(int n) { return string(n, ' '); }
 // after the first by prefix.size() spaces.  Intended use case is something
 // like prefix = "  Foo(" and str is a list of arguments (terminated by a ")").
 // TODO(josh11b): Option to wrap on ", " instead of " " when possible.
-string WordWrap(absl::string_view prefix, absl::string_view str, int width);
+string WordWrap(StringPiece prefix, StringPiece str, int width);
 
 // Looks for an "=" at the beginning of *description.  If found, strips it off
 // (and any following spaces) from *description and return true.  Otherwise
 // returns false.
-bool ConsumeEquals(absl::string_view* description);
+bool ConsumeEquals(StringPiece* description);
 
 // Convert text-serialized protobufs to/from multiline format.
-string PBTxtToMultiline(absl::string_view pbtxt,
+string PBTxtToMultiline(StringPiece pbtxt,
                         const std::vector<string>& multi_line_fields);
-string PBTxtFromMultiline(absl::string_view multiline_pbtxt);
+string PBTxtFromMultiline(StringPiece multiline_pbtxt);
 
 // Takes a list of files with ApiDefs text protos, and allows you to
 // look up the specific ApiDef for any given op.
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 65aa005bf6..3e34bf0418 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/framework/graph.pb_text.h"
@@ -34,6 +33,7 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/notification.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -118,7 +118,7 @@ const string& OpKernel::type_string() const { return def_->op(); }
 const string& OpKernel::requested_device() const { return def_->device(); }
 const string& OpKernel::requested_input(int i) const { return def_->input(i); }
 
-Status OpKernel::InputRange(absl::string_view input_name, int* start,
+Status OpKernel::InputRange(StringPiece input_name, int* start,
                             int* stop) const {
   const auto result = input_name_map_.find(input_name);
   if (result == input_name_map_.end()) {
@@ -130,7 +130,7 @@ Status OpKernel::InputRange(absl::string_view input_name, int* start,
   }
 }
 
-Status OpKernel::OutputRange(absl::string_view output_name, int* start,
+Status OpKernel::OutputRange(StringPiece output_name, int* start,
                              int* stop) const {
   const auto result = output_name_map_.find(output_name);
   if (result == output_name_map_.end()) {
@@ -200,7 +200,7 @@ OpKernelConstruction::OpKernelConstruction(
       graph_def_version_(graph_def_version),
       status_(status) {}
 
-bool OpKernelConstruction::HasAttr(absl::string_view attr_name) const {
+bool OpKernelConstruction::HasAttr(StringPiece attr_name) const {
   return HasNodeAttr(def(), attr_name);
 }
 
@@ -320,7 +320,7 @@ void OpKernelContext::really_record_tensor_reference(const Tensor& tensor) {
   referenced_tensors_->Add(tensor);
 }
 
-Status OpKernelContext::input(absl::string_view name, const Tensor** tensor) {
+Status OpKernelContext::input(StringPiece name, const Tensor** tensor) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -338,8 +338,7 @@ Status OpKernelContext::input(absl::string_view name, const Tensor** tensor) {
   return Status::OK();
 }
 
-Status OpKernelContext::input_dtype(absl::string_view name,
-                                    DataType* dtype) const {
+Status OpKernelContext::input_dtype(StringPiece name, DataType* dtype) const {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -357,8 +356,7 @@ Status OpKernelContext::input_dtype(absl::string_view name,
   return Status::OK();
 }
 
-Status OpKernelContext::input_ref_mutex(absl::string_view name,
-                                        mutex** out_mutex) {
+Status OpKernelContext::input_ref_mutex(StringPiece name, mutex** out_mutex) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -440,7 +438,7 @@ bool OpKernelContext::forward_input_to_output_with_shape(
 }
 
 Status OpKernelContext::forward_input_to_output_with_shape(
-    absl::string_view input_name, absl::string_view output_name,
+    StringPiece input_name, StringPiece output_name,
     const TensorShape& output_shape, Tensor** output) {
   int input_index, output_index, stop;
   TF_RETURN_IF_ERROR(
@@ -564,7 +562,7 @@ void OpKernelContext::delete_ref_input(int index, bool lock_held) {
   }
 }
 
-Status OpKernelContext::mutable_input(absl::string_view name, Tensor* tensor,
+Status OpKernelContext::mutable_input(StringPiece name, Tensor* tensor,
                                       bool lock_held) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
@@ -588,7 +586,7 @@ Status OpKernelContext::mutable_input(absl::string_view name, Tensor* tensor,
   return Status::OK();
 }
 
-Status OpKernelContext::replace_ref_input(absl::string_view name,
+Status OpKernelContext::replace_ref_input(StringPiece name,
                                           const Tensor& tensor,
                                           bool lock_held) {
   int start, stop;
@@ -606,14 +604,14 @@ Status OpKernelContext::replace_ref_input(absl::string_view name,
   return Status::OK();
 }
 
-Status OpKernelContext::input_list(absl::string_view name, OpInputList* list) {
+Status OpKernelContext::input_list(StringPiece name, OpInputList* list) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
   *list = OpInputList(this, start, stop);
   return Status::OK();
 }
 
-Status OpKernelContext::mutable_input_list(absl::string_view name,
+Status OpKernelContext::mutable_input_list(StringPiece name,
                                            OpMutableInputList* list) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->InputRange(name, &start, &stop));
@@ -621,8 +619,7 @@ Status OpKernelContext::mutable_input_list(absl::string_view name,
   return Status::OK();
 }
 
-Status OpKernelContext::output_list(absl::string_view name,
-                                    OpOutputList* list) {
+Status OpKernelContext::output_list(StringPiece name, OpOutputList* list) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->OutputRange(name, &start, &stop));
   *list = OpOutputList(this, start, stop);
@@ -645,7 +642,7 @@ Status OpKernelContext::allocate_output(int index, const TensorShape& shape,
   return allocate_output(index, shape, output, attr);
 }
 
-Status OpKernelContext::allocate_output(absl::string_view name,
+Status OpKernelContext::allocate_output(StringPiece name,
                                         const TensorShape& shape,
                                         Tensor** tensor) {
   int start, stop;
@@ -659,7 +656,7 @@ Status OpKernelContext::allocate_output(absl::string_view name,
   return allocate_output(start, shape, tensor);
 }
 
-Status OpKernelContext::allocate_output(absl::string_view name,
+Status OpKernelContext::allocate_output(StringPiece name,
                                         const TensorShape& shape,
                                         Tensor** tensor,
                                         AllocatorAttributes attr) {
@@ -755,8 +752,7 @@ Status OpKernelContext::allocate_persistent(DataType type,
   return s;
 }
 
-Status OpKernelContext::set_output(absl::string_view name,
-                                   const Tensor& tensor) {
+Status OpKernelContext::set_output(StringPiece name, const Tensor& tensor) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->OutputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -803,7 +799,7 @@ void OpKernelContext::set_output_ref(int index, mutex* mu,
   outputs_[index] = TensorValue(mu, tensor_for_ref);
 }
 
-Status OpKernelContext::set_output_ref(absl::string_view name, mutex* mu,
+Status OpKernelContext::set_output_ref(StringPiece name, mutex* mu,
                                        Tensor* tensor_for_ref) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->OutputRange(name, &start, &stop));
@@ -817,8 +813,7 @@ Status OpKernelContext::set_output_ref(absl::string_view name, mutex* mu,
   return Status::OK();
 }
 
-Status OpKernelContext::mutable_output(absl::string_view name,
-                                       Tensor** tensor) {
+Status OpKernelContext::mutable_output(StringPiece name, Tensor** tensor) {
   int start, stop;
   TF_RETURN_IF_ERROR(params_->op_kernel->OutputRange(name, &start, &stop));
   if (stop != start + 1) {
@@ -916,7 +911,7 @@ void OpKernelContext::clear_recorded_memory() {
 // OpKernel registration ------------------------------------------------------
 
 struct KernelRegistration {
-  KernelRegistration(const KernelDef& d, absl::string_view c,
+  KernelRegistration(const KernelDef& d, StringPiece c,
                      kernel_factory::OpKernelRegistrar::Factory f)
       : def(d), kernel_class_name(c), factory(f) {}
   const KernelDef def;
@@ -938,8 +933,8 @@ static KernelRegistry* GlobalKernelRegistryTyped() {
   return reinterpret_cast<KernelRegistry*>(GlobalKernelRegistry());
 }
 
-static string Key(absl::string_view op_type, const DeviceType& device_type,
-                  absl::string_view label) {
+static string Key(StringPiece op_type, const DeviceType& device_type,
+                  StringPiece label) {
   return strings::StrCat(op_type, ":", DeviceTypeString(device_type), ":",
                          label);
 }
@@ -947,7 +942,7 @@ static string Key(absl::string_view op_type, const DeviceType& device_type,
 namespace kernel_factory {
 
 void OpKernelRegistrar::InitInternal(const KernelDef* kernel_def,
-                                     absl::string_view kernel_class_name,
+                                     StringPiece kernel_class_name,
                                      Factory factory) {
   // See comments in register_kernel::Name in header for info on _no_register.
   if (kernel_def->op() != "_no_register") {
@@ -964,7 +959,7 @@ void OpKernelRegistrar::InitInternal(const KernelDef* kernel_def,
 
 namespace {
 
-static const absl::string_view kKernelAttr("_kernel");
+static const StringPiece kKernelAttr("_kernel");
 
 // TODO(irving): Replace with const Node& version below.
 Status FindKernelRegistration(const DeviceType& device_type,
@@ -1077,12 +1072,12 @@ KernelList GetFilteredRegisteredKernels(
   return kernel_list;
 }
 
-KernelList GetRegisteredKernelsForOp(absl::string_view op_name) {
+KernelList GetRegisteredKernelsForOp(StringPiece op_name) {
   auto op_pred = [op_name](const KernelDef& k) { return k.op() == op_name; };
   return GetFilteredRegisteredKernels(op_pred);
 }
 
-string KernelsRegisteredForOp(absl::string_view op_name) {
+string KernelsRegisteredForOp(StringPiece op_name) {
   KernelList kernel_list = GetRegisteredKernelsForOp(op_name);
   if (kernel_list.kernel_size() == 0) return "  <no registered kernels>\n";
   string ret;
@@ -1180,7 +1175,7 @@ Status CreateOpKernel(DeviceType device_type, DeviceBase* device,
 
 namespace {
 
-bool FindArgInOp(absl::string_view arg_name,
+bool FindArgInOp(StringPiece arg_name,
                  const protobuf::RepeatedPtrField<OpDef::ArgDef>& args) {
   for (const auto& arg : args) {
     if (arg_name == arg.name()) {
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 7d48d35356..3b1f57a457 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -20,7 +20,6 @@ limitations under the License.
 
 #include <utility>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/cancellation.h"
 #include "tensorflow/core/framework/control_flow.h"
@@ -143,9 +142,8 @@ class OpKernel {
     return output_memory_types_;
   }
 
-  Status InputRange(absl::string_view input_name, int* start, int* stop) const;
-  Status OutputRange(absl::string_view output_name, int* start,
-                     int* stop) const;
+  Status InputRange(StringPiece input_name, int* start, int* stop) const;
+  Status OutputRange(StringPiece output_name, int* start, int* stop) const;
 
   // We allow legacy scalars within Google up until GraphDef version 6.
   // TODO(irving): Remove when we can drop support for GraphDef version 5.
@@ -313,10 +311,10 @@ class OpKernelConstruction {
   // attr with attr_name is found in def(), or the attr does not have
   // a matching type, a non-ok status will be returned.
   template <class T>
-  Status GetAttr(absl::string_view attr_name, T* value) const;
+  Status GetAttr(StringPiece attr_name, T* value) const;
 
   // Return true if the attr_name is defined in def().
-  bool HasAttr(absl::string_view attr_name) const;
+  bool HasAttr(StringPiece attr_name) const;
 
   // Return the device type.
   const DeviceType& device_type() const { return device_type_; }
@@ -617,7 +615,7 @@ class OpKernelContext {
 
   int num_inputs() const { return params_->inputs->size(); }
   DataType input_dtype(int index) const;
-  Status input_dtype(absl::string_view name, DataType* dtype) const;
+  Status input_dtype(StringPiece name, DataType* dtype) const;
   MemoryType input_memory_type(int index) const;
 
   int num_outputs() const { return outputs_.size(); }
@@ -637,14 +635,14 @@ class OpKernelContext {
   // use mutable_input below.
   // REQUIRES: !IsRefType(input_dtype(index))
   // REQUIRES: the named input must not be a list.
-  Status input(absl::string_view name, const Tensor** tensor);
+  Status input(StringPiece name, const Tensor** tensor);
 
   // Returns the named list-valued immutable input in "list", as
   // defined in the OpDef.  If the named output is not list-valued,
   // returns a one-element list. May only be used for non-Ref
   // inputs. For Ref inputs use mutable_input below.
   // REQUIRES: !IsRefType(input_dtype(index))
-  Status input_list(absl::string_view name, OpInputList* list);
+  Status input_list(StringPiece name, OpInputList* list);
 
   // For mutable inputs, use the following together to make sure there
   // is no concurrent access to mutable_input(), e.g.:
@@ -654,7 +652,7 @@ class OpKernelContext {
   //   // modify the values in t
   // }
   // REQUIRES: IsRefType(input_dtype(index))
-  Status input_ref_mutex(absl::string_view name, mutex** out_mutex);
+  Status input_ref_mutex(StringPiece name, mutex** out_mutex);
 
   // Returns a mutable input tensor. Must be used to access Ref
   // inputs.  REQUIRES: IsRefType(input_dtype(index)). The caller may
@@ -672,7 +670,7 @@ class OpKernelContext {
   // the input mutex will be acquired before returning the Tensor.
   // REQUIRES: the named input must not be a list.
   // REQUIRES: the named input must be a ref tensor.
-  Status mutable_input(absl::string_view name, Tensor* tensor, bool lock_held);
+  Status mutable_input(StringPiece name, Tensor* tensor, bool lock_held);
 
   // Returns the named list-valued mutable input in "list", as defined
   // in the OpDef.  If the named input is not list-valued, returns a
@@ -680,7 +678,7 @@ class OpKernelContext {
   // stored in the Tensor buffer may be modified, and modifications
   // will be visible to other Ops reading the same ref tensor.
   // REQUIRES: the named input must be a ref tensor.
-  Status mutable_input_list(absl::string_view name, OpMutableInputList* list);
+  Status mutable_input_list(StringPiece name, OpMutableInputList* list);
 
   // Replace the corresponding Ref Input to use the storage buffer
   // used by tensor. If !lock_held the input mutex will be acquired
@@ -692,7 +690,7 @@ class OpKernelContext {
   // buffer used by tensor. If !lock_held the input mutex will be
   // acquired before returning the Tensor.
   // REQUIRES: IsRefType(input_dtype(index)).
-  Status replace_ref_input(absl::string_view name, const Tensor& tensor,
+  Status replace_ref_input(StringPiece name, const Tensor& tensor,
                            bool lock_held);
 
   // Deletes the Tensor object used as the Ref Input at
@@ -730,8 +728,8 @@ class OpKernelContext {
   bool forward_input_to_output_with_shape(int input_index, int output_index,
                                           const TensorShape& output_shape,
                                           Tensor** output) TF_MUST_USE_RESULT;
-  Status forward_input_to_output_with_shape(absl::string_view input_name,
-                                            absl::string_view output_name,
+  Status forward_input_to_output_with_shape(StringPiece input_name,
+                                            StringPiece output_name,
                                             const TensorShape& output_shape,
                                             Tensor** output) TF_MUST_USE_RESULT;
 
@@ -775,8 +773,8 @@ class OpKernelContext {
       gtl::ArraySlice<int> candidate_input_indices, int output_index,
       const TensorShape& output_shape, Tensor** output) TF_MUST_USE_RESULT;
   Status forward_input_or_allocate_output(
-      gtl::ArraySlice<absl::string_view> candidate_input_names,
-      absl::string_view output_name, const TensorShape& output_shape,
+      gtl::ArraySlice<StringPiece> candidate_input_names,
+      StringPiece output_name, const TensorShape& output_shape,
       Tensor** output) TF_MUST_USE_RESULT;
 
   // Tries to reuse one of the inputs given in input_indices as a temporary.
@@ -798,7 +796,7 @@ class OpKernelContext {
 
   // Returns the named list-valued output in "list", as defined in the OpDef.
   // If the named output is not list-valued, returns a one-element list.
-  Status output_list(absl::string_view name, OpOutputList* list);
+  Status output_list(StringPiece name, OpOutputList* list);
 
   // If output_required(index) returns true, the OpKernel's Compute() method
   // should call allocate_output(index, ...), set_output(index, ...),
@@ -863,7 +861,7 @@ class OpKernelContext {
   // REQUIRES: !IsRefType(expected_output_dtype(index))
   Status allocate_output(int index, const TensorShape& shape,
                          Tensor** tensor) TF_MUST_USE_RESULT;
-  Status allocate_output(absl::string_view name, const TensorShape& shape,
+  Status allocate_output(StringPiece name, const TensorShape& shape,
                          Tensor** tensor) TF_MUST_USE_RESULT;
   // The following methods use the supplied attributes instead of
   // those in output_attr_array. The caller is responsible for
@@ -872,7 +870,7 @@ class OpKernelContext {
   // device. See comment above.
   Status allocate_output(int index, const TensorShape& shape, Tensor** tensor,
                          AllocatorAttributes attr) TF_MUST_USE_RESULT;
-  Status allocate_output(absl::string_view name, const TensorShape& shape,
+  Status allocate_output(StringPiece name, const TensorShape& shape,
                          Tensor** tensor,
                          AllocatorAttributes attr) TF_MUST_USE_RESULT;
 
@@ -915,16 +913,15 @@ class OpKernelContext {
   // index.  REQUIRES: !IsRefType(expected_output_dtype(index))
   // REQUIRES: 'tensor' must have the same MemoryType as
   // output_memory_types[index]. See comment above.
-  Status set_output(absl::string_view name, const Tensor& tensor);
+  Status set_output(StringPiece name, const Tensor& tensor);
 
   // To output a reference.  Caller retains ownership of mu and tensor_for_ref,
   // and they must outlive all uses within the step. See comment above.
   // REQUIRES: IsRefType(expected_output_dtype(index))
-  Status set_output_ref(absl::string_view name, mutex* mu,
-                        Tensor* tensor_for_ref);
+  Status set_output_ref(StringPiece name, mutex* mu, Tensor* tensor_for_ref);
 
   // Returns nullptr if allocate_output() or set_output() have not been called.
-  Status mutable_output(absl::string_view name, Tensor** tensor);
+  Status mutable_output(StringPiece name, Tensor** tensor);
 
   // Records device specific state about how the input tensors were
   // computed.
@@ -1227,7 +1224,7 @@ Status SupportedDeviceTypesForNode(
 
 // Returns a message with a description of the kernels registered for op
 // `op_name`.
-string KernelsRegisteredForOp(absl::string_view op_name);
+string KernelsRegisteredForOp(StringPiece op_name);
 
 // Call once after Op registration has completed.
 Status ValidateKernelRegistrations(const OpRegistryInterface& op_registry);
@@ -1325,7 +1322,7 @@ KernelList GetFilteredRegisteredKernels(
     const std::function<bool(const KernelDef&)>& predicate);
 
 // Gets a list of all registered kernels for a given op
-KernelList GetRegisteredKernelsForOp(absl::string_view op_name);
+KernelList GetRegisteredKernelsForOp(StringPiece op_name);
 
 namespace kernel_factory {
 
@@ -1333,8 +1330,8 @@ class OpKernelRegistrar {
  public:
   typedef OpKernel* (*Factory)(OpKernelConstruction*);
 
-  OpKernelRegistrar(const KernelDef* kernel_def,
-                    absl::string_view kernel_class_name, Factory factory) {
+  OpKernelRegistrar(const KernelDef* kernel_def, StringPiece kernel_class_name,
+                    Factory factory) {
     // Perform the check in the header to allow compile-time optimization
     // to a no-op, allowing the linker to remove the kernel symbols.
     if (kernel_def != nullptr) {
@@ -1343,8 +1340,8 @@ class OpKernelRegistrar {
   }
 
  private:
-  void InitInternal(const KernelDef* kernel_def,
-                    absl::string_view kernel_class_name, Factory factory);
+  void InitInternal(const KernelDef* kernel_def, StringPiece kernel_class_name,
+                    Factory factory);
 };
 
 }  // namespace kernel_factory
@@ -1353,8 +1350,7 @@ class OpKernelRegistrar {
 // Template and inline method implementations, please ignore
 
 template <class T>
-Status OpKernelConstruction::GetAttr(absl::string_view attr_name,
-                                     T* value) const {
+Status OpKernelConstruction::GetAttr(StringPiece attr_name, T* value) const {
   return GetNodeAttr(def(), attr_name, value);
 }
 
@@ -1457,10 +1453,9 @@ inline Status OpKernelContext::forward_input_or_allocate_output(
 }
 
 inline Status OpKernelContext::forward_input_or_allocate_output(
-    gtl::ArraySlice<absl::string_view> candidate_input_names,
-    absl::string_view output_name, const TensorShape& output_shape,
-    Tensor** output) {
-  for (const absl::string_view& input_name : candidate_input_names) {
+    gtl::ArraySlice<StringPiece> candidate_input_names, StringPiece output_name,
+    const TensorShape& output_shape, Tensor** output) {
+  for (const StringPiece& input_name : candidate_input_names) {
     if (forward_input_to_output_with_shape(input_name, output_name,
                                            output_shape, output)
             .ok()) {
diff --git a/tensorflow/core/framework/rendezvous.cc b/tensorflow/core/framework/rendezvous.cc
index 3d14f221a3..e84143f1b9 100644
--- a/tensorflow/core/framework/rendezvous.cc
+++ b/tensorflow/core/framework/rendezvous.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
@@ -37,15 +36,15 @@ namespace tensorflow {
 Rendezvous::ParsedKey& Rendezvous::ParsedKey::operator=(const ParsedKey& b) {
   const char* b_base = b.buf_.data();
   buf_ = b.buf_;
-  src_device = absl::string_view(buf_.data() + (b.src_device.data() - b_base),
-                                 b.src_device.size());
+  src_device = StringPiece(buf_.data() + (b.src_device.data() - b_base),
+                           b.src_device.size());
   src = b.src;
   src_incarnation = b.src_incarnation;
-  dst_device = absl::string_view(buf_.data() + (b.dst_device.data() - b_base),
-                                 b.dst_device.size());
+  dst_device = StringPiece(buf_.data() + (b.dst_device.data() - b_base),
+                           b.dst_device.size());
   dst = b.dst;
-  edge_name = absl::string_view(buf_.data() + (b.edge_name.data() - b_base),
-                                b.edge_name.size());
+  edge_name = StringPiece(buf_.data() + (b.edge_name.data() - b_base),
+                          b.edge_name.size());
   return *this;
 }
 
@@ -69,22 +68,22 @@ string Rendezvous::CreateKey(const string& src_device, uint64 src_incarnation,
 // Return the prefix of "*s" up to the next occurrence of "delim", or
 // the whole remaining string if "delim" is not found.  "*s" is advanced
 // past the string returned plus the delimiter (if found).
-static absl::string_view ConsumeNextPart(absl::string_view* s, char delim) {
+static StringPiece ConsumeNextPart(StringPiece* s, char delim) {
   for (size_t offset = 0; offset < s->size(); offset++) {
     if ((*s)[offset] == delim) {
-      absl::string_view result(s->data(), offset);
+      StringPiece result(s->data(), offset);
       s->remove_prefix(offset + 1);  // +1: remove delim, as well
       return result;
     }
   }
   // No delimiter found: return rest of string
-  absl::string_view result(s->data(), s->size());
+  StringPiece result(s->data(), s->size());
   s->remove_prefix(s->size());
   return result;
 }
 
 /* static */
-Status Rendezvous::ParseKey(absl::string_view key, ParsedKey* out) {
+Status Rendezvous::ParseKey(StringPiece key, ParsedKey* out) {
   if (key.data() == out->buf_.data()) {
     // Caller used our buf_ string directly, so we don't need to copy.  (The
     // SendOp and RecvOp implementations do this, for example).
@@ -94,8 +93,8 @@ Status Rendezvous::ParseKey(absl::string_view key, ParsedKey* out) {
     // for the lifetime of the ParsedKey object.
     out->buf_.assign(key.data(), key.size());
   }
-  absl::string_view s(out->buf_);
-  absl::string_view parts[5];
+  StringPiece s(out->buf_);
+  StringPiece parts[5];
   for (int i = 0; i < 5; i++) {
     parts[i] = ConsumeNextPart(&s, ';');
   }
@@ -105,9 +104,9 @@ Status Rendezvous::ParseKey(absl::string_view key, ParsedKey* out) {
       strings::HexStringToUint64(parts[1], &out->src_incarnation) &&
       DeviceNameUtils::ParseFullName(parts[2], &out->dst) &&
       !parts[3].empty()) {
-    out->src_device = absl::string_view(parts[0].data(), parts[0].size());
-    out->dst_device = absl::string_view(parts[2].data(), parts[2].size());
-    out->edge_name = absl::string_view(parts[3].data(), parts[3].size());
+    out->src_device = StringPiece(parts[0].data(), parts[0].size());
+    out->dst_device = StringPiece(parts[2].data(), parts[2].size());
+    out->edge_name = StringPiece(parts[3].data(), parts[3].size());
     return Status::OK();
   }
   return errors::InvalidArgument("Invalid  rendezvous key: ", key);
@@ -278,7 +277,7 @@ class LocalRendezvousImpl : public Rendezvous {
   };
 
   // We key the hash table by KeyHash of the Rendezvous::CreateKey string
-  static uint64 KeyHash(const absl::string_view& k) {
+  static uint64 KeyHash(const StringPiece& k) {
     return Hash64(k.data(), k.size());
   }
 
diff --git a/tensorflow/core/framework/rendezvous.h b/tensorflow/core/framework/rendezvous.h
index 1e796a443d..01e43e44e3 100644
--- a/tensorflow/core/framework/rendezvous.h
+++ b/tensorflow/core/framework/rendezvous.h
@@ -18,7 +18,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/control_flow.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -61,18 +60,18 @@ class Rendezvous : public core::RefCounted {
   // Parses the key constructed by CreateKey and parse src/dst device
   // names into structures respectively.
   struct ParsedKey {
-    absl::string_view src_device;
+    StringPiece src_device;
     DeviceNameUtils::ParsedName src;
     uint64 src_incarnation = 0;
-    absl::string_view dst_device;
+    StringPiece dst_device;
     DeviceNameUtils::ParsedName dst;
-    absl::string_view edge_name;
+    StringPiece edge_name;
 
     ParsedKey() {}
     ParsedKey(const ParsedKey& b) { *this = b; }
 
     ParsedKey& operator=(const ParsedKey& b);
-    absl::string_view FullKey() const { return buf_; }
+    StringPiece FullKey() const { return buf_; }
 
    private:
     friend class Rendezvous;
@@ -80,7 +79,7 @@ class Rendezvous : public core::RefCounted {
     friend class RecvOp;
     string buf_;
   };
-  static Status ParseKey(absl::string_view key, ParsedKey* out);
+  static Status ParseKey(StringPiece key, ParsedKey* out);
 
   // The caller is a tensor producer and it sends a message (a tensor
   // "val" and a bool "is_dead") under the given "key".
diff --git a/tensorflow/core/framework/resource_mgr.cc b/tensorflow/core/framework/resource_mgr.cc
index 31a5f8bd6d..508a8d3149 100644
--- a/tensorflow/core/framework/resource_mgr.cc
+++ b/tensorflow/core/framework/resource_mgr.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/framework/resource_mgr.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/device_attributes.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -224,7 +223,7 @@ Status ResourceMgr::Cleanup(const string& container) {
   return Status::OK();
 }
 
-static bool IsValidContainerName(absl::string_view s) {
+static bool IsValidContainerName(StringPiece s) {
   using ::tensorflow::strings::Scanner;
   return Scanner(s)
       .One(Scanner::LETTER_DIGIT_DOT)
@@ -276,7 +275,7 @@ const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
   return ctx->input(input).flat<ResourceHandle>()(0);
 }
 
-Status HandleFromInput(OpKernelContext* ctx, absl::string_view input,
+Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
                        ResourceHandle* handle) {
   const Tensor* tensor;
   TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h
index 703e2b6922..4a531648d9 100644
--- a/tensorflow/core/framework/resource_mgr.h
+++ b/tensorflow/core/framework/resource_mgr.h
@@ -22,7 +22,6 @@ limitations under the License.
 #include <typeinfo>
 #include <unordered_map>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/resource_handle.h"
@@ -245,7 +244,7 @@ ResourceHandle MakePerStepResourceHandle(OpKernelContext* ctx,
 
 // Returns a resource handle from a numbered op input.
 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input);
-Status HandleFromInput(OpKernelContext* ctx, absl::string_view input,
+Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
                        ResourceHandle* handle);
 
 // Create a resource pointed by a given resource handle.
diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc
index 174b2d6bdf..4dcc80680f 100644
--- a/tensorflow/core/framework/shape_inference.cc
+++ b/tensorflow/core/framework/shape_inference.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/framework/shape_inference.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb_text.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
@@ -173,7 +172,7 @@ Status InferenceContext::Run(
   return s;
 }
 
-Status InferenceContext::set_output(absl::string_view output_name,
+Status InferenceContext::set_output(StringPiece output_name,
                                     const std::vector<ShapeHandle>& shapes) {
   auto result = output_name_map_.find(output_name);
   if (result == output_name_map_.end()) {
@@ -192,7 +191,7 @@ Status InferenceContext::set_output(absl::string_view output_name,
   return Status::OK();
 }
 
-Status InferenceContext::input(absl::string_view input_name,
+Status InferenceContext::input(StringPiece input_name,
                                std::vector<ShapeHandle>* output) const {
   const auto result = input_name_map_.find(input_name);
   if (result == input_name_map_.end()) {
@@ -206,7 +205,7 @@ Status InferenceContext::input(absl::string_view input_name,
   return Status::OK();
 }
 
-Status InferenceContext::output(absl::string_view output_name,
+Status InferenceContext::output(StringPiece output_name,
                                 std::vector<ShapeHandle>* output) const {
   const auto result = output_name_map_.find(output_name);
   if (result == output_name_map_.end()) {
diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h
index 7fe27d1d9f..e3885b7d9e 100644
--- a/tensorflow/core/framework/shape_inference.h
+++ b/tensorflow/core/framework/shape_inference.h
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -289,8 +288,7 @@ class InferenceContext {
   void SetInput(int idx, ShapeHandle shape) { inputs_[idx] = shape; }
 
   ShapeHandle input(int64 idx) const { return inputs_[idx]; }
-  Status input(absl::string_view input_name,
-               std::vector<ShapeHandle>* output) const;
+  Status input(StringPiece input_name, std::vector<ShapeHandle>* output) const;
   int num_inputs() const { return inputs_.size(); }
 
   // Returns the input tensor at index <idx>, or nullptr if the input tensor is
@@ -327,12 +325,12 @@ class InferenceContext {
 
   ShapeHandle output(int64 idx) const { return outputs_.at(idx); }
   void set_output(int idx, ShapeHandle shape) { outputs_.at(idx) = shape; }
-  Status set_output(absl::string_view output_name,
+  Status set_output(StringPiece output_name,
                     const std::vector<ShapeHandle>& shapes);
 
   int num_outputs() const { return outputs_.size(); }
   ShapeHandle output(int idx) const { return outputs_.at(idx); }
-  Status output(absl::string_view output_name,
+  Status output(StringPiece output_name,
                 std::vector<ShapeHandle>* output) const;
 
   AttrSlice attrs() const { return AttrSlice(*node_def_); }
@@ -527,7 +525,7 @@ class InferenceContext {
   // set *value to its value.  If no attr with attr_name is found in def(), or
   // the attr does not have a matching type, a non-ok status will be returned.
   template <class T>
-  Status GetAttr(absl::string_view attr_name, T* value) const;
+  Status GetAttr(StringPiece attr_name, T* value) const;
 
   // Returns in <out> the result of dividing <dividend> by <divisor>.
   // Returns an error if <divisor>  is not positive or if <evenly_divisible>
@@ -818,7 +816,7 @@ inline DimensionOrConstant::DimensionOrConstant(int64 val) : val(val) {
 }
 
 template <class T>
-Status InferenceContext::GetAttr(absl::string_view attr_name, T* value) const {
+Status InferenceContext::GetAttr(StringPiece attr_name, T* value) const {
   return GetNodeAttr(*node_def_, attr_name, value);
 }
 
diff --git a/tensorflow/core/framework/shape_inference_testutil.cc b/tensorflow/core/framework/shape_inference_testutil.cc
index 214a4aee44..b54dd220ab 100644
--- a/tensorflow/core/framework/shape_inference_testutil.cc
+++ b/tensorflow/core/framework/shape_inference_testutil.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/framework/shape_inference_testutil.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -87,7 +86,7 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op,
                    " but should list ", num_outputs);
   }
   for (int i = 0; i < num_outputs; ++i) {
-    absl::string_view expected(expected_outs_v[i]);
+    StringPiece expected(expected_outs_v[i]);
     shape_inference::ShapeHandle out = c.output(i);
 
     string err_prefix = strings::StrCat("Output ", i);
@@ -154,7 +153,7 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op,
     }
     for (int j = 0; j < expected_dims.size(); ++j) {
       err_prefix = strings::StrCat("Output dim ", i, ",", j);
-      absl::string_view expected_dim(expected_dims[j]);
+      StringPiece expected_dim(expected_dims[j]);
       DimensionHandle out_dim = c.Dim(out, j);
 
       std::pair<int, int> in_dim_idx(-1, -1);
@@ -246,7 +245,7 @@ Status ShapeInferenceTestutil::MakeShapeFromString(
       dims.push_back(manager->MakeDim(InferenceContext::kUnknownDim));
     } else {
       scanner.RestartCapture().Many(strings::Scanner::DIGIT);
-      absl::string_view match;
+      StringPiece match;
       int64 dim_size = 0;
 
       if (!scanner.GetResult(nullptr, &match) ||
diff --git a/tensorflow/core/framework/shape_inference_testutil.h b/tensorflow/core/framework/shape_inference_testutil.h
index deba5bb6f1..bb4dc25da4 100644
--- a/tensorflow/core/framework/shape_inference_testutil.h
+++ b/tensorflow/core/framework/shape_inference_testutil.h
@@ -16,10 +16,10 @@ limitations under the License.
 #define TENSORFLOW_CORE_FRAMEWORK_SHAPE_INFERENCE_TESTUTIL_H_
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/shape_inference.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/public/version.h"
@@ -32,7 +32,7 @@ class Tensor;
 
 struct ShapeInferenceTestOp {
   typedef std::pair<string, DataType> ShapeAndType;
-  explicit ShapeInferenceTestOp(absl::string_view name) : name(string(name)) {}
+  explicit ShapeInferenceTestOp(StringPiece name) : name(string(name)) {}
   string name;
   NodeDef node_def;
   std::vector<const Tensor*> input_tensors;
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index 50b39eb2ff..1dea6da911 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -29,7 +29,6 @@ limitations under the License.
 
 #include "tensorflow/core/framework/tensor.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/resource_handle.pb.h"
@@ -134,8 +133,8 @@ struct Helper {
   template <typename Destination>
   static void Encode(TensorBuffer* in, int64 n, Destination* out) {
     DCHECK_EQ(in->size(), sizeof(T) * n);
-    port::AssignRefCounted(
-        absl::string_view(in->base<const char>(), in->size()), in, out);
+    port::AssignRefCounted(StringPiece(in->base<const char>(), in->size()), in,
+                           out);
   }
 
   // Decoder of simple type T. Copy the bytes from "in" into the
@@ -1149,10 +1148,9 @@ string Tensor::SummarizeValue(int64 max_entries, bool print_v2) const {
   }
 }
 
-absl::string_view Tensor::tensor_data() const {
-  if (buf_ == nullptr)
-    return absl::string_view();  // Don't die for empty tensors
-  return absl::string_view(static_cast<char*>(buf_->data()), TotalBytes());
+StringPiece Tensor::tensor_data() const {
+  if (buf_ == nullptr) return StringPiece();  // Don't die for empty tensors
+  return StringPiece(static_cast<char*>(buf_->data()), TotalBytes());
 }
 
 bool Tensor::SharesBufferWith(const Tensor& b) const {
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index c753b12698..d0f9eb56e2 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_FRAMEWORK_TENSOR_H_
 #define TENSORFLOW_CORE_FRAMEWORK_TENSOR_H_
 
-#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -25,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
@@ -469,7 +469,7 @@ class Tensor {
   /// not get destroyed while the `StringPiece` is still used.
   ///
   /// REQUIRES: `DataTypeCanUseMemcpy(dtype())`.
-  absl::string_view tensor_data() const;
+  StringPiece tensor_data() const;
 
   /// Copy the other tensor into this tensor and reshape it and reinterpret the
   /// buffer's datatype.
diff --git a/tensorflow/core/framework/tensor_util.cc b/tensorflow/core/framework/tensor_util.cc
index 37ab67fcbc..65f6dc1c00 100644
--- a/tensorflow/core/framework/tensor_util.cc
+++ b/tensorflow/core/framework/tensor_util.cc
@@ -16,9 +16,9 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_util.h"
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 namespace tensor {
@@ -27,12 +27,12 @@ Tensor DeepCopy(const Tensor& other) {
   Tensor tmp = Tensor(other.dtype(), other.shape());
   if (DataTypeCanUseMemcpy(other.dtype())) {
     if (other.NumElements() > 0) {
-      absl::string_view other_data = other.tensor_data();
+      StringPiece other_data = other.tensor_data();
 
       // We use StringPiece as a convenient map over the tensor buffer,
       // but we cast the type to get to the underlying buffer to do the
       // copy.
-      absl::string_view tmp_data = tmp.tensor_data();
+      StringPiece tmp_data = tmp.tensor_data();
       memcpy(const_cast<char*>(tmp_data.data()), other_data.data(),
              other_data.size());
     }
@@ -72,12 +72,12 @@ Status Concat(const gtl::ArraySlice<Tensor>& tensors, Tensor* result) {
   // We use StringPiece as a convenient map over the tensor buffer,
   // but we cast the type to get to the underlying buffer to do the
   // copy.
-  absl::string_view to_data = result->tensor_data();
+  StringPiece to_data = result->tensor_data();
 
   if (DataTypeCanUseMemcpy(dtype)) {
     int64 offset = 0;
     for (const Tensor& tensor : tensors) {
-      absl::string_view from_data = tensor.tensor_data();
+      StringPiece from_data = tensor.tensor_data();
       CHECK_LE(offset + from_data.size(), to_data.size());
       memcpy(const_cast<char*>(to_data.data()) + offset, from_data.data(),
              from_data.size());
@@ -121,7 +121,7 @@ Status Split(const Tensor& tensor, const gtl::ArraySlice<int64>& sizes,
         "'tensor'");
   }
 
-  absl::string_view from_data = tensor.tensor_data();
+  StringPiece from_data = tensor.tensor_data();
 
   if (DataTypeCanUseMemcpy(tensor.dtype())) {
     int64 offset = 0;
@@ -134,7 +134,7 @@ Status Split(const Tensor& tensor, const gtl::ArraySlice<int64>& sizes,
       // We use StringPiece as a convenient map over the tensor buffer,
       // but we cast the type to get to the underlying buffer to do the
       // copy.
-      absl::string_view to_data = split->tensor_data();
+      StringPiece to_data = split->tensor_data();
       CHECK_LE(offset + to_data.size(), from_data.size());
       memcpy(const_cast<char*>(to_data.data()), from_data.data() + offset,
              to_data.size());
diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc
index 944555f683..2280114de5 100644
--- a/tensorflow/core/framework/types.cc
+++ b/tensorflow/core/framework/types.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/framework/types.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/register_types.h"
 
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -114,7 +113,7 @@ string DataTypeString(DataType dtype) {
   return DataTypeStringInternal(dtype);
 }
 
-bool DataTypeFromString(absl::string_view sp, DataType* dt) {
+bool DataTypeFromString(StringPiece sp, DataType* dt) {
   if (str_util::EndsWith(sp, "_ref")) {
     sp.remove_suffix(4);
     DataType non_ref;
diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h
index e07877aa09..2e96b05787 100644
--- a/tensorflow/core/framework/types.h
+++ b/tensorflow/core/framework/types.h
@@ -20,7 +20,6 @@ limitations under the License.
 #include <set>
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 // Disable clang-format to prevent 'FixedPoint' header from being included
 // before 'Tensor' header on which it depends.
@@ -31,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/framework/resource_handle.h"
 #include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/logging.h"
@@ -56,8 +56,7 @@ class DeviceType {
   DeviceType(const char* type)  // NOLINT(runtime/explicit)
       : type_(type) {}
 
-  explicit DeviceType(absl::string_view type)
-      : type_(type.data(), type.size()) {}
+  explicit DeviceType(StringPiece type) : type_(type.data(), type.size()) {}
 
   const char* type() const { return type_.c_str(); }
   const string& type_string() const { return type_; }
@@ -229,7 +228,7 @@ class DataTypeSet {
 
 // If "sp" names a valid type, store it in "*dt" and return true.  Otherwise,
 // return false.
-bool DataTypeFromString(absl::string_view sp, DataType* dt);
+bool DataTypeFromString(StringPiece sp, DataType* dt);
 
 constexpr inline DataTypeSet ToSet(DataType dt) {
   return DataTypeSet(1u << static_cast<uint32>(dt));
diff --git a/tensorflow/core/framework/variant_op_registry.cc b/tensorflow/core/framework/variant_op_registry.cc
index f089114919..ef5b240aea 100644
--- a/tensorflow/core/framework/variant_op_registry.cc
+++ b/tensorflow/core/framework/variant_op_registry.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/type_index.h"
 #include "tensorflow/core/framework/variant.h"
@@ -90,7 +89,7 @@ REGISTER_VARIANT_SHAPE_TYPE(double);
 #undef REGISTER_VARIANT_SHAPE_TYPE
 
 UnaryVariantOpRegistry::VariantDecodeFn* UnaryVariantOpRegistry::GetDecodeFn(
-    absl::string_view type_name) {
+    StringPiece type_name) {
   auto found = decode_fns.find(type_name);
   if (found == decode_fns.end()) return nullptr;
   return &found->second;
@@ -103,7 +102,7 @@ void UnaryVariantOpRegistry::RegisterDecodeFn(
   CHECK_EQ(existing, nullptr)
       << "Unary VariantDecodeFn for type_name: " << type_name
       << " already registered";
-  decode_fns.insert(std::pair<absl::string_view, VariantDecodeFn>(
+  decode_fns.insert(std::pair<StringPiece, VariantDecodeFn>(
       GetPersistentStringPiece(type_name), decode_fn));
 }
 
@@ -180,7 +179,7 @@ Status VariantDeviceCopy(
 
 // Special casing UnaryOpFn per op and per device.
 UnaryVariantOpRegistry::VariantUnaryOpFn* UnaryVariantOpRegistry::GetUnaryOpFn(
-    VariantUnaryOp op, absl::string_view device, const TypeIndex& type_index) {
+    VariantUnaryOp op, StringPiece device, const TypeIndex& type_index) {
   auto found = unary_op_fns.find({op, device, type_index});
   if (found == unary_op_fns.end()) return nullptr;
   return &found->second;
@@ -222,8 +221,7 @@ REGISTER_VARIANT_ZEROS_LIKE_TYPE(bool);
 
 // Special casing BinaryOpFn per op and per device.
 UnaryVariantOpRegistry::VariantBinaryOpFn*
-UnaryVariantOpRegistry::GetBinaryOpFn(VariantBinaryOp op,
-                                      absl::string_view device,
+UnaryVariantOpRegistry::GetBinaryOpFn(VariantBinaryOp op, StringPiece device,
                                       const TypeIndex& type_index) {
   auto found = binary_op_fns.find({op, device, type_index});
   if (found == binary_op_fns.end()) return nullptr;
diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h
index dbbd6cebfc..7eb37e859f 100644
--- a/tensorflow/core/framework/variant_op_registry.h
+++ b/tensorflow/core/framework/variant_op_registry.h
@@ -19,7 +19,6 @@ limitations under the License.
 #include <string>
 #include <unordered_set>
 #include <vector>
-#include "absl/strings/string_view.h"
 
 #define EIGEN_USE_THREADS
 
@@ -106,7 +105,7 @@ class UnaryVariantOpRegistry {
                         const VariantDecodeFn& decode_fn);
 
   // Returns nullptr if no decode function was found for the given TypeName.
-  VariantDecodeFn* GetDecodeFn(absl::string_view type_name);
+  VariantDecodeFn* GetDecodeFn(StringPiece type_name);
 
   // Add a copy-to-GPU function to the registry.
   void RegisterDeviceCopyFn(const VariantDeviceCopyDirection direction,
@@ -125,7 +124,7 @@ class UnaryVariantOpRegistry {
 
   // Returns nullptr if no unary op function was found for the given
   // op, device, and TypeName.
-  VariantUnaryOpFn* GetUnaryOpFn(VariantUnaryOp op, absl::string_view device,
+  VariantUnaryOpFn* GetUnaryOpFn(VariantUnaryOp op, StringPiece device,
                                  const TypeIndex& type_index);
 
   // Add a binary op function to the registry.
@@ -135,7 +134,7 @@ class UnaryVariantOpRegistry {
 
   // Returns nullptr if no binary op function was found for the given
   // op, device and TypeName.
-  VariantBinaryOpFn* GetBinaryOpFn(VariantBinaryOp op, absl::string_view device,
+  VariantBinaryOpFn* GetBinaryOpFn(VariantBinaryOp op, StringPiece device,
                                    const TypeIndex& type_index);
 
   // Get a pointer to a global UnaryVariantOpRegistry object
@@ -156,8 +155,7 @@ class UnaryVariantOpRegistry {
   };
 
   gtl::FlatMap<TypeIndex, VariantShapeFn, TypeIndexHash> shape_fns;
-  gtl::FlatMap<absl::string_view, VariantDecodeFn, StringPieceHasher>
-      decode_fns;
+  gtl::FlatMap<StringPiece, VariantDecodeFn, StringPieceHasher> decode_fns;
 
   // Map std::pair<Direction, type_name> to function.
   struct PairHash {
@@ -181,11 +179,10 @@ class UnaryVariantOpRegistry {
   // and references therein
   template <typename Op>
   struct FuncTuple {
-    FuncTuple(const Op& op, const absl::string_view& dev,
-              const TypeIndex& type_index)
+    FuncTuple(const Op& op, const StringPiece& dev, const TypeIndex& type_index)
         : op_type_(op), device_(dev), type_index_(type_index) {}
     Op op_type_;
-    absl::string_view device_;
+    StringPiece device_;
     TypeIndex type_index_;
   };
   // friend declaration for operator==
@@ -195,7 +192,7 @@ class UnaryVariantOpRegistry {
   struct TupleHash {
     template <typename Op>
     std::size_t operator()(
-        const std::tuple<Op, absl::string_view, TypeIndex>& x) const {
+        const std::tuple<Op, StringPiece, TypeIndex>& x) const {
       // The hash of an enum is just its value as a std::size_t.
       std::size_t ret = static_cast<std::size_t>(std::get<0>(x));
       ret = Hash64Combine(ret, sp_hasher_(std::get<1>(x)));
@@ -221,14 +218,14 @@ class UnaryVariantOpRegistry {
   // Find or insert a string into a persistent string storage
   // container; return the StringPiece pointing to the permanent string
   // location.
-  static absl::string_view GetPersistentStringPiece(const string& str) {
+  static StringPiece GetPersistentStringPiece(const string& str) {
     const auto string_storage = PersistentStringStorage();
     auto found = string_storage->find(str);
     if (found == string_storage->end()) {
       auto inserted = string_storage->insert(str);
-      return absl::string_view(*inserted.first);
+      return StringPiece(*inserted.first);
     } else {
-      return absl::string_view(*found);
+      return StringPiece(*found);
     }
   }
 };
diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h
index 102cbb18b2..2d94dd5cdc 100644
--- a/tensorflow/core/graph/costmodel.h
+++ b/tensorflow/core/graph/costmodel.h
@@ -19,18 +19,18 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/cost_graph.pb.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace tensorflow {
-typedef std::unordered_map<absl::string_view, int32, StringPieceHasher>
+typedef std::unordered_map<StringPiece, int32, StringPieceHasher>
     NodeNameToCostIdMap;
 
 class StepStats;
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 91b0cdab8e..bc0a6ae346 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -550,7 +549,7 @@ Status Graph::AddFunctionLibrary(const FunctionDefLibrary& fdef_lib) {
 
 namespace {
 
-void AddInput(NodeDef* dst, absl::string_view src_name, int src_slot) {
+void AddInput(NodeDef* dst, StringPiece src_name, int src_slot) {
   if (src_slot == Graph::kControlSlot) {
     dst->add_input(strings::StrCat("^", src_name));
   } else if (src_slot == 0) {
@@ -636,7 +635,7 @@ void Graph::ToGraphDefSubRange(GraphDef* graph_def, int from_node_id) const {
   }
 }
 
-string Graph::NewName(absl::string_view prefix) {
+string Graph::NewName(StringPiece prefix) {
   return strings::StrCat(prefix, "/_", name_counter_++);
 }
 
@@ -730,7 +729,7 @@ int Graph::InternDeviceName(const string& device_name) {
   return index;
 }
 
-Status Graph::AddWhileContext(absl::string_view frame_name,
+Status Graph::AddWhileContext(StringPiece frame_name,
                               std::vector<Node*> enter_nodes,
                               std::vector<Node*> exit_nodes,
                               OutputTensor cond_output,
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 1993d073de..027ab522ed 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -40,7 +40,6 @@ limitations under the License.
 #include <functional>
 #include <string>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/types.h"
@@ -535,7 +534,7 @@ class Graph {
 
   // Generate new node name with the specified prefix that is unique
   // across this graph.
-  string NewName(absl::string_view prefix);
+  string NewName(StringPiece prefix);
 
   // Access to the list of all nodes.  Example usage:
   //   for (Node* node : graph.nodes()) { ... }
@@ -608,8 +607,7 @@ class Graph {
   // Create and return a new WhileContext owned by this graph. This is called
   // when a new while loop is created. `frame_name` must be unique among
   // WhileContexts in this graph.
-  Status AddWhileContext(absl::string_view frame_name,
-                         std::vector<Node*> enter_nodes,
+  Status AddWhileContext(StringPiece frame_name, std::vector<Node*> enter_nodes,
                          std::vector<Node*> exit_nodes,
                          OutputTensor cond_output,
                          std::vector<OutputTensor> body_inputs,
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index d70fb6a182..eeb5c14eaa 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -21,7 +21,6 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/shape_refiner.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -54,7 +53,7 @@ inline bool IsNextIteration(const NodeDef& node_def) {
          node_def.op() == "RefNextIteration";
 }
 
-bool IsValidNodeName(absl::string_view s, bool allow_internal_ops) {
+bool IsValidNodeName(StringPiece s, bool allow_internal_ops) {
   using ::tensorflow::strings::Scanner;
   return Scanner(s)
       .One(allow_internal_ops ? Scanner::LETTER_DIGIT_DOT_UNDERSCORE
@@ -218,15 +217,15 @@ class GraphConstructor {
 
   // Returns true if `name` already exists in `g_` (either as a node name or
   // prefix).
-  bool NameExistsInGraph(absl::string_view name);
+  bool NameExistsInGraph(StringPiece name);
 
   // Returns true if `name` already exists in the GraphDef being imported
   // (either as a node name or prefix).
-  bool NameExistsInGraphDef(absl::string_view name);
+  bool NameExistsInGraphDef(StringPiece name);
 
   // Returns a unique version of `original_name`, or `original_name` if it's
   // already unique in the graph.
-  string FindUniqueName(absl::string_view original_name);
+  string FindUniqueName(StringPiece original_name);
 
   // Decrement pending count for users of `processed` and add the ones that now
   // have all of their pending inputs satisfied to `ready_`.
@@ -268,18 +267,16 @@ class GraphConstructor {
   };
   // TODO(vrv): Profile this data structure to see if we should use an
   // alternative implementation of std::unordered_map.
-  std::unordered_map<absl::string_view, NodeInfo, StringPieceHasher>
-      gdef_nodes_;
+  std::unordered_map<StringPiece, NodeInfo, StringPieceHasher> gdef_nodes_;
 
   // Prefixes already used in the GraphDef being imported.
-  std::unordered_set<absl::string_view, StringPieceHasher> gdef_prefixes_;
+  std::unordered_set<StringPiece, StringPieceHasher> gdef_prefixes_;
 
   // Mapping from node name to the existing node in g_.
-  std::unordered_map<absl::string_view, Node*, StringPieceHasher>
-      existing_nodes_;
+  std::unordered_map<StringPiece, Node*, StringPieceHasher> existing_nodes_;
 
   // Prefixes already used in the graph.
-  std::unordered_set<absl::string_view, StringPieceHasher> existing_prefixes_;
+  std::unordered_set<StringPiece, StringPieceHasher> existing_prefixes_;
 
   // Imported node names that have been uniquified. The key is the original
   // name, the value is the new unique name.
@@ -344,7 +341,7 @@ void GraphConstructor::UpdatePendingCountAndReady(int processed) {
 // This could be expensive but we don't expect to call it often, if at all (only
 // if there are multiple nodes in g_ with the same name)
 bool NodeNameInValues(const std::map<TensorId, TensorId>& input_map,
-                      const absl::string_view& node_name) {
+                      const StringPiece& node_name) {
   for (auto iter = input_map.begin(); iter != input_map.end(); ++iter) {
     if (iter->second.first == node_name) return true;
   }
@@ -352,18 +349,17 @@ bool NodeNameInValues(const std::map<TensorId, TensorId>& input_map,
 }
 
 bool NodeNameInValues(const std::vector<string>& control_dependencies,
-                      const absl::string_view& node_name) {
+                      const StringPiece& node_name) {
   return std::find(control_dependencies.begin(), control_dependencies.end(),
                    node_name) != control_dependencies.end();
 }
 
 // Adds any prefixes of `node_name` (not including the full name itself) to
 // `prefixes`.
-void AddPrefixes(
-    absl::string_view node_name,
-    std::unordered_set<absl::string_view, StringPieceHasher>* prefixes) {
+void AddPrefixes(StringPiece node_name,
+                 std::unordered_set<StringPiece, StringPieceHasher>* prefixes) {
   size_t idx = -1;
-  while ((idx = node_name.find('/', idx + 1)) != absl::string_view::npos) {
+  while ((idx = node_name.find('/', idx + 1)) != StringPiece::npos) {
     prefixes->insert(node_name.substr(0, idx));
   }
 }
@@ -397,7 +393,7 @@ Status GraphConstructor::EnsureNoNameCollisions() {
       }
     }
   } else if (!prefix_.empty()) {
-    absl::string_view prefix_no_slash(prefix_);
+    StringPiece prefix_no_slash(prefix_);
     prefix_no_slash.remove_suffix(1);
     if (!IsValidNodeName(prefix_no_slash, false)) {
       return errors::InvalidArgument("Imported node name prefix '", prefix_,
@@ -447,8 +443,7 @@ Status GraphConstructor::BuildNodeIndex() {
           "': Node name contains invalid characters");
     }
     if (!gdef_nodes_
-             .insert(std::make_pair(absl::string_view(node_def.name()),
-                                    NodeInfo(n)))
+             .insert(std::make_pair(StringPiece(node_def.name()), NodeInfo(n)))
              .second) {
       return errors::InvalidArgument("Node '", node_def.name(),
                                      "' is not unique");
@@ -465,7 +460,7 @@ Status GraphConstructor::BuildNodeIndex() {
     // Validate control edges at end
     bool in_control_dependence = false;
     for (int i = 0; i < node_def.input_size(); ++i) {
-      absl::string_view input_name = node_def.input(i);
+      StringPiece input_name = node_def.input(i);
       if (!input_name.empty() && str_util::StartsWith(input_name, "^")) {
         in_control_dependence = true;
       } else if (in_control_dependence) {
@@ -513,7 +508,7 @@ Status GraphConstructor::InitFromEdges() {
       int32 num_control_edges = 0;
       bool has_loop_back_edge = false;
       for (int i = 0; i < node_def.input_size(); ++i) {
-        absl::string_view input_name(node_def.input(i));
+        StringPiece input_name(node_def.input(i));
         if (str_util::StartsWith(input_name, "^")) {
           num_control_edges++;
         } else {
@@ -529,7 +524,7 @@ Status GraphConstructor::InitFromEdges() {
       }
     }
     for (int i = 0; i < node_def.input_size(); ++i) {
-      absl::string_view input_name = node_def.input(i);
+      StringPiece input_name = node_def.input(i);
       TensorId id(ParseTensorName(input_name));
       if (opts_.input_map.count(id) == 0) {
         // If an input is not mapped, then the input should appear in the graph
@@ -563,7 +558,7 @@ Status GraphConstructor::ValidateColocationConstraints(
   const auto iter = node_def.attr().find(kColocationAttrName);
   if (iter == node_def.attr().end()) return Status::OK();
   for (const string& c : iter->second.list().s()) {
-    absl::string_view s(c);
+    StringPiece s(c);
     if (str_util::ConsumePrefix(&s, kColocationGroupPrefix) &&
         gdef_nodes_.find(s) == gdef_nodes_.end()) {
       return errors::InvalidArgument(
@@ -802,7 +797,7 @@ void GraphConstructor::AddPrefixToNodeDef(
     // Skip remapped inputs (which already exist in g_ and are not being
     // imported).
     if (input_already_exists[i]) continue;
-    absl::string_view input(node_def->input(i));
+    StringPiece input(node_def->input(i));
     if (str_util::ConsumePrefix(&input, "^")) {
       node_def->set_input(i, strings::StrCat("^", prefix_, input));
     } else {
@@ -814,7 +809,7 @@ void GraphConstructor::AddPrefixToNodeDef(
     auto* list =
         node_def->mutable_attr()->at(kColocationAttrName).mutable_list();
     for (int i = 0; i < list->s_size(); ++i) {
-      absl::string_view v(list->s(i));
+      StringPiece v(list->s(i));
       if (str_util::ConsumePrefix(&v, kColocationGroupPrefix)) {
         list->set_s(i, strings::StrCat(kColocationGroupPrefix, prefix_, v));
       }
@@ -857,7 +852,7 @@ void GraphConstructor::UpdateUniquifiedColocationNames() {
     if (!status.ok()) continue;
     bool updated = false;
     for (int i = 0; i < coloc_values.size(); ++i) {
-      absl::string_view val(coloc_values[i]);
+      StringPiece val(coloc_values[i]);
       if (str_util::ConsumePrefix(&val, kColocationGroupPrefix)) {
         const auto& name_pair = uniquified_names_.find(string(val));
         if (name_pair == uniquified_names_.end()) continue;
@@ -872,19 +867,19 @@ void GraphConstructor::UpdateUniquifiedColocationNames() {
   }
 }
 
-bool GraphConstructor::NameExistsInGraph(absl::string_view name) {
+bool GraphConstructor::NameExistsInGraph(StringPiece name) {
   if (existing_nodes_.find(name) != existing_nodes_.end()) return true;
   if (existing_prefixes_.find(name) != existing_prefixes_.end()) return true;
   return false;
 }
 
-bool GraphConstructor::NameExistsInGraphDef(absl::string_view name) {
+bool GraphConstructor::NameExistsInGraphDef(StringPiece name) {
   if (gdef_nodes_.find(name) != gdef_nodes_.end()) return true;
   if (gdef_prefixes_.find(name) != gdef_prefixes_.end()) return true;
   return false;
 }
 
-string GraphConstructor::FindUniqueName(absl::string_view original_name) {
+string GraphConstructor::FindUniqueName(StringPiece original_name) {
   string name(original_name);
   int count = 0;
   // Check that any generated names don't collide with imported NodeDefs (as
@@ -1137,7 +1132,7 @@ Status GraphConstructor::PopulateReturnTensors() {
 
 Status GraphConstructor::PopulateReturnNodes() {
   if (opts_.return_nodes.empty()) return Status::OK();
-  for (absl::string_view name : opts_.return_nodes) {
+  for (StringPiece name : opts_.return_nodes) {
     auto iter = gdef_nodes_.find(name);
     if (iter == gdef_nodes_.end()) {
       return errors::InvalidArgument("Requested return node '", name,
diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc
index 395e88a59d..3eef6bd2bd 100644
--- a/tensorflow/core/graph/graph_constructor_test.cc
+++ b/tensorflow/core/graph/graph_constructor_test.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/graph/graph_constructor.h"
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/shape_refiner.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -156,7 +155,7 @@ class GraphConstructorTest : public ::testing::Test {
              "value for the _class attribute. Update it and its callers";
       return "";
     }
-    absl::string_view loc(value[0]);
+    StringPiece loc(value[0]);
     return str_util::ConsumePrefix(&loc, kColocationGroupPrefix) ? string(loc)
                                                                  : "";
   }
diff --git a/tensorflow/core/graph/graph_def_builder.cc b/tensorflow/core/graph/graph_def_builder.cc
index 02256ae6f9..6d5df7efba 100644
--- a/tensorflow/core/graph/graph_def_builder.cc
+++ b/tensorflow/core/graph/graph_def_builder.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <utility>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/lib/core/errors.h"
 
@@ -28,11 +27,11 @@ GraphDefBuilder::Options::Options(Graph* graph, Status* status)
 GraphDefBuilder::Options::~Options() {}
 
 GraphDefBuilder::Options GraphDefBuilder::Options::WithName(
-    absl::string_view name) const {
+    StringPiece name) const {
   return Options(*this).WithNameImpl(name);
 }
 GraphDefBuilder::Options GraphDefBuilder::Options::WithDevice(
-    absl::string_view device) const {
+    StringPiece device) const {
   return Options(*this).WithDeviceImpl(device);
 }
 GraphDefBuilder::Options GraphDefBuilder::Options::WithControlInput(
@@ -44,12 +43,12 @@ GraphDefBuilder::Options GraphDefBuilder::Options::WithControlInputs(
   return Options(*this).WithControlInputsImpl(control_inputs);
 }
 GraphDefBuilder::Options GraphDefBuilder::Options::WithNameImpl(
-    absl::string_view name) {
+    StringPiece name) {
   name_ = string(name);
   return *this;
 }
 GraphDefBuilder::Options GraphDefBuilder::Options::WithDeviceImpl(
-    absl::string_view device) {
+    StringPiece device) {
   device_ = string(device);
   return *this;
 }
@@ -72,7 +71,7 @@ Status GraphDefBuilder::ToGraphDef(GraphDef* graph_def) const {
   return status_;
 }
 
-string GraphDefBuilder::Options::GetNameForOp(absl::string_view op) const {
+string GraphDefBuilder::Options::GetNameForOp(StringPiece op) const {
   if (name_.empty()) return graph_->NewName(op);
   return name_;
 }
diff --git a/tensorflow/core/graph/graph_def_builder.h b/tensorflow/core/graph/graph_def_builder.h
index e0528742d4..400d8b6c84 100644
--- a/tensorflow/core/graph/graph_def_builder.h
+++ b/tensorflow/core/graph/graph_def_builder.h
@@ -17,12 +17,12 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPH_GRAPH_DEF_BUILDER_H_
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
@@ -76,19 +76,19 @@ class GraphDefBuilder {
 
     // Methods for setting options.  These are const methods: they
     // return a copy of *this with the option set.
-    Options WithName(absl::string_view name) const;
-    Options WithDevice(absl::string_view device) const;
+    Options WithName(StringPiece name) const;
+    Options WithDevice(StringPiece device) const;
     Options WithControlInput(Node* control_input) const;
     Options WithControlInputs(gtl::ArraySlice<Node*> control_inputs) const;
 
     // Override the default value for an optional attr.
     template <class T>
-    Options WithAttr(absl::string_view attr_name, T&& value) const {
+    Options WithAttr(StringPiece attr_name, T&& value) const {
       return Options(*this).WithAttrImpl(attr_name, std::forward<T>(value));
     }
     // Note: overload needed to allow {...} expressions for value.
     template <class T>
-    Options WithAttr(absl::string_view attr_name,
+    Options WithAttr(StringPiece attr_name,
                      std::initializer_list<T> value) const {
       return WithAttr<std::initializer_list<T>>(attr_name, std::move(value));
     }
@@ -106,7 +106,7 @@ class GraphDefBuilder {
     // Given the Op type name, return a name for a node of that type.
     // Uses the value set in WithName() if that has been called.  Otherwise,
     // returns a name built out of the Op type name.
-    string GetNameForOp(absl::string_view op) const;
+    string GetNameForOp(StringPiece op) const;
 
     // Sets the device, adds control inputs, adds attrs, and calls Finalize().
     // If Finalize returns an error, it is saved and this function returns
@@ -122,12 +122,12 @@ class GraphDefBuilder {
     }
 
    private:
-    Options WithNameImpl(absl::string_view name);
-    Options WithDeviceImpl(absl::string_view device);
+    Options WithNameImpl(StringPiece name);
+    Options WithDeviceImpl(StringPiece device);
     Options WithControlInputImpl(Node* control_input);
     Options WithControlInputsImpl(gtl::ArraySlice<Node*> control_inputs);
     template <class T>
-    Options WithAttrImpl(absl::string_view name, T&& value) {
+    Options WithAttrImpl(StringPiece name, T&& value) {
       attrs_.emplace_back(string(name), AttrValue());
       SetAttrValue(std::forward<T>(value), &attrs_.back().second);
       return *this;
diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index 9446e7cf6b..1dbcebab59 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/memory_types.h"
 #include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/framework/tensor.pb.h"
@@ -156,7 +155,7 @@ bool IsDstInputOnHost(const Edge* edge, const GraphInfo& info) {
 
 // Add an input to dst that comes from the "src_slot" output of the
 // node named by "src_name".
-void AddInput(NodeDef* dst, absl::string_view src_name, int src_slot) {
+void AddInput(NodeDef* dst, StringPiece src_name, int src_slot) {
   if (src_slot == Graph::kControlSlot) {
     dst->add_input(strings::StrCat("^", src_name));
   } else if (src_slot == 0) {
@@ -912,7 +911,7 @@ Status AddControlEdges(const PartitionOptions& opts,
 // If 'ndef' is a Send or Recv, fills its attr send_device_incarnation
 // if possible.
 void SetIncarnation(const PartitionOptions& opts, NodeDef* ndef) {
-  absl::string_view op(ndef->op());
+  StringPiece op(ndef->op());
   if (op != "_Send" && op != "_Recv") {
     // Not related to send/recv.
     return;
diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc
index 5f5587cef6..68a20fcc5f 100644
--- a/tensorflow/core/graph/node_builder.cc
+++ b/tensorflow/core/graph/node_builder.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/graph/node_builder.h"
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -30,17 +29,17 @@ NodeBuilder::NodeOut::NodeOut(Node* n, int32 i)  // NOLINT(runtime/explicit)
       index(i),
       dt(SafeGetOutput(node, i, &error)) {}
 
-NodeBuilder::NodeOut::NodeOut(absl::string_view n, int32 i, DataType t)
+NodeBuilder::NodeOut::NodeOut(StringPiece n, int32 i, DataType t)
     : node(nullptr), error(false), name(n), index(i), dt(t) {}
 
 NodeBuilder::NodeOut::NodeOut()
     : node(nullptr), error(true), index(0), dt(DT_FLOAT) {}
 
-NodeBuilder::NodeBuilder(absl::string_view name, absl::string_view op_name,
+NodeBuilder::NodeBuilder(StringPiece name, StringPiece op_name,
                          const OpRegistryInterface* op_registry)
     : def_builder_(name, op_name, op_registry) {}
 
-NodeBuilder::NodeBuilder(absl::string_view name, const OpDef* op_def)
+NodeBuilder::NodeBuilder(StringPiece name, const OpDef* op_def)
     : def_builder_(name, op_def) {}
 
 NodeBuilder::NodeBuilder(const NodeDefBuilder& def_builder)
@@ -95,12 +94,12 @@ NodeBuilder& NodeBuilder::ControlInputs(gtl::ArraySlice<Node*> src_nodes) {
   return *this;
 }
 
-NodeBuilder& NodeBuilder::Device(absl::string_view device_spec) {
+NodeBuilder& NodeBuilder::Device(StringPiece device_spec) {
   def_builder_.Device(device_spec);
   return *this;
 }
 
-NodeBuilder& NodeBuilder::AssignedDevice(absl::string_view device) {
+NodeBuilder& NodeBuilder::AssignedDevice(StringPiece device) {
   assigned_device_ = string(device);
   return *this;
 }
diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h
index f2baea606a..d576985a23 100644
--- a/tensorflow/core/graph/node_builder.h
+++ b/tensorflow/core/graph/node_builder.h
@@ -17,12 +17,12 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPH_NODE_BUILDER_H_
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
@@ -55,7 +55,7 @@ class NodeBuilder {
     // useful when preparing a graph for ExtendSession or creating a
     // back edge to a node that hasn't been added to the graph yet,
     // but will be.
-    NodeOut(absl::string_view name, int32 i, DataType t);
+    NodeOut(StringPiece name, int32 i, DataType t);
 
     // Default constructor for std::vector<NodeOut>.
     NodeOut();
@@ -75,9 +75,9 @@ class NodeBuilder {
   // the Op plus a registry) for the Node.  Other fields are
   // specified by calling the methods below.
   // REQUIRES: The OpDef must satisfy ValidateOpDef().
-  NodeBuilder(absl::string_view name, absl::string_view op_name,
+  NodeBuilder(StringPiece name, StringPiece op_name,
               const OpRegistryInterface* op_registry = OpRegistry::Global());
-  NodeBuilder(absl::string_view name, const OpDef* op_def);
+  NodeBuilder(StringPiece name, const OpDef* op_def);
 
   // Create a NodeBuilder from an existing NodeDefBuilder.
   NodeBuilder(const NodeDefBuilder& def_builder);
@@ -98,10 +98,10 @@ class NodeBuilder {
 
   // Sets the "requested device spec" in the NodeDef (not the
   // "assigned device" in the Node).
-  NodeBuilder& Device(absl::string_view device_spec);
+  NodeBuilder& Device(StringPiece device_spec);
 
   // Sets the device name in the "assigned device" field in tensorflow::Node.
-  NodeBuilder& AssignedDevice(absl::string_view device);
+  NodeBuilder& AssignedDevice(StringPiece device);
 
   // Set the value of an attr.  attr_name must match the name of one of
   // attrs defined by the Op, and value must have the corresponding type
@@ -109,10 +109,9 @@ class NodeBuilder {
   // types for value).  Note that attrs will be set automatically if
   // they can be determined by the inputs.
   template <class T>
-  NodeBuilder& Attr(absl::string_view attr_name, T&& value);
+  NodeBuilder& Attr(StringPiece attr_name, T&& value);
   template <class T>
-  NodeBuilder& Attr(absl::string_view attr_name,
-                    std::initializer_list<T> value);
+  NodeBuilder& Attr(StringPiece attr_name, std::initializer_list<T> value);
 
   // Validates the described node and adds it to *graph, adding edges
   // for all (non-back) inputs.  If created_node is not nullptr,
@@ -151,13 +150,13 @@ class NodeBuilder {
 // IMPLEMENTATION -------------------------------------------------------------
 
 template <class T>
-NodeBuilder& NodeBuilder::Attr(absl::string_view attr_name, T&& value) {
+NodeBuilder& NodeBuilder::Attr(StringPiece attr_name, T&& value) {
   def_builder_.Attr(attr_name, std::forward<T>(value));
   return *this;
 }
 
 template <class T>
-NodeBuilder& NodeBuilder::Attr(absl::string_view attr_name,
+NodeBuilder& NodeBuilder::Attr(StringPiece attr_name,
                                std::initializer_list<T> value) {
   def_builder_.Attr(attr_name, value);
   return *this;
diff --git a/tensorflow/core/graph/quantize_training.cc b/tensorflow/core/graph/quantize_training.cc
index 91ed6d6082..3b6e8cc233 100644
--- a/tensorflow/core/graph/quantize_training.cc
+++ b/tensorflow/core/graph/quantize_training.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/graph/quantize_training.h"
 
 #include "tensorflow/core/common_runtime/executor.h"
@@ -154,7 +153,7 @@ Status FindSaveOp(const Graph* graph, Node** save_op,
   return Status::OK();
 }
 
-Node* FindRestoreAllOp(const Graph* graph, absl::string_view save_prefix) {
+Node* FindRestoreAllOp(const Graph* graph, StringPiece save_prefix) {
   for (Node* node : graph->op_nodes()) {
     // The restore_all op should have the same prefix of the save_op.
     if (node->name() == strings::StrCat(save_prefix, "/restore_all")) {
@@ -167,8 +166,8 @@ Node* FindRestoreAllOp(const Graph* graph, absl::string_view save_prefix) {
 // Strips the last "/suffix" from a name.
 // We use this to construct the name of restore ops in the same way they are
 // constructed by the Saver.
-absl::string_view GetNodeNamePrefix(const Node* node) {
-  absl::string_view name = node->name();
+StringPiece GetNodeNamePrefix(const Node* node) {
+  StringPiece name = node->name();
   return name.substr(0, name.rfind('/'));
 }
 
@@ -252,7 +251,7 @@ Status AddRestoreVariableSubgraphs(Graph* graph, Node* save_op,
                                    const std::vector<const Edge*>& in_edges,
                                    const std::vector<Node*>& variables) {
   Node* prefix_op = in_edges[0]->src();
-  absl::string_view name_prefix = GetNodeNamePrefix(save_op);
+  StringPiece name_prefix = GetNodeNamePrefix(save_op);
   Node* restore_all = FindRestoreAllOp(graph, name_prefix);
   if (restore_all == nullptr) {
     return errors::InvalidArgument("graph has SaveOp, but no restore_all NoOp");
diff --git a/tensorflow/core/graph/subgraph.cc b/tensorflow/core/graph/subgraph.cc
index 96c5191a0a..60337e30aa 100644
--- a/tensorflow/core/graph/subgraph.cc
+++ b/tensorflow/core/graph/subgraph.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/types.h"
@@ -45,8 +44,7 @@ namespace subgraph {
 
 namespace {
 
-typedef std::unordered_map<absl::string_view, Node*, StringPieceHasher>
-    NameIndex;
+typedef std::unordered_map<StringPiece, Node*, StringPieceHasher> NameIndex;
 
 // Rewrite graph by replacing the output tensors specified in
 // "fed_outputs" with special feed nodes for each specified output
diff --git a/tensorflow/core/graph/subgraph_test.cc b/tensorflow/core/graph/subgraph_test.cc
index cb3e7771ae..6c014a8d44 100644
--- a/tensorflow/core/graph/subgraph_test.cc
+++ b/tensorflow/core/graph/subgraph_test.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/graph/graph.h"
@@ -313,7 +312,7 @@ TEST_F(SubgraphTest, ChainOfFools) {
   EXPECT_TRUE(HasEdge("e", 0, "_send_e_0", 0));
 }
 
-static bool HasSubstr(absl::string_view base, absl::string_view substr) {
+static bool HasSubstr(StringPiece base, StringPiece substr) {
   bool ok = str_util::StrContains(base, substr);
   EXPECT_TRUE(ok) << base << ", expected substring " << substr;
   return ok;
diff --git a/tensorflow/core/graph/tensor_id.cc b/tensorflow/core/graph/tensor_id.cc
index 3a8317d757..5a5b85e727 100644
--- a/tensorflow/core/graph/tensor_id.cc
+++ b/tensorflow/core/graph/tensor_id.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
@@ -28,10 +28,10 @@ SafeTensorId::SafeTensorId(const TensorId& id)
     : SafeTensorId(string(id.first), id.second) {}
 
 TensorId ParseTensorName(const string& name) {
-  return ParseTensorName(absl::string_view(name.data(), name.size()));
+  return ParseTensorName(StringPiece(name.data(), name.size()));
 }
 
-TensorId ParseTensorName(absl::string_view name) {
+TensorId ParseTensorName(StringPiece name) {
   // Parse either a name, ^name, or name:digits.  To do so, we go backwards from
   // the end of the string, skipping over a run of digits.  If we hit a ':'
   // character, then we know we are in the 'name:digits' regime.  Otherwise, we
@@ -49,11 +49,11 @@ TensorId ParseTensorName(absl::string_view name) {
   }
   TensorId id;
   if (p > base && *p == ':' && mul > 1) {
-    id.first = absl::string_view(base, p - base);
+    id.first = StringPiece(base, p - base);
     id.second = index;
   } else if (str_util::StartsWith(name, "^")) {
     // Control edge
-    id.first = absl::string_view(base + 1);
+    id.first = StringPiece(base + 1);
     id.second = Graph::kControlSlot;
   } else {
     id.first = name;
diff --git a/tensorflow/core/graph/tensor_id.h b/tensorflow/core/graph/tensor_id.h
index fb1d7f5082..0ba3942618 100644
--- a/tensorflow/core/graph/tensor_id.h
+++ b/tensorflow/core/graph/tensor_id.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 
@@ -30,8 +30,8 @@ struct SafeTensorId;
 // Identifier for a tensor within a step.
 // first == operation_name, second == output_index
 // Note: does not own backing storage for name.
-struct TensorId : public std::pair<absl::string_view, int> {
-  typedef std::pair<absl::string_view, int> Base;
+struct TensorId : public std::pair<StringPiece, int> {
+  typedef std::pair<StringPiece, int> Base;
 
   // Inherit the set of constructors.
   using Base::pair;
@@ -55,7 +55,7 @@ struct TensorId : public std::pair<absl::string_view, int> {
 };
 
 TensorId ParseTensorName(const string& name);
-TensorId ParseTensorName(absl::string_view name);
+TensorId ParseTensorName(StringPiece name);
 
 // Same as TensorId, except owns the backing storage for the op name. This makes
 // the memory management simpler at the expense of a copy.
diff --git a/tensorflow/core/graph/while_context.cc b/tensorflow/core/graph/while_context.cc
index 31b3c56f9d..8e89bc4c75 100644
--- a/tensorflow/core/graph/while_context.cc
+++ b/tensorflow/core/graph/while_context.cc
@@ -14,11 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/graph/while_context.h"
-#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 
-WhileContext::WhileContext(absl::string_view frame_name,
+WhileContext::WhileContext(StringPiece frame_name,
                            std::vector<Node*> enter_nodes,
                            std::vector<Node*> exit_nodes,
                            OutputTensor cond_output,
diff --git a/tensorflow/core/graph/while_context.h b/tensorflow/core/graph/while_context.h
index 310dcd712e..5405e62be2 100644
--- a/tensorflow/core/graph/while_context.h
+++ b/tensorflow/core/graph/while_context.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPH_WHILE_CONTEXT_H_
 #define TENSORFLOW_CORE_GRAPH_WHILE_CONTEXT_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/graph/graph.h"
 
 namespace tensorflow {
@@ -35,7 +34,7 @@ namespace tensorflow {
 // differentiable. Figure out backwards compatibility story.
 class WhileContext {
  public:
-  WhileContext(absl::string_view frame_name, std::vector<Node*> enter_nodes,
+  WhileContext(StringPiece frame_name, std::vector<Node*> enter_nodes,
                std::vector<Node*> exit_nodes, OutputTensor cond_output,
                std::vector<OutputTensor> body_inputs,
                std::vector<OutputTensor> body_outputs);
diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index bcb3bced95..7c6fe56e1f 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -26,7 +26,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -42,7 +41,6 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index c014bcb489..43a7d6a70b 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -103,7 +103,6 @@ cc_library(
         "//tensorflow/core/grappler/clusters:cluster",
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:symbolic_shapes",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -269,7 +268,6 @@ cc_library(
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:symbolic_shapes",
         "//tensorflow/core/grappler/utils:topological_sort",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -317,7 +315,6 @@ cc_library(
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:topological_sort",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -772,7 +769,6 @@ cc_library(
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/costs:graph_properties",
         "//tensorflow/core/grappler/utils:frame",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 069de8b93f..9b94d2706a 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -40,6 +39,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/symbolic_shapes.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -285,7 +285,7 @@ class ArithmeticOptimizerStage : public GraphOptimizerStage<string> {
     for (const NodeDef* output : ctx().node_map->GetOutputs(node.name())) {
       for (int i = 0; i < output->input_size(); ++i) {
         auto input = output->input(i);
-        absl::string_view name = ParseNodeNameAsStringPiece(input, &position);
+        StringPiece name = ParseNodeNameAsStringPiece(input, &position);
         if (name == node.name() && /*control input*/ position < 0) {
           return true;
         }
@@ -431,12 +431,12 @@ class ArithmeticNodesGroupOptimizerStage : public ArithmeticOptimizerStage {
     return signature;
   }
 
-  void MarkWithTag(const absl::string_view tag, NodeDef* node) {
+  void MarkWithTag(const StringPiece tag, NodeDef* node) {
     AddNodeAttr(tag, true, node);
   }
 
   void MarkAllMembersWithTag(const OptimizedNodesGroup& group,
-                             const absl::string_view tag) const {
+                             const StringPiece tag) const {
     AddNodeAttr(tag, true, group.root_node);
     for (NodeDef* optimized_node : group.optimized_nodes) {
       AddNodeAttr(tag, true, optimized_node);
@@ -453,12 +453,12 @@ class ArithmeticNodesGroupOptimizerStage : public ArithmeticOptimizerStage {
            ctx().nodes_to_preserve->end();
   }
 
-  bool IsMarkedWithTag(const NodeDef& node, const absl::string_view tag) const {
+  bool IsMarkedWithTag(const NodeDef& node, const StringPiece tag) const {
     return HasNodeAttr(node, tag);
   }
 
-  bool IsMarkedWithAnyTag(const NodeDef& node, const absl::string_view tag1,
-                          const absl::string_view tag2) const {
+  bool IsMarkedWithAnyTag(const NodeDef& node, const StringPiece tag1,
+                          const StringPiece tag2) const {
     return IsMarkedWithTag(node, tag1) || IsMarkedWithTag(node, tag2);
   }
 };
@@ -1577,7 +1577,7 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
       for (NodeDef* output : outputs) {
         if (IsControlInput(output->input(0))) continue;
         int port;
-        const absl::string_view node_name =
+        const StringPiece node_name =
             ParseNodeNameAsStringPiece(output->input(0), &port);
         if (node_name == node.name()) {
           tails->insert(ChainLink(output, port));
@@ -1628,7 +1628,7 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
       } else {
         for (NodeDef* new_tail : ctx().node_map->GetOutputs(tail->name())) {
           int port;
-          const absl::string_view node_name =
+          const StringPiece node_name =
               ParseNodeNameAsStringPiece(new_tail->input(0), &port);
           if (node_name != tail->name()) {
             return Status::OK();
@@ -3219,7 +3219,7 @@ uint64 UniqueNodes::ComputeSignature(const NodeDef& node) const {
 
   for (const auto& input : node.input()) {
     int pos;
-    const absl::string_view node_name = ParseNodeNameAsStringPiece(input, &pos);
+    const StringPiece node_name = ParseNodeNameAsStringPiece(input, &pos);
     h = Hash64CombineUnordered(Hash64(node_name.data(), node_name.size()), h);
     h = Hash64CombineUnordered(std::hash<int>()(pos), h);
   }
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index ff32bec5de..11331c9406 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #define EIGEN_USE_THREADS
 
 #include "tensorflow/core/grappler/optimizers/constant_folding.h"
@@ -35,6 +34,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/optimizers/evaluation_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/symbolic_shapes.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/numbers.h"
@@ -258,12 +258,12 @@ static Status ConvertShapeToConstant(const string& op, const DataType& type,
 
 // TODO(rmlarsen): Perhaps we should move this to the GraphOptimizer base class.
 bool ConstantFolding::OptimizedNodeExists(const NodeDef& node,
-                                          absl::string_view suffix) const {
+                                          StringPiece suffix) const {
   return node_map_->NodeExists(OptimizedNodeName(node, suffix));
 }
 
 string ConstantFolding::OptimizedNodeName(const NodeDef& node,
-                                          absl::string_view suffix) const {
+                                          StringPiece suffix) const {
   return AddPrefixToNodeName(strings::StrCat(node.name(), suffix),
                              kConstantFoldingConst);
 }
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index 7bf4b90012..8593b3e0b8 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_CONSTANT_FOLDING_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_CONSTANT_FOLDING_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/resource_mgr.h"
@@ -53,8 +52,8 @@ class ConstantFolding : public GraphOptimizer {
                 const GraphDef& optimize_output, double result) override;
 
  private:
-  string OptimizedNodeName(const NodeDef& node, absl::string_view suffix) const;
-  bool OptimizedNodeExists(const NodeDef& node, absl::string_view suffix) const;
+  string OptimizedNodeName(const NodeDef& node, StringPiece suffix) const;
+  bool OptimizedNodeExists(const NodeDef& node, StringPiece suffix) const;
 
   bool IsReallyConstant(const NodeDef& node) const;
 
diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index a4bd3566e5..1a648da5da 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -52,7 +52,6 @@ cc_library(
     deps = [
         ":graph_utils",
         ":function_utils",
-        "@com_google_absl//absl/strings",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -91,7 +90,6 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_utils",
-        "@com_google_absl//absl/strings",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
@@ -126,7 +124,6 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
-        "@com_google_absl//absl/strings",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -150,7 +147,6 @@ tf_cc_test(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
         "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -163,7 +159,6 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
-        "@com_google_absl//absl/strings",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:mutable_graph_view",
@@ -220,7 +215,6 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_utils",
-        "@com_google_absl//absl/strings",
         "//tensorflow/core/grappler:mutable_graph_view",
         "//tensorflow/core:lib",
         "//tensorflow/core/grappler:grappler_item",
@@ -267,7 +261,6 @@ tf_cc_test(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/grappler:grappler_item",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -302,7 +295,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core/grappler:grappler_item",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -487,7 +479,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core/grappler:grappler_item",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -589,7 +580,6 @@ tf_cc_test(
         ":graph_utils",
         ":function_utils",
         ":vectorization_utils",
-        "@com_google_absl//absl/strings",
         "//tensorflow/core:framework",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/core/grappler/optimizers/data/function_utils.cc b/tensorflow/core/grappler/optimizers/data/function_utils.cc
index ad2b8167cb..311df15bc2 100644
--- a/tensorflow/core/grappler/optimizers/data/function_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/function_utils.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/function_utils.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
 #include "tensorflow/core/framework/device_base.h"
@@ -35,8 +34,8 @@ FunctionDefTensorDesc::FunctionDefTensorDesc(const string& node_name,
 FunctionDefTensorDesc::FunctionDefTensorDesc(const string& input) {
   // Parses node_name:node_output:position string into its components.
   full_str = input;
-  absl::string_view capture;
-  absl::string_view remaining;
+  StringPiece capture;
+  StringPiece remaining;
 
   // Parse "node_name"
   if (strings::Scanner(input)
@@ -87,8 +86,8 @@ void ReplaceReferences(const string& from, const string& to,
   }
 }
 
-void AddFunctionOutputWithUniqueName(absl::string_view prefix,
-                                     absl::string_view output_tensor_name,
+void AddFunctionOutputWithUniqueName(StringPiece prefix,
+                                     StringPiece output_tensor_name,
                                      FunctionDef* function, DataType dt) {
   string name = string(prefix);
   int id = function->signature().output_arg_size();
@@ -103,7 +102,7 @@ void AddFunctionOutputWithUniqueName(absl::string_view prefix,
   (*function->mutable_ret())[name] = string(output_tensor_name);
 }
 
-NodeDef* AddNode(absl::string_view name, absl::string_view op,
+NodeDef* AddNode(StringPiece name, StringPiece op,
                  const std::vector<string>& inputs,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  FunctionDef* fd) {
@@ -123,49 +122,45 @@ NodeDef* AddNode(absl::string_view name, absl::string_view op,
   return node;
 }
 
-bool ContainsFunctionNodeWithName(absl::string_view name,
+bool ContainsFunctionNodeWithName(StringPiece name,
                                   const FunctionDef& function) {
   return FindFunctionNodeWithName(name, function) != -1;
 }
 
-bool ContainsFunctionNodeWithOp(absl::string_view op,
-                                const FunctionDef& function) {
+bool ContainsFunctionNodeWithOp(StringPiece op, const FunctionDef& function) {
   return FindFunctionNodeWithOp(op, function) != -1;
 }
 
-bool ContainsFunctionOutputWithName(absl::string_view name,
+bool ContainsFunctionOutputWithName(StringPiece name,
                                     const FunctionDef& function) {
   return FindFunctionOutputWithName(name, function) != -1;
 }
 
-int FindFunctionInputWithName(absl::string_view name,
-                              const FunctionDef& function) {
+int FindFunctionInputWithName(StringPiece name, const FunctionDef& function) {
   return graph_utils::GetFirstElementIndexWithPredicate(
       [&name](const OpDef_ArgDef& arg) { return arg.name() == name; },
       function.signature().input_arg());
 }
 
-int FindFunctionOutputWithName(absl::string_view name,
-                               const FunctionDef& function) {
+int FindFunctionOutputWithName(StringPiece name, const FunctionDef& function) {
   return graph_utils::GetFirstElementIndexWithPredicate(
       [&name](const OpDef_ArgDef& arg) { return arg.name() == name; },
       function.signature().output_arg());
 }
 
-int FindFunctionNodeWithName(absl::string_view name,
-                             const FunctionDef& function) {
+int FindFunctionNodeWithName(StringPiece name, const FunctionDef& function) {
   return graph_utils::GetFirstElementIndexWithPredicate(
       [&name](const NodeDef& node) { return node.name() == name; },
       function.node_def());
 }
 
-int FindFunctionNodeWithOp(absl::string_view op, const FunctionDef& function) {
+int FindFunctionNodeWithOp(StringPiece op, const FunctionDef& function) {
   return graph_utils::GetFirstElementIndexWithPredicate(
       [&op](const NodeDef& node) { return node.op() == op; },
       function.node_def());
 }
 
-void SetUniqueFunctionNodeName(absl::string_view prefix, FunctionDef* function,
+void SetUniqueFunctionNodeName(StringPiece prefix, FunctionDef* function,
                                NodeDef* node) {
   string name = string(prefix);
   int id = function->node_def_size();
diff --git a/tensorflow/core/grappler/optimizers/data/function_utils.h b/tensorflow/core/grappler/optimizers/data/function_utils.h
index 714f7d30d6..d4ce824652 100644
--- a/tensorflow/core/grappler/optimizers/data/function_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/function_utils.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_FUNCTION_UTILS_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_FUNCTION_UTILS_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -60,50 +59,46 @@ void ReplaceReferences(const string& from, const string& to, FunctionDef* func);
 
 // Adds a function output to the function def, ensuring that the output key
 // is unique, and maps to output_tensor_name in the ret dict.
-void AddFunctionOutputWithUniqueName(absl::string_view prefix,
-                                     absl::string_view output_tensor_name,
+void AddFunctionOutputWithUniqueName(StringPiece prefix,
+                                     StringPiece output_tensor_name,
                                      FunctionDef* function, DataType dt);
 
 // Adds a node to a FunctionDef.
-NodeDef* AddNode(absl::string_view name, absl::string_view op,
+NodeDef* AddNode(StringPiece name, StringPiece op,
                  const std::vector<string>& inputs,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  FunctionDef* fd);
 
 // Checks whether the function contains a node with the given name.
-bool ContainsFunctionNodeWithName(absl::string_view name,
+bool ContainsFunctionNodeWithName(StringPiece name,
                                   const FunctionDef& function);
 
 // Checks whether the function contains a node with the given op.
-bool ContainsFunctionNodeWithOp(absl::string_view op,
-                                const FunctionDef& function);
+bool ContainsFunctionNodeWithOp(StringPiece op, const FunctionDef& function);
 
 // Checks whether the function contains an output with the given name.
-bool ContainsFunctionOutputWithName(absl::string_view name,
+bool ContainsFunctionOutputWithName(StringPiece name,
                                     const FunctionDef& function);
 
 // Returns the index of the function input with the given name or -1 if the
 // function node does not exist.
-int FindFunctionInputWithName(absl::string_view name,
-                              const FunctionDef& function);
+int FindFunctionInputWithName(StringPiece name, const FunctionDef& function);
 
 // Returns the index of the function output with the given name or -1 if the
 // function node does not exist.
-int FindFunctionOutputWithName(absl::string_view name,
-                               const FunctionDef& function);
+int FindFunctionOutputWithName(StringPiece name, const FunctionDef& function);
 
 // Returns the index of the function node with the given name or -1 if the
 // function node does not exist.
-int FindFunctionNodeWithName(absl::string_view name,
-                             const FunctionDef& function);
+int FindFunctionNodeWithName(StringPiece name, const FunctionDef& function);
 
 // Returns the index of the function node with the given op or -1 if the
 // function node does not exist.
-int FindFunctionNodeWithOp(absl::string_view op, const FunctionDef& function);
+int FindFunctionNodeWithOp(StringPiece op, const FunctionDef& function);
 
 // Sets the function node name using the `prefix` as a prefix while guaranteeing
 // the name is unique across the functions nodes.
-void SetUniqueFunctionNodeName(absl::string_view prefix, FunctionDef* function,
+void SetUniqueFunctionNodeName(StringPiece prefix, FunctionDef* function,
                                NodeDef* node);
 
 }  // end namespace function_utils
diff --git a/tensorflow/core/grappler/optimizers/data/fusion_utils.cc b/tensorflow/core/grappler/optimizers/data/fusion_utils.cc
index db6e4d8b51..b3bfee138f 100644
--- a/tensorflow/core/grappler/optimizers/data/fusion_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/fusion_utils.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/fusion_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/framework/op_def.pb.h"
@@ -424,14 +423,11 @@ void LazyConjunctionOutput(const protobuf::Map<string, string>& first_ret,
   *fused_ret = first_ret;
 }
 
-FunctionDef* FuseFunctions(const FunctionDef& first_function,
-                           const FunctionDef& second_function,
-                           absl::string_view fused_name_prefix,
-                           const SetFunctionSignatureFn& set_signature,
-                           const SetInputFn& set_input,
-                           const SetOutputFn& set_output,
-                           const SetNodesFn& set_nodes,
-                           FunctionDefLibrary* library) {
+FunctionDef* FuseFunctions(
+    const FunctionDef& first_function, const FunctionDef& second_function,
+    StringPiece fused_name_prefix, const SetFunctionSignatureFn& set_signature,
+    const SetInputFn& set_input, const SetOutputFn& set_output,
+    const SetNodesFn& set_nodes, FunctionDefLibrary* library) {
   if (first_function.attr_size() != 0 || second_function.attr_size() != 0)
     return nullptr;  // Functions with attributes are currently not supported
 
diff --git a/tensorflow/core/grappler/optimizers/data/fusion_utils.h b/tensorflow/core/grappler/optimizers/data/fusion_utils.h
index 0350a2c08e..19b7002dcd 100644
--- a/tensorflow/core/grappler/optimizers/data/fusion_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/fusion_utils.h
@@ -17,7 +17,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_FUSION_UTILS_H_
 
 #include <functional>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/op_types.h"
@@ -123,14 +122,11 @@ void LazyConjunctionNodes(const FunctionDef& first_function,
 // that are not conflicting with first function.  This means that copied nodes
 // from  second function can end up having different names.  For explanation of
 // set up functions see the documentation of the functions types.
-FunctionDef* FuseFunctions(const FunctionDef& first_function,
-                           const FunctionDef& second_function,
-                           absl::string_view fused_name_prefix,
-                           const SetFunctionSignatureFn& set_signature,
-                           const SetInputFn& set_input,
-                           const SetOutputFn& set_output,
-                           const SetNodesFn& set_nodes,
-                           FunctionDefLibrary* library);
+FunctionDef* FuseFunctions(
+    const FunctionDef& first_function, const FunctionDef& second_function,
+    StringPiece fused_name_prefix, const SetFunctionSignatureFn& set_signature,
+    const SetInputFn& set_input, const SetOutputFn& set_output,
+    const SetNodesFn& set_nodes, FunctionDefLibrary* library);
 
 }  // namespace fusion_utils
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
index 034a4070d0..1f03c6515c 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -26,8 +25,8 @@ namespace tensorflow {
 namespace grappler {
 namespace graph_tests_utils {
 
-NodeDef MakeMapNode(absl::string_view name, absl::string_view input_node_name,
-                    absl::string_view function_name) {
+NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
+                    StringPiece function_name) {
   return test::function::NDef(
       name, "MapDataset", {string(input_node_name)},
       {{"f", FunctionDefHelper::FunctionRef(string(function_name))},
@@ -36,9 +35,8 @@ NodeDef MakeMapNode(absl::string_view name, absl::string_view input_node_name,
        {"output_types", gtl::ArraySlice<DataType>{}}});
 }
 
-NodeDef MakeFilterNode(absl::string_view name,
-                       absl::string_view input_node_name,
-                       absl::string_view function_name) {
+NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
+                       StringPiece function_name) {
   return test::function::NDef(
       name, "FilterDataset", {string(input_node_name)},
       {{"predicate", FunctionDefHelper::FunctionRef(string(function_name))},
@@ -47,12 +45,11 @@ NodeDef MakeFilterNode(absl::string_view name,
        {"output_types", gtl::ArraySlice<TensorShape>{}}});
 }
 
-NodeDef MakeMapAndBatchNode(absl::string_view name,
-                            absl::string_view input_node_name,
-                            absl::string_view batch_size_node_name,
-                            absl::string_view num_parallel_calls_node_name,
-                            absl::string_view drop_remainder_node_name,
-                            absl::string_view function_name) {
+NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
+                            StringPiece batch_size_node_name,
+                            StringPiece num_parallel_calls_node_name,
+                            StringPiece drop_remainder_node_name,
+                            StringPiece function_name) {
   return test::function::NDef(
       name, "MapAndBatchDatasetV2",
       {string(input_node_name), "", string(batch_size_node_name),
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
index 8014a4f305..f7891d5e1f 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
@@ -16,26 +16,24 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_TEST_UTILS_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_TEST_UTILS_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 namespace grappler {
 namespace graph_tests_utils {
 
-NodeDef MakeMapNode(absl::string_view name, absl::string_view input_node_name,
-                    absl::string_view function_name = "XTimesTwo");
+NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
+                    StringPiece function_name = "XTimesTwo");
 
-NodeDef MakeFilterNode(absl::string_view name,
-                       absl::string_view input_node_name,
-                       absl::string_view function_name = "IsZero");
+NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
+                       StringPiece function_name = "IsZero");
 
-NodeDef MakeMapAndBatchNode(absl::string_view name,
-                            absl::string_view input_node_name,
-                            absl::string_view batch_size_node_name,
-                            absl::string_view num_parallel_calls_node_name,
-                            absl::string_view drop_remainder_node_name,
-                            absl::string_view function_name = "XTimesTwo");
+NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
+                            StringPiece batch_size_node_name,
+                            StringPiece num_parallel_calls_node_name,
+                            StringPiece drop_remainder_node_name,
+                            StringPiece function_name = "XTimesTwo");
 
 }  // end namespace graph_tests_utils
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index 1cc6fca037..b863a25dc5 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
@@ -100,7 +99,7 @@ NodeDef* AddScalarPlaceholder(DataType dtype, MutableGraphView* graph) {
   return graph->AddNode(std::move(node));
 }
 
-NodeDef* AddNode(absl::string_view name, absl::string_view op,
+NodeDef* AddNode(StringPiece name, StringPiece op,
                  const std::vector<string>& inputs,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  MutableGraphView* graph) {
@@ -151,7 +150,7 @@ NodeDef* AddScalarConstNode(int64 v, MutableGraphView* graph) {
 }
 
 template <>
-NodeDef* AddScalarConstNode(absl::string_view v, MutableGraphView* graph) {
+NodeDef* AddScalarConstNode(StringPiece v, MutableGraphView* graph) {
   return AddScalarConstNodeHelper(
       DT_STRING,
       [v](TensorProto* proto) { proto->add_string_val(v.data(), v.size()); },
@@ -188,20 +187,20 @@ bool Compare(const GraphDef& g1, const GraphDef& g2) {
   return true;
 }
 
-bool ContainsGraphFunctionWithName(absl::string_view name,
+bool ContainsGraphFunctionWithName(StringPiece name,
                                    const FunctionDefLibrary& library) {
   return FindGraphFunctionWithName(name, library) != -1;
 }
 
-bool ContainsGraphNodeWithName(absl::string_view name, const GraphDef& graph) {
+bool ContainsGraphNodeWithName(StringPiece name, const GraphDef& graph) {
   return FindGraphNodeWithName(name, graph) != -1;
 }
 
-bool ContainsNodeWithOp(absl::string_view op, const GraphDef& graph) {
+bool ContainsNodeWithOp(StringPiece op, const GraphDef& graph) {
   return FindGraphNodeWithOp(op, graph) != -1;
 }
 
-int FindGraphFunctionWithName(absl::string_view name,
+int FindGraphFunctionWithName(StringPiece name,
                               const FunctionDefLibrary& library) {
   return GetFirstElementIndexWithPredicate(
       [&name](const FunctionDef& function) {
@@ -210,13 +209,13 @@ int FindGraphFunctionWithName(absl::string_view name,
       library.function());
 }
 
-int FindGraphNodeWithName(absl::string_view name, const GraphDef& graph) {
+int FindGraphNodeWithName(StringPiece name, const GraphDef& graph) {
   return GetFirstElementIndexWithPredicate(
       [&name](const NodeDef& node) { return node.name() == name; },
       graph.node());
 }
 
-int FindGraphNodeWithOp(absl::string_view op, const GraphDef& graph) {
+int FindGraphNodeWithOp(StringPiece op, const GraphDef& graph) {
   return GetFirstElementIndexWithPredicate(
       [&op](const NodeDef& node) { return node.op() == op; }, graph.node());
 }
@@ -233,7 +232,7 @@ NodeDef* GetInputNode(const NodeDef& node, const MutableGraphView& graph) {
   return graph.GetRegularFanin(input_port).node;
 }
 
-void SetUniqueGraphNodeName(absl::string_view prefix, GraphDef* graph,
+void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph,
                             NodeDef* node) {
   string name = string(prefix);
   int id = graph->node_size();
@@ -249,8 +248,7 @@ void SetUniqueGraphNodeName(absl::string_view prefix, GraphDef* graph,
   node->set_name(std::move(name));
 }
 
-void SetUniqueGraphFunctionName(absl::string_view prefix,
-                                FunctionDefLibrary* library,
+void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
                                 FunctionDef* function) {
   string name = string(prefix);
   int id = library->function_size();
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.h b/tensorflow/core/grappler/optimizers/data/graph_utils.h
index 4397f77cba..d130fee204 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_UTILS_H_
 #define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_GRAPH_UTILS_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -49,7 +48,7 @@ int GetFirstElementIndexWithPredicate(const Predicate& predicate,
 }
 
 // Adds a node to the graph.
-NodeDef* AddNode(absl::string_view name, absl::string_view op,
+NodeDef* AddNode(StringPiece name, StringPiece op,
                  const std::vector<string>& inputs,
                  const std::vector<std::pair<string, AttrValue>>& attributes,
                  MutableGraphView* graph);
@@ -78,33 +77,33 @@ NodeDef* AddScalarConstNode(int v, MutableGraphView* graph);
 template <>
 NodeDef* AddScalarConstNode(int64 v, MutableGraphView* graph);
 template <>
-NodeDef* AddScalarConstNode(absl::string_view v, MutableGraphView* graph);
+NodeDef* AddScalarConstNode(StringPiece v, MutableGraphView* graph);
 
 // Checks whether the two graphs are the same.
 bool Compare(const GraphDef& g1, const GraphDef& g2);
 
 // Checks whether the graph contains a node with the given name.
-bool ContainsGraphNodeWithName(absl::string_view name, const GraphDef& graph);
+bool ContainsGraphNodeWithName(StringPiece name, const GraphDef& graph);
 
 // Checks whether the library contains a function with the given name.
-bool ContainsGraphFunctionWithName(absl::string_view name,
+bool ContainsGraphFunctionWithName(StringPiece name,
                                    const FunctionDefLibrary& library);
 
 // Checks whether the graph contains a node with the given op.
-bool ContainsNodeWithOp(absl::string_view op, const GraphDef& graph);
+bool ContainsNodeWithOp(StringPiece op, const GraphDef& graph);
 
 // Returns the index of the node with the given name or -1 if the node does
 // not exist.
-int FindGraphNodeWithName(absl::string_view name, const GraphDef& graph);
+int FindGraphNodeWithName(StringPiece name, const GraphDef& graph);
 
 // Returns the index of the function with the given name or -1 if the function
 // does not exist.
-int FindGraphFunctionWithName(absl::string_view name,
+int FindGraphFunctionWithName(StringPiece name,
                               const FunctionDefLibrary& library);
 
 // Returns the index of the first node with the given op or -1 if no such  node
 // exists.
-int FindGraphNodeWithOp(absl::string_view op, const GraphDef& graph);
+int FindGraphNodeWithOp(StringPiece op, const GraphDef& graph);
 
 // Gets the 0th input to a node in the graph.
 NodeDef* GetInputNode(const NodeDef& node, const MutableGraphView& graph);
@@ -116,13 +115,11 @@ std::vector<int> FindAllGraphNodesWithOp(const string& op,
 
 // Sets the node name using `prefix` as a prefix while guaranteeing the name
 // is unique across the graph.
-void SetUniqueGraphNodeName(absl::string_view prefix, GraphDef* graph,
-                            NodeDef* node);
+void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph, NodeDef* node);
 
 // Sets the function name using the `prefix` name as a prefix while guaranteeing
 // the name is unique across the function library.
-void SetUniqueGraphFunctionName(absl::string_view prefix,
-                                FunctionDefLibrary* library,
+void SetUniqueGraphFunctionName(StringPiece prefix, FunctionDefLibrary* library,
                                 FunctionDef* function);
 
 // Copies attribute having name `attribute_name` from node `from` to node
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
index c77fff1656..4ab6d71532 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -82,7 +81,7 @@ TEST(GraphUtilsTest, AddScalarConstNodeInt64) {
 TEST(GraphUtilsTest, AddScalarConstNodeString) {
   GraphDef graph_def;
   MutableGraphView graph(&graph_def);
-  NodeDef* string_node = AddScalarConstNode<absl::string_view>("hello", &graph);
+  NodeDef* string_node = AddScalarConstNode<StringPiece>("hello", &graph);
   EXPECT_TRUE(
       ContainsGraphNodeWithName(string_node->name(), *graph.GetGraph()));
   EXPECT_EQ(string_node->attr().at("value").tensor().string_val(0), "hello");
diff --git a/tensorflow/core/grappler/optimizers/data/latency_all_edges.cc b/tensorflow/core/grappler/optimizers/data/latency_all_edges.cc
index 7954d124a2..9e382aeef9 100644
--- a/tensorflow/core/grappler/optimizers/data/latency_all_edges.cc
+++ b/tensorflow/core/grappler/optimizers/data/latency_all_edges.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/latency_all_edges.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/clusters/cluster.h"
@@ -43,8 +42,8 @@ NodeDef MakeLatencyNode(const NodeDef& node, MutableGraphView* graph) {
   // Set the input of LatencyDataset node as `node`
   new_node.add_input(node.name());
 
-  NodeDef* tag = graph_utils::AddScalarConstNode<absl::string_view>(
-      absl::string_view("record_latency_" + node.name()), graph);
+  NodeDef* tag = graph_utils::AddScalarConstNode<StringPiece>(
+      StringPiece("record_latency_" + node.name()), graph);
   new_node.add_input(tag->name());
 
   // Set `output_types` and `output_shapes` attributes.
diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc
index 0db08d0181..b676246b31 100644
--- a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
@@ -42,7 +41,7 @@ TEST(MapAndBatchFusionTest, FuseMapAndBatchNodesIntoOne) {
   NodeDef *range_node = graph_utils::AddNode("", "RangeDataset", range_inputs,
                                              range_attrs, &graph);
   NodeDef *captured_input_node =
-      graph_utils::AddScalarConstNode<absl::string_view>("hello", &graph);
+      graph_utils::AddScalarConstNode<StringPiece>("hello", &graph);
 
   NodeDef *map_node;
   {
@@ -124,7 +123,7 @@ TEST(MapAndBatchFusionTest, FuseMapAndBatchV2NodesIntoOne) {
   NodeDef *range_node = graph_utils::AddNode("", "RangeDataset", range_inputs,
                                              range_attrs, &graph);
   NodeDef *captured_input_node =
-      graph_utils::AddScalarConstNode<absl::string_view>("hello", &graph);
+      graph_utils::AddScalarConstNode<StringPiece>("hello", &graph);
 
   NodeDef *map_node;
   {
@@ -207,7 +206,7 @@ TEST(MapAndBatchFusionTest, FuseParallelMapAndBatchNodesIntoOne) {
   NodeDef *range_node = graph_utils::AddNode("", "RangeDataset", range_inputs,
                                              range_attrs, &graph);
   NodeDef *captured_input_node =
-      graph_utils::AddScalarConstNode<absl::string_view>("hello", &graph);
+      graph_utils::AddScalarConstNode<StringPiece>("hello", &graph);
   NodeDef *num_parallel_calls_node =
       graph_utils::AddScalarConstNode<int>(2, &graph);
 
diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc
index c242a74359..f4faf41549 100644
--- a/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/map_vectorization_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/map_vectorization.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -31,10 +30,8 @@ namespace {
 using test::function::GDef;
 using test::function::NDef;
 
-NodeDef MakeMapNodeHelper(absl::string_view name,
-                          absl::string_view input_node_name,
-                          absl::string_view function_name,
-                          absl::string_view map_op_name,
+NodeDef MakeMapNodeHelper(StringPiece name, StringPiece input_node_name,
+                          StringPiece function_name, StringPiece map_op_name,
                           gtl::ArraySlice<PartialTensorShape> output_shapes,
                           gtl::ArraySlice<DataType> output_types) {
   return test::function::NDef(
@@ -45,16 +42,16 @@ NodeDef MakeMapNodeHelper(absl::string_view name,
        {"output_types", output_types}});
 }
 
-NodeDef MakeMapNode(absl::string_view name, absl::string_view input_node_name,
-                    absl::string_view function_name,
+NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
+                    StringPiece function_name,
                     gtl::ArraySlice<PartialTensorShape> output_shapes,
                     gtl::ArraySlice<DataType> output_types) {
   return MakeMapNodeHelper(name, input_node_name, function_name, "MapDataset",
                            output_shapes, output_types);
 }
 
-NodeDef MakeBatchNode(absl::string_view name, absl::string_view input_node_name,
-                      absl::string_view input_batch_size_name,
+NodeDef MakeBatchNode(StringPiece name, StringPiece input_node_name,
+                      StringPiece input_batch_size_name,
                       gtl::ArraySlice<PartialTensorShape> output_shapes,
                       gtl::ArraySlice<DataType> output_types) {
   return NDef(
@@ -63,10 +60,9 @@ NodeDef MakeBatchNode(absl::string_view name, absl::string_view input_node_name,
       {{"output_types", output_types}, {"output_shapes", output_shapes}});
 }
 
-NodeDef MakeBatchV2Node(absl::string_view name,
-                        absl::string_view input_node_name,
-                        absl::string_view input_batch_size_name,
-                        absl::string_view input_drop_remainder_name,
+NodeDef MakeBatchV2Node(StringPiece name, StringPiece input_node_name,
+                        StringPiece input_batch_size_name,
+                        StringPiece input_drop_remainder_name,
                         gtl::ArraySlice<PartialTensorShape> output_shapes,
                         gtl::ArraySlice<DataType> output_types) {
   return NDef(
@@ -76,7 +72,7 @@ NodeDef MakeBatchV2Node(absl::string_view name,
       {{"output_types", output_types}, {"output_shapes", output_shapes}});
 }
 
-NodeDef MakeRangeNode(absl::string_view name, gtl::ArraySlice<string> inputs) {
+NodeDef MakeRangeNode(StringPiece name, gtl::ArraySlice<string> inputs) {
   return NDef(name, "RangeDataset", inputs,
               {{"output_shapes", gtl::ArraySlice<TensorShape>({{}})},
                {"output_types", gtl::ArraySlice<DataType>({DT_INT64})}});
diff --git a/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc b/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc
index 2f3da39845..be1a66df75 100644
--- a/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/noop_elimination_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/noop_elimination.h"
 #include <tuple>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value_util.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
@@ -36,15 +35,15 @@ std::vector<std::pair<string, AttrValue>> GetCommonAttributes() {
   return commonAttributes;
 }
 
-NodeDef *MakeUnaryNode(absl::string_view node_type, int count,
-                       string input_node, MutableGraphView *graph) {
+NodeDef *MakeUnaryNode(StringPiece node_type, int count, string input_node,
+                       MutableGraphView *graph) {
   NodeDef *node_count = graph_utils::AddScalarConstNode<int64>(count, graph);
   return graph_utils::AddNode("", node_type,
                               {std::move(input_node), node_count->name()},
                               GetCommonAttributes(), graph);
 }
 
-NodeDef *MakeUnaryNonConstNode(absl::string_view node_type, string input_node,
+NodeDef *MakeUnaryNonConstNode(StringPiece node_type, string input_node,
                                MutableGraphView *graph) {
   NodeDef *node_count = graph_utils::AddScalarPlaceholder(DT_INT32, graph);
   return graph_utils::AddNode("", node_type,
@@ -54,7 +53,7 @@ NodeDef *MakeUnaryNonConstNode(absl::string_view node_type, string input_node,
 
 NodeDef *MakeCacheNode(string input_node, MutableGraphView *graph) {
   NodeDef *node_filename =
-      graph_utils::AddScalarConstNode<absl::string_view>("", graph);
+      graph_utils::AddScalarConstNode<StringPiece>("", graph);
   return graph_utils::AddNode("", "CacheDataset",
                               {std::move(input_node), node_filename->name()},
                               GetCommonAttributes(), graph);
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
index 799500350a..8b93b1f2b8 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/grappler/optimizers/data/vectorization/vectorizer_registry.h"
 
 #include "absl/strings/str_join.h"
@@ -415,7 +414,7 @@ Status Vectorization::StackTensor(WrappedTensor* unstacked,
   }
 
   Graph* g = outer_scope_.get();
-  auto node_builder = [](absl::string_view op) {
+  auto node_builder = [](StringPiece op) {
     return NodeBuilder(strings::StrCat("vectorized/stack/", op), op);
   };
 
diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
index 4d802e7a57..e82bfb702a 100644
--- a/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
+++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/optimizers/data/vectorization_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/grappler/optimizers/data/function_utils.h"
 #include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
@@ -70,8 +69,7 @@ string GetRetval(const FunctionDef& function_def, int index) {
 
 // TODO(rachelim): Use FunctionDefHelper::Create instead
 FunctionDef CreateFunction(
-    absl::string_view name,
-    const std::vector<std::pair<string, DataType>>& inputs,
+    StringPiece name, const std::vector<std::pair<string, DataType>>& inputs,
     const std::vector<std::pair<string, DataType>>& outputs,
     const std::map<string, string>& rets) {
   FunctionDef func;
@@ -93,6 +91,7 @@ FunctionDef CreateFunction(
 
   return func;
 }
+
 ///==================================//
 // Tests for vectorization framework //
 ///==================================//
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 75e32e0b7f..6613768a35 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <unordered_map>
 #include <unordered_set>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -28,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -204,7 +204,7 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
       bool data_connection = false;
       for (int i = fanout->input_size() - 1; i >= 0; --i) {
         int pos;
-        absl::string_view input_name =
+        StringPiece input_name =
             ParseNodeNameAsStringPiece(fanout->input(i), &pos);
         if (input_name == node_name) {
           if (pos < 0) {
@@ -352,7 +352,7 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
           for (int j = 0; j < consumer->input_size(); ++j) {
             const string& old_input = consumer->input(j);
             int old_input_pos;
-            absl::string_view old_input_node_name =
+            StringPiece old_input_node_name =
                 ParseNodeNameAsStringPiece(old_input, &old_input_pos);
             if (old_input_node_name == node_name) {
               if (old_input_pos >= 0) {
diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc
index f41a2d7c15..0d4aaf6462 100644
--- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/scoped_allocator.h"
 #include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -199,7 +198,7 @@ Status RemoveEdge(const string& input_edge_name, const string& from_node_name,
 }
 }  // namespace
 
-void ScopedAllocatorOptimizer::ExtendNodeAttr(absl::string_view name,
+void ScopedAllocatorOptimizer::ExtendNodeAttr(StringPiece name,
                                               const std::vector<int32>& values,
                                               NodeDef* node_def) {
   if (HasNodeAttr(*node_def, name)) {
diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h
index e7a1ec154b..13589f536c 100644
--- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h
+++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.h
@@ -18,7 +18,6 @@ limitations under the License.
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/grappler/optimizers/graph_optimizer.h"
 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
 
@@ -67,8 +66,7 @@ class ScopedAllocatorOptimizer : public GraphOptimizer {
 
   // Appends values to the attr value under name in node_def, if present.
   // If not present does an assignment.
-  static void ExtendNodeAttr(absl::string_view name,
-                             const std::vector<int32>& values,
+  static void ExtendNodeAttr(StringPiece name, const std::vector<int32>& values,
                              NodeDef* node_def);
 
   // Class that knows how to do graph rewriting for a particular kind of Op in
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index a890b56b4d..5867d01324 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -20,12 +20,12 @@ limitations under the License.
 #include <queue>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -146,9 +146,9 @@ bool IsSameInput(const string& name1, const string& name2) {
     return true;
   }
   int position1;
-  absl::string_view node1 = ParseNodeNameAsStringPiece(name1, &position1);
+  StringPiece node1 = ParseNodeNameAsStringPiece(name1, &position1);
   int position2;
-  absl::string_view node2 = ParseNodeNameAsStringPiece(name2, &position2);
+  StringPiece node2 = ParseNodeNameAsStringPiece(name2, &position2);
   return (position1 == position2) && (node1 == node2);
 }
 
@@ -251,7 +251,7 @@ int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map) {
       if (node_as_input == node.name()) {
         ++num_outputs;
       } else {
-        const absl::string_view name =
+        const StringPiece name =
             ParseNodeNameAsStringPiece(node_as_input, &pos);
         if (name == node.name()) {
           ++num_outputs;
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index c7a8a517bb..0168ab1da3 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -19,12 +19,12 @@ limitations under the License.
 #include <functional>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
@@ -125,8 +125,8 @@ inline int NodePositionIfSameNode(const string& input_name,
   if (input_it == input_name.end()) {
     return is_ctrl ? -1 : 0;
   } else if (*input_it++ == ':') {
-    absl::string_view remaining(&(*input_it),
-                                std::distance(input_it, input_name.end()));
+    StringPiece remaining(&(*input_it),
+                          std::distance(input_it, input_name.end()));
     int position;
     if (!strings::safe_strto32(remaining, &position)) {
       return -2;
@@ -139,18 +139,18 @@ inline int NodePositionIfSameNode(const string& input_name,
 
 // Return the node name corresponding to 'name' if name is valid, or the empty
 // string otherwise.
-inline absl::string_view NodeNameAsStringPiece(const string& name) {
+inline StringPiece NodeNameAsStringPiece(const string& name) {
   static const string empty;
-  if (name.empty()) return absl::string_view(empty);
+  if (name.empty()) return StringPiece(empty);
   const auto begin_it = name[0] == '^' ? name.begin() + 1 : name.begin();
   auto end_it = begin_it;
   while (end_it != name.end() && *end_it != ':') {
     ++end_it;
   }
   if (end_it != name.end() && *end_it != ':') {
-    return absl::string_view(empty);
+    return StringPiece(empty);
   }
-  return absl::string_view(&(*begin_it), std::distance(begin_it, end_it));
+  return StringPiece(&(*begin_it), std::distance(begin_it, end_it));
 }
 
 // Return the node name corresponding to 'name' if name is valid, or the empty
@@ -160,12 +160,12 @@ inline string NodeName(const string& name) {
 }
 
 // Returns the node name and position in a single call.
-inline absl::string_view ParseNodeNameAsStringPiece(const string& name,
-                                                    int* position) {
+inline StringPiece ParseNodeNameAsStringPiece(const string& name,
+                                              int* position) {
   static const string empty;
   if (name.empty()) {
     *position = 0;
-    return absl::string_view(empty);
+    return StringPiece(empty);
   }
   const bool is_ctrl = name[0] == '^';
   const auto begin_it = is_ctrl ? name.begin() + 1 : name.begin();
@@ -174,17 +174,15 @@ inline absl::string_view ParseNodeNameAsStringPiece(const string& name,
   while (end_it != name.end() && *end_it != ':') {
     ++end_it;
   }
-  const absl::string_view node_name(&(*begin_it),
-                                    std::distance(begin_it, end_it));
+  const StringPiece node_name(&(*begin_it), std::distance(begin_it, end_it));
   if (end_it != name.end()) {
     if (*end_it != ':') {
-      return absl::string_view(empty);
+      return StringPiece(empty);
     } else if (!is_ctrl) {
       ++end_it;
-      absl::string_view remaining(&(*end_it),
-                                  std::distance(end_it, name.end()));
+      StringPiece remaining(&(*end_it), std::distance(end_it, name.end()));
       if (!strings::safe_strto32(remaining, position)) {
-        return absl::string_view(empty);
+        return StringPiece(empty);
       }
     }
   }
diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD
index ae251577c7..bdbb8836e1 100644
--- a/tensorflow/core/grappler/utils/BUILD
+++ b/tensorflow/core/grappler/utils/BUILD
@@ -170,7 +170,6 @@ cc_library(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc
index e3b2984d85..6861fb423c 100644
--- a/tensorflow/core/grappler/utils/functions.cc
+++ b/tensorflow/core/grappler/utils/functions.cc
@@ -16,7 +16,6 @@ limitations under the License.
 
 #include <unordered_map>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/function.pb.h"
@@ -109,8 +108,8 @@ Status GrapplerFunctionConnectivity::ExpandFunctionDefInput(
   string node_output;
   int position = -1;
 
-  absl::string_view capture;
-  absl::string_view remaining;
+  StringPiece capture;
+  StringPiece remaining;
 
   // Parse "node_name"
   if (Scanner(func_def_input)
diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc
index a81d8e7bcd..9b6c1f690b 100644
--- a/tensorflow/core/grappler/utils_test.cc
+++ b/tensorflow/core/grappler/utils_test.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/utils.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/grappler_item.h"
@@ -372,17 +371,16 @@ BM_NodePositionIfSameNode("^foo/bar/baz", "foo/bar/baz", Match_Ctrl);
 BM_NodePositionIfSameNode("blah", "foo/bar/baz", NoMatch_0);
 BM_NodePositionIfSameNode("foo/bar/baz/gnu", "foo/bar/baz", NoMatch_end);
 
-#define BM_ParseNodeNameAsStringPiece(I, NAME)                  \
-  static void BM_ParseNodeNameAsStringPiece_##NAME(int iters) { \
-    string input = I;                                           \
-    for (int i = 0; i < iters; ++i) {                           \
-      int position;                                             \
-      const absl::string_view name =                            \
-          ParseNodeNameAsStringPiece(input, &position);         \
-      CHECK_GE(position, -1);                                   \
-      CHECK(!name.empty());                                     \
-    }                                                           \
-  }                                                             \
+#define BM_ParseNodeNameAsStringPiece(I, NAME)                               \
+  static void BM_ParseNodeNameAsStringPiece_##NAME(int iters) {              \
+    string input = I;                                                        \
+    for (int i = 0; i < iters; ++i) {                                        \
+      int position;                                                          \
+      const StringPiece name = ParseNodeNameAsStringPiece(input, &position); \
+      CHECK_GE(position, -1);                                                \
+      CHECK(!name.empty());                                                  \
+    }                                                                        \
+  }                                                                          \
   BENCHMARK(BM_ParseNodeNameAsStringPiece_##NAME)
 
 BM_ParseNodeNameAsStringPiece("foo", foo);
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 5567288f1e..1f401b257b 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -578,7 +578,6 @@ ARRAY_DEPS = [
     ":gather_functor",
     ":ops_util",
     ":transpose_functor",
-    "@com_google_absl//absl/strings",
     "//tensorflow/core:array_grad",
     "//tensorflow/core:array_ops_op_lib",
     "//tensorflow/core:core_cpu",
@@ -1746,7 +1745,6 @@ DATA_FLOW_DEPS = [
     ":tensor_array",
     ":typed_conditional_accumulator_base",
     ":typed_queue",
-    "@com_google_absl//absl/strings",
     "//third_party/eigen3",
     "//tensorflow/core:core_cpu",
     "//tensorflow/core:data_flow_ops_op_lib",
@@ -2106,7 +2104,6 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:list_ops_op_lib",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -2190,7 +2187,6 @@ IMAGE_DEPS = [
     ":bounds_check",
     ":eigen_helpers",
     ":image_resizer_state",
-    "@com_google_absl//absl/strings",
     "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:gif_internal",
@@ -2622,7 +2618,6 @@ tf_cc_tests(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/util/tensor_bundle",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3175,7 +3170,6 @@ tf_cc_tests(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3322,7 +3316,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3344,7 +3337,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -3469,7 +3461,6 @@ tf_kernel_library(
         ":image_resizer_state",
         ":fill_functor",
         ":ops_util",
-        "@com_google_absl//absl/strings",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
@@ -3934,7 +3925,6 @@ cc_library(
 )
 
 PARSING_DEPS = [
-    "@com_google_absl//absl/strings",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:parsing_ops_op_lib",
@@ -4120,7 +4110,6 @@ cc_library(
 )
 
 SPARSE_DEPS = [
-    "@com_google_absl//absl/strings",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:sparse_ops_op_lib",
@@ -4539,7 +4528,6 @@ cc_library(
 STRING_DEPS = [
     ":bounds_check",
     ":string_util",
-    "@com_google_absl//absl/strings",
     "//third_party/eigen3",
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
@@ -4562,7 +4550,7 @@ tf_kernel_library(
 tf_kernel_library(
     name = "string_format_op",
     prefix = "string_format_op",
-    deps = STRING_DEPS,
+    deps = STRING_DEPS + ["@com_google_absl//absl/strings"],
 )
 
 tf_cc_test(
@@ -4815,7 +4803,6 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:word2vec_ops",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -4854,7 +4841,6 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -6338,7 +6324,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/kernels/conv_grad_ops.cc b/tensorflow/core/kernels/conv_grad_ops.cc
index 00abebd895..507720c998 100644
--- a/tensorflow/core/kernels/conv_grad_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_ops.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 // See docs in ../ops/nn_ops.cc.
 
-#include "absl/strings/string_view.h"
 #define USE_EIGEN_TENSOR
 #define EIGEN_USE_THREADS
 
@@ -57,7 +56,7 @@ int ConvBackpropDimensions::SpatialPadding(const Padding& padding,
 // while the original version only handles the cases where dilation_rates equal
 // to 1.
 Status ConvBackpropExtractAndVerifyDimensionV2(
-    absl::string_view label, const TensorShape& input_shape,
+    StringPiece label, const TensorShape& input_shape,
     const TensorShape& filter_shape, const TensorShape& output_shape,
     const gtl::ArraySlice<int32>& dilations, const std::vector<int32>& strides,
     Padding padding, int spatial_dim, int filter_spatial_dim,
@@ -96,7 +95,7 @@ Status ConvBackpropExtractAndVerifyDimensionV2(
 }
 
 Status ConvBackpropExtractAndVerifyDimension(
-    absl::string_view label, const TensorShape& input_shape,
+    StringPiece label, const TensorShape& input_shape,
     const TensorShape& filter_shape, const TensorShape& output_shape,
     const std::vector<int32>& strides, Padding padding, int spatial_dim,
     int filter_spatial_dim, ConvBackpropSpatialDimension* dim) {
@@ -107,9 +106,8 @@ Status ConvBackpropExtractAndVerifyDimension(
 }
 
 Status ConvBackpropComputeDimensionsV2(
-    absl::string_view label, int num_spatial_dims,
-    const TensorShape& input_shape, const TensorShape& filter_shape,
-    const TensorShape& out_backprop_shape,
+    StringPiece label, int num_spatial_dims, const TensorShape& input_shape,
+    const TensorShape& filter_shape, const TensorShape& out_backprop_shape,
     const gtl::ArraySlice<int32>& dilations, const std::vector<int32>& strides,
     Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) {
   // The + 2 in the following line is for the batch and feature dimensions.
@@ -161,11 +159,13 @@ Status ConvBackpropComputeDimensionsV2(
   return Status::OK();
 }
 
-Status ConvBackpropComputeDimensions(
-    absl::string_view label, int num_spatial_dims,
-    const TensorShape& input_shape, const TensorShape& filter_shape,
-    const TensorShape& out_backprop_shape, const std::vector<int32>& strides,
-    Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) {
+Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims,
+                                     const TensorShape& input_shape,
+                                     const TensorShape& filter_shape,
+                                     const TensorShape& out_backprop_shape,
+                                     const std::vector<int32>& strides,
+                                     Padding padding, TensorFormat data_format,
+                                     ConvBackpropDimensions* dims) {
   static constexpr std::array<int32, 5> one_dilations = {{1, 1, 1, 1, 1}};
   return ConvBackpropComputeDimensionsV2(
       label, num_spatial_dims, input_shape, filter_shape, out_backprop_shape,
diff --git a/tensorflow/core/kernels/conv_grad_ops.h b/tensorflow/core/kernels/conv_grad_ops.h
index 24a4deb8d3..9551959463 100644
--- a/tensorflow/core/kernels/conv_grad_ops.h
+++ b/tensorflow/core/kernels/conv_grad_ops.h
@@ -161,8 +161,8 @@ limitations under the License.
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 
@@ -249,18 +249,19 @@ struct ConvBackpropDimensions {
 // Common code between implementations of Conv?DBackpropInput and
 // Conv?DBackpropFilter. Verifies that the dimensions all match, and computes
 // sizes/padding for the spatial dimensions.
-Status ConvBackpropComputeDimensions(
-    absl::string_view label, int num_spatial_dims,
-    const TensorShape& input_shape, const TensorShape& filter_shape,
-    const TensorShape& out_backprop_shape, const std::vector<int32>& strides,
-    Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims);
+Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims,
+                                     const TensorShape& input_shape,
+                                     const TensorShape& filter_shape,
+                                     const TensorShape& out_backprop_shape,
+                                     const std::vector<int32>& strides,
+                                     Padding padding, TensorFormat data_format,
+                                     ConvBackpropDimensions* dims);
 
 // The V2 version computes the same outputs with arbitrary dilation rate.
 // TODO(b/67112639): Merge V2 versions and the original versions eventually.
 Status ConvBackpropComputeDimensionsV2(
-    absl::string_view label, int num_spatial_dims,
-    const TensorShape& input_shape, const TensorShape& filter_shape,
-    const TensorShape& out_backprop_shape,
+    StringPiece label, int num_spatial_dims, const TensorShape& input_shape,
+    const TensorShape& filter_shape, const TensorShape& out_backprop_shape,
     const gtl::ArraySlice<int32>& dilations, const std::vector<int32>& strides,
     Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims);
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 6f6ba6943b..37c1c54786 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -42,7 +42,6 @@ cc_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -640,7 +639,6 @@ tf_kernel_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:session_options",
         "//tensorflow/core/kernels:ops_util",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -682,7 +680,6 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core/util/tensor_bundle",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -797,7 +794,6 @@ tf_kernel_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core/kernels:ops_util",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc
index ced5fb1971..f2419db3dc 100644
--- a/tensorflow/core/kernels/data/cache_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/dataset.h"
@@ -340,7 +339,7 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
           if (dataset()->env_->FileExists(lockfile_).ok()) {
             // Attempt to read the contents of the lockfile.
             char contents_scratch[151] = {0};  // Initialize all to 0.
-            absl::string_view contents;
+            StringPiece contents;
             std::unique_ptr<RandomAccessFile> file;
             if (dataset()->env_->NewRandomAccessFile(lockfile_, &file).ok()) {
               file->Read(0, 150, &contents, contents_scratch).IgnoreError();
@@ -459,7 +458,7 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
               *end_of_sequence = true;
               return Status::OK();
             }
-            absl::string_view key = reader_.key();
+            StringPiece key = reader_.key();
             DCHECK_EQ(key, dataset()->FormatName(cur_index_, i));
             TF_RETURN_IF_ERROR(reader_.ReadCurrent(&(*out_tensors)[i]));
             TF_RETURN_IF_ERROR(reader_.status());
diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc
index f6e45bf0f5..a40f7f2146 100644
--- a/tensorflow/core/kernels/data/dataset_utils.cc
+++ b/tensorflow/core/kernels/data/dataset_utils.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/kernels/data/dataset_utils.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -69,8 +68,8 @@ std::vector<bool> ComputeMoveVector(const std::vector<int>& indices) {
 
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
-    int64 thread_index, CapturedFunction* captured_func,
-    absl::string_view prefix, std::unique_ptr<IteratorBase>* out_iterator) {
+    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
+    std::unique_ptr<IteratorBase>* out_iterator) {
   std::vector<Tensor> return_values;
 
   TF_RETURN_IF_ERROR(
diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h
index fbc26872f3..d777062293 100644
--- a/tensorflow/core/kernels/data/dataset_utils.h
+++ b/tensorflow/core/kernels/data/dataset_utils.h
@@ -15,7 +15,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
 #define TENSORFLOW_CORE_KERNELS_DATA_DATASET_UTILS_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/data/captured_function.h"
 #include "tensorflow/core/kernels/data/dataset.h"
@@ -45,8 +44,8 @@ std::vector<bool> ComputeMoveVector(const std::vector<int>& indices);
 
 Status MakeIteratorFromInputElement(
     IteratorContext* ctx, const std::vector<Tensor>& input_element,
-    int64 thread_index, CapturedFunction* captured_func,
-    absl::string_view prefix, std::unique_ptr<IteratorBase>* out_iterator);
+    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
+    std::unique_ptr<IteratorBase>* out_iterator);
 
 // Returns Status::OK() if `expected` and `received` types match,
 // errors::InvalidArgument otherwise.
diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD
index 07437e0c5f..441bdc2898 100644
--- a/tensorflow/core/kernels/data/experimental/BUILD
+++ b/tensorflow/core/kernels/data/experimental/BUILD
@@ -66,7 +66,6 @@ tf_kernel_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc b/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc
index 8114a5cf6e..7451ca4cb1 100644
--- a/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/csv_dataset_op.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 // See docs in ../ops/parsing_ops.cc.
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/op.h"
@@ -380,7 +379,7 @@ class CSVDatasetOp : public DatasetOpKernel {
             // Reached EOF, and last field is empty
             *end_of_record = true;
             if (include) {
-              return FieldToOutput(ctx, absl::string_view(), out_tensors);
+              return FieldToOutput(ctx, StringPiece(), out_tensors);
             } else {
               return Status::OK();
             }
@@ -461,9 +460,8 @@ class CSVDatasetOp : public DatasetOpKernel {
               if (errors::IsOutOfRange(s)) {
                 // This was the last field. We are done
                 *end_of_record = true;
-                parse_result.Update(
-                    QuotedFieldToOutput(ctx, absl::string_view(), out_tensors,
-                                        earlier_pieces, include));
+                parse_result.Update(QuotedFieldToOutput(
+                    ctx, StringPiece(), out_tensors, earlier_pieces, include));
                 return parse_result;
               } else if (!s.ok()) {
                 return s;
@@ -474,14 +472,14 @@ class CSVDatasetOp : public DatasetOpKernel {
             pos_++;
             if (next == dataset()->delim_) {
               parse_result.Update(QuotedFieldToOutput(
-                  ctx, absl::string_view(&buffer_[start], pos_ - 1 - start),
+                  ctx, StringPiece(&buffer_[start], pos_ - 1 - start),
                   out_tensors, earlier_pieces, include));
               return parse_result;
 
             } else if (next == '\n' || next == '\r') {
               *end_of_record = true;
               parse_result.Update(QuotedFieldToOutput(
-                  ctx, absl::string_view(&buffer_[start], pos_ - 1 - start),
+                  ctx, StringPiece(&buffer_[start], pos_ - 1 - start),
                   out_tensors, earlier_pieces, include));
               if (next == '\r') SkipNewLineIfNecessary();
               return parse_result;
@@ -502,7 +500,7 @@ class CSVDatasetOp : public DatasetOpKernel {
       // Converts quoted field to an output tensor, removing the starting
       // and ending quotes from it and unescaping double quotations if
       // necessary.
-      Status QuotedFieldToOutput(IteratorContext* ctx, absl::string_view field,
+      Status QuotedFieldToOutput(IteratorContext* ctx, StringPiece field,
                                  std::vector<Tensor>* out_tensors,
                                  const std::vector<Piece>& earlier_pieces,
                                  bool include) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
@@ -531,17 +529,17 @@ class CSVDatasetOp : public DatasetOpKernel {
         // the opening quotation mark of the quoted field.
         bool skip_next_quote = true;
         for (const Piece& p : earlier_pieces) {
-          AppendUnescapedPiece(absl::string_view(&p.buffer[p.start], p.len),
+          AppendUnescapedPiece(StringPiece(&p.buffer[p.start], p.len),
                                &field_complete, &skip_next_quote);
         }
         AppendUnescapedPiece(field, &field_complete, &skip_next_quote);
-        absl::string_view result = absl::string_view(field_complete);
+        StringPiece result = StringPiece(field_complete);
         result.remove_suffix(1);  // Skip final quote
 
         return FieldToOutput(ctx, result, out_tensors);
       }
 
-      void AppendUnescapedPiece(absl::string_view piece, string* field_complete,
+      void AppendUnescapedPiece(StringPiece piece, string* field_complete,
                                 bool* skip_next_quote) {
         size_t from = 0;
         size_t found = piece.find('\"', from);
@@ -580,8 +578,8 @@ class CSVDatasetOp : public DatasetOpKernel {
               // Whatever we have is the last field of the last record
               *end_of_record = true;
               parse_result.Update(UnquotedFieldToOutput(
-                  ctx, absl::string_view(&buffer_[start], pos_ - start),
-                  out_tensors, earlier_pieces, include));
+                  ctx, StringPiece(&buffer_[start], pos_ - start), out_tensors,
+                  earlier_pieces, include));
               return parse_result;
             } else if (!s.ok()) {
               return s;  // Surface all other errors to caller
@@ -592,8 +590,8 @@ class CSVDatasetOp : public DatasetOpKernel {
 
           if (ch == dataset()->delim_) {
             parse_result.Update(UnquotedFieldToOutput(
-                ctx, absl::string_view(&buffer_[start], pos_ - start),
-                out_tensors, earlier_pieces, include));
+                ctx, StringPiece(&buffer_[start], pos_ - start), out_tensors,
+                earlier_pieces, include));
             pos_++;
             return parse_result;
           }
@@ -601,8 +599,8 @@ class CSVDatasetOp : public DatasetOpKernel {
             // need special case to skip over first \n of record if the line
             // breaks are \r\n
             parse_result.Update(UnquotedFieldToOutput(
-                ctx, absl::string_view(&buffer_[start], pos_ - start),
-                out_tensors, earlier_pieces, include));
+                ctx, StringPiece(&buffer_[start], pos_ - start), out_tensors,
+                earlier_pieces, include));
             *end_of_record = true;
             pos_++;
             if (ch == '\r') SkipNewLineIfNecessary();
@@ -632,7 +630,7 @@ class CSVDatasetOp : public DatasetOpKernel {
       }
 
       // Given a field, converts it to the right output tensor type
-      Status FieldToOutput(IteratorContext* ctx, absl::string_view field,
+      Status FieldToOutput(IteratorContext* ctx, StringPiece field,
                            std::vector<Tensor>* out_tensors) {
         size_t output_idx = out_tensors->size();
         if (output_idx >= dataset()->out_type_.size()) {
@@ -751,8 +749,7 @@ class CSVDatasetOp : public DatasetOpKernel {
       // Given a string field, and its index in the output,
       // converts it to a Tensor of the right type and adds it to the
       // out_tensors vector.
-      Status UnquotedFieldToOutput(IteratorContext* ctx,
-                                   absl::string_view field,
+      Status UnquotedFieldToOutput(IteratorContext* ctx, StringPiece field,
                                    std::vector<Tensor>* out_tensors,
                                    const std::vector<Piece>& earlier_pieces,
                                    bool include) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
diff --git a/tensorflow/core/kernels/data/experimental/indexed_dataset.h b/tensorflow/core/kernels/data/experimental/indexed_dataset.h
index 66bdbc5593..27a8360cbc 100644
--- a/tensorflow/core/kernels/data/experimental/indexed_dataset.h
+++ b/tensorflow/core/kernels/data/experimental/indexed_dataset.h
@@ -15,7 +15,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_DATA_EXPERIMENTAL_INDEXED_DATASET_H_
 #define TENSORFLOW_CORE_KERNELS_DATA_EXPERIMENTAL_INDEXED_DATASET_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/op_kernel.h"
 
@@ -85,8 +84,7 @@ class IndexedDatasetOpKernel : public OpKernel {
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
-                             const absl::string_view& argument_name,
-                             T* output) {
+                             const StringPiece& argument_name, T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index fca61d4e8f..7a833668ac 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/kernels/data/iterator_ops.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/graph_runner.h"
 #include "tensorflow/core/common_runtime/renamed_device.h"
 #include "tensorflow/core/common_runtime/threadpool_device.h"
@@ -214,19 +213,19 @@ class VariantTensorDataReader : public IteratorStateReader {
   // pre-processing did not have errors.
   Status status() const { return status_; }
 
-  Status ReadScalar(absl::string_view key, int64* val) override {
+  Status ReadScalar(StringPiece key, int64* val) override {
     return ReadScalarInternal(key, val);
   }
 
-  Status ReadScalar(absl::string_view key, string* val) override {
+  Status ReadScalar(StringPiece key, string* val) override {
     return ReadScalarInternal(key, val);
   }
 
-  Status ReadTensor(absl::string_view key, Tensor* val) override {
+  Status ReadTensor(StringPiece key, Tensor* val) override {
     return ReadTensorInternal(key, val);
   }
 
-  bool Contains(absl::string_view key) override {
+  bool Contains(StringPiece key) override {
     return map_.find(string(key)) != map_.end();
   }
 
@@ -247,7 +246,7 @@ class VariantTensorDataReader : public IteratorStateReader {
   }
 
   template <typename T>
-  Status ReadScalarInternal(absl::string_view key, T* val) {
+  Status ReadScalarInternal(StringPiece key, T* val) {
     if (map_.find(string(key)) == map_.end()) {
       return errors::NotFound(key);
     }
@@ -255,7 +254,7 @@ class VariantTensorDataReader : public IteratorStateReader {
     return Status::OK();
   }
 
-  Status ReadTensorInternal(absl::string_view key, Tensor* val) {
+  Status ReadTensorInternal(StringPiece key, Tensor* val) {
     if (map_.find(string(key)) == map_.end()) {
       return errors::NotFound(key);
     }
@@ -274,15 +273,15 @@ class VariantTensorDataWriter : public IteratorStateWriter {
   // Does not take ownership of data.
   explicit VariantTensorDataWriter(VariantTensorData* data) : data_(data) {}
 
-  Status WriteScalar(absl::string_view key, const int64 val) override {
+  Status WriteScalar(StringPiece key, const int64 val) override {
     return WriteScalarInternal(key, val);
   }
 
-  Status WriteScalar(absl::string_view key, const string& val) override {
+  Status WriteScalar(StringPiece key, const string& val) override {
     return WriteScalarInternal(key, val);
   }
 
-  Status WriteTensor(absl::string_view key, const Tensor& val) override {
+  Status WriteTensor(StringPiece key, const Tensor& val) override {
     return WriteTensorInternal(key, val);
   }
 
@@ -298,13 +297,13 @@ class VariantTensorDataWriter : public IteratorStateWriter {
 
  private:
   template <typename T>
-  Status WriteScalarInternal(absl::string_view key, const T& val) {
+  Status WriteScalarInternal(StringPiece key, const T& val) {
     Tensor val_t = Tensor(DataTypeToEnum<T>::v(), TensorShape({}));
     val_t.scalar<T>()() = val;
     return WriteTensorInternal(key, val_t);
   }
 
-  Status WriteTensorInternal(absl::string_view key, const Tensor& val) {
+  Status WriteTensorInternal(StringPiece key, const Tensor& val) {
     // Write key to the metadata proto. This gets written to `data_`
     // when `Flush()` is called. We do this lazily to avoid multiple
     // serialization calls.
diff --git a/tensorflow/core/kernels/data/writer_ops.cc b/tensorflow/core/kernels/data/writer_ops.cc
index b3f7ab9fd7..3f76695bb1 100644
--- a/tensorflow/core/kernels/data/writer_ops.cc
+++ b/tensorflow/core/kernels/data/writer_ops.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/kernels/data/dataset.h"
 #include "tensorflow/core/kernels/data/dataset_utils.h"
@@ -37,8 +36,7 @@ class ToTFRecordOp : public AsyncOpKernel {
 
   template <typename T>
   Status ParseScalarArgument(OpKernelContext* ctx,
-                             const absl::string_view& argument_name,
-                             T* output) {
+                             const StringPiece& argument_name, T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc
index 94c21f4da3..ae451be7e2 100644
--- a/tensorflow/core/kernels/decode_bmp_op.cc
+++ b/tensorflow/core/kernels/decode_bmp_op.cc
@@ -16,7 +16,6 @@ limitations under the License.
 // See docs in ../ops/image_ops.cc
 
 #include <memory>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -55,7 +54,7 @@ class DecodeBmpOp : public OpKernel {
                                         contents.shape().DebugString()));
 
     // Start decoding image to get shape details
-    const absl::string_view input = contents.scalar<string>()();
+    const StringPiece input = contents.scalar<string>()();
 
     OP_REQUIRES(context, (32 <= input.size()),
                 errors::InvalidArgument("Incomplete bmp content, requires at "
diff --git a/tensorflow/core/kernels/decode_csv_op.cc b/tensorflow/core/kernels/decode_csv_op.cc
index d2999102b6..6bfb5bd5bc 100644
--- a/tensorflow/core/kernels/decode_csv_op.cc
+++ b/tensorflow/core/kernels/decode_csv_op.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 // See docs in ../ops/parsing_ops.cc.
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -83,7 +82,7 @@ class DecodeCSVOp : public OpKernel {
     }
 
     for (int64 i = 0; i < records_size; ++i) {
-      const absl::string_view record(records_t(i));
+      const StringPiece record(records_t(i));
       std::vector<string> fields;
       ExtractFields(ctx, record, &fields);
       OP_REQUIRES(ctx, fields.size() == out_type_.size(),
@@ -206,7 +205,7 @@ class DecodeCSVOp : public OpKernel {
   bool select_all_cols_;
   string na_value_;
 
-  void ExtractFields(OpKernelContext* ctx, absl::string_view input,
+  void ExtractFields(OpKernelContext* ctx, StringPiece input,
                      std::vector<string>* result) {
     int64 current_idx = 0;
     int64 num_fields_parsed = 0;
diff --git a/tensorflow/core/kernels/decode_image_op.cc b/tensorflow/core/kernels/decode_image_op.cc
index ed1b85042a..2cafa44f37 100644
--- a/tensorflow/core/kernels/decode_image_op.cc
+++ b/tensorflow/core/kernels/decode_image_op.cc
@@ -16,7 +16,6 @@ limitations under the License.
 // See docs in ../ops/image_ops.cc
 
 #include <memory>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -40,7 +39,7 @@ enum FileFormat {
 };
 
 // Classify the contents of a file based on starting bytes (the magic number).
-FileFormat ClassifyFileFormat(absl::string_view data) {
+FileFormat ClassifyFileFormat(StringPiece data) {
   // The 4th byte of JPEG is '\xe0' or '\xe1', so check just the first three
   if (str_util::StartsWith(data, "\xff\xd8\xff")) return kJpgFormat;
   if (str_util::StartsWith(data, "\x89PNG\r\n\x1a\n")) return kPngFormat;
@@ -48,7 +47,7 @@ FileFormat ClassifyFileFormat(absl::string_view data) {
   return kUnknownFormat;
 }
 
-string FileFormatString(FileFormat magic, absl::string_view data) {
+string FileFormatString(FileFormat magic, StringPiece data) {
   switch (magic) {
     case kPngFormat:
       return "PNG";
@@ -153,7 +152,7 @@ class DecodeImageOp : public OpKernel {
                                         contents.shape().DebugString()));
 
     // Determine format
-    const absl::string_view input = contents.scalar<string>()();
+    const StringPiece input = contents.scalar<string>()();
     const auto magic = ClassifyFileFormat(input);
     OP_REQUIRES(
         context,
@@ -184,7 +183,7 @@ class DecodeImageOp : public OpKernel {
     }
   }
 
-  void DecodeJpeg(OpKernelContext* context, absl::string_view input) {
+  void DecodeJpeg(OpKernelContext* context, StringPiece input) {
     OP_REQUIRES(context, channels_ == 0 || channels_ == 1 || channels_ == 3,
                 errors::InvalidArgument(
                     "channels must be 0, 1, or 3 for JPEG, got ", channels_));
@@ -232,7 +231,7 @@ class DecodeImageOp : public OpKernel {
                                 input.size()));
   }
 
-  void DecodePng(OpKernelContext* context, absl::string_view input) {
+  void DecodePng(OpKernelContext* context, StringPiece input) {
     // Start decoding png to get shape details
     png::DecodeContext decode;
     OP_REQUIRES(context,
@@ -288,7 +287,7 @@ class DecodeImageOp : public OpKernel {
     }
   }
 
-  void DecodeGif(OpKernelContext* context, absl::string_view input) {
+  void DecodeGif(OpKernelContext* context, StringPiece input) {
     OP_REQUIRES(context, channels_ == 0 || channels_ == 3,
                 errors::InvalidArgument("channels must be 0 or 3 for GIF, got ",
                                         channels_));
diff --git a/tensorflow/core/kernels/deep_conv2d.cc b/tensorflow/core/kernels/deep_conv2d.cc
index 8d6ab69f62..1aa8c72d66 100644
--- a/tensorflow/core/kernels/deep_conv2d.cc
+++ b/tensorflow/core/kernels/deep_conv2d.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #define USE_EIGEN_TENSOR
 #define EIGEN_USE_THREADS
 
@@ -82,7 +81,7 @@ static int64 GetDirectConvCost(int filter_rows, int filter_cols, int in_depth,
 static bool ReadBoolFromEnvVar(const char* env_var_name, bool default_val) {
   const char* tf_env_var_val = getenv(env_var_name);
   if (tf_env_var_val != nullptr) {
-    absl::string_view tf_env_var_val_str(tf_env_var_val);
+    StringPiece tf_env_var_val_str(tf_env_var_val);
     if (tf_env_var_val_str == "0") {
       return false;
     }
diff --git a/tensorflow/core/kernels/extract_jpeg_shape_op.cc b/tensorflow/core/kernels/extract_jpeg_shape_op.cc
index e36f8c37e7..60d798af56 100644
--- a/tensorflow/core/kernels/extract_jpeg_shape_op.cc
+++ b/tensorflow/core/kernels/extract_jpeg_shape_op.cc
@@ -16,7 +16,6 @@ limitations under the License.
 // See docs in ../ops/image_ops.cc
 
 #include <memory>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -42,7 +41,7 @@ class ExtractJpegShapeOp : public OpKernel {
     OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents.shape()),
                 errors::InvalidArgument("contents must be scalar, got shape ",
                                         contents.shape().DebugString()));
-    const absl::string_view input = contents.scalar<string>()();
+    const StringPiece input = contents.scalar<string>()();
     OP_REQUIRES(context, input.size() <= std::numeric_limits<int>::max(),
                 errors::InvalidArgument("JPEG contents are too large for int: ",
                                         input.size()));
diff --git a/tensorflow/core/kernels/gpu_utils.h b/tensorflow/core/kernels/gpu_utils.h
index a28247c1f4..86146f75f4 100644
--- a/tensorflow/core/kernels/gpu_utils.h
+++ b/tensorflow/core/kernels/gpu_utils.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
 #define TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
 
-#include "absl/strings/string_view.h"
 #if GOOGLE_CUDA
 
 #include <unordered_map>
@@ -121,7 +120,7 @@ class AutoTuneMap {
     }
   };
 
-  string GetActionSummary(absl::string_view action, const Parameters& params,
+  string GetActionSummary(StringPiece action, const Parameters& params,
                           const Config& config) {
     return strings::Printf("autotune_map %s %s: %s -> (%s)", name_.c_str(),
                            string(action).c_str(), params.ToString().c_str(),
diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD
index f332c1fbe2..4870d9ae20 100644
--- a/tensorflow/core/kernels/hexagon/BUILD
+++ b/tensorflow/core/kernels/hexagon/BUILD
@@ -40,7 +40,6 @@ tf_cc_test(
         "//tensorflow/core/kernels:remote_fused_graph_ops",
         "//tensorflow/core/kernels:reshape_op",
         "//tensorflow/core/kernels:softmax_op",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -71,7 +70,6 @@ tf_kernel_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:remote_fused_graph_execute_utils",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc b/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc
index 655b4b255d..40bf5a4dc7 100644
--- a/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h"
 
@@ -154,7 +153,7 @@ GraphTransferUtils::BuildRemoteFusedGraphExecuteInfo(
                      .Attr("Tinputs", input_types)
                      .Attr("Toutputs", output_types)
                      .Attr("serialized_remote_fused_graph_execute_info",
-                           absl::string_view(execute_info.SerializeAsString()));
+                           StringPiece(execute_info.SerializeAsString()));
   CHECK(scope.ok());
   scope.UpdateBuilder(&builder);
   scope.UpdateStatus(builder.Finalize(scope.graph(), &node));
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer.cc b/tensorflow/core/kernels/hexagon/graph_transferer.cc
index 345d228561..477e729dcb 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transferer.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <algorithm>
 #include <cinttypes>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/graph_transfer_info.pb.h"
 #include "tensorflow/core/framework/op.h"
@@ -588,7 +587,7 @@ bool GraphTransferer::HasPaddingAndStrides(const Node& node) {
 }
 
 bool GraphTransferer::NeedsToAddRank(const Node& node) {
-  const absl::string_view op_type(node.type_string());
+  const StringPiece op_type(node.type_string());
   if (op_type == "Transpose" || op_type == "ExpandDims") {
     return true;
   }
@@ -596,7 +595,7 @@ bool GraphTransferer::NeedsToAddRank(const Node& node) {
 }
 
 bool GraphTransferer::IsPadNode(const Node& node) {
-  const absl::string_view op_type(node.type_string());
+  const StringPiece op_type(node.type_string());
   if (op_type == "Pad") {
     return true;
   }
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
index 5d1734e477..765795b1f4 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include <memory>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/core/framework/graph_transfer_info.pb.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
@@ -100,7 +99,7 @@ static Output BuildSoftmaxOps(const Scope& scope, const Input& logits) {
 static Output BuildConv2DOps(const Scope& scope, const Input& input,
                              const Input& filter,
                              const gtl::ArraySlice<int>& strides,
-                             const absl::string_view& padding) {
+                             const StringPiece& padding) {
   EXPECT_TRUE(scope.ok());
   auto _input = ops::AsNodeOut(scope, input);
   EXPECT_TRUE(scope.ok());
@@ -124,7 +123,7 @@ static Output BuildConv2DOps(const Scope& scope, const Input& input,
 static Output BuildMaxPoolOps(const Scope& scope, const Input& input,
                               const gtl::ArraySlice<int>& ksize,
                               const gtl::ArraySlice<int>& strides,
-                              const absl::string_view& padding) {
+                              const StringPiece& padding) {
   EXPECT_TRUE(scope.ok());
   auto _input = ops::AsNodeOut(scope, input);
   EXPECT_TRUE(scope.ok());
diff --git a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
index 9949b3d89c..cc469f6dba 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
+++ b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph_transfer_info.pb.h"
 #include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
@@ -411,7 +410,7 @@ Status HexagonControlWrapper::FuseRemoteGraph(
 
 bool HexagonControlWrapper::FillInputNode(const string& node_name,
                                           const Tensor& tensor) {
-  absl::string_view tensor_data = tensor.tensor_data();
+  StringPiece tensor_data = tensor.tensor_data();
   const ConstByteArray ba =
       ConstByteArray(reinterpret_cast<const uint8*>(tensor_data.data()),
                      tensor_data.size(), tensor.dtype());
diff --git a/tensorflow/core/kernels/immutable_constant_op_test.cc b/tensorflow/core/kernels/immutable_constant_op_test.cc
index 7a23fef135..b2dc16d5d7 100644
--- a/tensorflow/core/kernels/immutable_constant_op_test.cc
+++ b/tensorflow/core/kernels/immutable_constant_op_test.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <algorithm>
 #include <tuple>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -65,7 +64,7 @@ class TestFileSystem : public NullFileSystem {
       const string& fname,
       std::unique_ptr<ReadOnlyMemoryRegion>* result) override {
     float val = 0;
-    absl::string_view scheme, host, path;
+    StringPiece scheme, host, path;
     io::ParseURI(fname, &scheme, &host, &path);
     // For the tests create in-memory regions with float values equal to the
     // region name.
@@ -149,8 +148,8 @@ Status CreateTempFile(Env* env, float value, uint64 size, string* filename) {
   std::unique_ptr<WritableFile> file;
   TF_RETURN_IF_ERROR(env->NewWritableFile(*filename, &file));
   for (uint64 i = 0; i < size; ++i) {
-    absl::string_view sp(static_cast<char*>(static_cast<void*>(&value)),
-                         sizeof(value));
+    StringPiece sp(static_cast<char*>(static_cast<void*>(&value)),
+                   sizeof(value));
     TF_RETURN_IF_ERROR(file->Append(sp));
   }
   TF_RETURN_IF_ERROR(file->Close());
diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc
index ed95793285..2088c13586 100644
--- a/tensorflow/core/kernels/list_kernels.cc
+++ b/tensorflow/core/kernels/list_kernels.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include <limits>
-#include "absl/strings/string_view.h"
 
 #define EIGEN_USE_THREADS
 #if GOOGLE_CUDA
@@ -100,7 +99,7 @@ bool TensorList::Decode(const VariantTensorData& data) {
   string metadata;
   data.get_metadata(&metadata);
   uint64 scratch;
-  absl::string_view iter(metadata);
+  StringPiece iter(metadata);
   core::GetVarint64(&iter, &scratch);
   element_dtype = static_cast<DataType>(scratch);
   std::vector<int64> dims;
diff --git a/tensorflow/core/kernels/reduce_join_op.cc b/tensorflow/core/kernels/reduce_join_op.cc
index 6a571dfca2..e2a3b861e5 100644
--- a/tensorflow/core/kernels/reduce_join_op.cc
+++ b/tensorflow/core/kernels/reduce_join_op.cc
@@ -17,13 +17,13 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
@@ -160,7 +160,7 @@ class ReduceJoinOp : public OpKernel {
 
     const int64 reduction_iter_size =
         GetReductionIterSize(reduced_indices, input_shape);
-    gtl::InlinedVector<absl::string_view, 8> curr_strings(reduction_iter_size);
+    gtl::InlinedVector<StringPiece, 8> curr_strings(reduction_iter_size);
     for (int64 output_index = 0; output_index < output_shape.num_elements();
          ++output_index) {
       int64 output_full_index = LinearSubIndexToFullIndex(
diff --git a/tensorflow/core/kernels/remote_fused_graph_execute_op_test.cc b/tensorflow/core/kernels/remote_fused_graph_execute_op_test.cc
index c0ae6eb4b8..ec769d41f9 100644
--- a/tensorflow/core/kernels/remote_fused_graph_execute_op_test.cc
+++ b/tensorflow/core/kernels/remote_fused_graph_execute_op_test.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/framework/scope.h"
 #include "tensorflow/cc/ops/const_op.h"
@@ -118,7 +117,7 @@ static Output BuildRemoteFusedGraphExecuteOp(
                      .Attr("Tinputs", input_types)
                      .Attr("Toutputs", output_types)
                      .Attr("serialized_remote_fused_graph_execute_info",
-                           absl::string_view(execute_info.SerializeAsString()));
+                           StringPiece(execute_info.SerializeAsString()));
   CHECK(scope.ok());
   scope.UpdateBuilder(&builder);
   scope.UpdateStatus(builder.Finalize(scope.graph(), &ret));
diff --git a/tensorflow/core/kernels/restore_v2_op_test.cc b/tensorflow/core/kernels/restore_v2_op_test.cc
index 2b67984564..36631570c7 100644
--- a/tensorflow/core/kernels/restore_v2_op_test.cc
+++ b/tensorflow/core/kernels/restore_v2_op_test.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
 #include "tensorflow/core/framework/allocator.h"
@@ -58,7 +57,7 @@ class RestoreV2OpTest : public OpsTestBase {
     TF_ASSERT_OK(InitOp());
   }
 
-  void RunTest(absl::string_view save_op_to_use) {
+  void RunTest(StringPiece save_op_to_use) {
     const string filename =
         io::JoinPath(testing::TmpDir(), "tensor_simple-", save_op_to_use);
     const std::vector<string> tensor_names = {
diff --git a/tensorflow/core/kernels/shape_op_test.cc b/tensorflow/core/kernels/shape_op_test.cc
index f720ec9926..30cb1e0a7f 100644
--- a/tensorflow/core/kernels/shape_op_test.cc
+++ b/tensorflow/core/kernels/shape_op_test.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include <functional>
 #include <memory>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/client/client_session.h"
 #include "tensorflow/cc/ops/array_ops.h"
 #include "tensorflow/cc/ops/const_op.h"
@@ -64,7 +63,7 @@ REGISTER_UNARY_VARIANT_DECODE_FUNCTION(KnownVecSize, "KNOWN VECTOR SIZE TYPE");
 
 REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(KnownVecSize, GetShapeFromKnownVecSize);
 
-static void ExpectHasError(const Status& s, absl::string_view substr) {
+static void ExpectHasError(const Status& s, StringPiece substr) {
   EXPECT_TRUE(str_util::StrContains(s.ToString(), substr))
       << ">>" << s << "<<, expected substring >>" << substr << "<<";
 }
diff --git a/tensorflow/core/kernels/sparse_cross_op.cc b/tensorflow/core/kernels/sparse_cross_op.cc
index 644a75501f..4ebb7fbcc7 100644
--- a/tensorflow/core/kernels/sparse_cross_op.cc
+++ b/tensorflow/core/kernels/sparse_cross_op.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_def_builder.h"
@@ -27,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/fingerprint.h"
 #include "tensorflow/core/util/work_sharder.h"
@@ -92,8 +92,8 @@ string SparseTensorColumn<string>::Feature(int64 batch, int64 n) const {
 }
 
 template <>
-absl::string_view SparseTensorColumn<absl::string_view>::Feature(
-    int64 batch, int64 n) const {
+StringPiece SparseTensorColumn<StringPiece>::Feature(int64 batch,
+                                                     int64 n) const {
   const int64 start = feature_start_indices_[batch];
   return values_.vec<string>().data()[start + n];
 }
@@ -130,8 +130,8 @@ string DenseTensorColumn<string>::Feature(int64 batch, int64 n) const {
 }
 
 template <>
-absl::string_view DenseTensorColumn<absl::string_view>::Feature(int64 batch,
-                                                                int64 n) const {
+StringPiece DenseTensorColumn<StringPiece>::Feature(int64 batch,
+                                                    int64 n) const {
   return tensor_.matrix<string>()(batch, n);
 }
 
diff --git a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
index 7098983fde..29577ebb4e 100644
--- a/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
+++ b/tensorflow/core/kernels/sparse_dense_binary_op_shared_test.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
@@ -33,7 +32,7 @@ namespace tensorflow {
 
 namespace {
 
-static void ExpectHasSubstr(absl::string_view s, absl::string_view expected) {
+static void ExpectHasSubstr(StringPiece s, StringPiece expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
diff --git a/tensorflow/core/kernels/sparse_reduce_op.cc b/tensorflow/core/kernels/sparse_reduce_op.cc
index 0908979fa2..a465564739 100644
--- a/tensorflow/core/kernels/sparse_reduce_op.cc
+++ b/tensorflow/core/kernels/sparse_reduce_op.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 // See docs in ../ops/sparse_ops.cc.
 
-#include "absl/strings/string_view.h"
 #define EIGEN_USE_THREADS
 
 #include "tensorflow/core/framework/op_kernel.h"
@@ -136,7 +135,7 @@ struct SumOp {
   static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s, const typename TTypes<T>::UnalignedVec &v) {
       s.device(ctx->eigen_cpu_device()) = v.sum();
   }
-  static absl::string_view Name() {
+  static StringPiece Name() {
       return "sum";
   }
 };
@@ -146,7 +145,7 @@ struct MaxOp {
   static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s, const typename TTypes<T>::UnalignedVec &v) {
       s.device(ctx->eigen_cpu_device()) = v.maximum();
   }
-  static absl::string_view Name() {
+  static StringPiece Name() {
       return "max";
   }
 };
diff --git a/tensorflow/core/kernels/spectrogram_test_utils.cc b/tensorflow/core/kernels/spectrogram_test_utils.cc
index 227f225d90..872a6e9d1b 100644
--- a/tensorflow/core/kernels/spectrogram_test_utils.cc
+++ b/tensorflow/core/kernels/spectrogram_test_utils.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <math.h>
 #include <stddef.h>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -176,9 +175,8 @@ bool WriteDoubleVectorToFile(const string& file_name,
     return false;
   }
   for (int i = 0; i < data.size(); ++i) {
-    if (!file
-             ->Append(absl::string_view(
-                 reinterpret_cast<const char*>(&(data[i])), sizeof(data[i])))
+    if (!file->Append(StringPiece(reinterpret_cast<const char*>(&(data[i])),
+                                  sizeof(data[i])))
              .ok()) {
       LOG(ERROR) << "Failed to append to file " << file_name;
       return false;
@@ -199,9 +197,8 @@ bool WriteFloatVectorToFile(const string& file_name,
     return false;
   }
   for (int i = 0; i < data.size(); ++i) {
-    if (!file
-             ->Append(absl::string_view(
-                 reinterpret_cast<const char*>(&(data[i])), sizeof(data[i])))
+    if (!file->Append(StringPiece(reinterpret_cast<const char*>(&(data[i])),
+                                  sizeof(data[i])))
              .ok()) {
       LOG(ERROR) << "Failed to append to file " << file_name;
       return false;
@@ -222,9 +219,8 @@ bool WriteDoubleArrayToFile(const string& file_name, int size,
     return false;
   }
   for (int i = 0; i < size; ++i) {
-    if (!file
-             ->Append(absl::string_view(
-                 reinterpret_cast<const char*>(&(data[i])), sizeof(data[i])))
+    if (!file->Append(StringPiece(reinterpret_cast<const char*>(&(data[i])),
+                                  sizeof(data[i])))
              .ok()) {
       LOG(ERROR) << "Failed to append to file " << file_name;
       return false;
@@ -245,9 +241,8 @@ bool WriteFloatArrayToFile(const string& file_name, int size,
     return false;
   }
   for (int i = 0; i < size; ++i) {
-    if (!file
-             ->Append(absl::string_view(
-                 reinterpret_cast<const char*>(&(data[i])), sizeof(data[i])))
+    if (!file->Append(StringPiece(reinterpret_cast<const char*>(&(data[i])),
+                                  sizeof(data[i])))
              .ok()) {
       LOG(ERROR) << "Failed to append to file " << file_name;
       return false;
@@ -271,18 +266,16 @@ bool WriteComplexVectorToRawFloatFile(
   for (int i = 0; i < data.size(); ++i) {
     for (int j = 0; j < data[i].size(); ++j) {
       const float real_part(real(data[i][j]));
-      if (!file->Append(
-                   absl::string_view(reinterpret_cast<const char*>(&real_part),
-                                     sizeof(real_part)))
+      if (!file->Append(StringPiece(reinterpret_cast<const char*>(&real_part),
+                                    sizeof(real_part)))
                .ok()) {
         LOG(ERROR) << "Failed to append to file " << file_name;
         return false;
       }
 
       const float imag_part(imag(data[i][j]));
-      if (!file->Append(
-                   absl::string_view(reinterpret_cast<const char*>(&imag_part),
-                                     sizeof(imag_part)))
+      if (!file->Append(StringPiece(reinterpret_cast<const char*>(&imag_part),
+                                    sizeof(imag_part)))
                .ok()) {
         LOG(ERROR) << "Failed to append to file " << file_name;
         return false;
diff --git a/tensorflow/core/kernels/string_join_op.cc b/tensorflow/core/kernels/string_join_op.cc
index 3cbb4c7249..28cca9f448 100644
--- a/tensorflow/core/kernels/string_join_op.cc
+++ b/tensorflow/core/kernels/string_join_op.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -63,7 +62,7 @@ class StringJoinOp : public OpKernel {
                                                      &output_tensor));
     auto output_flat = output_tensor->flat<string>();
 
-    std::vector<absl::string_view> strings(input_list.size());
+    std::vector<StringPiece> strings(input_list.size());
     for (size_t i = 0; i < input_shape.num_elements(); ++i) {
       for (int j = 0; j < input_list.size(); ++j) {
         strings[j] = (is_scalar[j]) ? inputs[j](0) : inputs[j](i);
diff --git a/tensorflow/core/kernels/string_split_op.cc b/tensorflow/core/kernels/string_split_op.cc
index 82ac01caea..3884370a6c 100644
--- a/tensorflow/core/kernels/string_split_op.cc
+++ b/tensorflow/core/kernels/string_split_op.cc
@@ -17,12 +17,12 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
@@ -36,11 +36,11 @@ namespace {
 template <typename Predicate>
 std::vector<StringPiece> SplitOnChar(const string& str, const char delim,
                                      Predicate p) {
-  std::vector<absl::string_view> result;
-  absl::string_view text(str);
+  std::vector<StringPiece> result;
+  StringPiece text(str);
   auto f = text.find(delim);
-  while (f != absl::string_view::npos) {
-    absl::string_view token = text.substr(0, f);
+  while (f != StringPiece::npos) {
+    StringPiece token = text.substr(0, f);
     if (p(token)) {
       result.emplace_back(token);
     }
@@ -60,14 +60,13 @@ std::vector<StringPiece> SplitOnChar(const string& str, const char delim,
 template <typename Predicate>
 std::vector<StringPiece> SplitOnCharSet(const string& str,
                                         const string& delim_set, Predicate p) {
-  std::vector<absl::string_view> result;
-  absl::string_view text(str);
-  absl::string_view delims(delim_set);
+  std::vector<StringPiece> result;
+  StringPiece text(str);
+  StringPiece delims(delim_set);
   size_t token_start = 0;
   for (size_t i = 0; i < text.size() + 1; i++) {
-    if ((i == text.size()) ||
-        (delims.find(text[i]) != absl::string_view::npos)) {
-      absl::string_view token(text.data() + token_start, i - token_start);
+    if ((i == text.size()) || (delims.find(text[i]) != StringPiece::npos)) {
+      StringPiece token(text.data() + token_start, i - token_start);
       if (p(token)) {
         result.emplace_back(token);
       }
@@ -84,13 +83,13 @@ template <typename Predicate>
 std::vector<StringPiece> Split(const string& str, const string& delimiter,
                                Predicate predicate) {
   if (str.empty()) {
-    return std::vector<absl::string_view>();
+    return std::vector<StringPiece>();
   }
   if (delimiter.empty()) {
-    std::vector<absl::string_view> result;
+    std::vector<StringPiece> result;
     result.resize(str.size());
     for (size_t i = 0; i < str.size(); ++i) {
-      result[i] = absl::string_view(str.data() + i, 1);
+      result[i] = StringPiece(str.data() + i, 1);
     }
     return result;
   }
@@ -100,8 +99,8 @@ std::vector<StringPiece> Split(const string& str, const string& delimiter,
   return SplitOnCharSet(str, delimiter, predicate);
 }
 
-std::vector<absl::string_view> SplitV2(const string& str, absl::string_view sep,
-                                       int maxsplit) {
+std::vector<StringPiece> SplitV2(const string& str, StringPiece sep,
+                                 int maxsplit) {
   // This SplitV2 method matches the behavior of python's str.split:
   //   If sep is given, consecutive delimiters are not grouped together
   //   and are deemed to delimit empty strings (for example, '1,,2'.split(',')
@@ -116,16 +115,16 @@ std::vector<absl::string_view> SplitV2(const string& str, absl::string_view sep,
   //   splitting an empty string or a string consisting of just whitespace
   //   with a None separator returns [].
 
-  std::vector<absl::string_view> result;
+  std::vector<StringPiece> result;
 
-  absl::string_view text(str);
+  StringPiece text(str);
   if (maxsplit == 0) {
     result.emplace_back(text);
     return result;
   }
 
   if (sep.empty()) {
-    absl::string_view token;
+    StringPiece token;
     // Remove leading whitespaces.
     str_util::RemoveLeadingWhitespace(&text);
     int split = 0;
@@ -143,13 +142,13 @@ std::vector<absl::string_view> SplitV2(const string& str, absl::string_view sep,
   auto p = std::search(text.begin(), text.end(), sep.begin(), sep.end());
   int split = 0;
   while (p != text.end()) {
-    absl::string_view token = text.substr(0, p - text.begin());
+    StringPiece token = text.substr(0, p - text.begin());
     result.push_back(token);
     text.remove_prefix(token.size());
     text.remove_prefix(sep.size());
     ++split;
     if (maxsplit > 0 && split == maxsplit) {
-      result.push_back(absl::string_view(text));
+      result.push_back(StringPiece(text));
       return result;
     }
     p = std::search(text.begin(), text.end(), sep.begin(), sep.end());
@@ -191,7 +190,7 @@ class StringSplitOp : public OpKernel {
     const auto delimiter_vec = delimiter_tensor->flat<string>();
     const string& delimiter = delimiter_vec(0);
     // Empty delimiter means split the input character by character.
-    std::vector<absl::string_view> tokens;
+    std::vector<StringPiece> tokens;
     // Guess that we'll be unpacking a handful of tokens per example.
     static constexpr int kReserveSize = 4;
     tokens.reserve(batch_size * kReserveSize);
@@ -200,7 +199,7 @@ class StringSplitOp : public OpKernel {
     int64 max_num_entries = 0;
     std::vector<int64> num_indices(batch_size);
     for (int64 i = 0; i < batch_size; ++i) {
-      std::vector<absl::string_view> parts =
+      std::vector<StringPiece> parts =
           skip_empty_ ? Split(input_vec(i), delimiter, str_util::SkipEmpty())
                       : Split(input_vec(i), delimiter, str_util::AllowEmpty());
       int64 n_entries = parts.size();
@@ -263,8 +262,8 @@ class StringSplitV2Op : public OpKernel {
                 errors::InvalidArgument("sep must be a scalar, got shape: ",
                                         sep_tensor->shape().DebugString()));
     const auto sep_vec = sep_tensor->flat<string>();
-    absl::string_view sep(sep_vec(0));
-    std::vector<absl::string_view> tokens;
+    StringPiece sep(sep_vec(0));
+    std::vector<StringPiece> tokens;
     // Guess that we'll be unpacking a handful of tokens per example.
     static constexpr int kReserveSize = 4;
     tokens.reserve(batch_size * kReserveSize);
@@ -273,8 +272,7 @@ class StringSplitV2Op : public OpKernel {
     int64 max_num_entries = 0;
     std::vector<int64> num_indices(batch_size);
     for (int64 i = 0; i < batch_size; ++i) {
-      std::vector<absl::string_view> parts =
-          SplitV2(input_vec(i), sep, maxsplit_);
+      std::vector<StringPiece> parts = SplitV2(input_vec(i), sep, maxsplit_);
       int64 n_entries = parts.size();
       num_indices[i] = n_entries;
       output_size += n_entries;
diff --git a/tensorflow/core/kernels/string_strip_op.cc b/tensorflow/core/kernels/string_strip_op.cc
index 8b7b0bd250..544dca96ba 100644
--- a/tensorflow/core/kernels/string_strip_op.cc
+++ b/tensorflow/core/kernels/string_strip_op.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -42,7 +41,7 @@ class StringStripOp : public OpKernel {
     auto output = output_tensor->flat<string>();
 
     for (int64 i = 0; i < input.size(); ++i) {
-      absl::string_view entry(input(i));
+      StringPiece entry(input(i));
       str_util::RemoveWhitespaceContext(&entry);
       output(i) = string(entry);
     }
diff --git a/tensorflow/core/kernels/string_to_hash_bucket_op.h b/tensorflow/core/kernels/string_to_hash_bucket_op.h
index d2c13719e6..62ef35bbba 100644
--- a/tensorflow/core/kernels/string_to_hash_bucket_op.h
+++ b/tensorflow/core/kernels/string_to_hash_bucket_op.h
@@ -18,7 +18,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -27,7 +26,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-template <uint64 hash(absl::string_view)>
+template <uint64 hash(StringPiece)>
 class StringToHashBucketOp : public OpKernel {
  public:
   explicit StringToHashBucketOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
diff --git a/tensorflow/core/kernels/string_util.h b/tensorflow/core/kernels/string_util.h
index 0af3449d8d..d40e93ea33 100644
--- a/tensorflow/core/kernels/string_util.h
+++ b/tensorflow/core/kernels/string_util.h
@@ -15,7 +15,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_STRING_UTIL_H_
 #define TENSORFLOW_CORE_KERNELS_STRING_UTIL_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -50,7 +49,7 @@ int32 UTF8StrLen(const string& string);
 // the end of the string is reached before the requested characters, then the
 // position will point to the end of string and this function will return false.
 template <typename T>
-bool ForwardNUTF8CharPositions(const absl::string_view in,
+bool ForwardNUTF8CharPositions(const StringPiece in,
                                const T num_utf8_chars_to_shift, T* pos) {
   const size_t size = in.size();
   T utf8_chars_counted = 0;
@@ -71,7 +70,7 @@ bool ForwardNUTF8CharPositions(const absl::string_view in,
 // the string is reached before the requested character, then the position will
 // point to the beginning of the string and this function will return false.
 template <typename T>
-bool BackNUTF8CharPositions(const absl::string_view in,
+bool BackNUTF8CharPositions(const StringPiece in,
                             const T num_utf8_chars_to_shift, T* pos) {
   const size_t start = 0;
   T utf8_chars_counted = 0;
diff --git a/tensorflow/core/kernels/substr_op.cc b/tensorflow/core/kernels/substr_op.cc
index 030c2ff37c..93c427039d 100644
--- a/tensorflow/core/kernels/substr_op.cc
+++ b/tensorflow/core/kernels/substr_op.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <cstdlib>
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/kernel_def_builder.h"
 #include "tensorflow/core/framework/op.h"
@@ -29,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/string_util.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/bcast.h"
 
@@ -73,7 +73,7 @@ class SubstrOp : public OpKernel {
         const T len =
             tensorflow::internal::SubtleMustCopy(len_tensor.scalar<T>()());
         for (size_t i = 0; i < input_tensor.NumElements(); ++i) {
-          absl::string_view in(input(i));
+          StringPiece in(input(i));
           T byte_pos = pos;
           T byte_len = len;
           switch (unit_) {
@@ -90,7 +90,7 @@ class SubstrOp : public OpKernel {
                   errors::InvalidArgument("pos ", pos, " out of range for ",
                                           "string b'", in, "' at index ", i));
           }
-          absl::string_view sub_in = in.substr(byte_pos, byte_len);
+          StringPiece sub_in = in.substr(byte_pos, byte_len);
           output(i).assign(sub_in.data(), sub_in.size());
         }
       } else {
@@ -98,7 +98,7 @@ class SubstrOp : public OpKernel {
         auto pos_flat = pos_tensor.flat<T>();
         auto len_flat = len_tensor.flat<T>();
         for (size_t i = 0; i < input_tensor.NumElements(); ++i) {
-          absl::string_view in(input(i));
+          StringPiece in(input(i));
           const T pos = tensorflow::internal::SubtleMustCopy(pos_flat(i));
           const T len = tensorflow::internal::SubtleMustCopy(len_flat(i));
           T byte_pos = pos;
@@ -117,7 +117,7 @@ class SubstrOp : public OpKernel {
                   errors::InvalidArgument("pos ", pos, " out of range for ",
                                           "string b'", in, "' at index ", i));
           }
-          absl::string_view sub_in = in.substr(byte_pos, byte_len);
+          StringPiece sub_in = in.substr(byte_pos, byte_len);
           output(i).assign(sub_in.data(), sub_in.size());
         }
       }
@@ -177,7 +177,7 @@ class SubstrOp : public OpKernel {
 
           // Iterate through broadcasted tensors and perform substr
           for (int i = 0; i < output_shape.dim_size(0); ++i) {
-            absl::string_view in(input_bcast(i));
+            StringPiece in(input_bcast(i));
             const T pos = tensorflow::internal::SubtleMustCopy(pos_bcast(i));
             const T len = tensorflow::internal::SubtleMustCopy(len_bcast(i));
             T byte_pos = pos;
@@ -197,7 +197,7 @@ class SubstrOp : public OpKernel {
                     errors::InvalidArgument("pos ", pos, " out of range for ",
                                             "string b'", in, "' at index ", i));
             }
-            absl::string_view sub_in = in.substr(byte_pos, byte_len);
+            StringPiece sub_in = in.substr(byte_pos, byte_len);
             output(i).assign(sub_in.data(), sub_in.size());
           }
           break;
@@ -241,7 +241,7 @@ class SubstrOp : public OpKernel {
           // Iterate through broadcasted tensors and perform substr
           for (int i = 0; i < output_shape.dim_size(0); ++i) {
             for (int j = 0; j < output_shape.dim_size(1); ++j) {
-              absl::string_view in(input_bcast(i, j));
+              StringPiece in(input_bcast(i, j));
               const T pos =
                   tensorflow::internal::SubtleMustCopy(pos_bcast(i, j));
               const T len =
@@ -263,7 +263,7 @@ class SubstrOp : public OpKernel {
                                               "string b'", in, "' at index (",
                                               i, ", ", j, ")"));
               }
-              absl::string_view sub_in = in.substr(byte_pos, byte_len);
+              StringPiece sub_in = in.substr(byte_pos, byte_len);
               output(i, j).assign(sub_in.data(), sub_in.size());
             }
           }
@@ -280,8 +280,7 @@ class SubstrOp : public OpKernel {
  private:
   // This adjusts the requested position. Note it does not perform any bound
   // checks.
-  static inline T AdjustedPosIndex(const T pos_requested,
-                                   const absl::string_view s) {
+  static inline T AdjustedPosIndex(const T pos_requested, const StringPiece s) {
     if (pos_requested < 0) {
       return s.size() + pos_requested;
     }
@@ -290,7 +289,7 @@ class SubstrOp : public OpKernel {
 
   // Return true if successful; otherwise, return false if the `pos` argument
   // is out of range in the string.
-  static inline bool UpdatePosAndLenForUtf8(const absl::string_view in, T* pos,
+  static inline bool UpdatePosAndLenForUtf8(const StringPiece in, T* pos,
                                             T* len) {
     if (*pos >= 0) {
       return UpdatePositivePosAndLenForUtf8(in, *pos, *len, pos, len);
@@ -299,9 +298,9 @@ class SubstrOp : public OpKernel {
     }
   }
 
-  static bool UpdatePositivePosAndLenForUtf8(const absl::string_view in,
-                                             const T pos, const T len,
-                                             T* char_pos, T* char_len) {
+  static bool UpdatePositivePosAndLenForUtf8(const StringPiece in, const T pos,
+                                             const T len, T* char_pos,
+                                             T* char_len) {
     *char_pos = 0;
     // Determine byte position of the substring start.
     if (!ForwardNUTF8CharPositions(in, pos, char_pos)) {
@@ -320,9 +319,9 @@ class SubstrOp : public OpKernel {
   // This function expects a negative position relative to the end of the
   // string, but will update the character position to a positive number
   // relative to the beginning of the string.
-  static bool UpdateNegativePosAndLenForUtf8(const absl::string_view in,
-                                             const T pos, const T len,
-                                             T* char_pos, T* char_len) {
+  static bool UpdateNegativePosAndLenForUtf8(const StringPiece in, const T pos,
+                                             const T len, T* char_pos,
+                                             T* char_len) {
     // Initially treat the length as position of the end of the substring.
     *char_len = in.size();
     // This is the number of character to skip from the end of the string to
diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc
index 3bb8e50e76..a97a71b344 100644
--- a/tensorflow/core/kernels/tensor_array_ops.cc
+++ b/tensorflow/core/kernels/tensor_array_ops.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 // See docs in ../ops/data_flow_ops.cc.
 
-#include "absl/strings/string_view.h"
 #define EIGEN_USE_THREADS
 
 #include <limits>
@@ -293,13 +292,13 @@ class TensorArrayGradOp : public TensorArrayCreationOp {
     } else {
       container = "_tensor_arrays";
       const auto& resource = ctx->input(0).flat<ResourceHandle>()(0);
-      if (absl::string_view(resource.name()).substr(0, container.size()) !=
+      if (StringPiece(resource.name()).substr(0, container.size()) !=
           container) {
         return errors::InvalidArgument("Wrong input container. ",
                                        resource.name());
       }
       tensor_array_name =
-          string(absl::string_view(resource.name()).substr(container.size()));
+          string(StringPiece(resource.name()).substr(container.size()));
     }
 
     auto output_handle = tensor_array_output_handle->flat<string>();
diff --git a/tensorflow/core/kernels/word2vec_kernels.cc b/tensorflow/core/kernels/word2vec_kernels.cc
index 10a3a36842..3477445197 100644
--- a/tensorflow/core/kernels/word2vec_kernels.cc
+++ b/tensorflow/core/kernels/word2vec_kernels.cc
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/random/distribution_sampler.h"
 #include "tensorflow/core/lib/random/philox_random.h"
@@ -33,9 +33,9 @@ const int kSentenceSize = 1000;
 
 namespace {
 
-bool ScanWord(absl::string_view* input, string* word) {
+bool ScanWord(StringPiece* input, string* word) {
   str_util::RemoveLeadingWhitespace(input);
-  absl::string_view tmp;
+  StringPiece tmp;
   if (str_util::ConsumeNonWhitespace(input, &tmp)) {
     word->assign(tmp.data(), tmp.size());
     return true;
@@ -179,7 +179,7 @@ class SkipgramOp : public OpKernel {
   Status Init(Env* env, const string& filename) {
     string data;
     TF_RETURN_IF_ERROR(ReadFileToString(env, filename, &data));
-    absl::string_view input = data;
+    StringPiece input = data;
     string w;
     corpus_size_ = 0;
     std::unordered_map<string, int32> word_freq;
diff --git a/tensorflow/core/lib/core/coding.cc b/tensorflow/core/lib/core/coding.cc
index 60828d423a..50872eef83 100644
--- a/tensorflow/core/lib/core/coding.cc
+++ b/tensorflow/core/lib/core/coding.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/coding.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/byte_order.h"
 
 namespace tensorflow {
@@ -152,14 +151,14 @@ const char* GetVarint32PtrFallback(const char* p, const char* limit,
   return nullptr;
 }
 
-bool GetVarint32(absl::string_view* input, uint32* value) {
+bool GetVarint32(StringPiece* input, uint32* value) {
   const char* p = input->data();
   const char* limit = p + input->size();
   const char* q = GetVarint32Ptr(p, limit, value);
   if (q == nullptr) {
     return false;
   } else {
-    *input = absl::string_view(q, limit - q);
+    *input = StringPiece(q, limit - q);
     return true;
   }
 }
@@ -181,14 +180,14 @@ const char* GetVarint64Ptr(const char* p, const char* limit, uint64* value) {
   return nullptr;
 }
 
-bool GetVarint64(absl::string_view* input, uint64* value) {
+bool GetVarint64(StringPiece* input, uint64* value) {
   const char* p = input->data();
   const char* limit = p + input->size();
   const char* q = GetVarint64Ptr(p, limit, value);
   if (q == nullptr) {
     return false;
   } else {
-    *input = absl::string_view(q, limit - q);
+    *input = StringPiece(q, limit - q);
     return true;
   }
 }
diff --git a/tensorflow/core/lib/core/coding.h b/tensorflow/core/lib/core/coding.h
index 9add2b1afc..4a70ffa619 100644
--- a/tensorflow/core/lib/core/coding.h
+++ b/tensorflow/core/lib/core/coding.h
@@ -21,8 +21,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_CORE_CODING_H_
 #define TENSORFLOW_CORE_LIB_CORE_CODING_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/raw_coding.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -46,8 +46,8 @@ extern void PutFixed64(string* dst, uint64 value);
 extern void PutVarint32(string* dst, uint32 value);
 extern void PutVarint64(string* dst, uint64 value);
 
-extern bool GetVarint32(absl::string_view* input, uint32* value);
-extern bool GetVarint64(absl::string_view* input, uint64* value);
+extern bool GetVarint32(StringPiece* input, uint32* value);
+extern bool GetVarint64(StringPiece* input, uint64* value);
 
 extern const char* GetVarint32Ptr(const char* p, const char* limit, uint32* v);
 extern const char* GetVarint64Ptr(const char* p, const char* limit, uint64* v);
diff --git a/tensorflow/core/lib/core/status.cc b/tensorflow/core/lib/core/status.cc
index f21600328b..cb2a06e620 100644
--- a/tensorflow/core/lib/core/status.cc
+++ b/tensorflow/core/lib/core/status.cc
@@ -15,11 +15,10 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/status.h"
 #include <stdio.h>
-#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 
-Status::Status(tensorflow::error::Code code, absl::string_view msg) {
+Status::Status(tensorflow::error::Code code, StringPiece msg) {
   assert(code != tensorflow::error::OK);
   state_ = std::unique_ptr<State>(new State);
   state_->code = code;
diff --git a/tensorflow/core/lib/core/status.h b/tensorflow/core/lib/core/status.h
index 4227a8e11b..eb0ff555a5 100644
--- a/tensorflow/core/lib/core/status.h
+++ b/tensorflow/core/lib/core/status.h
@@ -20,8 +20,8 @@ limitations under the License.
 #include <iosfwd>
 #include <memory>
 #include <string>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -42,7 +42,7 @@ class Status {
 
   /// \brief Create a status with the specified error code and msg as a
   /// human-readable string containing more detailed information.
-  Status(tensorflow::error::Code code, absl::string_view msg);
+  Status(tensorflow::error::Code code, tensorflow::StringPiece msg);
 
   /// Copy the specified status.
   Status(const Status& s);
diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc
index 420ae67708..e4b489fe17 100644
--- a/tensorflow/core/lib/core/stringpiece_test.cc
+++ b/tensorflow/core/lib/core/stringpiece_test.cc
@@ -13,8 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/core/lib/core/stringpiece.h"
+
 #include <unordered_map>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -23,24 +24,24 @@ TEST(StringPiece, Ctor) {
   {
     // const char* without size.
     const char* hello = "hello";
-    absl::string_view s20(hello);
+    StringPiece s20(hello);
     EXPECT_TRUE(s20.data() == hello);
     EXPECT_EQ(5, s20.size());
 
     // const char* with size.
-    absl::string_view s21(hello, 4);
+    StringPiece s21(hello, 4);
     EXPECT_TRUE(s21.data() == hello);
     EXPECT_EQ(4, s21.size());
 
     // Not recommended, but valid C++
-    absl::string_view s22(hello, 6);
+    StringPiece s22(hello, 6);
     EXPECT_TRUE(s22.data() == hello);
     EXPECT_EQ(6, s22.size());
   }
 
   {
     string hola = "hola";
-    absl::string_view s30(hola);
+    StringPiece s30(hola);
     EXPECT_TRUE(s30.data() == hola.data());
     EXPECT_EQ(4, s30.size());
 
@@ -48,15 +49,15 @@ TEST(StringPiece, Ctor) {
     hola.push_back('\0');
     hola.append("h2");
     hola.push_back('\0');
-    absl::string_view s31(hola);
+    StringPiece s31(hola);
     EXPECT_TRUE(s31.data() == hola.data());
     EXPECT_EQ(8, s31.size());
   }
 }
 
 TEST(StringPiece, ConversionToString) {
-  EXPECT_EQ("", string(absl::string_view("")));
-  EXPECT_EQ("foo", string(absl::string_view("foo")));
+  EXPECT_EQ("", string(StringPiece("")));
+  EXPECT_EQ("foo", string(StringPiece("foo")));
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/db/BUILD b/tensorflow/core/lib/db/BUILD
index efba522005..7a64306c6e 100644
--- a/tensorflow/core/lib/db/BUILD
+++ b/tensorflow/core/lib/db/BUILD
@@ -16,7 +16,6 @@ cc_library(
         ":snapfn",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
         "@org_sqlite",
     ],
 )
@@ -42,6 +41,5 @@ tf_cc_test(
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/core/lib/db/sqlite.cc b/tensorflow/core/lib/db/sqlite.cc
index 83c419988f..cf11f3a331 100644
--- a/tensorflow/core/lib/db/sqlite.cc
+++ b/tensorflow/core/lib/db/sqlite.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/lib/db/sqlite.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 
@@ -82,8 +81,7 @@ sqlite3_stmt* PrepareRawOrDie(sqlite3* db, const char* sql) {
   return stmt;
 }
 
-Status SetPragma(Sqlite* db, const char* pragma,
-                 const absl::string_view& value) {
+Status SetPragma(Sqlite* db, const char* pragma, const StringPiece& value) {
   if (value.empty()) return Status::OK();
   for (auto p = value.begin(); p < value.end(); ++p) {
     if (!(('0' <= *p && *p <= '9') || ('A' <= *p && *p <= 'Z') ||
@@ -98,9 +96,9 @@ Status SetPragma(Sqlite* db, const char* pragma,
   return stmt.Step(&unused_done);
 }
 
-const absl::string_view GetEnv(const char* var) {
+const StringPiece GetEnv(const char* var) {
   const char* val = std::getenv(var);
-  return (val == nullptr) ? absl::string_view() : absl::string_view(val);
+  return (val == nullptr) ? StringPiece() : StringPiece(val);
 }
 
 Status EnvPragma(Sqlite* db, const char* pragma, const char* var) {
@@ -162,7 +160,7 @@ Sqlite::~Sqlite() {
   CHECK_EQ(SQLITE_OK, sqlite3_close(db_));
 }
 
-Status Sqlite::Prepare(const absl::string_view& sql, SqliteStatement* stmt) {
+Status Sqlite::Prepare(const StringPiece& sql, SqliteStatement* stmt) {
   SqliteLock lock(*this);
   sqlite3_stmt* ps = nullptr;
   int rc = sqlite3_prepare_v2(db_, sql.data(), static_cast<int>(sql.size()),
diff --git a/tensorflow/core/lib/db/sqlite.h b/tensorflow/core/lib/db/sqlite.h
index d54a5f7ed2..efe97f78d2 100644
--- a/tensorflow/core/lib/db/sqlite.h
+++ b/tensorflow/core/lib/db/sqlite.h
@@ -17,10 +17,10 @@ limitations under the License.
 
 #include <mutex>
 
-#include "absl/strings/string_view.h"
 #include "sqlite3.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
@@ -87,8 +87,8 @@ class LOCKABLE Sqlite : public core::RefCounted {
   /// routine will retry automatically and then possibly fail.
   ///
   /// The returned statement holds a reference to this object.
-  Status Prepare(const absl::string_view& sql, SqliteStatement* stmt);
-  SqliteStatement PrepareOrDie(const absl::string_view& sql);
+  Status Prepare(const StringPiece& sql, SqliteStatement* stmt);
+  SqliteStatement PrepareOrDie(const StringPiece& sql);
 
   /// \brief Returns extended result code of last error.
   ///
@@ -228,22 +228,22 @@ class SqliteStatement {
   ///
   /// When using the unsafe methods, the data must not be changed or
   /// freed until this statement is Reset() or finalized.
-  void BindText(int parameter, const absl::string_view& text) {
+  void BindText(int parameter, const StringPiece& text) {
     Update(sqlite3_bind_text64(stmt_, parameter, text.data(), text.size(),
                                SQLITE_TRANSIENT, SQLITE_UTF8),
            parameter);
     size_ += text.size();
   }
-  void BindText(const char* parameter, const absl::string_view& text) {
+  void BindText(const char* parameter, const StringPiece& text) {
     BindText(GetParameterIndex(parameter), text);
   }
-  void BindTextUnsafe(int parameter, const absl::string_view& text) {
+  void BindTextUnsafe(int parameter, const StringPiece& text) {
     Update(sqlite3_bind_text64(stmt_, parameter, text.data(), text.size(),
                                SQLITE_STATIC, SQLITE_UTF8),
            parameter);
     size_ += text.size();
   }
-  void BindTextUnsafe(const char* parameter, const absl::string_view& text) {
+  void BindTextUnsafe(const char* parameter, const StringPiece& text) {
     BindTextUnsafe(GetParameterIndex(parameter), text);
   }
 
@@ -251,22 +251,22 @@ class SqliteStatement {
   ///
   /// When using the unsafe methods, the data must not be changed or
   /// freed until this statement is Reset() or finalized.
-  void BindBlob(int parameter, const absl::string_view& blob) {
+  void BindBlob(int parameter, const StringPiece& blob) {
     Update(sqlite3_bind_blob64(stmt_, parameter, blob.data(), blob.size(),
                                SQLITE_TRANSIENT),
            parameter);
     size_ += blob.size();
   }
-  void BindBlob(const char* parameter, const absl::string_view& blob) {
+  void BindBlob(const char* parameter, const StringPiece& blob) {
     BindBlob(GetParameterIndex(parameter), blob);
   }
-  void BindBlobUnsafe(int parameter, const absl::string_view& blob) {
+  void BindBlobUnsafe(int parameter, const StringPiece& blob) {
     Update(sqlite3_bind_blob64(stmt_, parameter, blob.data(), blob.size(),
                                SQLITE_STATIC),
            parameter);
     size_ += blob.size();
   }
-  void BindBlobUnsafe(const char* parameter, const absl::string_view& text) {
+  void BindBlobUnsafe(const char* parameter, const StringPiece& text) {
     BindBlobUnsafe(GetParameterIndex(parameter), text);
   }
 
@@ -309,7 +309,7 @@ class SqliteStatement {
   /// Empty values are returned as NULL. The returned memory will no
   /// longer be valid the next time Step() or Reset() is called. No NUL
   /// terminator is added.
-  absl::string_view ColumnStringUnsafe(int column) const TF_MUST_USE_RESULT {
+  StringPiece ColumnStringUnsafe(int column) const TF_MUST_USE_RESULT {
     return {static_cast<const char*>(sqlite3_column_blob(stmt_, column)),
             static_cast<size_t>(ColumnSize(column))};
   }
@@ -438,7 +438,7 @@ class SCOPED_LOCKABLE SqliteTransaction {
   EXCLUSIVE_LOCKS_REQUIRED(__VA_ARGS__)
 #define SQLITE_TRANSACTIONS_EXCLUDED(...) LOCKS_EXCLUDED(__VA_ARGS__)
 
-inline SqliteStatement Sqlite::PrepareOrDie(const absl::string_view& sql) {
+inline SqliteStatement Sqlite::PrepareOrDie(const StringPiece& sql) {
   SqliteStatement stmt;
   TF_CHECK_OK(Prepare(sql, &stmt));
   return stmt;
diff --git a/tensorflow/core/lib/db/sqlite_test.cc b/tensorflow/core/lib/db/sqlite_test.cc
index 2826fe73c8..1590055960 100644
--- a/tensorflow/core/lib/db/sqlite_test.cc
+++ b/tensorflow/core/lib/db/sqlite_test.cc
@@ -17,8 +17,8 @@ limitations under the License.
 #include <array>
 #include <climits>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/test.h"
@@ -169,7 +169,7 @@ TEST_F(SqliteTest, UnsafeColumn) {
   TF_ASSERT_OK(stmt.StepAndReset());
   stmt = db_->PrepareOrDie("SELECT b FROM T ORDER BY a");
   TF_ASSERT_OK(stmt.Step(&is_done_));
-  absl::string_view p = stmt.ColumnStringUnsafe(0);
+  StringPiece p = stmt.ColumnStringUnsafe(0);
   EXPECT_EQ('h', *p.data());
   TF_ASSERT_OK(stmt.Step(&is_done_));
   // This will actually happen, but it's not safe to test this behavior.
diff --git a/tensorflow/core/lib/hash/hash.h b/tensorflow/core/lib/hash/hash.h
index 8560f517dd..675bab7191 100644
--- a/tensorflow/core/lib/hash/hash.h
+++ b/tensorflow/core/lib/hash/hash.h
@@ -24,7 +24,7 @@ limitations under the License.
 #include <functional>
 #include <string>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -94,12 +94,12 @@ struct hash<string> {
 };
 
 template <>
-struct hash<absl::string_view> {
-  size_t operator()(absl::string_view sp) const {
+struct hash<StringPiece> {
+  size_t operator()(StringPiece sp) const {
     return static_cast<size_t>(Hash64(sp.data(), sp.size()));
   }
 };
-using StringPieceHasher = ::tensorflow::hash<absl::string_view>;
+using StringPieceHasher = ::tensorflow::hash<StringPiece>;
 
 template <typename T, typename U>
 struct hash<std::pair<T, U>> {
diff --git a/tensorflow/core/lib/hash/hash_test.cc b/tensorflow/core/lib/hash/hash_test.cc
index 3223ef81fe..7d58313132 100644
--- a/tensorflow/core/lib/hash/hash_test.cc
+++ b/tensorflow/core/lib/hash/hash_test.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
@@ -87,10 +86,10 @@ BENCHMARK(BM_Hash32)->Range(1, 1024);
 TEST(StringPieceHasher, Equality) {
   StringPieceHasher hasher;
 
-  absl::string_view s1("foo");
-  absl::string_view s2("bar");
-  absl::string_view s3("baz");
-  absl::string_view s4("zot");
+  StringPiece s1("foo");
+  StringPiece s2("bar");
+  StringPiece s3("baz");
+  StringPiece s4("zot");
 
   EXPECT_TRUE(hasher(s1) != hasher(s2));
   EXPECT_TRUE(hasher(s1) != hasher(s3));
@@ -110,11 +109,11 @@ TEST(StringPieceHasher, HashMap) {
   string s2("bar");
   string s3("baz");
 
-  absl::string_view p1(s1);
-  absl::string_view p2(s2);
-  absl::string_view p3(s3);
+  StringPiece p1(s1);
+  StringPiece p2(s2);
+  StringPiece p3(s3);
 
-  std::unordered_map<absl::string_view, int, StringPieceHasher> map;
+  std::unordered_map<StringPiece, int, StringPieceHasher> map;
 
   map.insert(std::make_pair(p1, 0));
   map.insert(std::make_pair(p2, 1));
diff --git a/tensorflow/core/lib/io/block.cc b/tensorflow/core/lib/io/block.cc
index 3e4ce3c08b..4c30486cc4 100644
--- a/tensorflow/core/lib/io/block.cc
+++ b/tensorflow/core/lib/io/block.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include "tensorflow/core/lib/io/block.h"
 
 #include <algorithm>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/format.h"
@@ -96,11 +95,10 @@ class Block::Iter : public Iterator {
   uint32 current_;
   uint32 restart_index_;  // Index of restart block in which current_ falls
   string key_;
-  absl::string_view value_;
+  StringPiece value_;
   Status status_;
 
-  inline int Compare(const absl::string_view& a,
-                     const absl::string_view& b) const {
+  inline int Compare(const StringPiece& a, const StringPiece& b) const {
     return a.compare(b);
   }
 
@@ -121,7 +119,7 @@ class Block::Iter : public Iterator {
 
     // ParseNextKey() starts at the end of value_, so set value_ accordingly
     uint32 offset = GetRestartPoint(index);
-    value_ = absl::string_view(data_ + offset, 0);
+    value_ = StringPiece(data_ + offset, 0);
   }
 
  public:
@@ -136,11 +134,11 @@ class Block::Iter : public Iterator {
 
   bool Valid() const override { return current_ < restarts_; }
   Status status() const override { return status_; }
-  absl::string_view key() const override {
+  StringPiece key() const override {
     assert(Valid());
     return key_;
   }
-  absl::string_view value() const override {
+  StringPiece value() const override {
     assert(Valid());
     return value_;
   }
@@ -150,7 +148,7 @@ class Block::Iter : public Iterator {
     ParseNextKey();
   }
 
-  void Seek(const absl::string_view& target) override {
+  void Seek(const StringPiece& target) override {
     // Binary search in restart array to find the last restart point
     // with a key < target
     uint32 left = 0;
@@ -166,7 +164,7 @@ class Block::Iter : public Iterator {
         CorruptionError();
         return;
       }
-      absl::string_view mid_key(key_ptr, non_shared);
+      StringPiece mid_key(key_ptr, non_shared);
       if (Compare(mid_key, target) < 0) {
         // Key at "mid" is smaller than "target".  Therefore all
         // blocks before "mid" are uninteresting.
@@ -201,7 +199,7 @@ class Block::Iter : public Iterator {
     restart_index_ = num_restarts_;
     status_ = errors::DataLoss("bad entry in block");
     key_.clear();
-    value_ = absl::string_view();
+    value_ = StringPiece();
   }
 
   bool ParseNextKey() {
@@ -224,7 +222,7 @@ class Block::Iter : public Iterator {
     } else {
       key_.resize(shared);
       key_.append(p, non_shared);
-      value_ = absl::string_view(p + non_shared, value_length);
+      value_ = StringPiece(p + non_shared, value_length);
       while (restart_index_ + 1 < num_restarts_ &&
              GetRestartPoint(restart_index_ + 1) < current_) {
         ++restart_index_;
diff --git a/tensorflow/core/lib/io/block_builder.cc b/tensorflow/core/lib/io/block_builder.cc
index 44e564bf58..b2921c076c 100644
--- a/tensorflow/core/lib/io/block_builder.cc
+++ b/tensorflow/core/lib/io/block_builder.cc
@@ -41,7 +41,6 @@ limitations under the License.
 
 #include <assert.h>
 #include <algorithm>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/io/table_builder.h"
 
@@ -69,7 +68,7 @@ size_t BlockBuilder::CurrentSizeEstimate() const {
           sizeof(uint32));                     // Restart array length
 }
 
-absl::string_view BlockBuilder::Finish() {
+StringPiece BlockBuilder::Finish() {
   // Append restart array
   CHECK_LE(restarts_.size(), std::numeric_limits<uint32_t>::max());
   for (const auto r : restarts_) {
@@ -78,12 +77,11 @@ absl::string_view BlockBuilder::Finish() {
   // Downcast safe because of the CHECK.
   core::PutFixed32(&buffer_, static_cast<uint32_t>(restarts_.size()));
   finished_ = true;
-  return absl::string_view(buffer_);
+  return StringPiece(buffer_);
 }
 
-void BlockBuilder::Add(const absl::string_view& key,
-                       const absl::string_view& value) {
-  absl::string_view last_key_piece(last_key_);
+void BlockBuilder::Add(const StringPiece& key, const StringPiece& value) {
+  StringPiece last_key_piece(last_key_);
   assert(!finished_);
   assert(counter_ <= options_->block_restart_interval);
   assert(buffer_.empty()  // No values yet?
@@ -119,7 +117,7 @@ void BlockBuilder::Add(const absl::string_view& key,
   // Update state
   last_key_.resize(shared);
   last_key_.append(key.data() + shared, non_shared);
-  assert(absl::string_view(last_key_) == key);
+  assert(StringPiece(last_key_) == key);
   counter_++;
 }
 
diff --git a/tensorflow/core/lib/io/block_builder.h b/tensorflow/core/lib/io/block_builder.h
index 1237f7fb7f..117b6a0bb8 100644
--- a/tensorflow/core/lib/io/block_builder.h
+++ b/tensorflow/core/lib/io/block_builder.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include <vector>
 
 #include <stdint.h>
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -36,12 +36,12 @@ class BlockBuilder {
 
   // REQUIRES: Finish() has not been called since the last call to Reset().
   // REQUIRES: key is larger than any previously added key
-  void Add(const absl::string_view& key, const absl::string_view& value);
+  void Add(const StringPiece& key, const StringPiece& value);
 
   // Finish building the block and return a slice that refers to the
   // block contents.  The returned slice will remain valid for the
   // lifetime of this builder or until Reset() is called.
-  absl::string_view Finish();
+  StringPiece Finish();
 
   // Returns an estimate of the current (uncompressed) size of the block
   // we are building.
diff --git a/tensorflow/core/lib/io/format.cc b/tensorflow/core/lib/io/format.cc
index 81c9d59d4a..0c24c660a2 100644
--- a/tensorflow/core/lib/io/format.cc
+++ b/tensorflow/core/lib/io/format.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include <limits>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/format.h"
 
 #include "tensorflow/core/lib/core/coding.h"
@@ -36,7 +35,7 @@ void BlockHandle::EncodeTo(string* dst) const {
   core::PutVarint64(dst, size_);
 }
 
-Status BlockHandle::DecodeFrom(absl::string_view* input) {
+Status BlockHandle::DecodeFrom(StringPiece* input) {
   if (core::GetVarint64(input, &offset_) && core::GetVarint64(input, &size_)) {
     return Status::OK();
   } else {
@@ -56,7 +55,7 @@ void Footer::EncodeTo(string* dst) const {
   assert(dst->size() == original_size + kEncodedLength);
 }
 
-Status Footer::DecodeFrom(absl::string_view* input) {
+Status Footer::DecodeFrom(StringPiece* input) {
   const char* magic_ptr = input->data() + kEncodedLength - 8;
   const uint32 magic_lo = core::DecodeFixed32(magic_ptr);
   const uint32 magic_hi = core::DecodeFixed32(magic_ptr + 4);
@@ -73,14 +72,14 @@ Status Footer::DecodeFrom(absl::string_view* input) {
   if (result.ok()) {
     // We skip over any leftover data (just padding for now) in "input"
     const char* end = magic_ptr + 8;
-    *input = absl::string_view(end, input->data() + input->size() - end);
+    *input = StringPiece(end, input->data() + input->size() - end);
   }
   return result;
 }
 
 Status ReadBlock(RandomAccessFile* file, const BlockHandle& handle,
                  BlockContents* result) {
-  result->data = absl::string_view();
+  result->data = StringPiece();
   result->cachable = false;
   result->heap_allocated = false;
 
@@ -93,7 +92,7 @@ Status ReadBlock(RandomAccessFile* file, const BlockHandle& handle,
   }
 
   char* buf = new char[n + kBlockTrailerSize];
-  absl::string_view contents;
+  StringPiece contents;
   Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf);
   if (!s.ok()) {
     delete[] buf;
@@ -125,11 +124,11 @@ Status ReadBlock(RandomAccessFile* file, const BlockHandle& handle,
         // Use it directly under the assumption that it will be live
         // while the file is open.
         delete[] buf;
-        result->data = absl::string_view(data, n);
+        result->data = StringPiece(data, n);
         result->heap_allocated = false;
         result->cachable = false;  // Do not double-cache
       } else {
-        result->data = absl::string_view(buf, n);
+        result->data = StringPiece(buf, n);
         result->heap_allocated = true;
         result->cachable = true;
       }
@@ -149,7 +148,7 @@ Status ReadBlock(RandomAccessFile* file, const BlockHandle& handle,
         return errors::DataLoss("corrupted compressed block contents");
       }
       delete[] buf;
-      result->data = absl::string_view(ubuf, ulength);
+      result->data = StringPiece(ubuf, ulength);
       result->heap_allocated = true;
       result->cachable = true;
       break;
diff --git a/tensorflow/core/lib/io/format.h b/tensorflow/core/lib/io/format.h
index 96a92f1209..fd5c3470f6 100644
--- a/tensorflow/core/lib/io/format.h
+++ b/tensorflow/core/lib/io/format.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <stdint.h>
 #include <string>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/table_builder.h"
 
 namespace tensorflow {
@@ -43,7 +43,7 @@ class BlockHandle {
   void set_size(uint64 size) { size_ = size; }
 
   void EncodeTo(string* dst) const;
-  Status DecodeFrom(absl::string_view* input);
+  Status DecodeFrom(StringPiece* input);
 
   // Maximum encoding length of a BlockHandle
   enum { kMaxEncodedLength = 10 + 10 };
@@ -68,7 +68,7 @@ class Footer {
   void set_index_handle(const BlockHandle& h) { index_handle_ = h; }
 
   void EncodeTo(string* dst) const;
-  Status DecodeFrom(absl::string_view* input);
+  Status DecodeFrom(StringPiece* input);
 
   // Encoded length of a Footer.  Note that the serialization of a
   // Footer will always occupy exactly this many bytes.  It consists
@@ -89,7 +89,7 @@ static const uint64 kTableMagicNumber = 0xdb4775248b80fb57ull;
 static const size_t kBlockTrailerSize = 5;
 
 struct BlockContents {
-  absl::string_view data;  // Actual contents of data
+  StringPiece data;     // Actual contents of data
   bool cachable;        // True iff data can be cached
   bool heap_allocated;  // True iff caller should delete[] data.data()
 };
diff --git a/tensorflow/core/lib/io/inputbuffer.cc b/tensorflow/core/lib/io/inputbuffer.cc
index a12cf97a9c..4d35af49b2 100644
--- a/tensorflow/core/lib/io/inputbuffer.cc
+++ b/tensorflow/core/lib/io/inputbuffer.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/inputbuffer.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -32,7 +31,7 @@ InputBuffer::InputBuffer(RandomAccessFile* file, size_t buffer_bytes)
 InputBuffer::~InputBuffer() { delete[] buf_; }
 
 Status InputBuffer::FillBuffer() {
-  absl::string_view data;
+  StringPiece data;
   Status s = file_->Read(file_pos_, size_, &data, buf_);
   if (data.data() != buf_) {
     memmove(buf_, data.data(), data.size());
diff --git a/tensorflow/core/lib/io/inputbuffer_test.cc b/tensorflow/core/lib/io/inputbuffer_test.cc
index 2404d5e678..3608008b30 100644
--- a/tensorflow/core/lib/io/inputbuffer_test.cc
+++ b/tensorflow/core/lib/io/inputbuffer_test.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/lib/io/inputbuffer.h"
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/env.h"
 
 #include "tensorflow/core/lib/core/coding.h"
@@ -193,31 +192,31 @@ TEST(InputBuffer, ReadNBytes) {
 
     EXPECT_EQ(0, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(3, read, &bytes_read));
-    EXPECT_EQ(absl::string_view(read, 3), "012");
+    EXPECT_EQ(StringPiece(read, 3), "012");
 
     EXPECT_EQ(3, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(0, read, &bytes_read));
-    EXPECT_EQ(absl::string_view(read, 3), "012");
+    EXPECT_EQ(StringPiece(read, 3), "012");
 
     EXPECT_EQ(3, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(4, read, &bytes_read));
-    EXPECT_EQ(absl::string_view(read, 4), "3456");
+    EXPECT_EQ(StringPiece(read, 4), "3456");
 
     EXPECT_EQ(7, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(0, read, &bytes_read));
-    EXPECT_EQ(absl::string_view(read, 4), "3456");
+    EXPECT_EQ(StringPiece(read, 4), "3456");
 
     EXPECT_EQ(7, in.Tell());
     EXPECT_TRUE(errors::IsOutOfRange(in.ReadNBytes(5, read, &bytes_read)));
-    EXPECT_EQ(absl::string_view(read, 3), "789");
+    EXPECT_EQ(StringPiece(read, 3), "789");
 
     EXPECT_EQ(10, in.Tell());
     EXPECT_TRUE(errors::IsOutOfRange(in.ReadNBytes(5, read, &bytes_read)));
-    EXPECT_EQ(absl::string_view(read, 3), "789");
+    EXPECT_EQ(StringPiece(read, 3), "789");
 
     EXPECT_EQ(10, in.Tell());
     TF_ASSERT_OK(in.ReadNBytes(0, read, &bytes_read));
-    EXPECT_EQ(absl::string_view(read, 3), "789");
+    EXPECT_EQ(StringPiece(read, 3), "789");
     EXPECT_EQ(10, in.Tell());
   }
 }
@@ -313,7 +312,7 @@ TEST(InputBuffer, ReadVarint32) {
     for (uint32 number : data) {
       varint.clear();
       core::PutVarint32(&varint, number);
-      TF_CHECK_OK(file->Append(absl::string_view(varint)));
+      TF_CHECK_OK(file->Append(StringPiece(varint)));
     }
   }
 
@@ -352,7 +351,7 @@ TEST(InputBuffer, ReadVarint64) {
     for (uint64 number : data) {
       varint.clear();
       core::PutVarint64(&varint, number);
-      TF_CHECK_OK(file->Append(absl::string_view(varint)));
+      TF_CHECK_OK(file->Append(StringPiece(varint)));
     }
   }
 
diff --git a/tensorflow/core/lib/io/iterator.cc b/tensorflow/core/lib/io/iterator.cc
index 5ba6c2d884..50eaa50b5c 100644
--- a/tensorflow/core/lib/io/iterator.cc
+++ b/tensorflow/core/lib/io/iterator.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/iterator.h"
-#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace table {
@@ -56,16 +55,16 @@ class EmptyIterator : public Iterator {
  public:
   explicit EmptyIterator(const Status& s) : status_(s) {}
   bool Valid() const override { return false; }
-  void Seek(const absl::string_view& target) override {}
+  void Seek(const StringPiece& target) override {}
   void SeekToFirst() override {}
   void Next() override { assert(false); }
-  absl::string_view key() const override {
+  StringPiece key() const override {
     assert(false);
-    return absl::string_view();
+    return StringPiece();
   }
-  absl::string_view value() const override {
+  StringPiece value() const override {
     assert(false);
-    return absl::string_view();
+    return StringPiece();
   }
   Status status() const override { return status_; }
 
diff --git a/tensorflow/core/lib/io/iterator.h b/tensorflow/core/lib/io/iterator.h
index 8532b0f4e6..2b9326d7dd 100644
--- a/tensorflow/core/lib/io/iterator.h
+++ b/tensorflow/core/lib/io/iterator.h
@@ -26,8 +26,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LIB_IO_ITERATOR_H_
 #define TENSORFLOW_LIB_IO_ITERATOR_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 namespace table {
@@ -48,7 +48,7 @@ class Iterator {
   // Position at the first key in the source that is at or past target.
   // The iterator is Valid() after this call iff the source contains
   // an entry that comes at or past target.
-  virtual void Seek(const absl::string_view& target) = 0;
+  virtual void Seek(const StringPiece& target) = 0;
 
   // Moves to the next entry in the source.  After this call, Valid() is
   // true iff the iterator was not positioned at the last entry in the source.
@@ -59,13 +59,13 @@ class Iterator {
   // the returned slice is valid only until the next modification of
   // the iterator.
   // REQUIRES: Valid()
-  virtual absl::string_view key() const = 0;
+  virtual StringPiece key() const = 0;
 
   // Return the value for the current entry.  The underlying storage for
   // the returned slice is valid only until the next modification of
   // the iterator.
   // REQUIRES: Valid()
-  virtual absl::string_view value() const = 0;
+  virtual StringPiece value() const = 0;
 
   // If an error has occurred, return it.  Else return an ok status.
   virtual Status status() const = 0;
diff --git a/tensorflow/core/lib/io/path.cc b/tensorflow/core/lib/io/path.cc
index f64a7e1d86..b75dcecadf 100644
--- a/tensorflow/core/lib/io/path.cc
+++ b/tensorflow/core/lib/io/path.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <stdlib.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-#include "absl/strings/string_view.h"
 #if !defined(PLATFORM_WINDOWS)
 #include <unistd.h>
 #endif
@@ -36,10 +35,10 @@ namespace tensorflow {
 namespace io {
 namespace internal {
 
-string JoinPathImpl(std::initializer_list<absl::string_view> paths) {
+string JoinPathImpl(std::initializer_list<StringPiece> paths) {
   string result;
 
-  for (absl::string_view path : paths) {
+  for (StringPiece path : paths) {
     if (path.empty()) continue;
 
     if (result.empty()) {
@@ -69,65 +68,62 @@ string JoinPathImpl(std::initializer_list<absl::string_view> paths) {
 // no "/" in the path, the first part of the output is the scheme and host, and
 // the second is the path. If the only "/" in the path is the first character,
 // it is included in the first part of the output.
-std::pair<absl::string_view, absl::string_view> SplitPath(
-    absl::string_view uri) {
-  absl::string_view scheme, host, path;
+std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) {
+  StringPiece scheme, host, path;
   ParseURI(uri, &scheme, &host, &path);
 
   auto pos = path.rfind('/');
 #ifdef PLATFORM_WINDOWS
-  if (pos == absl::string_view::npos) pos = path.rfind('\\');
+  if (pos == StringPiece::npos) pos = path.rfind('\\');
 #endif
   // Handle the case with no '/' in 'path'.
-  if (pos == absl::string_view::npos)
-    return std::make_pair(
-        absl::string_view(uri.begin(), host.end() - uri.begin()), path);
+  if (pos == StringPiece::npos)
+    return std::make_pair(StringPiece(uri.begin(), host.end() - uri.begin()),
+                          path);
 
   // Handle the case with a single leading '/' in 'path'.
   if (pos == 0)
     return std::make_pair(
-        absl::string_view(uri.begin(), path.begin() + 1 - uri.begin()),
-        absl::string_view(path.data() + 1, path.size() - 1));
+        StringPiece(uri.begin(), path.begin() + 1 - uri.begin()),
+        StringPiece(path.data() + 1, path.size() - 1));
 
   return std::make_pair(
-      absl::string_view(uri.begin(), path.begin() + pos - uri.begin()),
-      absl::string_view(path.data() + pos + 1, path.size() - (pos + 1)));
+      StringPiece(uri.begin(), path.begin() + pos - uri.begin()),
+      StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
 }
 
 // Return the parts of the basename of path, split on the final ".".
 // If there is no "." in the basename or "." is the final character in the
 // basename, the second value will be empty.
-std::pair<absl::string_view, absl::string_view> SplitBasename(
-    absl::string_view path) {
+std::pair<StringPiece, StringPiece> SplitBasename(StringPiece path) {
   path = Basename(path);
 
   auto pos = path.rfind('.');
-  if (pos == absl::string_view::npos)
-    return std::make_pair(path,
-                          absl::string_view(path.data() + path.size(), 0));
+  if (pos == StringPiece::npos)
+    return std::make_pair(path, StringPiece(path.data() + path.size(), 0));
   return std::make_pair(
-      absl::string_view(path.data(), pos),
-      absl::string_view(path.data() + pos + 1, path.size() - (pos + 1)));
+      StringPiece(path.data(), pos),
+      StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
 }
 }  // namespace internal
 
-bool IsAbsolutePath(absl::string_view path) {
+bool IsAbsolutePath(StringPiece path) {
   return !path.empty() && path[0] == '/';
 }
 
-absl::string_view Dirname(absl::string_view path) {
+StringPiece Dirname(StringPiece path) {
   return internal::SplitPath(path).first;
 }
 
-absl::string_view Basename(absl::string_view path) {
+StringPiece Basename(StringPiece path) {
   return internal::SplitPath(path).second;
 }
 
-absl::string_view Extension(absl::string_view path) {
+StringPiece Extension(StringPiece path) {
   return internal::SplitBasename(path).second;
 }
 
-string CleanPath(absl::string_view unclean_path) {
+string CleanPath(StringPiece unclean_path) {
   string path(unclean_path);
   const char* src = path.c_str();
   string::iterator dst = path.begin();
@@ -208,8 +204,8 @@ string CleanPath(absl::string_view unclean_path) {
   return path;
 }
 
-void ParseURI(absl::string_view remaining, absl::string_view* scheme,
-              absl::string_view* host, absl::string_view* path) {
+void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
+              StringPiece* path) {
   // 0. Parse scheme
   // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]*
   // TODO(keveman): Allow "+" and "-" in the scheme.
@@ -221,8 +217,8 @@ void ParseURI(absl::string_view remaining, absl::string_view* scheme,
            .OneLiteral("://")
            .GetResult(&remaining, scheme)) {
     // If there's no scheme, assume the entire string is a path.
-    *scheme = absl::string_view(remaining.begin(), 0);
-    *host = absl::string_view(remaining.begin(), 0);
+    *scheme = StringPiece(remaining.begin(), 0);
+    *host = StringPiece(remaining.begin(), 0);
     *path = remaining;
     return;
   }
@@ -231,7 +227,7 @@ void ParseURI(absl::string_view remaining, absl::string_view* scheme,
   if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) {
     // No path, so the rest of the URI is the host.
     *host = remaining;
-    *path = absl::string_view(remaining.end(), 0);
+    *path = StringPiece(remaining.end(), 0);
     return;
   }
 
@@ -239,8 +235,7 @@ void ParseURI(absl::string_view remaining, absl::string_view* scheme,
   *path = remaining;
 }
 
-string CreateURI(absl::string_view scheme, absl::string_view host,
-                 absl::string_view path) {
+string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
   if (scheme.empty()) {
     return string(path);
   }
diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h
index 288b6ee951..38fb0c5d86 100644
--- a/tensorflow/core/lib/io/path.h
+++ b/tensorflow/core/lib/io/path.h
@@ -16,13 +16,13 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_IO_PATH_H_
 #define TENSORFLOW_CORE_LIB_IO_PATH_H_
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace io {
 namespace internal {
-string JoinPathImpl(std::initializer_list<absl::string_view> paths);
+string JoinPathImpl(std::initializer_list<tensorflow::StringPiece> paths);
 }
 
 // Utility routines for processing filenames
@@ -49,20 +49,20 @@ string JoinPath(const T&... args) {
 #endif /* SWIG */
 
 // Return true if path is absolute.
-bool IsAbsolutePath(absl::string_view path);
+bool IsAbsolutePath(tensorflow::StringPiece path);
 
 // Returns the part of the path before the final "/".  If there is a single
 // leading "/" in the path, the result will be the leading "/".  If there is
 // no "/" in the path, the result is the empty prefix of the input.
-absl::string_view Dirname(absl::string_view path);
+tensorflow::StringPiece Dirname(tensorflow::StringPiece path);
 
 // Returns the part of the path after the final "/".  If there is no
 // "/" in the path, the result is the same as the input.
-absl::string_view Basename(absl::string_view path);
+tensorflow::StringPiece Basename(tensorflow::StringPiece path);
 
 // Returns the part of the basename of path after the final ".".  If
 // there is no "." in the basename, the result is empty.
-absl::string_view Extension(absl::string_view path);
+tensorflow::StringPiece Extension(tensorflow::StringPiece path);
 
 // Collapse duplicate "/"s, resolve ".." and "." path elements, remove
 // trailing "/".
@@ -71,7 +71,7 @@ absl::string_view Extension(absl::string_view path);
 // invoke any system calls (getcwd(2)) in order to resolve relative
 // paths with respect to the actual working directory.  That is, this is purely
 // string manipulation, completely independent of process state.
-string CleanPath(absl::string_view path);
+string CleanPath(tensorflow::StringPiece path);
 
 // Populates the scheme, host, and path from a URI. scheme, host, and path are
 // guaranteed by this function to point into the contents of uri, even if
@@ -81,13 +81,13 @@ string CleanPath(absl::string_view path);
 // - If the URI is invalid, scheme and host are set to empty strings and the
 //   passed string is assumed to be a path
 // - If the URI omits the path (e.g. file://host), then the path is left empty.
-void ParseURI(absl::string_view uri, absl::string_view* scheme,
-              absl::string_view* host, absl::string_view* path);
+void ParseURI(tensorflow::StringPiece uri, tensorflow::StringPiece* scheme,
+              tensorflow::StringPiece* host, tensorflow::StringPiece* path);
 
 // Creates a URI from a scheme, host, and path. If the scheme is empty, we just
 // return the path.
-string CreateURI(absl::string_view scheme, absl::string_view host,
-                 absl::string_view path);
+string CreateURI(tensorflow::StringPiece scheme, tensorflow::StringPiece host,
+                 tensorflow::StringPiece path);
 
 // Creates a temporary file name with an extension.
 string GetTempFilename(const string& extension);
diff --git a/tensorflow/core/lib/io/path_test.cc b/tensorflow/core/lib/io/path_test.cc
index e22b03f0b0..0090b9100c 100644
--- a/tensorflow/core/lib/io/path_test.cc
+++ b/tensorflow/core/lib/io/path_test.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/path.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -102,8 +101,8 @@ TEST(PathTest, CleanPath) {
 
 #define EXPECT_PARSE_URI(uri, scheme, host, path)  \
   do {                                             \
-    absl::string_view u(uri);                      \
-    absl::string_view s, h, p;                     \
+    StringPiece u(uri);                            \
+    StringPiece s, h, p;                           \
     ParseURI(u, &s, &h, &p);                       \
     EXPECT_EQ(scheme, s);                          \
     EXPECT_EQ(host, h);                            \
diff --git a/tensorflow/core/lib/io/proto_encode_helper.h b/tensorflow/core/lib/io/proto_encode_helper.h
index 7e989e921f..34905520f1 100644
--- a/tensorflow/core/lib/io/proto_encode_helper.h
+++ b/tensorflow/core/lib/io/proto_encode_helper.h
@@ -16,8 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_IO_PROTO_ENCODE_HELPER_H_
 #define TENSORFLOW_CORE_LIB_IO_PROTO_ENCODE_HELPER_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 // A helper class for appending various kinds of values in protocol
@@ -48,7 +48,7 @@ class ProtoEncodeHelper {
     Encode32(combine(tag, WIRETYPE_VARINT));
     EncodeBool(v);
   }
-  void WriteString(int tag, absl::string_view v) {
+  void WriteString(int tag, StringPiece v) {
     Encode32(combine(tag, WIRETYPE_LENGTH_DELIMITED));
     Encode32(v.size());
     EncodeBytes(v.data(), v.size());
@@ -57,7 +57,7 @@ class ProtoEncodeHelper {
     Encode32(combine(tag, WIRETYPE_LENGTH_DELIMITED));
     Encode32(len);
   }
-  void WriteRawBytes(absl::string_view v) { EncodeBytes(v.data(), v.size()); }
+  void WriteRawBytes(StringPiece v) { EncodeBytes(v.data(), v.size()); }
 
  private:
   // Note: this module's behavior must match the protocol buffer wire encoding
diff --git a/tensorflow/core/lib/io/random_inputstream.cc b/tensorflow/core/lib/io/random_inputstream.cc
index 4d9fe75497..e85367df9c 100644
--- a/tensorflow/core/lib/io/random_inputstream.cc
+++ b/tensorflow/core/lib/io/random_inputstream.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/random_inputstream.h"
 #include <memory>
-#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace io {
@@ -38,7 +37,7 @@ Status RandomAccessInputStream::ReadNBytes(int64 bytes_to_read,
   result->clear();
   result->resize(bytes_to_read);
   char* result_buffer = &(*result)[0];
-  absl::string_view data;
+  StringPiece data;
   Status s = file_->Read(pos_, bytes_to_read, &data, result_buffer);
   if (data.data() != result_buffer) {
     memmove(result_buffer, data.data(), data.size());
@@ -62,7 +61,7 @@ Status RandomAccessInputStream::SkipNBytes(int64 bytes_to_skip) {
   // Try to read 1 bytes first, if we could complete the read then EOF is
   // not reached yet and we could return.
   if (bytes_to_skip > 0) {
-    absl::string_view data;
+    StringPiece data;
     Status s = file_->Read(pos_ + bytes_to_skip - 1, 1, &data, scratch.get());
     if ((s.ok() || errors::IsOutOfRange(s)) && data.size() == 1) {
       pos_ += bytes_to_skip;
@@ -72,7 +71,7 @@ Status RandomAccessInputStream::SkipNBytes(int64 bytes_to_skip) {
   // Read kDefaultSkipSize at a time till bytes_to_skip.
   while (bytes_to_skip > 0) {
     int64 bytes_to_read = std::min<int64>(kMaxSkipSize, bytes_to_skip);
-    absl::string_view data;
+    StringPiece data;
     Status s = file_->Read(pos_, bytes_to_read, &data, scratch.get());
     if (s.ok() || errors::IsOutOfRange(s)) {
       pos_ += data.size();
diff --git a/tensorflow/core/lib/io/record_writer.cc b/tensorflow/core/lib/io/record_writer.cc
index 0aead25457..2c6db2487e 100644
--- a/tensorflow/core/lib/io/record_writer.cc
+++ b/tensorflow/core/lib/io/record_writer.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/record_writer.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/hash/crc32c.h"
 #include "tensorflow/core/lib/io/compression.h"
@@ -89,7 +88,7 @@ RecordWriter::~RecordWriter() {
   }
 }
 
-Status RecordWriter::WriteRecord(absl::string_view data) {
+Status RecordWriter::WriteRecord(StringPiece data) {
   if (dest_ == nullptr) {
     return Status(::tensorflow::error::FAILED_PRECONDITION,
                   "Writer not initialized or previously closed");
@@ -103,9 +102,9 @@ Status RecordWriter::WriteRecord(absl::string_view data) {
   char footer[kFooterSize];
   PopulateHeader(header, data.data(), data.size());
   PopulateFooter(footer, data.data(), data.size());
-  TF_RETURN_IF_ERROR(dest_->Append(absl::string_view(header, sizeof(header))));
+  TF_RETURN_IF_ERROR(dest_->Append(StringPiece(header, sizeof(header))));
   TF_RETURN_IF_ERROR(dest_->Append(data));
-  return dest_->Append(absl::string_view(footer, sizeof(footer)));
+  return dest_->Append(StringPiece(footer, sizeof(footer)));
 }
 
 Status RecordWriter::Close() {
diff --git a/tensorflow/core/lib/io/record_writer.h b/tensorflow/core/lib/io/record_writer.h
index d797ab82f3..1212e1fafb 100644
--- a/tensorflow/core/lib/io/record_writer.h
+++ b/tensorflow/core/lib/io/record_writer.h
@@ -16,9 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_IO_RECORD_WRITER_H_
 #define TENSORFLOW_CORE_LIB_IO_RECORD_WRITER_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/hash/crc32c.h"
 #if !defined(IS_SLIM_BUILD)
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
@@ -69,7 +69,7 @@ class RecordWriter {
   // implicit Close() call in the destructor.
   ~RecordWriter();
 
-  Status WriteRecord(absl::string_view slice);
+  Status WriteRecord(StringPiece slice);
 
   // Flushes any buffered data held by underlying containers of the
   // RecordWriter to the WritableFile. Does *not* flush the
diff --git a/tensorflow/core/lib/io/recordio_test.cc b/tensorflow/core/lib/io/recordio_test.cc
index eda3316381..946d7188d3 100644
--- a/tensorflow/core/lib/io/recordio_test.cc
+++ b/tensorflow/core/lib/io/recordio_test.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -59,7 +58,7 @@ class StringDest : public WritableFile {
   Status Close() override { return Status::OK(); }
   Status Flush() override { return Status::OK(); }
   Status Sync() override { return Status::OK(); }
-  Status Append(absl::string_view slice) override {
+  Status Append(StringPiece slice) override {
     contents_->append(slice.data(), slice.size());
     return Status::OK();
   }
@@ -73,7 +72,7 @@ class StringSource : public RandomAccessFile {
   explicit StringSource(string* contents)
       : contents_(contents), force_error_(false) {}
 
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     if (force_error_) {
       force_error_ = false;
@@ -87,7 +86,7 @@ class StringSource : public RandomAccessFile {
     if (contents_->size() < offset + n) {
       n = contents_->size() - offset;
     }
-    *result = absl::string_view(contents_->data() + offset, n);
+    *result = StringPiece(contents_->data() + offset, n);
     return Status::OK();
   }
 
@@ -124,7 +123,7 @@ class RecordioTest : public ::testing::Test {
 
   void Write(const string& msg) {
     ASSERT_TRUE(!reading_) << "Write() after starting to read";
-    TF_ASSERT_OK(writer_->WriteRecord(absl::string_view(msg)));
+    TF_ASSERT_OK(writer_->WriteRecord(StringPiece(msg)));
   }
 
   size_t WrittenBytes() const { return contents_.size(); }
@@ -268,7 +267,7 @@ TEST_F(RecordioTest, NonSequentialReadsWithCompression) {
 }
 
 // Tests of all the error paths in log_reader.cc follow:
-void AssertHasSubstr(absl::string_view s, absl::string_view expected) {
+void AssertHasSubstr(StringPiece s, StringPiece expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << s << " does not contain " << expected;
 }
diff --git a/tensorflow/core/lib/io/snappy/snappy_buffers_test.cc b/tensorflow/core/lib/io/snappy/snappy_buffers_test.cc
index c82b424390..e0918c70a7 100644
--- a/tensorflow/core/lib/io/snappy/snappy_buffers_test.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_buffers_test.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/inputbuffer.h"
 #include "tensorflow/core/lib/io/snappy/snappy_inputbuffer.h"
@@ -69,7 +68,7 @@ Status TestMultipleWrites(size_t compress_input_buf_size,
                              compress_output_buf_size);
 
   for (int i = 0; i < num_writes; i++) {
-    TF_RETURN_IF_ERROR(out.Write(absl::string_view(data)));
+    TF_RETURN_IF_ERROR(out.Write(StringPiece(data)));
     if (with_flush) {
       TF_RETURN_IF_ERROR(out.Flush());
     }
@@ -88,7 +87,7 @@ Status TestMultipleWrites(size_t compress_input_buf_size,
     std::unique_ptr<RandomAccessFile> file_reader;
     TF_RETURN_IF_ERROR(env->NewRandomAccessFile(fname, &file_reader));
 
-    absl::string_view data;
+    StringPiece data;
     size_t file_pos = 0;
     size_t bytes_to_read = 256;
     char* scratch = new char[bytes_to_read];
@@ -98,14 +97,14 @@ Status TestMultipleWrites(size_t compress_input_buf_size,
     while ((file_reader->Read(file_pos, bytes_to_read, &data, scratch)).ok()) {
       file_pos += data.size();
       TF_CHECK_OK(
-          corrupt_file_writer->Append(absl::string_view(buffer, buffer_size)));
+          corrupt_file_writer->Append(StringPiece(buffer, buffer_size)));
       memcpy(buffer, data.data(), data.size());
       buffer_size = data.size();
     }
 
     // Drop the last byte. File is now corrupt.
-    TF_CHECK_OK(corrupt_file_writer->Append(
-        absl::string_view(buffer, buffer_size - 1)));
+    TF_CHECK_OK(
+        corrupt_file_writer->Append(StringPiece(buffer, buffer_size - 1)));
     TF_CHECK_OK(corrupt_file_writer->Flush());
     TF_CHECK_OK(corrupt_file_writer->Close());
     delete[] scratch;
@@ -137,7 +136,7 @@ Status TestMultipleWrites(size_t compress_input_buf_size,
 
 static bool SnappyCompressionSupported() {
   string out;
-  absl::string_view in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+  StringPiece in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
   return port::Snappy_Compress(in.data(), in.size(), &out);
 }
 
diff --git a/tensorflow/core/lib/io/snappy/snappy_inputbuffer.cc b/tensorflow/core/lib/io/snappy/snappy_inputbuffer.cc
index 35572e6ae1..853d86cb23 100644
--- a/tensorflow/core/lib/io/snappy/snappy_inputbuffer.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_inputbuffer.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/snappy/snappy_inputbuffer.h"
-#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace io {
@@ -160,7 +159,7 @@ Status SnappyInputBuffer::ReadFromFile() {
     bytes_to_read -= avail_in_;
     read_location += avail_in_;
   }
-  absl::string_view data;
+  StringPiece data;
   // Try to read enough data to fill up input_buffer_.
   Status s = file_->Read(file_pos_, bytes_to_read, &data, read_location);
   if (data.data() != read_location) {
diff --git a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
index d7c1deeb0b..3c31016732 100644
--- a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/io/snappy/snappy_outputbuffer.h"
-#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace io {
@@ -31,7 +30,7 @@ SnappyOutputBuffer::SnappyOutputBuffer(WritableFile* file,
       next_out_(output_buffer_.get()),
       avail_out_(output_buffer_bytes) {}
 
-Status SnappyOutputBuffer::Write(absl::string_view data) {
+Status SnappyOutputBuffer::Write(StringPiece data) {
   //
   // The deflated output is accumulated in output_buffer_ and gets written to
   // file as and when needed.
@@ -81,7 +80,7 @@ int32 SnappyOutputBuffer::AvailableInputSpace() const {
   return input_buffer_capacity_ - avail_in_;
 }
 
-void SnappyOutputBuffer::AddToInputBuffer(absl::string_view data) {
+void SnappyOutputBuffer::AddToInputBuffer(StringPiece data) {
   size_t bytes_to_write = data.size();
   DCHECK_LE(bytes_to_write, AvailableInputSpace());
 
@@ -141,7 +140,7 @@ Status SnappyOutputBuffer::DeflateBuffered() {
 Status SnappyOutputBuffer::FlushOutputBufferToFile() {
   size_t bytes_to_write = output_buffer_capacity_ - avail_out_;
   if (bytes_to_write > 0) {
-    Status s = file_->Append(absl::string_view(
+    Status s = file_->Append(StringPiece(
         reinterpret_cast<char*>(output_buffer_.get()), bytes_to_write));
     if (s.ok()) {
       next_out_ = output_buffer_.get();
diff --git a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.h b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.h
index 7a0a3b0e19..5aea503846 100644
--- a/tensorflow/core/lib/io/snappy/snappy_outputbuffer.h
+++ b/tensorflow/core/lib/io/snappy/snappy_outputbuffer.h
@@ -17,7 +17,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_LIB_IO_SNAPPY_OUTPUTBUFFER_H_
 
 #include <string>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/macros.h"
@@ -62,7 +61,7 @@ class SnappyOutputBuffer {
   // to file when the buffer is full.
   //
   // To immediately write contents to file call `Flush()`.
-  Status Write(absl::string_view data);
+  Status Write(StringPiece data);
 
   // Compresses any cached input and writes all output to file. This must be
   // called before the destructor to avoid any data loss.
@@ -71,7 +70,7 @@ class SnappyOutputBuffer {
  private:
   // Appends `data` to `input_buffer_`.
   // Throws if `data.size()` > AvailableInputSpace().
-  void AddToInputBuffer(absl::string_view data);
+  void AddToInputBuffer(StringPiece data);
 
   // Appends `data` to `output_buffer_`. Flushes buffer contents to file when
   // buffer gets full.
diff --git a/tensorflow/core/lib/io/table.cc b/tensorflow/core/lib/io/table.cc
index 2c982f85dd..1ef7bb6ccd 100644
--- a/tensorflow/core/lib/io/table.cc
+++ b/tensorflow/core/lib/io/table.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/table.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/block.h"
@@ -47,7 +46,7 @@ Status Table::Open(const Options& options, RandomAccessFile* file, uint64 size,
   }
 
   char footer_space[Footer::kEncodedLength];
-  absl::string_view footer_input;
+  StringPiece footer_input;
   Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
                         &footer_input, footer_space);
   if (!s.ok()) return s;
@@ -92,14 +91,14 @@ static void DeleteBlock(void* arg, void* ignored) {
 
 // Convert an index iterator value (i.e., an encoded BlockHandle)
 // into an iterator over the contents of the corresponding block.
-Iterator* Table::BlockReader(void* arg, const absl::string_view& index_value) {
+Iterator* Table::BlockReader(void* arg, const StringPiece& index_value) {
   Table* table = reinterpret_cast<Table*>(arg);
   //  Cache* block_cache = table->rep_->options.block_cache;
   Block* block = nullptr;
   //  Cache::Handle* cache_handle = NULL;
 
   BlockHandle handle;
-  absl::string_view input = index_value;
+  StringPiece input = index_value;
   Status s = handle.DecodeFrom(&input);
   // We intentionally allow extra stuff in index_value so that we
   // can add more features in the future.
@@ -127,9 +126,9 @@ Iterator* Table::NewIterator() const {
                              &Table::BlockReader, const_cast<Table*>(this));
 }
 
-Status Table::InternalGet(const absl::string_view& k, void* arg,
-                          void (*saver)(void*, const absl::string_view&,
-                                        const absl::string_view&)) {
+Status Table::InternalGet(const StringPiece& k, void* arg,
+                          void (*saver)(void*, const StringPiece&,
+                                        const StringPiece&)) {
   Status s;
   Iterator* iiter = rep_->index_block->NewIterator();
   iiter->Seek(k);
@@ -150,13 +149,13 @@ Status Table::InternalGet(const absl::string_view& k, void* arg,
   return s;
 }
 
-uint64 Table::ApproximateOffsetOf(const absl::string_view& key) const {
+uint64 Table::ApproximateOffsetOf(const StringPiece& key) const {
   Iterator* index_iter = rep_->index_block->NewIterator();
   index_iter->Seek(key);
   uint64 result;
   if (index_iter->Valid()) {
     BlockHandle handle;
-    absl::string_view input = index_iter->value();
+    StringPiece input = index_iter->value();
     Status s = handle.DecodeFrom(&input);
     if (s.ok()) {
       result = handle.offset();
diff --git a/tensorflow/core/lib/io/table.h b/tensorflow/core/lib/io/table.h
index aa1ae0ecbf..b9c6b8d9d2 100644
--- a/tensorflow/core/lib/io/table.h
+++ b/tensorflow/core/lib/io/table.h
@@ -17,7 +17,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_LIB_IO_TABLE_H_
 
 #include <stdint.h>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/iterator.h"
 
 namespace tensorflow {
@@ -61,22 +60,21 @@ class Table {
   // bytes, and so includes effects like compression of the underlying data.
   // E.g., the approximate offset of the last key in the table will
   // be close to the file length.
-  uint64 ApproximateOffsetOf(const absl::string_view& key) const;
+  uint64 ApproximateOffsetOf(const StringPiece& key) const;
 
  private:
   struct Rep;
   Rep* rep_;
 
   explicit Table(Rep* rep) { rep_ = rep; }
-  static Iterator* BlockReader(void*, const absl::string_view&);
+  static Iterator* BlockReader(void*, const StringPiece&);
 
   // Calls (*handle_result)(arg, ...) with the entry found after a call
   // to Seek(key).  May not make such a call if filter policy says
   // that key is not present.
-  Status InternalGet(const absl::string_view& key, void* arg,
-                     void (*handle_result)(void* arg,
-                                           const absl::string_view& k,
-                                           const absl::string_view& v));
+  Status InternalGet(const StringPiece& key, void* arg,
+                     void (*handle_result)(void* arg, const StringPiece& k,
+                                           const StringPiece& v));
 
   // No copying allowed
   Table(const Table&);
diff --git a/tensorflow/core/lib/io/table_builder.cc b/tensorflow/core/lib/io/table_builder.cc
index c127f3779e..81333a7b22 100644
--- a/tensorflow/core/lib/io/table_builder.cc
+++ b/tensorflow/core/lib/io/table_builder.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/lib/io/table_builder.h"
 
 #include <assert.h>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/hash/crc32c.h"
@@ -31,7 +30,7 @@ namespace table {
 
 namespace {
 
-void FindShortestSeparator(string* start, const absl::string_view& limit) {
+void FindShortestSeparator(string* start, const StringPiece& limit) {
   // Find length of common prefix
   size_t min_length = std::min(start->size(), limit.size());
   size_t diff_index = 0;
@@ -48,7 +47,7 @@ void FindShortestSeparator(string* start, const absl::string_view& limit) {
         diff_byte + 1 < static_cast<uint8>(limit[diff_index])) {
       (*start)[diff_index]++;
       start->resize(diff_index + 1);
-      assert(absl::string_view(*start).compare(limit) < 0);
+      assert(StringPiece(*start).compare(limit) < 0);
     }
   }
 }
@@ -116,13 +115,12 @@ TableBuilder::~TableBuilder() {
   delete rep_;
 }
 
-void TableBuilder::Add(const absl::string_view& key,
-                       const absl::string_view& value) {
+void TableBuilder::Add(const StringPiece& key, const StringPiece& value) {
   Rep* r = rep_;
   assert(!r->closed);
   if (!ok()) return;
   if (r->num_entries > 0) {
-    assert(key.compare(absl::string_view(r->last_key)) > 0);
+    assert(key.compare(StringPiece(r->last_key)) > 0);
     // See if this key+value would make our current block overly large.  If
     // so, emit the current block before adding this key/value
     const int kOverlyLargeBlockRatio = 2;
@@ -137,7 +135,7 @@ void TableBuilder::Add(const absl::string_view& key,
     FindShortestSeparator(&r->last_key, key);
     string handle_encoding;
     r->pending_handle.EncodeTo(&handle_encoding);
-    r->index_block.Add(r->last_key, absl::string_view(handle_encoding));
+    r->index_block.Add(r->last_key, StringPiece(handle_encoding));
     r->pending_index_entry = false;
   }
 
@@ -171,9 +169,9 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
   //    crc: uint32
   assert(ok());
   Rep* r = rep_;
-  absl::string_view raw = block->Finish();
+  StringPiece raw = block->Finish();
 
-  absl::string_view block_contents;
+  StringPiece block_contents;
   CompressionType type = r->options.compression;
   // TODO(postrelease): Support more compression options: zlib?
   switch (type) {
@@ -200,7 +198,7 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
   block->Reset();
 }
 
-void TableBuilder::WriteRawBlock(const absl::string_view& block_contents,
+void TableBuilder::WriteRawBlock(const StringPiece& block_contents,
                                  CompressionType type, BlockHandle* handle) {
   Rep* r = rep_;
   handle->set_offset(r->offset);
@@ -212,7 +210,7 @@ void TableBuilder::WriteRawBlock(const absl::string_view& block_contents,
     uint32 crc = crc32c::Value(block_contents.data(), block_contents.size());
     crc = crc32c::Extend(crc, trailer, 1);  // Extend crc to cover block type
     core::EncodeFixed32(trailer + 1, crc32c::Mask(crc));
-    r->status = r->file->Append(absl::string_view(trailer, kBlockTrailerSize));
+    r->status = r->file->Append(StringPiece(trailer, kBlockTrailerSize));
     if (r->status.ok()) {
       r->offset += block_contents.size() + kBlockTrailerSize;
     }
@@ -242,7 +240,7 @@ Status TableBuilder::Finish() {
       FindShortSuccessor(&r->last_key);
       string handle_encoding;
       r->pending_handle.EncodeTo(&handle_encoding);
-      r->index_block.Add(r->last_key, absl::string_view(handle_encoding));
+      r->index_block.Add(r->last_key, StringPiece(handle_encoding));
       r->pending_index_entry = false;
     }
     WriteBlock(&r->index_block, &index_block_handle);
diff --git a/tensorflow/core/lib/io/table_builder.h b/tensorflow/core/lib/io/table_builder.h
index c0c9afbf66..0e37e0a77f 100644
--- a/tensorflow/core/lib/io/table_builder.h
+++ b/tensorflow/core/lib/io/table_builder.h
@@ -25,7 +25,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_LIB_IO_TABLE_BUILDER_H_
 
 #include <stdint.h>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/table_options.h"
 
@@ -49,7 +48,7 @@ class TableBuilder {
   // Add key,value to the table being constructed.
   // REQUIRES: key is after any previously added key in lexicographic order.
   // REQUIRES: Finish(), Abandon() have not been called
-  void Add(const absl::string_view& key, const absl::string_view& value);
+  void Add(const StringPiece& key, const StringPiece& value);
 
   // Advanced operation: writes any buffered key/value pairs to file.
   // Can be used to ensure that two adjacent entries never live in
@@ -83,7 +82,7 @@ class TableBuilder {
  private:
   bool ok() const { return status().ok(); }
   void WriteBlock(BlockBuilder* block, BlockHandle* handle);
-  void WriteRawBlock(const absl::string_view& data, CompressionType,
+  void WriteRawBlock(const StringPiece& data, CompressionType,
                      BlockHandle* handle);
 
   struct Rep;
diff --git a/tensorflow/core/lib/io/table_test.cc b/tensorflow/core/lib/io/table_test.cc
index 4821240964..9cebbf40c6 100644
--- a/tensorflow/core/lib/io/table_test.cc
+++ b/tensorflow/core/lib/io/table_test.cc
@@ -19,7 +19,6 @@ limitations under the License.
 #include <map>
 #include <string>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/block.h"
 #include "tensorflow/core/lib/io/block_builder.h"
@@ -36,17 +35,17 @@ namespace tensorflow {
 namespace table {
 
 namespace {
-typedef std::pair<absl::string_view, absl::string_view> StringPiecePair;
+typedef std::pair<StringPiece, StringPiece> StringPiecePair;
 }
 
 namespace test {
-static absl::string_view RandomString(random::SimplePhilox* rnd, int len,
-                                      string* dst) {
+static StringPiece RandomString(random::SimplePhilox* rnd, int len,
+                                string* dst) {
   dst->resize(len);
   for (int i = 0; i < len; i++) {
     (*dst)[i] = static_cast<char>(' ' + rnd->Uniform(95));  // ' ' .. '~'
   }
-  return absl::string_view(*dst);
+  return StringPiece(*dst);
 }
 static string RandomKey(random::SimplePhilox* rnd, int len) {
   // Make sure to generate a wide variety of characters so we
@@ -59,9 +58,9 @@ static string RandomKey(random::SimplePhilox* rnd, int len) {
   }
   return result;
 }
-static absl::string_view CompressibleString(random::SimplePhilox* rnd,
-                                            double compressed_fraction,
-                                            size_t len, string* dst) {
+static StringPiece CompressibleString(random::SimplePhilox* rnd,
+                                      double compressed_fraction, size_t len,
+                                      string* dst) {
   int raw = static_cast<int>(len * compressed_fraction);
   if (raw < 1) raw = 1;
   string raw_data;
@@ -73,7 +72,7 @@ static absl::string_view CompressibleString(random::SimplePhilox* rnd,
     dst->append(raw_data);
   }
   dst->resize(len);
-  return absl::string_view(*dst);
+  return StringPiece(*dst);
 }
 }  // namespace test
 
@@ -84,7 +83,7 @@ namespace {
 struct STLLessThan {
   STLLessThan() {}
   bool operator()(const string& a, const string& b) const {
-    return absl::string_view(a).compare(absl::string_view(b)) < 0;
+    return StringPiece(a).compare(StringPiece(b)) < 0;
   }
 };
 }  // namespace
@@ -99,7 +98,7 @@ class StringSink : public WritableFile {
   Status Flush() override { return Status::OK(); }
   Status Sync() override { return Status::OK(); }
 
-  Status Append(absl::string_view data) override {
+  Status Append(StringPiece data) override {
     contents_.append(data.data(), data.size());
     return Status::OK();
   }
@@ -110,14 +109,14 @@ class StringSink : public WritableFile {
 
 class StringSource : public RandomAccessFile {
  public:
-  explicit StringSource(const absl::string_view& contents)
+  explicit StringSource(const StringPiece& contents)
       : contents_(contents.data(), contents.size()), bytes_read_(0) {}
 
   ~StringSource() override {}
 
   uint64 Size() const { return contents_.size(); }
 
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     if (offset > contents_.size()) {
       return errors::InvalidArgument("invalid Read offset");
@@ -126,7 +125,7 @@ class StringSource : public RandomAccessFile {
       n = contents_.size() - offset;
     }
     memcpy(scratch, &contents_[offset], n);
-    *result = absl::string_view(scratch, n);
+    *result = StringPiece(scratch, n);
     bytes_read_ += n;
     return Status::OK();
   }
@@ -147,7 +146,7 @@ class Constructor {
   explicit Constructor() : data_(STLLessThan()) {}
   virtual ~Constructor() {}
 
-  void Add(const string& key, const absl::string_view& value) {
+  void Add(const string& key, const StringPiece& value) {
     data_[key] = string(value);
   }
 
@@ -230,7 +229,7 @@ class TableConstructor : public Constructor {
 
   Iterator* NewIterator() const override { return table_->NewIterator(); }
 
-  uint64 ApproximateOffsetOf(const absl::string_view& key) const {
+  uint64 ApproximateOffsetOf(const StringPiece& key) const {
     return table_->ApproximateOffsetOf(key);
   }
 
@@ -348,7 +347,7 @@ class Harness : public ::testing::Test {
           model_iter = data.lower_bound(key);
           if (kVerbose)
             fprintf(stderr, "Seek '%s'\n", str_util::CEscape(key).c_str());
-          iter->Seek(absl::string_view(key));
+          iter->Seek(StringPiece(key));
           ASSERT_EQ(ToStringPiecePair(data, model_iter),
                     ToStringPiecePair(iter));
           break;
@@ -434,7 +433,7 @@ TEST_F(Harness, ZeroRestartPointsInBlock) {
   char data[sizeof(uint32)];
   memset(data, 0, sizeof(data));
   BlockContents contents;
-  contents.data = absl::string_view(data, sizeof(data));
+  contents.data = StringPiece(data, sizeof(data));
   contents.cachable = false;
   contents.heap_allocated = false;
   Block block(contents);
@@ -567,7 +566,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
 
 static bool SnappyCompressionSupported() {
   string out;
-  absl::string_view in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+  StringPiece in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
   return port::Snappy_Compress(in.data(), in.size(), &out);
 }
 
diff --git a/tensorflow/core/lib/io/two_level_iterator.cc b/tensorflow/core/lib/io/two_level_iterator.cc
index 9898d2ab42..ad66ae40d8 100644
--- a/tensorflow/core/lib/io/two_level_iterator.cc
+++ b/tensorflow/core/lib/io/two_level_iterator.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/two_level_iterator.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/block.h"
 #include "tensorflow/core/lib/io/format.h"
 #include "tensorflow/core/lib/io/iterator.h"
@@ -26,7 +25,7 @@ namespace table {
 
 namespace {
 
-typedef Iterator* (*BlockFunction)(void*, const absl::string_view&);
+typedef Iterator* (*BlockFunction)(void*, const StringPiece&);
 
 class TwoLevelIterator : public Iterator {
  public:
@@ -35,18 +34,18 @@ class TwoLevelIterator : public Iterator {
 
   ~TwoLevelIterator() override;
 
-  void Seek(const absl::string_view& target) override;
+  void Seek(const StringPiece& target) override;
   void SeekToFirst() override;
   void Next() override;
 
   bool Valid() const override {
     return (data_iter_ == nullptr) ? false : data_iter_->Valid();
   }
-  absl::string_view key() const override {
+  StringPiece key() const override {
     assert(Valid());
     return data_iter_->key();
   }
-  absl::string_view value() const override {
+  StringPiece value() const override {
     assert(Valid());
     return data_iter_->value();
   }
@@ -92,7 +91,7 @@ TwoLevelIterator::~TwoLevelIterator() {
   delete data_iter_;
 }
 
-void TwoLevelIterator::Seek(const absl::string_view& target) {
+void TwoLevelIterator::Seek(const StringPiece& target) {
   index_iter_->Seek(target);
   InitDataBlock();
   if (data_iter_ != nullptr) data_iter_->Seek(target);
@@ -137,7 +136,7 @@ void TwoLevelIterator::InitDataBlock() {
   if (!index_iter_->Valid()) {
     SetDataIterator(nullptr);
   } else {
-    absl::string_view handle = index_iter_->value();
+    StringPiece handle = index_iter_->value();
     if (data_iter_ != nullptr && handle.compare(data_block_handle_) == 0) {
       // data_iter_ is already constructed with this iterator, so
       // no need to change anything
diff --git a/tensorflow/core/lib/io/two_level_iterator.h b/tensorflow/core/lib/io/two_level_iterator.h
index 7472ec5965..ce972bc68b 100644
--- a/tensorflow/core/lib/io/two_level_iterator.h
+++ b/tensorflow/core/lib/io/two_level_iterator.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_LIB_IO_TWO_LEVEL_ITERATOR_H_
 #define TENSORFLOW_LIB_IO_TWO_LEVEL_ITERATOR_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/iterator.h"
 
 namespace tensorflow {
@@ -33,8 +32,7 @@ namespace table {
 // an iterator over the contents of the corresponding block.
 extern Iterator* NewTwoLevelIterator(
     Iterator* index_iter,
-    Iterator* (*block_function)(void* arg,
-                                const absl::string_view& index_value),
+    Iterator* (*block_function)(void* arg, const StringPiece& index_value),
     void* arg);
 
 }  // namespace table
diff --git a/tensorflow/core/lib/io/zlib_buffers_test.cc b/tensorflow/core/lib/io/zlib_buffers_test.cc
index 2ea197e9c8..156c712db8 100644
--- a/tensorflow/core/lib/io/zlib_buffers_test.cc
+++ b/tensorflow/core/lib/io/zlib_buffers_test.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/random_inputstream.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
@@ -76,7 +75,7 @@ void TestAllCombinations(CompressionOptions input_options,
                              output_options);
         TF_ASSERT_OK(out.Init());
 
-        TF_ASSERT_OK(out.Append(absl::string_view(data)));
+        TF_ASSERT_OK(out.Append(StringPiece(data)));
         TF_ASSERT_OK(out.Close());
         TF_ASSERT_OK(file_writer->Flush());
         TF_ASSERT_OK(file_writer->Close());
@@ -125,7 +124,7 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size,
   TF_ASSERT_OK(out.Init());
 
   for (int i = 0; i < num_writes; i++) {
-    TF_ASSERT_OK(out.Append(absl::string_view(data)));
+    TF_ASSERT_OK(out.Append(StringPiece(data)));
     if (with_flush) {
       TF_ASSERT_OK(out.Flush());
     }
@@ -177,7 +176,7 @@ TEST(ZlibInputStream, FailsToReadIfWindowBitsAreIncompatible) {
                        output_options);
   TF_ASSERT_OK(out.Init());
 
-  TF_ASSERT_OK(out.Append(absl::string_view(data)));
+  TF_ASSERT_OK(out.Append(StringPiece(data)));
   TF_ASSERT_OK(out.Close());
   TF_ASSERT_OK(file_writer->Flush());
   TF_ASSERT_OK(file_writer->Close());
@@ -204,7 +203,7 @@ void WriteCompressedFile(Env* env, const string& fname, int input_buf_size,
                        output_options);
   TF_ASSERT_OK(out.Init());
 
-  TF_ASSERT_OK(out.Append(absl::string_view(data)));
+  TF_ASSERT_OK(out.Append(StringPiece(data)));
   TF_ASSERT_OK(out.Close());
   TF_ASSERT_OK(file_writer->Flush());
   TF_ASSERT_OK(file_writer->Close());
diff --git a/tensorflow/core/lib/io/zlib_outputbuffer.cc b/tensorflow/core/lib/io/zlib_outputbuffer.cc
index ccede3afe0..cba139e6ad 100644
--- a/tensorflow/core/lib/io/zlib_outputbuffer.cc
+++ b/tensorflow/core/lib/io/zlib_outputbuffer.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/io/zlib_outputbuffer.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -73,7 +72,7 @@ int32 ZlibOutputBuffer::AvailableInputSpace() const {
   return input_buffer_capacity_ - z_stream_->avail_in;
 }
 
-void ZlibOutputBuffer::AddToInputBuffer(absl::string_view data) {
+void ZlibOutputBuffer::AddToInputBuffer(StringPiece data) {
   size_t bytes_to_write = data.size();
   CHECK_LE(bytes_to_write, AvailableInputSpace());
 
@@ -133,7 +132,7 @@ Status ZlibOutputBuffer::DeflateBuffered(bool last) {
 Status ZlibOutputBuffer::FlushOutputBufferToFile() {
   uint32 bytes_to_write = output_buffer_capacity_ - z_stream_->avail_out;
   if (bytes_to_write > 0) {
-    Status s = file_->Append(absl::string_view(
+    Status s = file_->Append(StringPiece(
         reinterpret_cast<char*>(z_stream_output_.get()), bytes_to_write));
     if (s.ok()) {
       z_stream_->next_out = z_stream_output_.get();
@@ -144,7 +143,7 @@ Status ZlibOutputBuffer::FlushOutputBufferToFile() {
   return Status::OK();
 }
 
-Status ZlibOutputBuffer::Append(absl::string_view data) {
+Status ZlibOutputBuffer::Append(StringPiece data) {
   // If there is sufficient free space in z_stream_input_ to fit data we
   // add it there and return.
   // If there isn't enough space we deflate the existing contents of
diff --git a/tensorflow/core/lib/io/zlib_outputbuffer.h b/tensorflow/core/lib/io/zlib_outputbuffer.h
index 7dfc6e781d..ccad2fda44 100644
--- a/tensorflow/core/lib/io/zlib_outputbuffer.h
+++ b/tensorflow/core/lib/io/zlib_outputbuffer.h
@@ -20,7 +20,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
 #include "tensorflow/core/platform/env.h"
@@ -63,7 +62,7 @@ class ZlibOutputBuffer : public WritableFile {
   // to file when the buffer is full.
   //
   // To immediately write contents to file call `Flush()`.
-  Status Append(absl::string_view data) override;
+  Status Append(StringPiece data) override;
 
   // Deflates any cached input and writes all output to file.
   Status Flush() override;
@@ -112,7 +111,7 @@ class ZlibOutputBuffer : public WritableFile {
 
   // Adds `data` to `z_stream_input_`.
   // Throws if `data.size()` > AvailableInputSpace().
-  void AddToInputBuffer(absl::string_view data);
+  void AddToInputBuffer(StringPiece data);
 
   // Returns the total space available in z_input_stream_ buffer.
   int32 AvailableInputSpace() const;
diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.h b/tensorflow/core/lib/jpeg/jpeg_mem.h
index 865983a146..03437a4e78 100644
--- a/tensorflow/core/lib/jpeg/jpeg_mem.h
+++ b/tensorflow/core/lib/jpeg/jpeg_mem.h
@@ -24,7 +24,7 @@ limitations under the License.
 #include <functional>
 #include <string>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/jpeg.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -136,7 +136,7 @@ struct CompressFlags {
   int y_density = 300;
 
   // If not empty, embed this XMP metadata in the image header
-  absl::string_view xmp_metadata;
+  StringPiece xmp_metadata;
 
   // The distance in bytes from one scanline to the other.  Should be at least
   // equal to width*components*sizeof(JSAMPLE).  If 0 is passed, the stride
diff --git a/tensorflow/core/lib/monitoring/collection_registry.cc b/tensorflow/core/lib/monitoring/collection_registry.cc
index 8d9fd5a721..fface033cb 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.cc
+++ b/tensorflow/core/lib/monitoring/collection_registry.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/monitoring/collection_registry.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
@@ -46,7 +45,7 @@ void Collector::CollectMetricDescriptor(
   metric_descriptor->name = string(metric_def->name());
   metric_descriptor->description = string(metric_def->description());
 
-  for (const absl::string_view label_name : metric_def->label_descriptions()) {
+  for (const StringPiece label_name : metric_def->label_descriptions()) {
     metric_descriptor->label_names.emplace_back(label_name);
   }
 
diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h
index c1493710db..9e4e1989dd 100644
--- a/tensorflow/core/lib/monitoring/collection_registry.h
+++ b/tensorflow/core/lib/monitoring/collection_registry.h
@@ -19,8 +19,8 @@ limitations under the License.
 #include <map>
 #include <memory>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/summary.pb.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/monitoring/collected_metrics.h"
 #include "tensorflow/core/lib/monitoring/metric_def.h"
 #include "tensorflow/core/platform/env.h"
@@ -186,7 +186,7 @@ class CollectionRegistry {
     CollectionFunction collection_function;
     uint64 registration_time_millis;
   };
-  std::map<absl::string_view, CollectionInfo> registry_ GUARDED_BY(mu_);
+  std::map<StringPiece, CollectionInfo> registry_ GUARDED_BY(mu_);
 
   TF_DISALLOW_COPY_AND_ASSIGN(CollectionRegistry);
 };
diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h
index 2d15547440..bc4365e439 100644
--- a/tensorflow/core/lib/monitoring/metric_def.h
+++ b/tensorflow/core/lib/monitoring/metric_def.h
@@ -19,8 +19,8 @@ limitations under the License.
 #include <array>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/summary.pb.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -82,9 +82,9 @@ class AbstractMetricDef {
 
   ValueType value_type() const { return value_type_; }
 
-  absl::string_view name() const { return name_; }
+  StringPiece name() const { return name_; }
 
-  absl::string_view description() const { return description_; }
+  StringPiece description() const { return description_; }
 
   const std::vector<string>& label_descriptions() const {
     return label_descriptions_;
@@ -95,8 +95,7 @@ class AbstractMetricDef {
   friend class MetricDef;
 
   AbstractMetricDef(const MetricKind kind, const ValueType value_type,
-                    const absl::string_view name,
-                    const absl::string_view description,
+                    const StringPiece name, const StringPiece description,
                     const std::vector<string>& label_descriptions)
       : kind_(kind),
         value_type_(value_type),
@@ -128,7 +127,7 @@ template <MetricKind metric_kind, typename Value, int NumLabels>
 class MetricDef : public AbstractMetricDef {
  public:
   template <typename... LabelDesc>
-  MetricDef(const absl::string_view name, const absl::string_view description,
+  MetricDef(const StringPiece name, const StringPiece description,
             const LabelDesc&... label_descriptions)
       : AbstractMetricDef(metric_kind, internal::GetValueType<Value>(), name,
                           description, {label_descriptions...}) {
diff --git a/tensorflow/core/lib/png/png_io.cc b/tensorflow/core/lib/png/png_io.cc
index 5aa5809bae..e226a15ccc 100644
--- a/tensorflow/core/lib/png/png_io.cc
+++ b/tensorflow/core/lib/png/png_io.cc
@@ -24,7 +24,6 @@ limitations under the License.
 // NOTE(skal): we don't '#include <setjmp.h>' before png.h as it otherwise
 // provokes a compile error. We instead let png.h include what is needed.
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/casts.h"
 #include "tensorflow/core/lib/png/png_io.h"
 #include "tensorflow/core/platform/byte_order.h"
@@ -130,7 +129,7 @@ void CommonFreeDecode(DecodeContext* context) {
   }
 }
 
-bool DecodeHeader(absl::string_view png_string, int* width, int* height,
+bool DecodeHeader(StringPiece png_string, int* width, int* height,
                   int* components, int* channel_bit_depth,
                   std::vector<std::pair<string, string> >* metadata) {
   DecodeContext context;
@@ -191,7 +190,7 @@ bool DecodeHeader(absl::string_view png_string, int* width, int* height,
   return true;
 }
 
-bool CommonInitDecode(absl::string_view png_string, int desired_channels,
+bool CommonInitDecode(StringPiece png_string, int desired_channels,
                       int desired_channel_bits, DecodeContext* context) {
   CHECK(desired_channel_bits == 8 || desired_channel_bits == 16)
       << "desired_channel_bits = " << desired_channel_bits;
diff --git a/tensorflow/core/lib/png/png_io.h b/tensorflow/core/lib/png/png_io.h
index bbe4325399..c876c5156a 100644
--- a/tensorflow/core/lib/png/png_io.h
+++ b/tensorflow/core/lib/png/png_io.h
@@ -35,7 +35,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/png.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -58,7 +58,7 @@ struct DecodeContext {
   DecodeContext() : png_ptr(NULL), info_ptr(NULL) {}
 };
 
-bool DecodeHeader(absl::string_view png_string, int* width, int* height,
+bool DecodeHeader(StringPiece png_string, int* width, int* height,
                   int* components, int* channel_bit_depth,
                   std::vector<std::pair<string, string> >* metadata);
 
@@ -73,7 +73,7 @@ bool DecodeHeader(absl::string_view png_string, int* width, int* height,
 //
 // desired_channels may be 0 to detected it from the input.
 
-bool CommonInitDecode(absl::string_view png_string, int desired_channels,
+bool CommonInitDecode(StringPiece png_string, int desired_channels,
                       int desired_channel_bits, DecodeContext* context);
 
 bool CommonFinishDecode(png_bytep data, int row_bytes, DecodeContext* context);
diff --git a/tensorflow/core/lib/strings/base64.cc b/tensorflow/core/lib/strings/base64.cc
index cd12afc171..c5a521f18a 100644
--- a/tensorflow/core/lib/strings/base64.cc
+++ b/tensorflow/core/lib/strings/base64.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <cstring>
 #include <memory>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
@@ -74,7 +73,7 @@ Status DecodeThreeChars(const char* codes, char* result) {
 }
 }  // namespace
 
-Status Base64Decode(absl::string_view data, string* decoded) {
+Status Base64Decode(StringPiece data, string* decoded) {
   if (decoded == nullptr) {
     return errors::Internal("'decoded' cannot be nullptr.");
   }
@@ -136,12 +135,11 @@ Status Base64Decode(absl::string_view data, string* decoded) {
   return Status::OK();
 }
 
-Status Base64Encode(absl::string_view source, string* encoded) {
+Status Base64Encode(StringPiece source, string* encoded) {
   return Base64Encode(source, false, encoded);
 }
 
-Status Base64Encode(absl::string_view source, bool with_padding,
-                    string* encoded) {
+Status Base64Encode(StringPiece source, bool with_padding, string* encoded) {
   const char* const base64_chars = kBase64UrlSafeChars;
   if (encoded == nullptr) {
     return errors::Internal("'encoded' cannot be nullptr.");
diff --git a/tensorflow/core/lib/strings/base64.h b/tensorflow/core/lib/strings/base64.h
index 3ea41f9ce3..48a7f42b81 100644
--- a/tensorflow/core/lib/strings/base64.h
+++ b/tensorflow/core/lib/strings/base64.h
@@ -17,7 +17,6 @@ limitations under the License.
 #define TENSORFLOW_LIB_STRINGS_B64_H_
 
 #include <string>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -25,14 +24,13 @@ namespace tensorflow {
 /// \brief Converts data into web-safe base64 encoding.
 ///
 /// See https://en.wikipedia.org/wiki/Base64
-Status Base64Encode(absl::string_view data, bool with_padding, string* encoded);
-Status Base64Encode(absl::string_view data,
-                    string* encoded);  // with_padding=false.
+Status Base64Encode(StringPiece data, bool with_padding, string* encoded);
+Status Base64Encode(StringPiece data, string* encoded);  // with_padding=false.
 
 /// \brief Converts data from web-safe base64 encoding.
 ///
 /// See https://en.wikipedia.org/wiki/Base64
-Status Base64Decode(absl::string_view data, string* decoded);
+Status Base64Decode(StringPiece data, string* decoded);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc
index ba7e554499..fff6f1fedc 100644
--- a/tensorflow/core/lib/strings/numbers.cc
+++ b/tensorflow/core/lib/strings/numbers.cc
@@ -24,7 +24,6 @@ limitations under the License.
 #include <locale>
 #include <unordered_map>
 
-#include "absl/strings/string_view.h"
 #include "double-conversion/double-conversion.h"
 
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -212,16 +211,16 @@ size_t DoubleToBuffer(double value, char* buffer) {
 }
 
 namespace {
-char SafeFirstChar(absl::string_view str) {
+char SafeFirstChar(StringPiece str) {
   if (str.empty()) return '\0';
   return str[0];
 }
-void SkipSpaces(absl::string_view* str) {
+void SkipSpaces(StringPiece* str) {
   while (isspace(SafeFirstChar(*str))) str->remove_prefix(1);
 }
 }  // namespace
 
-bool safe_strto64(absl::string_view str, int64* value) {
+bool safe_strto64(StringPiece str, int64* value) {
   SkipSpaces(&str);
 
   int64 vlimit = kint64max;
@@ -262,7 +261,7 @@ bool safe_strto64(absl::string_view str, int64* value) {
   return true;
 }
 
-bool safe_strtou64(absl::string_view str, uint64* value) {
+bool safe_strtou64(StringPiece str, uint64* value) {
   SkipSpaces(&str);
   if (!isdigit(SafeFirstChar(str))) return false;
 
@@ -283,7 +282,7 @@ bool safe_strtou64(absl::string_view str, uint64* value) {
   return true;
 }
 
-bool safe_strto32(absl::string_view str, int32* value) {
+bool safe_strto32(StringPiece str, int32* value) {
   SkipSpaces(&str);
 
   int64 vmax = kint32max;
@@ -313,7 +312,7 @@ bool safe_strto32(absl::string_view str, int32* value) {
   return true;
 }
 
-bool safe_strtou32(absl::string_view str, uint32* value) {
+bool safe_strtou32(StringPiece str, uint32* value) {
   SkipSpaces(&str);
   if (!isdigit(SafeFirstChar(str))) return false;
 
@@ -333,7 +332,7 @@ bool safe_strtou32(absl::string_view str, uint32* value) {
   return true;
 }
 
-bool safe_strtof(absl::string_view str, float* value) {
+bool safe_strtof(StringPiece str, float* value) {
   int processed_characters_count = -1;
   auto len = str.size();
 
@@ -346,7 +345,7 @@ bool safe_strtof(absl::string_view str, float* value) {
   return processed_characters_count > 0;
 }
 
-bool safe_strtod(absl::string_view str, double* value) {
+bool safe_strtod(StringPiece str, double* value) {
   int processed_characters_count = -1;
   auto len = str.size();
 
@@ -401,7 +400,7 @@ bool StringToFp(const string& s, Fprint* fp) {
   }
 }
 
-absl::string_view Uint64ToHexString(uint64 v, char* buf) {
+StringPiece Uint64ToHexString(uint64 v, char* buf) {
   static const char* hexdigits = "0123456789abcdef";
   const int num_byte = 16;
   buf[num_byte] = '\0';
@@ -409,10 +408,10 @@ absl::string_view Uint64ToHexString(uint64 v, char* buf) {
     buf[i] = hexdigits[v & 0xf];
     v >>= 4;
   }
-  return absl::string_view(buf, num_byte);
+  return StringPiece(buf, num_byte);
 }
 
-bool HexStringToUint64(const absl::string_view& s, uint64* result) {
+bool HexStringToUint64(const StringPiece& s, uint64* result) {
   uint64 v = 0;
   if (s.empty()) {
     return false;
diff --git a/tensorflow/core/lib/strings/numbers.h b/tensorflow/core/lib/strings/numbers.h
index f28cb0c28c..959290ba8c 100644
--- a/tensorflow/core/lib/strings/numbers.h
+++ b/tensorflow/core/lib/strings/numbers.h
@@ -18,7 +18,7 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -84,66 +84,66 @@ bool StringToFp(const string& s, Fprint* fp);
 // Convert a 64-bit fingerprint value to an ASCII representation that
 // is terminated by a '\0'.
 // Buf must point to an array of at least kFastToBufferSize characters
-absl::string_view Uint64ToHexString(uint64 v, char* buf);
+StringPiece Uint64ToHexString(uint64 v, char* buf);
 
 // Attempt to parse a uint64 in the form encoded by FastUint64ToHexString.  If
 // successful, stores the value in *v and returns true.  Otherwise,
 // returns false.
-bool HexStringToUint64(const absl::string_view& s, uint64* v);
+bool HexStringToUint64(const StringPiece& s, uint64* v);
 
 // Convert strings to 32bit integer values.
 // Leading and trailing spaces are allowed.
 // Return false with overflow or invalid input.
-bool safe_strto32(absl::string_view str, int32* value);
+bool safe_strto32(StringPiece str, int32* value);
 
 // Convert strings to unsigned 32bit integer values.
 // Leading and trailing spaces are allowed.
 // Return false with overflow or invalid input.
-bool safe_strtou32(absl::string_view str, uint32* value);
+bool safe_strtou32(StringPiece str, uint32* value);
 
 // Convert strings to 64bit integer values.
 // Leading and trailing spaces are allowed.
 // Return false with overflow or invalid input.
-bool safe_strto64(absl::string_view str, int64* value);
+bool safe_strto64(StringPiece str, int64* value);
 
 // Convert strings to unsigned 64bit integer values.
 // Leading and trailing spaces are allowed.
 // Return false with overflow or invalid input.
-bool safe_strtou64(absl::string_view str, uint64* value);
+bool safe_strtou64(StringPiece str, uint64* value);
 
 // Convert strings to floating point values.
 // Leading and trailing spaces are allowed.
 // Values may be rounded on over- and underflow.
 // Returns false on invalid input or if `strlen(value) >= kFastToBufferSize`.
-bool safe_strtof(absl::string_view str, float* value);
+bool safe_strtof(StringPiece str, float* value);
 
 // Convert strings to double precision floating point values.
 // Leading and trailing spaces are allowed.
 // Values may be rounded on over- and underflow.
 // Returns false on invalid input or if `strlen(value) >= kFastToBufferSize`.
-bool safe_strtod(absl::string_view str, double* value);
+bool safe_strtod(StringPiece str, double* value);
 
-inline bool ProtoParseNumeric(absl::string_view s, int32* value) {
+inline bool ProtoParseNumeric(StringPiece s, int32* value) {
   return safe_strto32(s, value);
 }
 
-inline bool ProtoParseNumeric(absl::string_view s, uint32* value) {
+inline bool ProtoParseNumeric(StringPiece s, uint32* value) {
   return safe_strtou32(s, value);
 }
 
-inline bool ProtoParseNumeric(absl::string_view s, int64* value) {
+inline bool ProtoParseNumeric(StringPiece s, int64* value) {
   return safe_strto64(s, value);
 }
 
-inline bool ProtoParseNumeric(absl::string_view s, uint64* value) {
+inline bool ProtoParseNumeric(StringPiece s, uint64* value) {
   return safe_strtou64(s, value);
 }
 
-inline bool ProtoParseNumeric(absl::string_view s, float* value) {
+inline bool ProtoParseNumeric(StringPiece s, float* value) {
   return safe_strtof(s, value);
 }
 
-inline bool ProtoParseNumeric(absl::string_view s, double* value) {
+inline bool ProtoParseNumeric(StringPiece s, double* value) {
   return safe_strtod(s, value);
 }
 
@@ -151,7 +151,7 @@ inline bool ProtoParseNumeric(absl::string_view s, double* value) {
 // Leading and trailing spaces are allowed.
 // Values may be rounded on over- and underflow.
 template <typename T>
-bool SafeStringToNumeric(absl::string_view s, T* value) {
+bool SafeStringToNumeric(StringPiece s, T* value) {
   return ProtoParseNumeric(s, value);
 }
 
diff --git a/tensorflow/core/lib/strings/numbers_test.cc b/tensorflow/core/lib/strings/numbers_test.cc
index 0e5e59a66b..5b595f9847 100644
--- a/tensorflow/core/lib/strings/numbers_test.cc
+++ b/tensorflow/core/lib/strings/numbers_test.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <cmath>
 #include <string>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -48,7 +47,7 @@ TEST(Uint64ToHexString, Ints) {
     for (int delta = -1; delta <= 1; delta++) {
       uint64 fp = (1ull << s) + delta;
       char buf[kFastToBufferSize];
-      absl::string_view s = Uint64ToHexString(fp, buf);
+      StringPiece s = Uint64ToHexString(fp, buf);
       uint64 fp2;
       EXPECT_TRUE(HexStringToUint64(s, &fp2));
       EXPECT_EQ(fp, fp2) << s;
@@ -145,11 +144,11 @@ TEST(safe_strto32, Int32s) {
   EXPECT_EQ(false, safe_strto32("-2147483649", &result));
 
   // Check that the StringPiece's length is respected.
-  EXPECT_EQ(true, safe_strto32(absl::string_view("123", 1), &result));
+  EXPECT_EQ(true, safe_strto32(StringPiece("123", 1), &result));
   EXPECT_EQ(1, result);
-  EXPECT_EQ(true, safe_strto32(absl::string_view(" -123", 4), &result));
+  EXPECT_EQ(true, safe_strto32(StringPiece(" -123", 4), &result));
   EXPECT_EQ(-12, result);
-  EXPECT_EQ(false, safe_strto32(absl::string_view(nullptr, 0), &result));
+  EXPECT_EQ(false, safe_strto32(StringPiece(nullptr, 0), &result));
 }
 
 TEST(safe_strtou32, UInt32s) {
@@ -178,11 +177,11 @@ TEST(safe_strtou32, UInt32s) {
   EXPECT_FALSE(safe_strtou32("-1", &result));
 
   // Check that the StringPiece's length is respected.
-  EXPECT_TRUE(safe_strtou32(absl::string_view("123", 1), &result));
+  EXPECT_TRUE(safe_strtou32(StringPiece("123", 1), &result));
   EXPECT_EQ(1, result);
-  EXPECT_TRUE(safe_strtou32(absl::string_view(" 123", 3), &result));
+  EXPECT_TRUE(safe_strtou32(StringPiece(" 123", 3), &result));
   EXPECT_EQ(12, result);
-  EXPECT_FALSE(safe_strtou32(absl::string_view(nullptr, 0), &result));
+  EXPECT_FALSE(safe_strtou32(StringPiece(nullptr, 0), &result));
 }
 
 TEST(safe_strto64, Int64s) {
@@ -214,11 +213,11 @@ TEST(safe_strto64, Int64s) {
   EXPECT_EQ(false, safe_strto64("-9223372036854775809", &result));
 
   // Check that the StringPiece's length is respected.
-  EXPECT_EQ(true, safe_strto64(absl::string_view("123", 1), &result));
+  EXPECT_EQ(true, safe_strto64(StringPiece("123", 1), &result));
   EXPECT_EQ(1, result);
-  EXPECT_EQ(true, safe_strto64(absl::string_view(" -123", 4), &result));
+  EXPECT_EQ(true, safe_strto64(StringPiece(" -123", 4), &result));
   EXPECT_EQ(-12, result);
-  EXPECT_EQ(false, safe_strto64(absl::string_view(nullptr, 0), &result));
+  EXPECT_EQ(false, safe_strto64(StringPiece(nullptr, 0), &result));
 }
 
 TEST(safe_strtou64, UInt64s) {
@@ -249,11 +248,11 @@ TEST(safe_strtou64, UInt64s) {
   EXPECT_FALSE(safe_strtou64("-1", &result));
 
   // Check that the StringPiece's length is respected.
-  EXPECT_TRUE(safe_strtou64(absl::string_view("123", 1), &result));
+  EXPECT_TRUE(safe_strtou64(StringPiece("123", 1), &result));
   EXPECT_EQ(1, result);
-  EXPECT_TRUE(safe_strtou64(absl::string_view(" 123", 3), &result));
+  EXPECT_TRUE(safe_strtou64(StringPiece(" 123", 3), &result));
   EXPECT_EQ(12, result);
-  EXPECT_FALSE(safe_strtou64(absl::string_view(nullptr, 0), &result));
+  EXPECT_FALSE(safe_strtou64(StringPiece(nullptr, 0), &result));
 }
 
 TEST(safe_strtof, Float) {
diff --git a/tensorflow/core/lib/strings/ordered_code.cc b/tensorflow/core/lib/strings/ordered_code.cc
index 5b8ef93431..ef90050b4f 100644
--- a/tensorflow/core/lib/strings/ordered_code.cc
+++ b/tensorflow/core/lib/strings/ordered_code.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include <assert.h>
 #include <stddef.h>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
@@ -160,7 +160,7 @@ const char* OrderedCode::TEST_SkipToNextSpecialByte(const char* start,
 
 // Helper routine to encode "s" and append to "*dest", escaping special
 // characters.
-inline static void EncodeStringFragment(string* dest, absl::string_view s) {
+inline static void EncodeStringFragment(string* dest, StringPiece s) {
   const char* p = s.data();
   const char* limit = p + s.size();
   const char* copy_start = p;
@@ -187,7 +187,7 @@ inline static void EncodeStringFragment(string* dest, absl::string_view s) {
   }
 }
 
-void OrderedCode::WriteString(string* dest, absl::string_view s) {
+void OrderedCode::WriteString(string* dest, StringPiece s) {
   EncodeStringFragment(dest, s);
   AppendBytes(dest, kEscape1_Separator, 2);
 }
@@ -212,7 +212,7 @@ void OrderedCode::WriteNumIncreasing(string* dest, uint64 val) {
 // If parse succeeds, return true, consume encoding from
 // "*src", and if result != NULL append the decoded string to "*result".
 // Otherwise, return false and leave both undefined.
-inline static bool ReadStringInternal(absl::string_view* src, string* result) {
+inline static bool ReadStringInternal(StringPiece* src, string* result) {
   const char* start = src->data();
   const char* string_limit = src->data() + src->size();
 
@@ -267,11 +267,11 @@ inline static bool ReadStringInternal(absl::string_view* src, string* result) {
   return false;
 }
 
-bool OrderedCode::ReadString(absl::string_view* src, string* result) {
+bool OrderedCode::ReadString(StringPiece* src, string* result) {
   return ReadStringInternal(src, result);
 }
 
-bool OrderedCode::ReadNumIncreasing(absl::string_view* src, uint64* result) {
+bool OrderedCode::ReadNumIncreasing(StringPiece* src, uint64* result) {
   if (src->empty()) {
     return false;  // Not enough bytes
   }
@@ -485,8 +485,7 @@ void OrderedCode::WriteSignedNumIncreasing(string* dest, int64 val) {
   dest->append(begin, len);
 }
 
-bool OrderedCode::ReadSignedNumIncreasing(absl::string_view* src,
-                                          int64* result) {
+bool OrderedCode::ReadSignedNumIncreasing(StringPiece* src, int64* result) {
   if (src->empty()) return false;
   const uint64 xor_mask = (!((*src)[0] & 0x80)) ? ~0ULL : 0ULL;
   const unsigned char first_byte = (*src)[0] ^ (xor_mask & 0xff);
diff --git a/tensorflow/core/lib/strings/ordered_code.h b/tensorflow/core/lib/strings/ordered_code.h
index 54c7326011..91870cfec6 100644
--- a/tensorflow/core/lib/strings/ordered_code.h
+++ b/tensorflow/core/lib/strings/ordered_code.h
@@ -39,7 +39,7 @@ limitations under the License.
 #define TENSORFLOW_LIB_STRINGS_ORDERED_CODE_H__
 
 #include <string>
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -53,7 +53,7 @@ class OrderedCode {
   // Encoding routines: each one of the following routines append
   // one item to "*dest" in an encoding where larger values are
   // ordered lexicographically after smaller values.
-  static void WriteString(string* dest, absl::string_view str);
+  static void WriteString(string* dest, StringPiece str);
   static void WriteNumIncreasing(string* dest, uint64 num);
   static void WriteSignedNumIncreasing(string* dest, int64 num);
 
@@ -65,9 +65,9 @@ class OrderedCode {
   // result.  In case of string result, the decoded string is appended to
   // "*result".  Returns true if the next item was read successfully, false
   // otherwise.
-  static bool ReadString(absl::string_view* src, string* result);
-  static bool ReadNumIncreasing(absl::string_view* src, uint64* result);
-  static bool ReadSignedNumIncreasing(absl::string_view* src, int64* result);
+  static bool ReadString(StringPiece* src, string* result);
+  static bool ReadNumIncreasing(StringPiece* src, uint64* result);
+  static bool ReadSignedNumIncreasing(StringPiece* src, int64* result);
 
   // Helper for testing: corrupt "*str" by changing the kth item separator
   // in the string.
diff --git a/tensorflow/core/lib/strings/ordered_code_test.cc b/tensorflow/core/lib/strings/ordered_code_test.cc
index 53457b8ce7..ede9f4d390 100644
--- a/tensorflow/core/lib/strings/ordered_code_test.cc
+++ b/tensorflow/core/lib/strings/ordered_code_test.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include <limits>
 #include <vector>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/random/simple_philox.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/logging.h"
@@ -47,7 +47,7 @@ string RandomString(random::SimplePhilox* rnd, size_t len) {
 template <typename T>
 void OCWriteIncreasing(string* dest, const T& val);
 template <typename T>
-bool OCReadIncreasing(absl::string_view* src, T* result);
+bool OCReadIncreasing(StringPiece* src, T* result);
 
 // Read/WriteIncreasing<string>
 template <>
@@ -55,7 +55,7 @@ void OCWriteIncreasing<string>(string* dest, const string& val) {
   OrderedCode::WriteString(dest, val);
 }
 template <>
-bool OCReadIncreasing<string>(absl::string_view* src, string* result) {
+bool OCReadIncreasing<string>(StringPiece* src, string* result) {
   return OrderedCode::ReadString(src, result);
 }
 
@@ -65,7 +65,7 @@ void OCWriteIncreasing<uint64>(string* dest, const uint64& val) {
   OrderedCode::WriteNumIncreasing(dest, val);
 }
 template <>
-bool OCReadIncreasing<uint64>(absl::string_view* src, uint64* result) {
+bool OCReadIncreasing<uint64>(StringPiece* src, uint64* result) {
   return OrderedCode::ReadNumIncreasing(src, result);
 }
 
@@ -75,7 +75,7 @@ void OCWriteIncreasing<int64>(string* dest, const int64& val) {
   OrderedCode::WriteSignedNumIncreasing(dest, val);
 }
 template <>
-bool OCReadIncreasing<int64>(absl::string_view* src, int64* result) {
+bool OCReadIncreasing<int64>(StringPiece* src, int64* result) {
   return OrderedCode::ReadSignedNumIncreasing(src, result);
 }
 
@@ -92,7 +92,7 @@ void OCWriteToString(string* result, T val) {
 }
 
 template <typename T>
-bool OCRead(absl::string_view* s, T* val) {
+bool OCRead(StringPiece* s, T* val) {
   return OCReadIncreasing<T>(s, val);
 }
 
@@ -103,12 +103,12 @@ template <typename T>
 T TestRead(const string& a) {
   // gracefully reject any proper prefix of an encoding
   for (int i = 0; i < a.size() - 1; ++i) {
-    absl::string_view s(a.data(), i);
+    StringPiece s(a.data(), i);
     CHECK(!OCRead<T>(&s, nullptr));
     CHECK_EQ(s, a.substr(0, i));
   }
 
-  absl::string_view s(a);
+  StringPiece s(a);
   T v;
   CHECK(OCRead<T>(&s, &v));
   CHECK(s.empty());
@@ -304,7 +304,7 @@ inline string StrNot(const string& s) {
 
 template <typename T>
 void TestInvalidEncoding(const string& s) {
-  absl::string_view p(s);
+  StringPiece p(s);
   EXPECT_FALSE(OCRead<T>(&p, nullptr));
   EXPECT_EQ(s, p);
 }
@@ -338,7 +338,7 @@ TEST(OrderedCodeInvalidEncodingsDeathTest, NonCanonical) {
 
     EXPECT_NE(OCWrite<uint64>(0), non_minimal);
 #ifndef NDEBUG
-    absl::string_view s(non_minimal);
+    StringPiece s(non_minimal);
     EXPECT_DEATH(OrderedCode::ReadNumIncreasing(&s, nullptr),
                  "invalid encoding");
 #else
@@ -357,7 +357,7 @@ TEST(OrderedCodeInvalidEncodingsDeathTest, NonCanonical) {
 
     EXPECT_NE(OCWrite<int64>(0), non_minimal);
 #ifndef NDEBUG
-    absl::string_view s(non_minimal);
+    StringPiece s(non_minimal);
     EXPECT_DEATH(OrderedCode::ReadSignedNumIncreasing(&s, nullptr),
                  "invalid encoding")
         << n;
@@ -409,7 +409,7 @@ void BM_ReadNum(int n, T multiplier) {
   uint32 index = 0;
   while (n-- > 0) {
     T val;
-    absl::string_view s = values[index++ % kValues];
+    StringPiece s = values[index++ % kValues];
     OCRead<T>(&s, &val);
   }
 }
@@ -446,8 +446,8 @@ TEST(String, EncodeDecode) {
       OCWriteToString<string>(&out, b);
 
       string a2, b2, dummy;
-      absl::string_view s = out;
-      absl::string_view s2 = out;
+      StringPiece s = out;
+      StringPiece s2 = out;
       CHECK(OCRead<string>(&s, &a2));
       CHECK(OCRead<string>(&s2, nullptr));
       CHECK_EQ(s, s2);
@@ -467,9 +467,9 @@ TEST(String, EncodeDecode) {
 }
 
 // 'str' is a string literal that may contain '\0'.
-#define STATIC_STR(str) absl::string_view((str), sizeof(str) - 1)
+#define STATIC_STR(str) StringPiece((str), sizeof(str) - 1)
 
-string EncodeStringIncreasing(absl::string_view value) {
+string EncodeStringIncreasing(StringPiece value) {
   string encoded;
   OrderedCode::WriteString(&encoded, value);
   return encoded;
@@ -523,7 +523,7 @@ TEST(EncodingIsExpected, String) {
     OrderedCode::WriteString(&result, t.first);
     EXPECT_EQ(t.second, result);
 
-    absl::string_view in = result;
+    StringPiece in = result;
     string decoded;
     EXPECT_TRUE(OrderedCode::ReadString(&in, &decoded));
     EXPECT_EQ(t.first, decoded);
@@ -755,7 +755,7 @@ TEST(EncodingIsExpected, Unsigned) {
     OrderedCode::WriteNumIncreasing(&result, num);
     EXPECT_EQ(t.second, result) << std::hex << num;
 
-    absl::string_view in = result;
+    StringPiece in = result;
     uint64 decoded;
     EXPECT_TRUE(OrderedCode::ReadNumIncreasing(&in, &decoded));
     EXPECT_EQ(num, decoded);
@@ -1202,7 +1202,7 @@ TEST(EncodingIsExpected, Signed) {
     OrderedCode::WriteSignedNumIncreasing(&result, num);
     EXPECT_EQ(t.second, result) << std::hex << num;
 
-    absl::string_view in = result;
+    StringPiece in = result;
     int64 decoded;
     EXPECT_TRUE(OrderedCode::ReadSignedNumIncreasing(&in, &decoded));
     EXPECT_EQ(num, decoded);
@@ -1244,7 +1244,7 @@ void BM_ReadString(int n, int len) {
   testing::StartTiming();
   while (n-- > 0) {
     result.clear();
-    absl::string_view s = data;
+    StringPiece s = data;
     OCRead<string>(&s, &result);
   }
 }
diff --git a/tensorflow/core/lib/strings/proto_text_util.cc b/tensorflow/core/lib/strings/proto_text_util.cc
index 906c1150d8..5e9fa24a87 100644
--- a/tensorflow/core/lib/strings/proto_text_util.cc
+++ b/tensorflow/core/lib/strings/proto_text_util.cc
@@ -14,13 +14,12 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/strings/proto_text_util.h"
-#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace strings {
 
 bool ProtoParseBoolFromScanner(Scanner* scanner, bool* value) {
-  absl::string_view bool_str;
+  StringPiece bool_str;
   if (!scanner->RestartCapture()
            .Many(Scanner::LETTER_DIGIT)
            .GetResult(nullptr, &bool_str)) {
@@ -42,7 +41,7 @@ bool ProtoParseStringLiteralFromScanner(Scanner* scanner, string* value) {
   const char quote = scanner->Peek();
   if (quote != '\'' && quote != '"') return false;
 
-  absl::string_view value_sp;
+  StringPiece value_sp;
   if (!scanner->One(Scanner::ALL)
            .RestartCapture()
            .ScanEscapedUntil(quote)
diff --git a/tensorflow/core/lib/strings/proto_text_util.h b/tensorflow/core/lib/strings/proto_text_util.h
index 3a2bcef6d5..05dbda6e15 100644
--- a/tensorflow/core/lib/strings/proto_text_util.h
+++ b/tensorflow/core/lib/strings/proto_text_util.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_STRINGS_PROTO_TEXT_UTIL_H_
 #define TENSORFLOW_CORE_LIB_STRINGS_PROTO_TEXT_UTIL_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -101,8 +100,7 @@ class ProtoTextOutput {
   }
 
  private:
-  void AppendFieldAndValue(const char field_name[],
-                           absl::string_view value_text) {
+  void AppendFieldAndValue(const char field_name[], StringPiece value_text) {
     StrAppend(output_, level_empty_ ? "" : field_separator_, indent_,
               field_name, kColonSeparator, value_text);
     level_empty_ = false;
@@ -133,7 +131,7 @@ inline void ProtoSpaceAndComments(Scanner* scanner) {
 // failed.
 template <typename T>
 bool ProtoParseNumericFromScanner(Scanner* scanner, T* value) {
-  absl::string_view numeric_str;
+  StringPiece numeric_str;
   scanner->RestartCapture();
   if (!scanner->Many(Scanner::LETTER_DIGIT_DOT_PLUS_MINUS)
            .GetResult(nullptr, &numeric_str)) {
diff --git a/tensorflow/core/lib/strings/scanner.cc b/tensorflow/core/lib/strings/scanner.cc
index 46d3858e30..39a2265aa2 100644
--- a/tensorflow/core/lib/strings/scanner.cc
+++ b/tensorflow/core/lib/strings/scanner.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/strings/scanner.h"
-#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 namespace strings {
@@ -42,8 +41,7 @@ void Scanner::ScanUntilImpl(char end_ch, bool escaped) {
   }
 }
 
-bool Scanner::GetResult(absl::string_view* remaining,
-                        absl::string_view* capture) {
+bool Scanner::GetResult(StringPiece* remaining, StringPiece* capture) {
   if (error_) {
     return false;
   }
@@ -52,7 +50,7 @@ bool Scanner::GetResult(absl::string_view* remaining,
   }
   if (capture != nullptr) {
     const char* end = capture_end_ == nullptr ? cur_.data() : capture_end_;
-    *capture = absl::string_view(capture_start_, end - capture_start_);
+    *capture = StringPiece(capture_start_, end - capture_start_);
   }
   return true;
 }
diff --git a/tensorflow/core/lib/strings/scanner.h b/tensorflow/core/lib/strings/scanner.h
index 4a28cbf5aa..c82e771368 100644
--- a/tensorflow/core/lib/strings/scanner.h
+++ b/tensorflow/core/lib/strings/scanner.h
@@ -17,7 +17,7 @@ limitations under the License.
 #define TENSORFLOW_LIB_STRINGS_SCANNER_H_
 
 #include <string>
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/macros.h"
 
@@ -61,9 +61,7 @@ class Scanner {
     UPPERLETTER,
   };
 
-  explicit Scanner(absl::string_view source) : cur_(source) {
-    RestartCapture();
-  }
+  explicit Scanner(StringPiece source) : cur_(source) { RestartCapture(); }
 
   // Consume the next character of the given class from input. If the next
   // character is not in the class, then GetResult will ultimately return false.
@@ -77,14 +75,14 @@ class Scanner {
 
   // Consume the next s.size() characters of the input, if they match <s>. If
   // they don't match <s>, this is a no-op.
-  Scanner& ZeroOrOneLiteral(absl::string_view s) {
+  Scanner& ZeroOrOneLiteral(StringPiece s) {
     str_util::ConsumePrefix(&cur_, s);
     return *this;
   }
 
   // Consume the next s.size() characters of the input, if they match <s>. If
   // they don't match <s>, then GetResult will ultimately return false.
-  Scanner& OneLiteral(absl::string_view s) {
+  Scanner& OneLiteral(StringPiece s) {
     if (!str_util::ConsumePrefix(&cur_, s)) {
       error_ = true;
     }
@@ -161,8 +159,8 @@ class Scanner {
   // Returns true if the input string successfully matched. When true is
   // returned, the remaining string is returned in <remaining> and the captured
   // string returned in <capture>, if non-NULL.
-  bool GetResult(absl::string_view* remaining = nullptr,
-                 absl::string_view* capture = nullptr);
+  bool GetResult(StringPiece* remaining = nullptr,
+                 StringPiece* capture = nullptr);
 
  private:
   void ScanUntilImpl(char end_ch, bool escaped);
@@ -228,7 +226,7 @@ class Scanner {
     return false;
   }
 
-  absl::string_view cur_;
+  StringPiece cur_;
   const char* capture_start_ = nullptr;
   const char* capture_end_ = nullptr;
   bool error_ = false;
diff --git a/tensorflow/core/lib/strings/scanner_test.cc b/tensorflow/core/lib/strings/scanner_test.cc
index 79084ed4d6..b0f568a03e 100644
--- a/tensorflow/core/lib/strings/scanner_test.cc
+++ b/tensorflow/core/lib/strings/scanner_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/lib/strings/scanner.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -37,7 +36,7 @@ class ScannerTest : public ::testing::Test {
 };
 
 TEST_F(ScannerTest, Any) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
   EXPECT_TRUE(Scanner("   horse0123")
                   .Any(Scanner::SPACE)
                   .Any(Scanner::DIGIT)
@@ -64,7 +63,7 @@ TEST_F(ScannerTest, Any) {
 }
 
 TEST_F(ScannerTest, AnySpace) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
   EXPECT_TRUE(Scanner("  a b ")
                   .AnySpace()
                   .One(Scanner::LETTER)
@@ -75,7 +74,7 @@ TEST_F(ScannerTest, AnySpace) {
 }
 
 TEST_F(ScannerTest, AnyEscapedNewline) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
   EXPECT_TRUE(Scanner("\\\n")
                   .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
                   .GetResult(&remaining, &match));
@@ -84,7 +83,7 @@ TEST_F(ScannerTest, AnyEscapedNewline) {
 }
 
 TEST_F(ScannerTest, AnyEmptyString) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
   EXPECT_TRUE(Scanner("")
                   .Any(Scanner::LETTER_DIGIT_UNDERSCORE)
                   .GetResult(&remaining, &match));
@@ -100,7 +99,7 @@ TEST_F(ScannerTest, Eos) {
 }
 
 TEST_F(ScannerTest, Many) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
   EXPECT_TRUE(Scanner("abc").Many(Scanner::LETTER).GetResult());
   EXPECT_FALSE(Scanner("0").Many(Scanner::LETTER).GetResult());
   EXPECT_FALSE(Scanner("").Many(Scanner::LETTER).GetResult());
@@ -116,7 +115,7 @@ TEST_F(ScannerTest, Many) {
 }
 
 TEST_F(ScannerTest, One) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
   EXPECT_TRUE(Scanner("abc").One(Scanner::LETTER).GetResult());
   EXPECT_FALSE(Scanner("0").One(Scanner::LETTER).GetResult());
   EXPECT_FALSE(Scanner("").One(Scanner::LETTER).GetResult());
@@ -138,7 +137,7 @@ TEST_F(ScannerTest, OneLiteral) {
 }
 
 TEST_F(ScannerTest, ScanUntil) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
   EXPECT_TRUE(Scanner(R"(' \1 \2 \3 \' \\'rest)")
                   .OneLiteral("'")
                   .ScanUntil('\'')
@@ -165,7 +164,7 @@ TEST_F(ScannerTest, ScanUntil) {
 }
 
 TEST_F(ScannerTest, ScanEscapedUntil) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
   EXPECT_TRUE(Scanner(R"(' \1 \2 \3 \' \\'rest)")
                   .OneLiteral("'")
                   .ScanEscapedUntil('\'')
@@ -185,7 +184,7 @@ TEST_F(ScannerTest, ScanEscapedUntil) {
 }
 
 TEST_F(ScannerTest, ZeroOrOneLiteral) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
   EXPECT_TRUE(
       Scanner("abc").ZeroOrOneLiteral("abC").GetResult(&remaining, &match));
   EXPECT_EQ("abc", remaining);
@@ -206,7 +205,7 @@ TEST_F(ScannerTest, ZeroOrOneLiteral) {
 // Test output of GetResult (including the forms with optional params),
 // and that it can be called multiple times.
 TEST_F(ScannerTest, CaptureAndGetResult) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
 
   Scanner scan("  first    second");
   EXPECT_TRUE(scan.Any(Scanner::SPACE)
@@ -239,7 +238,7 @@ TEST_F(ScannerTest, CaptureAndGetResult) {
 // Tests that if StopCapture is not called, then calling GetResult, then
 // scanning more, then GetResult again will update the capture.
 TEST_F(ScannerTest, MultipleGetResultExtendsCapture) {
-  absl::string_view remaining, match;
+  StringPiece remaining, match;
 
   Scanner scan("one2three");
   EXPECT_TRUE(scan.Many(Scanner::LETTER).GetResult(&remaining, &match));
@@ -256,8 +255,8 @@ TEST_F(ScannerTest, MultipleGetResultExtendsCapture) {
 TEST_F(ScannerTest, FailedMatchDoesntChangeResult) {
   // A failed match doesn't change pointers passed to GetResult.
   Scanner scan("name");
-  absl::string_view remaining = "rem";
-  absl::string_view match = "match";
+  StringPiece remaining = "rem";
+  StringPiece match = "match";
   EXPECT_FALSE(scan.One(Scanner::SPACE).GetResult(&remaining, &match));
   EXPECT_EQ("rem", remaining);
   EXPECT_EQ("match", match);
@@ -266,8 +265,8 @@ TEST_F(ScannerTest, FailedMatchDoesntChangeResult) {
 TEST_F(ScannerTest, DefaultCapturesAll) {
   // If RestartCapture() is not called, the whole string is used.
   Scanner scan("a b");
-  absl::string_view remaining = "rem";
-  absl::string_view match = "match";
+  StringPiece remaining = "rem";
+  StringPiece match = "match";
   EXPECT_TRUE(scan.Any(Scanner::LETTER)
                   .AnySpace()
                   .Any(Scanner::LETTER)
diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
index ba40916b94..3aba5ec80e 100644
--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <ctype.h>
 #include <algorithm>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
@@ -28,7 +27,7 @@ namespace str_util {
 
 static char hex_char[] = "0123456789abcdef";
 
-string CEscape(absl::string_view src) {
+string CEscape(StringPiece src) {
   string dest;
 
   for (unsigned char c : src) {
@@ -87,7 +86,7 @@ inline int hex_digit_to_int(char c) {
   return x & 0xf;
 }
 
-bool CUnescapeInternal(absl::string_view source, string* dest,
+bool CUnescapeInternal(StringPiece source, string* dest,
                        string::size_type* dest_len, string* error) {
   const char* p = source.data();
   const char* end = source.end();
@@ -217,8 +216,8 @@ bool CUnescapeInternal(absl::string_view source, string* dest,
 }
 
 template <typename T>
-bool SplitAndParseAsInts(absl::string_view text, char delim,
-                         std::function<bool(absl::string_view, T*)> converter,
+bool SplitAndParseAsInts(StringPiece text, char delim,
+                         std::function<bool(StringPiece, T*)> converter,
                          std::vector<T>* result) {
   result->clear();
   std::vector<string> num_strings = Split(text, delim);
@@ -232,7 +231,7 @@ bool SplitAndParseAsInts(absl::string_view text, char delim,
 
 }  // namespace
 
-bool CUnescape(absl::string_view source, string* dest, string* error) {
+bool CUnescape(StringPiece source, string* dest, string* error) {
   dest->resize(source.size());
   string::size_type dest_size;
   if (!CUnescapeInternal(source, dest, &dest_size, error)) {
@@ -250,7 +249,7 @@ void StripTrailingWhitespace(string* s) {
 }
 
 // Return lower-cased version of s.
-string Lowercase(absl::string_view s) {
+string Lowercase(StringPiece s) {
   string result(s.data(), s.size());
   for (char& c : result) {
     c = tolower(c);
@@ -259,7 +258,7 @@ string Lowercase(absl::string_view s) {
 }
 
 // Return upper-cased version of s.
-string Uppercase(absl::string_view s) {
+string Uppercase(StringPiece s) {
   string result(s.data(), s.size());
   for (char& c : result) {
     c = toupper(c);
@@ -267,7 +266,7 @@ string Uppercase(absl::string_view s) {
   return result;
 }
 
-string ArgDefCase(absl::string_view s) {
+string ArgDefCase(StringPiece s) {
   const size_t n = s.size();
 
   // Compute the size of resulting string.
@@ -319,18 +318,18 @@ string ArgDefCase(absl::string_view s) {
   return result;
 }
 
-void TitlecaseString(string* s, absl::string_view delimiters) {
+void TitlecaseString(string* s, StringPiece delimiters) {
   bool upper = true;
   for (string::iterator ss = s->begin(); ss != s->end(); ++ss) {
     if (upper) {
       *ss = toupper(*ss);
     }
-    upper = (delimiters.find(*ss) != absl::string_view::npos);
+    upper = (delimiters.find(*ss) != StringPiece::npos);
   }
 }
 
-string StringReplace(absl::string_view s, absl::string_view oldsub,
-                     absl::string_view newsub, bool replace_all) {
+string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
+                     bool replace_all) {
   // TODO(jlebar): We could avoid having to shift data around in the string if
   // we had a StringPiece::find() overload that searched for a StringPiece.
   string res(s);
@@ -348,7 +347,7 @@ string StringReplace(absl::string_view s, absl::string_view oldsub,
   return res;
 }
 
-size_t RemoveLeadingWhitespace(absl::string_view* text) {
+size_t RemoveLeadingWhitespace(StringPiece* text) {
   size_t count = 0;
   const char* ptr = text->data();
   while (count < text->size() && isspace(*ptr)) {
@@ -359,7 +358,7 @@ size_t RemoveLeadingWhitespace(absl::string_view* text) {
   return count;
 }
 
-size_t RemoveTrailingWhitespace(absl::string_view* text) {
+size_t RemoveTrailingWhitespace(StringPiece* text) {
   size_t count = 0;
   const char* ptr = text->data() + text->size() - 1;
   while (count < text->size() && isspace(*ptr)) {
@@ -370,12 +369,12 @@ size_t RemoveTrailingWhitespace(absl::string_view* text) {
   return count;
 }
 
-size_t RemoveWhitespaceContext(absl::string_view* text) {
+size_t RemoveWhitespaceContext(StringPiece* text) {
   // use RemoveLeadingWhitespace() and RemoveTrailingWhitespace() to do the job
   return (RemoveLeadingWhitespace(text) + RemoveTrailingWhitespace(text));
 }
 
-bool ConsumePrefix(absl::string_view* s, absl::string_view expected) {
+bool ConsumePrefix(StringPiece* s, StringPiece expected) {
   if (StartsWith(*s, expected)) {
     s->remove_prefix(expected.size());
     return true;
@@ -383,7 +382,7 @@ bool ConsumePrefix(absl::string_view* s, absl::string_view expected) {
   return false;
 }
 
-bool ConsumeSuffix(absl::string_view* s, absl::string_view expected) {
+bool ConsumeSuffix(StringPiece* s, StringPiece expected) {
   if (EndsWith(*s, expected)) {
     s->remove_suffix(expected.size());
     return true;
@@ -391,7 +390,7 @@ bool ConsumeSuffix(absl::string_view* s, absl::string_view expected) {
   return false;
 }
 
-bool ConsumeLeadingDigits(absl::string_view* s, uint64* val) {
+bool ConsumeLeadingDigits(StringPiece* s, uint64* val) {
   const char* p = s->data();
   const char* limit = p + s->size();
   uint64 v = 0;
@@ -416,7 +415,7 @@ bool ConsumeLeadingDigits(absl::string_view* s, uint64* val) {
   }
 }
 
-bool ConsumeNonWhitespace(absl::string_view* s, absl::string_view* val) {
+bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val) {
   const char* p = s->data();
   const char* limit = p + s->size();
   while (p < limit) {
@@ -426,29 +425,29 @@ bool ConsumeNonWhitespace(absl::string_view* s, absl::string_view* val) {
   }
   const size_t n = p - s->data();
   if (n > 0) {
-    *val = absl::string_view(s->data(), n);
+    *val = StringPiece(s->data(), n);
     s->remove_prefix(n);
     return true;
   } else {
-    *val = absl::string_view();
+    *val = StringPiece();
     return false;
   }
 }
 
-bool SplitAndParseAsInts(absl::string_view text, char delim,
+bool SplitAndParseAsInts(StringPiece text, char delim,
                          std::vector<int32>* result) {
   return SplitAndParseAsInts<int32>(text, delim, strings::safe_strto32, result);
 }
 
-bool SplitAndParseAsInts(absl::string_view text, char delim,
+bool SplitAndParseAsInts(StringPiece text, char delim,
                          std::vector<int64>* result) {
   return SplitAndParseAsInts<int64>(text, delim, strings::safe_strto64, result);
 }
 
-bool SplitAndParseAsFloats(absl::string_view text, char delim,
+bool SplitAndParseAsFloats(StringPiece text, char delim,
                            std::vector<float>* result) {
   return SplitAndParseAsInts<float>(text, delim,
-                                    [](absl::string_view str, float* value) {
+                                    [](StringPiece str, float* value) {
                                       return strings::safe_strtof(str, value);
                                     },
                                     result);
@@ -462,18 +461,18 @@ size_t Strnlen(const char* str, const size_t string_max_len) {
   return len;
 }
 
-bool StrContains(absl::string_view haystack, absl::string_view needle) {
+bool StrContains(StringPiece haystack, StringPiece needle) {
   return std::search(haystack.begin(), haystack.end(), needle.begin(),
                      needle.end()) != haystack.end();
 }
 
-bool StartsWith(absl::string_view text, absl::string_view prefix) {
+bool StartsWith(StringPiece text, StringPiece prefix) {
   return prefix.empty() ||
          (text.size() >= prefix.size() &&
           memcmp(text.data(), prefix.data(), prefix.size()) == 0);
 }
 
-bool EndsWith(absl::string_view text, absl::string_view suffix) {
+bool EndsWith(StringPiece text, StringPiece suffix) {
   return suffix.empty() || (text.size() >= suffix.size() &&
                             memcmp(text.data() + (text.size() - suffix.size()),
                                    suffix.data(), suffix.size()) == 0);
diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h
index a00434534e..9f52cf29fc 100644
--- a/tensorflow/core/lib/strings/str_util.h
+++ b/tensorflow/core/lib/strings/str_util.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include <functional>
 #include <string>
 #include <vector>
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -29,7 +29,7 @@ namespace str_util {
 
 // Returns a version of 'src' where unprintable characters have been
 // escaped using C-style escape sequences.
-string CEscape(absl::string_view src);
+string CEscape(StringPiece src);
 
 // Copies "source" to "dest", rewriting C-style escape sequences --
 // '\n', '\r', '\\', '\ooo', etc -- to their ASCII equivalents.
@@ -38,47 +38,47 @@ string CEscape(absl::string_view src);
 // 'error'. To disable error reporting, set 'error' to NULL.
 //
 // NOTE: Does not support \u or \U!
-bool CUnescape(absl::string_view source, string* dest, string* error);
+bool CUnescape(StringPiece source, string* dest, string* error);
 
 // Removes any trailing whitespace from "*s".
 void StripTrailingWhitespace(string* s);
 
 // Removes leading ascii_isspace() characters.
 // Returns number of characters removed.
-size_t RemoveLeadingWhitespace(absl::string_view* text);
+size_t RemoveLeadingWhitespace(StringPiece* text);
 
 // Removes trailing ascii_isspace() characters.
 // Returns number of characters removed.
-size_t RemoveTrailingWhitespace(absl::string_view* text);
+size_t RemoveTrailingWhitespace(StringPiece* text);
 
 // Removes leading and trailing ascii_isspace() chars.
 // Returns number of chars removed.
-size_t RemoveWhitespaceContext(absl::string_view* text);
+size_t RemoveWhitespaceContext(StringPiece* text);
 
 // Consume a leading positive integer value.  If any digits were
 // found, store the value of the leading unsigned number in "*val",
 // advance "*s" past the consumed number, and return true.  If
 // overflow occurred, returns false.  Otherwise, returns false.
-bool ConsumeLeadingDigits(absl::string_view* s, uint64* val);
+bool ConsumeLeadingDigits(StringPiece* s, uint64* val);
 
 // Consume a leading token composed of non-whitespace characters only.
 // If *s starts with a non-zero number of non-whitespace characters, store
 // them in *val, advance *s past them, and return true.  Else return false.
-bool ConsumeNonWhitespace(absl::string_view* s, absl::string_view* val);
+bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val);
 
 // If "*s" starts with "expected", consume it and return true.
 // Otherwise, return false.
-bool ConsumePrefix(absl::string_view* s, absl::string_view expected);
+bool ConsumePrefix(StringPiece* s, StringPiece expected);
 
 // If "*s" ends with "expected", remove it and return true.
 // Otherwise, return false.
-bool ConsumeSuffix(absl::string_view* s, absl::string_view expected);
+bool ConsumeSuffix(StringPiece* s, StringPiece expected);
 
 // Return lower-cased version of s.
-string Lowercase(absl::string_view s);
+string Lowercase(StringPiece s);
 
 // Return upper-cased version of s.
-string Uppercase(absl::string_view s);
+string Uppercase(StringPiece s);
 
 // Converts "^2ILoveYou!" to "i_love_you_". More specifically:
 // - converts all non-alphanumeric characters to underscores
@@ -89,16 +89,16 @@ string Uppercase(absl::string_view s);
 // This method is useful for producing strings matching "[a-z][a-z0-9_]*"
 // as required by OpDef.ArgDef.name. The resulting string is either empty or
 // matches this regex.
-string ArgDefCase(absl::string_view s);
+string ArgDefCase(StringPiece s);
 
 // Capitalize first character of each word in "*s".  "delimiters" is a
 // set of characters that can be used as word boundaries.
-void TitlecaseString(string* s, absl::string_view delimiters);
+void TitlecaseString(string* s, StringPiece delimiters);
 
 // Replaces the first occurrence (if replace_all is false) or all occurrences
 // (if replace_all is true) of oldsub in s with newsub.
-string StringReplace(absl::string_view s, absl::string_view oldsub,
-                     absl::string_view newsub, bool replace_all);
+string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
+                     bool replace_all);
 
 // Join functionality
 template <typename T>
@@ -111,13 +111,13 @@ template <typename T, typename Formatter>
 string Join(const T& s, const char* sep, Formatter f);
 
 struct AllowEmpty {
-  bool operator()(absl::string_view sp) const { return true; }
+  bool operator()(StringPiece sp) const { return true; }
 };
 struct SkipEmpty {
-  bool operator()(absl::string_view sp) const { return !sp.empty(); }
+  bool operator()(StringPiece sp) const { return !sp.empty(); }
 };
 struct SkipWhitespace {
-  bool operator()(absl::string_view sp) const {
+  bool operator()(StringPiece sp) const {
     RemoveTrailingWhitespace(&sp);
     return !sp.empty();
   }
@@ -125,36 +125,35 @@ struct SkipWhitespace {
 
 // Split strings using any of the supplied delimiters. For example:
 // Split("a,b.c,d", ".,") would return {"a", "b", "c", "d"}.
-std::vector<string> Split(absl::string_view text, absl::string_view delims);
+std::vector<string> Split(StringPiece text, StringPiece delims);
 
 template <typename Predicate>
-std::vector<string> Split(absl::string_view text, absl::string_view delims,
-                          Predicate p);
+std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p);
 
 // Split "text" at "delim" characters, and parse each component as
 // an integer.  If successful, adds the individual numbers in order
 // to "*result" and returns true.  Otherwise returns false.
-bool SplitAndParseAsInts(absl::string_view text, char delim,
+bool SplitAndParseAsInts(StringPiece text, char delim,
                          std::vector<int32>* result);
-bool SplitAndParseAsInts(absl::string_view text, char delim,
+bool SplitAndParseAsInts(StringPiece text, char delim,
                          std::vector<int64>* result);
-bool SplitAndParseAsFloats(absl::string_view text, char delim,
+bool SplitAndParseAsFloats(StringPiece text, char delim,
                            std::vector<float>* result);
 
 // StartsWith()
 //
 // Returns whether a given string `text` begins with `prefix`.
-bool StartsWith(absl::string_view text, absl::string_view prefix);
+bool StartsWith(StringPiece text, StringPiece prefix);
 
 // EndsWith()
 //
 // Returns whether a given string `text` ends with `suffix`.
-bool EndsWith(absl::string_view text, absl::string_view suffix);
+bool EndsWith(StringPiece text, StringPiece suffix);
 
 // StrContains()
 //
 // Returns whether a given string `haystack` contains the substring `needle`.
-bool StrContains(absl::string_view haystack, absl::string_view needle);
+bool StrContains(StringPiece haystack, StringPiece needle);
 
 // ------------------------------------------------------------------
 // Implementation details below
@@ -193,21 +192,18 @@ string Join(const T& s, const char* sep, Formatter f) {
   return result;
 }
 
-inline std::vector<string> Split(absl::string_view text,
-                                 absl::string_view delims) {
+inline std::vector<string> Split(StringPiece text, StringPiece delims) {
   return Split(text, delims, AllowEmpty());
 }
 
 template <typename Predicate>
-std::vector<string> Split(absl::string_view text, absl::string_view delims,
-                          Predicate p) {
+std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p) {
   std::vector<string> result;
   size_t token_start = 0;
   if (!text.empty()) {
     for (size_t i = 0; i < text.size() + 1; i++) {
-      if ((i == text.size()) ||
-          (delims.find(text[i]) != absl::string_view::npos)) {
-        absl::string_view token(text.data() + token_start, i - token_start);
+      if ((i == text.size()) || (delims.find(text[i]) != StringPiece::npos)) {
+        StringPiece token(text.data() + token_start, i - token_start);
         if (p(token)) {
           result.emplace_back(token);
         }
@@ -218,13 +214,13 @@ std::vector<string> Split(absl::string_view text, absl::string_view delims,
   return result;
 }
 
-inline std::vector<string> Split(absl::string_view text, char delim) {
-  return Split(text, absl::string_view(&delim, 1));
+inline std::vector<string> Split(StringPiece text, char delim) {
+  return Split(text, StringPiece(&delim, 1));
 }
 
 template <typename Predicate>
-std::vector<string> Split(absl::string_view text, char delims, Predicate p) {
-  return Split(text, absl::string_view(&delims, 1), p);
+std::vector<string> Split(StringPiece text, char delims, Predicate p) {
+  return Split(text, StringPiece(&delims, 1), p);
 }
 
 // Returns the length of the given null-terminated byte string 'str'.
diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
index 38157d60a5..3bf3e99825 100644
--- a/tensorflow/core/lib/strings/str_util_test.cc
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/str_util.h"
 
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -29,7 +28,7 @@ TEST(CEscape, Basic) {
   EXPECT_EQ(str_util::CEscape("\320hi\200"), "\\320hi\\200");
 }
 
-string ExpectCUnescapeSuccess(absl::string_view source) {
+string ExpectCUnescapeSuccess(StringPiece source) {
   string dest;
   string error;
   EXPECT_TRUE(str_util::CUnescape(source, &dest, &error)) << error;
@@ -50,7 +49,7 @@ TEST(CUnescape, HandlesCopyOnWriteStrings) {
   // For std::string, read and dest now share the same buffer.
 
   string error;
-  absl::string_view source = "llohe";
+  StringPiece source = "llohe";
   // CUnescape is going to write "llohe" to dest, so dest's buffer will be
   // reallocated, and read's buffer remains untouched.
   EXPECT_TRUE(str_util::CUnescape(source, &dest, &error));
@@ -82,71 +81,71 @@ TEST(StripTrailingWhitespace, Basic) {
 
 TEST(RemoveLeadingWhitespace, Basic) {
   string text = "  \t   \n  \r Quick\t";
-  absl::string_view data(text);
+  StringPiece data(text);
   // check that all whitespace is removed
   EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 11);
-  EXPECT_EQ(data, absl::string_view("Quick\t"));
+  EXPECT_EQ(data, StringPiece("Quick\t"));
   // check that non-whitespace is not removed
   EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 0);
-  EXPECT_EQ(data, absl::string_view("Quick\t"));
+  EXPECT_EQ(data, StringPiece("Quick\t"));
 }
 
 TEST(RemoveLeadingWhitespace, TerminationHandling) {
   // check termination handling
   string text = "\t";
-  absl::string_view data(text);
+  StringPiece data(text);
   EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 1);
-  EXPECT_EQ(data, absl::string_view(""));
+  EXPECT_EQ(data, StringPiece(""));
 
   // check termination handling again
   EXPECT_EQ(str_util::RemoveLeadingWhitespace(&data), 0);
-  EXPECT_EQ(data, absl::string_view(""));
+  EXPECT_EQ(data, StringPiece(""));
 }
 
 TEST(RemoveTrailingWhitespace, Basic) {
   string text = "  \t   \n  \r Quick \t";
-  absl::string_view data(text);
+  StringPiece data(text);
   // check that all whitespace is removed
   EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 2);
-  EXPECT_EQ(data, absl::string_view("  \t   \n  \r Quick"));
+  EXPECT_EQ(data, StringPiece("  \t   \n  \r Quick"));
   // check that non-whitespace is not removed
   EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 0);
-  EXPECT_EQ(data, absl::string_view("  \t   \n  \r Quick"));
+  EXPECT_EQ(data, StringPiece("  \t   \n  \r Quick"));
 }
 
 TEST(RemoveTrailingWhitespace, TerminationHandling) {
   // check termination handling
   string text = "\t";
-  absl::string_view data(text);
+  StringPiece data(text);
   EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 1);
-  EXPECT_EQ(data, absl::string_view(""));
+  EXPECT_EQ(data, StringPiece(""));
 
   // check termination handling again
   EXPECT_EQ(str_util::RemoveTrailingWhitespace(&data), 0);
-  EXPECT_EQ(data, absl::string_view(""));
+  EXPECT_EQ(data, StringPiece(""));
 }
 
 TEST(RemoveWhitespaceContext, Basic) {
   string text = "  \t   \n  \r Quick \t";
-  absl::string_view data(text);
+  StringPiece data(text);
   // check that all whitespace is removed
   EXPECT_EQ(str_util::RemoveWhitespaceContext(&data), 13);
-  EXPECT_EQ(data, absl::string_view("Quick"));
+  EXPECT_EQ(data, StringPiece("Quick"));
   // check that non-whitespace is not removed
   EXPECT_EQ(str_util::RemoveWhitespaceContext(&data), 0);
-  EXPECT_EQ(data, absl::string_view("Quick"));
+  EXPECT_EQ(data, StringPiece("Quick"));
 
   // Test empty string
   text = "";
   data = text;
   EXPECT_EQ(str_util::RemoveWhitespaceContext(&data), 0);
-  EXPECT_EQ(data, absl::string_view(""));
+  EXPECT_EQ(data, StringPiece(""));
 }
 
-void TestConsumeLeadingDigits(absl::string_view s, int64 expected,
-                              absl::string_view remaining) {
+void TestConsumeLeadingDigits(StringPiece s, int64 expected,
+                              StringPiece remaining) {
   uint64 v;
-  absl::string_view input(s);
+  StringPiece input(s);
   if (str_util::ConsumeLeadingDigits(&input, &v)) {
     EXPECT_EQ(v, static_cast<uint64>(expected));
     EXPECT_EQ(input, remaining);
@@ -179,10 +178,10 @@ TEST(ConsumeLeadingDigits, Basic) {
                            "184467440737095516159yz");
 }
 
-void TestConsumeNonWhitespace(absl::string_view s, absl::string_view expected,
-                              absl::string_view remaining) {
-  absl::string_view v;
-  absl::string_view input(s);
+void TestConsumeNonWhitespace(StringPiece s, StringPiece expected,
+                              StringPiece remaining) {
+  StringPiece v;
+  StringPiece input(s);
   if (str_util::ConsumeNonWhitespace(&input, &v)) {
     EXPECT_EQ(v, expected);
     EXPECT_EQ(input, remaining);
@@ -201,7 +200,7 @@ TEST(ConsumeNonWhitespace, Basic) {
 
 TEST(ConsumePrefix, Basic) {
   string s("abcdef");
-  absl::string_view input(s);
+  StringPiece input(s);
   EXPECT_FALSE(str_util::ConsumePrefix(&input, "abcdefg"));
   EXPECT_EQ(input, "abcdef");
 
@@ -229,7 +228,7 @@ TEST(JoinStrings, Basic) {
   s = {"hi", "there", "strings"};
   EXPECT_EQ(str_util::Join(s, " "), "hi there strings");
 
-  std::vector<absl::string_view> sp;
+  std::vector<StringPiece> sp;
   sp = {"hi"};
   EXPECT_EQ(str_util::Join(sp, ",,"), "hi");
   sp = {"hi", "there", "strings"};
diff --git a/tensorflow/core/lib/strings/strcat.cc b/tensorflow/core/lib/strings/strcat.cc
index fd708b837d..f140ec3d26 100644
--- a/tensorflow/core/lib/strings/strcat.cc
+++ b/tensorflow/core/lib/strings/strcat.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <stdio.h>
 #include <string.h>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
 #include "tensorflow/core/platform/logging.h"
 
@@ -42,7 +41,7 @@ AlphaNum::AlphaNum(Hex hex) {
     value >>= 4;
     mask >>= 4;
   } while (mask != 0);
-  piece_ = absl::string_view(writer, end - writer);
+  piece_ = StringPiece(writer, end - writer);
 }
 
 // ----------------------------------------------------------------------
@@ -118,15 +117,15 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
 namespace internal {
 
 // Do not call directly - these are not part of the public API.
-string CatPieces(std::initializer_list<absl::string_view> pieces) {
+string CatPieces(std::initializer_list<StringPiece> pieces) {
   string result;
   size_t total_size = 0;
-  for (const absl::string_view piece : pieces) total_size += piece.size();
+  for (const StringPiece piece : pieces) total_size += piece.size();
   gtl::STLStringResizeUninitialized(&result, total_size);
 
   char *const begin = &*result.begin();
   char *out = begin;
-  for (const absl::string_view piece : pieces) {
+  for (const StringPiece piece : pieces) {
     const size_t this_size = piece.size();
     memcpy(out, piece.data(), this_size);
     out += this_size;
@@ -142,11 +141,10 @@ string CatPieces(std::initializer_list<absl::string_view> pieces) {
 #define DCHECK_NO_OVERLAP(dest, src) \
   DCHECK_GE(uintptr_t((src).data() - (dest).data()), uintptr_t((dest).size()))
 
-void AppendPieces(string *result,
-                  std::initializer_list<absl::string_view> pieces) {
+void AppendPieces(string *result, std::initializer_list<StringPiece> pieces) {
   size_t old_size = result->size();
   size_t total_size = old_size;
-  for (const absl::string_view piece : pieces) {
+  for (const StringPiece piece : pieces) {
     DCHECK_NO_OVERLAP(*result, piece);
     total_size += piece.size();
   }
@@ -154,7 +152,7 @@ void AppendPieces(string *result,
 
   char *const begin = &*result->begin();
   char *out = begin + old_size;
-  for (const absl::string_view piece : pieces) {
+  for (const StringPiece piece : pieces) {
     const size_t this_size = piece.size();
     memcpy(out, piece.data(), this_size);
     out += this_size;
diff --git a/tensorflow/core/lib/strings/strcat.h b/tensorflow/core/lib/strings/strcat.h
index 1c7cb68c40..a620f59447 100644
--- a/tensorflow/core/lib/strings/strcat.h
+++ b/tensorflow/core/lib/strings/strcat.h
@@ -22,7 +22,7 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -121,20 +121,19 @@ class AlphaNum {
   AlphaNum(Hex hex);               // NOLINT(runtime/explicit)
 
   AlphaNum(const char *c_str) : piece_(c_str) {}   // NOLINT(runtime/explicit)
-  AlphaNum(const absl::string_view &pc)
-      : piece_(pc) {}                              // NOLINT(runtime/explicit)
+  AlphaNum(const StringPiece &pc) : piece_(pc) {}  // NOLINT(runtime/explicit)
   AlphaNum(const tensorflow::string &str)          // NOLINT(runtime/explicit)
       : piece_(str) {}
   template <typename A>
   AlphaNum(const std::basic_string<char, std::char_traits<char>, A> &str)
       : piece_(str) {}  // NOLINT(runtime/explicit)
 
-  absl::string_view::size_type size() const { return piece_.size(); }
+  StringPiece::size_type size() const { return piece_.size(); }
   const char *data() const { return piece_.data(); }
-  absl::string_view Piece() const { return piece_; }
+  StringPiece Piece() const { return piece_; }
 
  private:
-  absl::string_view piece_;
+  StringPiece piece_;
   char digits_[kFastToBufferSize];
 
   // Use ":" not ':'
@@ -177,9 +176,8 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
 namespace internal {
 
 // Do not call directly - this is not part of the public API.
-string CatPieces(std::initializer_list<absl::string_view> pieces);
-void AppendPieces(string *dest,
-                  std::initializer_list<absl::string_view> pieces);
+string CatPieces(std::initializer_list<StringPiece> pieces);
+void AppendPieces(string *dest, std::initializer_list<StringPiece> pieces);
 
 }  // namespace internal
 
diff --git a/tensorflow/core/lib/strings/strcat_test.cc b/tensorflow/core/lib/strings/strcat_test.cc
index 2e06bd0eb8..6c4e5526b1 100644
--- a/tensorflow/core/lib/strings/strcat_test.cc
+++ b/tensorflow/core/lib/strings/strcat_test.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -67,7 +66,7 @@ TEST(StrCat, Basics) {
 
   string strs[] = {"Hello", "Cruel", "World"};
 
-  absl::string_view pieces[] = {"Hello", "Cruel", "World"};
+  StringPiece pieces[] = {"Hello", "Cruel", "World"};
 
   const char *c_strs[] = {"Hello", "Cruel", "World"};
 
@@ -209,7 +208,7 @@ TEST(StrAppend, Basics) {
 
   string strs[] = {"Hello", "Cruel", "World"};
 
-  absl::string_view pieces[] = {"Hello", "Cruel", "World"};
+  StringPiece pieces[] = {"Hello", "Cruel", "World"};
 
   const char *c_strs[] = {"Hello", "Cruel", "World"};
 
diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD
index 4726505119..647a797b82 100644
--- a/tensorflow/core/platform/cloud/BUILD
+++ b/tensorflow/core/platform/cloud/BUILD
@@ -89,7 +89,6 @@ cc_library(
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
         "@jsoncpp_git//:jsoncpp",
     ],
     alwayslink = 1,
@@ -116,7 +115,6 @@ cc_library(
         ":http_request",
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
         "@curl",
     ],
 )
@@ -134,7 +132,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:test",
-        "@com_google_absl//absl/strings",
         "@curl",
     ],
 )
@@ -154,7 +151,6 @@ cc_library(
         ":retrying_utils",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
         "@jsoncpp_git//:jsoncpp",
     ],
 )
@@ -193,7 +189,6 @@ cc_library(
         ":compute_engine_metadata_client",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -225,7 +220,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@boringssl//:crypto",
-        "@com_google_absl//absl/strings",
         "@jsoncpp_git//:jsoncpp",
     ],
 )
@@ -255,7 +249,6 @@ cc_library(
         ":retrying_utils",
         "//tensorflow/core:framework_headers_lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -311,7 +304,6 @@ tf_cc_test(
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -350,7 +342,6 @@ tf_cc_test(
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -370,7 +361,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "@boringssl//:crypto",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -390,7 +380,6 @@ tf_cc_test(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -432,7 +421,6 @@ tf_cc_test(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/platform/cloud/compute_engine_zone_provider.cc b/tensorflow/core/platform/cloud/compute_engine_zone_provider.cc
index c406ba0eae..e147d88371 100644
--- a/tensorflow/core/platform/cloud/compute_engine_zone_provider.cc
+++ b/tensorflow/core/platform/cloud/compute_engine_zone_provider.cc
@@ -16,7 +16,6 @@ limitations under the License.
 #include "tensorflow/core/platform/cloud/compute_engine_zone_provider.h"
 
 #include <utility>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 namespace tensorflow {
 
@@ -36,7 +35,7 @@ Status ComputeEngineZoneProvider::GetZone(string* zone) {
   std::vector<char> response_buffer;
   TF_RETURN_IF_ERROR(google_metadata_client_->GetMetadata(kGceMetadataZonePath,
                                                           &response_buffer));
-  absl::string_view location(&response_buffer[0], response_buffer.size());
+  StringPiece location(&response_buffer[0], response_buffer.size());
 
   std::vector<string> elems = str_util::Split(location, "/");
   if (elems.size() == 4) {
diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc
index 6f85514fa8..5e1eabee5b 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include <algorithm>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
 
 #include "tensorflow/core/lib/core/errors.h"
@@ -276,7 +275,7 @@ void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) {
                                            reinterpret_cast<void*>(this)));
   CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION,
                                            &CurlHttpRequest::ReadCallback));
-  post_body_buffer_ = absl::string_view(buffer, size);
+  post_body_buffer_ = StringPiece(buffer, size);
 }
 
 void CurlHttpRequest::SetPostEmptyBody() {
@@ -387,8 +386,8 @@ size_t CurlHttpRequest::HeaderCallback(const void* ptr, size_t size,
                                        size_t nmemb, void* this_object) {
   CHECK(ptr);
   auto that = reinterpret_cast<CurlHttpRequest*>(this_object);
-  absl::string_view header(reinterpret_cast<const char*>(ptr), size * nmemb);
-  absl::string_view name, value;
+  StringPiece header(reinterpret_cast<const char*>(ptr), size * nmemb);
+  StringPiece name, value;
   // The supplied header has the form "<name>: <value>", parse it.
   if (strings::Scanner(header)
           .ScanEscapedUntil(':')
@@ -447,7 +446,7 @@ Status CurlHttpRequest::Send() {
   auto get_error_message = [this]() -> string {
     string error_message = strings::StrCat(
         "Error executing an HTTP request: HTTP response code ", response_code_);
-    absl::string_view body = GetResponse();
+    StringPiece body = GetResponse();
     if (!body.empty()) {
       return strings::StrCat(
           error_message, " with body '",
@@ -543,14 +542,13 @@ void CurlHttpRequest::CheckNotSent() const {
   CHECK(!is_sent_) << "The request has already been sent.";
 }
 
-absl::string_view CurlHttpRequest::GetResponse() const {
-  absl::string_view response;
+StringPiece CurlHttpRequest::GetResponse() const {
+  StringPiece response;
   if (IsDirectResponse()) {
-    response = absl::string_view(direct_response_.buffer_,
-                                 direct_response_.bytes_transferred_);
+    response = StringPiece(direct_response_.buffer_,
+                           direct_response_.bytes_transferred_);
   } else {
-    response =
-        absl::string_view(response_buffer_->data(), response_buffer_->size());
+    response = StringPiece(response_buffer_->data(), response_buffer_->size());
   }
   return response;
 }
diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h
index 22ae5d9687..1b2029926d 100644
--- a/tensorflow/core/platform/cloud/curl_http_request.h
+++ b/tensorflow/core/platform/cloud/curl_http_request.h
@@ -19,10 +19,10 @@ limitations under the License.
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include <curl/curl.h>
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/cloud/http_request.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/macros.h"
@@ -165,7 +165,7 @@ class CurlHttpRequest : public HttpRequest {
                               curl_off_t ulnow);
   void CheckMethodNotSet() const;
   void CheckNotSent() const;
-  absl::string_view GetResponse() const;
+  StringPiece GetResponse() const;
 
   /// Helper to convert the given CURLcode and error buffer, representing the
   /// result of performing a transfer, into a Status with an error message.
@@ -176,7 +176,7 @@ class CurlHttpRequest : public HttpRequest {
 
   FILE* put_body_ = nullptr;
 
-  absl::string_view post_body_buffer_;
+  StringPiece post_body_buffer_;
   size_t post_body_read_ = 0;
 
   std::vector<char>* response_buffer_ = nullptr;
diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc
index e5f92d6ec8..eb9023d708 100644
--- a/tensorflow/core/platform/cloud/curl_http_request_test.cc
+++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
 #include <fstream>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/mem.h"
@@ -145,8 +144,8 @@ class FakeLibCurl : public LibCurl {
       posted_content_ = "";
       do {
         bytes_read = read_callback_(buffer, 1, sizeof(buffer), read_data_);
-        posted_content_ = strings::StrCat(
-            posted_content_, absl::string_view(buffer, bytes_read));
+        posted_content_ =
+            strings::StrCat(posted_content_, StringPiece(buffer, bytes_read));
       } while (bytes_read > 0);
     }
     if (write_data_ || write_callback_) {
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 1b8c738edb..c61b68aeeb 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include <cstring>
 #include <fstream>
 #include <vector>
-#include "absl/strings/string_view.h"
 #ifdef _WIN32
 #include <io.h>  // for _mktemp
 #endif
@@ -173,9 +172,9 @@ Status GetTmpFilename(string* filename) {
 /// "bucket-name" and "path/to/file.txt".
 /// If fname only contains the bucket and empty_object_ok = true, the returned
 /// object is empty.
-Status ParseGcsPath(absl::string_view fname, bool empty_object_ok,
-                    string* bucket, string* object) {
-  absl::string_view scheme, bucketp, objectp;
+Status ParseGcsPath(StringPiece fname, bool empty_object_ok, string* bucket,
+                    string* object) {
+  StringPiece scheme, bucketp, objectp;
   io::ParseURI(fname, &scheme, &bucketp, &objectp);
   if (scheme != "gs") {
     return errors::InvalidArgument("GCS path doesn't start with 'gs://': ",
@@ -224,7 +223,7 @@ std::set<string> AddAllSubpaths(const std::vector<string>& paths) {
   std::set<string> result;
   result.insert(paths.begin(), paths.end());
   for (const string& path : paths) {
-    absl::string_view subpath = io::Dirname(path);
+    StringPiece subpath = io::Dirname(path);
     while (!subpath.empty()) {
       result.emplace(string(subpath));
       subpath = io::Dirname(subpath);
@@ -233,7 +232,7 @@ std::set<string> AddAllSubpaths(const std::vector<string>& paths) {
   return result;
 }
 
-Status ParseJson(absl::string_view json, Json::Value* result) {
+Status ParseJson(StringPiece json, Json::Value* result) {
   Json::Reader reader;
   if (!reader.parse(json.data(), json.data() + json.size(), *result)) {
     return errors::Internal("Couldn't parse JSON response from GCS.");
@@ -242,7 +241,7 @@ Status ParseJson(absl::string_view json, Json::Value* result) {
 }
 
 Status ParseJson(const std::vector<char>& json, Json::Value* result) {
-  return ParseJson(absl::string_view{json.data(), json.size()}, result);
+  return ParseJson(StringPiece{json.data(), json.size()}, result);
 }
 
 /// Reads a JSON value with the given name from a parent JSON value.
@@ -306,13 +305,13 @@ class GcsRandomAccessFile : public RandomAccessFile {
  public:
   using ReadFn =
       std::function<Status(const string& filename, uint64 offset, size_t n,
-                           absl::string_view* result, char* scratch)>;
+                           StringPiece* result, char* scratch)>;
 
   GcsRandomAccessFile(const string& filename, ReadFn read_fn)
       : filename_(filename), read_fn_(std::move(read_fn)) {}
 
   /// The implementation of reads with an LRU block cache. Thread safe.
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     return read_fn_(filename_, offset, n, result, scratch);
   }
@@ -373,7 +372,7 @@ class GcsWritableFile : public WritableFile {
 
   ~GcsWritableFile() override { Close().IgnoreError(); }
 
-  Status Append(absl::string_view data) override {
+  Status Append(StringPiece data) override {
     TF_RETURN_IF_ERROR(CheckWritable());
     sync_needed_ = true;
     outfile_ << data;
@@ -531,7 +530,7 @@ class GcsWritableFile : public WritableFile {
       // This means GCS doesn't have any bytes of the file yet.
       *uploaded = 0;
     } else {
-      absl::string_view range_piece(received_range);
+      StringPiece range_piece(received_range);
       str_util::ConsumePrefix(&range_piece,
                               "bytes=");  // May or may not be present.
       std::vector<int64> range_parts;
@@ -605,7 +604,7 @@ class GcsReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
 // Helper function to extract an environment variable and convert it into a
 // value of type T.
 template <typename T>
-bool GetEnvVar(const char* varname, bool (*convert)(absl::string_view, T*),
+bool GetEnvVar(const char* varname, bool (*convert)(StringPiece, T*),
                T* value) {
   const char* env_value = std::getenv(varname);
   if (!env_value) {
@@ -614,14 +613,14 @@ bool GetEnvVar(const char* varname, bool (*convert)(absl::string_view, T*),
   return convert(env_value, value);
 }
 
-bool StringPieceIdentity(absl::string_view str, absl::string_view* value) {
+bool StringPieceIdentity(StringPiece str, StringPiece* value) {
   *value = str;
   return true;
 }
 
 /// \brief Utility function to split a comma delimited list of strings to an
 /// unordered set, lowercasing all values.
-bool SplitByCommaToLowercaseSet(absl::string_view list,
+bool SplitByCommaToLowercaseSet(StringPiece list,
                                 std::unordered_set<string>* set) {
   std::vector<string> vector =
       str_util::Split(tensorflow::str_util::Lowercase(list), ",");
@@ -714,14 +713,14 @@ GcsFileSystem::GcsFileSystem() {
   }
 
   // Get the additional header
-  absl::string_view add_header_contents;
+  StringPiece add_header_contents;
   if (GetEnvVar(kAdditionalRequestHeader, StringPieceIdentity,
                 &add_header_contents)) {
     size_t split = add_header_contents.find(':', 0);
 
-    if (split != absl::string_view::npos) {
-      absl::string_view header_name = add_header_contents.substr(0, split);
-      absl::string_view header_value = add_header_contents.substr(split + 1);
+    if (split != StringPiece::npos) {
+      StringPiece header_name = add_header_contents.substr(0, split);
+      StringPiece header_value = add_header_contents.substr(split + 1);
 
       if (!header_name.empty() && !header_value.empty()) {
         additional_header_.reset(new std::pair<const string, const string>(
@@ -818,7 +817,7 @@ Status GcsFileSystem::NewRandomAccessFile(
   result->reset(new GcsRandomAccessFile(fname, [this, bucket, object](
                                                    const string& fname,
                                                    uint64 offset, size_t n,
-                                                   absl::string_view* result,
+                                                   StringPiece* result,
                                                    char* scratch) {
     tf_shared_lock l(block_cache_lock_);
     if (file_block_cache_->IsCacheEnabled()) {
@@ -835,11 +834,11 @@ Status GcsFileSystem::NewRandomAccessFile(
             << fname;
       }
     }
-    *result = absl::string_view();
+    *result = StringPiece();
     size_t bytes_transferred;
     TF_RETURN_IF_ERROR(
         file_block_cache_->Read(fname, offset, n, scratch, &bytes_transferred));
-    *result = absl::string_view(scratch, bytes_transferred);
+    *result = StringPiece(scratch, bytes_transferred);
     if (bytes_transferred < n) {
       return errors::OutOfRange("EOF reached, ", result->size(),
                                 " bytes were read out of ", n,
@@ -955,7 +954,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname,
   std::unique_ptr<char[]> buffer(new char[kReadAppendableFileBufferSize]);
   Status status;
   uint64 offset = 0;
-  absl::string_view read_chunk;
+  StringPiece read_chunk;
 
   // Read the file from GCS in chunks and save it to a tmp file.
   string old_content_filename;
@@ -995,7 +994,7 @@ Status GcsFileSystem::NewReadOnlyMemoryRegionFromFile(
   std::unique_ptr<RandomAccessFile> file;
   TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &file));
 
-  absl::string_view piece;
+  StringPiece piece;
   TF_RETURN_IF_ERROR(file->Read(0, size, &piece, data.get()));
 
   result->reset(new GcsReadOnlyMemoryRegion(std::move(data), size));
@@ -1321,7 +1320,7 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
         // The names should be relative to the 'dirname'. That means the
         // 'object_prefix', which is part of 'dirname', should be removed from
         // the beginning of 'name'.
-        absl::string_view relative_path(name);
+        StringPiece relative_path(name);
         if (!str_util::ConsumePrefix(&relative_path, object_prefix)) {
           return errors::Internal(strings::StrCat(
               "Unexpected response: the returned file name ", name,
@@ -1350,7 +1349,7 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
               "response.");
         }
         const string& prefix_str = prefix.asString();
-        absl::string_view relative_path(prefix_str);
+        StringPiece relative_path(prefix_str);
         if (!str_util::ConsumePrefix(&relative_path, object_prefix)) {
           return errors::Internal(
               "Unexpected response: the returned folder name ", prefix_str,
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 4903e68f3f..702802b185 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/gcs_file_system.h"
 #include <fstream>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/cloud/http_request_fake.h"
@@ -80,7 +79,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char scratch[6];
-  absl::string_view result;
+  StringPiece result;
 
   // Read the first chunk.
   TF_EXPECT_OK(file->Read(0, sizeof(scratch), &result, scratch));
@@ -234,7 +233,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_DifferentN) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char small_scratch[3];
-  absl::string_view result;
+  StringPiece result;
 
   // Read the first chunk.
   TF_EXPECT_OK(file->Read(0, sizeof(small_scratch), &result, small_scratch));
@@ -291,7 +290,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) {
       nullptr /* gcs additional header */);
 
   char scratch[100];
-  absl::string_view result;
+  StringPiece result;
   {
     // We are instantiating this in an enclosed scope to make sure after the
     // unique ptr goes out of scope, we can still access result.
@@ -380,7 +379,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_Flush) {
       nullptr /* gcs additional header */);
 
   char scratch[100];
-  absl::string_view result;
+  StringPiece result;
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
   // Read the first chunk. The cache will be populated with the first block of
@@ -429,7 +428,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) {
                    kTestTimeoutConfig, *kAllowedLocationsDefault,
                    nullptr /* gcs additional header */);
   char scratch[100];
-  absl::string_view result;
+  StringPiece result;
   // There should only be two HTTP requests issued to GCS even though we iterate
   // this loop 10 times.  This shows that the underlying FileBlockCache persists
   // across file close/open boundaries.
@@ -503,7 +502,7 @@ TEST(GcsFileSystemTest,
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char scratch[5];
-  absl::string_view result;
+  StringPiece result;
 
   // First read.
   TF_EXPECT_OK(file->Read(0, sizeof(scratch), &result, scratch));
@@ -569,7 +568,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_InconsistentRead) {
   TF_ASSERT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char scratch[6];
-  absl::string_view result;
+  StringPiece result;
 
   EXPECT_EQ(errors::Code::INTERNAL,
             file->Read(0, sizeof(scratch), &result, scratch).code());
@@ -632,7 +631,7 @@ TEST(GcsFileSystemTest, NewWritableFile) {
   std::unique_ptr<RandomAccessFile> rfile;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/writeable", &rfile));
   char scratch[100];
-  absl::string_view result;
+  StringPiece result;
   TF_EXPECT_OK(rfile->Read(0, 4, &result, scratch));
   EXPECT_EQ("0123", result);
   // Open the writable file.
@@ -792,7 +791,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) {
   std::unique_ptr<RandomAccessFile> rfile;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/writeable", &rfile));
   char scratch[100];
-  absl::string_view result;
+  StringPiece result;
   TF_EXPECT_OK(rfile->Read(0, 4, &result, scratch));
   EXPECT_EQ("0123", result);
   // Now write to the same file. Once the write succeeds, the cached block will
@@ -1030,7 +1029,7 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
   std::unique_ptr<RandomAccessFile> rfile;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/appendable", &rfile));
   char scratch[100];
-  absl::string_view result;
+  StringPiece result;
   TF_EXPECT_OK(rfile->Read(0, 8, &result, scratch));
   EXPECT_EQ("content1", result);
   // Closing the appendable file will flush its contents to GCS, triggering HTTP
@@ -1093,9 +1092,8 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile(
       "gs://bucket/path/random_access.txt", &region));
 
-  EXPECT_EQ(content,
-            absl::string_view(reinterpret_cast<const char*>(region->data()),
-                              region->length()));
+  EXPECT_EQ(content, StringPiece(reinterpret_cast<const char*>(region->data()),
+                                 region->length()));
 }
 
 TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile_NoObjectName) {
@@ -1810,7 +1808,7 @@ TEST(GcsFileSystemTest, DeleteFile) {
 
   // Do an initial read of the file to load its contents into the block cache.
   char scratch[100];
-  absl::string_view result;
+  StringPiece result;
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/file1.txt", &file));
   TF_EXPECT_OK(file->Read(0, 8, &result, scratch));
@@ -2201,7 +2199,7 @@ TEST(GcsFileSystemTest, RenameFile_Object) {
   // Do an initial read of the source and destination files to load their
   // contents into the block cache.
   char scratch[100];
-  absl::string_view result;
+  StringPiece result;
   std::unique_ptr<RandomAccessFile> src;
   std::unique_ptr<RandomAccessFile> dst;
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/src.txt", &src));
@@ -3312,7 +3310,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_StatsRecording) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file));
 
   char scratch[6];
-  absl::string_view result;
+  StringPiece result;
 
   TF_EXPECT_OK(file->Read(0, sizeof(scratch), &result, scratch));
   EXPECT_EQ("012345", result);
diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index b8f3e0c07c..e15400780a 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
-#include "absl/strings/string_view.h"
 #ifndef _WIN32
 #include <pwd.h>
 #include <unistd.h>
@@ -207,8 +206,8 @@ Status GoogleAuthProvider::GetTokenFromGce() {
 
   TF_RETURN_IF_ERROR(compute_engine_metadata_client_->GetMetadata(
       kGceTokenPath, &response_buffer));
-  absl::string_view response =
-      absl::string_view(&response_buffer[0], response_buffer.size());
+  StringPiece response =
+      StringPiece(&response_buffer[0], response_buffer.size());
 
   TF_RETURN_IF_ERROR(oauth_client_->ParseOAuthResponse(
       response, request_timestamp_sec, &current_token_,
diff --git a/tensorflow/core/platform/cloud/google_auth_provider_test.cc b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
index 17345665b6..ec31c5ee8c 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider_test.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/google_auth_provider.h"
 #include <stdlib.h>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/cloud/http_request_fake.h"
@@ -38,9 +37,8 @@ class FakeEnv : public EnvWrapper {
 class FakeOAuthClient : public OAuthClient {
  public:
   Status GetTokenFromServiceAccountJson(
-      Json::Value json, absl::string_view oauth_server_uri,
-      absl::string_view scope, string* token,
-      uint64* expiration_timestamp_sec) override {
+      Json::Value json, StringPiece oauth_server_uri, StringPiece scope,
+      string* token, uint64* expiration_timestamp_sec) override {
     provided_credentials_json = json;
     *token = return_token;
     *expiration_timestamp_sec = return_expiration_timestamp;
@@ -49,7 +47,7 @@ class FakeOAuthClient : public OAuthClient {
 
   /// Retrieves a bearer token using a refresh token.
   Status GetTokenFromRefreshTokenJson(
-      Json::Value json, absl::string_view oauth_server_uri, string* token,
+      Json::Value json, StringPiece oauth_server_uri, string* token,
       uint64* expiration_timestamp_sec) override {
     provided_credentials_json = json;
     *token = return_token;
diff --git a/tensorflow/core/platform/cloud/http_request_fake.h b/tensorflow/core/platform/cloud/http_request_fake.h
index 0566ef89ff..0a1164b64a 100644
--- a/tensorflow/core/platform/cloud/http_request_fake.h
+++ b/tensorflow/core/platform/cloud/http_request_fake.h
@@ -20,11 +20,11 @@ limitations under the License.
 #include <fstream>
 #include <string>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include <curl/curl.h>
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/cloud/curl_http_request.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -101,7 +101,7 @@ class FakeHttpRequest : public CurlHttpRequest {
       *captured_post_body_ = string(buffer, size);
     } else {
       actual_request_ +=
-          strings::StrCat("Post body: ", absl::string_view(buffer, size), "\n");
+          strings::StrCat("Post body: ", StringPiece(buffer, size), "\n");
     }
   }
   void SetPutEmptyBody() override { actual_request_ += "Put: yes\n"; }
diff --git a/tensorflow/core/platform/cloud/oauth_client.cc b/tensorflow/core/platform/cloud/oauth_client.cc
index 97682c3e36..9b85cae9b9 100644
--- a/tensorflow/core/platform/cloud/oauth_client.cc
+++ b/tensorflow/core/platform/cloud/oauth_client.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/cloud/oauth_client.h"
-#include "absl/strings/string_view.h"
 #ifndef _WIN32
 #include <pwd.h>
 #include <sys/types.h>
@@ -85,7 +84,7 @@ Status ReadJsonInt(const Json::Value& json, const string& name, int64* value) {
   return Status::OK();
 }
 
-Status CreateSignature(RSA* private_key, absl::string_view to_sign,
+Status CreateSignature(RSA* private_key, StringPiece to_sign,
                        string* signature) {
   if (!private_key || !signature) {
     return errors::FailedPrecondition(
@@ -121,14 +120,13 @@ Status CreateSignature(RSA* private_key, absl::string_view to_sign,
     return errors::Internal("DigestFinal (signature compute) failed.");
   }
   EVP_MD_CTX_cleanup(md_ctx.get());
-  return Base64Encode(
-      absl::string_view(reinterpret_cast<char*>(sig.get()), sig_len),
-      signature);
+  return Base64Encode(StringPiece(reinterpret_cast<char*>(sig.get()), sig_len),
+                      signature);
 }
 
 /// Encodes a claim for a JSON web token (JWT) to make an OAuth request.
-Status EncodeJwtClaim(absl::string_view client_email, absl::string_view scope,
-                      absl::string_view audience, uint64 request_timestamp_sec,
+Status EncodeJwtClaim(StringPiece client_email, StringPiece scope,
+                      StringPiece audience, uint64 request_timestamp_sec,
                       string* encoded) {
   // Step 1: create the JSON with the claim.
   Json::Value root;
@@ -150,7 +148,7 @@ Status EncodeJwtClaim(absl::string_view client_email, absl::string_view scope,
 }
 
 /// Encodes a header for a JSON web token (JWT) to make an OAuth request.
-Status EncodeJwtHeader(absl::string_view key_id, string* encoded) {
+Status EncodeJwtHeader(StringPiece key_id, string* encoded) {
   // Step 1: create the JSON with the header.
   Json::Value root;
   root["alg"] = kCryptoAlgorithm;
@@ -176,8 +174,8 @@ OAuthClient::OAuthClient(
     : http_request_factory_(std::move(http_request_factory)), env_(env) {}
 
 Status OAuthClient::GetTokenFromServiceAccountJson(
-    Json::Value json, absl::string_view oauth_server_uri,
-    absl::string_view scope, string* token, uint64* expiration_timestamp_sec) {
+    Json::Value json, StringPiece oauth_server_uri, StringPiece scope,
+    string* token, uint64* expiration_timestamp_sec) {
   if (!token || !expiration_timestamp_sec) {
     return errors::FailedPrecondition(
         "'token' and 'expiration_timestamp_sec' cannot be nullptr.");
@@ -223,15 +221,15 @@ Status OAuthClient::GetTokenFromServiceAccountJson(
   request->SetResultBuffer(&response_buffer);
   TF_RETURN_IF_ERROR(request->Send());
 
-  absl::string_view response =
-      absl::string_view(response_buffer.data(), response_buffer.size());
+  StringPiece response =
+      StringPiece(response_buffer.data(), response_buffer.size());
   TF_RETURN_IF_ERROR(ParseOAuthResponse(response, request_timestamp_sec, token,
                                         expiration_timestamp_sec));
   return Status::OK();
 }
 
 Status OAuthClient::GetTokenFromRefreshTokenJson(
-    Json::Value json, absl::string_view oauth_server_uri, string* token,
+    Json::Value json, StringPiece oauth_server_uri, string* token,
     uint64* expiration_timestamp_sec) {
   if (!token || !expiration_timestamp_sec) {
     return errors::FailedPrecondition(
@@ -255,14 +253,14 @@ Status OAuthClient::GetTokenFromRefreshTokenJson(
   request->SetResultBuffer(&response_buffer);
   TF_RETURN_IF_ERROR(request->Send());
 
-  absl::string_view response =
-      absl::string_view(response_buffer.data(), response_buffer.size());
+  StringPiece response =
+      StringPiece(response_buffer.data(), response_buffer.size());
   TF_RETURN_IF_ERROR(ParseOAuthResponse(response, request_timestamp_sec, token,
                                         expiration_timestamp_sec));
   return Status::OK();
 }
 
-Status OAuthClient::ParseOAuthResponse(absl::string_view response,
+Status OAuthClient::ParseOAuthResponse(StringPiece response,
                                        uint64 request_timestamp_sec,
                                        string* token,
                                        uint64* expiration_timestamp_sec) {
diff --git a/tensorflow/core/platform/cloud/oauth_client.h b/tensorflow/core/platform/cloud/oauth_client.h
index 5598811258..519d69acf9 100644
--- a/tensorflow/core/platform/cloud/oauth_client.h
+++ b/tensorflow/core/platform/cloud/oauth_client.h
@@ -17,7 +17,6 @@ limitations under the License.
 #define TENSORFLOW_CORE_PLATFORM_CLOUD_OAUTH_CLIENT_H_
 
 #include <memory>
-#include "absl/strings/string_view.h"
 #include "include/json/json.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/cloud/http_request.h"
@@ -38,16 +37,17 @@ class OAuthClient {
   /// Retrieves the authentication bearer token using a JSON file
   /// with the client's private key.
   virtual Status GetTokenFromServiceAccountJson(
-      Json::Value json, absl::string_view oauth_server_uri,
-      absl::string_view scope, string* token, uint64* expiration_timestamp_sec);
+      Json::Value json, StringPiece oauth_server_uri, StringPiece scope,
+      string* token, uint64* expiration_timestamp_sec);
 
   /// Retrieves a bearer token using a refresh token.
-  virtual Status GetTokenFromRefreshTokenJson(
-      Json::Value json, absl::string_view oauth_server_uri, string* token,
-      uint64* expiration_timestamp_sec);
+  virtual Status GetTokenFromRefreshTokenJson(Json::Value json,
+                                              StringPiece oauth_server_uri,
+                                              string* token,
+                                              uint64* expiration_timestamp_sec);
 
   /// Parses the JSON response with the token from an OAuth 2.0 server.
-  virtual Status ParseOAuthResponse(absl::string_view response,
+  virtual Status ParseOAuthResponse(StringPiece response,
                                     uint64 request_timestamp_sec, string* token,
                                     uint64* expiration_timestamp_sec);
 
diff --git a/tensorflow/core/platform/cloud/oauth_client_test.cc b/tensorflow/core/platform/cloud/oauth_client_test.cc
index d72d23c0ce..1cd0641cd3 100644
--- a/tensorflow/core/platform/cloud/oauth_client_test.cc
+++ b/tensorflow/core/platform/cloud/oauth_client_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/oauth_client.h"
 #include <fstream>
-#include "absl/strings/string_view.h"
 #include <openssl/bio.h>
 #include <openssl/evp.h>
 #include <openssl/pem.h>
@@ -116,7 +115,7 @@ TEST(OAuthClientTest, GetTokenFromServiceAccountJson) {
   EXPECT_EQ(13920, expiration_timestamp);
 
   // Now look at the JWT claim that was sent to the OAuth server.
-  absl::string_view grant_type, assertion;
+  StringPiece grant_type, assertion;
   ASSERT_TRUE(strings::Scanner(post_body)
                   .OneLiteral("grant_type=")
                   .RestartCapture()
diff --git a/tensorflow/core/platform/cloud/retrying_file_system.h b/tensorflow/core/platform/cloud/retrying_file_system.h
index 5c454a2b91..5ce6670dc7 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system.h
+++ b/tensorflow/core/platform/cloud/retrying_file_system.h
@@ -20,7 +20,6 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -151,7 +150,7 @@ class RetryingRandomAccessFile : public RandomAccessFile {
                            const RetryConfig& retry_config)
       : base_file_(std::move(base_file)), retry_config_(retry_config) {}
 
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     return RetryingUtils::CallWithRetries(
         [this, offset, n, result, scratch]() {
@@ -176,7 +175,7 @@ class RetryingWritableFile : public WritableFile {
     Close().IgnoreError();
   }
 
-  Status Append(absl::string_view data) override {
+  Status Append(StringPiece data) override {
     return RetryingUtils::CallWithRetries(
         [this, &data]() { return base_file_->Append(data); }, retry_config_);
   }
diff --git a/tensorflow/core/platform/cloud/retrying_file_system_test.cc b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
index fbce4a38f7..868eea096c 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/retrying_file_system.h"
 #include <fstream>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/test.h"
@@ -61,7 +60,7 @@ class MockCallSequence {
 class MockRandomAccessFile : public RandomAccessFile {
  public:
   explicit MockRandomAccessFile(const ExpectedCalls& calls) : calls_(calls) {}
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     return calls_.ConsumeNextCall("Read");
   }
@@ -73,7 +72,7 @@ class MockRandomAccessFile : public RandomAccessFile {
 class MockWritableFile : public WritableFile {
  public:
   explicit MockWritableFile(const ExpectedCalls& calls) : calls_(calls) {}
-  Status Append(absl::string_view data) override {
+  Status Append(StringPiece data) override {
     return calls_.ConsumeNextCall("Append");
   }
   Status Close() override { return calls_.ConsumeNextCall("Close"); }
@@ -193,7 +192,7 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_ImmediateSuccess) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file));
 
   // Use it and check the results.
-  absl::string_view result;
+  StringPiece result;
   char scratch[10];
   TF_EXPECT_OK(random_access_file->Read(0, 10, &result, scratch));
 }
@@ -221,7 +220,7 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_SuccessWith3rdTry) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file));
 
   // Use it and check the results.
-  absl::string_view result;
+  StringPiece result;
   char scratch[10];
   TF_EXPECT_OK(random_access_file->Read(0, 10, &result, scratch));
 }
@@ -246,7 +245,7 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_AllRetriesFailed) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file));
 
   // Use it and check the results.
-  absl::string_view result;
+  StringPiece result;
   char scratch[10];
   const auto& status = random_access_file->Read(0, 10, &result, scratch);
   EXPECT_TRUE(
@@ -277,7 +276,7 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_NoRetriesForSomeErrors) {
   TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file));
 
   // Use it and check the results.
-  absl::string_view result;
+  StringPiece result;
   char scratch[10];
   EXPECT_EQ("Failed precondition",
             random_access_file->Read(0, 10, &result, scratch).error_message());
diff --git a/tensorflow/core/platform/default/device_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc
index 10487e0bae..83c65dbfa9 100644
--- a/tensorflow/core/platform/default/device_tracer.cc
+++ b/tensorflow/core/platform/default/device_tracer.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <stdlib.h>
 #include <memory>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/step_stats_collector.h"
 #include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -302,7 +301,7 @@ class DeviceTracerImpl : public DeviceTracer,
 
   // tracing::TraceCollector interface:
   virtual std::unique_ptr<Handle> CreateAnnotationHandle(
-      absl::string_view name_part1, absl::string_view name_part2) const {
+      StringPiece name_part1, StringPiece name_part2) const {
     struct Impl : public tracing::TraceCollector::Handle {
       string annotation;
       explicit Impl(string &&name_scope) : annotation(name_scope) {
@@ -316,8 +315,7 @@ class DeviceTracerImpl : public DeviceTracer,
         new Impl{ConcatenateNames(name_part1, name_part2)});
   }
 
-  virtual std::unique_ptr<Handle> CreateActivityHandle(absl::string_view,
-                                                       absl::string_view,
+  virtual std::unique_ptr<Handle> CreateActivityHandle(StringPiece, StringPiece,
                                                        bool) const {
     // We don't do anything with 'Activities' yet.
     return nullptr;
diff --git a/tensorflow/core/platform/default/fingerprint.h b/tensorflow/core/platform/default/fingerprint.h
index 11af54eac1..f901befc16 100644
--- a/tensorflow/core/platform/default/fingerprint.h
+++ b/tensorflow/core/platform/default/fingerprint.h
@@ -18,15 +18,15 @@ limitations under the License.
 
 #include <farmhash.h>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 
-inline uint64 Fingerprint64(absl::string_view s) {
+inline uint64 Fingerprint64(StringPiece s) {
   return ::util::Fingerprint64(s.data(), s.size());
 }
 
-inline Fprint128 Fingerprint128(absl::string_view s) {
+inline Fprint128 Fingerprint128(StringPiece s) {
   const auto fingerprint = ::util::Fingerprint128(s.data(), s.size());
   return {::util::Uint128Low64(fingerprint),
           ::util::Uint128High64(fingerprint)};
diff --git a/tensorflow/core/platform/default/human_readable_json.cc b/tensorflow/core/platform/default/human_readable_json.cc
index 6cf55c5314..9f97c8272c 100644
--- a/tensorflow/core/platform/default/human_readable_json.cc
+++ b/tensorflow/core/platform/default/human_readable_json.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/platform/human_readable_json.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -31,11 +30,12 @@ Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto,
 
   auto status = google::protobuf::util::MessageToJsonString(proto, result);
   if (!status.ok()) {
-    // Convert error_msg google::protobuf::StringPiece to absl::string_view.
+    // Convert error_msg google::protobuf::StringPiece to
+    // tensorflow::StringPiece.
     auto error_msg = status.error_message();
-    return errors::Internal(strings::StrCat(
-        "Could not convert proto to JSON string: ",
-        absl::string_view(error_msg.data(), error_msg.length())));
+    return errors::Internal(
+        strings::StrCat("Could not convert proto to JSON string: ",
+                        StringPiece(error_msg.data(), error_msg.length())));
   }
   return Status::OK();
 #endif
@@ -49,11 +49,12 @@ Status HumanReadableJsonToProto(const string& str,
   proto->Clear();
   auto status = google::protobuf::util::JsonStringToMessage(str, proto);
   if (!status.ok()) {
-    // Convert error_msg google::protobuf::StringPiece to absl::string_view.
+    // Convert error_msg google::protobuf::StringPiece to
+    // tensorflow::StringPiece.
     auto error_msg = status.error_message();
-    return errors::Internal(strings::StrCat(
-        "Could not convert JSON string to proto: ",
-        absl::string_view(error_msg.data(), error_msg.length())));
+    return errors::Internal(
+        strings::StrCat("Could not convert JSON string to proto: ",
+                        StringPiece(error_msg.data(), error_msg.length())));
   }
   return Status::OK();
 #endif
diff --git a/tensorflow/core/platform/default/string_coding.h b/tensorflow/core/platform/default/string_coding.h
index d7ee5f1187..70b8ab0144 100644
--- a/tensorflow/core/platform/default/string_coding.h
+++ b/tensorflow/core/platform/default/string_coding.h
@@ -18,7 +18,6 @@ limitations under the License.
 // IWYU pragma: private, include "third_party/tensorflow/core/platform/tensor_coding.h"
 // IWYU pragma: friend third_party/tensorflow/core/platform/tensor_coding.h
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -87,7 +86,7 @@ class StringListDecoder {
   }
 
  private:
-  absl::string_view reader_;
+  StringPiece reader_;
 };
 
 std::unique_ptr<StringListEncoder> NewStringListEncoder(string* out);
diff --git a/tensorflow/core/platform/default/test_benchmark.cc b/tensorflow/core/platform/default/test_benchmark.cc
index 4f872542bb..dedab42bd7 100644
--- a/tensorflow/core/platform/default/test_benchmark.cc
+++ b/tensorflow/core/platform/default/test_benchmark.cc
@@ -20,7 +20,6 @@ limitations under the License.
 
 #include <algorithm>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
@@ -118,7 +117,7 @@ void Benchmark::Run(const char* pattern) {
   // specified by clients, but we keep this here to match the internal
   // Google implementation, should we ever enable user-specified
   // pattern specification.
-  if (absl::string_view(pattern) == "all") {
+  if (StringPiece(pattern) == "all") {
     pattern = ".*";
   }
 
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
index 39051f885e..afc4201e53 100644
--- a/tensorflow/core/platform/env.cc
+++ b/tensorflow/core/platform/env.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <deque>
 #include <utility>
 #include <vector>
-#include "absl/strings/string_view.h"
 #if defined(__APPLE__)
 #include <mach-o/dyld.h>
 #endif
@@ -91,7 +90,7 @@ Status FileSystemRegistryImpl::GetRegisteredFileSystemSchemes(
 Env::Env() : file_system_registry_(new FileSystemRegistryImpl) {}
 
 Status Env::GetFileSystemForFile(const string& fname, FileSystem** result) {
-  absl::string_view scheme, host, path;
+  StringPiece scheme, host, path;
   io::ParseURI(fname, &scheme, &host, &path);
   FileSystem* file_system = file_system_registry_->Lookup(string(scheme));
   if (!file_system) {
@@ -165,7 +164,7 @@ bool Env::FilesExist(const std::vector<string>& files,
                      std::vector<Status>* status) {
   std::unordered_map<string, std::vector<string>> files_per_fs;
   for (const auto& file : files) {
-    absl::string_view scheme, host, path;
+    StringPiece scheme, host, path;
     io::ParseURI(file, &scheme, &host, &path);
     files_per_fs[string(scheme)].push_back(file);
   }
@@ -390,7 +389,7 @@ Status ReadFileToString(Env* env, const string& fname, string* data) {
   }
   gtl::STLStringResizeUninitialized(data, file_size);
   char* p = gtl::string_as_array(data);
-  absl::string_view result;
+  StringPiece result;
   s = file->Read(0, file_size, &result, p);
   if (!s.ok()) {
     data->clear();
@@ -407,7 +406,7 @@ Status ReadFileToString(Env* env, const string& fname, string* data) {
 }
 
 Status WriteStringToFile(Env* env, const string& fname,
-                         const absl::string_view& data) {
+                         const StringPiece& data) {
   std::unique_ptr<WritableFile> file;
   Status s = env->NewWritableFile(fname, &file);
   if (!s.ok()) {
@@ -432,7 +431,7 @@ Status FileSystemCopyFile(FileSystem* src_fs, const string& src,
   std::unique_ptr<char[]> scratch(new char[kCopyFileBufferSize]);
   Status s = Status::OK();
   while (s.ok()) {
-    absl::string_view result;
+    StringPiece result;
     s = src_file->Read(offset, kCopyFileBufferSize, &result, scratch.get());
     if (!(s.ok() || s.code() == error::OUT_OF_RANGE)) {
       return s;
@@ -458,7 +457,7 @@ class FileStream : public ::tensorflow::protobuf::io::ZeroCopyInputStream {
   Status status() const { return status_; }
 
   bool Next(const void** data, int* size) override {
-    absl::string_view result;
+    StringPiece result;
     Status s = file_->Read(pos_, kBufSize, &result, scratch_);
     if (result.empty()) {
       status_ = s;
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 508da1ea0b..5732271f15 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -21,9 +21,9 @@ limitations under the License.
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/env_time.h"
 #include "tensorflow/core/platform/file_system.h"
 #include "tensorflow/core/platform/macros.h"
@@ -408,7 +408,7 @@ Status ReadFileToString(Env* env, const string& fname, string* data);
 /// A utility routine: write contents of `data` to file named `fname`
 /// (overwriting existing contents, if any).
 Status WriteStringToFile(Env* env, const string& fname,
-                         const absl::string_view& data);
+                         const StringPiece& data);
 
 /// Write binary representation of "proto" to the named file.
 Status WriteBinaryProto(Env* env, const string& fname,
diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index d37a722b5f..2e32abdffb 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -17,10 +17,10 @@ limitations under the License.
 
 #include <sys/stat.h>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -75,7 +75,7 @@ TEST_F(DefaultEnvTest, IncompleteReadOutOfRange) {
   TF_EXPECT_OK(env_->NewRandomAccessFile(filename, &f));
 
   // Reading past EOF should give an OUT_OF_RANGE error
-  absl::string_view result;
+  StringPiece result;
   char scratch[3];
   EXPECT_EQ(error::OUT_OF_RANGE, f->Read(0, 3, &result, scratch).code());
   EXPECT_EQ(input, result);
@@ -280,7 +280,7 @@ TEST_F(DefaultEnvTest, SleepForMicroseconds) {
 class TmpDirFileSystem : public NullFileSystem {
  public:
   Status FileExists(const string& dir) override {
-    absl::string_view scheme, host, path;
+    StringPiece scheme, host, path;
     io::ParseURI(dir, &scheme, &host, &path);
     if (path.empty()) return errors::NotFound(dir, " not found");
     // The special "flushed" file exists only if the filesystem's caches have
@@ -296,7 +296,7 @@ class TmpDirFileSystem : public NullFileSystem {
   }
 
   Status CreateDir(const string& dir) override {
-    absl::string_view scheme, host, path;
+    StringPiece scheme, host, path;
     io::ParseURI(dir, &scheme, &host, &path);
     if (scheme != "tmpdirfs") {
       return errors::FailedPrecondition("scheme must be tmpdirfs");
@@ -359,7 +359,7 @@ TEST_F(DefaultEnvTest, LocalTempFilename) {
   // Read from the temporary file and check content.
   std::unique_ptr<RandomAccessFile> file_to_read;
   TF_CHECK_OK(env->NewRandomAccessFile(filename, &file_to_read));
-  absl::string_view content;
+  StringPiece content;
   char scratch[1024];
   CHECK_EQ(error::OUT_OF_RANGE,
            file_to_read->Read(0 /* offset */, 1024 /* n */, &content, scratch)
diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc
index 780cdad39f..3ab542a5d8 100644
--- a/tensorflow/core/platform/file_system.cc
+++ b/tensorflow/core/platform/file_system.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <algorithm>
 #include <deque>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -137,9 +136,9 @@ Status FileSystem::DeleteRecursively(const string& dirname,
 }
 
 Status FileSystem::RecursivelyCreateDir(const string& dirname) {
-  absl::string_view scheme, host, remaining_dir;
+  StringPiece scheme, host, remaining_dir;
   io::ParseURI(dirname, &scheme, &host, &remaining_dir);
-  std::vector<absl::string_view> sub_dirs;
+  std::vector<StringPiece> sub_dirs;
   while (!remaining_dir.empty()) {
     Status status = FileExists(io::CreateURI(scheme, host, remaining_dir));
     if (status.ok()) {
@@ -160,7 +159,7 @@ Status FileSystem::RecursivelyCreateDir(const string& dirname) {
 
   // Now create the directories.
   string built_path(remaining_dir);
-  for (const absl::string_view sub_dir : sub_dirs) {
+  for (const StringPiece sub_dir : sub_dirs) {
     built_path = io::JoinPath(built_path, sub_dir);
     Status status = CreateDir(io::CreateURI(scheme, host, built_path));
     if (!status.ok() && status.code() != tensorflow::error::ALREADY_EXISTS) {
diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h
index e57454b71b..156af6cdea 100644
--- a/tensorflow/core/platform/file_system.h
+++ b/tensorflow/core/platform/file_system.h
@@ -21,9 +21,9 @@ limitations under the License.
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/cord.h"
 #include "tensorflow/core/platform/file_statistics.h"
 #include "tensorflow/core/platform/macros.h"
@@ -236,7 +236,7 @@ class RandomAccessFile {
   /// because of EOF.
   ///
   /// Safe for concurrent use by multiple threads.
-  virtual Status Read(uint64 offset, size_t n, absl::string_view* result,
+  virtual Status Read(uint64 offset, size_t n, StringPiece* result,
                       char* scratch) const = 0;
 
  private:
@@ -253,7 +253,7 @@ class WritableFile {
   virtual ~WritableFile();
 
   /// \brief Append 'data' to the file.
-  virtual Status Append(absl::string_view data) = 0;
+  virtual Status Append(StringPiece data) = 0;
 
   // TODO(ebrevdo): Remove this ifdef when absl is updated.
 #if defined(PLATFORM_GOOGLE)
diff --git a/tensorflow/core/platform/file_system_test.cc b/tensorflow/core/platform/file_system_test.cc
index 5acf71dc6d..a637d42a92 100644
--- a/tensorflow/core/platform/file_system_test.cc
+++ b/tensorflow/core/platform/file_system_test.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <sys/stat.h>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -121,7 +120,7 @@ class InterPlanetaryFileSystem : public NullFileSystem {
   }
 
   void ParsePath(const string& name, string* parsed_path) {
-    absl::string_view scheme, host, path;
+    StringPiece scheme, host, path;
     io::ParseURI(name, &scheme, &host, &path);
     ASSERT_EQ(scheme, "ipfs");
     ASSERT_EQ(host, "solarsystem");
@@ -157,10 +156,10 @@ string Match(InterPlanetaryFileSystem* ipfs, const string& suffix_pattern) {
   if (!s.ok()) {
     return s.ToString();
   } else {
-    std::vector<absl::string_view> trimmed_results;
+    std::vector<StringPiece> trimmed_results;
     std::sort(results.begin(), results.end());
     for (const string& result : results) {
-      absl::string_view trimmed_result(result);
+      StringPiece trimmed_result(result);
       EXPECT_TRUE(str_util::ConsumePrefix(&trimmed_result,
                                           strings::StrCat(kPrefix, "/")));
       trimmed_results.push_back(trimmed_result);
diff --git a/tensorflow/core/platform/fingerprint.h b/tensorflow/core/platform/fingerprint.h
index 71280fbd1d..720dc4c3d6 100644
--- a/tensorflow/core/platform/fingerprint.h
+++ b/tensorflow/core/platform/fingerprint.h
@@ -16,7 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_FINGERPRINT_H_
 #define TENSORFLOW_CORE_PLATFORM_FINGERPRINT_H_
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -39,10 +39,10 @@ struct Fprint128Hasher {
 
 // This is a portable fingerprint interface for strings that will never change.
 // However, it is not suitable for cryptography.
-uint64 Fingerprint64(absl::string_view s);
+uint64 Fingerprint64(StringPiece s);
 
 // 128-bit variant of Fingerprint64 above (same properties and caveats apply).
-Fprint128 Fingerprint128(absl::string_view s);
+Fprint128 Fingerprint128(StringPiece s);
 
 namespace internal {
 // Mixes some of the bits that got propagated to the high bits back into the
diff --git a/tensorflow/core/platform/hadoop/BUILD b/tensorflow/core/platform/hadoop/BUILD
index 6c23f5a61e..7c38c399bd 100644
--- a/tensorflow/core/platform/hadoop/BUILD
+++ b/tensorflow/core/platform/hadoop/BUILD
@@ -20,7 +20,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//third_party/hadoop:hdfs",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -60,6 +59,5 @@ tf_cc_test(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
index 48b272d2ba..eb35531e9f 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <errno.h>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -140,10 +139,10 @@ HadoopFileSystem::~HadoopFileSystem() {}
 // We rely on HDFS connection caching here. The HDFS client calls
 // org.apache.hadoop.fs.FileSystem.get(), which caches the connection
 // internally.
-Status HadoopFileSystem::Connect(absl::string_view fname, hdfsFS* fs) {
+Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) {
   TF_RETURN_IF_ERROR(hdfs_->status());
 
-  absl::string_view scheme, namenode, path;
+  StringPiece scheme, namenode, path;
   io::ParseURI(fname, &scheme, &namenode, &path);
   const string nn(namenode);
 
@@ -153,7 +152,7 @@ Status HadoopFileSystem::Connect(absl::string_view fname, hdfsFS* fs) {
   } else if (scheme == "viewfs") {
     char* defaultFS = nullptr;
     hdfs_->hdfsConfGetStr("fs.defaultFS", &defaultFS);
-    absl::string_view defaultScheme, defaultCluster, defaultPath;
+    StringPiece defaultScheme, defaultCluster, defaultPath;
     io::ParseURI(defaultFS, &defaultScheme, &defaultCluster, &defaultPath);
 
     if (scheme != defaultScheme || namenode != defaultCluster) {
@@ -182,7 +181,7 @@ Status HadoopFileSystem::Connect(absl::string_view fname, hdfsFS* fs) {
 }
 
 string HadoopFileSystem::TranslateName(const string& name) const {
-  absl::string_view scheme, namenode, path;
+  StringPiece scheme, namenode, path;
   io::ParseURI(name, &scheme, &namenode, &path);
   return string(path);
 }
@@ -204,7 +203,7 @@ class HDFSRandomAccessFile : public RandomAccessFile {
     }
   }
 
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     Status s;
     char* dst = scratch;
@@ -243,7 +242,7 @@ class HDFSRandomAccessFile : public RandomAccessFile {
         s = IOError(filename_, errno);
       }
     }
-    *result = absl::string_view(scratch, dst - scratch);
+    *result = StringPiece(scratch, dst - scratch);
     return s;
   }
 
@@ -283,7 +282,7 @@ class HDFSWritableFile : public WritableFile {
     }
   }
 
-  Status Append(absl::string_view data) override {
+  Status Append(StringPiece data) override {
     if (hdfs_->hdfsWrite(fs_, file_, data.data(),
                          static_cast<tSize>(data.size())) == -1) {
       return IOError(filename_, errno);
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.h b/tensorflow/core/platform/hadoop/hadoop_file_system.h
index 3655ee1076..6af7a698ff 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system.h
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
 #define TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/env.h"
 
 extern "C" {
@@ -68,7 +67,7 @@ class HadoopFileSystem : public FileSystem {
   string TranslateName(const string& name) const override;
 
  private:
-  Status Connect(absl::string_view fname, hdfsFS* fs);
+  Status Connect(StringPiece fname, hdfsFS* fs);
   LibHDFS* hdfs_;
 };
 
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc b/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc
index d3659ce248..b207d34749 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/platform/hadoop/hadoop_file_system.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -55,7 +54,7 @@ class HadoopFileSystemTest : public ::testing::Test {
     TF_RETURN_IF_ERROR(hdfs.GetFileSize(fname, &file_size));
 
     content->resize(file_size);
-    absl::string_view result;
+    StringPiece result;
     TF_RETURN_IF_ERROR(
         reader->Read(0, file_size, &result, gtl::string_as_array(content)));
     if (file_size != result.size()) {
@@ -78,7 +77,7 @@ TEST_F(HadoopFileSystemTest, RandomAccessFile) {
 
   string got;
   got.resize(content.size());
-  absl::string_view result;
+  StringPiece result;
   TF_EXPECT_OK(
       reader->Read(0, content.size(), &result, gtl::string_as_array(&got)));
   EXPECT_EQ(content.size(), result.size());
@@ -214,7 +213,7 @@ TEST_F(HadoopFileSystemTest, WriteWhileReading) {
 
   string got;
   got.resize(content1.size());
-  absl::string_view result;
+  StringPiece result;
   TF_EXPECT_OK(
       reader->Read(0, content1.size(), &result, gtl::string_as_array(&got)));
   EXPECT_EQ(content1, result);
diff --git a/tensorflow/core/platform/posix/posix_file_system.cc b/tensorflow/core/platform/posix/posix_file_system.cc
index 2f8526c9b3..c7afab9583 100644
--- a/tensorflow/core/platform/posix/posix_file_system.cc
+++ b/tensorflow/core/platform/posix/posix_file_system.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <fcntl.h>
 #include <stdio.h>
 #include <sys/mman.h>
-#include "absl/strings/string_view.h"
 #if !defined(__APPLE__)
 #include <sys/sendfile.h>
 #endif
@@ -53,7 +52,7 @@ class PosixRandomAccessFile : public RandomAccessFile {
       : filename_(fname), fd_(fd) {}
   ~PosixRandomAccessFile() override { close(fd_); }
 
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     Status s;
     char* dst = scratch;
@@ -71,7 +70,7 @@ class PosixRandomAccessFile : public RandomAccessFile {
         s = IOError(filename_, errno);
       }
     }
-    *result = absl::string_view(scratch, dst - scratch);
+    *result = StringPiece(scratch, dst - scratch);
     return s;
   }
 };
@@ -92,7 +91,7 @@ class PosixWritableFile : public WritableFile {
     }
   }
 
-  Status Append(absl::string_view data) override {
+  Status Append(StringPiece data) override {
     size_t r = fwrite(data.data(), 1, data.size(), file_);
     if (r != data.size()) {
       return IOError(filename_, errno);
@@ -218,7 +217,7 @@ Status PosixFileSystem::GetChildren(const string& dir,
   }
   struct dirent* entry;
   while ((entry = readdir(d)) != nullptr) {
-    absl::string_view basename = entry->d_name;
+    StringPiece basename = entry->d_name;
     if ((basename != ".") && (basename != "..")) {
       result->push_back(entry->d_name);
     }
diff --git a/tensorflow/core/platform/posix/posix_file_system.h b/tensorflow/core/platform/posix/posix_file_system.h
index 78e403859e..752eccea66 100644
--- a/tensorflow/core/platform/posix/posix_file_system.h
+++ b/tensorflow/core/platform/posix/posix_file_system.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_POSIX_POSIX_FILE_SYSTEM_H_
 #define TENSORFLOW_CORE_PLATFORM_POSIX_POSIX_FILE_SYSTEM_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
 
@@ -69,7 +68,7 @@ Status IOError(const string& context, int err_number);
 class LocalPosixFileSystem : public PosixFileSystem {
  public:
   string TranslateName(const string& name) const override {
-    absl::string_view scheme, host, path;
+    StringPiece scheme, host, path;
     io::ParseURI(name, &scheme, &host, &path);
     return string(path);
   }
diff --git a/tensorflow/core/platform/s3/BUILD b/tensorflow/core/platform/s3/BUILD
index f7a0d40083..41184b6fd9 100644
--- a/tensorflow/core/platform/s3/BUILD
+++ b/tensorflow/core/platform/s3/BUILD
@@ -34,7 +34,6 @@ tf_cc_binary(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "@aws",
-        "@com_google_absl//absl/strings",
         "@curl",
         "@protobuf_archive//:protobuf_headers",
     ],
diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc
index ed05b2171c..e0b8e37745 100644
--- a/tensorflow/core/platform/s3/s3_file_system.cc
+++ b/tensorflow/core/platform/s3/s3_file_system.cc
@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/platform/s3/s3_file_system.h"
-
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/file_system_helper.h"
@@ -146,7 +144,7 @@ Status ParseS3Path(const string& fname, bool empty_object_ok, string* bucket,
   if (!bucket || !object) {
     return errors::Internal("bucket and object cannot be null.");
   }
-  absl::string_view scheme, bucketp, objectp;
+  StringPiece scheme, bucketp, objectp;
   io::ParseURI(fname, &scheme, &bucketp, &objectp);
   if (scheme != "s3") {
     return errors::InvalidArgument("S3 path doesn't start with 's3://': ",
@@ -172,7 +170,7 @@ class S3RandomAccessFile : public RandomAccessFile {
                      std::shared_ptr<Aws::S3::S3Client> s3_client)
       : bucket_(bucket), object_(object), s3_client_(s3_client) {}
 
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     Aws::S3::Model::GetObjectRequest getObjectRequest;
     getObjectRequest.WithBucket(bucket_.c_str()).WithKey(object_.c_str());
@@ -184,13 +182,13 @@ class S3RandomAccessFile : public RandomAccessFile {
     auto getObjectOutcome = this->s3_client_->GetObject(getObjectRequest);
     if (!getObjectOutcome.IsSuccess()) {
       n = 0;
-      *result = absl::string_view(scratch, n);
+      *result = StringPiece(scratch, n);
       return Status(error::OUT_OF_RANGE, "Read less bytes than requested");
     }
     n = getObjectOutcome.GetResult().GetContentLength();
     getObjectOutcome.GetResult().GetBody().read(scratch, n);
 
-    *result = absl::string_view(scratch, n);
+    *result = StringPiece(scratch, n);
     return Status::OK();
   }
 
@@ -213,7 +211,7 @@ class S3WritableFile : public WritableFile {
             std::ios_base::binary | std::ios_base::trunc | std::ios_base::in |
                 std::ios_base::out)) {}
 
-  Status Append(absl::string_view data) override {
+  Status Append(StringPiece data) override {
     if (!outfile_) {
       return errors::FailedPrecondition(
           "The internal temporary file is not writable.");
@@ -341,7 +339,7 @@ Status S3FileSystem::NewAppendableFile(const string& fname,
   std::unique_ptr<char[]> buffer(new char[kS3ReadAppendableFileBufferSize]);
   Status status;
   uint64 offset = 0;
-  absl::string_view read_chunk;
+  StringPiece read_chunk;
 
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object));
@@ -374,7 +372,7 @@ Status S3FileSystem::NewReadOnlyMemoryRegionFromFile(
   std::unique_ptr<RandomAccessFile> file;
   TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &file));
 
-  absl::string_view piece;
+  StringPiece piece;
   TF_RETURN_IF_ERROR(file->Read(0, size, &piece, data.get()));
 
   result->reset(new S3ReadOnlyMemoryRegion(std::move(data), size));
diff --git a/tensorflow/core/platform/tensor_coding.cc b/tensorflow/core/platform/tensor_coding.cc
index 2fbf9f6ee3..84601de39a 100644
--- a/tensorflow/core/platform/tensor_coding.cc
+++ b/tensorflow/core/platform/tensor_coding.cc
@@ -17,14 +17,13 @@ limitations under the License.
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/coding.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 namespace port {
 
-void AssignRefCounted(absl::string_view src, core::RefCounted* obj,
-                      string* out) {
+void AssignRefCounted(StringPiece src, core::RefCounted* obj, string* out) {
   out->assign(src.data(), src.size());
 }
 
@@ -40,7 +39,7 @@ void EncodeStringList(const string* strings, int64 n, string* out) {
 
 bool DecodeStringList(const string& src, string* strings, int64 n) {
   std::vector<uint32> sizes(n);
-  absl::string_view reader(src);
+  StringPiece reader(src);
   int64 tot = 0;
   for (auto& v : sizes) {
     if (!core::GetVarint32(&reader, &v)) return false;
diff --git a/tensorflow/core/platform/tensor_coding.h b/tensorflow/core/platform/tensor_coding.h
index 6987de56b6..6c6d75830d 100644
--- a/tensorflow/core/platform/tensor_coding.h
+++ b/tensorflow/core/platform/tensor_coding.h
@@ -18,8 +18,8 @@ limitations under the License.
 #define TENSORFLOW_PLATFORM_TENSOR_CODING_H_
 
 #include <string>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/refcount.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/platform.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -35,8 +35,7 @@ namespace port {
 // Store src contents in *out.  If backing memory for src is shared with *out,
 // will ref obj during the call and will arrange to unref obj when no
 // longer needed.
-void AssignRefCounted(absl::string_view src, core::RefCounted* obj,
-                      string* out);
+void AssignRefCounted(StringPiece src, core::RefCounted* obj, string* out);
 
 // Copy contents of src to dst[0,src.size()-1].
 inline void CopyToArray(const string& src, char* dst) {
diff --git a/tensorflow/core/platform/tracing.cc b/tensorflow/core/platform/tracing.cc
index c1678a2dd9..c0386c0a3f 100644
--- a/tensorflow/core/platform/tracing.cc
+++ b/tensorflow/core/platform/tracing.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <map>
 #include <string>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/hash/hash.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -58,12 +57,11 @@ uint64 GetUniqueArg() {
   return unique_arg.fetch_add(1, std::memory_order_relaxed);
 }
 
-uint64 GetArgForName(absl::string_view name) {
+uint64 GetArgForName(StringPiece name) {
   return Hash64(name.data(), name.size());
 }
 
-string TraceCollector::ConcatenateNames(absl::string_view first,
-                                        absl::string_view second) {
+string TraceCollector::ConcatenateNames(StringPiece first, StringPiece second) {
   std::string result;
   bool has_two_parts = !first.empty() && !second.empty();
   result.reserve(first.size() + second.size() +
diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h
index 8c0d8666a6..aefbe64425 100644
--- a/tensorflow/core/platform/tracing.h
+++ b/tensorflow/core/platform/tracing.h
@@ -23,7 +23,7 @@ limitations under the License.
 #include <map>
 #include <memory>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
@@ -82,7 +82,7 @@ inline const EventCollector* GetEventCollector(EventCategory category) {
 uint64 GetUniqueArg();
 
 // Returns an id for name to pass to RecordEvent/ScopedRegion.
-uint64 GetArgForName(absl::string_view name);
+uint64 GetArgForName(StringPiece name);
 
 // Records an atomic event through the currently registered EventCollector.
 inline void RecordEvent(EventCategory category, uint64 arg) {
@@ -121,7 +121,7 @@ class ScopedRegion {
 
   // Same as ScopedRegion(category, GetArgForName(name)), but faster if
   // EventCollector::IsEnaled() returns false.
-  ScopedRegion(EventCategory category, absl::string_view name)
+  ScopedRegion(EventCategory category, StringPiece name)
       : collector_(GetEventCollector(category)) {
     if (collector_) {
       collector_->StartRegion(GetArgForName(name));
@@ -150,9 +150,9 @@ class TraceCollector {
 
   virtual ~TraceCollector() {}
   virtual std::unique_ptr<Handle> CreateAnnotationHandle(
-      absl::string_view name_part1, absl::string_view name_part2) const = 0;
+      StringPiece name_part1, StringPiece name_part2) const = 0;
   virtual std::unique_ptr<Handle> CreateActivityHandle(
-      absl::string_view name_part1, absl::string_view name_part2,
+      StringPiece name_part1, StringPiece name_part2,
       bool is_expensive) const = 0;
 
   // Returns true if this annotation tracing is enabled for any op.
@@ -163,8 +163,7 @@ class TraceCollector {
   virtual bool IsEnabledForActivities(bool is_expensive) const = 0;
 
  protected:
-  static string ConcatenateNames(absl::string_view first,
-                                 absl::string_view second);
+  static string ConcatenateNames(StringPiece first, StringPiece second);
 
  private:
   friend void SetTraceCollector(const TraceCollector*);
@@ -186,14 +185,14 @@ const TraceCollector* GetTraceCollector();
 // This will add 'my kernels' to both kernels in the profiler UI
 class ScopedAnnotation {
  public:
-  explicit ScopedAnnotation(absl::string_view name)
-      : ScopedAnnotation(name, absl::string_view()) {}
+  explicit ScopedAnnotation(StringPiece name)
+      : ScopedAnnotation(name, StringPiece()) {}
 
   // If tracing is enabled, add a name scope of
   // "<name_part1>:<name_part2>".  This can be cheaper than the
   // single-argument constructor because the concatenation of the
   // label string is only done if tracing is enabled.
-  ScopedAnnotation(absl::string_view name_part1, absl::string_view name_part2)
+  ScopedAnnotation(StringPiece name_part1, StringPiece name_part2)
       : handle_([&] {
           auto trace_collector = GetTraceCollector();
           return trace_collector ? trace_collector->CreateAnnotationHandle(
@@ -212,14 +211,14 @@ class ScopedAnnotation {
 // the object is destroyed.
 class ScopedActivity {
  public:
-  explicit ScopedActivity(absl::string_view name, bool is_expensive = true)
-      : ScopedActivity(name, absl::string_view(), is_expensive) {}
+  explicit ScopedActivity(StringPiece name, bool is_expensive = true)
+      : ScopedActivity(name, StringPiece(), is_expensive) {}
 
   // If tracing is enabled, set up an activity with a label of
   // "<name_part1>:<name_part2>".  This can be cheaper than the
   // single-argument constructor because the concatenation of the
   // label string is only done if tracing is enabled.
-  ScopedActivity(absl::string_view name_part1, absl::string_view name_part2,
+  ScopedActivity(StringPiece name_part1, StringPiece name_part2,
                  bool is_expensive = true)
       : handle_([&] {
           auto trace_collector = GetTraceCollector();
diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc
index 8d180272fe..6cf79634d7 100644
--- a/tensorflow/core/platform/windows/windows_file_system.cc
+++ b/tensorflow/core/platform/windows/windows_file_system.cc
@@ -25,7 +25,6 @@ limitations under the License.
 #include <sys/types.h>
 #include <time.h>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/error_codes.pb.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/env.h"
@@ -113,7 +112,7 @@ class WindowsRandomAccessFile : public RandomAccessFile {
     }
   }
 
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     Status s;
     char* dst = scratch;
@@ -131,7 +130,7 @@ class WindowsRandomAccessFile : public RandomAccessFile {
         s = IOError(filename_, errno);
       }
     }
-    *result = absl::string_view(scratch, dst - scratch);
+    *result = StringPiece(scratch, dst - scratch);
     return s;
   }
 };
@@ -151,7 +150,7 @@ class WindowsWritableFile : public WritableFile {
     }
   }
 
-  Status Append(absl::string_view data) override {
+  Status Append(StringPiece data) override {
     DWORD bytes_written = 0;
     DWORD data_size = static_cast<DWORD>(data.size());
     BOOL write_result =
@@ -414,7 +413,7 @@ Status WindowsFileSystem::GetChildren(const string& dir,
 
   do {
     string file_name = WideCharToUtf8(find_data.cFileName);
-    const absl::string_view basename = file_name;
+    const StringPiece basename = file_name;
     if (basename != "." && basename != "..") {
       result->push_back(file_name);
     }
diff --git a/tensorflow/core/platform/windows/windows_file_system.h b/tensorflow/core/platform/windows/windows_file_system.h
index 7ba471eec4..1f4c535f24 100644
--- a/tensorflow/core/platform/windows/windows_file_system.h
+++ b/tensorflow/core/platform/windows/windows_file_system.h
@@ -16,7 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_WINDOWS_WINDOWS_FILE_SYSTEM_H_
 #define TENSORFLOW_CORE_PLATFORM_WINDOWS_WINDOWS_FILE_SYSTEM_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/file_system.h"
 
@@ -70,7 +69,7 @@ class WindowsFileSystem : public FileSystem {
 class LocalWinFileSystem : public WindowsFileSystem {
  public:
   string TranslateName(const string& name) const override {
-    absl::string_view scheme, host, path;
+    StringPiece scheme, host, path;
     io::ParseURI(name, &scheme, &host, &path);
     return string(path);
   }
diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD
index c0afedabf9..8dcfde9a2a 100644
--- a/tensorflow/core/profiler/internal/BUILD
+++ b/tensorflow/core/profiler/internal/BUILD
@@ -254,7 +254,6 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:regexp_internal",
         "//tensorflow/core/profiler:tfprof_options",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/core/profiler/internal/tfprof_utils.cc b/tensorflow/core/profiler/internal/tfprof_utils.cc
index 6ae1c1533a..7712ebd926 100644
--- a/tensorflow/core/profiler/internal/tfprof_utils.cc
+++ b/tensorflow/core/profiler/internal/tfprof_utils.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <memory>
 #include <set>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -92,12 +91,12 @@ tensorflow::Status ReturnError(const std::vector<string>& pieces, int idx) {
       strings::StrCat("Invalid option '", pieces[idx], "' value: '", val, "'"));
 }
 
-bool CaseEqual(absl::string_view s1, absl::string_view s2) {
+bool CaseEqual(StringPiece s1, StringPiece s2) {
   if (s1.size() != s2.size()) return false;
   return str_util::Lowercase(s1) == str_util::Lowercase(s2);
 }
 
-bool StringToBool(absl::string_view str, bool* value) {
+bool StringToBool(StringPiece str, bool* value) {
   CHECK(value != nullptr) << "NULL output boolean given.";
   if (CaseEqual(str, "true") || CaseEqual(str, "t") || CaseEqual(str, "yes") ||
       CaseEqual(str, "y") || CaseEqual(str, "1")) {
diff --git a/tensorflow/core/util/command_line_flags.cc b/tensorflow/core/util/command_line_flags.cc
index e253b3fae1..f1196fdfec 100644
--- a/tensorflow/core/util/command_line_flags.cc
+++ b/tensorflow/core/util/command_line_flags.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
@@ -26,7 +26,7 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-bool ParseStringFlag(absl::string_view arg, absl::string_view flag,
+bool ParseStringFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
                      const std::function<bool(string)>& hook,
                      bool* value_parsing_ok) {
   *value_parsing_ok = true;
@@ -40,7 +40,7 @@ bool ParseStringFlag(absl::string_view arg, absl::string_view flag,
   return false;
 }
 
-bool ParseInt32Flag(absl::string_view arg, absl::string_view flag,
+bool ParseInt32Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
                     const std::function<bool(int32)>& hook,
                     bool* value_parsing_ok) {
   *value_parsing_ok = true;
@@ -62,7 +62,7 @@ bool ParseInt32Flag(absl::string_view arg, absl::string_view flag,
   return false;
 }
 
-bool ParseInt64Flag(absl::string_view arg, absl::string_view flag,
+bool ParseInt64Flag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
                     const std::function<bool(int64)>& hook,
                     bool* value_parsing_ok) {
   *value_parsing_ok = true;
@@ -84,7 +84,7 @@ bool ParseInt64Flag(absl::string_view arg, absl::string_view flag,
   return false;
 }
 
-bool ParseBoolFlag(absl::string_view arg, absl::string_view flag,
+bool ParseBoolFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
                    const std::function<bool(bool)>& hook,
                    bool* value_parsing_ok) {
   *value_parsing_ok = true;
@@ -112,7 +112,7 @@ bool ParseBoolFlag(absl::string_view arg, absl::string_view flag,
   return false;
 }
 
-bool ParseFloatFlag(absl::string_view arg, absl::string_view flag,
+bool ParseFloatFlag(tensorflow::StringPiece arg, tensorflow::StringPiece flag,
                     const std::function<bool(float)>& hook,
                     bool* value_parsing_ok) {
   *value_parsing_ok = true;
diff --git a/tensorflow/core/util/device_name_utils.cc b/tensorflow/core/util/device_name_utils.cc
index 33167d7662..8c24076aa9 100644
--- a/tensorflow/core/util/device_name_utils.cc
+++ b/tensorflow/core/util/device_name_utils.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/util/device_name_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -30,7 +29,7 @@ static bool IsAlpha(char c) {
 static bool IsAlphaNum(char c) { return IsAlpha(c) || (c >= '0' && c <= '9'); }
 
 // Returns true iff "in" is a valid job name.
-static bool IsJobName(absl::string_view in) {
+static bool IsJobName(StringPiece in) {
   if (in.empty()) return false;
   if (!IsAlpha(in[0])) return false;
   for (size_t i = 1; i < in.size(); ++i) {
@@ -40,7 +39,7 @@ static bool IsJobName(absl::string_view in) {
 }
 
 // Returns true and fills in "*job" iff "*in" starts with a job name.
-static bool ConsumeJobName(absl::string_view* in, string* job) {
+static bool ConsumeJobName(StringPiece* in, string* job) {
   if (in->empty()) return false;
   if (!IsAlpha((*in)[0])) return false;
   size_t i = 1;
@@ -58,7 +57,7 @@ static bool ConsumeJobName(absl::string_view* in, string* job) {
 
 // Returns true and fills in "*device_type" iff "*in" starts with a device type
 // name.
-static bool ConsumeDeviceType(absl::string_view* in, string* device_type) {
+static bool ConsumeDeviceType(StringPiece* in, string* device_type) {
   if (in->empty()) return false;
   if (!IsAlpha((*in)[0])) return false;
   size_t i = 1;
@@ -76,7 +75,7 @@ static bool ConsumeDeviceType(absl::string_view* in, string* device_type) {
 
 // Returns true and fills in "*val" iff "*in" starts with a decimal
 // number.
-static bool ConsumeNumber(absl::string_view* in, int* val) {
+static bool ConsumeNumber(StringPiece* in, int* val) {
   uint64 tmp;
   if (str_util::ConsumeLeadingDigits(in, &tmp)) {
     *val = tmp;
@@ -112,7 +111,7 @@ string LegacyName(const string& job, int replica, int task, const string& type,
 }
 }  // anonymous namespace
 
-bool DeviceNameUtils::ParseFullName(absl::string_view fullname, ParsedName* p) {
+bool DeviceNameUtils::ParseFullName(StringPiece fullname, ParsedName* p) {
   p->Clear();
   if (fullname == "/") {
     return true;
@@ -214,8 +213,8 @@ void CompleteName(const DeviceNameUtils::ParsedName& parsed_basename,
 }  // namespace
 
 /* static */
-Status DeviceNameUtils::CanonicalizeDeviceName(absl::string_view fullname,
-                                               absl::string_view basename,
+Status DeviceNameUtils::CanonicalizeDeviceName(StringPiece fullname,
+                                               StringPiece basename,
                                                string* canonical_name) {
   *canonical_name = "";
   ParsedName parsed_basename;
@@ -391,8 +390,7 @@ bool DeviceNameUtils::IsSameAddressSpace(const ParsedName& a,
 }
 
 /* static */
-bool DeviceNameUtils::IsSameAddressSpace(absl::string_view src,
-                                         absl::string_view dst) {
+bool DeviceNameUtils::IsSameAddressSpace(StringPiece src, StringPiece dst) {
   ParsedName x;
   ParsedName y;
   return ParseFullName(src, &x) && ParseFullName(dst, &y) &&
@@ -400,27 +398,27 @@ bool DeviceNameUtils::IsSameAddressSpace(absl::string_view src,
 }
 
 /* static */
-string DeviceNameUtils::LocalName(absl::string_view type, int id) {
+string DeviceNameUtils::LocalName(StringPiece type, int id) {
   return strings::StrCat("/device:", type, ":", id);
 }
 
 namespace {
 // Returns the legacy local device name given its "type" and "id" (which is
 // '/device:type:id').
-string LegacyLocalName(absl::string_view type, int id) {
+string LegacyLocalName(StringPiece type, int id) {
   return strings::StrCat(type, ":", id);
 }
 }  // anonymous namespace
 
 /* static */
-string DeviceNameUtils::LocalName(absl::string_view fullname) {
+string DeviceNameUtils::LocalName(StringPiece fullname) {
   ParsedName x;
   CHECK(ParseFullName(fullname, &x)) << fullname;
   return LocalName(x.type, x.id);
 }
 
 /* static */
-bool DeviceNameUtils::ParseLocalName(absl::string_view name, ParsedName* p) {
+bool DeviceNameUtils::ParseLocalName(StringPiece name, ParsedName* p) {
   if (!ConsumeDeviceType(&name, &p->type)) {
     return false;
   }
@@ -436,7 +434,7 @@ bool DeviceNameUtils::ParseLocalName(absl::string_view name, ParsedName* p) {
 }
 
 /* static */
-bool DeviceNameUtils::SplitDeviceName(absl::string_view name, string* task,
+bool DeviceNameUtils::SplitDeviceName(StringPiece name, string* task,
                                       string* device) {
   ParsedName pn;
   if (ParseFullName(name, &pn) && pn.has_type && pn.has_id) {
diff --git a/tensorflow/core/util/device_name_utils.h b/tensorflow/core/util/device_name_utils.h
index 21074223a2..3f0bc60562 100644
--- a/tensorflow/core/util/device_name_utils.h
+++ b/tensorflow/core/util/device_name_utils.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 
@@ -86,15 +86,15 @@ class DeviceNameUtils {
     int id = 0;
   };
   // Parses "fullname" into "*parsed". Returns true iff succeeds.
-  static bool ParseFullName(absl::string_view fullname, ParsedName* parsed);
+  static bool ParseFullName(StringPiece fullname, ParsedName* parsed);
 
   // Canonicalizes "fullname" into "*canonical_name". Uses a fully specified
   // basename to fill in fields that are missing. Accepts both legacy, newer
   // and local versions of the device spec. Returns the newer version of the
   // device spec. If we were unable to interpret / parse "fullname" returns
   // an error and *canonical_name is set to "".
-  static Status CanonicalizeDeviceName(absl::string_view fullname,
-                                       absl::string_view basename,
+  static Status CanonicalizeDeviceName(StringPiece fullname,
+                                       StringPiece basename,
                                        string* canonical_name);
 
   // Returns true if "name" specifies any non-trivial constraint on the device.
@@ -133,20 +133,20 @@ class DeviceNameUtils {
 
   // Returns true iff devices identified by 'src' and 'dst' are in the
   // same address space.
-  static bool IsSameAddressSpace(absl::string_view src, absl::string_view dst);
+  static bool IsSameAddressSpace(StringPiece src, StringPiece dst);
   static bool IsSameAddressSpace(const ParsedName& src, const ParsedName& dst);
 
   // Returns the local device given its "type" and "id".
-  static string LocalName(absl::string_view type, int id);
+  static string LocalName(StringPiece type, int id);
 
   // Returns a short local device name (cpu:0, gpu:1, etc) based on
   // the given fullname.
-  static string LocalName(absl::string_view fullname);
+  static string LocalName(StringPiece fullname);
 
   // If "name" is a valid local device name (cpu:0, gpu:1, etc.),
   // fills in parsed.type and parsed.id accordingly. Returns true iff
   // succeeds.
-  static bool ParseLocalName(absl::string_view name, ParsedName* parsed);
+  static bool ParseLocalName(StringPiece name, ParsedName* parsed);
 
   // Splits a fully-qualified device name into a task identifier and a
   // relative device identifier. It first parses "name" using
@@ -155,8 +155,7 @@ class DeviceNameUtils {
   // component into *device.  This function will still return true if
   // the task component is empty, but it requires the relative device
   // component to be fully specified.
-  static bool SplitDeviceName(absl::string_view name, string* task,
-                              string* device);
+  static bool SplitDeviceName(StringPiece name, string* task, string* device);
 
   static string ParsedNameToString(const ParsedName& pn);
 
diff --git a/tensorflow/core/util/device_name_utils_test.cc b/tensorflow/core/util/device_name_utils_test.cc
index 11c8171f27..dafb3b20b9 100644
--- a/tensorflow/core/util/device_name_utils_test.cc
+++ b/tensorflow/core/util/device_name_utils_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/util/device_name_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -279,7 +278,7 @@ TEST(DeviceNameUtilsTest, Basic) {
   }
 }
 
-static bool IsCSHelper(absl::string_view pattern, absl::string_view actual) {
+static bool IsCSHelper(StringPiece pattern, StringPiece actual) {
   DeviceNameUtils::ParsedName p, a;
   EXPECT_TRUE(DeviceNameUtils::ParseFullName(pattern, &p));
   EXPECT_TRUE(DeviceNameUtils::ParseFullName(actual, &a));
@@ -304,7 +303,7 @@ TEST(DeviceNameUtilsTest, IsCompleteSpecification) {
       IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
 }
 
-static bool IsSpecHelper(absl::string_view pattern, absl::string_view actual) {
+static bool IsSpecHelper(StringPiece pattern, StringPiece actual) {
   DeviceNameUtils::ParsedName p, a;
   EXPECT_TRUE(DeviceNameUtils::ParseFullName(pattern, &p));
   EXPECT_TRUE(DeviceNameUtils::ParseFullName(actual, &a));
diff --git a/tensorflow/core/util/env_var.cc b/tensorflow/core/util/env_var.cc
index 1753d22417..2604a5d66a 100644
--- a/tensorflow/core/util/env_var.cc
+++ b/tensorflow/core/util/env_var.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <stdlib.h>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -26,7 +25,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-Status ReadBoolFromEnvVar(absl::string_view env_var_name, bool default_val,
+Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
                           bool* value) {
   *value = default_val;
   const char* tf_env_var_val = getenv(string(env_var_name).c_str());
@@ -46,7 +45,7 @@ Status ReadBoolFromEnvVar(absl::string_view env_var_name, bool default_val,
       tf_env_var_val, ". Use the default value: ", default_val));
 }
 
-Status ReadInt64FromEnvVar(absl::string_view env_var_name, int64 default_val,
+Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
                            int64* value) {
   *value = default_val;
   const char* tf_env_var_val = getenv(string(env_var_name).c_str());
@@ -61,8 +60,8 @@ Status ReadInt64FromEnvVar(absl::string_view env_var_name, int64 default_val,
       tf_env_var_val, ". Use the default value: ", default_val));
 }
 
-Status ReadStringFromEnvVar(absl::string_view env_var_name,
-                            absl::string_view default_val, string* value) {
+Status ReadStringFromEnvVar(StringPiece env_var_name, StringPiece default_val,
+                            string* value) {
   const char* tf_env_var_val = getenv(string(env_var_name).c_str());
   if (tf_env_var_val != nullptr) {
     *value = tf_env_var_val;
diff --git a/tensorflow/core/util/env_var.h b/tensorflow/core/util/env_var.h
index 89c591844f..724ca35729 100644
--- a/tensorflow/core/util/env_var.h
+++ b/tensorflow/core/util/env_var.h
@@ -16,8 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_ENV_VAR_H_
 #define TENSORFLOW_CORE_UTIL_ENV_VAR_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -27,19 +27,19 @@ namespace tensorflow {
 // case insensitive "false" is interpreted as false. A string "1" or a case
 // insensitive "true" is interpreted as true. Otherwise, an error status is
 // returned.
-Status ReadBoolFromEnvVar(absl::string_view env_var_name, bool default_val,
+Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
                           bool* value);
 
 // Returns an int64 into "value" from the environmental variable "env_var_name".
 // If it is unset, the default value is used.
 // If the string cannot be parsed into int64, an error status is returned.
-Status ReadInt64FromEnvVar(absl::string_view env_var_name, int64 default_val,
+Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
                            int64* value);
 
 // Returns a string into "value" from the environmental variable "env_var_name".
 // If it is unset, the default value is used.
-Status ReadStringFromEnvVar(absl::string_view env_var_name,
-                            absl::string_view default_val, string* value);
+Status ReadStringFromEnvVar(StringPiece env_var_name, StringPiece default_val,
+                            string* value);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/util/events_writer.cc b/tensorflow/core/util/events_writer.cc
index a0605a758d..aaaba913a7 100644
--- a/tensorflow/core/util/events_writer.cc
+++ b/tensorflow/core/util/events_writer.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <stddef.h>  // for NULL
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/path.h"
@@ -103,7 +102,7 @@ string EventsWriter::FileName() {
   return filename_;
 }
 
-void EventsWriter::WriteSerializedEvent(absl::string_view event_str) {
+void EventsWriter::WriteSerializedEvent(StringPiece event_str) {
   if (recordio_writer_ == nullptr) {
     if (!InitIfNeeded().ok()) {
       LOG(ERROR) << "Write failed because file could not be opened.";
diff --git a/tensorflow/core/util/events_writer.h b/tensorflow/core/util/events_writer.h
index 9a60ba59fc..d5952c3cbd 100644
--- a/tensorflow/core/util/events_writer.h
+++ b/tensorflow/core/util/events_writer.h
@@ -19,7 +19,6 @@ limitations under the License.
 #include <memory>
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/record_writer.h"
 #include "tensorflow/core/platform/env.h"
@@ -67,7 +66,7 @@ class EventsWriter {
   // Append "event_str", a serialized Event, to the file.
   // Note that this function does NOT check that de-serializing event_str
   // results in a valid Event proto.  The tensorflow:: bit makes SWIG happy.
-  void WriteSerializedEvent(absl::string_view event_str);
+  void WriteSerializedEvent(tensorflow::StringPiece event_str);
 
   // EventWriter automatically flushes and closes on destruction, but
   // these two methods are provided for users who want to write to disk sooner
diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc
index 3f77843518..e52d55e2ff 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.cc
+++ b/tensorflow/core/util/example_proto_fast_parsing.cc
@@ -16,7 +16,6 @@ limitations under the License.
 
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb_text.h"
 #include "tensorflow/core/framework/numeric_op.h"
@@ -67,7 +66,7 @@ namespace parsed {
 class Feature {
  public:
   Feature() {}
-  explicit Feature(absl::string_view serialized) : serialized_(serialized) {}
+  explicit Feature(StringPiece serialized) : serialized_(serialized) {}
 
   Status ParseDataType(DataType* dtype) {
     DCHECK(dtype != nullptr);
@@ -224,14 +223,14 @@ class Feature {
     return true;
   }
 
-  absl::string_view GetSerialized() const { return serialized_; }
+  StringPiece GetSerialized() const { return serialized_; }
 
  private:
   // TODO(lew): Pair of uint8* would be more natural.
-  absl::string_view serialized_;
+  StringPiece serialized_;
 };
 
-using FeatureMapEntry = std::pair<absl::string_view, Feature>;
+using FeatureMapEntry = std::pair<StringPiece, Feature>;
 using Example = std::vector<FeatureMapEntry>;
 
 }  // namespace parsed
@@ -261,14 +260,13 @@ inline bool SkipExtraneousTag(protobuf::io::CodedInputStream* stream) {
   return false;  // unrecognized tag type
 }
 
-bool ParseString(protobuf::io::CodedInputStream* stream,
-                 absl::string_view* result) {
+bool ParseString(protobuf::io::CodedInputStream* stream, StringPiece* result) {
   DCHECK(stream != nullptr);
   DCHECK(result != nullptr);
   uint32 length;
   if (!stream->ReadVarint32(&length)) return false;
   if (length == 0) {
-    *result = absl::string_view(nullptr, 0);
+    *result = StringPiece(nullptr, 0);
     return true;
   }
   const void* stream_alias;
@@ -277,7 +275,7 @@ bool ParseString(protobuf::io::CodedInputStream* stream,
     return false;
   }
   if (static_cast<uint32>(stream_size) < length) return false;
-  *result = absl::string_view(static_cast<const char*>(stream_alias), length);
+  *result = StringPiece(static_cast<const char*>(stream_alias), length);
   stream->Skip(length);
   return true;
 }
@@ -292,7 +290,7 @@ bool ParseFeatureMapEntry(protobuf::io::CodedInputStream* stream,
   if (!stream->ExpectTag(kDelimitedTag(1))) return false;
   if (!ParseString(stream, &feature_map_entry->first)) return false;
   if (!stream->ExpectTag(kDelimitedTag(2))) return false;
-  absl::string_view feature_string_piece;
+  StringPiece feature_string_piece;
   if (!ParseString(stream, &feature_string_piece)) return false;
   feature_map_entry->second = parsed::Feature(feature_string_piece);
   if (!stream->ExpectAtEnd()) return false;
@@ -334,7 +332,7 @@ bool ParseExample(protobuf::io::CodedInputStream* stream,
   return true;
 }
 
-bool ParseExample(absl::string_view serialized, parsed::Example* example) {
+bool ParseExample(StringPiece serialized, parsed::Example* example) {
   DCHECK(example != nullptr);
   protobuf::io::CodedInputStream stream(
       reinterpret_cast<const uint8*>(serialized.data()), serialized.size());
@@ -441,7 +439,7 @@ struct SparseBuffer {
 };
 
 struct SeededHasher {
-  uint64 operator()(absl::string_view s) const {
+  uint64 operator()(StringPiece s) const {
     return Hash64(s.data(), s.size(), seed);
   }
   uint64 seed{0xDECAFCAFFE};
@@ -469,7 +467,7 @@ class LimitedArraySlice {
   T* end_;
 };
 
-void LogDenseFeatureDataLoss(absl::string_view feature_name) {
+void LogDenseFeatureDataLoss(StringPiece feature_name) {
   LOG(WARNING) << "Data loss! Feature '" << feature_name
                << "' is present in multiple concatenated "
                   "tf.Examples. Ignoring all but last one.";
@@ -480,7 +478,7 @@ void LogDenseFeatureDataLoss(absl::string_view feature_name) {
   duplicated_dense_feature->GetCell()->IncrementBy(1);
 }
 
-void LogSparseFeatureDataLoss(absl::string_view feature_name) {
+void LogSparseFeatureDataLoss(StringPiece feature_name) {
   LOG(WARNING) << "Data loss! Feature '" << feature_name
                << "' is present in multiple concatenated "
                   "tf.Examples. Ignoring all but last one.";
@@ -525,7 +523,7 @@ Status FastParseSerializedExample(
     parsed::FeatureMapEntry& name_and_feature =
         parsed_example[parsed_example_size - i - 1];
 
-    const absl::string_view feature_name = name_and_feature.first;
+    const StringPiece feature_name = name_and_feature.first;
     parsed::Feature& feature = name_and_feature.second;
 
     std::pair<size_t, Type> d_and_type;
@@ -544,7 +542,7 @@ Status FastParseSerializedExample(
       if (feature_name != config_feature_name) continue;
     }
 
-    auto example_error = [&](absl::string_view suffix) {
+    auto example_error = [&](StringPiece suffix) {
       return errors::InvalidArgument("Name: ", example_name,
                                      ", Key: ", feature_name,
                                      ", Index: ", example_index, ".  ", suffix);
@@ -587,7 +585,7 @@ Status FastParseSerializedExample(
 
         const std::size_t offset = example_index * num_elements;
 
-        auto shape_error = [&](size_t size, absl::string_view type_str) {
+        auto shape_error = [&](size_t size, StringPiece type_str) {
           return example_error(strings::StrCat(
               "Number of ", type_str,
               " values != expected.  "
@@ -639,7 +637,7 @@ Status FastParseSerializedExample(
               "Expected type: ", DataTypeString(config.dense[d].dtype)));
         }
 
-        auto shape_error = [&](size_t size, absl::string_view type_str) {
+        auto shape_error = [&](size_t size, StringPiece type_str) {
           return example_error(strings::StrCat(
               "Number of ", type_str,
               " values is not a multiple of stride length. Saw ", size,
@@ -1277,7 +1275,7 @@ Status FastParseSingleExample(const Config& config, const string& serialized,
     parsed::FeatureMapEntry& name_and_feature =
         parsed_example[parsed_example_size - i - 1];
 
-    const absl::string_view feature_name = name_and_feature.first;
+    const StringPiece feature_name = name_and_feature.first;
     parsed::Feature& feature = name_and_feature.second;
 
     std::pair<size_t, Type> d_and_type;
@@ -1296,7 +1294,7 @@ Status FastParseSingleExample(const Config& config, const string& serialized,
       if (feature_name != config_feature_name) continue;
     }
 
-    auto example_error = [feature_name](absl::string_view suffix) {
+    auto example_error = [feature_name](StringPiece suffix) {
       return errors::InvalidArgument("Key: ", feature_name, ".  ", suffix);
     };
 
@@ -1729,8 +1727,8 @@ Status FastParseSequenceExample(
   DCHECK(context_result != nullptr);
   DCHECK(feature_list_result != nullptr);
   DCHECK(dense_feature_lengths != nullptr);
-  std::map<absl::string_view, bool> context_is_sparse;
-  std::map<absl::string_view, std::pair<DataType, size_t>>
+  std::map<StringPiece, bool> context_is_sparse;
+  std::map<StringPiece, std::pair<DataType, size_t>>
       context_feature_type_and_lengths;
   if (!example_names.empty() && example_names.size() != num_examples) {
     return errors::InvalidArgument(
@@ -1761,8 +1759,8 @@ Status FastParseSequenceExample(
     }
     context_is_sparse[c.feature_name] = false;
   }
-  std::map<absl::string_view, bool> sequence_is_sparse;
-  std::map<absl::string_view, std::pair<DataType, size_t>>
+  std::map<StringPiece, bool> sequence_is_sparse;
+  std::map<StringPiece, std::pair<DataType, size_t>>
       sequence_feature_type_and_lengths;
   for (auto& c : feature_list_config.sparse) {
     TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
@@ -1781,10 +1779,10 @@ Status FastParseSequenceExample(
     sequence_is_sparse[c.feature_name] = false;
   }
 
-  std::vector<std::map<absl::string_view, absl::string_view>>
-      all_context_features(num_examples);
-  std::vector<std::map<absl::string_view, absl::string_view>>
-      all_sequence_features(num_examples);
+  std::vector<std::map<StringPiece, StringPiece>> all_context_features(
+      num_examples);
+  std::vector<std::map<StringPiece, StringPiece>> all_sequence_features(
+      num_examples);
   const string kUnknown = "<unknown>";
   for (int d = 0; d < num_examples; d++) {
     const string& example = serialized[d];
@@ -1800,8 +1798,8 @@ Status FastParseSequenceExample(
 
     // Extract pointers to all features within this serialized example.
     while (!stream.ExpectAtEnd()) {
-      std::map<absl::string_view, absl::string_view>* features = nullptr;
-      const std::map<absl::string_view, std::pair<DataType, size_t>>* config =
+      std::map<StringPiece, StringPiece>* features = nullptr;
+      const std::map<StringPiece, std::pair<DataType, size_t>>* config =
           nullptr;
       if (stream.ExpectTag(kDelimitedTag(1))) {
         // Context
@@ -1823,7 +1821,7 @@ Status FastParseSequenceExample(
         }
         auto limit = stream.PushLimit(length);
         while (!stream.ExpectAtEnd()) {
-          absl::string_view key, value;
+          StringPiece key, value;
           uint32 length;
           if (!stream.ExpectTag(kDelimitedTag(1)) ||
               !stream.ReadVarint32(&length)) {
diff --git a/tensorflow/core/util/memmapped_file_system.cc b/tensorflow/core/util/memmapped_file_system.cc
index 35f0062556..d3439cbc93 100644
--- a/tensorflow/core/util/memmapped_file_system.cc
+++ b/tensorflow/core/util/memmapped_file_system.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/util/memmapped_file_system.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -57,16 +56,16 @@ class RandomAccessFileFromMemmapped : public RandomAccessFile {
 
   ~RandomAccessFileFromMemmapped() override = default;
 
-  Status Read(uint64 offset, size_t to_read, absl::string_view* result,
+  Status Read(uint64 offset, size_t to_read, StringPiece* result,
               char* scratch) const override {
     if (offset >= length_) {
-      *result = absl::string_view(scratch, 0);
+      *result = StringPiece(scratch, 0);
       return Status(error::OUT_OF_RANGE, "Read after file end");
     }
     const uint64 region_left =
         std::min(length_ - offset, static_cast<uint64>(to_read));
-    *result = absl::string_view(reinterpret_cast<const char*>(data_) + offset,
-                                region_left);
+    *result =
+        StringPiece(reinterpret_cast<const char*>(data_) + offset, region_left);
     return (region_left == to_read)
                ? Status::OK()
                : Status(error::OUT_OF_RANGE, "Read less bytes than requested");
diff --git a/tensorflow/core/util/memmapped_file_system_test.cc b/tensorflow/core/util/memmapped_file_system_test.cc
index b5608ed6cc..504d2d353f 100644
--- a/tensorflow/core/util/memmapped_file_system_test.cc
+++ b/tensorflow/core/util/memmapped_file_system_test.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/util/memmapped_file_system.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
@@ -87,8 +86,8 @@ TEST(MemmappedFileSystemTest, SimpleTest) {
   // The memory region can be bigger but not less than Tensor size.
   ASSERT_GE(memory_region->length(), test_tensor.TotalBytes());
   EXPECT_EQ(test_tensor.tensor_data(),
-            absl::string_view(static_cast<const char*>(memory_region->data()),
-                              test_tensor.TotalBytes()));
+            StringPiece(static_cast<const char*>(memory_region->data()),
+                        test_tensor.TotalBytes()));
   // Check that GetFileSize works.
   uint64 file_size = 0;
   TF_ASSERT_OK(memmapped_env.GetFileSize(kTensor2FileName, &file_size));
diff --git a/tensorflow/core/util/memmapped_file_system_writer.cc b/tensorflow/core/util/memmapped_file_system_writer.cc
index a312a0d4d3..9556ee385f 100644
--- a/tensorflow/core/util/memmapped_file_system_writer.cc
+++ b/tensorflow/core/util/memmapped_file_system_writer.cc
@@ -15,7 +15,6 @@ limitations under the License.
 #include "tensorflow/core/util/memmapped_file_system_writer.h"
 
 #include <algorithm>
-#include "absl/strings/string_view.h"
 
 namespace tensorflow {
 
@@ -81,7 +80,7 @@ Status MemmappedFileSystemWriter::SaveProtobuf(
 
 namespace {
 
-absl::string_view EncodeUint64LittleEndian(uint64 val, char* output_buffer) {
+StringPiece EncodeUint64LittleEndian(uint64 val, char* output_buffer) {
   for (unsigned int i = 0; i < sizeof(uint64); ++i) {
     output_buffer[i] = (val >> i * 8);
   }
@@ -117,7 +116,7 @@ Status MemmappedFileSystemWriter::AdjustAlignment(uint64 alignment) {
   static constexpr uint64 kFillerBufferSize = 16;
   const char kFillerBuffer[kFillerBufferSize] = {};
   for (uint64 rest = to_write_for_alignment; rest > 0;) {
-    absl::string_view sp(kFillerBuffer, std::min(rest, kFillerBufferSize));
+    StringPiece sp(kFillerBuffer, std::min(rest, kFillerBufferSize));
     TF_RETURN_IF_ERROR(output_file_->Append(sp));
     rest -= sp.size();
     output_file_offset_ += sp.size();
diff --git a/tensorflow/core/util/mirror_pad_mode.cc b/tensorflow/core/util/mirror_pad_mode.cc
index 629a0b2de7..433d8aad55 100644
--- a/tensorflow/core/util/mirror_pad_mode.cc
+++ b/tensorflow/core/util/mirror_pad_mode.cc
@@ -15,14 +15,13 @@ limitations under the License.
 
 #include "tensorflow/core/util/mirror_pad_mode.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
 
-Status GetNodeAttr(const NodeDef& node_def, absl::string_view attr_name,
+Status GetNodeAttr(const NodeDef& node_def, StringPiece attr_name,
                    MirrorPadMode* value) {
   string str_value;
   TF_RETURN_IF_ERROR(GetNodeAttr(node_def, attr_name, &str_value));
diff --git a/tensorflow/core/util/mirror_pad_mode.h b/tensorflow/core/util/mirror_pad_mode.h
index 026fd62503..ceee9b06b0 100644
--- a/tensorflow/core/util/mirror_pad_mode.h
+++ b/tensorflow/core/util/mirror_pad_mode.h
@@ -21,7 +21,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -45,7 +44,7 @@ string GetMirrorPadModeAttrString();
 class NodeDef;
 
 // Specialization to parse an attribute directly into a MirrorPadMode enum.
-Status GetNodeAttr(const NodeDef& node_def, absl::string_view attr_name,
+Status GetNodeAttr(const NodeDef& node_def, StringPiece attr_name,
                    MirrorPadMode* value);
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/util/padding.cc b/tensorflow/core/util/padding.cc
index 1941d394c3..117de5ee4b 100644
--- a/tensorflow/core/util/padding.cc
+++ b/tensorflow/core/util/padding.cc
@@ -15,14 +15,13 @@ limitations under the License.
 
 #include "tensorflow/core/util/padding.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 
 namespace tensorflow {
 
-Status GetNodeAttr(const NodeDef& node_def, absl::string_view attr_name,
+Status GetNodeAttr(const NodeDef& node_def, StringPiece attr_name,
                    Padding* value) {
   string str_value;
   TF_RETURN_IF_ERROR(GetNodeAttr(node_def, attr_name, &str_value));
diff --git a/tensorflow/core/util/padding.h b/tensorflow/core/util/padding.h
index 4f94a8c4e4..76f9b4dd9a 100644
--- a/tensorflow/core/util/padding.h
+++ b/tensorflow/core/util/padding.h
@@ -21,7 +21,6 @@ limitations under the License.
 
 #include <string>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
@@ -46,7 +45,7 @@ enum Padding {
 string GetPaddingAttrString();
 
 // Specialization to parse an attribute directly into a Padding enum.
-Status GetNodeAttr(const NodeDef& node_def, absl::string_view attr_name,
+Status GetNodeAttr(const NodeDef& node_def, StringPiece attr_name,
                    Padding* value);
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/util/reporter_test.cc b/tensorflow/core/util/reporter_test.cc
index 3d026f9238..0972b86ea5 100644
--- a/tensorflow/core/util/reporter_test.cc
+++ b/tensorflow/core/util/reporter_test.cc
@@ -18,7 +18,6 @@ limitations under the License.
 
 #include "tensorflow/core/util/reporter.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -29,7 +28,7 @@ namespace tensorflow {
 namespace {
 
 // Tests of all the error paths in log_reader.cc follow:
-static void ExpectHasSubstr(absl::string_view s, absl::string_view expected) {
+static void ExpectHasSubstr(StringPiece s, StringPiece expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << s << " does not contain " << expected;
 }
diff --git a/tensorflow/core/util/saved_tensor_slice_util.cc b/tensorflow/core/util/saved_tensor_slice_util.cc
index 9b753fca6e..2040eac7e5 100644
--- a/tensorflow/core/util/saved_tensor_slice_util.cc
+++ b/tensorflow/core/util/saved_tensor_slice_util.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/util/saved_tensor_slice_util.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/ordered_code.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -45,7 +44,7 @@ string EncodeTensorNameSlice(const string& name, const TensorSlice& slice) {
 
 Status DecodeTensorNameSlice(const string& code, string* name,
                              tensorflow::TensorSlice* slice) {
-  absl::string_view src(code);
+  StringPiece src(code);
   uint64 x;
   if (!tensorflow::strings::OrderedCode::ReadNumIncreasing(&src, &x)) {
     return errors::Internal("Failed to parse the leading number: src = ", src);
diff --git a/tensorflow/core/util/semver_test.cc b/tensorflow/core/util/semver_test.cc
index 667e746444..fdc34fa58b 100644
--- a/tensorflow/core/util/semver_test.cc
+++ b/tensorflow/core/util/semver_test.cc
@@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/public/version.h"
 
 #include <string>
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -32,14 +32,14 @@ bool IsDotOrIdentifierChar(char c) {
   return false;
 }
 
-bool ConsumeDotSeparatedIdentifiers(absl::string_view* s, const string& prefix,
-                                    absl::string_view* val) {
+bool ConsumeDotSeparatedIdentifiers(StringPiece* s, const string& prefix,
+                                    StringPiece* val) {
   if (!str_util::ConsumePrefix(s, prefix)) return false;
   size_t i;
   for (i = 0; i < s->size() && IsDotOrIdentifierChar((*s)[i]); ++i) {
     // Intentionally empty
   }
-  *val = absl::string_view(s->data(), i);
+  *val = StringPiece(s->data(), i);
   s->remove_prefix(i);
   return i > 0;
 }
@@ -50,8 +50,8 @@ TEST(SemverTest, VersionStringFollowsSemver) {
   // free to refine further (for example, check for leading 0s in numbers), but
   // avoid adding dependencies.
   uint64 major, minor, patch;
-  absl::string_view prerelease, metadata;
-  absl::string_view semver(TF_VERSION_STRING);
+  StringPiece prerelease, metadata;
+  StringPiece semver(TF_VERSION_STRING);
 
   ASSERT_TRUE(str_util::ConsumeLeadingDigits(&semver, &major));
   ASSERT_TRUE(str_util::ConsumePrefix(&semver, "."));
diff --git a/tensorflow/core/util/tensor_bundle/BUILD b/tensorflow/core/util/tensor_bundle/BUILD
index dfb6492f05..f40ec9b752 100644
--- a/tensorflow/core/util/tensor_bundle/BUILD
+++ b/tensorflow/core/util/tensor_bundle/BUILD
@@ -45,7 +45,6 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -59,10 +58,7 @@ cc_library(
     name = "naming",
     srcs = ["naming.cc"],
     hdrs = ["naming.h"],
-    deps = [
-        "//tensorflow/core:lib",
-        "@com_google_absl//absl/strings",
-    ],
+    deps = ["//tensorflow/core:lib"],
 )
 
 tf_cc_test(
@@ -82,6 +78,5 @@ tf_cc_test(
         "//tensorflow/core:tensor_testutil",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/core/util/tensor_bundle/naming.cc b/tensorflow/core/util/tensor_bundle/naming.cc
index fa6ce785b9..db3d7ec3ac 100644
--- a/tensorflow/core/util/tensor_bundle/naming.cc
+++ b/tensorflow/core/util/tensor_bundle/naming.cc
@@ -15,19 +15,17 @@ limitations under the License.
 
 #include "tensorflow/core/util/tensor_bundle/naming.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
 
-string MetaFilename(absl::string_view prefix) {
+string MetaFilename(StringPiece prefix) {
   return strings::Printf("%.*s.index", static_cast<int>(prefix.size()),
                          prefix.data());
 }
 
-string DataFilename(absl::string_view prefix, int32 shard_id,
-                    int32 num_shards) {
+string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards) {
   DCHECK_GT(num_shards, 0);
   DCHECK_LT(shard_id, num_shards);
   return strings::Printf("%.*s.data-%05d-of-%05d",
diff --git a/tensorflow/core/util/tensor_bundle/naming.h b/tensorflow/core/util/tensor_bundle/naming.h
index 53c44ec375..7b101971a8 100644
--- a/tensorflow/core/util/tensor_bundle/naming.h
+++ b/tensorflow/core/util/tensor_bundle/naming.h
@@ -34,13 +34,13 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_NAMING_H_
 #define TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_NAMING_H_
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
-string MetaFilename(absl::string_view prefix);
-string DataFilename(absl::string_view prefix, int32 shard_id, int32 num_shards);
+string MetaFilename(StringPiece prefix);
+string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards);
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
index 5a14ac5f60..2dcb57a1f9 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc
@@ -21,7 +21,6 @@ limitations under the License.
 #include <memory>
 #include <utility>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb_text.h"
@@ -198,7 +197,7 @@ string* GetStringBackingBuffer(const Tensor& val) {
   return const_cast<string*>(val.flat<string>().data());
 }
 
-Status ParseEntryProto(absl::string_view key, absl::string_view value,
+Status ParseEntryProto(StringPiece key, StringPiece value,
                        protobuf::MessageLite* out) {
   if (!out->ParseFromArray(value.data(), value.size())) {
     return errors::DataLoss("Entry for key ", key, " not parseable.");
@@ -217,7 +216,7 @@ Status WriteTensor(const Tensor& val, FileOutputBuffer* out,
   *bytes_written = val.TotalBytes();
   char* buf = GetBackingBuffer(val);
   VLOG(1) << "Appending " << *bytes_written << " bytes to file";
-  return out->Append(absl::string_view(buf, *bytes_written));
+  return out->Append(StringPiece(buf, *bytes_written));
 }
 
 // Serializes string tensor "val".  "bytes_written" is treated in the same
@@ -261,7 +260,7 @@ Status WriteStringTensor(const Tensor& val, FileOutputBuffer* out,
 
   // Writes the length checksum.
   const uint32 length_checksum = crc32c::Mask(*crc32c);
-  TF_RETURN_IF_ERROR(out->Append(absl::string_view(
+  TF_RETURN_IF_ERROR(out->Append(StringPiece(
       reinterpret_cast<const char*>(&length_checksum), sizeof(uint32))));
   *crc32c = crc32c::Extend(
       *crc32c, reinterpret_cast<const char*>(&length_checksum), sizeof(uint32));
@@ -314,7 +313,7 @@ Status WriteVariantTensor(const Tensor& val, FileOutputBuffer* out,
 
     // Write the checksum.
     const uint32 length_checksum = crc32c::Mask(*crc32c);
-    TF_RETURN_IF_ERROR(out->Append(absl::string_view(
+    TF_RETURN_IF_ERROR(out->Append(StringPiece(
         reinterpret_cast<const char*>(&length_checksum), sizeof(uint32))));
     *crc32c =
         crc32c::Extend(*crc32c, reinterpret_cast<const char*>(&length_checksum),
@@ -386,8 +385,7 @@ Status PadAlignment(FileOutputBuffer* out, int alignment, int64* size) {
 
 }  // namespace
 
-BundleWriter::BundleWriter(Env* env, absl::string_view prefix,
-                           const Options& options)
+BundleWriter::BundleWriter(Env* env, StringPiece prefix, const Options& options)
     : env_(env),
       options_(options),
       prefix_(prefix),
@@ -411,7 +409,7 @@ BundleWriter::BundleWriter(Env* env, absl::string_view prefix,
   VLOG(1) << "Writing to file " << tmp_data_path_;
 }
 
-Status BundleWriter::Add(absl::string_view key, const Tensor& val) {
+Status BundleWriter::Add(StringPiece key, const Tensor& val) {
   if (!status_.ok()) return status_;
   CHECK_NE(key, kHeaderEntryKey);
   const string key_string(key);
@@ -448,7 +446,7 @@ Status BundleWriter::Add(absl::string_view key, const Tensor& val) {
   return status_;
 }
 
-Status BundleWriter::AddSlice(absl::string_view full_tensor_key,
+Status BundleWriter::AddSlice(StringPiece full_tensor_key,
                               const TensorShape& full_tensor_shape,
                               const TensorSlice& slice_spec,
                               const Tensor& slice_tensor) {
@@ -565,7 +563,7 @@ struct MergeState {
 
 // Merges entries of "prefix" into the accumulator state "merge".
 // Returns OK iff the merge succeeds.
-static Status MergeOneBundle(Env* env, absl::string_view prefix,
+static Status MergeOneBundle(Env* env, StringPiece prefix,
                              MergeState* merge_state) {
   VLOG(1) << "Merging bundle:" << prefix;
   const string filename = MetaFilename(prefix);
@@ -665,7 +663,7 @@ static Status MergeOneBundle(Env* env, absl::string_view prefix,
 }
 
 Status MergeBundles(Env* env, gtl::ArraySlice<string> prefixes,
-                    absl::string_view merged_prefix) {
+                    StringPiece merged_prefix) {
   // Merges all metadata tables.
   // TODO(zhifengc): KeyValue sorter if it becomes too big.
   MergeState merge;
@@ -715,7 +713,7 @@ Status MergeBundles(Env* env, gtl::ArraySlice<string> prefixes,
 
 // Interface for reading a tensor bundle.
 
-BundleReader::BundleReader(Env* env, absl::string_view prefix)
+BundleReader::BundleReader(Env* env, StringPiece prefix)
     : env_(env),
       prefix_(prefix),
       metadata_(nullptr),
@@ -774,7 +772,7 @@ BundleReader::~BundleReader() {
   gtl::STLDeleteValues(&tensor_slices_);
 }
 
-Status BundleReader::GetBundleEntryProto(absl::string_view key,
+Status BundleReader::GetBundleEntryProto(StringPiece key,
                                          BundleEntryProto* entry) {
   entry->Clear();
   TF_CHECK_OK(status_);
@@ -843,7 +841,7 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) {
     char* backing_buffer = const_cast<char*>((ret->tensor_data().data()));
     size_t unused_bytes_read;
     if (entry.size() > kBufferSize) {
-      absl::string_view sp;
+      StringPiece sp;
       TF_RETURN_IF_ERROR(buffered_file->file()->Read(
           entry.offset(), entry.size(), &sp, backing_buffer));
       if (sp.data() != backing_buffer) {
@@ -878,7 +876,7 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) {
   return Status::OK();
 }
 
-Status BundleReader::Lookup(absl::string_view key, Tensor* val) {
+Status BundleReader::Lookup(StringPiece key, Tensor* val) {
   CHECK(val != nullptr);
   BundleEntryProto entry;
   TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry));
@@ -910,7 +908,7 @@ Status BundleReader::ReadCurrent(Tensor* val) {
   }
 }
 
-Status BundleReader::LookupTensorSlices(absl::string_view key,
+Status BundleReader::LookupTensorSlices(StringPiece key,
                                         std::vector<TensorSlice>* slices) {
   slices->clear();
   BundleEntryProto entry;
@@ -922,7 +920,7 @@ Status BundleReader::LookupTensorSlices(absl::string_view key,
   return Status::OK();
 }
 
-Status BundleReader::LookupSlice(absl::string_view full_tensor_key,
+Status BundleReader::LookupSlice(StringPiece full_tensor_key,
                                  const TensorSlice& slice_spec, Tensor* val) {
   CHECK(val != nullptr);
   BundleEntryProto entry;
@@ -930,7 +928,7 @@ Status BundleReader::LookupSlice(absl::string_view full_tensor_key,
   return GetSliceValue(full_tensor_key, entry, slice_spec, val);
 }
 
-Status BundleReader::GetSliceValue(absl::string_view full_tensor_key,
+Status BundleReader::GetSliceValue(StringPiece full_tensor_key,
                                    const BundleEntryProto& full_tensor_entry,
                                    const TensorSlice& slice_spec, Tensor* val) {
   using checkpoint::RegisterTensorSlice;
@@ -1044,12 +1042,12 @@ Status BundleReader::GetSliceValue(absl::string_view full_tensor_key,
   return Status::OK();
 }
 
-bool BundleReader::Contains(absl::string_view key) {
+bool BundleReader::Contains(StringPiece key) {
   Seek(key);
   return Valid() && (this->key() == key);
 }
 
-Status BundleReader::LookupDtypeAndShape(absl::string_view key, DataType* dtype,
+Status BundleReader::LookupDtypeAndShape(StringPiece key, DataType* dtype,
                                          TensorShape* shape) {
   BundleEntryProto entry;
   TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry));
@@ -1058,8 +1056,7 @@ Status BundleReader::LookupDtypeAndShape(absl::string_view key, DataType* dtype,
   return Status::OK();
 }
 
-Status BundleReader::LookupTensorShape(absl::string_view key,
-                                       TensorShape* shape) {
+Status BundleReader::LookupTensorShape(StringPiece key, TensorShape* shape) {
   DataType ignored;
   return LookupDtypeAndShape(key, &ignored, shape);
 }
@@ -1083,7 +1080,7 @@ string BundleReader::DebugString() {
 
 FileOutputBuffer::~FileOutputBuffer() { delete file_; }
 
-Status FileOutputBuffer::Append(absl::string_view data) {
+Status FileOutputBuffer::Append(StringPiece data) {
   // In the below, it is critical to calculate the checksum on the actually
   // copied bytes, not the source bytes.  This is because "data" typically
   // points to tensor buffers, which may be concurrently written.
@@ -1120,8 +1117,7 @@ Status FileOutputBuffer::Close() {
 
 Status FileOutputBuffer::FlushBuffer() {
   if (position_ > 0) {
-    TF_RETURN_IF_ERROR(
-        file_->Append(absl::string_view(&buffer_[0], position_)));
+    TF_RETURN_IF_ERROR(file_->Append(StringPiece(&buffer_[0], position_)));
     position_ = 0;
   }
   return Status::OK();
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.h b/tensorflow/core/util/tensor_bundle/tensor_bundle.h
index f9a628fc33..3a2ffbb495 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle.h
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.h
@@ -61,7 +61,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_TENSOR_BUNDLE_H_
 #define TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_TENSOR_BUNDLE_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/protobuf/tensor_bundle.pb.h"
 
 #include <map>
@@ -114,12 +113,12 @@ class BundleWriter {
     // Must be >= 1. The default size of 1 densely packs tensors.
     int data_alignment{1};
   };
-  BundleWriter(Env* env, absl::string_view prefix,
+  BundleWriter(Env* env, StringPiece prefix,
                const Options& options = Options());
 
   // Adds the tensor "val" under key "key".
   // Across calls "key" must be unique but can be added in any order.
-  Status Add(absl::string_view key, const Tensor& val);
+  Status Add(StringPiece key, const Tensor& val);
 
   // Partitioned variables support.
   // A slice of a full tensor is stored in two entries in the metadata table:
@@ -137,7 +136,7 @@ class BundleWriter {
   // consistent entry for "full_tensor_key" is produced.
   //
   // Returns an error if the same slice is added the second time.
-  Status AddSlice(absl::string_view full_tensor_key,
+  Status AddSlice(StringPiece full_tensor_key,
                   const TensorShape& full_tensor_shape,
                   const TensorSlice& slice_spec, const Tensor& slice_tensor);
 
@@ -174,7 +173,7 @@ class BundleWriter {
 // Once merged, makes a best effort to delete the old metadata files.
 // Returns OK iff all bundles are successfully merged.
 Status MergeBundles(Env* env, gtl::ArraySlice<string> prefixes,
-                    absl::string_view merged_prefix);
+                    StringPiece merged_prefix);
 
 // On construction, silently attempts to read the metadata associated with
 // "prefix".  If caller intends to call any function afterwards, "status()"
@@ -182,7 +181,7 @@ Status MergeBundles(Env* env, gtl::ArraySlice<string> prefixes,
 // All threads accessing the same BundleReader must synchronize.
 class BundleReader {
  public:
-  BundleReader(Env* const env, absl::string_view prefix);
+  BundleReader(Env* const env, StringPiece prefix);
   ~BundleReader();
 
   // Is ok() iff the reader construction is successful (completed the read of
@@ -192,17 +191,17 @@ class BundleReader {
   // Queries whether the bundle contains an entry keyed by "key".  Calls Seek()
   // internally, so this call invalidates the reader's current position.
   // REQUIRES: status().ok()
-  bool Contains(absl::string_view key);
+  bool Contains(StringPiece key);
 
   // Looks up the dtype and the shape of the tensor keyed by "key".
   // REQUIRES: status().ok()
-  Status LookupDtypeAndShape(absl::string_view key, DataType* dtype,
+  Status LookupDtypeAndShape(StringPiece key, DataType* dtype,
                              TensorShape* shape) TF_MUST_USE_RESULT;
 
   // Looks up the shape of the tensor keyed by "key".
   // Clears "shape" if not found.
   // REQUIRES: status().ok()
-  Status LookupTensorShape(absl::string_view key,
+  Status LookupTensorShape(StringPiece key,
                            TensorShape* shape) TF_MUST_USE_RESULT;
 
   // Looks up the tensor keyed by "key".  If "key" refers to a partitioned
@@ -217,7 +216,7 @@ class BundleReader {
   //
   // Validates the stored crc32c checksum against the restored bytes.
   // REQUIRES: status().ok()
-  Status Lookup(absl::string_view key, Tensor* val) TF_MUST_USE_RESULT;
+  Status Lookup(StringPiece key, Tensor* val) TF_MUST_USE_RESULT;
 
   // Looks up the tensor pointed to by the internal iterator.
   //
@@ -234,21 +233,19 @@ class BundleReader {
   // a slice with a larger start index in some dimension could come before
   // another slice with a smaller start index in the same dimension.
   // REQUIRES: status().ok()
-  Status LookupTensorSlices(absl::string_view key,
-                            std::vector<TensorSlice>* slices)
+  Status LookupTensorSlices(StringPiece key, std::vector<TensorSlice>* slices)
       TF_MUST_USE_RESULT;
 
   // Looks up a specific slice of a partitioned tensor.
   // It is only required that the stored slices cover the requested slice,
   // namely "slice_spec" is a subset of the union of the stored slices.
   // REQUIRES: status().ok()
-  Status LookupSlice(absl::string_view full_tensor_key,
-                     const TensorSlice& slice_spec,
+  Status LookupSlice(StringPiece full_tensor_key, const TensorSlice& slice_spec,
                      Tensor* val) TF_MUST_USE_RESULT;
 
   // Seeks to the first position in the bundle whose key is no less than "key".
   // REQUIRES: status().ok()
-  void Seek(absl::string_view key) { return iter_->Seek(key); }
+  void Seek(StringPiece key) { return iter_->Seek(key); }
   // Moves to the next position in the bundle.
   // REQUIRES: status().ok()
   void Next() const { iter_->Next(); }
@@ -258,10 +255,10 @@ class BundleReader {
 
   // Returns the key at the current position.
   // REQUIRES: status().ok() && Valid()
-  absl::string_view key() const { return iter_->key(); }
+  StringPiece key() const { return iter_->key(); }
   // Returns the raw value at the current position.
   // REQUIRES: status().ok() && Valid()
-  absl::string_view value() const { return iter_->value(); }
+  StringPiece value() const { return iter_->value(); }
 
   string DebugString();
 
@@ -269,7 +266,7 @@ class BundleReader {
   // Seeks for "key" and reads the metadata proto.
   // On non-OK return, clears "entry" for the caller.
   // REQUIRES: status().ok()
-  Status GetBundleEntryProto(absl::string_view key,
+  Status GetBundleEntryProto(StringPiece key,
                              BundleEntryProto* entry) TF_MUST_USE_RESULT;
 
   // Reads the tensor value described by the metadata proto "entry".
@@ -280,7 +277,7 @@ class BundleReader {
   // Reads the slice described by "slice_spec".  The corresponding full tensor
   // has key "ful_tensor_key" and metadata proto "full_tensor_entry".
   // REQUIRES: full_tensor_entry.slices_size() > 0
-  Status GetSliceValue(absl::string_view full_tensor_key,
+  Status GetSliceValue(StringPiece full_tensor_key,
                        const BundleEntryProto& full_tensor_entry,
                        const TensorSlice& slice_spec,
                        Tensor* val) TF_MUST_USE_RESULT;
@@ -321,7 +318,7 @@ class FileOutputBuffer {
   ~FileOutputBuffer();
 
   // Buffered append.
-  Status Append(absl::string_view data);
+  Status Append(StringPiece data);
 
   // Returns the running crc32c checksum of all currently appended bytes.
   uint32 crc32c() { return crc32c_; }
diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
index eecf97fde5..9567e4750b 100644
--- a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
+++ b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <random>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/framework/variant.h"
@@ -282,7 +281,7 @@ void TestNonStandardShapes() {
 }
 
 // Writes a bundle to disk with a bad "version"; checks for "expected_error".
-void VersionTest(const VersionDef& version, absl::string_view expected_error) {
+void VersionTest(const VersionDef& version, StringPiece expected_error) {
   const string path = Prefix("version_test");
   {
     // Prepare an empty bundle with the given version information.
@@ -611,7 +610,7 @@ TEST(TensorBundleTest, DirectoryStructure) {
   // Ensures we have the expected files.
   auto CheckDirFiles = [env](const string& bundle_prefix,
                              gtl::ArraySlice<string> expected_files) {
-    absl::string_view dir = io::Dirname(bundle_prefix);
+    StringPiece dir = io::Dirname(bundle_prefix);
     for (const string& expected_file : expected_files) {
       TF_EXPECT_OK(env->FileExists(io::JoinPath(dir, expected_file)));
     }
@@ -755,8 +754,8 @@ TEST(TensorBundleTest, TruncatedTensorContents) {
   string data;
   TF_ASSERT_OK(ReadFileToString(env, datafile, &data));
   ASSERT_TRUE(!data.empty());
-  TF_ASSERT_OK(WriteStringToFile(
-      env, datafile, absl::string_view(data.data(), data.size() - 1)));
+  TF_ASSERT_OK(WriteStringToFile(env, datafile,
+                                 StringPiece(data.data(), data.size() - 1)));
 
   BundleReader reader(env, Prefix("end"));
   TF_ASSERT_OK(reader.status());
diff --git a/tensorflow/core/util/tensor_slice_reader.cc b/tensorflow/core/util/tensor_slice_reader.cc
index f77150cf0d..c6dda2ec29 100644
--- a/tensorflow/core/util/tensor_slice_reader.cc
+++ b/tensorflow/core/util/tensor_slice_reader.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <utility>
 #include <vector>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/types.pb_text.h"
 #include "tensorflow/core/framework/versions.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -54,7 +53,7 @@ class TensorSliceReaderTable : public TensorSliceReader::Table {
     std::unique_ptr<table::Iterator> iter(table_->NewIterator());
     iter->Seek(key);
     if (iter->Valid() && iter->key() == key) {
-      absl::string_view v = iter->value();
+      StringPiece v = iter->value();
       value->assign(v.data(), v.size());
       return true;
     } else {
diff --git a/tensorflow/core/util/tensor_slice_writer.cc b/tensorflow/core/util/tensor_slice_writer.cc
index d0d6b6ced8..7ebde002e1 100644
--- a/tensorflow/core/util/tensor_slice_writer.cc
+++ b/tensorflow/core/util/tensor_slice_writer.cc
@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <utility>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/table_builder.h"
@@ -41,7 +40,7 @@ class TableBuilder : public TensorSliceWriter::Builder {
     option.compression = table::kNoCompression;
     builder_.reset(new table::TableBuilder(option, f));
   }
-  void Add(absl::string_view key, absl::string_view val) override {
+  void Add(StringPiece key, StringPiece val) override {
     builder_->Add(key, val);
   }
   Status Finish(int64* file_size) override {
diff --git a/tensorflow/core/util/tensor_slice_writer.h b/tensorflow/core/util/tensor_slice_writer.h
index de98629654..0db2fb4804 100644
--- a/tensorflow/core/util/tensor_slice_writer.h
+++ b/tensorflow/core/util/tensor_slice_writer.h
@@ -21,12 +21,12 @@ limitations under the License.
 
 #include <unordered_map>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_slice.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
@@ -46,7 +46,7 @@ class TensorSliceWriter {
   class Builder {
    public:
     virtual ~Builder() {}
-    virtual void Add(absl::string_view key, absl::string_view value) = 0;
+    virtual void Add(StringPiece key, StringPiece value) = 0;
     virtual Status Finish(int64* file_size) = 0;
   };
   typedef std::function<Status(const string&, Builder**)> CreateBuilderFunction;
diff --git a/tensorflow/core/util/util.cc b/tensorflow/core/util/util.cc
index 6e1e86ff94..489999d1e8 100644
--- a/tensorflow/core/util/util.cc
+++ b/tensorflow/core/util/util.cc
@@ -15,30 +15,29 @@ limitations under the License.
 
 #include "tensorflow/core/util/util.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
 
-absl::string_view NodeNamePrefix(const absl::string_view& op_name) {
-  absl::string_view sp(op_name);
+StringPiece NodeNamePrefix(const StringPiece& op_name) {
+  StringPiece sp(op_name);
   auto p = sp.find('/');
-  if (p == absl::string_view::npos || p == 0) {
+  if (p == StringPiece::npos || p == 0) {
     return "";
   } else {
-    return absl::string_view(sp.data(), p);
+    return StringPiece(sp.data(), p);
   }
 }
 
-absl::string_view NodeNameFullPrefix(const absl::string_view& op_name) {
-  absl::string_view sp(op_name);
+StringPiece NodeNameFullPrefix(const StringPiece& op_name) {
+  StringPiece sp(op_name);
   auto p = sp.rfind('/');
-  if (p == absl::string_view::npos || p == 0) {
+  if (p == StringPiece::npos || p == 0) {
     return "";
   } else {
-    return absl::string_view(sp.data(), p);
+    return StringPiece(sp.data(), p);
   }
 }
 
diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h
index 2e913e17cf..4aa47aa48a 100644
--- a/tensorflow/core/util/util.h
+++ b/tensorflow/core/util/util.h
@@ -16,18 +16,18 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_UTIL_H_
 #define TENSORFLOW_CORE_UTIL_UTIL_H_
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
 
 // If op_name has '/' in it, then return everything before the first '/'.
 // Otherwise return empty string.
-absl::string_view NodeNamePrefix(const absl::string_view& op_name);
+StringPiece NodeNamePrefix(const StringPiece& op_name);
 
 // If op_name has '/' in it, then return everything before the last '/'.
 // Otherwise return empty string.
-absl::string_view NodeNameFullPrefix(const absl::string_view& op_name);
+StringPiece NodeNameFullPrefix(const StringPiece& op_name);
 
 class MovingAverage {
  public:
diff --git a/tensorflow/examples/label_image/BUILD b/tensorflow/examples/label_image/BUILD
index c9421f35ca..c50fd93d03 100644
--- a/tensorflow/examples/label_image/BUILD
+++ b/tensorflow/examples/label_image/BUILD
@@ -50,7 +50,7 @@ tf_cc_binary(
             "//tensorflow/core:protos_all_cc",
             "//tensorflow/core:tensorflow",
         ],
-    }) + ["@com_google_absl//absl/strings"],
+    }),
 )
 
 py_binary(
diff --git a/tensorflow/examples/label_image/main.cc b/tensorflow/examples/label_image/main.cc
index dbab8c3e5a..ee2927d0a5 100644
--- a/tensorflow/examples/label_image/main.cc
+++ b/tensorflow/examples/label_image/main.cc
@@ -38,7 +38,6 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/cc/ops/image_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
@@ -47,6 +46,7 @@ limitations under the License.
 #include "tensorflow/core/graph/default_device.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -99,7 +99,7 @@ static Status ReadEntireFile(tensorflow::Env* env, const string& filename,
   std::unique_ptr<tensorflow::RandomAccessFile> file;
   TF_RETURN_IF_ERROR(env->NewRandomAccessFile(filename, &file));
 
-  absl::string_view data;
+  tensorflow::StringPiece data;
   TF_RETURN_IF_ERROR(file->Read(0, file_size, &data, &(contents)[0]));
   if (data.size() != file_size) {
     return tensorflow::errors::DataLoss("Truncated read of '", filename,
diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD
index f1b9005965..9dce78b9a3 100644
--- a/tensorflow/java/BUILD
+++ b/tensorflow/java/BUILD
@@ -121,7 +121,6 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:op_gen_lib",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
         "@com_googlesource_code_re2//:re2",
     ],
 )
diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc
index 2324a36f90..4f5a491d25 100644
--- a/tensorflow/java/src/gen/cc/op_specs.cc
+++ b/tensorflow/java/src/gen/cc/op_specs.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "re2/re2.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/types.h"
@@ -130,7 +129,7 @@ std::pair<Type, Type> TypeResolver::TypesOf(const OpDef_AttrDef& attr_def,
                                             bool* iterable_out) {
   std::pair<Type, Type> types = MakeTypePair(Type::Wildcard());
   *iterable_out = false;
-  absl::string_view attr_type = attr_def.type();
+  StringPiece attr_type = attr_def.type();
   if (str_util::ConsumePrefix(&attr_type, "list(")) {
     attr_type.remove_suffix(1);  // remove closing brace
     *iterable_out = true;
diff --git a/tensorflow/java/src/gen/cc/source_writer.cc b/tensorflow/java/src/gen/cc/source_writer.cc
index 757b7ae552..a71b367691 100644
--- a/tensorflow/java/src/gen/cc/source_writer.cc
+++ b/tensorflow/java/src/gen/cc/source_writer.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <algorithm>
 #include <list>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/java/src/gen/cc/source_writer.h"
 
 namespace tensorflow {
@@ -49,7 +48,7 @@ SourceWriter& SourceWriter::Prefix(const char* line_prefix) {
   return *this;
 }
 
-SourceWriter& SourceWriter::Write(const absl::string_view& str) {
+SourceWriter& SourceWriter::Write(const StringPiece& str) {
   size_t line_pos = 0;
   do {
     size_t start_pos = line_pos;
@@ -72,7 +71,7 @@ SourceWriter& SourceWriter::WriteFromFile(const string& fname, Env* env) {
   return Write(data_);
 }
 
-SourceWriter& SourceWriter::Append(const absl::string_view& str) {
+SourceWriter& SourceWriter::Append(const StringPiece& str) {
   if (!str.empty()) {
     if (newline_) {
       DoAppend(left_margin_ + line_prefix_);
diff --git a/tensorflow/java/src/gen/cc/source_writer.h b/tensorflow/java/src/gen/cc/source_writer.h
index ce444cd836..de0113bd5b 100644
--- a/tensorflow/java/src/gen/cc/source_writer.h
+++ b/tensorflow/java/src/gen/cc/source_writer.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include <list>
 #include <set>
 
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/java/src/gen/cc/java_defs.h"
 
@@ -61,7 +61,7 @@ class SourceWriter {
   // The data might potentially contain newline characters, therefore it will
   // be scanned to ensure that each line is indented and prefixed properly,
   // making it a bit slower than Append().
-  SourceWriter& Write(const absl::string_view& str);
+  SourceWriter& Write(const StringPiece& str);
 
   // Writes a source code snippet read from a file.
   //
@@ -74,7 +74,7 @@ class SourceWriter {
   //
   // It is expected that no newline character is present in the data provided,
   // otherwise Write() must be used.
-  SourceWriter& Append(const absl::string_view& str);
+  SourceWriter& Append(const StringPiece& str);
 
   // Appends a type to the current line.
   //
@@ -153,7 +153,7 @@ class SourceWriter {
                            const Javadoc* javadoc = nullptr);
 
  protected:
-  virtual void DoAppend(const absl::string_view& str) = 0;
+  virtual void DoAppend(const StringPiece& str) = 0;
 
  private:
   // A utility base class for visiting elements of a type.
@@ -223,7 +223,7 @@ class SourceFileWriter : public SourceWriter {
   virtual ~SourceFileWriter() = default;
 
  protected:
-  void DoAppend(const absl::string_view& str) override {
+  void DoAppend(const StringPiece& str) override {
     TF_CHECK_OK(file_->Append(str));
   }
 
@@ -243,7 +243,7 @@ class SourceBufferWriter : public SourceWriter {
   const string& str() { return *buffer_; }
 
  protected:
-  void DoAppend(const absl::string_view& str) override {
+  void DoAppend(const StringPiece& str) override {
     buffer_->append(str.begin(), str.end());
   }
 
diff --git a/tensorflow/js/BUILD b/tensorflow/js/BUILD
index 6866e4e9fc..ad0dc44f54 100644
--- a/tensorflow/js/BUILD
+++ b/tensorflow/js/BUILD
@@ -48,6 +48,5 @@ tf_cc_test(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/js/ops/ts_op_gen_test.cc b/tensorflow/js/ops/ts_op_gen_test.cc
index 1c4061e6ee..03241689b5 100644
--- a/tensorflow/js/ops/ts_op_gen_test.cc
+++ b/tensorflow/js/ops/ts_op_gen_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/js/ops/ts_op_gen.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -27,12 +26,12 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
-void ExpectContainsStr(absl::string_view s, absl::string_view expected) {
+void ExpectContainsStr(StringPiece s, StringPiece expected) {
   EXPECT_TRUE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
 
-void ExpectDoesNotContainStr(absl::string_view s, absl::string_view expected) {
+void ExpectDoesNotContainStr(StringPiece s, StringPiece expected) {
   EXPECT_FALSE(str_util::StrContains(s, expected))
       << "'" << s << "' does not contain '" << expected << "'";
 }
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index ae4d67363d..c749017627 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -336,7 +336,6 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//third_party/python_runtime:headers",
         "@com_google_absl//absl/memory",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -359,7 +358,6 @@ cc_library(
         "//tensorflow/python/eager:pywrap_tfe_lib",
         "//third_party/py/numpy:headers",
         "//third_party/python_runtime:headers",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -439,7 +437,6 @@ cc_library(
         "//tensorflow/c:tf_status_helper",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -457,7 +454,6 @@ tf_cc_shared_object(
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
-        "@com_google_absl//absl/strings",
         "@protobuf_archive//:protobuf_headers",
     ],
 )
@@ -546,7 +542,6 @@ cc_library(
         "//tensorflow/core:op_gen_lib",
         "//tensorflow/core:proto_text",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
     alwayslink = 1,
 )
@@ -562,7 +557,6 @@ cc_library(
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:op_gen_lib",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index cb8ac33f0a..dcbe6d42bd 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include <thread>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/python/eager/pywrap_tfe.h"
 
 #include "absl/strings/str_cat.h"
@@ -208,12 +207,12 @@ bool ParseDimensionValue(const string& key, PyObject* py_value,
 }
 
 bool ParseStringValue(const string& key, PyObject* py_value, TF_Status* status,
-                      absl::string_view* value) {
+                      tensorflow::StringPiece* value) {
   if (PyBytes_Check(py_value)) {
     Py_ssize_t size = 0;
     char* buf = nullptr;
     if (PyBytes_AsStringAndSize(py_value, &buf, &size) < 0) return false;
-    *value = absl::string_view(buf, size);
+    *value = tensorflow::StringPiece(buf, size);
     return true;
   }
 #if PY_MAJOR_VERSION >= 3
@@ -287,7 +286,7 @@ bool SetOpAttrList(
     std::unique_ptr<const void*[]> values(new const void*[num_values]);
     std::unique_ptr<size_t[]> lengths(new size_t[num_values]);
     for (int i = 0; i < num_values; ++i) {
-      absl::string_view value;
+      tensorflow::StringPiece value;
       tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i));
       if (!ParseStringValue(key, py_value.get(), status, &value)) return false;
       values[i] = value.data();
@@ -490,7 +489,7 @@ bool SetOpAttrScalar(
     tensorflow::gtl::FlatMap<string, tensorflow::int64>* attr_list_sizes,
     TF_Status* status) {
   if (type == TF_ATTR_STRING) {
-    absl::string_view value;
+    tensorflow::StringPiece value;
     if (!ParseStringValue(key, py_value, status, &value)) return false;
     TFE_OpSetAttrString(op, key, value.data(), value.size());
   } else if (type == TF_ATTR_INT) {
@@ -553,7 +552,7 @@ bool SetOpAttrScalar(
     //     (which is what the various "defun" or "Defun" decorators do).
     // And in the future also allow an object that can encapsulate
     // the function name and its attribute values.
-    absl::string_view func_name;
+    tensorflow::StringPiece func_name;
     if (!ParseStringValue(key, py_value, status, &func_name)) {
       PyObject* name_attr = PyObject_GetAttrString(py_value, "name");
       if (name_attr == nullptr ||
@@ -2485,7 +2484,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) {
   for (int i = kFastPathExecuteInputStartIndex + op_def->input_arg_size();
        i < args_size; i += 2) {
     PyObject* py_attr_name = PyTuple_GET_ITEM(args, i);
-    const absl::string_view attr_name(TFE_GetPythonString(py_attr_name));
+    const tensorflow::StringPiece attr_name(TFE_GetPythonString(py_attr_name));
     PyObject* py_attr_value = PyTuple_GET_ITEM(args, i + 1);
 
     // Not creating an index since most of the time there are not more than a
diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc
index f21900ba67..2022fbcbaa 100644
--- a/tensorflow/python/framework/python_op_gen.cc
+++ b/tensorflow/python/framework/python_op_gen.cc
@@ -17,7 +17,6 @@ limitations under the License.
 #include <stdio.h>
 #include <sstream>
 #include <unordered_map>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
@@ -161,7 +160,7 @@ class GenEagerPythonOp : public python_op_gen_internal::GenPythonOp {
   string FlattenInputs(const std::vector<int>* input_indices,
                        std::vector<string>* output_sizes) const;
 
-  absl::string_view op_name_;
+  StringPiece op_name_;
   typedef std::unordered_map<string, std::vector<int>> AttrToArgMap;
   AttrToArgMap attr_to_args_;
   std::unordered_map<string, string> attr_expressions_;
@@ -474,7 +473,7 @@ bool GenEagerPythonOp::GetEagerFunctionSetup(const string& indentation,
     const auto& param = param_names_[i + op_def_.input_arg_size()];
     const auto& attr = *FindAttr(attr_name, op_def_);
     const string& attr_api_name = param.GetRenameTo();
-    absl::string_view attr_type = attr.type();
+    StringPiece attr_type = attr.type();
     attr_expressions_[attr_name] = attr_api_name;
     const int default_index = i - (attrs_.size() - params_with_default_.size());
     if (default_index >= 0) {
diff --git a/tensorflow/python/framework/python_op_gen_internal.cc b/tensorflow/python/framework/python_op_gen_internal.cc
index 5024cf0625..f6aef5bc50 100644
--- a/tensorflow/python/framework/python_op_gen_internal.cc
+++ b/tensorflow/python/framework/python_op_gen_internal.cc
@@ -20,16 +20,15 @@ limitations under the License.
 #include <iomanip>
 #include <sstream>
 #include <unordered_map>
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/api_def.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_def.pb_text.h"
 #include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/framework/op_def.pb_text.h"
 #include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/framework/op_gen_lib.h"
-#include "tensorflow/core/framework/tensor.pb_text.h"
 #include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/framework/tensor.pb_text.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
@@ -111,7 +110,7 @@ string AvoidPythonReserved(const string& s) {
 
 // Indent the first line by "initial" spaces and all following lines
 // by "rest" spaces.
-string Indent(int initial, int rest, absl::string_view in) {
+string Indent(int initial, int rest, StringPiece in) {
   // TODO(josh11b): Also word-wrapping?
   string copy(in.data(), in.size());
   str_util::StripTrailingWhitespace(&copy);
@@ -136,7 +135,7 @@ string Indent(int initial, int rest, absl::string_view in) {
 
 // Adds append to *dest, with a space if the first line will be <= width,
 // or a newline otherwise.
-void AppendWithinWidth(string* dest, absl::string_view append, int width) {
+void AppendWithinWidth(string* dest, StringPiece append, int width) {
   auto first_line = append.find('\n');
   if (first_line == string::npos) first_line = append.size();
   if (dest->size() + first_line + 1 /* space */ > static_cast<size_t>(width)) {
@@ -284,7 +283,7 @@ string GetReturns(const OpDef& op_def,
     strings::StrAppend(&result, "    The created Operation.\n");
   } else {
     if (num_outs == 1) {
-      absl::string_view description = op_def.output_arg(0).description();
+      StringPiece description = op_def.output_arg(0).description();
       if (ConsumeEquals(&description)) {  // Skip the generated type info.
         strings::StrAppend(&result, Indent(4, 4, description));
       } else {
@@ -320,7 +319,7 @@ string GetReturns(const OpDef& op_def,
                          str_util::Join(out_names, ", "), ").\n\n");
       for (int i = 0; i < num_outs; ++i) {
         string desc = strings::StrCat(out_names[i], ": ");
-        absl::string_view description = op_def.output_arg(i).description();
+        StringPiece description = op_def.output_arg(i).description();
         if (ConsumeEquals(&description)) {  // Skip the generated type info.
           strings::StrAppend(&desc, description);
         } else {
@@ -482,7 +481,7 @@ static void AddDelimiter(string* append_to, const string& delim) {
   if (!append_to->empty()) strings::StrAppend(append_to, delim);
 }
 
-const ApiDef::Attr* FindAttr(absl::string_view name, const ApiDef& api_def) {
+const ApiDef::Attr* FindAttr(StringPiece name, const ApiDef& api_def) {
   for (int i = 0; i < api_def.attr_size(); ++i) {
     if (api_def.attr(i).name() == name) {
       return &api_def.attr(i);
@@ -660,7 +659,7 @@ void GenPythonOp::AddDocStringInputs() {
   for (int i = 0; i < api_def_.arg_order_size(); ++i) {
     const auto& arg = *FindInputArg(api_def_.arg_order(i), op_def_);
     const auto& api_def_arg = *FindInputArg(api_def_.arg_order(i), api_def_);
-    absl::string_view description = api_def_arg.description();
+    StringPiece description = api_def_arg.description();
     string desc;
     if (ConsumeEquals(&description)) {  // Skip the generated type info.
       desc = strings::StrCat(param_names_[i].GetRenameTo(), ": ");
diff --git a/tensorflow/python/framework/python_op_gen_main.cc b/tensorflow/python/framework/python_op_gen_main.cc
index 5a9b009500..e20ad5fd33 100644
--- a/tensorflow/python/framework/python_op_gen_main.cc
+++ b/tensorflow/python/framework/python_op_gen_main.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/python/framework/python_op_gen.h"
 
 #include <memory>
@@ -48,8 +47,8 @@ Status ReadOpListFromFile(const string& filename,
     // The parser assumes that the op name is the first string on each
     // line with no preceding whitespace, and ignores lines that do
     // not start with an op name as a comment.
-    strings::Scanner scanner{absl::string_view(line_contents)};
-    absl::string_view op_name;
+    strings::Scanner scanner{StringPiece(line_contents)};
+    StringPiece op_name;
     if (scanner.One(strings::Scanner::LETTER_DIGIT_DOT)
             .Any(strings::Scanner::LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE)
             .GetResult(nullptr, &op_name)) {
@@ -90,7 +89,7 @@ Status ParseOpListCommandLine(const char* arg, std::vector<string>* op_list) {
 // Returns an empty string if the current executable's name does not
 // follow a known pattern.
 string InferSourceFileName(const char* argv_zero) {
-  absl::string_view command_str = io::Basename(argv_zero);
+  StringPiece command_str = io::Basename(argv_zero);
 
   // For built-in ops, the Bazel build creates a separate executable
   // with the name gen_<op type>_ops_py_wrappers_cc containing the
diff --git a/tensorflow/python/framework/test_file_system.cc b/tensorflow/python/framework/test_file_system.cc
index 13d05c6fd0..6e9915adbb 100644
--- a/tensorflow/python/framework/test_file_system.cc
+++ b/tensorflow/python/framework/test_file_system.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/null_file_system.h"
 
@@ -21,7 +20,7 @@ namespace tensorflow {
 
 class TestRandomAccessFile : public RandomAccessFile {
   // The file contents is 10 bytes of all A's
-  Status Read(uint64 offset, size_t n, absl::string_view* result,
+  Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
     Status s;
     for (int i = 0; i < n; ++i) {
@@ -32,7 +31,7 @@ class TestRandomAccessFile : public RandomAccessFile {
       }
       scratch[i] = 'A';
     }
-    *result = absl::string_view(scratch, n);
+    *result = StringPiece(scratch, n);
     return s;
   }
 };
diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc
index 4d920750cd..6189503d8f 100644
--- a/tensorflow/python/lib/core/py_func.cc
+++ b/tensorflow/python/lib/core/py_func.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <Python.h>
 
 #include "numpy/arrayobject.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/c/eager/c_api.h"
 #include "tensorflow/c/eager/c_api_internal.h"
 #include "tensorflow/c/tf_status_helper.h"
@@ -402,7 +401,7 @@ Status ConvertNdarrayToTensor(PyObject* obj, Tensor* ret) {
               std::max(1, EIGEN_MAX_ALIGN_BYTES) !=
           0) {
         Tensor t(dtype, shape);
-        absl::string_view p = t.tensor_data();
+        StringPiece p = t.tensor_data();
         memcpy(const_cast<char*>(p.data()), PyArray_DATA(input), p.size());
         *ret = t;
       } else {
@@ -465,7 +464,7 @@ Status ConvertTensorToNdarray(const Tensor& t, PyObject** ret) {
     }
   } else {
     CHECK(DataTypeCanUseMemcpy(t.dtype()));
-    absl::string_view p = t.tensor_data();
+    StringPiece p = t.tensor_data();
     memcpy(PyArray_DATA(np_array), p.data(), p.size());
   }
   *ret = PyArray_Return(np_array);
diff --git a/tensorflow/python/lib/core/strings.i b/tensorflow/python/lib/core/strings.i
index 8b894ab387..9d807e51be 100644
--- a/tensorflow/python/lib/core/strings.i
+++ b/tensorflow/python/lib/core/strings.i
@@ -34,13 +34,13 @@ limitations under the License.
 //       as it comes up.
 
 %{
-#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 
 // Handles str in Python 2, bytes in Python 3.
 // Returns true on success, false on failure.
-bool _BytesToStringPiece(PyObject* obj, absl::string_view* result) {
+bool _BytesToStringPiece(PyObject* obj, tensorflow::StringPiece* result) {
   if (obj == Py_None) {
-    *result = absl::string_view();
+    *result = tensorflow::StringPiece();
   } else {
     char* ptr;
     Py_ssize_t len;
@@ -48,30 +48,30 @@ bool _BytesToStringPiece(PyObject* obj, absl::string_view* result) {
       // Python has raised an error (likely TypeError or UnicodeEncodeError).
       return false;
     }
-    *result = absl::string_view(ptr, len);
+    *result = tensorflow::StringPiece(ptr, len);
   }
   return true;
 }
 %}
 
-%typemap(typecheck) absl::string_view = char *;
-%typemap(typecheck) const absl::string_view & = char *;
+%typemap(typecheck) tensorflow::StringPiece = char *;
+%typemap(typecheck) const tensorflow::StringPiece & = char *;
 
-// "absl::string_view" arguments must be specified as a 'str' or 'bytes' object.
-%typemap(in) absl::string_view {
+// "tensorflow::StringPiece" arguments must be specified as a 'str' or 'bytes' object.
+%typemap(in) tensorflow::StringPiece {
   if (!_BytesToStringPiece($input, &$1)) SWIG_fail;
 }
 
-// "const absl::string_view&" arguments can be provided the same as
-// "absl::string_view", whose typemap is defined above.
-%typemap(in) const absl::string_view & (absl::string_view temp) {
+// "const tensorflow::StringPiece&" arguments can be provided the same as
+// "tensorflow::StringPiece", whose typemap is defined above.
+%typemap(in) const tensorflow::StringPiece & (tensorflow::StringPiece temp) {
   if (!_BytesToStringPiece($input, &temp)) SWIG_fail;
   $1 = &temp;
 }
 
-// C++ functions returning absl::string_view will simply return bytes in
+// C++ functions returning tensorflow::StringPiece will simply return bytes in
 // Python, or None if the StringPiece contained a NULL pointer.
-%typemap(out) absl::string_view {
+%typemap(out) tensorflow::StringPiece {
   if ($1.data()) {
     $result = PyBytes_FromStringAndSize($1.data(), $1.size());
   } else {
diff --git a/tensorflow/python/lib/io/py_record_writer.cc b/tensorflow/python/lib/io/py_record_writer.cc
index ee4eac27f1..faf20df868 100644
--- a/tensorflow/python/lib/io/py_record_writer.cc
+++ b/tensorflow/python/lib/io/py_record_writer.cc
@@ -15,8 +15,8 @@ limitations under the License.
 
 #include "tensorflow/python/lib/io/py_record_writer.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/c/tf_status_helper.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/record_writer.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
 #include "tensorflow/core/platform/env.h"
@@ -48,7 +48,7 @@ PyRecordWriter::~PyRecordWriter() {
   file_.reset();
 }
 
-void PyRecordWriter::WriteRecord(absl::string_view record,
+void PyRecordWriter::WriteRecord(tensorflow::StringPiece record,
                                  TF_Status* out_status) {
   if (writer_ == nullptr) {
     TF_SetStatus(out_status, TF_FAILED_PRECONDITION,
diff --git a/tensorflow/python/lib/io/py_record_writer.h b/tensorflow/python/lib/io/py_record_writer.h
index 0aa7b75334..9b0792c6db 100644
--- a/tensorflow/python/lib/io/py_record_writer.h
+++ b/tensorflow/python/lib/io/py_record_writer.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <memory>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/c/c_api.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/io/record_writer.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -42,7 +42,7 @@ class PyRecordWriter {
                              TF_Status* out_status);
   ~PyRecordWriter();
 
-  void WriteRecord(absl::string_view record, TF_Status* out_status);
+  void WriteRecord(tensorflow::StringPiece record, TF_Status* out_status);
   void Flush(TF_Status* out_status);
   void Close(TF_Status* out_status);
 
diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc
index 0889950e16..11eb9ce947 100644
--- a/tensorflow/python/util/util.cc
+++ b/tensorflow/python/util/util.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <vector>
 
 #include "absl/memory/memory.h"
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
@@ -113,7 +112,7 @@ PyObject* MappingKeys(PyObject* o) {
 // Note that '__class__' attribute is set only in new-style classes.
 // A lot of tensorflow code uses __class__ without checks, so it seems like
 // we only support new-style classes.
-absl::string_view GetClassName(PyObject* o) {
+StringPiece GetClassName(PyObject* o) {
   // __class__ is equivalent to type() for new style classes.
   // type() is equivalent to PyObject_Type()
   // (https://docs.python.org/3.5/c-api/object.html#c.PyObject_Type)
@@ -123,9 +122,9 @@ absl::string_view GetClassName(PyObject* o) {
 
   // __name__ is the value of `tp_name` after the last '.'
   // (https://docs.python.org/2/c-api/typeobj.html#c.PyTypeObject.tp_name)
-  absl::string_view name(type->tp_name);
+  StringPiece name(type->tp_name);
   size_t pos = name.rfind('.');
-  if (pos != absl::string_view::npos) {
+  if (pos != StringPiece::npos) {
     name.remove_prefix(pos + 1);
   }
   return name;
diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD
index 4d33cdf0cf..1ad1895269 100644
--- a/tensorflow/tools/graph_transforms/BUILD
+++ b/tensorflow/tools/graph_transforms/BUILD
@@ -32,7 +32,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -121,7 +120,6 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":transform_utils",
-        "@com_google_absl//absl/strings",
         "//tensorflow/c:checkpoint_reader",
         "//tensorflow/core/util/tensor_bundle",
         "//tensorflow/core:core_cpu",
@@ -183,7 +181,6 @@ tf_cc_test(
         "//tensorflow/core/kernels:quantization_utils",
         "//tensorflow/core/kernels:quantized_ops",
         "//tensorflow/core/util/tensor_bundle",
-        "@com_google_absl//absl/strings",
     ],
 )
 
@@ -201,7 +198,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
-        "@com_google_absl//absl/strings",
     ],
 )
 
diff --git a/tensorflow/tools/graph_transforms/fold_constants_lib.cc b/tensorflow/tools/graph_transforms/fold_constants_lib.cc
index 5bdc529a15..6df2718e61 100644
--- a/tensorflow/tools/graph_transforms/fold_constants_lib.cc
+++ b/tensorflow/tools/graph_transforms/fold_constants_lib.cc
@@ -24,12 +24,12 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/common_runtime/constant_folding.h"
 #include "tensorflow/core/common_runtime/shape_refiner.h"
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/graph/subgraph.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/public/session.h"
@@ -39,10 +39,9 @@ limitations under the License.
 namespace tensorflow {
 namespace graph_transforms {
 namespace {
-using StringPieceSet = std::unordered_set<absl::string_view, StringPieceHasher>;
+using StringPieceSet = std::unordered_set<StringPiece, StringPieceHasher>;
 template <typename T>
-using StringPieceMap =
-    std::unordered_map<absl::string_view, T, StringPieceHasher>;
+using StringPieceMap = std::unordered_map<StringPiece, T, StringPieceHasher>;
 }  // namespace
 
 Status ReplaceSendRecvs(const GraphDef& original_graph_def,
@@ -110,7 +109,7 @@ Status ReplaceSendRecvs(const GraphDef& original_graph_def,
 
   // Some input nodes are removed in rewrite_graph_def. Add those nodes to
   // output_graph_def.
-  for (absl::string_view name : input_nodes) {
+  for (StringPiece name : input_nodes) {
     const NodeDef& removed_node = *CHECK_NOTNULL(original_map[name]);
     output_graph_def->add_node()->MergeFrom(removed_node);
   }
@@ -164,7 +163,7 @@ Status RemoveUnusedNodes(const GraphDef& input_graph_def,
   }
   while (!current_nodes.empty()) {
     StringPieceSet next_nodes;
-    for (absl::string_view node_name : current_nodes) {
+    for (StringPiece node_name : current_nodes) {
       if (node_map.count(node_name) == 0) {
         LOG(ERROR) << "Bad graph structure, no node named '" << node_name
                    << "' found for input lookup";
diff --git a/tensorflow/tools/graph_transforms/fold_constants_test.cc b/tensorflow/tools/graph_transforms/fold_constants_test.cc
index 262314c079..dcdc3c2906 100644
--- a/tensorflow/tools/graph_transforms/fold_constants_test.cc
+++ b/tensorflow/tools/graph_transforms/fold_constants_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include <utility>
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/cc/ops/image_ops.h"
 #include "tensorflow/cc/ops/nn_ops.h"
@@ -209,7 +208,7 @@ class ConstantFoldingTest : public ::testing::Test {
     }
 
     for (const NodeDef& node : graph_def.node()) {
-      const absl::string_view name(node.name());
+      const StringPiece name(node.name());
       const int occurrence_count = folded_node_map.count(node.name());
       if (str_util::EndsWith(name, "expect_removed")) {
         EXPECT_EQ(0, occurrence_count) << "node.name()=" << node.name();
diff --git a/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc b/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc
index 1d586e2cba..d97496cbeb 100644
--- a/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc
+++ b/tensorflow/tools/graph_transforms/freeze_requantization_ranges.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
@@ -89,7 +88,7 @@ Status ExtractMinMaxRecords(const string& log_file_name,
     if (!strings::safe_strtof(max_number_string.c_str(), &max)) {
       continue;
     }
-    absl::string_view name_string = line_parts[min_max_index - 1];
+    StringPiece name_string = line_parts[min_max_index - 1];
     if (!str_util::EndsWith(name_string, print_suffix)) {
       continue;
     }
diff --git a/tensorflow/tools/graph_transforms/sparsify_gather_test.cc b/tensorflow/tools/graph_transforms/sparsify_gather_test.cc
index 5d3da9c59d..b8d6ba00de 100644
--- a/tensorflow/tools/graph_transforms/sparsify_gather_test.cc
+++ b/tensorflow/tools/graph_transforms/sparsify_gather_test.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "absl/strings/string_view.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/cc/ops/sendrecv_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
@@ -39,7 +38,7 @@ Status ReadTensorFromCheckpoint(
 
 class SparsifyGatherTest : public ::testing::Test {
  protected:
-  NodeDef* CreateNode(const absl::string_view name, const absl::string_view op,
+  NodeDef* CreateNode(const StringPiece name, const StringPiece op,
                       const std::vector<NodeDef*>& inputs, GraphDef* graph_def,
                       bool control_dep = false) {
     NodeDef* node_def = graph_def->add_node();
@@ -57,7 +56,7 @@ class SparsifyGatherTest : public ::testing::Test {
     return node_def;
   }
 
-  void MakeGather(absl::string_view name, bool gather_v2, NodeDef* params,
+  void MakeGather(StringPiece name, bool gather_v2, NodeDef* params,
                   NodeDef* indices, GraphDef* graph_def) {
     if (gather_v2) {
       NodeDef* axis_node =
diff --git a/tensorflow/tools/graph_transforms/transform_graph.cc b/tensorflow/tools/graph_transforms/transform_graph.cc
index 9a2b317850..7efe450710 100644
--- a/tensorflow/tools/graph_transforms/transform_graph.cc
+++ b/tensorflow/tools/graph_transforms/transform_graph.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/tools/graph_transforms/transform_graph.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -42,11 +41,11 @@ Status ParseTransformParameters(const string& transforms_string,
     TRANSFORM_PARAM_NAME,
     TRANSFORM_PARAM_VALUE,
   } state = TRANSFORM_NAME;
-  absl::string_view remaining(transforms_string);
-  absl::string_view match;
-  absl::string_view transform_name;
-  absl::string_view parameter_name;
-  absl::string_view parameter_value;
+  StringPiece remaining(transforms_string);
+  StringPiece match;
+  StringPiece transform_name;
+  StringPiece parameter_name;
+  StringPiece parameter_value;
   TransformFuncParameters func_parameters;
   while (!remaining.empty()) {
     if (state == TRANSFORM_NAME) {
diff --git a/tensorflow/tools/graph_transforms/transform_utils.cc b/tensorflow/tools/graph_transforms/transform_utils.cc
index 3097adcb0b..c715380aae 100644
--- a/tensorflow/tools/graph_transforms/transform_utils.cc
+++ b/tensorflow/tools/graph_transforms/transform_utils.cc
@@ -15,7 +15,6 @@ limitations under the License.
 
 #include "tensorflow/tools/graph_transforms/transform_utils.h"
 
-#include "absl/strings/string_view.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/lib/hash/hash.h"
@@ -88,7 +87,7 @@ void NodeNamePartsFromInput(const string& input_name, string* prefix,
   } else {
     *suffix = ":" + input_parts[1];
   }
-  absl::string_view node_name_piece(input_parts[0]);
+  StringPiece node_name_piece(input_parts[0]);
   if (str_util::ConsumePrefix(&node_name_piece, "^")) {
     *prefix = "^";
   } else {
@@ -641,7 +640,7 @@ Status TransformFuncContext::GetOneInt32Parameter(const string& name,
   }
   string string_value;
   TF_RETURN_IF_ERROR(GetOneStringParameter(name, "", &string_value));
-  if (!strings::safe_strto32(absl::string_view(string_value), result)) {
+  if (!strings::safe_strto32(StringPiece(string_value), result)) {
     return errors::InvalidArgument("Couldn't interpret the ", name,
                                    " argument as a number:", string_value);
   }
@@ -658,7 +657,7 @@ Status TransformFuncContext::GetOneInt64Parameter(const string& name,
   }
   string string_value;
   TF_RETURN_IF_ERROR(GetOneStringParameter(name, "", &string_value));
-  if (!strings::safe_strto64(absl::string_view(string_value), result)) {
+  if (!strings::safe_strto64(StringPiece(string_value), result)) {
     return errors::InvalidArgument("Couldn't interpret the ", name,
                                    " argument as a number:", string_value);
   }
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
index da50cae484..15d7c70281 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
@@ -447,7 +447,7 @@ void Generator::AppendParseMessageFunction(const Descriptor& md) {
   Print("scanner->RestartCapture()");
   Print("    .Many(Scanner::LETTER_DIGIT_UNDERSCORE)");
   Print("    .StopCapture();");
-  Print("absl::string_view identifier;");
+  Print("StringPiece identifier;");
   Print("if (!scanner->GetResult(nullptr, &identifier)) return false;");
   Print("bool parsed_colon = false;");
   Print("(void)parsed_colon;"); // Avoid "set but not used" compiler warning
@@ -528,7 +528,7 @@ void Generator::AppendParseMessageFunction(const Descriptor& md) {
       Print("SetProtobufStringSwapAllowed(&str_value, ", mutable_value_expr,
             ");");
     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
-      Print("absl::string_view value;");
+      Print("StringPiece value;");
       Print(
           "if (!parsed_colon || "
           "!scanner->RestartCapture().Many("
-- 
GitLab


From 1750163291664f38d6ff15af146d77c5388e6d54 Mon Sep 17 00:00:00 2001
From: Deepak B <dbhadauria@google.com>
Date: Mon, 15 Oct 2018 11:58:31 -0700
Subject: [PATCH 0979/1085] Added new issue templates

---
 .../bug-performance-issue-template.md         | 36 +++++++++++++++++++
 .../ISSUE_TEMPLATE/bug-performance-issue.md   | 34 ++++++++++++++++++
 .../build-installation-issue-template.md      | 29 +++++++++++++++
 .../build-installation-issue.md               | 29 +++++++++++++++
 .github/ISSUE_TEMPLATE/custom.md              | 36 +++++++++++++++++++
 .../documentation-issue-template.md           | 17 +++++++++
 .github/ISSUE_TEMPLATE/documentation-issue.md | 17 +++++++++
 .../feature-request-template.md               | 22 ++++++++++++
 .github/ISSUE_TEMPLATE/feature-request.md     | 22 ++++++++++++
 .github/ISSUE_TEMPLATE/other-issues.md        | 13 +++++++
 10 files changed, 255 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/bug-performance-issue-template.md
 create mode 100644 .github/ISSUE_TEMPLATE/bug-performance-issue.md
 create mode 100644 .github/ISSUE_TEMPLATE/build-installation-issue-template.md
 create mode 100644 .github/ISSUE_TEMPLATE/build-installation-issue.md
 create mode 100644 .github/ISSUE_TEMPLATE/custom.md
 create mode 100644 .github/ISSUE_TEMPLATE/documentation-issue-template.md
 create mode 100644 .github/ISSUE_TEMPLATE/documentation-issue.md
 create mode 100644 .github/ISSUE_TEMPLATE/feature-request-template.md
 create mode 100644 .github/ISSUE_TEMPLATE/feature-request.md
 create mode 100644 .github/ISSUE_TEMPLATE/other-issues.md

diff --git a/.github/ISSUE_TEMPLATE/bug-performance-issue-template.md b/.github/ISSUE_TEMPLATE/bug-performance-issue-template.md
new file mode 100644
index 0000000000..890032dba5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-performance-issue-template.md
@@ -0,0 +1,36 @@
+---
+name: Bug/Performance Issue Template
+about: Use this template for reporting a bug or a performance issue.
+
+---
+
+Please make sure that this is a bug. As per our [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub.
+
+Please fill the following Bug_Template:
+### System information
+- Have I written custom code (as opposed to using a stock example script provided in TensorFlow):
+- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
+- Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device:
+- TensorFlow installed from (source or binary):
+- TensorFlow version (use command below):
+- Python version:
+- Bazel version (if compiling from source):
+- GCC/Compiler version (if compiling from source):
+- CUDA/cuDNN version:
+- GPU model and memory:
+
+
+You can collect some of this information using our environment capture [script](https://github.com/tensorflow/tensorflow/tree/master/tools/tf_env_collect.sh)
+You can also obtain the TensorFlow version with
+python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"
+
+### Describe the current behavior
+
+### Describe the expected behavior
+
+### Code to reproduce the issue
+Provide a reproducible test case that is the bare minimum necessary to generate the problem.
+
+
+### Other info / logs
+Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
diff --git a/.github/ISSUE_TEMPLATE/bug-performance-issue.md b/.github/ISSUE_TEMPLATE/bug-performance-issue.md
new file mode 100644
index 0000000000..5d6ca6da55
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-performance-issue.md
@@ -0,0 +1,34 @@
+---
+name: Bug/Performance Issue
+about: Use this template for reporting a bug or a performance issue.
+
+---
+
+<em>Please make sure that this is a bug. As per our [GitHub Policy](https://github.com/tensorflow/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:bug_template</em>
+
+**System information**
+- Have I written custom code (as opposed to using a stock example script provided in TensorFlow):
+- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
+- Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device:
+- TensorFlow installed from (source or binary):
+- TensorFlow version (use command below):
+- Python version:
+- Bazel version (if compiling from source):
+- GCC/Compiler version (if compiling from source):
+- CUDA/cuDNN version:
+- GPU model and memory:
+
+
+You can collect some of this information using our environment capture [script](https://github.com/tensorflow/tensorflow/tree/master/tools/tf_env_collect.sh)
+You can also obtain the TensorFlow version with
+python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"
+
+**Describe the current behavior**
+
+**Describe the expected behavior**
+
+**Code to reproduce the issue**
+Provide a reproducible test case that is the bare minimum necessary to generate the problem.
+
+**Other info / logs**
+Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
diff --git a/.github/ISSUE_TEMPLATE/build-installation-issue-template.md b/.github/ISSUE_TEMPLATE/build-installation-issue-template.md
new file mode 100644
index 0000000000..61ac00c861
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/build-installation-issue-template.md
@@ -0,0 +1,29 @@
+---
+name: Build/Installation Issue Template
+about: Use this template for build/installation issues
+
+---
+
+<em>Please make sure that this is a build/installation issue. As per our   [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md) we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:build_template</em>
+
+**System information**
+- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
+- Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device:
+- TensorFlow installed from (source or binary):
+- TensorFlow version:
+- Python version:
+- Installed using virtualenv? pip? conda?:
+- Bazel version (if compiling from source):
+- GCC/Compiler version (if compiling from source):
+- CUDA/cuDNN version:
+- GPU model and memory:
+
+
+
+**Describe the problem**
+
+**Provide the exact sequence of commands / steps that you executed before running into the problem**
+
+
+**Any other info / logs**
+Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
diff --git a/.github/ISSUE_TEMPLATE/build-installation-issue.md b/.github/ISSUE_TEMPLATE/build-installation-issue.md
new file mode 100644
index 0000000000..53e77e32d3
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/build-installation-issue.md
@@ -0,0 +1,29 @@
+---
+name: Build/Installation Issue
+about: Use this template for build/installation issues
+
+---
+
+<em>Please make sure that this is a build/installation issue. As per our   [GitHub Policy](https://github.com/tensorflow/tensorflow/blob/master/ISSUES.md) we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:build_template</em>
+
+**System information**
+- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
+- Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device:
+- TensorFlow installed from (source or binary):
+- TensorFlow version:
+- Python version:
+- Installed using virtualenv? pip? conda?:
+- Bazel version (if compiling from source):
+- GCC/Compiler version (if compiling from source):
+- CUDA/cuDNN version:
+- GPU model and memory:
+
+
+
+**Describe the problem**
+
+**Provide the exact sequence of commands / steps that you executed before running into the problem**
+
+
+**Any other info / logs**
+Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md
new file mode 100644
index 0000000000..cfbf5d5117
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/custom.md
@@ -0,0 +1,36 @@
+---
+name: Custom issue template
+about: Describe this issue template's purpose here.
+
+---
+
+Please make sure that this is a bug. As per our [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub.
+
+Please fill the following Bug Template:
+### System information
+- **Have I written custom code (as opposed to using a stock example script provided in TensorFlow)**:
+- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
+- **Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device**:
+- **TensorFlow installed from (source or binary)**:
+- **TensorFlow version (use command below)**:
+- **Python version**:
+- **Bazel version (if compiling from source)**:
+- **GCC/Compiler version (if compiling from source)**:
+- **CUDA/cuDNN version**:
+- **GPU model and memory**:
+
+
+You can collect some of this information using our environment capture [script](https://github.com/tensorflow/tensorflow/tree/master/tools/tf_env_collect.sh)
+You can also obtain the TensorFlow version with
+python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"
+
+### Describe the current behavior
+
+### Describe the expected behavior
+
+### Code to reproduce the issue
+Provide a reproducible test case that is the bare minimum necessary to generate the problem.
+
+
+### Other info / logs
+Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
diff --git a/.github/ISSUE_TEMPLATE/documentation-issue-template.md b/.github/ISSUE_TEMPLATE/documentation-issue-template.md
new file mode 100644
index 0000000000..7b31d0b3fe
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/documentation-issue-template.md
@@ -0,0 +1,17 @@
+---
+name: Documentation Issue Template
+about: Use this template for documentation related issues
+
+---
+
+<em>Please make sure that this is a documentation issue. As per our  [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:doc_template</em>
+
+
+**System information**
+- TensorFlow version:
+- Doc Link:
+
+
+**Describe the documentation issue**
+
+**We welcome contributions by users. Will you be able to update submit a PR to fix the doc Issue?**
diff --git a/.github/ISSUE_TEMPLATE/documentation-issue.md b/.github/ISSUE_TEMPLATE/documentation-issue.md
new file mode 100644
index 0000000000..8a5fbde645
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/documentation-issue.md
@@ -0,0 +1,17 @@
+---
+name: Documentation Issue
+about: Use this template for documentation related issues
+
+---
+
+<em>Please make sure that this is a documentation issue. As per our  [GitHub Policy](https://github.com/tensorflow/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:doc_template</em>
+
+
+**System information**
+- TensorFlow version:
+- Doc Link:
+
+
+**Describe the documentation issue**
+
+**We welcome contributions by users. Will you be able to update submit a PR to fix the doc Issue?**
diff --git a/.github/ISSUE_TEMPLATE/feature-request-template.md b/.github/ISSUE_TEMPLATE/feature-request-template.md
new file mode 100644
index 0000000000..cdcdc3624d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request-template.md
@@ -0,0 +1,22 @@
+---
+name: Feature Request Template
+about: Use this template for raising a feature request
+
+---
+
+<em>Please make sure that this is a feature request.   As per our  [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:feature_template</em>
+
+
+**System information**
+- TensorFlow version (you are using):
+- Are you willing to contribute it (Yes/No):
+
+
+
+**Describe the feature and the current behavior/state.**
+
+**Will this change the current api? How?**
+
+**Who will benefit with this feature?**
+
+**Any Other info.**
diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md
new file mode 100644
index 0000000000..dbf094daee
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@@ -0,0 +1,22 @@
+---
+name: Feature Request
+about: Use this template for raising a feature request
+
+---
+
+<em>Please make sure that this is a feature request.   As per our  [GitHub Policy](https://github.com/tensorflow/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:feature_template</em>
+
+
+**System information**
+- TensorFlow version (you are using):
+- Are you willing to contribute it (Yes/No):
+
+
+
+**Describe the feature and the current behavior/state.**
+
+**Will this change the current api? How?**
+
+**Who will benefit with this feature?**
+
+**Any Other info.**
diff --git a/.github/ISSUE_TEMPLATE/other-issues.md b/.github/ISSUE_TEMPLATE/other-issues.md
new file mode 100644
index 0000000000..7cceaf24be
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/other-issues.md
@@ -0,0 +1,13 @@
+---
+name: Other Issues
+about: Use this template for any other non-support related issues
+
+---
+
+This template is for miscellaneous issues not covered by the other issue categories.  
+
+For questions on how work with TensorFlow, or support for problems that are not verified bugs in TensorFlow, please go to [StackOverflow](https://stackoverflow.com/questions/tagged/tensorflow). 
+
+If you are reporting a vulnerability, please use the [dedicated reporting process](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md).
+
+For high-level discussions about TensorFlow, please post to discuss@tensorflow.org, for questions about the development or internal workings of TensorFlow, or if you would like to know how to contribute to TensorFlow, please post to developers@tensorflow.org.
-- 
GitLab


From 3f8506e6057f189d977697f8d0a152d6afbee7c7 Mon Sep 17 00:00:00 2001
From: Deepak B <dbhadauria@google.com>
Date: Mon, 15 Oct 2018 11:59:22 -0700
Subject: [PATCH 0980/1085] Delete custom.md

---
 .github/ISSUE_TEMPLATE/custom.md | 36 --------------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 .github/ISSUE_TEMPLATE/custom.md

diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md
deleted file mode 100644
index cfbf5d5117..0000000000
--- a/.github/ISSUE_TEMPLATE/custom.md
+++ /dev/null
@@ -1,36 +0,0 @@
----
-name: Custom issue template
-about: Describe this issue template's purpose here.
-
----
-
-Please make sure that this is a bug. As per our [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub.
-
-Please fill the following Bug Template:
-### System information
-- **Have I written custom code (as opposed to using a stock example script provided in TensorFlow)**:
-- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
-- **Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device**:
-- **TensorFlow installed from (source or binary)**:
-- **TensorFlow version (use command below)**:
-- **Python version**:
-- **Bazel version (if compiling from source)**:
-- **GCC/Compiler version (if compiling from source)**:
-- **CUDA/cuDNN version**:
-- **GPU model and memory**:
-
-
-You can collect some of this information using our environment capture [script](https://github.com/tensorflow/tensorflow/tree/master/tools/tf_env_collect.sh)
-You can also obtain the TensorFlow version with
-python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"
-
-### Describe the current behavior
-
-### Describe the expected behavior
-
-### Code to reproduce the issue
-Provide a reproducible test case that is the bare minimum necessary to generate the problem.
-
-
-### Other info / logs
-Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
-- 
GitLab


From 094a06b16300e701188ebe6ba37a4c8d5dfc384b Mon Sep 17 00:00:00 2001
From: Deepak B <dbhadauria@google.com>
Date: Mon, 15 Oct 2018 12:00:09 -0700
Subject: [PATCH 0981/1085] Delete bug-performance-issue-template.md

---
 .../bug-performance-issue-template.md         | 36 -------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 .github/ISSUE_TEMPLATE/bug-performance-issue-template.md

diff --git a/.github/ISSUE_TEMPLATE/bug-performance-issue-template.md b/.github/ISSUE_TEMPLATE/bug-performance-issue-template.md
deleted file mode 100644
index 890032dba5..0000000000
--- a/.github/ISSUE_TEMPLATE/bug-performance-issue-template.md
+++ /dev/null
@@ -1,36 +0,0 @@
----
-name: Bug/Performance Issue Template
-about: Use this template for reporting a bug or a performance issue.
-
----
-
-Please make sure that this is a bug. As per our [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub.
-
-Please fill the following Bug_Template:
-### System information
-- Have I written custom code (as opposed to using a stock example script provided in TensorFlow):
-- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
-- Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device:
-- TensorFlow installed from (source or binary):
-- TensorFlow version (use command below):
-- Python version:
-- Bazel version (if compiling from source):
-- GCC/Compiler version (if compiling from source):
-- CUDA/cuDNN version:
-- GPU model and memory:
-
-
-You can collect some of this information using our environment capture [script](https://github.com/tensorflow/tensorflow/tree/master/tools/tf_env_collect.sh)
-You can also obtain the TensorFlow version with
-python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"
-
-### Describe the current behavior
-
-### Describe the expected behavior
-
-### Code to reproduce the issue
-Provide a reproducible test case that is the bare minimum necessary to generate the problem.
-
-
-### Other info / logs
-Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
-- 
GitLab


From 0e40a0a03c10ef5e6fed9b1bcd2c710932b06c6a Mon Sep 17 00:00:00 2001
From: Deepak B <dbhadauria@google.com>
Date: Mon, 15 Oct 2018 12:00:17 -0700
Subject: [PATCH 0982/1085] Delete build-installation-issue-template.md

---
 .../build-installation-issue-template.md      | 29 -------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 .github/ISSUE_TEMPLATE/build-installation-issue-template.md

diff --git a/.github/ISSUE_TEMPLATE/build-installation-issue-template.md b/.github/ISSUE_TEMPLATE/build-installation-issue-template.md
deleted file mode 100644
index 61ac00c861..0000000000
--- a/.github/ISSUE_TEMPLATE/build-installation-issue-template.md
+++ /dev/null
@@ -1,29 +0,0 @@
----
-name: Build/Installation Issue Template
-about: Use this template for build/installation issues
-
----
-
-<em>Please make sure that this is a build/installation issue. As per our   [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md) we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:build_template</em>
-
-**System information**
-- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
-- Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device:
-- TensorFlow installed from (source or binary):
-- TensorFlow version:
-- Python version:
-- Installed using virtualenv? pip? conda?:
-- Bazel version (if compiling from source):
-- GCC/Compiler version (if compiling from source):
-- CUDA/cuDNN version:
-- GPU model and memory:
-
-
-
-**Describe the problem**
-
-**Provide the exact sequence of commands / steps that you executed before running into the problem**
-
-
-**Any other info / logs**
-Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
-- 
GitLab


From 7b7c63e05661b31b7dea1a66b2dbce3fa1262a17 Mon Sep 17 00:00:00 2001
From: Deepak B <dbhadauria@google.com>
Date: Mon, 15 Oct 2018 12:00:34 -0700
Subject: [PATCH 0983/1085] Delete documentation-issue-template.md

---
 .../documentation-issue-template.md             | 17 -----------------
 1 file changed, 17 deletions(-)
 delete mode 100644 .github/ISSUE_TEMPLATE/documentation-issue-template.md

diff --git a/.github/ISSUE_TEMPLATE/documentation-issue-template.md b/.github/ISSUE_TEMPLATE/documentation-issue-template.md
deleted file mode 100644
index 7b31d0b3fe..0000000000
--- a/.github/ISSUE_TEMPLATE/documentation-issue-template.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-name: Documentation Issue Template
-about: Use this template for documentation related issues
-
----
-
-<em>Please make sure that this is a documentation issue. As per our  [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:doc_template</em>
-
-
-**System information**
-- TensorFlow version:
-- Doc Link:
-
-
-**Describe the documentation issue**
-
-**We welcome contributions by users. Will you be able to update submit a PR to fix the doc Issue?**
-- 
GitLab


From 05e74beb901a9e4eea16f283bee74ae87f5651be Mon Sep 17 00:00:00 2001
From: Deepak B <dbhadauria@google.com>
Date: Mon, 15 Oct 2018 12:00:40 -0700
Subject: [PATCH 0984/1085] Delete feature-request-template.md

---
 .../feature-request-template.md               | 22 -------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 .github/ISSUE_TEMPLATE/feature-request-template.md

diff --git a/.github/ISSUE_TEMPLATE/feature-request-template.md b/.github/ISSUE_TEMPLATE/feature-request-template.md
deleted file mode 100644
index cdcdc3624d..0000000000
--- a/.github/ISSUE_TEMPLATE/feature-request-template.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-name: Feature Request Template
-about: Use this template for raising a feature request
-
----
-
-<em>Please make sure that this is a feature request.   As per our  [GitHub Policy](https://github.com/dksb/tensorflow/blob/master/ISSUES.md)  we only address code/doc bugs, performance issues, feature requests and build/installation issues on GitHub. tag:feature_template</em>
-
-
-**System information**
-- TensorFlow version (you are using):
-- Are you willing to contribute it (Yes/No):
-
-
-
-**Describe the feature and the current behavior/state.**
-
-**Will this change the current api? How?**
-
-**Who will benefit with this feature?**
-
-**Any Other info.**
-- 
GitLab


From cabca3a5e2ba6b54fb9bfab0de66de9e0eec935d Mon Sep 17 00:00:00 2001
From: Deepak B <dbhadauria@google.com>
Date: Mon, 15 Oct 2018 12:08:08 -0700
Subject: [PATCH 0985/1085] Update other-issues.md

---
 .github/ISSUE_TEMPLATE/other-issues.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/ISSUE_TEMPLATE/other-issues.md b/.github/ISSUE_TEMPLATE/other-issues.md
index 7cceaf24be..225962a0f5 100644
--- a/.github/ISSUE_TEMPLATE/other-issues.md
+++ b/.github/ISSUE_TEMPLATE/other-issues.md
@@ -6,7 +6,7 @@ about: Use this template for any other non-support related issues
 
 This template is for miscellaneous issues not covered by the other issue categories.  
 
-For questions on how work with TensorFlow, or support for problems that are not verified bugs in TensorFlow, please go to [StackOverflow](https://stackoverflow.com/questions/tagged/tensorflow). 
+For questions on how to work with TensorFlow, or support for problems that are not verified bugs in TensorFlow, please go to [StackOverflow](https://stackoverflow.com/questions/tagged/tensorflow). 
 
 If you are reporting a vulnerability, please use the [dedicated reporting process](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md).
 
-- 
GitLab


From ead4fda06535ce547d014fba1656ae53f0b64996 Mon Sep 17 00:00:00 2001
From: Chris Jones <cjfj@google.com>
Date: Mon, 15 Oct 2018 11:58:18 -0700
Subject: [PATCH 0986/1085] Fixes a bug in tf.train.Saver(), where classes
 using the `VARIABLE_VALUE_KEY` used different naming in the checkpoint file
 when `var_list` was a dict.

PiperOrigin-RevId: 217182136
---
 tensorflow/python/training/saver.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index 5b2b19e913..8a499b0b0d 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -626,7 +626,12 @@ class BaseSaverBuilder(object):
         op, variables.Variable):
       # pylint: disable=protected-access
       for attr, factory in op._gather_saveables_for_checkpoint().items():
-        op = (factory(name + "_" + attr) if callable(factory) else factory)
+        if attr == checkpointable.VARIABLE_VALUE_KEY:
+          # Keep original name for classes masquerading as variables.
+          full_name = name
+        else:
+          full_name = name + "_" + attr
+        op = (factory(full_name) if callable(factory) else factory)
         for op in BaseSaverBuilder.SaveableObjectsForOp(op, op.name):
           yield op
       # pylint: enable=protected-access
-- 
GitLab


From 307c83106445ab2c52847f08d35a66c51aff19d9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 12:09:46 -0700
Subject: [PATCH 0987/1085] [XLA] HloRunner: invoke Compile() instead of
 RunHloPasses() and RunBackEnd() in CreateExecutable to reproduce real
 compilation passes.

PiperOrigin-RevId: 217184451
---
 tensorflow/compiler/xla/service/BUILD         |  1 +
 .../compiler/xla/service/hlo_instruction.cc   | 29 ++++++++++---------
 .../compiler/xla/service/hlo_instructions.cc  |  4 +--
 tensorflow/compiler/xla/service/hlo_runner.cc | 10 +++++--
 .../xla/service/interpreter/compiler.cc       | 29 +++++++++++++++----
 5 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 7d03eba800..3a716c385b 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -3212,6 +3212,7 @@ cc_library(
         ":computation_placer",
         ":executable",
         ":hlo",
+        ":hlo_module_group",
         ":transfer_manager",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status_macros",
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index b6df63c983..f6ed86b416 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -510,21 +510,24 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       TF_RET_CHECK(proto.operand_ids_size() == 1)
           << "Domain instruction should have 1 operands but sees "
           << proto.operand_ids_size();
-      TF_RET_CHECK(proto.has_domain_entry_sharding())
-          << "Domain instruction must domain_entry_sharding";
-      TF_RET_CHECK(proto.has_domain_exit_sharding())
-          << "Domain instruction must domain_exit_sharding";
-      TF_ASSIGN_OR_RETURN(
-          HloSharding entry_hlo_sharding,
-          HloSharding::FromProto(proto.domain_entry_sharding()));
-      TF_ASSIGN_OR_RETURN(HloSharding exit_hlo_sharding,
-                          HloSharding::FromProto(proto.domain_exit_sharding()));
+      std::shared_ptr<const HloSharding> entry_hlo_sharding;
+      std::shared_ptr<const HloSharding> exit_hlo_sharding;
+      if (proto.has_domain_entry_sharding()) {
+        TF_ASSIGN_OR_RETURN(
+            HloSharding sharding,
+            HloSharding::FromProto(proto.domain_entry_sharding()));
+        entry_hlo_sharding = std::make_shared<const HloSharding>(sharding);
+      }
+      if (proto.has_domain_exit_sharding()) {
+        TF_ASSIGN_OR_RETURN(
+            HloSharding sharding,
+            HloSharding::FromProto(proto.domain_exit_sharding()));
+        exit_hlo_sharding = std::make_shared<const HloSharding>(sharding);
+      }
       instruction = absl::make_unique<HloDomainInstruction>(
           proto.shape(), operands(0),
-          absl::make_unique<ShardingMetadata>(
-              std::make_shared<const HloSharding>(entry_hlo_sharding)),
-          absl::make_unique<ShardingMetadata>(
-              std::make_shared<const HloSharding>(exit_hlo_sharding)));
+          absl::make_unique<ShardingMetadata>(entry_hlo_sharding),
+          absl::make_unique<ShardingMetadata>(exit_hlo_sharding));
       break;
     }
     default: {
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 179ace2cdb..88495e8000 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -2335,14 +2335,14 @@ HloInstructionProto HloDomainInstruction::ToProto() const {
   HloInstructionProto proto = HloInstruction::ToProto();
   auto operand_side_sharding =
       dynamic_cast<const ShardingMetadata*>(operand_side_metadata_.get());
-  if (operand_side_sharding) {
+  if (operand_side_sharding && operand_side_sharding->sharding() != nullptr) {
     *proto.mutable_domain_entry_sharding() =
         operand_side_sharding->sharding()->ToProto();
   }
 
   auto user_side_sharding =
       dynamic_cast<const ShardingMetadata*>(user_side_metadata_.get());
-  if (user_side_sharding) {
+  if (user_side_sharding && user_side_sharding->sharding() != nullptr) {
     *proto.mutable_domain_exit_sharding() =
         user_side_sharding->sharding()->ToProto();
   }
diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc
index fa7f216321..3f0ca342b4 100644
--- a/tensorflow/compiler/xla/service/hlo_runner.cc
+++ b/tensorflow/compiler/xla/service/hlo_runner.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "absl/memory/memory.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/service/hlo_module_group.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/service/transfer_manager.h"
 #include "tensorflow/compiler/xla/shape_util.h"
@@ -324,10 +325,13 @@ StatusOr<std::vector<Literal>> HloRunner::ExecuteReplicated(
 StatusOr<std::unique_ptr<Executable>> HloRunner::CreateExecutable(
     std::unique_ptr<HloModule> module, bool run_hlo_passes) {
   if (run_hlo_passes) {
+    auto module_group = absl::make_unique<HloModuleGroup>(std::move(module));
     TF_ASSIGN_OR_RETURN(
-        module, backend().compiler()->RunHloPasses(
-                    std::move(module), backend().default_stream_executor(),
-                    backend().memory_allocator()));
+        auto executables,
+        backend().compiler()->Compile(std::move(module_group),
+                                      {{backend().default_stream_executor()}},
+                                      backend().memory_allocator()));
+    return std::move(executables[0]);
   }
   return backend().compiler()->RunBackend(std::move(module),
                                           backend().default_stream_executor(),
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 26643667c8..a1fe97cffa 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -92,11 +92,30 @@ InterpreterCompiler::RunBackendOnModuleGroup(
 }
 
 StatusOr<std::vector<std::unique_ptr<Executable>>> InterpreterCompiler::Compile(
-    std::unique_ptr<HloModuleGroup> /*module_group*/,
-    std::vector<std::vector<se::StreamExecutor*>> /*stream_execs*/,
-    DeviceMemoryAllocator* /*device_allocator*/) {
-  return Unimplemented(
-      "Module group compilation is not supported on Interpreter.");
+    std::unique_ptr<HloModuleGroup> module_group,
+    std::vector<std::vector<se::StreamExecutor*>> stream_exec,
+    DeviceMemoryAllocator* device_allocator) {
+  if (module_group->empty()) {
+    return std::vector<std::unique_ptr<Executable>>();
+  }
+  if (module_group->size() > 1) {
+    return tensorflow::errors::Unimplemented(
+        "Compilation of multiple HLO modules is not supported on Interpreter.");
+  }
+  if (stream_exec.size() != 1 || stream_exec[0].size() != 1) {
+    return tensorflow::errors::Unimplemented(
+        "Unexpected number of StreamExecutor's.");
+  }
+  auto hlo_modules = module_group->ConsumeModules();
+  TF_ASSIGN_OR_RETURN(auto module,
+                      RunHloPasses(std::move(hlo_modules[0]), stream_exec[0][0],
+                                   device_allocator));
+  TF_ASSIGN_OR_RETURN(
+      auto executable,
+      RunBackend(std::move(module), stream_exec[0][0], device_allocator));
+  std::vector<std::unique_ptr<Executable>> ret;
+  ret.push_back(std::move(executable));
+  return std::move(ret);
 }
 
 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-- 
GitLab


From d8587c9ee2e4e99c78f6a5e490a89b9a90b17e8a Mon Sep 17 00:00:00 2001
From: Amit Patankar <amitpatankar@google.com>
Date: Mon, 15 Oct 2018 12:22:11 -0700
Subject: [PATCH 0988/1085] Update renames_v2.py file.

PiperOrigin-RevId: 217186545
---
 tensorflow/tools/compatibility/renames_v2.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index c3f20d0337..98146028c6 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -142,7 +142,9 @@ renames = {
     'tf.rint': 'tf.math.rint',
     'tf.rsqrt': 'tf.math.rsqrt',
     'tf.saved_model.builder.SavedModelBuilder': 'tf.saved_model.Builder',
+    'tf.saved_model.loader.load': 'tf.saved_model.load',
     'tf.saved_model.loader.maybe_saved_model_directory': 'tf.saved_model.maybe_saved_model_directory',
+    'tf.saved_model.main_op.main_op': 'tf.saved_model.main_op',
     'tf.saved_model.main_op.main_op_with_restore': 'tf.saved_model.main_op_with_restore',
     'tf.saved_model.signature_def_utils.build_signature_def': 'tf.saved_model.build_signature_def',
     'tf.saved_model.signature_def_utils.classification_signature_def': 'tf.saved_model.classification_signature_def',
-- 
GitLab


From d7784aa0247dfdf37ea6fcac701c53693ed1cd9d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 12:23:28 -0700
Subject: [PATCH 0989/1085] Update to ragged array ops gen location.

PiperOrigin-RevId: 217186745
---
 tensorflow/python/BUILD | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index c749017627..185ca1702f 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1804,6 +1804,14 @@ tf_gen_op_wrapper_private_py(
     out = "training/gen_training_ops.py",
 )
 
+tf_gen_op_wrapper_private_py(
+    name = "ragged_array_ops_gen",
+    visibility = [
+        "//learning/brain/contrib/text:__pkg__",
+        "//learning/brain/contrib/text/python/ragged:__pkg__",
+    ],
+)
+
 tf_gen_op_wrapper_private_py(
     name = "ragged_math_ops_gen",
     visibility = [
-- 
GitLab


From 458bbee5f45f1e96e293e512f766d142ac125d9a Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Mon, 15 Oct 2018 12:26:29 -0700
Subject: [PATCH 0990/1085] [tf.data] More informative function name for
 tf_data_structured_function_wrapper.

PiperOrigin-RevId: 217187142
---
 .../python/data/experimental/ops/batching.py  |  3 +
 .../python/data/experimental/ops/grouping.py  | 28 +++++----
 .../python/data/experimental/ops/scan_ops.py  |  5 +-
 tensorflow/python/data/ops/dataset_ops.py     | 60 +++++++++++++++----
 4 files changed, 72 insertions(+), 24 deletions(-)

diff --git a/tensorflow/python/data/experimental/ops/batching.py b/tensorflow/python/data/experimental/ops/batching.py
index d42af9e7e9..d8985fd13b 100644
--- a/tensorflow/python/data/experimental/ops/batching.py
+++ b/tensorflow/python/data/experimental/ops/batching.py
@@ -611,6 +611,9 @@ class _MapAndBatchDataset(dataset_ops.MapDataset):
   def output_types(self):
     return self._output_types
 
+  def _transformation_name(self):
+    return "tf.data.experimental.map_and_batch()"
+
 
 @tf_export("data.experimental.map_and_batch")
 def map_and_batch(map_func,
diff --git a/tensorflow/python/data/experimental/ops/grouping.py b/tensorflow/python/data/experimental/ops/grouping.py
index 18ba583220..ac4c75743d 100644
--- a/tensorflow/python/data/experimental/ops/grouping.py
+++ b/tensorflow/python/data/experimental/ops/grouping.py
@@ -276,7 +276,7 @@ class _GroupByReducerDataset(dataset_ops.UnaryDataset):
   def _make_key_func(self, key_func, input_dataset):
     """Make wrapping Defun for key_func."""
     wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        key_func, "tf.data.experimental.group_by_reducer()", input_dataset)
+        key_func, self._transformation_name(), dataset=input_dataset)
     if not (
         wrapped_func.output_types == dtypes.int64 and
         wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
@@ -290,7 +290,7 @@ class _GroupByReducerDataset(dataset_ops.UnaryDataset):
     """Make wrapping Defun for init_func."""
     wrapped_func = dataset_ops.StructuredFunctionWrapper(
         init_func,
-        "tf.data.experimental.group_by_reducer()",
+        self._transformation_name(),
         input_classes=ops.Tensor,
         input_shapes=tensor_shape.scalar(),
         input_types=dtypes.int64)
@@ -309,7 +309,7 @@ class _GroupByReducerDataset(dataset_ops.UnaryDataset):
 
       wrapped_func = dataset_ops.StructuredFunctionWrapper(
           reduce_func,
-          "tf.data.experimental.group_by_reducer()",
+          self._transformation_name(),
           input_classes=(self._state_classes, input_dataset.output_classes),
           input_shapes=(self._state_shapes, input_dataset.output_shapes),
           input_types=(self._state_types, input_dataset.output_types),
@@ -363,7 +363,7 @@ class _GroupByReducerDataset(dataset_ops.UnaryDataset):
     """Make wrapping Defun for finalize_func."""
     wrapped_func = dataset_ops.StructuredFunctionWrapper(
         finalize_func,
-        "tf.data.experimental.group_by_reducer()",
+        self._transformation_name(),
         input_classes=self._state_classes,
         input_shapes=self._state_shapes,
         input_types=self._state_types)
@@ -397,6 +397,9 @@ class _GroupByReducerDataset(dataset_ops.UnaryDataset):
         finalize_func=self._finalize_func,
         **dataset_ops.flat_structure(self))
 
+  def _transformation_name(self):
+    return "tf.data.experimental.group_by_reducer()"
+
 
 class _GroupByWindowDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that groups its input and performs a windowed reduction."""
@@ -417,7 +420,7 @@ class _GroupByWindowDataset(dataset_ops.UnaryDataset):
       return ops.convert_to_tensor(window_size_func(key), dtype=dtypes.int64)
     wrapped_func = dataset_ops.StructuredFunctionWrapper(
         window_size_func_wrapper,
-        "tf.data.experimental.group_by_window()",
+        self._transformation_name(),
         input_classes=ops.Tensor,
         input_shapes=tensor_shape.scalar(),
         input_types=dtypes.int64)
@@ -433,8 +436,7 @@ class _GroupByWindowDataset(dataset_ops.UnaryDataset):
     def key_func_wrapper(*args):
       return ops.convert_to_tensor(key_func(*args), dtype=dtypes.int64)
     wrapped_func = dataset_ops.StructuredFunctionWrapper(
-        key_func_wrapper, "tf.data.experimental.group_by_window()",
-        input_dataset)
+        key_func_wrapper, self._transformation_name(), dataset=input_dataset)
     if not (
         wrapped_func.output_types == dtypes.int64 and
         wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
@@ -447,7 +449,7 @@ class _GroupByWindowDataset(dataset_ops.UnaryDataset):
     nested_dataset = dataset_ops._NestedDatasetComponent(input_dataset)  # pylint: disable=protected-access
     wrapped_func = dataset_ops.StructuredFunctionWrapper(
         reduce_func,
-        "tf.data.experimental.reduce_by_window()",
+        self._transformation_name(),
         input_classes=(ops.Tensor, nested_dataset),
         input_shapes=(tensor_shape.scalar(), nested_dataset),
         input_types=(dtypes.int64, nested_dataset),
@@ -483,6 +485,9 @@ class _GroupByWindowDataset(dataset_ops.UnaryDataset):
         window_size_func=self._window_size_func,
         **dataset_ops.flat_structure(self))
 
+  def _transformation_name(self):
+    return "tf.data.experimental.group_by_window()"
+
 
 @tf_export("data.experimental.Reducer")
 class Reducer(object):
@@ -522,8 +527,8 @@ class _MapXDataset(dataset_ops.UnaryDataset):
 
     wrapped_func = dataset_ops.StructuredFunctionWrapper(
         map_func,
-        "tf.data.experimental.map_x_dataset()",
-        input_dataset,
+        self._transformation_name(),
+        dataset=input_dataset,
         experimental_nested_dataset_support=True)
     self._output_classes = wrapped_func.output_classes
     self._output_shapes = wrapped_func.output_shapes
@@ -549,3 +554,6 @@ class _MapXDataset(dataset_ops.UnaryDataset):
   @property
   def output_types(self):
     return self._output_types
+
+  def _transformation_name(self):
+    return "tf.data.experimental.map_x_dataset()"
diff --git a/tensorflow/python/data/experimental/ops/scan_ops.py b/tensorflow/python/data/experimental/ops/scan_ops.py
index e05e7c5a18..1194238e2f 100644
--- a/tensorflow/python/data/experimental/ops/scan_ops.py
+++ b/tensorflow/python/data/experimental/ops/scan_ops.py
@@ -69,7 +69,7 @@ class _ScanDataset(dataset_ops.UnaryDataset):
 
       wrapped_func = dataset_ops.StructuredFunctionWrapper(
           scan_func,
-          "tf.data.experimental.scan()",
+          self._transformation_name(),
           input_classes=(self._state_classes, input_dataset.output_classes),
           input_shapes=(self._state_shapes, input_dataset.output_shapes),
           input_types=(self._state_types, input_dataset.output_types),
@@ -149,6 +149,9 @@ class _ScanDataset(dataset_ops.UnaryDataset):
   def output_types(self):
     return self._output_types
 
+  def _transformation_name(self):
+    return "tf.data.experimental.scan()"
+
 
 @tf_export("data.experimental.scan")
 def scan(initial_state, scan_func):
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index c0b5027e73..5dbb017f2b 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -47,6 +47,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import string_ops
 from tensorflow.python.util import deprecation
+from tensorflow.python.util import function_utils
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -1737,9 +1738,15 @@ class StructuredFunctionWrapper(object):
   """A wrapper for `Defun` that supports structured arguments and return values.
   """
 
-  def __init__(self, func, transformation_name, dataset=None,
-               input_classes=None, input_shapes=None, input_types=None,
-               add_to_graph=True, experimental_nested_dataset_support=False):
+  def __init__(self,
+               func,
+               transformation_name,
+               dataset=None,
+               input_classes=None,
+               input_shapes=None,
+               input_types=None,
+               add_to_graph=True,
+               experimental_nested_dataset_support=False):
     """Creates a new `StructuredFunctionWrapper` for the given function.
 
     Args:
@@ -1782,11 +1789,18 @@ class StructuredFunctionWrapper(object):
       self._input_classes = dataset.output_classes
 
     self._transformation_name = transformation_name
+    readable_transformation_name = transformation_name.replace(
+        ".", "_")[:-2] if len(transformation_name) > 2 else ""
+    self._func_name = "_".join([
+        readable_transformation_name,
+        function_utils.get_func_name(func),
+        str(ops.uid())
+    ])
 
     # TODO(b/110122868): Enable this support for all `tf.data` functions.
     self._nested_dataset_support = experimental_nested_dataset_support
 
-    @function.Defun(*self._defun_args())
+    @function.Defun(*self._defun_args(), func_name=self._func_name)
     def tf_data_structured_function_wrapper(*args):
       """Wrapper for passing nested structures to and from tf.data functions."""
       flat_args = []
@@ -1997,24 +2011,33 @@ class _GeneratorDataset(DatasetSource):
         init_args, [t.dtype for t in nest.flatten(init_args)])
 
     wrapped_init_func = StructuredFunctionWrapper(
-        init_func, "GeneratorDataset", input_classes=init_args_classes,
-        input_shapes=init_args_shapes, input_types=init_args_types)
+        init_func,
+        self._transformation_name(),
+        input_classes=init_args_classes,
+        input_shapes=init_args_shapes,
+        input_types=init_args_types)
     self._state_classes = wrapped_init_func.output_classes
     self._state_shapes = wrapped_init_func.output_shapes
     self._state_types = wrapped_init_func.output_types
     self._init_func = wrapped_init_func.function
 
     wrapped_next_func = StructuredFunctionWrapper(
-        next_func, "GeneratorDataset", input_classes=self._state_classes,
-        input_shapes=self._state_shapes, input_types=self._state_types)
+        next_func,
+        self._transformation_name(),
+        input_classes=self._state_classes,
+        input_shapes=self._state_shapes,
+        input_types=self._state_types)
     self._output_classes = wrapped_next_func.output_classes
     self._output_shapes = wrapped_next_func.output_shapes
     self._output_types = wrapped_next_func.output_types
     self._next_func = wrapped_next_func.function
 
     wrapped_finalize_func = StructuredFunctionWrapper(
-        finalize_func, "GeneratorDataset", input_classes=self._state_classes,
-        input_shapes=self._state_shapes, input_types=self._state_types)
+        finalize_func,
+        self._transformation_name(),
+        input_classes=self._state_classes,
+        input_shapes=self._state_shapes,
+        input_types=self._state_types)
     self._finalize_func = wrapped_finalize_func.function
 
   def _as_variant_tensor(self):
@@ -2039,6 +2062,9 @@ class _GeneratorDataset(DatasetSource):
   def output_types(self):
     return self._output_types
 
+  def _transformation_name(self):
+    return "Dataset.from_generator()"
+
 
 class ZipDataset(Dataset):
   """A `Dataset` that zips its inputs together."""
@@ -2630,7 +2656,7 @@ class MapDataset(UnaryDataset):
     self._use_inter_op_parallelism = use_inter_op_parallelism
 
     wrapped_func = StructuredFunctionWrapper(
-        map_func, "Dataset.map()", input_dataset)
+        map_func, self._transformation_name(), dataset=input_dataset)
     self._output_classes = wrapped_func.output_classes
     self._output_shapes = wrapped_func.output_shapes
     self._output_types = wrapped_func.output_types
@@ -2657,6 +2683,9 @@ class MapDataset(UnaryDataset):
   def output_types(self):
     return self._output_types
 
+  def _transformation_name(self):
+    return "Dataset.map()"
+
 
 class ParallelMapDataset(MapDataset):
   """A `Dataset` that maps a function over elements in its input in parallel."""
@@ -2695,7 +2724,9 @@ class FlatMapDataset(UnaryDataset):
     self._input_dataset = input_dataset
 
     wrapped_func = StructuredFunctionWrapper(
-        map_func, self._transformation_name(), input_dataset,
+        map_func,
+        self._transformation_name(),
+        dataset=input_dataset,
         experimental_nested_dataset_support=True)
     if not isinstance(wrapped_func.output_classes, _NestedDatasetComponent):
       raise TypeError("`map_func` must return a `Dataset` object.")
@@ -2790,7 +2821,7 @@ class FilterDataset(UnaryDataset):
     super(FilterDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     wrapped_func = StructuredFunctionWrapper(
-        predicate, "Dataset.filter()", input_dataset)
+        predicate, self._transformation_name(), dataset=input_dataset)
     if not (
         wrapped_func.output_types == dtypes.bool and
         wrapped_func.output_shapes.is_compatible_with(tensor_shape.scalar())):
@@ -2816,6 +2847,9 @@ class FilterDataset(UnaryDataset):
   def output_types(self):
     return self._input_dataset.output_types
 
+  def _transformation_name(self):
+    return "Dataset.filter()"
+
 
 class PrefetchDataset(UnaryDataset):
   """A `Dataset` that asynchronously prefetches its input."""
-- 
GitLab


From 0e0ea561eba255a3027d8e01535ba01c8df5ebd2 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 15 Oct 2018 12:32:35 -0700
Subject: [PATCH 0991/1085] Add a flag for controlling the features that are
 applied at conversion. Turn control dependencies and lists off for defun.

PiperOrigin-RevId: 217188171
---
 tensorflow/python/autograph/__init__.py       |  2 +
 tensorflow/python/autograph/core/converter.py | 49 ++++++++++++++++---
 tensorflow/python/autograph/impl/api.py       | 18 +++++--
 .../python/autograph/impl/conversion.py       |  9 ++--
 tensorflow/python/eager/function.py           |  9 ++--
 5 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/autograph/__init__.py b/tensorflow/python/autograph/__init__.py
index e02fb36269..fd9e60bea7 100644
--- a/tensorflow/python/autograph/__init__.py
+++ b/tensorflow/python/autograph/__init__.py
@@ -25,6 +25,7 @@ from __future__ import print_function
 from tensorflow.python.autograph import operators
 from tensorflow.python.autograph import utils
 from tensorflow.python.autograph.core.converter import ConversionOptions
+from tensorflow.python.autograph.core.converter import Feature
 from tensorflow.python.autograph.core.errors import GraphConstructionError
 from tensorflow.python.autograph.core.errors import improved_errors
 from tensorflow.python.autograph.core.errors import TfRuntimeError
@@ -44,6 +45,7 @@ from tensorflow.python.util.all_util import remove_undocumented
 _allowed_symbols = [
     # Main API
     'ConversionOptions',
+    'Feature',
     'RunMode',
     'convert',
     'converted_call',
diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index 1b07bed495..6b2db477f9 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -89,6 +89,19 @@ from tensorflow.python.autograph.pyct.static_analysis import type_info
 # TODO(mdan): Add a test specific to this converter.
 
 
+class Feature(Enum):
+  """Constants to use when selecting AutoGraph features."""
+
+  ALL = 'Enable all features.'
+
+  AUTO_CONTROL_DEPS = (
+      'Insert of control dependencies in the generated code.')
+  LISTS = 'Convert list idioms, like initializers, slices, append, etc.'
+
+  def __repr__(self):
+    return self.name
+
+
 class ConversionOptions(object):
   """Immutable container for global conversion flags.
 
@@ -103,18 +116,31 @@ class ConversionOptions(object):
     force_conversion: bool, whether to force convertinng the target entity. When
       force_conversion is turned off, the converter may decide to return the
       function as-is.
+    optional_features: Union[Feature, Set[Feature]], controls the use of
+      optional features in the conversion process. See Feature for available
+      options.
   """
 
   def __init__(self,
                recursive=False,
                verbose=False,
                strip_decorators=None,
-               force_conversion=False):
+               force_conversion=False,
+               optional_features=Feature.ALL):
     self.recursive = recursive
     self.verbose = verbose
     self.strip_decorators = strip_decorators or ()
     self.force_conversion = force_conversion
 
+    if not isinstance(optional_features, (set, list, tuple)):
+      optional_features = (optional_features,)
+    optional_features = frozenset(optional_features)
+    self.optional_features = optional_features
+
+  def uses(self, feature):
+    return (Feature.ALL in self.optional_features or
+            feature in self.optional_features)
+
   def to_ast(self, namespace):
     """Returns a representation of this object as an AST node.
 
@@ -132,8 +158,9 @@ class ConversionOptions(object):
       constructor_name(
           recursive=recursive_val,
           verbose=verbose_val,
-          strip_decorators=strip_decorator_names,
-          force_conversion=force_conversion_val)
+          strip_decorators=strip_decorators_val,
+          force_conversion=force_conversion_val,
+          optional_features=optional_features_val)
     """
 
     def as_qualified_name(o):
@@ -143,8 +170,15 @@ class ConversionOptions(object):
             o, namespace))
       return name
 
-    strip_decorators_code = '({})'.format(', '.join(
-        tuple(as_qualified_name(o) for o in self.strip_decorators)))
+    def list_of_names(values):
+      return parser.parse_expression('({})'.format(', '.join(
+          tuple(as_qualified_name(v) for v in values))))
+
+    def list_of_features(values):
+      return parser.parse_expression('({})'.format(', '.join(
+          'ag__.Feature.{}'.format(v)
+          for v in Feature.__members__
+          if v in values)))
 
     expr_ast = templates.replace(
         template,
@@ -152,9 +186,10 @@ class ConversionOptions(object):
             as_qualified_name(ConversionOptions)),
         recursive_val=parser.parse_expression(str(self.recursive)),
         verbose_val=parser.parse_expression(str(self.verbose)),
-        strip_decorator_names=parser.parse_expression(strip_decorators_code),
+        strip_decorators_val=list_of_names(self.strip_decorators),
         force_conversion_val=parser.parse_expression(
-            str(self.force_conversion)))
+            str(self.force_conversion)),
+        optional_features_val=list_of_features(self.optional_features))
     return expr_ast[0].value
 
 
diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index b3f056965c..51a455fbb6 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -28,6 +28,7 @@ from tensorflow.python.autograph.operators import py_builtins
 from tensorflow.python.autograph.pyct import compiler
 from tensorflow.python.autograph.pyct import inspect_utils
 from tensorflow.python.autograph.utils import py_func
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
 
@@ -66,6 +67,7 @@ def convert(recursive=False, verbose=False):
               recursive=recursive,
               verbose=verbose,
               force_conversion=True,
+              optional_features=converter.Feature.ALL,
           ), *args, **kwargs)
 
     wrapper = tf_decorator.make_decorator(f, wrapper)
@@ -142,6 +144,9 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None):
 # TODO(mdan): Move to a private, undocumented module.
 def converted_call(f, owner, options, *args, **kwargs):
   """Compiles a function call inline. For internal use only."""
+  if options.verbose:
+    logging.info('Converted call: {}; owner: {}'.format(f, owner))
+
   if owner is not None:
     if not isinstance(f, str):
       raise ValueError(
@@ -233,7 +238,8 @@ def converted_call(f, owner, options, *args, **kwargs):
       arg_values=arg_values,
       arg_types=arg_types,
       partial_types=partial_types,
-      strip_decorators=options.strip_decorators)
+      strip_decorators=options.strip_decorators,
+      optional_features=options.optional_features)
   return converted_f(*effective_args, **kwargs)
 
 
@@ -246,7 +252,8 @@ def to_graph(e,
              arg_values=None,
              arg_types=None,
              partial_types=None,
-             strip_decorators=None):
+             strip_decorators=None,
+             optional_features=converter.Feature.ALL):
   """Converts a Python entity into equivalent code that uses TensorFlow ops.
 
   Supported Python entities include:
@@ -267,6 +274,8 @@ def to_graph(e,
     partial_types: Set[Type], reserved for internal use.
     strip_decorators: Tuple[Callable], same as
       ConversionOptions.strip_decorators.
+    optional_features: Union[Feature, Set[Feature]], same as
+      ConversionOptions.optional_features.
 
   Returns:
     Union[Callable, Type], the converted entity, which is the same kind as e
@@ -284,7 +293,8 @@ def to_graph(e,
       options=converter.ConversionOptions(
           recursive=recursive,
           verbose=verbose,
-          strip_decorators=strip_decorators),
+          strip_decorators=strip_decorators,
+          optional_features=optional_features),
       partial_types=partial_types,
       autograph_module=tf_inspect.getmodule(to_graph),
       uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
@@ -295,7 +305,7 @@ def to_graph(e,
   for dep in reversed(program_ctx.conversion_order):
     nodes.extend(program_ctx.dependency_cache[dep])
 
-  compiled_module, compiled_src = compiler.ast_to_object(
+  compiled_module, _ = compiler.ast_to_object(
       nodes,
       source_prefix=program_ctx.required_imports,
       include_source_map=True)
diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index 3490f6b006..f93381a00e 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -347,16 +347,17 @@ def node_to_graph(node, context, rewrite_errors=True):
   # dealing with the extra loop increment operation that the for
   # canonicalization creates.
   node = converter.apply_(node, context, continue_statements)
-  context.info.namespace['len'] = len
   node = converter.apply_(node, context, return_statements)
-  node = converter.apply_(node, context, lists)
-  node = converter.apply_(node, context, slices)
+  if context.program.options.uses(converter.Feature.LISTS):
+    node = converter.apply_(node, context, lists)
+    node = converter.apply_(node, context, slices)
   node = converter.apply_(node, context, builtin_functions)
   node = converter.apply_(node, context, call_trees)
   node = converter.apply_(node, context, control_flow)
   node = converter.apply_(node, context, conditional_expressions)
   node = converter.apply_(node, context, logical_expressions)
-  node = converter.apply_(node, context, side_effect_guards)
+  if context.program.options.uses(converter.Feature.AUTO_CONTROL_DEPS):
+    node = converter.apply_(node, context, side_effect_guards)
   node = converter.apply_(node, context, function_scopes)
   if rewrite_errors:
     node = converter.apply_(node, context, error_handlers)
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 59c4e4cb30..083c91a26b 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -960,10 +960,13 @@ def func_graph_from_py_func(name,
     try:
       if experimental_autograph:
         func_outputs = autograph.converted_call(
-            python_func,
+            python_func, None,
             autograph.ConversionOptions(
-                verbose=True, recursive=True, strip_decorators=(defun,)),
-            *func_args, **func_kwargs)
+                verbose=True,
+                recursive=True,
+                strip_decorators=(defun,),
+                optional_features=(),
+            ), *func_args, **func_kwargs)
       else:
         func_outputs = python_func(*func_args, **func_kwargs)
       # invariant: `func_outputs` contains only Tensors and `None`s.
-- 
GitLab


From 732bfdc7da667a57dabc0b85d47ff98bf52caca6 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 15 Oct 2018 12:45:30 -0700
Subject: [PATCH 0992/1085] Automated rollback of commit
 6e16d34172ba14b6afc5fac6ce50913343769f40. Revert #22533.

PiperOrigin-RevId: 217190256
---
 .../contrib/opt/python/training/adamax.py     | 31 ++++++++++----
 .../opt/python/training/nadam_optimizer.py    | 10 ++---
 tensorflow/python/training/adam.py            | 41 ++++++++++++-------
 3 files changed, 51 insertions(+), 31 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py
index 64c02ecc76..686bac0d84 100644
--- a/tensorflow/contrib/opt/python/training/adamax.py
+++ b/tensorflow/contrib/opt/python/training/adamax.py
@@ -23,6 +23,8 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
 from tensorflow.python.training import adam
 from tensorflow.python.training import training_ops
 
@@ -132,7 +134,8 @@ class AdaMaxOptimizer(adam.AdamOptimizer):
         math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
         grad, use_locking=self._use_locking)
 
-  def _apply_sparse_shared(self, grad, var, indices):
+  def _apply_sparse_shared(self, grad, var, indices,
+                           scatter_add, scatter_update):
     beta1_power = self._get_beta_accumulators()
     beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
     lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
@@ -144,28 +147,38 @@ class AdaMaxOptimizer(adam.AdamOptimizer):
     m_slice = array_ops.gather(m, indices)
     m_t_slice = m_slice * beta1_t + grad * (1 - beta1_t)
     with ops.control_dependencies([m_t_slice]):
-      m_t = m.scatter_update(
-          ops.IndexedSlices(m_t_slice, indices), use_locking=self._use_locking)
+      m_t = scatter_update(m, indices, m_t_slice)
     # u_t = max(beta2 * u, abs(g_t))
     v = self.get_slot(var, "v")
     v_slice = array_ops.gather(v, indices)
     v_t_slice = math_ops.maximum(v_slice * beta2_t, math_ops.abs(grad))
     with ops.control_dependencies([v_t_slice]):
-      v_t = v.scatter_update(
-          ops.IndexedSlices(v_t_slice, indices), use_locking=self._use_locking)
+      v_t = scatter_update(v, indices, v_t_slice)
     # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t
     var_slice = -lr_t / (1 - beta1_power) * (m_t_slice /
                                              (v_t_slice + epsilon_t))
     with ops.control_dependencies([var_slice]):
-      var_update = var.scatter_add(
-          ops.IndexedSlices(var_slice, indices), use_locking=self._use_locking)
+      var_update = scatter_add(var, indices, var_slice)
     return control_flow_ops.group(*[var_update, m_t, v_t])
 
   def _apply_sparse(self, grad, var):
-    return self._apply_sparse_shared(grad.values, var, grad.indices)
+    return self._apply_sparse_shared(
+        grad.values, var, grad.indices,
+        lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
+            x, i, v, use_locking=self._use_locking),
+        lambda x, i, v: state_ops.scatter_update(  # pylint: disable=g-long-lambda
+            x, i, v, use_locking=self._use_locking))
+
+  def _resource_scatter_update(self, x, i, v):
+    with ops.control_dependencies(
+        [resource_variable_ops.resource_scatter_update(
+            x.handle, i, v)]):
+      return x.value()
 
   def _resource_apply_sparse(self, grad, var, indices):
-    return self._apply_sparse_shared(grad, var, indices)
+    return self._apply_sparse_shared(
+        grad, var, indices,
+        self._resource_scatter_add, self._resource_scatter_update)
 
   def _finish(self, update_ops, name_scope):
     # Update the power accumulators.
diff --git a/tensorflow/contrib/opt/python/training/nadam_optimizer.py b/tensorflow/contrib/opt/python/training/nadam_optimizer.py
index 92b4872b8e..44a8890cb1 100644
--- a/tensorflow/contrib/opt/python/training/nadam_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/nadam_optimizer.py
@@ -67,7 +67,7 @@ class NadamOptimizer(adam.AdamOptimizer):
         use_locking=self._use_locking,
         use_nesterov=True)
 
-  def _apply_sparse_shared(self, grad, var, indices):
+  def _apply_sparse_shared(self, grad, var, indices, scatter_add):
     beta1_power, beta2_power = self._get_beta_accumulators()
     beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
     beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
@@ -81,9 +81,7 @@ class NadamOptimizer(adam.AdamOptimizer):
     m_scaled_g_values = grad * (1 - beta1_t)
     m_t = state_ops.assign(m, m * beta1_t, use_locking=self._use_locking)
     with ops.control_dependencies([m_t]):
-      m_t = m.scatter_add(
-          ops.IndexedSlices(m_scaled_g_values, indices),
-          use_locking=self._use_locking)
+      m_t = scatter_add(m, indices, m_scaled_g_values)
       # m_bar = (1 - beta1) * g_t + beta1 * m_t
       m_bar = m_scaled_g_values + beta1_t * m_t
     # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
@@ -91,9 +89,7 @@ class NadamOptimizer(adam.AdamOptimizer):
     v_scaled_g_values = (grad * grad) * (1 - beta2_t)
     v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
     with ops.control_dependencies([v_t]):
-      v_t = v.scatter_add(
-          ops.IndexedSlices(v_scaled_g_values, indices),
-          use_locking=self._use_locking)
+      v_t = scatter_add(v, indices, v_scaled_g_values)
     v_sqrt = math_ops.sqrt(v_t)
     var_update = state_ops.assign_sub(
         var, lr * m_bar / (v_sqrt + epsilon_t), use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py
index 032a4d661b..704ad6d3fe 100644
--- a/tensorflow/python/training/adam.py
+++ b/tensorflow/python/training/adam.py
@@ -20,9 +20,10 @@ from __future__ import print_function
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
 from tensorflow.python.training import optimizer
 from tensorflow.python.training import training_ops
 from tensorflow.python.util.tf_export import tf_export
@@ -173,7 +174,7 @@ class AdamOptimizer(optimizer.Optimizer):
         math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
         grad, use_locking=self._use_locking)
 
-  def _apply_sparse_shared(self, grad, var, indices):
+  def _apply_sparse_shared(self, grad, var, indices, scatter_add):
     beta1_power, beta2_power = self._get_beta_accumulators()
     beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
     beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
@@ -185,27 +186,37 @@ class AdamOptimizer(optimizer.Optimizer):
     # m_t = beta1 * m + (1 - beta1) * g_t
     m = self.get_slot(var, "m")
     m_scaled_g_values = grad * (1 - beta1_t)
-    m_gathered = array_ops.gather(m, indices)
-    m_t_gathered = m_gathered * beta1_t + m_scaled_g_values
-    m_t = m.scatter_update(ops.IndexedSlices(m_t_gathered, indices))
+    m_t = state_ops.assign(m, m * beta1_t,
+                           use_locking=self._use_locking)
+    with ops.control_dependencies([m_t]):
+      m_t = scatter_add(m, indices, m_scaled_g_values)
     # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
     v = self.get_slot(var, "v")
     v_scaled_g_values = (grad * grad) * (1 - beta2_t)
-    v_gathered = array_ops.gather(v, indices)
-    v_t_gathered = v_gathered * beta2_t + v_scaled_g_values
-    v_t = v.scatter_update(ops.IndexedSlices(v_t_gathered, indices))
-
-    v_sqrt_gathered = math_ops.sqrt(v_t_gathered)
-    var_update = var.scatter_sub(
-        ops.IndexedSlices(lr * m_t_gathered / (v_sqrt_gathered + epsilon_t),
-                          indices))
+    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
+    with ops.control_dependencies([v_t]):
+      v_t = scatter_add(v, indices, v_scaled_g_values)
+    v_sqrt = math_ops.sqrt(v_t)
+    var_update = state_ops.assign_sub(var,
+                                      lr * m_t / (v_sqrt + epsilon_t),
+                                      use_locking=self._use_locking)
     return control_flow_ops.group(*[var_update, m_t, v_t])
 
   def _apply_sparse(self, grad, var):
-    return self._apply_sparse_shared(grad.values, var, grad.indices)
+    return self._apply_sparse_shared(
+        grad.values, var, grad.indices,
+        lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
+            x, i, v, use_locking=self._use_locking))
+
+  def _resource_scatter_add(self, x, i, v):
+    with ops.control_dependencies(
+        [resource_variable_ops.resource_scatter_add(
+            x.handle, i, v)]):
+      return x.value()
 
   def _resource_apply_sparse(self, grad, var, indices):
-    return self._apply_sparse_shared(grad, var, indices)
+    return self._apply_sparse_shared(
+        grad, var, indices, self._resource_scatter_add)
 
   def _finish(self, update_ops, name_scope):
     # Update the power accumulators.
-- 
GitLab


From 7acb7aa3a346dc2c8d11bf6d277a7e8303fc4e0e Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Mon, 15 Oct 2018 13:22:35 -0700
Subject: [PATCH 0993/1085] Make TensorList gather and stack ops work when
 element_shape is not fully-defined but the resulting tensors have matching
 shape. This aligns with the implementation in TensorArrays.

PiperOrigin-RevId: 217196357
---
 tensorflow/core/kernels/list_kernels.h        |  74 ++++++----
 .../python/kernel_tests/list_ops_test.py      | 126 ++++++++++++++++++
 2 files changed, 176 insertions(+), 24 deletions(-)

diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h
index 72581c9293..a12e739e91 100644
--- a/tensorflow/core/kernels/list_kernels.h
+++ b/tensorflow/core/kernels/list_kernels.h
@@ -83,9 +83,9 @@ class TensorListStack : public OpKernel {
                                         DataTypeString(element_dtype_),
                                         " but list elements ",
                                         DataTypeString(l->element_dtype)));
-    OP_REQUIRES(c, l->element_shape.IsFullyDefined(),
-                errors::InvalidArgument("Tried to stack elements from a list "
-                                        "with non-fully-defined shape: ",
+    OP_REQUIRES(c, !l->tensors.empty() || l->element_shape.IsFullyDefined(),
+                errors::InvalidArgument("Tried to stack elements of a empty ",
+                                        "list with non-fully-defined shape: ",
                                         l->element_shape.DebugString()));
     if (num_elements_ != -1) {
       OP_REQUIRES(c, l->tensors.size() == num_elements_,
@@ -94,11 +94,26 @@ class TensorListStack : public OpKernel {
                                           " elements but got a list with ",
                                           l->tensors.size(), " elements."));
     }
+    // Compute the shape of the output tensor.
+    // If `element_shape` is fully-defined it gets used. It is assumed that all
+    // element tensors have the same shape.
+    // If `element_shape` is not fully-defined the shape of the first element
+    // tensor is used and it is checked that all other tensors have the same
+    // shape.
     TensorShape resulting_shape;
-    resulting_shape.AddDim(l->tensors.size());
-    for (TensorShapeDim s : l->element_shape) {
-      resulting_shape.AddDim(s.size);
+    if (!l->element_shape.AsTensorShape(&resulting_shape)) {
+      const Tensor& t = l->tensors[0];
+      resulting_shape = t.shape();
+      for (int i = 1; i < l->tensors.size(); ++i) {
+        const Tensor& t = l->tensors[i];
+        OP_REQUIRES(c, t.shape() == resulting_shape,
+                    errors::InvalidArgument(
+                        "Tried to stack tensors with unequal shapes: ",
+                        resulting_shape.DebugString(), " vs ",
+                        t.shape().DebugString()));
+      }
     }
+    resulting_shape.InsertDim(0, l->tensors.size());
     Tensor* output;
     OP_REQUIRES_OK(c, c->allocate_output(0, resulting_shape, &output));
     if (output->NumElements() == 0) {
@@ -108,12 +123,6 @@ class TensorListStack : public OpKernel {
     ConstMatrixVector inputs_flat;
     inputs_flat.reserve(l->tensors.size());
     for (const auto& t : l->tensors) {
-      OP_REQUIRES(
-          c, l->element_shape.IsCompatibleWith(t.shape()),
-          errors::InvalidArgument(
-              "Tensor with invalid shape in list. List element shape shape: ",
-              l->element_shape.DebugString(),
-              " and tensor shape: ", t.shape().DebugString()));
       inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
           t.shaped<T, 2>({1, t.NumElements()})));
     }
@@ -153,16 +162,38 @@ class TensorListGather : public OpKernel {
                                         DataTypeString(element_dtype_),
                                         " but list elements ",
                                         DataTypeString(l->element_dtype)));
-    OP_REQUIRES(c, l->element_shape.IsFullyDefined(),
-                errors::InvalidArgument("Tried to stack elements from a list "
-                                        "with non-fully-defined shape: ",
-                                        l->element_shape.DebugString()));
     Tensor indices = c->input(1);
+    OP_REQUIRES(c,
+                indices.NumElements() > 0 || l->element_shape.IsFullyDefined(),
+                errors::InvalidArgument("Tried to gather 0-elements from "
+                                        "a list with non-fully-defined shape: ",
+                                        l->element_shape.DebugString()));
+    // Compute the shape of the output tensor.
+    // If `element_shape` is fully-defined it gets used. It is assumed that all
+    // requested tensors have the same shape.
+    // If `element_shape` is not fully-defined the shape of the first requested
+    // tensor is used and it is checked that all other tensors have the same
+    // shape.
     TensorShape resulting_shape;
-    resulting_shape.AddDim(indices.NumElements());
-    for (TensorShapeDim s : l->element_shape) {
-      resulting_shape.AddDim(s.size);
+    if (!l->element_shape.AsTensorShape(&resulting_shape)) {
+      const int i = indices.flat<int32>()(0);
+      OP_REQUIRES(
+          c, i < l->tensors.size(),
+          errors::InvalidArgument("Index ", i, " out o range; list only has ",
+                                  l->tensors.size(), " elements."));
+      const Tensor& t = l->tensors[i];
+      resulting_shape = t.shape();
+      for (int index = 1; index < indices.NumElements(); ++index) {
+        const int i = indices.flat<int32>()(index);
+        const Tensor& t = l->tensors[i];
+        OP_REQUIRES(c, t.shape() == resulting_shape,
+                    errors::InvalidArgument(
+                        "Tried to gather elements with unequal shapes: ",
+                        resulting_shape.DebugString(), " vs ",
+                        t.shape().DebugString()));
+      }
     }
+    resulting_shape.InsertDim(0, indices.NumElements());
     Tensor* output;
     OP_REQUIRES_OK(c, c->allocate_output(0, resulting_shape, &output));
     if (output->NumElements() == 0) {
@@ -178,11 +209,6 @@ class TensorListGather : public OpKernel {
           errors::InvalidArgument("Index ", i, " out o range; list only has ",
                                   l->tensors.size(), " elements."));
       const Tensor& t = l->tensors[i];
-      OP_REQUIRES(c, l->element_shape.IsCompatibleWith(t.shape()),
-                  errors::InvalidArgument(
-                      "Tensor with invalid shape in list. List element shape: ",
-                      l->element_shape.DebugString(),
-                      " and tensor shape: ", t.shape().DebugString()));
       inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
           t.shaped<T, 2>({1, t.NumElements()})));
     }
diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py
index ae413edaec..8e51d904ab 100644
--- a/tensorflow/python/kernel_tests/list_ops_test.py
+++ b/tensorflow/python/kernel_tests/list_ops_test.py
@@ -70,6 +70,65 @@ class ListOpsTest(test_util.TensorFlowTestCase):
     t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
     self.assertAllEqual(self.evaluate(t), [1.0, 2.0])
 
+  @test_util.run_in_graph_and_eager_modes
+  def testStackWithUnknownElementShape(self):
+    l = list_ops.empty_tensor_list(
+        element_dtype=dtypes.float32, element_shape=-1)
+    l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0))
+    l = list_ops.tensor_list_push_back(l, constant_op.constant(2.0))
+
+    t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+    self.assertAllEqual(self.evaluate(t), [1.0, 2.0])
+
+    # Should raise an error when the element tensors do not all have the same
+    # shape.
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, "unequal shapes"):
+      l = list_ops.tensor_list_push_back(l, constant_op.constant([3.0, 4.0]))
+      t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+      self.evaluate(t)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testStackWithPartiallyDefinedElementShape(self):
+    l = list_ops.empty_tensor_list(
+        element_dtype=dtypes.float32, element_shape=[-1])
+    l = list_ops.tensor_list_push_back(l, constant_op.constant([1.0]))
+    l = list_ops.tensor_list_push_back(l, constant_op.constant([2.0]))
+
+    t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+    self.assertAllEqual(self.evaluate(t), [[1.0], [2.0]])
+
+    # Should raise an error when the element tensors do not all have the same
+    # shape.
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, "unequal shapes"):
+      l = list_ops.tensor_list_push_back(l, constant_op.constant([2.0, 3.0]))
+      t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+      self.evaluate(t)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testStackEmptyList(self):
+    # Should be able to stack empty lists with fully defined element_shape.
+    l = list_ops.empty_tensor_list(
+        element_dtype=dtypes.float32, element_shape=[1, 2])
+    t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+    self.assertAllEqual(self.evaluate(t).shape, (0, 1, 2))
+
+    # Should not be able to stack empty lists with partially defined
+    # element_shape.
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 "non-fully-defined"):
+      l = list_ops.empty_tensor_list(
+          element_dtype=dtypes.float32, element_shape=[-1, 2])
+      t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+      self.evaluate(t)
+
+    # Should not be able to stack empty lists with undefined element_shape.
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 "non-fully-defined"):
+      l = list_ops.empty_tensor_list(
+          element_dtype=dtypes.float32, element_shape=-1)
+      t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)
+      self.evaluate(t)
+
   @test_util.run_in_graph_and_eager_modes
   def testGatherGrad(self):
     with backprop.GradientTape() as tape:
@@ -85,6 +144,73 @@ class ListOpsTest(test_util.TensorFlowTestCase):
     dt = tape.gradient(s, c0)
     self.assertAllEqual(self.evaluate(dt), 6.0)
 
+  @test_util.run_in_graph_and_eager_modes
+  def testGatherWithUnknownElementShape(self):
+    l = list_ops.empty_tensor_list(
+        element_dtype=dtypes.float32, element_shape=-1)
+    l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0))
+    l = list_ops.tensor_list_push_back(l, constant_op.constant(2.0))
+    l = list_ops.tensor_list_push_back(l, constant_op.constant([3.0, 4.0]))
+
+    t = list_ops.tensor_list_gather(l, [1, 0], element_dtype=dtypes.float32)
+    self.assertAllEqual(self.evaluate(t), [2.0, 1.0])
+
+    t = list_ops.tensor_list_gather(l, [2], element_dtype=dtypes.float32)
+    self.assertAllEqual(self.evaluate(t), [[3.0, 4.0]])
+
+    # Should raise an error when the requested tensors do not all have the same
+    # shape.
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, "unequal shapes"):
+      t = list_ops.tensor_list_gather(l, [0, 2], element_dtype=dtypes.float32)
+      self.evaluate(t)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testGatherWithPartiallyDefinedElementShape(self):
+    l = list_ops.empty_tensor_list(
+        element_dtype=dtypes.float32, element_shape=[-1])
+    l = list_ops.tensor_list_push_back(l, constant_op.constant([1.0]))
+    l = list_ops.tensor_list_push_back(l, constant_op.constant([2.0, 3.0]))
+    l = list_ops.tensor_list_push_back(l, constant_op.constant([4.0, 5.0]))
+
+    t = list_ops.tensor_list_gather(l, [0], element_dtype=dtypes.float32)
+    self.assertAllEqual(self.evaluate(t), [[1.0]])
+
+    t = list_ops.tensor_list_gather(l, [1, 2], element_dtype=dtypes.float32)
+    self.assertAllEqual(self.evaluate(t), [[2.0, 3.0], [4.0, 5.0]])
+
+    # Should raise an error when the requested tensors do not all have the same
+    # shape.
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, "unequal shapes"):
+      t = list_ops.tensor_list_gather(l, [0, 2], element_dtype=dtypes.float32)
+      self.evaluate(t)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testGatherEmptyList(self):
+    # Should be able to gather from empty lists with fully defined
+    # element_shape.
+    l = list_ops.empty_tensor_list(
+        element_dtype=dtypes.float32, element_shape=[1, 2])
+    t = list_ops.tensor_list_gather(l, [], element_dtype=dtypes.float32)
+    self.assertAllEqual((0, 1, 2), self.evaluate(t).shape)
+
+    # Should not be able to gather from empty lists with partially defined
+    # element_shape.
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 "non-fully-defined"):
+      l = list_ops.empty_tensor_list(
+          element_dtype=dtypes.float32, element_shape=[-1, 2])
+      t = list_ops.tensor_list_gather(l, [], element_dtype=dtypes.float32)
+      self.evaluate(t)
+
+    # Should not be able to gather from empty lists with undefined
+    # element_shape.
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 "non-fully-defined"):
+      l = list_ops.empty_tensor_list(
+          element_dtype=dtypes.float32, element_shape=-1)
+      t = list_ops.tensor_list_gather(l, [], element_dtype=dtypes.float32)
+      self.evaluate(t)
+
   @test_util.run_in_graph_and_eager_modes
   def testScatterGrad(self):
     with backprop.GradientTape() as tape:
-- 
GitLab


From a13c744ca6c7574630af6801f3ed1a2452724675 Mon Sep 17 00:00:00 2001
From: Geoffrey Irving <irving@naml.us>
Date: Fri, 12 Oct 2018 21:25:46 -0700
Subject: [PATCH 0994/1085] Move stateless ops out of contrib

---
 tensorflow/compiler/tests/BUILD               |   2 +-
 .../tests/stateless_random_ops_test.py        |   2 +-
 tensorflow/contrib/boosted_trees/BUILD        |   2 +-
 .../python/training/functions/gbdt_batch.py   |   2 +-
 tensorflow/contrib/stateless/BUILD            |   7 +-
 tensorflow/contrib/stateless/__init__.py      |  10 +-
 .../kernel_tests/stateless_random_ops_test.py | 140 ++--------------
 tensorflow/python/BUILD                       |  15 +-
 tensorflow/python/kernel_tests/random/BUILD   |  15 ++
 .../random/stateless_random_ops_test.py       | 153 ++++++++++++++++++
 tensorflow/python/ops/standard_ops.py         |   1 +
 .../ops/stateless_random_ops.py}              |   8 +-
 .../api/golden/v1/tensorflow.random.pbtxt     |  16 ++
 .../api/golden/v2/tensorflow.random.pbtxt     |  16 ++
 14 files changed, 250 insertions(+), 139 deletions(-)
 create mode 100644 tensorflow/python/kernel_tests/random/stateless_random_ops_test.py
 rename tensorflow/{contrib/stateless/python/stateless_ops.py => python/ops/stateless_random_ops.py} (97%)

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index a8a9f39e10..d6e3f0817e 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -867,9 +867,9 @@ tf_xla_py_test(
     tags = ["optonly"],
     deps = [
         ":xla_test",
-        "//tensorflow/contrib/stateless",
         "//tensorflow/python:framework",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python:stateless_random_ops",
     ],
 )
 
diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py
index e8741bc468..b7747414ea 100644
--- a/tensorflow/compiler/tests/stateless_random_ops_test.py
+++ b/tensorflow/compiler/tests/stateless_random_ops_test.py
@@ -23,9 +23,9 @@ import math
 import numpy as np
 
 from tensorflow.compiler.tests import xla_test
-from tensorflow.contrib import stateless
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import stateless_random_ops as stateless
 from tensorflow.python.ops.distributions import special_math
 from tensorflow.python.platform import test
 
diff --git a/tensorflow/contrib/boosted_trees/BUILD b/tensorflow/contrib/boosted_trees/BUILD
index f03eab510c..f7f15a302a 100644
--- a/tensorflow/contrib/boosted_trees/BUILD
+++ b/tensorflow/contrib/boosted_trees/BUILD
@@ -98,7 +98,6 @@ py_library(
         "//tensorflow/contrib/boosted_trees/proto:learner_proto_py",
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/contrib/learn",
-        "//tensorflow/contrib/stateless",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:control_flow_ops",
@@ -108,6 +107,7 @@ py_library(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:stateless_random_ops",
         "//tensorflow/python:summary",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:training",
diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
index 8531e97f90..bd5d5bb695 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
@@ -22,7 +22,6 @@ import collections
 import copy
 
 from tensorflow.contrib import learn
-from tensorflow.contrib import stateless
 from tensorflow.contrib.boosted_trees.lib.learner.batch import categorical_split_handler
 from tensorflow.contrib.boosted_trees.lib.learner.batch import ordinal_split_handler
 from tensorflow.contrib.boosted_trees.proto import learner_pb2
@@ -44,6 +43,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import stateless_random_ops as stateless
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.losses import losses
diff --git a/tensorflow/contrib/stateless/BUILD b/tensorflow/contrib/stateless/BUILD
index e9ddec8889..f16d99f64c 100644
--- a/tensorflow/contrib/stateless/BUILD
+++ b/tensorflow/contrib/stateless/BUILD
@@ -13,12 +13,11 @@ py_library(
     name = "stateless",
     srcs = [
         "__init__.py",
-        "python/stateless_ops.py",
     ],
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:stateless_random_ops_gen",
+        "//tensorflow/python:stateless_random_ops",
         "//tensorflow/python:util",
     ],
 )
@@ -28,10 +27,6 @@ cuda_py_test(
     srcs = ["python/kernel_tests/stateless_random_ops_test.py"],
     additional_deps = [
         ":stateless",
-        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:random_ops",
     ],
 )
diff --git a/tensorflow/contrib/stateless/__init__.py b/tensorflow/contrib/stateless/__init__.py
index 30d0a7ab6a..1a3a78bb5b 100644
--- a/tensorflow/contrib/stateless/__init__.py
+++ b/tensorflow/contrib/stateless/__init__.py
@@ -14,6 +14,10 @@
 # ==============================================================================
 """Stateless random ops which take seed as a tensor input.
 
+DEPRECATED: Use `tf.random.stateless_uniform` rather than
+`tf.contrib.stateless.stateless_random_uniform`, and similarly for the other
+routines.
+
 Instead of taking `seed` as an attr which initializes a mutable state within
 the op, these random ops take `seed` as an input, and the random numbers are
 a deterministic function of `shape` and `seed`.
@@ -32,8 +36,10 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-# pylint: disable=wildcard-import
-from tensorflow.contrib.stateless.python.stateless_ops import *
+from tensorflow.python.ops.stateless_random_ops import stateless_random_uniform
+from tensorflow.python.ops.stateless_random_ops import stateless_random_normal
+from tensorflow.python.ops.stateless_random_ops import stateless_truncated_normal
+from tensorflow.python.ops.stateless_random_ops import stateless_multinomial
 
 from tensorflow.python.util.all_util import remove_undocumented
 
diff --git a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
index ec5a13b7c6..8373cf62dc 100644
--- a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
+++ b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py
@@ -1,4 +1,4 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,141 +12,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for stateless random ops."""
+"""Tests for tf.contrib.stateless API.
+
+The real tests are in python/kernel_tests/random/stateless_random_ops_test.py.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import functools
-
-import numpy as np
 from tensorflow.contrib import stateless
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import stateless_random_ops
 from tensorflow.python.platform import test
 
 
-def invert_philox(key, value):
-  """Invert the Philox bijection."""
-  key = np.array(key, dtype=np.uint32)
-  value = np.array(value, dtype=np.uint32)
-  step = np.array([0x9E3779B9, 0xBB67AE85], dtype=np.uint32)
-  for n in range(10)[::-1]:
-    key0, key1 = key + n * step
-    v0 = value[3] * 0x991a7cdb & 0xffffffff
-    v2 = value[1] * 0x6d7cae67 & 0xffffffff
-    hi0 = v0 * 0xD2511F53 >> 32
-    hi1 = v2 * 0xCD9E8D57 >> 32
-    v1 = hi1 ^ value[0] ^ key0
-    v3 = hi0 ^ value[2] ^ key1
-    value = v0, v1, v2, v3
-  return np.array(value)
-
-
 class StatelessOpsTest(test.TestCase):
 
-  def _test_match(self, cases):
-    # Stateless ops should be the same as stateful ops on the first call
-    # after seed scrambling.
-    cases = tuple(cases)
-    key = 0x3ec8f720, 0x02461e29
-    for seed in (7, 17), (11, 5), (2, 3):
-      preseed = invert_philox(key, (seed[0], 0, seed[1], 0)).astype(np.uint64)
-      preseed = preseed[::2] | preseed[1::2] << 32
-      random_seed.set_random_seed(seed[0])
-      with self.test_session(use_gpu=True):
-        for stateless_op, stateful_op in cases:
-          stateful = stateful_op(seed=seed[1])
-          pure = stateless_op(seed=preseed)
-          self.assertAllEqual(stateful.eval(), pure.eval())
-
-  def _test_determinism(self, cases):
-    # Stateless values should be equal iff the seeds are equal (roughly)
-    cases = tuple(cases)
-    with self.test_session(use_gpu=True):
-      for seed_type in [dtypes.int32, dtypes.int64]:
-        seed_t = array_ops.placeholder(seed_type, shape=[2])
-        seeds = [(x, y) for x in range(5) for y in range(5)] * 3
-        for stateless_op, _ in cases:
-          pure = stateless_op(seed=seed_t)
-          values = [
-              (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds
-          ]
-          for s0, v0 in values:
-            for s1, v1 in values:
-              self.assertEqual(s0 == s1, np.all(v0 == v1))
-
-  def _float_cases(self, shape_dtypes=(None,)):
-    float_cases = (
-        # Uniform distribution, with and without range
-        (stateless.stateless_random_uniform, random_ops.random_uniform, {}),
-        (stateless.stateless_random_uniform, random_ops.random_uniform,
-         dict(minval=2.2, maxval=7.1)),
-        # Normal distribution, with and without mean+stddev
-        (stateless.stateless_random_normal, random_ops.random_normal, {}),
-        (stateless.stateless_random_normal, random_ops.random_normal,
-         dict(mean=2, stddev=3)),
-        # Truncated normal distribution, with and without mean+stddev
-        (stateless.stateless_truncated_normal, random_ops.truncated_normal, {}),
-        (stateless.stateless_truncated_normal, random_ops.truncated_normal,
-         dict(mean=3, stddev=4)),
-    )
-    for dtype in dtypes.float16, dtypes.float32, dtypes.float64:
-      for shape_dtype in shape_dtypes:
-        for shape in (), (3,), (2, 5):
-          if shape_dtype is not None:
-            shape = constant_op.constant(shape, dtype=shape_dtype)
-          for stateless_op, stateful_op, kwds in float_cases:
-            kwds = dict(shape=shape, dtype=dtype, **kwds)
-            yield (functools.partial(stateless_op, **kwds),
-                   functools.partial(stateful_op, **kwds))
-
-  def _int_cases(self, shape_dtypes=(None,)):
-    for shape_dtype in shape_dtypes:
-      for shape in (), (3,), (2, 5):
-        if shape_dtype is not None:
-          shape = constant_op.constant(shape, dtype=shape_dtype)
-        for dtype in dtypes.int32, dtypes.int64:
-          kwds = dict(minval=2, maxval=11111, dtype=dtype, shape=shape)
-          yield (functools.partial(stateless.stateless_random_uniform, **kwds),
-                 functools.partial(random_ops.random_uniform, **kwds))
-
-  def _multinomial_cases(self):
-    num_samples = 10
-    for logits_dtype in np.float16, np.float32, np.float64:
-      for output_dtype in dtypes.int32, dtypes.int64:
-        for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2],
-                                                  [0.25, 0.75]]):
-          kwds = dict(
-              logits=constant_op.constant(logits, dtype=logits_dtype),
-              num_samples=num_samples,
-              output_dtype=output_dtype)
-          yield (functools.partial(stateless.stateless_multinomial, **kwds),
-                 functools.partial(random_ops.multinomial, **kwds))
-
-  def testMatchFloat(self):
-    self._test_match(self._float_cases())
-
-  def testMatchInt(self):
-    self._test_match(self._int_cases())
-
-  def testMatchMultinomial(self):
-    self._test_match(self._multinomial_cases())
-
-  def testDeterminismFloat(self):
-    self._test_determinism(
-        self._float_cases(shape_dtypes=(dtypes.int32, dtypes.int64)))
-
-  def testDeterminismInt(self):
-    self._test_determinism(
-        self._int_cases(shape_dtypes=(dtypes.int32, dtypes.int64)))
-
-  def testDeterminismMultinomial(self):
-    self._test_determinism(self._multinomial_cases())
+  def testAPI(self):
+    self.assertIs(stateless.stateless_random_uniform,
+                  stateless_random_ops.stateless_random_uniform)
+    self.assertIs(stateless.stateless_random_normal,
+                  stateless_random_ops.stateless_random_normal)
+    self.assertIs(stateless.stateless_truncated_normal,
+                  stateless_random_ops.stateless_truncated_normal)
+    self.assertIs(stateless.stateless_multinomial,
+                  stateless_random_ops.stateless_multinomial)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 1a890a7938..d529d3bf02 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1747,7 +1747,6 @@ tf_gen_op_wrapper_private_py(
 tf_gen_op_wrapper_private_py(
     name = "stateless_random_ops_gen",
     visibility = [
-        "//tensorflow/contrib/stateless:__pkg__",
         "//tensorflow/python/data/experimental/ops:__pkg__",
     ],
 )
@@ -2584,6 +2583,19 @@ py_library(
     ],
 )
 
+py_library(
+    name = "stateless_random_ops",
+    srcs = ["ops/stateless_random_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":dtypes",
+        ":framework_ops",
+        ":math_ops",
+        ":random_ops",
+        ":stateless_random_ops_gen",
+    ],
+)
+
 py_library(
     name = "rnn",
     srcs = ["ops/rnn.py"],
@@ -2850,6 +2862,7 @@ py_library(
         ":spectral_grad",
         ":state_grad",
         ":state_ops",
+        ":stateless_random_ops",
         ":string_ops",
         ":template",
         ":tensor_array_grad",
diff --git a/tensorflow/python/kernel_tests/random/BUILD b/tensorflow/python/kernel_tests/random/BUILD
index 3b3a28fc9a..dd81306db0 100644
--- a/tensorflow/python/kernel_tests/random/BUILD
+++ b/tensorflow/python/kernel_tests/random/BUILD
@@ -90,6 +90,21 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "stateless_random_ops_test",
+    size = "medium",
+    srcs = ["stateless_random_ops_test.py"],
+    additional_deps = [
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:random_ops",
+        "//tensorflow/python:stateless_random_ops",
+    ],
+)
+
 cuda_py_test(
     name = "random_gamma_test",
     size = "medium",
diff --git a/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py
new file mode 100644
index 0000000000..d57db3c512
--- /dev/null
+++ b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py
@@ -0,0 +1,153 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for stateless random ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+
+import numpy as np
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import random_seed
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import stateless_random_ops as stateless
+from tensorflow.python.platform import test
+
+
+def invert_philox(key, value):
+  """Invert the Philox bijection."""
+  key = np.array(key, dtype=np.uint32)
+  value = np.array(value, dtype=np.uint32)
+  step = np.array([0x9E3779B9, 0xBB67AE85], dtype=np.uint32)
+  for n in range(10)[::-1]:
+    key0, key1 = key + n * step
+    v0 = value[3] * 0x991a7cdb & 0xffffffff
+    v2 = value[1] * 0x6d7cae67 & 0xffffffff
+    hi0 = v0 * 0xD2511F53 >> 32
+    hi1 = v2 * 0xCD9E8D57 >> 32
+    v1 = hi1 ^ value[0] ^ key0
+    v3 = hi0 ^ value[2] ^ key1
+    value = v0, v1, v2, v3
+  return np.array(value)
+
+
+class StatelessOpsTest(test.TestCase):
+
+  def _test_match(self, cases):
+    # Stateless ops should be the same as stateful ops on the first call
+    # after seed scrambling.
+    cases = tuple(cases)
+    key = 0x3ec8f720, 0x02461e29
+    for seed in (7, 17), (11, 5), (2, 3):
+      preseed = invert_philox(key, (seed[0], 0, seed[1], 0)).astype(np.uint64)
+      preseed = preseed[::2] | preseed[1::2] << 32
+      random_seed.set_random_seed(seed[0])
+      with self.test_session(use_gpu=True):
+        for stateless_op, stateful_op in cases:
+          stateful = stateful_op(seed=seed[1])
+          pure = stateless_op(seed=preseed)
+          self.assertAllEqual(stateful.eval(), pure.eval())
+
+  def _test_determinism(self, cases):
+    # Stateless values should be equal iff the seeds are equal (roughly)
+    cases = tuple(cases)
+    with self.test_session(use_gpu=True):
+      for seed_type in [dtypes.int32, dtypes.int64]:
+        seed_t = array_ops.placeholder(seed_type, shape=[2])
+        seeds = [(x, y) for x in range(5) for y in range(5)] * 3
+        for stateless_op, _ in cases:
+          pure = stateless_op(seed=seed_t)
+          values = [
+              (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds
+          ]
+          for s0, v0 in values:
+            for s1, v1 in values:
+              self.assertEqual(s0 == s1, np.all(v0 == v1))
+
+  def _float_cases(self, shape_dtypes=(None,)):
+    float_cases = (
+        # Uniform distribution, with and without range
+        (stateless.stateless_random_uniform, random_ops.random_uniform, {}),
+        (stateless.stateless_random_uniform, random_ops.random_uniform,
+         dict(minval=2.2, maxval=7.1)),
+        # Normal distribution, with and without mean+stddev
+        (stateless.stateless_random_normal, random_ops.random_normal, {}),
+        (stateless.stateless_random_normal, random_ops.random_normal,
+         dict(mean=2, stddev=3)),
+        # Truncated normal distribution, with and without mean+stddev
+        (stateless.stateless_truncated_normal, random_ops.truncated_normal, {}),
+        (stateless.stateless_truncated_normal, random_ops.truncated_normal,
+         dict(mean=3, stddev=4)),
+    )
+    for dtype in dtypes.float16, dtypes.float32, dtypes.float64:
+      for shape_dtype in shape_dtypes:
+        for shape in (), (3,), (2, 5):
+          if shape_dtype is not None:
+            shape = constant_op.constant(shape, dtype=shape_dtype)
+          for stateless_op, stateful_op, kwds in float_cases:
+            kwds = dict(shape=shape, dtype=dtype, **kwds)
+            yield (functools.partial(stateless_op, **kwds),
+                   functools.partial(stateful_op, **kwds))
+
+  def _int_cases(self, shape_dtypes=(None,)):
+    for shape_dtype in shape_dtypes:
+      for shape in (), (3,), (2, 5):
+        if shape_dtype is not None:
+          shape = constant_op.constant(shape, dtype=shape_dtype)
+        for dtype in dtypes.int32, dtypes.int64:
+          kwds = dict(minval=2, maxval=11111, dtype=dtype, shape=shape)
+          yield (functools.partial(stateless.stateless_random_uniform, **kwds),
+                 functools.partial(random_ops.random_uniform, **kwds))
+
+  def _multinomial_cases(self):
+    num_samples = 10
+    for logits_dtype in np.float16, np.float32, np.float64:
+      for output_dtype in dtypes.int32, dtypes.int64:
+        for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2],
+                                                  [0.25, 0.75]]):
+          kwds = dict(
+              logits=constant_op.constant(logits, dtype=logits_dtype),
+              num_samples=num_samples,
+              output_dtype=output_dtype)
+          yield (functools.partial(stateless.stateless_multinomial, **kwds),
+                 functools.partial(random_ops.multinomial, **kwds))
+
+  def testMatchFloat(self):
+    self._test_match(self._float_cases())
+
+  def testMatchInt(self):
+    self._test_match(self._int_cases())
+
+  def testMatchMultinomial(self):
+    self._test_match(self._multinomial_cases())
+
+  def testDeterminismFloat(self):
+    self._test_determinism(
+        self._float_cases(shape_dtypes=(dtypes.int32, dtypes.int64)))
+
+  def testDeterminismInt(self):
+    self._test_determinism(
+        self._int_cases(shape_dtypes=(dtypes.int32, dtypes.int64)))
+
+  def testDeterminismMultinomial(self):
+    self._test_determinism(self._multinomial_cases())
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py
index d0e5f70025..232eb97c05 100644
--- a/tensorflow/python/ops/standard_ops.py
+++ b/tensorflow/python/ops/standard_ops.py
@@ -91,6 +91,7 @@ from tensorflow.python.ops.state_ops import scatter_nd_sub
 # from tensorflow.python.ops.state_ops import scatter_nd_mul
 # from tensorflow.python.ops.state_ops import scatter_nd_div
 from tensorflow.python.ops.state_ops import scatter_nd_update
+from tensorflow.python.ops.stateless_random_ops import *
 from tensorflow.python.ops.string_ops import *
 from tensorflow.python.ops.template import *
 from tensorflow.python.ops.tensor_array_ops import *
diff --git a/tensorflow/contrib/stateless/python/stateless_ops.py b/tensorflow/python/ops/stateless_random_ops.py
similarity index 97%
rename from tensorflow/contrib/stateless/python/stateless_ops.py
rename to tensorflow/python/ops/stateless_random_ops.py
index 1449825c83..24759cf3af 100644
--- a/tensorflow/contrib/stateless/python/stateless_ops.py
+++ b/tensorflow/python/ops/stateless_random_ops.py
@@ -24,6 +24,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.util.tf_export import tf_export
+
 
 ops.NotDifferentiable("StatelessMultinomial")
 ops.NotDifferentiable("StatelessRandomNormal")
@@ -32,6 +34,7 @@ ops.NotDifferentiable("StatelessRandomUniformInt")
 ops.NotDifferentiable("StatelessTruncatedNormal")
 
 
+@tf_export("random.stateless_uniform")
 def stateless_random_uniform(shape,
                              seed,
                              minval=0,
@@ -98,6 +101,7 @@ def stateless_random_uniform(shape,
       return math_ops.add(rnd * (maxval - minval), minval, name=name)
 
 
+@tf_export("random.stateless_normal")
 def stateless_random_normal(shape,
                             seed,
                             mean=0.0,
@@ -134,6 +138,7 @@ def stateless_random_normal(shape,
     return math_ops.add(rnd * stddev, mean, name=name)
 
 
+@tf_export("random.stateless_truncated_normal")
 def stateless_truncated_normal(shape,
                                seed,
                                mean=0.0,
@@ -175,6 +180,7 @@ def stateless_truncated_normal(shape,
     return math_ops.add(rnd * stddev, mean, name=name)
 
 
+@tf_export("random.stateless_multinomial")
 def stateless_multinomial(logits,
                           num_samples,
                           seed,
@@ -193,7 +199,7 @@ def stateless_multinomial(logits,
   ```python
   # samples has shape [1, 5], where each value is either 0 or 1 with equal
   # probability.
-  samples = tf.contrib.stateless.stateless_multinomial(
+  samples = tf.random.stateless_multinomial(
       tf.log([[10., 10.]]), 5, seed=[7, 17])
   ```
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
index a568dd4cd8..160c09798d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
@@ -32,6 +32,22 @@ tf_module {
     name: "shuffle"
     argspec: "args=[\'value\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
+  member_method {
+    name: "stateless_multinomial"
+    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'output_dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int64\'>\", \'None\'], "
+  }
+  member_method {
+    name: "stateless_normal"
+    argspec: "args=[\'shape\', \'seed\', \'mean\', \'stddev\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\'], "
+  }
+  member_method {
+    name: "stateless_truncated_normal"
+    argspec: "args=[\'shape\', \'seed\', \'mean\', \'stddev\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\'], "
+  }
+  member_method {
+    name: "stateless_uniform"
+    argspec: "args=[\'shape\', \'seed\', \'minval\', \'maxval\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
+  }
   member_method {
     name: "truncated_normal"
     argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
index a568dd4cd8..160c09798d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
@@ -32,6 +32,22 @@ tf_module {
     name: "shuffle"
     argspec: "args=[\'value\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
   }
+  member_method {
+    name: "stateless_multinomial"
+    argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'output_dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int64\'>\", \'None\'], "
+  }
+  member_method {
+    name: "stateless_normal"
+    argspec: "args=[\'shape\', \'seed\', \'mean\', \'stddev\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\'], "
+  }
+  member_method {
+    name: "stateless_truncated_normal"
+    argspec: "args=[\'shape\', \'seed\', \'mean\', \'stddev\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\'], "
+  }
+  member_method {
+    name: "stateless_uniform"
+    argspec: "args=[\'shape\', \'seed\', \'minval\', \'maxval\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \"<dtype: \'float32\'>\", \'None\'], "
+  }
   member_method {
     name: "truncated_normal"
     argspec: "args=[\'shape\', \'mean\', \'stddev\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
-- 
GitLab


From 63f4fcd6618b25f6b3a624c5ca8fe12efe84acb3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 13:30:19 -0700
Subject: [PATCH 0995/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 217197730
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 90 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  2 +
 2 files changed, 92 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index d2e937dd2c..ab1efff6ed 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -9175,6 +9175,51 @@ op {
     }
   }
 }
+op {
+  name: "BatchMatMul"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+  attr {
+    name: "adj_x"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "adj_y"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "BatchMatrixBandPart"
   input_arg {
@@ -31198,6 +31243,51 @@ op {
     }
   }
 }
+op {
+  name: "MatMul"
+  input_arg {
+    name: "a"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "b"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "product"
+    type_attr: "T"
+  }
+  attr {
+    name: "transpose_a"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "transpose_b"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_BFLOAT16
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "MatchingFiles"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index d034ea27a1..15ed2ac21b 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -3235,6 +3235,7 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
@@ -15795,6 +15796,7 @@ op {
         type: DT_FLOAT
         type: DT_DOUBLE
         type: DT_INT32
+        type: DT_INT64
         type: DT_COMPLEX64
         type: DT_COMPLEX128
       }
-- 
GitLab


From 43b6f6c0ebb032c17ed5382150ba989761e036b7 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Mon, 15 Oct 2018 13:57:27 -0700
Subject: [PATCH 0996/1085] [tf.data] Mechanism for trading determinism for
 performance.

This change:
- adds `experimental_deterministic` to `tf.data.Options()`, which can be used to indicate that the user favors performance over determinism
- adds a fast non-deterministic code path for `tf.data.Dataset.interleave(..., num_parallel_calls=...)
- adds a fast non-deterministic code path for `tf.data.Dataset.map(..., num_parallel_calls=...)
- adds a static rewrite that triggers the fast non-deterministic code path in `interleave` and `map` when `experimental_deterministic == False`

Fixes #18827.

PiperOrigin-RevId: 217202357
---
 .../core/grappler/optimizers/data/BUILD       |  32 +++
 .../optimizers/data/graph_test_utils.cc       |  69 ++++-
 .../optimizers/data/graph_test_utils.h        |  27 +-
 .../grappler/optimizers/data/make_sloppy.cc   |  46 ++++
 .../grappler/optimizers/data/make_sloppy.h    |  46 ++++
 .../optimizers/data/make_sloppy_test.cc       | 114 ++++++++
 .../kernels/data/map_and_batch_dataset_op.cc  |   3 -
 .../core/kernels/data/map_dataset_op.cc       |  27 +-
 .../data/parallel_interleave_dataset_op.cc    | 135 ++++++---
 .../kernels/data/parallel_map_dataset_op.cc   |  30 +-
 .../kernels/data/parallel_map_iterator.cc     | 111 +++++---
 .../core/kernels/data/parallel_map_iterator.h |   7 +-
 .../kernels/data/parse_example_dataset_op.cc  |  27 +-
 tensorflow/core/ops/dataset_ops.cc            |   3 +
 tensorflow/python/data/kernel_tests/BUILD     |   1 -
 .../interleave_dataset_op_test.py             | 259 +++++++++++++-----
 .../data/kernel_tests/map_dataset_op_test.py  |  78 ++++++
 tensorflow/python/data/ops/dataset_ops.py     |  20 +-
 .../golden/v1/tensorflow.data.-options.pbtxt  |   4 +
 .../golden/v2/tensorflow.data.-options.pbtxt  |   4 +
 20 files changed, 844 insertions(+), 199 deletions(-)
 create mode 100644 tensorflow/core/grappler/optimizers/data/make_sloppy.cc
 create mode 100644 tensorflow/core/grappler/optimizers/data/make_sloppy.h
 create mode 100644 tensorflow/core/grappler/optimizers/data/make_sloppy_test.cc

diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD
index 1a648da5da..a2eac4fd4b 100644
--- a/tensorflow/core/grappler/optimizers/data/BUILD
+++ b/tensorflow/core/grappler/optimizers/data/BUILD
@@ -226,6 +226,37 @@ cc_library(
     ] + tf_protos_all(),
 )
 
+cc_library(
+    name = "make_sloppy",
+    srcs = ["make_sloppy.cc"],
+    hdrs = ["make_sloppy.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:mutable_graph_view",
+        "//tensorflow/core/grappler:op_types",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer",
+        "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry",
+    ] + tf_protos_all(),
+)
+
+tf_cc_test(
+    name = "make_sloppy_test",
+    srcs = ["make_sloppy_test.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":graph_test_utils",
+        ":graph_utils",
+        ":make_sloppy",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "//tensorflow/core/grappler:grappler_item",
+    ],
+)
+
 cc_library(
     name = "map_vectorization",
     srcs = ["map_vectorization.cc"],
@@ -523,6 +554,7 @@ cc_library(
         ":filter_fusion",
         ":hoist_random_uniform",
         ":latency_all_edges",
+        ":make_sloppy",
         ":map_and_batch_fusion",
         ":map_and_batch_numa_aware_replacement",
         ":map_and_filter_fusion",
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
index 1f03c6515c..affaeafb0f 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc
@@ -25,16 +25,6 @@ namespace tensorflow {
 namespace grappler {
 namespace graph_tests_utils {
 
-NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
-                    StringPiece function_name) {
-  return test::function::NDef(
-      name, "MapDataset", {string(input_node_name)},
-      {{"f", FunctionDefHelper::FunctionRef(string(function_name))},
-       {"Targuments", {}},
-       {"output_shapes", gtl::ArraySlice<TensorShape>{}},
-       {"output_types", gtl::ArraySlice<DataType>{}}});
-}
-
 NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
                        StringPiece function_name) {
   return test::function::NDef(
@@ -54,12 +44,69 @@ NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
       name, "MapAndBatchDatasetV2",
       {string(input_node_name), "", string(batch_size_node_name),
        string(num_parallel_calls_node_name), string(drop_remainder_node_name)},
-      {{"predicate", FunctionDefHelper::FunctionRef(string(function_name))},
+      {{"f", FunctionDefHelper::FunctionRef(string(function_name))},
        {"Targuments", {}},
        {"output_shapes", gtl::ArraySlice<TensorShape>{}},
        {"output_types", gtl::ArraySlice<TensorShape>{}}});
 }
 
+NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
+                    StringPiece function_name) {
+  return test::function::NDef(
+      name, "MapDataset", {string(input_node_name)},
+      {{"f", FunctionDefHelper::FunctionRef(string(function_name))},
+       {"Targuments", {}},
+       {"output_shapes", gtl::ArraySlice<TensorShape>{}},
+       {"output_types", gtl::ArraySlice<DataType>{}}});
+}
+
+NodeDef MakeParallelInterleaveNode(StringPiece name,
+                                   StringPiece input_node_name,
+                                   StringPiece cycle_length_node_name,
+                                   StringPiece block_length_node_name,
+                                   StringPiece num_parallel_calls_node_name,
+                                   StringPiece function_name, bool sloppy) {
+  return test::function::NDef(
+      name, "ParallelInterleaveDatasetV2",
+      {string(input_node_name), "", string(cycle_length_node_name),
+       string(block_length_node_name), string(num_parallel_calls_node_name)},
+      {
+          {"f", FunctionDefHelper::FunctionRef(string(function_name))},
+          {"Targuments", {}},
+          {"output_shapes", gtl::ArraySlice<TensorShape>{}},
+          {"output_types", gtl::ArraySlice<TensorShape>{}},
+          {"sloppy", sloppy},
+      });
+}
+
+NodeDef MakeParallelMapNode(StringPiece name, StringPiece input_node_name,
+                            StringPiece num_parallel_calls_node_name,
+                            StringPiece function_name, bool sloppy) {
+  return test::function::NDef(
+      name, "ParallelMapDataset",
+      {string(input_node_name), string(num_parallel_calls_node_name)},
+      {
+          {"f", FunctionDefHelper::FunctionRef(string(function_name))},
+          {"Targuments", {}},
+          {"output_shapes", gtl::ArraySlice<TensorShape>{}},
+          {"output_types", gtl::ArraySlice<DataType>{}},
+          {"sloppy", sloppy},
+      });
+}
+
+NodeDef MakeParseExampleNode(StringPiece name, StringPiece input_node_name,
+                             StringPiece num_parallel_calls_node_name,
+                             bool sloppy) {
+  return test::function::NDef(
+      name, "ParseExampleDataset",
+      {string(input_node_name), string(num_parallel_calls_node_name)},
+      {
+          {"output_shapes", gtl::ArraySlice<TensorShape>{}},
+          {"output_types", gtl::ArraySlice<DataType>{}},
+          {"sloppy", sloppy},
+      });
+}
+
 }  // end namespace graph_tests_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
index f7891d5e1f..a2707ee7b7 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
+++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h
@@ -23,18 +23,39 @@ namespace tensorflow {
 namespace grappler {
 namespace graph_tests_utils {
 
-NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
-                    StringPiece function_name = "XTimesTwo");
-
+// Creates a test NodeDef for FilterDataset.
 NodeDef MakeFilterNode(StringPiece name, StringPiece input_node_name,
                        StringPiece function_name = "IsZero");
 
+// Creates a test NodeDef for MapDataset.
+NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name,
+                    StringPiece function_name = "XTimesTwo");
+
+// Creates a test NodeDef for MapAndBatchDataset.
 NodeDef MakeMapAndBatchNode(StringPiece name, StringPiece input_node_name,
                             StringPiece batch_size_node_name,
                             StringPiece num_parallel_calls_node_name,
                             StringPiece drop_remainder_node_name,
                             StringPiece function_name = "XTimesTwo");
 
+// Creates a test NodeDef for ParallelInterleaveDataset.
+NodeDef MakeParallelInterleaveNode(StringPiece name,
+                                   StringPiece input_node_name,
+                                   StringPiece cycle_length_node_name,
+                                   StringPiece block_length_node_name,
+                                   StringPiece num_parallel_calls_node_name,
+                                   StringPiece function_name, bool sloppy);
+
+// Creates a test NodeDef for ParallelMapDataset.
+NodeDef MakeParallelMapNode(StringPiece name, StringPiece input_node_name,
+                            StringPiece num_parallel_calls_node_name,
+                            StringPiece function_name, bool sloppy);
+
+// Creates a test NodeDef for ParseExampleDataset.
+NodeDef MakeParseExampleNode(StringPiece name, StringPiece input_node_name,
+                             StringPiece num_parallel_calls_node_name,
+                             bool sloppy);
+
 }  // end namespace graph_tests_utils
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/make_sloppy.cc b/tensorflow/core/grappler/optimizers/data/make_sloppy.cc
new file mode 100644
index 0000000000..1cfaef3ffb
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/make_sloppy.cc
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/make_sloppy.h"
+
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/mutable_graph_view.h"
+#include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+
+namespace tensorflow {
+namespace grappler {
+
+Status MakeSloppy::Optimize(Cluster* cluster, const GrapplerItem& item,
+                            GraphDef* output) {
+  *output = item.graph;
+  MutableGraphView graph(output);
+
+  for (NodeDef& node : *output->mutable_node()) {
+    if (node.op() == "ParallelInterleaveDatasetV2" ||
+        node.op() == "ParallelMapDataset" ||
+        node.op() == "ParseExampleDataset") {
+      (*node.mutable_attr())["sloppy"].set_b(true);
+    }
+  }
+  return Status::OK();
+}
+
+REGISTER_GRAPH_OPTIMIZER_AS(MakeSloppy, "make_sloppy");
+
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/grappler/optimizers/data/make_sloppy.h b/tensorflow/core/grappler/optimizers/data/make_sloppy.h
new file mode 100644
index 0000000000..9dcab1038d
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/make_sloppy.h
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAKE_SLOPPY_H_
+#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAKE_SLOPPY_H_
+
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+
+namespace tensorflow {
+namespace grappler {
+
+class MakeSloppy : public CustomGraphOptimizer {
+ public:
+  MakeSloppy() = default;
+  ~MakeSloppy() override = default;
+
+  string name() const override { return "make_sloppy"; }
+
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* output) override;
+
+  void Feedback(Cluster* cluster, const GrapplerItem& item,
+                const GraphDef& optimize_output, double result) override {}
+};
+
+}  // namespace grappler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAKE_SLOPPY_H_
diff --git a/tensorflow/core/grappler/optimizers/data/make_sloppy_test.cc b/tensorflow/core/grappler/optimizers/data/make_sloppy_test.cc
new file mode 100644
index 0000000000..24431f4744
--- /dev/null
+++ b/tensorflow/core/grappler/optimizers/data/make_sloppy_test.cc
@@ -0,0 +1,114 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/grappler/optimizers/data/make_sloppy.h"
+
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h"
+#include "tensorflow/core/grappler/optimizers/data/graph_utils.h"
+
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace grappler {
+namespace {
+
+using graph_tests_utils::MakeParallelInterleaveNode;
+using graph_tests_utils::MakeParallelMapNode;
+using graph_tests_utils::MakeParseExampleNode;
+
+TEST(MakeSloppy, ParallelInterleave) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+       NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+       NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       NDef("range", "RangeDataset", {"start", "stop", "step"}, {}),
+       NDef("cycle_length", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       NDef("block_length", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       NDef("num_parallel_calls", "Const", {},
+            {{"value", 1}, {"dtype", DT_INT32}}),
+       MakeParallelInterleaveNode("interleave", "range", "cycle_length",
+                                  "block_length", "num_parallel_calls",
+                                  "XTimesTwo", /*sloppy=*/false)},
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  MakeSloppy optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+  EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("interleave", output));
+  int index = graph_utils::FindGraphNodeWithName("interleave", output);
+  EXPECT_TRUE(output.node(index).attr().at("sloppy").b());
+}
+
+TEST(MakeSloppy, ParallelMap) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+       NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+       NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       NDef("range", "RangeDataset", {"start", "stop", "step"}, {}),
+       NDef("num_parallel_calls", "Const", {},
+            {{"value", 1}, {"dtype", DT_INT32}}),
+       MakeParallelMapNode("map", "range", "num_parallel_calls", "XTimesTwo",
+                           /*sloppy=*/false)},
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  MakeSloppy optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+  EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("map", output));
+  int index = graph_utils::FindGraphNodeWithName("map", output);
+  EXPECT_TRUE(output.node(index).attr().at("sloppy").b());
+}
+
+TEST(MakeSloppy, ParseExampleDataset) {
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}),
+       NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}),
+       NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}),
+       NDef("range", "RangeDataset", {"start", "stop", "step"}, {}),
+       NDef("num_parallel_calls", "Const", {},
+            {{"value", 1}, {"dtype", DT_INT32}}),
+       MakeParseExampleNode("parse_example", "range", "num_parallel_calls",
+                            /*sloppy=*/false)},
+      // FunctionLib
+      {});
+
+  MakeSloppy optimizer;
+  GraphDef output;
+  TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output));
+  EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("parse_example", output));
+  int index = graph_utils::FindGraphNodeWithName("parse_example", output);
+  EXPECT_TRUE(output.node(index).attr().at("sloppy").b());
+}
+
+}  // namespace
+}  // namespace grappler
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index bae56828dc..7bec2cd498 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -38,9 +38,6 @@ namespace {
 
 // See documentation in ../ops/dataset_ops.cc for a high-level
 // description of the following op.
-
-// TODO(b/116852688): Make coordination between the performance model and this
-// transformation more robust.
 class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
  public:
   using MapAndBatchIteratorFunction =
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index 6b6ffabf4f..a0ebe71208 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -83,7 +83,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
     }
 
     *output = new Dataset(ctx, input, func_, std::move(captured_func),
-                          output_types_, output_shapes_, std::move(map_func));
+                          output_types_, output_shapes_,
+                          use_inter_op_parallelism_, std::move(map_func));
   }
 
  private:
@@ -94,10 +95,11 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
             std::unique_ptr<CapturedFunction> captured_func,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
-            MapIteratorFunction map_func)
+            bool use_inter_op_parallelism, MapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           func_(func),
+          use_inter_op_parallelism_(use_inter_op_parallelism),
           captured_func_(std::move(captured_func)),
           output_types_(output_types),
           output_shapes_(output_shapes),
@@ -140,16 +142,28 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
-      AttrValue f;
-      b->BuildAttrValue(func_, &f);
+
+      // Attr: f
+      TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
+      AttrValue f_attr;
+      b->BuildAttrValue(func_, &f_attr);
+
+      // Attr: Targuments
       AttrValue other_arguments_types_attr;
       b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
 
+      // Attr: use_inter_op_parallelism
+      AttrValue use_inter_op_parallelism_attr;
+      b->BuildAttrValue(use_inter_op_parallelism_,
+                        &use_inter_op_parallelism_attr);
+
       TF_RETURN_IF_ERROR(b->AddDataset(
           this, {std::make_pair(0, input_graph_node)},  // Single tensor inputs.
           {std::make_pair(1, other_arguments)},         // Tensor list inputs.
-          {std::make_pair("f", f),
-           std::make_pair("Targuments", other_arguments_types_attr)},  // Attrs
+          {std::make_pair("f", f_attr),
+           std::make_pair("Targuments", other_arguments_types_attr),
+           std::make_pair("use_inter_op_parallelism",
+                          use_inter_op_parallelism_attr)},  // Attrs
           output));
       return Status::OK();
     }
@@ -210,6 +224,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
 
     const DatasetBase* const input_;
     const NameAttrList func_;
+    const bool use_inter_op_parallelism_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 6b6b3d6ab9..d63a98cbd7 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -1084,9 +1085,6 @@ REGISTER_KERNEL_BUILDER(Name("ParallelInterleaveDataset").Device(DEVICE_CPU),
 // The above design choices were made with automated optimizations in mind,
 // isolating the degree of parallelism as the single tunable knob of this
 // implementation.
-//
-// TODO(b/116852688): Make coordination between the performance model and this
-// transformation more robust.
 class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
  public:
   explicit ParallelInterleaveDatasetV2Op(OpKernelConstruction* ctx)
@@ -1094,6 +1092,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &interleave_func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("sloppy", &sloppy_));
   }
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
@@ -1126,9 +1125,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         ctx, CapturedFunction::Create(interleave_func_, ctx, "other_arguments",
                                       &captured_func));
 
-    *output = new Dataset(ctx, input, interleave_func_,
-                          std::move(captured_func), cycle_length, block_length,
-                          num_parallel_calls, output_types_, output_shapes_);
+    *output =
+        new Dataset(ctx, input, interleave_func_, std::move(captured_func),
+                    cycle_length, block_length, num_parallel_calls, sloppy_,
+                    output_types_, output_shapes_);
   }
 
  private:
@@ -1137,7 +1137,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
     Dataset(OpKernelContext* ctx, const DatasetBase* input,
             const NameAttrList& func,
             std::unique_ptr<CapturedFunction> captured_func, int64 cycle_length,
-            int64 block_length, int64 num_parallel_calls,
+            int64 block_length, int64 num_parallel_calls, bool sloppy,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes)
         : DatasetBase(DatasetContext(ctx)),
@@ -1147,6 +1147,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           cycle_length_(cycle_length),
           block_length_(block_length),
           num_parallel_calls_(num_parallel_calls),
+          sloppy_(sloppy),
           output_types_(output_types),
           output_shapes_(output_shapes) {
       input_->Ref();
@@ -1156,8 +1157,14 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(new Iterator(
-          {this, strings::StrCat(prefix, "::ParallelInterleaveV2")}));
+      if (sloppy_) {
+        return MakeUnique<SloppyParallelInterleave>(
+            ParallelInterleaveIteratorBase::Params{
+                this, strings::StrCat(prefix, "::ParallelInterleaveV2")});
+      }
+      return MakeUnique<DeterministicParallelInterleave>(
+          ParallelInterleaveIteratorBase::Params{
+              this, strings::StrCat(prefix, "::ParallelInterleaveV2")});
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -1200,22 +1207,27 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       b->BuildAttrValue(interleave_func_, &f);
       AttrValue other_arguments_types_attr;
       b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
+      AttrValue sloppy_attr;
+      b->BuildAttrValue(sloppy_, &sloppy_attr);
 
-      TF_RETURN_IF_ERROR(b->AddDataset(
-          this,
-          {{0, input_node},
-           {2, cycle_length_node},
-           {3, block_length_node},
-           {4, num_parallel_calls_node}},
-          {{1, other_arguments}},
-          {{"f", f}, {"Targuments", other_arguments_types_attr}}, output));
+      TF_RETURN_IF_ERROR(
+          b->AddDataset(this,
+                        {{0, input_node},
+                         {2, cycle_length_node},
+                         {3, block_length_node},
+                         {4, num_parallel_calls_node}},
+                        {{1, other_arguments}},
+                        {{"f", f},
+                         {"Targuments", other_arguments_types_attr},
+                         {"sloppy", sloppy_attr}},
+                        output));
       return Status::OK();
     }
 
    private:
-    class Iterator : public DatasetIterator<Dataset> {
+    class ParallelInterleaveIteratorBase : public DatasetIterator<Dataset> {
      public:
-      explicit Iterator(const Params& params)
+      explicit ParallelInterleaveIteratorBase(const Params& params)
           : DatasetIterator<Dataset>(params),
             mu_(std::make_shared<mutex>()),
             cond_var_(std::make_shared<condition_variable>()),
@@ -1229,7 +1241,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
                 dataset()->cycle_length_ /* num_threads */,
                 false /* low_latency_hint */)) {}
 
-      ~Iterator() override {
+      ~ParallelInterleaveIteratorBase() override {
         mutex_lock l(*mu_);
         // Cancel the runner thread.
         cancelled_ = true;
@@ -1260,23 +1272,19 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
                              bool* end_of_sequence) override {
         std::shared_ptr<InvocationResult> result;
         do {
+          result.reset();
           {
             mutex_lock l(*mu_);
             EnsureRunnerThreadStarted(ctx);
-            while (invocation_results_.empty() &&
-                   (!end_of_input_ || num_open_ > 0)) {
+            while (ShouldWait(&result)) {
               RecordStop(ctx);
               cond_var_->wait(l);
               RecordStart(ctx);
             }
-            if (!invocation_results_.empty()) {
-              std::swap(result, invocation_results_.front());
-              invocation_results_.pop_front();
-            } else {
+            if (!result) {
               *end_of_sequence = true;
               return Status::OK();
             }
-            cond_var_->notify_all();
           }
           RecordStop(ctx);
           result->notification.WaitForNotification();
@@ -1291,6 +1299,18 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       }
 
      protected:
+      struct InvocationResult {
+        Notification notification;  // used for coordination with the consumer
+        Status status;              // the invocation status
+        std::vector<Tensor> return_values;  // the invocation result values
+        bool skip;  // if set the result should be skipped
+      };
+
+      // Used by the consumer to determine whether it needs to wait. Upon
+      // returning false, `result` will either be NULL if end of input has been
+      // reached or point to a result to consume.
+      virtual bool ShouldWait(std::shared_ptr<InvocationResult>* result) = 0;
+
       Status SaveInternal(IteratorStateWriter* writer) override {
         mutex_lock l(*mu_);
         // Wait for all in-flight calls to complete.
@@ -1377,14 +1397,6 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
-     private:
-      struct InvocationResult {
-        Notification notification;  // used for coordination with the consumer
-        Status status;              // the invocation status
-        std::vector<Tensor> return_values;  // the invocation result values
-        bool skip;  // if set the result should be skipped
-      };
-
       void EnsureRunnerThreadStarted(IteratorContext* ctx)
           EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
         if (!runner_thread_) {
@@ -1415,7 +1427,11 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
           if (end_of_input) {
             result->skip = true;
           }
-          result->notification.Notify();
+          {
+            mutex_lock l(*mu_);
+            result->notification.Notify();
+            cond_var_->notify_all();
+          }
           if (!result->status.ok()) {
             break;
           }
@@ -1505,9 +1521,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
               }
               num_calls_++;
               element_in_use_[cycle_index_] = true;
-              thread_pool_->Schedule(std::bind(&Iterator::FetchOutputs, this,
-                                               ctx, cycle_index_,
-                                               std::move(results)));
+              thread_pool_->Schedule(
+                  std::bind(&ParallelInterleaveIteratorBase::FetchOutputs, this,
+                            ctx, cycle_index_, std::move(results)));
             }
             cycle_index_ = (cycle_index_ + 1) % dataset()->cycle_length_;
           }
@@ -1651,16 +1667,57 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel {
       bool cancelled_ GUARDED_BY(*mu_) = false;
     };
 
+    class DeterministicParallelInterleave
+        : public ParallelInterleaveIteratorBase {
+     public:
+      using ParallelInterleaveIteratorBase::ParallelInterleaveIteratorBase;
+
+     protected:
+      bool ShouldWait(std::shared_ptr<InvocationResult>* result) override
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
+        if (!invocation_results_.empty()) {
+          std::swap(*result, invocation_results_.front());
+          invocation_results_.pop_front();
+          cond_var_->notify_all();
+          return false;
+        }
+        return (!end_of_input_ || num_open_ > 0);
+      }
+    };
+
+    class SloppyParallelInterleave : public ParallelInterleaveIteratorBase {
+     public:
+      using ParallelInterleaveIteratorBase::ParallelInterleaveIteratorBase;
+
+     protected:
+      bool ShouldWait(std::shared_ptr<InvocationResult>* result) override
+          EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
+        for (auto it = invocation_results_.begin();
+             it != invocation_results_.end(); ++it) {
+          if ((*it)->notification.HasBeenNotified()) {
+            std::swap(*result, *it);
+            invocation_results_.erase(it);
+            cond_var_->notify_all();
+            return false;
+          }
+        }
+        return !invocation_results_.empty() ||
+               (!end_of_input_ || num_open_ > 0);
+      }
+    };
+
     const DatasetBase* const input_;
     const NameAttrList interleave_func_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const int64 cycle_length_;
     const int64 block_length_;
     const int64 num_parallel_calls_;
+    const bool sloppy_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
   };
 
+  bool sloppy_;
   DataTypeVector output_types_;
   std::vector<PartialTensorShape> output_shapes_;
   NameAttrList interleave_func_;
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 3a14924fba..25ae790181 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -40,6 +40,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("use_inter_op_parallelism",
                                      &use_inter_op_parallelism_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("sloppy", &sloppy_));
   }
 
  protected:
@@ -104,7 +105,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     }
 
     *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_,
-                          output_shapes_, use_inter_op_parallelism_,
+                          output_shapes_, use_inter_op_parallelism_, sloppy_,
                           std::move(captured_func), std::move(map_func));
   }
 
@@ -115,7 +116,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
             const NameAttrList& func, int32 num_parallel_calls,
             const DataTypeVector& output_types,
             const std::vector<PartialTensorShape>& output_shapes,
-            bool use_inter_op_parallelism,
+            bool use_inter_op_parallelism, bool sloppy,
             std::unique_ptr<CapturedFunction> captured_func,
             ParallelMapIteratorFunction map_func)
         : DatasetBase(DatasetContext(ctx)),
@@ -125,6 +126,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
           output_types_(output_types),
           output_shapes_(output_shapes),
           use_inter_op_parallelism_(use_inter_op_parallelism),
+          sloppy_(sloppy),
           captured_func_(std::move(captured_func)),
           map_func_(std::move(map_func)) {
       input_->Ref();
@@ -140,7 +142,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
 
       return NewParallelMapIterator(
           {this, strings::StrCat(prefix, "::ParallelMap")}, input_,
-          std::move(init_func), map_func_, num_parallel_calls_);
+          std::move(init_func), map_func_, num_parallel_calls_, sloppy_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -182,20 +184,32 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
 
       // Attr: f
       TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name()));
-      AttrValue f;
-      b->BuildAttrValue(func_, &f);
+      AttrValue f_attr;
+      b->BuildAttrValue(func_, &f_attr);
 
       // Attr: Targuments
       AttrValue other_arguments_types_attr;
       b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr);
 
+      // Attr: use_inter_op_parallelism
+      AttrValue use_inter_op_parallelism_attr;
+      b->BuildAttrValue(use_inter_op_parallelism_,
+                        &use_inter_op_parallelism_attr);
+
+      // Attr: sloppy
+      AttrValue sloppy_attr;
+      b->BuildAttrValue(sloppy_, &sloppy_attr);
+
       TF_RETURN_IF_ERROR(b->AddDataset(
           this,
           {std::make_pair(0, input_graph_node),
            std::make_pair(2, num_parallel_calls)},  // Single tensor inputs.
           {std::make_pair(1, other_arguments)},     // Tensor list inputs.
-          {std::make_pair("f", f),
-           std::make_pair("Targuments", other_arguments_types_attr)},  // Attrs
+          {std::make_pair("f", f_attr),
+           std::make_pair("Targuments", other_arguments_types_attr),
+           std::make_pair("use_inter_op_parallelism",
+                          use_inter_op_parallelism_attr),
+           std::make_pair("sloppy", sloppy_attr)},  // Attrs
           output));
       return Status::OK();
     }
@@ -207,6 +221,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
     const bool use_inter_op_parallelism_;
+    const bool sloppy_;
     const std::unique_ptr<CapturedFunction> captured_func_;
     const ParallelMapIteratorFunction map_func_;
   };
@@ -214,6 +229,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
   DataTypeVector output_types_;
   std::vector<PartialTensorShape> output_shapes_;
   bool use_inter_op_parallelism_;
+  bool sloppy_;
   NameAttrList func_;
 };
 
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index ebf41925c9..39809f5e9a 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <atomic>
 #include <deque>
 #include <functional>
+#include <memory>
 #include <utility>
 #include <vector>
 
@@ -28,11 +29,9 @@ namespace tensorflow {
 namespace data {
 namespace {
 
-// TODO(b/116852688): Make coordination between the performance model and this
-// transformation more robust.
-class ParallelMapIterator : public DatasetBaseIterator {
+class ParallelMapIteratorBase : public DatasetBaseIterator {
  public:
-  explicit ParallelMapIterator(
+  ParallelMapIteratorBase(
       const typename DatasetBaseIterator::BaseParams& params,
       const DatasetBase* input_dataset,
       std::function<Status(IteratorContext*)> init_func,
@@ -46,7 +45,7 @@ class ParallelMapIterator : public DatasetBaseIterator {
         num_parallel_calls_(std::make_shared<model::SharedState>(
             num_parallel_calls, mu_, cond_var_)) {}
 
-  ~ParallelMapIterator() override {
+  ~ParallelMapIteratorBase() override {
     mutex_lock l(*mu_);
     // Cancel the runner thread.
     cancelled_ = true;
@@ -82,14 +81,11 @@ class ParallelMapIterator : public DatasetBaseIterator {
     {
       mutex_lock l(*mu_);
       EnsureRunnerThreadStarted(ctx);
-      while (invocation_results_.empty()) {
+      while (ShouldWait(&result)) {
         RecordStop(ctx);
         cond_var_->wait(l);
         RecordStart(ctx);
       }
-      std::swap(result, invocation_results_.front());
-      invocation_results_.pop_front();
-      cond_var_->notify_all();
     }
     RecordStop(ctx);
     result->notification.WaitForNotification();
@@ -98,6 +94,17 @@ class ParallelMapIterator : public DatasetBaseIterator {
   }
 
  protected:
+  struct InvocationResult {
+    Notification notification;
+    Status status;
+    std::vector<Tensor> return_values;
+    bool end_of_input;
+  };
+
+  // Used by the consumer to determine whether it needs to wait. Upon returning
+  // false, `result` will point to a result to consume.
+  virtual bool ShouldWait(std::shared_ptr<InvocationResult>* result) = 0;
+
   Status SaveInternal(IteratorStateWriter* writer) override {
     mutex_lock l(*mu_);
     // Wait for all in-flight calls to complete.
@@ -169,32 +176,22 @@ class ParallelMapIterator : public DatasetBaseIterator {
     return Status::OK();
   }
 
- private:
-  struct InvocationResult {
-    Notification notification;
-    Status status;
-    std::vector<Tensor> return_values;
-    bool end_of_input;
-  };
-
   void EnsureRunnerThreadStarted(IteratorContext* ctx)
       EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
     if (!runner_thread_) {
       auto ctx_copy = std::make_shared<IteratorContext>(*ctx);
       runner_thread_.reset(ctx->env()->StartThread(
           {}, "runner_thread",
-          std::bind(&ParallelMapIterator::RunnerThread, this, ctx_copy)));
+          std::bind(&ParallelMapIteratorBase::RunnerThread, this, ctx_copy)));
     }
   }
 
   void CallCompleted(const std::shared_ptr<InvocationResult>& result)
       LOCKS_EXCLUDED(*mu_) {
-    {
-      mutex_lock l(*mu_);
-      num_calls_--;
-      cond_var_->notify_all();
-    }
+    mutex_lock l(*mu_);
+    num_calls_--;
     result->notification.Notify();
+    cond_var_->notify_all();
   }
 
   void CallFunction(const std::shared_ptr<IteratorContext>& ctx,
@@ -335,22 +332,70 @@ class ParallelMapIterator : public DatasetBaseIterator {
   bool cancelled_ GUARDED_BY(*mu_) = false;
 };
 
-}  // namespace
+class DeterministicParallelMapIterator : public ParallelMapIteratorBase {
+ public:
+  DeterministicParallelMapIterator(
+      const typename DatasetBaseIterator::BaseParams& params,
+      const DatasetBase* input_dataset,
+      std::function<Status(IteratorContext*)> init_func,
+      ParallelMapIteratorFunction map_func, int32 num_parallel_calls)
+      : ParallelMapIteratorBase(params, input_dataset, init_func, map_func,
+                                num_parallel_calls) {}
 
-std::unique_ptr<IteratorBase> NewParallelMapIterator(
-    const DatasetBaseIterator::BaseParams& params,
-    const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func,
-    int32 num_parallel_calls) {
-  return NewParallelMapIterator(params, input_dataset, nullptr,
-                                std::move(map_func), num_parallel_calls);
-}
+ protected:
+  bool ShouldWait(std::shared_ptr<InvocationResult>* result) override
+      EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
+    if (!invocation_results_.empty()) {
+      std::swap(*result, invocation_results_.front());
+      invocation_results_.pop_front();
+      cond_var_->notify_all();
+      return false;
+    }
+    return true;
+  }
+};
+
+class SloppyParallelMapIterator : public ParallelMapIteratorBase {
+ public:
+  SloppyParallelMapIterator(
+      const typename DatasetBaseIterator::BaseParams& params,
+      const DatasetBase* input_dataset,
+      std::function<Status(IteratorContext*)> init_func,
+      ParallelMapIteratorFunction map_func, int32 num_parallel_calls)
+      : ParallelMapIteratorBase(params, input_dataset, init_func, map_func,
+                                num_parallel_calls) {}
+
+ protected:
+  bool ShouldWait(std::shared_ptr<InvocationResult>* result) override
+      EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
+    for (auto it = invocation_results_.begin(); it != invocation_results_.end();
+         ++it) {
+      if ((*it)->notification.HasBeenNotified() &&
+          (it == invocation_results_.begin() || !(*it)->end_of_input)) {
+        std::swap(*result, *it);
+        invocation_results_.erase(it);
+        cond_var_->notify_all();
+        return false;
+      }
+    }
+    return true;
+  }
+};
+
+}  // namespace
 
 std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBaseIterator::BaseParams& params,
     const DatasetBase* input_dataset,
     std::function<Status(IteratorContext*)> init_func,
-    ParallelMapIteratorFunction map_func, int32 num_parallel_calls) {
-  return MakeUnique<ParallelMapIterator>(
+    ParallelMapIteratorFunction map_func, int32 num_parallel_calls,
+    bool sloppy) {
+  if (sloppy) {
+    return MakeUnique<SloppyParallelMapIterator>(
+        params, input_dataset, std::move(init_func), std::move(map_func),
+        num_parallel_calls);
+  }
+  return MakeUnique<DeterministicParallelMapIterator>(
       params, input_dataset, std::move(init_func), std::move(map_func),
       num_parallel_calls);
 }
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h
index 813f13c9e4..d715b9a497 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.h
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h
@@ -42,11 +42,8 @@ std::unique_ptr<IteratorBase> NewParallelMapIterator(
     const DatasetBaseIterator::BaseParams& params,
     const DatasetBase* input_dataset,
     std::function<Status(IteratorContext*)> init_func,
-    ParallelMapIteratorFunction map_func, int32 num_parallel_calls);
-std::unique_ptr<IteratorBase> NewParallelMapIterator(
-    const DatasetBaseIterator::BaseParams& params,
-    const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func,
-    int32 num_parallel_calls);
+    ParallelMapIteratorFunction map_func, int32 num_parallel_calls,
+    bool sloppy);
 
 }  // namespace data
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
index 7de5ea8860..e16857a92c 100644
--- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc
@@ -25,7 +25,6 @@ namespace {
 
 // See documentation in ../ops/dataset_ops.cc for a high-level
 // description of the following op.
-
 class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
  public:
   explicit ParseExampleDatasetOp(OpKernelConstruction* ctx)
@@ -38,6 +37,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("dense_shapes", &dense_shapes_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("sloppy", &sloppy_));
     for (int i = 0; i < dense_shapes_.size(); ++i) {
       bool shape_ok = true;
       if (dense_shapes_[i].dims() == -1) {
@@ -142,11 +142,11 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
       it->second = i++;
     }
 
-    *output = new Dataset(ctx, input, std::move(dense_defaults),
-                          std::move(sparse_keys_), std::move(dense_keys_),
-                          std::move(key_to_output_index), std::move(config),
-                          num_parallel_calls, sparse_types_, dense_types_,
-                          dense_shapes_, output_types_, output_shapes_);
+    *output = new Dataset(
+        ctx, input, std::move(dense_defaults), std::move(sparse_keys_),
+        std::move(dense_keys_), std::move(key_to_output_index),
+        std::move(config), num_parallel_calls, sparse_types_, dense_types_,
+        dense_shapes_, output_types_, output_shapes_, sloppy_);
   }
 
  private:
@@ -161,7 +161,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
             const DataTypeVector& dense_types,
             const std::vector<PartialTensorShape>& dense_shapes,
             const DataTypeVector& output_types,
-            const std::vector<PartialTensorShape>& output_shapes)
+            const std::vector<PartialTensorShape>& output_shapes, bool sloppy)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
           dense_defaults_(std::move(dense_defaults)),
@@ -174,7 +174,8 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
           dense_types_(dense_types),
           dense_shapes_(dense_shapes),
           output_types_(output_types),
-          output_shapes_(output_shapes) {
+          output_shapes_(output_shapes),
+          sloppy_(sloppy) {
       input_->Ref();
     }
 
@@ -272,7 +273,8 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
 
       return NewParallelMapIterator(
           {this, strings::StrCat(prefix, "::ParseExample")}, input_,
-          std::move(map_fn), num_parallel_calls_);
+          /*init_func=*/nullptr, std::move(map_fn), num_parallel_calls_,
+          sloppy_);
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -312,12 +314,14 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
       AttrValue sparse_types_attr;
       AttrValue dense_attr;
       AttrValue dense_shapes_attr;
+      AttrValue sloppy_attr;
 
       b->BuildAttrValue(sparse_keys_, &sparse_keys_attr);
       b->BuildAttrValue(dense_keys_, &dense_keys_attr);
       b->BuildAttrValue(sparse_types_, &sparse_types_attr);
       b->BuildAttrValue(dense_types_, &dense_attr);
       b->BuildAttrValue(dense_shapes_, &dense_shapes_attr);
+      b->BuildAttrValue(sloppy_, &sloppy_attr);
 
       TF_RETURN_IF_ERROR(b->AddDataset(this,
                                        {
@@ -329,7 +333,8 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
                                         {"dense_keys", dense_keys_attr},
                                         {"sparse_types", sparse_types_attr},
                                         {"Tdense", dense_attr},
-                                        {"dense_shapes", dense_shapes_attr}},
+                                        {"dense_shapes", dense_shapes_attr},
+                                        {"sloppy", sloppy_attr}},
                                        output));
       return Status::OK();
     }
@@ -347,11 +352,13 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel {
     const std::vector<PartialTensorShape> dense_shapes_;
     const DataTypeVector output_types_;
     const std::vector<PartialTensorShape> output_shapes_;
+    const bool sloppy_;
   };
 
   const int graph_def_version_;
   DataTypeVector output_types_;
   std::vector<PartialTensorShape> output_shapes_;
+  bool sloppy_;
   std::vector<string> sparse_keys_;
   std::vector<string> dense_keys_;
   DataTypeVector sparse_types_;
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index ec22eee874..686458a598 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -180,6 +180,7 @@ REGISTER_OP("ParseExampleDataset")
     .Attr("output_shapes: list(shape) >= 1")  // Output components will be
                                               // sorted by key (dense_keys and
                                               // sparse_keys combined) here.
+    .Attr("sloppy: bool = false")
     .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("SetStatsAggregatorDataset")
@@ -213,6 +214,7 @@ REGISTER_OP("ParallelMapDataset")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
     .Attr("use_inter_op_parallelism: bool = true")
+    .Attr("sloppy: bool = false")
     .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("MapAndBatchDataset")
@@ -340,6 +342,7 @@ REGISTER_OP("ParallelInterleaveDatasetV2")
     .Attr("Targuments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
+    .Attr("sloppy: bool = false")
     .SetShapeFn(shape_inference::ScalarShape);
 
 REGISTER_OP("GroupByReducerDataset")
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 671b7ca1bb..9a565053f8 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -8,7 +8,6 @@ licenses(["notice"])  # Apache 2.0
 
 load("//tensorflow:tensorflow.bzl", "tf_py_test")
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
-load("//tensorflow:tensorflow.bzl", "sycl_py_test")
 
 tf_py_test(
     name = "batch_dataset_op_test",
diff --git a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
index 92bb67b6ff..b911c249ce 100644
--- a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py
@@ -17,87 +17,151 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import itertools
+import threading
 
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
 
-class InterleaveDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
+def _interleave(lists, cycle_length, block_length):
+  """Reference implementation of interleave used for testing.
+
+  Args:
+    lists: a list of lists to interleave
+    cycle_length: the length of the interleave cycle
+    block_length: the length of the interleave block
+
+  Yields:
+    Elements of `lists` interleaved in the order determined by `cycle_length`
+    and `block_length`.
+  """
+  num_open = 0
 
-  def _interleave(self, lists, cycle_length, block_length):
-    num_open = 0
+  # `all_iterators` acts as a queue of iterators over each element of `lists`.
+  all_iterators = [iter(l) for l in lists]
 
-    # `all_iterators` acts as a queue of iterators over each element of `lists`.
-    all_iterators = [iter(l) for l in lists]
+  # `open_iterators` are the iterators whose elements are currently being
+  # interleaved.
+  open_iterators = []
+  for i in range(cycle_length):
+    if all_iterators:
+      open_iterators.append(all_iterators.pop(0))
+      num_open += 1
+    else:
+      open_iterators.append(None)
 
-    # `open_iterators` are the iterators whose elements are currently being
-    # interleaved.
-    open_iterators = []
+  while num_open or all_iterators:
     for i in range(cycle_length):
-      if all_iterators:
-        open_iterators.append(all_iterators.pop(0))
-        num_open += 1
-      else:
-        open_iterators.append(None)
-
-    while num_open or all_iterators:
-      for i in range(cycle_length):
-        if open_iterators[i] is None:
-          if all_iterators:
-            open_iterators[i] = all_iterators.pop(0)
-            num_open += 1
-          else:
-            continue
-        for _ in range(block_length):
-          try:
-            yield next(open_iterators[i])
-          except StopIteration:
-            open_iterators[i] = None
-            num_open -= 1
-            break
-
-  def testPythonImplementation(self):
-    input_lists = [[4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6],
-                   [4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6]]
-
-    # Cycle length 1 acts like `Dataset.flat_map()`.
-    expected_elements = itertools.chain(*input_lists)
-    for expected, produced in zip(
-        expected_elements, self._interleave(input_lists, 1, 1)):
-      self.assertEqual(expected, produced)
+      if open_iterators[i] is None:
+        if all_iterators:
+          open_iterators[i] = all_iterators.pop(0)
+          num_open += 1
+        else:
+          continue
+      for _ in range(block_length):
+        try:
+          yield next(open_iterators[i])
+        except StopIteration:
+          open_iterators[i] = None
+          num_open -= 1
+          break
 
-    # Cycle length > 1.
-    expected_elements = [4, 5, 4, 5, 4, 5, 4,
-                         5, 5, 6, 6,  # NOTE(mrry): When we cycle back
-                                      # to a list and are already at
-                                      # the end of that list, we move
-                                      # on to the next element.
-                         4, 6, 4, 6, 4, 6, 4, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5]
-    for expected, produced in zip(
-        expected_elements, self._interleave(input_lists, 2, 1)):
-      self.assertEqual(expected, produced)
 
-    # Cycle length > 1 and block length > 1.
-    expected_elements = [4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6,
-                         4, 5, 5, 5, 6, 6, 6, 5, 5, 6, 6, 6]
-    for expected, produced in zip(
-        expected_elements, self._interleave(input_lists, 2, 3)):
-      self.assertEqual(expected, produced)
+def _make_coordinated_sloppy_dataset(input_values, cycle_length, block_length,
+                                     num_parallel_calls):
+  """Produces a dataset iterator and events to control the order of elements.
+
+  Args:
+    input_values: the values to generate lists to interleave from
+    cycle_length: the length of the interleave cycle
+    block_length: the length of the interleave block
+    num_parallel_calls: the degree of interleave parallelism
+
+  Returns:
+    A dataset iterator (represented as `get_next` op) and events that can be
+    used to control the order of output elements.
+  """
+
+  # Set up threading events used to sequence when items are produced that
+  # are subsequently interleaved. These events allow us to deterministically
+  # simulate slowdowns and force sloppiness.
+  coordination_events = {i: threading.Event() for i in input_values}
+
+  def map_py_fn(x):
+    coordination_events[x].wait()
+    coordination_events[x].clear()
+    return x * x
+
+  def map_fn(x):
+    return script_ops.py_func(map_py_fn, [x], x.dtype)
+
+  def interleave_fn(x):
+    dataset = dataset_ops.Dataset.from_tensors(x)
+    dataset = dataset.repeat(x)
+    return dataset.map(map_fn)
+
+  options = dataset_ops.Options()
+  options.experimental_deterministic = False
+  dataset = dataset_ops.Dataset.from_tensor_slices(input_values).repeat(
+      2).interleave(interleave_fn, cycle_length, block_length,
+                    num_parallel_calls).with_options(options)
+  iterator = dataset.make_one_shot_iterator()
+  get_next = iterator.get_next()
+  return get_next, coordination_events
+
+
+def _repeat(values, count):
+  """Produces a list of lists suitable for testing interleave.
+
+  Args:
+    values: for each element `x` the result contains `[x] * x`
+    count: determines how many times to repeat `[x] * x` in the result
+
+  Returns:
+    A list of lists of values suitable for testing interleave.
+  """
+  return [[value] * value for value in np.tile(values, count)]
+
+
+class InterleaveDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ("1", [4, 5, 6], 1, 1, [
+          4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 5, 5, 5, 5,
+          5, 6, 6, 6, 6, 6, 6
+      ]),
+      ("2", [4, 5, 6], 2, 1, [
+          4, 5, 4, 5, 4, 5, 4, 5, 5, 6, 6, 4, 6, 4, 6, 4, 6, 4, 6, 5, 6, 5, 6,
+          5, 6, 5, 6, 5, 6, 6
+      ]),
+      ("3", [4, 5, 6], 2, 3, [
+          4, 4, 4, 5, 5, 5, 4, 5, 5, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 5, 5, 5, 6,
+          6, 6, 5, 5, 6, 6, 6
+      ]),
+      ("4", [4, 5, 6], 7, 2, [
+          4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6,
+          6, 5, 6, 6, 5, 6, 6
+      ]),
+      ("5", [4, 0, 6], 2, 1,
+       [4, 4, 6, 4, 6, 4, 6, 6, 4, 6, 4, 6, 4, 4, 6, 6, 6, 6, 6, 6]),
+  )
+  def testPythonImplementation(self, input_values, cycle_length, block_length,
+                               expected_elements):
+    input_lists = _repeat(input_values, 2)
 
-    # Cycle length > len(input_values).
-    expected_elements = [4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6, 4, 4, 5, 5, 6, 6,
-                         4, 4, 5, 5, 6, 6, 5, 6, 6, 5, 6, 6]
     for expected, produced in zip(
-        expected_elements, self._interleave(input_lists, 7, 2)):
+        expected_elements, _interleave(input_lists, cycle_length,
+                                       block_length)):
       self.assertEqual(expected, produced)
 
   @parameterized.named_parameters(
@@ -129,15 +193,9 @@ class InterleaveDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
             cycle_length, block_length, num_parallel_calls)
     get_next = dataset.make_one_shot_iterator().get_next()
 
-    def repeat(values, count):
-      result = []
-      for value in values:
-        result.append([value] * value)
-      return result * count
-
     with self.cached_session() as sess:
-      for expected_element in self._interleave(
-          repeat(input_values, count), cycle_length, block_length):
+      for expected_element in _interleave(
+          _repeat(input_values, count), cycle_length, block_length):
         self.assertEqual(expected_element, sess.run(get_next))
 
       for _ in range(2):
@@ -159,10 +217,7 @@ class InterleaveDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("12", np.float32([1., np.nan, 2., np.nan, 3.]), 7, 2, 5),
       ("13", np.float32([1., np.nan, 2., np.nan, 3.]), 7, 2, 7),
   )
-  def testInterleaveErrorDataset(self,
-                                 input_values,
-                                 cycle_length,
-                                 block_length,
+  def testInterleaveDatasetError(self, input_values, cycle_length, block_length,
                                  num_parallel_calls):
     dataset = dataset_ops.Dataset.from_tensor_slices(input_values).map(
         lambda x: array_ops.check_numerics(x, "message")).interleave(
@@ -180,7 +235,7 @@ class InterleaveDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
-  def testSparse(self):
+  def testInterleaveSparse(self):
 
     def _map_fn(i):
       return sparse_tensor.SparseTensorValue(
@@ -192,12 +247,10 @@ class InterleaveDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     iterator = (
         dataset_ops.Dataset.range(10).map(_map_fn).interleave(
-            _interleave_fn, cycle_length=1).make_initializable_iterator())
-    init_op = iterator.initializer
+            _interleave_fn, cycle_length=1).make_one_shot_iterator())
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
       for i in range(10):
         for j in range(2):
           expected = [i, 0] if j % 2 == 0 else [0, -i]
@@ -205,6 +258,60 @@ class InterleaveDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  @parameterized.named_parameters(
+      ("1", np.int64([4, 5, 6]), 2, 1, 1),
+      ("2", np.int64([4, 5, 6]), 2, 1, 2),
+      ("3", np.int64([4, 5, 6]), 2, 3, 1),
+      ("4", np.int64([4, 5, 6]), 2, 3, 2),
+      ("5", np.int64([4, 5, 6]), 3, 2, 1),
+      ("6", np.int64([4, 5, 6]), 3, 2, 2),
+      ("7", np.int64([4, 5, 6]), 3, 2, 3),
+      ("8", np.int64([4, 0, 6]), 2, 3, 1),
+      ("9", np.int64([4, 0, 6]), 2, 3, 2),
+  )
+  def testSloppyInterleaveInOrder(self, input_values, cycle_length,
+                                  block_length, num_parallel_calls):
+    get_next, coordination_events = _make_coordinated_sloppy_dataset(
+        input_values, cycle_length, block_length, num_parallel_calls)
+    config = config_pb2.ConfigProto(
+        inter_op_parallelism_threads=num_parallel_calls + 1,
+        use_per_session_threads=True)
+    with self.cached_session(config=config) as sess:
+      for expected_element in _interleave(
+          _repeat(input_values, 2), cycle_length, block_length):
+        coordination_events[expected_element].set()
+        self.assertEqual(expected_element * expected_element,
+                         sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  @parameterized.named_parameters(
+      ("1", np.int64([4, 5, 6]), 2, 1, 2),
+      ("2", np.int64([4, 5, 6]), 2, 3, 2),
+      ("3", np.int64([4, 5, 6]), 3, 2, 3),
+      ("4", np.int64([4, 0, 6]), 2, 3, 2),
+  )
+  def testSloppyInterleaveOutOfOrder(self, input_values, cycle_length,
+                                     block_length, num_parallel_calls):
+    get_next, coordination_events = _make_coordinated_sloppy_dataset(
+        input_values, cycle_length, block_length, num_parallel_calls)
+    config = config_pb2.ConfigProto(
+        inter_op_parallelism_threads=num_parallel_calls + 1,
+        use_per_session_threads=True)
+    with self.cached_session(config=config) as sess:
+      elements = [
+          x for x in _interleave(
+              _repeat(input_values, 2), cycle_length, block_length)
+      ]
+      for i in [1, 4, 7]:
+        elements[i], elements[i + 1] = elements[i + 1], elements[i]
+
+      for element in elements:
+        coordination_events[element].set()
+        self.assertEqual(element * element, sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
index 4683b1db91..7767c08e61 100644
--- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py
@@ -26,6 +26,7 @@ from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.core.framework import attr_value_pb2
+from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
@@ -48,6 +49,40 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import test
 
 
+def _make_coordinated_sloppy_dataset(num_elements, num_parallel_calls):
+  """Produces a dataset iterator and events to control the order of elements.
+
+  Args:
+    num_elements: the number of input elements
+    num_parallel_calls: the degree of map parallelism
+
+  Returns:
+    A dataset iterator (represented as `get_next` op) and events that can be
+    used to control the order of output elements.
+  """
+
+  # Set up threading events used to sequence when items are produced that
+  # are subsequently interleaved. These events allow us to deterministically
+  # simulate slowdowns and force sloppiness.
+  coordination_events = {i: threading.Event() for i in range(num_elements)}
+
+  def map_py_fn(x):
+    coordination_events[x].wait()
+    coordination_events[x].clear()
+    return x * x
+
+  def map_fn(x):
+    return script_ops.py_func(map_py_fn, [x], x.dtype)
+
+  options = dataset_ops.Options()
+  options.experimental_deterministic = False
+  dataset = dataset_ops.Dataset.range(num_elements).map(
+      map_fn, num_parallel_calls).with_options(options)
+  iterator = dataset.make_one_shot_iterator()
+  next_element = iterator.get_next()
+  return next_element, coordination_events
+
+
 class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _buildMapDataset(self, components, count):
@@ -820,6 +855,49 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(iterator.initializer, feed_dict={captured_t: 42})
       self.assertEqual(42, sess.run(get_next))
 
+  @parameterized.named_parameters(
+      ("1", 1, 1),
+      ("2", 10, 1),
+      ("3", 10, 10),
+      ("4", 100, 1),
+      ("5", 100, 10),
+      ("6", 100, 100),
+  )
+  def testSloppyInterleaveInOrder(self, num_elements, num_parallel_calls):
+    get_next, coordination_events = _make_coordinated_sloppy_dataset(
+        num_elements, num_parallel_calls)
+    config = config_pb2.ConfigProto(
+        inter_op_parallelism_threads=num_parallel_calls + 1,
+        use_per_session_threads=True)
+    with self.cached_session(config=config) as sess:
+      for i in range(num_elements):
+        coordination_events[i].set()
+        self.assertEqual(i * i, sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
+  @parameterized.named_parameters(
+      ("1", 10, 10),
+      ("2", 100, 10),
+      ("3", 100, 100),
+  )
+  def testSloppyInterleaveOutOfOrder(self, num_elements, num_parallel_calls):
+    get_next, coordination_events = _make_coordinated_sloppy_dataset(
+        num_elements, num_parallel_calls)
+    config = config_pb2.ConfigProto(
+        inter_op_parallelism_threads=num_parallel_calls + 1,
+        use_per_session_threads=True)
+    with self.cached_session(config=config) as sess:
+      elements = [x for x in range(num_elements)]
+      for i in [1, 4, 7]:
+        elements[i], elements[i + 1] = elements[i + 1], elements[i]
+
+      for element in elements:
+        coordination_events[element].set()
+        self.assertEqual(element * element, sess.run(get_next))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
 
 class MapDatasetBenchmark(test.Benchmark):
 
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 5dbb017f2b..2b32be7d46 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1402,6 +1402,8 @@ class Options(object):
       ("experimental_autotune", bool,
        "Whether to dynamically adjust the values of tunable parameters (e.g. "
        "degrees of parallelism)."),
+      ("experimental_deterministic", bool,
+       "Whether the outputs need to be produced in deterministic order."),
       ("experimental_filter_fusion", bool,
        "Whether to fuse filter transformations."),
       ("experimental_hoist_random_uniform", bool,
@@ -1475,6 +1477,8 @@ class Options(object):
 
     if getattr(self, "experimental_numa_aware"):
       result.append("map_and_batch_numa_aware_replacement")
+    if getattr(self, "experimental_deterministic") is False:
+      result.append("make_sloppy")
     return result
 
   def merge(self, options):
@@ -1496,13 +1500,19 @@ class Options(object):
     result = Options()
     for other in [self, options]:
       for name in [
-          "experimental_autotune", "experimental_filter_fusion",
-          "experimental_hoist_random_uniform", "experimental_latency_all_edges",
+          "experimental_autotune",
+          "experimental_deterministic",
+          "experimental_filter_fusion",
+          "experimental_hoist_random_uniform",
+          "experimental_latency_all_edges",
           "experimental_map_and_batch_fusion",
-          "experimental_map_and_filter_fusion", "experimental_map_fusion",
-          "experimental_map_parallelization", "experimental_map_vectorization",
+          "experimental_map_and_filter_fusion",
+          "experimental_map_fusion",
+          "experimental_map_parallelization",
+          "experimental_map_vectorization",
           "experimental_noop_elimination",
-          "experimental_shuffle_and_repeat_fusion", "experimental_numa_aware",
+          "experimental_numa_aware",
+          "experimental_shuffle_and_repeat_fusion",
       ]:
         this = getattr(result, name)
         that = getattr(other, name)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
index 22256996d3..9f4de74c39 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-options.pbtxt
@@ -6,6 +6,10 @@ tf_class {
     name: "experimental_autotune"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "experimental_deterministic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "experimental_filter_fusion"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
index 22256996d3..9f4de74c39 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-options.pbtxt
@@ -6,6 +6,10 @@ tf_class {
     name: "experimental_autotune"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "experimental_deterministic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "experimental_filter_fusion"
     mtype: "<type \'property\'>"
-- 
GitLab


From 0d5b9d20cc3e3062aa4d443bc772bb3aed698d15 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Mon, 15 Oct 2018 14:38:07 -0700
Subject: [PATCH 0997/1085] handle the case that the input patterns contains
 both windows and other-FS paths

---
 tensorflow/core/kernels/data/matching_files_dataset_op.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/kernels/data/matching_files_dataset_op.cc b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
index 32aaf6a573..09517ac264 100644
--- a/tensorflow/core/kernels/data/matching_files_dataset_op.cc
+++ b/tensorflow/core/kernels/data/matching_files_dataset_op.cc
@@ -151,6 +151,8 @@ class MatchingFilesDatasetOp : public DatasetOpKernel {
               isWindows_ = true;
               std::replace(current_pattern_.begin(), current_pattern_.end(),
                            '\\', '/');
+            } else {
+              isWindows_ = false;
             }
 
             StringPiece fixed_prefix =
-- 
GitLab


From cdfbdffea6ff87e3e18fe76129907c61366e1a30 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 15 Oct 2018 14:53:54 -0700
Subject: [PATCH 0998/1085] Fix typo.

PiperOrigin-RevId: 217212365
---
 tensorflow/python/autograph/converters/call_trees_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/autograph/converters/call_trees_test.py b/tensorflow/python/autograph/converters/call_trees_test.py
index 8dbb91da3e..04881873a8 100644
--- a/tensorflow/python/autograph/converters/call_trees_test.py
+++ b/tensorflow/python/autograph/converters/call_trees_test.py
@@ -133,7 +133,7 @@ class CallTreesTest(converter_testing.TestCase):
         result_tensor = result.test_fn(constant_op.constant(1))
         self.assertEquals(sess.run(result_tensor), 3)
 
-  def test_call_to_decotated_function(self):
+  def test_call_to_decorated_function(self):
 
     def decorator(f):
       return f
-- 
GitLab


From bf36f984cbc1de40abb2453bceddbf1c37b708e8 Mon Sep 17 00:00:00 2001
From: Brian Nemsick <brian.nemsick@gmail.com>
Date: Mon, 15 Oct 2018 15:05:21 -0700
Subject: [PATCH 0999/1085] Add .hdf5 to _is_hdf5_filepath

---
 tensorflow/python/keras/engine/network.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 4d0626cc66..b983055610 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -1674,7 +1674,7 @@ class Network(base_layer.Layer):
 
 
 def _is_hdf5_filepath(filepath):
-  return filepath.endswith('.h5') or filepath.endswith('.keras')
+  return filepath.endswith('.h5') or filepath.endswith('.keras') or filepath.endswith('.hdf5')
 
 
 def _make_node_key(layer_name, node_index):
-- 
GitLab


From f1da1349a1e3bfa61cbbb6358422080d302e4703 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Mon, 15 Oct 2018 15:08:38 -0700
Subject: [PATCH 1000/1085] Improve comments in schema.fbs

PiperOrigin-RevId: 217215162
---
 tensorflow/contrib/lite/schema/schema.fbs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index 73805b7618..fe3dc56e65 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -25,6 +25,9 @@ file_identifier "TFL3";
 // File extension of any written files.
 file_extension "tflite";
 
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
 // The type of data stored in a tensor.
 enum TensorType : byte {
   FLOAT32 = 0,
-- 
GitLab


From 406e48d628e779581f9a5841ec2c77d5cf339ff7 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 15 Oct 2018 15:19:05 -0700
Subject: [PATCH 1001/1085] Fix line length lint issue

---
 tensorflow/python/keras/engine/network.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index b983055610..37cbcd18e6 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -1674,7 +1674,9 @@ class Network(base_layer.Layer):
 
 
 def _is_hdf5_filepath(filepath):
-  return filepath.endswith('.h5') or filepath.endswith('.keras') or filepath.endswith('.hdf5')
+  return (filepath.endswith('.h5')
+          or filepath.endswith('.keras')
+          or filepath.endswith('.hdf5'))
 
 
 def _make_node_key(layer_name, node_index):
-- 
GitLab


From e20a79d4af7e9919af96b13052b044ec5af485fa Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 15:20:18 -0700
Subject: [PATCH 1002/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 217217093
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 186 ++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 |  21 ++
 2 files changed, 207 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index ab1efff6ed..63d037c743 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -38814,6 +38814,61 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "ParallelInterleaveDatasetV2"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "cycle_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "block_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "sloppy"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "ParallelMapDataset"
   input_arg {
@@ -38942,6 +38997,60 @@ op {
     }
   }
 }
+op {
+  name: "ParallelMapDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "use_inter_op_parallelism"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "sloppy"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "ParameterizedTruncatedNormal"
   input_arg {
@@ -39221,6 +39330,83 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "ParseExampleDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_VARIANT
+  }
+  input_arg {
+    name: "num_parallel_calls"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "dense_defaults"
+    type_list_attr: "Tdense"
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  attr {
+    name: "sparse_keys"
+    type: "list(string)"
+    has_minimum: true
+  }
+  attr {
+    name: "dense_keys"
+    type: "list(string)"
+    has_minimum: true
+  }
+  attr {
+    name: "sparse_types"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "Tdense"
+    type: "list(type)"
+    has_minimum: true
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_INT64
+        type: DT_STRING
+      }
+    }
+  }
+  attr {
+    name: "dense_shapes"
+    type: "list(shape)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "sloppy"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "ParseSequenceExample"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 15ed2ac21b..9a566c9d84 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -19319,6 +19319,13 @@ op {
     has_minimum: true
     minimum: 1
   }
+  attr {
+    name: "sloppy"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
   name: "ParallelMapDataset"
@@ -19366,6 +19373,13 @@ op {
       b: true
     }
   }
+  attr {
+    name: "sloppy"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
   name: "ParameterizedTruncatedNormal"
@@ -19582,6 +19596,13 @@ op {
     has_minimum: true
     minimum: 1
   }
+  attr {
+    name: "sloppy"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
 }
 op {
   name: "ParseSequenceExample"
-- 
GitLab


From 1cb157894090a6ab128a45d075a17c0577a3a853 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 15:22:29 -0700
Subject: [PATCH 1003/1085] Wrap dict.values() in list to make it python3
 compatible.

PiperOrigin-RevId: 217217469
---
 tensorflow/contrib/tpu/python/tpu/tpu_embedding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py
index 741d2b759f..cd9bfbcdce 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py
@@ -636,7 +636,7 @@ class TPUEmbedding(object):
         contiguous_device = device
 
   def _generate_enqueue_op(self, sparse_features, device_ordinal):
-    with ops.colocate_with(sparse_features.values()[0]):
+    with ops.colocate_with(list(sparse_features.values())[0]):
       sample_idcs, embedding_idcs, aggregation_weights = (
           self._format_for_tpu_embedding_sparse_batch(sparse_features))
       return tpu_ops.enqueue_tpu_embedding_sparse_batch(
-- 
GitLab


From 5a6fcf3c0cbea5d01d7a231f3a8d6ca210f2d270 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Mon, 15 Oct 2018 15:24:46 -0700
Subject: [PATCH 1004/1085] Correct the error message in
 ModifyGraphsWithDelegate.

PiperOrigin-RevId: 217217865
---
 tensorflow/contrib/lite/interpreter.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc
index 88e41ffc55..c72e7bf33e 100644
--- a/tensorflow/contrib/lite/interpreter.cc
+++ b/tensorflow/contrib/lite/interpreter.cc
@@ -952,7 +952,10 @@ TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate,
       }
     }
     if (has_dynamic_tensors) {
-      ReportError(&context_, "Attempting to resize a fixed-size tensor.");
+      ReportError(
+          &context_,
+          "Attempting to use a delegate that only supports static-sized "
+          "tensors with a graph that has dynamic-sized tensors.");
       return kTfLiteError;
     }
   }
-- 
GitLab


From d1679ef92c1a88f95935aeea14b384ad6c7e8084 Mon Sep 17 00:00:00 2001
From: Guangda Lai <31743510+aaroey@users.noreply.github.com>
Date: Mon, 15 Oct 2018 15:49:01 -0700
Subject: [PATCH 1005/1085] Fix converter for Const op and add corresponding
 unit tests.

---
 tensorflow/contrib/tensorrt/BUILD             |  24 +
 .../contrib/tensorrt/convert/convert_nodes.cc | 921 +++++++-----------
 .../contrib/tensorrt/convert/convert_nodes.h  | 159 +++
 .../tensorrt/convert/convert_nodes_test.cc    | 646 ++++++++++++
 4 files changed, 1200 insertions(+), 550 deletions(-)
 create mode 100644 tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc

diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index d756857f18..260294ecd8 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -326,6 +326,30 @@ tf_cuda_cc_test(
     ]),
 )
 
+tf_cuda_cc_test(
+    name = "convert_nodes_test",
+    size = "medium",
+    srcs = ["convert/convert_nodes_test.cc"],
+    tags = [
+        "no_cuda_on_cpu_tap",
+        "no_windows",
+        "nomac",
+    ],
+    deps = [
+        ":trt_logging",
+        ":trt_conversion",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:core_cpu_base",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ] + if_tensorrt([
+        "@local_config_tensorrt//:nv_infer",
+    ]),
+)
+
 # Library for the segmenting portion of TensorRT operation creation
 cc_library(
     name = "segment",
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index b3f5fcd4f9..c5f70fa245 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -17,12 +17,10 @@ limitations under the License.
 
 #include <algorithm>
 #include <cstring>
-#include <list>
 #include <map>
 #include <memory>
 #include <set>
 #include <unordered_map>
-#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -87,14 +85,13 @@ using ::tensorflow::str_util::Split;
 using ::tensorflow::strings::StrAppend;
 using ::tensorflow::strings::StrCat;
 
-namespace {
-
 inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype,
                                        nvinfer1::DataType* trt_dtype) {
   switch (tf_dtype) {
     case tensorflow::DataType::DT_FLOAT:
       *trt_dtype = nvinfer1::DataType::kFLOAT;
       break;
+    // TODO(aaroey): this should be DT_QINT8 which is not a well supported type.
     case tensorflow::DataType::DT_INT8:
       *trt_dtype = nvinfer1::DataType::kINT8;
       break;
@@ -169,13 +166,18 @@ tensorflow::Status ValidateInputProperties(const PartialTensorShape& shape,
 
 string DebugString(const nvinfer1::Dims& dims) {
   string out = StrCat("nvinfer1::Dims(nbDims=", dims.nbDims, ", d=");
-  for (int i = 0; i < nvinfer1::Dims::MAX_DIMS; ++i) {
+  for (int i = 0; i < dims.nbDims; ++i) {
     StrAppend(&out, dims.d[i], ",");
   }
   StrAppend(&out, ")");
   return out;
 }
 
+string DebugString(const nvinfer1::ITensor& tensor) {
+  return StrCat("nvinfer1::ITensor(@", reinterpret_cast<uintptr_t>(&tensor),
+                ", shape=", DebugString(tensor.getDimensions()), ")");
+}
+
 // Return whether or not the broadcast is feasible;
 bool TensorRTGetBroadcastShape(const nvinfer1::Dims& operand_l,
                                const bool operand_l_is_tensor,
@@ -268,7 +270,7 @@ inline bool DimsEqual(const nvinfer1::Dims& dim_l,
   return true;
 }
 
-inline nvinfer1::Dims GetTensorShape(const tensorflow::Tensor& tensor) {
+inline nvinfer1::Dims GetTrtDimsForTensor(const tensorflow::Tensor& tensor) {
   nvinfer1::Dims dims;
   dims.nbDims = tensor.dims();
   for (int i = 0; i < dims.nbDims; i++) {
@@ -277,11 +279,14 @@ inline nvinfer1::Dims GetTensorShape(const tensorflow::Tensor& tensor) {
   return dims;
 }
 
-inline int64_t GetShapeSize(const nvinfer1::Dims& shape) {
-  // Returns total number of elements in shape
+// Returns total number of elements in dims. Returning 0 means either some dim
+// is 0 or the number of dims is 0.
+// Note that for TF scalar constant, we always convert to dims [1].
+int64_t TrtDimsNumElements(const nvinfer1::Dims& dims) {
+  if (dims.nbDims == 0) return 0;
   int64_t count = 1;
-  for (int d = 0; d < shape.nbDims; ++d) {
-    count *= shape.d[d];
+  for (int d = 0; d < dims.nbDims; ++d) {
+    count *= dims.d[d];
   }
   return count;
 }
@@ -320,133 +325,64 @@ string GetCommonNameScope(const string& op_name_a, const string& op_name_b) {
   return op_name_a.substr(0, last_scope_separator);
 }
 
-// Class to convert TF weight to TRT weight.
-class TRT_ShapedWeights {
- public:
-  TRT_ShapedWeights(tensorflow::DataType type, const void* values,
-                    nvinfer1::Dims shape)
-      : shape_(shape), type_(type), values_(values), empty_weight_flag_(false) {
-    // Note: this->shape.type[] is not used
-  }
-
-  explicit TRT_ShapedWeights(tensorflow::DataType type)
-      : shape_(), type_(type), values_(nullptr), empty_weight_flag_(true) {}
-
-  // TODO(aaroey): use rvalue reference.
-  TRT_ShapedWeights(const TRT_ShapedWeights& rhs)
-      : shape_(rhs.shape_),
-        type_(rhs.type_),
-        values_(rhs.values_),
-        empty_weight_flag_(rhs.empty_weight_flag_) {}
-
-  // TODO(aaroey): use GetShapeSize() instead.
-  int64_t count() const {
-    int64_t c = 1;
-    for (int i = 0; i < shape_.nbDims; i++) c *= shape_.d[i];
-    return c;
-  }
-
-  nvinfer1::Weights GetWeightsForTRT() const {
-    nvinfer1::DataType trt_type(nvinfer1::DataType::kFLOAT);
-    TF_CHECK_OK(ConvertDType(type_, &trt_type));
-    if (empty_weight_flag_) return nvinfer1::Weights{trt_type, nullptr, 0};
-
-    // Note: this->shape.type[] is not used
-    return nvinfer1::Weights{trt_type, GetValues(), GetShapeSize(shape_)};
-  }
-
-  const void* GetValues() const { return values_; }
-
-  // TODO(aaroey): get rid of this method.
-  void SetValues(const void* values) { values_ = values; }
-
-  size_t size_bytes() const {
-    int type_size = tensorflow::DataTypeSize(this->type_);
-    return this->count() * type_size;
-  }
-
-  // Default converter
-  operator nvinfer1::Weights() const { return GetWeightsForTRT(); }
-
-  string DebugString() const {
-    return StrCat(
-        "TRT_ShapedWeights(shape=", convert::DebugString(shape_), ", type=",
-        type_, ", values=", reinterpret_cast<uintptr_t>(values_),
-        ", empty_weight_flag=", empty_weight_flag_, ")");
-  }
-
-  // TODO(aaroey): make these private.
-  nvinfer1::Dims shape_;
-  tensorflow::DataType type_;
-
- private:
-  // TODO(aaroey): this should not be const as it's always from TRTWeightStore.
-  const void* values_;
-  bool empty_weight_flag_;
-};
+TRT_ShapedWeights::TRT_ShapedWeights(
+    tensorflow::DataType type, const void* values, nvinfer1::Dims shape)
+    : shape_(shape), type_(type), values_(CHECK_NOTNULL(values)) {}
 
-class TRT_TensorOrWeights {
- public:
-  explicit TRT_TensorOrWeights(nvinfer1::ITensor* tensor)
-      : tensor_(tensor), weights_(DT_FLOAT), variant_(TRT_NODE_TENSOR) {}
+TRT_ShapedWeights::TRT_ShapedWeights(tensorflow::DataType type)
+    : shape_(), type_(type), values_(nullptr) {
+  shape_.nbDims = 0;
+}
 
-  explicit TRT_TensorOrWeights(const TRT_ShapedWeights& weights)
-      : tensor_(nullptr), weights_(weights), variant_(TRT_NODE_WEIGHTS) {}
+TRT_ShapedWeights::TRT_ShapedWeights(const TRT_ShapedWeights& rhs)
+    : shape_(rhs.shape_), type_(rhs.type_), values_(rhs.values_) {}
 
-  // TODO(aaroey): use rvalue reference.
-  TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs)
-      : tensor_(rhs.tensor_), weights_(rhs.weights_), variant_(rhs.variant_) {}
+int64_t TRT_ShapedWeights::count() const { return TrtDimsNumElements(shape_); }
 
-  ~TRT_TensorOrWeights() {}
+nvinfer1::Weights TRT_ShapedWeights::GetWeightsForTRT() const {
+  nvinfer1::DataType trt_type(nvinfer1::DataType::kFLOAT);
+  TF_CHECK_OK(ConvertDType(type_, &trt_type));
+  return nvinfer1::Weights{trt_type, values_, values_ == nullptr ? 0 : count()};
+}
 
-  bool is_tensor() const { return variant_ == TRT_NODE_TENSOR; }
-  bool is_weights() const { return variant_ == TRT_NODE_WEIGHTS; }
+size_t TRT_ShapedWeights::size_bytes() const {
+  return this->count() * tensorflow::DataTypeSize(this->type_);
+}
 
-  nvinfer1::ITensor* tensor() {
-    CHECK(is_tensor());
-    return tensor_;
-  }
+string TRT_ShapedWeights::DebugString() const {
+  return StrCat(
+      "TRT_ShapedWeights(shape=", convert::DebugString(shape_), ", type=",
+      type_, ", values=", reinterpret_cast<uintptr_t>(values_), ")");
+}
 
-  const nvinfer1::ITensor* tensor() const {
-    CHECK(is_tensor());
-    return tensor_;
-  }
+TRT_TensorOrWeights::TRT_TensorOrWeights(nvinfer1::ITensor* tensor)
+    : tensor_(tensor), weights_(DT_FLOAT), is_tensor_(true) {}
 
-  TRT_ShapedWeights& weights() {
-    CHECK(is_weights());
-    return weights_;
-  }
+TRT_TensorOrWeights::TRT_TensorOrWeights(const TRT_ShapedWeights& weights)
+    : tensor_(nullptr), weights_(weights), is_tensor_(false) {}
 
-  const TRT_ShapedWeights& weights() const {
-    CHECK(is_weights());
-    return weights_;
-  }
+TRT_TensorOrWeights::TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs)
+    : tensor_(rhs.tensor_), weights_(rhs.weights_), is_tensor_(rhs.is_tensor_) {}
 
-  nvinfer1::Dims shape() const {
-    if (is_tensor()) {
-      return tensor()->getDimensions();
-    } else {
-      return weights().shape_;
-    }
+nvinfer1::Dims TRT_TensorOrWeights::shape() const {
+  if (is_tensor()) {
+    return tensor()->getDimensions();
+  } else {
+    return weights().shape_;
   }
+}
 
-  string DebugString() const {
-    string output = "TRT_TensorOrWeights(type=";
-    if (is_tensor()) {
-      StrAppend(&output, "tensor @", reinterpret_cast<uintptr_t>(tensor_),
-                ", shape=", convert::DebugString(tensor_->getDimensions()));
-    } else {
-      StrAppend(&output, "weights=", weights_.DebugString());
-    }
-    StrAppend(&output, ")");
-    return output;
+string TRT_TensorOrWeights::DebugString() const {
+  string output = "TRT_TensorOrWeights(type=";
+  if (is_tensor()) {
+    StrAppend(&output, "tensor @", reinterpret_cast<uintptr_t>(tensor_),
+              ", shape=", convert::DebugString(tensor_->getDimensions()));
+  } else {
+    StrAppend(&output, "weights=", weights_.DebugString());
   }
-
- private:
-  nvinfer1::ITensor* tensor_;
-  TRT_ShapedWeights weights_;
-  enum { TRT_NODE_TENSOR, TRT_NODE_WEIGHTS } variant_;
-};
+  StrAppend(&output, ")");
+  return output;
+}
 
 class TFAttrs {
  public:
@@ -503,12 +439,6 @@ std::vector<float> TFAttrs::get<std::vector<float>>(const string& key) const {
   return std::vector<float>(attr.begin(), attr.end());
 }
 
-template <>
-std::vector<string> TFAttrs::get<std::vector<string>>(const string& key) const {
-  auto attr = this->at(key)->list().s();
-  return std::vector<string>(attr.begin(), attr.end());
-}
-
 template <>
 nvinfer1::DataType TFAttrs::get<nvinfer1::DataType>(const string& key) const {
   nvinfer1::DataType trt_dtype(nvinfer1::DataType::kFLOAT);
@@ -640,239 +570,199 @@ void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights,
   }
 }
 
-class Converter;
-
-using OpConverter =
-    std::function<tensorflow::Status(Converter&, const tensorflow::NodeDef&,
-                                     const std::vector<TRT_TensorOrWeights>&,
-                                     std::vector<TRT_TensorOrWeights>*)>;
-
-class Converter {
- public:
-  explicit Converter(nvinfer1::INetworkDefinition* trt_network,
-                     TRTWeightStore* ws, bool fp16, int max_batch_size)
-      : trt_network_(trt_network),
-        weight_store_(ws),
-        fp16_(fp16),
-        max_batch_size_(max_batch_size) {
-    this->register_op_converters();
-  }
-
-  TRTWeightStore* weight_store() { return weight_store_; }
-
-  TRT_ShapedWeights get_temp_weights(tensorflow::DataType type,
-                                     nvinfer1::Dims shape) {
-    TRT_ShapedWeights weights(type, nullptr, shape);
-    // TODO(jie): check weights size_bytes. 0 means type error
-    weight_store_->store_.push_back(std::vector<uint8_t>(weights.size_bytes()));
-    weights.SetValues(weight_store_->store_.back().data());
-    return weights;
-  }
-
-  // TODO(aaroey): fix all the namings.
-  bool isFP16() { return fp16_; }
-
-  int GetMaxBatchSize() { return max_batch_size_; }
+Converter::Converter(nvinfer1::INetworkDefinition* trt_network, bool fp16,
+                     int max_batch_size)
+  : trt_network_(trt_network), fp16_(fp16), max_batch_size_(max_batch_size) {
+  this->RegisterOpConverters();
+}
 
-  TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) {
-    return this->get_temp_weights(weights.type_, weights.shape_);
-  }
+TRT_ShapedWeights Converter::GetTempWeights(tensorflow::DataType type,
+                                            const nvinfer1::Dims& dims) {
+  const int64_t size_bytes =
+      TrtDimsNumElements(dims) * tensorflow::DataTypeSize(type);
+  // TODO(jie): check weights size_bytes. 0 means type error
+  weight_store_.store_.push_back(std::vector<uint8_t>(size_bytes));
+  TRT_ShapedWeights weights(type, weight_store_.store_.back().data(), dims);
+  return weights;
+}
 
-  tensorflow::Status convert_node(const tensorflow::NodeDef& node_def) {
-    std::vector<TRT_TensorOrWeights> inputs;
-    TF_RETURN_IF_ERROR(this->get_inputs(node_def, &inputs));
-    const string& op = node_def.op();
-    std::vector<TRT_TensorOrWeights> outputs;
-    if (PluginFactoryTensorRT::GetInstance()->IsPlugin(op)) {
-      TF_RETURN_IF_ERROR(plugin_converter_(*this, node_def, inputs, &outputs));
-    } else {
-      if (!op_registry_.count(op)) {
-        return tensorflow::errors::Unimplemented(
-            "No converter registered for op: " + op);
+tensorflow::Status Converter::ConvertNode(const tensorflow::NodeDef& node_def) {
+  std::vector<TRT_TensorOrWeights> inputs;
+  TF_RETURN_IF_ERROR(this->GetInputs(node_def, &inputs));
+  const string& op = node_def.op();
+  std::vector<TRT_TensorOrWeights> outputs;
+  if (PluginFactoryTensorRT::GetInstance()->IsPlugin(op)) {
+    TF_RETURN_IF_ERROR(plugin_converter_(*this, node_def, inputs, &outputs));
+  } else {
+    if (!op_registry_.count(op)) {
+      return tensorflow::errors::Unimplemented(
+          "No converter registered for op: " + op);
+    }
+    OpConverter op_converter = op_registry_.at(op);
+    TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs));
+  }
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    TRT_TensorOrWeights& output = outputs[i];
+    string output_name = node_def.name();
+    if (i != 0) output_name = StrCat(output_name, ":", i);
+    // We need to check the name before setting it. For Identity op where the
+    // output is the input, if its input is one of the engine input, setting
+    // the name here will overwrite engine input bindings which will cause
+    // runtime error.
+    if (output.is_tensor()) {
+      const char* tensor_name = output.tensor()->getName();
+      if (tensor_name == nullptr || std::strlen(tensor_name) == 0) {
+        output.tensor()->setName(output_name.c_str());
       }
-      OpConverter op_converter = op_registry_.at(op);
-      TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs));
     }
-    for (size_t i = 0; i < outputs.size(); ++i) {
-      TRT_TensorOrWeights& output = outputs[i];
-      // TODO(jie): tf protobuf seems to be omitting the :0 suffix
-      string output_name = node_def.name();
-      if (i != 0) output_name = StrCat(output_name, ":", i);
-      // We need to check the name before setting it. For Identity op where the
-      // output is the input, if its input is one of the engine input, setting
-      // the name here will overwrite engine input bindings which will cause
-      // runtime error.
-      if (output.is_tensor()) {
-        const char* tensor_name = output.tensor()->getName();
-        if (tensor_name == nullptr || std::strlen(tensor_name) == 0) {
-          output.tensor()->setName(output_name.c_str());
-        }
-      }
-      VLOG(2) << "Adding out tensor " << output_name << ": "
-              << output.DebugString();
-      if (!trt_tensors_.insert({output_name, output}).second) {
-        return tensorflow::errors::AlreadyExists(
-            "Output tensor already exists for op: " + op);
-      }
+    VLOG(2) << "Adding out tensor " << output_name << ": "
+            << output.DebugString();
+    if (!trt_tensors_.insert({output_name, output}).second) {
+      return tensorflow::errors::AlreadyExists(
+          "Output tensor already exists for op: " + op);
     }
-    return tensorflow::Status::OK();
   }
+  return tensorflow::Status::OK();
+}
 
-  nvinfer1::INetworkDefinition* network() { return trt_network_; }
-
-  TRT_TensorOrWeights get_tensor(const string& name) {
-    if (!trt_tensors_.count(name)) {
-      return TRT_TensorOrWeights(nullptr);
-    }
-    return trt_tensors_.at(name);
+TRT_TensorOrWeights Converter::GetTensorOrWeights(const string& name) {
+  if (!trt_tensors_.count(name)) {
+    return TRT_TensorOrWeights(nullptr);
   }
+  return trt_tensors_.at(name);
+}
 
-  bool insert_input_tensor(const string& name, nvinfer1::ITensor* tensor) {
-    return trt_tensors_.insert({name, TRT_TensorOrWeights(tensor)}).second;
+Status Converter::AddInputTensor(
+    const string& name, nvinfer1::ITensor* tensor) {
+  if (!trt_tensors_.insert({name, TRT_TensorOrWeights(tensor)}).second) {
+    return errors::AlreadyExists("Input tensor already exists for op: ", name);
   }
+  return Status::OK();
+}
 
-  tensorflow::Status TransposeTensor(nvinfer1::ITensor* input_tensor,
-                                     const std::vector<int>& order,
-                                     const nvinfer1::ITensor** output_tensor) {
-    const auto dims = input_tensor->getDimensions();
+Status Converter::TransposeTensor(nvinfer1::ITensor* input_tensor,
+                                  const std::vector<int>& order_with_batch_dim,
+                                  const nvinfer1::ITensor** output_tensor) {
+  const auto dims = input_tensor->getDimensions();
 
-    if (order.size() - 1 != size_t(dims.nbDims)) {
-      return tensorflow::errors::InvalidArgument(
+  if (order_with_batch_dim.size() - 1 != size_t(dims.nbDims)) {
+    return tensorflow::errors::InvalidArgument(
         "Rank of perm for transpose does not match with that of the input.");
-    }
-    if (order[0] != 0) {
-      return tensorflow::errors::Unimplemented(
+  }
+  if (order_with_batch_dim[0] != 0) {
+    return tensorflow::errors::Unimplemented(
         "Transpose at batch dimension is not supported.");
-    }
-
-    nvinfer1::IShuffleLayer* layer = this->network()->addShuffle(*input_tensor);
-    TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Transpose");
+  }
 
-    nvinfer1::Permutation permutation;
-    for (int32_t i = 0; i < dims.nbDims; ++i) {
-      permutation.order[i] = order[i + 1] - 1;
-    }
-    layer->setFirstTranspose(permutation);
+  nvinfer1::IShuffleLayer* layer = this->network()->addShuffle(*input_tensor);
+  TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Transpose");
 
-    nvinfer1::Dims reshape_dims;
-    reshape_dims.nbDims = dims.nbDims;
-    for (int32_t i = 0; i < reshape_dims.nbDims; ++i) {
-      reshape_dims.d[i] = 0;
-      reshape_dims.type[i] = dims.type[i];
-    }
-    layer->setReshapeDimensions(reshape_dims);
+  nvinfer1::Permutation permutation;
+  for (int32_t i = 0; i < dims.nbDims; ++i) {
+    permutation.order[i] = order_with_batch_dim[i + 1] - 1;
+  }
+  layer->setFirstTranspose(permutation);
 
-    *output_tensor = layer->getOutput(0);
-    return tensorflow::Status::OK();
+  nvinfer1::Dims reshape_dims;
+  reshape_dims.nbDims = dims.nbDims;
+  for (int32_t i = 0; i < reshape_dims.nbDims; ++i) {
+    reshape_dims.d[i] = 0;
+    // TODO(aaroey): why not transposing the types as well?
+    reshape_dims.type[i] = dims.type[i];
   }
+  layer->setReshapeDimensions(reshape_dims);
 
-  // Helper function converts input into tensor with shape specified by dims.
-  tensorflow::Status PrepareTensorForShape(const TRT_TensorOrWeights& input,
-                                           const nvinfer1::Dims& dims,
-                                           const nvinfer1::ITensor** tensor) {
-    // If -1 is not used for one of the dims, we can check if the shapes are
-    // compatible.
-    bool can_check_shapes = true;
-    for (int i = 0; i < dims.nbDims; i++) {
-      if (dims.d[i] == -1) {
-        can_check_shapes = false;
-        break;
-      }
-    }
-    if (can_check_shapes &&
-        GetShapeSize(input.shape()) != GetShapeSize(dims)) {
-      return tensorflow::errors::InvalidArgument(
-          "Reshape shapes are not compatible.");
+  *output_tensor = layer->getOutput(0);
+  return tensorflow::Status::OK();
+}
+
+Status Converter::PrepareTensorForShape(const TRT_TensorOrWeights& input,
+                                        const nvinfer1::Dims& dims,
+                                        const nvinfer1::ITensor** tensor) {
+  // If -1 is not used for one of the dims, we can check if the shapes are
+  // compatible.
+  bool can_check_shapes = true;
+  for (int i = 0; i < dims.nbDims; i++) {
+    if (dims.d[i] == -1) {
+      can_check_shapes = false;
+      break;
     }
+  }
+  if (can_check_shapes &&
+      TrtDimsNumElements(input.shape()) != TrtDimsNumElements(dims)) {
+    return tensorflow::errors::InvalidArgument(
+        "Reshape shapes are not compatible.");
+  }
 
-    if (input.is_tensor()) {
-      if (DimsEqual(input.shape(), dims)) {
-        *tensor = input.tensor();
-      } else {
-        nvinfer1::IShuffleLayer* layer = this->network()->addShuffle(
-            *const_cast<nvinfer1::ITensor*>(input.tensor()));
-        TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Reshape");
-        layer->setReshapeDimensions(dims);
-        *tensor = layer->getOutput(0);
-      }
+  if (input.is_tensor()) {
+    if (DimsEqual(input.shape(), dims)) {
+      *tensor = input.tensor();
     } else {
-#if NV_TENSORRT_MAJOR > 3
-      nvinfer1::IConstantLayer* layer =
-          this->network()->addConstant(dims, input.weights());
+      nvinfer1::IShuffleLayer* layer = this->network()->addShuffle(
+          *const_cast<nvinfer1::ITensor*>(input.tensor()));
       TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Reshape");
+      layer->setReshapeDimensions(dims);
       *tensor = layer->getOutput(0);
+    }
+  } else {
+#if NV_TENSORRT_MAJOR > 3
+    nvinfer1::IConstantLayer* layer =
+        this->network()->addConstant(dims, input.weights());
+    TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Reshape");
+    *tensor = layer->getOutput(0);
 #else
-      return tensorflow::errors::Unimplemented(
+    return tensorflow::errors::Unimplemented(
         "Can't reshape constant. Please upgrade to TRT 4 or above.");
 #endif
-    }
-    return tensorflow::Status::OK();
   }
+  return tensorflow::Status::OK();
+}
 
- private:
-  std::unordered_map<string, TRT_TensorOrWeights> trt_tensors_;
-  std::unordered_map<string, OpConverter> op_registry_;
-  OpConverter plugin_converter_;
-  nvinfer1::INetworkDefinition* trt_network_;
-  std::list<std::vector<uint8_t>> temp_bufs_;
-
-  // TODO(aaroey): inline the definition of TRTWeightStore here, and add APIs to
-  // operate the stored weights instead of operating it directly.
-  TRTWeightStore* weight_store_;
-
-  bool fp16_;
-
-  int max_batch_size_;
-
-  void register_op_converters();
-
-  tensorflow::Status get_inputs(const tensorflow::NodeDef& node_def,
-                                std::vector<TRT_TensorOrWeights>* inputs) {
-    for (auto const& input_name : node_def.input()) {
-      /*************************************************************************
-       * TODO(jie): handle case 1) here.
-       * Normalizes the inputs and extracts associated metadata:
-       * 1) Inputs can contain a colon followed by a suffix of characters.
-       *    That suffix may be a single number (e.g. inputName:1) or several
-       *    word characters separated from a number by a colon
-       *    (e.g. inputName:foo:1). The
-       *    latter case is used to denote inputs and outputs of functions.
-       * 2) Control dependency inputs contain caret at the beginning and we
-       *    remove this and annotate the edge as a control dependency.
-       ************************************************************************/
-      // skip control nodes
-      if (input_name[0] == '^') continue;
-      string name = input_name;
-      auto last = name.find_last_of(':');
-      // TODO(aaroey): use TensorId
-      if (last != string::npos && last + 2 == name.size() &&
-          name[last + 1] == '0') {
-        name.erase(last);
-      }
-
-      if (trt_tensors_.count(name)) {
-        TRT_TensorOrWeights& input = trt_tensors_.at(name);
-        inputs->push_back(input);
-        VLOG(2) << "Retrieved input " << name << ": " << input.DebugString();
-      } else {
-        // TODO(aaroey): this should not happen, make it a CHECK.
-        // TODO(aaroey): use StrCat for pattern like this.
-        string msg("Node ");
-        StrAppend(&msg, node_def.name(), " should have an input named '", name,
-                  "' but it is not available");
-        LOG(ERROR) << msg;
-        return tensorflow::errors::InvalidArgument(msg);
-      }
+Status Converter::GetInputs(const tensorflow::NodeDef& node_def,
+                            std::vector<TRT_TensorOrWeights>* inputs) const {
+  for (auto const& input_name : node_def.input()) {
+    /*************************************************************************
+     * TODO(jie): handle case 1) here.
+     * Normalizes the inputs and extracts associated metadata:
+     * 1) Inputs can contain a colon followed by a suffix of characters.
+     *    That suffix may be a single number (e.g. inputName:1) or several
+     *    word characters separated from a number by a colon
+     *    (e.g. inputName:foo:1). The
+     *    latter case is used to denote inputs and outputs of functions.
+     * 2) Control dependency inputs contain caret at the beginning and we
+     *    remove this and annotate the edge as a control dependency.
+     ************************************************************************/
+    // skip control nodes
+    if (input_name[0] == '^') continue;
+    string name = input_name;
+    auto last = name.find_last_of(':');
+    // TODO(aaroey): use TensorId
+    if (last != string::npos && last + 2 == name.size() &&
+        name[last + 1] == '0') {
+      name.erase(last);
+    }
+
+    if (trt_tensors_.count(name)) {
+      TRT_TensorOrWeights input = trt_tensors_.at(name);
+      inputs->push_back(input);
+      VLOG(2) << "Retrieved input " << name << ": " << input.DebugString();
+    } else {
+      // TODO(aaroey): this should not happen, make it a CHECK.
+      // TODO(aaroey): use StrCat for pattern like this.
+      string msg("Node ");
+      StrAppend(&msg, node_def.name(), " should have an input named '", name,
+                "' but it is not available");
+      LOG(ERROR) << msg;
+      return tensorflow::errors::InvalidArgument(msg);
     }
-    return tensorflow::Status::OK();
   }
-};
+  return tensorflow::Status::OK();
+}
 
 TRT_ShapedWeights ConvertFP32ToFP16(Converter& ctx,
                                     const TRT_ShapedWeights& weights_src) {
   auto dtype_new = tensorflow::DataType::DT_HALF;
-  TRT_ShapedWeights weights =
-      ctx.get_temp_weights(dtype_new, weights_src.shape_);
+  TRT_ShapedWeights weights = ctx.GetTempWeights(dtype_new, weights_src.shape_);
   const float* src = static_cast<const float*>(weights_src.GetValues());
   Eigen::half* dst = const_cast<Eigen::half*>(
       static_cast<Eigen::half const*>(weights.GetValues()));
@@ -1028,77 +918,6 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights,
   return tensorflow::Status::OK();
 }
 
-tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l,
-                                 const TRT_ShapedWeights& iweights_r,
-                                 TRT_ShapedWeights* oweights,
-                                 LambdaFactory binary_op) {
-  // Assume iweights_l.type == iweight_r.type
-  CHECK_EQ(iweights_l.type_, oweights->type_);
-  CHECK_EQ(iweights_r.type_, oweights->type_);
-  VLOG(2) << "SANITY CHECK!";
-
-  switch (iweights_l.type_) {
-    case tensorflow::DataType::DT_FLOAT: {
-      auto inp_l = static_cast<const float*>(iweights_l.GetValues());
-      auto inp_r = static_cast<const float*>(iweights_r.GetValues());
-      auto oup = static_cast<float*>(const_cast<void*>(oweights->GetValues()));
-
-      if (iweights_l.count() != iweights_r.count()) {
-        // We only supports broadcast of RankZero
-        if (iweights_l.count() == 1) {
-          // TODO(aaroey): Remove loggings like this.
-          VLOG(2) << "I bet it is not working!" << (*inp_l);
-          std::transform(inp_r, inp_r + iweights_r.count(), oup,
-                         binary_op.broadcast_l<float>(*inp_l));
-        } else if (iweights_r.count() == 1) {
-          VLOG(2) << "I bet it is not working!" << (*inp_r);
-          std::transform(inp_l, inp_l + iweights_l.count(), oup,
-                         binary_op.broadcast_r<float>(*inp_r));
-        } else {
-          return tensorflow::errors::Unimplemented(
-              "Binary op with non-rankZero broadcast not supported");
-        }
-      } else {
-        std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup,
-                       binary_op.binary<float>());
-      }
-      break;
-    }
-    case tensorflow::DataType::DT_HALF: {
-      auto inp_l = static_cast<const Eigen::half*>(iweights_l.GetValues());
-      auto inp_r = static_cast<const Eigen::half*>(iweights_r.GetValues());
-      auto oup =
-          static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues()));
-
-      if (iweights_l.count() != iweights_r.count()) {
-        // We only supports broadcast of RankZero
-        if (iweights_l.count() == 1) {
-          VLOG(2) << "I bet it is not working!" << (*inp_l);
-          std::transform(inp_r, inp_r + iweights_r.count(), oup,
-                         binary_op.broadcast_l<Eigen::half>(*inp_l));
-        } else if (iweights_r.count() == 1) {
-          VLOG(2) << "I bet it is not working!" << (*inp_r);
-          std::transform(inp_l, inp_l + iweights_l.count(), oup,
-                         binary_op.broadcast_r<Eigen::half>(*inp_r));
-        } else {
-          return tensorflow::errors::Unimplemented(
-              "Binary op with non-rankZero broadcast not supported");
-        }
-      } else {
-        std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup,
-                       binary_op.binary<Eigen::half>());
-      }
-      break;
-    }
-    default:
-      return tensorflow::errors::Unimplemented(
-          "Data type not supported: " +
-          tensorflow::DataTypeString(iweights_l.type_));
-  }
-
-  return tensorflow::Status::OK();
-}
-
 // TODO(jie): broadcast is needed yet not implemented.
 // Only implemented channel wise for the time being
 tensorflow::Status BinaryTensorOpWeight(
@@ -1207,7 +1026,7 @@ tensorflow::Status BinaryTensorOpWeight(
     }
   }
 
-  if (ctx.isFP16()) {
+  if (ctx.IsFP16()) {
     weights = ConvertFP32ToFP16(ctx, weights);
   }
 
@@ -1226,7 +1045,7 @@ tensorflow::Status BinaryTensorOpWeight(
       TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
       tensor = layer->getOutput(0);
     } else {
-      TRT_ShapedWeights neg_weights = ctx.get_temp_weights_like(weights);
+      TRT_ShapedWeights neg_weights = ctx.GetTempWeightsLike(weights);
       LambdaFactory unary_op;
       unary_op.op = LambdaFactory::OP_CATEGORY::NEG;
       TF_RETURN_IF_ERROR(UnaryCompute(weights, &neg_weights, unary_op));
@@ -1241,7 +1060,7 @@ tensorflow::Status BinaryTensorOpWeight(
       TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
       tensor = layer->getOutput(0);
     } else {
-      TRT_ShapedWeights recip_weights = ctx.get_temp_weights_like(weights);
+      TRT_ShapedWeights recip_weights = ctx.GetTempWeightsLike(weights);
       LambdaFactory unary_op;
       unary_op.op = LambdaFactory::OP_CATEGORY::RECIP;
       TF_RETURN_IF_ERROR(UnaryCompute(weights, &recip_weights, unary_op));
@@ -1312,11 +1131,11 @@ tensorflow::Status ConvertConv2DHelper(
     return tensorflow::errors::Internal(
         "Conv2D expects kernel of dimension 4, at: " + node_def.name());
   }
-  if (ctx.isFP16()) {
+  if (ctx.IsFP16()) {
     weights_rsck = ConvertFP32ToFP16(ctx, inputs.at(1).weights());
   }
 
-  TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck);
+  TRT_ShapedWeights weights = ctx.GetTempWeightsLike(weights_rsck);
   ReorderRSCKToKCRS(weights_rsck, &weights, num_groups);
   TRT_ShapedWeights biases(weights.type_);
   const int noutput = weights.shape_.d[0] * num_groups;
@@ -1504,7 +1323,7 @@ tensorflow::Status ConvertTranspose(
   if (inputs.size() != 2 || !inputs.at(0).is_tensor() ||
       !inputs.at(1).is_weights()) {
     return tensorflow::errors::InvalidArgument(
-        "Input expects tensor and weights, at", node_def.name());
+        "Input expects tensor and weights, at ", node_def.name());
   }
   nvinfer1::ITensor* input_tensor = const_cast<nvinfer1::ITensor*>(
       inputs.at(0).tensor());
@@ -1517,11 +1336,6 @@ tensorflow::Status ConvertTranspose(
     perm[i] = weights_ptr[i];
   }
 
-  if (perm[0] != 0) {
-    return tensorflow::errors::Unimplemented(
-        "Transpose at batch dimension is not supported, at", node_def.name());
-  }
-
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(ctx.TransposeTensor(input_tensor, perm, &output_tensor));
   outputs->push_back(TRT_TensorOrWeights(
@@ -1535,30 +1349,30 @@ tensorflow::Status ConvertReshape(
     std::vector<TRT_TensorOrWeights>* outputs) {
   if (inputs.size() != 2 || !inputs.at(1).is_weights()) {
     return tensorflow::errors::InvalidArgument(
-        "Input expects weights for shape, at", node_def.name());
+        "Input expects weights for shape, at ", node_def.name());
   }
 
   TRT_ShapedWeights weights = inputs.at(1).weights();
   if (weights.count() == 0) {
     return tensorflow::errors::Unimplemented(
-        "Reshape to shape=[] is not supported, at", node_def.name());
+        "Reshape to shape=[] is not supported, at ", node_def.name());
   }
 
-  // Get new_shape
+  // Get new_dims
   const int* weights_ptr = static_cast<int*>(const_cast<void*>(
       weights.GetValues()));
-  nvinfer1::Dims new_shape;
+  nvinfer1::Dims new_dims;
   // Ignore first (batch) dimension because TRT abstracts batch away
-  new_shape.nbDims = weights.count() - 1;
+  new_dims.nbDims = weights.count() - 1;
   for (int i = 1; i < weights.count(); i++) {
-    new_shape.d[i-1] = weights_ptr[i];
+    new_dims.d[i-1] = weights_ptr[i];
   }
 
   // Check that batch dimension doesn't change
-  const nvinfer1::Dims input_shape = inputs.at(0).shape();
+  const nvinfer1::Dims input_dims = inputs.at(0).shape();
   if (weights_ptr[0] == -1) {
-    // Product of input shape should equal product of new_shape
-    if (GetShapeSize(input_shape) != GetShapeSize(new_shape)) {
+    // Product of input shape should equal product of new_dims
+    if (TrtDimsNumElements(input_dims) != TrtDimsNumElements(new_dims)) {
       return tensorflow::errors::Unimplemented(
         "Reshape on the batch dimension is not supported.");
     }
@@ -1569,7 +1383,7 @@ tensorflow::Status ConvertReshape(
 
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(
-      ctx.PrepareTensorForShape(inputs.at(0), new_shape, &output_tensor));
+      ctx.PrepareTensorForShape(inputs.at(0), new_dims, &output_tensor));
   outputs->push_back(TRT_TensorOrWeights(
       const_cast<nvinfer1::ITensor*>(output_tensor)));
   return tensorflow::Status::OK();
@@ -1701,7 +1515,7 @@ tensorflow::Status ConvertScale(Converter& ctx,
 
   const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
   TRT_ShapedWeights weights = inputs.at(1).weights();
-  if (ctx.isFP16()) {
+  if (ctx.IsFP16()) {
     weights = ConvertFP32ToFP16(ctx, inputs.at(1).weights());
   }
 
@@ -1783,140 +1597,153 @@ tensorflow::Status ConvertScale(Converter& ctx,
   return tensorflow::Status::OK();
 }
 
+Status GetTensorDimsWithProtoShape(const Tensor& tensor,
+                                   int tensor_proto_array_len,
+                                   nvinfer1::Dims* dims) {
+  if (tensor.dims() > 0) {
+    *dims = GetTrtDimsForTensor(tensor);
+    if (TrtDimsNumElements(*dims) != tensor_proto_array_len &&
+        tensor_proto_array_len != 1) {
+      return errors::InvalidArgument(
+          "Broadcast on weights only supports kCHANNEL and kUNIFORM");
+    }
+  } else {
+    dims->nbDims = 1;
+    // No dimension provided. Flatten it.
+    dims->d[0] = tensor_proto_array_len;
+    dims->type[0] = nvinfer1::DimensionType::kSPATIAL;
+    for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; ++i) {
+      dims->d[i] = 0;
+    }
+  }
+  return Status::OK();
+}
+
+template <typename CType>
+Status TfTensorToTrtWeights(const DataType dtype,
+                            const Tensor& tensor,
+                            const CType* tensor_proto_array,
+                            int tensor_proto_array_len, Converter* converter,
+                            TRT_ShapedWeights* weights) {
+  nvinfer1::Dims weight_dims;
+  TF_RETURN_IF_ERROR(GetTensorDimsWithProtoShape(
+      tensor, tensor_proto_array_len, &weight_dims));
+  const int64_t size_bytes =
+      tensorflow::DataTypeSize(dtype) * TrtDimsNumElements(weight_dims);
+  converter->weight_store()->store_.push_back(std::vector<uint8_t>(size_bytes));
+  void* dst =
+      static_cast<void*>(&(converter->weight_store()->store_.back()[0]));
+  if (tensor_proto_array_len == 1) {
+    std::fill_n((CType*)dst, TrtDimsNumElements(weight_dims),
+                *tensor_proto_array);
+  } else {
+    memcpy(dst, tensor_proto_array, size_bytes);
+  }
+  *weights = TRT_ShapedWeights(dtype, dst, weight_dims);
+  return Status::OK();
+}
+
 tensorflow::Status ConvertConst(Converter& ctx,
                                 const tensorflow::NodeDef& node_def,
                                 const std::vector<TRT_TensorOrWeights>& inputs,
                                 std::vector<TRT_TensorOrWeights>* outputs) {
-  const auto& weights_tensor = node_def.attr().at("value").tensor();
+  if (!inputs.empty()) {
+    return errors::InvalidArgument(
+        "Constant node is expected to have empty input list: ",
+        node_def.name());
+  }
 
-  // Get trt type & shape
   TFAttrs attrs(node_def);
-  const tensorflow::DataType dtype = attrs.get<tensorflow::DataType>("dtype");
+  const DataType dtype = attrs.get<tensorflow::DataType>("dtype");
+  // We always convert the integer constants to kINT32, since TRT kINT8 is for
+  // quantized inference.
+  const DataType converted_dtype =
+      (dtype == DT_INT16 || dtype == DT_INT8 || dtype == DT_UINT8 ? DT_INT32
+                                                                  : dtype);
+  nvinfer1::DataType trt_dtype;
+  TF_RETURN_IF_ERROR(ConvertDType(converted_dtype, &trt_dtype));
 
   // Create shaped weights as output
+  const auto& tensor_proto = node_def.attr().at("value").tensor();
   tensorflow::Tensor tensor;
-  if (!tensor.FromProto(weights_tensor)) {
+  if (!tensor.FromProto(tensor_proto)) {
     return tensorflow::errors::Internal("Cannot parse weight tensor proto: ",
                                         node_def.name());
   }
 
-  TRT_ShapedWeights weights(dtype);
-  // TODO(aaroey): we should choose the array using dtype and shape.
-  if (!weights_tensor.float_val().empty()) {
-    VLOG(2) << "SCALAR!!!" << node_def.name();
-    nvinfer1::Dims scalar_shape;
-    if (tensor.dims() > 0) {
-      VLOG(2) << "dimensions: " << tensor.dims();
-      VLOG(2) << "size: " << weights_tensor.float_val_size();
-      scalar_shape = GetTensorShape(tensor);
-      VLOG(2) << "details: ";
-      for (int i = 0; i < scalar_shape.nbDims; i++)
-        VLOG(2) << scalar_shape.d[i];
-      if (GetShapeSize(scalar_shape) != weights_tensor.float_val_size() &&
-          weights_tensor.float_val_size() != 1) {
-        LOG(ERROR) << "Broadcast on weights only supports kCHANNEL and"
-                   << " kUNIFORM, at: " << node_def.name();
-        string err_str("Broadcast method is not supported for '");
-        StrAppend(&err_str, node_def.name(), "' of type ", node_def.op());
-        return tensorflow::errors::InvalidArgument(err_str);
-      }
-    } else {
-      VLOG(2) << "Dimensions: " << tensor.dims();
-      scalar_shape.nbDims = 1;
-      // no dimension provided. flatten it
-      scalar_shape.d[0] = weights_tensor.float_val_size();
-      scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL;
-      for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) {
-        scalar_shape.d[i] = 0;
+  TRT_ShapedWeights weights(converted_dtype);
+  if (tensor.NumElements() == 0) {
+    // Do nothing.
+  } else if (!tensor_proto.float_val().empty()) {
+    TF_RETURN_IF_ERROR(TfTensorToTrtWeights(
+        converted_dtype, tensor, tensor_proto.float_val().begin(),
+        tensor_proto.float_val_size(), &ctx, &weights));
+  } else if (!tensor_proto.int_val().empty()) {
+    TF_RETURN_IF_ERROR(TfTensorToTrtWeights(
+        converted_dtype, tensor, tensor_proto.int_val().begin(),
+        tensor_proto.int_val_size(), &ctx, &weights));
+  } else if (!tensor_proto.half_val().empty()) {
+    // TODO(aaroey): implement fp16 conversion.
+    return errors::Unimplemented("fp16 constant is not supported yet.");
+  } else if (!tensor_proto.tensor_content().empty()) {
+    // TODO(aaroey): fp16 will remain in half format and is not converted to
+    // fp32, but the converter currently uses all float weights as fp32. Fix
+    // this.
+    const auto& content = tensor_proto.tensor_content();
+    if (content.size() > 0) {
+      const int dtype_size = tensorflow::DataTypeSize(dtype);
+      if (content.size() % dtype_size != 0) {
+        return errors::FailedPrecondition(
+            "Tensor content size ", content.size(), " is not a multiple of ",
+            dtype_size);
       }
-    }
-    // TODO(aaroey): use GetShapeSize().
-    size_t len_data = tensorflow::DataTypeSize(dtype);
-    for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i];
-    ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data));
-    void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0]));
-    if (weights_tensor.float_val_size() == 1) {
-      std::fill_n((float*)dst, GetShapeSize(scalar_shape),
-                  *weights_tensor.float_val().begin());
-    } else {
-      // TODO(aaroey): get rid of this copy as RepeatedField is always
-      // contiguous make a local copy first to flatten doesn't have to be
-      // contiguous
-      std::vector<float> tensor_data(weights_tensor.float_val().begin(),
-                                     weights_tensor.float_val().end());
-      memcpy(dst, tensor_data.data(), len_data);  // store into weight store
-    }
-    VLOG(2) << "create shape details: ";
-    for (int i = 0; i < scalar_shape.nbDims; i++) VLOG(2) << scalar_shape.d[i];
-    weights = TRT_ShapedWeights(dtype, dst, scalar_shape);
-  } else if (!weights_tensor.int_val().empty()) {
-    // TODO(aaroey): this is very similar to the above code for float, merge
-    // them.
-    VLOG(2) << "int!!!" << node_def.name();
-    nvinfer1::Dims scalar_shape;
-    if (tensor.dims() > 0) {
-      VLOG(2) << "dimensions: " << tensor.dims();
-      scalar_shape = GetTensorShape(tensor);
-      if (GetShapeSize(scalar_shape) != weights_tensor.int_val_size() &&
-          weights_tensor.int_val_size() != 1) {
-        LOG(WARNING) << "Broadcast on weights only supports kCHANNEL and"
-                     << " kUNIFORM, at: " << node_def.name();
-        string err_str("Broadcast method is not supported for '");
-        StrAppend(&err_str, node_def.name(), "' of type ", node_def.op());
-        return tensorflow::errors::InvalidArgument(err_str);
+      nvinfer1::Dims weights_dim;
+      TF_RETURN_IF_ERROR(GetTensorDimsWithProtoShape(
+          tensor, content.size() / dtype_size, &weights_dim));
+      const int64_t size_bytes = TrtDimsNumElements(weights_dim) * dtype_size;
+      if (content.size() != size_bytes) {
+        return errors::FailedPrecondition(
+            "Tensor size and TensorProto content size mismatch: ",
+            size_bytes, " vs ", content.size());
+      } else if (tensor.NumElements() != content.size() / dtype_size) {
+        return errors::FailedPrecondition(
+            "Tensor elements count and TensorProto content size mismatch: ",
+            tensor.NumElements(), " vs ", content.size() / dtype_size);
       }
-    } else {
-      VLOG(2) << "dimensions: " << tensor.dims();
-      scalar_shape.nbDims = 1;
-      // no dimension provided. flatten it
-      scalar_shape.d[0] = weights_tensor.int_val_size();
-      scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL;
-      for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) {
-        scalar_shape.d[i] = 0;
-        scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL;
+      weights = ctx.GetTempWeights(converted_dtype, weights_dim);
+      if (dtype_size == tensorflow::DataTypeSize(converted_dtype)) {
+        port::CopyToArray(
+            content,
+            static_cast<char*>(const_cast<void*>(weights.GetValues())));
+      } else {
+        // Copy out the weights as original data type.
+        std::vector<uint8_t> temp_weights(content.size());
+        port::CopyToArray(content,
+                          reinterpret_cast<char*>(temp_weights.data()));
+        int32* dst =
+            static_cast<int32*>(const_cast<void*>(weights.GetValues()));
+        // Copy to the weight store as converted data type.
+        if (dtype == DT_INT16) {
+          int16* data = reinterpret_cast<int16*>(temp_weights.data());
+          std::copy(data, data + tensor.NumElements(), dst);
+        } else if (dtype == DT_INT8) {
+          int8* data = reinterpret_cast<int8*>(temp_weights.data());
+          std::copy(data, data + tensor.NumElements(), dst);
+        } else if (dtype == DT_UINT8) {
+          uint8* data = reinterpret_cast<uint8*>(temp_weights.data());
+          std::copy(data, data + tensor.NumElements(), dst);
+        } else {
+          return errors::FailedPrecondition(
+              "Unexpected data type: ", DataTypeString(dtype), " at: ",
+              node_def.name());
+        }
       }
     }
-    // we should not have converted
-    size_t len_data = tensorflow::DataTypeSize(dtype);
-    for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i];
-    size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32);
-    len_data = std::max(len_data, len_tensor);
-    ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data));
-    void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0]));
-    if (weights_tensor.int_val_size() == 1) {
-      std::fill_n((int*)dst, GetShapeSize(scalar_shape),
-                  *weights_tensor.int_val().begin());
-    } else {
-      // TODO(aaroey): get rid of this copy as RepeatedField is always
-      // contiguous make a local copy first to flatten doesn't have to be
-      // contiguous
-      std::vector<int32> tensor_data(weights_tensor.int_val().begin(),
-                                     weights_tensor.int_val().end());
-      memcpy(dst, tensor_data.data(), len_tensor);  // store into weight store
-    }
-    weights = TRT_ShapedWeights(dtype, dst, scalar_shape);
-  } else if (!weights_tensor.tensor_content().empty()) {
-    // obsolete method.
-    // After optimization path, we do not see weights in this format.
-    // TODO(aaroey): why?
-    // fp16 conversion technically should be needed here.
-    VLOG(2) << "TENSOR!!!" << node_def.name();
-    const auto& content = weights_tensor.tensor_content();
-
-    weights = ctx.get_temp_weights(dtype, GetTensorShape(tensor));
-    if (content.size() > 0) {
-      const int dtype_size = tensorflow::DataTypeSize(dtype);
-      CHECK_EQ(0, content.size() % dtype_size)
-          << "Tensor content size (" << content.size()
-          << ") is not a multiple of " << dtype_size;
-      port::CopyToArray(
-          content, static_cast<char*>(const_cast<void*>(weights.GetValues())));
-    }
   } else {
-    return tensorflow::errors::Unimplemented("Not supported constant type, at ",
-                                             node_def.name());
+    return errors::Unimplemented("Not supported constant type, at ",
+                                 node_def.name());
   }
-  // Pass the output
+  // Pass the output.
   outputs->push_back(TRT_TensorOrWeights(weights));
   return tensorflow::Status::OK();
 }
@@ -2462,9 +2289,9 @@ tensorflow::Status ConvertFusedBatchNorm(
   //  We could technically have two weights with different shape.
   //  that requires two addScale op, arguably less performant
   TRT_ShapedWeights combined_scale_weights =
-      ctx.get_temp_weights_like(*ptr_shape_weights);
+      ctx.GetTempWeightsLike(*ptr_shape_weights);
   TRT_ShapedWeights combined_offset_weights =
-      ctx.get_temp_weights_like(*ptr_shape_weights);
+      ctx.GetTempWeightsLike(*ptr_shape_weights);
 
   const Eigen::half* cast_vals_array[4];
   const float* vals_array[4];
@@ -2546,7 +2373,7 @@ tensorflow::Status ConvertMatMulHelper(
     weights = weights_raw;
   } else {
     TRT_ShapedWeights weights_ck = weights_raw;
-    weights = ctx.get_temp_weights_like(weights_ck);
+    weights = ctx.GetTempWeightsLike(weights_ck);
     ReorderCKtoKC(weights_raw, &weights);
   }
   TRT_ShapedWeights biases(weights.type_);
@@ -2750,7 +2577,7 @@ tensorflow::Status ConvertTopK(Converter& ctx,
 }
 #endif
 
-void Converter::register_op_converters() {
+void Converter::RegisterOpConverters() {
   // vgg_16 slim implementation
   op_registry_["Conv2D"] = ConvertConv2D;
   op_registry_["DepthwiseConv2dNative"] = ConvertConv2DDepthwise;
@@ -2807,8 +2634,6 @@ void Converter::register_op_converters() {
   plugin_converter_ = ConvertPlugin;
 }
 
-}  // namespace
-
 tensorflow::Status ConvertGraphDefToEngine(
     const tensorflow::GraphDef& gdef, int precision_mode, int max_batch_size,
     size_t max_workspace_size_bytes,
@@ -2842,11 +2667,10 @@ tensorflow::Status ConvertGraphDefToEngine(
     return tensorflow::errors::Internal(
         "Failed to create TensorRT network object");
   }
-  auto ws = std::unique_ptr<TRTWeightStore>(new TRTWeightStore());
 
   // Build the network
   VLOG(1) << "Starting engine conversion ";
-  Converter converter(trt_network.get(), ws.get(), precision_mode == FP16MODE,
+  Converter converter(trt_network.get(), precision_mode == FP16MODE,
                       max_batch_size);
   std::vector<std::pair<string, string>> output_tensors;
   // Graph nodes are already topologically sorted during construction
@@ -2891,10 +2715,7 @@ tensorflow::Status ConvertGraphDefToEngine(
       }
       VLOG(2) << "Adding engine input tensor " << node_name << " with shape "
               << DebugString(input_dim);
-      if (!converter.insert_input_tensor(node_name, input_tensor)) {
-        return tensorflow::errors::AlreadyExists(
-            "Output tensor already exists for op: " + node_name);
-      }
+      TF_RETURN_IF_ERROR(converter.AddInputTensor(node_name, input_tensor));
     } else if (tensorflow::str_util::StartsWith(node_name, kOutputPHName) &&
                (node_def.op() == "Identity")) {
       int32 slot_number = -1;
@@ -2910,11 +2731,11 @@ tensorflow::Status ConvertGraphDefToEngine(
     } else {
       VLOG(2) << "Converting node: " << node_def.name() << " , "
               << node_def.op();
-      TF_RETURN_IF_ERROR(converter.convert_node(node_def));
+      TF_RETURN_IF_ERROR(converter.ConvertNode(node_def));
     }
   }
   for (const auto& output : output_tensors) {
-    auto tensor_or_weights = converter.get_tensor(output.first);
+    auto tensor_or_weights = converter.GetTensorOrWeights(output.first);
     if (!tensor_or_weights.is_tensor()) {
       return tensorflow::errors::InvalidArgument(
           "Output node '" + output.first + "' is weights not tensor");
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index 9274027e63..32f61fdfe5 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_
 #define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_
 
+#include <list>
 #include <set>
 #include <string>
 #include <unordered_map>
@@ -26,6 +27,7 @@ limitations under the License.
 #include "tensorflow/contrib/tensorrt/log/trt_logger.h"
 #include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
 #include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h"
+#include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -33,6 +35,7 @@ limitations under the License.
 
 #if GOOGLE_CUDA
 #if GOOGLE_TENSORRT
+#include "tensorrt/include/NvInfer.h"
 
 namespace tensorflow {
 namespace tensorrt {
@@ -170,6 +173,162 @@ class OutputEdgeValidator {
   bool operator()(const tensorflow::Edge* out_edge) const;
 };
 
+////////////////////////////////////////////////////////////////////////////////
+// Classes/functions below are exposed for testing purposes only.
+////////////////////////////////////////////////////////////////////////////////
+
+string DebugString(const nvinfer1::Dims& dims);
+string DebugString(const nvinfer1::ITensor& tensor);
+int64_t TrtDimsNumElements(const nvinfer1::Dims& dims);
+
+// Class to convert TF weight to TRT weight.
+class TRT_ShapedWeights {
+ public:
+  TRT_ShapedWeights(tensorflow::DataType type, const void* values,
+                    nvinfer1::Dims shape);
+
+  explicit TRT_ShapedWeights(tensorflow::DataType type);
+
+  // TODO(aaroey): use rvalue reference.
+  TRT_ShapedWeights(const TRT_ShapedWeights& rhs);
+
+  nvinfer1::Weights GetWeightsForTRT() const;
+
+  const void* GetValues() const { return values_; }
+
+  int64_t count() const;
+
+  size_t size_bytes() const;
+
+  // Default converter
+  operator nvinfer1::Weights() const { return GetWeightsForTRT(); }
+
+  string DebugString() const;
+
+  // TODO(aaroey): make these private.
+  nvinfer1::Dims shape_;  // Note: shape.type[] is not used.
+  tensorflow::DataType type_;
+
+ private:
+  // TODO(aaroey): this should not be const as it's always from TRTWeightStore.
+  const void* values_;
+
+  friend bool operator==(const TRT_ShapedWeights& lhs,
+                         const TRT_ShapedWeights& rhs);
+};
+
+class TRT_TensorOrWeights {
+ public:
+  explicit TRT_TensorOrWeights(nvinfer1::ITensor* tensor);
+
+  explicit TRT_TensorOrWeights(const TRT_ShapedWeights& weights);
+
+  // TODO(aaroey): use rvalue reference.
+  TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs);
+
+  bool is_tensor() const { return is_tensor_; }
+  bool is_weights() const { return !is_tensor_; }
+
+  nvinfer1::ITensor* tensor() {
+    CHECK(is_tensor());
+    return tensor_;
+  }
+
+  const nvinfer1::ITensor* tensor() const {
+    CHECK(is_tensor());
+    return tensor_;
+  }
+
+  TRT_ShapedWeights& weights() {
+    CHECK(is_weights());
+    return weights_;
+  }
+
+  const TRT_ShapedWeights& weights() const {
+    CHECK(is_weights());
+    return weights_;
+  }
+
+  // TODO(aaroey): rename to dims() to be consistent.
+  nvinfer1::Dims shape() const;
+
+  string DebugString() const;
+
+ private:
+  nvinfer1::ITensor* tensor_;
+  TRT_ShapedWeights weights_;
+  const bool is_tensor_;
+};
+
+// Class to convert TF nodes to TRT network.
+class Converter {
+ public:
+  Converter(nvinfer1::INetworkDefinition* trt_network, bool fp16,
+            int max_batch_size);
+
+  virtual ~Converter() {}
+
+  nvinfer1::INetworkDefinition* network() { return trt_network_; }
+
+  TRTWeightStore* weight_store() { return &weight_store_; }
+
+  bool IsFP16() const { return fp16_; }
+
+  int GetMaxBatchSize() const { return max_batch_size_; }
+
+  TRT_ShapedWeights GetTempWeights(tensorflow::DataType type,
+                                   const nvinfer1::Dims& dims);
+
+  TRT_ShapedWeights GetTempWeightsLike(const TRT_ShapedWeights& weights) {
+    return GetTempWeights(weights.type_, weights.shape_);
+  }
+
+  Status ConvertNode(const tensorflow::NodeDef& node_def);
+
+  TRT_TensorOrWeights GetTensorOrWeights(const string& name);
+
+  Status AddInputTensor(const string& name, nvinfer1::ITensor* tensor);
+
+  Status TransposeTensor(nvinfer1::ITensor* input_tensor,
+                         const std::vector<int>& order_with_batch_dim,
+                         const nvinfer1::ITensor** output_tensor);
+
+  // Converts input into tensor with shape specified by dims.
+  Status PrepareTensorForShape(const TRT_TensorOrWeights& input,
+                               const nvinfer1::Dims& dims,
+                               const nvinfer1::ITensor** tensor);
+
+  // Expose for testing purposes.
+  Status GetInputs(const tensorflow::NodeDef& node_def,
+                   std::vector<TRT_TensorOrWeights>* inputs) const;
+
+ private:
+  using OpConverter = std::function<tensorflow::Status(
+      Converter&, const tensorflow::NodeDef&,
+      const std::vector<TRT_TensorOrWeights>&,
+      std::vector<TRT_TensorOrWeights>*)>;
+
+  void RegisterOpConverters();
+
+  std::unordered_map<string, OpConverter> op_registry_;
+
+  std::unordered_map<string, TRT_TensorOrWeights> trt_tensors_;
+
+  OpConverter plugin_converter_;
+
+  nvinfer1::INetworkDefinition* trt_network_;
+
+  // TODO(aaroey): inline the definition of TRTWeightStore here, and add APIs to
+  // operate the stored weights instead of operating it directly.
+  TRTWeightStore weight_store_;
+
+  bool fp16_;
+
+  int max_batch_size_;
+
+  friend class ConverterForTest;
+};
+
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
new file mode 100644
index 0000000000..5c9ddaec49
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -0,0 +1,646 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
+#include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+#include "cuda/include/cuda.h"
+#include "cuda/include/cuda_runtime_api.h"
+#include "tensorrt/include/NvInfer.h"
+
+namespace tensorflow {
+namespace tensorrt {
+namespace convert {
+
+using ::testing::ElementsAre;
+
+void ExpectStatus(Status status, error::Code code, const char* substr) {
+  EXPECT_EQ(code, status.code()) << status;
+  EXPECT_THAT(status.error_message(), ::testing::HasSubstr(substr)) << status;
+}
+
+nvinfer1::Dims GetTestDims(const std::vector<int>& d) {
+  nvinfer1::Dims dims;
+  dims.nbDims = d.size();
+  for (int i = 0; i < d.size(); ++i) {
+    dims.d[i] = d[i];
+  }
+  return dims;
+}
+
+// Fake ITensor implementation for testing purposes.
+class FakeITensor : public nvinfer1::ITensor {
+ public:
+  FakeITensor() {}
+
+  FakeITensor(const nvinfer1::Dims& dims, const string& name = "")
+      : name_(name), dims_(dims) {}
+
+  FakeITensor(const string& name, const std::vector<int>& dims)
+      : name_(name), dims_(GetTestDims(dims)) {}
+
+  void SetDims(const std::vector<int>& dims) {
+    setDimensions(GetTestDims(dims));
+  }
+
+  void setName(const char* name) override { name_ = name; }
+
+  const char* getName() const override { return name_.c_str(); }
+
+  void setDimensions(nvinfer1::Dims dimensions) override { dims_ = dimensions; }
+
+  nvinfer1::Dims getDimensions() const override { return dims_; }
+
+  void setType(nvinfer1::DataType type) override { type_ = type; }
+
+  nvinfer1::DataType getType() const override { return type_; }
+
+  bool isNetworkInput() const override { return false; }
+
+  bool isNetworkOutput() const override { return false; }
+
+  void setBroadcastAcrossBatch(bool broadcastAcrossBatch) override {}
+
+  bool getBroadcastAcrossBatch() const override { return false; }
+
+  nvinfer1::TensorLocation getLocation() const override { return location_; }
+
+  void setLocation(nvinfer1::TensorLocation location) override {
+    location_ = location;
+  }
+
+ private:
+  string name_;
+  nvinfer1::Dims dims_;
+  nvinfer1::DataType type_;
+  nvinfer1::TensorLocation location_;
+};
+
+bool Equals(const nvinfer1::Dims& lhs, const nvinfer1::Dims& rhs) {
+  if (lhs.nbDims != rhs.nbDims) return false;
+  for (int i = 0; i < lhs.nbDims; ++i) {
+    if (lhs.d[i] != rhs.d[i]) return false;
+    // We don't check the types in the tests.
+  }
+  return true;
+}
+
+bool operator==(const TRT_ShapedWeights& lhs, const TRT_ShapedWeights& rhs) {
+  return Equals(lhs.shape_, rhs.shape_) && lhs.type_ == rhs.type_ &&
+         lhs.values_ == rhs.values_;
+}
+
+TEST(TRT_ShapedWeights_Test, Basic) {
+  {
+    float raw_weights[10];
+    TRT_ShapedWeights weights(DT_FLOAT, raw_weights, GetTestDims({2, 5}));
+
+    nvinfer1::Weights trt_weights = weights.GetWeightsForTRT();
+    EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
+    EXPECT_EQ(static_cast<void*>(raw_weights), trt_weights.values);
+    EXPECT_EQ(10, trt_weights.count);
+
+    EXPECT_EQ(static_cast<void*>(raw_weights), weights.GetValues());
+    EXPECT_EQ(10, weights.count());
+    EXPECT_EQ(40, weights.size_bytes());
+  }
+  {
+    int32 raw_weights = 0;
+    TRT_ShapedWeights weights(DT_INT32, &raw_weights, GetTestDims({1, 1, 1}));
+
+    nvinfer1::Weights trt_weights = weights.GetWeightsForTRT();
+    EXPECT_EQ(nvinfer1::DataType::kINT32, trt_weights.type);
+    EXPECT_EQ(static_cast<void*>(&raw_weights), trt_weights.values);
+    EXPECT_EQ(1, trt_weights.count);
+
+    EXPECT_EQ(static_cast<void*>(&raw_weights), weights.GetValues());
+    EXPECT_EQ(1, weights.count());
+    EXPECT_EQ(4, weights.size_bytes());
+  }
+  {
+    TRT_ShapedWeights weights(DT_FLOAT);
+
+    nvinfer1::Weights trt_weights = weights.GetWeightsForTRT();
+    EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
+    EXPECT_EQ(nullptr, trt_weights.values);
+    EXPECT_EQ(0, trt_weights.count);
+
+    EXPECT_EQ(nullptr, weights.GetValues());
+    EXPECT_EQ(0, weights.count());
+    EXPECT_EQ(0, weights.size_bytes());
+  }
+}
+
+TEST(TRT_TensorOrWeights_Test, Basic) {
+  {
+    nvinfer1::Dims dims;
+    dims.nbDims = 1;
+    dims.d[0] = 1;
+    FakeITensor itensor(dims);
+
+    TRT_TensorOrWeights tw(&itensor);
+    EXPECT_EQ(true, tw.is_tensor());
+    EXPECT_EQ(false, tw.is_weights());
+    EXPECT_EQ(&itensor, tw.tensor());
+    EXPECT_TRUE(Equals(dims, tw.shape()))
+        << "- expected: " << DebugString(dims)
+        << "\n        vs\n-   actual: " << DebugString(tw.shape());
+  }
+  {
+    TRT_ShapedWeights weights(DT_FLOAT);
+    TRT_TensorOrWeights tw(weights);
+    EXPECT_EQ(false, tw.is_tensor());
+    EXPECT_EQ(true, tw.is_weights());
+    EXPECT_EQ(weights, tw.weights());
+
+    nvinfer1::Dims dims;
+    dims.nbDims = 0;
+    EXPECT_TRUE(Equals(dims, tw.shape()))
+        << "- expected: " << DebugString(dims)
+        << "\n        vs\n-   actual: " << DebugString(tw.shape());
+  }
+}
+
+class ConverterForTest : public Converter {
+ public:
+  ConverterForTest()
+      : Converter(nullptr, /*fp16=*/false, /*max_batch_size=*/1) {
+    QCHECK_EQ(0, cudaStreamCreate(&stream_));
+    Reset();
+  }
+
+  ~ConverterForTest() override { QCHECK_EQ(0, cudaStreamDestroy(stream_)); }
+
+  // Helper methods for testing purposes.
+
+  void AddOpConverter(const string& op_name, OpConverter op_converter) {
+    op_registry_[op_name] = op_converter;
+  }
+
+  void AddTensorOrWeights(const string& name, TRT_TensorOrWeights tw) {
+    ASSERT_TRUE(trt_tensors_.insert({name, tw}).second);
+  }
+
+  void Reset() {
+    // Clear the tensor map.
+    trt_tensors_.clear();
+    // Reset the INetworkDefinition.
+    engine_.reset(nullptr);
+    network_.reset(nullptr);
+    builder_.reset(nullptr);
+    builder_.reset(nvinfer1::createInferBuilder(logger_));
+    network_.reset(builder_->createNetwork());
+    trt_network_ = network_.get();
+  }
+
+  void BuildAndRun(const char* input_name, const std::vector<float>& input_data,
+                   const char* output_name, std::vector<float>* output_data) {
+    // Mark the output tensor as TRT engine output.
+    TRT_TensorOrWeights tensor = GetTensorOrWeights(output_name);
+    tensor.tensor()->setName(output_name);
+    network()->markOutput(*tensor.tensor());
+
+    // Build the TRT engine.
+    QCHECK_EQ(nullptr, engine_.get());
+    engine_.reset(builder_->buildCudaEngine(*network()));
+    CHECK_NOTNULL(engine_.get());
+
+    // Execute the TRT engine.
+    const int input_size = input_data.size() * sizeof(float);
+    const int output_size = output_data->size() * sizeof(float);
+    const int input_index = engine_->getBindingIndex(input_name);
+    const int output_index = engine_->getBindingIndex(output_name);
+
+    ASSERT_EQ(engine_->getNbBindings(), 2);
+    void* buffers[2];
+    ASSERT_EQ(0, cudaMalloc(&buffers[input_index], input_size));
+    ASSERT_EQ(0, cudaMalloc(&buffers[output_index], output_size));
+    ASSERT_EQ(0, cudaMemcpyAsync(buffers[input_index], input_data.data(),
+                                 input_size, cudaMemcpyHostToDevice, stream_));
+    TrtUniquePtrType<nvinfer1::IExecutionContext> execution_context(
+        engine_->createExecutionContext());
+    execution_context->enqueue(1, buffers, stream_, nullptr);
+    ASSERT_EQ(0, cudaMemcpyAsync(output_data->data(), buffers[output_index],
+                                 output_size, cudaMemcpyDeviceToHost, stream_));
+    cudaStreamSynchronize(stream_);
+    ASSERT_EQ(0, cudaFree(buffers[input_index]));
+    ASSERT_EQ(0, cudaFree(buffers[output_index]));
+  }
+
+ private:
+  Logger logger_;
+  TrtUniquePtrType<nvinfer1::IBuilder> builder_;
+  TrtUniquePtrType<nvinfer1::INetworkDefinition> network_;
+  TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;
+  cudaStream_t stream_;
+};
+
+class ConverterTest : public ::testing::Test {
+ protected:
+  nvinfer1::ITensor* AddTestTensor(const char* name,
+                                   const std::vector<int>& dims) {
+    nvinfer1::ITensor* tensor = converter_.network()->addInput(
+        name, nvinfer1::DataType::kFLOAT, GetTestDims(dims));
+    converter_.AddTensorOrWeights(name, TRT_TensorOrWeights{tensor});
+    return tensor;
+  }
+
+  template <typename CType>
+  TRT_ShapedWeights AddTestWeights(const char* name, const DataType dtype,
+                                   const std::vector<int>& dims,
+                                   const std::vector<CType>& values) {
+    const nvinfer1::Dims trt_dims = GetTestDims(dims);
+    const int64_t num_elements = TrtDimsNumElements(trt_dims);
+    QCHECK_EQ(num_elements, values.size())
+        << num_elements << " vs " << values.size();
+    TRT_ShapedWeights weights(dtype);
+    if (num_elements) {
+      const int64_t size_bytes = DataTypeSize(dtype) * num_elements;
+      QCHECK_EQ(size_bytes, sizeof(CType) * values.size())
+          << size_bytes << " vs " << sizeof(CType) * values.size();
+      converter_.weight_store()->store_.push_back(
+          std::vector<uint8_t>(size_bytes));
+      void* dst =
+          static_cast<void*>(converter_.weight_store()->store_.back().data());
+      memcpy(dst, values.data(), size_bytes);
+      weights = TRT_ShapedWeights(dtype, dst, trt_dims);
+    }
+    converter_.AddTensorOrWeights(name, TRT_TensorOrWeights{weights});
+    return weights;
+  }
+
+  NodeDef MakeNodeDef(const string& name, const string& op,
+                      const std::vector<string>& inputs) {
+    NodeDef node_def;
+    node_def.set_name(name);
+    node_def.set_op(op);
+    for (const string& input : inputs) {
+      node_def.add_input(input);
+    }
+    return node_def;
+  }
+
+  ConverterForTest converter_;
+};
+
+TEST_F(ConverterTest, GetTempWeights) {
+  TRT_ShapedWeights weights =
+      converter_.GetTempWeights(DT_FLOAT, GetTestDims({2, 3}));
+
+  nvinfer1::Weights trt_weights = weights.GetWeightsForTRT();
+  EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
+  EXPECT_NE(nullptr, trt_weights.values);
+  EXPECT_EQ(6, trt_weights.count);
+
+  EXPECT_NE(nullptr, weights.GetValues());
+  EXPECT_EQ(6, weights.count());
+  EXPECT_EQ(24, weights.size_bytes());
+
+  // TODO(aaroey): test the case where shape element count is 0.
+}
+
+TEST_F(ConverterTest, GetInputs) {
+  NodeDef node_def;
+  node_def.add_input("^control_input");
+  node_def.add_input("input");
+  node_def.add_input("input:0");
+  node_def.add_input("input:1");
+  node_def.add_input("weird_input:2:3:4:0");
+
+  FakeITensor input, input_1, input_2;
+  TF_EXPECT_OK(converter_.AddInputTensor("input", &input));
+  TF_EXPECT_OK(converter_.AddInputTensor("input:1", &input_1));
+  TF_EXPECT_OK(converter_.AddInputTensor("weird_input:2:3:4", &input_2));
+
+  std::vector<TRT_TensorOrWeights> inputs;
+  TF_EXPECT_OK(converter_.GetInputs(node_def, &inputs));
+  EXPECT_EQ(4, inputs.size());
+  EXPECT_EQ(&input, inputs[0].tensor());
+  EXPECT_EQ(&input, inputs[1].tensor());
+  EXPECT_EQ(&input_1, inputs[2].tensor());
+  EXPECT_EQ(&input_2, inputs[3].tensor());
+}
+
+TEST_F(ConverterTest, ConvertNode) {
+  FakeITensor output_tensors[2];
+  auto op_converter = [&output_tensors](
+                          Converter& ctx, const NodeDef& node_def,
+                          const std::vector<TRT_TensorOrWeights>& inputs,
+                          std::vector<TRT_TensorOrWeights>* outputs) -> Status {
+    nvinfer1::Dims dims = inputs[0].tensor()->getDimensions();
+    for (int i = 0; i < 2; ++i) {
+      dims.d[0] += 1;
+      output_tensors[i].setDimensions(dims);
+      outputs->push_back(TRT_TensorOrWeights(&output_tensors[i]));
+    }
+    return Status::OK();
+  };
+  converter_.AddOpConverter("MyOp", op_converter);
+
+  FakeITensor input_tensor("my_input", {12345});
+  TF_EXPECT_OK(converter_.AddInputTensor("my_input", &input_tensor));
+
+  NodeDef node_def = MakeNodeDef("my_op", "MyOp", {"my_input"});
+  TF_EXPECT_OK(converter_.ConvertNode(node_def));
+
+  TRT_TensorOrWeights actual_output_1 = converter_.GetTensorOrWeights("my_op");
+  EXPECT_EQ(&output_tensors[0], actual_output_1.tensor());
+  EXPECT_EQ(12346, actual_output_1.tensor()->getDimensions().d[0]);
+
+  TRT_TensorOrWeights actual_output_2 =
+      converter_.GetTensorOrWeights("my_op:1");
+  EXPECT_EQ(&output_tensors[1], actual_output_2.tensor());
+  EXPECT_EQ(12347, actual_output_2.tensor()->getDimensions().d[0]);
+}
+
+TEST_F(ConverterTest, TransposeTensor) {
+  nvinfer1::ITensor* input_tensor = AddTestTensor("", {2, 3, 5});
+  const nvinfer1::ITensor* output_tensor = nullptr;
+
+  // Rank doesn't match.
+  ExpectStatus(
+      converter_.TransposeTensor(input_tensor, {0, 1}, &output_tensor),
+      error::INVALID_ARGUMENT,
+      "Rank of perm for transpose does not match with that of the input");
+
+  // Transpose at batch dimension.
+  ExpectStatus(
+      converter_.TransposeTensor(input_tensor, {1, 0, 2, 3}, &output_tensor),
+      error::UNIMPLEMENTED, "Transpose at batch dimension is not supported.");
+
+  // OK.
+  TF_EXPECT_OK(
+      converter_.TransposeTensor(input_tensor, {0, 3, 1, 2}, &output_tensor));
+  EXPECT_TRUE(Equals(GetTestDims({5, 2, 3}), output_tensor->getDimensions()))
+      << DebugString(*output_tensor);
+}
+
+TEST_F(ConverterTest, PrepareTensorForShape_Tensor) {
+  nvinfer1::ITensor* input_tensor = AddTestTensor("", {2, 3, 5});
+  TRT_TensorOrWeights tw(input_tensor);
+  const nvinfer1::ITensor* output_tensor = nullptr;
+
+  // Shape size doesn't match.
+  ExpectStatus(converter_.PrepareTensorForShape(tw, GetTestDims({2, 3, 6}),
+                                                &output_tensor),
+               error::INVALID_ARGUMENT, "Reshape shapes are not compatible.");
+
+  // TODO(aaroey): we should check the case where uninferred dimensions are not
+  // an exact divisor of input dim ensions, e.g. for dims {-1, 7}.
+
+  // Infer shape, ok.
+  TF_EXPECT_OK(converter_.PrepareTensorForShape(tw, GetTestDims({-1, 2}),
+                                                &output_tensor));
+  EXPECT_TRUE(Equals(GetTestDims({15, 2}), output_tensor->getDimensions()))
+      << DebugString(*output_tensor);
+
+  // Regular shape.
+  TF_EXPECT_OK(converter_.PrepareTensorForShape(tw, GetTestDims({10, 3}),
+                                                &output_tensor));
+  EXPECT_TRUE(Equals(GetTestDims({10, 3}), output_tensor->getDimensions()))
+      << DebugString(*output_tensor);
+}
+
+#if NV_TENSORRT_MAJOR > 3
+TEST_F(ConverterTest, PrepareTensorForShape_Weights) {
+  TRT_ShapedWeights weights =
+      converter_.GetTempWeights(DT_FLOAT, GetTestDims({2, 3, 5}));
+  TRT_TensorOrWeights tw(weights);
+  const nvinfer1::ITensor* output_tensor = nullptr;
+  TF_EXPECT_OK(converter_.PrepareTensorForShape(tw, GetTestDims({10, 3}),
+                                                &output_tensor));
+  EXPECT_TRUE(Equals(GetTestDims({10, 3}), output_tensor->getDimensions()))
+      << DebugString(*output_tensor);
+}
+#endif
+
+template <DataType dtype, typename InputCType, typename OutputCType>
+void TestConvertConst(ConverterForTest* converter) {
+  NodeDef node_def;
+  node_def.set_name("my_const");
+  node_def.set_op("Const");
+
+  auto reset_and_test = [&node_def, converter](
+                            const Tensor& tensor, const bool as_tensor_content,
+                            const std::vector<int>& expected_dims,
+                            const std::vector<OutputCType>& expected_value) {
+    converter->Reset();
+
+    auto& attr = *node_def.mutable_attr();
+    if (as_tensor_content) {
+      tensor.AsProtoTensorContent(attr["value"].mutable_tensor());
+    } else {
+      tensor.AsProtoField(attr["value"].mutable_tensor());
+    }
+    TF_EXPECT_OK(converter->ConvertNode(node_def));
+    TRT_TensorOrWeights output = converter->GetTensorOrWeights("my_const");
+    EXPECT_TRUE(Equals(GetTestDims(expected_dims), output.weights().shape_))
+        << output.DebugString();
+    ASSERT_EQ(expected_value.size(), output.weights().count())
+        << output.DebugString();
+    const OutputCType* actual_values =
+        static_cast<const OutputCType*>(output.weights().GetValues());
+    for (int i = 0; i < expected_value.size(); ++i) {
+      EXPECT_EQ(expected_value[i], actual_values[i]);
+    }
+  };
+
+  auto& attr = *node_def.mutable_attr();
+  attr["dtype"].set_type(dtype);
+  {
+    // By default empty tensor will pick DT_FLOAT as data type and we fix it
+    // here.
+    attr["value"].mutable_tensor()->set_dtype(dtype);
+    Tensor t;  // Empty tensor.
+    reset_and_test(t, false, {}, {});
+  }
+  {
+    Tensor t = ::tensorflow::test::AsScalar<InputCType>(12);
+    reset_and_test(t, false, {1}, {12});
+    reset_and_test(t, true, {1}, {12});
+  }
+  {
+    Tensor t = ::tensorflow::test::AsTensor<InputCType>({1, 2});
+    reset_and_test(t, false, {2}, {1, 2});
+    reset_and_test(t, true, {2}, {1, 2});
+  }
+  {
+    Tensor t = ::tensorflow::test::AsTensor<InputCType>({1, 2, 3, 4, 5, 6},
+                                                        TensorShape({2, 3}));
+    reset_and_test(t, false, {2, 3}, {1, 2, 3, 4, 5, 6});
+    reset_and_test(t, true, {2, 3}, {1, 2, 3, 4, 5, 6});
+  }
+}
+
+TEST_F(ConverterTest, ConvertConst) {
+  {
+    converter_.Reset();
+    NodeDef node_def = MakeNodeDef("my_const", "Const", {"input"});
+    AddTestTensor("input", {1});
+    ExpectStatus(
+        converter_.ConvertNode(node_def), error::INVALID_ARGUMENT,
+        "Constant node is expected to have empty input list: my_const");
+  }
+  {
+    converter_.Reset();
+    NodeDef node_def = MakeNodeDef("my_const", "Const", {});
+    (*node_def.mutable_attr())["dtype"].set_type(DT_DOUBLE);
+    ExpectStatus(converter_.ConvertNode(node_def), error::INVALID_ARGUMENT,
+                 "Unsupported data type");
+  }
+
+  TestConvertConst<DT_FLOAT, float, float>(&converter_);
+  TestConvertConst<DT_INT8, int8, int32>(&converter_);
+#if NV_TENSORRT_MAJOR > 3
+  TestConvertConst<DT_INT32, int32, int32>(&converter_);
+#endif
+}
+
+TEST_F(ConverterTest, ConvertTranspose) {
+  {
+    // Input list is empty, should fail.
+    NodeDef node_def = MakeNodeDef("my_transpose", "Transpose", {});
+    ExpectStatus(converter_.ConvertNode(node_def), error::INVALID_ARGUMENT,
+                 "Input expects tensor and weights, at my_transpose");
+  }
+  NodeDef node_def =
+      MakeNodeDef("my_transpose", "Transpose", {"input", "weights"});
+  {
+    // Permutation is a tensor, should fail.
+    converter_.Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestTensor("weights", {3});
+    ExpectStatus(converter_.ConvertNode(node_def), error::INVALID_ARGUMENT,
+                 "Input expects tensor and weights, at my_transpose");
+  }
+  {
+    // Transpose at batch dimension, should fail.
+    converter_.Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("weights", DT_INT32, {4}, {1, 0, 2, 3});
+    ExpectStatus(converter_.ConvertNode(node_def), error::UNIMPLEMENTED,
+                 "Transpose at batch dimension is not supported");
+  }
+  {
+    // Permutation rank doesn't match, should fail.
+    converter_.Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("weights", DT_INT32, {3}, {0, 1, 2});
+    ExpectStatus(
+        converter_.ConvertNode(node_def), error::INVALID_ARGUMENT,
+        "Rank of perm for transpose does not match with that of the input.");
+  }
+  {
+    // Ok.
+    converter_.Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("weights", DT_INT32, {4}, {0, 3, 1, 2});
+    TF_EXPECT_OK(converter_.ConvertNode(node_def));
+    TRT_TensorOrWeights output = converter_.GetTensorOrWeights("my_transpose");
+    EXPECT_TRUE(output.is_tensor());
+    EXPECT_TRUE(
+        Equals(GetTestDims({3, 1, 2}), output.tensor()->getDimensions()))
+        << output.DebugString();
+
+    std::vector<float> output_data(6);
+    converter_.BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_transpose",
+                           &output_data);
+    EXPECT_THAT(output_data, ElementsAre(1, 4, 2, 5, 3, 6));
+  }
+}
+
+TEST_F(ConverterTest, ConvertReshape) {
+  {
+    // Input list is empty, should fail.
+    NodeDef node_def = MakeNodeDef("my_reshape", "Reshape", {});
+    ExpectStatus(converter_.ConvertNode(node_def), error::INVALID_ARGUMENT,
+                 "Input expects weights for shape, at my_reshape");
+  }
+  NodeDef node_def = MakeNodeDef("my_reshape", "Reshape", {"input", "weights"});
+  {
+    // Shape is a tensor, should fail.
+    converter_.Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestTensor("weights", {3});
+    ExpectStatus(converter_.ConvertNode(node_def), error::INVALID_ARGUMENT,
+                 "Input expects weights for shape, at my_reshape");
+  }
+  {
+    // Reshape to scalar, should fail.
+    converter_.Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("weights", DT_INT32, {}, {});
+    ExpectStatus(converter_.ConvertNode(node_def), error::UNIMPLEMENTED,
+                 "Reshape to shape=[] is not supported, at my_reshape");
+  }
+  {
+    // Reshape at batch dimension, should fail.
+    converter_.Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("weights", DT_INT32, {4}, {-1, 1, 1, 2});
+    ExpectStatus(converter_.ConvertNode(node_def), error::UNIMPLEMENTED,
+                 "Reshape on the batch dimension is not supported");
+  }
+  {
+    // Reshape at batch dimension, should fail.
+    converter_.Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("weights", DT_INT32, {4}, {3, 1, 1, 2});
+    ExpectStatus(converter_.ConvertNode(node_def), error::UNIMPLEMENTED,
+                 "Reshape on the batch dimension is not supported");
+  }
+  // Reshape on non batch dimensions, ok.
+  for (int batch_dim : {-1, 1}) {
+    converter_.Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("weights", DT_INT32, {4}, {batch_dim, 1, 3, 2});
+    TF_EXPECT_OK(converter_.ConvertNode(node_def));
+    TRT_TensorOrWeights output = converter_.GetTensorOrWeights("my_reshape");
+    EXPECT_TRUE(output.is_tensor());
+    EXPECT_TRUE(
+        Equals(GetTestDims({1, 3, 2}), output.tensor()->getDimensions()))
+        << output.DebugString();
+
+    std::vector<float> output_data(6);
+    converter_.BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_reshape",
+                           &output_data);
+    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
+  }
+}
+
+}  // namespace convert
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
-- 
GitLab


From 8682b67fbdad692a8cad705a795caf1a62788941 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 16:07:06 -0700
Subject: [PATCH 1006/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 217224726

---
 tensorflow/go/op/wrappers.go | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 2031e60773..662f6f227a 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -6313,6 +6313,17 @@ func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) {
 	return op.Output(0)
 }
 
+// ParseExampleDatasetAttr is an optional argument to ParseExampleDataset.
+type ParseExampleDatasetAttr func(optionalAttr)
+
+// ParseExampleDatasetSloppy sets the optional sloppy attribute to value.
+// If not specified, defaults to false
+func ParseExampleDatasetSloppy(value bool) ParseExampleDatasetAttr {
+	return func(m optionalAttr) {
+		m["sloppy"] = value
+	}
+}
+
 // Transforms `input_dataset` containing `Example` protos as vectors of DT_STRING into a dataset of `Tensor` or `SparseTensor` objects representing the parsed features.
 //
 // Arguments:
@@ -6338,11 +6349,14 @@ func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) {
 // given feature along this dimension.
 //	output_types: The type list for the return values.
 //	output_shapes: The list of shapes being produced.
-func ParseExampleDataset(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
+func ParseExampleDataset(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ParseExampleDatasetAttr) (handle tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	attrs := map[string]interface{}{"sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes, "output_types": output_types, "output_shapes": output_shapes}
+	for _, a := range optional {
+		a(attrs)
+	}
 	opspec := tf.OpSpec{
 		Type: "ParseExampleDataset",
 		Input: []tf.Input{
-- 
GitLab


From e3fe13ba06cd5c3f1d243ff04ddcb2c9c5ea150a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 16:19:37 -0700
Subject: [PATCH 1007/1085] [XLA] More module proto verification added.

PiperOrigin-RevId: 217226561
---
 tensorflow/compiler/xla/layout_util.cc        | 13 ++-
 tensorflow/compiler/xla/layout_util.h         |  7 +-
 tensorflow/compiler/xla/layout_util_test.cc   | 87 +++++++++++++++++++
 .../compiler/xla/service/hlo_schedule.cc      |  1 -
 .../xla/service/hlo_sharding_metadata.cc      | 10 +--
 .../compiler/xla/service/hlo_verifier.cc      | 21 ++++-
 tensorflow/compiler/xla/shape_util.cc         |  9 +-
 7 files changed, 130 insertions(+), 18 deletions(-)

diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index 19667b7ed9..66af644cf7 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -162,18 +162,23 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
   LayoutUtil::SetToDefaultLayout(program_shape->mutable_result());
 }
 
-/* static */ Status LayoutUtil::ValidateLayoutInShape(const Shape& shape) {
+/* static */ Status LayoutUtil::ValidateLayoutInShape(
+    const Shape& shape, bool allow_missing_layouts) {
   if (ShapeUtil::IsTuple(shape)) {
     // Tuple shape.
     if (shape.has_layout()) {
       return InvalidArgument("tuple should not have a layout field");
     }
     for (auto& element_shape : shape.tuple_shapes()) {
-      TF_RETURN_IF_ERROR(ValidateLayoutInShape(element_shape));
+      TF_RETURN_IF_ERROR(
+          ValidateLayoutInShape(element_shape, allow_missing_layouts));
     }
     return Status::OK();
   } else if (ShapeUtil::IsArray(shape)) {
     if (!shape.has_layout()) {
+      if (allow_missing_layouts) {
+        return Status::OK();
+      }
       return InvalidArgument("shape %s does not have a layout",
                              ShapeUtil::HumanString(shape));
     }
@@ -207,8 +212,8 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
 
   if (layout.format() == INVALID_FORMAT || !Format_IsValid(layout.format())) {
     return InvalidArgument(
-        "Layout does not have a valid format: layout {%s}, shape {%s}",
-        layout.ShortDebugString(), shape.ShortDebugString());
+        "Layout has an invalid format (%d) in layout {%s}, shape {%s}",
+        layout.format(), layout.ShortDebugString(), shape.ShortDebugString());
   }
 
   if (layout.format() == DENSE) {
diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h
index af032b1cae..97806d7e33 100644
--- a/tensorflow/compiler/xla/layout_util.h
+++ b/tensorflow/compiler/xla/layout_util.h
@@ -68,8 +68,11 @@ class LayoutUtil {
   // default.
   static void SetToDefaultLayout(ProgramShape* program_shape);
 
-  // Validates that the layout within the given shape is correct.
-  static Status ValidateLayoutInShape(const Shape& shape);
+  // Validates that the layout within the given shape is correct. The check
+  // is performed for all subshapes as well. If missing layouts are allowed
+  // the check does not fail on array shapes without layouts.
+  static Status ValidateLayoutInShape(const Shape& shape,
+                                      bool allow_missing_layouts = false);
 
   // Validates that the provided layout satisfies invariants for the given
   // shape.
diff --git a/tensorflow/compiler/xla/layout_util_test.cc b/tensorflow/compiler/xla/layout_util_test.cc
index f25dae6ff4..a50d53eaeb 100644
--- a/tensorflow/compiler/xla/layout_util_test.cc
+++ b/tensorflow/compiler/xla/layout_util_test.cc
@@ -352,5 +352,92 @@ TEST_F(LayoutUtilTest, StreamOut) {
   EXPECT_EQ(oss.str(), "{0,1,2}");
 }
 
+TEST_F(LayoutUtilTest, ValidateLayout_ValidArrayLayout) {
+  Shape shape = ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {0, 1});
+  auto status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/false);
+  EXPECT_TRUE(status.ok());
+  status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/true);
+  EXPECT_TRUE(status.ok());
+}
+
+TEST_F(LayoutUtilTest, ValidateLayout_InvalidArrayLayout) {
+  Shape shape = ShapeUtil::MakeShape(F32, {2, 3});
+  *shape.mutable_layout() = LayoutUtil::MakeLayout({0, 1, 2});
+  auto status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/false);
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::HasSubstr("layout minor_to_major field "
+                                   "contains 3 elements, but shape is rank 2"));
+  status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/true);
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::HasSubstr("layout minor_to_major field "
+                                   "contains 3 elements, but shape is rank 2"));
+}
+
+TEST_F(LayoutUtilTest, ValidateLayout_MissingArrayLayout) {
+  Shape shape = ShapeUtil::MakeShape(F32, {2, 3});
+  LayoutUtil::ClearLayout(&shape);
+  auto status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/false);
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::HasSubstr("shape f32[2,3] does not have a layout"));
+  status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/true);
+  EXPECT_TRUE(status.ok());
+}
+
+TEST_F(LayoutUtilTest, ValidateLayout_TupleWithLayout) {
+  Shape shape = ShapeUtil::MakeTupleShape({});
+  *shape.mutable_layout() = LayoutUtil::MakeLayout({0});
+  auto status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/false);
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::HasSubstr("tuple should not have a layout field"));
+  status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/true);
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::HasSubstr("tuple should not have a layout field"));
+}
+
+TEST_F(LayoutUtilTest, ValidateLayout_TupleSubshapesWithMissingLayouts) {
+  Shape sub_1_1_1 = ShapeUtil::MakeShape(F32, {1, 2});
+  Shape sub_1_1 = ShapeUtil::MakeTupleShape({sub_1_1_1});
+  Shape sub_1_2 = ShapeUtil::MakeShape(F32, {1, 2});
+  LayoutUtil::ClearLayout(&sub_1_2);
+  Shape sub_1 = ShapeUtil::MakeTupleShape({sub_1_1, sub_1_2});
+  Shape sub_2_1 = ShapeUtil::MakeShape(F32, {9});
+  LayoutUtil::ClearLayout(&sub_2_1);
+  Shape sub_2 = ShapeUtil::MakeTupleShape({sub_2_1});
+  Shape shape = ShapeUtil::MakeTupleShape({sub_1, sub_2});
+
+  auto status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/false);
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::HasSubstr("shape f32[1,2] does not have a layout"));
+  status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/true);
+  EXPECT_TRUE(status.ok());
+
+  // Add invalid layout on one of sub-shapes.
+  *shape.mutable_tuple_shapes(1)->mutable_tuple_shapes(0)->mutable_layout() =
+      LayoutUtil::MakeLayout({0, 2, 3});
+
+  status =
+      LayoutUtil::ValidateLayoutInShape(shape, /*allow_missing_layouts=*/true);
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(status.error_message(),
+              ::testing::HasSubstr("layout minor_to_major field "
+                                   "contains 3 elements, but shape is rank 1"));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_schedule.cc b/tensorflow/compiler/xla/service/hlo_schedule.cc
index 9972eb2077..0778ff5217 100644
--- a/tensorflow/compiler/xla/service/hlo_schedule.cc
+++ b/tensorflow/compiler/xla/service/hlo_schedule.cc
@@ -235,7 +235,6 @@ Status HloSchedule::Update() {
 
 Status HloSchedule::Verify() const {
   VLOG(2) << "VerifySchedule()";
-  XLA_VLOG_LINES(3, module_->ToString());
   XLA_VLOG_LINES(2, ToString());
 
   // Verify schedule contains exactly the same set of non-fusion computations as
diff --git a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc
index e3f4a9852a..88329c8997 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc
@@ -169,14 +169,14 @@ Status ApplyDomainSingleSharding(const DomainMetadata::Domain& domain,
 // If user is a tuple instruction, return the tuple subsharding corresponding to
 // the operand matching the instruction argument, because that is the
 // subsharding corresponding to instruction.
-ShapeTree<HloSharding> GetShardingTreeFromUser(
+StatusOr<ShapeTree<HloSharding>> GetShardingTreeFromUser(
     const HloInstruction& instruction, const HloInstruction& user) {
   if (user.opcode() == HloOpcode::kTuple) {
     return user.sharding()
         .GetSubSharding(user.shape(), {user.operand_index(&instruction)})
-        .GetAsShapeTree(instruction.shape());
+        .AsShapeTree(instruction.shape());
   }
-  return user.sharding().GetAsShapeTree(user.shape());
+  return user.sharding().AsShapeTree(user.shape());
 }
 
 // Assign rhs to lhs. If rhs is unassigned (assigned to kUnassignedDevice)
@@ -264,8 +264,8 @@ StatusOr<bool> ApplyShardingFromUsers(HloInstruction* instruction,
       continue;
     }
     AssignmentKind sub_assigned = AssignmentKind::kUnassigned;
-    ShapeTree<HloSharding> user_sharding_tree =
-        GetShardingTreeFromUser(*instruction, *user);
+    TF_ASSIGN_OR_RETURN(ShapeTree<HloSharding> user_sharding_tree,
+                        GetShardingTreeFromUser(*instruction, *user));
     if (ShapeUtil::IsTuple(instruction->shape())) {
       // For tuple-shaped instructions collect individual tuple subshardings
       // from the uses, and then combine them into the tuple sharding.
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index ba95cef21d..a1cb60a049 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -388,7 +388,15 @@ Status ShapeVerifier::HandleParameter(HloInstruction* hlo) {
 }
 
 Status ShapeVerifier::HandleFusion(HloInstruction* fusion) {
-  for (HloInstruction* fused_param : fusion->fused_parameters()) {
+  auto& fused_parameters = fusion->fused_parameters();
+  if (fused_parameters.size() != fusion->operand_count()) {
+    return InternalError(
+        "Fused parameter count (%d) does not match the number of operands (%d)"
+        " passed to the fusion instruction in: %s.",
+        fused_parameters.size(), fusion->operand_count(),
+        fusion->ToString().c_str());
+  }
+  for (HloInstruction* fused_param : fused_parameters) {
     int64 param_no = fused_param->parameter_number();
     if (!ShapesSame(fused_param->shape(), fusion->operand(param_no)->shape())) {
       return InternalError(
@@ -891,6 +899,9 @@ Status CheckEntryComputationLayout(const HloModule& module) {
   const auto& layout = module.entry_computation_layout();
   const ShapeLayout& result_layout = layout.result_layout();
 
+  TF_RETURN_IF_ERROR(
+      ShapeUtil::ValidateShapeWithOptionalLayout(result_layout.shape()));
+
   if (LayoutUtil::IsSparseArray(result_layout.shape())) {
     return Unimplemented(
         "Sparse arrays are not yet fully supported in program result shape: %s",
@@ -915,6 +926,8 @@ Status CheckEntryComputationLayout(const HloModule& module) {
 
   for (int i = 0; i < computation->num_parameters(); ++i) {
     const HloInstruction* parameter = computation->parameter_instruction(i);
+    TF_RETURN_IF_ERROR(
+        ShapeUtil::ValidateShapeWithOptionalLayout(layout.parameter_shape(i)));
     if (LayoutUtil::IsSparseArray(layout.parameter_shape(i))) {
       return Unimplemented(
           "Sparse arrays are not yet fully supported "
@@ -1319,6 +1332,12 @@ class InstructionVerifier : public DfsHloVisitorWithDefault {
 
 StatusOr<bool> HloVerifier::Run(HloModule* module) {
   TF_RET_CHECK(!module->name().empty());
+
+  if (module->entry_computation()->IsFusionComputation()) {
+    return InvalidArgument(
+        "Module entry computation cannot be a fusion computation");
+  }
+
   TF_RETURN_IF_ERROR(VerifyHloStructure(module));
   TF_RETURN_IF_ERROR(VerifySendsAndRecvs(*module));
 
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 6c4b1485d2..f55508f8e6 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -958,11 +958,10 @@ StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
 
 /* static */ Status ShapeUtil::ValidateShapeWithOptionalLayout(
     const Shape& shape) {
-  if (LayoutUtil::HasLayout(shape)) {
-    // Since a layout is present, upgrade to the full set of invariant checks.
-    return ValidateShape(shape);
-  }
-  return ValidateShapeWithOptionalLayoutInternal(shape);
+  TF_RETURN_IF_ERROR(ValidateShapeWithOptionalLayoutInternal(shape));
+
+  return LayoutUtil::ValidateLayoutInShape(shape,
+                                           /*allow_missing_layouts=*/true);
 }
 
 /* static */ Status ShapeUtil::ValidateShape(const Shape& shape) {
-- 
GitLab


From 9b50f7ddbfda157408db378d4bc4137e201f2e6f Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Mon, 15 Oct 2018 16:29:32 -0700
Subject: [PATCH 1008/1085] Check shape of item tensor in TensorListSetItem
 kernel.

PiperOrigin-RevId: 217228117
---
 tensorflow/core/kernels/list_kernels.cc         |  9 ++++++++-
 tensorflow/python/kernel_tests/list_ops_test.py | 13 +++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc
index 2088c13586..95dfb991fd 100644
--- a/tensorflow/core/kernels/list_kernels.cc
+++ b/tensorflow/core/kernels/list_kernels.cc
@@ -449,9 +449,16 @@ class TensorListSetItem : public OpKernel {
                 errors::InvalidArgument("Trying to modify element ", index,
                                         " in a list with ", l->tensors.size(),
                                         " elements."));
+    const Tensor& value = c->input(2);
+    OP_REQUIRES(c, l->element_shape.IsCompatibleWith(value.shape()),
+                errors::InvalidArgument(
+                    "Tried to set a tensor with incompatible shape at a "
+                    "list index. Item element shape: ",
+                    value.shape().DebugString(),
+                    " list shape: ", l->element_shape.DebugString()));
     TensorList output;
     output = *l;
-    output.tensors[index] = c->input(2);
+    output.tensors[index] = value;
     Tensor* result;
     AllocatorAttributes attr;
     attr.set_on_host(true);
diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py
index 8e51d904ab..e42e213266 100644
--- a/tensorflow/python/kernel_tests/list_ops_test.py
+++ b/tensorflow/python/kernel_tests/list_ops_test.py
@@ -473,6 +473,19 @@ class ListOpsTest(test_util.TensorFlowTestCase):
     with self.assertRaises(errors.InvalidArgumentError):
       self.evaluate(list_ops.tensor_list_set_item(l, 20, 3.0))
 
+  def testSetItemWithMismatchedShapeFails(self):
+    with self.cached_session() as sess:
+      ph = array_ops.placeholder(dtypes.float32)
+      c = constant_op.constant([1.0, 2.0])
+      l = list_ops.tensor_list_from_tensor(c, element_shape=scalar_shape())
+      # Set a placeholder with unknown shape to satisfy the shape inference
+      # at graph building time.
+      l = list_ops.tensor_list_set_item(l, 0, ph)
+      l_0 = list_ops.tensor_list_get_item(l, 0, element_dtype=dtypes.float32)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   "incompatible shape"):
+        sess.run(l_0, {ph: [3.0]})
+
   @test_util.run_in_graph_and_eager_modes
   def testResourceVariableScatterGather(self):
     c = constant_op.constant([1.0, 2.0], dtype=dtypes.float32)
-- 
GitLab


From 88962cb69cedf3220050b0a49fa7cb6ad038c69c Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Mon, 15 Oct 2018 16:39:54 -0700
Subject: [PATCH 1009/1085] Lower FakeParamOp to appropriately sized zeros
 output.

FakeParamOp is used in CondV2 to signify a value that is defined along one
branch but not the other. These values should only be used along the
corresponding branch. For the lowering to XLA create a zero output of the
appropriate shape and type for the FakeParam (the value should never be used,
so any value could be set, zero is convenient).

This could be extended in future to instead be a dead value so that additional
verification could be done post lowering.

PiperOrigin-RevId: 217229911
---
 tensorflow/compiler/tf2xla/kernels/BUILD      |  1 +
 .../compiler/tf2xla/kernels/fake_param_op.cc  | 51 +++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 tensorflow/compiler/tf2xla/kernels/fake_param_op.cc

diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 224e5ea123..9ee4178f5c 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -40,6 +40,7 @@ tf_kernel_library(
         "dynamic_stitch_op.cc",
         "elu_op.cc",
         "extract_image_patches_op.cc",
+        "fake_param_op.cc",
         "fake_quantize_ops.cc",
         "fft_ops.cc",
         "fill_op.cc",
diff --git a/tensorflow/compiler/tf2xla/kernels/fake_param_op.cc b/tensorflow/compiler/tf2xla/kernels/fake_param_op.cc
new file mode 100644
index 0000000000..ec3463bd58
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/fake_param_op.cc
@@ -0,0 +1,51 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/xla_compiler.h"
+#include "tensorflow/compiler/tf2xla/xla_helpers.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/core/framework/kernel_def_builder.h"
+
+namespace tensorflow {
+
+// This OpKernel implements the FakeParam Op for XLA JIT devices. Create zeros
+// with the appropriate shape for FakeParam op.
+class XlaFakeParamOp : public XlaOpKernel {
+ public:
+  explicit XlaFakeParamOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    DataType dtype;
+    TensorShape tensor_shape;
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &tensor_shape));
+    OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype, tensor_shape, &shape_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    xla::XlaBuilder* b = ctx->builder();
+    ctx->SetOutput(0, xla::Zeros(b, shape_));
+  }
+
+ private:
+  xla::Shape shape_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(XlaFakeParamOp);
+};
+
+REGISTER_XLA_OP(Name("FakeParam"), XlaFakeParamOp);
+
+}  // namespace tensorflow
-- 
GitLab


From 321c0822a4aa732e18bedec60e0cb899dcf84445 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 15 Oct 2018 16:56:52 -0700
Subject: [PATCH 1010/1085] Moves some examples from tfe.defun to tfe.function.

PiperOrigin-RevId: 217232503
---
 tensorflow/contrib/eager/python/BUILD            |  1 +
 .../python/examples/densenet/densenet_test.py    |  1 +
 .../contrib/eager/python/examples/l2hmc/main.py  |  4 ++--
 .../python/examples/resnet50/resnet50_test.py    |  7 ++++---
 .../eager/python/examples/revnet/revnet_test.py  |  5 ++++-
 tensorflow/contrib/eager/python/tfe.py           |  6 ++++--
 .../core/kernels/partitioned_function_ops.cc     |  6 ++++++
 tensorflow/python/eager/BUILD                    |  1 +
 tensorflow/python/eager/def_function.py          | 16 ++++++++++++----
 tensorflow/python/eager/function.py              |  9 +++++----
 10 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index b35ac3abe9..77052a75a7 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -28,6 +28,7 @@ py_library(
         "//tensorflow/python:variable_scope",
         "//tensorflow/python/eager:backprop",
         "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:def_function",
         "//tensorflow/python/eager:execution_callbacks",
         "//tensorflow/python/eager:function",
     ],
diff --git a/tensorflow/contrib/eager/python/examples/densenet/densenet_test.py b/tensorflow/contrib/eager/python/examples/densenet/densenet_test.py
index e5058bfd94..a9fb0035d2 100644
--- a/tensorflow/contrib/eager/python/examples/densenet/densenet_test.py
+++ b/tensorflow/contrib/eager/python/examples/densenet/densenet_test.py
@@ -228,6 +228,7 @@ class DensenetBenchmark(tf.test.Benchmark):
                                 weight_decay=1e-4, dropout_rate=0,
                                 pool_initial=True, include_top=True)
       if defun:
+        # TODO(apassos) enable tfe.function here
         model.call = tfe.defun(model.call)
       batch_size = 64
       num_burn = 5
diff --git a/tensorflow/contrib/eager/python/examples/l2hmc/main.py b/tensorflow/contrib/eager/python/examples/l2hmc/main.py
index 45e1f98429..98fcb2ba10 100644
--- a/tensorflow/contrib/eager/python/examples/l2hmc/main.py
+++ b/tensorflow/contrib/eager/python/examples/l2hmc/main.py
@@ -71,7 +71,7 @@ def main(_):
     # Training
     if FLAGS.use_defun:
       # Use `tfe.deun` to boost performance when there are lots of small ops
-      loss_fn = tfe.defun(l2hmc.compute_loss)
+      loss_fn = tfe.function(l2hmc.compute_loss)
     else:
       loss_fn = l2hmc.compute_loss
 
@@ -104,7 +104,7 @@ def main(_):
   # Evaluation
   if FLAGS.use_defun:
     # Use tfe.deun to boost performance when there are lots of small ops
-    apply_transition = tfe.defun(dynamics.apply_transition)
+    apply_transition = tfe.function(dynamics.apply_transition)
   else:
     apply_transition = dynamics.apply_transition
 
diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
index d265169b5e..e406aee29d 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
@@ -77,7 +77,7 @@ class ResNet50Test(tf.test.TestCase):
     device, data_format = device_and_data_format()
     model = resnet50.ResNet50(data_format)
     if defun:
-      model.call = tfe.defun(model.call)
+      model.call = tfe.function(model.call)
     with tf.device(device), tfe.execution_mode(execution_mode):
       images, _ = random_batch(2, data_format)
       output = model(images, training=False)
@@ -221,7 +221,7 @@ class ResNet50Benchmarks(tf.test.Benchmark):
       device, data_format = device_and_format
       model = resnet50.ResNet50(data_format)
       if defun:
-        model.call = tfe.defun(model.call)
+        model.call = tfe.function(model.call)
       batch_size = 64
       num_burn = 5
       num_iters = 30
@@ -266,7 +266,8 @@ class ResNet50Benchmarks(tf.test.Benchmark):
         optimizer = tf.train.GradientDescentOptimizer(0.1)
         apply_grads = apply_gradients
         if defun:
-          model.call = tfe.defun(model.call)
+          model.call = tfe.function(model.call)
+          # TODO(apassos) enable tf.function here
           apply_grads = tfe.defun(apply_gradients)
 
         num_burn = 3
diff --git a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
index 4f4cc3af6f..971aa44f30 100644
--- a/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
+++ b/tensorflow/contrib/eager/python/examples/revnet/revnet_test.py
@@ -127,6 +127,8 @@ class RevNetTest(tf.test.TestCase):
 
   def test_compute_gradients_defun(self):
     """Test `compute_gradients` function with defun."""
+    # TODO(apassos): make cond support returning None to let this happen with
+    # tf.function.
     compute_gradients = tfe.defun(self.model.compute_gradients)
     _, saved_hidden = self.model(self.x)
     grads, _ = compute_gradients(saved_hidden=saved_hidden, labels=self.t)
@@ -235,6 +237,7 @@ class RevNetBenchmark(tf.test.Benchmark):
       device, data_format = device_and_format
       model = revnet.RevNet(config=config)
       if defun:
+        # TODO(apassos): reenable after cond lets you return None
         model.call = tfe.defun(model.call)
       batch_size = 64
       num_burn = 5
@@ -282,7 +285,7 @@ class RevNetBenchmark(tf.test.Benchmark):
         model = revnet.RevNet(config=config)
         optimizer = tf.train.GradientDescentOptimizer(0.1)
         if defun:
-          model.call = tfe.defun(model.call)
+          model.call = tfe.function(model.call)
 
         num_burn = 3
         num_iters = 10
diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py
index f5b8d95e4f..33c988fd90 100644
--- a/tensorflow/contrib/eager/python/tfe.py
+++ b/tensorflow/contrib/eager/python/tfe.py
@@ -25,6 +25,7 @@ To use, at program startup, call `tf.enable_eager_execution()`.
 
 @@py_func
 @@defun
+@@function
 @@make_template
 @@implicit_gradients
 @@implicit_value_and_gradients
@@ -101,7 +102,7 @@ from tensorflow.contrib.eager.python.saver import get_optimizer_variables
 from tensorflow.contrib.eager.python.saver import restore_variables_on_create
 from tensorflow.contrib.eager.python.saver import Saver
 from tensorflow.python.eager import backprop
-from tensorflow.python.eager import function
+from tensorflow.python.eager import function as _function_lib
 from tensorflow.python.eager.context import DEVICE_PLACEMENT_EXPLICIT
 from tensorflow.python.eager.context import DEVICE_PLACEMENT_WARN
 from tensorflow.python.eager.context import DEVICE_PLACEMENT_SILENT
@@ -115,6 +116,7 @@ from tensorflow.python.eager.context import SYNC
 from tensorflow.python.eager.context import ASYNC
 from tensorflow.python.eager.context import num_gpus
 from tensorflow.python.eager.context import set_server_def
+from tensorflow.python.eager.def_function import function
 from tensorflow.python.eager.execution_callbacks import add_execution_callback
 from tensorflow.python.eager.execution_callbacks import clear_execution_callbacks
 from tensorflow.python.eager.execution_callbacks import inf_callback
@@ -138,7 +140,7 @@ from tensorflow.python.training.checkpointable.util import Checkpoint
 from tensorflow.python.util.all_util import remove_undocumented
 
 py_func = script_ops.eager_py_func
-defun = function.defun
+defun = _function_lib.defun
 make_template = template.make_template_internal
 implicit_gradients = backprop.implicit_grad
 implicit_value_and_gradients = backprop.implicit_val_and_grad
diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index 9efd1deba0..0ec14f7a2a 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -242,6 +242,12 @@ class PartitionedCallOp : public AsyncOpKernel {
         int index = attr_value->i();
         TF_RETURN_IF_ERROR(node->attrs().Find("T", &attr_value));
         DataType dtype = attr_value->type();
+        if (dtype != args[index].dtype()) {
+          return errors::InvalidArgument("For argument ", index, " expected ",
+                                         DataTypeString(dtype), " tensor, got ",
+                                         DataTypeString(args[index].dtype()),
+                                         " instead.");
+        }
         if (dtype == DT_RESOURCE) {
           const ResourceHandle& handle = args[index].flat<ResourceHandle>()(0);
           node->set_assigned_device_name(handle.device());
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index a30737fbec..52ea495305 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -393,6 +393,7 @@ py_library(
     name = "def_function",
     srcs = ["def_function.py"],
     srcs_version = "PY2AND3",
+    visibility = ["//tensorflow:internal"],
     deps = [
         ":context",
         ":function",
diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index b23891d394..63f8e698a8 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -26,6 +26,7 @@ from tensorflow.python.eager import context
 from tensorflow.python.eager import function as function_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.training.checkpointable import base as checkpointable
@@ -225,7 +226,7 @@ class PolymorphicFunction(object):
     def variable_capturing_scope(unused_next_creator, **kwds):
       """Creates UnliftedInitializerVariables and saves references to them."""
       v = UnliftedInitializerVariable(**kwds)
-      self._created_variables.append(v)
+      self._created_variables.append(weakref.ref(v))
       return v
 
     self._stateful_fn = _defun_with_scope(
@@ -268,9 +269,16 @@ class PolymorphicFunction(object):
     def fn_with_cond(*inner_args, **inner_kwds):
       """Conditionally runs initialization if it's needed."""
       condition = True
-      for variable in self._created_variables:
-        condition = condition and resource_variable_ops.var_is_initialized_op(
-            variable.handle)
+      for wr in self._created_variables:
+        variable = wr()
+        if variable is None:
+          raise ValueError(
+              "Variable created in a tf.function garbage-collected. Code needs"
+              " to keep python references to variables created in a"
+              " tf.function.")
+        condition = math_ops.logical_and(
+            condition, resource_variable_ops.var_is_initialized_op(
+                variable.handle))
       # We want to call stateless_fn if possible because it avoids recomputing
       # potentially expensive initializers.
       return control_flow_ops.cond(
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 083c91a26b..e8d5416245 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1009,10 +1009,11 @@ def func_graph_from_py_func(name,
     func_graph.variables = variables
 
   # Register any other functions defined in the graph.
-  if context.executing_eagerly():
-    for f in func_graph._functions.values():  # pylint: disable=protected-access
-      # TODO(ashankar): What about the gradient registry?
-      _register(f._c_func.func)  # pylint: disable=protected-access
+  with ops.init_scope():
+    if context.executing_eagerly():
+      for f in func_graph._functions.values():  # pylint: disable=protected-access
+        # TODO(ashankar): What about the gradient registry?
+        _register(f._c_func.func)  # pylint: disable=protected-access
 
   return func_graph
 
-- 
GitLab


From 6194d396864c29b06ef041b92938afae40723848 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 15 Oct 2018 17:12:18 -0700
Subject: [PATCH 1011/1085] Introduce control_flow_util_v2.py

This new module includes:
* FuncGraph subclasses for different kinds of control flow. This can
  be used to determine if the current graph is building control flow.

* in_defun() method to determine if the containing context should
  follow defun or v1 graph semantics.

* Shared functionality between cond_v2 and while_v2.

This functionality will be used in future changes. I made a new file
rather than reusing control_flow_util.py due to import cycles. If/when
we get rid of the old control flow, these files can be consolidated.

PiperOrigin-RevId: 217235279
---
 tensorflow/python/BUILD                       | 14 +++-
 tensorflow/python/kernel_tests/BUILD          | 15 ++++
 .../kernel_tests/control_flow_util_v2_test.py | 66 +++++++++++++++++
 tensorflow/python/ops/cond_v2.py              | 33 +++------
 tensorflow/python/ops/control_flow_util_v2.py | 74 +++++++++++++++++++
 tensorflow/python/ops/while_v2.py             | 23 +++---
 6 files changed, 192 insertions(+), 33 deletions(-)
 create mode 100644 tensorflow/python/kernel_tests/control_flow_util_v2_test.py
 create mode 100644 tensorflow/python/ops/control_flow_util_v2.py

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 5ae4cd8a1a..ba1d9951ed 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2024,6 +2024,17 @@ py_library(
     ],
 )
 
+py_library(
+    name = "control_flow_util_v2",
+    srcs = ["ops/control_flow_util_v2.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "framework_ops",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:function",
+    ],
+)
+
 py_library(
     name = "cond_v2",
     srcs = [
@@ -2033,6 +2044,7 @@ py_library(
     deps = [
         ":array_ops",
         ":c_api_util",
+        ":control_flow_util_v2",
         ":framework_ops",
         ":function",
         ":function_def_to_graph",
@@ -2055,10 +2067,10 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":array_ops",
-        ":cond_v2",
         ":constant_op",
         ":control_flow_ops",
         ":control_flow_util",
+        ":control_flow_util_v2",
         ":framework_ops",
         ":function_def_to_graph",
         ":functional_ops_gen",
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index bbadc9907b..fa26690718 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1529,6 +1529,21 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "control_flow_util_v2_test",
+    size = "small",
+    srcs = ["control_flow_util_v2_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:cond_v2",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:control_flow_util_v2",
+        "//tensorflow/python:while_v2",
+    ],
+    tags = ["no_gpu"],  # TODO(b/117796385): runs out of memory
+)
+
 cuda_py_test(
     name = "conv1d_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/control_flow_util_v2_test.py b/tensorflow/python/kernel_tests/control_flow_util_v2_test.py
new file mode 100644
index 0000000000..d0374a7700
--- /dev/null
+++ b/tensorflow/python/kernel_tests/control_flow_util_v2_test.py
@@ -0,0 +1,66 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for tensorflow.python.ops.control_flow_util_v2."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.eager import function
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util_v2
+from tensorflow.python.platform import test
+
+
+class ControlFlowUtilV2Test(test.TestCase):
+
+  def setUp(self):
+    self._enable_cond_v2_old = control_flow_ops.ENABLE_COND_V2
+    self._enable_while_v2_old = control_flow_ops.ENABLE_WHILE_V2
+    control_flow_ops.ENABLE_COND_V2 = True
+    control_flow_ops.ENABLE_WHILE_V2 = True
+
+  def tearDown(self):
+    control_flow_ops.ENABLE_COND_V2 = self._enable_cond_v2_old
+    control_flow_ops.ENABLE_WHILE_V2 = self._enable_while_v2_old
+
+  def _create_control_flow(self, expect_in_defun):
+    """Helper method for testInDefun."""
+    def body(i):
+      def branch():
+        self.assertEqual(control_flow_util_v2.in_defun(), expect_in_defun)
+        return i + 1
+      return control_flow_ops.cond(constant_op.constant(True),
+                                   branch, lambda: 0)
+    return control_flow_ops.while_loop(lambda i: i < 4, body,
+                                       [constant_op.constant(0)])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testInDefun(self):
+    self._create_control_flow(False)
+
+    @function.defun
+    def defun():
+      self._create_control_flow(True)
+
+    defun()
+    self.assertFalse(control_flow_util_v2.in_defun())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/ops/cond_v2.py b/tensorflow/python/ops/cond_v2.py
index cb3943ce7c..b3ae378316 100644
--- a/tensorflow/python/ops/cond_v2.py
+++ b/tensorflow/python/ops/cond_v2.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import function_def_to_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import control_flow_util_v2 as util
 from tensorflow.python.ops import gen_functional_ops
 from tensorflow.python.ops import gradients_impl
 
@@ -62,9 +63,11 @@ def cond_v2(pred, true_fn, false_fn, name="cond"):
       false_name = graph.unique_name(("%sfalse" % scope).replace("/", "_"))
 
     true_graph = function.func_graph_from_py_func(
-        true_name, true_fn, [], {})
+        true_name, true_fn, [], {},
+        func_graph=util.CondBranchFuncGraph(true_name))
     false_graph = function.func_graph_from_py_func(
-        false_name, false_fn, [], {})
+        false_name, false_fn, [], {},
+        func_graph=util.CondBranchFuncGraph(false_name))
     _check_same_outputs(true_graph, false_graph)
 
     # Add inputs to true_graph and false_graph to make them match. Note that
@@ -93,8 +96,8 @@ def cond_v2(pred, true_fn, false_fn, name="cond"):
     tensors = gen_functional_ops._if(  # pylint: disable=protected-access
         pred,
         cond_inputs, [t.dtype for t in true_graph.outputs],
-        _create_new_tf_function(true_graph),
-        _create_new_tf_function(false_graph),
+        util.create_new_tf_function(true_graph),
+        util.create_new_tf_function(false_graph),
         output_shapes=_get_output_shapes(true_graph.outputs,
                                          false_graph.outputs),
         name=scope)
@@ -175,8 +178,8 @@ def _IfGrad(op, *grads):  # pylint: disable=invalid-name
   tensors = gen_functional_ops._if(
       op.inputs[0],
       grad_inputs, [t.dtype for t in true_grad_graph.outputs],
-      _create_new_tf_function(true_grad_graph),
-      _create_new_tf_function(false_grad_graph),
+      util.create_new_tf_function(true_grad_graph),
+      util.create_new_tf_function(false_grad_graph),
       output_shapes=_get_output_shapes(true_grad_graph.outputs,
                                        false_grad_graph.outputs))
 
@@ -266,7 +269,8 @@ def _grad_fn(func_graph, grads):
 def _create_grad_func(func_graph, grads, name):
   """Returns the FuncGraph representation of _grad_fn."""
   return function.func_graph_from_py_func(
-      name, lambda: _grad_fn(func_graph, grads), [], {})
+      name, lambda: _grad_fn(func_graph, grads), [], {},
+      func_graph=util.CondBranchFuncGraph(name))
 
 
 def _resolve_grad_inputs(cond_graph, grad_graph):
@@ -314,21 +318,6 @@ def _resolve_grad_inputs(cond_graph, grad_graph):
   return new_inputs
 
 
-def _create_new_tf_function(func_graph):
-  """Converts func_graph to a TF_Function and adds it to the current graph.
-
-  Args:
-    func_graph: function.FuncGraph
-
-  Returns:
-    The name of the new TF_Function.
-  """
-  func = function._EagerDefinedFunction(
-      func_graph.name, func_graph, func_graph.inputs, func_graph.outputs, {})
-  func.add_to_graph(func_graph.outer_graph)
-  return func_graph.name
-
-
 def _get_intermediates(func_graph):
   """Returns all tensors in `func_graph` that aren't inputs or outputs."""
   intermediates = []
diff --git a/tensorflow/python/ops/control_flow_util_v2.py b/tensorflow/python/ops/control_flow_util_v2.py
new file mode 100644
index 0000000000..f500d53cc6
--- /dev/null
+++ b/tensorflow/python/ops/control_flow_util_v2.py
@@ -0,0 +1,74 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utilties for V2 control flow."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.eager import context
+from tensorflow.python.eager import function
+from tensorflow.python.framework import ops
+
+
+class CondBranchFuncGraph(function.FuncGraph):
+  """FuncGraph for branches of tf.cond().
+
+  This is used to distinguish cond branches from other functions.
+  """
+  pass
+
+
+class WhileCondFuncGraph(function.FuncGraph):
+  """FuncGraph for the condition of tf.while_loop().
+
+  This is used to distinguish while conditions from other functions.
+  """
+  pass
+
+
+class WhileBodyFuncGraph(function.FuncGraph):
+  """FuncGraph for the body of tf.while_loop().
+
+  This is used to distinguish while bodies from other functions.
+  """
+  pass
+
+
+def in_defun():
+  """Returns if the current graph is, or is nested in, a defun."""
+  if context.executing_eagerly(): return False
+
+  graph = ops.get_default_graph()
+  while (isinstance(graph, CondBranchFuncGraph) or
+         isinstance(graph, WhileBodyFuncGraph)):
+    graph = graph.outer_graph
+  return isinstance(graph, function.FuncGraph)
+
+
+def create_new_tf_function(func_graph):
+  """Converts func_graph to a TF_Function and adds it to the current graph.
+
+  Args:
+    func_graph: function.FuncGraph
+
+  Returns:
+    The name of the new TF_Function.
+  """
+  func = function._EagerDefinedFunction(  # pylint: disable=protected-access
+      func_graph.name, func_graph, func_graph.inputs, func_graph.outputs, {})
+  func.add_to_graph(func_graph.outer_graph)
+  return func_graph.name
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index b805a46583..226dd57cf4 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -33,9 +33,9 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import cond_v2
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
+from tensorflow.python.ops import control_flow_util_v2 as util
 from tensorflow.python.ops import custom_gradient
 from tensorflow.python.ops import gen_functional_ops
 from tensorflow.python.ops import gradients_impl
@@ -87,7 +87,8 @@ def while_loop(cond, body, loop_vars, shape_invariants=None, name=None):
         for shape, t in zip(flattened_shapes, flattened_loop_vars)
     ]
     cond_graph = function.func_graph_from_py_func(
-        cond_name, wrapped_cond, flattened_loop_vars, {}, signature=signature)
+        cond_name, wrapped_cond, flattened_loop_vars, {}, signature=signature,
+        func_graph=util.WhileCondFuncGraph(cond_name))
 
     # Add external_captures of cond to the list of loop vars.
     # Note that external tensors will be treated as loop invariants, i.e.,
@@ -126,7 +127,8 @@ def while_loop(cond, body, loop_vars, shape_invariants=None, name=None):
         for shape, t in zip(flattened_shapes, flattened_loop_vars)
     ]
     body_graph = function.func_graph_from_py_func(
-        body_name, wrapped_body, flattened_loop_vars, {}, signature=signature)
+        body_name, wrapped_body, flattened_loop_vars, {}, signature=signature,
+        func_graph=util.WhileBodyFuncGraph(body_name))
     # Add external captures of body to the list of loop vars.
     # Note that external tensors will be treated as loop invariants, i.e.,
     # the value of that tensor in each iteration is the same as it was at the
@@ -177,8 +179,8 @@ def while_loop(cond, body, loop_vars, shape_invariants=None, name=None):
                          flattened_loop_vars[1:1 + num_outputs])
     outputs = gen_functional_ops._while(
         flattened_loop_vars,
-        cond_v2._create_new_tf_function(cond_graph),
-        cond_v2._create_new_tf_function(body_graph),
+        util.create_new_tf_function(cond_graph),
+        util.create_new_tf_function(body_graph),
         output_shapes=[t.shape for t in body_graph.outputs],
         name=scope)
 
@@ -231,9 +233,10 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
     return counter < max_iters
 
   loop_vars = args + body_grad_graph.external_captures
+  grad_cond_name = _get_unique_name("%s_grad_cond" % op.name)
   cond_grad_graph = function.func_graph_from_py_func(
-      _get_unique_name("%s_grad_cond" % op.name),
-      grad_cond, loop_vars, {})
+      grad_cond_name, grad_cond, loop_vars, {},
+      func_graph=util.WhileCondFuncGraph(grad_cond_name))
 
   assert len(loop_vars) == len(body_grad_graph.inputs)
   assert len(loop_vars) == len(body_grad_graph.outputs)
@@ -241,8 +244,8 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
 
   outputs = gen_functional_ops._while(
       loop_vars,
-      cond_v2._create_new_tf_function(cond_grad_graph),
-      cond_v2._create_new_tf_function(body_grad_graph),
+      util.create_new_tf_function(cond_grad_graph),
+      util.create_new_tf_function(body_grad_graph),
       output_shapes=[t.shape for t in body_grad_graph.outputs],
       name=_get_unique_name("%s_grad" % op.name))
 
@@ -461,7 +464,7 @@ def _get_unique_name(name):
     return ops.get_default_graph().unique_name(name)
 
 
-class _WhileBodyGradFuncGraph(function.FuncGraph):
+class _WhileBodyGradFuncGraph(util.WhileBodyFuncGraph):
   """FuncGraph for the gradient function of the body of a While op.
 
   Contains the logic for capturing the tensors from the body of the forward
-- 
GitLab


From f943364eb8df6e56b889432172f0e74cda4ae4b2 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 15 Oct 2018 17:45:08 -0700
Subject: [PATCH 1012/1085] Updating constant endpoints.

PiperOrigin-RevId: 217239829
---
 tensorflow/python/framework/dtypes.py         |  55 +++++-----
 tensorflow/python/framework/versions.py       |  57 +++++++---
 tensorflow/python/saved_model/constants.py    |  69 ++++++++----
 .../python/saved_model/signature_constants.py |  84 +++++++++++----
 .../python/saved_model/tag_constants.py       |  26 +++--
 .../tools/api/generator/api_init_files.bzl    |   4 +-
 .../tools/api/generator/api_init_files_v1.bzl |   1 +
 .../api/golden/v1/tensorflow.dtypes.pbtxt     | 100 ++++++++++++++++++
 .../tools/api/golden/v1/tensorflow.pbtxt      |   4 +
 .../golden/v1/tensorflow.saved_model.pbtxt    |  96 +++++++++++++++++
 .../api/golden/v1/tensorflow.sysconfig.pbtxt  |   8 ++
 .../api/golden/v1/tensorflow.version.pbtxt    |  27 +++++
 .../api/golden/v2/tensorflow.dtypes.pbtxt     | 100 ++++++++++++++++++
 .../tools/api/golden/v2/tensorflow.pbtxt      |  36 +------
 .../v2/tensorflow.saved_model.constants.pbtxt |  39 -------
 .../golden/v2/tensorflow.saved_model.pbtxt    |  96 +++++++++++++++--
 ...flow.saved_model.signature_constants.pbtxt |  47 --------
 ...tensorflow.saved_model.tag_constants.pbtxt |  19 ----
 .../api/golden/v2/tensorflow.sysconfig.pbtxt  |   8 ++
 .../api/golden/v2/tensorflow.version.pbtxt    |  27 +++++
 20 files changed, 669 insertions(+), 234 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.version.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.constants.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.signature_constants.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.saved_model.tag_constants.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.version.pbtxt

diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py
index 64d3b42d89..e36643b338 100644
--- a/tensorflow/python/framework/dtypes.py
+++ b/tensorflow/python/framework/dtypes.py
@@ -322,57 +322,59 @@ dtype_range = {
 
 # Define standard wrappers for the types_pb2.DataType enum.
 resource = DType(types_pb2.DT_RESOURCE)
-tf_export("resource").export_constant(__name__, "resource")
+tf_export("dtypes.resource", "resource").export_constant(__name__, "resource")
 variant = DType(types_pb2.DT_VARIANT)
-tf_export("variant").export_constant(__name__, "variant")
+tf_export("dtypes.variant", "variant").export_constant(__name__, "variant")
 float16 = DType(types_pb2.DT_HALF)
-tf_export("float16").export_constant(__name__, "float16")
+tf_export("dtypes.float16", "float16").export_constant(__name__, "float16")
 half = float16
-tf_export("half").export_constant(__name__, "half")
+tf_export("dtypes.half", "half").export_constant(__name__, "half")
 float32 = DType(types_pb2.DT_FLOAT)
-tf_export("float32").export_constant(__name__, "float32")
+tf_export("dtypes.float32", "float32").export_constant(__name__, "float32")
 float64 = DType(types_pb2.DT_DOUBLE)
-tf_export("float64").export_constant(__name__, "float64")
+tf_export("dtypes.float64", "float64").export_constant(__name__, "float64")
 double = float64
-tf_export("double").export_constant(__name__, "double")
+tf_export("dtypes.double", "double").export_constant(__name__, "double")
 int32 = DType(types_pb2.DT_INT32)
-tf_export("int32").export_constant(__name__, "int32")
+tf_export("dtypes.int32", "int32").export_constant(__name__, "int32")
 uint8 = DType(types_pb2.DT_UINT8)
-tf_export("uint8").export_constant(__name__, "uint8")
+tf_export("dtypes.uint8", "uint8").export_constant(__name__, "uint8")
 uint16 = DType(types_pb2.DT_UINT16)
-tf_export("uint16").export_constant(__name__, "uint16")
+tf_export("dtypes.uint16", "uint16").export_constant(__name__, "uint16")
 uint32 = DType(types_pb2.DT_UINT32)
-tf_export("uint32").export_constant(__name__, "uint32")
+tf_export("dtypes.uint32", "uint32").export_constant(__name__, "uint32")
 uint64 = DType(types_pb2.DT_UINT64)
-tf_export("uint64").export_constant(__name__, "uint64")
+tf_export("dtypes.uint64", "uint64").export_constant(__name__, "uint64")
 int16 = DType(types_pb2.DT_INT16)
-tf_export("int16").export_constant(__name__, "int16")
+tf_export("dtypes.uint16", "int16").export_constant(__name__, "int16")
 int8 = DType(types_pb2.DT_INT8)
-tf_export("int8").export_constant(__name__, "int8")
+tf_export("dtypes.int8", "int8").export_constant(__name__, "int8")
 string = DType(types_pb2.DT_STRING)
-tf_export("string").export_constant(__name__, "string")
+tf_export("dtypes.string", "string").export_constant(__name__, "string")
 complex64 = DType(types_pb2.DT_COMPLEX64)
-tf_export("complex64").export_constant(__name__, "complex64")
+tf_export("dtypes.complex64", "complex64").export_constant(
+    __name__, "complex64")
 complex128 = DType(types_pb2.DT_COMPLEX128)
-tf_export("complex128").export_constant(__name__, "complex128")
+tf_export("dtypes.complex128", "complex128").export_constant(
+    __name__, "complex128")
 int64 = DType(types_pb2.DT_INT64)
-tf_export("int64").export_constant(__name__, "int64")
+tf_export("dtypes.int64", "int64").export_constant(__name__, "int64")
 bool = DType(types_pb2.DT_BOOL)  # pylint: disable=redefined-builtin
-tf_export("bool").export_constant(__name__, "bool")
+tf_export("dtypes.bool", "bool").export_constant(__name__, "bool")
 qint8 = DType(types_pb2.DT_QINT8)
-tf_export("qint8").export_constant(__name__, "qint8")
+tf_export("dtypes.qint8", "qint8").export_constant(__name__, "qint8")
 quint8 = DType(types_pb2.DT_QUINT8)
-tf_export("quint8").export_constant(__name__, "quint8")
+tf_export("dtypes.quint8", "quint8").export_constant(__name__, "quint8")
 qint16 = DType(types_pb2.DT_QINT16)
-tf_export("qint16").export_constant(__name__, "qint16")
+tf_export("dtypes.qint16", "qint16").export_constant(__name__, "qint16")
 quint16 = DType(types_pb2.DT_QUINT16)
-tf_export("quint16").export_constant(__name__, "quint16")
+tf_export("dtypes.quint16", "quint16").export_constant(__name__, "quint16")
 qint32 = DType(types_pb2.DT_QINT32)
-tf_export("qint32").export_constant(__name__, "qint32")
+tf_export("dtypes.qint32", "qint32").export_constant(__name__, "qint32")
 resource_ref = DType(types_pb2.DT_RESOURCE_REF)
 variant_ref = DType(types_pb2.DT_VARIANT_REF)
 bfloat16 = DType(types_pb2.DT_BFLOAT16)
-tf_export("bfloat16").export_constant(__name__, "bfloat16")
+tf_export("dtypes.bfloat16", "bfloat16").export_constant(__name__, "bfloat16")
 float16_ref = DType(types_pb2.DT_HALF_REF)
 half_ref = float16_ref
 float32_ref = DType(types_pb2.DT_FLOAT_REF)
@@ -650,7 +652,8 @@ _QUANTIZED_DTYPES_NO_REF = frozenset([qint8, quint8, qint16, quint16, qint32])
 _QUANTIZED_DTYPES_REF = frozenset(
     [qint8_ref, quint8_ref, qint16_ref, quint16_ref, qint32_ref])
 QUANTIZED_DTYPES = _QUANTIZED_DTYPES_REF.union(_QUANTIZED_DTYPES_NO_REF)
-tf_export("QUANTIZED_DTYPES").export_constant(__name__, "QUANTIZED_DTYPES")
+tf_export("dtypes.QUANTIZED_DTYPES", "QUANTIZED_DTYPES").export_constant(
+    __name__, "QUANTIZED_DTYPES")
 
 _PYTHON_TO_TF = {
     float: float32,
diff --git a/tensorflow/python/framework/versions.py b/tensorflow/python/framework/versions.py
index 472ccbcac7..37f2b37b31 100644
--- a/tensorflow/python/framework/versions.py
+++ b/tensorflow/python/framework/versions.py
@@ -29,30 +29,59 @@ __cxx11_abi_flag__ = pywrap_tensorflow.__cxx11_abi_flag__
 __monolithic_build__ = pywrap_tensorflow.__monolithic_build__
 
 VERSION = __version__
-tf_export("VERSION", "__version__").export_constant(__name__, "VERSION")
+tf_export(
+    "version.VERSION",
+    "__version__",
+    v1=["version.VERSION", "VERSION", "__version__"]).export_constant(
+        __name__, "VERSION")
 GIT_VERSION = __git_version__
-tf_export("GIT_VERSION", "__git_version__").export_constant(
-    __name__, "GIT_VERSION")
+tf_export(
+    "version.GIT_VERSION",
+    "__git_version__",
+    v1=["version.GIT_VERSION", "GIT_VERSION",
+        "__git_version__"]).export_constant(__name__, "GIT_VERSION")
 COMPILER_VERSION = __compiler_version__
-tf_export("COMPILER_VERSION", "__compiler_version__").export_constant(
-    __name__, "COMPILER_VERSION")
+tf_export(
+    "version.COMPILER_VERSION",
+    "__compiler_version__",
+    v1=["version.COMPILER_VERSION", "COMPILER_VERSION",
+        "__compiler_version__"]).export_constant(__name__, "COMPILER_VERSION")
+
 CXX11_ABI_FLAG = __cxx11_abi_flag__
-tf_export("CXX11_ABI_FLAG", "__cxx11_abi_flag__").export_constant(
-    __name__, "CXX11_ABI_FLAG")
+tf_export(
+    "sysconfig.CXX11_ABI_FLAG",
+    "__cxx11_abi_flag__",
+    v1=["sysconfig.CXX11_ABI_FLAG", "CXX11_ABI_FLAG",
+        "__cxx11_abi_flag__"]).export_constant(__name__, "CXX11_ABI_FLAG")
 MONOLITHIC_BUILD = __monolithic_build__
-tf_export("MONOLITHIC_BUILD", "__monolithic_build__").export_constant(
-    __name__, "MONOLITHIC_BUILD")
+tf_export(
+    "sysconfig.MONOLITHIC_BUILD",
+    "__monolithic_build__",
+    v1=[
+        "sysconfig.MONOLITHIC_BUILD", "MONOLITHIC_BUILD", "__monolithic_build__"
+    ]).export_constant(__name__, "MONOLITHIC_BUILD")
 
 GRAPH_DEF_VERSION = pywrap_tensorflow.GRAPH_DEF_VERSION
-tf_export("GRAPH_DEF_VERSION").export_constant(__name__, "GRAPH_DEF_VERSION")
+tf_export(
+    "version.GRAPH_DEF_VERSION",
+    v1=["version.GRAPH_DEF_VERSION", "GRAPH_DEF_VERSION"]).export_constant(
+        __name__, "GRAPH_DEF_VERSION")
 GRAPH_DEF_VERSION_MIN_CONSUMER = (
     pywrap_tensorflow.GRAPH_DEF_VERSION_MIN_CONSUMER)
-tf_export("GRAPH_DEF_VERSION_MIN_CONSUMER").export_constant(
-    __name__, "GRAPH_DEF_VERSION_MIN_CONSUMER")
+tf_export(
+    "version.GRAPH_DEF_VERSION_MIN_CONSUMER",
+    v1=[
+        "version.GRAPH_DEF_VERSION_MIN_CONSUMER",
+        "GRAPH_DEF_VERSION_MIN_CONSUMER"
+    ]).export_constant(__name__, "GRAPH_DEF_VERSION_MIN_CONSUMER")
 GRAPH_DEF_VERSION_MIN_PRODUCER = (
     pywrap_tensorflow.GRAPH_DEF_VERSION_MIN_PRODUCER)
-tf_export("GRAPH_DEF_VERSION_MIN_PRODUCER").export_constant(
-    __name__, "GRAPH_DEF_VERSION_MIN_PRODUCER")
+tf_export(
+    "version.GRAPH_DEF_VERSION_MIN_PRODUCER",
+    v1=[
+        "version.GRAPH_DEF_VERSION_MIN_PRODUCER",
+        "GRAPH_DEF_VERSION_MIN_PRODUCER"
+    ]).export_constant(__name__, "GRAPH_DEF_VERSION_MIN_PRODUCER")
 
 __all__ = [
     "__version__",
diff --git a/tensorflow/python/saved_model/constants.py b/tensorflow/python/saved_model/constants.py
index cb251f08bb..0addbdc968 100644
--- a/tensorflow/python/saved_model/constants.py
+++ b/tensorflow/python/saved_model/constants.py
@@ -23,23 +23,36 @@ from tensorflow.python.util.tf_export import tf_export
 
 # Subdirectory name containing the asset files.
 ASSETS_DIRECTORY = "assets"
-tf_export("saved_model.constants.ASSETS_DIRECTORY").export_constant(
-    __name__, "ASSETS_DIRECTORY")
+tf_export(
+    "saved_model.ASSETS_DIRECTORY",
+    v1=[
+        "saved_model.ASSETS_DIRECTORY", "saved_model.constants.ASSETS_DIRECTORY"
+    ]).export_constant(__name__, "ASSETS_DIRECTORY")
 
 # CollectionDef key containing SavedModel assets.
 ASSETS_KEY = "saved_model_assets"
-tf_export("saved_model.constants.ASSETS_KEY").export_constant(
-    __name__, "ASSETS_KEY")
+tf_export(
+    "saved_model.ASSETS_KEY",
+    v1=["saved_model.ASSETS_KEY",
+        "saved_model.constants.ASSETS_KEY"]).export_constant(
+            __name__, "ASSETS_KEY")
 
 # CollectionDef key for the legacy init op.
 LEGACY_INIT_OP_KEY = "legacy_init_op"
-tf_export("saved_model.constants.LEGACY_INIT_OP_KEY").export_constant(
-    __name__, "LEGACY_INIT_OP_KEY")
+tf_export(
+    "saved_model.LEGACY_INIT_OP_KEY",
+    v1=[
+        "saved_model.LEGACY_INIT_OP_KEY",
+        "saved_model.constants.LEGACY_INIT_OP_KEY"
+    ]).export_constant(__name__, "LEGACY_INIT_OP_KEY")
 
 # CollectionDef key for the SavedModel main op.
 MAIN_OP_KEY = "saved_model_main_op"
-tf_export("saved_model.constants.MAIN_OP_KEY").export_constant(
-    __name__, "MAIN_OP_KEY")
+tf_export(
+    "saved_model.MAIN_OP_KEY",
+    v1=["saved_model.MAIN_OP_KEY",
+        "saved_model.constants.MAIN_OP_KEY"]).export_constant(
+            __name__, "MAIN_OP_KEY")
 
 # CollectionDef key for the SavedModel train op.
 # Not exported while export_all_saved_models is in contrib.
@@ -47,18 +60,30 @@ TRAIN_OP_KEY = "saved_model_train_op"
 
 # Schema version for SavedModel.
 SAVED_MODEL_SCHEMA_VERSION = 1
-tf_export("saved_model.constants.SAVED_MODEL_SCHEMA_VERSION").export_constant(
-    __name__, "SAVED_MODEL_SCHEMA_VERSION")
+tf_export(
+    "saved_model.SAVED_MODEL_SCHEMA_VERSION",
+    v1=[
+        "saved_model.SAVED_MODEL_SCHEMA_VERSION",
+        "saved_model.constants.SAVED_MODEL_SCHEMA_VERSION"
+    ]).export_constant(__name__, "SAVED_MODEL_SCHEMA_VERSION")
 
 # File name for SavedModel protocol buffer.
 SAVED_MODEL_FILENAME_PB = "saved_model.pb"
-tf_export("saved_model.constants.SAVED_MODEL_FILENAME_PB").export_constant(
-    __name__, "SAVED_MODEL_FILENAME_PB")
+tf_export(
+    "saved_model.SAVED_MODEL_FILENAME_PB",
+    v1=[
+        "saved_model.SAVED_MODEL_FILENAME_PB",
+        "saved_model.constants.SAVED_MODEL_FILENAME_PB"
+    ]).export_constant(__name__, "SAVED_MODEL_FILENAME_PB")
 
 # File name for text version of SavedModel protocol buffer.
 SAVED_MODEL_FILENAME_PBTXT = "saved_model.pbtxt"
-tf_export("saved_model.constants.SAVED_MODEL_FILENAME_PBTXT").export_constant(
-    __name__, "SAVED_MODEL_FILENAME_PBTXT")
+tf_export(
+    "saved_model.SAVED_MODEL_FILENAME_PBTXT",
+    v1=[
+        "saved_model.SAVED_MODEL_FILENAME_PBTXT",
+        "saved_model.constants.SAVED_MODEL_FILENAME_PBTXT"
+    ]).export_constant(__name__, "SAVED_MODEL_FILENAME_PBTXT")
 
 # File name for json format of SavedModel.
 # Not exported while keras_saved_model is in contrib.
@@ -66,10 +91,18 @@ SAVED_MODEL_FILENAME_JSON = "saved_model.json"
 
 # Subdirectory name containing the variables/checkpoint files.
 VARIABLES_DIRECTORY = "variables"
-tf_export("saved_model.constants.VARIABLES_DIRECTORY").export_constant(
-    __name__, "VARIABLES_DIRECTORY")
+tf_export(
+    "saved_model.VARIABLES_DIRECTORY",
+    v1=[
+        "saved_model.VARIABLES_DIRECTORY",
+        "saved_model.constants.VARIABLES_DIRECTORY"
+    ]).export_constant(__name__, "VARIABLES_DIRECTORY")
 
 # File name used for variables.
 VARIABLES_FILENAME = "variables"
-tf_export("saved_model.constants.VARIABLES_FILENAME").export_constant(
-    __name__, "VARIABLES_FILENAME")
+tf_export(
+    "saved_model.VARIABLES_FILENAME",
+    v1=[
+        "saved_model.VARIABLES_FILENAME",
+        "saved_model.constants.VARIABLES_FILENAME"
+    ]).export_constant(__name__, "VARIABLES_FILENAME")
diff --git a/tensorflow/python/saved_model/signature_constants.py b/tensorflow/python/saved_model/signature_constants.py
index 99007a9634..96460717ec 100644
--- a/tensorflow/python/saved_model/signature_constants.py
+++ b/tensorflow/python/saved_model/signature_constants.py
@@ -26,72 +26,112 @@ from tensorflow.python.util.tf_export import tf_export
 # signature is used in inference requests where a specific signature was not
 # specified.
 DEFAULT_SERVING_SIGNATURE_DEF_KEY = "serving_default"
-tf_export("saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY"
-         ).export_constant(__name__, "DEFAULT_SERVING_SIGNATURE_DEF_KEY")
+tf_export(
+    "saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY",
+    v1=[
+        "saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY",
+        "saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY"
+    ],
+).export_constant(__name__, "DEFAULT_SERVING_SIGNATURE_DEF_KEY")
 
 ################################################################################
 # Classification API constants.
 
 # Classification inputs.
 CLASSIFY_INPUTS = "inputs"
-tf_export("saved_model.signature_constants.CLASSIFY_INPUTS").export_constant(
-    __name__, "CLASSIFY_INPUTS")
+tf_export(
+    "saved_model.CLASSIFY_INPUTS",
+    v1=[
+        "saved_model.CLASSIFY_INPUTS",
+        "saved_model.signature_constants.CLASSIFY_INPUTS"
+    ]).export_constant(__name__, "CLASSIFY_INPUTS")
 
 # Classification method name used in a SignatureDef.
 CLASSIFY_METHOD_NAME = "tensorflow/serving/classify"
 tf_export(
-    "saved_model.signature_constants.CLASSIFY_METHOD_NAME").export_constant(
-        __name__, "CLASSIFY_METHOD_NAME")
+    "saved_model.CLASSIFY_METHOD_NAME",
+    v1=[
+        "saved_model.CLASSIFY_METHOD_NAME",
+        "saved_model.signature_constants.CLASSIFY_METHOD_NAME"
+    ]).export_constant(__name__, "CLASSIFY_METHOD_NAME")
 
 # Classification classes output.
 CLASSIFY_OUTPUT_CLASSES = "classes"
 tf_export(
-    "saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES").export_constant(
-        __name__, "CLASSIFY_OUTPUT_CLASSES")
+    "saved_model.CLASSIFY_OUTPUT_CLASSES",
+    v1=[
+        "saved_model.CLASSIFY_OUTPUT_CLASSES",
+        "saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES"
+    ]).export_constant(__name__, "CLASSIFY_OUTPUT_CLASSES")
 
 # Classification scores output.
 CLASSIFY_OUTPUT_SCORES = "scores"
 tf_export(
-    "saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES").export_constant(
-        __name__, "CLASSIFY_OUTPUT_SCORES")
+    "saved_model.CLASSIFY_OUTPUT_SCORES",
+    v1=[
+        "saved_model.CLASSIFY_OUTPUT_SCORES",
+        "saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES"
+    ]).export_constant(__name__, "CLASSIFY_OUTPUT_SCORES")
 
 ################################################################################
 # Prediction API constants.
 
 # Predict inputs.
 PREDICT_INPUTS = "inputs"
-tf_export("saved_model.signature_constants.PREDICT_INPUTS").export_constant(
-    __name__, "PREDICT_INPUTS")
+tf_export(
+    "saved_model.PREDICT_INPUTS",
+    v1=[
+        "saved_model.PREDICT_INPUTS",
+        "saved_model.signature_constants.PREDICT_INPUTS"
+    ]).export_constant(__name__, "PREDICT_INPUTS")
 
 # Prediction method name used in a SignatureDef.
 PREDICT_METHOD_NAME = "tensorflow/serving/predict"
 tf_export(
-    "saved_model.signature_constants.PREDICT_METHOD_NAME").export_constant(
-        __name__, "PREDICT_METHOD_NAME")
+    "saved_model.PREDICT_METHOD_NAME",
+    v1=[
+        "saved_model.PREDICT_METHOD_NAME",
+        "saved_model.signature_constants.PREDICT_METHOD_NAME"
+    ]).export_constant(__name__, "PREDICT_METHOD_NAME")
 
 # Predict outputs.
 PREDICT_OUTPUTS = "outputs"
-tf_export("saved_model.signature_constants.PREDICT_OUTPUTS").export_constant(
-    __name__, "PREDICT_OUTPUTS")
+tf_export(
+    "saved_model.PREDICT_OUTPUTS",
+    v1=[
+        "saved_model.PREDICT_OUTPUTS",
+        "saved_model.signature_constants.PREDICT_OUTPUTS"
+    ]).export_constant(__name__, "PREDICT_OUTPUTS")
 
 ################################################################################
 # Regression API constants.
 
 # Regression inputs.
 REGRESS_INPUTS = "inputs"
-tf_export("saved_model.signature_constants.REGRESS_INPUTS").export_constant(
-    __name__, "REGRESS_INPUTS")
+tf_export(
+    "saved_model.REGRESS_INPUTS",
+    v1=[
+        "saved_model.REGRESS_INPUTS",
+        "saved_model.signature_constants.REGRESS_INPUTS"
+    ]).export_constant(__name__, "REGRESS_INPUTS")
 
 # Regression method name used in a SignatureDef.
 REGRESS_METHOD_NAME = "tensorflow/serving/regress"
 tf_export(
-    "saved_model.signature_constants.REGRESS_METHOD_NAME").export_constant(
-        __name__, "REGRESS_METHOD_NAME")
+    "saved_model.REGRESS_METHOD_NAME",
+    v1=[
+        "saved_model.REGRESS_METHOD_NAME",
+        "saved_model.signature_constants.REGRESS_METHOD_NAME"
+    ]).export_constant(__name__, "REGRESS_METHOD_NAME")
 
 # Regression outputs.
 REGRESS_OUTPUTS = "outputs"
-tf_export("saved_model.signature_constants.REGRESS_OUTPUTS").export_constant(
-    __name__, "REGRESS_OUTPUTS")
+tf_export(
+    "saved_model.REGRESS_OUTPUTS",
+    v1=[
+        "saved_model.REGRESS_OUTPUTS",
+        "saved_model.signature_constants.REGRESS_OUTPUTS"
+    ]).export_constant(__name__, "REGRESS_OUTPUTS")
 
 ################################################################################
 # Train/Eval API constants.
diff --git a/tensorflow/python/saved_model/tag_constants.py b/tensorflow/python/saved_model/tag_constants.py
index c82154e7b9..8c84c9fbe4 100644
--- a/tensorflow/python/saved_model/tag_constants.py
+++ b/tensorflow/python/saved_model/tag_constants.py
@@ -24,23 +24,33 @@ from tensorflow.python.util.tf_export import tf_export
 
 # Tag for the `serving` graph.
 SERVING = "serve"
-tf_export("saved_model.tag_constants.SERVING").export_constant(
-    __name__, "SERVING")
+tf_export(
+    "saved_model.SERVING",
+    v1=["saved_model.SERVING",
+        "saved_model.tag_constants.SERVING"]).export_constant(
+            __name__, "SERVING")
 
 # Tag for the `training` graph.
 TRAINING = "train"
-tf_export("saved_model.tag_constants.TRAINING").export_constant(
-    __name__, "TRAINING")
+tf_export(
+    "saved_model.TRANING",
+    v1=["saved_model.TRAINING",
+        "saved_model.tag_constants.TRAINING"]).export_constant(
+            __name__, "TRAINING")
 
 # Tag for the `eval` graph. Not exported while the export logic is in contrib.
 EVAL = "eval"
 
 # Tag for the `gpu` graph.
 GPU = "gpu"
-tf_export("saved_model.tag_constants.GPU").export_constant(__name__, "GPU")
+tf_export(
+    "saved_model.GPU", v1=["saved_model.GPU",
+                           "saved_model.tag_constants.GPU"]).export_constant(
+                               __name__, "GPU")
 
 # Tag for the `tpu` graph.
 TPU = "tpu"
-tf_export("saved_model.tag_constants.TPU").export_constant(__name__, "TPU")
-
-
+tf_export(
+    "saved_model.TPU", v1=["saved_model.TPU",
+                           "saved_model.tag_constants.TPU"]).export_constant(
+                               __name__, "TPU")
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index dcf33f056b..1f5e6fd2b5 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -73,9 +73,6 @@ TENSORFLOW_API_INIT_FILES = [
     "resource_loader/__init__.py",
     "strings/__init__.py",
     "saved_model/__init__.py",
-    "saved_model/constants/__init__.py",
-    "saved_model/signature_constants/__init__.py",
-    "saved_model/tag_constants/__init__.py",
     "sets/__init__.py",
     "sparse/__init__.py",
     "spectral/__init__.py",
@@ -84,5 +81,6 @@ TENSORFLOW_API_INIT_FILES = [
     "test/__init__.py",
     "train/__init__.py",
     "user_ops/__init__.py",
+    "version/__init__.py",
     # END GENERATED FILES
 ]
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index 0747424eab..7a8660fd90 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -91,5 +91,6 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "train/__init__.py",
     "train/queue_runner/__init__.py",
     "user_ops/__init__.py",
+    "version/__init__.py",
     # END GENERATED FILES
 ]
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
index ea23feca84..848fc303aa 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
@@ -4,6 +4,106 @@ tf_module {
     name: "DType"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "QUANTIZED_DTYPES"
+    mtype: "<type \'frozenset\'>"
+  }
+  member {
+    name: "bfloat16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "bool"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "complex128"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "complex64"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "double"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "float16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "float32"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "float64"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "half"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "int32"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "int64"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "int8"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "qint16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "qint32"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "qint8"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "quint16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "quint8"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "resource"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "string"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "uint16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "uint32"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "uint64"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "uint8"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "variant"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
   member_method {
     name: "as_dtype"
     argspec: "args=[\'type_value\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 247dfcc1ca..4502a3919c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -600,6 +600,10 @@ tf_module {
     name: "variant"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "version"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "zeros_initializer"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
index 3f4965fc69..5b28f7b9b1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
@@ -1,9 +1,105 @@
 path: "tensorflow.saved_model"
 tf_module {
+  member {
+    name: "ASSETS_DIRECTORY"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "ASSETS_KEY"
+    mtype: "<type \'str\'>"
+  }
   member {
     name: "Builder"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "CLASSIFY_INPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "CLASSIFY_METHOD_NAME"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "CLASSIFY_OUTPUT_CLASSES"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "CLASSIFY_OUTPUT_SCORES"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "DEFAULT_SERVING_SIGNATURE_DEF_KEY"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "GPU"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "LEGACY_INIT_OP_KEY"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "MAIN_OP_KEY"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "PREDICT_INPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "PREDICT_METHOD_NAME"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "PREDICT_OUTPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "REGRESS_INPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "REGRESS_METHOD_NAME"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "REGRESS_OUTPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "SAVED_MODEL_FILENAME_PB"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "SAVED_MODEL_FILENAME_PBTXT"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "SAVED_MODEL_SCHEMA_VERSION"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "SERVING"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "TPU"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "TRAINING"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "VARIABLES_DIRECTORY"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "VARIABLES_FILENAME"
+    mtype: "<type \'str\'>"
+  }
   member {
     name: "builder"
     mtype: "<type \'module\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sysconfig.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sysconfig.pbtxt
index 2f00aeac25..811ca18cdb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.sysconfig.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.sysconfig.pbtxt
@@ -1,5 +1,13 @@
 path: "tensorflow.sysconfig"
 tf_module {
+  member {
+    name: "CXX11_ABI_FLAG"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "MONOLITHIC_BUILD"
+    mtype: "<type \'int\'>"
+  }
   member_method {
     name: "get_compile_flags"
     argspec: "args=[], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.version.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.version.pbtxt
new file mode 100644
index 0000000000..dd4506cb0b
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.version.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.version"
+tf_module {
+  member {
+    name: "COMPILER_VERSION"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "GIT_VERSION"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "GRAPH_DEF_VERSION"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "GRAPH_DEF_VERSION_MIN_CONSUMER"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "GRAPH_DEF_VERSION_MIN_PRODUCER"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "VERSION"
+    mtype: "<type \'str\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
index ea23feca84..848fc303aa 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
@@ -4,6 +4,106 @@ tf_module {
     name: "DType"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "QUANTIZED_DTYPES"
+    mtype: "<type \'frozenset\'>"
+  }
+  member {
+    name: "bfloat16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "bool"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "complex128"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "complex64"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "double"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "float16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "float32"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "float64"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "half"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "int32"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "int64"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "int8"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "qint16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "qint32"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "qint8"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "quint16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "quint8"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "resource"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "string"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "uint16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "uint32"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "uint64"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "uint8"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
+  member {
+    name: "variant"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
   member_method {
     name: "as_dtype"
     argspec: "args=[\'type_value\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 37e8e654b7..3664eef406 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -8,14 +8,6 @@ tf_module {
     name: "AttrValue"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "COMPILER_VERSION"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "CXX11_ABI_FLAG"
-    mtype: "<type \'int\'>"
-  }
   member {
     name: "ConditionalAccumulator"
     mtype: "<type \'type\'>"
@@ -56,26 +48,10 @@ tf_module {
     name: "FixedLenSequenceFeature"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "GIT_VERSION"
-    mtype: "<type \'str\'>"
-  }
   member {
     name: "GPUOptions"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "GRAPH_DEF_VERSION"
-    mtype: "<type \'int\'>"
-  }
-  member {
-    name: "GRAPH_DEF_VERSION_MIN_CONSUMER"
-    mtype: "<type \'int\'>"
-  }
-  member {
-    name: "GRAPH_DEF_VERSION_MIN_PRODUCER"
-    mtype: "<type \'int\'>"
-  }
   member {
     name: "GradientTape"
     mtype: "<type \'type\'>"
@@ -112,10 +88,6 @@ tf_module {
     name: "LogMessage"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "MONOLITHIC_BUILD"
-    mtype: "<type \'int\'>"
-  }
   member {
     name: "MetaGraphDef"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
@@ -200,10 +172,6 @@ tf_module {
     name: "UnconnectedGradients"
     mtype: "<class \'enum.EnumMeta\'>"
   }
-  member {
-    name: "VERSION"
-    mtype: "<type \'str\'>"
-  }
   member {
     name: "VarLenFeature"
     mtype: "<type \'type\'>"
@@ -512,6 +480,10 @@ tf_module {
     name: "variant"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "version"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "zeros_initializer"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.constants.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.constants.pbtxt
deleted file mode 100644
index 20e10aa094..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.constants.pbtxt
+++ /dev/null
@@ -1,39 +0,0 @@
-path: "tensorflow.saved_model.constants"
-tf_module {
-  member {
-    name: "ASSETS_DIRECTORY"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "ASSETS_KEY"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "LEGACY_INIT_OP_KEY"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "MAIN_OP_KEY"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "SAVED_MODEL_FILENAME_PB"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "SAVED_MODEL_FILENAME_PBTXT"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "SAVED_MODEL_SCHEMA_VERSION"
-    mtype: "<type \'int\'>"
-  }
-  member {
-    name: "VARIABLES_DIRECTORY"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "VARIABLES_FILENAME"
-    mtype: "<type \'str\'>"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
index a95ab4a3bc..dc26a67fa0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt
@@ -1,20 +1,104 @@
 path: "tensorflow.saved_model"
 tf_module {
+  member {
+    name: "ASSETS_DIRECTORY"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "ASSETS_KEY"
+    mtype: "<type \'str\'>"
+  }
   member {
     name: "Builder"
     mtype: "<type \'type\'>"
   }
   member {
-    name: "constants"
-    mtype: "<type \'module\'>"
+    name: "CLASSIFY_INPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "CLASSIFY_METHOD_NAME"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "CLASSIFY_OUTPUT_CLASSES"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "CLASSIFY_OUTPUT_SCORES"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "DEFAULT_SERVING_SIGNATURE_DEF_KEY"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "GPU"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "LEGACY_INIT_OP_KEY"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "MAIN_OP_KEY"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "PREDICT_INPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "PREDICT_METHOD_NAME"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "PREDICT_OUTPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "REGRESS_INPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "REGRESS_METHOD_NAME"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "REGRESS_OUTPUTS"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "SAVED_MODEL_FILENAME_PB"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "SAVED_MODEL_FILENAME_PBTXT"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "SAVED_MODEL_SCHEMA_VERSION"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "SERVING"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "TPU"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "TRANING"
+    mtype: "<type \'str\'>"
   }
   member {
-    name: "signature_constants"
-    mtype: "<type \'module\'>"
+    name: "VARIABLES_DIRECTORY"
+    mtype: "<type \'str\'>"
   }
   member {
-    name: "tag_constants"
-    mtype: "<type \'module\'>"
+    name: "VARIABLES_FILENAME"
+    mtype: "<type \'str\'>"
   }
   member_method {
     name: "build_signature_def"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.signature_constants.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.signature_constants.pbtxt
deleted file mode 100644
index 478d410e06..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.signature_constants.pbtxt
+++ /dev/null
@@ -1,47 +0,0 @@
-path: "tensorflow.saved_model.signature_constants"
-tf_module {
-  member {
-    name: "CLASSIFY_INPUTS"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "CLASSIFY_METHOD_NAME"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "CLASSIFY_OUTPUT_CLASSES"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "CLASSIFY_OUTPUT_SCORES"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "DEFAULT_SERVING_SIGNATURE_DEF_KEY"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "PREDICT_INPUTS"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "PREDICT_METHOD_NAME"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "PREDICT_OUTPUTS"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "REGRESS_INPUTS"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "REGRESS_METHOD_NAME"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "REGRESS_OUTPUTS"
-    mtype: "<type \'str\'>"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.tag_constants.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.tag_constants.pbtxt
deleted file mode 100644
index 6af72498d7..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.tag_constants.pbtxt
+++ /dev/null
@@ -1,19 +0,0 @@
-path: "tensorflow.saved_model.tag_constants"
-tf_module {
-  member {
-    name: "GPU"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "SERVING"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "TPU"
-    mtype: "<type \'str\'>"
-  }
-  member {
-    name: "TRAINING"
-    mtype: "<type \'str\'>"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sysconfig.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sysconfig.pbtxt
index 2f00aeac25..811ca18cdb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.sysconfig.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.sysconfig.pbtxt
@@ -1,5 +1,13 @@
 path: "tensorflow.sysconfig"
 tf_module {
+  member {
+    name: "CXX11_ABI_FLAG"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "MONOLITHIC_BUILD"
+    mtype: "<type \'int\'>"
+  }
   member_method {
     name: "get_compile_flags"
     argspec: "args=[], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.version.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.version.pbtxt
new file mode 100644
index 0000000000..dd4506cb0b
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.version.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.version"
+tf_module {
+  member {
+    name: "COMPILER_VERSION"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "GIT_VERSION"
+    mtype: "<type \'str\'>"
+  }
+  member {
+    name: "GRAPH_DEF_VERSION"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "GRAPH_DEF_VERSION_MIN_CONSUMER"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "GRAPH_DEF_VERSION_MIN_PRODUCER"
+    mtype: "<type \'int\'>"
+  }
+  member {
+    name: "VERSION"
+    mtype: "<type \'str\'>"
+  }
+}
-- 
GitLab


From adcaf40165e7c0871aafa1c9b36236446c699b7d Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 15 Oct 2018 18:17:23 -0700
Subject: [PATCH 1013/1085] [XLA:GPU] Preserve metadata when creating and
 transforming cudnn convs.

PiperOrigin-RevId: 217243833
---
 .../xla/service/gpu/cudnn_conv_rewriter.cc    | 18 +++++++-----
 .../service/gpu/cudnn_conv_rewriter_test.cc   | 12 ++++++--
 .../service/gpu/cudnn_fused_conv_rewriter.cc  |  1 +
 .../compiler/xla/service/gpu/tests/BUILD      |  1 +
 .../tests/cudnn_fused_conv_rewriter_test.cc   | 29 +++++++++++++++++++
 5 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
index 5cea66de38..01de110aa9 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc
@@ -40,7 +40,8 @@ HloInstruction* CreateCudnnConv(const char* call_target, const Shape& shape,
                                 HloInstruction* lhs, HloInstruction* rhs,
                                 const Window& window,
                                 const ConvolutionDimensionNumbers& dnums,
-                                int64 feature_group_count) {
+                                int64 feature_group_count,
+                                const OpMetadata& metadata) {
   HloComputation* computation = lhs->parent();
 
   // This call returns a tuple of (conv_result, scratch_memory), where
@@ -59,6 +60,7 @@ HloInstruction* CreateCudnnConv(const char* call_target, const Shape& shape,
   custom_call->set_window(window);
   custom_call->set_convolution_dimension_numbers(dnums);
   custom_call->set_feature_group_count(feature_group_count);
+  custom_call->set_metadata(metadata);
   return custom_call;
 }
 
@@ -499,22 +501,24 @@ StatusOr<bool> RunOnInstruction(HloInstruction* conv) {
     if (match) {
       return CreateCudnnConv(kCudnnConvBackwardFilterCallTarget, conv->shape(),
                              conv->mutable_operand(0), conv->mutable_operand(1),
-                             window, dnums, conv->feature_group_count());
+                             window, dnums, conv->feature_group_count(),
+                             conv->metadata());
     }
 
     std::tie(match, window, dnums, rhs) = MatchBackwardInput(conv);
     if (match) {
       return CreateCudnnConv(kCudnnConvBackwardInputCallTarget, conv->shape(),
                              conv->mutable_operand(0), rhs, window, dnums,
-                             conv->feature_group_count());
+                             conv->feature_group_count(), conv->metadata());
     }
 
     // If all else fails, try a forward convolution.
     if (CanImplementAsCudnnForwardConv(conv)) {
-      return CreateCudnnConv(
-          kCudnnConvForwardCallTarget, conv->shape(), conv->mutable_operand(0),
-          conv->mutable_operand(1), conv->window(),
-          conv->convolution_dimension_numbers(), conv->feature_group_count());
+      return CreateCudnnConv(kCudnnConvForwardCallTarget, conv->shape(),
+                             conv->mutable_operand(0), conv->mutable_operand(1),
+                             conv->window(),
+                             conv->convolution_dimension_numbers(),
+                             conv->feature_group_count(), conv->metadata());
     }
 
     return nullptr;
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
index 543160df8b..e7f572d01b 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
@@ -106,7 +106,7 @@ TEST_F(CudnnConvRewriterTest, BackwardFilterConvolve) {
   Window conv_window = default_conv_window_;
   conv_window.mutable_dimensions(1)->set_size(2);
   conv_window.mutable_dimensions(1)->set_window_dilation(2);
-  builder.AddInstruction(HloInstruction::CreateConvolve(
+  auto* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeInference::InferConvolveShape(
           activations->shape(), gradients->shape(), /*feature_group_count=*/1,
           conv_window, tf_default_dnums_for_backward_filter_)
@@ -114,13 +114,21 @@ TEST_F(CudnnConvRewriterTest, BackwardFilterConvolve) {
       activations, gradients, /*feature_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_filter_, DefaultPrecisionConfig(2)));
 
+  OpMetadata metadata;
+  metadata.set_op_name("foo");
+  conv->set_metadata(metadata);
+
   auto module = CreateNewModule();
   HloComputation* entry_computation =
       module->AddEntryComputation(builder.Build());
   EXPECT_TRUE(RunPass(module));
-  EXPECT_THAT(entry_computation->root_instruction(),
+  ASSERT_THAT(entry_computation->root_instruction(),
               op::GetTupleElement(
                   op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0));
+
+  // Check that metadata was preserved.
+  EXPECT_THAT(entry_computation->root_instruction()->operand(0)->metadata(),
+              ::testing::EqualsProto(metadata));
 }
 
 TEST_F(CudnnConvRewriterTest,
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc
index 8ac11bcf65..cde65ad574 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc
@@ -226,6 +226,7 @@ StatusOr<std::unique_ptr<HloInstruction>> TryRewriteToCudnnForwardRelu(
   new_conv->set_window(conv->window());
   new_conv->set_convolution_dimension_numbers(
       conv->convolution_dimension_numbers());
+  new_conv->set_metadata(conv->metadata());
   TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig config,
                       conv->backend_config<CudnnConvBackendConfig>());
   config.set_activation_mode(
diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD
index d22ffc1754..32eebad3b1 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD
@@ -217,6 +217,7 @@ tf_cc_test(
     deps = [
         ":gpu_codegen_test",
         "//tensorflow/compiler/xla/service:hlo_parser",
+        "//tensorflow/compiler/xla/service/gpu:ir_emission_utils",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
index 8bdb4c8080..5e7eef2101 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
@@ -277,6 +277,35 @@ TEST_F(CudnnFusedConvRewriterTest, TestMatchBroadcastedBiasOnly) {
     })");
 }
 
+TEST_F(CudnnFusedConvRewriterTest, PreservesMetadata) {
+  const char* kHloString = R"(
+    HloModule Test
+
+    ENTRY Test {
+      zero = f32[] constant(0)
+      zeros = f32[1,32,9,9] broadcast(zero), dimensions={}
+
+      input = f32[1,17,9,9] parameter(0)
+      filter = f32[3,3,17,32] parameter(1)
+
+      conv = f32[1,32,9,9] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_01io->bf01, feature_group_count=1, metadata={op_type="foo"}
+      ROOT relu = f32[1,32,9,9] maximum(zeros, conv)
+    })";
+
+  const string optimized_hlo_string =
+      backend()
+          .compiler()
+          ->RunHloPasses(ParseHloString(kHloString, GetModuleConfigForTest())
+                             .ConsumeValueOrDie(),
+                         backend().default_stream_executor(),
+                         backend().memory_allocator())
+          .ConsumeValueOrDie()
+          ->ToString();
+  EXPECT_THAT(
+      optimized_hlo_string,
+      ::testing::ContainsRegex(R"(custom-call.*metadata={op_type="foo"})"));
+}
+
 }  // namespace
 }  // namespace gpu
 }  // namespace xla
-- 
GitLab


From 0b292bc8e0c45c8c5df6b8053ef258fc0b523d4c Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 15 Oct 2018 18:42:50 -0700
Subject: [PATCH 1014/1085] [XLA:GPU] Tweak cudnn_fused_conv_rewriter_test.

- Move it into the same directory as the implementation (to match idiom of most
  of the rest of the code).
- Use gmock matchers for substr.
- Use constants for cudnn custom calls rather than the raw strings (in case we
  ever have to change them).

No functional change.

PiperOrigin-RevId: 217246977
---
 .../tests/cudnn_fused_conv_rewriter_test.cc   | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
index 5e7eef2101..12146068ed 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "absl/strings/str_replace.h"
+#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 #include "tensorflow/core/platform/test.h"
@@ -22,6 +23,9 @@ namespace xla {
 namespace gpu {
 namespace {
 
+using ::testing::HasSubstr;
+using ::testing::Not;
+
 class CudnnFusedConvRewriterTest : public HloTestBase {
  protected:
   string GetOptimizedHlo(absl::string_view hlo_string) {
@@ -39,13 +43,11 @@ class CudnnFusedConvRewriterTest : public HloTestBase {
     for (absl::string_view type : {"f16", "f32", "f64"}) {
       const string hlo_with_new_type =
           absl::StrReplaceAll(hlo_string, {{"TYPE", type}});
-      const string optimized_hlo_string = GetOptimizedHlo(hlo_with_new_type);
-      EXPECT_EQ(absl::string_view::npos,
-                optimized_hlo_string.find("__cudnn$convForward"))
-          << optimized_hlo_string;
-      EXPECT_NE(absl::string_view::npos,
-                optimized_hlo_string.find("__cudnn$convBiasActivationForward"))
-          << optimized_hlo_string;
+      string optimized_hlo_string = GetOptimizedHlo(hlo_with_new_type);
+      EXPECT_THAT(optimized_hlo_string,
+                  Not(HasSubstr(kCudnnConvForwardCallTarget)));
+      EXPECT_THAT(optimized_hlo_string,
+                  HasSubstr(kCudnnConvBiasActivationForwardCallTarget));
       EXPECT_TRUE(RunAndCompare(hlo_with_new_type, ErrorSpec{0.01}))
           << optimized_hlo_string;
     }
@@ -55,13 +57,10 @@ class CudnnFusedConvRewriterTest : public HloTestBase {
     for (absl::string_view type : {"f16", "f32", "f64"}) {
       const string hlo_with_new_type =
           absl::StrReplaceAll(hlo_string, {{"TYPE", type}});
-      string optimized_hlo = GetOptimizedHlo(hlo_with_new_type);
-      EXPECT_NE(absl::string_view::npos,
-                optimized_hlo.find("__cudnn$convForward"))
-          << optimized_hlo;
-      EXPECT_EQ(absl::string_view::npos,
-                optimized_hlo.find("__cudnn$convBiasActivationForward"))
-          << optimized_hlo;
+      string optimized_hlo_string = GetOptimizedHlo(hlo_with_new_type);
+      EXPECT_THAT(optimized_hlo_string, HasSubstr(kCudnnConvForwardCallTarget));
+      EXPECT_THAT(optimized_hlo_string,
+                  Not(HasSubstr(kCudnnConvBiasActivationForwardCallTarget)));
     }
   }
 };
-- 
GitLab


From 6c7f5f9f0175761af9c31fd94071fcfa0d0ede93 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 19:38:03 -0700
Subject: [PATCH 1015/1085] Test sequence categorical column with a zero
 dynamic sequence length.

PiperOrigin-RevId: 217251775
---
 .../sequence_feature_column_test.py           | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
index 707f93b2da..2163af0b43 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
@@ -792,6 +793,33 @@ class SequenceCategoricalColumnWithVocabularyFileTest(
       _assert_sparse_tensor_value(
           self, expected, id_weight_pair.id_tensor.eval(session=sess))
 
+  def test_get_sparse_tensors_dynamic_zero_length(self):
+    """Tests _get_sparse_tensors with a dynamic sequence length."""
+    inputs = sparse_tensor.SparseTensorValue(
+        indices=np.zeros((0, 2)), values=[], dense_shape=(2, 0))
+    expected = sparse_tensor.SparseTensorValue(
+        indices=np.zeros((0, 3)),
+        values=np.array((), dtype=np.int64),
+        dense_shape=(2, 0, 1))
+    column = sfc.sequence_categorical_column_with_vocabulary_file(
+        key='aaa',
+        vocabulary_file=self._wire_vocabulary_file_name,
+        vocabulary_size=self._wire_vocabulary_size)
+    input_placeholder_shape = list(inputs.dense_shape)
+    # Make second dimension (sequence length) dynamic.
+    input_placeholder_shape[1] = None
+    input_placeholder = array_ops.sparse_placeholder(
+        dtypes.string, shape=input_placeholder_shape)
+    id_weight_pair = column._get_sparse_tensors(
+        _LazyBuilder({'aaa': input_placeholder}))
+
+    self.assertIsNone(id_weight_pair.weight_tensor)
+    with monitored_session.MonitoredSession() as sess:
+      result = id_weight_pair.id_tensor.eval(
+          session=sess, feed_dict={input_placeholder: inputs})
+      _assert_sparse_tensor_value(
+          self, expected, result)
+
 
 class SequenceCategoricalColumnWithVocabularyListTest(
     test.TestCase, parameterized.TestCase):
-- 
GitLab


From 23df2724a7f5ed2d58b5090de4d525db39838da2 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 15 Oct 2018 19:41:38 -0700
Subject: [PATCH 1016/1085] Consolidate the handling of special builtin
 functions that are not recognized as such by the Python reflection APIs.

PiperOrigin-RevId: 217252026
---
 .../python/autograph/converters/call_trees.py | 19 +++++-----------
 .../python/autograph/pyct/inspect_utils.py    | 22 ++++++++++++++-----
 .../pyct/static_analysis/live_values.py       | 16 ++++----------
 3 files changed, 26 insertions(+), 31 deletions(-)

diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index ca6945266e..0170173e61 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -308,7 +308,12 @@ class CallTreeTransformer(converter.Base):
         target_fqn = anno.getanno(node.func, 'fqn')
       else:
         target_fqn = None
-      if self._function_is_compilable(target_entity):
+
+      if inspect_utils.isbuiltin(target_entity):
+        # Note: Any builtin that passed the builtins converter is assumed to be
+        # safe for graph mode.
+        return node
+      elif self._function_is_compilable(target_entity):
         node = self._rename_compilable_function(node)
       elif target_fqn and target_fqn in KNOWN_NUMPY_FUNCTIONS:
         # TODO(mdan): Should we replace these with equivalent TF ops instead?
@@ -318,18 +323,6 @@ class CallTreeTransformer(converter.Base):
         raise NotImplementedError(
             'py_func with return values (unknown function)')
     else:
-      if anno.hasanno(node.func, anno.Basic.QN):
-        # Special-case a few builtins that otherwise go undetected. This
-        # normally doesn't pose a problem, but the dict built-in doesn't
-        # work with inspect.getargspec which is required for dynamic functions.
-        # Note: expecting this is resilient to aliasing (e.g.
-        # dict = an_evil_dict), because in those cases the regular mechanisms
-        # process a simple user function.
-        qn = anno.getanno(node.func, anno.Basic.QN)
-        # Add items to this list as needed.
-        if str(qn) in ('dict',):
-          return node
-
       if ast_util.matches(node, 'super(_)'):
         # super() calls are preserved. The class conversion mechanism will
         # ensure that they return the correct value.
diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index a09d481003..6d5cced0ac 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -29,15 +29,25 @@ import six
 from tensorflow.python.util import tf_inspect
 
 
+# These functions test negative for isinstance(*, types.BuiltinFunctionType)
+# and inspect.isbuiltin, and are generally not visible in globals().
+SPECIAL_BUILTINS = {
+    'dict': dict,
+    'float': float,
+    'int': int,
+    'print': print,
+    'range': range,
+    'tuple': tuple
+}
+
+if six.PY2:
+  SPECIAL_BUILTINS['xrange'] = xrange
+
+
 def isbuiltin(f):
   """Returns True if the argument is a built-in function."""
-  # Note these return false for isinstance(f, types.BuiltinFunctionType) so we
-  # need to specifically check for them.
-  if f in (range, int, float):
+  if f in SPECIAL_BUILTINS.values():
     return True
-  if six.PY2:
-    if f in (xrange,):
-      return True
   if isinstance(f, types.BuiltinFunctionType):
     return True
   if tf_inspect.isbuiltin(f):
diff --git a/tensorflow/python/autograph/pyct/static_analysis/live_values.py b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
index dc363f9a47..e8e3d229be 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/live_values.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
@@ -24,21 +24,12 @@ from __future__ import division
 from __future__ import print_function
 
 import gast
-import six
 
 from tensorflow.python.autograph.pyct import anno
+from tensorflow.python.autograph.pyct import inspect_utils
 from tensorflow.python.autograph.pyct import transformer
 
 
-# TODO(aqj): Do we need this? Do other builtins fail in similar ways
-# See b/114389775 for a related bug in pyct
-# These symbols are legal in Python, but don't appear in the namespace.
-_SPECIAL_SYMBOLS = {'range': range, 'print': print}
-
-if six.PY2:
-  _SPECIAL_SYMBOLS['xrange'] = xrange
-
-
 class LiveValueResolver(transformer.Base):
   """Annotates nodes with live values."""
 
@@ -75,10 +66,11 @@ class LiveValueResolver(transformer.Base):
             # If the symbol value is for example a primitive, then it will not
             # have a name.
             pass
-        elif node.id in _SPECIAL_SYMBOLS:
+        elif node.id in inspect_utils.SPECIAL_BUILTINS:
           # Note: if the user redefined any of these symbols, then they would
           # be visible in the namespace and we would never reach this branch.
-          anno.setanno(node, 'live_val', _SPECIAL_SYMBOLS[node.id])
+          anno.setanno(
+              node, 'live_val', inspect_utils.SPECIAL_BUILTINS[node.id])
         else:
           pass
           # TODO(mdan): Should we raise an error here?
-- 
GitLab


From b75a555c3c930d8e15b3c7928f30f1941a48f43f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 20:29:39 -0700
Subject: [PATCH 1017/1085] [tfgan] Exposed warm start functionality to
 GANEstimator.

PiperOrigin-RevId: 217255637
---
 .../estimator/python/gan_estimator_impl.py    |  8 ++-
 .../estimator/python/gan_estimator_test.py    | 68 +++++++++++++++++++
 2 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index 7243f150ce..219cc199d7 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -112,7 +112,8 @@ class GANEstimator(estimator.Estimator):
                get_eval_metric_ops_fn=None,
                add_summaries=None,
                use_loss_summaries=True,
-               config=None):
+               config=None,
+               warm_start_from=None):
     """Initializes a GANEstimator instance.
 
     Args:
@@ -151,6 +152,8 @@ class GANEstimator(estimator.Estimator):
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
       config: `RunConfig` object to configure the runtime settings.
+      warm_start_from: A filepath to a checkpoint or saved model, or a
+        WarmStartSettings object to configure initialization.
 
     Raises:
       ValueError: If loss functions aren't callable.
@@ -187,7 +190,8 @@ class GANEstimator(estimator.Estimator):
           get_hooks_fn, use_loss_summaries)
 
     super(GANEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        warm_start_from=warm_start_from)
 
 
 def _get_gan_model(
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
index 83f8dd641f..4529dc07ef 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
@@ -33,9 +33,11 @@ from tensorflow.contrib.learn.python.learn.learn_io import graph_io
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.estimator import WarmStartSettings
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework.errors_impl import NotFoundError
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics as metrics_lib
@@ -317,5 +319,71 @@ class GANEstimatorIntegrationTest(test.TestCase):
         prediction_size=[batch_size, input_dim])
 
 
+class GANEstimatorWarmStartTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = self.create_tempdir().full_path
+    self.new_variable_name = 'new_var'
+    self.new_variable_value = [1, 2, 3]
+
+  def tearDown(self):
+    writer_cache.FileWriterCache.clear()
+
+  def _test_warm_start(self, warm_start_from=None):
+    """Tests whether WarmStartSettings work as intended."""
+    def generator_with_new_variable(noise_dict, mode):
+      variable_scope.get_variable(name=self.new_variable_name,
+                                  initializer=self.new_variable_value,
+                                  trainable=True)
+      return generator_fn(noise_dict, mode)
+
+    def train_input_fn():
+      data = np.zeros([3, 4])
+      return {'x': data}, data
+
+    est = estimator.GANEstimator(
+        generator_fn=generator_fn,
+        discriminator_fn=discriminator_fn,
+        generator_loss_fn=losses.wasserstein_generator_loss,
+        discriminator_loss_fn=losses.wasserstein_discriminator_loss,
+        generator_optimizer=training.GradientDescentOptimizer(1.0),
+        discriminator_optimizer=training.GradientDescentOptimizer(1.0),
+        model_dir=self._model_dir)
+
+    est.train(train_input_fn, steps=1)
+
+    est_warm = estimator.GANEstimator(
+        generator_fn=generator_with_new_variable,
+        discriminator_fn=discriminator_fn,
+        generator_loss_fn=losses.wasserstein_generator_loss,
+        discriminator_loss_fn=losses.wasserstein_discriminator_loss,
+        generator_optimizer=training.GradientDescentOptimizer(1.0),
+        discriminator_optimizer=training.GradientDescentOptimizer(1.0),
+        model_dir=None if warm_start_from else self._model_dir,
+        warm_start_from=warm_start_from)
+
+    est_warm.train(train_input_fn, steps=1)
+
+    return est_warm
+
+  def test_warm_start_error(self):
+    """Test if exception when reloading different estimators."""
+    with self.assertRaises(NotFoundError):
+      self._test_warm_start()
+
+  def test_warm_start_success(self):
+    """Test if GANEstimator allows explicit warm start variable assignment."""
+    # Regex matches all variable names in ckpt except for new_var.
+    var_regex = '^(?!.*%s.*)' % self.new_variable_name
+    warmstart = WarmStartSettings(ckpt_to_initialize_from=self._model_dir,
+                                  vars_to_warm_start=var_regex)
+    est_warm = self._test_warm_start(warm_start_from=warmstart)
+    full_variable_name = 'Generator/%s' % self.new_variable_name
+    self.assertIn(full_variable_name, est_warm.get_variable_names())
+    equal_vals = np.array_equal(est_warm.get_variable_value(full_variable_name),
+                                self.new_variable_value)
+    self.assertTrue(equal_vals)
+
+
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From dee02afef2f001f30e7f2415515e8bf963a5f4a4 Mon Sep 17 00:00:00 2001
From: Tiezhen WANG <wangtz@google.com>
Date: Mon, 15 Oct 2018 20:34:13 -0700
Subject: [PATCH 1018/1085] Keras TPU support: Fix minior usability issue.

The current code fails with the following code:
use_pipeline = magic_function()
model.fit(..., _pipeline=use_pipeline)

when use_pipeline is True, it's not pop out from kwargs thus causing exceptions in line 1559

PiperOrigin-RevId: 217256095
---
 tensorflow/contrib/tpu/python/tpu/keras_support.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index d628258b9d..9d7b894717 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -1482,10 +1482,12 @@ class KerasTPUModel(models.Model):
 
       self._numpy_to_infeed_manager_list = infeed_managers
       try:
-        if not kwargs.get('_pipeline', True):
-          logging.info('Running non-pipelined training loop (`_pipeline=%s`).',
-                       kwargs['_pipeline'])
+        pipeline = kwargs.get('_pipeline', True)
+        if '_pipeline' in kwargs:
           kwargs.pop('_pipeline')
+        if not pipeline:
+          logging.info('Running non-pipelined training loop (`_pipeline=%s`).',
+                       pipeline)
           return super(KerasTPUModel, self).fit(
               x, y, batch_size, epochs, verbose, callbacks, validation_split,
               validation_data, shuffle, class_weight, sample_weight,
-- 
GitLab


From 5473b48a761ee1746686efe697bee7661bbaa612 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Mon, 15 Oct 2018 20:40:32 -0700
Subject: [PATCH 1019/1085] [XLA] Fold select to its operands when possible

This implements the following transformations:
 - select(true,  a, b) -> a
 - select(false, a, b) -> b
 - select(a,     b, b) -> b

PiperOrigin-RevId: 217256658
---
 .../xla/service/algebraic_simplifier.cc       | 18 +++++
 .../xla/service/algebraic_simplifier_test.cc  | 67 +++++++++++++++++++
 2 files changed, 85 insertions(+)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index ca71f2cc12..72ed5ca482 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -157,6 +157,8 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
   Status HandleDynamicUpdateSlice(
       HloInstruction* dynamic_update_slice) override;
 
+  Status HandleSelect(HloInstruction* select) override;
+
   Status HandleSort(HloInstruction* sort) override;
 
   Status HandleTranspose(HloInstruction* transpose) override;
@@ -2199,6 +2201,22 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow(
                          /*reduce_computation=*/function));
 }
 
+Status AlgebraicSimplifierVisitor::HandleSelect(HloInstruction* select) {
+  // select(x, y, y) -> y.
+  if (select->operand(1) == select->operand(2)) {
+    return ReplaceInstruction(select, select->mutable_operand(1));
+  }
+  // select(true, x, y) -> x.
+  if (IsAll(select->operand(0), true)) {
+    return ReplaceInstruction(select, select->mutable_operand(1));
+  }
+  // select(false, x, y) -> y.
+  if (IsAll(select->operand(0), false)) {
+    return ReplaceInstruction(select, select->mutable_operand(2));
+  }
+  return Status::OK();
+}
+
 Status AlgebraicSimplifierVisitor::HandleSort(HloInstruction* sort) {
   auto operand = sort->mutable_operand(0);
   int64 dimension_to_sort = sort->dimensions(0);
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 42d1f337dc..c79c518700 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -97,6 +97,73 @@ TEST_F(AlgebraicSimplifierTest, MulZero) {
   EXPECT_EQ(computation->root_instruction(), zero);
 }
 
+// Test that select(true, a, b) is simplified to a
+TEST_F(AlgebraicSimplifierTest, SelectTrue) {
+  Shape r0s32 = ShapeUtil::MakeShape(S32, {});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r0s32, "param0"));
+  HloInstruction* param1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, r0s32, "param1"));
+  HloInstruction* one = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<bool>(true)));
+  builder.AddInstruction(HloInstruction::CreateTernary(
+      r0s32, HloOpcode::kSelect, one, param0, param1));
+
+  auto module = CreateNewVerifiedModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  HloInstruction* root = computation->root_instruction();
+  EXPECT_EQ(root->opcode(), HloOpcode::kSelect);
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_EQ(computation->root_instruction(), param0);
+}
+
+// Test that select(false, a, b) is simplified to b
+TEST_F(AlgebraicSimplifierTest, SelectFalse) {
+  Shape r0s32 = ShapeUtil::MakeShape(S32, {});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r0s32, "param0"));
+  HloInstruction* param1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, r0s32, "param1"));
+  HloInstruction* zero = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<bool>(false)));
+  builder.AddInstruction(HloInstruction::CreateTernary(
+      r0s32, HloOpcode::kSelect, zero, param0, param1));
+
+  auto module = CreateNewVerifiedModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  HloInstruction* root = computation->root_instruction();
+  EXPECT_EQ(root->opcode(), HloOpcode::kSelect);
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_EQ(computation->root_instruction(), param1);
+}
+
+// Test that select(a, b, b) is simplified to b
+TEST_F(AlgebraicSimplifierTest, SelectIdentical) {
+  Shape r0s32 = ShapeUtil::MakeShape(S32, {});
+  HloComputation::Builder builder(TestName());
+  HloInstruction* param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, r0s32, "param0"));
+  HloInstruction* param1 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, r0s32, "param1"));
+  builder.AddInstruction(HloInstruction::CreateTernary(
+      r0s32, HloOpcode::kSelect, param0, param1, param1));
+
+  auto module = CreateNewVerifiedModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+  HloInstruction* root = computation->root_instruction();
+  EXPECT_EQ(root->opcode(), HloOpcode::kSelect);
+  AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
+                                 non_bitcasting_callback());
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_EQ(computation->root_instruction(), param1);
+}
+
 // Test that Reduce(Reduce(A)) -> Reduce(A)
 TEST_F(AlgebraicSimplifierTest, TwoReducesToOne) {
   HloComputation::Builder builder(TestName());
-- 
GitLab


From 062439348eb9283738d5a64bcf421cc2509e082f Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Mon, 15 Oct 2018 21:20:05 -0700
Subject: [PATCH 1020/1085] Move shape inference utility to open source.

PiperOrigin-RevId: 217259772
---
 tensorflow/compiler/jit/BUILD                 |  46 +++++
 tensorflow/compiler/jit/shape_inference.cc    | 174 ++++++++++++++++++
 tensorflow/compiler/jit/shape_inference.h     |  54 ++++++
 .../compiler/jit/shape_inference_test.cc      | 124 +++++++++++++
 tensorflow/compiler/jit/test_util.cc          |  57 ++++++
 tensorflow/compiler/jit/test_util.h           |  44 +++++
 6 files changed, 499 insertions(+)
 create mode 100644 tensorflow/compiler/jit/shape_inference.cc
 create mode 100644 tensorflow/compiler/jit/shape_inference.h
 create mode 100644 tensorflow/compiler/jit/shape_inference_test.cc
 create mode 100644 tensorflow/compiler/jit/test_util.cc
 create mode 100644 tensorflow/compiler/jit/test_util.h

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index ced0cd03f7..a635608596 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -359,6 +359,52 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "shape_inference",
+    srcs = ["shape_inference.cc"],
+    hdrs = ["shape_inference.h"],
+    deps = [
+        ":shape_inference_helpers",
+        "//tensorflow/compiler/tf2xla:dump_graph",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:graph",
+        "//tensorflow/core:lib",
+    ],
+)
+
+cc_library(
+    name = "test_util",
+    testonly = 1,
+    srcs = ["test_util.cc"],
+    hdrs = ["test_util.h"],
+    deps = [
+        ":shape_inference",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+    ],
+)
+
+tf_cc_test(
+    name = "shape_inference_test",
+    srcs = ["shape_inference_test.cc"],
+    deps = [
+        ":shape_inference",
+        ":test_util",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:cc_ops_internal",
+        "//tensorflow/cc:ops",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:ops",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core/kernels:constant_op",
+    ],
+)
+
 cc_library(
     name = "compilation_passes",
     srcs = [
diff --git a/tensorflow/compiler/jit/shape_inference.cc b/tensorflow/compiler/jit/shape_inference.cc
new file mode 100644
index 0000000000..80c691fe49
--- /dev/null
+++ b/tensorflow/compiler/jit/shape_inference.cc
@@ -0,0 +1,174 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/shape_inference.h"
+
+#include "tensorflow/compiler/jit/shape_inference_helpers.h"
+#include "tensorflow/compiler/tf2xla/dump_graph.h"
+#include "tensorflow/core/common_runtime/shape_refiner.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/graph/algorithm.h"
+
+namespace tensorflow {
+
+namespace {
+
+// Converts a shape inference handle to a PartialTensorShape.
+Status ShapeHandleToTensorShape(shape_inference::InferenceContext* context,
+                                const shape_inference::ShapeHandle& handle,
+                                PartialTensorShape* shape) {
+  // The default is already unknown
+  if (!context->RankKnown(handle)) return Status::OK();
+
+  std::vector<int64> dims(context->Rank(handle));
+  for (int32 i = 0; i < dims.size(); ++i) {
+    dims[i] = context->Value(context->Dim(handle, i));
+  }
+  return PartialTensorShape::MakePartialShape(dims.data(), dims.size(), shape);
+}
+
+Status PropagateShapes(const Graph& graph,
+                       const std::map<int, InferredShape>& arg_shapes,
+                       ShapeRefiner* shape_refiner) {
+  // Visits the nodes in topological order (reverse post-order), inferring
+  // shapes.
+  // TODO(phawkins): handle cyclic graphs.
+  std::vector<Node*> order;
+  GetReversePostOrder(graph, &order);
+
+  for (Node* n : order) {
+    // Ignore the status returned by the shape_refiner. We want the best effort
+    // shapes, even if no shape function is registered for a node.
+    Status status = shape_refiner->AddNode(n);
+    if (!status.ok()) {
+      VLOG(1) << "Shape inference failed for node: " << status;
+    }
+
+    if (n->type_string() == "_Arg") {
+      int index;
+      TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "index", &index));
+      auto it = arg_shapes.find(index);
+      if (it != arg_shapes.end()) {
+        const InferredShape& arg_shape = it->second;
+        shape_inference::InferenceContext* context =
+            shape_refiner->GetContext(n);
+
+        if (arg_shape.handle_type != DT_INVALID) {
+          shape_inference::ShapeHandle handle;
+          TF_RETURN_IF_ERROR(context->MakeShapeFromPartialTensorShape(
+              arg_shape.handle_shape, &handle));
+
+          // Sets the shape and type of the variable's value.
+          context->set_output_handle_shapes_and_types(
+              0, std::vector<shape_inference::ShapeAndType>{
+                     {handle, arg_shape.handle_type}});
+        }
+
+        shape_inference::ShapeHandle handle;
+        TF_RETURN_IF_ERROR(
+            context->MakeShapeFromPartialTensorShape(arg_shape.shape, &handle));
+        TF_RETURN_IF_ERROR(shape_refiner->SetShape(n, 0, handle));
+      }
+    }
+  }
+  return Status::OK();
+}
+
+// Store the shapes of the output tensors in a map
+Status StoreOutputShapes(const Graph& graph, const ShapeRefiner& shape_refiner,
+                         GraphShapeInfo* shape_info) {
+  for (const Node* node : graph.nodes()) {
+    shape_inference::InferenceContext* context = shape_refiner.GetContext(node);
+    if (!context) continue;
+
+    auto& outputs = (*shape_info)[node->name()];
+    outputs.resize(context->num_outputs());
+    for (int i = 0; i < context->num_outputs(); ++i) {
+      auto& output = outputs[i];
+      TF_RETURN_IF_ERROR(
+          ShapeHandleToTensorShape(context, context->output(i), &output.shape));
+
+      const auto* handle_shapes_and_types =
+          context->output_handle_shapes_and_types(i);
+      if (handle_shapes_and_types != nullptr) {
+        if (handle_shapes_and_types->size() == 1) {
+          TF_RETURN_IF_ERROR(ShapeHandleToTensorShape(
+              context, (*handle_shapes_and_types)[0].shape,
+              &output.handle_shape));
+          output.handle_type = (*handle_shapes_and_types)[0].dtype;
+        } else {
+          // otherwise, it may be resource like a Queue, which can have
+          // multiple shapes and types represented by a single handle.
+        }
+      }
+      VLOG(4) << node->name() << " output " << i << " shape"
+              << output.shape.DebugString() << " handle_type "
+              << DataTypeString(output.handle_type) << " handle_shape "
+              << output.handle_shape.DebugString();
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
+Status InferShapes(Graph* graph, const std::map<int, InferredShape>& arg_shapes,
+                   const tensorflow::FunctionLibraryDefinition* fnlib_def,
+                   GraphShapeInfo* shape_info) {
+  ShapeRefiner shape_refiner(graph->versions(), graph->op_registry());
+  shape_refiner.set_require_shape_inference_fns(false);
+  // TODO(dlibenzi): Verify if it is worth trying to infer shaped within
+  // functions. Some functions can be called at multiple locations with
+  // difference shapes, which will trigger a shape inference based on the
+  // arguments passed at the first call.
+  // shape_refiner.set_function_library_for_shape_inference(fnlib_def);
+
+  // ShapeRefiner requires that all inputs of a node are present when
+  // ShapeRefiner::AddNode is called. To get at least some shape information in
+  // loops, we temporarily remove loop backedges and add them back again after
+  // the shape inference is complete.
+  BackEdgeHelper back_edge;
+  TF_RETURN_IF_ERROR(back_edge.Remove(graph));
+  TF_RETURN_IF_ERROR(PropagateShapes(*graph, arg_shapes, &shape_refiner));
+  TF_RETURN_IF_ERROR(back_edge.Replace());
+
+  // Currently information does not flow "backward" from consumers to producers
+  // in the shape inference, but we consume the shapes in a second pass in case
+  // backward information flow is added in the future.
+  return StoreOutputShapes(*graph, shape_refiner, shape_info);
+}
+
+xla::StatusOr<InferredShape> MergeInferredShapes(const InferredShape& a,
+                                                 const InferredShape& b) {
+  InferredShape result;
+  TF_RETURN_IF_ERROR(a.shape.MergeWith(b.shape, &result.shape));
+
+  if (a.handle_type == DT_INVALID) {
+    result.handle_type = b.handle_type;
+  } else if (b.handle_type == DT_INVALID) {
+    result.handle_type = a.handle_type;
+  } else if (a.handle_type == b.handle_type) {
+    result.handle_type = a.handle_type;
+  } else {
+    return errors::InvalidArgument(
+        "Mismatched resource types: ", DataTypeString(a.handle_type), " vs. ",
+        DataTypeString(b.handle_type));
+  }
+  TF_RETURN_IF_ERROR(
+      a.handle_shape.MergeWith(b.handle_shape, &result.handle_shape));
+  return result;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/shape_inference.h b/tensorflow/compiler/jit/shape_inference.h
new file mode 100644
index 0000000000..8668dbca55
--- /dev/null
+++ b/tensorflow/compiler/jit/shape_inference.h
@@ -0,0 +1,54 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_JIT_SHAPE_INFERENCE_H_
+#define TENSORFLOW_COMPILER_JIT_SHAPE_INFERENCE_H_
+
+#include <map>
+#include <vector>
+
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/core/common_runtime/optimization_registry.h"
+#include "tensorflow/core/common_runtime/shape_refiner.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+struct InferredShape {
+  // Shape of the argument tensor.
+  PartialTensorShape shape;
+
+  // If the argument is a resource variable, the type and shape of the
+  // variable's value.
+  DataType handle_type = DT_INVALID;
+  PartialTensorShape handle_shape;
+};
+typedef std::unordered_map<string, std::vector<InferredShape>> GraphShapeInfo;
+
+// Infer shapes for all Tensors in a graph, and save them in a map.  The vector
+// for a Node contains the information about each of its outputs.
+// TODO(phawkins): this code does not infer accurate shapes for cyclic graphs.
+Status InferShapes(Graph* graph, const std::map<int, InferredShape>& arg_shapes,
+                   const tensorflow::FunctionLibraryDefinition* fnlib_def,
+                   GraphShapeInfo* shape_info);
+
+// Merges two InferredShapes. Return an error if the two shapes cannot be
+// merged.
+xla::StatusOr<InferredShape> MergeInferredShapes(const InferredShape& a,
+                                                 const InferredShape& b);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_JIT_SHAPE_INFERENCE_H_
diff --git a/tensorflow/compiler/jit/shape_inference_test.cc b/tensorflow/compiler/jit/shape_inference_test.cc
new file mode 100644
index 0000000000..9268172b1c
--- /dev/null
+++ b/tensorflow/compiler/jit/shape_inference_test.cc
@@ -0,0 +1,124 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Tests for ShapeInference.
+
+#include "tensorflow/compiler/jit/shape_inference.h"
+
+#include <map>
+#include <vector>
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/ops/control_flow_ops_internal.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/compiler/jit/test_util.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+TEST(ShapeInferenceTest, Basics) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+  auto a = ops::Placeholder(root.WithOpName("A"), DT_FLOAT,
+                            ops::Placeholder::Shape({2, 3}));
+  auto b = ops::Placeholder(root.WithOpName("B"), DT_FLOAT,
+                            ops::Placeholder::Shape({3}));
+  auto c = ops::Placeholder(root.WithOpName("C"), DT_FLOAT);
+  auto d = ops::Add(root.WithOpName("D"), a, b);
+  auto e = ops::Add(root.WithOpName("E"), d, c);
+  auto f = ops::Neg(root.WithOpName("F"), e);
+  auto g = ops::AddN(root.WithOpName("G"), std::initializer_list<Output>{e, f});
+
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_CHECK_OK(root.ToGraph(graph.get()));
+
+  GraphShapeInfo shape_info;
+  TF_ASSERT_OK(InferShapes(graph.get(), /*arg_shapes=*/{},
+                           /*fnlib_def=*/nullptr, &shape_info));
+
+  std::map<string, std::vector<PartialTensorShape>> expected = {
+      {"A", {PartialTensorShape({2, 3})}}, {"B", {PartialTensorShape({3})}},
+      {"C", {PartialTensorShape()}},       {"D", {PartialTensorShape({2, 3})}},
+      {"E", {PartialTensorShape()}},       {"F", {PartialTensorShape()}},
+      {"G", {PartialTensorShape()}},
+  };
+  TF_EXPECT_OK(ShapeAnnotationsMatch(*graph, shape_info, expected));
+}
+
+TEST(ShapeInferenceTest, WhileLoop) {
+  // Graph:
+  // x = array_ops.placeholder(dtypes.int32)
+  // y = control_flow_ops.while_loop(lambda i: i < 10, lambda i: i + 1, [x])
+  Graph graph(OpRegistry::Global());
+  {
+    Scope scope = Scope::NewRootScope().ExitOnError();
+
+    auto dummy = ops::Placeholder(scope.WithOpName("Dummy"), DT_INT32,
+                                  ops::Placeholder::Shape({}));
+
+    auto source = ops::Placeholder(scope.WithOpName("source"), DT_INT32,
+                                   ops::Placeholder::Shape({}));
+    auto enter =
+        ops::internal::Enter(scope.WithOpName("while/Enter"), source, "aloop");
+    // Add an unused Enter node. These should be ignored.
+    auto enter2 =
+        ops::internal::Enter(scope.WithOpName("while/Enter2"), source, "aloop");
+    auto merge = ops::Merge(scope.WithOpName("while/Merge"),
+                            std::initializer_list<Input>{enter, dummy});
+    auto ten = ops::Const<int32>(
+        scope.WithOpName("while/Less/y").WithControlDependencies(merge.output),
+        10);
+    auto less = ops::Less(scope.WithOpName("while/Less"), merge.output, ten);
+    auto loop_cond = ops::LoopCond(scope.WithOpName("while/LoopCond"), less);
+    auto switch_node =
+        ops::Switch(scope.WithOpName("while/Switch"), merge.output, loop_cond);
+    auto exit = ops::internal::Exit(scope.WithOpName("while/Exit"),
+                                    switch_node.output_false);
+    auto identity = ops::Identity(scope.WithOpName("while/Identity"),
+                                  switch_node.output_true);
+    auto identity_shape =
+        ops::Const<int32>(scope.WithOpName("while/Identity/shape"), {});
+    auto identity_reshaped = ops::Reshape(
+        scope.WithOpName("while/Identity/reshaped"), identity, identity_shape);
+
+    auto one = ops::Const<int32>(
+        scope.WithOpName("while/add/y").WithControlDependencies(identity), 1);
+    auto add = ops::Add(scope.WithOpName("while/add"), identity_reshaped, one);
+    auto next_iteration =
+        ops::NextIteration(scope.WithOpName("while/NextIteration"), add);
+
+    auto sink = ops::Identity(scope.WithOpName("sink"), exit);
+
+    // Remove the dummy node and add the loop backedge.
+    scope.graph()->RemoveNode(dummy.node());
+    scope.graph()->AddEdge(next_iteration.node(), 0, merge.output.node(), 1);
+
+    TF_EXPECT_OK(scope.ToGraph(&graph));
+  }
+
+  GraphShapeInfo shape_info;
+  TF_ASSERT_OK(InferShapes(&graph, /*arg_shapes=*/{}, /*fnlib_def=*/nullptr,
+                           &shape_info));
+  std::map<string, std::vector<PartialTensorShape>> expected = {
+      {"while/Identity", {PartialTensorShape()}},
+      {"while/add", {PartialTensorShape({})}},
+  };
+  TF_EXPECT_OK(ShapeAnnotationsMatch(graph, shape_info, expected));
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/test_util.cc b/tensorflow/compiler/jit/test_util.cc
new file mode 100644
index 0000000000..cada272090
--- /dev/null
+++ b/tensorflow/compiler/jit/test_util.cc
@@ -0,0 +1,57 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/test_util.h"
+
+#include "tensorflow/compiler/jit/shape_inference.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+
+namespace tensorflow {
+
+Status ShapeAnnotationsMatch(
+    const Graph& graph, const GraphShapeInfo& shape_info,
+    std::map<string, std::vector<PartialTensorShape>> expected_shapes) {
+  for (Node* node : graph.op_nodes()) {
+    auto sit = shape_info.find(node->name());
+    TF_RET_CHECK(sit != shape_info.end())
+        << "Missing shape information for node " << node->name();
+    std::vector<PartialTensorShape> shapes;
+    for (const auto& output : sit->second) shapes.push_back(output.shape);
+
+    auto it = expected_shapes.find(node->name());
+    if (it != expected_shapes.end()) {
+      if (!PartialTensorShapeUtils::AreIdentical(shapes, it->second)) {
+        return errors::InvalidArgument(
+            "Shape mismatch for ", node->name(), ". Expected: ",
+            PartialTensorShapeUtils::PartialShapeListString(it->second),
+            ", actual: ",
+            PartialTensorShapeUtils::PartialShapeListString(shapes));
+      }
+      expected_shapes.erase(it);
+    }
+  }
+  if (!expected_shapes.empty()) {
+    std::vector<string> missing;
+    missing.reserve(expected_shapes.size());
+    for (const auto& entry : expected_shapes) {
+      missing.push_back(entry.first);
+    }
+    return errors::InvalidArgument("Missing shapes for nodes: ",
+                                   str_util::Join(missing, ","));
+  }
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/test_util.h b/tensorflow/compiler/jit/test_util.h
new file mode 100644
index 0000000000..0c9fee8f24
--- /dev/null
+++ b/tensorflow/compiler/jit/test_util.h
@@ -0,0 +1,44 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Helper functions for tests.
+
+#ifndef TENSORFLOW_COMPILER_JIT_TEST_UTIL_H_
+#define TENSORFLOW_COMPILER_JIT_TEST_UTIL_H_
+
+#include <map>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/compiler/jit/shape_inference.h"
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+// Tests that the shapes in 'shape_info' for the nodes in `graph` match
+// `expected_shapes`. Returns an error if there are nodes in `expected_shapes`
+// that do not have shape information. Ignores nodes in `graph` that do not have
+// `expected_shapes` entries.
+Status ShapeAnnotationsMatch(
+    const Graph& graph, const GraphShapeInfo& shape_info,
+    std::map<string, std::vector<PartialTensorShape>> expected_shapes);
+
+}  // namespace tensorflow
+
+
+#endif  // TENSORFLOW_COMPILER_JIT_TEST_UTIL_H_
-- 
GitLab


From 5dd69a28480bb478c5c2c1938d5da55fa2ad27f7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 21:26:06 -0700
Subject: [PATCH 1021/1085] Registering dynamic stitch kernels for quantized
 types.

PiperOrigin-RevId: 217260197
---
 tensorflow/core/kernels/dynamic_stitch_op.cc  |  1 +
 .../kernel_tests/dynamic_stitch_op_test.py    | 32 ++++++++++++-------
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/kernels/dynamic_stitch_op.cc b/tensorflow/core/kernels/dynamic_stitch_op.cc
index fb2a4cc8ef..f21f2acf26 100644
--- a/tensorflow/core/kernels/dynamic_stitch_op.cc
+++ b/tensorflow/core/kernels/dynamic_stitch_op.cc
@@ -327,6 +327,7 @@ struct ParallelDynamicStitchOpCPU : DynamicStitchOpImplCPU<T, true> {
 
 TF_CALL_POD_STRING_TYPES(REGISTER_DYNAMIC_STITCH);
 TF_CALL_variant(REGISTER_DYNAMIC_STITCH);
+TF_CALL_QUANTIZED_TYPES(REGISTER_DYNAMIC_STITCH);
 #undef REGISTER_DYNAMIC_STITCH
 
 #if GOOGLE_CUDA
diff --git a/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py b/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py
index 3a1036e52a..61542528b8 100644
--- a/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import gradients_impl
 import tensorflow.python.ops.data_flow_grad  # pylint: disable=unused-import
 from tensorflow.python.platform import test
@@ -61,19 +62,26 @@ class DynamicStitchTestBase(object):
 
   def testSimpleOneDimensional(self):
     with self.test_session(use_gpu=True):
-      indices = [
-          constant_op.constant([0, 4, 7]),
-          constant_op.constant([1, 6, 2, 3, 5])
-      ]
-      data = [
-          constant_op.constant([0, 40, 70]),
-          constant_op.constant([10, 60, 20, 30, 50])
+      # Test various datatypes in the simple case to ensure that the op was
+      # registered under those types.
+      dtypes_to_test = [
+          dtypes.float32, dtypes.qint8, dtypes.quint8, dtypes.qint32
       ]
-      stitched_t = self.stitch_op(indices, data)
-      stitched_val = stitched_t.eval()
-      self.assertAllEqual([0, 10, 20, 30, 40, 50, 60, 70], stitched_val)
-      # Dimension 0 is max(flatten(indices))+1.
-      self.assertEqual([8], stitched_t.get_shape().as_list())
+      for dtype in dtypes_to_test:
+        indices = [
+            constant_op.constant([0, 4, 7]),
+            constant_op.constant([1, 6, 2, 3, 5])
+        ]
+        data = [
+            math_ops.cast(constant_op.constant([0, 40, 70]), dtype=dtype),
+            math_ops.cast(
+                constant_op.constant([10, 60, 20, 30, 50]), dtype=dtype)
+        ]
+        stitched_t = self.stitch_op(indices, data)
+        stitched_val = stitched_t.eval()
+        self.assertAllEqual([0, 10, 20, 30, 40, 50, 60, 70], stitched_val)
+        # Dimension 0 is max(flatten(indices))+1.
+        self.assertEqual([8], stitched_t.get_shape().as_list())
 
   def testOneListOneDimensional(self):
     with self.test_session(use_gpu=True):
-- 
GitLab


From 6d9b2eb9c9e8607c43ba1f7c06d95e0b574986b3 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Mon, 15 Oct 2018 21:27:25 -0700
Subject: [PATCH 1022/1085] Cleanup std::ignore lint warnings.

PiperOrigin-RevId: 217260294
---
 .../core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc | 6 +++---
 tensorflow/core/kernels/fuzzing/fuzz_session.h              | 3 +--
 tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc     | 2 +-
 tensorflow/core/kernels/fuzzing/string_split_fuzz.cc        | 4 ++--
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc b/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc
index 4d736a2160..5b029bf5ec 100644
--- a/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc
@@ -43,9 +43,9 @@ class FuzzExampleProtoFastParsing : public FuzzSession {
     std::vector<PartialTensorShape> dense_shapes;
     dense_shapes.push_back(PartialTensorShape());
 
-    std::ignore = ParseExample(scope.WithOpName("output"), in_expanded, names,
-                               sparse_keys, dense_keys, dense_defaults,
-                               sparse_types, dense_shapes);
+    (void)ParseExample(scope.WithOpName("output"), in_expanded, names,
+                       sparse_keys, dense_keys, dense_defaults, sparse_types,
+                       dense_shapes);
   }
 
   void FuzzImpl(const uint8_t* data, size_t size) final {
diff --git a/tensorflow/core/kernels/fuzzing/fuzz_session.h b/tensorflow/core/kernels/fuzzing/fuzz_session.h
index f1f3f199df..9777be1ae8 100644
--- a/tensorflow/core/kernels/fuzzing/fuzz_session.h
+++ b/tensorflow/core/kernels/fuzzing/fuzz_session.h
@@ -39,8 +39,7 @@ limitations under the License.
   void BuildGraph(const Scope& scope) override {                         \
     auto op_node =                                                       \
         tensorflow::ops::Placeholder(scope.WithOpName("input1"), dtype); \
-    std::ignore =                                                        \
-        tensorflow::ops::opName(scope.WithOpName("output"), op_node);    \
+    (void)tensorflow::ops::opName(scope.WithOpName("output"), op_node);  \
   }
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
index ada1235449..ab6812c5f1 100644
--- a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
@@ -27,7 +27,7 @@ class FuzzParseTensor : public FuzzSession {
     // The serialized proto.
     auto input = Placeholder(scope.WithOpName("input1"), DT_STRING);
 
-    std::ignore = ParseTensor(scope.WithOpName("output"), input, DT_FLOAT);
+    (void)ParseTensor(scope.WithOpName("output"), input, DT_FLOAT);
   }
 
   void FuzzImpl(const uint8_t* data, size_t size) final {
diff --git a/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc b/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc
index 738d78e99a..87a548a999 100644
--- a/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc
@@ -25,8 +25,8 @@ class FuzzStringSplit : public FuzzSession {
         tensorflow::ops::Placeholder(scope.WithOpName("input1"), DT_STRING);
     auto delimeter =
         tensorflow::ops::Placeholder(scope.WithOpName("input2"), DT_STRING);
-    std::ignore = tensorflow::ops::StringSplit(scope.WithOpName("output"),
-                                               input, delimeter);
+    (void)tensorflow::ops::StringSplit(scope.WithOpName("output"), input,
+                                       delimeter);
   }
 
   void FuzzImpl(const uint8_t* data, size_t size) final {
-- 
GitLab


From 316e0466693ab6725e60ed4ff8b8f16d7d03c92c Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Mon, 15 Oct 2018 21:36:06 -0700
Subject: [PATCH 1023/1085] Add utility function to infer shapes for outside
 compilation nodes.

PiperOrigin-RevId: 217260892
---
 tensorflow/compiler/jit/BUILD                 | 27 ++++++
 tensorflow/compiler/jit/encapsulate_util.cc   | 94 +++++++++++++++++++
 tensorflow/compiler/jit/encapsulate_util.h    | 49 ++++++++++
 .../compiler/jit/encapsulate_util_test.cc     | 68 ++++++++++++++
 4 files changed, 238 insertions(+)
 create mode 100644 tensorflow/compiler/jit/encapsulate_util.cc
 create mode 100644 tensorflow/compiler/jit/encapsulate_util.h
 create mode 100644 tensorflow/compiler/jit/encapsulate_util_test.cc

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index a635608596..311313b8f2 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -405,6 +405,33 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "encapsulate_util",
+    srcs = ["encapsulate_util.cc"],
+    hdrs = ["encapsulate_util.h"],
+    deps = [
+        ":shape_inference",
+        "//tensorflow/core:graph",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:optional",
+    ],
+)
+
+tf_cc_test(
+    name = "encapsulate_util_test",
+    srcs = ["encapsulate_util_test.cc"],
+    deps = [
+        ":encapsulate_util",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:scope",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:ops",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 cc_library(
     name = "compilation_passes",
     srcs = [
diff --git a/tensorflow/compiler/jit/encapsulate_util.cc b/tensorflow/compiler/jit/encapsulate_util.cc
new file mode 100644
index 0000000000..870a265f29
--- /dev/null
+++ b/tensorflow/compiler/jit/encapsulate_util.cc
@@ -0,0 +1,94 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/encapsulate_util.h"
+#include <algorithm>
+#include <iterator>
+
+#include "absl/strings/str_cat.h"
+#include "absl/types/optional.h"
+#include "tensorflow/compiler/jit/shape_inference.h"
+
+namespace tensorflow {
+
+namespace {
+
+// Returns string attribute value for the node if the attribute is present,
+// otherwise returns empty optional value.
+absl::optional<string> GetStringAttr(const Node& n, const string& attr_name) {
+  auto attr = n.attrs().Find(attr_name);
+  if (!attr) {
+    return absl::nullopt;
+  } else {
+    return attr->s();
+  }
+}
+
+}  // namespace
+
+const char kXlaInferredShapesAttrName[] = "_xla_inferred_shapes";
+
+Status PerformStaticShapeInferenceBeforeEncapsulation(
+    Graph* g, const string& xla_computation_attr_name,
+    const string& outside_compilation_attr_name) {
+  // Find all outside compilation to XLA computation data edges.
+  std::unordered_set<Node*> outside_compilation_send_nodes;
+  for (auto e : g->edges()) {
+    if (e->IsControlEdge()) {
+      continue;
+    }
+
+    auto src_computation = GetStringAttr(*e->src(), xla_computation_attr_name);
+    auto dst_computation = GetStringAttr(*e->dst(), xla_computation_attr_name);
+    if (!src_computation || !dst_computation ||
+        *src_computation != *dst_computation) {
+      continue;
+    }
+
+    auto src_outside_compilation =
+        GetStringAttr(*e->src(), outside_compilation_attr_name);
+    auto dst_outside_compilation =
+        GetStringAttr(*e->dst(), outside_compilation_attr_name);
+    if (src_outside_compilation && !dst_outside_compilation) {
+      outside_compilation_send_nodes.insert(e->src());
+    }
+  }
+
+  // Perform shape inference.
+  std::map<int, InferredShape> arg_shapes;
+  GraphShapeInfo shape_info;
+  TF_RETURN_IF_ERROR(
+      InferShapes(g, arg_shapes, /*fnlib_def=*/nullptr, &shape_info));
+
+  // Add attribute for output shapes.
+  for (Node* n : outside_compilation_send_nodes) {
+    auto iter = shape_info.find(n->name());
+    if (iter == shape_info.end()) {
+      continue;
+    }
+
+    std::vector<PartialTensorShape> output_shapes;
+    std::transform(iter->second.begin(), iter->second.end(),
+                   std::back_inserter(output_shapes),
+                   [](const InferredShape& inferred_shape) {
+                     return inferred_shape.shape;
+                   });
+    n->AddAttr(kXlaInferredShapesAttrName, output_shapes);
+  }
+
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/encapsulate_util.h b/tensorflow/compiler/jit/encapsulate_util.h
new file mode 100644
index 0000000000..bc46521b98
--- /dev/null
+++ b/tensorflow/compiler/jit/encapsulate_util.h
@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file contains some utility functions for encapsulating XLA computation
+// in host graph and encapsulating outside compilation in XLA computation.
+
+#ifndef TENSORFLOW_COMPILER_JIT_ENCAPSULATE_UTIL_H_
+#define TENSORFLOW_COMPILER_JIT_ENCAPSULATE_UTIL_H_
+
+#include "tensorflow/core/graph/graph.h"
+
+namespace tensorflow {
+
+// Attribute marking output tensor shapes inferred by XLA. Attribute value is
+// a list of PartialTensorShape objects.
+extern const char kXlaInferredShapesAttrName[];
+
+// Infer output shapes for outside compilation nodes which have output data
+// edges to XLA computation nodes. These shapes will be used later by XLA
+// compiler as output shapes of the outside compilation's XlaHostCompute op.
+// XLA computation nodes will be mark by attr `xla_computation_attr_name`;
+// outside compilation nodes will be marked by both attr
+// `xla_computation_attr_name` and `outside_compilation_attr_name`.
+//
+// Those outside compilation nodes will be marked with attribute
+// `kXlaInferredShapesAttrName`.
+//
+// We have to perform shape inference before encapsulation because after
+// encapsulation, some nodes will be encapsulated into function call, and shape
+// inference does not handle function call at the moment.
+Status PerformStaticShapeInferenceBeforeEncapsulation(
+    Graph* g, const string& xla_computation_attr_name,
+    const string& outside_compilation_attr_name);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_JIT_ENCAPSULATE_UTIL_H_
diff --git a/tensorflow/compiler/jit/encapsulate_util_test.cc b/tensorflow/compiler/jit/encapsulate_util_test.cc
new file mode 100644
index 0000000000..53bdf55ab2
--- /dev/null
+++ b/tensorflow/compiler/jit/encapsulate_util_test.cc
@@ -0,0 +1,68 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/encapsulate_util.h"
+
+#include "tensorflow/cc/framework/scope.h"
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+
+TEST(PerformStaticShapeInferenceBeforeEncapsulationTest, Basic) {
+  // Build the graph:
+  // "add" = "const_0" + "const_1"
+  // "identity" = "add"
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output const_0 = ops::Const(s.WithOpName("const_0"), 1, {2});
+  Output const_1 = ops::Const(s.WithOpName("const_1"), 2, {2});
+  Output add = ops::Add(s.WithOpName("add"), const_0, const_1);
+  Output identity = ops::Identity(s.WithOpName("identity"), add);
+  Graph g(OpRegistry::Global());
+  TF_CHECK_OK(s.ToGraph(&g));
+
+  // "add" node is outside compilation node, "identity" node is XLA node.
+  auto node_index = g.BuildNodeNameIndex();
+  Node *add_node = node_index["add"], *identity_node = node_index["identity"];
+  add_node->AddAttr("_xla", "cluster");
+  add_node->AddAttr("_oc", "cluster");
+  identity_node->AddAttr("_xla", "cluster");
+  TF_CHECK_OK(
+      PerformStaticShapeInferenceBeforeEncapsulation(&g, "_xla", "_oc"));
+
+  // Check that only "add" node now has _xla_inferred_shapes attr.
+  std::vector<Node*> nodes_with_inferred_shape;
+  for (Node* n : g.nodes()) {
+    if (HasNodeAttr(n->def(), kXlaInferredShapesAttrName)) {
+      nodes_with_inferred_shape.push_back(n);
+    }
+  }
+  EXPECT_EQ(nodes_with_inferred_shape.size(), 1);
+  EXPECT_EQ(nodes_with_inferred_shape[0], add_node);
+  std::vector<PartialTensorShape> output_shapes;
+  TF_CHECK_OK(GetNodeAttr(add_node->attrs(), kXlaInferredShapesAttrName,
+                          &output_shapes));
+  EXPECT_EQ(output_shapes.size(), 1);
+  TensorShapeProto shape_proto;
+  output_shapes[0].AsProto(&shape_proto);
+  EXPECT_EQ(shape_proto.dim_size(), 1);
+  EXPECT_EQ(shape_proto.dim(0).size(), 2);
+}
+
+}  // namespace tensorflow
-- 
GitLab


From 4eebcb88cb80d98212c8855d9f89db74ec8f1ae3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 15 Oct 2018 22:16:21 -0700
Subject: [PATCH 1024/1085] Automated rollback of commit
 b75a555c3c930d8e15b3c7928f30f1941a48f43f

PiperOrigin-RevId: 217263920
---
 .../estimator/python/gan_estimator_impl.py    |  8 +--
 .../estimator/python/gan_estimator_test.py    | 68 -------------------
 2 files changed, 2 insertions(+), 74 deletions(-)

diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index 219cc199d7..7243f150ce 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -112,8 +112,7 @@ class GANEstimator(estimator.Estimator):
                get_eval_metric_ops_fn=None,
                add_summaries=None,
                use_loss_summaries=True,
-               config=None,
-               warm_start_from=None):
+               config=None):
     """Initializes a GANEstimator instance.
 
     Args:
@@ -152,8 +151,6 @@ class GANEstimator(estimator.Estimator):
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
       config: `RunConfig` object to configure the runtime settings.
-      warm_start_from: A filepath to a checkpoint or saved model, or a
-        WarmStartSettings object to configure initialization.
 
     Raises:
       ValueError: If loss functions aren't callable.
@@ -190,8 +187,7 @@ class GANEstimator(estimator.Estimator):
           get_hooks_fn, use_loss_summaries)
 
     super(GANEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config,
-        warm_start_from=warm_start_from)
+        model_fn=_model_fn, model_dir=model_dir, config=config)
 
 
 def _get_gan_model(
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
index 4529dc07ef..83f8dd641f 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
@@ -33,11 +33,9 @@ from tensorflow.contrib.learn.python.learn.learn_io import graph_io
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.estimator import WarmStartSettings
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework.errors_impl import NotFoundError
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics as metrics_lib
@@ -319,71 +317,5 @@ class GANEstimatorIntegrationTest(test.TestCase):
         prediction_size=[batch_size, input_dim])
 
 
-class GANEstimatorWarmStartTest(test.TestCase):
-
-  def setUp(self):
-    self._model_dir = self.create_tempdir().full_path
-    self.new_variable_name = 'new_var'
-    self.new_variable_value = [1, 2, 3]
-
-  def tearDown(self):
-    writer_cache.FileWriterCache.clear()
-
-  def _test_warm_start(self, warm_start_from=None):
-    """Tests whether WarmStartSettings work as intended."""
-    def generator_with_new_variable(noise_dict, mode):
-      variable_scope.get_variable(name=self.new_variable_name,
-                                  initializer=self.new_variable_value,
-                                  trainable=True)
-      return generator_fn(noise_dict, mode)
-
-    def train_input_fn():
-      data = np.zeros([3, 4])
-      return {'x': data}, data
-
-    est = estimator.GANEstimator(
-        generator_fn=generator_fn,
-        discriminator_fn=discriminator_fn,
-        generator_loss_fn=losses.wasserstein_generator_loss,
-        discriminator_loss_fn=losses.wasserstein_discriminator_loss,
-        generator_optimizer=training.GradientDescentOptimizer(1.0),
-        discriminator_optimizer=training.GradientDescentOptimizer(1.0),
-        model_dir=self._model_dir)
-
-    est.train(train_input_fn, steps=1)
-
-    est_warm = estimator.GANEstimator(
-        generator_fn=generator_with_new_variable,
-        discriminator_fn=discriminator_fn,
-        generator_loss_fn=losses.wasserstein_generator_loss,
-        discriminator_loss_fn=losses.wasserstein_discriminator_loss,
-        generator_optimizer=training.GradientDescentOptimizer(1.0),
-        discriminator_optimizer=training.GradientDescentOptimizer(1.0),
-        model_dir=None if warm_start_from else self._model_dir,
-        warm_start_from=warm_start_from)
-
-    est_warm.train(train_input_fn, steps=1)
-
-    return est_warm
-
-  def test_warm_start_error(self):
-    """Test if exception when reloading different estimators."""
-    with self.assertRaises(NotFoundError):
-      self._test_warm_start()
-
-  def test_warm_start_success(self):
-    """Test if GANEstimator allows explicit warm start variable assignment."""
-    # Regex matches all variable names in ckpt except for new_var.
-    var_regex = '^(?!.*%s.*)' % self.new_variable_name
-    warmstart = WarmStartSettings(ckpt_to_initialize_from=self._model_dir,
-                                  vars_to_warm_start=var_regex)
-    est_warm = self._test_warm_start(warm_start_from=warmstart)
-    full_variable_name = 'Generator/%s' % self.new_variable_name
-    self.assertIn(full_variable_name, est_warm.get_variable_names())
-    equal_vals = np.array_equal(est_warm.get_variable_value(full_variable_name),
-                                self.new_variable_value)
-    self.assertTrue(equal_vals)
-
-
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 7c8f4f7362136824d2ed05b0d4f044870400112a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 02:00:25 -0700
Subject: [PATCH 1025/1085] Pass **kwargs through tf_custom_op_library
 extension rules

PiperOrigin-RevId: 217283680
---
 tensorflow/tensorflow.bzl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index a886dcf5d7..510a696b41 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1482,9 +1482,9 @@ check_deps = rule(
     },
 )
 
-# Helper to build a dynamic library (.so) from the sources containing
-# implementations of custom ops and kernels.
-def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = []):
+def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [], **kwargs):
+    """Helper to build a dynamic library (.so) from the sources containing implementations of custom ops and kernels.
+    """
     cuda_deps = [
         clean_dep("//tensorflow/core:stream_executor_headers_lib"),
         "@local_config_cuda//cuda:cuda_headers",
@@ -1502,6 +1502,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
             copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
             features = if_cuda(["-use_header_modules"]),
             deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
+            **kwargs
         )
         cuda_deps.extend([":" + basename + "_gpu"])
         rocm_deps.extend([":" + basename + "_gpu"])
@@ -1528,6 +1529,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
             clean_dep("//tensorflow:windows"): [],
             clean_dep("//tensorflow:darwin"): [],
         }),
+        **kwargs
     )
 
 register_extension_info(
-- 
GitLab


From 63baef56878bda2e1e631855513221c7d5b36018 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 02:02:25 -0700
Subject: [PATCH 1026/1085] compat: Update forward compatibility horizon to
 2018-10-16

PiperOrigin-RevId: 217284008
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 0a1dd17b89..dac46aaa7d 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 15)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 16)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 5f741287b98c5584faf1408e4f486e86ef521b1c Mon Sep 17 00:00:00 2001
From: Doe Hyun Yoon <dyoon@google.com>
Date: Tue, 16 Oct 2018 08:00:33 -0700
Subject: [PATCH 1027/1085] Propagate Tensor values in Grappler static shape
 inference.

Currently, GraphProperties.InferStatically() propagates outputs (shape) and output_tensors_as_shape (value in shape).

Tensor values (input_tensors in InferenceContext) are fed only for a few cases:
Const, Rank, and Size ops as input.
But they don't propagate; e.g., X has input_tensor value for Const -> X, but not for
this case: Const -> Identity -> X.

This CL implements the followings:
(1) add input_tensor_protos and output_tensor_protos in the NodeContext struct in SymbolicShapeRefiner
(2) made MaybeUpdateNodeContextOutput() and all the ad-hoc NodeContext output setting in InferShapes() into the method (it makes more sense to do that ad-hoc setting of NodeContext output after we run shape inference function that sets output (shape) of a node
(3) moved ad-hoc setting of input tensor values (for Const, Rank, and Size ops) in UpdateNode()
 to MaybeUpdateNodeContextOutput(); all the ad-hoc configs are in the MaybeUpdateNodeContextOutput(), and we simply propagates input node's output_tensor_protos to input_tensor_protos.
(4) Other minor clean up and tests.

With this CL, we can set input_tensors in InferenceContext in many more cases; for example, ops like Split needs input_tensors, not input_tensors_as_shape.
Besides, we can later add more adhoc logic to propagate tensor value easily.

PiperOrigin-RevId: 217317682
---
 tensorflow/core/grappler/costs/BUILD          |   1 +
 .../core/grappler/costs/graph_properties.cc   | 289 +++++++++++++-----
 .../grappler/costs/graph_properties_test.cc   |  83 ++++-
 3 files changed, 296 insertions(+), 77 deletions(-)

diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD
index 01e8f2b185..144d7f8ce6 100644
--- a/tensorflow/core/grappler/costs/BUILD
+++ b/tensorflow/core/grappler/costs/BUILD
@@ -41,6 +41,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":utils",
+        "@com_google_absl//absl/memory",
         "//tensorflow/core/grappler/utils:functions",
         "//tensorflow/core/grappler/utils:topological_sort",
         "//tensorflow/core/grappler:graph_view",
diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index 56c8339d57..dd6ce0c132 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -15,14 +15,18 @@ limitations under the License.
 
 #include "tensorflow/core/grappler/costs/graph_properties.h"
 
+#include <limits>
+#include <list>
 #include <queue>
 #include <unordered_map>
 #include <unordered_set>
+#include "absl/memory/memory.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/grappler/costs/utils.h"
@@ -259,6 +263,8 @@ typename DisjointSet<Handle>::Rep* DisjointSet<Handle>::Find(Handle value) {
   return root;
 }
 
+// TODO(dyoon): Move many helper functions in this file (including those within
+// SymbolicShapeRefiner class) to shared utils.
 bool IsEnqueue(const NodeDef& n) {
   return (n.op().find("Enqueue") != string::npos &&
           n.op().find("EnqueueMany") == string::npos);
@@ -380,21 +386,29 @@ TensorProto MakeTensorProtoFromShape(InferenceContext* ic,
   return tensor_proto;
 }
 
-// Returns a Const NodeDef with shape = `shape`, values = `tensor_as_shape`,
-// and dtype = `dtype`.
-NodeDef MakeConstNodeDefFromShape(InferenceContext* ic,
-                                  const ShapeHandle& shape,
-                                  const ShapeHandle& tensor_as_shape,
-                                  const DataType& dtype) {
+// Returns a Const NodeDef with tensor `tensor_proto` and dtype = `dtype`.
+NodeDef MakeConstNodeDefFromTensorProto(InferenceContext* ic,
+                                        const TensorProto& tensor_proto,
+                                        const DataType& dtype) {
   NodeDef const_node;
   const_node.set_name("const_from_shape");
   const_node.set_op("Const");
   auto* attr = const_node.mutable_attr();
   (*attr)["dtype"].set_type(dtype);
   auto* tensor = (*attr)["value"].mutable_tensor();
-  *tensor = MakeTensorProtoFromShape(ic, shape, tensor_as_shape, dtype);
+  *tensor = tensor_proto;
   return const_node;
 }
+
+// Returns a Const NodeDef with shape = `shape`, values = `tensor_as_shape`,
+// and dtype = `dtype`.
+NodeDef MakeConstNodeDefFromShape(InferenceContext* ic,
+                                  const ShapeHandle& shape,
+                                  const ShapeHandle& tensor_as_shape,
+                                  const DataType& dtype) {
+  return MakeConstNodeDefFromTensorProto(
+      ic, MakeTensorProtoFromShape(ic, shape, tensor_as_shape, dtype), dtype);
+}
 }  // namespace
 
 // Queue of nodes to process. Nodes can be enqueued in any order, but will be
@@ -455,6 +469,9 @@ class SymbolicShapeRefiner {
     DataTypeVector input_types;
     DataTypeVector output_types;
     std::unique_ptr<InferenceContext> inference_context;
+    // Additional info for propagating tensor values and tensor shapes.
+    std::vector<const TensorProto*> input_tensor_protos;
+    std::vector<const TensorProto*> output_tensor_protos;
     std::vector<ShapeHandle> output_tensors_as_shapes;
   };
 
@@ -553,6 +570,12 @@ class SymbolicShapeRefiner {
       if (IsConstant(*input_node)) {
         TF_CHECK_OK(
             ReplaceInputWithConst(*input_node, i, &grappler_function_item));
+      } else if (ctx->input_tensor_protos.size() > i &&
+                 ctx->input_tensor_protos[i] != nullptr) {
+        NodeDef const_input_node = MakeConstNodeDefFromTensorProto(
+            ic, *ctx->input_tensor_protos[i], ctx->input_types[i]);
+        TF_CHECK_OK(ReplaceInputWithConst(const_input_node, i,
+                                          &grappler_function_item));
       } else if (ic->input_tensors_as_shapes().size() > i &&
                  IsShapeFullyDefinedIntegerVectorOrScalar(
                      ic, ic->input(i), ic->input_tensors_as_shapes()[i],
@@ -574,6 +597,8 @@ class SymbolicShapeRefiner {
     // Add return nodes for output shapes.
     int output = 0;
     ctx->output_tensors_as_shapes.resize(grappler_function_item.output_size());
+    ctx->output_tensor_protos.resize(grappler_function_item.output_size(),
+                                     nullptr);
     for (auto const& out_arg : grappler_function_item.outputs()) {
       if (out_arg.output_tensors.size() > 1) {
         // TODO(jmdecker): Handle case of multiple output tensors
@@ -610,8 +635,11 @@ class SymbolicShapeRefiner {
         // Forward tensor value to output_tensors_as_shape.
         Tensor tensor;
         if (tensor.FromProto(outprop.value())) {
-          MaybeSetTensorValueToShape(ic, tensor,
-                                     &ctx->output_tensors_as_shapes[output]);
+          MaybeTensorValueToShape(ic, tensor,
+                                  &ctx->output_tensors_as_shapes[output]);
+          const_tensors_to_propagate_.push_back(outprop.value());
+          ctx->output_tensor_protos[output] =
+              &const_tensors_to_propagate_.back();
         }
       }
       output++;
@@ -636,6 +664,8 @@ class SymbolicShapeRefiner {
                                              nullptr);
     std::vector<ShapeHandle> input_tensors_as_shapes(
         inference_context->num_inputs());
+    node_context->input_tensor_protos.resize(inference_context->num_inputs(),
+                                             nullptr);
 
     for (int dst_input = 0; dst_input < inference_context->num_inputs();
          ++dst_input) {
@@ -651,55 +681,59 @@ class SymbolicShapeRefiner {
               "' was not previously added to SymbolicShapeRefiner.");
         }
 
-        if (IsConstant(*input)) {
-          // Convert constant value into tensors.
-          if (const_values[dst_input].FromProto(
-                  input->attr().at("value").tensor())) {
-            input_tensors[dst_input] = &const_values[dst_input];
-            MaybeSetTensorValueToShape(inference_context,
-                                       const_values[dst_input],
-                                       &input_tensors_as_shapes[dst_input]);
-          }
-        } else if (IsRank(*input)) {
-          if (c->inference_context->RankKnown(c->inference_context->input(0))) {
-            int32 rank =
-                c->inference_context->Rank(c->inference_context->input(0));
-            Tensor t(DT_INT32, {});
-            t.flat<int32>()(0) = rank;
-            const_values[dst_input] = t;
-            input_tensors[dst_input] = &const_values[dst_input];
-          }
-        } else if (IsSize(*input)) {
-          DimensionHandle size =
-              c->inference_context->NumElements(c->inference_context->input(0));
-          if (c->inference_context->ValueKnown(size)) {
-            int64 sz = c->inference_context->Value(size);
-            bool valid = false;
-            if (input->attr().at("T").type() == DT_INT32) {
-              if (sz < std::numeric_limits<int32>::max()) {
-                Tensor t(DT_INT32, {});
-                t.flat<int32>()(0) = sz;
-                const_values[dst_input] = t;
-                valid = true;
-              }
-            } else {
-              Tensor t(DT_INT64, {});
-              t.flat<int64>()(0) = sz;
-              const_values[dst_input] = t;
-              valid = true;
-            }
-            if (valid) {
-              input_tensors[dst_input] = &const_values[dst_input];
-            }
-          }
-        }
+        // Propagate input node's NodeContext info to the current node's
+        // NodeContext:
+        // output_tensor_protos to input_tensor_protos and input_tensors, and
+        // output_tensors_as_shapes to input_tensors_as_shapes.
 
         if (c->output_tensors_as_shapes.size() > src_output) {
           input_tensors_as_shapes[dst_input] =
               c->output_tensors_as_shapes[src_output];
         }
 
+        if (c->output_tensor_protos.size() > src_output) {
+          auto* tensor_proto = c->output_tensor_protos[src_output];
+          if (tensor_proto != nullptr &&
+              const_values[dst_input].FromProto(*tensor_proto)) {
+            input_tensors[dst_input] = &const_values[dst_input];
+            node_context->input_tensor_protos[dst_input] = tensor_proto;
+
+            if (!inference_context->FullyDefined(
+                    input_tensors_as_shapes[dst_input])) {
+              // Shape from a Const is not fully defined when the Const has
+              // value -1 (e.g., Reshape(x, Const(-1)) to reshape an arbitrary
+              // tensor x to a vector).
+              // It's possible that the same Const with -1 is used in many
+              // places, but that doesn't mean the resultant shapes are
+              // identical. e.g., x1 = Reshape(x, c) and y1 = Reshape(y, c),
+              // where c is -1. In this case, shape inference yields both x1 and
+              // y1 as rank 1, size unknown, but still the shapes of x1 and y1
+              // can be different. (even if we use different Const(-1) for x1
+              // and x2, graph optimzier may merge them to single Const through
+              // duplicate removal.)
+              // If we reuse output_tensors_as_shapes to input_tensors_as_shapes
+              // by copying ShapeHandle, they share the same Shape object, and
+              // SymbolicShapeManager, later in InferStatically(), assigns the
+              // same symbolic dim value (unique value < -1); in the above
+              // Reshape example, the shapes of x1 and y1 become, for example,
+              // [-278] and graph optimizer may yield incorrect output 'cause it
+              // assumes x1 and y1 have the same shape.
+              // To prevent this, we re-create a ShapeHandle from the Const
+              // tensor, instead of reusing output_tensors_as_shapes (so that
+              // ShapeHandles of the const fanouts have the same values,
+              // but different Shape objects -- SymbolicShapeManager assigns
+              // different symbol id to each fanout shape).
+              // TODO(dyoon): clean up the way values are propagated.
+              MaybeTensorValueToShape(inference_context,
+                                      const_values[dst_input],
+                                      &input_tensors_as_shapes[dst_input]);
+            }
+          }
+        }
+
         DCHECK_GE(dst_input, 0);
+        // NOTE: we check only shape is refined; we do not (yet) check whether
+        // tensor value is refined.
         if (!*refined && !inference_context->input(dst_input).SameHandle(
                              c->inference_context->output(src_output))) {
           *refined = true;
@@ -974,17 +1008,53 @@ class SymbolicShapeRefiner {
     return dim;
   }
 
-  Status InferShapes(const NodeDef& node, NodeContext* c) {
-    InferenceContext* ic = c->inference_context.get();
-
-    auto it = fed_ports_.find(node.name());
-    const bool is_fed = it != fed_ports_.end();
-
-    // Propagate shape tensors unless the node is fed.
+  Status MaybeUpdateNodeContextOutput(const NodeDef& node, const bool is_fed,
+                                      NodeContext* c) {
+    // Propagate tensors and shape tensors unless the node is fed.
     // TODO(bsteiner) We should still propagate the shapes to the ports that
     // aren't fed in the case of a ShapeN node.
+
+    InferenceContext* ic = c->inference_context.get();
     if (!is_fed) {
-      if (IsShape(node)) {
+      if (IsConstant(node)) {
+        c->output_tensor_protos.resize(1);
+        const TensorProto& tensor_proto = node.attr().at("value").tensor();
+        c->output_tensor_protos[0] = &tensor_proto;
+        c->output_tensors_as_shapes.resize(1);
+        MaybeTensorProtoToShape(ic, tensor_proto,
+                                &c->output_tensors_as_shapes[0]);
+      } else if (IsRank(node)) {
+        if (ic->RankKnown(ic->input(0))) {
+          // Propagate rank value.
+          int32 rank = ic->Rank(ic->input(0));
+          const_tensors_to_propagate_.push_back(
+              MakeIntegerScalarTensorProto(DT_INT32, rank));
+          c->output_tensor_protos.resize(1);
+          c->output_tensor_protos[0] = &const_tensors_to_propagate_.back();
+        }
+      } else if (IsSize(node)) {
+        DimensionHandle size = ic->NumElements(ic->input(0));
+        if (ic->ValueKnown(size)) {
+          // Propagate size value.
+          int64 sz = ic->Value(size);
+          bool valid = false;
+          if (node.attr().at("T").type() == DT_INT32) {
+            if (sz < std::numeric_limits<int32>::max()) {
+              const_tensors_to_propagate_.push_back(
+                  MakeIntegerScalarTensorProto(DT_INT32, sz));
+              valid = true;
+            }
+          } else {
+            const_tensors_to_propagate_.push_back(
+                MakeIntegerScalarTensorProto(DT_INT64, sz));
+            valid = true;
+          }
+          if (valid) {
+            c->output_tensor_protos.resize(1);
+            c->output_tensor_protos[0] = &const_tensors_to_propagate_.back();
+          }
+        }
+      } else if (IsShape(node)) {
         c->output_tensors_as_shapes.resize(1);
         c->output_tensors_as_shapes[0] = c->inference_context->input(0);
       } else if (IsShapeN(node)) {
@@ -1042,9 +1112,12 @@ class SymbolicShapeRefiner {
           c->output_tensors_as_shapes[0] = ic->MakeShape(dims);
         }
       } else if (IsIdentity(node)) {
-        // Pass input_tensors_as_shapes to output_tensors_as_shapes.
         c->output_tensors_as_shapes.resize(1);
         c->output_tensors_as_shapes[0] = ic->input_tensors_as_shapes()[0];
+        if (c->input_tensor_protos[0] != nullptr) {
+          c->output_tensor_protos.resize(1);
+          c->output_tensor_protos[0] = c->input_tensor_protos[0];
+        }
       } else if (IsSlice(node)) {
         ShapeHandle input = ic->input_tensors_as_shapes()[0];
         bool valid = ic->RankKnown(input);
@@ -1125,7 +1198,10 @@ class SymbolicShapeRefiner {
         }
       }
     }
+    return Status::OK();
+  }
 
+  Status InferShapes(const NodeDef& node, NodeContext* c) {
     // Infer the shapes of output tensors.
     if (!c->op_data || c->op_data->shape_inference_fn == nullptr) {
       // There is nothing more we can infer, annotate outputs with unknown
@@ -1137,6 +1213,8 @@ class SymbolicShapeRefiner {
         c->inference_context->Run(c->op_data->shape_inference_fn));
 
     Status status = Status::OK();
+    auto it = fed_ports_.find(node.name());
+    const bool is_fed = it != fed_ports_.end();
     if (is_fed) {
       // It is possible to feed node output ports with tensors of any shape: as
       // a result, the shape of a fed port is completely unknown.
@@ -1145,6 +1223,9 @@ class SymbolicShapeRefiner {
       }
     }
 
+    // Update NodeContext output fields after shape inference function runs.
+    status.Update(MaybeUpdateNodeContextOutput(node, is_fed, c));
+
     return status;
   }
 
@@ -1166,17 +1247,65 @@ class SymbolicShapeRefiner {
     return false;
   }
 
-  void MaybeSetTensorValueToShape(InferenceContext* ic, const Tensor& tensor,
-                                  ShapeHandle* tensors_as_shapes) {
+  TensorProto MakeIntegerScalarTensorProto(const DataType dtype,
+                                           const int64 val) {
+    TensorProto tensor_proto;
+    tensor_proto.set_dtype(dtype);
+    // Scalar TensorProto has an empty tensor_shape; no dim, no dim.size.
+    tensor_proto.mutable_tensor_shape();
+    if (dtype == DT_INT32) {
+      tensor_proto.add_int_val(val);
+    } else if (dtype == DT_INT64) {
+      tensor_proto.add_int64_val(val);
+    }
+    return tensor_proto;
+  }
+
+  bool MaybeTensorProtoToShape(InferenceContext* ic,
+                               const TensorProto& tensor_proto,
+                               ShapeHandle* tensors_as_shapes) {
+    // Skip if dtype is not integer.
+    if (tensor_proto.dtype() != DT_INT32 && tensor_proto.dtype() != DT_INT64) {
+      return false;
+    }
+    // Skip if shape is neither scalar nor vector.
+    if (tensor_proto.tensor_shape().unknown_rank() ||
+        tensor_proto.tensor_shape().dim_size() > 1) {
+      return false;
+    }
+    Tensor tensor;
+    if (!tensor.FromProto(tensor_proto)) {
+      return false;
+    }
+    return MaybeTensorValueToShape(ic, tensor, tensors_as_shapes);
+  }
+
+  bool MaybeTensorValueToShape(InferenceContext* ic, const Tensor& tensor,
+                               ShapeHandle* tensors_as_shapes) {
     // Integer tensors of rank one can also be interpreted as a shape
     // provided all their values are >= -1.
     if (IsIntegerVector(tensor)) {
+#if 0
       ShapeHandle tensor_shape = ic->Vector(tensor.NumElements());
       ShapeHandle shp;
       // Note that MakeShapeFromTensor filters out invalid values (e.g., < -1).
       if (ic->MakeShapeFromTensor(&tensor, tensor_shape, &shp).ok()) {
         *tensors_as_shapes = shp;
+        return true;
+      }
+#else
+      bool has_values_smaller_than_minus_1 = false;
+      std::vector<DimensionHandle> dims;
+      for (int i = 0; i < tensor.NumElements(); i++) {
+        int64 value = tensor.dtype() == DT_INT32 ? tensor.flat<int32>()(i)
+                                                 : tensor.flat<int64>()(i);
+        has_values_smaller_than_minus_1 |= (value < -1);
+        dims.push_back(value < 0 ? ic->UnknownDim() : ic->MakeDim(value));
       }
+      if (!has_values_smaller_than_minus_1) {
+        *tensors_as_shapes = ic->MakeShape(dims);
+      }
+#endif
     } else if (IsIntegerScalar(tensor)) {
       // Scalar constant.
       int64 value = tensor.dtype() == DT_INT32 ? tensor.flat<int32>()(0)
@@ -1185,8 +1314,10 @@ class SymbolicShapeRefiner {
       // It's a limitation as we use ShapeHandle as a means to pass values.
       if (value >= -1) {
         *tensors_as_shapes = ic->MakeShape({ic->MakeDim(value)});
+        return true;
       }
     }
+    return false;
   }
 
   const GraphView& graph_;
@@ -1198,6 +1329,11 @@ class SymbolicShapeRefiner {
       fun_to_grappler_function_item_;
   FunctionLibraryDefinition function_library_;
   const std::unordered_map<string, std::unordered_set<int>>& fed_ports_;
+  // Store TensorProtos for tensor value propagation. Note that we use list, not
+  // vector, as we use pointers to the TensorProtos in this container. Vector
+  // may resize and copy the objects into a new buffer, then the existing
+  // pointers become dangling pointers.
+  std::list<TensorProto> const_tensors_to_propagate_;
 };
 
 // Keep track of shapes and dimensions in a graph.
@@ -1624,7 +1760,8 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) {
       PropagateShapes(&refiner, &new_shapes, resource_handles, num_loops));
 
   // Track shapes globally across the graph.
-  SymbolicShapeManager shape_manager;
+  std::unique_ptr<SymbolicShapeManager> shape_manager =
+      absl::make_unique<SymbolicShapeManager>();
   bool found_error = false;
   for (const NodeDef& node : item_.graph.node()) {
     auto node_ctx = refiner.GetContext(&node);
@@ -1637,14 +1774,14 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) {
       continue;
     }
     for (const auto& merged_shapes : node_ctx->MergedShapes()) {
-      if (!shape_manager.Merge(merged_shapes.first, merged_shapes.second)
+      if (!shape_manager->Merge(merged_shapes.first, merged_shapes.second)
                .ok()) {
         found_error = true;
         break;
       }
     }
     for (const auto& merged_dims : node_ctx->MergedDims()) {
-      if (!shape_manager.Merge(merged_dims.first, merged_dims.second).ok()) {
+      if (!shape_manager->Merge(merged_dims.first, merged_dims.second).ok()) {
         found_error = true;
         break;
       }
@@ -1652,7 +1789,7 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) {
     if (found_error) {
       // The shapes aren't consistent, we can't infer safely: discard all the
       // information discovered so far.
-      shape_manager = SymbolicShapeManager();
+      shape_manager = absl::make_unique<SymbolicShapeManager>();
       break;
     }
   }
@@ -1676,15 +1813,17 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) {
       input_properties.resize(ic->num_inputs());
       GraphView::InputPort input(&node, -1);
       for (int i = 0; i < ic->num_inputs(); ++i) {
-        shape_manager.AsTensorProperties(ic->input(i), ctx->input_types[i],
-                                         &input_properties[i]);
+        shape_manager->AsTensorProperties(ic->input(i), ctx->input_types[i],
+                                          &input_properties[i]);
         input.port_id = i;
         GraphView::OutputPort fanin = graph_view.GetRegularFanin(input);
-        // Export tensor value (either const tensor or input_tensors_as_shapes)
-        // to input_properties.value.
+        // Export tensor value to input_properties.value.
         if (IsConstant(*fanin.node)) {
           const TensorProto& raw_val = fanin.node->attr().at("value").tensor();
           *input_properties[i].mutable_value() = raw_val;
+        } else if (ctx->input_tensor_protos.size() > i &&
+                   ctx->input_tensor_protos[i] != nullptr) {
+          *input_properties[i].mutable_value() = *ctx->input_tensor_protos[i];
         } else if (ic->input_tensors_as_shapes().size() > i &&
                    IsShapeFullyDefinedIntegerVectorOrScalar(
                        ic, ic->input(i), ic->input_tensors_as_shapes()[i],
@@ -1705,13 +1844,15 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) {
 
       output_properties.resize(ic->num_outputs());
       for (int i = 0; i < ic->num_outputs(); ++i) {
-        shape_manager.AsTensorProperties(ic->output(i), ctx->output_types[i],
-                                         &output_properties[i]);
-        // Export tensor value (either const tensor or input_tensors_as_shapes)
-        // to output_properties.value.
+        shape_manager->AsTensorProperties(ic->output(i), ctx->output_types[i],
+                                          &output_properties[i]);
+        // Export tensor value to output_properties.value.
         if (IsConstant(node)) {
           const TensorProto& raw_val = node.attr().at("value").tensor();
           *output_properties[i].mutable_value() = raw_val;
+        } else if (ctx->output_tensor_protos.size() > i &&
+                   ctx->output_tensor_protos[i] != nullptr) {
+          *output_properties[i].mutable_value() = *ctx->output_tensor_protos[i];
         } else if (ctx->output_tensors_as_shapes.size() > i &&
                    IsShapeFullyDefinedIntegerVectorOrScalar(
                        ic, ic->output(i), ctx->output_tensors_as_shapes[i],
diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index db10f586bc..5aae773994 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.pb.h"  // NOLINT
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/grappler/clusters/single_machine.h"
 #include "tensorflow/core/grappler/grappler_item.h"
@@ -285,6 +286,37 @@ TEST_F(GraphPropertiesTest, Variables) {
   }
 }
 
+TEST_F(GraphPropertiesTest, ReadVariableOpAfterEnter) {
+  GrapplerItem item;
+  TF_CHECK_OK(NodeDefBuilder("Var", "VarHandleOp")
+                  .Attr("dtype", DT_FLOAT)
+                  .Attr("shape", TensorShape({3, 7}))
+                  .Finalize(item.graph.add_node()));
+  TF_CHECK_OK(NodeDefBuilder("Enter", "Enter")
+                  .Attr("T", DT_RESOURCE)
+                  .Attr("frame_name", "while_context")
+                  .Attr("is_constant", true)
+                  .Attr("parallel_iterations", 10)
+                  .Input("Var", 0, DT_RESOURCE)
+                  .Finalize(item.graph.add_node()));
+  TF_CHECK_OK(NodeDefBuilder("ReadVariableOpAfterEnter", "ReadVariableOp")
+                  .Attr("dtype", DT_FLOAT)
+                  .Input("Enter", 0, DT_RESOURCE)
+                  .Finalize(item.graph.add_node()));
+
+  // LOG(INFO) << item.graph.DebugString();
+  GraphProperties properties(item);
+  TF_CHECK_OK(properties.InferStatically(false));
+  const auto props = properties.GetOutputProperties("ReadVariableOpAfterEnter");
+  EXPECT_EQ(1, props.size());
+  const OpInfo::TensorProperties& prop = props[0];
+  EXPECT_EQ(DT_FLOAT, prop.dtype());
+  EXPECT_FALSE(prop.shape().unknown_rank());
+  EXPECT_EQ(2, prop.shape().dim_size());
+  EXPECT_EQ(3, prop.shape().dim(0).size());
+  EXPECT_EQ(7, prop.shape().dim(1).size());
+}
+
 TEST_F(GraphPropertiesTest, VarHandles) {
   GrapplerItem item;
   TF_CHECK_OK(NodeDefBuilder("Var", "VarHandleOp")
@@ -865,8 +897,8 @@ TEST_F(GraphPropertiesTest, TensorAsShapesPropagation) {
   EXPECT_TRUE(properties.GetOutputProperties("b1")[0].has_value());
   EXPECT_TRUE(properties.GetOutputProperties("c")[0].has_value());
   EXPECT_TRUE(properties.GetInputProperties("c1")[0].has_value());
-  // Note that we propagate tensro value of only 1D vector and scalar.
-  EXPECT_FALSE(properties.GetOutputProperties("c1")[0].has_value());
+  // Note that we propagate tensor value of only 1D vector and scalar.
+  EXPECT_TRUE(properties.GetOutputProperties("c1")[0].has_value());
 
   // Check values.
   ExpectTensorValues({5, 7}, properties.GetOutputProperties("a")[0].value());
@@ -883,7 +915,8 @@ TEST_F(GraphPropertiesTest, TensorAsShapesPropagation) {
                      properties.GetOutputProperties("c")[0].value());
   ExpectTensorValues({c_values},
                      properties.GetInputProperties("c1")[0].value());
-  // No output value for c1, as it's neither 1D vector nor scalar.
+  ExpectTensorValues({c_values},
+                     properties.GetOutputProperties("c1")[0].value());
 }
 
 TEST_F(GraphPropertiesTest, IdentityPassingShape) {
@@ -928,6 +961,50 @@ TEST_F(GraphPropertiesTest, PackWithConstInput) {
   EXPECT_EQ("float: [1,2,3,4]", PropToString(out_prop0));
 }
 
+TEST_F(GraphPropertiesTest, RankOp) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output c = ops::Const(s.WithOpName("Const"), 1, {4, 4, 4});
+  Output r = ops::Rank(s.WithOpName("Rank"), c);
+  Output i = ops::Identity(s.WithOpName("Identity"), r);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  GraphProperties properties(item);
+  TF_CHECK_OK(properties.InferStatically(false));
+  const auto rank_props = properties.GetOutputProperties("Rank");
+  const OpInfo::TensorProperties rank_prop0 = rank_props[0];
+  EXPECT_EQ("int32: []", PropToString(rank_prop0));
+  EXPECT_TRUE(rank_prop0.has_value());
+  ExpectTensorValues({3}, rank_prop0.value());
+  const auto identity_props = properties.GetOutputProperties("Identity");
+  const OpInfo::TensorProperties identity_props0 = identity_props[0];
+  EXPECT_EQ("int32: []", PropToString(identity_props0));
+  EXPECT_TRUE(identity_props0.has_value());
+  ExpectTensorValues({3}, identity_props0.value());
+}
+
+TEST_F(GraphPropertiesTest, SizeOp) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output c = ops::Const(s.WithOpName("Const"), 1, {1, 2, 3, 4});
+  Output r = ops::Size(s.WithOpName("Size"), c);
+  Output i = ops::Identity(s.WithOpName("Identity"), r);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  GraphProperties properties(item);
+  TF_CHECK_OK(properties.InferStatically(false));
+  const auto size_props = properties.GetOutputProperties("Size");
+  const OpInfo::TensorProperties size_props0 = size_props[0];
+  EXPECT_EQ("int32: []", PropToString(size_props0));
+  EXPECT_TRUE(size_props0.has_value());
+  ExpectTensorValues({24}, size_props0.value());
+  const auto identity_props = properties.GetOutputProperties("Identity");
+  const OpInfo::TensorProperties identity_props0 = identity_props[0];
+  EXPECT_EQ("int32: []", PropToString(identity_props0));
+  EXPECT_TRUE(identity_props0.has_value());
+  ExpectTensorValues({24}, identity_props0.value());
+}
+
 TEST_F(GraphPropertiesTest, PackWithIdentityInput) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   // Same to PackWithConstInput test case, but a, b, c, and d are Identity ops
-- 
GitLab


From 64a4cf11f61e8ed7fa332c35ddbcb2a803726694 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 09:36:20 -0700
Subject: [PATCH 1028/1085] Fix TF 2.0 variable API exporting.

Apparently @tf_export(v2=[...]) isn't a thing, the correct incantation
is @tf_export(..., v1=[]). Which makes sense since we want the symbol to
continue to exist in v3.

PiperOrigin-RevId: 217331254
---
 tensorflow/python/ops/variable_scope.py       |   2 +-
 tensorflow/python/ops/variables.py            |   2 +-
 ...ensorflow.-variable.-save-slice-info.pbtxt |  17 +++
 .../api/golden/v2/tensorflow.-variable.pbtxt  | 130 ++++++++++++++++++
 .../tools/api/golden/v2/tensorflow.pbtxt      |   8 ++
 5 files changed, 157 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt

diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 5032ca79f9..9b10af9182 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -2558,7 +2558,7 @@ def variable_creator_scope_v1(variable_creator):
 
 
 # Note: only the docstrings differ between this and v1.
-@tf_export(v2=["variable_creator_scope"])
+@tf_export("variable_creator_scope", v1=[])
 @tf_contextlib.contextmanager
 def variable_creator_scope(variable_creator):
   """Scope which defines a variable creation function to be used by variable().
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 84871f09f9..df7b7f920f 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -187,7 +187,7 @@ class VariableMetaclass(type):
       return super(VariableMetaclass, cls).__call__(*args, **kwargs)
 
 
-@tf_export(v2=["Variable"])
+@tf_export("Variable", v1=[])
 class Variable(six.with_metaclass(VariableMetaclass,
                                   checkpointable.CheckpointableBase)):
   """See the [Variables Guide](https://tensorflow.org/guide/variables).
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt
new file mode 100644
index 0000000000..ac3ccd468b
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt
@@ -0,0 +1,17 @@
+path: "tensorflow.Variable.SaveSliceInfo"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.variables.SaveSliceInfo\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "spec"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'full_name\', \'full_shape\', \'var_offset\', \'var_shape\', \'save_slice_info_def\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "to_proto"
+    argspec: "args=[\'self\', \'export_scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
new file mode 100644
index 0000000000..e85949f23c
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
@@ -0,0 +1,130 @@
+path: "tensorflow.Variable"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.variables.Variable\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "SaveSliceInfo"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "constraint"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "device"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "graph"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "initial_value"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "initializer"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "op"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "trainable"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'initial_value\', \'trainable\', \'validate_shape\', \'caching_device\', \'name\', \'variable_def\', \'dtype\', \'import_scope\', \'constraint\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "assign"
+    argspec: "args=[\'self\', \'value\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], "
+  }
+  member_method {
+    name: "assign_add"
+    argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], "
+  }
+  member_method {
+    name: "assign_sub"
+    argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], "
+  }
+  member_method {
+    name: "count_up_to"
+    argspec: "args=[\'self\', \'limit\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "eval"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_proto"
+    argspec: "args=[\'variable_def\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_shape"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "initialized_value"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "load"
+    argspec: "args=[\'self\', \'value\', \'session\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "read_value"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "scatter_add"
+    argspec: "args=[\'self\', \'sparse_delta\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "scatter_nd_add"
+    argspec: "args=[\'self\', \'indices\', \'updates\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "scatter_nd_sub"
+    argspec: "args=[\'self\', \'indices\', \'updates\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "scatter_nd_update"
+    argspec: "args=[\'self\', \'indices\', \'updates\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "scatter_sub"
+    argspec: "args=[\'self\', \'sparse_delta\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "scatter_update"
+    argspec: "args=[\'self\', \'sparse_delta\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "set_shape"
+    argspec: "args=[\'self\', \'shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "to_proto"
+    argspec: "args=[\'self\', \'export_scope\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "value"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 3664eef406..4b4d150aa1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -176,6 +176,10 @@ tf_module {
     name: "VarLenFeature"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "Variable"
+    mtype: "<class \'tensorflow.python.ops.variables.VariableMetaclass\'>"
+  }
   member {
     name: "VariableAggregation"
     mtype: "<class \'enum.EnumMeta\'>"
@@ -1700,6 +1704,10 @@ tf_module {
     name: "variable_axis_size_partitioner"
     argspec: "args=[\'max_shard_bytes\', \'axis\', \'bytes_per_string_element\', \'max_shards\'], varargs=None, keywords=None, defaults=[\'0\', \'16\', \'None\'], "
   }
+  member_method {
+    name: "variable_creator_scope"
+    argspec: "args=[\'variable_creator\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "where"
     argspec: "args=[\'condition\', \'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-- 
GitLab


From 2a91929a3b8bfd9ca8a823035b3654f2ae031ef2 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Tue, 16 Oct 2018 09:37:33 -0700
Subject: [PATCH 1029/1085] Add cond_v2 dependency back to
 control_flow_ops_py_test.

This is necessary for control_flow_ops.py to lazy-load cond_v2.

PiperOrigin-RevId: 217331510
---
 tensorflow/python/kernel_tests/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index fa26690718..33fb925f09 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1490,6 +1490,7 @@ cuda_py_test(
         "//tensorflow/python:array_ops_gen",
         "//tensorflow/python:client",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:cond_v2",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:data_flow_ops",
         "//tensorflow/python:data_flow_ops_gen",
-- 
GitLab


From 41fc46f2a8b0d0ecdc80e39ffcb211eefd683865 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 09:46:54 -0700
Subject: [PATCH 1030/1085] Fix OSS build breaks.

PiperOrigin-RevId: 217333247
---
 .../compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc    | 6 ++++--
 .../xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
index e7f572d01b..a6980850af 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
@@ -127,8 +127,10 @@ TEST_F(CudnnConvRewriterTest, BackwardFilterConvolve) {
                   op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0));
 
   // Check that metadata was preserved.
-  EXPECT_THAT(entry_computation->root_instruction()->operand(0)->metadata(),
-              ::testing::EqualsProto(metadata));
+  const auto& md_after_opt =
+      entry_computation->root_instruction()->operand(0)->metadata();
+  EXPECT_TRUE(protobuf_util::ProtobufEquals(md_after_opt, metadata))
+      << md_after_opt.DebugString() << " vs " << metadata.DebugString();
 }
 
 TEST_F(CudnnConvRewriterTest,
diff --git a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
index 12146068ed..b7dd07a50c 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
@@ -302,7 +302,7 @@ TEST_F(CudnnFusedConvRewriterTest, PreservesMetadata) {
           ->ToString();
   EXPECT_THAT(
       optimized_hlo_string,
-      ::testing::ContainsRegex(R"(custom-call.*metadata={op_type="foo"})"));
+      ::testing::ContainsRegex(R"(custom-call.*metadata=\{op_type="foo"\})"));
 }
 
 }  // namespace
-- 
GitLab


From 77456c6611fd2faa07d6efabafb3f61f82a24eed Mon Sep 17 00:00:00 2001
From: Guangda Lai <31743510+aaroey@users.noreply.github.com>
Date: Tue, 16 Oct 2018 10:06:28 -0700
Subject: [PATCH 1031/1085] Fix broken tests caused by newly added op
 converters.

---
 .../contrib/tensorrt/convert/convert_nodes.cc |   4 +-
 .../tensorrt/convert/convert_nodes_test.cc    |  12 +-
 tensorflow/contrib/tensorrt/test/base_test.py |  10 +
 .../tensorrt/test/batch_matmul_test.py        |  11 +-
 .../tensorrt/test/biasadd_matmul_test.py      |  54 +--
 .../binary_tensor_weight_broadcast_test.py    | 106 ++----
 .../tensorrt/test/reshape_transpose_test.py   | 325 +++---------------
 7 files changed, 120 insertions(+), 402 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 8f697f2af9..85c9e62f10 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -1374,11 +1374,11 @@ tensorflow::Status ConvertReshape(
     // Product of input shape should equal product of new_dims
     if (TrtDimsNumElements(input_dims) != TrtDimsNumElements(new_dims)) {
       return tensorflow::errors::Unimplemented(
-        "Reshape on the batch dimension is not supported.");
+        "Reshape on batch dimension is not supported, at ", node_def.name());
     }
   } else if (weights_ptr[0] != ctx.GetMaxBatchSize()) {
     return tensorflow::errors::Unimplemented(
-        "Reshape on the batch dimension is not supported.");
+        "Reshape on batch dimension is not supported, at ", node_def.name());
   }
 
   const nvinfer1::ITensor* output_tensor = nullptr;
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index 5c9ddaec49..7c16d5b44e 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -69,10 +69,6 @@ class FakeITensor : public nvinfer1::ITensor {
   FakeITensor(const string& name, const std::vector<int>& dims)
       : name_(name), dims_(GetTestDims(dims)) {}
 
-  void SetDims(const std::vector<int>& dims) {
-    setDimensions(GetTestDims(dims));
-  }
-
   void setName(const char* name) override { name_ = name; }
 
   const char* getName() const override { return name_.c_str(); }
@@ -99,6 +95,10 @@ class FakeITensor : public nvinfer1::ITensor {
     location_ = location;
   }
 
+#if NV_TENSORRT_MAJOR >= 5
+  bool setDynamicRange(float min, float max) override {}
+#endif
+
  private:
   string name_;
   nvinfer1::Dims dims_;
@@ -609,7 +609,7 @@ TEST_F(ConverterTest, ConvertReshape) {
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("weights", DT_INT32, {4}, {-1, 1, 1, 2});
     ExpectStatus(converter_.ConvertNode(node_def), error::UNIMPLEMENTED,
-                 "Reshape on the batch dimension is not supported");
+                 "Reshape on batch dimension is not supported, at my_reshape");
   }
   {
     // Reshape at batch dimension, should fail.
@@ -617,7 +617,7 @@ TEST_F(ConverterTest, ConvertReshape) {
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("weights", DT_INT32, {4}, {3, 1, 1, 2});
     ExpectStatus(converter_.ConvertNode(node_def), error::UNIMPLEMENTED,
-                 "Reshape on the batch dimension is not supported");
+                 "Reshape on batch dimension is not supported, at my_reshape");
   }
   // Reshape on non batch dimensions, ok.
   for (int batch_dim : {-1, 1}) {
diff --git a/tensorflow/contrib/tensorrt/test/base_test.py b/tensorflow/contrib/tensorrt/test/base_test.py
index 7e9ffb05ab..18096e0ff1 100644
--- a/tensorflow/contrib/tensorrt/test/base_test.py
+++ b/tensorflow/contrib/tensorrt/test/base_test.py
@@ -136,6 +136,16 @@ class SimpleMultiEnginesTest(trt_test.TfTrtIntegrationTestBase):
     # - my_trt_op_1 should have ["weights","conv", "div"]
     return ["my_trt_op_0", "my_trt_op_1"]
 
+  def ShouldRunTest(self, run_params):
+    # TODO(aaroey): LayoutOptimizer adds Transpose(Const, Const) to the graph
+    # which breaks the conversion. We should fix it as:
+    # - Detect the invalid NodeDef earlier before adding them to segment
+    # - Let it able to change the RewriterConfig when calling
+    #   create_inference_graph().
+    # It will be good to add debugging feature for Grappler to print the graph
+    # after running each optimizer.
+    return False
+
 
 class PartiallyConvertedTestA(trt_test.TfTrtIntegrationTestBase):
 
diff --git a/tensorflow/contrib/tensorrt/test/batch_matmul_test.py b/tensorflow/contrib/tensorrt/test/batch_matmul_test.py
index 2f153c6f2f..4b88808178 100644
--- a/tensorflow/contrib/tensorrt/test/batch_matmul_test.py
+++ b/tensorflow/contrib/tensorrt/test/batch_matmul_test.py
@@ -50,17 +50,22 @@ class BatchMatMulTest(trt_test.TfTrtIntegrationTestBase):
       w2 = array_ops.placeholder(dtype=dtype, shape=w2_dims, name=w2_name)
       with g.device("/GPU:0"):
         b = constant_op.constant(np.random.randn(12, 5, 12, 7), dtype=dtype)
-        c = constant_op.constant(np.random.randn(5, 1, 1), dtype=dtype)
-        d = constant_op.constant(np.random.randn(5, 1, 1), dtype=dtype)
         x1 = math_ops.matmul(inp, b)
+        c = constant_op.constant(np.random.randn(5, 1, 1), dtype=dtype)
         x1 = x1 + c
+
         x2 = math_ops.matmul(inp, w1)
+        d = constant_op.constant(np.random.randn(5, 1, 1), dtype=dtype)
         x2 = x2 * d
-        e = gen_array_ops.reshape(inp, [12, 40, 12])
+
+        e = self.trt_incompatible_op(inp)
+        e = gen_array_ops.reshape(e, [12, 40, 12])
         x3 = math_ops.matmul(e, w2)
         f = constant_op.constant(np.random.randn(40, 1), dtype=dtype)
         x3 = x3 + f
         x3 = gen_array_ops.reshape(x3, [12, 5, 8, 7])
+        x3 = self.trt_incompatible_op(x3)
+
         out = x1 + x2 + x3
       array_ops.squeeze(out, name=output_name)
     return trt_test.TfTrtIntegrationTestParams(
diff --git a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
index 15b301177f..74a2177d88 100644
--- a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
+++ b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py
@@ -37,91 +37,91 @@ class BiasaddMatMulTest(trt_test.TfTrtIntegrationTestBase):
     """Testing conversion of BiasAdd MatMul in TF-TRT conversion."""
     dtype = dtypes.float32
     input_name = "input"
-    input_dims = [48, 12]
+    input_dims = [4, 144]
     output_name = "output"
     g = ops.Graph()
     with g.as_default():
       x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
 
-      b = constant_op.constant(np.random.randn(12, 4), dtype=dtype)
+      b = constant_op.constant(np.random.randn(144, 4), dtype=dtype)
       x1 = math_ops.matmul(x, b)
       b = constant_op.constant(np.random.randn(1, 4), dtype=dtype)
       x1 = x1 + b
 
-      b = constant_op.constant(np.random.randn(48, 4), dtype=dtype)
-      x2 = math_ops.matmul(x, b, transpose_a=True)
-      x2 = gen_array_ops.reshape(x2, [48, 1])
+      b = constant_op.constant(np.random.randn(4, 144), dtype=dtype)
+      x2 = self.trt_incompatible_op(x)
+      x2 = math_ops.matmul(x2, b, transpose_a=True)
+      x2 = gen_array_ops.reshape(x2, [4, -1])
+      x2 = self.trt_incompatible_op(x2)
 
-      b = constant_op.constant(np.random.randn(4, 12), dtype=dtype)
+      b = constant_op.constant(np.random.randn(4, 144), dtype=dtype)
       x3 = math_ops.matmul(x, b, transpose_b=True)
 
-      b = constant_op.constant(np.random.randn(16, 48), dtype=dtype)
-      x4 = math_ops.matmul(x, b, transpose_b=True, transpose_a=True)
-      x4 = gen_array_ops.reshape(x4, [48, 4])
+      b = constant_op.constant(np.random.randn(16, 4), dtype=dtype)
+      x4 = self.trt_incompatible_op(x)
+      x4 = math_ops.matmul(x4, b, transpose_b=True, transpose_a=True)
+      x4 = gen_array_ops.reshape(x4, [4, -1])
+      x4 = self.trt_incompatible_op(x4)
 
-      x5 = gen_array_ops.reshape(x, [4, 144])
       b = constant_op.constant(np.random.randn(144, 48), dtype=dtype)
-      x5 = math_ops.matmul(x5, b)
+      x5 = math_ops.matmul(x, b)
       b = constant_op.constant(np.random.randn(48), dtype=dtype)
       x5 = nn.bias_add(x5, b)
-      x5 = gen_array_ops.reshape(x5, [48, 4])
+      x5 = gen_array_ops.reshape(x5, [4, -1])
 
       x6 = gen_array_ops.reshape(x, [4, 12, 12])
       b = constant_op.constant(np.random.randn(12), dtype=dtype)
       x6 = nn.bias_add(x6, b, data_format="NHWC")
-      x6 = gen_array_ops.reshape(x6, [48, -1])
+      x6 = gen_array_ops.reshape(x6, [4, -1])
 
       x7 = gen_array_ops.reshape(x, [4, 12, 3, 4])
       b = constant_op.constant(np.random.randn(4), dtype=dtype)
       x7 = nn.bias_add(x7, b, data_format="NHWC")
-      x7 = gen_array_ops.reshape(x7, [48, -1])
+      x7 = gen_array_ops.reshape(x7, [4, -1])
 
       x8 = gen_array_ops.reshape(x, [4, 12, 3, 2, 2])
       b = constant_op.constant(np.random.randn(2), dtype=dtype)
       x8 = nn.bias_add(x8, b, data_format="NHWC")
-      x8 = gen_array_ops.reshape(x8, [48, -1])
+      x8 = gen_array_ops.reshape(x8, [4, -1])
 
       x9 = gen_array_ops.reshape(x, [4, 12, 3, 2, 2])
       b = constant_op.constant(np.random.randn(12), dtype=dtype)
       x9 = nn.bias_add(x9, b, data_format="NCHW")
-      x9 = gen_array_ops.reshape(x9, [48, -1])
+      x9 = gen_array_ops.reshape(x9, [4, -1])
 
       x10 = gen_array_ops.reshape(x, [4, 12, 3, 4])
       b = constant_op.constant(np.random.randn(12), dtype=dtype)
       x10 = nn.bias_add(x10, b, data_format="NCHW")
-      x10 = gen_array_ops.reshape(x10, [48, -1])
+      x10 = gen_array_ops.reshape(x10, [4, -1])
 
       x11 = gen_array_ops.reshape(x, [4, 12, 12])
       b = constant_op.constant(np.random.randn(12), dtype=dtype)
       x11 = nn.bias_add(x11, b, data_format="NCHW")
-      x11 = gen_array_ops.reshape(x11, [48, -1])
+      x11 = gen_array_ops.reshape(x11, [4, -1])
 
-      out = array_ops.concat(
-          [x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11], axis=-1)
+      out = array_ops.concat([x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11],
+                             axis=-1)
       out = array_ops.squeeze(out, name=output_name)
     return trt_test.TfTrtIntegrationTestParams(
         gdef=g.as_graph_def(),
         input_names=[input_name],
         input_dims=[input_dims],
         output_names=[output_name],
-        expected_output_dims=[(48, 89)])
+        expected_output_dims=[(4, 6680)])
 
   def GetConversionParams(self, run_params):
     """Return a ConversionParams for test."""
     return super(BiasaddMatMulTest,
                  self).GetConversionParams(run_params)._replace(
-                     max_batch_size=48, maximum_cached_engines=2)
+                     max_batch_size=4, maximum_cached_engines=2)
 
   def _ValidEngines(self):
     """Engines expected to build and run."""
-    return [
-        "my_trt_op_0", "my_trt_op_1", "my_trt_op_2", "my_trt_op_3",
-        "my_trt_op_6", "my_trt_op_7", "my_trt_op_8", "my_trt_op_9"
-    ]
+    return ["my_trt_op_0"]
 
   def _InvalidEngines(self):
     """Engines that will cause conversion error at building time."""
-    return ["my_trt_op_4", "my_trt_op_5"]
+    return ["my_trt_op_1", "my_trt_op_2"]
 
   def ExpectedEnginesToBuild(self, run_params):
     """Return the expected engines to build."""
diff --git a/tensorflow/contrib/tensorrt/test/binary_tensor_weight_broadcast_test.py b/tensorflow/contrib/tensorrt/test/binary_tensor_weight_broadcast_test.py
index f126ed4238..72d95cca78 100644
--- a/tensorflow/contrib/tensorrt/test/binary_tensor_weight_broadcast_test.py
+++ b/tensorflow/contrib/tensorrt/test/binary_tensor_weight_broadcast_test.py
@@ -32,79 +32,34 @@ from tensorflow.python.platform import test
 
 class BinaryTensorWeightBroadcastTest(trt_test.TfTrtIntegrationTestBase):
 
+  def _ConstFn(self, shape):
+    return constant_op.constant(np.random.randn(*shape), dtype=dtypes.float32)
+
   def GetParams(self):
     """Tests for scale & elementwise layers in TF-TRT."""
-    dtype = dtypes.float32
     input_name = "input"
     input_dims = [10, 24, 24, 20]
     output_name = "output"
     g = ops.Graph()
     with g.as_default():
-      x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
-      # scale
-      a = constant_op.constant(np.random.randn(1), dtype=dtype)
-      f = x + a
-      x = math_ops.sigmoid(f)
-      # scale
-      a = constant_op.constant(np.random.randn(1), dtype=dtype)
-      f = a + x
-      x = math_ops.sigmoid(f)
-      # scale
-      a = constant_op.constant(np.random.randn(24, 1, 1), dtype=dtype)
-      f = x + a
-      x = math_ops.sigmoid(f)
-      # scale
-      a = constant_op.constant(np.random.randn(24, 1, 1), dtype=dtype)
-      f = a + x
-      x = math_ops.sigmoid(f)
-      # scale
-      a = constant_op.constant(np.random.randn(24, 24, 20), dtype=dtype)
-      f = a + x
-      x = math_ops.sigmoid(f)
-      # scale
-      a = constant_op.constant(np.random.randn(24, 24, 20), dtype=dtype)
-      f = x + a
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(20), dtype=dtype)
-      f = x + a
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(20), dtype=dtype)
-      f = a + x
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(1, 24, 1, 1), dtype=dtype)
-      f = a + x
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(1, 24, 1, 1), dtype=dtype)
-      f = x + a
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(1, 24, 24, 1), dtype=dtype)
-      f = a + x
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(1, 24, 24, 1), dtype=dtype)
-      f = x + a
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(1, 24, 24, 20), dtype=dtype)
-      f = a + x
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(1, 24, 24, 20), dtype=dtype)
-      f = x + a
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(24, 20), dtype=dtype)
-      f = a + x
-      x = math_ops.sigmoid(f)
-      # elementwise
-      a = constant_op.constant(np.random.randn(24, 20), dtype=dtype)
-      f = x + a
-      x = math_ops.sigmoid(f)
+      x = array_ops.placeholder(
+          dtype=dtypes.float32, shape=input_dims, name=input_name)
+      for weights_shape in [
+          (1,),  # scale
+          (24, 1, 1),  # scale
+          (24, 24, 20),  # scale
+          (20,),  # elementwise
+          (1, 24, 1, 1),  # elementwise
+          (1, 24, 24, 1),  # elementwise
+          (1, 24, 24, 20),  # elementwise
+          (24, 20),  # elementwise
+      ]:
+        a = self._ConstFn(weights_shape)
+        f = x + a
+        x = math_ops.sigmoid(f)
+        a = self._ConstFn(weights_shape)
+        f = a + x
+        x = math_ops.sigmoid(f)
       gen_array_ops.reshape(x, [5, -1], name=output_name)
     return trt_test.TfTrtIntegrationTestParams(
         gdef=g.as_graph_def(),
@@ -115,24 +70,7 @@ class BinaryTensorWeightBroadcastTest(trt_test.TfTrtIntegrationTestBase):
 
   def ExpectedEnginesToBuild(self, run_params):
     """Return the expected engines to build."""
-    return [
-        "my_trt_op_0",
-        "my_trt_op_1",
-        "my_trt_op_2",
-        "my_trt_op_3",
-        "my_trt_op_4",
-        "my_trt_op_5",
-        "my_trt_op_6",
-        "my_trt_op_7",
-        "my_trt_op_8",
-        "my_trt_op_9",
-        "my_trt_op_10",
-        "my_trt_op_11",
-        "my_trt_op_12",
-        "my_trt_op_13",
-        "my_trt_op_14",
-        "my_trt_op_15",
-    ]
+    return ["my_trt_op_%d" % i for i in range(16)]
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py b/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
index 61d95bb242..e22929f114 100644
--- a/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
+++ b/tensorflow/contrib/tensorrt/test/reshape_transpose_test.py
@@ -20,22 +20,17 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.contrib.tensorrt.python import trt_convert
 from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import nn_ops
 from tensorflow.python.platform import test
 
 
-class SimpleReshapeTest(trt_test.TfTrtIntegrationTestBase):
+class ReshapeTest(trt_test.TfTrtIntegrationTestBase):
 
   def GetParams(self):
-    """Create a graph containing single segment."""
     dtype = dtypes.float32
     input_name = "input"
     input_dims = [100, 24, 24, 2]
@@ -44,96 +39,51 @@ class SimpleReshapeTest(trt_test.TfTrtIntegrationTestBase):
     with g.as_default():
       inp = array_ops.placeholder(
           dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      outputs = []
+      # Here we test two types of reshapes, one changes the batch dimension and
+      # the other does not. Note that we're not able to test reshaping to
+      # scalar, since TRT requires input tensor to be of rank at least 2, so a
+      # reshape with scalar input will be filtered out of the segment before
+      # conversion.
       with g.device("/GPU:0"):
-        reshape = array_ops.reshape(inp, [-1, 24*24*2])
-        # Add identities to ensure we have at least min_segment_size=3 nodes
-        identity = array_ops.identity(reshape, "identity")
-        identity = array_ops.identity(identity, "identity2")
-      array_ops.identity(identity, name=output_name)
+        # These reshapes happen at batch dimension, thus should fail.
+        for shape in [[2, 50, 24, 24, 2], [-1, 50, 24, 24, 2],
+                      [2, 50, -1, 24, 2]]:
+          r = array_ops.reshape(inp, shape)
+          r = array_ops.reshape(r, [-1, 24, 24, 2])
+          outputs.append(self.trt_incompatible_op(r))
+        # Add another block with many reshapes that don't change the batch
+        # dimension.
+        r = array_ops.reshape(inp, [-1, 24 * 24, 2], name="reshape-0")
+        r = array_ops.reshape(r, [100, 24, -1], name="reshape-1")
+        r = array_ops.reshape(r, [100, 24 * 2, 24], name="reshape-2")
+        r = array_ops.reshape(r, [-1, 24, 24 * 2], name="reshape-3")
+        r = array_ops.reshape(r, [-1, 6, 4, 24, 2], name="reshape-4")
+        r = array_ops.reshape(r, [-1, 6, 4, 6, 4, 2, 1], name="reshape-5")
+        r = array_ops.reshape(r, [-1, 24, 24, 2], name="reshape-6")
+        outputs.append(self.trt_incompatible_op(r))
+      math_ops.add_n(outputs, name=output_name)
     return trt_test.TfTrtIntegrationTestParams(
         gdef=g.as_graph_def(),
         input_names=[input_name],
         input_dims=[input_dims],
         output_names=[output_name],
-        expected_output_dims=[(100, 24*24*2)])
+        expected_output_dims=[tuple(input_dims)])
 
   def ExpectedEnginesToBuild(self, run_params):
     """Return the expected engines to build."""
-    return ["my_trt_op_0"]
-
-class ReshapeToScalarTest(trt_test.TfTrtIntegrationTestBase):
-
-  def GetParams(self):
-    """Create a graph containing single segment."""
-    dtype = dtypes.float32
-    input_name = "input"
-    input_dims = [1]
-    output_name = "output"
-    g = ops.Graph()
-    with g.as_default():
-      inp = array_ops.placeholder(
-          dtype=dtype, shape=input_dims, name=input_name)
-      with g.device("/GPU:0"):
-        reshape = array_ops.reshape(inp, [])
-        # Add identities to ensure we have at least min_segment_size=3 nodes
-        identity = array_ops.identity(reshape, "identity")
-        identity = array_ops.identity(identity, "identity2")
-      array_ops.identity(identity, name=output_name)
-    return trt_test.TfTrtIntegrationTestParams(
-        gdef=g.as_graph_def(),
-        input_names=[input_name],
-        input_dims=[input_dims],
-        output_names=[output_name],
-        expected_output_dims=[()])
-
-  def ExpectedEnginesToBuild(self, run_params):
-    """Return the expected engines to build."""
-    return []
+    return {
+        "my_trt_op_3": ["reshape-%d" % i for i in range(7)] +
+                       ["reshape-%d/shape" % i for i in range(7)]
+    }
 
   def ShouldRunTest(self, run_params):
     """Whether to run the test."""
-    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
-    # a calib graph. Doesn't seem to contain any calibration nodes.""
-    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
+    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and
             not run_params.dynamic_engine)
 
-class ReshapeBatchDimensionTest(trt_test.TfTrtIntegrationTestBase):
 
-  def GetParams(self):
-    """Create a graph containing single segment."""
-    dtype = dtypes.float32
-    input_name = "input"
-    input_dims = [100, 24, 24, 2]
-    output_name = "output"
-    g = ops.Graph()
-    with g.as_default():
-      inp = array_ops.placeholder(
-          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
-      with g.device("/GPU:0"):
-        reshape = array_ops.reshape(inp, [2, 50, 24, 24, 2])
-        # Add identities to ensure we have at least min_segment_size=3 nodes
-        identity = array_ops.identity(reshape, "identity")
-        identity = array_ops.identity(identity, "identity2")
-      array_ops.identity(identity, name=output_name)
-    return trt_test.TfTrtIntegrationTestParams(
-        gdef=g.as_graph_def(),
-        input_names=[input_name],
-        input_dims=[input_dims],
-        output_names=[output_name],
-        expected_output_dims=[(2, 50, 24, 24, 2)])
-
-  def ExpectedEnginesToBuild(self, run_params):
-    """Return the expected engines to build."""
-    return []
-
-  def ShouldRunTest(self, run_params):
-    """Whether to run the test."""
-    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
-    # a calib graph. Doesn't seem to contain any calibration nodes.""
-    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
-            not run_params.dynamic_engine)
-
-class ReshapeBatchDimensionTest2(trt_test.TfTrtIntegrationTestBase):
+class TransposeTest(trt_test.TfTrtIntegrationTestBase):
 
   def GetParams(self):
     """Create a graph containing single segment."""
@@ -146,218 +96,33 @@ class ReshapeBatchDimensionTest2(trt_test.TfTrtIntegrationTestBase):
       inp = array_ops.placeholder(
           dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
       with g.device("/GPU:0"):
-        reshape = array_ops.reshape(inp, [-1, 50, 24, 24, 2])
-        # Add identities to ensure we have at least min_segment_size=3 nodes
-        identity = array_ops.identity(reshape, "identity")
-        identity = array_ops.identity(identity, "identity2")
-      array_ops.identity(identity, name=output_name)
+        t = array_ops.transpose(inp, [0, 3, 1, 2], name="transpose-1")
+        t = array_ops.transpose(t, [0, 2, 3, 1], name="transposeback-1")
+        incompatible = self.trt_incompatible_op(t)
+        t = array_ops.transpose(incompatible, [2, 1, 0, 3], name="transpose-2")
+        t = array_ops.transpose(t, [0, 2, 3, 1], name="transpose-3")
+      array_ops.identity(t, name=output_name)
     return trt_test.TfTrtIntegrationTestParams(
         gdef=g.as_graph_def(),
         input_names=[input_name],
         input_dims=[input_dims],
         output_names=[output_name],
-        expected_output_dims=[(2, 50, 24, 24, 2)])
+        expected_output_dims=[(24, 100, 2, 24)])
 
   def ExpectedEnginesToBuild(self, run_params):
     """Return the expected engines to build."""
-    return []
+    return {
+        "my_trt_op_0": [
+            "transpose-1", "transpose-1/perm", "transposeback-1",
+            "transposeback-1/perm"
+        ]
+    }
 
   def ShouldRunTest(self, run_params):
     """Whether to run the test."""
-    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
-    # a calib graph. Doesn't seem to contain any calibration nodes.""
-    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
+    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and
             not run_params.dynamic_engine)
 
-class ReshapeBatchDimensionTest3(trt_test.TfTrtIntegrationTestBase):
-
-  def GetParams(self):
-    """Create a graph containing single segment."""
-    dtype = dtypes.float32
-    input_name = "input"
-    input_dims = [100, 24, 24, 2]
-    output_name = "output"
-    g = ops.Graph()
-    with g.as_default():
-      inp = array_ops.placeholder(
-          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
-      with g.device("/GPU:0"):
-        reshape = array_ops.reshape(inp, [2, 50, -1, 24, 2])
-        # Add identities to ensure we have at least min_segment_size=3 nodes
-        identity = array_ops.identity(reshape, "identity")
-        identity = array_ops.identity(identity, "identity2")
-      array_ops.identity(identity, name=output_name)
-    return trt_test.TfTrtIntegrationTestParams(
-        gdef=g.as_graph_def(),
-        input_names=[input_name],
-        input_dims=[input_dims],
-        output_names=[output_name],
-        expected_output_dims=[(2, 50, 24, 24, 2)])
-
-  def ExpectedEnginesToBuild(self, run_params):
-    """Return the expected engines to build."""
-    return []
-
-  def ShouldRunTest(self, run_params):
-    """Whether to run the test."""
-    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
-    # a calib graph. Doesn't seem to contain any calibration nodes.""
-    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
-            not run_params.dynamic_engine)
-
-class ReshapeInverseTest(trt_test.TfTrtIntegrationTestBase):
-
-  def GetParams(self):
-    """Create a graph containing single segment."""
-    dtype = dtypes.float32
-    input_name = "input"
-    input_dims = [100, 24, 24, 2]
-    output_name = "output"
-    g = ops.Graph()
-    with g.as_default():
-      inp = array_ops.placeholder(
-          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
-      with g.device("/GPU:0"):
-        reshape = array_ops.reshape(inp, [-1, 24*24*2])
-        reshape = array_ops.reshape(reshape, [-1, 24, 24, 2])
-        identity = array_ops.identity(reshape, "identity")
-      array_ops.identity(identity, name=output_name)
-    return trt_test.TfTrtIntegrationTestParams(
-        gdef=g.as_graph_def(),
-        input_names=[input_name],
-        input_dims=[input_dims],
-        output_names=[output_name],
-        expected_output_dims=[(100, 24, 24, 2)])
-
-  def ExpectedEnginesToBuild(self, run_params):
-    """Return the expected engines to build."""
-    return ["my_trt_op_0"]
-
-class ManyReshapeTest(trt_test.TfTrtIntegrationTestBase):
-
-  def GetParams(self):
-    """Create a graph containing single segment."""
-    dtype = dtypes.float32
-    input_name = "input"
-    input_dims = [100, 24, 24, 2]
-    output_name = "output"
-    g = ops.Graph()
-    with g.as_default():
-      inp = array_ops.placeholder(
-          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
-      with g.device("/GPU:0"):
-        reshape = array_ops.reshape(inp, [-1, 24*24, 2])
-        reshape = array_ops.reshape(reshape, [-1, 24*2, 24])
-        reshape = array_ops.reshape(reshape, [-1, 24, 24*2])
-        reshape = array_ops.reshape(reshape, [-1, 6, 4, 24, 2])
-        reshape = array_ops.reshape(reshape, [-1, 6, 4, 6, 4, 2])
-        reshape = array_ops.reshape(reshape, [-1, 6, 4, 6, 4, 2, 1])
-        reshape = array_ops.reshape(reshape, [-1, 24, 24, 2])
-        identity = array_ops.identity(reshape, "identity")
-      array_ops.identity(identity, name=output_name)
-    return trt_test.TfTrtIntegrationTestParams(
-        gdef=g.as_graph_def(),
-        input_names=[input_name],
-        input_dims=[input_dims],
-        output_names=[output_name],
-        expected_output_dims=[(100, 24, 24, 2)])
-
-  def ExpectedEnginesToBuild(self, run_params):
-    """Return the expected engines to build."""
-    return ["my_trt_op_0"]
-
-class SimpleTransposeTest(trt_test.TfTrtIntegrationTestBase):
-
-  def GetParams(self):
-    """Create a graph containing single segment."""
-    dtype = dtypes.float32
-    input_name = "input"
-    input_dims = [100, 24, 24, 2]
-    output_name = "output"
-    g = ops.Graph()
-    with g.as_default():
-      inp = array_ops.placeholder(
-          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
-      with g.device("/GPU:0"):
-        # to NCHW
-        transpose = array_ops.transpose(inp, [0, 3, 1, 2])
-        identity = array_ops.identity(transpose, "identity")
-      array_ops.identity(identity, name=output_name)
-    return trt_test.TfTrtIntegrationTestParams(
-        gdef=g.as_graph_def(),
-        input_names=[input_name],
-        input_dims=[input_dims],
-        output_names=[output_name],
-        expected_output_dims=[(100, 2, 24, 24)])
-
-  def ExpectedEnginesToBuild(self, run_params):
-    """Return the expected engines to build."""
-    return ["my_trt_op_0"]
-
-class TransposeBatchDimensionTest(trt_test.TfTrtIntegrationTestBase):
-
-  def GetParams(self):
-    """Create a graph containing single segment."""
-    dtype = dtypes.float32
-    input_name = "input"
-    input_dims = [100, 24, 24, 2]
-    output_name = "output"
-    g = ops.Graph()
-    with g.as_default():
-      inp = array_ops.placeholder(
-          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
-      with g.device("/GPU:0"):
-        # to NCHW
-        transpose = array_ops.transpose(inp, [2, 1, 0, 3])
-        identity = array_ops.identity(transpose, "identity")
-      array_ops.identity(identity, name=output_name)
-    return trt_test.TfTrtIntegrationTestParams(
-        gdef=g.as_graph_def(),
-        input_names=[input_name],
-        input_dims=[input_dims],
-        output_names=[output_name],
-        expected_output_dims=[(24, 24, 100, 2)])
-
-  def ExpectedEnginesToBuild(self, run_params):
-    """Return the expected engines to build."""
-    return []
-
-  def ShouldRunTest(self, run_params):
-    """Whether to run the test."""
-    # No engine should be created so exclude INT8 to avoid "ERROR:tensorflow:Not
-    # a calib graph. Doesn't seem to contain any calibration nodes.""
-    return (not trt_test.IsQuantizationMode(run_params.precision_mode) and 
-            not run_params.dynamic_engine)
-
-class TransposeInverseTest(trt_test.TfTrtIntegrationTestBase):
-
-  def GetParams(self):
-    """Create a graph containing single segment."""
-    dtype = dtypes.float32
-    input_name = "input"
-    input_dims = [100, 24, 24, 2]
-    output_name = "output"
-    g = ops.Graph()
-    with g.as_default():
-      inp = array_ops.placeholder(
-          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
-      with g.device("/GPU:0"):
-        # to NCHW
-        transpose = array_ops.transpose(inp, [0, 3, 1, 2])
-        # back to NHWC
-        transpose = array_ops.transpose(transpose, [0, 2, 3, 1])
-        identity = array_ops.identity(transpose, "identity")
-      array_ops.identity(identity, name=output_name)
-    return trt_test.TfTrtIntegrationTestParams(
-        gdef=g.as_graph_def(),
-        input_names=[input_name],
-        input_dims=[input_dims],
-        output_names=[output_name],
-        expected_output_dims=[(100, 24, 24, 2)])
-
-  def ExpectedEnginesToBuild(self, run_params):
-    """Return the expected engines to build."""
-    return ["my_trt_op_0"]
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From d06d64c34e730253abb596cc081793132a5b985d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 10:27:54 -0700
Subject: [PATCH 1032/1085] Add Eager execution to all tests in backend.py

PiperOrigin-RevId: 217341177
---
 tensorflow/python/keras/backend.py      |  19 +-
 tensorflow/python/keras/backend_test.py | 452 +++++++++++++-----------
 2 files changed, 260 insertions(+), 211 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 708a442e71..032077cd12 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -826,6 +826,9 @@ def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None):
       sparse: Boolean, whether the placeholder should have a sparse type.
       name: Optional name string for the placeholder.
 
+  Raises:
+      ValueError: If called with eager execution.
+
   Returns:
       Tensor instance (with Keras metadata included).
 
@@ -837,6 +840,9 @@ def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None):
       <tf.Tensor 'Placeholder_4:0' shape=(2, 4, 5) dtype=float32>
   ```
   """
+  if context.executing_eagerly():
+    raise ValueError(
+        '`keras.backend.placeholder` is not supported with eager execution.')
   if dtype is None:
     dtype = floatx()
   if not shape:
@@ -1007,7 +1013,7 @@ def eval(x):
              [ 3.,  4.]], dtype=float32)
   ```
   """
-  return to_dense(x).eval(session=get_session())
+  return get_value(to_dense(x))
 
 
 @tf_export('keras.backend.zeros')
@@ -3060,8 +3066,11 @@ def function(inputs, outputs, updates=None, **kwargs):
       Output values as Numpy arrays.
 
   Raises:
-      ValueError: if invalid kwargs are passed in.
+      ValueError: if invalid kwargs are passed in or if in eager execution.
   """
+  if context.executing_eagerly():
+    raise ValueError(
+        '`keras.backend.function` is not supported with eager execution.')
   if kwargs:
     for key in kwargs:
       if (key not in tf_inspect.getfullargspec(session_module.Session.run)[0]
@@ -4256,6 +4265,8 @@ def separable_conv2d(x,
     data_format = image_data_format()
   if data_format not in {'channels_first', 'channels_last'}:
     raise ValueError('Unknown data_format: ' + str(data_format))
+  if len(strides) != 2:
+    raise ValueError('`strides` must be a tuple of 2 integers.')
 
   x, tf_data_format = _preprocess_conv2d_input(x, data_format)
   padding = _preprocess_padding(padding)
@@ -4462,6 +4473,10 @@ def pool2d(x,
     data_format = image_data_format()
   if data_format not in {'channels_first', 'channels_last'}:
     raise ValueError('Unknown data_format: ' + str(data_format))
+  if len(pool_size) != 2:
+    raise ValueError('`pool_size` must be a tuple of 2 integers.')
+  if len(strides) != 2:
+    raise ValueError('`strides` must be a tuple of 2 integers.')
 
   x, tf_data_format = _preprocess_conv2d_input(x, data_format)
   padding = _preprocess_padding(padding)
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index 0834448699..4368b69ebe 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -23,9 +23,12 @@ import scipy.sparse
 
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python import keras
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -89,6 +92,7 @@ def compare_two_inputs_op_to_numpy(keras_op,
                          str(keras_output))
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class BackendUtilsTest(test.TestCase):
 
   def test_backend(self):
@@ -130,8 +134,9 @@ class BackendUtilsTest(test.TestCase):
       keras.backend.set_learning_phase(0)
       x = keras.Input((3,))
       y = keras.layers.BatchNormalization()(x)
-      sess.run(variables.global_variables_initializer())
-      sess.run(y, feed_dict={x: np.random.random((2, 3))})
+      if not context.executing_eagerly():
+        sess.run(variables.global_variables_initializer())
+        sess.run(y, feed_dict={x: np.random.random((2, 3))})
 
   def test_learning_phase_scope(self):
     with self.cached_session():
@@ -150,22 +155,29 @@ class BackendUtilsTest(test.TestCase):
       self.assertEqual(keras.backend.learning_phase(), initial_learning_phase)
 
   def test_int_shape(self):
-    x = keras.backend.placeholder(shape=(3, 4))
+    x = keras.backend.ones(shape=(3, 4))
     self.assertEqual(keras.backend.int_shape(x), (3, 4))
 
-    x = keras.backend.placeholder(shape=(None, 4))
-    self.assertEqual(keras.backend.int_shape(x), (None, 4))
+    if not context.executing_eagerly():
+      x = keras.backend.placeholder(shape=(None, 4))
+      self.assertEqual(keras.backend.int_shape(x), (None, 4))
 
   def test_in_train_phase(self):
     with self.cached_session():
       y1 = keras.backend.variable(1)
       y2 = keras.backend.variable(2)
-      y = keras.backend.in_train_phase(y1, y2)
-      f = keras.backend.function([keras.backend.learning_phase()], [y])
-      y_val = f([0])[0]
-      self.assertAllClose(y_val, 2)
-      y_val = f([1])[0]
-      self.assertAllClose(y_val, 1)
+      if context.executing_eagerly():
+        with keras.backend.learning_phase_scope(0):
+          y_val_test = keras.backend.in_train_phase(y1, y2).numpy()
+        with keras.backend.learning_phase_scope(1):
+          y_val_train = keras.backend.in_train_phase(y1, y2).numpy()
+      else:
+        y = keras.backend.in_train_phase(y1, y2)
+        f = keras.backend.function([keras.backend.learning_phase()], [y])
+        y_val_test = f([0])[0]
+        y_val_train = f([1])[0]
+      self.assertAllClose(y_val_test, 2)
+      self.assertAllClose(y_val_train, 1)
 
   def test_is_keras_tensor(self):
     x = keras.backend.variable(1)
@@ -175,164 +187,20 @@ class BackendUtilsTest(test.TestCase):
     with self.assertRaises(ValueError):
       keras.backend.is_keras_tensor(0)
 
-  def test_is_placeholder(self):
-    x = keras.backend.placeholder(shape=(1,))
-    self.assertEqual(keras.backend.is_placeholder(x), True)
-    # Test with TF placeholder
-    x = keras.backend.array_ops.placeholder(dtype='float32', shape=(1,))
-    self.assertEqual(keras.backend.is_placeholder(x), True)
-    x = keras.backend.variable(1)
-    self.assertEqual(keras.backend.is_placeholder(x), False)
-
   def test_stop_gradient(self):
     x = keras.backend.variable(1)
     y = keras.backend.stop_gradient(x)
-    self.assertEqual(y.op.name[:12], 'StopGradient')
+    if not context.executing_eagerly():
+      self.assertEqual(y.op.name[:12], 'StopGradient')
 
     xs = [keras.backend.variable(1) for _ in range(3)]
     ys = keras.backend.stop_gradient(xs)
-    for y in ys:
-      self.assertEqual(y.op.name[:12], 'StopGradient')
-
-  def test_function_tf_feed_symbols(self):
-    with self.cached_session():
-      # Test feeding a resource variable to `function`.
-      x1 = keras.backend.placeholder(shape=())
-      x2 = keras.backend.placeholder(shape=())
-      lr = keras.backend.learning_phase()  # Include a placeholder_with_default.
-
-      y1 = keras.backend.variable(10.)
-      y2 = 3
-
-      f = keras.backend.function(
-          inputs=[x1, x2, lr],
-          outputs=[x1 + 1,
-                   keras.backend.in_train_phase(x2 + 2, x2 - 1)])
-      outs = f([y1, y2, None])  # Use default learning_phase value.
-      self.assertEqual(outs, [11., 2.])
-      outs = f([y1, y2, 1])  # Set learning phase value.
-      self.assertEqual(outs, [11., 5.])
-
-      # Test triggering a callable refresh by changing the input.
-      y3 = keras.backend.constant(20.)  # Test with tensor
-      outs = f([y3, y2, None])
-      self.assertEqual(outs, [21., 2.])
-
-      y4 = 4  # Test with non-symbol
-      outs = f([y4, y2, None])
-      self.assertEqual(outs, [5., 2.])
-
-      # Test with a different dtype
-      y5 = keras.backend.constant(10., dtype='float64')
-      outs = f([y5, y2, None])
-      self.assertEqual(outs, [11., 2.])
-
-  def test_function_tf_fetches(self):
-    # Additional operations can be passed to tf.Session().run() via its
-    # `fetches` arguments. In contrast to `updates` argument of
-    # keras.backend.function() these do not have control dependency on `outputs`
-    # so they can run in parallel. Also they should not contribute to output of
-    # keras.backend.function().
-    with self.cached_session():
-      x = keras.backend.variable(0.)
-      y = keras.backend.variable(0.)
-      x_placeholder = keras.backend.placeholder(shape=())
-      y_placeholder = keras.backend.placeholder(shape=())
-
-      f = keras.backend.function(inputs=[x_placeholder, y_placeholder],
-                                 outputs=[x_placeholder + y_placeholder],
-                                 updates=[(x, x_placeholder + 1.)],
-                                 fetches=[keras.backend.update(y, 5.)])
-      output = f([10., 20.])
-      self.assertEqual(output, [30.])
-      self.assertEqual(
-          keras.backend.get_session().run(fetches=[x, y]), [11., 5.])
-
-  def test_function_tf_feed_dict(self):
-    # Additional substitutions can be passed to `tf.Session().run()` via its
-    # `feed_dict` arguments. Note that the feed_dict is passed once in the
-    # constructor but we can modify the values in the dictionary. Through
-    # this feed_dict we can provide additional substitutions besides Keras
-    # inputs.
-    with self.cached_session():
-      x = keras.backend.variable(0.)
-      y = keras.backend.variable(0.)
-      x_placeholder = keras.backend.placeholder(shape=())
-      y_placeholder = keras.backend.placeholder(shape=())
-
-      feed_dict = {y_placeholder: 3.}
-      fetches = [keras.backend.update(y, y_placeholder * 10.)]
-      f = keras.backend.function(inputs=[x_placeholder],
-                                 outputs=[x_placeholder + 1.],
-                                 updates=[(x, x_placeholder + 10.)],
-                                 feed_dict=feed_dict,
-                                 fetches=fetches)
-      output = f([10.])
-      self.assertEqual(output, [11.])
-      self.assertEqual(
-          keras.backend.get_session().run(fetches=[x, y]), [20., 30.])
-
-      # updated value in feed_dict will be modified within the K.function()
-      feed_dict[y_placeholder] = 4.
-      output = f([20.])
-      self.assertEqual(output, [21.])
-      self.assertEqual(
-          keras.backend.get_session().run(fetches=[x, y]), [30., 40.])
-
-  def test_function_tf_run_options_with_run_metadata(self):
-    with self.cached_session():
-      x_placeholder = keras.backend.placeholder(shape=())
-      y_placeholder = keras.backend.placeholder(shape=())
-
-      run_options = config_pb2.RunOptions(output_partition_graphs=True)
-      run_metadata = config_pb2.RunMetadata()
-      # enable run_options.
-      f = keras.backend.function(inputs=[x_placeholder, y_placeholder],
-                                 outputs=[x_placeholder + y_placeholder],
-                                 options=run_options,
-                                 run_metadata=run_metadata)
-      output = f([10., 20.])
-      self.assertEqual(output, [30.])
-      self.assertGreater(len(run_metadata.partition_graphs), 0)
-      # disable run_options.
-      f1 = keras.backend.function(inputs=[x_placeholder, y_placeholder],
-                                  outputs=[x_placeholder + y_placeholder],
-                                  run_metadata=run_metadata)
-      output1 = f1([10., 20.])
-      self.assertEqual(output1, [30.])
-      self.assertEqual(len(run_metadata.partition_graphs), 0)
-
-  def test_function_fetch_callbacks(self):
-
-    class CallbackStub(object):
-
-      def __init__(self):
-        self.times_called = 0
-        self.callback_result = 0
-
-      def _fetch_callback(self, result):
-        self.times_called += 1
-        self.callback_result = result
-
-    with self.cached_session():
-      callback = CallbackStub()
-      x_placeholder = keras.backend.placeholder(shape=())
-      y_placeholder = keras.backend.placeholder(shape=())
-
-      callback_op = x_placeholder * y_placeholder
-
-      f = keras.backend.function(
-          inputs=[x_placeholder, y_placeholder],
-          outputs=[x_placeholder + y_placeholder])
-      f.fetches.append(callback_op)
-      f.fetch_callbacks[callback_op] = callback._fetch_callback
-
-      _ = f([10., 20.])
-
-      self.assertEqual(callback.times_called, 1)
-      self.assertEqual(callback.callback_result, 200)
+    if not context.executing_eagerly():
+      for y in ys:
+        self.assertEqual(y.op.name[:12], 'StopGradient')
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class BackendVariableTest(test.TestCase):
 
   def test_zeros(self):
@@ -405,23 +273,18 @@ class BackendVariableTest(test.TestCase):
       y = keras.backend.to_dense(x)
       self.assertFalse(keras.backend.is_sparse(y))
 
-  def test_placeholder(self):
-    x = keras.backend.placeholder(shape=(3, 4))
-    self.assertEqual(x.get_shape().as_list(), [3, 4])
-    x = keras.backend.placeholder(shape=(3, 4), sparse=True)
-    self.assertEqual(x.get_shape().as_list(), [3, 4])
-
 
+@test_util.run_all_in_graph_and_eager_modes
 class BackendLinearAlgebraTest(test.TestCase):
 
   def test_dot(self):
-    x = keras.backend.placeholder(shape=(2, 3))
-    y = keras.backend.placeholder(shape=(3, 4))
+    x = keras.backend.ones(shape=(2, 3))
+    y = keras.backend.ones(shape=(3, 4))
     xy = keras.backend.dot(x, y)
     self.assertEqual(xy.get_shape().as_list(), [2, 4])
 
-    x = keras.backend.placeholder(shape=(32, 28, 3))
-    y = keras.backend.placeholder(shape=(3, 4))
+    x = keras.backend.ones(shape=(32, 28, 3))
+    y = keras.backend.ones(shape=(3, 4))
     xy = keras.backend.dot(x, y)
     self.assertEqual(xy.get_shape().as_list(), [32, 28, 4])
 
@@ -525,7 +388,8 @@ class BackendLinearAlgebraTest(test.TestCase):
 
       # alpha (leaky relu used)
       relu_op = keras.backend.relu(x, alpha=0.5)
-      self.assertTrue('LeakyRelu' in relu_op.name)
+      if not context.executing_eagerly():
+        self.assertTrue('LeakyRelu' in relu_op.name)
       self.assertAllClose(keras.backend.eval(relu_op), [[-2, 0], [2, 7]])
 
       # max_value < some elements
@@ -534,7 +398,8 @@ class BackendLinearAlgebraTest(test.TestCase):
 
       # nn.relu6 used
       relu_op = keras.backend.relu(x, max_value=6)
-      self.assertTrue('Relu6' in relu_op.name)  # uses tf.nn.relu6
+      if not context.executing_eagerly():
+        self.assertTrue('Relu6' in relu_op.name)  # uses tf.nn.relu6
       self.assertAllClose(keras.backend.eval(relu_op), [[0, 0], [2, 6]])
 
       # max value > 6
@@ -578,6 +443,7 @@ class BackendLinearAlgebraTest(test.TestCase):
       self.assertAllClose(keras.backend.eval(relu_op), [[-2, -1], [-0.5, 5]])
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class BackendShapeOpsTest(test.TestCase):
 
   def test_reshape(self):
@@ -663,9 +529,10 @@ class BackendShapeOpsTest(test.TestCase):
     self.assertEqual(y.get_shape().as_list(), [1, 9, 2])
 
     # Use with a dynamic axis:
-    x = keras.backend.placeholder(shape=(2, None, 2))
-    y = keras.backend.repeat_elements(x, 3, axis=1)
-    self.assertEqual(y.get_shape().as_list(), [2, None, 2])
+    if not context.executing_eagerly():
+      x = keras.backend.placeholder(shape=(2, None, 2))
+      y = keras.backend.repeat_elements(x, 3, axis=1)
+      self.assertEqual(y.get_shape().as_list(), [2, None, 2])
 
   def test_repeat(self):
     x = keras.backend.variable(np.ones((1, 3)))
@@ -780,6 +647,7 @@ class BackendShapeOpsTest(test.TestCase):
           np_kwargs={'data_format': 'channels_first'})
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class BackendNNOpsTest(test.TestCase, parameterized.TestCase):
 
   def test_bias_add(self):
@@ -799,7 +667,7 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase):
                                      input_shape_a=(4, 3, 5, 2, 7),
                                      input_shape_b=(7,))
 
-      with self.assertRaises(ValueError):
+      with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)):
         x = keras.backend.variable((3, 4))
         b = keras.backend.variable((3, 4))
         keras.backend.bias_add(x, b)
@@ -1278,8 +1146,11 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase):
 
     rnn_fn = rnn_step_fn()
     inputs = keras.backend.variable(input_val)
-    initial_states = [keras.backend.variable(init_state_val),
-                      np.concatenate([init_state_val, init_state_val], axis=-1)]
+    initial_states = [
+        keras.backend.variable(init_state_val),
+        ops.convert_to_tensor(
+            np.concatenate([init_state_val, init_state_val], axis=-1))
+    ]
     mask = keras.backend.variable(np_mask)
 
     kwargs_list = [
@@ -1382,37 +1253,8 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(mean.get_shape().as_list(), [3,])
     self.assertEqual(var.get_shape().as_list(), [3,])
 
-  def test_batch_normalization(self):
-    g_val = np.random.random((3,))
-    b_val = np.random.random((3,))
-    gamma = keras.backend.variable(g_val)
-    beta = keras.backend.variable(b_val)
-
-    # 3D NHC case
-    val = np.random.random((10, 5, 3))
-    x = keras.backend.variable(val)
-    mean, var = nn.moments(x, (0, 1), None, None, False)
-    normed = keras.backend.batch_normalization(
-        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
-    self.assertEqual(normed.shape.as_list(), [10, 5, 3])
-
-    # 4D NHWC case
-    val = np.random.random((10, 5, 5, 3))
-    x = keras.backend.variable(val)
-    mean, var = nn.moments(x, (0, 1, 2), None, None, False)
-    normed = keras.backend.batch_normalization(
-        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
-    self.assertEqual(normed.shape.as_list(), [10, 5, 5, 3])
-
-    # 4D NCHW case
-    val = np.random.random((10, 3, 5, 5))
-    x = keras.backend.variable(val)
-    mean, var = nn.moments(x, (0, 2, 3), None, None, False)
-    normed = keras.backend.batch_normalization(
-        x, mean, var, beta, gamma, axis=1, epsilon=1e-3)
-    self.assertEqual(normed.shape.as_list(), [10, 3, 5, 5])
-
 
+@test_util.run_all_in_graph_and_eager_modes
 class TestCTC(test.TestCase):
 
   def test_ctc_decode(self):
@@ -1518,6 +1360,7 @@ class TestCTC(test.TestCase):
       self.assertAllClose(res[:, 0], ref, atol=1e-05)
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class TestRandomOps(test.TestCase):
 
   def test_random_binomial(self):
@@ -1545,5 +1388,196 @@ class TestRandomOps(test.TestCase):
       preds = seq.predict([['tensorflow eager']])
       self.assertEqual(preds.shape, (1,))
 
+
+class BackendGraphTests(test.TestCase):
+
+  def test_is_placeholder(self):
+    x = keras.backend.placeholder(shape=(1,))
+    self.assertEqual(keras.backend.is_placeholder(x), True)
+    # Test with TF placeholder
+    x = keras.backend.array_ops.placeholder(dtype='float32', shape=(1,))
+    self.assertEqual(keras.backend.is_placeholder(x), True)
+    x = keras.backend.variable(1)
+    self.assertEqual(keras.backend.is_placeholder(x), False)
+
+  def test_function_tf_feed_symbols(self):
+    with self.cached_session():
+      # Test feeding a resource variable to `function`.
+      x1 = keras.backend.placeholder(shape=())
+      x2 = keras.backend.placeholder(shape=())
+      lr = keras.backend.learning_phase()  # Include a placeholder_with_default.
+
+      y1 = keras.backend.variable(10.)
+      y2 = 3
+
+      f = keras.backend.function(
+          inputs=[x1, x2, lr],
+          outputs=[x1 + 1, keras.backend.in_train_phase(x2 + 2, x2 - 1)])
+      outs = f([y1, y2, None])  # Use default learning_phase value.
+      self.assertEqual(outs, [11., 2.])
+      outs = f([y1, y2, 1])  # Set learning phase value.
+      self.assertEqual(outs, [11., 5.])
+
+      # Test triggering a callable refresh by changing the input.
+      y3 = keras.backend.constant(20.)  # Test with tensor
+      outs = f([y3, y2, None])
+      self.assertEqual(outs, [21., 2.])
+
+      y4 = 4  # Test with non-symbol
+      outs = f([y4, y2, None])
+      self.assertEqual(outs, [5., 2.])
+
+      # Test with a different dtype
+      y5 = keras.backend.constant(10., dtype='float64')
+      outs = f([y5, y2, None])
+      self.assertEqual(outs, [11., 2.])
+
+  def test_function_tf_fetches(self):
+    # Additional operations can be passed to tf.Session().run() via its
+    # `fetches` arguments. In contrast to `updates` argument of
+    # keras.backend.function() these do not have control dependency on `outputs`
+    # so they can run in parallel. Also they should not contribute to output of
+    # keras.backend.function().
+    with self.cached_session():
+      x = keras.backend.variable(0.)
+      y = keras.backend.variable(0.)
+      x_placeholder = keras.backend.placeholder(shape=())
+      y_placeholder = keras.backend.placeholder(shape=())
+
+      f = keras.backend.function(
+          inputs=[x_placeholder, y_placeholder],
+          outputs=[x_placeholder + y_placeholder],
+          updates=[(x, x_placeholder + 1.)],
+          fetches=[keras.backend.update(y, 5.)])
+      output = f([10., 20.])
+      self.assertEqual(output, [30.])
+      self.assertEqual(keras.backend.get_session().run(fetches=[x, y]),
+                       [11., 5.])
+
+  def test_function_tf_feed_dict(self):
+    # Additional substitutions can be passed to `tf.Session().run()` via its
+    # `feed_dict` arguments. Note that the feed_dict is passed once in the
+    # constructor but we can modify the values in the dictionary. Through
+    # this feed_dict we can provide additional substitutions besides Keras
+    # inputs.
+    with self.cached_session():
+      x = keras.backend.variable(0.)
+      y = keras.backend.variable(0.)
+      x_placeholder = keras.backend.placeholder(shape=())
+      y_placeholder = keras.backend.placeholder(shape=())
+
+      feed_dict = {y_placeholder: 3.}
+      fetches = [keras.backend.update(y, y_placeholder * 10.)]
+      f = keras.backend.function(
+          inputs=[x_placeholder],
+          outputs=[x_placeholder + 1.],
+          updates=[(x, x_placeholder + 10.)],
+          feed_dict=feed_dict,
+          fetches=fetches)
+      output = f([10.])
+      self.assertEqual(output, [11.])
+      self.assertEqual(keras.backend.get_session().run(fetches=[x, y]),
+                       [20., 30.])
+
+      # updated value in feed_dict will be modified within the K.function()
+      feed_dict[y_placeholder] = 4.
+      output = f([20.])
+      self.assertEqual(output, [21.])
+      self.assertEqual(keras.backend.get_session().run(fetches=[x, y]),
+                       [30., 40.])
+
+  def test_function_tf_run_options_with_run_metadata(self):
+    with self.cached_session():
+      x_placeholder = keras.backend.placeholder(shape=())
+      y_placeholder = keras.backend.placeholder(shape=())
+
+      run_options = config_pb2.RunOptions(output_partition_graphs=True)
+      run_metadata = config_pb2.RunMetadata()
+      # enable run_options.
+      f = keras.backend.function(
+          inputs=[x_placeholder, y_placeholder],
+          outputs=[x_placeholder + y_placeholder],
+          options=run_options,
+          run_metadata=run_metadata)
+      output = f([10., 20.])
+      self.assertEqual(output, [30.])
+      self.assertGreater(len(run_metadata.partition_graphs), 0)
+      # disable run_options.
+      f1 = keras.backend.function(
+          inputs=[x_placeholder, y_placeholder],
+          outputs=[x_placeholder + y_placeholder],
+          run_metadata=run_metadata)
+      output1 = f1([10., 20.])
+      self.assertEqual(output1, [30.])
+      self.assertEqual(len(run_metadata.partition_graphs), 0)
+
+  def test_function_fetch_callbacks(self):
+
+    class CallbackStub(object):
+
+      def __init__(self):
+        self.times_called = 0
+        self.callback_result = 0
+
+      def _fetch_callback(self, result):
+        self.times_called += 1
+        self.callback_result = result
+
+    with self.cached_session():
+      callback = CallbackStub()
+      x_placeholder = keras.backend.placeholder(shape=())
+      y_placeholder = keras.backend.placeholder(shape=())
+
+      callback_op = x_placeholder * y_placeholder
+
+      f = keras.backend.function(
+          inputs=[x_placeholder, y_placeholder],
+          outputs=[x_placeholder + y_placeholder])
+      f.fetches.append(callback_op)
+      f.fetch_callbacks[callback_op] = callback._fetch_callback
+
+      _ = f([10., 20.])
+
+      self.assertEqual(callback.times_called, 1)
+      self.assertEqual(callback.callback_result, 200)
+
+  def test_placeholder(self):
+    x = keras.backend.placeholder(shape=(3, 4))
+    self.assertEqual(x.get_shape().as_list(), [3, 4])
+    x = keras.backend.placeholder(shape=(3, 4), sparse=True)
+    self.assertEqual(x.get_shape().as_list(), [3, 4])
+
+  def test_batch_normalization(self):
+    # No eager CPU kernel.
+    g_val = np.random.random((3,))
+    b_val = np.random.random((3,))
+    gamma = keras.backend.variable(g_val)
+    beta = keras.backend.variable(b_val)
+
+    # 3D NHC case
+    val = np.random.random((10, 5, 3))
+    x = keras.backend.variable(val)
+    mean, var = nn.moments(x, (0, 1), None, None, False)
+    normed = keras.backend.batch_normalization(
+        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
+    self.assertEqual(normed.shape.as_list(), [10, 5, 3])
+
+    # 4D NHWC case
+    val = np.random.random((10, 5, 5, 3))
+    x = keras.backend.variable(val)
+    mean, var = nn.moments(x, (0, 1, 2), None, None, False)
+    normed = keras.backend.batch_normalization(
+        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
+    self.assertEqual(normed.shape.as_list(), [10, 5, 5, 3])
+
+    # 4D NCHW case
+    val = np.random.random((10, 3, 5, 5))
+    x = keras.backend.variable(val)
+    mean, var = nn.moments(x, (0, 2, 3), None, None, False)
+    normed = keras.backend.batch_normalization(
+        x, mean, var, beta, gamma, axis=1, epsilon=1e-3)
+    self.assertEqual(normed.shape.as_list(), [10, 3, 5, 5])
+
+
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 3535836d3a58185335387bb0913da074456919a9 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 16 Oct 2018 10:54:31 -0700
Subject: [PATCH 1033/1085] To minimize the size of the windows GPU package,
 set no_tensorflow_py_deps.

PiperOrigin-RevId: 217346661
---
 tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 34847e637a..6178d7794d 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -103,7 +103,8 @@ fi
 
 run_configure_for_gpu_build
 
-bazel build --announce_rc --config=opt tensorflow/tools/pip_package:build_pip_package || exit $?
+bazel build --announce_rc --config=opt --define=no_tensorflow_py_deps=true \
+  tensorflow/tools/pip_package:build_pip_package || exit $?
 
 if [[ "$SKIP_TEST" == 1 ]]; then
   exit 0
-- 
GitLab


From ef83241220f5e7bf8b8408936f3b38b461ba653b Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Tue, 16 Oct 2018 11:07:37 -0700
Subject: [PATCH 1034/1085] Make pruning work when calling cond_v2 in a v1
 Graph.

PiperOrigin-RevId: 217349450
---
 .../python/kernel_tests/cond_v2_test.py       |  6 +--
 .../kernel_tests/control_flow_ops_py_test.py  | 49 +++++++++++++++++++
 tensorflow/python/ops/cond_v2.py              | 20 +++++++-
 3 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 833a0d152c..85a5986041 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -138,19 +138,19 @@ class CondV2Test(test.TestCase):
   def testDefaultName(self):
     with ops.Graph().as_default():
       cond = self._createCond(None)
-      self.assertEqual(cond.name, "cond")
+      self.assertStartsWith(cond.name, "cond")
       self.assertIn("cond_true", ops.get_default_graph()._functions)
       self.assertIn("cond_false", ops.get_default_graph()._functions)
 
     with ops.Graph().as_default():
       with ops.name_scope("foo"):
         cond = self._createCond("")
-        self.assertEqual(cond.name, "foo/cond")
+        self.assertStartsWith(cond.name, "foo/cond")
         self.assertIn("foo_cond_true", ops.get_default_graph()._functions)
         self.assertIn("foo_cond_false", ops.get_default_graph()._functions)
 
         cond2 = self._createCond(None)
-        self.assertEqual(cond2.name, "foo/cond_1")
+        self.assertStartsWith(cond2.name, "foo/cond_1")
         self.assertIn("foo_cond_1_true", ops.get_default_graph()._functions)
         self.assertIn("foo_cond_1_false", ops.get_default_graph()._functions)
 
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index c74fca49f8..3c7e6e6dce 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -22,6 +22,7 @@ from __future__ import print_function
 
 import collections
 import math
+import sys
 import time
 
 import numpy as np
@@ -740,6 +741,54 @@ class ControlFlowTest(test.TestCase):
       ]
       self.assertAllEqual(dense_gv, [0.0, 2.0])
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testCondAutoControlDeps(self):
+
+    def branch_fn():
+      logging_ops.print_v2("A")
+      logging_ops.print_v2("B")
+      with ops.control_dependencies([logging_ops.print_v2("C")]):
+        return constant_op.constant(10)
+
+    def build_cond():
+      return control_flow_ops.cond(
+          constant_op.constant(True), branch_fn, lambda: 0)
+
+    def build_nested_cond():
+      return control_flow_ops.cond(
+          constant_op.constant(True), build_cond, lambda: 0)
+
+    # In v1 graph mode, pruning should make only "C" print.
+    if not context.executing_eagerly():
+      with self.cached_session():
+        with self.captureWritesToStream(sys.stderr) as printed:
+          self.assertEqual(build_cond().eval(), 10)
+        self.assertEqual(printed.contents(), "C\n")
+
+        with self.captureWritesToStream(sys.stderr) as printed:
+          self.assertEqual(build_nested_cond().eval(), 10)
+        self.assertEqual(printed.contents(), "C\n")
+
+    # In defuns, all prints should execute in program order.
+    # This doesn't work with legacy control flow.
+    if control_flow_ops.ENABLE_COND_V2:
+
+      @eager_function.defun
+      def cond():
+        return build_cond()
+
+      with self.captureWritesToStream(sys.stderr) as printed:
+        self.assertEqual(self.evaluate(cond()), 10)
+      self.assertEqual(printed.contents(), "A\nB\nC\n")
+
+      @eager_function.defun
+      def nested_cond():
+        return build_nested_cond()
+
+      with self.captureWritesToStream(sys.stderr) as printed:
+        self.assertEqual(self.evaluate(nested_cond()), 10)
+      self.assertEqual(printed.contents(), "A\nB\nC\n")
+
   # Microbenchmark: 256,000 iterations/s.
   @test_util.disable_control_flow_v2("b/116630618 (Times out)")
   def testWhile_1(self):
diff --git a/tensorflow/python/ops/cond_v2.py b/tensorflow/python/ops/cond_v2.py
index b3ae378316..bb9286210c 100644
--- a/tensorflow/python/ops/cond_v2.py
+++ b/tensorflow/python/ops/cond_v2.py
@@ -62,12 +62,18 @@ def cond_v2(pred, true_fn, false_fn, name="cond"):
       true_name = graph.unique_name(("%strue" % scope).replace("/", "_"))
       false_name = graph.unique_name(("%sfalse" % scope).replace("/", "_"))
 
+    # Automatic control dependencies are added in defuns, but not in v1
+    # graphs. Propagate that behavior here.
+    add_control_dependencies = util.in_defun()
+
     true_graph = function.func_graph_from_py_func(
         true_name, true_fn, [], {},
-        func_graph=util.CondBranchFuncGraph(true_name))
+        func_graph=util.CondBranchFuncGraph(true_name),
+        add_control_dependencies=add_control_dependencies)
     false_graph = function.func_graph_from_py_func(
         false_name, false_fn, [], {},
-        func_graph=util.CondBranchFuncGraph(false_name))
+        func_graph=util.CondBranchFuncGraph(false_name),
+        add_control_dependencies=add_control_dependencies)
     _check_same_outputs(true_graph, false_graph)
 
     # Add inputs to true_graph and false_graph to make them match. Note that
@@ -120,6 +126,16 @@ def cond_v2(pred, true_fn, false_fn, name="cond"):
                       attr_value_pb2.AttrValue(b=True))
       # pylint: enable=protected-access
 
+    # Return identities for each output of the If op, rather than the output of
+    # the If op directly. This makes pruning work if the output of cond() is
+    # fetched: the lowering pass converts the If outputs into IdentityN outputs,
+    # which if fetched will cause all ops in the taken branch to be run (since
+    # it takes all merge ops as input). After lowering, each output identity op
+    # will end up with only the appropriate merge op as input.
+    # TODO(b/79984175): this doesn't have to be a tuple once we covert to the
+    # correct output structure
+    tensors = tuple(array_ops.identity(t) for t in tensors)
+
     result = tuple(tensors[:num_cond_outputs])
     if len(result) == 1:
       return result[0]
-- 
GitLab


From 2acbbe480b1de8f1bc4936d272333fd812382198 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 16 Oct 2018 11:09:30 -0700
Subject: [PATCH 1035/1085] Adding GetNextAsOptional support for
 MultiDeviceIterator

PiperOrigin-RevId: 217349782
---
 .../multi_device_iterator_test.py             | 69 +++++++++++++++++++
 tensorflow/python/data/ops/BUILD              |  1 +
 .../data/ops/multi_device_iterator_ops.py     | 11 +++
 3 files changed, 81 insertions(+)

diff --git a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
index 1cf6dd1bea..758d75650c 100644
--- a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
+++ b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
@@ -113,6 +113,39 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
         sess.run(elem_on_1)
         sess.run(elem_on_2)
 
+  def testGetNextAsOptional(self):
+    dataset = dataset_ops.Dataset.range(9)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/cpu:2"])
+    elem_on_1, elem_on_2 = multi_device_iterator.get_next_as_optional()
+    elem_on_1_has_value_t = elem_on_1.has_value()
+    elem_on_1_t = elem_on_1.get_value()
+    elem_on_2_has_value_t = elem_on_2.has_value()
+    elem_on_2_t = elem_on_2.get_value()
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 3})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+      for i in range(0, 8, 2):
+        elem_on_1_has_value, elem_on_1_value = sess.run(
+            [elem_on_1_has_value_t, elem_on_1_t])
+        self.assertTrue(elem_on_1_has_value)
+        self.assertEqual(i, elem_on_1_value)
+        elem_on_2_has_value, elem_on_2_value = sess.run(
+            [elem_on_2_has_value_t, elem_on_2_t])
+        self.assertTrue(elem_on_2_has_value)
+        self.assertEqual(i + 1, elem_on_2_value)
+      elem_on_1_has_value, elem_on_1_value = sess.run(
+          [elem_on_1_has_value_t, elem_on_1_t])
+      self.assertTrue(elem_on_1_has_value)
+      self.assertEqual(8, elem_on_1_value)
+      self.assertFalse(sess.run(elem_on_1_has_value_t))
+      self.assertFalse(sess.run(elem_on_2_has_value_t))
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(elem_on_1_t)
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(elem_on_2_t)
+
   def testUneven(self):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
@@ -186,6 +219,42 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
         sess.run(elem_on_1)
         sess.run(elem_on_2)
 
+  def testGetNextAsOptionalGpu(self):
+    if not test_util.is_gpu_available():
+      self.skipTest("No GPU available")
+
+    dataset = dataset_ops.Dataset.range(9)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, ["/cpu:1", "/gpu:0"])
+    elem_on_1, elem_on_2 = multi_device_iterator.get_next_as_optional()
+    elem_on_1_has_value_t = elem_on_1.has_value()
+    elem_on_1_t = elem_on_1.get_value()
+    elem_on_2_has_value_t = elem_on_2.has_value()
+    elem_on_2_t = elem_on_2.get_value()
+
+    config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
+    with self.test_session(config=config) as sess:
+      sess.run(multi_device_iterator.initializer)
+      for i in range(0, 8, 2):
+        elem_on_1_has_value, elem_on_1_value = sess.run(
+            [elem_on_1_has_value_t, elem_on_1_t])
+        self.assertTrue(elem_on_1_has_value)
+        self.assertEqual(i, elem_on_1_value)
+        elem_on_2_has_value, elem_on_2_value = sess.run(
+            [elem_on_2_has_value_t, elem_on_2_t])
+        self.assertTrue(elem_on_2_has_value)
+        self.assertEqual(i + 1, elem_on_2_value)
+      elem_on_1_has_value, elem_on_1_value = sess.run(
+          [elem_on_1_has_value_t, elem_on_1_t])
+      self.assertTrue(elem_on_1_has_value)
+      self.assertEqual(8, elem_on_1_value)
+      self.assertFalse(sess.run(elem_on_1_has_value_t))
+      self.assertFalse(sess.run(elem_on_2_has_value_t))
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(elem_on_1_t)
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(elem_on_2_t)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD
index 76bf2470b1..84f6c30f5e 100644
--- a/tensorflow/python/data/ops/BUILD
+++ b/tensorflow/python/data/ops/BUILD
@@ -90,6 +90,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":dataset_ops",
+        ":iterator_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:dataset_ops_gen",
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index b7033cc4ce..3bcd61a197 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import sparse
 from tensorflow.python.eager import context
@@ -226,6 +227,16 @@ class MultiDeviceIterator(object):
       i += 1
     return result
 
+  def get_next_as_optional(self):
+    result = []
+    i = 0
+    for device in self._devices:
+      with ops.device(device):
+        result.append(iterator_ops.get_next_as_optional(
+            self._device_iterators[i]))
+      i += 1
+    return result
+
   @property
   def initializer(self):
     return self._initializer
-- 
GitLab


From 7adb49638c583d6f1020c0ba6320918f34ab548f Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 16 Oct 2018 11:13:50 -0700
Subject: [PATCH 1036/1085] Disable tests in OSS until we can figure out what's
 wrong with them.

PiperOrigin-RevId: 217350731
---
 tensorflow/examples/autograph/integration_tests/BUILD      | 5 ++++-
 tensorflow/python/autograph/pyct/common_transformers/BUILD | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/examples/autograph/integration_tests/BUILD b/tensorflow/examples/autograph/integration_tests/BUILD
index 3630b41fc8..1674d2fa88 100644
--- a/tensorflow/examples/autograph/integration_tests/BUILD
+++ b/tensorflow/examples/autograph/integration_tests/BUILD
@@ -22,7 +22,10 @@ py_test(
         "errors_test.py",
     ],
     srcs_version = "PY2AND3",
-    tags = ["no_windows"],
+    tags = [
+        "no_oss",
+        "no_windows",
+    ],
     deps = [
         "//tensorflow:tensorflow_py",
     ],
diff --git a/tensorflow/python/autograph/pyct/common_transformers/BUILD b/tensorflow/python/autograph/pyct/common_transformers/BUILD
index 5e2f8f3ac0..1106a19de1 100644
--- a/tensorflow/python/autograph/pyct/common_transformers/BUILD
+++ b/tensorflow/python/autograph/pyct/common_transformers/BUILD
@@ -34,6 +34,7 @@ py_test(
     name = "anf_test",
     srcs = ["anf_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_oss"],
     deps = [
         ":common_transformers",
         "//tensorflow/python:client_testlib",
-- 
GitLab


From c18e8cf8825dbf9c129dcdedb66820e4933e7b83 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Tue, 16 Oct 2018 11:15:35 -0700
Subject: [PATCH 1037/1085] [Grappler] Remove unused outputs from specialized
 functions.

1. Remove function outputs that are not connected to other nodes and not in a fetch set.
2. Remap consumers to use new output positions.

PiperOrigin-RevId: 217351054
---
 tensorflow/core/grappler/optimizers/BUILD     |   1 +
 .../grappler/optimizers/function_optimizer.cc | 208 ++++++++++++++++--
 .../optimizers/function_optimizer_test.cc     | 117 +++++++++-
 .../optimizers/meta_optimizer_test.cc         | 121 +++++++++-
 tensorflow/core/grappler/utils/functions.cc   |  41 ++++
 tensorflow/core/grappler/utils/functions.h    |  18 +-
 6 files changed, 471 insertions(+), 35 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index 43a7d6a70b..c732c690fc 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -145,6 +145,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/grappler:graph_view",
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:op_types",
         "//tensorflow/core/grappler:utils",
diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc
index 56364f0095..7c35cc5f72 100644
--- a/tensorflow/core/grappler/optimizers/function_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/grappler/graph_view.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
@@ -39,6 +40,14 @@ namespace tensorflow {
 namespace grappler {
 namespace {
 
+// WARNING: Code in this file implicitly assumes that function input and output
+// arguments are plain tensors (tensor lists are not supported). Function inputs
+// and outputs are always expanded to a single placeholder or output tensor.
+// With this assumption, the calling node's input/output ports always match
+// function input/output arguments.
+//
+// This is guaranteed by the implementation of MakeGrapplerFunctionItem.
+
 // Mark functions that were created as a result of function specialization.
 constexpr char kGrapplerSpecializedFuncAttr[] = "_GrapplerSpecializedFunc";
 
@@ -80,13 +89,23 @@ string UniqueSpecializedFunctionName(const FunctionDef& func,
 // Specialized function instantiation type parameters, body parameters, and
 // const inputs.
 struct FunctionSpecializationSignature {
+  // Currently we do not support functions with tensor lists as inputs or
+  // outputs, so caller node input/output ports always match function
+  // input/output arguments.
+  using InputPort = int;
+  using OutputPort = int;
+
   string func_name;
+  bool is_in_fetch_set;
+  gtl::FlatSet<OutputPort> active_outputs;
   std::unordered_map<string, DataType> type_parameters;
   std::unordered_map<string, AttrValue> body_parameters;
-  std::unordered_map<int, string> const_inputs;
+  std::unordered_map<InputPort, string> const_inputs;
 
   bool operator==(const FunctionSpecializationSignature& other) const {
     bool equals = func_name == other.func_name &&
+                  is_in_fetch_set == other.is_in_fetch_set &&
+                  active_outputs == other.active_outputs &&
                   type_parameters == other.type_parameters &&
                   const_inputs == other.const_inputs;
 
@@ -104,11 +123,21 @@ struct FunctionSpecializationSignature {
     return true;
   }
 
+  // TODO(ezhulenev): Migrate to AbslHashValue.
+  // TODO(ezhulenev): Optimize performance by computing hashes of unordered
+  // values first, and then compute a hash of sorted hashes.
   struct Hash {
     uint64 operator()(FunctionSpecializationSignature const& s) const {
       uint64 h = Hash64(s.func_name);
+      h = Hash64Combine(std::hash<bool>()(s.is_in_fetch_set), h);
+
+      // Use std::set/std::map for deterministic iteration order.
 
-      // Use std::map for deterministic iteration order.
+      std::set<OutputPort> active_outputs(s.active_outputs.begin(),
+                                          s.active_outputs.end());
+      for (const auto& active_output : active_outputs) {
+        h = Hash64Combine(std::hash<int>()(active_output), h);
+      }
 
       std::map<string, DataType> types(s.type_parameters.begin(),
                                        s.type_parameters.end());
@@ -126,8 +155,8 @@ struct FunctionSpecializationSignature {
         h = Hash64Combine(FastAttrValueHash(pair.second), h);
       }
 
-      std::map<int, string> inputs(s.const_inputs.begin(),
-                                   s.const_inputs.end());
+      std::map<InputPort, string> inputs(s.const_inputs.begin(),
+                                         s.const_inputs.end());
       for (const auto& pair : inputs) {
         h = Hash64Combine(std::hash<int>()(pair.first), h);
         h = Hash64Combine(Hash64(pair.second), h);
@@ -140,8 +169,15 @@ struct FunctionSpecializationSignature {
 
 struct FunctionSpecialization {
   string specialized_func_name;
-  std::unordered_set<string> const_inputs;
-  std::unordered_set<string> control_deps;
+  // Names of the tensors that were pushed down into the function body.
+  gtl::FlatSet<string> const_inputs;
+  // Control dependencies of pushed down const inputs have to be attached to
+  // function caller node.
+  gtl::FlatSet<string> control_deps;
+  // Mapping from original function output port to the output port of
+  // specialized function. If function specialization changes the number of
+  // function outputs it's required to update all node consumers.
+  std::vector<std::pair<int, int>> output_mapping;
 };
 
 class FakeCPUDevice : public Device {
@@ -155,9 +191,12 @@ class FunctionOptimizerContext {
   explicit FunctionOptimizerContext(RewriterConfig::Toggle opt_level,
                                     const GrapplerItem& item)
       : graph_version_(item.graph.versions().producer()),
-        function_library_(OpRegistry::Global(), item.graph.library()) {
+        function_library_(OpRegistry::Global(), item.graph.library()),
+        // GraphView doesn't not modify the graph or the nodes.
+        graph_view_(const_cast<GraphDef*>(&item.graph)) {
     InitializeTrulyConstNodes(item);
     InitializeInlinedFunctions(opt_level, item);
+    InitializeFetchNodes(item);
   }
 
   const FunctionLibraryDefinition& function_library() const {
@@ -173,6 +212,19 @@ class FunctionOptimizerContext {
     return flr_;
   }
 
+  const gtl::FlatMap<string, std::vector<std::pair<int, int>>>&
+  output_mappings() const {
+    return output_mappings_;
+  }
+
+  const GraphView& graph_view() const { return graph_view_; }
+
+  const gtl::FlatSet<string>& fetch_tensors() const { return fetch_tensors_; }
+
+  bool IsFetchNode(const string& node_name) const {
+    return fetch_nodes_.find(node_name) != fetch_nodes_.end();
+  }
+
   bool IsInlinedFunction(const string& name) const {
     return inlined_functions_.count(name) > 0;
   }
@@ -200,9 +252,25 @@ class FunctionOptimizerContext {
     specialized_functions_.emplace(sig, specialized_func);
   }
 
+  void AddOutputMapping(const string& func_node,
+                        const FunctionSpecialization& specialized_func) {
+    output_mappings_.emplace(func_node, specialized_func.output_mapping);
+  }
+
+  // Return true if we had any specialized function that changed it's output
+  // mapping, and it's required to update output consumers to new ports ids.
+  bool RequiresOutputMapping() const {
+    for (const auto& m1 : output_mappings_) {
+      for (const std::pair<int, int>& m2 : m1.second) {
+        if (m2.first != m2.second) return true;
+      }
+    }
+    return false;
+  }
+
  private:
   void InitializeTrulyConstNodes(const GrapplerItem& item) {
-    std::unordered_set<string> feed_nodes;
+    gtl::FlatSet<string> feed_nodes;
     for (const auto& feed : item.feed) {
       feed_nodes.insert(NodeName(feed.first));
     }
@@ -234,6 +302,13 @@ class FunctionOptimizerContext {
     }
   }
 
+  void InitializeFetchNodes(const GrapplerItem& item) {
+    for (const string& fetch : item.fetch) {
+      fetch_tensors_.insert(fetch);
+      fetch_nodes_.insert(NodeName(fetch));
+    }
+  }
+
   void InitializeFunctionLibraryRuntime() {
     if (!flr_) {
       Env* env = Env::Default();
@@ -269,9 +344,42 @@ class FunctionOptimizerContext {
                      FunctionSpecializationSignature::Hash>
       specialized_functions_;
 
+  // GrapplerItem.fetch is a vector of tensors.
+  gtl::FlatSet<string> fetch_tensors_;  // format: node_name:port
+  gtl::FlatSet<string> fetch_nodes_;    // format: node_name
+
+  // Output mappings that have to be applied to the graph after all functions
+  // are specialized (node name -> output mappings).
+  gtl::FlatMap<string, std::vector<std::pair<int, int>>> output_mappings_;
+
+  // Use graph view to find active outputs of the function caller nodes.
+  GraphView graph_view_;
+
   TF_DISALLOW_COPY_AND_ASSIGN(FunctionOptimizerContext);
 };
 
+gtl::FlatSet<int> GetActiveOutputs(const NodeDef& node,
+                                   const FunctionOptimizerContext& ctx,
+                                   int size_hint = 0) {
+  gtl::FlatSet<int> active_outputs;
+  active_outputs.reserve(static_cast<size_t>(size_hint));
+
+  // 1. Output can be consumed by the other graph node.
+  const auto node_fanout_edges =
+      ctx.graph_view().GetFanoutEdges(node, /*include_controlled_edges=*/false);
+  for (const GraphView::Edge& edge : node_fanout_edges) {
+    active_outputs.insert(edge.src.port_id);
+  }
+
+  // 2. Or it can be in a fetch set.
+  for (const string& fetch_tensor : ctx.fetch_tensors()) {
+    int port = NodePositionIfSameNode(fetch_tensor, node.name());
+    if (port >= 0) active_outputs.insert(port);
+  }
+
+  return active_outputs;
+}
+
 bool HasTrulyConstInputs(const NodeDef& node,
                          const FunctionOptimizerContext& ctx) {
   const auto is_truly_const = [&ctx](const string& input) {
@@ -280,12 +388,24 @@ bool HasTrulyConstInputs(const NodeDef& node,
   return std::any_of(node.input().begin(), node.input().end(), is_truly_const);
 }
 
+bool HasUnusedOutputs(const NodeDef& func_node, const FunctionDef& func,
+                      const FunctionOptimizerContext& ctx) {
+  // Functions with tensor list outputs are not supported right now, so the
+  // number of output args is the same as number of possible function caller
+  // node outputs.
+  int num_outputs = func.signature().output_arg_size();
+  const gtl::FlatSet<int> active_outputs =
+      GetActiveOutputs(func_node, ctx, /*size_hind*/ num_outputs);
+
+  return active_outputs.size() != num_outputs;
+}
+
 // Return trimmed FunctionDefLibrary with functions that are reachable from
 // the optimized graph.
 FunctionDefLibrary TrimFunctionLibrary(const FunctionLibraryDefinition& flib,
                                        const GraphDef& optimized_graph) {
   // Functions that are reachable from the optimized graph.
-  std::unordered_set<string> keep_funcs;
+  gtl::FlatSet<string> keep_funcs;
 
   std::vector<const FunctionDef*> func_queue;
   func_queue.reserve(flib.num_functions());
@@ -365,8 +485,8 @@ FunctionDefLibrary TrimFunctionLibrary(const FunctionLibraryDefinition& flib,
 Status PushDownConstInputs(const NodeDef& func_node,
                            const FunctionOptimizerContext& ctx,
                            GrapplerFunctionItem* item,
-                           std::unordered_set<string>* const_inputs,
-                           std::unordered_set<string>* control_deps) {
+                           gtl::FlatSet<string>* const_inputs,
+                           gtl::FlatSet<string>* control_deps) {
   // Record node control dependencies in the control_deps set.
   const auto record_control_deps = [&](const NodeDef* const_input) {
     for (int i = const_input->input_size() - 1; i >= 0; --i) {
@@ -397,8 +517,8 @@ Status PushDownConstInputs(const NodeDef& func_node,
 
 // Remove inputs that were pushed into the function body, and attach their
 // control dependencies to the function caller node.
-void RemovePushedDownConstInputs(const std::unordered_set<string>& const_inputs,
-                                 const std::unordered_set<string>& control_deps,
+void RemovePushedDownConstInputs(const gtl::FlatSet<string>& const_inputs,
+                                 const gtl::FlatSet<string>& control_deps,
                                  NodeDef* specialized_func_node) {
   // Nothing to do if it was no const inputs to the function node.
   if (const_inputs.empty()) return;
@@ -416,7 +536,7 @@ void RemovePushedDownConstInputs(const std::unordered_set<string>& const_inputs,
 
   // Attach control dependencies of pushed down const input to the caller node.
   if (!control_deps.empty()) {
-    std::unordered_set<string> existing_control_deps;
+    gtl::FlatSet<string> existing_control_deps;
 
     for (const string& input : keep_inputs) {
       existing_control_deps.insert(AsControlDependency(NodeName(input)));
@@ -435,7 +555,12 @@ Status InitializeFunctionSpecializationSignature(
     const NodeDef& func_node, const FunctionDef& func,
     const AttrValueMap& func_attr, const FunctionOptimizerContext& ctx,
     FunctionSpecializationSignature* sig) {
+  DCHECK(sig->const_inputs.empty());
+  DCHECK(sig->active_outputs.empty());
+
   sig->func_name = func.signature().name();
+  sig->is_in_fetch_set = ctx.IsFetchNode(func_node.name());
+  sig->active_outputs = GetActiveOutputs(func_node, ctx);
 
   TF_RETURN_IF_ERROR(
       InstantiationTypeParameters(func, func_attr, &sig->type_parameters));
@@ -484,6 +609,8 @@ Status SpecializeFunction(const NodeDef& func_node, const FunctionDef& func,
                                 already_specialized->control_deps,
                                 specialized_func_node);
 
+    ctx->AddOutputMapping(specialized_func_node->name(), *already_specialized);
+
     return Status::OK();
   }
 
@@ -498,11 +625,19 @@ Status SpecializeFunction(const NodeDef& func_node, const FunctionDef& func,
 
   // Push const inputs into the function body, and keep track of their control
   // dependencies.
-  std::unordered_set<string> const_inputs;
-  std::unordered_set<string> control_deps;
+  gtl::FlatSet<string> const_inputs;
+  gtl::FlatSet<string> control_deps;
   TF_RETURN_IF_ERROR(PushDownConstInputs(func_node, *ctx, &item, &const_inputs,
                                          &control_deps));
 
+  // Remove function outputs that do not have any consumers. We can't safely
+  // update outputs for the fetch nodes, so we just skip them.
+  std::vector<std::pair<int, int>> output_mapping;
+  if (!signature.is_in_fetch_set) {
+    TF_RETURN_IF_ERROR(
+        RemoveUnusedOutputs(signature.active_outputs, &item, &output_mapping));
+  }
+
   // TODO(ezhulenev): Push down known input shapes.
   FunctionDef specialized_func;
   TF_RETURN_IF_ERROR(MakeFunctionDef(item, flib, &specialized_func));
@@ -528,8 +663,10 @@ Status SpecializeFunction(const NodeDef& func_node, const FunctionDef& func,
   RemovePushedDownConstInputs(const_inputs, control_deps,
                               specialized_func_node);
 
-  ctx->AddSpecializedFunction(
-      signature, {specialized_func_name, const_inputs, control_deps});
+  FunctionSpecialization func_specialization = {
+      specialized_func_name, const_inputs, control_deps, output_mapping};
+  ctx->AddSpecializedFunction(signature, func_specialization);
+  ctx->AddOutputMapping(specialized_func_node->name(), func_specialization);
 
   return Status::OK();
 }
@@ -835,9 +972,12 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       // Do not specialize if function has custom gradient.
       const string grad_func = ctx.function_library().FindGradient(func_name);
 
-      // 2b. Specialize it to it's instantiation context if can't be inlined.
-      if (specialize_func && grad_func.empty() &&
-          (IsParametrized(*func) || HasTrulyConstInputs(node, ctx))) {
+      // 2b. Specialize it to it's instantiation context if can't be inlined,
+      // and it has something worth specializing.
+      bool specialization_worthy = IsParametrized(*func) ||
+                                   HasTrulyConstInputs(node, ctx) ||
+                                   HasUnusedOutputs(node, *func, ctx);
+      if (specialize_func && grad_func.empty() && specialization_worthy) {
         // TODO(ezhulenev): Specialize function call if input has a known shape.
         // Specialize function body for its instantiation attributes and inputs.
         TF_SKIP_ERROR_IF_GRAPH_UNMODIFIED(
@@ -854,6 +994,32 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
 #undef TF_SKIP_ERROR_IF_GRAPH_UNMODIFIED
   }
 
+  // Function specialization might change the number of function outputs, so we
+  // have to process the final optimized graph and update all the node mapping.
+  if (ctx.RequiresOutputMapping()) {
+    GraphView optimized_graph_view(optimized_graph);
+    for (const auto& output_mapping : ctx.output_mappings()) {
+      const auto& node_name = output_mapping.first;
+      const auto& mappings = output_mapping.second;
+
+      for (const std::pair<int, int>& mapping : mappings) {
+        int from = mapping.first;
+        int to = mapping.second;
+
+        // Find the output port corresponding to the old output position.
+        GraphView::OutputPort from_port =
+            optimized_graph_view.GetOutputPort(node_name, from);
+
+        // Update all input ports that read from old output port.
+        for (GraphView::InputPort to_port :
+             optimized_graph_view.GetFanout(from_port)) {
+          *to_port.node->mutable_input(to_port.port_id) =
+              strings::StrCat(node_name, ":", to);
+        }
+      }
+    }
+  }
+
   *optimized_graph->mutable_versions() = item.graph.versions();
   *optimized_graph->mutable_library() =
       options_.enable_trim_function_library
diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc
index fab3f994c1..a22f97800f 100644
--- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils/grappler_test.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -856,6 +857,10 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_OncePerUniqueContext) {
        NDef("mul_6", "MyMul", {"three", "xf"}, {{"T", DT_FLOAT}}, kDevice)},
       function_library);
 
+  // Specify fetch nodes before optimization to prevent pruning unused function
+  // outputs.
+  item.fetch = {"mul_1", "mul_2", "mul_3", "mul_4", "mul_5", "mul_6"};
+
   GraphDef output;
   TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
 
@@ -893,8 +898,9 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_OncePerUniqueContext) {
       EXPECT_EQ("MyMul_specialized_for_mul_4", node.op());
       ASSERT_EQ(3, node.input_size());
       EXPECT_EQ("yf", node.input(0));
-      EXPECT_EQ("^init", node.input(1));
-      EXPECT_EQ("^xf", node.input(2));
+      gtl::FlatSet<string> expected_ctrl = {"^init", "^xf"};
+      gtl::FlatSet<string> actual_ctrl = {node.input(1), node.input(2)};
+      EXPECT_EQ(expected_ctrl, actual_ctrl);
 
     } else if (node.name() == "mul_6" && count++) {
       EXPECT_EQ("MyMul_specialized_for_mul_6", node.op());
@@ -908,7 +914,6 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_OncePerUniqueContext) {
   // And that graph evaluation yields the same result.
   Tensor pi = test::AsScalar<float>(3.14f);
   Tensor four = test::AsScalar<int32>(4);
-  item.fetch = {"mul_1", "mul_2", "mul_3", "mul_4", "mul_5", "mul_6"};
   item.feed = {{"xf", pi}, {"yf", pi}, {"xi", four}, {"yi", four}};
 
   auto tensors_expected = EvaluateFetchNodes(item);
@@ -923,6 +928,112 @@ TEST_F(FunctionOptimizerTest, SpecializeFunction_OncePerUniqueContext) {
   test::ExpectTensorEqual<float>(tensors_expected[5], tensors[5]);
 }
 
+TEST_F(FunctionOptimizerTest, SpecializeFunctionForUsedOutputTensors) {
+  using test::function::NDef;
+
+  FunctionOptimizer optimizer(RewriterConfig::DEFAULT);
+
+  // MyFunc computes x*y three times and has three output values.
+  FunctionDef my_func = FunctionDefHelper::Create(
+      "MyFunc", {"x:T", "y:T"}, {"z1:T", "z2:T", "z3:T"}, {"T: {float, int32}"},
+      {{{"output1"}, "Mul", {"x", "y"}, {{"T", "$T"}}},
+       {{"output2"}, "Mul", {"x", "y"}, {{"T", "$T"}}},
+       {{"output3"}, "Mul", {"x", "y"}, {{"T", "$T"}}}},
+      /* Mapping between function returns and function node outputs. */
+      {{"z1", "output1:z:0"}, {"z2", "output2:z:0"}, {"z3", "output3:z:0"}});
+  (*my_func.mutable_attr())["_noinline"].set_b(true);
+  std::vector<FunctionDef> function_library = {my_func};
+
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("init", "NoOp", {}, {}, kDevice),
+
+       // Float placeholders.
+       NDef("xf", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+       NDef("yf", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+
+       // Specialization #1: DT_FLOAT type parameter. All outputs used.
+       NDef("fn1", "MyFunc", {"xf", "yf"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("use_fn1_0", "Identity", {"fn1:0"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("use_fn1_1", "Identity", {"fn1:1"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("use_fn1_2", "Identity", {"fn1:2"}, {{"T", DT_FLOAT}}, kDevice),
+
+       // Specialization #2: DT_FLOAT type parameter. Only first output used.
+       NDef("fn2", "MyFunc", {"xf", "yf"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("use_fn2_0", "Identity", {"fn2:0"}, {{"T", DT_FLOAT}}, kDevice),
+
+       // Specialization #3: DT_FLOAT type parameter. Only second output used.
+       NDef("fn3", "MyFunc", {"xf", "yf"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("use_fn3_1", "Identity", {"fn3:1"}, {{"T", DT_FLOAT}}, kDevice),
+
+       // Specialization #4: DT_FLOAT type parameter. Only last output used.
+       NDef("fn4", "MyFunc", {"xf", "yf"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("use_fn4_2", "Identity", {"fn4:2"}, {{"T", DT_FLOAT}}, kDevice),
+
+       // Specialization #5: DT_FLOAT type parameter. First and last outputs.
+       NDef("fn5", "MyFunc", {"xf", "yf"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("use_fn5_0", "Identity", {"fn5:0"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("use_fn5_2", "Identity", {"fn5:2"}, {{"T", DT_FLOAT}}, kDevice),
+
+       // Specialization #6: DT_FLOAT type parameter. Outputs not used.
+       // Check that function optimizer do not fail. In practice it should be
+       // pruned from the graph before passing to function optimizer.
+       NDef("fn6", "MyFunc", {"xf", "yf"}, {{"T", DT_FLOAT}}, kDevice)},
+      function_library);
+
+  GraphDef output;
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  // Make sure that MyFunc was specialized once per unique context.
+  EXPECT_EQ(6, output.library().function_size());
+
+  // And graph nodes calling specialized functions.
+  int found = 0;
+  for (const NodeDef& node : output.node()) {
+    // All function caller nodes must be specialized.
+    if (node.name() == "fn1" && found++) {
+      EXPECT_EQ("MyFunc_specialized_for_fn1", node.op());
+    } else if (node.name() == "fn2" && found++) {
+      EXPECT_EQ("MyFunc_specialized_for_fn2", node.op());
+    } else if (node.name() == "fn3" && found++) {
+      EXPECT_EQ("MyFunc_specialized_for_fn3", node.op());
+    } else if (node.name() == "fn4" && found++) {
+      EXPECT_EQ("MyFunc_specialized_for_fn4", node.op());
+    } else if (node.name() == "fn5" && found++) {
+      EXPECT_EQ("MyFunc_specialized_for_fn5", node.op());
+    } else if (node.name() == "fn6" && found++) {
+      EXPECT_EQ("MyFunc_specialized_for_fn6", node.op());
+    }
+    // And all consumers of specialized function nodes must be mapped to new
+    // output ports.
+    if (node.name() == "use_fn3_1" && found++) {
+      EXPECT_EQ("fn3:0", node.input(0));
+    } else if (node.name() == "use_fn4_2" && found++) {
+      EXPECT_EQ("fn4:0", node.input(0));
+    } else if (node.name() == "use_fn5_0" && found++) {
+      EXPECT_EQ("fn5:0", node.input(0));
+    } else if (node.name() == "use_fn5_2" && found++) {
+      EXPECT_EQ("fn5:1", node.input(0));
+    }
+  }
+  EXPECT_EQ(10, found);
+
+  // And that graph evaluation yields the same result.
+  Tensor pi = test::AsScalar<float>(3.14f);
+  item.fetch = {"use_fn1_0", "use_fn1_1", "use_fn1_2", "use_fn2_0",
+                "use_fn3_1", "use_fn4_2", "use_fn5_0", "use_fn5_2"};
+  item.feed = {{"xf", pi}, {"yf", pi}};
+
+  auto tensors_expected = EvaluateFetchNodes(item);
+  GrapplerItem optimized(item, std::move(output));
+  auto tensors = EvaluateFetchNodes(optimized);
+
+  ASSERT_EQ(tensors_expected.size(), tensors.size());
+  for (int i = 0; i < item.fetch.size(); ++i) {
+    test::ExpectTensorEqual<float>(tensors_expected[i], tensors[i]);
+  }
+}
+
 TEST_F(FunctionOptimizerTest, PruningUselessLibraryFunctions) {
   using test::function::NDef;
   FunctionOptimizer optimizer(RewriterConfig::DEFAULT);
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
index 79a0726597..e15b9e12f8 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc
@@ -285,28 +285,30 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibrary) {
                                            output.library());
 
   // Specialized and optimized functions should be added to the graph.
-  EXPECT_EQ(5, optimized_flib.num_functions());
+  EXPECT_EQ(6, optimized_flib.num_functions());
 
   // MyQuadratic should be specialized once:
   //   0. 'quadratic' node in the main graph
   const string optimized_0 = "MyQuadratic_specialized_for_quadratic";
 
   // MySquare should be specialized and optimized for 3 instantiations:
-  //   1.  'square' node in the main graph
-  //   2.  'square' node in the MyQuadratic specialization
-  //   3*. 'quadratic' node in the MyQuadratic specialization
-  //        has identical instantiation context to #2
+  //   1. 'square' node in the main graph
+  //   2. 'square' node in the MyQuadratic specialization (not in a fetch set)
+  //   3. 'quadratic' node in the MyQuadratic specialization (is in a fetch set)
 
   const string optimized_1 = "MySquare_specialized_for_square";
   const string optimized_2 = "MySquare_specialized_for_square_1";
+  const string optimized_3 = "MySquare_specialized_for_quadratic";
 
   const FunctionDef* optimized_func_0 = optimized_flib.Find(optimized_0);
   const FunctionDef* optimized_func_1 = optimized_flib.Find(optimized_1);
   const FunctionDef* optimized_func_2 = optimized_flib.Find(optimized_2);
+  const FunctionDef* optimized_func_3 = optimized_flib.Find(optimized_3);
 
   ASSERT_NE(optimized_func_0, nullptr);
   ASSERT_NE(optimized_func_1, nullptr);
   ASSERT_NE(optimized_func_2, nullptr);
+  ASSERT_NE(optimized_func_3, nullptr);
 
   // Graph should call optimized function.
   int count = 0;
@@ -325,14 +327,13 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibrary) {
     if (node.name() == "square" && count++) {
       EXPECT_EQ(optimized_2, node.op());
     } else if (node.name() == "quadratic" && count++) {
-      // Share specialized function with the 'square' node.
-      EXPECT_EQ(optimized_2, node.op());
+      EXPECT_EQ(optimized_3, node.op());
     }
   }
   EXPECT_EQ(2, count);
 
-  const std::vector<const FunctionDef*> optimized_funcs = {optimized_func_1,
-                                                           optimized_func_2};
+  const std::vector<const FunctionDef*> optimized_funcs = {
+      optimized_func_1, optimized_func_2, optimized_func_3};
 
   // MyMul should be inlined into all optimized versions of MySquare.
   for (const FunctionDef* optimized_func : optimized_funcs) {
@@ -378,6 +379,108 @@ TEST_F(MetaOptimizerTest, OptimizeFunctionLibrary) {
   test::ExpectTensorEqual<int>(tensors_expected[1], tensors[1]);
 }
 
+TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryPruneFunctionBody) {
+  using test::function::NDef;
+
+  // Enable function optimization and pruning.
+  RewriterConfig rewriter_config;
+  rewriter_config.set_meta_optimizer_iterations(RewriterConfig::TWO);
+  rewriter_config.set_function_optimization(RewriterConfig::ON);
+  rewriter_config.add_optimizers("function");
+  rewriter_config.add_optimizers("pruning");
+  rewriter_config.set_min_graph_nodes(-1);
+
+  MetaOptimizer optimizer(nullptr, rewriter_config);
+
+  // MyFunc defines two Mul nodes inside function body and two corresponding
+  // function outputs.
+  FunctionDef my_func = FunctionDefHelper::Create(
+      "MyFunc", {"x:T", "y:T"}, {"z1:T", "z2:T"}, {"T: {float, double}"},
+      {{{"mul1"}, "Mul", {"x", "y"}, {{"T", "$T"}}},
+       {{"mul2"}, "Mul", {"x", "y"}, {{"T", "$T"}}}},
+      /* Mapping between function returns and function node outputs. */
+      {{"z1", "mul1:z:0"}, {"z2", "mul2:z:0"}});
+  (*my_func.mutable_attr())["_noinline"].set_b(true);
+
+  // Tensorflow graph:
+  //
+  //   a = tf.Placeholder(tf.float);
+  //   b = tf.Placeholder(tf.int32);
+  //
+  //   fn1 = MyFunc(a, b);
+  //   fn2 = MyFunc(a, b);
+  //
+  // Fetch: fn1:0 and fn2:1 via Identity nodes.
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef("a", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+       NDef("b", "Placeholder", {}, {{"dtype", DT_FLOAT}}, kDevice),
+       // Calls into function library
+       NDef("fn1", "MyFunc", {"a", "b"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("fn2", "MyFunc", {"a", "b"}, {{"T", DT_FLOAT}}, kDevice),
+       // Read outputs of function call nodes
+       NDef("out_fn1", "Identity", {"fn1:0"}, {{"T", DT_FLOAT}}, kDevice),
+       NDef("out_fn2", "Identity", {"fn2:1"}, {{"T", DT_FLOAT}}, kDevice)},
+      // FunctionLib
+      {my_func});
+
+  GraphDef output;
+  TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output));
+
+  FunctionLibraryDefinition optimized_flib(OpRegistry::Global(),
+                                           output.library());
+
+  // Specialized and optimized functions should be added to the graph.
+  EXPECT_EQ(2, optimized_flib.num_functions());
+
+  // Expected names of the specialized and optimized functions.
+  const string optimized_fn1 = "MyFunc_specialized_for_fn1";
+  const string optimized_fn2 = "MyFunc_specialized_for_fn2";
+
+  const FunctionDef* optimized_func_fn1 = optimized_flib.Find(optimized_fn1);
+  const FunctionDef* optimized_func_fn2 = optimized_flib.Find(optimized_fn2);
+
+  ASSERT_NE(optimized_func_fn1, nullptr);
+  ASSERT_NE(optimized_func_fn2, nullptr);
+
+  // Graph should call optimized function.
+  int count = 0;
+  for (const NodeDef& node : output.node()) {
+    if (node.name() == "fn1" && count++) {
+      EXPECT_EQ(optimized_fn1, node.op());
+    } else if (node.name() == "fn2" && count++) {
+      EXPECT_EQ(optimized_fn2, node.op());
+    }
+  }
+  EXPECT_EQ(2, count);
+
+  // Specialized MyFuncs should have just one Mul node and single output arg.
+
+  // 1. Specialized for fn1:0.
+  ASSERT_EQ(1, optimized_func_fn1->node_def_size());
+  EXPECT_EQ(1, optimized_func_fn1->signature().output_arg_size());
+  EXPECT_EQ("z1", optimized_func_fn1->signature().output_arg(0).name());
+  EXPECT_EQ("mul1", optimized_func_fn1->node_def(0).name());
+
+  // 2. Specialized for fn2:1.
+  ASSERT_EQ(1, optimized_func_fn2->node_def_size());
+  EXPECT_EQ(1, optimized_func_fn2->signature().output_arg_size());
+  EXPECT_EQ("z2", optimized_func_fn2->signature().output_arg(0).name());
+  EXPECT_EQ("mul2", optimized_func_fn2->node_def(0).name());
+
+  // Verify that output tensors are equal.
+  item.fetch = {"out_fn1", "out_fn2"};
+  item.feed.emplace_back("a", test::AsScalar<float>(2.0f));
+  item.feed.emplace_back("b", test::AsScalar<float>(3.123f));
+  auto tensors_expected = EvaluateFetchNodes(item);
+
+  GrapplerItem optimized(item, std::move(output));
+  auto tensors = EvaluateFetchNodes(optimized);
+
+  test::ExpectTensorEqual<float>(tensors_expected[0], tensors[0]);
+  test::ExpectTensorEqual<float>(tensors_expected[1], tensors[1]);
+}
+
 TEST_F(MetaOptimizerTest, OptimizeFunctionLibraryWithRestrictions) {
   using test::function::NDef;
   using FDH = FunctionDefHelper;
diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc
index 6861fb423c..bfb5a2ad84 100644
--- a/tensorflow/core/grappler/utils/functions.cc
+++ b/tensorflow/core/grappler/utils/functions.cc
@@ -685,6 +685,47 @@ Status ReplaceInputWithConst(const NodeDef& input_const, int input_position,
   return Status::OK();
 }
 
+Status RemoveUnusedOutputs(const gtl::FlatSet<int>& active_outputs,
+                           GrapplerFunctionItem* item,
+                           std::vector<std::pair<int, int>>* output_mapping) {
+  DCHECK(output_mapping->empty());
+
+  // Do some sanity checking of the active outputs positions.
+  for (int active_output : active_outputs) {
+    if (active_output < 0 || active_output >= item->output_size()) {
+      return errors::InvalidArgument(
+          "Active output position is out of bound: active_output=",
+          active_output, " num_output_args=", item->output_size());
+    }
+  }
+
+  gtl::FlatSet<const OutputArgExpansion*> unused_output_args;
+
+  const auto is_unused_output_arg = [&](const OutputArgExpansion& output) {
+    return unused_output_args.find(&output) != unused_output_args.end();
+  };
+
+  for (int i = 0; i < item->output_size(); ++i) {
+    const OutputArgExpansion& output = item->output(i);
+    DCHECK(output.output_tensors.size() == 1)
+        << "Output arg expansion must have single tensor";
+
+    if (active_outputs.find(i) == active_outputs.end()) {
+      VLOG(3) << "Remove unused output: output_name=" << output.output_name
+              << " output_position=" << i;
+      unused_output_args.insert(&output);
+    } else if (!unused_output_args.empty()) {
+      // Add output mapping only if output position changed.
+      output_mapping->push_back({i, i - unused_output_args.size()});
+    }
+  }
+
+  auto& o = item->output_arg_expansions_;
+  o.erase(std::remove_if(o.begin(), o.end(), is_unused_output_arg), o.end());
+
+  return Status::OK();
+}
+
 Status MakeFunctionDef(const GrapplerFunctionItem& item,
                        const FunctionLibraryDefinition& flib,
                        FunctionDef* func) {
diff --git a/tensorflow/core/grappler/utils/functions.h b/tensorflow/core/grappler/utils/functions.h
index ef944ced09..dc8c3f1d11 100644
--- a/tensorflow/core/grappler/utils/functions.h
+++ b/tensorflow/core/grappler/utils/functions.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -167,6 +168,9 @@ class GrapplerFunctionItem : public GrapplerItem {
  private:
   friend Status ReplaceInputWithConst(const NodeDef&, int,
                                       GrapplerFunctionItem*);
+  friend Status RemoveUnusedOutputs(
+      const gtl::FlatSet<int>& active_outputs, GrapplerFunctionItem* item,
+      std::vector<std::pair<int, int>>* output_mapping);
 
   string description_;
   AttrValueMap func_attr_;  // Attributes specific to function definition that
@@ -216,13 +220,23 @@ Status RegisterGrapplerFunctionConnectivity(
 Status ReplaceInputWithConst(const NodeDef& input_const, int input_position,
                              GrapplerFunctionItem* item);
 
+// Remove function output arguments that do not have any active outputs (output
+// tensor connected to other node inputs or in a fetch set). Active outputs uses
+// GraphDef output position encoding, and multiple active outputs could
+// potentially be connected to the same output argument (in case of tensor list
+// outputs). Add output mapping for all active outputs that changed it's output
+// position (std::pair<old position, new position>).
+Status RemoveUnusedOutputs(const gtl::FlatSet<int>& active_outputs,
+                           GrapplerFunctionItem* item,
+                           std::vector<std::pair<int, int>>* output_mapping);
+
 // Make a GrapplerFunctionItem from the function definition and function
 // instantiation attributes (caller node attributes). Returns error if the given
 // function def cannot be converted (e.g. not all attributes are defined).
 Status MakeGrapplerFunctionItem(const FunctionDef& func,
                                 const AttrValueMap& func_instantiation_attr,
                                 const FunctionLibraryDefinition& flib,
-                                const int graph_def_version,
+                                int graph_def_version,
                                 GrapplerFunctionItem* item);
 
 // Make a GrapplerFunction item from the function definition. Function must be
@@ -232,7 +246,7 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
 // without specializing it to it's instantiation attributes (at least types)?
 Status MakeGrapplerFunctionItem(const FunctionDef& func,
                                 const FunctionLibraryDefinition& flib,
-                                const int graph_def_version,
+                                int graph_def_version,
                                 GrapplerFunctionItem* item);
 
 // Make a FunctionDef from the GrapplerFunctionItem. Use function library
-- 
GitLab


From efd1fb3c20a16a9a65c9186fa756944fad73635e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 11:16:08 -0700
Subject: [PATCH 1038/1085] This CL changes the defun descriptor to create a
 separate polymorphic function for each instance.

PiperOrigin-RevId: 217351144
---
 tensorflow/python/eager/function.py      | 34 +++++++++++++++-
 tensorflow/python/eager/function_test.py | 49 ++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index e8d5416245..6b37ab9410 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1077,6 +1077,10 @@ class PolymorphicFunction(object):
     self._function_attributes = attributes or {}
 
     self._lock = threading.Lock()
+    # _descriptor_cache is a of instance of a class to an instance-specific
+    # PolymorphicFunction, used to make sure defun-decorated methods create
+    # different functions for each instance.
+    self._descriptor_cache = weakref.WeakKeyDictionary()
 
     fullargspec = tf_inspect.getfullargspec(self._python_function)
     if tf_inspect.ismethod(self._python_function):
@@ -1151,8 +1155,34 @@ class PolymorphicFunction(object):
     #   foo = Foo()
     #   foo.bar()  # `foo.bar` is a `PolymorphicFunction` instance
     #
-    # then `instance` will be `foo` (and `owner` will be `Foo`).
-    return functools.partial(self.__call__, instance)
+    # then `instance` will be `foo` (and `owner` will be `Foo`).  We create a
+    # new instance of PolymorphicFunction here to allow different instances each
+    # to create variables once, thereby allowing methods to be decorated with
+    # defun. Keeps a cache to avoid retracing the function every time the
+    # descriptor is accessed.
+    if instance not in self._descriptor_cache:
+      if instance is None:
+        return self
+      # If there is no instance-specific polymorphic func in the cache,
+      # we construct an instance-specific polymorphic function
+      # that uses a weak reference to the instance (so that the instance will
+      # be correctly gc'd).
+      def make_partial_py_func(py_func, weak_instance):
+        return lambda *args, **kwargs: py_func(weak_instance(), *args, **kwargs)
+      weak_instance = weakref.ref(instance)
+      instance_func = PolymorphicFunction(
+          make_partial_py_func(self.python_function, weak_instance),
+          name=self._name)
+
+      # And we wrap the function with tf_decorator so inspection works correctly
+      wrapped_instance_func = tf_decorator.make_decorator(
+          self.python_function, instance_func)
+
+      # And finally add the wrapped function to the description cache
+      self._descriptor_cache[instance] = wrapped_instance_func
+
+    # Return the cached polymorphic function for the instance
+    return self._descriptor_cache[instance]
 
   def _cache_key(self, args, kwargs):
     """Computes the cache key given inputs and execution context."""
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 4f947e91bf..2d75b2c246 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -60,6 +60,7 @@ from tensorflow.python.training import momentum
 from tensorflow.python.training import training_ops
 from tensorflow.python.util import compat
 from tensorflow.python.util import nest
+from tensorflow.python.util import tf_inspect
 
 
 class MiniModel(keras_training.Model):
@@ -2338,6 +2339,54 @@ class FunctionTest(test.TestCase):
         self.assertEqual(len(maybe_add._function_cache), 3)
         self.assertEqual(len(add._function_cache), 2)
 
+  def testDecoratedMethod(self):
+    m = DefunnedMiniModel()
+    instance_call_one = m.call(array_ops.ones([1, 2]), training=True)
+    instance_call_two = m.call(
+        inputs=array_ops.ones([1, 2]), training=True)
+    class_call = DefunnedMiniModel.call(m, array_ops.ones([1, 2]),
+                                        training=True)
+    self.assertAllEqual(instance_call_one, instance_call_two)
+    self.assertAllEqual(instance_call_one, class_call)
+
+  def testDecoratedMethodUniquePolymorphicFuncPerInstance(self):
+    m = DefunnedMiniModel()
+    n = DefunnedMiniModel()
+
+    class_method_one = DefunnedMiniModel.call
+    class_method_two = DefunnedMiniModel.call
+
+    m_method_one = m.call
+    m_method_two = m.call
+
+    n_method_one = n.call
+    n_method_two = n.call
+
+    self.assertEqual(class_method_one, class_method_two)
+    self.assertEqual(m_method_one, m_method_two)
+    self.assertEqual(n_method_one, n_method_two)
+    self.assertNotEqual(m.call, n.call)
+
+  def testDecoratedMethodInspect(self):
+    m = DefunnedMiniModel()
+    fullargspec = tf_inspect.getfullargspec(m.call)
+    self.assertTrue('training' in fullargspec.args)
+
+  def testDecoratedMethodGetConcreteFunction(self):
+    m = DefunnedMiniModel()
+    instance_call_one = m.call.get_concrete_function(
+        array_ops.ones([1, 2]), training=False)
+    instance_call_two = m.call.get_concrete_function(
+        inputs=array_ops.ones([1, 2]), training=False)
+    self.assertAllEqual(instance_call_one(array_ops.ones([1, 2])),
+                        instance_call_two(array_ops.ones([1, 2])))
+
+    # Also make sure get_concrete_function works on the class method
+    DefunnedMiniModel.call.get_concrete_function(
+        m, array_ops.ones([1, 2]), training=False)
+    DefunnedMiniModel.call.get_concrete_function(
+        m, inputs=array_ops.ones([1, 2]), training=True)
+
 
 @test_util.with_c_shapes
 class AutomaticControlDependenciesTest(test.TestCase):
-- 
GitLab


From 8d6afba2a8882e2f5abf2d7790855caf2725e42b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 11:23:52 -0700
Subject: [PATCH 1039/1085] Update ops-related pbtxt files.

PiperOrigin-RevId: 217352683
---
 tensorflow/core/ops/compat/ops_history.v1.pbtxt | 12 ++++++++++++
 tensorflow/core/ops/ops.pbtxt                   | 12 ++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 63d037c743..ed4c3f9a62 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -31299,6 +31299,18 @@ op {
     type: DT_STRING
   }
 }
+op {
+  name: "MatchingFilesDataset"
+  input_arg {
+    name: "patterns"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  is_stateful: true
+}
 op {
   name: "MatrixBandPart"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 9a566c9d84..3b898f2155 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -15814,6 +15814,18 @@ op {
     type: DT_STRING
   }
 }
+op {
+  name: "MatchingFilesDataset"
+  input_arg {
+    name: "patterns"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "handle"
+    type: DT_VARIANT
+  }
+  is_stateful: true
+}
 op {
   name: "MatrixBandPart"
   input_arg {
-- 
GitLab


From 91286db9454ff73807f38fadb6f41eec7cee2bdf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 11:30:16 -0700
Subject: [PATCH 1040/1085] Handle comma separated list of placholder types in
 'optimize_for_inference.py' tool.

PiperOrigin-RevId: 217353860
---
 .../python/tools/optimize_for_inference.py      | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/tools/optimize_for_inference.py b/tensorflow/python/tools/optimize_for_inference.py
index dac6a06a89..fbf8c2d709 100644
--- a/tensorflow/python/tools/optimize_for_inference.py
+++ b/tensorflow/python/tools/optimize_for_inference.py
@@ -88,7 +88,7 @@ def main(unused_args):
       input_graph_def,
       FLAGS.input_names.split(","),
       FLAGS.output_names.split(","),
-      FLAGS.placeholder_type_enum,
+      _parse_placeholder_types(FLAGS.placeholder_type_enum),
       FLAGS.toco_compatible)
 
   if FLAGS.frozen_graph:
@@ -101,6 +101,12 @@ def main(unused_args):
   return 0
 
 
+def _parse_placeholder_types(values):
+  """Extracts placeholder types from a comma separate list."""
+  values = [int(value) for value in values.split(",")]
+  return values if len(values) > 1 else values[0]
+
+
 def parse_args():
   """Parses command line arguments."""
   parser = argparse.ArgumentParser()
@@ -137,9 +143,12 @@ def parse_args():
       """)
   parser.add_argument(
       "--placeholder_type_enum",
-      type=int,
-      default=dtypes.float32.as_datatype_enum,
-      help="The AttrValue enum to use for placeholders.")
+      type=str,
+      default=str(dtypes.float32.as_datatype_enum),
+      help="""\
+      The AttrValue enum to use for placeholders.
+      Or a comma separated list, one value for each placeholder.\
+      """)
   parser.add_argument(
       "--toco_compatible",
       type=bool,
-- 
GitLab


From eb428951c53790ba6d424eac20111dce1d64599c Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 16 Oct 2018 11:40:40 -0700
Subject: [PATCH 1041/1085] Automated rollback of commit
 23df2724a7f5ed2d58b5090de4d525db39838da2

PiperOrigin-RevId: 217355812
---
 .../python/autograph/converters/call_trees.py | 19 +++++++++++-----
 .../python/autograph/pyct/inspect_utils.py    | 22 +++++--------------
 .../pyct/static_analysis/live_values.py       | 16 ++++++++++----
 3 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index 0170173e61..ca6945266e 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -308,12 +308,7 @@ class CallTreeTransformer(converter.Base):
         target_fqn = anno.getanno(node.func, 'fqn')
       else:
         target_fqn = None
-
-      if inspect_utils.isbuiltin(target_entity):
-        # Note: Any builtin that passed the builtins converter is assumed to be
-        # safe for graph mode.
-        return node
-      elif self._function_is_compilable(target_entity):
+      if self._function_is_compilable(target_entity):
         node = self._rename_compilable_function(node)
       elif target_fqn and target_fqn in KNOWN_NUMPY_FUNCTIONS:
         # TODO(mdan): Should we replace these with equivalent TF ops instead?
@@ -323,6 +318,18 @@ class CallTreeTransformer(converter.Base):
         raise NotImplementedError(
             'py_func with return values (unknown function)')
     else:
+      if anno.hasanno(node.func, anno.Basic.QN):
+        # Special-case a few builtins that otherwise go undetected. This
+        # normally doesn't pose a problem, but the dict built-in doesn't
+        # work with inspect.getargspec which is required for dynamic functions.
+        # Note: expecting this is resilient to aliasing (e.g.
+        # dict = an_evil_dict), because in those cases the regular mechanisms
+        # process a simple user function.
+        qn = anno.getanno(node.func, anno.Basic.QN)
+        # Add items to this list as needed.
+        if str(qn) in ('dict',):
+          return node
+
       if ast_util.matches(node, 'super(_)'):
         # super() calls are preserved. The class conversion mechanism will
         # ensure that they return the correct value.
diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index 6d5cced0ac..a09d481003 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -29,25 +29,15 @@ import six
 from tensorflow.python.util import tf_inspect
 
 
-# These functions test negative for isinstance(*, types.BuiltinFunctionType)
-# and inspect.isbuiltin, and are generally not visible in globals().
-SPECIAL_BUILTINS = {
-    'dict': dict,
-    'float': float,
-    'int': int,
-    'print': print,
-    'range': range,
-    'tuple': tuple
-}
-
-if six.PY2:
-  SPECIAL_BUILTINS['xrange'] = xrange
-
-
 def isbuiltin(f):
   """Returns True if the argument is a built-in function."""
-  if f in SPECIAL_BUILTINS.values():
+  # Note these return false for isinstance(f, types.BuiltinFunctionType) so we
+  # need to specifically check for them.
+  if f in (range, int, float):
     return True
+  if six.PY2:
+    if f in (xrange,):
+      return True
   if isinstance(f, types.BuiltinFunctionType):
     return True
   if tf_inspect.isbuiltin(f):
diff --git a/tensorflow/python/autograph/pyct/static_analysis/live_values.py b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
index e8e3d229be..dc363f9a47 100644
--- a/tensorflow/python/autograph/pyct/static_analysis/live_values.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/live_values.py
@@ -24,12 +24,21 @@ from __future__ import division
 from __future__ import print_function
 
 import gast
+import six
 
 from tensorflow.python.autograph.pyct import anno
-from tensorflow.python.autograph.pyct import inspect_utils
 from tensorflow.python.autograph.pyct import transformer
 
 
+# TODO(aqj): Do we need this? Do other builtins fail in similar ways
+# See b/114389775 for a related bug in pyct
+# These symbols are legal in Python, but don't appear in the namespace.
+_SPECIAL_SYMBOLS = {'range': range, 'print': print}
+
+if six.PY2:
+  _SPECIAL_SYMBOLS['xrange'] = xrange
+
+
 class LiveValueResolver(transformer.Base):
   """Annotates nodes with live values."""
 
@@ -66,11 +75,10 @@ class LiveValueResolver(transformer.Base):
             # If the symbol value is for example a primitive, then it will not
             # have a name.
             pass
-        elif node.id in inspect_utils.SPECIAL_BUILTINS:
+        elif node.id in _SPECIAL_SYMBOLS:
           # Note: if the user redefined any of these symbols, then they would
           # be visible in the namespace and we would never reach this branch.
-          anno.setanno(
-              node, 'live_val', inspect_utils.SPECIAL_BUILTINS[node.id])
+          anno.setanno(node, 'live_val', _SPECIAL_SYMBOLS[node.id])
         else:
           pass
           # TODO(mdan): Should we raise an error here?
-- 
GitLab


From 4d2d6ddb16036cb390c5dea2b47065fe584d3c1b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 11:50:51 -0700
Subject: [PATCH 1042/1085] Go: Update generated wrapper functions for
 TensorFlow ops. PiperOrigin-RevId: 217357664

---
 tensorflow/go/op/wrappers.go | 228 +++++++++++++++++------------------
 1 file changed, 114 insertions(+), 114 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 662f6f227a..6b1ddef852 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -4843,6 +4843,120 @@ func CudnnRNNParamsToCanonical(scope *Scope, num_layers tf.Output, num_units tf.
 	return weights, biases
 }
 
+// CudnnRNNBackpropV2Attr is an optional argument to CudnnRNNBackpropV2.
+type CudnnRNNBackpropV2Attr func(optionalAttr)
+
+// CudnnRNNBackpropV2RnnMode sets the optional rnn_mode attribute to value.
+// If not specified, defaults to "lstm"
+func CudnnRNNBackpropV2RnnMode(value string) CudnnRNNBackpropV2Attr {
+	return func(m optionalAttr) {
+		m["rnn_mode"] = value
+	}
+}
+
+// CudnnRNNBackpropV2InputMode sets the optional input_mode attribute to value.
+// If not specified, defaults to "linear_input"
+func CudnnRNNBackpropV2InputMode(value string) CudnnRNNBackpropV2Attr {
+	return func(m optionalAttr) {
+		m["input_mode"] = value
+	}
+}
+
+// CudnnRNNBackpropV2Direction sets the optional direction attribute to value.
+// If not specified, defaults to "unidirectional"
+func CudnnRNNBackpropV2Direction(value string) CudnnRNNBackpropV2Attr {
+	return func(m optionalAttr) {
+		m["direction"] = value
+	}
+}
+
+// CudnnRNNBackpropV2Dropout sets the optional dropout attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNBackpropV2Dropout(value float32) CudnnRNNBackpropV2Attr {
+	return func(m optionalAttr) {
+		m["dropout"] = value
+	}
+}
+
+// CudnnRNNBackpropV2Seed sets the optional seed attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNBackpropV2Seed(value int64) CudnnRNNBackpropV2Attr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// CudnnRNNBackpropV2Seed2 sets the optional seed2 attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNBackpropV2Seed2(value int64) CudnnRNNBackpropV2Attr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Backprop step of CudnnRNN.
+//
+// Compute the backprop of both data and weights in a RNN. Takes an extra
+//     "host_reserved" inupt than CudnnRNNBackprop, which is used to determine RNN
+//     cudnnRNNAlgo_t and cudnnMathType_t.
+//
+// rnn_mode: Indicates the type of the RNN model.
+// input_mode: Indicates whether there is a linear projection between the input and
+//     the actual computation before the first layer. 'skip_input' is only allowed
+//     when input_size == num_units; 'auto_select' implies 'skip_input' when
+//     input_size == num_units; otherwise, it implies 'linear_input'.
+// direction: Indicates whether a bidirectional model will be used. Should be
+//   "unidirectional" or "bidirectional".
+// dropout: Dropout probability. When set to 0., dropout is disabled.
+// seed: The 1st part of a seed to initialize dropout.
+// seed2: The 2nd part of a seed to initialize dropout.
+// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
+// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
+//     num_units].
+// input_c: For LSTM, a 3-D tensor with the shape of
+//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
+// params: A 1-D tensor that contains the weights and biases in an opaque layout.
+//     The size must be created through CudnnRNNParamsSize, and initialized
+//     separately. Note that they might not be compatible across different
+//     generations. So it is a good idea to save and restore
+// output: A 3-D tensor with the shape of [seq_length, batch_size,
+//     dir * num_units].
+// output_h: The same shape has input_h.
+// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
+// output_backprop: A 3-D tensor with the same shape as output in the forward pass.
+// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward
+//     pass.
+// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward
+//     pass.
+// reserve_space: The same reserve_space produced in the forward operation.
+// host_reserved: The same host_reserved produced in the forward operation.
+// input_backprop: The backprop to input in the forward pass. Has the same shape
+//     as input.
+// input_h_backprop: The backprop to input_h in the forward pass. Has the same
+//     shape as input_h.
+// input_c_backprop: The backprop to input_c in the forward pass. Has the same
+//     shape as input_c.
+// params_backprop: The backprop to the params buffer in the forward pass. Has the
+//     same shape as params.
+func CudnnRNNBackpropV2(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, host_reserved tf.Output, optional ...CudnnRNNBackpropV2Attr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CudnnRNNBackpropV2",
+		Input: []tf.Input{
+			input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space, host_reserved,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
 // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign.
 type ResourceStridedSliceAssignAttr func(optionalAttr)
 
@@ -14396,120 +14510,6 @@ func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Out
 	return scope.AddOperation(opspec)
 }
 
-// CudnnRNNBackpropV2Attr is an optional argument to CudnnRNNBackpropV2.
-type CudnnRNNBackpropV2Attr func(optionalAttr)
-
-// CudnnRNNBackpropV2RnnMode sets the optional rnn_mode attribute to value.
-// If not specified, defaults to "lstm"
-func CudnnRNNBackpropV2RnnMode(value string) CudnnRNNBackpropV2Attr {
-	return func(m optionalAttr) {
-		m["rnn_mode"] = value
-	}
-}
-
-// CudnnRNNBackpropV2InputMode sets the optional input_mode attribute to value.
-// If not specified, defaults to "linear_input"
-func CudnnRNNBackpropV2InputMode(value string) CudnnRNNBackpropV2Attr {
-	return func(m optionalAttr) {
-		m["input_mode"] = value
-	}
-}
-
-// CudnnRNNBackpropV2Direction sets the optional direction attribute to value.
-// If not specified, defaults to "unidirectional"
-func CudnnRNNBackpropV2Direction(value string) CudnnRNNBackpropV2Attr {
-	return func(m optionalAttr) {
-		m["direction"] = value
-	}
-}
-
-// CudnnRNNBackpropV2Dropout sets the optional dropout attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNBackpropV2Dropout(value float32) CudnnRNNBackpropV2Attr {
-	return func(m optionalAttr) {
-		m["dropout"] = value
-	}
-}
-
-// CudnnRNNBackpropV2Seed sets the optional seed attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNBackpropV2Seed(value int64) CudnnRNNBackpropV2Attr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// CudnnRNNBackpropV2Seed2 sets the optional seed2 attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNBackpropV2Seed2(value int64) CudnnRNNBackpropV2Attr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// Backprop step of CudnnRNN.
-//
-// Compute the backprop of both data and weights in a RNN. Takes an extra
-//     "host_reserved" inupt than CudnnRNNBackprop, which is used to determine RNN
-//     cudnnRNNAlgo_t and cudnnMathType_t.
-//
-// rnn_mode: Indicates the type of the RNN model.
-// input_mode: Indicates whether there is a linear projection between the input and
-//     the actual computation before the first layer. 'skip_input' is only allowed
-//     when input_size == num_units; 'auto_select' implies 'skip_input' when
-//     input_size == num_units; otherwise, it implies 'linear_input'.
-// direction: Indicates whether a bidirectional model will be used. Should be
-//   "unidirectional" or "bidirectional".
-// dropout: Dropout probability. When set to 0., dropout is disabled.
-// seed: The 1st part of a seed to initialize dropout.
-// seed2: The 2nd part of a seed to initialize dropout.
-// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
-// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
-//     num_units].
-// input_c: For LSTM, a 3-D tensor with the shape of
-//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
-// params: A 1-D tensor that contains the weights and biases in an opaque layout.
-//     The size must be created through CudnnRNNParamsSize, and initialized
-//     separately. Note that they might not be compatible across different
-//     generations. So it is a good idea to save and restore
-// output: A 3-D tensor with the shape of [seq_length, batch_size,
-//     dir * num_units].
-// output_h: The same shape has input_h.
-// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
-// output_backprop: A 3-D tensor with the same shape as output in the forward pass.
-// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward
-//     pass.
-// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward
-//     pass.
-// reserve_space: The same reserve_space produced in the forward operation.
-// host_reserved: The same host_reserved produced in the forward operation.
-// input_backprop: The backprop to input in the forward pass. Has the same shape
-//     as input.
-// input_h_backprop: The backprop to input_h in the forward pass. Has the same
-//     shape as input_h.
-// input_c_backprop: The backprop to input_c in the forward pass. Has the same
-//     shape as input_c.
-// params_backprop: The backprop to the params buffer in the forward pass. Has the
-//     same shape as params.
-func CudnnRNNBackpropV2(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, host_reserved tf.Output, optional ...CudnnRNNBackpropV2Attr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CudnnRNNBackpropV2",
-		Input: []tf.Input{
-			input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space, host_reserved,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
-}
-
 // StringFormatAttr is an optional argument to StringFormat.
 type StringFormatAttr func(optionalAttr)
 
-- 
GitLab


From 59f873cf45a10fcb029ae282448df921a71a4d4c Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Tue, 16 Oct 2018 12:03:02 -0700
Subject: [PATCH 1043/1085] fixing minor documentation nits

PiperOrigin-RevId: 217359845
---
 tensorflow/contrib/distribute/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/distribute/README.md b/tensorflow/contrib/distribute/README.md
index 2e025765e4..b416619fc1 100644
--- a/tensorflow/contrib/distribute/README.md
+++ b/tensorflow/contrib/distribute/README.md
@@ -20,7 +20,7 @@ on many GPUs on one machine. Essentially, we create copies of all variables in
 the model's layers on each device. We then use all-reduce to combine gradients
 across the devices before applying them to the variables to keep them in sync.
 * [`CollectiveAllReduceStrategy`](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/distribute/CollectiveAllReduceStrategy):
-This is a version of `MirroredStrategy` for multi-working training. It uses
+This is a version of `MirroredStrategy` for multi-worker training. It uses
 a collective op to do all-reduce. This supports between-graph communication and
 synchronization, and delegates the specifics of the all-reduce implementation to
 the runtime (as opposed to encoding it in the graph). This allows it to perform
@@ -31,8 +31,8 @@ fault-tolerance to allow training to continue when there is worker failure.
 * [`ParameterServerStrategy`](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/distribute/ParameterServerStrategy):
 This strategy supports using parameter servers either for multi-GPU local
 training or asynchronous multi-machine training. When used to train locally,
-variables are not mirrored, instead they placed on the CPU and operations are
-replicated across all local GPUs. In a multi-machine setting, some are
+variables are not mirrored, instead they are placed on the CPU and operations
+are replicated across all local GPUs. In a multi-machine setting, some are
 designated as workers and some as parameter servers. Each variable is placed on
 one parameter server. Computation operations are replicated across all GPUs of
 the workers.
-- 
GitLab


From 6bfb36b241dadfecb345edb0589a8d0ae72dc968 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 12:06:21 -0700
Subject: [PATCH 1044/1085] Move from deprecated self.test_session() to
 self.session() or self.cached_session().

Move to cached_session() if the session is create more than once per test. Move to session() otherwise.

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to session() instead which slightly changes the semantic of the function:
* the session is not cached anymore (a new session is created).
* the session is closed when exiting the "with" scope.

PiperOrigin-RevId: 217360604
---
 .../layers/python/layers/layers_test.py       | 36 +++++++++----------
 .../python/kernel_tests/mel_ops_test.py       |  4 +--
 .../python/kernel_tests/mfcc_ops_test.py      |  4 +--
 .../kernel_tests/reconstruction_ops_test.py   | 18 +++++-----
 .../python/kernel_tests/shape_ops_test.py     | 24 ++++++-------
 .../python/kernel_tests/spectral_ops_test.py  | 14 ++++----
 .../python/kernel_tests/window_ops_test.py    |  2 +-
 7 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 3b7ae72e9c..8ead6336a0 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -630,7 +630,7 @@ class ConvolutionTest(test.TestCase):
       expected_size = [None, num_filters, None, None]
       expected_size_dynamic = [5, num_filters, 7, 9]
 
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         images = array_ops.placeholder(np.float32,
                                        [None, input_size[1], None, None])
         output = layers_lib.convolution2d(
@@ -721,7 +721,7 @@ class Convolution2dTransposeTests(test.TestCase):
   def testOutputSizeWithStrideOneSamePaddingNCHW(self):
     # `NCHW` data format is only supported for `GPU` device.
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 32
         input_size = [5, 3, 10, 12]
         expected_size = [5, num_filters, 10, 12]
@@ -740,7 +740,7 @@ class Convolution2dTransposeTests(test.TestCase):
 
   def testOutputSizeWithStrideOneValidPaddingNCHW(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 32
         input_size = [5, 3, 10, 12]
         expected_size = [5, num_filters, 12, 14]
@@ -759,7 +759,7 @@ class Convolution2dTransposeTests(test.TestCase):
 
   def testOutputSizeWithStrideTwoValidPaddingNCHW(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 32
         input_size = [5, 3, 9, 11]
         expected_size = [5, num_filters, 19, 23]
@@ -779,7 +779,7 @@ class Convolution2dTransposeTests(test.TestCase):
 
   def testOutputSizeWith1x1StrideTwoSamePaddingNCHW(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 1
         input_size = [1, 1, 1, 1]
         expected_size = [1, num_filters, 2, 2]
@@ -799,7 +799,7 @@ class Convolution2dTransposeTests(test.TestCase):
 
   def testOutputSizeWith1x1StrideTwoValidPaddingNCHW(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 1
         input_size = [1, 1, 1, 1]
         expected_size = [1, num_filters, 2, 2]
@@ -817,7 +817,7 @@ class Convolution2dTransposeTests(test.TestCase):
 
   def testOutputSizeWith2x2StrideTwoSamePaddingNCHW(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 1
         input_size = [1, 1, 2, 2]
         expected_size = [1, num_filters, 4, 4]
@@ -835,7 +835,7 @@ class Convolution2dTransposeTests(test.TestCase):
 
   def testOutputSizeWith2x2StrideTwoValidPaddingNCHW(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 1
         input_size = [1, 1, 2, 2]
         expected_size = [1, num_filters, 4, 4]
@@ -853,7 +853,7 @@ class Convolution2dTransposeTests(test.TestCase):
 
   def testOutputSizeWithStride2x1NCHW(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 1
         input_size = [1, 1, 3, 2]
         expected_size = [1, num_filters, 6, 5]
@@ -871,7 +871,7 @@ class Convolution2dTransposeTests(test.TestCase):
 
   def testOutputSizeWithStride2x4NCHW(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 1
         input_size = [1, 1, 3, 2]
         expected_size = [1, num_filters, 6, 8]
@@ -889,7 +889,7 @@ class Convolution2dTransposeTests(test.TestCase):
 
   def testOutputSizeWithStride2x5NCHW(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         num_filters = 1
         input_size = [1, 1, 3, 2]
         expected_size = [1, num_filters, 6, 10]
@@ -2056,7 +2056,7 @@ class BatchNormTest(test.TestCase):
     channels = 3
     np.random.seed(1)
     use_gpu = fused
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.session(use_gpu=use_gpu) as sess:
       if data_format == 'NHWC':
         image_shape = (batch_size, height, width, channels)
         axis = (0, 1, 2)
@@ -2140,7 +2140,7 @@ class BatchNormTest(test.TestCase):
     channels = 3
     np.random.seed(1)
     use_gpu = fused
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.session(use_gpu=use_gpu) as sess:
       if data_format == 'NHWC':
         image_shape = (batch_size, height, width, channels)
         axis = (0, 1, 2)
@@ -2344,7 +2344,7 @@ class BatchNormTest(test.TestCase):
     np.random.seed(1)
     use_gpu = fused
     np.random.seed(1)
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.session(use_gpu=use_gpu) as sess:
       if data_format == 'NHWC':
         image_shape = (batch_size, height, width, channels)
         axis = (0, 1, 2)
@@ -2491,7 +2491,7 @@ class BatchNormTest(test.TestCase):
     channels = 3
     np.random.seed(1)
     use_gpu = fused
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.session(use_gpu=use_gpu) as sess:
       if data_format == 'NHWC':
         image_shape = (batch_size, height, width, channels)
         axis = (0, 1, 2)
@@ -2576,7 +2576,7 @@ class BatchNormTest(test.TestCase):
     channels = 32
     np.random.seed(1)
     use_gpu = fused
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.session(use_gpu=use_gpu) as sess:
       if data_format == 'NHWC':
         image_shape = (batch_size, height, width, channels)
         axis = (0, 1, 2)
@@ -2674,7 +2674,7 @@ class BatchNormTest(test.TestCase):
 
   def _runBatchNormalizationWithFormat(self, shape, data_format, is_training):
     channels = shape[-1]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       images = np.arange(np.product(shape), dtype=np.float32).reshape(shape)
       beta = init_ops.constant_initializer(
           np.arange(2, channels + 2, dtype=np.float32))
@@ -2776,7 +2776,7 @@ class BatchNormTest(test.TestCase):
             'moving_variance': variance,
         },
         data_format='NCHW')
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(variables_lib.global_variables_initializer())
       return sess.run(output)
 
diff --git a/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py
index f4348e80ea..13ee8764b7 100644
--- a/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py
+++ b/tensorflow/contrib/signal/python/kernel_tests/mel_ops_test.py
@@ -137,7 +137,7 @@ class LinearToMelTest(test.TestCase):
         # Settings used by Tacotron (https://arxiv.org/abs/1703.10135).
         (80, 1025, 24000.0, 80.0, 12000.0, dtypes.float64)
     ]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for config in configs:
         mel_matrix_np = spectrogram_to_mel_matrix(*config)
         mel_matrix = mel_ops.linear_to_mel_weight_matrix(*config)
@@ -178,7 +178,7 @@ class LinearToMelTest(test.TestCase):
         self.assertEqual(1, len(rewritten_graph.node))
 
   def test_num_spectrogram_bins_dynamic(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       num_spectrogram_bins = array_ops.placeholder(shape=(),
                                                    dtype=dtypes.int32)
       mel_matrix_np = spectrogram_to_mel_matrix(
diff --git a/tensorflow/contrib/signal/python/kernel_tests/mfcc_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/mfcc_ops_test.py
index e7743bdcba..9de1e2c2f4 100644
--- a/tensorflow/contrib/signal/python/kernel_tests/mfcc_ops_test.py
+++ b/tensorflow/contrib/signal/python/kernel_tests/mfcc_ops_test.py
@@ -46,14 +46,14 @@ class MFCCTest(test.TestCase):
   def test_basic(self):
     """A basic test that the op runs on random input."""
     with spectral_ops_test_util.fft_kernel_label_map():
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         signal = random_ops.random_normal((2, 3, 5))
         mfcc_ops.mfccs_from_log_mel_spectrograms(signal).eval()
 
   def test_unknown_shape(self):
     """A test that the op runs when shape and rank are unknown."""
     with spectral_ops_test_util.fft_kernel_label_map():
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         signal = array_ops.placeholder_with_default(
             random_ops.random_normal((2, 3, 5)), tensor_shape.TensorShape(None))
         self.assertIsNone(signal.shape.ndims)
diff --git a/tensorflow/contrib/signal/python/kernel_tests/reconstruction_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/reconstruction_ops_test.py
index 5c9b2ac518..c476cd4e00 100644
--- a/tensorflow/contrib/signal/python/kernel_tests/reconstruction_ops_test.py
+++ b/tensorflow/contrib/signal/python/kernel_tests/reconstruction_ops_test.py
@@ -55,7 +55,7 @@ class ReconstructionOpsTest(test.TestCase):
     signal = constant_op.constant(np.ones((3, 5)), dtype=dtypes.int64)
     reconstruction = reconstruction_ops.overlap_and_add(signal, 2)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       output = sess.run(reconstruction)
 
       expected_output = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])
@@ -86,7 +86,7 @@ class ReconstructionOpsTest(test.TestCase):
         (make_input(4), [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 4),
     ]
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for signal, expected, frame_hop in configurations:
         reconstruction = reconstruction_ops.overlap_and_add(
             np.array(signal), frame_hop).eval()
@@ -98,7 +98,7 @@ class ReconstructionOpsTest(test.TestCase):
                                   dtype=dtypes.int64)
     reconstruction = reconstruction_ops.overlap_and_add(signal, self.frame_hop)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       output = sess.run(reconstruction)
       string_output = [np.base_repr(x, self.bases[0]) for x in output]
 
@@ -108,7 +108,7 @@ class ReconstructionOpsTest(test.TestCase):
     signal = constant_op.constant(self.powers, dtype=dtypes.int64)
     reconstruction = reconstruction_ops.overlap_and_add(signal, self.frame_hop)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       output = sess.run(reconstruction)
 
       accumulator = True
@@ -124,7 +124,7 @@ class ReconstructionOpsTest(test.TestCase):
     signal = constant_op.constant(input_matrix, dtype=dtypes.float32)
     reconstruction = reconstruction_ops.overlap_and_add(signal, self.frame_hop)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       output = sess.run(reconstruction)
 
       string_output = [np.base_repr(int(x), self.bases[0]) for x in
@@ -143,8 +143,8 @@ class ReconstructionOpsTest(test.TestCase):
         ((2, 2, 2, 10, 128), 125),
     ]
 
-    for shape, frame_hop in configurations:
-      with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
+      for shape, frame_hop in configurations:
         signal = array_ops.zeros(shape)
         reconstruction = reconstruction_ops.overlap_and_add(signal, frame_hop)
         loss = math_ops.reduce_sum(reconstruction)
@@ -155,7 +155,7 @@ class ReconstructionOpsTest(test.TestCase):
         self.assertTrue((gradient == 1.0).all())
 
   def test_gradient_batch(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       signal = array_ops.zeros((2, 10, 10))
       frame_hop = 10
       reconstruction = reconstruction_ops.overlap_and_add(signal, frame_hop)
@@ -177,7 +177,7 @@ class ReconstructionOpsTest(test.TestCase):
       self.assertAllEqual(expected_gradient, gradient)
 
   def test_gradient_numerical(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = (2, 10, 10)
       framed_signal = array_ops.zeros(shape)
       frame_hop = 10
diff --git a/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py
index f132050153..838025a040 100644
--- a/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py
+++ b/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py
@@ -33,7 +33,7 @@ from tensorflow.python.platform import test
 class FrameTest(test.TestCase):
 
   def test_mapping_of_indices_without_padding(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       tensor = constant_op.constant(np.arange(9152), dtypes.int32)
       tensor = array_ops.expand_dims(tensor, 0)
 
@@ -48,7 +48,7 @@ class FrameTest(test.TestCase):
       self.assertAllEqual(expected, result)
 
   def test_mapping_of_indices_with_padding(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       tensor = constant_op.constant(np.arange(10000), dtypes.int32)
       tensor = array_ops.expand_dims(tensor, 0)
 
@@ -89,7 +89,7 @@ class FrameTest(test.TestCase):
     frame_length = 2
     frame_step = 1
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       result = shape_ops.frame(signal, frame_length, frame_step,
                                pad_end=True, pad_value=99).eval()
       self.assertEqual((0, 2), result.shape)
@@ -149,7 +149,7 @@ class FrameTest(test.TestCase):
         for pad_end in [False, True]:
           op = shape_ops.frame(signal, frame_length, frame_step,
                                pad_end=pad_end, pad_value=99)
-          with self.test_session(use_gpu=True):
+          with self.cached_session(use_gpu=True):
             result = op.eval()
           self.assertEqual(op.shape.as_list(), list(result.shape))
 
@@ -158,7 +158,7 @@ class FrameTest(test.TestCase):
     frame_length = 3
     frame_step = 2
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for rank in range(5):
         nd_signal = np.reshape(signal, (1,) * rank + signal.shape)
 
@@ -184,7 +184,7 @@ class FrameTest(test.TestCase):
     frame_length = 3
     frame_step = 2
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for rank in range(5):
         nd_signal = np.reshape(signal, (1,) * rank + signal.shape)
 
@@ -218,7 +218,7 @@ class FrameTest(test.TestCase):
     frame_length = 3
     frame_step = 2
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # With padding, we pad the last frame with pad_value.
       result = shape_ops.frame(signal, frame_length, frame_step,
                                pad_end=True, pad_value=99).eval()
@@ -244,7 +244,7 @@ class FrameTest(test.TestCase):
 
   def test_axis(self):
     signal = np.reshape(np.arange(16), (2, 4, 2))
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       result = shape_ops.frame(signal, frame_length=2, frame_step=2,
                                pad_end=True, axis=1)
       expected = np.reshape(np.arange(16), (2, 2, 2, 2))
@@ -279,7 +279,7 @@ class FrameTest(test.TestCase):
     frame_length = 4
     frame_step = 1
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       result = shape_ops.frame(signal, frame_length, frame_step,
                                pad_end=True, pad_value=99).eval()
       self.assertAllClose([[[1, 2, 99, 99], [2, 99, 99, 99]],
@@ -303,7 +303,7 @@ class FrameTest(test.TestCase):
     frame_length = 2
     frame_step = 3
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       result = shape_ops.frame(signal, frame_length, frame_step)
       self.assertEqual(result.dtype, signal.dtype)
 
@@ -315,7 +315,7 @@ class FrameTest(test.TestCase):
     frame_length = 2
     frame_step = 2
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       signal_placeholder = array_ops.placeholder(shape=(None, None),
                                                  dtype=dtypes.float32)
       result = sess.run(shape_ops.frame(
@@ -326,7 +326,7 @@ class FrameTest(test.TestCase):
                            [[20, 21], [22, 23]]], result)
 
   def test_gradient_numerical(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       signal_shape = (2, 128)
       signal = array_ops.ones(signal_shape)
       frame_length = 33
diff --git a/tensorflow/contrib/signal/python/kernel_tests/spectral_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/spectral_ops_test.py
index f10d78259a..5106a22f88 100644
--- a/tensorflow/contrib/signal/python/kernel_tests/spectral_ops_test.py
+++ b/tensorflow/contrib/signal/python/kernel_tests/spectral_ops_test.py
@@ -81,7 +81,7 @@ class SpectralOpsTest(test.TestCase):
 
   def _compare(self, signal, frame_length, frame_step, fft_length):
     with spectral_ops_test_util.fft_kernel_label_map(), (
-        self.test_session(use_gpu=True)) as sess:
+        self.cached_session(use_gpu=True)) as sess:
       actual_stft = spectral_ops.stft(
           signal, frame_length, frame_step, fft_length, pad_end=False)
       signal_ph = array_ops.placeholder(dtype=dtypes.as_dtype(signal.dtype))
@@ -117,7 +117,7 @@ class SpectralOpsTest(test.TestCase):
 
   def test_shapes(self):
     with spectral_ops_test_util.fft_kernel_label_map(), (
-        self.test_session(use_gpu=True)):
+        self.session(use_gpu=True)):
       signal = np.zeros((512,)).astype(np.float32)
 
       # If fft_length is not provided, the smallest enclosing power of 2 of
@@ -188,7 +188,7 @@ class SpectralOpsTest(test.TestCase):
       signal = random_ops.random_normal([signal_length])
 
       with spectral_ops_test_util.fft_kernel_label_map(), (
-          self.test_session(use_gpu=True)) as sess:
+          self.cached_session(use_gpu=True)) as sess:
         stft = spectral_ops.stft(signal, frame_length, frame_step, fft_length,
                                  pad_end=False)
         inverse_stft = spectral_ops.inverse_stft(stft, frame_length, frame_step,
@@ -234,7 +234,7 @@ class SpectralOpsTest(test.TestCase):
       inverse_window_fn = spectral_ops.inverse_stft_window_fn(frame_step)
       inverse_window = inverse_window_fn(frame_length, dtype=dtypes.float32)
 
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         hann_window, inverse_window = sess.run([hann_window, inverse_window])
 
       # Expect unit gain at each phase of the window.
@@ -262,7 +262,7 @@ class SpectralOpsTest(test.TestCase):
       inverse_window_fn = spectral_ops.inverse_stft_window_fn(frame_step)
       inverse_window = inverse_window_fn(frame_length, dtype=dtypes.float32)
 
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         hann_window, inverse_window = sess.run([hann_window, inverse_window])
 
       self.assertAllClose(hann_window, inverse_window * 1.5)
@@ -279,7 +279,7 @@ class SpectralOpsTest(test.TestCase):
   def test_gradients(self):
     """Test that spectral_ops.stft has a working gradient."""
     with spectral_ops_test_util.fft_kernel_label_map(), (
-        self.test_session(use_gpu=True)) as sess:
+        self.session(use_gpu=True)) as sess:
       signal_length = 512
 
       # An all-zero signal has all zero gradients with respect to the sum of the
@@ -298,7 +298,7 @@ class SpectralOpsTest(test.TestCase):
 
   def test_gradients_numerical(self):
     with spectral_ops_test_util.fft_kernel_label_map(), (
-        self.test_session(use_gpu=True)):
+        self.session(use_gpu=True)):
       # Tuples of (signal_length, frame_length, frame_step, fft_length,
       # stft_bound, inverse_stft_bound).
       # TODO(rjryan): Investigate why STFT gradient error is so high.
diff --git a/tensorflow/contrib/signal/python/kernel_tests/window_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/window_ops_test.py
index 5a464699da..6a46a22693 100644
--- a/tensorflow/contrib/signal/python/kernel_tests/window_ops_test.py
+++ b/tensorflow/contrib/signal/python/kernel_tests/window_ops_test.py
@@ -64,7 +64,7 @@ class WindowOpsTest(test.TestCase):
                     (dtypes.float64, 1e-9)]
 
   def _compare_window_fns(self, np_window_fn, tf_window_fn):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for window_length in self._window_lengths:
         for periodic in [False, True]:
           for tf_dtype, tol in self._dtypes:
-- 
GitLab


From 1d02bb4cbaa9308dd7b4ad21ff1c74dd4134f920 Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <superbobry@gmail.com>
Date: Tue, 16 Oct 2018 21:25:39 +0200
Subject: [PATCH 1045/1085] Fixed indentation in test_ps_session_config

---
 tensorflow/python/estimator/run_config_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py
index 313bf62c05..e886ac7344 100644
--- a/tensorflow/python/estimator/run_config_test.py
+++ b/tensorflow/python/estimator/run_config_test.py
@@ -1197,8 +1197,8 @@ class RunConfigSessionConfigTest(test.TestCase):
     }
     run_config = _create_run_config_with_cluster_spec(tf_config)
     self._assert_equal_session_config(
-      run_config.session_config,
-      ['/job:ps', '/job:worker', '/job:chief', '/job:master'])
+        run_config.session_config,
+        ['/job:ps', '/job:worker', '/job:chief', '/job:master'])
 
   def test_evaluator_session_config(self):
     tf_config = {
-- 
GitLab


From a1717b77f5a4fa2b5869adb660be3a74c6b02618 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 12:27:38 -0700
Subject: [PATCH 1046/1085] [tfgan] Exposed warm start functionality to
 GANEstimator.

PiperOrigin-RevId: 217363868
---
 .../estimator/python/gan_estimator_impl.py    |  8 ++-
 .../estimator/python/gan_estimator_test.py    | 68 +++++++++++++++++++
 2 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index 7243f150ce..219cc199d7 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -112,7 +112,8 @@ class GANEstimator(estimator.Estimator):
                get_eval_metric_ops_fn=None,
                add_summaries=None,
                use_loss_summaries=True,
-               config=None):
+               config=None,
+               warm_start_from=None):
     """Initializes a GANEstimator instance.
 
     Args:
@@ -151,6 +152,8 @@ class GANEstimator(estimator.Estimator):
       use_loss_summaries: If `True`, add loss summaries. If `False`, does not.
         If `None`, uses defaults.
       config: `RunConfig` object to configure the runtime settings.
+      warm_start_from: A filepath to a checkpoint or saved model, or a
+        WarmStartSettings object to configure initialization.
 
     Raises:
       ValueError: If loss functions aren't callable.
@@ -187,7 +190,8 @@ class GANEstimator(estimator.Estimator):
           get_hooks_fn, use_loss_summaries)
 
     super(GANEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir, config=config)
+        model_fn=_model_fn, model_dir=model_dir, config=config,
+        warm_start_from=warm_start_from)
 
 
 def _get_gan_model(
diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
index 83f8dd641f..cfc867f083 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
@@ -33,9 +33,11 @@ from tensorflow.contrib.learn.python.learn.learn_io import graph_io
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator.estimator import WarmStartSettings
 from tensorflow.python.estimator.inputs import numpy_io
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework.errors_impl import NotFoundError
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics as metrics_lib
@@ -317,5 +319,71 @@ class GANEstimatorIntegrationTest(test.TestCase):
         prediction_size=[batch_size, input_dim])
 
 
+class GANEstimatorWarmStartTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = self.get_temp_dir()
+    self.new_variable_name = 'new_var'
+    self.new_variable_value = [1, 2, 3]
+
+  def tearDown(self):
+    writer_cache.FileWriterCache.clear()
+
+  def _test_warm_start(self, warm_start_from=None):
+    """Tests whether WarmStartSettings work as intended."""
+    def generator_with_new_variable(noise_dict, mode):
+      variable_scope.get_variable(name=self.new_variable_name,
+                                  initializer=self.new_variable_value,
+                                  trainable=True)
+      return generator_fn(noise_dict, mode)
+
+    def train_input_fn():
+      data = np.zeros([3, 4])
+      return {'x': data}, data
+
+    est = estimator.GANEstimator(
+        generator_fn=generator_fn,
+        discriminator_fn=discriminator_fn,
+        generator_loss_fn=losses.wasserstein_generator_loss,
+        discriminator_loss_fn=losses.wasserstein_discriminator_loss,
+        generator_optimizer=training.GradientDescentOptimizer(1.0),
+        discriminator_optimizer=training.GradientDescentOptimizer(1.0),
+        model_dir=self._model_dir)
+
+    est.train(train_input_fn, steps=1)
+
+    est_warm = estimator.GANEstimator(
+        generator_fn=generator_with_new_variable,
+        discriminator_fn=discriminator_fn,
+        generator_loss_fn=losses.wasserstein_generator_loss,
+        discriminator_loss_fn=losses.wasserstein_discriminator_loss,
+        generator_optimizer=training.GradientDescentOptimizer(1.0),
+        discriminator_optimizer=training.GradientDescentOptimizer(1.0),
+        model_dir=None if warm_start_from else self._model_dir,
+        warm_start_from=warm_start_from)
+
+    est_warm.train(train_input_fn, steps=1)
+
+    return est_warm
+
+  def test_warm_start_error(self):
+    """Test if exception when reloading different estimators."""
+    with self.assertRaises(NotFoundError):
+      self._test_warm_start()
+
+  def test_warm_start_success(self):
+    """Test if GANEstimator allows explicit warm start variable assignment."""
+    # Regex matches all variable names in ckpt except for new_var.
+    var_regex = '^(?!.*%s.*)' % self.new_variable_name
+    warmstart = WarmStartSettings(ckpt_to_initialize_from=self._model_dir,
+                                  vars_to_warm_start=var_regex)
+    est_warm = self._test_warm_start(warm_start_from=warmstart)
+    full_variable_name = 'Generator/%s' % self.new_variable_name
+    self.assertIn(full_variable_name, est_warm.get_variable_names())
+    equal_vals = np.array_equal(est_warm.get_variable_value(full_variable_name),
+                                self.new_variable_value)
+    self.assertTrue(equal_vals)
+
+
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 52589599b3fe467225b174a28271d52d50c4d54c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 13:04:06 -0700
Subject: [PATCH 1047/1085] Re-enable tests for tf.einsum().

These tests check that tf.einsum() is equivalent to np.einsum().
At one point, some of them were failing because of a bug in
np.einsum() in an old version of numpy.  This bug has been fixed,
so we can re-enable the tests now.

PiperOrigin-RevId: 217370167
---
 tensorflow/python/ops/special_math_ops_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py
index b9dfc79311..d2f6b47697 100644
--- a/tensorflow/python/ops/special_math_ops_test.py
+++ b/tensorflow/python/ops/special_math_ops_test.py
@@ -240,7 +240,7 @@ class EinsumTest(test.TestCase):
       'aef,fbc,dca->bde',
       'iJ,Jk->ik',
       'iJ,Ki->JK',
-      'iJk,Jklm->Jk'
+      'iJk,Jklm->Jk',
       'ij, jk, kl -> il',
       'a, ab, abc -> abc',
       'ab, ab, cd, cd, ef, ef -> ',
@@ -280,7 +280,7 @@ class EinsumTest(test.TestCase):
 
   dim_mismatch_cases = [('ijk,jkl->il', [(2, 3, 4), (3, 5, 6)])]
 
-  def disabled_test_simple(self):
+  def test_simple(self):
     for case in self.simple_cases:
       self.run_test(case)
 
-- 
GitLab


From 4b29bd950b6763a1d2ade08369ed1e432d492af4 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 16 Oct 2018 13:05:41 -0700
Subject: [PATCH 1048/1085] Adding an XLA kernel registration for kDeviceRetOp
 that returns int32 tensors in device memory rather than host memory.

PiperOrigin-RevId: 217370484
---
 tensorflow/compiler/jit/xla_device_ops.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h
index 14a232b7a8..6a1c43aa96 100644
--- a/tensorflow/compiler/jit/xla_device_ops.h
+++ b/tensorflow/compiler/jit/xla_device_ops.h
@@ -210,6 +210,8 @@ class XlaAssignVariableOp : public AsyncOpKernel {
                               .TypeConstraint<ResourceHandle>("T")             \
                               .HostMemory("input"),                            \
                           RetvalOp);                                           \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name(kDeviceRetOp).Device(DEVICE).TypeConstraint<int32>("T"), RetvalOp); \
                                                                                \
   REGISTER_KERNEL_BUILDER(                                                     \
       Name("RemoteCall").Device(DEVICE).HostMemory("target"), RemoteCallOp);   \
-- 
GitLab


From 0326792aa76a132ab0c9bb8cc8dcb6ce0b1487fb Mon Sep 17 00:00:00 2001
From: mdfaijul <md.faijul.amin@intel.com>
Date: Tue, 16 Oct 2018 13:21:58 -0700
Subject: [PATCH 1049/1085] remove CHECK and CHECK_EQ, added TF_RETURN_IF_ERROR

---
 .../graph_transforms/fuse_quantized_convolution.cc     | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc b/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc
index 2128bcd978..bd021d094e 100644
--- a/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc
+++ b/tensorflow/tools/graph_transforms/fuse_quantized_convolution.cc
@@ -179,8 +179,14 @@ Status FuseQuantizedConvolutionAndRequantize(
 
         TensorProto float_tensor_proto = bias_node->attr().at("value").tensor();
         Tensor float_tensor;
-        CHECK(float_tensor.FromProto(float_tensor_proto));
-        CHECK_EQ(float_tensor.dtype(), DT_FLOAT);
+        if(!float_tensor.FromProto(float_tensor_proto)) {
+          TF_RETURN_IF_ERROR(::tensorflow::errors::InvalidArgument(
+              "TensorProto object is not valid."));
+        }
+        if (float_tensor.dtype() != DT_FLOAT) {
+          TF_RETURN_IF_ERROR(::tensorflow::errors::Unimplemented(
+              "Expected float tensor."));
+        }
         float *p_bias_float = float_tensor.flat<float>().data();
 
         Tensor int32_tensor = Tensor(DT_QINT32, float_tensor.shape());
-- 
GitLab


From 6e23a13b1ec2da3d1327570a9661c9663fab82a0 Mon Sep 17 00:00:00 2001
From: Ruoxin Sang <rxsang@google.com>
Date: Tue, 16 Oct 2018 13:27:12 -0700
Subject: [PATCH 1050/1085] Fix a bug in TPUEstimator that eval_steps is not
 increased correctly when no eval_metrics is passed.

PiperOrigin-RevId: 217374906
---
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 28 +++++++++++++------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index b2fa9eb45c..29aa0d6568 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -2609,10 +2609,6 @@ class TPUEstimator(estimator_lib.Estimator):
               total_loss,
               math_ops.cast(iterations_per_loop_var, dtype=total_loss.dtype))
 
-          # Creates a dummy metric update_op for all metrics. Estimator expects
-          # all metrics in eval_metric_ops have update_op and calls them one by
-          # one. The real metric update_ops are invoked in a separated thread.
-          # So, here give Estimator the dummy op for all metrics.
           with ops.control_dependencies([mean_loss]):
             # After TPU evaluation computation is done (the mean_loss tensor),
             # reads all variables back from TPU and updates the eval step
@@ -2620,16 +2616,30 @@ class TPUEstimator(estimator_lib.Estimator):
             internal_ops_to_run = _sync_variables_ops(ctx)
             internal_ops_to_run.append(
                 _increase_eval_step_op(iterations_per_loop_var))
-            with ops.control_dependencies(internal_ops_to_run):
-              dummy_update_op = control_flow_ops.no_op()
 
           host_call_ret = host_calls.create_tpu_hostcall()
           eval_metric_ops = {}
           eval_update_ops = []
 
-          for k, v in host_call_ret.get('eval_metrics', {}).items():
-            eval_metric_ops[k] = (v[0], dummy_update_op)
-            eval_update_ops.append(v[1])
+          eval_metrics = host_call_ret.get('eval_metrics', {})
+          if eval_metrics:
+            # Creates a dummy metric update_op for all metrics. Estimator
+            # expects all metrics in `eval_metric_ops` have update_op and calls
+            # them one by one. The real metric update_ops are invoked in a
+            # separated thread. So, here give Estimator the dummy op for all
+            # metrics.
+            with ops.control_dependencies(internal_ops_to_run):
+              dummy_update_op = control_flow_ops.no_op()
+
+            for k, v in eval_metrics.items():
+              eval_metric_ops[k] = (v[0], dummy_update_op)
+              eval_update_ops.append(v[1])
+          else:
+            # If no eval metrics are passed, create an identity node for the
+            # loss and add `internal_ops_to_run` to its dependencies. So
+            # `internal_ops_to_run` can be executed.
+            with ops.control_dependencies(internal_ops_to_run):
+              mean_loss = array_ops.identity(mean_loss)
 
           if 'host_call' not in host_call_ret:
             host_ops = []
-- 
GitLab


From f2d88e5ad422bb4abc2db1ddbac0f9247fd95896 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Tue, 16 Oct 2018 13:29:00 -0700
Subject: [PATCH 1051/1085] Modify variable_scope_test.py to work equally well
 under graph and eager execution.

PiperOrigin-RevId: 217375275
---
 tensorflow/python/eager/BUILD                 |    1 +
 tensorflow/python/kernel_tests/BUILD          |    1 +
 .../kernel_tests/variable_scope_test.py       | 1447 ++++++++++-------
 3 files changed, 896 insertions(+), 553 deletions(-)

diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index 52ea495305..751e8c402e 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -421,6 +421,7 @@ py_library(
     name = "wrap_function",
     srcs = ["wrap_function.py"],
     srcs_version = "PY2AND3",
+    visibility = ["//tensorflow:internal"],
     deps = [
         ":context",
         ":function",
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 33fb925f09..3d9b886ebb 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1172,6 +1172,7 @@ tf_py_test(
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:function",
+        "//tensorflow/python/eager:wrap_function",
     ],
     tags = ["no_windows"],
 )
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 33f464fb90..054e514a84 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -25,6 +25,7 @@ import numpy
 
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function
+from tensorflow.python.eager import wrap_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -44,6 +45,25 @@ from tensorflow.python.util import compat
 from tensorflow.python.util import tf_inspect
 
 
+def wrap_and_execute(graph_function, skip_graph=False):
+  """Wrap a graph _nullary_ function and execute it in graph and eager modes.
+
+  If graph mode fails, use skip_graph=True and comment on the caller with the
+  failure reason.
+
+  Args:
+    graph_function: python function containing graph code to be wrapped
+    skip_graph: Optional. Don't call the graph function if it errors.
+  """
+  wrapped = wrap_function.wrap_function(graph_function, [])
+  if context.executing_eagerly():
+    # use the wrapped graph function
+    wrapped()
+  elif not skip_graph:
+    # use the original function
+    graph_function()
+
+
 class VariableScopeTest(test.TestCase):
 
   def tearDown(self):
@@ -52,18 +72,29 @@ class VariableScopeTest(test.TestCase):
     # involving objects with __del__ defined.
     self.assertEqual(0, len(gc.garbage))
 
+  @test_util.run_in_graph_and_eager_modes
   def testGetVar(self):
-    vs = variable_scope._get_default_variable_store()
-    v = vs.get_variable("v", [1])
-    v1 = vs.get_variable("v", [1])
-    self.assertEqual(v, v1)
+
+    def _f():
+      vs = variable_scope._get_default_variable_store()
+      v = vs.get_variable("v", [1])
+      v1 = vs.get_variable("v", [1])
+      self.assertEqual(v, v1)
+
+    wrap_and_execute(_f)
 
   @test_util.run_in_graph_and_eager_modes
   def testResource(self):
-    vs = variable_scope._get_default_variable_store()
-    v1 = vs.get_variable("v", [1], use_resource=True)
-    self.assertTrue(isinstance(v1, resource_variable_ops.ResourceVariable))
 
+    def _f():
+      vs = variable_scope._get_default_variable_store()
+      v1 = vs.get_variable("v", [1], use_resource=True)
+      self.assertTrue(isinstance(v1, resource_variable_ops.ResourceVariable))
+
+    wrap_and_execute(_f)
+
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # AttributeError: Tensor.op is meaningless when eager execution is enabled.
   def testNameExists(self):
     vs = variable_scope._get_default_variable_store()
     # No check by default, so we can both create and get existing names.
@@ -80,14 +111,22 @@ class VariableScopeTest(test.TestCase):
     with self.assertRaises(ValueError):
       vs.get_variable("u", [1], reuse=True)  # That fails.
 
+  @test_util.run_in_graph_and_eager_modes
   def testNamelessStore(self):
-    vs = variable_scope._get_default_variable_store()
-    vs.get_variable("v1", [2])
-    vs.get_variable("v2", [2])
-    expected_names = ["%s:0" % name for name in ["v1", "v2"]]
-    self.assertEqual(
-        set(expected_names), set([v.name for v in vs._vars.values()]))
 
+    def _f():
+      vs = variable_scope._get_default_variable_store()
+      vs.get_variable("v1", [2])
+      vs.get_variable("v2", [2])
+      expected_names = ["%s:0" % name for name in ["v1", "v2"]]
+      self.assertEqual(
+          set(expected_names), set([v.name for v in vs._vars.values()]))
+
+    wrap_and_execute(_f)
+
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Operation name: "tower0/foo/v/Assign" ... is not an element of
+  # this graph.
   @test_util.run_in_graph_and_eager_modes
   def testVarScopeInitializer(self):
     init = init_ops.constant_initializer(0.3)
@@ -101,6 +140,8 @@ class VariableScopeTest(test.TestCase):
         self.evaluate(variables_lib.variables_initializer([w]))
         self.assertAllClose(self.evaluate(w.value()), 0.3)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Variable tower1/foo/v already exists, disallowed.
   @test_util.run_in_graph_and_eager_modes
   def testVarScopeConstraint(self):
     constraint = lambda x: 0. * x
@@ -112,12 +153,18 @@ class VariableScopeTest(test.TestCase):
         w = variable_scope.get_variable("w", [])
         self.assertEqual(w.constraint, constraint)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # TypeError: Fetch argument <tf.Variable 'string:0' shape=() dtype=string>
+  # has invalid type <class '...ResourceVariable'>, must be a string or Tensor.
+  # (Can not convert a ResourceVariable into a Tensor or Operation.)
   def testStringDefaultInitializer(self):
     with self.cached_session():
       v = variable_scope.get_variable("string", shape=[], dtype=dtypes.string)
       variables_lib.global_variables_initializer().run()
-      self.assertAllEqual(compat.as_bytes(v.eval()), b"")
+      self.assertAllEqual(compat.as_bytes(self.evaluate(v)), b"")
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Variable tower2/foo/v already exists, disallowed.
   @test_util.run_in_graph_and_eager_modes
   def testVarScopeDType(self):
     with variable_scope.variable_scope("tower2") as tower:
@@ -198,6 +245,8 @@ class VariableScopeTest(test.TestCase):
         self.assertAllEqual([v1, v2], [v3, v4])
       f()
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # obtaining different results in the eager case compared to the graph one
   @test_util.run_in_graph_and_eager_modes
   def testEagerVariablesStoreAddsToCollections(self):
     store = variable_scope.EagerVariableStore()
@@ -215,15 +264,15 @@ class VariableScopeTest(test.TestCase):
       self.assertEqual(
           ops.get_collection(ops.GraphKeys.CONCATENATED_VARIABLES), [concat])
 
-  @test_util.run_in_graph_and_eager_modes
   def testEagerVariablesOutsideStoreNotAddedToCollections(self):
-    if not context.executing_eagerly():
-      return
-    variable_scope.get_variable("v1", [], trainable=True)
-    variable_scope.get_variable("v2", [], trainable=False)
-    self.assertFalse(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
-    self.assertFalse(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
+    with context.eager_mode():
+      variable_scope.get_variable("v1", [], trainable=True)
+      variable_scope.get_variable("v2", [], trainable=False)
+      self.assertFalse(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
+      self.assertFalse(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Operation name: "v4/Assign" ... is not an element of this graph.
   @test_util.run_in_graph_and_eager_modes
   def testInitFromNonTensorValue(self):
     v = variable_scope.get_variable("v4", initializer=4, dtype=dtypes.int32)
@@ -240,6 +289,8 @@ class VariableScopeTest(test.TestCase):
     with self.assertRaises(error):
       variable_scope.get_variable("x4", initializer={})
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Operation name: "xx0/Assign" ...is not an element of this graph.
   @test_util.run_in_graph_and_eager_modes
   def testInitFromNonInitializer(self):
     # Test various dtypes with zeros initializer as following:
@@ -262,6 +313,8 @@ class VariableScopeTest(test.TestCase):
       self.assertAllEqual(self.evaluate(x.value()), self.evaluate(y.value()))
 
   # TODO(alive): support variable partitioning/caching in eager mode.
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # InvalidArgumentError: /job:moo/replica:0/task:0/device:CPU:0 unknown device.
   def testVarScopeCachingDevice(self):
     with self.cached_session():
       caching_device = "/job:moo"
@@ -295,6 +348,8 @@ class VariableScopeTest(test.TestCase):
         v_tower = variable_scope.get_variable("v", [])
         self.assertFalse(v_tower.value().device.startswith(caching_device))
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Operation name: ".../Assign"... is not an element of this graph.
   @test_util.run_in_graph_and_eager_modes
   def testVarScopeRegularizer(self):
     init = init_ops.constant_initializer(0.3)
@@ -340,6 +395,9 @@ class VariableScopeTest(test.TestCase):
           losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
           self.assertEqual(3, len(losses))  # No new loss added.
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Tensor-typed variable initializers must either be wrapped in an
+  # init_scope or callable...
   @test_util.run_in_graph_and_eager_modes
   def testInitializeFromValue(self):
     init = constant_op.constant(0.1)
@@ -366,6 +424,11 @@ class VariableScopeTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, "don't match"):
       variable_scope.get_variable("s", initializer=init, dtype=dtypes.float64)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # TypeError: Fetch argument <tf.Variable 'v0:0' shape=(1,) dtype=float32> has
+  # invalid type <class '...ops.resource_variable_ops.ResourceVariable'>, must
+  # be a string or Tensor. (Can not convert a ResourceVariable into a Tensor or
+  # Operation.)
   def testControlDeps(self):
     with self.cached_session() as sess:
       v0 = variable_scope.get_variable(
@@ -390,6 +453,8 @@ class VariableScopeTest(test.TestCase):
       sess.run(v0.initializer)
       sess.run(add)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # AssertionError: True is not false (last assertFalse)
   def testEnableResourceVariables(self):
     old = variable_scope._DEFAULT_USE_RESOURCE
     try:
@@ -402,6 +467,8 @@ class VariableScopeTest(test.TestCase):
     finally:
       variable_scope._DEFAULT_USE_RESOURCE = old
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # TypeError: Fetch argument None has invalid type <type 'NoneType'>
   def testControlFlow(self):
     with self.cached_session() as sess:
       v0 = variable_scope.get_variable(
@@ -441,6 +508,8 @@ class VariableScopeTest(test.TestCase):
       sess.run(v0.initializer)
       sess.run(add)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Operation name: ".../Assign"... is not an element of this graph.
   @test_util.run_in_graph_and_eager_modes
   def testGetVariableScope(self):
     # Test the get_variable_scope() function and setting properties of result.
@@ -464,123 +533,150 @@ class VariableScopeTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testVarScope(self):
-    with variable_scope.variable_scope("tower4") as tower:
-      self.assertEqual(tower.name, "tower4")
-      with ops.name_scope("scope") as sc:
-        self.assertEqual(sc, "tower4/scope/")
-
-    with variable_scope.variable_scope("tower5"):
-      with variable_scope.variable_scope("bar") as bar:
-        self.assertEqual(bar.name, "tower5/bar")
-        with ops.name_scope("scope") as sc:
-          self.assertEqual(sc, "tower5/bar/scope/")
 
-    with variable_scope.variable_scope("tower6"):
-      with variable_scope.variable_scope(tower, reuse=True) as tower_shared:
-        self.assertEqual(tower_shared.name, "tower4")
+    def _f():
+      with variable_scope.variable_scope("tower4") as tower:
+        self.assertEqual(tower.name, "tower4")
         with ops.name_scope("scope") as sc:
-          self.assertEqual(sc, "tower6/tower4/scope/")
+          self.assertEqual(sc, "tower4/scope/")
+
+      with variable_scope.variable_scope("tower5"):
+        with variable_scope.variable_scope("bar") as bar:
+          self.assertEqual(bar.name, "tower5/bar")
+          with ops.name_scope("scope") as sc:
+            self.assertEqual(sc, "tower5/bar/scope/")
+
+      with variable_scope.variable_scope("tower6"):
+        with variable_scope.variable_scope(tower, reuse=True) as tower_shared:
+          self.assertEqual(tower_shared.name, "tower4")
+          with ops.name_scope("scope") as sc:
+            self.assertEqual(sc, "tower6/tower4/scope/")
+
+    wrap_and_execute(_f)
 
   @test_util.run_in_graph_and_eager_modes
   def testVarScopeNameScope(self):
-    with ops.name_scope("testVarScopeNameScope1"):
-      with variable_scope.variable_scope("tower") as tower:
-        with ops.name_scope("scope2") as sc2:
-          self.assertEqual(sc2, "testVarScopeNameScope1/tower/scope2/")
-      if not context.executing_eagerly():
-        with variable_scope.variable_scope(
-            tower):  # Re-entering acts like another "tower".
+
+    def _f():
+      with ops.name_scope("testVarScopeNameScope1"):
+        with variable_scope.variable_scope("tower") as tower:
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "testVarScopeNameScope1/tower_1/scope2/")
-        with variable_scope.variable_scope(
-            "tower"):  # Re-entering by string acts the same.
+            self.assertEqual(sc2, "testVarScopeNameScope1/tower/scope2/")
+        if not context.executing_eagerly():
+          with variable_scope.variable_scope(
+              tower):  # Re-entering acts like another "tower".
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "testVarScopeNameScope1/tower_1/scope2/")
+          with variable_scope.variable_scope(
+              "tower"):  # Re-entering by string acts the same.
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "testVarScopeNameScope1/tower_2/scope2/")
+
+      with ops.name_scope("testVarScopeNameScope2"):
+        with variable_scope.variable_scope("tower"):
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "testVarScopeNameScope1/tower_2/scope2/")
+            self.assertEqual(sc2, "testVarScopeNameScope2/tower/scope2/")
+        if not context.executing_eagerly():
+          with variable_scope.variable_scope(tower):
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "testVarScopeNameScope2/tower_1/scope2/")
 
-    with ops.name_scope("testVarScopeNameScope2"):
-      with variable_scope.variable_scope("tower"):
-        with ops.name_scope("scope2") as sc2:
-          self.assertEqual(sc2, "testVarScopeNameScope2/tower/scope2/")
-      if not context.executing_eagerly():
-        with variable_scope.variable_scope(tower):
+      root_var_scope = variable_scope.get_variable_scope()
+      with ops.name_scope("testVarScopeNameScope3"):
+        with variable_scope.variable_scope(root_var_scope):
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "testVarScopeNameScope2/tower_1/scope2/")
+            self.assertEqual(sc2, "testVarScopeNameScope3/scope2/")
 
-    root_var_scope = variable_scope.get_variable_scope()
-    with ops.name_scope("testVarScopeNameScope3"):
-      with variable_scope.variable_scope(root_var_scope):
-        with ops.name_scope("scope2") as sc2:
-          self.assertEqual(sc2, "testVarScopeNameScope3/scope2/")
+    wrap_and_execute(_f)
 
+  @test_util.run_in_graph_and_eager_modes
   def testVarScopeOriginalNameScope(self):
-    with self.cached_session():
-      with ops.name_scope("scope1"):
-        with variable_scope.variable_scope("tower") as tower:
-          self.assertEqual(tower.original_name_scope, "scope1/tower/")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope1/tower/scope2/")
-      with ops.name_scope("scope2"):
-        with variable_scope.variable_scope(tower) as tower1:
-          # Re-entering preserves original name scope.
-          self.assertEqual(tower1.original_name_scope, "scope1/tower/")
-          with ops.name_scope("foo") as sc2:
-            self.assertEqual(sc2, "scope2/tower/foo/")
-        # Test re-entering original name scope.
-        with ops.name_scope(tower.original_name_scope):
-          with ops.name_scope("bar") as sc3:
-            self.assertEqual(sc3, "scope1/tower/bar/")
-      with ops.name_scope("scope2"):
-        with variable_scope.variable_scope(tower):
+
+    def _f():
+      with self.cached_session():
+        with ops.name_scope("scope1"):
+          with variable_scope.variable_scope("tower") as tower:
+            self.assertEqual(tower.original_name_scope, "scope1/tower/")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "scope1/tower/scope2/")
+        with ops.name_scope("scope2"):
+          with variable_scope.variable_scope(tower) as tower1:
+            # Re-entering preserves original name scope.
+            self.assertEqual(tower1.original_name_scope, "scope1/tower/")
+            with ops.name_scope("foo") as sc2:
+              self.assertEqual(sc2, "scope2/tower/foo/")
+          # Test re-entering original name scope.
           with ops.name_scope(tower.original_name_scope):
             with ops.name_scope("bar") as sc3:
-              self.assertEqual(sc3, "scope1/tower/bar_1/")
+              self.assertEqual(sc3, "scope1/tower/bar/")
+        with ops.name_scope("scope2"):
+          with variable_scope.variable_scope(tower):
+            with ops.name_scope(tower.original_name_scope):
+              with ops.name_scope("bar") as sc3:
+                self.assertEqual(sc3, "scope1/tower/bar_1/")
 
+    # TODO(mihaimaruseac): calling _f fails with
+    # AssertionError: 'scope1_1/tower/' != 'scope1/tower/'
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testVarScopeObjectReuse(self):
-    with self.cached_session():
-      vs = None
-      with variable_scope.variable_scope("jump", reuse=True) as scope:
-        vs = scope
 
-      with variable_scope.variable_scope(vs) as jump:
-        self.assertTrue(jump.reuse)
+    def _f():
+      with self.cached_session():
+        vs = None
+        with variable_scope.variable_scope("jump", reuse=True) as scope:
+          vs = scope
 
-      with variable_scope.variable_scope(vs, reuse=True) as jump_reuse:
-        self.assertTrue(jump_reuse.reuse)
+        with variable_scope.variable_scope(vs) as jump:
+          self.assertTrue(jump.reuse)
 
-      with variable_scope.variable_scope(vs, reuse=False) as jump_no_reuse:
-        self.assertTrue(jump_no_reuse.reuse)  # Inherited, cannot be undone.
+        with variable_scope.variable_scope(vs, reuse=True) as jump_reuse:
+          self.assertTrue(jump_reuse.reuse)
 
-      with variable_scope.variable_scope("jump", reuse=False) as scope:
-        vs = scope
+        with variable_scope.variable_scope(vs, reuse=False) as jump_no_reuse:
+          self.assertTrue(jump_no_reuse.reuse)  # Inherited, cannot be undone.
 
-      with variable_scope.variable_scope(vs) as jump:
-        self.assertFalse(jump.reuse)
+        with variable_scope.variable_scope("jump", reuse=False) as scope:
+          vs = scope
 
-      with variable_scope.variable_scope(vs, reuse=True) as jump_reuse:
-        self.assertTrue(jump_reuse.reuse)
+        with variable_scope.variable_scope(vs) as jump:
+          self.assertFalse(jump.reuse)
 
-      with variable_scope.variable_scope(vs, reuse=False) as jump_no_reuse:
-        self.assertFalse(jump_no_reuse.reuse)
+        with variable_scope.variable_scope(vs, reuse=True) as jump_reuse:
+          self.assertTrue(jump_reuse.reuse)
 
+        with variable_scope.variable_scope(vs, reuse=False) as jump_no_reuse:
+          self.assertFalse(jump_no_reuse.reuse)
+
+    wrap_and_execute(_f)
+
+  @test_util.run_in_graph_and_eager_modes
   def testVarScopeGetOrCreateReuse(self):
-    with self.cached_session():
 
-      def test_value(value):
-        x = constant_op.constant(value)
-        with variable_scope.variable_scope(
-            "testVarScopeGetOrCreateReuse_bar",
-            reuse=variable_scope.AUTO_REUSE):
-          _ = state_ops.assign(variable_scope.get_variable("var", []), x)
-        with variable_scope.variable_scope(
-            "testVarScopeGetOrCreateReuse_bar",
-            reuse=variable_scope.AUTO_REUSE):
-          _ = variable_scope.get_variable("var", [])
-        self.assertEqual(value, x.eval())
+    def _f():
+      with self.cached_session():
+
+        def test_value(value):
+          x = constant_op.constant(value)
+          with variable_scope.variable_scope(
+              "testVarScopeGetOrCreateReuse_bar",
+              reuse=variable_scope.AUTO_REUSE):
+            _ = state_ops.assign(variable_scope.get_variable("var", []), x)
+          with variable_scope.variable_scope(
+              "testVarScopeGetOrCreateReuse_bar",
+              reuse=variable_scope.AUTO_REUSE):
+            _ = variable_scope.get_variable("var", [])
+          self.assertEqual(value, x.eval())
+
+        test_value(42.)  # Variable is created.
+        test_value(13.)  # Variable is reused hereafter.
+        test_value(17.)
 
-      test_value(42.)  # Variable is created.
-      test_value(13.)  # Variable is reused hereafter.
-      test_value(17.)
+    wrap_and_execute(_f)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # AttributeError: Tensor.op is meaningless when eager execution is enabled.
   def testVarOpScope(self):
     with self.cached_session():
       with ops.name_scope("testVarOpScope1"):
@@ -607,71 +703,96 @@ class VariableScopeTest(test.TestCase):
           with ops.name_scope("testVarOpScope2") as sc2:
             self.assertEqual(sc2, "testVarOpScope2/default_1/testVarOpScope2/")
 
+  @test_util.run_in_graph_and_eager_modes
   def testVarOpScopeUniqueNamesInterleavedSubstringScopes(self):
-    with self.cached_session():
-      with variable_scope.variable_scope(None, "defaultScope1"):
-        with variable_scope.variable_scope(None, "layer"):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name,
-              "defaultScope1/layer/w:0")
-      with variable_scope.variable_scope(None, "defaultScope1"):
-        with variable_scope.variable_scope(None, "layer"):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name,
-              "defaultScope1_1/layer/w:0")
-      with variable_scope.variable_scope(None, "defaultScope"):
-        with variable_scope.variable_scope(None, "layer"):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name,
-              "defaultScope/layer/w:0")
-      with variable_scope.variable_scope(None, "defaultScope1"):
-        with variable_scope.variable_scope(None, "layer"):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name,
-              "defaultScope1_2/layer/w:0")
 
+    def _f():
+      with self.cached_session():
+        with variable_scope.variable_scope(None, "defaultScope1"):
+          with variable_scope.variable_scope(None, "layer"):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name,
+                "defaultScope1/layer/w:0")
+        with variable_scope.variable_scope(None, "defaultScope1"):
+          with variable_scope.variable_scope(None, "layer"):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name,
+                "defaultScope1_1/layer/w:0")
+        with variable_scope.variable_scope(None, "defaultScope"):
+          with variable_scope.variable_scope(None, "layer"):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name,
+                "defaultScope/layer/w:0")
+        with variable_scope.variable_scope(None, "defaultScope1"):
+          with variable_scope.variable_scope(None, "layer"):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name,
+                "defaultScope1_2/layer/w:0")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # AssertionError: 'defaultScope1_3/layer/w:0' != 'defaultScope1/layer/w:0'
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testVarOpScopeUniqueNamesWithJump(self):
-    with self.cached_session():
-      with variable_scope.variable_scope("default") as default:
-        with variable_scope.variable_scope(None, "layer"):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "default/layer/w:0")
-        with variable_scope.variable_scope(None, "layer"):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "default/layer_1/w:0")
-        with variable_scope.variable_scope(default):
-          pass
-        # No matter the jump in the middle, unique numbering continues.
-        with variable_scope.variable_scope(None, "layer"):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "default/layer_2/w:0")
 
+    def _f():
+      with self.cached_session():
+        with variable_scope.variable_scope("default") as default:
+          with variable_scope.variable_scope(None, "layer"):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "default/layer/w:0")
+          with variable_scope.variable_scope(None, "layer"):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name,
+                "default/layer_1/w:0")
+          with variable_scope.variable_scope(default):
+            pass
+          # No matter the jump in the middle, unique numbering continues.
+          with variable_scope.variable_scope(None, "layer"):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name,
+                "default/layer_2/w:0")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable default/layer/w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testVarOpScopeReuse(self):
-    with self.cached_session():
-      with variable_scope.variable_scope("outer") as outer:
-        with variable_scope.variable_scope("tower", "default", []):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/tower/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/tower/scope2/")
-        with variable_scope.variable_scope(None, "default", []):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/default/scope2/")
 
-      with variable_scope.variable_scope(outer, reuse=True) as outer:
-        with variable_scope.variable_scope("tower", "default", []):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/tower/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer_1/tower/scope2/")
-        with variable_scope.variable_scope(None, "default", []):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer_1/default/scope2/")
+    def _f():
+      with self.cached_session():
+        with variable_scope.variable_scope("outer") as outer:
+          with variable_scope.variable_scope("tower", "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/tower/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/tower/scope2/")
+          with variable_scope.variable_scope(None, "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/default/scope2/")
+
+        with variable_scope.variable_scope(outer, reuse=True) as outer:
+          with variable_scope.variable_scope("tower", "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/tower/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer_1/tower/scope2/")
+          with variable_scope.variable_scope(None, "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer_1/default/scope2/")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable outer/tower/w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # AttributeError: Tensor.op is meaningless when eager execution is enabled.
   def testVarScopeGetVar(self):
     with self.cached_session():
       with variable_scope.variable_scope("root"):
@@ -718,82 +839,108 @@ class VariableScopeTest(test.TestCase):
             variable_scope.get_variable("v", [1], dtype=dtypes.int32)
         self.assertEqual("dtype" in str(exc.exception), True)
 
+  @test_util.run_in_graph_and_eager_modes
   def testVarScopeOuterScope(self):
-    with self.cached_session():
-      with variable_scope.variable_scope("outer") as outer:
-        pass
-      with variable_scope.variable_scope(outer):
-        self.assertEqual(variable_scope.get_variable("w", []).name, "outer/w:0")
-        with ops.name_scope("scope2") as sc2:
-          self.assertEqual(sc2, "outer_1/scope2/")
-        with variable_scope.variable_scope("default"):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer_1/default/scope2/")
 
-      with variable_scope.variable_scope(outer, reuse=True):
-        self.assertEqual(variable_scope.get_variable("w", []).name, "outer/w:0")
-        with ops.name_scope("scope2") as sc2:
-          self.assertEqual(sc2, "outer_2/scope2/")
-        with variable_scope.variable_scope("default", reuse=True):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer_2/default/scope2/")
-
-  def testVarScopeNestedOuterScope(self):
-    with self.cached_session():
-      with variable_scope.variable_scope("outer") as outer:
+    def _f():
+      with self.cached_session():
+        with variable_scope.variable_scope("outer") as outer:
+          pass
         with variable_scope.variable_scope(outer):
           self.assertEqual(
               variable_scope.get_variable("w", []).name, "outer/w:0")
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/outer/scope2/")
-        with variable_scope.variable_scope("default"):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/default/scope2/")
+            self.assertEqual(sc2, "outer_1/scope2/")
+          with variable_scope.variable_scope("default"):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer_1/default/scope2/")
 
         with variable_scope.variable_scope(outer, reuse=True):
           self.assertEqual(
               variable_scope.get_variable("w", []).name, "outer/w:0")
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/outer_1/scope2/")
-        with variable_scope.variable_scope("default", reuse=True):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/default_1/scope2/")
+            self.assertEqual(sc2, "outer_2/scope2/")
+          with variable_scope.variable_scope("default", reuse=True):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer_2/default/scope2/")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable outer/w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testVarScopeNestedOuterScope(self):
+
+    def _f():
+      with self.cached_session():
+        with variable_scope.variable_scope("outer") as outer:
+          with variable_scope.variable_scope(outer):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/outer/scope2/")
+          with variable_scope.variable_scope("default"):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/default/scope2/")
+
+          with variable_scope.variable_scope(outer, reuse=True):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/outer_1/scope2/")
+          with variable_scope.variable_scope("default", reuse=True):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/default_1/scope2/")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable outer/w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
 
+  @test_util.run_in_graph_and_eager_modes
   def testVarOpScopeReuseParam(self):
-    with self.cached_session():
-      with variable_scope.variable_scope("outer") as outer:
-        with variable_scope.variable_scope("tower", "default", []):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/tower/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/tower/scope2/")
-        with variable_scope.variable_scope(None, "default", []):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/default/scope2/")
 
-      with variable_scope.variable_scope(outer) as outer:
-        with variable_scope.variable_scope("tower", "default", reuse=True):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/tower/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer_1/tower/scope2/")
-        outer.reuse_variables()
-        with variable_scope.variable_scope(None, "default", []):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer_1/default/scope2/")
+    def _f():
+      with self.cached_session():
+        with variable_scope.variable_scope("outer") as outer:
+          with variable_scope.variable_scope("tower", "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/tower/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/tower/scope2/")
+          with variable_scope.variable_scope(None, "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/default/scope2/")
+
+        with variable_scope.variable_scope(outer) as outer:
+          with variable_scope.variable_scope("tower", "default", reuse=True):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/tower/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer_1/tower/scope2/")
+          outer.reuse_variables()
+          with variable_scope.variable_scope(None, "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer_1/default/scope2/")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable outer/tower/w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # AttributeError: 'variable_scope' object has no attribute
+  # '_graph_context_manager'
   def testVarOpScopeReuseError(self):
     with self.cached_session():
       with self.assertRaises(ValueError):
@@ -801,179 +948,238 @@ class VariableScopeTest(test.TestCase):
           self.assertEqual(
               variable_scope.get_variable("w", []).name, "outer/tower/w:0")
 
+  @test_util.run_in_graph_and_eager_modes
   def testVarOpScopeOuterScope(self):
-    with self.cached_session():
-      with variable_scope.variable_scope("outer") as outer:
-        pass
-      with variable_scope.variable_scope(outer, "default", []):
-        self.assertEqual(variable_scope.get_variable("w", []).name, "outer/w:0")
-        with ops.name_scope("scope2") as sc2:
-          self.assertEqual(sc2, "outer_1/scope2/")
-        with variable_scope.variable_scope(None, "default", []):
+
+    def _f():
+      with self.cached_session():
+        with variable_scope.variable_scope("outer") as outer:
+          pass
+        with variable_scope.variable_scope(outer, "default", []):
           self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
+              variable_scope.get_variable("w", []).name, "outer/w:0")
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer_1/default/scope2/")
+            self.assertEqual(sc2, "outer_1/scope2/")
+          with variable_scope.variable_scope(None, "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer_1/default/scope2/")
 
-      with variable_scope.variable_scope(outer, "default", reuse=True):
-        self.assertEqual(variable_scope.get_variable("w", []).name, "outer/w:0")
-        with ops.name_scope("scope2") as sc2:
-          self.assertEqual(sc2, "outer_2/scope2/")
-        outer.reuse_variables()
-        with variable_scope.variable_scope(None, "default", []):
+        with variable_scope.variable_scope(outer, "default", reuse=True):
           self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
+              variable_scope.get_variable("w", []).name, "outer/w:0")
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer_2/default/scope2/")
+            self.assertEqual(sc2, "outer_2/scope2/")
+          outer.reuse_variables()
+          with variable_scope.variable_scope(None, "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer_2/default/scope2/")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable outer/w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
 
+  @test_util.run_in_graph_and_eager_modes
   def testVarOpScopeNestedOuterScope(self):
-    with self.cached_session():
-      with variable_scope.variable_scope("outer") as outer:
-        with variable_scope.variable_scope(outer, "default", []):
+
+    def _f():
+      with self.cached_session():
+        with variable_scope.variable_scope("outer") as outer:
+          with variable_scope.variable_scope(outer, "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/outer/scope2/")
+          with variable_scope.variable_scope(None, "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer/default/scope2/")
+
+        with variable_scope.variable_scope(outer, "default", reuse=True):
           self.assertEqual(
               variable_scope.get_variable("w", []).name, "outer/w:0")
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/outer/scope2/")
-        with variable_scope.variable_scope(None, "default", []):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer/default/scope2/")
+            self.assertEqual(sc2, "outer_1/scope2/")
+          with variable_scope.variable_scope(None, "default", []):
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            with ops.name_scope("scope2") as sc2:
+              self.assertEqual(sc2, "outer_1/default/scope2/")
 
-      with variable_scope.variable_scope(outer, "default", reuse=True):
-        self.assertEqual(variable_scope.get_variable("w", []).name, "outer/w:0")
-        with ops.name_scope("scope2") as sc2:
-          self.assertEqual(sc2, "outer_1/scope2/")
-        with variable_scope.variable_scope(None, "default", []):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "outer_1/default/scope2/")
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable outer/w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
 
+  @test_util.run_in_graph_and_eager_modes
   def testBasicWhenAuxiliaryNameScopeIsFalse(self):
-    with self.cached_session():
-      with variable_scope.variable_scope(
-          "scope", auxiliary_name_scope=False) as scope:
-        self.assertEqual(scope.original_name_scope, "")
-        self.assertEqual(variable_scope.get_variable("w", []).name, "scope/w:0")
-        self.assertEqual(constant_op.constant([], name="c").name, "c:0")
-      with variable_scope.variable_scope(scope, auxiliary_name_scope=False):
-        self.assertEqual(scope.original_name_scope, "")
-        self.assertEqual(
-            variable_scope.get_variable("w1", []).name, "scope/w1:0")
-        self.assertEqual(constant_op.constant([], name="c1").name, "c1:0")
-      # Recheck: new name scope is NOT created before
-      with ops.name_scope("scope"):
-        self.assertEqual(constant_op.constant([], name="c").name, "scope/c:0")
 
-      with variable_scope.variable_scope("outer"):
-        with variable_scope.variable_scope(
-            "inner", auxiliary_name_scope=False) as inner:
-          self.assertEqual(inner.original_name_scope, "outer/")
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/inner/w:0")
-          self.assertEqual(constant_op.constant([], name="c").name, "outer/c:0")
+    def _f():
+      with self.cached_session():
         with variable_scope.variable_scope(
-            inner, auxiliary_name_scope=False) as inner1:
-          self.assertEqual(inner1.original_name_scope, "outer/")
+            "scope", auxiliary_name_scope=False) as scope:
+          self.assertEqual(scope.original_name_scope, "")
           self.assertEqual(
-              variable_scope.get_variable("w1", []).name, "outer/inner/w1:0")
+              variable_scope.get_variable("w", []).name, "scope/w:0")
+          self.assertEqual(constant_op.constant([], name="c").name, "c:0")
+        with variable_scope.variable_scope(scope, auxiliary_name_scope=False):
+          self.assertEqual(scope.original_name_scope, "")
           self.assertEqual(
-              constant_op.constant([], name="c1").name, "outer/c1:0")
+              variable_scope.get_variable("w1", []).name, "scope/w1:0")
+          self.assertEqual(constant_op.constant([], name="c1").name, "c1:0")
         # Recheck: new name scope is NOT created before
-        with ops.name_scope("inner"):
-          self.assertEqual(
-              constant_op.constant([], name="c").name, "outer/inner/c:0")
+        with ops.name_scope("scope"):
+          self.assertEqual(constant_op.constant([], name="c").name, "scope/c:0")
 
+        with variable_scope.variable_scope("outer"):
+          with variable_scope.variable_scope(
+              "inner", auxiliary_name_scope=False) as inner:
+            self.assertEqual(inner.original_name_scope, "outer/")
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/inner/w:0")
+            self.assertEqual(
+                constant_op.constant([], name="c").name, "outer/c:0")
+          with variable_scope.variable_scope(
+              inner, auxiliary_name_scope=False) as inner1:
+            self.assertEqual(inner1.original_name_scope, "outer/")
+            self.assertEqual(
+                variable_scope.get_variable("w1", []).name, "outer/inner/w1:0")
+            self.assertEqual(
+                constant_op.constant([], name="c1").name, "outer/c1:0")
+          # Recheck: new name scope is NOT created before
+          with ops.name_scope("inner"):
+            self.assertEqual(
+                constant_op.constant([], name="c").name, "outer/inner/c:0")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable scope/w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testCreatedByDefaultNameWhenAuxiliaryNameScopeIsFalse(self):
-    with self.cached_session():
-      with variable_scope.variable_scope(
-          None, default_name="default", auxiliary_name_scope=False) as scope:
-        self.assertEqual(scope.original_name_scope, "")
-        self.assertEqual(
-            variable_scope.get_variable("w", []).name, "default/w:0")
-        self.assertEqual(constant_op.constant([], name="c").name, "c:0")
-      # Recheck: new name scope is NOT created before
-      with ops.name_scope("default"):
-        self.assertEqual(constant_op.constant([], name="c").name, "default/c:0")
 
-      with variable_scope.variable_scope("outer"):
+    def _f():
+      with self.cached_session():
         with variable_scope.variable_scope(
-            None, default_name="default", auxiliary_name_scope=False) as inner:
-          self.assertEqual(inner.original_name_scope, "outer/")
+            None, default_name="default", auxiliary_name_scope=False) as scope:
+          self.assertEqual(scope.original_name_scope, "")
           self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/default/w:0")
-          self.assertEqual(constant_op.constant([], name="c").name, "outer/c:0")
+              variable_scope.get_variable("w", []).name, "default/w:0")
+          self.assertEqual(constant_op.constant([], name="c").name, "c:0")
         # Recheck: new name scope is NOT created before
         with ops.name_scope("default"):
           self.assertEqual(
-              constant_op.constant([], name="c").name, "outer/default/c:0")
+              constant_op.constant([], name="c").name, "default/c:0")
 
+        with variable_scope.variable_scope("outer"):
+          with variable_scope.variable_scope(
+              None, default_name="default",
+              auxiliary_name_scope=False) as inner:
+            self.assertEqual(inner.original_name_scope, "outer/")
+            self.assertEqual(
+                variable_scope.get_variable("w", []).name, "outer/default/w:0")
+            self.assertEqual(
+                constant_op.constant([], name="c").name, "outer/c:0")
+          # Recheck: new name scope is NOT created before
+          with ops.name_scope("default"):
+            self.assertEqual(
+                constant_op.constant([], name="c").name, "outer/default/c:0")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # AssertionError: 'default_1/w:0' != 'default/w:0'
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testReenterRootScopeWhenAuxiliaryNameScopeIsFalse(self):
-    with self.cached_session():
-      root_scope = variable_scope.get_variable_scope()
-      with variable_scope.variable_scope(
-          root_scope, auxiliary_name_scope=False) as scope:
-        self.assertEqual(scope.original_name_scope, "")
-        self.assertEqual(variable_scope.get_variable("w", []).name, "w:0")
-        self.assertEqual(constant_op.constant([], name="c").name, "c:0")
 
-      with variable_scope.variable_scope("outer"):
+    def _f():
+      with self.cached_session():
+        root_scope = variable_scope.get_variable_scope()
         with variable_scope.variable_scope(
-            root_scope, auxiliary_name_scope=False) as inner:
-          self.assertEqual(inner.original_name_scope, "")
-          self.assertEqual(variable_scope.get_variable("w1", []).name, "w1:0")
-          self.assertEqual(
-              constant_op.constant([], name="c1").name, "outer/c1:0")
+            root_scope, auxiliary_name_scope=False) as scope:
+          self.assertEqual(scope.original_name_scope, "")
+          self.assertEqual(variable_scope.get_variable("w", []).name, "w:0")
+          self.assertEqual(constant_op.constant([], name="c").name, "c:0")
 
+        with variable_scope.variable_scope("outer"):
+          with variable_scope.variable_scope(
+              root_scope, auxiliary_name_scope=False) as inner:
+            self.assertEqual(inner.original_name_scope, "")
+            self.assertEqual(variable_scope.get_variable("w1", []).name, "w1:0")
+            self.assertEqual(
+                constant_op.constant([], name="c1").name, "outer/c1:0")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testAuxiliaryNameScopeIsInvalid(self):
-    with self.cached_session():
-      with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
-        with variable_scope.variable_scope(
-            None, default_name="scope", auxiliary_name_scope="invalid"):
-          pass
 
-      with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
-        with variable_scope.variable_scope(
-            "scope", auxiliary_name_scope="invalid"):
-          pass
+    def _f():
+      with self.cached_session():
+        with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
+          with variable_scope.variable_scope(
+              None, default_name="scope", auxiliary_name_scope="invalid"):
+            pass
 
-      with variable_scope.variable_scope("scope") as scope:
-        pass
-      with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
-        with variable_scope.variable_scope(
-            scope, auxiliary_name_scope="invalid"):
+        with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
+          with variable_scope.variable_scope(
+              "scope", auxiliary_name_scope="invalid"):
+            pass
+
+        with variable_scope.variable_scope("scope") as scope:
           pass
+        with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"):
+          with variable_scope.variable_scope(
+              scope, auxiliary_name_scope="invalid"):
+            pass
 
+    wrap_and_execute(_f)
+
+  @test_util.run_in_graph_and_eager_modes
   def testReuseScopeWithoutNameScopeCollision(self):
     # Github issue: #13429
-    with self.cached_session():
-      with variable_scope.variable_scope("outer"):
-        with variable_scope.variable_scope("inner") as inner:
-          pass
-
-      with variable_scope.variable_scope(
-          inner, auxiliary_name_scope=False) as scope:
-        with ops.name_scope(scope.original_name_scope):
-          self.assertEqual(
-              variable_scope.get_variable("w", []).name, "outer/inner/w:0")
-          self.assertEqual(
-              constant_op.constant([], name="c").name, "outer/inner/c:0")
-        with ops.name_scope("inner"):
-          self.assertEqual(constant_op.constant([], name="c").name, "inner/c:0")
+    def _f():
+      with self.cached_session():
+        with variable_scope.variable_scope("outer"):
+          with variable_scope.variable_scope("inner") as inner:
+            pass
 
-      with variable_scope.variable_scope("another"):
         with variable_scope.variable_scope(
-            inner, auxiliary_name_scope=False) as scope1:
-          with ops.name_scope(scope1.original_name_scope):
+            inner, auxiliary_name_scope=False) as scope:
+          with ops.name_scope(scope.original_name_scope):
             self.assertEqual(
-                variable_scope.get_variable("w1", []).name, "outer/inner/w1:0")
+                variable_scope.get_variable("w", []).name, "outer/inner/w:0")
             self.assertEqual(
-                constant_op.constant([], name="c1").name, "outer/inner/c1:0")
+                constant_op.constant([], name="c").name, "outer/inner/c:0")
           with ops.name_scope("inner"):
             self.assertEqual(
-                constant_op.constant([], name="c").name, "another/inner/c:0")
+                constant_op.constant([], name="c").name, "inner/c:0")
 
+        with variable_scope.variable_scope("another"):
+          with variable_scope.variable_scope(
+              inner, auxiliary_name_scope=False) as scope1:
+            with ops.name_scope(scope1.original_name_scope):
+              self.assertEqual(
+                  variable_scope.get_variable("w1", []).name,
+                  "outer/inner/w1:0")
+              self.assertEqual(
+                  constant_op.constant([], name="c1").name, "outer/inner/c1:0")
+            with ops.name_scope("inner"):
+              self.assertEqual(
+                  constant_op.constant([], name="c").name, "another/inner/c:0")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable outer/inner/w already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
+
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # obtaining different results in the eager case compared to the graph one
+  # (different assertions failing after wrapping, in both execution modes)
   @test_util.run_in_graph_and_eager_modes
   def testGetLocalVar(self):
     # Check that local variable respects naming.
@@ -996,30 +1202,42 @@ class VariableScopeTest(test.TestCase):
         self.assertEqual(
             variable_scope.get_local_variable("w", []).name, "outer/w:0")
 
+  @test_util.run_in_graph_and_eager_modes
   def testSignatureGetVarVsGetLocalVar(self):
     """get_{local,}variable() must take the same list of args."""
-    arg_names = tf_inspect.getargspec(variable_scope.get_variable)[0]
-    local_arg_names = tf_inspect.getargspec(
-        variable_scope.get_local_variable)[0]
-    self.assertEqual(arg_names, local_arg_names)
 
+    def _f():
+      arg_names = tf_inspect.getargspec(variable_scope.get_variable)[0]
+      local_arg_names = tf_inspect.getargspec(
+          variable_scope.get_local_variable)[0]
+      self.assertEqual(arg_names, local_arg_names)
+
+    wrap_and_execute(_f)
+
+  @test_util.run_in_graph_and_eager_modes
   def testGetVarWithDevice(self):
-    g = ops.Graph()
-    varname_type = []
 
-    def device_func(op):
-      if op.type in ["Variable", "VariableV2", "VarHandleOp"]:
-        varname_type.append((op.name, op.get_attr("dtype")))
-      return "/device:GPU:0"
+    def _f():
+      g = ops.Graph()
+      varname_type = []
 
-    with g.as_default():
-      with ops.device(device_func):
-        _ = variable_scope.get_variable("x", (100, 200))
-        _ = variable_scope.get_variable(
-            "y", dtype=dtypes.int64, initializer=numpy.arange(73))
-    self.assertEqual(varname_type[0], ("x", dtypes.float32))
-    self.assertEqual(varname_type[1], ("y", dtypes.int64))
+      def device_func(op):
+        if op.type in ["Variable", "VariableV2", "VarHandleOp"]:
+          varname_type.append((op.name, op.get_attr("dtype")))
+        return "/device:GPU:0"
 
+      with g.as_default():
+        with ops.device(device_func):
+          _ = variable_scope.get_variable("x", (100, 200))
+          _ = variable_scope.get_variable(
+              "y", dtype=dtypes.int64, initializer=numpy.arange(73))
+      self.assertEqual(varname_type[0], ("x", dtypes.float32))
+      self.assertEqual(varname_type[1], ("y", dtypes.int64))
+
+    wrap_and_execute(_f)
+
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # obtaining different results in the eager case compared to the graph one
   def testGetCollection(self):
     with self.cached_session():
       _ = variable_scope.get_variable("testGetCollection_a", [])
@@ -1074,6 +1292,8 @@ class VariableScopeTest(test.TestCase):
           "testGetCollection_foo/testGetCollection_a:0"
       ])
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # obtaining different results in the eager case compared to the graph one
   def testGetTrainableVariablesWithGetVariable(self):
     with self.cached_session():
       _ = variable_scope.get_variable("testGetTrainableVariables_a", [])
@@ -1110,6 +1330,8 @@ class VariableScopeTest(test.TestCase):
             synchronization=variable_scope.VariableSynchronization.ON_READ,
             trainable=True)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # obtaining different results in the eager case compared to the graph one
   def testGetTrainableVariablesWithVariable(self):
     with self.cached_session():
       _ = variable_scope.variable(1.0, name="testGetTrainableVariables_a")
@@ -1149,6 +1371,8 @@ class VariableScopeTest(test.TestCase):
             synchronization=variable_scope.VariableSynchronization.ON_READ,
             trainable=True)
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # obtaining different results in the eager case compared to the graph one
   def testGetGlobalVariables(self):
     with self.cached_session():
       _ = variable_scope.get_variable("testGetGlobalVariables_a", [])
@@ -1159,6 +1383,8 @@ class VariableScopeTest(test.TestCase):
             ["testGetGlobalVariables_foo/"
              "testGetGlobalVariables_b:0"])
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # obtaining different results in the eager case compared to the graph one
   def testGetLocalVariables(self):
     with self.cached_session():
       _ = variable_scope.get_variable(
@@ -1169,22 +1395,34 @@ class VariableScopeTest(test.TestCase):
         _ = variable_scope.get_variable("c", [])
         self.assertEqual([v.name for v in scope.local_variables()], ["foo/b:0"])
 
+  @test_util.run_in_graph_and_eager_modes
   def testGetVariableWithRefDtype(self):
-    v = variable_scope.get_variable("v", shape=[3, 4], dtype=dtypes.float32)
-    # Ensure it is possible to do get_variable with a _ref dtype passed in.
-    _ = variable_scope.get_variable("w", shape=[5, 6], dtype=v.dtype)
 
+    def _f():
+      v = variable_scope.get_variable("v", shape=[3, 4], dtype=dtypes.float32)
+      # Ensure it is possible to do get_variable with a _ref dtype passed in.
+      _ = variable_scope.get_variable("w", shape=[5, 6], dtype=v.dtype)
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable v already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testTwoGraphs(self):
 
-    def f():
-      g1 = ops.Graph()
-      g2 = ops.Graph()
-      with g1.as_default():
-        with g2.as_default():
-          with variable_scope.variable_scope("_"):
-            pass
+    def _f():
+
+      def f():
+        g1 = ops.Graph()
+        g2 = ops.Graph()
+        with g1.as_default():
+          with g2.as_default():
+            with variable_scope.variable_scope("_"):
+              pass
+
+      self.assertRaisesRegexp(ValueError, "'_' is not a valid scope name", f)
 
-    self.assertRaisesRegexp(ValueError, "'_' is not a valid scope name", f)
+    wrap_and_execute(_f)
 
 
 def axis0_into1_partitioner(shape=None, **unused_kwargs):
@@ -1206,6 +1444,8 @@ def axis0_into3_partitioner(shape=None, **unused_kwargs):
 
 class VariableScopeWithPartitioningTest(test.TestCase):
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # obtaining different results in the eager case compared to the graph one
   def testResultNameMatchesRequested(self):
     with variable_scope.variable_scope(
         "scope0", partitioner=axis0_into2_partitioner):
@@ -1218,50 +1458,78 @@ class VariableScopeWithPartitioningTest(test.TestCase):
       self.assertIn("scope0/name0/part_1:0", [x.name for x in variables])
       self.assertNotIn("scope0/name0/part_2:0", [x.name for x in variables])
 
+  @test_util.run_in_graph_and_eager_modes
   def testBreaksIfPartitioningChanges(self):
-    with variable_scope.variable_scope(
-        "scope0", partitioner=axis0_into2_partitioner):
-      variable_scope.get_variable("name0", shape=(3, 1, 1))
 
-    with variable_scope.variable_scope(
-        "scope0", partitioner=axis0_into3_partitioner, reuse=True):
-      with self.assertRaisesRegexp(
-          ValueError,
-          "Trying to reuse partitioned variable .* but specified partitions .* "
-          "and found partitions .*"):
+    def _f():
+      with variable_scope.variable_scope(
+          "scope0", partitioner=axis0_into2_partitioner):
         variable_scope.get_variable("name0", shape=(3, 1, 1))
 
-    with variable_scope.variable_scope(
-        "scope0", partitioner=axis0_into1_partitioner, reuse=True):
-      with self.assertRaisesRegexp(
-          ValueError,
-          "Trying to reuse partitioned variable .* but specified partitions .* "
-          "and found partitions .*"):
-        variable_scope.get_variable("name0", shape=(3, 1, 1))
+      with variable_scope.variable_scope(
+          "scope0", partitioner=axis0_into3_partitioner, reuse=True):
+        with self.assertRaisesRegexp(
+            ValueError,
+            "Trying to reuse partitioned variable .* but specified partitions "
+            ".* and found partitions .*"):
+          variable_scope.get_variable("name0", shape=(3, 1, 1))
+
+      with variable_scope.variable_scope(
+          "scope0", partitioner=axis0_into1_partitioner, reuse=True):
+        with self.assertRaisesRegexp(
+            ValueError,
+            "Trying to reuse partitioned variable .* but specified partitions "
+            ".* and found partitions .*"):
+          variable_scope.get_variable("name0", shape=(3, 1, 1))
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Partitioned variable with name scope0/name0 already exists.
+    wrap_and_execute(_f, skip_graph=True)
 
+  @test_util.run_in_graph_and_eager_modes
   def testReturnsExistingConcatenatedValueIfReuse(self):
-    with variable_scope.variable_scope(
-        "scope0", partitioner=axis0_into2_partitioner):
-      v_concat = variable_scope.get_variable("name0", shape=(3, 1, 1))
-      variable_scope.get_variable_scope().reuse_variables()
-      v_concat_2 = variable_scope.get_variable("name0", shape=(3, 1, 1))
-      self.assertEqual(v_concat, v_concat_2)
 
+    def _f():
+      with variable_scope.variable_scope(
+          "scope0", partitioner=axis0_into2_partitioner):
+        v_concat = variable_scope.get_variable("name0", shape=(3, 1, 1))
+        variable_scope.get_variable_scope().reuse_variables()
+        v_concat_2 = variable_scope.get_variable("name0", shape=(3, 1, 1))
+        self.assertEqual(v_concat, v_concat_2)
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Partitioned variable with name scope0/name0 already exists.
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testAllowsReuseWithoutPartitioner(self):
-    with variable_scope.variable_scope(
-        "scope0", partitioner=axis0_into2_partitioner):
-      v = variable_scope.get_variable("name0", shape=(3, 1, 1))
-    with variable_scope.variable_scope("scope0", reuse=True):
-      v_reused = variable_scope.get_variable("name0")
-    self.assertEqual(v, v_reused)
 
+    def _f():
+      with variable_scope.variable_scope(
+          "scope0", partitioner=axis0_into2_partitioner):
+        v = variable_scope.get_variable("name0", shape=(3, 1, 1))
+      with variable_scope.variable_scope("scope0", reuse=True):
+        v_reused = variable_scope.get_variable("name0")
+      self.assertEqual(v, v_reused)
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Partitioned variable with name scope0/name0 already exists.
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testPropagatePartitionerOnReopening(self):
-    with variable_scope.variable_scope(
-        "scope0", partitioner=axis0_into2_partitioner) as vs:
-      self.assertEqual(axis0_into2_partitioner, vs.partitioner)
-      with variable_scope.variable_scope(vs) as vs1:
-        self.assertEqual(axis0_into2_partitioner, vs1.partitioner)
 
+    def _f():
+      with variable_scope.variable_scope(
+          "scope0", partitioner=axis0_into2_partitioner) as vs:
+        self.assertEqual(axis0_into2_partitioner, vs.partitioner)
+        with variable_scope.variable_scope(vs) as vs1:
+          self.assertEqual(axis0_into2_partitioner, vs1.partitioner)
+
+    wrap_and_execute(_f)
+
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # obtaining different results in the eager case compared to the graph one
   def testScalarIgnoresPartitioner(self):
     with variable_scope.variable_scope(
         "scope0", partitioner=axis0_into2_partitioner):
@@ -1272,94 +1540,124 @@ class VariableScopeWithPartitioningTest(test.TestCase):
 
   def _testPartitionConcatenatesAlongCorrectAxis(self, use_resource):
 
-    def _part_axis_0(**unused_kwargs):
-      return (2, 1, 1)
+    def _f():
 
-    def _part_axis_1(**unused_kwargs):
-      return (1, 2, 1)
+      def _part_axis_0(**unused_kwargs):
+        return (2, 1, 1)
 
-    with variable_scope.variable_scope("root", use_resource=use_resource):
-      v0 = variable_scope.get_variable(
-          "n0", shape=(2, 2, 2), partitioner=_part_axis_0)
-      v1 = variable_scope.get_variable(
-          "n1", shape=(2, 2, 2), partitioner=_part_axis_1)
+      def _part_axis_1(**unused_kwargs):
+        return (1, 2, 1)
 
-    self.assertEqual(v0.get_shape(), (2, 2, 2))
-    self.assertEqual(v1.get_shape(), (2, 2, 2))
+      with variable_scope.variable_scope("root", use_resource=use_resource):
+        v0 = variable_scope.get_variable(
+            "n0", shape=(2, 2, 2), partitioner=_part_axis_0)
+        v1 = variable_scope.get_variable(
+            "n1", shape=(2, 2, 2), partitioner=_part_axis_1)
+
+      self.assertEqual(v0.get_shape(), (2, 2, 2))
+      self.assertEqual(v1.get_shape(), (2, 2, 2))
+
+      n0_0 = list(v0)[0]
+      n0_1 = list(v0)[1]
+      self.assertEqual(n0_0.get_shape(), (1, 2, 2))
+      self.assertEqual(n0_1.get_shape(), (1, 2, 2))
 
-    n0_0 = list(v0)[0]
-    n0_1 = list(v0)[1]
-    self.assertEqual(n0_0.get_shape(), (1, 2, 2))
-    self.assertEqual(n0_1.get_shape(), (1, 2, 2))
+      n1_0 = list(v1)[0]
+      n1_1 = list(v1)[1]
+      self.assertEqual(n1_0.get_shape(), (2, 1, 2))
+      self.assertEqual(n1_1.get_shape(), (2, 1, 2))
 
-    n1_0 = list(v1)[0]
-    n1_1 = list(v1)[1]
-    self.assertEqual(n1_0.get_shape(), (2, 1, 2))
-    self.assertEqual(n1_1.get_shape(), (2, 1, 2))
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Partitioned variable with name root/n0 already exists.
+    wrap_and_execute(_f, skip_graph=True)
 
+  @test_util.run_in_graph_and_eager_modes
   def testPartitionConcatenatesAlongCorrectAxis(self):
     self._testPartitionConcatenatesAlongCorrectAxis(use_resource=False)
 
+  @test_util.run_in_graph_and_eager_modes
   def testPartitionConcatenatesAlongCorrectAxisResource(self):
     self._testPartitionConcatenatesAlongCorrectAxis(use_resource=True)
 
 
 class VariableScopeWithCustomGetterTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes
   def testNonCallableGetterFails(self):
-    with self.assertRaisesRegexp(ValueError, r"custom_getter .* not callable:"):
-      with variable_scope.variable_scope("scope0", custom_getter=3):
-        variable_scope.get_variable("name0")
-    with self.assertRaisesRegexp(ValueError, r"custom_getter .* not callable:"):
-      variable_scope.get_variable("name0", custom_getter=3)
 
+    def _f():
+      with self.assertRaisesRegexp(ValueError,
+                                   r"custom_getter .* not callable:"):
+        with variable_scope.variable_scope("scope0", custom_getter=3):
+          variable_scope.get_variable("name0")
+      with self.assertRaisesRegexp(ValueError,
+                                   r"custom_getter .* not callable:"):
+        variable_scope.get_variable("name0", custom_getter=3)
+
+    wrap_and_execute(_f)
+
+  @test_util.run_in_graph_and_eager_modes
   def testNoSideEffectsWithIdentityCustomGetter(self):
-    called = [0]
 
-    def custom_getter(getter, *args, **kwargs):
-      called[0] += 1
-      return getter(*args, **kwargs)
+    def _f():
+      called = [0]
 
-    with variable_scope.variable_scope(
-        "scope", custom_getter=custom_getter) as scope:
-      v = variable_scope.get_variable("v", [1])
-    with variable_scope.variable_scope(scope, reuse=True):
-      v2 = variable_scope.get_variable("v", [1])
-    with variable_scope.variable_scope("new_scope") as new_scope:
-      v3 = variable_scope.get_variable("v3", [1])
-    with variable_scope.variable_scope(
-        new_scope, reuse=True, custom_getter=custom_getter):
-      v4 = variable_scope.get_variable("v3", [1])
+      def custom_getter(getter, *args, **kwargs):
+        called[0] += 1
+        return getter(*args, **kwargs)
+
+      with variable_scope.variable_scope(
+          "scope", custom_getter=custom_getter) as scope:
+        v = variable_scope.get_variable("v", [1])
+      with variable_scope.variable_scope(scope, reuse=True):
+        v2 = variable_scope.get_variable("v", [1])
+      with variable_scope.variable_scope("new_scope") as new_scope:
+        v3 = variable_scope.get_variable("v3", [1])
+      with variable_scope.variable_scope(
+          new_scope, reuse=True, custom_getter=custom_getter):
+        v4 = variable_scope.get_variable("v3", [1])
+
+      self.assertEqual(v, v2)
+      self.assertEqual(v3, v4)
+      self.assertEqual(3, called[0])  # skipped one in the first new_scope
 
-    self.assertEqual(v, v2)
-    self.assertEqual(v3, v4)
-    self.assertEqual(3, called[0])  # skipped one in the first new_scope
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable scope/v already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
 
+  @test_util.run_in_graph_and_eager_modes
   def testSynchronizationAndAggregationWithCustomGetter(self):
-    called = [0]
-    synchronization = variable_scope.VariableSynchronization.AUTO
-    aggregation = variable_scope.VariableAggregation.NONE
 
-    def custom_getter(getter, *args, **kwargs):
-      called[0] += 1
+    def _f():
+      called = [0]
+      synchronization = variable_scope.VariableSynchronization.AUTO
+      aggregation = variable_scope.VariableAggregation.NONE
 
-      # Verify synchronization and aggregation kwargs are as expected.
-      self.assertEqual(kwargs["synchronization"], synchronization)
-      self.assertEqual(kwargs["aggregation"], aggregation)
-      return getter(*args, **kwargs)
+      def custom_getter(getter, *args, **kwargs):
+        called[0] += 1
 
-    with variable_scope.variable_scope("scope", custom_getter=custom_getter):
-      variable_scope.get_variable("v", [1])
-    self.assertEqual(1, called[0])
+        # Verify synchronization and aggregation kwargs are as expected.
+        self.assertEqual(kwargs["synchronization"], synchronization)
+        self.assertEqual(kwargs["aggregation"], aggregation)
+        return getter(*args, **kwargs)
 
-    with variable_scope.variable_scope("scope", custom_getter=custom_getter):
-      synchronization = variable_scope.VariableSynchronization.ON_READ
-      aggregation = variable_scope.VariableAggregation.MEAN
-      variable_scope.get_variable(
-          "v1", [1], synchronization=synchronization, aggregation=aggregation)
+      with variable_scope.variable_scope("scope", custom_getter=custom_getter):
+        variable_scope.get_variable("v", [1])
+      self.assertEqual(1, called[0])
+
+      with variable_scope.variable_scope("scope", custom_getter=custom_getter):
+        synchronization = variable_scope.VariableSynchronization.ON_READ
+        aggregation = variable_scope.VariableAggregation.MEAN
+        variable_scope.get_variable(
+            "v1", [1], synchronization=synchronization, aggregation=aggregation)
 
-    self.assertEqual(2, called[0])
+      self.assertEqual(2, called[0])
 
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable scope/v already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
+
+  @test_util.run_in_graph_and_eager_modes
   def testCustomGetterWithReuse(self):
     # Custom getter can choose to behave differently on reused variables.
     def custom_getter(getter, *args, **kwargs):
@@ -1370,15 +1668,25 @@ class VariableScopeWithCustomGetterTest(test.TestCase):
       else:
         return array_ops.identity(var, name="not_reused")
 
-    with variable_scope.variable_scope(
-        "scope", custom_getter=custom_getter) as scope:
-      v = variable_scope.get_variable("v", [1])
-    with variable_scope.variable_scope(scope, reuse=True):
-      v2 = variable_scope.get_variable("v", [1])
-
-    self.assertEqual(v.name, "not_reused:0")
-    self.assertEqual(v2.name, "reused:0")
-
+    def _f():
+      with variable_scope.variable_scope(
+          "scope", custom_getter=custom_getter) as scope:
+        v = variable_scope.get_variable("v", [1])
+      with variable_scope.variable_scope(scope, reuse=True):
+        v2 = variable_scope.get_variable("v", [1])
+
+      self.assertEqual(v.name, "not_reused:0")
+      self.assertEqual(v2.name, "reused:0")
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable scope/v already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
+
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Fetch argument <tf.Tensor 'custom_getter/add:0' shape=(1, 2, 3)
+  # dtype=float32> cannot be interpreted as a Tensor. (Tensor
+  # Tensor("custom_getter/add:0", shape=(1, 2, 3), dtype=float32) is not an
+  # element of this graph.)
   def testGetterThatCreatesTwoVariablesAndSumsThem(self):
 
     def custom_getter(getter, name, *args, **kwargs):
@@ -1401,6 +1709,11 @@ class VariableScopeWithCustomGetterTest(test.TestCase):
       np_vars, np_v = sess.run([true_vars, v])
       self.assertAllClose(np_v, sum(np_vars))
 
+  # TODO(mihaimaruseac): Not converted to use wrap_function because of
+  # ValueError: Fetch argument <tf.Tensor 'sum_getter_2/add:0' shape=(1, 2, 3)
+  # dtype=float32> cannot be interpreted as a Tensor. (Tensor
+  # Tensor("sum_getter_2/add:0", shape=(1, 2, 3), dtype=float32) is not an
+  # element of this graph.)
   def testNestedCustomGetters(self):
 
     def sum_getter(getter, name, *args, **kwargs):
@@ -1444,101 +1757,125 @@ class VariableScopeWithCustomGetterTest(test.TestCase):
           np_v, (((np_vars[0] * np_vars[1]) + (np_vars[2] * np_vars[3])) + (
               (np_vars[4] * np_vars[5]) + (np_vars[6] * np_vars[7]))))
 
+  @test_util.run_in_graph_and_eager_modes
   def testVariableCreator(self):
 
-    variable_names = []
+    def _f():
+
+      variable_names = []
 
-    def creator_a(next_creator, **kwargs):
-      variable_names.append(kwargs.get("name", ""))
-      return next_creator(**kwargs)
+      def creator_a(next_creator, **kwargs):
+        variable_names.append(kwargs.get("name", ""))
+        return next_creator(**kwargs)
 
-    def creator_b(next_creator, **kwargs):
-      kwargs["name"] = "forced_name"
-      return next_creator(**kwargs)
+      def creator_b(next_creator, **kwargs):
+        kwargs["name"] = "forced_name"
+        return next_creator(**kwargs)
 
-    with variable_scope.variable_creator_scope(creator_a):
-      with variable_scope.variable_creator_scope(creator_b):
-        variable_scope.variable(1.0, name="one_name")
+      with variable_scope.variable_creator_scope(creator_a):
+        with variable_scope.variable_creator_scope(creator_b):
+          variable_scope.variable(1.0, name="one_name")
 
-    self.assertAllEqual(variable_names, ["forced_name"])
+      self.assertAllEqual(variable_names, ["forced_name"])
 
-    called = [False]
+      called = [False]
 
-    def creater_c(next_creator, **kwargs):
-      called[0] = True
-      self.assertEqual(kwargs["synchronization"],
-                       variable_scope.VariableSynchronization.ON_WRITE)
-      self.assertEqual(kwargs["aggregation"],
-                       variable_scope.VariableAggregation.MEAN)
-      return next_creator(**kwargs)
+      def creater_c(next_creator, **kwargs):
+        called[0] = True
+        self.assertEqual(kwargs["synchronization"],
+                         variable_scope.VariableSynchronization.ON_WRITE)
+        self.assertEqual(kwargs["aggregation"],
+                         variable_scope.VariableAggregation.MEAN)
+        return next_creator(**kwargs)
 
-    with variable_scope.variable_creator_scope(creater_c):
-      variable_scope.get_variable(
-          "v", [],
-          synchronization=variable_scope.VariableSynchronization.ON_WRITE,
-          aggregation=variable_scope.VariableAggregation.MEAN)
-    self.assertTrue(called[0])
+      with variable_scope.variable_creator_scope(creater_c):
+        variable_scope.get_variable(
+            "v", [],
+            synchronization=variable_scope.VariableSynchronization.ON_WRITE,
+            aggregation=variable_scope.VariableAggregation.MEAN)
+      self.assertTrue(called[0])
+
+    # TODO(mihaimaruseac): calling _f fails with
+    # ValueError: Variable v already exists, disallowed.
+    wrap_and_execute(_f, skip_graph=True)
 
 
 class PartitionInfoTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes
   def testConstructorChecks(self):
-    # Invalid arg types.
-    with self.assertRaises(TypeError):
-      variable_scope._PartitionInfo(full_shape=None, var_offset=[0, 1])
-    with self.assertRaises(TypeError):
-      variable_scope._PartitionInfo(full_shape=[0, 1], var_offset=None)
-    with self.assertRaises(TypeError):
-      variable_scope._PartitionInfo(full_shape="foo", var_offset=[0, 1])
-    with self.assertRaises(TypeError):
-      variable_scope._PartitionInfo(full_shape=[0, 1], var_offset="foo")
-
-    # full_shape and var_offset must have same length.
-    with self.assertRaises(ValueError):
-      variable_scope._PartitionInfo(full_shape=[0, 1], var_offset=[0])
-    # Offset must always be less than shape.
-    with self.assertRaises(ValueError):
-      variable_scope._PartitionInfo(full_shape=[1, 1], var_offset=[0, 1])
 
+    def _f():
+      # Invalid arg types.
+      with self.assertRaises(TypeError):
+        variable_scope._PartitionInfo(full_shape=None, var_offset=[0, 1])
+      with self.assertRaises(TypeError):
+        variable_scope._PartitionInfo(full_shape=[0, 1], var_offset=None)
+      with self.assertRaises(TypeError):
+        variable_scope._PartitionInfo(full_shape="foo", var_offset=[0, 1])
+      with self.assertRaises(TypeError):
+        variable_scope._PartitionInfo(full_shape=[0, 1], var_offset="foo")
+
+      # full_shape and var_offset must have same length.
+      with self.assertRaises(ValueError):
+        variable_scope._PartitionInfo(full_shape=[0, 1], var_offset=[0])
+      # Offset must always be less than shape.
+      with self.assertRaises(ValueError):
+        variable_scope._PartitionInfo(full_shape=[1, 1], var_offset=[0, 1])
+
+    wrap_and_execute(_f)
+
+  @test_util.run_in_graph_and_eager_modes
   def testSingleOffset(self):
-    partition_info = variable_scope._PartitionInfo(
-        full_shape=[9, 3], var_offset=[4, 0])
-    self.assertEqual(4, partition_info.single_offset([1, 3]))
 
-    # Tests when the variable isn't partitioned at all.
-    partition_info = variable_scope._PartitionInfo(
-        full_shape=[9, 3], var_offset=[0, 0])
-    self.assertEqual(0, partition_info.single_offset([9, 3]))
+    def _f():
+      partition_info = variable_scope._PartitionInfo(
+          full_shape=[9, 3], var_offset=[4, 0])
+      self.assertEqual(4, partition_info.single_offset([1, 3]))
+
+      # Tests when the variable isn't partitioned at all.
+      partition_info = variable_scope._PartitionInfo(
+          full_shape=[9, 3], var_offset=[0, 0])
+      self.assertEqual(0, partition_info.single_offset([9, 3]))
+
+    wrap_and_execute(_f)
 
+  @test_util.run_in_graph_and_eager_modes
   def testSingleSliceDim(self):
-    partition_info = variable_scope._PartitionInfo(
-        full_shape=[9, 3], var_offset=[4, 0])
-    # Invalid shape.
-    with self.assertRaises(TypeError):
-      partition_info.single_slice_dim(None)
 
-    # Rank of shape differs from full_shape.
-    with self.assertRaises(ValueError):
-      partition_info.single_slice_dim([1, 2, 3])
+    def _f():
+      partition_info = variable_scope._PartitionInfo(
+          full_shape=[9, 3], var_offset=[4, 0])
+      # Invalid shape.
+      with self.assertRaises(TypeError):
+        partition_info.single_slice_dim(None)
 
-    # Shape is too large given var_offset (4+6 > 9).
-    with self.assertRaises(ValueError):
-      partition_info.single_slice_dim([6, 3])
+      # Rank of shape differs from full_shape.
+      with self.assertRaises(ValueError):
+        partition_info.single_slice_dim([1, 2, 3])
 
-    # Multiple possible slice dim from shape.
-    with self.assertRaises(ValueError):
-      partition_info.single_slice_dim([1, 1])
+      # Shape is too large given var_offset (4+6 > 9).
+      with self.assertRaises(ValueError):
+        partition_info.single_slice_dim([6, 3])
+
+      # Multiple possible slice dim from shape.
+      with self.assertRaises(ValueError):
+        partition_info.single_slice_dim([1, 1])
+
+      partition_info = variable_scope._PartitionInfo(
+          full_shape=[9, 3], var_offset=[0, 0])
+      self.assertEqual(1, partition_info.single_slice_dim([9, 2]))
+      partition_info = variable_scope._PartitionInfo(
+          full_shape=[9, 3], var_offset=[4, 0])
+      self.assertEqual(0, partition_info.single_slice_dim([2, 3]))
 
-    partition_info = variable_scope._PartitionInfo(
-        full_shape=[9, 3], var_offset=[0, 0])
-    self.assertEqual(1, partition_info.single_slice_dim([9, 2]))
-    partition_info = variable_scope._PartitionInfo(
-        full_shape=[9, 3], var_offset=[4, 0])
-    self.assertEqual(0, partition_info.single_slice_dim([2, 3]))
+    wrap_and_execute(_f)
 
 
 class VariableScopeMultithreadedTest(test.TestCase):
 
+  # TODO(mihaimaruseac): Not wrapping these as they cause timeouts if wrapped
+  @test_util.run_in_graph_and_eager_modes
   def testTwoThreadsDisjointScopeEntry(self):
 
     def thread_fn(i, graph):
@@ -1567,6 +1904,8 @@ class VariableScopeMultithreadedTest(test.TestCase):
     threads[1].start()
     threads[1].join()
 
+  # TODO(mihaimaruseac): Not wrapping these as they cause timeouts if wrapped
+  @test_util.run_in_graph_and_eager_modes
   def testTwoThreadsNestedScopeEntry(self):
 
     def thread_fn(i, graph, run_event, pause_event):
@@ -1604,6 +1943,8 @@ class VariableScopeMultithreadedTest(test.TestCase):
     threads[0].join()
     threads[1].join()
 
+  # TODO(mihaimaruseac): Not wrapping these as they cause timeouts if wrapped
+  @test_util.run_in_graph_and_eager_modes
   def testReenterMainScope(self):
 
     def thread_fn(graph, main_thread_scope):
-- 
GitLab


From d4eb6ab275f3db5e5ea116b1a0ac4203ef01db87 Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Tue, 16 Oct 2018 14:01:38 -0700
Subject: [PATCH 1052/1085] Fix documentation for multi-worker training.

PiperOrigin-RevId: 217381397
---
 tensorflow/contrib/distribute/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distribute/README.md b/tensorflow/contrib/distribute/README.md
index b416619fc1..f82453f3b5 100644
--- a/tensorflow/contrib/distribute/README.md
+++ b/tensorflow/contrib/distribute/README.md
@@ -190,7 +190,7 @@ in the input function gives a solid boost in performance. When using
 For multi-worker training, no code change is required to the `Estimator` code.
 You can run the same model code for all tasks in your cluster including
 parameter servers and the evaluator. But you need to use
-`tf.estimator.train_and_evaluator`, explicitly specify `num_gpus_per_workers`
+`tf.estimator.train_and_evaluate`, explicitly specify `num_gpus_per_workers`
 for your strategy object, and set "TF\_CONFIG" environment variables for each
 binary running in your cluster. We'll provide a Kubernetes template in the
 [tensorflow/ecosystem](https://github.com/tensorflow/ecosystem) repo which sets
-- 
GitLab


From c2250e40335e212d9fd1b035d1ea65beb61a4eca Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 16 Oct 2018 14:08:30 -0700
Subject: [PATCH 1053/1085] Update windows RBE toolchains to bazel 0.18, and
 update the toolchain image.

PiperOrigin-RevId: 217382795
---
 third_party/toolchains/preconfig/win_1803/BUILD                 | 2 +-
 .../preconfig/win_1803/{bazel_6f8e36b => bazel_018}/BUILD       | 0
 .../preconfig/win_1803/{bazel_6f8e36b => bazel_018}/CROSSTOOL   | 0
 .../win_1803/{bazel_6f8e36b => bazel_018}/dummy_toolchain.bzl   | 0
 4 files changed, 1 insertion(+), 1 deletion(-)
 rename third_party/toolchains/preconfig/win_1803/{bazel_6f8e36b => bazel_018}/BUILD (100%)
 rename third_party/toolchains/preconfig/win_1803/{bazel_6f8e36b => bazel_018}/CROSSTOOL (100%)
 rename third_party/toolchains/preconfig/win_1803/{bazel_6f8e36b => bazel_018}/dummy_toolchain.bzl (100%)

diff --git a/third_party/toolchains/preconfig/win_1803/BUILD b/third_party/toolchains/preconfig/win_1803/BUILD
index 6b798dfc3a..45209d260d 100644
--- a/third_party/toolchains/preconfig/win_1803/BUILD
+++ b/third_party/toolchains/preconfig/win_1803/BUILD
@@ -17,7 +17,7 @@ platform(
     remote_execution_properties = """
         properties:{
           name:"container-image"
-          value:"docker://gcr.io/tensorflow-testing/tf-rbe-win@sha256:b2eeb661e0134ef96a4736677e8f96a90970bc206dea93739cd711031b62a0e5"
+          value:"docker://gcr.io/tensorflow-testing/tf-rbe-win@sha256:bd22c6bfff6afc1fa4304ec4411df2410d93645494117585332a4e2258358422"
         }
         properties:{
           name: "OSFamily" value: "Windows"
diff --git a/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/BUILD b/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
similarity index 100%
rename from third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/BUILD
rename to third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
diff --git a/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/CROSSTOOL b/third_party/toolchains/preconfig/win_1803/bazel_018/CROSSTOOL
similarity index 100%
rename from third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/CROSSTOOL
rename to third_party/toolchains/preconfig/win_1803/bazel_018/CROSSTOOL
diff --git a/third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/dummy_toolchain.bzl b/third_party/toolchains/preconfig/win_1803/bazel_018/dummy_toolchain.bzl
similarity index 100%
rename from third_party/toolchains/preconfig/win_1803/bazel_6f8e36b/dummy_toolchain.bzl
rename to third_party/toolchains/preconfig/win_1803/bazel_018/dummy_toolchain.bzl
-- 
GitLab


From f024ddcda099ed233e3384c43e0b62f5c328076b Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Tue, 16 Oct 2018 14:22:13 -0700
Subject: [PATCH 1054/1085] When converting the inputs to tensor the dtype
 should automatically be inferred from the input rather the using the layer
 dtype as those can be different when doing mixed precision training.

PiperOrigin-RevId: 217385423
---
 tensorflow/python/keras/layers/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py
index efa21955e6..ef81ec76c3 100644
--- a/tensorflow/python/keras/layers/core.py
+++ b/tensorflow/python/keras/layers/core.py
@@ -956,7 +956,7 @@ class Dense(Layer):
     self.built = True
 
   def call(self, inputs):
-    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
+    inputs = ops.convert_to_tensor(inputs)
     rank = common_shapes.rank(inputs)
     if rank > 2:
       # Broadcasting is required for the inputs.
-- 
GitLab


From 864ab17cac9bf8deb0928b1aa8e6a96ece290c78 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 14:30:39 -0700
Subject: [PATCH 1055/1085] Internal-only change

PiperOrigin-RevId: 217387046
---
 tensorflow/core/BUILD                             | 4 +++-
 tensorflow/core/platform/default/build_config.bzl | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 841291e6d8..4f95f207ad 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -108,6 +108,7 @@ load(
     "tf_additional_device_tracer_cuda_deps",
     "tf_additional_device_tracer_deps",
     "tf_additional_device_tracer_srcs",
+    "tf_additional_device_tracer_test_flags",
     "tf_additional_gdr_lib_defines",
     "tf_additional_human_readable_json_deps",
     "tf_additional_lib_defines",
@@ -4654,7 +4655,8 @@ tf_cc_test_gpu(
     name = "device_tracer_test",
     size = "small",
     srcs = ["platform/device_tracer_test.cc"],
-    args = ["--heap_check=local"],
+    args =
+        ["--heap_check=local"] + tf_additional_device_tracer_test_flags(),
     linkstatic = tf_kernel_tests_linkstatic(),
     tags = tf_cuda_tests_tags() + ["nomac"],
     deps = [
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 927dbbc5b3..c9c89d066e 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -585,6 +585,9 @@ def tf_additional_device_tracer_cuda_deps():
 def tf_additional_device_tracer_deps():
     return []
 
+def tf_additional_device_tracer_test_flags():
+    return []
+
 def tf_additional_libdevice_data():
     return []
 
-- 
GitLab


From 14ab986f5ff8dd28fc3a45df24f2a959b19aeb8b Mon Sep 17 00:00:00 2001
From: Alan Chiao <alanchiao@google.com>
Date: Tue, 16 Oct 2018 14:37:18 -0700
Subject: [PATCH 1056/1085] Internal change.

PiperOrigin-RevId: 217388369
---
 .../contrib/lite/kernels/sparse_output_fully_connected.cc  | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
index 226bba2d47..66daf5e84a 100644
--- a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
+++ b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc
@@ -118,9 +118,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         GetTemporary(context, node, /*index=*/kScalingFactors);
     scaling_factors->type = kTfLiteFloat32;
     scaling_factors->allocation_type = kTfLiteArenaRw;
-    TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
-    scaling_factors_size->data[0] = n_batch;
-    if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) {
+    int scaling_dims[1] = {n_batch};
+    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
+      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
+      scaling_factors_size->data[0] = n_batch;
       TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                        scaling_factors_size));
     }
-- 
GitLab


From ad836327e4d914528ee5c542974ebfba507670dd Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Tue, 16 Oct 2018 14:40:21 -0700
Subject: [PATCH 1057/1085] Replacing legacy_init_op argument in
 SavedModelBuilder with main_op.

PiperOrigin-RevId: 217389035
---
 tensorflow/examples/saved_model/saved_model_half_plus_two.py | 2 +-
 tensorflow/python/estimator/estimator.py                     | 5 ++---
 tensorflow/python/saved_model/simple_save.py                 | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/examples/saved_model/saved_model_half_plus_two.py b/tensorflow/examples/saved_model/saved_model_half_plus_two.py
index 2d1e0c6f6d..72c3b9778b 100644
--- a/tensorflow/examples/saved_model/saved_model_half_plus_two.py
+++ b/tensorflow/examples/saved_model/saved_model_half_plus_two.py
@@ -215,7 +215,7 @@ def _generate_saved_model_for_half_plus_two(export_dir,
           sess, [tf.saved_model.tag_constants.SERVING],
           signature_def_map=signature_def_map,
           assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
-          legacy_init_op=tf.group(assign_filename_op))
+          main_op=tf.group(assign_filename_op))
   builder.save(as_text)
 
 
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index e6d82f0db7..3c1be9dbad 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1033,10 +1033,9 @@ class Estimator(object):
         meta_graph_kwargs = dict(
             tags=export_tags,
             signature_def_map=signature_def_map,
-            assets_collection=ops.get_collection(
-                ops.GraphKeys.ASSET_FILEPATHS),
+            assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS),
             strip_default_attrs=strip_default_attrs,
-            legacy_init_op=local_init_op,
+            main_op=local_init_op,
             saver=graph_saver)
 
         if save_variables:
diff --git a/tensorflow/python/saved_model/simple_save.py b/tensorflow/python/saved_model/simple_save.py
index 042b8fa8e2..76d6f666f6 100644
--- a/tensorflow/python/saved_model/simple_save.py
+++ b/tensorflow/python/saved_model/simple_save.py
@@ -81,6 +81,6 @@ def simple_save(session, export_dir, inputs, outputs, legacy_init_op=None):
       tags=[tag_constants.SERVING],
       signature_def_map=signature_def_map,
       assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS),
-      legacy_init_op=legacy_init_op,
+      main_op=legacy_init_op,
       clear_devices=True)
   b.save()
-- 
GitLab


From 94ab1a9e82fb7630b1aae44984d73968508dc917 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 16 Oct 2018 14:44:33 -0700
Subject: [PATCH 1058/1085] Update the python license, as its URL has been
 updated.

Triages one issue reported in #22741

PiperOrigin-RevId: 217389830
---
 tensorflow/workspace.bzl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 81e6676a97..25eed7afec 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -348,9 +348,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         name = "org_python_license",
         licenses = ["notice"],  # Python 2.0
         sha256_urls = {
-            "b5556e921715ddb9242c076cae3963f483aa47266c5e37ea4c187f77cc79501c": [
-                "https://mirror.bazel.build/docs.python.org/2.7/_sources/license.txt",
-                "https://docs.python.org/2.7/_sources/license.txt",
+            "7ca8f169368827781684f7f20876d17b4415bbc5cb28baa4ca4652f0dda05e9f": [
+                "https://mirror.bazel.build/docs.python.org/2.7/_sources/license.rst.txt",
+                "https://docs.python.org/2.7/_sources/license.rst.txt",
             ],
         },
     )
-- 
GitLab


From b28c5f471e486d86564669721887f284ed402ff8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 15:06:47 -0700
Subject: [PATCH 1059/1085] Move IsIdentityN with single output check out of
 IsIdentity.

PiperOrigin-RevId: 217394520
---
 tensorflow/core/grappler/costs/graph_properties.cc     |  2 +-
 tensorflow/core/grappler/op_types.cc                   |  8 +++++---
 tensorflow/core/grappler/op_types.h                    |  1 +
 .../core/grappler/optimizers/constant_folding.cc       |  7 ++++---
 .../core/grappler/optimizers/dependency_optimizer.cc   | 10 +++++-----
 tensorflow/core/grappler/optimizers/model_pruner.cc    |  2 +-
 .../core/grappler/optimizers/pin_to_host_optimizer.cc  |  2 +-
 7 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index dd6ce0c132..6a6b14276a 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -1111,7 +1111,7 @@ class SymbolicShapeRefiner {
           c->output_tensors_as_shapes.resize(1);
           c->output_tensors_as_shapes[0] = ic->MakeShape(dims);
         }
-      } else if (IsIdentity(node)) {
+      } else if (IsIdentity(node) || IsIdentityNSingleInput(node)) {
         c->output_tensors_as_shapes.resize(1);
         c->output_tensors_as_shapes[0] = ic->input_tensors_as_shapes()[0];
         if (c->input_tensor_protos[0] != nullptr) {
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 1b5a215987..be7411019f 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -216,9 +216,6 @@ bool IsHistogramSummary(const NodeDef& node) {
 
 bool IsIdentity(const NodeDef& node) {
   const auto& op = node.op();
-  if (op == "IdentityN" && node.attr().at("T").list().type_size() == 1) {
-    return true;
-  }
   return op == "Identity" || op == "RefIdentity";
 }
 
@@ -227,6 +224,11 @@ bool IsIdentityN(const NodeDef& node) {
   return op == "IdentityN";
 }
 
+bool IsIdentityNSingleInput(const NodeDef& node) {
+  return IsIdentityN(node) && node.attr().count("T") != 0 &&
+         node.attr().at("T").list().type_size() == 1;
+}
+
 bool IsIgamma(const NodeDef& node) { return node.op() == "Igamma"; }
 
 bool IsIgammac(const NodeDef& node) { return node.op() == "Igammac"; }
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index d4e0159e81..92b62944b7 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -71,6 +71,7 @@ bool IsGreaterEqual(const NodeDef& node);
 bool IsHistogramSummary(const NodeDef& node);
 bool IsIdentity(const NodeDef& node);
 bool IsIdentityN(const NodeDef& node);
+bool IsIdentityNSingleInput(const NodeDef& node);
 bool IsIgamma(const NodeDef& node);
 bool IsIgammac(const NodeDef& node);
 bool IsImag(const NodeDef& node);
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 11331c9406..c963f96858 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -187,7 +187,7 @@ string ConstantFolding::AddControlDependency(const string& input_name,
     // switch node, and use it to anchor the control dependency.
     auto outputs = node_map->GetOutputs(node->name());
     for (const NodeDef* output : outputs) {
-      if (IsIdentity(*output)) {
+      if (IsIdentity(*output) || IsIdentityNSingleInput(*output)) {
         if (IsSameInput(node->input(0), input_name)) {
           return AsControlDependency(*output);
         }
@@ -2200,7 +2200,7 @@ bool ConstantFolding::SimplifySwitch(GraphDef* optimized_graph, NodeDef* node) {
     auto fanouts = node_map_->GetOutputs(node->name());
     if (fanouts.size() == 2) {
       for (NodeDef* fanout : fanouts) {
-        if (!IsIdentity(*fanout) ||
+        if ((!IsIdentity(*fanout) && !IsIdentityNSingleInput(*fanout)) ||
             NumNonControlOutputs(*fanout, *node_map_) > 0) {
           already_optimized = false;
           break;
@@ -2679,7 +2679,8 @@ bool ConstantFolding::MulConvPushDown(NodeDef* node,
 
 bool ConstantFolding::PartialConstPropThroughIdentityN(NodeDef* node) {
   // Partial constant propagation through IdentityN.
-  if (IsIdentityN(*node) && NumNonControlInputs(*node) > 0) {
+  if ((IsIdentityN(*node) || IsIdentityNSingleInput(*node)) &&
+      NumNonControlInputs(*node) > 0) {
     const std::set<NodeDef*>& tmp = node_map_->GetOutputs(node->name());
     const std::vector<NodeDef*> consumers(tmp.begin(), tmp.end());
     bool updated_graph = false;
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 6613768a35..0938c27b1f 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -57,7 +57,7 @@ bool RemoveInput(NodeDef* node, const string& input, NodeMap* node_map) {
 }  // namespace
 
 bool DependencyOptimizer::SafeToRemoveIdentity(const NodeDef& node) const {
-  if (!IsIdentity(node)) {
+  if (!IsIdentity(node) && !IsIdentityNSingleInput(node)) {
     return true;
   }
 
@@ -136,7 +136,7 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) const {
 bool DependencyOptimizer::BypassingNodeIsBeneficial(
     const NodeDef& node, const std::vector<NodeDef*>& input_nodes,
     const std::vector<NodeDef*>& output_nodes) const {
-  const bool is_identity = IsIdentity(node);
+  const bool is_identity = IsIdentity(node) || IsIdentityNSingleInput(node);
   const int num_outputs = output_nodes.size();
   const int num_inputs = node.input_size();
 
@@ -193,7 +193,7 @@ void DependencyOptimizer::OptimizeNode(int node_idx,
                                        std::set<int>* nodes_to_delete) {
   NodeDef* node = optimized_graph_->mutable_node(node_idx);
   const bool is_noop = IsNoOp(*node);
-  const bool is_identity = IsIdentity(*node);
+  const bool is_identity = IsIdentity(*node) || IsIdentityNSingleInput(*node);
   const string node_name = node->name();
   // Constant nodes with no input control dependency are always executed early,
   // so we can prune all their output control dependencies.
@@ -415,8 +415,8 @@ Status DependencyOptimizer::OptimizeDependencies() {
   std::set<int> nodes_to_delete;
   for (int i = 0; i < optimized_graph_->node_size(); ++i) {
     const NodeDef& node = optimized_graph_->node(i);
-    if (IsNoOp(node) || IsIdentity(node) || IsConstant(node) ||
-        SafeToConvertToNoOp(node)) {
+    if (IsNoOp(node) || IsIdentity(node) || IsIdentityNSingleInput(node) ||
+        IsConstant(node) || SafeToConvertToNoOp(node)) {
       nodes_to_simplify.PushBack(i);
     }
   }
diff --git a/tensorflow/core/grappler/optimizers/model_pruner.cc b/tensorflow/core/grappler/optimizers/model_pruner.cc
index 36eab4999d..c4fa162c59 100644
--- a/tensorflow/core/grappler/optimizers/model_pruner.cc
+++ b/tensorflow/core/grappler/optimizers/model_pruner.cc
@@ -32,7 +32,7 @@ bool IsTrivialOp(const NodeDef& node, const GraphRewriter& rewriter) {
   if (IsStopGradient(node)) {
     return true;
   }
-  if (IsIdentity(node)) {
+  if (IsIdentity(node) || IsIdentityNSingleInput(node)) {
     if (rewriter.FeedsMerge(node) || rewriter.IsDrivenBySwitch(node) ||
         rewriter.IsDrivenByControlDependency(node) ||
         rewriter.DrivesControlDependency(node)) {
diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
index 8278bf8289..db453f8521 100644
--- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc
@@ -113,7 +113,7 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph,
 
   // These nodes may be optimized away downstream (even if pinned to Host), we
   // should (recusively) check their source.
-  if (IsIdentity(node)) {
+  if (IsIdentity(node) || IsIdentityNSingleInput(node)) {
     for (const auto& fanin : graph.GetFanins(node, false)) {
       bool fanin_candidate = false;
       TF_RETURN_IF_ERROR(IsNodeOutputPortHostFriendly(
-- 
GitLab


From 65d6feba5530d34aba6aed522b98fb9d55278316 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 15:11:14 -0700
Subject: [PATCH 1060/1085] Update XLA ORC compiler pieces for LLVM r344572

PiperOrigin-RevId: 217395302
---
 .../compiler/xla/service/cpu/simple_orc_jit.cc | 18 +++++++++---------
 .../compiler/xla/service/cpu/simple_orc_jit.h  |  4 ++--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index 9ec0c8f657..f77641eb7d 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -108,15 +108,15 @@ SimpleOrcJIT::SimpleOrcJIT(const llvm::TargetOptions& target_options,
           [](llvm::Error Err) {
             cantFail(std::move(Err), "lookupFlags failed");
           })),
-      object_layer_(execution_session_,
-                    [this](llvm::orc::VModuleKey) {
-                      llvm::orc::RTDyldObjectLinkingLayer::Resources result;
-                      result.MemMgr =
-                          std::make_shared<llvm::SectionMemoryManager>(
-                              orc_jit_memory_mapper::GetInstance());
-                      result.Resolver = symbol_resolver_;
-                      return result;
-                    }),
+      object_layer_(
+          execution_session_,
+          [this](llvm::orc::VModuleKey) {
+            llvm::orc::LegacyRTDyldObjectLinkingLayer::Resources result;
+            result.MemMgr = std::make_shared<llvm::SectionMemoryManager>(
+                orc_jit_memory_mapper::GetInstance());
+            result.Resolver = symbol_resolver_;
+            return result;
+          }),
       compile_layer_(object_layer_,
                      CompilerFunctor(target_machine_.get(), &disassembler_,
                                      opt_level, optimize_for_size,
diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h
index d74b63fcf4..78406ba143 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h
@@ -44,9 +44,9 @@ namespace cpu {
 // it's added to the JIT.
 class SimpleOrcJIT {
  public:
-  using ObjLayerT = llvm::orc::RTDyldObjectLinkingLayer;
+  using ObjLayerT = llvm::orc::LegacyRTDyldObjectLinkingLayer;
   using CompileFtor = std::function<ObjLayerT::ObjectPtr(llvm::Module&)>;
-  using CompileLayerT = llvm::orc::IRCompileLayer<ObjLayerT, CompileFtor>;
+  using CompileLayerT = llvm::orc::LegacyIRCompileLayer<ObjLayerT, CompileFtor>;
   using VModuleKeyT = llvm::orc::VModuleKey;
 
   // Create a new JIT, targeting the host architecture.
-- 
GitLab


From 8f435f9e89f4801ecb0aa993e496e64fd7e9981a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 15:12:42 -0700
Subject: [PATCH 1061/1085] [XLA:TPU] Implementation of Gather from rank 2
 tensors on TPUs.

PiperOrigin-RevId: 217395583
---
 tensorflow/compiler/xla/service/gather_expander.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/gather_expander.h b/tensorflow/compiler/xla/service/gather_expander.h
index 2b39359aae..8af9c6b71f 100644
--- a/tensorflow/compiler/xla/service/gather_expander.h
+++ b/tensorflow/compiler/xla/service/gather_expander.h
@@ -28,7 +28,7 @@ class GatherExpander : public HloModulePass {
   absl::string_view name() const override { return "gather_expander"; }
   StatusOr<bool> Run(HloModule* module) override;
 
- private:
+ protected:
   StatusOr<HloInstruction*> ExpandGather(HloInstruction* gather_instr);
 };
 
-- 
GitLab


From e4e19db364cf7ef0ac22cdf1cb55d4cdd30bec00 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Tue, 16 Oct 2018 15:27:50 -0700
Subject: [PATCH 1062/1085] Support arbitrary many values in KeyValueSort on
 CPU backend.

PiperOrigin-RevId: 217398356
---
 .../compiler/xla/service/cpu/ir_emitter.cc    | 119 +++++++++--------
 .../xla/service/cpu/runtime_key_value_sort.cc | 123 ++++++++++--------
 .../xla/service/cpu/runtime_key_value_sort.h  |  60 +++++----
 3 files changed, 168 insertions(+), 134 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index b2abdb39a5..50a8d0b1a5 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -54,6 +54,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
 #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
@@ -493,53 +494,44 @@ Status IrEmitter::HandleOutfeed(HloInstruction* outfeed) {
   return Status::OK();
 }
 
-Status IrEmitter::HandleSort(HloInstruction* sort) {
+Status IrEmitter::HandleSort(HloInstruction* hlo) {
+  const HloSortInstruction* sort = Cast<HloSortInstruction>(hlo);
   TF_RETURN_IF_ERROR(EmitTargetAddressForOp(sort));
-  auto keys = sort->operand(0);
-  auto values = sort->operand_count() > 1 ? sort->operand(1) : nullptr;
-  ShapeIndex keys_shape_index({});
-  ShapeIndex values_shape_index({});
-  if (values != nullptr) {
-    keys_shape_index = ShapeIndex({0});
-    values_shape_index = ShapeIndex({1});
-  }
-  auto keys_destination = GetAllocationSlice(*sort, keys_shape_index);
-  auto keys_destination_address =
-      EmitBufferPointer(keys_destination, keys->shape());
-  auto values_destination = GetAllocationSlice(*sort, values_shape_index);
-  llvm::Value* values_destination_address = nullptr;
-
-  // The sort is implemented in-place, therefore we first copy the operand
-  // buffer to the output buffer if they are not the same.
-  if (keys_destination != GetAllocationSlice(*keys)) {
-    int64 primitive_type_size =
-        ShapeUtil::ByteSizeOfPrimitiveType(keys->shape().element_type());
-    auto source_buffer = GetEmittedValueFor(keys);
-    int64 keys_size = ByteSizeOf(keys->shape());
-    MemCpy(keys_destination_address, /*DstAlign=*/primitive_type_size,
-           source_buffer,
-           /*SrcAlign=*/primitive_type_size, keys_size);
-  }
-  if (values != nullptr) {
-    values_destination_address =
-        EmitBufferPointer(values_destination, values->shape());
-    if (values_destination != GetAllocationSlice(*values)) {
+  Shape keys_shape = sort->keys()->shape();
+  std::vector<llvm::Value*> destination_addresses(sort->operand_count());
+  for (int64 i = 0; i < sort->operand_count(); ++i) {
+    ShapeIndex shape_index =
+        sort->values_count() > 0 ? ShapeIndex({i}) : ShapeIndex({});
+    const HloInstruction* operand = sort->operand(i);
+    // We assume that the layout of all involved operands and outputs is the
+    // same.
+    TF_RET_CHECK(
+        LayoutUtil::LayoutsInShapesEqual(keys_shape, operand->shape()));
+    TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(
+        keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index)));
+
+    // The sort is implemented in-place, therefore we first copy the operand
+    // buffer to the output buffer if they are not the same.
+    auto destination_buffer = GetAllocationSlice(*sort, shape_index);
+    destination_addresses[i] =
+        EmitBufferPointer(destination_buffer, operand->shape());
+    auto source_address = GetAllocationSlice(*operand);
+    if (destination_buffer != source_address) {
       int64 primitive_type_size =
-          ShapeUtil::ByteSizeOfPrimitiveType(values->shape().element_type());
-      auto source_buffer = GetEmittedValueFor(values);
-      int64 values_size = ByteSizeOf(values->shape());
-      MemCpy(values_destination_address, /*DstAlign=*/primitive_type_size,
+          ShapeUtil::ByteSizeOfPrimitiveType(operand->shape().element_type());
+      auto source_buffer = GetEmittedValueFor(operand);
+      int64 size = ByteSizeOf(operand->shape());
+      MemCpy(destination_addresses[i], /*DstAlign=*/primitive_type_size,
              source_buffer,
-             /*SrcAlign=*/primitive_type_size, values_size);
+             /*SrcAlign=*/primitive_type_size, size);
     }
   }
 
   // Normalize the shape and the dimension to sort.
   Shape normalized_keys_shape =
-      ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
-          keys->shape());
+      ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(keys_shape);
   int64 physical_dimension_to_sort = LayoutUtil::MakeLogicalToPhysical(
-      keys->shape().layout())[sort->dimensions(0)];
+      keys_shape.layout())[sort->sort_dimension()];
 
   int64 sort_dimension_elements =
       normalized_keys_shape.dimensions(physical_dimension_to_sort);
@@ -553,7 +545,7 @@ Status IrEmitter::HandleSort(HloInstruction* sort) {
     lower_dimensions *= normalized_keys_shape.dimensions(i);
   }
 
-  PrimitiveType keys_type = keys->shape().element_type();
+  PrimitiveType keys_type = keys_shape.element_type();
   const char* fn_name = nullptr;
   llvm::Type* keys_native_type = nullptr;
   switch (keys_type) {
@@ -614,28 +606,49 @@ Status IrEmitter::HandleSort(HloInstruction* sort) {
   llvm::FunctionType* key_value_sort_type = llvm::FunctionType::get(
       b_.getVoidTy(),
       {keys_native_type, b_.getInt64Ty(), b_.getInt64Ty(), b_.getInt64Ty(),
-       b_.getInt8PtrTy(), b_.getInt32Ty()},
+       b_.getInt8PtrTy()->getPointerTo(), b_.getInt32Ty(),
+       b_.getInt32Ty()->getPointerTo()},
       /*isVarArg=*/false);
   auto* key_value_sort_func = llvm::cast<llvm::Function>(
       module_->getOrInsertFunction(fn_name, key_value_sort_type));
   key_value_sort_func->setCallingConv(llvm::CallingConv::C);
   key_value_sort_func->setDoesNotThrow();
   key_value_sort_func->setOnlyAccessesArgMemory();
+  llvm::Value* values;
+  llvm::Value* sizes;
+  if (sort->values_count() == 0) {
+    values = llvm::Constant::getNullValue(b_.getInt8PtrTy()->getPointerTo());
+    sizes = llvm::Constant::getNullValue(b_.getInt32Ty()->getPointerTo());
+  } else {
+    values = llvm_ir::EmitAllocaAtFunctionEntryWithCount(
+        b_.getInt8PtrTy(), b_.getInt32(sort->values_count()),
+        "cc_values_alloca", &b_);
+    sizes = llvm_ir::EmitAllocaAtFunctionEntryWithCount(
+        b_.getInt32Ty(), b_.getInt32(sort->values_count()), "cc_sizes_alloca",
+        &b_);
+    for (int64 i = 0; i < sort->values_count(); ++i) {
+      llvm::Value* value_as_i8ptr =
+          PointerCast(destination_addresses[i + 1], b_.getInt8PtrTy());
+      llvm::Value* slot_in_values_alloca =
+          ConstInBoundsGEP1_32(b_.getInt8PtrTy(), values, i);
+      Store(value_as_i8ptr, slot_in_values_alloca);
+      llvm::Value* slot_in_sizes_alloca =
+          ConstInBoundsGEP1_32(b_.getInt32Ty(), sizes, i);
+      llvm::Value* size = b_.getInt32(ShapeUtil::ByteSizeOfPrimitiveType(
+          sort->operand(i + 1)->shape().element_type()));
+      Store(size, slot_in_sizes_alloca);
+    }
+  }
+
   Call(key_value_sort_func,
-       {PointerCast(keys_destination_address, keys_native_type),
+       {PointerCast(destination_addresses[0], keys_native_type),
         b_.getInt64(higher_dimensions), b_.getInt64(sort_dimension_elements),
-        b_.getInt64(lower_dimensions),
-        values != nullptr
-            ? PointerCast(values_destination_address, b_.getInt8PtrTy())
-            : llvm::Constant::getNullValue(b_.getInt8PtrTy()),
-        b_.getInt32(values != nullptr ? ShapeUtil::ByteSizeOfPrimitiveType(
-                                            values->shape().element_type())
-                                      : 0)});
-
-  if (values != nullptr) {
-    llvm_ir::EmitTuple(GetIrArrayFor(sort),
-                       {keys_destination_address, values_destination_address},
-                       &b_, module_);
+        b_.getInt64(lower_dimensions), values,
+        b_.getInt32(sort->values_count()), sizes});
+
+  if (sort->values_count() > 0) {
+    llvm_ir::EmitTuple(GetIrArrayFor(sort), destination_addresses, &b_,
+                       module_);
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
index e0e7deb98e..bbbb634f73 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
@@ -99,8 +99,9 @@ void KeyValueSort(std::pair<Eigen::half, int64>* row_to_sort,
 }
 
 template <typename KeyType>
-void KeyValueSortImpl(KeyType* keys, int64 a, int64 b, int64 c, char* values,
-                      int32 values_primitive_type_size_in_bytes) {
+void KeyValueSortImpl(KeyType* keys, int64 a, int64 b, int64 c, char** values,
+                      int32 values_count,
+                      int32* values_primitive_type_size_in_bytes) {
   // High-level idea of the iteration/sorting logic:
   // Conceptually we have a 3-dimensional shape [a, b, c]. b corresponds to the
   // dimension to sort, c is the product of the more minor dimensions (set to 1
@@ -129,7 +130,7 @@ void KeyValueSortImpl(KeyType* keys, int64 a, int64 b, int64 c, char* values,
         index % sort_dimension_offset +
         (index - index % sort_dimension_offset) * sort_dimension_elements;
     // TODO(b/26783907): We could define a custom iterator class that references
-    // both arrays. Then we could avoid the intermediate copy. However this
+    // all arrays. Then we could avoid the intermediate copy. However this
     // would become more complicated, and it is not clear if the benefit is high
     // enough.
     for (int64 i = 0; i < sort_dimension_elements; ++i) {
@@ -140,97 +141,109 @@ void KeyValueSortImpl(KeyType* keys, int64 a, int64 b, int64 c, char* values,
     for (int64 i = 0; i < sort_dimension_elements; ++i) {
       keys[base_offset + i * sort_dimension_offset] = row_to_sort[i].first;
     }
-    if (values == nullptr) {
-      continue;
-    }
 
     // Reorder the values according to the order defined by the keys.
-    for (int64 i = 0; i < sort_dimension_elements; ++i) {
-      int64 memory_index =
-          (base_offset + row_to_sort[i].second * sort_dimension_offset) *
-          values_primitive_type_size_in_bytes;
-
-      reordered_values[i] = std::string(values + memory_index,
-                                        values_primitive_type_size_in_bytes);
-    }
-    for (int64 i = 0; i < sort_dimension_elements; ++i) {
-      int64 memory_index = (base_offset + i * sort_dimension_offset) *
-                           values_primitive_type_size_in_bytes;
-      memcpy(values + memory_index, reordered_values[i].c_str(),
-             values_primitive_type_size_in_bytes);
+    for (int32 idx = 0; idx < values_count; ++idx) {
+      for (int64 i = 0; i < sort_dimension_elements; ++i) {
+        int64 memory_index =
+            (base_offset + row_to_sort[i].second * sort_dimension_offset) *
+            values_primitive_type_size_in_bytes[idx];
+
+        reordered_values[i] =
+            std::string(values[idx] + memory_index,
+                        values_primitive_type_size_in_bytes[idx]);
+      }
+      for (int64 i = 0; i < sort_dimension_elements; ++i) {
+        int64 memory_index = (base_offset + i * sort_dimension_offset) *
+                             values_primitive_type_size_in_bytes[idx];
+        memcpy(values[idx] + memory_index, reordered_values[i].c_str(),
+               values_primitive_type_size_in_bytes[idx]);
+      }
     }
   }
 }
 }  // namespace
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortPRED(
-    bool* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    bool* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS8(
-    int8* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    int8* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU8(
-    uint8* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    uint8* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS16(
-    int16* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    int16* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU16(
-    uint16* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    uint16* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortF16(
-    Eigen::half* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    Eigen::half* keys, int64 a, int64 b, int64 c, char** values,
+    int32 values_count, int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS32(
-    int32* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    int32* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU32(
-    uint32* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    uint32* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortF32(
-    float* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    float* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS64(
-    int64* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    int64* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU64(
-    uint64* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    uint64* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortF64(
-    double* keys, int64 a, int64 b, int64 c, char* values,
-    int32 values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
+    double* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
+    int32* values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_count,
+                   values_primitive_type_size_in_bytes);
 }
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
index 28e35e82c1..7821099386 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
+++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
@@ -22,67 +22,75 @@ limitations under the License.
 extern "C" {
 
 // 'keys' represents a 3-dimensional shape with dimensions [a, b, c]. The 'b'
-// dimension of 'keys' is sorted into ascending order. 'values' can be nullptr.
-// If 'values' is not nullptr, the elements in 'values' are reordered in such a
-// way that if the element at index 'i' in 'keys' was moved to index 'j', the
-// element at index 'i' in 'values' is also moved to index 'j' (which means that
-// the same elements correspond to each other as before).
+// dimension of 'keys' is sorted into ascending order. If 'values_count' is <=
+// 0, 'values' and 'values_primitive_type_size_in_bytes' can be nullptr.
+// If 'values_count' > 0, they contain exactly 'values_count' many elements.
+// Each element of 'values' also represents a 3-dimensional shape with
+// dimensions [a, b, c], and the size of the primitive type of the i-th shape
+// has exactly 'values_primitive_type_size_in_bytes[i]' bytes. The elements in
+// each 'values' shape are reordered in such a way that if the element at index
+// 'i' in 'keys' was moved to index 'j', the element at index 'i' in a 'values'
+// shape is also moved to index 'j' (which means that the same elements
+// correspond to each other as before).
 extern void __xla_cpu_runtime_KeyValueSortPRED(
     bool* keys, tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
-    char* values, tensorflow::int32 values_primitive_type_size_in_bytes);
+    char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortS8(
     tensorflow::int8* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char* values,
-    tensorflow::int32 values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortU8(
     tensorflow::uint8* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char* values,
-    tensorflow::int32 values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortS16(
     tensorflow::int16* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char* values,
-    tensorflow::int32 values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortU16(
     tensorflow::uint16* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char* values,
-    tensorflow::int32 values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortF16(
     Eigen::half* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char* values,
-    tensorflow::int32 values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortS32(
     tensorflow::int32* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char* values,
-    tensorflow::int32 values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortU32(
     tensorflow::uint32* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char* values,
-    tensorflow::int32 values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortF32(
     float* keys, tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
-    char* values, tensorflow::int32 values_primitive_type_size_in_bytes);
+    char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortS64(
     tensorflow::int64* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char* values,
-    tensorflow::int32 values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortU64(
     tensorflow::uint64* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char* values,
-    tensorflow::int32 values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortF64(
     double* keys, tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
-    char* values, tensorflow::int32 values_primitive_type_size_in_bytes);
+    char** values, tensorflow::int32 values_count,
+    tensorflow::int32* values_primitive_type_size_in_bytes);
 }
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_KEY_VALUE_SORT_H_
-- 
GitLab


From e6440a80c846ef1d29428348356d41f0d7a36eba Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 16 Oct 2018 15:37:30 -0700
Subject: [PATCH 1063/1085] [TF:XLA] Bump open source abseil revision to
 5b70a8910b2e6fb0ce5193a41873139a126d2f7f

PiperOrigin-RevId: 217400075
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 25eed7afec..7b11efeafc 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -121,11 +121,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "225b683f2f866cd12b868e43b5af00e032e70e3f78ff332108b0ce15d41f6091",
-        strip_prefix = "abseil-cpp-a00bdd176d66ef0b417d9576052a19091fbdf891",
+        sha256 = "4648b8738c059e6061b0dd49c87c139eb5d1e95973d790cf5fcecdbb1d6993ce",
+        strip_prefix = "abseil-cpp-5b70a8910b2e6fb0ce5193a41873139a126d2f7f",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/a00bdd176d66ef0b417d9576052a19091fbdf891.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/a00bdd176d66ef0b417d9576052a19091fbdf891.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/5b70a8910b2e6fb0ce5193a41873139a126d2f7f.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/5b70a8910b2e6fb0ce5193a41873139a126d2f7f.tar.gz",
         ],
     )
 
-- 
GitLab


From a3f855aca20d212386fd19c46adcc1bea51ceed1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 16:14:24 -0700
Subject: [PATCH 1064/1085] Add support for batch-major input in the
 unidirectional LSTM Op.

PiperOrigin-RevId: 217406579
---
 tensorflow/contrib/lite/c/builtin_op_data.h   |   5 +-
 .../lite/core/api/flatbuffer_conversions.cc   |   2 +-
 .../kernels/bidirectional_sequence_lstm.cc    |  30 +--
 tensorflow/contrib/lite/kernels/lstm.cc       |   6 +-
 tensorflow/contrib/lite/kernels/lstm_eval.cc  | 223 ++++++++++++------
 tensorflow/contrib/lite/kernels/lstm_eval.h   |  20 +-
 .../kernels/unidirectional_sequence_lstm.cc   |  18 +-
 .../unidirectional_sequence_lstm_test.cc      | 148 +++++++++---
 tensorflow/contrib/lite/schema/schema.fbs     |   3 +
 .../contrib/lite/schema/schema_generated.h    |  23 +-
 10 files changed, 338 insertions(+), 140 deletions(-)

diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h
index 1e65c3cee2..5a5f3ad61c 100644
--- a/tensorflow/contrib/lite/c/builtin_op_data.h
+++ b/tensorflow/contrib/lite/c/builtin_op_data.h
@@ -187,10 +187,13 @@ typedef struct {
 } TfLiteLSTMParams;
 
 typedef struct {
-  // Parameters for the LSTM kernel.
+  // Parameters needed for the underlying LSTM.
   TfLiteFusedActivation activation;
   float cell_clip;
   float proj_clip;
+
+  // If set to true then the first dimension is time, otherwise batch.
+  bool time_major;
 } TfLiteUnidirectionalSequenceLSTMParams;
 
 typedef struct {
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index 348ce54dd7..fe56c4ebf9 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -399,11 +399,11 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
             parse_activation(seq_lstm_params->fused_activation_function());
         params->cell_clip = seq_lstm_params->cell_clip();
         params->proj_clip = seq_lstm_params->proj_clip();
+        params->time_major = seq_lstm_params->time_major();
       }
       *builtin_data = reinterpret_cast<void*>(params);
       break;
     }
-
     case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: {
       auto params =
           allocator->AllocatePOD<TfLiteBidirectionalSequenceLSTMParams>();
diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
index 60abfbc85e..f8660fbaa2 100644
--- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc
@@ -876,6 +876,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       params->merge_outputs ? fw_recurrent_to_output_weights->dims->data[1] : 0;
   const auto actual_bw_output = params->merge_outputs ? fw_output : bw_output;
 
+  // TODO(mirkov): add batch_major support (http://b/117326122).
   switch (fw_input_to_output_weights->type) {
     case kTfLiteFloat32: {
       TfLiteStatus fw_pass_status = lstm_eval::EvalFloat(
@@ -889,8 +890,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_aux_input_to_output_weights, fw_input_gate_bias,
           fw_forget_gate_bias, fw_cell_bias, fw_output_gate_bias,
           fw_projection_weights, fw_projection_bias, &lstm_params,
-          /*forward_sequence=*/true, /*output_offset=*/0, fw_scratch_buffer,
-          fw_activation_state, fw_cell_state, fw_output);
+          /*forward_sequence=*/true, /*time_major=*/true, /*output_offset=*/0,
+          fw_scratch_buffer, fw_activation_state, fw_cell_state, fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
 
       TfLiteStatus bw_pass_status = lstm_eval::EvalFloat(
@@ -904,8 +905,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           bw_aux_input_to_output_weights, bw_input_gate_bias,
           bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
           bw_projection_weights, bw_projection_bias, &lstm_params,
-          /*forward_sequence=*/false, bw_output_offset, bw_scratch_buffer,
-          bw_activation_state, bw_cell_state, actual_bw_output);
+          /*forward_sequence=*/false, /*time_major=*/true, bw_output_offset,
+          bw_scratch_buffer, bw_activation_state, bw_cell_state,
+          actual_bw_output);
       TF_LITE_ENSURE_OK(context, bw_pass_status);
       return kTfLiteOk;
     }
@@ -942,11 +944,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_aux_input_to_output_weights, fw_input_gate_bias,
           fw_forget_gate_bias, fw_cell_bias, fw_output_gate_bias,
           fw_projection_weights, fw_projection_bias, &lstm_params,
-          /*forward_sequence=*/true, /*output_offset=*/0, fw_scratch_buffer,
-          scaling_factors, prod_scaling_factors, recovered_cell_weights,
-          input_quantized, aux_input_quantized, fw_activation_state_quantized,
-          fw_cell_state_quantized, fw_activation_state, fw_cell_state,
-          fw_output);
+          /*forward_sequence=*/true, /*time_major=*/true, /*output_offset=*/0,
+          fw_scratch_buffer, scaling_factors, prod_scaling_factors,
+          recovered_cell_weights, input_quantized, aux_input_quantized,
+          fw_activation_state_quantized, fw_cell_state_quantized,
+          fw_activation_state, fw_cell_state, fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
 
       TfLiteStatus bw_pass_status = lstm_eval::EvalHybrid(
@@ -960,11 +962,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           bw_aux_input_to_output_weights, bw_input_gate_bias,
           bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
           bw_projection_weights, bw_projection_bias, &lstm_params,
-          /*forward_sequence=*/false, bw_output_offset, bw_scratch_buffer,
-          scaling_factors, prod_scaling_factors, recovered_cell_weights,
-          input_quantized, aux_input_quantized, bw_activation_state_quantized,
-          bw_cell_state_quantized, bw_activation_state, bw_cell_state,
-          actual_bw_output);
+          /*forward_sequence=*/false, /*time_major=*/true, bw_output_offset,
+          bw_scratch_buffer, scaling_factors, prod_scaling_factors,
+          recovered_cell_weights, input_quantized, aux_input_quantized,
+          bw_activation_state_quantized, bw_cell_state_quantized,
+          bw_activation_state, bw_cell_state, actual_bw_output);
       TF_LITE_ENSURE_OK(context, bw_pass_status);
       return kTfLiteOk;
     }
diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc
index f08a1a80c0..3666122e94 100644
--- a/tensorflow/contrib/lite/kernels/lstm.cc
+++ b/tensorflow/contrib/lite/kernels/lstm.cc
@@ -497,6 +497,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
           forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
           projection_bias, params, /*forward_sequence=*/true,
+          /*time_major=*/true,
           /*output_offset=*/0, scratch_buffer, activation_state, cell_state,
           output);
     }
@@ -524,8 +525,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
           forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
           projection_bias, params, /*forward_sequence=*/true,
-          /*output_offset=*/0, scratch_buffer, scaling_factors,
-          prod_scaling_factors, recovered_cell_weights, input_quantized,
+          /*time_major=*/true, /*output_offset=*/0, scratch_buffer,
+          scaling_factors, prod_scaling_factors, recovered_cell_weights,
+          input_quantized,
           /*aux_input_quantized=*/nullptr, activation_state_quantized,
           cell_state_quantized, activation_state, cell_state, output);
     }
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.cc b/tensorflow/contrib/lite/kernels/lstm_eval.cc
index 2ef70aa933..5b7951a931 100644
--- a/tensorflow/contrib/lite/kernels/lstm_eval.cc
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.cc
@@ -710,9 +710,10 @@ TfLiteStatus EvalFloat(
     const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
     const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
     const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
-    TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
-    TfLiteTensor* cell_state, TfLiteTensor* output) {
+    const TfLiteLSTMParams* params, bool forward_sequence, bool time_major,
+    int output_offset, TfLiteTensor* scratch_buffer,
+    TfLiteTensor* activation_state, TfLiteTensor* cell_state,
+    TfLiteTensor* output) {
   TF_LITE_ASSERT(input->dims->size >= 2 && input->dims->size <= 3);
   const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
   const int n_batch = input->dims->data[input->dims->size - 2];
@@ -777,36 +778,71 @@ TfLiteStatus EvalFloat(
     aux_input_to_output_weights_ptr = aux_input_to_output_weights->data.f;
   }
 
-  // Loop through the sequence.
   const int output_batch_leading_dim =
       output->dims->data[output->dims->size - 1];
-  const int input_step = n_batch * n_input;
-  const int output_step = n_batch * output_batch_leading_dim;
-  for (int t = 0; t < max_time; t++) {
-    // If this is the forward_sequence, step forward, otherwise step backwards.
-    const int t_rel = forward_sequence ? t : max_time - t - 1;
-    const float* input_ptr = input->data.f + t_rel * input_step;
-    if (aux_input) {
-      aux_input_ptr = aux_input->data.f + t_rel * input_step;
+  if (time_major) {
+    // Loop through the sequence.
+    const int input_step = n_batch * n_input;
+    const int output_step = n_batch * output_batch_leading_dim;
+    for (int t = 0; t < max_time; t++) {
+      // If this is the forward_sequence, step forward, otherwise step
+      // backwards.
+      const int t_rel = forward_sequence ? t : max_time - t - 1;
+      const float* input_ptr = input->data.f + t_rel * input_step;
+      if (aux_input) {
+        aux_input_ptr = aux_input->data.f + t_rel * input_step;
+      }
+      float* output_ptr_time =
+          output->data.f + t_rel * output_step + output_offset;
+
+      LstmStepWithAuxInput(
+          input_ptr, input_to_input_weights_ptr,
+          input_to_forget_weights->data.f, input_to_cell_weights->data.f,
+          input_to_output_weights->data.f, aux_input_ptr,
+          aux_input_to_input_weights_ptr, aux_input_to_forget_weights_ptr,
+          aux_input_to_cell_weights_ptr, aux_input_to_output_weights_ptr,
+          recurrent_to_input_weights_ptr, recurrent_to_forget_weights->data.f,
+          recurrent_to_cell_weights->data.f,
+          recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
+          cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
+          input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
+          output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
+          params, n_batch, n_cell, n_input, aux_input_size, n_output,
+          output_batch_leading_dim, activation_state->data.f,
+          cell_state->data.f, input_gate_scratch, forget_gate_scratch,
+          cell_scratch, output_gate_scratch, output_ptr_time);
     }
-    float* output_ptr_time =
-        output->data.f + t_rel * output_step + output_offset;
+  } else {
+    for (int b = 0; b < n_batch; b++) {
+      const int input_step = n_input;
+      const int output_step = output_batch_leading_dim;
+      for (int t = 0; t < max_time; t++) {
+        // If this is the forward_sequence, step forward, otherwise step
+        // backwards.
+        const int t_rel = forward_sequence ? t : max_time - t - 1;
+        const float* input_ptr = input->data.f + t_rel * input_step;
+        float* output_ptr_time =
+            output->data.f + t_rel * output_step + output_offset;
 
-    LstmStepWithAuxInput(
-        input_ptr, input_to_input_weights_ptr, input_to_forget_weights->data.f,
-        input_to_cell_weights->data.f, input_to_output_weights->data.f,
-        aux_input_ptr, aux_input_to_input_weights_ptr,
-        aux_input_to_forget_weights_ptr, aux_input_to_cell_weights_ptr,
-        aux_input_to_output_weights_ptr, recurrent_to_input_weights_ptr,
-        recurrent_to_forget_weights->data.f, recurrent_to_cell_weights->data.f,
-        recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
-        cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
-        input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
-        output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
-        params, n_batch, n_cell, n_input, aux_input_size, n_output,
-        output_batch_leading_dim, activation_state->data.f, cell_state->data.f,
-        input_gate_scratch, forget_gate_scratch, cell_scratch,
-        output_gate_scratch, output_ptr_time);
+        LstmStepWithAuxInput(
+            input_ptr, input_to_input_weights_ptr,
+            input_to_forget_weights->data.f, input_to_cell_weights->data.f,
+            input_to_output_weights->data.f, aux_input_ptr,
+            aux_input_to_input_weights_ptr, aux_input_to_forget_weights_ptr,
+            aux_input_to_cell_weights_ptr, aux_input_to_output_weights_ptr,
+            recurrent_to_input_weights_ptr, recurrent_to_forget_weights->data.f,
+            recurrent_to_cell_weights->data.f,
+            recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
+            cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
+            input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
+            output_gate_bias->data.f, projection_weights_ptr,
+            projection_bias_ptr, params, /*n_batch=*/1, n_cell, n_input,
+            aux_input_size, n_output, output_batch_leading_dim,
+            activation_state->data.f, cell_state->data.f, input_gate_scratch,
+            forget_gate_scratch, cell_scratch, output_gate_scratch,
+            output_ptr_time);
+      }
+    }
   }
   return kTfLiteOk;
 }
@@ -830,13 +866,13 @@ TfLiteStatus EvalHybrid(
     const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
     const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
     const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
-    TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
-    TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
-    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
-    TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
-    TfLiteTensor* output_state, TfLiteTensor* cell_state,
-    TfLiteTensor* output) {
+    const TfLiteLSTMParams* params, bool forward_sequence, bool time_major,
+    int output_offset, TfLiteTensor* scratch_buffer,
+    TfLiteTensor* scaling_factors, TfLiteTensor* prod_scaling_factors,
+    TfLiteTensor* recovered_cell_weights, TfLiteTensor* input_quantized,
+    TfLiteTensor* aux_input_quantized, TfLiteTensor* output_state_quantized,
+    TfLiteTensor* cell_state_quantized, TfLiteTensor* output_state,
+    TfLiteTensor* cell_state, TfLiteTensor* output) {
   TF_LITE_ASSERT(input->dims->size >= 2 && input->dims->size <= 3);
   const int max_time = (input->dims->size == 2) ? 1 : input->dims->data[0];
   const int n_batch = input->dims->data[input->dims->size - 2];
@@ -990,45 +1026,90 @@ TfLiteStatus EvalHybrid(
         aux_input_to_output_weights->params.scale;
   }
 
-  // Feed the sequence into the LSTM step-by-step.
   const int output_batch_leading_dim =
       output->dims->data[output->dims->size - 1];
-  const int input_step = n_batch * n_input;
-  const int output_step = n_batch * output_batch_leading_dim;
-  for (int t = 0; t < max_time; t++) {
-    // If this is the forward_sequence, step forward, otherwise step backwards.
-    const int t_rel = forward_sequence ? t : max_time - t - 1;
-    const float* input_ptr = input->data.f + t_rel * input_step;
-    if (aux_input) {
-      aux_input_ptr = aux_input->data.f + t_rel * input_step;
+  if (time_major) {
+    // Feed the sequence into the LSTM step-by-step.
+    const int input_step = n_batch * n_input;
+    const int output_step = n_batch * output_batch_leading_dim;
+    for (int t = 0; t < max_time; t++) {
+      // If this is the forward_sequence, step forward, otherwise step
+      // backwards.
+      const int t_rel = forward_sequence ? t : max_time - t - 1;
+      const float* input_ptr = input->data.f + t_rel * input_step;
+      if (aux_input) {
+        aux_input_ptr = aux_input->data.f + t_rel * input_step;
+      }
+      float* output_ptr = output->data.f + t_rel * output_step + output_offset;
+
+      LstmStepWithAuxInput(
+          input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
+          input_to_forget_weights_ptr, input_to_forget_weights_scale,
+          input_to_cell_weights_ptr, input_to_cell_weights_scale,
+          input_to_output_weights_ptr, input_to_output_weights_scale,
+          aux_input_ptr, aux_input_to_input_weights_ptr,
+          aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
+          aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
+          aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
+          aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
+          recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
+          recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
+          recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
+          recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
+          cell_to_input_weights_scale, cell_to_forget_weights_ptr,
+          cell_to_forget_weights_scale, cell_to_output_weights_ptr,
+          cell_to_output_weights_scale, input_gate_bias_ptr,
+          forget_gate_bias_ptr, cell_bias_ptr, output_gate_bias_ptr,
+          projection_weights_ptr, projection_weights_scale, projection_bias_ptr,
+          params, n_batch, n_cell, n_input, aux_input_size, n_output,
+          output_batch_leading_dim, input_gate_scratch, forget_gate_scratch,
+          cell_scratch, output_gate_scratch, scaling_factors_ptr,
+          prod_scaling_factors_ptr, recovered_cell_weights_ptr,
+          quantized_input_ptr, quantized_aux_input_ptr,
+          quantized_output_state_ptr, quantized_cell_state_ptr,
+          output_state_ptr, cell_state_ptr, output_ptr);
     }
-    float* output_ptr = output->data.f + t_rel * output_step + output_offset;
+  } else {
+    for (int b = 0; b < n_batch; b++) {
+      const int input_step = n_input;
+      const int output_step = output_batch_leading_dim;
+      for (int t = 0; t < max_time; t++) {
+        // If this is the forward_sequence, step forward, otherwise step
+        // backwards.
+        const int t_rel = forward_sequence ? t : max_time - t - 1;
+        const float* input_ptr = input->data.f + t_rel * input_step;
+        float* output_ptr =
+            output->data.f + t_rel * output_step + output_offset;
 
-    LstmStepWithAuxInput(
-        input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
-        input_to_forget_weights_ptr, input_to_forget_weights_scale,
-        input_to_cell_weights_ptr, input_to_cell_weights_scale,
-        input_to_output_weights_ptr, input_to_output_weights_scale,
-        aux_input_ptr, aux_input_to_input_weights_ptr,
-        aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
-        aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
-        aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
-        aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
-        recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
-        recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
-        recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
-        recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
-        cell_to_input_weights_scale, cell_to_forget_weights_ptr,
-        cell_to_forget_weights_scale, cell_to_output_weights_ptr,
-        cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr,
-        cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr,
-        projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell,
-        n_input, aux_input_size, n_output, output_batch_leading_dim,
-        input_gate_scratch, forget_gate_scratch, cell_scratch,
-        output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
-        recovered_cell_weights_ptr, quantized_input_ptr,
-        quantized_aux_input_ptr, quantized_output_state_ptr,
-        quantized_cell_state_ptr, output_state_ptr, cell_state_ptr, output_ptr);
+        LstmStepWithAuxInput(
+            input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
+            input_to_forget_weights_ptr, input_to_forget_weights_scale,
+            input_to_cell_weights_ptr, input_to_cell_weights_scale,
+            input_to_output_weights_ptr, input_to_output_weights_scale,
+            aux_input_ptr, aux_input_to_input_weights_ptr,
+            aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
+            aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
+            aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
+            aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
+            recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
+            recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
+            recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
+            recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
+            cell_to_input_weights_scale, cell_to_forget_weights_ptr,
+            cell_to_forget_weights_scale, cell_to_output_weights_ptr,
+            cell_to_output_weights_scale, input_gate_bias_ptr,
+            forget_gate_bias_ptr, cell_bias_ptr, output_gate_bias_ptr,
+            projection_weights_ptr, projection_weights_scale,
+            projection_bias_ptr, params, n_batch, n_cell, n_input,
+            aux_input_size, n_output, output_batch_leading_dim,
+            input_gate_scratch, forget_gate_scratch, cell_scratch,
+            output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
+            recovered_cell_weights_ptr, quantized_input_ptr,
+            quantized_aux_input_ptr, quantized_output_state_ptr,
+            quantized_cell_state_ptr, output_state_ptr, cell_state_ptr,
+            output_ptr);
+      }
+    }
   }
 
   return kTfLiteOk;
diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.h b/tensorflow/contrib/lite/kernels/lstm_eval.h
index adf8cf0f64..8d8b97aead 100644
--- a/tensorflow/contrib/lite/kernels/lstm_eval.h
+++ b/tensorflow/contrib/lite/kernels/lstm_eval.h
@@ -42,9 +42,10 @@ TfLiteStatus EvalFloat(
     const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
     const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
     const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
-    TfLiteTensor* scratch_buffer, TfLiteTensor* activation_state,
-    TfLiteTensor* cell_state, TfLiteTensor* output);
+    const TfLiteLSTMParams* params, bool forward_sequence, bool time_major,
+    int output_offset, TfLiteTensor* scratch_buffer,
+    TfLiteTensor* activation_state, TfLiteTensor* cell_state,
+    TfLiteTensor* output);
 
 TfLiteStatus EvalHybrid(
     const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
@@ -65,12 +66,13 @@ TfLiteStatus EvalHybrid(
     const TfLiteTensor* input_gate_bias, const TfLiteTensor* forget_gate_bias,
     const TfLiteTensor* cell_bias, const TfLiteTensor* output_gate_bias,
     const TfLiteTensor* projection_weights, const TfLiteTensor* projection_bias,
-    const TfLiteLSTMParams* params, bool forward_sequence, int output_offset,
-    TfLiteTensor* scratch_buffer, TfLiteTensor* scaling_factors,
-    TfLiteTensor* prod_scaling_factors, TfLiteTensor* recovered_cell_weights,
-    TfLiteTensor* input_quantized, TfLiteTensor* aux_input_quantized,
-    TfLiteTensor* output_state_quantized, TfLiteTensor* cell_state_quantized,
-    TfLiteTensor* output_state, TfLiteTensor* cell_state, TfLiteTensor* output);
+    const TfLiteLSTMParams* params, bool forward_sequence, bool time_major,
+    int output_offset, TfLiteTensor* scratch_buffer,
+    TfLiteTensor* scaling_factors, TfLiteTensor* prod_scaling_factors,
+    TfLiteTensor* recovered_cell_weights, TfLiteTensor* input_quantized,
+    TfLiteTensor* aux_input_quantized, TfLiteTensor* output_state_quantized,
+    TfLiteTensor* cell_state_quantized, TfLiteTensor* output_state,
+    TfLiteTensor* cell_state, TfLiteTensor* output);
 
 }  // namespace lstm_eval
 }  // namespace builtin
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
index 40029779e0..bd6d4d1f88 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc
@@ -260,8 +260,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
   TF_LITE_ENSURE(context, input->dims->size > 1);
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
+  const auto* params =
+      reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
+          node->builtin_data);
+  const bool time_major = params->time_major;
+  const int n_batch = time_major ? input->dims->data[1] : input->dims->data[0];
   const int n_input = input->dims->data[2];
 
   const TfLiteTensor* input_to_output_weights =
@@ -296,10 +299,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumElements(cell_state), n_batch * n_cell);
 
   // Resize the output tensors.
-  TfLiteIntArray* output_size = TfLiteIntArrayCreate(3);
-  output_size->data[0] = max_time;
-  output_size->data[1] = n_batch;
-  output_size->data[2] = n_output;
+  TfLiteIntArray* output_size = TfLiteIntArrayCopy(input->dims);
+  output_size->data[input->dims->size - 1] = n_output;
   TF_LITE_ENSURE_OK(context,
                     context->ResizeTensor(context, output, output_size));
 
@@ -436,6 +437,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto* params =
       reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
           node->builtin_data);
+  const bool time_major = params->time_major;
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
 
   const TfLiteTensor* input_to_input_weights =
@@ -506,7 +508,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           /*aux_input_to_cell_weights=*/nullptr,
           /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
           forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
-          projection_bias, &lstm_params, /*forward_sequence=*/true,
+          projection_bias, &lstm_params, /*forward_sequence=*/true, time_major,
           /*output_offset=*/0, scratch_buffer, activation_state, cell_state,
           output);
     }
@@ -533,7 +535,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           /*aux_input_to_cell_weights=*/nullptr,
           /*aux_input_to_output_weights=*/nullptr, input_gate_bias,
           forget_gate_bias, cell_bias, output_gate_bias, projection_weights,
-          projection_bias, &lstm_params, /*forward_sequence=*/true,
+          projection_bias, &lstm_params, /*forward_sequence=*/true, time_major,
           /*output_offset=*/0, scratch_buffer, scaling_factors,
           prod_scaling_factors, recovered_cell_weights, input_quantized,
           /*aux_input_quantized=*/nullptr, activation_state_quantized,
diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
index 7b9d66c19b..1de14dd60d 100644
--- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
+++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc
@@ -32,7 +32,7 @@ using ::testing::ElementsAreArray;
 class UnidirectionalLSTMOpModel : public SingleOpModel {
  public:
   UnidirectionalLSTMOpModel(int n_batch, int n_input, int n_cell, int n_output,
-                            int sequence_length, bool use_cifg,
+                            int sequence_length, bool time_major, bool use_cifg,
                             bool use_peephole, bool use_projection_weights,
                             bool use_projection_bias, float cell_clip,
                             float proj_clip,
@@ -110,12 +110,12 @@ class UnidirectionalLSTMOpModel : public SingleOpModel {
 
     output_ = AddOutput(TensorType_FLOAT32);
 
-    SetBuiltinOp(
-        BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
-        BuiltinOptions_UnidirectionalSequenceLSTMOptions,
-        CreateUnidirectionalSequenceLSTMOptions(
-            builder_, ActivationFunctionType_TANH, cell_clip, proj_clip)
-            .Union());
+    SetBuiltinOp(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+                 BuiltinOptions_UnidirectionalSequenceLSTMOptions,
+                 CreateUnidirectionalSequenceLSTMOptions(
+                     builder_, ActivationFunctionType_TANH, cell_clip,
+                     proj_clip, time_major)
+                     .Union());
     BuildInterpreter(input_shapes);
   }
 
@@ -241,12 +241,12 @@ class HybridUnidirectionalLSTMOpModel : public UnidirectionalLSTMOpModel {
  public:
   HybridUnidirectionalLSTMOpModel(
       int n_batch, int n_input, int n_cell, int n_output, int sequence_length,
-      bool use_cifg, bool use_peephole, bool use_projection_weights,
-      bool use_projection_bias, float cell_clip, float proj_clip,
-      const std::vector<std::vector<int>>& input_shapes)
+      bool time_major, bool use_cifg, bool use_peephole,
+      bool use_projection_weights, bool use_projection_bias, float cell_clip,
+      float proj_clip, const std::vector<std::vector<int>>& input_shapes)
       : UnidirectionalLSTMOpModel(
-            n_batch, n_input, n_cell, n_output, sequence_length, use_cifg,
-            use_peephole, use_projection_weights, use_projection_bias,
+            n_batch, n_input, n_cell, n_output, sequence_length, time_major,
+            use_cifg, use_peephole, use_projection_weights, use_projection_bias,
             cell_clip, proj_clip, input_shapes, TensorType_UINT8) {}
 
   void SetInputToInputWeights(const std::vector<float>& f) {
@@ -326,21 +326,32 @@ class BaseLstmTest : public ::testing::Test {
   // Compares output up to tolerance to the result of the lstm given the input.
   void VerifyGoldens(const std::vector<std::vector<float>>& input,
                      const std::vector<std::vector<float>>& output,
-                     UnidirectionalLSTMOpModel* lstm, float tolerance = 1e-5) {
+                     UnidirectionalLSTMOpModel* lstm, float tolerance = 1e-5,
+                     bool time_major = true) {
     const int num_batches = input.size();
     EXPECT_GT(num_batches, 0);
     const int num_inputs = lstm->num_inputs();
     EXPECT_GT(num_inputs, 0);
     const int input_sequence_size = input[0].size() / num_inputs;
     EXPECT_GT(input_sequence_size, 0);
-    // Feed the whole sequence as input.
-    for (int i = 0; i < input_sequence_size; ++i) {
+    if (time_major) {
+      // Feed the whole sequence as input.
+      for (int i = 0; i < input_sequence_size; ++i) {
+        for (int b = 0; b < num_batches; ++b) {
+          const float* batch_start = input[b].data() + i * num_inputs;
+          const float* batch_end = batch_start + num_inputs;
+
+          lstm->SetInput(((i * num_batches) + b) * num_inputs, batch_start,
+                         batch_end);
+        }
+      }
+    } else {
       for (int b = 0; b < num_batches; ++b) {
-        const float* batch_start = input[b].data() + i * num_inputs;
-        const float* batch_end = batch_start + num_inputs;
+        const float* batch_start = input[b].data();
+        const float* batch_end = batch_start + input_sequence_size * num_inputs;
 
-        lstm->SetInput(((i * num_batches) + b) * lstm->num_inputs(),
-                       batch_start, batch_end);
+        lstm->SetInput(b * input_sequence_size * num_inputs, batch_start,
+                       batch_end);
       }
     }
 
@@ -349,15 +360,25 @@ class BaseLstmTest : public ::testing::Test {
     const int num_outputs = lstm->num_outputs();
     EXPECT_GT(num_outputs, 0);
     std::vector<float> expected;
-    for (int i = 0; i < input_sequence_size; ++i) {
+
+    if (time_major) {
+      for (int i = 0; i < input_sequence_size; ++i) {
+        for (int b = 0; b < num_batches; ++b) {
+          const float* golden_start_batch = output[b].data() + i * num_outputs;
+          const float* golden_end_batch = golden_start_batch + num_outputs;
+
+          expected.insert(expected.end(), golden_start_batch, golden_end_batch);
+        }
+      }
+    } else {
       for (int b = 0; b < num_batches; ++b) {
-        const float* golden_start_batch = output[b].data() + i * num_outputs;
-        const float* golden_end_batch = golden_start_batch + num_outputs;
+        const float* golden_batch_start = output[b].data();
+        const float* golden_batch_end =
+            golden_batch_start + input_sequence_size * num_outputs;
 
-        expected.insert(expected.end(), golden_start_batch, golden_end_batch);
+        expected.insert(expected.end(), golden_batch_start, golden_batch_end);
       }
     }
-
     EXPECT_THAT(lstm->GetOutput(),
                 ElementsAreArray(ArrayFloatNear(expected, tolerance)));
   }
@@ -422,7 +443,7 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) {
 
   UnidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length,
-      /*use_cifg=*/false, /*use_peephole=*/false,
+      /*time_major=*/true, /*use_cifg=*/false, /*use_peephole=*/false,
       /*use_projection_weights=*/false,
       /*use_projection_bias=*/false,
       /*cell_clip=*/0.0, /*proj_clip=*/0.0,
@@ -473,6 +494,73 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) {
   VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm);
 }
 
+TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest,
+       LstmBlackBoxTestBatchMajor) {
+  const int n_batch = 1;
+  const int n_input = 2;
+  // n_cell and n_output have the same size when there is no projection.
+  const int n_cell = 4;
+  const int n_output = 4;
+  const int sequence_length = 3;
+
+  UnidirectionalLSTMOpModel lstm(
+      n_batch, n_input, n_cell, n_output, sequence_length,
+      /*time_major=*/true, /*use_cifg=*/false, /*use_peephole=*/false,
+      /*use_projection_weights=*/false,
+      /*use_projection_bias=*/false,
+      /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+      {
+          {sequence_length, n_batch, n_input},  // input tensor
+
+          {n_cell, n_input},  // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {n_cell, n_output},  // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {0},  // cell_to_input_weight tensor
+          {0},  // cell_to_forget_weight tensor
+          {0},  // cell_to_output_weight tensor
+
+          {n_cell},  // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {0, 0},  // projection_weight tensor
+          {0},     // projection_bias tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+      });
+
+  lstm.SetInputToInputWeights(input_to_input_weights_);
+  lstm.SetInputToCellWeights(input_to_cell_weights_);
+  lstm.SetInputToForgetWeights(input_to_forget_weights_);
+  lstm.SetInputToOutputWeights(input_to_output_weights_);
+
+  lstm.SetInputGateBias(input_gate_bias_);
+  lstm.SetCellBias(cell_gate_bias_);
+  lstm.SetForgetGateBias(forget_gate_bias_);
+  lstm.SetOutputGateBias(output_gate_bias_);
+
+  lstm.SetRecurrentToInputWeights(recurrent_to_input_weights_);
+  lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_);
+  lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_);
+  lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_);
+
+  // Reshuffle input and output to batch major format.
+  std::vector<std::vector<float>> input;
+  std::vector<std::vector<float>> output;
+
+  VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/1e-5,
+                /*time_major=*/false);
+}
+
 TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, HybridLstmBlackBoxTest) {
   const int n_batch = 1;
   const int n_input = 2;
@@ -483,7 +571,7 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, HybridLstmBlackBoxTest) {
 
   HybridUnidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length,
-      /*use_cifg=*/false, /*use_peephole=*/false,
+      /*time_major=*/true, /*use_cifg=*/false, /*use_peephole=*/false,
       /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0,
       {
@@ -591,7 +679,7 @@ TEST_F(CifgPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) {
 
   UnidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length,
-      /*use_cifg=*/true, /*use_peephole=*/true,
+      /*time_major=*/true, /*use_cifg=*/true, /*use_peephole=*/true,
       /*use_projection_weights=*/false,
       /*use_projection_bias=*/false,
       /*cell_clip=*/0.0, /*proj_clip=*/0.0,
@@ -652,7 +740,7 @@ TEST_F(CifgPeepholeNoProjectionNoClippingLstmTest, HybridLstmBlackBoxTest) {
 
   HybridUnidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length,
-      /*use_cifg=*/true, /*use_peephole=*/true,
+      /*time_major=*/true, /*use_cifg=*/true, /*use_peephole=*/true,
       /*use_projection_weights=*/false,
       /*use_projection_bias=*/false,
       /*cell_clip=*/0.0, /*proj_clip=*/0.0,
@@ -1311,7 +1399,7 @@ TEST_F(NoCifgPeepholeProjectionClippingLstmTest, LstmBlackBoxTest) {
 
   UnidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length,
-      /*use_cifg=*/false, /*use_peephole=*/true,
+      /*time_major=*/true, /*use_cifg=*/false, /*use_peephole=*/true,
       /*use_projection_weights=*/true,
       /*use_projection_bias=*/false,
       /*cell_clip=*/0.0, /*proj_clip=*/0.0,
@@ -1377,7 +1465,7 @@ TEST_F(NoCifgPeepholeProjectionClippingLstmTest, HybridLstmBlackBoxTest) {
 
   HybridUnidirectionalLSTMOpModel lstm(
       n_batch, n_input, n_cell, n_output, sequence_length,
-      /*use_cifg=*/false, /*use_peephole=*/true,
+      /*time_major=*/true, /*use_cifg=*/false, /*use_peephole=*/true,
       /*use_projection_weights=*/true,
       /*use_projection_bias=*/false,
       /*cell_clip=*/0.0, /*proj_clip=*/0.0,
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index fe3dc56e65..3045351f22 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -407,6 +407,9 @@ table UnidirectionalSequenceLSTMOptions {
   fused_activation_function:ActivationFunctionType;
   cell_clip: float; // Optional, 0.0 means no clipping
   proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true then first dimension is sequence, otherwise batch.
+  time_major:bool;
 }
 
 table BidirectionalSequenceLSTMOptions {
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index 4426b7d407..2bae6d72ec 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -3534,10 +3534,12 @@ struct UnidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
   ActivationFunctionType fused_activation_function;
   float cell_clip;
   float proj_clip;
+  bool time_major;
   UnidirectionalSequenceLSTMOptionsT()
       : fused_activation_function(ActivationFunctionType_NONE),
         cell_clip(0.0f),
-        proj_clip(0.0f) {
+        proj_clip(0.0f),
+        time_major(false) {
   }
 };
 
@@ -3546,7 +3548,8 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
   enum {
     VT_FUSED_ACTIVATION_FUNCTION = 4,
     VT_CELL_CLIP = 6,
-    VT_PROJ_CLIP = 8
+    VT_PROJ_CLIP = 8,
+    VT_TIME_MAJOR = 10
   };
   ActivationFunctionType fused_activation_function() const {
     return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
@@ -3557,11 +3560,15 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
   float proj_clip() const {
     return GetField<float>(VT_PROJ_CLIP, 0.0f);
   }
+  bool time_major() const {
+    return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0;
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            VerifyField<float>(verifier, VT_CELL_CLIP) &&
            VerifyField<float>(verifier, VT_PROJ_CLIP) &&
+           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
            verifier.EndTable();
   }
   UnidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -3581,6 +3588,9 @@ struct UnidirectionalSequenceLSTMOptionsBuilder {
   void add_proj_clip(float proj_clip) {
     fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
   }
+  void add_time_major(bool time_major) {
+    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0);
+  }
   explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
         : fbb_(_fbb) {
     start_ = fbb_.StartTable();
@@ -3597,10 +3607,12 @@ inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirection
     flatbuffers::FlatBufferBuilder &_fbb,
     ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
     float cell_clip = 0.0f,
-    float proj_clip = 0.0f) {
+    float proj_clip = 0.0f,
+    bool time_major = false) {
   UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
   builder_.add_proj_clip(proj_clip);
   builder_.add_cell_clip(cell_clip);
+  builder_.add_time_major(time_major);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
@@ -8060,6 +8072,7 @@ inline void UnidirectionalSequenceLSTMOptions::UnPackTo(UnidirectionalSequenceLS
   { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
   { auto _e = cell_clip(); _o->cell_clip = _e; };
   { auto _e = proj_clip(); _o->proj_clip = _e; };
+  { auto _e = time_major(); _o->time_major = _e; };
 }
 
 inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> UnidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
@@ -8073,11 +8086,13 @@ inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirection
   auto _fused_activation_function = _o->fused_activation_function;
   auto _cell_clip = _o->cell_clip;
   auto _proj_clip = _o->proj_clip;
+  auto _time_major = _o->time_major;
   return tflite::CreateUnidirectionalSequenceLSTMOptions(
       _fbb,
       _fused_activation_function,
       _cell_clip,
-      _proj_clip);
+      _proj_clip,
+      _time_major);
 }
 
 inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-- 
GitLab


From cc0cf49a0d0cfdb23073810260ca1af480d08850 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 16:20:58 -0700
Subject: [PATCH 1065/1085] Check for the presence of a Worker machine when
 reassigning hooks in distributed training jobs.

PiperOrigin-RevId: 217407558
---
 tensorflow/python/estimator/estimator.py      |  6 ++
 tensorflow/python/estimator/estimator_test.py | 61 +++++++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 3c1be9dbad..c44413090a 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1423,7 +1423,13 @@ class Estimator(object):
     # evaluations.
     save_summary_steps = self._config.save_summary_steps
     log_step_count_steps = self._config.log_step_count_steps
+
+    # Check existence of appropriate cluster spec fields, as well as master and
+    # worker nodes. As master also performs evaluation, summary writing must
+    # occur on a different node. The presence of a worker is also checked to
+    # prevent reassigning hooks for single-replica jobs with just a master node.
     if (self._config.cluster_spec and self._config.cluster_spec.jobs and
+        (run_config.TaskType.WORKER in self._config.cluster_spec.jobs) and
         (run_config.TaskType.MASTER in self._config.cluster_spec.jobs)):
       # Update config values to prevent the default hooks from being created on
       # the master or other workers.
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
index 246dfb1a4b..c26b3e6509 100644
--- a/tensorflow/python/estimator/estimator_test.py
+++ b/tensorflow/python/estimator/estimator_test.py
@@ -1063,6 +1063,67 @@ class EstimatorTrainTest(test.TestCase):
       self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
       self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
 
+  def test_master_hooks_single_replica(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.MASTER: ['localhost:1234']
+        },
+        'task': {
+            'type': run_config.TaskType.MASTER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig(
+              save_summary_steps=100, log_step_count_steps=200))
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(100, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertEqual(200, mock_sess.call_args[1]['log_step_count_steps'])
+
+  def test_master_hooks_single_replica_with_ps(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.MASTER: ['localhost:1234'],
+            run_config.TaskType.PS: ['localhost: 1235'],
+        },
+        'task': {
+            'type': run_config.TaskType.MASTER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig(
+              save_summary_steps=100, log_step_count_steps=200))
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(100, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertEqual(200, mock_sess.call_args[1]['log_step_count_steps'])
+
 
 def _model_fn_with_eval_metric_ops(features, labels, mode, params):
   _, _ = features, labels
-- 
GitLab


From 500a807939be51396af56f59fcb4e8e569697a22 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <kramerb@google.com>
Date: Tue, 16 Oct 2018 16:23:06 -0700
Subject: [PATCH 1066/1085] [TF:XLA] Bump open source llvm revision to r344639

PiperOrigin-RevId: 217407969
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 7b11efeafc..19a4631d8f 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -481,11 +481,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "llvm",
         build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
-        sha256 = "a4f8bfe7e3e69069934a87e612a1d4d3b8b6af13e0f1213a42a6046e1bcd50d8",
-        strip_prefix = "llvm-d3429e96fe1e45b1dc0106463832523f37faf271",
+        sha256 = "b5bd6aa6613f8d57cb5973d43b9d6b82def80bad66f51387d2ed9c76d2652040",
+        strip_prefix = "llvm-4998e62d5745cca132cf92cec718be0746e70bcf",
         urls = [
-            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/d3429e96fe1e45b1dc0106463832523f37faf271.tar.gz",
-            "https://github.com/llvm-mirror/llvm/archive/d3429e96fe1e45b1dc0106463832523f37faf271.tar.gz",
+            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/4998e62d5745cca132cf92cec718be0746e70bcf.tar.gz",
+            "https://github.com/llvm-mirror/llvm/archive/4998e62d5745cca132cf92cec718be0746e70bcf.tar.gz",
         ],
     )
 
-- 
GitLab


From adb904fb99ec706dbbe11b4e35c227ff3bb7127c Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Tue, 16 Oct 2018 17:12:32 -0700
Subject: [PATCH 1067/1085] [XLA:GPU] Actually move
 cudnn_fused_conv_rewriter_test out of `tests` directory.

This makes it consistent with most of the other tests for individual passes,
which this one morally is (even if its implementation is a little unusual since
it runs the whole pass pipeline).

PiperOrigin-RevId: 217415531
---
 tensorflow/compiler/xla/service/gpu/BUILD     | 19 +++++++++++++++++++
 .../cudnn_fused_conv_rewriter_test.cc         |  0
 .../compiler/xla/service/gpu/tests/BUILD      | 15 ---------------
 3 files changed, 19 insertions(+), 15 deletions(-)
 rename tensorflow/compiler/xla/service/gpu/{tests => }/cudnn_fused_conv_rewriter_test.cc (100%)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 4eb5739fe2..449fd919d6 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -25,6 +25,10 @@ filegroup(
 )
 
 load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+load(
+    "//tensorflow/core:platform/default/build_config_root.bzl",
+    "tf_cuda_tests_tags",
+)
 
 xla_proto_library(
     name = "backend_configs",
@@ -980,3 +984,18 @@ cc_library(
         "//tensorflow/core:stream_executor_no_cuda",
     ],
 )
+
+tf_cc_test(
+    name = "cudnn_fused_conv_rewriter_test",
+    srcs = ["cudnn_fused_conv_rewriter_test.cc"],
+    tags = tf_cuda_tests_tags(),
+    deps = [
+        "//tensorflow/compiler/xla/service:hlo_parser",
+        "//tensorflow/compiler/xla/service/gpu:ir_emission_utils",
+        "//tensorflow/compiler/xla/service/gpu/tests:gpu_codegen_test",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
+    ],
+)
diff --git a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter_test.cc
similarity index 100%
rename from tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc
rename to tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter_test.cc
diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD
index 32eebad3b1..ed46f08d59 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD
@@ -210,21 +210,6 @@ tf_cc_test(
     ],
 )
 
-tf_cc_test(
-    name = "cudnn_fused_conv_rewriter_test",
-    srcs = ["cudnn_fused_conv_rewriter_test.cc"],
-    tags = tf_cuda_tests_tags(),
-    deps = [
-        ":gpu_codegen_test",
-        "//tensorflow/compiler/xla/service:hlo_parser",
-        "//tensorflow/compiler/xla/service/gpu:ir_emission_utils",
-        "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "@com_google_absl//absl/strings",
-    ],
-)
-
 tf_cc_test(
     name = "gpu_atomic_test",
     srcs = ["gpu_atomic_test.cc"],
-- 
GitLab


From 599e37f66d08296fb3cf401377e473c34b114ea3 Mon Sep 17 00:00:00 2001
From: Sreeni Kesavarapu <sreenik@google.com>
Date: Tue, 16 Oct 2018 17:14:22 -0700
Subject: [PATCH 1068/1085] Update the doc with the details about the rounding
 mode used in quantize_and_dequantize_v2.

PiperOrigin-RevId: 217415800
---
 .../core/api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt
index 9b500d0b58..c43142599b 100644
--- a/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_QuantizeAndDequantizeV2.pbtxt
@@ -93,5 +93,7 @@ following to each value in the 'input' tensor.
 
 output = round(clamp(value, input_min, input_max) * scale_factor) / scale_factor.
 
+The above round function uses half to even rounding.
+
 END
 }
-- 
GitLab


From 3716b1b91af0dd019102616b63eb31af62a2e743 Mon Sep 17 00:00:00 2001
From: Peter Ma <pcma@google.com>
Date: Tue, 16 Oct 2018 17:20:03 -0700
Subject: [PATCH 1069/1085] Move ReadyNodeManagerFactory() out of
 VirtualScheduler class and change it to return std::unique_ptr.

PiperOrigin-RevId: 217416514
---
 .../core/grappler/clusters/virtual_cluster.cc  |  7 +++++--
 .../core/grappler/clusters/virtual_cluster.h   |  4 ++--
 .../costs/analytical_cost_estimator.cc         |  4 +---
 .../core/grappler/costs/virtual_scheduler.cc   | 18 +++++++++++++++++-
 .../core/grappler/costs/virtual_scheduler.h    |  6 +++++-
 5 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/grappler/clusters/virtual_cluster.cc b/tensorflow/core/grappler/clusters/virtual_cluster.cc
index 295b3c12e6..dbd8f26c28 100644
--- a/tensorflow/core/grappler/clusters/virtual_cluster.cc
+++ b/tensorflow/core/grappler/clusters/virtual_cluster.cc
@@ -34,8 +34,11 @@ VirtualCluster::VirtualCluster(
 
 VirtualCluster::VirtualCluster(
     const std::unordered_map<string, DeviceProperties>& devices,
-    OpLevelCostEstimator* node_estimator, ReadyNodeManager* node_manager)
-    : Cluster(0), node_estimator_(node_estimator), node_manager_(node_manager) {
+    std::unique_ptr<OpLevelCostEstimator> node_estimator,
+    std::unique_ptr<ReadyNodeManager> node_manager)
+    : Cluster(0),
+      node_estimator_(std::move(node_estimator)),
+      node_manager_(std::move(node_manager)) {
   devices_ = devices;
 }
 
diff --git a/tensorflow/core/grappler/clusters/virtual_cluster.h b/tensorflow/core/grappler/clusters/virtual_cluster.h
index 6adb0b99bc..d19e39cd29 100644
--- a/tensorflow/core/grappler/clusters/virtual_cluster.h
+++ b/tensorflow/core/grappler/clusters/virtual_cluster.h
@@ -34,8 +34,8 @@ class VirtualCluster : public Cluster {
  public:
   VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices);
   VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
-                 OpLevelCostEstimator* node_estimator,
-                 ReadyNodeManager* node_manager);
+                 std::unique_ptr<OpLevelCostEstimator> node_estimator,
+                 std::unique_ptr<ReadyNodeManager> node_manager);
   VirtualCluster(const DeviceSet* device_set);
 
   ~VirtualCluster() override;
diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator.cc
index 8a6d575956..b7804ffaa5 100644
--- a/tensorflow/core/grappler/costs/analytical_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/analytical_cost_estimator.cc
@@ -104,9 +104,7 @@ AnalyticalCostEstimator::AnalyticalCostEstimator(Cluster* cluster,
                                                  bool use_static_shapes)
     : AnalyticalCostEstimator(
           cluster, absl::make_unique<OpLevelCostEstimator>(),
-          std::unique_ptr<ReadyNodeManager>(
-              VirtualScheduler::ReadyNodeManagerFactory("FirstReady")),
-          use_static_shapes, nullptr) {}
+          ReadyNodeManagerFactory("FirstReady"), use_static_shapes, nullptr) {}
 
 AnalyticalCostEstimator::AnalyticalCostEstimator(
     Cluster* cluster, std::unique_ptr<OpLevelCostEstimator> node_estimator,
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index d3c4686b32..ba50e55538 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -275,7 +275,23 @@ bool CompositeNodeManager::Empty() const {
   return empty && send_manager_.Empty() && recv_manager_.Empty();
 }
 
-// TODO(pcma): Modify to return unique_ptr instead
+std::unique_ptr<ReadyNodeManager> ReadyNodeManagerFactory(
+    const string& ready_node_manager) {
+  if (ready_node_manager == "FIFO") {
+    return absl::make_unique<FIFOManager>();
+  } else if (ready_node_manager == "LIFO") {
+    return absl::make_unique<LIFOManager>();
+  } else if (ready_node_manager == "FirstReady") {
+    return absl::make_unique<FirstReadyManager>();
+  } else if (ready_node_manager == "Composite") {
+    return absl::make_unique<CompositeNodeManager>();
+  }
+  LOG(FATAL) << "Not a valid ready node manager: " << ready_node_manager;
+  return nullptr;
+}
+
+// TODO(pcma): Delete this deprecated API after power_analyzer.cc is modeified
+// to use the new factory API
 ReadyNodeManager* VirtualScheduler::ReadyNodeManagerFactory(
     const string& ready_node_manager) {
   if (ready_node_manager == "FIFO") {
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h
index 59ab0a67a8..89dff9686d 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.h
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.h
@@ -248,6 +248,10 @@ class CompositeNodeManager : public ReadyNodeManager {
   const NodeDef* curr_node_;
 };
 
+// Constructs a ready node manager from the given string.
+std::unique_ptr<ReadyNodeManager> ReadyNodeManagerFactory(
+    const string& ready_node_manager);
+
 // The virtual scheduler emulates execution of nodes in a graph, considering
 // dependencies, device, etc.
 class VirtualScheduler {
@@ -287,7 +291,7 @@ class VirtualScheduler {
   // of the virtual execution of the graph.
   void GenerateRunMetadata(RunMetadata* metadata);
 
-  // Methods called from constructor.
+  // DEPRECATED
   static ReadyNodeManager* ReadyNodeManagerFactory(
       const string& ready_node_manager);
 
-- 
GitLab


From 78ba89a89d39892b0cc1ef0e31a12c978e879966 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Tue, 16 Oct 2018 17:24:24 -0700
Subject: [PATCH 1070/1085] Automated rollback of commit
 e4e19db364cf7ef0ac22cdf1cb55d4cdd30bec00

PiperOrigin-RevId: 217417068
---
 .../compiler/xla/service/cpu/ir_emitter.cc    | 119 ++++++++---------
 .../xla/service/cpu/runtime_key_value_sort.cc | 123 ++++++++----------
 .../xla/service/cpu/runtime_key_value_sort.h  |  60 ++++-----
 3 files changed, 134 insertions(+), 168 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 50a8d0b1a5..b2abdb39a5 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -54,7 +54,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
 #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
@@ -494,44 +493,53 @@ Status IrEmitter::HandleOutfeed(HloInstruction* outfeed) {
   return Status::OK();
 }
 
-Status IrEmitter::HandleSort(HloInstruction* hlo) {
-  const HloSortInstruction* sort = Cast<HloSortInstruction>(hlo);
+Status IrEmitter::HandleSort(HloInstruction* sort) {
   TF_RETURN_IF_ERROR(EmitTargetAddressForOp(sort));
-  Shape keys_shape = sort->keys()->shape();
-  std::vector<llvm::Value*> destination_addresses(sort->operand_count());
-  for (int64 i = 0; i < sort->operand_count(); ++i) {
-    ShapeIndex shape_index =
-        sort->values_count() > 0 ? ShapeIndex({i}) : ShapeIndex({});
-    const HloInstruction* operand = sort->operand(i);
-    // We assume that the layout of all involved operands and outputs is the
-    // same.
-    TF_RET_CHECK(
-        LayoutUtil::LayoutsInShapesEqual(keys_shape, operand->shape()));
-    TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(
-        keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index)));
-
-    // The sort is implemented in-place, therefore we first copy the operand
-    // buffer to the output buffer if they are not the same.
-    auto destination_buffer = GetAllocationSlice(*sort, shape_index);
-    destination_addresses[i] =
-        EmitBufferPointer(destination_buffer, operand->shape());
-    auto source_address = GetAllocationSlice(*operand);
-    if (destination_buffer != source_address) {
+  auto keys = sort->operand(0);
+  auto values = sort->operand_count() > 1 ? sort->operand(1) : nullptr;
+  ShapeIndex keys_shape_index({});
+  ShapeIndex values_shape_index({});
+  if (values != nullptr) {
+    keys_shape_index = ShapeIndex({0});
+    values_shape_index = ShapeIndex({1});
+  }
+  auto keys_destination = GetAllocationSlice(*sort, keys_shape_index);
+  auto keys_destination_address =
+      EmitBufferPointer(keys_destination, keys->shape());
+  auto values_destination = GetAllocationSlice(*sort, values_shape_index);
+  llvm::Value* values_destination_address = nullptr;
+
+  // The sort is implemented in-place, therefore we first copy the operand
+  // buffer to the output buffer if they are not the same.
+  if (keys_destination != GetAllocationSlice(*keys)) {
+    int64 primitive_type_size =
+        ShapeUtil::ByteSizeOfPrimitiveType(keys->shape().element_type());
+    auto source_buffer = GetEmittedValueFor(keys);
+    int64 keys_size = ByteSizeOf(keys->shape());
+    MemCpy(keys_destination_address, /*DstAlign=*/primitive_type_size,
+           source_buffer,
+           /*SrcAlign=*/primitive_type_size, keys_size);
+  }
+  if (values != nullptr) {
+    values_destination_address =
+        EmitBufferPointer(values_destination, values->shape());
+    if (values_destination != GetAllocationSlice(*values)) {
       int64 primitive_type_size =
-          ShapeUtil::ByteSizeOfPrimitiveType(operand->shape().element_type());
-      auto source_buffer = GetEmittedValueFor(operand);
-      int64 size = ByteSizeOf(operand->shape());
-      MemCpy(destination_addresses[i], /*DstAlign=*/primitive_type_size,
+          ShapeUtil::ByteSizeOfPrimitiveType(values->shape().element_type());
+      auto source_buffer = GetEmittedValueFor(values);
+      int64 values_size = ByteSizeOf(values->shape());
+      MemCpy(values_destination_address, /*DstAlign=*/primitive_type_size,
              source_buffer,
-             /*SrcAlign=*/primitive_type_size, size);
+             /*SrcAlign=*/primitive_type_size, values_size);
     }
   }
 
   // Normalize the shape and the dimension to sort.
   Shape normalized_keys_shape =
-      ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(keys_shape);
+      ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
+          keys->shape());
   int64 physical_dimension_to_sort = LayoutUtil::MakeLogicalToPhysical(
-      keys_shape.layout())[sort->sort_dimension()];
+      keys->shape().layout())[sort->dimensions(0)];
 
   int64 sort_dimension_elements =
       normalized_keys_shape.dimensions(physical_dimension_to_sort);
@@ -545,7 +553,7 @@ Status IrEmitter::HandleSort(HloInstruction* hlo) {
     lower_dimensions *= normalized_keys_shape.dimensions(i);
   }
 
-  PrimitiveType keys_type = keys_shape.element_type();
+  PrimitiveType keys_type = keys->shape().element_type();
   const char* fn_name = nullptr;
   llvm::Type* keys_native_type = nullptr;
   switch (keys_type) {
@@ -606,49 +614,28 @@ Status IrEmitter::HandleSort(HloInstruction* hlo) {
   llvm::FunctionType* key_value_sort_type = llvm::FunctionType::get(
       b_.getVoidTy(),
       {keys_native_type, b_.getInt64Ty(), b_.getInt64Ty(), b_.getInt64Ty(),
-       b_.getInt8PtrTy()->getPointerTo(), b_.getInt32Ty(),
-       b_.getInt32Ty()->getPointerTo()},
+       b_.getInt8PtrTy(), b_.getInt32Ty()},
       /*isVarArg=*/false);
   auto* key_value_sort_func = llvm::cast<llvm::Function>(
       module_->getOrInsertFunction(fn_name, key_value_sort_type));
   key_value_sort_func->setCallingConv(llvm::CallingConv::C);
   key_value_sort_func->setDoesNotThrow();
   key_value_sort_func->setOnlyAccessesArgMemory();
-  llvm::Value* values;
-  llvm::Value* sizes;
-  if (sort->values_count() == 0) {
-    values = llvm::Constant::getNullValue(b_.getInt8PtrTy()->getPointerTo());
-    sizes = llvm::Constant::getNullValue(b_.getInt32Ty()->getPointerTo());
-  } else {
-    values = llvm_ir::EmitAllocaAtFunctionEntryWithCount(
-        b_.getInt8PtrTy(), b_.getInt32(sort->values_count()),
-        "cc_values_alloca", &b_);
-    sizes = llvm_ir::EmitAllocaAtFunctionEntryWithCount(
-        b_.getInt32Ty(), b_.getInt32(sort->values_count()), "cc_sizes_alloca",
-        &b_);
-    for (int64 i = 0; i < sort->values_count(); ++i) {
-      llvm::Value* value_as_i8ptr =
-          PointerCast(destination_addresses[i + 1], b_.getInt8PtrTy());
-      llvm::Value* slot_in_values_alloca =
-          ConstInBoundsGEP1_32(b_.getInt8PtrTy(), values, i);
-      Store(value_as_i8ptr, slot_in_values_alloca);
-      llvm::Value* slot_in_sizes_alloca =
-          ConstInBoundsGEP1_32(b_.getInt32Ty(), sizes, i);
-      llvm::Value* size = b_.getInt32(ShapeUtil::ByteSizeOfPrimitiveType(
-          sort->operand(i + 1)->shape().element_type()));
-      Store(size, slot_in_sizes_alloca);
-    }
-  }
-
   Call(key_value_sort_func,
-       {PointerCast(destination_addresses[0], keys_native_type),
+       {PointerCast(keys_destination_address, keys_native_type),
         b_.getInt64(higher_dimensions), b_.getInt64(sort_dimension_elements),
-        b_.getInt64(lower_dimensions), values,
-        b_.getInt32(sort->values_count()), sizes});
-
-  if (sort->values_count() > 0) {
-    llvm_ir::EmitTuple(GetIrArrayFor(sort), destination_addresses, &b_,
-                       module_);
+        b_.getInt64(lower_dimensions),
+        values != nullptr
+            ? PointerCast(values_destination_address, b_.getInt8PtrTy())
+            : llvm::Constant::getNullValue(b_.getInt8PtrTy()),
+        b_.getInt32(values != nullptr ? ShapeUtil::ByteSizeOfPrimitiveType(
+                                            values->shape().element_type())
+                                      : 0)});
+
+  if (values != nullptr) {
+    llvm_ir::EmitTuple(GetIrArrayFor(sort),
+                       {keys_destination_address, values_destination_address},
+                       &b_, module_);
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
index bbbb634f73..e0e7deb98e 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc
@@ -99,9 +99,8 @@ void KeyValueSort(std::pair<Eigen::half, int64>* row_to_sort,
 }
 
 template <typename KeyType>
-void KeyValueSortImpl(KeyType* keys, int64 a, int64 b, int64 c, char** values,
-                      int32 values_count,
-                      int32* values_primitive_type_size_in_bytes) {
+void KeyValueSortImpl(KeyType* keys, int64 a, int64 b, int64 c, char* values,
+                      int32 values_primitive_type_size_in_bytes) {
   // High-level idea of the iteration/sorting logic:
   // Conceptually we have a 3-dimensional shape [a, b, c]. b corresponds to the
   // dimension to sort, c is the product of the more minor dimensions (set to 1
@@ -130,7 +129,7 @@ void KeyValueSortImpl(KeyType* keys, int64 a, int64 b, int64 c, char** values,
         index % sort_dimension_offset +
         (index - index % sort_dimension_offset) * sort_dimension_elements;
     // TODO(b/26783907): We could define a custom iterator class that references
-    // all arrays. Then we could avoid the intermediate copy. However this
+    // both arrays. Then we could avoid the intermediate copy. However this
     // would become more complicated, and it is not clear if the benefit is high
     // enough.
     for (int64 i = 0; i < sort_dimension_elements; ++i) {
@@ -141,109 +140,97 @@ void KeyValueSortImpl(KeyType* keys, int64 a, int64 b, int64 c, char** values,
     for (int64 i = 0; i < sort_dimension_elements; ++i) {
       keys[base_offset + i * sort_dimension_offset] = row_to_sort[i].first;
     }
+    if (values == nullptr) {
+      continue;
+    }
 
     // Reorder the values according to the order defined by the keys.
-    for (int32 idx = 0; idx < values_count; ++idx) {
-      for (int64 i = 0; i < sort_dimension_elements; ++i) {
-        int64 memory_index =
-            (base_offset + row_to_sort[i].second * sort_dimension_offset) *
-            values_primitive_type_size_in_bytes[idx];
-
-        reordered_values[i] =
-            std::string(values[idx] + memory_index,
-                        values_primitive_type_size_in_bytes[idx]);
-      }
-      for (int64 i = 0; i < sort_dimension_elements; ++i) {
-        int64 memory_index = (base_offset + i * sort_dimension_offset) *
-                             values_primitive_type_size_in_bytes[idx];
-        memcpy(values[idx] + memory_index, reordered_values[i].c_str(),
-               values_primitive_type_size_in_bytes[idx]);
-      }
+    for (int64 i = 0; i < sort_dimension_elements; ++i) {
+      int64 memory_index =
+          (base_offset + row_to_sort[i].second * sort_dimension_offset) *
+          values_primitive_type_size_in_bytes;
+
+      reordered_values[i] = std::string(values + memory_index,
+                                        values_primitive_type_size_in_bytes);
+    }
+    for (int64 i = 0; i < sort_dimension_elements; ++i) {
+      int64 memory_index = (base_offset + i * sort_dimension_offset) *
+                           values_primitive_type_size_in_bytes;
+      memcpy(values + memory_index, reordered_values[i].c_str(),
+             values_primitive_type_size_in_bytes);
     }
   }
 }
 }  // namespace
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortPRED(
-    bool* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    bool* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS8(
-    int8* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    int8* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU8(
-    uint8* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    uint8* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS16(
-    int16* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    int16* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU16(
-    uint16* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    uint16* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortF16(
-    Eigen::half* keys, int64 a, int64 b, int64 c, char** values,
-    int32 values_count, int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    Eigen::half* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS32(
-    int32* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    int32* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU32(
-    uint32* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    uint32* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortF32(
-    float* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    float* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortS64(
-    int64* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    int64* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortU64(
-    uint64* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    uint64* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSortF64(
-    double* keys, int64 a, int64 b, int64 c, char** values, int32 values_count,
-    int32* values_primitive_type_size_in_bytes) {
-  KeyValueSortImpl(keys, a, b, c, values, values_count,
-                   values_primitive_type_size_in_bytes);
+    double* keys, int64 a, int64 b, int64 c, char* values,
+    int32 values_primitive_type_size_in_bytes) {
+  KeyValueSortImpl(keys, a, b, c, values, values_primitive_type_size_in_bytes);
 }
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
index 7821099386..28e35e82c1 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
+++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h
@@ -22,75 +22,67 @@ limitations under the License.
 extern "C" {
 
 // 'keys' represents a 3-dimensional shape with dimensions [a, b, c]. The 'b'
-// dimension of 'keys' is sorted into ascending order. If 'values_count' is <=
-// 0, 'values' and 'values_primitive_type_size_in_bytes' can be nullptr.
-// If 'values_count' > 0, they contain exactly 'values_count' many elements.
-// Each element of 'values' also represents a 3-dimensional shape with
-// dimensions [a, b, c], and the size of the primitive type of the i-th shape
-// has exactly 'values_primitive_type_size_in_bytes[i]' bytes. The elements in
-// each 'values' shape are reordered in such a way that if the element at index
-// 'i' in 'keys' was moved to index 'j', the element at index 'i' in a 'values'
-// shape is also moved to index 'j' (which means that the same elements
-// correspond to each other as before).
+// dimension of 'keys' is sorted into ascending order. 'values' can be nullptr.
+// If 'values' is not nullptr, the elements in 'values' are reordered in such a
+// way that if the element at index 'i' in 'keys' was moved to index 'j', the
+// element at index 'i' in 'values' is also moved to index 'j' (which means that
+// the same elements correspond to each other as before).
 extern void __xla_cpu_runtime_KeyValueSortPRED(
     bool* keys, tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
-    char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    char* values, tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortS8(
     tensorflow::int8* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortU8(
     tensorflow::uint8* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortS16(
     tensorflow::int16* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortU16(
     tensorflow::uint16* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortF16(
     Eigen::half* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortS32(
     tensorflow::int32* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortU32(
     tensorflow::uint32* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortF32(
     float* keys, tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
-    char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    char* values, tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortS64(
     tensorflow::int64* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortU64(
     tensorflow::uint64* keys, tensorflow::int64 a, tensorflow::int64 b,
-    tensorflow::int64 c, char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    tensorflow::int64 c, char* values,
+    tensorflow::int32 values_primitive_type_size_in_bytes);
 
 extern void __xla_cpu_runtime_KeyValueSortF64(
     double* keys, tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c,
-    char** values, tensorflow::int32 values_count,
-    tensorflow::int32* values_primitive_type_size_in_bytes);
+    char* values, tensorflow::int32 values_primitive_type_size_in_bytes);
 }
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_KEY_VALUE_SORT_H_
-- 
GitLab


From cd1975be1ede20d30d7422c0d4e2f718e27bc766 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 16 Oct 2018 17:26:59 -0700
Subject: [PATCH 1071/1085] [TF:XLA] Merge XlaTransferManager and
 XlaDeviceContext.

XlaTransferManager has no other users any more, so it serves no useful purpose.

PiperOrigin-RevId: 217417415
---
 tensorflow/compiler/jit/xla_device_context.cc | 63 +++++--------------
 tensorflow/compiler/jit/xla_device_context.h  | 37 ++---------
 tensorflow/compiler/jit/xla_launch_util.cc    |  2 +-
 3 files changed, 21 insertions(+), 81 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index af83c792e5..090021093d 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -50,7 +50,7 @@ void XlaDeviceAllocator::DeallocateRaw(void* ptr) {
 
 void XlaDeviceAllocator::GetStats(AllocatorStats* stats) { stats->Clear(); }
 
-XlaTransferManager::XlaTransferManager(
+XlaDeviceContext::XlaDeviceContext(
     std::shared_ptr<se::Stream> compute_stream,
     std::shared_ptr<se::Stream> host_to_device_stream,
     std::shared_ptr<se::Stream> device_to_host_stream, xla::LocalClient* client,
@@ -75,8 +75,8 @@ XlaTransferManager::XlaTransferManager(
   }
 }
 
-Status XlaTransferManager::TransferLiteralToDevice(
-    const Tensor& host_tensor, Tensor* device_tensor) const {
+Status XlaDeviceContext::TransferLiteralToDevice(const Tensor& host_tensor,
+                                                 Tensor* device_tensor) const {
   xla::Shape xla_shape;
   TF_RETURN_IF_ERROR(TensorShapeToXLAShape(host_tensor.dtype(),
                                            host_tensor.shape(), &xla_shape));
@@ -112,7 +112,7 @@ Status XlaTransferManager::TransferLiteralToDevice(
   return Status::OK();
 }
 
-void XlaTransferManager::TransferLiteralFromDevice(
+void XlaDeviceContext::TransferLiteralFromDevice(
     Tensor* host_tensor, const Tensor& device_tensor,
     const StatusCallback& done) const {
   xla::MutableBorrowingLiteral literal;
@@ -134,10 +134,10 @@ void XlaTransferManager::TransferLiteralFromDevice(
       });
 }
 
-void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
-                                               Device* device,
-                                               Tensor* device_tensor,
-                                               StatusCallback done) const {
+void XlaDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
+                                             Device* device,
+                                             Tensor* device_tensor,
+                                             StatusCallback done) const {
   if (cpu_tensor->NumElements() == 0) {
     VLOG(2) << "CopyCPUTensorToDevice empty tensor";
     done(Status::OK());
@@ -202,11 +202,10 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
   done(status);
 }
 
-void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor,
-                                               absl::string_view tensor_name,
-                                               Device* device,
-                                               Tensor* cpu_tensor,
-                                               StatusCallback done) {
+void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
+                                             absl::string_view tensor_name,
+                                             Device* device, Tensor* cpu_tensor,
+                                             StatusCallback done) {
   if (device_tensor->NumElements() == 0) {
     VLOG(2) << "CopyDeviceTensorToCPU empty tensor";
     done(Status::OK());
@@ -250,9 +249,9 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor,
   done(status);
 }
 
-void XlaTransferManager::CopyDeviceTensorToDevice(const Tensor& src_tensor,
-                                                  Tensor* dst_tensor,
-                                                  const StatusCallback& done) {
+void XlaDeviceContext::CopyDeviceTensorToDevice(const Tensor& src_tensor,
+                                                Tensor* dst_tensor,
+                                                const StatusCallback& done) {
   VLOG(2) << "CopyDeviceTensorToDevice "
           << reinterpret_cast<const void*>(src_tensor.tensor_data().data())
           << " "
@@ -320,36 +319,4 @@ void XlaTransferManager::CopyDeviceTensorToDevice(const Tensor& src_tensor,
   }
 }
 
-XlaDeviceContext::XlaDeviceContext(
-    std::shared_ptr<se::Stream> compute_stream,
-    std::shared_ptr<se::Stream> host_to_device_stream,
-    std::shared_ptr<se::Stream> device_to_host_stream, xla::LocalClient* client,
-    bool transfer_as_literal,
-    XlaCompiler::ShapeRepresentationFn shape_representation_fn,
-    thread::ThreadPool* thread_pool)
-    : manager_(std::move(compute_stream), std::move(host_to_device_stream),
-               std::move(device_to_host_stream), client, transfer_as_literal,
-               std::move(shape_representation_fn), thread_pool) {}
-
-void XlaDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
-                                             Device* device,
-                                             Tensor* device_tensor,
-                                             StatusCallback done) const {
-  manager_.CopyCPUTensorToDevice(cpu_tensor, device, device_tensor, done);
-}
-
-void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
-                                             absl::string_view tensor_name,
-                                             Device* device, Tensor* cpu_tensor,
-                                             StatusCallback done) {
-  manager_.CopyDeviceTensorToCPU(device_tensor, tensor_name, device, cpu_tensor,
-                                 done);
-}
-
-void XlaDeviceContext::CopyDeviceTensorToDevice(const Tensor& src_tensor,
-                                                Tensor* dst_tensor,
-                                                const StatusCallback& done) {
-  manager_.CopyDeviceTensorToDevice(src_tensor, dst_tensor, done);
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h
index df82421294..babb60acb5 100644
--- a/tensorflow/compiler/jit/xla_device_context.h
+++ b/tensorflow/compiler/jit/xla_device_context.h
@@ -44,9 +44,9 @@ class XlaDeviceAllocator : public Allocator {
 };
 
 // Helper class for managing data transfers between host and XLA devices.
-class XlaTransferManager {
+class XlaDeviceContext : public DeviceContext {
  public:
-  explicit XlaTransferManager(
+  explicit XlaDeviceContext(
       std::shared_ptr<se::Stream> compute_stream,
       std::shared_ptr<se::Stream> host_to_device_stream,
       std::shared_ptr<se::Stream> device_to_host_stream,
@@ -55,10 +55,11 @@ class XlaTransferManager {
       thread::ThreadPool* thread_pool);
 
   void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
-                             Tensor* device_tensor, StatusCallback done) const;
+                             Tensor* device_tensor,
+                             StatusCallback done) const override;
   void CopyDeviceTensorToCPU(const Tensor* device_tensor,
                              absl::string_view tensor_name, Device* device,
-                             Tensor* cpu_tensor, StatusCallback done);
+                             Tensor* cpu_tensor, StatusCallback done) override;
 
   void CopyDeviceTensorToDevice(const Tensor& src_tensor, Tensor* dst_tensor,
                                 const StatusCallback& done);
@@ -94,34 +95,6 @@ class XlaTransferManager {
   thread::ThreadPool* thread_pool_;
 };
 
-// DeviceContext for operators assigned to XlaDevice devices. The
-// implementation must inherit from DeviceContext but otherwise just
-// wraps the methods in XlaTransferManager.
-class XlaDeviceContext : public DeviceContext {
- public:
-  explicit XlaDeviceContext(
-      std::shared_ptr<se::Stream> compute_stream,
-      std::shared_ptr<se::Stream> host_to_device_stream,
-      std::shared_ptr<se::Stream> device_to_host_stream,
-      xla::LocalClient* client, bool transfer_as_literal,
-      XlaCompiler::ShapeRepresentationFn shape_representation_fn,
-      thread::ThreadPool* thread_pool);
-
-  void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
-                             Tensor* device_tensor,
-                             StatusCallback done) const override;
-  void CopyDeviceTensorToCPU(const Tensor* device_tensor,
-                             absl::string_view tensor_name, Device* device,
-                             Tensor* cpu_tensor, StatusCallback done) override;
-  void CopyDeviceTensorToDevice(const Tensor& src_tensor, Tensor* dst_tensor,
-                                const StatusCallback& done);
-
-  se::Stream* stream() const override { return manager_.stream(); }
-
- private:
-  XlaTransferManager manager_;
-};
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_COMPILER_JIT_XLA_DEVICE_CONTEXT_H_
diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc
index 4f6fc4e068..0e8ee56ed8 100644
--- a/tensorflow/compiler/jit/xla_launch_util.cc
+++ b/tensorflow/compiler/jit/xla_launch_util.cc
@@ -239,7 +239,7 @@ Status XlaComputationLaunchContext::PopulateOutputs(
         // Copy host -> device. (Empty tensors don't have backing buffers.)
         // Manually allocate memory using an XlaTensorBuffer so we can allocate
         // as much memory as the device requires (as given by
-        // GetByteSizeRequirement). This avoids XlaTransferManager having to
+        // GetByteSizeRequirement). This avoids XlaDeviceContext having to
         // reallocate the device buffer later.
         VLOG(1) << "Constant output tensor on device";
 
-- 
GitLab


From 3f7d60ca9d3f8037ba752220e80fc95d3c0be71a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 17:56:15 -0700
Subject: [PATCH 1072/1085] Cleanup: Don't crash when querying node for
 non-existing attributes.

PiperOrigin-RevId: 217420663
---
 tensorflow/core/grappler/BUILD                |  1 +
 .../optimizers/arithmetic_optimizer.cc        | 33 +++++++-------
 .../grappler/optimizers/constant_folding.cc   | 45 ++++++++++++-------
 tensorflow/core/grappler/utils.cc             | 15 +++++++
 tensorflow/core/grappler/utils.h              | 13 +++++-
 tensorflow/core/grappler/utils_test.cc        | 30 +++++++++++++
 6 files changed, 101 insertions(+), 36 deletions(-)

diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index 7c6fe56e1f..3bad29a239 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -26,6 +26,7 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/types:span",
     ],
 )
 
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index 9b94d2706a..a09100f121 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -67,7 +67,8 @@ bool ValuesFromConstNode(const NodeDef& node, std::vector<T>* values) {
     return false;
   }
 
-  if (node.attr().at("dtype").type() != DataTypeToEnum<T>::value) {
+  if (node.attr().count("dtype") == 0 || node.attr().count("value") == 0 ||
+      node.attr().at("dtype").type() != DataTypeToEnum<T>::value) {
     return false;
   }
 
@@ -158,14 +159,6 @@ void SetSourceDataType(DataType dtype, NodeDef* node) {
   SetDataTypeToAttr(dtype, SourceDataTypeAttrName(*node), node);
 }
 
-Status CheckAttrExists(const NodeDef& node, const string& key) {
-  if (node.attr().count(key) == 0) {
-    return errors::InvalidArgument("Node '", node.name(), "'lacks '", key,
-                                   "' attr: ", node.DebugString());
-  }
-  return Status::OK();
-}
-
 NodeDef* GetTailOfValuePreservingChain(
     const NodeDef& node, const NodeMap& node_map,
     const std::unordered_set<string>& nodes_to_preserve) {
@@ -641,7 +634,7 @@ class AddOpsRewriteStage : public ArithmeticNodesGroupOptimizerStage {
     CHECK(!inputs.empty()) << "Inputs must be non-empty";
 
     // Do not create redundant AddN nodes
-    if (inputs.size() == 1) {
+    if (inputs.size() == 1 || root_node.attr().count("T") == 0) {
       return inputs[0];
     }
 
@@ -1450,10 +1443,11 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
 
   bool IsSupported(const NodeDef* node) const override {
     if (IsInPreserveSet(*node)) return false;
-    if (IsConcat(*node)) {
+    if (IsConcat(*node) && node->attr().count("N") != 0) {
       const int n = node->attr().at("N").i();
       return n > 1;
-    } else if (IsSplit(*node) || IsSplitV(*node)) {
+    } else if ((IsSplit(*node) || IsSplitV(*node)) &&
+               node->attr().count("num_split") != 0) {
       const int num_split = node->attr().at("num_split").i();
       if (NumNonControlOutputs(*node, *ctx().node_map) > num_split) {
         // TODO(rmlarsen): Remove this constraint when we have optimizations
@@ -1556,6 +1550,7 @@ class HoistCWiseUnaryChainsStage : public ArithmeticOptimizerStage {
   Status InitializeChains(const NodeDef& node, ChainLinkSet* tails) const {
     if (node_is_concat_) {
       // Handle concat nodes by looking backwards in the graph.
+      TF_RETURN_IF_ERROR(CheckAttrExists(node, "N"));
       const int n = node.attr().at("N").i();
       const int start = node.op() == "Concat" ? 1 : 0;
       const int end = start + n;
@@ -2029,6 +2024,8 @@ class FoldMultiplyIntoConv : public ArithmeticOptimizerStage {
 
     // Check that 'scale * weight' can be const folded.
     TF_RETURN_IF_TRUE(!IsConstant(*scale));
+    TF_RETURN_IF_ERROR(CheckAttrsExist(*scale, {"dtype", "value"}));
+    TF_RETURN_IF_ERROR(CheckAttrExists(*weights, "dtype"));
     TF_RETURN_IF_TRUE(scale->attr().at("dtype").type() !=
                       weights->attr().at("dtype").type());
 
@@ -2803,6 +2800,7 @@ class UnaryOpsComposition : public ArithmeticOptimizerStage {
   }
 
   Status TrySimplify(NodeDef* root, string* simplified_node_name) override {
+    TF_RETURN_IF_ERROR(CheckAttrExists(*root, "T"));
     DataType dtype = root->attr().at("T").type();
 
     // Keep a trace of all supported input nodes that can be fused together.
@@ -3023,10 +3021,9 @@ class RemoveStackStridedSliceSameAxis : public ArithmeticOptimizerStage {
                       const PartialTensorShape& pack_output_shape,
                       int pack_axis, int* slice_start_value, bool* found) {
     *found = false;
-    for (auto key : {"begin_mask", "end_mask", "ellipsis_mask", "new_axis_mask",
-                     "shrink_axis_mask"}) {
-      TF_RETURN_IF_ERROR(CheckAttrExists(*node, key));
-    }
+    TF_RETURN_IF_ERROR(
+        CheckAttrsExist(*node, {"begin_mask", "end_mask", "ellipsis_mask",
+                                "new_axis_mask", "shrink_axis_mask"}));
 
     const int begin_mask = node->attr().at("begin_mask").i();
     const int end_mask = node->attr().at("end_mask").i();
@@ -3056,14 +3053,14 @@ class RemoveStackStridedSliceSameAxis : public ArithmeticOptimizerStage {
     Tensor slice_strides_t;
 
     TF_RETURN_IF_ERROR(CheckAttrExists(*slice_begin, "value"));
-    TF_RETURN_IF_ERROR(CheckAttrExists(*slice_end, "value"));
-
     if (!slice_begin_t.FromProto(slice_begin->attr().at("value").tensor())) {
       return Status::OK();
     }
+    TF_RETURN_IF_ERROR(CheckAttrExists(*slice_end, "value"));
     if (!slice_end_t.FromProto(slice_end->attr().at("value").tensor())) {
       return Status::OK();
     }
+    TF_RETURN_IF_ERROR(CheckAttrExists(*slice_strides, "value"));
     if (!slice_strides_t.FromProto(
             slice_strides->attr().at("value").tensor())) {
       return Status::OK();
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index c963f96858..8c56f665bf 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -349,6 +349,9 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) {
       if (IsReallyConstant(*array_size)) {
         // Don't materialize 0 sizes to avoid triggering incorrect static
         // checks. A 0 sized array that can't grow isn't useful anyway.
+        if (array_size->attr().count("value") == 0) {
+          continue;
+        }
         const TensorProto& raw_val = array_size->attr().at("value").tensor();
         if (raw_val.dtype() != DT_INT32) {
           continue;
@@ -454,6 +457,9 @@ bool ExtractShape(const NodeDef& shape_node, const GraphProperties& properties,
       *min_id = std::min<int64>(*min_id, dim.size());
     }
   } else {
+    if (shape_node.attr().count("value") == 0) {
+      return false;
+    }
     const TensorProto& raw_val = shape_node.attr().at("value").tensor();
     if (raw_val.dtype() != DT_INT64 && raw_val.dtype() != DT_INT32) {
       return false;
@@ -552,6 +558,7 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs(
   reduce_dims[0] = bcast.grad_x_reduce_idx();
   reduce_dims[1] = bcast.grad_y_reduce_idx();
 
+  TF_RETURN_IF_ERROR(CheckAttrExists(node, "T"));
   const DataType type = node.attr().at("T").type();
   NodeDef* out[2];
   for (int j = 0; j < 2; ++j) {
@@ -790,7 +797,8 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const {
     if (is_const) {
       // Don't fold strings constants for now since this causes problems with
       // checkpointing.
-      if (input_node->attr().at("dtype").type() == DT_STRING) {
+      if (input_node->attr().count("dtype") == 0 ||
+          input_node->attr().at("dtype").type() == DT_STRING) {
         return false;
       }
       // Special case: If a Merge node has at least one constant input that
@@ -985,6 +993,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
                     strings::StrCat("Can't fold ", node.name(), ", its ", input,
                                     " isn't constant"));
     }
+    TF_RETURN_IF_ERROR(CheckAttrExists(*input_node, "value"));
     const TensorProto& raw_val = input_node->attr().at("value").tensor();
     Tensor* value = new Tensor(raw_val.dtype(), raw_val.tensor_shape());
     CHECK(value->FromProto(raw_val));
@@ -1398,16 +1407,13 @@ bool ConstantFolding::IsOnes(const NodeDef& node) const {
   if (feed_nodes_.find(node.name()) != feed_nodes_.end()) {
     return false;
   }
-  if (node.op() == "OnesLike") {
-    return true;
-  }
+  if (node.op() == "OnesLike") return true;
   if (node.op() == "Fill") {
     NodeDef* values = node_map_->GetNode(NodeName(node.input(1)));
     return values != nullptr && IsOnes(*values);
   }
-  if (node.op() != "Const") {
-    return false;
-  }
+  if (node.op() != "Const") return false;
+  if (node.attr().count("dtype") == 0) return false;
   const auto dtype = node.attr().at("dtype").type();
   switch (dtype) {
     IS_ONES_CASE(DT_BOOL);
@@ -1434,16 +1440,13 @@ bool ConstantFolding::IsZeros(const NodeDef& node) const {
   if (feed_nodes_.find(node.name()) != feed_nodes_.end()) {
     return false;
   }
-  if (node.op() == "ZerosLike") {
-    return true;
-  }
+  if (node.op() == "ZerosLike") return true;
   if (node.op() == "Fill") {
     NodeDef* values = node_map_->GetNode(NodeName(node.input(1)));
     return values != nullptr && IsZeros(*values);
   }
-  if (!IsConstant(node)) {
-    return false;
-  }
+  if (!IsConstant(node)) return false;
+  if (node.attr().count("dtype") == 0) return false;
   const auto dtype = node.attr().at("dtype").type();
   switch (dtype) {
     IS_ZEROS_CASE(DT_BOOL);
@@ -1737,11 +1740,11 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node,
 bool ConstantFolding::RemoveSplitOrSplitV(const GraphProperties& properties,
                                           GraphDef* optimized_graph,
                                           NodeDef* node) {
+  if (node->attr().count("num_split") == 0) return false;
   if (IsSplit(*node) && node->attr().at("num_split").i() == 1) {
     ReplaceOperationWithIdentity(1, properties, node, optimized_graph);
     return true;
   }
-
   if (IsSplitV(*node) && node->attr().at("num_split").i() == 1) {
     ReplaceOperationWithIdentity(0, properties, node, optimized_graph);
     return true;
@@ -1918,6 +1921,8 @@ Status ConstantFolding::SimplifyStridedSlice(const GraphProperties& properties,
                                              NodeDef* node, bool* success) {
   if (use_shape_info && IsStridedSlice(*node) &&
       properties.GetInputProperties(node->name()).size() == 4) {
+    TF_RETURN_IF_ERROR(
+        CheckAttrsExist(*node, {"new_axis_mask", "shrink_axis_mask"}));
     if (node->attr().at("new_axis_mask").i() != 0 ||
         node->attr().at("shrink_axis_mask").i() != 0) {
       // Skip nodes with new/shrink axis mask, since they involve dimension
@@ -1952,6 +1957,8 @@ Status ConstantFolding::SimplifyStridedSlice(const GraphProperties& properties,
         return errors::InvalidArgument("Cannot parse tensor from proto: ",
                                        s.value().DebugString());
       }
+      TF_RETURN_IF_ERROR(
+          CheckAttrsExist(*node, {"begin_mask", "end_mask", "ellipsis_mask"}));
       int begin_mask = node->attr().at("begin_mask").i();
       int end_mask = node->attr().at("end_mask").i();
       std::set<int> expanded_ellipsis_indices;
@@ -2280,7 +2287,7 @@ bool ConstantFolding::SimplifyReduction(const GraphProperties& properties,
     // Replace the reduction node with an identity node, that can be further
     // optimized by the model pruner.
     DataType output_type;
-    if (node->attr().count("T") > 0) {
+    if (node->attr().count("T") != 0) {
       output_type = node->attr().at("T").type();
     } else {
       // This is an 'any' or 'all' reduction. The output is always boolean.
@@ -2297,8 +2304,10 @@ bool ConstantFolding::SimplifyReduction(const GraphProperties& properties,
 
 bool ConstantFolding::SimplifyReshape(const GraphProperties& properties,
                                       bool use_shape_info, NodeDef* node) {
-  if (!use_shape_info) return false;
-  if (!IsSimplifiableReshape(*node, properties)) return false;
+  if (!use_shape_info || node->attr().count("T") == 0 ||
+      !IsSimplifiableReshape(*node, properties)) {
+    return false;
+  }
   DataType output_type = node->attr().at("T").type();
   node->set_op("Identity");
   node->clear_attr();
@@ -2310,6 +2319,7 @@ bool ConstantFolding::SimplifyReshape(const GraphProperties& properties,
 Status ConstantFolding::SimplifyArithmeticOperations(
     const GraphProperties& properties, bool use_shape_info,
     GraphDef* optimized_graph, NodeDef* node, bool* success) {
+  *success = false;
   const bool is_mul = IsMul(*node) || IsLogicalAnd(*node);
   const bool is_matmul = IsMatMul(*node);
   const bool is_add = IsAdd(*node) || IsBiasAdd(*node) || IsLogicalOr(*node);
@@ -2354,6 +2364,7 @@ Status ConstantFolding::SimplifyArithmeticOperations(
 
     // Replace 1 / y with Reciprocal op.
     if (y_matches_output_shape && is_any_div && x_is_one) {
+      TF_RETURN_IF_ERROR(CheckAttrExists(*node, "T"));
       DataType type = node->attr().at("T").type();
       if (DataTypeIsFloating(type) || DataTypeIsComplex(type)) {
         ReplaceDivisionOfOnesByReciprocal(node, optimized_graph);
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 5867d01324..e803e2ac71 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -547,5 +547,20 @@ Status SetTensorValue(DataType dtype, int value, Tensor* tensor) {
 
 #undef HANDLE_CASE
 
+Status CheckAttrExists(const NodeDef& node, const string& key) {
+  if (node.attr().count(key) == 0) {
+    return errors::InvalidArgument("Node '", node.name(), "' lacks '", key,
+                                   "' attr: ", node.ShortDebugString());
+  }
+  return Status::OK();
+}
+
+Status CheckAttrsExist(const NodeDef& node, absl::Span<const string> keys) {
+  for (const string& key : keys) {
+    TF_RETURN_IF_ERROR(CheckAttrExists(node, key));
+  }
+  return Status::OK();
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index 0168ab1da3..39319eacb7 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -17,8 +17,12 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPPLER_UTILS_H_
 
 #include <functional>
+#include <iterator>
+#include <set>
+#include <unordered_set>
+#include <utility>
 #include <vector>
-
+#include "absl/types/span.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -29,6 +33,7 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/flatmap.h"
 #include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -244,6 +249,12 @@ int NumNonControlDataOutputs(const NodeDef& node, const NodeMap& node_map);
 // Removes redundant control inputs from node.
 void DedupControlInputs(NodeDef* node);
 
+// Returns an error if an attribute with the given key does not exist in node.
+Status CheckAttrExists(const NodeDef& node, const string& key);
+
+// Returns an error if attributes with the given keys do not exist in node.
+Status CheckAttrsExist(const NodeDef& node, absl::Span<const string> keys);
+
 // Returns the data type in attribute `attr_name` of `node`. If that attribute
 // doesn't exist, returns DT_INVALID.
 DataType GetDataTypeFromAttr(const NodeDef& node, const string& attr_name);
diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc
index 9b6c1f690b..447195b001 100644
--- a/tensorflow/core/grappler/utils_test.cc
+++ b/tensorflow/core/grappler/utils_test.cc
@@ -14,6 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/utils.h"
+
+#include <unistd.h>
+#include <memory>
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/grappler_item.h"
@@ -24,6 +27,7 @@ limitations under the License.
 #include "tensorflow/core/platform/notification.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -350,6 +354,32 @@ TEST_F(UtilsTest, NumNonControlOutputs) {
   EXPECT_EQ(1, NumNonControlDataOutputs(*add_node, node_map));
 }
 
+TEST(CheckAttrExists, All) {
+  NodeDef node;
+  node.set_name("node");
+  (*node.mutable_attr())["apple"].set_i(7);
+  (*node.mutable_attr())["pear"].set_b(true);
+
+  TF_EXPECT_OK(CheckAttrExists(node, "apple"));
+  TF_EXPECT_OK(CheckAttrExists(node, "pear"));
+
+  TF_EXPECT_OK(CheckAttrsExist(node, {}));
+  TF_EXPECT_OK(CheckAttrsExist(node, {"apple"}));
+  TF_EXPECT_OK(CheckAttrsExist(node, {"pear"}));
+  TF_EXPECT_OK(CheckAttrsExist(node, {"apple", "pear"}));
+  TF_EXPECT_OK(CheckAttrsExist(node, {"pear", "apple"}));
+
+  Status status = CheckAttrExists(node, "banana");
+  EXPECT_FALSE(status.ok());
+  EXPECT_EQ(status.ToString(),
+            "Invalid argument: Node 'node' lacks 'banana' attr: name: \"node\" "
+            "attr { key: \"apple\" value { i: 7 } } attr { key: \"pear\" value "
+            "{ b: true } }");
+  EXPECT_FALSE(CheckAttrsExist(node, {""}).ok());
+  EXPECT_FALSE(CheckAttrsExist(node, {"pear", "cherry"}).ok());
+  EXPECT_FALSE(CheckAttrsExist(node, {"banana", "apple"}).ok());
+}
+
 TEST_F(UtilsTest, DeleteNodes) {
   // TODO(rmlarsen): write forgotten test.
 }
-- 
GitLab


From b306ad9846238b7a396694c07510a1fc161627b2 Mon Sep 17 00:00:00 2001
From: Alexey Radul <axch@google.com>
Date: Tue, 16 Oct 2018 17:57:09 -0700
Subject: [PATCH 1073/1085] Extract nested functions functions in anf_test.py
 that include `exec` to toplevel, for a baroque compatibility reason.

PiperOrigin-RevId: 217420773
---
 .../pyct/common_transformers/anf_test.py      | 39 ++++++++++++-------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/common_transformers/anf_test.py b/tensorflow/python/autograph/pyct/common_transformers/anf_test.py
index ccc7e4ca8f..525d4886de 100644
--- a/tensorflow/python/autograph/pyct/common_transformers/anf_test.py
+++ b/tensorflow/python/autograph/pyct/common_transformers/anf_test.py
@@ -43,6 +43,29 @@ class DummyGensym(object):
     return stem + '_' + str(1000 + self._idx)
 
 
+# These two test functions have to be top-level, not nested, for compatibility
+# with some unknown version of Python 2.7 preceding 2.7.15.  Why?  Because
+# `exec` and nested function definitions _incomaptibly_ change the
+# representation of local variables, such that `exec` inside a nested function
+# definition is a syntax error in that version.  The tuple form of `exec` fixes
+# this problem, but apparently that was introduced in some unknown version of
+# Python that's more recent than at least one version that we wish to be
+# compatible with.
+def exec_test_function():
+  # The point is to test A-normal form conversion of exec
+  # pylint: disable=exec-used
+  exec('computed' + 5 + 'stuff', globals(), locals())
+
+
+def exec_expected_result():
+  # pylint: disable=exec-used
+  tmp_1001 = 'computed' + 5
+  tmp_1002 = tmp_1001 + 'stuff'
+  tmp_1003 = globals()
+  tmp_1004 = locals()
+  exec(tmp_1002, tmp_1003, tmp_1004)
+
+
 class AnfTransformerTest(test.TestCase):
 
   def _simple_source_info(self):
@@ -357,21 +380,7 @@ class AnfTransformerTest(test.TestCase):
     self.assert_body_anfs_as_expected(expected_result, test_function)
 
   def test_exec(self):
-
-    def test_function():
-      # The point is to test A-normal form conversion of exec
-      # pylint: disable=exec-used
-      exec('computed' + 5 + 'stuff', globals(), locals())
-
-    def expected_result():
-      # pylint: disable=exec-used
-      tmp_1001 = 'computed' + 5
-      tmp_1002 = tmp_1001 + 'stuff'
-      tmp_1003 = globals()
-      tmp_1004 = locals()
-      exec(tmp_1002, tmp_1003, tmp_1004)
-
-    self.assert_body_anfs_as_expected(expected_result, test_function)
+    self.assert_body_anfs_as_expected(exec_expected_result, exec_test_function)
 
   def test_simple_while_and_assert(self):
 
-- 
GitLab


From bddb651ed737f937d16dc93828469cf3abe331b2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 18:03:57 -0700
Subject: [PATCH 1074/1085] Move from deprecated self.test_session() to
 self.session() or self.cached_session().

Move to cached_session() if the session is create more than once per test. Move to session() otherwise.

self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to session() instead which slightly changes the semantic of the function:
* the session is not cached anymore (a new session is created).
* the session is closed when exiting the "with" scope.

PiperOrigin-RevId: 217421579
---
 .../python/kernel_tests/accumulate_n_test.py  |  8 +-
 .../python/kernel_tests/aggregate_ops_test.py |  6 +-
 .../python/kernel_tests/argmax_op_test.py     |  6 +-
 .../python/kernel_tests/array_ops_test.py     | 58 ++++++------
 .../python/kernel_tests/atrous_conv2d_test.py | 10 +-
 .../python/kernel_tests/basic_gpu_test.py     | 12 +--
 .../kernel_tests/batch_gather_op_test.py      | 10 +-
 .../kernel_tests/batch_matmul_op_test.py      |  4 +-
 .../kernel_tests/batch_scatter_ops_test.py    |  6 +-
 .../python/kernel_tests/bias_op_test.py       |  6 +-
 .../python/kernel_tests/bincount_op_test.py   | 12 +--
 .../python/kernel_tests/bitcast_op_test.py    |  2 +-
 .../kernel_tests/broadcast_to_ops_test.py     | 12 +--
 .../python/kernel_tests/bucketize_op_test.py  |  8 +-
 .../python/kernel_tests/cast_op_test.py       |  6 +-
 .../python/kernel_tests/cholesky_op_test.py   | 10 +-
 .../python/kernel_tests/clip_ops_test.py      | 52 +++++------
 .../compare_and_bitpack_op_test.py            |  2 +-
 .../python/kernel_tests/concat_op_test.py     | 36 ++++----
 .../python/kernel_tests/cond_v2_test.py       |  6 +-
 .../python/kernel_tests/constant_op_test.py   | 22 ++---
 .../kernel_tests/control_flow_ops_py_test.py  | 26 +++---
 tensorflow/python/kernel_tests/conv1d_test.py |  2 +-
 .../conv2d_backprop_filter_grad_test.py       |  2 +-
 .../kernel_tests/conv2d_transpose_test.py     |  6 +-
 .../python/kernel_tests/conv_ops_3d_test.py   |  8 +-
 .../python/kernel_tests/conv_ops_test.py      | 10 +-
 .../kernel_tests/ctc_decoder_ops_test.py      |  2 +-
 .../python/kernel_tests/ctc_loss_op_test.py   |  8 +-
 .../python/kernel_tests/dct_ops_test.py       |  2 +-
 .../kernel_tests/decode_image_op_test.py      |  8 +-
 .../python/kernel_tests/denormal_test.py      |  2 +-
 .../kernel_tests/dense_update_ops_test.py     |  6 +-
 .../kernel_tests/depthtospace_op_test.py      | 12 +--
 .../kernel_tests/depthwise_conv_op_test.py    | 12 +--
 .../kernel_tests/determinant_op_test.py       |  4 +-
 .../python/kernel_tests/diag_op_test.py       | 44 ++++-----
 .../kernel_tests/draw_bounding_box_op_test.py |  2 +-
 .../kernel_tests/dynamic_partition_op_test.py | 26 +++---
 .../kernel_tests/dynamic_stitch_op_test.py    | 18 ++--
 .../kernel_tests/edit_distance_op_test.py     |  4 +-
 .../python/kernel_tests/embedding_ops_test.py | 18 ++--
 .../extract_image_patches_op_test.py          |  2 +-
 .../extract_volume_patches_op_test.py         |  2 +-
 .../python/kernel_tests/fft_ops_test.py       | 14 +--
 .../python/kernel_tests/fifo_queue_test.py    |  2 +-
 .../python/kernel_tests/gather_nd_op_test.py  | 40 ++++----
 .../python/kernel_tests/gather_op_test.py     | 16 ++--
 .../python/kernel_tests/init_ops_test.py      | 45 +++++----
 .../python/kernel_tests/inplace_ops_test.py   | 18 ++--
 .../kernel_tests/large_concat_op_test.py      |  2 +-
 .../python/kernel_tests/linalg_grad_test.py   |  4 +-
 tensorflow/python/kernel_tests/losses_test.py |  6 +-
 tensorflow/python/kernel_tests/lrn_op_test.py |  6 +-
 .../python/kernel_tests/map_stage_op_test.py  | 24 ++---
 .../python/kernel_tests/matmul_op_test.py     |  4 +-
 .../kernel_tests/matrix_band_part_op_test.py  |  4 +-
 .../matrix_exponential_op_test.py             |  6 +-
 .../kernel_tests/matrix_inverse_op_test.py    |  4 +-
 .../kernel_tests/matrix_logarithm_op_test.py  |  4 +-
 .../kernel_tests/matrix_solve_ls_op_test.py   |  6 +-
 .../kernel_tests/matrix_solve_op_test.py      | 10 +-
 .../matrix_triangular_solve_op_test.py        |  2 +-
 .../kernel_tests/morphological_ops_test.py    |  8 +-
 .../neon_depthwise_conv_op_test.py            |  4 +-
 .../python/kernel_tests/norm_op_test.py       |  2 +-
 .../kernel_tests/nth_element_op_test.py       | 10 +-
 .../python/kernel_tests/numerics_test.py      |  6 +-
 .../python/kernel_tests/one_hot_op_test.py    |  2 +-
 tensorflow/python/kernel_tests/pad_op_test.py | 30 +++---
 .../parameterized_truncated_normal_op_test.py |  6 +-
 .../partitioned_variables_test.py             |  4 +-
 tensorflow/python/kernel_tests/pool_test.py   | 16 ++--
 .../kernel_tests/pooling_ops_3d_test.py       |  4 +-
 .../python/kernel_tests/pooling_ops_test.py   | 32 +++----
 .../python/kernel_tests/py_func_test.py       |  2 +-
 tensorflow/python/kernel_tests/qr_op_test.py  |  6 +-
 .../python/kernel_tests/reader_ops_test.py    |  2 +-
 .../python/kernel_tests/reduction_ops_test.py | 42 ++++-----
 .../python/kernel_tests/relu_op_test.py       | 14 +--
 .../python/kernel_tests/reshape_op_test.py    |  2 +-
 .../kernel_tests/reverse_sequence_op_test.py  |  2 +-
 tensorflow/python/kernel_tests/rnn_test.py    |  4 +-
 .../python/kernel_tests/scan_ops_test.py      | 16 ++--
 .../kernel_tests/scatter_nd_ops_test.py       | 16 ++--
 .../python/kernel_tests/scatter_ops_test.py   |  8 +-
 .../segment_reduction_ops_test.py             | 76 +++++++--------
 .../kernel_tests/self_adjoint_eig_op_test.py  |  8 +-
 .../python/kernel_tests/shape_ops_test.py     | 42 ++++-----
 .../python/kernel_tests/slice_op_test.py      | 28 +++---
 .../python/kernel_tests/softmax_op_test.py    |  6 +-
 .../python/kernel_tests/softplus_op_test.py   |  2 +-
 .../python/kernel_tests/softsign_op_test.py   |  2 +-
 .../kernel_tests/spacetobatch_op_test.py      |  8 +-
 .../kernel_tests/spacetodepth_op_test.py      | 12 +--
 .../python/kernel_tests/sparse_add_op_test.py | 14 +--
 .../kernel_tests/sparse_concat_op_test.py     | 20 ++--
 .../kernel_tests/sparse_matmul_op_test.py     |  2 +-
 .../python/kernel_tests/sparse_ops_test.py    | 86 ++++++++---------
 .../kernel_tests/sparse_reorder_op_test.py    | 10 +-
 .../kernel_tests/sparse_reshape_op_test.py    | 32 +++----
 .../sparse_serialization_ops_test.py          | 24 ++---
 .../kernel_tests/sparse_slice_op_test.py      | 14 +--
 .../kernel_tests/sparse_split_op_test.py      | 14 +--
 .../sparse_tensor_dense_matmul_grad_test.py   |  2 +-
 .../sparse_tensor_dense_matmul_op_test.py     |  6 +-
 .../sparse_tensors_map_ops_test.py            |  8 +-
 .../sparse_to_dense_op_py_test.py             | 18 ++--
 .../kernel_tests/sparse_xent_op_test.py       | 22 ++---
 .../python/kernel_tests/split_op_test.py      | 14 +--
 .../python/kernel_tests/stack_op_test.py      | 36 ++++----
 .../python/kernel_tests/stack_ops_test.py     | 28 +++---
 .../python/kernel_tests/stage_op_test.py      | 14 +--
 .../kernel_tests/string_length_op_test.py     |  4 +-
 tensorflow/python/kernel_tests/svd_op_test.py |  6 +-
 .../kernel_tests/tensor_array_ops_test.py     | 92 +++++++++----------
 .../python/kernel_tests/tensordot_op_test.py  |  4 +-
 .../python/kernel_tests/topk_op_test.py       |  6 +-
 .../python/kernel_tests/trace_op_test.py      |  2 +-
 .../python/kernel_tests/transpose_op_test.py  | 20 ++--
 .../python/kernel_tests/unstack_op_test.py    |  8 +-
 .../python/kernel_tests/variables_test.py     |  2 +-
 .../python/kernel_tests/where_op_test.py      |  8 +-
 .../python/kernel_tests/xent_op_test.py       | 10 +-
 .../python/kernel_tests/zero_division_test.py |  2 +-
 125 files changed, 843 insertions(+), 844 deletions(-)

diff --git a/tensorflow/python/kernel_tests/accumulate_n_test.py b/tensorflow/python/kernel_tests/accumulate_n_test.py
index 0bc5268f38..7889edc198 100644
--- a/tensorflow/python/kernel_tests/accumulate_n_test.py
+++ b/tensorflow/python/kernel_tests/accumulate_n_test.py
@@ -36,7 +36,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase):
     np.random.seed(12345)
     x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)]
     tf_x = ops.convert_n_to_tensor(x)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).eval())
       self.assertAllClose(x[0] * 5,
                           math_ops.accumulate_n([tf_x[0]] * 5).eval())
@@ -45,13 +45,13 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase):
     np.random.seed(54321)
     x = [np.random.randint(-128, 128, (5, 4, 3, 2, 1)) for _ in range(6)]
     tf_x = ops.convert_n_to_tensor(x)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(sum(x), math_ops.accumulate_n(tf_x).eval())
       self.assertAllEqual(x[0] * 6,
                           math_ops.accumulate_n([tf_x[0]] * 6).eval())
 
   def testUnknownShape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x0 = array_ops.placeholder(dtype=dtypes_lib.int32, shape=[None])
       acc = math_ops.accumulate_n([x0, x0], shape=[None])
       self.assertAllEqual([2, 4], acc.eval(feed_dict={x0: [1, 2]}))
@@ -59,7 +59,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase):
   def testGrad(self):
     np.random.seed(42)
     for num_inputs in range(1, 10):
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         input_vars = [
             variables.Variable(10.0 * np.random.random())
             for _ in range(0, num_inputs)
diff --git a/tensorflow/python/kernel_tests/aggregate_ops_test.py b/tensorflow/python/kernel_tests/aggregate_ops_test.py
index 72dff6b3da..0f15319cb5 100644
--- a/tensorflow/python/kernel_tests/aggregate_ops_test.py
+++ b/tensorflow/python/kernel_tests/aggregate_ops_test.py
@@ -57,7 +57,7 @@ class AddNTest(test.TestCase):
 
   def testAddN(self):
     np.random.seed(12345)
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for dtype in self._supported_types():
         for count in range(1, self._MAX_N + 1):
           data = [self._buildData((2, 2), dtype) for _ in range(count)]
@@ -69,7 +69,7 @@ class AddNTest(test.TestCase):
 
   def testUnknownShapes(self):
     np.random.seed(12345)
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for dtype in self._supported_types():
         data = self._buildData((2, 2), dtype)
         for count in range(1, self._MAX_N + 1):
@@ -96,7 +96,7 @@ class AddNTest(test.TestCase):
 
     # TODO(ebrevdo): Re-enable use_gpu=True once non-DMA Variant
     # copying between CPU and GPU is supported.
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       variant_const_3 = create_constant_variant(3)
       variant_const_4 = create_constant_variant(4)
       variant_const_5 = create_constant_variant(5)
diff --git a/tensorflow/python/kernel_tests/argmax_op_test.py b/tensorflow/python/kernel_tests/argmax_op_test.py
index 127d14c250..fa370c17b4 100644
--- a/tensorflow/python/kernel_tests/argmax_op_test.py
+++ b/tensorflow/python/kernel_tests/argmax_op_test.py
@@ -34,7 +34,7 @@ class ArgMaxTest(test.TestCase):
                expected_values,
                use_gpu=False,
                expected_err_re=None):
-    with self.test_session(use_gpu=use_gpu):
+    with self.session(use_gpu=use_gpu):
       ans = method(x, axis=axis)
       if expected_err_re is None:
         tf_ans = ans.eval()
@@ -77,7 +77,7 @@ class ArgMaxTest(test.TestCase):
   def testFloatInt32Output(self):
     x = np.asarray(100 * np.random.randn(200), dtype=np.float32)
     expected_values = x.argmax()
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ans = math_ops.argmax(x, axis=0, output_type=dtypes.int32)
       tf_ans = ans.eval()
       self.assertEqual(np.int32, tf_ans.dtype)
@@ -85,7 +85,7 @@ class ArgMaxTest(test.TestCase):
       # the values don't have a range that exceeds 32-bit integers.
       self.assertAllEqual(tf_ans, expected_values)
     expected_values = x.argmin()
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ans = math_ops.argmin(x, axis=0, output_type=dtypes.int32)
       tf_ans = ans.eval()
       self.assertEqual(np.int32, tf_ans.dtype)
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index dcc594789e..78fc091cf8 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -283,7 +283,7 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
   def testReverse0DimAuto(self):
     x_np = 4
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         x_tf = array_ops.reverse_v2(x_np, []).eval()
         self.assertAllEqual(x_tf, x_np)
 
@@ -292,7 +292,7 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
 
     for use_gpu in [False, True]:
       for axis_dtype in [dtypes.int32, dtypes.int64]:
-        with self.test_session(use_gpu=use_gpu):
+        with self.cached_session(use_gpu=use_gpu):
           x_tf = array_ops.reverse_v2(x_np,
                                       constant_op.constant(
                                           [0], dtype=axis_dtype)).eval()
@@ -304,7 +304,7 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
     for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
       for use_gpu in [False, True]:
         for axis_dtype in [dtypes.int32, dtypes.int64]:
-          with self.test_session(use_gpu=use_gpu):
+          with self.cached_session(use_gpu=use_gpu):
             x_tf_1 = reverse_f(x_np, constant_op.constant(
                 [0], dtype=axis_dtype)).eval()
             x_tf_2 = reverse_f(x_np, constant_op.constant(
@@ -391,7 +391,7 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
 
   def testReverseRowsOf3Channels(self):
     """Tests optimized code for reversing rows with last dim size = 3."""
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
         for outer_size in (1, 2):
           for middle_size in list(range(50)) + [100000]:
@@ -403,7 +403,7 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
             self.assertAllEqual(x_tf, np_answer)
 
   def testReverseRowsOf4Channels(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
         for outer_size in (1, 2):
           for middle_size in list(range(50)) + [100000]:
@@ -415,7 +415,7 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
             self.assertAllEqual(x_tf, np_answer)
 
   def testReverseColumnsOf3Channels(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
         for outer_size in list(range(50)) + [100000]:
           for middle_size in (1, 2):
@@ -433,7 +433,7 @@ class MeshgridTest(test_util.TensorFlowTestCase):
     for index in ("ij", "xy"):
       numpy_out = np.meshgrid(x, y, indexing=index)
       tf_out = array_ops.meshgrid(x, y, indexing=index)
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         for xx, yy in zip(numpy_out, tf_out):
           self.assertAllEqual(xx, yy.eval())
 
@@ -446,7 +446,7 @@ class MeshgridTest(test_util.TensorFlowTestCase):
           x += 1j
         inputs.append(x)
       numpy_out = np.meshgrid(*inputs, indexing=index)
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tf_out = array_ops.meshgrid(*inputs, indexing=index)
         for x_np, x_tf in zip(numpy_out, tf_out):
           self.assertAllEqual(x_np, x_tf.eval())
@@ -523,7 +523,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
 
   def test_basic_slice(self):
     for tensor_type in STRIDED_SLICE_TYPES:
-      with self.test_session(use_gpu=not tensor_type.is_integer):
+      with self.cached_session(use_gpu=not tensor_type.is_integer):
         checker = StridedSliceChecker(
             self, StridedSliceChecker.REF_TENSOR, tensor_type=tensor_type)
         _ = checker[:, :, :]
@@ -551,7 +551,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
   def testInt64GPU(self):
     if not test_util.is_gpu_available():
       self.skipTest("No GPU available")
-    with self.test_session(use_gpu=True, force_gpu=True):
+    with self.session(use_gpu=True, force_gpu=True):
       x = constant_op.constant([1., 2., 3.])
       begin = constant_op.constant([2], dtype=dtypes.int64)
       end = constant_op.constant([3], dtype=dtypes.int64)
@@ -576,7 +576,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       v[0]  # pylint: disable=pointless-statement
 
   def testDegenerateSlices(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR)
       # degenerate by offering a forward interval with a negative stride
       _ = checker[0:-1:-1, :, :]
@@ -586,7 +586,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker[-1:0, 2:2, 2:3:-1]
 
   def testEllipsis(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       raw = [[[[[1, 2], [3, 4], [5, 6]]], [[[7, 8], [9, 10], [11, 12]]]]]
       checker = StridedSliceChecker(self, raw)
 
@@ -606,7 +606,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
         _ = checker[..., :, ...].eval()
 
   def testShrink(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
               [[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]]]]
       checker = StridedSliceChecker(self, raw)
@@ -616,7 +616,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker[:, :, 0]
 
   def testBothNewAxisAndShrink(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ones = array_ops.placeholder(shape=[2, 2], dtype=dtypes.int16)
       self.assertAllEqual(
           ones[array_ops.newaxis, :, 0].eval(
@@ -624,7 +624,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
           [[1, 1]])
 
   def testTensorIndexing(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
               [[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]]]]
       checker = StridedSliceChecker(self, raw, check_type_infer=False)
@@ -640,7 +640,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker[..., 3]
 
   def testExpand(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
               [[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]]]]
       checker = StridedSliceChecker(self, raw)
@@ -657,7 +657,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker[np.newaxis, ..., np.newaxis]
 
   def testExpandVariable(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = variables.Variable(7, dtype=dtypes.int32)
       x.initializer.run()
       y = x[None].eval()
@@ -665,7 +665,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(y, (7,))
 
   def testOptimizedCases(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       checker = StridedSliceChecker(self,
                                     StridedSliceChecker.REF_TENSOR_ALIGNED)
       # Identity
@@ -694,7 +694,7 @@ class StridedSliceShapeTest(test_util.TensorFlowTestCase):
   """Test the shape inference of StridedSliceShapes."""
 
   def testUnknown(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       uncertain_tensor = array_ops.placeholder(dtypes.float32)
       a = StridedSliceShapeChecker(uncertain_tensor)
       a_slice_shape = a[...]
@@ -705,7 +705,7 @@ class StridedSliceShapeTest(test_util.TensorFlowTestCase):
     self.assertEqual(x.as_list(), y.as_list())
 
   def testTensorShapeUncertain(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       uncertain_tensor = array_ops.placeholder(
           dtypes.float32, shape=(5, None, 7))
       a = StridedSliceShapeChecker(uncertain_tensor)
@@ -728,7 +728,7 @@ class StridedSliceShapeTest(test_util.TensorFlowTestCase):
                             tensor_shape.TensorShape([5, None, 1, 4]))
 
   def testTensorValuedIndexShape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       defined_shape_tensor = array_ops.placeholder(
           dtypes.float32, shape=(5, 3, 7))
       index_value = array_ops.placeholder(dtypes.int32, shape=())
@@ -784,7 +784,7 @@ class StridedSliceGradTest(test_util.TensorFlowTestCase):
   """Test that strided slice's custom gradient produces correct gradients."""
 
   def testGradient(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       var = variables.Variable(
           array_ops.reshape(
               math_ops.range(1, 97, 1, dtype=dtypes.float32), shape=(6, 4, 4)))
@@ -805,7 +805,7 @@ class StridedSliceGradTest(test_util.TensorFlowTestCase):
         _ = grad[:, 200, :]
 
   def testGradientZero(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       var = variables.Variable(8.)
       init = variables.global_variables_initializer()
       sess.run(init)
@@ -813,7 +813,7 @@ class StridedSliceGradTest(test_util.TensorFlowTestCase):
       _ = grad[tuple()]
 
   def testInt64Indices(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       a = math_ops.range(3, dtype=dtypes.float32)
       index = constant_op.constant(1, dtype=dtypes.int64)
       b = 2. * a[index]
@@ -825,7 +825,7 @@ class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
   """Test varied index types and host located memory."""
 
   def testHostVsDevice(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       var2 = variables.Variable(
           array_ops.reshape(
               math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
@@ -839,7 +839,7 @@ class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
       sess.run(foo)
 
   def testInt64Shape(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       original_dy = array_ops.reshape(
           math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
           shape=(4, 1, 1))
@@ -853,7 +853,7 @@ class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
       sess.run(dx)
 
   def testMixedIndexTypes(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       original_dy = array_ops.reshape(
           math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
           shape=(4, 1, 1))
@@ -1212,7 +1212,7 @@ class InvertPermutationTest(test_util.TensorFlowTestCase):
 
   def testInvertPermutation(self):
     for dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype)
         y = array_ops.invert_permutation(x)
         self.assertAllEqual(y.get_shape(), [5])
@@ -1278,7 +1278,7 @@ class SnapshotOpTest(test_util.TensorFlowTestCase):
 
   def testInvertPermutation(self):
     for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = constant_op.constant([0, 1, 2, 3], dtype=dtype)
         y = gen_array_ops.snapshot(x)
         self.assertAllEqual(y.eval(), [0, 1, 2, 3])
diff --git a/tensorflow/python/kernel_tests/atrous_conv2d_test.py b/tensorflow/python/kernel_tests/atrous_conv2d_test.py
index ab1d698f6e..1d82b3d058 100644
--- a/tensorflow/python/kernel_tests/atrous_conv2d_test.py
+++ b/tensorflow/python/kernel_tests/atrous_conv2d_test.py
@@ -59,7 +59,7 @@ def _upsample_filters(filters, rate):
 class AtrousConv2DTest(test.TestCase):
 
   def testAtrousConv2DForward(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # Input: [batch, height, width, input_depth]
       height = 9
       for width in [9, 10]:  # Test both odd and even width.
@@ -105,7 +105,7 @@ class AtrousConv2DTest(test.TestCase):
     padding = "SAME"  # The padding needs to be "SAME"
     np.random.seed(1)  # Make it reproducible.
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # Input: [batch, height, width, input_depth]
       for height in range(15, 17):
         for width in range(15, 17):
@@ -134,7 +134,7 @@ class AtrousConv2DTest(test.TestCase):
               self.assertAllClose(y1.eval(), y2.eval(), rtol=1e-2, atol=1e-2)
 
   def testGradient(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # Input: [batch, height, width, input_depth]
       x_shape = [2, 5, 6, 2]
       # Filter: [kernel_height, kernel_width, input_depth, output_depth]
@@ -161,7 +161,7 @@ class AtrousConv2DTest(test.TestCase):
 class AtrousConv2DTransposeTest(test.TestCase):
 
   def testAtrousConv2DTransposeForward(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # Input: [batch, height, width, input_depth]
       height = 9
       for width in [9, 10]:  # Test both odd and even width.
@@ -200,7 +200,7 @@ class AtrousDepthwiseConv2DTest(test.TestCase):
 
   def testAtrousDepthwiseConv2DForward(self):
     strides = [1, 1, 1, 1]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # Input: [batch, height, width, input_depth]
       height = 9
       for width in [9, 10]:  # Test both odd and even width.
diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py
index 67e8618198..225c1b35ae 100644
--- a/tensorflow/python/kernel_tests/basic_gpu_test.py
+++ b/tensorflow/python/kernel_tests/basic_gpu_test.py
@@ -40,13 +40,13 @@ from tensorflow.python.platform import test
 class GPUBinaryOpsTest(test.TestCase):
 
   def _compareGPU(self, x, y, np_func, tf_func):
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = tf_func(inx, iny)
       tf_gpu = sess.run(out)
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = tf_func(inx, iny)
@@ -93,7 +93,7 @@ class MathBuiltinUnaryTest(test.TestCase):
 
   def _compare(self, x, np_func, tf_func, use_gpu):
     np_out = np_func(x)
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       inx = ops.convert_to_tensor(x)
       ofunc = tf_func(inx)
       tf_out = sess.run(ofunc)
@@ -143,7 +143,7 @@ class MathBuiltinUnaryTest(test.TestCase):
 
     np_out = np.floor_divide(x, y + 0.1)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y + 0.1)
       ofunc = inx / iny
@@ -156,7 +156,7 @@ class MathBuiltinUnaryTest(test.TestCase):
 class BroadcastSimpleTest(test.TestCase):
 
   def _GetGradientArgs(self, xs, ys):
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       return sess.run(broadcast_gradient_args(xs, ys))
 
   def testBroadcast(self):
@@ -210,7 +210,7 @@ class BroadcastSimpleTest(test.TestCase):
 
   def _compareGpu(self, x, y, np_func, tf_func):
     np_ans = np_func(x, y)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = tf_func(inx, iny)
diff --git a/tensorflow/python/kernel_tests/batch_gather_op_test.py b/tensorflow/python/kernel_tests/batch_gather_op_test.py
index 84e93b8136..547506d844 100644
--- a/tensorflow/python/kernel_tests/batch_gather_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_gather_op_test.py
@@ -44,7 +44,7 @@ class GatherTest(test.TestCase, parameterized.TestCase):
   def testSimpleGather(self, indices_dtype):
     data = np.array([0, 1, 2, 3, 7, 5, 8, 9, 10, 11, 15, 13])
     indices = [3, 4]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in _TEST_TYPES:
         params_np = self._buildParams(data, dtype)
         params = constant_op.constant(params_np)
@@ -60,7 +60,7 @@ class GatherTest(test.TestCase, parameterized.TestCase):
   def test2DArray(self, indices_dtype):
     data = np.array([[0, 1, 2, 3, 7, 5], [8, 9, 10, 11, 15, 13]])
     indices = [[3], [4]]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in _TEST_TYPES:
         params_np = self._buildParams(data, dtype)
         params = constant_op.constant(params_np)
@@ -75,7 +75,7 @@ class GatherTest(test.TestCase, parameterized.TestCase):
   def testHigherRank(self):
     data = np.array([[[0, 1, 2], [3, 7, 5]], [[8, 9, 10], [11, 15, 13]]])
     indices = [[[2, 0], [1, 2]], [[2, 0], [0, 1]]]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in _TEST_TYPES:
         params_np = self._buildParams(data, dtype)
         params = constant_op.constant(params_np)
@@ -101,13 +101,13 @@ class GatherTest(test.TestCase, parameterized.TestCase):
     self.assertEqual([1, None], gather_t.get_shape().as_list())
 
   def testBadIndicesCPU(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       params = [[0, 1, 2], [3, 4, 5]]
       with self.assertRaisesOpError(r"indices\[0\] = 7 is not in \[0, 2\)"):
         array_ops.batch_gather(params, [7]).eval()
 
   def testEmptySlices(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in _TEST_TYPES:
         for itype in np.int32, np.int64:
           params = np.zeros((7, 0, 0), dtype=dtype.as_numpy_dtype)
diff --git a/tensorflow/python/kernel_tests/batch_matmul_op_test.py b/tensorflow/python/kernel_tests/batch_matmul_op_test.py
index 34089e8dbe..8f6c089b42 100644
--- a/tensorflow/python/kernel_tests/batch_matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_matmul_op_test.py
@@ -83,7 +83,7 @@ class BatchMatmulOpTest(test.TestCase):
     y = y_in if not adjoint_b else y_in.reshape(y_t_shape)
     is_floating = x.dtype != np.int32
     tol = 100 * np.finfo(x.dtype).eps if is_floating else 0
-    with self.test_session(use_gpu=is_floating) as sess:
+    with self.cached_session(use_gpu=is_floating) as sess:
       if static_shape:
         z0 = math_ops.matmul(x, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b)
         z0_val = z0.eval()
@@ -154,7 +154,7 @@ class BatchMatmulGradientTest(test.TestCase):
     y = y_in if not adjoint_b else y_in.reshape(y_t_shape)
     epsilon = np.finfo(x.dtype).eps
     delta = epsilon**(1.0 / 3.0)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       inx = constant_op.constant(x)
       iny = constant_op.constant(y)
       z = math_ops.matmul(inx, iny, adjoint_a, adjoint_b)
diff --git a/tensorflow/python/kernel_tests/batch_scatter_ops_test.py b/tensorflow/python/kernel_tests/batch_scatter_ops_test.py
index 498e5f05a3..742a204883 100644
--- a/tensorflow/python/kernel_tests/batch_scatter_ops_test.py
+++ b/tensorflow/python/kernel_tests/batch_scatter_ops_test.py
@@ -51,7 +51,7 @@ class ScatterTest(test.TestCase):
                         repeat_indices=False,
                         updates_are_scalar=False):
     np.random.seed(8)
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       for indices_shape in (2,), (3, 7), (3, 4, 7):
         for extra_shape in (), (5,), (5, 9):
           # Generate random indices with no duplicates for easy numpy comparison
@@ -81,7 +81,7 @@ class ScatterTest(test.TestCase):
             state_ops.batch_scatter_update, vtype, itype)
 
   def testBooleanScatterUpdate(self):
-    with self.test_session(use_gpu=False) as session:
+    with self.session(use_gpu=False) as session:
       var = variables.Variable([True, False])
       update0 = state_ops.batch_scatter_update(var, [1], [True])
       update1 = state_ops.batch_scatter_update(
@@ -96,7 +96,7 @@ class ScatterTest(test.TestCase):
   def testScatterOutOfRange(self):
     params = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32)
     updates = np.array([-3, -4, -5]).astype(np.float32)
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       ref = variables.Variable(params)
       ref.initializer.run()
 
diff --git a/tensorflow/python/kernel_tests/bias_op_test.py b/tensorflow/python/kernel_tests/bias_op_test.py
index 2427118407..749d6a791e 100644
--- a/tensorflow/python/kernel_tests/bias_op_test.py
+++ b/tensorflow/python/kernel_tests/bias_op_test.py
@@ -48,7 +48,7 @@ class BiasAddTest(test.TestCase):
 
   def _testBias(self, np_inputs, np_bias, use_gpu=False):
     np_val = self._npBias(np_inputs, np_bias)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_val = nn_ops.bias_add(np_inputs, np_bias).eval()
     self.assertAllCloseAccordingToType(np_val, tf_val)
 
@@ -76,7 +76,7 @@ class BiasAddTest(test.TestCase):
   def _testBiasNCHW(self, np_inputs, np_bias, use_gpu):
     np_val = self._npBias(np_inputs, np_bias)
     np_inputs = self._NHWCToNCHW(np_inputs)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_val = nn_ops.bias_add(np_inputs, np_bias, data_format="NCHW").eval()
     tf_val = self._NCHWToNHWC(tf_val)
     self.assertAllCloseAccordingToType(self._AtLeast3d(np_val), tf_val)
@@ -133,7 +133,7 @@ class BiasAddTest(test.TestCase):
           np.random.rand(4).astype(t))
 
   def _testGradient(self, np_input, bias, dtype, data_format, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       if data_format == "NCHW":
         np_input = self._NHWCToNCHW(np_input)
       input_tensor = constant_op.constant(
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 8177cdd454..49eb835847 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -31,7 +31,7 @@ from tensorflow.python.platform import googletest
 class BincountTest(test_util.TensorFlowTestCase):
 
   def test_empty(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(
           math_ops.bincount([], minlength=5).eval(), [0, 0, 0, 0, 0])
       self.assertAllEqual(math_ops.bincount([], minlength=1).eval(), [0])
@@ -44,7 +44,7 @@ class BincountTest(test_util.TensorFlowTestCase):
           np.float64)
 
   def test_values(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(
           math_ops.bincount([1, 1, 1, 2, 2, 3]).eval(), [0, 3, 2, 1])
       arr = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5]
@@ -59,14 +59,14 @@ class BincountTest(test_util.TensorFlowTestCase):
           math_ops.bincount(np.arange(10000)).eval(), np.ones(10000))
 
   def test_maxlength(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(math_ops.bincount([5], maxlength=3).eval(), [0, 0, 0])
       self.assertAllEqual(math_ops.bincount([1], maxlength=3).eval(), [0, 1])
       self.assertAllEqual(math_ops.bincount([], maxlength=3).eval(), [])
 
   def test_random_with_weights(self):
     num_samples = 10000
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       np.random.seed(42)
       for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
         arr = np.random.randint(0, 1000, num_samples)
@@ -79,7 +79,7 @@ class BincountTest(test_util.TensorFlowTestCase):
 
   def test_random_without_weights(self):
     num_samples = 10000
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       np.random.seed(42)
       for dtype in [np.int32, np.float32]:
         arr = np.random.randint(0, 1000, num_samples)
@@ -88,7 +88,7 @@ class BincountTest(test_util.TensorFlowTestCase):
             math_ops.bincount(arr, None).eval(), np.bincount(arr, weights))
 
   def test_zero_weights(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(
           math_ops.bincount(np.arange(1000), np.zeros(1000)).eval(),
           np.zeros(1000))
diff --git a/tensorflow/python/kernel_tests/bitcast_op_test.py b/tensorflow/python/kernel_tests/bitcast_op_test.py
index a2c6b54273..79e0f36d24 100644
--- a/tensorflow/python/kernel_tests/bitcast_op_test.py
+++ b/tensorflow/python/kernel_tests/bitcast_op_test.py
@@ -28,7 +28,7 @@ from tensorflow.python.platform import test
 class BitcastTest(test.TestCase):
 
   def _testBitcast(self, x, datatype, shape):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       tf_ans = array_ops.bitcast(x, datatype)
       out = tf_ans.eval()
       buff_after = memoryview(out).tobytes()
diff --git a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py
index 09c325f2bc..5fe62a70d0 100644
--- a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py
+++ b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py
@@ -32,21 +32,21 @@ class BroadcastToTest(test_util.TensorFlowTestCase):
 
   def testBroadcastToBasic(self):
     for dtype in [np.uint8, np.uint16, np.int8, np.int16, np.int32, np.int64]:
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         x = np.array([1, 2, 3], dtype=dtype)
         v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3])
         v_np = np.broadcast_to(x, [3, 3])
         self.assertAllEqual(v_tf.eval(), v_np)
 
   def testBroadcastToString(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = np.array([b"1", b"2", b"3"])
       v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3])
       v_np = np.broadcast_to(x, [3, 3])
       self.assertAllEqual(v_tf.eval(), v_np)
 
   def testBroadcastToBool(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = np.array([True, False, True], dtype=np.bool)
       v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3])
       v_np = np.broadcast_to(x, [3, 3])
@@ -55,7 +55,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase):
   def testBroadcastToShape(self):
     for input_dim in range(1, 6):
       for output_dim in range(input_dim, 6):
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           input_shape = [2] * input_dim
           output_shape = [2] * output_dim
           x = np.array(np.random.randint(5, size=input_shape), dtype=np.int32)
@@ -64,7 +64,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase):
           self.assertAllEqual(v_tf.eval(), v_np)
 
   def testBroadcastToScalar(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = np.array(1, dtype=np.int32)
       v_tf = array_ops.broadcast_to(constant_op.constant(x), [3, 3])
       v_np = np.broadcast_to(x, [3, 3])
@@ -72,7 +72,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase):
 
   def testBroadcastToShapeTypeAndInference(self):
     for dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = np.array([1, 2, 3])
         v_tf = array_ops.broadcast_to(
             constant_op.constant(x),
diff --git a/tensorflow/python/kernel_tests/bucketize_op_test.py b/tensorflow/python/kernel_tests/bucketize_op_test.py
index e612b1c134..57413e6af5 100644
--- a/tensorflow/python/kernel_tests/bucketize_op_test.py
+++ b/tensorflow/python/kernel_tests/bucketize_op_test.py
@@ -31,7 +31,7 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([-5, 0, 2, 3, 5, 8, 10, 11, 12]),
         boundaries=[0, 3, 8, 11])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def testFloat(self):
@@ -39,7 +39,7 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([-5., 0., 2., 3., 5., 8., 10., 11., 12.]),
         boundaries=[0., 3., 8., 11.])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def test2DInput(self):
@@ -47,13 +47,13 @@ class BucketizationOpTest(test.TestCase):
         constant_op.constant([[-5, 0, 2, 3, 5], [8, 10, 11, 12, 0]]),
         boundaries=[0, 3, 8, 11])
     expected_out = [[0, 1, 1, 2, 2], [3, 3, 4, 4, 1]]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       self.assertAllEqual(expected_out, sess.run(op))
 
   def testInvalidBoundariesOrder(self):
     op = math_ops._bucketize(
         constant_op.constant([-5, 0]), boundaries=[0, 8, 3, 11])
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with self.assertRaisesRegexp(
           errors_impl.InvalidArgumentError, "Expected sorted boundaries"):
         sess.run(op)
diff --git a/tensorflow/python/kernel_tests/cast_op_test.py b/tensorflow/python/kernel_tests/cast_op_test.py
index c90520e46d..a5dff5df62 100644
--- a/tensorflow/python/kernel_tests/cast_op_test.py
+++ b/tensorflow/python/kernel_tests/cast_op_test.py
@@ -54,7 +54,7 @@ class CastOpTest(test.TestCase):
       return None
 
   def _cast(self, x, dtype, use_gpu=False):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       val = constant_op.constant(x, self._toDataType(np.array([x]).dtype))
       return math_ops.cast(val, self._toDataType(dtype), name="cast").eval()
 
@@ -105,10 +105,10 @@ class CastOpTest(test.TestCase):
 
   def testBfloat16(self):
     a = np.random.uniform(-100, 100, 100).astype(np.float32)
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       b = math_ops.cast(math_ops.cast(a, dtypes.bfloat16), dtypes.float32)
       self.assertAllClose(a, b.eval(), rtol=1 / 128.)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       b = math_ops.cast(math_ops.cast(a, dtypes.bfloat16), dtypes.float32)
       self.assertAllClose(a, b.eval(), rtol=1 / 128.)
 
diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py
index 2ebf74a4d7..e96b277266 100644
--- a/tensorflow/python/kernel_tests/cholesky_op_test.py
+++ b/tensorflow/python/kernel_tests/cholesky_op_test.py
@@ -111,7 +111,7 @@ class CholeskyOpTest(test.TestCase):
 
   def _verifyCholesky(self, x):
     # Verify that LL^T == x.
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       chol = linalg_ops.cholesky(x)
       verification = math_ops.matmul(chol, chol, adjoint_b=True)
       self._verifyCholeskyBase(sess, x, chol, verification)
@@ -162,7 +162,7 @@ class CholeskyOpTest(test.TestCase):
 
   def testNotInvertibleCPU(self):
     # The input should be invertible.
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesRegexp(
           errors_impl.InvalidArgumentError,
           "Cholesky decomposition was not successful. The"
@@ -176,7 +176,7 @@ class CholeskyOpTest(test.TestCase):
     self._verifyCholesky(np.empty([2, 0, 0]))
 
   def testConcurrentExecutesWithoutError(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       matrix1 = random_ops.random_normal([5, 5], seed=42)
       matrix2 = random_ops.random_normal([5, 5], seed=42)
       matrix1 = math_ops.matmul(matrix1, matrix1, adjoint_a=True)
@@ -243,7 +243,7 @@ class CholeskyGradTest(test.TestCase):
     data = np.matmul(data, data.T)
     grad_data = np.random.randn(*data.shape).astype(np.float32)
 
-    with ops.Graph().as_default(), self.test_session(use_gpu=False) as s:
+    with ops.Graph().as_default(), self.session(use_gpu=False) as s:
       x = constant_op.constant(data, dtypes_lib.float32)
       chol = linalg_ops.cholesky(x)
       composite_grad = gradients_impl.gradients(chol, x, grad_data)[0]
@@ -256,7 +256,7 @@ class CholeskyGradTest(test.TestCase):
                            dtypes=(dtypes_lib.float32, dtypes_lib.float64,
                                    dtypes_lib.complex64, dtypes_lib.complex128),
                            scalarTest=False):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in shapes:
         for batch in False, True:
           for dtype in dtypes:
diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py
index bb7b645da2..efd7eee847 100644
--- a/tensorflow/python/kernel_tests/clip_ops_test.py
+++ b/tensorflow/python/kernel_tests/clip_ops_test.py
@@ -50,7 +50,7 @@ class ClipTest(test.TestCase):
 
   # ClipByValue test
   def testClipByValue(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3])
       np_ans = [[-4.4, 2.0, 3.0], [4.0, 4.4, 4.4]]
       clip_value = 4.4
@@ -65,7 +65,7 @@ class ClipTest(test.TestCase):
         dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int8,
         dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16
     ]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
         np_ans = [[2, 2, 3], [4, 4, 4]]
         clip_value_min = 2
@@ -81,7 +81,7 @@ class ClipTest(test.TestCase):
         dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int8,
         dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16
     ]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
         np_ans = [[2, 2, 3], [4, 4, 4]]
         clip_value_min = constant_op.constant(
@@ -98,7 +98,7 @@ class ClipTest(test.TestCase):
         dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int8,
         dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16
     ]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
         np_ans = [[4, 4, 4], [4, 5, 6]]
         clip_value_min = 4
@@ -115,7 +115,7 @@ class ClipTest(test.TestCase):
         dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int8,
         dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16
     ]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype)
         np_ans = [[2, 2, 3], [5, 5, 6]]
         clip_value_min = constant_op.constant(
@@ -128,7 +128,7 @@ class ClipTest(test.TestCase):
       self.assertAllClose(np_ans, tf_ans)
 
   def testClipByValueBadShape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1])
       # Use a nonsensical shape.
       clip = constant_op.constant([1.0, 2.0])
@@ -151,7 +151,7 @@ class ClipTest(test.TestCase):
   # ClipByNorm tests
   def testClipByNormClipped(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Norm of x = sqrt(3^2 + 4^2) = 5
       np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]]
@@ -166,14 +166,14 @@ class ClipTest(test.TestCase):
     self.assertAllClose(np_ans, tf_ans_tensor)
 
   def testClipByNormGradientZeros(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = array_ops.zeros([3])
       b = clip_ops.clip_by_norm(x, 1.)
       grad, = gradients_impl.gradients(b, x)
       self.assertAllEqual(grad.eval(), [1., 1., 1.])
 
   def testClipByNormBadShape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3, 1])
       # Use a nonsensical shape.
       clip = constant_op.constant([1.0, 2.0])
@@ -182,7 +182,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormNotClipped(self):
     # No norm clipping when clip_norm >= 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Norm of x = sqrt(3^2 + 4^2) = 5
       np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]]
@@ -194,7 +194,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormZero(self):
     # No norm clipping when norm = 0
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
       # Norm = 0, no changes
       np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
@@ -206,7 +206,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormClippedWithDim0(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
       # Norm of x[:, 0] = sqrt(3^2 + 4^2) = 5, x[:, 2] = 3
       np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 3.0]]
@@ -218,7 +218,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormClippedWithDim1(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
       # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5
       np_ans = [[-3.0, 0.0, 0.0], [3.2, 0.0, 2.4]]
@@ -230,7 +230,7 @@ class ClipTest(test.TestCase):
 
   def testClipByNormNotClippedWithAxes(self):
     # No norm clipping when clip_norm >= 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3])
       # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5
       np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 3.0]]
@@ -243,7 +243,7 @@ class ClipTest(test.TestCase):
   # ClipByGlobalNorm tests
   def testClipByGlobalNormClipped(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -264,7 +264,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormClippedTensor(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -285,7 +285,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormSupportsNone(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -308,7 +308,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormWithIndexedSlicesClipped(self):
     # Norm clipping when clip_norm < 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = ops.IndexedSlices(
           constant_op.constant([1.0, -2.0]), constant_op.constant([3, 4]))
@@ -341,7 +341,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormNotClipped(self):
     # No norm clipping when clip_norm >= 5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
       # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5
@@ -360,7 +360,7 @@ class ClipTest(test.TestCase):
 
   def testClipByGlobalNormZero(self):
     # No norm clipping when norm = 0
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x0 = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
       x1 = constant_op.constant([0.0, 0.0])
       # Norm = 0, no changes
@@ -378,7 +378,7 @@ class ClipTest(test.TestCase):
     self.assertAllClose(np_ans_1, tf_ans_2)
 
   def testClipByGlobalNormInf(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x0 = constant_op.constant([-2.0, 0.0, np.inf, 4.0, 0.0, 0.0],
                                 shape=[2, 3])
       x1 = constant_op.constant([1.0, -2.0])
@@ -394,7 +394,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormClipped(self):
     # Norm clipping when average clip_norm < 0.83333333
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
       np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]]
@@ -406,7 +406,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormClippedTensor(self):
     # Norm clipping when average clip_norm < 0.83333333
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
       np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]]
@@ -418,7 +418,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormNotClipped(self):
     # No norm clipping when average clip_norm >= 0.83333333
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
       # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333
       np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]]
@@ -430,7 +430,7 @@ class ClipTest(test.TestCase):
 
   def testClipByAverageNormZero(self):
     # No norm clipping when average clip_norm = 0
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3])
       # Average norm = 0, no changes
       np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
@@ -447,7 +447,7 @@ class ClipTest(test.TestCase):
     y = clip_ops.clip_by_value(zero, 1.0, 1.0)
     z = clip_ops.clip_by_value(zero, zero, 1.0)
     w = clip_ops.clip_by_value(zero, 1.0, zero)
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run([x, y, z, w], feed_dict={zero: np.zeros((7, 0))})
 
 
diff --git a/tensorflow/python/kernel_tests/compare_and_bitpack_op_test.py b/tensorflow/python/kernel_tests/compare_and_bitpack_op_test.py
index 56ddd6e428..f27a0fc472 100644
--- a/tensorflow/python/kernel_tests/compare_and_bitpack_op_test.py
+++ b/tensorflow/python/kernel_tests/compare_and_bitpack_op_test.py
@@ -30,7 +30,7 @@ class CompareAndBitpackTest(test.TestCase):
                              x, threshold,
                              truth,
                              expected_err_re=None):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       ans = math_ops.compare_and_bitpack(x, threshold)
       if expected_err_re is None:
         tf_ans = ans.eval()
diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py
index 0e59ce6972..92d09986e6 100644
--- a/tensorflow/python/kernel_tests/concat_op_test.py
+++ b/tensorflow/python/kernel_tests/concat_op_test.py
@@ -35,7 +35,7 @@ from tensorflow.python.platform import test
 class ConcatOpTest(test.TestCase):
 
   def testHStack(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       p1 = array_ops.placeholder(dtypes.float32, shape=[4, 4])
       p2 = array_ops.placeholder(dtypes.float32, shape=[4, 4])
       c = array_ops.concat([p1, p2], 0)
@@ -50,7 +50,7 @@ class ConcatOpTest(test.TestCase):
     self.assertAllEqual(result[4:, :], params[p2])
 
   def testVStack(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       p1 = array_ops.placeholder(dtypes.float32, shape=[4, 4])
       p2 = array_ops.placeholder(dtypes.float32, shape=[4, 4])
       c = array_ops.concat([p1, p2], 1)
@@ -65,7 +65,7 @@ class ConcatOpTest(test.TestCase):
     self.assertAllEqual(result[:, 4:], params[p2])
 
   def testInt32GPU(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       p1 = np.random.rand(2, 3).astype("i")
       p2 = np.random.rand(2, 3).astype("i")
       x1 = constant_op.constant(p1)
@@ -76,7 +76,7 @@ class ConcatOpTest(test.TestCase):
     self.assertAllEqual(result[2:, :], p2)
 
   def testRefType(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       p1 = np.random.rand(4, 4).astype("f")
       p2 = np.random.rand(4, 4).astype("f")
       v1 = variables.Variable(p1)
@@ -101,7 +101,7 @@ class ConcatOpTest(test.TestCase):
       dtype_feed = dtypes.float32
     else:
       dtype_feed = dtype
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       p = []
       for i in np.arange(num_tensors):
         input_shape = shape
@@ -172,7 +172,7 @@ class ConcatOpTest(test.TestCase):
     # Test both positive and negative concat axis.
     # -2 and 1 correspond to the same axis for 3-dimensional tensors.
     for axis in [-2, 1]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inp = []
         inp_tensors = []
         for x in [1, 2, 6]:
@@ -203,7 +203,7 @@ class ConcatOpTest(test.TestCase):
     self._testGradientsSimple(dtypes.complex64)
 
   def testGradientsFirstDim(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       inp = []
       inp_tensors = []
       for x in [1, 2, 6]:
@@ -230,7 +230,7 @@ class ConcatOpTest(test.TestCase):
     # Test both positive and negative concat axis.
     # -1 and 2 correspond to the same axis for 3-dimensional tensors.
     for axis in [-1, 2]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inp = []
         inp_tensors = []
         for x in [1, 2, 6]:
@@ -261,7 +261,7 @@ class ConcatOpTest(test.TestCase):
     # Random dim to concat on
     concat_dim = np.random.randint(5)
     concat_dim_sizes = np.random.randint(1, 5, size=num_tensors)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       inp = []
       inp_tensors = []
       for x in concat_dim_sizes:
@@ -288,7 +288,7 @@ class ConcatOpTest(test.TestCase):
       self._RunAndVerifyGradientsRandom()
 
   def testGradientWithUnknownInputDim(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = array_ops.placeholder(dtypes.float32)
       y = array_ops.placeholder(dtypes.float32)
       c = array_ops.concat([x, y], 2)
@@ -358,7 +358,7 @@ class ConcatOpTest(test.TestCase):
   def testZeroSize(self):
     # Verify that concat doesn't crash and burn for zero size inputs
     np.random.seed(7)
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for shape0 in (), (2,):
         axis = len(shape0)
         for shape1 in (), (3,):
@@ -489,7 +489,7 @@ class ConcatOpTest(test.TestCase):
   # important as gpu implementation could fail if
   # shared memory is not large for all the inputs
   def testConcatLargeNumberOfTensors(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for concat_dim in range(2):
         params = {}
         p = []
@@ -523,7 +523,7 @@ class ConcatOpTest(test.TestCase):
           self.assertAllEqual(result[index], params[p[i]])
 
   def testConcatEmpty(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       t1 = []
       t2 = []
       output = gen_array_ops.concat_v2([t1, t2], 0).eval()
@@ -531,13 +531,13 @@ class ConcatOpTest(test.TestCase):
 
   def testConcatInvalidAxis(self):
     with self.assertRaises(ValueError):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         t1 = [1]
         t2 = [2]
         gen_array_ops.concat_v2([t1, t2], 1).eval()
 
   def testConcatNegativeAxis(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       t1 = [[1, 2, 3], [4, 5, 6]]
       t2 = [[7, 8, 9], [10, 11, 12]]
 
@@ -608,7 +608,7 @@ class ConcatOpTest(test.TestCase):
 
   def testConcatAxisType(self):
     for dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         t1 = [[1, 2, 3], [4, 5, 6]]
         t2 = [[7, 8, 9], [10, 11, 12]]
 
@@ -621,7 +621,7 @@ class ConcatOpTest(test.TestCase):
 class ConcatOffsetTest(test.TestCase):
 
   def testBasic(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       cdim = constant_op.constant(1, dtypes.int32)
       s0 = constant_op.constant([2, 3, 5], dtypes.int32)
       s1 = constant_op.constant([2, 7, 5], dtypes.int32)
@@ -673,7 +673,7 @@ class ConcatOffsetTest(test.TestCase):
         sess.run(off)
 
   def testNegativeDim(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       cdim = constant_op.constant(-2, dtypes.int32)
       s0 = constant_op.constant([2, 3, 5], dtypes.int32)
       s1 = constant_op.constant([2, 7, 5], dtypes.int32)
diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 85a5986041..df50fce2ef 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -849,7 +849,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
   def testColocateWithInCondGraphPartitioning(self):
     with ops.Graph().as_default() as g:
-      with self.test_session(
+      with self.session(
           graph=g,
           config=config_pb2.ConfigProto(device_count={"CPU": 2})
       ) as sess:
@@ -904,7 +904,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
   def testDeviceInAndOutOfCond(self):
     with ops.Graph().as_default() as g:
-      with self.test_session(
+      with self.session(
           graph=g, config=config_pb2.ConfigProto(device_count={"CPU": 2})):
 
         def fn2():
@@ -922,7 +922,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
 
   def testDeviceInCondGraphPartitioning(self):
     with ops.Graph().as_default() as g:
-      with self.test_session(
+      with self.session(
           graph=g,
           config=config_pb2.ConfigProto(device_count={"CPU": 2})
       ) as sess:
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index d1e4e5477f..403d5eaf9a 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -43,7 +43,7 @@ class ConstantTest(test.TestCase):
 
   def _testCpu(self, x):
     np_ans = np.array(x)
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       tf_ans = ops.convert_to_tensor(x).eval()
     dtype = dtypes_lib.as_dtype(np_ans.dtype)
     if dtype.is_floating or dtype.is_complex:
@@ -53,7 +53,7 @@ class ConstantTest(test.TestCase):
 
   def _testGpu(self, x):
     np_ans = np.array(x)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_ans = ops.convert_to_tensor(x).eval()
     dtype = dtypes_lib.as_dtype(np_ans.dtype)
     if dtype.is_floating or dtype.is_complex:
@@ -134,7 +134,7 @@ class ConstantTest(test.TestCase):
   def testVariant(self):
     # TODO(ebrevdo): Re-enable use_gpu=True once non-DMA Variant
     # copying between CPU and GPU is supported.
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       variant_tensor = tensor_pb2.TensorProto(
           dtype=dtypes_lib.variant.as_datatype_enum,
           tensor_shape=tensor_shape.TensorShape([]).as_proto(),
@@ -432,7 +432,7 @@ class ZerosTest(test.TestCase):
 class ZerosLikeTest(test.TestCase):
 
   def _compareZeros(self, dtype, fully_defined_shape, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       # Creates a tensor of non-zero values with shape 2 x 3.
       # NOTE(kearnes): The default numpy dtype associated with tf.string is
       # np.object (and can't be changed without breaking a lot things), which
@@ -505,7 +505,7 @@ class ZerosLikeTest(test.TestCase):
     # copying between CPU and GPU is supported AND we register a
     # ZerosLike callback for GPU for Variant storing primitive types
     # in variant_op_registry.cc.
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       variant_tensor = tensor_pb2.TensorProto(
           dtype=dtypes_lib.variant.as_datatype_enum,
           tensor_shape=tensor_shape.TensorShape([]).as_proto(),
@@ -630,7 +630,7 @@ class OnesLikeTest(test.TestCase):
 class FillTest(test.TestCase):
 
   def _compare(self, dims, val, np_ans, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_ans = array_ops.fill(dims, val, name="fill")
       out = tf_ans.eval()
     self.assertAllClose(np_ans, out)
@@ -667,7 +667,7 @@ class FillTest(test.TestCase):
 
   def testFillString(self):
     np_ans = np.array([[b"yolo"] * 3] * 2)
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       tf_ans = array_ops.fill([2, 3], np_ans[0][0], name="fill").eval()
     self.assertAllEqual(np_ans, tf_ans)
 
@@ -886,7 +886,7 @@ versions {
 class PlaceholderWithDefaultTest(test.TestCase):
 
   def testFullShape(self):
-    with self.test_session(force_gpu=test_util.is_gpu_available()):
+    with self.session(force_gpu=test_util.is_gpu_available()):
       p = array_ops.placeholder_with_default([[2, 2], [2, 2]], shape=[2, 2])
       a = array_ops.identity(p)
       self.assertAllEqual([[2, 2], [2, 2]], a.eval())
@@ -897,7 +897,7 @@ class PlaceholderWithDefaultTest(test.TestCase):
         a.eval(feed_dict={p: [[6, 6, 6], [6, 6, 6]]})
 
   def testPartialShape(self):
-    with self.test_session(force_gpu=test_util.is_gpu_available()):
+    with self.session(force_gpu=test_util.is_gpu_available()):
       p = array_ops.placeholder_with_default([1, 2, 3], shape=[None])
       a = array_ops.identity(p)
       self.assertAllEqual([1, 2, 3], a.eval())
@@ -907,7 +907,7 @@ class PlaceholderWithDefaultTest(test.TestCase):
         a.eval(feed_dict={p: [[2, 2], [2, 2]]})
 
   def testNoShape(self):
-    with self.test_session(force_gpu=test_util.is_gpu_available()):
+    with self.session(force_gpu=test_util.is_gpu_available()):
       p = array_ops.placeholder_with_default([17], shape=None)
       a = array_ops.identity(p)
       self.assertAllEqual([17], a.eval())
@@ -916,7 +916,7 @@ class PlaceholderWithDefaultTest(test.TestCase):
           [[3, 3], [3, 3]], a.eval(feed_dict={p: [[3, 3], [3, 3]]}))
 
   def testGradient(self):
-    with self.test_session(force_gpu=test_util.is_gpu_available()):
+    with self.session(force_gpu=test_util.is_gpu_available()):
       x = array_ops.placeholder(dtypes_lib.float32, [5, 7])
       y = array_ops.placeholder_with_default(x, None)
       err = gradient_checker.compute_gradient_error(x, [5, 7], y, [5, 7])
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 3c7e6e6dce..1e9f29028b 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -456,7 +456,7 @@ class ControlFlowTest(test.TestCase):
     self.assertTrue(ind.dtype == np.int64)
 
   def testCondColocation(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with ops.device("/cpu:0"):
         v = variables.Variable(7.0)
 
@@ -471,7 +471,7 @@ class ControlFlowTest(test.TestCase):
           self.assertDeviceEqual(op.device, "/cpu:0")
 
   def _testCond_1(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       x = constant_op.constant(10)
       pred = math_ops.less(1, 2)
       fn1 = lambda: math_ops.add(x, 1)
@@ -1023,7 +1023,7 @@ class ControlFlowTest(test.TestCase):
 
     final_without_xla_context = create_while_loop()
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       opts = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
       run_metadata = config_pb2.RunMetadata()
 
@@ -1139,7 +1139,7 @@ class ControlFlowTest(test.TestCase):
       self.assertLess(len(unique_allocs), 756)
 
   def _testWhile_Gpu_1(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       n = constant_op.constant(1.0)
       c = lambda x: math_ops.less(x, 10.0)
       b = lambda x: math_ops.add(x, 1.0)
@@ -1151,7 +1151,7 @@ class ControlFlowTest(test.TestCase):
     self._testWhile_Gpu_1(use_gpu=True)
 
   def _testWhile_Gpu_2(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       n = constant_op.constant(1.0)
       c = lambda x: math_ops.less(x, 10.0)
 
@@ -1293,7 +1293,7 @@ class ControlFlowTest(test.TestCase):
             [i.get_shape(), tensor_shape.TensorShape([None, 5])])
 
   def _testNestedWhile_1(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       n = constant_op.constant(0)
 
       def cpu_sum(s):
@@ -1320,7 +1320,7 @@ class ControlFlowTest(test.TestCase):
 
   def _testNestedWhile_2(self, use_gpu):
     # Test the cases that A -> Enter and Exit -> A are partitioned.
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       s0 = constant_op.constant(2.0)
 
       def inner_loop(s):
@@ -1499,7 +1499,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(10, r.eval())
 
   def _testCondWhile_3(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       p = array_ops.placeholder(dtypes.bool)
       n = constant_op.constant(0.0)
 
@@ -1881,7 +1881,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(2048.0, r.eval())
 
   def _testWhileGrad_Mul(self, use_gpu, p_iters):
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       a = constant_op.constant(3.0, name="a")
       v = constant_op.constant(2.0, name="v")
       c = lambda v: math_ops.less(v, 100.0)
@@ -1901,7 +1901,7 @@ class ControlFlowTest(test.TestCase):
 
   def _testNestedWhileCondWhileGrad(self, use_gpu):
 
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       v = constant_op.constant(1.0)
 
       def inner_loop(s):
@@ -2207,7 +2207,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(1.0, g.eval())  # y_f_d = x + 1.0, dy_f_d/dx = 1.0
 
   def _testNestedWhileGrad_Simple(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       v = constant_op.constant(1.0)
 
       def inner_loop(s):
@@ -2300,7 +2300,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(2.999, var.eval())
 
   def _testWhileCondGrad_Simple(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       v = ops.convert_to_tensor(2.0, name="v")
       n = ops.convert_to_tensor(100.0, name="n")
       one = ops.convert_to_tensor(1.0, name="one")
@@ -3299,7 +3299,7 @@ class TupleTest(test.TestCase):
 class AssertTest(test.TestCase):
 
   def testGuardedAssertDoesNotCopyWhenTrue(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with ops.device(test.gpu_device_name()):
         value = constant_op.constant(1.0)
       with ops.device("/cpu:0"):
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index 2d6d8a8051..8540875d75 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -40,7 +40,7 @@ class Conv1DTest(test.TestCase):
       filters = array_ops.expand_dims(filters, 2)  # out_channels
       # Filters is 2x1x1
       for stride in [1, 2]:
-        with self.test_session(use_gpu=test.is_gpu_available()):
+        with self.cached_session(use_gpu=test.is_gpu_available()):
           c = nn_ops.conv1d(x, filters, stride, padding="VALID")
           reduced = array_ops.squeeze(c)
           output = reduced.eval()
diff --git a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
index 644a151710..af6ffc1d19 100644
--- a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
+++ b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py
@@ -66,7 +66,7 @@ class Conv2DBackpropFilterGradTest(test.TestCase):
 
   def testGradientDilatedConv(self):
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         for padding in ["SAME", "VALID"]:
           for stride in [1, 2]:
             np.random.seed(1)
diff --git a/tensorflow/python/kernel_tests/conv2d_transpose_test.py b/tensorflow/python/kernel_tests/conv2d_transpose_test.py
index cbdd2c5991..6f9992a317 100644
--- a/tensorflow/python/kernel_tests/conv2d_transpose_test.py
+++ b/tensorflow/python/kernel_tests/conv2d_transpose_test.py
@@ -177,7 +177,7 @@ class Conv2DTransposeTest(test.TestCase):
   def testConv2DTransposeSingleStrideNCHW(self):
     # `NCHW` data format is only supported for CUDA device.
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         strides = [1, 1, 1, 1]
 
         # Input, output: [batch, depth, height, width, depth]
@@ -212,7 +212,7 @@ class Conv2DTransposeTest(test.TestCase):
   def testConv2DTransposeSameNCHW(self):
     # `NCHW` data format is only supported for CUDA device.
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         strides = [1, 1, 2, 2]
 
         # Input, output: [batch, depth, height, width]
@@ -248,7 +248,7 @@ class Conv2DTransposeTest(test.TestCase):
   def testConv2DTransposeValidNCHW(self):
     # `NCHW` data format is only supported for CUDA device.
     if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         strides = [1, 1, 2, 2]
 
         # Input, output: [batch, depth, height, width]
diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
index 6794464e3a..57b09dc167 100644
--- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
@@ -74,7 +74,7 @@ class Conv3DTest(test.TestCase):
     # during the conv3d.
     x1 = [f * 1.0 / total_size_tensor for f in range(1, total_size_tensor + 1)]
     x2 = [f * 1.0 / total_size_filter for f in range(1, total_size_filter + 1)]
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
       t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
 
@@ -133,7 +133,7 @@ class Conv3DTest(test.TestCase):
     # numbers from 1.
     x1 = [f * 1.0 for f in range(1, total_size_tensor + 1)]
     x2 = [f * 1.0 for f in range(1, total_size_filter + 1)]
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       t1 = constant_op.constant(x1, shape=tensor_in_sizes)
       t2 = constant_op.constant(x2, shape=filter_in_sizes)
       if isinstance(stride, collections.Iterable):
@@ -413,7 +413,7 @@ class Conv3DTest(test.TestCase):
       elif data_type == dtypes.float16:
         tolerance = 1e-3
 
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         orig_input_tensor = constant_op.constant(
             input_data, shape=input_shape, dtype=data_type, name="input")
         filter_tensor = constant_op.constant(
@@ -659,7 +659,7 @@ class Conv3DTest(test.TestCase):
     # because we currently do not have a CPU implementation for arbitrary
     # dilation rates.
     if default_dilations or use_gpu:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         if data_format == "NCDHW":
           input_sizes = test_util.NHWCToNCHW(input_sizes)
         t1 = constant_op.constant(x1, shape=input_sizes)
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index ea611497d9..0ccbbf155c 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -878,7 +878,7 @@ class Conv2DTest(test.TestCase):
     x2 = [f * 1.0 for f in range(1, total_filter_size + 1)]
     default_dilations = (dilations[0] == 1 and dilations[1] == 1)
     if default_dilations or use_gpu:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         if data_format == "NCHW":
           input_sizes = test_util.NHWCToNCHW(input_sizes)
         t1 = constant_op.constant(x1, shape=input_sizes)
@@ -932,7 +932,7 @@ class Conv2DTest(test.TestCase):
     x2 = [f * 1.0 for f in range(1, total_filter_size + 1)]
     default_dilations = (dilations[0] == 1 and dilations[1] == 1)
     if default_dilations or use_gpu:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         if data_format == "NCHW":
           input_sizes = test_util.NHWCToNCHW(input_sizes)
         t1 = constant_op.constant(x1, shape=input_sizes)
@@ -1139,7 +1139,7 @@ class Conv2DTest(test.TestCase):
     # So we disable the DOUBLE path.  We should re-enable this
     # when double support returns for CPU and/or GPU.
     for dtype in self._DtypesToTest(use_gpu=use_gpu):
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         input_tensor = constant_op.constant(
             input_data, shape=input_shape, dtype=dtype, name="input")
         filter_tensor = constant_op.constant(
@@ -1644,7 +1644,7 @@ class SeparableConv2DTest(test.TestCase):
       expected: An array containing the expected operation outputs.
       data_format: string data format for input tensor.
     """
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       t1 = self._InitValues(tensor_in_sizes)
       f1 = self._InitValues(depthwise_filter_in_sizes)
       f1.set_shape(depthwise_filter_in_sizes)
@@ -1766,7 +1766,7 @@ class DeepConv2DTest(test.TestCase):
     x1 = np.random.rand(*tensor_in_sizes).astype(np.float32)
     x2 = np.random.rand(*filter_in_sizes).astype(np.float32)
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       t1 = constant_op.constant(x1, shape=tensor_in_sizes)
       t2 = constant_op.constant(x2, shape=filter_in_sizes)
       strides = [1] + conv_strides + [1]
diff --git a/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py b/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py
index 41ae0b456f..d818fbd75c 100644
--- a/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py
+++ b/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py
@@ -57,7 +57,7 @@ class CTCGreedyDecoderTest(test.TestCase):
     # from a len time python list of [batch_size x depth] tensors
     inputs_t = array_ops.stack(inputs_t)
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       decoded_list, log_probability = decoder(
           inputs_t, sequence_length=seq_lens, **decoder_args)
       decoded_unwrapped = list(
diff --git a/tensorflow/python/kernel_tests/ctc_loss_op_test.py b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
index 18e92162b9..cfc7cb98aa 100644
--- a/tensorflow/python/kernel_tests/ctc_loss_op_test.py
+++ b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
@@ -65,7 +65,7 @@ class CTCLossTest(test.TestCase):
 
     inputs_t = constant_op.constant(inputs)
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       loss = ctc_ops.ctc_loss(
           inputs=inputs_t, labels=labels, sequence_length=seq_lens)
       grad = gradients_impl.gradients(loss, [inputs_t])[0]
@@ -233,7 +233,7 @@ class CTCLossTest(test.TestCase):
     # Transposing tensor to [batch_size x max_time x depth tensor]
     inputs_t_transposed = constant_op.constant(inputs.transpose(1, 0, 2))
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       loss = ctc_ops.ctc_loss(
           inputs=inputs_t, labels=labels, sequence_length=seq_lens)
       loss_transposed = ctc_ops.ctc_loss(
@@ -252,7 +252,7 @@ class CTCLossTest(test.TestCase):
     seq_lens = np.array([2, 2], dtype=np.int32)
     v = [1.0]
 
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       loss = ctc_ops.ctc_loss(
           inputs=inputs_t, labels=labels, sequence_length=seq_lens)
       # Taking ths second gradient should fail, since it is not
@@ -269,7 +269,7 @@ class CTCLossTest(test.TestCase):
         values=constant_op.constant([], shape=(0,), dtype=dtypes.int32),
         dense_shape=[5, 5])
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "batch_size must not be 0"):
         sess.run(ctc_ops.ctc_loss(labels, inputs, sequence_lengths))
diff --git a/tensorflow/python/kernel_tests/dct_ops_test.py b/tensorflow/python/kernel_tests/dct_ops_test.py
index 97d7e2d8f9..c9d0167608 100644
--- a/tensorflow/python/kernel_tests/dct_ops_test.py
+++ b/tensorflow/python/kernel_tests/dct_ops_test.py
@@ -114,7 +114,7 @@ class DCTOpsTest(test.TestCase):
   def test_random(self):
     """Test randomly generated batches of data."""
     with spectral_ops_test_util.fft_kernel_label_map():
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         for shape in ([1], [2], [3], [10], [2, 20], [2, 3, 25]):
           signals = np.random.rand(*shape).astype(np.float32)
           for norm in (None, "ortho"):
diff --git a/tensorflow/python/kernel_tests/decode_image_op_test.py b/tensorflow/python/kernel_tests/decode_image_op_test.py
index 7f73fbaa84..0975f964b5 100644
--- a/tensorflow/python/kernel_tests/decode_image_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_image_op_test.py
@@ -36,7 +36,7 @@ class DecodeImageOpTest(test.TestCase):
   def testBmp(self):
     # Read a real bmp and verify shape
     path = os.path.join(prefix_path, "bmp", "testdata", "lena.bmp")
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       bmp0 = io_ops.read_file(path)
       image0 = image_ops.decode_image(bmp0)
       image1 = image_ops.decode_bmp(bmp0)
@@ -52,7 +52,7 @@ class DecodeImageOpTest(test.TestCase):
     stride = 5
     shape = (12, height, width, 3)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       gif0 = io_ops.read_file(path)
       image0 = image_ops.decode_image(gif0)
       image1 = image_ops.decode_gif(gif0)
@@ -81,7 +81,7 @@ class DecodeImageOpTest(test.TestCase):
   def testJpeg(self):
     # Read a real jpeg and verify shape
     path = os.path.join(prefix_path, "jpeg", "testdata", "jpeg_merge_test1.jpg")
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       jpeg0 = io_ops.read_file(path)
       image0 = image_ops.decode_image(jpeg0)
       image1 = image_ops.decode_jpeg(jpeg0)
@@ -99,7 +99,7 @@ class DecodeImageOpTest(test.TestCase):
     inputs = [(1, "lena_gray.png")]
     for channels_in, filename in inputs:
       for channels in 0, 1, 3, 4:
-        with self.test_session(use_gpu=True) as sess:
+        with self.cached_session(use_gpu=True) as sess:
           path = os.path.join(prefix_path, "png", "testdata", filename)
           png0 = io_ops.read_file(path)
           image0 = image_ops.decode_image(png0, channels=channels)
diff --git a/tensorflow/python/kernel_tests/denormal_test.py b/tensorflow/python/kernel_tests/denormal_test.py
index 95fc40f883..71a528c4aa 100644
--- a/tensorflow/python/kernel_tests/denormal_test.py
+++ b/tensorflow/python/kernel_tests/denormal_test.py
@@ -39,7 +39,7 @@ class DenormalTest(test.TestCase):
       # Disabled denormal_test on power/s390x platform
       # Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902
       return
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       array_ops.identity(7).eval()
       for dtype in dtypes:
         tiny = np.finfo(dtype).tiny
diff --git a/tensorflow/python/kernel_tests/dense_update_ops_test.py b/tensorflow/python/kernel_tests/dense_update_ops_test.py
index 120e10314f..3e0a03d634 100644
--- a/tensorflow/python/kernel_tests/dense_update_ops_test.py
+++ b/tensorflow/python/kernel_tests/dense_update_ops_test.py
@@ -32,7 +32,7 @@ class AssignOpTest(test.TestCase):
   def _initAssignFetch(self, x, y, use_gpu=False):
     """Initialize a param to init and update it with y."""
     super(AssignOpTest, self).setUp()
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       p = variables.Variable(x)
       assign = state_ops.assign(p, y)
       p.initializer.run()
@@ -41,7 +41,7 @@ class AssignOpTest(test.TestCase):
 
   def _initAssignAddFetch(self, x, y, use_gpu=False):
     """Initialize a param to init, and compute param += y."""
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       p = variables.Variable(x)
       add = state_ops.assign_add(p, y)
       p.initializer.run()
@@ -50,7 +50,7 @@ class AssignOpTest(test.TestCase):
 
   def _initAssignSubFetch(self, x, y, use_gpu=False):
     """Initialize a param to init, and compute param -= y."""
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       p = variables.Variable(x)
       sub = state_ops.assign_sub(p, y)
       p.initializer.run()
diff --git a/tensorflow/python/kernel_tests/depthtospace_op_test.py b/tensorflow/python/kernel_tests/depthtospace_op_test.py
index f0beabb4e2..13a28caf1f 100644
--- a/tensorflow/python/kernel_tests/depthtospace_op_test.py
+++ b/tensorflow/python/kernel_tests/depthtospace_op_test.py
@@ -37,12 +37,12 @@ class DepthToSpaceTest(test.TestCase):
 
   def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32):
     input_nhwc = math_ops.cast(inputs, dtype)
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       # test NHWC (default) on CPU
       x_tf = array_ops.depth_to_space(input_nhwc, block_size)
       self.assertAllEqual(x_tf.eval(), outputs)
     if test.is_gpu_available():
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # test NHWC (default) on GPU
         x_tf = array_ops.depth_to_space(input_nhwc, block_size)
         self.assertAllEqual(x_tf.eval(), outputs)
@@ -102,13 +102,13 @@ class DepthToSpaceTest(test.TestCase):
     input_nhwc = array_ops.ones([batch_size, 2, 3, 12])
     x_out = array_ops.ones([batch_size, 4, 6, 3])
 
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       # test NHWC (default) on CPU
       x_tf = array_ops.depth_to_space(input_nhwc, block_size)
       self.assertAllEqual(x_tf.shape, x_out.shape)
       x_tf.eval()
     if test.is_gpu_available():
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # test NHWC (default) on GPU
         x_tf = array_ops.depth_to_space(input_nhwc, block_size)
         self.assertAllEqual(x_tf.shape, x_out.shape)
@@ -276,7 +276,7 @@ class DepthToSpaceTest(test.TestCase):
       expected = self.depthToSpaceUsingTranspose(t, block_size, data_format)
       actual = array_ops.depth_to_space(t, block_size, data_format=data_format)
 
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.session(use_gpu=use_gpu) as sess:
       actual_vals, expected_vals = sess.run([actual, expected])
       self.assertTrue(np.array_equal(actual_vals, expected_vals))
 
@@ -314,7 +314,7 @@ class DepthToSpaceGradientTest(test.TestCase):
       return
 
     assert 4 == x.ndim
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_x = ops.convert_to_tensor(x)
       tf_y = array_ops.depth_to_space(tf_x, block_size, data_format=data_format)
 
diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index 737a73f97a..77b27c6c7e 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -209,7 +209,7 @@ class DepthwiseConv2DTest(test.TestCase):
     # GitHub issue 22110.
     if not test.is_gpu_available():
       return
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = array_ops.placeholder(dtypes.float32)
       f = np.ones([1, 1, 1, 1], np.float32)
       v = nn_impl.depthwise_conv2d(
@@ -263,7 +263,7 @@ class DepthwiseConv2DTest(test.TestCase):
     # numbers from 1.
     x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       t1 = constant_op.constant(x1, shape=tensor_in_sizes)
       t1.set_shape(tensor_in_sizes)
       t2 = constant_op.constant(x2, shape=filter_in_sizes)
@@ -522,7 +522,7 @@ class DepthwiseConv2DTest(test.TestCase):
     x2 = np.random.rand(*output_sizes).astype(np.float32)
 
     def _GetVal(use_gpu):
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         t0 = constant_op.constant(input_sizes, shape=[len(input_sizes)])
         t1 = constant_op.constant(x1, shape=filter_sizes)
         t2 = constant_op.constant(x2, shape=output_sizes)
@@ -542,7 +542,7 @@ class DepthwiseConv2DTest(test.TestCase):
     x2 = np.random.rand(*output_sizes).astype(np.float64)
 
     def _GetVal(use_gpu):
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         t0 = constant_op.constant(input_sizes, shape=[len(input_sizes)])
         t1 = constant_op.constant(x1, shape=filter_sizes)
         t2 = constant_op.constant(x2, shape=output_sizes)
@@ -574,7 +574,7 @@ class DepthwiseConv2DTest(test.TestCase):
     x2 = np.random.rand(*output_sizes).astype(np.float32)
 
     def _GetVal(use_gpu):
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         t0 = constant_op.constant(x0, shape=input_sizes)
         t1 = constant_op.constant(filter_sizes, shape=[len(filter_sizes)])
         t2 = constant_op.constant(x2, shape=output_sizes)
@@ -594,7 +594,7 @@ class DepthwiseConv2DTest(test.TestCase):
     x2 = np.random.rand(*output_sizes).astype(np.float64)
 
     def _GetVal(use_gpu):
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         t0 = constant_op.constant(x0, shape=input_sizes)
         t1 = constant_op.constant(filter_sizes, shape=[len(filter_sizes)])
         t2 = constant_op.constant(x2, shape=output_sizes)
diff --git a/tensorflow/python/kernel_tests/determinant_op_test.py b/tensorflow/python/kernel_tests/determinant_op_test.py
index fb114f9f24..da33b2848b 100644
--- a/tensorflow/python/kernel_tests/determinant_op_test.py
+++ b/tensorflow/python/kernel_tests/determinant_op_test.py
@@ -62,7 +62,7 @@ class DeterminantOpTest(test.TestCase):
         atol=5e-5)
 
   def _compareDeterminant(self, matrix_x):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       self._compareDeterminantBase(matrix_x,
                                    linalg_ops.matrix_determinant(matrix_x))
       self._compareLogDeterminantBase(
@@ -150,7 +150,7 @@ class DeterminantOpTest(test.TestCase):
     self._compareDeterminant(np.empty([2, 0, 0]))
 
   def testConcurrentExecutesWithoutError(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       matrix1 = random_ops.random_normal([5, 5], seed=42)
       matrix2 = random_ops.random_normal([5, 5], seed=42)
       det1 = linalg_ops.matrix_determinant(matrix1)
diff --git a/tensorflow/python/kernel_tests/diag_op_test.py b/tensorflow/python/kernel_tests/diag_op_test.py
index 0825d8fc6b..9e43258fa2 100644
--- a/tensorflow/python/kernel_tests/diag_op_test.py
+++ b/tensorflow/python/kernel_tests/diag_op_test.py
@@ -32,7 +32,7 @@ from tensorflow.python.platform import tf_logging
 class MatrixDiagTest(test.TestCase):
 
   def testVector(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v = np.array([1.0, 2.0, 3.0])
       mat = np.diag(v)
       v_diag = array_ops.matrix_diag(v)
@@ -40,7 +40,7 @@ class MatrixDiagTest(test.TestCase):
       self.assertAllEqual(v_diag.eval(), mat)
 
   def _testBatchVector(self, dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       v_batch = np.array([[1.0, 0.0, 3.0], [4.0, 5.0, 6.0]]).astype(dtype)
       mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 3.0]],
                             [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0],
@@ -61,14 +61,14 @@ class MatrixDiagTest(test.TestCase):
       array_ops.matrix_diag(0)
 
   def testInvalidShapeAtEval(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v = array_ops.placeholder(dtype=dtypes_lib.float32)
       with self.assertRaisesOpError("input must be at least 1-dim"):
         array_ops.matrix_diag(v).eval(feed_dict={v: 0.0})
 
   def testGrad(self):
     shapes = ((3,), (7, 4))
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in shapes:
         x = constant_op.constant(np.random.rand(*shape), np.float32)
         y = array_ops.matrix_diag(x)
@@ -82,7 +82,7 @@ class MatrixDiagTest(test.TestCase):
 class MatrixSetDiagTest(test.TestCase):
 
   def testSquare(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v = np.array([1.0, 2.0, 3.0])
       mat = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0]])
       mat_set_diag = np.array([[1.0, 1.0, 0.0], [1.0, 2.0, 1.0],
@@ -92,7 +92,7 @@ class MatrixSetDiagTest(test.TestCase):
       self.assertAllEqual(mat_set_diag, output.eval())
 
   def testRectangular(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v = np.array([3.0, 4.0])
       mat = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0]])
       expected = np.array([[3.0, 1.0, 0.0], [1.0, 4.0, 1.0]])
@@ -108,7 +108,7 @@ class MatrixSetDiagTest(test.TestCase):
       self.assertAllEqual(expected, output.eval())
 
   def _testSquareBatch(self, dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       v_batch = np.array([[-1.0, 0.0, -3.0], [-4.0, -5.0, -6.0]]).astype(dtype)
       mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0], [1.0, 0.0, 3.0]],
                             [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0],
@@ -131,7 +131,7 @@ class MatrixSetDiagTest(test.TestCase):
     self._testSquareBatch(np.bool)
 
   def testRectangularBatch(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v_batch = np.array([[-1.0, -2.0], [-4.0, -5.0]])
       mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0]],
                             [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0]]])
@@ -149,7 +149,7 @@ class MatrixSetDiagTest(test.TestCase):
       array_ops.matrix_set_diag([[0]], 0)
 
   def testInvalidShapeAtEval(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v = array_ops.placeholder(dtype=dtypes_lib.float32)
       with self.assertRaisesOpError("input must be at least 2-dim"):
         array_ops.matrix_set_diag(v, [v]).eval(feed_dict={v: 0.0})
@@ -159,7 +159,7 @@ class MatrixSetDiagTest(test.TestCase):
 
   def testGrad(self):
     shapes = ((3, 4, 4), (3, 3, 4), (3, 4, 3), (7, 4, 8, 8))
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in shapes:
         x = constant_op.constant(
             np.random.rand(*shape), dtype=dtypes_lib.float32)
@@ -179,7 +179,7 @@ class MatrixSetDiagTest(test.TestCase):
         self.assertLess(error_x_diag, 1e-4)
 
   def testGradWithNoShapeInformation(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       v = array_ops.placeholder(dtype=dtypes_lib.float32)
       mat = array_ops.placeholder(dtype=dtypes_lib.float32)
       grad_input = array_ops.placeholder(dtype=dtypes_lib.float32)
@@ -201,7 +201,7 @@ class MatrixSetDiagTest(test.TestCase):
 class MatrixDiagPartTest(test.TestCase):
 
   def testSquare(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v = np.array([1.0, 2.0, 3.0])
       mat = np.diag(v)
       mat_diag = array_ops.matrix_diag_part(mat)
@@ -209,7 +209,7 @@ class MatrixDiagPartTest(test.TestCase):
       self.assertAllEqual(mat_diag.eval(), v)
 
   def testRectangular(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       mat = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
       mat_diag = array_ops.matrix_diag_part(mat)
       self.assertAllEqual(mat_diag.eval(), np.array([1.0, 5.0]))
@@ -218,7 +218,7 @@ class MatrixDiagPartTest(test.TestCase):
       self.assertAllEqual(mat_diag.eval(), np.array([1.0, 4.0]))
 
   def _testSquareBatch(self, dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       v_batch = np.array([[1.0, 0.0, 3.0], [4.0, 5.0, 6.0]]).astype(dtype)
       mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 3.0]],
                             [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0],
@@ -236,7 +236,7 @@ class MatrixDiagPartTest(test.TestCase):
     self._testSquareBatch(np.bool)
 
   def testRectangularBatch(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v_batch = np.array([[1.0, 2.0], [4.0, 5.0]])
       mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 2.0, 0.0]],
                             [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0]]])
@@ -250,14 +250,14 @@ class MatrixDiagPartTest(test.TestCase):
       array_ops.matrix_diag_part(0)
 
   def testInvalidShapeAtEval(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v = array_ops.placeholder(dtype=dtypes_lib.float32)
       with self.assertRaisesOpError("input must be at least 2-dim"):
         array_ops.matrix_diag_part(v).eval(feed_dict={v: 0.0})
 
   def testGrad(self):
     shapes = ((3, 3), (2, 3), (3, 2), (5, 3, 3))
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in shapes:
         x = constant_op.constant(np.random.rand(*shape), dtype=np.float32)
         y = array_ops.matrix_diag_part(x)
@@ -271,7 +271,7 @@ class MatrixDiagPartTest(test.TestCase):
 class DiagTest(test.TestCase):
 
   def _diagOp(self, diag, dtype, expected_ans, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_ans = array_ops.diag(ops.convert_to_tensor(diag.astype(dtype)))
       out = tf_ans.eval()
       tf_ans_inv = array_ops.diag_part(expected_ans)
@@ -418,7 +418,7 @@ class DiagPartOpTest(test.TestCase):
     np.random.seed(0)
 
   def _diagPartOp(self, tensor, dtype, expected_ans, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tensor = ops.convert_to_tensor(tensor.astype(dtype))
       tf_ans_inv = array_ops.diag_part(tensor)
       inv_out = tf_ans_inv.eval()
@@ -441,7 +441,7 @@ class DiagPartOpTest(test.TestCase):
     i = np.arange(3)
     expected_ans = x[i, i]
     for shape in None, (None, 3), (3, None):
-      with self.test_session(use_gpu=False):
+      with self.cached_session(use_gpu=False):
         t = ops.convert_to_tensor(x.astype(np.float32))
         t.set_shape(shape)
         tf_ans = array_ops.diag_part(t)
@@ -497,7 +497,7 @@ class DiagGradOpTest(test.TestCase):
     np.random.seed(0)
     shapes = ((3,), (3, 3), (3, 3, 3))
     dtypes = (dtypes_lib.float32, dtypes_lib.float64)
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       errors = []
       for shape in shapes:
         for dtype in dtypes:
@@ -517,7 +517,7 @@ class DiagGradPartOpTest(test.TestCase):
     np.random.seed(0)
     shapes = ((3, 3), (3, 3, 3, 3))
     dtypes = (dtypes_lib.float32, dtypes_lib.float64)
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       errors = []
       for shape in shapes:
         for dtype in dtypes:
diff --git a/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py b/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py
index 4f5b854e6f..c655876280 100644
--- a/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py
+++ b/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py
@@ -86,7 +86,7 @@ class DrawBoundingBoxOpTest(test.TestCase):
       image = image_ops_impl.convert_image_dtype(image, dtypes.float32)
       image = array_ops.expand_dims(image, 0)
       image = image_ops.draw_bounding_boxes(image, bboxes)
-      with self.test_session(use_gpu=False) as sess:
+      with self.cached_session(use_gpu=False) as sess:
         op_drawn_image = np.squeeze(sess.run(image), 0)
         self.assertAllEqual(test_drawn_image, op_drawn_image)
 
diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
index 9557e30993..07da855a01 100644
--- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
@@ -35,7 +35,7 @@ from tensorflow.python.platform import test
 class DynamicPartitionTest(test.TestCase):
 
   def testSimpleOneDimensional(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant([0, 13, 2, 39, 4, 17], dtype=dtypes.float32)
       indices = constant_op.constant([0, 0, 2, 3, 2, 1])
       partitions = data_flow_ops.dynamic_partition(
@@ -55,7 +55,7 @@ class DynamicPartitionTest(test.TestCase):
     self.assertEqual([None], partitions[3].get_shape().as_list())
 
   def testSimpleTwoDimensional(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11],
                                    [12, 13, 14], [15, 16, 17]],
                                   dtype=dtypes.float32)
@@ -82,7 +82,7 @@ class DynamicPartitionTest(test.TestCase):
     indices_list = [x % 2 for x in range(num)]
     part1 = [x for x in range(num) if x % 2 == 0]
     part2 = [x for x in range(num) if x % 2 == 1]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.float32)
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
@@ -104,7 +104,7 @@ class DynamicPartitionTest(test.TestCase):
     parts = [[] for _ in range(num_partitions)]
     for i in range(rows):
       parts[(i ** 2) % num_partitions].append(data_list[i])
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.float32)
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
@@ -120,7 +120,7 @@ class DynamicPartitionTest(test.TestCase):
   def testSimpleComplex(self):
     data_list = [1 + 2j, 3 + 4j, 5 + 6j, 7 + 8j]
     indices_list = [1, 0, 1, 0]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.complex64)
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
@@ -133,7 +133,7 @@ class DynamicPartitionTest(test.TestCase):
 
   def testScalarPartitions(self):
     data_list = [10, 13, 12, 11]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.float64)
       indices = 3
       partitions = data_flow_ops.dynamic_partition(
@@ -153,7 +153,7 @@ class DynamicPartitionTest(test.TestCase):
 
   def testHigherRank(self):
     np.random.seed(7)
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for n in 2, 3:
         for shape in (4,), (4, 5), (4, 5, 2):
           partitions = np.random.randint(n, size=np.prod(shape)).reshape(shape)
@@ -178,7 +178,7 @@ class DynamicPartitionTest(test.TestCase):
   def testEmptyParts(self):
     data_list = [1, 2, 3, 4]
     indices_list = [1, 3, 1, 3]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.float32)
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
@@ -194,7 +194,7 @@ class DynamicPartitionTest(test.TestCase):
   def testEmptyDataTwoDimensional(self):
     data_list = [[], []]
     indices_list = [0, 1]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.float32)
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
@@ -210,7 +210,7 @@ class DynamicPartitionTest(test.TestCase):
   def testEmptyPartitions(self):
     data_list = []
     indices_list = []
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.float32)
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
@@ -231,7 +231,7 @@ class DynamicPartitionTest(test.TestCase):
 
     data_list = [1, 2, 3, 4, 5, 6]
     indices_list = [6, 5, 4, 3, 1, 0]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.float32)
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
@@ -252,7 +252,7 @@ class DynamicPartitionTest(test.TestCase):
 
     data_list = [1, 2, 3, 4, 5, 6]
     indices_list = [10, 11, 2, 12, 0, 1000]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.float32)
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
@@ -276,7 +276,7 @@ class DynamicPartitionTest(test.TestCase):
 
     data_list = [1.1, 2.1, 3.1, 4.1, 5.1, 6.1]
     indices_list = [90, 70, 60, 100, 110, 40]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       data = constant_op.constant(data_list, dtype=dtypes.float32)
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
diff --git a/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py b/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py
index 61542528b8..c3f67d29aa 100644
--- a/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py
@@ -36,7 +36,7 @@ class DynamicStitchTestBase(object):
     self.stitch_op = stitch_op
 
   def testScalar(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       indices = [constant_op.constant(0), constant_op.constant(1)]
       data = [constant_op.constant(40), constant_op.constant(60)]
       for step in -1, 1:
@@ -47,7 +47,7 @@ class DynamicStitchTestBase(object):
         self.assertEqual([2], stitched_t.get_shape().as_list())
 
   def testShapeInferenceForScalarWithNonConstantIndices(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       indices = [
           array_ops.placeholder(dtype=dtypes.int32),
           constant_op.constant(1)
@@ -61,7 +61,7 @@ class DynamicStitchTestBase(object):
         self.assertEqual([None], stitched_t.get_shape().as_list())
 
   def testSimpleOneDimensional(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # Test various datatypes in the simple case to ensure that the op was
       # registered under those types.
       dtypes_to_test = [
@@ -84,7 +84,7 @@ class DynamicStitchTestBase(object):
         self.assertEqual([8], stitched_t.get_shape().as_list())
 
   def testOneListOneDimensional(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       indices = [constant_op.constant([1, 6, 2, 3, 5, 0, 4, 7])]
       data = [constant_op.constant([10, 60, 20, 30, 50, 0, 40, 70])]
       stitched_t = self.stitch_op(indices, data)
@@ -94,7 +94,7 @@ class DynamicStitchTestBase(object):
       self.assertEqual([8], stitched_t.get_shape().as_list())
 
   def testSimpleTwoDimensional(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       indices = [
           constant_op.constant([0, 4, 7]),
           constant_op.constant([1, 6]),
@@ -113,7 +113,7 @@ class DynamicStitchTestBase(object):
       self.assertEqual([8, 2], stitched_t.get_shape().as_list())
 
   def testZeroSizeTensor(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       indices = [
           constant_op.constant([0, 4, 7]),
           constant_op.constant([1, 6]),
@@ -134,7 +134,7 @@ class DynamicStitchTestBase(object):
       self.assertEqual([8, 2], stitched_t.get_shape().as_list())
 
   def testHigherRank(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       indices = [
           constant_op.constant(6),
           constant_op.constant([4, 1]),
@@ -222,7 +222,7 @@ class ParallelDynamicStitchTest(DynamicStitchTestBase, test.TestCase):
     DynamicStitchTestBase.__init__(self, data_flow_ops.parallel_dynamic_stitch)
 
   def testScalar(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       indices = [constant_op.constant(0), constant_op.constant(1)]
       data = [constant_op.constant(40.0), constant_op.constant(60.0)]
       for step in -1, 1:
@@ -233,7 +233,7 @@ class ParallelDynamicStitchTest(DynamicStitchTestBase, test.TestCase):
         self.assertEqual([2], stitched_t.get_shape().as_list())
 
   def testHigherRank(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       indices = [
           constant_op.constant(6),
           constant_op.constant([4, 1]),
diff --git a/tensorflow/python/kernel_tests/edit_distance_op_test.py b/tensorflow/python/kernel_tests/edit_distance_op_test.py
index 12f85af7a5..dab5eee7f5 100644
--- a/tensorflow/python/kernel_tests/edit_distance_op_test.py
+++ b/tensorflow/python/kernel_tests/edit_distance_op_test.py
@@ -68,7 +68,7 @@ class EditDistanceTest(test.TestCase):
     ]
 
     # SparseTensorValue inputs.
-    with ops.Graph().as_default() as g, self.test_session(g):
+    with ops.Graph().as_default() as g, self.session(g):
       # hypothesis and truth are (index, value, shape) tuples
       self._testEditDistanceST(
           hypothesis_st=sparse_tensor.SparseTensorValue(
@@ -81,7 +81,7 @@ class EditDistanceTest(test.TestCase):
           expected_err_re=expected_err_re)
 
     # SparseTensor inputs.
-    with ops.Graph().as_default() as g, self.test_session(g):
+    with ops.Graph().as_default() as g, self.session(g):
       # hypothesis and truth are (index, value, shape) tuples
       self._testEditDistanceST(
           hypothesis_st=sparse_tensor.SparseTensor(
diff --git a/tensorflow/python/kernel_tests/embedding_ops_test.py b/tensorflow/python/kernel_tests/embedding_ops_test.py
index 40b8548cea..008d6fbf57 100644
--- a/tensorflow/python/kernel_tests/embedding_ops_test.py
+++ b/tensorflow/python/kernel_tests/embedding_ops_test.py
@@ -61,7 +61,7 @@ class ScatterAddSubTest(test.TestCase):
       scatter_op: ScatterAdd or ScatterSub.
     """
     super(ScatterAddSubTest, self).setUp()
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       # Create a random parameter array of given shape
       p_init = np.random.rand(*shape).astype("f")
       # Create the shape of the update array. All dimensions except the last
@@ -969,7 +969,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
 class DynamicStitchOpTest(test.TestCase):
 
   def testCint32Cpu(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       indices = [
           ops.convert_to_tensor([0, 1, 2]),
           ops.convert_to_tensor([2, 3])
@@ -982,7 +982,7 @@ class DynamicStitchOpTest(test.TestCase):
           data_flow_ops.dynamic_stitch(indices, values).eval(), [12, 23, 1, 2])
 
   def testCint32Gpu(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       indices = [
           ops.convert_to_tensor([0, 1, 2]),
           ops.convert_to_tensor([2, 3])
@@ -995,7 +995,7 @@ class DynamicStitchOpTest(test.TestCase):
           data_flow_ops.dynamic_stitch(indices, values).eval(), [12, 23, 1, 2])
 
   def testInt32Cpu(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       indices = [
           ops.convert_to_tensor([0, 1, 2]),
           ops.convert_to_tensor([2, 3])
@@ -1008,7 +1008,7 @@ class DynamicStitchOpTest(test.TestCase):
           data_flow_ops.dynamic_stitch(indices, values).eval(), [12, 23, 1, 2])
 
   def testInt32Gpu(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       indices = [
           ops.convert_to_tensor([0, 1, 2]),
           ops.convert_to_tensor([2, 3])
@@ -1021,7 +1021,7 @@ class DynamicStitchOpTest(test.TestCase):
           data_flow_ops.dynamic_stitch(indices, values).eval(), [12, 23, 1, 2])
 
   def testSumGradArgs(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       indices = [
           ops.convert_to_tensor([0, 1, 2, 3]),
           ops.convert_to_tensor([2, 3])
@@ -1050,7 +1050,7 @@ class DynamicStitchOpTest(test.TestCase):
 class ParallelDynamicStitchOpTest(test.TestCase):
 
   def testCint32Cpu(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       indices = [
           ops.convert_to_tensor([0, 1, 4, 6]),
           ops.convert_to_tensor([2, 3, 5])
@@ -1064,7 +1064,7 @@ class ParallelDynamicStitchOpTest(test.TestCase):
           [12, 23, 1, 2, 34, 3, 45])
 
   def testInt32Cpu(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       indices = [
           ops.convert_to_tensor([0, 1, 5, 6, 7]),
           ops.convert_to_tensor([2, 4, 3])
@@ -1078,7 +1078,7 @@ class ParallelDynamicStitchOpTest(test.TestCase):
           [12, 23, 1, 2, 3, 34, 45, 56])
 
   def testSimple(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       indices = [ops.convert_to_tensor([0, 1]), ops.convert_to_tensor([2, 3])]
       values = [ops.convert_to_tensor([2, 3]), ops.convert_to_tensor([1, 1])]
       self.assertAllEqual(
diff --git a/tensorflow/python/kernel_tests/extract_image_patches_op_test.py b/tensorflow/python/kernel_tests/extract_image_patches_op_test.py
index 6ea9f1badc..61436f24cf 100644
--- a/tensorflow/python/kernel_tests/extract_image_patches_op_test.py
+++ b/tensorflow/python/kernel_tests/extract_image_patches_op_test.py
@@ -43,7 +43,7 @@ class ExtractImagePatches(test.TestCase):
     strides = [1] + strides + [1]
     rates = [1] + rates + [1]
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       out_tensor = array_ops.extract_image_patches(
           constant_op.constant(image),
           ksizes=ksizes,
diff --git a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py
index 64757a3e07..bbb3fef85b 100644
--- a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py
+++ b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py
@@ -45,7 +45,7 @@ class ExtractVolumePatches(test.TestCase):
     ksizes = [1] + ksizes + [1]
     strides = [1] + strides + [1]
 
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       out_tensor = array_ops.extract_volume_patches(
           constant_op.constant(image),
           ksizes=ksizes,
diff --git a/tensorflow/python/kernel_tests/fft_ops_test.py b/tensorflow/python/kernel_tests/fft_ops_test.py
index f117934e4b..8592550f99 100644
--- a/tensorflow/python/kernel_tests/fft_ops_test.py
+++ b/tensorflow/python/kernel_tests/fft_ops_test.py
@@ -68,12 +68,12 @@ class BaseFFTOpsTest(test.TestCase):
   def _checkMemoryFail(self, x, rank):
     config = config_pb2.ConfigProto()
     config.gpu_options.per_process_gpu_memory_fraction = 1e-2
-    with self.test_session(config=config, force_gpu=True):
+    with self.cached_session(config=config, force_gpu=True):
       self._tfFFT(x, rank, fft_length=None)
 
   def _checkGradComplex(self, func, x, y, result_is_complex=True,
                         rtol=1e-2, atol=1e-2):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       # func is a forward or inverse, real or complex, batched or unbatched FFT
@@ -93,7 +93,7 @@ class BaseFFTOpsTest(test.TestCase):
     self.assertAllClose(y_jacob_t, y_jacob_n, rtol=rtol, atol=atol)
 
   def _checkGradReal(self, func, x, rtol=1e-2, atol=1e-2):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       inx = ops.convert_to_tensor(x)
       # func is a forward RFFT function (batched or unbatched).
       z = func(inx)
@@ -109,12 +109,12 @@ class FFTOpsTest(BaseFFTOpsTest):
 
   def _tfFFT(self, x, rank, fft_length=None, feed_dict=None):
     # fft_length unused for complex FFTs.
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       return self._tfFFTForRank(rank)(x).eval(feed_dict=feed_dict)
 
   def _tfIFFT(self, x, rank, fft_length=None, feed_dict=None):
     # fft_length unused for complex FFTs.
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       return self._tfIFFTForRank(rank)(x).eval(feed_dict=feed_dict)
 
   def _npFFT(self, x, rank, fft_length=None):
@@ -283,11 +283,11 @@ class RFFTOpsTest(BaseFFTOpsTest):
                                               use_placeholder)
 
   def _tfFFT(self, x, rank, fft_length=None, feed_dict=None):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       return self._tfFFTForRank(rank)(x, fft_length).eval(feed_dict=feed_dict)
 
   def _tfIFFT(self, x, rank, fft_length=None, feed_dict=None):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       return self._tfIFFTForRank(rank)(x, fft_length).eval(feed_dict=feed_dict)
 
   def _npFFT(self, x, rank, fft_length=None):
diff --git a/tensorflow/python/kernel_tests/fifo_queue_test.py b/tensorflow/python/kernel_tests/fifo_queue_test.py
index a5f8f64e0c..8961c4b13c 100644
--- a/tensorflow/python/kernel_tests/fifo_queue_test.py
+++ b/tensorflow/python/kernel_tests/fifo_queue_test.py
@@ -1586,7 +1586,7 @@ class FIFOQueueDictTest(test.TestCase):
 class FIFOQueueWithTimeoutTest(test.TestCase):
 
   def testDequeueWithTimeout(self):
-    with self.test_session(
+    with self.session(
         config=config_pb2.ConfigProto(operation_timeout_in_ms=20)) as sess:
       q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
       self.assertEqual(
diff --git a/tensorflow/python/kernel_tests/gather_nd_op_test.py b/tensorflow/python/kernel_tests/gather_nd_op_test.py
index c0b419e1d1..706a4e27e5 100644
--- a/tensorflow/python/kernel_tests/gather_nd_op_test.py
+++ b/tensorflow/python/kernel_tests/gather_nd_op_test.py
@@ -35,7 +35,7 @@ from tensorflow.python.platform import test
 class GatherNdTest(test.TestCase):
 
   def _testSimpleDtype(self, dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       params = constant_op.constant(np.array([8, 1, 2, 3, 7, 5], dtype=dtype))
       indices = constant_op.constant([[4], [4], [0]])
       gather_nd_t = array_ops.gather_nd(params, indices)
@@ -54,7 +54,7 @@ class GatherNdTest(test.TestCase):
     self._testSimpleDtype("|S")  # byte strings in python2 + 3
 
   def testEmptyIndicesAndParamsOKButJustEmptyParamsFails(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = np.ones((3, 3), dtype=np.float32)
 
       indices_empty = np.empty((0, 2), dtype=np.int32)
@@ -85,7 +85,7 @@ class GatherNdTest(test.TestCase):
       self.assertAllClose(np.empty((0,), dtype=np.float32), gather_nd_ok_val)
 
   def testIndexScalar(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = np.array(
           [[-8, -1, -2, -3, -7, -5], [8, 1, 2, 3, 7, 5]], dtype=np.float32).T
       indices = constant_op.constant([4, 1])
@@ -95,7 +95,7 @@ class GatherNdTest(test.TestCase):
       self.assertAllEqual(np.array(7), gather_nd_val)
 
   def testParamsRankLargerThanIndexIndexScalarSlices(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = np.array(
           [[-8, -1, -2, -3, -7, -5], [8, 1, 2, 3, 7, 5]], dtype=np.float32).T
       indices = constant_op.constant([4])
@@ -105,7 +105,7 @@ class GatherNdTest(test.TestCase):
       self.assertAllEqual(np.array([-7, 7]), gather_nd_val)
 
   def testParamsRankLargerThanIndexSlices(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = np.array(
           [[-8, -1, -2, -3, -7, -5], [8, 1, 2, 3, 7, 5]], dtype=np.float32).T
       indices = constant_op.constant([[4], [4], [0]])
@@ -116,7 +116,7 @@ class GatherNdTest(test.TestCase):
     self.assertAllEqual(np.array([[-7, 7], [-7, 7], [-8, 8]]), gather_nd_val)
 
   def testHigherRankParamsLargerThanIndexSlices(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = np.array(
           [[[-8, -1, -2, -3, -7, -5], [8, 1, 2, 3, 7, 5]],
            [[-80, -10, -20, -30, -70, -50], [80, 10, 20, 30, 70, 50]]],
@@ -130,7 +130,7 @@ class GatherNdTest(test.TestCase):
     self.assertAllEqual(params[[4, 4, 0]], gather_nd_val)
 
   def testEmptyIndicesLastRankMeansCopyEntireTensor(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = np.array(
           [[[-8, -1, -2, -3, -7, -5], [8, 1, 2, 3, 7, 5]],
            [[-80, -10, -20, -30, -70, -50], [80, 10, 20, 30, 70, 50]]],
@@ -147,7 +147,7 @@ class GatherNdTest(test.TestCase):
         gather_nd_val)
 
   def testHigherRankParamsAndIndicesLargerThanIndexSlices(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = np.array(
           [[[-8, -1, -2, -3, -7, -5], [8, 1, 2, 3, 7, 5]],
            [[-80, -10, -20, -30, -70, -50], [80, 10, 20, 30, 70, 50]]],
@@ -162,7 +162,7 @@ class GatherNdTest(test.TestCase):
                         gather_nd_val)
 
   def testHigherRankParams(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = (10, 20, 5, 1, 17)
       params = np.random.rand(*shape)
       indices = np.vstack([np.random.randint(0, s, size=2000) for s in shape]).T
@@ -174,7 +174,7 @@ class GatherNdTest(test.TestCase):
     self.assertEqual([2000], gather_nd_t.get_shape())
 
   def testHigherRankParamsAndIndices(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = (10, 20, 5, 1, 17)
       params = np.random.rand(*shape)
       indices = np.vstack([np.random.randint(0, s, size=2000) for s in shape]).T
@@ -198,7 +198,7 @@ class GatherNdTest(test.TestCase):
     self.assertEqual(None, shape[0].value)
 
   def testBadIndicesCPU(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       params = [0, 1, 2]
       indices = [[[0], [7]]]  # Make this one higher rank
       gather_nd = array_ops.gather_nd(params, indices)
@@ -211,7 +211,7 @@ class GatherNdTest(test.TestCase):
     # On GPU the bad indices do not raise error but fetch 0 values
     if not test.is_gpu_available():
       return
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = [0, 1, 2]
       indices = [[[0], [7]]]  # Make this one higher rank
       gather_nd = array_ops.gather_nd(params, indices)
@@ -220,7 +220,7 @@ class GatherNdTest(test.TestCase):
         gather_nd.eval()
 
   def testBadIndicesWithSlicesCPU(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       params = [[0, 1, 2]]
       indices = [[[0], [0], [1]]]  # Make this one higher rank
       gather_nd = array_ops.gather_nd(params, indices)
@@ -233,7 +233,7 @@ class GatherNdTest(test.TestCase):
     # On GPU the bad indices do not raise error but fetch 0 values
     if not test.is_gpu_available():
       return
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = [[0, 1, 2]]
       indices = [[[0], [0], [1]]]  # Make this one higher rank
       gather_nd = array_ops.gather_nd(params, indices)
@@ -249,7 +249,7 @@ class GatherNdTest(test.TestCase):
     grad_vals = constant_op.constant([1, 2], dtype=dtypes.float64)
     grads = gradients_impl.gradients([outputs], [inputs], [grad_vals])[0]
     expected_grads = np.array([[1, 0], [0, 2]], dtype=np.float64)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       assert np.array_equal(expected_grads, grads.eval())
 
   def testGradientsRank2Slices(self):
@@ -260,7 +260,7 @@ class GatherNdTest(test.TestCase):
     grad_vals = constant_op.constant([[1, 2], [3, 4]], dtype=dtypes.float64)
     grads = gradients_impl.gradients([outputs], [inputs], [grad_vals])[0]
     expected_grads = np.array([[3, 4], [1, 2]], dtype=np.float64)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertIndexedSlices(grads)
       self.assertAllEqual(expected_grads, ops.convert_to_tensor(grads).eval())
 
@@ -276,7 +276,7 @@ class GatherNdTest(test.TestCase):
     grads = gradients_impl.gradients([outputs], [inputs], [grad_vals])[0]
     expected_grads = np.array(
         [[[5, 6], [1, 2]], [[3, 4], [7, 8]]], dtype=np.float64)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(expected_grads, grads.eval())
 
   def testGradientsRank7Elements(self):
@@ -305,7 +305,7 @@ class GatherNdTest(test.TestCase):
             [[[[5, 6], [1, 2]]]],
             [[[[3, 4], [7, 8]]]]
         ]]], dtype=np.float64)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(expected_grads, grads.eval())
 
   def testGradientsInt64Indices(self):
@@ -320,7 +320,7 @@ class GatherNdTest(test.TestCase):
     grads = gradients_impl.gradients([outputs], [inputs], [grad_vals])[0]
     expected_grads = np.array(
         [[[5, 6], [1, 2]], [[3, 4], [7, 8]]], dtype=np.float64)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(expected_grads, grads.eval())
 
   def testGradientsRank2SlicesWithEmptySpace(self):
@@ -341,7 +341,7 @@ class GatherNdTest(test.TestCase):
          [1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0, 0], [3, 3, 3, 3, 3, 3, 3, 3, 3]],
         dtype=np.float64)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertIndexedSlices(grads)
       self.assertAllEqual(expected_grads, ops.convert_to_tensor(grads).eval())
 
diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py
index 85bf969068..bdafc52ab5 100644
--- a/tensorflow/python/kernel_tests/gather_op_test.py
+++ b/tensorflow/python/kernel_tests/gather_op_test.py
@@ -42,7 +42,7 @@ class GatherTest(test.TestCase):
     return data
 
   def testScalar1D(self):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       data = np.array([0, 1, 2, 3, 7, 5])
       for dtype in _TEST_TYPES:
         for indices in 4, [1, 2, 2, 4, 5]:
@@ -56,7 +56,7 @@ class GatherTest(test.TestCase):
           self.assertEqual(np_val.shape, gather_t.get_shape())
 
   def testScalar2D(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       data = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8],
                        [9, 10, 11], [12, 13, 14]])
       for dtype in _TEST_TYPES:
@@ -71,7 +71,7 @@ class GatherTest(test.TestCase):
           self.assertEqual(expected_shape, gather_t.get_shape())
 
   def testSimpleTwoD32(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       data = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8],
                        [9, 10, 11], [12, 13, 14]])
       for dtype in _TEST_TYPES:
@@ -95,7 +95,7 @@ class GatherTest(test.TestCase):
         for axis in range(len(shape)):
           params = self._buildParams(np.random.randn(*shape), dtype)
           indices = np.random.randint(shape[axis], size=indices_shape)
-          with self.test_session(use_gpu=True) as sess:
+          with self.cached_session(use_gpu=True) as sess:
             tf_params = constant_op.constant(params)
             tf_indices = constant_op.constant(indices)
             # Check that both positive and negative indices for axis work.
@@ -182,7 +182,7 @@ class GatherTest(test.TestCase):
     self.assertEqual(None, gather_t.shape)
 
   def testBadIndicesCPU(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       params = [[0, 1, 2], [3, 4, 5]]
       with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"):
         array_ops.gather(params, [[7]], axis=0).eval()
@@ -194,7 +194,7 @@ class GatherTest(test.TestCase):
     # On GPU the bad indices do not raise error but fetch 0 values
     if not test.is_gpu_available():
       return
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = [[0, 1, 2], [3, 4, 5]]
       with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 2\)"):
         array_ops.gather(params, [[7]], axis=0).eval()
@@ -202,7 +202,7 @@ class GatherTest(test.TestCase):
         array_ops.gather(params, [[7]], axis=1).eval()
 
   def testBadAxis(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       params = [0, 1, 2]
       params_ph = array_ops.placeholder(dtypes.int32)
       indices = 0
@@ -218,7 +218,7 @@ class GatherTest(test.TestCase):
               feed_dict={params_ph: params})
 
   def testEmptySlices(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in _TEST_TYPES:
         for itype in np.int32, np.int64:
           # Leading axis gather.
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index 292679e4b9..70bfbf8544 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -107,7 +107,7 @@ def _init_sampler(tc, init, num):
 class ConstantInitializersTest(test.TestCase):
 
   def testZerosInitializer(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = [2, 3]
       x = variable_scope.get_variable(
           "x", shape=shape, initializer=init_ops.zeros_initializer())
@@ -115,7 +115,7 @@ class ConstantInitializersTest(test.TestCase):
       self.assertAllEqual(x.eval(), np.zeros(shape))
 
   def testOnesInitializer(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = [2, 3]
       x = variable_scope.get_variable(
           "x", shape=shape, initializer=init_ops.ones_initializer())
@@ -123,7 +123,7 @@ class ConstantInitializersTest(test.TestCase):
       self.assertAllEqual(x.eval(), np.ones(shape))
 
   def testConstantZeroInitializer(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = [2, 3]
       x = variable_scope.get_variable(
           "x", shape=shape, initializer=init_ops.constant_initializer(0.0))
@@ -131,7 +131,7 @@ class ConstantInitializersTest(test.TestCase):
       self.assertAllEqual(x.eval(), np.zeros(shape))
 
   def testConstantOneInitializer(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = [2, 3]
       x = variable_scope.get_variable(
           "x", shape=shape, initializer=init_ops.constant_initializer(1.0))
@@ -139,7 +139,7 @@ class ConstantInitializersTest(test.TestCase):
       self.assertAllEqual(x.eval(), np.ones(shape))
 
   def testConstantIntInitializer(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = [2, 3]
       x = variable_scope.get_variable(
           "x",
@@ -151,7 +151,7 @@ class ConstantInitializersTest(test.TestCase):
       self.assertAllEqual(x.eval(), 7 * np.ones(shape, dtype=np.int32))
 
   def testConstantTupleInitializer(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = [3]
       x = variable_scope.get_variable(
           "x",
@@ -163,7 +163,7 @@ class ConstantInitializersTest(test.TestCase):
       self.assertAllEqual(x.eval(), [10, 20, 30])
 
   def _testNDimConstantInitializer(self, name, value, shape, expected):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       init = init_ops.constant_initializer(value, dtype=dtypes.int32)
       x = variable_scope.get_variable(name, shape=shape, initializer=init)
       x.initializer.run()
@@ -187,7 +187,7 @@ class ConstantInitializersTest(test.TestCase):
 
   def _testNDimConstantInitializerLessValues(self, name, value, shape,
                                              expected):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       init = init_ops.constant_initializer(value, dtype=dtypes.int32)
       x = variable_scope.get_variable(name, shape=shape, initializer=init)
       x.initializer.run()
@@ -213,7 +213,7 @@ class ConstantInitializersTest(test.TestCase):
 
   def _testNDimConstantInitializerMoreValues(self, value, shape):
     ops.reset_default_graph()
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       init = init_ops.constant_initializer(value, dtype=dtypes.int32)
       self.assertRaises(
           ValueError,
@@ -371,7 +371,7 @@ class VarianceScalingInitializationTest(test.TestCase):
     init = init_ops.variance_scaling_initializer(
         distribution='truncated_normal')
 
-    with self.test_session(use_gpu=True), \
+    with self.session(use_gpu=True), \
       test.mock.patch.object(
           random_ops, 'truncated_normal', wraps=random_ops.truncated_normal) \
           as mock_truncated_normal:
@@ -387,7 +387,7 @@ class VarianceScalingInitializationTest(test.TestCase):
     expect_var = 1. / shape[0]
     init = init_ops.variance_scaling_initializer(distribution='normal')
 
-    with self.test_session(use_gpu=True), \
+    with self.session(use_gpu=True), \
       test.mock.patch.object(
           random_ops, 'truncated_normal', wraps=random_ops.truncated_normal) \
           as mock_truncated_normal:
@@ -404,7 +404,7 @@ class VarianceScalingInitializationTest(test.TestCase):
     init = init_ops.variance_scaling_initializer(
         distribution='untruncated_normal')
 
-    with self.test_session(use_gpu=True), \
+    with self.session(use_gpu=True), \
       test.mock.patch.object(
           random_ops, 'random_normal', wraps=random_ops.random_normal) \
           as mock_random_normal:
@@ -420,7 +420,7 @@ class VarianceScalingInitializationTest(test.TestCase):
     expect_var = 1. / shape[0]
     init = init_ops.variance_scaling_initializer(distribution='uniform')
 
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = init(shape).eval()
 
     self.assertNear(np.mean(x), expect_mean, err=1e-2)
@@ -431,7 +431,7 @@ class VarianceScalingInitializationTest(test.TestCase):
 class RangeTest(test.TestCase):
 
   def _Range(self, start, limit, delta):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_ans = math_ops.range(start, limit, delta, name="range")
       self.assertEqual([len(np.arange(start, limit, delta))],
                        tf_ans.get_shape())
@@ -450,7 +450,7 @@ class RangeTest(test.TestCase):
     self.assertEqual(math_ops.range(0, 5, 1).dtype, dtypes.int32)
 
   def testLimitOnly(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(np.arange(5), math_ops.range(5).eval())
 
   def testEmpty(self):
@@ -520,7 +520,6 @@ class LinSpaceTest(test.TestCase):
       return [False]
 
   def _LinSpace(self, start, stop, num):
-    # NOTE(touts): Needs to pass a graph to get a new session each time.
     with ops.Graph().as_default() as graph:
       with self.session(graph=graph, force_gpu=self.force_gpu):
         tf_ans = math_ops.linspace(start, stop, num, name="linspace")
@@ -704,7 +703,7 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase):
         outputs_2norm = linalg_ops.norm(outputs)
         ratio = outputs_2norm / inputs_2norm
         my_ops = variables.global_variables_initializer()
-        with self.test_session(use_gpu=True) as sess:
+        with self.session(use_gpu=True) as sess:
           sess.run(my_ops)
           # Check the shape of the outputs
           t = outputs.eval()
@@ -719,7 +718,7 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase):
     shape = [3, 3, 10, 10]
     count = 70
     tol = 1e-5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for i in range(count):
         x = variable_scope.get_variable("{}".format(i), shape=shape,
                                         initializer=
@@ -783,7 +782,7 @@ class ConvolutionOrthogonal1dInitializerTest(test.TestCase):
     shape = [3, 10, 10]
     count = 70
     tol = 1e-5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for i in range(count):
         x = variable_scope.get_variable("{}".format(i), shape=shape,
                                         initializer=
@@ -843,7 +842,7 @@ class ConvolutionOrthogonal1dInitializerTest(test.TestCase):
       outputs_2norm = linalg_ops.norm(outputs)
       ratio = outputs_2norm / inputs_2norm
       my_ops = variables.global_variables_initializer()
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         sess.run(my_ops)
         # Check the shape of the outputs
         t = outputs.eval()
@@ -938,7 +937,7 @@ class ConvolutionOrthogonal2dInitializerTest(test.TestCase):
       outputs_2norm = linalg_ops.norm(outputs)
       ratio = outputs_2norm / inputs_2norm
       my_ops = variables.global_variables_initializer()
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         sess.run(my_ops)
         # Check the shape of the outputs
         t = outputs.eval()
@@ -992,7 +991,7 @@ class ConvolutionOrthogonal3dInitializerTest(test.TestCase):
     shape = [3, 3, 3, 5, 5]
     count = 20
     tol = 1e-5
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for i in range(count):
         x = variable_scope.get_variable("{}".format(i), shape=shape,
                                         initializer=
@@ -1063,7 +1062,7 @@ class ConvolutionOrthogonal3dInitializerTest(test.TestCase):
       outputs_2norm = linalg_ops.norm(outputs)
       ratio = outputs_2norm / inputs_2norm
       my_ops = variables.global_variables_initializer()
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         sess.run(my_ops)
         # Check the shape of the outputs
         t = outputs.eval()
diff --git a/tensorflow/python/kernel_tests/inplace_ops_test.py b/tensorflow/python/kernel_tests/inplace_ops_test.py
index 90759c23ae..51d16861dd 100644
--- a/tensorflow/python/kernel_tests/inplace_ops_test.py
+++ b/tensorflow/python/kernel_tests/inplace_ops_test.py
@@ -33,7 +33,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
 
   def testBasicUpdate(self):
     for dtype in [dtypes.float32, dtypes.int32, dtypes.int64]:
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         x = array_ops.ones([7, 3], dtype)
         y = np.ones([7, 3], dtype.as_numpy_dtype)
         self.assertAllClose(x.eval(), y)
@@ -49,7 +49,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
         self.assertAllClose(x.eval(), y)
 
   def testBasicUpdateBool(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = array_ops.ones([7, 3], dtypes.bool)
       y = np.ones([7, 3], dtypes.bool.as_numpy_dtype)
       self.assertAllClose(x.eval(), y)
@@ -67,7 +67,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
 
   def testBasicAdd(self):
     for dtype in [dtypes.float32, dtypes.int32, dtypes.int64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = array_ops.ones([7, 3], dtype)
         y = np.ones([7, 3], dtype.as_numpy_dtype)
         self.assertAllClose(x.eval(), y)
@@ -86,7 +86,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
 
   def testBasicSub(self):
     for dtype in [dtypes.float32, dtypes.int32, dtypes.int64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         x = array_ops.ones([7, 3], dtype)
         y = np.ones([7, 3], dtype.as_numpy_dtype)
         self.assertAllClose(x.eval(), y)
@@ -104,7 +104,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
         self.assertAllClose(x.eval(), y)
 
   def testRandom(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       d0, d1, d2 = 100, 3, 5
       x = array_ops.zeros([d0, d1, d2])
       y = np.zeros([d0, d1, d2])
@@ -124,7 +124,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
         self.assertAllClose(x.eval(), y)
 
   def testRandom1D(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       d0 = 100
       x = array_ops.zeros([d0])
       y = np.zeros([d0])
@@ -144,7 +144,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
         self.assertAllClose(x.eval(), y)
 
   def testAlias(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       x = array_ops.ones([2, 3])
       y = inplace_ops.alias_inplace_add(x, [0], [[1, 2, 3]])
       with ops.control_dependencies([y]):
@@ -169,7 +169,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
         dtypes.float32, dtypes.float64, dtypes.int32, dtypes.int64, dtypes.bool,
         dtypes.uint8
     ]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         test_shapes = [(), (1,), (2, 3), (0, 2), (2, 3, 5), (2, 0, 5)]
         for shape in test_shapes:
           val = inplace_ops.empty(shape, dtype).eval()
@@ -188,7 +188,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
           self.assertEqual(val.dtype, dtype.as_numpy_dtype)
           self.assertAllEqual(val, np.zeros(shape, dtype.as_numpy_dtype))
 
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       val = inplace_ops.empty((1, 2), dtypes.string, init=True).eval()
       self.assertEqual(val.tolist(), [[b"", b""]])
 
diff --git a/tensorflow/python/kernel_tests/large_concat_op_test.py b/tensorflow/python/kernel_tests/large_concat_op_test.py
index 66afb6ec01..1b23e74776 100644
--- a/tensorflow/python/kernel_tests/large_concat_op_test.py
+++ b/tensorflow/python/kernel_tests/large_concat_op_test.py
@@ -32,7 +32,7 @@ class LargeConcatOpTest(test.TestCase):
       a = array_ops.ones([2**31 + 6], dtype=dtypes.int8)
       b = array_ops.zeros([1024], dtype=dtypes.int8)
       onezeros = array_ops.concat([a, b], 0)
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       # TODO(dga):  Add more depth to this test to validate correctness,
       # not just non-crashingness, once other large tensor fixes have gone in.
       _ = onezeros.eval()
diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py
index e52f303fe0..bd78c484ea 100644
--- a/tensorflow/python/kernel_tests/linalg_grad_test.py
+++ b/tensorflow/python/kernel_tests/linalg_grad_test.py
@@ -60,7 +60,7 @@ class MatrixUnaryFunctorGradientTest(test_lib.TestCase):
 def _GetMatrixUnaryFunctorGradientTest(functor_, dtype_, shape_, **kwargs_):
 
   def Test(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       np.random.seed(1)
       a_np = np.random.uniform(
           low=-1.0, high=1.0,
@@ -102,7 +102,7 @@ def _GetMatrixBinaryFunctorGradientTest(functor_,
     # GPU test for matrix_solve.
     use_gpu = False if functor_ == linalg_ops.matrix_solve else True
 
-    with self.test_session(use_gpu=use_gpu):
+    with self.session(use_gpu=use_gpu):
       np.random.seed(1)
       a_np = np.random.uniform(
           low=-1.0, high=1.0,
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index fb0b5f1137..b04996f788 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -1348,7 +1348,7 @@ class ComputeWeightedLossTest(test.TestCase):
                 raw_losses, weights=np.ones(self._shape), reduction=reduction)
         )
         self.assertEqual(9, len(util.get_losses()))
-        with self.test_session(g):
+        with self.session(g):
           for unweighted_loss in unweighted_losses:
             if reduction == losses.Reduction.NONE:
               self.assertAllClose(self._raw_losses, unweighted_loss.eval())
@@ -1375,7 +1375,7 @@ class ComputeWeightedLossTest(test.TestCase):
                 raw_losses, weights=np.ones((1, 1, 4)), reduction=reduction),
         )
         self.assertEqual(3, len(util.get_losses()))
-        with self.test_session(g):
+        with self.session(g):
           for unweighted_loss in unweighted_losses:
             if reduction == losses.Reduction.NONE:
               self.assertAllClose(
@@ -1466,7 +1466,7 @@ class ComputeWeightedLossTest(test.TestCase):
         weighted_loss = losses.compute_weighted_loss(
             self._raw_losses, weights=weights, reduction=reduction)
         self.assertEqual(1, len(util.get_losses()))
-        with self.test_session(g):
+        with self.session(g):
           weighted_losses = weights * self._raw_losses
           weighted_sum = np.sum(weighted_losses)
           if reduction == losses.Reduction.NONE:
diff --git a/tensorflow/python/kernel_tests/lrn_op_test.py b/tensorflow/python/kernel_tests/lrn_op_test.py
index 9eba059549..7ebeb91d90 100644
--- a/tensorflow/python/kernel_tests/lrn_op_test.py
+++ b/tensorflow/python/kernel_tests/lrn_op_test.py
@@ -54,7 +54,7 @@ class LRNOpTest(test.TestCase):
     return output
 
   def _RunAndVerify(self, dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       # random shape
       shape = np.random.randint(1, 16, size=4)
       # Make depth at least 2 to make it meaningful
@@ -100,7 +100,7 @@ class LRNOpTest(test.TestCase):
         self._RunAndVerify(dtypes.float16)
 
   def testGradientsZeroInput(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       shape = [4, 4, 4, 4]
       p = array_ops.placeholder(dtypes.float32, shape=shape)
       inp_array = np.zeros(shape).astype("f")
@@ -113,7 +113,7 @@ class LRNOpTest(test.TestCase):
     self.assertShapeEqual(expected, grad)
 
   def _RunAndVerifyGradients(self, dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       # random shape
       shape = np.random.randint(1, 5, size=4)
       # Make depth at least 2 to make it meaningful
diff --git a/tensorflow/python/kernel_tests/map_stage_op_test.py b/tensorflow/python/kernel_tests/map_stage_op_test.py
index acfafde9e0..d503f3d7c9 100644
--- a/tensorflow/python/kernel_tests/map_stage_op_test.py
+++ b/tensorflow/python/kernel_tests/map_stage_op_test.py
@@ -44,7 +44,7 @@ class MapStageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       sess.run(stage, feed_dict={x: -1, pi: 0})
       for i in range(10):
         _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i})
@@ -65,7 +65,7 @@ class MapStageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       sess.run(stage, feed_dict={x: -1, pi: 0})
       for i in range(10):
         _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i})
@@ -92,7 +92,7 @@ class MapStageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       sess.run(stage, feed_dict={x: -1, pi: 0})
       for i in range(10):
         _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i})
@@ -141,7 +141,7 @@ class MapStageTest(test.TestCase):
 
     n = 10
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       for i in range(n):
         sess.run(stage, feed_dict={x: i, pi: i})
 
@@ -168,7 +168,7 @@ class MapStageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       sess.run(stage, feed_dict={x: -1, pi: 3})
       self.assertEqual(sess.run(size), 1)
       sess.run(stage, feed_dict={x: -1, pi: 1})
@@ -202,7 +202,7 @@ class MapStageTest(test.TestCase):
     queue = Queue.Queue()
     n = 8
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       # Stage data in a separate thread which will block
       # when it hits the staging area's capacity and thus
       # not fill the queue with n tokens
@@ -265,7 +265,7 @@ class MapStageTest(test.TestCase):
     queue = Queue.Queue()
     n = 8
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       # Stage data in a separate thread which will block
       # when it hits the staging area's capacity and thus
       # not fill the queue with n tokens
@@ -325,7 +325,7 @@ class MapStageTest(test.TestCase):
 
     n = 10
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       # Keys n-1..0
       keys = list(reversed(six.moves.range(n)))
 
@@ -362,7 +362,7 @@ class MapStageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       # 0 complete and incomplete entries
       self.assertTrue(sess.run([size, isize]) == [0, 0])
       # Stage key 0, x and f tuple entries
@@ -419,7 +419,7 @@ class MapStageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       # 0 complete and incomplete entries
       self.assertTrue(sess.run([size, isize]) == [0, 0])
       # Stage key 0, x and f tuple entries
@@ -470,7 +470,7 @@ class MapStageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       # 0 complete and incomplete entries
       self.assertTrue(sess.run([size, isize]) == [0, 0])
       # Stage key 0, x and f tuple entries
@@ -561,7 +561,7 @@ class MapStageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       # Stage complete tuple
       sess.run(stage_xvf, feed_dict={pi: 0, x: 1, f: 2, v: 3})
 
diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py
index 01c4643235..4760236ca0 100644
--- a/tensorflow/python/kernel_tests/matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/matmul_op_test.py
@@ -72,7 +72,7 @@ def _GetMatMulTest(a_np_, b_np_, use_static_shape_, **kwargs_):
     # np.matrix(a_np_) * np.matrix(b_np_)
     effective_a_np = _GetTransposedMatrices(a_np_, "a", kwargs_)
     effective_b_np = _GetTransposedMatrices(b_np_, "b", kwargs_)
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.session(use_gpu=use_gpu) as sess:
       if use_static_shape_:
         a = constant_op.constant(effective_a_np)
         b = constant_op.constant(effective_b_np)
@@ -115,7 +115,7 @@ def _GetMatMulGradientTest(a_np_, b_np_, use_static_shape_, **kwargs_):
     epsilon = np.finfo(a_np_.dtype).eps
     delta = epsilon**(1.0 / 3.0)
     tol = 20 * delta
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       a = constant_op.constant(effective_a_np)
       b = constant_op.constant(effective_b_np)
       res = math_ops.matmul(a, b, **kwargs_)
diff --git a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
index 5660a29493..93a668f125 100644
--- a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
@@ -57,7 +57,7 @@ def _GetMatrixBandPartTest(dtype_, batch_shape_, shape_):
         if batch_shape_ is not ():
           band_np = np.tile(band_np, batch_shape_ + (1, 1))
         for index_dtype in [dtypes_lib.int32, dtypes_lib.int64]:
-          with self.test_session(use_gpu=False):
+          with self.cached_session(use_gpu=False):
             band = array_ops.matrix_band_part(
                 batch_mat,
                 constant_op.constant(lower, index_dtype),
@@ -76,7 +76,7 @@ def _GetMatrixBandPartGradTest(dtype_, batch_shape_, shape_):
   def Test(self):
     shape = batch_shape_ + shape_
     x = constant_op.constant(np.random.rand(*shape), dtype=dtype_)
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for lower in -1, 0, 1, shape_[-2] - 1:
         for upper in -1, 0, 1, shape_[-1] - 1:
           y = array_ops.matrix_band_part(x, lower, upper)
diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
index 9630c052b8..3abdf50ece 100644
--- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
@@ -50,7 +50,7 @@ class ExponentialOpTest(test.TestCase):
 
   def _verifyExponential(self, x, np_type):
     inp = x.astype(np_type)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_ans = linalg_impl.matrix_exponential(inp)
       if x.size == 0:
         np_ans = np.empty(x.shape, dtype=np_type)
@@ -138,14 +138,14 @@ class ExponentialOpTest(test.TestCase):
     self._verifyExponentialReal(np.empty([2, 0, 0]))
 
   def testDynamic(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       inp = array_ops.placeholder(ops.dtypes.float32)
       expm = linalg_impl.matrix_exponential(inp)
       matrix = np.array([[1., 2.], [3., 4.]])
       sess.run(expm, feed_dict={inp: matrix})
 
   def testConcurrentExecutesWithoutError(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       matrix1 = random_ops.random_normal([5, 5], seed=42)
       matrix2 = random_ops.random_normal([5, 5], seed=42)
       expm1 = linalg_impl.matrix_exponential(matrix1)
diff --git a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
index 8bda04b53d..2247f1541e 100644
--- a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
@@ -37,7 +37,7 @@ class InverseOpTest(test.TestCase):
   def _verifyInverse(self, x, np_type):
     for adjoint in False, True:
       y = x.astype(np_type)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # Verify that x^{-1} * x == Identity matrix.
         inv = linalg_ops.matrix_inverse(y, adjoint=adjoint)
         tf_ans = math_ops.matmul(inv, y, adjoint_b=adjoint)
@@ -138,7 +138,7 @@ class InverseOpTest(test.TestCase):
           self._verifyInverseReal(matrix)
 
   def testConcurrentExecutesWithoutError(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       all_ops = []
       for adjoint_ in True, False:
         matrix1 = random_ops.random_normal([5, 5], seed=42)
diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
index 3205e211d9..2010a4b2a8 100644
--- a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
@@ -39,7 +39,7 @@ class LogarithmOpTest(test.TestCase):
 
   def _verifyLogarithm(self, x, np_type):
     inp = x.astype(np_type)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       # Verify that expm(logm(A)) == A.
       tf_ans = linalg_impl.matrix_exponential(
           gen_linalg_ops.matrix_logarithm(inp))
@@ -121,7 +121,7 @@ class LogarithmOpTest(test.TestCase):
         self._verifyLogarithmComplex(matrix)
 
   def testConcurrentExecutesWithoutError(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       matrix1 = math_ops.cast(
           random_ops.random_normal([5, 5], seed=42), dtypes.complex64)
       matrix2 = math_ops.cast(
diff --git a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
index 225a10e117..13a7df7f95 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
@@ -107,7 +107,7 @@ class MatrixSolveLsOpTest(test_lib.TestCase):
         b = np.tile(b, batch_shape + (1, 1))
         np_ans = np.tile(np_ans, batch_shape + (1, 1))
         np_r_norm = np.tile(np_r_norm, batch_shape)
-      with self.test_session(use_gpu=fast) as sess:
+      with self.cached_session(use_gpu=fast) as sess:
         if use_placeholder:
           a_ph = array_ops.placeholder(dtypes.as_dtype(dtype))
           b_ph = array_ops.placeholder(dtypes.as_dtype(dtype))
@@ -135,7 +135,7 @@ class MatrixSolveLsOpTest(test_lib.TestCase):
 
   def testWrongDimensions(self):
     # The matrix and right-hand sides should have the same number of rows.
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       matrix = constant_op.constant([[1., 0.], [0., 1.]])
       rhs = constant_op.constant([[1., 0.]])
       with self.assertRaises(ValueError):
@@ -146,7 +146,7 @@ class MatrixSolveLsOpTest(test_lib.TestCase):
     empty0 = np.empty([3, 0])
     empty1 = np.empty([0, 2])
     for fast in [True, False]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         tf_ans = linalg_ops.matrix_solve_ls(empty0, empty0, fast=fast).eval()
         self.assertEqual(tf_ans.shape, (0, 0))
         tf_ans = linalg_ops.matrix_solve_ls(empty0, full, fast=fast).eval()
diff --git a/tensorflow/python/kernel_tests/matrix_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
index 264df2565c..9e30ae1628 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
@@ -55,7 +55,7 @@ class MatrixSolveOpTest(test.TestCase):
           b = np.tile(b, batch_dims + [1, 1])
         np_ans = np.linalg.solve(a_np, b)
         for use_placeholder in False, True:
-          with self.test_session(use_gpu=True) as sess:
+          with self.cached_session(use_gpu=True) as sess:
             if use_placeholder:
               a_ph = array_ops.placeholder(dtypes.as_dtype(np_type))
               b_ph = array_ops.placeholder(dtypes.as_dtype(np_type))
@@ -93,14 +93,14 @@ class MatrixSolveOpTest(test.TestCase):
   def testNonSquareMatrix(self):
     # When the solve of a non-square matrix is attempted we should return
     # an error
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaises(ValueError):
         matrix = constant_op.constant([[1., 2., 3.], [3., 4., 5.]])
         linalg_ops.matrix_solve(matrix, matrix)
 
   def testWrongDimensions(self):
     # The matrix and right-hand sides should have the same number of rows.
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       matrix = constant_op.constant([[1., 0.], [0., 1.]])
       rhs = constant_op.constant([[1., 0.]])
       with self.assertRaises(ValueError):
@@ -108,7 +108,7 @@ class MatrixSolveOpTest(test.TestCase):
 
   def testNotInvertible(self):
     # The input should be invertible.
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesOpError("Input matrix is not invertible."):
         # All rows of the matrix below add to zero
         matrix = constant_op.constant([[1., 0., -1.], [-1., 1., 0.],
@@ -116,7 +116,7 @@ class MatrixSolveOpTest(test.TestCase):
         linalg_ops.matrix_solve(matrix, matrix).eval()
 
   def testConcurrent(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       all_ops = []
       for adjoint_ in False, True:
         lhs1 = random_ops.random_normal([3, 3], seed=42)
diff --git a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py
index dd01ba11af..445faca3ee 100644
--- a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py
@@ -74,7 +74,7 @@ class MatrixTriangularSolveOpTest(test.TestCase):
         a_np = np.tile(a_np, batch_dims + [1, 1])
         b = np.tile(b, batch_dims + [1, 1])
 
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         if use_placeholder:
           a_tf = array_ops.placeholder(a.dtype)
           b_tf = array_ops.placeholder(b.dtype)
diff --git a/tensorflow/python/kernel_tests/morphological_ops_test.py b/tensorflow/python/kernel_tests/morphological_ops_test.py
index ce4d8acfbd..6d601554b8 100644
--- a/tensorflow/python/kernel_tests/morphological_ops_test.py
+++ b/tensorflow/python/kernel_tests/morphological_ops_test.py
@@ -44,7 +44,7 @@ class DilationTest(test.TestCase):
     strides = [1] + strides + [1]
     rates = [1] + rates + [1]
 
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       out_tensor = nn_ops.dilation2d(
           constant_op.constant(image),
           constant_op.constant(kernel),
@@ -204,7 +204,7 @@ class DilationTest(test.TestCase):
     strides = [1] + strides + [1]
     rates = [1] + rates + [1]
 
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       image_tensor = constant_op.constant(
           image, shape=image_shape, name="input")
       kernel_tensor = constant_op.constant(
@@ -319,7 +319,7 @@ class ErosionTest(test.TestCase):
     strides = [1] + strides + [1]
     rates = [1] + rates + [1]
 
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       out_tensor = nn_ops.erosion2d(
           constant_op.constant(image),
           constant_op.constant(kernel),
@@ -479,7 +479,7 @@ class ErosionTest(test.TestCase):
     strides = [1] + strides + [1]
     rates = [1] + rates + [1]
 
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       image_tensor = constant_op.constant(
           image, shape=image_shape, name="input")
       kernel_tensor = constant_op.constant(
diff --git a/tensorflow/python/kernel_tests/neon_depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/neon_depthwise_conv_op_test.py
index 3cbbd48c8c..15e3826542 100644
--- a/tensorflow/python/kernel_tests/neon_depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/neon_depthwise_conv_op_test.py
@@ -114,7 +114,7 @@ class DepthwiseConv2DTest(test.TestCase):
     # Initializes the input and filter tensor with numbers incrementing from 1.
     x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       with sess.graph._kernel_label_map({"DepthwiseConv2dNative": "neon"}):
         t1 = constant_op.constant(x1, shape=tensor_in_sizes)
         t1.set_shape(tensor_in_sizes)
@@ -204,7 +204,7 @@ class DepthwiseConv2DTest(test.TestCase):
     # numbers from 1.
     x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       with sess.graph._kernel_label_map({"DepthwiseConv2dNative": "neon"}):
         t1 = constant_op.constant(x1, shape=tensor_in_sizes)
         t1.set_shape(tensor_in_sizes)
diff --git a/tensorflow/python/kernel_tests/norm_op_test.py b/tensorflow/python/kernel_tests/norm_op_test.py
index 3f71b326a2..e202b6e8a4 100644
--- a/tensorflow/python/kernel_tests/norm_op_test.py
+++ b/tensorflow/python/kernel_tests/norm_op_test.py
@@ -65,7 +65,7 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_):
 
   def _CompareNorm(self, matrix):
     np_norm = np.linalg.norm(matrix, ord=ord_, axis=axis_, keepdims=keep_dims_)
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       if use_static_shape_:
         tf_matrix = constant_op.constant(matrix)
         tf_norm = linalg_ops.norm(
diff --git a/tensorflow/python/kernel_tests/nth_element_op_test.py b/tensorflow/python/kernel_tests/nth_element_op_test.py
index 1b8f02140f..338b6cec01 100644
--- a/tensorflow/python/kernel_tests/nth_element_op_test.py
+++ b/tensorflow/python/kernel_tests/nth_element_op_test.py
@@ -32,7 +32,7 @@ class NthElementTest(test.TestCase):
 
   def _validateNthElement(self, inputs, dtype, n, reverse, expected_values):
     np_expected_values = np.array(expected_values)
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       inputs_op = ops.convert_to_tensor(inputs, dtype=dtype)
       values_op = nn_ops.nth_element(inputs_op, n, reverse=reverse)
       values = sess.run(values_op)
@@ -117,7 +117,7 @@ class NthElementTest(test.TestCase):
       nn_ops.nth_element(5, 0)
 
   def testInvalidInputAtEval(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       v = array_ops.placeholder(dtype=dtypes.float32)
       with self.assertRaisesOpError("Input must be >= 1-D"):
         nn_ops.nth_element(v, 0).eval(feed_dict={v: 5.0})
@@ -132,7 +132,7 @@ class NthElementTest(test.TestCase):
 
   def testInvalidNAtEval(self):
     inputs = [[0.1, 0.2], [0.3, 0.4]]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       n = array_ops.placeholder(dtypes.int32)
       values = nn_ops.nth_element(inputs, n)
       with self.assertRaisesOpError("Need n >= 0, got -7"):
@@ -146,14 +146,14 @@ class NthElementTest(test.TestCase):
 
   def testNTooLargeAtEval(self):
     inputs = [[0.1, 0.2], [0.3, 0.4]]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       n = array_ops.placeholder(dtypes.int32)
       values = nn_ops.nth_element(inputs, n)
       with self.assertRaisesOpError(r"Input must have at least n\+1 columns"):
         values.eval(feed_dict={n: 2})
 
   def testGradients(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       inputs = array_ops.placeholder(dtypes.float32, shape=[3, 5])
       values = nn_ops.nth_element(inputs, 3)
       grad = sess.run(
diff --git a/tensorflow/python/kernel_tests/numerics_test.py b/tensorflow/python/kernel_tests/numerics_test.py
index 6cc70f7c89..5db591ed30 100644
--- a/tensorflow/python/kernel_tests/numerics_test.py
+++ b/tensorflow/python/kernel_tests/numerics_test.py
@@ -35,7 +35,7 @@ class VerifyTensorAllFiniteTest(test.TestCase):
   def testVerifyTensorAllFiniteSucceeds(self):
     x_shape = [5, 4]
     x = np.random.random_sample(x_shape).astype(np.float32)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       t = constant_op.constant(x, shape=x_shape, dtype=dtypes.float32)
       t_verified = numerics.verify_tensor_all_finite(t,
                                                      "Input is not a number.")
@@ -48,7 +48,7 @@ class VerifyTensorAllFiniteTest(test.TestCase):
 
     # Test NaN.
     x[0] = np.nan
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesOpError(my_msg):
         t = constant_op.constant(x, shape=x_shape, dtype=dtypes.float32)
         t_verified = numerics.verify_tensor_all_finite(t, my_msg)
@@ -56,7 +56,7 @@ class VerifyTensorAllFiniteTest(test.TestCase):
 
     # Test Inf.
     x[0] = np.inf
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesOpError(my_msg):
         t = constant_op.constant(x, shape=x_shape, dtype=dtypes.float32)
         t_verified = numerics.verify_tensor_all_finite(t, my_msg)
diff --git a/tensorflow/python/kernel_tests/one_hot_op_test.py b/tensorflow/python/kernel_tests/one_hot_op_test.py
index b449a195a7..377d545c9c 100644
--- a/tensorflow/python/kernel_tests/one_hot_op_test.py
+++ b/tensorflow/python/kernel_tests/one_hot_op_test.py
@@ -34,7 +34,7 @@ class OneHotTest(test.TestCase):
                   expected_err_re=None,
                   raises=None,
                   **inputs):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       if raises is not None:
         with self.assertRaises(raises):
           array_ops.one_hot(**inputs)
diff --git a/tensorflow/python/kernel_tests/pad_op_test.py b/tensorflow/python/kernel_tests/pad_op_test.py
index e415d7879e..fc302c4141 100644
--- a/tensorflow/python/kernel_tests/pad_op_test.py
+++ b/tensorflow/python/kernel_tests/pad_op_test.py
@@ -85,7 +85,7 @@ class PadOpTest(test.TestCase):
   def _testPad(self, np_inputs, paddings, mode, constant_values):
     np_val = self._npPad(np_inputs, paddings, mode=mode,
                          constant_values=constant_values)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_val = array_ops.pad(np_inputs, paddings, mode=mode,
                              constant_values=constant_values)
       out = tf_val.eval()
@@ -93,7 +93,7 @@ class PadOpTest(test.TestCase):
     self.assertShapeEqual(np_val, tf_val)
 
   def _testGradient(self, x, a, mode, constant_values):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       inx = ops.convert_to_tensor(x)
       xs = list(x.shape)
       ina = ops.convert_to_tensor(a)
@@ -117,7 +117,7 @@ class PadOpTest(test.TestCase):
                              constant_values=constant_values)
 
   def testInputDims(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaises(ValueError):
         array_ops.pad(array_ops.reshape(
             [1, 2], shape=[1, 2, 1, 1, 1, 1]),
@@ -125,7 +125,7 @@ class PadOpTest(test.TestCase):
                           [1, 2], shape=[1, 2]))
 
   def testPaddingsDim(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaises(ValueError):
         array_ops.pad(array_ops.reshape(
             [1, 2], shape=[1, 2]),
@@ -133,7 +133,7 @@ class PadOpTest(test.TestCase):
                           [1, 2], shape=[2]))
 
   def testPaddingsDim2(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaises(ValueError):
         array_ops.pad(array_ops.reshape(
             [1, 2], shape=[1, 2]),
@@ -141,7 +141,7 @@ class PadOpTest(test.TestCase):
                           [1, 2], shape=[2, 1]))
 
   def testPaddingsDim3(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaises(ValueError):
         array_ops.pad(array_ops.reshape(
             [1, 2], shape=[1, 2]),
@@ -149,7 +149,7 @@ class PadOpTest(test.TestCase):
                           [1, 2], shape=[1, 2]))
 
   def testPaddingsDim4(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaises(ValueError):
         array_ops.pad(array_ops.reshape(
             [1, 2], shape=[1, 2]),
@@ -157,7 +157,7 @@ class PadOpTest(test.TestCase):
                           [1, 2, 3, 4, 5, 6], shape=[3, 2]))
 
   def testPaddingsNonNegative(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesRegexp(ValueError, "must be non-negative"):
         array_ops.pad(constant_op.constant(
             [1], shape=[1]),
@@ -165,7 +165,7 @@ class PadOpTest(test.TestCase):
                           [-1, 0], shape=[1, 2]))
 
   def testPaddingsNonNegative2(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesRegexp(ValueError, "must be non-negative"):
         array_ops.pad(constant_op.constant(
             [1], shape=[1]),
@@ -173,7 +173,7 @@ class PadOpTest(test.TestCase):
                           [-1, 0], shape=[1, 2]))
 
   def testPaddingsMaximum(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaises(Exception):
         array_ops.pad(constant_op.constant(
             [1], shape=[2]),
@@ -203,7 +203,7 @@ class PadOpTest(test.TestCase):
                              paddings,
                              mode=mode,
                              constant_values=0)
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           tf_val = array_ops.pad(inputs,
                                  constant_op.constant(paddings, padding_dtype),
                                  mode=mode,
@@ -249,7 +249,7 @@ class PadOpTest(test.TestCase):
                             constant_values="PAD")
     symmetric = array_ops.pad(x, [[1, 0], [0, 1]], mode="SYMMETRIC",
                               constant_values="PAD")
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual([[b"PAD", b"PAD", b"PAD"],
                            [b"Hello", b"World", b"PAD"],
                            [b"Goodnight", b"Moon", b"PAD"]], constant.eval())
@@ -325,7 +325,7 @@ class PadOpTest(test.TestCase):
   def testScalars(self):
     paddings = np.zeros((0, 2), dtype=np.int32)
     inp = np.asarray(7)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       tf_val = array_ops.pad(inp, paddings)
       out = tf_val.eval()
     self.assertAllEqual(inp, out)
@@ -335,7 +335,7 @@ class PadOpTest(test.TestCase):
     for dtype in [dtypes.int32, dtypes.int64]:
       paddings = np.zeros((0, 2))
       inp = np.asarray(7)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         tf_val = array_ops.pad(inp, constant_op.constant(paddings, dtype=dtype))
         out = tf_val.eval()
       self.assertAllEqual(inp, out)
@@ -360,7 +360,7 @@ class PadOpTest(test.TestCase):
             padded,
             [paddings_value[i][0] + inp.shape.dims[i].value for i in range(4)],
             [-1, -1, -1, -1])
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           self.assertAllEqual(inp.eval(), middle.eval())
           self.assertAllEqual(
               np.zeros([row[0] for row in paddings_value]), left.eval())
diff --git a/tensorflow/python/kernel_tests/parameterized_truncated_normal_op_test.py b/tensorflow/python/kernel_tests/parameterized_truncated_normal_op_test.py
index e14894cf56..53b713c03e 100644
--- a/tensorflow/python/kernel_tests/parameterized_truncated_normal_op_test.py
+++ b/tensorflow/python/kernel_tests/parameterized_truncated_normal_op_test.py
@@ -115,7 +115,7 @@ class ParameterizedTruncatedNormalTest(test.TestCase):
       # Give up early if we are unable to import it.
       import scipy.stats  # pylint: disable=g-import-not-at-top,unused-variable
       random_seed.set_random_seed(seed)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         samples = random_ops.parameterized_truncated_normal(shape, mean, stddev,
                                                             minval,
                                                             maxval).eval()
@@ -139,7 +139,7 @@ class ParameterizedTruncatedNormalTest(test.TestCase):
     try:
       import scipy.stats  # pylint: disable=g-import-not-at-top
       random_seed.set_random_seed(seed)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         samples = random_ops.parameterized_truncated_normal(shape, mean, stddev,
                                                             minval,
                                                             maxval).eval()
@@ -186,7 +186,7 @@ class ParameterizedTruncatedNormalTest(test.TestCase):
     sample_op = random_ops.parameterized_truncated_normal(
         shape=(int(1e5),), means=0.8, stddevs=0.05, minvals=-1., maxvals=1.)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       samples = sess.run(sample_op)
       # 0. is more than 16 standard deviations from the mean, and
       # should have a likelihood < 1e-57.
diff --git a/tensorflow/python/kernel_tests/partitioned_variables_test.py b/tensorflow/python/kernel_tests/partitioned_variables_test.py
index b34d30f5c0..d1f0c6c2a0 100644
--- a/tensorflow/python/kernel_tests/partitioned_variables_test.py
+++ b/tensorflow/python/kernel_tests/partitioned_variables_test.py
@@ -600,7 +600,7 @@ class PartitionedVariablesTestCase(test.TestCase):
   def testMetaGraphSaveLoad(self):
     save_prefix = os.path.join(self.get_temp_dir(), "ckpt")
     save_graph = ops.Graph()
-    with save_graph.as_default(), self.test_session(
+    with save_graph.as_default(), self.session(
         graph=save_graph) as session:
       partitioner = partitioned_variables.fixed_size_partitioner(5, axis=0)
       with variable_scope.variable_scope("root", partitioner=partitioner):
@@ -620,7 +620,7 @@ class PartitionedVariablesTestCase(test.TestCase):
             save_graph.get_tensor_by_name(v0.name + ":0"))
 
     restore_graph = ops.Graph()
-    with restore_graph.as_default(), self.test_session(
+    with restore_graph.as_default(), self.session(
         graph=restore_graph) as session:
       saver = saver_lib.import_meta_graph(save_path + ".meta")
       saver.restore(sess=session, save_path=save_path)
diff --git a/tensorflow/python/kernel_tests/pool_test.py b/tensorflow/python/kernel_tests/pool_test.py
index 6ede654aad..372861297f 100644
--- a/tensorflow/python/kernel_tests/pool_test.py
+++ b/tensorflow/python/kernel_tests/pool_test.py
@@ -154,7 +154,7 @@ class PoolingTest(test.TestCase):
     self.assertAllClose(y1, y2.eval(), rtol=1e-2, atol=1e-2)
 
   def testPoolSimple(self):
-    with self.test_session(use_gpu=test.is_gpu_available()):
+    with self.session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["MAX", "AVG"]:
           self._test(
@@ -166,7 +166,7 @@ class PoolingTest(test.TestCase):
               strides=[1, 2])
 
   def testPool1D(self):
-    with self.test_session(use_gpu=test.is_gpu_available()):
+    with self.session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["MAX", "AVG"]:
           for input_shape in [[2, 9, 2], [2, 10, 2]]:
@@ -192,7 +192,7 @@ class PoolingTest(test.TestCase):
                     strides=strides)
 
   def testPool2D(self):
-    with self.test_session(use_gpu=test.is_gpu_available()):
+    with self.session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["MAX", "AVG"]:
           for input_shape in [[2, 9, 10, 2], [2, 10, 9, 2]]:
@@ -218,7 +218,7 @@ class PoolingTest(test.TestCase):
                     strides=strides)
 
   def testPool3D(self):
-    with self.test_session(use_gpu=test.is_gpu_available()):
+    with self.session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["MAX", "AVG"]:
           for input_shape in [[2, 9, 10, 11, 2], [2, 10, 9, 11, 2]]:
@@ -247,7 +247,7 @@ class PoolingTest(test.TestCase):
   def testPoolNC(self):
     if test.is_gpu_available(cuda_only=True):
       # "NC*" format is currently only supported on CUDA.
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         for padding in ["SAME", "VALID"]:
           self._test(
               input_shape=[2, 2, 9],
@@ -302,7 +302,7 @@ class PoolingTest(test.TestCase):
     self.assertLess(err, err_tolerance)
 
   def testGradient1D(self):
-    with self.test_session(use_gpu=test.is_gpu_available()):
+    with self.session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["AVG", "MAX"]:
           for input_shape in [[2, 5, 2], [1, 4, 1]]:
@@ -328,7 +328,7 @@ class PoolingTest(test.TestCase):
                     strides=strides)
 
   def testGradient2D(self):
-    with self.test_session(use_gpu=test.is_gpu_available()):
+    with self.session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["AVG", "MAX"]:
           for input_shape in [[2, 4, 5, 2], [1, 5, 4, 1]]:
@@ -354,7 +354,7 @@ class PoolingTest(test.TestCase):
                     strides=strides)
 
   def testGradient3D(self):
-    with self.test_session(use_gpu=test.is_gpu_available()):
+    with self.session(use_gpu=test.is_gpu_available()):
       for padding in ["SAME", "VALID"]:
         for pooling_type in ["AVG", "MAX"]:
           for input_shape in [[1, 3, 5, 4, 1], [1, 5, 4, 3, 1]]:
diff --git a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py
index b01fc12953..e393c7a022 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py
@@ -65,7 +65,7 @@ class PoolingTest(test.TestCase):
     # Initializes the input tensor with array containing incrementing
     # numbers from 1.
     x = [f * 1.0 for f in range(1, total_size + 1)]
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       t = constant_op.constant(x, shape=input_sizes)
       window = [1] + list(window) + [1]
       strides = [1] + list(strides) + [1]
@@ -233,7 +233,7 @@ class PoolingTest(test.TestCase):
     # Initializes the input tensor with array containing incrementing
     # numbers from 1.
     x = np.arange(1, total_size + 1, dtype=np.float32)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       input_tensor = constant_op.constant(x, shape=input_sizes, name="input")
       err_g_margin = 1e-3
       err_gg_margin = 1.5e-2
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index e95c729715..53003a7f28 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -129,7 +129,7 @@ class PoolingTest(test.TestCase):
     # Initializes the input tensor with array containing incrementing
     # numbers from 1, wrapping round to -127 after 127 to support int8.
     x = [((f + 128) % 255) - 127 for f in range(total_size)]
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       t = constant_op.constant(x, shape=input_sizes, dtype=data_type)
       if data_format in ("NCHW", "NCHW_VECT_C"):
         if data_format == "NCHW_VECT_C":
@@ -718,7 +718,7 @@ class PoolingTest(test.TestCase):
                                          strides,
                                          error_msg,
                                          use_gpu=False):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       t = constant_op.constant(1.0, shape=in_size)
       with self.assertRaisesRegexp(errors_impl.UnimplementedError, error_msg):
         t = nn_ops.max_pool(
@@ -734,7 +734,7 @@ class PoolingTest(test.TestCase):
     self._testDepthwiseMaxPoolInvalidConfig([1, 2, 2, 4], [1, 1, 1, 3],
                                             [1, 1, 1, 3], "evenly divide")
     if test.is_gpu_available():
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         t = variables.Variable(np.ones([1, 2, 2, 4]))
         variables.global_variables_initializer().run()
         with self.assertRaisesOpError("for CPU devices"):
@@ -747,11 +747,11 @@ class PoolingTest(test.TestCase):
   def _CompareMaxPoolingFwd(self, input_shape, ksize, strides, padding):
     for dtype in np.float64, np.float32, np.float16:
       tensor_input = np.random.rand(*input_shape).astype(dtype)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         t = constant_op.constant(tensor_input, shape=input_shape)
         out_op, _ = nn_ops.max_pool_with_argmax(t, ksize, strides, padding)
         gpu_val = out_op.eval()
-      with self.test_session(use_gpu=False):
+      with self.cached_session(use_gpu=False):
         t = constant_op.constant(tensor_input, shape=input_shape)
         out_op = nn_ops.max_pool(t, ksize, strides, padding)
         cpu_val = out_op.eval()
@@ -764,7 +764,7 @@ class PoolingTest(test.TestCase):
       # in the input.
       tensor_input = np.random.random_integers(0, 3, input_shape).astype(dtype)
       tensor_output = np.random.rand(*output_shape).astype(dtype)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         t = constant_op.constant(tensor_input, shape=input_shape)
         _, argmax_op = nn_ops.max_pool_with_argmax(t, ksize, strides, padding)
         argmax = argmax_op.eval()
@@ -773,7 +773,7 @@ class PoolingTest(test.TestCase):
                                                       strides, padding)
         gpu_val = out_op.eval()
         self.assertShapeEqual(gpu_val, out_op)
-      with self.test_session(use_gpu=False):
+      with self.cached_session(use_gpu=False):
         t = constant_op.constant(tensor_input, shape=input_shape)
         out_op = nn_ops.max_pool(t, ksize, strides, padding)
         orig_out = out_op.eval()
@@ -793,7 +793,7 @@ class PoolingTest(test.TestCase):
       # Generate numbers in a narrow range, so that there are many duplicates
       # in the input.
       tensor_input = np.random.random_integers(0, 3, input_shape).astype(dtype)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         t = constant_op.constant(tensor_input, shape=input_shape)
         _, argmax_op = nn_ops.max_pool_with_argmax(t, ksize, strides, padding)
         argmax = argmax_op.eval()
@@ -802,7 +802,7 @@ class PoolingTest(test.TestCase):
             t, grad_in, argmax, ksize, strides, padding)
         gpu_val = out_op.eval()
         self.assertShapeEqual(gpu_val, out_op)
-      with self.test_session(use_gpu=False):
+      with self.cached_session(use_gpu=False):
         t = constant_op.constant(tensor_input, shape=input_shape)
         out_op = nn_ops.max_pool(t, ksize, strides, padding)
         orig_out = out_op.eval()
@@ -818,7 +818,7 @@ class PoolingTest(test.TestCase):
 
   def testMaxPoolingWithArgmax(self):
     tensor_input = [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0]
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       t = constant_op.constant(tensor_input, shape=[1, 3, 3, 1])
       out_op, argmax_op = nn_ops.max_pool_with_argmax(
           t,
@@ -836,7 +836,7 @@ class PoolingTest(test.TestCase):
     orig_input = [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0]
     tensor_input = [11.0, 12.0, 13.0, 14.0]
     tensor_argmax = list(np.array([0, 1, 3, 5], dtype=np.int64))
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       orig_in = constant_op.constant(orig_input, shape=[1, 3, 3, 1])
       t = constant_op.constant(tensor_input, shape=[1, 2, 2, 1])
       argmax = constant_op.constant(
@@ -859,7 +859,7 @@ class PoolingTest(test.TestCase):
     orig_input = [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0]
     tensor_input = [11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0]
     tensor_argmax = list(np.array([0, 1, 3, 5], dtype=np.int64))
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       orig_in = constant_op.constant(orig_input, shape=[1, 3, 3, 1])
       t = constant_op.constant(tensor_input, shape=[1, 3, 3, 1])
       argmax = constant_op.constant(
@@ -910,7 +910,7 @@ class PoolingTest(test.TestCase):
     # Initializes the input tensor with array containing incrementing
     # numbers from 1.
     x = [f * 1.0 for f in range(1, total_size + 1)]
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       input_tensor = constant_op.constant(x, shape=input_sizes, name="input")
       if pool_func == nn_ops.avg_pool:
         func_name = "avg_pool"
@@ -986,7 +986,7 @@ class PoolingTest(test.TestCase):
     # Initializes the input tensor with array containing incrementing
     # numbers from 1.
     x = [f * 1.0 for f in range(1, total_size + 1)]
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       input_tensor = constant_op.constant(x, shape=input_sizes, name="input")
       if pool_func == nn_ops.avg_pool:
         func_name = "avg_pool"
@@ -1208,7 +1208,7 @@ class PoolingTest(test.TestCase):
                              window_rows, window_cols, row_stride, col_stride,
                              padding, use_gpu, v2):
     pool_func = gen_nn_ops.max_pool_v2 if v2 else nn_ops.max_pool
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       input_tensor = variables.Variable(
           np.array(input_data, dtype=np.float32).reshape(input_sizes))
       variables.global_variables_initializer().run()
@@ -1807,7 +1807,7 @@ class PoolingTest(test.TestCase):
             padding="SAME")
 
   def testOpEdgeCases(self):
-    with self.test_session(use_gpu=test.is_gpu_available()) as sess:
+    with self.session(use_gpu=test.is_gpu_available()) as sess:
       pool_funcs = [nn_ops.max_pool, nn_ops.avg_pool]
       if test.is_gpu_available():
         pool_funcs.append(nn_ops.max_pool_with_argmax)
diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py
index 5f5e24bd63..837f1ec054 100644
--- a/tensorflow/python/kernel_tests/py_func_test.py
+++ b/tensorflow/python/kernel_tests/py_func_test.py
@@ -644,7 +644,7 @@ class PyFuncTest(test.TestCase):
       y = script_ops.eager_py_func(func=f, inp=[x], Tout=dtypes.float32)
       z = script_ops.eager_py_func(func=g, inp=[y], Tout=dtypes.float32)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       output = sess.run(z, feed_dict={x: 3.0})
       self.assertEqual(output, 18.0)
 
diff --git a/tensorflow/python/kernel_tests/qr_op_test.py b/tensorflow/python/kernel_tests/qr_op_test.py
index 8848c15e76..a60237fb25 100644
--- a/tensorflow/python/kernel_tests/qr_op_test.py
+++ b/tensorflow/python/kernel_tests/qr_op_test.py
@@ -50,7 +50,7 @@ class QrOpTest(test.TestCase):
       linalg_ops.qr(vector)
 
   def testConcurrentExecutesWithoutError(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       all_ops = []
       for full_matrices_ in True, False:
         for rows_ in 4, 5:
@@ -121,7 +121,7 @@ def _GetQrOpTest(dtype_, shape_, full_matrices_, use_static_shape_):
           low=-1.0, high=1.0,
           size=np.prod(shape_)).reshape(shape_).astype(dtype_)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       if use_static_shape_:
         x_tf = constant_op.constant(x_np)
       else:
@@ -173,7 +173,7 @@ def _GetQrGradOpTest(dtype_, shape_, full_matrices_):
       tol = 3e-2
     else:
       tol = 1e-6
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       tf_a = constant_op.constant(a)
       tf_b = linalg_ops.qr(tf_a, full_matrices=full_matrices_)
       for b in tf_b:
diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py
index 8c84b2a49f..ac9be56d63 100644
--- a/tensorflow/python/kernel_tests/reader_ops_test.py
+++ b/tensorflow/python/kernel_tests/reader_ops_test.py
@@ -707,7 +707,7 @@ class AsyncReaderTest(test.TestCase):
     """Tests that reading does not block main execution threads."""
     config = config_pb2.ConfigProto(
         inter_op_parallelism_threads=1, intra_op_parallelism_threads=1)
-    with self.test_session(config=config) as sess:
+    with self.session(config=config) as sess:
       thread_data_t = collections.namedtuple("thread_data_t",
                                              ["thread", "queue", "output"])
       thread_data = []
diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py
index 248036a82a..7cca170ef3 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test.py
@@ -131,7 +131,7 @@ class BaseReductionTest(test.TestCase):
 
   def _compare(self, x, reduction_axes, keepdims, feed_dict=None):
     np_ans = self._np_reduce(x, reduction_axes, keepdims)
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       tf_ans = self._tf_reduce(x, reduction_axes, keepdims)
       out = sess.run(tf_ans, feed_dict)
     self.assertAllClose(np_ans, out)
@@ -153,7 +153,7 @@ class BaseReductionTest(test.TestCase):
     if reduction_axes is not None and np.shape(reduction_axes) == (1,):
       # Test scalar reduction_axes argument
       self._compareGradient(x, reduction_axes[0], rtol=rtol, atol=atol)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       t = ops.convert_to_tensor(x)
       su = self._tf_reduce(t, reduction_axes, False)
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -183,7 +183,7 @@ class SumReductionTest(BaseReductionTest):
 
   def testAxesType(self):
     for dtype in [dtypes.int64, dtypes.int32]:
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_sum([0, 0], constant_op.constant(0, dtype=dtype))
         tf_v = sess.run(v)
       self.assertAllEqual(tf_v, 0)
@@ -356,14 +356,14 @@ class SumReductionTest(BaseReductionTest):
     self._compareAll(x, [1])
 
   def testEmptyGradients(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = array_ops.zeros([0, 3])
       y = math_ops.reduce_sum(x, [1])
       error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0])
       self.assertEqual(error, 0)
 
   def testDegenerate(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in (dtypes.float16, dtypes.float32, dtypes.float64,
                     dtypes.complex64, dtypes.complex128):
         # A large number is needed to get Eigen to die
@@ -398,7 +398,7 @@ class MeanReductionTest(BaseReductionTest):
 
   def testAxesType(self):
     for dtype in [dtypes.int64, dtypes.int32]:
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_mean([0, 0], constant_op.constant(0, dtype=dtype))
         tf_v = sess.run(v)
       self.assertAllEqual(tf_v, 0)
@@ -442,14 +442,14 @@ class MeanReductionTest(BaseReductionTest):
       self._compareGradientAxes(x, rtol=1e-3, atol=1e-3)
 
   def testEmptyGradients(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = array_ops.zeros([0, 3])
       y = math_ops.reduce_mean(x, [1])
       error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0])
       self.assertEqual(error, 0)
 
   def testDegenerate(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
         # A large number is needed to get Eigen to die
         x = array_ops.zeros((0, 9938), dtype=dtype)
@@ -471,7 +471,7 @@ class ProdReductionTest(BaseReductionTest):
 
   def testAxesType(self):
     for dtype in [dtypes.int64, dtypes.int32]:
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_prod([0, 0], constant_op.constant(0, dtype=dtype))
         tf_v = sess.run(v)
       self.assertAllEqual(tf_v, 0)
@@ -534,14 +534,14 @@ class ProdReductionTest(BaseReductionTest):
     self._compareGradientAxes(x4, rtol=1e-3, atol=1e-3)
 
   def testEmptyGradients(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       x = array_ops.zeros([0, 3])
       y = math_ops.reduce_prod(x, [1])
       error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0])
       self.assertEqual(error, 0)
 
   def testDegenerate(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
         # A large number is needed to get Eigen to die
         x = array_ops.zeros((0, 9938), dtype=dtype)
@@ -558,7 +558,7 @@ class MinReductionTest(test.TestCase):
     else:
       for ra in reduction_axes[::-1]:
         np_ans = np.amin(np_ans, axis=ra, keepdims=keepdims)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       if reduction_axes is not None:
         reduction_axes = np.array(reduction_axes).astype(np.int32)
       tf_ans = math_ops.reduce_min(x, reduction_axes, keepdims)
@@ -574,7 +574,7 @@ class MinReductionTest(test.TestCase):
 
   def testAxesType(self):
     for dtype in [dtypes.int64, dtypes.int32]:
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_min([0, 0], constant_op.constant(0, dtype=dtype))
         tf_v = sess.run(v)
       self.assertAllEqual(tf_v, 0)
@@ -671,7 +671,7 @@ class MaxReductionTest(test.TestCase):
     else:
       for ra in reduction_axes[::-1]:
         np_ans = np.amax(np_ans, axis=ra, keepdims=keepdims)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       if reduction_axes is not None:
         reduction_axes = np.array(reduction_axes).astype(np.int32)
       tf_ans = math_ops.reduce_max(x, reduction_axes, keepdims)
@@ -687,7 +687,7 @@ class MaxReductionTest(test.TestCase):
 
   def testAxesType(self):
     for dtype in [dtypes.int64, dtypes.int32]:
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_max([0, 0], constant_op.constant(0, dtype=dtype))
         tf_v = sess.run(v)
       self.assertAllEqual(tf_v, 0)
@@ -798,7 +798,7 @@ class AllReductionTest(test.TestCase):
     else:
       for ra in reduction_axes[::-1]:
         np_ans = np.all(np_ans, axis=ra, keepdims=keepdims)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       if reduction_axes is not None:
         reduction_axes = np.array(reduction_axes).astype(np.int32)
       tf_ans = math_ops.reduce_all(x, reduction_axes, keepdims)
@@ -814,7 +814,7 @@ class AllReductionTest(test.TestCase):
 
   def testAxesType(self):
     for dtype in [dtypes.int64, dtypes.int32]:
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         v = math_ops.reduce_all([True, True],
                                 constant_op.constant(0, dtype=dtype))
         tf_v = sess.run(v)
@@ -847,7 +847,7 @@ class AnyReductionTest(test.TestCase):
     else:
       for ra in reduction_axes[::-1]:
         np_ans = np.any(np_ans, axis=ra, keepdims=keepdims)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       if reduction_axes is not None:
         reduction_axes = np.array(reduction_axes).astype(np.int32)
       tf_ans = math_ops.reduce_any(x, reduction_axes, keepdims)
@@ -863,7 +863,7 @@ class AnyReductionTest(test.TestCase):
 
   def testAxesType(self):
     for dtype in [dtypes.int64, dtypes.int32]:
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         v = math_ops.reduce_any([True, True],
                                 constant_op.constant(0, dtype=dtype))
         tf_v = sess.run(v)
@@ -898,7 +898,7 @@ class CountNonzeroReductionTest(test.TestCase):
       reduction_axes = np.array(reduction_axes).astype(np.int32)
       for ra in reduction_axes.ravel()[::-1]:
         np_ans = np.sum(np_ans, axis=ra, keepdims=keepdims)
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       tf_ans = math_ops.count_nonzero(x, reduction_axes, keepdims)
       out = sess.run(tf_ans, feed_dict)
     self.assertAllClose(np_ans, out)
@@ -951,7 +951,7 @@ class CountNonzeroReductionTest(test.TestCase):
 
   def testDegenerate(self):
     for use_gpu in False, True:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         for dtype in (dtypes.bool,):
           # A large number is needed to get Eigen to die
           x = array_ops.zeros((0, 9938), dtype=dtype)
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 672d6556f5..b0f2796ede 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -57,7 +57,7 @@ class ReluTest(test.TestCase):
 
   def _testRelu(self, np_features, use_gpu=False):
     np_relu = self._npRelu(np_features)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       relu = nn_ops.relu(np_features)
       tf_relu = relu.eval()
     self.assertAllClose(np_relu, tf_relu)
@@ -77,7 +77,7 @@ class ReluTest(test.TestCase):
     if not test.is_gpu_available(cuda_only=True):
       return
     np_relu = self._npRelu(np_inputs)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       relu = nn_ops.relu(constant_op.constant(np_inputs, dtypes.qint8))
       if np_inputs.size % 4 == 0:
         tf_relu = relu.eval()
@@ -124,7 +124,7 @@ class ReluTest(test.TestCase):
   # Instead of relying on compute_gradient_error, we compare the fp16 analytical
   # gradient against their fp32 counterpart.
   def testGradientFloat16(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       # Randomly construct a 1D shape from [1, 40)
       shape = random_ops.random_uniform(
           [1], minval=1, maxval=40, dtype=dtypes.int32)
@@ -230,7 +230,7 @@ class Relu6Test(test.TestCase):
 
   def _testRelu6(self, np_features, use_gpu=False):
     np_relu6 = self._npRelu6(np_features)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       relu6 = nn_ops.relu6(np_features)
       tf_relu6 = relu6.eval()
     self.assertAllClose(np_relu6, tf_relu6)
@@ -417,7 +417,7 @@ class EluTest(test.TestCase):
 
   def _testElu(self, np_features, use_gpu=False):
     np_elu = self._npElu(np_features)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       elu = nn_ops.elu(np_features)
       tf_elu = elu.eval()
     self.assertAllClose(np_elu, tf_elu)
@@ -519,7 +519,7 @@ class SeluTest(test.TestCase):
 
   def _testSelu(self, np_features, use_gpu=False):
     np_selu = self._npSelu(np_features)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       selu = nn_ops.selu(np_features)
       tf_selu = selu.eval()
     self.assertAllClose(np_selu, tf_selu)
@@ -605,7 +605,7 @@ class CreluTest(test.TestCase):
     np_crelu = np.concatenate((np_relu, np_neg_relu),
                               len(np_features.shape) - 1)
 
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       crelu = nn_ops.crelu(np_features)
       tf_relu = crelu.eval()
 
diff --git a/tensorflow/python/kernel_tests/reshape_op_test.py b/tensorflow/python/kernel_tests/reshape_op_test.py
index ca3ff1d1df..14cdae1837 100644
--- a/tensorflow/python/kernel_tests/reshape_op_test.py
+++ b/tensorflow/python/kernel_tests/reshape_op_test.py
@@ -30,7 +30,7 @@ from tensorflow.python.platform import test
 class ReshapeTest(test.TestCase):
 
   def _testReshape(self, x, y, use_gpu=False):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       np_ans = x.reshape(y)
       tf_ans = array_ops.reshape(x, y)
       out = tf_ans.eval()
diff --git a/tensorflow/python/kernel_tests/reverse_sequence_op_test.py b/tensorflow/python/kernel_tests/reverse_sequence_op_test.py
index 8fc71e0c57..56609bd0a5 100644
--- a/tensorflow/python/kernel_tests/reverse_sequence_op_test.py
+++ b/tensorflow/python/kernel_tests/reverse_sequence_op_test.py
@@ -38,7 +38,7 @@ class ReverseSequenceTest(test.TestCase):
                            truth,
                            use_gpu=False,
                            expected_err_re=None):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       ans = array_ops.reverse_sequence(
           x, batch_axis=batch_axis, seq_axis=seq_axis, seq_lengths=seq_lengths)
       if expected_err_re is None:
diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index 907e1277a9..993ea4b6b7 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -554,7 +554,7 @@ class RNNTest(test.TestCase):
     kernel, recurrent_kernel, bias = keras_weights
     tf_weights = [np.concatenate((kernel, recurrent_kernel)), bias]
 
-    with self.test_session(graph=ops_lib.Graph()) as sess:
+    with self.session(graph=ops_lib.Graph()) as sess:
       inputs = array_ops.placeholder(
           dtypes.float32, shape=(None, timestep, input_shape))
       cell = keras.layers.SimpleRNNCell(output_shape)
@@ -562,7 +562,7 @@ class RNNTest(test.TestCase):
           cell, inputs, dtype=dtypes.float32)
       cell.set_weights(keras_weights)
       [k_out, k_state] = sess.run([k_out, k_state], {inputs: x_train})
-    with self.test_session(graph=ops_lib.Graph()) as sess:
+    with self.session(graph=ops_lib.Graph()) as sess:
       inputs = array_ops.placeholder(
           dtypes.float32, shape=(None, timestep, input_shape))
       cell = rnn_cell_impl.BasicRNNCell(output_shape)
diff --git a/tensorflow/python/kernel_tests/scan_ops_test.py b/tensorflow/python/kernel_tests/scan_ops_test.py
index 08b4a2aaae..b369222565 100644
--- a/tensorflow/python/kernel_tests/scan_ops_test.py
+++ b/tensorflow/python/kernel_tests/scan_ops_test.py
@@ -78,7 +78,7 @@ class CumsumTest(test.TestCase):
 
   def _compare(self, x, axis, exclusive, reverse):
     np_out = handle_options(np.cumsum, x, axis, exclusive, reverse)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_out = math_ops.cumsum(x, axis, exclusive, reverse).eval()
 
     self.assertAllClose(np_out, tf_out)
@@ -98,7 +98,7 @@ class CumsumTest(test.TestCase):
     for dtype in self.valid_dtypes:
       x = np.arange(1, 6).reshape([5]).astype(dtype)
       for axis_dtype in [dtypes.int64, dtypes.int32]:
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           axis = constant_op.constant(0, axis_dtype)
           tf_out = math_ops.cumsum(x, axis).eval()
 
@@ -129,7 +129,7 @@ class CumsumTest(test.TestCase):
   def testInvalidAxis(self):
     x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
     input_tensor = ops.convert_to_tensor(x)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesWithPredicateMatch(
           errors_impl.InvalidArgumentError,
           lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
@@ -145,7 +145,7 @@ class CumsumTest(test.TestCase):
 
   def _compareGradient(self, shape, axis, exclusive, reverse):
     x = np.arange(0, 50).reshape(shape).astype(np.float64)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       t = ops.convert_to_tensor(x)
       result = math_ops.cumsum(t, axis, exclusive, reverse)
       jacob_t, jacob_n = gradient_checker.compute_gradient(
@@ -184,7 +184,7 @@ class CumprodTest(test.TestCase):
 
   def _compare(self, x, axis, exclusive, reverse):
     np_out = handle_options(np.cumprod, x, axis, exclusive, reverse)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_out = math_ops.cumprod(x, axis, exclusive, reverse).eval()
 
     self.assertAllClose(np_out, tf_out)
@@ -204,7 +204,7 @@ class CumprodTest(test.TestCase):
     for dtype in self.valid_dtypes:
       x = np.arange(1, 6).reshape([5]).astype(dtype)
       for axis_dtype in [dtypes.int64, dtypes.int32]:
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           axis = constant_op.constant(0, axis_dtype)
           tf_out = math_ops.cumprod(x, axis).eval()
 
@@ -235,7 +235,7 @@ class CumprodTest(test.TestCase):
   def testInvalidAxis(self):
     x = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
     input_tensor = ops.convert_to_tensor(x)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesWithPredicateMatch(
           errors_impl.InvalidArgumentError,
           lambda e: "Expected scan axis in the range [-2, 2)" in str(e)):
@@ -251,7 +251,7 @@ class CumprodTest(test.TestCase):
 
   def _compareGradient(self, shape, axis, exclusive, reverse):
     x = np.arange(1, 9).reshape(shape).astype(np.float64)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       t = ops.convert_to_tensor(x)
       result = math_ops.cumprod(t, axis, exclusive, reverse)
       jacob_t, jacob_n = gradient_checker.compute_gradient(
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index 9843bf4be0..54d542fb5f 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -102,7 +102,7 @@ class StatefulScatterNdTest(test.TestCase):
     np.random.seed(8)
     ref_shapes = [(3, 6), (3, 6), (3, 6, 9), (3, 6, 9), (3, 6, 9), (3, 6, 9)]
     indices_shapes = [(2,), (2, 2), (2,), (2, 2), (2, 3), (2, 3, 3)]
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       for ref_shape, indices_shape in zip(ref_shapes, indices_shapes):
         num_updates = indices_shape[0]
         ixdim = indices_shape[-1]
@@ -158,7 +158,7 @@ class StatefulScatterNdTest(test.TestCase):
     scatter = state_ops.scatter_nd_update(ref, indices, updates)
     init = variables.global_variables_initializer()
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(init)
       result = sess.run(scatter)
       self.assertAllClose(result, expected)
@@ -172,7 +172,7 @@ class StatefulScatterNdTest(test.TestCase):
     scatter = state_ops.scatter_nd_update(ref, indices, updates)
     init = variables.global_variables_initializer()
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(init)
       sess.run(scatter)
       self.assertAllClose(ref.eval(), expected)
@@ -186,7 +186,7 @@ class StatefulScatterNdTest(test.TestCase):
     scatter = state_ops.scatter_nd_update(ref, indices, updates)
     init = variables.global_variables_initializer()
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(init)
       result = sess.run(scatter)
       self.assertAllClose(result, expected)
@@ -200,7 +200,7 @@ class StatefulScatterNdTest(test.TestCase):
     scatter = state_ops.scatter_nd_update(ref, indices, updates)
     init = variables.global_variables_initializer()
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       sess.run(init)
       result = sess.run(scatter)
       self.assertAllClose(result, expected)
@@ -239,7 +239,7 @@ class StatefulScatterNdTest(test.TestCase):
   # TODO(simister): Re-enable once binary size increase due to
   # extra templating is back under control and this op is re-enabled
   # def testBooleanScatterUpdate(self):
-  #   with self.test_session(use_gpu=False) as session:
+  #   with self.session(use_gpu=False) as session:
   #     var = tf.Variable([True, False])
   #     update0 = tf.scatter_nd_update(var, [[1]], [True])
   #     update1 = tf.scatter_nd_update(
@@ -257,7 +257,7 @@ class StatefulScatterNdTest(test.TestCase):
                state_ops.scatter_nd_update):
       params = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32)
       updates = np.array([-3, -4, -5]).astype(np.float32)
-      with self.test_session(use_gpu=False):
+      with self.cached_session(use_gpu=False):
         ref = variables.VariableV1(params)
         ref.initializer.run()
 
@@ -356,7 +356,7 @@ class StatefulScatterNdTest(test.TestCase):
       updates = np.array([-3, -4, -5]).astype(np.float32)
       # With GPU, the code ignores indices that are out of range.
       # We don't test the implementation; just test there's no failures.
-      with self.test_session(force_gpu=True):
+      with self.cached_session(force_gpu=True):
         ref = variables.Variable(params)
         ref.initializer.run()
 
diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py
index 527b7daf10..87c345245c 100644
--- a/tensorflow/python/kernel_tests/scatter_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_ops_test.py
@@ -133,7 +133,7 @@ class ScatterTest(test.TestCase):
                         repeat_indices=False,
                         updates_are_scalar=False):
     np.random.seed(8)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       for indices_shape in (), (2,), (3, 7), (3, 4, 7):
         for extra_shape in (), (5,), (5, 9):
           # Generate random indices with no duplicates for easy numpy comparison
@@ -276,7 +276,7 @@ class ScatterTest(test.TestCase):
 
   def testBooleanScatterUpdate(self):
     if not test.is_gpu_available():
-      with self.test_session(use_gpu=False) as session:
+      with self.session(use_gpu=False) as session:
         var = variables.Variable([True, False])
         update0 = state_ops.scatter_update(var, 1, True)
         update1 = state_ops.scatter_update(
@@ -293,7 +293,7 @@ class ScatterTest(test.TestCase):
       params = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32)
       updates = np.array([-3, -4, -5]).astype(np.float32)
       if not test.is_gpu_available():
-        with self.test_session(use_gpu=False):
+        with self.session(use_gpu=False):
           ref = variables.VariableV1(params)
           ref.initializer.run()
 
@@ -320,7 +320,7 @@ class ScatterTest(test.TestCase):
       updates = np.array([-3, -4, -5]).astype(np.float32)
       # With GPU, the code ignores indices that are out of range.
       # We don't test the implementation; just test there's no failures.
-      with self.test_session(force_gpu=True):
+      with self.cached_session(force_gpu=True):
         ref = variables.Variable(params)
         ref.initializer.run()
 
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 2931877c11..3f7e43b533 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -113,7 +113,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
       else:
         curr_ops_list = ops_list
       for use_gpu in [True, False]:
-        with self.test_session(use_gpu=use_gpu):
+        with self.cached_session(use_gpu=use_gpu):
           tf_x, np_x = self._input(shape, dtype=dtype)
           for np_op1, np_op2, tf_op in curr_ops_list:
             np_ans = self._segmentReduce(indices, np_x, np_op1, np_op2)
@@ -136,7 +136,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
   def testSegmentIdsSize(self):
     shape = [4, 4]
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tf_x, _ = self._input(shape)
         indices = [0, 1]
         s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
@@ -147,7 +147,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
     # This is a baseline for the following SegmentIdsInvalid* tests.
     shape = [4, 4]
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tf_x, _ = self._input(shape, dtype=dtypes_lib.float32)
         indices = [0, 0, 0, 1]
         result = math_ops.segment_sum(data=tf_x, segment_ids=indices).eval()
@@ -156,7 +156,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
   def testSegmentIdsGreaterThanZero(self):
     shape = [4, 4]
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tf_x, np_x = self._input(shape, dtype=dtypes_lib.float32)
         indices = [1, 1, 2, 2]
         np_ans = self._segmentReduce(indices, np_x, np.add)
@@ -167,7 +167,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
   def testSegmentIdsHole(self):
     shape = [4, 4]
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tf_x, np_x = self._input(shape, dtype=dtypes_lib.float32)
         indices = [0, 0, 3, 3]
         np_ans = self._segmentReduce(indices, np_x, np.add)
@@ -209,7 +209,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
   def testSegmentIdsInvalid4(self):
     shape = [4, 4]
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tf_x, _ = self._input(shape, dtype=dtypes_lib.float32)
         indices = [0, 0, 0, -1]
         s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
@@ -219,7 +219,7 @@ class SegmentReductionOpTest(SegmentReductionHelper):
   def testSegmentIdsInvalid5(self):
     shape = [4, 4]
     for use_gpu in [True, False]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tf_x, _ = self._input(shape, dtype=dtypes_lib.float32)
         indices = [0, 0, 0, -2]
         s = math_ops.segment_sum(data=tf_x, segment_ids=indices)
@@ -284,7 +284,7 @@ class UnsortedSegmentTest(SegmentReductionHelper):
         ops_list = self.complex_ops_list if dtype.is_complex else self.ops_list
         tf_x, np_x = self._input(shape, dtype=dtype)
         for use_gpu in [True, False]:
-          with self.test_session(use_gpu=True):
+          with self.cached_session(use_gpu=True):
             for np_op1, np_op2, tf_op, init_op in ops_list:
               # sqrt_n doesn't support integers
               if (np_op2 == self._sqrt_n_reduce_op and dtype.is_integer):
@@ -310,7 +310,7 @@ class UnsortedSegmentTest(SegmentReductionHelper):
     for indices in indices_flat, indices_flat.reshape(5, 2):
       shape = indices.shape + (2,)
       for dtype in dtypes:
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           tf_x, np_x = self._input(shape)
           num_segments_constant = constant_op.constant(
               num_segments, dtype=dtype)
@@ -334,7 +334,7 @@ class UnsortedSegmentTest(SegmentReductionHelper):
         shape = indices.shape + (num_cols,)
         # test CPU and GPU as tf.gather behaves differently on each device
         for use_gpu in [False, True]:
-          with self.test_session(use_gpu=use_gpu):
+          with self.cached_session(use_gpu=use_gpu):
             for _, _, tf_op, _ in ops_list:
               tf_x, np_x = self._input(shape, dtype=dtype)
               s = tf_op(tf_x, indices, num_segments)
@@ -360,7 +360,7 @@ class UnsortedSegmentTest(SegmentReductionHelper):
     gradients_indices[range(9), indices] = [0, 0, 0, 4, 0, 0, 9, 9, 9]
     gradients_indices_neg[range(9), indices_neg] = [0, 1, 0, 0, 2, 2, 0, 3, 3]
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         for ind, grad_gt in [(indices, gradients_indices),
                              (indices_neg, gradients_indices_neg)]:
           s = math_ops.unsorted_segment_prod(values_tf,
@@ -382,7 +382,7 @@ class UnsortedSegmentTest(SegmentReductionHelper):
     shape = [n, num_cols]
     num_segments = max(indices) + 1
     for dtype in self.differentiable_dtypes:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         tf_x, np_x = self._input(shape, dtype=dtype)
         # Results from UnsortedSegmentSum
         unsorted_s = math_ops.unsorted_segment_sum(
@@ -407,7 +407,7 @@ class UnsortedSegmentTest(SegmentReductionHelper):
     # Note: GPU kernel does not return the out-of-range error needed for this
     # test, so this test is marked as cpu-only.
     # Note: With PR #13055 a negative index will be ignored silently.
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for bad in [[2]], [[7]]:
         unsorted = math_ops.unsorted_segment_sum([[17]], bad, num_segments=2)
         with self.assertRaisesOpError(
@@ -417,7 +417,7 @@ class UnsortedSegmentTest(SegmentReductionHelper):
   def testEmptySecondDimension(self):
     dtypes = [np.float16, np.float32, np.float64, np.int64, np.int32,
               np.complex64, np.complex128]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in dtypes:
         for itype in (np.int32, np.int64):
           data = np.zeros((2, 0), dtype=dtype)
@@ -433,7 +433,7 @@ class UnsortedSegmentTest(SegmentReductionHelper):
     for indices in indices_flat, indices_flat.reshape(5, 2):
       shape = indices.shape + (2,)
       for dtype in self.all_dtypes:
-        with self.test_session(use_gpu=True):
+        with self.session(use_gpu=True):
           tf_x, np_x = self._input(shape, dtype=dtype)
           np_ans = self._segmentReduce(
               indices, np_x, np.add, op2=None, num_segments=num_segments)
@@ -490,7 +490,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
         segment_indices.append(i)
     num_indices = len(segment_indices)
     for dtype in dtypes:
-      with self.test_session(use_gpu=False):
+      with self.cached_session(use_gpu=False):
         tf_indices, np_indices, tf_x, np_x = self._sparse_input(
             shape, num_indices, dtype=dtype)
         for np_op1, np_op2, tf_op in ops_list:
@@ -513,7 +513,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
         self._mean_cum_op, self._mean_reduce_op, math_ops.sparse_segment_mean)]
     segment_indices = [0, 2, 2, 2]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for np_op1, np_op2, tf_op in ops_list:
         np_ans = self._sparseSegmentReduce(np_x, tf_indices, segment_indices,
                                            np_op1, np_op2)
@@ -529,7 +529,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     segment_indices = [0, 2, 2, 2]
     tf_indices = [8, 3, 0, 9]
     num_segments = 5
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for np_op1, np_op2, tf_op in ops_list:
         np_ans = self._sparseSegmentReduce(
             np_x,
@@ -555,7 +555,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     segment_indices = []
     tf_indices = []
     num_segments = 5
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(
             data=tf_x,
@@ -571,7 +571,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
         self._mean_cum_op, self._mean_reduce_op, math_ops.sparse_segment_mean)]
     segment_indices = [1, 2, 2, 2]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for np_op1, np_op2, tf_op in ops_list:
         np_ans = self._sparseSegmentReduce(np_x, tf_indices, segment_indices,
                                            np_op1, np_op2)
@@ -585,7 +585,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]
     segment_indices = [0, 1, 2, 2]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices)
         s.eval()
@@ -595,7 +595,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]
     segment_indices = [0, 1, 2, 2]
     tf_indices = [8, -1, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices)
         with self.assertRaisesOpError(
@@ -607,7 +607,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]
     segment_indices = [0, 1, 2, 2]
     tf_indices = [8, 3, 0, 10]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices)
         with self.assertRaisesOpError(
@@ -619,7 +619,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]
     segment_indices = [0, 1, 0, 1]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices)
         with self.assertRaisesOpError("segment ids are not increasing"):
@@ -630,7 +630,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]
     segment_indices = [0, 1, 2, 0]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices)
         with self.assertRaisesOpError(
@@ -643,7 +643,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]
     segment_indices = [-1, 0, 1, 1]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices)
         with self.assertRaisesOpError(
@@ -656,7 +656,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]
     segment_indices = [0, 0, 0, -1]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices)
         with self.assertRaisesOpError("segment ids must be >= 0"):
@@ -667,7 +667,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ops_list = [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]
     segment_indices = [0, 0, 0, -2]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices)
         with self.assertRaisesOpError("segment ids must be >= 0"):
@@ -683,7 +683,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     num_segments = 5
     segment_indices = [0, 1, 3, 3]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(
             data=tf_x,
@@ -701,7 +701,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     num_segments = 5
     segment_indices = [0, 1, 3, 5]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(
             data=tf_x,
@@ -720,7 +720,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     num_segments = -2
     segment_indices = [0, 1, 3, 3]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         with self.assertRaisesRegexp(
             ValueError, "Cannot specify a negative value for num_segments"):
@@ -782,7 +782,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ]
     segment_indices = [0, 1, 2, 2]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(tf_x, tf_indices, segment_indices, 10)
         s.eval()
@@ -794,7 +794,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ]
     segment_indices = [0, 1, 2, 2]
     tf_indices = [8, 3, 0, 10]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(tf_x, tf_indices, segment_indices, 10)
         with self.assertRaisesOpError(r"Index 10 out of range \[0, 10\)"):
@@ -807,7 +807,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ]
     segment_indices = [0, 1, 2, 2]
     tf_indices = [8, 3, -1, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(tf_x, tf_indices, segment_indices, 10)
         with self.assertRaisesOpError(r"Index -1 out of range \[0, 10\)"):
@@ -821,7 +821,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ]
     segment_indices = [0, 1, 1, 4]  # 5 segments
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(tf_x, tf_indices, segment_indices, 10)
         with self.assertRaisesOpError("Invalid number of segments"):
@@ -834,7 +834,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ]
     segment_indices = [0, 1, 2, 0]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(tf_x, tf_indices, segment_indices, 10)
         with self.assertRaisesOpError(r"Segment id 1 out of range \[0, 1\)"):
@@ -847,7 +847,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ]
     segment_indices = [-1, 0, 1, 1]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(tf_x, tf_indices, segment_indices, 10)
         with self.assertRaisesOpError(r"Segment id -1 out of range \[0, 2\)"):
@@ -860,7 +860,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
     ]
     segment_indices = [0, 1, 2, -1]
     tf_indices = [8, 3, 0, 9]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for tf_op in ops_list:
         s = tf_op(tf_x, tf_indices, segment_indices, 10)
         with self.assertRaisesOpError(r"Segment id 0 out of range \[0, 0\)"):
diff --git a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
index d2647088c5..1b4aff8c9c 100644
--- a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
+++ b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
@@ -51,7 +51,7 @@ class SelfAdjointEigTest(test.TestCase):
 
   def testConcurrentExecutesWithoutError(self):
     all_ops = []
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       for compute_v_ in True, False:
         matrix1 = random_ops.random_normal([5, 5], seed=42)
         matrix2 = random_ops.random_normal([5, 5], seed=42)
@@ -80,7 +80,7 @@ class SelfAdjointEigTest(test.TestCase):
             "self_adjoint_eig_fail_if_denorms_flushed.txt")).astype(np.float32)
     self.assertEqual(matrix.shape, (32, 32))
     matrix_tensor = constant_op.constant(matrix)
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       (e, v) = sess.run(linalg_ops.self_adjoint_eig(matrix_tensor))
       self.assertEqual(e.size, 32)
       self.assertAllClose(
@@ -152,7 +152,7 @@ def _GetSelfAdjointEigTest(dtype_, shape_, compute_v_):
     else:
       atol = 1e-12
     np_e, np_v = np.linalg.eigh(a)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       if compute_v_:
         tf_e, tf_v = linalg_ops.self_adjoint_eig(constant_op.constant(a))
 
@@ -201,7 +201,7 @@ def _GetSelfAdjointEigGradTest(dtype_, shape_, compute_v_):
       tol = 1e-2
     else:
       tol = 1e-7
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       tf_a = constant_op.constant(a)
       if compute_v_:
         tf_e, tf_v = linalg_ops.self_adjoint_eig(tf_a)
diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py
index 0304dc3875..ee813e5ffd 100644
--- a/tensorflow/python/kernel_tests/shape_ops_test.py
+++ b/tensorflow/python/kernel_tests/shape_ops_test.py
@@ -50,7 +50,7 @@ class ShapeOpsTest(test.TestCase):
 
   def _compareShape(self, x, use_gpu=False):
     np_ans = np.array(np.shape(x))
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_ans = array_ops.shape(x)
       tf_ans_64 = array_ops.shape(x, out_type=dtypes.int64)
       result = tf_ans.eval()
@@ -62,7 +62,7 @@ class ShapeOpsTest(test.TestCase):
   def _compareShapeSparse(self, x_np, use_gpu=False):
     np_ans = np.array(np.shape(x_np))
     x_tf, unused_nnz = _sparsify(x_np)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_ans = array_ops.shape(x_tf)
       result = tf_ans.eval()
     self.assertAllEqual(np_ans, result)
@@ -70,7 +70,7 @@ class ShapeOpsTest(test.TestCase):
 
   def _compareShapeN(self, x, use_gpu=False):
     np_ans = np.array(np.shape(x))
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       tf_ans = array_ops.shape_n([x, x, x])
       tf_ans_64 = array_ops.shape_n([x, x, x], out_type=dtypes.int64)
       result = sess.run(tf_ans)
@@ -82,7 +82,7 @@ class ShapeOpsTest(test.TestCase):
 
   def _compareRank(self, x, use_gpu=False):
     np_ans = np.asarray(np.ndim(x))
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_ans = array_ops.rank(x)
       result = tf_ans.eval()
     self.assertAllEqual(np_ans, result)
@@ -91,7 +91,7 @@ class ShapeOpsTest(test.TestCase):
   def _compareRankSparse(self, x_np, use_gpu=False):
     np_ans = np.asarray(np.ndim(x_np))
     x_tf, unused_nnz = _sparsify(x_np)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_ans = array_ops.rank(x_tf)
       result = tf_ans.eval()
     self.assertAllEqual(np_ans, result)
@@ -99,7 +99,7 @@ class ShapeOpsTest(test.TestCase):
 
   def _compareSize(self, x, use_gpu=False):
     np_ans = np.asarray(np.size(x))
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_ans = array_ops.size(x)
       result = tf_ans.eval()
       tf_ans_64 = array_ops.size(x, out_type=dtypes.int64)
@@ -111,7 +111,7 @@ class ShapeOpsTest(test.TestCase):
   def _compareSizeSparse(self, x_np, use_gpu=False):
     np_ans = np.asarray(np.size(x_np))
     x_tf, unused_nnz = _sparsify(x_np)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_ans = array_ops.size(x_tf)
       result = tf_ans.eval()
     self.assertAllEqual(np_ans, result)
@@ -174,7 +174,7 @@ class ShapeOpsTest(test.TestCase):
 
   def _compareExpandDims(self, x, dim, use_gpu):
     np_ans = np.expand_dims(x, axis=dim)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tensor = array_ops.expand_dims(x, dim)
       tf_ans = tensor.eval()
     self.assertShapeEqual(np_ans, tensor)
@@ -262,14 +262,14 @@ class ShapeOpsTest(test.TestCase):
     for dtype in [dtypes.int32, dtypes.int64]:
       x = np.zeros([2])
       np_ans = np.expand_dims(x, axis=0)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         tensor = array_ops.expand_dims(x, constant_op.constant(0, dtype))
         tf_ans = tensor.eval()
       self.assertShapeEqual(np_ans, tensor)
       self.assertAllEqual(np_ans, tf_ans)
 
   def _compareSqueeze(self, x, squeeze_dims, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       if squeeze_dims:
         np_ans = np.squeeze(x, axis=tuple(squeeze_dims))
         tensor = array_ops.squeeze(x, squeeze_dims)
@@ -337,7 +337,7 @@ class ShapeOpsTest(test.TestCase):
     # Numpy squeezes a 1 element tensor into a zero dimensional tensor.
     # Verify that we do the same.
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tensor = array_ops.squeeze(np.zeros([1, 1, 1]), [])
         self.assertEqual(np.shape(1), tensor.get_shape())
         tf_ans = tensor.eval()
@@ -347,7 +347,7 @@ class ShapeOpsTest(test.TestCase):
     # Numpy squeezes a 1 element tensor into a zero dimensional tensor.
     # Verify that we do the same.
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tensor = array_ops.squeeze([[[False]]], [])
         self.assertEqual(np.shape(1), tensor.get_shape())
         tf_ans = tensor.eval()
@@ -355,7 +355,7 @@ class ShapeOpsTest(test.TestCase):
 
   def testSqueezeOnlyOnes(self):
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         input_1x1x3 = np.zeros([1, 1, 3])
         self._compareSqueezeAll(input_1x1x3)
         self._compareSqueezeAll(input_1x1x3, [0])
@@ -364,7 +364,7 @@ class ShapeOpsTest(test.TestCase):
 
   def testSqueezeErrors(self):
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         self.assertRaises(ValueError, array_ops.squeeze,
                           np.zeros([1, 2, 1]), [-4])
         self.assertRaises(ValueError, array_ops.squeeze,
@@ -412,7 +412,7 @@ class TileTest(test.TestCase):
 
   def testScalar(self):
     for use_gpu in False, True:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         a = constant_op.constant(7, shape=[], dtype=dtypes.float32)
         tiled = array_ops.tile(a, [])
         result = tiled.eval()
@@ -423,7 +423,7 @@ class TileTest(test.TestCase):
   def testSimple(self):
     # multiples could be int32 or int64
     for dtype in [dtypes.int32, dtypes.int64]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inp = np.random.rand(4, 1).astype(np.float32)
         a = constant_op.constant(inp)
         tiled = array_ops.tile(a, constant_op.constant([1, 4], dtype=dtype))
@@ -490,7 +490,7 @@ class TileTest(test.TestCase):
         bytes: (dtypes.string, bytes)
     }
     for dtype_np, (dtype_tf, cast) in types_to_test.items():
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inp = np.random.rand(4, 1).astype(dtype_np)
         a = constant_op.constant(
             [cast(x) for x in inp.ravel(order="C")],
@@ -517,7 +517,7 @@ class TileTest(test.TestCase):
         array_ops.tile(a, [[2, 3], [3, 4]]).eval()
 
   def _RunAndVerifyResult(self, rank, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       # Random dims of given rank
       input_shape = np.random.randint(1, 4, size=rank)
       inp = np.random.rand(*input_shape).astype("f")
@@ -580,7 +580,7 @@ class TileTest(test.TestCase):
     self.assertTrue((np.abs(expected - result) < 1e-3).all())
 
   def testGradientSimpleReductionOnGPU(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       inp = np.random.rand(4, 1).astype("f")
       a = constant_op.constant(
           [float(x) for x in inp.flatten()], shape=[4, 1], dtype=dtypes.float32)
@@ -594,7 +594,7 @@ class TileTest(test.TestCase):
     self.assertAllClose(np.sum(grad_inp, axis=1).reshape(4, 1), result, 1e-3)
 
   def testGradientStridedReductionOnGPU(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       inp = np.random.rand(4, 2).astype("f")
       a = constant_op.constant(
           [float(x) for x in inp.flatten()], shape=[4, 2], dtype=dtypes.float32)
@@ -613,7 +613,7 @@ class TileTest(test.TestCase):
 
   def _RunAndVerifyGradientResult(self, input_shape, multiples):
     for use_gpu in False, True:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         # Random values
         inp = np.asarray(np.random.rand(*input_shape))
         a = constant_op.constant(inp, dtype=dtypes.float64)
diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py
index c08d3222b3..0e8c276ba9 100644
--- a/tensorflow/python/kernel_tests/slice_op_test.py
+++ b/tensorflow/python/kernel_tests/slice_op_test.py
@@ -35,7 +35,7 @@ class SliceTest(test.TestCase):
   def testEmpty(self):
     inp = np.random.rand(4, 4).astype("f")
     for k in xrange(4):
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         a = constant_op.constant(inp, shape=[4, 4], dtype=dtypes.float32)
         slice_t = a[2, k:k]
         slice_val = slice_t.eval()
@@ -44,14 +44,14 @@ class SliceTest(test.TestCase):
   def testInt32(self):
     inp = np.random.rand(4, 4).astype("i")
     for k in xrange(4):
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         a = constant_op.constant(inp, shape=[4, 4], dtype=dtypes.int32)
         slice_t = a[2, k:k]
         slice_val = slice_t.eval()
       self.assertAllEqual(slice_val, inp[2, k:k])
 
   def testInt64Slicing(self):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       a = constant_op.constant([0, 1, 2], dtype=dtypes.int64)
 
       # Slice using int64 Tensor.
@@ -74,7 +74,7 @@ class SliceTest(test.TestCase):
 
   def testSelectAll(self):
     for _ in range(10):
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inp = np.random.rand(4, 4, 4, 4).astype("f")
         a = constant_op.constant(inp, shape=[4, 4, 4, 4], dtype=dtypes.float32)
 
@@ -88,7 +88,7 @@ class SliceTest(test.TestCase):
 
   def testSingleDimension(self):
     for _ in range(10):
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inp = np.random.rand(10).astype("f")
         a = constant_op.constant(inp, shape=[10], dtype=dtypes.float32)
 
@@ -134,7 +134,7 @@ class SliceTest(test.TestCase):
         sess.run([slice_t], feed_dict={input_t: input_val})
 
   def _testSliceMatrixDim0(self, x, begin, size):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_ans = array_ops.slice(x, [begin, 0], [size, x.shape[1]]).eval()
     np_ans = x[begin:begin + size, :]
     self.assertAllEqual(tf_ans, np_ans)
@@ -149,7 +149,7 @@ class SliceTest(test.TestCase):
 
   def testSingleElementAll(self):
     for _ in range(10):
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inp = np.random.rand(4, 4).astype("f")
         a = constant_op.constant(inp, shape=[4, 4], dtype=dtypes.float32)
 
@@ -159,7 +159,7 @@ class SliceTest(test.TestCase):
       self.assertAllEqual(slice_val, inp[x, 0:y])
 
   def testSimple(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       inp = np.random.rand(4, 4).astype("f")
       a = constant_op.constant(
           [float(x) for x in inp.ravel(order="C")],
@@ -174,7 +174,7 @@ class SliceTest(test.TestCase):
     self.assertEqual(slice2_val.shape, slice2_t.get_shape())
 
   def testComplex(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       inp = np.random.rand(4, 10, 10, 4).astype("f")
       a = constant_op.constant(inp, dtype=dtypes.float32)
 
@@ -191,7 +191,7 @@ class SliceTest(test.TestCase):
     # Random dims of rank 6
     input_shape = np.random.randint(0, 20, size=6)
     inp = np.random.rand(*input_shape).astype("f")
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       a = constant_op.constant(
           [float(x) for x in inp.ravel(order="C")],
           shape=input_shape,
@@ -230,7 +230,7 @@ class SliceTest(test.TestCase):
 
 
   def _testGradientSlice(self, input_shape, slice_begin, slice_size):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       num_inputs = np.prod(input_shape)
       num_grads = np.prod(slice_size)
       inp = np.random.rand(num_inputs).astype("f").reshape(input_shape)
@@ -255,7 +255,7 @@ class SliceTest(test.TestCase):
     self.assertAllClose(np_ans, result)
 
   def _testGradientVariableSize(self):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       inp = constant_op.constant([1.0, 2.0, 3.0], name="in")
       out = array_ops.slice(inp, [1], [-1])
       grad_actual = gradients_impl.gradients(out, inp)[0].eval()
@@ -265,7 +265,7 @@ class SliceTest(test.TestCase):
     # Regression test for bug in slice. A low-level bug in Eigen was causing
     # incorrect results for negative indices in multi-dimensional tensors.
     # See b/114318298.
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       x = constant_op.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 7]])
       loss1 = math_ops.reduce_sum(x[:-1, :-1] * 1.0)
       loss2 = math_ops.reduce_sum(x[:-1][:, :-1])
@@ -322,7 +322,7 @@ class SliceTest(test.TestCase):
     self.assertEqual([None, 2], c.get_shape().as_list())
 
   def testSliceOfSlice(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       a = constant_op.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
       b = a[1:, :]
       c = b[:-1, :]
diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py
index 89f4697e5c..3218d00c66 100644
--- a/tensorflow/python/kernel_tests/softmax_op_test.py
+++ b/tensorflow/python/kernel_tests/softmax_op_test.py
@@ -59,7 +59,7 @@ class SoftmaxTest(test.TestCase):
     # this bug in future.
     name = "arbitrary"
     np_softmax = self._npSoftmax(np_features, dim=dim, log=log)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       if log:
         tf_softmax = nn_ops.log_softmax(np_features, axis=dim, name=name)
       else:
@@ -111,7 +111,7 @@ class SoftmaxTest(test.TestCase):
       type = np.float64  # pylint: disable=redefined-builtin
     max = np.finfo(type).max  # pylint: disable=redefined-builtin
     features = np.array([[1., 1., 1., 1.], [max, 1., 2., 3.]]).astype(type)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       tf_log_softmax = nn_ops.log_softmax(features)
       out = tf_log_softmax.eval()
     self.assertAllClose(
@@ -230,7 +230,7 @@ class SoftmaxTest(test.TestCase):
       np_softmax = self._npSoftmax(ones)
 
       for use_gpu in [True, False]:
-        with self.test_session(use_gpu=use_gpu) as sess:
+        with self.cached_session(use_gpu=use_gpu) as sess:
           x = array_ops.placeholder(dtypes.float32)
           y = nn_ops.softmax(x)
           tf_softmax = sess.run(y, feed_dict={x: ones})
diff --git a/tensorflow/python/kernel_tests/softplus_op_test.py b/tensorflow/python/kernel_tests/softplus_op_test.py
index 636ed4747e..50a8291ea8 100644
--- a/tensorflow/python/kernel_tests/softplus_op_test.py
+++ b/tensorflow/python/kernel_tests/softplus_op_test.py
@@ -37,7 +37,7 @@ class SoftplusTest(test.TestCase):
 
   def _testSoftplus(self, np_features, use_gpu=False):
     np_softplus = self._npSoftplus(np_features)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       softplus = nn_ops.softplus(np_features)
       tf_softplus = softplus.eval()
     self.assertAllCloseAccordingToType(np_softplus, tf_softplus)
diff --git a/tensorflow/python/kernel_tests/softsign_op_test.py b/tensorflow/python/kernel_tests/softsign_op_test.py
index 1b4db9fa46..ee2e2e0303 100644
--- a/tensorflow/python/kernel_tests/softsign_op_test.py
+++ b/tensorflow/python/kernel_tests/softsign_op_test.py
@@ -34,7 +34,7 @@ class SoftsignTest(test.TestCase):
 
   def _testSoftsign(self, np_features, use_gpu=False):
     np_softsign = self._npSoftsign(np_features)
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       softsign = nn_ops.softsign(np_features)
       tf_softsign = softsign.eval()
     self.assertAllClose(np_softsign, tf_softsign)
diff --git a/tensorflow/python/kernel_tests/spacetobatch_op_test.py b/tensorflow/python/kernel_tests/spacetobatch_op_test.py
index e267c05915..21134adf2c 100644
--- a/tensorflow/python/kernel_tests/spacetobatch_op_test.py
+++ b/tensorflow/python/kernel_tests/spacetobatch_op_test.py
@@ -100,7 +100,7 @@ class SpaceToBatchTest(test.TestCase, PythonOpImpl):
   """
 
   def _testPad(self, inputs, paddings, block_size, outputs):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       # outputs = space_to_batch(inputs)
       x_tf = self.space_to_batch(
           math_ops.to_float(inputs), paddings, block_size=block_size)
@@ -190,7 +190,7 @@ class SpaceToBatchNDTest(test.TestCase):
     block_shape = np.array(block_shape)
     paddings = np.array(paddings).reshape((len(block_shape), 2))
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         # outputs = space_to_batch(inputs)
         x_tf = array_ops.space_to_batch_nd(
             math_ops.to_float(inputs), block_shape, paddings)
@@ -309,7 +309,7 @@ class SpaceToBatchSpaceToDepth(test.TestCase, PythonOpImpl):
         array_ops.space_to_depth(
             array_ops.transpose(x, [3, 1, 2, 0]), block_size=block_size),
         [3, 1, 2, 0])
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(y1.eval(), y2.eval())
 
 
@@ -494,7 +494,7 @@ class SpaceToBatchGradientTest(test.TestCase, PythonOpImpl):
   # Check the gradients.
   def _checkGrad(self, x, paddings, block_size):
     assert 4 == x.ndim
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_x = ops.convert_to_tensor(x)
       tf_y = self.space_to_batch(tf_x, paddings, block_size)
       epsilon = 1e-5
diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py
index cd90d16aac..9bea1b952a 100644
--- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py
+++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py
@@ -36,12 +36,12 @@ class SpaceToDepthTest(test.TestCase):
 
   def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32):
     input_nhwc = math_ops.cast(inputs, dtype)
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       # test NHWC (default) on CPU
       x_tf = array_ops.space_to_depth(input_nhwc, block_size)
       self.assertAllEqual(x_tf.eval(), outputs)
     if test.is_gpu_available():
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # test NHWC (default) on GPU
         x_tf = array_ops.space_to_depth(input_nhwc, block_size)
         self.assertAllEqual(x_tf.eval(), outputs)
@@ -138,13 +138,13 @@ class SpaceToDepthTest(test.TestCase):
     input_nhwc = array_ops.ones([batch_size, 4, 6, 3])
     x_out = array_ops.ones([batch_size, 2, 3, 12])
 
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       # test NHWC (default) on CPU
       x_tf = array_ops.space_to_depth(input_nhwc, block_size)
       self.assertAllEqual(x_tf.shape, x_out.shape)
       x_tf.eval()
     if test.is_gpu_available():
-      with self.test_session(use_gpu=True):
+      with self.session(use_gpu=True):
         # test NHWC (default) on GPU
         x_tf = array_ops.space_to_depth(input_nhwc, block_size)
         self.assertAllEqual(x_tf.shape, x_out.shape)
@@ -274,7 +274,7 @@ class SpaceToDepthTest(test.TestCase):
       expected = self.spaceToDepthUsingTranspose(t, block_size, data_format)
       actual = array_ops.space_to_depth(t, block_size, data_format=data_format)
 
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       actual_vals, expected_vals = sess.run([actual, expected])
       self.assertTrue(np.array_equal(actual_vals, expected_vals))
 
@@ -307,7 +307,7 @@ class SpaceToDepthGradientTest(test.TestCase):
       return
 
     assert 4 == x.ndim
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_x = ops.convert_to_tensor(x)
       tf_y = array_ops.space_to_depth(tf_x, block_size, data_format=data_format)
       epsilon = 1e-2
diff --git a/tensorflow/python/kernel_tests/sparse_add_op_test.py b/tensorflow/python/kernel_tests/sparse_add_op_test.py
index 7371ebe389..a746830afb 100644
--- a/tensorflow/python/kernel_tests/sparse_add_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_add_op_test.py
@@ -85,7 +85,7 @@ class SparseAddTest(test.TestCase):
         constant_op.constant(shape, dtypes.int64))
 
   def testAddSelf(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       for sp_a in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
         for sp_b in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
           sp_sum = sparse_ops.sparse_add(sp_a, sp_b)
@@ -99,7 +99,7 @@ class SparseAddTest(test.TestCase):
           self.assertAllEqual(sum_out.dense_shape, [3, 3])
 
   def testAddSelfAndNegation(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_a = self._SparseTensor_3x3()
       sp_b = self._SparseTensor_3x3(negate=True)
 
@@ -112,7 +112,7 @@ class SparseAddTest(test.TestCase):
       self.assertAllEqual(sum_out.dense_shape, [3, 3])
 
   def testSmallValuesShouldVanish(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_a = self._SparseTensor_3x3()
       sp_b = self._SparseTensor_3x3_v2()
 
@@ -141,7 +141,7 @@ class SparseAddTest(test.TestCase):
 
   def testGradients(self):
     np.random.seed(1618)  # Make it reproducible.
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for n in [10, 31]:
         for m in [4, 17]:
           sp_a, nnz_a = self._randomTensor([n, m], np.float32)
@@ -162,7 +162,7 @@ class SparseAddTest(test.TestCase):
         rand_vals_np = np.random.randn(n, m).astype(dtype)
         dense_np = np.random.randn(n, m).astype(dtype)
 
-        with self.test_session(use_gpu=False):
+        with self.cached_session(use_gpu=False):
           sparse, unused_nnz = _sparsify(rand_vals_np, index_dtype=index_dtype)
           s = sparse_ops.sparse_add(sparse,
                                     constant_op.constant(dense_np)).eval()
@@ -181,7 +181,7 @@ class SparseAddTest(test.TestCase):
     rand_vals_np = np.random.randn(n, m).astype(np.float32)
     dense_np = np.random.randn(n, m).astype(np.float32)
 
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sparse, nnz = _sparsify(rand_vals_np)
       dense = constant_op.constant(dense_np, dtype=dtypes.float32)
       s = sparse_ops.sparse_add(sparse, dense)
@@ -191,7 +191,7 @@ class SparseAddTest(test.TestCase):
       self.assertLess(err, 1e-3)
 
   def testInvalidSparseTensor(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       shape = [2, 2]
       val = [0]
       dense = constant_op.constant(np.zeros(shape, dtype=np.int32))
diff --git a/tensorflow/python/kernel_tests/sparse_concat_op_test.py b/tensorflow/python/kernel_tests/sparse_concat_op_test.py
index d3c7983128..402c5eb4ea 100644
--- a/tensorflow/python/kernel_tests/sparse_concat_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_concat_op_test.py
@@ -132,7 +132,7 @@ class SparseConcatTest(test.TestCase):
         constant_op.constant(shape, dtypes.int64))
 
   def testConcat1(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       # concat(A):
       # [    1]
       # [2    ]
@@ -155,7 +155,7 @@ class SparseConcatTest(test.TestCase):
           self.assertAllEqual(concat_out.dense_shape, [3, 3])
 
   def testConcat2(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       # concat(A, B):
       # [    1          ]
       # [2       1      ]
@@ -178,7 +178,7 @@ class SparseConcatTest(test.TestCase):
             self.assertAllEqual(concat_out.dense_shape, [3, 8])
 
   def testConcatDim0(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       # concat(A, D):
       # [    1]
       # [2    ]
@@ -204,7 +204,7 @@ class SparseConcatTest(test.TestCase):
         self.assertAllEqual(concat_out.dense_shape, np.array([5, 3]))
 
   def testConcat3(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       # concat(A, B, C):
       # [    1              ]
       # [2       1       1  ]
@@ -229,7 +229,7 @@ class SparseConcatTest(test.TestCase):
         self.assertAllEqual(concat_out.dense_shape, [3, 10])
 
   def testConcatNonNumeric(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       # concat(A, B):
       # [    a          ]
       # [b       e      ]
@@ -254,7 +254,7 @@ class SparseConcatTest(test.TestCase):
         self.assertAllEqual(concat_out.dense_shape, [3, 8])
 
   def testMismatchedRank(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_a = self._SparseTensor_3x3()
       sp_e = self._SparseTensor_2x3x4()
 
@@ -264,7 +264,7 @@ class SparseConcatTest(test.TestCase):
           sparse_ops.sparse_concat(concat_dim, [sp_a, sp_e])
 
   def testMismatchedRankExpandNonconcatDim(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_a = self._SparseTensor_3x3()
       sp_e = self._SparseTensor_2x3x4()
 
@@ -276,7 +276,7 @@ class SparseConcatTest(test.TestCase):
               concat_dim, [sp_a, sp_e], expand_nonconcat_dim=True)
 
   def testMismatchedShapes(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_a = self._SparseTensor_3x3()
       sp_b = self._SparseTensor_3x5()
       sp_c = self._SparseTensor_3x2()
@@ -290,7 +290,7 @@ class SparseConcatTest(test.TestCase):
           sess.run(sp_concat)
 
   def testMismatchedShapesExpandNonconcatDim(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_a = self._SparseTensor_3x3()
       sp_b = self._SparseTensor_3x5()
       sp_c = self._SparseTensor_3x2()
@@ -322,7 +322,7 @@ class SparseConcatTest(test.TestCase):
           self.assertAllEqual(sp_concat_dim1_out.dense_shape, [3, 13])
 
   def testShapeInferenceUnknownShapes(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_inputs = [
           self._SparseTensor_UnknownShape(),
           self._SparseTensor_UnknownShape(val_shape=[3]),
diff --git a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
index 90009fc33e..541463e76b 100644
--- a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py
@@ -48,7 +48,7 @@ class SparseMatMulTest(test.TestCase):
                      sp_b=False,
                      x_dtype=dtypes.float32,
                      y_dtype=dtypes.float32):
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       tf_x = math_ops.cast(x, x_dtype)
       tf_y = math_ops.cast(y, y_dtype)
       tf_ans = math_ops.matmul(
diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py
index 79efee3f5b..a45ce2e13b 100644
--- a/tensorflow/python/kernel_tests/sparse_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_ops_test.py
@@ -72,7 +72,7 @@ class SparseToIndicatorTest(test_util.TensorFlowTestCase):
         constant_op.constant(shape, dtypes.int64))
 
   def testInt32(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_5x6(dtypes.int32)
       output = sparse_ops.sparse_to_indicator(sp_input, 50).eval()
 
@@ -84,7 +84,7 @@ class SparseToIndicatorTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(output, expected_output)
 
   def testInt64(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_5x6(dtypes.int64)
       output = sparse_ops.sparse_to_indicator(sp_input, 50).eval()
 
@@ -96,7 +96,7 @@ class SparseToIndicatorTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(output, expected_output)
 
   def testHigherRank(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_2x3x4(dtypes.int64)
       output = sparse_ops.sparse_to_indicator(sp_input, 200).eval()
 
@@ -147,7 +147,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
   def testInt32AndFloat32(self):
     vocab_size = 50
     indices_v, values_v = self._SparseTensorValue_3x50(np.int32, np.float32)
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       for indices in (indices_v,
                       sparse_tensor.SparseTensor.from_value(indices_v)):
         for values in (values_v,
@@ -159,7 +159,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
 
   def testInt64AndFloat32(self):
     vocab_size = 50
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices, values = self._SparseTensor_3x50(np.int64, np.float32)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
@@ -168,7 +168,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
 
   def testInt64AndFloat64(self):
     vocab_size = 50
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices, values = self._SparseTensor_3x50(np.int64, np.float64)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
@@ -177,7 +177,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
 
   def testInt32AndFloat32NonCanonicalOrder(self):
     vocab_size = 50
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices, values = self._SparseTensor_3x50(np.int32, np.float32)
       sp_output = sparse_ops.sparse_merge(
           indices, values, vocab_size, already_sorted=True)
@@ -187,7 +187,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
 
   def testInt64AndFloat32NonCanonicalOrder(self):
     vocab_size = 50
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices, values = self._SparseTensor_3x50(np.int64, np.float32)
       sp_output = sparse_ops.sparse_merge(
           indices, values, vocab_size, already_sorted=True)
@@ -198,7 +198,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
   def testInt64AndFloat64NonCanonicalOrder(self):
     vocab_size = 50
     vocab_size_tensor = constant_op.constant(vocab_size, dtypes.int64)
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices, values = self._SparseTensor_3x50(np.int64, np.float64)
       sp_output = sparse_ops.sparse_merge(
           indices, values, vocab_size_tensor, already_sorted=True)
@@ -257,7 +257,7 @@ class SparseMergeHighDimTest(test_util.TensorFlowTestCase):
 
   def testInt64AndFloat32(self):
     vocab_size = [50, 31]
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices, values = self._SparseTensor_3x50(np.int64, np.float32)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
@@ -266,7 +266,7 @@ class SparseMergeHighDimTest(test_util.TensorFlowTestCase):
 
   def testInt64AndFloat64(self):
     vocab_size = [50, 31]
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices, values = self._SparseTensor_3x50(np.int64, np.float64)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
@@ -275,7 +275,7 @@ class SparseMergeHighDimTest(test_util.TensorFlowTestCase):
 
   def testInt64AndFloat64Shape(self):
     vocab_size = [50, 30]
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices, values = self._SparseTensor_3x50(np.int64, np.float64)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
@@ -297,7 +297,7 @@ class SparseRetainTest(test_util.TensorFlowTestCase):
     return sparse_tensor.SparseTensor.from_value(self._SparseTensorValue_5x6())
 
   def testBasic(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       for sp_input in (self._SparseTensorValue_5x6(), self._SparseTensor_5x6()):
         to_retain = np.array([1, 0, 0, 1, 1, 0], dtype=np.bool)
         sp_output = sparse_ops.sparse_retain(sp_input, to_retain)
@@ -309,7 +309,7 @@ class SparseRetainTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(output.dense_shape, [5, 6])
 
   def testRetainNone(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensor_5x6()
       to_retain = np.zeros((6,), dtype=np.bool)
       sp_output = sparse_ops.sparse_retain(sp_input, to_retain)
@@ -321,7 +321,7 @@ class SparseRetainTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(output.dense_shape, [5, 6])
 
   def testMismatchedRetainShape(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_5x6()
       to_retain = np.array([1, 0, 0, 1, 0], dtype=np.bool)
       with self.assertRaises(ValueError):
@@ -360,7 +360,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
     self.assertAllEqual([3, 6, 7], sp_output.get_shape())
 
   def testBasic(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensor_2x5x6()
       new_shape = np.array([3, 6, 7], dtype=np.int64)
       sp_output = sparse_ops.sparse_reset_shape(sp_input, new_shape)
@@ -373,7 +373,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(output.dense_shape, [3, 6, 7])
 
   def testInputUnavailableInGraphConstructionOk(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_2x5x6()
       new_shape = np.array([3, 6, 7], dtype=np.int64)
       sp_output = sparse_ops.sparse_reset_shape(sp_input, new_shape)
@@ -386,7 +386,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(output.dense_shape, [3, 6, 7])
 
   def testFeedInputUnavailableInGraphConstructionOk(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = array_ops.sparse_placeholder(dtype=dtypes.int32)
       new_shape = np.array([3, 6, 7], dtype=np.int64)
       sp_output = sparse_ops.sparse_reset_shape(sp_input, new_shape)
@@ -400,7 +400,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(output.dense_shape, [3, 6, 7])
 
   def testTightBoundingBox(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensor_2x5x6()
       sp_output = sparse_ops.sparse_reset_shape(sp_input)
 
@@ -412,7 +412,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(output.dense_shape, [2, 4, 5])
 
   def testTightBoundingBoxEmpty(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensor_2x5x6_Empty()
       sp_output = sparse_ops.sparse_reset_shape(sp_input)
 
@@ -423,7 +423,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(output.dense_shape, [0, 0, 0])
 
   def testInvalidRank(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_2x5x6()
       new_shape = np.array([3, 7], dtype=np.int64)
 
@@ -431,7 +431,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
         sparse_ops.sparse_reset_shape(sp_input, new_shape)
 
   def testInvalidRankNewShapeUnavailableInGraphConstruction(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       new_shape = array_ops.placeholder(dtype=dtypes.int64)
       sp_input = self._SparseTensor_2x5x6()
       out = sparse_ops.sparse_reset_shape(sp_input, new_shape)
@@ -447,7 +447,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       sparse_ops.sparse_reset_shape(sp_input, new_shape)
 
   def testInvalidDimensionSizeDynamic(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensor_2x5x6()
       new_shape = array_ops.placeholder(dtype=dtypes.int32)
       out = sparse_ops.sparse_reset_shape(sp_input, new_shape)
@@ -457,7 +457,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
 
   def testInvalidDimensionSizeInputUnavailableInGraphConstruction(self):
     sp_input = array_ops.sparse_placeholder(dtype=dtypes.int32)
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       new_shape = np.array([3, 7, 5], dtype=np.int64)
       out = sparse_ops.sparse_reset_shape(sp_input, new_shape)
 
@@ -497,7 +497,7 @@ class SparseFillEmptyRowsTest(test_util.TensorFlowTestCase):
         constant_op.constant(shape, dtypes.int64))
 
   def testFillNumber(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       for sp_input in (self._SparseTensorValue_5x6(), self._SparseTensor_5x6()):
         sp_output, empty_row_indicator = (
             sparse_ops.sparse_fill_empty_rows(sp_input, -1))
@@ -514,7 +514,7 @@ class SparseFillEmptyRowsTest(test_util.TensorFlowTestCase):
                             np.array([0, 0, 1, 0, 1]).astype(np.bool))
 
   def testFillFloat(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       values = constant_op.constant(
           [0.0, 10.0, 13.0, 14.0, 32.0, 33.0], dtype=dtypes.float64)
       default_value = constant_op.constant(-1.0, dtype=dtypes.float64)
@@ -548,7 +548,7 @@ class SparseFillEmptyRowsTest(test_util.TensorFlowTestCase):
       self.assertLess(default_value_grad_err, 1e-8)
 
   def testFillString(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensor_String5x6()
       sp_output, empty_row_indicator = (
           sparse_ops.sparse_fill_empty_rows(sp_input, ""))
@@ -566,7 +566,7 @@ class SparseFillEmptyRowsTest(test_util.TensorFlowTestCase):
                           np.array([0, 0, 1, 0, 1]).astype(np.bool))
 
   def testNoEmptyRows(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensor_2x6()
       sp_output, empty_row_indicator = (
           sparse_ops.sparse_fill_empty_rows(sp_input, -1))
@@ -590,7 +590,7 @@ class SparseAddTest(test_util.TensorFlowTestCase):
     sp_input = sparse_tensor.SparseTensor(indices, values, shape)
     sp_output = sparse_ops.sparse_add(sp_input, sp_input)
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sess.run(variables.global_variables_initializer())
       output = sess.run(sp_output)
       self.assertAllEqual(output.values, [2])
@@ -663,7 +663,7 @@ class SparseReduceTest(test_util.TensorFlowTestCase):
 
     sp_t = sparse_tensor.SparseTensor(self.ind, self.vals, self.dense_shape)
 
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       self._compare_all(sp_t, None, ndims=2)
       self._compare_all(sp_t, 0, ndims=2)
       self._compare_all(sp_t, [1], ndims=2)
@@ -674,7 +674,7 @@ class SparseReduceTest(test_util.TensorFlowTestCase):
 
     np.random.seed(1618)
     test_dims = [(1618, 1, 11, 7, 1), (1,), (1, 1, 1)]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for dims in test_dims:
         sp_t, unused_nnz = _sparsify(np.random.randn(*dims))
         # reduce all using None
@@ -686,7 +686,7 @@ class SparseReduceTest(test_util.TensorFlowTestCase):
 
   def testInvalidAxes(self):
     sp_t = sparse_tensor.SparseTensor(self.ind, self.vals, self.dense_shape)
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       with self.assertRaisesOpError("Invalid reduction dimension -3"):
         sparse_ops.sparse_reduce_sum(sp_t, -3).eval()
       with self.assertRaisesOpError("Invalid reduction dimension 2"):
@@ -702,7 +702,7 @@ class SparseReduceTest(test_util.TensorFlowTestCase):
 
     np.random.seed(8161)
     test_dims = [(11, 1, 5, 7, 1), (2, 2)]
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for dims in test_dims:
         sp_t, nnz = _sparsify(np.random.randn(*dims))
         # reduce random axes from 1D to N-D
@@ -742,7 +742,7 @@ class SparseMathOpsTest(test_util.TensorFlowTestCase):
     sp_shapes = [(10, 10, 10), (5, 5), (1618,), (3, 3, 7)]
     dense_shapes = [(10, 10, 1), (5, 5), (1,), (1, 7)]
 
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for dtype in [np.float32, np.float64, np.int32, np.int64]:
         for sp_shape, dense_shape in zip(sp_shapes, dense_shapes):
           sp_vals_np = np.random.rand(*sp_shape).astype(dtype) + 1
@@ -761,7 +761,7 @@ class SparseMathOpsTest(test_util.TensorFlowTestCase):
             self.assertEqual(res.values.eval().dtype, np.float64)
 
   def testCwiseAdd(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       # Identity(2) + AllOnes(2,2).  Should be equal to 2 * Identity(2).
       indices = [[0, 0], [1, 1]]
       vals = [1, 1]
@@ -784,7 +784,7 @@ class SparseMathOpsTest(test_util.TensorFlowTestCase):
     sp_shapes = [(10, 10, 10), (5, 5), (1618,), (3, 3, 7)]
     dense_shapes = [(10, 10, 1), (5, 5), (1,), (1, 7)]
 
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for dtype in [np.float32, np.float64]:
         for sp_shape, dense_shape in zip(sp_shapes, dense_shapes):
           sp_vals_np = np.random.rand(*sp_shape).astype(dtype) + 1
@@ -822,7 +822,7 @@ class SparseSoftmaxTest(test_util.TensorFlowTestCase):
       batched_sp_t, unused_nnz1 = _sparsify(
           sp_vals_np.reshape((1, n, m)), thresh=0.)  # No masking.
 
-      with self.test_session(use_gpu=False):
+      with self.cached_session(use_gpu=False):
         densified = constant_op.constant(sp_vals_np)
 
         sp_result = sparse_ops.sparse_softmax(batched_sp_t).eval(
@@ -853,7 +853,7 @@ class SparseSoftmaxTest(test_util.TensorFlowTestCase):
       sp_t, unused_nnz = _sparsify(values, thresh=1e-2)
       expected_values = [1., 1., 1., .5, .5]
 
-      with self.test_session(use_gpu=False):
+      with self.cached_session(use_gpu=False):
         result = sparse_ops.sparse_softmax(sp_t).eval()
 
         self.assertAllEqual(expected_values, result.values)
@@ -862,7 +862,7 @@ class SparseSoftmaxTest(test_util.TensorFlowTestCase):
 
   def testGradient(self):
     x_shape = [2, 5, 10]
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       for dtype in [np.float32, np.float64]:
         x_np = np.random.randn(*x_shape).astype(dtype)
         x_tf, nnz = _sparsify(x_np)
@@ -880,7 +880,7 @@ class SparseMinimumMaximumTest(test_util.TensorFlowTestCase):
     self.assertAllEqual(a.dense_shape, b.dense_shape)
 
   def testBasic(self):
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       # 1-D, values at index 0.
       sp_zero = sparse_tensor.SparseTensor([[0]], [0], [7])
       sp_one = sparse_tensor.SparseTensor([[0]], [1], [7])
@@ -908,7 +908,7 @@ class SparseMinimumMaximumTest(test_util.TensorFlowTestCase):
         sp_a, unused_a_nnz = _sparsify(a_np, thresh=-.5)
         sp_b, unused_b_nnz = _sparsify(b_np, thresh=-.5)
 
-        with self.test_session(use_gpu=False):
+        with self.cached_session(use_gpu=False):
           maximum_tf = sparse_ops.sparse_maximum(sp_a, sp_b)
           maximum_tf_densified = sparse_ops.sparse_tensor_to_dense(
               maximum_tf).eval()
@@ -925,7 +925,7 @@ class SparseMinimumMaximumTest(test_util.TensorFlowTestCase):
             np.minimum(a_densified, b_densified), minimum_tf_densified)
 
   def testMismatchedShapes(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_zero = sparse_tensor.SparseTensor([[0, 0]], [0], [1, 1])
       sp_one = sparse_tensor.SparseTensor([[0]], [1], [2])
       with self.assertRaisesOpError("Operands do not have the same ranks"):
@@ -943,7 +943,7 @@ class SparseTransposeTest(test.TestCase):
     if np.__version__ == "1.13.0":
       self.skipTest("numpy 1.13.0 bug")
 
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       np.random.seed(1618)
       shapes = [np.random.randint(1, 10, size=rank) for rank in range(1, 6)]
       for shape in shapes:
diff --git a/tensorflow/python/kernel_tests/sparse_reorder_op_test.py b/tensorflow/python/kernel_tests/sparse_reorder_op_test.py
index 18335d665a..7b83ae5177 100644
--- a/tensorflow/python/kernel_tests/sparse_reorder_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_reorder_op_test.py
@@ -56,7 +56,7 @@ class SparseReorderTest(test.TestCase):
     self.assertAllEqual((5, 6), sp_output.get_shape())
 
   def testAlreadyInOrder(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       input_val = self._SparseTensorValue_5x6(np.arange(6))
       sp_output = sparse_ops.sparse_reorder(input_val)
 
@@ -66,7 +66,7 @@ class SparseReorderTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
 
   def testFeedAlreadyInOrder(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6(np.arange(6))
       sp_output = sparse_ops.sparse_reorder(sp_input)
@@ -78,7 +78,7 @@ class SparseReorderTest(test.TestCase):
 
   def testOutOfOrder(self):
     expected_output_val = self._SparseTensorValue_5x6(np.arange(6))
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       for _ in range(5):  # To test various random permutations
         input_val = self._SparseTensorValue_5x6(np.random.permutation(6))
         sp_output = sparse_ops.sparse_reorder(input_val)
@@ -91,7 +91,7 @@ class SparseReorderTest(test.TestCase):
 
   def testFeedOutOfOrder(self):
     expected_output_val = self._SparseTensorValue_5x6(np.arange(6))
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       for _ in range(5):  # To test various random permutations
         sp_input = self._SparseTensorPlaceholder()
         input_val = self._SparseTensorValue_5x6(np.random.permutation(6))
@@ -104,7 +104,7 @@ class SparseReorderTest(test.TestCase):
                             expected_output_val.dense_shape)
 
   def testGradients(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for _ in range(5):  # To test various random permutations
         input_val = self._SparseTensorValue_5x6(np.random.permutation(6))
         sp_input = sparse_tensor.SparseTensor(input_val.indices,
diff --git a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
index 89a54c8ab6..f7be397c33 100644
--- a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
@@ -77,7 +77,7 @@ class SparseReshapeTest(test.TestCase):
       sparse_ops.sparse_reshape(sp_input, shape=(-1, 7))
 
   def testSameShape(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(input_val, [5, 6])
 
@@ -87,7 +87,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
 
   def testFeedSameShape(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [5, 6])
@@ -98,7 +98,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
 
   def testWorksWellWithTfShape(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       shape = array_ops.shape(sp_input)  # tf.shape generates int32 output
@@ -110,7 +110,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
 
   def testFeedSameShapeWithInferredDim(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [-1, 6])
@@ -121,7 +121,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
 
   def testFeedNewShapeSameRank(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [3, 10])
@@ -134,7 +134,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, [3, 10])
 
   def testFeedNewShapeSameRankWithInferredDim(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [3, -1])
@@ -147,7 +147,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, [3, 10])
 
   def testUpRank(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(input_val, [2, 3, 5])
 
@@ -159,7 +159,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, [2, 3, 5])
 
   def testFeedUpRank(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [2, 3, 5])
@@ -172,7 +172,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, [2, 3, 5])
 
   def testFeedUpRankWithInferredDim(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [2, -1, 5])
@@ -185,7 +185,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, [2, 3, 5])
 
   def testFeedDownRank(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_2x3x4()
       sp_output = sparse_ops.sparse_reshape(sp_input, [6, 4])
@@ -198,7 +198,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, [6, 4])
 
   def testFeedDownRankWithInferredDim(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_2x3x4()
       sp_output = sparse_ops.sparse_reshape(sp_input, [6, -1])
@@ -211,7 +211,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.dense_shape, [6, 4])
 
   def testFeedMultipleInferredDims(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [4, -1, -1])
@@ -225,7 +225,7 @@ class SparseReshapeTest(test.TestCase):
       sparse_ops.sparse_reshape(sp_input, [4, 7])
 
   def testFeedMismatchedSizes(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [4, 7])
@@ -234,7 +234,7 @@ class SparseReshapeTest(test.TestCase):
         sess.run(sp_output, {sp_input: input_val})
 
   def testFeedMismatchedSizesWithInferredDim(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(sp_input, [4, -1])
@@ -242,7 +242,7 @@ class SparseReshapeTest(test.TestCase):
         sess.run(sp_output, {sp_input: input_val})
 
   def testFeedPartialShapes(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       # Incorporate new rank into shape information if known
       sp_input = self._SparseTensorPlaceholder()
       sp_output = sparse_ops.sparse_reshape(sp_input, [2, 3, 5])
@@ -267,7 +267,7 @@ class SparseReshapeTest(test.TestCase):
       self.assertListEqual(sp_output.dense_shape.get_shape().as_list(), [None])
 
   def testFeedDenseReshapeSemantics(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       # Compute a random rank-5 initial shape and new shape, randomly sparsify
       # it, and check that the output of SparseReshape has the same semantics
       # as a dense reshape.
diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
index 3847cebc7d..b24a086969 100644
--- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
@@ -68,7 +68,7 @@ class SerializeSparseTest(test.TestCase):
                                       serialize_fn,
                                       deserialize_fn,
                                       out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_5x6(np.arange(6))
       serialized = serialize_fn(sp_input, out_type=out_type)
       sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
@@ -92,7 +92,7 @@ class SerializeSparseTest(test.TestCase):
                                            serialize_fn,
                                            deserialize_fn,
                                            out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_5x6(np.arange(6))
       serialized = serialize_fn(sp_input, out_type=out_type)
       serialized = array_ops.stack([serialized, serialized])
@@ -125,7 +125,7 @@ class SerializeSparseTest(test.TestCase):
 
   def _testSerializeDeserializeBatchInconsistentShapeHelper(
       self, serialize_fn, deserialize_fn, out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorValue_5x6(np.arange(6))
       sp_input1 = self._SparseTensorValue_3x4(np.arange(6))
       serialized0 = serialize_fn(sp_input0, out_type=out_type)
@@ -158,7 +158,7 @@ class SerializeSparseTest(test.TestCase):
                                                  serialize_fn,
                                                  deserialize_fn,
                                                  out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorValue_5x6(np.arange(6))
       serialized = serialize_fn(sp_input, out_type=out_type)
       serialized = array_ops.stack([serialized, serialized])
@@ -201,7 +201,7 @@ class SerializeSparseTest(test.TestCase):
                                                serialize_fn,
                                                deserialize_fn,
                                                out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       sp_input1 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
@@ -240,7 +240,7 @@ class SerializeSparseTest(test.TestCase):
   def _testSerializeManyShapeHelper(self,
                                     serialize_many_fn,
                                     out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       # N == 4 because shape_value == [4, 5]
       indices_value = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
       values_value = np.array([b"a", b"b", b"c"])
@@ -268,7 +268,7 @@ class SerializeSparseTest(test.TestCase):
                                                serialize_many_fn,
                                                deserialize_fn,
                                                out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       # N == 4 because shape_value == [4, 5]
       indices_value = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
       values_value = np.array([b"a", b"b", b"c"])
@@ -301,7 +301,7 @@ class SerializeSparseTest(test.TestCase):
         dtypes.variant)
 
   def testVariantSerializeDeserializeScalar(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices_value = np.array([[]], dtype=np.int64)
       values_value = np.array([37], dtype=np.int32)
       shape_value = np.array([], dtype=np.int64)
@@ -322,7 +322,7 @@ class SerializeSparseTest(test.TestCase):
       self.assertAllEqual(deserialized_value.dense_shape, shape_value)
 
   def testVariantSerializeDeserializeScalarBatch(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       indices_value = np.array([[]], dtype=np.int64)
       values_value = np.array([37], dtype=np.int32)
       shape_value = np.array([], dtype=np.int64)
@@ -349,7 +349,7 @@ class SerializeSparseTest(test.TestCase):
                                            serialize_fn,
                                            deserialize_fn,
                                            out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       sp_input1 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
@@ -384,7 +384,7 @@ class SerializeSparseTest(test.TestCase):
                                                   serialize_fn,
                                                   deserialize_fn,
                                                   out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       sp_input1 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
@@ -419,7 +419,7 @@ class SerializeSparseTest(test.TestCase):
                                               serialize_fn,
                                               deserialize_fn,
                                               out_type=dtypes.string):
-    with self.test_session(use_gpu=False) as sess:
+    with self.cached_session(use_gpu=False) as sess:
       sp_input0 = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
       serialized0 = serialize_fn(sp_input0, out_type=out_type)
diff --git a/tensorflow/python/kernel_tests/sparse_slice_op_test.py b/tensorflow/python/kernel_tests/sparse_slice_op_test.py
index 97f30daf4a..098353741f 100644
--- a/tensorflow/python/kernel_tests/sparse_slice_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_slice_op_test.py
@@ -80,7 +80,7 @@ class SparseSliceOpTest(test.TestCase):
         self._SparseTensorValue_3x4x2())
 
   def testSliceMatrixRows(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_4x6()
       sp_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [2, 6])
       sp_tensor1 = sparse_ops.sparse_slice(sp_input, [2, 0], [3, 7])
@@ -97,7 +97,7 @@ class SparseSliceOpTest(test.TestCase):
       self.assertAllEqual(sp_tensor1.dense_shape.eval(), [2, 6])
 
   def testSliceMatrixUnevenCols(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_5x7()
       sp_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [5, 3])
       sp_tensor1 = sparse_ops.sparse_slice(sp_input, [0, 3], [5, 2])
@@ -138,7 +138,7 @@ class SparseSliceOpTest(test.TestCase):
       self.assertAllEqual(sp_tensor3.dense_shape.eval(), [5, 1])
 
   def testSliceMatrixUnevenRows(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_5x7()
       sp_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [3, 7])
       sp_tensor1 = sparse_ops.sparse_slice(sp_input, [3, 0], [3, 7])
@@ -174,7 +174,7 @@ class SparseSliceOpTest(test.TestCase):
     return
 
   def testSliceAllRows(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_4x6()
       sp_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [1, 6])
       sp_tensor1 = sparse_ops.sparse_slice(sp_input, [1, 0], [1, 6])
@@ -196,7 +196,7 @@ class SparseSliceOpTest(test.TestCase):
       self.assertAllEqual(sp_tensor3.dense_shape.eval(), [1, 6])
 
   def testSliceColumns(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_4x6()
       sparse_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [4, 2])
       sparse_tensor1 = sparse_ops.sparse_slice(sp_input, [0, 2], [5, 2])
@@ -216,7 +216,7 @@ class SparseSliceOpTest(test.TestCase):
       self.assertAllEqual(sparse_tensor2.dense_shape.eval(), [4, 2])
 
   def testSliceAllColumns(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_input = self._SparseTensor_4x6()
       sparse_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [4, 1])
       sparse_tensor1 = sparse_ops.sparse_slice(sp_input, [0, 1], [4, 1])
@@ -252,7 +252,7 @@ class SparseSliceOpTest(test.TestCase):
                       ([0, 2], [5, 2]),
                       ([0, 4], [5, 3])]
 
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for start, size in start_and_size:
         sp_output = sparse_ops.sparse_slice(sp_input, start, size)
         nnz_in = len(sp_input.values.eval())
diff --git a/tensorflow/python/kernel_tests/sparse_split_op_test.py b/tensorflow/python/kernel_tests/sparse_split_op_test.py
index 23c6c390b2..95661ded4b 100644
--- a/tensorflow/python/kernel_tests/sparse_split_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_split_op_test.py
@@ -76,7 +76,7 @@ class SparseSplitOpTest(test.TestCase):
     ))
 
   def testSplitMatrixRows(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_tensors = sparse_ops.sparse_split(
           sp_input=self._SparseTensor_4x6(), num_split=2, axis=0)
       self.assertAllEqual(len(sp_tensors), 2)
@@ -93,7 +93,7 @@ class SparseSplitOpTest(test.TestCase):
       self.assertAllEqual(sp_tensors[1].dense_shape.eval(), [2, 6])
 
   def testSplitMatrixUnevenCols(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_tensors_3 = sparse_ops.sparse_split(
           sp_input=self._SparseTensor_5x7(), num_split=3, axis=1)
       self.assertAllEqual(len(sp_tensors_3), 3)
@@ -132,7 +132,7 @@ class SparseSplitOpTest(test.TestCase):
       self.assertAllEqual(sp_tensors_4[3].dense_shape.eval(), [5, 1])
 
   def testSplitMatrixUnevenRows(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_tensors_2 = sparse_ops.sparse_split(
           sp_input=self._SparseTensor_5x7(), num_split=2, axis=0)
       self.assertAllEqual(sp_tensors_2[0].indices.eval(),
@@ -168,7 +168,7 @@ class SparseSplitOpTest(test.TestCase):
     return
 
   def testSplitAllRows(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sp_tensors = sparse_ops.sparse_split(
           sp_input=self._SparseTensor_4x6(), num_split=4, axis=0)
       self.assertAllEqual(len(sp_tensors), 4)
@@ -190,7 +190,7 @@ class SparseSplitOpTest(test.TestCase):
       self.assertAllEqual(sp_tensors[3].dense_shape.eval(), [1, 6])
 
   def testSplitColumns(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sparse_tensors = sparse_ops.sparse_split(
           sp_input=self._SparseTensor_4x6(), num_split=3, axis=1)
       self.assertAllEqual(len(sparse_tensors), 3)
@@ -208,7 +208,7 @@ class SparseSplitOpTest(test.TestCase):
       self.assertAllEqual(sparse_tensors[2].dense_shape.eval(), [4, 2])
 
   def testSplitAllColumns(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       sparse_tensors = sparse_ops.sparse_split(
           sp_input=self._SparseTensor_4x6(), num_split=6, axis=1)
       self.assertAllEqual(len(sparse_tensors), 6)
@@ -237,7 +237,7 @@ class SparseSplitOpTest(test.TestCase):
   def testSliceConcat(self):
     for sp_input in (self._SparseTensorValue_3x4x2(),
                      self._SparseTensor_3x4x2()):
-      with self.test_session(use_gpu=False):
+      with self.cached_session(use_gpu=False):
         sparse_tensors = sparse_ops.sparse_split(
             sp_input=sp_input, num_split=2, axis=1)
         concat_tensor = sparse_ops.sparse_concat(1, sparse_tensors)
diff --git a/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_grad_test.py b/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_grad_test.py
index e8b94294b1..b8f33d6a81 100644
--- a/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_grad_test.py
+++ b/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_grad_test.py
@@ -72,7 +72,7 @@ class SparseTensorDenseMatMulGradientTest(test.TestCase):
     matmul = sparse_ops.sparse_tensor_dense_matmul(
         sp_t, dense_t, adjoint_a=adjoint_a, adjoint_b=adjoint_b, name=name)
 
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       dense_t_shape = [m, k] if adjoint_b else [k, m]
       sp_t_val_shape = [nnz]
       err = gradient_checker.compute_gradient_error(
diff --git a/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_op_test.py b/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_op_test.py
index e20c699252..fe334045af 100644
--- a/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_op_test.py
@@ -65,7 +65,7 @@ class SparseTensorDenseMatMulTest(test.TestCase):
     x_values = x[np.where(x)]
     x_shape = x.shape
 
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       sp_x_value = sparse_tensor.SparseTensorValue(
           indices=x_indices, values=x_values, dense_shape=x_shape)
       tf_value_ans = sparse_ops.sparse_tensor_dense_matmul(
@@ -133,7 +133,7 @@ class SparseTensorDenseMatMulTest(test.TestCase):
 
   def testInvalidIndicesForSparseTensorDenseMatmul(self):
     # Note: use_gpu=False because nice errors are only returned from CPU kernel.
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       indices = np.matrix([[1, 10]]).astype(np.int64)
       values = np.array([10]).astype(np.float32)
       shape = [3, 2]
@@ -166,7 +166,7 @@ class SparseTensorDenseMatMulTest(test.TestCase):
     # Note: use_gpu=False because nice errors are only returned from CPU kerne
     if not test.is_gpu_available():
       return
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       indices = np.array([[1, 10]]).astype(np.int64)
       values = np.array([10]).astype(np.float32)
       shape = [3, 2]
diff --git a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
index fdfe1001b8..e08464a701 100644
--- a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
@@ -99,7 +99,7 @@ class SparseTensorsMapTest(test.TestCase):
       self.assertAllEqual(combined_shape, [2, 5, 6])
 
   def testFeedAddTakeMany(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
       input1_val = self._SparseTensorValue_3x4(np.arange(6))
@@ -125,7 +125,7 @@ class SparseTensorsMapTest(test.TestCase):
       self.assertAllEqual(combined_shape, [2, 5, 6])
 
   def testAddManyTakeManyRoundTrip(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       # N == 4 because shape_value == [4, 5]
       indices_value = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
       values_value = np.array([b"a", b"b", b"c"])
@@ -147,7 +147,7 @@ class SparseTensorsMapTest(test.TestCase):
       self.assertAllEqual(roundtrip_value.dense_shape, shape_value)
 
   def testDeserializeFailsInconsistentRank(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
       input0_val = self._SparseTensorValue_5x6(np.arange(6))
       input1_val = self._SparseTensorValue_1x1x1()
@@ -168,7 +168,7 @@ class SparseTensorsMapTest(test.TestCase):
         sess.run(sp_roundtrip)
 
   def testTakeManyFailsWrongInputOp(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       input_val = self._SparseTensorValue_5x6(np.arange(6))
       handle = add_sparse_to_tensors_map(input_val)
       handle_value = sess.run(handle)
diff --git a/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py b/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py
index c71746cc99..7f63532e10 100644
--- a/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py
+++ b/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py
@@ -42,38 +42,38 @@ def _SparseToDense(sparse_indices,
 class SparseToDenseTest(test.TestCase):
 
   def testInt(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       tf_ans = _SparseToDense([1, 3], [5], 1, 0).eval()
     np_ans = np.array([0, 1, 0, 1, 0]).astype(np.int32)
     self.assertAllClose(np_ans, tf_ans)
 
   def testFloat(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       tf_ans = _SparseToDense([1, 3], [5], 1.0, 0.0).eval()
     np_ans = np.array([0, 1, 0, 1, 0]).astype(np.float32)
     self.assertAllClose(np_ans, tf_ans)
 
   def testString(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       tf_ans = _SparseToDense([1, 3], [5], "a", "b").eval()
     np_ans = np.array(["b", "a", "b", "a", "b"]).astype(np.string_)
     self.assertAllEqual(np_ans, tf_ans)
 
   def testSetValue(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       tf_ans = _SparseToDense([1, 3], [5], [1, 2], -1).eval()
     np_ans = np.array([-1, 1, -1, 2, -1]).astype(np.int32)
     self.assertAllClose(np_ans, tf_ans)
 
   def testSetSingleValue(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       tf_ans = _SparseToDense([1, 3], [5], 1, -1).eval()
     np_ans = np.array([-1, 1, -1, 1, -1]).astype(np.int32)
     self.assertAllClose(np_ans, tf_ans)
 
   def test2d(self):
     # pylint: disable=bad-whitespace
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       tf_ans = _SparseToDense([[1, 3], [2, 0]], [3, 4], 1, -1).eval()
     np_ans = np.array([[-1, -1, -1, -1],
                        [-1, -1, -1,  1],
@@ -86,7 +86,7 @@ class SparseToDenseTest(test.TestCase):
       self.assertAllEqual(x, [0, 0, 7, 0])
 
   def test3d(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       tf_ans = _SparseToDense([[1, 3, 0], [2, 0, 1]], [3, 4, 2], 1, -1).eval()
     np_ans = np.ones((3, 4, 2), dtype=np.int32) * -1
     np_ans[1, 3, 0] = 1
@@ -176,7 +176,7 @@ class SparseToDenseTest(test.TestCase):
       dense_without_validation.eval()
 
   def testShapeInferenceKnownShape(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       indices = array_ops.placeholder(dtypes.int64)
 
       shape = [4, 5, 6]
@@ -188,7 +188,7 @@ class SparseToDenseTest(test.TestCase):
       self.assertEqual(output.get_shape().as_list(), [None, None, None])
 
   def testShapeInferenceUnknownShape(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       indices = array_ops.placeholder(dtypes.int64)
       shape = array_ops.placeholder(dtypes.int64)
       output = sparse_ops.sparse_to_dense(indices, shape, 1, 0)
diff --git a/tensorflow/python/kernel_tests/sparse_xent_op_test.py b/tensorflow/python/kernel_tests/sparse_xent_op_test.py
index a841fe83a7..0510bc5321 100644
--- a/tensorflow/python/kernel_tests/sparse_xent_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_xent_op_test.py
@@ -63,7 +63,7 @@ class SparseXentTest(test.TestCase):
 
   def _testXent(self, np_features, np_labels):
     np_loss, np_backprop = self._npXent(np_features, np_labels)
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
           np_features, np_labels)
       tf_loss, tf_backprop = sess.run([loss, backprop])
@@ -72,7 +72,7 @@ class SparseXentTest(test.TestCase):
 
   def testSingleClass(self):
     for label_dtype in np.int32, np.int64:
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
             np.array([[1.], [-1.], [0.]]).astype(np.float32),
             np.array([0, 0, 0]).astype(label_dtype))
@@ -86,7 +86,7 @@ class SparseXentTest(test.TestCase):
     labels = [4, 3, 0, -1]
 
     if test.is_built_with_cuda() and test.is_gpu_available():
-      with self.test_session(use_gpu=True) as sess:
+      with self.session(use_gpu=True) as sess:
         loss, backprop = (
             gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
                 features, labels))
@@ -100,7 +100,7 @@ class SparseXentTest(test.TestCase):
         self.assertAllClose(
             [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3)
 
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       loss, backprop = (
           gen_nn_ops.sparse_softmax_cross_entropy_with_logits(features, labels))
       with self.assertRaisesOpError("Received a label value of"):
@@ -141,19 +141,19 @@ class SparseXentTest(test.TestCase):
         np.array([1.3862, 3.4420]), np_loss, rtol=1.e-3, atol=1.e-3)
 
   def testShapeMismatch(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesRegexp(ValueError, ".*Rank mismatch:*"):
         nn_ops.sparse_softmax_cross_entropy_with_logits(
             labels=[[0, 2]], logits=[[0., 1.], [2., 3.], [2., 3.]])
 
   def testScalar(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaisesRegexp(ValueError, ".*Logits cannot be scalars*"):
         nn_ops.sparse_softmax_cross_entropy_with_logits(
             labels=constant_op.constant(0), logits=constant_op.constant(1.0))
 
   def testLabelsPlaceholderScalar(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       labels = array_ops.placeholder(np.int32)
       y = nn_ops.sparse_softmax_cross_entropy_with_logits(
           labels=labels, logits=[[7.]])
@@ -161,7 +161,7 @@ class SparseXentTest(test.TestCase):
         y.eval(feed_dict={labels: 0})
 
   def testVector(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
           labels=constant_op.constant(0), logits=constant_op.constant([1.0]))
       self.assertAllClose(0.0, loss.eval())
@@ -188,7 +188,7 @@ class SparseXentTest(test.TestCase):
     self._testXent(np.zeros((0, 3)), np.zeros((0,), dtype=np.int32))
 
   def testGradient(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       l = constant_op.constant([3, 0, 1], name="l")
       f = constant_op.constant(
           [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4],
@@ -222,7 +222,7 @@ class SparseXentTest(test.TestCase):
     np_loss, np_backprop = self._npXent(np.array(features), np.array(labels))
     # manually reshape loss
     np_loss = np.reshape(np_loss, np.array(labels).shape)
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
           labels=labels, logits=features)
       backprop = loss.op.inputs[0].op.outputs[1]
@@ -242,7 +242,7 @@ class SparseXentTest(test.TestCase):
     self._testHighDim(features, labels)
 
   def testScalarHandling(self):
-    with self.test_session(use_gpu=False) as sess:
+    with self.session(use_gpu=False) as sess:
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    ".*labels must be 1-D.*"):
         labels = array_ops.placeholder(dtypes.int32, shape=[None, 1])
diff --git a/tensorflow/python/kernel_tests/split_op_test.py b/tensorflow/python/kernel_tests/split_op_test.py
index 3f9b029a6a..944b0e59b1 100644
--- a/tensorflow/python/kernel_tests/split_op_test.py
+++ b/tensorflow/python/kernel_tests/split_op_test.py
@@ -54,13 +54,13 @@ class SplitOpTest(test.TestCase):
     model_input = array_ops.placeholder(dtypes.float32)
     inp = np.zeros((1, 10))
     # check that we still fail at runtime if the shapes were unknown
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       with self.assertRaises(errors_impl.InvalidArgumentError):
         sess.run(array_ops.split(model_input, [4]), {model_input: inp})
 
     # test that we can pass a scalar Tensor as num_splits
     for axis in [0, -2]:
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         result = sess.run(
             array_ops.split(
                 array_ops.ones([4, 4]),
@@ -82,7 +82,7 @@ class SplitOpTest(test.TestCase):
     model_input2 = array_ops.placeholder(dtypes.float32, shape=[None, 2])
     result = array_ops.split(model_input2, [2, 2], axis=0)[0]
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       sess.run(result, feed_dict={model_input2: np.ones([4, 2])})
 
   def testFailWithoutExplicitNum(self):
@@ -90,7 +90,7 @@ class SplitOpTest(test.TestCase):
 
     value = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with self.assertRaises(ValueError) as context:
         sess.run(array_ops.split(value, size_splits), {size_splits: [2, 2, 6]})
       self.assertTrue("Cannot infer num from shape" in str(context.exception))
@@ -211,7 +211,7 @@ class SplitOpTest(test.TestCase):
 
   def testOutputShape(self):
     for axis in [1, -1]:
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         tensor = array_ops.placeholder(dtypes.float32, shape=[None, 12])
         size_splits = [3, 7, 2]
         outputs = array_ops.split(tensor, size_splits, axis)
@@ -312,7 +312,7 @@ class SplitOpTest(test.TestCase):
 
   def _testGradientsSimple(self, dtype):
     inp = self._makeData((4, 4), dtype)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       inp_tensor = ops.convert_to_tensor(inp)
       s = array_ops.split(value=inp_tensor, num_or_size_splits=4, axis=1)
       inp_grads = [self._makeData((4, 1), dtype)for _ in range(4)]
@@ -375,7 +375,7 @@ class SplitOpTest(test.TestCase):
 
     splits = array_ops.placeholder(dtypes.int32, [3])
     y = array_ops.split(values, splits, axis=x)
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    "must have exactly one element"):
         sess.run(y, {x: np.array([], dtype=np.int32), splits: [4, 11, 15]})
diff --git a/tensorflow/python/kernel_tests/stack_op_test.py b/tensorflow/python/kernel_tests/stack_op_test.py
index 2a33c594a4..4b355620bf 100644
--- a/tensorflow/python/kernel_tests/stack_op_test.py
+++ b/tensorflow/python/kernel_tests/stack_op_test.py
@@ -43,7 +43,7 @@ class StackOpTest(test.TestCase):
 
   def testSimple(self):
     np.random.seed(7)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
         for dtype in [np.bool, np.float32, np.int32, np.int64]:
           data = np.random.randn(*shape).astype(dtype)
@@ -56,7 +56,7 @@ class StackOpTest(test.TestCase):
 
   def testSimpleParallelCPU(self):
     np.random.seed(7)
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
         data = np.random.randn(*shape).astype(np.float32)
         xs = list(map(constant_op.constant, data))
@@ -65,7 +65,7 @@ class StackOpTest(test.TestCase):
 
   def testSimpleParallelGPU(self):
     np.random.seed(7)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
         data = np.random.randn(*shape).astype(np.float32)
         xs = list(map(constant_op.constant, data))
@@ -74,7 +74,7 @@ class StackOpTest(test.TestCase):
 
   def testConst(self):
     np.random.seed(7)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
         for dtype in [np.bool, np.float32, np.int32, np.int64]:
           data = np.random.randn(*shape).astype(dtype)
@@ -98,7 +98,7 @@ class StackOpTest(test.TestCase):
 
   def testConstParallelCPU(self):
     np.random.seed(7)
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
         data = np.random.randn(*shape).astype(np.float32)
         if len(shape) == 1:
@@ -112,7 +112,7 @@ class StackOpTest(test.TestCase):
 
   def testConstParallelGPU(self):
     np.random.seed(7)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
         data = np.random.randn(*shape).astype(np.float32)
         if len(shape) == 1:
@@ -129,7 +129,7 @@ class StackOpTest(test.TestCase):
     for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
       data = np.random.randn(*shape)
       shapes = [shape[1:]] * shape[0]
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # TODO(irving): Remove list() once we handle maps correctly
         xs = list(map(constant_op.constant, data))
         c = array_ops.stack(xs)
@@ -143,7 +143,7 @@ class StackOpTest(test.TestCase):
       shapes = [shape[1:]] * shape[0]
       out_shape = list(shape[1:])
       out_shape.insert(1, shape[0])
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         # TODO(irving): Remove list() once we handle maps correctly
         xs = list(map(constant_op.constant, data))
         c = array_ops.stack(xs, axis=1)
@@ -152,7 +152,7 @@ class StackOpTest(test.TestCase):
 
   def testZeroSizeCPU(self):
     # Verify that stack doesn't crash for zero size inputs
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       for shape in (0,), (3, 0), (0, 3):
         x = np.zeros((2,) + shape).astype(np.int32)
         p = array_ops.stack(list(x)).eval()
@@ -163,7 +163,7 @@ class StackOpTest(test.TestCase):
 
   def testZeroSizeGPU(self):
     # Verify that stack doesn't crash for zero size inputs
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in (0,), (3, 0), (0, 3):
         x = np.zeros((2,) + shape).astype(np.int32)
         p = array_ops.stack(list(x)).eval()
@@ -173,7 +173,7 @@ class StackOpTest(test.TestCase):
         self.assertAllEqual(p, x)
 
   def testAxis0DefaultCPU(self):
-    with self.test_session(use_gpu=False):
+    with self.session(use_gpu=False):
       t = [constant_op.constant([1, 2, 3]), constant_op.constant([4, 5, 6])]
       stacked = array_ops.stack(t).eval()
       parallel_stacked = array_ops.parallel_stack(t).eval()
@@ -183,7 +183,7 @@ class StackOpTest(test.TestCase):
     self.assertAllEqual(parallel_stacked, expected)
 
   def testAxis0DefaultGPU(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       t = [constant_op.constant([1, 2, 3]), constant_op.constant([4, 5, 6])]
       stacked = array_ops.stack(t).eval()
       parallel_stacked = array_ops.parallel_stack(t).eval()
@@ -201,7 +201,7 @@ class StackOpTest(test.TestCase):
       for j in range(-i, i):
         test_arrays = np_split_squeeze(expected, j)
 
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           actual_pack = array_ops.stack(test_arrays, axis=j)
           self.assertEqual(expected.shape, actual_pack.get_shape())
           actual_pack = actual_pack.eval()
@@ -226,7 +226,7 @@ class StackOpTest(test.TestCase):
 class AutomaticStackingTest(test.TestCase):
 
   def testSimple(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       self.assertAllEqual(
           [1, 0, 2],
           ops.convert_to_tensor([1, constant_op.constant(0), 2]).eval())
@@ -246,7 +246,7 @@ class AutomaticStackingTest(test.TestCase):
                           ]).eval())
 
   def testWithNDArray(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       result = ops.convert_to_tensor([[[0., 0.],
                                        constant_op.constant([1., 1.])],
                                       np.array(
@@ -256,7 +256,7 @@ class AutomaticStackingTest(test.TestCase):
                           result.eval())
 
   def testVariable(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       v = variables.Variable(17)
       result = ops.convert_to_tensor([[0, 0, 0], [0, v, 0], [0, 0, 0]])
       v.initializer.run()
@@ -307,7 +307,7 @@ class AutomaticStackingTest(test.TestCase):
     self.assertEqual(dtypes.float64, t_2.dtype)
 
   def testPlaceholder(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       # Test using placeholder with a defined shape.
       ph_0 = array_ops.placeholder(dtypes.int32, shape=[])
       result_0 = ops.convert_to_tensor([[0, 0, 0], [0, ph_0, 0], [0, 0, 0]])
@@ -333,7 +333,7 @@ class AutomaticStackingTest(test.TestCase):
     # Dynamic shape error.
     ph_1 = array_ops.placeholder(dtypes.int32)
     result_1 = ops.convert_to_tensor([[0, 0, 0], [0, ph_1, 0], [0, 0, 0]])
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaises(errors_impl.InvalidArgumentError):
         result_1.eval(feed_dict={ph_1: [1]})
 
diff --git a/tensorflow/python/kernel_tests/stack_ops_test.py b/tensorflow/python/kernel_tests/stack_ops_test.py
index afd2eaffab..1aa12009ea 100644
--- a/tensorflow/python/kernel_tests/stack_ops_test.py
+++ b/tensorflow/python/kernel_tests/stack_ops_test.py
@@ -33,7 +33,7 @@ from tensorflow.python.platform import test
 class StackOpTest(test.TestCase):
 
   def _testStackPushPop(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       h = gen_data_flow_ops.stack_v2(
           -1, elem_type=dtypes.float32, stack_name="foo")
       c = gen_data_flow_ops.stack_push_v2(h, [[4.0, 5.0]])
@@ -46,7 +46,7 @@ class StackOpTest(test.TestCase):
     self._testStackPushPop(use_gpu=True)
 
   def _testStackPushPopSwap(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       a = np.arange(2000)
       x = constant_op.constant(a, dtype=dtypes.float32)
       h = gen_data_flow_ops.stack_v2(
@@ -61,7 +61,7 @@ class StackOpTest(test.TestCase):
     self._testStackPushPopSwap(use_gpu=True)
 
   def _testStackWhileSwap(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       n = constant_op.constant(0)
       h = gen_data_flow_ops.stack_v2(
           -1, elem_type=dtypes.float32, stack_name="foo")
@@ -98,7 +98,7 @@ class StackOpTest(test.TestCase):
     self._testStackWhileSwap(use_gpu=True)
 
   def _testMultiStack(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       h1 = gen_data_flow_ops.stack_v2(
           -1, elem_type=dtypes.float32, stack_name="foo")
       c1 = gen_data_flow_ops.stack_push_v2(h1, 4.0)
@@ -118,7 +118,7 @@ class StackOpTest(test.TestCase):
 
   def _testSameNameStacks(self, use_gpu):
     """Different stacks with the same name do not interfere."""
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       h1 = gen_data_flow_ops.stack_v2(
           -1, elem_type=dtypes.float32, stack_name="foo")
       h2 = gen_data_flow_ops.stack_v2(
@@ -140,7 +140,7 @@ class StackOpTest(test.TestCase):
     self._testSameNameStacks(use_gpu=True)
 
   def _testCloseStack(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       h = gen_data_flow_ops.stack_v2(
           -1, elem_type=dtypes.float32, stack_name="foo")
       c1 = gen_data_flow_ops.stack_close_v2(h)
@@ -151,7 +151,7 @@ class StackOpTest(test.TestCase):
     self._testCloseStack(use_gpu=True)
 
   def _testPushCloseStack(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       h = gen_data_flow_ops.stack_v2(
           -1, elem_type=dtypes.float32, stack_name="foo")
       c = gen_data_flow_ops.stack_push_v2(h, [[4.0, 5.0]])
@@ -168,7 +168,7 @@ class StackOpRefTest(test.TestCase):
   """Tests for deprecated non-resource variant of stack ops."""
 
   def _testStackPushPop(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo")
       c = gen_data_flow_ops.stack_push(h, [[4.0, 5.0]])
       with ops.control_dependencies([c]):
@@ -180,7 +180,7 @@ class StackOpRefTest(test.TestCase):
     self._testStackPushPop(use_gpu=True)
 
   def _testStackPushPopSwap(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       a = np.arange(2000)
       x = constant_op.constant(a, dtype=dtypes.float32)
       h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo")
@@ -194,7 +194,7 @@ class StackOpRefTest(test.TestCase):
     self._testStackPushPopSwap(use_gpu=True)
 
   def _testMultiStack(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       h1 = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo")
       c1 = gen_data_flow_ops.stack_push(h1, 4.0)
       with ops.control_dependencies([c1]):
@@ -207,7 +207,7 @@ class StackOpRefTest(test.TestCase):
       self.assertAllClose(9.0, r.eval())
 
   def _testStackWhileSwap(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       n = constant_op.constant(0)
       h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo")
 
@@ -247,7 +247,7 @@ class StackOpRefTest(test.TestCase):
     self._testMultiStack(use_gpu=True)
 
   def _testSameNameStacks(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu):
+    with self.cached_session(use_gpu=use_gpu):
       h1 = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo")
       c1 = gen_data_flow_ops.stack_push(h1, 4.0)
       h2 = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo")
@@ -260,7 +260,7 @@ class StackOpRefTest(test.TestCase):
     self._testSameNameStacks(use_gpu=True)
 
   def _testCloseStack(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo")
       c1 = gen_data_flow_ops.stack_close(h)
       sess.run(c1)
@@ -270,7 +270,7 @@ class StackOpRefTest(test.TestCase):
     self._testCloseStack(use_gpu=True)
 
   def _testPushCloseStack(self, use_gpu):
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo")
       c = gen_data_flow_ops.stack_push(h, [[4.0, 5.0]])
       with ops.control_dependencies([c]):
diff --git a/tensorflow/python/kernel_tests/stage_op_test.py b/tensorflow/python/kernel_tests/stage_op_test.py
index dd06d30391..b814843b86 100644
--- a/tensorflow/python/kernel_tests/stage_op_test.py
+++ b/tensorflow/python/kernel_tests/stage_op_test.py
@@ -41,7 +41,7 @@ class StageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       sess.run(stage, feed_dict={x: -1})
       for i in range(10):
         _, yval = sess.run([stage, y], feed_dict={x: i})
@@ -60,7 +60,7 @@ class StageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       sess.run(stage, feed_dict={x: -1})
       for i in range(10):
         _, yval = sess.run([stage, y], feed_dict={x: i})
@@ -85,7 +85,7 @@ class StageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       sess.run(stage, feed_dict={x: -1})
       for i in range(10):
         _, yval = sess.run([stage, y], feed_dict={x: i})
@@ -126,7 +126,7 @@ class StageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       for i in range(10):
         sess.run(stage, feed_dict={x: i})
 
@@ -150,7 +150,7 @@ class StageTest(test.TestCase):
 
     G.finalize()
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       sess.run(stage, feed_dict={x: -1})
       self.assertEqual(sess.run(size), 1)
       sess.run(stage, feed_dict={x: -1})
@@ -181,7 +181,7 @@ class StageTest(test.TestCase):
     queue = Queue.Queue()
     n = 8
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       # Stage data in a separate thread which will block
       # when it hits the staging area's capacity and thus
       # not fill the queue with n tokens
@@ -245,7 +245,7 @@ class StageTest(test.TestCase):
     queue = Queue.Queue()
     n = 8
 
-    with self.test_session(use_gpu=True, graph=G) as sess:
+    with self.session(use_gpu=True, graph=G) as sess:
       # Stage data in a separate thread which will block
       # when it hits the staging area's capacity and thus
       # not fill the queue with n tokens
diff --git a/tensorflow/python/kernel_tests/string_length_op_test.py b/tensorflow/python/kernel_tests/string_length_op_test.py
index 4afe3ad3f4..57db7302b1 100644
--- a/tensorflow/python/kernel_tests/string_length_op_test.py
+++ b/tensorflow/python/kernel_tests/string_length_op_test.py
@@ -38,7 +38,7 @@ class StringLengthOpTest(test.TestCase):
     expected_utf8_byte_lengths = [6, 4]
     expected_utf8_char_lengths = [5, 1]
 
-    with self.test_session() as sess:
+    with self.session() as sess:
       utf8_byte_lengths = string_ops.string_length(utf8_strings, unit="BYTE")
       utf8_char_lengths = string_ops.string_length(
           utf8_strings, unit="UTF8_CHAR")
@@ -56,7 +56,7 @@ class StringLengthOpTest(test.TestCase):
     # argument for the 'name' parameter.  Check that we don't break such code.
     strings = [[["1", "12"], ["123", "1234"], ["12345", "123456"]]]
     lengths = string_ops.string_length(strings, "some_name")
-    with self.test_session():
+    with self.session():
       self.assertAllEqual(lengths.eval(), [[[1, 2], [3, 4], [5, 6]]])
 
 
diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py
index d20567bf0e..57298c0fec 100644
--- a/tensorflow/python/kernel_tests/svd_op_test.py
+++ b/tensorflow/python/kernel_tests/svd_op_test.py
@@ -50,7 +50,7 @@ class SvdOpTest(test.TestCase):
       linalg_ops.svd(vector)
 
   def testConcurrentExecutesWithoutError(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       all_ops = []
       for compute_uv_ in True, False:
         for full_matrices_ in True, False:
@@ -140,7 +140,7 @@ def _GetSvdOpTest(dtype_, shape_, use_static_shape_, compute_uv_,
           low=-1.0, high=1.0,
           size=np.prod(shape_)).reshape(shape_).astype(dtype_)
 
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       if use_static_shape_:
         x_tf = constant_op.constant(x_np)
       else:
@@ -229,7 +229,7 @@ def _GetSvdGradOpTest(dtype_, shape_, compute_uv_, full_matrices_):
       tol = 3e-2
     else:
       tol = 1e-6
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       tf_a = constant_op.constant(a)
       if compute_uv_:
         tf_s, tf_u, tf_v = _NormalizingSvd(tf_a)
diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index 0ad2063558..91bd93712a 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -77,7 +77,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testTensorArrayWriteRead(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -98,7 +98,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual(-3.0, d2)
 
   def _testTensorArrayWritePack(self, tf_dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=tf_dtype, tensor_array_name="foo", size=3)
 
@@ -129,7 +129,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testEmptyTensorArrayPack(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, tensor_array_name="foo", size=3)
 
@@ -144,7 +144,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual([3, 0, 1], c0.shape)
 
   def _testTensorArrayWriteConcat(self, tf_dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=tf_dtype, tensor_array_name="foo", size=3, infer_shape=False)
 
@@ -172,7 +172,7 @@ class TensorArrayTest(test.TestCase):
     self._testTensorArrayWriteConcat(dtypes.string)
 
   def _testTensorArrayReadOrPackNotAllValuesAvailableFillsZeros(self):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -205,7 +205,7 @@ class TensorArrayTest(test.TestCase):
     self._testTensorArrayReadOrPackNotAllValuesAvailableInferShapeFillsZeros()
 
   def _testTensorArrayUnpackRead(self, tf_dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       convert = _make_converter(tf_dtype)
 
       ta = _make_ta(3, "foo", dtype=tf_dtype)
@@ -256,7 +256,7 @@ class TensorArrayTest(test.TestCase):
     self._testTensorArrayUnpackReadMaybeLegacy()
 
   def _testTensorArraySplitRead(self, tf_dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       convert = _make_converter(tf_dtype)
 
       # Split an empty vector
@@ -308,7 +308,7 @@ class TensorArrayTest(test.TestCase):
     self._testTensorArraySplitRead(dtypes.string)
 
   def testTensorGradArrayWriteRead(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -341,7 +341,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual(-2.0, g_d2)
 
   def testTensorGradArrayDynamicWriteRead(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -382,7 +382,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual(3, g_vs)
 
   def testTensorGradAccessTwiceReceiveSameObject(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, tensor_array_name="foo", size=3)
       g_ta_0 = ta.grad("grad")
@@ -399,7 +399,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testTensorArrayWriteWrongIndexOrDataTypeFails(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = _make_ta(3, "foo", dtype=dtypes.float32)
       # Test writing the wrong datatype
       with self.assertRaisesOpError(
@@ -418,7 +418,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testTensorArrayReadWrongIndexOrDataTypeFails(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = _make_ta(3, "foo", dtype=dtypes.float32)
 
       w0 = ta.write(0, [[4.0, 5.0]])
@@ -441,7 +441,7 @@ class TensorArrayTest(test.TestCase):
         self.evaluate(ta.read(3))
 
   def testTensorArrayWriteMultipleFails(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, tensor_array_name="foo", size=3)
 
@@ -452,7 +452,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testTensorArrayConcatIncompatibleShapesFails(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -484,7 +484,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testTensorArraySplitIncompatibleShapesFails(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       in_eager_mode = context.executing_eagerly()
       ta = _make_ta(3, "foo")
       with self.assertRaisesOpError(
@@ -513,7 +513,7 @@ class TensorArrayTest(test.TestCase):
         self.evaluate(ta.split([1.0], [1]).flow)
 
   def _testTensorArrayWriteGradientAddMultipleAdds(self, dtype):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtype, tensor_array_name="foo", size=3, infer_shape=False)
       ta_grad = ta.grad("grad")
@@ -552,7 +552,7 @@ class TensorArrayTest(test.TestCase):
       self._testTensorArrayWriteGradientAddMultipleAdds(dtype)
 
   def testTensorArrayGradWithShapeKnownElementShape(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       ta = tensor_array_ops.TensorArray(
           size=3,
           dtype=dtypes.float32,
@@ -581,7 +581,7 @@ class TensorArrayTest(test.TestCase):
                           sess.run(read_value, feed_dict={value: fed_value}))
 
   def testTensorArrayGradWithShapeUnknownElementShape(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       ta = tensor_array_ops.TensorArray(
           size=3, dtype=dtypes.float32,
           element_shape=None)  # Note that element_shape is unknown
@@ -605,7 +605,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testMultiTensorArray(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       h1 = tensor_array_ops.TensorArray(
           size=1, dtype=dtypes.float32, tensor_array_name="foo")
       w1 = h1.write(0, 4.0)
@@ -621,7 +621,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllClose(9.0, val)
 
   def _testTensorArrayGradientWriteReadType(self, dtype):
-    with self.test_session(use_gpu=True) as session:
+    with self.cached_session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.as_dtype(dtype),
           tensor_array_name="foo",
@@ -672,7 +672,7 @@ class TensorArrayTest(test.TestCase):
       self._testTensorArrayGradientWriteReadType(dtype)
 
   def _testTensorArrayGradientWritePackConcatAndRead(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -708,7 +708,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testTensorArrayReadTwice(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       value = constant_op.constant([[1.0, -1.0], [10.0, -10.0]])
 
       ta_readonce = tensor_array_ops.TensorArray(
@@ -736,7 +736,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual([1.0, -1.0], self.evaluate(r1_readtwice))
 
   def _testTensorArrayGradientUnpackRead(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.cached_session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -764,7 +764,7 @@ class TensorArrayTest(test.TestCase):
     self._testTensorArrayGradientUnpackRead()
 
   def testTensorArrayGradientSplitConcat(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, tensor_array_name="foo", size=2,
           infer_shape=False)
@@ -787,7 +787,7 @@ class TensorArrayTest(test.TestCase):
                           grad_vals[0])
 
   def _testTensorArrayGradientDynamicUnpackRead(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.cached_session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -813,14 +813,14 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testCloseTensorArray(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, tensor_array_name="foo", size=3)
       self.evaluate(ta.close())
 
   @test_util.run_in_graph_and_eager_modes
   def testSizeTensorArray(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, tensor_array_name="foo", size=3)
       s = ta.size()
@@ -828,7 +828,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testWriteCloseTensorArray(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -840,7 +840,7 @@ class TensorArrayTest(test.TestCase):
 
   def _testWhileLoopWritePackGradients(self, dynamic_size, dtype):
     np_dtype = dtype.as_numpy_dtype
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       def func(v0, state0, var):
         ta = tensor_array_ops.TensorArray(
             dtype=dtype,
@@ -938,7 +938,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testGradSerialTwoLoops(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       def loop(x):
         num_steps = 100
         acc = tensor_array_ops.TensorArray(
@@ -977,7 +977,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllClose(31.0, self.evaluate(grad))
 
   def testSumOfTwoReadVariablesWithoutRepeatGrad(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       a = array_ops.identity(
           np.arange(
               3 * 5, dtype=np.float32).reshape(3, 5) + 1)
@@ -1050,7 +1050,7 @@ class TensorArrayTest(test.TestCase):
         self._grad_source_for_name("foo/gradients/bar/gradients_0/baz"))
 
   def testWriteShape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, tensor_array_name="foo", size=3)
       c0 = constant_op.constant([4.0, 5.0])
@@ -1074,7 +1074,7 @@ class TensorArrayTest(test.TestCase):
         w0.write(0, c2)
 
   def testPartlyUnknownShape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, tensor_array_name="foo", size=6)
 
@@ -1115,7 +1115,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def _testUnpackShape(self):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -1149,7 +1149,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testSplitShape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -1179,7 +1179,7 @@ class TensorArrayTest(test.TestCase):
                 ta1.handle.op.get_attr("element_shape")).ndims, None)
 
   def testWriteUnknownShape(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -1191,7 +1191,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual(r0.get_shape(), tensor_shape.unknown_shape())
 
   def _testGradientWhenNotAllComponentsRead(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.cached_session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2)
       x = constant_op.constant([2.0, 3.0])
       w = ta.unstack(x)
@@ -1205,7 +1205,7 @@ class TensorArrayTest(test.TestCase):
     self._testGradientWhenNotAllComponentsRead()
 
   def _testTensorArrayUnpackDynamic(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, size=3, dynamic_size=True)
       x = constant_op.constant([1.0, 2.0, 3.0])
@@ -1220,7 +1220,7 @@ class TensorArrayTest(test.TestCase):
     self._testTensorArrayUnpackDynamic()
 
   def testTensorArraySplitDynamic(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, size=3, dynamic_size=True)
       x = constant_op.constant([1.0, 2.0, 3.0])
@@ -1232,7 +1232,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual(np.array([1.0, 1.0, 1.0]), sess.run(grad)[0])
 
   def _testTensorArrayEvalEmpty(self):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, size=0, dynamic_size=False, infer_shape=False)
       with self.assertRaisesOpError(
@@ -1247,7 +1247,7 @@ class TensorArrayTest(test.TestCase):
   # this test is ill-defined for Eager mode --- unpacking an empty tensor
   # gives an empty list / there is not equivalent of "mark_used" in Eager
   def _testTensorArrayEvalEmptyWithDefault(self):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32, size=0, dynamic_size=False, infer_shape=True)
       self.assertEqual(0, ta.size().eval())
@@ -1264,7 +1264,7 @@ class TensorArrayTest(test.TestCase):
     self._testTensorArrayEvalEmptyWithDefault()
 
   def testTensorArrayScatterReadAndGradients(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -1291,7 +1291,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testTensorArrayWriteGatherAndGradients(self):
-    with self.test_session(use_gpu=True) as session:
+    with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
           dtype=dtypes.float32,
           tensor_array_name="foo",
@@ -1435,7 +1435,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testTensorArrayIdentity(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       ta0 = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2,
                                          infer_shape=False)
       ta1 = tensor_array_ops.TensorArray(dtype=dtypes.int32, size=4,
@@ -1500,14 +1500,14 @@ class TensorArrayTest(test.TestCase):
       # dy is outside of the gradients name scope; tf.gradients must
       # wrap it in the correct name scope.
       dx, = gradients_impl.gradients(ys=[y], xs=[x], grad_ys=[dy])
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         vdx, vdy = sess.run([dx, dy])
       self.assertAllClose(vdx, vdy)
 
   def testTensorArrayInt64GPU(self):
     if not test.is_gpu_available():
       return
-    with self.test_session(use_gpu=True, force_gpu=True) as sess:
+    with self.session(use_gpu=True, force_gpu=True) as sess:
       value = array_ops.placeholder(dtypes.int64)
       ta = tensor_array_ops.TensorArray(dtype=dtypes.int64, size=2)
       ta = ta.scatter([0, 1], value)
diff --git a/tensorflow/python/kernel_tests/tensordot_op_test.py b/tensorflow/python/kernel_tests/tensordot_op_test.py
index d8d76440f1..123c9b376c 100644
--- a/tensorflow/python/kernel_tests/tensordot_op_test.py
+++ b/tensorflow/python/kernel_tests/tensordot_op_test.py
@@ -165,7 +165,7 @@ def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
     for _ in range(num_trials):
       a_np, b_np, a_dims_np, b_dims_np = _generate_random_tensors_and_dims()
       np_ans = np.tensordot(a_np, b_np, axes=(a_dims_np, b_dims_np))
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         if dynamic_shape_:
           a = array_ops.placeholder(dtype_)
           b = array_ops.placeholder(dtype_)
@@ -201,7 +201,7 @@ def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
       all_axes.append(a_np.ndim - 1)
     for axes in all_axes:
       np_ans = np.tensordot(a_np, b_np, axes=axes)
-      with self.test_session(use_gpu=True) as sess:
+      with self.cached_session(use_gpu=True) as sess:
         if dynamic_shape_:
           a = array_ops.placeholder(dtype_)
           b = array_ops.placeholder(dtype_)
diff --git a/tensorflow/python/kernel_tests/topk_op_test.py b/tensorflow/python/kernel_tests/topk_op_test.py
index d5f0726106..d9f340de6b 100644
--- a/tensorflow/python/kernel_tests/topk_op_test.py
+++ b/tensorflow/python/kernel_tests/topk_op_test.py
@@ -46,7 +46,7 @@ class TopKTest(test.TestCase):
                     sorted=True):  # pylint: disable=redefined-builtin
     np_expected_values = np.array(expected_values)
     np_expected_indices = np.array(expected_indices)
-    with self.test_session(use_gpu=True) as sess:
+    with self.cached_session(use_gpu=True) as sess:
       values_op, indices_op = nn_ops.top_k(inputs, k, sorted=sorted)
       values, indices = sess.run([values_op, indices_op])
 
@@ -183,7 +183,7 @@ class TopKTest(test.TestCase):
 
   def testKNegative(self):
     inputs = [[0.1, 0.2], [0.3, 0.4]]
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       k = array_ops.placeholder(dtypes.int32)
       values, _ = nn_ops.top_k(inputs, k)
       with self.assertRaisesOpError("Need k >= 0, got -7"):
@@ -196,7 +196,7 @@ class TopKTest(test.TestCase):
       nn_ops.top_k(inputs, 4)
 
   def testTopKGradients(self):
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       inputs = array_ops.placeholder(dtypes.float32, shape=[2, 5])
       values, _ = nn_ops.top_k(inputs, 3)
       grad = sess.run(
diff --git a/tensorflow/python/kernel_tests/trace_op_test.py b/tensorflow/python/kernel_tests/trace_op_test.py
index a5d5bcc149..f1abaefb66 100644
--- a/tensorflow/python/kernel_tests/trace_op_test.py
+++ b/tensorflow/python/kernel_tests/trace_op_test.py
@@ -30,7 +30,7 @@ class TraceTest(test.TestCase):
 
   def compare(self, x):
     np_ans = np.trace(x, axis1=-2, axis2=-1)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       tf_ans = math_ops.trace(x).eval()
     self.assertAllClose(tf_ans, np_ans)
 
diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py
index a825052dd2..8c11c20709 100644
--- a/tensorflow/python/kernel_tests/transpose_op_test.py
+++ b/tensorflow/python/kernel_tests/transpose_op_test.py
@@ -47,7 +47,7 @@ class TransposeTest(test.TestCase):
     np_ans = self._np_transpose(x, perm)
     if conjugate:
       np_ans = np.conj(np_ans)
-    with self.test_session(use_gpu=False):
+    with self.cached_session(use_gpu=False):
       inx = ops.convert_to_tensor(x)
       y = array_ops.transpose(inx, p, conjugate=conjugate)
       tf_ans = y.eval()
@@ -78,7 +78,7 @@ class TransposeTest(test.TestCase):
     np_ans = self._np_transpose(x, perm)
     if conjugate:
       np_ans = np.conj(np_ans)
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       inx = ops.convert_to_tensor(x)
       y = array_ops.transpose(inx, p, conjugate=conjugate)
       tf_ans = y.eval()
@@ -165,7 +165,7 @@ class TransposeTest(test.TestCase):
         total_size = np.prod(input_shape)
         inp = np.arange(1, total_size + 1, dtype=datatype).reshape(input_shape)
         np_ans = self._np_transpose(inp, perm)
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           inx = ops.convert_to_tensor(inp)
           y = array_ops.transpose(inx, perm)
           tf_ans = y.eval()
@@ -186,7 +186,7 @@ class TransposeTest(test.TestCase):
       total_size = np.prod(input_shape)
       inp = np.arange(1, total_size + 1, dtype=np.float32).reshape(input_shape)
       np_ans = self._np_transpose(inp, perm)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inx = ops.convert_to_tensor(inp)
         y = array_ops.transpose(inx, perm)
         tf_ans = y.eval()
@@ -221,7 +221,7 @@ class TransposeTest(test.TestCase):
       total_size = np.prod(input_shape)
       inp = np.arange(1, total_size + 1, dtype=np.float32).reshape(input_shape)
       np_ans = self._np_transpose(inp, perm)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inx = ops.convert_to_tensor(inp)
         y = array_ops.transpose(inx, perm)
         tf_ans = y.eval()
@@ -243,7 +243,7 @@ class TransposeTest(test.TestCase):
         total_size = np.prod(input_shape)
         inp = np.arange(1, total_size + 1, dtype=datatype).reshape(input_shape)
         np_ans = self._np_transpose(inp, perm)
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           inx = ops.convert_to_tensor(inp)
           y = array_ops.transpose(inx, perm)
           tf_ans = y.eval()
@@ -264,7 +264,7 @@ class TransposeTest(test.TestCase):
       total_size = np.prod(input_shape)
       inp = np.arange(1, total_size + 1, dtype=np.float32).reshape(input_shape)
       np_ans = self._np_transpose(inp, perm)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inx = ops.convert_to_tensor(inp)
         y = array_ops.transpose(inx, perm)
         tf_ans = y.eval()
@@ -316,7 +316,7 @@ class TransposeTest(test.TestCase):
       # generate input data with random ints from 0 to 9.
       inp = np.random.randint(10, size=input_shape)
       np_ans = self._np_transpose(inp, perm)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inx = ops.convert_to_tensor(inp)
         y = array_ops.transpose(inx, perm)
         tf_ans = y.eval()
@@ -337,7 +337,7 @@ class TransposeTest(test.TestCase):
       x = np.arange(0, 8).reshape([2, 4]).astype(np.float32)
       p = np.array([1, 0]).astype(perm_dtype)
       np_ans = np.copy(x).transpose(p)
-      with self.test_session(use_gpu=True):
+      with self.cached_session(use_gpu=True):
         inx = ops.convert_to_tensor(x)
         inp = constant_op.constant(p)
         y = array_ops.transpose(inx, inp)
@@ -414,7 +414,7 @@ class TransposeTest(test.TestCase):
   def testTranspose2DAuto(self):
     x_np = [[1, 2, 3], [4, 5, 6]]
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         x_tf = array_ops.transpose(x_np).eval()
         self.assertAllEqual(x_tf, [[1, 4], [2, 5], [3, 6]])
 
diff --git a/tensorflow/python/kernel_tests/unstack_op_test.py b/tensorflow/python/kernel_tests/unstack_op_test.py
index b373c419b6..48ab258b7f 100644
--- a/tensorflow/python/kernel_tests/unstack_op_test.py
+++ b/tensorflow/python/kernel_tests/unstack_op_test.py
@@ -41,7 +41,7 @@ class UnstackOpTest(test.TestCase):
 
   def testSimple(self):
     np.random.seed(7)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
         for dtype in [
             np.bool, np.float16, np.float32, np.float64, np.int32, np.int64
@@ -60,7 +60,7 @@ class UnstackOpTest(test.TestCase):
     if not test_util.is_gpu_available():
       self.skipTest('No GPU available')
     np.random.seed(7)
-    with self.test_session(use_gpu=True, force_gpu=True):
+    with self.session(use_gpu=True, force_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
         for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]:
           data = np.random.randn(*shape).astype(dtype)
@@ -78,7 +78,7 @@ class UnstackOpTest(test.TestCase):
       data = np.random.randn(*shape)
       shapes = [shape[1:]] * shape[0]
       for i in xrange(shape[0]):
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           x = constant_op.constant(data)
           cs = array_ops.unstack(x, num=shape[0])
           err = gradient_checker.compute_gradient_error(x, shape, cs[i],
@@ -91,7 +91,7 @@ class UnstackOpTest(test.TestCase):
       out_shape = list(shape)
       del out_shape[1]
       for i in xrange(shape[1]):
-        with self.test_session(use_gpu=True):
+        with self.cached_session(use_gpu=True):
           x = constant_op.constant(data)
           cs = array_ops.unstack(x, num=shape[1], axis=1)
           err = gradient_checker.compute_gradient_error(x, shape, cs[i],
diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 70507ad6a6..b3eebf8316 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -124,7 +124,7 @@ class VariablesTestCase(test.TestCase):
       self.assertAllClose(4.0, var.eval())
 
   def testResourceAssignments(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       var = resource_variable_ops.ResourceVariable(0.0)
       plus_one = var.assign_add(1.0)
       minus_one = var.assign_sub(2.0)
diff --git a/tensorflow/python/kernel_tests/where_op_test.py b/tensorflow/python/kernel_tests/where_op_test.py
index 04ac589432..fca45c3ece 100644
--- a/tensorflow/python/kernel_tests/where_op_test.py
+++ b/tensorflow/python/kernel_tests/where_op_test.py
@@ -37,7 +37,7 @@ from tensorflow.python.platform import test
 class WhereOpTest(test.TestCase):
 
   def _testWhere(self, x, truth, expected_err_re=None):
-    with self.test_session(use_gpu=True):
+    with self.cached_session(use_gpu=True):
       ans = array_ops.where(x)
       self.assertEqual([None, x.ndim], ans.get_shape().as_list())
       if expected_err_re is None:
@@ -48,7 +48,7 @@ class WhereOpTest(test.TestCase):
           ans.eval()
 
   def testWrongNumbers(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       with self.assertRaises(ValueError):
         array_ops.where([False, True], [1, 2], None)
       with self.assertRaises(ValueError):
@@ -132,7 +132,7 @@ class WhereOpTest(test.TestCase):
   def testThreeArgument(self):
     x = np.array([[-2, 3, -1], [1, -3, -3]])
     np_val = np.where(x > 0, x * x, -x)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       tf_val = array_ops.where(constant_op.constant(x) > 0, x * x, -x).eval()
     self.assertAllEqual(tf_val, np_val)
 
@@ -141,7 +141,7 @@ class WhereOpTest(test.TestCase):
     c_mat = np.array([[False] * 192, [True] * 192] * 8192)  # [16384, 192]
     c_vec = np.array([False, True] * 8192)  # [16384]
     np_val = np.where(c_mat, x * x, -x)
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       tf_val = array_ops.where(c_vec, x * x, -x).eval()
     self.assertAllEqual(tf_val, np_val)
 
diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py
index 729885169e..c3c7f867a1 100644
--- a/tensorflow/python/kernel_tests/xent_op_test.py
+++ b/tensorflow/python/kernel_tests/xent_op_test.py
@@ -53,7 +53,7 @@ class XentTest(test.TestCase):
 
   def _testXent(self, np_features, np_labels, use_gpu=False):
     np_loss, np_backprop = self._npXent(np_features, np_labels)
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits(
           np_features, np_labels)
       tf_loss, tf_backprop = sess.run([loss, backprop])
@@ -62,7 +62,7 @@ class XentTest(test.TestCase):
 
   def _testXentWrapper(self, np_features, np_labels, dim=-1, use_gpu=False):
     np_loss, _ = self._npXent(np_features, np_labels, dim=dim)
-    with self.test_session(use_gpu=use_gpu) as sess:
+    with self.cached_session(use_gpu=use_gpu) as sess:
       loss = nn_ops.softmax_cross_entropy_with_logits(
           labels=np_labels, logits=np_features, dim=dim)
       tf_loss = sess.run(loss)
@@ -76,7 +76,7 @@ class XentTest(test.TestCase):
 
   def _testSingleClass(self, use_gpu=False):
     for dtype in np.float16, np.float32:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits(
             np.array([[1.], [-1.], [0.]]).astype(dtype),
             np.array([[-1.], [0.], [1.]]).astype(dtype))
@@ -145,7 +145,7 @@ class XentTest(test.TestCase):
     tf_l = constant_op.constant(
         np.array([[0., 0., 0., 1.], [0., .5, .5, 0.]]).astype(np.float32))
     for use_gpu in [False, True]:
-      with self.test_session(use_gpu=use_gpu) as sess:
+      with self.cached_session(use_gpu=use_gpu) as sess:
         loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits(
             tf_f, tf_l)
         tf_loss, tf_backprop = sess.run([loss, backprop])
@@ -277,7 +277,7 @@ class XentTest(test.TestCase):
     features = np.zeros([0, 2, 4]).astype(np.float32)
     labels = np.zeros([0, 2, 4]).astype(np.float32)
     np_loss, _ = self._npXent(features, labels)
-    with self.test_session(use_gpu=True) as sess:
+    with self.session(use_gpu=True) as sess:
       loss = nn_ops.softmax_cross_entropy_with_logits(
           labels=labels, logits=features)
       tf_loss = sess.run(loss)
diff --git a/tensorflow/python/kernel_tests/zero_division_test.py b/tensorflow/python/kernel_tests/zero_division_test.py
index dd0214e0f1..e68b96e670 100644
--- a/tensorflow/python/kernel_tests/zero_division_test.py
+++ b/tensorflow/python/kernel_tests/zero_division_test.py
@@ -27,7 +27,7 @@ from tensorflow.python.platform import test
 class ZeroDivisionTest(test.TestCase):
 
   def testZeros(self):
-    with self.test_session(use_gpu=True):
+    with self.session(use_gpu=True):
       for dtype in dtypes.uint8, dtypes.int16, dtypes.int32, dtypes.int64:
         zero = constant_op.constant(0, dtype=dtype)
         one = constant_op.constant(1, dtype=dtype)
-- 
GitLab


From 6f7cbd60a2a5ecadd9f96ed267d758834adda159 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 16 Oct 2018 18:11:46 -0700
Subject: [PATCH 1075/1085] Makes a number of Keras layer tests run in both
 graph and eager modes.

PiperOrigin-RevId: 217422423
---
 .../keras/layers/advanced_activations_test.py |  64 ++-
 .../python/keras/layers/embeddings_test.py    |  16 +-
 tensorflow/python/keras/layers/lstm_test.py   | 423 +++++++++---------
 tensorflow/python/keras/layers/merge_test.py  |  72 ++-
 tensorflow/python/keras/layers/noise_test.py  |  20 +-
 .../python/keras/layers/normalization_test.py | 196 ++++----
 .../python/keras/layers/serialization_test.py |   2 +
 .../python/keras/layers/simplernn_test.py     |  92 ++--
 8 files changed, 437 insertions(+), 448 deletions(-)

diff --git a/tensorflow/python/keras/layers/advanced_activations_test.py b/tensorflow/python/keras/layers/advanced_activations_test.py
index c41087be0a..4aadf535e0 100644
--- a/tensorflow/python/keras/layers/advanced_activations_test.py
+++ b/tensorflow/python/keras/layers/advanced_activations_test.py
@@ -19,55 +19,52 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python import keras
+from tensorflow.python.eager import context
+from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 
 
+@tf_test_util.run_all_in_graph_and_eager_modes
 class AdvancedActivationsTest(test.TestCase):
 
   def test_leaky_relu(self):
-    with self.cached_session():
-      for alpha in [0., .5, -1.]:
-        testing_utils.layer_test(keras.layers.LeakyReLU,
-                                 kwargs={'alpha': alpha},
-                                 input_shape=(2, 3, 4))
+    for alpha in [0., .5, -1.]:
+      testing_utils.layer_test(keras.layers.LeakyReLU,
+                               kwargs={'alpha': alpha},
+                               input_shape=(2, 3, 4))
 
   def test_prelu(self):
-    with self.cached_session():
-      testing_utils.layer_test(keras.layers.PReLU, kwargs={},
-                               input_shape=(2, 3, 4))
+    testing_utils.layer_test(keras.layers.PReLU, kwargs={},
+                             input_shape=(2, 3, 4))
 
   def test_prelu_share(self):
-    with self.cached_session():
-      testing_utils.layer_test(keras.layers.PReLU,
-                               kwargs={'shared_axes': 1},
-                               input_shape=(2, 3, 4))
+    testing_utils.layer_test(keras.layers.PReLU,
+                             kwargs={'shared_axes': 1},
+                             input_shape=(2, 3, 4))
 
   def test_elu(self):
-    with self.cached_session():
-      for alpha in [0., .5, -1.]:
-        testing_utils.layer_test(keras.layers.ELU,
-                                 kwargs={'alpha': alpha},
-                                 input_shape=(2, 3, 4))
+    for alpha in [0., .5, -1.]:
+      testing_utils.layer_test(keras.layers.ELU,
+                               kwargs={'alpha': alpha},
+                               input_shape=(2, 3, 4))
 
   def test_thresholded_relu(self):
-    with self.cached_session():
-      testing_utils.layer_test(keras.layers.ThresholdedReLU,
-                               kwargs={'theta': 0.5},
-                               input_shape=(2, 3, 4))
+    testing_utils.layer_test(keras.layers.ThresholdedReLU,
+                             kwargs={'theta': 0.5},
+                             input_shape=(2, 3, 4))
 
   def test_softmax(self):
-    with self.cached_session():
-      testing_utils.layer_test(keras.layers.Softmax,
-                               kwargs={'axis': 1},
-                               input_shape=(2, 3, 4))
+    testing_utils.layer_test(keras.layers.Softmax,
+                             kwargs={'axis': 1},
+                             input_shape=(2, 3, 4))
 
   def test_relu(self):
-    with self.cached_session():
-      testing_utils.layer_test(keras.layers.ReLU,
-                               kwargs={'max_value': 10},
-                               input_shape=(2, 3, 4))
-      x = keras.backend.ones((3, 4))
+    testing_utils.layer_test(keras.layers.ReLU,
+                             kwargs={'max_value': 10},
+                             input_shape=(2, 3, 4))
+    x = keras.backend.ones((3, 4))
+    if not context.executing_eagerly():
       # Test that we use `leaky_relu` when appropriate in graph mode.
       self.assertTrue(
           'LeakyRelu' in keras.layers.ReLU(negative_slope=0.2)(x).name)
@@ -79,10 +76,9 @@ class AdvancedActivationsTest(test.TestCase):
   def test_relu_with_invalid_arg(self):
     with self.assertRaisesRegexp(
         ValueError, 'max_value of Relu layer cannot be negative value: -10'):
-      with self.cached_session():
-        testing_utils.layer_test(keras.layers.ReLU,
-                                 kwargs={'max_value': -10},
-                                 input_shape=(2, 3, 4))
+      testing_utils.layer_test(keras.layers.ReLU,
+                               kwargs={'max_value': -10},
+                               input_shape=(2, 3, 4))
     with self.assertRaisesRegexp(
         ValueError,
         'negative_slope of Relu layer cannot be negative value: -2'):
diff --git a/tensorflow/python/keras/layers/embeddings_test.py b/tensorflow/python/keras/layers/embeddings_test.py
index 2e42e403aa..aaa17b7e96 100644
--- a/tensorflow/python/keras/layers/embeddings_test.py
+++ b/tensorflow/python/keras/layers/embeddings_test.py
@@ -69,16 +69,16 @@ class EmbeddingTest(test.TestCase):
         input_dtype='int32',
         expected_output_dtype='float32')
 
+  @tf_test_util.run_in_graph_and_eager_modes()
   def test_embedding_correctness(self):
-    with self.cached_session():
-      layer = keras.layers.Embedding(output_dim=2, input_dim=2)
-      layer.build((None, 2))
-      matrix = np.array([[1, 1], [2, 2]])
-      layer.set_weights([matrix])
+    layer = keras.layers.Embedding(output_dim=2, input_dim=2)
+    layer.build((None, 2))
+    matrix = np.array([[1, 1], [2, 2]])
+    layer.set_weights([matrix])
 
-      inputs = keras.backend.constant([[0, 1, 0]], dtype='int32')
-      outputs = keras.backend.eval(layer(inputs))
-      self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]])
+    inputs = keras.backend.constant([[0, 1, 0]], dtype='int32')
+    outputs = keras.backend.eval(layer(inputs))
+    self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]])
 
   @tf_test_util.run_in_graph_and_eager_modes()
   def test_eager_gpu_cpu(self):
diff --git a/tensorflow/python/keras/layers/lstm_test.py b/tensorflow/python/keras/layers/lstm_test.py
index f536915324..e0094d99f4 100644
--- a/tensorflow/python/keras/layers/lstm_test.py
+++ b/tensorflow/python/keras/layers/lstm_test.py
@@ -24,12 +24,14 @@ from tensorflow.python import keras
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
+from tensorflow.python.training import adam
+from tensorflow.python.training import gradient_descent
 from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
 
+@tf_test_util.run_all_in_graph_and_eager_modes
 class LSTMLayerTest(test.TestCase):
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_return_sequences_LSTM(self):
     num_samples = 2
     timesteps = 3
@@ -56,7 +58,6 @@ class LSTMLayerTest(test.TestCase):
     outputs = model.layers[-1].output
     self.assertEquals(outputs.get_shape().as_list(), [None, timesteps, units])
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_dynamic_behavior_LSTM(self):
     num_samples = 2
     timesteps = 3
@@ -70,7 +71,6 @@ class LSTMLayerTest(test.TestCase):
     y = np.random.random((num_samples, units))
     model.train_on_batch(x, y)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_dropout_LSTM(self):
     num_samples = 2
     timesteps = 3
@@ -83,7 +83,6 @@ class LSTMLayerTest(test.TestCase):
                 'recurrent_dropout': 0.1},
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_implementation_mode_LSTM(self):
     num_samples = 2
     timesteps = 3
@@ -96,120 +95,36 @@ class LSTMLayerTest(test.TestCase):
                   'implementation': mode},
           input_shape=(num_samples, timesteps, embedding_dim))
 
-  def test_statefulness_LSTM(self):
-    num_samples = 2
-    timesteps = 3
-    embedding_dim = 4
-    units = 2
-    layer_class = keras.layers.LSTM
-    with self.cached_session():
-      model = keras.models.Sequential()
-      model.add(
-          keras.layers.Embedding(
-              4,
-              embedding_dim,
-              mask_zero=True,
-              input_length=timesteps,
-              batch_input_shape=(num_samples, timesteps)))
-      layer = layer_class(
-          units, return_sequences=False, stateful=True, weights=None)
-      model.add(layer)
-      model.compile(optimizer='sgd', loss='mse')
-      out1 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertEqual(out1.shape, (num_samples, units))
-
-      # train once so that the states change
-      model.train_on_batch(
-          np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
-      out2 = model.predict(np.ones((num_samples, timesteps)))
-
-      # if the state is not reset, output should be different
-      self.assertNotEqual(out1.max(), out2.max())
-
-      # check that output changes after states are reset
-      # (even though the model itself didn't change)
-      layer.reset_states()
-      out3 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertNotEqual(out2.max(), out3.max())
-
-      # check that container-level reset_states() works
-      model.reset_states()
-      out4 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertAllClose(out3, out4, atol=1e-5)
-
-      # check that the call to `predict` updated the states
-      out5 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertNotEqual(out4.max(), out5.max())
-
-      # Check masking
-      layer.reset_states()
-
-      left_padded_input = np.ones((num_samples, timesteps))
-      left_padded_input[0, :1] = 0
-      left_padded_input[1, :2] = 0
-      out6 = model.predict(left_padded_input)
-
-      layer.reset_states()
-
-      right_padded_input = np.ones((num_samples, timesteps))
-      right_padded_input[0, -1:] = 0
-      right_padded_input[1, -2:] = 0
-      out7 = model.predict(right_padded_input)
-
-      self.assertAllClose(out7, out6, atol=1e-5)
-
-  def test_regularizers_LSTM(self):
-    embedding_dim = 4
-    layer_class = keras.layers.LSTM
-    with self.cached_session():
-      layer = layer_class(
-          5,
-          return_sequences=False,
-          weights=None,
-          input_shape=(None, embedding_dim),
-          kernel_regularizer=keras.regularizers.l1(0.01),
-          recurrent_regularizer=keras.regularizers.l1(0.01),
-          bias_regularizer='l2',
-          activity_regularizer='l1')
-      layer.build((None, None, 2))
-      self.assertEqual(len(layer.losses), 3)
-      x = keras.backend.variable(np.ones((2, 3, 2)))
-      layer(x)
-      self.assertEqual(len(layer.get_losses_for(x)), 1)
-
   def test_constraints_LSTM(self):
     embedding_dim = 4
     layer_class = keras.layers.LSTM
-    with self.cached_session():
-      k_constraint = keras.constraints.max_norm(0.01)
-      r_constraint = keras.constraints.max_norm(0.01)
-      b_constraint = keras.constraints.max_norm(0.01)
-      layer = layer_class(
-          5,
-          return_sequences=False,
-          weights=None,
-          input_shape=(None, embedding_dim),
-          kernel_constraint=k_constraint,
-          recurrent_constraint=r_constraint,
-          bias_constraint=b_constraint)
-      layer.build((None, None, embedding_dim))
-      self.assertEqual(layer.cell.kernel.constraint, k_constraint)
-      self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
-      self.assertEqual(layer.cell.bias.constraint, b_constraint)
-
-  @tf_test_util.run_in_graph_and_eager_modes
+    k_constraint = keras.constraints.max_norm(0.01)
+    r_constraint = keras.constraints.max_norm(0.01)
+    b_constraint = keras.constraints.max_norm(0.01)
+    layer = layer_class(
+        5,
+        return_sequences=False,
+        weights=None,
+        input_shape=(None, embedding_dim),
+        kernel_constraint=k_constraint,
+        recurrent_constraint=r_constraint,
+        bias_constraint=b_constraint)
+    layer.build((None, None, embedding_dim))
+    self.assertEqual(layer.cell.kernel.constraint, k_constraint)
+    self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
+    self.assertEqual(layer.cell.bias.constraint, b_constraint)
+
   def test_with_masking_layer_LSTM(self):
     layer_class = keras.layers.LSTM
-    with self.cached_session():
-      inputs = np.random.random((2, 3, 4))
-      targets = np.abs(np.random.random((2, 3, 5)))
-      targets /= targets.sum(axis=-1, keepdims=True)
-      model = keras.models.Sequential()
-      model.add(keras.layers.Masking(input_shape=(3, 4)))
-      model.add(layer_class(units=5, return_sequences=True, unroll=False))
-      model.compile(loss='categorical_crossentropy',
-                    optimizer=RMSPropOptimizer(0.01))
-      model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
+    inputs = np.random.random((2, 3, 4))
+    targets = np.abs(np.random.random((2, 3, 5)))
+    targets /= targets.sum(axis=-1, keepdims=True)
+    model = keras.models.Sequential()
+    model.add(keras.layers.Masking(input_shape=(3, 4)))
+    model.add(layer_class(units=5, return_sequences=True, unroll=False))
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=RMSPropOptimizer(0.01))
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
   def test_from_config_LSTM(self):
     layer_class = keras.layers.LSTM
@@ -225,25 +140,25 @@ class LSTMLayerTest(test.TestCase):
     units = 3
     num_samples = 2
 
-    with self.cached_session():
-      # Test with Keras tensor
-      inputs = keras.Input((timesteps, embedding_dim))
-      initial_state = [keras.Input((units,)) for _ in range(num_states)]
-      layer = keras.layers.LSTM(units)
-      if len(initial_state) == 1:
-        output = layer(inputs, initial_state=initial_state[0])
-      else:
-        output = layer(inputs, initial_state=initial_state)
-      assert initial_state[0] in layer._inbound_nodes[0].input_tensors
-
-      model = keras.models.Model([inputs] + initial_state, output)
-      model.compile(loss='categorical_crossentropy', optimizer='adam')
-
-      inputs = np.random.random((num_samples, timesteps, embedding_dim))
-      initial_state = [np.random.random((num_samples, units))
-                       for _ in range(num_states)]
-      targets = np.random.random((num_samples, units))
-      model.train_on_batch([inputs] + initial_state, targets)
+    # Test with Keras tensor
+    inputs = keras.Input((timesteps, embedding_dim))
+    initial_state = [keras.Input((units,)) for _ in range(num_states)]
+    layer = keras.layers.LSTM(units)
+    if len(initial_state) == 1:
+      output = layer(inputs, initial_state=initial_state[0])
+    else:
+      output = layer(inputs, initial_state=initial_state)
+    assert initial_state[0] in layer._inbound_nodes[0].input_tensors
+
+    model = keras.models.Model([inputs] + initial_state, output)
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=adam.AdamOptimizer())
+
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    initial_state = [np.random.random((num_samples, units))
+                     for _ in range(num_states)]
+    targets = np.random.random((num_samples, units))
+    model.train_on_batch([inputs] + initial_state, targets)
 
   def test_specify_initial_state_non_keras_tensor(self):
     num_states = 2
@@ -252,21 +167,21 @@ class LSTMLayerTest(test.TestCase):
     units = 3
     num_samples = 2
 
-    with self.cached_session():
-      # Test with non-Keras tensor
-      inputs = keras.Input((timesteps, embedding_dim))
-      initial_state = [keras.backend.random_normal_variable(
-          (num_samples, units), 0, 1)
-                       for _ in range(num_states)]
-      layer = keras.layers.LSTM(units)
-      output = layer(inputs, initial_state=initial_state)
+    # Test with non-Keras tensor
+    inputs = keras.Input((timesteps, embedding_dim))
+    initial_state = [keras.backend.random_normal_variable(
+        (num_samples, units), 0, 1)
+                     for _ in range(num_states)]
+    layer = keras.layers.LSTM(units)
+    output = layer(inputs, initial_state=initial_state)
 
-      model = keras.models.Model(inputs, output)
-      model.compile(loss='categorical_crossentropy', optimizer='adam')
+    model = keras.models.Model(inputs, output)
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=adam.AdamOptimizer())
 
-      inputs = np.random.random((num_samples, timesteps, embedding_dim))
-      targets = np.random.random((num_samples, units))
-      model.train_on_batch(inputs, targets)
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    targets = np.random.random((num_samples, units))
+    model.train_on_batch(inputs, targets)
 
   def test_reset_states_with_values(self):
     num_states = 2
@@ -275,29 +190,28 @@ class LSTMLayerTest(test.TestCase):
     units = 3
     num_samples = 2
 
-    with self.cached_session():
-      layer = keras.layers.LSTM(units, stateful=True)
-      layer.build((num_samples, timesteps, embedding_dim))
-      layer.reset_states()
-      assert len(layer.states) == num_states
-      assert layer.states[0] is not None
-      self.assertAllClose(
-          keras.backend.eval(layer.states[0]),
-          np.zeros(keras.backend.int_shape(layer.states[0])),
-          atol=1e-4)
-      state_shapes = [keras.backend.int_shape(state) for state in layer.states]
-      values = [np.ones(shape) for shape in state_shapes]
-      if len(values) == 1:
-        values = values[0]
-      layer.reset_states(values)
-      self.assertAllClose(
-          keras.backend.eval(layer.states[0]),
-          np.ones(keras.backend.int_shape(layer.states[0])),
-          atol=1e-4)
-
-      # Test with invalid data
-      with self.assertRaises(ValueError):
-        layer.reset_states([1] * (len(layer.states) + 1))
+    layer = keras.layers.LSTM(units, stateful=True)
+    layer.build((num_samples, timesteps, embedding_dim))
+    layer.reset_states()
+    assert len(layer.states) == num_states
+    assert layer.states[0] is not None
+    self.assertAllClose(
+        keras.backend.eval(layer.states[0]),
+        np.zeros(keras.backend.int_shape(layer.states[0])),
+        atol=1e-4)
+    state_shapes = [keras.backend.int_shape(state) for state in layer.states]
+    values = [np.ones(shape) for shape in state_shapes]
+    if len(values) == 1:
+      values = values[0]
+    layer.reset_states(values)
+    self.assertAllClose(
+        keras.backend.eval(layer.states[0]),
+        np.ones(keras.backend.int_shape(layer.states[0])),
+        atol=1e-4)
+
+    # Test with invalid data
+    with self.assertRaises(ValueError):
+      layer.reset_states([1] * (len(layer.states) + 1))
 
   def test_specify_state_with_masking(self):
     num_states = 2
@@ -306,21 +220,20 @@ class LSTMLayerTest(test.TestCase):
     units = 3
     num_samples = 2
 
-    with self.cached_session():
-      inputs = keras.Input((timesteps, embedding_dim))
-      _ = keras.layers.Masking()(inputs)
-      initial_state = [keras.Input((units,)) for _ in range(num_states)]
-      output = keras.layers.LSTM(units)(inputs, initial_state=initial_state)
+    inputs = keras.Input((timesteps, embedding_dim))
+    _ = keras.layers.Masking()(inputs)
+    initial_state = [keras.Input((units,)) for _ in range(num_states)]
+    output = keras.layers.LSTM(units)(inputs, initial_state=initial_state)
 
-      model = keras.models.Model([inputs] + initial_state, output)
-      model.compile(loss='categorical_crossentropy',
-                    optimizer=RMSPropOptimizer(0.01))
+    model = keras.models.Model([inputs] + initial_state, output)
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=RMSPropOptimizer(0.01))
 
-      inputs = np.random.random((num_samples, timesteps, embedding_dim))
-      initial_state = [np.random.random((num_samples, units))
-                       for _ in range(num_states)]
-      targets = np.random.random((num_samples, units))
-      model.train_on_batch([inputs] + initial_state, targets)
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    initial_state = [np.random.random((num_samples, units))
+                     for _ in range(num_states)]
+    targets = np.random.random((num_samples, units))
+    model.train_on_batch([inputs] + initial_state, targets)
 
   def test_return_state(self):
     num_states = 2
@@ -329,17 +242,16 @@ class LSTMLayerTest(test.TestCase):
     units = 3
     num_samples = 2
 
-    with self.cached_session():
-      inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
-      layer = keras.layers.LSTM(units, return_state=True, stateful=True)
-      outputs = layer(inputs)
-      state = outputs[1:]
-      assert len(state) == num_states
-      model = keras.models.Model(inputs, state[0])
+    inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
+    layer = keras.layers.LSTM(units, return_state=True, stateful=True)
+    outputs = layer(inputs)
+    state = outputs[1:]
+    assert len(state) == num_states
+    model = keras.models.Model(inputs, state[0])
 
-      inputs = np.random.random((num_samples, timesteps, embedding_dim))
-      state = model.predict(inputs)
-      self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4)
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    state = model.predict(inputs)
+    self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4)
 
   def test_state_reuse(self):
     timesteps = 3
@@ -347,16 +259,15 @@ class LSTMLayerTest(test.TestCase):
     units = 3
     num_samples = 2
 
-    with self.cached_session():
-      inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
-      layer = keras.layers.LSTM(units, return_state=True, return_sequences=True)
-      outputs = layer(inputs)
-      output, state = outputs[0], outputs[1:]
-      output = keras.layers.LSTM(units)(output, initial_state=state)
-      model = keras.models.Model(inputs, output)
+    inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
+    layer = keras.layers.LSTM(units, return_state=True, return_sequences=True)
+    outputs = layer(inputs)
+    output, state = outputs[0], outputs[1:]
+    output = keras.layers.LSTM(units)(output, initial_state=state)
+    model = keras.models.Model(inputs, output)
 
-      inputs = np.random.random((num_samples, timesteps, embedding_dim))
-      outputs = model.predict(inputs)
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    outputs = model.predict(inputs)
 
   def test_initial_states_as_other_inputs(self):
     timesteps = 3
@@ -366,25 +277,109 @@ class LSTMLayerTest(test.TestCase):
     num_states = 2
     layer_class = keras.layers.LSTM
 
+    # Test with Keras tensor
+    main_inputs = keras.Input((timesteps, embedding_dim))
+    initial_state = [keras.Input((units,)) for _ in range(num_states)]
+    inputs = [main_inputs] + initial_state
+
+    layer = layer_class(units)
+    output = layer(inputs)
+    assert initial_state[0] in layer._inbound_nodes[0].input_tensors
+
+    model = keras.models.Model(inputs, output)
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=adam.AdamOptimizer())
+
+    main_inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    initial_state = [np.random.random((num_samples, units))
+                     for _ in range(num_states)]
+    targets = np.random.random((num_samples, units))
+    model.train_on_batch([main_inputs] + initial_state, targets)
+
+
+class LSTMLayerGraphOnlyTest(test.TestCase):
+
+  def test_statefulness_LSTM(self):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+    layer_class = keras.layers.LSTM
     with self.cached_session():
-      # Test with Keras tensor
-      main_inputs = keras.Input((timesteps, embedding_dim))
-      initial_state = [keras.Input((units,)) for _ in range(num_states)]
-      inputs = [main_inputs] + initial_state
+      model = keras.models.Sequential()
+      model.add(
+          keras.layers.Embedding(
+              4,
+              embedding_dim,
+              mask_zero=True,
+              input_length=timesteps,
+              batch_input_shape=(num_samples, timesteps)))
+      layer = layer_class(
+          units, return_sequences=False, stateful=True, weights=None)
+      model.add(layer)
+      model.compile(optimizer=gradient_descent.GradientDescentOptimizer(0.01),
+                    loss='mse')
+      out1 = model.predict(np.ones((num_samples, timesteps)))
+      self.assertEqual(out1.shape, (num_samples, units))
+
+      # train once so that the states change
+      model.train_on_batch(
+          np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
+      out2 = model.predict(np.ones((num_samples, timesteps)))
+
+      # if the state is not reset, output should be different
+      self.assertNotEqual(out1.max(), out2.max())
+
+      # check that output changes after states are reset
+      # (even though the model itself didn't change)
+      layer.reset_states()
+      out3 = model.predict(np.ones((num_samples, timesteps)))
+      self.assertNotEqual(out2.max(), out3.max())
 
-      layer = layer_class(units)
-      output = layer(inputs)
-      assert initial_state[0] in layer._inbound_nodes[0].input_tensors
+      # check that container-level reset_states() works
+      model.reset_states()
+      out4 = model.predict(np.ones((num_samples, timesteps)))
+      self.assertAllClose(out3, out4, atol=1e-5)
 
-      model = keras.models.Model(inputs, output)
-      model.compile(loss='categorical_crossentropy', optimizer='adam')
+      # check that the call to `predict` updated the states
+      out5 = model.predict(np.ones((num_samples, timesteps)))
+      self.assertNotEqual(out4.max(), out5.max())
 
-      main_inputs = np.random.random((num_samples, timesteps, embedding_dim))
-      initial_state = [np.random.random((num_samples, units))
-                       for _ in range(num_states)]
-      targets = np.random.random((num_samples, units))
-      model.train_on_batch([main_inputs] + initial_state, targets)
+      # Check masking
+      layer.reset_states()
+
+      left_padded_input = np.ones((num_samples, timesteps))
+      left_padded_input[0, :1] = 0
+      left_padded_input[1, :2] = 0
+      out6 = model.predict(left_padded_input)
 
+      layer.reset_states()
+
+      right_padded_input = np.ones((num_samples, timesteps))
+      right_padded_input[0, -1:] = 0
+      right_padded_input[1, -2:] = 0
+      out7 = model.predict(right_padded_input)
+
+      self.assertAllClose(out7, out6, atol=1e-5)
+
+  def test_regularizers_LSTM(self):
+    embedding_dim = 4
+    layer_class = keras.layers.LSTM
+    with self.cached_session():
+      layer = layer_class(
+          5,
+          return_sequences=False,
+          weights=None,
+          input_shape=(None, embedding_dim),
+          kernel_regularizer=keras.regularizers.l1(0.01),
+          recurrent_regularizer=keras.regularizers.l1(0.01),
+          bias_regularizer='l2',
+          activity_regularizer='l1')
+      layer.build((None, None, 2))
+      self.assertEqual(len(layer.losses), 3)
+      x = keras.backend.variable(np.ones((2, 3, 2)))
+      layer(x)
+      self.assertEqual(len(layer.get_losses_for(x)), 1)
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/layers/merge_test.py b/tensorflow/python/keras/layers/merge_test.py
index 7bcfcaeddb..698c5662b6 100644
--- a/tensorflow/python/keras/layers/merge_test.py
+++ b/tensorflow/python/keras/layers/merge_test.py
@@ -26,9 +26,9 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
+@tf_test_util.run_all_in_graph_and_eager_modes
 class MergeLayersTest(test.TestCase):
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_merge_add(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
@@ -45,25 +45,6 @@ class MergeLayersTest(test.TestCase):
     self.assertEqual(out.shape, (2, 4, 5))
     self.assertAllClose(out, x1 + x2 + x3, atol=1e-4)
 
-  def test_merge_add_masking(self):
-    with self.cached_session():
-      i1 = keras.layers.Input(shape=(4, 5))
-      i2 = keras.layers.Input(shape=(4, 5))
-      m1 = keras.layers.Masking()(i1)
-      layer = keras.layers.Add()
-      o = layer([m1, i2])
-      self.assertListEqual(o.get_shape().as_list(), [None, 4, 5])
-      mask = layer.output_mask
-      self.assertListEqual(mask.get_shape().as_list(), [None, 4])
-
-  def test_merge_add_dynamic_shape(self):
-    with self.cached_session():
-      i1 = array_ops.placeholder(shape=(4, None), dtype='float32')
-      i2 = array_ops.placeholder(shape=(4, 5), dtype='float32')
-      layer = keras.layers.Add()
-      o = layer([i1, i2])
-      self.assertListEqual(o.get_shape().as_list(), [4, 5])
-
   def test_merge_elementwise_errors(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 6))
@@ -76,7 +57,6 @@ class MergeLayersTest(test.TestCase):
     with self.assertRaises(ValueError):
       keras.layers.add([i1])
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_merge_multiply(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
@@ -92,7 +72,6 @@ class MergeLayersTest(test.TestCase):
     self.assertEqual(out.shape, (2, 4, 5))
     self.assertAllClose(out, x1 * x2 * x3, atol=1e-4)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_merge_average(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
@@ -106,7 +85,6 @@ class MergeLayersTest(test.TestCase):
     self.assertEqual(out.shape, (2, 4, 5))
     self.assertAllClose(out, 0.5 * (x1 + x2), atol=1e-4)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_merge_maximum(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
@@ -120,7 +98,6 @@ class MergeLayersTest(test.TestCase):
     self.assertEqual(out.shape, (2, 4, 5))
     self.assertAllClose(out, np.maximum(x1, x2), atol=1e-4)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_merge_minimum(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
@@ -134,7 +111,6 @@ class MergeLayersTest(test.TestCase):
     self.assertEqual(out.shape, (2, 4, 5))
     self.assertAllClose(out, np.minimum(x1, x2), atol=1e-4)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_merge_concatenate(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
@@ -148,17 +124,6 @@ class MergeLayersTest(test.TestCase):
     self.assertEqual(out.shape, (2, 8, 5))
     self.assertAllClose(out, np.concatenate([x1, x2], axis=1), atol=1e-4)
 
-  def test_merge_concatenate_masking(self):
-    with self.cached_session():
-      i1 = keras.layers.Input(shape=(4, 5))
-      i2 = keras.layers.Input(shape=(4, 5))
-      m1 = keras.layers.Masking()(i1)
-      layer = keras.layers.Concatenate()
-      o = layer([m1, i2])
-      self.assertListEqual(o.get_shape().as_list(), [None, 4, 10])
-      mask = layer.output_mask
-      self.assertListEqual(mask.get_shape().as_list(), [None, 4])
-
   def test_concatenate_errors(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(3, 5))
@@ -169,7 +134,6 @@ class MergeLayersTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'called on a list'):
       keras.layers.concatenate([i1], axis=-1)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_merge_dot(self):
     i1 = keras.layers.Input(shape=(4,))
     i2 = keras.layers.Input(shape=(4,))
@@ -215,7 +179,6 @@ class MergeLayersTest(test.TestCase):
       dot = keras.layers.Dot(1)
       dot.compute_output_shape(1)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_merge_subtract(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
@@ -231,5 +194,38 @@ class MergeLayersTest(test.TestCase):
       keras.layers.subtract([i1, i1, i1])
 
 
+class MergeLayersGraphOnlyTest(test.TestCase):
+
+  def test_merge_add_masking(self):
+    with self.cached_session():
+      i1 = keras.layers.Input(shape=(4, 5))
+      i2 = keras.layers.Input(shape=(4, 5))
+      m1 = keras.layers.Masking()(i1)
+      layer = keras.layers.Add()
+      o = layer([m1, i2])
+      self.assertListEqual(o.get_shape().as_list(), [None, 4, 5])
+      mask = layer.output_mask
+      self.assertListEqual(mask.get_shape().as_list(), [None, 4])
+
+  def test_merge_add_dynamic_shape(self):
+    with self.cached_session():
+      i1 = array_ops.placeholder(shape=(4, None), dtype='float32')
+      i2 = array_ops.placeholder(shape=(4, 5), dtype='float32')
+      layer = keras.layers.Add()
+      o = layer([i1, i2])
+      self.assertListEqual(o.get_shape().as_list(), [4, 5])
+
+  def test_merge_concatenate_masking(self):
+    with self.cached_session():
+      i1 = keras.layers.Input(shape=(4, 5))
+      i2 = keras.layers.Input(shape=(4, 5))
+      m1 = keras.layers.Masking()(i1)
+      layer = keras.layers.Concatenate()
+      o = layer([m1, i2])
+      self.assertListEqual(o.get_shape().as_list(), [None, 4, 10])
+      mask = layer.output_mask
+      self.assertListEqual(mask.get_shape().as_list(), [None, 4])
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/keras/layers/noise_test.py b/tensorflow/python/keras/layers/noise_test.py
index cea304680b..325dd933b2 100644
--- a/tensorflow/python/keras/layers/noise_test.py
+++ b/tensorflow/python/keras/layers/noise_test.py
@@ -24,23 +24,21 @@ from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 
 
+@tf_test_util.run_all_in_graph_and_eager_modes
 class NoiseLayersTest(test.TestCase):
 
   def test_GaussianNoise(self):
-    with self.cached_session():
-      testing_utils.layer_test(
-          keras.layers.GaussianNoise,
-          kwargs={'stddev': 1.},
-          input_shape=(3, 2, 3))
+    testing_utils.layer_test(
+        keras.layers.GaussianNoise,
+        kwargs={'stddev': 1.},
+        input_shape=(3, 2, 3))
 
   def test_GaussianDropout(self):
-    with self.cached_session():
-      testing_utils.layer_test(
-          keras.layers.GaussianDropout,
-          kwargs={'rate': 0.5},
-          input_shape=(3, 2, 3))
+    testing_utils.layer_test(
+        keras.layers.GaussianDropout,
+        kwargs={'rate': 0.5},
+        input_shape=(3, 2, 3))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_AlphaDropout(self):
     testing_utils.layer_test(
         keras.layers.AlphaDropout,
diff --git a/tensorflow/python/keras/layers/normalization_test.py b/tensorflow/python/keras/layers/normalization_test.py
index ff705183ef..b11a350dbf 100644
--- a/tensorflow/python/keras/layers/normalization_test.py
+++ b/tensorflow/python/keras/layers/normalization_test.py
@@ -21,97 +21,97 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python import keras
+from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
+from tensorflow.python.training import gradient_descent
 
 
+@tf_test_util.run_all_in_graph_and_eager_modes
 class NormalizationLayersTest(test.TestCase):
 
   def test_basic_batchnorm(self):
-    with self.cached_session():
-      testing_utils.layer_test(
-          keras.layers.BatchNormalization,
-          kwargs={
-              'momentum': 0.9,
-              'epsilon': 0.1,
-              'gamma_regularizer': keras.regularizers.l2(0.01),
-              'beta_regularizer': keras.regularizers.l2(0.01)
-          },
-          input_shape=(3, 4, 2))
-      testing_utils.layer_test(
-          keras.layers.BatchNormalization,
-          kwargs={
-              'gamma_initializer': 'ones',
-              'beta_initializer': 'ones',
-              'moving_mean_initializer': 'zeros',
-              'moving_variance_initializer': 'ones'
-          },
-          input_shape=(3, 4, 2))
-      testing_utils.layer_test(
-          keras.layers.BatchNormalization,
-          kwargs={'scale': False,
-                  'center': False},
-          input_shape=(3, 3))
+    testing_utils.layer_test(
+        keras.layers.BatchNormalization,
+        kwargs={
+            'momentum': 0.9,
+            'epsilon': 0.1,
+            'gamma_regularizer': keras.regularizers.l2(0.01),
+            'beta_regularizer': keras.regularizers.l2(0.01)
+        },
+        input_shape=(3, 4, 2))
+    testing_utils.layer_test(
+        keras.layers.BatchNormalization,
+        kwargs={
+            'gamma_initializer': 'ones',
+            'beta_initializer': 'ones',
+            'moving_mean_initializer': 'zeros',
+            'moving_variance_initializer': 'ones'
+        },
+        input_shape=(3, 4, 2))
+    testing_utils.layer_test(
+        keras.layers.BatchNormalization,
+        kwargs={'scale': False,
+                'center': False},
+        input_shape=(3, 3))
 
   def test_batchnorm_weights(self):
-    with self.cached_session():
-      layer = keras.layers.BatchNormalization(scale=False, center=False)
-      layer.build((None, 3, 4))
-      self.assertEqual(len(layer.trainable_weights), 0)
-      self.assertEqual(len(layer.weights), 2)
+    layer = keras.layers.BatchNormalization(scale=False, center=False)
+    layer.build((None, 3, 4))
+    self.assertEqual(len(layer.trainable_weights), 0)
+    self.assertEqual(len(layer.weights), 2)
 
-      layer = keras.layers.BatchNormalization()
-      layer.build((None, 3, 4))
-      self.assertEqual(len(layer.trainable_weights), 2)
-      self.assertEqual(len(layer.weights), 4)
+    layer = keras.layers.BatchNormalization()
+    layer.build((None, 3, 4))
+    self.assertEqual(len(layer.trainable_weights), 2)
+    self.assertEqual(len(layer.weights), 4)
 
   def test_batchnorm_regularization(self):
-    with self.cached_session():
-      layer = keras.layers.BatchNormalization(
-          gamma_regularizer='l1', beta_regularizer='l1')
-      layer.build((None, 3, 4))
-      self.assertEqual(len(layer.losses), 2)
-      max_norm = keras.constraints.max_norm
-      layer = keras.layers.BatchNormalization(
-          gamma_constraint=max_norm, beta_constraint=max_norm)
-      layer.build((None, 3, 4))
-      self.assertEqual(layer.gamma.constraint, max_norm)
-      self.assertEqual(layer.beta.constraint, max_norm)
+    layer = keras.layers.BatchNormalization(
+        gamma_regularizer='l1', beta_regularizer='l1')
+    layer.build((None, 3, 4))
+    self.assertEqual(len(layer.losses), 2)
+    max_norm = keras.constraints.max_norm
+    layer = keras.layers.BatchNormalization(
+        gamma_constraint=max_norm, beta_constraint=max_norm)
+    layer.build((None, 3, 4))
+    self.assertEqual(layer.gamma.constraint, max_norm)
+    self.assertEqual(layer.beta.constraint, max_norm)
 
   def test_batchnorm_correctness(self):
-    with self.cached_session():
-      model = keras.models.Sequential()
-      norm = keras.layers.BatchNormalization(input_shape=(10,), momentum=0.8)
-      model.add(norm)
-      model.compile(loss='mse', optimizer='sgd')
-
-      # centered on 5.0, variance 10.0
-      x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
-      model.fit(x, x, epochs=4, verbose=0)
-      out = model.predict(x)
-      out -= keras.backend.eval(norm.beta)
-      out /= keras.backend.eval(norm.gamma)
-
-      np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
-      np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
+    model = keras.models.Sequential()
+    norm = keras.layers.BatchNormalization(input_shape=(10,), momentum=0.8)
+    model.add(norm)
+    model.compile(loss='mse',
+                  optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+
+    # centered on 5.0, variance 10.0
+    x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
+    model.fit(x, x, epochs=4, verbose=0)
+    out = model.predict(x)
+    out -= keras.backend.eval(norm.beta)
+    out /= keras.backend.eval(norm.gamma)
+
+    np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
+    np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
 
   def test_batchnorm_mixed_precision(self):
-    with self.cached_session():
-      model = keras.models.Sequential()
-      norm = keras.layers.BatchNormalization(input_shape=(10,), momentum=0.8)
-      model.add(norm)
-      model.compile(loss='mse', optimizer='sgd')
-
-      # centered on 5.0, variance 10.0
-      x = np.random.normal(
-          loc=5.0, scale=10.0, size=(1000, 10)).astype(np.float16)
-      model.fit(x, x, epochs=4, verbose=0)
-      out = model.predict(x)
-      out -= keras.backend.eval(norm.beta)
-      out /= keras.backend.eval(norm.gamma)
-
-      np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
-      np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
+    model = keras.models.Sequential()
+    norm = keras.layers.BatchNormalization(input_shape=(10,), momentum=0.8)
+    model.add(norm)
+    model.compile(loss='mse',
+                  optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+
+    # centered on 5.0, variance 10.0
+    x = np.random.normal(
+        loc=5.0, scale=10.0, size=(1000, 10)).astype(np.float16)
+    model.fit(x, x, epochs=4, verbose=0)
+    out = model.predict(x)
+    out -= keras.backend.eval(norm.beta)
+    out /= keras.backend.eval(norm.gamma)
+
+    np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
+    np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
 
   def test_batchnorm_convnet(self):
     if test.is_gpu_available(cuda_only=True):
@@ -120,7 +120,8 @@ class NormalizationLayersTest(test.TestCase):
         norm = keras.layers.BatchNormalization(
             axis=1, input_shape=(3, 4, 4), momentum=0.8)
         model.add(norm)
-        model.compile(loss='mse', optimizer='sgd')
+        model.compile(loss='mse',
+                      optimizer=gradient_descent.GradientDescentOptimizer(0.01))
 
         # centered on 5.0, variance 10.0
         x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4))
@@ -133,24 +134,27 @@ class NormalizationLayersTest(test.TestCase):
         np.testing.assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1)
 
   def test_batchnorm_convnet_channel_last(self):
-    with self.cached_session():
-      # keras.backend.set_learning_phase(True)
+    # keras.backend.set_learning_phase(True)
+
+    model = keras.models.Sequential()
+    norm = keras.layers.BatchNormalization(
+        axis=-1, input_shape=(4, 4, 3), momentum=0.8)
+    model.add(norm)
+    model.compile(loss='mse',
+                  optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+
+    # centered on 5.0, variance 10.0
+    x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3))
+    model.fit(x, x, epochs=4, verbose=0)
+    out = model.predict(x)
+    out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3))
+    out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3))
 
-      model = keras.models.Sequential()
-      norm = keras.layers.BatchNormalization(
-          axis=-1, input_shape=(4, 4, 3), momentum=0.8)
-      model.add(norm)
-      model.compile(loss='mse', optimizer='sgd')
+    np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1)
+    np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1)
 
-      # centered on 5.0, variance 10.0
-      x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3))
-      model.fit(x, x, epochs=4, verbose=0)
-      out = model.predict(x)
-      out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3))
-      out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3))
 
-      np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1)
-      np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1)
+class NormalizationLayersGraphModeOnlyTest(test.TestCase):
 
   def test_shared_batchnorm(self):
     """Test that a BN layer can be shared across different data streams.
@@ -167,7 +171,7 @@ class NormalizationLayersTest(test.TestCase):
       x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10))
       model = keras.models.Model(x2, y2)
 
-      model.compile('sgd', 'mse')
+      model.compile(gradient_descent.GradientDescentOptimizer(0.01), 'mse')
       model.train_on_batch(x, x)
 
       self.assertEqual(len(bn.updates), 4)
@@ -183,7 +187,7 @@ class NormalizationLayersTest(test.TestCase):
       self.assertEqual(len(new_model.updates), 2)
       self.assertEqual(len(model.updates), 4)
       self.assertEqual(len(new_model.get_updates_for(x3)), 2)
-      new_model.compile('sgd', 'mse')
+      new_model.compile(gradient_descent.GradientDescentOptimizer(0.01), 'mse')
       new_model.train_on_batch(x, x)
 
   def test_that_trainable_disables_updates(self):
@@ -199,7 +203,7 @@ class NormalizationLayersTest(test.TestCase):
       model.trainable = False
       assert not model.updates
 
-      model.compile('sgd', 'mse')
+      model.compile(gradient_descent.GradientDescentOptimizer(0.01), 'mse')
       assert not model.updates
 
       x1 = model.predict(val_a)
@@ -208,7 +212,7 @@ class NormalizationLayersTest(test.TestCase):
       self.assertAllClose(x1, x2, atol=1e-7)
 
       model.trainable = True
-      model.compile('sgd', 'mse')
+      model.compile(gradient_descent.GradientDescentOptimizer(0.01), 'mse')
       assert model.updates
 
       model.train_on_batch(val_a, val_out)
@@ -216,7 +220,7 @@ class NormalizationLayersTest(test.TestCase):
       assert np.abs(np.sum(x1 - x2)) > 1e-5
 
       layer.trainable = False
-      model.compile('sgd', 'mse')
+      model.compile(gradient_descent.GradientDescentOptimizer(0.01), 'mse')
       assert not model.updates
 
       x1 = model.predict(val_a)
diff --git a/tensorflow/python/keras/layers/serialization_test.py b/tensorflow/python/keras/layers/serialization_test.py
index 5872185ef7..548c3ec1ac 100644
--- a/tensorflow/python/keras/layers/serialization_test.py
+++ b/tensorflow/python/keras/layers/serialization_test.py
@@ -19,9 +19,11 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python import keras
+from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.platform import test
 
 
+@tf_test_util.run_all_in_graph_and_eager_modes
 class LayerSerializationTest(test.TestCase):
 
   def test_serialize_deserialize(self):
diff --git a/tensorflow/python/keras/layers/simplernn_test.py b/tensorflow/python/keras/layers/simplernn_test.py
index 2f2295a793..93456b5e3a 100644
--- a/tensorflow/python/keras/layers/simplernn_test.py
+++ b/tensorflow/python/keras/layers/simplernn_test.py
@@ -24,12 +24,13 @@ from tensorflow.python import keras
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
+from tensorflow.python.training import gradient_descent
 from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
 
+@tf_test_util.run_all_in_graph_and_eager_modes
 class SimpleRNNLayerTest(test.TestCase):
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_return_sequences_SimpleRNN(self):
     num_samples = 2
     timesteps = 3
@@ -41,7 +42,6 @@ class SimpleRNNLayerTest(test.TestCase):
                 'return_sequences': True},
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_dynamic_behavior_SimpleRNN(self):
     num_samples = 2
     timesteps = 3
@@ -55,7 +55,6 @@ class SimpleRNNLayerTest(test.TestCase):
     y = np.random.random((num_samples, units))
     model.train_on_batch(x, y)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_dropout_SimpleRNN(self):
     num_samples = 2
     timesteps = 3
@@ -68,7 +67,6 @@ class SimpleRNNLayerTest(test.TestCase):
                 'recurrent_dropout': 0.1},
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_implementation_mode_SimpleRNN(self):
     num_samples = 2
     timesteps = 3
@@ -81,6 +79,47 @@ class SimpleRNNLayerTest(test.TestCase):
                   'implementation': mode},
           input_shape=(num_samples, timesteps, embedding_dim))
 
+  def test_constraints_SimpleRNN(self):
+    embedding_dim = 4
+    layer_class = keras.layers.SimpleRNN
+    k_constraint = keras.constraints.max_norm(0.01)
+    r_constraint = keras.constraints.max_norm(0.01)
+    b_constraint = keras.constraints.max_norm(0.01)
+    layer = layer_class(
+        5,
+        return_sequences=False,
+        weights=None,
+        input_shape=(None, embedding_dim),
+        kernel_constraint=k_constraint,
+        recurrent_constraint=r_constraint,
+        bias_constraint=b_constraint)
+    layer.build((None, None, embedding_dim))
+    self.assertEqual(layer.cell.kernel.constraint, k_constraint)
+    self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
+    self.assertEqual(layer.cell.bias.constraint, b_constraint)
+
+  def test_with_masking_layer_SimpleRNN(self):
+    layer_class = keras.layers.SimpleRNN
+    inputs = np.random.random((2, 3, 4))
+    targets = np.abs(np.random.random((2, 3, 5)))
+    targets /= targets.sum(axis=-1, keepdims=True)
+    model = keras.models.Sequential()
+    model.add(keras.layers.Masking(input_shape=(3, 4)))
+    model.add(layer_class(units=5, return_sequences=True, unroll=False))
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=RMSPropOptimizer(0.01))
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
+
+  def test_from_config_SimpleRNN(self):
+    layer_class = keras.layers.SimpleRNN
+    for stateful in (False, True):
+      l1 = layer_class(units=1, stateful=stateful)
+      l2 = layer_class.from_config(l1.get_config())
+      assert l1.get_config() == l2.get_config()
+
+
+class SimpleRNNLayerGraphOnlyTest(test.TestCase):
+
   def test_statefulness_SimpleRNN(self):
     num_samples = 2
     timesteps = 3
@@ -99,7 +138,8 @@ class SimpleRNNLayerTest(test.TestCase):
       layer = layer_class(
           units, return_sequences=False, stateful=True, weights=None)
       model.add(layer)
-      model.compile(optimizer='sgd', loss='mse')
+      model.compile(optimizer=gradient_descent.GradientDescentOptimizer(0.01),
+                    loss='mse')
       out1 = model.predict(np.ones((num_samples, timesteps)))
       self.assertEqual(out1.shape, (num_samples, units))
 
@@ -163,47 +203,5 @@ class SimpleRNNLayerTest(test.TestCase):
       layer(x)
       self.assertEqual(len(layer.get_losses_for(x)), 1)
 
-  def test_constraints_SimpleRNN(self):
-    embedding_dim = 4
-    layer_class = keras.layers.SimpleRNN
-    with self.cached_session():
-      k_constraint = keras.constraints.max_norm(0.01)
-      r_constraint = keras.constraints.max_norm(0.01)
-      b_constraint = keras.constraints.max_norm(0.01)
-      layer = layer_class(
-          5,
-          return_sequences=False,
-          weights=None,
-          input_shape=(None, embedding_dim),
-          kernel_constraint=k_constraint,
-          recurrent_constraint=r_constraint,
-          bias_constraint=b_constraint)
-      layer.build((None, None, embedding_dim))
-      self.assertEqual(layer.cell.kernel.constraint, k_constraint)
-      self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
-      self.assertEqual(layer.cell.bias.constraint, b_constraint)
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_with_masking_layer_SimpleRNN(self):
-    layer_class = keras.layers.SimpleRNN
-    with self.cached_session():
-      inputs = np.random.random((2, 3, 4))
-      targets = np.abs(np.random.random((2, 3, 5)))
-      targets /= targets.sum(axis=-1, keepdims=True)
-      model = keras.models.Sequential()
-      model.add(keras.layers.Masking(input_shape=(3, 4)))
-      model.add(layer_class(units=5, return_sequences=True, unroll=False))
-      model.compile(loss='categorical_crossentropy',
-                    optimizer=RMSPropOptimizer(0.01))
-      model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
-
-  def test_from_config_SimpleRNN(self):
-    layer_class = keras.layers.SimpleRNN
-    for stateful in (False, True):
-      l1 = layer_class(units=1, stateful=stateful)
-      l2 = layer_class.from_config(l1.get_config())
-      assert l1.get_config() == l2.get_config()
-
-
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From ead9f381d6eb96d075b4c3c7b1c22a04c4118842 Mon Sep 17 00:00:00 2001
From: Alexey Radul <axch@google.com>
Date: Tue, 16 Oct 2018 18:23:05 -0700
Subject: [PATCH 1076/1085] Internal change.

PiperOrigin-RevId: 217423671
---
 tensorflow/python/autograph/pyct/common_transformers/BUILD | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/autograph/pyct/common_transformers/BUILD b/tensorflow/python/autograph/pyct/common_transformers/BUILD
index 1106a19de1..5e2f8f3ac0 100644
--- a/tensorflow/python/autograph/pyct/common_transformers/BUILD
+++ b/tensorflow/python/autograph/pyct/common_transformers/BUILD
@@ -34,7 +34,6 @@ py_test(
     name = "anf_test",
     srcs = ["anf_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_oss"],
     deps = [
         ":common_transformers",
         "//tensorflow/python:client_testlib",
-- 
GitLab


From ecc910875ded2f1205bf392bacf59ee3b701ebe3 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Tue, 16 Oct 2018 18:35:56 -0700
Subject: [PATCH 1077/1085] set_use_resource back to default value in defun to
 unbreak existing test cases around variable_scope.

PiperOrigin-RevId: 217424918
---
 tensorflow/python/eager/function.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 6b37ab9410..5fd49dd979 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -924,7 +924,9 @@ def func_graph_from_py_func(name,
   else:
     control_manager = ops.NullContextmanager
   with func_graph.as_default(), control_manager() as a:
-    variable_scope.get_variable_scope().set_use_resource(True)
+    current_scope = variable_scope.get_variable_scope()
+    default_use_recource = current_scope.use_resource
+    current_scope.set_use_resource(True)
 
     if signature is not None:
       args = signature
@@ -976,6 +978,7 @@ def func_graph_from_py_func(name,
       check_mutation(func_kwargs_before, func_kwargs)
     finally:
       tape.pop_tape(this_tape)
+      current_scope.set_use_resource(default_use_recource)
 
     # Variables in `func_args`, `func_kwargs` should be explicit inputs
     # to the function, not captured inputs.
-- 
GitLab


From dc7ca1cc490b9efbd907553dff133df933591c01 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Tue, 16 Oct 2018 19:17:29 -0700
Subject: [PATCH 1078/1085] Internal change

PiperOrigin-RevId: 217428291
---
 .../compiler/xla/tests/reduce_precision_test.cc      | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/reduce_precision_test.cc b/tensorflow/compiler/xla/tests/reduce_precision_test.cc
index 26e2bfde5c..193e669692 100644
--- a/tensorflow/compiler/xla/tests/reduce_precision_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_precision_test.cc
@@ -283,7 +283,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest,
            DISABLED_ON_INTERPRETER(ReducePrecisionSkippedAfterFusion)) {
   XlaBuilder builder(TestName());
 
-  Literal a_literal = LiteralUtil::CreateR1<float>({1.00001});
+  Literal a_literal = LiteralUtil::CreateR1<float>({1.00001, 1.00001});
   std::unique_ptr<GlobalData> a_data =
       client_->TransferToServer(a_literal).ConsumeValueOrDie();
   auto a = Parameter(&builder, 0, a_literal.shape(), "a");
@@ -301,7 +301,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest,
       HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kAbs; });
 
-  ComputeAndCompareR1<float>(&builder, {-1.00001f}, {a_data.get()});
+  ComputeAndCompareR1<float>(&builder, {-1.00001f, -1.00001f}, {a_data.get()});
 }
 
 // The interpreter has no fusion pass, so skip this test.
@@ -309,7 +309,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest,
            DISABLED_ON_INTERPRETER(ReducePrecisionAddedAfterFusion)) {
   XlaBuilder builder(TestName());
 
-  Literal a_literal = LiteralUtil::CreateR1<float>({1.00001});
+  Literal a_literal = LiteralUtil::CreateR1<float>({1.00001, 1.00001});
   std::unique_ptr<GlobalData> a_data =
       client_->TransferToServer(a_literal).ConsumeValueOrDie();
   auto a = Parameter(&builder, 0, a_literal.shape(), "a");
@@ -325,7 +325,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest,
       HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kFusion; });
 
-  ComputeAndCompareR1<float>(&builder, {-1.0f}, {a_data.get()});
+  ComputeAndCompareR1<float>(&builder, {-1.0f, -1.0f}, {a_data.get()});
 }
 
 // The interpreter has no fusion pass, so skip this test.
@@ -358,7 +358,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest,
            DISABLED_ON_INTERPRETER(ReducePrecisionAddedFusionContains)) {
   XlaBuilder builder(TestName());
 
-  Literal a_literal = LiteralUtil::CreateR1<float>({1.00001});
+  Literal a_literal = LiteralUtil::CreateR1<float>({1.00001, 1.00001});
   std::unique_ptr<GlobalData> a_data =
       client_->TransferToServer(a_literal).ConsumeValueOrDie();
   auto a = Parameter(&builder, 0, a_literal.shape(), "a");
@@ -375,7 +375,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest,
       HloReducePrecisionOptions::FUSION_OUTPUTS_BY_CONTENT, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kAbs; });
 
-  ComputeAndCompareR1<float>(&builder, {-1.0f}, {a_data.get()});
+  ComputeAndCompareR1<float>(&builder, {-1.0f, -1.0f}, {a_data.get()});
 }
 
 }  // namespace
-- 
GitLab


From 0f1894de6cfddb3df8a3d6865d97b947af56f995 Mon Sep 17 00:00:00 2001
From: Zhenyu Tan <tanzheny@google.com>
Date: Tue, 16 Oct 2018 19:57:07 -0700
Subject: [PATCH 1079/1085] Move BUILD file to optimizer_v2 subfolder.

PiperOrigin-RevId: 217430948
---
 tensorflow/contrib/optimizer_v2/BUILD      |   2 +-
 tensorflow/python/keras/BUILD              | 156 +-------------------
 tensorflow/python/keras/optimizer_v2/BUILD | 164 +++++++++++++++++++++
 3 files changed, 166 insertions(+), 156 deletions(-)
 create mode 100644 tensorflow/python/keras/optimizer_v2/BUILD

diff --git a/tensorflow/contrib/optimizer_v2/BUILD b/tensorflow/contrib/optimizer_v2/BUILD
index 2cf445a85e..0700b7c73c 100644
--- a/tensorflow/contrib/optimizer_v2/BUILD
+++ b/tensorflow/contrib/optimizer_v2/BUILD
@@ -48,7 +48,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/python:util",
-        "//tensorflow/python/keras:optimizer_v2",
+        "//tensorflow/python/keras/optimizer_v2",
     ],
 )
 
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index a566c9acab..7b57871e77 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -62,7 +62,7 @@ py_library(
         ":backend",
         ":engine",
         ":layers",
-        ":optimizer_v2",
+        "//tensorflow/python/keras/optimizer_v2:optimizer_v2",
         "//tensorflow/python/saved_model",
         "//tensorflow/python:training",
     ],
@@ -190,30 +190,6 @@ py_library(
     ],
 )
 
-py_library(
-    name = "optimizer_v2",
-    srcs = [
-        "optimizer_v2/adadelta.py",
-        "optimizer_v2/adagrad.py",
-        "optimizer_v2/adam.py",
-        "optimizer_v2/optimizer_v2.py",
-        "optimizer_v2/rmsprop.py",
-        "optimizer_v2/sgd.py",
-    ],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:distribute",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-    ],
-)
-
 py_test(
     name = "integration_test",
     size = "medium",
@@ -865,133 +841,3 @@ py_library(
         "//third_party/py/numpy",
     ],
 )
-
-cuda_py_test(
-    name = "adadelta_test",
-    size = "medium",
-    srcs = ["optimizer_v2/adadelta_test.py"],
-    additional_deps = [
-        ":optimizer_v2",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:embedding_ops",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:variables",
-        "//third_party/py/numpy",
-    ],
-)
-
-cuda_py_test(
-    name = "adagrad_test",
-    size = "small",
-    srcs = ["optimizer_v2/adagrad_test.py"],
-    additional_deps = [
-        ":optimizer_v2",
-        "//tensorflow/python:embedding_ops",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-cuda_py_test(
-    name = "adam_test",
-    size = "small",
-    srcs = ["optimizer_v2/adam_test.py"],
-    additional_deps = [
-        ":optimizer_v2",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-cuda_py_test(
-    name = "checkpointable_utils_test",
-    srcs = ["optimizer_v2/checkpointable_utils_test.py"],
-    additional_deps = [
-        ":optimizer_v2",
-        "@six_archive//:six",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:layers",
-        "//tensorflow/python:layers_base",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:training",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/eager:test",
-        "//tensorflow/python/keras",
-    ],
-    tags = ["notsan"],
-)
-
-cuda_py_test(
-    name = "sgd_test",
-    size = "medium",
-    srcs = ["optimizer_v2/sgd_test.py"],
-    additional_deps = [
-        ":optimizer_v2",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:embedding_ops",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:resources",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/eager:context",
-    ],
-)
-
-cuda_py_test(
-    name = "optimizer_v2_test",
-    size = "medium",
-    srcs = ["optimizer_v2/optimizer_v2_test.py"],
-    additional_deps = [
-        ":optimizer_v2",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:clip_ops",
-        "//tensorflow/python:gradients",
-        "//tensorflow/python:resource_variable_ops",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:variables",
-    ],
-)
-
-cuda_py_test(
-    name = "rmsprop_test",
-    size = "small",
-    srcs = ["optimizer_v2/rmsprop_test.py"],
-    additional_deps = [
-        ":optimizer_v2",
-        "@absl_py//absl/testing:parameterized",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:embedding_ops",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-    tags = ["optonly"],
-)
diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD
new file mode 100644
index 0000000000..292c717e36
--- /dev/null
+++ b/tensorflow/python/keras/optimizer_v2/BUILD
@@ -0,0 +1,164 @@
+# Description:
+#   Contains the Keras OptimizerV2 API (internal TensorFlow version).
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+
+py_library(
+    name = "optimizer_v2",
+    srcs = [
+        "adadelta.py",
+        "adagrad.py",
+        "adam.py",
+        "optimizer_v2.py",
+        "rmsprop.py",
+        "sgd.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:distribute",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+    ],
+)
+
+cuda_py_test(
+    name = "adadelta_test",
+    size = "medium",
+    srcs = ["adadelta_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "adagrad_test",
+    size = "small",
+    srcs = ["adagrad_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "adam_test",
+    size = "small",
+    srcs = ["adam_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+)
+
+cuda_py_test(
+    name = "checkpointable_utils_test",
+    srcs = ["checkpointable_utils_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "@six_archive//:six",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:layers",
+        "//tensorflow/python:layers_base",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:training",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python/keras",
+    ],
+    tags = ["notsan"],
+)
+
+cuda_py_test(
+    name = "sgd_test",
+    size = "medium",
+    srcs = ["sgd_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:resources",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
+    ],
+)
+
+cuda_py_test(
+    name = "optimizer_v2_test",
+    size = "medium",
+    srcs = ["optimizer_v2_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:clip_ops",
+        "//tensorflow/python:gradients",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:variables",
+    ],
+)
+
+cuda_py_test(
+    name = "rmsprop_test",
+    size = "small",
+    srcs = ["rmsprop_test.py"],
+    additional_deps = [
+        ":optimizer_v2",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:embedding_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+    tags = ["optonly"],
+)
-- 
GitLab


From 7bcbcc1392516a2b2d7a7abae2ccce7091c8dae3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 17 Oct 2018 02:02:20 -0700
Subject: [PATCH 1080/1085] compat: Update forward compatibility horizon to
 2018-10-17

PiperOrigin-RevId: 217456158
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index dac46aaa7d..f2967f6d71 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 16)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 17)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 5674c9423f0732e6ab7b4cc428f730ce7bd3e857 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 17 Oct 2018 09:04:45 -0700
Subject: [PATCH 1081/1085] Internal change

PiperOrigin-RevId: 217527648
---
 tensorflow/core/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 4f95f207ad..7d864d434d 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -593,6 +593,7 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:other",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "//tensorflow/core/platform/default/build_config:port",
+        "@com_google_absl//absl/flags:flag",
     ],
 )
 
@@ -2194,6 +2195,7 @@ cc_library(
         ":lib_proto_parsing",
         ":abi",
         ":core_stringpiece",
+        "@com_google_absl//absl/flags:flag",
         "//third_party/eigen3",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "@snappy",
-- 
GitLab


From 4513f1910bf435c4cc46d0f0dfb3e0641286ce21 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 17 Oct 2018 09:51:13 -0700
Subject: [PATCH 1082/1085] Raise ValueError in tf.einsum() when passed
 unsupported input.

The case where an axis appears more than once for a single input
was listed in the docstring as unsupported, but the code didn't
raise an exception in this case, and instead returned an incorrect
result.

This change also fixes an unrelated bug in the docstring.

PiperOrigin-RevId: 217535799
---
 tensorflow/python/ops/special_math_ops.py      | 8 +++++++-
 tensorflow/python/ops/special_math_ops_test.py | 5 +++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/special_math_ops.py b/tensorflow/python/ops/special_math_ops.py
index e44bafedfc..cb417e4eb5 100644
--- a/tensorflow/python/ops/special_math_ops.py
+++ b/tensorflow/python/ops/special_math_ops.py
@@ -182,7 +182,6 @@ def einsum(equation, *inputs, **kwargs):
   * Ellipses (subscripts like `ij...,jk...->ik...`)
   * Subscripts where an axis appears more than once for a single input
     (e.g. `ijj,k->ik`).
-  * Subscripts that are summed across multiple inputs (e.g., `ij,ij,jk->ik`).
 
   Args:
     equation: a `str` describing the contraction, in the same format as
@@ -238,6 +237,13 @@ def einsum(equation, *inputs, **kwargs):
       output_axis_labels = ''.join(
           sorted(ax for ax in indices if counts[ax] == 1))
 
+    for a in axis_labels:
+      for input_labels in input_axis_labels:
+        if input_labels.count(a) > 1:
+          raise ValueError(
+              'Subscript not supported: an axis appears more than once: %s' %
+              input_labels)
+
     for a in axis_labels:
       input_count = sum(1 for s in input_axis_labels if a in s)
       if input_count > 2 and a not in output_axis_labels:
diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py
index d2f6b47697..7438cdb3f1 100644
--- a/tensorflow/python/ops/special_math_ops_test.py
+++ b/tensorflow/python/ops/special_math_ops_test.py
@@ -311,6 +311,11 @@ class EinsumTest(test.TestCase):
           invalid1='value1',
           invalid2='value2')
 
+  def test_repeated_axis_single_input(self):
+    x = array_ops.placeholder(dtypes.float32, shape=[2, 2])
+    with self.assertRaises(ValueError):
+      _ = special_math_ops.einsum('ii->', x)
+
   def test_dim_mismatch(self):
     for axes, input_shapes in self.dim_mismatch_cases:
       inputs = [
-- 
GitLab


From af7ebf45751df025bf5561ddb992b9c7a91b4201 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Wed, 17 Oct 2018 10:01:58 -0700
Subject: [PATCH 1083/1085] Automated rollback of commit
 5674c9423f0732e6ab7b4cc428f730ce7bd3e857

PiperOrigin-RevId: 217537594
---
 tensorflow/core/BUILD | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 7d864d434d..4f95f207ad 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -593,7 +593,6 @@ cc_library(
         "//tensorflow/core/platform/default/build_config:other",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "//tensorflow/core/platform/default/build_config:port",
-        "@com_google_absl//absl/flags:flag",
     ],
 )
 
@@ -2195,7 +2194,6 @@ cc_library(
         ":lib_proto_parsing",
         ":abi",
         ":core_stringpiece",
-        "@com_google_absl//absl/flags:flag",
         "//third_party/eigen3",
         "//tensorflow/core/platform/default/build_config:platformlib",
         "@snappy",
-- 
GitLab


From db46af2f820242a0249022135b15fab738bc7865 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 17 Oct 2018 10:27:19 -0700
Subject: [PATCH 1084/1085] Enabling tf.function on one more benchmark.

PiperOrigin-RevId: 217542911
---
 .../eager/python/examples/resnet50/resnet50_test.py      | 3 +--
 tensorflow/python/eager/def_function.py                  | 9 +++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
index e406aee29d..fb81979d7b 100644
--- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
+++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py
@@ -267,8 +267,7 @@ class ResNet50Benchmarks(tf.test.Benchmark):
         apply_grads = apply_gradients
         if defun:
           model.call = tfe.function(model.call)
-          # TODO(apassos) enable tf.function here
-          apply_grads = tfe.defun(apply_gradients)
+          apply_grads = tfe.function(apply_gradients)
 
         num_burn = 3
         num_iters = 10
diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 63f8e698a8..022c8685a8 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -29,7 +29,9 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
 from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.util import nest
 
 
 class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
@@ -178,12 +180,15 @@ def _defun_with_scope(scope, fn, input_signature):
   return function_lib.defun(wrapped_fn, input_signature=input_signature)
 
 
+# TODO(apassos) there should be an easier way to call a concrete defun.
 def _call_concrete(fn, args, unused_kwargs):
   """Calls the given concrete function with only the tensor arguments."""
 
   def inner():
     # TODO(apassos) figure out what to do with kwargs and concrete functions.
-    return fn(*[x for x in args if isinstance(x, ops.Tensor)])
+    return fn(*[x if isinstance(x, ops.Tensor) else x.handle
+                for x in nest.flatten(args)
+                if isinstance(x, (ops.Tensor, variables.Variable))])
 
   return inner
 
@@ -254,7 +259,7 @@ class PolymorphicFunction(object):
     elif self._stateful_fn is not None:
       # In this case we have not created variables on the first call. So we can
       # run the first trace but we should fail if variables are created.
-      results = self._first_trace(*args, **kwds)
+      results = self._stateful_fn(*args, **kwds)
       if self._created_variables:
         raise ValueError("Creating variables on a non-first call to a function"
                          " decorated with tf.function.")
-- 
GitLab


From 9a20e435c5b4b941fd7014342b7a359e5c6cdd6b Mon Sep 17 00:00:00 2001
From: Jing Li <jingli@google.com>
Date: Wed, 17 Oct 2018 10:33:20 -0700
Subject: [PATCH 1085/1085] - Support bfloat16 in Keras TPU. - Explicitly put
 optimizer parameter variables on TPU.

PiperOrigin-RevId: 217544154
---
 .../contrib/tpu/python/tpu/keras_support.py   | 22 ++++++++++++++-----
 .../tpu/python/tpu/keras_tpu_variables.py     |  3 ++-
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 9d7b894717..083b65a8da 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -280,9 +280,9 @@ def _cross_replica_concat(tensor, core_id, num_cores, name):
   """
 
   input_dtype = tensor.dtype
-  if input_dtype not in [dtypes.float32, dtypes.int32]:
-    raise TypeError('For model replication, only (float32 and int32) is '
-                    'supported for model outputs and targets. Got {} for '
+  if input_dtype not in [dtypes.bfloat16, dtypes.float32, dtypes.int32]:
+    raise TypeError('For model replication, only (bfloat16, float32 and int32) '
+                    'is supported for model outputs and targets. Got {} for '
                     '{}.'.format(input_dtype, name))
 
   batch_size = tensor.shape[0]
@@ -362,7 +362,7 @@ def _replicated_optimizer(opt):
     return KerasCrossShardOptimizer(opt)
 
 
-def _clone_optimizer(optimizer, config=None):
+def _clone_optimizer(optimizer, config=None, worker_name=None):
   """Returns a cloned optimizer with the provided optimizer.config or config."""
   if not isinstance(optimizer, keras_optimizers.Optimizer):
     # In the first call to tpu_model(model), Keras may not have wrapped the TF
@@ -377,7 +377,10 @@ def _clone_optimizer(optimizer, config=None):
   if config is None:
     config = optimizer.get_config()
   logging.info('Cloning %s %s', optimizer.__class__.__name__, config)
-  return optimizer.__class__.from_config(config)
+  with ops.device(
+      '%s/device:CPU:0' % ('/job:%s' % worker_name if worker_name else '')):
+    # Explicitly put optimizer parameter variables on TPU worker.
+    return optimizer.__class__.from_config(config)
 
 
 class TPURewriteContext(object):
@@ -956,7 +959,8 @@ class TPUFunction(object):
               self._tpu_assignment.num_towers):
             if not self._cloned_optimizer:
               self._cloned_optimizer = _clone_optimizer(
-                  self.model.cpu_optimizer)
+                  self.model.cpu_optimizer,
+                  worker_name=self._tpu_assignment.worker_name)
 
             self._cloned_model = models.clone_model(self.model)
 
@@ -973,6 +977,12 @@ class TPUFunction(object):
                       name='model output ({})'.format(o.name))
                   for o in self._cloned_model.outputs
               ]
+              # Recast all low precision outputs back to float32 since we only
+              # casted the inputs to bfloat16 and not targets. This is done so
+              # that we can preserve precision when calculating the loss value.
+              if new_outputs and new_outputs[0].dtype == dtypes.bfloat16:
+                new_outputs = [
+                    math_ops.cast(o, dtypes.float32) for o in new_outputs]
               self._cloned_model.outputs = new_outputs
               tpu_targets = [
                   _cross_replica_concat(
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
index 004b1012e5..28d3a93851 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
@@ -33,6 +33,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.keras import backend
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_resource_variable_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
 
@@ -227,7 +228,7 @@ class ReplicatedVariable(object):
       return self._primary_var._dense_var_to_tensor(dtype, name, as_ref)
     # pylint: enable=protected-access
     if dtype is not None and dtype != self.dtype:
-      return NotImplemented
+      return math_ops.cast(self._read_variable_op(), dtype)
     if as_ref:
       return self.handle
     else:
-- 
GitLab